ia64/xen-unstable

changeset 17751:b60cf40fae13

iommu: Handle sibling device assignment correctly

The domctl interface is extended to allow libxc to retrieve device group
information from the hypervisor. The vendor-specific iommu_ops is also
extended by adding a new operation, "get_device_group_id()", which is
currently a null pointer but could be implemented later for VT-d.

An error is raised from the tools side when a user tries to assign a
PCI device while a sibling device is still being driven by dom0. The
user will keep being prompted until the entire device group (or at
least the sibling devices driven by dom0) has been hidden via the dom0
kernel command-line parameter. This framework should be flexible enough
to support both the AMD IOMMU and VT-d.

The following 2 cases are not covered by this patch, but should be
easy to handle.
* Checking for hot-plug devices (maybe we can delay calling
ImageHandler.signalDeviceModel() until all checks are done?)
* Checking for a device group split between different passthrough
domains

Signed-off-by: Wei Wang <wei.wang2@amd.com>
author Keir Fraser <keir.fraser@citrix.com>
date Wed May 28 14:41:23 2008 +0100 (2008-05-28)
parents c2fab221b3ec
children f681c4de91fc
files tools/libxc/xc_domain.c tools/libxc/xenctrl.h tools/python/xen/lowlevel/xc/xc.c tools/python/xen/xend/server/pciif.py xen/arch/x86/domctl.c xen/drivers/passthrough/amd/pci_amd_iommu.c xen/drivers/passthrough/iommu.c xen/drivers/passthrough/vtd/iommu.c xen/include/public/domctl.h xen/include/xen/iommu.h
line diff
     1.1 --- a/tools/libxc/xc_domain.c	Wed May 28 12:22:36 2008 +0100
     1.2 +++ b/tools/libxc/xc_domain.c	Wed May 28 14:41:23 2008 +0100
     1.3 @@ -767,6 +767,37 @@ int xc_assign_device(
     1.4      return do_domctl(xc_handle, &domctl);
     1.5  }
     1.6  
     1.7 +int xc_get_device_group(
     1.8 +    int xc_handle,
     1.9 +    uint32_t domid,
    1.10 +    uint32_t machine_bdf,
    1.11 +    uint32_t max_sdevs,
    1.12 +    uint32_t *num_sdevs,
    1.13 +    uint32_t *sdev_array)
    1.14 +{
    1.15 +    int rc;
    1.16 +    DECLARE_DOMCTL;
    1.17 +
    1.18 +    domctl.cmd = XEN_DOMCTL_get_device_group;
    1.19 +    domctl.domain = (domid_t)domid;
    1.20 +
    1.21 +    domctl.u.get_device_group.machine_bdf = machine_bdf;
    1.22 +    domctl.u.get_device_group.max_sdevs = max_sdevs;
    1.23 +
    1.24 +    set_xen_guest_handle(domctl.u.get_device_group.sdev_array, sdev_array);
    1.25 +
    1.26 +    if ( lock_pages(sdev_array, max_sdevs * sizeof(*sdev_array)) != 0 )
    1.27 +    {
    1.28 +        PERROR("Could not lock memory for xc_get_device_group\n");
    1.29 +        return -ENOMEM;
    1.30 +    }
    1.31 +    rc = do_domctl(xc_handle, &domctl);
    1.32 +    unlock_pages(sdev_array, max_sdevs * sizeof(*sdev_array));
    1.33 +
    1.34 +    *num_sdevs = domctl.u.get_device_group.num_sdevs;
    1.35 +    return rc;
    1.36 +}
    1.37 +
    1.38  int xc_test_assign_device(
    1.39      int xc_handle,
    1.40      uint32_t domid,
     2.1 --- a/tools/libxc/xenctrl.h	Wed May 28 12:22:36 2008 +0100
     2.2 +++ b/tools/libxc/xenctrl.h	Wed May 28 14:41:23 2008 +0100
     2.3 @@ -955,6 +955,13 @@ int xc_assign_device(int xc_handle,
     2.4                       uint32_t domid,
     2.5                       uint32_t machine_bdf);
     2.6  
     2.7 +int xc_get_device_group(int xc_handle,
     2.8 +                     uint32_t domid,
     2.9 +                     uint32_t machine_bdf,
    2.10 +                     uint32_t max_sdevs,
    2.11 +                     uint32_t *num_sdevs,
    2.12 +                     uint32_t *sdev_array);
    2.13 +
    2.14  int xc_test_assign_device(int xc_handle,
    2.15                            uint32_t domid,
    2.16                            uint32_t machine_bdf);
     3.1 --- a/tools/python/xen/lowlevel/xc/xc.c	Wed May 28 12:22:36 2008 +0100
     3.2 +++ b/tools/python/xen/lowlevel/xc/xc.c	Wed May 28 14:41:23 2008 +0100
     3.3 @@ -646,6 +646,68 @@ static PyObject *pyxc_deassign_device(Xc
     3.4      return Py_BuildValue("i", bdf);
     3.5  }
     3.6  
     3.7 +static PyObject *pyxc_get_device_group(XcObject *self,
     3.8 +                                         PyObject *args)
     3.9 +{
    3.10 +    domid_t domid;
    3.11 +    uint32_t bdf = 0;
    3.12 +    uint32_t max_sdevs, num_sdevs;
    3.13 +    int seg, bus, dev, func, rc, i;
    3.14 +    PyObject *Pystr;
    3.15 +    char *group_str;
    3.16 +    char dev_str[9];
    3.17 +    uint32_t *sdev_array;
    3.18 +
    3.19 +    if ( !PyArg_ParseTuple(args, "iiiii", &domid, &seg, &bus, &dev, &func) )
    3.20 +        return NULL;
    3.21 +
    3.22 +    /* Maximum allowed siblings device number per group */
    3.23 +    max_sdevs = 1024;
    3.24 +
    3.25 +    if ( (sdev_array = malloc(max_sdevs * sizeof(*sdev_array))) == NULL )
    3.26 +        return PyErr_NoMemory();
    3.27 +    memset(sdev_array, 0, max_sdevs * sizeof(*sdev_array));
    3.28 +
    3.29 +    bdf |= (bus & 0xff) << 16;
    3.30 +    bdf |= (dev & 0x1f) << 11;
    3.31 +    bdf |= (func & 0x7) << 8;
    3.32 +
    3.33 +    rc = xc_get_device_group(self->xc_handle,
    3.34 +        domid, bdf, max_sdevs, &num_sdevs, sdev_array);
    3.35 +
    3.36 +    if ( rc < 0 )
    3.37 +    {
    3.38 +      free(sdev_array); 
    3.39 +      return pyxc_error_to_exception();
    3.40 +    }
    3.41 +
    3.42 +    if ( !num_sdevs )
    3.43 +    {
    3.44 +       free(sdev_array);
    3.45 +       return Py_BuildValue("s", "");
    3.46 +    }
    3.47 +
    3.48 +    if ( (group_str = malloc(num_sdevs * sizeof(dev_str))) == NULL )
    3.49 +        return PyErr_NoMemory();
    3.50 +    memset(group_str, '\0', num_sdevs * sizeof(dev_str));
    3.51 +
    3.52 +    for ( i = 0; i < num_sdevs; i++ )
    3.53 +    {
    3.54 +        bus = (sdev_array[i] >> 16) & 0xff;
    3.55 +        dev = (sdev_array[i] >> 11) & 0x1f;
    3.56 +        func = (sdev_array[i] >> 8) & 0x7;
    3.57 +        sprintf(dev_str, "%02x:%02x.%x,", bus, dev, func);
    3.58 +        strcat(group_str, dev_str);
    3.59 +    }
    3.60 +
    3.61 +    Pystr = Py_BuildValue("s", group_str);
    3.62 +
    3.63 +    free(sdev_array);
    3.64 +    free(group_str);
    3.65 +
    3.66 +    return Pystr;
    3.67 +}
    3.68 +
    3.69  #ifdef __ia64__
    3.70  static PyObject *pyxc_nvram_init(XcObject *self,
    3.71                                   PyObject *args)
    3.72 @@ -1584,6 +1646,17 @@ static PyMethodDef pyxc_methods[] = {
    3.73        " value   [long]:     Value of param.\n"
    3.74        "Returns: [int] 0 on success.\n" },
    3.75  
    3.76 +    { "get_device_group",
    3.77 +      (PyCFunction)pyxc_get_device_group,
    3.78 +      METH_VARARGS, "\n"
    3.79 +      "get sibling devices infomation.\n"
    3.80 +      " dom     [int]:      Domain to assign device to.\n"
    3.81 +      " seg     [int]:      PCI segment.\n"
    3.82 +      " bus     [int]:      PCI bus.\n"
    3.83 +      " dev     [int]:      PCI dev.\n"
    3.84 +      " func    [int]:      PCI func.\n"
    3.85 +      "Returns: [string]:   Sibling devices \n" },
    3.86 +
    3.87       { "test_assign_device",
    3.88         (PyCFunction)pyxc_test_assign_device,
    3.89         METH_VARARGS | METH_KEYWORDS, "\n"
     4.1 --- a/tools/python/xen/xend/server/pciif.py	Wed May 28 12:22:36 2008 +0100
     4.2 +++ b/tools/python/xen/xend/server/pciif.py	Wed May 28 14:41:23 2008 +0100
     4.3 @@ -226,6 +226,39 @@ class PciController(DevController):
     4.4  
     4.5          return sxpr    
     4.6  
     4.7 +    def CheckSiblingDevices(self, domid, dev):
     4.8 +        """ Check if all sibling devices of dev are owned by pciback
     4.9 +        """
    4.10 +        if not self.vm.info.is_hvm():
    4.11 +            return
    4.12 +
    4.13 +        group_str = xc.get_device_group(domid, dev.domain, dev.bus, dev.slot, dev.func)
    4.14 +        if group_str == "":
    4.15 +            return
    4.16 +
    4.17 +        #group string format xx:xx.x,xx:xx.x,
    4.18 +        devstr_len = group_str.find(',')
    4.19 +        for i in range(0, len(group_str), devstr_len + 1):
    4.20 +            (bus, slotfunc) = group_str[i:i + devstr_len].split(':')
    4.21 +            (slot, func) = slotfunc.split('.')
    4.22 +            b = parse_hex(bus)
    4.23 +            d = parse_hex(slot)
    4.24 +            f = parse_hex(func)
    4.25 +            try:
    4.26 +                sdev = PciDevice(dev.domain, b, d, f)
    4.27 +            except Exception, e:
    4.28 +                #no dom0 drivers bound to sdev
    4.29 +                continue
    4.30 +
    4.31 +            if sdev.driver!='pciback':
    4.32 +                raise VmError(("pci: PCI Backend does not own\n "+ \
    4.33 +                    "sibling device %s of device %s\n"+ \
    4.34 +                    "See the pciback.hide kernel "+ \
    4.35 +                    "command-line parameter or\n"+ \
    4.36 +                    "bind your slot/device to the PCI backend using sysfs" \
    4.37 +                    )%(sdev.name, dev.name))
    4.38 +        return
    4.39 +
    4.40      def setupOneDevice(self, domain, bus, slot, func):
    4.41          """ Attach I/O resources for device to frontend domain
    4.42          """
    4.43 @@ -245,6 +278,8 @@ class PciController(DevController):
    4.44                      "bind your slot/device to the PCI backend using sysfs" \
    4.45                      )%(dev.name))
    4.46  
    4.47 +        self.CheckSiblingDevices(fe_domid, dev)
    4.48 +
    4.49          PCIQuirk(dev.vendor, dev.device, dev.subvendor, dev.subdevice, domain, 
    4.50                  bus, slot, func)
    4.51  
     5.1 --- a/xen/arch/x86/domctl.c	Wed May 28 12:22:36 2008 +0100
     5.2 +++ b/xen/arch/x86/domctl.c	Wed May 28 14:41:23 2008 +0100
     5.3 @@ -526,6 +526,45 @@ long arch_do_domctl(
     5.4      }
     5.5      break;
     5.6  
     5.7 +    case XEN_DOMCTL_get_device_group:
     5.8 +    {
     5.9 +        struct domain *d;
    5.10 +        u32 max_sdevs;
    5.11 +        u8 bus, devfn;
    5.12 +        XEN_GUEST_HANDLE_64(uint32) sdevs;
    5.13 +        int num_sdevs;
    5.14 +
    5.15 +        ret = -ENOSYS;
    5.16 +        if ( !iommu_enabled )
    5.17 +            break;
    5.18 +
    5.19 +        ret = -EINVAL;
    5.20 +        if ( (d = rcu_lock_domain_by_id(domctl->domain)) == NULL )
    5.21 +            break;
    5.22 +
    5.23 +        bus = (domctl->u.get_device_group.machine_bdf >> 16) & 0xff;
    5.24 +        devfn = (domctl->u.get_device_group.machine_bdf >> 8) & 0xff;
    5.25 +        max_sdevs = domctl->u.get_device_group.max_sdevs;
    5.26 +        sdevs = domctl->u.get_device_group.sdev_array;
    5.27 +
    5.28 +        num_sdevs = iommu_get_device_group(d, bus, devfn, sdevs, max_sdevs);
    5.29 +        if ( num_sdevs < 0 )
    5.30 +        {
    5.31 +            dprintk(XENLOG_ERR, "iommu_get_device_group() failed!\n");
    5.32 +            ret = -EFAULT;
    5.33 +            domctl->u.get_device_group.num_sdevs = 0;
    5.34 +        }
    5.35 +        else
    5.36 +        {
    5.37 +            ret = 0;
    5.38 +            domctl->u.get_device_group.num_sdevs = num_sdevs;
    5.39 +        }
    5.40 +        if ( copy_to_guest(u_domctl, domctl, 1) )
    5.41 +            ret = -EFAULT;
    5.42 +        rcu_unlock_domain(d);
    5.43 +    }
    5.44 +    break;
    5.45 +
    5.46      case XEN_DOMCTL_test_assign_device:
    5.47      {
    5.48          u8 bus, devfn;
     6.1 --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c	Wed May 28 12:22:36 2008 +0100
     6.2 +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c	Wed May 28 14:41:23 2008 +0100
     6.3 @@ -635,6 +635,16 @@ static void amd_iommu_return_device(
     6.4      reassign_device(s, t, bus, devfn);
     6.5  }
     6.6  
     6.7 +static int amd_iommu_group_id(u8 bus, u8 devfn)
     6.8 +{
     6.9 +    int rt;
    6.10 +    int bdf = (bus << 8) | devfn;
    6.11 +    rt = ( bdf < ivrs_bdf_entries ) ?
    6.12 +        ivrs_mappings[bdf].dte_requestor_id :
    6.13 +        bdf;
    6.14 +    return rt;
    6.15 +}
    6.16 +
    6.17  struct iommu_ops amd_iommu_ops = {
    6.18      .init = amd_iommu_domain_init,
    6.19      .assign_device  = amd_iommu_assign_device,
    6.20 @@ -642,4 +652,5 @@ struct iommu_ops amd_iommu_ops = {
    6.21      .map_page = amd_iommu_map_page,
    6.22      .unmap_page = amd_iommu_unmap_page,
    6.23      .reassign_device = amd_iommu_return_device,
    6.24 +    .get_device_group_id = amd_iommu_group_id,
    6.25  };
     7.1 --- a/xen/drivers/passthrough/iommu.c	Wed May 28 12:22:36 2008 +0100
     7.2 +++ b/xen/drivers/passthrough/iommu.c	Wed May 28 14:41:23 2008 +0100
     7.3 @@ -16,6 +16,7 @@
     7.4  #include <xen/sched.h>
     7.5  #include <xen/iommu.h>
     7.6  #include <xen/paging.h>
     7.7 +#include <xen/guest_access.h>
     7.8  
     7.9  extern struct iommu_ops intel_iommu_ops;
    7.10  extern struct iommu_ops amd_iommu_ops;
    7.11 @@ -216,7 +217,41 @@ static int iommu_setup(void)
    7.12  }
    7.13  __initcall(iommu_setup);
    7.14  
    7.15 +int iommu_get_device_group(struct domain *d, u8 bus, u8 devfn, 
    7.16 +    XEN_GUEST_HANDLE_64(uint32) buf, int max_sdevs)
    7.17 +{
    7.18 +    struct hvm_iommu *hd = domain_hvm_iommu(d);
    7.19 +    struct pci_dev *pdev;
    7.20 +    int group_id, sdev_id;
    7.21 +    u32 bdf;
    7.22 +    int i = 0;
    7.23 +    struct iommu_ops *ops = hd->platform_ops;
    7.24  
    7.25 +    if ( !iommu_enabled || !ops || !ops->get_device_group_id )
    7.26 +        return 0;
    7.27 +
    7.28 +    group_id = ops->get_device_group_id(bus, devfn);
    7.29 +
    7.30 +    list_for_each_entry(pdev,
    7.31 +        &(dom0->arch.hvm_domain.hvm_iommu.pdev_list), list)
    7.32 +    {
    7.33 +        if ( (pdev->bus == bus) && (pdev->devfn == devfn) )
    7.34 +            continue;
    7.35 +
    7.36 +        sdev_id = ops->get_device_group_id(pdev->bus, pdev->devfn);
    7.37 +        if ( (sdev_id == group_id) && (i < max_sdevs) )
    7.38 +        {
    7.39 +            bdf = 0;
    7.40 +            bdf |= (pdev->bus & 0xff) << 16;
    7.41 +            bdf |= (pdev->devfn & 0xff) << 8;
    7.42 +            if ( unlikely(copy_to_guest_offset(buf, i, &bdf, 1)) )
    7.43 +                return -1;
    7.44 +            i++;
    7.45 +        }
    7.46 +    }
    7.47 +
    7.48 +    return i;
    7.49 +}
    7.50  /*
    7.51   * Local variables:
    7.52   * mode: C
     8.1 --- a/xen/drivers/passthrough/vtd/iommu.c	Wed May 28 12:22:36 2008 +0100
     8.2 +++ b/xen/drivers/passthrough/vtd/iommu.c	Wed May 28 14:41:23 2008 +0100
     8.3 @@ -1955,6 +1955,7 @@ struct iommu_ops intel_iommu_ops = {
     8.4      .map_page = intel_iommu_map_page,
     8.5      .unmap_page = intel_iommu_unmap_page,
     8.6      .reassign_device = reassign_device_ownership,
     8.7 +    .get_device_group_id = NULL,
     8.8  };
     8.9  
    8.10  /*
     9.1 --- a/xen/include/public/domctl.h	Wed May 28 12:22:36 2008 +0100
     9.2 +++ b/xen/include/public/domctl.h	Wed May 28 14:41:23 2008 +0100
     9.3 @@ -448,6 +448,16 @@ struct xen_domctl_assign_device {
     9.4  typedef struct xen_domctl_assign_device xen_domctl_assign_device_t;
     9.5  DEFINE_XEN_GUEST_HANDLE(xen_domctl_assign_device_t);
     9.6  
     9.7 +/* Retrieve sibling devices infomation of machine_bdf */
     9.8 +#define XEN_DOMCTL_get_device_group 50
     9.9 +struct xen_domctl_get_device_group {
    9.10 +    uint32_t  machine_bdf;      /* IN */
    9.11 +    uint32_t  max_sdevs;        /* IN */
    9.12 +    uint32_t  num_sdevs;        /* OUT */
    9.13 +    XEN_GUEST_HANDLE_64(uint32)  sdev_array;   /* OUT */
    9.14 +};
    9.15 +typedef struct xen_domctl_get_device_group xen_domctl_get_device_group_t;
    9.16 +DEFINE_XEN_GUEST_HANDLE(xen_domctl_get_device_group_t);
    9.17  
    9.18  /* Pass-through interrupts: bind real irq -> hvm devfn. */
    9.19  #define XEN_DOMCTL_bind_pt_irq       38
    9.20 @@ -619,6 +629,7 @@ struct xen_domctl {
    9.21          struct xen_domctl_hvmcontext        hvmcontext;
    9.22          struct xen_domctl_address_size      address_size;
    9.23          struct xen_domctl_sendtrigger       sendtrigger;
    9.24 +        struct xen_domctl_get_device_group  get_device_group;
    9.25          struct xen_domctl_assign_device     assign_device;
    9.26          struct xen_domctl_bind_pt_irq       bind_pt_irq;
    9.27          struct xen_domctl_memory_mapping    memory_mapping;
    10.1 --- a/xen/include/xen/iommu.h	Wed May 28 12:22:36 2008 +0100
    10.2 +++ b/xen/include/xen/iommu.h	Wed May 28 14:41:23 2008 +0100
    10.3 @@ -61,6 +61,8 @@ void iommu_domain_destroy(struct domain 
    10.4  int device_assigned(u8 bus, u8 devfn);
    10.5  int assign_device(struct domain *d, u8 bus, u8 devfn);
    10.6  void deassign_device(struct domain *d, u8 bus, u8 devfn);
    10.7 +int iommu_get_device_group(struct domain *d, u8 bus, u8 devfn, 
    10.8 +    XEN_GUEST_HANDLE_64(uint32) buf, int max_sdevs);
    10.9  void reassign_device_ownership(struct domain *source,
   10.10                                 struct domain *target,
   10.11                                 u8 bus, u8 devfn);
   10.12 @@ -98,6 +100,7 @@ struct iommu_ops {
   10.13      int (*unmap_page)(struct domain *d, unsigned long gfn);
   10.14      void (*reassign_device)(struct domain *s, struct domain *t,
   10.15                              u8 bus, u8 devfn);
   10.16 +    int (*get_device_group_id)(u8 bus, u8 devfn);
   10.17  };
   10.18  
   10.19  #endif /* _IOMMU_H_ */