]> xenbits.xensource.com Git - libvirt.git/commitdiff
qemu: add vfio devices to cgroup ACL when appropriate
authorLaine Stump <laine@laine.org>
Mon, 29 Apr 2013 17:15:26 +0000 (13:15 -0400)
committerLaine Stump <laine@laine.org>
Tue, 30 Apr 2013 01:52:28 +0000 (21:52 -0400)
PCI device assignment using VFIO requires read/write access by the
qemu process to /dev/vfio/vfio, and /dev/vfio/nn, where "nn" is the
VFIO group number that the assigned device belongs to (and can be
found with the function virPCIDeviceGetVFIOGroupDev)

/dev/vfio/vfio can be accessible to any guest without danger
(according to vfio developers), so it is added to the static ACL.

The group device must be dynamically added to the cgroup ACL for each
vfio hostdev in two places:

1) for any devices in the persistent config when the domain is started
   (done during qemuSetupCgroup())

2) at device attach time for any hotplug devices (done in
   qemuDomainAttachHostDevice)

The group device must be removed from the ACL when a device is
"hot-unplugged" (in qemuDomainDetachHostDevice())

Note that USB devices are already doing their own cgroup setup and
teardown in the hostdev-usb specific function. I chose to make the new
functions generic and call them in a common location though. We can
then move the USB-specific code (which is duplicated in two locations)
to this single location. I'll be posting a followup patch to do that.

src/qemu/qemu.conf
src/qemu/qemu_cgroup.c
src/qemu/qemu_cgroup.h
src/qemu/qemu_hotplug.c
src/qemu/test_libvirtd_qemu.aug.in

index 87bdf70d8dd07688938b77806e5ed20207b1b679..0f0a24c20ef4eaee1a6eead466370edc0b6b892d 100644 (file)
 #    "/dev/null", "/dev/full", "/dev/zero",
 #    "/dev/random", "/dev/urandom",
 #    "/dev/ptmx", "/dev/kvm", "/dev/kqemu",
-#    "/dev/rtc","/dev/hpet"
+#    "/dev/rtc","/dev/hpet", "/dev/vfio/vfio"
 #]
 
 
index 891984a5b15299a2df92522225818f64f8db9b2d..92c53d9e1c5c861b77bece7387d655229002c53b 100644 (file)
@@ -39,7 +39,7 @@ static const char *const defaultDeviceACL[] = {
     "/dev/null", "/dev/full", "/dev/zero",
     "/dev/random", "/dev/urandom",
     "/dev/ptmx", "/dev/kvm", "/dev/kqemu",
-    "/dev/rtc", "/dev/hpet",
+    "/dev/rtc", "/dev/hpet", "/dev/vfio/vfio",
     NULL,
 };
 #define DEVICE_PTY_MAJOR 136
@@ -214,6 +214,131 @@ int qemuSetupHostUsbDeviceCgroup(virUSBDevicePtr dev ATTRIBUTE_UNUSED,
 }
 
 
+/* Allow the domain's cgroup read/write access to the VFIO group device of
+ * @dev. Only PCI hostdevs using the VFIO backend need this; every other
+ * hostdev (and a cgroup without the devices controller) is a no-op that
+ * returns 0. Returns 0 on success, -1 on failure.
+ */
+int
+qemuSetupHostdevCGroup(virDomainObjPtr vm,
+                       virDomainHostdevDefPtr dev)
+{
+    int ret = -1;
+    qemuDomainObjPrivatePtr priv = vm->privateData;
+    virPCIDevicePtr pci = NULL;
+    char *path = NULL;
+
+    if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES))
+        return 0;
+
+    if (dev->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS) {
+        switch (dev->source.subsys.type) {
+        case VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI:
+            /* only VFIO assignment goes through a /dev/vfio group node */
+            if (dev->source.subsys.u.pci.backend
+                == VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO) {
+                int rc;
+
+                pci = virPCIDeviceNew(dev->source.subsys.u.pci.addr.domain,
+                                      dev->source.subsys.u.pci.addr.bus,
+                                      dev->source.subsys.u.pci.addr.slot,
+                                      dev->source.subsys.u.pci.addr.function);
+                if (!pci)
+                    goto cleanup;
+
+                if (!(path = virPCIDeviceGetVFIOGroupDev(pci)))
+                    goto cleanup;
+
+                VIR_DEBUG("Cgroup allow %s for PCI device assignment", path);
+                rc = virCgroupAllowDevicePath(priv->cgroup, path,
+                                              VIR_CGROUP_DEVICE_RW);
+                virDomainAuditCgroupPath(vm, priv->cgroup,
+                                         "allow", path, "rw", rc);
+                if (rc < 0) {
+                    virReportSystemError(-rc,
+                                         _("Unable to allow access "
+                                           "for device path %s"),
+                                         path);
+                    goto cleanup;
+                }
+            }
+            break;
+        default:
+            break;
+        }
+    }
+
+    ret = 0;
+cleanup:
+    virPCIDeviceFree(pci);
+    VIR_FREE(path);
+    return ret;
+}
+
+
+
+/* Counterpart of qemuSetupHostdevCGroup(): deny the domain's cgroup access
+ * to the VFIO group device of @dev. Anything but a VFIO-backed PCI hostdev
+ * (or a cgroup lacking the devices controller) is a no-op. Returns 0 / -1.
+ */
+int
+qemuTeardownHostdevCgroup(virDomainObjPtr vm,
+                          virDomainHostdevDefPtr dev)
+{
+    int ret = -1;
+    qemuDomainObjPrivatePtr priv = vm->privateData;
+    virPCIDevicePtr pci = NULL;
+    char *path = NULL;
+
+    if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES))
+        return 0;
+
+    if (dev->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS) {
+        switch (dev->source.subsys.type) {
+        case VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI:
+            /* NOTE(review): the group node is denied unconditionally; if other
+             * hostdevs can share the same VFIO group this needs checking */
+            if (dev->source.subsys.u.pci.backend
+                == VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO) {
+                int rc;
+
+                pci = virPCIDeviceNew(dev->source.subsys.u.pci.addr.domain,
+                                      dev->source.subsys.u.pci.addr.bus,
+                                      dev->source.subsys.u.pci.addr.slot,
+                                      dev->source.subsys.u.pci.addr.function);
+                if (!pci)
+                    goto cleanup;
+
+                if (!(path = virPCIDeviceGetVFIOGroupDev(pci)))
+                    goto cleanup;
+
+                VIR_DEBUG("Cgroup deny %s for PCI device assignment", path);
+                rc = virCgroupDenyDevicePath(priv->cgroup, path,
+                                             VIR_CGROUP_DEVICE_RWM);
+                virDomainAuditCgroupPath(vm, priv->cgroup,
+                                         "deny", path, "rwm", rc);
+                if (rc < 0) {
+                    virReportSystemError(-rc,
+                                         _("Unable to deny access "
+                                           "for device path %s"),
+                                         path);
+                    goto cleanup;
+                }
+            }
+            break;
+        default:
+            break;
+        }
+    }
+
+    ret = 0;
+cleanup:
+    virPCIDeviceFree(pci);
+    VIR_FREE(path);
+    return ret;
+}
+
+
 int qemuInitCgroup(virQEMUDriverPtr driver,
                    virDomainObjPtr vm,
                    bool startup)
@@ -423,6 +548,12 @@ int qemuSetupCgroup(virQEMUDriverPtr driver,
             virDomainHostdevDefPtr hostdev = vm->def->hostdevs[i];
             virUSBDevicePtr usb;
 
+            if (qemuSetupHostdevCGroup(vm, hostdev) < 0)
+                goto cleanup;
+
+            /* NB: the code below here should be moved into
+             * qemuSetupHostdevCGroup()
+             */
             if (hostdev->mode != VIR_DOMAIN_HOSTDEV_MODE_SUBSYS)
                 continue;
             if (hostdev->source.subsys.type != VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_USB)
index e63f443ed968f15aaf171fa0e92dc6354f82c2ae..f499cb6873966a0f9646d3c90a587d7251f558bf 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * qemu_cgroup.h: QEMU cgroup management
  *
- * Copyright (C) 2006-2007, 2009-2012 Red Hat, Inc.
+ * Copyright (C) 2006-2007, 2009-2013 Red Hat, Inc.
  * Copyright (C) 2006 Daniel P. Berrange
  *
  * This library is free software; you can redistribute it and/or
@@ -36,6 +36,12 @@ int qemuTeardownDiskCgroup(virDomainObjPtr vm,
 int qemuSetupHostUsbDeviceCgroup(virUSBDevicePtr dev,
                                  const char *path,
                                  void *opaque);
+int qemuSetupHostdevCGroup(virDomainObjPtr vm,
+                           virDomainHostdevDefPtr dev)
+   ATTRIBUTE_RETURN_CHECK;
+int qemuTeardownHostdevCgroup(virDomainObjPtr vm,
+                              virDomainHostdevDefPtr dev)
+   ATTRIBUTE_RETURN_CHECK;
 int qemuInitCgroup(virQEMUDriverPtr driver,
                    virDomainObjPtr vm,
                    bool startup);
index f5fa1c4cf0658ff23d436e1e911266fb39276a75..6beb2d6ee1ba754ec5ac2b6ffcd7e594ff9a085f 100644 (file)
@@ -1225,9 +1225,12 @@ int qemuDomainAttachHostDevice(virQEMUDriverPtr driver,
         virUSBDeviceListSteal(list, usb);
     }
 
+    if (qemuSetupHostdevCGroup(vm, hostdev) < 0)
+       goto cleanup;
+
     if (virSecurityManagerSetHostdevLabel(driver->securityManager,
                                           vm->def, hostdev, NULL) < 0)
-        goto cleanup;
+        goto teardown_cgroup;
 
     switch (hostdev->source.subsys.type) {
     case VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI:
@@ -1257,6 +1260,10 @@ error:
                                               vm->def, hostdev, NULL) < 0)
         VIR_WARN("Unable to restore host device labelling on hotplug fail");
 
+teardown_cgroup:
+    if (qemuTeardownHostdevCgroup(vm, hostdev) < 0)
+        VIR_WARN("Unable to remove host device cgroup ACL on hotplug fail");
+
 cleanup:
     virObjectUnref(list);
     if (usb)
@@ -2499,6 +2506,9 @@ int qemuDomainDetachThisHostDevice(virQEMUDriverPtr driver,
     }
 
     if (!ret) {
+        if (qemuTeardownHostdevCgroup(vm, detach) < 0)
+            VIR_WARN("Failed to remove host device cgroup ACL");
+
         if (virSecurityManagerRestoreHostdevLabel(driver->securityManager,
                                                   vm->def, detach, NULL) < 0) {
             VIR_WARN("Failed to restore host device labelling");
index 0aec9977debfd544c9e349d70baff717867e5e12..26ca0688d81ad9ff2b1149637b82b75b727f398d 100644 (file)
@@ -42,6 +42,7 @@ module Test_libvirtd_qemu =
     { "8" = "/dev/kqemu" }
     { "9" = "/dev/rtc" }
     { "10" = "/dev/hpet" }
+    { "11" = "/dev/vfio/vfio" }
 }
 { "save_image_format" = "raw" }
 { "dump_image_format" = "raw" }