]> xenbits.xensource.com Git - libvirt.git/commitdiff
qemu: Add support for /dev/userfaultfd
author: Jiri Denemark <jdenemar@redhat.com>
Thu, 8 Feb 2024 14:56:38 +0000 (15:56 +0100)
committer: Jiri Denemark <jdenemar@redhat.com>
Tue, 13 Feb 2024 16:44:26 +0000 (17:44 +0100)
The /dev/userfaultfd device is preferred over the userfaultfd syscall for
post-copy migrations. Unless the qemu driver is configured to disable the
mount namespace or to forbid access to /dev/userfaultfd in cgroup_device_acl,
we will copy it to the limited /dev filesystem QEMU will have access to
and label it appropriately. So in the default configuration, post-copy
migration will be allowed even without enabling the
vm.unprivileged_userfaultfd sysctl.

Signed-off-by: Jiri Denemark <jdenemar@redhat.com>
Reviewed-by: Ján Tomko <jtomko@redhat.com>
src/qemu/qemu.conf.in
src/qemu/qemu_cgroup.c
src/qemu/qemu_process.c
src/qemu/qemu_security.c
src/qemu/qemu_security.h
src/qemu/test_libvirtd_qemu.aug.in

index 34025a02ef23893e9fbc5cfb11abcdb6dc63bff1..f406df8749a2b30e1a1d7c8cb752e994b5b360d0 100644 (file)
 #cgroup_device_acl = [
 #    "/dev/null", "/dev/full", "/dev/zero",
 #    "/dev/random", "/dev/urandom",
-#    "/dev/ptmx", "/dev/kvm"
+#    "/dev/ptmx", "/dev/kvm",
+#    "/dev/userfaultfd"
 #]
 #
 # RDMA migration requires the following extra files to be added to the list:
index 47402b37507b6728501391c784b6b5a360f0a55b..5a5ba763a014fbdc8b321925a17c2ac311e5e30a 100644 (file)
@@ -41,6 +41,7 @@ const char *const defaultDeviceACL[] = {
     "/dev/null", "/dev/full", "/dev/zero",
     "/dev/random", "/dev/urandom",
     "/dev/ptmx", "/dev/kvm",
+    "/dev/userfaultfd",
     NULL,
 };
 #define DEVICE_PTY_MAJOR 136
index 0a6c18a67133c3a16f88c65c04347a2942d45815..6e51d6586be5f81f38a966cb4e04d1b3a58dd11d 100644 (file)
@@ -2882,6 +2882,40 @@ qemuProcessStartManagedPRDaemon(virDomainObj *vm)
 }
 
 
+static int
+qemuProcessAllowPostCopyMigration(virDomainObj *vm)
+{
+    qemuDomainObjPrivate *priv = vm->privateData;
+    virQEMUDriver *driver = priv->driver;
+    g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
+    const char *const *devices = (const char *const *) cfg->cgroupDeviceACL;
+    const char *uffd = "/dev/userfaultfd";
+    int rc;
+
+    if (!virFileExists(uffd)) {
+        VIR_DEBUG("%s is not supported by the host", uffd);
+        return 0;
+    }
+
+    if (!devices)
+        devices = defaultDeviceACL;
+
+    if (!g_strv_contains(devices, uffd)) {
+        VIR_DEBUG("%s is not allowed by device ACL", uffd);
+        return 0;
+    }
+
+    VIR_DEBUG("Labeling %s in mount namespace", uffd);
+    if ((rc = qemuSecurityDomainSetMountNSPathLabel(driver, vm, uffd)) < 0)
+        return -1;
+
+    if (rc == 1)
+        VIR_DEBUG("Mount namespace is not enabled, leaving %s as is", uffd);
+
+    return 0;
+}
+
+
 static int
 qemuProcessInitPasswords(virQEMUDriver *driver,
                          virDomainObj *vm,
@@ -7802,6 +7836,10 @@ qemuProcessLaunch(virConnectPtr conn,
         qemuProcessStartManagedPRDaemon(vm) < 0)
         goto cleanup;
 
+    VIR_DEBUG("Setting up permissions to allow post-copy migration");
+    if (qemuProcessAllowPostCopyMigration(vm) < 0)
+        goto cleanup;
+
     VIR_DEBUG("Setting domain security labels");
     if (qemuSecuritySetAllLabel(driver,
                                 vm,
index 8bcef14d089d11e686e2e26b0eb2082f484100b8..4aaa863ae9dbaab3063cf3847fff3429df6c60fc 100644 (file)
@@ -615,6 +615,51 @@ qemuSecurityDomainRestorePathLabel(virQEMUDriver *driver,
 }
 
 
+/**
+ * qemuSecurityDomainSetMountNSPathLabel:
+ *
+ * Label given path in mount namespace. If mount namespace is not enabled,
+ * nothing is labeled at all.
+ *
+ * Because the label is only applied in mount namespace, there's no need to
+ * restore it.
+ *
+ * Returns 0 on success,
+ *         1 when mount namespace is not enabled,
+ *        -1 on error.
+ */
+int
+qemuSecurityDomainSetMountNSPathLabel(virQEMUDriver *driver,
+                                      virDomainObj *vm,
+                                      const char *path)
+{
+    int ret = -1;
+
+    if (!qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT)) {
+        VIR_DEBUG("Not labeling '%s': mount namespace disabled for domain '%s'",
+                  path, vm->def->name);
+        return 1;
+    }
+
+    if (virSecurityManagerTransactionStart(driver->securityManager) < 0)
+        goto cleanup;
+
+    if (virSecurityManagerDomainSetPathLabel(driver->securityManager,
+                                             vm->def, path, false) < 0)
+        goto cleanup;
+
+    if (virSecurityManagerTransactionCommit(driver->securityManager,
+                                            vm->pid, false) < 0)
+        goto cleanup;
+
+    ret = 0;
+
+ cleanup:
+    virSecurityManagerTransactionAbort(driver->securityManager);
+    return ret;
+}
+
+
 /**
  * qemuSecurityCommandRun:
  * @driver: the QEMU driver
index 10f11771b495933a81337fe6be3916c88c775c32..41da33debcc8a9a119c02c2f11a07afe74c4402b 100644 (file)
@@ -110,6 +110,11 @@ int qemuSecurityDomainRestorePathLabel(virQEMUDriver *driver,
                                        virDomainObj *vm,
                                        const char *path);
 
+int
+qemuSecurityDomainSetMountNSPathLabel(virQEMUDriver *driver,
+                                      virDomainObj *vm,
+                                      const char *path);
+
 int qemuSecurityCommandRun(virQEMUDriver *driver,
                            virDomainObj *vm,
                            virCommand *cmd,
index e4cfde6cc79fac10a9e610ac30a503f900320bba..b97e6de11ee7c31187e7cacb3e4d5dbb712233b1 100644 (file)
@@ -67,6 +67,7 @@ module Test_libvirtd_qemu =
     { "5" = "/dev/urandom" }
     { "6" = "/dev/ptmx" }
     { "7" = "/dev/kvm" }
+    { "8" = "/dev/userfaultfd" }
 }
 { "save_image_format" = "raw" }
 { "dump_image_format" = "raw" }