From 2049ef99425db33f1e66fa8e184d153231096d83 Mon Sep 17 00:00:00 2001 From: "Daniel P. Berrange" Date: Mon, 22 Jul 2013 15:21:15 +0100 Subject: [PATCH] Create + setup cgroups atomically for QEMU process Currently the QEMU driver creates the VM's cgroup prior to forking, and then uses a virCommand hook to move the child into the cgroup. This won't work with systemd whose APIs do the creation of cgroups + attachment of processes atomically. Fortunately we have a handshake taking place between the QEMU driver and the child process prior to QEMU being exec()d, which was introduced to allow setup of disk locking. By good fortune this synchronization point can be used to enable the QEMU driver to do atomic setup of cgroups removing the use of the hook script. Signed-off-by: Daniel P. Berrange --- src/qemu/qemu_cgroup.c | 12 +++++++++--- src/qemu/qemu_process.c | 39 +++++++++++++++++---------------------- 2 files changed, 26 insertions(+), 25 deletions(-) diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c index 61894900b7..11c9d29f50 100644 --- a/src/qemu/qemu_cgroup.c +++ b/src/qemu/qemu_cgroup.c @@ -683,6 +683,9 @@ qemuInitCgroup(virQEMUDriverPtr driver, &priv->cgroup) < 0) goto cleanup; + if (virCgroupAddTask(priv->cgroup, vm->pid) < 0) + goto cleanup; + done: ret = 0; cleanup: @@ -738,6 +741,12 @@ qemuSetupCgroup(virQEMUDriverPtr driver, virCapsPtr caps = NULL; int ret = -1; + if (!vm->pid) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Cannot setup cgroups until process is started")); + return -1; + } + if (qemuInitCgroup(driver, vm) < 0) return -1; @@ -1009,8 +1018,5 @@ qemuAddToCgroup(virDomainObjPtr vm) if (priv->cgroup == NULL) return 0; /* Not supported, so claim success */ - if (virCgroupAddTask(priv->cgroup, getpid()) < 0) - return -1; - return 0; } diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index c5f281a866..e8e459e23d 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -1929,6 +1929,12 @@ qemuProcessInitCpuAffinity(virQEMUDriverPtr driver, virBitmapPtr cpumap = NULL; virBitmapPtr cpumapToSet = NULL; + if (!vm->pid) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Cannot setup CPU affinity until process is started")); + return -1; + } + if (!(cpumap = qemuPrepareCpumap(driver, nodemask))) return -1; @@ -1949,11 +1955,7 @@ qemuProcessInitCpuAffinity(virQEMUDriverPtr driver, } } - /* We are pressuming we are running between fork/exec of QEMU - * so use '0' to indicate our own process ID. No threads are - * running at this point - */ - if (virProcessSetAffinity(0 /* Self */, cpumapToSet) < 0) + if (virProcessSetAffinity(vm->pid, cpumapToSet) < 0) goto cleanup; ret = 0; @@ -2562,19 +2564,6 @@ static int qemuProcessHook(void *data) if (virSecurityManagerClearSocketLabel(h->driver->securityManager, h->vm->def) < 0) goto cleanup; - /* This must take place before exec(), so that all QEMU - * memory allocation is on the correct NUMA node - */ - VIR_DEBUG("Moving process to cgroup"); - if (qemuAddToCgroup(h->vm) < 0) - goto cleanup; - - /* This must be done after cgroup placement to avoid resetting CPU - * affinity */ - if (!h->vm->def->cputune.emulatorpin && - qemuProcessInitCpuAffinity(h->driver, h->vm, h->nodemask) < 0) - goto cleanup; - if (virNumaSetupMemoryPolicy(h->vm->def->numatune, h->nodemask) < 0) goto cleanup; @@ -3671,10 +3660,6 @@ int qemuProcessStart(virConnectPtr conn, goto cleanup; } - VIR_DEBUG("Setting up domain cgroup (if required)"); - if (qemuSetupCgroup(driver, vm, nodemask) < 0) - goto cleanup; - if (VIR_ALLOC(priv->monConfig) < 0) goto cleanup; @@ -3844,6 +3829,16 @@ int qemuProcessStart(virConnectPtr conn, goto cleanup; } + VIR_DEBUG("Setting up domain cgroup (if required)"); + if (qemuSetupCgroup(driver, vm, nodemask) < 0) + goto cleanup; + + /* This must be done after cgroup placement to avoid resetting CPU + * affinity */ + if (!vm->def->cputune.emulatorpin && + qemuProcessInitCpuAffinity(driver, vm, nodemask) < 0) + goto cleanup; + VIR_DEBUG("Setting domain security labels"); if (virSecurityManagerSetAllLabel(driver->securityManager, vm->def, stdin_path) < 0) -- 2.39.5