qemu_domain.c: NUMA CPUs auto-fill for incomplete topologies
author    Daniel Henrique Barboza <danielhb413@gmail.com>
          Wed, 10 Jun 2020 18:35:51 +0000 (15:35 -0300)
committer Michal Privoznik <mprivozn@redhat.com>
          Thu, 18 Jun 2020 10:31:54 +0000 (12:31 +0200)
Libvirt allows the user to define an incomplete NUMA topology, in
which the sum of CPUs across all cells is less than the total number
of vCPUs. When that happens, QEMU allocates the non-enumerated CPUs
in the first NUMA node. This behavior has been flagged as 'to be
deprecated' at least since QEMU commit ec78f8114bc4 ("numa: use
possible_cpus for not mapped CPUs check").
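
For example, a definition along these lines (CPU counts and memory
sizes are purely illustrative) declares 8 vCPUs but only enumerates
6 of them across its NUMA cells:

  <vcpu placement='static'>8</vcpu>
  <cpu>
    <numa>
      <!-- illustrative incomplete topology: vCPUs 6 and 7 are not listed -->
      <cell id='0' cpus='0-3' memory='1048576' unit='KiB'/>
      <cell id='1' cpus='4-5' memory='1048576' unit='KiB'/>
    </numa>
  </cpu>

With such a definition, QEMU ends up placing vCPUs 6 and 7 in the
first NUMA node on its own.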

In [1], Maxiwell suggested that we forbid users from defining such
topologies. In his review [2], Peter Krempa pointed out that we can't
break existing guests, and suggested that Libvirt should emulate the
QEMU behavior of putting the remaining vCPUs in the first NUMA node
in these cases.

This patch implements Peter Krempa's suggestion. Since the auto-fill
will most likely leave node 0 with a disjointed CPU range, the
auto-fill is made dependent on QEMU_CAPS_NUMA.
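
Continuing the illustrative example above, the auto-fill would leave
cell 0 with the disjointed CPU range '0-3,6-7', while cell 1 keeps
'4-5':

  <numa>
    <!-- illustrative result of the auto-fill -->
    <cell id='0' cpus='0-3,6-7' memory='1048576' unit='KiB'/>
    <cell id='1' cpus='4-5' memory='1048576' unit='KiB'/>
  </numa>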

A follow-up patch will update the documentation not only to describe
the auto-fill mechanism for incomplete NUMA topologies, but also to
discourage users from creating such topologies in the future. This
approach also makes Libvirt independent of whether QEMU changes its
current behavior: either the CPUs are auto-filled into node 0, or the
user is (hopefully) aware that incomplete topologies, although
supported by Libvirt, are to be avoided.

[1] https://www.redhat.com/archives/libvir-list/2019-June/msg00224.html
[2] https://www.redhat.com/archives/libvir-list/2019-June/msg00263.html

Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
Reviewed-by: Michal Privoznik <mprivozn@redhat.com>
src/qemu/qemu_domain.c
src/qemu/qemu_domain.h
src/qemu/qemu_driver.c

diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
index 33ce0ad9920a18262eb073775d7ec2c1dfad04c1..72874ee4fd23fc09d1f75d7098ac811ef0028fef 100644
--- a/src/qemu/qemu_domain.c
+++ b/src/qemu/qemu_domain.c
@@ -4963,6 +4963,50 @@ qemuDomainDefTsegPostParse(virDomainDefPtr def,
 }
 
 
+/**
+ * qemuDomainDefNumaCPUsRectify:
+ * @def: domain definition containing the NUMA topology
+ * @qemuCaps: QEMU capabilities of the domain's binary
+ *
+ * Emulates the (to be deprecated) QEMU behavior of filling node 0
+ * with the remaining CPUs in case of an incomplete NUMA setup, up
+ * to virDomainDefGetVcpusMax().
+ *
+ * Returns: 0 on success, -1 on error
+ */
+int
+qemuDomainDefNumaCPUsRectify(virDomainDefPtr def, virQEMUCapsPtr qemuCaps)
+{
+    unsigned int vcpusMax, numacpus;
+
+    /* QEMU_CAPS_NUMA tells us if QEMU is able to handle disjointed
+     * NUMA CPU ranges. The filling process will create a disjointed
+     * setup in node0 most of the time. Do not proceed if QEMU
+     * can't handle it. */
+    if (virDomainNumaGetNodeCount(def->numa) == 0 ||
+        !virQEMUCapsGet(qemuCaps, QEMU_CAPS_NUMA))
+        return 0;
+
+    vcpusMax = virDomainDefGetVcpusMax(def);
+    numacpus = virDomainNumaGetCPUCountTotal(def->numa);
+
+    if (numacpus < vcpusMax) {
+        if (virDomainNumaFillCPUsInNode(def->numa, 0, vcpusMax) < 0)
+            return -1;
+    }
+
+    return 0;
+}
+
+
+static int
+qemuDomainDefNumaCPUsPostParse(virDomainDefPtr def,
+                               virQEMUCapsPtr qemuCaps)
+{
+    return qemuDomainDefNumaCPUsRectify(def, qemuCaps);
+}
+
+
 static int
 qemuDomainDefPostParseBasic(virDomainDefPtr def,
                             void *opaque G_GNUC_UNUSED)
@@ -5049,6 +5093,9 @@ qemuDomainDefPostParse(virDomainDefPtr def,
     if (qemuDomainDefTsegPostParse(def, qemuCaps) < 0)
         return -1;
 
+    if (qemuDomainDefNumaCPUsPostParse(def, qemuCaps) < 0)
+        return -1;
+
     return 0;
 }
 
diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h
index 41d3f1561dd99f74b33b45b1222fefebacc506b2..e78a2b935ddac9a8053972ce5ef32780c8dd0b38 100644
--- a/src/qemu/qemu_domain.h
+++ b/src/qemu/qemu_domain.h
@@ -1297,3 +1297,7 @@ qemuDomainInitializePflashStorageSource(virDomainObjPtr vm);
 bool
 qemuDomainDiskBlockJobIsSupported(virDomainObjPtr vm,
                                   virDomainDiskDefPtr disk);
+
+int
+qemuDomainDefNumaCPUsRectify(virDomainDefPtr def,
+                             virQEMUCapsPtr qemuCaps);
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index e482d08f3ab4d7330d91020c72479a4a554d1ab5..0424f0785f9eb0e5c722f64ca7a7bc9dc3a96b56 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -4994,11 +4994,13 @@ qemuDomainSetVcpusAgent(virDomainObjPtr vm,
 
 static int
 qemuDomainSetVcpusMax(virQEMUDriverPtr driver,
+                      virDomainObjPtr vm,
                       virDomainDefPtr def,
                       virDomainDefPtr persistentDef,
                       unsigned int nvcpus)
 {
     g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
+    qemuDomainObjPrivatePtr priv = vm->privateData;
     unsigned int topologycpus;
 
     if (def) {
@@ -5029,6 +5031,9 @@ qemuDomainSetVcpusMax(virQEMUDriverPtr driver,
     if (virDomainDefSetVcpusMax(persistentDef, nvcpus, driver->xmlopt) < 0)
         return -1;
 
+    if (qemuDomainDefNumaCPUsRectify(persistentDef, priv->qemuCaps) < 0)
+        return -1;
+
     if (virDomainDefSave(persistentDef, driver->xmlopt, cfg->configDir) < 0)
         return -1;
 
@@ -5076,7 +5081,7 @@ qemuDomainSetVcpusFlags(virDomainPtr dom,
     if (useAgent)
         ret = qemuDomainSetVcpusAgent(vm, nvcpus);
     else if (flags & VIR_DOMAIN_VCPU_MAXIMUM)
-        ret = qemuDomainSetVcpusMax(driver, def, persistentDef, nvcpus);
+        ret = qemuDomainSetVcpusMax(driver, vm, def, persistentDef, nvcpus);
     else
         ret = qemuDomainSetVcpusInternal(driver, vm, def, persistentDef,
                                          nvcpus, hotpluggable);
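
A side effect of the qemu_driver.c change is that the persistent
definition is rectified again whenever the maximum vCPU count is
raised later on (for instance via 'virsh setvcpus --maximum --config'),
so a topology that becomes incomplete after such a resize is also
auto-filled into node 0.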