xenbits.xensource.com Git - libvirt.git/commitdiff
nodedev: Export NUMA node locality for PCI devices
author Michal Privoznik <mprivozn@redhat.com>
Wed, 7 May 2014 16:07:12 +0000 (18:07 +0200)
committer Michal Privoznik <mprivozn@redhat.com>
Fri, 6 Jun 2014 13:10:57 +0000 (15:10 +0200)
A PCI device can be associated with a specific NUMA node. If a guest is
pinned to one NUMA node while a PCI device assigned to it resides on a
different NUMA node, DMA transfers have to travel across nodes, which
results in suboptimal performance. We should expose the NUMA node
locality of PCI devices so that management applications can make better
decisions.

Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
docs/formatnode.html.in
docs/schemas/nodedev.rng
src/conf/node_device_conf.c
src/conf/node_device_conf.h
src/node_device/node_device_udev.c
tests/nodedevschemadata/pci_1002_71c4.xml
tests/nodedevschemadata/pci_8086_10c9_sriov_pf.xml

index b424c96fa6111d253d158c518b08e689a37a2d6d..76bf8af1bf158f0bd9afb76a33c17e08402a259d 100644 (file)
                 have a list of <code>address</code> subelements, one
                 for each VF on this PF.
               </dd>
+              <dt><code>numa</code></dt>
+              <dd>
+                This optional element contains information about the PCI
+                device with respect to NUMA. For example, the optional
+                <code>node</code> attribute tells which NUMA node the PCI
+                device is associated with.
+              </dd>
             </dl>
           </dd>
           <dt><code>usb_device</code></dt>
index 81ab4d4856d67afaeff49513cf0cd8da0b0b0967..02d41063d5d5f9f607f6ae7cddee4bacd4f6f097 100644 (file)
       </element>
     </optional>
 
+    <optional>
+      <element name='numa'>
+        <optional>
+          <attribute name='node'>
+            <data type='int'/>
+          </attribute>
+        </optional>
+      </element>
+    </optional>
+
   </define>
 
   <define name='capusbdev'>
index e65b5e4578755fefb6e4884f9005bf8ec3fa8572..99fa448e8b820faa5b7d62e30679e762e886515a 100644 (file)
@@ -346,6 +346,9 @@ char *virNodeDeviceDefFormat(const virNodeDeviceDef *def)
                 virBufferAdjustIndent(&buf, -2);
                 virBufferAddLit(&buf, "</iommuGroup>\n");
             }
+            if (data->pci_dev.numa_node >= 0)
+                virBufferAsprintf(&buf, "<numa node='%d'/>\n",
+                                  data->pci_dev.numa_node);
             break;
         case VIR_NODE_DEV_CAP_USB_DEV:
             virBufferAsprintf(&buf, "<bus>%d</bus>\n", data->usb_dev.bus);
@@ -520,6 +523,41 @@ char *virNodeDeviceDefFormat(const virNodeDeviceDef *def)
     return NULL;
 }
 
+/**
+ * virNodeDevCapsDefParseIntOptional:
+ * @xpath:  XPath expression handed to virXPathInt()
+ * @ctxt:   XPath context the expression is evaluated in
+ * @value:  Where to store parsed value; written only on success
+ * @def:    Node device which is parsed; its name is passed to the error
+ * @invalid_error_fmt:  error format string; def->name is its only argument
+ * Parse an optional integer under @xpath; a missing value is not an error.
+ * Returns: -1 on error (invalid int format under @xpath, error reported)
+ *           0 if @xpath was not found (@value is untouched)
+ *           1 on success (@value holds the parsed integer)
+ */
+static int
+virNodeDevCapsDefParseIntOptional(const char *xpath,
+                                  xmlXPathContextPtr ctxt,
+                                  int *value,
+                                  virNodeDeviceDefPtr def,
+                                  const char *invalid_error_fmt)
+{
+    int ret;
+    int val;
+
+    ret = virXPathInt(xpath, ctxt, &val);
+    if (ret < -1) { /* below -1: treated as a malformed integer */
+        virReportError(VIR_ERR_INTERNAL_ERROR,
+                       invalid_error_fmt,
+                       def->name);
+        return -1;
+    } else if (ret == -1) { /* exactly -1: treated as "not found", no error */
+        return 0;
+    }
+    *value = val; /* caller's storage is only touched after a good parse */
+    return 1;
+}
+
 static int
 virNodeDevCapsDefParseULong(const char *xpath,
                             xmlXPathContextPtr ctxt,
@@ -1101,6 +1139,12 @@ virNodeDevCapPCIDevParseXML(xmlXPathContextPtr ctxt,
             goto out;
         }
     }
+
+    if (virNodeDevCapsDefParseIntOptional("number(./numa[1]/@node)", ctxt,
+                                          &data->pci_dev.numa_node, def,
+                                          _("invalid NUMA node ID supplied for '%s'")) < 0)
+        goto out;
+
     ret = 0;
  out:
     ctxt->node = orignode;
index 50e68059a60dae8cbacc091e7547ee19b9f34db6..50ce4b3f705f714cb0680750d54fa0db40a934a8 100644 (file)
@@ -115,6 +115,7 @@ struct _virNodeDevCapsDef {
             virPCIDeviceAddressPtr *iommuGroupDevices;
             size_t nIommuGroupDevices;
             unsigned int iommuGroupNumber;
+            int numa_node;
         } pci_dev;
         struct {
             unsigned int bus;
index 9a951d9bd02155dceb6e1c6551f50ad2e9109578..91fc16f301eac06e36a10479767ed651d1680146 100644 (file)
@@ -493,6 +493,18 @@ static int udevProcessPCI(struct udev_device *device,
         goto out;
     }
 
+    rc = udevGetIntSysfsAttr(device,
+                            "numa_node",
+                            &data->pci_dev.numa_node,
+                            10);
+    if (rc == PROPERTY_ERROR) {
+        goto out;
+    } else if (rc == PROPERTY_MISSING) {
+        /* The default value is -1, because it can't be 0
+         * as zero is valid node number. */
+        data->pci_dev.numa_node = -1;
+    }
+
     if (!virPCIGetPhysicalFunction(syspath, &data->pci_dev.physical_function))
         data->pci_dev.flags |= VIR_NODE_DEV_CAP_FLAG_PCI_PHYSICAL_FUNCTION;
 
index 6de09c1c6edd225d0e46635c56cfc87f4939d814..6d5d85bc3cbb16b6ae8d70abe8c0feb4a39e02ef 100644 (file)
@@ -8,5 +8,6 @@
     <function>0</function>
     <product id='0x71c4'>M56GL [Mobility FireGL V5200]</product>
     <vendor id='0x1002'>ATI Technologies Inc</vendor>
+    <numa node='1'/>
   </capability>
 </device>
index eff89328ad06db6f6a7a0fc19c9a856217239079..6e1dc868a6501a1a49df9609dda4111e10a0c99a 100644 (file)
@@ -12,5 +12,6 @@
       <address domain='0x0000' bus='0x02' slot='0x00' function='0x0'/>
       <address domain='0x0000' bus='0x02' slot='0x00' function='0x1'/>
     </iommuGroup>
+    <numa node='0'/>
   </capability>
 </device>