xenbits.xensource.com Git - libvirt.git/commitdiff
Support configuration of huge pages in guests
author Daniel P. Berrange <berrange@redhat.com>
Tue, 25 Aug 2009 14:05:18 +0000 (15:05 +0100)
committer Daniel P. Berrange <berrange@redhat.com>
Thu, 3 Sep 2009 12:51:55 +0000 (13:51 +0100)
Add an option to the domain XML to request hugepage-backed guest memory:

     <memoryBacking>
        <hugepages/>
     </memoryBacking>

* configure.in: Add check for mntent.h
* qemud/libvirtd_qemu.aug, qemud/test_libvirtd_qemu.aug, src/qemu.conf:
  Add 'hugetlbfs_mount' config parameter
* src/qemu_conf.c, src/qemu_conf.h: Check for -mem-path flag in QEMU,
  and pass it when hugepages are requested.
  Load hugetlbfs_mount config parameter, search for mount if not given.
* src/qemu_driver.c: Free hugetlbfs_mount/path parameter in driver shutdown.
  Create directory for QEMU hugepage usage, chowning if required.
* docs/formatdomain.html.in: Document memoryBacking/hugepages elements
* docs/schemas/domain.rng: Add memoryBacking/hugepages elements to schema
* src/util.c, src/util.h, src/libvirt_private.syms: Add virFileFindMountPoint
  helper API
* tests/qemuhelptest.c: Add -mem-path constants
* tests/qemuxml2argvtest.c, tests/qemuxml2xmltest.c: Add tests for hugepage
  handling
* tests/qemuxml2argvdata/qemuxml2argv-hugepages.xml,
  tests/qemuxml2argvdata/qemuxml2argv-hugepages.args: Data files for
  hugepage tests

20 files changed:
configure.in
docs/formatdomain.html
docs/formatdomain.html.in
docs/schemas/domain.rng
qemud/libvirtd_qemu.aug
qemud/test_libvirtd_qemu.aug
src/domain_conf.c
src/domain_conf.h
src/libvirt_private.syms
src/qemu.conf
src/qemu_conf.c
src/qemu_conf.h
src/qemu_driver.c
src/util.c
src/util.h
tests/qemuhelptest.c
tests/qemuxml2argvdata/qemuxml2argv-hugepages.args [new file with mode: 0644]
tests/qemuxml2argvdata/qemuxml2argv-hugepages.xml [new file with mode: 0644]
tests/qemuxml2argvtest.c
tests/qemuxml2xmltest.c

index d28c44a2357b8471e0c7c996f52cd41fa17ec354..43b9b462e8264e2480b96800338e001166155db0 100644 (file)
@@ -83,7 +83,7 @@ dnl Availability of various not common threadsafe functions
 AC_CHECK_FUNCS([strerror_r strtok_r getmntent_r getgrnam_r getpwuid_r])
 
 dnl Availability of various common headers (non-fatal if missing).
-AC_CHECK_HEADERS([pwd.h paths.h regex.h sys/syslimits.h sys/utsname.h sys/wait.h winsock2.h sched.h termios.h sys/poll.h syslog.h])
+AC_CHECK_HEADERS([pwd.h paths.h regex.h sys/syslimits.h sys/utsname.h sys/wait.h winsock2.h sched.h termios.h sys/poll.h syslog.h mntent.h])
 
 dnl Where are the XDR functions?
 dnl If portablexdr is installed, prefer that.
index 3368ad50042bb4cca440cf8818731465bbb24593..a7154577c27abfc5e7b06734c6b6cc5bd4f89013 100644 (file)
         ...
        &lt;memory&gt;524288&lt;/memory&gt;
        &lt;currentMemory&gt;524288&lt;/currentMemory&gt;
+       &lt;memoryBacking&gt;
+         &lt;hugepages/&gt;
+        &lt;/memoryBacking&gt;
        &lt;vcpu&gt;1&lt;/vcpu&gt;
        ...</pre>
         <dl><dt><code>memory</code></dt><dd>The maximum allocation of memory for the guest at boot time.
        The units for this value are kilobytes (i.e. blocks of 1024 bytes)</dd><dt><code>currentMemory</code></dt><dd>The actual allocation of memory for the guest. This value
        be less than the maximum allocation, to allow for ballooning
        up the guests memory on the fly. If this is omitted, it defaults
-       to the same value as the <code>memory<code> element</code></code></dd><dt><code>vcpu</code></dt><dd>The content of this element defines the number of virtual
+       to the same value as the <code>memory<code> element</code></code></dd><dt><code>memoryBacking</code></dt><dd>The optional <code>memoryBacking</code> element may have a
+       <code>hugepages</code> element set within it. This tells the
+       hypervisor that the guest should have its memory allocated using
+       hugepages instead of the normal native page size.</dd><dt><code>vcpu</code></dt><dd>The content of this element defines the number of virtual
        CPUs allocated for the guest OS.</dd></dl>
         <h3>
           <a name="elementsLifecycle" id="elementsLifecycle">Lifecycle control</a>
index 211f7edb5de25947aa9cc1b75333694647688c8f..915814d792eb1dce11944448f79c55135ab7dcd0 100644 (file)
         ...
        &lt;memory&gt;524288&lt;/memory&gt;
        &lt;currentMemory&gt;524288&lt;/currentMemory&gt;
+       &lt;memoryBacking&gt;
+         &lt;hugepages/&gt;
+        &lt;/memoryBacking&gt;
        &lt;vcpu&gt;1&lt;/vcpu&gt;
        ...</pre>
 
        be less than the maximum allocation, to allow for ballooning
        up the guests memory on the fly. If this is omitted, it defaults
        to the same value as the <code>memory<code> element</dd>
+      <dt><code>memoryBacking</code></dt>
+      <dd>The optional <code>memoryBacking</code> element may have a
+       <code>hugepages</code> element set within it. This tells the
+       hypervisor that the guest should have its memory allocated using
+       hugepages instead of the normal native page size.</dd>
       <dt><code>vcpu</code></dt>
       <dd>The content of this element defines the number of virtual
        CPUs allocated for the guest OS.</dd>
index 4bd301a68dabb9ffb430ef87b0447ed5af524d00..e5780615a5c52f02d48eb9736a7550b7b8a0134c 100644 (file)
           <ref name="memoryKB"/>
         </element>
       </optional>
+      <optional>
+       <element name="memoryBacking">
+         <optional>
+           <element name="hugepages">
+             <empty/>
+           </element>
+         </optional>
+       </element>
+      </optional>
       <optional>
         <element name="vcpu">
           <optional>
index 2175e1412888a3e3c5743484e6598833083c9b91..f0b2a5e3c1365467458a5e3945013b0261cabb4e 100644 (file)
@@ -35,6 +35,7 @@ module Libvirtd_qemu =
                  | str_array_entry "cgroup_controllers"
                  | str_array_entry "cgroup_device_acl"
                  | str_entry "save_image_format"
+                 | str_entry "hugetlbfs_mount"
 
    (* Each enty in the config is one of the following three ... *)
    let entry = vnc_entry
index 72f2227932f95c61ab6b2ce1fdc1c795703bba1d..ac8943865107fde786004ed61e293233e5d99c1e 100644 (file)
@@ -91,6 +91,8 @@ cgroup_controllers = [ \"cpu\", \"devices\" ]
 cgroup_device_acl = [ \"/dev/null\", \"/dev/full\", \"/dev/zero\" ]
 
 save_image_format = \"gzip\"
+
+hugetlbfs_mount = \"/dev/hugepages\"
 "
 
    test Libvirtd_qemu.lns get conf =
@@ -192,3 +194,5 @@ save_image_format = \"gzip\"
 }
 { "#empty" }
 { "save_image_format" = "gzip" }
+{ "#empty" }
+{ "hugetlbfs_mount" = "/dev/hugepages" }
\ No newline at end of file
index 79225a89bb802da086411f67d4297fb136f62b07..e95fbe036407c9ac63b0bd1c61ab431b1609b372 100644 (file)
@@ -2537,6 +2537,10 @@ static virDomainDefPtr virDomainDefParseXML(virConnectPtr conn,
     if (virXPathULong(conn, "string(./currentMemory[1])", ctxt, &def->memory) < 0)
         def->memory = def->maxmem;
 
+    node = virXPathNode(conn, "./memoryBacking/hugepages", ctxt);
+    if (node)
+        def->hugepage_backed = 1;
+
     if (virXPathULong(conn, "string(./vcpu[1])", ctxt, &def->vcpus) < 0)
         def->vcpus = 1;
 
@@ -4161,7 +4165,11 @@ char *virDomainDefFormat(virConnectPtr conn,
     virBufferVSprintf(&buf, "  <memory>%lu</memory>\n", def->maxmem);
     virBufferVSprintf(&buf, "  <currentMemory>%lu</currentMemory>\n",
                       def->memory);
-
+    if (def->hugepage_backed) {
+        virBufferAddLit(&buf, "  <memoryBacking>\n");
+        virBufferAddLit(&buf, "    <hugepages/>\n");
+        virBufferAddLit(&buf, "  </memoryBacking>\n");
+    }
     for (n = 0 ; n < def->cpumasklen ; n++)
         if (def->cpumask[n] != 1)
             allones = 0;
index 53d30d24d5655f706662021ee246d9d9d2c490a1..661773e427a5df06c32487f28a3064e301c2da3d 100644 (file)
@@ -535,6 +535,7 @@ struct _virDomainDef {
 
     unsigned long memory;
     unsigned long maxmem;
+    unsigned char hugepage_backed;
     unsigned long vcpus;
     int cpumasklen;
     char *cpumask;
index ead3390e7b63e44f982fa38ac2eb497ae6e21400..eec8be64c78080e266526f02112fbc97a82ff715 100644 (file)
@@ -408,6 +408,7 @@ virGetUserDirectory;
 virGetUserName;
 virGetUserID;
 virGetGroupID;
+virFileFindMountPoint;
 
 
 # uuid.h
index 9aecc2637e5329e67771fbae52c5df99f17620a4..06babc4a3514230110c85db1a575850c552c3035 100644 (file)
 # the process of saving a domain in order to save disk space.
 #
 # save_image_format = "raw"
+
+# If the host provides huge pages and a hugetlbfs mount point is
+# configured, a guest may request huge page backing.  When the mount
+# point is not specified here, detection of a host mount point in
+# /proc/mounts will be attempted.  An explicit mount point given here
+# overrides that detection.  Setting the mount point to "" will
+# disable guest hugepage backing.
+#
+# NB, within this mount point, guests will create memory backing files
+# under $MOUNTPOINT/libvirt/qemu
+
+# hugetlbfs_mount = "/dev/hugepages"
index 22f5edd248c87cdd55d033acd6ae721a384319bd..918ccf214301e06293edf6e531618e53875f7b7c 100644 (file)
@@ -35,6 +35,7 @@
 #include <sys/wait.h>
 #include <arpa/inet.h>
 #include <sys/utsname.h>
+#include <mntent.h>
 
 #include "c-ctype.h"
 #include "virterror_internal.h"
@@ -87,6 +88,7 @@ VIR_ENUM_IMPL(qemuVideo, VIR_DOMAIN_VIDEO_TYPE_LAST,
               NULL, /* no arg needed for xen */
               NULL /* don't support vbox */);
 
+#define PROC_MOUNT_BUF_LEN 255
 
 int qemudLoadDriverConfig(struct qemud_driver *driver,
                           const char *filename) {
@@ -106,6 +108,21 @@ int qemudLoadDriverConfig(struct qemud_driver *driver,
         return -1;
     }
 
+#ifdef HAVE_MNTENT_H
+    /* For privileged driver, try and find hugepage mount automatically.
+     * Non-privileged driver requires admin to create a dir for the
+     * user, chown it, and then let user configure it manually */
+    if (driver->privileged &&
+        !(driver->hugetlbfs_mount = virFileFindMountPoint("hugetlbfs"))) {
+        if (errno != ENOENT) {
+            virReportSystemError(NULL, errno, "%s",
+                                 _("unable to find hugetlbfs mountpoint"));
+            return -1;
+        }
+    }
+#endif
+
+
     /* Just check the file is readable before opening it, otherwise
      * libvirt emits an error.
      */
@@ -290,6 +307,17 @@ int qemudLoadDriverConfig(struct qemud_driver *driver,
         }
     }
 
+     p = virConfGetValue (conf, "hugetlbfs_mount");
+     CHECK_TYPE ("hugetlbfs_mount", VIR_CONF_STRING);
+     if (p && p->str) {
+         VIR_FREE(driver->hugetlbfs_mount);
+         if (!(driver->hugetlbfs_mount = strdup(p->str))) {
+             virReportOOMError(NULL);
+             virConfFree(conf);
+             return -1;
+         }
+     }
+
     virConfFree (conf);
     return 0;
 }
@@ -784,6 +812,8 @@ static unsigned int qemudComputeCmdFlags(const char *help,
         flags |= QEMUD_CMD_FLAG_DRIVE_BOOT;
     if (strstr(help, "-pcidevice"))
         flags |= QEMUD_CMD_FLAG_PCIDEVICE;
+    if (strstr(help, "-mem-path"))
+        flags |= QEMUD_CMD_FLAG_MEM_PATH;
 
     if (version >= 9000)
         flags |= QEMUD_CMD_FLAG_VNC_COLON;
@@ -1583,6 +1613,26 @@ int qemudBuildCommandLine(virConnectPtr conn,
         ADD_ARG_LIT("-no-kvm");
     ADD_ARG_LIT("-m");
     ADD_ARG_LIT(memory);
+    if (def->hugepage_backed) {
+        if (!driver->hugetlbfs_mount) {
+            qemudReportError(conn, NULL, NULL, VIR_ERR_INTERNAL_ERROR,
+                             "%s", _("hugetlbfs filesystem is not mounted"));
+            goto error;
+        }
+        if (!driver->hugepage_path) {
+            qemudReportError(conn, NULL, NULL, VIR_ERR_INTERNAL_ERROR,
+                             "%s", _("hugepages are disabled by administrator config"));
+            goto error;
+        }
+        if (!(qemuCmdFlags & QEMUD_CMD_FLAG_MEM_PATH)) {
+            qemudReportError(conn, NULL, NULL, VIR_ERR_INTERNAL_ERROR,
+                             _("hugepage backing not supported by '%s'"),
+                             def->emulator);
+            goto error;
+        }
+        ADD_ARG_LIT("-mem-path");
+        ADD_ARG_LIT(driver->hugepage_path);
+    }
     ADD_ARG_LIT("-smp");
     ADD_ARG_LIT(vcpus);
 
index a126dacfd750d4292ddf3699fa3fc42664c21754..f12694092cd66f09f87f2d6ccca344627e9ddf6e 100644 (file)
@@ -67,6 +67,7 @@ enum qemud_cmd_flags {
     QEMUD_CMD_FLAG_HOST_NET_ADD = QEMUD_CMD_FLAG_0_10, /* host_net_add monitor command */
 
     QEMUD_CMD_FLAG_PCIDEVICE     = (1 << 17), /* PCI device assignment only supported by qemu-kvm */
+    QEMUD_CMD_FLAG_MEM_PATH      = (1 << 18), /* mmap'ped guest backing supported */
 };
 
 /* Main driver state */
@@ -99,6 +100,8 @@ struct qemud_driver {
     char *vncListen;
     char *vncPassword;
     char *vncSASLdir;
+    char *hugetlbfs_mount;
+    char *hugepage_path;
 
     virCapsPtr caps;
 
index b921b71f268121bdda276e742522b93dff2cf620..099cbe9c2892e8bc03306ff70108dd6968898fb9 100644 (file)
@@ -555,6 +555,37 @@ qemudStartup(int privileged) {
         goto error;
     }
 
+    /* If hugetlbfs is present, then we need to create a sub-directory within
+     * it, since we can't assume the root mount point has permissions that
+     * will let our spawned QEMU instances use it.
+     *
+     * NB the check for '/', since user may config "" to disable hugepages
+     * even when mounted
+     */
+    if (qemu_driver->hugetlbfs_mount &&
+        qemu_driver->hugetlbfs_mount[0] == '/') {
+        char *mempath = NULL;
+        if (virAsprintf(&mempath, "%s/libvirt/qemu", qemu_driver->hugetlbfs_mount) < 0)
+            goto out_of_memory;
+
+        if ((rc = virFileMakePath(mempath)) != 0) {
+            virReportSystemError(NULL, rc,
+                                 _("unable to create hugepage path %s"), mempath);
+            VIR_FREE(mempath);
+            goto error;
+        }
+        if (qemu_driver->privileged &&
+            chown(mempath, qemu_driver->user, qemu_driver->group) < 0) {
+            virReportSystemError(NULL, errno,
+                                 _("unable to set ownership on %s to %d:%d"),
+                                 mempath, qemu_driver->user, qemu_driver->group);
+            VIR_FREE(mempath);
+            goto error;
+        }
+
+        qemu_driver->hugepage_path = mempath;
+    }
+
     /* Get all the running persistent or transient configs first */
     if (virDomainLoadAllConfigs(NULL,
                                 qemu_driver->caps,
@@ -686,6 +717,8 @@ qemudShutdown(void) {
     VIR_FREE(qemu_driver->vncPassword);
     VIR_FREE(qemu_driver->vncSASLdir);
     VIR_FREE(qemu_driver->saveImageFormat);
+    VIR_FREE(qemu_driver->hugetlbfs_mount);
+    VIR_FREE(qemu_driver->hugepage_path);
 
     /* Free domain callback list */
     virDomainEventCallbackListFree(qemu_driver->domainEventCallbacks);
index 0d4f3fa49545d05248715d2eb09779b29a715d8f..282f7d91882145ef1d32760d46f9af0634d9a33c 100644 (file)
@@ -60,7 +60,9 @@
 #if HAVE_CAPNG
 #include <cap-ng.h>
 #endif
-
+#ifdef HAVE_MNTENT_H
+#include <mntent.h>
+#endif
 
 #include "virterror_internal.h"
 #include "logging.h"
@@ -1983,3 +1985,37 @@ int virGetGroupID(virConnectPtr conn,
     return 0;
 }
 #endif
+
+
+#ifdef HAVE_MNTENT_H
+/* Search /proc/mounts for the first mount whose filesystem type matches
+ * 'type'; return a malloc'ed copy of its mount directory if found,
+ * otherwise return NULL with errno set to an appropriate value.
+ */
+char *virFileFindMountPoint(const char *type)
+{
+    FILE *f;
+    struct mntent mb;
+    char mntbuf[1024];
+    char *ret = NULL;
+
+    f = setmntent("/proc/mounts", "r");
+    if (!f)
+        return NULL;
+
+    while (getmntent_r(f, &mb, mntbuf, sizeof(mntbuf))) {
+        if (STREQ(mb.mnt_type, type)) {
+            ret = strdup(mb.mnt_dir);
+            goto cleanup;
+        }
+    }
+
+    if (!ret)
+        errno = ENOENT;
+
+cleanup:
+    endmntent(f);
+
+    return ret;
+}
+#endif
index b3e628ae7713626f5538ea6075366693b7b10798..896e1b4af86bc519e206792d6350034a34dabdd5 100644 (file)
@@ -233,4 +233,8 @@ int virGetGroupID(virConnectPtr conn,
 int virRandomInitialize(unsigned int seed);
 int virRandom(int max);
 
+#ifdef HAVE_MNTENT_H
+char *virFileFindMountPoint(const char *type);
+#endif
+
 #endif /* __VIR_UTIL_H__ */
index ad2045fa5ef3841ce852c0e655e52632d6e147b5..a42a1ba09abb1aae8a1faec20b1c300148d30be6 100644 (file)
@@ -105,7 +105,8 @@ mymain(int argc, char **argv)
             QEMUD_CMD_FLAG_VNET_HDR |
             QEMUD_CMD_FLAG_MIGRATE_KVM_STDIO |
             QEMUD_CMD_FLAG_KVM |
-            QEMUD_CMD_FLAG_DRIVE_FORMAT,
+            QEMUD_CMD_FLAG_DRIVE_FORMAT |
+            QEMUD_CMD_FLAG_MEM_PATH,
             9001,  1, 74);
     DO_TEST("qemu-0.10.5",
             QEMUD_CMD_FLAG_KQEMU |
@@ -136,7 +137,8 @@ mymain(int argc, char **argv)
             QEMUD_CMD_FLAG_DRIVE_FORMAT |
             QEMUD_CMD_FLAG_VGA |
             QEMUD_CMD_FLAG_0_10 |
-            QEMUD_CMD_FLAG_PCIDEVICE,
+            QEMUD_CMD_FLAG_PCIDEVICE |
+            QEMUD_CMD_FLAG_MEM_PATH,
             10005, 1,  0);
     DO_TEST("kvm-86",
             QEMUD_CMD_FLAG_VNC_COLON |
diff --git a/tests/qemuxml2argvdata/qemuxml2argv-hugepages.args b/tests/qemuxml2argvdata/qemuxml2argv-hugepages.args
new file mode 100644 (file)
index 0000000..f10a40e
--- /dev/null
@@ -0,0 +1 @@
+LC_ALL=C PATH=/bin HOME=/home/test USER=test LOGNAME=test /usr/bin/qemu -S -M pc -m 214 -mem-path /dev/hugepages/libvirt/qemu -smp 1 -nographic -monitor unix:/tmp/test-monitor,server,nowait -no-acpi -boot c -hda /dev/HostVG/QEMUGuest1 -net none -serial none -parallel none -usb
diff --git a/tests/qemuxml2argvdata/qemuxml2argv-hugepages.xml b/tests/qemuxml2argvdata/qemuxml2argv-hugepages.xml
new file mode 100644 (file)
index 0000000..e25286f
--- /dev/null
@@ -0,0 +1,25 @@
+<domain type='qemu'>
+  <name>QEMUGuest1</name>
+  <uuid>c7a5fdbd-edaf-9455-926a-d65c16db1809</uuid>
+  <memory>219200</memory>
+  <currentMemory>219200</currentMemory>
+  <memoryBacking>
+    <hugepages/>
+  </memoryBacking>
+  <vcpu>1</vcpu>
+  <os>
+    <type arch='i686' machine='pc'>hvm</type>
+    <boot dev='hd'/>
+  </os>
+  <clock offset='utc'/>
+  <on_poweroff>destroy</on_poweroff>
+  <on_reboot>restart</on_reboot>
+  <on_crash>destroy</on_crash>
+  <devices>
+    <emulator>/usr/bin/qemu</emulator>
+    <disk type='block' device='disk'>
+      <source dev='/dev/HostVG/QEMUGuest1'/>
+      <target dev='hda' bus='ide'/>
+    </disk>
+  </devices>
+</domain>
index 6f25e7d77fb65bd3c88c6b2849909cf1bce3d453..ade57b16ecde8e367a36935cd9365edfd255d8c4 100644 (file)
@@ -160,7 +160,11 @@ mymain(int argc, char **argv)
 
     if ((driver.caps = testQemuCapsInit()) == NULL)
         return EXIT_FAILURE;
-    if((driver.stateDir = strdup("/nowhere")) == NULL)
+    if ((driver.stateDir = strdup("/nowhere")) == NULL)
+        return EXIT_FAILURE;
+    if ((driver.hugetlbfs_mount = strdup("/dev/hugepages")) == NULL)
+        return EXIT_FAILURE;
+    if ((driver.hugepage_path = strdup("/dev/hugepages/libvirt/qemu")) == NULL)
         return EXIT_FAILURE;
 
 #define DO_TEST_FULL(name, extraFlags, migrateFrom)                     \
@@ -189,6 +193,7 @@ mymain(int argc, char **argv)
     DO_TEST("bootloader", 0);
     DO_TEST("clock-utc", 0);
     DO_TEST("clock-localtime", 0);
+    DO_TEST("hugepages", QEMUD_CMD_FLAG_MEM_PATH);
     DO_TEST("disk-cdrom", 0);
     DO_TEST("disk-cdrom-empty", QEMUD_CMD_FLAG_DRIVE);
     DO_TEST("disk-floppy", 0);
index 7db76118c665c1a6882c0ae08fb7a57fdaadc3b5..7f19f78af9f764a9e23d6fe9b9c3377cfef041b8 100644 (file)
@@ -92,6 +92,7 @@ mymain(int argc, char **argv)
     DO_TEST("bootloader");
     DO_TEST("clock-utc");
     DO_TEST("clock-localtime");
+    DO_TEST("hugepages");
     DO_TEST("disk-cdrom");
     DO_TEST("disk-floppy");
     DO_TEST("disk-many");