AC_CHECK_FUNCS([strerror_r strtok_r getmntent_r getgrnam_r getpwuid_r])
dnl Availability of various common headers (non-fatal if missing).
-AC_CHECK_HEADERS([pwd.h paths.h regex.h sys/syslimits.h sys/utsname.h sys/wait.h winsock2.h sched.h termios.h sys/poll.h syslog.h])
+AC_CHECK_HEADERS([pwd.h paths.h regex.h sys/syslimits.h sys/utsname.h sys/wait.h winsock2.h sched.h termios.h sys/poll.h syslog.h mntent.h])
dnl Where are the XDR functions?
dnl If portablexdr is installed, prefer that.
...
<memory>524288</memory>
<currentMemory>524288</currentMemory>
+ <memoryBacking>
+ <hugepages/>
+ </memoryBacking>
<vcpu>1</vcpu>
...</pre>
<dl><dt><code>memory</code></dt><dd>The maximum allocation of memory for the guest at boot time.
The units for this value are kilobytes (i.e. blocks of 1024 bytes)</dd><dt><code>currentMemory</code></dt><dd>The actual allocation of memory for the guest. This value
be less than the maximum allocation, to allow for ballooning
up the guests memory on the fly. If this is omitted, it defaults
- to the same value as the <code>memory<code> element</code></code></dd><dt><code>vcpu</code></dt><dd>The content of this element defines the number of virtual
+ to the same value as the <code>memory<code> element</code></code></dd><dt><code>memoryBacking</code></dt><dd>The optional <code>memoryBacking</code> element may have a
+ <code>hugepages</code> element set within it. This tells the
+ hypervisor that the guest should have its memory allocated using
+ hugepages instead of the normal native page size.</dd><dt><code>vcpu</code></dt><dd>The content of this element defines the number of virtual
CPUs allocated for the guest OS.</dd></dl>
<h3>
<a name="elementsLifecycle" id="elementsLifecycle">Lifecycle control</a>
...
<memory>524288</memory>
<currentMemory>524288</currentMemory>
+ <memoryBacking>
+ <hugepages/>
+ </memoryBacking>
<vcpu>1</vcpu>
...</pre>
be less than the maximum allocation, to allow for ballooning
up the guests memory on the fly. If this is omitted, it defaults
to the same value as the <code>memory<code> element</dd>
+ <dt><code>memoryBacking</code></dt>
+ <dd>The optional <code>memoryBacking</code> element may have a
+ <code>hugepages</code> element set within it. This tells the
+ hypervisor that the guest should have its memory allocated using
+ hugepages instead of the normal native page size.</dd>
<dt><code>vcpu</code></dt>
<dd>The content of this element defines the number of virtual
CPUs allocated for the guest OS.</dd>
<ref name="memoryKB"/>
</element>
</optional>
+ <optional>
+ <element name="memoryBacking">
+ <optional>
+ <element name="hugepages">
+ <empty/>
+ </element>
+ </optional>
+ </element>
+ </optional>
<optional>
<element name="vcpu">
<optional>
| str_array_entry "cgroup_controllers"
| str_array_entry "cgroup_device_acl"
| str_entry "save_image_format"
+ | str_entry "hugetlbfs_mount"
(* Each enty in the config is one of the following three ... *)
let entry = vnc_entry
cgroup_device_acl = [ \"/dev/null\", \"/dev/full\", \"/dev/zero\" ]
save_image_format = \"gzip\"
+
+hugetlbfs_mount = \"/dev/hugepages\"
"
test Libvirtd_qemu.lns get conf =
}
{ "#empty" }
{ "save_image_format" = "gzip" }
+{ "#empty" }
+{ "hugetlbfs_mount" = "/dev/hugepages" }
\ No newline at end of file
if (virXPathULong(conn, "string(./currentMemory[1])", ctxt, &def->memory) < 0)
def->memory = def->maxmem;
+ node = virXPathNode(conn, "./memoryBacking/hugepages", ctxt);
+ if (node)
+ def->hugepage_backed = 1;
+
if (virXPathULong(conn, "string(./vcpu[1])", ctxt, &def->vcpus) < 0)
def->vcpus = 1;
virBufferVSprintf(&buf, " <memory>%lu</memory>\n", def->maxmem);
virBufferVSprintf(&buf, " <currentMemory>%lu</currentMemory>\n",
def->memory);
-
+ if (def->hugepage_backed) {
+ virBufferAddLit(&buf, " <memoryBacking>\n");
+ virBufferAddLit(&buf, " <hugepages/>\n");
+ virBufferAddLit(&buf, " </memoryBacking>\n");
+ }
for (n = 0 ; n < def->cpumasklen ; n++)
if (def->cpumask[n] != 1)
allones = 0;
unsigned long memory;
unsigned long maxmem;
+ unsigned char hugepage_backed;
unsigned long vcpus;
int cpumasklen;
char *cpumask;
virGetUserName;
virGetUserID;
virGetGroupID;
+virFileFindMountPoint;
# uuid.h
# the process of saving a domain in order to save disk space.
#
# save_image_format = "raw"
+
+# If the host provides huge pages and a hugetlbfs mount point is
+# configured, a guest may request huge page backing. When the mount
+# point is not specified here, detection of a host mount point from
+# /proc/mounts will be attempted. Specifying an explicit mount point
+# overrides that detection. Setting the mount point to "" will
+# disable guest hugepage backing.
+#
+# NB, within this mount point, guests will create memory backing files
+# in a location of $MOUNTPOINT/libvirt/qemu
+
+# hugetlbfs_mount = "/dev/hugepages"
#include <sys/wait.h>
#include <arpa/inet.h>
#include <sys/utsname.h>
+#include <mntent.h>
#include "c-ctype.h"
#include "virterror_internal.h"
NULL, /* no arg needed for xen */
NULL /* don't support vbox */);
+#define PROC_MOUNT_BUF_LEN 255
int qemudLoadDriverConfig(struct qemud_driver *driver,
const char *filename) {
return -1;
}
+#ifdef HAVE_MNTENT_H
+ /* For privileged driver, try and find hugepage mount automatically.
+ * Non-privileged driver requires admin to create a dir for the
+ * user, chown it, and then let user configure it manually */
+ if (driver->privileged &&
+ !(driver->hugetlbfs_mount = virFileFindMountPoint("hugetlbfs"))) {
+ if (errno != ENOENT) {
+ virReportSystemError(NULL, errno, "%s",
+ _("unable to find hugetlbfs mountpoint"));
+ return -1;
+ }
+ }
+#endif
+
+
/* Just check the file is readable before opening it, otherwise
* libvirt emits an error.
*/
}
}
+ p = virConfGetValue (conf, "hugetlbfs_mount");
+ CHECK_TYPE ("hugetlbfs_mount", VIR_CONF_STRING);
+ if (p && p->str) {
+ VIR_FREE(driver->hugetlbfs_mount);
+ if (!(driver->hugetlbfs_mount = strdup(p->str))) {
+ virReportOOMError(NULL);
+ virConfFree(conf);
+ return -1;
+ }
+ }
+
virConfFree (conf);
return 0;
}
flags |= QEMUD_CMD_FLAG_DRIVE_BOOT;
if (strstr(help, "-pcidevice"))
flags |= QEMUD_CMD_FLAG_PCIDEVICE;
+ if (strstr(help, "-mem-path"))
+ flags |= QEMUD_CMD_FLAG_MEM_PATH;
if (version >= 9000)
flags |= QEMUD_CMD_FLAG_VNC_COLON;
ADD_ARG_LIT("-no-kvm");
ADD_ARG_LIT("-m");
ADD_ARG_LIT(memory);
+ if (def->hugepage_backed) {
+ if (!driver->hugetlbfs_mount) {
+ qemudReportError(conn, NULL, NULL, VIR_ERR_INTERNAL_ERROR,
+ "%s", _("hugetlbfs filesystem is not mounted"));
+ goto error;
+ }
+ if (!driver->hugepage_path) {
+ qemudReportError(conn, NULL, NULL, VIR_ERR_INTERNAL_ERROR,
+ "%s", _("hugepages are disabled by administrator config"));
+ goto error;
+ }
+ if (!(qemuCmdFlags & QEMUD_CMD_FLAG_MEM_PATH)) {
+ qemudReportError(conn, NULL, NULL, VIR_ERR_INTERNAL_ERROR,
+ _("hugepage backing not supported by '%s'"),
+ def->emulator);
+ goto error;
+ }
+ ADD_ARG_LIT("-mem-path");
+ ADD_ARG_LIT(driver->hugepage_path);
+ }
ADD_ARG_LIT("-smp");
ADD_ARG_LIT(vcpus);
QEMUD_CMD_FLAG_HOST_NET_ADD = QEMUD_CMD_FLAG_0_10, /* host_net_add monitor command */
QEMUD_CMD_FLAG_PCIDEVICE = (1 << 17), /* PCI device assignment only supported by qemu-kvm */
+ QEMUD_CMD_FLAG_MEM_PATH = (1 << 18), /* mmap'ped guest backing supported */
};
/* Main driver state */
char *vncListen;
char *vncPassword;
char *vncSASLdir;
+ char *hugetlbfs_mount;
+ char *hugepage_path;
virCapsPtr caps;
goto error;
}
+ /* If hugetlbfs is present, then we need to create a sub-directory within
+ * it, since we can't assume the root mount point has permissions that
+ * will let our spawned QEMU instances use it.
+ *
+ * NB the check for '/', since user may config "" to disable hugepages
+ * even when mounted
+ */
+ if (qemu_driver->hugetlbfs_mount &&
+ qemu_driver->hugetlbfs_mount[0] == '/') {
+ char *mempath = NULL;
+ if (virAsprintf(&mempath, "%s/libvirt/qemu", qemu_driver->hugetlbfs_mount) < 0)
+ goto out_of_memory;
+
+ if ((rc = virFileMakePath(mempath)) != 0) {
+ virReportSystemError(NULL, rc,
+ _("unable to create hugepage path %s"), mempath);
+ VIR_FREE(mempath);
+ goto error;
+ }
+ if (qemu_driver->privileged &&
+ chown(mempath, qemu_driver->user, qemu_driver->group) < 0) {
+ virReportSystemError(NULL, errno,
+ _("unable to set ownership on %s to %d:%d"),
+ mempath, qemu_driver->user, qemu_driver->group);
+ VIR_FREE(mempath);
+ goto error;
+ }
+
+ qemu_driver->hugepage_path = mempath;
+ }
+
/* Get all the running persistent or transient configs first */
if (virDomainLoadAllConfigs(NULL,
qemu_driver->caps,
VIR_FREE(qemu_driver->vncPassword);
VIR_FREE(qemu_driver->vncSASLdir);
VIR_FREE(qemu_driver->saveImageFormat);
+ VIR_FREE(qemu_driver->hugetlbfs_mount);
+ VIR_FREE(qemu_driver->hugepage_path);
/* Free domain callback list */
virDomainEventCallbackListFree(qemu_driver->domainEventCallbacks);
#if HAVE_CAPNG
#include <cap-ng.h>
#endif
-
+#ifdef HAVE_MNTENT_H
+#include <mntent.h>
+#endif
#include "virterror_internal.h"
#include "logging.h"
return 0;
}
#endif
+
+
+#ifdef HAVE_MNTENT_H
+/* Search /proc/mounts for the first entry whose filesystem type
+ * equals *type.
+ *
+ * Returns a malloc'ed copy of that entry's mount directory (the
+ * caller must free it), or NULL on failure with errno set:
+ *   - ENOENT if no entry of the requested type is listed,
+ *   - ENOMEM if the copy of the path could not be allocated,
+ *   - whatever setmntent() left in errno if /proc/mounts could not
+ *     be opened.
+ */
+char *virFileFindMountPoint(const char *type)
+{
+    FILE *f;
+    struct mntent mb;
+    char mntbuf[1024];
+    char *ret = NULL;
+    /* Failure code to report; stays ENOENT unless a match is found
+     * but cannot be duplicated. */
+    int failure = ENOENT;
+
+    f = setmntent("/proc/mounts", "r");
+    if (!f)
+        return NULL;
+
+    while (getmntent_r(f, &mb, mntbuf, sizeof(mntbuf))) {
+        if (STREQ(mb.mnt_type, type)) {
+            if (!(ret = strdup(mb.mnt_dir)))
+                failure = ENOMEM;
+            break;
+        }
+    }
+
+    endmntent(f);
+
+    /* Assign errno only after endmntent(), which may itself modify
+     * it, so the caller sees the code that describes our failure. */
+    if (!ret)
+        errno = failure;
+
+    return ret;
+}
+#endif
int virRandomInitialize(unsigned int seed);
int virRandom(int max);
+#ifdef HAVE_MNTENT_H
+char *virFileFindMountPoint(const char *type);
+#endif
+
#endif /* __VIR_UTIL_H__ */
QEMUD_CMD_FLAG_VNET_HDR |
QEMUD_CMD_FLAG_MIGRATE_KVM_STDIO |
QEMUD_CMD_FLAG_KVM |
- QEMUD_CMD_FLAG_DRIVE_FORMAT,
+ QEMUD_CMD_FLAG_DRIVE_FORMAT |
+ QEMUD_CMD_FLAG_MEM_PATH,
9001, 1, 74);
DO_TEST("qemu-0.10.5",
QEMUD_CMD_FLAG_KQEMU |
QEMUD_CMD_FLAG_DRIVE_FORMAT |
QEMUD_CMD_FLAG_VGA |
QEMUD_CMD_FLAG_0_10 |
- QEMUD_CMD_FLAG_PCIDEVICE,
+ QEMUD_CMD_FLAG_PCIDEVICE |
+ QEMUD_CMD_FLAG_MEM_PATH,
10005, 1, 0);
DO_TEST("kvm-86",
QEMUD_CMD_FLAG_VNC_COLON |
--- /dev/null
+LC_ALL=C PATH=/bin HOME=/home/test USER=test LOGNAME=test /usr/bin/qemu -S -M pc -m 214 -mem-path /dev/hugepages/libvirt/qemu -smp 1 -nographic -monitor unix:/tmp/test-monitor,server,nowait -no-acpi -boot c -hda /dev/HostVG/QEMUGuest1 -net none -serial none -parallel none -usb
--- /dev/null
+<domain type='qemu'>
+ <name>QEMUGuest1</name>
+ <uuid>c7a5fdbd-edaf-9455-926a-d65c16db1809</uuid>
+ <memory>219200</memory>
+ <currentMemory>219200</currentMemory>
+ <memoryBacking>
+ <hugepages/>
+ </memoryBacking>
+ <vcpu>1</vcpu>
+ <os>
+ <type arch='i686' machine='pc'>hvm</type>
+ <boot dev='hd'/>
+ </os>
+ <clock offset='utc'/>
+ <on_poweroff>destroy</on_poweroff>
+ <on_reboot>restart</on_reboot>
+ <on_crash>destroy</on_crash>
+ <devices>
+ <emulator>/usr/bin/qemu</emulator>
+ <disk type='block' device='disk'>
+ <source dev='/dev/HostVG/QEMUGuest1'/>
+ <target dev='hda' bus='ide'/>
+ </disk>
+ </devices>
+</domain>
if ((driver.caps = testQemuCapsInit()) == NULL)
return EXIT_FAILURE;
- if((driver.stateDir = strdup("/nowhere")) == NULL)
+ if ((driver.stateDir = strdup("/nowhere")) == NULL)
+ return EXIT_FAILURE;
+ if ((driver.hugetlbfs_mount = strdup("/dev/hugepages")) == NULL)
+ return EXIT_FAILURE;
+ if ((driver.hugepage_path = strdup("/dev/hugepages/libvirt/qemu")) == NULL)
return EXIT_FAILURE;
#define DO_TEST_FULL(name, extraFlags, migrateFrom) \
DO_TEST("bootloader", 0);
DO_TEST("clock-utc", 0);
DO_TEST("clock-localtime", 0);
+ DO_TEST("hugepages", QEMUD_CMD_FLAG_MEM_PATH);
DO_TEST("disk-cdrom", 0);
DO_TEST("disk-cdrom-empty", QEMUD_CMD_FLAG_DRIVE);
DO_TEST("disk-floppy", 0);
DO_TEST("bootloader");
DO_TEST("clock-utc");
DO_TEST("clock-localtime");
+ DO_TEST("hugepages");
DO_TEST("disk-cdrom");
DO_TEST("disk-floppy");
DO_TEST("disk-many");