#include "qemu_driver.h"
#endif
#ifdef WITH_LXC
-#include "lxc_driver.h"
+#include "lxc/lxc_driver.h"
#endif
#ifdef WITH_UML
#include "uml_driver.h"
endif
LXC_DRIVER_SOURCES = \
- lxc_conf.c lxc_conf.h \
- lxc_container.c lxc_container.h \
- lxc_driver.c lxc_driver.h \
- veth.c veth.h \
+ lxc/lxc_conf.c lxc/lxc_conf.h \
+ lxc/lxc_container.c lxc/lxc_container.h \
+ lxc/lxc_driver.c lxc/lxc_driver.h \
+ lxc/veth.c lxc/veth.h \
cgroup.c cgroup.h
LXC_CONTROLLER_SOURCES = \
- lxc_conf.c lxc_conf.h \
- lxc_container.c lxc_container.h \
- lxc_controller.c \
- veth.c veth.h \
+ lxc/lxc_conf.c lxc/lxc_conf.h \
+ lxc/lxc_container.c lxc/lxc_container.h \
+ lxc/lxc_controller.c \
+ lxc/veth.c lxc/veth.h \
cgroup.c cgroup.h
PHYP_DRIVER_SOURCES = \
--- /dev/null
+/*
+ * Copyright IBM Corp. 2008
+ *
+ * lxc_conf.c: config functions for managing linux containers
+ *
+ * Authors:
+ * David L. Leskovec <dlesko at linux.vnet.ibm.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+/* includes */
+#include <config.h>
+
+#include <sys/utsname.h>
+
+#include "lxc_conf.h"
+#include "nodeinfo.h"
+#include "virterror_internal.h"
+#include "logging.h"
+
+
+#define VIR_FROM_THIS VIR_FROM_LXC
+
+/* Functions */
+/**
+ * lxcCapsInit:
+ *
+ * Builds the capabilities object advertised by the LXC driver: host
+ * architecture from uname(), optional NUMA topology, a MAC prefix, one
+ * "exe" guest type using BINDIR "/libvirt_lxc" as emulator and one "lxc"
+ * domain type for that guest.
+ *
+ * Returns the new capabilities object, or NULL if any allocation step fails
+ */
+virCapsPtr lxcCapsInit(void)
+{
+    struct utsname host;
+    virCapsPtr caps;
+    virCapsGuestPtr exeGuest;
+
+    uname(&host);
+
+    caps = virCapabilitiesNew(host.machine, 0, 0);
+    if (caps == NULL)
+        goto no_memory;
+
+    /* A failure to read the NUMA topology is not fatal for the LXC
+     * driver: drop whatever NUMA info was collected, warn, and carry on
+     */
+    if (nodeCapsInitNUMA(caps) < 0) {
+        virCapabilitiesFreeNUMAInfo(caps);
+        VIR_WARN0("Failed to query host NUMA topology, disabling NUMA capabilities");
+    }
+
+    /* XXX shouldn't 'borrow' KVM's prefix */
+    virCapabilitiesSetMacPrefix(caps, (unsigned char []){ 0x52, 0x54, 0x00 });
+
+    /* NOTE(review): the word size expression yields 32 whenever int is
+     * 4 bytes, and 8 otherwise - looks unintended, confirm the desired value
+     */
+    exeGuest = virCapabilitiesAddGuest(caps,
+                                       "exe",
+                                       host.machine,
+                                       sizeof(int) == 4 ? 32 : 8,
+                                       BINDIR "/libvirt_lxc",
+                                       NULL,
+                                       0,
+                                       NULL);
+    if (exeGuest == NULL)
+        goto no_memory;
+
+    if (virCapabilitiesAddGuestDomain(exeGuest, "lxc",
+                                      NULL, NULL, 0, NULL) == NULL)
+        goto no_memory;
+
+    /* LXC Requires an emulator in the XML */
+    virCapabilitiesSetEmulatorRequired(caps);
+
+    return caps;
+
+no_memory:
+    virCapabilitiesFree(caps);
+    return NULL;
+}
+
+/**
+ * lxcLoadDriverConfig:
+ * @driver: driver state to fill in
+ *
+ * Stores heap copies of the compile-time LXC_CONFIG_DIR, LXC_STATE_DIR
+ * and LXC_LOG_DIR paths in @driver, in that order, stopping at the
+ * first allocation failure.
+ *
+ * Returns 0 on success, -1 (after reporting OOM) on failure
+ */
+int lxcLoadDriverConfig(lxc_driver_t *driver)
+{
+    /* && short-circuits, so later copies are skipped once one fails */
+    if ((driver->configDir = strdup(LXC_CONFIG_DIR)) != NULL &&
+        (driver->stateDir = strdup(LXC_STATE_DIR)) != NULL &&
+        (driver->logDir = strdup(LXC_LOG_DIR)) != NULL)
+        return 0;
+
+    virReportOOMError(NULL);
+    return -1;
+}
--- /dev/null
+/*
+ * Copyright IBM Corp. 2008
+ *
+ * lxc_conf.h: header file for linux container config functions
+ *
+ * Authors:
+ * David L. Leskovec <dlesko at linux.vnet.ibm.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef LXC_CONF_H
+#define LXC_CONF_H
+
+#include <config.h>
+
+#include "internal.h"
+#include "domain_conf.h"
+#include "domain_event.h"
+#include "capabilities.h"
+#include "threads.h"
+#include "cgroup.h"
+
+/* Directories used by the LXC driver, built from the SYSCONF_DIR and
+ * LOCAL_STATE_DIR prefixes; lxcLoadDriverConfig() copies them into the
+ * driver's configDir, stateDir and logDir fields */
+#define LXC_CONFIG_DIR SYSCONF_DIR "/libvirt/lxc"
+#define LXC_STATE_DIR LOCAL_STATE_DIR "/run/libvirt/lxc"
+#define LXC_LOG_DIR LOCAL_STATE_DIR "/log/libvirt/lxc"
+
+/* State of the LXC driver */
+typedef struct __lxc_driver lxc_driver_t;
+struct __lxc_driver {
+ /* NOTE(review): presumably guards the fields below - confirm in lxc_driver.c */
+ virMutex lock;
+
+ /* Capabilities object; lxcCapsInit() builds one of this type */
+ virCapsPtr caps;
+
+ /* NOTE(review): presumably the driver-level cgroup - confirm */
+ virCgroupPtr cgroup;
+ /* List of domain objects */
+ virDomainObjList domains;
+ /* configDir, stateDir and logDir are filled in by lxcLoadDriverConfig();
+ * autostartDir is not set anywhere in lxc_conf.c */
+ char *configDir;
+ char *autostartDir;
+ char *stateDir;
+ char *logDir;
+ /* NOTE(review): presumably non-zero when network namespaces are usable - confirm */
+ int have_netns;
+
+ /* An array of callbacks */
+ virDomainEventCallbackListPtr domainEventCallbacks;
+ /* NOTE(review): queue/timer/dispatching presumably drive deferred domain
+ * event delivery - confirm against lxc_driver.c */
+ virDomainEventQueuePtr domainEventQueue;
+ int domainEventTimer;
+ int domainEventDispatching;
+};
+
+/* Fills in the driver's configDir/stateDir/logDir; returns 0 or -1 on OOM */
+int lxcLoadDriverConfig(lxc_driver_t *driver);
+/* Builds the LXC capabilities object; returns NULL on failure */
+virCapsPtr lxcCapsInit(void);
+
+/* Reports an error in the VIR_FROM_LXC domain, recording the calling
+ * file, function and line. The @dom argument is accepted but not used
+ * by the expansion. */
+#define lxcError(conn, dom, code, fmt...) \
+ virReportErrorHelper(conn, VIR_FROM_LXC, code, __FILE__, \
+ __FUNCTION__, __LINE__, fmt)
+
+#endif /* LXC_CONF_H */
--- /dev/null
+/*
+ * Copyright IBM Corp. 2008
+ * Copyright Red Hat 2008-2009
+ *
+ * lxc_container.c: file description
+ *
+ * Authors:
+ * David L. Leskovec <dlesko at linux.vnet.ibm.com>
+ * Daniel P. Berrange <berrange@redhat.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <config.h>
+
+#include <fcntl.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/ioctl.h>
+#include <sys/mount.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <mntent.h>
+
+/* Yes, we want linux private one, for _syscall2() macro */
+#include <linux/unistd.h>
+
+/* For MS_MOVE */
+#include <linux/fs.h>
+
+#if HAVE_CAPNG
+#include <cap-ng.h>
+#endif
+
+#include "virterror_internal.h"
+#include "logging.h"
+#include "lxc_container.h"
+#include "util.h"
+#include "memory.h"
+#include "veth.h"
+
+#define VIR_FROM_THIS VIR_FROM_LXC
+
+/*
+ * GLibc headers are behind the kernel, so we define these
+ * constants if they're not present already.
+ */
+
+#ifndef CLONE_NEWPID
+#define CLONE_NEWPID 0x20000000
+#endif
+#ifndef CLONE_NEWUTS
+#define CLONE_NEWUTS 0x04000000
+#endif
+#ifndef CLONE_NEWUSER
+#define CLONE_NEWUSER 0x10000000
+#endif
+#ifndef CLONE_NEWIPC
+#define CLONE_NEWIPC 0x08000000
+#endif
+#ifndef CLONE_NEWNET
+#define CLONE_NEWNET 0x40000000 /* New network namespace */
+#endif
+
+/* messages between parent and container */
+typedef char lxc_message_t;
+#define LXC_CONTINUE_MSG 'c'
+
+/* Arguments handed from lxcContainerStart() to lxcContainerChild()
+ * through the clone() data pointer */
+typedef struct __lxc_child_argv lxc_child_argv_t;
+struct __lxc_child_argv {
+ /* Domain definition of the container being started */
+ virDomainDefPtr config;
+ /* Number of entries in veths */
+ unsigned int nveths;
+ /* Interface names brought up inside the container */
+ char **veths;
+ /* Control FD on which the child waits for the continue message */
+ int monitor;
+ /* Path of the tty opened as the container's stdin/stdout/stderr */
+ char *ttyPath;
+};
+
+
+/**
+ * lxcContainerExecInit:
+ * @vmDef: pointer to the domain definition
+ *
+ * Replaces the current process image with the container init binary
+ * named by vmDef->os.init. The binary receives its own path as the
+ * only argument and a NULL environment.
+ *
+ * Only returns if execve() fails, in which case its result is returned
+ */
+static int lxcContainerExecInit(virDomainDefPtr vmDef)
+{
+    const char *const initArgs[] = { vmDef->os.init, NULL };
+
+    return execve(initArgs[0], (char **)initArgs, NULL);
+}
+
+/**
+ * lxcContainerSetStdio:
+ * @control: the control FD, which is left open
+ * @ttyfd: open FD of the tty to use as the container console
+ *
+ * Starts a new session, makes @ttyfd its controlling terminal, closes
+ * every FD other than @ttyfd and @control, then duplicates @ttyfd onto
+ * stdin, stdout and stderr.
+ *
+ * Returns 0 on success or -1 in case of error
+ */
+static int lxcContainerSetStdio(int control, int ttyfd)
+{
+    int fd;
+    int maxfd;
+
+    if (setsid() < 0) {
+        virReportSystemError(NULL, errno, "%s",
+                             _("setsid failed"));
+        return -1;
+    }
+
+    if (ioctl(ttyfd, TIOCSCTTY, NULL) < 0) {
+        virReportSystemError(NULL, errno, "%s",
+                             _("ioctl(TIOCSTTY) failed"));
+        return -1;
+    }
+
+    /* FD_CLOEXEC may have been forgotten somewhere, so close everything
+     * except the two FDs we still need before running the container */
+    maxfd = sysconf (_SC_OPEN_MAX);
+    for (fd = 0; fd < maxfd; fd++) {
+        if (fd == ttyfd || fd == control)
+            continue;
+        close(fd);
+    }
+
+    if (dup2(ttyfd, 0) < 0) {
+        virReportSystemError(NULL, errno, "%s",
+                             _("dup2(stdin) failed"));
+        return -1;
+    }
+
+    if (dup2(ttyfd, 1) < 0) {
+        virReportSystemError(NULL, errno, "%s",
+                             _("dup2(stdout) failed"));
+        return -1;
+    }
+
+    if (dup2(ttyfd, 2) < 0) {
+        virReportSystemError(NULL, errno, "%s",
+                             _("dup2(stderr) failed"));
+        return -1;
+    }
+
+    return 0;
+}
+
+/**
+ * lxcContainerSendContinue:
+ * @control: control FD to child
+ *
+ * Writes the single-byte continue message to @control.
+ *
+ * Returns 0 on success or -1 in case of error
+ */
+int lxcContainerSendContinue(int control)
+{
+    lxc_message_t token = LXC_CONTINUE_MSG;
+    int sent;
+
+    sent = safewrite(control, &token, sizeof(token));
+    if (sent == sizeof(token))
+        return 0;
+
+    virReportSystemError(NULL, errno, "%s",
+                         _("unable to send container continue message"));
+    return -1;
+}
+
+/**
+ * lxcContainerWaitForContinue:
+ * @control: control FD from parent
+ *
+ * Reads one message from @control and checks that it is the continue
+ * message. On success @control is closed; on failure it is left open.
+ *
+ * Returns 0 on success or -1 in case of error
+ */
+static int lxcContainerWaitForContinue(int control)
+{
+    lxc_message_t token;
+    int got;
+
+    got = saferead(control, &token, sizeof(token));
+    if (got == sizeof(token) && token == LXC_CONTINUE_MSG) {
+        close(control);
+
+        DEBUG0("Received container continue message");
+
+        return 0;
+    }
+
+    virReportSystemError(NULL, errno, "%s",
+                         _("Failed to read the container continue message"));
+    return -1;
+}
+
+
+/**
+ * lxcContainerEnableInterfaces:
+ * @nveths: number of entries in @veths
+ * @veths: names of the interfaces to bring up
+ *
+ * Brings up each listed interface, stopping at the first failure, and
+ * then brings up "lo" if @veths is non-NULL.
+ *
+ * Returns 0 on success or nonzero in case of error
+ */
+static int lxcContainerEnableInterfaces(unsigned int nveths,
+                                        char **veths)
+{
+    unsigned int idx;
+
+    for (idx = 0 ; idx < nveths ; idx++) {
+        int status;
+
+        DEBUG("Enabling %s", veths[idx]);
+        status = vethInterfaceUpOrDown(veths[idx], 1);
+        if (status != 0)
+            return status;
+    }
+
+    /* enable lo device only if there were other net devices */
+    if (!veths)
+        return 0;
+
+    return vethInterfaceUpOrDown("lo", 1);
+}
+
+
+/* Explicit prototype for pivot_root(); an earlier variant generated the
+ * wrapper with _syscall2(int, pivot_root, char *, newroot, const char *, oldroot).
+ * NOTE(review): presumably none of the headers included here declare it - confirm */
+extern int pivot_root(const char * new_root,const char * put_old);
+
+/* qsort() comparator over an array of C strings that yields descending
+ * strcmp() order, so that a mount point sorts before its parent and the
+ * deepest children get unmounted first */
+static int lxcContainerChildMountSort(const void *a, const void *b)
+{
+    const char *lhs = *(const char **)a;
+    const char *rhs = *(const char **)b;
+
+    /* Operands swapped on purpose to reverse the order */
+    return strcmp(rhs, lhs);
+}
+
+#ifndef MS_REC
+#define MS_REC 16384
+#endif
+
+#ifndef MNT_DETACH
+#define MNT_DETACH 0x00000002
+#endif
+
+#ifndef MS_PRIVATE
+#define MS_PRIVATE (1<<18)
+#endif
+
+#ifndef MS_SLAVE
+#define MS_SLAVE (1<<19)
+#endif
+
+/**
+ * lxcContainerPivotRoot:
+ * @root: filesystem definition whose src becomes the new root
+ *
+ * Makes / private, mounts a tmpfs on <root->src>/.oldroot, bind-mounts
+ * root->src on a "new" directory inside that tmpfs, and pivots into it.
+ * Afterwards the previous root is reachable as /.oldroot.
+ *
+ * Returns 0 on success or -1 in case of error
+ */
+static int lxcContainerPivotRoot(virDomainFSDefPtr root)
+{
+    int rc, ret;
+    char *oldroot = NULL, *newroot = NULL;
+
+    ret = -1;
+
+    /* root->parent must be private, so make / private. */
+    if (mount("", "/", NULL, MS_PRIVATE|MS_REC, NULL) < 0) {
+        virReportSystemError(NULL, errno, "%s",
+                             _("failed to make root private"));
+        goto err;
+    }
+
+    if (virAsprintf(&oldroot, "%s/.oldroot", root->src) < 0) {
+        virReportOOMError(NULL);
+        goto err;
+    }
+
+    /* Any non-zero result is a failure. A positive result is taken as
+     * the errno value, otherwise fall back to the global errno */
+    if ((rc = virFileMakePath(oldroot)) != 0) {
+        virReportSystemError(NULL, rc > 0 ? rc : errno,
+                             _("failed to create %s"),
+                             oldroot);
+        goto err;
+    }
+
+    /* Create a tmpfs root since old and new roots must be
+     * on separate filesystems */
+    if (mount("tmprootfs", oldroot, "tmpfs", 0, NULL) < 0) {
+        virReportSystemError(NULL, errno,
+                             _("failed to mount empty tmpfs at %s"),
+                             oldroot);
+        goto err;
+    }
+
+    /* Create a directory called 'new' in tmpfs */
+    if (virAsprintf(&newroot, "%s/new", oldroot) < 0) {
+        virReportOOMError(NULL);
+        goto err;
+    }
+
+    if ((rc = virFileMakePath(newroot)) != 0) {
+        virReportSystemError(NULL, rc > 0 ? rc : errno,
+                             _("failed to create %s"),
+                             newroot);
+        goto err;
+    }
+
+    /* ... and mount our root onto it */
+    if (mount(root->src, newroot, NULL, MS_BIND|MS_REC, NULL) < 0) {
+        virReportSystemError(NULL, errno,
+                             _("failed to bind new root %s into tmpfs"),
+                             root->src);
+        goto err;
+    }
+
+    /* Now we change into the bind mount, then pivot into the
+     * root->src bind-mounted onto '/new' */
+    if (chdir(newroot) < 0) {
+        virReportSystemError(NULL, errno,
+                             _("failed to chroot into %s"), newroot);
+        goto err;
+    }
+
+    /* The old root directory will live at /.oldroot after
+     * this and will soon be unmounted completely */
+    if (pivot_root(".", ".oldroot") < 0) {
+        virReportSystemError(NULL, errno, "%s",
+                             _("failed to pivot root"));
+        goto err;
+    }
+
+    /* CWD is undefined after pivot_root, so go to / */
+    if (chdir("/") < 0) {
+        virReportSystemError(NULL, errno,
+                             _("failed to chroot into %s"), "/");
+        goto err;
+    }
+
+    ret = 0;
+
+err:
+    VIR_FREE(oldroot);
+    VIR_FREE(newroot);
+
+    return ret;
+}
+
+
+/**
+ * lxcContainerMountBasicFS:
+ * @root: filesystem definition that was pivoted into
+ *
+ * Mounts fresh /dev (tmpfs), /proc and /sys (plus /selinux when built
+ * WITH_SELINUX) and then moves the devpts mount found under
+ * /.oldroot<root->src>/dev/pts to /dev/pts.
+ *
+ * Returns 0 on success or -1 in case of error
+ */
+static int lxcContainerMountBasicFS(virDomainFSDefPtr root)
+{
+    const struct {
+        const char *src;
+        const char *dst;
+        const char *type;
+    } mnts[] = {
+        { "/dev", "/dev", "tmpfs" },
+        { "/proc", "/proc", "proc" },
+        { "/sys", "/sys", "sysfs" },
+#if WITH_SELINUX
+        { "none", "/selinux", "selinuxfs" },
+#endif
+    };
+    int i, err, rc = -1;
+    char *devpts;
+
+    if (virAsprintf(&devpts, "/.oldroot%s/dev/pts", root->src) < 0) {
+        virReportOOMError(NULL);
+        return rc;
+    }
+
+    for (i = 0 ; i < ARRAY_CARDINALITY(mnts) ; i++) {
+        /* Any non-zero result is a failure. A positive result is taken
+         * as the errno value, otherwise fall back to the global errno */
+        if ((err = virFileMakePath(mnts[i].dst)) != 0) {
+            virReportSystemError(NULL, err > 0 ? err : errno,
+                                 _("failed to mkdir %s"),
+                                 mnts[i].dst);
+            goto cleanup;
+        }
+        if (mount(mnts[i].src, mnts[i].dst, mnts[i].type, 0, NULL) < 0) {
+            virReportSystemError(NULL, errno,
+                                 _("failed to mount %s on %s"),
+                                 mnts[i].type, mnts[i].dst);
+            goto cleanup;
+        }
+    }
+
+    /* rc must stay -1 on this path: the caller only treats negative
+     * values as failure */
+    if ((err = virFileMakePath("/dev/pts")) != 0) {
+        virReportSystemError(NULL, err > 0 ? err : errno, "%s",
+                             _("cannot create /dev/pts"));
+        goto cleanup;
+    }
+
+    VIR_DEBUG("Trying to move %s to %s", devpts, "/dev/pts");
+    if (mount(devpts, "/dev/pts", NULL, MS_MOVE, NULL) < 0) {
+        virReportSystemError(NULL, errno, "%s",
+                             _("failed to mount /dev/pts in container"));
+        goto cleanup;
+    }
+
+    rc = 0;
+
+ cleanup:
+    VIR_FREE(devpts);
+
+    return rc;
+}
+
+/**
+ * lxcContainerPopulateDevices:
+ *
+ * Creates the character device nodes null, zero, full, console, random
+ * and urandom under /dev and sets their permissions. /dev/ptmx becomes
+ * a symlink to /dev/pts/ptmx when that file is writable, otherwise a
+ * device node of its own.
+ *
+ * Returns 0 on success or -1 in case of error
+ */
+static int lxcContainerPopulateDevices(void)
+{
+    int n;
+    const struct {
+        int maj;
+        int min;
+        mode_t mode;
+        const char *path;
+    } nodes[] = {
+        { LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_NULL, 0666, "/dev/null" },
+        { LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_ZERO, 0666, "/dev/zero" },
+        { LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_FULL, 0666, "/dev/full" },
+        { LXC_DEV_MAJ_TTY, LXC_DEV_MIN_CONSOLE, 0600, "/dev/console" },
+        { LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_RANDOM, 0666, "/dev/random" },
+        { LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_URANDOM, 0666, "/dev/urandom" },
+    };
+
+    /* Populate /dev/ with a few important bits */
+    for (n = 0 ; n < ARRAY_CARDINALITY(nodes) ; n++) {
+        dev_t devnum = makedev(nodes[n].maj, nodes[n].min);
+        int failed = mknod(nodes[n].path, S_IFCHR, devnum) < 0;
+
+        /* chmod is only attempted once the node exists */
+        if (!failed)
+            failed = chmod(nodes[n].path, nodes[n].mode) != 0;
+
+        if (failed) {
+            virReportSystemError(NULL, errno,
+                                 _("failed to make device %s"),
+                                 nodes[n].path);
+            return -1;
+        }
+    }
+
+    if (access("/dev/pts/ptmx", W_OK) != 0) {
+        dev_t devnum = makedev(LXC_DEV_MAJ_TTY, LXC_DEV_MIN_PTMX);
+
+        if (mknod("/dev/ptmx", S_IFCHR, devnum) < 0 ||
+            chmod("/dev/ptmx", 0666)) {
+            virReportSystemError(NULL, errno, "%s",
+                                 _("failed to make device /dev/ptmx"));
+            return -1;
+        }
+        return 0;
+    }
+
+    if (symlink("/dev/pts/ptmx", "/dev/ptmx") < 0) {
+        virReportSystemError(NULL, errno, "%s",
+                             _("failed to create symlink /dev/ptmx to /dev/pts/ptmx"));
+        return -1;
+    }
+
+    return 0;
+}
+
+
+/**
+ * lxcContainerMountNewFS:
+ * @vmDef: pointer to the domain definition
+ *
+ * Bind-mounts every VIR_DOMAIN_FS_TYPE_MOUNT filesystem of the domain,
+ * other than the one targeting "/", from its location below /.oldroot
+ * onto its destination, creating the destination directory first.
+ *
+ * Returns 0 on success or -1 in case of error
+ */
+static int lxcContainerMountNewFS(virDomainDefPtr vmDef)
+{
+    int i;
+
+    /* Pull in rest of container's mounts */
+    for (i = 0 ; i < vmDef->nfss ; i++) {
+        char *src;
+        int err;
+
+        if (STREQ(vmDef->fss[i]->dst, "/"))
+            continue;
+        // XXX fix
+        if (vmDef->fss[i]->type != VIR_DOMAIN_FS_TYPE_MOUNT)
+            continue;
+
+        if (virAsprintf(&src, "/.oldroot/%s", vmDef->fss[i]->src) < 0) {
+            virReportOOMError(NULL);
+            return -1;
+        }
+
+        /* Any non-zero result is a failure. A positive result is taken
+         * as the errno value, otherwise fall back to the global errno */
+        if ((err = virFileMakePath(vmDef->fss[i]->dst)) != 0) {
+            virReportSystemError(NULL, err > 0 ? err : errno,
+                                 _("failed to create %s"),
+                                 vmDef->fss[i]->dst);
+            VIR_FREE(src);
+            return -1;
+        }
+        if (mount(src, vmDef->fss[i]->dst, NULL, MS_BIND, NULL) < 0) {
+            /* Report before freeing so that errno still belongs to mount() */
+            virReportSystemError(NULL, errno,
+                                 _("failed to mount %s at %s"),
+                                 vmDef->fss[i]->src,
+                                 vmDef->fss[i]->dst);
+            VIR_FREE(src);
+            return -1;
+        }
+        VIR_FREE(src);
+    }
+
+    return 0;
+}
+
+
+/**
+ * lxcContainerUnmountOldFS:
+ *
+ * Collects every mount point listed in /proc/mounts whose path starts
+ * with "/.oldroot", sorts them so that children come before their
+ * parents, and unmounts them in that order. The collected list is
+ * released on every exit path.
+ *
+ * Returns 0 on success or -1 in case of error
+ */
+static int lxcContainerUnmountOldFS(void)
+{
+    struct mntent mntent;
+    char **mounts = NULL;
+    int nmounts = 0;
+    FILE *procmnt;
+    int i;
+    int ret = -1;
+    char mntbuf[1024];
+
+    if (!(procmnt = setmntent("/proc/mounts", "r"))) {
+        virReportSystemError(NULL, errno, "%s",
+                             _("failed to read /proc/mounts"));
+        return -1;
+    }
+    while (getmntent_r(procmnt, &mntent, mntbuf, sizeof(mntbuf)) != NULL) {
+        VIR_DEBUG("Got %s", mntent.mnt_dir);
+        if (!STRPREFIX(mntent.mnt_dir, "/.oldroot"))
+            continue;
+
+        if (VIR_REALLOC_N(mounts, nmounts+1) < 0) {
+            endmntent(procmnt);
+            virReportOOMError(NULL);
+            goto cleanup;
+        }
+        if (!(mounts[nmounts] = strdup(mntent.mnt_dir))) {
+            endmntent(procmnt);
+            virReportOOMError(NULL);
+            goto cleanup;
+        }
+        /* Only count the slot once it holds a valid string */
+        nmounts++;
+    }
+    endmntent(procmnt);
+
+    if (mounts)
+        qsort(mounts, nmounts, sizeof(mounts[0]),
+              lxcContainerChildMountSort);
+
+    for (i = 0 ; i < nmounts ; i++) {
+        VIR_DEBUG("Umount %s", mounts[i]);
+        if (umount(mounts[i]) < 0) {
+            virReportSystemError(NULL, errno,
+                                 _("failed to unmount '%s'"),
+                                 mounts[i]);
+            goto cleanup;
+        }
+    }
+
+    ret = 0;
+
+cleanup:
+    for (i = 0 ; i < nmounts ; i++)
+        VIR_FREE(mounts[i]);
+    VIR_FREE(mounts);
+
+    return ret;
+}
+
+
+/* Used when the domain has a filesystem mapped to /: build a private
+ * root with pivot_root, a better-chroot-than-chroot approach based on
+ * this thread http://lkml.org/lkml/2008/3/5/29
+ *
+ * The steps run strictly in this order and stop at the first failure:
+ *  1. pivot into a private root, leaving the parent OS mounts on /.oldroot
+ *  2. mount the core /proc, /sys, /dev, /dev/pts filesystems
+ *  3. populate the device nodes in /dev/
+ *  4. set up any non-root mounts from the guest config
+ *  5. unmount everything left over from the host OS, including /.oldroot
+ *
+ * Returns 0 on success or -1 in case of error
+ */
+static int lxcContainerSetupPivotRoot(virDomainDefPtr vmDef,
+                                      virDomainFSDefPtr root)
+{
+    if (lxcContainerPivotRoot(root) < 0 ||
+        lxcContainerMountBasicFS(root) < 0 ||
+        lxcContainerPopulateDevices() < 0 ||
+        lxcContainerMountNewFS(vmDef) < 0 ||
+        lxcContainerUnmountOldFS() < 0)
+        return -1;
+
+    return 0;
+}
+
+/* Used when nothing is mapped to /: keep the main root, mark it as a
+ * recursive slave mount, bind-mount each VIR_DOMAIN_FS_TYPE_MOUNT
+ * filesystem from the domain config and mount a new /proc.
+ *
+ * Returns 0 on success or -1 in case of error
+ */
+static int lxcContainerSetupExtraMounts(virDomainDefPtr vmDef)
+{
+    int idx;
+
+    if (mount("", "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) {
+        virReportSystemError(NULL, errno, "%s",
+                             _("failed to make / slave"));
+        return -1;
+    }
+
+    for (idx = 0 ; idx < vmDef->nfss ; idx++) {
+        int mounted;
+
+        // XXX fix to support other mount types
+        if (vmDef->fss[idx]->type != VIR_DOMAIN_FS_TYPE_MOUNT)
+            continue;
+
+        mounted = mount(vmDef->fss[idx]->src, vmDef->fss[idx]->dst,
+                        NULL, MS_BIND, NULL);
+        if (mounted < 0) {
+            virReportSystemError(NULL, errno,
+                                 _("failed to mount %s at %s"),
+                                 vmDef->fss[idx]->src,
+                                 vmDef->fss[idx]->dst);
+            return -1;
+        }
+    }
+
+    /* mount /proc */
+    if (mount("lxcproc", "/proc", "proc", 0, NULL) < 0) {
+        virReportSystemError(NULL, errno, "%s",
+                             _("failed to mount /proc"));
+        return -1;
+    }
+
+    return 0;
+}
+
+/* Chooses the mount setup strategy: pivot_root when the domain defines
+ * a root filesystem, extra mounts on top of the main root otherwise */
+static int lxcContainerSetupMounts(virDomainDefPtr vmDef,
+                                   virDomainFSDefPtr root)
+{
+    return root ? lxcContainerSetupPivotRoot(vmDef, root)
+                : lxcContainerSetupExtraMounts(vmDef);
+}
+
+
+/*
+ * Runs as the 'init' process inside the container, just before the
+ * real init is executed. Removes capabilities that could be dangerous
+ * to the host system, since they are not currently "containerized",
+ * and locks the result. Without libcap-ng this only logs a warning.
+ *
+ * Returns 0 on success or -1 in case of error
+ */
+static int lxcContainerDropCapabilities(void)
+{
+#if HAVE_CAPNG
+    int rv;
+
+    capng_get_caps_process();
+
+    rv = capng_updatev(CAPNG_DROP,
+                       CAPNG_EFFECTIVE | CAPNG_PERMITTED |
+                       CAPNG_INHERITABLE | CAPNG_BOUNDING_SET,
+                       CAP_SYS_BOOT, /* No use of reboot */
+                       CAP_SYS_MODULE, /* No kernel module loading */
+                       CAP_SYS_TIME, /* No changing the clock */
+                       CAP_AUDIT_CONTROL, /* No messing with auditing status */
+                       CAP_MAC_ADMIN, /* No messing with LSM config */
+                       -1 /* sentinel */);
+    if (rv < 0) {
+        lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
+                 _("failed to remove capabilities %d"), rv);
+        return -1;
+    }
+
+    rv = capng_apply(CAPNG_SELECT_BOTH);
+    if (rv < 0) {
+        lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
+                 _("failed to apply capabilities: %d"), rv);
+        return -1;
+    }
+
+    /* Need to prevent them regaining any caps on exec */
+    rv = capng_lock();
+    if (rv < 0) {
+        lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
+                 _("failed to lock capabilities: %d"), rv);
+        return -1;
+    }
+
+#else
+    VIR_WARN0(_("libcap-ng support not compiled in, unable to clear capabilities"));
+#endif
+    return 0;
+}
+
+
+/**
+ * lxcContainerChild:
+ * @data: pointer to the lxc_child_argv_t container arguments
+ *
+ * This function is run in the process clone()'d in lxcContainerStart.
+ * It performs the container setup tasks in this order:
+ *   open the console tty and install it as stdin/stdout/stderr
+ *   set up the container filesystems
+ *   wait for the continue message from the parent
+ *   enable the network interfaces
+ *   drop dangerous capabilities
+ *   exec the container init
+ *
+ * Only returns on failure, with -1 or the result of the failed execve()
+ */
+static int lxcContainerChild( void *data )
+{
+    lxc_child_argv_t *argv = data;
+    virDomainDefPtr vmDef = argv->config;
+    int ttyfd;
+    char *ttyPath;
+    virDomainFSDefPtr root;
+
+    if (NULL == vmDef) {
+        lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
+                 "%s", _("lxcChild() passed invalid vm definition"));
+        return -1;
+    }
+
+    root = virDomainGetRootFilesystem(vmDef);
+
+    /* The mounts have not been set up yet, so with a dedicated root the
+     * tty path is prefixed with root->src */
+    if (root) {
+        if (virAsprintf(&ttyPath, "%s%s", root->src, argv->ttyPath) < 0) {
+            virReportOOMError(NULL);
+            return -1;
+        }
+    } else {
+        if (!(ttyPath = strdup(argv->ttyPath))) {
+            virReportOOMError(NULL);
+            return -1;
+        }
+    }
+
+    ttyfd = open(ttyPath, O_RDWR|O_NOCTTY);
+    if (ttyfd < 0) {
+        virReportSystemError(NULL, errno,
+                             _("failed to open tty %s"),
+                             ttyPath);
+        /* Release the path on the failure path too */
+        VIR_FREE(ttyPath);
+        return -1;
+    }
+    VIR_FREE(ttyPath);
+
+    if (lxcContainerSetStdio(argv->monitor, ttyfd) < 0) {
+        close(ttyfd);
+        return -1;
+    }
+    /* stdin/stdout/stderr now refer to the tty, the original FD is spare */
+    close(ttyfd);
+
+    if (lxcContainerSetupMounts(vmDef, root) < 0)
+        return -1;
+
+    /* Wait for interface devices to show up */
+    if (lxcContainerWaitForContinue(argv->monitor) < 0)
+        return -1;
+
+    /* enable interfaces */
+    if (lxcContainerEnableInterfaces(argv->nveths, argv->veths) < 0)
+        return -1;
+
+    /* drop a set of root capabilities */
+    if (lxcContainerDropCapabilities() < 0)
+        return -1;
+
+    /* this function will only return if an error occurred */
+    return lxcContainerExecInit(vmDef);
+}
+
+/* Returns 1 when lxcContainerAvailable() succeeds with the user
+ * namespace feature requested, 0 otherwise */
+static int userns_supported(void)
+{
+    if (lxcContainerAvailable(LXC_CONTAINER_FEATURE_USER) != 0)
+        return 0;
+
+    return 1;
+}
+
+/**
+ * lxcContainerStart:
+ * @def: pointer to the domain definition
+ * @nveths: number of entries in @veths
+ * @veths: interface names to enable inside the container
+ * @control: control FD handed to the child
+ * @ttyPath: path of the tty used as the container console
+ *
+ * Starts a container process by calling clone() with the namespace flags.
+ * A user namespace is requested when supported, and a network namespace
+ * when the definition has network devices.
+ *
+ * Returns PID of container on success or -1 in case of error
+ */
+int lxcContainerStart(virDomainDefPtr def,
+                      unsigned int nveths,
+                      char **veths,
+                      int control,
+                      char *ttyPath)
+{
+    pid_t pid;
+    int flags;
+    int saved_errno;
+    int stacksize = getpagesize() * 4;
+    char *stack, *stacktop;
+    lxc_child_argv_t args = { def, nveths, veths, control, ttyPath };
+
+    /* allocate a stack for the container */
+    if (VIR_ALLOC_N(stack, stacksize) < 0) {
+        virReportOOMError(NULL);
+        return -1;
+    }
+    /* clone() is given the top of the stack area */
+    stacktop = stack + stacksize;
+
+    flags = CLONE_NEWPID|CLONE_NEWNS|CLONE_NEWUTS|CLONE_NEWIPC|SIGCHLD;
+
+    if (userns_supported())
+        flags |= CLONE_NEWUSER;
+
+    if (def->nets != NULL)
+        flags |= CLONE_NEWNET;
+
+    pid = clone(lxcContainerChild, stacktop, flags, &args);
+    /* Capture errno now: the free and debug calls below may change it */
+    saved_errno = errno;
+    VIR_FREE(stack);
+    DEBUG("clone() returned, %d", pid);
+
+    if (pid < 0) {
+        virReportSystemError(NULL, saved_errno, "%s",
+                             _("failed to run clone container"));
+        return -1;
+    }
+
+    return pid;
+}
+
+/* clone() entry point used by lxcContainerAvailable(): exits
+ * immediately with status 0 and never returns */
+static int lxcContainerDummyChild(void *argv ATTRIBUTE_UNUSED)
+{
+ _exit(0);
+}
+
+/**
+ * lxcContainerAvailable:
+ * @features: bitmask of LXC_CONTAINER_FEATURE_* flags to probe
+ *
+ * Checks whether containers can be created by clone()ing a child that
+ * exits immediately, using the base namespace flags plus the user
+ * and/or network namespace flags selected by @features.
+ *
+ * Returns 0 if the clone() succeeded, -1 otherwise
+ */
+int lxcContainerAvailable(int features)
+{
+    int flags = CLONE_NEWPID|CLONE_NEWNS|CLONE_NEWUTS|
+        CLONE_NEWIPC|SIGCHLD;
+    int cpid;
+    int saved_errno;
+    int stacksize = getpagesize() * 4;
+    char *childStack;
+    char *stack;
+    int childStatus;
+
+    if (features & LXC_CONTAINER_FEATURE_USER)
+        flags |= CLONE_NEWUSER;
+
+    if (features & LXC_CONTAINER_FEATURE_NET)
+        flags |= CLONE_NEWNET;
+
+    if (VIR_ALLOC_N(stack, stacksize) < 0) {
+        DEBUG0("Unable to allocate stack");
+        return -1;
+    }
+
+    /* clone() is given the top of the stack area */
+    childStack = stack + stacksize;
+
+    cpid = clone(lxcContainerDummyChild, childStack, flags, NULL);
+    /* Capture errno now: freeing the stack may change it */
+    saved_errno = errno;
+    VIR_FREE(stack);
+    if (cpid < 0) {
+        char ebuf[1024];
+        DEBUG("clone call returned %s, container support is not enabled",
+              virStrerror(saved_errno, ebuf, sizeof ebuf));
+        return -1;
+    }
+
+    /* Reap the probe child so it does not linger as a zombie */
+    waitpid(cpid, &childStatus, 0);
+
+    return 0;
+}
--- /dev/null
+/*
+ * Copyright IBM Corp. 2008
+ *
+ * lxc_container.h: header file for fcns run inside container
+ *
+ * Authors:
+ * David L. Leskovec <dlesko at linux.vnet.ibm.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef LXC_CONTAINER_H
+#define LXC_CONTAINER_H
+
+#include "lxc_conf.h"
+
+/* Bit flags accepted by lxcContainerAvailable() to select which
+ * optional namespaces are added to the probe clone() */
+enum {
+ /* adds CLONE_NEWNET */
+ LXC_CONTAINER_FEATURE_NET = (1 << 0),
+ /* adds CLONE_NEWUSER */
+ LXC_CONTAINER_FEATURE_USER = (1 << 1),
+};
+
+/* Major/minor numbers of the character devices created in the
+ * container's /dev and allowed in its device cgroup */
+
+/* Major shared by /dev/null, zero, full, random and urandom */
+#define LXC_DEV_MAJ_MEMORY 1
+#define LXC_DEV_MIN_NULL 3
+#define LXC_DEV_MIN_ZERO 5
+#define LXC_DEV_MIN_FULL 7
+#define LXC_DEV_MIN_RANDOM 8
+#define LXC_DEV_MIN_URANDOM 9
+
+/* Major shared by /dev/console and /dev/ptmx */
+#define LXC_DEV_MAJ_TTY 5
+#define LXC_DEV_MIN_CONSOLE 1
+#define LXC_DEV_MIN_PTMX 2
+
+/* Major allowed as a whole (any minor) for the container's PTY devices */
+#define LXC_DEV_MAJ_PTY 136
+
+/* Writes the continue message to the control FD; returns 0 or -1 */
+int lxcContainerSendContinue(int control);
+
+/* clone()s the container process; returns its PID or -1 on error */
+int lxcContainerStart(virDomainDefPtr def,
+ unsigned int nveths,
+ char **veths,
+ int control,
+ char *ttyPath);
+
+/* Probes container support for the given LXC_CONTAINER_FEATURE_* flags;
+ * returns 0 when available, -1 otherwise */
+int lxcContainerAvailable(int features);
+
+#endif /* LXC_CONTAINER_H */
--- /dev/null
+/*
+ * Copyright IBM Corp. 2008
+ *
+ * lxc_controller.c: linux container process controller
+ *
+ * Authors:
+ * David L. Leskovec <dlesko at linux.vnet.ibm.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <config.h>
+
+#include <sys/epoll.h>
+#include <sys/wait.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/un.h>
+#include <unistd.h>
+#include <paths.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <getopt.h>
+#include <sys/mount.h>
+
+#if HAVE_CAPNG
+#include <cap-ng.h>
+#endif
+
+#include "virterror_internal.h"
+#include "logging.h"
+#include "util.h"
+
+#include "lxc_conf.h"
+#include "lxc_container.h"
+#include "veth.h"
+#include "memory.h"
+#include "util.h"
+
+#define VIR_FROM_THIS VIR_FROM_LXC
+
+/* One device whitelist entry passed to virCgroupAllowDevice(); a type
+ * of 0 terminates the table in lxcSetContainerResources() */
+struct cgroup_device_policy {
+ /* device type character, 'c' for every entry used in this file */
+ char type;
+ int major;
+ int minor;
+};
+
+/**
+ * lxcSetContainerResources
+ * @def: pointer to the domain definition
+ *
+ * Creates a cgroup for the container, sets its memory limit, denies all
+ * devices except a fixed whitelist plus the PTY major, and finally moves
+ * the calling process into the cgroup.
+ *
+ * Returns 0 on success (including when no driver cgroup is configured)
+ * or a non-zero error code in case of error
+ */
+static int lxcSetContainerResources(virDomainDefPtr def)
+{
+    /* Both start as NULL so the cleanup path never frees an
+     * uninitialised pointer.
+     * NOTE(review): assumes virCgroupFree() tolerates a NULL group - confirm */
+    virCgroupPtr driver = NULL;
+    virCgroupPtr cgroup = NULL;
+    int rc = -1;
+    int i;
+    struct cgroup_device_policy devices[] = {
+        {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_NULL},
+        {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_ZERO},
+        {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_FULL},
+        {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_RANDOM},
+        {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_URANDOM},
+        {'c', LXC_DEV_MAJ_TTY, LXC_DEV_MIN_CONSOLE},
+        {'c', LXC_DEV_MAJ_TTY, LXC_DEV_MIN_PTMX},
+        {0, 0, 0}};
+
+    rc = virCgroupForDriver("lxc", &driver, 1, 0);
+    if (rc != 0) {
+        /* Skip all if no driver cgroup is configured */
+        if (rc == -ENXIO || rc == -ENOENT)
+            return 0;
+
+        virReportSystemError(NULL, -rc, "%s",
+                             _("Unable to get cgroup for driver"));
+        return rc;
+    }
+
+    rc = virCgroupForDomain(driver, def->name, &cgroup, 1);
+    if (rc != 0) {
+        virReportSystemError(NULL, -rc,
+                             _("Unable to create cgroup for domain %s"),
+                             def->name);
+        goto cleanup;
+    }
+
+    rc = virCgroupSetMemory(cgroup, def->maxmem);
+    if (rc != 0) {
+        virReportSystemError(NULL, -rc,
+                             _("Unable to set memory limit for domain %s"),
+                             def->name);
+        goto cleanup;
+    }
+
+    rc = virCgroupDenyAllDevices(cgroup);
+    if (rc != 0) {
+        virReportSystemError(NULL, -rc,
+                             _("Unable to deny devices for domain %s"),
+                             def->name);
+        goto cleanup;
+    }
+
+    /* The table is terminated by an entry with type 0 */
+    for (i = 0; devices[i].type != 0; i++) {
+        struct cgroup_device_policy *dev = &devices[i];
+        rc = virCgroupAllowDevice(cgroup,
+                                  dev->type,
+                                  dev->major,
+                                  dev->minor);
+        if (rc != 0) {
+            virReportSystemError(NULL, -rc,
+                                 _("Unable to allow device %c:%d:%d for domain %s"),
+                                 dev->type, dev->major, dev->minor, def->name);
+            goto cleanup;
+        }
+    }
+
+    rc = virCgroupAllowDeviceMajor(cgroup, 'c', LXC_DEV_MAJ_PTY);
+    if (rc != 0) {
+        virReportSystemError(NULL, -rc,
+                             _("Unable to allow PTY devices for domain %s"),
+                             def->name);
+        goto cleanup;
+    }
+
+    rc = virCgroupAddTask(cgroup, getpid());
+    if (rc != 0) {
+        virReportSystemError(NULL, -rc,
+                             _("Unable to add task %d to cgroup for domain %s"),
+                             getpid(), def->name);
+    }
+
+cleanup:
+    virCgroupFree(&driver);
+    virCgroupFree(&cgroup);
+
+    return rc;
+}
+
+/**
+ * lxcMonitorPath:
+ * @def: pointer to the domain definition
+ *
+ * Builds the monitor socket path "<LXC_STATE_DIR>/<domain name>.sock".
+ *
+ * Returns a newly allocated string, or NULL (after reporting OOM) on failure
+ */
+static char*lxcMonitorPath(virDomainDefPtr def)
+{
+    char *sockpath = NULL;
+
+    if (virAsprintf(&sockpath, "%s/%s.sock",
+                    LXC_STATE_DIR, def->name) < 0) {
+        virReportOOMError(NULL);
+        /* Never hand back whatever the failed call left in sockpath */
+        return NULL;
+    }
+    return sockpath;
+}
+
+/**
+ * lxcMonitorServer:
+ * @sockpath: filesystem path of the UNIX socket to create
+ *
+ * Creates a listening UNIX stream socket bound to @sockpath, removing
+ * any stale file at that path first. Paths that do not fit in
+ * sockaddr_un.sun_path with a trailing NUL are rejected instead of
+ * being silently truncated.
+ *
+ * Returns the listening FD on success or -1 in case of error
+ */
+static int lxcMonitorServer(const char *sockpath)
+{
+    int fd = -1;
+    struct sockaddr_un addr;
+
+    if (strlen(sockpath) >= sizeof(addr.sun_path)) {
+        virReportSystemError(NULL, ENAMETOOLONG,
+                             _("failed to bind server socket '%s'"),
+                             sockpath);
+        goto error;
+    }
+
+    if ((fd = socket(PF_UNIX, SOCK_STREAM, 0)) < 0) {
+        virReportSystemError(NULL, errno,
+                             _("failed to create server socket '%s'"),
+                             sockpath);
+        goto error;
+    }
+
+    unlink(sockpath);
+    memset(&addr, 0, sizeof(addr));
+    addr.sun_family = AF_UNIX;
+    /* Length was checked above; the memset supplies the terminating NUL */
+    strncpy(addr.sun_path, sockpath, sizeof(addr.sun_path) - 1);
+
+    if (bind(fd, (struct sockaddr *) &addr, sizeof(addr)) < 0) {
+        virReportSystemError(NULL, errno,
+                             _("failed to bind server socket '%s'"),
+                             sockpath);
+        goto error;
+    }
+    if (listen(fd, 30 /* backlog */ ) < 0) {
+        virReportSystemError(NULL, errno,
+                             _("failed to listen server socket %s"),
+                             sockpath);
+        goto error;
+    }
+
+    return fd;
+
+error:
+    if (fd != -1)
+        close(fd);
+    return -1;
+}
+
+/**
+ * lxcFdForward:
+ * @readFd: file descriptor to read from
+ * @writeFd: file descriptor to write to
+ *
+ * Moves a single byte from readFd to writeFd.
+ *
+ * Returns 0 once the byte has been written, EAGAIN if the read did not
+ * deliver a byte and errno is EAGAIN, or -1 (after reporting the error)
+ * for any other read or write failure
+ */
+static int lxcFdForward(int readFd, int writeFd)
+{
+    char byte[2];
+
+    if (saferead(readFd, byte, 1) != 1) {
+        /* Nothing available right now is not an error for the caller */
+        if (errno == EAGAIN)
+            return EAGAIN;
+
+        virReportSystemError(NULL, errno,
+                             _("read of fd %d failed"),
+                             readFd);
+        return -1;
+    }
+
+    if (safewrite(writeFd, byte, 1) != 1) {
+        virReportSystemError(NULL, errno,
+                             _("write to fd %d failed"),
+                             writeFd);
+        return -1;
+    }
+
+    return 0;
+}
+
+
+/**
+ * lxcControllerClearCapabilities:
+ *
+ * When built with HAVE_CAPNG, clears the capability sets selected by
+ * CAPNG_SELECT_BOTH and applies the result. Without libcap-ng only a
+ * warning is logged.
+ *
+ * Returns 0 on success (or when libcap-ng is not compiled in), -1 if
+ * capng_apply() fails
+ */
+static int lxcControllerClearCapabilities(void)
+{
+#if HAVE_CAPNG
+    int applied;
+
+    capng_clear(CAPNG_SELECT_BOTH);
+
+    applied = capng_apply(CAPNG_SELECT_BOTH);
+    if (applied < 0) {
+        lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
+                 _("failed to apply capabilities: %d"), applied);
+        return -1;
+    }
+#else
+    VIR_WARN0(_("libcap-ng support not compiled in, unable to clear capabilities"));
+#endif
+    return 0;
+}
+
+/* Per-PTY bookkeeping for the forwarding loop in lxcControllerMain. */
+typedef struct _lxcTtyForwardFd_t {
+ /* PTY file descriptor (appPty or contPty) */
+ int fd;
+ /* set to 1 when EPOLLIN was seen for fd, reset to 0 once a read returns EAGAIN */
+ int active;
+} lxcTtyForwardFd_t;
+
+/**
+ * lxcControllerMain:
+ * @monitor: listening monitor socket; new connections are accepted from it
+ * @client: fd of the already connected monitor client
+ * @appPty: Open fd for application facing Pty
+ * @contPty: Open fd for container facing Pty
+ *
+ * Forwards traffic between appPty and contPty, one byte at a time, in
+ * both directions. While doing so it also watches the monitor socket:
+ * a new connection replaces a client that has hung up, and is closed
+ * immediately if a client is still connected.
+ * This process loops forever; it only returns when something fails.
+ * This uses epoll in edge triggered mode to avoid a hard loop on POLLHUP
+ * events when the user disconnects the virsh console via ctrl-]
+ *
+ * appPty and contPty are closed before returning.
+ *
+ * Returns -1 once an error ends the loop (there is no successful exit)
+ */
+static int lxcControllerMain(int monitor,
+                             int client,
+                             int appPty,
+                             int contPty)
+{
+    /* rc stays -1: every exit from the loop is an error path. The result
+     * of lxcFdForward is kept in fwdRc so it cannot leak into the return */
+    int rc = -1;
+    int fwdRc;
+    int epollFd;
+    struct epoll_event epollEvent;
+    int numEvents;
+    int numActive = 0;
+    lxcTtyForwardFd_t fdArray[2];
+    int timeout = -1;
+    int curFdOff = 0;
+    int writeFdOff = 0;
+
+    fdArray[0].fd = appPty;
+    fdArray[0].active = 0;
+    fdArray[1].fd = contPty;
+    fdArray[1].active = 0;
+
+    /* create the epoll file descriptor */
+    epollFd = epoll_create(2);
+    if (0 > epollFd) {
+        virReportSystemError(NULL, errno, "%s",
+                             _("epoll_create(2) failed"));
+        goto cleanup;
+    }
+
+    /* add the file descriptors to the epoll fd */
+    memset(&epollEvent, 0x00, sizeof(epollEvent));
+    epollEvent.events = EPOLLIN|EPOLLET;    /* edge triggered */
+    epollEvent.data.fd = appPty;
+    if (0 > epoll_ctl(epollFd, EPOLL_CTL_ADD, appPty, &epollEvent)) {
+        virReportSystemError(NULL, errno, "%s",
+                             _("epoll_ctl(appPty) failed"));
+        goto cleanup;
+    }
+    epollEvent.data.fd = contPty;
+    if (0 > epoll_ctl(epollFd, EPOLL_CTL_ADD, contPty, &epollEvent)) {
+        virReportSystemError(NULL, errno, "%s",
+                             _("epoll_ctl(contPty) failed"));
+        goto cleanup;
+    }
+
+    /* the monitor socket is level triggered: readable == pending connection */
+    epollEvent.events = EPOLLIN;
+    epollEvent.data.fd = monitor;
+    if (0 > epoll_ctl(epollFd, EPOLL_CTL_ADD, monitor, &epollEvent)) {
+        virReportSystemError(NULL, errno, "%s",
+                             _("epoll_ctl(monitor) failed"));
+        goto cleanup;
+    }
+
+    /* for the client we only care about it going away */
+    epollEvent.events = EPOLLHUP;
+    epollEvent.data.fd = client;
+    if (0 > epoll_ctl(epollFd, EPOLL_CTL_ADD, client, &epollEvent)) {
+        virReportSystemError(NULL, errno, "%s",
+                             _("epoll_ctl(client) failed"));
+        goto cleanup;
+    }
+
+    while (1) {
+        /* if active fd's, return if no events, else wait forever */
+        timeout = (numActive > 0) ? 0 : -1;
+        numEvents = epoll_wait(epollFd, &epollEvent, 1, timeout);
+        if (numEvents > 0) {
+            if (epollEvent.data.fd == monitor) {
+                int fd = accept(monitor, NULL, NULL);
+                if (fd < 0) {
+                    /* the peer may vanish between the event and accept() */
+                    if (errno == EINTR || errno == EAGAIN ||
+                        errno == ECONNABORTED)
+                        continue;
+                    virReportSystemError(NULL, errno, "%s",
+                                         _("accept(monitor) failed"));
+                    goto cleanup;
+                }
+                if (client != -1) { /* Already connected, so kick new one out */
+                    close(fd);
+                    continue;
+                }
+                client = fd;
+                epollEvent.events = EPOLLHUP;
+                epollEvent.data.fd = client;
+                if (0 > epoll_ctl(epollFd, EPOLL_CTL_ADD, client, &epollEvent)) {
+                    virReportSystemError(NULL, errno, "%s",
+                                         _("epoll_ctl(client) failed"));
+                    goto cleanup;
+                }
+            } else if (client != -1 && epollEvent.data.fd == client) {
+                /* the client hung up: forget it so a new one may connect */
+                if (0 > epoll_ctl(epollFd, EPOLL_CTL_DEL, client, &epollEvent)) {
+                    virReportSystemError(NULL, errno, "%s",
+                                         _("epoll_ctl(client) failed"));
+                    goto cleanup;
+                }
+                close(client);
+                client = -1;
+            } else {
+                if (epollEvent.events & EPOLLIN) {
+                    curFdOff = epollEvent.data.fd == appPty ? 0 : 1;
+                    if (!fdArray[curFdOff].active) {
+                        fdArray[curFdOff].active = 1;
+                        ++numActive;
+                    }
+                } else if (epollEvent.events & EPOLLHUP) {
+                    DEBUG("EPOLLHUP from fd %d", epollEvent.data.fd);
+                    continue;
+                } else {
+                    lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
+                             _("error event %d"), epollEvent.events);
+                    goto cleanup;
+                }
+            }
+        } else if (0 == numEvents) {
+            if (2 == numActive) {
+                /* both fds active, toggle between the two */
+                curFdOff ^= 1;
+            } else {
+                /* only one active, if current is active, use it, else it */
+                /* must be the other one (ie. curFd just went inactive) */
+                curFdOff = fdArray[curFdOff].active ? curFdOff : curFdOff ^ 1;
+            }
+
+        } else {
+            if (EINTR == errno) {
+                continue;
+            }
+
+            /* error */
+            virReportSystemError(NULL, errno, "%s",
+                                 _("epoll_wait() failed"));
+            goto cleanup;
+
+        }
+
+        if (0 < numActive) {
+            writeFdOff = curFdOff ^ 1;
+            fwdRc = lxcFdForward(fdArray[curFdOff].fd, fdArray[writeFdOff].fd);
+
+            if (EAGAIN == fwdRc) {
+                /* this fd no longer has data, set it as inactive */
+                --numActive;
+                fdArray[curFdOff].active = 0;
+            } else if (-1 == fwdRc) {
+                goto cleanup;
+            }
+
+        }
+
+    }
+
+cleanup:
+    close(appPty);
+    close(contPty);
+    if (epollFd >= 0)
+        close(epollFd);
+    return rc;
+}
+
+
+
+/**
+ * lxcControllerMoveInterfaces
+ * @nveths: number of entries in @veths
+ * @veths: interface names
+ * @container: pid of container
+ *
+ * Hands each named interface to moveInterfaceToNetNs() for the given
+ * container pid, stopping at the first failure.
+ *
+ * Returns 0 on success or -1 in case of error
+ */
+static int lxcControllerMoveInterfaces(unsigned int nveths,
+                                       char **veths,
+                                       pid_t container)
+{
+    unsigned int idx = 0;
+
+    while (idx < nveths) {
+        if (moveInterfaceToNetNs(veths[idx], container) < 0) {
+            lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
+                     _("failed to move interface %s to ns %d"),
+                     veths[idx], container);
+            return -1;
+        }
+        idx++;
+    }
+
+    return 0;
+}
+
+
+/**
+ * lxcControllerCleanupInterfaces:
+ * @nveths: number of entries in @veths
+ * @veths: interface names
+ *
+ * Calls vethDelete() on every named interface. A failed delete is
+ * reported but does not stop the remaining interfaces being tried.
+ *
+ * Returns 0 always
+ */
+static int lxcControllerCleanupInterfaces(unsigned int nveths,
+                                          char **veths)
+{
+    unsigned int idx = 0;
+
+    while (idx < nveths) {
+        /* best effort: report and carry on with the next interface */
+        if (vethDelete(veths[idx]) < 0) {
+            lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
+                     _("failed to delete veth: %s"), veths[idx]);
+        }
+        idx++;
+    }
+
+    return 0;
+}
+
+/* Fallback values for the mount flags passed to mount() in
+ * lxcControllerRun, used only when no header has defined them already. */
+#ifndef MS_REC
+#define MS_REC 16384
+#endif
+
+#ifndef MS_SLAVE
+#define MS_SLAVE (1<<19)
+#endif
+
+/**
+ * lxcControllerRun:
+ * @def: domain definition of the container to run
+ * @nveths: number of entries in @veths
+ * @veths: names of the interfaces to move into the container
+ * @monitor: listening monitor socket
+ * @client: fd of the connected monitor client
+ * @appPty: open fd of the application facing PTY
+ *
+ * Allocates the container side PTY (on a private devpts instance when
+ * the domain has a root filesystem and the kernel supports it), applies
+ * the container resource settings, starts the container, moves the
+ * interfaces into it, tells it to continue, drops capabilities and then
+ * enters the forwarding loop in lxcControllerMain().
+ *
+ * On the way out the container process is sent SIGTERM and reaped.
+ *
+ * Returns the result of lxcControllerMain(), or -1 if setup failed
+ */
+static int
+lxcControllerRun(virDomainDefPtr def,
+                 unsigned int nveths,
+                 char **veths,
+                 int monitor,
+                 int client,
+                 int appPty)
+{
+    int rc = -1;
+    int control[2] = { -1, -1};
+    /* Initialised so the cleanup path is safe when we bail out before
+     * the PTY has been allocated */
+    int containerPty = -1;
+    char *containerPtyPath = NULL;
+    pid_t container = -1;
+    virDomainFSDefPtr root;
+    char *devpts = NULL;
+    char *devptmx = NULL;
+
+    if (socketpair(PF_UNIX, SOCK_STREAM, 0, control) < 0) {
+        virReportSystemError(NULL, errno, "%s",
+                             _("sockpair failed"));
+        goto cleanup;
+    }
+
+    root = virDomainGetRootFilesystem(def);
+
+    /*
+     * If doing a chroot style setup, we need to prepare
+     * a private /dev/pts for the child now, which they
+     * will later move into position.
+     *
+     * This is complex because 'virsh console' needs to
+     * use /dev/pts from the host OS, and the guest OS
+     * needs to use /dev/pts from the guest.
+     *
+     * This means that we (libvirt_lxc) need to see and
+     * use both /dev/pts instances. We're running in the
+     * host OS context though and don't want to expose
+     * the guest OS /dev/pts there.
+     *
+     * Thus we call unshare(CLONE_NS) so that we can see
+     * the guest's new /dev/pts, without it becoming
+     * visible to the host OS. We also put the root FS
+     * into slave mode, just in case it was currently
+     * marked as shared
+     */
+    if (root) {
+        VIR_DEBUG0("Setting up private /dev/pts");
+        if (unshare(CLONE_NEWNS) < 0) {
+            virReportSystemError(NULL, errno, "%s",
+                                 _("cannot unshare mount namespace"));
+            goto cleanup;
+        }
+
+        if (mount("", "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) {
+            virReportSystemError(NULL, errno, "%s",
+                                 _("failed to switch root mount into slave mode"));
+            goto cleanup;
+        }
+
+        if (virAsprintf(&devpts, "%s/dev/pts", root->src) < 0 ||
+            virAsprintf(&devptmx, "%s/dev/pts/ptmx", root->src) < 0) {
+            virReportOOMError(NULL);
+            goto cleanup;
+        }
+
+        if (virFileMakePath(devpts) < 0) {
+            virReportSystemError(NULL, errno,
+                                 _("failed to make path %s"),
+                                 devpts);
+            goto cleanup;
+        }
+
+        VIR_DEBUG("Mouting 'devpts' on %s", devpts);
+        if (mount("devpts", devpts, "devpts", 0, "newinstance,ptmxmode=0666") < 0) {
+            virReportSystemError(NULL, errno,
+                                 _("failed to mount devpts on %s"),
+                                 devpts);
+            goto cleanup;
+        }
+
+        /* No ptmx node in the new mount: fall back to the shared devpts */
+        if (access(devptmx, R_OK) < 0) {
+            VIR_WARN0("kernel does not support private devpts, using shared devpts");
+            VIR_FREE(devptmx);
+        }
+    }
+
+    if (devptmx) {
+        VIR_DEBUG("Opening tty on private %s", devptmx);
+        if (virFileOpenTtyAt(devptmx,
+                             &containerPty,
+                             &containerPtyPath,
+                             0) < 0) {
+            virReportSystemError(NULL, errno, "%s",
+                                 _("failed to allocate tty"));
+            goto cleanup;
+        }
+    } else {
+        VIR_DEBUG0("Opening tty on shared /dev/ptmx");
+        if (virFileOpenTty(&containerPty,
+                           &containerPtyPath,
+                           0) < 0) {
+            virReportSystemError(NULL, errno, "%s",
+                                 _("failed to allocate tty"));
+            goto cleanup;
+        }
+    }
+
+
+    if (lxcSetContainerResources(def) < 0)
+        goto cleanup;
+
+    if ((container = lxcContainerStart(def,
+                                       nveths,
+                                       veths,
+                                       control[1],
+                                       containerPtyPath)) < 0)
+        goto cleanup;
+    /* control[1] was handed to lxcContainerStart; drop our copy */
+    close(control[1]);
+    control[1] = -1;
+
+    if (lxcControllerMoveInterfaces(nveths, veths, container) < 0)
+        goto cleanup;
+
+    if (lxcContainerSendContinue(control[0]) < 0)
+        goto cleanup;
+
+    /* Now the container is running, there's no need for us to keep
+       any elevated capabilities */
+    if (lxcControllerClearCapabilities() < 0)
+        goto cleanup;
+
+    rc = lxcControllerMain(monitor, client, appPty, containerPty);
+    /* lxcControllerMain closes both PTY fds before it returns, so the
+     * cleanup below must not close containerPty a second time */
+    containerPty = -1;
+
+cleanup:
+    VIR_FREE(devptmx);
+    VIR_FREE(devpts);
+    if (control[0] != -1)
+        close(control[0]);
+    if (control[1] != -1)
+        close(control[1]);
+    VIR_FREE(containerPtyPath);
+    if (containerPty != -1)
+        close(containerPty);
+
+    if (container > 1) {
+        kill(container, SIGTERM);
+        waitpid(container, NULL, 0);
+    }
+    return rc;
+}
+
+
+/**
+ * main:
+ *
+ * Entry point of the container controller. Options:
+ *   -b, --background     fork; the parent writes the pid file and exits
+ *   -n, --name NAME      name of the domain configuration (required)
+ *   -c, --console FD     fd of the application facing PTY (required)
+ *   -v, --veth VETH      interface to move into the container (repeatable)
+ *   -h, --help           print usage
+ *
+ * Loads the domain config from LXC_STATE_DIR, opens the monitor socket,
+ * waits for the first client connection and then runs the container via
+ * lxcControllerRun().
+ *
+ * Returns the result of lxcControllerRun(), or 1 if setup failed
+ */
+int main(int argc, char *argv[])
+{
+    pid_t pid;
+    int rc = 1;
+    int client = -1;
+    char *name = NULL;
+    int nveths = 0;
+    char **veths = NULL;
+    int monitor = -1;
+    int appPty = -1;
+    int bg = 0;
+    virCapsPtr caps = NULL;
+    virDomainDefPtr def = NULL;
+    char *configFile = NULL;
+    char *sockpath = NULL;
+    const struct option options[] = {
+        { "background", 0, NULL, 'b' },
+        { "name",    1, NULL, 'n' },
+        { "veth",    1, NULL, 'v' },
+        { "console", 1, NULL, 'c' },
+        { "help", 0, NULL, 'h' },
+        { 0, 0, 0, 0 },
+    };
+
+    while (1) {
+        int c;
+
+        /* 'b' must be listed for the documented -b short option to work.
+         * NOTE(review): 'd' and 'm:' have no handler below and are kept
+         * only so existing invocations are not rejected - confirm whether
+         * they can be dropped */
+        c = getopt_long(argc, argv, "bdn:v:m:c:h",
+                       options, NULL);
+
+        if (c == -1)
+            break;
+
+        switch (c) {
+        case 'b':
+            bg = 1;
+            break;
+
+        case 'n':
+            if ((name = strdup(optarg)) == NULL) {
+                virReportOOMError(NULL);
+                goto cleanup;
+            }
+            break;
+
+        case 'v':
+            if (VIR_REALLOC_N(veths, nveths+1) < 0) {
+                virReportOOMError(NULL);
+                goto cleanup;
+            }
+            /* Only count the entry once it holds a real name, so the
+             * interface cleanup never sees a NULL element */
+            if ((veths[nveths] = strdup(optarg)) == NULL) {
+                virReportOOMError(NULL);
+                goto cleanup;
+            }
+            nveths++;
+            break;
+
+        case 'c':
+            if (virStrToLong_i(optarg, NULL, 10, &appPty) < 0) {
+                fprintf(stderr, "malformed --console argument '%s'\n", optarg);
+                goto cleanup;
+            }
+            break;
+
+        case 'h':
+        case '?':
+            fprintf(stderr, "\n");
+            fprintf(stderr, "syntax: %s [OPTIONS]\n", argv[0]);
+            fprintf(stderr, "\n");
+            fprintf(stderr, "Options\n");
+            fprintf(stderr, "\n");
+            fprintf(stderr, "  -b, --background\n");
+            fprintf(stderr, "  -n NAME, --name NAME\n");
+            fprintf(stderr, "  -c FD, --console FD\n");
+            fprintf(stderr, "  -v VETH, --veth VETH\n");
+            fprintf(stderr, "  -h, --help\n");
+            fprintf(stderr, "\n");
+            goto cleanup;
+        }
+    }
+
+
+    if (name == NULL) {
+        fprintf(stderr, "%s: missing --name argument for configuration\n", argv[0]);
+        goto cleanup;
+    }
+
+    if (appPty < 0) {
+        fprintf(stderr, "%s: missing --console argument for container PTY\n", argv[0]);
+        goto cleanup;
+    }
+
+    /* NOTE(review): the "&& 0" makes this root check a no-op; left as
+     * found - confirm whether it is meant to stay disabled */
+    if (getuid() && 0) {
+        fprintf(stderr, "%s: must be run as the 'root' user\n", argv[0]);
+        goto cleanup;
+    }
+
+    if ((caps = lxcCapsInit()) == NULL)
+        goto cleanup;
+
+    if ((configFile = virDomainConfigFile(NULL,
+                                          LXC_STATE_DIR,
+                                          name)) == NULL)
+        goto cleanup;
+
+    if ((def = virDomainDefParseFile(NULL, caps, configFile,
+                                     VIR_DOMAIN_XML_INACTIVE)) == NULL)
+        goto cleanup;
+
+    if (def->nnets != nveths) {
+        fprintf(stderr, "%s: expecting %d veths, but got %d\n",
+                argv[0], def->nnets, nveths);
+        goto cleanup;
+    }
+
+    if ((sockpath = lxcMonitorPath(def)) == NULL)
+        goto cleanup;
+
+    if ((monitor = lxcMonitorServer(sockpath)) < 0)
+        goto cleanup;
+
+    if (bg) {
+        if ((pid = fork()) < 0) {
+            virReportSystemError(NULL, errno, "%s",
+                                 _("Unable to fork controller process"));
+            goto cleanup;
+        }
+
+        if (pid > 0) {
+            if ((rc = virFileWritePid(LXC_STATE_DIR, name, pid)) != 0) {
+                virReportSystemError(NULL, rc,
+                                     _("Unable to write pid file '%s/%s.pid'"),
+                                     LXC_STATE_DIR, name);
+                _exit(1);
+            }
+
+            /* First child now exits, allowing original caller
+             * (ie libvirtd's LXC driver to complete their
+             * waitpid & continue */
+            _exit(0);
+        }
+
+        /* Don't hold onto any cwd we inherit from libvirtd either */
+        if (chdir("/") < 0) {
+            virReportSystemError(NULL, errno, "%s",
+                                 _("Unable to change to root dir"));
+            goto cleanup;
+        }
+
+        if (setsid() < 0) {
+            virReportSystemError(NULL, errno, "%s",
+                                 _("Unable to become session leader"));
+            goto cleanup;
+        }
+    }
+
+    /* Accept initial client which is the libvirtd daemon */
+    if ((client = accept(monitor, NULL, 0)) < 0) {
+        virReportSystemError(NULL, errno, "%s",
+                             _("Failed connection from LXC driver"));
+        goto cleanup;
+    }
+
+    rc = lxcControllerRun(def, nveths, veths, monitor, client, appPty);
+
+
+cleanup:
+    if (def)
+        virFileDeletePid(LXC_STATE_DIR, def->name);
+    lxcControllerCleanupInterfaces(nveths, veths);
+    if (sockpath)
+        unlink(sockpath);
+    VIR_FREE(sockpath);
+
+    return rc;
+}
--- /dev/null
+/*
+ * Copyright IBM Corp. 2008
+ *
+ * lxc_driver.c: linux container driver functions
+ *
+ * Authors:
+ * David L. Leskovec <dlesko at linux.vnet.ibm.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <config.h>
+
+#include <fcntl.h>
+#include <sched.h>
+#include <sys/utsname.h>
+#include <stdbool.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <sys/poll.h>
+#include <unistd.h>
+#include <wait.h>
+
+#include "virterror_internal.h"
+#include "logging.h"
+#include "datatypes.h"
+#include "lxc_conf.h"
+#include "lxc_container.h"
+#include "lxc_driver.h"
+#include "memory.h"
+#include "util.h"
+#include "bridge.h"
+#include "veth.h"
+#include "event.h"
+#include "nodeinfo.h"
+#include "uuid.h"
+
+
+#define VIR_FROM_THIS VIR_FROM_LXC
+
+/* Forward declarations of static functions defined outside this part of the file */
+static int lxcStartup(int privileged);
+static int lxcShutdown(void);
+/* Driver state shared by all connections; lxcOpen treats NULL as "driver not active" */
+static lxc_driver_t *lxc_driver = NULL;
+
+/* Functions */
+
+/* Acquire the driver-wide mutex (driver->lock). */
+static void
+lxcDriverLock(lxc_driver_t *driver)
+{
+    virMutexLock(&driver->lock);
+}
+/* Release the driver-wide mutex (driver->lock). */
+static void
+lxcDriverUnlock(lxc_driver_t *driver)
+{
+    virMutexUnlock(&driver->lock);
+}
+
+/* Forward declarations of the domain event helpers; the functions below
+ * call lxcDomainEventQueue() while the driver lock is held. */
+static void lxcDomainEventFlush(int timer, void *opaque);
+static void lxcDomainEventQueue(lxc_driver_t *driver,
+ virDomainEventPtr event);
+
+
+/**
+ * lxcOpen:
+ * @conn: connection being opened
+ * @auth: unused
+ * @flags: unused
+ *
+ * Decides whether this driver handles the connection. With no URI the
+ * connection is claimed (as "lxc:///") only if the driver is active.
+ * With a URI, it must use the "lxc" scheme, name no server, and have
+ * either no path or the path "/".
+ *
+ * Returns VIR_DRV_OPEN_SUCCESS when claimed, VIR_DRV_OPEN_DECLINED when
+ * the URI is not for this driver, VIR_DRV_OPEN_ERROR on failure
+ */
+static virDrvOpenStatus lxcOpen(virConnectPtr conn,
+                                virConnectAuthPtr auth ATTRIBUTE_UNUSED,
+                                int flags ATTRIBUTE_UNUSED)
+{
+    /* Verify uri was specified */
+    if (conn->uri == NULL) {
+        if (lxc_driver == NULL)
+            return VIR_DRV_OPEN_DECLINED;
+
+        conn->uri = xmlParseURI("lxc:///");
+        if (!conn->uri) {
+            virReportOOMError(conn);
+            return VIR_DRV_OPEN_ERROR;
+        }
+    } else {
+        if (conn->uri->scheme == NULL ||
+            STRNEQ(conn->uri->scheme, "lxc"))
+            return VIR_DRV_OPEN_DECLINED;
+
+        /* Leave for remote driver */
+        if (conn->uri->server != NULL)
+            return VIR_DRV_OPEN_DECLINED;
+
+        /* If path isn't '/' then they typoed, tell them correct path.
+         * A URI without any path has a NULL path, which must not be
+         * handed to the string comparison */
+        if (conn->uri->path != NULL &&
+            STRNEQ(conn->uri->path, "/")) {
+            lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
+                     _("unexpected LXC URI path '%s', try lxc:///"),
+                     conn->uri->path);
+            return VIR_DRV_OPEN_ERROR;
+        }
+
+        /* URI was good, but driver isn't active */
+        if (lxc_driver == NULL) {
+            lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
+                     "%s", _("lxc state driver is not active"));
+            return VIR_DRV_OPEN_ERROR;
+        }
+    }
+
+    conn->privateData = lxc_driver;
+
+    return VIR_DRV_OPEN_SUCCESS;
+}
+
+/**
+ * lxcClose:
+ * @conn: connection being closed
+ *
+ * Removes the connection's entries from the driver's domain event
+ * callback list (under the driver lock) and clears conn->privateData.
+ *
+ * Returns 0 always
+ */
+static int
+lxcClose(virConnectPtr conn)
+{
+    lxc_driver_t *drv = conn->privateData;
+
+    lxcDriverLock(drv);
+    virDomainEventCallbackListRemoveConn(conn, drv->domainEventCallbacks);
+    lxcDriverUnlock(drv);
+
+    conn->privateData = NULL;
+
+    return 0;
+}
+
+/**
+ * lxcGetCapabilities:
+ * @conn: connection whose driver capabilities are formatted
+ *
+ * Formats driver->caps as XML while holding the driver lock.
+ *
+ * Returns the XML string, or NULL (after reporting OOM) on failure
+ */
+static char *
+lxcGetCapabilities(virConnectPtr conn)
+{
+    lxc_driver_t *drv = conn->privateData;
+    char *capsXml;
+
+    lxcDriverLock(drv);
+    capsXml = virCapabilitiesFormatXML(drv->caps);
+    if (capsXml == NULL)
+        virReportOOMError(conn);
+    lxcDriverUnlock(drv);
+
+    return capsXml;
+}
+
+
+/**
+ * lxcDomainLookupByID:
+ * @conn: connection to look the domain up on
+ * @id: domain id to search for
+ *
+ * Finds the domain object with the given id and wraps it in a
+ * virDomainPtr whose id is copied from the definition. The object
+ * returned by the lookup is unlocked again before returning.
+ *
+ * Returns the domain, or NULL if none matched (VIR_ERR_NO_DOMAIN is
+ * reported) or virGetDomain() failed
+ */
+static virDomainPtr
+lxcDomainLookupByID(virConnectPtr conn,
+                    int id)
+{
+    lxc_driver_t *drv = conn->privateData;
+    virDomainObjPtr obj;
+    virDomainPtr result;
+
+    lxcDriverLock(drv);
+    obj = virDomainFindByID(&drv->domains, id);
+    lxcDriverUnlock(drv);
+
+    if (obj == NULL) {
+        lxcError(conn, NULL, VIR_ERR_NO_DOMAIN, NULL);
+        return NULL;
+    }
+
+    result = virGetDomain(conn, obj->def->name, obj->def->uuid);
+    if (result != NULL)
+        result->id = obj->def->id;
+
+    virDomainObjUnlock(obj);
+    return result;
+}
+
+/**
+ * lxcDomainLookupByUUID:
+ * @conn: connection to look the domain up on
+ * @uuid: raw UUID to search for
+ *
+ * Finds the domain object with the given UUID and wraps it in a
+ * virDomainPtr whose id is copied from the definition. The object
+ * returned by the lookup is unlocked again before returning.
+ *
+ * Returns the domain, or NULL if none matched (VIR_ERR_NO_DOMAIN is
+ * reported) or virGetDomain() failed
+ */
+static virDomainPtr
+lxcDomainLookupByUUID(virConnectPtr conn,
+                      const unsigned char *uuid)
+{
+    lxc_driver_t *drv = conn->privateData;
+    virDomainObjPtr obj;
+    virDomainPtr result;
+
+    lxcDriverLock(drv);
+    obj = virDomainFindByUUID(&drv->domains, uuid);
+    lxcDriverUnlock(drv);
+
+    if (obj == NULL) {
+        lxcError(conn, NULL, VIR_ERR_NO_DOMAIN, NULL);
+        return NULL;
+    }
+
+    result = virGetDomain(conn, obj->def->name, obj->def->uuid);
+    if (result != NULL)
+        result->id = obj->def->id;
+
+    virDomainObjUnlock(obj);
+    return result;
+}
+
+/**
+ * lxcDomainLookupByName:
+ * @conn: connection to look the domain up on
+ * @name: domain name to search for
+ *
+ * Finds the domain object with the given name and wraps it in a
+ * virDomainPtr whose id is copied from the definition. The object
+ * returned by the lookup is unlocked again before returning.
+ *
+ * Returns the domain, or NULL if none matched (VIR_ERR_NO_DOMAIN is
+ * reported) or virGetDomain() failed
+ */
+static virDomainPtr
+lxcDomainLookupByName(virConnectPtr conn,
+                      const char *name)
+{
+    lxc_driver_t *drv = conn->privateData;
+    virDomainObjPtr obj;
+    virDomainPtr result;
+
+    lxcDriverLock(drv);
+    obj = virDomainFindByName(&drv->domains, name);
+    lxcDriverUnlock(drv);
+
+    if (obj == NULL) {
+        lxcError(conn, NULL, VIR_ERR_NO_DOMAIN, NULL);
+        return NULL;
+    }
+
+    result = virGetDomain(conn, obj->def->name, obj->def->uuid);
+    if (result != NULL)
+        result->id = obj->def->id;
+
+    virDomainObjUnlock(obj);
+    return result;
+}
+
+/**
+ * lxcListDomains:
+ * @conn: connection to list domains on
+ * @ids: array receiving the ids of active domains
+ * @nids: capacity of @ids
+ *
+ * Copies the id of each active domain into @ids, stopping once @nids
+ * entries have been stored.
+ *
+ * Returns the number of ids stored
+ */
+static int
+lxcListDomains(virConnectPtr conn, int *ids, int nids)
+{
+    lxc_driver_t *drv = conn->privateData;
+    int stored = 0;
+    int idx;
+
+    lxcDriverLock(drv);
+    for (idx = 0 ; idx < drv->domains.count && stored < nids ; idx++) {
+        virDomainObjPtr obj = drv->domains.objs[idx];
+
+        virDomainObjLock(obj);
+        if (virDomainIsActive(obj)) {
+            ids[stored] = obj->def->id;
+            stored++;
+        }
+        virDomainObjUnlock(obj);
+    }
+    lxcDriverUnlock(drv);
+
+    return stored;
+}
+
+/**
+ * lxcNumDomains:
+ * @conn: connection to count domains on
+ *
+ * Returns the number of domains for which virDomainIsActive() is true
+ */
+static int
+lxcNumDomains(virConnectPtr conn)
+{
+    lxc_driver_t *drv = conn->privateData;
+    int active = 0;
+    int idx;
+
+    lxcDriverLock(drv);
+    for (idx = 0 ; idx < drv->domains.count ; idx++) {
+        virDomainObjPtr obj = drv->domains.objs[idx];
+
+        virDomainObjLock(obj);
+        if (virDomainIsActive(obj))
+            active++;
+        virDomainObjUnlock(obj);
+    }
+    lxcDriverUnlock(drv);
+
+    return active;
+}
+
+/**
+ * lxcListDefinedDomains:
+ * @conn: connection to list domains on
+ * @names: array receiving newly allocated copies of the domain names
+ * @nnames: capacity of @names
+ *
+ * Stores a copy of the name of each inactive domain in @names, stopping
+ * once @nnames entries have been stored.
+ *
+ * Returns the number of names stored, or -1 if a copy could not be
+ * allocated (names stored so far are freed and OOM is reported)
+ */
+static int
+lxcListDefinedDomains(virConnectPtr conn,
+                      char **const names, int nnames)
+{
+    lxc_driver_t *drv = conn->privateData;
+    int stored = 0;
+    int idx;
+
+    lxcDriverLock(drv);
+    for (idx = 0 ; idx < drv->domains.count && stored < nnames ; idx++) {
+        virDomainObjPtr obj = drv->domains.objs[idx];
+
+        virDomainObjLock(obj);
+        if (!virDomainIsActive(obj)) {
+            char *copy = strdup(obj->def->name);
+
+            /* the slot is counted even when the copy failed, so the
+             * cleanup loop below also visits it */
+            names[stored++] = copy;
+            if (!copy) {
+                virReportOOMError(conn);
+                virDomainObjUnlock(obj);
+                goto cleanup;
+            }
+        }
+        virDomainObjUnlock(obj);
+    }
+    lxcDriverUnlock(drv);
+
+    return stored;
+
+ cleanup:
+    for (idx = 0 ; idx < stored ; idx++)
+        VIR_FREE(names[idx]);
+    lxcDriverUnlock(drv);
+    return -1;
+}
+
+
+/**
+ * lxcNumDefinedDomains:
+ * @conn: connection to count domains on
+ *
+ * Returns the number of domains for which virDomainIsActive() is false
+ */
+static int
+lxcNumDefinedDomains(virConnectPtr conn)
+{
+    lxc_driver_t *drv = conn->privateData;
+    int inactive = 0;
+    int idx;
+
+    lxcDriverLock(drv);
+    for (idx = 0 ; idx < drv->domains.count ; idx++) {
+        virDomainObjPtr obj = drv->domains.objs[idx];
+
+        virDomainObjLock(obj);
+        if (!virDomainIsActive(obj))
+            inactive++;
+        virDomainObjUnlock(obj);
+    }
+    lxcDriverUnlock(drv);
+
+    return inactive;
+}
+
+
+
+/**
+ * lxcDomainDefine:
+ * @conn: connection the domain is defined on
+ * @xml: domain XML description
+ *
+ * Parses @xml, refuses it if its UUID or name clashes with a different
+ * existing domain, or if it has network interfaces while the driver
+ * lacks NETNS support, then assigns the definition, marks the domain
+ * persistent, saves its config and queues a DEFINED event (ADDED for a
+ * new domain, UPDATED when UUID and name matched an existing one).
+ *
+ * Returns the defined domain, or NULL on failure
+ */
+static virDomainPtr lxcDomainDefine(virConnectPtr conn, const char *xml)
+{
+    lxc_driver_t *driver = conn->privateData;
+    virDomainDefPtr def = NULL;
+    virDomainObjPtr vm = NULL;
+    virDomainPtr dom = NULL;
+    virDomainEventPtr event = NULL;
+    int newVM = 1;
+
+    lxcDriverLock(driver);
+    if (!(def = virDomainDefParseString(conn, driver->caps, xml,
+                                        VIR_DOMAIN_XML_INACTIVE)))
+        goto cleanup;
+
+    /* See if a VM with matching UUID already exists */
+    vm = virDomainFindByUUID(&driver->domains, def->uuid);
+    if (vm) {
+        /* UUID matches, but if names don't match, refuse it */
+        if (STRNEQ(vm->def->name, def->name)) {
+            char uuidstr[VIR_UUID_STRING_BUFLEN];
+            virUUIDFormat(vm->def->uuid, uuidstr);
+            lxcError(conn, NULL, VIR_ERR_OPERATION_FAILED,
+                     _("domain '%s' is already defined with uuid %s"),
+                     vm->def->name, uuidstr);
+            goto cleanup;
+        }
+
+        /* UUID & name match */
+        virDomainObjUnlock(vm);
+        /* Forget the object: a failure before virDomainAssignDef() would
+         * otherwise unlock it a second time in the cleanup path */
+        vm = NULL;
+        newVM = 0;
+    } else {
+        /* UUID does not match, but if a name matches, refuse it */
+        vm = virDomainFindByName(&driver->domains, def->name);
+        if (vm) {
+            char uuidstr[VIR_UUID_STRING_BUFLEN];
+            virUUIDFormat(vm->def->uuid, uuidstr);
+            lxcError(conn, NULL, VIR_ERR_OPERATION_FAILED,
+                     _("domain '%s' is already defined with uuid %s"),
+                     def->name, uuidstr);
+            goto cleanup;
+        }
+    }
+
+    if ((def->nets != NULL) && !(driver->have_netns)) {
+        lxcError(conn, NULL, VIR_ERR_NO_SUPPORT,
+                 "%s", _("System lacks NETNS support"));
+        goto cleanup;
+    }
+
+    if (!(vm = virDomainAssignDef(conn, &driver->domains, def)))
+        goto cleanup;
+    /* cleared so the cleanup path does not free the assigned definition */
+    def = NULL;
+    vm->persistent = 1;
+
+    if (virDomainSaveConfig(conn,
+                            driver->configDir,
+                            vm->newDef ? vm->newDef : vm->def) < 0) {
+        virDomainRemoveInactive(&driver->domains, vm);
+        vm = NULL;
+        goto cleanup;
+    }
+
+    event = virDomainEventNewFromObj(vm,
+                                     VIR_DOMAIN_EVENT_DEFINED,
+                                     newVM ?
+                                     VIR_DOMAIN_EVENT_DEFINED_ADDED :
+                                     VIR_DOMAIN_EVENT_DEFINED_UPDATED);
+
+    dom = virGetDomain(conn, vm->def->name, vm->def->uuid);
+    if (dom)
+        dom->id = vm->def->id;
+
+cleanup:
+    virDomainDefFree(def);
+    if (vm)
+        virDomainObjUnlock(vm);
+    if (event)
+        lxcDomainEventQueue(driver, event);
+    lxcDriverUnlock(driver);
+    return dom;
+}
+
+/**
+ * lxcDomainUndefine:
+ * @dom: domain to undefine
+ *
+ * Deletes the config of an inactive, persistent domain, queues an
+ * UNDEFINED/REMOVED event and removes the domain from the driver's list.
+ * Active, transient or unknown domains are rejected with an error.
+ *
+ * Returns 0 on success or -1 in case of error
+ */
+static int
+lxcDomainUndefine(virDomainPtr dom)
+{
+    lxc_driver_t *drv = dom->conn->privateData;
+    virDomainObjPtr obj;
+    virDomainEventPtr ev = NULL;
+    int result = -1;
+
+    lxcDriverLock(drv);
+
+    obj = virDomainFindByUUID(&drv->domains, dom->uuid);
+    if (obj == NULL) {
+        lxcError(dom->conn, dom, VIR_ERR_INVALID_DOMAIN,
+                 "%s", _("no domain with matching uuid"));
+        goto cleanup;
+    }
+
+    if (virDomainIsActive(obj)) {
+        lxcError(dom->conn, dom, VIR_ERR_OPERATION_INVALID,
+                 "%s", _("cannot delete active domain"));
+        goto cleanup;
+    }
+
+    if (!obj->persistent) {
+        lxcError(dom->conn, dom, VIR_ERR_OPERATION_INVALID,
+                 "%s", _("cannot undefine transient domain"));
+        goto cleanup;
+    }
+
+    if (virDomainDeleteConfig(dom->conn, drv->configDir,
+                              drv->autostartDir, obj) < 0)
+        goto cleanup;
+
+    /* build the event while the object still exists */
+    ev = virDomainEventNewFromObj(obj,
+                                  VIR_DOMAIN_EVENT_UNDEFINED,
+                                  VIR_DOMAIN_EVENT_UNDEFINED_REMOVED);
+
+    virDomainRemoveInactive(&drv->domains, obj);
+    obj = NULL;
+    result = 0;
+
+cleanup:
+    if (obj != NULL)
+        virDomainObjUnlock(obj);
+    if (ev != NULL)
+        lxcDomainEventQueue(drv, ev);
+    lxcDriverUnlock(drv);
+    return result;
+}
+
+/**
+ * lxcDomainGetInfo:
+ * @dom: domain to query
+ * @info: structure filled in with the domain's state
+ *
+ * Fills @info with the domain state, maximum and current memory from
+ * the definition, and a fixed virtual CPU count of 1. The CPU time is
+ * read from the domain's cgroup when the domain is active and the
+ * driver has a cgroup; otherwise it is reported as 0.
+ *
+ * Returns 0 on success or -1 in case of error
+ */
+static int lxcDomainGetInfo(virDomainPtr dom,
+                            virDomainInfoPtr info)
+{
+    lxc_driver_t *driver = dom->conn->privateData;
+    virDomainObjPtr vm;
+    virCgroupPtr cgroup = NULL;
+    int ret = -1;
+
+    lxcDriverLock(driver);
+    vm = virDomainFindByUUID(&driver->domains, dom->uuid);
+
+    if (!vm) {
+        lxcError(dom->conn, dom, VIR_ERR_INVALID_DOMAIN,
+                 "%s", _("no domain with matching uuid"));
+        goto cleanup;
+    }
+
+    info->state = vm->state;
+
+    if (!virDomainIsActive(vm) || driver->cgroup == NULL) {
+        info->cpuTime = 0;
+    } else {
+        if (virCgroupForDomain(driver->cgroup, vm->def->name, &cgroup, 0) != 0) {
+            lxcError(dom->conn, dom, VIR_ERR_INTERNAL_ERROR,
+                     _("Unable to get cgroup for %s\n"), vm->def->name);
+            goto cleanup;
+        }
+
+        if (virCgroupGetCpuacctUsage(cgroup, &(info->cpuTime)) < 0) {
+            /* pass the message as an argument and mark it translatable,
+             * like every other fixed-text error in this file */
+            lxcError(dom->conn, dom, VIR_ERR_OPERATION_FAILED,
+                     "%s", _("cannot read cputime for domain"));
+            goto cleanup;
+        }
+    }
+
+    info->maxMem = vm->def->maxmem;
+    info->memory = vm->def->memory;
+    info->nrVirtCpu = 1;
+    ret = 0;
+
+cleanup:
+    lxcDriverUnlock(driver);
+    if (cgroup)
+        virCgroupFree(&cgroup);
+    if (vm)
+        virDomainObjUnlock(vm);
+    return ret;
+}
+
+/**
+ * lxcGetOSType:
+ * @dom: domain to query
+ *
+ * Returns a newly allocated copy of the domain's os.type string, or
+ * NULL if the domain is unknown or the copy could not be allocated
+ * (an error is reported in both cases)
+ */
+static char *lxcGetOSType(virDomainPtr dom)
+{
+    lxc_driver_t *driver = dom->conn->privateData;
+    virDomainObjPtr vm;
+    char *ret = NULL;
+
+    lxcDriverLock(driver);
+    vm = virDomainFindByUUID(&driver->domains, dom->uuid);
+    lxcDriverUnlock(driver);
+
+    if (!vm) {
+        lxcError(dom->conn, dom, VIR_ERR_INVALID_DOMAIN,
+                 "%s", _("no domain with matching uuid"));
+        goto cleanup;
+    }
+
+    ret = strdup(vm->def->os.type);
+    /* A NULL return must always come with a reported error */
+    if (ret == NULL)
+        virReportOOMError(dom->conn);
+
+cleanup:
+    if (vm)
+        virDomainObjUnlock(vm);
+    return ret;
+}
+
+/**
+ * lxcDomainDumpXML:
+ * @dom: domain to format
+ * @flags: formatting flags, passed through to virDomainDefFormat()
+ *
+ * Formats the domain definition as XML. The pending definition
+ * (vm->newDef) is used when VIR_DOMAIN_XML_INACTIVE is set and one
+ * exists; otherwise the current definition is used.
+ *
+ * Returns the result of virDomainDefFormat(), or NULL if the domain
+ * is unknown
+ */
+static char *
+lxcDomainDumpXML(virDomainPtr dom,
+                 int flags)
+{
+    lxc_driver_t *drv = dom->conn->privateData;
+    virDomainObjPtr obj;
+    virDomainDefPtr chosen;
+    char *xml;
+
+    lxcDriverLock(drv);
+    obj = virDomainFindByUUID(&drv->domains, dom->uuid);
+    lxcDriverUnlock(drv);
+
+    if (obj == NULL) {
+        lxcError(dom->conn, dom, VIR_ERR_INVALID_DOMAIN,
+                 "%s", _("no domain with matching uuid"));
+        return NULL;
+    }
+
+    chosen = obj->def;
+    if ((flags & VIR_DOMAIN_XML_INACTIVE) && obj->newDef)
+        chosen = obj->newDef;
+
+    xml = virDomainDefFormat(dom->conn, chosen, flags);
+
+    virDomainObjUnlock(obj);
+    return xml;
+}
+
+
+/**
+ * lxcVMCleanup:
+ * @conn: connection used for error reporting and config deletion
+ * @driver: driver state (state directory, cgroup)
+ * @vm: Ptr to VM to clean up
+ *
+ * Reaps the process recorded in vm->pid, removes the monitor watch and
+ * closes the monitor fd, deletes the pid file and the config kept in the
+ * state directory, marks the domain shut off, takes down and deletes its
+ * veth devices, removes its cgroup and, if a new definition is pending,
+ * switches the domain over to it.
+ *
+ * Returns the exit status of the reaped process if it exited normally,
+ * otherwise 0
+ */
+static int lxcVMCleanup(virConnectPtr conn,
+                        lxc_driver_t *driver,
+                        virDomainObjPtr vm)
+{
+    int result = 0;
+    int reaped;
+    int status = -1;
+    virCgroupPtr cgroup;
+    int idx;
+
+    /* retry the wait for as long as it is interrupted by a signal */
+    do {
+        reaped = waitpid(vm->pid, &status, 0);
+    } while (reaped == -1 && errno == EINTR);
+
+    if (reaped != vm->pid && errno != ECHILD) {
+        virReportSystemError(conn, errno,
+                             _("waitpid failed to wait for container %d: %d"),
+                             vm->pid, reaped);
+    }
+
+    if (WIFEXITED(status)) {
+        result = WEXITSTATUS(status);
+        DEBUG("container exited with rc: %d", result);
+    }
+
+    virEventRemoveHandle(vm->monitorWatch);
+    close(vm->monitor);
+
+    virFileDeletePid(driver->stateDir, vm->def->name);
+    virDomainDeleteConfig(conn, driver->stateDir, NULL, vm);
+
+    vm->state = VIR_DOMAIN_SHUTOFF;
+    vm->pid = -1;
+    vm->def->id = -1;
+    vm->monitor = -1;
+
+    for (idx = 0 ; idx < vm->def->nnets ; idx++) {
+        char *ifname = vm->def->nets[idx]->ifname;
+
+        vethInterfaceUpOrDown(ifname, 0);
+        vethDelete(ifname);
+    }
+
+    if (driver->cgroup &&
+        virCgroupForDomain(driver->cgroup, vm->def->name, &cgroup, 0) == 0) {
+        virCgroupRemove(cgroup);
+        virCgroupFree(&cgroup);
+    }
+
+    /* a definition supplied while the domain ran replaces the old one now */
+    if (vm->newDef) {
+        virDomainDefFree(vm->def);
+        vm->def = vm->newDef;
+        vm->def->id = -1;
+        vm->newDef = NULL;
+    }
+
+    return result;
+}
+
+/**
+ * lxcSetupInterfaces:
+ * @conn: connection used for network lookups and error reporting
+ * @def: pointer to virtual machine structure
+ * @nveths: in/out count of entries in @veths
+ * @veths: in/out array of container side veth names; one newly
+ *         allocated name is appended per interface
+ *
+ * Sets up the container interfaces by creating the veth device pairs and
+ * attaching the parent end to the appropriate bridge.  The container end
+ * will moved into the container namespace later after clone has been called.
+ *
+ * Names appended to @veths before a failure are left in place.
+ *
+ * Returns 0 on success or -1 in case of error
+ */
+static int lxcSetupInterfaces(virConnectPtr conn,
+                              virDomainDefPtr def,
+                              unsigned int *nveths,
+                              char ***veths)
+{
+    /* rc only becomes 0 once every interface has been set up, so no
+     * failure path can be mistaken for success */
+    int rc = -1, i;
+    int err;
+    char *netBridge = NULL;
+    brControl *brctl = NULL;
+
+    if (brInit(&brctl) != 0)
+        return -1;
+
+    for (i = 0 ; i < def->nnets ; i++) {
+        /* Per-interface state: must start out empty for each interface,
+         * otherwise a previous interface's bridge or veth names would
+         * be reused */
+        char *bridge = NULL;
+        char parentVeth[PATH_MAX] = "";
+        char containerVeth[PATH_MAX] = "";
+
+        /* release the bridge name looked up for the previous interface */
+        VIR_FREE(netBridge);
+
+        switch (def->nets[i]->type) {
+        case VIR_DOMAIN_NET_TYPE_NETWORK:
+        {
+            virNetworkPtr network = virNetworkLookupByName(conn,
+                                        def->nets[i]->data.network.name);
+            if (!network) {
+                goto error_exit;
+            }
+
+            /* the returned name is allocated and owned by us */
+            netBridge = virNetworkGetBridgeName(network);
+            bridge = netBridge;
+
+            virNetworkFree(network);
+            break;
+        }
+        case VIR_DOMAIN_NET_TYPE_BRIDGE:
+            /* borrowed from the definition, must not be freed */
+            bridge = def->nets[i]->data.bridge.brname;
+            break;
+        }
+
+        if (NULL == bridge) {
+            lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
+                     "%s", _("failed to get bridge for interface"));
+            goto error_exit;
+        }
+        DEBUG("bridge: %s", bridge);
+
+        DEBUG0("calling vethCreate()");
+        if (NULL != def->nets[i]->ifname) {
+            if (strlen(def->nets[i]->ifname) >= sizeof(parentVeth)) {
+                lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
+                         _("interface name '%s' is too long"),
+                         def->nets[i]->ifname);
+                goto error_exit;
+            }
+            strcpy(parentVeth, def->nets[i]->ifname);
+        }
+        DEBUG("parentVeth: %s, containerVeth: %s", parentVeth, containerVeth);
+        if (0 != (err = vethCreate(parentVeth, PATH_MAX, containerVeth, PATH_MAX))) {
+            lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
+                     _("failed to create veth device pair: %d"), err);
+            goto error_exit;
+        }
+        if (NULL == def->nets[i]->ifname) {
+            def->nets[i]->ifname = strdup(parentVeth);
+            if (NULL == def->nets[i]->ifname) {
+                lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
+                         "%s", _("failed to allocate veth names"));
+                goto error_exit;
+            }
+        }
+        if (VIR_REALLOC_N(*veths, (*nveths)+1) < 0) {
+            virReportOOMError(conn);
+            goto error_exit;
+        }
+        /* only count the slot once it holds a real name */
+        if (((*veths)[*nveths] = strdup(containerVeth)) == NULL) {
+            virReportOOMError(conn);
+            goto error_exit;
+        }
+        (*nveths)++;
+
+        if (0 != (err = brAddInterface(brctl, bridge, parentVeth))) {
+            virReportSystemError(conn, err,
+                                 _("failed to add %s device to %s"),
+                                 parentVeth, bridge);
+            goto error_exit;
+        }
+
+        if (0 != (err = vethInterfaceUpOrDown(parentVeth, 1))) {
+            virReportSystemError(conn, err, "%s",
+                                 _("failed to enable parent ns veth device"));
+            goto error_exit;
+        }
+
+    }
+
+    rc = 0;
+
+error_exit:
+    VIR_FREE(netBridge);
+    brShutdown(brctl);
+    return rc;
+}
+
+
+/**
+ * lxcMonitorClient:
+ * @conn: connection used for error reporting
+ * @driver: driver state supplying the state directory
+ * @vm: domain whose monitor socket is connected to
+ *
+ * Connects a UNIX stream socket to "<stateDir>/<name>.sock".
+ *
+ * Returns the connected fd, or -1 (after reporting the error) on failure
+ */
+static int lxcMonitorClient(virConnectPtr conn,
+                            lxc_driver_t * driver,
+                            virDomainObjPtr vm)
+{
+    char *sockpath = NULL;
+    int fd = -1;
+    struct sockaddr_un addr;
+
+    if (virAsprintf(&sockpath, "%s/%s.sock",
+                    driver->stateDir, vm->def->name) < 0) {
+        virReportOOMError(conn);
+        return -1;
+    }
+
+    /* Refuse paths that would be truncated (and possibly left
+     * unterminated) when copied into sun_path */
+    if (strlen(sockpath) >= sizeof(addr.sun_path)) {
+        virReportSystemError(conn, ENAMETOOLONG,
+                             _("socket path '%s' is too long"),
+                             sockpath);
+        goto error;
+    }
+
+    if ((fd = socket(PF_UNIX, SOCK_STREAM, 0)) < 0) {
+        virReportSystemError(conn, errno, "%s",
+                             _("failed to create client socket"));
+        goto error;
+    }
+
+    memset(&addr, 0, sizeof(addr));
+    addr.sun_family = AF_UNIX;
+    /* addr was zeroed above, so copying at most size-1 bytes keeps the NUL */
+    strncpy(addr.sun_path, sockpath, sizeof(addr.sun_path) - 1);
+
+    if (connect(fd, (struct sockaddr *) &addr, sizeof(addr)) < 0) {
+        virReportSystemError(conn, errno, "%s",
+                             _("failed to connect to client socket"));
+        goto error;
+    }
+
+    VIR_FREE(sockpath);
+    return fd;
+
+error:
+    VIR_FREE(sockpath);
+    if (fd != -1)
+        close(fd);
+    return -1;
+}
+
+
+/**
+ * lxcVmTerminate:
+ * @conn: connection used for error reporting
+ * @driver: driver state, passed on to lxcVMCleanup()
+ * @vm: domain to terminate
+ * @signum: signal to send to vm->pid; 0 selects SIGINT
+ *
+ * Signals the process recorded in vm->pid (a process that no longer
+ * exists is not an error), marks the domain as shutting down and runs
+ * lxcVMCleanup().
+ *
+ * Returns the result of lxcVMCleanup(), or -1 if the pid is invalid or
+ * the signal could not be sent
+ */
+static int
+lxcVmTerminate(virConnectPtr conn,
+               lxc_driver_t *driver,
+               virDomainObjPtr vm,
+               int signum)
+{
+    int sig = (signum == 0) ? SIGINT : signum;
+
+    if (vm->pid <= 0) {
+        lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
+                 _("invalid PID %d for container"), vm->pid);
+        return -1;
+    }
+
+    /* ESRCH just means the process is already gone */
+    if (kill(vm->pid, sig) < 0 && errno != ESRCH) {
+        virReportSystemError(conn, errno,
+                             _("failed to kill pid %d"),
+                             vm->pid);
+        return -1;
+    }
+
+    vm->state = VIR_DOMAIN_SHUTDOWN;
+
+    return lxcVMCleanup(conn, driver, vm);
+}
+
+/**
+ * lxcMonitorEvent:
+ * @watch: event handle that fired
+ * @fd: file descriptor the handle was registered for
+ * @events: unused
+ * @data: the lxc_driver_t the handle was registered with
+ *
+ * Event callback for a domain's monitor socket.  Finds the domain whose
+ * monitorWatch matches @watch, terminates it with SIGINT and queues a
+ * "stopped" event; a transient domain is removed from the domain list.
+ */
+static void lxcMonitorEvent(int watch,
+                            int fd,
+                            int events ATTRIBUTE_UNUSED,
+                            void *data)
+{
+    lxc_driver_t *driver = data;
+    virDomainObjPtr vm = NULL;
+    virDomainEventPtr event = NULL;
+    unsigned int idx = 0;
+
+    lxcDriverLock(driver);
+
+    /* Search for the owner of this watch; the match is left locked */
+    while (vm == NULL && idx < driver->domains.count) {
+        virDomainObjPtr candidate = driver->domains.objs[idx++];
+
+        virDomainObjLock(candidate);
+        if (candidate->monitorWatch == watch)
+            vm = candidate;
+        else
+            virDomainObjUnlock(candidate);
+    }
+
+    /* No owner, or the owner now uses a different monitor descriptor */
+    if (vm == NULL || vm->monitor != fd) {
+        virEventRemoveHandle(watch);
+        goto cleanup;
+    }
+
+    if (lxcVmTerminate(NULL, driver, vm, SIGINT) >= 0) {
+        event = virDomainEventNewFromObj(vm,
+                                         VIR_DOMAIN_EVENT_STOPPED,
+                                         VIR_DOMAIN_EVENT_STOPPED_SHUTDOWN);
+    } else {
+        virEventRemoveHandle(watch);
+    }
+
+    if (!vm->persistent) {
+        virDomainRemoveInactive(&driver->domains, vm);
+        vm = NULL;
+    }
+
+cleanup:
+    if (vm != NULL)
+        virDomainObjUnlock(vm);
+    if (event != NULL)
+        lxcDomainEventQueue(driver, event);
+    lxcDriverUnlock(driver);
+}
+
+
+/**
+ * lxcControllerStart:
+ * @conn: connection used for error reporting
+ * @vm: domain to start; supplies the emulator path and the name
+ * @nveths: number of entries in @veths
+ * @veths: veth device names, each passed to the controller as "--veth"
+ * @appPty: pty file descriptor passed to the controller via "--console"
+ * @logfd: log file descriptor passed to virExec for the child's output
+ *
+ * Builds the controller command line, executes it and waits for the
+ * launched process to exit.
+ *
+ * Returns 0 on success or -1 in case of error
+ */
+static int lxcControllerStart(virConnectPtr conn,
+                              virDomainObjPtr vm,
+                              int nveths,
+                              char **veths,
+                              int appPty,
+                              int logfd)
+{
+    int i;
+    int rc;
+    int ret = -1;
+    int largc = 0, larga = 0;
+    const char **largv = NULL;
+    pid_t child;
+    int status;
+    fd_set keepfd;
+    char appPtyStr[30];
+    const char *emulator;
+
+    FD_ZERO(&keepfd);
+
+#define ADD_ARG_SPACE                                                   \
+    do {                                                                \
+        if (largc == larga) {                                           \
+            larga += 10;                                                \
+            if (VIR_REALLOC_N(largv, larga) < 0)                        \
+                goto no_memory;                                         \
+        }                                                               \
+    } while (0)
+
+#define ADD_ARG(thisarg)                                                \
+    do {                                                                \
+        ADD_ARG_SPACE;                                                  \
+        largv[largc++] = thisarg;                                       \
+    } while (0)
+
+#define ADD_ARG_LIT(thisarg)                                            \
+    do {                                                                \
+        ADD_ARG_SPACE;                                                  \
+        if ((largv[largc++] = strdup(thisarg)) == NULL)                 \
+            goto no_memory;                                             \
+    } while (0)
+
+    snprintf(appPtyStr, sizeof(appPtyStr), "%d", appPty);
+
+    emulator = vm->def->emulator;
+
+    ADD_ARG_LIT(emulator);
+    ADD_ARG_LIT("--name");
+    ADD_ARG_LIT(vm->def->name);
+    ADD_ARG_LIT("--console");
+    ADD_ARG_LIT(appPtyStr);
+    ADD_ARG_LIT("--background");
+
+    for (i = 0 ; i < nveths ; i++) {
+        ADD_ARG_LIT("--veth");
+        ADD_ARG_LIT(veths[i]);
+    }
+
+    ADD_ARG(NULL);
+
+#undef ADD_ARG
+#undef ADD_ARG_LIT
+#undef ADD_ARG_SPACE
+
+    FD_SET(appPty, &keepfd);
+
+    if (virExec(conn, largv, NULL, &keepfd, &child,
+                -1, &logfd, &logfd,
+                VIR_EXEC_NONE) < 0)
+        goto cleanup;
+
+    /* We now wait for the process to exit - the controller
+     * will fork() itself into the background - waiting for
+     * it to exit thus guarantees it has written its pidfile.
+     *
+     * Retry when interrupted by a signal.  rc must receive waitpid()'s
+     * own return value (not the result of comparing it with -1),
+     * otherwise the failure check below can never trigger.
+     */
+    do {
+        rc = waitpid(child, &status, 0);
+    } while (rc == -1 && errno == EINTR);
+
+    if (rc == -1) {
+        virReportSystemError(conn, errno,
+                             _("cannot wait for '%s'"),
+                             largv[0]);
+        goto cleanup;
+    }
+
+    if (!(WIFEXITED(status) && WEXITSTATUS(status) == 0)) {
+        lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
+                 _("container '%s' unexpectedly shutdown during startup"),
+                 largv[0]);
+        goto cleanup;
+    }
+
+    ret = 0;
+
+cleanup:
+    /* Release every duplicated argument and then the vector itself */
+    for (i = 0 ; i < largc ; i++)
+        VIR_FREE(largv[i]);
+    VIR_FREE(largv);
+
+    return ret;
+
+no_memory:
+    virReportOOMError(conn);
+    goto cleanup;
+}
+
+
+/**
+ * lxcVmStart:
+ * @conn: pointer to connection
+ * @driver: pointer to driver structure
+ * @vm: pointer to virtual machine structure
+ *
+ * Starts a vm: allocates the console tty, sets up the veth interfaces,
+ * saves the live configuration to the state directory, launches the
+ * controller, connects to its monitor socket and reads its pid.
+ *
+ * Returns 0 on success or -1 in case of error
+ */
+static int lxcVmStart(virConnectPtr conn,
+                      lxc_driver_t * driver,
+                      virDomainObjPtr vm)
+{
+    /* rc stays -1 until the very end so that every "goto cleanup"
+     * reports failure; helper results are kept in err instead. */
+    int rc = -1;
+    int err;
+    unsigned int i;
+    int parentTty = -1;
+    char *parentTtyPath = NULL;
+    char *logfile = NULL;
+    int logfd = -1;
+    unsigned int nveths = 0;
+    char **veths = NULL;
+
+    /* A non-zero result is passed on as the error code to report */
+    if ((err = virFileMakePath(driver->logDir)) != 0) {
+        virReportSystemError(conn, err,
+                             _("cannot create log directory '%s'"),
+                             driver->logDir);
+        return -1;
+    }
+
+    if (virAsprintf(&logfile, "%s/%s.log",
+                    driver->logDir, vm->def->name) < 0) {
+        virReportOOMError(conn);
+        return -1;
+    }
+
+    /* open parent tty */
+    if (virFileOpenTty(&parentTty, &parentTtyPath, 1) < 0) {
+        virReportSystemError(conn, errno, "%s",
+                             _("failed to allocate tty"));
+        goto cleanup;
+    }
+    if (vm->def->console &&
+        vm->def->console->type == VIR_DOMAIN_CHR_TYPE_PTY) {
+        /* The console definition takes ownership of the tty path */
+        VIR_FREE(vm->def->console->data.file.path);
+        vm->def->console->data.file.path = parentTtyPath;
+    } else {
+        VIR_FREE(parentTtyPath);
+    }
+
+    if (lxcSetupInterfaces(conn, vm->def, &nveths, &veths) != 0)
+        goto cleanup;
+
+    /* Persist the live configuration now we have veth & tty info */
+    if (virDomainSaveConfig(conn, driver->stateDir, vm->def) < 0)
+        goto cleanup;
+
+    if ((logfd = open(logfile, O_WRONLY | O_TRUNC | O_CREAT,
+                      S_IRUSR|S_IWUSR)) < 0) {
+        virReportSystemError(conn, errno,
+                             _("failed to open '%s'"),
+                             logfile);
+        goto cleanup;
+    }
+
+    if (lxcControllerStart(conn,
+                           vm,
+                           nveths, veths,
+                           parentTty, logfd) < 0)
+        goto cleanup;
+
+    /* Connect to the controller as a client *first* because
+     * this will block until the child has written their
+     * pid file out to disk */
+    if ((vm->monitor = lxcMonitorClient(conn, driver, vm)) < 0)
+        goto cleanup;
+
+    /* And get its pid */
+    if ((err = virFileReadPid(driver->stateDir, vm->def->name, &vm->pid)) != 0) {
+        virReportSystemError(conn, err,
+                             _("Failed to read pid file %s/%s.pid"),
+                             driver->stateDir, vm->def->name);
+        goto cleanup;
+    }
+
+    vm->def->id = vm->pid;
+    vm->state = VIR_DOMAIN_RUNNING;
+
+    if ((vm->monitorWatch = virEventAddHandle(
+             vm->monitor,
+             VIR_EVENT_HANDLE_ERROR | VIR_EVENT_HANDLE_HANGUP,
+             lxcMonitorEvent,
+             driver, NULL)) < 0) {
+        lxcVmTerminate(conn, driver, vm, 0);
+        goto cleanup;
+    }
+
+    rc = 0;
+
+cleanup:
+    /* On failure the veth pairs are deleted again; the name strings and
+     * the array holding them are released in either case. */
+    for (i = 0 ; i < nveths ; i++) {
+        if (rc != 0)
+            vethDelete(veths[i]);
+        VIR_FREE(veths[i]);
+    }
+    VIR_FREE(veths);
+    if (rc != 0 && vm->monitor != -1) {
+        close(vm->monitor);
+        vm->monitor = -1;
+    }
+    if (parentTty != -1)
+        close(parentTty);
+    if (logfd != -1)
+        close(logfd);
+    VIR_FREE(logfile);
+    return rc;
+}
+
+/**
+ * lxcDomainStart:
+ * @dom: domain to start
+ *
+ * Finds the domain by name and starts it through lxcVmStart().  A
+ * domain that defines network interfaces is refused when the driver
+ * has no network namespace support.  A "started" event is queued on
+ * success.
+ *
+ * Returns 0 on success or -1 in case of error
+ */
+static int lxcDomainStart(virDomainPtr dom)
+{
+    lxc_driver_t *driver = dom->conn->privateData;
+    virDomainEventPtr event = NULL;
+    virDomainObjPtr vm;
+    int ret = -1;
+
+    lxcDriverLock(driver);
+
+    if ((vm = virDomainFindByName(&driver->domains, dom->name)) == NULL) {
+        lxcError(dom->conn, dom, VIR_ERR_INVALID_DOMAIN,
+                 _("no domain named %s"), dom->name);
+        goto cleanup;
+    }
+
+    if (!driver->have_netns && vm->def->nets != NULL) {
+        lxcError(dom->conn, NULL, VIR_ERR_NO_SUPPORT,
+                 "%s", _("System lacks NETNS support"));
+        goto cleanup;
+    }
+
+    if ((ret = lxcVmStart(dom->conn, driver, vm)) == 0) {
+        event = virDomainEventNewFromObj(vm,
+                                         VIR_DOMAIN_EVENT_STARTED,
+                                         VIR_DOMAIN_EVENT_STARTED_BOOTED);
+    }
+
+cleanup:
+    if (vm != NULL)
+        virDomainObjUnlock(vm);
+    if (event != NULL)
+        lxcDomainEventQueue(driver, event);
+    lxcDriverUnlock(driver);
+    return ret;
+}
+
+/**
+ * lxcDomainCreateAndStart:
+ * @conn: pointer to connection
+ * @xml: XML definition of domain
+ * @flags: Unused
+ *
+ * Creates a domain based on xml and starts it.  The request is refused
+ * when the UUID or name clashes with a different existing domain, when
+ * the matching domain is already active, or when the definition has
+ * network interfaces and the driver lacks network namespace support.
+ *
+ * Returns the new domain on success or NULL in case of error
+ */
+static virDomainPtr
+lxcDomainCreateAndStart(virConnectPtr conn,
+                        const char *xml,
+                        unsigned int flags ATTRIBUTE_UNUSED) {
+    lxc_driver_t *driver = conn->privateData;
+    virDomainObjPtr vm = NULL;
+    virDomainDefPtr def;
+    virDomainPtr dom = NULL;
+    virDomainEventPtr event = NULL;
+
+    lxcDriverLock(driver);
+    if (!(def = virDomainDefParseString(conn, driver->caps, xml,
+                                        VIR_DOMAIN_XML_INACTIVE)))
+        goto cleanup;
+
+    /* See if a VM with matching UUID already exists */
+    vm = virDomainFindByUUID(&driver->domains, def->uuid);
+    if (vm) {
+        /* UUID matches, but if names don't match, refuse it */
+        if (STRNEQ(vm->def->name, def->name)) {
+            char uuidstr[VIR_UUID_STRING_BUFLEN];
+            virUUIDFormat(vm->def->uuid, uuidstr);
+            lxcError(conn, NULL, VIR_ERR_OPERATION_FAILED,
+                     _("domain '%s' is already defined with uuid %s"),
+                     vm->def->name, uuidstr);
+            goto cleanup;
+        }
+
+        /* UUID & name match, but if VM is already active, refuse it */
+        if (virDomainIsActive(vm)) {
+            lxcError(conn, NULL, VIR_ERR_OPERATION_FAILED,
+                     _("domain is already active as '%s'"), vm->def->name);
+            goto cleanup;
+        }
+        virDomainObjUnlock(vm);
+        /* The object is unlocked now: forget it so that a cleanup path
+         * reached from the checks below cannot unlock it a second time. */
+        vm = NULL;
+    } else {
+        /* UUID does not match, but if a name matches, refuse it */
+        vm = virDomainFindByName(&driver->domains, def->name);
+        if (vm) {
+            char uuidstr[VIR_UUID_STRING_BUFLEN];
+            virUUIDFormat(vm->def->uuid, uuidstr);
+            lxcError(conn, NULL, VIR_ERR_OPERATION_FAILED,
+                     _("domain '%s' is already defined with uuid %s"),
+                     def->name, uuidstr);
+            goto cleanup;
+        }
+    }
+
+    if ((def->nets != NULL) && !(driver->have_netns)) {
+        lxcError(conn, NULL, VIR_ERR_NO_SUPPORT,
+                 "%s", _("System lacks NETNS support"));
+        goto cleanup;
+    }
+
+
+    if (!(vm = virDomainAssignDef(conn, &driver->domains, def)))
+        goto cleanup;
+    /* The definition now belongs to vm; do not free it in cleanup */
+    def = NULL;
+
+    if (lxcVmStart(conn, driver, vm) < 0) {
+        virDomainRemoveInactive(&driver->domains, vm);
+        vm = NULL;
+        goto cleanup;
+    }
+
+    event = virDomainEventNewFromObj(vm,
+                                     VIR_DOMAIN_EVENT_STARTED,
+                                     VIR_DOMAIN_EVENT_STARTED_BOOTED);
+
+    dom = virGetDomain(conn, vm->def->name, vm->def->uuid);
+    if (dom)
+        dom->id = vm->def->id;
+
+cleanup:
+    virDomainDefFree(def);
+    if (vm)
+        virDomainObjUnlock(vm);
+    if (event)
+        lxcDomainEventQueue(driver, event);
+    lxcDriverUnlock(driver);
+    return dom;
+}
+
+/**
+ * lxcDomainShutdown:
+ * @dom: Ptr to domain to shutdown
+ *
+ * Asks lxcVmTerminate() to stop the container with its default signal
+ * (SIGINT) and queues a "stopped/shutdown" event.  A transient domain
+ * is removed from the domain list afterwards.
+ *
+ * Returns 0 on success or -1 in case of error
+ */
+static int lxcDomainShutdown(virDomainPtr dom)
+{
+    lxc_driver_t *driver = dom->conn->privateData;
+    virDomainEventPtr event = NULL;
+    virDomainObjPtr vm;
+    int ret = -1;
+
+    lxcDriverLock(driver);
+
+    if ((vm = virDomainFindByID(&driver->domains, dom->id)) == NULL) {
+        lxcError(dom->conn, dom, VIR_ERR_INVALID_DOMAIN,
+                 _("no domain with id %d"), dom->id);
+        goto cleanup;
+    }
+
+    /* Passing 0 lets lxcVmTerminate() pick its default signal */
+    ret = lxcVmTerminate(dom->conn, driver, vm, 0);
+    event = virDomainEventNewFromObj(vm,
+                                     VIR_DOMAIN_EVENT_STOPPED,
+                                     VIR_DOMAIN_EVENT_STOPPED_SHUTDOWN);
+
+    if (!vm->persistent) {
+        virDomainRemoveInactive(&driver->domains, vm);
+        vm = NULL;
+    }
+
+cleanup:
+    if (vm != NULL)
+        virDomainObjUnlock(vm);
+    if (event != NULL)
+        lxcDomainEventQueue(driver, event);
+    lxcDriverUnlock(driver);
+    return ret;
+}
+
+
+/**
+ * lxcDomainEventRegister:
+ * @conn: connection registering the callback
+ * @callback: function to add to the driver's callback list
+ * @opaque: user data stored with the callback
+ * @freecb: passed through to virDomainEventCallbackListAdd()
+ *
+ * Adds a domain event callback while holding the driver lock.
+ *
+ * Returns the result of virDomainEventCallbackListAdd()
+ */
+static int
+lxcDomainEventRegister (virConnectPtr conn,
+                        virConnectDomainEventCallback callback,
+                        void *opaque,
+                        virFreeCallback freecb)
+{
+    lxc_driver_t *drv = conn->privateData;
+    int added;
+
+    lxcDriverLock(drv);
+    added = virDomainEventCallbackListAdd(conn,
+                                          drv->domainEventCallbacks,
+                                          callback,
+                                          opaque,
+                                          freecb);
+    lxcDriverUnlock(drv);
+
+    return added;
+}
+
+/**
+ * lxcDomainEventDeregister:
+ * @conn: connection that registered the callback
+ * @callback: callback to drop
+ *
+ * Removes a domain event callback.  While events are being dispatched
+ * the entry is only marked for deletion; otherwise it is removed
+ * straight away.
+ *
+ * Returns the result of the list operation that was used
+ */
+static int
+lxcDomainEventDeregister (virConnectPtr conn,
+                          virConnectDomainEventCallback callback)
+{
+    lxc_driver_t *drv = conn->privateData;
+    int removed;
+
+    lxcDriverLock(drv);
+    removed = drv->domainEventDispatching
+        ? virDomainEventCallbackListMarkDelete(conn,
+                                               drv->domainEventCallbacks,
+                                               callback)
+        : virDomainEventCallbackListRemove(conn,
+                                           drv->domainEventCallbacks,
+                                           callback);
+    lxcDriverUnlock(drv);
+
+    return removed;
+}
+
+/**
+ * lxcDomainEventDispatchFunc:
+ * @conn: connection the callback belongs to
+ * @event: event being delivered
+ * @cb: user callback to invoke
+ * @cbopaque: user data registered with @cb
+ * @opaque: the lxc_driver_t, passed in by lxcDomainEventFlush()
+ *
+ * Delivers one event through virDomainEventDispatchDefaultFunc().
+ * Expects the driver lock to be held on entry and holds it again on
+ * return.
+ */
+static void lxcDomainEventDispatchFunc(virConnectPtr conn,
+                                       virDomainEventPtr event,
+                                       virConnectDomainEventCallback cb,
+                                       void *cbopaque,
+                                       void *opaque)
+{
+    lxc_driver_t *drv = opaque;
+
+    /* Release the driver lock for the duration of the callback so the
+     * callback may call back into the driver, then take it again */
+    lxcDriverUnlock(drv);
+    virDomainEventDispatchDefaultFunc(conn, event, cb, cbopaque, NULL);
+    lxcDriverLock(drv);
+}
+
+
+/**
+ * lxcDomainEventFlush:
+ * @timer: unused
+ * @opaque: the lxc_driver_t registered with the timer
+ *
+ * Timer callback that dispatches all queued domain events and then
+ * purges callbacks that were marked for deletion meanwhile.
+ */
+static void lxcDomainEventFlush(int timer ATTRIBUTE_UNUSED, void *opaque)
+{
+    lxc_driver_t *drv = opaque;
+    virDomainEventQueue pending;
+
+    lxcDriverLock(drv);
+
+    drv->domainEventDispatching = 1;
+
+    /* Detach the queued events into a local queue so that events pushed
+     * while dispatching land in the (now empty) driver queue */
+    pending.events = drv->domainEventQueue->events;
+    pending.count = drv->domainEventQueue->count;
+    drv->domainEventQueue->events = NULL;
+    drv->domainEventQueue->count = 0;
+
+    /* Disarm the timer before delivering the detached events */
+    virEventUpdateTimeout(drv->domainEventTimer, -1);
+    virDomainEventQueueDispatch(&pending,
+                                drv->domainEventCallbacks,
+                                lxcDomainEventDispatchFunc,
+                                drv);
+
+    /* Drop the callbacks deregistered during dispatch */
+    virDomainEventCallbackListPurgeMarked(drv->domainEventCallbacks);
+
+    drv->domainEventDispatching = 0;
+    lxcDriverUnlock(drv);
+}
+
+
+/**
+ * lxcDomainEventQueue:
+ * @driver: driver whose event queue receives the event
+ * @event: event to queue; freed here if it cannot be queued
+ *
+ * Appends @event to the driver's queue and arms the flush timer when
+ * the queue holds exactly one event afterwards.
+ *
+ * driver must be locked before calling
+ */
+static void lxcDomainEventQueue(lxc_driver_t *driver,
+                                virDomainEventPtr event)
+{
+    if (virDomainEventQueuePush(driver->domainEventQueue,
+                                event) < 0)
+        virDomainEventFree(event);
+    /* Inspect the queue of the driver that was passed in (the one the
+     * event was pushed to) rather than the global driver instance */
+    if (driver->domainEventQueue->count == 1)
+        virEventUpdateTimeout(driver->domainEventTimer, 0);
+}
+
+/**
+ * lxcDomainDestroy:
+ * @dom: Ptr to domain to destroy
+ *
+ * Terminates the container through lxcVmTerminate() using SIGKILL and
+ * queues a "stopped/destroyed" event.  A transient domain is removed
+ * from the domain list afterwards.
+ *
+ * Returns 0 on success or -1 in case of error
+ */
+static int lxcDomainDestroy(virDomainPtr dom)
+{
+    lxc_driver_t *driver = dom->conn->privateData;
+    virDomainEventPtr event = NULL;
+    virDomainObjPtr vm;
+    int ret = -1;
+
+    lxcDriverLock(driver);
+
+    if ((vm = virDomainFindByID(&driver->domains, dom->id)) == NULL) {
+        lxcError(dom->conn, dom, VIR_ERR_INVALID_DOMAIN,
+                 _("no domain with id %d"), dom->id);
+        goto cleanup;
+    }
+
+    ret = lxcVmTerminate(dom->conn, driver, vm, SIGKILL);
+    event = virDomainEventNewFromObj(vm,
+                                     VIR_DOMAIN_EVENT_STOPPED,
+                                     VIR_DOMAIN_EVENT_STOPPED_DESTROYED);
+
+    if (!vm->persistent) {
+        virDomainRemoveInactive(&driver->domains, vm);
+        vm = NULL;
+    }
+
+cleanup:
+    if (vm != NULL)
+        virDomainObjUnlock(vm);
+    if (event != NULL)
+        lxcDomainEventQueue(driver, event);
+    lxcDriverUnlock(driver);
+    return ret;
+}
+
+/**
+ * lxcCheckNetNsSupport:
+ *
+ * Probes for network namespace support by running
+ * "ip link set lo netns -1" and by asking lxcContainerAvailable()
+ * for LXC_CONTAINER_FEATURE_NET.
+ *
+ * Returns 1 if both probes pass, 0 otherwise
+ */
+static int lxcCheckNetNsSupport(void)
+{
+    const char *probe[] = {"ip", "link", "set", "lo", "netns", "-1", NULL};
+    int status;
+
+    /* The probe fails if the command cannot be run, does not exit
+     * normally, or exits with status 255 */
+    if (virRun(NULL, probe, &status) < 0)
+        return 0;
+    if (!WIFEXITED(status) || WEXITSTATUS(status) == 255)
+        return 0;
+
+    return (lxcContainerAvailable(LXC_CONTAINER_FEATURE_NET) < 0) ? 0 : 1;
+}
+
+
+/**
+ * lxcAutostartConfigs:
+ * @driver: pointer to driver structure
+ *
+ * Starts every inactive domain that is flagged for autostart and
+ * queues a "started" event for each one that came up.  A failed start
+ * is logged and does not stop the loop.
+ */
+static void
+lxcAutostartConfigs(lxc_driver_t *driver) {
+    unsigned int idx;
+    /* XXX: Figure out a better way todo this. The domain
+     * startup code needs a connection handle in order
+     * to lookup the bridge associated with a virtual
+     * network
+     */
+    virConnectPtr conn = virConnectOpen("lxc:///");
+    /* Ignoring NULL conn which is mostly harmless here */
+
+    lxcDriverLock(driver);
+    for (idx = 0 ; idx < driver->domains.count ; idx++) {
+        virDomainObjPtr vm = driver->domains.objs[idx];
+
+        virDomainObjLock(vm);
+
+        /* Nothing to do for domains without autostart or already running */
+        if (!vm->autostart || virDomainIsActive(vm)) {
+            virDomainObjUnlock(vm);
+            continue;
+        }
+
+        if (lxcVmStart(conn, driver, vm) < 0) {
+            virErrorPtr err = virGetLastError();
+            VIR_ERROR(_("Failed to autostart VM '%s': %s\n"),
+                      vm->def->name,
+                      err ? err->message : "");
+        } else {
+            virDomainEventPtr event =
+                virDomainEventNewFromObj(vm,
+                                         VIR_DOMAIN_EVENT_STARTED,
+                                         VIR_DOMAIN_EVENT_STARTED_BOOTED);
+            if (event != NULL)
+                lxcDomainEventQueue(driver, event);
+        }
+
+        virDomainObjUnlock(vm);
+    }
+    lxcDriverUnlock(driver);
+
+    if (conn != NULL)
+        virConnectClose(conn);
+}
+
+
+/**
+ * lxcStartup:
+ * @privileged: non-zero when running as root; the driver refuses to
+ *              start otherwise
+ *
+ * Allocates and initialises the global driver state, loads the domain
+ * configurations and reconnects to containers whose controller still
+ * accepts connections on its monitor socket.
+ *
+ * Returns 0 on success or -1 in case of error
+ */
+static int lxcStartup(int privileged)
+{
+    unsigned int i;
+    char *ld;
+    int rc;
+
+    /* Valgrind gets very annoyed when we clone containers, so
+     * disable LXC when under valgrind
+     * XXX remove this when valgrind is fixed
+     */
+    ld = getenv("LD_PRELOAD");
+    if (ld && strstr(ld, "vgpreload"))
+        return -1;
+
+    /* Check that the user is root */
+    if (!privileged) {
+        return -1;
+    }
+
+    if (VIR_ALLOC(lxc_driver) < 0) {
+        return -1;
+    }
+    if (virMutexInit(&lxc_driver->lock) < 0) {
+        VIR_FREE(lxc_driver);
+        return -1;
+    }
+    lxcDriverLock(lxc_driver);
+
+    /* lxcShutdown() treats any timer id other than -1 as registered, so
+     * mark the timer unregistered before a failure path can reach it */
+    lxc_driver->domainEventTimer = -1;
+
+    /* Check that this is a container enabled kernel */
+    if (lxcContainerAvailable(0) < 0) {
+        VIR_INFO0("LXC support not available in this kernel, disabling driver");
+        goto cleanup;
+    }
+
+    if (VIR_ALLOC(lxc_driver->domainEventCallbacks) < 0)
+        goto cleanup;
+    if (!(lxc_driver->domainEventQueue = virDomainEventQueueNew()))
+        goto cleanup;
+
+    if ((lxc_driver->domainEventTimer =
+         virEventAddTimeout(-1, lxcDomainEventFlush, lxc_driver, NULL)) < 0)
+        goto cleanup;
+
+    lxc_driver->have_netns = lxcCheckNetNsSupport();
+
+    /* A missing cgroup is only warned about, not fatal */
+    rc = virCgroupForDriver("lxc", &lxc_driver->cgroup, privileged, 1);
+    if (rc < 0) {
+        char buf[1024];
+        VIR_WARN("Unable to create cgroup for driver: %s",
+                 virStrerror(-rc, buf, sizeof(buf)));
+    }
+
+    /* Call function to load lxc driver configuration information */
+    if (lxcLoadDriverConfig(lxc_driver) < 0)
+        goto cleanup;
+
+    if ((lxc_driver->caps = lxcCapsInit()) == NULL)
+        goto cleanup;
+
+    if (virDomainLoadAllConfigs(NULL,
+                                lxc_driver->caps,
+                                &lxc_driver->domains,
+                                lxc_driver->configDir,
+                                lxc_driver->autostartDir,
+                                0, NULL, NULL) < 0)
+        goto cleanup;
+
+    for (i = 0 ; i < lxc_driver->domains.count ; i++) {
+        virDomainObjPtr vm = lxc_driver->domains.objs[i];
+        char *config = NULL;
+        virDomainDefPtr tmp;
+
+        virDomainObjLock(vm);
+        /* A domain whose monitor socket cannot be reached is skipped */
+        if ((vm->monitor = lxcMonitorClient(NULL, lxc_driver, vm)) < 0) {
+            virDomainObjUnlock(vm);
+            continue;
+        }
+
+        /* Read pid from controller */
+        if ((rc = virFileReadPid(lxc_driver->stateDir, vm->def->name, &vm->pid)) != 0) {
+            close(vm->monitor);
+            vm->monitor = -1;
+            virDomainObjUnlock(vm);
+            continue;
+        }
+
+        if ((config = virDomainConfigFile(NULL,
+                                          lxc_driver->stateDir,
+                                          vm->def->name)) == NULL) {
+            /* The domain is skipped, so release its monitor connection
+             * just like the pid failure path above does */
+            close(vm->monitor);
+            vm->monitor = -1;
+            virDomainObjUnlock(vm);
+            continue;
+        }
+
+        /* Try and load the live config */
+        tmp = virDomainDefParseFile(NULL, lxc_driver->caps, config, 0);
+        VIR_FREE(config);
+        if (tmp) {
+            vm->newDef = vm->def;
+            vm->def = tmp;
+        }
+
+        if (vm->pid != 0) {
+            vm->def->id = vm->pid;
+            vm->state = VIR_DOMAIN_RUNNING;
+        } else {
+            vm->def->id = -1;
+            close(vm->monitor);
+            vm->monitor = -1;
+        }
+        virDomainObjUnlock(vm);
+    }
+
+    lxcDriverUnlock(lxc_driver);
+    return 0;
+
+cleanup:
+    lxcDriverUnlock(lxc_driver);
+    lxcShutdown();
+    return -1;
+}
+
+/**
+ * lxcNotifyLoadDomain:
+ * @vm: domain that was loaded
+ * @newVM: non-zero if the domain was not known before
+ * @opaque: the lxc_driver_t passed to virDomainLoadAllConfigs()
+ *
+ * Load notification callback: queues a "defined/added" event for a
+ * newly seen domain and does nothing otherwise.
+ */
+static void lxcNotifyLoadDomain(virDomainObjPtr vm, int newVM, void *opaque)
+{
+    lxc_driver_t *drv = opaque;
+    virDomainEventPtr added;
+
+    if (!newVM)
+        return;
+
+    added = virDomainEventNewFromObj(vm,
+                                     VIR_DOMAIN_EVENT_DEFINED,
+                                     VIR_DOMAIN_EVENT_DEFINED_ADDED);
+    if (added != NULL)
+        lxcDomainEventQueue(drv, added);
+}
+
+/**
+ * lxcReload:
+ *
+ * Re-reads the domain configuration files into the running driver and
+ * then performs autostart.  Does nothing when the driver has not been
+ * initialised.
+ *
+ * Always returns 0
+ */
+static int
+lxcReload(void) {
+    if (lxc_driver == NULL) {
+        return 0;
+    }
+
+    /* Only the configuration load runs under the driver lock here;
+     * lxcAutostartConfigs() takes the lock itself */
+    lxcDriverLock(lxc_driver);
+    virDomainLoadAllConfigs(NULL,
+                            lxc_driver->caps,
+                            &lxc_driver->domains,
+                            lxc_driver->configDir,
+                            lxc_driver->autostartDir,
+                            0,
+                            lxcNotifyLoadDomain,
+                            lxc_driver);
+    lxcDriverUnlock(lxc_driver);
+
+    lxcAutostartConfigs(lxc_driver);
+
+    return 0;
+}
+
+/**
+ * lxcShutdown:
+ *
+ * Releases everything owned by the global driver state and then the
+ * state itself.
+ *
+ * Returns 0 on success or -1 if the driver was never initialised
+ */
+static int lxcShutdown(void)
+{
+    if (!lxc_driver) {
+        return -1;
+    }
+
+    lxcDriverLock(lxc_driver);
+
+    /* Domains first, then the event machinery */
+    virDomainObjListFree(&lxc_driver->domains);
+
+    virDomainEventCallbackListFree(lxc_driver->domainEventCallbacks);
+    virDomainEventQueueFree(lxc_driver->domainEventQueue);
+
+    /* -1 marks a timer that is not registered */
+    if (lxc_driver->domainEventTimer != -1) {
+        virEventRemoveTimeout(lxc_driver->domainEventTimer);
+    }
+
+    virCapabilitiesFree(lxc_driver->caps);
+
+    /* Directory path strings */
+    VIR_FREE(lxc_driver->configDir);
+    VIR_FREE(lxc_driver->autostartDir);
+    VIR_FREE(lxc_driver->stateDir);
+    VIR_FREE(lxc_driver->logDir);
+
+    /* The lock must be released before it is destroyed */
+    lxcDriverUnlock(lxc_driver);
+    virMutexDestroy(&lxc_driver->lock);
+    VIR_FREE(lxc_driver);
+
+    return 0;
+}
+
+/**
+ * lxcActive:
+ *
+ * Reports whether the LXC driver currently has at least one active
+ * domain.
+ *
+ * Returns 1 if active, 0 otherwise (including when the driver is not
+ * initialised)
+ */
+static int
+lxcActive(void) {
+    unsigned int idx;
+    int found = 0;
+
+    if (!lxc_driver) {
+        return 0;
+    }
+
+    lxcDriverLock(lxc_driver);
+    /* Every domain is visited and locked in turn, even after a hit */
+    for (idx = 0 ; idx < lxc_driver->domains.count ; idx++) {
+        virDomainObjPtr dom = lxc_driver->domains.objs[idx];
+
+        virDomainObjLock(dom);
+        if (virDomainIsActive(dom)) {
+            found = 1;
+        }
+        virDomainObjUnlock(dom);
+    }
+    lxcDriverUnlock(lxc_driver);
+
+    return found;
+}
+
+/**
+ * lxcVersion:
+ * @conn: connection used for error reporting
+ * @version: filled with major * 1000000 + minor * 1000 + release
+ *
+ * Derives the version from the kernel release string reported by
+ * uname().
+ *
+ * Returns 0 on success or -1 in case of error
+ */
+static int lxcVersion(virConnectPtr conn, unsigned long *version)
+{
+    struct utsname ver;
+    int maj;
+    int min;
+    int rev;
+
+    /* If uname() fails, ver holds nothing worth parsing */
+    if (uname(&ver) < 0) {
+        virReportSystemError(conn, errno, "%s",
+                             _("cannot determine kernel release"));
+        return -1;
+    }
+
+    if (sscanf(ver.release, "%i.%i.%i", &maj, &min, &rev) != 3) {
+        lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
+                 _("Unknown release: %s"), ver.release);
+        return -1;
+    }
+
+    *version = (maj * 1000 * 1000) + (min * 1000) + rev;
+
+    return 0;
+}
+
+/**
+ * lxcGetSchedulerType:
+ * @domain: unused
+ * @nparams: if not NULL, set to 1
+ *
+ * Returns a newly allocated copy of "posix" (the result of strdup(),
+ * so NULL if that fails)
+ */
+static char *lxcGetSchedulerType(virDomainPtr domain ATTRIBUTE_UNUSED,
+                                 int *nparams)
+{
+    char *schedType;
+
+    if (nparams != NULL) {
+        *nparams = 1;
+    }
+
+    schedType = strdup("posix");
+    return schedType;
+}
+
+/**
+ * lxcSetSchedulerParameters:
+ * @domain: domain to tune
+ * @params: scheduler parameters to apply
+ * @nparams: number of entries in @params
+ *
+ * Applies scheduler tunables through the domain's cgroup.  The only
+ * accepted parameter is "cpu_shares", which must be of type
+ * VIR_DOMAIN_SCHED_FIELD_ULLONG.
+ *
+ * Returns 0 on success or -1 in case of error
+ */
+static int lxcSetSchedulerParameters(virDomainPtr domain,
+                                     virSchedParameterPtr params,
+                                     int nparams)
+{
+    lxc_driver_t *driver = domain->conn->privateData;
+    int i;
+    virCgroupPtr group = NULL;
+    virDomainObjPtr vm = NULL;
+    int ret = -1;
+
+    if (driver->cgroup == NULL)
+        return -1;
+
+    lxcDriverLock(driver);
+    vm = virDomainFindByUUID(&driver->domains, domain->uuid);
+
+    if (vm == NULL) {
+        /* The uuid field holds raw bytes: format it before printing */
+        char uuidstr[VIR_UUID_STRING_BUFLEN];
+        virUUIDFormat(domain->uuid, uuidstr);
+        lxcError(NULL, domain, VIR_ERR_INTERNAL_ERROR,
+                 _("No such domain %s"), uuidstr);
+        goto cleanup;
+    }
+
+    if (virCgroupForDomain(driver->cgroup, vm->def->name, &group, 0) != 0)
+        goto cleanup;
+
+    for (i = 0; i < nparams; i++) {
+        virSchedParameterPtr param = &params[i];
+        if (param->type != VIR_DOMAIN_SCHED_FIELD_ULLONG) {
+            lxcError(NULL, domain, VIR_ERR_INVALID_ARG, "%s",
+                     _("invalid type for cpu_shares tunable, expected a 'ullong'"));
+            goto cleanup;
+        }
+
+        if (STREQ(param->field, "cpu_shares")) {
+            if (virCgroupSetCpuShares(group, param->value.ul) != 0)
+                goto cleanup;
+        } else {
+            lxcError(NULL, domain, VIR_ERR_INVALID_ARG,
+                     _("Invalid parameter `%s'"), param->field);
+            goto cleanup;
+        }
+    }
+    ret = 0;
+
+cleanup:
+    lxcDriverUnlock(driver);
+    virCgroupFree(&group);
+    if (vm)
+        virDomainObjUnlock(vm);
+    return ret;
+}
+
+/**
+ * lxcGetSchedulerParameters:
+ * @domain: domain to query
+ * @params: array receiving the parameters; entry 0 is filled in
+ * @nparams: number of entries in @params; must be exactly 1
+ *
+ * Reads the "cpu_shares" value from the domain's cgroup and reports it
+ * as a VIR_DOMAIN_SCHED_FIELD_ULLONG parameter.
+ *
+ * Returns 0 on success or -1 in case of error
+ */
+static int lxcGetSchedulerParameters(virDomainPtr domain,
+                                     virSchedParameterPtr params,
+                                     int *nparams)
+{
+    lxc_driver_t *driver = domain->conn->privateData;
+    virCgroupPtr group = NULL;
+    virDomainObjPtr vm = NULL;
+    unsigned long long val;
+    int ret = -1;
+
+    if (driver->cgroup == NULL)
+        return -1;
+
+    if ((*nparams) != 1) {
+        lxcError(NULL, domain, VIR_ERR_INVALID_ARG,
+                 "%s", _("Invalid parameter count"));
+        return -1;
+    }
+
+    lxcDriverLock(driver);
+    vm = virDomainFindByUUID(&driver->domains, domain->uuid);
+
+    if (vm == NULL) {
+        /* The uuid field holds raw bytes: format it before printing */
+        char uuidstr[VIR_UUID_STRING_BUFLEN];
+        virUUIDFormat(domain->uuid, uuidstr);
+        lxcError(NULL, domain, VIR_ERR_INTERNAL_ERROR,
+                 _("No such domain %s"), uuidstr);
+        goto cleanup;
+    }
+
+    if (virCgroupForDomain(driver->cgroup, vm->def->name, &group, 0) != 0)
+        goto cleanup;
+
+    if (virCgroupGetCpuShares(group, &val) != 0)
+        goto cleanup;
+    params[0].value.ul = val;
+    strncpy(params[0].field, "cpu_shares", sizeof(params[0].field));
+    params[0].type = VIR_DOMAIN_SCHED_FIELD_ULLONG;
+
+    ret = 0;
+
+cleanup:
+    lxcDriverUnlock(driver);
+    virCgroupFree(&group);
+    if (vm)
+        virDomainObjUnlock(vm);
+    return ret;
+}
+
+/**
+ * lxcDomainGetAutostart:
+ * @dom: domain to query
+ * @autostart: receives the domain's autostart flag
+ *
+ * Returns 0 on success or -1 if no domain matches the UUID
+ */
+static int lxcDomainGetAutostart(virDomainPtr dom,
+                                 int *autostart) {
+    lxc_driver_t *driver = dom->conn->privateData;
+    virDomainObjPtr vm;
+
+    /* The driver lock is only held for the lookup itself */
+    lxcDriverLock(driver);
+    vm = virDomainFindByUUID(&driver->domains, dom->uuid);
+    lxcDriverUnlock(driver);
+
+    if (vm == NULL) {
+        char uuidstr[VIR_UUID_STRING_BUFLEN];
+
+        virUUIDFormat(dom->uuid, uuidstr);
+        lxcError(dom->conn, dom, VIR_ERR_NO_DOMAIN,
+                 _("no domain with matching uuid '%s'"), uuidstr);
+        return -1;
+    }
+
+    *autostart = vm->autostart;
+    virDomainObjUnlock(vm);
+    return 0;
+}
+
+/**
+ * lxcDomainSetAutostart:
+ * @dom: domain to change
+ * @autostart: non-zero to enable autostart, zero to disable it
+ *
+ * Enables autostart by creating a symlink to the domain's config file
+ * in the autostart directory, or disables it by removing that link.
+ * Transient domains are refused.  Nothing is touched when the flag
+ * already has the requested value.
+ *
+ * Returns 0 on success or -1 in case of error
+ */
+static int lxcDomainSetAutostart(virDomainPtr dom,
+                                 int autostart) {
+    lxc_driver_t *driver = dom->conn->privateData;
+    virDomainObjPtr vm;
+    char *configFile = NULL;
+    char *autostartLink = NULL;
+    int ret = -1;
+
+    lxcDriverLock(driver);
+    vm = virDomainFindByUUID(&driver->domains, dom->uuid);
+
+    if (vm == NULL) {
+        char uuidstr[VIR_UUID_STRING_BUFLEN];
+
+        virUUIDFormat(dom->uuid, uuidstr);
+        lxcError(dom->conn, dom, VIR_ERR_NO_DOMAIN,
+                 _("no domain with matching uuid '%s'"), uuidstr);
+        goto cleanup;
+    }
+
+    if (!vm->persistent) {
+        lxcError(dom->conn, dom, VIR_ERR_INTERNAL_ERROR,
+                 "%s", _("cannot set autostart for transient domain"));
+        goto cleanup;
+    }
+
+    /* Normalise the request to 0 or 1 before comparing */
+    autostart = autostart ? 1 : 0;
+
+    if (vm->autostart == autostart) {
+        ret = 0;
+        goto cleanup;
+    }
+
+    configFile = virDomainConfigFile(dom->conn, driver->configDir, vm->def->name);
+    if (configFile == NULL)
+        goto cleanup;
+
+    autostartLink = virDomainConfigFile(dom->conn, driver->autostartDir, vm->def->name);
+    if (autostartLink == NULL)
+        goto cleanup;
+
+    if (!autostart) {
+        /* A link that is already missing is not an error */
+        if (unlink(autostartLink) < 0 && errno != ENOENT && errno != ENOTDIR) {
+            virReportSystemError(dom->conn, errno,
+                                 _("Failed to delete symlink '%s'"),
+                                 autostartLink);
+            goto cleanup;
+        }
+    } else {
+        int err = virFileMakePath(driver->autostartDir);
+
+        if (err != 0) {
+            virReportSystemError(dom->conn, err,
+                                 _("cannot create autostart directory %s"),
+                                 driver->autostartDir);
+            goto cleanup;
+        }
+
+        if (symlink(configFile, autostartLink) < 0) {
+            virReportSystemError(dom->conn, errno,
+                                 _("Failed to create symlink '%s to '%s'"),
+                                 autostartLink, configFile);
+            goto cleanup;
+        }
+    }
+
+    vm->autostart = autostart;
+    ret = 0;
+
+cleanup:
+    VIR_FREE(configFile);
+    VIR_FREE(autostartLink);
+    if (vm != NULL)
+        virDomainObjUnlock(vm);
+    lxcDriverUnlock(driver);
+    return ret;
+}
+
+/**
+ * lxcGetHostname:
+ * @conn: connection used for error reporting
+ *
+ * Returns the string obtained from virGetHostname(), which the caller
+ * must free, or NULL (after reporting an error) if that call fails
+ */
+static char *lxcGetHostname (virConnectPtr conn)
+{
+    char *hostname = virGetHostname();
+
+    if (!hostname) {
+        virReportSystemError (conn, errno,
+                              "%s", _("failed to determine host name"));
+    }
+
+    /* Caller frees this string. */
+    return hostname;
+}
+
+/* Function Tables
+ *
+ * lxcDriver is the table handed to virRegisterDriver() in lxcRegister().
+ * The initializers are positional, so the order of the entries matters:
+ * the trailing comment on each line names the slot being filled, and a
+ * NULL entry leaves that slot without an LXC implementation.
+ */
+static virDriver lxcDriver = {
+ VIR_DRV_LXC, /* the number virDrvNo */
+ "LXC", /* the name of the driver */
+ lxcOpen, /* open */
+ lxcClose, /* close */
+ NULL, /* supports_feature */
+ NULL, /* type */
+ lxcVersion, /* version */
+ lxcGetHostname, /* getHostname */
+ NULL, /* getMaxVcpus */
+ nodeGetInfo, /* nodeGetInfo */
+ lxcGetCapabilities, /* getCapabilities */
+ lxcListDomains, /* listDomains */
+ lxcNumDomains, /* numOfDomains */
+ lxcDomainCreateAndStart, /* domainCreateXML */
+ lxcDomainLookupByID, /* domainLookupByID */
+ lxcDomainLookupByUUID, /* domainLookupByUUID */
+ lxcDomainLookupByName, /* domainLookupByName */
+ NULL, /* domainSuspend */
+ NULL, /* domainResume */
+ lxcDomainShutdown, /* domainShutdown */
+ NULL, /* domainReboot */
+ lxcDomainDestroy, /* domainDestroy */
+ lxcGetOSType, /* domainGetOSType */
+ NULL, /* domainGetMaxMemory */
+ NULL, /* domainSetMaxMemory */
+ NULL, /* domainSetMemory */
+ lxcDomainGetInfo, /* domainGetInfo */
+ NULL, /* domainSave */
+ NULL, /* domainRestore */
+ NULL, /* domainCoreDump */
+ NULL, /* domainSetVcpus */
+ NULL, /* domainPinVcpu */
+ NULL, /* domainGetVcpus */
+ NULL, /* domainGetMaxVcpus */
+ NULL, /* domainGetSecurityLabel */
+ NULL, /* nodeGetSecurityModel */
+ lxcDomainDumpXML, /* domainDumpXML */
+ NULL, /* domainXmlFromNative */
+ NULL, /* domainXmlToNative */
+ lxcListDefinedDomains, /* listDefinedDomains */
+ lxcNumDefinedDomains, /* numOfDefinedDomains */
+ lxcDomainStart, /* domainCreate */
+ lxcDomainDefine, /* domainDefineXML */
+ lxcDomainUndefine, /* domainUndefine */
+ NULL, /* domainAttachDevice */
+ NULL, /* domainDetachDevice */
+ lxcDomainGetAutostart, /* domainGetAutostart */
+ lxcDomainSetAutostart, /* domainSetAutostart */
+ lxcGetSchedulerType, /* domainGetSchedulerType */
+ lxcGetSchedulerParameters, /* domainGetSchedulerParameters */
+ lxcSetSchedulerParameters, /* domainSetSchedulerParameters */
+ NULL, /* domainMigratePrepare */
+ NULL, /* domainMigratePerform */
+ NULL, /* domainMigrateFinish */
+ NULL, /* domainBlockStats */
+ NULL, /* domainInterfaceStats */
+ NULL, /* domainBlockPeek */
+ NULL, /* domainMemoryPeek */
+ nodeGetCellsFreeMemory, /* nodeGetCellsFreeMemory */
+ nodeGetFreeMemory, /* getFreeMemory */
+ lxcDomainEventRegister, /* domainEventRegister */
+ lxcDomainEventDeregister, /* domainEventDeregister */
+ NULL, /* domainMigratePrepare2 */
+ NULL, /* domainMigrateFinish2 */
+ NULL, /* nodeDeviceDettach */
+ NULL, /* nodeDeviceReAttach */
+ NULL, /* nodeDeviceReset */
+};
+
+static virStateDriver lxcStateDriver = { /* passed to virRegisterStateDriver() in lxcRegister() */
+ .initialize = lxcStartup, /* allocates the global driver state and loads the domain configs */
+ .cleanup = lxcShutdown, /* frees the global driver state */
+ .active = lxcActive, /* returns 1 while any domain is active */
+ .reload = lxcReload, /* reloads the domain configs, then runs autostart */
+};
+
+/**
+ * lxcRegister:
+ *
+ * Registers the LXC driver table and the LXC state driver.  The
+ * results of the two registration calls are not inspected.
+ *
+ * Always returns 0
+ */
+int lxcRegister(void)
+{
+    /* Hypervisor driver entry points */
+    virRegisterDriver(&lxcDriver);
+
+    /* Daemon life-cycle hooks (startup, shutdown, reload, active) */
+    virRegisterStateDriver(&lxcStateDriver);
+
+    return 0;
+}
--- /dev/null
+/*
+ * Copyright IBM Corp. 2008
+ *
+ * lxc_driver.h: header file for linux container driver functions
+ *
+ * Authors:
+ * David L. Leskovec <dlesko at linux.vnet.ibm.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef LXC_DRIVER_H
+#define LXC_DRIVER_H
+
+#include <config.h>
+
+/* Function declarations
+ *
+ * lxcRegister() is the only function declared by this header.
+ */
+int lxcRegister(void);
+
+#endif /* LXC_DRIVER_H */
--- /dev/null
+/*
+ * veth.c: Tools for managing veth pairs
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * See COPYING.LIB for the License of this software
+ *
+ * Authors:
+ * David L. Leskovec <dlesko at linux.vnet.ibm.com>
+ */
+
+#include <config.h>
+
+#include <string.h>
+#include <stdio.h>
+
+#include "veth.h"
+#include "internal.h"
+#include "logging.h"
+#include "memory.h"
+#include "util.h"
+
+/* Functions */
+/**
+ * getFreeVethName:
+ * @veth: buffer that receives the chosen device name
+ * @maxLen: size of the @veth buffer
+ * @startDev: device number to start at (x in vethx)
+ *
+ * Looks in /sys/class/net/ to find the first veth device name,
+ * numbered @startDev or higher, that does not exist yet and writes
+ * that name into @veth.
+ *
+ * Returns the device number used in the name written to @veth
+ */
+static int getFreeVethName(char *veth, int maxLen, int startDev)
+{
+    int devNum = startDev;
+    char path[PATH_MAX];
+
+    /* Advance until the sysfs entry for the candidate name is absent */
+    for (;;) {
+        snprintf(path, PATH_MAX, "/sys/class/net/veth%d/", devNum);
+        if (!virFileExists(path))
+            break;
+        ++devNum;
+    }
+
+    snprintf(veth, maxLen, "veth%d", devNum);
+
+    return devNum;
+}
+
+/**
+ * vethCreate:
+ * @veth1: name for one end of veth pair
+ * @veth1MaxLen: max length of veth1 name
+ * @veth2: name for one end of veth pair
+ * @veth2MaxLen: max length of veth2 name
+ *
+ * Creates a veth device pair using the ip command:
+ *     ip link add veth1 type veth peer name veth2
+ * NOTE: If veth1 and veth2 names are not specified, ip will auto assign
+ *       names.  There seems to be two problems here -
+ *       1) There doesn't seem to be a way to determine the names of the
+ *          devices that it creates.  They show up in ip link show and
+ *          under /sys/class/net/ however there is no guarantee that they
+ *          are the devices that this process just created.
+ *       2) Once one of the veth devices is moved to another namespace, it
+ *          is no longer visible in the parent namespace.  This seems to
+ *          confuse the name assignment causing it to fail with File exists.
+ *       Because of these issues, this function picks the names itself:
+ *       an empty name, or a name equal to the other end's name, is
+ *       replaced in the caller's buffer by a free "vethN" name.
+ *
+ * Returns 0 on success; -1 if a name buffer is NULL; otherwise the
+ * non-zero result of virRun() or the status of the ip command
+ */
+int vethCreate(char* veth1, int veth1MaxLen,
+               char* veth2, int veth2MaxLen)
+{
+    int rc = -1;
+    const char *argv[] = {
+        "ip", "link", "add", veth1, "type", "veth", "peer", "name", veth2, NULL
+    };
+    int cmdResult;
+    int vethDev = 0;
+
+    if ((NULL == veth1) || (NULL == veth2)) {
+        goto error_out;
+    }
+
+    DEBUG("veth1: %s veth2: %s", veth1, veth2);
+
+    /* Each search resumes after the previous candidate.  Restarting
+     * at 0 every time would select the same name again and never
+     * terminate when that name is the one already held by veth2. */
+    while ((1 > strlen(veth1)) || STREQ(veth1, veth2)) {
+        vethDev = getFreeVethName(veth1, veth1MaxLen, vethDev);
+        ++vethDev;
+        DEBUG("assigned veth1: %s", veth1);
+    }
+
+    while ((1 > strlen(veth2)) || STREQ(veth1, veth2)) {
+        vethDev = getFreeVethName(veth2, veth2MaxLen, vethDev);
+        ++vethDev;
+        DEBUG("assigned veth2: %s", veth2);
+    }
+
+    DEBUG("veth1: %s veth2: %s", veth1, veth2);
+    rc = virRun(NULL, argv, &cmdResult);
+
+    if (0 == rc) {
+        rc = cmdResult;
+    }
+
+error_out:
+    return rc;
+}
+
+/**
+ * vethDelete:
+ * @veth: name of either end of a veth pair
+ *
+ * Removes a veth pair by running:
+ *     ip link del veth
+ * Only one end has to be named; the ip command will identify and delete
+ * the peer device as well.
+ *
+ * Returns -1 if @veth is NULL.  Otherwise returns the result of virRun()
+ * when that is non-zero, or else the command status virRun() stored
+ * (0 when the ip command succeeded).
+ */
+int vethDelete(const char *veth)
+{
+    const char *cmd[] = {"ip", "link", "del", veth, NULL};
+    int exitStatus;
+    int ret;
+
+    if (veth == NULL)
+        return -1;
+
+    DEBUG("veth: %s", veth);
+
+    ret = virRun(NULL, cmd, &exitStatus);
+
+    return (ret == 0) ? exitStatus : ret;
+}
+
+/**
+ * vethInterfaceUpOrDown:
+ * @veth: name of the device to change
+ * @upOrDown: 0 => bring the device down, anything else => bring it up
+ *
+ * Changes the state of a device by running:
+ *     ifconfig veth up
+ * or
+ *     ifconfig veth down
+ *
+ * Returns -1 if @veth is NULL.  Otherwise returns the result of virRun()
+ * when that is non-zero, or else the command status virRun() stored
+ * (0 when the ifconfig command succeeded).
+ */
+int vethInterfaceUpOrDown(const char* veth, int upOrDown)
+{
+    const char *cmd[] = {"ifconfig", veth, NULL, NULL};
+    int exitStatus;
+    int ret;
+
+    if (veth == NULL)
+        return -1;
+
+    /* Slot 2 carries the requested state keyword */
+    cmd[2] = (upOrDown != 0) ? "up" : "down";
+
+    ret = virRun(NULL, cmd, &exitStatus);
+
+    return (ret == 0) ? exitStatus : ret;
+}
+
+/**
+ * moveInterfaceToNetNs:
+ * @iface: name of the device to move
+ * @pidInNs: PID of a process living in the target net namespace
+ *
+ * Moves the given device into the net namespace of the given process by
+ * running:
+ *     ip link set iface netns pidInNs
+ *
+ * Returns -1 if @iface is NULL or the PID string cannot be formatted.
+ * Otherwise returns the result of virRun() when that is non-zero, or
+ * else the command status virRun() stored (0 when the ip command
+ * succeeded).
+ */
+int moveInterfaceToNetNs(const char* iface, int pidInNs)
+{
+    const char *cmd[] = {
+        "ip", "link", "set", iface, "netns", NULL, NULL
+    };
+    char *pidStr = NULL;
+    int exitStatus;
+    int ret = -1;
+
+    if (iface != NULL && virAsprintf(&pidStr, "%d", pidInNs) != -1) {
+        /* Slot 5 is the namespace PID, rendered as decimal text */
+        cmd[5] = pidStr;
+        ret = virRun(NULL, cmd, &exitStatus);
+        if (ret == 0)
+            ret = exitStatus;
+    }
+
+    VIR_FREE(pidStr);
+    return ret;
+}
--- /dev/null
+/*
+ * veth.h: Interface to tools for managing veth pairs
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * See COPYING.LIB for the License of this software
+ *
+ * Authors:
+ * David L. Leskovec <dlesko at linux.vnet.ibm.com>
+ */
+
+#ifndef VETH_H
+#define VETH_H
+
+#include <config.h>
+
+/* Function declarations
+ *
+ * vethCreate:            runs "ip link add veth1 type veth peer name veth2".
+ *                        An empty name is filled in with a free vethN, so
+ *                        both buffers must be writable.
+ * vethDelete:            runs "ip link del veth".
+ * vethInterfaceUpOrDown: runs "ifconfig veth up" or "ifconfig veth down";
+ *                        upOrDown == 0 selects "down".
+ * moveInterfaceToNetNs:  runs "ip link set iface netns pidInNs".
+ *
+ * Each function returns -1 when handed a NULL device name.
+ */
+int vethCreate(char* veth1, int veth1MaxLen, char* veth2,
+ int veth2MaxLen);
+int vethDelete(const char* veth);
+int vethInterfaceUpOrDown(const char* veth, int upOrDown);
+int moveInterfaceToNetNs(const char *iface, int pidInNs);
+
+#endif /* VETH_H */
+++ /dev/null
-/*
- * Copyright IBM Corp. 2008
- *
- * lxc_conf.c: config functions for managing linux containers
- *
- * Authors:
- * David L. Leskovec <dlesko at linux.vnet.ibm.com>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- */
-
-/* includes */
-#include <config.h>
-
-#include <sys/utsname.h>
-
-#include "lxc_conf.h"
-#include "nodeinfo.h"
-#include "virterror_internal.h"
-#include "logging.h"
-
-
-#define VIR_FROM_THIS VIR_FROM_LXC
-
-/* Functions */
-virCapsPtr lxcCapsInit(void)
-{
- struct utsname utsname;
- virCapsPtr caps;
- virCapsGuestPtr guest;
-
- uname(&utsname);
-
- if ((caps = virCapabilitiesNew(utsname.machine,
- 0, 0)) == NULL)
- goto no_memory;
-
- /* Some machines have problematic NUMA toplogy causing
- * unexpected failures. We don't want to break the QEMU
- * driver in this scenario, so log errors & carry on
- */
- if (nodeCapsInitNUMA(caps) < 0) {
- virCapabilitiesFreeNUMAInfo(caps);
- VIR_WARN0("Failed to query host NUMA topology, disabling NUMA capabilities");
- }
-
- /* XXX shouldn't 'borrow' KVM's prefix */
- virCapabilitiesSetMacPrefix(caps, (unsigned char []){ 0x52, 0x54, 0x00 });
-
- if ((guest = virCapabilitiesAddGuest(caps,
- "exe",
- utsname.machine,
- sizeof(int) == 4 ? 32 : 8,
- BINDIR "/libvirt_lxc",
- NULL,
- 0,
- NULL)) == NULL)
- goto no_memory;
-
- if (virCapabilitiesAddGuestDomain(guest,
- "lxc",
- NULL,
- NULL,
- 0,
- NULL) == NULL)
- goto no_memory;
-
- /* LXC Requires an emulator in the XML */
- virCapabilitiesSetEmulatorRequired(caps);
-
- return caps;
-
-no_memory:
- virCapabilitiesFree(caps);
- return NULL;
-}
-
-int lxcLoadDriverConfig(lxc_driver_t *driver)
-{
- /* Set the container configuration directory */
- if ((driver->configDir = strdup(LXC_CONFIG_DIR)) == NULL)
- goto no_memory;
- if ((driver->stateDir = strdup(LXC_STATE_DIR)) == NULL)
- goto no_memory;
- if ((driver->logDir = strdup(LXC_LOG_DIR)) == NULL)
- goto no_memory;
-
- return 0;
-
-no_memory:
- virReportOOMError(NULL);
- return -1;
-}
+++ /dev/null
-/*
- * Copyright IBM Corp. 2008
- *
- * lxc_conf.h: header file for linux container config functions
- *
- * Authors:
- * David L. Leskovec <dlesko at linux.vnet.ibm.com>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#ifndef LXC_CONF_H
-#define LXC_CONF_H
-
-#include <config.h>
-
-#include "internal.h"
-#include "domain_conf.h"
-#include "domain_event.h"
-#include "capabilities.h"
-#include "threads.h"
-#include "cgroup.h"
-
-#define LXC_CONFIG_DIR SYSCONF_DIR "/libvirt/lxc"
-#define LXC_STATE_DIR LOCAL_STATE_DIR "/run/libvirt/lxc"
-#define LXC_LOG_DIR LOCAL_STATE_DIR "/log/libvirt/lxc"
-
-typedef struct __lxc_driver lxc_driver_t;
-struct __lxc_driver {
- virMutex lock;
-
- virCapsPtr caps;
-
- virCgroupPtr cgroup;
- virDomainObjList domains;
- char *configDir;
- char *autostartDir;
- char *stateDir;
- char *logDir;
- int have_netns;
-
- /* An array of callbacks */
- virDomainEventCallbackListPtr domainEventCallbacks;
- virDomainEventQueuePtr domainEventQueue;
- int domainEventTimer;
- int domainEventDispatching;
-};
-
-int lxcLoadDriverConfig(lxc_driver_t *driver);
-virCapsPtr lxcCapsInit(void);
-
-#define lxcError(conn, dom, code, fmt...) \
- virReportErrorHelper(conn, VIR_FROM_LXC, code, __FILE__, \
- __FUNCTION__, __LINE__, fmt)
-
-#endif /* LXC_CONF_H */
+++ /dev/null
-/*
- * Copyright IBM Corp. 2008
- * Copyright Red Hat 2008-2009
- *
- * lxc_container.c: file description
- *
- * Authors:
- * David L. Leskovec <dlesko at linux.vnet.ibm.com>
- * Daniel P. Berrange <berrange@redhat.com>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include <config.h>
-
-#include <fcntl.h>
-#include <limits.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <sys/ioctl.h>
-#include <sys/mount.h>
-#include <sys/wait.h>
-#include <unistd.h>
-#include <mntent.h>
-
-/* Yes, we want linux private one, for _syscall2() macro */
-#include <linux/unistd.h>
-
-/* For MS_MOVE */
-#include <linux/fs.h>
-
-#if HAVE_CAPNG
-#include <cap-ng.h>
-#endif
-
-#include "virterror_internal.h"
-#include "logging.h"
-#include "lxc_container.h"
-#include "util.h"
-#include "memory.h"
-#include "veth.h"
-
-#define VIR_FROM_THIS VIR_FROM_LXC
-
-/*
- * GLibc headers are behind the kernel, so we define these
- * constants if they're not present already.
- */
-
-#ifndef CLONE_NEWPID
-#define CLONE_NEWPID 0x20000000
-#endif
-#ifndef CLONE_NEWUTS
-#define CLONE_NEWUTS 0x04000000
-#endif
-#ifndef CLONE_NEWUSER
-#define CLONE_NEWUSER 0x10000000
-#endif
-#ifndef CLONE_NEWIPC
-#define CLONE_NEWIPC 0x08000000
-#endif
-#ifndef CLONE_NEWNET
-#define CLONE_NEWNET 0x40000000 /* New network namespace */
-#endif
-
-/* messages between parent and container */
-typedef char lxc_message_t;
-#define LXC_CONTINUE_MSG 'c'
-
-typedef struct __lxc_child_argv lxc_child_argv_t;
-struct __lxc_child_argv {
- virDomainDefPtr config;
- unsigned int nveths;
- char **veths;
- int monitor;
- char *ttyPath;
-};
-
-
-/**
- * lxcContainerExecInit:
- * @vmDef: Ptr to vm definition structure
- *
- * Exec the container init string. The container init will replace then
- * be running in the current process
- *
- * Does not return
- */
-static int lxcContainerExecInit(virDomainDefPtr vmDef)
-{
- const char *const argv[] = {
- vmDef->os.init,
- NULL,
- };
-
- return execve(argv[0], (char **)argv, NULL);
-}
-
-/**
- * lxcContainerSetStdio:
- * @control: the conrol FD
- * @ttyPath: Name of tty to set as the container console
- *
- * Sets the given tty as the primary conosole for the container as well as
- * stdout, stdin and stderr.
- *
- * Returns 0 on success or -1 in case of error
- */
-static int lxcContainerSetStdio(int control, int ttyfd)
-{
- int rc = -1;
- int open_max, i;
-
- if (setsid() < 0) {
- virReportSystemError(NULL, errno, "%s",
- _("setsid failed"));
- goto cleanup;
- }
-
- if (ioctl(ttyfd, TIOCSCTTY, NULL) < 0) {
- virReportSystemError(NULL, errno, "%s",
- _("ioctl(TIOCSTTY) failed"));
- goto cleanup;
- }
-
- /* Just in case someone forget to set FD_CLOEXEC, explicitly
- * close all FDs before executing the container */
- open_max = sysconf (_SC_OPEN_MAX);
- for (i = 0; i < open_max; i++)
- if (i != ttyfd && i != control)
- close(i);
-
- if (dup2(ttyfd, 0) < 0) {
- virReportSystemError(NULL, errno, "%s",
- _("dup2(stdin) failed"));
- goto cleanup;
- }
-
- if (dup2(ttyfd, 1) < 0) {
- virReportSystemError(NULL, errno, "%s",
- _("dup2(stdout) failed"));
- goto cleanup;
- }
-
- if (dup2(ttyfd, 2) < 0) {
- virReportSystemError(NULL, errno, "%s",
- _("dup2(stderr) failed"));
- goto cleanup;
- }
-
- rc = 0;
-
-cleanup:
- return rc;
-}
-
-/**
- * lxcContainerSendContinue:
- * @monitor: control FD to child
- *
- * Sends the continue message via the socket pair stored in the vm
- * structure.
- *
- * Returns 0 on success or -1 in case of error
- */
-int lxcContainerSendContinue(int control)
-{
- int rc = -1;
- lxc_message_t msg = LXC_CONTINUE_MSG;
- int writeCount = 0;
-
- writeCount = safewrite(control, &msg, sizeof(msg));
- if (writeCount != sizeof(msg)) {
- virReportSystemError(NULL, errno, "%s",
- _("unable to send container continue message"));
- goto error_out;
- }
-
- rc = 0;
-
-error_out:
- return rc;
-}
-
-/**
- * lxcContainerWaitForContinue:
- * @control: control FD from parent
- *
- * This function will wait for the container continue message from the
- * parent process. It will send this message on the socket pair stored in
- * the vm structure once it has completed the post clone container setup.
- *
- * Returns 0 on success or -1 in case of error
- */
-static int lxcContainerWaitForContinue(int control)
-{
- lxc_message_t msg;
- int readLen;
-
- readLen = saferead(control, &msg, sizeof(msg));
- if (readLen != sizeof(msg) ||
- msg != LXC_CONTINUE_MSG) {
- virReportSystemError(NULL, errno, "%s",
- _("Failed to read the container continue message"));
- return -1;
- }
- close(control);
-
- DEBUG0("Received container continue message");
-
- return 0;
-}
-
-
-/**
- * lxcEnableInterfaces:
- * @vm: Pointer to vm structure
- *
- * This function will enable the interfaces for this container.
- *
- * Returns 0 on success or nonzero in case of error
- */
-static int lxcContainerEnableInterfaces(unsigned int nveths,
- char **veths)
-{
- int rc = 0;
- unsigned int i;
-
- for (i = 0 ; i < nveths ; i++) {
- DEBUG("Enabling %s", veths[i]);
- rc = vethInterfaceUpOrDown(veths[i], 1);
- if (0 != rc) {
- goto error_out;
- }
- }
-
- /* enable lo device only if there were other net devices */
- if (veths)
- rc = vethInterfaceUpOrDown("lo", 1);
-
-error_out:
- return rc;
-}
-
-
-//_syscall2(int, pivot_root, char *, newroot, const char *, oldroot)
-extern int pivot_root(const char * new_root,const char * put_old);
-
-static int lxcContainerChildMountSort(const void *a, const void *b)
-{
- const char **sa = (const char**)a;
- const char **sb = (const char**)b;
-
- /* Delibrately reversed args - we need to unmount deepest
- children first */
- return strcmp(*sb, *sa);
-}
-
-#ifndef MS_REC
-#define MS_REC 16384
-#endif
-
-#ifndef MNT_DETACH
-#define MNT_DETACH 0x00000002
-#endif
-
-#ifndef MS_PRIVATE
-#define MS_PRIVATE (1<<18)
-#endif
-
-#ifndef MS_SLAVE
-#define MS_SLAVE (1<<19)
-#endif
-
-static int lxcContainerPivotRoot(virDomainFSDefPtr root)
-{
- int rc, ret;
- char *oldroot = NULL, *newroot = NULL;
-
- ret = -1;
-
- /* root->parent must be private, so make / private. */
- if (mount("", "/", NULL, MS_PRIVATE|MS_REC, NULL) < 0) {
- virReportSystemError(NULL, errno, "%s",
- _("failed to make root private"));
- goto err;
- }
-
- if (virAsprintf(&oldroot, "%s/.oldroot", root->src) < 0) {
- virReportOOMError(NULL);
- goto err;
- }
-
- if ((rc = virFileMakePath(oldroot)) < 0) {
- virReportSystemError(NULL, rc,
- _("failed to create %s"),
- oldroot);
- goto err;
- }
-
- /* Create a tmpfs root since old and new roots must be
- * on separate filesystems */
- if (mount("tmprootfs", oldroot, "tmpfs", 0, NULL) < 0) {
- virReportSystemError(NULL, errno,
- _("failed to mount empty tmpfs at %s"),
- oldroot);
- goto err;
- }
-
- /* Create a directory called 'new' in tmpfs */
- if (virAsprintf(&newroot, "%s/new", oldroot) < 0) {
- virReportOOMError(NULL);
- goto err;
- }
-
- if ((rc = virFileMakePath(newroot)) < 0) {
- virReportSystemError(NULL, rc,
- _("failed to create %s"),
- newroot);
- goto err;
- }
-
- /* ... and mount our root onto it */
- if (mount(root->src, newroot, NULL, MS_BIND|MS_REC, NULL) < 0) {
- virReportSystemError(NULL, errno,
- _("failed to bind new root %s into tmpfs"),
- root->src);
- goto err;
- }
-
- /* Now we chroot into the tmpfs, then pivot into the
- * root->src bind-mounted onto '/new' */
- if (chdir(newroot) < 0) {
- virReportSystemError(NULL, errno,
- _("failed to chroot into %s"), newroot);
- goto err;
- }
-
- /* The old root directory will live at /.oldroot after
- * this and will soon be unmounted completely */
- if (pivot_root(".", ".oldroot") < 0) {
- virReportSystemError(NULL, errno, "%s",
- _("failed to pivot root"));
- goto err;
- }
-
- /* CWD is undefined after pivot_root, so go to / */
- if (chdir("/") < 0)
- goto err;
-
- ret = 0;
-
-err:
- VIR_FREE(oldroot);
- VIR_FREE(newroot);
-
- return ret;
-}
-
-
-static int lxcContainerMountBasicFS(virDomainFSDefPtr root)
-{
- const struct {
- const char *src;
- const char *dst;
- const char *type;
- } mnts[] = {
- { "/dev", "/dev", "tmpfs" },
- { "/proc", "/proc", "proc" },
- { "/sys", "/sys", "sysfs" },
-#if WITH_SELINUX
- { "none", "/selinux", "selinuxfs" },
-#endif
- };
- int i, rc = -1;
- char *devpts;
-
- if (virAsprintf(&devpts, "/.oldroot%s/dev/pts", root->src) < 0) {
- virReportOOMError(NULL);
- return rc;
- }
-
- for (i = 0 ; i < ARRAY_CARDINALITY(mnts) ; i++) {
- if (virFileMakePath(mnts[i].dst) < 0) {
- virReportSystemError(NULL, errno,
- _("failed to mkdir %s"),
- mnts[i].src);
- goto cleanup;
- }
- if (mount(mnts[i].src, mnts[i].dst, mnts[i].type, 0, NULL) < 0) {
- virReportSystemError(NULL, errno,
- _("failed to mount %s on %s"),
- mnts[i].type, mnts[i].type);
- goto cleanup;
- }
- }
-
- if ((rc = virFileMakePath("/dev/pts") < 0)) {
- virReportSystemError(NULL, rc, "%s",
- _("cannot create /dev/pts"));
- goto cleanup;
- }
-
- VIR_DEBUG("Trying to move %s to %s", devpts, "/dev/pts");
- if ((rc = mount(devpts, "/dev/pts", NULL, MS_MOVE, NULL)) < 0) {
- virReportSystemError(NULL, errno, "%s",
- _("failed to mount /dev/pts in container"));
- goto cleanup;
- }
-
- rc = 0;
-
- cleanup:
- VIR_FREE(devpts);
-
- return rc;
-}
-
-static int lxcContainerPopulateDevices(void)
-{
- int i;
- const struct {
- int maj;
- int min;
- mode_t mode;
- const char *path;
- } devs[] = {
- { LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_NULL, 0666, "/dev/null" },
- { LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_ZERO, 0666, "/dev/zero" },
- { LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_FULL, 0666, "/dev/full" },
- { LXC_DEV_MAJ_TTY, LXC_DEV_MIN_CONSOLE, 0600, "/dev/console" },
- { LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_RANDOM, 0666, "/dev/random" },
- { LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_URANDOM, 0666, "/dev/urandom" },
- };
-
- /* Populate /dev/ with a few important bits */
- for (i = 0 ; i < ARRAY_CARDINALITY(devs) ; i++) {
- dev_t dev = makedev(devs[i].maj, devs[i].min);
- if (mknod(devs[i].path, S_IFCHR, dev) < 0 ||
- chmod(devs[i].path, devs[i].mode)) {
- virReportSystemError(NULL, errno,
- _("failed to make device %s"),
- devs[i].path);
- return -1;
- }
- }
-
- if (access("/dev/pts/ptmx", W_OK) == 0) {
- if (symlink("/dev/pts/ptmx", "/dev/ptmx") < 0) {
- virReportSystemError(NULL, errno, "%s",
- _("failed to create symlink /dev/ptmx to /dev/pts/ptmx"));
- return -1;
- }
- } else {
- dev_t dev = makedev(LXC_DEV_MAJ_TTY, LXC_DEV_MIN_PTMX);
- if (mknod("/dev/ptmx", S_IFCHR, dev) < 0 ||
- chmod("/dev/ptmx", 0666)) {
- virReportSystemError(NULL, errno, "%s",
- _("failed to make device /dev/ptmx"));
- return -1;
- }
- }
-
-
- return 0;
-}
-
-
-static int lxcContainerMountNewFS(virDomainDefPtr vmDef)
-{
- int i;
-
- /* Pull in rest of container's mounts */
- for (i = 0 ; i < vmDef->nfss ; i++) {
- char *src;
- if (STREQ(vmDef->fss[i]->dst, "/"))
- continue;
- // XXX fix
- if (vmDef->fss[i]->type != VIR_DOMAIN_FS_TYPE_MOUNT)
- continue;
-
- if (virAsprintf(&src, "/.oldroot/%s", vmDef->fss[i]->src) < 0) {
- virReportOOMError(NULL);
- return -1;
- }
-
- if (virFileMakePath(vmDef->fss[i]->dst) < 0) {
- virReportSystemError(NULL, errno,
- _("failed to create %s"),
- vmDef->fss[i]->dst);
- VIR_FREE(src);
- return -1;
- }
- if (mount(src, vmDef->fss[i]->dst, NULL, MS_BIND, NULL) < 0) {
- VIR_FREE(src);
- virReportSystemError(NULL, errno,
- _("failed to mount %s at %s"),
- vmDef->fss[i]->src,
- vmDef->fss[i]->dst);
- return -1;
- }
- VIR_FREE(src);
- }
-
- return 0;
-}
-
-
-static int lxcContainerUnmountOldFS(void)
-{
- struct mntent mntent;
- char **mounts = NULL;
- int nmounts = 0;
- FILE *procmnt;
- int i;
- char mntbuf[1024];
-
- if (!(procmnt = setmntent("/proc/mounts", "r"))) {
- virReportSystemError(NULL, errno, "%s",
- _("failed to read /proc/mounts"));
- return -1;
- }
- while (getmntent_r(procmnt, &mntent, mntbuf, sizeof(mntbuf)) != NULL) {
- VIR_DEBUG("Got %s", mntent.mnt_dir);
- if (!STRPREFIX(mntent.mnt_dir, "/.oldroot"))
- continue;
-
- if (VIR_REALLOC_N(mounts, nmounts+1) < 0) {
- endmntent(procmnt);
- virReportOOMError(NULL);
- return -1;
- }
- if (!(mounts[nmounts++] = strdup(mntent.mnt_dir))) {
- endmntent(procmnt);
- virReportOOMError(NULL);
- return -1;
- }
- }
- endmntent(procmnt);
-
- if (mounts)
- qsort(mounts, nmounts, sizeof(mounts[0]),
- lxcContainerChildMountSort);
-
- for (i = 0 ; i < nmounts ; i++) {
- VIR_DEBUG("Umount %s", mounts[i]);
- if (umount(mounts[i]) < 0) {
- virReportSystemError(NULL, errno,
- _("failed to unmount '%s'"),
- mounts[i]);
- return -1;
- }
- VIR_FREE(mounts[i]);
- }
- VIR_FREE(mounts);
-
- return 0;
-}
-
-
-/* Got a FS mapped to /, we're going the pivot_root
- * approach to do a better-chroot-than-chroot
- * this is based on this thread http://lkml.org/lkml/2008/3/5/29
- */
-static int lxcContainerSetupPivotRoot(virDomainDefPtr vmDef,
- virDomainFSDefPtr root)
-{
- /* Gives us a private root, leaving all parent OS mounts on /.oldroot */
- if (lxcContainerPivotRoot(root) < 0)
- return -1;
-
- /* Mounts the core /proc, /sys, /dev, /dev/pts filesystems */
- if (lxcContainerMountBasicFS(root) < 0)
- return -1;
-
- /* Populates device nodes in /dev/ */
- if (lxcContainerPopulateDevices() < 0)
- return -1;
-
- /* Sets up any non-root mounts from guest config */
- if (lxcContainerMountNewFS(vmDef) < 0)
- return -1;
-
- /* Gets rid of all remaining mounts from host OS, including /.oldroot itself */
- if (lxcContainerUnmountOldFS() < 0)
- return -1;
-
- return 0;
-}
-
-/* Nothing mapped to /, we're using the main root,
- but with extra stuff mapped in */
-static int lxcContainerSetupExtraMounts(virDomainDefPtr vmDef)
-{
- int i;
-
- if (mount("", "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) {
- virReportSystemError(NULL, errno, "%s",
- _("failed to make / slave"));
- return -1;
- }
- for (i = 0 ; i < vmDef->nfss ; i++) {
- // XXX fix to support other mount types
- if (vmDef->fss[i]->type != VIR_DOMAIN_FS_TYPE_MOUNT)
- continue;
-
- if (mount(vmDef->fss[i]->src,
- vmDef->fss[i]->dst,
- NULL,
- MS_BIND,
- NULL) < 0) {
- virReportSystemError(NULL, errno,
- _("failed to mount %s at %s"),
- vmDef->fss[i]->src,
- vmDef->fss[i]->dst);
- return -1;
- }
- }
-
- /* mount /proc */
- if (mount("lxcproc", "/proc", "proc", 0, NULL) < 0) {
- virReportSystemError(NULL, errno, "%s",
- _("failed to mount /proc"));
- return -1;
- }
-
- return 0;
-}
-
-static int lxcContainerSetupMounts(virDomainDefPtr vmDef,
- virDomainFSDefPtr root)
-{
- if (root)
- return lxcContainerSetupPivotRoot(vmDef, root);
- else
- return lxcContainerSetupExtraMounts(vmDef);
-}
-
-
-/*
- * This is running as the 'init' process insid the container.
- * It removes some capabilities that could be dangerous to
- * host system, since they are not currently "containerized"
- */
-static int lxcContainerDropCapabilities(void)
-{
-#if HAVE_CAPNG
- int ret;
-
- capng_get_caps_process();
-
- if ((ret = capng_updatev(CAPNG_DROP,
- CAPNG_EFFECTIVE | CAPNG_PERMITTED |
- CAPNG_INHERITABLE | CAPNG_BOUNDING_SET,
- CAP_SYS_BOOT, /* No use of reboot */
- CAP_SYS_MODULE, /* No kernel module loading */
- CAP_SYS_TIME, /* No changing the clock */
- CAP_AUDIT_CONTROL, /* No messing with auditing status */
- CAP_MAC_ADMIN, /* No messing with LSM config */
- -1 /* sentinal */)) < 0) {
- lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
- _("failed to remove capabilities %d"), ret);
- return -1;
- }
-
- if ((ret = capng_apply(CAPNG_SELECT_BOTH)) < 0) {
- lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
- _("failed to apply capabilities: %d"), ret);
- return -1;
- }
-
- /* Need to prevent them regaining any caps on exec */
- if ((ret = capng_lock()) < 0) {
- lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
- _("failed to lock capabilities: %d"), ret);
- return -1;
- }
-
-#else
- VIR_WARN0(_("libcap-ng support not compiled in, unable to clear capabilities"));
-#endif
- return 0;
-}
-
-
-/**
- * lxcChild:
- * @argv: Pointer to container arguments
- *
- * This function is run in the process clone()'d in lxcStartContainer.
- * Perform a number of container setup tasks:
- * Setup container file system
- * mount container /proca
- * Then exec's the container init
- *
- * Returns 0 on success or -1 in case of error
- */
-static int lxcContainerChild( void *data )
-{
- lxc_child_argv_t *argv = data;
- virDomainDefPtr vmDef = argv->config;
- int ttyfd;
- char *ttyPath;
- virDomainFSDefPtr root;
-
- if (NULL == vmDef) {
- lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
- "%s", _("lxcChild() passed invalid vm definition"));
- return -1;
- }
-
- root = virDomainGetRootFilesystem(vmDef);
-
- if (root) {
- if (virAsprintf(&ttyPath, "%s%s", root->src, argv->ttyPath) < 0) {
- virReportOOMError(NULL);
- return -1;
- }
- } else {
- if (!(ttyPath = strdup(argv->ttyPath))) {
- virReportOOMError(NULL);
- return -1;
- }
- }
-
- ttyfd = open(ttyPath, O_RDWR|O_NOCTTY);
- if (ttyfd < 0) {
- virReportSystemError(NULL, errno,
- _("failed to open tty %s"),
- ttyPath);
- return -1;
- }
- VIR_FREE(ttyPath);
-
- if (lxcContainerSetStdio(argv->monitor, ttyfd) < 0) {
- close(ttyfd);
- return -1;
- }
- close(ttyfd);
-
- if (lxcContainerSetupMounts(vmDef, root) < 0)
- return -1;
-
- /* Wait for interface devices to show up */
- if (lxcContainerWaitForContinue(argv->monitor) < 0)
- return -1;
-
- /* enable interfaces */
- if (lxcContainerEnableInterfaces(argv->nveths, argv->veths) < 0)
- return -1;
-
- /* drop a set of root capabilities */
- if (lxcContainerDropCapabilities() < 0)
- return -1;
-
- /* this function will only return if an error occured */
- return lxcContainerExecInit(vmDef);
-}
-
-static int userns_supported(void)
-{
- return lxcContainerAvailable(LXC_CONTAINER_FEATURE_USER) == 0;
-}
-
-/**
- * lxcContainerStart:
- * @driver: pointer to driver structure
- * @vm: pointer to virtual machine structure
- *
- * Starts a container process by calling clone() with the namespace flags
- *
- * Returns PID of container on success or -1 in case of error
- */
-int lxcContainerStart(virDomainDefPtr def,
- unsigned int nveths,
- char **veths,
- int control,
- char *ttyPath)
-{
- pid_t pid;
- int flags;
- int stacksize = getpagesize() * 4;
- char *stack, *stacktop;
- lxc_child_argv_t args = { def, nveths, veths, control, ttyPath };
-
- /* allocate a stack for the container */
- if (VIR_ALLOC_N(stack, stacksize) < 0) {
- virReportOOMError(NULL);
- return -1;
- }
- stacktop = stack + stacksize;
-
- flags = CLONE_NEWPID|CLONE_NEWNS|CLONE_NEWUTS|CLONE_NEWIPC|SIGCHLD;
-
- if (userns_supported())
- flags |= CLONE_NEWUSER;
-
- if (def->nets != NULL)
- flags |= CLONE_NEWNET;
-
- pid = clone(lxcContainerChild, stacktop, flags, &args);
- VIR_FREE(stack);
- DEBUG("clone() returned, %d", pid);
-
- if (pid < 0) {
- virReportSystemError(NULL, errno, "%s",
- _("failed to run clone container"));
- return -1;
- }
-
- return pid;
-}
-
-static int lxcContainerDummyChild(void *argv ATTRIBUTE_UNUSED)
-{
- _exit(0);
-}
-
-int lxcContainerAvailable(int features)
-{
- int flags = CLONE_NEWPID|CLONE_NEWNS|CLONE_NEWUTS|
- CLONE_NEWIPC|SIGCHLD;
- int cpid;
- char *childStack;
- char *stack;
- int childStatus;
-
- if (features & LXC_CONTAINER_FEATURE_USER)
- flags |= CLONE_NEWUSER;
-
- if (features & LXC_CONTAINER_FEATURE_NET)
- flags |= CLONE_NEWNET;
-
- if (VIR_ALLOC_N(stack, getpagesize() * 4) < 0) {
- DEBUG0("Unable to allocate stack");
- return -1;
- }
-
- childStack = stack + (getpagesize() * 4);
-
- cpid = clone(lxcContainerDummyChild, childStack, flags, NULL);
- VIR_FREE(stack);
- if (cpid < 0) {
- char ebuf[1024];
- DEBUG("clone call returned %s, container support is not enabled",
- virStrerror(errno, ebuf, sizeof ebuf));
- return -1;
- } else {
- waitpid(cpid, &childStatus, 0);
- }
-
- return 0;
-}
+++ /dev/null
-/*
- * Copyright IBM Corp. 2008
- *
- * lxc_container.h: header file for fcns run inside container
- *
- * Authors:
- * David L. Leskovec <dlesko at linux.vnet.ibm.com>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#ifndef LXC_CONTAINER_H
-#define LXC_CONTAINER_H
-
-#include "lxc_conf.h"
-
-enum {
- LXC_CONTAINER_FEATURE_NET = (1 << 0),
- LXC_CONTAINER_FEATURE_USER = (1 << 1),
-};
-
-#define LXC_DEV_MAJ_MEMORY 1
-#define LXC_DEV_MIN_NULL 3
-#define LXC_DEV_MIN_ZERO 5
-#define LXC_DEV_MIN_FULL 7
-#define LXC_DEV_MIN_RANDOM 8
-#define LXC_DEV_MIN_URANDOM 9
-
-#define LXC_DEV_MAJ_TTY 5
-#define LXC_DEV_MIN_CONSOLE 1
-#define LXC_DEV_MIN_PTMX 2
-
-#define LXC_DEV_MAJ_PTY 136
-
-int lxcContainerSendContinue(int control);
-
-int lxcContainerStart(virDomainDefPtr def,
- unsigned int nveths,
- char **veths,
- int control,
- char *ttyPath);
-
-int lxcContainerAvailable(int features);
-
-#endif /* LXC_CONTAINER_H */
+++ /dev/null
-/*
- * Copyright IBM Corp. 2008
- *
- * lxc_controller.c: linux container process controller
- *
- * Authors:
- * David L. Leskovec <dlesko at linux.vnet.ibm.com>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include <config.h>
-
-#include <sys/epoll.h>
-#include <sys/wait.h>
-#include <sys/socket.h>
-#include <sys/types.h>
-#include <sys/un.h>
-#include <unistd.h>
-#include <paths.h>
-#include <fcntl.h>
-#include <signal.h>
-#include <getopt.h>
-#include <sys/mount.h>
-
-#if HAVE_CAPNG
-#include <cap-ng.h>
-#endif
-
-#include "virterror_internal.h"
-#include "logging.h"
-#include "util.h"
-
-#include "lxc_conf.h"
-#include "lxc_container.h"
-#include "veth.h"
-#include "memory.h"
-#include "util.h"
-
-#define VIR_FROM_THIS VIR_FROM_LXC
-
-struct cgroup_device_policy {
- char type;
- int major;
- int minor;
-};
-
-/**
- * lxcSetContainerResources
- * @def: pointer to virtual machine structure
- *
- * Creates a cgroup for the container, moves the task inside,
- * and sets resource limits
- *
- * Returns 0 on success or -1 in case of error
- */
-static int lxcSetContainerResources(virDomainDefPtr def)
-{
- virCgroupPtr driver;
- virCgroupPtr cgroup;
- int rc = -1;
- int i;
- struct cgroup_device_policy devices[] = {
- {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_NULL},
- {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_ZERO},
- {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_FULL},
- {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_RANDOM},
- {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_URANDOM},
- {'c', LXC_DEV_MAJ_TTY, LXC_DEV_MIN_CONSOLE},
- {'c', LXC_DEV_MAJ_TTY, LXC_DEV_MIN_PTMX},
- {0, 0, 0}};
-
- rc = virCgroupForDriver("lxc", &driver, 1, 0);
- if (rc != 0) {
- /* Skip all if no driver cgroup is configured */
- if (rc == -ENXIO || rc == -ENOENT)
- return 0;
-
- virReportSystemError(NULL, -rc, "%s",
- _("Unable to get cgroup for driver"));
- return rc;
- }
-
- rc = virCgroupForDomain(driver, def->name, &cgroup, 1);
- if (rc != 0) {
- virReportSystemError(NULL, -rc,
- _("Unable to create cgroup for domain %s"),
- def->name);
- goto cleanup;
- }
-
- rc = virCgroupSetMemory(cgroup, def->maxmem);
- if (rc != 0) {
- virReportSystemError(NULL, -rc,
- _("Unable to set memory limit for domain %s"),
- def->name);
- goto cleanup;
- }
-
- rc = virCgroupDenyAllDevices(cgroup);
- if (rc != 0) {
- virReportSystemError(NULL, -rc,
- _("Unable to deny devices for domain %s"),
- def->name);
- goto cleanup;
- }
-
- for (i = 0; devices[i].type != 0; i++) {
- struct cgroup_device_policy *dev = &devices[i];
- rc = virCgroupAllowDevice(cgroup,
- dev->type,
- dev->major,
- dev->minor);
- if (rc != 0) {
- virReportSystemError(NULL, -rc,
- _("Unable to allow device %c:%d:%d for domain %s"),
- dev->type, dev->major, dev->minor, def->name);
- goto cleanup;
- }
- }
-
- rc = virCgroupAllowDeviceMajor(cgroup, 'c', LXC_DEV_MAJ_PTY);
- if (rc != 0) {
- virReportSystemError(NULL, -rc,
- _("Unable to allow PYT devices for domain %s"),
- def->name);
- goto cleanup;
- }
-
- rc = virCgroupAddTask(cgroup, getpid());
- if (rc != 0) {
- virReportSystemError(NULL, -rc,
- _("Unable to add task %d to cgroup for domain %s"),
- getpid(), def->name);
- }
-
-cleanup:
- virCgroupFree(&driver);
- virCgroupFree(&cgroup);
-
- return rc;
-}
-
-static char*lxcMonitorPath(virDomainDefPtr def)
-{
- char *sockpath;
-
- if (virAsprintf(&sockpath, "%s/%s.sock",
- LXC_STATE_DIR, def->name) < 0)
- virReportOOMError(NULL);
- return sockpath;
-}
-
-static int lxcMonitorServer(const char *sockpath)
-{
- int fd;
- struct sockaddr_un addr;
-
- if ((fd = socket(PF_UNIX, SOCK_STREAM, 0)) < 0) {
- virReportSystemError(NULL, errno,
- _("failed to create server socket '%s'"),
- sockpath);
- goto error;
- }
-
- unlink(sockpath);
- memset(&addr, 0, sizeof(addr));
- addr.sun_family = AF_UNIX;
- strncpy(addr.sun_path, sockpath, sizeof(addr.sun_path));
-
- if (bind(fd, (struct sockaddr *) &addr, sizeof(addr)) < 0) {
- virReportSystemError(NULL, errno,
- _("failed to bind server socket '%s'"),
- sockpath);
- goto error;
- }
- if (listen(fd, 30 /* backlog */ ) < 0) {
- virReportSystemError(NULL, errno,
- _("failed to listen server socket %s"),
- sockpath);
- goto error;
- }
-
- return fd;
-
-error:
- if (fd != -1)
- close(fd);
- return -1;
-}
-
-/**
- * lxcFdForward:
- * @readFd: file descriptor to read
- * @writeFd: file desriptor to write
- *
- * Reads 1 byte of data from readFd and writes to writeFd.
- *
- * Returns 0 on success, EAGAIN if returned on read, or -1 in case of error
- */
-static int lxcFdForward(int readFd, int writeFd)
-{
- int rc = -1;
- char buf[2];
-
- if (1 != (saferead(readFd, buf, 1))) {
- if (EAGAIN == errno) {
- rc = EAGAIN;
- goto cleanup;
- }
-
- virReportSystemError(NULL, errno,
- _("read of fd %d failed"),
- readFd);
- goto cleanup;
- }
-
- if (1 != (safewrite(writeFd, buf, 1))) {
- virReportSystemError(NULL, errno,
- _("write to fd %d failed"),
- writeFd);
- goto cleanup;
- }
-
- rc = 0;
-
-cleanup:
- return rc;
-}
-
-
-static int lxcControllerClearCapabilities(void)
-{
-#if HAVE_CAPNG
- int ret;
-
- capng_clear(CAPNG_SELECT_BOTH);
-
- if ((ret = capng_apply(CAPNG_SELECT_BOTH)) < 0) {
- lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
- _("failed to apply capabilities: %d"), ret);
- return -1;
- }
-#else
- VIR_WARN0(_("libcap-ng support not compiled in, unable to clear capabilities"));
-#endif
- return 0;
-}
-
-typedef struct _lxcTtyForwardFd_t {
- int fd;
- int active;
-} lxcTtyForwardFd_t;
-
-/**
- * lxcTtyForward:
- * @appPty: Open fd for application facing Pty
- * @contPty: Open fd for container facing Pty
- *
- * Forwards traffic between fds. Data read from appPty will be written to contPty
- * This process loops forever.
- * This uses epoll in edge triggered mode to avoid a hard loop on POLLHUP
- * events when the user disconnects the virsh console via ctrl-]
- *
- * Returns 0 on success or -1 in case of error
- */
-static int lxcControllerMain(int monitor,
- int client,
- int appPty,
- int contPty)
-{
- int rc = -1;
- int epollFd;
- struct epoll_event epollEvent;
- int numEvents;
- int numActive = 0;
- lxcTtyForwardFd_t fdArray[2];
- int timeout = -1;
- int curFdOff = 0;
- int writeFdOff = 0;
-
- fdArray[0].fd = appPty;
- fdArray[0].active = 0;
- fdArray[1].fd = contPty;
- fdArray[1].active = 0;
-
- /* create the epoll fild descriptor */
- epollFd = epoll_create(2);
- if (0 > epollFd) {
- virReportSystemError(NULL, errno, "%s",
- _("epoll_create(2) failed"));
- goto cleanup;
- }
-
- /* add the file descriptors the epoll fd */
- memset(&epollEvent, 0x00, sizeof(epollEvent));
- epollEvent.events = EPOLLIN|EPOLLET; /* edge triggered */
- epollEvent.data.fd = appPty;
- if (0 > epoll_ctl(epollFd, EPOLL_CTL_ADD, appPty, &epollEvent)) {
- virReportSystemError(NULL, errno, "%s",
- _("epoll_ctl(appPty) failed"));
- goto cleanup;
- }
- epollEvent.data.fd = contPty;
- if (0 > epoll_ctl(epollFd, EPOLL_CTL_ADD, contPty, &epollEvent)) {
- virReportSystemError(NULL, errno, "%s",
- _("epoll_ctl(contPty) failed"));
- goto cleanup;
- }
-
- epollEvent.events = EPOLLIN;
- epollEvent.data.fd = monitor;
- if (0 > epoll_ctl(epollFd, EPOLL_CTL_ADD, monitor, &epollEvent)) {
- virReportSystemError(NULL, errno, "%s",
- _("epoll_ctl(contPty) failed"));
- goto cleanup;
- }
-
- epollEvent.events = EPOLLHUP;
- epollEvent.data.fd = client;
- if (0 > epoll_ctl(epollFd, EPOLL_CTL_ADD, client, &epollEvent)) {
- virReportSystemError(NULL, errno, "%s",
- _("epoll_ctl(contPty) failed"));
- goto cleanup;
- }
-
- while (1) {
- /* if active fd's, return if no events, else wait forever */
- timeout = (numActive > 0) ? 0 : -1;
- numEvents = epoll_wait(epollFd, &epollEvent, 1, timeout);
- if (numEvents > 0) {
- if (epollEvent.data.fd == monitor) {
- int fd = accept(monitor, NULL, 0);
- if (client != -1) { /* Already connected, so kick new one out */
- close(fd);
- continue;
- }
- client = fd;
- epollEvent.events = EPOLLHUP;
- epollEvent.data.fd = client;
- if (0 > epoll_ctl(epollFd, EPOLL_CTL_ADD, client, &epollEvent)) {
- virReportSystemError(NULL, errno, "%s",
- _("epoll_ctl(contPty) failed"));
- goto cleanup;
- }
- } else if (client != -1 && epollEvent.data.fd == client) {
- if (0 > epoll_ctl(epollFd, EPOLL_CTL_DEL, client, &epollEvent)) {
- virReportSystemError(NULL, errno, "%s",
- _("epoll_ctl(contPty) failed"));
- goto cleanup;
- }
- close(client);
- client = -1;
- } else {
- if (epollEvent.events & EPOLLIN) {
- curFdOff = epollEvent.data.fd == appPty ? 0 : 1;
- if (!fdArray[curFdOff].active) {
- fdArray[curFdOff].active = 1;
- ++numActive;
- }
- } else if (epollEvent.events & EPOLLHUP) {
- DEBUG("EPOLLHUP from fd %d", epollEvent.data.fd);
- continue;
- } else {
- lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
- _("error event %d"), epollEvent.events);
- goto cleanup;
- }
- }
- } else if (0 == numEvents) {
- if (2 == numActive) {
- /* both fds active, toggle between the two */
- curFdOff ^= 1;
- } else {
- /* only one active, if current is active, use it, else it */
- /* must be the other one (ie. curFd just went inactive) */
- curFdOff = fdArray[curFdOff].active ? curFdOff : curFdOff ^ 1;
- }
-
- } else {
- if (EINTR == errno) {
- continue;
- }
-
- /* error */
- virReportSystemError(NULL, errno, "%s",
- _("epoll_wait() failed"));
- goto cleanup;
-
- }
-
- if (0 < numActive) {
- writeFdOff = curFdOff ^ 1;
- rc = lxcFdForward(fdArray[curFdOff].fd, fdArray[writeFdOff].fd);
-
- if (EAGAIN == rc) {
- /* this fd no longer has data, set it as inactive */
- --numActive;
- fdArray[curFdOff].active = 0;
- } else if (-1 == rc) {
- goto cleanup;
- }
-
- }
-
- }
-
- rc = 0;
-
-cleanup:
- close(appPty);
- close(contPty);
- close(epollFd);
- return rc;
-}
-
-
-
-/**
- * lxcControllerMoveInterfaces
- * @nveths: number of interfaces
- * @veths: interface names
- * @container: pid of container
- *
- * Moves network interfaces into a container's namespace
- *
- * Returns 0 on success or -1 in case of error
- */
-static int lxcControllerMoveInterfaces(unsigned int nveths,
- char **veths,
- pid_t container)
-{
- unsigned int i;
- for (i = 0 ; i < nveths ; i++)
- if (moveInterfaceToNetNs(veths[i], container) < 0) {
- lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
- _("failed to move interface %s to ns %d"),
- veths[i], container);
- return -1;
- }
-
- return 0;
-}
-
-
-/**
- * lxcCleanupInterfaces:
- * @conn: pointer to connection
- * @vm: pointer to virtual machine structure
- *
- * Cleans up the container interfaces by deleting the veth device pairs.
- *
- * Returns 0 on success or -1 in case of error
- */
-static int lxcControllerCleanupInterfaces(unsigned int nveths,
- char **veths)
-{
- unsigned int i;
- for (i = 0 ; i < nveths ; i++)
- if (vethDelete(veths[i]) < 0)
- lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
- _("failed to delete veth: %s"), veths[i]);
- /* will continue to try to cleanup any other interfaces */
-
- return 0;
-}
-
-#ifndef MS_REC
-#define MS_REC 16384
-#endif
-
-#ifndef MS_SLAVE
-#define MS_SLAVE (1<<19)
-#endif
-
-static int
-lxcControllerRun(virDomainDefPtr def,
- unsigned int nveths,
- char **veths,
- int monitor,
- int client,
- int appPty)
-{
- int rc = -1;
- int control[2] = { -1, -1};
- int containerPty;
- char *containerPtyPath;
- pid_t container = -1;
- virDomainFSDefPtr root;
- char *devpts = NULL;
- char *devptmx = NULL;
-
- if (socketpair(PF_UNIX, SOCK_STREAM, 0, control) < 0) {
- virReportSystemError(NULL, errno, "%s",
- _("sockpair failed"));
- goto cleanup;
- }
-
- root = virDomainGetRootFilesystem(def);
-
- /*
- * If doing a chroot style setup, we need to prepare
- * a private /dev/pts for the child now, which they
- * will later move into position.
- *
- * This is complex because 'virsh console' needs to
- * use /dev/pts from the host OS, and the guest OS
- * needs to use /dev/pts from the guest.
- *
- * This means that we (libvirt_lxc) need to see and
- * use both /dev/pts instances. We're running in the
- * host OS context though and don't want to expose
- * the guest OS /dev/pts there.
- *
- * Thus we call unshare(CLONE_NS) so that we can see
- * the guest's new /dev/pts, without it becoming
- * visible to the host OS. We also put the root FS
- * into slave mode, just in case it was currently
- * marked as shared
- */
- if (root) {
- VIR_DEBUG0("Setting up private /dev/pts");
- if (unshare(CLONE_NEWNS) < 0) {
- virReportSystemError(NULL, errno, "%s",
- _("cannot unshare mount namespace"));
- goto cleanup;
- }
-
- if (mount("", "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) {
- virReportSystemError(NULL, errno, "%s",
- _("failed to switch root mount into slave mode"));
- goto cleanup;
- }
-
- if (virAsprintf(&devpts, "%s/dev/pts", root->src) < 0 ||
- virAsprintf(&devptmx, "%s/dev/pts/ptmx", root->src) < 0) {
- virReportOOMError(NULL);
- goto cleanup;
- }
-
- if (virFileMakePath(devpts) < 0) {
- virReportSystemError(NULL, errno,
- _("failed to make path %s"),
- devpts);
- goto cleanup;
- }
-
- VIR_DEBUG("Mouting 'devpts' on %s", devpts);
- if (mount("devpts", devpts, "devpts", 0, "newinstance,ptmxmode=0666") < 0) {
- virReportSystemError(NULL, errno,
- _("failed to mount devpts on %s"),
- devpts);
- goto cleanup;
- }
-
- if (access(devptmx, R_OK) < 0) {
- VIR_WARN0("kernel does not support private devpts, using shared devpts");
- VIR_FREE(devptmx);
- }
- }
-
- if (devptmx) {
- VIR_DEBUG("Opening tty on private %s", devptmx);
- if (virFileOpenTtyAt(devptmx,
- &containerPty,
- &containerPtyPath,
- 0) < 0) {
- virReportSystemError(NULL, errno, "%s",
- _("failed to allocate tty"));
- goto cleanup;
- }
- } else {
- VIR_DEBUG0("Opening tty on shared /dev/ptmx");
- if (virFileOpenTty(&containerPty,
- &containerPtyPath,
- 0) < 0) {
- virReportSystemError(NULL, errno, "%s",
- _("failed to allocate tty"));
- goto cleanup;
- }
- }
-
-
- if (lxcSetContainerResources(def) < 0)
- goto cleanup;
-
- if ((container = lxcContainerStart(def,
- nveths,
- veths,
- control[1],
- containerPtyPath)) < 0)
- goto cleanup;
- close(control[1]);
- control[1] = -1;
-
- if (lxcControllerMoveInterfaces(nveths, veths, container) < 0)
- goto cleanup;
-
- if (lxcContainerSendContinue(control[0]) < 0)
- goto cleanup;
-
- /* Now the container is running, there's no need for us to keep
- any elevated capabilities */
- if (lxcControllerClearCapabilities() < 0)
- goto cleanup;
-
- rc = lxcControllerMain(monitor, client, appPty, containerPty);
-
-cleanup:
- VIR_FREE(devptmx);
- VIR_FREE(devpts);
- if (control[0] != -1)
- close(control[0]);
- if (control[1] != -1)
- close(control[1]);
- VIR_FREE(containerPtyPath);
- if (containerPty != -1)
- close(containerPty);
-
- if (container > 1) {
- kill(container, SIGTERM);
- waitpid(container, NULL, 0);
- }
- return rc;
-}
-
-
-int main(int argc, char *argv[])
-{
- pid_t pid;
- int rc = 1;
- int client;
- char *name = NULL;
- int nveths = 0;
- char **veths = NULL;
- int monitor = -1;
- int appPty = -1;
- int bg = 0;
- virCapsPtr caps = NULL;
- virDomainDefPtr def = NULL;
- char *configFile = NULL;
- char *sockpath = NULL;
- const struct option options[] = {
- { "background", 0, NULL, 'b' },
- { "name", 1, NULL, 'n' },
- { "veth", 1, NULL, 'v' },
- { "console", 1, NULL, 'c' },
- { "help", 0, NULL, 'h' },
- { 0, 0, 0, 0 },
- };
-
- while (1) {
- int c;
-
- c = getopt_long(argc, argv, "dn:v:m:c:h",
- options, NULL);
-
- if (c == -1)
- break;
-
- switch (c) {
- case 'b':
- bg = 1;
- break;
-
- case 'n':
- if ((name = strdup(optarg)) == NULL) {
- virReportOOMError(NULL);
- goto cleanup;
- }
- break;
-
- case 'v':
- if (VIR_REALLOC_N(veths, nveths+1) < 0) {
- virReportOOMError(NULL);
- goto cleanup;
- }
- if ((veths[nveths++] = strdup(optarg)) == NULL) {
- virReportOOMError(NULL);
- goto cleanup;
- }
- break;
-
- case 'c':
- if (virStrToLong_i(optarg, NULL, 10, &appPty) < 0) {
- fprintf(stderr, "malformed --console argument '%s'", optarg);
- goto cleanup;
- }
- break;
-
- case 'h':
- case '?':
- fprintf(stderr, "\n");
- fprintf(stderr, "syntax: %s [OPTIONS]\n", argv[0]);
- fprintf(stderr, "\n");
- fprintf(stderr, "Options\n");
- fprintf(stderr, "\n");
- fprintf(stderr, " -b, --background\n");
- fprintf(stderr, " -n NAME, --name NAME\n");
- fprintf(stderr, " -c FD, --console FD\n");
- fprintf(stderr, " -v VETH, --veth VETH\n");
- fprintf(stderr, " -h, --help\n");
- fprintf(stderr, "\n");
- goto cleanup;
- }
- }
-
-
- if (name == NULL) {
- fprintf(stderr, "%s: missing --name argument for configuration\n", argv[0]);
- goto cleanup;
- }
-
- if (appPty < 0) {
- fprintf(stderr, "%s: missing --console argument for container PTY\n", argv[0]);
- goto cleanup;
- }
-
- if (getuid() && 0) {
- fprintf(stderr, "%s: must be run as the 'root' user\n", argv[0]);
- goto cleanup;
- }
-
- if ((caps = lxcCapsInit()) == NULL)
- goto cleanup;
-
- if ((configFile = virDomainConfigFile(NULL,
- LXC_STATE_DIR,
- name)) == NULL)
- goto cleanup;
-
- if ((def = virDomainDefParseFile(NULL, caps, configFile,
- VIR_DOMAIN_XML_INACTIVE)) == NULL)
- goto cleanup;
-
- if (def->nnets != nveths) {
- fprintf(stderr, "%s: expecting %d veths, but got %d\n",
- argv[0], def->nnets, nveths);
- goto cleanup;
- }
-
- if ((sockpath = lxcMonitorPath(def)) == NULL)
- goto cleanup;
-
- if ((monitor = lxcMonitorServer(sockpath)) < 0)
- goto cleanup;
-
- if (bg) {
- if ((pid = fork()) < 0)
- goto cleanup;
-
- if (pid > 0) {
- if ((rc = virFileWritePid(LXC_STATE_DIR, name, pid)) != 0) {
- virReportSystemError(NULL, rc,
- _("Unable to write pid file '%s/%s.pid'"),
- LXC_STATE_DIR, name);
- _exit(1);
- }
-
- /* First child now exits, allowing original caller
- * (ie libvirtd's LXC driver to complete their
- * waitpid & continue */
- _exit(0);
- }
-
- /* Don't hold onto any cwd we inherit from libvirtd either */
- if (chdir("/") < 0) {
- virReportSystemError(NULL, errno, "%s",
- _("Unable to change to root dir"));
- goto cleanup;
- }
-
- if (setsid() < 0) {
- virReportSystemError(NULL, errno, "%s",
- _("Unable to become session leader"));
- goto cleanup;
- }
- }
-
- /* Accept initial client which is the libvirtd daemon */
- if ((client = accept(monitor, NULL, 0)) < 0) {
- virReportSystemError(NULL, errno, "%s",
- _("Failed connection from LXC driver"));
- goto cleanup;
- }
-
- rc = lxcControllerRun(def, nveths, veths, monitor, client, appPty);
-
-
-cleanup:
- if (def)
- virFileDeletePid(LXC_STATE_DIR, def->name);
- lxcControllerCleanupInterfaces(nveths, veths);
- if (sockpath)
- unlink(sockpath);
- VIR_FREE(sockpath);
-
- return rc;
-}
+++ /dev/null
-/*
- * Copyright IBM Corp. 2008
- *
- * lxc_driver.c: linux container driver functions
- *
- * Authors:
- * David L. Leskovec <dlesko at linux.vnet.ibm.com>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include <config.h>
-
-#include <fcntl.h>
-#include <sched.h>
-#include <sys/utsname.h>
-#include <stdbool.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/socket.h>
-#include <sys/un.h>
-#include <sys/poll.h>
-#include <unistd.h>
-#include <wait.h>
-
-#include "virterror_internal.h"
-#include "logging.h"
-#include "datatypes.h"
-#include "lxc_conf.h"
-#include "lxc_container.h"
-#include "lxc_driver.h"
-#include "memory.h"
-#include "util.h"
-#include "bridge.h"
-#include "veth.h"
-#include "event.h"
-#include "nodeinfo.h"
-#include "uuid.h"
-
-
-#define VIR_FROM_THIS VIR_FROM_LXC
-
-static int lxcStartup(int privileged);
-static int lxcShutdown(void);
-static lxc_driver_t *lxc_driver = NULL;
-
-/* Functions */
-
-static void lxcDriverLock(lxc_driver_t *driver)
-{
- virMutexLock(&driver->lock);
-}
-static void lxcDriverUnlock(lxc_driver_t *driver)
-{
- virMutexUnlock(&driver->lock);
-}
-
-static void lxcDomainEventFlush(int timer, void *opaque);
-static void lxcDomainEventQueue(lxc_driver_t *driver,
- virDomainEventPtr event);
-
-
-static virDrvOpenStatus lxcOpen(virConnectPtr conn,
- virConnectAuthPtr auth ATTRIBUTE_UNUSED,
- int flags ATTRIBUTE_UNUSED)
-{
- /* Verify uri was specified */
- if (conn->uri == NULL) {
- if (lxc_driver == NULL)
- return VIR_DRV_OPEN_DECLINED;
-
- conn->uri = xmlParseURI("lxc:///");
- if (!conn->uri) {
- virReportOOMError(conn);
- return VIR_DRV_OPEN_ERROR;
- }
- } else {
- if (conn->uri->scheme == NULL ||
- STRNEQ(conn->uri->scheme, "lxc"))
- return VIR_DRV_OPEN_DECLINED;
-
- /* Leave for remote driver */
- if (conn->uri->server != NULL)
- return VIR_DRV_OPEN_DECLINED;
-
- /* If path isn't '/' then they typoed, tell them correct path */
- if (STRNEQ(conn->uri->path, "/")) {
- lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
- _("unexpected LXC URI path '%s', try lxc:///"),
- conn->uri->path);
- return VIR_DRV_OPEN_ERROR;
- }
-
- /* URI was good, but driver isn't active */
- if (lxc_driver == NULL) {
- lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
- "%s", _("lxc state driver is not active"));
- return VIR_DRV_OPEN_ERROR;
- }
- }
-
- conn->privateData = lxc_driver;
-
- return VIR_DRV_OPEN_SUCCESS;
-}
-
-static int lxcClose(virConnectPtr conn)
-{
- lxc_driver_t *driver = conn->privateData;
-
- lxcDriverLock(driver);
- virDomainEventCallbackListRemoveConn(conn, driver->domainEventCallbacks);
- lxcDriverUnlock(driver);
-
- conn->privateData = NULL;
- return 0;
-}
-
-static char *lxcGetCapabilities(virConnectPtr conn) {
- lxc_driver_t *driver = conn->privateData;
- char *xml;
-
- lxcDriverLock(driver);
- if ((xml = virCapabilitiesFormatXML(driver->caps)) == NULL)
- virReportOOMError(conn);
- lxcDriverUnlock(driver);
-
- return xml;
-}
-
-
-static virDomainPtr lxcDomainLookupByID(virConnectPtr conn,
- int id)
-{
- lxc_driver_t *driver = conn->privateData;
- virDomainObjPtr vm;
- virDomainPtr dom = NULL;
-
- lxcDriverLock(driver);
- vm = virDomainFindByID(&driver->domains, id);
- lxcDriverUnlock(driver);
-
- if (!vm) {
- lxcError(conn, NULL, VIR_ERR_NO_DOMAIN, NULL);
- goto cleanup;
- }
-
- dom = virGetDomain(conn, vm->def->name, vm->def->uuid);
- if (dom)
- dom->id = vm->def->id;
-
-cleanup:
- if (vm)
- virDomainObjUnlock(vm);
- return dom;
-}
-
-static virDomainPtr lxcDomainLookupByUUID(virConnectPtr conn,
- const unsigned char *uuid)
-{
- lxc_driver_t *driver = conn->privateData;
- virDomainObjPtr vm;
- virDomainPtr dom = NULL;
-
- lxcDriverLock(driver);
- vm = virDomainFindByUUID(&driver->domains, uuid);
- lxcDriverUnlock(driver);
-
- if (!vm) {
- lxcError(conn, NULL, VIR_ERR_NO_DOMAIN, NULL);
- goto cleanup;
- }
-
- dom = virGetDomain(conn, vm->def->name, vm->def->uuid);
- if (dom)
- dom->id = vm->def->id;
-
-cleanup:
- if (vm)
- virDomainObjUnlock(vm);
- return dom;
-}
-
-static virDomainPtr lxcDomainLookupByName(virConnectPtr conn,
- const char *name)
-{
- lxc_driver_t *driver = conn->privateData;
- virDomainObjPtr vm;
- virDomainPtr dom = NULL;
-
- lxcDriverLock(driver);
- vm = virDomainFindByName(&driver->domains, name);
- lxcDriverUnlock(driver);
- if (!vm) {
- lxcError(conn, NULL, VIR_ERR_NO_DOMAIN, NULL);
- goto cleanup;
- }
-
- dom = virGetDomain(conn, vm->def->name, vm->def->uuid);
- if (dom)
- dom->id = vm->def->id;
-
-cleanup:
- if (vm)
- virDomainObjUnlock(vm);
- return dom;
-}
-
-static int lxcListDomains(virConnectPtr conn, int *ids, int nids) {
- lxc_driver_t *driver = conn->privateData;
- int got = 0, i;
-
- lxcDriverLock(driver);
- for (i = 0 ; i < driver->domains.count && got < nids ; i++) {
- virDomainObjLock(driver->domains.objs[i]);
- if (virDomainIsActive(driver->domains.objs[i]))
- ids[got++] = driver->domains.objs[i]->def->id;
- virDomainObjUnlock(driver->domains.objs[i]);
- }
- lxcDriverUnlock(driver);
-
- return got;
-}
-
-static int lxcNumDomains(virConnectPtr conn) {
- lxc_driver_t *driver = conn->privateData;
- int n = 0, i;
-
- lxcDriverLock(driver);
- for (i = 0 ; i < driver->domains.count ; i++) {
- virDomainObjLock(driver->domains.objs[i]);
- if (virDomainIsActive(driver->domains.objs[i]))
- n++;
- virDomainObjUnlock(driver->domains.objs[i]);
- }
- lxcDriverUnlock(driver);
-
- return n;
-}
-
-static int lxcListDefinedDomains(virConnectPtr conn,
- char **const names, int nnames) {
- lxc_driver_t *driver = conn->privateData;
- int got = 0, i;
-
- lxcDriverLock(driver);
- for (i = 0 ; i < driver->domains.count && got < nnames ; i++) {
- virDomainObjLock(driver->domains.objs[i]);
- if (!virDomainIsActive(driver->domains.objs[i])) {
- if (!(names[got++] = strdup(driver->domains.objs[i]->def->name))) {
- virReportOOMError(conn);
- virDomainObjUnlock(driver->domains.objs[i]);
- goto cleanup;
- }
- }
- virDomainObjUnlock(driver->domains.objs[i]);
- }
- lxcDriverUnlock(driver);
-
- return got;
-
- cleanup:
- for (i = 0 ; i < got ; i++)
- VIR_FREE(names[i]);
- lxcDriverUnlock(driver);
- return -1;
-}
-
-
-static int lxcNumDefinedDomains(virConnectPtr conn) {
- lxc_driver_t *driver = conn->privateData;
- int n = 0, i;
-
- lxcDriverLock(driver);
- for (i = 0 ; i < driver->domains.count ; i++) {
- virDomainObjLock(driver->domains.objs[i]);
- if (!virDomainIsActive(driver->domains.objs[i]))
- n++;
- virDomainObjUnlock(driver->domains.objs[i]);
- }
- lxcDriverUnlock(driver);
-
- return n;
-}
-
-
-
-static virDomainPtr lxcDomainDefine(virConnectPtr conn, const char *xml)
-{
- lxc_driver_t *driver = conn->privateData;
- virDomainDefPtr def = NULL;
- virDomainObjPtr vm = NULL;
- virDomainPtr dom = NULL;
- virDomainEventPtr event = NULL;
- int newVM = 1;
-
- lxcDriverLock(driver);
- if (!(def = virDomainDefParseString(conn, driver->caps, xml,
- VIR_DOMAIN_XML_INACTIVE)))
- goto cleanup;
-
- /* See if a VM with matching UUID already exists */
- vm = virDomainFindByUUID(&driver->domains, def->uuid);
- if (vm) {
- /* UUID matches, but if names don't match, refuse it */
- if (STRNEQ(vm->def->name, def->name)) {
- char uuidstr[VIR_UUID_STRING_BUFLEN];
- virUUIDFormat(vm->def->uuid, uuidstr);
- lxcError(conn, NULL, VIR_ERR_OPERATION_FAILED,
- _("domain '%s' is already defined with uuid %s"),
- vm->def->name, uuidstr);
- goto cleanup;
- }
-
- /* UUID & name match */
- virDomainObjUnlock(vm);
- newVM = 0;
- } else {
- /* UUID does not match, but if a name matches, refuse it */
- vm = virDomainFindByName(&driver->domains, def->name);
- if (vm) {
- char uuidstr[VIR_UUID_STRING_BUFLEN];
- virUUIDFormat(vm->def->uuid, uuidstr);
- lxcError(conn, NULL, VIR_ERR_OPERATION_FAILED,
- _("domain '%s' is already defined with uuid %s"),
- def->name, uuidstr);
- goto cleanup;
- }
- }
-
- if ((def->nets != NULL) && !(driver->have_netns)) {
- lxcError(conn, NULL, VIR_ERR_NO_SUPPORT,
- "%s", _("System lacks NETNS support"));
- goto cleanup;
- }
-
- if (!(vm = virDomainAssignDef(conn, &driver->domains, def)))
- goto cleanup;
- def = NULL;
- vm->persistent = 1;
-
- if (virDomainSaveConfig(conn,
- driver->configDir,
- vm->newDef ? vm->newDef : vm->def) < 0) {
- virDomainRemoveInactive(&driver->domains, vm);
- vm = NULL;
- goto cleanup;
- }
-
- event = virDomainEventNewFromObj(vm,
- VIR_DOMAIN_EVENT_DEFINED,
- newVM ?
- VIR_DOMAIN_EVENT_DEFINED_ADDED :
- VIR_DOMAIN_EVENT_DEFINED_UPDATED);
-
- dom = virGetDomain(conn, vm->def->name, vm->def->uuid);
- if (dom)
- dom->id = vm->def->id;
-
-cleanup:
- virDomainDefFree(def);
- if (vm)
- virDomainObjUnlock(vm);
- if (event)
- lxcDomainEventQueue(driver, event);
- lxcDriverUnlock(driver);
- return dom;
-}
-
-static int lxcDomainUndefine(virDomainPtr dom)
-{
- lxc_driver_t *driver = dom->conn->privateData;
- virDomainObjPtr vm;
- virDomainEventPtr event = NULL;
- int ret = -1;
-
- lxcDriverLock(driver);
- vm = virDomainFindByUUID(&driver->domains, dom->uuid);
- if (!vm) {
- lxcError(dom->conn, dom, VIR_ERR_INVALID_DOMAIN,
- "%s", _("no domain with matching uuid"));
- goto cleanup;
- }
-
- if (virDomainIsActive(vm)) {
- lxcError(dom->conn, dom, VIR_ERR_OPERATION_INVALID,
- "%s", _("cannot delete active domain"));
- goto cleanup;
- }
-
- if (!vm->persistent) {
- lxcError(dom->conn, dom, VIR_ERR_OPERATION_INVALID,
- "%s", _("cannot undefine transient domain"));
- goto cleanup;
- }
-
- if (virDomainDeleteConfig(dom->conn,
- driver->configDir,
- driver->autostartDir,
- vm) < 0)
- goto cleanup;
-
- event = virDomainEventNewFromObj(vm,
- VIR_DOMAIN_EVENT_UNDEFINED,
- VIR_DOMAIN_EVENT_UNDEFINED_REMOVED);
-
- virDomainRemoveInactive(&driver->domains, vm);
- vm = NULL;
- ret = 0;
-
-cleanup:
- if (vm)
- virDomainObjUnlock(vm);
- if (event)
- lxcDomainEventQueue(driver, event);
- lxcDriverUnlock(driver);
- return ret;
-}
-
-static int lxcDomainGetInfo(virDomainPtr dom,
- virDomainInfoPtr info)
-{
- lxc_driver_t *driver = dom->conn->privateData;
- virDomainObjPtr vm;
- virCgroupPtr cgroup = NULL;
- int ret = -1;
-
- lxcDriverLock(driver);
- vm = virDomainFindByUUID(&driver->domains, dom->uuid);
-
- if (!vm) {
- lxcError(dom->conn, dom, VIR_ERR_INVALID_DOMAIN,
- "%s", _("no domain with matching uuid"));
- goto cleanup;
- }
-
- info->state = vm->state;
-
- if (!virDomainIsActive(vm) || driver->cgroup == NULL) {
- info->cpuTime = 0;
- } else {
- if (virCgroupForDomain(driver->cgroup, vm->def->name, &cgroup, 0) != 0) {
- lxcError(dom->conn, dom, VIR_ERR_INTERNAL_ERROR,
- _("Unable to get cgroup for %s\n"), vm->def->name);
- goto cleanup;
- }
-
- if (virCgroupGetCpuacctUsage(cgroup, &(info->cpuTime)) < 0) {
- lxcError(dom->conn, dom, VIR_ERR_OPERATION_FAILED, ("cannot read cputime for domain"));
- goto cleanup;
- }
- }
-
- info->maxMem = vm->def->maxmem;
- info->memory = vm->def->memory;
- info->nrVirtCpu = 1;
- ret = 0;
-
-cleanup:
- lxcDriverUnlock(driver);
- if (cgroup)
- virCgroupFree(&cgroup);
- if (vm)
- virDomainObjUnlock(vm);
- return ret;
-}
-
-static char *lxcGetOSType(virDomainPtr dom)
-{
- lxc_driver_t *driver = dom->conn->privateData;
- virDomainObjPtr vm;
- char *ret = NULL;
-
- lxcDriverLock(driver);
- vm = virDomainFindByUUID(&driver->domains, dom->uuid);
- lxcDriverUnlock(driver);
-
- if (!vm) {
- lxcError(dom->conn, dom, VIR_ERR_INVALID_DOMAIN,
- "%s", _("no domain with matching uuid"));
- goto cleanup;
- }
-
- ret = strdup(vm->def->os.type);
-
-cleanup:
- if (vm)
- virDomainObjUnlock(vm);
- return ret;
-}
-
-static char *lxcDomainDumpXML(virDomainPtr dom,
- int flags)
-{
- lxc_driver_t *driver = dom->conn->privateData;
- virDomainObjPtr vm;
- char *ret = NULL;
-
- lxcDriverLock(driver);
- vm = virDomainFindByUUID(&driver->domains, dom->uuid);
- lxcDriverUnlock(driver);
-
- if (!vm) {
- lxcError(dom->conn, dom, VIR_ERR_INVALID_DOMAIN,
- "%s", _("no domain with matching uuid"));
- goto cleanup;
- }
-
- ret = virDomainDefFormat(dom->conn,
- (flags & VIR_DOMAIN_XML_INACTIVE) &&
- vm->newDef ? vm->newDef : vm->def,
- flags);
-
-cleanup:
- if (vm)
- virDomainObjUnlock(vm);
- return ret;
-}
-
-
-/**
- * lxcVmCleanup:
- * @vm: Ptr to VM to clean up
- *
- * waitpid() on the container process. kill and wait the tty process
- * This is called by both lxcDomainDestroy and lxcSigHandler when a
- * container exits.
- *
- * Returns 0 on success or -1 in case of error
- */
-static int lxcVMCleanup(virConnectPtr conn,
- lxc_driver_t *driver,
- virDomainObjPtr vm)
-{
- int rc = -1;
- int waitRc;
- int childStatus = -1;
- virCgroupPtr cgroup;
- int i;
-
- while (((waitRc = waitpid(vm->pid, &childStatus, 0)) == -1) &&
- errno == EINTR)
- ; /* empty */
-
- if ((waitRc != vm->pid) && (errno != ECHILD)) {
- virReportSystemError(conn, errno,
- _("waitpid failed to wait for container %d: %d"),
- vm->pid, waitRc);
- }
-
- rc = 0;
-
- if (WIFEXITED(childStatus)) {
- rc = WEXITSTATUS(childStatus);
- DEBUG("container exited with rc: %d", rc);
- }
-
- virEventRemoveHandle(vm->monitorWatch);
- close(vm->monitor);
-
- virFileDeletePid(driver->stateDir, vm->def->name);
- virDomainDeleteConfig(conn, driver->stateDir, NULL, vm);
-
- vm->state = VIR_DOMAIN_SHUTOFF;
- vm->pid = -1;
- vm->def->id = -1;
- vm->monitor = -1;
-
- for (i = 0 ; i < vm->def->nnets ; i++) {
- vethInterfaceUpOrDown(vm->def->nets[i]->ifname, 0);
- vethDelete(vm->def->nets[i]->ifname);
- }
-
- if (driver->cgroup &&
- virCgroupForDomain(driver->cgroup, vm->def->name, &cgroup, 0) == 0) {
- virCgroupRemove(cgroup);
- virCgroupFree(&cgroup);
- }
-
- if (vm->newDef) {
- virDomainDefFree(vm->def);
- vm->def = vm->newDef;
- vm->def->id = -1;
- vm->newDef = NULL;
- }
-
- return rc;
-}
-
-/**
- * lxcSetupInterfaces:
- * @conn: connection used to look up virtual networks and to report errors
- * @def: domain definition whose network interfaces are to be set up
- * @nveths: in/out number of entries stored in @veths
- * @veths: in/out array receiving the container-side veth device names
- *
- * Sets up the container interfaces by creating the veth device pairs and
- * attaching the parent end to the appropriate bridge.  The name of the
- * container end is appended to @veths so that the caller can hand it over
- * to the container later.
- *
- * Returns 0 on success or -1 in case of error
- */
-static int lxcSetupInterfaces(virConnectPtr conn,
-                              virDomainDefPtr def,
-                              unsigned int *nveths,
-                              char ***veths)
-{
-    int ret = -1;
-    int rc;
-    int i;
-    char *netBridge = NULL; /* string owned by us, from virNetworkGetBridgeName */
-    char parentVeth[PATH_MAX] = "";
-    char containerVeth[PATH_MAX] = "";
-    brControl *brctl = NULL;
-
-    if (brInit(&brctl) != 0)
-        return -1;
-
-    for (i = 0 ; i < def->nnets ; i++) {
-        char *bridge = NULL;
-
-        /* Start every interface from a clean slate so that nothing from
-         * the previous iteration (bridge or veth names) is reused.
-         * NOTE(review): assumes vethCreate() treats an empty name as
-         * "pick one for me" - confirm against veth.c */
-        VIR_FREE(netBridge);
-        parentVeth[0] = '\0';
-        containerVeth[0] = '\0';
-
-        switch (def->nets[i]->type) {
-        case VIR_DOMAIN_NET_TYPE_NETWORK:
-        {
-            virNetworkPtr network = virNetworkLookupByName(conn,
-                                      def->nets[i]->data.network.name);
-            if (!network) {
-                goto error_exit;
-            }
-
-            /* The returned string must be freed by the caller */
-            netBridge = virNetworkGetBridgeName(network);
-            bridge = netBridge;
-
-            virNetworkFree(network);
-            break;
-        }
-        case VIR_DOMAIN_NET_TYPE_BRIDGE:
-            bridge = def->nets[i]->data.bridge.brname;
-            break;
-        }
-
-        if (NULL == bridge) {
-            lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
-                     "%s", _("failed to get bridge for interface"));
-            goto error_exit;
-        }
-        DEBUG("bridge: %s", bridge);
-
-        DEBUG0("calling vethCreate()");
-        if (NULL != def->nets[i]->ifname) {
-            if (strlen(def->nets[i]->ifname) >= sizeof(parentVeth)) {
-                lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
-                         "%s", _("failed to allocate veth names"));
-                goto error_exit;
-            }
-            strcpy(parentVeth, def->nets[i]->ifname);
-        }
-        DEBUG("parentVeth: %s, containerVeth: %s", parentVeth, containerVeth);
-        if (0 != (rc = vethCreate(parentVeth, PATH_MAX, containerVeth, PATH_MAX))) {
-            lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
-                     _("failed to create veth device pair: %d"), rc);
-            goto error_exit;
-        }
-
-        if (NULL == def->nets[i]->ifname &&
-            NULL == (def->nets[i]->ifname = strdup(parentVeth))) {
-            lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
-                     "%s", _("failed to allocate veth names"));
-            goto error_exit;
-        }
-
-        /* Record the container end; only count it once it is stored */
-        if (VIR_REALLOC_N(*veths, (*nveths)+1) < 0) {
-            virReportOOMError(conn);
-            goto error_exit;
-        }
-        if (((*veths)[*nveths] = strdup(containerVeth)) == NULL) {
-            virReportOOMError(conn);
-            goto error_exit;
-        }
-        (*nveths)++;
-
-        if (0 != (rc = brAddInterface(brctl, bridge, parentVeth))) {
-            virReportSystemError(conn, rc,
-                                 _("failed to add %s device to %s"),
-                                 parentVeth, bridge);
-            goto error_exit;
-        }
-
-        if (0 != (rc = vethInterfaceUpOrDown(parentVeth, 1))) {
-            virReportSystemError(conn, rc, "%s",
-                                 _("failed to enable parent ns veth device"));
-            goto error_exit;
-        }
-
-    }
-
-    ret = 0;
-
-error_exit:
-    VIR_FREE(netBridge);
-    brShutdown(brctl);
-    return ret;
-}
-
-
-/**
- * lxcMonitorClient:
- * @conn: connection used for error reporting
- * @driver: driver state providing the state directory
- * @vm: domain whose controller socket is opened
- *
- * Connects to the UNIX socket "<stateDir>/<domain name>.sock".
- *
- * Returns the connected file descriptor, or -1 on error
- */
-static int lxcMonitorClient(virConnectPtr conn,
-                            lxc_driver_t * driver,
-                            virDomainObjPtr vm)
-{
-    char *sockpath = NULL;
-    int fd = -1;
-    struct sockaddr_un addr;
-
-    if (virAsprintf(&sockpath, "%s/%s.sock",
-                    driver->stateDir, vm->def->name) < 0) {
-        virReportOOMError(conn);
-        return -1;
-    }
-
-    /* sun_path is a fixed-size array; refuse paths that would be
-     * truncated or left without a terminating NUL */
-    if (strlen(sockpath) >= sizeof(addr.sun_path)) {
-        lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
-                 _("socket path '%s' is too long"), sockpath);
-        goto error;
-    }
-
-    if ((fd = socket(PF_UNIX, SOCK_STREAM, 0)) < 0) {
-        virReportSystemError(conn, errno, "%s",
-                             _("failed to create client socket"));
-        goto error;
-    }
-
-    memset(&addr, 0, sizeof(addr));
-    addr.sun_family = AF_UNIX;
-    strncpy(addr.sun_path, sockpath, sizeof(addr.sun_path) - 1);
-
-    if (connect(fd, (struct sockaddr *) &addr, sizeof(addr)) < 0) {
-        virReportSystemError(conn, errno, "%s",
-                             _("failed to connect to client socket"));
-        goto error;
-    }
-
-    VIR_FREE(sockpath);
-    return fd;
-
-error:
-    VIR_FREE(sockpath);
-    if (fd != -1)
-        close(fd);
-    return -1;
-}
-
-
-/*
- * Signal the container process of @vm and clean up after it.
- *
- * @signum selects the signal to send; 0 means SIGINT.  A process that
- * no longer exists (ESRCH) is not treated as an error.
- *
- * Returns the result of lxcVMCleanup(), or -1 if the pid is invalid or
- * the signal could not be delivered.
- */
-static int lxcVmTerminate(virConnectPtr conn,
-                          lxc_driver_t *driver,
-                          virDomainObjPtr vm,
-                          int signum)
-{
-    const int sig = (signum == 0) ? SIGINT : signum;
-
-    if (vm->pid <= 0) {
-        lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
-                 _("invalid PID %d for container"), vm->pid);
-        return -1;
-    }
-
-    if (kill(vm->pid, sig) < 0 && errno != ESRCH) {
-        virReportSystemError(conn, errno,
-                             _("failed to kill pid %d"),
-                             vm->pid);
-        return -1;
-    }
-
-    vm->state = VIR_DOMAIN_SHUTDOWN;
-
-    return lxcVMCleanup(conn, driver, vm);
-}
-
-/*
- * Event-loop callback for a container monitor socket (installed by
- * lxcVmStart for error/hangup events).
- *
- * @watch: handle id of the watch that fired
- * @fd: file descriptor the event was reported on
- * @data: the lxc_driver_t the watch was registered with
- *
- * Looks up the domain owning @watch, terminates it and queues a
- * STOPPED/SHUTDOWN event; transient domains are then removed from the
- * domain list.
- */
-static void lxcMonitorEvent(int watch,
- int fd,
- int events ATTRIBUTE_UNUSED,
- void *data)
-{
- lxc_driver_t *driver = data;
- virDomainObjPtr vm = NULL;
- virDomainEventPtr event = NULL;
- unsigned int i;
-
- lxcDriverLock(driver);
- /* Find the domain for this watch; the matching object is left locked */
- for (i = 0 ; i < driver->domains.count ; i++) {
- virDomainObjPtr tmpvm = driver->domains.objs[i];
- virDomainObjLock(tmpvm);
- if (tmpvm->monitorWatch == watch) {
- vm = tmpvm;
- break;
- }
- virDomainObjUnlock(tmpvm);
- }
- /* No domain owns this watch any more: just drop the watch */
- if (!vm) {
- virEventRemoveHandle(watch);
- goto cleanup;
- }
-
- /* The watch does not belong to the domain's current monitor fd */
- if (vm->monitor != fd) {
- virEventRemoveHandle(watch);
- goto cleanup;
- }
-
- if (lxcVmTerminate(NULL, driver, vm, SIGINT) < 0) {
- virEventRemoveHandle(watch);
- } else {
- event = virDomainEventNewFromObj(vm,
- VIR_DOMAIN_EVENT_STOPPED,
- VIR_DOMAIN_EVENT_STOPPED_SHUTDOWN);
- }
- /* vm is reset to NULL after removal so cleanup does not unlock it */
- if (!vm->persistent) {
- virDomainRemoveInactive(&driver->domains, vm);
- vm = NULL;
- }
-
-cleanup:
- if (vm)
- virDomainObjUnlock(vm);
- if (event)
- lxcDomainEventQueue(driver, event);
- lxcDriverUnlock(driver);
-}
-
-
-/**
- * lxcControllerStart:
- * @conn: connection used for error reporting
- * @vm: domain to start a controller for
- * @nveths: number of entries in @veths
- * @veths: container-side veth names, each passed as "--veth <name>"
- * @appPty: pty fd passed as "--console" and kept open in the child
- * @logfd: fd the child's stdout and stderr are sent to
- *
- * Builds the controller command line from the domain's emulator,
- * executes it and waits for the launched process to exit.
- *
- * Returns 0 if the process exited with status 0, -1 otherwise
- */
-static int lxcControllerStart(virConnectPtr conn,
-                              virDomainObjPtr vm,
-                              int nveths,
-                              char **veths,
-                              int appPty,
-                              int logfd)
-{
-    int i;
-    int rc;
-    int ret = -1;
-    int largc = 0, larga = 0;
-    const char **largv = NULL;
-    pid_t child;
-    int status;
-    fd_set keepfd;
-    char appPtyStr[30];
-    const char *emulator;
-
-    FD_ZERO(&keepfd);
-
-#define ADD_ARG_SPACE                                                   \
-    do {                                                                \
-        if (largc == larga) {                                           \
-            larga += 10;                                                \
-            if (VIR_REALLOC_N(largv, larga) < 0)                        \
-                goto no_memory;                                         \
-        }                                                               \
-    } while (0)
-
-#define ADD_ARG(thisarg)                                                \
-    do {                                                                \
-        ADD_ARG_SPACE;                                                  \
-        largv[largc++] = thisarg;                                       \
-    } while (0)
-
-#define ADD_ARG_LIT(thisarg)                                            \
-    do {                                                                \
-        ADD_ARG_SPACE;                                                  \
-        if ((largv[largc++] = strdup(thisarg)) == NULL)                 \
-            goto no_memory;                                             \
-    } while (0)
-
-    snprintf(appPtyStr, sizeof(appPtyStr), "%d", appPty);
-
-    emulator = vm->def->emulator;
-
-    ADD_ARG_LIT(emulator);
-    ADD_ARG_LIT("--name");
-    ADD_ARG_LIT(vm->def->name);
-    ADD_ARG_LIT("--console");
-    ADD_ARG_LIT(appPtyStr);
-    ADD_ARG_LIT("--background");
-
-    for (i = 0 ; i < nveths ; i++) {
-        ADD_ARG_LIT("--veth");
-        ADD_ARG_LIT(veths[i]);
-    }
-
-    ADD_ARG(NULL);
-
-    FD_SET(appPty, &keepfd);
-
-    if (virExec(conn, largv, NULL, &keepfd, &child,
-                -1, &logfd, &logfd,
-                VIR_EXEC_NONE) < 0)
-        goto cleanup;
-
-    /* We now wait for the process to exit - the controller
-     * will fork() itself into the background - waiting for
-     * it to exit thus guarantees it has written its pidfile
-     */
-    /* The assignment must be parenthesised: otherwise rc receives the
-     * result of the comparison and the failure check below never fires */
-    while ((rc = waitpid(child, &status, 0)) == -1 && errno == EINTR)
-        ; /* retry when interrupted */
-    if (rc == -1) {
-        virReportSystemError(conn, errno,
-                             _("cannot wait for '%s'"),
-                             largv[0]);
-        goto cleanup;
-    }
-
-    if (!(WIFEXITED(status) && WEXITSTATUS(status) == 0)) {
-        lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
-                 _("container '%s' unexpectedly shutdown during startup"),
-                 largv[0]);
-        goto cleanup;
-    }
-
-#undef ADD_ARG
-#undef ADD_ARG_LIT
-#undef ADD_ARG_SPACE
-
-    ret = 0;
-
-cleanup:
-    /* Release every duplicated argument and then the vector itself */
-    for (i = 0 ; i < largc ; i++)
-        VIR_FREE(largv[i]);
-    VIR_FREE(largv);
-
-    return ret;
-
-no_memory:
-    virReportOOMError(conn);
-    goto cleanup;
-}
-
-
-/**
- * lxcVmStart:
- * @conn: pointer to connection
- * @driver: pointer to driver structure
- * @vm: pointer to virtual machine structure
- *
- * Starts a vm: allocates the console tty, sets up the veth interfaces,
- * saves the live configuration, launches the controller, connects to
- * its monitor socket, reads its pid and registers the monitor watch.
- *
- * Returns 0 on success or -1 in case of error
- */
-static int lxcVmStart(virConnectPtr conn,
-                      lxc_driver_t * driver,
-                      virDomainObjPtr vm)
-{
-    int ret = -1;       /* function result; only set to 0 at the very end */
-    int rc;             /* scratch result of individual helper calls */
-    unsigned int i;
-    int parentTty = -1;
-    char *parentTtyPath = NULL;
-    char *logfile = NULL;
-    int logfd = -1;
-    unsigned int nveths = 0;
-    char **veths = NULL;
-
-    /* virFileMakePath returns 0 or an errno value, as it is used in
-     * lxcDomainSetAutostart */
-    if ((rc = virFileMakePath(driver->logDir)) != 0) {
-        virReportSystemError(conn, rc,
-                             _("cannot create log directory '%s'"),
-                             driver->logDir);
-        return -1;
-    }
-
-    if (virAsprintf(&logfile, "%s/%s.log",
-                    driver->logDir, vm->def->name) < 0) {
-        virReportOOMError(conn);
-        return -1;
-    }
-
-    /* open parent tty */
-    if (virFileOpenTty(&parentTty, &parentTtyPath, 1) < 0) {
-        virReportSystemError(conn, errno, "%s",
-                             _("failed to allocate tty"));
-        goto cleanup;
-    }
-    if (vm->def->console &&
-        vm->def->console->type == VIR_DOMAIN_CHR_TYPE_PTY) {
-        VIR_FREE(vm->def->console->data.file.path);
-        vm->def->console->data.file.path = parentTtyPath;
-    } else {
-        VIR_FREE(parentTtyPath);
-    }
-
-    if (lxcSetupInterfaces(conn, vm->def, &nveths, &veths) != 0)
-        goto cleanup;
-
-    /* Persist the live configuration now we have veth & tty info */
-    if (virDomainSaveConfig(conn, driver->stateDir, vm->def) < 0)
-        goto cleanup;
-
-    if ((logfd = open(logfile, O_WRONLY | O_TRUNC | O_CREAT,
-             S_IRUSR|S_IWUSR)) < 0) {
-        virReportSystemError(conn, errno,
-                             _("failed to open '%s'"),
-                             logfile);
-        goto cleanup;
-    }
-
-    if (lxcControllerStart(conn,
-                           vm,
-                           nveths, veths,
-                           parentTty, logfd) < 0)
-        goto cleanup;
-
-    /* Connect to the controller as a client *first* because
-     * this will block until the child has written their
-     * pid file out to disk */
-    if ((vm->monitor = lxcMonitorClient(conn, driver, vm)) < 0)
-        goto cleanup;
-
-    /* And get its pid */
-    if ((rc = virFileReadPid(driver->stateDir, vm->def->name, &vm->pid)) != 0) {
-        virReportSystemError(conn, rc,
-                             _("Failed to read pid file %s/%s.pid"),
-                             driver->stateDir, vm->def->name);
-        goto cleanup;
-    }
-
-    vm->def->id = vm->pid;
-    vm->state = VIR_DOMAIN_RUNNING;
-
-    if ((vm->monitorWatch = virEventAddHandle(
-             vm->monitor,
-             VIR_EVENT_HANDLE_ERROR | VIR_EVENT_HANDLE_HANGUP,
-             lxcMonitorEvent,
-             driver, NULL)) < 0) {
-        lxcVmTerminate(conn, driver, vm, 0);
-        goto cleanup;
-    }
-
-    ret = 0;
-
-cleanup:
-    for (i = 0 ; i < nveths ; i++) {
-        if (ret != 0)
-            vethDelete(veths[i]);
-        VIR_FREE(veths[i]);
-    }
-    VIR_FREE(veths);
-    if (ret != 0 && vm->monitor != -1) {
-        close(vm->monitor);
-        vm->monitor = -1;
-    }
-    if (parentTty != -1)
-        close(parentTty);
-    if (logfd != -1)
-        close(logfd);
-    VIR_FREE(logfile);
-    return ret;
-}
-
-/**
- * lxcDomainStart:
- * @dom: domain to start
- *
- * Finds the domain by name and starts it, queueing a STARTED/BOOTED
- * event when the start succeeds.  Domains with network interfaces are
- * refused when the host lacks NETNS support.
- *
- * Returns 0 on success or -1 in case of error
- */
-static int lxcDomainStart(virDomainPtr dom)
-{
-    lxc_driver_t *driver = dom->conn->privateData;
-    virDomainEventPtr startedEvent = NULL;
-    virDomainObjPtr vm;
-    int ret = -1;
-
-    lxcDriverLock(driver);
-
-    if (!(vm = virDomainFindByName(&driver->domains, dom->name))) {
-        lxcError(dom->conn, dom, VIR_ERR_INVALID_DOMAIN,
-                 _("no domain named %s"), dom->name);
-        goto cleanup;
-    }
-
-    if (vm->def->nets != NULL && !driver->have_netns) {
-        lxcError(dom->conn, NULL, VIR_ERR_NO_SUPPORT,
-                 "%s", _("System lacks NETNS support"));
-        goto cleanup;
-    }
-
-    if ((ret = lxcVmStart(dom->conn, driver, vm)) == 0) {
-        startedEvent = virDomainEventNewFromObj(vm,
-                                                VIR_DOMAIN_EVENT_STARTED,
-                                                VIR_DOMAIN_EVENT_STARTED_BOOTED);
-    }
-
-cleanup:
-    if (vm != NULL)
-        virDomainObjUnlock(vm);
-    if (startedEvent != NULL)
-        lxcDomainEventQueue(driver, startedEvent);
-    lxcDriverUnlock(driver);
-    return ret;
-}
-
-/**
- * lxcDomainCreateAndStart:
- * @conn: pointer to connection
- * @xml: XML definition of domain
- * @flags: Unused
- *
- * Creates a domain based on xml and starts it.  The request is refused
- * if the UUID or name clashes with a different domain, or if a domain
- * with the same UUID and name is already active.
- *
- * Returns the new domain object on success or NULL in case of error
- */
-static virDomainPtr
-lxcDomainCreateAndStart(virConnectPtr conn,
-                        const char *xml,
-                        unsigned int flags ATTRIBUTE_UNUSED) {
-    lxc_driver_t *driver = conn->privateData;
-    virDomainObjPtr vm = NULL;
-    virDomainDefPtr def;
-    virDomainPtr dom = NULL;
-    virDomainEventPtr event = NULL;
-
-    lxcDriverLock(driver);
-    if (!(def = virDomainDefParseString(conn, driver->caps, xml,
-                                        VIR_DOMAIN_XML_INACTIVE)))
-        goto cleanup;
-
-    /* See if a VM with matching UUID already exists */
-    vm = virDomainFindByUUID(&driver->domains, def->uuid);
-    if (vm) {
-        /* UUID matches, but if names don't match, refuse it */
-        if (STRNEQ(vm->def->name, def->name)) {
-            char uuidstr[VIR_UUID_STRING_BUFLEN];
-            virUUIDFormat(vm->def->uuid, uuidstr);
-            lxcError(conn, NULL, VIR_ERR_OPERATION_FAILED,
-                     _("domain '%s' is already defined with uuid %s"),
-                     vm->def->name, uuidstr);
-            goto cleanup;
-        }
-
-        /* UUID & name match, but if VM is already active, refuse it */
-        if (virDomainIsActive(vm)) {
-            lxcError(conn, NULL, VIR_ERR_OPERATION_FAILED,
-                     _("domain is already active as '%s'"), vm->def->name);
-            goto cleanup;
-        }
-        /* Forget the object once it is unlocked, so that a later
-         * "goto cleanup" does not unlock it a second time */
-        virDomainObjUnlock(vm);
-        vm = NULL;
-    } else {
-        /* UUID does not match, but if a name matches, refuse it */
-        vm = virDomainFindByName(&driver->domains, def->name);
-        if (vm) {
-            char uuidstr[VIR_UUID_STRING_BUFLEN];
-            virUUIDFormat(vm->def->uuid, uuidstr);
-            lxcError(conn, NULL, VIR_ERR_OPERATION_FAILED,
-                     _("domain '%s' is already defined with uuid %s"),
-                     def->name, uuidstr);
-            goto cleanup;
-        }
-    }
-
-    if ((def->nets != NULL) && !(driver->have_netns)) {
-        lxcError(conn, NULL, VIR_ERR_NO_SUPPORT,
-                 "%s", _("System lacks NETNS support"));
-        goto cleanup;
-    }
-
-
-    if (!(vm = virDomainAssignDef(conn, &driver->domains, def)))
-        goto cleanup;
-    /* def is cleared here so cleanup does not free it after assignment */
-    def = NULL;
-
-    if (lxcVmStart(conn, driver, vm) < 0) {
-        virDomainRemoveInactive(&driver->domains, vm);
-        vm = NULL;
-        goto cleanup;
-    }
-
-    event = virDomainEventNewFromObj(vm,
-                                     VIR_DOMAIN_EVENT_STARTED,
-                                     VIR_DOMAIN_EVENT_STARTED_BOOTED);
-
-    dom = virGetDomain(conn, vm->def->name, vm->def->uuid);
-    if (dom)
-        dom->id = vm->def->id;
-
-cleanup:
-    virDomainDefFree(def);
-    if (vm)
-        virDomainObjUnlock(vm);
-    if (event)
-        lxcDomainEventQueue(driver, event);
-    lxcDriverUnlock(driver);
-    return dom;
-}
-
-/**
- * lxcDomainShutdown:
- * @dom: Ptr to domain to shutdown
- *
- * Looks the domain up by id and terminates it with the default signal
- * (lxcVmTerminate maps 0 to SIGINT).  A STOPPED/SHUTDOWN event is queued
- * and transient domains are removed from the domain list.
- *
- * Returns 0 on success or -1 in case of error
- */
-static int lxcDomainShutdown(virDomainPtr dom)
-{
-    lxc_driver_t *driver = dom->conn->privateData;
-    virDomainEventPtr stopEvent = NULL;
-    virDomainObjPtr vm;
-    int ret = -1;
-
-    lxcDriverLock(driver);
-
-    if (!(vm = virDomainFindByID(&driver->domains, dom->id))) {
-        lxcError(dom->conn, dom, VIR_ERR_INVALID_DOMAIN,
-                 _("no domain with id %d"), dom->id);
-        goto cleanup;
-    }
-
-    ret = lxcVmTerminate(dom->conn, driver, vm, 0);
-    stopEvent = virDomainEventNewFromObj(vm,
-                                         VIR_DOMAIN_EVENT_STOPPED,
-                                         VIR_DOMAIN_EVENT_STOPPED_SHUTDOWN);
-
-    /* A transient domain disappears once it has stopped */
-    if (!vm->persistent) {
-        virDomainRemoveInactive(&driver->domains, vm);
-        vm = NULL;
-    }
-
-cleanup:
-    if (vm != NULL)
-        virDomainObjUnlock(vm);
-    if (stopEvent != NULL)
-        lxcDomainEventQueue(driver, stopEvent);
-    lxcDriverUnlock(driver);
-    return ret;
-}
-
-
-/*
- * Add @callback (with its @opaque data and @freecb) to the driver's
- * list of domain event callbacks while holding the driver lock.
- *
- * Returns whatever virDomainEventCallbackListAdd() returns.
- */
-static int
-lxcDomainEventRegister (virConnectPtr conn,
-                        virConnectDomainEventCallback callback,
-                        void *opaque,
-                        virFreeCallback freecb)
-{
-    lxc_driver_t *driver = conn->privateData;
-    int added;
-
-    lxcDriverLock(driver);
-    added = virDomainEventCallbackListAdd(conn,
-                                          driver->domainEventCallbacks,
-                                          callback,
-                                          opaque,
-                                          freecb);
-    lxcDriverUnlock(driver);
-
-    return added;
-}
-
-/*
- * Remove @callback from the driver's list of domain event callbacks.
- *
- * While events are being dispatched the entry is only marked for
- * deletion; otherwise it is removed from the list right away.
- *
- * Returns the result of the list operation that was used.
- */
-static int
-lxcDomainEventDeregister (virConnectPtr conn,
-                          virConnectDomainEventCallback callback)
-{
-    lxc_driver_t *driver = conn->privateData;
-    int removed;
-
-    lxcDriverLock(driver);
-    if (!driver->domainEventDispatching) {
-        removed = virDomainEventCallbackListRemove(conn,
-                                                   driver->domainEventCallbacks,
-                                                   callback);
-    } else {
-        removed = virDomainEventCallbackListMarkDelete(conn,
-                                                       driver->domainEventCallbacks,
-                                                       callback);
-    }
-    lxcDriverUnlock(driver);
-
-    return removed;
-}
-
-/*
- * Per-callback dispatch hook handed to virDomainEventQueueDispatch().
- * @opaque is the lxc_driver_t whose lock the caller holds.
- */
-static void lxcDomainEventDispatchFunc(virConnectPtr conn,
-                                       virDomainEventPtr event,
-                                       virConnectDomainEventCallback cb,
-                                       void *cbopaque,
-                                       void *opaque)
-{
-    lxc_driver_t *lockedDriver = opaque;
-
-    /* The driver lock is released for the duration of the user
-     * callback, for the sake of re-entrancy, and re-taken afterwards */
-    lxcDriverUnlock(lockedDriver);
-    virDomainEventDispatchDefaultFunc(conn, event, cb, cbopaque, NULL);
-    lxcDriverLock(lockedDriver);
-}
-
-
-/*
- * Timer callback that delivers all queued domain events.
- * @opaque is the lxc_driver_t owning the queue.
- */
-static void lxcDomainEventFlush(int timer ATTRIBUTE_UNUSED, void *opaque)
-{
-    lxc_driver_t *driver = opaque;
-    virDomainEventQueue pending;
-
-    lxcDriverLock(driver);
-
-    driver->domainEventDispatching = 1;
-
-    /* Take over the queued events and leave the driver queue empty,
-     * so we're reentrant safe */
-    pending.count = driver->domainEventQueue->count;
-    pending.events = driver->domainEventQueue->events;
-    driver->domainEventQueue->count = 0;
-    driver->domainEventQueue->events = NULL;
-
-    /* Disarm the timer before dispatching */
-    virEventUpdateTimeout(driver->domainEventTimer, -1);
-    virDomainEventQueueDispatch(&pending,
-                                driver->domainEventCallbacks,
-                                lxcDomainEventDispatchFunc,
-                                driver);
-
-    /* Callbacks deregistered during dispatch were only marked; drop them */
-    virDomainEventCallbackListPurgeMarked(driver->domainEventCallbacks);
-
-    driver->domainEventDispatching = 0;
-    lxcDriverUnlock(driver);
-}
-
-
-/*
- * Append @event to @driver's event queue; the event is freed if it
- * cannot be queued.  When the queue then holds exactly one event the
- * flush timer is armed to fire immediately.
- *
- * driver must be locked before calling
- */
-static void lxcDomainEventQueue(lxc_driver_t *driver,
-                                 virDomainEventPtr event)
-{
-    if (virDomainEventQueuePush(driver->domainEventQueue,
-                                event) < 0)
-        virDomainEventFree(event);
-    /* Inspect the queue of the driver we were given, not the global one */
-    if (driver->domainEventQueue->count == 1)
-        virEventUpdateTimeout(driver->domainEventTimer, 0);
-}
-
-/**
- * lxcDomainDestroy:
- * @dom: Ptr to domain to destroy
- *
- * Looks the domain up by id and terminates it with SIGKILL.  A
- * STOPPED/DESTROYED event is queued and transient domains are removed
- * from the domain list.
- *
- * Returns 0 on success or -1 in case of error
- */
-static int lxcDomainDestroy(virDomainPtr dom)
-{
-    lxc_driver_t *driver = dom->conn->privateData;
-    virDomainEventPtr stopEvent = NULL;
-    virDomainObjPtr vm;
-    int ret = -1;
-
-    lxcDriverLock(driver);
-
-    if (!(vm = virDomainFindByID(&driver->domains, dom->id))) {
-        lxcError(dom->conn, dom, VIR_ERR_INVALID_DOMAIN,
-                 _("no domain with id %d"), dom->id);
-        goto cleanup;
-    }
-
-    ret = lxcVmTerminate(dom->conn, driver, vm, SIGKILL);
-    stopEvent = virDomainEventNewFromObj(vm,
-                                         VIR_DOMAIN_EVENT_STOPPED,
-                                         VIR_DOMAIN_EVENT_STOPPED_DESTROYED);
-
-    /* A transient domain disappears once it has stopped */
-    if (!vm->persistent) {
-        virDomainRemoveInactive(&driver->domains, vm);
-        vm = NULL;
-    }
-
-cleanup:
-    if (vm != NULL)
-        virDomainObjUnlock(vm);
-    if (stopEvent != NULL)
-        lxcDomainEventQueue(driver, stopEvent);
-    lxcDriverUnlock(driver);
-    return ret;
-}
-
-/*
- * Probe for network namespace support by running
- * "ip link set lo netns -1" and asking lxcContainerAvailable() for
- * LXC_CONTAINER_FEATURE_NET.
- *
- * Returns 1 if the command ran, exited normally with a status other
- * than 255 and the container check passed; 0 otherwise.
- */
-static int lxcCheckNetNsSupport(void)
-{
-    const char *argv[] = {"ip", "link", "set", "lo", "netns", "-1", NULL};
-    int ip_rc;
-
-    if (virRun(NULL, argv, &ip_rc) < 0)
-        return 0;
-
-    if (!WIFEXITED(ip_rc) || WEXITSTATUS(ip_rc) == 255)
-        return 0;
-
-    return (lxcContainerAvailable(LXC_CONTAINER_FEATURE_NET) < 0) ? 0 : 1;
-}
-
-
-/*
- * Start every domain that is flagged for autostart and is not active.
- * A failed start is logged; a successful one queues a STARTED/BOOTED
- * event.
- */
-static void
-lxcAutostartConfigs(lxc_driver_t *driver) {
-    unsigned int idx;
-    /* XXX: Figure out a better way to do this. The domain
-     * startup code needs a connection handle in order
-     * to lookup the bridge associated with a virtual
-     * network
-     */
-    virConnectPtr conn = virConnectOpen("lxc:///");
-    /* Ignoring NULL conn which is mostly harmless here */
-
-    lxcDriverLock(driver);
-    for (idx = 0 ; idx < driver->domains.count ; idx++) {
-        virDomainObjPtr vm = driver->domains.objs[idx];
-
-        virDomainObjLock(vm);
-
-        /* Nothing to do unless autostart is set and the domain is down */
-        if (!vm->autostart || virDomainIsActive(vm)) {
-            virDomainObjUnlock(vm);
-            continue;
-        }
-
-        if (lxcVmStart(conn, driver, vm) < 0) {
-            virErrorPtr err = virGetLastError();
-            VIR_ERROR(_("Failed to autostart VM '%s': %s\n"),
-                      vm->def->name,
-                      err ? err->message : "");
-        } else {
-            virDomainEventPtr started =
-                virDomainEventNewFromObj(vm,
-                                         VIR_DOMAIN_EVENT_STARTED,
-                                         VIR_DOMAIN_EVENT_STARTED_BOOTED);
-            if (started)
-                lxcDomainEventQueue(driver, started);
-        }
-
-        virDomainObjUnlock(vm);
-    }
-    lxcDriverUnlock(driver);
-
-    if (conn)
-        virConnectClose(conn);
-}
-
-
-/*
- * State-driver initialisation.
- *
- * Allocates the global lxc_driver, sets up the event queue and its
- * flush timer, probes NETNS and cgroup support, loads the driver and
- * domain configuration and finally tries to reconnect to the
- * controller of every loaded domain.
- *
- * @privileged: non-zero when running as root; the driver refuses to
- *              start otherwise
- *
- * Returns 0 on success, -1 if the driver is disabled or setup failed.
- */
-static int lxcStartup(int privileged)
-{
-    unsigned int i;
-    char *ld;
-    int rc;
-
-    /* Valgrind gets very annoyed when we clone containers, so
-     * disable LXC when under valgrind
-     * XXX remove this when valgrind is fixed
-     */
-    ld = getenv("LD_PRELOAD");
-    if (ld && strstr(ld, "vgpreload"))
-        return -1;
-
-    /* Check that the user is root */
-    if (!privileged) {
-        return -1;
-    }
-
-    if (VIR_ALLOC(lxc_driver) < 0) {
-        return -1;
-    }
-    if (virMutexInit(&lxc_driver->lock) < 0) {
-        VIR_FREE(lxc_driver);
-        return -1;
-    }
-    lxcDriverLock(lxc_driver);
-
-    /* Check that this is a container enabled kernel */
-    if (lxcContainerAvailable(0) < 0) {
-        VIR_INFO0("LXC support not available in this kernel, disabling driver");
-        goto cleanup;
-    }
-
-    if (VIR_ALLOC(lxc_driver->domainEventCallbacks) < 0)
-        goto cleanup;
-    if (!(lxc_driver->domainEventQueue = virDomainEventQueueNew()))
-        goto cleanup;
-
-    if ((lxc_driver->domainEventTimer =
-         virEventAddTimeout(-1, lxcDomainEventFlush, lxc_driver, NULL)) < 0)
-        goto cleanup;
-
-    lxc_driver->have_netns = lxcCheckNetNsSupport();
-
-    /* A missing cgroup is only warned about; startup continues */
-    rc = virCgroupForDriver("lxc", &lxc_driver->cgroup, privileged, 1);
-    if (rc < 0) {
-        char buf[1024];
-        VIR_WARN("Unable to create cgroup for driver: %s",
-                 virStrerror(-rc, buf, sizeof(buf)));
-    }
-
-    /* Call function to load lxc driver configuration information */
-    if (lxcLoadDriverConfig(lxc_driver) < 0)
-        goto cleanup;
-
-    if ((lxc_driver->caps = lxcCapsInit()) == NULL)
-        goto cleanup;
-
-    if (virDomainLoadAllConfigs(NULL,
-                                lxc_driver->caps,
-                                &lxc_driver->domains,
-                                lxc_driver->configDir,
-                                lxc_driver->autostartDir,
-                                0, NULL, NULL) < 0)
-        goto cleanup;
-
-    /* Try to reconnect to the controller of each loaded domain; a
-     * domain whose controller cannot be reached is simply skipped */
-    for (i = 0 ; i < lxc_driver->domains.count ; i++) {
-        virDomainObjPtr vm = lxc_driver->domains.objs[i];
-        char *config = NULL;
-        virDomainDefPtr tmp;
-
-        virDomainObjLock(vm);
-        if ((vm->monitor = lxcMonitorClient(NULL, lxc_driver, vm)) < 0) {
-            virDomainObjUnlock(vm);
-            continue;
-        }
-
-        /* Read pid from controller */
-        if ((rc = virFileReadPid(lxc_driver->stateDir, vm->def->name, &vm->pid)) != 0) {
-            close(vm->monitor);
-            vm->monitor = -1;
-            virDomainObjUnlock(vm);
-            continue;
-        }
-
-        if ((config = virDomainConfigFile(NULL,
-                                          lxc_driver->stateDir,
-                                          vm->def->name)) == NULL) {
-            /* Do not leave the monitor connection open for a domain
-             * that is not going to be marked as running */
-            close(vm->monitor);
-            vm->monitor = -1;
-            virDomainObjUnlock(vm);
-            continue;
-        }
-
-        /* Try and load the live config */
-        tmp = virDomainDefParseFile(NULL, lxc_driver->caps, config, 0);
-        VIR_FREE(config);
-        if (tmp) {
-            vm->newDef = vm->def;
-            vm->def = tmp;
-        }
-
-        if (vm->pid != 0) {
-            vm->def->id = vm->pid;
-            vm->state = VIR_DOMAIN_RUNNING;
-        } else {
-            vm->def->id = -1;
-            close(vm->monitor);
-            vm->monitor = -1;
-        }
-        virDomainObjUnlock(vm);
-    }
-
-    lxcDriverUnlock(lxc_driver);
-    return 0;
-
-cleanup:
-    lxcDriverUnlock(lxc_driver);
-    lxcShutdown();
-    return -1;
-}
-
-/*
- * Callback passed to virDomainLoadAllConfigs() by lxcReload().
- * Queues a DEFINED/ADDED event for a domain reported as new (@newVM
- * non-zero); @opaque is the lxc_driver_t.
- */
-static void lxcNotifyLoadDomain(virDomainObjPtr vm, int newVM, void *opaque)
-{
-    lxc_driver_t *driver = opaque;
-    virDomainEventPtr definedEvent;
-
-    if (!newVM)
-        return;
-
-    definedEvent = virDomainEventNewFromObj(vm,
-                                            VIR_DOMAIN_EVENT_DEFINED,
-                                            VIR_DOMAIN_EVENT_DEFINED_ADDED);
-    if (definedEvent)
-        lxcDomainEventQueue(driver, definedEvent);
-}
-
-/**
- * lxcReload:
- *
- * Re-reads the domain configuration files (announcing newly found
- * domains through lxcNotifyLoadDomain) and then runs autostart.
- * Does nothing when the driver is not initialised.
- *
- * Always returns 0
- */
-static int
-lxcReload(void) {
-    if (lxc_driver != NULL) {
-        lxcDriverLock(lxc_driver);
-        virDomainLoadAllConfigs(NULL,
-                                lxc_driver->caps,
-                                &lxc_driver->domains,
-                                lxc_driver->configDir,
-                                lxc_driver->autostartDir,
-                                0, lxcNotifyLoadDomain, lxc_driver);
-        lxcDriverUnlock(lxc_driver);
-
-        /* Autostart takes the driver lock itself */
-        lxcAutostartConfigs(lxc_driver);
-    }
-
-    return 0;
-}
-
-/*
- * State-driver cleanup: releases everything owned by the global
- * lxc_driver and then the driver structure itself.
- *
- * Returns 0 on success, -1 if the driver was never initialised.
- */
-static int lxcShutdown(void)
-{
- if (lxc_driver == NULL)
- return(-1);
-
- lxcDriverLock(lxc_driver);
- virDomainObjListFree(&lxc_driver->domains);
-
- virDomainEventCallbackListFree(lxc_driver->domainEventCallbacks);
- virDomainEventQueueFree(lxc_driver->domainEventQueue);
-
- if (lxc_driver->domainEventTimer != -1)
- virEventRemoveTimeout(lxc_driver->domainEventTimer);
-
- virCapabilitiesFree(lxc_driver->caps);
- VIR_FREE(lxc_driver->configDir);
- VIR_FREE(lxc_driver->autostartDir);
- VIR_FREE(lxc_driver->stateDir);
- VIR_FREE(lxc_driver->logDir);
- /* The lock is released before the mutex is destroyed and the struct freed */
- lxcDriverUnlock(lxc_driver);
- virMutexDestroy(&lxc_driver->lock);
- VIR_FREE(lxc_driver);
-
- return 0;
-}
-
-/**
- * lxcActive:
- *
- * Reports whether any domain known to the driver is currently active.
- * Every domain is visited (and locked while it is checked).
- *
- * Returns 1 if active, 0 otherwise
- */
-static int
-lxcActive(void) {
-    unsigned int idx;
-    int anyActive = 0;
-
-    if (lxc_driver == NULL)
-        return 0;
-
-    lxcDriverLock(lxc_driver);
-    for (idx = 0 ; idx < lxc_driver->domains.count ; idx++) {
-        virDomainObjPtr vm = lxc_driver->domains.objs[idx];
-
-        virDomainObjLock(vm);
-        if (virDomainIsActive(vm))
-            anyActive = 1;
-        virDomainObjUnlock(vm);
-    }
-    lxcDriverUnlock(lxc_driver);
-
-    return anyActive;
-}
-
-/*
- * Report the version of the running kernel.
- *
- * @conn: connection used for error reporting
- * @version: receives major * 1,000,000 + minor * 1,000 + revision
- *
- * Returns 0 on success, -1 if the kernel release cannot be obtained
- * or is not of the form "maj.min.rev".
- */
-static int lxcVersion(virConnectPtr conn, unsigned long *version)
-{
-    struct utsname ver;
-    int maj;
-    int min;
-    int rev;
-
-    /* Without this check a failed uname() would leave ver.release
-     * uninitialised for the sscanf() below */
-    if (uname(&ver) < 0) {
-        virReportSystemError(conn, errno, "%s",
-                             _("cannot determine kernel release"));
-        return -1;
-    }
-
-    if (sscanf(ver.release, "%i.%i.%i", &maj, &min, &rev) != 3) {
-        lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
-                 _("Unknown release: %s"), ver.release);
-        return -1;
-    }
-
-    *version = (maj * 1000 * 1000) + (min * 1000) + rev;
-
-    return 0;
-}
-
-/*
- * Report the scheduler type, which is always "posix" with a single
- * parameter.  @nparams may be NULL when the count is not wanted.
- *
- * Returns a newly allocated string, i.e. the result of strdup().
- */
-static char *lxcGetSchedulerType(virDomainPtr domain ATTRIBUTE_UNUSED,
-                                 int *nparams)
-{
-    char *schedType;
-
-    if (nparams != NULL)
-        *nparams = 1;
-
-    schedType = strdup("posix");
-    return schedType;
-}
-
-/*
- * Apply scheduler tunables to a domain's cgroup.
- *
- * @domain: domain to modify, looked up by UUID
- * @params: array of parameters; only "cpu_shares" of type ullong is
- *          accepted
- * @nparams: number of entries in @params
- *
- * Returns 0 on success, -1 on error (no driver cgroup, unknown domain,
- * wrong parameter type or name, or cgroup failure).
- */
-static int lxcSetSchedulerParameters(virDomainPtr domain,
-                                     virSchedParameterPtr params,
-                                     int nparams)
-{
-    lxc_driver_t *driver = domain->conn->privateData;
-    int i;
-    virCgroupPtr group = NULL;
-    virDomainObjPtr vm = NULL;
-    int ret = -1;
-
-    if (driver->cgroup == NULL)
-        return -1;
-
-    lxcDriverLock(driver);
-    vm = virDomainFindByUUID(&driver->domains, domain->uuid);
-
-    if (vm == NULL) {
-        /* The UUID is raw bytes, so format it before printing */
-        char uuidstr[VIR_UUID_STRING_BUFLEN];
-        virUUIDFormat(domain->uuid, uuidstr);
-        lxcError(NULL, domain, VIR_ERR_INTERNAL_ERROR,
-                 _("No such domain %s"), uuidstr);
-        goto cleanup;
-    }
-
-    if (virCgroupForDomain(driver->cgroup, vm->def->name, &group, 0) != 0)
-        goto cleanup;
-
-    for (i = 0; i < nparams; i++) {
-        virSchedParameterPtr param = &params[i];
-        if (param->type != VIR_DOMAIN_SCHED_FIELD_ULLONG) {
-            lxcError(NULL, domain, VIR_ERR_INVALID_ARG,
-                     "%s", _("invalid type for cpu_shares tunable, expected a 'ullong'"));
-            goto cleanup;
-        }
-
-        if (STREQ(param->field, "cpu_shares")) {
-            if (virCgroupSetCpuShares(group, params[i].value.ul) != 0)
-                goto cleanup;
-        } else {
-            lxcError(NULL, domain, VIR_ERR_INVALID_ARG,
-                     _("Invalid parameter `%s'"), param->field);
-            goto cleanup;
-        }
-    }
-    ret = 0;
-
-cleanup:
-    lxcDriverUnlock(driver);
-    virCgroupFree(&group);
-    if (vm)
-        virDomainObjUnlock(vm);
-    return ret;
-}
-
-/*
- * Read the scheduler tunables of a domain from its cgroup.
- *
- * @domain: domain to query, looked up by UUID
- * @params: array receiving the single "cpu_shares" parameter
- * @nparams: must point to the value 1
- *
- * Returns 0 on success, -1 on error (no driver cgroup, wrong parameter
- * count, unknown domain or cgroup failure).
- */
-static int lxcGetSchedulerParameters(virDomainPtr domain,
-                                     virSchedParameterPtr params,
-                                     int *nparams)
-{
-    lxc_driver_t *driver = domain->conn->privateData;
-    virCgroupPtr group = NULL;
-    virDomainObjPtr vm = NULL;
-    unsigned long long val;
-    int ret = -1;
-
-    if (driver->cgroup == NULL)
-        return -1;
-
-    if ((*nparams) != 1) {
-        lxcError(NULL, domain, VIR_ERR_INVALID_ARG,
-                 "%s", _("Invalid parameter count"));
-        return -1;
-    }
-
-    lxcDriverLock(driver);
-    vm = virDomainFindByUUID(&driver->domains, domain->uuid);
-
-    if (vm == NULL) {
-        /* The UUID is raw bytes, so format it before printing */
-        char uuidstr[VIR_UUID_STRING_BUFLEN];
-        virUUIDFormat(domain->uuid, uuidstr);
-        lxcError(NULL, domain, VIR_ERR_INTERNAL_ERROR,
-                 _("No such domain %s"), uuidstr);
-        goto cleanup;
-    }
-
-    if (virCgroupForDomain(driver->cgroup, vm->def->name, &group, 0) != 0)
-        goto cleanup;
-
-    if (virCgroupGetCpuShares(group, &val) != 0)
-        goto cleanup;
-    params[0].value.ul = val;
-    strncpy(params[0].field, "cpu_shares", sizeof(params[0].field));
-    params[0].type = VIR_DOMAIN_SCHED_FIELD_ULLONG;
-
-    ret = 0;
-
-cleanup:
-    lxcDriverUnlock(driver);
-    virCgroupFree(&group);
-    if (vm)
-        virDomainObjUnlock(vm);
-    return ret;
-}
-
-/*
- * Fetch the autostart flag of the domain identified by @dom's UUID
- * into @autostart.
- *
- * Returns 0 on success, -1 if no such domain exists.
- */
-static int lxcDomainGetAutostart(virDomainPtr dom,
-                                 int *autostart) {
-    lxc_driver_t *driver = dom->conn->privateData;
-    virDomainObjPtr vm;
-    char uuidstr[VIR_UUID_STRING_BUFLEN];
-
-    /* The driver lock is only needed for the lookup itself */
-    lxcDriverLock(driver);
-    vm = virDomainFindByUUID(&driver->domains, dom->uuid);
-    lxcDriverUnlock(driver);
-
-    if (vm) {
-        *autostart = vm->autostart;
-        virDomainObjUnlock(vm);
-        return 0;
-    }
-
-    virUUIDFormat(dom->uuid, uuidstr);
-    lxcError(dom->conn, dom, VIR_ERR_NO_DOMAIN,
-             _("no domain with matching uuid '%s'"), uuidstr);
-    return -1;
-}
-
-/*
- * Enable or disable autostart for a persistent domain.
- *
- * Enabling creates a symlink to the domain's config file in the
- * autostart directory (creating that directory if needed); disabling
- * removes the symlink.  Nothing is done when the flag already has the
- * requested value.
- *
- * @dom: domain to modify, looked up by UUID
- * @autostart: any non-zero value enables autostart
- *
- * Returns 0 on success, -1 on error.
- */
-static int lxcDomainSetAutostart(virDomainPtr dom,
-                                 int autostart) {
-    lxc_driver_t *driver = dom->conn->privateData;
-    virDomainObjPtr vm;
-    char *configFile = NULL, *autostartLink = NULL;
-    int ret = -1;
-
-    lxcDriverLock(driver);
-    vm = virDomainFindByUUID(&driver->domains, dom->uuid);
-
-    if (!vm) {
-        char uuidstr[VIR_UUID_STRING_BUFLEN];
-        virUUIDFormat(dom->uuid, uuidstr);
-        lxcError(dom->conn, dom, VIR_ERR_NO_DOMAIN,
-                 _("no domain with matching uuid '%s'"), uuidstr);
-        goto cleanup;
-    }
-
-    if (!vm->persistent) {
-        lxcError(dom->conn, dom, VIR_ERR_INTERNAL_ERROR,
-                 "%s", _("cannot set autostart for transient domain"));
-        goto cleanup;
-    }
-
-    /* Normalise to 0/1 so it can be compared with the stored flag */
-    autostart = (autostart != 0);
-
-    if (vm->autostart != autostart) {
-        if ((configFile = virDomainConfigFile(dom->conn, driver->configDir, vm->def->name)) == NULL)
-            goto cleanup;
-        if ((autostartLink = virDomainConfigFile(dom->conn, driver->autostartDir, vm->def->name)) == NULL)
-            goto cleanup;
-
-        if (autostart) {
-            int err;
-
-            if ((err = virFileMakePath(driver->autostartDir))) {
-                virReportSystemError(dom->conn, err,
-                                     _("cannot create autostart directory %s"),
-                                     driver->autostartDir);
-                goto cleanup;
-            }
-
-            if (symlink(configFile, autostartLink) < 0) {
-                virReportSystemError(dom->conn, errno,
-                                     _("Failed to create symlink '%s' to '%s'"),
-                                     autostartLink, configFile);
-                goto cleanup;
-            }
-        } else {
-            /* A link that is already gone is not an error */
-            if (unlink(autostartLink) < 0 && errno != ENOENT && errno != ENOTDIR) {
-                virReportSystemError(dom->conn, errno,
-                                     _("Failed to delete symlink '%s'"),
-                                     autostartLink);
-                goto cleanup;
-            }
-        }
-
-        vm->autostart = autostart;
-    }
-    ret = 0;
-
-cleanup:
-    VIR_FREE(configFile);
-    VIR_FREE(autostartLink);
-    if (vm)
-        virDomainObjUnlock(vm);
-    lxcDriverUnlock(driver);
-    return ret;
-}
-
-/*
- * Return the host name as obtained from virGetHostname(), reporting a
- * system error on @conn when it cannot be determined.
- *
- * Returns the host name string (caller frees it) or NULL on failure.
- */
-static char *lxcGetHostname (virConnectPtr conn)
-{
-    char *hostname = virGetHostname();
-
-    if (hostname == NULL) {
-        virReportSystemError (conn, errno,
-                              "%s", _("failed to determine host name"));
-    }
-
-    /* NULL on failure; otherwise the caller frees this string. */
-    return hostname;
-}
-
-/* Function Tables */
-/*
- * Hypervisor driver table for LXC.  Entries are positional; the
- * trailing comment on each line names the slot it fills.  NULL marks
- * an operation this driver does not implement.
- */
-static virDriver lxcDriver = {
- VIR_DRV_LXC, /* the number virDrvNo */
- "LXC", /* the name of the driver */
- lxcOpen, /* open */
- lxcClose, /* close */
- NULL, /* supports_feature */
- NULL, /* type */
- lxcVersion, /* version */
- lxcGetHostname, /* getHostname */
- NULL, /* getMaxVcpus */
- nodeGetInfo, /* nodeGetInfo */
- lxcGetCapabilities, /* getCapabilities */
- lxcListDomains, /* listDomains */
- lxcNumDomains, /* numOfDomains */
- lxcDomainCreateAndStart, /* domainCreateXML */
- lxcDomainLookupByID, /* domainLookupByID */
- lxcDomainLookupByUUID, /* domainLookupByUUID */
- lxcDomainLookupByName, /* domainLookupByName */
- NULL, /* domainSuspend */
- NULL, /* domainResume */
- lxcDomainShutdown, /* domainShutdown */
- NULL, /* domainReboot */
- lxcDomainDestroy, /* domainDestroy */
- lxcGetOSType, /* domainGetOSType */
- NULL, /* domainGetMaxMemory */
- NULL, /* domainSetMaxMemory */
- NULL, /* domainSetMemory */
- lxcDomainGetInfo, /* domainGetInfo */
- NULL, /* domainSave */
- NULL, /* domainRestore */
- NULL, /* domainCoreDump */
- NULL, /* domainSetVcpus */
- NULL, /* domainPinVcpu */
- NULL, /* domainGetVcpus */
- NULL, /* domainGetMaxVcpus */
- NULL, /* domainGetSecurityLabel */
- NULL, /* nodeGetSecurityModel */
- lxcDomainDumpXML, /* domainDumpXML */
- NULL, /* domainXmlFromNative */
- NULL, /* domainXmlToNative */
- lxcListDefinedDomains, /* listDefinedDomains */
- lxcNumDefinedDomains, /* numOfDefinedDomains */
- lxcDomainStart, /* domainCreate */
- lxcDomainDefine, /* domainDefineXML */
- lxcDomainUndefine, /* domainUndefine */
- NULL, /* domainAttachDevice */
- NULL, /* domainDetachDevice */
- lxcDomainGetAutostart, /* domainGetAutostart */
- lxcDomainSetAutostart, /* domainSetAutostart */
- lxcGetSchedulerType, /* domainGetSchedulerType */
- lxcGetSchedulerParameters, /* domainGetSchedulerParameters */
- lxcSetSchedulerParameters, /* domainSetSchedulerParameters */
- NULL, /* domainMigratePrepare */
- NULL, /* domainMigratePerform */
- NULL, /* domainMigrateFinish */
- NULL, /* domainBlockStats */
- NULL, /* domainInterfaceStats */
- NULL, /* domainBlockPeek */
- NULL, /* domainMemoryPeek */
- nodeGetCellsFreeMemory, /* nodeGetCellsFreeMemory */
- nodeGetFreeMemory, /* getFreeMemory */
- lxcDomainEventRegister, /* domainEventRegister */
- lxcDomainEventDeregister, /* domainEventDeregister */
- NULL, /* domainMigratePrepare2 */
- NULL, /* domainMigrateFinish2 */
- NULL, /* nodeDeviceDettach */
- NULL, /* nodeDeviceReAttach */
- NULL, /* nodeDeviceReset */
-};
-
-/* State driver hooks: startup, shutdown, activity check and reload */
-static virStateDriver lxcStateDriver = {
- .initialize = lxcStartup,
- .cleanup = lxcShutdown,
- .active = lxcActive,
- .reload = lxcReload,
-};
-
-/*
- * Register the LXC hypervisor driver and its state driver.
- * The results of the two registration calls are not checked.
- *
- * Always returns 0.
- */
-int lxcRegister(void)
-{
- virRegisterDriver(&lxcDriver);
- virRegisterStateDriver(&lxcStateDriver);
- return 0;
-}
+++ /dev/null
-/*
- * Copyright IBM Corp. 2008
- *
- * lxc_driver.h: header file for linux container driver functions
- *
- * Authors:
- * David L. Leskovec <dlesko at linux.vnet.ibm.com>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#ifndef LXC_DRIVER_H
-#define LXC_DRIVER_H
-
-#include <config.h>
-
-/* Function declarations */
-int lxcRegister(void);
-
-#endif /* LXC_DRIVER_H */
#include "memory.h"
#include "util.h"
#include "bridge.h"
-#include "veth.h"
#define VIR_FROM_THIS VIR_FROM_ONE
+++ /dev/null
-/*
- * veth.c: Tools for managing veth pairs
- *
- * Copyright IBM Corp. 2008
- *
- * See COPYING.LIB for the License of this software
- *
- * Authors:
- * David L. Leskovec <dlesko at linux.vnet.ibm.com>
- */
-
-#include <config.h>
-
-#include <string.h>
-#include <stdio.h>
-
-#include "veth.h"
-#include "internal.h"
-#include "logging.h"
-#include "memory.h"
-#include "util.h"
-
-/* Functions */
-/**
- * getFreeVethName:
- * @veth: name for veth device (NULL to find first open)
- * @maxLen: max length of veth name
- * @startDev: device number to start at (x in vethx)
- *
- * Looks in /sys/class/net/ to find the first available veth device
- * name.
- *
- * Returns 0 on success or -1 in case of error
- */
-static int getFreeVethName(char *veth, int maxLen, int startDev)
-{
- int rc = -1;
- int devNum = startDev-1;
- char path[PATH_MAX];
-
- do {
- ++devNum;
- snprintf(path, PATH_MAX, "/sys/class/net/veth%d/", devNum);
- } while (virFileExists(path));
-
- snprintf(veth, maxLen, "veth%d", devNum);
-
- rc = devNum;
-
- return rc;
-}
-
-/**
- * vethCreate:
- * @veth1: name for one end of veth pair
- * @veth1MaxLen: max length of veth1 name
- * @veth2: name for one end of veth pair
- * @veth2MaxLen: max length of veth1 name
- *
- * Creates a veth device pair using the ip command:
- * ip link add veth1 type veth peer name veth2
- * NOTE: If veth1 and veth2 names are not specified, ip will auto assign
- * names. There seems to be two problems here -
- * 1) There doesn't seem to be a way to determine the names of the
- * devices that it creates. They show up in ip link show and
- * under /sys/class/net/ however there is no guarantee that they
- * are the devices that this process just created.
- * 2) Once one of the veth devices is moved to another namespace, it
- * is no longer visible in the parent namespace. This seems to
- * confuse the name assignment causing it to fail with File exists.
- * Because of these issues, this function currently forces the caller
- * to fully specify the veth device names.
- *
- * Returns 0 on success or -1 in case of error
- */
-int vethCreate(char* veth1, int veth1MaxLen,
- char* veth2, int veth2MaxLen)
-{
- int rc = -1;
- const char *argv[] = {
- "ip", "link", "add", veth1, "type", "veth", "peer", "name", veth2, NULL
- };
- int cmdResult;
- int vethDev = 0;
-
- if ((NULL == veth1) || (NULL == veth2)) {
- goto error_out;
- }
-
- DEBUG("veth1: %s veth2: %s", veth1, veth2);
-
- while ((1 > strlen(veth1)) || STREQ(veth1, veth2)) {
- vethDev = getFreeVethName(veth1, veth1MaxLen, 0);
- ++vethDev;
- DEBUG("assigned veth1: %s", veth1);
- }
-
- while ((1 > strlen(veth2)) || STREQ(veth1, veth2)) {
- vethDev = getFreeVethName(veth2, veth2MaxLen, vethDev);
- ++vethDev;
- DEBUG("assigned veth2: %s", veth2);
- }
-
- DEBUG("veth1: %s veth2: %s", veth1, veth2);
- rc = virRun(NULL, argv, &cmdResult);
-
- if (0 == rc) {
- rc = cmdResult;
- }
-
-error_out:
- return rc;
-}
-
-/**
- * vethDelete:
- * @veth: name for one end of veth pair
- *
- * This will delete both veth devices in a pair. Only one end needs to
- * be specified. The ip command will identify and delete the other veth
- * device as well.
- * ip link del veth
- *
- * Returns 0 on success or -1 in case of error
- */
-int vethDelete(const char *veth)
-{
- int rc = -1;
- const char *argv[] = {"ip", "link", "del", veth, NULL};
- int cmdResult;
-
- if (NULL == veth) {
- goto error_out;
- }
-
- DEBUG("veth: %s", veth);
-
- rc = virRun(NULL, argv, &cmdResult);
-
- if (0 == rc) {
- rc = cmdResult;
- }
-
-error_out:
- return rc;
-}
-
-/**
- * vethInterfaceUpOrDown:
- * @veth: name of veth device
- * @upOrDown: 0 => down, 1 => up
- *
- * Enables a veth device using the ifconfig command. A NULL inetAddress
- * will cause it to be left off the command line.
- *
- * Returns 0 on success or -1 in case of error
- */
-int vethInterfaceUpOrDown(const char* veth, int upOrDown)
-{
- int rc = -1;
- const char *argv[] = {"ifconfig", veth, NULL, NULL};
- int cmdResult;
-
- if (NULL == veth) {
- goto error_out;
- }
-
- if (0 == upOrDown)
- argv[2] = "down";
- else
- argv[2] = "up";
-
- rc = virRun(NULL, argv, &cmdResult);
-
- if (0 == rc) {
- rc = cmdResult;
- }
-
-error_out:
- return rc;
-}
-
-/**
- * moveInterfaceToNetNs:
- * @interface: name of device
- * @pidInNs: PID of process in target net namespace
- *
- * Moves the given device into the target net namespace specified by the given
- * pid using this command:
- * ip link set interface netns pidInNs
- *
- * Returns 0 on success or -1 in case of error
- */
-int moveInterfaceToNetNs(const char* iface, int pidInNs)
-{
- int rc = -1;
- char *pid = NULL;
- const char *argv[] = {
- "ip", "link", "set", iface, "netns", NULL, NULL
- };
- int cmdResult;
-
- if (NULL == iface) {
- goto error_out;
- }
-
- if (virAsprintf(&pid, "%d", pidInNs) == -1)
- goto error_out;
-
- argv[5] = pid;
- rc = virRun(NULL, argv, &cmdResult);
- if (0 == rc)
- rc = cmdResult;
-
-error_out:
- VIR_FREE(pid);
- return rc;
-}
+++ /dev/null
-/*
- * veth.h: Interface to tools for managing veth pairs
- *
- * Copyright IBM Corp. 2008
- *
- * See COPYING.LIB for the License of this software
- *
- * Authors:
- * David L. Leskovec <dlesko at linux.vnet.ibm.com>
- */
-
-#ifndef VETH_H
-#define VETH_H
-
-#include <config.h>
-
-/* Function declarations */
-int vethCreate(char* veth1, int veth1MaxLen, char* veth2,
- int veth2MaxLen);
-int vethDelete(const char* veth);
-int vethInterfaceUpOrDown(const char* veth, int upOrDown);
-int moveInterfaceToNetNs(const char *iface, int pidInNs);
-
-#endif /* VETH_H */