]> xenbits.xensource.com Git - libvirt.git/commitdiff
Allow use of block devices for guest filesystem
authorDaniel P. Berrange <berrange@redhat.com>
Fri, 22 Jul 2011 12:02:51 +0000 (13:02 +0100)
committerDaniel P. Berrange <berrange@redhat.com>
Mon, 8 Aug 2011 10:38:05 +0000 (11:38 +0100)
Currently the LXC driver can only populate filesystems from
host filesystems, using bind mounts. This patch allows host
block devices to be mounted. It autodetects the filesystem
format at mount time, and adds the block device to the cgroups
ACL. Example usage is

    <filesystem type='block' accessmode='passthrough'>
      <source dev='/dev/sda1'/>
      <target dir='/home'/>
    </filesystem>

* src/lxc/lxc_container.c: Mount block device filesystems
* src/lxc/lxc_controller.c: Add block device filesystems
  to cgroups ACL

src/lxc/lxc_container.c
src/lxc/lxc_controller.c

index 52ef35149ba4d3841f2e6d972caab04c9cc3fde3..4e84344e2ed74bb3633178e4e17702e1c8ca1064 100644 (file)
@@ -35,6 +35,7 @@
 #include <sys/stat.h>
 #include <unistd.h>
 #include <mntent.h>
+#include <dirent.h>
 
 /* Yes, we want linux private one, for _syscall2() macro */
 #include <linux/unistd.h>
@@ -608,12 +609,184 @@ static int lxcContainerMountFSBind(virDomainFSDefPtr fs,
             virReportSystemError(errno,
                                  _("Failed to make directory %s readonly"),
                                  fs->dst);
+        }
+    }
+
+    ret = 0;
+
+cleanup:
+    VIR_FREE(src);
+    return ret;
+}
+
+
+
+/*
+ * This functions attempts to do automatic detection of filesystem
+ * type following the same rules as the util-linux 'mount' binary.
+ *
+ * The main difference is that we don't (currently) try to use
+ * libblkid to detect the format first. We go straight to using
+ * /etc/filesystems, and then /proc/filesystems
+ */
+static int lxcContainerMountFSBlockAuto(virDomainFSDefPtr fs,
+                                        int fsflags,
+                                        const char *src,
+                                        const char *srcprefix)
+{
+    FILE *fp = NULL;
+    int ret = -1;
+    bool tryProc = false;
+    bool gotStar = false;
+    char *fslist = NULL;
+    char *line = NULL;
+    const char *type;
+
+    VIR_DEBUG("src=%s srcprefix=%s dst=%s", src, srcprefix, fs->dst);
+
+    /* First time around we use /etc/filesystems */
+retry:
+    if (virAsprintf(&fslist, "%s%s",
+                    srcprefix, tryProc ? "/proc/filesystems" : "/etc/filesystems") < 0) {
+        virReportOOMError();
+        goto cleanup;
+    }
+
+    VIR_DEBUG("Open fslist %s", fslist);
+    if (!(fp = fopen(fslist, "r"))) {
+        /* If /etc/filesystems does not exist, then we need to retry
+         * with /proc/filesystems next
+         */
+        if (errno == ENOENT &&
+            !tryProc) {
+            tryProc = true;
+            VIR_FREE(fslist);
+            goto retry;
+        }
+
+        virReportSystemError(errno,
+                             _("Unable to read %s"),
+                             fslist);
+        goto cleanup;
+    }
+
+    while (!feof(fp)) {
+        size_t n;
+        VIR_FREE(line);
+        if (getline(&line, &n, fp) <= 0) {
+            if (feof(fp))
+                break;
+
             goto cleanup;
         }
 
+        if (strstr(line, "nodev"))
+            continue;
+
+        type = strchr(line, '\n');
+        if (type)
+            line[type-line] = '\0';
+
+        type = line;
+        virSkipSpaces(&type);
+
+        /*
+         * /etc/filesystems is only allowed to contain '*' on the last line
+         */
+        if (gotStar) {
+            lxcError(VIR_ERR_INTERNAL_ERROR,
+                     _("%s has unexpected '*' before last line"),
+                     fslist);
+            goto cleanup;
+        }
+
+        /* An '*' on the last line in /etc/filesystems
+         * means try /proc/filesystems next. We don't
+         * jump immediately though, since we need to see
+         * if any more lines follow
+         */
+        if (!tryProc &&
+            STREQ(type, "*"))
+            gotStar = true;
+
+        VIR_DEBUG("Trying mount %s with %s", src, type);
+        if (mount(src, fs->dst, type, fsflags, NULL) < 0) {
+            /* These errnos indicate a bogus filesystem type for
+             * the image we have, so skip to the next type
+             */
+            if (errno == EINVAL || errno == ENODEV)
+                continue;
+
+            virReportSystemError(errno,
+                                 _("Failed to bind mount directory %s to %s"),
+                                 src, fs->dst);
+            goto cleanup;
+        }
+
+        ret = 0;
+        break;
     }
 
-    ret = 0;
+    /* We've got to the end of /etc/filesystems and saw
+     * a '*', so we must try /proc/filesystems next
+     */
+    if (ret != 0 &&
+        !tryProc &&
+        gotStar) {
+        tryProc = true;
+        VIR_FREE(fslist);
+        VIR_FORCE_FCLOSE(fp);
+        goto retry;
+    }
+
+    VIR_DEBUG("Done mounting filesystem ret=%d tryProc=%d", ret, tryProc);
+
+cleanup:
+    VIR_FREE(line);
+    VIR_FORCE_FCLOSE(fp);
+    return ret;
+}
+
+
+/*
+ * Mount a block device 'src' on fs->dst, automatically
+ * probing for filesystem type
+ */
+static int lxcContainerMountFSBlockHelper(virDomainFSDefPtr fs,
+                                          const char *src,
+                                          const char *srcprefix)
+{
+    int fsflags = 0;
+    int ret = -1;
+    if (fs->readonly)
+        fsflags |= MS_RDONLY;
+
+    if (virFileMakePath(fs->dst) < 0) {
+        virReportSystemError(errno,
+                             _("Failed to create %s"),
+                             fs->dst);
+        goto cleanup;
+    }
+
+    ret = lxcContainerMountFSBlockAuto(fs, fsflags, src, srcprefix);
+
+cleanup:
+    return ret;
+}
+
+
+static int lxcContainerMountFSBlock(virDomainFSDefPtr fs,
+                                    const char *srcprefix)
+{
+    char *src = NULL;
+    int ret = -1;
+
+    if (virAsprintf(&src, "%s%s", srcprefix, fs->src) < 0) {
+        virReportOOMError();
+        goto cleanup;
+    }
+
+    ret = lxcContainerMountFSBlockHelper(fs, src, srcprefix);
 
     VIR_DEBUG("Done mounting filesystem ret=%d", ret);
 
@@ -631,6 +804,10 @@ static int lxcContainerMountFS(virDomainFSDefPtr fs,
         if (lxcContainerMountFSBind(fs, srcprefix) < 0)
             return -1;
         break;
+    case VIR_DOMAIN_FS_TYPE_BLOCK:
+        if (lxcContainerMountFSBlock(fs, srcprefix) < 0)
+            return -1;
+        break;
     default:
         lxcError(VIR_ERR_CONFIG_UNSUPPORTED,
                  _("Cannot mount filesystem type %s"),
index ff42aa569a7f6aac146900300584bb40032f10f0..bc5ee2524dc33cd93e812f661206d680cfc87ea0 100644 (file)
@@ -188,6 +188,23 @@ static int lxcSetContainerResources(virDomainDefPtr def)
         }
     }
 
+    for (i = 0 ; i < def->nfss ; i++) {
+        if (def->fss[i]->type != VIR_DOMAIN_FS_TYPE_BLOCK)
+            continue;
+
+        rc = virCgroupAllowDevicePath(cgroup,
+                                      def->fss[i]->src,
+                                      def->fss[i]->readonly ?
+                                      VIR_CGROUP_DEVICE_READ :
+                                      VIR_CGROUP_DEVICE_RW);
+        if (rc != 0) {
+            virReportSystemError(-rc,
+                                 _("Unable to allow device %s for domain %s"),
+                                 def->fss[i]->src, def->name);
+            goto cleanup;
+        }
+    }
+
     rc = virCgroupAllowDeviceMajor(cgroup, 'c', LXC_DEV_MAJ_PTY,
                                    VIR_CGROUP_DEVICE_RWM);
     if (rc != 0) {