]> xenbits.xensource.com Git - libvirt.git/commitdiff
vircgroup: introduce virCgroupV2DevicesAttachProg
authorPavel Hrdina <phrdina@redhat.com>
Mon, 24 Jun 2019 12:15:31 +0000 (14:15 +0200)
committerPavel Hrdina <phrdina@redhat.com>
Fri, 15 Nov 2019 11:58:05 +0000 (12:58 +0100)
This function loads the BPF prog with prepared map into kernel and
attaches it into guest cgroup.  It can also be used to replace existing
program in the cgroup if we need to resize BPF map to store more rules
for devices. The old program will be closed and removed from kernel.

There are two possible ways how to create BPF program:

    - One way is to write simple C-like code which can be compiled into
      BPF object file which can be loaded into kernel using elfutils.

    - The second way is to define macros which look like assembler
      instructions and can be used directly to create BPF program that
      can be directly loaded into kernel.

Since the program is not too complex we can use the second option.

If there is no program, all devices are allowed.  If there is a
program, it is executed and its exit status decides the access: it is
denied for 0 and allowed for 1.

Our program will follow these rules:

    - first it will try to look for the specific key using major and
      minor to see if there is any rule for that specific device

    - if there is no specific rule it will try to look for any rule that
      matches only major of the device

    - if there is no match with major it will try the same but with
      minor of the device

    - as the last attempt it will try to look for rule for all devices
      and if there is no match it will return 0 to deny that access

Signed-off-by: Pavel Hrdina <phrdina@redhat.com>
Reviewed-by: Ján Tomko <jtomko@redhat.com>
po/POTFILES.in
src/libvirt_private.syms
src/util/vircgrouppriv.h
src/util/vircgroupv2devices.c
src/util/vircgroupv2devices.h

index 984ec36c0f781baf80c6197161ab7088f836ee48..f93fb9694d6597c0235e00df0b77134975ec1b21 100644 (file)
 @SRCDIR@/src/util/vircgroupbackend.h
 @SRCDIR@/src/util/vircgroupv1.c
 @SRCDIR@/src/util/vircgroupv2.c
+@SRCDIR@/src/util/vircgroupv2devices.c
 @SRCDIR@/src/util/virclosecallbacks.c
 @SRCDIR@/src/util/vircommand.c
 @SRCDIR@/src/util/virconf.c
index c93327791885a3221d236ba5cec81de210d3bc89..975733f71ef047975908a3dba2d38a7e4b2b8f95 100644 (file)
@@ -1717,6 +1717,7 @@ virCgroupV1Register;
 virCgroupV2Register;
 
 # util/vircgroupv2devices.h
+virCgroupV2DevicesAttachProg;
 virCgroupV2DevicesAvailable;
 
 # util/virclosecallbacks.h
index 334095719e0d5a08972fb19d105b6aa71dc917bf..f2a80aeb82d2de7e53355df9e620e103c95b7eb5 100644 (file)
@@ -41,10 +41,20 @@ struct _virCgroupV1Controller {
 typedef struct _virCgroupV1Controller virCgroupV1Controller;
 typedef virCgroupV1Controller *virCgroupV1ControllerPtr;
 
+struct _virCgroupV2Devices {
+    int mapfd;     /* fd of the BPF map handed to virCgroupV2DevicesAttachProg() */
+    int progfd;    /* fd of the BPF program that function loaded and attached */
+    ssize_t count; /* NOTE(review): presumably the number of rules in the map - confirm */
+    ssize_t max;   /* copied from the 'max' argument of virCgroupV2DevicesAttachProg() */
+};
+typedef struct _virCgroupV2Devices virCgroupV2Devices;
+typedef virCgroupV2Devices *virCgroupV2DevicesPtr;
+
 struct _virCgroupV2Controller {
     int controllers;
     char *mountPoint;
     char *placement;
+    virCgroupV2Devices devices; /* BPF map/program state for the devices controller */
 };
 typedef struct _virCgroupV2Controller virCgroupV2Controller;
 typedef virCgroupV2Controller *virCgroupV2ControllerPtr;
index 86416458100de64a068ac79e836c244c501d1104..c30a23f1659aad40033fd3eb753643978f7649f5 100644 (file)
@@ -30,6 +30,7 @@
 #define LIBVIRT_VIRCGROUPPRIV_H_ALLOW
 #include "vircgrouppriv.h"
 
+#include "viralloc.h"
 #include "virbpf.h"
 #include "vircgroup.h"
 #include "vircgroupv2devices.h"
@@ -60,10 +61,283 @@ virCgroupV2DevicesAvailable(virCgroupPtr group)
 
     return true;
 }
+
+
+/* Steps to get assembly version of devices BPF program:
+ *
+ * Save the following program into bpfprog.c, compile it using clang:
+ *
+ *     clang -O2 -Wall -target bpf -c bpfprog.c -o bpfprog.o
+ *
+ * Now you can use llvm-objdump to get the list of instructions:
+ *
+ *     llvm-objdump -S -no-show-raw-insn bpfprog.o
+ *
+ * which can be converted into program using VIR_BPF_* macros.
+ *
+ * ----------------------------------------------------------------------------
+ * #include <linux/bpf.h>
+ * #include <linux/version.h>
+ *
+ * #define SEC(NAME) __attribute__((section(NAME), used))
+ *
+ * struct bpf_map_def {
+ *     unsigned int type;
+ *     unsigned int key_size;
+ *     unsigned int value_size;
+ *     unsigned int max_entries;
+ *     unsigned int map_flags;
+ *     unsigned int inner_map_idx;
+ *     unsigned int numa_node;
+ * };
+ *
+ * static void *(*bpf_map_lookup_elem)(void *map, void *key) =
+ *     (void *) BPF_FUNC_map_lookup_elem;
+ *
+ * struct bpf_map_def SEC("maps") devices = {
+ *     .type = BPF_MAP_TYPE_HASH,
+ *     .key_size = sizeof(__u64),
+ *     .value_size = sizeof(__u32),
+ *     .max_entries = 65,
+ * };
+ *
+ * SEC("cgroup/dev") int
+ * bpf_libvirt_cgroup_device(struct bpf_cgroup_dev_ctx *ctx)
+ * {
+ *     __u64 key = ((__u64)ctx->major << 32) | ctx->minor;
+ *     __u32 *val = 0;
+ *
+ *     val = bpf_map_lookup_elem(&devices, &key);
+ *     if (val && (ctx->access_type & *val) == ctx->access_type)
+ *         return 1;
+ *
+ *     key = ((__u64)ctx->major << 32) | 0xffffffff;
+ *     val = bpf_map_lookup_elem(&devices, &key);
+ *     if (val && (ctx->access_type & *val) == ctx->access_type)
+ *         return 1;
+ *
+ *     key = 0xffffffff00000000 | ctx->minor;
+ *     val = bpf_map_lookup_elem(&devices, &key);
+ *     if (val && (ctx->access_type & *val) == ctx->access_type)
+ *         return 1;
+ *
+ *     key = 0xffffffffffffffff;
+ *     val = bpf_map_lookup_elem(&devices, &key);
+ *     if (val && (ctx->access_type & *val) == ctx->access_type)
+ *         return 1;
+ *
+ *     return 0;
+ * }
+ *
+ * char _license[] SEC("license") = "GPL";
+ * __u32 _version SEC("version") = LINUX_VERSION_CODE;
+ * ----------------------------------------------------------------------------
+ * */
+/*
+ * virCgroupV2DevicesLoadProg:
+ * @mapfd: file descriptor of the BPF map the program looks its rules up in
+ *
+ * Builds the hand-assembled equivalent of the C program shown in the
+ * comment above, embedding @mapfd in each of its four map lookups, and
+ * passes it to virBPFLoadProg() as a BPF_PROG_TYPE_CGROUP_DEVICE program.
+ *
+ * Every lookup stores a 64-bit key at r10 - 8 and, if an entry is found,
+ * allows the access only when all requested access_type bits are set in
+ * the stored value.  The keys are tried in this order:
+ *
+ *     1. (major << 32) | minor
+ *     2. (major << 32) | 0xffffffff
+ *     3. 0xffffffff00000000 | minor
+ *     4. 0xffffffffffffffff
+ *
+ * The program leaves 1 in r0 to allow the access and 0 to deny it.
+ *
+ * Returns the value of virBPFLoadProg(); the caller in this file treats a
+ * negative value as failure and anything else as the program fd.
+ */
+static int
+virCgroupV2DevicesLoadProg(int mapfd)
+{
+    struct bpf_insn prog[] = {
+        /* r6 holds the context pointer for the whole program; offsets 0, 4
+         * and 8 from it correspond to access_type, major and minor in the
+         * C version above. */
+        /*  0:  r6 = r1 */
+        VIR_BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+        /*  1:  r1 = *(u32 *)(r6 + 8) */
+        VIR_BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, 8),
+        /*  2:  r2 = *(u32 *)(r6 + 4) */
+        VIR_BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_6, 4),
+        /*  3:  r2 <<= 32 */
+        VIR_BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 32),
+        /*  4:  r2 |= r1 */
+        VIR_BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1),
+        /*  5:  *(u64 *)(r10 - 8) = r2 */
+        VIR_BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -8),
+        /*  6:  r2 = r10 */
+        VIR_BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+        /*  7:  r2 += -8 */
+        VIR_BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+        /* The numbering jumps from 8 to 10 here (and after every other map
+         * load below): the listing counts this load as two instruction
+         * slots.  NOTE(review): presumably VIR_BPF_LD_MAP_FD expands to two
+         * struct bpf_insn entries; the relative jump offsets rely on it. */
+        /*  8:  r1 = 0 ll */
+        VIR_BPF_LD_MAP_FD(BPF_REG_1, mapfd),
+        /* 10:  call 1 */
+        VIR_BPF_CALL_INSN(BPF_FUNC_map_lookup_elem),
+        /* 11:  r1 = r0 */
+        VIR_BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+        /* 12:  if r1 == 0 goto +5 <LBB0_2> */
+        VIR_BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 5),
+        /* 13:  r0 = 1 */
+        VIR_BPF_MOV64_IMM(BPF_REG_0, 1),
+        /* 14:  r2 = *(u32 *)(r6 + 0) */
+        VIR_BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_6, 0),
+        /* 15:  r1 = *(u32 *)(r1 + 0) */
+        VIR_BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, 0),
+        /* 16:  r1 &= r2 */
+        VIR_BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
+        /* 17:  if r1 == r2 goto +50 <LBB0_9> */
+        VIR_BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 50),
+        /* Second lookup: key = (major << 32) | 0xffffffff */
+        /* LBB0_2: */
+        /* 18:  r1 = *(u32 *)(r6 + 4) */
+        VIR_BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, 4),
+        /* 19:  r1 <<= 32 */
+        VIR_BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 32),
+        /* 20:  r2 = 4294967295 ll */
+        VIR_BPF_LD_IMM64(BPF_REG_2, 0xffffffff),
+        /* 22:  r1 |= r2 */
+        VIR_BPF_ALU64_REG(BPF_OR, BPF_REG_1, BPF_REG_2),
+        /* 23:  *(u64 *)(r10 - 8) = r1 */
+        VIR_BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
+        /* 24:  r2 = r10 */
+        VIR_BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+        /* 25:  r2 += -8 */
+        VIR_BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+        /* 26:  r1 = 0 ll */
+        VIR_BPF_LD_MAP_FD(BPF_REG_1, mapfd),
+        /* 28:  call 1 */
+        VIR_BPF_CALL_INSN(BPF_FUNC_map_lookup_elem),
+        /* 29:  r1 = r0 */
+        VIR_BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+        /* 30:  if r1 == 0 goto +5 <LBB0_4> */
+        VIR_BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 5),
+        /* 31:  r0 = 1 */
+        VIR_BPF_MOV64_IMM(BPF_REG_0, 1),
+        /* 32:  r2 = *(u32 *)(r6 + 0) */
+        VIR_BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_6, 0),
+        /* 33:  r1 = *(u32 *)(r1 + 0) */
+        VIR_BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, 0),
+        /* 34:  r1 &= r2 */
+        VIR_BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
+        /* 35:  if r1 == r2 goto +32 <LBB0_9> */
+        VIR_BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 32),
+        /* Third lookup: key = 0xffffffff00000000 | minor */
+        /* LBB0_4: */
+        /* 36:  r1 = *(u32 *)(r6 + 8) */
+        VIR_BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, 8),
+        /* 37:  r2 = -4294967296 ll */
+        VIR_BPF_LD_IMM64(BPF_REG_2, 0xffffffff00000000),
+        /* 39:  r1 |= r2 */
+        VIR_BPF_ALU64_REG(BPF_OR, BPF_REG_1, BPF_REG_2),
+        /* 40:  *(u64 *)(r10 - 8) = r1 */
+        VIR_BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
+        /* 41:  r2 = r10 */
+        VIR_BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+        /* 42:  r2 += -8 */
+        VIR_BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+        /* 43:  r1 = 0 ll */
+        VIR_BPF_LD_MAP_FD(BPF_REG_1, mapfd),
+        /* 45:  call 1 */
+        VIR_BPF_CALL_INSN(BPF_FUNC_map_lookup_elem),
+        /* 46:  r1 = r0 */
+        VIR_BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+        /* 47:  if r1 == 0 goto +5 <LBB0_6> */
+        VIR_BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 5),
+        /* 48:  r0 = 1 */
+        VIR_BPF_MOV64_IMM(BPF_REG_0, 1),
+        /* 49:  r2 = *(u32 *)(r6 + 0) */
+        VIR_BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_6, 0),
+        /* 50:  r1 = *(u32 *)(r1 + 0) */
+        VIR_BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, 0),
+        /* 51:  r1 &= r2 */
+        VIR_BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
+        /* 52:  if r1 == r2 goto +15 <LBB0_9> */
+        VIR_BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 15),
+        /* Fourth lookup: key = 0xffffffffffffffff (rule for all devices) */
+        /* LBB0_6: */
+        /* 53:  r1 = -1 */
+        VIR_BPF_MOV64_IMM(BPF_REG_1, -1),
+        /* 54:  *(u64 *)(r10 - 8) = r1 */
+        VIR_BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
+        /* 55:  r2 = r10 */
+        VIR_BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+        /* 56:  r2 += -8 */
+        VIR_BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+        /* 57:  r1 = 0 ll */
+        VIR_BPF_LD_MAP_FD(BPF_REG_1, mapfd),
+        /* 59:  call 1 */
+        VIR_BPF_CALL_INSN(BPF_FUNC_map_lookup_elem),
+        /* 60:  r1 = r0 */
+        VIR_BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+        /* 61:  if r1 == 0 goto +5 <LBB0_8> */
+        VIR_BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 5),
+        /* 62:  r0 = 1 */
+        VIR_BPF_MOV64_IMM(BPF_REG_0, 1),
+        /* 63:  r2 = *(u32 *)(r6 + 0) */
+        VIR_BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_6, 0),
+        /* 64:  r1 = *(u32 *)(r1 + 0) */
+        VIR_BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, 0),
+        /* 65:  r1 &= r2 */
+        VIR_BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
+        /* 66:  if r1 == r2 goto +1 <LBB0_9> */
+        VIR_BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 1),
+        /* No rule matched: deny the access. */
+        /* LBB0_8: */
+        /* 67:  r0 = 0 */
+        VIR_BPF_MOV64_IMM(BPF_REG_0, 0),
+        /* Common exit; every successful match jumps here with r0 == 1. */
+        /* LBB0_9: */
+        /* 68:  exit */
+        VIR_BPF_EXIT_INSN(),
+    };
+
+    return virBPFLoadProg(prog, BPF_PROG_TYPE_CGROUP_DEVICE, G_N_ELEMENTS(prog));
+}
+
+
+/*
+ * virCgroupV2DevicesAttachProg:
+ * @group: cgroup whose devices controller path the program is attached to
+ * @mapfd: fd of the BPF map the new program should consult
+ * @max: value recorded in @group as devices.max (presumably the capacity
+ *       of the map behind @mapfd)
+ *
+ * Loads the devices BPF program for @mapfd, attaches it to the cgroup
+ * directory of @group and records the new program fd, @mapfd and @max in
+ * @group.  If @group already held a program or a map, their file
+ * descriptors are closed once the new program is attached.
+ *
+ * Ownership of @mapfd is taken in every case: on success it is stored in
+ * @group, on failure it is closed before returning.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+int
+virCgroupV2DevicesAttachProg(virCgroupPtr group,
+                             int mapfd,
+                             size_t max)
+{
+    int ret = -1;
+    VIR_AUTOCLOSE progfd = -1;
+    VIR_AUTOCLOSE cgroupfd = -1;
+    g_autofree char *path = NULL;
+
+    if (virCgroupPathOfController(group, VIR_CGROUP_CONTROLLER_DEVICES,
+                                  NULL, &path) < 0) {
+        goto cleanup;
+    }
+
+    progfd = virCgroupV2DevicesLoadProg(mapfd);
+    if (progfd < 0) {
+        virReportSystemError(errno, "%s", _("failed to load cgroup BPF prog"));
+        goto cleanup;
+    }
+
+    cgroupfd = open(path, O_RDONLY);
+    if (cgroupfd < 0) {
+        virReportSystemError(errno, _("unable to open '%s'"), path);
+        goto cleanup;
+    }
+
+    if (virBPFAttachProg(progfd, cgroupfd, BPF_CGROUP_DEVICE) < 0) {
+        virReportSystemError(errno, "%s", _("failed to attach cgroup BPF prog"));
+        goto cleanup;
+    }
+
+    /* A value of 0 is treated as "nothing stored yet" for both fds.
+     * NOTE(review): this relies on the structure being zero-initialised
+     * before the first attach - confirm against the allocation site. */
+    if (group->unified.devices.progfd > 0) {
+        VIR_DEBUG("Closing existing program that was replaced by new one.");
+        VIR_FORCE_CLOSE(group->unified.devices.progfd);
+    }
+
+    /* The previous map fd becomes unreachable once the fields below are
+     * overwritten, so close it here or it leaks.  Skip the close when the
+     * caller passed the very same fd back in, otherwise the descriptor
+     * stored below would already be closed. */
+    if (group->unified.devices.mapfd > 0 &&
+        group->unified.devices.mapfd != mapfd) {
+        VIR_DEBUG("Closing existing map that was replaced by new one.");
+        VIR_FORCE_CLOSE(group->unified.devices.mapfd);
+    }
+
+    group->unified.devices.progfd = progfd;
+    group->unified.devices.mapfd = mapfd;
+    group->unified.devices.max = max;
+    /* Both fds now belong to @group; reset the locals so neither the
+     * automatic close of progfd nor the cleanup path closes them. */
+    progfd = -1;
+    mapfd = -1;
+
+    ret = 0;
+ cleanup:
+    /* Only reached with a valid mapfd on failure (see above). */
+    VIR_FORCE_CLOSE(mapfd);
+    return ret;
+}
 #else /* !HAVE_DECL_BPF_CGROUP_DEVICE */
 /* Fallback for builds where BPF_CGROUP_DEVICE is not declared
  * (!HAVE_DECL_BPF_CGROUP_DEVICE): BPF devices are never reported as
  * available, regardless of @group. */
 bool
 virCgroupV2DevicesAvailable(virCgroupPtr group G_GNUC_UNUSED)
 {
     return false;
 }
+
+
+/* Fallback for builds where BPF_CGROUP_DEVICE is not declared
+ * (!HAVE_DECL_BPF_CGROUP_DEVICE): no program can be loaded, so all
+ * arguments are ignored, an ENOSYS system error is reported and -1 is
+ * returned.  Note that @mapfd is left untouched here. */
+int
+virCgroupV2DevicesAttachProg(virCgroupPtr group G_GNUC_UNUSED,
+                             int mapfd G_GNUC_UNUSED,
+                             size_t max G_GNUC_UNUSED)
+{
+    virReportSystemError(ENOSYS, "%s",
+                         _("cgroups v2 BPF devices not supported "
+                           "with this kernel"));
+    return -1;
+}
 #endif /* !HAVE_DECL_BPF_CGROUP_DEVICE */
index 2448a8890fa1208855dc53f6493c675fa4325698..57454e80af1c4250e89f6a7c15f0103999e472cf 100644 (file)
@@ -22,3 +22,8 @@
 
 bool
 virCgroupV2DevicesAvailable(virCgroupPtr group);
+
+/* Load the devices BPF program for @mapfd and attach it to @group.
+ * Returns 0 on success and -1 on failure; builds without
+ * BPF_CGROUP_DEVICE support always fail with ENOSYS. */
+int
+virCgroupV2DevicesAttachProg(virCgroupPtr group,
+                             int mapfd,
+                             size_t max);