From 9b93317af2da195c57ca5aafe10b4edbe282ba6f Mon Sep 17 00:00:00 2001
From: Wei Liu
Date: Mon, 16 Mar 2015 09:52:32 +0000
Subject: [PATCH] libxl: build, check and pass vNUMA info to Xen for HVM guest

Transform the user-supplied vNUMA configuration into libxl internal
representations, then into libxc representations. Check validity along
the way.

Libxc has more involvement in building vmemranges in the HVM case than
in the PV case. The building of vmemranges is placed after xc_hvm_build
returns because it relies on memory hole information provided by
xc_hvm_build.

Signed-off-by: Wei Liu
Reviewed-by: Dario Faggioli
Cc: Ian Campbell
Cc: Ian Jackson
Cc: Dario Faggioli
Cc: Elena Ufimtseva
Acked-by: Ian Campbell
---
 tools/libxl/libxl_create.c   |  9 ++++++
 tools/libxl/libxl_dom.c      | 43 +++++++++++++++++++++++++++
 tools/libxl/libxl_internal.h |  5 ++++
 tools/libxl/libxl_vnuma.c    | 56 ++++++++++++++++++++++++++++++++++++
 4 files changed, 113 insertions(+)

diff --git a/tools/libxl/libxl_create.c b/tools/libxl/libxl_create.c
index 98687bdb52..af04248f80 100644
--- a/tools/libxl/libxl_create.c
+++ b/tools/libxl/libxl_create.c
@@ -853,6 +853,15 @@ static void initiate_domain_create(libxl__egc *egc,
         goto error_out;
     }
 
+    /* Disallow PoD and vNUMA to be enabled at the same time because PoD
+     * pool is not vNUMA-aware yet.
+     */
+    if (pod_enabled && d_config->b_info.num_vnuma_nodes) {
+        ret = ERROR_INVAL;
+        LOG(ERROR, "Cannot enable PoD and vNUMA at the same time");
+        goto error_out;
+    }
+
     ret = libxl__domain_create_info_setdefault(gc, &d_config->c_info);
     if (ret) goto error_out;
 
diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
index 727d2f2e79..ace8a66f51 100644
--- a/tools/libxl/libxl_dom.c
+++ b/tools/libxl/libxl_dom.c
@@ -893,12 +893,55 @@ int libxl__build_hvm(libxl__gc *gc, uint32_t domid,
         goto out;
     }
 
+    if (info->num_vnuma_nodes != 0) {
+        int i;
+
+        args.nr_vmemranges = state->num_vmemranges;
+        args.vmemranges = libxl__malloc(gc, sizeof(*args.vmemranges) *
+                                        args.nr_vmemranges);
+
+        for (i = 0; i < args.nr_vmemranges; i++) {
+            args.vmemranges[i].start = state->vmemranges[i].start;
+            args.vmemranges[i].end = state->vmemranges[i].end;
+            args.vmemranges[i].flags = state->vmemranges[i].flags;
+            args.vmemranges[i].nid = state->vmemranges[i].nid;
+        }
+
+        /* Consider video ram belongs to vmemrange 0 -- just shrink it
+         * by the size of video ram.
+ */ + if (((args.vmemranges[0].end - args.vmemranges[0].start) >> 10) + < info->video_memkb) { + LOG(ERROR, "vmemrange 0 too small to contain video ram"); + goto out; + } + + args.vmemranges[0].end -= (info->video_memkb << 10); + + args.nr_vnodes = info->num_vnuma_nodes; + args.vnode_to_pnode = libxl__malloc(gc, sizeof(*args.vnode_to_pnode) * + args.nr_vnodes); + for (i = 0; i < args.nr_vnodes; i++) + args.vnode_to_pnode[i] = info->vnuma_nodes[i].pnode; + } + ret = xc_hvm_build(ctx->xch, domid, &args); if (ret) { LOGEV(ERROR, ret, "hvm building failed"); goto out; } + if (info->num_vnuma_nodes != 0) { + ret = libxl__vnuma_build_vmemrange_hvm(gc, domid, info, state, &args); + if (ret) { + LOGEV(ERROR, ret, "hvm build vmemranges failed"); + goto out; + } + ret = libxl__vnuma_config_check(gc, info, state); + if (ret) goto out; + ret = set_vnuma_info(gc, domid, info, state); + if (ret) goto out; + } ret = hvm_build_set_params(ctx->xch, domid, info, state->store_port, &state->store_mfn, state->console_port, &state->console_mfn, state->store_domid, diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h index 4a2f9ed851..202cea175c 100644 --- a/tools/libxl/libxl_internal.h +++ b/tools/libxl/libxl_internal.h @@ -3429,6 +3429,11 @@ int libxl__vnuma_build_vmemrange_pv(libxl__gc *gc, uint32_t domid, libxl_domain_build_info *b_info, libxl__domain_build_state *state); +int libxl__vnuma_build_vmemrange_hvm(libxl__gc *gc, + uint32_t domid, + libxl_domain_build_info *b_info, + libxl__domain_build_state *state, + struct xc_hvm_build_args *args); _hidden int libxl__ms_vm_genid_set(libxl__gc *gc, uint32_t domid, const libxl_ms_vm_genid *id); diff --git a/tools/libxl/libxl_vnuma.c b/tools/libxl/libxl_vnuma.c index bef3cc548e..72339f74ac 100644 --- a/tools/libxl/libxl_vnuma.c +++ b/tools/libxl/libxl_vnuma.c @@ -182,6 +182,62 @@ int libxl__vnuma_build_vmemrange_pv(libxl__gc *gc, return libxl__arch_vnuma_build_vmemrange(gc, domid, b_info, state); } +/* Build vmemranges for HVM guest */ +int libxl__vnuma_build_vmemrange_hvm(libxl__gc *gc, + uint32_t domid, + libxl_domain_build_info *b_info, + libxl__domain_build_state *state, + struct xc_hvm_build_args *args) +{ + uint64_t hole_start, hole_end, next; + int nid, nr_vmemrange; + xen_vmemrange_t *vmemranges; + + /* Derive vmemranges from vnode size and memory hole. + * + * Guest physical address space layout: + * [0, hole_start) [hole_start, hole_end) [hole_end, highmem_end) + */ + hole_start = args->lowmem_end < args->mmio_start ? + args->lowmem_end : args->mmio_start; + hole_end = (args->mmio_start + args->mmio_size) > (1ULL << 32) ? 
+ (args->mmio_start + args->mmio_size) : (1ULL << 32); + + assert(state->vmemranges == NULL); + + next = 0; + nr_vmemrange = 0; + vmemranges = NULL; + for (nid = 0; nid < b_info->num_vnuma_nodes; nid++) { + libxl_vnode_info *p = &b_info->vnuma_nodes[nid]; + uint64_t remaining_bytes = p->memkb << 10; + + while (remaining_bytes > 0) { + uint64_t count = remaining_bytes; + + if (next >= hole_start && next < hole_end) + next = hole_end; + if ((next < hole_start) && (next + remaining_bytes >= hole_start)) + count = hole_start - next; + + GCREALLOC_ARRAY(vmemranges, nr_vmemrange+1); + vmemranges[nr_vmemrange].start = next; + vmemranges[nr_vmemrange].end = next + count; + vmemranges[nr_vmemrange].flags = 0; + vmemranges[nr_vmemrange].nid = nid; + + nr_vmemrange++; + remaining_bytes -= count; + next += count; + } + } + + state->vmemranges = vmemranges; + state->num_vmemranges = nr_vmemrange; + + return 0; +} + /* * Local variables: * mode: C -- 2.39.5
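Note on the libxl_vnuma.c hunk: the loop carves each vnode's memory into guest-physical ranges while stepping over the hole derived from lowmem_end, mmio_start and mmio_size, so a single vnode can end up spanning several vmemranges. The standalone program below is only an illustration of that hole-splitting logic, not code from the patch: the vmemrange struct, the build_vmemranges helper, the fixed-size output array and the example vnode sizes are all made up for the sketch.

#include <assert.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-in for xen_vmemrange_t; bounds are byte addresses. */
struct vmemrange {
    uint64_t start, end;
    uint32_t flags, nid;
};

/*
 * Carve each vnode's memory into guest-physical ranges while skipping the
 * hole [hole_start, hole_end).  This mirrors the loop in
 * libxl__vnuma_build_vmemrange_hvm; a fixed-size output array stands in for
 * GCREALLOC_ARRAY.  Returns the number of ranges built, or -1 if the output
 * array is too small.
 */
static int build_vmemranges(const uint64_t *vnode_bytes, int nr_vnodes,
                            uint64_t hole_start, uint64_t hole_end,
                            struct vmemrange *out, int out_max)
{
    uint64_t next = 0;
    int nid, nr = 0;

    for (nid = 0; nid < nr_vnodes; nid++) {
        uint64_t remaining = vnode_bytes[nid];

        while (remaining > 0) {
            uint64_t count = remaining;

            /* Never start a range inside the hole. */
            if (next >= hole_start && next < hole_end)
                next = hole_end;
            /* Clip a range that would otherwise run into the hole. */
            if (next < hole_start && next + remaining >= hole_start)
                count = hole_start - next;

            if (nr == out_max)
                return -1;
            out[nr].start = next;
            out[nr].end = next + count;
            out[nr].flags = 0;
            out[nr].nid = nid;
            nr++;

            remaining -= count;
            next += count;
        }
    }

    return nr;
}

int main(void)
{
    /* Example values only: two 2 GiB vnodes, hole at [0xf0000000, 4 GiB). */
    const uint64_t vnode_bytes[] = { 2ULL << 30, 2ULL << 30 };
    struct vmemrange out[8];
    int i, nr;

    nr = build_vmemranges(vnode_bytes, 2, 0xf0000000ULL, 1ULL << 32, out, 8);
    assert(nr > 0);

    for (i = 0; i < nr; i++)
        printf("vmemrange %d: [0x%" PRIx64 ", 0x%" PRIx64 ") nid %u\n",
               i, out[i].start, out[i].end, (unsigned)out[i].nid);

    return 0;
}

For this input it prints three ranges: [0, 2 GiB) on node 0, then node 1 split into [2 GiB, 0xf0000000) and [4 GiB, 4 GiB + 256 MiB), which is the kind of layout libxl__vnuma_build_vmemrange_hvm hands back in state->vmemranges.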
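Note on the libxl_dom.c hunk: before the ranges are copied into xc_hvm_build_args, vmemrange 0 is shrunk by the video RAM size, with >> 10 converting the range size from bytes to KiB for the comparison and << 10 converting video_memkb back to bytes for the subtraction. A standalone sketch of just that check follows; the stand-in struct, the shrink_for_video_ram helper and the example numbers are illustrative, not the patch's own code.

#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-in for xen_vmemrange_t; bounds are byte addresses. */
struct vmemrange {
    uint64_t start, end;
};

/*
 * Shrink vmemrange 0 by the video RAM size, as the libxl_dom.c hunk does
 * before handing the ranges to xc_hvm_build.  video_memkb is in KiB while
 * the range bounds are bytes, hence >> 10 before the comparison and << 10
 * before the subtraction.  Returns 0 on success, -1 if the range is too
 * small to contain the video RAM.
 */
static int shrink_for_video_ram(struct vmemrange *v0, uint64_t video_memkb)
{
    if (((v0->end - v0->start) >> 10) < video_memkb)
        return -1;

    v0->end -= video_memkb << 10;
    return 0;
}

int main(void)
{
    /* Example values only: a 2 GiB vmemrange and 16384 KiB of video RAM. */
    struct vmemrange v0 = { 0, 2ULL << 30 };

    if (shrink_for_video_ram(&v0, 16384) == 0)
        printf("vmemrange 0 now ends at 0x%llx\n",
               (unsigned long long)v0.end);

    return 0;
}

With a 2 GiB vmemrange and 16 MiB of video RAM the adjusted range ends at 0x7f000000.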