From: Juergen Gross Date: Wed, 2 Dec 2015 07:42:17 +0000 (+0100) Subject: libxc: try to find last used pfn when migrating X-Git-Url: http://xenbits.xensource.com/gitweb?a=commitdiff_plain;h=91e204d37f44913913776d0a89279721694f8b32;p=people%2Fjulieng%2Fxen-unstable.git libxc: try to find last used pfn when migrating For migration the last used pfn of a guest is needed to size the logdirty bitmap and as an upper bound of the page loop. Unfortunately there are pv-kernels advertising a much higher maximum pfn than they are really using in order to support memory hotplug. This will lead to allocation of much more memory in Xen tools during migration than is really needed. Try to find the last used guest pfn of a pv-domu by scanning the p2m tree from the last entry towards its start and searching for an entry that is not invalid. Normally the mid pages of the p2m tree containing all invalid entries are being reused, so we can just scan the top page for identical entries and skip all but the first one. Signed-off-by: Juergen Gross Reviewed-by: Wei Liu Reviewed-by: Andrew Cooper [ ijc -- added errno = E2BIG to one error path ] Acked-by: Ian Campbell --- diff --git a/tools/libxc/xc_sr_common.h b/tools/libxc/xc_sr_common.h index 64f60826f3..9aecde2c74 100644 --- a/tools/libxc/xc_sr_common.h +++ b/tools/libxc/xc_sr_common.h @@ -54,9 +54,11 @@ struct xc_sr_save_ops void **page); /** - * Set up local environment to restore a domain. This is called before - * any records are written to the stream. (Typically querying running - * domain state, setting up mappings etc.) + * Set up local environment to save a domain. (Typically querying + * running domain state, setting up mappings etc.) + * + * This is called once before any common setup has occurred, allowing for + * guest-specific adjustments to be made to common state. 
*/ int (*setup)(struct xc_sr_context *ctx); @@ -121,8 +123,10 @@ struct xc_sr_restore_ops int (*localise_page)(struct xc_sr_context *ctx, uint32_t type, void *page); /** - * Set up local environment to restore a domain. This is called before - * any records are read from the stream. + * Set up local environment to restore a domain. + * + * This is called once before any common setup has occurred, allowing for + * guest-specific adjustments to be made to common state. */ int (*setup)(struct xc_sr_context *ctx); diff --git a/tools/libxc/xc_sr_common_x86_pv.c b/tools/libxc/xc_sr_common_x86_pv.c index eb68c07c9e..f233c8799e 100644 --- a/tools/libxc/xc_sr_common_x86_pv.c +++ b/tools/libxc/xc_sr_common_x86_pv.c @@ -68,8 +68,7 @@ uint64_t mfn_to_cr3(struct xc_sr_context *ctx, xen_pfn_t _mfn) int x86_pv_domain_info(struct xc_sr_context *ctx) { xc_interface *xch = ctx->xch; - unsigned int guest_width, guest_levels, fpp; - xen_pfn_t max_pfn; + unsigned int guest_width, guest_levels; /* Get the domain width */ if ( xc_domain_get_guest_width(xch, ctx->domid, &guest_width) ) @@ -89,25 +88,9 @@ int x86_pv_domain_info(struct xc_sr_context *ctx) } ctx->x86_pv.width = guest_width; ctx->x86_pv.levels = guest_levels; - fpp = PAGE_SIZE / ctx->x86_pv.width; DPRINTF("%d bits, %d levels", guest_width * 8, guest_levels); - /* Get the domain's size */ - if ( xc_domain_maximum_gpfn(xch, ctx->domid, &max_pfn) < 0 ) - { - PERROR("Unable to obtain guests max pfn"); - return -1; - } - - if ( max_pfn > 0 ) - { - ctx->x86_pv.max_pfn = max_pfn; - ctx->x86_pv.p2m_frames = (ctx->x86_pv.max_pfn + fpp) / fpp; - - DPRINTF("max_pfn %#lx, p2m_frames %d", max_pfn, ctx->x86_pv.p2m_frames); - } - return 0; } diff --git a/tools/libxc/xc_sr_save.c b/tools/libxc/xc_sr_save.c index 0c12e56802..cefcef5934 100644 --- a/tools/libxc/xc_sr_save.c +++ b/tools/libxc/xc_sr_save.c @@ -677,6 +677,10 @@ static int setup(struct xc_sr_context *ctx) DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap, 
&ctx->save.dirty_bitmap_hbuf); + rc = ctx->save.ops.setup(ctx); + if ( rc ) + goto err; + dirty_bitmap = xc_hypercall_buffer_alloc_pages( xch, dirty_bitmap, NRPAGES(bitmap_size(ctx->save.p2m_size))); ctx->save.batch_pfns = malloc(MAX_BATCH_SIZE * @@ -692,10 +696,6 @@ static int setup(struct xc_sr_context *ctx) goto err; } - rc = ctx->save.ops.setup(ctx); - if ( rc ) - goto err; - rc = 0; err: @@ -824,7 +824,6 @@ int xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom, uint32_t max_iters, uint32_t max_factor, uint32_t flags, struct save_callbacks* callbacks, int hvm) { - xen_pfn_t nr_pfns; struct xc_sr_context ctx = { .xch = xch, @@ -869,21 +868,6 @@ int xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom, ctx.domid = dom; - if ( xc_domain_nr_gpfns(xch, dom, &nr_pfns) < 0 ) - { - PERROR("Unable to obtain the guest p2m size"); - return -1; - } - - ctx.save.p2m_size = nr_pfns; - - if ( ctx.save.p2m_size > ~XEN_DOMCTL_PFINFO_LTAB_MASK ) - { - errno = E2BIG; - ERROR("Cannot save this big a guest"); - return -1; - } - if ( ctx.dominfo.hvm ) { ctx.save.ops = save_ops_x86_hvm; diff --git a/tools/libxc/xc_sr_save_x86_hvm.c b/tools/libxc/xc_sr_save_x86_hvm.c index cdee7741da..f3d6ceec40 100644 --- a/tools/libxc/xc_sr_save_x86_hvm.c +++ b/tools/libxc/xc_sr_save_x86_hvm.c @@ -135,6 +135,21 @@ static int x86_hvm_normalise_page(struct xc_sr_context *ctx, static int x86_hvm_setup(struct xc_sr_context *ctx) { xc_interface *xch = ctx->xch; + xen_pfn_t nr_pfns; + + if ( xc_domain_nr_gpfns(xch, ctx->domid, &nr_pfns) < 0 ) + { + PERROR("Unable to obtain the guest p2m size"); + return -1; + } + if ( nr_pfns > ~XEN_DOMCTL_PFINFO_LTAB_MASK ) + { + errno = E2BIG; + PERROR("Cannot save this big a guest"); + return -1; + } + + ctx->save.p2m_size = nr_pfns; if ( ctx->save.callbacks->switch_qemu_logdirty( ctx->domid, 1, ctx->save.callbacks->data) ) diff --git a/tools/libxc/xc_sr_save_x86_pv.c b/tools/libxc/xc_sr_save_x86_pv.c index f63f40bf1f..c8d6f0be5b 100644 --- 
a/tools/libxc/xc_sr_save_x86_pv.c +++ b/tools/libxc/xc_sr_save_x86_pv.c @@ -83,8 +83,8 @@ static int map_p2m(struct xc_sr_context *ctx) */ xc_interface *xch = ctx->xch; int rc = -1; - unsigned x, fpp, fll_entries, fl_entries; - xen_pfn_t fll_mfn; + unsigned x, saved_x, fpp, fll_entries, fl_entries; + xen_pfn_t fll_mfn, saved_mfn, max_pfn; xen_pfn_t *local_fll = NULL; void *guest_fll = NULL; @@ -94,9 +94,15 @@ static int map_p2m(struct xc_sr_context *ctx) void *guest_fl = NULL; size_t local_fl_size; + ctx->x86_pv.max_pfn = GET_FIELD(ctx->x86_pv.shinfo, arch.max_pfn, + ctx->x86_pv.width) - 1; fpp = PAGE_SIZE / ctx->x86_pv.width; fll_entries = (ctx->x86_pv.max_pfn / (fpp * fpp)) + 1; - fl_entries = (ctx->x86_pv.max_pfn / fpp) + 1; + if ( fll_entries > fpp ) + { + ERROR("max_pfn %#lx too large for p2m tree", ctx->x86_pv.max_pfn); + goto err; + } fll_mfn = GET_FIELD(ctx->x86_pv.shinfo, arch.pfn_to_mfn_frame_list_list, ctx->x86_pv.width); @@ -131,6 +137,8 @@ static int map_p2m(struct xc_sr_context *ctx) } /* Check for bad mfns in frame list list. */ + saved_mfn = 0; + saved_x = 0; for ( x = 0; x < fll_entries; ++x ) { if ( local_fll[x] == 0 || local_fll[x] > ctx->x86_pv.max_mfn ) @@ -139,8 +147,35 @@ static int map_p2m(struct xc_sr_context *ctx) local_fll[x], x, fll_entries); goto err; } + if ( local_fll[x] != saved_mfn ) + { + saved_mfn = local_fll[x]; + saved_x = x; + } } + /* + * Check for actual lower max_pfn: + * If the trailing entries of the frame list list were all the same we can + * assume they all reference mid pages all referencing p2m pages with all + * invalid entries. Otherwise there would be multiple pfns referencing all + * the same mfn which can't work across migration, as this sharing would be + * broken by the migration process. + * Adjust max_pfn if possible to avoid allocating much larger areas as + * needed for p2m and logdirty map. 
+ */ + max_pfn = (saved_x + 1) * fpp * fpp - 1; + if ( max_pfn < ctx->x86_pv.max_pfn ) + { + ctx->x86_pv.max_pfn = max_pfn; + fll_entries = (ctx->x86_pv.max_pfn / (fpp * fpp)) + 1; + } + ctx->x86_pv.p2m_frames = (ctx->x86_pv.max_pfn + fpp) / fpp; + DPRINTF("max_pfn %#lx, p2m_frames %d", ctx->x86_pv.max_pfn, + ctx->x86_pv.p2m_frames); + ctx->save.p2m_size = ctx->x86_pv.max_pfn + 1; + fl_entries = (ctx->x86_pv.max_pfn / fpp) + 1; + /* Map the guest mid p2m frames. */ guest_fl = xc_map_foreign_pages(xch, ctx->domid, PROT_READ, local_fll, fll_entries);