From 8cc7129f15e3367bddf0645bf8b5b314e275e3e8 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 16 Jan 2020 15:14:50 +0100 Subject: [PATCH] x86/setup: detect live update breadcrumb at boot and map data stream The breadcrumb is written to the first page of the reserved bootmem, as the last instructions processed by kexec_reloc(). Check for it there and follow it to the migration data stream. Mark the pages of the sglist and the individual data stream PGC_allocated so that init_heap_pages() won't touch them. Also make lu_stream_free() remove the PGC_allocated flag. That function is already used for the cleanup in the error case on kexec but the flag should never be set in that case so clearing it will be a no-op. Other pages which are handed across live update will also need to have their PGC_allocated flag removed as part of the "rehabilitation" as they are introduced directly into domains, etc. They will remain in state PGC_state_uninitialised so that when they are eventually returned to the heap, init_heap_pages() will process them correctly and create node structures as appropriate, etc. Note: we can't use PGC_state_inuse for this as we do want those pages to be processed by init_heap_pages() if/when they are subsequently freed. 
Signed-off-by: David Woodhouse --- xen/arch/x86/setup.c | 41 +++++++++++++++++++++++++++++++++++-- xen/common/lu/stream.c | 45 ++++++++++++++++++++++++++++++++++++++++- xen/common/page_alloc.c | 9 ++++++--- xen/include/xen/lu.h | 3 +++ 4 files changed, 92 insertions(+), 6 deletions(-) diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c index fa51c56f30..7a61b7ce0a 100644 --- a/xen/arch/x86/setup.c +++ b/xen/arch/x86/setup.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -769,6 +770,7 @@ static struct domain * __init create_dom0(const module_t *image, #define PREBUILT_MAP_LIMIT (1 << L2_PAGETABLE_SHIFT) unsigned long lu_bootmem_start, lu_bootmem_size; +static unsigned long lu_breadcrumb_phys; #ifdef CONFIG_LIVE_UPDATE static int __init parse_liveupdate(const char *str) @@ -800,6 +802,11 @@ static int __init parse_liveupdate(const char *str) printk(XENLOG_INFO "Live update area 0x%lx-0x%lx (0x%lx)\n", lu_bootmem_start, lu_bootmem_start + lu_bootmem_size, lu_bootmem_size); + /* + * If present, the breadcrumb leading to the migration data stream is + * in the very beginning of the reserved bootmem region. + */ + lu_breadcrumb_phys = lu_bootmem_start; return 0; } custom_param("liveupdate", parse_liveupdate); @@ -823,6 +830,8 @@ void __init noreturn __start_xen(unsigned long mbi_p) .stop_bits = 1 }; const char *hypervisor_name; + uint64_t lu_mfnlist_phys = 0, lu_nr_pages = 0; + struct lu_stream lu_stream; /* Critical region without IDT or TSS. Any fault is deadly! */ @@ -967,9 +976,16 @@ void __init noreturn __start_xen(unsigned long mbi_p) printk(" Found %d EDD information structures\n", bootsym(boot_edd_info_nr)); - /* Check that we have at least one Multiboot module. */ if ( !(mbi->flags & MBI_MODULES) || (mbi->mods_count == 0) ) - panic("dom0 kernel not specified. Check bootloader configuration\n"); + { + if ( !lu_breadcrumb_phys ) + panic("dom0 kernel not specified. 
Check bootloader configuration\n"); + } + else + { + /* If modules are provided, don't even look for live update data. */ + lu_breadcrumb_phys = 0; + } /* Check that we don't have a silly number of modules. */ if ( mbi->mods_count > sizeof(module_map) * 8 ) @@ -1419,6 +1435,22 @@ void __init noreturn __start_xen(unsigned long mbi_p) if ( !xen_phys_start ) panic("Not enough memory to relocate Xen\n"); + /* Check for the state breadcrumb before giving it to the boot allocator */ + if ( IS_ENABLED(CONFIG_LIVE_UPDATE) && lu_breadcrumb_phys ) + { + uint64_t *breadcrumb = maddr_to_virt(lu_breadcrumb_phys); + + lu_mfnlist_phys = breadcrumb[1]; + lu_nr_pages = breadcrumb[2] >> PAGE_SHIFT; + + if ( breadcrumb[0] == LIVE_UPDATE_MAGIC && lu_nr_pages) { + printk("%ld pages of live update data at 0x%lx\n", lu_nr_pages, lu_mfnlist_phys); + } else { + panic("Live update breadcrumb not found: %lx %lx %lx at %lx\n", + breadcrumb[0], breadcrumb[1], breadcrumb[2], lu_breadcrumb_phys); + } + } + if ( lu_bootmem_start ) { if ( !lu_reserved ) @@ -1668,6 +1700,11 @@ void __init noreturn __start_xen(unsigned long mbi_p) numa_initmem_init(0, raw_max_page); + if ( IS_ENABLED(CONFIG_LIVE_UPDATE) && lu_nr_pages ) + { + lu_stream_map(&lu_stream, lu_mfnlist_phys, lu_nr_pages); + } + if ( lu_bootmem_start ) { unsigned long limit = virt_to_mfn(HYPERVISOR_VIRT_END - 1); diff --git a/xen/common/lu/stream.c b/xen/common/lu/stream.c index 10e123a466..5318b7fe7e 100644 --- a/xen/common/lu/stream.c +++ b/xen/common/lu/stream.c @@ -108,6 +108,7 @@ int lu_stream_append(struct lu_stream *stream, const void *data, size_t size) void lu_stream_free(struct lu_stream *stream) { unsigned int order = get_order_from_bytes((stream->nr_pages + 1) * sizeof(mfn_t)); + struct page_info *pg; unsigned int i; if ( stream->data ) @@ -118,12 +119,54 @@ void lu_stream_free(struct lu_stream *stream) for ( i = 0; i < stream->nr_pages; i++ ) { if (mfn_valid(stream->pagelist[i])) - 
free_domheap_page(mfn_to_page(stream->pagelist[i])); + { + pg = mfn_to_page(stream->pagelist[i]); + pg->count_info &= ~PGC_allocated; + free_domheap_page(pg); + } } + pg = virt_to_page(stream->pagelist); + for ( i = 0; i < 1<<order; i++ ) + pg[i].count_info &= ~PGC_allocated; + free_xenheap_pages(stream->pagelist, order); } } +void lu_stream_map(struct lu_stream *stream, unsigned long mfns_phys, int nr_pages) +{ + unsigned int order = get_order_from_bytes((nr_pages + 1) * sizeof(mfn_t)); + unsigned int i; + + memset(stream, 0, sizeof(*stream)); + + stream->len = nr_pages << PAGE_SHIFT; + stream->nr_pages = nr_pages; + stream->pagelist = __va(mfns_phys); + + map_pages_to_xen((unsigned long)stream->pagelist, maddr_to_mfn(mfns_phys), + 1 << order, PAGE_HYPERVISOR); + + /* Reserve the pages used for the pagelist itself. */ + for ( i = 0; i < (1 << order); i++ ) + { + maddr_to_page(mfns_phys + (i << PAGE_SHIFT))->count_info |= PGC_allocated; + } + + /* Validate and reserve the data pages */ + for ( i = 0; i < nr_pages; i++ ) + { + if (!mfn_valid(stream->pagelist[i])) + panic("Invalid MFN %lx in live update stream\n", mfn_x(stream->pagelist[i])); + + mfn_to_page(stream->pagelist[i])->count_info |= PGC_allocated; + } + + stream->data = vmap(stream->pagelist, nr_pages); + if (!stream->data) + panic("Failed to map live update data\n"); +} + /* * local variables: * mode: c diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c index 6dfcc9631b..85c6d0459a 100644 --- a/xen/common/page_alloc.c +++ b/xen/common/page_alloc.c @@ -1790,7 +1790,7 @@ static unsigned long contig_avail_pages(struct page_info *pg, unsigned long max_ for ( i = 0; i < max_pages; i++) { - if ( page_state_is(pg + i, broken) ) + if ( page_state_is(pg + i, broken) || pg[i].count_info & PGC_allocated ) break; } @@ -1837,8 +1837,11 @@ static void init_heap_pages(struct page_info *pg, unsigned long nr_pages, { unsigned int nid = phys_to_nid(page_to_maddr(pg+i)); - /* If the (first) page is already marked broken, don't add it. 
*/ - if ( page_state_is(pg + i, broken) ) + /* + * If the (first) page is already marked broken or allocated, + * don't add it. + */ + if ( page_state_is(pg + i, broken) || pg[i].count_info & PGC_allocated ) continue; if ( unlikely(!avail[nid]) ) diff --git a/xen/include/xen/lu.h b/xen/include/xen/lu.h index 21ee1825d3..77089c3dbf 100644 --- a/xen/include/xen/lu.h +++ b/xen/include/xen/lu.h @@ -19,6 +19,9 @@ void lu_stream_end_reservation(struct lu_stream *stream, size_t size); int lu_stream_append(struct lu_stream *stream, const void *data, size_t size); void lu_stream_free(struct lu_stream *stream); +void lu_stream_map(struct lu_stream *stream, unsigned long mfns_phys, + int nr_pages); + #endif /* __XEN_LU_H__ */ /* -- 2.39.5