ia64/xen-unstable

changeset 14749:e518f2fbdd72

[HVM] Save/restore: merge xc_linux_restore and xc_hvm_restore
into one function (and one file) since they share a lot of code
Signed-off-by: Tim Deegan <Tim.Deegan@xensource.com>
author Tim Deegan <Tim.Deegan@xensource.com>
date Thu Apr 05 15:11:22 2007 +0100 (2007-04-05)
parents 602d061ff51f
children 40d4150764ad
files tools/libxc/Makefile tools/libxc/xc_domain_restore.c tools/libxc/xc_hvm_restore.c tools/libxc/xc_hvm_save.c tools/libxc/xc_linux_restore.c tools/libxc/xenguest.h tools/libxc/xg_private.c tools/xcutils/xc_restore.c
line diff
     1.1 --- a/tools/libxc/Makefile	Thu Apr 05 10:43:50 2007 +0100
     1.2 +++ b/tools/libxc/Makefile	Thu Apr 05 15:11:22 2007 +0100
     1.3 @@ -26,8 +26,8 @@ CTRL_SRCS-$(CONFIG_X86_Linux) += xc_ptra
     1.4  
     1.5  GUEST_SRCS-y :=
     1.6  GUEST_SRCS-y += xg_private.c
     1.7 -GUEST_SRCS-$(CONFIG_MIGRATE) += xc_linux_restore.c xc_linux_save.c
     1.8 -GUEST_SRCS-$(CONFIG_HVM) += xc_hvm_build.c xc_hvm_restore.c xc_hvm_save.c
     1.9 +GUEST_SRCS-$(CONFIG_MIGRATE) += xc_domain_restore.c xc_linux_save.c
    1.10 +GUEST_SRCS-$(CONFIG_HVM) += xc_hvm_build.c xc_hvm_save.c
    1.11  
    1.12  # symlink libelf from xen/common/libelf/
    1.13  LIBELF_SRCS := libelf-tools.c libelf-loader.c
     2.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     2.2 +++ b/tools/libxc/xc_domain_restore.c	Thu Apr 05 15:11:22 2007 +0100
     2.3 @@ -0,0 +1,1086 @@
     2.4 +/******************************************************************************
     2.5 + * xc_domain_restore.c
     2.6 + *
     2.7 + * Restore the state of a guest session.
     2.8 + *
     2.9 + * Copyright (c) 2003, K A Fraser.
    2.10 + * Copyright (c) 2006, Intel Corporation
    2.11 + * Copyright (c) 2007, XenSource Inc.
    2.12 + *
    2.13 + * This program is free software; you can redistribute it and/or modify it
    2.14 + * under the terms and conditions of the GNU General Public License,
    2.15 + * version 2, as published by the Free Software Foundation.
    2.16 + *
    2.17 + * This program is distributed in the hope it will be useful, but WITHOUT
    2.18 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    2.19 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
    2.20 + * more details.
    2.21 + *
    2.22 + * You should have received a copy of the GNU General Public License along with
    2.23 + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
    2.24 + * Place - Suite 330, Boston, MA 02111-1307 USA.
    2.25 + *
    2.26 + */
    2.27 +
    2.28 +#include <stdlib.h>
    2.29 +#include <unistd.h>
    2.30 +
    2.31 +#include "xg_private.h"
    2.32 +#include "xg_save_restore.h"
    2.33 +#include "xc_dom.h"
    2.34 +
    2.35 +#include <xen/hvm/ioreq.h>
    2.36 +#include <xen/hvm/params.h>
    2.37 +
    2.38 +/* max mfn of the current host machine */
    2.39 +static unsigned long max_mfn;
    2.40 +
    2.41 +/* virtual starting address of the hypervisor */
    2.42 +static unsigned long hvirt_start;
    2.43 +
    2.44 +/* #levels of page tables used by the current guest */
    2.45 +static unsigned int pt_levels;
    2.46 +
    2.47 +/* number of pfns this guest has (i.e. number of entries in the P2M) */
    2.48 +static unsigned long p2m_size;
    2.49 +
    2.50 +/* number of 'in use' pfns in the guest (i.e. #P2M entries with a valid mfn) */
    2.51 +static unsigned long nr_pfns;
    2.52 +
    2.53 +/* Live mapping of the table mapping each PFN to its current MFN. */
    2.54 +static xen_pfn_t *live_p2m = NULL;
    2.55 +
    2.56 +/* A table mapping each PFN to its new MFN. */
    2.57 +static xen_pfn_t *p2m = NULL;
    2.58 +
    2.59 +/* A table of P2M mappings in the current region */
    2.60 +static xen_pfn_t *p2m_batch = NULL;
    2.61 +
    2.62 +static ssize_t
    2.63 +read_exact(int fd, void *buf, size_t count)
    2.64 +{
    2.65 +    int r = 0, s;
    2.66 +    unsigned char *b = buf;
    2.67 +
    2.68 +    while (r < count) {
    2.69 +        s = read(fd, &b[r], count - r);
    2.70 +        if ((s == -1) && (errno == EINTR))
    2.71 +            continue;
    2.72 +        if (s <= 0) {
    2.73 +            break;
    2.74 +        }
    2.75 +        r += s;
    2.76 +    }
    2.77 +
    2.78 +    return (r == count) ? 1 : 0;
    2.79 +}
    2.80 +
    2.81 +/*
    2.82 +** In the state file (or during transfer), all page-table pages are
    2.83 +** converted into a 'canonical' form where references to actual mfns
    2.84 +** are replaced with references to the corresponding pfns.
    2.85 +** This function inverts that operation, replacing the pfn values with
    2.86 +** the (now known) appropriate mfn values.
    2.87 +*/
    2.88 +static int uncanonicalize_pagetable(int xc_handle, uint32_t dom, 
    2.89 +                                    unsigned long type, void *page)
    2.90 +{
    2.91 +    int i, pte_last;
    2.92 +    unsigned long pfn;
    2.93 +    uint64_t pte;
    2.94 +    int nr_mfns = 0; 
    2.95 +
    2.96 +    pte_last = PAGE_SIZE / ((pt_levels == 2)? 4 : 8);
    2.97 +
    2.98 +    /* First pass: work out how many (if any) MFNs we need to alloc */
    2.99 +    for(i = 0; i < pte_last; i++) {
   2.100 +        
   2.101 +        if(pt_levels == 2)
   2.102 +            pte = ((uint32_t *)page)[i];
   2.103 +        else
   2.104 +            pte = ((uint64_t *)page)[i];
   2.105 +        
   2.106 +        /* XXX SMH: below needs fixing for PROT_NONE etc */
   2.107 +        if(!(pte & _PAGE_PRESENT))
   2.108 +            continue; 
   2.109 +        
   2.110 +        pfn = (pte >> PAGE_SHIFT) & MFN_MASK_X86;
   2.111 +        
   2.112 +        if(pfn >= p2m_size) {
   2.113 +            /* This "page table page" is probably not one; bail. */
   2.114 +            ERROR("Frame number in type %lu page table is out of range: "
   2.115 +                  "i=%d pfn=0x%lx p2m_size=%lu",
   2.116 +                  type >> 28, i, pfn, p2m_size);
   2.117 +            return 0;
   2.118 +        }
   2.119 +        
   2.120 +        if(p2m[pfn] == INVALID_P2M_ENTRY) {
   2.121 +            /* Have a 'valid' PFN without a matching MFN - need to alloc */
   2.122 +            p2m_batch[nr_mfns++] = pfn; 
   2.123 +        }
   2.124 +    }
   2.125 +    
   2.126 +    
    2.127 +    /* Allocate the requisite number of mfns */
   2.128 +    if (nr_mfns && xc_domain_memory_populate_physmap(
   2.129 +            xc_handle, dom, nr_mfns, 0, 0, p2m_batch) != 0) { 
   2.130 +        ERROR("Failed to allocate memory for batch.!\n"); 
   2.131 +        errno = ENOMEM;
   2.132 +        return 0; 
   2.133 +    }
   2.134 +    
   2.135 +    /* Second pass: uncanonicalize each present PTE */
   2.136 +    nr_mfns = 0;
   2.137 +    for(i = 0; i < pte_last; i++) {
   2.138 +
   2.139 +        if(pt_levels == 2)
   2.140 +            pte = ((uint32_t *)page)[i];
   2.141 +        else
   2.142 +            pte = ((uint64_t *)page)[i];
   2.143 +        
   2.144 +        /* XXX SMH: below needs fixing for PROT_NONE etc */
   2.145 +        if(!(pte & _PAGE_PRESENT))
   2.146 +            continue;
   2.147 +        
   2.148 +        pfn = (pte >> PAGE_SHIFT) & MFN_MASK_X86;
   2.149 +        
   2.150 +        if(p2m[pfn] == INVALID_P2M_ENTRY)
   2.151 +            p2m[pfn] = p2m_batch[nr_mfns++];
   2.152 +
   2.153 +        pte &= ~MADDR_MASK_X86;
   2.154 +        pte |= (uint64_t)p2m[pfn] << PAGE_SHIFT;
   2.155 +
   2.156 +        if(pt_levels == 2)
   2.157 +            ((uint32_t *)page)[i] = (uint32_t)pte;
   2.158 +        else
   2.159 +            ((uint64_t *)page)[i] = (uint64_t)pte;
   2.160 +    }
   2.161 +
   2.162 +    return 1;
   2.163 +}
   2.164 +
   2.165 +
   2.166 +/* Load the p2m frame list, plus potential extended info chunk */
   2.167 +static xen_pfn_t * load_p2m_frame_list(int io_fd, int *pae_extended_cr3)
   2.168 +{
   2.169 +    xen_pfn_t *p2m_frame_list;
   2.170 +    vcpu_guest_context_t ctxt;
   2.171 +
   2.172 +    if (!(p2m_frame_list = malloc(P2M_FL_SIZE))) {
   2.173 +        ERROR("Couldn't allocate p2m_frame_list array");
   2.174 +        return NULL;
   2.175 +    }
   2.176 +    
   2.177 +    /* Read first entry of P2M list, or extended-info signature (~0UL). */
   2.178 +    if (!read_exact(io_fd, p2m_frame_list, sizeof(long))) {
   2.179 +            ERROR("read extended-info signature failed");
   2.180 +            return NULL;
   2.181 +        }
   2.182 +    
   2.183 +    if (p2m_frame_list[0] == ~0UL) {
   2.184 +        uint32_t tot_bytes;
   2.185 +        
   2.186 +        /* Next 4 bytes: total size of following extended info. */
   2.187 +        if (!read_exact(io_fd, &tot_bytes, sizeof(tot_bytes))) {
   2.188 +            ERROR("read extended-info size failed");
   2.189 +            return NULL;
   2.190 +        }
   2.191 +        
   2.192 +        while (tot_bytes) {
   2.193 +            uint32_t chunk_bytes;
   2.194 +            char     chunk_sig[4];
   2.195 +            
   2.196 +            /* 4-character chunk signature + 4-byte remaining chunk size. */
   2.197 +            if (!read_exact(io_fd, chunk_sig, sizeof(chunk_sig)) ||
   2.198 +                !read_exact(io_fd, &chunk_bytes, sizeof(chunk_bytes))) {
   2.199 +                ERROR("read extended-info chunk signature failed");
   2.200 +                return NULL;
   2.201 +            }
   2.202 +            tot_bytes -= 8;
   2.203 +            
   2.204 +            /* VCPU context structure? */
   2.205 +            if (!strncmp(chunk_sig, "vcpu", 4)) {
   2.206 +                if (!read_exact(io_fd, &ctxt, sizeof(ctxt))) {
   2.207 +                    ERROR("read extended-info vcpu context failed");
   2.208 +                    return NULL;
   2.209 +                }
   2.210 +                tot_bytes   -= sizeof(struct vcpu_guest_context);
   2.211 +                chunk_bytes -= sizeof(struct vcpu_guest_context);
   2.212 +                
   2.213 +                if (ctxt.vm_assist & (1UL << VMASST_TYPE_pae_extended_cr3))
   2.214 +                    *pae_extended_cr3 = 1;
   2.215 +            }
   2.216 +            
   2.217 +            /* Any remaining bytes of this chunk: read and discard. */
   2.218 +            while (chunk_bytes) {
   2.219 +                unsigned long sz = chunk_bytes;
   2.220 +                if ( sz > P2M_FL_SIZE )
   2.221 +                    sz = P2M_FL_SIZE;
   2.222 +                if (!read_exact(io_fd, p2m_frame_list, sz)) {
   2.223 +                    ERROR("read-and-discard extended-info chunk bytes failed");
   2.224 +                    return NULL;
   2.225 +                }
   2.226 +                chunk_bytes -= sz;
   2.227 +                tot_bytes   -= sz;
   2.228 +            }
   2.229 +        }
   2.230 +        
   2.231 +        /* Now read the real first entry of P2M list. */
   2.232 +        if (!read_exact(io_fd, p2m_frame_list, sizeof(long))) {
   2.233 +            ERROR("read first entry of p2m_frame_list failed");
   2.234 +            return NULL;
   2.235 +        }
   2.236 +    }
   2.237 +    
   2.238 +    /* First entry is already read into the p2m array. */
   2.239 +    if (!read_exact(io_fd, &p2m_frame_list[1], P2M_FL_SIZE - sizeof(long))) {
   2.240 +            ERROR("read p2m_frame_list failed");
   2.241 +            return NULL;
   2.242 +    }
   2.243 +    
   2.244 +    return p2m_frame_list;
   2.245 +}
   2.246 +
   2.247 +
   2.248 +
   2.249 +int xc_domain_restore(int xc_handle, int io_fd, uint32_t dom,
   2.250 +                      unsigned int store_evtchn, unsigned long *store_mfn,
   2.251 +                      unsigned int console_evtchn, unsigned long *console_mfn,
   2.252 +                      unsigned int hvm, unsigned int pae)
   2.253 +{
   2.254 +    DECLARE_DOMCTL;
   2.255 +    int rc = 1, i, j, n, m, pae_extended_cr3 = 0;
   2.256 +    unsigned long mfn, pfn;
   2.257 +    unsigned int prev_pc, this_pc;
   2.258 +    int verify = 0;
   2.259 +    int nraces = 0;
   2.260 +
   2.261 +    /* The new domain's shared-info frame number. */
   2.262 +    unsigned long shared_info_frame;
   2.263 +    unsigned char shared_info_page[PAGE_SIZE]; /* saved contents from file */
   2.264 +    shared_info_t *shared_info = (shared_info_t *)shared_info_page;
   2.265 +
   2.266 +    /* A copy of the CPU context of the guest. */
   2.267 +    vcpu_guest_context_t ctxt;
   2.268 +
   2.269 +    /* A table containing the type of each PFN (/not/ MFN!). */
   2.270 +    unsigned long *pfn_type = NULL;
   2.271 +
   2.272 +    /* A table of MFNs to map in the current region */
   2.273 +    xen_pfn_t *region_mfn = NULL;
   2.274 +
   2.275 +    /* Types of the pfns in the current region */
   2.276 +    unsigned long region_pfn_type[MAX_BATCH_SIZE];
   2.277 +
   2.278 +    /* A temporary mapping, and a copy, of one frame of guest memory. */
   2.279 +    unsigned long *page = NULL;
   2.280 +
   2.281 +    /* A copy of the pfn-to-mfn table frame list. */
   2.282 +    xen_pfn_t *p2m_frame_list = NULL;
   2.283 +    
   2.284 +    /* A temporary mapping of the guest's start_info page. */
   2.285 +    start_info_t *start_info;
   2.286 +
   2.287 +    /* Our mapping of the current region (batch) */
   2.288 +    char *region_base;
   2.289 +
   2.290 +    xc_mmu_t *mmu = NULL;
   2.291 +
   2.292 +    /* used by debug verify code */
   2.293 +    unsigned long buf[PAGE_SIZE/sizeof(unsigned long)];
   2.294 +
   2.295 +    struct mmuext_op pin[MAX_PIN_BATCH];
   2.296 +    unsigned int nr_pins;
   2.297 +
   2.298 +    uint64_t vcpumap = 1ULL;
   2.299 +    unsigned int max_vcpu_id = 0;
   2.300 +    int new_ctxt_format = 0;
   2.301 +
   2.302 +    /* Magic frames in HVM guests: ioreqs and xenstore comms. */
   2.303 +    uint64_t magic_pfns[3]; /* ioreq_pfn, bufioreq_pfn, store_pfn */
   2.304 +
   2.305 +    /* Buffer for holding HVM context */
   2.306 +    uint8_t *hvm_buf = NULL;
   2.307 +
   2.308 +    /* For info only */
   2.309 +    nr_pfns = 0;
   2.310 +
   2.311 +    if ( !read_exact(io_fd, &p2m_size, sizeof(unsigned long)) )
   2.312 +    {
   2.313 +        ERROR("read: p2m_size");
   2.314 +        goto out;
   2.315 +    }
   2.316 +    DPRINTF("xc_domain_restore start: p2m_size = %lx\n", p2m_size);
   2.317 +
   2.318 +    if ( !hvm )
   2.319 +    {
   2.320 +        /*
   2.321 +         * XXX For now, 32bit dom0's can only save/restore 32bit domUs
   2.322 +         * on 64bit hypervisors.
   2.323 +         */
   2.324 +        memset(&domctl, 0, sizeof(domctl));
   2.325 +        domctl.domain = dom;
   2.326 +        domctl.cmd    = XEN_DOMCTL_set_address_size;
   2.327 +        domctl.u.address_size.size = sizeof(unsigned long) * 8;
   2.328 +        rc = do_domctl(xc_handle, &domctl);
   2.329 +        if ( rc != 0 ) {
   2.330 +            ERROR("Unable to set guest address size.");
   2.331 +            goto out;
   2.332 +        }
   2.333 +        rc = 1;
   2.334 +    }
   2.335 +
   2.336 +    if(!get_platform_info(xc_handle, dom,
   2.337 +                          &max_mfn, &hvirt_start, &pt_levels)) {
   2.338 +        ERROR("Unable to get platform info.");
   2.339 +        return 1;
   2.340 +    }
   2.341 +
   2.342 +    if (lock_pages(&ctxt, sizeof(ctxt))) {
   2.343 +        /* needed for build domctl, but might as well do early */
   2.344 +        ERROR("Unable to lock ctxt");
   2.345 +        return 1;
   2.346 +    }
   2.347 +
   2.348 +    /* Load the p2m frame list, plus potential extended info chunk */
   2.349 +    if ( !hvm ) 
   2.350 +    {
   2.351 +        p2m_frame_list = load_p2m_frame_list(io_fd, &pae_extended_cr3);
   2.352 +        if ( !p2m_frame_list )
   2.353 +            goto out;
   2.354 +    }
   2.355 +
   2.356 +    /* We want zeroed memory so use calloc rather than malloc. */
   2.357 +    p2m        = calloc(p2m_size, sizeof(xen_pfn_t));
   2.358 +    pfn_type   = calloc(p2m_size, sizeof(unsigned long));
   2.359 +    region_mfn = calloc(MAX_BATCH_SIZE, sizeof(xen_pfn_t));
   2.360 +    p2m_batch  = calloc(MAX_BATCH_SIZE, sizeof(xen_pfn_t));
   2.361 +
   2.362 +    if ((p2m == NULL) || (pfn_type == NULL) ||
   2.363 +        (region_mfn == NULL) || (p2m_batch == NULL)) {
   2.364 +        ERROR("memory alloc failed");
   2.365 +        errno = ENOMEM;
   2.366 +        goto out;
   2.367 +    }
   2.368 +
   2.369 +    if (lock_pages(region_mfn, sizeof(xen_pfn_t) * MAX_BATCH_SIZE)) {
   2.370 +        ERROR("Could not lock region_mfn");
   2.371 +        goto out;
   2.372 +    }
   2.373 +
   2.374 +    if (lock_pages(p2m_batch, sizeof(xen_pfn_t) * MAX_BATCH_SIZE)) {
   2.375 +        ERROR("Could not lock p2m_batch");
   2.376 +        goto out;
   2.377 +    }
   2.378 +
   2.379 +    /* Get the domain's shared-info frame. */
   2.380 +    domctl.cmd = XEN_DOMCTL_getdomaininfo;
   2.381 +    domctl.domain = (domid_t)dom;
   2.382 +    if (xc_domctl(xc_handle, &domctl) < 0) {
   2.383 +        ERROR("Could not get information on new domain");
   2.384 +        goto out;
   2.385 +    }
   2.386 +    shared_info_frame = domctl.u.getdomaininfo.shared_info_frame;
   2.387 +
   2.388 +    /* Mark all PFNs as invalid; we allocate on demand */
   2.389 +    for ( pfn = 0; pfn < p2m_size; pfn++ )
   2.390 +        p2m[pfn] = INVALID_P2M_ENTRY;
   2.391 +
   2.392 +    if(!(mmu = xc_init_mmu_updates(xc_handle, dom))) {
   2.393 +        ERROR("Could not initialise for MMU updates");
   2.394 +        goto out;
   2.395 +    }
   2.396 +
   2.397 +    DPRINTF("Reloading memory pages:   0%%\n");
   2.398 +
   2.399 +    /*
   2.400 +     * Now simply read each saved frame into its new machine frame.
   2.401 +     * We uncanonicalise page tables as we go.
   2.402 +     */
   2.403 +    prev_pc = 0;
   2.404 +
   2.405 +    n = m = 0;
   2.406 +    while (1) {
   2.407 +
   2.408 +        int j, nr_mfns = 0; 
   2.409 +
   2.410 +        this_pc = (n * 100) / p2m_size;
   2.411 +        if ( (this_pc - prev_pc) >= 5 )
   2.412 +        {
   2.413 +            PPRINTF("\b\b\b\b%3d%%", this_pc);
   2.414 +            prev_pc = this_pc;
   2.415 +        }
   2.416 +
   2.417 +        if (!read_exact(io_fd, &j, sizeof(int))) {
   2.418 +            ERROR("Error when reading batch size");
   2.419 +            goto out;
   2.420 +        }
   2.421 +
   2.422 +        PPRINTF("batch %d\n",j);
   2.423 +
   2.424 +        if (j == -1) {
   2.425 +            verify = 1;
   2.426 +            DPRINTF("Entering page verify mode\n");
   2.427 +            continue;
   2.428 +        }
   2.429 +
   2.430 +        if (j == -2) {
   2.431 +            new_ctxt_format = 1;
   2.432 +            if (!read_exact(io_fd, &max_vcpu_id, sizeof(int)) ||
   2.433 +                (max_vcpu_id >= 64) ||
   2.434 +                !read_exact(io_fd, &vcpumap, sizeof(uint64_t))) {
   2.435 +                ERROR("Error when reading max_vcpu_id");
   2.436 +                goto out;
   2.437 +            }
   2.438 +            continue;
   2.439 +        }
   2.440 +
   2.441 +        if (j == 0)
   2.442 +            break;  /* our work here is done */
   2.443 +
   2.444 +        if (j > MAX_BATCH_SIZE) {
   2.445 +            ERROR("Max batch size exceeded. Giving up.");
   2.446 +            goto out;
   2.447 +        }
   2.448 +
   2.449 +        if (!read_exact(io_fd, region_pfn_type, j*sizeof(unsigned long))) {
   2.450 +            ERROR("Error when reading region pfn types");
   2.451 +            goto out;
   2.452 +        }
   2.453 +
   2.454 +        /* First pass for this batch: work out how much memory to alloc */
   2.455 +        nr_mfns = 0; 
   2.456 +        for ( i = 0; i < j; i++ )
   2.457 +        {
   2.458 +            unsigned long pfn, pagetype;
   2.459 +            pfn      = region_pfn_type[i] & ~XEN_DOMCTL_PFINFO_LTAB_MASK;
   2.460 +            pagetype = region_pfn_type[i] &  XEN_DOMCTL_PFINFO_LTAB_MASK;
   2.461 +
   2.462 +            if ( (pagetype != XEN_DOMCTL_PFINFO_XTAB) && 
   2.463 +                 (p2m[pfn] == INVALID_P2M_ENTRY) )
   2.464 +            {
   2.465 +                /* Have a live PFN which hasn't had an MFN allocated */
   2.466 +                p2m_batch[nr_mfns++] = pfn; 
   2.467 +            }
   2.468 +        } 
   2.469 +
   2.470 +
   2.471 +        /* Now allocate a bunch of mfns for this batch */
   2.472 +        if (nr_mfns && xc_domain_memory_populate_physmap(
   2.473 +                xc_handle, dom, nr_mfns, 0, 0, p2m_batch) != 0) { 
   2.474 +            ERROR("Failed to allocate memory for batch.!\n"); 
   2.475 +            errno = ENOMEM;
   2.476 +            goto out;
   2.477 +        }
   2.478 +
   2.479 +        /* Second pass for this batch: update p2m[] and region_mfn[] */
   2.480 +        nr_mfns = 0; 
   2.481 +        for ( i = 0; i < j; i++ )
   2.482 +        {
   2.483 +            unsigned long pfn, pagetype;
   2.484 +            pfn      = region_pfn_type[i] & ~XEN_DOMCTL_PFINFO_LTAB_MASK;
   2.485 +            pagetype = region_pfn_type[i] &  XEN_DOMCTL_PFINFO_LTAB_MASK;
   2.486 +
   2.487 +            if ( pagetype == XEN_DOMCTL_PFINFO_XTAB)
   2.488 +                region_mfn[i] = ~0UL; /* map will fail but we don't care */
   2.489 +            else 
   2.490 +            {
   2.491 +                if (p2m[pfn] == INVALID_P2M_ENTRY) {
   2.492 +                    /* We just allocated a new mfn above; update p2m */
   2.493 +                    p2m[pfn] = p2m_batch[nr_mfns++]; 
   2.494 +                    nr_pfns++; 
   2.495 +                }
   2.496 +
   2.497 +                /* setup region_mfn[] for batch map.
   2.498 +                 * For HVM guests, this interface takes PFNs, not MFNs */
   2.499 +                region_mfn[i] = hvm ? pfn : p2m[pfn]; 
   2.500 +            }
   2.501 +        } 
   2.502 +
   2.503 +        /* Map relevant mfns */
   2.504 +        region_base = xc_map_foreign_batch(
   2.505 +            xc_handle, dom, PROT_WRITE, region_mfn, j);
   2.506 +
   2.507 +        if ( region_base == NULL )
   2.508 +        {
   2.509 +            ERROR("map batch failed");
   2.510 +            goto out;
   2.511 +        }
   2.512 +
   2.513 +        for ( i = 0; i < j; i++ )
   2.514 +        {
   2.515 +            void *page;
   2.516 +            unsigned long pagetype;
   2.517 +
   2.518 +            pfn      = region_pfn_type[i] & ~XEN_DOMCTL_PFINFO_LTAB_MASK;
   2.519 +            pagetype = region_pfn_type[i] &  XEN_DOMCTL_PFINFO_LTAB_MASK;
   2.520 +
   2.521 +            if ( pagetype == XEN_DOMCTL_PFINFO_XTAB )
   2.522 +                /* a bogus/unmapped page: skip it */
   2.523 +                continue;
   2.524 +
   2.525 +            if ( pfn > p2m_size )
   2.526 +            {
   2.527 +                ERROR("pfn out of range");
   2.528 +                goto out;
   2.529 +            }
   2.530 +
   2.531 +            pfn_type[pfn] = pagetype;
   2.532 +
   2.533 +            mfn = p2m[pfn];
   2.534 +
   2.535 +            /* In verify mode, we use a copy; otherwise we work in place */
   2.536 +            page = verify ? (void *)buf : (region_base + i*PAGE_SIZE);
   2.537 +
   2.538 +            if (!read_exact(io_fd, page, PAGE_SIZE)) {
   2.539 +                ERROR("Error when reading page (type was %lx)", pagetype);
   2.540 +                goto out;
   2.541 +            }
   2.542 +
   2.543 +            pagetype &= XEN_DOMCTL_PFINFO_LTABTYPE_MASK;
   2.544 +
   2.545 +            if ( (pagetype >= XEN_DOMCTL_PFINFO_L1TAB) && 
   2.546 +                 (pagetype <= XEN_DOMCTL_PFINFO_L4TAB) )
   2.547 +            {
   2.548 +                /*
   2.549 +                ** A page table page - need to 'uncanonicalize' it, i.e.
   2.550 +                ** replace all the references to pfns with the corresponding
   2.551 +                ** mfns for the new domain.
   2.552 +                **
   2.553 +                ** On PAE we need to ensure that PGDs are in MFNs < 4G, and
   2.554 +                ** so we may need to update the p2m after the main loop.
   2.555 +                ** Hence we defer canonicalization of L1s until then.
   2.556 +                */
   2.557 +                if ((pt_levels != 3) ||
   2.558 +                    pae_extended_cr3 ||
   2.559 +                    (pagetype != XEN_DOMCTL_PFINFO_L1TAB)) {
   2.560 +
   2.561 +                    if (!uncanonicalize_pagetable(xc_handle, dom, 
   2.562 +                                                  pagetype, page)) {
   2.563 +                        /*
   2.564 +                        ** Failing to uncanonicalize a page table can be ok
   2.565 +                        ** under live migration since the pages type may have
   2.566 +                        ** changed by now (and we'll get an update later).
   2.567 +                        */
   2.568 +                        DPRINTF("PT L%ld race on pfn=%08lx mfn=%08lx\n",
   2.569 +                                pagetype >> 28, pfn, mfn);
   2.570 +                        nraces++;
   2.571 +                        continue;
   2.572 +                    } 
   2.573 +                }
   2.574 +            }
   2.575 +            else if ( pagetype != XEN_DOMCTL_PFINFO_NOTAB )
   2.576 +            {
   2.577 +                ERROR("Bogus page type %lx page table is out of range: "
   2.578 +                    "i=%d p2m_size=%lu", pagetype, i, p2m_size);
   2.579 +                goto out;
   2.580 +
   2.581 +            }
   2.582 +
   2.583 +
   2.584 +            if (verify) {
   2.585 +
   2.586 +                int res = memcmp(buf, (region_base + i*PAGE_SIZE), PAGE_SIZE);
   2.587 +
   2.588 +                if (res) {
   2.589 +
   2.590 +                    int v;
   2.591 +
   2.592 +                    DPRINTF("************** pfn=%lx type=%lx gotcs=%08lx "
   2.593 +                            "actualcs=%08lx\n", pfn, pfn_type[pfn],
   2.594 +                            csum_page(region_base + i*PAGE_SIZE),
   2.595 +                            csum_page(buf));
   2.596 +
   2.597 +                    for (v = 0; v < 4; v++) {
   2.598 +
   2.599 +                        unsigned long *p = (unsigned long *)
   2.600 +                            (region_base + i*PAGE_SIZE);
   2.601 +                        if (buf[v] != p[v])
   2.602 +                            DPRINTF("    %d: %08lx %08lx\n", v, buf[v], p[v]);
   2.603 +                    }
   2.604 +                }
   2.605 +            }
   2.606 +
   2.607 +            if (!hvm 
   2.608 +                && xc_add_mmu_update(xc_handle, mmu,
   2.609 +                                     (((unsigned long long)mfn) << PAGE_SHIFT)
   2.610 +                                     | MMU_MACHPHYS_UPDATE, pfn)) {
   2.611 +                ERROR("failed machpys update mfn=%lx pfn=%lx", mfn, pfn);
   2.612 +                goto out;
   2.613 +            }
   2.614 +        } /* end of 'batch' for loop */
   2.615 +
   2.616 +        munmap(region_base, j*PAGE_SIZE);
   2.617 +        n+= j; /* crude stats */
   2.618 +
   2.619 +        /* 
   2.620 +         * Discard cache for portion of file read so far up to last
   2.621 +         *  page boundary every 16MB or so.
   2.622 +         */
   2.623 +        m += j;
   2.624 +        if ( m > MAX_PAGECACHE_USAGE )
   2.625 +        {
   2.626 +            discard_file_cache(io_fd, 0 /* no flush */);
   2.627 +            m = 0;
   2.628 +        }
   2.629 +    }
   2.630 +
   2.631 +    /*
   2.632 +     * Ensure we flush all machphys updates before potential PAE-specific
   2.633 +     * reallocations below.
   2.634 +     */
   2.635 +    if (!hvm && xc_finish_mmu_updates(xc_handle, mmu)) {
   2.636 +        ERROR("Error doing finish_mmu_updates()");
   2.637 +        goto out;
   2.638 +    }
   2.639 +
   2.640 +    DPRINTF("Received all pages (%d races)\n", nraces);
   2.641 +
   2.642 +    if ( hvm ) 
   2.643 +    {
   2.644 +        uint32_t rec_len;
   2.645 +
   2.646 +        /* Set HVM-specific parameters */
   2.647 +        if ( !read_exact(io_fd, magic_pfns, sizeof(magic_pfns)) )
   2.648 +        {
   2.649 +            ERROR("error reading magic page addresses");
   2.650 +            goto out;
   2.651 +        }
   2.652 +        
   2.653 +        /* These comms pages need to be zeroed at the start of day */
   2.654 +        if ( xc_clear_domain_page(xc_handle, dom, magic_pfns[0]) ||
   2.655 +             xc_clear_domain_page(xc_handle, dom, magic_pfns[1]) ||
   2.656 +             xc_clear_domain_page(xc_handle, dom, magic_pfns[2]) )
   2.657 +        {
   2.658 +            ERROR("error zeroing magic pages");
   2.659 +            goto out;
   2.660 +        }
   2.661 +        
   2.662 +        xc_set_hvm_param(xc_handle, dom, HVM_PARAM_IOREQ_PFN, magic_pfns[0]);
   2.663 +        xc_set_hvm_param(xc_handle, dom, HVM_PARAM_BUFIOREQ_PFN, magic_pfns[1]);
   2.664 +        xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN, magic_pfns[2]);
   2.665 +        xc_set_hvm_param(xc_handle, dom, HVM_PARAM_PAE_ENABLED, pae);
   2.666 +        xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_EVTCHN, store_evtchn);
   2.667 +        *store_mfn = magic_pfns[2];
   2.668 +
   2.669 +        /* Read vcpu contexts */
   2.670 +        for (i = 0; i <= max_vcpu_id; i++) 
   2.671 +        {
   2.672 +            if (!(vcpumap & (1ULL << i)))
   2.673 +                continue;
   2.674 +
   2.675 +            if ( !read_exact(io_fd, &(ctxt), sizeof(ctxt)) )
   2.676 +            {
   2.677 +                ERROR("error read vcpu context.\n");
   2.678 +                goto out;
   2.679 +            }
   2.680 +            
   2.681 +            if ( (rc = xc_vcpu_setcontext(xc_handle, dom, i, &ctxt)) )
   2.682 +            {
   2.683 +                ERROR("Could not set vcpu context, rc=%d", rc);
   2.684 +                goto out;
   2.685 +            }
   2.686 +            rc = 1;
   2.687 +        }
   2.688 +
   2.689 +        /* Read HVM context */
   2.690 +        if ( !read_exact(io_fd, &rec_len, sizeof(uint32_t)) )
   2.691 +        {
   2.692 +            ERROR("error read hvm context size!\n");
   2.693 +            goto out;
   2.694 +        }
   2.695 +        
   2.696 +        hvm_buf = malloc(rec_len);
   2.697 +        if ( hvm_buf == NULL )
   2.698 +        {
   2.699 +            ERROR("memory alloc for hvm context buffer failed");
   2.700 +            errno = ENOMEM;
   2.701 +            goto out;
   2.702 +        }
   2.703 +        
   2.704 +        if ( !read_exact(io_fd, hvm_buf, rec_len) )
   2.705 +        {
   2.706 +            ERROR("error loading the HVM context");
   2.707 +            goto out;
   2.708 +        }
   2.709 +        
   2.710 +        rc = xc_domain_hvm_setcontext(xc_handle, dom, hvm_buf, rec_len);
   2.711 +        if ( rc ) 
   2.712 +            ERROR("error setting the HVM context");
   2.713 +       
   2.714 +        goto out;
   2.715 +    }
   2.716 +
   2.717 +    /* Non-HVM guests only from here on */
   2.718 +
   2.719 +    if ((pt_levels == 3) && !pae_extended_cr3) {
   2.720 +
   2.721 +        /*
   2.722 +        ** XXX SMH on PAE we need to ensure PGDs are in MFNs < 4G. This
   2.723 +        ** is a little awkward and involves (a) finding all such PGDs and
    2.724 +        ** replacing them with 'lowmem' versions; (b) updating the p2m[]
   2.725 +        ** with the new info; and (c) canonicalizing all the L1s using the
   2.726 +        ** (potentially updated) p2m[].
   2.727 +        **
   2.728 +        ** This is relatively slow (and currently involves two passes through
   2.729 +        ** the pfn_type[] array), but at least seems to be correct. May wish
   2.730 +        ** to consider more complex approaches to optimize this later.
   2.731 +        */
   2.732 +
   2.733 +        int j, k;
   2.734 +        
   2.735 +        /* First pass: find all L3TABs current in > 4G mfns and get new mfns */
   2.736 +        for ( i = 0; i < p2m_size; i++ )
   2.737 +        {
   2.738 +            if ( ((pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) ==
   2.739 +                  XEN_DOMCTL_PFINFO_L3TAB) &&
   2.740 +                 (p2m[i] > 0xfffffUL) )
   2.741 +            {
   2.742 +                unsigned long new_mfn;
   2.743 +                uint64_t l3ptes[4];
   2.744 +                uint64_t *l3tab;
   2.745 +
   2.746 +                l3tab = (uint64_t *)
   2.747 +                    xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
   2.748 +                                         PROT_READ, p2m[i]);
   2.749 +
   2.750 +                for(j = 0; j < 4; j++)
   2.751 +                    l3ptes[j] = l3tab[j];
   2.752 +
   2.753 +                munmap(l3tab, PAGE_SIZE);
   2.754 +
   2.755 +                if (!(new_mfn=xc_make_page_below_4G(xc_handle, dom, p2m[i]))) {
   2.756 +                    ERROR("Couldn't get a page below 4GB :-(");
   2.757 +                    goto out;
   2.758 +                }
   2.759 +
   2.760 +                p2m[i] = new_mfn;
   2.761 +                if (xc_add_mmu_update(xc_handle, mmu,
   2.762 +                                      (((unsigned long long)new_mfn)
   2.763 +                                       << PAGE_SHIFT) |
   2.764 +                                      MMU_MACHPHYS_UPDATE, i)) {
   2.765 +                    ERROR("Couldn't m2p on PAE root pgdir");
   2.766 +                    goto out;
   2.767 +                }
   2.768 +
   2.769 +                l3tab = (uint64_t *)
   2.770 +                    xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
   2.771 +                                         PROT_READ | PROT_WRITE, p2m[i]);
   2.772 +
   2.773 +                for(j = 0; j < 4; j++)
   2.774 +                    l3tab[j] = l3ptes[j];
   2.775 +
   2.776 +                munmap(l3tab, PAGE_SIZE);
   2.777 +
   2.778 +            }
   2.779 +        }
   2.780 +
   2.781 +        /* Second pass: find all L1TABs and uncanonicalize them */
   2.782 +        j = 0;
   2.783 +
   2.784 +        for ( i = 0; i < p2m_size; i++ )
   2.785 +        {
   2.786 +            if ( ((pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) ==
   2.787 +                  XEN_DOMCTL_PFINFO_L1TAB) )
   2.788 +            {
   2.789 +                region_mfn[j] = p2m[i];
   2.790 +                j++;
   2.791 +            }
   2.792 +
   2.793 +            if(i == (p2m_size-1) || j == MAX_BATCH_SIZE) {
   2.794 +
   2.795 +                if (!(region_base = xc_map_foreign_batch(
   2.796 +                          xc_handle, dom, PROT_READ | PROT_WRITE,
   2.797 +                          region_mfn, j))) {
   2.798 +                    ERROR("map batch failed");
   2.799 +                    goto out;
   2.800 +                }
   2.801 +
   2.802 +                for(k = 0; k < j; k++) {
   2.803 +                    if(!uncanonicalize_pagetable(xc_handle, dom, 
   2.804 +                                                 XEN_DOMCTL_PFINFO_L1TAB,
   2.805 +                                                 region_base + k*PAGE_SIZE)) {
   2.806 +                        ERROR("failed uncanonicalize pt!");
   2.807 +                        goto out;
   2.808 +                    }
   2.809 +                }
   2.810 +
   2.811 +                munmap(region_base, j*PAGE_SIZE);
   2.812 +                j = 0;
   2.813 +            }
   2.814 +        }
   2.815 +
   2.816 +        if (xc_finish_mmu_updates(xc_handle, mmu)) {
   2.817 +            ERROR("Error doing finish_mmu_updates()");
   2.818 +            goto out;
   2.819 +        }
   2.820 +    }
   2.821 +
   2.822 +    /*
   2.823 +     * Pin page tables. Do this after writing to them as otherwise Xen
   2.824 +     * will barf when doing the type-checking.
   2.825 +     */
   2.826 +    nr_pins = 0;
   2.827 +    for ( i = 0; i < p2m_size; i++ )
   2.828 +    {
   2.829 +        if ( (pfn_type[i] & XEN_DOMCTL_PFINFO_LPINTAB) == 0 )
   2.830 +            continue;
   2.831 +
   2.832 +        switch ( pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK )
   2.833 +        {
   2.834 +        case XEN_DOMCTL_PFINFO_L1TAB:
   2.835 +            pin[nr_pins].cmd = MMUEXT_PIN_L1_TABLE;
   2.836 +            break;
   2.837 +
   2.838 +        case XEN_DOMCTL_PFINFO_L2TAB:
   2.839 +            pin[nr_pins].cmd = MMUEXT_PIN_L2_TABLE;
   2.840 +            break;
   2.841 +
   2.842 +        case XEN_DOMCTL_PFINFO_L3TAB:
   2.843 +            pin[nr_pins].cmd = MMUEXT_PIN_L3_TABLE;
   2.844 +            break;
   2.845 +
   2.846 +        case XEN_DOMCTL_PFINFO_L4TAB:
   2.847 +            pin[nr_pins].cmd = MMUEXT_PIN_L4_TABLE;
   2.848 +            break;
   2.849 +
   2.850 +        default:
   2.851 +            continue;
   2.852 +        }
   2.853 +
   2.854 +        pin[nr_pins].arg1.mfn = p2m[i];
   2.855 +        nr_pins++;
   2.856 +
   2.857 +        /* Batch full? Then flush. */
   2.858 +        if (nr_pins == MAX_PIN_BATCH) {
   2.859 +            if (xc_mmuext_op(xc_handle, pin, nr_pins, dom) < 0) {
   2.860 +                ERROR("Failed to pin batch of %d page tables", nr_pins);
   2.861 +                goto out;
   2.862 +            }
   2.863 +            nr_pins = 0;
   2.864 +        }
   2.865 +    }
   2.866 +
   2.867 +    /* Flush final partial batch. */
   2.868 +    if ((nr_pins != 0) && (xc_mmuext_op(xc_handle, pin, nr_pins, dom) < 0)) {
   2.869 +        ERROR("Failed to pin batch of %d page tables", nr_pins);
   2.870 +        goto out;
   2.871 +    }
   2.872 +
   2.873 +    DPRINTF("\b\b\b\b100%%\n");
   2.874 +    DPRINTF("Memory reloaded (%ld pages)\n", nr_pfns);
   2.875 +
   2.876 +    /* Get the list of PFNs that are not in the psuedo-phys map */
   2.877 +    {
   2.878 +        unsigned int count;
   2.879 +        unsigned long *pfntab;
   2.880 +        int nr_frees, rc;
   2.881 +
   2.882 +        if (!read_exact(io_fd, &count, sizeof(count))) {
   2.883 +            ERROR("Error when reading pfn count");
   2.884 +            goto out;
   2.885 +        }
   2.886 +
   2.887 +        if(!(pfntab = malloc(sizeof(unsigned long) * count))) {
   2.888 +            ERROR("Out of memory");
   2.889 +            goto out;
   2.890 +        }
   2.891 +
   2.892 +        if (!read_exact(io_fd, pfntab, sizeof(unsigned long)*count)) {
   2.893 +            ERROR("Error when reading pfntab");
   2.894 +            goto out;
   2.895 +        }
   2.896 +
   2.897 +        nr_frees = 0; 
   2.898 +        for (i = 0; i < count; i++) {
   2.899 +
   2.900 +            unsigned long pfn = pfntab[i];
   2.901 +
   2.902 +            if(p2m[pfn] != INVALID_P2M_ENTRY) {
   2.903 +                /* pfn is not in physmap now, but was at some point during 
   2.904 +                   the save/migration process - need to free it */
   2.905 +                pfntab[nr_frees++] = p2m[pfn];
   2.906 +                p2m[pfn]  = INVALID_P2M_ENTRY; // not in pseudo-physical map
   2.907 +            }
   2.908 +        }
   2.909 +
   2.910 +        if (nr_frees > 0) {
   2.911 +
   2.912 +            struct xen_memory_reservation reservation = {
   2.913 +                .nr_extents   = nr_frees,
   2.914 +                .extent_order = 0,
   2.915 +                .domid        = dom
   2.916 +            };
   2.917 +            set_xen_guest_handle(reservation.extent_start, pfntab);
   2.918 +
   2.919 +            if ((rc = xc_memory_op(xc_handle, XENMEM_decrease_reservation,
   2.920 +                                   &reservation)) != nr_frees) {
   2.921 +                ERROR("Could not decrease reservation : %d", rc);
   2.922 +                goto out;
   2.923 +            } else
   2.924 +                DPRINTF("Decreased reservation by %d pages\n", count);
   2.925 +        }
   2.926 +    }
   2.927 +
   2.928 +    for (i = 0; i <= max_vcpu_id; i++) {
   2.929 +        if (!(vcpumap & (1ULL << i)))
   2.930 +            continue;
   2.931 +
   2.932 +        if (!read_exact(io_fd, &ctxt, sizeof(ctxt))) {
   2.933 +            ERROR("Error when reading ctxt %d", i);
   2.934 +            goto out;
   2.935 +        }
   2.936 +
   2.937 +        if ( !new_ctxt_format )
   2.938 +            ctxt.flags |= VGCF_online;
   2.939 +
   2.940 +        if (i == 0) {
   2.941 +            /*
   2.942 +             * Uncanonicalise the suspend-record frame number and poke
   2.943 +             * resume record.
   2.944 +             */
   2.945 +            pfn = ctxt.user_regs.edx;
   2.946 +            if ((pfn >= p2m_size) ||
   2.947 +                (pfn_type[pfn] != XEN_DOMCTL_PFINFO_NOTAB)) {
   2.948 +                ERROR("Suspend record frame number is bad");
   2.949 +                goto out;
   2.950 +            }
   2.951 +            ctxt.user_regs.edx = mfn = p2m[pfn];
   2.952 +            start_info = xc_map_foreign_range(
   2.953 +                xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE, mfn);
   2.954 +            start_info->nr_pages = p2m_size;
   2.955 +            start_info->shared_info = shared_info_frame << PAGE_SHIFT;
   2.956 +            start_info->flags = 0;
   2.957 +            *store_mfn = start_info->store_mfn = p2m[start_info->store_mfn];
   2.958 +            start_info->store_evtchn = store_evtchn;
   2.959 +            start_info->console.domU.mfn = p2m[start_info->console.domU.mfn];
   2.960 +            start_info->console.domU.evtchn = console_evtchn;
   2.961 +            *console_mfn = start_info->console.domU.mfn;
   2.962 +            munmap(start_info, PAGE_SIZE);
   2.963 +        }
   2.964 +
   2.965 +        /* Uncanonicalise each GDT frame number. */
   2.966 +        if (ctxt.gdt_ents > 8192) {
   2.967 +            ERROR("GDT entry count out of range");
   2.968 +            goto out;
   2.969 +        }
   2.970 +
   2.971 +        for (j = 0; (512*j) < ctxt.gdt_ents; j++) {
   2.972 +            pfn = ctxt.gdt_frames[j];
   2.973 +            if ((pfn >= p2m_size) ||
   2.974 +                (pfn_type[pfn] != XEN_DOMCTL_PFINFO_NOTAB)) {
   2.975 +                ERROR("GDT frame number is bad");
   2.976 +                goto out;
   2.977 +            }
   2.978 +            ctxt.gdt_frames[j] = p2m[pfn];
   2.979 +        }
   2.980 +
   2.981 +        /* Uncanonicalise the page table base pointer. */
   2.982 +        pfn = xen_cr3_to_pfn(ctxt.ctrlreg[3]);
   2.983 +
   2.984 +        if (pfn >= p2m_size) {
   2.985 +            ERROR("PT base is bad: pfn=%lu p2m_size=%lu type=%08lx",
   2.986 +                  pfn, p2m_size, pfn_type[pfn]);
   2.987 +            goto out;
   2.988 +        }
   2.989 +
   2.990 +        if ( (pfn_type[pfn] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) !=
   2.991 +             ((unsigned long)pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT) ) {
   2.992 +            ERROR("PT base is bad. pfn=%lu nr=%lu type=%08lx %08lx",
   2.993 +                  pfn, p2m_size, pfn_type[pfn],
   2.994 +                  (unsigned long)pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT);
   2.995 +            goto out;
   2.996 +        }
   2.997 +
   2.998 +        ctxt.ctrlreg[3] = xen_pfn_to_cr3(p2m[pfn]);
   2.999 +
  2.1000 +        /* Guest pagetable (x86/64) stored in otherwise-unused CR1. */
  2.1001 +        if ( (pt_levels == 4) && ctxt.ctrlreg[1] )
  2.1002 +        {
  2.1003 +            pfn = xen_cr3_to_pfn(ctxt.ctrlreg[1]);
  2.1004 +
  2.1005 +            if (pfn >= p2m_size) {
  2.1006 +                ERROR("User PT base is bad: pfn=%lu p2m_size=%lu type=%08lx",
  2.1007 +                      pfn, p2m_size, pfn_type[pfn]);
  2.1008 +                goto out;
  2.1009 +            }
  2.1010 +
  2.1011 +            if ( (pfn_type[pfn] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) !=
  2.1012 +                 ((unsigned long)pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT) ) {
  2.1013 +                ERROR("User PT base is bad. pfn=%lu nr=%lu type=%08lx %08lx",
  2.1014 +                      pfn, p2m_size, pfn_type[pfn],
  2.1015 +                      (unsigned long)pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT);
  2.1016 +                goto out;
  2.1017 +            }
  2.1018 +
  2.1019 +            ctxt.ctrlreg[1] = xen_pfn_to_cr3(p2m[pfn]);
  2.1020 +        }
  2.1021 +
  2.1022 +        domctl.cmd = XEN_DOMCTL_setvcpucontext;
  2.1023 +        domctl.domain = (domid_t)dom;
  2.1024 +        domctl.u.vcpucontext.vcpu = i;
  2.1025 +        set_xen_guest_handle(domctl.u.vcpucontext.ctxt, &ctxt);
  2.1026 +        rc = xc_domctl(xc_handle, &domctl);
  2.1027 +        if (rc != 0) {
  2.1028 +            ERROR("Couldn't build vcpu%d", i);
  2.1029 +            goto out;
  2.1030 +        }
  2.1031 +        rc = 1;
  2.1032 +    }
  2.1033 +
  2.1034 +    if (!read_exact(io_fd, shared_info_page, PAGE_SIZE)) {
  2.1035 +        ERROR("Error when reading shared info page");
  2.1036 +        goto out;
  2.1037 +    }
  2.1038 +
  2.1039 +    /* clear any pending events and the selector */
  2.1040 +    memset(&(shared_info->evtchn_pending[0]), 0,
  2.1041 +           sizeof (shared_info->evtchn_pending));
  2.1042 +    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
  2.1043 +        shared_info->vcpu_info[i].evtchn_pending_sel = 0;
  2.1044 +
  2.1045 +    /* Copy saved contents of shared-info page. No checking needed. */
  2.1046 +    page = xc_map_foreign_range(
  2.1047 +        xc_handle, dom, PAGE_SIZE, PROT_WRITE, shared_info_frame);
  2.1048 +    memcpy(page, shared_info, PAGE_SIZE);
  2.1049 +    munmap(page, PAGE_SIZE);
  2.1050 +
  2.1051 +    /* Uncanonicalise the pfn-to-mfn table frame-number list. */
  2.1052 +    for (i = 0; i < P2M_FL_ENTRIES; i++) {
  2.1053 +        pfn = p2m_frame_list[i];
  2.1054 +        if ((pfn >= p2m_size) || (pfn_type[pfn] != XEN_DOMCTL_PFINFO_NOTAB)) {
  2.1055 +            ERROR("PFN-to-MFN frame number is bad");
  2.1056 +            goto out;
  2.1057 +        }
  2.1058 +
  2.1059 +        p2m_frame_list[i] = p2m[pfn];
  2.1060 +    }
  2.1061 +
  2.1062 +    /* Copy the P2M we've constructed to the 'live' P2M */
  2.1063 +    if (!(live_p2m = xc_map_foreign_batch(xc_handle, dom, PROT_WRITE,
  2.1064 +                                          p2m_frame_list, P2M_FL_ENTRIES))) {
  2.1065 +        ERROR("Couldn't map p2m table");
  2.1066 +        goto out;
  2.1067 +    }
  2.1068 +
  2.1069 +    memcpy(live_p2m, p2m, ROUNDUP(p2m_size * sizeof(xen_pfn_t), PAGE_SHIFT));
  2.1070 +    munmap(live_p2m, ROUNDUP(p2m_size * sizeof(xen_pfn_t), PAGE_SHIFT));
  2.1071 +
  2.1072 +    DPRINTF("Domain ready to be built.\n");
  2.1073 +    rc = 0;
  2.1074 +
  2.1075 + out:
  2.1076 +    if ( (rc != 0) && (dom != 0) )
  2.1077 +        xc_domain_destroy(xc_handle, dom);
  2.1078 +    free(mmu);
  2.1079 +    free(p2m);
  2.1080 +    free(pfn_type);
  2.1081 +    free(hvm_buf);
  2.1082 +
  2.1083 +    /* discard cache for save file  */
  2.1084 +    discard_file_cache(io_fd, 1 /*flush*/);
  2.1085 +
  2.1086 +    DPRINTF("Restore exit with rc=%d\n", rc);
  2.1087 +    
  2.1088 +    return rc;
  2.1089 +}
     3.1 --- a/tools/libxc/xc_hvm_restore.c	Thu Apr 05 10:43:50 2007 +0100
     3.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     3.3 @@ -1,351 +0,0 @@
     3.4 -/******************************************************************************
     3.5 - * xc_hvm_restore.c
     3.6 - *
     3.7 - * Restore the state of a HVM guest.
     3.8 - *
     3.9 - * Copyright (c) 2003, K A Fraser.
    3.10 - * Copyright (c) 2006 Intel Corperation
    3.11 - * rewriten for hvm guest by Zhai Edwin <edwin.zhai@intel.com>
    3.12 - *
    3.13 - * This program is free software; you can redistribute it and/or modify it
    3.14 - * under the terms and conditions of the GNU General Public License,
    3.15 - * version 2, as published by the Free Software Foundation.
    3.16 - *
    3.17 - * This program is distributed in the hope it will be useful, but WITHOUT
    3.18 - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    3.19 - * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
    3.20 - * more details.
    3.21 - *
    3.22 - * You should have received a copy of the GNU General Public License along with
    3.23 - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
    3.24 - * Place - Suite 330, Boston, MA 02111-1307 USA.
    3.25 - *
    3.26 - */
    3.27 -
    3.28 -#include <stdlib.h>
    3.29 -#include <unistd.h>
    3.30 -
    3.31 -#include "xg_private.h"
    3.32 -#include "xg_save_restore.h"
    3.33 -
    3.34 -#include <xen/hvm/ioreq.h>
    3.35 -#include <xen/hvm/params.h>
    3.36 -#include <xen/hvm/e820.h>
    3.37 -
    3.38 -static ssize_t
    3.39 -read_exact(int fd, void *buf, size_t count)
    3.40 -{
    3.41 -    int r = 0, s;
    3.42 -    unsigned char *b = buf;
    3.43 -
    3.44 -    while ( r < count )
    3.45 -    {
    3.46 -        s = read(fd, &b[r], count - r);
    3.47 -        if ( (s == -1) && (errno == EINTR) )
    3.48 -            continue;
    3.49 -        if ( s <= 0 )
    3.50 -            break;
    3.51 -        r += s;
    3.52 -    }
    3.53 -
    3.54 -    return (r == count) ? 1 : 0;
    3.55 -}
    3.56 -
    3.57 -#define BPL (sizeof(long)*8)
    3.58 -#define test_bit(bit, map) !!((map)[(bit)/BPL] & (1UL << ((bit) % BPL)))
    3.59 -#define set_bit(bit, map)  ((map)[(bit)/BPL] |= (1UL << ((bit) % BPL)))
    3.60 -static int test_and_set_bit(unsigned long nr, unsigned long *map)
    3.61 -{
    3.62 -    int rc = test_bit(nr, map);
    3.63 -    if ( !rc )
    3.64 -        set_bit(nr, map);
    3.65 -    return rc;
    3.66 -}
    3.67 -
    3.68 -int xc_hvm_restore(int xc_handle, int io_fd, uint32_t dom,
    3.69 -                   unsigned int store_evtchn, unsigned long *store_mfn,
    3.70 -                   unsigned int pae, unsigned int apic)
    3.71 -{
    3.72 -    DECLARE_DOMCTL;
    3.73 -
    3.74 -    /* A copy of the CPU context of the guest. */
    3.75 -    vcpu_guest_context_t ctxt;
    3.76 -
    3.77 -    char *region_base;
    3.78 -
    3.79 -    unsigned long buf[PAGE_SIZE/sizeof(unsigned long)];
    3.80 -
    3.81 -    xc_dominfo_t info;
    3.82 -    unsigned int rc = 1, n, i;
    3.83 -    uint32_t rec_len, nr_vcpus;
    3.84 -    uint8_t *hvm_buf = NULL;
    3.85 -
    3.86 -    /* Magic frames: ioreqs and xenstore comms. */
    3.87 -    uint64_t magic_pfns[3]; /* ioreq_pfn, bufioreq_pfn, store_pfn */
    3.88 -
    3.89 -    unsigned long pfn;
    3.90 -    int verify = 0;
    3.91 -
    3.92 -    /* Types of the pfns in the current region */
    3.93 -    unsigned long region_pfn_type[MAX_BATCH_SIZE];
    3.94 -    xen_pfn_t pfn_alloc_batch[MAX_BATCH_SIZE];
    3.95 -    unsigned int pfn_alloc_batch_size;
    3.96 -
    3.97 -    /* The size of an array big enough to contain all guest pfns */
    3.98 -    unsigned long max_pfn = 0xfffffUL; /* initial memory map guess: 4GB */
    3.99 -    unsigned long *pfn_bitmap = NULL, *new_pfn_bitmap;
   3.100 -
   3.101 -    DPRINTF("xc_hvm_restore:dom=%d, store_evtchn=%d, "
   3.102 -            "pae=%u, apic=%u.\n", dom, store_evtchn, pae, apic);
   3.103 -
   3.104 -    DPRINTF("xc_hvm_restore start: max_pfn = %lx\n", max_pfn);
   3.105 -
   3.106 -    if ( mlock(&ctxt, sizeof(ctxt)) )
   3.107 -    {
   3.108 -        /* needed for build dom0 op, but might as well do early */
   3.109 -        ERROR("Unable to mlock ctxt");
   3.110 -        return 1;
   3.111 -    }
   3.112 -
   3.113 -    if ( xc_domain_getinfo(xc_handle, dom, 1, &info) != 1 )
   3.114 -    {
   3.115 -        ERROR("Could not get domain info");
   3.116 -        return 1;
   3.117 -    }
   3.118 -
   3.119 -    domctl.cmd = XEN_DOMCTL_getdomaininfo;
   3.120 -    domctl.domain = (domid_t)dom;
   3.121 -    if ( xc_domctl(xc_handle, &domctl) < 0 )
   3.122 -    {
   3.123 -        ERROR("Could not get information on new domain");
   3.124 -        goto out;
   3.125 -    }
   3.126 -
   3.127 -    pfn_bitmap = calloc((max_pfn+1)/8, 1);
   3.128 -    if ( pfn_bitmap == NULL )
   3.129 -    {
   3.130 -        ERROR("Could not allocate pfn bitmap");
   3.131 -        goto out;
   3.132 -    }
   3.133 -
   3.134 -    n = 0;
   3.135 -    for ( ; ; )
   3.136 -    {
   3.137 -        int j;
   3.138 -
   3.139 -        if ( !read_exact(io_fd, &j, sizeof(int)) )
   3.140 -        {
   3.141 -            ERROR("HVM restore Error when reading batch size");
   3.142 -            goto out;
   3.143 -        }
   3.144 -
   3.145 -        PPRINTF("batch %d\n",j);
   3.146 -
   3.147 -        if ( j == -1 )
   3.148 -        {
   3.149 -            verify = 1;
   3.150 -            DPRINTF("Entering page verify mode\n");
   3.151 -            continue;
   3.152 -        }
   3.153 -
   3.154 -        if ( j == 0 )
   3.155 -            break;  /* our work here is done */
   3.156 -
   3.157 -        if ( j > MAX_BATCH_SIZE )
   3.158 -        {
   3.159 -            ERROR("Max batch size exceeded. Giving up.");
   3.160 -            goto out;
   3.161 -        }
   3.162 -
   3.163 -        if ( !read_exact(io_fd, region_pfn_type, j*sizeof(unsigned long)) )
   3.164 -        {
   3.165 -            ERROR("Error when reading region pfn types");
   3.166 -            goto out;
   3.167 -        }
   3.168 -
   3.169 -        pfn_alloc_batch_size = 0;
   3.170 -        for ( i = 0; i < j; i++ )
   3.171 -        {
   3.172 -            pfn = region_pfn_type[i];
   3.173 -            if ( pfn & XEN_DOMCTL_PFINFO_LTAB_MASK )
   3.174 -                continue;
   3.175 -
   3.176 -            while ( pfn > max_pfn )
   3.177 -            {
   3.178 -                if ( max_pfn >= 0xfffffff )
   3.179 -                {
   3.180 -                    ERROR("Maximum PFN beyond reason (1TB) %lx\n", pfn);
   3.181 -                    goto out;
   3.182 -                }
   3.183 -                max_pfn = 2*max_pfn + 1;
   3.184 -                new_pfn_bitmap = realloc(pfn_bitmap, (max_pfn+1)/8);
   3.185 -                if ( new_pfn_bitmap == NULL )
   3.186 -                {
   3.187 -                    ERROR("Could not realloc pfn bitmap for max_pfn=%lx\n",
   3.188 -                          max_pfn);
   3.189 -                    goto out;
   3.190 -                }
   3.191 -                pfn_bitmap = new_pfn_bitmap;
   3.192 -                memset(&pfn_bitmap[(max_pfn+1)/(2*BPL)], 0, (max_pfn+1)/(2*8));
   3.193 -            }
   3.194 -
   3.195 -            if ( !test_and_set_bit(pfn, pfn_bitmap) )
   3.196 -                pfn_alloc_batch[pfn_alloc_batch_size++] = pfn;
   3.197 -        }
   3.198 -
   3.199 -        if ( pfn_alloc_batch_size != 0 )
   3.200 -        {
   3.201 -             rc = xc_domain_memory_populate_physmap(
   3.202 -                 xc_handle, dom, pfn_alloc_batch_size, 0, 0, pfn_alloc_batch);
   3.203 -             if ( rc != 0 )
   3.204 -             {
   3.205 -                 PERROR("Could not allocate %u pages for HVM guest.\n",
   3.206 -                        pfn_alloc_batch_size);
   3.207 -                 goto out;
   3.208 -             }
   3.209 -        }
   3.210 -
   3.211 -        region_base = xc_map_foreign_batch(
   3.212 -            xc_handle, dom, PROT_WRITE, region_pfn_type, j);
   3.213 -
   3.214 -        for ( i = 0; i < j; i++ )
   3.215 -        {
   3.216 -            void *page;
   3.217 -
   3.218 -            pfn = region_pfn_type[i];
   3.219 -            if ( pfn & XEN_DOMCTL_PFINFO_LTAB_MASK )
   3.220 -                continue;
   3.221 -
   3.222 -            /* In verify mode, we use a copy; otherwise we work in place */
   3.223 -            page = verify ? (void *)buf : (region_base + i*PAGE_SIZE);
   3.224 -
   3.225 -            if ( !read_exact(io_fd, page, PAGE_SIZE) )
   3.226 -            {
   3.227 -                ERROR("Error when reading page (%x)", i);
   3.228 -                goto out;
   3.229 -            }
   3.230 -
   3.231 -            if ( verify )
   3.232 -            {
   3.233 -                int res = memcmp(buf, (region_base + i*PAGE_SIZE), PAGE_SIZE);
   3.234 -                if ( res )
   3.235 -                {
   3.236 -                    int v;
   3.237 -
   3.238 -                    DPRINTF("************** pfn=%lx gotcs=%08lx "
   3.239 -                            "actualcs=%08lx\n", pfn, 
   3.240 -                            csum_page(region_base + i*PAGE_SIZE),
   3.241 -                            csum_page(buf));
   3.242 -
   3.243 -                    for ( v = 0; v < 4; v++ )
   3.244 -                    {
   3.245 -                        unsigned long *p = (unsigned long *)
   3.246 -                            (region_base + i*PAGE_SIZE);
   3.247 -                        if (buf[v] != p[v])
   3.248 -                            DPRINTF("    %d: %08lx %08lx\n", v, buf[v], p[v]);
   3.249 -                    }
   3.250 -                }
   3.251 -            }
   3.252 -
   3.253 -        } /* end of 'batch' for loop */
   3.254 -
   3.255 -        munmap(region_base, j*PAGE_SIZE);
   3.256 -        n += j; /* crude stats */
   3.257 -    }
   3.258 -    
   3.259 -    xc_set_hvm_param(xc_handle, dom, HVM_PARAM_PAE_ENABLED, pae);
   3.260 -    xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_EVTCHN, store_evtchn);
   3.261 -
   3.262 -    if ( !read_exact(io_fd, magic_pfns, sizeof(magic_pfns)) )
   3.263 -    {
   3.264 -        ERROR("error reading magic page addresses\n");
   3.265 -        goto out;
   3.266 -    }
   3.267 -
   3.268 -    if ( xc_clear_domain_page(xc_handle, dom, magic_pfns[0]) ||
   3.269 -         xc_clear_domain_page(xc_handle, dom, magic_pfns[1]) ||
   3.270 -         xc_clear_domain_page(xc_handle, dom, magic_pfns[2]) )
   3.271 -    {
   3.272 -        rc = -1;
   3.273 -        goto out;
   3.274 -    }
   3.275 -
   3.276 -    xc_set_hvm_param(xc_handle, dom, HVM_PARAM_IOREQ_PFN, magic_pfns[0]);
   3.277 -    xc_set_hvm_param(xc_handle, dom, HVM_PARAM_BUFIOREQ_PFN, magic_pfns[1]);
   3.278 -    xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN, magic_pfns[2]);
   3.279 -    *store_mfn = magic_pfns[2];
   3.280 -    DPRINTF("hvm restore: calculate new store_mfn=0x%lx.\n", *store_mfn);
   3.281 -
   3.282 -    if ( !read_exact(io_fd, &nr_vcpus, sizeof(uint32_t)) )
   3.283 -    {
   3.284 -        ERROR("error read nr vcpu !\n");
   3.285 -        goto out;
   3.286 -    }
   3.287 -    DPRINTF("hvm restore:get nr_vcpus=%d.\n", nr_vcpus);
   3.288 -
   3.289 -    for ( i = 0; i < nr_vcpus; i++ )
   3.290 -    {
   3.291 -        if ( !read_exact(io_fd, &rec_len, sizeof(uint32_t)) )
   3.292 -        {
   3.293 -            ERROR("error read vcpu context size!\n");
   3.294 -            goto out;
   3.295 -        }
   3.296 -        if ( rec_len != sizeof(ctxt) )
   3.297 -        {
   3.298 -            ERROR("vcpu context size dismatch!\n");
   3.299 -            goto out;
   3.300 -        }
   3.301 -
   3.302 -        if ( !read_exact(io_fd, &(ctxt), sizeof(ctxt)) )
   3.303 -        {
   3.304 -            ERROR("error read vcpu context.\n");
   3.305 -            goto out;
   3.306 -        }
   3.307 -
   3.308 -        if ( (rc = xc_vcpu_setcontext(xc_handle, dom, i, &ctxt)) )
   3.309 -        {
   3.310 -            ERROR("Could not set vcpu context, rc=%d", rc);
   3.311 -            goto out;
   3.312 -        }
   3.313 -    }
   3.314 -
   3.315 -    /* restore hvm context including pic/pit/shpage */
   3.316 -    if ( !read_exact(io_fd, &rec_len, sizeof(uint32_t)) )
   3.317 -    {
   3.318 -        ERROR("error read hvm context size!\n");
   3.319 -        goto out;
   3.320 -    }
   3.321 -
   3.322 -    hvm_buf = malloc(rec_len);
   3.323 -    if ( hvm_buf == NULL )
   3.324 -    {
   3.325 -        ERROR("memory alloc for hvm context buffer failed");
   3.326 -        errno = ENOMEM;
   3.327 -        goto out;
   3.328 -    }
   3.329 -
   3.330 -    if ( !read_exact(io_fd, hvm_buf, rec_len) )
   3.331 -    {
   3.332 -        ERROR("error read hvm buffer!\n");
   3.333 -        goto out;
   3.334 -    }
   3.335 -
   3.336 -    if ( (rc = xc_domain_hvm_setcontext(xc_handle, dom, hvm_buf, rec_len)) )
   3.337 -    {
   3.338 -        ERROR("error set hvm buffer!\n");
   3.339 -        goto out;
   3.340 -    }
   3.341 -
   3.342 -    rc = 0;
   3.343 -    goto out;
   3.344 -
   3.345 - out:
   3.346 -    if ( (rc != 0) && (dom != 0) )
   3.347 -        xc_domain_destroy(xc_handle, dom);
   3.348 -    free(hvm_buf);
   3.349 -    free(pfn_bitmap);
   3.350 -
   3.351 -    DPRINTF("Restore exit with rc=%d\n", rc);
   3.352 -
   3.353 -    return rc;
   3.354 -}
     4.1 --- a/tools/libxc/xc_hvm_save.c	Thu Apr 05 10:43:50 2007 +0100
     4.2 +++ b/tools/libxc/xc_hvm_save.c	Thu Apr 05 15:11:22 2007 +0100
     4.3 @@ -305,6 +305,8 @@ int xc_hvm_save(int xc_handle, int io_fd
     4.4  
     4.5      unsigned long total_sent = 0;
     4.6  
     4.7 +    uint64_t vcpumap = 1ULL;
     4.8 +
     4.9      DPRINTF("xc_hvm_save: dom=%d, max_iters=%d, max_factor=%d, flags=0x%x, "
    4.10              "live=%d, debug=%d.\n", dom, max_iters, max_factor, flags,
    4.11              live, debug);
    4.12 @@ -371,6 +373,12 @@ int xc_hvm_save(int xc_handle, int io_fd
    4.13  
    4.14      /* Size of any array that covers 0 ... max_pfn */
    4.15      pfn_array_size = max_pfn + 1;
    4.16 +    if ( !write_exact(io_fd, &pfn_array_size, sizeof(unsigned long)) )
    4.17 +    {
    4.18 +        ERROR("Error when writing to state file (1)");
    4.19 +        goto out;
    4.20 +    }
    4.21 +    
    4.22  
    4.23      /* pretend we sent all the pages last iteration */
    4.24      sent_last_iter = pfn_array_size;
    4.25 @@ -644,6 +652,32 @@ int xc_hvm_save(int xc_handle, int io_fd
    4.26  
    4.27      DPRINTF("All HVM memory is saved\n");
    4.28  
    4.29 +    {
    4.30 +        struct {
    4.31 +            int minustwo;
    4.32 +            int max_vcpu_id;
    4.33 +            uint64_t vcpumap;
    4.34 +        } chunk = { -2, info.max_vcpu_id };
    4.35 +
    4.36 +        if (info.max_vcpu_id >= 64) {
    4.37 +            ERROR("Too many VCPUS in guest!");
    4.38 +            goto out;
    4.39 +        }
    4.40 +
    4.41 +        for (i = 1; i <= info.max_vcpu_id; i++) {
    4.42 +            xc_vcpuinfo_t vinfo;
    4.43 +            if ((xc_vcpu_getinfo(xc_handle, dom, i, &vinfo) == 0) &&
    4.44 +                vinfo.online)
    4.45 +                vcpumap |= 1ULL << i;
    4.46 +        }
    4.47 +
    4.48 +        chunk.vcpumap = vcpumap;
    4.49 +        if(!write_exact(io_fd, &chunk, sizeof(chunk))) {
    4.50 +            ERROR("Error when writing to state file (errno %d)", errno);
    4.51 +            goto out;
    4.52 +        }
    4.53 +    }
    4.54 +
    4.55      /* Zero terminate */
    4.56      i = 0;
    4.57      if ( !write_exact(io_fd, &i, sizeof(int)) )
    4.58 @@ -666,33 +700,22 @@ int xc_hvm_save(int xc_handle, int io_fd
    4.59          goto out;
    4.60      }
    4.61  
    4.62 -    /* save vcpu/vmcs context */
    4.63 -    if ( !write_exact(io_fd, &nr_vcpus, sizeof(uint32_t)) )
    4.64 -    {
    4.65 -        ERROR("error write nr vcpus");
    4.66 -        goto out;
    4.67 -    }
    4.68 -
    4.69 -    /*XXX: need a online map to exclude down cpu */
    4.70 +    /* save vcpu/vmcs contexts */
    4.71      for ( i = 0; i < nr_vcpus; i++ )
    4.72      {
    4.73 +        if (!(vcpumap & (1ULL << i)))
    4.74 +            continue;
    4.75 +
    4.76          if ( xc_vcpu_getcontext(xc_handle, dom, i, &ctxt) )
    4.77          {
    4.78              ERROR("HVM:Could not get vcpu context");
    4.79              goto out;
    4.80          }
    4.81  
    4.82 -        rec_size = sizeof(ctxt);
    4.83 -        DPRINTF("write %d vcpucontext of total %d.\n", i, nr_vcpus); 
    4.84 -        if ( !write_exact(io_fd, &rec_size, sizeof(uint32_t)) )
    4.85 -        {
    4.86 -            ERROR("error write vcpu ctxt size");
    4.87 -            goto out;
    4.88 -        }
    4.89 -
    4.90 +        DPRINTF("write vcpu %d context.\n", i); 
    4.91          if ( !write_exact(io_fd, &(ctxt), sizeof(ctxt)) )
    4.92          {
    4.93 -            ERROR("write vmcs failed!\n");
    4.94 +            ERROR("write vcpu context failed!\n");
    4.95              goto out;
    4.96          }
    4.97      }
     5.1 --- a/tools/libxc/xc_linux_restore.c	Thu Apr 05 10:43:50 2007 +0100
     5.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     5.3 @@ -1,955 +0,0 @@
     5.4 -/******************************************************************************
     5.5 - * xc_linux_restore.c
     5.6 - *
     5.7 - * Restore the state of a Linux session.
     5.8 - *
     5.9 - * Copyright (c) 2003, K A Fraser.
    5.10 - */
    5.11 -
    5.12 -#include <stdlib.h>
    5.13 -#include <unistd.h>
    5.14 -
    5.15 -#include "xg_private.h"
    5.16 -#include "xg_save_restore.h"
    5.17 -#include "xc_dom.h"
    5.18 -
    5.19 -/* max mfn of the current host machine */
    5.20 -static unsigned long max_mfn;
    5.21 -
    5.22 -/* virtual starting address of the hypervisor */
    5.23 -static unsigned long hvirt_start;
    5.24 -
    5.25 -/* #levels of page tables used by the current guest */
    5.26 -static unsigned int pt_levels;
    5.27 -
    5.28 -/* number of pfns this guest has (i.e. number of entries in the P2M) */
    5.29 -static unsigned long p2m_size;
    5.30 -
    5.31 -/* number of 'in use' pfns in the guest (i.e. #P2M entries with a valid mfn) */
    5.32 -static unsigned long nr_pfns;
    5.33 -
    5.34 -/* Live mapping of the table mapping each PFN to its current MFN. */
    5.35 -static xen_pfn_t *live_p2m = NULL;
    5.36 -
    5.37 -/* A table mapping each PFN to its new MFN. */
    5.38 -static xen_pfn_t *p2m = NULL;
    5.39 -
    5.40 -/* A table of P2M mappings in the current region */
    5.41 -static xen_pfn_t *p2m_batch = NULL;
    5.42 -
    5.43 -static ssize_t
    5.44 -read_exact(int fd, void *buf, size_t count)
    5.45 -{
    5.46 -    int r = 0, s;
    5.47 -    unsigned char *b = buf;
    5.48 -
    5.49 -    while (r < count) {
    5.50 -        s = read(fd, &b[r], count - r);
    5.51 -        if ((s == -1) && (errno == EINTR))
    5.52 -            continue;
    5.53 -        if (s <= 0) {
    5.54 -            break;
    5.55 -        }
    5.56 -        r += s;
    5.57 -    }
    5.58 -
    5.59 -    return (r == count) ? 1 : 0;
    5.60 -}
    5.61 -
    5.62 -/*
    5.63 -** In the state file (or during transfer), all page-table pages are
    5.64 -** converted into a 'canonical' form where references to actual mfns
    5.65 -** are replaced with references to the corresponding pfns.
    5.66 -** This function inverts that operation, replacing the pfn values with
    5.67 -** the (now known) appropriate mfn values.
    5.68 -*/
    5.69 -static int uncanonicalize_pagetable(int xc_handle, uint32_t dom, 
    5.70 -                                    unsigned long type, void *page)
    5.71 -{
    5.72 -    int i, pte_last;
    5.73 -    unsigned long pfn;
    5.74 -    uint64_t pte;
    5.75 -    int nr_mfns = 0; 
    5.76 -
    5.77 -    pte_last = PAGE_SIZE / ((pt_levels == 2)? 4 : 8);
    5.78 -
    5.79 -    /* First pass: work out how many (if any) MFNs we need to alloc */
    5.80 -    for(i = 0; i < pte_last; i++) {
    5.81 -        
    5.82 -        if(pt_levels == 2)
    5.83 -            pte = ((uint32_t *)page)[i];
    5.84 -        else
    5.85 -            pte = ((uint64_t *)page)[i];
    5.86 -        
    5.87 -        /* XXX SMH: below needs fixing for PROT_NONE etc */
    5.88 -        if(!(pte & _PAGE_PRESENT))
    5.89 -            continue; 
    5.90 -        
    5.91 -        pfn = (pte >> PAGE_SHIFT) & MFN_MASK_X86;
    5.92 -        
    5.93 -        if(pfn >= p2m_size) {
    5.94 -            /* This "page table page" is probably not one; bail. */
    5.95 -            ERROR("Frame number in type %lu page table is out of range: "
    5.96 -                  "i=%d pfn=0x%lx p2m_size=%lu",
    5.97 -                  type >> 28, i, pfn, p2m_size);
    5.98 -            return 0;
    5.99 -        }
   5.100 -        
   5.101 -        if(p2m[pfn] == INVALID_P2M_ENTRY) {
   5.102 -            /* Have a 'valid' PFN without a matching MFN - need to alloc */
   5.103 -            p2m_batch[nr_mfns++] = pfn; 
   5.104 -        }
   5.105 -    }
   5.106 -    
   5.107 -    
   5.108 -    /* Alllocate the requistite number of mfns */
   5.109 -    if (nr_mfns && xc_domain_memory_populate_physmap(
   5.110 -            xc_handle, dom, nr_mfns, 0, 0, p2m_batch) != 0) { 
   5.111 -        ERROR("Failed to allocate memory for batch.!\n"); 
   5.112 -        errno = ENOMEM;
   5.113 -        return 0; 
   5.114 -    }
   5.115 -    
   5.116 -    /* Second pass: uncanonicalize each present PTE */
   5.117 -    nr_mfns = 0;
   5.118 -    for(i = 0; i < pte_last; i++) {
   5.119 -
   5.120 -        if(pt_levels == 2)
   5.121 -            pte = ((uint32_t *)page)[i];
   5.122 -        else
   5.123 -            pte = ((uint64_t *)page)[i];
   5.124 -        
   5.125 -        /* XXX SMH: below needs fixing for PROT_NONE etc */
   5.126 -        if(!(pte & _PAGE_PRESENT))
   5.127 -            continue;
   5.128 -        
   5.129 -        pfn = (pte >> PAGE_SHIFT) & MFN_MASK_X86;
   5.130 -        
   5.131 -        if(p2m[pfn] == INVALID_P2M_ENTRY)
   5.132 -            p2m[pfn] = p2m_batch[nr_mfns++];
   5.133 -
   5.134 -        pte &= ~MADDR_MASK_X86;
   5.135 -        pte |= (uint64_t)p2m[pfn] << PAGE_SHIFT;
   5.136 -
   5.137 -        if(pt_levels == 2)
   5.138 -            ((uint32_t *)page)[i] = (uint32_t)pte;
   5.139 -        else
   5.140 -            ((uint64_t *)page)[i] = (uint64_t)pte;
   5.141 -    }
   5.142 -
   5.143 -    return 1;
   5.144 -}
   5.145 -
   5.146 -
   5.147 -int xc_linux_restore(int xc_handle, int io_fd, uint32_t dom,
   5.148 -                     unsigned int store_evtchn, unsigned long *store_mfn,
   5.149 -                     unsigned int console_evtchn, unsigned long *console_mfn)
   5.150 -{
   5.151 -    DECLARE_DOMCTL;
   5.152 -    int rc = 1, i, j, n, m, pae_extended_cr3 = 0;
   5.153 -    unsigned long mfn, pfn;
   5.154 -    unsigned int prev_pc, this_pc;
   5.155 -    int verify = 0;
   5.156 -    int nraces = 0;
   5.157 -
   5.158 -    /* The new domain's shared-info frame number. */
   5.159 -    unsigned long shared_info_frame;
   5.160 -    unsigned char shared_info_page[PAGE_SIZE]; /* saved contents from file */
   5.161 -    shared_info_t *shared_info = (shared_info_t *)shared_info_page;
   5.162 -
   5.163 -    /* A copy of the CPU context of the guest. */
   5.164 -    vcpu_guest_context_t ctxt;
   5.165 -
   5.166 -    /* A table containing the type of each PFN (/not/ MFN!). */
   5.167 -    unsigned long *pfn_type = NULL;
   5.168 -
   5.169 -    /* A table of MFNs to map in the current region */
   5.170 -    xen_pfn_t *region_mfn = NULL;
   5.171 -
   5.172 -    /* Types of the pfns in the current region */
   5.173 -    unsigned long region_pfn_type[MAX_BATCH_SIZE];
   5.174 -
   5.175 -    /* A temporary mapping, and a copy, of one frame of guest memory. */
   5.176 -    unsigned long *page = NULL;
   5.177 -
   5.178 -    /* A copy of the pfn-to-mfn table frame list. */
   5.179 -    xen_pfn_t *p2m_frame_list = NULL;
   5.180 -
   5.181 -    /* A temporary mapping of the guest's start_info page. */
   5.182 -    start_info_t *start_info;
   5.183 -
   5.184 -    /* Our mapping of the current region (batch) */
   5.185 -    char *region_base;
   5.186 -
   5.187 -    xc_mmu_t *mmu = NULL;
   5.188 -
   5.189 -    /* used by debug verify code */
   5.190 -    unsigned long buf[PAGE_SIZE/sizeof(unsigned long)];
   5.191 -
   5.192 -    struct mmuext_op pin[MAX_PIN_BATCH];
   5.193 -    unsigned int nr_pins;
   5.194 -
   5.195 -    uint64_t vcpumap = 1ULL;
   5.196 -    unsigned int max_vcpu_id = 0;
   5.197 -    int new_ctxt_format = 0;
   5.198 -
   5.199 -    /* For info only */
   5.200 -    nr_pfns = 0;
   5.201 -
   5.202 -    if ( !read_exact(io_fd, &p2m_size, sizeof(unsigned long)) )
   5.203 -    {
   5.204 -        ERROR("read: p2m_size");
   5.205 -        goto out;
   5.206 -    }
   5.207 -    DPRINTF("xc_linux_restore start: p2m_size = %lx\n", p2m_size);
   5.208 -
   5.209 -    /*
   5.210 -     * XXX For now, 32bit dom0's can only save/restore 32bit domUs
   5.211 -     * on 64bit hypervisors.
   5.212 -     */
   5.213 -    memset(&domctl, 0, sizeof(domctl));
   5.214 -    domctl.domain = dom;
   5.215 -    domctl.cmd    = XEN_DOMCTL_set_address_size;
   5.216 -    domctl.u.address_size.size = sizeof(unsigned long) * 8;
   5.217 -    rc = do_domctl(xc_handle, &domctl);
   5.218 -    if ( rc != 0 ) {
   5.219 -	ERROR("Unable to set guest address size.");
   5.220 -	goto out;
   5.221 -    }
   5.222 -
   5.223 -    if(!get_platform_info(xc_handle, dom,
   5.224 -                          &max_mfn, &hvirt_start, &pt_levels)) {
   5.225 -        ERROR("Unable to get platform info.");
   5.226 -        return 1;
   5.227 -    }
   5.228 -
   5.229 -    if (lock_pages(&ctxt, sizeof(ctxt))) {
   5.230 -        /* needed for build domctl, but might as well do early */
   5.231 -        ERROR("Unable to lock ctxt");
   5.232 -        return 1;
   5.233 -    }
   5.234 -
   5.235 -    if (!(p2m_frame_list = malloc(P2M_FL_SIZE))) {
   5.236 -        ERROR("Couldn't allocate p2m_frame_list array");
   5.237 -        goto out;
   5.238 -    }
   5.239 -
   5.240 -    /* Read first entry of P2M list, or extended-info signature (~0UL). */
   5.241 -    if (!read_exact(io_fd, p2m_frame_list, sizeof(long))) {
   5.242 -        ERROR("read extended-info signature failed");
   5.243 -        goto out;
   5.244 -    }
   5.245 -
   5.246 -    if (p2m_frame_list[0] == ~0UL) {
   5.247 -        uint32_t tot_bytes;
   5.248 -
   5.249 -        /* Next 4 bytes: total size of following extended info. */
   5.250 -        if (!read_exact(io_fd, &tot_bytes, sizeof(tot_bytes))) {
   5.251 -            ERROR("read extended-info size failed");
   5.252 -            goto out;
   5.253 -        }
   5.254 -
   5.255 -        while (tot_bytes) {
   5.256 -            uint32_t chunk_bytes;
   5.257 -            char     chunk_sig[4];
   5.258 -
   5.259 -            /* 4-character chunk signature + 4-byte remaining chunk size. */
   5.260 -            if (!read_exact(io_fd, chunk_sig, sizeof(chunk_sig)) ||
   5.261 -                !read_exact(io_fd, &chunk_bytes, sizeof(chunk_bytes))) {
   5.262 -                ERROR("read extended-info chunk signature failed");
   5.263 -                goto out;
   5.264 -            }
   5.265 -            tot_bytes -= 8;
   5.266 -
   5.267 -            /* VCPU context structure? */
   5.268 -            if (!strncmp(chunk_sig, "vcpu", 4)) {
   5.269 -                if (!read_exact(io_fd, &ctxt, sizeof(ctxt))) {
   5.270 -                    ERROR("read extended-info vcpu context failed");
   5.271 -                    goto out;
   5.272 -                }
   5.273 -                tot_bytes   -= sizeof(struct vcpu_guest_context);
   5.274 -                chunk_bytes -= sizeof(struct vcpu_guest_context);
   5.275 -
   5.276 -                if (ctxt.vm_assist & (1UL << VMASST_TYPE_pae_extended_cr3))
   5.277 -                    pae_extended_cr3 = 1;
   5.278 -            }
   5.279 -
   5.280 -            /* Any remaining bytes of this chunk: read and discard. */
   5.281 -            while (chunk_bytes) {
   5.282 -                unsigned long sz = chunk_bytes;
   5.283 -                if ( sz > P2M_FL_SIZE )
   5.284 -                    sz = P2M_FL_SIZE;
   5.285 -                if (!read_exact(io_fd, p2m_frame_list, sz)) {
   5.286 -                    ERROR("read-and-discard extended-info chunk bytes failed");
   5.287 -                    goto out;
   5.288 -                }
   5.289 -                chunk_bytes -= sz;
   5.290 -                tot_bytes   -= sz;
   5.291 -            }
   5.292 -        }
   5.293 -
   5.294 -        /* Now read the real first entry of P2M list. */
   5.295 -        if (!read_exact(io_fd, p2m_frame_list, sizeof(long))) {
   5.296 -            ERROR("read first entry of p2m_frame_list failed");
   5.297 -            goto out;
   5.298 -        }
   5.299 -    }
   5.300 -
   5.301 -    /* First entry is already read into the p2m array. */
   5.302 -    if (!read_exact(io_fd, &p2m_frame_list[1], P2M_FL_SIZE - sizeof(long))) {
   5.303 -        ERROR("read p2m_frame_list failed");
   5.304 -        goto out;
   5.305 -    }
   5.306 -
   5.307 -    /* We want zeroed memory so use calloc rather than malloc. */
   5.308 -    p2m        = calloc(p2m_size, sizeof(xen_pfn_t));
   5.309 -    pfn_type   = calloc(p2m_size, sizeof(unsigned long));
   5.310 -    region_mfn = calloc(MAX_BATCH_SIZE, sizeof(xen_pfn_t));
   5.311 -    p2m_batch  = calloc(MAX_BATCH_SIZE, sizeof(xen_pfn_t));
   5.312 -
   5.313 -    if ((p2m == NULL) || (pfn_type == NULL) ||
   5.314 -        (region_mfn == NULL) || (p2m_batch == NULL)) {
   5.315 -        ERROR("memory alloc failed");
   5.316 -        errno = ENOMEM;
   5.317 -        goto out;
   5.318 -    }
   5.319 -
   5.320 -    if (lock_pages(region_mfn, sizeof(xen_pfn_t) * MAX_BATCH_SIZE)) {
   5.321 -        ERROR("Could not lock region_mfn");
   5.322 -        goto out;
   5.323 -    }
   5.324 -
   5.325 -    if (lock_pages(p2m_batch, sizeof(xen_pfn_t) * MAX_BATCH_SIZE)) {
   5.326 -        ERROR("Could not lock p2m_batch");
   5.327 -        goto out;
   5.328 -    }
   5.329 -
   5.330 -    /* Get the domain's shared-info frame. */
   5.331 -    domctl.cmd = XEN_DOMCTL_getdomaininfo;
   5.332 -    domctl.domain = (domid_t)dom;
   5.333 -    if (xc_domctl(xc_handle, &domctl) < 0) {
   5.334 -        ERROR("Could not get information on new domain");
   5.335 -        goto out;
   5.336 -    }
   5.337 -    shared_info_frame = domctl.u.getdomaininfo.shared_info_frame;
   5.338 -
   5.339 -    /* Mark all PFNs as invalid; we allocate on demand */
   5.340 -    for ( pfn = 0; pfn < p2m_size; pfn++ )
   5.341 -        p2m[pfn] = INVALID_P2M_ENTRY;
   5.342 -
   5.343 -    if(!(mmu = xc_init_mmu_updates(xc_handle, dom))) {
   5.344 -        ERROR("Could not initialise for MMU updates");
   5.345 -        goto out;
   5.346 -    }
   5.347 -
   5.348 -    DPRINTF("Reloading memory pages:   0%%\n");
   5.349 -
   5.350 -    /*
   5.351 -     * Now simply read each saved frame into its new machine frame.
   5.352 -     * We uncanonicalise page tables as we go.
   5.353 -     */
   5.354 -    prev_pc = 0;
   5.355 -
   5.356 -    n = m = 0;
   5.357 -    while (1) {
   5.358 -
   5.359 -        int j, nr_mfns = 0; 
   5.360 -
   5.361 -        this_pc = (n * 100) / p2m_size;
   5.362 -        if ( (this_pc - prev_pc) >= 5 )
   5.363 -        {
   5.364 -            PPRINTF("\b\b\b\b%3d%%", this_pc);
   5.365 -            prev_pc = this_pc;
   5.366 -        }
   5.367 -
   5.368 -        if (!read_exact(io_fd, &j, sizeof(int))) {
   5.369 -            ERROR("Error when reading batch size");
   5.370 -            goto out;
   5.371 -        }
   5.372 -
   5.373 -        PPRINTF("batch %d\n",j);
   5.374 -
   5.375 -        if (j == -1) {
   5.376 -            verify = 1;
   5.377 -            DPRINTF("Entering page verify mode\n");
   5.378 -            continue;
   5.379 -        }
   5.380 -
   5.381 -        if (j == -2) {
   5.382 -            new_ctxt_format = 1;
   5.383 -            if (!read_exact(io_fd, &max_vcpu_id, sizeof(int)) ||
   5.384 -                (max_vcpu_id >= 64) ||
   5.385 -                !read_exact(io_fd, &vcpumap, sizeof(uint64_t))) {
   5.386 -                ERROR("Error when reading max_vcpu_id");
   5.387 -                goto out;
   5.388 -            }
   5.389 -            continue;
   5.390 -        }
   5.391 -
   5.392 -        if (j == 0)
   5.393 -            break;  /* our work here is done */
   5.394 -
   5.395 -        if (j > MAX_BATCH_SIZE) {
   5.396 -            ERROR("Max batch size exceeded. Giving up.");
   5.397 -            goto out;
   5.398 -        }
   5.399 -
   5.400 -        if (!read_exact(io_fd, region_pfn_type, j*sizeof(unsigned long))) {
   5.401 -            ERROR("Error when reading region pfn types");
   5.402 -            goto out;
   5.403 -        }
   5.404 -
   5.405 -        /* First pass for this batch: work out how much memory to alloc */
   5.406 -        nr_mfns = 0; 
   5.407 -        for ( i = 0; i < j; i++ )
   5.408 -        {
   5.409 -            unsigned long pfn, pagetype;
   5.410 -            pfn      = region_pfn_type[i] & ~XEN_DOMCTL_PFINFO_LTAB_MASK;
   5.411 -            pagetype = region_pfn_type[i] &  XEN_DOMCTL_PFINFO_LTAB_MASK;
   5.412 -
   5.413 -            if ( (pagetype != XEN_DOMCTL_PFINFO_XTAB) && 
   5.414 -                 (p2m[pfn] == INVALID_P2M_ENTRY) )
   5.415 -            {
   5.416 -                /* Have a live PFN which hasn't had an MFN allocated */
   5.417 -                p2m_batch[nr_mfns++] = pfn; 
   5.418 -            }
   5.419 -        } 
   5.420 -
   5.421 -
   5.422 -        /* Now allocate a bunch of mfns for this batch */
   5.423 -        if (nr_mfns && xc_domain_memory_populate_physmap(
   5.424 -                xc_handle, dom, nr_mfns, 0, 0, p2m_batch) != 0) { 
   5.425 -            ERROR("Failed to allocate memory for batch.!\n"); 
   5.426 -            errno = ENOMEM;
   5.427 -            goto out;
   5.428 -        }
   5.429 -
   5.430 -        /* Second pass for this batch: update p2m[] and region_mfn[] */
   5.431 -        nr_mfns = 0; 
   5.432 -        for ( i = 0; i < j; i++ )
   5.433 -        {
   5.434 -            unsigned long pfn, pagetype;
   5.435 -            pfn      = region_pfn_type[i] & ~XEN_DOMCTL_PFINFO_LTAB_MASK;
   5.436 -            pagetype = region_pfn_type[i] &  XEN_DOMCTL_PFINFO_LTAB_MASK;
   5.437 -
   5.438 -            if ( pagetype == XEN_DOMCTL_PFINFO_XTAB)
   5.439 -                region_mfn[i] = ~0UL; /* map will fail but we don't care */
   5.440 -            else 
   5.441 -            {
   5.442 -                if (p2m[pfn] == INVALID_P2M_ENTRY) {
   5.443 -                    /* We just allocated a new mfn above; update p2m */
   5.444 -                    p2m[pfn] = p2m_batch[nr_mfns++]; 
   5.445 -                    nr_pfns++; 
   5.446 -                }
   5.447 -
   5.448 -                /* setup region_mfn[] for batch map */
   5.449 -                region_mfn[i] = p2m[pfn]; 
   5.450 -            }
   5.451 -        } 
   5.452 -
   5.453 -        /* Map relevant mfns */
   5.454 -        region_base = xc_map_foreign_batch(
   5.455 -            xc_handle, dom, PROT_WRITE, region_mfn, j);
   5.456 -
   5.457 -        if ( region_base == NULL )
   5.458 -        {
   5.459 -            ERROR("map batch failed");
   5.460 -            goto out;
   5.461 -        }
   5.462 -
   5.463 -        for ( i = 0; i < j; i++ )
   5.464 -        {
   5.465 -            void *page;
   5.466 -            unsigned long pagetype;
   5.467 -
   5.468 -            pfn      = region_pfn_type[i] & ~XEN_DOMCTL_PFINFO_LTAB_MASK;
   5.469 -            pagetype = region_pfn_type[i] &  XEN_DOMCTL_PFINFO_LTAB_MASK;
   5.470 -
   5.471 -            if ( pagetype == XEN_DOMCTL_PFINFO_XTAB )
   5.472 -                /* a bogus/unmapped page: skip it */
   5.473 -                continue;
   5.474 -
   5.475 -            if ( pfn > p2m_size )
   5.476 -            {
   5.477 -                ERROR("pfn out of range");
   5.478 -                goto out;
   5.479 -            }
   5.480 -
   5.481 -            pfn_type[pfn] = pagetype;
   5.482 -
   5.483 -            mfn = p2m[pfn];
   5.484 -
   5.485 -            /* In verify mode, we use a copy; otherwise we work in place */
   5.486 -            page = verify ? (void *)buf : (region_base + i*PAGE_SIZE);
   5.487 -
   5.488 -            if (!read_exact(io_fd, page, PAGE_SIZE)) {
   5.489 -                ERROR("Error when reading page (type was %lx)", pagetype);
   5.490 -                goto out;
   5.491 -            }
   5.492 -
   5.493 -            pagetype &= XEN_DOMCTL_PFINFO_LTABTYPE_MASK;
   5.494 -
   5.495 -            if ( (pagetype >= XEN_DOMCTL_PFINFO_L1TAB) && 
   5.496 -                 (pagetype <= XEN_DOMCTL_PFINFO_L4TAB) )
   5.497 -            {
   5.498 -                /*
   5.499 -                ** A page table page - need to 'uncanonicalize' it, i.e.
   5.500 -                ** replace all the references to pfns with the corresponding
   5.501 -                ** mfns for the new domain.
   5.502 -                **
   5.503 -                ** On PAE we need to ensure that PGDs are in MFNs < 4G, and
   5.504 -                ** so we may need to update the p2m after the main loop.
   5.505 -                ** Hence we defer canonicalization of L1s until then.
   5.506 -                */
   5.507 -                if ((pt_levels != 3) ||
   5.508 -                    pae_extended_cr3 ||
   5.509 -                    (pagetype != XEN_DOMCTL_PFINFO_L1TAB)) {
   5.510 -
   5.511 -                    if (!uncanonicalize_pagetable(xc_handle, dom, 
   5.512 -                                                  pagetype, page)) {
   5.513 -                        /*
   5.514 -                        ** Failing to uncanonicalize a page table can be ok
   5.515 -                        ** under live migration since the pages type may have
   5.516 -                        ** changed by now (and we'll get an update later).
   5.517 -                        */
   5.518 -                        DPRINTF("PT L%ld race on pfn=%08lx mfn=%08lx\n",
   5.519 -                                pagetype >> 28, pfn, mfn);
   5.520 -                        nraces++;
   5.521 -                        continue;
   5.522 -                    } 
   5.523 -                }
   5.524 -            }
   5.525 -            else if ( pagetype != XEN_DOMCTL_PFINFO_NOTAB )
   5.526 -            {
   5.527 -                ERROR("Bogus page type %lx page table is out of range: "
   5.528 -                    "i=%d p2m_size=%lu", pagetype, i, p2m_size);
   5.529 -                goto out;
   5.530 -
   5.531 -            }
   5.532 -
   5.533 -
   5.534 -            if (verify) {
   5.535 -
   5.536 -                int res = memcmp(buf, (region_base + i*PAGE_SIZE), PAGE_SIZE);
   5.537 -
   5.538 -                if (res) {
   5.539 -
   5.540 -                    int v;
   5.541 -
   5.542 -                    DPRINTF("************** pfn=%lx type=%lx gotcs=%08lx "
   5.543 -                            "actualcs=%08lx\n", pfn, pfn_type[pfn],
   5.544 -                            csum_page(region_base + i*PAGE_SIZE),
   5.545 -                            csum_page(buf));
   5.546 -
   5.547 -                    for (v = 0; v < 4; v++) {
   5.548 -
   5.549 -                        unsigned long *p = (unsigned long *)
   5.550 -                            (region_base + i*PAGE_SIZE);
   5.551 -                        if (buf[v] != p[v])
   5.552 -                            DPRINTF("    %d: %08lx %08lx\n", v, buf[v], p[v]);
   5.553 -                    }
   5.554 -                }
   5.555 -            }
   5.556 -
   5.557 -            if (xc_add_mmu_update(xc_handle, mmu,
   5.558 -                                  (((unsigned long long)mfn) << PAGE_SHIFT)
   5.559 -                                  | MMU_MACHPHYS_UPDATE, pfn)) {
   5.560 -                ERROR("failed machpys update mfn=%lx pfn=%lx", mfn, pfn);
   5.561 -                goto out;
   5.562 -            }
   5.563 -        } /* end of 'batch' for loop */
   5.564 -
   5.565 -        munmap(region_base, j*PAGE_SIZE);
   5.566 -        n+= j; /* crude stats */
   5.567 -
   5.568 -        /* 
   5.569 -         * Discard cache for portion of file read so far up to last
   5.570 -         *  page boundary every 16MB or so.
   5.571 -         */
   5.572 -        m += j;
   5.573 -        if ( m > MAX_PAGECACHE_USAGE )
   5.574 -        {
   5.575 -            discard_file_cache(io_fd, 0 /* no flush */);
   5.576 -            m = 0;
   5.577 -        }
   5.578 -    }
   5.579 -
   5.580 -    /*
   5.581 -     * Ensure we flush all machphys updates before potential PAE-specific
   5.582 -     * reallocations below.
   5.583 -     */
   5.584 -    if (xc_finish_mmu_updates(xc_handle, mmu)) {
   5.585 -        ERROR("Error doing finish_mmu_updates()");
   5.586 -        goto out;
   5.587 -    }
   5.588 -
   5.589 -    DPRINTF("Received all pages (%d races)\n", nraces);
   5.590 -
   5.591 -    if ((pt_levels == 3) && !pae_extended_cr3) {
   5.592 -
   5.593 -        /*
   5.594 -        ** XXX SMH on PAE we need to ensure PGDs are in MFNs < 4G. This
   5.595 -        ** is a little awkward and involves (a) finding all such PGDs and
   5.596 -        ** replacing them with 'lowmem' versions; (b) upating the p2m[]
   5.597 -        ** with the new info; and (c) canonicalizing all the L1s using the
   5.598 -        ** (potentially updated) p2m[].
   5.599 -        **
   5.600 -        ** This is relatively slow (and currently involves two passes through
   5.601 -        ** the pfn_type[] array), but at least seems to be correct. May wish
   5.602 -        ** to consider more complex approaches to optimize this later.
   5.603 -        */
   5.604 -
   5.605 -        int j, k;
   5.606 -        
   5.607 -        /* First pass: find all L3TABs current in > 4G mfns and get new mfns */
   5.608 -        for ( i = 0; i < p2m_size; i++ )
   5.609 -        {
   5.610 -            if ( ((pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) ==
   5.611 -                  XEN_DOMCTL_PFINFO_L3TAB) &&
   5.612 -                 (p2m[i] > 0xfffffUL) )
   5.613 -            {
   5.614 -                unsigned long new_mfn;
   5.615 -                uint64_t l3ptes[4];
   5.616 -                uint64_t *l3tab;
   5.617 -
   5.618 -                l3tab = (uint64_t *)
   5.619 -                    xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
   5.620 -                                         PROT_READ, p2m[i]);
   5.621 -
   5.622 -                for(j = 0; j < 4; j++)
   5.623 -                    l3ptes[j] = l3tab[j];
   5.624 -
   5.625 -                munmap(l3tab, PAGE_SIZE);
   5.626 -
   5.627 -                if (!(new_mfn=xc_make_page_below_4G(xc_handle, dom, p2m[i]))) {
   5.628 -                    ERROR("Couldn't get a page below 4GB :-(");
   5.629 -                    goto out;
   5.630 -                }
   5.631 -
   5.632 -                p2m[i] = new_mfn;
   5.633 -                if (xc_add_mmu_update(xc_handle, mmu,
   5.634 -                                      (((unsigned long long)new_mfn)
   5.635 -                                       << PAGE_SHIFT) |
   5.636 -                                      MMU_MACHPHYS_UPDATE, i)) {
   5.637 -                    ERROR("Couldn't m2p on PAE root pgdir");
   5.638 -                    goto out;
   5.639 -                }
   5.640 -
   5.641 -                l3tab = (uint64_t *)
   5.642 -                    xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
   5.643 -                                         PROT_READ | PROT_WRITE, p2m[i]);
   5.644 -
   5.645 -                for(j = 0; j < 4; j++)
   5.646 -                    l3tab[j] = l3ptes[j];
   5.647 -
   5.648 -                munmap(l3tab, PAGE_SIZE);
   5.649 -
   5.650 -            }
   5.651 -        }
   5.652 -
   5.653 -        /* Second pass: find all L1TABs and uncanonicalize them */
   5.654 -        j = 0;
   5.655 -
   5.656 -        for ( i = 0; i < p2m_size; i++ )
   5.657 -        {
   5.658 -            if ( ((pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) ==
   5.659 -                  XEN_DOMCTL_PFINFO_L1TAB) )
   5.660 -            {
   5.661 -                region_mfn[j] = p2m[i];
   5.662 -                j++;
   5.663 -            }
   5.664 -
   5.665 -            if(i == (p2m_size-1) || j == MAX_BATCH_SIZE) {
   5.666 -
   5.667 -                if (!(region_base = xc_map_foreign_batch(
   5.668 -                          xc_handle, dom, PROT_READ | PROT_WRITE,
   5.669 -                          region_mfn, j))) {
   5.670 -                    ERROR("map batch failed");
   5.671 -                    goto out;
   5.672 -                }
   5.673 -
   5.674 -                for(k = 0; k < j; k++) {
   5.675 -                    if(!uncanonicalize_pagetable(xc_handle, dom, 
   5.676 -                                                 XEN_DOMCTL_PFINFO_L1TAB,
   5.677 -                                                 region_base + k*PAGE_SIZE)) {
   5.678 -                        ERROR("failed uncanonicalize pt!");
   5.679 -                        goto out;
   5.680 -                    }
   5.681 -                }
   5.682 -
   5.683 -                munmap(region_base, j*PAGE_SIZE);
   5.684 -                j = 0;
   5.685 -            }
   5.686 -        }
   5.687 -
   5.688 -        if (xc_finish_mmu_updates(xc_handle, mmu)) {
   5.689 -            ERROR("Error doing finish_mmu_updates()");
   5.690 -            goto out;
   5.691 -        }
   5.692 -    }
   5.693 -
   5.694 -    /*
   5.695 -     * Pin page tables. Do this after writing to them as otherwise Xen
   5.696 -     * will barf when doing the type-checking.
   5.697 -     */
   5.698 -    nr_pins = 0;
   5.699 -    for ( i = 0; i < p2m_size; i++ )
   5.700 -    {
   5.701 -        if ( (pfn_type[i] & XEN_DOMCTL_PFINFO_LPINTAB) == 0 )
   5.702 -            continue;
   5.703 -
   5.704 -        switch ( pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK )
   5.705 -        {
   5.706 -        case XEN_DOMCTL_PFINFO_L1TAB:
   5.707 -            pin[nr_pins].cmd = MMUEXT_PIN_L1_TABLE;
   5.708 -            break;
   5.709 -
   5.710 -        case XEN_DOMCTL_PFINFO_L2TAB:
   5.711 -            pin[nr_pins].cmd = MMUEXT_PIN_L2_TABLE;
   5.712 -            break;
   5.713 -
   5.714 -        case XEN_DOMCTL_PFINFO_L3TAB:
   5.715 -            pin[nr_pins].cmd = MMUEXT_PIN_L3_TABLE;
   5.716 -            break;
   5.717 -
   5.718 -        case XEN_DOMCTL_PFINFO_L4TAB:
   5.719 -            pin[nr_pins].cmd = MMUEXT_PIN_L4_TABLE;
   5.720 -            break;
   5.721 -
   5.722 -        default:
   5.723 -            continue;
   5.724 -        }
   5.725 -
   5.726 -        pin[nr_pins].arg1.mfn = p2m[i];
   5.727 -        nr_pins++;
   5.728 -
   5.729 -        /* Batch full? Then flush. */
   5.730 -        if (nr_pins == MAX_PIN_BATCH) {
   5.731 -            if (xc_mmuext_op(xc_handle, pin, nr_pins, dom) < 0) {
   5.732 -                ERROR("Failed to pin batch of %d page tables", nr_pins);
   5.733 -                goto out;
   5.734 -            }
   5.735 -            nr_pins = 0;
   5.736 -        }
   5.737 -    }
   5.738 -
   5.739 -    /* Flush final partial batch. */
   5.740 -    if ((nr_pins != 0) && (xc_mmuext_op(xc_handle, pin, nr_pins, dom) < 0)) {
   5.741 -        ERROR("Failed to pin batch of %d page tables", nr_pins);
   5.742 -        goto out;
   5.743 -    }
   5.744 -
   5.745 -    DPRINTF("\b\b\b\b100%%\n");
   5.746 -    DPRINTF("Memory reloaded (%ld pages)\n", nr_pfns);
   5.747 -
   5.748 -    /* Get the list of PFNs that are not in the psuedo-phys map */
   5.749 -    {
   5.750 -        unsigned int count;
   5.751 -        unsigned long *pfntab;
   5.752 -        int nr_frees, rc;
   5.753 -
   5.754 -        if (!read_exact(io_fd, &count, sizeof(count))) {
   5.755 -            ERROR("Error when reading pfn count");
   5.756 -            goto out;
   5.757 -        }
   5.758 -
   5.759 -        if(!(pfntab = malloc(sizeof(unsigned long) * count))) {
   5.760 -            ERROR("Out of memory");
   5.761 -            goto out;
   5.762 -        }
   5.763 -
   5.764 -        if (!read_exact(io_fd, pfntab, sizeof(unsigned long)*count)) {
   5.765 -            ERROR("Error when reading pfntab");
   5.766 -            goto out;
   5.767 -        }
   5.768 -
   5.769 -        nr_frees = 0; 
   5.770 -        for (i = 0; i < count; i++) {
   5.771 -
   5.772 -            unsigned long pfn = pfntab[i];
   5.773 -
   5.774 -            if(p2m[pfn] != INVALID_P2M_ENTRY) {
   5.775 -                /* pfn is not in physmap now, but was at some point during 
   5.776 -                   the save/migration process - need to free it */
   5.777 -                pfntab[nr_frees++] = p2m[pfn];
   5.778 -                p2m[pfn]  = INVALID_P2M_ENTRY; // not in pseudo-physical map
   5.779 -            }
   5.780 -        }
   5.781 -
   5.782 -        if (nr_frees > 0) {
   5.783 -
   5.784 -            struct xen_memory_reservation reservation = {
   5.785 -                .nr_extents   = nr_frees,
   5.786 -                .extent_order = 0,
   5.787 -                .domid        = dom
   5.788 -            };
   5.789 -            set_xen_guest_handle(reservation.extent_start, pfntab);
   5.790 -
   5.791 -            if ((rc = xc_memory_op(xc_handle, XENMEM_decrease_reservation,
   5.792 -                                   &reservation)) != nr_frees) {
   5.793 -                ERROR("Could not decrease reservation : %d", rc);
   5.794 -                goto out;
   5.795 -            } else
   5.796 -                DPRINTF("Decreased reservation by %d pages\n", count);
   5.797 -        }
   5.798 -    }
   5.799 -
   5.800 -    for (i = 0; i <= max_vcpu_id; i++) {
   5.801 -        if (!(vcpumap & (1ULL << i)))
   5.802 -            continue;
   5.803 -
   5.804 -        if (!read_exact(io_fd, &ctxt, sizeof(ctxt))) {
   5.805 -            ERROR("Error when reading ctxt %d", i);
   5.806 -            goto out;
   5.807 -        }
   5.808 -
   5.809 -        if ( !new_ctxt_format )
   5.810 -            ctxt.flags |= VGCF_online;
   5.811 -
   5.812 -        if (i == 0) {
   5.813 -            /*
   5.814 -             * Uncanonicalise the suspend-record frame number and poke
   5.815 -             * resume record.
   5.816 -             */
   5.817 -            pfn = ctxt.user_regs.edx;
   5.818 -            if ((pfn >= p2m_size) ||
   5.819 -                (pfn_type[pfn] != XEN_DOMCTL_PFINFO_NOTAB)) {
   5.820 -                ERROR("Suspend record frame number is bad");
   5.821 -                goto out;
   5.822 -            }
   5.823 -            ctxt.user_regs.edx = mfn = p2m[pfn];
   5.824 -            start_info = xc_map_foreign_range(
   5.825 -                xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE, mfn);
   5.826 -            start_info->nr_pages = p2m_size;
   5.827 -            start_info->shared_info = shared_info_frame << PAGE_SHIFT;
   5.828 -            start_info->flags = 0;
   5.829 -            *store_mfn = start_info->store_mfn = p2m[start_info->store_mfn];
   5.830 -            start_info->store_evtchn = store_evtchn;
   5.831 -            start_info->console.domU.mfn = p2m[start_info->console.domU.mfn];
   5.832 -            start_info->console.domU.evtchn = console_evtchn;
   5.833 -            *console_mfn = start_info->console.domU.mfn;
   5.834 -            munmap(start_info, PAGE_SIZE);
   5.835 -        }
   5.836 -
   5.837 -        /* Uncanonicalise each GDT frame number. */
   5.838 -        if (ctxt.gdt_ents > 8192) {
   5.839 -            ERROR("GDT entry count out of range");
   5.840 -            goto out;
   5.841 -        }
   5.842 -
   5.843 -        for (j = 0; (512*j) < ctxt.gdt_ents; j++) {
   5.844 -            pfn = ctxt.gdt_frames[j];
   5.845 -            if ((pfn >= p2m_size) ||
   5.846 -                (pfn_type[pfn] != XEN_DOMCTL_PFINFO_NOTAB)) {
   5.847 -                ERROR("GDT frame number is bad");
   5.848 -                goto out;
   5.849 -            }
   5.850 -            ctxt.gdt_frames[j] = p2m[pfn];
   5.851 -        }
   5.852 -
   5.853 -        /* Uncanonicalise the page table base pointer. */
   5.854 -        pfn = xen_cr3_to_pfn(ctxt.ctrlreg[3]);
   5.855 -
   5.856 -        if (pfn >= p2m_size) {
   5.857 -            ERROR("PT base is bad: pfn=%lu p2m_size=%lu type=%08lx",
   5.858 -                  pfn, p2m_size, pfn_type[pfn]);
   5.859 -            goto out;
   5.860 -        }
   5.861 -
   5.862 -        if ( (pfn_type[pfn] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) !=
   5.863 -             ((unsigned long)pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT) ) {
   5.864 -            ERROR("PT base is bad. pfn=%lu nr=%lu type=%08lx %08lx",
   5.865 -                  pfn, p2m_size, pfn_type[pfn],
   5.866 -                  (unsigned long)pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT);
   5.867 -            goto out;
   5.868 -        }
   5.869 -
   5.870 -        ctxt.ctrlreg[3] = xen_pfn_to_cr3(p2m[pfn]);
   5.871 -
   5.872 -        /* Guest pagetable (x86/64) stored in otherwise-unused CR1. */
   5.873 -        if ( (pt_levels == 4) && ctxt.ctrlreg[1] )
   5.874 -        {
   5.875 -            pfn = xen_cr3_to_pfn(ctxt.ctrlreg[1]);
   5.876 -
   5.877 -            if (pfn >= p2m_size) {
   5.878 -                ERROR("User PT base is bad: pfn=%lu p2m_size=%lu type=%08lx",
   5.879 -                      pfn, p2m_size, pfn_type[pfn]);
   5.880 -                goto out;
   5.881 -            }
   5.882 -
   5.883 -            if ( (pfn_type[pfn] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) !=
   5.884 -                 ((unsigned long)pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT) ) {
   5.885 -                ERROR("User PT base is bad. pfn=%lu nr=%lu type=%08lx %08lx",
   5.886 -                      pfn, p2m_size, pfn_type[pfn],
   5.887 -                      (unsigned long)pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT);
   5.888 -                goto out;
   5.889 -            }
   5.890 -
   5.891 -            ctxt.ctrlreg[1] = xen_pfn_to_cr3(p2m[pfn]);
   5.892 -        }
   5.893 -
   5.894 -        domctl.cmd = XEN_DOMCTL_setvcpucontext;
   5.895 -        domctl.domain = (domid_t)dom;
   5.896 -        domctl.u.vcpucontext.vcpu = i;
   5.897 -        set_xen_guest_handle(domctl.u.vcpucontext.ctxt, &ctxt);
   5.898 -        rc = xc_domctl(xc_handle, &domctl);
   5.899 -        if (rc != 0) {
   5.900 -            ERROR("Couldn't build vcpu%d", i);
   5.901 -            goto out;
   5.902 -        }
   5.903 -    }
   5.904 -
   5.905 -    if (!read_exact(io_fd, shared_info_page, PAGE_SIZE)) {
   5.906 -        ERROR("Error when reading shared info page");
   5.907 -        goto out;
   5.908 -    }
   5.909 -
   5.910 -    /* clear any pending events and the selector */
   5.911 -    memset(&(shared_info->evtchn_pending[0]), 0,
   5.912 -           sizeof (shared_info->evtchn_pending));
   5.913 -    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
   5.914 -        shared_info->vcpu_info[i].evtchn_pending_sel = 0;
   5.915 -
   5.916 -    /* Copy saved contents of shared-info page. No checking needed. */
   5.917 -    page = xc_map_foreign_range(
   5.918 -        xc_handle, dom, PAGE_SIZE, PROT_WRITE, shared_info_frame);
   5.919 -    memcpy(page, shared_info, PAGE_SIZE);
   5.920 -    munmap(page, PAGE_SIZE);
   5.921 -
   5.922 -    /* Uncanonicalise the pfn-to-mfn table frame-number list. */
   5.923 -    for (i = 0; i < P2M_FL_ENTRIES; i++) {
   5.924 -        pfn = p2m_frame_list[i];
   5.925 -        if ((pfn >= p2m_size) || (pfn_type[pfn] != XEN_DOMCTL_PFINFO_NOTAB)) {
   5.926 -            ERROR("PFN-to-MFN frame number is bad");
   5.927 -            goto out;
   5.928 -        }
   5.929 -
   5.930 -        p2m_frame_list[i] = p2m[pfn];
   5.931 -    }
   5.932 -
   5.933 -    /* Copy the P2M we've constructed to the 'live' P2M */
   5.934 -    if (!(live_p2m = xc_map_foreign_batch(xc_handle, dom, PROT_WRITE,
   5.935 -                                          p2m_frame_list, P2M_FL_ENTRIES))) {
   5.936 -        ERROR("Couldn't map p2m table");
   5.937 -        goto out;
   5.938 -    }
   5.939 -
   5.940 -    memcpy(live_p2m, p2m, ROUNDUP(p2m_size * sizeof(xen_pfn_t), PAGE_SHIFT));
   5.941 -    munmap(live_p2m, ROUNDUP(p2m_size * sizeof(xen_pfn_t), PAGE_SHIFT));
   5.942 -
   5.943 -    DPRINTF("Domain ready to be built.\n");
   5.944 -
   5.945 - out:
   5.946 -    if ( (rc != 0) && (dom != 0) )
   5.947 -        xc_domain_destroy(xc_handle, dom);
   5.948 -    free(mmu);
   5.949 -    free(p2m);
   5.950 -    free(pfn_type);
   5.951 -
   5.952 -    /* discard cache for save file  */
   5.953 -    discard_file_cache(io_fd, 1 /*flush*/);
   5.954 -
   5.955 -    DPRINTF("Restore exit with rc=%d\n", rc);
   5.956 -    
   5.957 -    return rc;
   5.958 -}
     6.1 --- a/tools/libxc/xenguest.h	Thu Apr 05 10:43:50 2007 +0100
     6.2 +++ b/tools/libxc/xenguest.h	Thu Apr 05 15:11:22 2007 +0100
     6.3 @@ -38,29 +38,21 @@ int xc_hvm_save(int xc_handle, int io_fd
     6.4                  void (*qemu_flip_buffer)(int, int));
     6.5  
     6.6  /**
     6.7 - * This function will restore a saved domain running Linux.
     6.8 + * This function will restore a saved domain.
     6.9   *
    6.10   * @parm xc_handle a handle to an open hypervisor interface
    6.11   * @parm fd the file descriptor to restore a domain from
    6.12   * @parm dom the id of the domain
    6.13   * @parm store_evtchn the store event channel for this domain to use
    6.14   * @parm store_mfn returned with the mfn of the store page
    6.15 + * @parm hvm non-zero if this is a HVM restore
    6.16 + * @parm pae non-zero if this HVM domain has PAE support enabled
    6.17   * @return 0 on success, -1 on failure
    6.18   */
    6.19 -int xc_linux_restore(int xc_handle, int io_fd, uint32_t dom,
    6.20 -                     unsigned int store_evtchn, unsigned long *store_mfn,
    6.21 -                     unsigned int console_evtchn, unsigned long *console_mfn);
    6.22 -
    6.23 -/**
    6.24 - * This function will restore a saved hvm domain running unmodified guest.
    6.25 - *
    6.26 - * @parm store_mfn pass mem size & returned with the mfn of the store page
    6.27 - * @return 0 on success, -1 on failure
    6.28 - */
    6.29 -int xc_hvm_restore(int xc_handle, int io_fd, uint32_t dom,
    6.30 -                   unsigned int store_evtchn,
    6.31 -                   unsigned long *store_mfn, 
    6.32 -                   unsigned int pae, unsigned int apic);
    6.33 +int xc_domain_restore(int xc_handle, int io_fd, uint32_t dom,
    6.34 +                      unsigned int store_evtchn, unsigned long *store_mfn,
    6.35 +                      unsigned int console_evtchn, unsigned long *console_mfn,
    6.36 +                      unsigned int hvm, unsigned int pae);
    6.37  
    6.38  /**
    6.39   * This function will create a domain for a paravirtualized Linux
     7.1 --- a/tools/libxc/xg_private.c	Thu Apr 05 10:43:50 2007 +0100
     7.2 +++ b/tools/libxc/xg_private.c	Thu Apr 05 15:11:22 2007 +0100
     7.3 @@ -209,16 +209,6 @@ unsigned long csum_page(void *page)
     7.4      return -1;
     7.5  }
     7.6  
     7.7 -__attribute__((weak)) 
     7.8 -    int xc_hvm_restore(int xc_handle, int io_fd, uint32_t dom,
     7.9 -                       unsigned int store_evtchn,
    7.10 -                       unsigned long *store_mfn,
    7.11 -                       unsigned int pae, unsigned int apic)
    7.12 -{
    7.13 -    errno = ENOSYS;
    7.14 -    return -1;
    7.15 -}
    7.16 -
    7.17  __attribute__((weak)) int xc_get_hvm_param(
    7.18      int handle, domid_t dom, int param, unsigned long *value)
    7.19  {
     8.1 --- a/tools/xcutils/xc_restore.c	Thu Apr 05 10:43:50 2007 +0100
     8.2 +++ b/tools/xcutils/xc_restore.c	Thu Apr 05 15:11:22 2007 +0100
     8.3 @@ -39,14 +39,8 @@ main(int argc, char **argv)
     8.4      pae  = atoi(argv[6]);
     8.5      apic = atoi(argv[7]);
     8.6  
     8.7 -    if ( hvm )
     8.8 -        ret = xc_hvm_restore(xc_fd, io_fd, domid,
     8.9 -                             store_evtchn, &store_mfn,
    8.10 -                             pae, apic);
    8.11 -    else
    8.12 -        ret = xc_linux_restore(xc_fd, io_fd, domid,
    8.13 -                               store_evtchn, &store_mfn,
    8.14 -                               console_evtchn, &console_mfn);
    8.15 +    ret = xc_domain_restore(xc_fd, io_fd, domid, store_evtchn, &store_mfn,
    8.16 +                            console_evtchn, &console_mfn, hvm, pae);
    8.17  
    8.18      if ( ret == 0 )
    8.19      {