ia64/xen-unstable

changeset 901:53a122a3c2b5

bitkeeper revision 1.569 (3faab4a36OLmL1p_QOPfw7H-YMwVIA)

Many files:
More work on DOM0 save/restore tools. Now ready for testing.
author kaf24@scramble.cl.cam.ac.uk
date Thu Nov 06 20:52:51 2003 +0000 (2003-11-06)
parents b2decdeba558
children 7bf6fbf9efac a758526e0e84
files tools/internal/xi_build.c tools/internal/xi_restore_linux.c tools/internal/xi_save_linux.c xen/arch/i386/mm.c xen/arch/i386/traps.c xen/common/dom0_ops.c xen/common/domain.c xen/include/asm-i386/processor.h
line diff
     1.1 --- a/tools/internal/xi_build.c	Thu Nov 06 19:14:11 2003 +0000
     1.2 +++ b/tools/internal/xi_build.c	Thu Nov 06 20:52:51 2003 +0000
     1.3 @@ -102,6 +102,18 @@ static int read_kernel_header(int fd, lo
     1.4      /* Read the load address which immediately follows the Xeno signature. */
     1.5      read(fd, load_addr, sizeof(unsigned long));
     1.6  
     1.7 +    if ( (*load_addr & (PAGE_SIZE-1)) != 0 )
     1.8 +    {
     1.9 +        ERROR("We can only deal with page-aligned load addresses");
    1.10 +        return -1;
    1.11 +    }
    1.12 +
    1.13 +    if ( (*load_addr + (dom_size << 10)) > HYPERVISOR_VIRT_START )
    1.14 +    {
    1.15 +        ERROR("Cannot map all domain memory without hitting Xen space");
    1.16 +        return -1;
    1.17 +    }
    1.18 +
    1.19      *ksize = stat.st_size - SIG_LEN - sizeof(unsigned long);
    1.20  
    1.21      return 0;
     2.1 --- a/tools/internal/xi_restore_linux.c	Thu Nov 06 19:14:11 2003 +0000
     2.2 +++ b/tools/internal/xi_restore_linux.c	Thu Nov 06 20:52:51 2003 +0000
     2.3 @@ -10,12 +10,73 @@
     2.4  #include "mem_defs.h"
     2.5  #include <asm-xeno/suspend.h>
     2.6  
     2.7 -static char *argv0 = "internal_save_linux";
     2.8 +static char *argv0 = "internal_restore_linux";
     2.9  
    2.10 -/* A table mapping each PFN to its current MFN. */
    2.11 +/* A table mapping each PFN to its new MFN. */
    2.12  static unsigned long *pfn_to_mfn_table;
    2.13 -/* A table mapping each current MFN to its canonical PFN. */
    2.14 -static unsigned long *mfn_to_pfn_table;
    2.15 +
    2.16 +static int get_pfn_list(
    2.17 +    int domain_id, unsigned long *pfn_buf, unsigned long max_pfns)
    2.18 +{
    2.19 +    dom0_op_t op;
    2.20 +    int ret;
    2.21 +    op.cmd = DOM0_GETMEMLIST;
    2.22 +    op.u.getmemlist.domain   = domain_id;
    2.23 +    op.u.getmemlist.max_pfns = max_pfns;
    2.24 +    op.u.getmemlist.buffer   = pfn_buf;
    2.25 +
    2.26 +    if ( mlock(pfn_buf, max_pfns * sizeof(unsigned long)) != 0 )
    2.27 +    {
    2.28 +        PERROR("Could not lock pfn list buffer");
    2.29 +        return -1;
    2.30 +    }    
    2.31 +
    2.32 +    ret = do_dom0_op(&op);
    2.33 +
    2.34 +    (void)munlock(pfn_buf, max_pfns * sizeof(unsigned long));
    2.35 +
    2.36 +    return (ret < 0) ? -1 : op.u.getmemlist.num_pfns;
    2.37 +}
    2.38 +
    2.39 +#define MAX_MMU_UPDATES 1024
    2.40 +static mmu_update_t mmu_updates[MAX_MMU_UPDATES];
    2.41 +static int mmu_update_idx;
    2.42 +
    2.43 +static void flush_mmu_updates(void)
    2.44 +{
    2.45 +    privcmd_hypercall_t hypercall;
    2.46 +
    2.47 +    if ( mmu_update_idx == 0 )
    2.48 +        return;
    2.49 +
    2.50 +    hypercall.op     = __HYPERVISOR_mmu_update;
    2.51 +    hypercall.arg[0] = (unsigned long)mmu_updates;
    2.52 +    hypercall.arg[1] = (unsigned long)mmu_update_idx;
    2.53 +
    2.54 +    if ( mlock(mmu_updates, sizeof(mmu_updates)) != 0 )
    2.55 +    {
    2.56 +        PERROR("Could not lock pagetable update array");
    2.57 +        exit(1);
    2.58 +    }
    2.59 +
    2.60 +    if ( do_xen_hypercall(&hypercall) < 0 )
    2.61 +    {
    2.62 +        ERROR("Failure when submitting mmu updates");
    2.63 +        exit(1);
    2.64 +    }
    2.65 +
    2.66 +    mmu_update_idx = 0;
    2.67 +    
    2.68 +    (void)munlock(mmu_updates, sizeof(mmu_updates));
    2.69 +}
    2.70 +
    2.71 +static void add_mmu_update(unsigned long ptr, unsigned long val)
    2.72 +{
    2.73 +    mmu_updates[mmu_update_idx].ptr = ptr;
    2.74 +    mmu_updates[mmu_update_idx].val = val;
    2.75 +    if ( ++mmu_update_idx == MAX_MMU_UPDATES )
    2.76 +        flush_mmu_updates();
    2.77 +}
    2.78  
    2.79  static int devmem_fd;
    2.80  
    2.81 @@ -46,47 +107,6 @@ static void unmap_pfn(void *vaddr)
    2.82      (void)munmap(vaddr, PAGE_SIZE);
    2.83  }
    2.84  
    2.85 -/*
    2.86 - * Returns TRUE if the given machine frame number has a unique mapping
    2.87 - * in the guest's pseudophysical map.
    2.88 - */
    2.89 -#define MFN_IS_IN_PSEUDOPHYS_MAP(_mfn) \
    2.90 -    (((_mfn) < (1024*1024)) &&          \
    2.91 -     (pfn_to_mfn_table[mfn_to_pfn_table[_mfn]] == (_mfn)))
    2.92 -
    2.93 -/* Returns TRUE if MFN is successfully converted to a PFN. */
    2.94 -static int translate_mfn_to_pfn(unsigned long *pmfn)
    2.95 -{
    2.96 -    unsigned long mfn = *pmfn;
    2.97 -    if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) )
    2.98 -        return 0;
    2.99 -    *pmfn = mfn_to_pfn_table[mfn];
   2.100 -    return 1;
   2.101 -}
   2.102 -
   2.103 -static int check_pfn_ownership(unsigned long mfn, unsigned int dom)
   2.104 -{
   2.105 -    dom0_op_t op;
   2.106 -    op.cmd = DOM0_GETPAGEFRAMEINFO;
   2.107 -    op.u.getpageframeinfo.pfn = mfn;
   2.108 -    if ( (do_dom0_op(&op) < 0) || (op.u.getpageframeinfo.domain != dom) )
   2.109 -        return 0;
   2.110 -    return 1;
   2.111 -}
   2.112 -
   2.113 -static unsigned int get_pfn_type(unsigned long mfn)
   2.114 -{
   2.115 -    dom0_op_t op;
   2.116 -    op.cmd = DOM0_GETPAGEFRAMEINFO;
   2.117 -    op.u.getpageframeinfo.pfn = mfn;
   2.118 -    if ( do_dom0_op(&op) < 0 )
   2.119 -    {
   2.120 -        PERROR("Unexpected failure when getting page frame info!");
   2.121 -        exit(1);
   2.122 -    }
   2.123 -    return op.u.getpageframeinfo.type;
   2.124 -}
   2.125 -
   2.126  static int checked_read(int fd, void *buf, size_t count)
   2.127  {
   2.128      int rc;
   2.129 @@ -98,11 +118,15 @@ static int checked_read(int fd, void *bu
   2.130  int main(int argc, char **argv)
   2.131  {
   2.132      dom0_op_t op;
   2.133 -    int rc = 1, i;
   2.134 -    unsigned long mfn, dom = 0;
   2.135 +    int rc = 1, i, j;
   2.136 +    unsigned long mfn, pfn, dom = 0;
   2.137      
   2.138      /* Number of page frames in use by this XenoLinux session. */
   2.139      unsigned long nr_pfns;
   2.140 +
   2.141 +    /* The new domain's shared-info frame number. */
   2.142 +    unsigned long shared_info_frame;
   2.143 +    unsigned char shared_info[PAGE_SIZE]; /* saved contents from file */
   2.144      
   2.145      /* A copy of the CPU context of the guest. */
   2.146      full_execution_context_t ctxt;
   2.147 @@ -121,11 +145,9 @@ int main(int argc, char **argv)
   2.148  
   2.149      /* A copy of the pfn-to-mfn table frame list. */
   2.150      unsigned long pfn_to_mfn_frame_list[1024];
   2.151 -    /* A temporary mapping of one frame in the above list. */
   2.152 -    unsigned long *pfn_to_mfn_frame;
   2.153  
   2.154 -    /* A temporary mapping, and a copy, of the guest's suspend record. */
   2.155 -    suspend_record_t *p_srec, srec;
   2.156 +    /* A temporary mapping of the guest's suspend record. */
   2.157 +    suspend_record_t *p_srec;
   2.158  
   2.159      /* The name and descriptor of the file that we are reading from. */
   2.160      char *filename;
   2.161 @@ -158,18 +180,13 @@ int main(int argc, char **argv)
   2.162      if ( !checked_read(fd, name,                  sizeof(name)) ||
   2.163           !checked_read(fd, &nr_pfns,              sizeof(unsigned long)) ||
   2.164           !checked_read(fd, &ctxt,                 sizeof(ctxt)) ||
   2.165 +         !checked_read(fd, shared_info,           PAGE_SIZE) ||
   2.166           !checked_read(fd, pfn_to_mfn_frame_list, PAGE_SIZE) )
   2.167      {
   2.168          ERROR("Error when reading from state file");
   2.169          goto out;
   2.170      }
   2.171  
   2.172 -    if ( nr_pfns > 1024*1024 )
   2.173 -    {
   2.174 -        ERROR("Invalid state file -- pfn count out of range");
   2.175 -        goto out;
   2.176 -    }
   2.177 -
   2.178      for ( i = 0; i < MAX_DOMAIN_NAME; i++ )
   2.179      {
   2.180          if ( name[i] == '\0' ) break;
   2.181 @@ -181,8 +198,13 @@ int main(int argc, char **argv)
   2.182      }
   2.183      name[MAX_DOMAIN_NAME-1] = '\0';
   2.184  
   2.185 +    if ( nr_pfns > 1024*1024 )
   2.186 +    {
   2.187 +        ERROR("Invalid state file -- pfn count out of range");
   2.188 +        goto out;
   2.189 +    }
   2.190 +
   2.191      /* We want zeroed memory so use calloc rather than malloc. */
   2.192 -    mfn_to_pfn_table = calloc(1, 4 * 1024 * 1024);
   2.193      pfn_to_mfn_table = calloc(1, 4 * nr_pfns);
   2.194      pfn_type         = calloc(1, 4 * nr_pfns);    
   2.195  
   2.196 @@ -192,7 +214,7 @@ int main(int argc, char **argv)
   2.197          goto out;
   2.198      }
   2.199  
   2.200 -    /* Create a new domain of teh appropriate size, and find it's dom_id. */
   2.201 +    /* Create a new domain of the appropriate size, and find its dom_id. */
   2.202      op.cmd = DOM0_CREATEDOMAIN;
   2.203      op.u.createdomain.memory_kb = nr_pfns * (PAGE_SIZE / 1024);
   2.204      memcpy(op.u.createdomain.name, name, MAX_DOMAIN_NAME);
   2.205 @@ -203,131 +225,200 @@ int main(int argc, char **argv)
   2.206      }
   2.207      dom = op.u.createdomain.domain;
   2.208  
   2.209 +    /* Get the domain's shared-info frame. */
   2.210 +    op.cmd = DOM0_GETDOMAININFO;
   2.211 +    op.u.getdomaininfo.domain = dom;
   2.212 +    if ( do_dom0_op(&op) < 0 )
   2.213 +    {
   2.214 +        ERROR("Could not get information on new domain");
   2.215 +        goto out;
   2.216 +    }
   2.217 +    shared_info_frame = op.u.getdomaininfo.shared_info_frame;
   2.218 +
   2.219      if ( init_pfn_mapper() < 0 )
   2.220          goto out;
   2.221  
   2.222 -    /* Is the suspend-record MFN actually valid for this domain? */
   2.223 -    if ( !check_pfn_ownership(ctxt.i386_ctxt.esi, dom) )
   2.224 -    {
   2.225 -        ERROR("Invalid state record pointer");
   2.226 -        goto out;
   2.227 -    }
   2.228 +    /* Copy saved contents of shared-info page. No checking needed. */
   2.229 +    ppage = map_pfn(shared_info_frame);
   2.230 +    memcpy(ppage, shared_info, PAGE_SIZE);
   2.231 +    unmap_pfn(ppage);
   2.232  
   2.233 -    /* If the suspend-record MFN is okay then grab a copy of it to @srec. */
   2.234 -    p_srec = map_pfn(ctxt.i386_ctxt.esi);
   2.235 -    memcpy(&srec, p_srec, sizeof(srec));
   2.236 -    unmap_pfn(p_srec);
   2.237 -
   2.238 -    if ( !check_pfn_ownership(srec.pfn_to_mfn_frame_list, dom) )
   2.239 +    /* Build the pfn-to-mfn table. We choose MFN ordering returned by Xen. */
   2.240 +    if ( get_pfn_list(dom, pfn_to_mfn_table, nr_pfns) != nr_pfns )
   2.241      {
   2.242 -        ERROR("Invalid pfn-to-mfn frame list pointer");
   2.243 +        ERROR("Did not read correct number of frame numbers for new dom");
   2.244          goto out;
   2.245      }
   2.246  
   2.247      /*
   2.248 -     * Construct the local pfn-to-mfn and mfn-to-pfn tables. On exit from this
   2.249 -     * loop we have each MFN mapped at most once. Note that there may be MFNs
   2.250 -     * that aren't mapped at all: we detect these by MFN_IS_IN_PSEUDOPHYS_MAP.
   2.251 +     * Now simply read each saved frame into its new machine frame.
   2.252 +     * We uncanonicalise page tables as we go.
   2.253       */
   2.254 -    pfn_to_mfn_frame = NULL;
   2.255 -    for ( i = 0; i < srec.nr_pfns; i++ )
   2.256 +    for ( i = 0; i < nr_pfns; i++ )
   2.257      {
   2.258 -        /* Each frameful of table frames must be checked & mapped on demand. */
   2.259 -        if ( (i & 1023) == 0 )
   2.260 +        mfn = pfn_to_mfn_table[i];
   2.261 +
   2.262 +        if ( !checked_read(fd, page, PAGE_SIZE) )
   2.263          {
   2.264 -            mfn = pfn_to_mfn_frame_list[i/1024];
   2.265 -            if ( !check_pfn_ownership(mfn, dom) )
   2.266 -            {
   2.267 -                ERROR("Invalid frame number if pfn-to-mfn frame list");
   2.268 -                goto out;
   2.269 -            }
   2.270 -            if ( pfn_to_mfn_frame != NULL )
   2.271 -                unmap_pfn(pfn_to_mfn_frame);
   2.272 -            pfn_to_mfn_frame = map_pfn(mfn);
   2.273 -        }
   2.274 -        
   2.275 -        mfn = pfn_to_mfn_frame[i & 1023];
   2.276 -
   2.277 -        if ( !check_pfn_ownership(mfn, dom) )
   2.278 -        {
   2.279 -            ERROR("Invalid frame specified with pfn-to-mfn table");
   2.280 +            ERROR("Error when reading from state file");
   2.281              goto out;
   2.282          }
   2.283  
   2.284 -        pfn_to_mfn_table[i] = mfn;
   2.285 +        ppage = map_pfn(mfn);
   2.286 +        switch ( pfn_type[i] )
   2.287 +        {
   2.288 +        case L1TAB:
   2.289 +            memset(ppage, 0, PAGE_SIZE);
   2.290 +            add_mmu_update((mfn<<PAGE_SHIFT) | MMU_EXTENDED_COMMAND,
   2.291 +                           MMUEXT_PIN_L1_TABLE);
   2.292 +            for ( j = 0; j < 1024; j++ )
   2.293 +            {
   2.294 +                if ( page[j] & _PAGE_PRESENT )
   2.295 +                {
   2.296 +                    if ( (pfn = page[j] >> PAGE_SHIFT) >= nr_pfns )
   2.297 +                    {
   2.298 +                        ERROR("Frame number in page table is out of range");
   2.299 +                        goto out;
   2.300 +                    }
   2.301 +                    if ( (pfn_type[pfn] != NONE) && (page[j] & _PAGE_RW) )
   2.302 +                    {
   2.303 +                        ERROR("Write access requested for a restricted frame");
   2.304 +                        goto out;
   2.305 +                    }
   2.306 +                    page[j] &= PAGE_SIZE - 1;
   2.307 +                    page[j] |= pfn_to_mfn_table[pfn] << PAGE_SHIFT;
   2.308 +                }
   2.309 +                add_mmu_update((unsigned long)&ppage[j], page[j]);
   2.310 +            }
   2.311 +            break;
   2.312 +        case L2TAB:
   2.313 +            memset(ppage, 0, PAGE_SIZE);
   2.314 +            add_mmu_update((mfn<<PAGE_SHIFT) | MMU_EXTENDED_COMMAND,
   2.315 +                           MMUEXT_PIN_L2_TABLE);
   2.316 +            for ( j = 0; j < 1024; j++ )
   2.317 +            {
   2.318 +                if ( page[j] & _PAGE_PRESENT )
   2.319 +                {
   2.320 +                    if ( (pfn = page[j] >> PAGE_SHIFT) >= nr_pfns )
   2.321 +                    {
   2.322 +                        ERROR("Frame number in page table is out of range");
   2.323 +                        goto out;
   2.324 +                    }
   2.325 +                    if ( pfn_type[pfn] != L1TAB )
   2.326 +                    {
   2.327 +                        ERROR("Page table mistyping");
   2.328 +                        goto out;
   2.329 +                    }
   2.330 +                    page[j] &= PAGE_SIZE - 1;
   2.331 +                    page[j] |= pfn_to_mfn_table[pfn] << PAGE_SHIFT;
   2.332 +                }
   2.333 +                add_mmu_update((unsigned long)&ppage[j], page[j]);
   2.334 +            }
   2.335 +            break;
   2.336 +        default:
   2.337 +            memcpy(ppage, page, PAGE_SIZE);
   2.338 +            break;
   2.339 +        }
   2.340 +        unmap_pfn(ppage);
   2.341  
   2.342 -        /* Did we map this MFN already? That would be invalid! */
   2.343 -        if ( MFN_IS_IN_PSEUDOPHYS_MAP(mfn) )
   2.344 +        add_mmu_update((mfn<<PAGE_SHIFT) | MMU_MACHPHYS_UPDATE, i);
   2.345 +    }
   2.346 +
   2.347 +    flush_mmu_updates();
   2.348 +
   2.349 +    /* Uncanonicalise the suspend-record frame number and poke resume rec. */
   2.350 +    pfn = ctxt.i386_ctxt.esi;
   2.351 +    if ( (pfn >= nr_pfns) || (pfn_type[pfn] != NONE) )
   2.352 +    {
   2.353 +        ERROR("Suspend record frame number is bad");
   2.354 +        goto out;
   2.355 +    }
   2.356 +    ctxt.i386_ctxt.esi = mfn = pfn_to_mfn_table[pfn];
   2.357 +    p_srec = map_pfn(mfn);
   2.358 +    p_srec->resume_info.nr_pages    = nr_pfns;
   2.359 +    p_srec->resume_info.shared_info = shared_info_frame << PAGE_SHIFT;
   2.360 +    p_srec->resume_info.dom_id      = dom;
   2.361 +    p_srec->resume_info.flags       = 0;
   2.362 +    unmap_pfn(p_srec);
   2.363 +
   2.364 +    /* Uncanonicalise each GDT frame number. */
   2.365 +    if ( ctxt.gdt_ents > 8192 )
   2.366 +    {
   2.367 +        ERROR("GDT entry count out of range");
   2.368 +        goto out;
   2.369 +    }
   2.370 +    for ( i = 0; i < ctxt.gdt_ents; i += 512 )
   2.371 +    {
   2.372 +        pfn = ctxt.gdt_frames[i];
   2.373 +        if ( (pfn >= nr_pfns) || (pfn_type[pfn] != NONE) )
   2.374          {
   2.375 -            ERROR("A machine frame appears twice in pseudophys space");
   2.376 +            ERROR("GDT frame number is bad");
   2.377              goto out;
   2.378          }
   2.379 -        
   2.380 -        mfn_to_pfn_table[mfn] = i;
   2.381 -
   2.382 -        /* Query page type by MFN, but store it by PFN. */
   2.383 -        pfn_type[i] = get_pfn_type(mfn);
   2.384 +        ctxt.gdt_frames[i] = pfn_to_mfn_table[pfn];
   2.385      }
   2.386  
   2.387 -    /* Canonicalise the suspend-record frame number. */
   2.388 -    if ( !translate_mfn_to_pfn(&ctxt.i386_ctxt.esi) )
   2.389 +    /* Uncanonicalise the page table base pointer. */
   2.390 +    pfn = ctxt.pt_base >> PAGE_SHIFT;
   2.391 +    if ( (pfn >= nr_pfns) || (pfn_type[pfn] != L2TAB) )
   2.392      {
   2.393 -        ERROR("State record is not in range of pseudophys map");
   2.394 +        ERROR("PT base is bad");
   2.395 +        goto out;
   2.396 +    }
   2.397 +    ctxt.pt_base = pfn_to_mfn_table[pfn] << PAGE_SHIFT;
   2.398 +
   2.399 +    /* Uncanonicalise the pfn-to-mfn table frame-number list. */
   2.400 +    for ( i = 0; i < nr_pfns; i += 1024 )
   2.401 +    {
   2.402 +        unsigned long copy_size = (nr_pfns - i) * sizeof(unsigned long);
   2.403 +        if ( copy_size > PAGE_SIZE ) copy_size = PAGE_SIZE;
   2.404 +        pfn = pfn_to_mfn_frame_list[i/1024];
   2.405 +        if ( (pfn >= nr_pfns) || (pfn_type[pfn] != NONE) )
   2.406 +        {
   2.407 +            ERROR("PFN-to-MFN frame number is bad");
   2.408 +            goto out;
   2.409 +        }
   2.410 +        ppage = map_pfn(pfn_to_mfn_table[pfn]);
   2.411 +        memcpy(ppage, &pfn_to_mfn_table[i], copy_size);        
   2.412 +        unmap_pfn(ppage);
   2.413 +    }
   2.414 +
   2.415 +    /*
   2.416 +     * Safety checking of saved context:
   2.417 +     *  1. i386_ctxt is fine, as Xen checks that on context switch.
   2.418 +     *  2. i387_ctxt is fine, as it can't hurt Xen.
   2.419 +     *  3. trap_ctxt needs the code selectors checked.
   2.420 +     *  4. fast_trap_idx is checked by Xen.
   2.421 +     *  5. ldt base must be page-aligned, no more than 8192 ents, ...
   2.422 +     *  6. gdt already done, and further checking is done by Xen.
   2.423 +     *  7. check that ring1_ss/esp is safe.
   2.424 +     *  8. pt_base is already done.
   2.425 +     *  9. debugregs are checked by Xen.
   2.426 +     *  10. callback code selectors need checking.
   2.427 +     */
   2.428 +    for ( i = 0; i < 256; i++ )
   2.429 +    {
   2.430 +        ctxt.trap_ctxt[i].vector = i;
   2.431 +        if ( (ctxt.trap_ctxt[i].cs & 3) == 0 )
   2.432 +            ctxt.trap_ctxt[i].cs = FLAT_RING1_CS;
   2.433 +    }
   2.434 +    if ( (ctxt.ring1_ss & 3) == 0 )
   2.435 +        ctxt.ring1_ss = FLAT_RING1_DS;
   2.436 +    if ( ctxt.ring1_esp > HYPERVISOR_VIRT_START )
   2.437 +        ctxt.ring1_esp = HYPERVISOR_VIRT_START;
   2.438 +    if ( (ctxt.event_callback_cs & 3) == 0 )
   2.439 +        ctxt.event_callback_cs = FLAT_RING1_CS;
   2.440 +    if ( (ctxt.failsafe_callback_cs & 3) == 0 )
   2.441 +        ctxt.failsafe_callback_cs = FLAT_RING1_CS;
   2.442 +    if ( ((ctxt.ldt_base & (PAGE_SIZE - 1)) != 0) ||
   2.443 +         (ctxt.ldt_ents > 8192) ||
   2.444 +         (ctxt.ldt_base > HYPERVISOR_VIRT_START) ||
   2.445 +         ((ctxt.ldt_base + ctxt.ldt_ents*8) > HYPERVISOR_VIRT_START) )
   2.446 +    {
   2.447 +        ERROR("Bad LDT base or size");
   2.448          goto out;
   2.449      }
   2.450  
   2.451 -    /* Canonicalise each GDT frame number. */
   2.452 -    for ( i = 0; i < ctxt.gdt_ents; i += 512 )
   2.453 -    {
   2.454 -        if ( !translate_mfn_to_pfn(&ctxt.gdt_frames[i]) )
   2.455 -        {
   2.456 -            ERROR("GDT frame is not in range of pseudophys map");
   2.457 -            goto out;
   2.458 -        }
   2.459 -    }
   2.460 -
   2.461 -    /* Canonicalise the page table base pointer. */
   2.462 -    if ( !MFN_IS_IN_PSEUDOPHYS_MAP(ctxt.pt_base >> PAGE_SHIFT) )
   2.463 -    {
   2.464 -        ERROR("PT base is not in range of pseudophys map");
   2.465 -        goto out;
   2.466 -    }
   2.467 -    ctxt.pt_base = mfn_to_pfn_table[ctxt.pt_base >> PAGE_SHIFT] << PAGE_SHIFT;
   2.468 -
   2.469 -    /* Canonicalise the pfn-to-mfn table frame-number list. */
   2.470 -    for ( i = 0; i < srec.nr_pfns; i += 1024 )
   2.471 -    {
   2.472 -        if ( !translate_mfn_to_pfn(&pfn_to_mfn_frame_list[i/1024]) )
   2.473 -        {
   2.474 -            ERROR("Frame # in pfn-to-mfn frame list is not in pseudophys");
   2.475 -            goto out;
   2.476 -        }
   2.477 -    }
   2.478 -
   2.479 -    /* Now write out each data page, canonicalising page tables as we go... */
   2.480 -    for ( i = 0; i < srec.nr_pfns; i++ )
   2.481 -    {
   2.482 -        mfn = pfn_to_mfn_table[i];
   2.483 -        ppage = map_pfn(mfn);
   2.484 -        memcpy(&page, ppage, PAGE_SIZE);
   2.485 -        unmap_pfn(ppage);
   2.486 -        if ( (pfn_type[i] == L1TAB) || (pfn_type[i] == L2TAB) )
   2.487 -        {
   2.488 -            for ( i = 0; i < 1024; i++ )
   2.489 -            {
   2.490 -                if ( !(page[i] & _PAGE_PRESENT) ) continue;
   2.491 -                mfn = page[i] >> PAGE_SHIFT;
   2.492 -                if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) )
   2.493 -                {
   2.494 -                    ERROR("Frame number in pagetable page is invalid");
   2.495 -                    goto out;
   2.496 -                }
   2.497 -                page[i] &= PAGE_SIZE - 1;
   2.498 -                page[i] |= mfn_to_pfn_table[mfn] << PAGE_SHIFT;
   2.499 -            }
   2.500 -        }
   2.501 -        write(fd, &page, PAGE_SIZE);
   2.502 -    }
   2.503  
   2.504      /* Success! */
   2.505      rc = 0;
     3.1 --- a/tools/internal/xi_save_linux.c	Thu Nov 06 19:14:11 2003 +0000
     3.2 +++ b/tools/internal/xi_save_linux.c	Thu Nov 06 20:52:51 2003 +0000
     3.3 @@ -104,6 +104,9 @@ int main(int argc, char **argv)
     3.4      /* Remember if we stopped the guest, so we can restart it on exit. */
     3.5      int we_stopped_it = 0;
     3.6  
     3.7 +    /* The new domain's shared-info frame number. */
     3.8 +    unsigned long shared_info_frame;
     3.9 +    
    3.10      /* A copy of the CPU context of the guest. */
    3.11      full_execution_context_t ctxt;
    3.12  
    3.13 @@ -164,6 +167,7 @@ int main(int argc, char **argv)
    3.14  
    3.15          memcpy(&ctxt, &op.u.getdomaininfo.ctxt, sizeof(ctxt));
    3.16          memcpy(name, op.u.getdomaininfo.name, sizeof(name));
    3.17 +        shared_info_frame = op.u.getdomaininfo.shared_info_frame;
    3.18  
    3.19          if ( op.u.getdomaininfo.state == DOMSTATE_STOPPED )
    3.20              break;
    3.21 @@ -302,16 +306,19 @@ int main(int argc, char **argv)
    3.22      }
    3.23  
    3.24      /* Start writing out the saved-domain record. */
    3.25 +    ppage = map_pfn(shared_info_frame);
    3.26      if ( !checked_write(fd, "XenoLinuxSuspend",    16) ||
    3.27           !checked_write(fd, name,                  sizeof(name)) ||
    3.28           !checked_write(fd, &srec.nr_pfns,         sizeof(unsigned long)) ||
    3.29           !checked_write(fd, &ctxt,                 sizeof(ctxt)) ||
    3.30 +         !checked_write(fd, ppage,                 PAGE_SIZE) ||
    3.31           !checked_write(fd, pfn_to_mfn_frame_list, PAGE_SIZE) ||
    3.32           !checked_write(fd, pfn_type,              4 * srec.nr_pfns) )
    3.33      {
    3.34          ERROR("Error when writing to state file");
    3.35          goto out;
    3.36      }
    3.37 +    unmap_pfn(ppage);
    3.38  
    3.39      /* Now write out each data page, canonicalising page tables as we go... */
    3.40      for ( i = 0; i < srec.nr_pfns; i++ )
    3.41 @@ -319,7 +326,7 @@ int main(int argc, char **argv)
    3.42          mfn = pfn_to_mfn_table[i];
    3.43  
    3.44          ppage = map_pfn(mfn);
    3.45 -        memcpy(&page, ppage, PAGE_SIZE);
    3.46 +        memcpy(page, ppage, PAGE_SIZE);
    3.47          unmap_pfn(ppage);
    3.48  
    3.49          if ( (pfn_type[i] == L1TAB) || (pfn_type[i] == L2TAB) )
    3.50 @@ -338,7 +345,7 @@ int main(int argc, char **argv)
    3.51              }
    3.52          }
    3.53  
    3.54 -        if ( !checked_write(fd, &page, PAGE_SIZE) )
    3.55 +        if ( !checked_write(fd, page, PAGE_SIZE) )
    3.56          {
    3.57              ERROR("Error when writing to state file");
    3.58              goto out;
     4.1 --- a/xen/arch/i386/mm.c	Thu Nov 06 19:14:11 2003 +0000
     4.2 +++ b/xen/arch/i386/mm.c	Thu Nov 06 20:52:51 2003 +0000
     4.3 @@ -129,10 +129,9 @@ long do_stack_switch(unsigned long ss, u
     4.4      int nr = smp_processor_id();
     4.5      struct tss_struct *t = &init_tss[nr];
     4.6  
     4.7 -    /*
     4.8 -     * No need to check validity: CPU will fault if SS or ESP is bad. This is
     4.9 -     * true even for a fast trap: a bad SS:ESP will get us either a #SS or #TS.
    4.10 -     */
    4.11 +    if ( ((ss & 3) == 0) || (esp > HYPERVISOR_VIRT_START) )
    4.12 +        return -EPERM;
    4.13 +
    4.14      current->thread.ss1  = ss;
    4.15      current->thread.esp1 = esp;
    4.16      t->ss1  = ss;
     5.1 --- a/xen/arch/i386/traps.c	Thu Nov 06 19:14:11 2003 +0000
     5.2 +++ b/xen/arch/i386/traps.c	Thu Nov 06 20:52:51 2003 +0000
     5.3 @@ -751,7 +751,7 @@ long do_fpu_taskswitch(void)
     5.4  }
     5.5  
     5.6  
     5.7 -long do_set_debugreg(int reg, unsigned long value)
     5.8 +long set_debugreg(struct task_struct *p, int reg, unsigned long value)
     5.9  {
    5.10      int i;
    5.11  
    5.12 @@ -759,19 +759,23 @@ long do_set_debugreg(int reg, unsigned l
    5.13      {
    5.14      case 0: 
    5.15          if ( value > (PAGE_OFFSET-4) ) return -EPERM;
    5.16 -        __asm__ ( "movl %0, %%db0" : : "r" (value) );
    5.17 +        if ( p == current ) 
    5.18 +            __asm__ ( "movl %0, %%db0" : : "r" (value) );
    5.19          break;
    5.20      case 1: 
    5.21          if ( value > (PAGE_OFFSET-4) ) return -EPERM;
    5.22 -        __asm__ ( "movl %0, %%db1" : : "r" (value) );
    5.23 +        if ( p == current ) 
    5.24 +            __asm__ ( "movl %0, %%db1" : : "r" (value) );
    5.25          break;
    5.26      case 2: 
    5.27          if ( value > (PAGE_OFFSET-4) ) return -EPERM;
    5.28 -        __asm__ ( "movl %0, %%db2" : : "r" (value) );
    5.29 +        if ( p == current ) 
    5.30 +            __asm__ ( "movl %0, %%db2" : : "r" (value) );
    5.31          break;
    5.32      case 3:
    5.33          if ( value > (PAGE_OFFSET-4) ) return -EPERM;
    5.34 -        __asm__ ( "movl %0, %%db3" : : "r" (value) );
    5.35 +        if ( p == current ) 
    5.36 +            __asm__ ( "movl %0, %%db3" : : "r" (value) );
    5.37          break;
    5.38      case 6:
    5.39          /*
    5.40 @@ -780,7 +784,8 @@ long do_set_debugreg(int reg, unsigned l
    5.41           */
    5.42          value &= 0xffffefff; /* reserved bits => 0 */
    5.43          value |= 0xffff0ff0; /* reserved bits => 1 */
    5.44 -        __asm__ ( "movl %0, %%db6" : : "r" (value) );
    5.45 +        if ( p == current ) 
    5.46 +            __asm__ ( "movl %0, %%db6" : : "r" (value) );
    5.47          break;
    5.48      case 7:
    5.49          /*
    5.50 @@ -800,16 +805,22 @@ long do_set_debugreg(int reg, unsigned l
    5.51              for ( i = 0; i < 16; i += 2 )
    5.52                  if ( ((value >> (i+16)) & 3) == 2 ) return -EPERM;
    5.53          }
    5.54 -        __asm__ ( "movl %0, %%db7" : : "r" (value) );
    5.55 +        if ( p == current ) 
    5.56 +            __asm__ ( "movl %0, %%db7" : : "r" (value) );
    5.57          break;
    5.58      default:
    5.59          return -EINVAL;
    5.60      }
    5.61  
    5.62 -    current->thread.debugreg[reg] = value;
    5.63 +    p->thread.debugreg[reg] = value;
    5.64      return 0;
    5.65  }
    5.66  
    5.67 +long do_set_debugreg(int reg, unsigned long value)
    5.68 +{
    5.69 +    return set_debugreg(current, reg, value);
    5.70 +}
    5.71 +
    5.72  unsigned long do_get_debugreg(int reg)
    5.73  {
    5.74      if ( (reg < 0) || (reg > 7) ) return -EINVAL;
     6.1 --- a/xen/common/dom0_ops.c	Thu Nov 06 19:14:11 2003 +0000
     6.2 +++ b/xen/common/dom0_ops.c	Thu Nov 06 20:52:51 2003 +0000
     6.3 @@ -343,7 +343,8 @@ long do_dom0_op(dom0_op_t *u_dom0_op)
     6.4              
     6.5              op.u.getpageframeinfo.domain = page->flags & PG_domain_mask;
     6.6              op.u.getpageframeinfo.type   = NONE;
     6.7 -            if ( page->type_count & REFCNT_PIN_BIT )
     6.8 +
     6.9 +            if ( page->type_count != 0 )
    6.10              {
    6.11                  switch ( page->flags & PG_type_mask )
    6.12                  {
     7.1 --- a/xen/common/domain.c	Thu Nov 06 19:14:11 2003 +0000
     7.2 +++ b/xen/common/domain.c	Thu Nov 06 20:52:51 2003 +0000
     7.3 @@ -330,6 +330,7 @@ void release_task(struct task_struct *p)
     7.4  int final_setup_guestos(struct task_struct *p, dom0_builddomain_t *builddomain)
     7.5  {
     7.6      unsigned long phys_l2tab;
     7.7 +    int i;
     7.8  
     7.9      if ( (p->flags & PF_CONSTRUCTED) )
    7.10          return -EINVAL;
    7.11 @@ -358,9 +359,8 @@ int final_setup_guestos(struct task_stru
    7.12                        builddomain->ctxt.gdt_ents);
    7.13      p->thread.ss1  = builddomain->ctxt.ring1_ss;
    7.14      p->thread.esp1 = builddomain->ctxt.ring1_esp;
    7.15 -    memcpy(p->thread.debugreg,
    7.16 -           builddomain->ctxt.debugreg,
    7.17 -           sizeof(p->thread.debugreg));
    7.18 +    for ( i = 0; i < 8; i++ )
    7.19 +        (void)set_debugreg(p, i, builddomain->ctxt.debugreg[i]);
    7.20      p->event_selector    = builddomain->ctxt.event_callback_cs;
    7.21      p->event_address     = builddomain->ctxt.event_callback_eip;
    7.22      p->failsafe_selector = builddomain->ctxt.failsafe_callback_cs;
     8.1 --- a/xen/include/asm-i386/processor.h	Thu Nov 06 19:14:11 2003 +0000
     8.2 +++ b/xen/include/asm-i386/processor.h	Thu Nov 06 20:52:51 2003 +0000
     8.3 @@ -437,6 +437,8 @@ long set_gdt(struct task_struct *p,
     8.4               unsigned long *frames, 
     8.5               unsigned int entries);
     8.6  
     8.7 +long set_debugreg(struct task_struct *p, int reg, unsigned long value);
     8.8 +
     8.9  struct microcode {
    8.10      unsigned int hdrver;
    8.11      unsigned int rev;