ia64/xen-unstable

changeset 899:2157fc1812d8

bitkeeper revision 1.567 (3fa98625j0d47oe7ZCKLDdbnYED8wA)

xi_restore_linux.c, xi_save_linux.c:
new file
dom0_ops.c, Makefile:
Starting DOM0 support for suspend/resume. Untested so far, and resume is currently incomplete.
author kaf24@scramble.cl.cam.ac.uk
date Wed Nov 05 23:22:13 2003 +0000 (2003-11-05)
parents db4b686bbf64
children b2decdeba558
files .rootkeys tools/internal/Makefile tools/internal/xi_restore_linux.c tools/internal/xi_save_linux.c xen/common/dom0_ops.c
line diff
     1.1 --- a/.rootkeys	Wed Nov 05 15:30:38 2003 +0000
     1.2 +++ b/.rootkeys	Wed Nov 05 23:22:13 2003 +0000
     1.3 @@ -169,6 +169,8 @@ 3eb781fdcJ0fF7rWfzAOArW-x4-gwA tools/int
     1.4  3ec43c5dmQxGDvgJJXbV1yLxT30Y1A tools/internal/xi_helper
     1.5  3f108ad5wQm0ZaQ4GXFoUhH1W1aW9w tools/internal/xi_list.c
     1.6  3f0458aaXhD8BQAggO81gv30RQ-ifA tools/internal/xi_phys_grant.c
     1.7 +3fa9861aBdNV1yCjfY4cLPr4Mtrpuw tools/internal/xi_restore_linux.c
     1.8 +3fa98615LWZfagwDBp7XfuC-u9wi3w tools/internal/xi_save_linux.c
     1.9  3f108adb2b5OkKL6-faG3lMiOYDf_w tools/internal/xi_sched_domain.c
    1.10  3f108ade1v8weyh1sKx890VTd240Hw tools/internal/xi_sched_global.c
    1.11  3eb781fd8oRfPgH7qTh7xvgmwD6NgA tools/internal/xi_start.c
     2.1 --- a/tools/internal/Makefile	Wed Nov 05 15:30:38 2003 +0000
     2.2 +++ b/tools/internal/Makefile	Wed Nov 05 23:22:13 2003 +0000
     2.3 @@ -8,7 +8,7 @@ SRCS     = $(wildcard *.c)
     2.4  OBJS     = $(patsubst %.c,%.o,$(SRCS))
     2.5  
     2.6  TARGETS  = xi_create xi_start xi_stop xi_destroy xi_build 
     2.7 -TARGETS += xi_phys_grant xi_list 
     2.8 +TARGETS += xi_phys_grant xi_list xi_save_linux xi_restore_linux
     2.9  TARGETS += xi_sched_global xi_sched_domain xi_usage xi_vif_params
    2.10  INSTALL  = $(TARGETS) xi_vifinit xi_helper
    2.11  
     3.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     3.2 +++ b/tools/internal/xi_restore_linux.c	Wed Nov 05 23:22:13 2003 +0000
     3.3 @@ -0,0 +1,346 @@
     3.4 +/******************************************************************************
     3.5 + * xi_restore_linux.c
     3.6 + * 
     3.7 + * Restore the state of a Xenolinux session.
     3.8 + * 
     3.9 + * Copyright (c) 2003, K A Fraser.
    3.10 + */
    3.11 +
    3.12 +#include "dom0_defs.h"
    3.13 +#include "mem_defs.h"
    3.14 +#include <asm-xeno/suspend.h>
    3.15 +
    3.16 +static char *argv0 = "internal_save_linux";
    3.17 +
    3.18 +/* A table mapping each PFN to its current MFN. */
    3.19 +static unsigned long *pfn_to_mfn_table;
    3.20 +/* A table mapping each current MFN to its canonical PFN. */
    3.21 +static unsigned long *mfn_to_pfn_table;
    3.22 +
    3.23 +static int devmem_fd;
    3.24 +
    3.25 +static int init_pfn_mapper(void)
    3.26 +{
    3.27 +    if ( (devmem_fd = open("/dev/mem", O_RDWR)) < 0 )
    3.28 +    {
    3.29 +        PERROR("Could not open /dev/mem");
    3.30 +        return -1;
    3.31 +    }
    3.32 +    return 0;
    3.33 +}
    3.34 +
    3.35 +static void *map_pfn(unsigned long pfn)
    3.36 +{
    3.37 +    void *vaddr = mmap(NULL, PAGE_SIZE, PROT_READ|PROT_WRITE,
    3.38 +                       MAP_SHARED, devmem_fd, pfn << PAGE_SHIFT);
    3.39 +    if ( vaddr == MAP_FAILED )
    3.40 +    {
    3.41 +        PERROR("Could not mmap a domain pfn using /dev/mem");
    3.42 +        return NULL;
    3.43 +    }
    3.44 +    return vaddr;
    3.45 +}
    3.46 +
    3.47 +static void unmap_pfn(void *vaddr)
    3.48 +{
    3.49 +    (void)munmap(vaddr, PAGE_SIZE);
    3.50 +}
    3.51 +
    3.52 +/*
    3.53 + * Returns TRUE if the given machine frame number has a unique mapping
    3.54 + * in the guest's pseudophysical map.
    3.55 + */
    3.56 +#define MFN_IS_IN_PSEUDOPHYS_MAP(_mfn) \
    3.57 +    (((_mfn) < (1024*1024)) &&          \
    3.58 +     (pfn_to_mfn_table[mfn_to_pfn_table[_mfn]] == (_mfn)))
    3.59 +
    3.60 +/* Returns TRUE if MFN is successfully converted to a PFN. */
    3.61 +static int translate_mfn_to_pfn(unsigned long *pmfn)
    3.62 +{
    3.63 +    unsigned long mfn = *pmfn;
    3.64 +    if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) )
    3.65 +        return 0;
    3.66 +    *pmfn = mfn_to_pfn_table[mfn];
    3.67 +    return 1;
    3.68 +}
    3.69 +
    3.70 +static int check_pfn_ownership(unsigned long mfn, unsigned int dom)
    3.71 +{
    3.72 +    dom0_op_t op;
    3.73 +    op.cmd = DOM0_GETPAGEFRAMEINFO;
    3.74 +    op.u.getpageframeinfo.pfn = mfn;
    3.75 +    if ( (do_dom0_op(&op) < 0) || (op.u.getpageframeinfo.domain != dom) )
    3.76 +        return 0;
    3.77 +    return 1;
    3.78 +}
    3.79 +
    3.80 +static unsigned int get_pfn_type(unsigned long mfn)
    3.81 +{
    3.82 +    dom0_op_t op;
    3.83 +    op.cmd = DOM0_GETPAGEFRAMEINFO;
    3.84 +    op.u.getpageframeinfo.pfn = mfn;
    3.85 +    if ( do_dom0_op(&op) < 0 )
    3.86 +    {
    3.87 +        PERROR("Unexpected failure when getting page frame info!");
    3.88 +        exit(1);
    3.89 +    }
    3.90 +    return op.u.getpageframeinfo.type;
    3.91 +}
    3.92 +
    3.93 +static int checked_read(int fd, void *buf, size_t count)
    3.94 +{
    3.95 +    int rc;
    3.96 +    while ( ((rc = read(fd, buf, count)) == -1) && (errno == EINTR) )
    3.97 +        continue;
    3.98 +    return rc == count;
    3.99 +}
   3.100 +
   3.101 +int main(int argc, char **argv)
   3.102 +{
   3.103 +    dom0_op_t op;
   3.104 +    int rc = 1, i, j;
   3.105 +    unsigned long mfn, dom = 0;
   3.106 +    
   3.107 +    /* Number of page frames in use by this XenoLinux session. */
   3.108 +    unsigned long nr_pfns;
   3.109 +    
   3.110 +    /* A copy of the CPU context of the guest. */
   3.111 +    full_execution_context_t ctxt;
   3.112 +
   3.113 +    /* First 16 bytes of the state file must contain 'XenoLinuxSuspend'. */
   3.114 +    char signature[16];
   3.115 +    
   3.116 +    /* A copy of the domain's name. */
   3.117 +    char name[MAX_DOMAIN_NAME];
   3.118 +
   3.119 +    /* A table containing the type of each PFN (/not/ MFN!). */
   3.120 +    unsigned long *pfn_type;
   3.121 +
   3.122 +    /* A temporary mapping, and a copy, of one frame of guest memory. */
   3.123 +    unsigned long *ppage, page[1024];
   3.124 +
   3.125 +    /* A copy of the pfn-to-mfn table frame list. */
   3.126 +    unsigned long pfn_to_mfn_frame_list[1024];
   3.127 +    /* A temporary mapping of one frame in the above list. */
   3.128 +    unsigned long *pfn_to_mfn_frame;
   3.129 +
   3.130 +    /* A temporary mapping, and a copy, of the guest's suspend record. */
   3.131 +    suspend_record_t *p_srec, srec;
   3.132 +
   3.133 +    /* The name and descriptor of the file that we are reading from. */
   3.134 +    char *filename;
   3.135 +    int fd;
   3.136 +
   3.137 +    if ( argv[0] != NULL ) 
   3.138 +        argv0 = argv[0];
   3.139 +
   3.140 +    if ( argc != 2 )
   3.141 +    {
   3.142 +        fprintf(stderr, "Usage: %s <state file>\n", argv0);
   3.143 +        return 1;
   3.144 +    }
   3.145 +
   3.146 +    filename = argv[1];
   3.147 +    if ( (fd = open(filename, O_RDONLY)) == -1 )
   3.148 +    {
   3.149 +        PERROR("Could not open file for reading");
   3.150 +        return 1;
   3.151 +    }
   3.152 +
   3.153 +    /* Start reading in the saved-domain record. */
   3.154 +    if ( !checked_read(fd, signature, 16) ||
   3.155 +         (memcmp(signature, "XenoLinuxSuspend", 16) != 0) )
   3.156 +    {
   3.157 +        ERROR("Unrecognised state format -- no signature found");
   3.158 +        goto out;
   3.159 +    }
   3.160 +
   3.161 +    if ( !checked_read(fd, name,                  sizeof(name)) ||
   3.162 +         !checked_read(fd, &nr_pfns,              sizeof(unsigned long)) ||
   3.163 +         !checked_read(fd, &ctxt,                 sizeof(ctxt)) ||
   3.164 +         !checked_read(fd, pfn_to_mfn_frame_list, PAGE_SIZE) )
   3.165 +    {
   3.166 +        ERROR("Error when reading from state file");
   3.167 +        goto out;
   3.168 +    }
   3.169 +
   3.170 +    if ( nr_pfns > 1024*1024 )
   3.171 +    {
   3.172 +        ERROR("Invalid state file -- pfn count out of range");
   3.173 +        goto out;
   3.174 +    }
   3.175 +
   3.176 +    for ( i = 0; i < MAX_DOMAIN_NAME; i++ )
   3.177 +    {
   3.178 +        if ( name[i] == '\0' ) break;
   3.179 +        if ( name[i] & 0x80 )
   3.180 +        {
   3.181 +            ERROR("Random characters in domain name");
   3.182 +            goto out;
   3.183 +        }
   3.184 +    }
   3.185 +    name[MAX_DOMAIN_NAME-1] = '\0';
   3.186 +
   3.187 +    /* We want zeroed memory so use calloc rather than malloc. */
   3.188 +    mfn_to_pfn_table = calloc(1, 4 * 1024 * 1024);
   3.189 +    pfn_to_mfn_table = calloc(1, 4 * nr_pfns);
   3.190 +    pfn_type         = calloc(1, 4 * nr_pfns);    
   3.191 +
   3.192 +    if ( !checked_read(fd, pfn_type, 4 * nr_pfns) )
   3.193 +    {
   3.194 +        ERROR("Error when reading from state file");
   3.195 +        goto out;
   3.196 +    }
   3.197 +
   3.198 +    /* Create a new domain of the appropriate size, and find its dom_id. */
   3.199 +    op.cmd = DOM0_CREATEDOMAIN;
   3.200 +    op.u.createdomain.memory_kb = nr_pfns * (PAGE_SIZE / 1024);
   3.201 +    memcpy(op.u.createdomain.name, name, MAX_DOMAIN_NAME);
   3.202 +    if ( do_dom0_op(&op) < 0 )
   3.203 +    {
   3.204 +        ERROR("Could not create new domain");
   3.205 +        goto out;
   3.206 +    }
   3.207 +    dom = op.u.createdomain.domain;
   3.208 +
   3.209 +    if ( init_pfn_mapper() < 0 )
   3.210 +        goto out;
   3.211 +
   3.212 +    /* Is the suspend-record MFN actually valid for this domain? */
   3.213 +    if ( !check_pfn_ownership(ctxt.i386_ctxt.esi, dom) )
   3.214 +    {
   3.215 +        ERROR("Invalid state record pointer");
   3.216 +        goto out;
   3.217 +    }
   3.218 +
   3.219 +    /* If the suspend-record MFN is okay then grab a copy of it to @srec. */
   3.220 +    p_srec = map_pfn(ctxt.i386_ctxt.esi);
   3.221 +    memcpy(&srec, p_srec, sizeof(srec));
   3.222 +    unmap_pfn(p_srec);
   3.223 +
   3.224 +    if ( !check_pfn_ownership(srec.pfn_to_mfn_frame_list, dom) )
   3.225 +    {
   3.226 +        ERROR("Invalid pfn-to-mfn frame list pointer");
   3.227 +        goto out;
   3.228 +    }
   3.229 +
   3.230 +    /*
   3.231 +     * Construct the local pfn-to-mfn and mfn-to-pfn tables. On exit from this
   3.232 +     * loop we have each MFN mapped at most once. Note that there may be MFNs
   3.233 +     * that aren't mapped at all: we detect these by MFN_IS_IN_PSEUDOPHYS_MAP.
   3.234 +     */
   3.235 +    pfn_to_mfn_frame = NULL;
   3.236 +    for ( i = 0; i < srec.nr_pfns; i++ )
   3.237 +    {
   3.238 +        /* Each frameful of table frames must be checked & mapped on demand. */
   3.239 +        if ( (i & 1023) == 0 )
   3.240 +        {
   3.241 +            mfn = pfn_to_mfn_frame_list[i/1024];
   3.242 +            if ( !check_pfn_ownership(mfn, dom) )
   3.243 +            {
   3.244 +                ERROR("Invalid frame number if pfn-to-mfn frame list");
   3.245 +                goto out;
   3.246 +            }
   3.247 +            if ( pfn_to_mfn_frame != NULL )
   3.248 +                unmap_pfn(pfn_to_mfn_frame);
   3.249 +            pfn_to_mfn_frame = map_pfn(mfn);
   3.250 +        }
   3.251 +        
   3.252 +        mfn = pfn_to_mfn_frame[i & 1023];
   3.253 +
   3.254 +        if ( !check_pfn_ownership(mfn, dom) )
   3.255 +        {
   3.256 +            ERROR("Invalid frame specified with pfn-to-mfn table");
   3.257 +            goto out;
   3.258 +        }
   3.259 +
   3.260 +        pfn_to_mfn_table[i] = mfn;
   3.261 +
   3.262 +        /* Did we map this MFN already? That would be invalid! */
   3.263 +        if ( MFN_IS_IN_PSEUDOPHYS_MAP(mfn) )
   3.264 +        {
   3.265 +            ERROR("A machine frame appears twice in pseudophys space");
   3.266 +            goto out;
   3.267 +        }
   3.268 +        
   3.269 +        mfn_to_pfn_table[mfn] = i;
   3.270 +
   3.271 +        /* Query page type by MFN, but store it by PFN. */
   3.272 +        pfn_type[i] = get_pfn_type(mfn);
   3.273 +    }
   3.274 +
   3.275 +    /* Canonicalise the suspend-record frame number. */
   3.276 +    if ( !translate_mfn_to_pfn(&ctxt.i386_ctxt.esi) )
   3.277 +    {
   3.278 +        ERROR("State record is not in range of pseudophys map");
   3.279 +        goto out;
   3.280 +    }
   3.281 +
   3.282 +    /* Canonicalise each GDT frame number (i counts entries, 512 per frame). */
   3.283 +    for ( i = 0; i < ctxt.gdt_ents; i += 512 )
   3.284 +    {
   3.285 +        if ( !translate_mfn_to_pfn(&ctxt.gdt_frames[i/512]) )
   3.286 +        {
   3.287 +            ERROR("GDT frame is not in range of pseudophys map");
   3.288 +            goto out;
   3.289 +        }
   3.290 +    }
   3.291 +
   3.292 +    /* Canonicalise the page table base pointer. */
   3.293 +    if ( !MFN_IS_IN_PSEUDOPHYS_MAP(ctxt.pt_base >> PAGE_SHIFT) )
   3.294 +    {
   3.295 +        ERROR("PT base is not in range of pseudophys map");
   3.296 +        goto out;
   3.297 +    }
   3.298 +    ctxt.pt_base = mfn_to_pfn_table[ctxt.pt_base >> PAGE_SHIFT] << PAGE_SHIFT;
   3.299 +
   3.300 +    /* Canonicalise the pfn-to-mfn table frame-number list. */
   3.301 +    for ( i = 0; i < srec.nr_pfns; i += 1024 )
   3.302 +    {
   3.303 +        if ( !translate_mfn_to_pfn(&pfn_to_mfn_frame_list[i/1024]) )
   3.304 +        {
   3.305 +            ERROR("Frame # in pfn-to-mfn frame list is not in pseudophys");
   3.306 +            goto out;
   3.307 +        }
   3.308 +    }
   3.309 +
   3.310 +    /* Now write out each data page, canonicalising page tables as we go... */
   3.311 +    for ( i = 0; i < srec.nr_pfns; i++ )
   3.312 +    {
   3.313 +        mfn = pfn_to_mfn_table[i];
   3.314 +        ppage = map_pfn(mfn);
   3.315 +        memcpy(&page, ppage, PAGE_SIZE);
   3.316 +        unmap_pfn(ppage);
   3.317 +        if ( (pfn_type[i] == L1TAB) || (pfn_type[i] == L2TAB) )
   3.318 +        {
   3.319 +            for ( j = 0; j < 1024; j++ ) /* j, not i: keep the outer PFN index */
   3.320 +            {
   3.321 +                if ( !(page[j] & _PAGE_PRESENT) ) continue;
   3.322 +                mfn = page[j] >> PAGE_SHIFT;
   3.323 +                if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) )
   3.324 +                {
   3.325 +                    ERROR("Frame number in pagetable page is invalid");
   3.326 +                    goto out;
   3.327 +                }
   3.328 +                page[j] &= PAGE_SIZE - 1;
   3.329 +                page[j] |= mfn_to_pfn_table[mfn] << PAGE_SHIFT;
   3.330 +            }
   3.331 +        }
   3.332 +        write(fd, &page, PAGE_SIZE);
   3.333 +    }
   3.334 +
   3.335 +    /* Success! */
   3.336 +    rc = 0;
   3.337 +
   3.338 + out:
   3.339 +    /* If we experience an error then kill the half-constructed domain. */
   3.340 +    if ( (rc != 0) && (dom != 0) )
   3.341 +    {
   3.342 +        op.cmd = DOM0_DESTROYDOMAIN;
   3.343 +        op.u.destroydomain.domain = dom;
   3.344 +        op.u.destroydomain.force  = 1;
   3.345 +        (void)do_dom0_op(&op);
   3.346 +    }
   3.347 +
   3.348 +    return !!rc;
   3.349 +}
     4.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     4.2 +++ b/tools/internal/xi_save_linux.c	Wed Nov 05 23:22:13 2003 +0000
     4.3 @@ -0,0 +1,365 @@
     4.4 +/******************************************************************************
     4.5 + * xi_save_linux.c
     4.6 + * 
     4.7 + * Save the state of a running Xenolinux session.
     4.8 + * 
     4.9 + * Copyright (c) 2003, K A Fraser.
    4.10 + */
    4.11 +
    4.12 +#include "dom0_defs.h"
    4.13 +#include "mem_defs.h"
    4.14 +#include <asm-xeno/suspend.h>
    4.15 +
    4.16 +static char *argv0 = "internal_save_linux";
    4.17 +
    4.18 +/* A table mapping each PFN to its current MFN. */
    4.19 +static unsigned long *pfn_to_mfn_table;
    4.20 +/* A table mapping each current MFN to its canonical PFN. */
    4.21 +static unsigned long *mfn_to_pfn_table;
    4.22 +
    4.23 +static int devmem_fd;
    4.24 +
    4.25 +static int init_pfn_mapper(void)
    4.26 +{
    4.27 +    if ( (devmem_fd = open("/dev/mem", O_RDWR)) < 0 )
    4.28 +    {
    4.29 +        PERROR("Could not open /dev/mem");
    4.30 +        return -1;
    4.31 +    }
    4.32 +    return 0;
    4.33 +}
    4.34 +
    4.35 +static void *map_pfn(unsigned long pfn)
    4.36 +{
    4.37 +    void *vaddr = mmap(NULL, PAGE_SIZE, PROT_READ|PROT_WRITE,
    4.38 +                       MAP_SHARED, devmem_fd, pfn << PAGE_SHIFT);
    4.39 +    if ( vaddr == MAP_FAILED )
    4.40 +    {
    4.41 +        PERROR("Could not mmap a domain pfn using /dev/mem");
    4.42 +        return NULL;
    4.43 +    }
    4.44 +    return vaddr;
    4.45 +}
    4.46 +
    4.47 +static void unmap_pfn(void *vaddr)
    4.48 +{
    4.49 +    (void)munmap(vaddr, PAGE_SIZE);
    4.50 +}
    4.51 +
    4.52 +/*
    4.53 + * Returns TRUE if the given machine frame number has a unique mapping
    4.54 + * in the guest's pseudophysical map.
    4.55 + */
    4.56 +#define MFN_IS_IN_PSEUDOPHYS_MAP(_mfn) \
    4.57 +    (((_mfn) < (1024*1024)) &&          \
    4.58 +     (pfn_to_mfn_table[mfn_to_pfn_table[_mfn]] == (_mfn)))
    4.59 +
    4.60 +/* Returns TRUE if MFN is successfully converted to a PFN. */
    4.61 +static int translate_mfn_to_pfn(unsigned long *pmfn)
    4.62 +{
    4.63 +    unsigned long mfn = *pmfn;
    4.64 +    if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) )
    4.65 +        return 0;
    4.66 +    *pmfn = mfn_to_pfn_table[mfn];
    4.67 +    return 1;
    4.68 +}
    4.69 +
    4.70 +static int check_pfn_ownership(unsigned long mfn, unsigned int dom)
    4.71 +{
    4.72 +    dom0_op_t op;
    4.73 +    op.cmd = DOM0_GETPAGEFRAMEINFO;
    4.74 +    op.u.getpageframeinfo.pfn = mfn;
    4.75 +    if ( (do_dom0_op(&op) < 0) || (op.u.getpageframeinfo.domain != dom) )
    4.76 +        return 0;
    4.77 +    return 1;
    4.78 +}
    4.79 +
    4.80 +static unsigned int get_pfn_type(unsigned long mfn)
    4.81 +{
    4.82 +    dom0_op_t op;
    4.83 +    op.cmd = DOM0_GETPAGEFRAMEINFO;
    4.84 +    op.u.getpageframeinfo.pfn = mfn;
    4.85 +    if ( do_dom0_op(&op) < 0 )
    4.86 +    {
    4.87 +        PERROR("Unexpected failure when getting page frame info!");
    4.88 +        exit(1);
    4.89 +    }
    4.90 +    return op.u.getpageframeinfo.type;
    4.91 +}
    4.92 +
    4.93 +static int checked_write(int fd, const void *buf, size_t count)
    4.94 +{
    4.95 +    int rc;
    4.96 +    while ( ((rc = write(fd, buf, count)) == -1) && (errno == EINTR) )
    4.97 +        continue; /* retry only when the write was interrupted by a signal */
    4.98 +    return rc == count; /* TRUE only if every byte was written */
    4.99 +}
   4.100 +
   4.101 +int main(int argc, char **argv)
   4.102 +{
   4.103 +    dom0_op_t op;
   4.104 +    int rc = 1, i, j;
   4.105 +    unsigned long mfn, dom;
   4.106 +
   4.107 +    /* Remember if we stopped the guest, so we can restart it on exit. */
   4.108 +    int we_stopped_it = 0;
   4.109 +
   4.110 +    /* A copy of the CPU context of the guest. */
   4.111 +    full_execution_context_t ctxt;
   4.112 +
   4.113 +    /* A copy of the domain's name. */
   4.114 +    char name[MAX_DOMAIN_NAME];
   4.115 +
   4.116 +    /* A table containing the type of each PFN (/not/ MFN!). */
   4.117 +    unsigned long *pfn_type;
   4.118 +
   4.119 +    /* A temporary mapping, and a copy, of one frame of guest memory. */
   4.120 +    unsigned long *ppage, page[1024];
   4.121 +
   4.122 +    /* A temporary mapping, and a copy, of the pfn-to-mfn table frame list. */
   4.123 +    unsigned long *p_pfn_to_mfn_frame_list, pfn_to_mfn_frame_list[1024];
   4.124 +    /* A temporary mapping of one frame in the above list. */
   4.125 +    unsigned long *pfn_to_mfn_frame;
   4.126 +
   4.127 +    /* A temporary mapping, and a copy, of the guest's suspend record. */
   4.128 +    suspend_record_t *p_srec, srec;
   4.129 +
   4.130 +    /* The name and descriptor of the file that we are writing to. */
   4.131 +    char *filename;
   4.132 +    int fd;
   4.133 +
   4.134 +    if ( argv[0] != NULL ) 
   4.135 +        argv0 = argv[0];
   4.136 +
   4.137 +    if ( argc != 3 )
   4.138 +    {
   4.139 +        fprintf(stderr, "Usage: %s <domain_id> <state file>\n", argv0);
   4.140 +        return 1;
   4.141 +    }
   4.142 +
   4.143 +    dom = atoi(argv[1]);
   4.144 +    if ( dom == 0 )
   4.145 +    {
   4.146 +        ERROR("Did you really mean domain 0?");
   4.147 +        return 1;
   4.148 +    }
   4.149 +
   4.150 +    filename = argv[2];
   4.151 +    if ( (fd = open(filename, O_CREAT|O_EXCL|O_RDWR, 0600)) == -1 )
   4.152 +    {
   4.153 +        PERROR("Could not open file for writing");
   4.154 +        return 1;
   4.155 +    }
   4.156 +
   4.157 +    /* Ensure that the domain exists, and that it is stopped. */
   4.158 +    for ( ; ; )
   4.159 +    {
   4.160 +        op.cmd = DOM0_GETDOMAININFO;
   4.161 +        op.u.getdomaininfo.domain = dom;
   4.162 +        if ( (do_dom0_op(&op) < 0) || (op.u.getdomaininfo.domain != dom) )
   4.163 +        {
   4.164 +            PERROR("Could not get info on domain");
   4.165 +            goto out;
   4.166 +        }
   4.167 +
   4.168 +        memcpy(&ctxt, &op.u.getdomaininfo.ctxt, sizeof(ctxt));
   4.169 +        memcpy(name, op.u.getdomaininfo.name, sizeof(name));
   4.170 +
   4.171 +        if ( op.u.getdomaininfo.state == DOMSTATE_STOPPED )
   4.172 +            break;
   4.173 +
   4.174 +        we_stopped_it = 1;
   4.175 +
   4.176 +        op.cmd = DOM0_STOPDOMAIN;
   4.177 +        op.u.stopdomain.domain = dom;
   4.178 +        (void)do_dom0_op(&op);
   4.179 +
   4.180 +        sleep(1);
   4.181 +    }
   4.182 +
   4.183 +    /* A cheesy test to see whether the domain contains valid state. */
   4.184 +    if ( ctxt.pt_base == 0 )
   4.185 +    {
   4.186 +        ERROR("Domain is not in a valid Xenolinux state");
   4.187 +        goto out;
   4.188 +    }
   4.189 +
   4.190 +    if ( init_pfn_mapper() < 0 )
   4.191 +        goto out;
   4.192 +
   4.193 +    /* Is the suspend-record MFN actually valid for this domain? */
   4.194 +    if ( !check_pfn_ownership(ctxt.i386_ctxt.esi, dom) )
   4.195 +    {
   4.196 +        ERROR("Invalid state record pointer");
   4.197 +        goto out;
   4.198 +    }
   4.199 +
   4.200 +    /* If the suspend-record MFN is okay then grab a copy of it to @srec. */
   4.201 +    p_srec = map_pfn(ctxt.i386_ctxt.esi);
   4.202 +    memcpy(&srec, p_srec, sizeof(srec));
   4.203 +    unmap_pfn(p_srec);
   4.204 +
   4.205 +    if ( srec.nr_pfns > 1024*1024 )
   4.206 +    {
   4.207 +        ERROR("Invalid state record -- pfn count out of range");
   4.208 +        goto out;
   4.209 +    }
   4.210 +
   4.211 +    if ( !check_pfn_ownership(srec.pfn_to_mfn_frame_list, dom) )
   4.212 +    {
   4.213 +        ERROR("Invalid pfn-to-mfn frame list pointer");
   4.214 +        goto out;
   4.215 +    }
   4.216 +
   4.217 +    /* Grab a copy of the pfn-to-mfn table frame list. */
   4.218 +    p_pfn_to_mfn_frame_list = map_pfn(srec.pfn_to_mfn_frame_list);
   4.219 +    memcpy(pfn_to_mfn_frame_list, p_pfn_to_mfn_frame_list, PAGE_SIZE);
   4.220 +    unmap_pfn(p_pfn_to_mfn_frame_list);
   4.221 +
   4.222 +    /* We want zeroed memory so use calloc rather than malloc. */
   4.223 +    mfn_to_pfn_table = calloc(1, 4 * 1024 * 1024);
   4.224 +    pfn_to_mfn_table = calloc(1, 4 * srec.nr_pfns);
   4.225 +    pfn_type         = calloc(1, 4 * srec.nr_pfns);
   4.226 +
   4.227 +    /*
   4.228 +     * Construct the local pfn-to-mfn and mfn-to-pfn tables. On exit from this
   4.229 +     * loop we have each MFN mapped at most once. Note that there may be MFNs
   4.230 +     * that aren't mapped at all: we detect these by MFN_IS_IN_PSEUDOPHYS_MAP.
   4.231 +     */
   4.232 +    pfn_to_mfn_frame = NULL;
   4.233 +    for ( i = 0; i < srec.nr_pfns; i++ )
   4.234 +    {
   4.235 +        /* Each frameful of table frames must be checked & mapped on demand. */
   4.236 +        if ( (i & 1023) == 0 )
   4.237 +        {
   4.238 +            mfn = pfn_to_mfn_frame_list[i/1024];
   4.239 +            if ( !check_pfn_ownership(mfn, dom) )
   4.240 +            {
   4.241 +                ERROR("Invalid frame number if pfn-to-mfn frame list");
   4.242 +                goto out;
   4.243 +            }
   4.244 +            if ( pfn_to_mfn_frame != NULL )
   4.245 +                unmap_pfn(pfn_to_mfn_frame);
   4.246 +            pfn_to_mfn_frame = map_pfn(mfn);
   4.247 +        }
   4.248 +        
   4.249 +        mfn = pfn_to_mfn_frame[i & 1023];
   4.250 +
   4.251 +        if ( !check_pfn_ownership(mfn, dom) )
   4.252 +        {
   4.253 +            ERROR("Invalid frame specified with pfn-to-mfn table");
   4.254 +            goto out;
   4.255 +        }
   4.256 +
   4.257 +        pfn_to_mfn_table[i] = mfn;
   4.258 +
   4.259 +        /* Did we map this MFN already? That would be invalid! */
   4.260 +        if ( MFN_IS_IN_PSEUDOPHYS_MAP(mfn) )
   4.261 +        {
   4.262 +            ERROR("A machine frame appears twice in pseudophys space");
   4.263 +            goto out;
   4.264 +        }
   4.265 +        
   4.266 +        mfn_to_pfn_table[mfn] = i;
   4.267 +
   4.268 +        /* Query page type by MFN, but store it by PFN. */
   4.269 +        pfn_type[i] = get_pfn_type(mfn);
   4.270 +    }
   4.271 +
   4.272 +    /* Canonicalise the suspend-record frame number. */
   4.273 +    if ( !translate_mfn_to_pfn(&ctxt.i386_ctxt.esi) )
   4.274 +    {
   4.275 +        ERROR("State record is not in range of pseudophys map");
   4.276 +        goto out;
   4.277 +    }
   4.278 +
   4.279 +    /* Canonicalise each GDT frame number (i counts entries, 512 per frame). */
   4.280 +    for ( i = 0; i < ctxt.gdt_ents; i += 512 )
   4.281 +    {
   4.282 +        if ( !translate_mfn_to_pfn(&ctxt.gdt_frames[i/512]) )
   4.283 +        {
   4.284 +            ERROR("GDT frame is not in range of pseudophys map");
   4.285 +            goto out;
   4.286 +        }
   4.287 +    }
   4.288 +
   4.289 +    /* Canonicalise the page table base pointer. */
   4.290 +    if ( !MFN_IS_IN_PSEUDOPHYS_MAP(ctxt.pt_base >> PAGE_SHIFT) )
   4.291 +    {
   4.292 +        ERROR("PT base is not in range of pseudophys map");
   4.293 +        goto out;
   4.294 +    }
   4.295 +    ctxt.pt_base = mfn_to_pfn_table[ctxt.pt_base >> PAGE_SHIFT] << PAGE_SHIFT;
   4.296 +
   4.297 +    /* Canonicalise the pfn-to-mfn table frame-number list. */
   4.298 +    for ( i = 0; i < srec.nr_pfns; i += 1024 )
   4.299 +    {
   4.300 +        if ( !translate_mfn_to_pfn(&pfn_to_mfn_frame_list[i/1024]) )
   4.301 +        {
   4.302 +            ERROR("Frame # in pfn-to-mfn frame list is not in pseudophys");
   4.303 +            goto out;
   4.304 +        }
   4.305 +    }
   4.306 +
   4.307 +    /* Start writing out the saved-domain record. */
   4.308 +    if ( !checked_write(fd, "XenoLinuxSuspend",    16) ||
   4.309 +         !checked_write(fd, name,                  sizeof(name)) ||
   4.310 +         !checked_write(fd, &srec.nr_pfns,         sizeof(unsigned long)) ||
   4.311 +         !checked_write(fd, &ctxt,                 sizeof(ctxt)) ||
   4.312 +         !checked_write(fd, pfn_to_mfn_frame_list, PAGE_SIZE) ||
   4.313 +         !checked_write(fd, pfn_type,              4 * srec.nr_pfns) )
   4.314 +    {
   4.315 +        ERROR("Error when writing to state file");
   4.316 +        goto out;
   4.317 +    }
   4.318 +
   4.319 +    /* Now write out each data page, canonicalising page tables as we go... */
   4.320 +    for ( i = 0; i < srec.nr_pfns; i++ )
   4.321 +    {
   4.322 +        mfn = pfn_to_mfn_table[i];
   4.323 +
   4.324 +        ppage = map_pfn(mfn);
   4.325 +        memcpy(&page, ppage, PAGE_SIZE);
   4.326 +        unmap_pfn(ppage);
   4.327 +
   4.328 +        if ( (pfn_type[i] == L1TAB) || (pfn_type[i] == L2TAB) )
   4.329 +        {
   4.330 +            for ( j = 0; j < 1024; j++ ) /* j, not i: keep the outer PFN index */
   4.331 +            {
   4.332 +                if ( !(page[j] & _PAGE_PRESENT) ) continue;
   4.333 +                mfn = page[j] >> PAGE_SHIFT;
   4.334 +                if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) )
   4.335 +                {
   4.336 +                    ERROR("Frame number in pagetable page is invalid");
   4.337 +                    goto out;
   4.338 +                }
   4.339 +                page[j] &= PAGE_SIZE - 1;
   4.340 +                page[j] |= mfn_to_pfn_table[mfn] << PAGE_SHIFT;
   4.341 +            }
   4.342 +        }
   4.343 +
   4.344 +        if ( !checked_write(fd, &page, PAGE_SIZE) )
   4.345 +        {
   4.346 +            ERROR("Error when writing to state file");
   4.347 +            goto out;
   4.348 +        }
   4.349 +    }
   4.350 +
   4.351 +    /* Success! */
   4.352 +    rc = 0;
   4.353 +
   4.354 + out:
   4.355 +    /* Restart the domain if we had to stop it to save its state. */
   4.356 +    if ( we_stopped_it )
   4.357 +    {
   4.358 +        op.cmd = DOM0_STARTDOMAIN;
   4.359 +        op.u.startdomain.domain = dom;
   4.360 +        (void)do_dom0_op(&op);
   4.361 +    }
   4.362 +
   4.363 +    /* On error, make sure the file is deleted. */
   4.364 +    if ( rc != 0 )
   4.365 +        unlink(filename);
   4.366 +    
   4.367 +    return !!rc;
   4.368 +}
     5.1 --- a/xen/common/dom0_ops.c	Wed Nov 05 15:30:38 2003 +0000
     5.2 +++ b/xen/common/dom0_ops.c	Wed Nov 05 23:22:13 2003 +0000
     5.3 @@ -330,24 +330,34 @@ long do_dom0_op(dom0_op_t *u_dom0_op)
     5.4  
     5.5      case DOM0_GETPAGEFRAMEINFO:
     5.6      {
     5.7 -        struct pfn_info *page = frame_table + op.u.getpageframeinfo.pfn;
     5.8 +        struct pfn_info *page;
     5.9 +        unsigned long pfn = op.u.getpageframeinfo.pfn;
    5.10          
    5.11 -        op.u.getpageframeinfo.domain = page->flags & PG_domain_mask;
    5.12 -        op.u.getpageframeinfo.type   = NONE;
    5.13 -        if ( page->type_count & REFCNT_PIN_BIT )
    5.14 +        if ( pfn >= max_page )
    5.15          {
    5.16 -            switch ( page->flags & PG_type_mask )
    5.17 +            ret = -EINVAL;
    5.18 +        }
    5.19 +        else
    5.20 +        {
    5.21 +            page = frame_table + pfn;
    5.22 +            
    5.23 +            op.u.getpageframeinfo.domain = page->flags & PG_domain_mask;
    5.24 +            op.u.getpageframeinfo.type   = NONE;
    5.25 +            if ( page->type_count & REFCNT_PIN_BIT )
    5.26              {
    5.27 -            case PGT_l1_page_table:
    5.28 -                op.u.getpageframeinfo.type = L1TAB;
    5.29 -                break;
    5.30 -            case PGT_l2_page_table:
    5.31 -                op.u.getpageframeinfo.type = L2TAB;
    5.32 -                break;
    5.33 +                switch ( page->flags & PG_type_mask )
    5.34 +                {
    5.35 +                case PGT_l1_page_table:
    5.36 +                    op.u.getpageframeinfo.type = L1TAB;
    5.37 +                    break;
    5.38 +                case PGT_l2_page_table:
    5.39 +                    op.u.getpageframeinfo.type = L2TAB;
    5.40 +                    break;
    5.41 +                }
    5.42              }
    5.43 +
    5.44 +            copy_to_user(u_dom0_op, &op, sizeof(op));
    5.45          }
    5.46 -
    5.47 -        copy_to_user(u_dom0_op, &op, sizeof(op));
    5.48      }
    5.49      break;
    5.50