ia64/xen-unstable

changeset 1716:15cbfb844361

bitkeeper revision 1.1048 (40ec4798TeT9RzPiK7i4naYPCCSQog)

Merge ssh://xenbk@gandalf//var/bk/djm/xeno-unstable-common.bk
into kirby.fc.hp.com:/home/djm/src/xen/xeno-unstable-common.bk
author djm@kirby.fc.hp.com
date Wed Jul 07 18:57:28 2004 +0000 (2004-07-07)
parents e47071c24e23 376d3fcd1ceb
children bae23a1177c6
files .rootkeys xen/arch/x86/domain.c xen/arch/x86/memory.c xen/common/domain.c xen/common/memory.c xen/include/asm-x86/config.h xen/include/asm-x86/mm.h xen/include/xen/mm.h xen/include/xen/sched.h
line diff
     1.1 --- a/.rootkeys	Wed Jul 07 18:56:39 2004 +0000
     1.2 +++ b/.rootkeys	Wed Jul 07 18:57:28 2004 +0000
     1.3 @@ -323,6 +323,7 @@ 3ddb79bcCAq6IpdkHueChoVTfXqEQQ xen/arch/
     1.4  3ddb79bcBit4xJXbwtX0kb1hh2uO1Q xen/arch/x86/idle0_task.c
     1.5  3ddb79bcKIkRR0kqWaJhe5VUDkMdxg xen/arch/x86/io_apic.c
     1.6  3ddb79bdqfIcjkz_h9Hvtp8Tk_19Zw xen/arch/x86/irq.c
     1.7 +40ec29ffuOa1ZvmJHzFKyZn4k_RcXg xen/arch/x86/memory.c
     1.8  3ddb79bdS4UeWWXDH-FaBKqcpMFcnw xen/arch/x86/mpparse.c
     1.9  3f12cff65EV3qOG2j37Qm0ShgvXGRw xen/arch/x86/nmi.c
    1.10  3ddb79bdHe6_Uij4-glW91vInNtBYQ xen/arch/x86/pci-irq.c
    1.11 @@ -448,6 +449,7 @@ 3ddb79c2TKeScYHQZreTdHqYNLbehQ xen/inclu
    1.12  3ddb79c2L7rTlFzazOLW1XuSZefpFw xen/include/asm-x86/irq.h
    1.13  404f1b93OjLO4bFfBXYNaJdIqlNz-Q xen/include/asm-x86/ldt.h
    1.14  3ddb79c3I98vWcQR8xEo34JMJ4Ahyw xen/include/asm-x86/mc146818rtc.h
    1.15 +40ec25fd7cSvbP7Biw91zaU_g0xsEQ xen/include/asm-x86/mm.h
    1.16  3ddb79c3n_UbPuxlkNxvvLycClIkxA xen/include/asm-x86/mpspec.h
    1.17  3ddb79c2wa0dA_LGigxOelSGbJ284Q xen/include/asm-x86/msr.h
    1.18  3ddb79c3xjYnrv5t3VqYlR4tNEOl4Q xen/include/asm-x86/page.h
     2.1 --- a/xen/arch/x86/domain.c	Wed Jul 07 18:56:39 2004 +0000
     2.2 +++ b/xen/arch/x86/domain.c	Wed Jul 07 18:57:28 2004 +0000
     2.3 @@ -24,6 +24,28 @@
     2.4  #include <xen/irq.h>
     2.5  #include <xen/event.h>
     2.6  #include <xen/shadow.h>
     2.7 +#include <xen/console.h>
     2.8 +
     2.9 +#include <xen/elf.h>
    2.10 +
    2.11 +extern int loadelfimage(char *);
    2.12 +extern int readelfimage_base_and_size(char *, unsigned long,
    2.13 +                  unsigned long *, unsigned long *, unsigned long *);
    2.14 +
    2.15 +#if !defined(CONFIG_X86_64BITMODE)
    2.16 +/* No ring-3 access in initial page tables. */
    2.17 +#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
    2.18 +#else
    2.19 +/* Allow ring-3 access in long mode as guest cannot use ring 1. */
    2.20 +#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
    2.21 +#endif
    2.22 +#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
    2.23 +#define L3_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
    2.24 +#define L4_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
    2.25 +
    2.26 +#define round_pgup(_p)    (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
    2.27 +#define round_pgdown(_p)  ((_p)&PAGE_MASK)
    2.28 +
    2.29  
    2.30  int hlt_counter;
    2.31  
    2.32 @@ -422,3 +444,375 @@ long do_iopl(domid_t domain, unsigned in
    2.33  }
    2.34  
    2.35  #endif
    2.36 +
    2.37 +void domain_relinquish_memory(struct domain *d)
    2.38 +{
    2.39 +    struct list_head *ent, *tmp;
    2.40 +    struct pfn_info  *page;
    2.41 +    unsigned long     x, y;
    2.42 +
    2.43 +    /*
    2.44 +     * If we're executing the idle task then we may still be running over the 
    2.45 +     * dead domain's page tables. We'd better fix that before freeing them!
    2.46 +     */
    2.47 +    if ( is_idle_task(current) )
    2.48 +        write_ptbase(&current->mm);
    2.49 +
    2.50 +    /* Exit shadow mode before deconstructing final guest page table. */
    2.51 +    if ( shadow_mode(d) )
    2.52 +        shadow_mode_disable(d);
    2.53 +
    2.54 +    /* Drop the in-use reference to the page-table base. */
    2.55 +    if ( pagetable_val(d->mm.pagetable) != 0 )
    2.56 +        put_page_and_type(&frame_table[pagetable_val(d->mm.pagetable) >>
    2.57 +                                      PAGE_SHIFT]);
    2.58 +
    2.59 +    /* Relinquish Xen-heap pages. Currently this can only be 'shared_info'. */
    2.60 +    page = virt_to_page(d->shared_info);
    2.61 +    if ( test_and_clear_bit(_PGC_allocated, &page->count_and_flags) )
    2.62 +        put_page(page);
    2.63 +
    2.64 +    /* Relinquish all pages on the domain's allocation list. */
    2.65 +    spin_lock_recursive(&d->page_alloc_lock); /* may enter free_domain_page */
    2.66 +    list_for_each_safe ( ent, tmp, &d->page_list )
    2.67 +    {
    2.68 +        page = list_entry(ent, struct pfn_info, list);
    2.69 +
    2.70 +        if ( test_and_clear_bit(_PGC_guest_pinned, &page->count_and_flags) )
    2.71 +            put_page_and_type(page);
    2.72 +
    2.73 +        if ( test_and_clear_bit(_PGC_allocated, &page->count_and_flags) )
    2.74 +            put_page(page);
    2.75 +
    2.76 +        /*
    2.77 +         * Forcibly invalidate base page tables at this point to break circular
    2.78 +         * 'linear page table' references. This is okay because MMU structures
    2.79 +         * are not shared across domains and this domain is now dead. Thus base
    2.80 +         * tables are not in use so a non-zero count means circular reference.
    2.81 +         */
    2.82 +        y = page->type_and_flags;
    2.83 +        do {
    2.84 +            x = y;
    2.85 +            if ( likely((x & (PGT_type_mask|PGT_validated)) != 
    2.86 +                        (PGT_base_page_table|PGT_validated)) )
    2.87 +                break;
    2.88 +            y = cmpxchg(&page->type_and_flags, x, x & ~PGT_validated);
    2.89 +            if ( likely(y == x) )
    2.90 +                free_page_type(page, PGT_base_page_table);
    2.91 +        }
    2.92 +        while ( unlikely(y != x) );
    2.93 +    }
    2.94 +    spin_unlock_recursive(&d->page_alloc_lock);
    2.95 +}
    2.96 +
    2.97 +
    2.98 +int construct_dom0(struct domain *p, 
    2.99 +                   unsigned long alloc_start,
   2.100 +                   unsigned long alloc_end,
   2.101 +                   char *image_start, unsigned long image_len, 
   2.102 +                   char *initrd_start, unsigned long initrd_len,
   2.103 +                   char *cmdline)
   2.104 +{
   2.105 +    char *dst;
   2.106 +    int i, rc;
   2.107 +    unsigned long pfn, mfn;
   2.108 +    unsigned long nr_pages = (alloc_end - alloc_start) >> PAGE_SHIFT;
   2.109 +    unsigned long nr_pt_pages;
   2.110 +    unsigned long count;
   2.111 +    l2_pgentry_t *l2tab, *l2start;
   2.112 +    l1_pgentry_t *l1tab = NULL, *l1start = NULL;
   2.113 +    struct pfn_info *page = NULL;
   2.114 +    start_info_t *si;
   2.115 +
   2.116 +    /*
   2.117 +     * This fully describes the memory layout of the initial domain. All 
    2.118 +     * *_start addresses are page-aligned, except v_start (and v_end) which are 
   2.119 +     * superpage-aligned.
   2.120 +     */
   2.121 +    unsigned long v_start;
   2.122 +    unsigned long vkern_start;
   2.123 +    unsigned long vkern_entry;
   2.124 +    unsigned long vkern_end;
   2.125 +    unsigned long vinitrd_start;
   2.126 +    unsigned long vinitrd_end;
   2.127 +    unsigned long vphysmap_start;
   2.128 +    unsigned long vphysmap_end;
   2.129 +    unsigned long vstartinfo_start;
   2.130 +    unsigned long vstartinfo_end;
   2.131 +    unsigned long vstack_start;
   2.132 +    unsigned long vstack_end;
   2.133 +    unsigned long vpt_start;
   2.134 +    unsigned long vpt_end;
   2.135 +    unsigned long v_end;
   2.136 +
   2.137 +    /* Machine address of next candidate page-table page. */
   2.138 +    unsigned long mpt_alloc;
   2.139 +
   2.140 +    extern void physdev_init_dom0(struct domain *);
   2.141 +
   2.142 +    /* Sanity! */
   2.143 +    if ( p->domain != 0 ) 
   2.144 +        BUG();
   2.145 +    if ( test_bit(DF_CONSTRUCTED, &p->flags) ) 
   2.146 +        BUG();
   2.147 +
   2.148 +    printk("*** LOADING DOMAIN 0 ***\n");
   2.149 +
   2.150 +    /*
   2.151 +     * This is all a bit grim. We've moved the modules to the "safe" physical 
   2.152 +     * memory region above MAP_DIRECTMAP_ADDRESS (48MB). Later in this 
    2.153 +     * routine we're going to copy them down into the region that's actually 
   2.154 +     * been allocated to domain 0. This is highly likely to be overlapping, so 
   2.155 +     * we use a forward copy.
   2.156 +     * 
   2.157 +     * MAP_DIRECTMAP_ADDRESS should be safe. The worst case is a machine with 
   2.158 +     * 4GB and lots of network/disk cards that allocate loads of buffers. 
   2.159 +     * We'll have to revisit this if we ever support PAE (64GB).
   2.160 +     */
   2.161 +
   2.162 +    rc = readelfimage_base_and_size(image_start, image_len,
   2.163 +                                    &vkern_start, &vkern_end, &vkern_entry);
   2.164 +    if ( rc != 0 )
   2.165 +        return rc;
   2.166 +
   2.167 +    /*
   2.168 +     * Why do we need this? The number of page-table frames depends on the 
   2.169 +     * size of the bootstrap address space. But the size of the address space 
   2.170 +     * depends on the number of page-table frames (since each one is mapped 
   2.171 +     * read-only). We have a pair of simultaneous equations in two unknowns, 
   2.172 +     * which we solve by exhaustive search.
   2.173 +     */
   2.174 +    for ( nr_pt_pages = 2; ; nr_pt_pages++ )
   2.175 +    {
   2.176 +        v_start          = vkern_start & ~((1<<22)-1);
   2.177 +        vinitrd_start    = round_pgup(vkern_end);
   2.178 +        vinitrd_end      = vinitrd_start + initrd_len;
   2.179 +        vphysmap_start   = round_pgup(vinitrd_end);
   2.180 +        vphysmap_end     = vphysmap_start + (nr_pages * sizeof(unsigned long));
   2.181 +        vpt_start        = round_pgup(vphysmap_end);
   2.182 +        vpt_end          = vpt_start + (nr_pt_pages * PAGE_SIZE);
   2.183 +        vstartinfo_start = vpt_end;
   2.184 +        vstartinfo_end   = vstartinfo_start + PAGE_SIZE;
   2.185 +        vstack_start     = vstartinfo_end;
   2.186 +        vstack_end       = vstack_start + PAGE_SIZE;
   2.187 +        v_end            = (vstack_end + (1<<22)-1) & ~((1<<22)-1);
   2.188 +        if ( (v_end - vstack_end) < (512 << 10) )
   2.189 +            v_end += 1 << 22; /* Add extra 4MB to get >= 512kB padding. */
   2.190 +        if ( (((v_end - v_start) >> L2_PAGETABLE_SHIFT) + 1) <= nr_pt_pages )
   2.191 +            break;
   2.192 +    }
   2.193 +
   2.194 +    if ( (v_end - v_start) > (nr_pages * PAGE_SIZE) )
   2.195 +    {
   2.196 +        printk("Initial guest OS requires too much space\n"
   2.197 +               "(%luMB is greater than %luMB limit)\n",
   2.198 +               (v_end-v_start)>>20, (nr_pages<<PAGE_SHIFT)>>20);
   2.199 +        return -ENOMEM;
   2.200 +    }
   2.201 +
   2.202 +    printk("PHYSICAL MEMORY ARRANGEMENT:\n"
   2.203 +           " Kernel image:  %p->%p\n"
   2.204 +           " Initrd image:  %p->%p\n"
   2.205 +           " Dom0 alloc.:   %08lx->%08lx\n",
   2.206 +           image_start, image_start + image_len,
   2.207 +           initrd_start, initrd_start + initrd_len,
   2.208 +           alloc_start, alloc_end);
   2.209 +    printk("VIRTUAL MEMORY ARRANGEMENT:\n"
   2.210 +           " Loaded kernel: %08lx->%08lx\n"
   2.211 +           " Init. ramdisk: %08lx->%08lx\n"
   2.212 +           " Phys-Mach map: %08lx->%08lx\n"
   2.213 +           " Page tables:   %08lx->%08lx\n"
   2.214 +           " Start info:    %08lx->%08lx\n"
   2.215 +           " Boot stack:    %08lx->%08lx\n"
   2.216 +           " TOTAL:         %08lx->%08lx\n",
   2.217 +           vkern_start, vkern_end, 
   2.218 +           vinitrd_start, vinitrd_end,
   2.219 +           vphysmap_start, vphysmap_end,
   2.220 +           vpt_start, vpt_end,
   2.221 +           vstartinfo_start, vstartinfo_end,
   2.222 +           vstack_start, vstack_end,
   2.223 +           v_start, v_end);
   2.224 +    printk(" ENTRY ADDRESS: %08lx\n", vkern_entry);
   2.225 +
   2.226 +    /*
   2.227 +     * Protect the lowest 1GB of memory. We use a temporary mapping there
   2.228 +     * from which we copy the kernel and ramdisk images.
   2.229 +     */
   2.230 +    if ( v_start < (1<<30) )
   2.231 +    {
    2.232 +        printk("Initial domain must not be loaded into the lowest 1GB of memory.\n");
   2.233 +        return -EINVAL;
   2.234 +    }
   2.235 +
   2.236 +    /* Construct a frame-allocation list for the initial domain. */
   2.237 +    for ( mfn = (alloc_start>>PAGE_SHIFT); 
   2.238 +          mfn < (alloc_end>>PAGE_SHIFT); 
   2.239 +          mfn++ )
   2.240 +    {
   2.241 +        page = &frame_table[mfn];
   2.242 +        page->u.domain        = p;
   2.243 +        page->type_and_flags  = 0;
   2.244 +        page->count_and_flags = PGC_allocated | 1;
   2.245 +        list_add_tail(&page->list, &p->page_list);
   2.246 +        p->tot_pages++; p->max_pages++;
   2.247 +    }
   2.248 +
   2.249 +    mpt_alloc = (vpt_start - v_start) + alloc_start;
   2.250 +
   2.251 +    SET_GDT_ENTRIES(p, DEFAULT_GDT_ENTRIES);
   2.252 +    SET_GDT_ADDRESS(p, DEFAULT_GDT_ADDRESS);
   2.253 +
   2.254 +    /*
   2.255 +     * We're basically forcing default RPLs to 1, so that our "what privilege
   2.256 +     * level are we returning to?" logic works.
   2.257 +     */
   2.258 +    p->failsafe_selector = FLAT_GUESTOS_CS;
   2.259 +    p->event_selector    = FLAT_GUESTOS_CS;
   2.260 +    p->thread.guestos_ss = FLAT_GUESTOS_DS;
   2.261 +    for ( i = 0; i < 256; i++ ) 
   2.262 +        p->thread.traps[i].cs = FLAT_GUESTOS_CS;
   2.263 +
   2.264 +    /* WARNING: The new domain must have its 'processor' field filled in! */
   2.265 +    l2start = l2tab = (l2_pgentry_t *)mpt_alloc; mpt_alloc += PAGE_SIZE;
   2.266 +    memcpy(l2tab, &idle_pg_table[0], PAGE_SIZE);
   2.267 +    l2tab[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
   2.268 +        mk_l2_pgentry((unsigned long)l2start | __PAGE_HYPERVISOR);
   2.269 +    l2tab[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] =
   2.270 +        mk_l2_pgentry(__pa(p->mm.perdomain_pt) | __PAGE_HYPERVISOR);
   2.271 +    p->mm.pagetable = mk_pagetable((unsigned long)l2start);
   2.272 +
   2.273 +    l2tab += l2_table_offset(v_start);
   2.274 +    mfn = alloc_start >> PAGE_SHIFT;
   2.275 +    for ( count = 0; count < ((v_end-v_start)>>PAGE_SHIFT); count++ )
   2.276 +    {
   2.277 +        if ( !((unsigned long)l1tab & (PAGE_SIZE-1)) )
   2.278 +        {
   2.279 +            l1start = l1tab = (l1_pgentry_t *)mpt_alloc; 
   2.280 +            mpt_alloc += PAGE_SIZE;
   2.281 +            *l2tab++ = mk_l2_pgentry((unsigned long)l1start | L2_PROT);
   2.282 +            clear_page(l1tab);
   2.283 +        }
   2.284 +        *l1tab++ = mk_l1_pgentry((mfn << PAGE_SHIFT) | L1_PROT);
   2.285 +        
   2.286 +        page = &frame_table[mfn];
   2.287 +        set_bit(_PGC_tlb_flush_on_type_change, &page->count_and_flags);
   2.288 +        if ( !get_page_and_type(page, p, PGT_writeable_page) )
   2.289 +            BUG();
   2.290 +
   2.291 +        mfn++;
   2.292 +    }
   2.293 +
   2.294 +    /* Pages that are part of page tables must be read only. */
   2.295 +    l2tab = l2start + l2_table_offset(vpt_start);
   2.296 +    l1start = l1tab = (l1_pgentry_t *)l2_pgentry_to_phys(*l2tab);
   2.297 +    l1tab += l1_table_offset(vpt_start);
   2.298 +    l2tab++;
   2.299 +    for ( count = 0; count < nr_pt_pages; count++ ) 
   2.300 +    {
   2.301 +        *l1tab = mk_l1_pgentry(l1_pgentry_val(*l1tab) & ~_PAGE_RW);
   2.302 +        page = &frame_table[l1_pgentry_to_pagenr(*l1tab)];
   2.303 +        if ( count == 0 )
   2.304 +        {
   2.305 +            page->type_and_flags &= ~PGT_type_mask;
   2.306 +            page->type_and_flags |= PGT_l2_page_table;
   2.307 +            get_page(page, p); /* an extra ref because of readable mapping */
   2.308 +            /* Get another ref to L2 page so that it can be pinned. */
   2.309 +            if ( !get_page_and_type(page, p, PGT_l2_page_table) )
   2.310 +                BUG();
   2.311 +            set_bit(_PGC_guest_pinned, &page->count_and_flags);
   2.312 +        }
   2.313 +        else
   2.314 +        {
   2.315 +            page->type_and_flags &= ~PGT_type_mask;
   2.316 +            page->type_and_flags |= PGT_l1_page_table;
   2.317 +            get_page(page, p); /* an extra ref because of readable mapping */
   2.318 +        }
   2.319 +        l1tab++;
   2.320 +        if( !((unsigned long)l1tab & (PAGE_SIZE - 1)) )
   2.321 +            l1start = l1tab = (l1_pgentry_t *)l2_pgentry_to_phys(*l2tab);
   2.322 +    }
   2.323 +
   2.324 +    /* Set up shared-info area. */
   2.325 +    update_dom_time(p->shared_info);
   2.326 +    p->shared_info->domain_time = 0;
   2.327 +    /* Mask all upcalls... */
   2.328 +    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
   2.329 +        p->shared_info->vcpu_data[i].evtchn_upcall_mask = 1;
   2.330 +
   2.331 +    /* Install the new page tables. */
   2.332 +    __cli();
   2.333 +    write_ptbase(&p->mm);
   2.334 +
   2.335 +    /* Copy the OS image. */
   2.336 +    (void)loadelfimage(image_start);
   2.337 +
   2.338 +    /* Copy the initial ramdisk. */
   2.339 +    if ( initrd_len != 0 )
   2.340 +        memcpy((void *)vinitrd_start, initrd_start, initrd_len);
   2.341 +    
   2.342 +    /* Set up start info area. */
   2.343 +    si = (start_info_t *)vstartinfo_start;
   2.344 +    memset(si, 0, PAGE_SIZE);
   2.345 +    si->nr_pages     = p->tot_pages;
   2.346 +    si->shared_info  = virt_to_phys(p->shared_info);
   2.347 +    si->flags        = SIF_PRIVILEGED | SIF_INITDOMAIN;
   2.348 +    si->pt_base      = vpt_start;
   2.349 +    si->nr_pt_frames = nr_pt_pages;
   2.350 +    si->mfn_list     = vphysmap_start;
   2.351 +
   2.352 +    /* Write the phys->machine and machine->phys table entries. */
   2.353 +    for ( mfn = (alloc_start>>PAGE_SHIFT); 
   2.354 +          mfn < (alloc_end>>PAGE_SHIFT); 
   2.355 +          mfn++ )
   2.356 +    {
   2.357 +        pfn = mfn - (alloc_start>>PAGE_SHIFT);
   2.358 +        ((unsigned long *)vphysmap_start)[pfn] = mfn;
   2.359 +        machine_to_phys_mapping[mfn] = pfn;
   2.360 +    }
   2.361 +
   2.362 +    if ( initrd_len != 0 )
   2.363 +    {
   2.364 +        si->mod_start = vinitrd_start;
   2.365 +        si->mod_len   = initrd_len;
   2.366 +        printk("Initrd len 0x%lx, start at 0x%08lx\n",
   2.367 +               si->mod_len, si->mod_start);
   2.368 +    }
   2.369 +
   2.370 +    dst = si->cmd_line;
   2.371 +    if ( cmdline != NULL )
   2.372 +    {
   2.373 +        for ( i = 0; i < 255; i++ )
   2.374 +        {
   2.375 +            if ( cmdline[i] == '\0' )
   2.376 +                break;
   2.377 +            *dst++ = cmdline[i];
   2.378 +        }
   2.379 +    }
   2.380 +    *dst = '\0';
   2.381 +
   2.382 +    /* Reinstate the caller's page tables. */
   2.383 +    write_ptbase(&current->mm);
   2.384 +    __sti();
   2.385 +
   2.386 +    /* Destroy low mappings - they were only for our convenience. */
   2.387 +    for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
   2.388 +        if ( l2_pgentry_val(l2start[i]) & _PAGE_PSE )
   2.389 +            l2start[i] = mk_l2_pgentry(0);
   2.390 +    zap_low_mappings(); /* Do the same for the idle page tables. */
   2.391 +    
   2.392 +    /* Give up the VGA console if DOM0 is configured to grab it. */
   2.393 +    console_endboot(strstr(cmdline, "tty0") != NULL);
   2.394 +
   2.395 +    /* DOM0 gets access to everything. */
   2.396 +    physdev_init_dom0(p);
   2.397 +
   2.398 +    set_bit(DF_CONSTRUCTED, &p->flags);
   2.399 +
   2.400 +#if 0 /* XXXXX DO NOT CHECK IN ENABLED !!! (but useful for testing so leave) */
   2.401 +    shadow_mode_enable(&p->mm, SHM_test); 
   2.402 +#endif
   2.403 +
   2.404 +    new_thread(p, vkern_entry, vstack_end, vstartinfo_start);
   2.405 +
   2.406 +    return 0;
   2.407 +}
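
The sizing loop in construct_dom0() above resolves a circular dependency: the number of bootstrap page-table frames depends on the size of the bootstrap region, which in turn depends on how many page-table frames must be mapped into it, so the code searches exhaustively for the fixed point. Below is a minimal standalone sketch of that search, assuming 4kB pages and 4MB superpages as in the 32-bit code above; the sample sizes, constants and the pt_frames_needed() helper are illustrative only, not the hypervisor's own.

#include <stdio.h>

#define PAGE_SIZE       4096UL
#define SUPERPAGE_SIZE  (1UL << 22)                      /* one L2 entry maps 4MB */
#define ROUND_UP(x, a)  (((x) + (a) - 1) & ~((a) - 1))

/* Page-table frames needed to map a bootstrap region of 'bytes':
 * one L1 page per 4MB mapped, plus the L2 page itself (the "+ 1" in the test above). */
static unsigned long pt_frames_needed(unsigned long bytes)
{
    return (bytes >> 22) + 1;
}

int main(void)
{
    unsigned long kernel = 6UL << 20, initrd = 4UL << 20, nr_pages = 16384;
    unsigned long nr_pt_pages, v_size;

    /* Exhaustive search: grow nr_pt_pages until the layout it implies fits within it. */
    for ( nr_pt_pages = 2; ; nr_pt_pages++ )
    {
        v_size = ROUND_UP(kernel, PAGE_SIZE)
               + ROUND_UP(initrd, PAGE_SIZE)
               + ROUND_UP(nr_pages * sizeof(unsigned long), PAGE_SIZE)  /* phys-mach map */
               + nr_pt_pages * PAGE_SIZE                                /* the tables    */
               + 2 * PAGE_SIZE;                                         /* info + stack  */
        v_size = ROUND_UP(v_size, SUPERPAGE_SIZE);
        if ( pt_frames_needed(v_size) <= nr_pt_pages )
            break;                                       /* fixed point reached */
    }
    printf("nr_pt_pages = %lu for a %lu MB bootstrap region\n", nr_pt_pages, v_size >> 20);
    return 0;
}

With these sample sizes the loop settles at nr_pt_pages = 4 for a 12MB region; the real loop additionally ensures at least 512kB of slack above the boot stack and then checks the result against the domain's physical allocation.
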
     3.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     3.2 +++ b/xen/arch/x86/memory.c	Wed Jul 07 18:57:28 2004 +0000
     3.3 @@ -0,0 +1,1201 @@
     3.4 +/******************************************************************************
     3.5 + * arch/x86/memory.c
     3.6 + * 
     3.7 + * Copyright (c) 2002-2004 K A Fraser
     3.8 + * 
     3.9 + * This program is free software; you can redistribute it and/or modify
    3.10 + * it under the terms of the GNU General Public License as published by
    3.11 + * the Free Software Foundation; either version 2 of the License, or
    3.12 + * (at your option) any later version.
    3.13 + * 
    3.14 + * This program is distributed in the hope that it will be useful,
    3.15 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
    3.16 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    3.17 + * GNU General Public License for more details.
    3.18 + * 
    3.19 + * You should have received a copy of the GNU General Public License
    3.20 + * along with this program; if not, write to the Free Software
    3.21 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
    3.22 + */
    3.23 +
    3.24 +/*
    3.25 + * A description of the x86 page table API:
    3.26 + * 
    3.27 + * Domains trap to do_mmu_update with a list of update requests.
    3.28 + * This is a list of (ptr, val) pairs, where the requested operation
    3.29 + * is *ptr = val.
    3.30 + * 
    3.31 + * Reference counting of pages:
    3.32 + * ----------------------------
    3.33 + * Each page has two refcounts: tot_count and type_count.
    3.34 + * 
    3.35 + * TOT_COUNT is the obvious reference count. It counts all uses of a
    3.36 + * physical page frame by a domain, including uses as a page directory,
    3.37 + * a page table, or simple mappings via a PTE. This count prevents a
    3.38 + * domain from releasing a frame back to the free pool when it still holds
    3.39 + * a reference to it.
    3.40 + * 
    3.41 + * TYPE_COUNT is more subtle. A frame can be put to one of three
    3.42 + * mutually-exclusive uses: it might be used as a page directory, or a
    3.43 + * page table, or it may be mapped writeable by the domain [of course, a
     3.44 + * frame might be used in none of these three ways!].
    3.45 + * So, type_count is a count of the number of times a frame is being 
    3.46 + * referred to in its current incarnation. Therefore, a page can only
    3.47 + * change its type when its type count is zero.
    3.48 + * 
    3.49 + * Pinning the page type:
    3.50 + * ----------------------
    3.51 + * The type of a page can be pinned/unpinned with the commands
    3.52 + * MMUEXT_[UN]PIN_L?_TABLE. Each page can be pinned exactly once (that is,
    3.53 + * pinning is not reference counted, so it can't be nested).
    3.54 + * This is useful to prevent a page's type count falling to zero, at which
    3.55 + * point safety checks would need to be carried out next time the count
    3.56 + * is increased again.
    3.57 + * 
    3.58 + * A further note on writeable page mappings:
    3.59 + * ------------------------------------------
    3.60 + * For simplicity, the count of writeable mappings for a page may not
    3.61 + * correspond to reality. The 'writeable count' is incremented for every
    3.62 + * PTE which maps the page with the _PAGE_RW flag set. However, for
    3.63 + * write access to be possible the page directory entry must also have
    3.64 + * its _PAGE_RW bit set. We do not check this as it complicates the 
    3.65 + * reference counting considerably [consider the case of multiple
    3.66 + * directory entries referencing a single page table, some with the RW
    3.67 + * bit set, others not -- it starts getting a bit messy].
    3.68 + * In normal use, this simplification shouldn't be a problem.
    3.69 + * However, the logic can be added if required.
    3.70 + * 
    3.71 + * One more note on read-only page mappings:
    3.72 + * -----------------------------------------
    3.73 + * We want domains to be able to map pages for read-only access. The
    3.74 + * main reason is that page tables and directories should be readable
    3.75 + * by a domain, but it would not be safe for them to be writeable.
    3.76 + * However, domains have free access to rings 1 & 2 of the Intel
    3.77 + * privilege model. In terms of page protection, these are considered
    3.78 + * to be part of 'supervisor mode'. The WP bit in CR0 controls whether
    3.79 + * read-only restrictions are respected in supervisor mode -- if the 
    3.80 + * bit is clear then any mapped page is writeable.
    3.81 + * 
    3.82 + * We get round this by always setting the WP bit and disallowing 
    3.83 + * updates to it. This is very unlikely to cause a problem for guest
    3.84 + * OS's, which will generally use the WP bit to simplify copy-on-write
    3.85 + * implementation (in that case, OS wants a fault when it writes to
    3.86 + * an application-supplied buffer).
    3.87 + */
     3.88 + */
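
To make the (ptr, val) request format described above concrete, here is a minimal guest-side sketch of how one update for do_mmu_update() is shaped. The two-field layout and the encoding of the command in the low bits of ptr follow the code in this file (cf. "req.ptr & (sizeof(l1_pgentry_t)-1)" in do_mmu_update()); the numeric value assumed for MMU_NORMAL_PT_UPDATE, the example flag bits and the mk_normal_update() helper are illustrative assumptions rather than part of this changeset.

#include <stdint.h>

/* One request in the list handed to do_mmu_update(): "*ptr = val". */
typedef struct {
    uint32_t ptr;   /* machine address of the PTE to write; low 2 bits carry the command */
    uint32_t val;   /* new entry contents: frame address | protection flags              */
} mmu_update_t;

#define MMU_NORMAL_PT_UPDATE 0   /* assumed encoding of the "normal PT update" command */

/* Build a request that points the PTE at machine address pte_maddr at frame mfn. */
mmu_update_t mk_normal_update(uint32_t pte_maddr, uint32_t mfn, uint32_t flags)
{
    mmu_update_t req;
    req.ptr = (pte_maddr & ~3u) | MMU_NORMAL_PT_UPDATE;   /* command lives in the low bits */
    req.val = (mfn << 12) | flags;                        /* 4kB frames: mfn << PAGE_SHIFT */
    return req;
}

int main(void)
{
    /* A hypothetical two-entry batch: update two adjacent PTEs in a single trap. */
    mmu_update_t batch[2] = {
        mk_normal_update(0x00342008u, 0x1a2bu, 0x63u),    /* 0x63: present|rw|accessed|dirty */
        mk_normal_update(0x0034200cu, 0x1a2cu, 0x63u),
    };
    return (int)(batch[1].val & 1);   /* use the batch so it isn't optimised away */
}

The guest queues a batch of such requests and traps into the hypervisor once; do_mmu_update() then validates the type and reference counts of every frame touched, exactly as the reference-counting rules above require.
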
    3.89 +#include <xen/config.h>
    3.90 +#include <xen/init.h>
    3.91 +#include <xen/lib.h>
    3.92 +#include <xen/mm.h>
    3.93 +#include <xen/sched.h>
    3.94 +#include <xen/errno.h>
    3.95 +#include <xen/perfc.h>
    3.96 +#include <xen/irq.h>
    3.97 +#include <xen/shadow.h>
    3.98 +#include <asm/page.h>
    3.99 +#include <asm/flushtlb.h>
   3.100 +#include <asm/io.h>
   3.101 +#include <asm/uaccess.h>
   3.102 +#include <asm/domain_page.h>
   3.103 +#include <asm/ldt.h>
   3.104 +
   3.105 +#ifndef NDEBUG
   3.106 +#define MEM_LOG(_f, _a...)                           \
   3.107 +  printk("DOM%u: (file=memory.c, line=%d) " _f "\n", \
   3.108 +         current->domain , __LINE__ , ## _a )
   3.109 +#else
   3.110 +#define MEM_LOG(_f, _a...) ((void)0)
   3.111 +#endif
   3.112 +
   3.113 +static int alloc_l2_table(struct pfn_info *page);
   3.114 +static int alloc_l1_table(struct pfn_info *page);
   3.115 +static int get_page_from_pagenr(unsigned long page_nr, struct domain *d);
   3.116 +static int get_page_and_type_from_pagenr(unsigned long page_nr, 
   3.117 +                                         u32 type,
   3.118 +                                         struct domain *d);
   3.119 +
   3.120 +static void free_l2_table(struct pfn_info *page);
   3.121 +static void free_l1_table(struct pfn_info *page);
   3.122 +
   3.123 +static int mod_l2_entry(l2_pgentry_t *, l2_pgentry_t, unsigned long);
   3.124 +static int mod_l1_entry(l1_pgentry_t *, l1_pgentry_t);
   3.125 +
   3.126 +/* Used to defer flushing of memory structures. */
   3.127 +static struct {
   3.128 +#define DOP_FLUSH_TLB   (1<<0) /* Flush the TLB.                 */
   3.129 +#define DOP_RELOAD_LDT  (1<<1) /* Reload the LDT shadow mapping. */
   3.130 +    unsigned long       deferred_ops;
   3.131 +    unsigned long       cr0;
   3.132 +    /* General-Purpose Subject, Page-Table Subject */
   3.133 +    struct domain *gps, *pts;
   3.134 +} percpu_info[NR_CPUS] __cacheline_aligned;
   3.135 +
   3.136 +/* Determine the current General-Purpose Subject or Page-Table Subject. */
   3.137 +#define PTS (percpu_info[smp_processor_id()].pts ? : current)
   3.138 +#define GPS (percpu_info[smp_processor_id()].gps ? : current)
   3.139 +
   3.140 +
   3.141 +void init_percpu_info(void)
   3.142 +{
   3.143 +    memset(percpu_info, 0, sizeof(percpu_info));
   3.144 +}
   3.145 +
   3.146 +static void __invalidate_shadow_ldt(struct domain *d)
   3.147 +{
   3.148 +    int i;
   3.149 +    unsigned long pfn;
   3.150 +    struct pfn_info *page;
   3.151 +    
   3.152 +    d->mm.shadow_ldt_mapcnt = 0;
   3.153 +
   3.154 +    for ( i = 16; i < 32; i++ )
   3.155 +    {
   3.156 +        pfn = l1_pgentry_to_pagenr(d->mm.perdomain_pt[i]);
   3.157 +        if ( pfn == 0 ) continue;
   3.158 +        d->mm.perdomain_pt[i] = mk_l1_pgentry(0);
   3.159 +        page = &frame_table[pfn];
   3.160 +        ASSERT_PAGE_IS_TYPE(page, PGT_ldt_page);
   3.161 +        ASSERT_PAGE_IS_DOMAIN(page, d);
   3.162 +        put_page_and_type(page);
   3.163 +    }
   3.164 +
   3.165 +    /* Dispose of the (now possibly invalid) mappings from the TLB.  */
   3.166 +    percpu_info[d->processor].deferred_ops |= DOP_FLUSH_TLB | DOP_RELOAD_LDT;
   3.167 +}
   3.168 +
   3.169 +
   3.170 +static inline void invalidate_shadow_ldt(void)
   3.171 +{
   3.172 +    struct domain *d = current;
   3.173 +    if ( d->mm.shadow_ldt_mapcnt != 0 )
   3.174 +        __invalidate_shadow_ldt(d);
   3.175 +}
   3.176 +
   3.177 +
   3.178 +int alloc_segdesc_page(struct pfn_info *page)
   3.179 +{
   3.180 +    unsigned long *descs = map_domain_mem((page-frame_table) << PAGE_SHIFT);
   3.181 +    int i;
   3.182 +
   3.183 +    for ( i = 0; i < 512; i++ )
   3.184 +        if ( unlikely(!check_descriptor(descs[i*2], descs[i*2+1])) )
   3.185 +            goto fail;
   3.186 +
   3.187 +    unmap_domain_mem(descs);
   3.188 +    return 1;
   3.189 +
   3.190 + fail:
   3.191 +    unmap_domain_mem(descs);
   3.192 +    return 0;
   3.193 +}
   3.194 +
   3.195 +
   3.196 +/* Map shadow page at offset @off. */
   3.197 +int map_ldt_shadow_page(unsigned int off)
   3.198 +{
   3.199 +    struct domain *d = current;
   3.200 +    unsigned long l1e;
   3.201 +
   3.202 +    if ( unlikely(in_irq()) )
   3.203 +        BUG();
   3.204 +
   3.205 +    __get_user(l1e, (unsigned long *)&linear_pg_table[(d->mm.ldt_base >> 
   3.206 +                                                       PAGE_SHIFT) + off]);
   3.207 +
   3.208 +    if ( unlikely(!(l1e & _PAGE_PRESENT)) ||
   3.209 +         unlikely(!get_page_and_type(&frame_table[l1e >> PAGE_SHIFT], 
   3.210 +                                     d, PGT_ldt_page)) )
   3.211 +        return 0;
   3.212 +
   3.213 +    d->mm.perdomain_pt[off + 16] = mk_l1_pgentry(l1e | _PAGE_RW);
   3.214 +    d->mm.shadow_ldt_mapcnt++;
   3.215 +
   3.216 +    return 1;
   3.217 +}
   3.218 +
   3.219 +
   3.220 +static int get_page_from_pagenr(unsigned long page_nr, struct domain *d)
   3.221 +{
   3.222 +    struct pfn_info *page = &frame_table[page_nr];
   3.223 +
   3.224 +    if ( unlikely(!pfn_is_ram(page_nr)) )
   3.225 +    {
   3.226 +        MEM_LOG("Pfn %08lx is not RAM", page_nr);
   3.227 +        return 0;
   3.228 +    }
   3.229 +
   3.230 +    if ( unlikely(!get_page(page, d)) )
   3.231 +    {
   3.232 +        MEM_LOG("Could not get page ref for pfn %08lx", page_nr);
   3.233 +        return 0;
   3.234 +    }
   3.235 +
   3.236 +    return 1;
   3.237 +}
   3.238 +
   3.239 +
   3.240 +static int get_page_and_type_from_pagenr(unsigned long page_nr, 
   3.241 +                                         u32 type,
   3.242 +                                         struct domain *d)
   3.243 +{
   3.244 +    struct pfn_info *page = &frame_table[page_nr];
   3.245 +
   3.246 +    if ( unlikely(!get_page_from_pagenr(page_nr, d)) )
   3.247 +        return 0;
   3.248 +
   3.249 +    if ( unlikely(!get_page_type(page, type)) )
   3.250 +    {
   3.251 +        MEM_LOG("Bad page type for pfn %08lx (%08x)", 
   3.252 +                page_nr, page->type_and_flags);
   3.253 +        put_page(page);
   3.254 +        return 0;
   3.255 +    }
   3.256 +
   3.257 +    return 1;
   3.258 +}
   3.259 +
   3.260 +
   3.261 +/*
    3.262 + * We allow L2 tables to map each other (a.k.a. linear page tables). This
    3.263 + * needs some special care with reference counts and access permissions:
   3.264 + *  1. The mapping entry must be read-only, or the guest may get write access
   3.265 + *     to its own PTEs.
   3.266 + *  2. We must only bump the reference counts for an *already validated*
   3.267 + *     L2 table, or we can end up in a deadlock in get_page_type() by waiting
   3.268 + *     on a validation that is required to complete that validation.
   3.269 + *  3. We only need to increment the reference counts for the mapped page
   3.270 + *     frame if it is mapped by a different L2 table. This is sufficient and
   3.271 + *     also necessary to allow validation of an L2 table mapping itself.
   3.272 + */
   3.273 +static int get_linear_pagetable(l2_pgentry_t l2e, unsigned long pfn)
   3.274 +{
   3.275 +    u32 x, y;
   3.276 +    struct pfn_info *page;
   3.277 +
   3.278 +    if ( (l2_pgentry_val(l2e) & _PAGE_RW) )
   3.279 +    {
   3.280 +        MEM_LOG("Attempt to create linear p.t. with write perms");
   3.281 +        return 0;
   3.282 +    }
   3.283 +
   3.284 +    if ( (l2_pgentry_val(l2e) >> PAGE_SHIFT) != pfn )
   3.285 +    {
   3.286 +        /* Make sure the mapped frame belongs to the correct domain. */
   3.287 +        if ( unlikely(!get_page_from_pagenr(l2_pgentry_to_pagenr(l2e), PTS)) )
   3.288 +            return 0;
   3.289 +
   3.290 +        /*
   3.291 +         * Make sure that the mapped frame is an already-validated L2 table. 
   3.292 +         * If so, atomically increment the count (checking for overflow).
   3.293 +         */
   3.294 +        page = &frame_table[l2_pgentry_to_pagenr(l2e)];
   3.295 +        y = page->type_and_flags;
   3.296 +        do {
   3.297 +            x = y;
   3.298 +            if ( unlikely((x & PGT_count_mask) == PGT_count_mask) ||
   3.299 +                 unlikely((x & (PGT_type_mask|PGT_validated)) != 
   3.300 +                          (PGT_l2_page_table|PGT_validated)) )
   3.301 +            {
   3.302 +                put_page(page);
   3.303 +                return 0;
   3.304 +            }
   3.305 +        }
   3.306 +        while ( (y = cmpxchg(&page->type_and_flags, x, x + 1)) != x );
   3.307 +    }
   3.308 +
   3.309 +    return 1;
   3.310 +}
   3.311 +
   3.312 +
   3.313 +static int get_page_from_l1e(l1_pgentry_t l1e)
   3.314 +{
   3.315 +    unsigned long l1v = l1_pgentry_val(l1e);
   3.316 +    unsigned long pfn = l1_pgentry_to_pagenr(l1e);
   3.317 +    extern int domain_iomem_in_pfn(struct domain *d, unsigned long pfn);
   3.318 +
   3.319 +    if ( !(l1v & _PAGE_PRESENT) )
   3.320 +        return 1;
   3.321 +
   3.322 +    if ( unlikely(l1v & (_PAGE_GLOBAL|_PAGE_PAT)) )
   3.323 +    {
   3.324 +        MEM_LOG("Bad L1 type settings %04lx", l1v & (_PAGE_GLOBAL|_PAGE_PAT));
   3.325 +        return 0;
   3.326 +    }
   3.327 +
   3.328 +    if ( unlikely(!pfn_is_ram(pfn)) )
   3.329 +    {
   3.330 +        if ( IS_PRIV(current) )
   3.331 +            return 1;
   3.332 +
   3.333 +        if ( IS_CAPABLE_PHYSDEV(current) )
   3.334 +            return domain_iomem_in_pfn(current, pfn);
   3.335 +
   3.336 +        MEM_LOG("Non-privileged attempt to map I/O space %08lx", pfn);
   3.337 +        return 0;
   3.338 +    }
   3.339 +
   3.340 +    if ( l1v & _PAGE_RW )
   3.341 +    {
   3.342 +        if ( unlikely(!get_page_and_type_from_pagenr(
   3.343 +            pfn, PGT_writeable_page, GPS)) )
   3.344 +            return 0;
   3.345 +        set_bit(_PGC_tlb_flush_on_type_change, 
   3.346 +                &frame_table[pfn].count_and_flags);
   3.347 +        return 1;
   3.348 +    }
   3.349 +
   3.350 +    return get_page_from_pagenr(pfn, GPS);
   3.351 +}
   3.352 +
   3.353 +
   3.354 +/* NB. Virtual address 'l2e' maps to a machine address within frame 'pfn'. */
   3.355 +static int get_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn)
   3.356 +{
   3.357 +    if ( !(l2_pgentry_val(l2e) & _PAGE_PRESENT) )
   3.358 +        return 1;
   3.359 +
   3.360 +    if ( unlikely((l2_pgentry_val(l2e) & (_PAGE_GLOBAL|_PAGE_PSE))) )
   3.361 +    {
   3.362 +        MEM_LOG("Bad L2 page type settings %04lx",
   3.363 +                l2_pgentry_val(l2e) & (_PAGE_GLOBAL|_PAGE_PSE));
   3.364 +        return 0;
   3.365 +    }
   3.366 +
   3.367 +    if ( unlikely(!get_page_and_type_from_pagenr(
   3.368 +        l2_pgentry_to_pagenr(l2e), PGT_l1_page_table, PTS)) )
   3.369 +        return get_linear_pagetable(l2e, pfn);
   3.370 +
   3.371 +    return 1;
   3.372 +}
   3.373 +
   3.374 +
   3.375 +static void put_page_from_l1e(l1_pgentry_t l1e)
   3.376 +{
   3.377 +    struct pfn_info *page = &frame_table[l1_pgentry_to_pagenr(l1e)];
   3.378 +    unsigned long    l1v  = l1_pgentry_val(l1e);
   3.379 +
   3.380 +    if ( !(l1v & _PAGE_PRESENT) || !pfn_is_ram(l1v >> PAGE_SHIFT) )
   3.381 +        return;
   3.382 +
   3.383 +    if ( l1v & _PAGE_RW )
   3.384 +    {
   3.385 +        put_page_and_type(page);
   3.386 +    }
   3.387 +    else
   3.388 +    {
   3.389 +        /* We expect this is rare so we blow the entire shadow LDT. */
   3.390 +        if ( unlikely(((page->type_and_flags & PGT_type_mask) == 
   3.391 +                       PGT_ldt_page)) &&
   3.392 +             unlikely(((page->type_and_flags & PGT_count_mask) != 0)) )
   3.393 +            invalidate_shadow_ldt();
   3.394 +        put_page(page);
   3.395 +    }
   3.396 +}
   3.397 +
   3.398 +
   3.399 +/*
   3.400 + * NB. Virtual address 'l2e' maps to a machine address within frame 'pfn'.
   3.401 + * Note also that this automatically deals correctly with linear p.t.'s.
   3.402 + */
   3.403 +static void put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn)
   3.404 +{
   3.405 +    if ( (l2_pgentry_val(l2e) & _PAGE_PRESENT) && 
   3.406 +         ((l2_pgentry_val(l2e) >> PAGE_SHIFT) != pfn) )
   3.407 +        put_page_and_type(&frame_table[l2_pgentry_to_pagenr(l2e)]);
   3.408 +}
   3.409 +
   3.410 +
   3.411 +static int alloc_l2_table(struct pfn_info *page)
   3.412 +{
   3.413 +    unsigned long page_nr = page - frame_table;
   3.414 +    l2_pgentry_t *pl2e;
   3.415 +    int i;
   3.416 +   
   3.417 +    pl2e = map_domain_mem(page_nr << PAGE_SHIFT);
   3.418 +
   3.419 +    for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
   3.420 +        if ( unlikely(!get_page_from_l2e(pl2e[i], page_nr)) )
   3.421 +            goto fail;
   3.422 +    
   3.423 +#if defined(__i386__)
   3.424 +    /* Now we add our private high mappings. */
   3.425 +    memcpy(&pl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE], 
   3.426 +           &idle_pg_table[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
   3.427 +           HYPERVISOR_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));
   3.428 +    pl2e[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
   3.429 +        mk_l2_pgentry((page_nr << PAGE_SHIFT) | __PAGE_HYPERVISOR);
   3.430 +    pl2e[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] =
   3.431 +        mk_l2_pgentry(__pa(page->u.domain->mm.perdomain_pt) | 
   3.432 +                      __PAGE_HYPERVISOR);
   3.433 +#endif
   3.434 +
   3.435 +    unmap_domain_mem(pl2e);
   3.436 +    return 1;
   3.437 +
   3.438 + fail:
   3.439 +    while ( i-- > 0 )
   3.440 +        put_page_from_l2e(pl2e[i], page_nr);
   3.441 +
   3.442 +    unmap_domain_mem(pl2e);
   3.443 +    return 0;
   3.444 +}
   3.445 +
   3.446 +
   3.447 +static int alloc_l1_table(struct pfn_info *page)
   3.448 +{
   3.449 +    unsigned long page_nr = page - frame_table;
   3.450 +    l1_pgentry_t *pl1e;
   3.451 +    int i;
   3.452 +
   3.453 +    pl1e = map_domain_mem(page_nr << PAGE_SHIFT);
   3.454 +
   3.455 +    for ( i = 0; i < ENTRIES_PER_L1_PAGETABLE; i++ )
   3.456 +        if ( unlikely(!get_page_from_l1e(pl1e[i])) )
   3.457 +            goto fail;
   3.458 +
   3.459 +    unmap_domain_mem(pl1e);
   3.460 +    return 1;
   3.461 +
   3.462 + fail:
   3.463 +    while ( i-- > 0 )
   3.464 +        put_page_from_l1e(pl1e[i]);
   3.465 +
   3.466 +    unmap_domain_mem(pl1e);
   3.467 +    return 0;
   3.468 +}
   3.469 +
   3.470 +
   3.471 +static void free_l2_table(struct pfn_info *page)
   3.472 +{
   3.473 +    unsigned long page_nr = page - frame_table;
   3.474 +    l2_pgentry_t *pl2e;
   3.475 +    int i;
   3.476 +
   3.477 +    pl2e = map_domain_mem(page_nr << PAGE_SHIFT);
   3.478 +
   3.479 +    for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
   3.480 +        put_page_from_l2e(pl2e[i], page_nr);
   3.481 +
   3.482 +    unmap_domain_mem(pl2e);
   3.483 +}
   3.484 +
   3.485 +
   3.486 +static void free_l1_table(struct pfn_info *page)
   3.487 +{
   3.488 +    unsigned long page_nr = page - frame_table;
   3.489 +    l1_pgentry_t *pl1e;
   3.490 +    int i;
   3.491 +
   3.492 +    pl1e = map_domain_mem(page_nr << PAGE_SHIFT);
   3.493 +
   3.494 +    for ( i = 0; i < ENTRIES_PER_L1_PAGETABLE; i++ )
   3.495 +        put_page_from_l1e(pl1e[i]);
   3.496 +
   3.497 +    unmap_domain_mem(pl1e);
   3.498 +}
   3.499 +
   3.500 +
   3.501 +static inline int update_l2e(l2_pgentry_t *pl2e, 
   3.502 +                             l2_pgentry_t  ol2e, 
   3.503 +                             l2_pgentry_t  nl2e)
   3.504 +{
   3.505 +    unsigned long o = cmpxchg((unsigned long *)pl2e, 
   3.506 +                              l2_pgentry_val(ol2e), 
   3.507 +                              l2_pgentry_val(nl2e));
   3.508 +    if ( o != l2_pgentry_val(ol2e) )
   3.509 +        MEM_LOG("Failed to update %08lx -> %08lx: saw %08lx\n",
   3.510 +                l2_pgentry_val(ol2e), l2_pgentry_val(nl2e), o);
   3.511 +    return (o == l2_pgentry_val(ol2e));
   3.512 +}
   3.513 +
   3.514 +
   3.515 +/* Update the L2 entry at pl2e to new value nl2e. pl2e is within frame pfn. */
   3.516 +static int mod_l2_entry(l2_pgentry_t *pl2e, 
   3.517 +                        l2_pgentry_t nl2e, 
   3.518 +                        unsigned long pfn)
   3.519 +{
   3.520 +    l2_pgentry_t ol2e;
   3.521 +    unsigned long _ol2e;
   3.522 +
   3.523 +    if ( unlikely((((unsigned long)pl2e & (PAGE_SIZE-1)) >> 2) >=
   3.524 +                  DOMAIN_ENTRIES_PER_L2_PAGETABLE) )
   3.525 +    {
   3.526 +        MEM_LOG("Illegal L2 update attempt in Xen-private area %p", pl2e);
   3.527 +        return 0;
   3.528 +    }
   3.529 +
   3.530 +    if ( unlikely(__get_user(_ol2e, (unsigned long *)pl2e) != 0) )
   3.531 +        return 0;
   3.532 +    ol2e = mk_l2_pgentry(_ol2e);
   3.533 +
   3.534 +    if ( l2_pgentry_val(nl2e) & _PAGE_PRESENT )
   3.535 +    {
   3.536 +        /* Differ in mapping (bits 12-31) or presence (bit 0)? */
   3.537 +        if ( ((l2_pgentry_val(ol2e) ^ l2_pgentry_val(nl2e)) & ~0xffe) == 0 )
   3.538 +            return update_l2e(pl2e, ol2e, nl2e);
   3.539 +
   3.540 +        if ( unlikely(!get_page_from_l2e(nl2e, pfn)) )
   3.541 +            return 0;
   3.542 +        
   3.543 +        if ( unlikely(!update_l2e(pl2e, ol2e, nl2e)) )
   3.544 +        {
   3.545 +            put_page_from_l2e(nl2e, pfn);
   3.546 +            return 0;
   3.547 +        }
   3.548 +        
   3.549 +        put_page_from_l2e(ol2e, pfn);
   3.550 +        return 1;
   3.551 +    }
   3.552 +
   3.553 +    if ( unlikely(!update_l2e(pl2e, ol2e, nl2e)) )
   3.554 +        return 0;
   3.555 +
   3.556 +    put_page_from_l2e(ol2e, pfn);
   3.557 +    return 1;
   3.558 +}
   3.559 +
   3.560 +
   3.561 +static inline int update_l1e(l1_pgentry_t *pl1e, 
   3.562 +                             l1_pgentry_t  ol1e, 
   3.563 +                             l1_pgentry_t  nl1e)
   3.564 +{
   3.565 +    unsigned long o = l1_pgentry_val(ol1e);
   3.566 +    unsigned long n = l1_pgentry_val(nl1e);
   3.567 +
   3.568 +    if ( unlikely(cmpxchg_user(pl1e, o, n) != 0) ||
   3.569 +         unlikely(o != l1_pgentry_val(ol1e)) )
   3.570 +    {
   3.571 +        MEM_LOG("Failed to update %08lx -> %08lx: saw %08lx\n",
   3.572 +                l1_pgentry_val(ol1e), l1_pgentry_val(nl1e), o);
   3.573 +        return 0;
   3.574 +    }
   3.575 +
   3.576 +    return 1;
   3.577 +}
   3.578 +
   3.579 +
   3.580 +/* Update the L1 entry at pl1e to new value nl1e. */
   3.581 +static int mod_l1_entry(l1_pgentry_t *pl1e, l1_pgentry_t nl1e)
   3.582 +{
   3.583 +    l1_pgentry_t ol1e;
   3.584 +    unsigned long _ol1e;
   3.585 +
   3.586 +    if ( unlikely(__get_user(_ol1e, (unsigned long *)pl1e) != 0) )
   3.587 +    {
   3.588 +        MEM_LOG("Bad get_user\n");
   3.589 +        return 0;
   3.590 +    }
   3.591 +    
   3.592 +    ol1e = mk_l1_pgentry(_ol1e);
   3.593 +
   3.594 +    if ( l1_pgentry_val(nl1e) & _PAGE_PRESENT )
   3.595 +    {
   3.596 +        /* Differ in mapping (bits 12-31), r/w (bit 1), or presence (bit 0)? */
   3.597 +        if ( ((l1_pgentry_val(ol1e) ^ l1_pgentry_val(nl1e)) & ~0xffc) == 0 )
   3.598 +            return update_l1e(pl1e, ol1e, nl1e);
   3.599 +
   3.600 +        if ( unlikely(!get_page_from_l1e(nl1e)) )
   3.601 +            return 0;
   3.602 +        
   3.603 +        if ( unlikely(!update_l1e(pl1e, ol1e, nl1e)) )
   3.604 +        {
   3.605 +            put_page_from_l1e(nl1e);
   3.606 +            return 0;
   3.607 +        }
   3.608 +        
   3.609 +        put_page_from_l1e(ol1e);
   3.610 +        return 1;
   3.611 +    }
   3.612 +
   3.613 +    if ( unlikely(!update_l1e(pl1e, ol1e, nl1e)) )
   3.614 +        return 0;
   3.615 +    
   3.616 +    put_page_from_l1e(ol1e);
   3.617 +    return 1;
   3.618 +}
   3.619 +
   3.620 +
   3.621 +int alloc_page_type(struct pfn_info *page, unsigned int type)
   3.622 +{
   3.623 +    if ( unlikely(test_and_clear_bit(_PGC_tlb_flush_on_type_change, 
   3.624 +                                     &page->count_and_flags)) )
   3.625 +    {
   3.626 +        struct domain *p = page->u.domain;
   3.627 +        if ( unlikely(NEED_FLUSH(tlbflush_time[p->processor],
   3.628 +                                 page->tlbflush_timestamp)) )
   3.629 +        {
   3.630 +            perfc_incr(need_flush_tlb_flush);
   3.631 +            flush_tlb_cpu(p->processor);
   3.632 +        }
   3.633 +    }
   3.634 +
   3.635 +    switch ( type )
   3.636 +    {
   3.637 +    case PGT_l1_page_table:
   3.638 +        return alloc_l1_table(page);
   3.639 +    case PGT_l2_page_table:
   3.640 +        return alloc_l2_table(page);
   3.641 +    case PGT_gdt_page:
   3.642 +    case PGT_ldt_page:
   3.643 +        return alloc_segdesc_page(page);
   3.644 +    default:
   3.645 +        BUG();
   3.646 +    }
   3.647 +
   3.648 +    return 0;
   3.649 +}
   3.650 +
   3.651 +
   3.652 +void free_page_type(struct pfn_info *page, unsigned int type)
   3.653 +{
   3.654 +    switch ( type )
   3.655 +    {
   3.656 +    case PGT_l1_page_table:
   3.657 +        free_l1_table(page);
   3.658 +        if ( unlikely(current->mm.shadow_mode) && 
   3.659 +             (get_shadow_status(&current->mm, 
   3.660 +                                page-frame_table) & PSH_shadowed) )
   3.661 +        {
   3.662 +            /*
   3.663 +             * Using 'current->mm' is safe and correct because page-table pages
   3.664 +             * are not shared across domains. Updates to such pages' types are
   3.665 +             * thus only done within the context of the owning domain. The one
   3.666 +             * exception is when destroying a domain; however, this is not a
   3.667 +             * problem as the currently-executing domain will not have this MFN
   3.668 +             * shadowed, and at domain end-of-day we explicitly unshadow
   3.669 +             * everything so that nothing will get left lying around.
   3.670 +             */
   3.671 +            unshadow_table( page-frame_table, type );
   3.672 +            put_shadow_status(&current->mm);
   3.673 +        }
   3.674 +        break;
   3.675 +
   3.676 +    case PGT_l2_page_table:
   3.677 +        free_l2_table(page);
   3.678 +        if ( unlikely(current->mm.shadow_mode) && 
   3.679 +             (get_shadow_status(&current->mm, 
   3.680 +                                page-frame_table) & PSH_shadowed) )
   3.681 +        {
   3.682 +            unshadow_table( page-frame_table, type );
   3.683 +            put_shadow_status(&current->mm);
   3.684 +        }
   3.685 +        break;
   3.686 +
   3.687 +    default:
   3.688 +        BUG();
   3.689 +    }
   3.690 +}
   3.691 +
   3.692 +
   3.693 +static int do_extended_command(unsigned long ptr, unsigned long val)
   3.694 +{
   3.695 +    int okay = 1, cpu = smp_processor_id();
   3.696 +    unsigned int cmd = val & MMUEXT_CMD_MASK;
   3.697 +    unsigned long pfn = ptr >> PAGE_SHIFT;
   3.698 +    unsigned long old_base_pfn;
   3.699 +    struct pfn_info *page = &frame_table[pfn];
   3.700 +    struct domain *d = current, *nd, *e;
   3.701 +    u32 x, y;
   3.702 +    domid_t domid;
   3.703 +
   3.704 +    switch ( cmd )
   3.705 +    {
   3.706 +    case MMUEXT_PIN_L1_TABLE:
   3.707 +    case MMUEXT_PIN_L2_TABLE:
   3.708 +        okay = get_page_and_type_from_pagenr(
   3.709 +            pfn, 
   3.710 +            (cmd==MMUEXT_PIN_L2_TABLE) ? PGT_l2_page_table : PGT_l1_page_table,
   3.711 +            PTS);
   3.712 +        if ( unlikely(!okay) )
   3.713 +        {
   3.714 +            MEM_LOG("Error while pinning pfn %08lx", pfn);
   3.715 +            put_page(page);
   3.716 +            break;
   3.717 +        }
   3.718 +
   3.719 +        if ( unlikely(test_and_set_bit(_PGC_guest_pinned, 
   3.720 +                                       &page->count_and_flags)) )
   3.721 +        {
   3.722 +            MEM_LOG("Pfn %08lx already pinned", pfn);
   3.723 +            put_page_and_type(page);
   3.724 +            okay = 0;
   3.725 +            break;
   3.726 +        }
   3.727 +
   3.728 +        break;
   3.729 +
   3.730 +    case MMUEXT_UNPIN_TABLE:
   3.731 +        if ( unlikely(!(okay = get_page_from_pagenr(pfn, PTS))) )
   3.732 +        {
   3.733 +            MEM_LOG("Page %08lx bad domain (dom=%p)",
   3.734 +                    ptr, page->u.domain);
   3.735 +        }
   3.736 +        else if ( likely(test_and_clear_bit(_PGC_guest_pinned, 
   3.737 +                                            &page->count_and_flags)) )
   3.738 +        {
   3.739 +            put_page_and_type(page);
   3.740 +            put_page(page);
   3.741 +        }
   3.742 +        else
   3.743 +        {
   3.744 +            okay = 0;
   3.745 +            put_page(page);
   3.746 +            MEM_LOG("Pfn %08lx not pinned", pfn);
   3.747 +        }
   3.748 +        break;
   3.749 +
   3.750 +    case MMUEXT_NEW_BASEPTR:
   3.751 +        okay = get_page_and_type_from_pagenr(pfn, PGT_l2_page_table, d);
   3.752 +        if ( likely(okay) )
   3.753 +        {
   3.754 +            invalidate_shadow_ldt();
   3.755 +
   3.756 +            percpu_info[cpu].deferred_ops &= ~DOP_FLUSH_TLB;
   3.757 +            old_base_pfn = pagetable_val(d->mm.pagetable) >> PAGE_SHIFT;
   3.758 +            d->mm.pagetable = mk_pagetable(pfn << PAGE_SHIFT);
   3.759 +
   3.760 +            shadow_mk_pagetable(&d->mm);
   3.761 +
   3.762 +            write_ptbase(&d->mm);
   3.763 +
   3.764 +            put_page_and_type(&frame_table[old_base_pfn]);    
   3.765 +
   3.766 +            /*
   3.767 +             * Note that we tick the clock /after/ dropping the old base's
   3.768 +             * reference count. If the page tables got freed then this will
   3.769 +             * avoid unnecessary TLB flushes when the pages are reused.
   3.770 +             */
   3.771 +            tlb_clocktick();
   3.772 +        }
   3.773 +        else
   3.774 +        {
   3.775 +            MEM_LOG("Error while installing new baseptr %08lx", ptr);
   3.776 +        }
   3.777 +        break;
   3.778 +        
   3.779 +    case MMUEXT_TLB_FLUSH:
   3.780 +        percpu_info[cpu].deferred_ops |= DOP_FLUSH_TLB;
   3.781 +        break;
   3.782 +    
   3.783 +    case MMUEXT_INVLPG:
   3.784 +        __flush_tlb_one(ptr);
   3.785 +        break;
   3.786 +
   3.787 +    case MMUEXT_SET_LDT:
   3.788 +    {
   3.789 +        unsigned long ents = val >> MMUEXT_CMD_SHIFT;
   3.790 +        if ( ((ptr & (PAGE_SIZE-1)) != 0) || 
   3.791 +             (ents > 8192) ||
   3.792 +             ((ptr+ents*LDT_ENTRY_SIZE) < ptr) ||
   3.793 +             ((ptr+ents*LDT_ENTRY_SIZE) > PAGE_OFFSET) )
   3.794 +        {
   3.795 +            okay = 0;
   3.796 +            MEM_LOG("Bad args to SET_LDT: ptr=%08lx, ents=%08lx", ptr, ents);
   3.797 +        }
   3.798 +        else if ( (d->mm.ldt_ents != ents) || 
   3.799 +                  (d->mm.ldt_base != ptr) )
   3.800 +        {
   3.801 +            invalidate_shadow_ldt();
   3.802 +            d->mm.ldt_base = ptr;
   3.803 +            d->mm.ldt_ents = ents;
   3.804 +            load_LDT(d);
   3.805 +            percpu_info[cpu].deferred_ops &= ~DOP_RELOAD_LDT;
   3.806 +            if ( ents != 0 )
   3.807 +                percpu_info[cpu].deferred_ops |= DOP_RELOAD_LDT;
   3.808 +        }
   3.809 +        break;
   3.810 +    }
   3.811 +
   3.812 +    case MMUEXT_SET_SUBJECTDOM:
   3.813 +        domid = ((domid_t)((ptr&~0xFFFF)|(val>>16)));
   3.814 +
   3.815 +        if ( !IS_PRIV(d) )
   3.816 +        {
   3.817 +            MEM_LOG("Dom %u has no privilege to set subject domain",
   3.818 +                    d->domain);
   3.819 +            okay = 0;
   3.820 +        }
   3.821 +        else
   3.822 +        {
   3.823 +            if ( percpu_info[cpu].gps != NULL )
   3.824 +                put_domain(percpu_info[cpu].gps);
   3.825 +            percpu_info[cpu].gps = find_domain_by_id(domid);
   3.826 +            percpu_info[cpu].pts = (val & SET_PAGETABLE_SUBJECTDOM) ? 
   3.827 +                percpu_info[cpu].gps : NULL;
   3.828 +            if ( percpu_info[cpu].gps == NULL )
   3.829 +            {
   3.830 +                MEM_LOG("Unknown domain '%u'", domid);
   3.831 +                okay = 0;
   3.832 +            }
   3.833 +        }
   3.834 +        break;
   3.835 +
   3.836 +    case MMUEXT_REASSIGN_PAGE:
   3.837 +        if ( unlikely(!IS_PRIV(d)) )
   3.838 +        {
   3.839 +            MEM_LOG("Dom %u has no reassignment priv", d->domain);
   3.840 +            okay = 0;
   3.841 +            break;
   3.842 +        }
   3.843 +
   3.844 +        if ( unlikely((e = percpu_info[cpu].gps) == NULL) )
   3.845 +        {
   3.846 +            MEM_LOG("No GPS to reassign pfn %08lx to\n", pfn);
   3.847 +            okay = 0;
   3.848 +            break;
   3.849 +        }
   3.850 +
   3.851 +        /*
   3.852 +         * Grab both page_list locks, in order. This prevents the page from
   3.853 +         * disappearing elsewhere while we modify the owner, and we'll need
   3.854 +         * both locks if we're successful so that we can change lists.
   3.855 +         */
   3.856 +        if ( d < e )
   3.857 +        {
   3.858 +            spin_lock(&d->page_alloc_lock);
   3.859 +            spin_lock(&e->page_alloc_lock);
   3.860 +        }
   3.861 +        else
   3.862 +        {
   3.863 +            spin_lock(&e->page_alloc_lock);
   3.864 +            spin_lock(&d->page_alloc_lock);
   3.865 +        }
   3.866 +
   3.867 +        /* A domain shouldn't have PGC_allocated pages when it is dying. */
   3.868 +        if ( unlikely(test_bit(DF_DYING, &e->flags)) ||
   3.869 +             unlikely(IS_XEN_HEAP_FRAME(page)) )
   3.870 +        {
   3.871 +            okay = 0;
   3.872 +            goto reassign_fail;
   3.873 +        }
   3.874 +
   3.875 +        /*
   3.876 +         * The tricky bit: atomically change owner while there is just one
   3.877 +         * benign reference to the page (PGC_allocated). If that reference
   3.878 +         * disappears then the deallocation routine will safely spin.
   3.879 +         */
   3.880 +        nd = page->u.domain;
   3.881 +        y  = page->count_and_flags;
   3.882 +        do {
   3.883 +            x = y;
   3.884 +            if ( unlikely((x & (PGC_count_mask|PGC_allocated)) != 
   3.885 +                          (1|PGC_allocated)) ||
   3.886 +                 unlikely(nd != d) )
   3.887 +            {
   3.888 +                MEM_LOG("Bad page values %08lx: ed=%p(%u), sd=%p,"
   3.889 +                        " caf=%08x, taf=%08x\n", page_to_pfn(page),
   3.890 +                        d, d->domain, nd, x, page->type_and_flags);
   3.891 +                okay = 0;
   3.892 +                goto reassign_fail;
   3.893 +            }
   3.894 +            __asm__ __volatile__(
   3.895 +                LOCK_PREFIX "cmpxchg8b %3"
   3.896 +                : "=a" (nd), "=d" (y), "=b" (e),
   3.897 +                "=m" (*(volatile u64 *)(&page->u.domain))
   3.898 +                : "0" (d), "1" (x), "b" (e), "c" (x) );
   3.899 +        } 
   3.900 +        while ( unlikely(nd != d) || unlikely(y != x) );
   3.901 +        
   3.902 +        /*
   3.903 +         * Unlink from 'd'. We transferred at least one reference to 'e', so
    3.904 +         * no one else is spinning to try to delete this page from 'd'.
   3.905 +         */
   3.906 +        d->tot_pages--;
   3.907 +        list_del(&page->list);
   3.908 +        
   3.909 +        /*
   3.910 +         * Add the page to 'e'. Someone may already have removed the last
   3.911 +         * reference and want to remove the page from 'e'. However, we have
   3.912 +         * the lock so they'll spin waiting for us.
   3.913 +         */
   3.914 +        if ( unlikely(e->tot_pages++ == 0) )
   3.915 +            get_domain(e);
   3.916 +        list_add_tail(&page->list, &e->page_list);
   3.917 +
   3.918 +    reassign_fail:        
   3.919 +        spin_unlock(&d->page_alloc_lock);
   3.920 +        spin_unlock(&e->page_alloc_lock);
   3.921 +        break;
   3.922 +
   3.923 +    case MMUEXT_RESET_SUBJECTDOM:
   3.924 +        if ( percpu_info[cpu].gps != NULL )
   3.925 +            put_domain(percpu_info[cpu].gps);
   3.926 +        percpu_info[cpu].gps = percpu_info[cpu].pts = NULL;
   3.927 +        break;
   3.928 +
   3.929 +    default:
   3.930 +        MEM_LOG("Invalid extended pt command 0x%08lx", val & MMUEXT_CMD_MASK);
   3.931 +        okay = 0;
   3.932 +        break;
   3.933 +    }
   3.934 +
   3.935 +    return okay;
   3.936 +}
   3.937 +
   3.938 +
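The MMUEXT_REASSIGN_PAGE case in do_extended_command() above combines two techniques: taking the two page_alloc_locks in a fixed (pointer) order so concurrent reassignments cannot deadlock, and a 64-bit compare-and-exchange that swaps the owner only while count_and_flags still shows exactly one benign PGC_allocated reference. The sketch below is a simplified, hypothetical model of that compare-and-exchange loop; it packs an owner id and a count word into a single 64-bit atomic and uses C11 atomics instead of cmpxchg8b, so the names and constants are illustrative, not the Xen definitions.

    #include <stdatomic.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Hypothetical, simplified layout: owner id in the high 32 bits,
     * count-and-flags in the low 32 bits, packed so both can be swapped with
     * one 64-bit compare-and-exchange (the role cmpxchg8b plays above). */
    #define PGC_allocated  0x80000000u
    #define PGC_count_mask 0x7fffffffu

    typedef _Atomic uint64_t packed_page_t;

    static uint64_t pack(uint32_t owner, uint32_t caf)
    {
        return ((uint64_t)owner << 32) | caf;
    }

    /* Returns 1 on success: ownership moved from 'from' to 'to' while the
     * page held exactly one benign (PGC_allocated) reference. */
    static int reassign_page(packed_page_t *page, uint32_t from, uint32_t to)
    {
        uint64_t old = atomic_load(page);
        for (;;) {
            uint32_t owner = (uint32_t)(old >> 32);
            uint32_t caf   = (uint32_t)old;
            if (owner != from ||
                (caf & (PGC_count_mask | PGC_allocated)) != (1 | PGC_allocated))
                return 0;  /* extra references appeared, or wrong owner: bail out */
            if (atomic_compare_exchange_weak(page, &old, pack(to, caf)))
                return 1;  /* owner swapped atomically with the reference check */
            /* 'old' was refreshed by the failed CAS; re-check and retry. */
        }
    }

    int main(void)
    {
        packed_page_t page = pack(1, PGC_allocated | 1);
        printf("reassign 1->2: %d\n", reassign_page(&page, 1, 2));
        printf("reassign 1->3: %d\n", reassign_page(&page, 1, 3)); /* fails: owner is now 2 */
        return 0;
    }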
   3.939 +int do_mmu_update(mmu_update_t *ureqs, int count, int *success_count)
   3.940 +{
   3.941 +    mmu_update_t req;
   3.942 +    unsigned long va = 0, deferred_ops, pfn, prev_pfn = 0;
   3.943 +    struct pfn_info *page;
   3.944 +    int rc = 0, okay = 1, i, cpu = smp_processor_id();
   3.945 +    unsigned int cmd;
   3.946 +    unsigned long prev_spfn = 0;
   3.947 +    l1_pgentry_t *prev_spl1e = 0;
   3.948 +
   3.949 +    perfc_incrc(calls_to_mmu_update); 
   3.950 +    perfc_addc(num_page_updates, count);
   3.951 +
   3.952 +    for ( i = 0; i < count; i++ )
   3.953 +    {
   3.954 +        if ( unlikely(copy_from_user(&req, ureqs, sizeof(req)) != 0) )
   3.955 +        {
   3.956 +            MEM_LOG("Bad copy_from_user");
   3.957 +            rc = -EFAULT;
   3.958 +            break;
   3.959 +        }
   3.960 +
   3.961 +        cmd = req.ptr & (sizeof(l1_pgentry_t)-1);
   3.962 +        pfn = req.ptr >> PAGE_SHIFT;
   3.963 +
   3.964 +        okay = 0;
   3.965 +
   3.966 +        switch ( cmd )
   3.967 +        {
   3.968 +            /*
   3.969 +             * MMU_NORMAL_PT_UPDATE: Normal update to any level of page table.
   3.970 +             */
   3.971 +        case MMU_NORMAL_PT_UPDATE:
   3.972 +            if ( unlikely(!get_page_from_pagenr(pfn, PTS)) )
   3.973 +            {
   3.974 +                MEM_LOG("Could not get page for normal update");
   3.975 +                break;
   3.976 +            }
   3.977 +
   3.978 +            if ( likely(prev_pfn == pfn) )
   3.979 +            {
   3.980 +                va = (va & PAGE_MASK) | (req.ptr & ~PAGE_MASK);
   3.981 +            }
   3.982 +            else
   3.983 +            {
   3.984 +                if ( prev_pfn != 0 )
   3.985 +                    unmap_domain_mem((void *)va);
   3.986 +                va = (unsigned long)map_domain_mem(req.ptr);
   3.987 +                prev_pfn = pfn;
   3.988 +            }
   3.989 +
   3.990 +            page = &frame_table[pfn];
   3.991 +            switch ( (page->type_and_flags & PGT_type_mask) )
   3.992 +            {
   3.993 +            case PGT_l1_page_table: 
   3.994 +                if ( likely(get_page_type(page, PGT_l1_page_table)) )
   3.995 +                {
   3.996 +                    okay = mod_l1_entry((l1_pgentry_t *)va, 
   3.997 +                                        mk_l1_pgentry(req.val)); 
   3.998 +
   3.999 +                    if ( okay && unlikely(current->mm.shadow_mode) &&
  3.1000 +                         (get_shadow_status(&current->mm, page-frame_table) &
  3.1001 +                          PSH_shadowed) )
  3.1002 +                    {
  3.1003 +                        shadow_l1_normal_pt_update( req.ptr, req.val, 
  3.1004 +                                                    &prev_spfn, &prev_spl1e );
  3.1005 +                        put_shadow_status(&current->mm);
  3.1006 +                    }
  3.1007 +
  3.1008 +                    put_page_type(page);
  3.1009 +                }
  3.1010 +                break;
  3.1011 +            case PGT_l2_page_table:
  3.1012 +                if ( likely(get_page_type(page, PGT_l2_page_table)) )
  3.1013 +                {
  3.1014 +                    okay = mod_l2_entry((l2_pgentry_t *)va, 
  3.1015 +                                        mk_l2_pgentry(req.val),
  3.1016 +                                        pfn); 
  3.1017 +
  3.1018 +                    if ( okay && unlikely(current->mm.shadow_mode) &&
  3.1019 +                         (get_shadow_status(&current->mm, page-frame_table) & 
  3.1020 +                          PSH_shadowed) )
  3.1021 +                    {
  3.1022 +                        shadow_l2_normal_pt_update( req.ptr, req.val );
  3.1023 +                        put_shadow_status(&current->mm);
  3.1024 +                    }
  3.1025 +
  3.1026 +                    put_page_type(page);
  3.1027 +                }
  3.1028 +                break;
  3.1029 +            default:
  3.1030 +                if ( likely(get_page_type(page, PGT_writeable_page)) )
  3.1031 +                {
  3.1032 +                    *(unsigned long *)va = req.val;
  3.1033 +                    okay = 1;
  3.1034 +                    put_page_type(page);
  3.1035 +                }
  3.1036 +                break;
  3.1037 +            }
  3.1038 +
  3.1039 +            put_page(page);
  3.1040 +
  3.1041 +            break;
  3.1042 +
  3.1043 +        case MMU_MACHPHYS_UPDATE:
  3.1044 +            if ( unlikely(!get_page_from_pagenr(pfn, GPS)) )
  3.1045 +            {
  3.1046 +                MEM_LOG("Could not get page for mach->phys update");
  3.1047 +                break;
  3.1048 +            }
  3.1049 +
  3.1050 +            machine_to_phys_mapping[pfn] = req.val;
  3.1051 +            okay = 1;
  3.1052 +
  3.1053 +            /*
  3.1054 +             * If in log-dirty mode, mark the corresponding pseudo-physical
  3.1055 +             * page as dirty.
  3.1056 +             */
  3.1057 +            if( unlikely(current->mm.shadow_mode == SHM_logdirty) )
  3.1058 +                mark_dirty( &current->mm, pfn );
  3.1059 +
  3.1060 +            put_page(&frame_table[pfn]);
  3.1061 +            break;
  3.1062 +
  3.1063 +            /*
  3.1064 +             * MMU_EXTENDED_COMMAND: Extended command is specified
   3.1065 +             * in the least-significant bits of the 'value' field.
  3.1066 +             */
  3.1067 +        case MMU_EXTENDED_COMMAND:
  3.1068 +            req.ptr &= ~(sizeof(l1_pgentry_t) - 1);
  3.1069 +            okay = do_extended_command(req.ptr, req.val);
  3.1070 +            break;
  3.1071 +
  3.1072 +        default:
  3.1073 +            MEM_LOG("Invalid page update command %08lx", req.ptr);
  3.1074 +            break;
  3.1075 +        }
  3.1076 +
  3.1077 +        if ( unlikely(!okay) )
  3.1078 +        {
  3.1079 +            rc = -EINVAL;
  3.1080 +            break;
  3.1081 +        }
  3.1082 +
  3.1083 +        ureqs++;
  3.1084 +    }
  3.1085 +
  3.1086 +    if ( prev_pfn != 0 )
  3.1087 +        unmap_domain_mem((void *)va);
  3.1088 +
  3.1089 +    if( prev_spl1e != 0 ) 
  3.1090 +        unmap_domain_mem((void *)prev_spl1e);
  3.1091 +
  3.1092 +    deferred_ops = percpu_info[cpu].deferred_ops;
  3.1093 +    percpu_info[cpu].deferred_ops = 0;
  3.1094 +
  3.1095 +    if ( deferred_ops & DOP_FLUSH_TLB )
  3.1096 +        local_flush_tlb();
  3.1097 +
  3.1098 +    if ( deferred_ops & DOP_RELOAD_LDT )
  3.1099 +        (void)map_ldt_shadow_page(0);
  3.1100 +
  3.1101 +    if ( unlikely(percpu_info[cpu].gps != NULL) )
  3.1102 +    {
  3.1103 +        put_domain(percpu_info[cpu].gps);
  3.1104 +        percpu_info[cpu].gps = percpu_info[cpu].pts = NULL;
  3.1105 +    }
  3.1106 +
  3.1107 +    if ( unlikely(success_count != NULL) )
  3.1108 +        put_user(count, success_count);
  3.1109 +
  3.1110 +    return rc;
  3.1111 +}
  3.1112 +
  3.1113 +
  3.1114 +int do_update_va_mapping(unsigned long page_nr, 
  3.1115 +                         unsigned long val, 
  3.1116 +                         unsigned long flags)
  3.1117 +{
  3.1118 +    struct domain *p = current;
  3.1119 +    int err = 0;
  3.1120 +    unsigned int cpu = p->processor;
  3.1121 +    unsigned long deferred_ops;
  3.1122 +
  3.1123 +    perfc_incrc(calls_to_update_va);
  3.1124 +
  3.1125 +    if ( unlikely(page_nr >= (HYPERVISOR_VIRT_START >> PAGE_SHIFT)) )
  3.1126 +        return -EINVAL;
  3.1127 +
  3.1128 +    /*
  3.1129 +     * XXX When we make this support 4MB superpages we should also deal with 
  3.1130 +     * the case of updating L2 entries.
  3.1131 +     */
  3.1132 +
  3.1133 +    if ( unlikely(!mod_l1_entry(&linear_pg_table[page_nr], 
  3.1134 +                                mk_l1_pgentry(val))) )
  3.1135 +        err = -EINVAL;
  3.1136 +
  3.1137 +    if ( unlikely(p->mm.shadow_mode) )
  3.1138 +    {
  3.1139 +        unsigned long sval;
  3.1140 +
  3.1141 +        l1pte_no_fault( &current->mm, &val, &sval );
  3.1142 +
  3.1143 +        if ( unlikely(__put_user(sval, ((unsigned long *)(
  3.1144 +            &shadow_linear_pg_table[page_nr])))) )
  3.1145 +        {
  3.1146 +            /*
   3.1147 +             * Since L2's are guaranteed RW, failure indicates the page was not
  3.1148 +             * shadowed, so ignore.
  3.1149 +             */
  3.1150 +            perfc_incrc(shadow_update_va_fail);
  3.1151 +        }
  3.1152 +
  3.1153 +        /*
  3.1154 +         * If we're in log-dirty mode then we need to note that we've updated
  3.1155 +         * the PTE in the PT-holding page. We need the machine frame number
  3.1156 +         * for this.
  3.1157 +         */
  3.1158 +        if ( p->mm.shadow_mode == SHM_logdirty )
  3.1159 +            mark_dirty( &current->mm, va_to_l1mfn(page_nr<<PAGE_SHIFT) );  
  3.1160 +  
  3.1161 +        check_pagetable( p, p->mm.pagetable, "va" ); /* debug */
  3.1162 +    }
  3.1163 +
  3.1164 +    deferred_ops = percpu_info[cpu].deferred_ops;
  3.1165 +    percpu_info[cpu].deferred_ops = 0;
  3.1166 +
  3.1167 +    if ( unlikely(deferred_ops & DOP_FLUSH_TLB) || 
  3.1168 +         unlikely(flags & UVMF_FLUSH_TLB) )
  3.1169 +        local_flush_tlb();
  3.1170 +    else if ( unlikely(flags & UVMF_INVLPG) )
  3.1171 +        __flush_tlb_one(page_nr << PAGE_SHIFT);
  3.1172 +
  3.1173 +    if ( unlikely(deferred_ops & DOP_RELOAD_LDT) )
  3.1174 +        (void)map_ldt_shadow_page(0);
  3.1175 +    
  3.1176 +    return err;
  3.1177 +}
  3.1178 +
  3.1179 +int do_update_va_mapping_otherdomain(unsigned long page_nr, 
  3.1180 +                                     unsigned long val, 
  3.1181 +                                     unsigned long flags,
  3.1182 +                                     domid_t domid)
  3.1183 +{
  3.1184 +    unsigned int cpu = smp_processor_id();
  3.1185 +    struct domain *d;
  3.1186 +    int rc;
  3.1187 +
  3.1188 +    if ( unlikely(!IS_PRIV(current)) )
  3.1189 +        return -EPERM;
  3.1190 +
  3.1191 +    percpu_info[cpu].gps = d = find_domain_by_id(domid);
  3.1192 +    if ( unlikely(d == NULL) )
  3.1193 +    {
  3.1194 +        MEM_LOG("Unknown domain '%u'", domid);
  3.1195 +        return -ESRCH;
  3.1196 +    }
  3.1197 +
  3.1198 +    rc = do_update_va_mapping(page_nr, val, flags);
  3.1199 +
  3.1200 +    put_domain(d);
  3.1201 +    percpu_info[cpu].gps = NULL;
  3.1202 +
  3.1203 +    return rc;
  3.1204 +}
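do_mmu_update() above consumes a guest-supplied array of (ptr, val) requests, with the command encoded in the low bits of ptr (which is why the handler masks them off before mapping the target frame). The fragment below is a hedged sketch of how a caller might assemble such a batch; mmu_update_t, the command values and all addresses here are simplified stand-ins, not the real hypervisor-ifs definitions.

    #include <stdint.h>
    #include <stdio.h>

    /* Simplified stand-in for the hypervisor interface type; the real
     * definition (and the real command values) live in the hypervisor-ifs
     * headers. */
    typedef struct { uint32_t ptr; uint32_t val; } mmu_update_t;

    #define MMU_NORMAL_PT_UPDATE 0  /* illustrative: cmd lives in the low bits of ptr */
    #define MMU_MACHPHYS_UPDATE  1

    /* Queue one PTE write: 'pte_maddr' is the machine address of the PTE to
     * modify, 'new_val' the new entry.  The low two bits of ptr carry the
     * command, which is why do_mmu_update masks them off before use. */
    static void queue_pt_update(mmu_update_t *req, uint32_t pte_maddr, uint32_t new_val)
    {
        req->ptr = (pte_maddr & ~3u) | MMU_NORMAL_PT_UPDATE;
        req->val = new_val;
    }

    int main(void)
    {
        mmu_update_t batch[2];
        queue_pt_update(&batch[0], 0x00123004, 0x00456007);   /* hypothetical addresses */
        batch[1].ptr = (0x00789000 & ~3u) | MMU_MACHPHYS_UPDATE;
        batch[1].val = 42;   /* pseudo-physical frame for the target machine frame */
        for (int i = 0; i < 2; i++)
            printf("req[%d]: ptr=%08x val=%08x\n", i,
                   (unsigned)batch[i].ptr, (unsigned)batch[i].val);
        return 0; /* a guest would now pass 'batch' and its length to the hypercall */
    }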
     4.1 --- a/xen/common/domain.c	Wed Jul 07 18:56:39 2004 +0000
     4.2 +++ b/xen/common/domain.c	Wed Jul 07 18:57:28 2004 +0000
     4.3 @@ -5,48 +5,22 @@
     4.4  #include <xen/errno.h>
     4.5  #include <xen/sched.h>
     4.6  #include <xen/mm.h>
     4.7 -#include <xen/delay.h>
     4.8  #include <xen/event.h>
     4.9  #include <xen/time.h>
    4.10 -#include <xen/shadow.h>
    4.11  #include <xen/console.h>
    4.12  #include <xen/shadow.h>
    4.13 -#include <xen/irq.h>
    4.14 -#include <asm/io.h>
    4.15 -#include <asm/domain_page.h>
    4.16 -#include <asm/flushtlb.h>
    4.17 -#include <asm/i387.h>
    4.18 +#include <xen/elf.h>
    4.19  #include <hypervisor-ifs/dom0_ops.h>
    4.20  
    4.21 -#if defined(__x86_64__)
    4.22 -#define ELFSIZE 64
    4.23 -#else
    4.24 -#define ELFSIZE 32
    4.25 -#endif
    4.26 -#include <xen/elf.h>
    4.27 -
    4.28 -#if !defined(CONFIG_X86_64BITMODE)
    4.29 -/* No ring-3 access in initial page tables. */
    4.30 -#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
    4.31 -#else
    4.32 -/* Allow ring-3 access in long mode as guest cannot use ring 1. */
    4.33 -#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
    4.34 -#endif
    4.35 -#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
    4.36 -#define L3_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
    4.37 -#define L4_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
    4.38 -
    4.39 -#define round_pgup(_p)    (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
    4.40 -#define round_pgdown(_p)  ((_p)&PAGE_MASK)
    4.41 -
    4.42  /* Both these structures are protected by the tasklist_lock. */
    4.43  rwlock_t tasklist_lock __cacheline_aligned = RW_LOCK_UNLOCKED;
    4.44  struct domain *task_hash[TASK_HASH_SIZE];
    4.45  struct domain *task_list;
    4.46  
    4.47 -void arch_do_createdomain(struct domain *);
    4.48 -void arch_final_setup_guestos(struct domain *, full_execution_context_t *c);
    4.49 -void free_perdomain_pt(struct domain *);
    4.50 +extern void arch_do_createdomain(struct domain *);
    4.51 +extern void arch_final_setup_guestos(struct domain *, full_execution_context_t *c);
    4.52 +extern void free_perdomain_pt(struct domain *);
    4.53 +extern void domain_relinquish_memory(struct domain *d);
    4.54  
    4.55  struct domain *do_createdomain(domid_t dom_id, unsigned int cpu)
    4.56  {
    4.57 @@ -314,68 +288,6 @@ void free_domain_page(struct pfn_info *p
    4.58          put_domain(d);
    4.59  }
    4.60  
    4.61 -
    4.62 -void domain_relinquish_memory(struct domain *d)
    4.63 -{
    4.64 -    struct list_head *ent, *tmp;
    4.65 -    struct pfn_info  *page;
    4.66 -    unsigned long     x, y;
    4.67 -
    4.68 -    /*
    4.69 -     * If we're executing the idle task then we may still be running over the 
    4.70 -     * dead domain's page tables. We'd better fix that before freeing them!
    4.71 -     */
    4.72 -    if ( is_idle_task(current) )
    4.73 -        write_ptbase(&current->mm);
    4.74 -
    4.75 -    /* Exit shadow mode before deconstructing final guest page table. */
    4.76 -    if ( shadow_mode(d) )
    4.77 -        shadow_mode_disable(d);
    4.78 -
    4.79 -    /* Drop the in-use reference to the page-table base. */
    4.80 -    if ( pagetable_val(d->mm.pagetable) != 0 )
    4.81 -        put_page_and_type(&frame_table[pagetable_val(d->mm.pagetable) >>
    4.82 -                                      PAGE_SHIFT]);
    4.83 -
    4.84 -    /* Relinquish Xen-heap pages. Currently this can only be 'shared_info'. */
    4.85 -    page = virt_to_page(d->shared_info);
    4.86 -    if ( test_and_clear_bit(_PGC_allocated, &page->count_and_flags) )
    4.87 -        put_page(page);
    4.88 -
    4.89 -    /* Relinquish all pages on the domain's allocation list. */
    4.90 -    spin_lock_recursive(&d->page_alloc_lock); /* may enter free_domain_page */
    4.91 -    list_for_each_safe ( ent, tmp, &d->page_list )
    4.92 -    {
    4.93 -        page = list_entry(ent, struct pfn_info, list);
    4.94 -
    4.95 -        if ( test_and_clear_bit(_PGC_guest_pinned, &page->count_and_flags) )
    4.96 -            put_page_and_type(page);
    4.97 -
    4.98 -        if ( test_and_clear_bit(_PGC_allocated, &page->count_and_flags) )
    4.99 -            put_page(page);
   4.100 -
   4.101 -        /*
   4.102 -         * Forcibly invalidate base page tables at this point to break circular
   4.103 -         * 'linear page table' references. This is okay because MMU structures
   4.104 -         * are not shared across domains and this domain is now dead. Thus base
   4.105 -         * tables are not in use so a non-zero count means circular reference.
   4.106 -         */
   4.107 -        y = page->type_and_flags;
   4.108 -        do {
   4.109 -            x = y;
   4.110 -            if ( likely((x & (PGT_type_mask|PGT_validated)) != 
   4.111 -                        (PGT_base_page_table|PGT_validated)) )
   4.112 -                break;
   4.113 -            y = cmpxchg(&page->type_and_flags, x, x & ~PGT_validated);
   4.114 -            if ( likely(y == x) )
   4.115 -                free_page_type(page, PGT_base_page_table);
   4.116 -        }
   4.117 -        while ( unlikely(y != x) );
   4.118 -    }
   4.119 -    spin_unlock_recursive(&d->page_alloc_lock);
   4.120 -}
   4.121 -
   4.122 -
   4.123  unsigned int alloc_new_dom_mem(struct domain *d, unsigned int kbytes)
   4.124  {
   4.125      unsigned int alloc_pfns, nr_pages;
   4.126 @@ -492,7 +404,7 @@ static inline int is_loadable_phdr(Elf_P
   4.127              ((phdr->p_flags & (PF_W|PF_X)) != 0));
   4.128  }
   4.129  
   4.130 -static int readelfimage_base_and_size(char *elfbase, 
   4.131 +int readelfimage_base_and_size(char *elfbase, 
   4.132                                        unsigned long elfsize,
   4.133                                        unsigned long *pkernstart,
   4.134                                        unsigned long *pkernend,
   4.135 @@ -581,7 +493,7 @@ static int readelfimage_base_and_size(ch
   4.136      return 0;
   4.137  }
   4.138  
   4.139 -static int loadelfimage(char *elfbase)
   4.140 +int loadelfimage(char *elfbase)
   4.141  {
   4.142      Elf_Ehdr *ehdr = (Elf_Ehdr *)elfbase;
   4.143      Elf_Phdr *phdr;
   4.144 @@ -602,314 +514,3 @@ static int loadelfimage(char *elfbase)
   4.145  
   4.146      return 0;
   4.147  }
   4.148 -
   4.149 -int construct_dom0(struct domain *p, 
   4.150 -                   unsigned long alloc_start,
   4.151 -                   unsigned long alloc_end,
   4.152 -                   char *image_start, unsigned long image_len, 
   4.153 -                   char *initrd_start, unsigned long initrd_len,
   4.154 -                   char *cmdline)
   4.155 -{
   4.156 -    char *dst;
   4.157 -    int i, rc;
   4.158 -    unsigned long pfn, mfn;
   4.159 -    unsigned long nr_pages = (alloc_end - alloc_start) >> PAGE_SHIFT;
   4.160 -    unsigned long nr_pt_pages;
   4.161 -    unsigned long count;
   4.162 -    l2_pgentry_t *l2tab, *l2start;
   4.163 -    l1_pgentry_t *l1tab = NULL, *l1start = NULL;
   4.164 -    struct pfn_info *page = NULL;
   4.165 -    start_info_t *si;
   4.166 -
   4.167 -    /*
   4.168 -     * This fully describes the memory layout of the initial domain. All 
    4.169 -     * *_start addresses are page-aligned, except v_start (and v_end) which are
   4.170 -     * superpage-aligned.
   4.171 -     */
   4.172 -    unsigned long v_start;
   4.173 -    unsigned long vkern_start;
   4.174 -    unsigned long vkern_entry;
   4.175 -    unsigned long vkern_end;
   4.176 -    unsigned long vinitrd_start;
   4.177 -    unsigned long vinitrd_end;
   4.178 -    unsigned long vphysmap_start;
   4.179 -    unsigned long vphysmap_end;
   4.180 -    unsigned long vstartinfo_start;
   4.181 -    unsigned long vstartinfo_end;
   4.182 -    unsigned long vstack_start;
   4.183 -    unsigned long vstack_end;
   4.184 -    unsigned long vpt_start;
   4.185 -    unsigned long vpt_end;
   4.186 -    unsigned long v_end;
   4.187 -
   4.188 -    /* Machine address of next candidate page-table page. */
   4.189 -    unsigned long mpt_alloc;
   4.190 -
   4.191 -    extern void physdev_init_dom0(struct domain *);
   4.192 -
   4.193 -    /* Sanity! */
   4.194 -    if ( p->domain != 0 ) 
   4.195 -        BUG();
   4.196 -    if ( test_bit(DF_CONSTRUCTED, &p->flags) ) 
   4.197 -        BUG();
   4.198 -
   4.199 -    printk("*** LOADING DOMAIN 0 ***\n");
   4.200 -
   4.201 -    /*
   4.202 -     * This is all a bit grim. We've moved the modules to the "safe" physical 
   4.203 -     * memory region above MAP_DIRECTMAP_ADDRESS (48MB). Later in this 
   4.204 -     * routine we're going to copy it down into the region that's actually 
   4.205 -     * been allocated to domain 0. This is highly likely to be overlapping, so 
   4.206 -     * we use a forward copy.
   4.207 -     * 
   4.208 -     * MAP_DIRECTMAP_ADDRESS should be safe. The worst case is a machine with 
   4.209 -     * 4GB and lots of network/disk cards that allocate loads of buffers. 
   4.210 -     * We'll have to revisit this if we ever support PAE (64GB).
   4.211 -     */
   4.212 -
   4.213 -    rc = readelfimage_base_and_size(image_start, image_len,
   4.214 -                                    &vkern_start, &vkern_end, &vkern_entry);
   4.215 -    if ( rc != 0 )
   4.216 -        return rc;
   4.217 -
   4.218 -    /*
   4.219 -     * Why do we need this? The number of page-table frames depends on the 
   4.220 -     * size of the bootstrap address space. But the size of the address space 
   4.221 -     * depends on the number of page-table frames (since each one is mapped 
   4.222 -     * read-only). We have a pair of simultaneous equations in two unknowns, 
   4.223 -     * which we solve by exhaustive search.
   4.224 -     */
   4.225 -    for ( nr_pt_pages = 2; ; nr_pt_pages++ )
   4.226 -    {
   4.227 -        v_start          = vkern_start & ~((1<<22)-1);
   4.228 -        vinitrd_start    = round_pgup(vkern_end);
   4.229 -        vinitrd_end      = vinitrd_start + initrd_len;
   4.230 -        vphysmap_start   = round_pgup(vinitrd_end);
   4.231 -        vphysmap_end     = vphysmap_start + (nr_pages * sizeof(unsigned long));
   4.232 -        vpt_start        = round_pgup(vphysmap_end);
   4.233 -        vpt_end          = vpt_start + (nr_pt_pages * PAGE_SIZE);
   4.234 -        vstartinfo_start = vpt_end;
   4.235 -        vstartinfo_end   = vstartinfo_start + PAGE_SIZE;
   4.236 -        vstack_start     = vstartinfo_end;
   4.237 -        vstack_end       = vstack_start + PAGE_SIZE;
   4.238 -        v_end            = (vstack_end + (1<<22)-1) & ~((1<<22)-1);
   4.239 -        if ( (v_end - vstack_end) < (512 << 10) )
   4.240 -            v_end += 1 << 22; /* Add extra 4MB to get >= 512kB padding. */
   4.241 -        if ( (((v_end - v_start) >> L2_PAGETABLE_SHIFT) + 1) <= nr_pt_pages )
   4.242 -            break;
   4.243 -    }
   4.244 -
   4.245 -    if ( (v_end - v_start) > (nr_pages * PAGE_SIZE) )
   4.246 -    {
   4.247 -        printk("Initial guest OS requires too much space\n"
   4.248 -               "(%luMB is greater than %luMB limit)\n",
   4.249 -               (v_end-v_start)>>20, (nr_pages<<PAGE_SHIFT)>>20);
   4.250 -        return -ENOMEM;
   4.251 -    }
   4.252 -
   4.253 -    printk("PHYSICAL MEMORY ARRANGEMENT:\n"
   4.254 -           " Kernel image:  %p->%p\n"
   4.255 -           " Initrd image:  %p->%p\n"
   4.256 -           " Dom0 alloc.:   %08lx->%08lx\n",
   4.257 -           image_start, image_start + image_len,
   4.258 -           initrd_start, initrd_start + initrd_len,
   4.259 -           alloc_start, alloc_end);
   4.260 -    printk("VIRTUAL MEMORY ARRANGEMENT:\n"
   4.261 -           " Loaded kernel: %08lx->%08lx\n"
   4.262 -           " Init. ramdisk: %08lx->%08lx\n"
   4.263 -           " Phys-Mach map: %08lx->%08lx\n"
   4.264 -           " Page tables:   %08lx->%08lx\n"
   4.265 -           " Start info:    %08lx->%08lx\n"
   4.266 -           " Boot stack:    %08lx->%08lx\n"
   4.267 -           " TOTAL:         %08lx->%08lx\n",
   4.268 -           vkern_start, vkern_end, 
   4.269 -           vinitrd_start, vinitrd_end,
   4.270 -           vphysmap_start, vphysmap_end,
   4.271 -           vpt_start, vpt_end,
   4.272 -           vstartinfo_start, vstartinfo_end,
   4.273 -           vstack_start, vstack_end,
   4.274 -           v_start, v_end);
   4.275 -    printk(" ENTRY ADDRESS: %08lx\n", vkern_entry);
   4.276 -
   4.277 -    /*
   4.278 -     * Protect the lowest 1GB of memory. We use a temporary mapping there
   4.279 -     * from which we copy the kernel and ramdisk images.
   4.280 -     */
   4.281 -    if ( v_start < (1<<30) )
   4.282 -    {
    4.283 -        printk("Initial loading isn't allowed in the lowest 1GB of memory.\n");
   4.284 -        return -EINVAL;
   4.285 -    }
   4.286 -
   4.287 -    /* Construct a frame-allocation list for the initial domain. */
   4.288 -    for ( mfn = (alloc_start>>PAGE_SHIFT); 
   4.289 -          mfn < (alloc_end>>PAGE_SHIFT); 
   4.290 -          mfn++ )
   4.291 -    {
   4.292 -        page = &frame_table[mfn];
   4.293 -        page->u.domain        = p;
   4.294 -        page->type_and_flags  = 0;
   4.295 -        page->count_and_flags = PGC_allocated | 1;
   4.296 -        list_add_tail(&page->list, &p->page_list);
   4.297 -        p->tot_pages++; p->max_pages++;
   4.298 -    }
   4.299 -
   4.300 -    mpt_alloc = (vpt_start - v_start) + alloc_start;
   4.301 -
   4.302 -    SET_GDT_ENTRIES(p, DEFAULT_GDT_ENTRIES);
   4.303 -    SET_GDT_ADDRESS(p, DEFAULT_GDT_ADDRESS);
   4.304 -
   4.305 -    /*
   4.306 -     * We're basically forcing default RPLs to 1, so that our "what privilege
   4.307 -     * level are we returning to?" logic works.
   4.308 -     */
   4.309 -    p->failsafe_selector = FLAT_GUESTOS_CS;
   4.310 -    p->event_selector    = FLAT_GUESTOS_CS;
   4.311 -    p->thread.guestos_ss = FLAT_GUESTOS_DS;
   4.312 -    for ( i = 0; i < 256; i++ ) 
   4.313 -        p->thread.traps[i].cs = FLAT_GUESTOS_CS;
   4.314 -
   4.315 -    /* WARNING: The new domain must have its 'processor' field filled in! */
   4.316 -    l2start = l2tab = (l2_pgentry_t *)mpt_alloc; mpt_alloc += PAGE_SIZE;
   4.317 -    memcpy(l2tab, &idle_pg_table[0], PAGE_SIZE);
   4.318 -    l2tab[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
   4.319 -        mk_l2_pgentry((unsigned long)l2start | __PAGE_HYPERVISOR);
   4.320 -    l2tab[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] =
   4.321 -        mk_l2_pgentry(__pa(p->mm.perdomain_pt) | __PAGE_HYPERVISOR);
   4.322 -    p->mm.pagetable = mk_pagetable((unsigned long)l2start);
   4.323 -
   4.324 -    l2tab += l2_table_offset(v_start);
   4.325 -    mfn = alloc_start >> PAGE_SHIFT;
   4.326 -    for ( count = 0; count < ((v_end-v_start)>>PAGE_SHIFT); count++ )
   4.327 -    {
   4.328 -        if ( !((unsigned long)l1tab & (PAGE_SIZE-1)) )
   4.329 -        {
   4.330 -            l1start = l1tab = (l1_pgentry_t *)mpt_alloc; 
   4.331 -            mpt_alloc += PAGE_SIZE;
   4.332 -            *l2tab++ = mk_l2_pgentry((unsigned long)l1start | L2_PROT);
   4.333 -            clear_page(l1tab);
   4.334 -        }
   4.335 -        *l1tab++ = mk_l1_pgentry((mfn << PAGE_SHIFT) | L1_PROT);
   4.336 -        
   4.337 -        page = &frame_table[mfn];
   4.338 -        set_bit(_PGC_tlb_flush_on_type_change, &page->count_and_flags);
   4.339 -        if ( !get_page_and_type(page, p, PGT_writeable_page) )
   4.340 -            BUG();
   4.341 -
   4.342 -        mfn++;
   4.343 -    }
   4.344 -
   4.345 -    /* Pages that are part of page tables must be read only. */
   4.346 -    l2tab = l2start + l2_table_offset(vpt_start);
   4.347 -    l1start = l1tab = (l1_pgentry_t *)l2_pgentry_to_phys(*l2tab);
   4.348 -    l1tab += l1_table_offset(vpt_start);
   4.349 -    l2tab++;
   4.350 -    for ( count = 0; count < nr_pt_pages; count++ ) 
   4.351 -    {
   4.352 -        *l1tab = mk_l1_pgentry(l1_pgentry_val(*l1tab) & ~_PAGE_RW);
   4.353 -        page = &frame_table[l1_pgentry_to_pagenr(*l1tab)];
   4.354 -        if ( count == 0 )
   4.355 -        {
   4.356 -            page->type_and_flags &= ~PGT_type_mask;
   4.357 -            page->type_and_flags |= PGT_l2_page_table;
   4.358 -            get_page(page, p); /* an extra ref because of readable mapping */
   4.359 -            /* Get another ref to L2 page so that it can be pinned. */
   4.360 -            if ( !get_page_and_type(page, p, PGT_l2_page_table) )
   4.361 -                BUG();
   4.362 -            set_bit(_PGC_guest_pinned, &page->count_and_flags);
   4.363 -        }
   4.364 -        else
   4.365 -        {
   4.366 -            page->type_and_flags &= ~PGT_type_mask;
   4.367 -            page->type_and_flags |= PGT_l1_page_table;
   4.368 -            get_page(page, p); /* an extra ref because of readable mapping */
   4.369 -        }
   4.370 -        l1tab++;
   4.371 -        if( !((unsigned long)l1tab & (PAGE_SIZE - 1)) )
   4.372 -            l1start = l1tab = (l1_pgentry_t *)l2_pgentry_to_phys(*l2tab);
   4.373 -    }
   4.374 -
   4.375 -    /* Set up shared-info area. */
   4.376 -    update_dom_time(p->shared_info);
   4.377 -    p->shared_info->domain_time = 0;
   4.378 -    /* Mask all upcalls... */
   4.379 -    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
   4.380 -        p->shared_info->vcpu_data[i].evtchn_upcall_mask = 1;
   4.381 -
   4.382 -    /* Install the new page tables. */
   4.383 -    __cli();
   4.384 -    write_ptbase(&p->mm);
   4.385 -
   4.386 -    /* Copy the OS image. */
   4.387 -    (void)loadelfimage(image_start);
   4.388 -
   4.389 -    /* Copy the initial ramdisk. */
   4.390 -    if ( initrd_len != 0 )
   4.391 -        memcpy((void *)vinitrd_start, initrd_start, initrd_len);
   4.392 -    
   4.393 -    /* Set up start info area. */
   4.394 -    si = (start_info_t *)vstartinfo_start;
   4.395 -    memset(si, 0, PAGE_SIZE);
   4.396 -    si->nr_pages     = p->tot_pages;
   4.397 -    si->shared_info  = virt_to_phys(p->shared_info);
   4.398 -    si->flags        = SIF_PRIVILEGED | SIF_INITDOMAIN;
   4.399 -    si->pt_base      = vpt_start;
   4.400 -    si->nr_pt_frames = nr_pt_pages;
   4.401 -    si->mfn_list     = vphysmap_start;
   4.402 -
   4.403 -    /* Write the phys->machine and machine->phys table entries. */
   4.404 -    for ( mfn = (alloc_start>>PAGE_SHIFT); 
   4.405 -          mfn < (alloc_end>>PAGE_SHIFT); 
   4.406 -          mfn++ )
   4.407 -    {
   4.408 -        pfn = mfn - (alloc_start>>PAGE_SHIFT);
   4.409 -        ((unsigned long *)vphysmap_start)[pfn] = mfn;
   4.410 -        machine_to_phys_mapping[mfn] = pfn;
   4.411 -    }
   4.412 -
   4.413 -    if ( initrd_len != 0 )
   4.414 -    {
   4.415 -        si->mod_start = vinitrd_start;
   4.416 -        si->mod_len   = initrd_len;
   4.417 -        printk("Initrd len 0x%lx, start at 0x%08lx\n",
   4.418 -               si->mod_len, si->mod_start);
   4.419 -    }
   4.420 -
   4.421 -    dst = si->cmd_line;
   4.422 -    if ( cmdline != NULL )
   4.423 -    {
   4.424 -        for ( i = 0; i < 255; i++ )
   4.425 -        {
   4.426 -            if ( cmdline[i] == '\0' )
   4.427 -                break;
   4.428 -            *dst++ = cmdline[i];
   4.429 -        }
   4.430 -    }
   4.431 -    *dst = '\0';
   4.432 -
   4.433 -    /* Reinstate the caller's page tables. */
   4.434 -    write_ptbase(&current->mm);
   4.435 -    __sti();
   4.436 -
   4.437 -    /* Destroy low mappings - they were only for our convenience. */
   4.438 -    for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
   4.439 -        if ( l2_pgentry_val(l2start[i]) & _PAGE_PSE )
   4.440 -            l2start[i] = mk_l2_pgentry(0);
   4.441 -    zap_low_mappings(); /* Do the same for the idle page tables. */
   4.442 -    
   4.443 -    /* Give up the VGA console if DOM0 is configured to grab it. */
   4.444 -    console_endboot(strstr(cmdline, "tty0") != NULL);
   4.445 -
   4.446 -    /* DOM0 gets access to everything. */
   4.447 -    physdev_init_dom0(p);
   4.448 -
   4.449 -    set_bit(DF_CONSTRUCTED, &p->flags);
   4.450 -
   4.451 -#if 0 /* XXXXX DO NOT CHECK IN ENABLED !!! (but useful for testing so leave) */
   4.452 -    shadow_mode_enable(&p->mm, SHM_test); 
   4.453 -#endif
   4.454 -
   4.455 -    new_thread(p, vkern_entry, vstack_end, vstartinfo_start);
   4.456 -
   4.457 -    return 0;
   4.458 -}
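The nr_pt_pages loop in construct_dom0() above solves a small fixed-point problem: the bootstrap address space must map its own page-table frames, so the number of frames and the size of the space depend on each other, and the code searches exhaustively for the smallest consistent pair. A standalone sketch of the same search, with a made-up payload size and without the 512kB stack-padding rule, is:

    #include <stdio.h>

    #define PAGE_SIZE          4096UL
    #define L2_PAGETABLE_SHIFT 22      /* one L1 page maps 4MB of address space */

    int main(void)
    {
        /* Hypothetical bootstrap size (bytes), standing in for the kernel,
         * initrd, phys-map, start-info and stack regions computed in
         * construct_dom0(). */
        unsigned long fixed_payload = 24UL << 20;  /* everything except the page tables */
        unsigned long nr_pt_pages, v_size;

        for (nr_pt_pages = 2; ; nr_pt_pages++) {
            /* Address-space size if we reserve this many page-table frames... */
            v_size = fixed_payload + nr_pt_pages * PAGE_SIZE;
            /* ...rounded up to a 4MB superpage boundary, as v_end is above. */
            v_size = (v_size + (1UL << L2_PAGETABLE_SHIFT) - 1) &
                     ~((1UL << L2_PAGETABLE_SHIFT) - 1);
            /* Enough L1 frames (plus the L2) to map the whole space?  Done. */
            if ((v_size >> L2_PAGETABLE_SHIFT) + 1 <= nr_pt_pages)
                break;
        }
        printf("address space: %lu MB, page-table frames: %lu\n",
               v_size >> 20, nr_pt_pages);
        return 0;
    }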
     5.1 --- a/xen/common/memory.c	Wed Jul 07 18:56:39 2004 +0000
     5.2 +++ b/xen/common/memory.c	Wed Jul 07 18:57:28 2004 +0000
     5.3 @@ -18,71 +18,6 @@
     5.4   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
     5.5   */
     5.6  
     5.7 -/*
     5.8 - * A description of the page table API:
     5.9 - * 
    5.10 - * Domains trap to do_mmu_update with a list of update requests.
    5.11 - * This is a list of (ptr, val) pairs, where the requested operation
    5.12 - * is *ptr = val.
    5.13 - * 
    5.14 - * Reference counting of pages:
    5.15 - * ----------------------------
    5.16 - * Each page has two refcounts: tot_count and type_count.
    5.17 - * 
    5.18 - * TOT_COUNT is the obvious reference count. It counts all uses of a
    5.19 - * physical page frame by a domain, including uses as a page directory,
    5.20 - * a page table, or simple mappings via a PTE. This count prevents a
    5.21 - * domain from releasing a frame back to the free pool when it still holds
    5.22 - * a reference to it.
    5.23 - * 
    5.24 - * TYPE_COUNT is more subtle. A frame can be put to one of three
    5.25 - * mutually-exclusive uses: it might be used as a page directory, or a
    5.26 - * page table, or it may be mapped writeable by the domain [of course, a
     5.27 - * frame may also be used in none of these ways!].
    5.28 - * So, type_count is a count of the number of times a frame is being 
    5.29 - * referred to in its current incarnation. Therefore, a page can only
    5.30 - * change its type when its type count is zero.
    5.31 - * 
    5.32 - * Pinning the page type:
    5.33 - * ----------------------
    5.34 - * The type of a page can be pinned/unpinned with the commands
    5.35 - * MMUEXT_[UN]PIN_L?_TABLE. Each page can be pinned exactly once (that is,
    5.36 - * pinning is not reference counted, so it can't be nested).
    5.37 - * This is useful to prevent a page's type count falling to zero, at which
    5.38 - * point safety checks would need to be carried out next time the count
    5.39 - * is increased again.
    5.40 - * 
    5.41 - * A further note on writeable page mappings:
    5.42 - * ------------------------------------------
    5.43 - * For simplicity, the count of writeable mappings for a page may not
    5.44 - * correspond to reality. The 'writeable count' is incremented for every
    5.45 - * PTE which maps the page with the _PAGE_RW flag set. However, for
    5.46 - * write access to be possible the page directory entry must also have
    5.47 - * its _PAGE_RW bit set. We do not check this as it complicates the 
    5.48 - * reference counting considerably [consider the case of multiple
    5.49 - * directory entries referencing a single page table, some with the RW
    5.50 - * bit set, others not -- it starts getting a bit messy].
    5.51 - * In normal use, this simplification shouldn't be a problem.
    5.52 - * However, the logic can be added if required.
    5.53 - * 
    5.54 - * One more note on read-only page mappings:
    5.55 - * -----------------------------------------
    5.56 - * We want domains to be able to map pages for read-only access. The
    5.57 - * main reason is that page tables and directories should be readable
    5.58 - * by a domain, but it would not be safe for them to be writeable.
    5.59 - * However, domains have free access to rings 1 & 2 of the Intel
    5.60 - * privilege model. In terms of page protection, these are considered
    5.61 - * to be part of 'supervisor mode'. The WP bit in CR0 controls whether
    5.62 - * read-only restrictions are respected in supervisor mode -- if the 
    5.63 - * bit is clear then any mapped page is writeable.
    5.64 - * 
    5.65 - * We get round this by always setting the WP bit and disallowing 
    5.66 - * updates to it. This is very unlikely to cause a problem for guest
    5.67 - * OS's, which will generally use the WP bit to simplify copy-on-write
    5.68 - * implementation (in that case, OS wants a fault when it writes to
    5.69 - * an application-supplied buffer).
    5.70 - */
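The TYPE_COUNT rule described above (a frame's type may only change while its type count is zero) is enforced in get_page_type() with a compare-and-exchange loop over type_and_flags. The following is not that function, just a simplified model of the rule, using an illustrative bit layout and omitting overflow checks and the TLB-flush handling the real code performs:

    #include <stdatomic.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Illustrative layout only: low bits hold the type count, the high nibble
     * the current type.  This is not the real PGT_* encoding. */
    #define TYPE_MASK       0xf0000000u
    #define COUNT_MASK      0x0fffffffu
    #define TYPE_NONE       0x00000000u
    #define TYPE_L1_PT      0x10000000u
    #define TYPE_WRITEABLE  0x20000000u

    /* Take a typed reference: succeeds only if the frame is unused
     * (count == 0) or already has the wanted type. */
    static int get_type_ref(_Atomic uint32_t *taf, uint32_t wanted)
    {
        uint32_t old = atomic_load(taf);
        for (;;) {
            uint32_t type = old & TYPE_MASK, count = old & COUNT_MASK;
            if (count != 0 && type != wanted)
                return 0;                        /* in use under a different type */
            uint32_t updated = (count == 0 ? wanted : type) | (count + 1);
            if (atomic_compare_exchange_weak(taf, &old, updated))
                return 1;                        /* typed reference taken */
            /* CAS failure refreshed 'old'; retry with the current value. */
        }
    }

    static void put_type_ref(_Atomic uint32_t *taf)
    {
        atomic_fetch_sub(taf, 1);                /* drop one typed reference */
    }

    int main(void)
    {
        _Atomic uint32_t taf = TYPE_NONE;        /* free frame: no type, count 0 */
        printf("as L1 PT:     %d\n", get_type_ref(&taf, TYPE_L1_PT));     /* 1 */
        printf("as writeable: %d\n", get_type_ref(&taf, TYPE_WRITEABLE)); /* 0 */
        put_type_ref(&taf);
        printf("as writeable: %d\n", get_type_ref(&taf, TYPE_WRITEABLE)); /* 1 */
        return 0;
    }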
    5.71 -
    5.72  #include <xen/config.h>
    5.73  #include <xen/init.h>
    5.74  #include <xen/lib.h>
    5.75 @@ -91,34 +26,11 @@
    5.76  #include <xen/errno.h>
    5.77  #include <xen/perfc.h>
    5.78  #include <xen/irq.h>
    5.79 -#include <xen/shadow.h>
    5.80  #include <asm/page.h>
    5.81  #include <asm/flushtlb.h>
    5.82  #include <asm/io.h>
    5.83  #include <asm/uaccess.h>
    5.84  #include <asm/domain_page.h>
    5.85 -#include <asm/ldt.h>
    5.86 -
    5.87 -#ifndef NDEBUG
    5.88 -#define MEM_LOG(_f, _a...)                           \
    5.89 -  printk("DOM%u: (file=memory.c, line=%d) " _f "\n", \
    5.90 -         current->domain , __LINE__ , ## _a )
    5.91 -#else
    5.92 -#define MEM_LOG(_f, _a...) ((void)0)
    5.93 -#endif
    5.94 -
    5.95 -static int alloc_l2_table(struct pfn_info *page);
    5.96 -static int alloc_l1_table(struct pfn_info *page);
    5.97 -static int get_page_from_pagenr(unsigned long page_nr, struct domain *d);
    5.98 -static int get_page_and_type_from_pagenr(unsigned long page_nr, 
    5.99 -                                         u32 type,
   5.100 -                                         struct domain *d);
   5.101 -
   5.102 -static void free_l2_table(struct pfn_info *page);
   5.103 -static void free_l1_table(struct pfn_info *page);
   5.104 -
   5.105 -static int mod_l2_entry(l2_pgentry_t *, l2_pgentry_t, unsigned long);
   5.106 -static int mod_l1_entry(l1_pgentry_t *, l1_pgentry_t);
   5.107  
   5.108  /* Frame table and its size in pages. */
   5.109  struct pfn_info *frame_table;
   5.110 @@ -129,26 +41,13 @@ struct list_head free_list;
   5.111  spinlock_t free_list_lock;
   5.112  unsigned int free_pfns;
   5.113  
   5.114 -/* Used to defer flushing of memory structures. */
   5.115 -static struct {
   5.116 -#define DOP_FLUSH_TLB   (1<<0) /* Flush the TLB.                 */
   5.117 -#define DOP_RELOAD_LDT  (1<<1) /* Reload the LDT shadow mapping. */
   5.118 -    unsigned long       deferred_ops;
   5.119 -    unsigned long       cr0;
   5.120 -    /* General-Purpose Subject, Page-Table Subject */
   5.121 -    struct domain *gps, *pts;
   5.122 -} percpu_info[NR_CPUS] __cacheline_aligned;
   5.123 -
   5.124 -/* Determine the current General-Purpose Subject or Page-Table Subject. */
   5.125 -#define PTS (percpu_info[smp_processor_id()].pts ? : current)
   5.126 -#define GPS (percpu_info[smp_processor_id()].gps ? : current)
   5.127 -
   5.128 +extern void init_percpu_info(void);
   5.129  
   5.130  void __init init_frametable(void *frametable_vstart, unsigned long nr_pages)
   5.131  {
   5.132      unsigned long mfn;
   5.133  
   5.134 -    memset(percpu_info, 0, sizeof(percpu_info));
   5.135 +    init_percpu_info();
   5.136  
   5.137      max_page = nr_pages;
   5.138      frame_table_size = nr_pages * sizeof(struct pfn_info);
   5.139 @@ -194,1063 +93,3 @@ void add_to_domain_alloc_list(unsigned l
   5.140      }
   5.141      spin_unlock_irqrestore(&free_list_lock, flags);
   5.142  }
   5.143 -
   5.144 -static void __invalidate_shadow_ldt(struct domain *d)
   5.145 -{
   5.146 -    int i;
   5.147 -    unsigned long pfn;
   5.148 -    struct pfn_info *page;
   5.149 -    
   5.150 -    d->mm.shadow_ldt_mapcnt = 0;
   5.151 -
   5.152 -    for ( i = 16; i < 32; i++ )
   5.153 -    {
   5.154 -        pfn = l1_pgentry_to_pagenr(d->mm.perdomain_pt[i]);
   5.155 -        if ( pfn == 0 ) continue;
   5.156 -        d->mm.perdomain_pt[i] = mk_l1_pgentry(0);
   5.157 -        page = &frame_table[pfn];
   5.158 -        ASSERT_PAGE_IS_TYPE(page, PGT_ldt_page);
   5.159 -        ASSERT_PAGE_IS_DOMAIN(page, d);
   5.160 -        put_page_and_type(page);
   5.161 -    }
   5.162 -
   5.163 -    /* Dispose of the (now possibly invalid) mappings from the TLB.  */
   5.164 -    percpu_info[d->processor].deferred_ops |= DOP_FLUSH_TLB | DOP_RELOAD_LDT;
   5.165 -}
   5.166 -
   5.167 -
   5.168 -static inline void invalidate_shadow_ldt(void)
   5.169 -{
   5.170 -    struct domain *d = current;
   5.171 -    if ( d->mm.shadow_ldt_mapcnt != 0 )
   5.172 -        __invalidate_shadow_ldt(d);
   5.173 -}
   5.174 -
   5.175 -
   5.176 -int alloc_segdesc_page(struct pfn_info *page)
   5.177 -{
   5.178 -    unsigned long *descs = map_domain_mem((page-frame_table) << PAGE_SHIFT);
   5.179 -    int i;
   5.180 -
   5.181 -    for ( i = 0; i < 512; i++ )
   5.182 -        if ( unlikely(!check_descriptor(descs[i*2], descs[i*2+1])) )
   5.183 -            goto fail;
   5.184 -
   5.185 -    unmap_domain_mem(descs);
   5.186 -    return 1;
   5.187 -
   5.188 - fail:
   5.189 -    unmap_domain_mem(descs);
   5.190 -    return 0;
   5.191 -}
   5.192 -
   5.193 -
   5.194 -/* Map shadow page at offset @off. */
   5.195 -int map_ldt_shadow_page(unsigned int off)
   5.196 -{
   5.197 -    struct domain *d = current;
   5.198 -    unsigned long l1e;
   5.199 -
   5.200 -    if ( unlikely(in_irq()) )
   5.201 -        BUG();
   5.202 -
   5.203 -    __get_user(l1e, (unsigned long *)&linear_pg_table[(d->mm.ldt_base >> 
   5.204 -                                                       PAGE_SHIFT) + off]);
   5.205 -
   5.206 -    if ( unlikely(!(l1e & _PAGE_PRESENT)) ||
   5.207 -         unlikely(!get_page_and_type(&frame_table[l1e >> PAGE_SHIFT], 
   5.208 -                                     d, PGT_ldt_page)) )
   5.209 -        return 0;
   5.210 -
   5.211 -    d->mm.perdomain_pt[off + 16] = mk_l1_pgentry(l1e | _PAGE_RW);
   5.212 -    d->mm.shadow_ldt_mapcnt++;
   5.213 -
   5.214 -    return 1;
   5.215 -}
   5.216 -
   5.217 -
   5.218 -static int get_page_from_pagenr(unsigned long page_nr, struct domain *d)
   5.219 -{
   5.220 -    struct pfn_info *page = &frame_table[page_nr];
   5.221 -
   5.222 -    if ( unlikely(!pfn_is_ram(page_nr)) )
   5.223 -    {
   5.224 -        MEM_LOG("Pfn %08lx is not RAM", page_nr);
   5.225 -        return 0;
   5.226 -    }
   5.227 -
   5.228 -    if ( unlikely(!get_page(page, d)) )
   5.229 -    {
   5.230 -        MEM_LOG("Could not get page ref for pfn %08lx", page_nr);
   5.231 -        return 0;
   5.232 -    }
   5.233 -
   5.234 -    return 1;
   5.235 -}
   5.236 -
   5.237 -
   5.238 -static int get_page_and_type_from_pagenr(unsigned long page_nr, 
   5.239 -                                         u32 type,
   5.240 -                                         struct domain *d)
   5.241 -{
   5.242 -    struct pfn_info *page = &frame_table[page_nr];
   5.243 -
   5.244 -    if ( unlikely(!get_page_from_pagenr(page_nr, d)) )
   5.245 -        return 0;
   5.246 -
   5.247 -    if ( unlikely(!get_page_type(page, type)) )
   5.248 -    {
   5.249 -        MEM_LOG("Bad page type for pfn %08lx (%08x)", 
   5.250 -                page_nr, page->type_and_flags);
   5.251 -        put_page(page);
   5.252 -        return 0;
   5.253 -    }
   5.254 -
   5.255 -    return 1;
   5.256 -}
   5.257 -
   5.258 -
   5.259 -/*
    5.260 - * We allow L2 tables to map each other (a.k.a. linear page tables). This
    5.261 - * needs some special care with reference counts and access permissions:
   5.262 - *  1. The mapping entry must be read-only, or the guest may get write access
   5.263 - *     to its own PTEs.
   5.264 - *  2. We must only bump the reference counts for an *already validated*
    5.265 - *     L2 table, or we can end up deadlocked in get_page_type(), waiting
    5.266 - *     on a validation that may itself be waiting on ours to complete.
   5.267 - *  3. We only need to increment the reference counts for the mapped page
   5.268 - *     frame if it is mapped by a different L2 table. This is sufficient and
   5.269 - *     also necessary to allow validation of an L2 table mapping itself.
   5.270 - */
   5.271 -static int get_linear_pagetable(l2_pgentry_t l2e, unsigned long pfn)
   5.272 -{
   5.273 -    u32 x, y;
   5.274 -    struct pfn_info *page;
   5.275 -
   5.276 -    if ( (l2_pgentry_val(l2e) & _PAGE_RW) )
   5.277 -    {
   5.278 -        MEM_LOG("Attempt to create linear p.t. with write perms");
   5.279 -        return 0;
   5.280 -    }
   5.281 -
   5.282 -    if ( (l2_pgentry_val(l2e) >> PAGE_SHIFT) != pfn )
   5.283 -    {
   5.284 -        /* Make sure the mapped frame belongs to the correct domain. */
   5.285 -        if ( unlikely(!get_page_from_pagenr(l2_pgentry_to_pagenr(l2e), PTS)) )
   5.286 -            return 0;
   5.287 -
   5.288 -        /*
   5.289 -         * Make sure that the mapped frame is an already-validated L2 table. 
   5.290 -         * If so, atomically increment the count (checking for overflow).
   5.291 -         */
   5.292 -        page = &frame_table[l2_pgentry_to_pagenr(l2e)];
   5.293 -        y = page->type_and_flags;
   5.294 -        do {
   5.295 -            x = y;
   5.296 -            if ( unlikely((x & PGT_count_mask) == PGT_count_mask) ||
   5.297 -                 unlikely((x & (PGT_type_mask|PGT_validated)) != 
   5.298 -                          (PGT_l2_page_table|PGT_validated)) )
   5.299 -            {
   5.300 -                put_page(page);
   5.301 -                return 0;
   5.302 -            }
   5.303 -        }
   5.304 -        while ( (y = cmpxchg(&page->type_and_flags, x, x + 1)) != x );
   5.305 -    }
   5.306 -
   5.307 -    return 1;
   5.308 -}
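A linear page table, as validated by get_linear_pagetable() above, is an L2 slot that maps a page directory (possibly itself) read-only, so every PTE becomes reachable through one fixed virtual window; this is exactly how linear_pg_table is used in do_update_va_mapping(). The address arithmetic behind that window, for a 32-bit two-level layout and a hypothetical self-map slot, looks like this:

    #include <stdint.h>
    #include <stdio.h>

    /* Illustration of what the self-map buys: with the L2 itself installed at
     * L2 index SELF, the PTE for any virtual address is addressable at a fixed
     * virtual window.  SELF is hypothetical; Xen places the window at
     * LINEAR_PT_VIRT_START. */
    #define PAGE_SHIFT 12
    #define SELF       0x300u   /* hypothetical L2 slot holding the self-map */

    static uintptr_t linear_pte_va(uintptr_t va)
    {
        /* Walking through the self-mapped slot consumes the L2 level of the
         * hardware walk, so the walk lands on the L1 page itself; the
         * remaining bits then index the PTE for 'va'. */
        return ((uintptr_t)SELF << 22) | ((va >> PAGE_SHIFT) << 2);
    }

    int main(void)
    {
        uintptr_t va = 0x08048123;
        printf("PTE for %08lx is mapped at %08lx\n",
               (unsigned long)va, (unsigned long)linear_pte_va(va));
        return 0;
    }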
   5.309 -
   5.310 -
   5.311 -static int get_page_from_l1e(l1_pgentry_t l1e)
   5.312 -{
   5.313 -    unsigned long l1v = l1_pgentry_val(l1e);
   5.314 -    unsigned long pfn = l1_pgentry_to_pagenr(l1e);
   5.315 -    extern int domain_iomem_in_pfn(struct domain *d, unsigned long pfn);
   5.316 -
   5.317 -    if ( !(l1v & _PAGE_PRESENT) )
   5.318 -        return 1;
   5.319 -
   5.320 -    if ( unlikely(l1v & (_PAGE_GLOBAL|_PAGE_PAT)) )
   5.321 -    {
   5.322 -        MEM_LOG("Bad L1 type settings %04lx", l1v & (_PAGE_GLOBAL|_PAGE_PAT));
   5.323 -        return 0;
   5.324 -    }
   5.325 -
   5.326 -    if ( unlikely(!pfn_is_ram(pfn)) )
   5.327 -    {
   5.328 -        if ( IS_PRIV(current) )
   5.329 -            return 1;
   5.330 -
   5.331 -        if ( IS_CAPABLE_PHYSDEV(current) )
   5.332 -            return domain_iomem_in_pfn(current, pfn);
   5.333 -
   5.334 -        MEM_LOG("Non-privileged attempt to map I/O space %08lx", pfn);
   5.335 -        return 0;
   5.336 -    }
   5.337 -
   5.338 -    if ( l1v & _PAGE_RW )
   5.339 -    {
   5.340 -        if ( unlikely(!get_page_and_type_from_pagenr(
   5.341 -            pfn, PGT_writeable_page, GPS)) )
   5.342 -            return 0;
   5.343 -        set_bit(_PGC_tlb_flush_on_type_change, 
   5.344 -                &frame_table[pfn].count_and_flags);
   5.345 -        return 1;
   5.346 -    }
   5.347 -
   5.348 -    return get_page_from_pagenr(pfn, GPS);
   5.349 -}
   5.350 -
   5.351 -
   5.352 -/* NB. Virtual address 'l2e' maps to a machine address within frame 'pfn'. */
   5.353 -static int get_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn)
   5.354 -{
   5.355 -    if ( !(l2_pgentry_val(l2e) & _PAGE_PRESENT) )
   5.356 -        return 1;
   5.357 -
   5.358 -    if ( unlikely((l2_pgentry_val(l2e) & (_PAGE_GLOBAL|_PAGE_PSE))) )
   5.359 -    {
   5.360 -        MEM_LOG("Bad L2 page type settings %04lx",
   5.361 -                l2_pgentry_val(l2e) & (_PAGE_GLOBAL|_PAGE_PSE));
   5.362 -        return 0;
   5.363 -    }
   5.364 -
   5.365 -    if ( unlikely(!get_page_and_type_from_pagenr(
   5.366 -        l2_pgentry_to_pagenr(l2e), PGT_l1_page_table, PTS)) )
   5.367 -        return get_linear_pagetable(l2e, pfn);
   5.368 -
   5.369 -    return 1;
   5.370 -}
   5.371 -
   5.372 -
   5.373 -static void put_page_from_l1e(l1_pgentry_t l1e)
   5.374 -{
   5.375 -    struct pfn_info *page = &frame_table[l1_pgentry_to_pagenr(l1e)];
   5.376 -    unsigned long    l1v  = l1_pgentry_val(l1e);
   5.377 -
   5.378 -    if ( !(l1v & _PAGE_PRESENT) || !pfn_is_ram(l1v >> PAGE_SHIFT) )
   5.379 -        return;
   5.380 -
   5.381 -    if ( l1v & _PAGE_RW )
   5.382 -    {
   5.383 -        put_page_and_type(page);
   5.384 -    }
   5.385 -    else
   5.386 -    {
   5.387 -        /* We expect this is rare so we blow the entire shadow LDT. */
   5.388 -        if ( unlikely(((page->type_and_flags & PGT_type_mask) == 
   5.389 -                       PGT_ldt_page)) &&
   5.390 -             unlikely(((page->type_and_flags & PGT_count_mask) != 0)) )
   5.391 -            invalidate_shadow_ldt();
   5.392 -        put_page(page);
   5.393 -    }
   5.394 -}
   5.395 -
   5.396 -
   5.397 -/*
   5.398 - * NB. Virtual address 'l2e' maps to a machine address within frame 'pfn'.
   5.399 - * Note also that this automatically deals correctly with linear p.t.'s.
   5.400 - */
   5.401 -static void put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn)
   5.402 -{
   5.403 -    if ( (l2_pgentry_val(l2e) & _PAGE_PRESENT) && 
   5.404 -         ((l2_pgentry_val(l2e) >> PAGE_SHIFT) != pfn) )
   5.405 -        put_page_and_type(&frame_table[l2_pgentry_to_pagenr(l2e)]);
   5.406 -}
   5.407 -
   5.408 -
   5.409 -static int alloc_l2_table(struct pfn_info *page)
   5.410 -{
   5.411 -    unsigned long page_nr = page - frame_table;
   5.412 -    l2_pgentry_t *pl2e;
   5.413 -    int i;
   5.414 -   
   5.415 -    pl2e = map_domain_mem(page_nr << PAGE_SHIFT);
   5.416 -
   5.417 -    for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
   5.418 -        if ( unlikely(!get_page_from_l2e(pl2e[i], page_nr)) )
   5.419 -            goto fail;
   5.420 -    
   5.421 -#if defined(__i386__)
   5.422 -    /* Now we add our private high mappings. */
   5.423 -    memcpy(&pl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE], 
   5.424 -           &idle_pg_table[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
   5.425 -           HYPERVISOR_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));
   5.426 -    pl2e[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
   5.427 -        mk_l2_pgentry((page_nr << PAGE_SHIFT) | __PAGE_HYPERVISOR);
   5.428 -    pl2e[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] =
   5.429 -        mk_l2_pgentry(__pa(page->u.domain->mm.perdomain_pt) | 
   5.430 -                      __PAGE_HYPERVISOR);
   5.431 -#endif
   5.432 -
   5.433 -    unmap_domain_mem(pl2e);
   5.434 -    return 1;
   5.435 -
   5.436 - fail:
   5.437 -    while ( i-- > 0 )
   5.438 -        put_page_from_l2e(pl2e[i], page_nr);
   5.439 -
   5.440 -    unmap_domain_mem(pl2e);
   5.441 -    return 0;
   5.442 -}
   5.443 -
   5.444 -
   5.445 -static int alloc_l1_table(struct pfn_info *page)
   5.446 -{
   5.447 -    unsigned long page_nr = page - frame_table;
   5.448 -    l1_pgentry_t *pl1e;
   5.449 -    int i;
   5.450 -
   5.451 -    pl1e = map_domain_mem(page_nr << PAGE_SHIFT);
   5.452 -
   5.453 -    for ( i = 0; i < ENTRIES_PER_L1_PAGETABLE; i++ )
   5.454 -        if ( unlikely(!get_page_from_l1e(pl1e[i])) )
   5.455 -            goto fail;
   5.456 -
   5.457 -    unmap_domain_mem(pl1e);
   5.458 -    return 1;
   5.459 -
   5.460 - fail:
   5.461 -    while ( i-- > 0 )
   5.462 -        put_page_from_l1e(pl1e[i]);
   5.463 -
   5.464 -    unmap_domain_mem(pl1e);
   5.465 -    return 0;
   5.466 -}
   5.467 -
   5.468 -
   5.469 -static void free_l2_table(struct pfn_info *page)
   5.470 -{
   5.471 -    unsigned long page_nr = page - frame_table;
   5.472 -    l2_pgentry_t *pl2e;
   5.473 -    int i;
   5.474 -
   5.475 -    pl2e = map_domain_mem(page_nr << PAGE_SHIFT);
   5.476 -
   5.477 -    for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
   5.478 -        put_page_from_l2e(pl2e[i], page_nr);
   5.479 -
   5.480 -    unmap_domain_mem(pl2e);
   5.481 -}
   5.482 -
   5.483 -
   5.484 -static void free_l1_table(struct pfn_info *page)
   5.485 -{
   5.486 -    unsigned long page_nr = page - frame_table;
   5.487 -    l1_pgentry_t *pl1e;
   5.488 -    int i;
   5.489 -
   5.490 -    pl1e = map_domain_mem(page_nr << PAGE_SHIFT);
   5.491 -
   5.492 -    for ( i = 0; i < ENTRIES_PER_L1_PAGETABLE; i++ )
   5.493 -        put_page_from_l1e(pl1e[i]);
   5.494 -
   5.495 -    unmap_domain_mem(pl1e);
   5.496 -}
   5.497 -
   5.498 -
   5.499 -static inline int update_l2e(l2_pgentry_t *pl2e, 
   5.500 -                             l2_pgentry_t  ol2e, 
   5.501 -                             l2_pgentry_t  nl2e)
   5.502 -{
   5.503 -    unsigned long o = cmpxchg((unsigned long *)pl2e, 
   5.504 -                              l2_pgentry_val(ol2e), 
   5.505 -                              l2_pgentry_val(nl2e));
   5.506 -    if ( o != l2_pgentry_val(ol2e) )
   5.507 -        MEM_LOG("Failed to update %08lx -> %08lx: saw %08lx\n",
   5.508 -                l2_pgentry_val(ol2e), l2_pgentry_val(nl2e), o);
   5.509 -    return (o == l2_pgentry_val(ol2e));
   5.510 -}
   5.511 -
   5.512 -
   5.513 -/* Update the L2 entry at pl2e to new value nl2e. pl2e is within frame pfn. */
   5.514 -static int mod_l2_entry(l2_pgentry_t *pl2e, 
   5.515 -                        l2_pgentry_t nl2e, 
   5.516 -                        unsigned long pfn)
   5.517 -{
   5.518 -    l2_pgentry_t ol2e;
   5.519 -    unsigned long _ol2e;
   5.520 -
   5.521 -    if ( unlikely((((unsigned long)pl2e & (PAGE_SIZE-1)) >> 2) >=
   5.522 -                  DOMAIN_ENTRIES_PER_L2_PAGETABLE) )
   5.523 -    {
   5.524 -        MEM_LOG("Illegal L2 update attempt in Xen-private area %p", pl2e);
   5.525 -        return 0;
   5.526 -    }
   5.527 -
   5.528 -    if ( unlikely(__get_user(_ol2e, (unsigned long *)pl2e) != 0) )
   5.529 -        return 0;
   5.530 -    ol2e = mk_l2_pgentry(_ol2e);
   5.531 -
   5.532 -    if ( l2_pgentry_val(nl2e) & _PAGE_PRESENT )
   5.533 -    {
   5.534 -        /* Differ in mapping (bits 12-31) or presence (bit 0)? */
   5.535 -        if ( ((l2_pgentry_val(ol2e) ^ l2_pgentry_val(nl2e)) & ~0xffe) == 0 )
   5.536 -            return update_l2e(pl2e, ol2e, nl2e);
   5.537 -
   5.538 -        if ( unlikely(!get_page_from_l2e(nl2e, pfn)) )
   5.539 -            return 0;
   5.540 -        
   5.541 -        if ( unlikely(!update_l2e(pl2e, ol2e, nl2e)) )
   5.542 -        {
   5.543 -            put_page_from_l2e(nl2e, pfn);
   5.544 -            return 0;
   5.545 -        }
   5.546 -        
   5.547 -        put_page_from_l2e(ol2e, pfn);
   5.548 -        return 1;
   5.549 -    }
   5.550 -
   5.551 -    if ( unlikely(!update_l2e(pl2e, ol2e, nl2e)) )
   5.552 -        return 0;
   5.553 -
   5.554 -    put_page_from_l2e(ol2e, pfn);
   5.555 -    return 1;
   5.556 -}
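mod_l2_entry() above (and mod_l1_entry() below) take a fast path when the old and new entries differ only in low attribute bits: the mapped frame and the present bit are unchanged, so no reference counts need adjusting. For L1 entries _PAGE_RW must also match, since a writable mapping holds a different (writeable) type reference, hence the 0xffc mask there versus 0xffe here. The masks can be exercised in isolation:

    #include <stdint.h>
    #include <stdio.h>

    /* Standalone illustration of the fast-path masks.  For an L2 entry, only
     * bits 1-11 may change without revalidation (frame and present bit must
     * match); for an L1 entry bit 1 (_PAGE_RW) must match as well. */
    #define L2_ATTR_ONLY_MASK (~0xffeUL)   /* must match: bit 0 and bits 12+   */
    #define L1_ATTR_ONLY_MASK (~0xffcUL)   /* must match: bits 0-1 and bits 12+ */

    static int l2_fast_path(uint32_t o, uint32_t n) { return ((o ^ n) & L2_ATTR_ONLY_MASK) == 0; }
    static int l1_fast_path(uint32_t o, uint32_t n) { return ((o ^ n) & L1_ATTR_ONLY_MASK) == 0; }

    int main(void)
    {
        uint32_t old = 0x00123067;   /* present, rw, user, accessed, dirty */
        printf("L1 clear DIRTY: %d\n", l1_fast_path(old, old & ~0x40u)); /* 1 */
        printf("L1 clear RW:    %d\n", l1_fast_path(old, old & ~0x02u)); /* 0: ref type changes */
        printf("L2 clear RW:    %d\n", l2_fast_path(old, old & ~0x02u)); /* 1 */
        printf("L1 remap frame: %d\n", l1_fast_path(old, 0x00456067));   /* 0 */
        return 0;
    }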
   5.557 -
   5.558 -
   5.559 -static inline int update_l1e(l1_pgentry_t *pl1e, 
   5.560 -                             l1_pgentry_t  ol1e, 
   5.561 -                             l1_pgentry_t  nl1e)
   5.562 -{
   5.563 -    unsigned long o = l1_pgentry_val(ol1e);
   5.564 -    unsigned long n = l1_pgentry_val(nl1e);
   5.565 -
   5.566 -    if ( unlikely(cmpxchg_user(pl1e, o, n) != 0) ||
   5.567 -         unlikely(o != l1_pgentry_val(ol1e)) )
   5.568 -    {
   5.569 -        MEM_LOG("Failed to update %08lx -> %08lx: saw %08lx\n",
   5.570 -                l1_pgentry_val(ol1e), l1_pgentry_val(nl1e), o);
   5.571 -        return 0;
   5.572 -    }
   5.573 -
   5.574 -    return 1;
   5.575 -}
   5.576 -
   5.577 -
   5.578 -/* Update the L1 entry at pl1e to new value nl1e. */
   5.579 -static int mod_l1_entry(l1_pgentry_t *pl1e, l1_pgentry_t nl1e)
   5.580 -{
   5.581 -    l1_pgentry_t ol1e;
   5.582 -    unsigned long _ol1e;
   5.583 -
   5.584 -    if ( unlikely(__get_user(_ol1e, (unsigned long *)pl1e) != 0) )
   5.585 -    {
   5.586 -        MEM_LOG("Bad get_user\n");
   5.587 -        return 0;
   5.588 -    }
   5.589 -    
   5.590 -    ol1e = mk_l1_pgentry(_ol1e);
   5.591 -
   5.592 -    if ( l1_pgentry_val(nl1e) & _PAGE_PRESENT )
   5.593 -    {
   5.594 -        /* Differ in mapping (bits 12-31), r/w (bit 1), or presence (bit 0)? */
   5.595 -        if ( ((l1_pgentry_val(ol1e) ^ l1_pgentry_val(nl1e)) & ~0xffc) == 0 )
   5.596 -            return update_l1e(pl1e, ol1e, nl1e);
   5.597 -
   5.598 -        if ( unlikely(!get_page_from_l1e(nl1e)) )
   5.599 -            return 0;
   5.600 -        
   5.601 -        if ( unlikely(!update_l1e(pl1e, ol1e, nl1e)) )
   5.602 -        {
   5.603 -            put_page_from_l1e(nl1e);
   5.604 -            return 0;
   5.605 -        }
   5.606 -        
   5.607 -        put_page_from_l1e(ol1e);
   5.608 -        return 1;
   5.609 -    }
   5.610 -
   5.611 -    if ( unlikely(!update_l1e(pl1e, ol1e, nl1e)) )
   5.612 -        return 0;
   5.613 -    
   5.614 -    put_page_from_l1e(ol1e);
   5.615 -    return 1;
   5.616 -}
   5.617 -
   5.618 -
   5.619 -int alloc_page_type(struct pfn_info *page, unsigned int type)
   5.620 -{
   5.621 -    if ( unlikely(test_and_clear_bit(_PGC_tlb_flush_on_type_change, 
   5.622 -                                     &page->count_and_flags)) )
   5.623 -    {
   5.624 -        struct domain *p = page->u.domain;
   5.625 -        if ( unlikely(NEED_FLUSH(tlbflush_time[p->processor],
   5.626 -                                 page->tlbflush_timestamp)) )
   5.627 -        {
   5.628 -            perfc_incr(need_flush_tlb_flush);
   5.629 -            flush_tlb_cpu(p->processor);
   5.630 -        }
   5.631 -    }
   5.632 -
   5.633 -    switch ( type )
   5.634 -    {
   5.635 -    case PGT_l1_page_table:
   5.636 -        return alloc_l1_table(page);
   5.637 -    case PGT_l2_page_table:
   5.638 -        return alloc_l2_table(page);
   5.639 -    case PGT_gdt_page:
   5.640 -    case PGT_ldt_page:
   5.641 -        return alloc_segdesc_page(page);
   5.642 -    default:
   5.643 -        BUG();
   5.644 -    }
   5.645 -
   5.646 -    return 0;
   5.647 -}
   5.648 -
   5.649 -
   5.650 -void free_page_type(struct pfn_info *page, unsigned int type)
   5.651 -{
   5.652 -    switch ( type )
   5.653 -    {
   5.654 -    case PGT_l1_page_table:
   5.655 -        free_l1_table(page);
   5.656 -        if ( unlikely(current->mm.shadow_mode) && 
   5.657 -             (get_shadow_status(&current->mm, 
   5.658 -                                page-frame_table) & PSH_shadowed) )
   5.659 -        {
   5.660 -            /*
   5.661 -             * Using 'current->mm' is safe and correct because page-table pages
   5.662 -             * are not shared across domains. Updates to such pages' types are
   5.663 -             * thus only done within the context of the owning domain. The one
   5.664 -             * exception is when destroying a domain; however, this is not a
   5.665 -             * problem as the currently-executing domain will not have this MFN
   5.666 -             * shadowed, and at domain end-of-day we explicitly unshadow
   5.667 -             * everything so that nothing will get left lying around.
   5.668 -             */
   5.669 -            unshadow_table( page-frame_table, type );
   5.670 -            put_shadow_status(&current->mm);
   5.671 -        }
   5.672 -        break;
   5.673 -
   5.674 -    case PGT_l2_page_table:
   5.675 -        free_l2_table(page);
   5.676 -        if ( unlikely(current->mm.shadow_mode) && 
   5.677 -             (get_shadow_status(&current->mm, 
   5.678 -                                page-frame_table) & PSH_shadowed) )
   5.679 -        {
   5.680 -            unshadow_table( page-frame_table, type );
   5.681 -            put_shadow_status(&current->mm);
   5.682 -        }
   5.683 -        break;
   5.684 -
   5.685 -    default:
   5.686 -        BUG();
   5.687 -    }
   5.688 -}
   5.689 -
   5.690 -
   5.691 -static int do_extended_command(unsigned long ptr, unsigned long val)
   5.692 -{
   5.693 -    int okay = 1, cpu = smp_processor_id();
   5.694 -    unsigned int cmd = val & MMUEXT_CMD_MASK;
   5.695 -    unsigned long pfn = ptr >> PAGE_SHIFT;
   5.696 -    unsigned long old_base_pfn;
   5.697 -    struct pfn_info *page = &frame_table[pfn];
   5.698 -    struct domain *d = current, *nd, *e;
   5.699 -    u32 x, y;
   5.700 -    domid_t domid;
   5.701 -
   5.702 -    switch ( cmd )
   5.703 -    {
   5.704 -    case MMUEXT_PIN_L1_TABLE:
   5.705 -    case MMUEXT_PIN_L2_TABLE:
   5.706 -        okay = get_page_and_type_from_pagenr(
   5.707 -            pfn, 
   5.708 -            (cmd==MMUEXT_PIN_L2_TABLE) ? PGT_l2_page_table : PGT_l1_page_table,
   5.709 -            PTS);
   5.710 -        if ( unlikely(!okay) )
   5.711 -        {
   5.712 -            MEM_LOG("Error while pinning pfn %08lx", pfn);
   5.713 -            put_page(page);
   5.714 -            break;
   5.715 -        }
   5.716 -
   5.717 -        if ( unlikely(test_and_set_bit(_PGC_guest_pinned, 
   5.718 -                                       &page->count_and_flags)) )
   5.719 -        {
   5.720 -            MEM_LOG("Pfn %08lx already pinned", pfn);
   5.721 -            put_page_and_type(page);
   5.722 -            okay = 0;
   5.723 -            break;
   5.724 -        }
   5.725 -
   5.726 -        break;
   5.727 -
   5.728 -    case MMUEXT_UNPIN_TABLE:
   5.729 -        if ( unlikely(!(okay = get_page_from_pagenr(pfn, PTS))) )
   5.730 -        {
   5.731 -            MEM_LOG("Page %08lx bad domain (dom=%p)",
   5.732 -                    ptr, page->u.domain);
   5.733 -        }
   5.734 -        else if ( likely(test_and_clear_bit(_PGC_guest_pinned, 
   5.735 -                                            &page->count_and_flags)) )
   5.736 -        {
   5.737 -            put_page_and_type(page);
   5.738 -            put_page(page);
   5.739 -        }
   5.740 -        else
   5.741 -        {
   5.742 -            okay = 0;
   5.743 -            put_page(page);
   5.744 -            MEM_LOG("Pfn %08lx not pinned", pfn);
   5.745 -        }
   5.746 -        break;
   5.747 -
   5.748 -    case MMUEXT_NEW_BASEPTR:
   5.749 -        okay = get_page_and_type_from_pagenr(pfn, PGT_l2_page_table, d);
   5.750 -        if ( likely(okay) )
   5.751 -        {
   5.752 -            invalidate_shadow_ldt();
   5.753 -
   5.754 -            percpu_info[cpu].deferred_ops &= ~DOP_FLUSH_TLB;
   5.755 -            old_base_pfn = pagetable_val(d->mm.pagetable) >> PAGE_SHIFT;
   5.756 -            d->mm.pagetable = mk_pagetable(pfn << PAGE_SHIFT);
   5.757 -
   5.758 -            shadow_mk_pagetable(&d->mm);
   5.759 -
   5.760 -            write_ptbase(&d->mm);
   5.761 -
   5.762 -            put_page_and_type(&frame_table[old_base_pfn]);    
   5.763 -
   5.764 -            /*
   5.765 -             * Note that we tick the clock /after/ dropping the old base's
   5.766 -             * reference count. If the page tables got freed then this will
   5.767 -             * avoid unnecessary TLB flushes when the pages are reused.
   5.768 -             */
   5.769 -            tlb_clocktick();
   5.770 -        }
   5.771 -        else
   5.772 -        {
   5.773 -            MEM_LOG("Error while installing new baseptr %08lx", ptr);
   5.774 -        }
   5.775 -        break;
   5.776 -        
   5.777 -    case MMUEXT_TLB_FLUSH:
   5.778 -        percpu_info[cpu].deferred_ops |= DOP_FLUSH_TLB;
   5.779 -        break;
   5.780 -    
   5.781 -    case MMUEXT_INVLPG:
   5.782 -        __flush_tlb_one(ptr);
   5.783 -        break;
   5.784 -
   5.785 -    case MMUEXT_SET_LDT:
   5.786 -    {
   5.787 -        unsigned long ents = val >> MMUEXT_CMD_SHIFT;
   5.788 -        if ( ((ptr & (PAGE_SIZE-1)) != 0) || 
   5.789 -             (ents > 8192) ||
   5.790 -             ((ptr+ents*LDT_ENTRY_SIZE) < ptr) ||
   5.791 -             ((ptr+ents*LDT_ENTRY_SIZE) > PAGE_OFFSET) )
   5.792 -        {
   5.793 -            okay = 0;
   5.794 -            MEM_LOG("Bad args to SET_LDT: ptr=%08lx, ents=%08lx", ptr, ents);
   5.795 -        }
   5.796 -        else if ( (d->mm.ldt_ents != ents) || 
   5.797 -                  (d->mm.ldt_base != ptr) )
   5.798 -        {
   5.799 -            invalidate_shadow_ldt();
   5.800 -            d->mm.ldt_base = ptr;
   5.801 -            d->mm.ldt_ents = ents;
   5.802 -            load_LDT(d);
   5.803 -            percpu_info[cpu].deferred_ops &= ~DOP_RELOAD_LDT;
   5.804 -            if ( ents != 0 )
   5.805 -                percpu_info[cpu].deferred_ops |= DOP_RELOAD_LDT;
   5.806 -        }
   5.807 -        break;
   5.808 -    }
   5.809 -
   5.810 -    case MMUEXT_SET_SUBJECTDOM:
   5.811 -        domid = ((domid_t)((ptr&~0xFFFF)|(val>>16)));
   5.812 -
   5.813 -        if ( !IS_PRIV(d) )
   5.814 -        {
   5.815 -            MEM_LOG("Dom %u has no privilege to set subject domain",
   5.816 -                    d->domain);
   5.817 -            okay = 0;
   5.818 -        }
   5.819 -        else
   5.820 -        {
   5.821 -            if ( percpu_info[cpu].gps != NULL )
   5.822 -                put_domain(percpu_info[cpu].gps);
   5.823 -            percpu_info[cpu].gps = find_domain_by_id(domid);
   5.824 -            percpu_info[cpu].pts = (val & SET_PAGETABLE_SUBJECTDOM) ? 
   5.825 -                percpu_info[cpu].gps : NULL;
   5.826 -            if ( percpu_info[cpu].gps == NULL )
   5.827 -            {
   5.828 -                MEM_LOG("Unknown domain '%u'", domid);
   5.829 -                okay = 0;
   5.830 -            }
   5.831 -        }
   5.832 -        break;
   5.833 -
   5.834 -    case MMUEXT_REASSIGN_PAGE:
   5.835 -        if ( unlikely(!IS_PRIV(d)) )
   5.836 -        {
   5.837 -            MEM_LOG("Dom %u has no reassignment priv", d->domain);
   5.838 -            okay = 0;
   5.839 -            break;
   5.840 -        }
   5.841 -
   5.842 -        if ( unlikely((e = percpu_info[cpu].gps) == NULL) )
   5.843 -        {
   5.844 -            MEM_LOG("No GPS to reassign pfn %08lx to\n", pfn);
   5.845 -            okay = 0;
   5.846 -            break;
   5.847 -        }
   5.848 -
   5.849 -        /*
   5.850 -         * Grab both page_list locks, in order. This prevents the page from
   5.851 -         * disappearing elsewhere while we modify the owner, and we'll need
   5.852 -         * both locks if we're successful so that we can change lists.
   5.853 -         */
   5.854 -        if ( d < e )
   5.855 -        {
   5.856 -            spin_lock(&d->page_alloc_lock);
   5.857 -            spin_lock(&e->page_alloc_lock);
   5.858 -        }
   5.859 -        else
   5.860 -        {
   5.861 -            spin_lock(&e->page_alloc_lock);
   5.862 -            spin_lock(&d->page_alloc_lock);
   5.863 -        }
   5.864 -
   5.865 -        /* A domain shouldn't have PGC_allocated pages when it is dying. */
   5.866 -        if ( unlikely(test_bit(DF_DYING, &e->flags)) ||
   5.867 -             unlikely(IS_XEN_HEAP_FRAME(page)) )
   5.868 -        {
   5.869 -            okay = 0;
   5.870 -            goto reassign_fail;
   5.871 -        }
   5.872 -
   5.873 -        /*
   5.874 -         * The tricky bit: atomically change owner while there is just one
   5.875 -         * benign reference to the page (PGC_allocated). If that reference
   5.876 -         * disappears then the deallocation routine will safely spin.
   5.877 -         */
   5.878 -        nd = page->u.domain;
   5.879 -        y  = page->count_and_flags;
   5.880 -        do {
   5.881 -            x = y;
   5.882 -            if ( unlikely((x & (PGC_count_mask|PGC_allocated)) != 
   5.883 -                          (1|PGC_allocated)) ||
   5.884 -                 unlikely(nd != d) )
   5.885 -            {
   5.886 -                MEM_LOG("Bad page values %08lx: ed=%p(%u), sd=%p,"
   5.887 -                        " caf=%08x, taf=%08x\n", page_to_pfn(page),
   5.888 -                        d, d->domain, nd, x, page->type_and_flags);
   5.889 -                okay = 0;
   5.890 -                goto reassign_fail;
   5.891 -            }
   5.892 -            __asm__ __volatile__(
   5.893 -                LOCK_PREFIX "cmpxchg8b %3"
   5.894 -                : "=a" (nd), "=d" (y), "=b" (e),
   5.895 -                "=m" (*(volatile u64 *)(&page->u.domain))
   5.896 -                : "0" (d), "1" (x), "b" (e), "c" (x) );
   5.897 -        } 
   5.898 -        while ( unlikely(nd != d) || unlikely(y != x) );
   5.899 -        
   5.900 -        /*
   5.901 -         * Unlink from 'd'. We transferred at least one reference to 'e', so
    5.902 -         * no one else is spinning to try to delete this page from 'd'.
   5.903 -         */
   5.904 -        d->tot_pages--;
   5.905 -        list_del(&page->list);
   5.906 -        
   5.907 -        /*
   5.908 -         * Add the page to 'e'. Someone may already have removed the last
   5.909 -         * reference and want to remove the page from 'e'. However, we have
   5.910 -         * the lock so they'll spin waiting for us.
   5.911 -         */
   5.912 -        if ( unlikely(e->tot_pages++ == 0) )
   5.913 -            get_domain(e);
   5.914 -        list_add_tail(&page->list, &e->page_list);
   5.915 -
   5.916 -    reassign_fail:        
   5.917 -        spin_unlock(&d->page_alloc_lock);
   5.918 -        spin_unlock(&e->page_alloc_lock);
   5.919 -        break;
   5.920 -
   5.921 -    case MMUEXT_RESET_SUBJECTDOM:
   5.922 -        if ( percpu_info[cpu].gps != NULL )
   5.923 -            put_domain(percpu_info[cpu].gps);
   5.924 -        percpu_info[cpu].gps = percpu_info[cpu].pts = NULL;
   5.925 -        break;
   5.926 -
   5.927 -    default:
   5.928 -        MEM_LOG("Invalid extended pt command 0x%08lx", val & MMUEXT_CMD_MASK);
   5.929 -        okay = 0;
   5.930 -        break;
   5.931 -    }
   5.932 -
   5.933 -    return okay;
   5.934 -}
   5.935 -
   5.936 -
   5.937 -int do_mmu_update(mmu_update_t *ureqs, int count, int *success_count)
   5.938 -{
   5.939 -    mmu_update_t req;
   5.940 -    unsigned long va = 0, deferred_ops, pfn, prev_pfn = 0;
   5.941 -    struct pfn_info *page;
   5.942 -    int rc = 0, okay = 1, i, cpu = smp_processor_id();
   5.943 -    unsigned int cmd;
   5.944 -    unsigned long prev_spfn = 0;
   5.945 -    l1_pgentry_t *prev_spl1e = 0;
   5.946 -
   5.947 -    perfc_incrc(calls_to_mmu_update); 
   5.948 -    perfc_addc(num_page_updates, count);
   5.949 -
   5.950 -    for ( i = 0; i < count; i++ )
   5.951 -    {
   5.952 -        if ( unlikely(copy_from_user(&req, ureqs, sizeof(req)) != 0) )
   5.953 -        {
   5.954 -            MEM_LOG("Bad copy_from_user");
   5.955 -            rc = -EFAULT;
   5.956 -            break;
   5.957 -        }
   5.958 -
   5.959 -        cmd = req.ptr & (sizeof(l1_pgentry_t)-1);
   5.960 -        pfn = req.ptr >> PAGE_SHIFT;
   5.961 -
   5.962 -        okay = 0;
   5.963 -
   5.964 -        switch ( cmd )
   5.965 -        {
   5.966 -            /*
   5.967 -             * MMU_NORMAL_PT_UPDATE: Normal update to any level of page table.
   5.968 -             */
   5.969 -        case MMU_NORMAL_PT_UPDATE:
   5.970 -            if ( unlikely(!get_page_from_pagenr(pfn, PTS)) )
   5.971 -            {
   5.972 -                MEM_LOG("Could not get page for normal update");
   5.973 -                break;
   5.974 -            }
   5.975 -
   5.976 -            if ( likely(prev_pfn == pfn) )
   5.977 -            {
   5.978 -                va = (va & PAGE_MASK) | (req.ptr & ~PAGE_MASK);
   5.979 -            }
   5.980 -            else
   5.981 -            {
   5.982 -                if ( prev_pfn != 0 )
   5.983 -                    unmap_domain_mem((void *)va);
   5.984 -                va = (unsigned long)map_domain_mem(req.ptr);
   5.985 -                prev_pfn = pfn;
   5.986 -            }
   5.987 -
   5.988 -            page = &frame_table[pfn];
   5.989 -            switch ( (page->type_and_flags & PGT_type_mask) )
   5.990 -            {
   5.991 -            case PGT_l1_page_table: 
   5.992 -                if ( likely(get_page_type(page, PGT_l1_page_table)) )
   5.993 -                {
   5.994 -                    okay = mod_l1_entry((l1_pgentry_t *)va, 
   5.995 -                                        mk_l1_pgentry(req.val)); 
   5.996 -
   5.997 -                    if ( okay && unlikely(current->mm.shadow_mode) &&
   5.998 -                         (get_shadow_status(&current->mm, page-frame_table) &
   5.999 -                          PSH_shadowed) )
  5.1000 -                    {
  5.1001 -                        shadow_l1_normal_pt_update( req.ptr, req.val, 
  5.1002 -                                                    &prev_spfn, &prev_spl1e );
  5.1003 -                        put_shadow_status(&current->mm);
  5.1004 -                    }
  5.1005 -
  5.1006 -                    put_page_type(page);
  5.1007 -                }
  5.1008 -                break;
  5.1009 -            case PGT_l2_page_table:
  5.1010 -                if ( likely(get_page_type(page, PGT_l2_page_table)) )
  5.1011 -                {
  5.1012 -                    okay = mod_l2_entry((l2_pgentry_t *)va, 
  5.1013 -                                        mk_l2_pgentry(req.val),
  5.1014 -                                        pfn); 
  5.1015 -
  5.1016 -                    if ( okay && unlikely(current->mm.shadow_mode) &&
  5.1017 -                         (get_shadow_status(&current->mm, page-frame_table) & 
  5.1018 -                          PSH_shadowed) )
  5.1019 -                    {
  5.1020 -                        shadow_l2_normal_pt_update( req.ptr, req.val );
  5.1021 -                        put_shadow_status(&current->mm);
  5.1022 -                    }
  5.1023 -
  5.1024 -                    put_page_type(page);
  5.1025 -                }
  5.1026 -                break;
  5.1027 -            default:
  5.1028 -                if ( likely(get_page_type(page, PGT_writeable_page)) )
  5.1029 -                {
  5.1030 -                    *(unsigned long *)va = req.val;
  5.1031 -                    okay = 1;
  5.1032 -                    put_page_type(page);
  5.1033 -                }
  5.1034 -                break;
  5.1035 -            }
  5.1036 -
  5.1037 -            put_page(page);
  5.1038 -
  5.1039 -            break;
  5.1040 -
  5.1041 -        case MMU_MACHPHYS_UPDATE:
  5.1042 -            if ( unlikely(!get_page_from_pagenr(pfn, GPS)) )
  5.1043 -            {
  5.1044 -                MEM_LOG("Could not get page for mach->phys update");
  5.1045 -                break;
  5.1046 -            }
  5.1047 -
  5.1048 -            machine_to_phys_mapping[pfn] = req.val;
  5.1049 -            okay = 1;
  5.1050 -
  5.1051 -            /*
  5.1052 -             * If in log-dirty mode, mark the corresponding pseudo-physical
  5.1053 -             * page as dirty.
  5.1054 -             */
  5.1055 -            if( unlikely(current->mm.shadow_mode == SHM_logdirty) )
  5.1056 -                mark_dirty( &current->mm, pfn );
  5.1057 -
  5.1058 -            put_page(&frame_table[pfn]);
  5.1059 -            break;
  5.1060 -
  5.1061 -            /*
  5.1062 -             * MMU_EXTENDED_COMMAND: Extended command is specified
   5.1063 -             * in the least-significant bits of the 'value' field.
  5.1064 -             */
  5.1065 -        case MMU_EXTENDED_COMMAND:
  5.1066 -            req.ptr &= ~(sizeof(l1_pgentry_t) - 1);
  5.1067 -            okay = do_extended_command(req.ptr, req.val);
  5.1068 -            break;
  5.1069 -
  5.1070 -        default:
  5.1071 -            MEM_LOG("Invalid page update command %08lx", req.ptr);
  5.1072 -            break;
  5.1073 -        }
  5.1074 -
  5.1075 -        if ( unlikely(!okay) )
  5.1076 -        {
  5.1077 -            rc = -EINVAL;
  5.1078 -            break;
  5.1079 -        }
  5.1080 -
  5.1081 -        ureqs++;
  5.1082 -    }
  5.1083 -
  5.1084 -    if ( prev_pfn != 0 )
  5.1085 -        unmap_domain_mem((void *)va);
  5.1086 -
  5.1087 -    if( prev_spl1e != 0 ) 
  5.1088 -        unmap_domain_mem((void *)prev_spl1e);
  5.1089 -
  5.1090 -    deferred_ops = percpu_info[cpu].deferred_ops;
  5.1091 -    percpu_info[cpu].deferred_ops = 0;
  5.1092 -
  5.1093 -    if ( deferred_ops & DOP_FLUSH_TLB )
  5.1094 -        local_flush_tlb();
  5.1095 -
  5.1096 -    if ( deferred_ops & DOP_RELOAD_LDT )
  5.1097 -        (void)map_ldt_shadow_page(0);
  5.1098 -
  5.1099 -    if ( unlikely(percpu_info[cpu].gps != NULL) )
  5.1100 -    {
  5.1101 -        put_domain(percpu_info[cpu].gps);
  5.1102 -        percpu_info[cpu].gps = percpu_info[cpu].pts = NULL;
  5.1103 -    }
  5.1104 -
  5.1105 -    if ( unlikely(success_count != NULL) )
  5.1106 -        put_user(count, success_count);
  5.1107 -
  5.1108 -    return rc;
  5.1109 -}
  5.1110 -
  5.1111 -
  5.1112 -int do_update_va_mapping(unsigned long page_nr, 
  5.1113 -                         unsigned long val, 
  5.1114 -                         unsigned long flags)
  5.1115 -{
  5.1116 -    struct domain *p = current;
  5.1117 -    int err = 0;
  5.1118 -    unsigned int cpu = p->processor;
  5.1119 -    unsigned long deferred_ops;
  5.1120 -
  5.1121 -    perfc_incrc(calls_to_update_va);
  5.1122 -
  5.1123 -    if ( unlikely(page_nr >= (HYPERVISOR_VIRT_START >> PAGE_SHIFT)) )
  5.1124 -        return -EINVAL;
  5.1125 -
  5.1126 -    /*
  5.1127 -     * XXX When we make this support 4MB superpages we should also deal with 
  5.1128 -     * the case of updating L2 entries.
  5.1129 -     */
  5.1130 -
  5.1131 -    if ( unlikely(!mod_l1_entry(&linear_pg_table[page_nr], 
  5.1132 -                                mk_l1_pgentry(val))) )
  5.1133 -        err = -EINVAL;
  5.1134 -
  5.1135 -    if ( unlikely(p->mm.shadow_mode) )
  5.1136 -    {
  5.1137 -        unsigned long sval;
  5.1138 -
  5.1139 -        l1pte_no_fault( &current->mm, &val, &sval );
  5.1140 -
  5.1141 -        if ( unlikely(__put_user(sval, ((unsigned long *)(
  5.1142 -            &shadow_linear_pg_table[page_nr])))) )
  5.1143 -        {
  5.1144 -            /*
   5.1145 -             * Since L2's are guaranteed RW, failure indicates the page was not 
  5.1146 -             * shadowed, so ignore.
  5.1147 -             */
  5.1148 -            perfc_incrc(shadow_update_va_fail);
  5.1149 -        }
  5.1150 -
  5.1151 -        /*
  5.1152 -         * If we're in log-dirty mode then we need to note that we've updated
  5.1153 -         * the PTE in the PT-holding page. We need the machine frame number
  5.1154 -         * for this.
  5.1155 -         */
  5.1156 -        if ( p->mm.shadow_mode == SHM_logdirty )
  5.1157 -            mark_dirty( &current->mm, va_to_l1mfn(page_nr<<PAGE_SHIFT) );  
  5.1158 -  
  5.1159 -        check_pagetable( p, p->mm.pagetable, "va" ); /* debug */
  5.1160 -    }
  5.1161 -
  5.1162 -    deferred_ops = percpu_info[cpu].deferred_ops;
  5.1163 -    percpu_info[cpu].deferred_ops = 0;
  5.1164 -
  5.1165 -    if ( unlikely(deferred_ops & DOP_FLUSH_TLB) || 
  5.1166 -         unlikely(flags & UVMF_FLUSH_TLB) )
  5.1167 -        local_flush_tlb();
  5.1168 -    else if ( unlikely(flags & UVMF_INVLPG) )
  5.1169 -        __flush_tlb_one(page_nr << PAGE_SHIFT);
  5.1170 -
  5.1171 -    if ( unlikely(deferred_ops & DOP_RELOAD_LDT) )
  5.1172 -        (void)map_ldt_shadow_page(0);
  5.1173 -    
  5.1174 -    return err;
  5.1175 -}
  5.1176 -
  5.1177 -int do_update_va_mapping_otherdomain(unsigned long page_nr, 
  5.1178 -                                     unsigned long val, 
  5.1179 -                                     unsigned long flags,
  5.1180 -                                     domid_t domid)
  5.1181 -{
  5.1182 -    unsigned int cpu = smp_processor_id();
  5.1183 -    struct domain *d;
  5.1184 -    int rc;
  5.1185 -
  5.1186 -    if ( unlikely(!IS_PRIV(current)) )
  5.1187 -        return -EPERM;
  5.1188 -
  5.1189 -    percpu_info[cpu].gps = d = find_domain_by_id(domid);
  5.1190 -    if ( unlikely(d == NULL) )
  5.1191 -    {
  5.1192 -        MEM_LOG("Unknown domain '%u'", domid);
  5.1193 -        return -ESRCH;
  5.1194 -    }
  5.1195 -
  5.1196 -    rc = do_update_va_mapping(page_nr, val, flags);
  5.1197 -
  5.1198 -    put_domain(d);
  5.1199 -    percpu_info[cpu].gps = NULL;
  5.1200 -
  5.1201 -    return rc;
  5.1202 -}
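
(Illustrative sketch, not part of the changeset: the request encoding that the
do_mmu_update() code above decodes.  The command is carried in the low bits of
'ptr' -- the code masks it off with sizeof(l1_pgentry_t)-1 -- and for
MMU_EXTENDED_COMMAND the sub-command is packed into the low bits of 'val' via
MMUEXT_CMD_MASK.  The helper name and its 'pte_maddr'/'new_val' parameters are
hypothetical; mmu_update_t and the MMU_* constants are those referenced above
from hypervisor-if.h.)

    #include <hypervisor-ifs/hypervisor-if.h>   /* mmu_update_t, MMU_* commands */

    /* Hedged sketch only: build one "normal" page-table update request. */
    static void build_normal_pt_update(mmu_update_t *req,
                                       unsigned long pte_maddr, /* hypothetical */
                                       unsigned long new_val)   /* hypothetical */
    {
        /* do_mmu_update() extracts the command with sizeof(l1_pgentry_t)-1;
         * with 4-byte PTEs that is the low two bits of 'ptr'. */
        req->ptr = (pte_maddr & ~3UL) | MMU_NORMAL_PT_UPDATE;
        req->val = new_val;
    }
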
     6.1 --- a/xen/include/asm-x86/config.h	Wed Jul 07 18:56:39 2004 +0000
     6.2 +++ b/xen/include/asm-x86/config.h	Wed Jul 07 18:57:28 2004 +0000
     6.3 @@ -220,4 +220,10 @@ extern unsigned long xenheap_phys_end; /
     6.4  #define LDT_VIRT_START        (GDT_VIRT_END)
     6.5  #define LDT_VIRT_END          (LDT_VIRT_START + (64*1024))
     6.6  
     6.7 +#if defined(__x86_64__)
     6.8 +#define ELFSIZE 64
     6.9 +#else
    6.10 +#define ELFSIZE 32
    6.11 +#endif
    6.12 +
    6.13  #endif /* __XEN_I386_CONFIG_H__ */
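
(Aside, not part of the changeset: ELFSIZE is the usual compile-time switch
that lets shared ELF-loading code pick 32- or 64-bit structure layouts.  A
minimal sketch of the pattern; the typedef names and the header supplying the
standard Elf32_ and Elf64_ structures are assumptions, not taken from this
diff.)

    /* Hedged sketch: select ELF structure widths from ELFSIZE. */
    #if ELFSIZE == 64
    typedef Elf64_Ehdr elf_ehdr_t;   /* 64-bit ELF file header */
    typedef Elf64_Phdr elf_phdr_t;   /* 64-bit ELF program header */
    #else
    typedef Elf32_Ehdr elf_ehdr_t;   /* 32-bit ELF file header */
    typedef Elf32_Phdr elf_phdr_t;   /* 32-bit ELF program header */
    #endif
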
     7.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     7.2 +++ b/xen/include/asm-x86/mm.h	Wed Jul 07 18:57:28 2004 +0000
     7.3 @@ -0,0 +1,326 @@
     7.4 +
     7.5 +#ifndef __ASM_X86_MM_H__
     7.6 +#define __ASM_X86_MM_H__
     7.7 +
     7.8 +#include <xen/config.h>
     7.9 +#ifdef LINUX_2_6
    7.10 +#include <xen/gfp.h>
    7.11 +#endif
    7.12 +#include <xen/list.h>
    7.13 +#include <xen/spinlock.h>
    7.14 +#include <xen/perfc.h>
    7.15 +#include <xen/sched.h>
    7.16 +
    7.17 +#include <asm/processor.h>
    7.18 +#include <asm/atomic.h>
    7.19 +#include <asm/desc.h>
    7.20 +#include <asm/flushtlb.h>
    7.21 +#include <asm/io.h>
    7.22 +
    7.23 +#include <hypervisor-ifs/hypervisor-if.h>
    7.24 +
    7.25 +/*
    7.26 + * The following is for page_alloc.c.
    7.27 + */
    7.28 +
    7.29 +void init_page_allocator(unsigned long min, unsigned long max);
    7.30 +unsigned long __get_free_pages(int order);
    7.31 +void __free_pages(unsigned long p, int order);
    7.32 +#define get_free_page()   (__get_free_pages(0))
    7.33 +#define __get_free_page() (__get_free_pages(0))
    7.34 +#define free_pages(_p,_o) (__free_pages(_p,_o))
    7.35 +#define free_page(_p)     (__free_pages(_p,0))
    7.36 +
    7.37 +
    7.38 +/*
    7.39 + * Per-page-frame information.
    7.40 + */
    7.41 +
    7.42 +struct pfn_info
    7.43 +{
    7.44 +    /* Each frame can be threaded onto a doubly-linked list. */
    7.45 +    struct list_head list;
    7.46 +    /* The following possible uses are context-dependent. */
    7.47 +    union {
    7.48 +        /* Page is in use: we keep a pointer to its owner. */
    7.49 +        struct domain *domain;
    7.50 +        /* Page is not currently allocated: mask of possibly-tainted TLBs. */
    7.51 +        unsigned long cpu_mask;
    7.52 +    } u;
    7.53 +    /* Reference count and various PGC_xxx flags and fields. */
    7.54 +    u32 count_and_flags;
    7.55 +    /* Type reference count and various PGT_xxx flags and fields. */
    7.56 +    u32 type_and_flags;
    7.57 +    /* Timestamp from 'TLB clock', used to reduce need for safety flushes. */
    7.58 +    u32 tlbflush_timestamp;
    7.59 +};
    7.60 +
    7.61 + /* The following page types are MUTUALLY EXCLUSIVE. */
    7.62 +#define PGT_none            (0<<29) /* no special uses of this page */
    7.63 +#define PGT_l1_page_table   (1<<29) /* using this page as an L1 page table? */
    7.64 +#define PGT_l2_page_table   (2<<29) /* using this page as an L2 page table? */
    7.65 +#define PGT_l3_page_table   (3<<29) /* using this page as an L3 page table? */
    7.66 +#define PGT_l4_page_table   (4<<29) /* using this page as an L4 page table? */
    7.67 +#define PGT_gdt_page        (5<<29) /* using this page in a GDT? */
    7.68 +#define PGT_ldt_page        (6<<29) /* using this page in an LDT? */
    7.69 +#define PGT_writeable_page  (7<<29) /* has writable mappings of this page? */
    7.70 +#define PGT_type_mask       (7<<29) /* Bits 29-31. */
    7.71 + /* Has this page been validated for use as its current type? */
    7.72 +#define _PGT_validated      28
    7.73 +#define PGT_validated       (1<<_PGT_validated)
    7.74 + /* 28-bit count of uses of this frame as its current type. */
    7.75 +#define PGT_count_mask      ((1<<28)-1)
    7.76 +
    7.77 + /* For safety, force a TLB flush when this page's type changes. */
    7.78 +#define _PGC_tlb_flush_on_type_change 31
    7.79 +#define PGC_tlb_flush_on_type_change  (1<<_PGC_tlb_flush_on_type_change)
    7.80 + /* Owning guest has pinned this page to its current type? */
    7.81 +#define _PGC_guest_pinned             30
    7.82 +#define PGC_guest_pinned              (1<<_PGC_guest_pinned)
    7.83 + /* Cleared when the owning guest 'frees' this page. */
    7.84 +#define _PGC_allocated                29
    7.85 +#define PGC_allocated                 (1<<_PGC_allocated)
    7.86 + /* 28-bit count of references to this frame. */
    7.87 +#define PGC_count_mask                ((1<<29)-1)
    7.88 +
    7.89 +
    7.90 +/* We trust the slab allocator in slab.c, and our use of it. */
    7.91 +#define PageSlab(page)		(1)
    7.92 +#define PageSetSlab(page)	((void)0)
    7.93 +#define PageClearSlab(page)	((void)0)
    7.94 +
    7.95 +#define IS_XEN_HEAP_FRAME(_pfn) (page_to_phys(_pfn) < xenheap_phys_end)
    7.96 +
    7.97 +#define SHARE_PFN_WITH_DOMAIN(_pfn, _dom)                                   \
    7.98 +    do {                                                                    \
    7.99 +        (_pfn)->u.domain = (_dom);                                          \
   7.100 +        /* The incremented type count is intended to pin to 'writeable'. */ \
   7.101 +        (_pfn)->type_and_flags  = PGT_writeable_page | PGT_validated | 1;   \
   7.102 +        wmb(); /* install valid domain ptr before updating refcnt. */       \
   7.103 +        spin_lock(&(_dom)->page_alloc_lock);                                \
   7.104 +        /* _dom holds an allocation reference */                            \
   7.105 +        (_pfn)->count_and_flags = PGC_allocated | 1;                        \
   7.106 +        if ( unlikely((_dom)->xenheap_pages++ == 0) )                       \
   7.107 +            get_domain(_dom);                                               \
   7.108 +        spin_unlock(&(_dom)->page_alloc_lock);                              \
   7.109 +    } while ( 0 )
   7.110 +
   7.111 +extern struct pfn_info *frame_table;
   7.112 +extern unsigned long frame_table_size;
   7.113 +extern struct list_head free_list;
   7.114 +extern spinlock_t free_list_lock;
   7.115 +extern unsigned int free_pfns;
   7.116 +extern unsigned long max_page;
   7.117 +void init_frametable(void *frametable_vstart, unsigned long nr_pages);
   7.118 +void add_to_domain_alloc_list(unsigned long ps, unsigned long pe);
   7.119 +
   7.120 +struct pfn_info *alloc_domain_page(struct domain *p);
   7.121 +void free_domain_page(struct pfn_info *page);
   7.122 +
   7.123 +int alloc_page_type(struct pfn_info *page, unsigned int type);
   7.124 +void free_page_type(struct pfn_info *page, unsigned int type);
   7.125 +
   7.126 +static inline void put_page(struct pfn_info *page)
   7.127 +{
   7.128 +    u32 nx, x, y = page->count_and_flags;
   7.129 +
   7.130 +    do {
   7.131 +        x  = y;
   7.132 +        nx = x - 1;
   7.133 +    }
   7.134 +    while ( unlikely((y = cmpxchg(&page->count_and_flags, x, nx)) != x) );
   7.135 +
   7.136 +    if ( unlikely((nx & PGC_count_mask) == 0) )
   7.137 +        free_domain_page(page);
   7.138 +}
   7.139 +
   7.140 +
   7.141 +static inline int get_page(struct pfn_info *page,
   7.142 +                           struct domain *domain)
   7.143 +{
   7.144 +    u32 x, nx, y = page->count_and_flags;
   7.145 +    struct domain *p, *np = page->u.domain;
   7.146 +
   7.147 +    do {
   7.148 +        x  = y;
   7.149 +        nx = x + 1;
   7.150 +        p  = np;
   7.151 +        if ( unlikely((x & PGC_count_mask) == 0) ||  /* Not allocated? */
   7.152 +             unlikely((nx & PGC_count_mask) == 0) || /* Count overflow? */
   7.153 +             unlikely(p != domain) )                 /* Wrong owner? */
   7.154 +        {
   7.155 +            DPRINTK("Error pfn %08lx: ed=%p(%u), sd=%p(%u),"
   7.156 +                    " caf=%08x, taf=%08x\n",
   7.157 +                    page_to_pfn(page), domain, domain->domain,
   7.158 +                    p, (p && !((x & PGC_count_mask) == 0))?p->domain:999, 
   7.159 +                    x, page->type_and_flags);
   7.160 +            return 0;
   7.161 +        }
   7.162 +        __asm__ __volatile__(
   7.163 +            LOCK_PREFIX "cmpxchg8b %3"
   7.164 +            : "=a" (np), "=d" (y), "=b" (p),
   7.165 +              "=m" (*(volatile u64 *)(&page->u.domain))
   7.166 +            : "0" (p), "1" (x), "b" (p), "c" (nx) );
   7.167 +    }
   7.168 +    while ( unlikely(np != p) || unlikely(y != x) );
   7.169 +
   7.170 +    return 1;
   7.171 +}
   7.172 +
   7.173 +
   7.174 +static inline void put_page_type(struct pfn_info *page)
   7.175 +{
   7.176 +    u32 nx, x, y = page->type_and_flags;
   7.177 +
   7.178 + again:
   7.179 +    do {
   7.180 +        x  = y;
   7.181 +        nx = x - 1;
   7.182 +        if ( unlikely((nx & PGT_count_mask) == 0) )
   7.183 +        {
   7.184 +            page->tlbflush_timestamp = tlbflush_clock;
   7.185 +            if ( unlikely((nx & PGT_type_mask) <= PGT_l4_page_table) &&
   7.186 +                 likely(nx & PGT_validated) )
   7.187 +            {
   7.188 +                /*
   7.189 +                 * Page-table pages must be unvalidated when count is zero. The
   7.190 +                 * 'free' is safe because the refcnt is non-zero and the
   7.191 +                 * validated bit is clear => other ops will spin or fail.
   7.192 +                 */
   7.193 +                if ( unlikely((y = cmpxchg(&page->type_and_flags, x, 
   7.194 +                                           x & ~PGT_validated)) != x) )
   7.195 +                    goto again;
   7.196 +                /* We cleared the 'valid bit' so we must do the clear up. */
   7.197 +                free_page_type(page, x & PGT_type_mask);
   7.198 +                /* Carry on as we were, but with the 'valid bit' now clear. */
   7.199 +                x  &= ~PGT_validated;
   7.200 +                nx &= ~PGT_validated;
   7.201 +            }
   7.202 +        }
   7.203 +    }
   7.204 +    while ( unlikely((y = cmpxchg(&page->type_and_flags, x, nx)) != x) );
   7.205 +}
   7.206 +
   7.207 +
   7.208 +static inline int get_page_type(struct pfn_info *page, u32 type)
   7.209 +{
   7.210 +    u32 nx, x, y = page->type_and_flags;
   7.211 + again:
   7.212 +    do {
   7.213 +        x  = y;
   7.214 +        nx = x + 1;
   7.215 +        if ( unlikely((nx & PGT_count_mask) == 0) )
   7.216 +        {
   7.217 +            DPRINTK("Type count overflow on pfn %08lx\n", page_to_pfn(page));
   7.218 +            return 0;
   7.219 +        }
   7.220 +        else if ( unlikely((x & PGT_count_mask) == 0) )
   7.221 +        {
   7.222 +            if ( (x & PGT_type_mask) != type )
   7.223 +            {
   7.224 +                nx &= ~(PGT_type_mask | PGT_validated);
   7.225 +                nx |= type;
   7.226 +                /* No extra validation needed for writeable pages. */
   7.227 +                if ( type == PGT_writeable_page )
   7.228 +                    nx |= PGT_validated;
   7.229 +            }
   7.230 +        }
   7.231 +        else if ( unlikely((x & PGT_type_mask) != type) )
   7.232 +        {
   7.233 +            DPRINTK("Unexpected type (saw %08x != exp %08x) for pfn %08lx\n",
   7.234 +                    x & PGT_type_mask, type, page_to_pfn(page));
   7.235 +            return 0;
   7.236 +        }
   7.237 +        else if ( unlikely(!(x & PGT_validated)) )
   7.238 +        {
   7.239 +            /* Someone else is updating validation of this page. Wait... */
   7.240 +            while ( (y = page->type_and_flags) != x )
   7.241 +            {
   7.242 +                rep_nop();
   7.243 +                barrier();
   7.244 +            }
   7.245 +            goto again;
   7.246 +        }
   7.247 +    }
   7.248 +    while ( unlikely((y = cmpxchg(&page->type_and_flags, x, nx)) != x) );
   7.249 +
   7.250 +    if ( unlikely(!(nx & PGT_validated)) )
   7.251 +    {
   7.252 +        /* Try to validate page type; drop the new reference on failure. */
   7.253 +        if ( unlikely(!alloc_page_type(page, type)) )
   7.254 +        {
   7.255 +            DPRINTK("Error while validating pfn %08lx for type %08x\n",
   7.256 +                    page_to_pfn(page), type);
   7.257 +            put_page_type(page);
   7.258 +            return 0;
   7.259 +        }
   7.260 +        set_bit(_PGT_validated, &page->type_and_flags);
   7.261 +    }
   7.262 +
   7.263 +    return 1;
   7.264 +}
   7.265 +
   7.266 +
   7.267 +static inline void put_page_and_type(struct pfn_info *page)
   7.268 +{
   7.269 +    put_page_type(page);
   7.270 +    put_page(page);
   7.271 +}
   7.272 +
   7.273 +
   7.274 +static inline int get_page_and_type(struct pfn_info *page,
   7.275 +                                    struct domain *domain,
   7.276 +                                    u32 type)
   7.277 +{
   7.278 +    int rc = get_page(page, domain);
   7.279 +
   7.280 +    if ( likely(rc) && unlikely(!get_page_type(page, type)) )
   7.281 +    {
   7.282 +        put_page(page);
   7.283 +        rc = 0;
   7.284 +    }
   7.285 +
   7.286 +    return rc;
   7.287 +}
   7.288 +
   7.289 +#define ASSERT_PAGE_IS_TYPE(_p, _t)                \
   7.290 +    ASSERT(((_p)->type_and_flags & PGT_type_mask) == (_t));  \
   7.291 +    ASSERT(((_p)->type_and_flags & PGT_count_mask) != 0)
   7.292 +#define ASSERT_PAGE_IS_DOMAIN(_p, _d)              \
   7.293 +    ASSERT(((_p)->count_and_flags & PGC_count_mask) != 0);  \
   7.294 +    ASSERT((_p)->u.domain == (_d))
   7.295 +
   7.296 +int check_descriptor(unsigned long a, unsigned long b);
   7.297 +
   7.298 +/*
   7.299 + * The MPT (machine->physical mapping table) is an array of word-sized
   7.300 + * values, indexed on machine frame number. It is expected that guest OSes
   7.301 + * will use it to store a "physical" frame number to give the appearance of
   7.302 + * contiguous (or near contiguous) physical memory.
   7.303 + */
   7.304 +#undef  machine_to_phys_mapping
   7.305 +#ifdef __x86_64__
   7.306 +extern unsigned long *machine_to_phys_mapping;
   7.307 +#else
   7.308 +#define machine_to_phys_mapping ((unsigned long *)RDWR_MPT_VIRT_START)
   7.309 +#endif
   7.310 +
   7.311 +/* Part of the domain API. */
   7.312 +int do_mmu_update(mmu_update_t *updates, int count, int *success_count);
   7.313 +
   7.314 +#define DEFAULT_GDT_ENTRIES     ((LAST_RESERVED_GDT_ENTRY*8)+7)
   7.315 +#define DEFAULT_GDT_ADDRESS     ((unsigned long)gdt_table)
   7.316 +
   7.317 +#ifdef MEMORY_GUARD
   7.318 +void *memguard_init(void *heap_start);
   7.319 +void memguard_guard_range(void *p, unsigned long l);
   7.320 +void memguard_unguard_range(void *p, unsigned long l);
   7.321 +int memguard_is_guarded(void *p);
   7.322 +#else
   7.323 +#define memguard_init(_s)              (_s)
   7.324 +#define memguard_guard_range(_p,_l)    ((void)0)
   7.325 +#define memguard_unguard_range(_p,_l)  ((void)0)
   7.326 +#define memguard_is_guarded(_p)        (0)
   7.327 +#endif
   7.328 +
   7.329 +#endif /* __ASM_X86_MM_H__ */
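
(Illustrative sketch, not part of the changeset: how the reference-counting
helpers declared above are meant to pair up.  A caller takes a general
reference plus a type reference before treating a frame as, say, an L1 page
table, and releases both afterwards.  The function and its caller are
hypothetical.)

    #include <asm/mm.h>   /* the header introduced above */

    /* Hedged sketch: use 'page', owned by domain 'd', as an L1 page table. */
    static int use_frame_as_l1(struct pfn_info *page, struct domain *d)
    {
        /* Fails if the frame is unallocated, owned by another domain, or in
         * use under an incompatible type; on success the type has been
         * validated via alloc_page_type(). */
        if ( !get_page_and_type(page, d, PGT_l1_page_table) )
            return 0;

        /* ... the frame can now safely be interpreted as a validated L1 ... */

        put_page_and_type(page);   /* drop the type ref, then the general ref */
        return 1;
    }
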
     8.1 --- a/xen/include/xen/mm.h	Wed Jul 07 18:56:39 2004 +0000
     8.2 +++ b/xen/include/xen/mm.h	Wed Jul 07 18:57:28 2004 +0000
     8.3 @@ -2,325 +2,6 @@
     8.4  #ifndef __XEN_MM_H__
     8.5  #define __XEN_MM_H__
     8.6  
     8.7 -#include <xen/config.h>
     8.8 -#ifdef LINUX_2_6
     8.9 -#include <xen/gfp.h>
    8.10 -#endif
    8.11 -#include <xen/list.h>
    8.12 -#include <xen/spinlock.h>
    8.13 -#include <xen/perfc.h>
    8.14 -#include <xen/sched.h>
    8.15 -
    8.16 -#include <asm/processor.h>
    8.17 -#include <asm/atomic.h>
    8.18 -#include <asm/desc.h>
    8.19 -#include <asm/flushtlb.h>
    8.20 -#include <asm/io.h>
    8.21 -
    8.22 -#include <hypervisor-ifs/hypervisor-if.h>
    8.23 -
    8.24 -/*
    8.25 - * The following is for page_alloc.c.
    8.26 - */
    8.27 -
    8.28 -void init_page_allocator(unsigned long min, unsigned long max);
    8.29 -unsigned long __get_free_pages(int order);
    8.30 -void __free_pages(unsigned long p, int order);
    8.31 -#define get_free_page()   (__get_free_pages(0))
    8.32 -#define __get_free_page() (__get_free_pages(0))
    8.33 -#define free_pages(_p,_o) (__free_pages(_p,_o))
    8.34 -#define free_page(_p)     (__free_pages(_p,0))
    8.35 -
    8.36 -
    8.37 -/*
    8.38 - * Per-page-frame information.
    8.39 - */
    8.40 -
    8.41 -struct pfn_info
    8.42 -{
    8.43 -    /* Each frame can be threaded onto a doubly-linked list. */
    8.44 -    struct list_head list;
    8.45 -    /* The following possible uses are context-dependent. */
    8.46 -    union {
    8.47 -        /* Page is in use: we keep a pointer to its owner. */
    8.48 -        struct domain *domain;
    8.49 -        /* Page is not currently allocated: mask of possibly-tainted TLBs. */
    8.50 -        unsigned long cpu_mask;
    8.51 -    } u;
    8.52 -    /* Reference count and various PGC_xxx flags and fields. */
    8.53 -    u32 count_and_flags;
    8.54 -    /* Type reference count and various PGT_xxx flags and fields. */
    8.55 -    u32 type_and_flags;
    8.56 -    /* Timestamp from 'TLB clock', used to reduce need for safety flushes. */
    8.57 -    u32 tlbflush_timestamp;
    8.58 -};
    8.59 -
    8.60 - /* The following page types are MUTUALLY EXCLUSIVE. */
    8.61 -#define PGT_none            (0<<29) /* no special uses of this page */
    8.62 -#define PGT_l1_page_table   (1<<29) /* using this page as an L1 page table? */
    8.63 -#define PGT_l2_page_table   (2<<29) /* using this page as an L2 page table? */
    8.64 -#define PGT_l3_page_table   (3<<29) /* using this page as an L3 page table? */
    8.65 -#define PGT_l4_page_table   (4<<29) /* using this page as an L4 page table? */
    8.66 -#define PGT_gdt_page        (5<<29) /* using this page in a GDT? */
    8.67 -#define PGT_ldt_page        (6<<29) /* using this page in an LDT? */
    8.68 -#define PGT_writeable_page  (7<<29) /* has writable mappings of this page? */
    8.69 -#define PGT_type_mask       (7<<29) /* Bits 29-31. */
    8.70 - /* Has this page been validated for use as its current type? */
    8.71 -#define _PGT_validated      28
    8.72 -#define PGT_validated       (1<<_PGT_validated)
    8.73 - /* 28-bit count of uses of this frame as its current type. */
    8.74 -#define PGT_count_mask      ((1<<28)-1)
    8.75 -
    8.76 - /* For safety, force a TLB flush when this page's type changes. */
    8.77 -#define _PGC_tlb_flush_on_type_change 31
    8.78 -#define PGC_tlb_flush_on_type_change  (1<<_PGC_tlb_flush_on_type_change)
    8.79 - /* Owning guest has pinned this page to its current type? */
    8.80 -#define _PGC_guest_pinned             30
    8.81 -#define PGC_guest_pinned              (1<<_PGC_guest_pinned)
    8.82 - /* Cleared when the owning guest 'frees' this page. */
    8.83 -#define _PGC_allocated                29
    8.84 -#define PGC_allocated                 (1<<_PGC_allocated)
    8.85 - /* 28-bit count of references to this frame. */
    8.86 -#define PGC_count_mask                ((1<<29)-1)
    8.87 -
    8.88 -
    8.89 -/* We trust the slab allocator in slab.c, and our use of it. */
    8.90 -#define PageSlab(page)		(1)
    8.91 -#define PageSetSlab(page)	((void)0)
    8.92 -#define PageClearSlab(page)	((void)0)
    8.93 -
    8.94 -#define IS_XEN_HEAP_FRAME(_pfn) (page_to_phys(_pfn) < xenheap_phys_end)
    8.95 -
    8.96 -#define SHARE_PFN_WITH_DOMAIN(_pfn, _dom)                                   \
    8.97 -    do {                                                                    \
    8.98 -        (_pfn)->u.domain = (_dom);                                          \
    8.99 -        /* The incremented type count is intended to pin to 'writeable'. */ \
   8.100 -        (_pfn)->type_and_flags  = PGT_writeable_page | PGT_validated | 1;   \
   8.101 -        wmb(); /* install valid domain ptr before updating refcnt. */       \
   8.102 -        spin_lock(&(_dom)->page_alloc_lock);                                \
   8.103 -        /* _dom holds an allocation reference */                            \
   8.104 -        (_pfn)->count_and_flags = PGC_allocated | 1;                        \
   8.105 -        if ( unlikely((_dom)->xenheap_pages++ == 0) )                       \
   8.106 -            get_domain(_dom);                                               \
   8.107 -        spin_unlock(&(_dom)->page_alloc_lock);                              \
   8.108 -    } while ( 0 )
   8.109 -
   8.110 -extern struct pfn_info *frame_table;
   8.111 -extern unsigned long frame_table_size;
   8.112 -extern struct list_head free_list;
   8.113 -extern spinlock_t free_list_lock;
   8.114 -extern unsigned int free_pfns;
   8.115 -extern unsigned long max_page;
   8.116 -void init_frametable(void *frametable_vstart, unsigned long nr_pages);
   8.117 -void add_to_domain_alloc_list(unsigned long ps, unsigned long pe);
   8.118 -
   8.119 -struct pfn_info *alloc_domain_page(struct domain *p);
   8.120 -void free_domain_page(struct pfn_info *page);
   8.121 -
   8.122 -int alloc_page_type(struct pfn_info *page, unsigned int type);
   8.123 -void free_page_type(struct pfn_info *page, unsigned int type);
   8.124 -
   8.125 -static inline void put_page(struct pfn_info *page)
   8.126 -{
   8.127 -    u32 nx, x, y = page->count_and_flags;
   8.128 -
   8.129 -    do {
   8.130 -        x  = y;
   8.131 -        nx = x - 1;
   8.132 -    }
   8.133 -    while ( unlikely((y = cmpxchg(&page->count_and_flags, x, nx)) != x) );
   8.134 -
   8.135 -    if ( unlikely((nx & PGC_count_mask) == 0) )
   8.136 -        free_domain_page(page);
   8.137 -}
   8.138 -
   8.139 -
   8.140 -static inline int get_page(struct pfn_info *page,
   8.141 -                           struct domain *domain)
   8.142 -{
   8.143 -    u32 x, nx, y = page->count_and_flags;
   8.144 -    struct domain *p, *np = page->u.domain;
   8.145 -
   8.146 -    do {
   8.147 -        x  = y;
   8.148 -        nx = x + 1;
   8.149 -        p  = np;
   8.150 -        if ( unlikely((x & PGC_count_mask) == 0) ||  /* Not allocated? */
   8.151 -             unlikely((nx & PGC_count_mask) == 0) || /* Count overflow? */
   8.152 -             unlikely(p != domain) )                 /* Wrong owner? */
   8.153 -        {
   8.154 -            DPRINTK("Error pfn %08lx: ed=%p(%u), sd=%p(%u),"
   8.155 -                    " caf=%08x, taf=%08x\n",
   8.156 -                    page_to_pfn(page), domain, domain->domain,
   8.157 -                    p, (p && !((x & PGC_count_mask) == 0))?p->domain:999, 
   8.158 -                    x, page->type_and_flags);
   8.159 -            return 0;
   8.160 -        }
   8.161 -        __asm__ __volatile__(
   8.162 -            LOCK_PREFIX "cmpxchg8b %3"
   8.163 -            : "=a" (np), "=d" (y), "=b" (p),
   8.164 -              "=m" (*(volatile u64 *)(&page->u.domain))
   8.165 -            : "0" (p), "1" (x), "b" (p), "c" (nx) );
   8.166 -    }
   8.167 -    while ( unlikely(np != p) || unlikely(y != x) );
   8.168 -
   8.169 -    return 1;
   8.170 -}
   8.171 -
   8.172 -
   8.173 -static inline void put_page_type(struct pfn_info *page)
   8.174 -{
   8.175 -    u32 nx, x, y = page->type_and_flags;
   8.176 -
   8.177 - again:
   8.178 -    do {
   8.179 -        x  = y;
   8.180 -        nx = x - 1;
   8.181 -        if ( unlikely((nx & PGT_count_mask) == 0) )
   8.182 -        {
   8.183 -            page->tlbflush_timestamp = tlbflush_clock;
   8.184 -            if ( unlikely((nx & PGT_type_mask) <= PGT_l4_page_table) &&
   8.185 -                 likely(nx & PGT_validated) )
   8.186 -            {
   8.187 -                /*
   8.188 -                 * Page-table pages must be unvalidated when count is zero. The
   8.189 -                 * 'free' is safe because the refcnt is non-zero and the
   8.190 -                 * validated bit is clear => other ops will spin or fail.
   8.191 -                 */
   8.192 -                if ( unlikely((y = cmpxchg(&page->type_and_flags, x, 
   8.193 -                                           x & ~PGT_validated)) != x) )
   8.194 -                    goto again;
   8.195 -                /* We cleared the 'valid bit' so we must do the clear up. */
   8.196 -                free_page_type(page, x & PGT_type_mask);
   8.197 -                /* Carry on as we were, but with the 'valid bit' now clear. */
   8.198 -                x  &= ~PGT_validated;
   8.199 -                nx &= ~PGT_validated;
   8.200 -            }
   8.201 -        }
   8.202 -    }
   8.203 -    while ( unlikely((y = cmpxchg(&page->type_and_flags, x, nx)) != x) );
   8.204 -}
   8.205 -
   8.206 -
   8.207 -static inline int get_page_type(struct pfn_info *page, u32 type)
   8.208 -{
   8.209 -    u32 nx, x, y = page->type_and_flags;
   8.210 - again:
   8.211 -    do {
   8.212 -        x  = y;
   8.213 -        nx = x + 1;
   8.214 -        if ( unlikely((nx & PGT_count_mask) == 0) )
   8.215 -        {
   8.216 -            DPRINTK("Type count overflow on pfn %08lx\n", page_to_pfn(page));
   8.217 -            return 0;
   8.218 -        }
   8.219 -        else if ( unlikely((x & PGT_count_mask) == 0) )
   8.220 -        {
   8.221 -            if ( (x & PGT_type_mask) != type )
   8.222 -            {
   8.223 -                nx &= ~(PGT_type_mask | PGT_validated);
   8.224 -                nx |= type;
   8.225 -                /* No extra validation needed for writeable pages. */
   8.226 -                if ( type == PGT_writeable_page )
   8.227 -                    nx |= PGT_validated;
   8.228 -            }
   8.229 -        }
   8.230 -        else if ( unlikely((x & PGT_type_mask) != type) )
   8.231 -        {
   8.232 -            DPRINTK("Unexpected type (saw %08x != exp %08x) for pfn %08lx\n",
   8.233 -                    x & PGT_type_mask, type, page_to_pfn(page));
   8.234 -            return 0;
   8.235 -        }
   8.236 -        else if ( unlikely(!(x & PGT_validated)) )
   8.237 -        {
   8.238 -            /* Someone else is updating validation of this page. Wait... */
   8.239 -            while ( (y = page->type_and_flags) != x )
   8.240 -            {
   8.241 -                rep_nop();
   8.242 -                barrier();
   8.243 -            }
   8.244 -            goto again;
   8.245 -        }
   8.246 -    }
   8.247 -    while ( unlikely((y = cmpxchg(&page->type_and_flags, x, nx)) != x) );
   8.248 -
   8.249 -    if ( unlikely(!(nx & PGT_validated)) )
   8.250 -    {
   8.251 -        /* Try to validate page type; drop the new reference on failure. */
   8.252 -        if ( unlikely(!alloc_page_type(page, type)) )
   8.253 -        {
   8.254 -            DPRINTK("Error while validating pfn %08lx for type %08x\n",
   8.255 -                    page_to_pfn(page), type);
   8.256 -            put_page_type(page);
   8.257 -            return 0;
   8.258 -        }
   8.259 -        set_bit(_PGT_validated, &page->type_and_flags);
   8.260 -    }
   8.261 -
   8.262 -    return 1;
   8.263 -}
   8.264 -
   8.265 -
   8.266 -static inline void put_page_and_type(struct pfn_info *page)
   8.267 -{
   8.268 -    put_page_type(page);
   8.269 -    put_page(page);
   8.270 -}
   8.271 -
   8.272 -
   8.273 -static inline int get_page_and_type(struct pfn_info *page,
   8.274 -                                    struct domain *domain,
   8.275 -                                    u32 type)
   8.276 -{
   8.277 -    int rc = get_page(page, domain);
   8.278 -
   8.279 -    if ( likely(rc) && unlikely(!get_page_type(page, type)) )
   8.280 -    {
   8.281 -        put_page(page);
   8.282 -        rc = 0;
   8.283 -    }
   8.284 -
   8.285 -    return rc;
   8.286 -}
   8.287 -
   8.288 -#define ASSERT_PAGE_IS_TYPE(_p, _t)                \
   8.289 -    ASSERT(((_p)->type_and_flags & PGT_type_mask) == (_t));  \
   8.290 -    ASSERT(((_p)->type_and_flags & PGT_count_mask) != 0)
   8.291 -#define ASSERT_PAGE_IS_DOMAIN(_p, _d)              \
   8.292 -    ASSERT(((_p)->count_and_flags & PGC_count_mask) != 0);  \
   8.293 -    ASSERT((_p)->u.domain == (_d))
   8.294 -
   8.295 -int check_descriptor(unsigned long a, unsigned long b);
   8.296 -
   8.297 -/*
   8.298 - * The MPT (machine->physical mapping table) is an array of word-sized
   8.299 - * values, indexed on machine frame number. It is expected that guest OSes
   8.300 - * will use it to store a "physical" frame number to give the appearance of
   8.301 - * contiguous (or near contiguous) physical memory.
   8.302 - */
   8.303 -#undef  machine_to_phys_mapping
   8.304 -#ifdef __x86_64__
   8.305 -extern unsigned long *machine_to_phys_mapping;
   8.306 -#else
   8.307 -#define machine_to_phys_mapping ((unsigned long *)RDWR_MPT_VIRT_START)
   8.308 -#endif
   8.309 -
   8.310 -/* Part of the domain API. */
   8.311 -int do_mmu_update(mmu_update_t *updates, int count, int *success_count);
   8.312 -
   8.313 -#define DEFAULT_GDT_ENTRIES     ((LAST_RESERVED_GDT_ENTRY*8)+7)
   8.314 -#define DEFAULT_GDT_ADDRESS     ((unsigned long)gdt_table)
   8.315 -
   8.316 -#ifdef MEMORY_GUARD
   8.317 -void *memguard_init(void *heap_start);
   8.318 -void memguard_guard_range(void *p, unsigned long l);
   8.319 -void memguard_unguard_range(void *p, unsigned long l);
   8.320 -int memguard_is_guarded(void *p);
   8.321 -#else
   8.322 -#define memguard_init(_s)              (_s)
   8.323 -#define memguard_guard_range(_p,_l)    ((void)0)
   8.324 -#define memguard_unguard_range(_p,_l)  ((void)0)
   8.325 -#define memguard_is_guarded(_p)        (0)
   8.326 -#endif
   8.327 +#include <asm/mm.h>
   8.328  
   8.329  #endif /* __XEN_MM_H__ */
     9.1 --- a/xen/include/xen/sched.h	Wed Jul 07 18:56:39 2004 +0000
     9.2 +++ b/xen/include/xen/sched.h	Wed Jul 07 18:57:28 2004 +0000
     9.3 @@ -188,7 +188,6 @@ extern void domain_destruct(struct domai
     9.4  extern void domain_kill(struct domain *d);
     9.5  extern void domain_crash(void);
     9.6  extern void domain_shutdown(u8 reason);
     9.7 -extern void domain_relinquish_memory(struct domain *d);
     9.8  
     9.9  void new_thread(struct domain *d,
    9.10                  unsigned long start_pc,