ia64/xen-unstable

changeset 3935:3c5d6f364349

bitkeeper revision 1.1236.1.27 (421f706biBboh8DlmOttNIpUogeM6Q)

A few fixes, and DOM0 is now, by default, allocated all available memory at
boot time.
Signed-off-by: Keir Fraser <keir.fraser@cl.cam.ac.uk>
author kaf24@scramble.cl.cam.ac.uk
date Fri Feb 25 18:37:31 2005 +0000 (2005-02-25)
parents e93d8f53eabb
children da55822ba1b5 4ce1aebf725f
files .rootkeys xen/arch/x86/boot/x86_32.S xen/arch/x86/boot/x86_64.S xen/arch/x86/domain_build.c xen/arch/x86/setup.c xen/arch/x86/x86_32/domain_build.c xen/arch/x86/x86_32/domain_page.c xen/arch/x86/x86_64/domain_build.c xen/common/page_alloc.c xen/drivers/char/console.c xen/include/asm-x86/shadow.h xen/include/xen/sched.h
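
The interface change behind the headline: construct_dom0() no longer takes a
pre-reserved [alloc_start, alloc_end) physical range carved out by setup.c;
the builder now sizes and allocates DOM0's memory itself. A sketch of the two
prototypes as they appear in the hunks below (declarations only, for
comparison):

    /* Before: setup.c reserved a contiguous chunk with alloc_boot_pages()
     * and passed its bounds in. */
    int construct_dom0(struct domain *d,
                       unsigned long alloc_start, unsigned long alloc_end,
                       unsigned long _image_start, unsigned long image_len,
                       unsigned long _initrd_start, unsigned long initrd_len,
                       char *cmdline);

    /* After: the builder allocates DOM0 itself (all free memory by default,
     * or dom0_mem kilobytes if given on the Xen command line). */
    int construct_dom0(struct domain *d,
                       unsigned long _image_start, unsigned long image_len,
                       unsigned long _initrd_start, unsigned long initrd_len,
                       char *cmdline);
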
     1.1 --- a/.rootkeys	Fri Feb 25 17:06:27 2005 +0000
     1.2 +++ b/.rootkeys	Fri Feb 25 18:37:31 2005 +0000
     1.3 @@ -946,6 +946,7 @@ 4107c15e-VmEcLsE-7JCXZaabI8C7A xen/arch/
     1.4  3ddb79bcUrk2EIaM5VsT6wUudH1kkg xen/arch/x86/delay.c
     1.5  40e34414WiQO4h2m3tcpaCPn7SyYyg xen/arch/x86/dom0_ops.c
     1.6  3ddb79bc1_2bAt67x9MFCP4AZrQnvQ xen/arch/x86/domain.c
     1.7 +4202391dkvdTZ8GhWXe3Gqf9EOgWXg xen/arch/x86/domain_build.c
     1.8  41d3eaae6GSDo3ZJDfK3nvQsJux-PQ xen/arch/x86/e820.c
     1.9  3ddb79bcY5zW7KhvI9gvfuPi3ZumEg xen/arch/x86/extable.c
    1.10  3fe443fdDDb0Sw6NQBCk4GQapayfTA xen/arch/x86/flushtlb.c
    1.11 @@ -984,7 +985,6 @@ 41f97ef5139vN42cOYHfX_Ac8WOOjA xen/arch/
    1.12  41c0c4128URE0dxcO15JME_MuKBPfg xen/arch/x86/vmx_vmcs.c
    1.13  419cbedeQDg8IrO3izo3o5rQNlo0kQ xen/arch/x86/x86_32/asm-offsets.c
    1.14  4107c15e_NqNYew2EXroXz2mgTAMWQ xen/arch/x86/x86_32/call_with_regs.S
    1.15 -4202391dkvdTZ8GhWXe3Gqf9EOgWXg xen/arch/x86/x86_32/domain_build.c
    1.16  3e32af9aRnYGl4GMOaDKp7JdfhOGhg xen/arch/x86/x86_32/domain_page.c
    1.17  3ddb79bcecupHj56ZbTa3B0FxDowMg xen/arch/x86/x86_32/entry.S
    1.18  3ddb79bcHwuCQDjBICDTSis52hWguw xen/arch/x86/x86_32/mm.c
    1.19 @@ -993,7 +993,6 @@ 42000d3ckiFc1qxa4AWqsd0t3lxuyw xen/arch/
    1.20  3ddb79bc4nTpGQOe6_-MbyZzkhlhFQ xen/arch/x86/x86_32/usercopy.c
    1.21  3ddb79bcOMCu9-5mKpjIh5d0qqBDPg xen/arch/x86/x86_32/xen.lds
    1.22  41bf1717Ty3hwN3E9swdu8QfnvGqww xen/arch/x86/x86_64/asm-offsets.c
    1.23 -4202391dA91ZovYX9d_5zJi9yGvLoQ xen/arch/x86/x86_64/domain_build.c
    1.24  40e96d3aLDI-nViMuYneD7VKYlZrVg xen/arch/x86/x86_64/entry.S
    1.25  41bf1717XhPz_dNT5OKSjgmbFuWBuA xen/arch/x86/x86_64/mm.c
    1.26  42000d3cMb8o1WuFBXC07c8i3lPZBw xen/arch/x86/x86_64/traps.c
     2.1 --- a/xen/arch/x86/boot/x86_32.S	Fri Feb 25 17:06:27 2005 +0000
     2.2 +++ b/xen/arch/x86/boot/x86_32.S	Fri Feb 25 18:37:31 2005 +0000
     2.3 @@ -15,9 +15,9 @@ ENTRY(start)
     2.4          /* Magic number indicating a Multiboot header. */
     2.5  	.long	0x1BADB002
     2.6  	/* Flags to bootloader (see Multiboot spec). */
     2.7 -	.long	0x00000002
     2.8 +	.long	0x00000003
     2.9  	/* Checksum: must be the negated sum of the first two fields. */
    2.10 -	.long	-0x1BADB004
    2.11 +	.long	-0x1BADB005
    2.12          
    2.13  bad_cpu_msg:
    2.14          .asciz "ERR: Not a P6-compatible CPU!"
     3.1 --- a/xen/arch/x86/boot/x86_64.S	Fri Feb 25 17:06:27 2005 +0000
     3.2 +++ b/xen/arch/x86/boot/x86_64.S	Fri Feb 25 18:37:31 2005 +0000
     3.3 @@ -16,9 +16,9 @@ ENTRY(start)
     3.4          /* Magic number indicating a Multiboot header. */
     3.5          .long   0x1BADB002
     3.6          /* Flags to bootloader (see Multiboot spec). */
     3.7 -        .long   0x00000002
     3.8 +        .long   0x00000003
     3.9          /* Checksum: must be the negated sum of the first two fields. */
    3.10 -        .long   -0x1BADB004
    3.11 +        .long   -0x1BADB005
    3.12  
    3.13          .org    0x010
    3.14          .asciz "ERR: Not a 64-bit CPU!"
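
Both boot stubs turn on Multiboot flag bit 0 (request page-aligned modules)
alongside the existing bit 1 (request memory information), and adjust the
checksum so the three header words still sum to zero, as the Multiboot spec
requires; page-aligned modules fit the new scheme of handing the stashed
kernel/initrd images back to the page allocator. A minimal sketch of the
header invariant (standalone C, identifiers illustrative):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        const uint32_t magic    = 0x1BADB002; /* Multiboot header magic */
        const uint32_t flags    = 0x00000003; /* bit 0: page-align modules;
                                                 bit 1: memory info */
        const uint32_t checksum = 0U - (magic + flags); /* == -0x1BADB005 */

        /* Bootloaders validate: magic + flags + checksum == 0 (mod 2^32). */
        assert((uint32_t)(magic + flags + checksum) == 0);
        return 0;
    }
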
     4.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     4.2 +++ b/xen/arch/x86/domain_build.c	Fri Feb 25 18:37:31 2005 +0000
     4.3 @@ -0,0 +1,543 @@
     4.4 +/******************************************************************************
     4.5 + * domain_build.c
     4.6 + * 
     4.7 + * Copyright (c) 2002-2005, K A Fraser
     4.8 + */
     4.9 +
    4.10 +#include <xen/config.h>
    4.11 +#include <xen/init.h>
    4.12 +#include <xen/lib.h>
    4.13 +#include <xen/sched.h>
    4.14 +#include <xen/smp.h>
    4.15 +#include <xen/delay.h>
    4.16 +#include <xen/event.h>
    4.17 +#include <xen/elf.h>
    4.18 +#include <xen/kernel.h>
    4.19 +#include <asm/regs.h>
    4.20 +#include <asm/system.h>
    4.21 +#include <asm/io.h>
    4.22 +#include <asm/processor.h>
    4.23 +#include <asm/desc.h>
    4.24 +#include <asm/i387.h>
    4.25 +#include <asm/shadow.h>
    4.26 +
    4.27 +/* opt_dom0_mem: Kilobytes of memory allocated to domain 0. */
    4.28 +static unsigned int opt_dom0_mem = 0;
    4.29 +integer_param("dom0_mem", opt_dom0_mem);
    4.30 +
    4.31 +#if defined(__i386__)
    4.32 +/* No ring-3 access in initial leaf page tables. */
    4.33 +#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
    4.34 +#elif defined(__x86_64__)
    4.35 +/* Allow ring-3 access in long mode as guest cannot use ring 1. */
    4.36 +#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
    4.37 +#endif
    4.38 +#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
    4.39 +#define L3_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
    4.40 +#define L4_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
    4.41 +
    4.42 +#define round_pgup(_p)    (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
    4.43 +#define round_pgdown(_p)  ((_p)&PAGE_MASK)
    4.44 +
    4.45 +static struct pfn_info *alloc_largest(struct domain *d, unsigned long max)
    4.46 +{
    4.47 +    struct pfn_info *page;
    4.48 +    unsigned int order = get_order(max * PAGE_SIZE);
    4.49 +    if ( (max & (max-1)) != 0 )
    4.50 +        order--;
    4.51 +    while ( (page = alloc_domheap_pages(d, order)) == NULL )
    4.52 +        if ( order-- == 0 )
    4.53 +            break;
    4.54 +    return page;
    4.55 +}
    4.56 +
    4.57 +int construct_dom0(struct domain *d,
    4.58 +                   unsigned long _image_start, unsigned long image_len, 
    4.59 +                   unsigned long _initrd_start, unsigned long initrd_len,
    4.60 +                   char *cmdline)
    4.61 +{
    4.62 +    char *dst;
    4.63 +    int i, rc;
    4.64 +    unsigned long pfn, mfn;
    4.65 +    unsigned long nr_pages;
    4.66 +    unsigned long nr_pt_pages;
    4.67 +    unsigned long alloc_start;
    4.68 +    unsigned long alloc_end;
    4.69 +    unsigned long count;
    4.70 +    struct pfn_info *page = NULL;
    4.71 +    start_info_t *si;
    4.72 +    struct exec_domain *ed = d->exec_domain[0];
    4.73 +#if defined(__i386__)
    4.74 +    char *image_start  = (char *)_image_start;  /* use lowmem mappings */
    4.75 +    char *initrd_start = (char *)_initrd_start; /* use lowmem mappings */
    4.76 +#elif defined(__x86_64__)
    4.77 +    char *image_start  = __va(_image_start);
    4.78 +    char *initrd_start = __va(_initrd_start);
    4.79 +    l4_pgentry_t *l4tab = NULL, *l4start = NULL;
    4.80 +    l3_pgentry_t *l3tab = NULL, *l3start = NULL;
    4.81 +#endif
    4.82 +    l2_pgentry_t *l2tab = NULL, *l2start = NULL;
    4.83 +    l1_pgentry_t *l1tab = NULL, *l1start = NULL;
    4.84 +
    4.85 +    /*
    4.86 +     * This fully describes the memory layout of the initial domain. All 
     4.87 +     * *_start addresses are page-aligned, except v_start (and v_end) which are
    4.88 +     * superpage-aligned.
    4.89 +     */
    4.90 +    struct domain_setup_info dsi;
    4.91 +    unsigned long vinitrd_start;
    4.92 +    unsigned long vinitrd_end;
    4.93 +    unsigned long vphysmap_start;
    4.94 +    unsigned long vphysmap_end;
    4.95 +    unsigned long vstartinfo_start;
    4.96 +    unsigned long vstartinfo_end;
    4.97 +    unsigned long vstack_start;
    4.98 +    unsigned long vstack_end;
    4.99 +    unsigned long vpt_start;
   4.100 +    unsigned long vpt_end;
   4.101 +    unsigned long v_end;
   4.102 +
   4.103 +    /* Machine address of next candidate page-table page. */
   4.104 +    unsigned long mpt_alloc;
   4.105 +
   4.106 +    extern void physdev_init_dom0(struct domain *);
   4.107 +
   4.108 +    /* Sanity! */
   4.109 +    if ( d->id != 0 ) 
   4.110 +        BUG();
   4.111 +    if ( test_bit(DF_CONSTRUCTED, &d->d_flags) ) 
   4.112 +        BUG();
   4.113 +
   4.114 +    memset(&dsi, 0, sizeof(struct domain_setup_info));
   4.115 +
   4.116 +    printk("*** LOADING DOMAIN 0 ***\n");
   4.117 +
   4.118 +    /* By default DOM0 is allocated all available memory. */
   4.119 +    if ( (nr_pages = opt_dom0_mem >> (PAGE_SHIFT - 10)) == 0 )
   4.120 +        nr_pages = avail_domheap_pages() +
   4.121 +            ((initrd_len + PAGE_SIZE - 1) >> PAGE_SHIFT) +
   4.122 +            ((image_len  + PAGE_SIZE - 1) >> PAGE_SHIFT);
   4.123 +    d->max_pages = nr_pages;
   4.124 +    if ( (page = alloc_largest(d, nr_pages)) == NULL )
   4.125 +        panic("Not enough RAM for DOM0 reservation.\n");
   4.126 +    alloc_start = page_to_phys(page);
   4.127 +    alloc_end   = alloc_start + (d->tot_pages << PAGE_SHIFT);
   4.128 +    
   4.129 +    rc = parseelfimage(image_start, image_len, &dsi);
   4.130 +    if ( rc != 0 )
   4.131 +        return rc;
   4.132 +
   4.133 +    /* Set up domain options */
   4.134 +    if ( dsi.use_writable_pagetables )
   4.135 +        vm_assist(d, VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);
   4.136 +
   4.137 +    /* Align load address to 4MB boundary. */
   4.138 +    dsi.v_start &= ~((1UL<<22)-1);
   4.139 +
   4.140 +    /*
   4.141 +     * Why do we need this? The number of page-table frames depends on the 
   4.142 +     * size of the bootstrap address space. But the size of the address space 
   4.143 +     * depends on the number of page-table frames (since each one is mapped 
   4.144 +     * read-only). We have a pair of simultaneous equations in two unknowns, 
   4.145 +     * which we solve by exhaustive search.
   4.146 +     */
   4.147 +    vinitrd_start    = round_pgup(dsi.v_kernend);
   4.148 +    vinitrd_end      = vinitrd_start + initrd_len;
   4.149 +    vphysmap_start   = round_pgup(vinitrd_end);
   4.150 +    vphysmap_end     = vphysmap_start + (nr_pages * sizeof(u32));
   4.151 +    vpt_start        = round_pgup(vphysmap_end);
   4.152 +    for ( nr_pt_pages = 2; ; nr_pt_pages++ )
   4.153 +    {
   4.154 +        vpt_end          = vpt_start + (nr_pt_pages * PAGE_SIZE);
   4.155 +        vstartinfo_start = vpt_end;
   4.156 +        vstartinfo_end   = vstartinfo_start + PAGE_SIZE;
   4.157 +        vstack_start     = vstartinfo_end;
   4.158 +        vstack_end       = vstack_start + PAGE_SIZE;
   4.159 +        v_end            = (vstack_end + (1UL<<22)-1) & ~((1UL<<22)-1);
   4.160 +        if ( (v_end - vstack_end) < (512UL << 10) )
   4.161 +            v_end += 1UL << 22; /* Add extra 4MB to get >= 512kB padding. */
   4.162 +#if defined(__i386__)
   4.163 +        if ( (((v_end - dsi.v_start + ((1UL<<L2_PAGETABLE_SHIFT)-1)) >> 
   4.164 +               L2_PAGETABLE_SHIFT) + 1) <= nr_pt_pages )
   4.165 +            break;
   4.166 +#elif defined(__x86_64__)
   4.167 +#define NR(_l,_h,_s) \
   4.168 +    (((((_h) + ((1UL<<(_s))-1)) & ~((1UL<<(_s))-1)) - \
   4.169 +       ((_l) & ~((1UL<<(_s))-1))) >> (_s))
   4.170 +        if ( (1 + /* # L4 */
   4.171 +              NR(dsi.v_start, v_end, L4_PAGETABLE_SHIFT) + /* # L3 */
   4.172 +              NR(dsi.v_start, v_end, L3_PAGETABLE_SHIFT) + /* # L2 */
   4.173 +              NR(dsi.v_start, v_end, L2_PAGETABLE_SHIFT))  /* # L1 */
   4.174 +             <= nr_pt_pages )
   4.175 +            break;
   4.176 +#endif
   4.177 +    }
   4.178 +
   4.179 +    if ( (v_end - dsi.v_start) > (alloc_end - alloc_start) )
   4.180 +        panic("Insufficient contiguous RAM to build kernel image.\n");
   4.181 +
   4.182 +    printk("VIRTUAL MEMORY ARRANGEMENT:\n"
   4.183 +           " Loaded kernel: %p->%p\n"
   4.184 +           " Init. ramdisk: %p->%p\n"
   4.185 +           " Phys-Mach map: %p->%p\n"
   4.186 +           " Page tables:   %p->%p\n"
   4.187 +           " Start info:    %p->%p\n"
   4.188 +           " Boot stack:    %p->%p\n"
   4.189 +           " TOTAL:         %p->%p\n",
   4.190 +           dsi.v_kernstart, dsi.v_kernend, 
   4.191 +           vinitrd_start, vinitrd_end,
   4.192 +           vphysmap_start, vphysmap_end,
   4.193 +           vpt_start, vpt_end,
   4.194 +           vstartinfo_start, vstartinfo_end,
   4.195 +           vstack_start, vstack_end,
   4.196 +           dsi.v_start, v_end);
   4.197 +    printk(" ENTRY ADDRESS: %p\n", dsi.v_kernentry);
   4.198 +
   4.199 +    if ( (v_end - dsi.v_start) > (nr_pages * PAGE_SIZE) )
   4.200 +    {
   4.201 +        printk("Initial guest OS requires too much space\n"
   4.202 +               "(%luMB is greater than %luMB limit)\n",
   4.203 +               (v_end-dsi.v_start)>>20, (nr_pages<<PAGE_SHIFT)>>20);
   4.204 +        return -ENOMEM;
   4.205 +    }
   4.206 +
   4.207 +    mpt_alloc = (vpt_start - dsi.v_start) + alloc_start;
   4.208 +
   4.209 +    SET_GDT_ENTRIES(ed, DEFAULT_GDT_ENTRIES);
   4.210 +    SET_GDT_ADDRESS(ed, DEFAULT_GDT_ADDRESS);
   4.211 +
   4.212 +    /*
   4.213 +     * We're basically forcing default RPLs to 1, so that our "what privilege
   4.214 +     * level are we returning to?" logic works.
   4.215 +     */
   4.216 +    ed->arch.failsafe_selector = FLAT_KERNEL_CS;
   4.217 +    ed->arch.event_selector    = FLAT_KERNEL_CS;
   4.218 +    ed->arch.kernel_ss = FLAT_KERNEL_SS;
   4.219 +    for ( i = 0; i < 256; i++ ) 
   4.220 +        ed->arch.traps[i].cs = FLAT_KERNEL_CS;
   4.221 +
   4.222 +#if defined(__i386__)
   4.223 +
   4.224 +    /*
   4.225 +     * Protect the lowest 1GB of memory. We use a temporary mapping there
   4.226 +     * from which we copy the kernel and ramdisk images.
   4.227 +     */
   4.228 +    if ( dsi.v_start < (1UL<<30) )
   4.229 +    {
    4.230 +        printk("Initial loading is not allowed in the lowest 1GB of memory.\n");
   4.231 +        return -EINVAL;
   4.232 +    }
   4.233 +
   4.234 +    /* WARNING: The new domain must have its 'processor' field filled in! */
   4.235 +    l2start = l2tab = (l2_pgentry_t *)mpt_alloc; mpt_alloc += PAGE_SIZE;
   4.236 +    memcpy(l2tab, &idle_pg_table[0], PAGE_SIZE);
   4.237 +    l2tab[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
   4.238 +        mk_l2_pgentry((unsigned long)l2start | __PAGE_HYPERVISOR);
   4.239 +    l2tab[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] =
   4.240 +        mk_l2_pgentry(__pa(d->arch.mm_perdomain_pt) | __PAGE_HYPERVISOR);
   4.241 +    ed->arch.guest_table = mk_pagetable((unsigned long)l2start);
   4.242 +
   4.243 +    l2tab += l2_table_offset(dsi.v_start);
   4.244 +    mfn = alloc_start >> PAGE_SHIFT;
   4.245 +    for ( count = 0; count < ((v_end-dsi.v_start)>>PAGE_SHIFT); count++ )
   4.246 +    {
   4.247 +        if ( !((unsigned long)l1tab & (PAGE_SIZE-1)) )
   4.248 +        {
   4.249 +            l1start = l1tab = (l1_pgentry_t *)mpt_alloc; 
   4.250 +            mpt_alloc += PAGE_SIZE;
   4.251 +            *l2tab++ = mk_l2_pgentry((unsigned long)l1start | L2_PROT);
   4.252 +            clear_page(l1tab);
   4.253 +            if ( count == 0 )
   4.254 +                l1tab += l1_table_offset(dsi.v_start);
   4.255 +        }
   4.256 +        *l1tab++ = mk_l1_pgentry((mfn << PAGE_SHIFT) | L1_PROT);
   4.257 +        
   4.258 +        page = &frame_table[mfn];
   4.259 +        if ( !get_page_and_type(page, d, PGT_writable_page) )
   4.260 +            BUG();
   4.261 +
   4.262 +        mfn++;
   4.263 +    }
   4.264 +
   4.265 +    /* Pages that are part of page tables must be read only. */
   4.266 +    l2tab = l2start + l2_table_offset(vpt_start);
   4.267 +    l1start = l1tab = (l1_pgentry_t *)l2_pgentry_to_phys(*l2tab);
   4.268 +    l1tab += l1_table_offset(vpt_start);
   4.269 +    for ( count = 0; count < nr_pt_pages; count++ ) 
   4.270 +    {
   4.271 +        *l1tab = mk_l1_pgentry(l1_pgentry_val(*l1tab) & ~_PAGE_RW);
   4.272 +        page = &frame_table[l1_pgentry_to_pfn(*l1tab)];
   4.273 +        if ( count == 0 )
   4.274 +        {
   4.275 +            page->u.inuse.type_info &= ~PGT_type_mask;
   4.276 +            page->u.inuse.type_info |= PGT_l2_page_table;
   4.277 +
   4.278 +            /*
   4.279 +             * No longer writable: decrement the type_count.
   4.280 +             * Installed as CR3: increment both the ref_count and type_count.
   4.281 +             * Net: just increment the ref_count.
   4.282 +             */
   4.283 +            get_page(page, d); /* an extra ref because of readable mapping */
   4.284 +
   4.285 +            /* Get another ref to L2 page so that it can be pinned. */
   4.286 +            if ( !get_page_and_type(page, d, PGT_l2_page_table) )
   4.287 +                BUG();
   4.288 +            set_bit(_PGT_pinned, &page->u.inuse.type_info);
   4.289 +        }
   4.290 +        else
   4.291 +        {
   4.292 +            page->u.inuse.type_info &= ~PGT_type_mask;
   4.293 +            page->u.inuse.type_info |= PGT_l1_page_table;
   4.294 +            page->u.inuse.type_info |= 
   4.295 +                ((dsi.v_start>>L2_PAGETABLE_SHIFT)+(count-1))<<PGT_va_shift;
   4.296 +
   4.297 +            /*
   4.298 +             * No longer writable: decrement the type_count.
   4.299 +             * This is an L1 page, installed in a validated L2 page:
   4.300 +             * increment both the ref_count and type_count.
   4.301 +             * Net: just increment the ref_count.
   4.302 +             */
   4.303 +            get_page(page, d); /* an extra ref because of readable mapping */
   4.304 +        }
   4.305 +        if ( !((unsigned long)++l1tab & (PAGE_SIZE - 1)) )
   4.306 +            l1start = l1tab = (l1_pgentry_t *)l2_pgentry_to_phys(*++l2tab);
   4.307 +    }
   4.308 +
   4.309 +#elif defined(__x86_64__)
   4.310 +
   4.311 +    /* Overlap with Xen protected area? */
   4.312 +    if ( (dsi.v_start < HYPERVISOR_VIRT_END) &&
   4.313 +         (v_end > HYPERVISOR_VIRT_START) )
   4.314 +    {
   4.315 +        printk("DOM0 image overlaps with Xen private area.\n");
   4.316 +        return -EINVAL;
   4.317 +    }
   4.318 +
   4.319 +    /* WARNING: The new domain must have its 'processor' field filled in! */
   4.320 +    phys_to_page(mpt_alloc)->u.inuse.type_info = PGT_l4_page_table;
   4.321 +    l4start = l4tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE;
   4.322 +    memcpy(l4tab, &idle_pg_table[0], PAGE_SIZE);
   4.323 +    l4tab[l4_table_offset(LINEAR_PT_VIRT_START)] =
   4.324 +        mk_l4_pgentry(__pa(l4start) | __PAGE_HYPERVISOR);
   4.325 +    l4tab[l4_table_offset(PERDOMAIN_VIRT_START)] =
   4.326 +        mk_l4_pgentry(__pa(d->arch.mm_perdomain_l3) | __PAGE_HYPERVISOR);
   4.327 +    ed->arch.guest_table = mk_pagetable(__pa(l4start));
   4.328 +
   4.329 +    l4tab += l4_table_offset(dsi.v_start);
   4.330 +    mfn = alloc_start >> PAGE_SHIFT;
   4.331 +    for ( count = 0; count < ((v_end-dsi.v_start)>>PAGE_SHIFT); count++ )
   4.332 +    {
   4.333 +        if ( !((unsigned long)l1tab & (PAGE_SIZE-1)) )
   4.334 +        {
   4.335 +            phys_to_page(mpt_alloc)->u.inuse.type_info = PGT_l1_page_table;
   4.336 +            l1start = l1tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE;
   4.337 +            clear_page(l1tab);
   4.338 +            if ( count == 0 )
   4.339 +                l1tab += l1_table_offset(dsi.v_start);
   4.340 +            if ( !((unsigned long)l2tab & (PAGE_SIZE-1)) )
   4.341 +            {
   4.342 +                phys_to_page(mpt_alloc)->u.inuse.type_info = PGT_l2_page_table;
   4.343 +                l2start = l2tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE;
   4.344 +                clear_page(l2tab);
   4.345 +                if ( count == 0 )
   4.346 +                    l2tab += l2_table_offset(dsi.v_start);
   4.347 +                if ( !((unsigned long)l3tab & (PAGE_SIZE-1)) )
   4.348 +                {
   4.349 +                    phys_to_page(mpt_alloc)->u.inuse.type_info =
   4.350 +                        PGT_l3_page_table;
   4.351 +                    l3start = l3tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE;
   4.352 +                    clear_page(l3tab);
   4.353 +                    if ( count == 0 )
   4.354 +                        l3tab += l3_table_offset(dsi.v_start);
   4.355 +                    *l4tab++ = mk_l4_pgentry(__pa(l3start) | L4_PROT);
   4.356 +                }
   4.357 +                *l3tab++ = mk_l3_pgentry(__pa(l2start) | L3_PROT);
   4.358 +            }
   4.359 +            *l2tab++ = mk_l2_pgentry(__pa(l1start) | L2_PROT);
   4.360 +        }
   4.361 +        *l1tab++ = mk_l1_pgentry((mfn << PAGE_SHIFT) | L1_PROT);
   4.362 +
   4.363 +        page = &frame_table[mfn];
   4.364 +        if ( (page->u.inuse.type_info == 0) &&
   4.365 +             !get_page_and_type(page, d, PGT_writable_page) )
   4.366 +            BUG();
   4.367 +
   4.368 +        mfn++;
   4.369 +    }
   4.370 +
   4.371 +    /* Pages that are part of page tables must be read only. */
   4.372 +    l4tab = l4start + l4_table_offset(vpt_start);
   4.373 +    l3start = l3tab = l4_pgentry_to_l3(*l4tab);
   4.374 +    l3tab += l3_table_offset(vpt_start);
   4.375 +    l2start = l2tab = l3_pgentry_to_l2(*l3tab);
   4.376 +    l2tab += l2_table_offset(vpt_start);
   4.377 +    l1start = l1tab = l2_pgentry_to_l1(*l2tab);
   4.378 +    l1tab += l1_table_offset(vpt_start);
   4.379 +    for ( count = 0; count < nr_pt_pages; count++ ) 
   4.380 +    {
   4.381 +        *l1tab = mk_l1_pgentry(l1_pgentry_val(*l1tab) & ~_PAGE_RW);
   4.382 +        page = &frame_table[l1_pgentry_to_pfn(*l1tab)];
   4.383 +
   4.384 +        /* Read-only mapping + PGC_allocated + page-table page. */
   4.385 +        page->count_info         = PGC_allocated | 3;
   4.386 +        page->u.inuse.type_info |= PGT_validated | 1;
   4.387 +
   4.388 +        /* Top-level p.t. is pinned. */
   4.389 +        if ( (page->u.inuse.type_info & PGT_type_mask) == PGT_l4_page_table )
   4.390 +        {
   4.391 +            page->count_info        += 1;
   4.392 +            page->u.inuse.type_info += 1 | PGT_pinned;
   4.393 +        }
   4.394 +
   4.395 +        /* Iterate. */
   4.396 +        if ( !((unsigned long)++l1tab & (PAGE_SIZE - 1)) )
   4.397 +        {
   4.398 +            if ( !((unsigned long)++l2tab & (PAGE_SIZE - 1)) )
   4.399 +            {
   4.400 +                if ( !((unsigned long)++l3tab & (PAGE_SIZE - 1)) )
   4.401 +                    l3start = l3tab = l4_pgentry_to_l3(*++l4tab); 
   4.402 +                l2start = l2tab = l3_pgentry_to_l2(*l3tab);
   4.403 +            }
   4.404 +            l1start = l1tab = l2_pgentry_to_l1(*l2tab);
   4.405 +        }
   4.406 +    }
   4.407 +
   4.408 +#endif /* __x86_64__ */
   4.409 +
   4.410 +    /* Set up shared-info area. */
   4.411 +    update_dom_time(d);
   4.412 +    d->shared_info->domain_time = 0;
   4.413 +    /* Mask all upcalls... */
   4.414 +    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
   4.415 +        d->shared_info->vcpu_data[i].evtchn_upcall_mask = 1;
   4.416 +    d->shared_info->n_vcpu = smp_num_cpus;
   4.417 +
   4.418 +    /* Set up shadow and monitor tables. */
   4.419 +    update_pagetables(ed);
   4.420 +
   4.421 +    /* Install the new page tables. */
   4.422 +    __cli();
   4.423 +    write_ptbase(ed);
   4.424 +
   4.425 +    /* Copy the OS image and free temporary buffer. */
   4.426 +    (void)loadelfimage(image_start);
   4.427 +    init_domheap_pages(
   4.428 +        _image_start, (_image_start+image_len+PAGE_SIZE-1) & PAGE_MASK);
   4.429 +
   4.430 +    /* Copy the initial ramdisk and free temporary buffer. */
   4.431 +    if ( initrd_len != 0 )
   4.432 +    {
   4.433 +        memcpy((void *)vinitrd_start, initrd_start, initrd_len);
   4.434 +        init_domheap_pages(
   4.435 +            _initrd_start, (_initrd_start+initrd_len+PAGE_SIZE-1) & PAGE_MASK);
   4.436 +    }
   4.437 +    
   4.438 +    /* Set up start info area. */
   4.439 +    si = (start_info_t *)vstartinfo_start;
   4.440 +    memset(si, 0, PAGE_SIZE);
   4.441 +    si->nr_pages     = nr_pages;
   4.442 +    si->shared_info  = virt_to_phys(d->shared_info);
   4.443 +    si->flags        = SIF_PRIVILEGED | SIF_INITDOMAIN;
   4.444 +    si->pt_base      = vpt_start;
   4.445 +    si->nr_pt_frames = nr_pt_pages;
   4.446 +    si->mfn_list     = vphysmap_start;
   4.447 +
   4.448 +    /* Write the phys->machine and machine->phys table entries. */
   4.449 +    for ( pfn = 0; pfn < d->tot_pages; pfn++ )
   4.450 +    {
   4.451 +        mfn = pfn + (alloc_start>>PAGE_SHIFT);
   4.452 +#ifndef NDEBUG
   4.453 +#define REVERSE_START ((v_end - dsi.v_start) >> PAGE_SHIFT)
   4.454 +        if ( pfn > REVERSE_START )
   4.455 +            mfn = (alloc_end>>PAGE_SHIFT) - (pfn - REVERSE_START);
   4.456 +#endif
   4.457 +        ((u32 *)vphysmap_start)[pfn] = mfn;
   4.458 +        machine_to_phys_mapping[mfn] = pfn;
   4.459 +    }
   4.460 +    while ( pfn < nr_pages )
   4.461 +    {
   4.462 +        if ( (page = alloc_largest(d, nr_pages - d->tot_pages)) == NULL )
   4.463 +            panic("Not enough RAM for DOM0 reservation.\n");
   4.464 +        while ( pfn < d->tot_pages )
   4.465 +        {
   4.466 +            mfn = page_to_pfn(page);
   4.467 +#ifndef NDEBUG
   4.468 +#define pfn (nr_pages - 1 - (pfn - ((alloc_end - alloc_start) >> PAGE_SHIFT)))
   4.469 +#endif
   4.470 +            ((u32 *)vphysmap_start)[pfn] = mfn;
   4.471 +            machine_to_phys_mapping[mfn] = pfn;
   4.472 +#undef pfn
   4.473 +            page++; pfn++;
   4.474 +        }
   4.475 +    }
   4.476 +
   4.477 +    if ( initrd_len != 0 )
   4.478 +    {
   4.479 +        si->mod_start = vinitrd_start;
   4.480 +        si->mod_len   = initrd_len;
   4.481 +        printk("Initrd len 0x%lx, start at 0x%p\n",
   4.482 +               si->mod_len, si->mod_start);
   4.483 +    }
   4.484 +
   4.485 +    dst = si->cmd_line;
   4.486 +    if ( cmdline != NULL )
   4.487 +    {
   4.488 +        for ( i = 0; i < 255; i++ )
   4.489 +        {
   4.490 +            if ( cmdline[i] == '\0' )
   4.491 +                break;
   4.492 +            *dst++ = cmdline[i];
   4.493 +        }
   4.494 +    }
   4.495 +    *dst = '\0';
   4.496 +
   4.497 +    /* Reinstate the caller's page tables. */
   4.498 +    write_ptbase(current);
   4.499 +    __sti();
   4.500 +
   4.501 +#if defined(__i386__)
   4.502 +    /* Destroy low mappings - they were only for our convenience. */
   4.503 +    for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
   4.504 +        if ( l2_pgentry_val(l2start[i]) & _PAGE_PSE )
   4.505 +            l2start[i] = mk_l2_pgentry(0);
   4.506 +    zap_low_mappings(); /* Do the same for the idle page tables. */
   4.507 +#endif
   4.508 +    
   4.509 +    /* DOM0 gets access to everything. */
   4.510 +    physdev_init_dom0(d);
   4.511 +
   4.512 +    set_bit(DF_CONSTRUCTED, &d->d_flags);
   4.513 +
   4.514 +    new_thread(ed, dsi.v_kernentry, vstack_end, vstartinfo_start);
   4.515 +
   4.516 +    return 0;
   4.517 +}
   4.518 +
   4.519 +int elf_sanity_check(Elf_Ehdr *ehdr)
   4.520 +{
   4.521 +    if ( !IS_ELF(*ehdr) ||
   4.522 +#if defined(__i386__)
   4.523 +         (ehdr->e_ident[EI_CLASS] != ELFCLASS32) ||
   4.524 +         (ehdr->e_machine != EM_386) ||
   4.525 +#elif defined(__x86_64__)
   4.526 +         (ehdr->e_ident[EI_CLASS] != ELFCLASS64) ||
   4.527 +         (ehdr->e_machine != EM_X86_64) ||
   4.528 +#endif
   4.529 +         (ehdr->e_ident[EI_DATA] != ELFDATA2LSB) ||
   4.530 +         (ehdr->e_type != ET_EXEC) )
   4.531 +    {
    4.532 +        printk("DOM0 image is not a Xen-compatible ELF image.\n");
   4.533 +        return 0;
   4.534 +    }
   4.535 +
   4.536 +    return 1;
   4.537 +}
   4.538 +
   4.539 +/*
   4.540 + * Local variables:
   4.541 + * mode: C
   4.542 + * c-set-style: "BSD"
   4.543 + * c-basic-offset: 4
   4.544 + * tab-width: 4
   4.545 + * indent-tabs-mode: nil
   4.546 + */
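
The new alloc_largest() (4.45-4.55 above) implements a simple back-off: start
at the largest power-of-two order that does not exceed the remaining request
and decrement the order until alloc_domheap_pages() succeeds;
construct_dom0() then calls it repeatedly until the reservation is met. A
hedged standalone sketch of that strategy, with a hypothetical
try_alloc_order() standing in for the Xen allocator:

    /* Sketch only: reserve 'want' pages as a series of largest-available
     * power-of-two runs. try_alloc_order(order) is an assumed stand-in for
     * alloc_domheap_pages(d, order), returning nonzero on success. */
    static unsigned long reserve_pages(unsigned long want,
                                       int (*try_alloc_order)(unsigned int))
    {
        unsigned long got = 0;

        while ( got < want )
        {
            unsigned long remaining = want - got;
            unsigned int order = 0;

            /* Largest order with (1UL << order) <= remaining. */
            while ( (2UL << order) <= remaining )
                order++;

            /* Back off one order at a time until an allocation succeeds. */
            while ( !try_alloc_order(order) )
                if ( order-- == 0 )
                    return got; /* out of memory: partial reservation */

            got += 1UL << order;
        }

        return got;
    }
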
     5.1 --- a/xen/arch/x86/setup.c	Fri Feb 25 17:06:27 2005 +0000
     5.2 +++ b/xen/arch/x86/setup.c	Fri Feb 25 18:37:31 2005 +0000
     5.3 @@ -20,10 +20,6 @@
     5.4  #include <asm/shadow.h>
     5.5  #include <asm/e820.h>
     5.6  
     5.7 -/* opt_dom0_mem: Kilobytes of memory allocated to domain 0. */
     5.8 -static unsigned int opt_dom0_mem = 16000;
     5.9 -integer_param("dom0_mem", opt_dom0_mem);
    5.10 -
    5.11  /*
    5.12   * opt_xenheap_megabytes: Size of Xen heap in megabytes, excluding the
    5.13   * pfn_info table and allocation bitmap.
    5.14 @@ -463,7 +459,6 @@ void __init __start_xen(multiboot_info_t
    5.15      module_t *mod = (module_t *)__va(mbi->mods_addr);
    5.16      void *heap_start;
    5.17      unsigned long firsthole_start, nr_pages;
    5.18 -    unsigned long dom0_memory_start, dom0_memory_end;
    5.19      unsigned long initial_images_start, initial_images_end;
    5.20      struct e820entry e820_raw[E820MAX];
    5.21      int i, e820_raw_nr = 0, bytes = 0;
    5.22 @@ -567,15 +562,6 @@ void __init __start_xen(multiboot_info_t
    5.23             nr_pages >> (20 - PAGE_SHIFT),
    5.24             nr_pages << (PAGE_SHIFT - 10));
    5.25  
    5.26 -    /* Allocate an aligned chunk of RAM for DOM0. */
    5.27 -    dom0_memory_start = alloc_boot_pages(opt_dom0_mem << 10, 4UL << 20);
    5.28 -    dom0_memory_end   = dom0_memory_start + (opt_dom0_mem << 10);
    5.29 -    if ( dom0_memory_start == 0 )
    5.30 -    {
    5.31 -        printk("Not enough memory for DOM0 memory reservation.\n");
    5.32 -        for ( ; ; ) ;
    5.33 -    }
    5.34 -
    5.35      init_frametable();
    5.36  
    5.37      end_boot_allocator();
    5.38 @@ -613,7 +599,7 @@ void __init __start_xen(multiboot_info_t
    5.39       * We're going to setup domain0 using the module(s) that we stashed safely
    5.40       * above our heap. The second module, if present, is an initrd ramdisk.
    5.41       */
    5.42 -    if ( construct_dom0(dom0, dom0_memory_start, dom0_memory_end,
    5.43 +    if ( construct_dom0(dom0,
    5.44                          initial_images_start, 
    5.45                          mod[0].mod_end-mod[0].mod_start,
    5.46                          (mbi->mods_count == 1) ? 0 :
    5.47 @@ -624,9 +610,7 @@ void __init __start_xen(multiboot_info_t
    5.48                          cmdline) != 0)
    5.49          panic("Could not set up DOM0 guest OS\n");
    5.50  
    5.51 -    /* The stash space for the initial kernel image can now be freed up. */
    5.52 -    init_domheap_pages(initial_images_start, initial_images_end);
    5.53 -
    5.54 +    /* Scrub RAM that is still free and so may go to an unprivileged domain. */
    5.55      scrub_heap_pages();
    5.56  
    5.57      init_trace_bufs();
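
With the boot-time carve-out gone, the dom0_mem option moves into
domain_build.c and keeps its unit: kilobytes, converted to pages with
opt_dom0_mem >> (PAGE_SHIFT - 10). When the option is absent, the builder
claims every free domheap page plus the pages the stashed kernel and initrd
images will release once they have been copied into DOM0. A worked sketch of
the sizing arithmetic (standalone C, values illustrative):

    #include <stdio.h>

    #define PAGE_SHIFT 12               /* x86: 4kB pages */
    #define PAGE_SIZE  (1UL << PAGE_SHIFT)

    int main(void)
    {
        unsigned long opt_dom0_mem = 0;      /* kB; 0 means "take everything" */
        unsigned long avail_pages  = 130048; /* free domheap pages (made up) */
        unsigned long image_len    = 3000000, initrd_len = 8000000;
        unsigned long nr_pages;

        /* kB >> (PAGE_SHIFT - 10) == kB / 4 on x86: kilobytes to pages. */
        if ( (nr_pages = opt_dom0_mem >> (PAGE_SHIFT - 10)) == 0 )
            nr_pages = avail_pages
                + ((initrd_len + PAGE_SIZE - 1) >> PAGE_SHIFT)
                + ((image_len  + PAGE_SIZE - 1) >> PAGE_SHIFT);

        printf("DOM0 reservation: %lu pages\n", nr_pages);
        return 0;
    }
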
     6.1 --- a/xen/arch/x86/x86_32/domain_build.c	Fri Feb 25 17:06:27 2005 +0000
     6.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     6.3 @@ -1,416 +0,0 @@
     6.4 -/******************************************************************************
     6.5 - * domain_build.c
     6.6 - * 
     6.7 - * Copyright (c) 2002-2005, K A Fraser
     6.8 - */
     6.9 -
    6.10 -#include <xen/config.h>
    6.11 -#include <xen/init.h>
    6.12 -#include <xen/lib.h>
    6.13 -#include <xen/sched.h>
    6.14 -#include <xen/smp.h>
    6.15 -#include <xen/delay.h>
    6.16 -#include <asm/regs.h>
    6.17 -#include <asm/system.h>
    6.18 -#include <asm/io.h>
    6.19 -#include <asm/processor.h>
    6.20 -#include <asm/desc.h>
    6.21 -#include <asm/i387.h>
    6.22 -#include <xen/event.h>
    6.23 -#include <xen/elf.h>
    6.24 -#include <xen/kernel.h>
    6.25 -#include <asm/shadow.h>
    6.26 -
    6.27 -/* No ring-3 access in initial page tables. */
    6.28 -#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
    6.29 -#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
    6.30 -
    6.31 -#define round_pgup(_p)    (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
    6.32 -#define round_pgdown(_p)  ((_p)&PAGE_MASK)
    6.33 -
    6.34 -int construct_dom0(struct domain *d,
    6.35 -                   unsigned long alloc_start,
    6.36 -                   unsigned long alloc_end,
    6.37 -                   unsigned long _image_start, unsigned long image_len, 
    6.38 -                   unsigned long _initrd_start, unsigned long initrd_len,
    6.39 -                   char *cmdline)
    6.40 -{
    6.41 -    char *dst;
    6.42 -    int i, rc;
    6.43 -    unsigned long pfn, mfn;
    6.44 -    unsigned long nr_pages = (alloc_end - alloc_start) >> PAGE_SHIFT;
    6.45 -    unsigned long nr_pt_pages;
    6.46 -    unsigned long count;
    6.47 -    l2_pgentry_t *l2tab, *l2start;
    6.48 -    l1_pgentry_t *l1tab = NULL, *l1start = NULL;
    6.49 -    struct pfn_info *page = NULL;
    6.50 -    start_info_t *si;
    6.51 -    struct exec_domain *ed = d->exec_domain[0];
    6.52 -    char *image_start  = (char *)_image_start;  /* use lowmem mappings */
    6.53 -    char *initrd_start = (char *)_initrd_start; /* use lowmem mappings */
    6.54 -
    6.55 -    /*
    6.56 -     * This fully describes the memory layout of the initial domain. All 
    6.57 -     * *_start address are page-aligned, except v_start (and v_end) which are 
    6.58 -     * superpage-aligned.
    6.59 -     */
    6.60 -    struct domain_setup_info dsi;
    6.61 -    unsigned long vinitrd_start;
    6.62 -    unsigned long vinitrd_end;
    6.63 -    unsigned long vphysmap_start;
    6.64 -    unsigned long vphysmap_end;
    6.65 -    unsigned long vstartinfo_start;
    6.66 -    unsigned long vstartinfo_end;
    6.67 -    unsigned long vstack_start;
    6.68 -    unsigned long vstack_end;
    6.69 -    unsigned long vpt_start;
    6.70 -    unsigned long vpt_end;
    6.71 -    unsigned long v_end;
    6.72 -
    6.73 -    /* Machine address of next candidate page-table page. */
    6.74 -    unsigned long mpt_alloc;
    6.75 -
    6.76 -    extern void physdev_init_dom0(struct domain *);
    6.77 -
    6.78 -    /* Sanity! */
    6.79 -    if ( d->id != 0 ) 
    6.80 -        BUG();
    6.81 -    if ( test_bit(DF_CONSTRUCTED, &d->d_flags) ) 
    6.82 -        BUG();
    6.83 -
    6.84 -    memset(&dsi, 0, sizeof(struct domain_setup_info));
    6.85 -
    6.86 -    printk("*** LOADING DOMAIN 0 ***\n");
    6.87 -
    6.88 -    /*
    6.89 -     * This is all a bit grim. We've moved the modules to the "safe" physical 
    6.90 -     * memory region above MAP_DIRECTMAP_ADDRESS (48MB). Later in this 
    6.91 -     * routine we're going to copy it down into the region that's actually 
    6.92 -     * been allocated to domain 0. This is highly likely to be overlapping, so 
    6.93 -     * we use a forward copy.
    6.94 -     * 
    6.95 -     * MAP_DIRECTMAP_ADDRESS should be safe. The worst case is a machine with 
    6.96 -     * 4GB and lots of network/disk cards that allocate loads of buffers. 
    6.97 -     * We'll have to revisit this if we ever support PAE (64GB).
    6.98 -     */
    6.99 -
   6.100 -    rc = parseelfimage(image_start, image_len, &dsi);
   6.101 -    if ( rc != 0 )
   6.102 -        return rc;
   6.103 -
   6.104 -    /* Set up domain options */
   6.105 -    if ( dsi.use_writable_pagetables )
   6.106 -        vm_assist(d, VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);
   6.107 -
   6.108 -    /* Align load address to 4MB boundary. */
   6.109 -    dsi.v_start &= ~((1UL<<22)-1);
   6.110 -
   6.111 -    /*
   6.112 -     * Why do we need this? The number of page-table frames depends on the 
   6.113 -     * size of the bootstrap address space. But the size of the address space 
   6.114 -     * depends on the number of page-table frames (since each one is mapped 
   6.115 -     * read-only). We have a pair of simultaneous equations in two unknowns, 
   6.116 -     * which we solve by exhaustive search.
   6.117 -     */
   6.118 -    vinitrd_start    = round_pgup(dsi.v_kernend);
   6.119 -    vinitrd_end      = vinitrd_start + initrd_len;
   6.120 -    vphysmap_start   = round_pgup(vinitrd_end);
   6.121 -    vphysmap_end     = vphysmap_start + (nr_pages * sizeof(u32));
   6.122 -    vpt_start        = round_pgup(vphysmap_end);
   6.123 -    for ( nr_pt_pages = 2; ; nr_pt_pages++ )
   6.124 -    {
   6.125 -        vpt_end          = vpt_start + (nr_pt_pages * PAGE_SIZE);
   6.126 -        vstartinfo_start = vpt_end;
   6.127 -        vstartinfo_end   = vstartinfo_start + PAGE_SIZE;
   6.128 -        vstack_start     = vstartinfo_end;
   6.129 -        vstack_end       = vstack_start + PAGE_SIZE;
   6.130 -        v_end            = (vstack_end + (1UL<<22)-1) & ~((1UL<<22)-1);
   6.131 -        if ( (v_end - vstack_end) < (512UL << 10) )
   6.132 -            v_end += 1UL << 22; /* Add extra 4MB to get >= 512kB padding. */
   6.133 -        if ( (((v_end - dsi.v_start + ((1UL<<L2_PAGETABLE_SHIFT)-1)) >> 
   6.134 -               L2_PAGETABLE_SHIFT) + 1) <= nr_pt_pages )
   6.135 -            break;
   6.136 -    }
   6.137 -
   6.138 -    printk("PHYSICAL MEMORY ARRANGEMENT:\n"
   6.139 -           " Kernel image:  %p->%p\n"
   6.140 -           " Initrd image:  %p->%p\n"
   6.141 -           " Dom0 alloc.:   %p->%p\n",
   6.142 -           _image_start, _image_start + image_len,
   6.143 -           _initrd_start, _initrd_start + initrd_len,
   6.144 -           alloc_start, alloc_end);
   6.145 -    printk("VIRTUAL MEMORY ARRANGEMENT:\n"
   6.146 -           " Loaded kernel: %p->%p\n"
   6.147 -           " Init. ramdisk: %p->%p\n"
   6.148 -           " Phys-Mach map: %p->%p\n"
   6.149 -           " Page tables:   %p->%p\n"
   6.150 -           " Start info:    %p->%p\n"
   6.151 -           " Boot stack:    %p->%p\n"
   6.152 -           " TOTAL:         %p->%p\n",
   6.153 -           dsi.v_kernstart, dsi.v_kernend, 
   6.154 -           vinitrd_start, vinitrd_end,
   6.155 -           vphysmap_start, vphysmap_end,
   6.156 -           vpt_start, vpt_end,
   6.157 -           vstartinfo_start, vstartinfo_end,
   6.158 -           vstack_start, vstack_end,
   6.159 -           dsi.v_start, v_end);
   6.160 -    printk(" ENTRY ADDRESS: %p\n", dsi.v_kernentry);
   6.161 -
   6.162 -    if ( (v_end - dsi.v_start) > (nr_pages * PAGE_SIZE) )
   6.163 -    {
   6.164 -        printk("Initial guest OS requires too much space\n"
   6.165 -               "(%luMB is greater than %luMB limit)\n",
   6.166 -               (v_end-dsi.v_start)>>20, (nr_pages<<PAGE_SHIFT)>>20);
   6.167 -        return -ENOMEM;
   6.168 -    }
   6.169 -
   6.170 -    /*
   6.171 -     * Protect the lowest 1GB of memory. We use a temporary mapping there
   6.172 -     * from which we copy the kernel and ramdisk images.
   6.173 -     */
   6.174 -    if ( dsi.v_start < (1UL<<30) )
   6.175 -    {
   6.176 -        printk("Initial loading isn't allowed to lowest 1GB of memory.\n");
   6.177 -        return -EINVAL;
   6.178 -    }
   6.179 -
   6.180 -    /* Paranoia: scrub DOM0's memory allocation. */
   6.181 -    printk("Scrubbing DOM0 RAM: ");
   6.182 -    dst = (char *)alloc_start;
   6.183 -    while ( dst < (char *)alloc_end )
   6.184 -    {
   6.185 -#define SCRUB_BYTES (100 * 1024 * 1024) /* 100MB */
   6.186 -        printk(".");
   6.187 -        touch_nmi_watchdog();
   6.188 -        if ( ((char *)alloc_end - dst) > SCRUB_BYTES )
   6.189 -        {
   6.190 -            memset(dst, 0, SCRUB_BYTES);
   6.191 -            dst += SCRUB_BYTES;
   6.192 -        }
   6.193 -        else
   6.194 -        {
   6.195 -            memset(dst, 0, (char *)alloc_end - dst);
   6.196 -            break;
   6.197 -        }
   6.198 -    }
   6.199 -    printk("done.\n");
   6.200 -
   6.201 -    /* Construct a frame-allocation list for the initial domain. */
   6.202 -    for ( mfn = (alloc_start>>PAGE_SHIFT); 
   6.203 -          mfn < (alloc_end>>PAGE_SHIFT); 
   6.204 -          mfn++ )
   6.205 -    {
   6.206 -        page = &frame_table[mfn];
   6.207 -        page_set_owner(page, d);
   6.208 -        page->u.inuse.type_info = 0;
   6.209 -        page->count_info        = PGC_allocated | 1;
   6.210 -        list_add_tail(&page->list, &d->page_list);
   6.211 -        d->tot_pages++; d->max_pages++;
   6.212 -    }
   6.213 -
   6.214 -    mpt_alloc = (vpt_start - dsi.v_start) + alloc_start;
   6.215 -
   6.216 -    SET_GDT_ENTRIES(ed, DEFAULT_GDT_ENTRIES);
   6.217 -    SET_GDT_ADDRESS(ed, DEFAULT_GDT_ADDRESS);
   6.218 -
   6.219 -    /*
   6.220 -     * We're basically forcing default RPLs to 1, so that our "what privilege
   6.221 -     * level are we returning to?" logic works.
   6.222 -     */
   6.223 -    ed->arch.failsafe_selector = FLAT_KERNEL_CS;
   6.224 -    ed->arch.event_selector    = FLAT_KERNEL_CS;
   6.225 -    ed->arch.kernel_ss = FLAT_KERNEL_SS;
   6.226 -    for ( i = 0; i < 256; i++ ) 
   6.227 -        ed->arch.traps[i].cs = FLAT_KERNEL_CS;
   6.228 -
   6.229 -    /* WARNING: The new domain must have its 'processor' field filled in! */
   6.230 -    l2start = l2tab = (l2_pgentry_t *)mpt_alloc; mpt_alloc += PAGE_SIZE;
   6.231 -    memcpy(l2tab, &idle_pg_table[0], PAGE_SIZE);
   6.232 -    l2tab[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
   6.233 -        mk_l2_pgentry((unsigned long)l2start | __PAGE_HYPERVISOR);
   6.234 -    l2tab[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] =
   6.235 -        mk_l2_pgentry(__pa(d->arch.mm_perdomain_pt) | __PAGE_HYPERVISOR);
   6.236 -    ed->arch.guest_table = mk_pagetable((unsigned long)l2start);
   6.237 -
   6.238 -    l2tab += l2_table_offset(dsi.v_start);
   6.239 -    mfn = alloc_start >> PAGE_SHIFT;
   6.240 -    for ( count = 0; count < ((v_end-dsi.v_start)>>PAGE_SHIFT); count++ )
   6.241 -    {
   6.242 -        if ( !((unsigned long)l1tab & (PAGE_SIZE-1)) )
   6.243 -        {
   6.244 -            l1start = l1tab = (l1_pgentry_t *)mpt_alloc; 
   6.245 -            mpt_alloc += PAGE_SIZE;
   6.246 -            *l2tab++ = mk_l2_pgentry((unsigned long)l1start | L2_PROT);
   6.247 -            clear_page(l1tab);
   6.248 -            if ( count == 0 )
   6.249 -                l1tab += l1_table_offset(dsi.v_start);
   6.250 -        }
   6.251 -        *l1tab++ = mk_l1_pgentry((mfn << PAGE_SHIFT) | L1_PROT);
   6.252 -        
   6.253 -        page = &frame_table[mfn];
   6.254 -        if ( !get_page_and_type(page, d, PGT_writable_page) )
   6.255 -            BUG();
   6.256 -
   6.257 -        mfn++;
   6.258 -    }
   6.259 -
   6.260 -    /* Pages that are part of page tables must be read only. */
   6.261 -    l2tab = l2start + l2_table_offset(vpt_start);
   6.262 -    l1start = l1tab = (l1_pgentry_t *)l2_pgentry_to_phys(*l2tab);
   6.263 -    l1tab += l1_table_offset(vpt_start);
   6.264 -    for ( count = 0; count < nr_pt_pages; count++ ) 
   6.265 -    {
   6.266 -        *l1tab = mk_l1_pgentry(l1_pgentry_val(*l1tab) & ~_PAGE_RW);
   6.267 -        page = &frame_table[l1_pgentry_to_pfn(*l1tab)];
   6.268 -        if ( count == 0 )
   6.269 -        {
   6.270 -            page->u.inuse.type_info &= ~PGT_type_mask;
   6.271 -            page->u.inuse.type_info |= PGT_l2_page_table;
   6.272 -
   6.273 -            /*
   6.274 -             * No longer writable: decrement the type_count.
   6.275 -             * Installed as CR3: increment both the ref_count and type_count.
   6.276 -             * Net: just increment the ref_count.
   6.277 -             */
   6.278 -            get_page(page, d); /* an extra ref because of readable mapping */
   6.279 -
   6.280 -            /* Get another ref to L2 page so that it can be pinned. */
   6.281 -            if ( !get_page_and_type(page, d, PGT_l2_page_table) )
   6.282 -                BUG();
   6.283 -            set_bit(_PGT_pinned, &page->u.inuse.type_info);
   6.284 -        }
   6.285 -        else
   6.286 -        {
   6.287 -            page->u.inuse.type_info &= ~PGT_type_mask;
   6.288 -            page->u.inuse.type_info |= PGT_l1_page_table;
   6.289 -	    page->u.inuse.type_info |= 
   6.290 -		((dsi.v_start>>L2_PAGETABLE_SHIFT)+(count-1))<<PGT_va_shift;
   6.291 -
   6.292 -            /*
   6.293 -             * No longer writable: decrement the type_count.
   6.294 -             * This is an L1 page, installed in a validated L2 page:
   6.295 -             * increment both the ref_count and type_count.
   6.296 -             * Net: just increment the ref_count.
   6.297 -             */
   6.298 -            get_page(page, d); /* an extra ref because of readable mapping */
   6.299 -        }
   6.300 -        if ( !((unsigned long)++l1tab & (PAGE_SIZE - 1)) )
   6.301 -            l1start = l1tab = (l1_pgentry_t *)l2_pgentry_to_phys(*++l2tab);
   6.302 -    }
   6.303 -
   6.304 -    /* Set up shared-info area. */
   6.305 -    update_dom_time(d);
   6.306 -    d->shared_info->domain_time = 0;
   6.307 -    /* Mask all upcalls... */
   6.308 -    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
   6.309 -        d->shared_info->vcpu_data[i].evtchn_upcall_mask = 1;
   6.310 -    d->shared_info->n_vcpu = smp_num_cpus;
   6.311 -
   6.312 -    /* setup shadow and monitor tables */
   6.313 -    update_pagetables(ed);
   6.314 -
   6.315 -    /* Install the new page tables. */
   6.316 -    __cli();
   6.317 -    write_ptbase(ed);
   6.318 -
   6.319 -    /* Copy the OS image. */
   6.320 -    (void)loadelfimage(image_start);
   6.321 -
   6.322 -    /* Copy the initial ramdisk. */
   6.323 -    if ( initrd_len != 0 )
   6.324 -        memcpy((void *)vinitrd_start, initrd_start, initrd_len);
   6.325 -    
   6.326 -    /* Set up start info area. */
   6.327 -    si = (start_info_t *)vstartinfo_start;
   6.328 -    memset(si, 0, PAGE_SIZE);
   6.329 -    si->nr_pages     = d->tot_pages;
   6.330 -    si->shared_info  = virt_to_phys(d->shared_info);
   6.331 -    si->flags        = SIF_PRIVILEGED | SIF_INITDOMAIN;
   6.332 -    si->pt_base      = vpt_start;
   6.333 -    si->nr_pt_frames = nr_pt_pages;
   6.334 -    si->mfn_list     = vphysmap_start;
   6.335 -
   6.336 -    /* Write the phys->machine and machine->phys table entries. */
   6.337 -    for ( pfn = 0; pfn < d->tot_pages; pfn++ )
   6.338 -    {
   6.339 -        mfn = pfn + (alloc_start>>PAGE_SHIFT);
   6.340 -#ifndef NDEBUG
   6.341 -#define REVERSE_START ((v_end - dsi.v_start) >> PAGE_SHIFT)
   6.342 -        if ( pfn > REVERSE_START )
   6.343 -            mfn = (alloc_end>>PAGE_SHIFT) - (pfn - REVERSE_START);
   6.344 -#endif
   6.345 -        ((u32 *)vphysmap_start)[pfn] = mfn;
   6.346 -        machine_to_phys_mapping[mfn] = pfn;
   6.347 -    }
   6.348 -
   6.349 -    if ( initrd_len != 0 )
   6.350 -    {
   6.351 -        si->mod_start = vinitrd_start;
   6.352 -        si->mod_len   = initrd_len;
   6.353 -        printk("Initrd len 0x%lx, start at 0x%p\n",
   6.354 -               si->mod_len, si->mod_start);
   6.355 -    }
   6.356 -
   6.357 -    dst = si->cmd_line;
   6.358 -    if ( cmdline != NULL )
   6.359 -    {
   6.360 -        for ( i = 0; i < 255; i++ )
   6.361 -        {
   6.362 -            if ( cmdline[i] == '\0' )
   6.363 -                break;
   6.364 -            *dst++ = cmdline[i];
   6.365 -        }
   6.366 -    }
   6.367 -    *dst = '\0';
   6.368 -
   6.369 -    /* Reinstate the caller's page tables. */
   6.370 -    write_ptbase(current);
   6.371 -    __sti();
   6.372 -
   6.373 -    /* Destroy low mappings - they were only for our convenience. */
   6.374 -    for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
   6.375 -        if ( l2_pgentry_val(l2start[i]) & _PAGE_PSE )
   6.376 -            l2start[i] = mk_l2_pgentry(0);
   6.377 -    zap_low_mappings(); /* Do the same for the idle page tables. */
   6.378 -    
   6.379 -    /* DOM0 gets access to everything. */
   6.380 -    physdev_init_dom0(d);
   6.381 -
   6.382 -    set_bit(DF_CONSTRUCTED, &d->d_flags);
   6.383 -
   6.384 -    new_thread(ed, dsi.v_kernentry, vstack_end, vstartinfo_start);
   6.385 -
   6.386 -#ifndef NDEBUG
   6.387 -    if (0) /* XXXXX DO NOT CHECK IN ENABLED !!! (but useful for testing so leave) */
   6.388 -    {
   6.389 -        shadow_mode_enable(d, SHM_enable); 
   6.390 -        update_pagetables(ed); /* XXX SMP */
   6.391 -    }
   6.392 -#endif
   6.393 -
   6.394 -    return 0;
   6.395 -}
   6.396 -
   6.397 -int elf_sanity_check(Elf_Ehdr *ehdr)
   6.398 -{
   6.399 -    if ( !IS_ELF(*ehdr) ||
   6.400 -         (ehdr->e_ident[EI_CLASS] != ELFCLASS32) ||
   6.401 -         (ehdr->e_ident[EI_DATA] != ELFDATA2LSB) ||
   6.402 -         (ehdr->e_type != ET_EXEC) ||
   6.403 -         (ehdr->e_machine != EM_386) )
   6.404 -    {
   6.405 -        printk("DOM0 image is not i386-compatible executable Elf image.\n");
   6.406 -        return 0;
   6.407 -    }
   6.408 -
   6.409 -    return 1;
   6.410 -}
   6.411 -
   6.412 -/*
   6.413 - * Local variables:
   6.414 - * mode: C
   6.415 - * c-set-style: "BSD"
   6.416 - * c-basic-offset: 4
   6.417 - * tab-width: 4
   6.418 - * indent-tabs-mode: nil
   6.419 - */
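
The "simultaneous equations" loop survives the refactor unchanged: the
bootstrap address space must cover the page-table frames that map it, while
the number of frames depends on the size of that address space, so
nr_pt_pages is found by searching upward from 2 until it suffices. A hedged
sketch of the i386 case (the 512kB stack padding is elided; constants are
stand-ins for the Xen definitions):

    #define PAGE_SIZE          4096UL
    #define L2_PAGETABLE_SHIFT 22      /* one L1 page maps 4MB */

    /* Sketch only: find the fixed point for the i386 bootstrap layout.
     * fixed_end is the page-aligned end of kernel + initrd + physmap. */
    static unsigned long count_pt_pages(unsigned long v_start,
                                        unsigned long fixed_end)
    {
        unsigned long nr_pt_pages, v_end;

        for ( nr_pt_pages = 2; ; nr_pt_pages++ )
        {
            /* Page tables, then start-info and stack pages, rounded up
             * to the next 4MB superpage boundary. */
            v_end = fixed_end + nr_pt_pages * PAGE_SIZE + 2 * PAGE_SIZE;
            v_end = (v_end + (1UL << 22) - 1) & ~((1UL << 22) - 1);

            /* One L2 page plus one L1 page per 4MB actually mapped. */
            if ( (((v_end - v_start + (1UL << L2_PAGETABLE_SHIFT) - 1)
                   >> L2_PAGETABLE_SHIFT) + 1) <= nr_pt_pages )
                return nr_pt_pages;
        }
    }
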
     7.1 --- a/xen/arch/x86/x86_32/domain_page.c	Fri Feb 25 17:06:27 2005 +0000
     7.2 +++ b/xen/arch/x86/x86_32/domain_page.c	Fri Feb 25 18:37:31 2005 +0000
     7.3 @@ -45,7 +45,7 @@ void *map_domain_mem(unsigned long pa)
     7.4      unsigned int idx, cpu = smp_processor_id();
     7.5      unsigned long *cache = mapcache;
     7.6  #ifndef NDEBUG
     7.7 -    unsigned flush_count = 0;
     7.8 +    unsigned int flush_count = 0;
     7.9  #endif
    7.10  
    7.11      ASSERT(!in_irq());
    7.12 @@ -65,17 +65,11 @@ void *map_domain_mem(unsigned long pa)
    7.13          idx = map_idx = (map_idx + 1) & (MAPCACHE_ENTRIES - 1);
    7.14          if ( unlikely(idx == 0) )
    7.15          {
    7.16 +            ASSERT(flush_count++ == 0);
    7.17              flush_all_ready_maps();
    7.18              perfc_incrc(domain_page_tlb_flush);
    7.19              local_flush_tlb();
    7.20              shadow_epoch[cpu] = ++epoch;
    7.21 -#ifndef NDEBUG
    7.22 -            if ( unlikely(flush_count++) )
    7.23 -            {
    7.24 -                // we've run out of map cache entries...
    7.25 -                BUG();
    7.26 -            }
    7.27 -#endif
    7.28          }
    7.29      }
    7.30      while ( cache[idx] != 0 );
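
The map_domain_mem() hunk above folds the debug-only double-wrap check into a
single ASSERT placed before the flush: the slot search may wrap the ring
index (paying a TLB flush) at most once per lookup, so wrapping twice means
the map cache has no free entry and is a bug. A hedged sketch of the scan
(stand-in types and sizes):

    #include <assert.h>

    #define MAPCACHE_ENTRIES 1024   /* stand-in; must be a power of two */

    /* Sketch only: advance a ring index to the next free cache slot,
     * recycling stale entries via flush_all() on each wrap. */
    static unsigned int find_free_slot(const unsigned long *cache,
                                       unsigned int *map_idx,
                                       void (*flush_all)(void))
    {
        unsigned int idx, flush_count = 0;

        do {
            idx = *map_idx = (*map_idx + 1) & (MAPCACHE_ENTRIES - 1);
            if ( idx == 0 )
            {
                /* A second wrap in one lookup means no slot is free. */
                assert(flush_count++ == 0);
                flush_all();
            }
        } while ( cache[idx] != 0 );

        return idx;
    }
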
     8.1 --- a/xen/arch/x86/x86_64/domain_build.c	Fri Feb 25 17:06:27 2005 +0000
     8.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     8.3 @@ -1,424 +0,0 @@
     8.4 -/******************************************************************************
     8.5 - * domain_build.c
     8.6 - * 
     8.7 - * Copyright (c) 2002-2005, K A Fraser
     8.8 - */
     8.9 -
    8.10 -#include <xen/config.h>
    8.11 -#include <xen/init.h>
    8.12 -#include <xen/lib.h>
    8.13 -#include <xen/sched.h>
    8.14 -#include <xen/smp.h>
    8.15 -#include <xen/delay.h>
    8.16 -#include <asm/regs.h>
    8.17 -#include <asm/system.h>
    8.18 -#include <asm/io.h>
    8.19 -#include <asm/processor.h>
    8.20 -#include <asm/shadow.h>
    8.21 -#include <asm/desc.h>
    8.22 -#include <asm/i387.h>
    8.23 -#include <xen/event.h>
    8.24 -#include <xen/elf.h>
    8.25 -#include <xen/kernel.h>
    8.26 -
    8.27 -/* Allow ring-3 access in long mode as guest cannot use ring 1. */
    8.28 -#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
    8.29 -#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
    8.30 -#define L3_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
    8.31 -#define L4_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
    8.32 -
    8.33 -#define round_pgup(_p)    (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
    8.34 -#define round_pgdown(_p)  ((_p)&PAGE_MASK)
    8.35 -
    8.36 -int construct_dom0(struct domain *d,
    8.37 -                   unsigned long alloc_start,
    8.38 -                   unsigned long alloc_end,
    8.39 -                   unsigned long _image_start, unsigned long image_len, 
    8.40 -                   unsigned long _initrd_start, unsigned long initrd_len,
    8.41 -                   char *cmdline)
    8.42 -{
    8.43 -    char *dst;
    8.44 -    int i, rc;
    8.45 -    unsigned long pfn, mfn;
    8.46 -    unsigned long nr_pages = (alloc_end - alloc_start) >> PAGE_SHIFT;
    8.47 -    unsigned long nr_pt_pages;
    8.48 -    unsigned long count;
    8.49 -    l4_pgentry_t *l4tab = NULL, *l4start = NULL;
    8.50 -    l3_pgentry_t *l3tab = NULL, *l3start = NULL;
    8.51 -    l2_pgentry_t *l2tab = NULL, *l2start = NULL;
    8.52 -    l1_pgentry_t *l1tab = NULL, *l1start = NULL;
    8.53 -    struct pfn_info *page = NULL;
    8.54 -    start_info_t *si;
    8.55 -    struct exec_domain *ed = d->exec_domain[0];
    8.56 -    char *image_start  = __va(_image_start);
    8.57 -    char *initrd_start = __va(_initrd_start);
    8.58 -
    8.59 -    /*
    8.60 -     * This fully describes the memory layout of the initial domain. All 
    8.61 -     * *_start address are page-aligned, except v_start (and v_end) which are 
    8.62 -     * superpage-aligned.
    8.63 -     */
    8.64 -    struct domain_setup_info dsi;
    8.65 -    unsigned long vinitrd_start;
    8.66 -    unsigned long vinitrd_end;
    8.67 -    unsigned long vphysmap_start;
    8.68 -    unsigned long vphysmap_end;
    8.69 -    unsigned long vstartinfo_start;
    8.70 -    unsigned long vstartinfo_end;
    8.71 -    unsigned long vstack_start;
    8.72 -    unsigned long vstack_end;
    8.73 -    unsigned long vpt_start;
    8.74 -    unsigned long vpt_end;
    8.75 -    unsigned long v_end;
    8.76 -
    8.77 -    /* Machine address of next candidate page-table page. */
    8.78 -    unsigned long mpt_alloc;
    8.79 -
    8.80 -    extern void physdev_init_dom0(struct domain *);
    8.81 -
    8.82 -    /* Sanity! */
    8.83 -    if ( d->id != 0 ) 
    8.84 -        BUG();
    8.85 -    if ( test_bit(DF_CONSTRUCTED, &d->d_flags) ) 
    8.86 -        BUG();
    8.87 -
    8.88 -    memset(&dsi, 0, sizeof(struct domain_setup_info));
    8.89 -
    8.90 -    printk("*** LOADING DOMAIN 0 ***\n");
    8.91 -
    8.92 -    /*
    8.93 -     * This is all a bit grim. We've moved the modules to the "safe" physical 
    8.94 -     * memory region above MAP_DIRECTMAP_ADDRESS (48MB). Later in this 
    8.95 -     * routine we're going to copy it down into the region that's actually 
    8.96 -     * been allocated to domain 0. This is highly likely to be overlapping, so 
    8.97 -     * we use a forward copy.
    8.98 -     * 
    8.99 -     * MAP_DIRECTMAP_ADDRESS should be safe. The worst case is a machine with 
   8.100 -     * 4GB and lots of network/disk cards that allocate loads of buffers. 
   8.101 -     * We'll have to revisit this if we ever support PAE (64GB).
   8.102 -     */
   8.103 -
   8.104 -    rc = parseelfimage(image_start, image_len, &dsi);
   8.105 -    if ( rc != 0 )
   8.106 -        return rc;
   8.107 -
   8.108 -    /* Set up domain options */
   8.109 -    if ( dsi.use_writable_pagetables )
   8.110 -        vm_assist(d, VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);
   8.111 -
   8.112 -    /* Align load address to 4MB boundary. */
   8.113 -    dsi.v_start &= ~((1UL<<22)-1);
   8.114 -
   8.115 -    /*
   8.116 -     * Why do we need this? The number of page-table frames depends on the 
   8.117 -     * size of the bootstrap address space. But the size of the address space 
   8.118 -     * depends on the number of page-table frames (since each one is mapped 
   8.119 -     * read-only). We have a pair of simultaneous equations in two unknowns, 
   8.120 -     * which we solve by exhaustive search.
   8.121 -     */
   8.122 -    vinitrd_start    = round_pgup(dsi.v_kernend);
   8.123 -    vinitrd_end      = vinitrd_start + initrd_len;
   8.124 -    vphysmap_start   = round_pgup(vinitrd_end);
   8.125 -    vphysmap_end     = vphysmap_start + (nr_pages * sizeof(u32));
   8.126 -    vpt_start        = round_pgup(vphysmap_end);
   8.127 -    for ( nr_pt_pages = 2; ; nr_pt_pages++ )
   8.128 -    {
   8.129 -        vpt_end          = vpt_start + (nr_pt_pages * PAGE_SIZE);
   8.130 -        vstartinfo_start = vpt_end;
   8.131 -        vstartinfo_end   = vstartinfo_start + PAGE_SIZE;
   8.132 -        vstack_start     = vstartinfo_end;
   8.133 -        vstack_end       = vstack_start + PAGE_SIZE;
   8.134 -        v_end            = (vstack_end + (1UL<<22)-1) & ~((1UL<<22)-1);
   8.135 -        if ( (v_end - vstack_end) < (512UL << 10) )
   8.136 -            v_end += 1UL << 22; /* Add extra 4MB to get >= 512kB padding. */
   8.137 -#define NR(_l,_h,_s) \
   8.138 -    (((((_h) + ((1UL<<(_s))-1)) & ~((1UL<<(_s))-1)) - \
   8.139 -       ((_l) & ~((1UL<<(_s))-1))) >> (_s))
   8.140 -        if ( (1 + /* # L4 */
   8.141 -              NR(dsi.v_start, v_end, L4_PAGETABLE_SHIFT) + /* # L3 */
   8.142 -              NR(dsi.v_start, v_end, L3_PAGETABLE_SHIFT) + /* # L2 */
   8.143 -              NR(dsi.v_start, v_end, L2_PAGETABLE_SHIFT))  /* # L1 */
   8.144 -             <= nr_pt_pages )
   8.145 -            break;
   8.146 -    }
   8.147 -
   8.148 -    printk("PHYSICAL MEMORY ARRANGEMENT:\n"
   8.149 -           " Kernel image:  %p->%p\n"
   8.150 -           " Initrd image:  %p->%p\n"
   8.151 -           " Dom0 alloc.:   %p->%p\n",
   8.152 -           _image_start, _image_start + image_len,
   8.153 -           _initrd_start, _initrd_start + initrd_len,
   8.154 -           alloc_start, alloc_end);
   8.155 -    printk("VIRTUAL MEMORY ARRANGEMENT:\n"
   8.156 -           " Loaded kernel: %p->%p\n"
   8.157 -           " Init. ramdisk: %p->%p\n"
   8.158 -           " Phys-Mach map: %p->%p\n"
   8.159 -           " Page tables:   %p->%p\n"
   8.160 -           " Start info:    %p->%p\n"
   8.161 -           " Boot stack:    %p->%p\n"
   8.162 -           " TOTAL:         %p->%p\n",
   8.163 -           dsi.v_kernstart, dsi.v_kernend, 
   8.164 -           vinitrd_start, vinitrd_end,
   8.165 -           vphysmap_start, vphysmap_end,
   8.166 -           vpt_start, vpt_end,
   8.167 -           vstartinfo_start, vstartinfo_end,
   8.168 -           vstack_start, vstack_end,
   8.169 -           dsi.v_start, v_end);
   8.170 -    printk(" ENTRY ADDRESS: %p\n", dsi.v_kernentry);
   8.171 -
   8.172 -    if ( (v_end - dsi.v_start) > (nr_pages * PAGE_SIZE) )
   8.173 -    {
   8.174 -        printk("Initial guest OS requires too much space\n"
   8.175 -               "(%luMB is greater than %luMB limit)\n",
   8.176 -               (v_end-dsi.v_start)>>20, (nr_pages<<PAGE_SHIFT)>>20);
   8.177 -        return -ENOMEM;
   8.178 -    }
   8.179 -
   8.180 -    /* Overlap with Xen protected area? */
   8.181 -    if ( (dsi.v_start < HYPERVISOR_VIRT_END) &&
   8.182 -         (v_end > HYPERVISOR_VIRT_START) )
   8.183 -    {
   8.184 -        printk("DOM0 image overlaps with Xen private area.\n");
   8.185 -        return -EINVAL;
   8.186 -    }
   8.187 -
   8.188 -    /* Paranoia: scrub DOM0's memory allocation. */
   8.189 -    printk("Scrubbing DOM0 RAM: ");
   8.190 -    dst = __va(alloc_start);
   8.191 -    while ( __pa(dst) < alloc_end )
   8.192 -    {
   8.193 -#define SCRUB_BYTES (100 * 1024 * 1024) /* 100MB */
   8.194 -        printk(".");
   8.195 -        touch_nmi_watchdog();
   8.196 -        if ( (alloc_end - __pa(dst)) > SCRUB_BYTES )
   8.197 -        {
   8.198 -            memset(dst, 0, SCRUB_BYTES);
   8.199 -            dst += SCRUB_BYTES;
   8.200 -        }
   8.201 -        else
   8.202 -        {
   8.203 -            memset(dst, 0, alloc_end - __pa(dst));
   8.204 -            break;
   8.205 -        }
   8.206 -    }
   8.207 -    printk("done.\n");
   8.208 -
   8.209 -    /* Construct a frame-allocation list for the initial domain. */
   8.210 -    for ( mfn = (alloc_start>>PAGE_SHIFT);
   8.211 -          mfn < (alloc_end>>PAGE_SHIFT);
   8.212 -          mfn++ )
   8.213 -    {
   8.214 -        page = &frame_table[mfn];
   8.215 -        page_set_owner(page, d);
   8.216 -        page->u.inuse.type_info = 0;
   8.217 -        page->count_info        = PGC_allocated | 1;
   8.218 -        list_add_tail(&page->list, &d->page_list);
   8.219 -        d->tot_pages++; d->max_pages++;
   8.220 -    }
   8.221 -
   8.222 -    mpt_alloc = (vpt_start - dsi.v_start) + alloc_start;
   8.223 -
   8.224 -    SET_GDT_ENTRIES(ed, DEFAULT_GDT_ENTRIES);
   8.225 -    SET_GDT_ADDRESS(ed, DEFAULT_GDT_ADDRESS);
   8.226 -
   8.227 -    /*
   8.228 -     * We're basically forcing default RPLs to 1, so that our "what privilege
   8.229 -     * level are we returning to?" logic works.
   8.230 -     */
   8.231 -    ed->arch.failsafe_selector = FLAT_KERNEL_CS;
   8.232 -    ed->arch.event_selector    = FLAT_KERNEL_CS;
   8.233 -    ed->arch.kernel_ss = FLAT_KERNEL_SS;
   8.234 -    for ( i = 0; i < 256; i++ ) 
   8.235 -        ed->arch.traps[i].cs = FLAT_KERNEL_CS;
   8.236 -
   8.237 -    /* WARNING: The new domain must have its 'processor' field filled in! */
   8.238 -    phys_to_page(mpt_alloc)->u.inuse.type_info = PGT_l4_page_table;
   8.239 -    l4start = l4tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE;
   8.240 -    memcpy(l4tab, &idle_pg_table[0], PAGE_SIZE);
   8.241 -    l4tab[l4_table_offset(LINEAR_PT_VIRT_START)] =
   8.242 -        mk_l4_pgentry(__pa(l4start) | __PAGE_HYPERVISOR);
   8.243 -    l4tab[l4_table_offset(PERDOMAIN_VIRT_START)] =
   8.244 -        mk_l4_pgentry(__pa(d->arch.mm_perdomain_l3) | __PAGE_HYPERVISOR);
   8.245 -    ed->arch.guest_table = mk_pagetable(__pa(l4start));
   8.246 -
   8.247 -    l4tab += l4_table_offset(dsi.v_start);
   8.248 -    mfn = alloc_start >> PAGE_SHIFT;
   8.249 -    for ( count = 0; count < ((v_end-dsi.v_start)>>PAGE_SHIFT); count++ )
   8.250 -    {
   8.251 -        if ( !((unsigned long)l1tab & (PAGE_SIZE-1)) )
   8.252 -        {
   8.253 -            phys_to_page(mpt_alloc)->u.inuse.type_info = PGT_l1_page_table;
   8.254 -            l1start = l1tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE;
   8.255 -            clear_page(l1tab);
   8.256 -            if ( count == 0 )
   8.257 -                l1tab += l1_table_offset(dsi.v_start);
   8.258 -            if ( !((unsigned long)l2tab & (PAGE_SIZE-1)) )
   8.259 -            {
   8.260 -                phys_to_page(mpt_alloc)->u.inuse.type_info = PGT_l2_page_table;
   8.261 -                l2start = l2tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE;
   8.262 -                clear_page(l2tab);
   8.263 -                if ( count == 0 )
   8.264 -                    l2tab += l2_table_offset(dsi.v_start);
   8.265 -                if ( !((unsigned long)l3tab & (PAGE_SIZE-1)) )
   8.266 -                {
   8.267 -                    phys_to_page(mpt_alloc)->u.inuse.type_info =
   8.268 -                        PGT_l3_page_table;
   8.269 -                    l3start = l3tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE;
   8.270 -                    clear_page(l3tab);
   8.271 -                    if ( count == 0 )
   8.272 -                        l3tab += l3_table_offset(dsi.v_start);
   8.273 -                    *l4tab++ = mk_l4_pgentry(__pa(l3start) | L4_PROT);
   8.274 -                }
   8.275 -                *l3tab++ = mk_l3_pgentry(__pa(l2start) | L3_PROT);
   8.276 -            }
   8.277 -            *l2tab++ = mk_l2_pgentry(__pa(l1start) | L2_PROT);
   8.278 -        }
   8.279 -        *l1tab++ = mk_l1_pgentry((mfn << PAGE_SHIFT) | L1_PROT);
   8.280 -
   8.281 -        page = &frame_table[mfn];
   8.282 -        if ( (page->u.inuse.type_info == 0) &&
   8.283 -             !get_page_and_type(page, d, PGT_writable_page) )
   8.284 -            BUG();
   8.285 -
   8.286 -        mfn++;
   8.287 -    }
   8.288 -
   8.289 -    /* Pages that are part of page tables must be read only. */
   8.290 -    l4tab = l4start + l4_table_offset(vpt_start);
   8.291 -    l3start = l3tab = l4_pgentry_to_l3(*l4tab);
   8.292 -    l3tab += l3_table_offset(vpt_start);
   8.293 -    l2start = l2tab = l3_pgentry_to_l2(*l3tab);
   8.294 -    l2tab += l2_table_offset(vpt_start);
   8.295 -    l1start = l1tab = l2_pgentry_to_l1(*l2tab);
   8.296 -    l1tab += l1_table_offset(vpt_start);
   8.297 -    for ( count = 0; count < nr_pt_pages; count++ ) 
   8.298 -    {
   8.299 -        *l1tab = mk_l1_pgentry(l1_pgentry_val(*l1tab) & ~_PAGE_RW);
   8.300 -        page = &frame_table[l1_pgentry_to_pfn(*l1tab)];
   8.301 -
   8.302 -        /* Read-only mapping + PGC_allocated + page-table page. */
   8.303 -        page->count_info         = PGC_allocated | 3;
   8.304 -        page->u.inuse.type_info |= PGT_validated | 1;
   8.305 -
   8.306 -        /* Top-level p.t. is pinned. */
   8.307 -        if ( (page->u.inuse.type_info & PGT_type_mask) == PGT_l4_page_table )
   8.308 -        {
   8.309 -            page->count_info        += 1;
   8.310 -            page->u.inuse.type_info += 1 | PGT_pinned;
   8.311 -        }
   8.312 -
   8.313 -        /* Iterate. */
   8.314 -        if ( !((unsigned long)++l1tab & (PAGE_SIZE - 1)) )
   8.315 -        {
   8.316 -            if ( !((unsigned long)++l2tab & (PAGE_SIZE - 1)) )
   8.317 -            {
   8.318 -                if ( !((unsigned long)++l3tab & (PAGE_SIZE - 1)) )
   8.319 -                    l3start = l3tab = l4_pgentry_to_l3(*++l4tab); 
   8.320 -                l2start = l2tab = l3_pgentry_to_l2(*l3tab);
   8.321 -            }
   8.322 -            l1start = l1tab = l2_pgentry_to_l1(*l2tab);
   8.323 -        }
   8.324 -    }
   8.325 -
   8.326 -    /* Set up shared-info area. */
   8.327 -    update_dom_time(d);
   8.328 -    d->shared_info->domain_time = 0;
   8.329 -    /* Mask all upcalls... */
   8.330 -    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
   8.331 -        d->shared_info->vcpu_data[i].evtchn_upcall_mask = 1;
   8.332 -    d->shared_info->n_vcpu = smp_num_cpus;
   8.333 -
   8.334 -    /* Set up shadow and monitor tables. */
   8.335 -    update_pagetables(ed);
   8.336 -
   8.337 -    /* Install the new page tables. */
   8.338 -    __cli();
   8.339 -    write_ptbase(ed);
   8.340 -
   8.341 -    /* Copy the OS image. */
   8.342 -    (void)loadelfimage(image_start);
   8.343 -
   8.344 -    /* Copy the initial ramdisk. */
   8.345 -    if ( initrd_len != 0 )
   8.346 -        memcpy((void *)vinitrd_start, initrd_start, initrd_len);
   8.347 -    
   8.348 -    /* Set up start info area. */
   8.349 -    si = (start_info_t *)vstartinfo_start;
   8.350 -    memset(si, 0, PAGE_SIZE);
   8.351 -    si->nr_pages     = d->tot_pages;
   8.352 -    si->shared_info  = virt_to_phys(d->shared_info);
   8.353 -    si->flags        = SIF_PRIVILEGED | SIF_INITDOMAIN;
   8.354 -    si->pt_base      = vpt_start;
   8.355 -    si->nr_pt_frames = nr_pt_pages;
   8.356 -    si->mfn_list     = vphysmap_start;
   8.357 -
   8.358 -    /* Write the phys->machine and machine->phys table entries. */
   8.359 -    for ( pfn = 0; pfn < d->tot_pages; pfn++ )
   8.360 -    {
   8.361 -        mfn = pfn + (alloc_start>>PAGE_SHIFT);
   8.362 -#ifndef NDEBUG
   8.363 -#define REVERSE_START ((v_end - dsi.v_start) >> PAGE_SHIFT)
   8.364 -        if ( pfn > REVERSE_START )
   8.365 -            mfn = (alloc_end>>PAGE_SHIFT) - (pfn - REVERSE_START);
   8.366 -#endif
   8.367 -        ((u32 *)vphysmap_start)[pfn] = mfn;
   8.368 -        machine_to_phys_mapping[mfn] = pfn;
   8.369 -    }
   8.370 -
   8.371 -    if ( initrd_len != 0 )
   8.372 -    {
   8.373 -        si->mod_start = vinitrd_start;
   8.374 -        si->mod_len   = initrd_len;
   8.375 -        printk("Initrd len 0x%lx, start at 0x%p\n",
   8.376 -               si->mod_len, si->mod_start);
   8.377 -    }
   8.378 -
   8.379 -    dst = si->cmd_line;
   8.380 -    if ( cmdline != NULL )
   8.381 -    {
   8.382 -        for ( i = 0; i < 255; i++ )
   8.383 -        {
   8.384 -            if ( cmdline[i] == '\0' )
   8.385 -                break;
   8.386 -            *dst++ = cmdline[i];
   8.387 -        }
   8.388 -    }
   8.389 -    *dst = '\0';
   8.390 -
   8.391 -    /* Reinstate the caller's page tables. */
   8.392 -    write_ptbase(current);
   8.393 -    __sti();
   8.394 -
   8.395 -    /* DOM0 gets access to everything. */
   8.396 -    physdev_init_dom0(d);
   8.397 -
   8.398 -    set_bit(DF_CONSTRUCTED, &d->d_flags);
   8.399 -
   8.400 -    new_thread(ed, dsi.v_kernentry, vstack_end, vstartinfo_start);
   8.401 -
   8.402 -    return 0;
   8.403 -}
   8.404 -
   8.405 -int elf_sanity_check(Elf_Ehdr *ehdr)
   8.406 -{
   8.407 -    if ( !IS_ELF(*ehdr) ||
   8.408 -         (ehdr->e_ident[EI_CLASS] != ELFCLASS64) ||
   8.409 -         (ehdr->e_ident[EI_DATA] != ELFDATA2LSB) ||
   8.410 -         (ehdr->e_type != ET_EXEC) ||
   8.411 -         (ehdr->e_machine != EM_X86_64) )
   8.412 -    {
   8.413 -        printk("DOM0 image is not an x86/64-compatible executable ELF image.\n");
   8.414 -        return 0;
   8.415 -    }
   8.416 -
   8.417 -    return 1;
   8.418 -}
   8.419 -
   8.420 -/*
   8.421 - * Local variables:
   8.422 - * mode: C
   8.423 - * c-set-style: "BSD"
   8.424 - * c-basic-offset: 4
   8.425 - * tab-width: 4
   8.426 - * indent-tabs-mode: nil
   8.427 - * End:
   8.428 - */
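
The removed builder above sizes its bootstrap page tables by exhaustive search: nr_pt_pages determines v_end, and v_end determines how many page-table frames are needed, so the loop grows nr_pt_pages until the requirement is met. The following is a standalone sketch of that fixed-point search, not part of the changeset: the figures (a 4MB-aligned v_start of 0, a 24MB kernel+initrd+physmap payload) are invented, the shift constants assume 4-level x86-64 paging with 4kB pages, and the >= 512kB stack-padding rule is omitted for brevity.

#include <stdio.h>

#define PAGE_SHIFT         12
#define PAGE_SIZE          (1UL << PAGE_SHIFT)
#define L2_PAGETABLE_SHIFT 21  /* one L1 table maps 2MB  */
#define L3_PAGETABLE_SHIFT 30  /* one L2 table maps 1GB  */
#define L4_PAGETABLE_SHIFT 39  /* one L3 table maps 512GB */

/* How many (1UL << _s)-sized units are touched by the range [_l, _h). */
#define NR(_l, _h, _s) \
    (((((_h) + ((1UL << (_s)) - 1)) & ~((1UL << (_s)) - 1)) - \
      ((_l) & ~((1UL << (_s)) - 1))) >> (_s))

int main(void)
{
    unsigned long v_start   = 0;          /* invented: 4MB-aligned load address */
    unsigned long vpt_start = 24UL << 20; /* invented: end of kernel+initrd+physmap */
    unsigned long nr_pt_pages, v_end;

    for ( nr_pt_pages = 2; ; nr_pt_pages++ )
    {
        /* PT pages, start-info page, stack page; round v_end up to 4MB. */
        v_end = vpt_start + (nr_pt_pages + 2) * PAGE_SIZE;
        v_end = (v_end + (1UL << 22) - 1) & ~((1UL << 22) - 1);

        /* 1 L4 root + #L3 tables + #L2 tables + #L1 tables must all fit. */
        if ( (1 +
              NR(v_start, v_end, L4_PAGETABLE_SHIFT) +
              NR(v_start, v_end, L3_PAGETABLE_SHIFT) +
              NR(v_start, v_end, L2_PAGETABLE_SHIFT)) <= nr_pt_pages )
            break;
    }

    printf("nr_pt_pages = %lu, v_end = %#lx (%luMB)\n",
           nr_pt_pages, v_end, v_end >> 20);
    return 0;
}

With these example figures the loop settles on 17 page-table pages and a 28MB bootstrap address space; the real builder does the same computation against dom0's actual load address and module sizes.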
     9.1 --- a/xen/common/page_alloc.c	Fri Feb 25 17:06:27 2005 +0000
     9.2 +++ b/xen/common/page_alloc.c	Fri Feb 25 18:37:31 2005 +0000
     9.3 @@ -203,8 +203,8 @@ unsigned long alloc_boot_pages(unsigned 
     9.4  #define MEMZONE_DOM 1
     9.5  #define NR_ZONES    2
     9.6  
     9.7 -/* Up to 2^10 pages can be allocated at once. */
     9.8 -#define MAX_ORDER 10
     9.9 +/* Up to 2^20 pages can be allocated at once. */
    9.10 +#define MAX_ORDER 20
    9.11  static struct list_head heap[NR_ZONES][MAX_ORDER+1];
    9.12  
    9.13  static unsigned long avail[NR_ZONES];
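
Raising MAX_ORDER from 10 to 20 lifts the cap on a single contiguous allocation from 2^10 pages (4MB with 4kB pages) to 2^20 pages (4GB), which is what carving out (nearly) all of memory for DOM0 in one request needs. A small sketch of the relationship between a request size and its buddy order; order_for_pages() is an invented helper for illustration, not a Xen function.

#include <stdio.h>

#define PAGE_SHIFT 12

/* Smallest buddy order whose block of (1UL << order) pages covers nr_pages. */
static unsigned int order_for_pages(unsigned long nr_pages)
{
    unsigned int order = 0;

    while ( (1UL << order) < nr_pages )
        order++;
    return order;
}

int main(void)
{
    unsigned long dom0_bytes = 512UL << 20;             /* invented: 512MB dom0 */
    unsigned long nr_pages   = dom0_bytes >> PAGE_SHIFT;

    printf("512MB needs order %u\n", order_for_pages(nr_pages));
    /* Largest single allocation is 1 << (MAX_ORDER + PAGE_SHIFT) bytes. */
    printf("order 10 caps an allocation at %luMB, order 20 at %luMB\n",
           1UL << (10 + PAGE_SHIFT - 20), 1UL << (20 + PAGE_SHIFT - 20));
    return 0;
}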
    10.1 --- a/xen/drivers/char/console.c	Fri Feb 25 17:06:27 2005 +0000
    10.2 +++ b/xen/drivers/char/console.c	Fri Feb 25 18:37:31 2005 +0000
    10.3 @@ -577,6 +577,8 @@ static int __init debugtrace_init(void)
    10.4      debugtrace_buf = (unsigned char *)alloc_xenheap_pages(order);
    10.5      ASSERT(debugtrace_buf != NULL);
    10.6  
    10.7 +    memset(debugtrace_buf, '\0', debugtrace_bytes);
    10.8 +
    10.9      return 0;
   10.10  }
   10.11  __initcall(debugtrace_init);
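
The hunk above exists because alloc_xenheap_pages() evidently does not return zeroed memory, so debugtrace_init() must scrub the buffer itself or a later trace dump would include leftover bytes. A minimal sketch of the same allocate-then-zero pattern, with aligned_alloc() standing in for the Xen heap allocator and bytes_to_order() an invented helper:

#include <stdlib.h>
#include <string.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

/* Invented helper: smallest page order covering a byte count. */
static unsigned int bytes_to_order(unsigned long bytes)
{
    unsigned int order = 0;

    while ( (PAGE_SIZE << order) < bytes )
        order++;
    return order;
}

/* aligned_alloc() stands in for alloc_xenheap_pages(); like it, the
 * stand-in returns unscrubbed memory, hence the explicit memset. */
static void *alloc_trace_buffer(unsigned long bytes)
{
    unsigned int order = bytes_to_order(bytes);
    void *buf = aligned_alloc(PAGE_SIZE, PAGE_SIZE << order);

    if ( buf != NULL )
        memset(buf, 0, PAGE_SIZE << order);
    return buf;
}

int main(void)
{
    void *buf = alloc_trace_buffer(64UL << 10);  /* e.g. 64kB of trace */
    free(buf);
    return 0;
}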
    11.1 --- a/xen/include/asm-x86/shadow.h	Fri Feb 25 17:06:27 2005 +0000
    11.2 +++ b/xen/include/asm-x86/shadow.h	Fri Feb 25 18:37:31 2005 +0000
    11.3 @@ -13,17 +13,20 @@
    11.4  #define PSH_hl2         (1<<30) /* page is an hl2 */
    11.5  #define PSH_pfn_mask    ((1<<21)-1)
    11.6  
    11.7 -/* Shadow PT operation mode : shadow-mode variable in arch_domain. */
    11.8 -
    11.9 +/* Shadow PT operation mode: shadow-mode variable in arch_domain. */
   11.10  #define SHM_enable    (1<<0) /* we're in one of the shadow modes */
   11.11  #define SHM_log_dirty (1<<1) /* enable log dirty mode */
   11.12 -#define SHM_translate (1<<2) /* do p2m tranaltion on guest tables */
   11.13 +#define SHM_translate (1<<2) /* do p2m translation on guest tables */
   11.14  #define SHM_external  (1<<3) /* external page table, not used by Xen */
   11.15  
   11.16  #define shadow_mode_enabled(_d)   ((_d)->arch.shadow_mode)
   11.17  #define shadow_mode_log_dirty(_d) ((_d)->arch.shadow_mode & SHM_log_dirty)
   11.18  #define shadow_mode_translate(_d) ((_d)->arch.shadow_mode & SHM_translate)
   11.19 +#ifndef __x86_64__ /* XXX Currently breaks the 64-bit build. */
   11.20  #define shadow_mode_external(_d)  ((_d)->arch.shadow_mode & SHM_external)
   11.21 +#else
   11.22 +#define shadow_mode_external(_d)  (0)
   11.23 +#endif
   11.24  
   11.25  #define shadow_linear_pg_table ((l1_pgentry_t *)SH_LINEAR_PT_VIRT_START)
   11.26  #define shadow_linear_l2_table ((l2_pgentry_t *)(SH_LINEAR_PT_VIRT_START + \
   11.27 @@ -804,6 +807,10 @@ static inline void update_pagetables(str
   11.28  
   11.29      if ( !shadow_mode_external(d) )
   11.30      {
   11.31 +        /*
   11.32 +         * Internal page tables:
   11.33 +         * No need to allocate a separate page table for Xen.
   11.34 +         */
   11.35  #ifdef __x86_64__
   11.36          if ( !(ed->arch.flags & TF_kernel_mode) )
   11.37              ed->arch.monitor_table = ed->arch.guest_table_user;
   11.38 @@ -816,9 +823,10 @@ static inline void update_pagetables(str
   11.39      }
   11.40      else
   11.41      {
   11.42 -        // External page tables...
   11.43 -        // Allocate a monitor page table if we don't already have one.
   11.44 -        //
   11.45 +        /*
   11.46 +         * External page tables:
   11.47 +         * Allocate a monitor page table if we don't already have one.
   11.48 +         */
   11.49          if ( unlikely(!pagetable_val(ed->arch.monitor_table)) )
   11.50              ed->arch.monitor_table =
   11.51                  mk_pagetable(alloc_monitor_pagetable(ed) << PAGE_SHIFT);
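
The update_pagetables() hunks distinguish internal page tables, where the monitor table can simply alias the guest (or shadow) table, from external ones, where a dedicated monitor table is allocated on first use. A simplified sketch of that branch follows; the structure and helper are invented stand-ins for the real Xen types, and the shadow-table bookkeeping is reduced to a single field.

#define SHM_enable    (1 << 0)  /* some shadow mode is active */
#define SHM_external  (1 << 3)  /* page tables not used directly by Xen */

typedef unsigned long pagetable_t;

struct vcpu_sketch {
    unsigned int shadow_mode;   /* SHM_* bits, as in the header above */
    pagetable_t  guest_table;
    pagetable_t  shadow_table;
    pagetable_t  monitor_table;
};

/* Invented stand-in for alloc_monitor_pagetable(). */
static pagetable_t alloc_monitor_pagetable_stub(void) { return 0x1000UL; }

static void update_monitor_table(struct vcpu_sketch *v)
{
    if ( !(v->shadow_mode & SHM_external) )
    {
        /* Internal page tables: no separate table is needed for Xen, so
         * point the monitor table at the shadow (if shadowing) or guest
         * table. */
        v->monitor_table = (v->shadow_mode & SHM_enable)
                           ? v->shadow_table : v->guest_table;
    }
    else if ( v->monitor_table == 0 )
    {
        /* External page tables: allocate a monitor table on first use. */
        v->monitor_table = alloc_monitor_pagetable_stub();
    }
}

int main(void)
{
    struct vcpu_sketch v = { .shadow_mode = SHM_external };
    update_monitor_table(&v);   /* allocates a monitor table */
    return (int)(v.monitor_table == 0);
}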
    12.1 --- a/xen/include/xen/sched.h	Fri Feb 25 17:06:27 2005 +0000
    12.2 +++ b/xen/include/xen/sched.h	Fri Feb 25 18:37:31 2005 +0000
    12.3 @@ -215,12 +215,11 @@ static inline void get_knownalive_domain
    12.4    
    12.5  extern struct domain *do_createdomain(
    12.6      domid_t dom_id, unsigned int cpu);
    12.7 -extern int construct_dom0(struct domain *d, 
    12.8 -                          unsigned long alloc_start,
    12.9 -                          unsigned long alloc_end,
   12.10 -                          unsigned long image_start, unsigned long image_len, 
   12.11 -                          unsigned long initrd_start, unsigned long initrd_len,
   12.12 -                          char *cmdline);
   12.13 +extern int construct_dom0(
   12.14 +    struct domain *d,
   12.15 +    unsigned long image_start, unsigned long image_len, 
   12.16 +    unsigned long initrd_start, unsigned long initrd_len,
   12.17 +    char *cmdline);
   12.18  extern int final_setup_guest(struct domain *d, dom0_builddomain_t *);
   12.19  
   12.20  struct domain *find_domain_by_id(domid_t dom);
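
The prototype change drops the alloc_start/alloc_end window: per this changeset DOM0 is handed (effectively all of) memory inside the builder itself rather than a caller-chosen range. A sketch of the resulting call shape, with a stub body and invented argument values:

#include <stdio.h>

struct domain;  /* opaque here */

/* Stub with the new, narrower prototype from the hunk above. */
static int construct_dom0(
    struct domain *d,
    unsigned long image_start, unsigned long image_len,
    unsigned long initrd_start, unsigned long initrd_len,
    char *cmdline)
{
    /* The real builder loads the image, builds page tables, etc.,
     * and returns 0 or a negative errno value. */
    printf("dom0: image %#lx+%#lx, initrd %#lx+%#lx, cmdline \"%s\"\n",
           image_start, image_len, initrd_start, initrd_len,
           cmdline ? cmdline : "");
    return 0;
}

int main(void)
{
    /* Invented values; note there are no alloc_start/alloc_end arguments
     * any more -- memory allocation now happens inside the builder. */
    return construct_dom0(NULL, 0x100000UL, 0x400000UL,
                          0x500000UL, 0x0UL,
                          "console=xencons0 root=/dev/sda1 ro");
}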