ia64/xen-unstable
changeset 3935:3c5d6f364349
bitkeeper revision 1.1236.1.27 (421f706biBboh8DlmOttNIpUogeM6Q)
A few fixes, and DOM0 by default is now allocated all of memory at
boot time.
Signed-off-by: Keir Fraser <keir.fraser@cl.cam.ac.uk>
| author   | kaf24@scramble.cl.cam.ac.uk |
|----------|-----------------------------|
| date     | Fri Feb 25 18:37:31 2005 +0000 (2005-02-25) |
| parents  | e93d8f53eabb |
| children | da55822ba1b5 4ce1aebf725f |
| files    | .rootkeys xen/arch/x86/boot/x86_32.S xen/arch/x86/boot/x86_64.S xen/arch/x86/domain_build.c xen/arch/x86/setup.c xen/arch/x86/x86_32/domain_build.c xen/arch/x86/x86_32/domain_page.c xen/arch/x86/x86_64/domain_build.c xen/common/page_alloc.c xen/drivers/char/console.c xen/include/asm-x86/shadow.h xen/include/xen/sched.h |
line diff
--- a/.rootkeys	Fri Feb 25 17:06:27 2005 +0000
+++ b/.rootkeys	Fri Feb 25 18:37:31 2005 +0000
@@ -946,6 +946,7 @@ 4107c15e-VmEcLsE-7JCXZaabI8C7A xen/arch/
 3ddb79bcUrk2EIaM5VsT6wUudH1kkg xen/arch/x86/delay.c
 40e34414WiQO4h2m3tcpaCPn7SyYyg xen/arch/x86/dom0_ops.c
 3ddb79bc1_2bAt67x9MFCP4AZrQnvQ xen/arch/x86/domain.c
+4202391dkvdTZ8GhWXe3Gqf9EOgWXg xen/arch/x86/domain_build.c
 41d3eaae6GSDo3ZJDfK3nvQsJux-PQ xen/arch/x86/e820.c
 3ddb79bcY5zW7KhvI9gvfuPi3ZumEg xen/arch/x86/extable.c
 3fe443fdDDb0Sw6NQBCk4GQapayfTA xen/arch/x86/flushtlb.c
@@ -984,7 +985,6 @@ 41f97ef5139vN42cOYHfX_Ac8WOOjA xen/arch/
 41c0c4128URE0dxcO15JME_MuKBPfg xen/arch/x86/vmx_vmcs.c
 419cbedeQDg8IrO3izo3o5rQNlo0kQ xen/arch/x86/x86_32/asm-offsets.c
 4107c15e_NqNYew2EXroXz2mgTAMWQ xen/arch/x86/x86_32/call_with_regs.S
-4202391dkvdTZ8GhWXe3Gqf9EOgWXg xen/arch/x86/x86_32/domain_build.c
 3e32af9aRnYGl4GMOaDKp7JdfhOGhg xen/arch/x86/x86_32/domain_page.c
 3ddb79bcecupHj56ZbTa3B0FxDowMg xen/arch/x86/x86_32/entry.S
 3ddb79bcHwuCQDjBICDTSis52hWguw xen/arch/x86/x86_32/mm.c
@@ -993,7 +993,6 @@ 42000d3ckiFc1qxa4AWqsd0t3lxuyw xen/arch/
 3ddb79bc4nTpGQOe6_-MbyZzkhlhFQ xen/arch/x86/x86_32/usercopy.c
 3ddb79bcOMCu9-5mKpjIh5d0qqBDPg xen/arch/x86/x86_32/xen.lds
 41bf1717Ty3hwN3E9swdu8QfnvGqww xen/arch/x86/x86_64/asm-offsets.c
-4202391dA91ZovYX9d_5zJi9yGvLoQ xen/arch/x86/x86_64/domain_build.c
 40e96d3aLDI-nViMuYneD7VKYlZrVg xen/arch/x86/x86_64/entry.S
 41bf1717XhPz_dNT5OKSjgmbFuWBuA xen/arch/x86/x86_64/mm.c
 42000d3cMb8o1WuFBXC07c8i3lPZBw xen/arch/x86/x86_64/traps.c
--- a/xen/arch/x86/boot/x86_32.S	Fri Feb 25 17:06:27 2005 +0000
+++ b/xen/arch/x86/boot/x86_32.S	Fri Feb 25 18:37:31 2005 +0000
@@ -15,9 +15,9 @@ ENTRY(start)
         /* Magic number indicating a Multiboot header. */
         .long 0x1BADB002
         /* Flags to bootloader (see Multiboot spec). */
-        .long 0x00000002
+        .long 0x00000003
         /* Checksum: must be the negated sum of the first two fields. */
-        .long -0x1BADB004
+        .long -0x1BADB005
 
 bad_cpu_msg:
         .asciz "ERR: Not a P6-compatible CPU!"
--- a/xen/arch/x86/boot/x86_64.S	Fri Feb 25 17:06:27 2005 +0000
+++ b/xen/arch/x86/boot/x86_64.S	Fri Feb 25 18:37:31 2005 +0000
@@ -16,9 +16,9 @@ ENTRY(start)
         /* Magic number indicating a Multiboot header. */
         .long 0x1BADB002
         /* Flags to bootloader (see Multiboot spec). */
-        .long 0x00000002
+        .long 0x00000003
         /* Checksum: must be the negated sum of the first two fields. */
-        .long -0x1BADB004
+        .long -0x1BADB005
 
         .org 0x010
         .asciz "ERR: Not a 64-bit CPU!"
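A note on the two Multiboot hunks above: per the Multiboot spec, header flag bit 0 asks the bootloader to page-align loaded modules and bit 1 requests the memory map, so raising the flags word from 0x00000002 to 0x00000003 enables page-aligned modules, and the checksum must move in lock-step because the header is valid only if magic + flags + checksum wraps to zero. A minimal sketch of that invariant (the values are copied from the hunks; the check itself is just 32-bit arithmetic):

```c
#include <assert.h>
#include <stdint.h>

int main(void)
{
    uint32_t magic    = 0x1BADB002;       /* Multiboot header magic */
    uint32_t flags    = 0x00000003;       /* bit 0: page-align modules, bit 1: mem info */
    uint32_t checksum = -(magic + flags); /* == (uint32_t)-0x1BADB005 */

    /* The bootloader accepts the header only if the three fields sum to 0. */
    assert((uint32_t)(magic + flags + checksum) == 0);
    return 0;
}
```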
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/domain_build.c	Fri Feb 25 18:37:31 2005 +0000
@@ -0,0 +1,543 @@
+/******************************************************************************
+ * domain_build.c
+ *
+ * Copyright (c) 2002-2005, K A Fraser
+ */
+
+#include <xen/config.h>
+#include <xen/init.h>
+#include <xen/lib.h>
+#include <xen/sched.h>
+#include <xen/smp.h>
+#include <xen/delay.h>
+#include <xen/event.h>
+#include <xen/elf.h>
+#include <xen/kernel.h>
+#include <asm/regs.h>
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/processor.h>
+#include <asm/desc.h>
+#include <asm/i387.h>
+#include <asm/shadow.h>
+
+/* opt_dom0_mem: Kilobytes of memory allocated to domain 0. */
+static unsigned int opt_dom0_mem = 0;
+integer_param("dom0_mem", opt_dom0_mem);
+
+#if defined(__i386__)
+/* No ring-3 access in initial leaf page tables. */
+#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
+#elif defined(__x86_64__)
+/* Allow ring-3 access in long mode as guest cannot use ring 1. */
+#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
+#endif
+#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
+#define L3_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
+#define L4_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
+
+#define round_pgup(_p)    (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
+#define round_pgdown(_p)  ((_p)&PAGE_MASK)
+
+static struct pfn_info *alloc_largest(struct domain *d, unsigned long max)
+{
+    struct pfn_info *page;
+    unsigned int order = get_order(max * PAGE_SIZE);
+    if ( (max & (max-1)) != 0 )
+        order--;
+    while ( (page = alloc_domheap_pages(d, order)) == NULL )
+        if ( order-- == 0 )
+            break;
+    return page;
+}
+
+int construct_dom0(struct domain *d,
+                   unsigned long _image_start, unsigned long image_len,
+                   unsigned long _initrd_start, unsigned long initrd_len,
+                   char *cmdline)
+{
+    char *dst;
+    int i, rc;
+    unsigned long pfn, mfn;
+    unsigned long nr_pages;
+    unsigned long nr_pt_pages;
+    unsigned long alloc_start;
+    unsigned long alloc_end;
+    unsigned long count;
+    struct pfn_info *page = NULL;
+    start_info_t *si;
+    struct exec_domain *ed = d->exec_domain[0];
+#if defined(__i386__)
+    char *image_start  = (char *)_image_start;  /* use lowmem mappings */
+    char *initrd_start = (char *)_initrd_start; /* use lowmem mappings */
+#elif defined(__x86_64__)
+    char *image_start  = __va(_image_start);
+    char *initrd_start = __va(_initrd_start);
+    l4_pgentry_t *l4tab = NULL, *l4start = NULL;
+    l3_pgentry_t *l3tab = NULL, *l3start = NULL;
+#endif
+    l2_pgentry_t *l2tab = NULL, *l2start = NULL;
+    l1_pgentry_t *l1tab = NULL, *l1start = NULL;
+
+    /*
+     * This fully describes the memory layout of the initial domain. All
+     * *_start address are page-aligned, except v_start (and v_end) which are
+     * superpage-aligned.
+     */
+    struct domain_setup_info dsi;
+    unsigned long vinitrd_start;
+    unsigned long vinitrd_end;
+    unsigned long vphysmap_start;
+    unsigned long vphysmap_end;
+    unsigned long vstartinfo_start;
+    unsigned long vstartinfo_end;
+    unsigned long vstack_start;
+    unsigned long vstack_end;
+    unsigned long vpt_start;
+    unsigned long vpt_end;
+    unsigned long v_end;
+
+    /* Machine address of next candidate page-table page. */
+    unsigned long mpt_alloc;
+
+    extern void physdev_init_dom0(struct domain *);
+
+    /* Sanity! */
+    if ( d->id != 0 )
+        BUG();
+    if ( test_bit(DF_CONSTRUCTED, &d->d_flags) )
+        BUG();
+
+    memset(&dsi, 0, sizeof(struct domain_setup_info));
+
+    printk("*** LOADING DOMAIN 0 ***\n");
+
+    /* By default DOM0 is allocated all available memory. */
+    if ( (nr_pages = opt_dom0_mem >> (PAGE_SHIFT - 10)) == 0 )
+        nr_pages = avail_domheap_pages() +
+            ((initrd_len + PAGE_SIZE - 1) >> PAGE_SHIFT) +
+            ((image_len  + PAGE_SIZE - 1) >> PAGE_SHIFT);
+    d->max_pages = nr_pages;
+    if ( (page = alloc_largest(d, nr_pages)) == NULL )
+        panic("Not enough RAM for DOM0 reservation.\n");
+    alloc_start = page_to_phys(page);
+    alloc_end   = alloc_start + (d->tot_pages << PAGE_SHIFT);
+
+    rc = parseelfimage(image_start, image_len, &dsi);
+    if ( rc != 0 )
+        return rc;
+
+    /* Set up domain options */
+    if ( dsi.use_writable_pagetables )
+        vm_assist(d, VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);
+
+    /* Align load address to 4MB boundary. */
+    dsi.v_start &= ~((1UL<<22)-1);
+
+    /*
+     * Why do we need this? The number of page-table frames depends on the
+     * size of the bootstrap address space. But the size of the address space
+     * depends on the number of page-table frames (since each one is mapped
+     * read-only). We have a pair of simultaneous equations in two unknowns,
+     * which we solve by exhaustive search.
+     */
+    vinitrd_start    = round_pgup(dsi.v_kernend);
+    vinitrd_end      = vinitrd_start + initrd_len;
+    vphysmap_start   = round_pgup(vinitrd_end);
+    vphysmap_end     = vphysmap_start + (nr_pages * sizeof(u32));
+    vpt_start        = round_pgup(vphysmap_end);
+    for ( nr_pt_pages = 2; ; nr_pt_pages++ )
+    {
+        vpt_end          = vpt_start + (nr_pt_pages * PAGE_SIZE);
+        vstartinfo_start = vpt_end;
+        vstartinfo_end   = vstartinfo_start + PAGE_SIZE;
+        vstack_start     = vstartinfo_end;
+        vstack_end       = vstack_start + PAGE_SIZE;
+        v_end            = (vstack_end + (1UL<<22)-1) & ~((1UL<<22)-1);
+        if ( (v_end - vstack_end) < (512UL << 10) )
+            v_end += 1UL << 22; /* Add extra 4MB to get >= 512kB padding. */
+#if defined(__i386__)
+        if ( (((v_end - dsi.v_start + ((1UL<<L2_PAGETABLE_SHIFT)-1)) >>
+               L2_PAGETABLE_SHIFT) + 1) <= nr_pt_pages )
+            break;
+#elif defined(__x86_64__)
+#define NR(_l,_h,_s) \
+    (((((_h) + ((1UL<<(_s))-1)) & ~((1UL<<(_s))-1)) - \
+       ((_l) & ~((1UL<<(_s))-1))) >> (_s))
+        if ( (1 + /* # L4 */
+              NR(dsi.v_start, v_end, L4_PAGETABLE_SHIFT) + /* # L3 */
+              NR(dsi.v_start, v_end, L3_PAGETABLE_SHIFT) + /* # L2 */
+              NR(dsi.v_start, v_end, L2_PAGETABLE_SHIFT))  /* # L1 */
+             <= nr_pt_pages )
+            break;
+#endif
+    }
+
+    if ( (v_end - dsi.v_start) > (alloc_end - alloc_start) )
+        panic("Insufficient contiguous RAM to build kernel image.\n");
+
+    printk("VIRTUAL MEMORY ARRANGEMENT:\n"
+           " Loaded kernel: %p->%p\n"
+           " Init. ramdisk: %p->%p\n"
+           " Phys-Mach map: %p->%p\n"
+           " Page tables:   %p->%p\n"
+           " Start info:    %p->%p\n"
+           " Boot stack:    %p->%p\n"
+           " TOTAL:         %p->%p\n",
+           dsi.v_kernstart, dsi.v_kernend,
+           vinitrd_start, vinitrd_end,
+           vphysmap_start, vphysmap_end,
+           vpt_start, vpt_end,
+           vstartinfo_start, vstartinfo_end,
+           vstack_start, vstack_end,
+           dsi.v_start, v_end);
+    printk(" ENTRY ADDRESS: %p\n", dsi.v_kernentry);
+
+    if ( (v_end - dsi.v_start) > (nr_pages * PAGE_SIZE) )
+    {
+        printk("Initial guest OS requires too much space\n"
+               "(%luMB is greater than %luMB limit)\n",
+               (v_end-dsi.v_start)>>20, (nr_pages<<PAGE_SHIFT)>>20);
+        return -ENOMEM;
+    }
+
+    mpt_alloc = (vpt_start - dsi.v_start) + alloc_start;
+
+    SET_GDT_ENTRIES(ed, DEFAULT_GDT_ENTRIES);
+    SET_GDT_ADDRESS(ed, DEFAULT_GDT_ADDRESS);
+
+    /*
+     * We're basically forcing default RPLs to 1, so that our "what privilege
+     * level are we returning to?" logic works.
+     */
+    ed->arch.failsafe_selector = FLAT_KERNEL_CS;
+    ed->arch.event_selector    = FLAT_KERNEL_CS;
+    ed->arch.kernel_ss = FLAT_KERNEL_SS;
+    for ( i = 0; i < 256; i++ )
+        ed->arch.traps[i].cs = FLAT_KERNEL_CS;
+
+#if defined(__i386__)
+
+    /*
+     * Protect the lowest 1GB of memory. We use a temporary mapping there
+     * from which we copy the kernel and ramdisk images.
+     */
+    if ( dsi.v_start < (1UL<<30) )
+    {
+        printk("Initial loading isn't allowed to lowest 1GB of memory.\n");
+        return -EINVAL;
+    }
+
+    /* WARNING: The new domain must have its 'processor' field filled in! */
+    l2start = l2tab = (l2_pgentry_t *)mpt_alloc; mpt_alloc += PAGE_SIZE;
+    memcpy(l2tab, &idle_pg_table[0], PAGE_SIZE);
+    l2tab[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
+        mk_l2_pgentry((unsigned long)l2start | __PAGE_HYPERVISOR);
+    l2tab[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] =
+        mk_l2_pgentry(__pa(d->arch.mm_perdomain_pt) | __PAGE_HYPERVISOR);
+    ed->arch.guest_table = mk_pagetable((unsigned long)l2start);
+
+    l2tab += l2_table_offset(dsi.v_start);
+    mfn = alloc_start >> PAGE_SHIFT;
+    for ( count = 0; count < ((v_end-dsi.v_start)>>PAGE_SHIFT); count++ )
+    {
+        if ( !((unsigned long)l1tab & (PAGE_SIZE-1)) )
+        {
+            l1start = l1tab = (l1_pgentry_t *)mpt_alloc;
+            mpt_alloc += PAGE_SIZE;
+            *l2tab++ = mk_l2_pgentry((unsigned long)l1start | L2_PROT);
+            clear_page(l1tab);
+            if ( count == 0 )
+                l1tab += l1_table_offset(dsi.v_start);
+        }
+        *l1tab++ = mk_l1_pgentry((mfn << PAGE_SHIFT) | L1_PROT);
+
+        page = &frame_table[mfn];
+        if ( !get_page_and_type(page, d, PGT_writable_page) )
+            BUG();
+
+        mfn++;
+    }
+
+    /* Pages that are part of page tables must be read only. */
+    l2tab = l2start + l2_table_offset(vpt_start);
+    l1start = l1tab = (l1_pgentry_t *)l2_pgentry_to_phys(*l2tab);
+    l1tab += l1_table_offset(vpt_start);
+    for ( count = 0; count < nr_pt_pages; count++ )
+    {
+        *l1tab = mk_l1_pgentry(l1_pgentry_val(*l1tab) & ~_PAGE_RW);
+        page = &frame_table[l1_pgentry_to_pfn(*l1tab)];
+        if ( count == 0 )
+        {
+            page->u.inuse.type_info &= ~PGT_type_mask;
+            page->u.inuse.type_info |= PGT_l2_page_table;
+
+            /*
+             * No longer writable: decrement the type_count.
+             * Installed as CR3: increment both the ref_count and type_count.
+             * Net: just increment the ref_count.
+             */
+            get_page(page, d); /* an extra ref because of readable mapping */
+
+            /* Get another ref to L2 page so that it can be pinned. */
+            if ( !get_page_and_type(page, d, PGT_l2_page_table) )
+                BUG();
+            set_bit(_PGT_pinned, &page->u.inuse.type_info);
+        }
+        else
+        {
+            page->u.inuse.type_info &= ~PGT_type_mask;
+            page->u.inuse.type_info |= PGT_l1_page_table;
+            page->u.inuse.type_info |=
+                ((dsi.v_start>>L2_PAGETABLE_SHIFT)+(count-1))<<PGT_va_shift;
+
+            /*
+             * No longer writable: decrement the type_count.
+             * This is an L1 page, installed in a validated L2 page:
+             * increment both the ref_count and type_count.
+             * Net: just increment the ref_count.
+             */
+            get_page(page, d); /* an extra ref because of readable mapping */
+        }
+        if ( !((unsigned long)++l1tab & (PAGE_SIZE - 1)) )
+            l1start = l1tab = (l1_pgentry_t *)l2_pgentry_to_phys(*++l2tab);
+    }
+
+#elif defined(__x86_64__)
+
+    /* Overlap with Xen protected area? */
+    if ( (dsi.v_start < HYPERVISOR_VIRT_END) &&
+         (v_end > HYPERVISOR_VIRT_START) )
+    {
+        printk("DOM0 image overlaps with Xen private area.\n");
+        return -EINVAL;
+    }
+
+    /* WARNING: The new domain must have its 'processor' field filled in! */
+    phys_to_page(mpt_alloc)->u.inuse.type_info = PGT_l4_page_table;
+    l4start = l4tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE;
+    memcpy(l4tab, &idle_pg_table[0], PAGE_SIZE);
+    l4tab[l4_table_offset(LINEAR_PT_VIRT_START)] =
+        mk_l4_pgentry(__pa(l4start) | __PAGE_HYPERVISOR);
+    l4tab[l4_table_offset(PERDOMAIN_VIRT_START)] =
+        mk_l4_pgentry(__pa(d->arch.mm_perdomain_l3) | __PAGE_HYPERVISOR);
+    ed->arch.guest_table = mk_pagetable(__pa(l4start));
+
+    l4tab += l4_table_offset(dsi.v_start);
+    mfn = alloc_start >> PAGE_SHIFT;
+    for ( count = 0; count < ((v_end-dsi.v_start)>>PAGE_SHIFT); count++ )
+    {
+        if ( !((unsigned long)l1tab & (PAGE_SIZE-1)) )
+        {
+            phys_to_page(mpt_alloc)->u.inuse.type_info = PGT_l1_page_table;
+            l1start = l1tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE;
+            clear_page(l1tab);
+            if ( count == 0 )
+                l1tab += l1_table_offset(dsi.v_start);
+            if ( !((unsigned long)l2tab & (PAGE_SIZE-1)) )
+            {
+                phys_to_page(mpt_alloc)->u.inuse.type_info = PGT_l2_page_table;
+                l2start = l2tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE;
+                clear_page(l2tab);
+                if ( count == 0 )
+                    l2tab += l2_table_offset(dsi.v_start);
+                if ( !((unsigned long)l3tab & (PAGE_SIZE-1)) )
+                {
+                    phys_to_page(mpt_alloc)->u.inuse.type_info =
+                        PGT_l3_page_table;
+                    l3start = l3tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE;
+                    clear_page(l3tab);
+                    if ( count == 0 )
+                        l3tab += l3_table_offset(dsi.v_start);
+                    *l4tab++ = mk_l4_pgentry(__pa(l3start) | L4_PROT);
+                }
+                *l3tab++ = mk_l3_pgentry(__pa(l2start) | L3_PROT);
+            }
+            *l2tab++ = mk_l2_pgentry(__pa(l1start) | L2_PROT);
+        }
+        *l1tab++ = mk_l1_pgentry((mfn << PAGE_SHIFT) | L1_PROT);
+
+        page = &frame_table[mfn];
+        if ( (page->u.inuse.type_info == 0) &&
+             !get_page_and_type(page, d, PGT_writable_page) )
+            BUG();
+
+        mfn++;
+    }
+
+    /* Pages that are part of page tables must be read only. */
+    l4tab = l4start + l4_table_offset(vpt_start);
+    l3start = l3tab = l4_pgentry_to_l3(*l4tab);
+    l3tab += l3_table_offset(vpt_start);
+    l2start = l2tab = l3_pgentry_to_l2(*l3tab);
+    l2tab += l2_table_offset(vpt_start);
+    l1start = l1tab = l2_pgentry_to_l1(*l2tab);
+    l1tab += l1_table_offset(vpt_start);
+    for ( count = 0; count < nr_pt_pages; count++ )
+    {
+        *l1tab = mk_l1_pgentry(l1_pgentry_val(*l1tab) & ~_PAGE_RW);
+        page = &frame_table[l1_pgentry_to_pfn(*l1tab)];
+
+        /* Read-only mapping + PGC_allocated + page-table page. */
+        page->count_info         = PGC_allocated | 3;
+        page->u.inuse.type_info |= PGT_validated | 1;
+
+        /* Top-level p.t. is pinned. */
+        if ( (page->u.inuse.type_info & PGT_type_mask) == PGT_l4_page_table )
+        {
+            page->count_info        += 1;
+            page->u.inuse.type_info += 1 | PGT_pinned;
+        }
+
+        /* Iterate. */
+        if ( !((unsigned long)++l1tab & (PAGE_SIZE - 1)) )
+        {
+            if ( !((unsigned long)++l2tab & (PAGE_SIZE - 1)) )
+            {
+                if ( !((unsigned long)++l3tab & (PAGE_SIZE - 1)) )
+                    l3start = l3tab = l4_pgentry_to_l3(*++l4tab);
+                l2start = l2tab = l3_pgentry_to_l2(*l3tab);
+            }
+            l1start = l1tab = l2_pgentry_to_l1(*l2tab);
+        }
+    }
+
+#endif /* __x86_64__ */
+
+    /* Set up shared-info area. */
+    update_dom_time(d);
+    d->shared_info->domain_time = 0;
+    /* Mask all upcalls... */
+    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
+        d->shared_info->vcpu_data[i].evtchn_upcall_mask = 1;
+    d->shared_info->n_vcpu = smp_num_cpus;
+
+    /* Set up shadow and monitor tables. */
+    update_pagetables(ed);
+
+    /* Install the new page tables. */
+    __cli();
+    write_ptbase(ed);
+
+    /* Copy the OS image and free temporary buffer. */
+    (void)loadelfimage(image_start);
+    init_domheap_pages(
+        _image_start, (_image_start+image_len+PAGE_SIZE-1) & PAGE_MASK);
+
+    /* Copy the initial ramdisk and free temporary buffer. */
+    if ( initrd_len != 0 )
+    {
+        memcpy((void *)vinitrd_start, initrd_start, initrd_len);
+        init_domheap_pages(
+            _initrd_start, (_initrd_start+initrd_len+PAGE_SIZE-1) & PAGE_MASK);
+    }
+
+    /* Set up start info area. */
+    si = (start_info_t *)vstartinfo_start;
+    memset(si, 0, PAGE_SIZE);
+    si->nr_pages     = nr_pages;
+    si->shared_info  = virt_to_phys(d->shared_info);
+    si->flags        = SIF_PRIVILEGED | SIF_INITDOMAIN;
+    si->pt_base      = vpt_start;
+    si->nr_pt_frames = nr_pt_pages;
+    si->mfn_list     = vphysmap_start;
+
+    /* Write the phys->machine and machine->phys table entries. */
+    for ( pfn = 0; pfn < d->tot_pages; pfn++ )
+    {
+        mfn = pfn + (alloc_start>>PAGE_SHIFT);
+#ifndef NDEBUG
+#define REVERSE_START ((v_end - dsi.v_start) >> PAGE_SHIFT)
+        if ( pfn > REVERSE_START )
+            mfn = (alloc_end>>PAGE_SHIFT) - (pfn - REVERSE_START);
+#endif
+        ((u32 *)vphysmap_start)[pfn] = mfn;
+        machine_to_phys_mapping[mfn] = pfn;
+    }
+    while ( pfn < nr_pages )
+    {
+        if ( (page = alloc_largest(d, nr_pages - d->tot_pages)) == NULL )
+            panic("Not enough RAM for DOM0 reservation.\n");
+        while ( pfn < d->tot_pages )
+        {
+            mfn = page_to_pfn(page);
+#ifndef NDEBUG
+#define pfn (nr_pages - 1 - (pfn - ((alloc_end - alloc_start) >> PAGE_SHIFT)))
+#endif
+            ((u32 *)vphysmap_start)[pfn] = mfn;
+            machine_to_phys_mapping[mfn] = pfn;
+#undef pfn
+            page++; pfn++;
+        }
+    }
+
+    if ( initrd_len != 0 )
+    {
+        si->mod_start = vinitrd_start;
+        si->mod_len   = initrd_len;
+        printk("Initrd len 0x%lx, start at 0x%p\n",
+               si->mod_len, si->mod_start);
+    }
+
+    dst = si->cmd_line;
+    if ( cmdline != NULL )
+    {
+        for ( i = 0; i < 255; i++ )
+        {
+            if ( cmdline[i] == '\0' )
+                break;
+            *dst++ = cmdline[i];
+        }
+    }
+    *dst = '\0';
+
+    /* Reinstate the caller's page tables. */
+    write_ptbase(current);
+    __sti();
+
+#if defined(__i386__)
+    /* Destroy low mappings - they were only for our convenience. */
+    for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
+        if ( l2_pgentry_val(l2start[i]) & _PAGE_PSE )
+            l2start[i] = mk_l2_pgentry(0);
+    zap_low_mappings(); /* Do the same for the idle page tables. */
+#endif
+
+    /* DOM0 gets access to everything. */
+    physdev_init_dom0(d);
+
+    set_bit(DF_CONSTRUCTED, &d->d_flags);
+
+    new_thread(ed, dsi.v_kernentry, vstack_end, vstartinfo_start);
+
+    return 0;
+}
+
+int elf_sanity_check(Elf_Ehdr *ehdr)
+{
+    if ( !IS_ELF(*ehdr) ||
+#if defined(__i386__)
+         (ehdr->e_ident[EI_CLASS] != ELFCLASS32) ||
+         (ehdr->e_machine != EM_386) ||
+#elif defined(__x86_64__)
+         (ehdr->e_ident[EI_CLASS] != ELFCLASS64) ||
+         (ehdr->e_machine != EM_X86_64) ||
+#endif
+         (ehdr->e_ident[EI_DATA] != ELFDATA2LSB) ||
+         (ehdr->e_type != ET_EXEC) )
+    {
+        printk("DOM0 image is not a Xen-compatible Elf image.\n");
+        return 0;
+    }
+
+    return 1;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ */
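The new alloc_largest() above is the core of the "DOM0 gets everything" policy: a buddy heap only hands out power-of-two runs of pages, so the function starts at the largest order not exceeding the request and walks downward until some order succeeds. A minimal user-space sketch of that strategy, where try_alloc() is a hypothetical stand-in for Xen's alloc_domheap_pages():

```c
#include <stdio.h>

#define MAX_ORDER 20

/* Stand-in for alloc_domheap_pages(): pretend the heap can still supply
 * contiguous blocks of at most 2^14 pages. */
static int try_alloc(unsigned int order)
{
    return order <= 14;
}

/* Return the order actually granted: the largest 2^order <= max that the
 * heap can satisfy, retrying smaller orders on failure. This mirrors
 * alloc_largest(), which uses get_order() and decrements once when max is
 * not a power of two. */
static int alloc_largest(unsigned long max)
{
    unsigned int order = 0;
    while ( order < MAX_ORDER && (2UL << order) <= max )
        order++;                   /* now 2^order <= max < 2^(order+1) */
    while ( !try_alloc(order) )
        if ( order-- == 0 )
            return -1;             /* heap exhausted */
    return order;
}

int main(void)
{
    /* Asks for ~1M pages, settles for the 2^14-page chunk still available. */
    printf("order granted: %d\n", alloc_largest(1000000));
    return 0;
}
```

construct_dom0() then simply keeps calling alloc_largest() (see the `while ( pfn < nr_pages )` loop) until DOM0's full reservation is covered, so the domain's memory no longer needs to be one physically contiguous region.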
--- a/xen/arch/x86/setup.c	Fri Feb 25 17:06:27 2005 +0000
+++ b/xen/arch/x86/setup.c	Fri Feb 25 18:37:31 2005 +0000
@@ -20,10 +20,6 @@
 #include <asm/shadow.h>
 #include <asm/e820.h>
 
-/* opt_dom0_mem: Kilobytes of memory allocated to domain 0. */
-static unsigned int opt_dom0_mem = 16000;
-integer_param("dom0_mem", opt_dom0_mem);
-
 /*
  * opt_xenheap_megabytes: Size of Xen heap in megabytes, excluding the
  * pfn_info table and allocation bitmap.
@@ -463,7 +459,6 @@ void __init __start_xen(multiboot_info_t
     module_t *mod = (module_t *)__va(mbi->mods_addr);
     void *heap_start;
     unsigned long firsthole_start, nr_pages;
-    unsigned long dom0_memory_start, dom0_memory_end;
     unsigned long initial_images_start, initial_images_end;
     struct e820entry e820_raw[E820MAX];
     int i, e820_raw_nr = 0, bytes = 0;
@@ -567,15 +562,6 @@ void __init __start_xen(multiboot_info_t
            nr_pages >> (20 - PAGE_SHIFT),
            nr_pages << (PAGE_SHIFT - 10));
 
-    /* Allocate an aligned chunk of RAM for DOM0. */
-    dom0_memory_start = alloc_boot_pages(opt_dom0_mem << 10, 4UL << 20);
-    dom0_memory_end   = dom0_memory_start + (opt_dom0_mem << 10);
-    if ( dom0_memory_start == 0 )
-    {
-        printk("Not enough memory for DOM0 memory reservation.\n");
-        for ( ; ; ) ;
-    }
-
     init_frametable();
 
     end_boot_allocator();
@@ -613,7 +599,7 @@ void __init __start_xen(multiboot_info_t
      * We're going to setup domain0 using the module(s) that we stashed safely
      * above our heap. The second module, if present, is an initrd ramdisk.
      */
-    if ( construct_dom0(dom0, dom0_memory_start, dom0_memory_end,
+    if ( construct_dom0(dom0,
                         initial_images_start,
                         mod[0].mod_end-mod[0].mod_start,
                         (mbi->mods_count == 1) ? 0 :
@@ -624,9 +610,7 @@ void __init __start_xen(multiboot_info_t
                         cmdline) != 0)
         panic("Could not set up DOM0 guest OS\n");
 
-    /* The stash space for the initial kernel image can now be freed up. */
-    init_domheap_pages(initial_images_start, initial_images_end);
-
+    /* Scrub RAM that is still free and so may go to an unprivileged domain. */
     scrub_heap_pages();
 
    init_trace_bufs();
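Note the policy shift in this hunk: the `dom0_mem` parameter moves out of setup.c into domain_build.c, its default drops from 16000 KB to 0, and construct_dom0() treats 0 as "allocate all available memory". When a value is given, it is in kilobytes and converted to pages with `opt_dom0_mem >> (PAGE_SHIFT - 10)`. A quick sanity check of that conversion, assuming the usual 4 KB pages (PAGE_SHIFT == 12):

```c
#include <stdio.h>

#define PAGE_SHIFT 12  /* 4 KB pages assumed */

int main(void)
{
    unsigned int  opt_dom0_mem = 16000;  /* the old default, in KB */
    /* KB -> pages: shift right by (PAGE_SHIFT - 10), i.e. divide by 4. */
    unsigned long nr_pages = opt_dom0_mem >> (PAGE_SHIFT - 10);

    printf("dom0_mem=%uKB -> %lu pages (~%lu MB after truncation)\n",
           opt_dom0_mem, nr_pages, (nr_pages << PAGE_SHIFT) >> 20);
    return 0;
}
```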
--- a/xen/arch/x86/x86_32/domain_build.c	Fri Feb 25 17:06:27 2005 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,416 +0,0 @@
-/******************************************************************************
- * domain_build.c
- *
- * Copyright (c) 2002-2005, K A Fraser
- */
-
-#include <xen/config.h>
-#include <xen/init.h>
-#include <xen/lib.h>
-#include <xen/sched.h>
-#include <xen/smp.h>
-#include <xen/delay.h>
-#include <asm/regs.h>
-#include <asm/system.h>
-#include <asm/io.h>
-#include <asm/processor.h>
-#include <asm/desc.h>
-#include <asm/i387.h>
-#include <xen/event.h>
-#include <xen/elf.h>
-#include <xen/kernel.h>
-#include <asm/shadow.h>
-
-/* No ring-3 access in initial page tables. */
-#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
-#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
-
-#define round_pgup(_p)    (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
-#define round_pgdown(_p)  ((_p)&PAGE_MASK)
-
-int construct_dom0(struct domain *d,
-                   unsigned long alloc_start,
-                   unsigned long alloc_end,
-                   unsigned long _image_start, unsigned long image_len,
-                   unsigned long _initrd_start, unsigned long initrd_len,
-                   char *cmdline)
-{
-    char *dst;
-    int i, rc;
-    unsigned long pfn, mfn;
-    unsigned long nr_pages = (alloc_end - alloc_start) >> PAGE_SHIFT;
-    unsigned long nr_pt_pages;
-    unsigned long count;
-    l2_pgentry_t *l2tab, *l2start;
-    l1_pgentry_t *l1tab = NULL, *l1start = NULL;
-    struct pfn_info *page = NULL;
-    start_info_t *si;
-    struct exec_domain *ed = d->exec_domain[0];
-    char *image_start  = (char *)_image_start;  /* use lowmem mappings */
-    char *initrd_start = (char *)_initrd_start; /* use lowmem mappings */
-
-    /*
-     * This fully describes the memory layout of the initial domain. All
-     * *_start address are page-aligned, except v_start (and v_end) which are
-     * superpage-aligned.
-     */
-    struct domain_setup_info dsi;
-    unsigned long vinitrd_start;
-    unsigned long vinitrd_end;
-    unsigned long vphysmap_start;
-    unsigned long vphysmap_end;
-    unsigned long vstartinfo_start;
-    unsigned long vstartinfo_end;
-    unsigned long vstack_start;
-    unsigned long vstack_end;
-    unsigned long vpt_start;
-    unsigned long vpt_end;
-    unsigned long v_end;
-
-    /* Machine address of next candidate page-table page. */
-    unsigned long mpt_alloc;
-
-    extern void physdev_init_dom0(struct domain *);
-
-    /* Sanity! */
-    if ( d->id != 0 )
-        BUG();
-    if ( test_bit(DF_CONSTRUCTED, &d->d_flags) )
-        BUG();
-
-    memset(&dsi, 0, sizeof(struct domain_setup_info));
-
-    printk("*** LOADING DOMAIN 0 ***\n");
-
-    /*
-     * This is all a bit grim. We've moved the modules to the "safe" physical
-     * memory region above MAP_DIRECTMAP_ADDRESS (48MB). Later in this
-     * routine we're going to copy it down into the region that's actually
-     * been allocated to domain 0. This is highly likely to be overlapping, so
-     * we use a forward copy.
-     *
-     * MAP_DIRECTMAP_ADDRESS should be safe. The worst case is a machine with
-     * 4GB and lots of network/disk cards that allocate loads of buffers.
-     * We'll have to revisit this if we ever support PAE (64GB).
-     */
-
-    rc = parseelfimage(image_start, image_len, &dsi);
-    if ( rc != 0 )
-        return rc;
-
-    /* Set up domain options */
-    if ( dsi.use_writable_pagetables )
-        vm_assist(d, VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);
-
-    /* Align load address to 4MB boundary. */
-    dsi.v_start &= ~((1UL<<22)-1);
-
-    /*
-     * Why do we need this? The number of page-table frames depends on the
-     * size of the bootstrap address space. But the size of the address space
-     * depends on the number of page-table frames (since each one is mapped
-     * read-only). We have a pair of simultaneous equations in two unknowns,
-     * which we solve by exhaustive search.
-     */
-    vinitrd_start    = round_pgup(dsi.v_kernend);
-    vinitrd_end      = vinitrd_start + initrd_len;
-    vphysmap_start   = round_pgup(vinitrd_end);
-    vphysmap_end     = vphysmap_start + (nr_pages * sizeof(u32));
-    vpt_start        = round_pgup(vphysmap_end);
-    for ( nr_pt_pages = 2; ; nr_pt_pages++ )
-    {
-        vpt_end          = vpt_start + (nr_pt_pages * PAGE_SIZE);
-        vstartinfo_start = vpt_end;
-        vstartinfo_end   = vstartinfo_start + PAGE_SIZE;
-        vstack_start     = vstartinfo_end;
-        vstack_end       = vstack_start + PAGE_SIZE;
-        v_end            = (vstack_end + (1UL<<22)-1) & ~((1UL<<22)-1);
-        if ( (v_end - vstack_end) < (512UL << 10) )
-            v_end += 1UL << 22; /* Add extra 4MB to get >= 512kB padding. */
-        if ( (((v_end - dsi.v_start + ((1UL<<L2_PAGETABLE_SHIFT)-1)) >>
-               L2_PAGETABLE_SHIFT) + 1) <= nr_pt_pages )
-            break;
-    }
-
-    printk("PHYSICAL MEMORY ARRANGEMENT:\n"
-           " Kernel image:  %p->%p\n"
-           " Initrd image:  %p->%p\n"
-           " Dom0 alloc.:   %p->%p\n",
-           _image_start, _image_start + image_len,
-           _initrd_start, _initrd_start + initrd_len,
-           alloc_start, alloc_end);
-    printk("VIRTUAL MEMORY ARRANGEMENT:\n"
-           " Loaded kernel: %p->%p\n"
-           " Init. ramdisk: %p->%p\n"
-           " Phys-Mach map: %p->%p\n"
-           " Page tables:   %p->%p\n"
-           " Start info:    %p->%p\n"
-           " Boot stack:    %p->%p\n"
-           " TOTAL:         %p->%p\n",
-           dsi.v_kernstart, dsi.v_kernend,
-           vinitrd_start, vinitrd_end,
-           vphysmap_start, vphysmap_end,
-           vpt_start, vpt_end,
-           vstartinfo_start, vstartinfo_end,
-           vstack_start, vstack_end,
-           dsi.v_start, v_end);
-    printk(" ENTRY ADDRESS: %p\n", dsi.v_kernentry);
-
-    if ( (v_end - dsi.v_start) > (nr_pages * PAGE_SIZE) )
-    {
-        printk("Initial guest OS requires too much space\n"
-               "(%luMB is greater than %luMB limit)\n",
-               (v_end-dsi.v_start)>>20, (nr_pages<<PAGE_SHIFT)>>20);
-        return -ENOMEM;
-    }
-
-    /*
-     * Protect the lowest 1GB of memory. We use a temporary mapping there
-     * from which we copy the kernel and ramdisk images.
-     */
-    if ( dsi.v_start < (1UL<<30) )
-    {
-        printk("Initial loading isn't allowed to lowest 1GB of memory.\n");
-        return -EINVAL;
-    }
-
-    /* Paranoia: scrub DOM0's memory allocation. */
-    printk("Scrubbing DOM0 RAM: ");
-    dst = (char *)alloc_start;
-    while ( dst < (char *)alloc_end )
-    {
-#define SCRUB_BYTES (100 * 1024 * 1024) /* 100MB */
-        printk(".");
-        touch_nmi_watchdog();
-        if ( ((char *)alloc_end - dst) > SCRUB_BYTES )
-        {
-            memset(dst, 0, SCRUB_BYTES);
-            dst += SCRUB_BYTES;
-        }
-        else
-        {
-            memset(dst, 0, (char *)alloc_end - dst);
-            break;
-        }
-    }
-    printk("done.\n");
-
-    /* Construct a frame-allocation list for the initial domain. */
-    for ( mfn = (alloc_start>>PAGE_SHIFT);
-          mfn < (alloc_end>>PAGE_SHIFT);
-          mfn++ )
-    {
-        page = &frame_table[mfn];
-        page_set_owner(page, d);
-        page->u.inuse.type_info = 0;
-        page->count_info = PGC_allocated | 1;
-        list_add_tail(&page->list, &d->page_list);
-        d->tot_pages++; d->max_pages++;
-    }
-
-    mpt_alloc = (vpt_start - dsi.v_start) + alloc_start;
-
-    SET_GDT_ENTRIES(ed, DEFAULT_GDT_ENTRIES);
-    SET_GDT_ADDRESS(ed, DEFAULT_GDT_ADDRESS);
-
-    /*
-     * We're basically forcing default RPLs to 1, so that our "what privilege
-     * level are we returning to?" logic works.
-     */
-    ed->arch.failsafe_selector = FLAT_KERNEL_CS;
-    ed->arch.event_selector    = FLAT_KERNEL_CS;
-    ed->arch.kernel_ss = FLAT_KERNEL_SS;
-    for ( i = 0; i < 256; i++ )
-        ed->arch.traps[i].cs = FLAT_KERNEL_CS;
-
-    /* WARNING: The new domain must have its 'processor' field filled in! */
-    l2start = l2tab = (l2_pgentry_t *)mpt_alloc; mpt_alloc += PAGE_SIZE;
-    memcpy(l2tab, &idle_pg_table[0], PAGE_SIZE);
-    l2tab[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
-        mk_l2_pgentry((unsigned long)l2start | __PAGE_HYPERVISOR);
-    l2tab[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] =
-        mk_l2_pgentry(__pa(d->arch.mm_perdomain_pt) | __PAGE_HYPERVISOR);
-    ed->arch.guest_table = mk_pagetable((unsigned long)l2start);
-
-    l2tab += l2_table_offset(dsi.v_start);
-    mfn = alloc_start >> PAGE_SHIFT;
-    for ( count = 0; count < ((v_end-dsi.v_start)>>PAGE_SHIFT); count++ )
-    {
-        if ( !((unsigned long)l1tab & (PAGE_SIZE-1)) )
-        {
-            l1start = l1tab = (l1_pgentry_t *)mpt_alloc;
-            mpt_alloc += PAGE_SIZE;
-            *l2tab++ = mk_l2_pgentry((unsigned long)l1start | L2_PROT);
-            clear_page(l1tab);
-            if ( count == 0 )
-                l1tab += l1_table_offset(dsi.v_start);
-        }
-        *l1tab++ = mk_l1_pgentry((mfn << PAGE_SHIFT) | L1_PROT);
-
-        page = &frame_table[mfn];
-        if ( !get_page_and_type(page, d, PGT_writable_page) )
-            BUG();
-
-        mfn++;
-    }
-
-    /* Pages that are part of page tables must be read only. */
-    l2tab = l2start + l2_table_offset(vpt_start);
-    l1start = l1tab = (l1_pgentry_t *)l2_pgentry_to_phys(*l2tab);
-    l1tab += l1_table_offset(vpt_start);
-    for ( count = 0; count < nr_pt_pages; count++ )
-    {
-        *l1tab = mk_l1_pgentry(l1_pgentry_val(*l1tab) & ~_PAGE_RW);
-        page = &frame_table[l1_pgentry_to_pfn(*l1tab)];
-        if ( count == 0 )
-        {
-            page->u.inuse.type_info &= ~PGT_type_mask;
-            page->u.inuse.type_info |= PGT_l2_page_table;
-
-            /*
-             * No longer writable: decrement the type_count.
-             * Installed as CR3: increment both the ref_count and type_count.
-             * Net: just increment the ref_count.
-             */
-            get_page(page, d); /* an extra ref because of readable mapping */
-
-            /* Get another ref to L2 page so that it can be pinned. */
-            if ( !get_page_and_type(page, d, PGT_l2_page_table) )
-                BUG();
-            set_bit(_PGT_pinned, &page->u.inuse.type_info);
-        }
-        else
-        {
-            page->u.inuse.type_info &= ~PGT_type_mask;
-            page->u.inuse.type_info |= PGT_l1_page_table;
-            page->u.inuse.type_info |=
-                ((dsi.v_start>>L2_PAGETABLE_SHIFT)+(count-1))<<PGT_va_shift;
-
-            /*
-             * No longer writable: decrement the type_count.
-             * This is an L1 page, installed in a validated L2 page:
-             * increment both the ref_count and type_count.
-             * Net: just increment the ref_count.
-             */
-            get_page(page, d); /* an extra ref because of readable mapping */
-        }
-        if ( !((unsigned long)++l1tab & (PAGE_SIZE - 1)) )
-            l1start = l1tab = (l1_pgentry_t *)l2_pgentry_to_phys(*++l2tab);
-    }
-
-    /* Set up shared-info area. */
-    update_dom_time(d);
-    d->shared_info->domain_time = 0;
-    /* Mask all upcalls... */
-    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
-        d->shared_info->vcpu_data[i].evtchn_upcall_mask = 1;
-    d->shared_info->n_vcpu = smp_num_cpus;
-
-    /* setup shadow and monitor tables */
-    update_pagetables(ed);
-
-    /* Install the new page tables. */
-    __cli();
-    write_ptbase(ed);
-
-    /* Copy the OS image. */
-    (void)loadelfimage(image_start);
-
-    /* Copy the initial ramdisk. */
-    if ( initrd_len != 0 )
-        memcpy((void *)vinitrd_start, initrd_start, initrd_len);
-
-    /* Set up start info area. */
-    si = (start_info_t *)vstartinfo_start;
-    memset(si, 0, PAGE_SIZE);
-    si->nr_pages     = d->tot_pages;
-    si->shared_info  = virt_to_phys(d->shared_info);
-    si->flags        = SIF_PRIVILEGED | SIF_INITDOMAIN;
-    si->pt_base      = vpt_start;
-    si->nr_pt_frames = nr_pt_pages;
-    si->mfn_list     = vphysmap_start;
-
-    /* Write the phys->machine and machine->phys table entries. */
-    for ( pfn = 0; pfn < d->tot_pages; pfn++ )
-    {
-        mfn = pfn + (alloc_start>>PAGE_SHIFT);
-#ifndef NDEBUG
-#define REVERSE_START ((v_end - dsi.v_start) >> PAGE_SHIFT)
-        if ( pfn > REVERSE_START )
-            mfn = (alloc_end>>PAGE_SHIFT) - (pfn - REVERSE_START);
-#endif
-        ((u32 *)vphysmap_start)[pfn] = mfn;
-        machine_to_phys_mapping[mfn] = pfn;
-    }
-
-    if ( initrd_len != 0 )
-    {
-        si->mod_start = vinitrd_start;
-        si->mod_len   = initrd_len;
-        printk("Initrd len 0x%lx, start at 0x%p\n",
-               si->mod_len, si->mod_start);
-    }
-
-    dst = si->cmd_line;
-    if ( cmdline != NULL )
-    {
-        for ( i = 0; i < 255; i++ )
-        {
-            if ( cmdline[i] == '\0' )
-                break;
-            *dst++ = cmdline[i];
-        }
-    }
-    *dst = '\0';
-
-    /* Reinstate the caller's page tables. */
-    write_ptbase(current);
-    __sti();
-
-    /* Destroy low mappings - they were only for our convenience. */
-    for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
-        if ( l2_pgentry_val(l2start[i]) & _PAGE_PSE )
-            l2start[i] = mk_l2_pgentry(0);
-    zap_low_mappings(); /* Do the same for the idle page tables. */
-
-    /* DOM0 gets access to everything. */
-    physdev_init_dom0(d);
-
-    set_bit(DF_CONSTRUCTED, &d->d_flags);
-
-    new_thread(ed, dsi.v_kernentry, vstack_end, vstartinfo_start);
-
-#ifndef NDEBUG
-    if (0) /* XXXXX DO NOT CHECK IN ENABLED !!! (but useful for testing so leave) */
-    {
-        shadow_mode_enable(d, SHM_enable);
-        update_pagetables(ed); /* XXX SMP */
-    }
-#endif
-
-    return 0;
-}
-
-int elf_sanity_check(Elf_Ehdr *ehdr)
-{
-    if ( !IS_ELF(*ehdr) ||
-         (ehdr->e_ident[EI_CLASS] != ELFCLASS32) ||
-         (ehdr->e_ident[EI_DATA] != ELFDATA2LSB) ||
-         (ehdr->e_type != ET_EXEC) ||
-         (ehdr->e_machine != EM_386) )
-    {
-        printk("DOM0 image is not i386-compatible executable Elf image.\n");
-        return 0;
-    }
-
-    return 1;
-}
-
-/*
- * Local variables:
- * mode: C
- * c-set-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- */
--- a/xen/arch/x86/x86_32/domain_page.c	Fri Feb 25 17:06:27 2005 +0000
+++ b/xen/arch/x86/x86_32/domain_page.c	Fri Feb 25 18:37:31 2005 +0000
@@ -45,7 +45,7 @@ void *map_domain_mem(unsigned long pa)
     unsigned int idx, cpu = smp_processor_id();
     unsigned long *cache = mapcache;
 #ifndef NDEBUG
-    unsigned flush_count = 0;
+    unsigned int flush_count = 0;
 #endif
 
     ASSERT(!in_irq());
@@ -65,17 +65,11 @@ void *map_domain_mem(unsigned long pa)
         idx = map_idx = (map_idx + 1) & (MAPCACHE_ENTRIES - 1);
         if ( unlikely(idx == 0) )
         {
+            ASSERT(flush_count++ == 0);
             flush_all_ready_maps();
             perfc_incrc(domain_page_tlb_flush);
             local_flush_tlb();
             shadow_epoch[cpu] = ++epoch;
-#ifndef NDEBUG
-            if ( unlikely(flush_count++) )
-            {
-                // we've run out of map cache entries...
-                BUG();
-            }
-#endif
         }
     }
     while ( cache[idx] != 0 );
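The hunk above condenses the old `#ifndef NDEBUG` block into `ASSERT(flush_count++ == 0)`: since ASSERT compiles away when NDEBUG is defined, the side-effecting increment is debug-only either way, and the invariant it encodes is that one probe of the map cache may wrap past index 0 at most once; a second wrap means every slot is still occupied. A minimal user-space model of that round-robin slot search, with the standard `assert` as a stand-in for Xen's ASSERT and a small MAPCACHE_ENTRIES chosen for illustration:

```c
#include <assert.h>
#include <stdio.h>

#define MAPCACHE_ENTRIES 8            /* must be a power of two */

static unsigned long cache[MAPCACHE_ENTRIES]; /* 0 == slot free */
static unsigned int  map_idx;

static unsigned int find_slot(void)
{
    unsigned int idx, flush_count = 0;
    do {
        /* Round-robin scan; the mask wraps idx back to 0. */
        idx = map_idx = (map_idx + 1) & (MAPCACHE_ENTRIES - 1);
        if ( idx == 0 )
            assert(flush_count++ == 0); /* a second wrap => cache exhausted */
    } while ( cache[idx] != 0 );
    return idx;
}

int main(void)
{
    cache[1] = cache[2] = 1;            /* two slots busy */
    printf("got slot %u\n", find_slot()); /* prints 3 */
    return 0;
}
```

The real code additionally flushes stale entries and the TLB on each wrap (flush_all_ready_maps(), local_flush_tlb()), which this sketch omits.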
--- a/xen/arch/x86/x86_64/domain_build.c	Fri Feb 25 17:06:27 2005 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,424 +0,0 @@
-/******************************************************************************
- * domain_build.c
- *
- * Copyright (c) 2002-2005, K A Fraser
- */
-
-#include <xen/config.h>
-#include <xen/init.h>
-#include <xen/lib.h>
-#include <xen/sched.h>
-#include <xen/smp.h>
-#include <xen/delay.h>
-#include <asm/regs.h>
-#include <asm/system.h>
-#include <asm/io.h>
-#include <asm/processor.h>
-#include <asm/shadow.h>
-#include <asm/desc.h>
-#include <asm/i387.h>
-#include <xen/event.h>
-#include <xen/elf.h>
-#include <xen/kernel.h>
-
-/* Allow ring-3 access in long mode as guest cannot use ring 1. */
-#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
-#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
-#define L3_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
-#define L4_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
-
-#define round_pgup(_p)    (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
-#define round_pgdown(_p)  ((_p)&PAGE_MASK)
-
-int construct_dom0(struct domain *d,
-                   unsigned long alloc_start,
-                   unsigned long alloc_end,
-                   unsigned long _image_start, unsigned long image_len,
-                   unsigned long _initrd_start, unsigned long initrd_len,
-                   char *cmdline)
-{
-    char *dst;
-    int i, rc;
-    unsigned long pfn, mfn;
-    unsigned long nr_pages = (alloc_end - alloc_start) >> PAGE_SHIFT;
-    unsigned long nr_pt_pages;
-    unsigned long count;
-    l4_pgentry_t *l4tab = NULL, *l4start = NULL;
-    l3_pgentry_t *l3tab = NULL, *l3start = NULL;
-    l2_pgentry_t *l2tab = NULL, *l2start = NULL;
-    l1_pgentry_t *l1tab = NULL, *l1start = NULL;
-    struct pfn_info *page = NULL;
-    start_info_t *si;
-    struct exec_domain *ed = d->exec_domain[0];
-    char *image_start  = __va(_image_start);
-    char *initrd_start = __va(_initrd_start);
-
-    /*
-     * This fully describes the memory layout of the initial domain. All
-     * *_start address are page-aligned, except v_start (and v_end) which are
-     * superpage-aligned.
-     */
-    struct domain_setup_info dsi;
-    unsigned long vinitrd_start;
-    unsigned long vinitrd_end;
-    unsigned long vphysmap_start;
-    unsigned long vphysmap_end;
-    unsigned long vstartinfo_start;
-    unsigned long vstartinfo_end;
-    unsigned long vstack_start;
-    unsigned long vstack_end;
-    unsigned long vpt_start;
-    unsigned long vpt_end;
-    unsigned long v_end;
-
-    /* Machine address of next candidate page-table page. */
-    unsigned long mpt_alloc;
-
-    extern void physdev_init_dom0(struct domain *);
-
-    /* Sanity! */
-    if ( d->id != 0 )
-        BUG();
-    if ( test_bit(DF_CONSTRUCTED, &d->d_flags) )
-        BUG();
-
-    memset(&dsi, 0, sizeof(struct domain_setup_info));
-
-    printk("*** LOADING DOMAIN 0 ***\n");
-
-    /*
-     * This is all a bit grim. We've moved the modules to the "safe" physical
-     * memory region above MAP_DIRECTMAP_ADDRESS (48MB). Later in this
-     * routine we're going to copy it down into the region that's actually
-     * been allocated to domain 0. This is highly likely to be overlapping, so
-     * we use a forward copy.
-     *
-     * MAP_DIRECTMAP_ADDRESS should be safe. The worst case is a machine with
-     * 4GB and lots of network/disk cards that allocate loads of buffers.
-     * We'll have to revisit this if we ever support PAE (64GB).
-     */
-
-    rc = parseelfimage(image_start, image_len, &dsi);
-    if ( rc != 0 )
-        return rc;
-
-    /* Set up domain options */
-    if ( dsi.use_writable_pagetables )
-        vm_assist(d, VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);
-
-    /* Align load address to 4MB boundary. */
-    dsi.v_start &= ~((1UL<<22)-1);
-
-    /*
-     * Why do we need this? The number of page-table frames depends on the
-     * size of the bootstrap address space. But the size of the address space
-     * depends on the number of page-table frames (since each one is mapped
-     * read-only). We have a pair of simultaneous equations in two unknowns,
-     * which we solve by exhaustive search.
-     */
-    vinitrd_start    = round_pgup(dsi.v_kernend);
-    vinitrd_end      = vinitrd_start + initrd_len;
-    vphysmap_start   = round_pgup(vinitrd_end);
-    vphysmap_end     = vphysmap_start + (nr_pages * sizeof(u32));
-    vpt_start        = round_pgup(vphysmap_end);
-    for ( nr_pt_pages = 2; ; nr_pt_pages++ )
-    {
-        vpt_end          = vpt_start + (nr_pt_pages * PAGE_SIZE);
-        vstartinfo_start = vpt_end;
-        vstartinfo_end   = vstartinfo_start + PAGE_SIZE;
-        vstack_start     = vstartinfo_end;
-        vstack_end       = vstack_start + PAGE_SIZE;
-        v_end            = (vstack_end + (1UL<<22)-1) & ~((1UL<<22)-1);
-        if ( (v_end - vstack_end) < (512UL << 10) )
-            v_end += 1UL << 22; /* Add extra 4MB to get >= 512kB padding. */
-#define NR(_l,_h,_s) \
-    (((((_h) + ((1UL<<(_s))-1)) & ~((1UL<<(_s))-1)) - \
-       ((_l) & ~((1UL<<(_s))-1))) >> (_s))
-        if ( (1 + /* # L4 */
-              NR(dsi.v_start, v_end, L4_PAGETABLE_SHIFT) + /* # L3 */
-              NR(dsi.v_start, v_end, L3_PAGETABLE_SHIFT) + /* # L2 */
-              NR(dsi.v_start, v_end, L2_PAGETABLE_SHIFT))  /* # L1 */
-             <= nr_pt_pages )
-            break;
-    }
-
-    printk("PHYSICAL MEMORY ARRANGEMENT:\n"
-           " Kernel image:  %p->%p\n"
-           " Initrd image:  %p->%p\n"
-           " Dom0 alloc.:   %p->%p\n",
-           _image_start, _image_start + image_len,
-           _initrd_start, _initrd_start + initrd_len,
-           alloc_start, alloc_end);
-    printk("VIRTUAL MEMORY ARRANGEMENT:\n"
-           " Loaded kernel: %p->%p\n"
-           " Init. ramdisk: %p->%p\n"
-           " Phys-Mach map: %p->%p\n"
-           " Page tables:   %p->%p\n"
-           " Start info:    %p->%p\n"
-           " Boot stack:    %p->%p\n"
-           " TOTAL:         %p->%p\n",
-           dsi.v_kernstart, dsi.v_kernend,
-           vinitrd_start, vinitrd_end,
-           vphysmap_start, vphysmap_end,
-           vpt_start, vpt_end,
-           vstartinfo_start, vstartinfo_end,
-           vstack_start, vstack_end,
-           dsi.v_start, v_end);
-    printk(" ENTRY ADDRESS: %p\n", dsi.v_kernentry);
-
-    if ( (v_end - dsi.v_start) > (nr_pages * PAGE_SIZE) )
-    {
-        printk("Initial guest OS requires too much space\n"
-               "(%luMB is greater than %luMB limit)\n",
-               (v_end-dsi.v_start)>>20, (nr_pages<<PAGE_SHIFT)>>20);
-        return -ENOMEM;
-    }
-
-    /* Overlap with Xen protected area? */
-    if ( (dsi.v_start < HYPERVISOR_VIRT_END) &&
-         (v_end > HYPERVISOR_VIRT_START) )
-    {
-        printk("DOM0 image overlaps with Xen private area.\n");
-        return -EINVAL;
-    }
-
-    /* Paranoia: scrub DOM0's memory allocation. */
-    printk("Scrubbing DOM0 RAM: ");
-    dst = __va(alloc_start);
-    while ( __pa(dst) < alloc_end )
-    {
-#define SCRUB_BYTES (100 * 1024 * 1024) /* 100MB */
-        printk(".");
-        touch_nmi_watchdog();
-        if ( (alloc_end - __pa(dst)) > SCRUB_BYTES )
-        {
-            memset(dst, 0, SCRUB_BYTES);
-            dst += SCRUB_BYTES;
-        }
-        else
-        {
-            memset(dst, 0, alloc_end - __pa(dst));
-            break;
-        }
-    }
-    printk("done.\n");
-
-    /* Construct a frame-allocation list for the initial domain. */
-    for ( mfn = (alloc_start>>PAGE_SHIFT);
-          mfn < (alloc_end>>PAGE_SHIFT);
-          mfn++ )
-    {
-        page = &frame_table[mfn];
-        page_set_owner(page, d);
-        page->u.inuse.type_info = 0;
-        page->count_info = PGC_allocated | 1;
-        list_add_tail(&page->list, &d->page_list);
-        d->tot_pages++; d->max_pages++;
-    }
-
-    mpt_alloc = (vpt_start - dsi.v_start) + alloc_start;
-
-    SET_GDT_ENTRIES(ed, DEFAULT_GDT_ENTRIES);
-    SET_GDT_ADDRESS(ed, DEFAULT_GDT_ADDRESS);
-
-    /*
-     * We're basically forcing default RPLs to 1, so that our "what privilege
-     * level are we returning to?" logic works.
-     */
-    ed->arch.failsafe_selector = FLAT_KERNEL_CS;
-    ed->arch.event_selector    = FLAT_KERNEL_CS;
-    ed->arch.kernel_ss = FLAT_KERNEL_SS;
-    for ( i = 0; i < 256; i++ )
-        ed->arch.traps[i].cs = FLAT_KERNEL_CS;
-
-    /* WARNING: The new domain must have its 'processor' field filled in! */
-    phys_to_page(mpt_alloc)->u.inuse.type_info = PGT_l4_page_table;
-    l4start = l4tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE;
-    memcpy(l4tab, &idle_pg_table[0], PAGE_SIZE);
-    l4tab[l4_table_offset(LINEAR_PT_VIRT_START)] =
-        mk_l4_pgentry(__pa(l4start) | __PAGE_HYPERVISOR);
-    l4tab[l4_table_offset(PERDOMAIN_VIRT_START)] =
-        mk_l4_pgentry(__pa(d->arch.mm_perdomain_l3) | __PAGE_HYPERVISOR);
-    ed->arch.guest_table = mk_pagetable(__pa(l4start));
-
-    l4tab += l4_table_offset(dsi.v_start);
-    mfn = alloc_start >> PAGE_SHIFT;
-    for ( count = 0; count < ((v_end-dsi.v_start)>>PAGE_SHIFT); count++ )
-    {
-        if ( !((unsigned long)l1tab & (PAGE_SIZE-1)) )
-        {
-            phys_to_page(mpt_alloc)->u.inuse.type_info = PGT_l1_page_table;
-            l1start = l1tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE;
-            clear_page(l1tab);
-            if ( count == 0 )
-                l1tab += l1_table_offset(dsi.v_start);
-            if ( !((unsigned long)l2tab & (PAGE_SIZE-1)) )
-            {
-                phys_to_page(mpt_alloc)->u.inuse.type_info = PGT_l2_page_table;
-                l2start = l2tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE;
-                clear_page(l2tab);
-                if ( count == 0 )
-                    l2tab += l2_table_offset(dsi.v_start);
-                if ( !((unsigned long)l3tab & (PAGE_SIZE-1)) )
-                {
-                    phys_to_page(mpt_alloc)->u.inuse.type_info =
-                        PGT_l3_page_table;
-                    l3start = l3tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE;
-                    clear_page(l3tab);
-                    if ( count == 0 )
-                        l3tab += l3_table_offset(dsi.v_start);
-                    *l4tab++ = mk_l4_pgentry(__pa(l3start) | L4_PROT);
-                }
-                *l3tab++ = mk_l3_pgentry(__pa(l2start) | L3_PROT);
-            }
-            *l2tab++ = mk_l2_pgentry(__pa(l1start) | L2_PROT);
-        }
-        *l1tab++ = mk_l1_pgentry((mfn << PAGE_SHIFT) | L1_PROT);
-
-        page = &frame_table[mfn];
-        if ( (page->u.inuse.type_info == 0) &&
-             !get_page_and_type(page, d, PGT_writable_page) )
-            BUG();
-
-        mfn++;
-    }
-
-    /* Pages that are part of page tables must be read only. */
-    l4tab = l4start + l4_table_offset(vpt_start);
-    l3start = l3tab = l4_pgentry_to_l3(*l4tab);
-    l3tab += l3_table_offset(vpt_start);
-    l2start = l2tab = l3_pgentry_to_l2(*l3tab);
-    l2tab += l2_table_offset(vpt_start);
-    l1start = l1tab = l2_pgentry_to_l1(*l2tab);
-    l1tab += l1_table_offset(vpt_start);
-    for ( count = 0; count < nr_pt_pages; count++ )
-    {
-        *l1tab = mk_l1_pgentry(l1_pgentry_val(*l1tab) & ~_PAGE_RW);
-        page = &frame_table[l1_pgentry_to_pfn(*l1tab)];
-
-        /* Read-only mapping + PGC_allocated + page-table page. */
-        page->count_info         = PGC_allocated | 3;
-        page->u.inuse.type_info |= PGT_validated | 1;
-
-        /* Top-level p.t. is pinned. */
-        if ( (page->u.inuse.type_info & PGT_type_mask) == PGT_l4_page_table )
-        {
-            page->count_info        += 1;
-            page->u.inuse.type_info += 1 | PGT_pinned;
-        }
-
-        /* Iterate. */
-        if ( !((unsigned long)++l1tab & (PAGE_SIZE - 1)) )
-        {
-            if ( !((unsigned long)++l2tab & (PAGE_SIZE - 1)) )
-            {
-                if ( !((unsigned long)++l3tab & (PAGE_SIZE - 1)) )
-                    l3start = l3tab = l4_pgentry_to_l3(*++l4tab);
-                l2start = l2tab = l3_pgentry_to_l2(*l3tab);
-            }
-            l1start = l1tab = l2_pgentry_to_l1(*l2tab);
-        }
-    }
-
-    /* Set up shared-info area. */
-    update_dom_time(d);
-    d->shared_info->domain_time = 0;
-    /* Mask all upcalls... */
-    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
-        d->shared_info->vcpu_data[i].evtchn_upcall_mask = 1;
-    d->shared_info->n_vcpu = smp_num_cpus;
-
-    /* Set up shadow and monitor tables. */
-    update_pagetables(ed);
-
-    /* Install the new page tables. */
-    __cli();
-    write_ptbase(ed);
-
-    /* Copy the OS image. */
-    (void)loadelfimage(image_start);
-
-    /* Copy the initial ramdisk. */
-    if ( initrd_len != 0 )
-        memcpy((void *)vinitrd_start, initrd_start, initrd_len);
-
-    /* Set up start info area. */
-    si = (start_info_t *)vstartinfo_start;
-    memset(si, 0, PAGE_SIZE);
-    si->nr_pages     = d->tot_pages;
-    si->shared_info  = virt_to_phys(d->shared_info);
-    si->flags        = SIF_PRIVILEGED | SIF_INITDOMAIN;
-    si->pt_base      = vpt_start;
-    si->nr_pt_frames = nr_pt_pages;
-    si->mfn_list     = vphysmap_start;
-
-    /* Write the phys->machine and machine->phys table entries. */
-    for ( pfn = 0; pfn < d->tot_pages; pfn++ )
-    {
-        mfn = pfn + (alloc_start>>PAGE_SHIFT);
-#ifndef NDEBUG
-#define REVERSE_START ((v_end - dsi.v_start) >> PAGE_SHIFT)
-        if ( pfn > REVERSE_START )
-            mfn = (alloc_end>>PAGE_SHIFT) - (pfn - REVERSE_START);
-#endif
-        ((u32 *)vphysmap_start)[pfn] = mfn;
-        machine_to_phys_mapping[mfn] = pfn;
-    }
-
-    if ( initrd_len != 0 )
-    {
-        si->mod_start = vinitrd_start;
-        si->mod_len   = initrd_len;
-        printk("Initrd len 0x%lx, start at 0x%p\n",
-               si->mod_len, si->mod_start);
-    }
-
-    dst = si->cmd_line;
-    if ( cmdline != NULL )
-    {
-        for ( i = 0; i < 255; i++ )
-        {
-            if ( cmdline[i] == '\0' )
-                break;
-            *dst++ = cmdline[i];
-        }
-    }
-    *dst = '\0';
-
-    /* Reinstate the caller's page tables. */
-    write_ptbase(current);
-    __sti();
-
-    /* DOM0 gets access to everything. */
-    physdev_init_dom0(d);
-
-    set_bit(DF_CONSTRUCTED, &d->d_flags);
-
-    new_thread(ed, dsi.v_kernentry, vstack_end, vstartinfo_start);
-
-    return 0;
-}
-
-int elf_sanity_check(Elf_Ehdr *ehdr)
-{
-    if ( !IS_ELF(*ehdr) ||
-         (ehdr->e_ident[EI_CLASS] != ELFCLASS64) ||
-         (ehdr->e_ident[EI_DATA] != ELFDATA2LSB) ||
-         (ehdr->e_type != ET_EXEC) ||
-         (ehdr->e_machine != EM_X86_64) )
-    {
-        printk("DOM0 image is not x86/64-compatible executable Elf image.\n");
-        return 0;
-    }
-
-    return 1;
-}
-
-/*
- * Local variables:
- * mode: C
- * c-set-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- */
--- a/xen/common/page_alloc.c	Fri Feb 25 17:06:27 2005 +0000
+++ b/xen/common/page_alloc.c	Fri Feb 25 18:37:31 2005 +0000
@@ -203,8 +203,8 @@ unsigned long alloc_boot_pages(unsigned
 #define MEMZONE_DOM 1
 #define NR_ZONES    2
 
-/* Up to 2^10 pages can be allocated at once. */
-#define MAX_ORDER 10
+/* Up to 2^20 pages can be allocated at once. */
+#define MAX_ORDER 20
 static struct list_head heap[NR_ZONES][MAX_ORDER+1];
 
 static unsigned long avail[NR_ZONES];
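Why MAX_ORDER jumps from 10 to 20: the buddy heap can satisfy a single request of at most 2^MAX_ORDER pages, and DOM0 now tries to take nearly all of RAM in one alloc_largest() call. Assuming 4 KB pages, the old cap was 4 MB per allocation; the new one is 4 GB. A quick check of the arithmetic:

```c
#include <stdio.h>

int main(void)
{
    unsigned long long page_size = 4096; /* 4 KB pages assumed */
    unsigned int orders[] = { 10, 20 };

    /* Largest single allocation = page_size * 2^MAX_ORDER. */
    for ( int i = 0; i < 2; i++ )
        printf("MAX_ORDER %2u -> max chunk %llu MB\n",
               orders[i], (page_size << orders[i]) >> 20);
    return 0;
}
```

This prints 4 MB for order 10 and 4096 MB (4 GB) for order 20, which is why the old limit would have fragmented a whole-of-memory DOM0 grab into thousands of pieces.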
--- a/xen/drivers/char/console.c	Fri Feb 25 17:06:27 2005 +0000
+++ b/xen/drivers/char/console.c	Fri Feb 25 18:37:31 2005 +0000
@@ -577,6 +577,8 @@ static int __init debugtrace_init(void)
     debugtrace_buf = (unsigned char *)alloc_xenheap_pages(order);
     ASSERT(debugtrace_buf != NULL);
 
+    memset(debugtrace_buf, '\0', debugtrace_bytes);
+
     return 0;
 }
 __initcall(debugtrace_init);
--- a/xen/include/asm-x86/shadow.h	Fri Feb 25 17:06:27 2005 +0000
+++ b/xen/include/asm-x86/shadow.h	Fri Feb 25 18:37:31 2005 +0000
@@ -13,17 +13,20 @@
 #define PSH_hl2         (1<<30) /* page is an hl2 */
 #define PSH_pfn_mask    ((1<<21)-1)
 
-/* Shadow PT operation mode : shadow-mode variable in arch_domain. */
-
+/* Shadow PT operation mode: shadow-mode variable in arch_domain. */
 #define SHM_enable    (1<<0) /* we're in one of the shadow modes */
 #define SHM_log_dirty (1<<1) /* enable log dirty mode */
-#define SHM_translate (1<<2) /* do p2m tranaltion on guest tables */
+#define SHM_translate (1<<2) /* do p2m translation on guest tables */
 #define SHM_external  (1<<3) /* external page table, not used by Xen */
 
 #define shadow_mode_enabled(_d)   ((_d)->arch.shadow_mode)
 #define shadow_mode_log_dirty(_d) ((_d)->arch.shadow_mode & SHM_log_dirty)
 #define shadow_mode_translate(_d) ((_d)->arch.shadow_mode & SHM_translate)
+#ifndef __x86_64__ /* XXX Currently breaks the 64-bit build. */
 #define shadow_mode_external(_d)  ((_d)->arch.shadow_mode & SHM_external)
+#else
+#define shadow_mode_external(_d)  (0)
+#endif
 
 #define shadow_linear_pg_table ((l1_pgentry_t *)SH_LINEAR_PT_VIRT_START)
 #define shadow_linear_l2_table ((l2_pgentry_t *)(SH_LINEAR_PT_VIRT_START + \
@@ -804,6 +807,10 @@ static inline void update_pagetables(str
 
     if ( !shadow_mode_external(d) )
     {
+        /*
+         * Internal page tables:
+         * No need to allocate a separate page table for Xen.
+         */
 #ifdef __x86_64__
         if ( !(ed->arch.flags & TF_kernel_mode) )
             ed->arch.monitor_table = ed->arch.guest_table_user;
@@ -816,9 +823,10 @@ static inline void update_pagetables(str
     }
     else
     {
-        // External page tables...
-        // Allocate a monitor page table if we don't already have one.
-        //
+        /*
+         * External page tables:
+         * Allocate a monitor page table if we don't already have one.
+         */
         if ( unlikely(!pagetable_val(ed->arch.monitor_table)) )
             ed->arch.monitor_table =
                 mk_pagetable(alloc_monitor_pagetable(ed) << PAGE_SHIFT);
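The `#ifndef __x86_64__` hunk above uses a common stubbing trick: defining shadow_mode_external() as the constant 0 on the 64-bit build lets the compiler fold away every `if ( shadow_mode_external(...) )` branch, so code paths that do not yet compile on x86_64 are never referenced. A minimal standalone model of the pattern (FEATURE_READY and mode_external are hypothetical names for illustration):

```c
#include <stdio.h>

#define FEATURE_READY 0  /* stand-in for "not __x86_64__" */

#if FEATURE_READY
#define mode_external(flags) ((flags) & (1 << 3)) /* like SHM_external */
#else
#define mode_external(flags) (0)  /* constant: dead branch folds away */
#endif

int main(void)
{
    if ( mode_external(0xf) )
        printf("external page tables\n"); /* unreachable when stubbed */
    else
        printf("internal page tables\n");
    return 0;
}
```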
--- a/xen/include/xen/sched.h	Fri Feb 25 17:06:27 2005 +0000
+++ b/xen/include/xen/sched.h	Fri Feb 25 18:37:31 2005 +0000
@@ -215,12 +215,11 @@ static inline void get_knownalive_domain
 
 extern struct domain *do_createdomain(
     domid_t dom_id, unsigned int cpu);
-extern int construct_dom0(struct domain *d,
-                          unsigned long alloc_start,
-                          unsigned long alloc_end,
-                          unsigned long image_start, unsigned long image_len,
-                          unsigned long initrd_start, unsigned long initrd_len,
-                          char *cmdline);
+extern int construct_dom0(
+    struct domain *d,
+    unsigned long image_start, unsigned long image_len,
+    unsigned long initrd_start, unsigned long initrd_len,
+    char *cmdline);
 extern int final_setup_guest(struct domain *d, dom0_builddomain_t *);
 
 struct domain *find_domain_by_id(domid_t dom);