ia64/xen-unstable

changeset 3955:a6914c2c15cf

bitkeeper revision 1.1241 (42247288lnXKH-KF7Ay_vzBHIGmGeg)

Merge wyvis.research.intel-research.net:/home/irchomes/rneugeba/src/xeno/xeno.bk
into wyvis.research.intel-research.net:/home/irchomes/rneugeba/src/xeno/xen.bench

Signed-off-by: michael.fetterman@cl.cam.ac.uk
author rneugeba@wyvis.research.intel-research.net
date Tue Mar 01 13:47:52 2005 +0000 (2005-03-01)
parents 3a2834dc4f1b 4202d86eff9f
children cfee4c4a8ed6
files .rootkeys BitKeeper/etc/logging_ok Makefile extras/mini-os/x86_32.S extras/mini-os/x86_64.S linux-2.4.29-xen-sparse/arch/xen/kernel/head.S linux-2.4.29-xen-sparse/arch/xen/mm/init.c linux-2.4.29-xen-sparse/include/asm-xen/synch_bitops.h linux-2.4.29-xen-sparse/include/asm-xen/system.h linux-2.4.29-xen-sparse/mkbuildtree linux-2.6.10-xen-sparse/arch/xen/i386/kernel/head.S linux-2.6.10-xen-sparse/arch/xen/i386/mm/init.c netbsd-2.0-xen-sparse/sys/arch/xen/i386/locore.S tools/Rules.mk tools/libxc/xc_linux_build.c tools/libxc/xc_vmx_build.c tools/misc/xend tools/python/xen/lowlevel/xu/xu.c tools/xcs/xcs.c tools/xcs/xcs_proto.h tools/xcs/xcsdump.c xen/Rules.mk xen/arch/ia64/domain.c xen/arch/ia64/vcpu.c xen/arch/ia64/xensetup.c xen/arch/x86/boot/x86_32.S xen/arch/x86/boot/x86_64.S xen/arch/x86/domain.c xen/arch/x86/domain_build.c xen/arch/x86/setup.c xen/arch/x86/x86_32/domain_build.c xen/arch/x86/x86_32/domain_page.c xen/arch/x86/x86_64/domain_build.c xen/common/elf.c xen/common/page_alloc.c xen/drivers/char/console.c xen/include/asm-x86/shadow.h xen/include/xen/sched.h
line diff
     1.1 --- a/.rootkeys	Tue Mar 01 13:47:27 2005 +0000
     1.2 +++ b/.rootkeys	Tue Mar 01 13:47:52 2005 +0000
     1.3 @@ -113,7 +113,6 @@ 3e5a4e676uK4xErTBDH6XJREn9LSyg linux-2.4
     1.4  41224663YBCUMX1kVo_HRUtgaHTi7w linux-2.4.29-xen-sparse/include/asm-xen/queues.h
     1.5  3e5a4e68uJz-xI0IBVMD7xRLQKJDFg linux-2.4.29-xen-sparse/include/asm-xen/segment.h
     1.6  3e5a4e68Nfdh6QcOKUTGCaYkf2LmYA linux-2.4.29-xen-sparse/include/asm-xen/smp.h
     1.7 -4062f7e2PzFOUGT0PaE7A0VprTU3JQ linux-2.4.29-xen-sparse/include/asm-xen/synch_bitops.h
     1.8  3e5a4e68mTr0zcp9SXDbnd-XLrrfxw linux-2.4.29-xen-sparse/include/asm-xen/system.h
     1.9  3f1056a9L_kqHcFheV00KbKBzv9j5w linux-2.4.29-xen-sparse/include/asm-xen/vga.h
    1.10  40659defgWA92arexpMGn8X3QMDj3w linux-2.4.29-xen-sparse/include/asm-xen/xor.h
    1.11 @@ -946,6 +945,7 @@ 4107c15e-VmEcLsE-7JCXZaabI8C7A xen/arch/
    1.12  3ddb79bcUrk2EIaM5VsT6wUudH1kkg xen/arch/x86/delay.c
    1.13  40e34414WiQO4h2m3tcpaCPn7SyYyg xen/arch/x86/dom0_ops.c
    1.14  3ddb79bc1_2bAt67x9MFCP4AZrQnvQ xen/arch/x86/domain.c
    1.15 +4202391dkvdTZ8GhWXe3Gqf9EOgWXg xen/arch/x86/domain_build.c
    1.16  41d3eaae6GSDo3ZJDfK3nvQsJux-PQ xen/arch/x86/e820.c
    1.17  3ddb79bcY5zW7KhvI9gvfuPi3ZumEg xen/arch/x86/extable.c
    1.18  3fe443fdDDb0Sw6NQBCk4GQapayfTA xen/arch/x86/flushtlb.c
    1.19 @@ -984,7 +984,6 @@ 41f97ef5139vN42cOYHfX_Ac8WOOjA xen/arch/
    1.20  41c0c4128URE0dxcO15JME_MuKBPfg xen/arch/x86/vmx_vmcs.c
    1.21  419cbedeQDg8IrO3izo3o5rQNlo0kQ xen/arch/x86/x86_32/asm-offsets.c
    1.22  4107c15e_NqNYew2EXroXz2mgTAMWQ xen/arch/x86/x86_32/call_with_regs.S
    1.23 -4202391dkvdTZ8GhWXe3Gqf9EOgWXg xen/arch/x86/x86_32/domain_build.c
    1.24  3e32af9aRnYGl4GMOaDKp7JdfhOGhg xen/arch/x86/x86_32/domain_page.c
    1.25  3ddb79bcecupHj56ZbTa3B0FxDowMg xen/arch/x86/x86_32/entry.S
    1.26  3ddb79bcHwuCQDjBICDTSis52hWguw xen/arch/x86/x86_32/mm.c
    1.27 @@ -993,7 +992,6 @@ 42000d3ckiFc1qxa4AWqsd0t3lxuyw xen/arch/
    1.28  3ddb79bc4nTpGQOe6_-MbyZzkhlhFQ xen/arch/x86/x86_32/usercopy.c
    1.29  3ddb79bcOMCu9-5mKpjIh5d0qqBDPg xen/arch/x86/x86_32/xen.lds
    1.30  41bf1717Ty3hwN3E9swdu8QfnvGqww xen/arch/x86/x86_64/asm-offsets.c
    1.31 -4202391dA91ZovYX9d_5zJi9yGvLoQ xen/arch/x86/x86_64/domain_build.c
    1.32  40e96d3aLDI-nViMuYneD7VKYlZrVg xen/arch/x86/x86_64/entry.S
    1.33  41bf1717XhPz_dNT5OKSjgmbFuWBuA xen/arch/x86/x86_64/mm.c
    1.34  42000d3cMb8o1WuFBXC07c8i3lPZBw xen/arch/x86/x86_64/traps.c
     2.1 --- a/BitKeeper/etc/logging_ok	Tue Mar 01 13:47:27 2005 +0000
     2.2 +++ b/BitKeeper/etc/logging_ok	Tue Mar 01 13:47:52 2005 +0000
     2.3 @@ -15,6 +15,7 @@ br260@laudney.cl.cam.ac.uk
     2.4  bren@anvil.research
     2.5  bren@br260.wolfson.cam.ac.uk
     2.6  cl349@arcadians.cl.cam.ac.uk
     2.7 +cl349@firebug.cl.cam.ac.uk
     2.8  cl349@freefall.cl.cam.ac.uk
     2.9  cl349@labyrinth.cl.cam.ac.uk
    2.10  cwc22@centipede.cl.cam.ac.uk
     3.1 --- a/Makefile	Tue Mar 01 13:47:27 2005 +0000
     3.2 +++ b/Makefile	Tue Mar 01 13:47:52 2005 +0000
     3.3 @@ -22,9 +22,9 @@ XKERNELS := $(foreach kernel, $(KERNELS)
     3.4  export DESTDIR
     3.5  
     3.6  # Export target architecture overrides to Xen and Linux sub-trees.
     3.7 -ifneq ($(TARGET_ARCH),)
     3.8 -SUBARCH := $(subst x86_32,i386,$(TARGET_ARCH))
     3.9 -export TARGET_ARCH SUBARCH
    3.10 +ifneq ($(XEN_TARGET_ARCH),)
    3.11 +SUBARCH := $(subst x86_32,i386,$(XEN_TARGET_ARCH))
    3.12 +export XEN_TARGET_ARCH SUBARCH
    3.13  endif
    3.14  
    3.15  include buildconfigs/Rules.mk
     4.1 --- a/extras/mini-os/x86_32.S	Tue Mar 01 13:47:27 2005 +0000
     4.2 +++ b/extras/mini-os/x86_32.S	Tue Mar 01 13:47:52 2005 +0000
     4.3 @@ -1,7 +1,7 @@
     4.4  #include <os.h>
     4.5  
     4.6  .section __xen_guest
     4.7 -        .asciz  "XEN_VER=2.0,LOADER=generic,PT_MODE_WRITABLE"
     4.8 +        .asciz  "XEN_VER=3.0,LOADER=generic,PT_MODE_WRITABLE"
     4.9  .text
    4.10  
    4.11  .globl _start, shared_info
     5.1 --- a/extras/mini-os/x86_64.S	Tue Mar 01 13:47:27 2005 +0000
     5.2 +++ b/extras/mini-os/x86_64.S	Tue Mar 01 13:47:52 2005 +0000
     5.3 @@ -1,7 +1,7 @@
     5.4  #include <os.h>
     5.5  
     5.6  .section __xen_guest
     5.7 -        .asciz  "XEN_VER=2.0,LOADER=generic,PT_MODE_WRITABLE"
     5.8 +        .asciz  "XEN_VER=3.0,LOADER=generic,PT_MODE_WRITABLE"
     5.9  .text
    5.10  
    5.11  #define ENTRY(X) .globl X ; X :
     6.1 --- a/linux-2.4.29-xen-sparse/arch/xen/kernel/head.S	Tue Mar 01 13:47:27 2005 +0000
     6.2 +++ b/linux-2.4.29-xen-sparse/arch/xen/kernel/head.S	Tue Mar 01 13:47:52 2005 +0000
     6.3 @@ -1,6 +1,6 @@
     6.4  
     6.5  .section __xen_guest
     6.6 -    .asciz "GUEST_OS=linux,GUEST_VER=2.4,XEN_VER=2.0,VIRT_BASE=0xC0000000"
     6.7 +    .asciz "GUEST_OS=linux,GUEST_VER=2.4,XEN_VER=3.0,VIRT_BASE=0xC0000000"
     6.8  
     6.9  .text
    6.10  #include <linux/config.h>
     7.1 --- a/linux-2.4.29-xen-sparse/arch/xen/mm/init.c	Tue Mar 01 13:47:27 2005 +0000
     7.2 +++ b/linux-2.4.29-xen-sparse/arch/xen/mm/init.c	Tue Mar 01 13:47:52 2005 +0000
     7.3 @@ -366,6 +366,12 @@ static int __init free_pages_init(void)
     7.4  
     7.5      /* this will put all low memory onto the freelists */
     7.6      totalram_pages += free_all_bootmem();
     7.7 +    /* XEN: init and count low-mem pages outside initial allocation. */
     7.8 +    for (pfn = boot_pfn; pfn < max_low_pfn; pfn++) {
     7.9 +        ClearPageReserved(&mem_map[pfn]);
    7.10 +        atomic_set(&mem_map[pfn].count, 1);
    7.11 +        totalram_pages++;
    7.12 +    }
    7.13  
    7.14      reservedpages = 0;
    7.15      for (pfn = 0; pfn < boot_pfn ; pfn++) {
     8.1 --- a/linux-2.4.29-xen-sparse/include/asm-xen/synch_bitops.h	Tue Mar 01 13:47:27 2005 +0000
     8.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     8.3 @@ -1,83 +0,0 @@
     8.4 -#ifndef __XEN_SYNCH_BITOPS_H__
     8.5 -#define __XEN_SYNCH_BITOPS_H__
     8.6 -
     8.7 -/*
     8.8 - * Copyright 1992, Linus Torvalds.
     8.9 - * Heavily modified to provide guaranteed strong synchronisation
    8.10 - * when communicating with Xen or other guest OSes running on other CPUs.
    8.11 - */
    8.12 -
    8.13 -#include <linux/config.h>
    8.14 -
    8.15 -#define ADDR (*(volatile long *) addr)
    8.16 -
    8.17 -static __inline__ void synch_set_bit(int nr, volatile void * addr)
    8.18 -{
    8.19 -    __asm__ __volatile__ ( 
    8.20 -        "lock btsl %1,%0"
    8.21 -        : "=m" (ADDR) : "Ir" (nr) : "memory" );
    8.22 -}
    8.23 -
    8.24 -static __inline__ void synch_clear_bit(int nr, volatile void * addr)
    8.25 -{
    8.26 -    __asm__ __volatile__ (
    8.27 -        "lock btrl %1,%0"
    8.28 -        : "=m" (ADDR) : "Ir" (nr) : "memory" );
    8.29 -}
    8.30 -
    8.31 -static __inline__ void synch_change_bit(int nr, volatile void * addr)
    8.32 -{
    8.33 -    __asm__ __volatile__ (
    8.34 -        "lock btcl %1,%0"
    8.35 -        : "=m" (ADDR) : "Ir" (nr) : "memory" );
    8.36 -}
    8.37 -
    8.38 -static __inline__ int synch_test_and_set_bit(int nr, volatile void * addr)
    8.39 -{
    8.40 -    int oldbit;
    8.41 -    __asm__ __volatile__ (
    8.42 -        "lock btsl %2,%1\n\tsbbl %0,%0"
    8.43 -        : "=r" (oldbit), "=m" (ADDR) : "Ir" (nr) : "memory");
    8.44 -    return oldbit;
    8.45 -}
    8.46 -
    8.47 -static __inline__ int synch_test_and_clear_bit(int nr, volatile void * addr)
    8.48 -{
    8.49 -    int oldbit;
    8.50 -    __asm__ __volatile__ (
    8.51 -        "lock btrl %2,%1\n\tsbbl %0,%0"
    8.52 -        : "=r" (oldbit), "=m" (ADDR) : "Ir" (nr) : "memory");
    8.53 -    return oldbit;
    8.54 -}
    8.55 -
    8.56 -static __inline__ int synch_test_and_change_bit(int nr, volatile void * addr)
    8.57 -{
    8.58 -    int oldbit;
    8.59 -
    8.60 -    __asm__ __volatile__ (
    8.61 -        "lock btcl %2,%1\n\tsbbl %0,%0"
    8.62 -        : "=r" (oldbit), "=m" (ADDR) : "Ir" (nr) : "memory");
    8.63 -    return oldbit;
    8.64 -}
    8.65 -
    8.66 -static __inline__ int synch_const_test_bit(int nr, const volatile void * addr)
    8.67 -{
    8.68 -    return ((1UL << (nr & 31)) & 
    8.69 -            (((const volatile unsigned int *) addr)[nr >> 5])) != 0;
    8.70 -}
    8.71 -
    8.72 -static __inline__ int synch_var_test_bit(int nr, volatile void * addr)
    8.73 -{
    8.74 -    int oldbit;
    8.75 -    __asm__ __volatile__ (
    8.76 -        "btl %2,%1\n\tsbbl %0,%0"
    8.77 -        : "=r" (oldbit) : "m" (ADDR), "Ir" (nr) );
    8.78 -    return oldbit;
    8.79 -}
    8.80 -
    8.81 -#define synch_test_bit(nr,addr) \
    8.82 -(__builtin_constant_p(nr) ? \
    8.83 - synch_const_test_bit((nr),(addr)) : \
    8.84 - synch_var_test_bit((nr),(addr)))
    8.85 -
    8.86 -#endif /* __XEN_SYNCH_BITOPS_H__ */
     9.1 --- a/linux-2.4.29-xen-sparse/include/asm-xen/system.h	Tue Mar 01 13:47:27 2005 +0000
     9.2 +++ b/linux-2.4.29-xen-sparse/include/asm-xen/system.h	Tue Mar 01 13:47:52 2005 +0000
     9.3 @@ -113,6 +113,22 @@ static inline unsigned long _get_base(ch
     9.4  
     9.5  #endif	/* __KERNEL__ */
     9.6  
     9.7 +/**
     9.8 + * __ffs - find first bit in word.
     9.9 + * @word: The word to search
    9.10 + *
    9.11 + * Undefined if no bit exists, so code should check against 0 first.
    9.12 + *
    9.13 + * Taken from 2.6 for Xen.
    9.14 + */
    9.15 +static inline unsigned long __ffs(unsigned long word)
    9.16 +{
    9.17 +	__asm__("bsfl %1,%0"
    9.18 +		:"=r" (word)
    9.19 +		:"rm" (word));
    9.20 +	return word;
    9.21 +}
    9.22 +
    9.23  static inline unsigned long get_limit(unsigned long segment)
    9.24  {
    9.25  	unsigned long __limit;
    10.1 --- a/linux-2.4.29-xen-sparse/mkbuildtree	Tue Mar 01 13:47:27 2005 +0000
    10.2 +++ b/linux-2.4.29-xen-sparse/mkbuildtree	Tue Mar 01 13:47:52 2005 +0000
    10.3 @@ -212,6 +212,7 @@ ln -sf ../../${LINUX_26}/include/asm-xen
    10.4  ln -sf ../../${LINUX_26}/include/asm-xen/hypervisor.h
    10.5  ln -sf ../../${LINUX_26}/include/asm-xen/multicall.h
    10.6  ln -sf ../../${LINUX_26}/include/asm-xen/xen_proc.h
    10.7 +ln -sf ../../${LINUX_26}/include/asm-xen/asm-i386/synch_bitops.h
    10.8  
    10.9  mkdir -p linux-public && cd linux-public
   10.10  ln -sf ../../../${LINUX_26}/include/asm-xen/linux-public/privcmd.h
    11.1 --- a/linux-2.6.10-xen-sparse/arch/xen/i386/kernel/head.S	Tue Mar 01 13:47:27 2005 +0000
    11.2 +++ b/linux-2.6.10-xen-sparse/arch/xen/i386/kernel/head.S	Tue Mar 01 13:47:52 2005 +0000
    11.3 @@ -2,7 +2,7 @@
    11.4  #include <linux/config.h>
    11.5  
    11.6  .section __xen_guest
    11.7 -	.ascii	"GUEST_OS=linux,GUEST_VER=2.6,XEN_VER=2.0,VIRT_BASE=0xC0000000"
    11.8 +	.ascii	"GUEST_OS=linux,GUEST_VER=2.6,XEN_VER=3.0,VIRT_BASE=0xC0000000"
    11.9  	.ascii	",LOADER=generic"
   11.10  	.ascii	",PT_MODE_WRITABLE"
   11.11  	.byte	0
    12.1 --- a/linux-2.6.10-xen-sparse/arch/xen/i386/mm/init.c	Tue Mar 01 13:47:27 2005 +0000
    12.2 +++ b/linux-2.6.10-xen-sparse/arch/xen/i386/mm/init.c	Tue Mar 01 13:47:52 2005 +0000
    12.3 @@ -177,9 +177,9 @@ static void __init kernel_physical_mappi
    12.4  				pte = one_page_table_init(pmd);
    12.5  
    12.6  				pte += pte_ofs;
    12.7 -				/* XEN: Only map initial RAM allocation. */
    12.8 -				for (; pte_ofs < PTRS_PER_PTE && pfn < max_ram_pfn; pte++, pfn++, pte_ofs++) {
    12.9 -						if (pte_present(*pte))
   12.10 +				for (; pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn; pte++, pfn++, pte_ofs++) {
   12.11 +						/* XEN: Only map initial RAM allocation. */
   12.12 +						if ((pfn >= max_ram_pfn) || pte_present(*pte))
   12.13  							continue;
   12.14  						if (is_kernel_text(address))
   12.15  							set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
   12.16 @@ -627,6 +627,7 @@ void __init mem_init(void)
   12.17  	int codesize, reservedpages, datasize, initsize;
   12.18  	int tmp;
   12.19  	int bad_ppro;
   12.20 +	unsigned long pfn;
   12.21  
   12.22  #ifndef CONFIG_DISCONTIGMEM
   12.23  	if (!mem_map)
   12.24 @@ -655,6 +656,12 @@ void __init mem_init(void)
   12.25  
   12.26  	/* this will put all low memory onto the freelists */
   12.27  	totalram_pages += __free_all_bootmem();
   12.28 +	/* XEN: init and count low-mem pages outside initial allocation. */
   12.29 +	for (pfn = xen_start_info.nr_pages; pfn < max_low_pfn; pfn++) {
   12.30 +		ClearPageReserved(&mem_map[pfn]);
   12.31 +		set_page_count(&mem_map[pfn], 1);
   12.32 +		totalram_pages++;
   12.33 +	}
   12.34  
   12.35  	reservedpages = 0;
   12.36  	for (tmp = 0; tmp < max_low_pfn; tmp++)
    13.1 --- a/netbsd-2.0-xen-sparse/sys/arch/xen/i386/locore.S	Tue Mar 01 13:47:27 2005 +0000
    13.2 +++ b/netbsd-2.0-xen-sparse/sys/arch/xen/i386/locore.S	Tue Mar 01 13:47:52 2005 +0000
    13.3 @@ -180,7 +180,7 @@
    13.4   * Xen guest identifier and loader selection
    13.5   */
    13.6  .section __xen_guest
    13.7 -	.ascii	"GUEST_OS=netbsd,GUEST_VER=2.0,XEN_VER=2.0"
    13.8 +	.ascii	"GUEST_OS=netbsd,GUEST_VER=2.0,XEN_VER=3.0"
    13.9  	.ascii	",LOADER=generic"
   13.10  #if (NKSYMS || defined(DDB) || defined(LKM)) && !defined(SYMTAB_SPACE)
   13.11  	.ascii	",BSD_SYMTAB"
    14.1 --- a/tools/Rules.mk	Tue Mar 01 13:47:27 2005 +0000
    14.2 +++ b/tools/Rules.mk	Tue Mar 01 13:47:52 2005 +0000
    14.3 @@ -4,15 +4,15 @@ XEN_XC             = $(XEN_ROOT)/tools/p
    14.4  XEN_LIBXC          = $(XEN_ROOT)/tools/libxc
    14.5  XEN_LIBXUTIL       = $(XEN_ROOT)/tools/libxutil
    14.6  
    14.7 -COMPILE_ARCH    ?= $(shell uname -m | sed -e s/i.86/x86_32/)
    14.8 -TARGET_ARCH     ?= $(COMPILE_ARCH)
    14.9 +XEN_COMPILE_ARCH  ?= $(shell uname -m | sed -e s/i.86/x86_32/)
   14.10 +XEN_TARGET_ARCH   ?= $(XEN_COMPILE_ARCH)
   14.11  
   14.12 -ifeq ($(TARGET_ARCH),x86_32)
   14.13 +ifeq ($(XEN_TARGET_ARCH),x86_32)
   14.14  CFLAGS  += -m32 -march=i686
   14.15  LDFLAGS += -m elf_i386
   14.16  endif
   14.17  
   14.18 -ifeq ($(TARGET_ARCH),x86_64)
   14.19 +ifeq ($(XEN_TARGET_ARCH),x86_64)
   14.20  CFLAGS  += -m64
   14.21  LDFLAGS += -m elf_x86_64
   14.22  endif
    15.1 --- a/tools/libxc/xc_linux_build.c	Tue Mar 01 13:47:27 2005 +0000
    15.2 +++ b/tools/libxc/xc_linux_build.c	Tue Mar 01 13:47:52 2005 +0000
    15.3 @@ -538,9 +538,9 @@ static int parseelfimage(char *elfbase,
    15.4              return -EINVAL;
    15.5          }
    15.6  
    15.7 -        if ( (strstr(guestinfo, "XEN_VER=2.0") == NULL) )
    15.8 +        if ( (strstr(guestinfo, "XEN_VER=3.0") == NULL) )
    15.9          {
   15.10 -            ERROR("Will only load images built for Xen v2.0");
   15.11 +            ERROR("Will only load images built for Xen v3.0");
   15.12              ERROR("Actually saw: '%s'", guestinfo);
   15.13              return -EINVAL;
   15.14          }
    16.1 --- a/tools/libxc/xc_vmx_build.c	Tue Mar 01 13:47:27 2005 +0000
    16.2 +++ b/tools/libxc/xc_vmx_build.c	Tue Mar 01 13:47:52 2005 +0000
    16.3 @@ -465,10 +465,10 @@ int vmx_identify(void)
    16.4  {
    16.5      int eax, ecx;
    16.6  
    16.7 -    __asm__ __volatile__ ("cpuid" 
    16.8 +    __asm__ __volatile__ ("pushl %%ebx; cpuid; popl %%ebx" 
    16.9  			  : "=a" (eax), "=c" (ecx) 
   16.10  			  : "0" (1) 
   16.11 -			  : "bx", "dx");
   16.12 +			  : "dx");
   16.13      if (!(ecx & VMX_FEATURE_FLAG)) {
   16.14          return -1;
   16.15      }
    17.1 --- a/tools/misc/xend	Tue Mar 01 13:47:27 2005 +0000
    17.2 +++ b/tools/misc/xend	Tue Mar 01 13:47:52 2005 +0000
    17.3 @@ -24,7 +24,7 @@ import sys
    17.4  import socket
    17.5  import time
    17.6  
    17.7 -XCS_PORT = 1633
    17.8 +XCS_PATH = "/var/xen/xcs_socket"
    17.9  XCS_EXEC = "/usr/sbin/xcs"
   17.10  XCS_LOGFILE = "/var/log/xcs.log"
   17.11  
   17.12 @@ -100,9 +100,9 @@ def xcs_running():
   17.13      """ See if the control switch is running.
   17.14      """
   17.15      ret = 1
   17.16 -    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
   17.17 +    s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
   17.18      try:
   17.19 -        s.connect( ("127.0.0.1", XCS_PORT) )
   17.20 +        s.connect( (XCS_PATH) )
   17.21      except:
   17.22          ret = 0
   17.23      s.close()
   17.24 @@ -118,7 +118,7 @@ def main():
   17.25      
   17.26      if (not xcs_running()):
   17.27          if os.fork():
   17.28 -            time.sleep(1) # let xcs start
   17.29 +            time.sleep(0.5) # let xcs start
   17.30          else:
   17.31              try:
   17.32                  logfile = os.open(XCS_LOGFILE, 
    18.1 --- a/tools/python/xen/lowlevel/xu/xu.c	Tue Mar 01 13:47:27 2005 +0000
    18.2 +++ b/tools/python/xen/lowlevel/xu/xu.c	Tue Mar 01 13:47:52 2005 +0000
    18.3 @@ -13,10 +13,10 @@
    18.4  #include <sys/wait.h>
    18.5  #include <sys/stat.h>
    18.6  #include <sys/socket.h>
    18.7 +#include <sys/un.h>
    18.8  #include <sys/mman.h>
    18.9  #include <sys/poll.h>
   18.10  #include <sys/sysmacros.h>
   18.11 -#include <netinet/in.h>
   18.12  #include <fcntl.h>
   18.13  #include <unistd.h>
   18.14  #include <errno.h>
   18.15 @@ -87,36 +87,34 @@ static int xcs_ctrl_read(xcs_msg_t *msg)
   18.16  static int xcs_data_send(xcs_msg_t *msg);
   18.17  static int xcs_data_read(xcs_msg_t *msg);
   18.18  
   18.19 -static int xcs_connect(char *ip, short port)
   18.20 +static int xcs_connect(char *path)
   18.21  {
   18.22 -    struct sockaddr_in addr;
   18.23 -    int ret, flags;
   18.24 +    struct sockaddr_un addr;
   18.25 +    int ret, len, flags;
   18.26      xcs_msg_t msg;
   18.27  
   18.28      if (xcs_data_fd != -1) /* already connected */
   18.29          return 0;
   18.30      
   18.31 -    xcs_ctrl_fd = socket(AF_INET, SOCK_STREAM, 0);
   18.32 +    xcs_ctrl_fd = socket(AF_UNIX, SOCK_STREAM, 0);
   18.33      if (xcs_ctrl_fd < 0)
   18.34      {
   18.35          printf("error creating xcs socket!\n");
   18.36          goto fail;
   18.37      }
   18.38      
   18.39 -    addr.sin_family = AF_INET;
   18.40 -    addr.sin_port = htons(port);
   18.41 -    addr.sin_addr.s_addr = inet_addr(ip);
   18.42 -    memset(&(addr.sin_zero), '\0', 8);
   18.43 +    addr.sun_family = AF_UNIX;
   18.44 +    strcpy(addr.sun_path, path);
   18.45 +    len = sizeof(addr.sun_family) + strlen(addr.sun_path) + 1;
   18.46  
   18.47 -    ret = connect(xcs_ctrl_fd, (struct sockaddr *)&addr, 
   18.48 -            sizeof(struct sockaddr));
   18.49 +    ret = connect(xcs_ctrl_fd, (struct sockaddr *)&addr, len);
   18.50      if (ret < 0) 
   18.51      {
   18.52          printf("error connecting to xcs(ctrl)! (%d)\n", errno);
   18.53          goto ctrl_fd_fail;
   18.54      }
   18.55  
   18.56 -    //set_cloexec(xcs_ctrl_fd);
   18.57 +    /*set_cloexec(xcs_ctrl_fd);*/
   18.58              
   18.59      msg.type = XCS_CONNECT_CTRL;
   18.60      msg.u.connect.session_id = xcs_session_id;
   18.61 @@ -131,20 +129,18 @@ static int xcs_connect(char *ip, short p
   18.62      xcs_session_id = msg.u.connect.session_id;
   18.63      
   18.64      /* now the data connection. */
   18.65 -    xcs_data_fd = socket(AF_INET, SOCK_STREAM, 0);
   18.66 +    xcs_data_fd = socket(AF_UNIX, SOCK_STREAM, 0);
   18.67      if (xcs_data_fd < 0)
   18.68      {
   18.69          printf("error creating xcs data socket!\n");
   18.70          goto ctrl_fd_fail;
   18.71      }
   18.72      
   18.73 -    addr.sin_family = AF_INET;
   18.74 -    addr.sin_port = htons(port);
   18.75 -    addr.sin_addr.s_addr = inet_addr(ip);
   18.76 -    memset(&(addr.sin_zero), '\0', 8);
   18.77 +    addr.sun_family = AF_UNIX;
   18.78 +    strcpy(addr.sun_path, path);
   18.79 +    len = sizeof(addr.sun_family) + strlen(addr.sun_path) + 1;
   18.80      
   18.81 -    ret = connect(xcs_data_fd, (struct sockaddr *)&addr, 
   18.82 -            sizeof(struct sockaddr));
   18.83 +    ret = connect(xcs_data_fd, (struct sockaddr *)&addr, len);
   18.84      if (ret < 0) 
   18.85      {
   18.86          printf("error connecting to xcs(data)! (%d)\n", errno);
   18.87 @@ -447,7 +443,7 @@ static PyObject *xu_notifier_new(PyObjec
   18.88      for (i = 0; i < XCS_RING_SIZE; i++) 
   18.89          REQ_RING_ENT(i) = RSP_RING_ENT(i) = NULL;
   18.90      
   18.91 -    (void)xcs_connect("127.0.0.1", XCS_TCP_PORT);
   18.92 +    (void)xcs_connect(XCS_SUN_PATH);
   18.93      
   18.94  
   18.95      return (PyObject *)xun;
    19.1 --- a/tools/xcs/xcs.c	Tue Mar 01 13:47:27 2005 +0000
    19.2 +++ b/tools/xcs/xcs.c	Tue Mar 01 13:47:52 2005 +0000
    19.3 @@ -71,8 +71,7 @@
    19.4  #include <string.h>
    19.5  #include <signal.h>
    19.6  #include <sys/socket.h>
    19.7 -#include <netinet/in.h>
    19.8 -#include <arpa/inet.h>
    19.9 +#include <sys/un.h>
   19.10  #include <errno.h>
   19.11  #include <malloc.h>
   19.12  #include <fcntl.h>
   19.13 @@ -89,27 +88,28 @@ static int dom_port_map_size = 0;
   19.14  
   19.15  static void map_dom_to_port(u32 dom, int port)
   19.16  {
   19.17 -	if (dom >= dom_port_map_size) {
   19.18 -		dom_port_map = (int *)realloc(dom_port_map,
   19.19 -					      (dom + 10) * sizeof(dom_port_map[0]));
   19.20 +    if (dom >= dom_port_map_size) {
   19.21 +        dom_port_map = (int *)realloc(dom_port_map,
   19.22 +                                      (dom + 256) * sizeof(dom_port_map[0]));
   19.23  
   19.24 -		if (dom_port_map == NULL) {
   19.25 -			perror("realloc(dom_port_map)");
   19.26 -			exit(1);
   19.27 -		}
   19.28 +        if (dom_port_map == NULL) {
   19.29 +            perror("realloc(dom_port_map)");
   19.30 +            exit(1);
   19.31 +        }
   19.32  
   19.33 -		for (; dom_port_map_size < dom + 10; dom_port_map_size++) {
   19.34 -			dom_port_map[dom_port_map_size] = -1;
   19.35 -		}
   19.36 -	}
   19.37 +        for (; dom_port_map_size < dom + 10; dom_port_map_size++) {
   19.38 +            dom_port_map[dom_port_map_size] = -1;
   19.39 +        }
   19.40 +    }
   19.41  
   19.42 -	dom_port_map[dom] = port;
   19.43 +    dom_port_map[dom] = port;
   19.44  }
   19.45  
   19.46 -static int dom_to_port(u32 dom) {
   19.47 -	if (dom >= dom_port_map_size) return -1;
   19.48 +static int dom_to_port(u32 dom) 
   19.49 +{
   19.50 +    if (dom >= dom_port_map_size) return -1;
   19.51  
   19.52 -	return dom_port_map[dom];
   19.53 +    return dom_port_map[dom];
   19.54  }
   19.55  
   19.56  static void init_interfaces(void)
   19.57 @@ -218,37 +218,34 @@ void put_interface(control_channel_t *cc
   19.58  /* ------[ Simple helpers ]------------------------------------------------*/
   19.59  
   19.60  /* listen_socket() is straight from paul sheer's useful select_tut manpage. */
   19.61 -static int listen_socket (int listen_port) 
   19.62 +static int listen_socket (char *listen_path) 
   19.63  {
   19.64 -    struct sockaddr_in a;
   19.65 +    struct sockaddr_un a;
   19.66      int s;
   19.67      int yes;
   19.68  
   19.69 -    if ((s = socket (AF_INET, SOCK_STREAM, 0)) < 0) 
   19.70 +    if ((s = socket (AF_UNIX, SOCK_STREAM, 0)) < 0) 
   19.71      {
   19.72          perror ("socket");
   19.73          return -1;
   19.74      }
   19.75      
   19.76      yes = 1;
   19.77 -    if (setsockopt(s, SOL_SOCKET, SO_REUSEADDR,
   19.78 -        (char *) &yes, sizeof (yes)) < 0) 
   19.79 -    {
   19.80 -        perror ("setsockopt");
   19.81 -        close (s);
   19.82 -        return -1;
   19.83 -    }
   19.84  
   19.85      memset (&a, 0, sizeof (a));
   19.86 -    a.sin_port = htons (listen_port);
   19.87 -    a.sin_family = AF_INET;
   19.88 +    a.sun_family = AF_UNIX;
   19.89 +    strcpy(a.sun_path, listen_path);
   19.90 +
   19.91 +    /* remove an old socket if it exists. */
   19.92 +    unlink(listen_path);
   19.93 +
   19.94      if (bind(s, (struct sockaddr *) &a, sizeof (a)) < 0) 
   19.95      {
   19.96          perror ("bind");
   19.97          close (s);
   19.98          return -1;
   19.99      }
  19.100 -    printf ("accepting connections on port %d\n", (int) listen_port);
  19.101 +    printf ("accepting connections on path %s\n", listen_path);
  19.102      listen (s, 10);
  19.103      return s;
  19.104  }
  19.105 @@ -626,13 +623,13 @@ void gc_ufd_list( unbound_fd_t **ufd )
  19.106      }
  19.107  }
  19.108  
  19.109 -int main (int argc, char*argv[])
  19.110 +int main (int argc, char *argv[])
  19.111  {
  19.112      int listen_fd, evtchn_fd;
  19.113      unbound_fd_t *unbound_fd_list = NULL, **ufd;
  19.114      struct timeval timeout = { XCS_GC_INTERVAL, 0 };
  19.115      connection_t **con;
  19.116 -    
  19.117 +
  19.118      /* Initialize xc and event connections. */
  19.119      if (ctrl_chan_init() != 0)
  19.120      {
  19.121 @@ -650,7 +647,7 @@ int main (int argc, char*argv[])
  19.122      init_interfaces();
  19.123      init_bindings();
  19.124      
  19.125 -    listen_fd = listen_socket(XCS_TCP_PORT);
  19.126 +    listen_fd = listen_socket(XCS_SUN_PATH);
  19.127     
  19.128      /* detach from our controlling tty so that a shell does hang waiting for
  19.129         stopped jobs. */
  19.130 @@ -742,7 +739,7 @@ int main (int argc, char*argv[])
  19.131          /* CASE 2: New connection on the listen port. */
  19.132          if ( FD_ISSET ( listen_fd, &rd ))
  19.133          {
  19.134 -            struct sockaddr_in remote_addr;
  19.135 +            struct sockaddr_un remote_addr;
  19.136              int size;
  19.137              memset (&remote_addr, 0, sizeof (remote_addr));
  19.138              size = sizeof remote_addr;
    20.1 --- a/tools/xcs/xcs_proto.h	Tue Mar 01 13:47:27 2005 +0000
    20.2 +++ b/tools/xcs/xcs_proto.h	Tue Mar 01 13:47:52 2005 +0000
    20.3 @@ -9,7 +9,7 @@
    20.4  #ifndef  __XCS_PROTO_H__
    20.5  #define  __XCS_PROTO_H__
    20.6  
    20.7 -#define XCS_TCP_PORT     1633
    20.8 +#define XCS_SUN_PATH     "/var/xen/xcs_socket"
    20.9  
   20.10  /* xcs message types: */
   20.11  #define XCS_CONNECT_CTRL       0 /* This is a control connection.     */
    21.1 --- a/tools/xcs/xcsdump.c	Tue Mar 01 13:47:27 2005 +0000
    21.2 +++ b/tools/xcs/xcsdump.c	Tue Mar 01 13:47:52 2005 +0000
    21.3 @@ -11,8 +11,7 @@
    21.4  #include <unistd.h>
    21.5  #include <sys/types.h>
    21.6  #include <sys/socket.h>
    21.7 -#include <netinet/in.h>
    21.8 -#include <arpa/inet.h>
    21.9 +#include <sys/un.h>
   21.10  #include <ctype.h>
   21.11  #include <xc.h>
   21.12  #include <xen/xen.h>
   21.13 @@ -23,24 +22,23 @@
   21.14  static int xcs_ctrl_fd = -1; /* connection to the xcs server. */
   21.15  static int xcs_data_fd = -1; /* connection to the xcs server. */
   21.16  
   21.17 -int tcp_connect(char *ip, short port)
   21.18 +int sock_connect(char *path)
   21.19  {
   21.20 -    struct sockaddr_in addr;
   21.21 -    int ret, fd;
   21.22 +    struct sockaddr_un addr;
   21.23 +    int ret, len, fd;
   21.24  
   21.25 -    fd = socket(AF_INET, SOCK_STREAM, 0);
   21.26 +    fd = socket(AF_UNIX, SOCK_STREAM, 0);
   21.27      if (fd < 0)
   21.28      {
   21.29          printf("error creating xcs socket!\n");
   21.30          return -1;
   21.31      }
   21.32  
   21.33 -    addr.sin_family = AF_INET;
   21.34 -    addr.sin_port = htons(port);
   21.35 -    addr.sin_addr.s_addr = inet_addr(ip);
   21.36 -    memset(&(addr.sin_zero), '\0', 8);
   21.37 +    addr.sun_family = AF_UNIX;
   21.38 +    strcpy(addr.sun_path, path);
   21.39 +    len = sizeof(addr.sun_family) + strlen(addr.sun_path) + 1;
   21.40  
   21.41 -    ret = connect(fd, (struct sockaddr *)&addr, sizeof(struct sockaddr));
   21.42 +    ret = connect(fd, (struct sockaddr *)&addr, len);
   21.43      if (ret < 0) 
   21.44      {
   21.45          printf("error connecting to xcs!\n");
   21.46 @@ -50,7 +48,7 @@ int tcp_connect(char *ip, short port)
   21.47      return fd;
   21.48  }
   21.49  
   21.50 -void tcp_disconnect(int *fd)
   21.51 +void sock_disconnect(int *fd)
   21.52  {
   21.53      close(*fd);
   21.54      *fd = -1;
   21.55 @@ -91,7 +89,7 @@ int main(int argc, char* argv[])
   21.56          if ((strlen(argv[1]) >=2) && (strncmp(argv[1], "-v", 2) == 0))
   21.57              verbose = 1;
   21.58      
   21.59 -    ret = tcp_connect("127.0.0.1", XCS_TCP_PORT);
   21.60 +    ret = sock_connect(XCS_SUN_PATH);
   21.61      if (ret < 0) 
   21.62      {
   21.63          printf("connect failed!\n"); 
   21.64 @@ -109,7 +107,7 @@ int main(int argc, char* argv[])
   21.65          exit(-1);
   21.66      }
   21.67      
   21.68 -    ret = tcp_connect("127.0.0.1", XCS_TCP_PORT);
   21.69 +    ret = sock_connect(XCS_SUN_PATH);
   21.70      if (ret < 0) 
   21.71      {
   21.72          printf("connect failed!\n"); 
    22.1 --- a/xen/Rules.mk	Tue Mar 01 13:47:27 2005 +0000
    22.2 +++ b/xen/Rules.mk	Tue Mar 01 13:47:52 2005 +0000
    22.3 @@ -7,14 +7,14 @@ optimize    ?= y
    22.4  crash_debug ?= n
    22.5  
    22.6  # Currently supported architectures: x86_32, x86_64
    22.7 -COMPILE_ARCH    ?= $(shell uname -m | sed -e s/i.86/x86_32/)
    22.8 -TARGET_ARCH     ?= $(COMPILE_ARCH)
    22.9 +XEN_COMPILE_ARCH    ?= $(shell uname -m | sed -e s/i.86/x86_32/)
   22.10 +XEN_TARGET_ARCH     ?= $(XEN_COMPILE_ARCH)
   22.11  
   22.12  # Set ARCH/SUBARCH appropriately.
   22.13 -override COMPILE_SUBARCH := $(COMPILE_ARCH)
   22.14 -override TARGET_SUBARCH  := $(TARGET_ARCH)
   22.15 -override COMPILE_ARCH    := $(patsubst x86%,x86,$(COMPILE_ARCH))
   22.16 -override TARGET_ARCH     := $(patsubst x86%,x86,$(TARGET_ARCH))
   22.17 +override COMPILE_SUBARCH := $(XEN_COMPILE_ARCH)
   22.18 +override TARGET_SUBARCH  := $(XEN_TARGET_ARCH)
   22.19 +override COMPILE_ARCH    := $(patsubst x86%,x86,$(XEN_COMPILE_ARCH))
   22.20 +override TARGET_ARCH     := $(patsubst x86%,x86,$(XEN_TARGET_ARCH))
   22.21  
   22.22  TARGET  := $(BASEDIR)/xen
   22.23  HDRS    := $(wildcard $(BASEDIR)/include/xen/*.h)
    23.1 --- a/xen/arch/ia64/domain.c	Tue Mar 01 13:47:27 2005 +0000
    23.2 +++ b/xen/arch/ia64/domain.c	Tue Mar 01 13:47:52 2005 +0000
    23.3 @@ -470,8 +470,6 @@ void alloc_dom0(void)
    23.4  }
    23.5  
    23.6  int construct_dom0(struct domain *d, 
    23.7 -                   unsigned long alloc_start,
    23.8 -                   unsigned long alloc_end,
    23.9                     unsigned long image_start, unsigned long image_len, 
   23.10                     unsigned long initrd_start, unsigned long initrd_len,
   23.11                     char *cmdline)
    24.1 --- a/xen/arch/ia64/vcpu.c	Tue Mar 01 13:47:27 2005 +0000
    24.2 +++ b/xen/arch/ia64/vcpu.c	Tue Mar 01 13:47:52 2005 +0000
    24.3 @@ -1421,7 +1421,7 @@ extern struct domain *dom0;
    24.4  void vcpu_itc_no_srlz(VCPU *vcpu, UINT64 IorD, UINT64 vaddr, UINT64 pte, UINT64 logps)
    24.5  {
    24.6  	unsigned long psr;
    24.7 -	unsigned long ps = (vcpu==dom0) ? logps : PAGE_SHIFT;
    24.8 +	unsigned long ps = (vcpu->domain==dom0) ? logps : PAGE_SHIFT;
    24.9  
   24.10  	// FIXME: validate ifa here (not in Xen space), COULD MACHINE CHECK!
   24.11  	// FIXME, must be inlined or potential for nested fault here!
    25.1 --- a/xen/arch/ia64/xensetup.c	Tue Mar 01 13:47:27 2005 +0000
    25.2 +++ b/xen/arch/ia64/xensetup.c	Tue Mar 01 13:47:52 2005 +0000
    25.3 @@ -341,8 +341,6 @@ printk("About to  process command line\n
    25.4  #ifdef IA64
    25.5  printk("About to call construct_dom0()\n");
    25.6      if ( construct_dom0(dom0, dom0_memory_start, dom0_memory_end,
    25.7 -                        0, 
    25.8 -                        0,
    25.9  			0,
   25.10                          0,
   25.11  			0) != 0)
   25.12 @@ -366,8 +364,6 @@ printk("CONSTRUCTING DOMAIN0 CLONE #%d\n
   25.13          if ( construct_dom0(clones[i], dom0_memory_start, dom0_memory_end,
   25.14                          0, 
   25.15                          0,
   25.16 -			0,
   25.17 -                        0,
   25.18  			0) != 0)
   25.19              panic("Could not set up DOM0 clone %d\n",i);
   25.20      }
    26.1 --- a/xen/arch/x86/boot/x86_32.S	Tue Mar 01 13:47:27 2005 +0000
    26.2 +++ b/xen/arch/x86/boot/x86_32.S	Tue Mar 01 13:47:52 2005 +0000
    26.3 @@ -15,9 +15,9 @@ ENTRY(start)
    26.4          /* Magic number indicating a Multiboot header. */
    26.5  	.long	0x1BADB002
    26.6  	/* Flags to bootloader (see Multiboot spec). */
    26.7 -	.long	0x00000002
    26.8 +	.long	0x00000003
    26.9  	/* Checksum: must be the negated sum of the first two fields. */
   26.10 -	.long	-0x1BADB004
   26.11 +	.long	-0x1BADB005
   26.12          
   26.13  bad_cpu_msg:
   26.14          .asciz "ERR: Not a P6-compatible CPU!"
    27.1 --- a/xen/arch/x86/boot/x86_64.S	Tue Mar 01 13:47:27 2005 +0000
    27.2 +++ b/xen/arch/x86/boot/x86_64.S	Tue Mar 01 13:47:52 2005 +0000
    27.3 @@ -16,9 +16,9 @@ ENTRY(start)
    27.4          /* Magic number indicating a Multiboot header. */
    27.5          .long   0x1BADB002
    27.6          /* Flags to bootloader (see Multiboot spec). */
    27.7 -        .long   0x00000002
    27.8 +        .long   0x00000003
    27.9          /* Checksum: must be the negated sum of the first two fields. */
   27.10 -        .long   -0x1BADB004
   27.11 +        .long   -0x1BADB005
   27.12  
   27.13          .org    0x010
   27.14          .asciz "ERR: Not a 64-bit CPU!"
    28.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    28.2 +++ b/xen/arch/x86/domain_build.c	Tue Mar 01 13:47:52 2005 +0000
    28.3 @@ -0,0 +1,545 @@
    28.4 +/******************************************************************************
    28.5 + * domain_build.c
    28.6 + * 
    28.7 + * Copyright (c) 2002-2005, K A Fraser
    28.8 + */
    28.9 +
   28.10 +#include <xen/config.h>
   28.11 +#include <xen/init.h>
   28.12 +#include <xen/lib.h>
   28.13 +#include <xen/sched.h>
   28.14 +#include <xen/smp.h>
   28.15 +#include <xen/delay.h>
   28.16 +#include <xen/event.h>
   28.17 +#include <xen/elf.h>
   28.18 +#include <xen/kernel.h>
   28.19 +#include <asm/regs.h>
   28.20 +#include <asm/system.h>
   28.21 +#include <asm/io.h>
   28.22 +#include <asm/processor.h>
   28.23 +#include <asm/desc.h>
   28.24 +#include <asm/i387.h>
   28.25 +#include <asm/shadow.h>
   28.26 +
   28.27 +/* opt_dom0_mem: Kilobytes of memory allocated to domain 0. */
   28.28 +static unsigned int opt_dom0_mem = 0;
   28.29 +integer_param("dom0_mem", opt_dom0_mem);
   28.30 +
   28.31 +#if defined(__i386__)
   28.32 +/* No ring-3 access in initial leaf page tables. */
   28.33 +#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
   28.34 +#elif defined(__x86_64__)
   28.35 +/* Allow ring-3 access in long mode as guest cannot use ring 1. */
   28.36 +#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
   28.37 +#endif
   28.38 +/* Don't change these: Linux expects just these bits to be set. */
   28.39 +/* (And that includes the bogus _PAGE_DIRTY!) */
   28.40 +#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
   28.41 +#define L3_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
   28.42 +#define L4_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
   28.43 +
   28.44 +#define round_pgup(_p)    (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
   28.45 +#define round_pgdown(_p)  ((_p)&PAGE_MASK)
   28.46 +
   28.47 +static struct pfn_info *alloc_largest(struct domain *d, unsigned long max)
   28.48 +{
   28.49 +    struct pfn_info *page;
   28.50 +    unsigned int order = get_order(max * PAGE_SIZE);
   28.51 +    if ( (max & (max-1)) != 0 )
   28.52 +        order--;
   28.53 +    while ( (page = alloc_domheap_pages(d, order)) == NULL )
   28.54 +        if ( order-- == 0 )
   28.55 +            break;
   28.56 +    return page;
   28.57 +}
   28.58 +
   28.59 +int construct_dom0(struct domain *d,
   28.60 +                   unsigned long _image_start, unsigned long image_len, 
   28.61 +                   unsigned long _initrd_start, unsigned long initrd_len,
   28.62 +                   char *cmdline)
   28.63 +{
   28.64 +    char *dst;
   28.65 +    int i, rc;
   28.66 +    unsigned long pfn, mfn;
   28.67 +    unsigned long nr_pages;
   28.68 +    unsigned long nr_pt_pages;
   28.69 +    unsigned long alloc_start;
   28.70 +    unsigned long alloc_end;
   28.71 +    unsigned long count;
   28.72 +    struct pfn_info *page = NULL;
   28.73 +    start_info_t *si;
   28.74 +    struct exec_domain *ed = d->exec_domain[0];
   28.75 +#if defined(__i386__)
   28.76 +    char *image_start  = (char *)_image_start;  /* use lowmem mappings */
   28.77 +    char *initrd_start = (char *)_initrd_start; /* use lowmem mappings */
   28.78 +#elif defined(__x86_64__)
   28.79 +    char *image_start  = __va(_image_start);
   28.80 +    char *initrd_start = __va(_initrd_start);
   28.81 +    l4_pgentry_t *l4tab = NULL, *l4start = NULL;
   28.82 +    l3_pgentry_t *l3tab = NULL, *l3start = NULL;
   28.83 +#endif
   28.84 +    l2_pgentry_t *l2tab = NULL, *l2start = NULL;
   28.85 +    l1_pgentry_t *l1tab = NULL, *l1start = NULL;
   28.86 +
   28.87 +    /*
   28.88 +     * This fully describes the memory layout of the initial domain. All 
   28.89 +     * *_start addresses are page-aligned, except v_start (and v_end) which are 
   28.90 +     * superpage-aligned.
   28.91 +     */
   28.92 +    struct domain_setup_info dsi;
   28.93 +    unsigned long vinitrd_start;
   28.94 +    unsigned long vinitrd_end;
   28.95 +    unsigned long vphysmap_start;
   28.96 +    unsigned long vphysmap_end;
   28.97 +    unsigned long vstartinfo_start;
   28.98 +    unsigned long vstartinfo_end;
   28.99 +    unsigned long vstack_start;
  28.100 +    unsigned long vstack_end;
  28.101 +    unsigned long vpt_start;
  28.102 +    unsigned long vpt_end;
  28.103 +    unsigned long v_end;
  28.104 +
  28.105 +    /* Machine address of next candidate page-table page. */
  28.106 +    unsigned long mpt_alloc;
  28.107 +
  28.108 +    extern void physdev_init_dom0(struct domain *);
  28.109 +
  28.110 +    /* Sanity! */
  28.111 +    if ( d->id != 0 ) 
  28.112 +        BUG();
  28.113 +    if ( test_bit(DF_CONSTRUCTED, &d->d_flags) ) 
  28.114 +        BUG();
  28.115 +
  28.116 +    memset(&dsi, 0, sizeof(struct domain_setup_info));
  28.117 +
  28.118 +    printk("*** LOADING DOMAIN 0 ***\n");
  28.119 +
  28.120 +    /* By default DOM0 is allocated all available memory. */
  28.121 +    d->max_pages = ~0U;
  28.122 +    if ( (nr_pages = opt_dom0_mem >> (PAGE_SHIFT - 10)) == 0 )
  28.123 +        nr_pages = avail_domheap_pages() +
  28.124 +            ((initrd_len + PAGE_SIZE - 1) >> PAGE_SHIFT) +
  28.125 +            ((image_len  + PAGE_SIZE - 1) >> PAGE_SHIFT);
  28.126 +    if ( (page = alloc_largest(d, nr_pages)) == NULL )
  28.127 +        panic("Not enough RAM for DOM0 reservation.\n");
  28.128 +    alloc_start = page_to_phys(page);
  28.129 +    alloc_end   = alloc_start + (d->tot_pages << PAGE_SHIFT);
  28.130 +    
  28.131 +    rc = parseelfimage(image_start, image_len, &dsi);
  28.132 +    if ( rc != 0 )
  28.133 +        return rc;
  28.134 +
  28.135 +    /* Set up domain options */
  28.136 +    if ( dsi.use_writable_pagetables )
  28.137 +        vm_assist(d, VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);
  28.138 +
  28.139 +    /* Align load address to 4MB boundary. */
  28.140 +    dsi.v_start &= ~((1UL<<22)-1);
  28.141 +
  28.142 +    /*
  28.143 +     * Why do we need this? The number of page-table frames depends on the 
  28.144 +     * size of the bootstrap address space. But the size of the address space 
  28.145 +     * depends on the number of page-table frames (since each one is mapped 
  28.146 +     * read-only). We have a pair of simultaneous equations in two unknowns, 
  28.147 +     * which we solve by exhaustive search.
  28.148 +     */
  28.149 +    vinitrd_start    = round_pgup(dsi.v_kernend);
  28.150 +    vinitrd_end      = vinitrd_start + initrd_len;
  28.151 +    vphysmap_start   = round_pgup(vinitrd_end);
  28.152 +    vphysmap_end     = vphysmap_start + (nr_pages * sizeof(u32));
  28.153 +    vpt_start        = round_pgup(vphysmap_end);
  28.154 +    for ( nr_pt_pages = 2; ; nr_pt_pages++ )
  28.155 +    {
  28.156 +        vpt_end          = vpt_start + (nr_pt_pages * PAGE_SIZE);
  28.157 +        vstartinfo_start = vpt_end;
  28.158 +        vstartinfo_end   = vstartinfo_start + PAGE_SIZE;
  28.159 +        vstack_start     = vstartinfo_end;
  28.160 +        vstack_end       = vstack_start + PAGE_SIZE;
  28.161 +        v_end            = (vstack_end + (1UL<<22)-1) & ~((1UL<<22)-1);
  28.162 +        if ( (v_end - vstack_end) < (512UL << 10) )
  28.163 +            v_end += 1UL << 22; /* Add extra 4MB to get >= 512kB padding. */
  28.164 +#if defined(__i386__)
  28.165 +        if ( (((v_end - dsi.v_start + ((1UL<<L2_PAGETABLE_SHIFT)-1)) >> 
  28.166 +               L2_PAGETABLE_SHIFT) + 1) <= nr_pt_pages )
  28.167 +            break;
  28.168 +#elif defined(__x86_64__)
  28.169 +#define NR(_l,_h,_s) \
  28.170 +    (((((_h) + ((1UL<<(_s))-1)) & ~((1UL<<(_s))-1)) - \
  28.171 +       ((_l) & ~((1UL<<(_s))-1))) >> (_s))
  28.172 +        if ( (1 + /* # L4 */
  28.173 +              NR(dsi.v_start, v_end, L4_PAGETABLE_SHIFT) + /* # L3 */
  28.174 +              NR(dsi.v_start, v_end, L3_PAGETABLE_SHIFT) + /* # L2 */
  28.175 +              NR(dsi.v_start, v_end, L2_PAGETABLE_SHIFT))  /* # L1 */
  28.176 +             <= nr_pt_pages )
  28.177 +            break;
  28.178 +#endif
  28.179 +    }
  28.180 +
  28.181 +    if ( (v_end - dsi.v_start) > (alloc_end - alloc_start) )
  28.182 +        panic("Insufficient contiguous RAM to build kernel image.\n");
  28.183 +
  28.184 +    printk("VIRTUAL MEMORY ARRANGEMENT:\n"
  28.185 +           " Loaded kernel: %p->%p\n"
  28.186 +           " Init. ramdisk: %p->%p\n"
  28.187 +           " Phys-Mach map: %p->%p\n"
  28.188 +           " Page tables:   %p->%p\n"
  28.189 +           " Start info:    %p->%p\n"
  28.190 +           " Boot stack:    %p->%p\n"
  28.191 +           " TOTAL:         %p->%p\n",
  28.192 +           dsi.v_kernstart, dsi.v_kernend, 
  28.193 +           vinitrd_start, vinitrd_end,
  28.194 +           vphysmap_start, vphysmap_end,
  28.195 +           vpt_start, vpt_end,
  28.196 +           vstartinfo_start, vstartinfo_end,
  28.197 +           vstack_start, vstack_end,
  28.198 +           dsi.v_start, v_end);
  28.199 +    printk(" ENTRY ADDRESS: %p\n", dsi.v_kernentry);
  28.200 +
  28.201 +    if ( (v_end - dsi.v_start) > (nr_pages * PAGE_SIZE) )
  28.202 +    {
  28.203 +        printk("Initial guest OS requires too much space\n"
  28.204 +               "(%luMB is greater than %luMB limit)\n",
  28.205 +               (v_end-dsi.v_start)>>20, (nr_pages<<PAGE_SHIFT)>>20);
  28.206 +        return -ENOMEM;
  28.207 +    }
  28.208 +
  28.209 +    mpt_alloc = (vpt_start - dsi.v_start) + alloc_start;
  28.210 +
  28.211 +    SET_GDT_ENTRIES(ed, DEFAULT_GDT_ENTRIES);
  28.212 +    SET_GDT_ADDRESS(ed, DEFAULT_GDT_ADDRESS);
  28.213 +
  28.214 +    /*
  28.215 +     * We're basically forcing default RPLs to 1, so that our "what privilege
  28.216 +     * level are we returning to?" logic works.
  28.217 +     */
  28.218 +    ed->arch.failsafe_selector = FLAT_KERNEL_CS;
  28.219 +    ed->arch.event_selector    = FLAT_KERNEL_CS;
  28.220 +    ed->arch.kernel_ss = FLAT_KERNEL_SS;
  28.221 +    for ( i = 0; i < 256; i++ ) 
  28.222 +        ed->arch.traps[i].cs = FLAT_KERNEL_CS;
  28.223 +
  28.224 +#if defined(__i386__)
  28.225 +
  28.226 +    /*
  28.227 +     * Protect the lowest 1GB of memory. We use a temporary mapping there
  28.228 +     * from which we copy the kernel and ramdisk images.
  28.229 +     */
  28.230 +    if ( dsi.v_start < (1UL<<30) )
  28.231 +    {
  28.232 +        printk("Initial loading isn't allowed to lowest 1GB of memory.\n");
  28.233 +        return -EINVAL;
  28.234 +    }
  28.235 +
  28.236 +    /* WARNING: The new domain must have its 'processor' field filled in! */
  28.237 +    l2start = l2tab = (l2_pgentry_t *)mpt_alloc; mpt_alloc += PAGE_SIZE;
  28.238 +    memcpy(l2tab, &idle_pg_table[0], PAGE_SIZE);
  28.239 +    l2tab[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
  28.240 +        mk_l2_pgentry((unsigned long)l2start | __PAGE_HYPERVISOR);
  28.241 +    l2tab[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] =
  28.242 +        mk_l2_pgentry(__pa(d->arch.mm_perdomain_pt) | __PAGE_HYPERVISOR);
  28.243 +    ed->arch.guest_table = mk_pagetable((unsigned long)l2start);
  28.244 +
  28.245 +    l2tab += l2_table_offset(dsi.v_start);
  28.246 +    mfn = alloc_start >> PAGE_SHIFT;
  28.247 +    for ( count = 0; count < ((v_end-dsi.v_start)>>PAGE_SHIFT); count++ )
  28.248 +    {
  28.249 +        if ( !((unsigned long)l1tab & (PAGE_SIZE-1)) )
  28.250 +        {
  28.251 +            l1start = l1tab = (l1_pgentry_t *)mpt_alloc; 
  28.252 +            mpt_alloc += PAGE_SIZE;
  28.253 +            *l2tab++ = mk_l2_pgentry((unsigned long)l1start | L2_PROT);
  28.254 +            clear_page(l1tab);
  28.255 +            if ( count == 0 )
  28.256 +                l1tab += l1_table_offset(dsi.v_start);
  28.257 +        }
  28.258 +        *l1tab++ = mk_l1_pgentry((mfn << PAGE_SHIFT) | L1_PROT);
  28.259 +        
  28.260 +        page = &frame_table[mfn];
  28.261 +        if ( !get_page_and_type(page, d, PGT_writable_page) )
  28.262 +            BUG();
  28.263 +
  28.264 +        mfn++;
  28.265 +    }
  28.266 +
  28.267 +    /* Pages that are part of page tables must be read only. */
  28.268 +    l2tab = l2start + l2_table_offset(vpt_start);
  28.269 +    l1start = l1tab = (l1_pgentry_t *)l2_pgentry_to_phys(*l2tab);
  28.270 +    l1tab += l1_table_offset(vpt_start);
  28.271 +    for ( count = 0; count < nr_pt_pages; count++ ) 
  28.272 +    {
  28.273 +        *l1tab = mk_l1_pgentry(l1_pgentry_val(*l1tab) & ~_PAGE_RW);
  28.274 +        page = &frame_table[l1_pgentry_to_pfn(*l1tab)];
  28.275 +        if ( count == 0 )
  28.276 +        {
  28.277 +            page->u.inuse.type_info &= ~PGT_type_mask;
  28.278 +            page->u.inuse.type_info |= PGT_l2_page_table;
  28.279 +
  28.280 +            /*
  28.281 +             * No longer writable: decrement the type_count.
  28.282 +             * Installed as CR3: increment both the ref_count and type_count.
  28.283 +             * Net: just increment the ref_count.
  28.284 +             */
  28.285 +            get_page(page, d); /* an extra ref because of readable mapping */
  28.286 +
  28.287 +            /* Get another ref to L2 page so that it can be pinned. */
  28.288 +            if ( !get_page_and_type(page, d, PGT_l2_page_table) )
  28.289 +                BUG();
  28.290 +            set_bit(_PGT_pinned, &page->u.inuse.type_info);
  28.291 +        }
  28.292 +        else
  28.293 +        {
  28.294 +            page->u.inuse.type_info &= ~PGT_type_mask;
  28.295 +            page->u.inuse.type_info |= PGT_l1_page_table;
  28.296 +            page->u.inuse.type_info |= 
  28.297 +                ((dsi.v_start>>L2_PAGETABLE_SHIFT)+(count-1))<<PGT_va_shift;
  28.298 +
  28.299 +            /*
  28.300 +             * No longer writable: decrement the type_count.
  28.301 +             * This is an L1 page, installed in a validated L2 page:
  28.302 +             * increment both the ref_count and type_count.
  28.303 +             * Net: just increment the ref_count.
  28.304 +             */
  28.305 +            get_page(page, d); /* an extra ref because of readable mapping */
  28.306 +        }
  28.307 +        if ( !((unsigned long)++l1tab & (PAGE_SIZE - 1)) )
  28.308 +            l1start = l1tab = (l1_pgentry_t *)l2_pgentry_to_phys(*++l2tab);
  28.309 +    }
  28.310 +
  28.311 +#elif defined(__x86_64__)
  28.312 +
  28.313 +    /* Overlap with Xen protected area? */
  28.314 +    if ( (dsi.v_start < HYPERVISOR_VIRT_END) &&
  28.315 +         (v_end > HYPERVISOR_VIRT_START) )
  28.316 +    {
  28.317 +        printk("DOM0 image overlaps with Xen private area.\n");
  28.318 +        return -EINVAL;
  28.319 +    }
  28.320 +
  28.321 +    /* WARNING: The new domain must have its 'processor' field filled in! */
  28.322 +    phys_to_page(mpt_alloc)->u.inuse.type_info = PGT_l4_page_table;
  28.323 +    l4start = l4tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE;
  28.324 +    memcpy(l4tab, &idle_pg_table[0], PAGE_SIZE);
  28.325 +    l4tab[l4_table_offset(LINEAR_PT_VIRT_START)] =
  28.326 +        mk_l4_pgentry(__pa(l4start) | __PAGE_HYPERVISOR);
  28.327 +    l4tab[l4_table_offset(PERDOMAIN_VIRT_START)] =
  28.328 +        mk_l4_pgentry(__pa(d->arch.mm_perdomain_l3) | __PAGE_HYPERVISOR);
  28.329 +    ed->arch.guest_table = mk_pagetable(__pa(l4start));
  28.330 +
  28.331 +    l4tab += l4_table_offset(dsi.v_start);
  28.332 +    mfn = alloc_start >> PAGE_SHIFT;
  28.333 +    for ( count = 0; count < ((v_end-dsi.v_start)>>PAGE_SHIFT); count++ )
  28.334 +    {
  28.335 +        if ( !((unsigned long)l1tab & (PAGE_SIZE-1)) )
  28.336 +        {
  28.337 +            phys_to_page(mpt_alloc)->u.inuse.type_info = PGT_l1_page_table;
  28.338 +            l1start = l1tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE;
  28.339 +            clear_page(l1tab);
  28.340 +            if ( count == 0 )
  28.341 +                l1tab += l1_table_offset(dsi.v_start);
  28.342 +            if ( !((unsigned long)l2tab & (PAGE_SIZE-1)) )
  28.343 +            {
  28.344 +                phys_to_page(mpt_alloc)->u.inuse.type_info = PGT_l2_page_table;
  28.345 +                l2start = l2tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE;
  28.346 +                clear_page(l2tab);
  28.347 +                if ( count == 0 )
  28.348 +                    l2tab += l2_table_offset(dsi.v_start);
  28.349 +                if ( !((unsigned long)l3tab & (PAGE_SIZE-1)) )
  28.350 +                {
  28.351 +                    phys_to_page(mpt_alloc)->u.inuse.type_info =
  28.352 +                        PGT_l3_page_table;
  28.353 +                    l3start = l3tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE;
  28.354 +                    clear_page(l3tab);
  28.355 +                    if ( count == 0 )
  28.356 +                        l3tab += l3_table_offset(dsi.v_start);
  28.357 +                    *l4tab++ = mk_l4_pgentry(__pa(l3start) | L4_PROT);
  28.358 +                }
  28.359 +                *l3tab++ = mk_l3_pgentry(__pa(l2start) | L3_PROT);
  28.360 +            }
  28.361 +            *l2tab++ = mk_l2_pgentry(__pa(l1start) | L2_PROT);
  28.362 +        }
  28.363 +        *l1tab++ = mk_l1_pgentry((mfn << PAGE_SHIFT) | L1_PROT);
  28.364 +
  28.365 +        page = &frame_table[mfn];
  28.366 +        if ( (page->u.inuse.type_info == 0) &&
  28.367 +             !get_page_and_type(page, d, PGT_writable_page) )
  28.368 +            BUG();
  28.369 +
  28.370 +        mfn++;
  28.371 +    }
  28.372 +
  28.373 +    /* Pages that are part of page tables must be read only. */
  28.374 +    l4tab = l4start + l4_table_offset(vpt_start);
  28.375 +    l3start = l3tab = l4_pgentry_to_l3(*l4tab);
  28.376 +    l3tab += l3_table_offset(vpt_start);
  28.377 +    l2start = l2tab = l3_pgentry_to_l2(*l3tab);
  28.378 +    l2tab += l2_table_offset(vpt_start);
  28.379 +    l1start = l1tab = l2_pgentry_to_l1(*l2tab);
  28.380 +    l1tab += l1_table_offset(vpt_start);
  28.381 +    for ( count = 0; count < nr_pt_pages; count++ ) 
  28.382 +    {
  28.383 +        *l1tab = mk_l1_pgentry(l1_pgentry_val(*l1tab) & ~_PAGE_RW);
  28.384 +        page = &frame_table[l1_pgentry_to_pfn(*l1tab)];
  28.385 +
  28.386 +        /* Read-only mapping + PGC_allocated + page-table page. */
  28.387 +        page->count_info         = PGC_allocated | 3;
  28.388 +        page->u.inuse.type_info |= PGT_validated | 1;
  28.389 +
  28.390 +        /* Top-level p.t. is pinned. */
  28.391 +        if ( (page->u.inuse.type_info & PGT_type_mask) == PGT_l4_page_table )
  28.392 +        {
  28.393 +            page->count_info        += 1;
  28.394 +            page->u.inuse.type_info += 1 | PGT_pinned;
  28.395 +        }
  28.396 +
  28.397 +        /* Iterate. */
  28.398 +        if ( !((unsigned long)++l1tab & (PAGE_SIZE - 1)) )
  28.399 +        {
  28.400 +            if ( !((unsigned long)++l2tab & (PAGE_SIZE - 1)) )
  28.401 +            {
  28.402 +                if ( !((unsigned long)++l3tab & (PAGE_SIZE - 1)) )
  28.403 +                    l3start = l3tab = l4_pgentry_to_l3(*++l4tab); 
  28.404 +                l2start = l2tab = l3_pgentry_to_l2(*l3tab);
  28.405 +            }
  28.406 +            l1start = l1tab = l2_pgentry_to_l1(*l2tab);
  28.407 +        }
  28.408 +    }
  28.409 +
  28.410 +#endif /* __x86_64__ */
  28.411 +
  28.412 +    /* Set up shared-info area. */
  28.413 +    update_dom_time(d);
  28.414 +    d->shared_info->domain_time = 0;
  28.415 +    /* Mask all upcalls... */
  28.416 +    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
  28.417 +        d->shared_info->vcpu_data[i].evtchn_upcall_mask = 1;
  28.418 +    d->shared_info->n_vcpu = smp_num_cpus;
  28.419 +
  28.420 +    /* Set up shadow and monitor tables. */
  28.421 +    update_pagetables(ed);
  28.422 +
  28.423 +    /* Install the new page tables. */
  28.424 +    __cli();
  28.425 +    write_ptbase(ed);
  28.426 +
  28.427 +    /* Copy the OS image and free temporary buffer. */
  28.428 +    (void)loadelfimage(image_start);
  28.429 +    init_domheap_pages(
  28.430 +        _image_start, (_image_start+image_len+PAGE_SIZE-1) & PAGE_MASK);
  28.431 +
  28.432 +    /* Copy the initial ramdisk and free temporary buffer. */
  28.433 +    if ( initrd_len != 0 )
  28.434 +    {
  28.435 +        memcpy((void *)vinitrd_start, initrd_start, initrd_len);
  28.436 +        init_domheap_pages(
  28.437 +            _initrd_start, (_initrd_start+initrd_len+PAGE_SIZE-1) & PAGE_MASK);
  28.438 +    }
  28.439 +    
  28.440 +    /* Set up start info area. */
  28.441 +    si = (start_info_t *)vstartinfo_start;
  28.442 +    memset(si, 0, PAGE_SIZE);
  28.443 +    si->nr_pages     = nr_pages;
  28.444 +    si->shared_info  = virt_to_phys(d->shared_info);
  28.445 +    si->flags        = SIF_PRIVILEGED | SIF_INITDOMAIN;
  28.446 +    si->pt_base      = vpt_start;
  28.447 +    si->nr_pt_frames = nr_pt_pages;
  28.448 +    si->mfn_list     = vphysmap_start;
  28.449 +
  28.450 +    /* Write the phys->machine and machine->phys table entries. */
  28.451 +    for ( pfn = 0; pfn < d->tot_pages; pfn++ )
  28.452 +    {
  28.453 +        mfn = pfn + (alloc_start>>PAGE_SHIFT);
  28.454 +#ifndef NDEBUG
  28.455 +#define REVERSE_START ((v_end - dsi.v_start) >> PAGE_SHIFT)
  28.456 +        if ( pfn > REVERSE_START )
  28.457 +            mfn = (alloc_end>>PAGE_SHIFT) - (pfn - REVERSE_START);
  28.458 +#endif
  28.459 +        ((u32 *)vphysmap_start)[pfn] = mfn;
  28.460 +        machine_to_phys_mapping[mfn] = pfn;
  28.461 +    }
  28.462 +    while ( pfn < nr_pages )
  28.463 +    {
  28.464 +        if ( (page = alloc_largest(d, nr_pages - d->tot_pages)) == NULL )
  28.465 +            panic("Not enough RAM for DOM0 reservation.\n");
  28.466 +        while ( pfn < d->tot_pages )
  28.467 +        {
  28.468 +            mfn = page_to_pfn(page);
  28.469 +#ifndef NDEBUG
  28.470 +#define pfn (nr_pages - 1 - (pfn - ((alloc_end - alloc_start) >> PAGE_SHIFT)))
  28.471 +#endif
  28.472 +            ((u32 *)vphysmap_start)[pfn] = mfn;
  28.473 +            machine_to_phys_mapping[mfn] = pfn;
  28.474 +#undef pfn
  28.475 +            page++; pfn++;
  28.476 +        }
  28.477 +    }
  28.478 +
  28.479 +    if ( initrd_len != 0 )
  28.480 +    {
  28.481 +        si->mod_start = vinitrd_start;
  28.482 +        si->mod_len   = initrd_len;
  28.483 +        printk("Initrd len 0x%lx, start at 0x%p\n",
  28.484 +               si->mod_len, si->mod_start);
  28.485 +    }
  28.486 +
  28.487 +    dst = si->cmd_line;
  28.488 +    if ( cmdline != NULL )
  28.489 +    {
  28.490 +        for ( i = 0; i < 255; i++ )
  28.491 +        {
  28.492 +            if ( cmdline[i] == '\0' )
  28.493 +                break;
  28.494 +            *dst++ = cmdline[i];
  28.495 +        }
  28.496 +    }
  28.497 +    *dst = '\0';
  28.498 +
  28.499 +    /* Reinstate the caller's page tables. */
  28.500 +    write_ptbase(current);
  28.501 +    __sti();
  28.502 +
  28.503 +#if defined(__i386__)
  28.504 +    /* Destroy low mappings - they were only for our convenience. */
  28.505 +    for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
  28.506 +        if ( l2_pgentry_val(l2start[i]) & _PAGE_PSE )
  28.507 +            l2start[i] = mk_l2_pgentry(0);
  28.508 +    zap_low_mappings(); /* Do the same for the idle page tables. */
  28.509 +#endif
  28.510 +    
  28.511 +    /* DOM0 gets access to everything. */
  28.512 +    physdev_init_dom0(d);
  28.513 +
  28.514 +    set_bit(DF_CONSTRUCTED, &d->d_flags);
  28.515 +
  28.516 +    new_thread(ed, dsi.v_kernentry, vstack_end, vstartinfo_start);
  28.517 +
  28.518 +    return 0;
  28.519 +}
  28.520 +
  28.521 +int elf_sanity_check(Elf_Ehdr *ehdr)
  28.522 +{
  28.523 +    if ( !IS_ELF(*ehdr) ||
  28.524 +#if defined(__i386__)
  28.525 +         (ehdr->e_ident[EI_CLASS] != ELFCLASS32) ||
  28.526 +         (ehdr->e_machine != EM_386) ||
  28.527 +#elif defined(__x86_64__)
  28.528 +         (ehdr->e_ident[EI_CLASS] != ELFCLASS64) ||
  28.529 +         (ehdr->e_machine != EM_X86_64) ||
  28.530 +#endif
  28.531 +         (ehdr->e_ident[EI_DATA] != ELFDATA2LSB) ||
  28.532 +         (ehdr->e_type != ET_EXEC) )
  28.533 +    {
  28.534 +        printk("DOM0 image is not a Xen-compatible Elf image.\n");
  28.535 +        return 0;
  28.536 +    }
  28.537 +
  28.538 +    return 1;
  28.539 +}
  28.540 +
  28.541 +/*
  28.542 + * Local variables:
  28.543 + * mode: C
  28.544 + * c-set-style: "BSD"
  28.545 + * c-basic-offset: 4
  28.546 + * tab-width: 4
  28.547 + * indent-tabs-mode: nil
  28.548 + */
    29.1 --- a/xen/arch/x86/setup.c	Tue Mar 01 13:47:27 2005 +0000
    29.2 +++ b/xen/arch/x86/setup.c	Tue Mar 01 13:47:52 2005 +0000
    29.3 @@ -20,10 +20,6 @@
    29.4  #include <asm/shadow.h>
    29.5  #include <asm/e820.h>
    29.6  
    29.7 -/* opt_dom0_mem: Kilobytes of memory allocated to domain 0. */
    29.8 -static unsigned int opt_dom0_mem = 16000;
    29.9 -integer_param("dom0_mem", opt_dom0_mem);
   29.10 -
   29.11  /*
   29.12   * opt_xenheap_megabytes: Size of Xen heap in megabytes, excluding the
   29.13   * pfn_info table and allocation bitmap.
   29.14 @@ -463,7 +459,6 @@ void __init __start_xen(multiboot_info_t
   29.15      module_t *mod = (module_t *)__va(mbi->mods_addr);
   29.16      void *heap_start;
   29.17      unsigned long firsthole_start, nr_pages;
   29.18 -    unsigned long dom0_memory_start, dom0_memory_end;
   29.19      unsigned long initial_images_start, initial_images_end;
   29.20      struct e820entry e820_raw[E820MAX];
   29.21      int i, e820_raw_nr = 0, bytes = 0;
   29.22 @@ -567,15 +562,6 @@ void __init __start_xen(multiboot_info_t
   29.23             nr_pages >> (20 - PAGE_SHIFT),
   29.24             nr_pages << (PAGE_SHIFT - 10));
   29.25  
   29.26 -    /* Allocate an aligned chunk of RAM for DOM0. */
   29.27 -    dom0_memory_start = alloc_boot_pages(opt_dom0_mem << 10, 4UL << 20);
   29.28 -    dom0_memory_end   = dom0_memory_start + (opt_dom0_mem << 10);
   29.29 -    if ( dom0_memory_start == 0 )
   29.30 -    {
   29.31 -        printk("Not enough memory for DOM0 memory reservation.\n");
   29.32 -        for ( ; ; ) ;
   29.33 -    }
   29.34 -
   29.35      init_frametable();
   29.36  
   29.37      end_boot_allocator();
   29.38 @@ -613,7 +599,7 @@ void __init __start_xen(multiboot_info_t
   29.39       * We're going to setup domain0 using the module(s) that we stashed safely
   29.40       * above our heap. The second module, if present, is an initrd ramdisk.
   29.41       */
   29.42 -    if ( construct_dom0(dom0, dom0_memory_start, dom0_memory_end,
   29.43 +    if ( construct_dom0(dom0,
   29.44                          initial_images_start, 
   29.45                          mod[0].mod_end-mod[0].mod_start,
   29.46                          (mbi->mods_count == 1) ? 0 :
   29.47 @@ -624,9 +610,7 @@ void __init __start_xen(multiboot_info_t
   29.48                          cmdline) != 0)
   29.49          panic("Could not set up DOM0 guest OS\n");
   29.50  
   29.51 -    /* The stash space for the initial kernel image can now be freed up. */
   29.52 -    init_domheap_pages(initial_images_start, initial_images_end);
   29.53 -
   29.54 +    /* Scrub RAM that is still free and so may go to an unprivileged domain. */
   29.55      scrub_heap_pages();
   29.56  
   29.57      init_trace_bufs();
    30.1 --- a/xen/arch/x86/x86_32/domain_build.c	Tue Mar 01 13:47:27 2005 +0000
    30.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    30.3 @@ -1,416 +0,0 @@
    30.4 -/******************************************************************************
    30.5 - * domain_build.c
    30.6 - * 
    30.7 - * Copyright (c) 2002-2005, K A Fraser
    30.8 - */
    30.9 -
   30.10 -#include <xen/config.h>
   30.11 -#include <xen/init.h>
   30.12 -#include <xen/lib.h>
   30.13 -#include <xen/sched.h>
   30.14 -#include <xen/smp.h>
   30.15 -#include <xen/delay.h>
   30.16 -#include <asm/regs.h>
   30.17 -#include <asm/system.h>
   30.18 -#include <asm/io.h>
   30.19 -#include <asm/processor.h>
   30.20 -#include <asm/desc.h>
   30.21 -#include <asm/i387.h>
   30.22 -#include <xen/event.h>
   30.23 -#include <xen/elf.h>
   30.24 -#include <xen/kernel.h>
   30.25 -#include <asm/shadow.h>
   30.26 -
   30.27 -/* No ring-3 access in initial page tables. */
   30.28 -#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
   30.29 -#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
   30.30 -
   30.31 -#define round_pgup(_p)    (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
   30.32 -#define round_pgdown(_p)  ((_p)&PAGE_MASK)
   30.33 -
   30.34 -int construct_dom0(struct domain *d,
   30.35 -                   unsigned long alloc_start,
   30.36 -                   unsigned long alloc_end,
   30.37 -                   unsigned long _image_start, unsigned long image_len, 
   30.38 -                   unsigned long _initrd_start, unsigned long initrd_len,
   30.39 -                   char *cmdline)
   30.40 -{
   30.41 -    char *dst;
   30.42 -    int i, rc;
   30.43 -    unsigned long pfn, mfn;
   30.44 -    unsigned long nr_pages = (alloc_end - alloc_start) >> PAGE_SHIFT;
   30.45 -    unsigned long nr_pt_pages;
   30.46 -    unsigned long count;
   30.47 -    l2_pgentry_t *l2tab, *l2start;
   30.48 -    l1_pgentry_t *l1tab = NULL, *l1start = NULL;
   30.49 -    struct pfn_info *page = NULL;
   30.50 -    start_info_t *si;
   30.51 -    struct exec_domain *ed = d->exec_domain[0];
   30.52 -    char *image_start  = (char *)_image_start;  /* use lowmem mappings */
   30.53 -    char *initrd_start = (char *)_initrd_start; /* use lowmem mappings */
   30.54 -
   30.55 -    /*
   30.56 -     * This fully describes the memory layout of the initial domain. All 
   30.57 -     * *_start address are page-aligned, except v_start (and v_end) which are 
   30.58 -     * superpage-aligned.
   30.59 -     */
   30.60 -    struct domain_setup_info dsi;
   30.61 -    unsigned long vinitrd_start;
   30.62 -    unsigned long vinitrd_end;
   30.63 -    unsigned long vphysmap_start;
   30.64 -    unsigned long vphysmap_end;
   30.65 -    unsigned long vstartinfo_start;
   30.66 -    unsigned long vstartinfo_end;
   30.67 -    unsigned long vstack_start;
   30.68 -    unsigned long vstack_end;
   30.69 -    unsigned long vpt_start;
   30.70 -    unsigned long vpt_end;
   30.71 -    unsigned long v_end;
   30.72 -
   30.73 -    /* Machine address of next candidate page-table page. */
   30.74 -    unsigned long mpt_alloc;
   30.75 -
   30.76 -    extern void physdev_init_dom0(struct domain *);
   30.77 -
   30.78 -    /* Sanity! */
   30.79 -    if ( d->id != 0 ) 
   30.80 -        BUG();
   30.81 -    if ( test_bit(DF_CONSTRUCTED, &d->d_flags) ) 
   30.82 -        BUG();
   30.83 -
   30.84 -    memset(&dsi, 0, sizeof(struct domain_setup_info));
   30.85 -
   30.86 -    printk("*** LOADING DOMAIN 0 ***\n");
   30.87 -
   30.88 -    /*
   30.89 -     * This is all a bit grim. We've moved the modules to the "safe" physical 
   30.90 -     * memory region above MAP_DIRECTMAP_ADDRESS (48MB). Later in this 
   30.91 -     * routine we're going to copy it down into the region that's actually 
   30.92 -     * been allocated to domain 0. This is highly likely to be overlapping, so 
   30.93 -     * we use a forward copy.
   30.94 -     * 
   30.95 -     * MAP_DIRECTMAP_ADDRESS should be safe. The worst case is a machine with 
   30.96 -     * 4GB and lots of network/disk cards that allocate loads of buffers. 
   30.97 -     * We'll have to revisit this if we ever support PAE (64GB).
   30.98 -     */
   30.99 -
  30.100 -    rc = parseelfimage(image_start, image_len, &dsi);
  30.101 -    if ( rc != 0 )
  30.102 -        return rc;
  30.103 -
  30.104 -    /* Set up domain options */
  30.105 -    if ( dsi.use_writable_pagetables )
  30.106 -        vm_assist(d, VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);
  30.107 -
  30.108 -    /* Align load address to 4MB boundary. */
  30.109 -    dsi.v_start &= ~((1UL<<22)-1);
  30.110 -
  30.111 -    /*
  30.112 -     * Why do we need this? The number of page-table frames depends on the 
  30.113 -     * size of the bootstrap address space. But the size of the address space 
  30.114 -     * depends on the number of page-table frames (since each one is mapped 
  30.115 -     * read-only). We have a pair of simultaneous equations in two unknowns, 
  30.116 -     * which we solve by exhaustive search.
  30.117 -     */
  30.118 -    vinitrd_start    = round_pgup(dsi.v_kernend);
  30.119 -    vinitrd_end      = vinitrd_start + initrd_len;
  30.120 -    vphysmap_start   = round_pgup(vinitrd_end);
  30.121 -    vphysmap_end     = vphysmap_start + (nr_pages * sizeof(u32));
  30.122 -    vpt_start        = round_pgup(vphysmap_end);
  30.123 -    for ( nr_pt_pages = 2; ; nr_pt_pages++ )
  30.124 -    {
  30.125 -        vpt_end          = vpt_start + (nr_pt_pages * PAGE_SIZE);
  30.126 -        vstartinfo_start = vpt_end;
  30.127 -        vstartinfo_end   = vstartinfo_start + PAGE_SIZE;
  30.128 -        vstack_start     = vstartinfo_end;
  30.129 -        vstack_end       = vstack_start + PAGE_SIZE;
  30.130 -        v_end            = (vstack_end + (1UL<<22)-1) & ~((1UL<<22)-1);
  30.131 -        if ( (v_end - vstack_end) < (512UL << 10) )
  30.132 -            v_end += 1UL << 22; /* Add extra 4MB to get >= 512kB padding. */
  30.133 -        if ( (((v_end - dsi.v_start + ((1UL<<L2_PAGETABLE_SHIFT)-1)) >> 
  30.134 -               L2_PAGETABLE_SHIFT) + 1) <= nr_pt_pages )
  30.135 -            break;
  30.136 -    }
  30.137 -
  30.138 -    printk("PHYSICAL MEMORY ARRANGEMENT:\n"
  30.139 -           " Kernel image:  %p->%p\n"
  30.140 -           " Initrd image:  %p->%p\n"
  30.141 -           " Dom0 alloc.:   %p->%p\n",
  30.142 -           _image_start, _image_start + image_len,
  30.143 -           _initrd_start, _initrd_start + initrd_len,
  30.144 -           alloc_start, alloc_end);
  30.145 -    printk("VIRTUAL MEMORY ARRANGEMENT:\n"
  30.146 -           " Loaded kernel: %p->%p\n"
  30.147 -           " Init. ramdisk: %p->%p\n"
  30.148 -           " Phys-Mach map: %p->%p\n"
  30.149 -           " Page tables:   %p->%p\n"
  30.150 -           " Start info:    %p->%p\n"
  30.151 -           " Boot stack:    %p->%p\n"
  30.152 -           " TOTAL:         %p->%p\n",
  30.153 -           dsi.v_kernstart, dsi.v_kernend, 
  30.154 -           vinitrd_start, vinitrd_end,
  30.155 -           vphysmap_start, vphysmap_end,
  30.156 -           vpt_start, vpt_end,
  30.157 -           vstartinfo_start, vstartinfo_end,
  30.158 -           vstack_start, vstack_end,
  30.159 -           dsi.v_start, v_end);
  30.160 -    printk(" ENTRY ADDRESS: %p\n", dsi.v_kernentry);
  30.161 -
  30.162 -    if ( (v_end - dsi.v_start) > (nr_pages * PAGE_SIZE) )
  30.163 -    {
  30.164 -        printk("Initial guest OS requires too much space\n"
  30.165 -               "(%luMB is greater than %luMB limit)\n",
  30.166 -               (v_end-dsi.v_start)>>20, (nr_pages<<PAGE_SHIFT)>>20);
  30.167 -        return -ENOMEM;
  30.168 -    }
  30.169 -
  30.170 -    /*
  30.171 -     * Protect the lowest 1GB of memory. We use a temporary mapping there
  30.172 -     * from which we copy the kernel and ramdisk images.
  30.173 -     */
  30.174 -    if ( dsi.v_start < (1UL<<30) )
  30.175 -    {
  30.176 -        printk("Initial loading isn't allowed to lowest 1GB of memory.\n");
  30.177 -        return -EINVAL;
  30.178 -    }
  30.179 -
  30.180 -    /* Paranoia: scrub DOM0's memory allocation. */
  30.181 -    printk("Scrubbing DOM0 RAM: ");
  30.182 -    dst = (char *)alloc_start;
  30.183 -    while ( dst < (char *)alloc_end )
  30.184 -    {
  30.185 -#define SCRUB_BYTES (100 * 1024 * 1024) /* 100MB */
  30.186 -        printk(".");
  30.187 -        touch_nmi_watchdog();
  30.188 -        if ( ((char *)alloc_end - dst) > SCRUB_BYTES )
  30.189 -        {
  30.190 -            memset(dst, 0, SCRUB_BYTES);
  30.191 -            dst += SCRUB_BYTES;
  30.192 -        }
  30.193 -        else
  30.194 -        {
  30.195 -            memset(dst, 0, (char *)alloc_end - dst);
  30.196 -            break;
  30.197 -        }
  30.198 -    }
  30.199 -    printk("done.\n");
  30.200 -
  30.201 -    /* Construct a frame-allocation list for the initial domain. */
  30.202 -    for ( mfn = (alloc_start>>PAGE_SHIFT); 
  30.203 -          mfn < (alloc_end>>PAGE_SHIFT); 
  30.204 -          mfn++ )
  30.205 -    {
  30.206 -        page = &frame_table[mfn];
  30.207 -        page_set_owner(page, d);
  30.208 -        page->u.inuse.type_info = 0;
  30.209 -        page->count_info        = PGC_allocated | 1;
  30.210 -        list_add_tail(&page->list, &d->page_list);
  30.211 -        d->tot_pages++; d->max_pages++;
  30.212 -    }
  30.213 -
  30.214 -    mpt_alloc = (vpt_start - dsi.v_start) + alloc_start;
  30.215 -
  30.216 -    SET_GDT_ENTRIES(ed, DEFAULT_GDT_ENTRIES);
  30.217 -    SET_GDT_ADDRESS(ed, DEFAULT_GDT_ADDRESS);
  30.218 -
  30.219 -    /*
  30.220 -     * We're basically forcing default RPLs to 1, so that our "what privilege
  30.221 -     * level are we returning to?" logic works.
  30.222 -     */
  30.223 -    ed->arch.failsafe_selector = FLAT_KERNEL_CS;
  30.224 -    ed->arch.event_selector    = FLAT_KERNEL_CS;
  30.225 -    ed->arch.kernel_ss = FLAT_KERNEL_SS;
  30.226 -    for ( i = 0; i < 256; i++ ) 
  30.227 -        ed->arch.traps[i].cs = FLAT_KERNEL_CS;
  30.228 -
  30.229 -    /* WARNING: The new domain must have its 'processor' field filled in! */
  30.230 -    l2start = l2tab = (l2_pgentry_t *)mpt_alloc; mpt_alloc += PAGE_SIZE;
  30.231 -    memcpy(l2tab, &idle_pg_table[0], PAGE_SIZE);
  30.232 -    l2tab[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
  30.233 -        mk_l2_pgentry((unsigned long)l2start | __PAGE_HYPERVISOR);
  30.234 -    l2tab[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] =
  30.235 -        mk_l2_pgentry(__pa(d->arch.mm_perdomain_pt) | __PAGE_HYPERVISOR);
  30.236 -    ed->arch.guest_table = mk_pagetable((unsigned long)l2start);
  30.237 -
  30.238 -    l2tab += l2_table_offset(dsi.v_start);
  30.239 -    mfn = alloc_start >> PAGE_SHIFT;
  30.240 -    for ( count = 0; count < ((v_end-dsi.v_start)>>PAGE_SHIFT); count++ )
  30.241 -    {
  30.242 -        if ( !((unsigned long)l1tab & (PAGE_SIZE-1)) )
  30.243 -        {
  30.244 -            l1start = l1tab = (l1_pgentry_t *)mpt_alloc; 
  30.245 -            mpt_alloc += PAGE_SIZE;
  30.246 -            *l2tab++ = mk_l2_pgentry((unsigned long)l1start | L2_PROT);
  30.247 -            clear_page(l1tab);
  30.248 -            if ( count == 0 )
  30.249 -                l1tab += l1_table_offset(dsi.v_start);
  30.250 -        }
  30.251 -        *l1tab++ = mk_l1_pgentry((mfn << PAGE_SHIFT) | L1_PROT);
  30.252 -        
  30.253 -        page = &frame_table[mfn];
  30.254 -        if ( !get_page_and_type(page, d, PGT_writable_page) )
  30.255 -            BUG();
  30.256 -
  30.257 -        mfn++;
  30.258 -    }
  30.259 -
  30.260 -    /* Pages that are part of page tables must be read only. */
  30.261 -    l2tab = l2start + l2_table_offset(vpt_start);
  30.262 -    l1start = l1tab = (l1_pgentry_t *)l2_pgentry_to_phys(*l2tab);
  30.263 -    l1tab += l1_table_offset(vpt_start);
  30.264 -    for ( count = 0; count < nr_pt_pages; count++ ) 
  30.265 -    {
  30.266 -        *l1tab = mk_l1_pgentry(l1_pgentry_val(*l1tab) & ~_PAGE_RW);
  30.267 -        page = &frame_table[l1_pgentry_to_pfn(*l1tab)];
  30.268 -        if ( count == 0 )
  30.269 -        {
  30.270 -            page->u.inuse.type_info &= ~PGT_type_mask;
  30.271 -            page->u.inuse.type_info |= PGT_l2_page_table;
  30.272 -
  30.273 -            /*
  30.274 -             * No longer writable: decrement the type_count.
  30.275 -             * Installed as CR3: increment both the ref_count and type_count.
  30.276 -             * Net: just increment the ref_count.
  30.277 -             */
  30.278 -            get_page(page, d); /* an extra ref because of readable mapping */
  30.279 -
  30.280 -            /* Get another ref to L2 page so that it can be pinned. */
  30.281 -            if ( !get_page_and_type(page, d, PGT_l2_page_table) )
  30.282 -                BUG();
  30.283 -            set_bit(_PGT_pinned, &page->u.inuse.type_info);
  30.284 -        }
  30.285 -        else
  30.286 -        {
  30.287 -            page->u.inuse.type_info &= ~PGT_type_mask;
  30.288 -            page->u.inuse.type_info |= PGT_l1_page_table;
  30.289 -	    page->u.inuse.type_info |= 
  30.290 -		((dsi.v_start>>L2_PAGETABLE_SHIFT)+(count-1))<<PGT_va_shift;
  30.291 -
  30.292 -            /*
  30.293 -             * No longer writable: decrement the type_count.
  30.294 -             * This is an L1 page, installed in a validated L2 page:
  30.295 -             * increment both the ref_count and type_count.
  30.296 -             * Net: just increment the ref_count.
  30.297 -             */
  30.298 -            get_page(page, d); /* an extra ref because of readable mapping */
  30.299 -        }
  30.300 -        if ( !((unsigned long)++l1tab & (PAGE_SIZE - 1)) )
  30.301 -            l1start = l1tab = (l1_pgentry_t *)l2_pgentry_to_phys(*++l2tab);
  30.302 -    }
  30.303 -
  30.304 -    /* Set up shared-info area. */
  30.305 -    update_dom_time(d);
  30.306 -    d->shared_info->domain_time = 0;
  30.307 -    /* Mask all upcalls... */
  30.308 -    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
  30.309 -        d->shared_info->vcpu_data[i].evtchn_upcall_mask = 1;
  30.310 -    d->shared_info->n_vcpu = smp_num_cpus;
  30.311 -
  30.312 -    /* setup shadow and monitor tables */
  30.313 -    update_pagetables(ed);
  30.314 -
  30.315 -    /* Install the new page tables. */
  30.316 -    __cli();
  30.317 -    write_ptbase(ed);
  30.318 -
  30.319 -    /* Copy the OS image. */
  30.320 -    (void)loadelfimage(image_start);
  30.321 -
  30.322 -    /* Copy the initial ramdisk. */
  30.323 -    if ( initrd_len != 0 )
  30.324 -        memcpy((void *)vinitrd_start, initrd_start, initrd_len);
  30.325 -    
  30.326 -    /* Set up start info area. */
  30.327 -    si = (start_info_t *)vstartinfo_start;
  30.328 -    memset(si, 0, PAGE_SIZE);
  30.329 -    si->nr_pages     = d->tot_pages;
  30.330 -    si->shared_info  = virt_to_phys(d->shared_info);
  30.331 -    si->flags        = SIF_PRIVILEGED | SIF_INITDOMAIN;
  30.332 -    si->pt_base      = vpt_start;
  30.333 -    si->nr_pt_frames = nr_pt_pages;
  30.334 -    si->mfn_list     = vphysmap_start;
  30.335 -
  30.336 -    /* Write the phys->machine and machine->phys table entries. */
  30.337 -    for ( pfn = 0; pfn < d->tot_pages; pfn++ )
  30.338 -    {
  30.339 -        mfn = pfn + (alloc_start>>PAGE_SHIFT);
  30.340 -#ifndef NDEBUG
  30.341 -#define REVERSE_START ((v_end - dsi.v_start) >> PAGE_SHIFT)
  30.342 -        if ( pfn > REVERSE_START )
  30.343 -            mfn = (alloc_end>>PAGE_SHIFT) - (pfn - REVERSE_START);
  30.344 -#endif
  30.345 -        ((u32 *)vphysmap_start)[pfn] = mfn;
  30.346 -        machine_to_phys_mapping[mfn] = pfn;
  30.347 -    }
  30.348 -
  30.349 -    if ( initrd_len != 0 )
  30.350 -    {
  30.351 -        si->mod_start = vinitrd_start;
  30.352 -        si->mod_len   = initrd_len;
  30.353 -        printk("Initrd len 0x%lx, start at 0x%p\n",
  30.354 -               si->mod_len, si->mod_start);
  30.355 -    }
  30.356 -
  30.357 -    dst = si->cmd_line;
  30.358 -    if ( cmdline != NULL )
  30.359 -    {
  30.360 -        for ( i = 0; i < 255; i++ )
  30.361 -        {
  30.362 -            if ( cmdline[i] == '\0' )
  30.363 -                break;
  30.364 -            *dst++ = cmdline[i];
  30.365 -        }
  30.366 -    }
  30.367 -    *dst = '\0';
  30.368 -
  30.369 -    /* Reinstate the caller's page tables. */
  30.370 -    write_ptbase(current);
  30.371 -    __sti();
  30.372 -
  30.373 -    /* Destroy low mappings - they were only for our convenience. */
  30.374 -    for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
  30.375 -        if ( l2_pgentry_val(l2start[i]) & _PAGE_PSE )
  30.376 -            l2start[i] = mk_l2_pgentry(0);
  30.377 -    zap_low_mappings(); /* Do the same for the idle page tables. */
  30.378 -    
  30.379 -    /* DOM0 gets access to everything. */
  30.380 -    physdev_init_dom0(d);
  30.381 -
  30.382 -    set_bit(DF_CONSTRUCTED, &d->d_flags);
  30.383 -
  30.384 -    new_thread(ed, dsi.v_kernentry, vstack_end, vstartinfo_start);
  30.385 -
  30.386 -#ifndef NDEBUG
  30.387 -    if (0) /* XXXXX DO NOT CHECK IN ENABLED !!! (but useful for testing so leave) */
  30.388 -    {
  30.389 -        shadow_mode_enable(d, SHM_enable); 
  30.390 -        update_pagetables(ed); /* XXX SMP */
  30.391 -    }
  30.392 -#endif
  30.393 -
  30.394 -    return 0;
  30.395 -}
  30.396 -
  30.397 -int elf_sanity_check(Elf_Ehdr *ehdr)
  30.398 -{
  30.399 -    if ( !IS_ELF(*ehdr) ||
  30.400 -         (ehdr->e_ident[EI_CLASS] != ELFCLASS32) ||
  30.401 -         (ehdr->e_ident[EI_DATA] != ELFDATA2LSB) ||
  30.402 -         (ehdr->e_type != ET_EXEC) ||
  30.403 -         (ehdr->e_machine != EM_386) )
  30.404 -    {
  30.405 -        printk("DOM0 image is not i386-compatible executable Elf image.\n");
  30.406 -        return 0;
  30.407 -    }
  30.408 -
  30.409 -    return 1;
  30.410 -}
  30.411 -
  30.412 -/*
  30.413 - * Local variables:
  30.414 - * mode: C
  30.415 - * c-set-style: "BSD"
  30.416 - * c-basic-offset: 4
  30.417 - * tab-width: 4
  30.418 - * indent-tabs-mode: nil
  30.419 - */
    31.1 --- a/xen/arch/x86/x86_32/domain_page.c	Tue Mar 01 13:47:27 2005 +0000
    31.2 +++ b/xen/arch/x86/x86_32/domain_page.c	Tue Mar 01 13:47:52 2005 +0000
    31.3 @@ -45,7 +45,7 @@ void *map_domain_mem(unsigned long pa)
    31.4      unsigned int idx, cpu = smp_processor_id();
    31.5      unsigned long *cache = mapcache;
    31.6  #ifndef NDEBUG
    31.7 -    unsigned flush_count = 0;
    31.8 +    unsigned int flush_count = 0;
    31.9  #endif
   31.10  
   31.11      ASSERT(!in_irq());
   31.12 @@ -65,17 +65,11 @@ void *map_domain_mem(unsigned long pa)
   31.13          idx = map_idx = (map_idx + 1) & (MAPCACHE_ENTRIES - 1);
   31.14          if ( unlikely(idx == 0) )
   31.15          {
   31.16 +            ASSERT(flush_count++ == 0);
   31.17              flush_all_ready_maps();
   31.18              perfc_incrc(domain_page_tlb_flush);
   31.19              local_flush_tlb();
   31.20              shadow_epoch[cpu] = ++epoch;
   31.21 -#ifndef NDEBUG
   31.22 -            if ( unlikely(flush_count++) )
   31.23 -            {
   31.24 -                // we've run out of map cache entries...
   31.25 -                BUG();
   31.26 -            }
   31.27 -#endif
   31.28          }
   31.29      }
   31.30      while ( cache[idx] != 0 );
    32.1 --- a/xen/arch/x86/x86_64/domain_build.c	Tue Mar 01 13:47:27 2005 +0000
    32.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    32.3 @@ -1,424 +0,0 @@
    32.4 -/******************************************************************************
    32.5 - * domain_build.c
    32.6 - * 
    32.7 - * Copyright (c) 2002-2005, K A Fraser
    32.8 - */
    32.9 -
   32.10 -#include <xen/config.h>
   32.11 -#include <xen/init.h>
   32.12 -#include <xen/lib.h>
   32.13 -#include <xen/sched.h>
   32.14 -#include <xen/smp.h>
   32.15 -#include <xen/delay.h>
   32.16 -#include <asm/regs.h>
   32.17 -#include <asm/system.h>
   32.18 -#include <asm/io.h>
   32.19 -#include <asm/processor.h>
   32.20 -#include <asm/shadow.h>
   32.21 -#include <asm/desc.h>
   32.22 -#include <asm/i387.h>
   32.23 -#include <xen/event.h>
   32.24 -#include <xen/elf.h>
   32.25 -#include <xen/kernel.h>
   32.26 -
   32.27 -/* Allow ring-3 access in long mode as guest cannot use ring 1. */
   32.28 -#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
   32.29 -#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
   32.30 -#define L3_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
   32.31 -#define L4_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
   32.32 -
   32.33 -#define round_pgup(_p)    (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
   32.34 -#define round_pgdown(_p)  ((_p)&PAGE_MASK)
   32.35 -
   32.36 -int construct_dom0(struct domain *d,
   32.37 -                   unsigned long alloc_start,
   32.38 -                   unsigned long alloc_end,
   32.39 -                   unsigned long _image_start, unsigned long image_len, 
   32.40 -                   unsigned long _initrd_start, unsigned long initrd_len,
   32.41 -                   char *cmdline)
   32.42 -{
   32.43 -    char *dst;
   32.44 -    int i, rc;
   32.45 -    unsigned long pfn, mfn;
   32.46 -    unsigned long nr_pages = (alloc_end - alloc_start) >> PAGE_SHIFT;
   32.47 -    unsigned long nr_pt_pages;
   32.48 -    unsigned long count;
   32.49 -    l4_pgentry_t *l4tab = NULL, *l4start = NULL;
   32.50 -    l3_pgentry_t *l3tab = NULL, *l3start = NULL;
   32.51 -    l2_pgentry_t *l2tab = NULL, *l2start = NULL;
   32.52 -    l1_pgentry_t *l1tab = NULL, *l1start = NULL;
   32.53 -    struct pfn_info *page = NULL;
   32.54 -    start_info_t *si;
   32.55 -    struct exec_domain *ed = d->exec_domain[0];
   32.56 -    char *image_start  = __va(_image_start);
   32.57 -    char *initrd_start = __va(_initrd_start);
   32.58 -
   32.59 -    /*
   32.60 -     * This fully describes the memory layout of the initial domain. All 
   32.61 -     * *_start address are page-aligned, except v_start (and v_end) which are 
   32.62 -     * superpage-aligned.
   32.63 -     */
   32.64 -    struct domain_setup_info dsi;
   32.65 -    unsigned long vinitrd_start;
   32.66 -    unsigned long vinitrd_end;
   32.67 -    unsigned long vphysmap_start;
   32.68 -    unsigned long vphysmap_end;
   32.69 -    unsigned long vstartinfo_start;
   32.70 -    unsigned long vstartinfo_end;
   32.71 -    unsigned long vstack_start;
   32.72 -    unsigned long vstack_end;
   32.73 -    unsigned long vpt_start;
   32.74 -    unsigned long vpt_end;
   32.75 -    unsigned long v_end;
   32.76 -
   32.77 -    /* Machine address of next candidate page-table page. */
   32.78 -    unsigned long mpt_alloc;
   32.79 -
   32.80 -    extern void physdev_init_dom0(struct domain *);
   32.81 -
   32.82 -    /* Sanity! */
   32.83 -    if ( d->id != 0 ) 
   32.84 -        BUG();
   32.85 -    if ( test_bit(DF_CONSTRUCTED, &d->d_flags) ) 
   32.86 -        BUG();
   32.87 -
   32.88 -    memset(&dsi, 0, sizeof(struct domain_setup_info));
   32.89 -
   32.90 -    printk("*** LOADING DOMAIN 0 ***\n");
   32.91 -
   32.92 -    /*
   32.93 -     * This is all a bit grim. We've moved the modules to the "safe" physical 
   32.94 -     * memory region above MAP_DIRECTMAP_ADDRESS (48MB). Later in this 
   32.95 -     * routine we're going to copy it down into the region that's actually 
   32.96 -     * been allocated to domain 0. This is highly likely to be overlapping, so 
   32.97 -     * we use a forward copy.
   32.98 -     * 
   32.99 -     * MAP_DIRECTMAP_ADDRESS should be safe. The worst case is a machine with 
  32.100 -     * 4GB and lots of network/disk cards that allocate loads of buffers. 
  32.101 -     * We'll have to revisit this if we ever support PAE (64GB).
  32.102 -     */
  32.103 -
  32.104 -    rc = parseelfimage(image_start, image_len, &dsi);
  32.105 -    if ( rc != 0 )
  32.106 -        return rc;
  32.107 -
  32.108 -    /* Set up domain options */
  32.109 -    if ( dsi.use_writable_pagetables )
  32.110 -        vm_assist(d, VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);
  32.111 -
  32.112 -    /* Align load address to 4MB boundary. */
  32.113 -    dsi.v_start &= ~((1UL<<22)-1);
  32.114 -
  32.115 -    /*
  32.116 -     * Why do we need this? The number of page-table frames depends on the 
  32.117 -     * size of the bootstrap address space. But the size of the address space 
  32.118 -     * depends on the number of page-table frames (since each one is mapped 
  32.119 -     * read-only). We have a pair of simultaneous equations in two unknowns, 
  32.120 -     * which we solve by exhaustive search.
  32.121 -     */
  32.122 -    vinitrd_start    = round_pgup(dsi.v_kernend);
  32.123 -    vinitrd_end      = vinitrd_start + initrd_len;
  32.124 -    vphysmap_start   = round_pgup(vinitrd_end);
  32.125 -    vphysmap_end     = vphysmap_start + (nr_pages * sizeof(u32));
  32.126 -    vpt_start        = round_pgup(vphysmap_end);
  32.127 -    for ( nr_pt_pages = 2; ; nr_pt_pages++ )
  32.128 -    {
  32.129 -        vpt_end          = vpt_start + (nr_pt_pages * PAGE_SIZE);
  32.130 -        vstartinfo_start = vpt_end;
  32.131 -        vstartinfo_end   = vstartinfo_start + PAGE_SIZE;
  32.132 -        vstack_start     = vstartinfo_end;
  32.133 -        vstack_end       = vstack_start + PAGE_SIZE;
  32.134 -        v_end            = (vstack_end + (1UL<<22)-1) & ~((1UL<<22)-1);
  32.135 -        if ( (v_end - vstack_end) < (512UL << 10) )
  32.136 -            v_end += 1UL << 22; /* Add extra 4MB to get >= 512kB padding. */
  32.137 -#define NR(_l,_h,_s) \
  32.138 -    (((((_h) + ((1UL<<(_s))-1)) & ~((1UL<<(_s))-1)) - \
  32.139 -       ((_l) & ~((1UL<<(_s))-1))) >> (_s))
  32.140 -        if ( (1 + /* # L4 */
  32.141 -              NR(dsi.v_start, v_end, L4_PAGETABLE_SHIFT) + /* # L3 */
  32.142 -              NR(dsi.v_start, v_end, L3_PAGETABLE_SHIFT) + /* # L2 */
  32.143 -              NR(dsi.v_start, v_end, L2_PAGETABLE_SHIFT))  /* # L1 */
  32.144 -             <= nr_pt_pages )
  32.145 -            break;
  32.146 -    }
  32.147 -
  32.148 -    printk("PHYSICAL MEMORY ARRANGEMENT:\n"
  32.149 -           " Kernel image:  %p->%p\n"
  32.150 -           " Initrd image:  %p->%p\n"
  32.151 -           " Dom0 alloc.:   %p->%p\n",
  32.152 -           _image_start, _image_start + image_len,
  32.153 -           _initrd_start, _initrd_start + initrd_len,
  32.154 -           alloc_start, alloc_end);
  32.155 -    printk("VIRTUAL MEMORY ARRANGEMENT:\n"
  32.156 -           " Loaded kernel: %p->%p\n"
  32.157 -           " Init. ramdisk: %p->%p\n"
  32.158 -           " Phys-Mach map: %p->%p\n"
  32.159 -           " Page tables:   %p->%p\n"
  32.160 -           " Start info:    %p->%p\n"
  32.161 -           " Boot stack:    %p->%p\n"
  32.162 -           " TOTAL:         %p->%p\n",
  32.163 -           dsi.v_kernstart, dsi.v_kernend, 
  32.164 -           vinitrd_start, vinitrd_end,
  32.165 -           vphysmap_start, vphysmap_end,
  32.166 -           vpt_start, vpt_end,
  32.167 -           vstartinfo_start, vstartinfo_end,
  32.168 -           vstack_start, vstack_end,
  32.169 -           dsi.v_start, v_end);
  32.170 -    printk(" ENTRY ADDRESS: %p\n", dsi.v_kernentry);
  32.171 -
  32.172 -    if ( (v_end - dsi.v_start) > (nr_pages * PAGE_SIZE) )
  32.173 -    {
  32.174 -        printk("Initial guest OS requires too much space\n"
  32.175 -               "(%luMB is greater than %luMB limit)\n",
  32.176 -               (v_end-dsi.v_start)>>20, (nr_pages<<PAGE_SHIFT)>>20);
  32.177 -        return -ENOMEM;
  32.178 -    }
  32.179 -
  32.180 -    /* Overlap with Xen protected area? */
  32.181 -    if ( (dsi.v_start < HYPERVISOR_VIRT_END) &&
  32.182 -         (v_end > HYPERVISOR_VIRT_START) )
  32.183 -    {
  32.184 -        printk("DOM0 image overlaps with Xen private area.\n");
  32.185 -        return -EINVAL;
  32.186 -    }
  32.187 -
  32.188 -    /* Paranoia: scrub DOM0's memory allocation. */
  32.189 -    printk("Scrubbing DOM0 RAM: ");
  32.190 -    dst = __va(alloc_start);
  32.191 -    while ( __pa(dst) < alloc_end )
  32.192 -    {
  32.193 -#define SCRUB_BYTES (100 * 1024 * 1024) /* 100MB */
  32.194 -        printk(".");
  32.195 -        touch_nmi_watchdog();
  32.196 -        if ( (alloc_end - __pa(dst)) > SCRUB_BYTES )
  32.197 -        {
  32.198 -            memset(dst, 0, SCRUB_BYTES);
  32.199 -            dst += SCRUB_BYTES;
  32.200 -        }
  32.201 -        else
  32.202 -        {
  32.203 -            memset(dst, 0, alloc_end - __pa(dst));
  32.204 -            break;
  32.205 -        }
  32.206 -    }
  32.207 -    printk("done.\n");
  32.208 -
  32.209 -    /* Construct a frame-allocation list for the initial domain. */
  32.210 -    for ( mfn = (alloc_start>>PAGE_SHIFT);
  32.211 -          mfn < (alloc_end>>PAGE_SHIFT);
  32.212 -          mfn++ )
  32.213 -    {
  32.214 -        page = &frame_table[mfn];
  32.215 -        page_set_owner(page, d);
  32.216 -        page->u.inuse.type_info = 0;
  32.217 -        page->count_info        = PGC_allocated | 1;
  32.218 -        list_add_tail(&page->list, &d->page_list);
  32.219 -        d->tot_pages++; d->max_pages++;
  32.220 -    }
  32.221 -
  32.222 -    mpt_alloc = (vpt_start - dsi.v_start) + alloc_start;
  32.223 -
  32.224 -    SET_GDT_ENTRIES(ed, DEFAULT_GDT_ENTRIES);
  32.225 -    SET_GDT_ADDRESS(ed, DEFAULT_GDT_ADDRESS);
  32.226 -
  32.227 -    /*
  32.228 -     * We're basically forcing default RPLs to 1, so that our "what privilege
  32.229 -     * level are we returning to?" logic works.
  32.230 -     */
  32.231 -    ed->arch.failsafe_selector = FLAT_KERNEL_CS;
  32.232 -    ed->arch.event_selector    = FLAT_KERNEL_CS;
  32.233 -    ed->arch.kernel_ss = FLAT_KERNEL_SS;
  32.234 -    for ( i = 0; i < 256; i++ ) 
  32.235 -        ed->arch.traps[i].cs = FLAT_KERNEL_CS;
  32.236 -
  32.237 -    /* WARNING: The new domain must have its 'processor' field filled in! */
  32.238 -    phys_to_page(mpt_alloc)->u.inuse.type_info = PGT_l4_page_table;
  32.239 -    l4start = l4tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE;
  32.240 -    memcpy(l4tab, &idle_pg_table[0], PAGE_SIZE);
  32.241 -    l4tab[l4_table_offset(LINEAR_PT_VIRT_START)] =
  32.242 -        mk_l4_pgentry(__pa(l4start) | __PAGE_HYPERVISOR);
  32.243 -    l4tab[l4_table_offset(PERDOMAIN_VIRT_START)] =
  32.244 -        mk_l4_pgentry(__pa(d->arch.mm_perdomain_l3) | __PAGE_HYPERVISOR);
  32.245 -    ed->arch.guest_table = mk_pagetable(__pa(l4start));
  32.246 -
  32.247 -    l4tab += l4_table_offset(dsi.v_start);
  32.248 -    mfn = alloc_start >> PAGE_SHIFT;
  32.249 -    for ( count = 0; count < ((v_end-dsi.v_start)>>PAGE_SHIFT); count++ )
  32.250 -    {
  32.251 -        if ( !((unsigned long)l1tab & (PAGE_SIZE-1)) )
  32.252 -        {
  32.253 -            phys_to_page(mpt_alloc)->u.inuse.type_info = PGT_l1_page_table;
  32.254 -            l1start = l1tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE;
  32.255 -            clear_page(l1tab);
  32.256 -            if ( count == 0 )
  32.257 -                l1tab += l1_table_offset(dsi.v_start);
  32.258 -            if ( !((unsigned long)l2tab & (PAGE_SIZE-1)) )
  32.259 -            {
  32.260 -                phys_to_page(mpt_alloc)->u.inuse.type_info = PGT_l2_page_table;
  32.261 -                l2start = l2tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE;
  32.262 -                clear_page(l2tab);
  32.263 -                if ( count == 0 )
  32.264 -                    l2tab += l2_table_offset(dsi.v_start);
  32.265 -                if ( !((unsigned long)l3tab & (PAGE_SIZE-1)) )
  32.266 -                {
  32.267 -                    phys_to_page(mpt_alloc)->u.inuse.type_info =
  32.268 -                        PGT_l3_page_table;
  32.269 -                    l3start = l3tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE;
  32.270 -                    clear_page(l3tab);
  32.271 -                    if ( count == 0 )
  32.272 -                        l3tab += l3_table_offset(dsi.v_start);
  32.273 -                    *l4tab++ = mk_l4_pgentry(__pa(l3start) | L4_PROT);
  32.274 -                }
  32.275 -                *l3tab++ = mk_l3_pgentry(__pa(l2start) | L3_PROT);
  32.276 -            }
  32.277 -            *l2tab++ = mk_l2_pgentry(__pa(l1start) | L2_PROT);
  32.278 -        }
  32.279 -        *l1tab++ = mk_l1_pgentry((mfn << PAGE_SHIFT) | L1_PROT);
  32.280 -
  32.281 -        page = &frame_table[mfn];
  32.282 -        if ( (page->u.inuse.type_info == 0) &&
  32.283 -             !get_page_and_type(page, d, PGT_writable_page) )
  32.284 -            BUG();
  32.285 -
  32.286 -        mfn++;
  32.287 -    }
  32.288 -
  32.289 -    /* Pages that are part of page tables must be read only. */
  32.290 -    l4tab = l4start + l4_table_offset(vpt_start);
  32.291 -    l3start = l3tab = l4_pgentry_to_l3(*l4tab);
  32.292 -    l3tab += l3_table_offset(vpt_start);
  32.293 -    l2start = l2tab = l3_pgentry_to_l2(*l3tab);
  32.294 -    l2tab += l2_table_offset(vpt_start);
  32.295 -    l1start = l1tab = l2_pgentry_to_l1(*l2tab);
  32.296 -    l1tab += l1_table_offset(vpt_start);
  32.297 -    for ( count = 0; count < nr_pt_pages; count++ ) 
  32.298 -    {
  32.299 -        *l1tab = mk_l1_pgentry(l1_pgentry_val(*l1tab) & ~_PAGE_RW);
  32.300 -        page = &frame_table[l1_pgentry_to_pfn(*l1tab)];
  32.301 -
  32.302 -        /* Read-only mapping + PGC_allocated + page-table page. */
  32.303 -        page->count_info         = PGC_allocated | 3;
  32.304 -        page->u.inuse.type_info |= PGT_validated | 1;
  32.305 -
  32.306 -        /* Top-level p.t. is pinned. */
  32.307 -        if ( (page->u.inuse.type_info & PGT_type_mask) == PGT_l4_page_table )
  32.308 -        {
  32.309 -            page->count_info        += 1;
  32.310 -            page->u.inuse.type_info += 1 | PGT_pinned;
  32.311 -        }
  32.312 -
  32.313 -        /* Iterate. */
  32.314 -        if ( !((unsigned long)++l1tab & (PAGE_SIZE - 1)) )
  32.315 -        {
  32.316 -            if ( !((unsigned long)++l2tab & (PAGE_SIZE - 1)) )
  32.317 -            {
  32.318 -                if ( !((unsigned long)++l3tab & (PAGE_SIZE - 1)) )
  32.319 -                    l3start = l3tab = l4_pgentry_to_l3(*++l4tab); 
  32.320 -                l2start = l2tab = l3_pgentry_to_l2(*l3tab);
  32.321 -            }
  32.322 -            l1start = l1tab = l2_pgentry_to_l1(*l2tab);
  32.323 -        }
  32.324 -    }
  32.325 -
  32.326 -    /* Set up shared-info area. */
  32.327 -    update_dom_time(d);
  32.328 -    d->shared_info->domain_time = 0;
  32.329 -    /* Mask all upcalls... */
  32.330 -    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
  32.331 -        d->shared_info->vcpu_data[i].evtchn_upcall_mask = 1;
  32.332 -    d->shared_info->n_vcpu = smp_num_cpus;
  32.333 -
  32.334 -    /* Set up shadow and monitor tables. */
  32.335 -    update_pagetables(ed);
  32.336 -
  32.337 -    /* Install the new page tables. */
  32.338 -    __cli();
  32.339 -    write_ptbase(ed);
  32.340 -
  32.341 -    /* Copy the OS image. */
  32.342 -    (void)loadelfimage(image_start);
  32.343 -
  32.344 -    /* Copy the initial ramdisk. */
  32.345 -    if ( initrd_len != 0 )
  32.346 -        memcpy((void *)vinitrd_start, initrd_start, initrd_len);
  32.347 -    
  32.348 -    /* Set up start info area. */
  32.349 -    si = (start_info_t *)vstartinfo_start;
  32.350 -    memset(si, 0, PAGE_SIZE);
  32.351 -    si->nr_pages     = d->tot_pages;
  32.352 -    si->shared_info  = virt_to_phys(d->shared_info);
  32.353 -    si->flags        = SIF_PRIVILEGED | SIF_INITDOMAIN;
  32.354 -    si->pt_base      = vpt_start;
  32.355 -    si->nr_pt_frames = nr_pt_pages;
  32.356 -    si->mfn_list     = vphysmap_start;
  32.357 -
  32.358 -    /* Write the phys->machine and machine->phys table entries. */
  32.359 -    for ( pfn = 0; pfn < d->tot_pages; pfn++ )
  32.360 -    {
  32.361 -        mfn = pfn + (alloc_start>>PAGE_SHIFT);
  32.362 -#ifndef NDEBUG
  32.363 -#define REVERSE_START ((v_end - dsi.v_start) >> PAGE_SHIFT)
  32.364 -        if ( pfn > REVERSE_START )
  32.365 -            mfn = (alloc_end>>PAGE_SHIFT) - (pfn - REVERSE_START);
  32.366 -#endif
  32.367 -        ((u32 *)vphysmap_start)[pfn] = mfn;
  32.368 -        machine_to_phys_mapping[mfn] = pfn;
  32.369 -    }
  32.370 -
  32.371 -    if ( initrd_len != 0 )
  32.372 -    {
  32.373 -        si->mod_start = vinitrd_start;
  32.374 -        si->mod_len   = initrd_len;
  32.375 -        printk("Initrd len 0x%lx, start at 0x%p\n",
  32.376 -               si->mod_len, si->mod_start);
  32.377 -    }
  32.378 -
  32.379 -    dst = si->cmd_line;
  32.380 -    if ( cmdline != NULL )
  32.381 -    {
  32.382 -        for ( i = 0; i < 255; i++ )
  32.383 -        {
  32.384 -            if ( cmdline[i] == '\0' )
  32.385 -                break;
  32.386 -            *dst++ = cmdline[i];
  32.387 -        }
  32.388 -    }
  32.389 -    *dst = '\0';
  32.390 -
  32.391 -    /* Reinstate the caller's page tables. */
  32.392 -    write_ptbase(current);
  32.393 -    __sti();
  32.394 -
  32.395 -    /* DOM0 gets access to everything. */
  32.396 -    physdev_init_dom0(d);
  32.397 -
  32.398 -    set_bit(DF_CONSTRUCTED, &d->d_flags);
  32.399 -
  32.400 -    new_thread(ed, dsi.v_kernentry, vstack_end, vstartinfo_start);
  32.401 -
  32.402 -    return 0;
  32.403 -}
  32.404 -
  32.405 -int elf_sanity_check(Elf_Ehdr *ehdr)
  32.406 -{
  32.407 -    if ( !IS_ELF(*ehdr) ||
  32.408 -         (ehdr->e_ident[EI_CLASS] != ELFCLASS64) ||
  32.409 -         (ehdr->e_ident[EI_DATA] != ELFDATA2LSB) ||
  32.410 -         (ehdr->e_type != ET_EXEC) ||
  32.411 -         (ehdr->e_machine != EM_X86_64) )
  32.412 -    {
  32.413 -        printk("DOM0 image is not x86/64-compatible executable Elf image.\n");
  32.414 -        return 0;
  32.415 -    }
  32.416 -
  32.417 -    return 1;
  32.418 -}
  32.419 -
  32.420 -/*
  32.421 - * Local variables:
  32.422 - * mode: C
  32.423 - * c-set-style: "BSD"
  32.424 - * c-basic-offset: 4
  32.425 - * tab-width: 4
  32.426 - * indent-tabs-mode: nil
  32.427 - */
    33.1 --- a/xen/common/elf.c	Tue Mar 01 13:47:27 2005 +0000
    33.2 +++ b/xen/common/elf.c	Tue Mar 01 13:47:52 2005 +0000
    33.3 @@ -76,9 +76,9 @@ int parseelfimage(char *elfbase,
    33.4              return -EINVAL;
    33.5          }
    33.6  
    33.7 -        if ( (strstr(guestinfo, "XEN_VER=2.0") == NULL) )
    33.8 +        if ( (strstr(guestinfo, "XEN_VER=3.0") == NULL) )
    33.9          {
   33.10 -            printk("ERROR: Xen will only load images built for Xen v2.0\n");
   33.11 +            printk("ERROR: Xen will only load images built for Xen v3.0\n");
   33.12              return -EINVAL;
   33.13          }
   33.14  
    34.1 --- a/xen/common/page_alloc.c	Tue Mar 01 13:47:27 2005 +0000
    34.2 +++ b/xen/common/page_alloc.c	Tue Mar 01 13:47:52 2005 +0000
    34.3 @@ -203,8 +203,8 @@ unsigned long alloc_boot_pages(unsigned 
    34.4  #define MEMZONE_DOM 1
    34.5  #define NR_ZONES    2
    34.6  
    34.7 -/* Up to 2^10 pages can be allocated at once. */
    34.8 -#define MAX_ORDER 10
    34.9 +/* Up to 2^20 pages can be allocated at once. */
   34.10 +#define MAX_ORDER 20
   34.11  static struct list_head heap[NR_ZONES][MAX_ORDER+1];
   34.12  
   34.13  static unsigned long avail[NR_ZONES];
    35.1 --- a/xen/drivers/char/console.c	Tue Mar 01 13:47:27 2005 +0000
    35.2 +++ b/xen/drivers/char/console.c	Tue Mar 01 13:47:52 2005 +0000
    35.3 @@ -577,6 +577,8 @@ static int __init debugtrace_init(void)
    35.4      debugtrace_buf = (unsigned char *)alloc_xenheap_pages(order);
    35.5      ASSERT(debugtrace_buf != NULL);
    35.6  
    35.7 +    memset(debugtrace_buf, '\0', debugtrace_bytes);
    35.8 +
    35.9      return 0;
   35.10  }
   35.11  __initcall(debugtrace_init);
    36.1 --- a/xen/include/asm-x86/shadow.h	Tue Mar 01 13:47:27 2005 +0000
    36.2 +++ b/xen/include/asm-x86/shadow.h	Tue Mar 01 13:47:52 2005 +0000
    36.3 @@ -13,17 +13,20 @@
    36.4  #define PSH_hl2         (1<<30) /* page is an hl2 */
    36.5  #define PSH_pfn_mask    ((1<<21)-1)
    36.6  
    36.7 -/* Shadow PT operation mode : shadow-mode variable in arch_domain. */
    36.8 -
    36.9 +/* Shadow PT operation mode: shadow-mode variable in arch_domain. */
   36.10  #define SHM_enable    (1<<0) /* we're in one of the shadow modes */
   36.11  #define SHM_log_dirty (1<<1) /* enable log dirty mode */
   36.12 -#define SHM_translate (1<<2) /* do p2m tranaltion on guest tables */
   36.13 +#define SHM_translate (1<<2) /* do p2m translation on guest tables */
   36.14  #define SHM_external  (1<<3) /* external page table, not used by Xen */
   36.15  
   36.16  #define shadow_mode_enabled(_d)   ((_d)->arch.shadow_mode)
   36.17  #define shadow_mode_log_dirty(_d) ((_d)->arch.shadow_mode & SHM_log_dirty)
   36.18  #define shadow_mode_translate(_d) ((_d)->arch.shadow_mode & SHM_translate)
   36.19 +#ifndef __x86_64__ /* XXX Currently breaks the 64-bit build. */
   36.20  #define shadow_mode_external(_d)  ((_d)->arch.shadow_mode & SHM_external)
   36.21 +#else
   36.22 +#define shadow_mode_external(_d)  (0)
   36.23 +#endif
   36.24  
   36.25  #define shadow_linear_pg_table ((l1_pgentry_t *)SH_LINEAR_PT_VIRT_START)
   36.26  #define shadow_linear_l2_table ((l2_pgentry_t *)(SH_LINEAR_PT_VIRT_START + \
   36.27 @@ -804,6 +807,10 @@ static inline void update_pagetables(str
   36.28  
   36.29      if ( !shadow_mode_external(d) )
   36.30      {
   36.31 +        /*
   36.32 +         * Internal page tables:
   36.33 +         * No need to allocate a separate page table for Xen.
   36.34 +         */
   36.35  #ifdef __x86_64__
   36.36          if ( !(ed->arch.flags & TF_kernel_mode) )
   36.37              ed->arch.monitor_table = ed->arch.guest_table_user;
   36.38 @@ -816,9 +823,10 @@ static inline void update_pagetables(str
   36.39      }
   36.40      else
   36.41      {
   36.42 -        // External page tables...
   36.43 -        // Allocate a monitor page table if we don't already have one.
   36.44 -        //
   36.45 +        /*
   36.46 +         * External page tables:
   36.47 +         * Allocate a monitor page table if we don't already have one.
   36.48 +         */
   36.49          if ( unlikely(!pagetable_val(ed->arch.monitor_table)) )
   36.50              ed->arch.monitor_table =
   36.51                  mk_pagetable(alloc_monitor_pagetable(ed) << PAGE_SHIFT);
    37.1 --- a/xen/include/xen/sched.h	Tue Mar 01 13:47:27 2005 +0000
    37.2 +++ b/xen/include/xen/sched.h	Tue Mar 01 13:47:52 2005 +0000
    37.3 @@ -215,12 +215,11 @@ static inline void get_knownalive_domain
    37.4    
    37.5  extern struct domain *do_createdomain(
    37.6      domid_t dom_id, unsigned int cpu);
    37.7 -extern int construct_dom0(struct domain *d, 
    37.8 -                          unsigned long alloc_start,
    37.9 -                          unsigned long alloc_end,
   37.10 -                          unsigned long image_start, unsigned long image_len, 
   37.11 -                          unsigned long initrd_start, unsigned long initrd_len,
   37.12 -                          char *cmdline);
   37.13 +extern int construct_dom0(
   37.14 +    struct domain *d,
   37.15 +    unsigned long image_start, unsigned long image_len, 
   37.16 +    unsigned long initrd_start, unsigned long initrd_len,
   37.17 +    char *cmdline);
   37.18  extern int final_setup_guest(struct domain *d, dom0_builddomain_t *);
   37.19  
   37.20  struct domain *find_domain_by_id(domid_t dom);