ia64/xen-unstable

changeset 15074:23c4790512db

xen: Big changes to x86 start-of-day:

1. x86/64 Xen now relocates itself to physical high memory. This is
useful if we have devices that need very low memory, or if in
future we want to grant a 1:1 mapping of low physical memory to a
special 'native client domain'.

2. We now only map low 16MB RAM statically. All other RAM is mapped
dynamically within the constraints of the e820 map. It is
recommended never to map MMIO regions, and this change means that
Xen now obeys this constraint.

3. The CPU bootup trampoline is now permanently installed at
0x90000. This is a necessary prerequisite for CPU hotplug.

4. Start-of-day asm is generally cleaned up and diff between x86/32
and x86/64 is reduced.

Signed-off-by: Keir Fraser <keir@xensource.com>
author kfraser@localhost.localdomain
date Thu May 10 18:02:55 2007 +0100 (2007-05-10)
parents 07b1e917c9d8
children dfbbb4d3b0dd
files xen/arch/ia64/xen/mm.c xen/arch/x86/Makefile xen/arch/x86/boot/Makefile xen/arch/x86/boot/head.S xen/arch/x86/boot/trampoline.S xen/arch/x86/boot/x86_32.S xen/arch/x86/boot/x86_64.S xen/arch/x86/e820.c xen/arch/x86/mm.c xen/arch/x86/setup.c xen/arch/x86/smpboot.c xen/arch/x86/trampoline.S xen/arch/x86/traps.c xen/arch/x86/x86_32/gpr_switch.S xen/arch/x86/x86_32/mm.c xen/arch/x86/x86_64/compat_kexec.S xen/arch/x86/x86_64/gpr_switch.S xen/arch/x86/x86_64/mm.c xen/arch/x86/x86_64/traps.c xen/arch/x86/x86_64/xen.lds.S xen/common/grant_table.c xen/common/page_alloc.c xen/include/asm-ia64/mm.h xen/include/asm-powerpc/mm.h xen/include/asm-x86/config.h xen/include/asm-x86/mm.h xen/include/asm-x86/page.h xen/include/asm-x86/x86_32/page.h xen/include/asm-x86/x86_64/page.h
line diff
     1.1 --- a/xen/arch/ia64/xen/mm.c	Thu May 10 16:22:27 2007 +0100
     1.2 +++ b/xen/arch/ia64/xen/mm.c	Thu May 10 18:02:55 2007 +0100
     1.3 @@ -2118,7 +2118,7 @@ arch_memory_op(int op, XEN_GUEST_HANDLE(
     1.4  
     1.5          /* Remove previously mapped page if it was present. */
     1.6          if (prev_mfn && mfn_valid(prev_mfn)) {
     1.7 -            if (IS_XEN_HEAP_FRAME(mfn_to_page(prev_mfn)))
     1.8 +            if (is_xen_heap_frame(mfn_to_page(prev_mfn)))
     1.9                  /* Xen heap frames are simply unhooked from this phys slot. */
    1.10                  guest_physmap_remove_page(d, xatp.gpfn, prev_mfn);
    1.11              else
     2.1 --- a/xen/arch/x86/Makefile	Thu May 10 16:22:27 2007 +0100
     2.2 +++ b/xen/arch/x86/Makefile	Thu May 10 18:02:55 2007 +0100
     2.3 @@ -39,7 +39,6 @@ obj-y += srat.o
     2.4  obj-y += string.o
     2.5  obj-y += sysctl.o
     2.6  obj-y += time.o
     2.7 -obj-y += trampoline.o
     2.8  obj-y += traps.o
     2.9  obj-y += usercopy.o
    2.10  obj-y += x86_emulate.o
    2.11 @@ -52,20 +51,19 @@ obj-$(crash_debug) += gdbstub.o
    2.12  	./boot/mkelf32 $(TARGET)-syms $(TARGET) 0x100000 \
    2.13  	`$(NM) -nr $(TARGET)-syms | head -n 1 | sed -e 's/^\([^ ]*\).*/0x\1/'`
    2.14  
    2.15 -$(TARGET)-syms: boot/$(TARGET_SUBARCH).o $(ALL_OBJS) xen.lds
    2.16 +ALL_OBJS := $(BASEDIR)/arch/x86/boot/built_in.o $(ALL_OBJS)
    2.17 +
    2.18 +$(TARGET)-syms: $(ALL_OBJS) xen.lds
    2.19  	$(MAKE) -f $(BASEDIR)/Rules.mk $(BASEDIR)/common/symbols-dummy.o
    2.20 -	$(LD) $(LDFLAGS) -T xen.lds -N \
    2.21 -	    boot/$(TARGET_SUBARCH).o $(ALL_OBJS) \
    2.22 +	$(LD) $(LDFLAGS) -T xen.lds -N $(ALL_OBJS) \
    2.23  	    $(BASEDIR)/common/symbols-dummy.o -o $(@D)/.$(@F).0
    2.24  	$(NM) -n $(@D)/.$(@F).0 | $(BASEDIR)/tools/symbols >$(@D)/.$(@F).0.S
    2.25  	$(MAKE) -f $(BASEDIR)/Rules.mk $(@D)/.$(@F).0.o
    2.26 -	$(LD) $(LDFLAGS) -T xen.lds -N \
    2.27 -	    boot/$(TARGET_SUBARCH).o $(ALL_OBJS) \
    2.28 +	$(LD) $(LDFLAGS) -T xen.lds -N $(ALL_OBJS) \
    2.29  	    $(@D)/.$(@F).0.o -o $(@D)/.$(@F).1
    2.30  	$(NM) -n $(@D)/.$(@F).1 | $(BASEDIR)/tools/symbols >$(@D)/.$(@F).1.S
    2.31  	$(MAKE) -f $(BASEDIR)/Rules.mk $(@D)/.$(@F).1.o
    2.32 -	$(LD) $(LDFLAGS) -T xen.lds -N \
    2.33 -	    boot/$(TARGET_SUBARCH).o $(ALL_OBJS) \
    2.34 +	$(LD) $(LDFLAGS) -T xen.lds -N $(ALL_OBJS) \
    2.35  	    $(@D)/.$(@F).1.o -o $@
    2.36  	rm -f $(@D)/.$(@F).[0-9]*
    2.37  
     3.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     3.2 +++ b/xen/arch/x86/boot/Makefile	Thu May 10 18:02:55 2007 +0100
     3.3 @@ -0,0 +1,3 @@
     3.4 +obj-y += head.o
     3.5 +
     3.6 +head.o: head.S trampoline.S $(TARGET_SUBARCH).S
     4.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     4.2 +++ b/xen/arch/x86/boot/head.S	Thu May 10 18:02:55 2007 +0100
     4.3 @@ -0,0 +1,180 @@
     4.4 +#include <xen/config.h>
     4.5 +#include <xen/multiboot.h>
     4.6 +#include <public/xen.h>
     4.7 +#include <asm/asm_defns.h>
     4.8 +#include <asm/desc.h>
     4.9 +#include <asm/page.h>
    4.10 +#include <asm/msr.h>
    4.11 +
    4.12 +        .text
    4.13 +        .code32
    4.14 +
    4.15 +#define SYM_PHYS(sym)       ((sym) - __XEN_VIRT_START)
    4.16 +#define SYM_TRAMP_PHYS(sym) ((sym) - trampoline_start + BOOT_TRAMPOLINE)
    4.17 +
    4.18 +#define TRAMP_CS32 0x0008
    4.19 +#define TRAMP_CS64 0x0010
    4.20 +#define TRAMP_DS   0x0018
    4.21 +
    4.22 +ENTRY(start)
    4.23 +        jmp     __start
    4.24 +
    4.25 +        .align 4
    4.26 +/*** MULTIBOOT HEADER ****/
    4.27 +#define MULTIBOOT_HEADER_FLAGS (MULTIBOOT_HEADER_MODS_ALIGNED | \
    4.28 +                                MULTIBOOT_HEADER_WANT_MEMORY)
    4.29 +        /* Magic number indicating a Multiboot header. */
    4.30 +        .long   MULTIBOOT_HEADER_MAGIC
    4.31 +        /* Flags to bootloader (see Multiboot spec). */
    4.32 +        .long   MULTIBOOT_HEADER_FLAGS
    4.33 +        /* Checksum: must be the negated sum of the first two fields. */
    4.34 +        .long   -(MULTIBOOT_HEADER_MAGIC + MULTIBOOT_HEADER_FLAGS)
    4.35 +
    4.36 +.Lbad_cpu_msg: .asciz "ERR: Not a 64-bit CPU!"
    4.37 +.Lbad_ldr_msg: .asciz "ERR: Not a Multiboot bootloader!"
    4.38 +
    4.39 +bad_cpu:
    4.40 +        mov     $(SYM_PHYS(.Lbad_cpu_msg)),%esi # Error message
    4.41 +        jmp     print_err
    4.42 +not_multiboot:
    4.43 +        mov     $(SYM_PHYS(.Lbad_ldr_msg)),%esi # Error message
    4.44 +print_err:
    4.45 +        mov     $0xB8000,%edi  # VGA framebuffer
    4.46 +1:      mov     (%esi),%bl
    4.47 +        test    %bl,%bl        # Terminate on '\0' sentinel
    4.48 +2:      je      2b
    4.49 +        mov     $0x3f8+5,%dx   # UART Line Status Register
    4.50 +3:      in      %dx,%al
    4.51 +        test    $0x20,%al      # Test THR Empty flag
    4.52 +        je      3b
    4.53 +        mov     $0x3f8+0,%dx   # UART Transmit Holding Register
    4.54 +        mov     %bl,%al
    4.55 +        out     %al,%dx        # Send a character over the serial line
    4.56 +        movsb                  # Write a character to the VGA framebuffer
    4.57 +        mov     $7,%al
    4.58 +        stosb                  # Write an attribute to the VGA framebuffer
    4.59 +        jmp     1b
    4.60 +
    4.61 +gdt_boot_descr:
    4.62 +        .word   4*8-1
    4.63 +        .long   SYM_PHYS(trampoline_gdt)
    4.64 +
    4.65 +__start:
    4.66 +        cld
    4.67 +        cli
    4.68 +
    4.69 +        /* Initialise GDT and basic data segments. */
    4.70 +        lgdt    %cs:SYM_PHYS(gdt_boot_descr)
    4.71 +        mov     $TRAMP_DS,%ecx
    4.72 +        mov     %ecx,%ds
    4.73 +        mov     %ecx,%es
    4.74 +
    4.75 +        /* Check for Multiboot bootloader */
    4.76 +        cmp     $0x2BADB002,%eax
    4.77 +        jne     not_multiboot
    4.78 +
    4.79 +        /* Save the Multiboot info structure for later use. */
    4.80 +        mov     %ebx,SYM_PHYS(multiboot_ptr)
    4.81 +
    4.82 +        /* Initialize BSS (no nasty surprises!) */
    4.83 +        mov     $SYM_PHYS(__bss_start),%edi
    4.84 +        mov     $SYM_PHYS(_end),%ecx
    4.85 +        sub     %edi,%ecx
    4.86 +        xor     %eax,%eax
    4.87 +        rep     stosb
    4.88 +
    4.89 +        /* Interrogate CPU extended features via CPUID. */
    4.90 +        mov     $0x80000000,%eax
    4.91 +        cpuid
    4.92 +        xor     %edx,%edx
    4.93 +        cmp     $0x80000000,%eax    # any function > 0x80000000?
    4.94 +        jbe     1f
    4.95 +        mov     $0x80000001,%eax
    4.96 +        cpuid
    4.97 +1:      mov     %edx,SYM_PHYS(cpuid_ext_features)
    4.98 +
    4.99 +#if defined(__x86_64__)
   4.100 +        /* Check for availability of long mode. */
   4.101 +        bt      $29,%edx
   4.102 +        jnc     bad_cpu
   4.103 +        /* Initialise L2 identity-map and xen page table entries (16MB). */
   4.104 +        mov     $SYM_PHYS(l2_identmap),%edi
   4.105 +        mov     $SYM_PHYS(l2_xenmap),%esi
   4.106 +        mov     $0x1e3,%eax                  /* PRESENT+RW+A+D+2MB+GLOBAL */
   4.107 +        mov     $8,%ecx
   4.108 +1:      mov     %eax,(%edi)
   4.109 +        add     $8,%edi
   4.110 +        mov     %eax,(%esi)
   4.111 +        add     $8,%esi
   4.112 +        add     $(1<<L2_PAGETABLE_SHIFT),%eax
   4.113 +        loop    1b
   4.114 +        /* Initialise L3 identity-map page directory entries. */
   4.115 +        mov     $SYM_PHYS(l3_identmap),%edi
   4.116 +        mov     $(SYM_PHYS(l2_identmap)+7),%eax
   4.117 +        mov     $4,%ecx
   4.118 +1:      mov     %eax,(%edi)
   4.119 +        add     $8,%edi
   4.120 +        add     $PAGE_SIZE,%eax
   4.121 +        loop    1b
   4.122 +        /* Initialise L3 xen-map page directory entry. */
   4.123 +        mov     $(SYM_PHYS(l2_xenmap)+7),%eax
   4.124 +        mov     %eax,SYM_PHYS(l3_xenmap) + (50*8)
    4.125 +        /* Hook identity-map and xen-map L3 tables into PML4. */
   4.126 +        mov     $(SYM_PHYS(l3_identmap)+7),%eax
   4.127 +        mov     %eax,SYM_PHYS(idle_pg_table) + (  0*8) /* PML4[  0]: 1:1 map */
   4.128 +        mov     %eax,SYM_PHYS(idle_pg_table) + (262*8) /* PML4[262]: 1:1 map */
   4.129 +        mov     $(SYM_PHYS(l3_xenmap)+7),%eax
   4.130 +        mov     %eax,SYM_PHYS(idle_pg_table) + (261*8) /* PML4[261]: xen map */
   4.131 +#elif defined(CONFIG_X86_PAE)
   4.132 +        /* Initialize low and high mappings of memory with 2MB pages */
   4.133 +        mov     $SYM_PHYS(idle_pg_table_l2),%edi
   4.134 +        mov     $0xe3,%eax                   /* PRESENT+RW+A+D+2MB */
   4.135 +1:      mov     %eax,__PAGE_OFFSET>>18(%edi) /* high mapping */
   4.136 +        stosl                                /* low mapping */
   4.137 +        add     $4,%edi
   4.138 +        add     $(1<<L2_PAGETABLE_SHIFT),%eax
   4.139 +        cmp     $DIRECTMAP_PHYS_END+0xe3,%eax
   4.140 +        jne     1b
   4.141 +1:      stosl   /* low mappings cover up to 16MB */
   4.142 +        add     $4,%edi
   4.143 +        add     $(1<<L2_PAGETABLE_SHIFT),%eax
   4.144 +        cmp     $(16<<20)+0xe3,%eax
   4.145 +        jne     1b
   4.146 +#else
   4.147 +        /* Initialize low and high mappings of memory with 4MB pages */
   4.148 +        mov     $SYM_PHYS(idle_pg_table),%edi
   4.149 +        mov     $0xe3,%eax                   /* PRESENT+RW+A+D+4MB */
   4.150 +1:      mov     %eax,__PAGE_OFFSET>>20(%edi) /* high mapping */
   4.151 +        stosl                                /* low mapping */
   4.152 +        add     $(1<<L2_PAGETABLE_SHIFT),%eax
   4.153 +        cmp     $DIRECTMAP_PHYS_END+0xe3,%eax
   4.154 +        jne     1b
   4.155 +1:      stosl   /* low mappings cover up to 16MB */
   4.156 +        add     $(1<<L2_PAGETABLE_SHIFT),%eax
   4.157 +        cmp     $(16<<20)+0xe3,%eax
   4.158 +        jne     1b
   4.159 +#endif
   4.160 +
   4.161 +        /* Copy bootstrap trampoline to low memory, below 1MB. */
   4.162 +        mov     $SYM_PHYS(trampoline_start),%esi
   4.163 +        mov     $SYM_TRAMP_PHYS(trampoline_start),%edi
   4.164 +        mov     $trampoline_end - trampoline_start,%ecx
   4.165 +        rep     movsb
   4.166 +
   4.167 +        /* EBX == 0 indicates we are the BP (Boot Processor). */
   4.168 +        xor     %ebx,%ebx
   4.169 +
   4.170 +        /* Jump into the relocated trampoline. */
   4.171 +        jmp     $TRAMP_CS32,$SYM_TRAMP_PHYS(trampoline_protmode_entry)
   4.172 +
   4.173 +        .globl trampoline_start, trampoline_end
   4.174 +trampoline_start:
   4.175 +#include "trampoline.S"
   4.176 +trampoline_end:
   4.177 +
   4.178 +__high_start:
   4.179 +#ifdef __x86_64__
   4.180 +#include "x86_64.S"
   4.181 +#else
   4.182 +#include "x86_32.S"
   4.183 +#endif
     5.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     5.2 +++ b/xen/arch/x86/boot/trampoline.S	Thu May 10 18:02:55 2007 +0100
     5.3 @@ -0,0 +1,107 @@
     5.4 +        .code16
     5.5 +
     5.6 +        .globl trampoline_realmode_entry
     5.7 +trampoline_realmode_entry:
     5.8 +        nop                               # We use this byte as a progress flag
     5.9 +        movb    $0xA5,trampoline_cpu_started - trampoline_start
    5.10 +        cld
    5.11 +        cli
    5.12 +        lidt    %cs:idt_48 - trampoline_start
    5.13 +        lgdt    %cs:gdt_48 - trampoline_start
    5.14 +        xor     %ax, %ax
    5.15 +        inc     %ax
    5.16 +        lmsw    %ax                       # CR0.PE = 1 (enter protected mode)
    5.17 +        mov     $1,%bl                    # EBX != 0 indicates we are an AP
    5.18 +        jmp     1f
    5.19 +1:      ljmpl   $TRAMP_CS32,$SYM_TRAMP_PHYS(trampoline_protmode_entry)
    5.20 +
    5.21 +idt_48: .word   0, 0, 0 # base = limit = 0
    5.22 +gdt_48: .word   4*8-1
    5.23 +        .long   SYM_TRAMP_PHYS(trampoline_gdt)
    5.24 +trampoline_gdt:
    5.25 +        .quad   0x0000000000000000     /* 0x0000: unused */
    5.26 +        .quad   0x00cf9a000000ffff     /* 0x0008: ring 0 code, 32-bit mode */
    5.27 +        .quad   0x00af9a000000ffff     /* 0x0010: ring 0 code, 64-bit mode */
    5.28 +        .quad   0x00cf92000000ffff     /* 0x0018: ring 0 data */
    5.29 +
    5.30 +cpuid_ext_features:
    5.31 +        .long   0
    5.32 +
    5.33 +        .globl trampoline_xen_phys_start
    5.34 +trampoline_xen_phys_start:
    5.35 +        .long   0
    5.36 +
    5.37 +        .globl trampoline_cpu_started
    5.38 +trampoline_cpu_started:
    5.39 +        .byte   0
    5.40 +
    5.41 +        .code32
    5.42 +trampoline_protmode_entry:
    5.43 +        /* Set up a few descriptors: on entry only CS is guaranteed good. */
    5.44 +        mov     $TRAMP_DS,%eax
    5.45 +        mov     %eax,%ds
    5.46 +        mov     %eax,%es
    5.47 +
    5.48 +        /* Set up FPU. */
    5.49 +        fninit
    5.50 +
    5.51 +        /* Initialise CR4. */
    5.52 +#if CONFIG_PAGING_LEVELS == 2
    5.53 +        mov     $X86_CR4_PSE,%ecx
    5.54 +#else
    5.55 +        mov     $X86_CR4_PAE,%ecx
    5.56 +#endif
    5.57 +        mov     %ecx,%cr4
    5.58 +
    5.59 +        /* Load pagetable base register. */
    5.60 +        mov     $SYM_PHYS(idle_pg_table),%eax
    5.61 +        add     SYM_TRAMP_PHYS(trampoline_xen_phys_start),%eax
    5.62 +        mov     %eax,%cr3
    5.63 +
    5.64 +#if CONFIG_PAGING_LEVELS != 2
    5.65 +        /* Set up EFER (Extended Feature Enable Register). */
    5.66 +        movl    $MSR_EFER,%ecx
    5.67 +        rdmsr
    5.68 +#if CONFIG_PAGING_LEVELS == 4
    5.69 +        btsl    $_EFER_LME,%eax /* Long Mode      */
    5.70 +        btsl    $_EFER_SCE,%eax /* SYSCALL/SYSRET */
    5.71 +#endif
    5.72 +        mov     SYM_TRAMP_PHYS(cpuid_ext_features),%edi
    5.73 +        btl     $20,%edi        /* CPUID 0x80000001, EDX[20] */
    5.74 +        jnc     1f
    5.75 +        btsl    $_EFER_NX,%eax  /* No-Execute     */
    5.76 +1:      wrmsr
    5.77 +#endif
    5.78 +
    5.79 +        mov     $0x80050033,%eax /* hi-to-lo: PG,AM,WP,NE,ET,MP,PE */
    5.80 +        mov     %eax,%cr0
    5.81 +        jmp     1f
    5.82 +1:
    5.83 +
    5.84 +#if defined(__x86_64__)
    5.85 +
    5.86 +        /* Now in compatibility mode. Long-jump into 64-bit mode. */
    5.87 +        ljmp    $TRAMP_CS64,$SYM_TRAMP_PHYS(start64)
    5.88 +
    5.89 +        .code64
    5.90 +start64:
    5.91 +        /* Jump to high mappings. */
    5.92 +        mov     high_start(%rip),%rax
    5.93 +        jmpq    *%rax
    5.94 +
    5.95 +high_start:
    5.96 +        .quad   __high_start
    5.97 +
    5.98 +#else /* !defined(__x86_64__) */
    5.99 +
   5.100 +        /* Install relocated selectors. */
   5.101 +        lgdt    gdt_descr
   5.102 +        mov     $(__HYPERVISOR_DS),%eax
   5.103 +        mov     %eax,%ds
   5.104 +        mov     %eax,%es
   5.105 +        mov     %eax,%fs
   5.106 +        mov     %eax,%gs
   5.107 +        mov     %eax,%ss
   5.108 +        ljmp    $(__HYPERVISOR_CS),$__high_start
   5.109 +
   5.110 +#endif
     6.1 --- a/xen/arch/x86/boot/x86_32.S	Thu May 10 16:22:27 2007 +0100
     6.2 +++ b/xen/arch/x86/boot/x86_32.S	Thu May 10 18:02:55 2007 +0100
     6.3 @@ -1,178 +1,38 @@
     6.4 -#include <xen/config.h>
     6.5 -#include <xen/multiboot.h>
     6.6 -#include <public/xen.h>
     6.7 -#include <asm/asm_defns.h>
     6.8 -#include <asm/desc.h>
     6.9 -#include <asm/page.h>
    6.10 -#include <asm/msr.h>
    6.11 -
    6.12 -#define  SECONDARY_CPU_FLAG 0xA5A5A5A5
    6.13 -                
    6.14 -        .text
    6.15 -
    6.16 -ENTRY(start)
    6.17 -        jmp __start
    6.18 -
    6.19 -        .align 4
    6.20 -
    6.21 -/*** MULTIBOOT HEADER ****/
    6.22 -#define MULTIBOOT_HEADER_FLAGS (MULTIBOOT_HEADER_MODS_ALIGNED | \
    6.23 -                                MULTIBOOT_HEADER_WANT_MEMORY)
    6.24 -        /* Magic number indicating a Multiboot header. */
    6.25 -        .long MULTIBOOT_HEADER_MAGIC
    6.26 -        /* Flags to bootloader (see Multiboot spec). */
    6.27 -        .long MULTIBOOT_HEADER_FLAGS
    6.28 -        /* Checksum: must be the negated sum of the first two fields. */
    6.29 -        .long -(MULTIBOOT_HEADER_MAGIC + MULTIBOOT_HEADER_FLAGS)
    6.30 +        .code32
    6.31          
    6.32 -not_multiboot_msg:
    6.33 -        .asciz "ERR: Not a Multiboot bootloader!"
    6.34 -not_multiboot:
    6.35 -        mov     $not_multiboot_msg-__PAGE_OFFSET,%esi
    6.36 -        mov     $0xB8000,%edi  # VGA framebuffer
    6.37 -1:      mov     (%esi),%bl
    6.38 -        test    %bl,%bl        # Terminate on '\0' sentinel
    6.39 -2:      je      2b
    6.40 -        mov     $0x3f8+5,%dx   # UART Line Status Register
    6.41 -3:      in      %dx,%al
    6.42 -        test    $0x20,%al      # Test THR Empty flag
    6.43 -        je      3b
    6.44 -        mov     $0x3f8+0,%dx   # UART Transmit Holding Register
    6.45 -        mov     %bl,%al
    6.46 -        out     %al,%dx        # Send a character over the serial line
    6.47 -        movsb                  # Write a character to the VGA framebuffer
    6.48 -        mov     $7,%al
    6.49 -        stosb                  # Write an attribute to the VGA framebuffer
    6.50 -        jmp     1b
    6.51 +        /* Enable full CR4 features. */
    6.52 +        mov     mmu_cr4_features,%eax
    6.53 +        mov     %eax,%cr4
    6.54          
    6.55 -__start:
    6.56 -        /* Set up a few descriptors: on entry only CS is guaranteed good. */
    6.57 -        lgdt    %cs:nopaging_gdt_descr-__PAGE_OFFSET
    6.58 -        mov     $(__HYPERVISOR_DS),%ecx
    6.59 -        mov     %ecx,%ds
    6.60 -        mov     %ecx,%es
    6.61 -        mov     %ecx,%fs
    6.62 -        mov     %ecx,%gs
    6.63 -        ljmp    $(__HYPERVISOR_CS),$(1f)-__PAGE_OFFSET
    6.64 -1:      lss     stack_start-__PAGE_OFFSET,%esp
    6.65 -        add     $(STACK_SIZE-CPUINFO_sizeof-__PAGE_OFFSET),%esp
    6.66 -
    6.67 +        /* Initialise stack. */
    6.68 +        mov     stack_start,%esp
    6.69 +        or      $(STACK_SIZE-CPUINFO_sizeof),%esp
    6.70 +        
    6.71          /* Reset EFLAGS (subsumes CLI and CLD). */
    6.72          pushl   $0
    6.73          popf
    6.74  
    6.75 -        /* Set up FPU. */
    6.76 -        fninit
    6.77 -
    6.78 -        /* Set up CR4, except global flag which Intel requires should be     */
    6.79 -        /* left until after paging is enabled (IA32 Manual Vol. 3, Sec. 2.5) */
    6.80 -        mov     mmu_cr4_features-__PAGE_OFFSET,%ecx
    6.81 -        and     $0x7f,%cl   # CR4.PGE (global enable)
    6.82 -        mov     %ecx,%cr4
    6.83 -
    6.84 -        cmp     $(SECONDARY_CPU_FLAG),%ebx
    6.85 -        je      start_paging
    6.86 -                
    6.87 -        /* Check for Multiboot bootloader */
    6.88 -        cmp     $0x2BADB002,%eax
    6.89 -        jne     not_multiboot
    6.90 -
    6.91 -        /* Initialize BSS (no nasty surprises!) */
    6.92 -        mov     $__bss_start-__PAGE_OFFSET,%edi
    6.93 -        mov     $_end-__PAGE_OFFSET,%ecx
    6.94 -        sub     %edi,%ecx
    6.95 -        xor     %eax,%eax
    6.96 -        rep     stosb
    6.97 -
    6.98 -        /* Save the Multiboot info structure for later use. */
    6.99 -        add     $__PAGE_OFFSET,%ebx
   6.100 -        push    %ebx
   6.101 +        lidt    idt_descr
   6.102  
   6.103 -#ifdef CONFIG_X86_PAE
   6.104 -        /* Initialize low and high mappings of all memory with 2MB pages */
   6.105 -        mov     $idle_pg_table_l2-__PAGE_OFFSET,%edi
   6.106 -        mov     $0xe3,%eax                  /* PRESENT+RW+A+D+2MB */
   6.107 -1:      mov     %eax,__PAGE_OFFSET>>18(%edi) /* high mapping */
   6.108 -        stosl                                /* low mapping */
   6.109 -        add     $4,%edi
   6.110 -        add     $(1<<L2_PAGETABLE_SHIFT),%eax
   6.111 -        cmp     $DIRECTMAP_PHYS_END+0xe3,%eax
   6.112 -        jne     1b
   6.113 -1:      stosl   /* low mappings cover as much physmem as possible */
   6.114 -        add     $4,%edi
   6.115 -        add     $(1<<L2_PAGETABLE_SHIFT),%eax
   6.116 -        cmp     $HYPERVISOR_VIRT_START+0xe3,%eax
   6.117 -        jne     1b
   6.118 -#else
   6.119 -        /* Initialize low and high mappings of all memory with 4MB pages */
   6.120 -        mov     $idle_pg_table-__PAGE_OFFSET,%edi
   6.121 -        mov     $0xe3,%eax                  /* PRESENT+RW+A+D+4MB */
   6.122 -1:      mov     %eax,__PAGE_OFFSET>>20(%edi) /* high mapping */
   6.123 -        stosl                                /* low mapping */
   6.124 -        add     $(1<<L2_PAGETABLE_SHIFT),%eax
   6.125 -        cmp     $DIRECTMAP_PHYS_END+0xe3,%eax
   6.126 -        jne     1b
   6.127 -1:      stosl   /* low mappings cover as much physmem as possible */
   6.128 -        add     $(1<<L2_PAGETABLE_SHIFT),%eax
   6.129 -        cmp     $HYPERVISOR_VIRT_START+0xe3,%eax
   6.130 -        jne     1b
   6.131 -#endif
   6.132 -        
   6.133 +        test    %ebx,%ebx
   6.134 +        jnz     start_secondary
   6.135 +
   6.136          /* Initialise IDT with simple error defaults. */
   6.137          lea     ignore_int,%edx
   6.138          mov     $(__HYPERVISOR_CS << 16),%eax
   6.139          mov     %dx,%ax            /* selector = 0x0010 = cs */
   6.140          mov     $0x8E00,%dx        /* interrupt gate - dpl=0, present */
   6.141 -        lea     idt_table-__PAGE_OFFSET,%edi
   6.142 +        lea     idt_table,%edi
   6.143          mov     $256,%ecx
   6.144  1:      mov     %eax,(%edi)
   6.145          mov     %edx,4(%edi)
   6.146          add     $8,%edi
   6.147          loop    1b
   6.148 -
   6.149 -start_paging:
   6.150 -#ifdef CONFIG_X86_PAE
   6.151 -        /* Enable Execute-Disable (NX/XD) support if it is available. */
   6.152 -        push    %ebx
   6.153 -        mov     $0x80000000,%eax
   6.154 -        cpuid
   6.155 -        cmp     $0x80000000,%eax    /* Any function > 0x80000000? */
   6.156 -        jbe     no_execute_disable
   6.157 -        mov     $0x80000001,%eax
   6.158 -        cpuid
   6.159 -        bt      $20,%edx            /* Execute Disable? */
   6.160 -        jnc     no_execute_disable
   6.161 -        movl    $MSR_EFER,%ecx
   6.162 -        rdmsr
   6.163 -        bts     $_EFER_NX,%eax
   6.164 -        wrmsr
   6.165 -no_execute_disable:
   6.166 -        pop     %ebx
   6.167 -#endif
   6.168 -        mov     $idle_pg_table-__PAGE_OFFSET,%eax
   6.169 -        mov     %eax,%cr3
   6.170 -        mov     $0x80050033,%eax /* hi-to-lo: PG,AM,WP,NE,ET,MP,PE */
   6.171 -        mov     %eax,%cr0
   6.172 -        jmp     1f
   6.173 -1:      /* Install relocated selectors (FS/GS unused). */
   6.174 -        lgdt    gdt_descr
   6.175 -        mov     $(__HYPERVISOR_DS),%ecx
   6.176 -        mov     %ecx,%ds
   6.177 -        mov     %ecx,%es
   6.178 -        mov     %ecx,%ss
   6.179 -        ljmp    $(__HYPERVISOR_CS),$1f
   6.180 -1:      /* Paging enabled, so we can now enable GLOBAL mappings in CR4. */
   6.181 -        mov     mmu_cr4_features,%ecx
   6.182 -        mov     %ecx,%cr4
   6.183 -        /* Relocate ESP */
   6.184 -        add     $__PAGE_OFFSET,%esp
   6.185 -
   6.186 -        lidt    idt_descr
   6.187                  
   6.188 -        cmp     $(SECONDARY_CPU_FLAG),%ebx
   6.189 -        je      start_secondary
   6.190 -
   6.191 -        /* Call into main C routine. This should never return.*/
   6.192 +        /* Pass off the Multiboot info structure to C land. */
   6.193 +        mov     multiboot_ptr,%eax
   6.194 +        add     $__PAGE_OFFSET,%eax
   6.195 +        push    %eax
   6.196          call    __start_xen
   6.197          ud2     /* Force a panic (invalid opcode). */
   6.198  
   6.199 @@ -189,15 +49,14 @@ ignore_int:
   6.200          call    printk
   6.201  1:      jmp     1b
   6.202  
   6.203 -/*** STACK LOCATION ***/
   6.204 -        
   6.205  ENTRY(stack_start)
   6.206          .long cpu0_stack
   6.207 -        .long __HYPERVISOR_DS
   6.208          
   6.209  /*** DESCRIPTOR TABLES ***/
   6.210  
   6.211          ALIGN
   6.212 +multiboot_ptr:
   6.213 +        .long   0
   6.214          
   6.215          .word   0    
   6.216  idt_descr:
   6.217 @@ -212,8 +71,8 @@ gdt_descr:
   6.218          .word   0
   6.219  nopaging_gdt_descr:
   6.220          .word   LAST_RESERVED_GDT_BYTE
   6.221 -        .long   gdt_table - FIRST_RESERVED_GDT_BYTE - __PAGE_OFFSET
   6.222 -        
   6.223 +        .long   SYM_PHYS(gdt_table) - FIRST_RESERVED_GDT_BYTE
   6.224 +
   6.225          .align PAGE_SIZE, 0
   6.226  /* NB. Rings != 0 get access up to MACH2PHYS_VIRT_END. This allows access to */
   6.227  /*     the machine->physical mapping table. Ring 0 can access all memory.    */
   6.228 @@ -231,25 +90,11 @@ ENTRY(gdt_table)
   6.229          .quad 0x0000000000000000     /* unused                           */
   6.230          .fill 2*NR_CPUS,8,0          /* space for TSS and LDT per CPU    */
   6.231  
   6.232 -        .align PAGE_SIZE, 0
   6.233 -
   6.234  #ifdef CONFIG_X86_PAE
   6.235 +        .align 32
   6.236  ENTRY(idle_pg_table)
   6.237 -ENTRY(idle_pg_table_l3)
   6.238 -        .long idle_pg_table_l2 + 0*PAGE_SIZE + 0x01 - __PAGE_OFFSET, 0
   6.239 -        .long idle_pg_table_l2 + 1*PAGE_SIZE + 0x01 - __PAGE_OFFSET, 0
   6.240 -        .long idle_pg_table_l2 + 2*PAGE_SIZE + 0x01 - __PAGE_OFFSET, 0
   6.241 -        .long idle_pg_table_l2 + 3*PAGE_SIZE + 0x01 - __PAGE_OFFSET, 0
   6.242 -.section ".bss.page_aligned","w"
   6.243 -ENTRY(idle_pg_table_l2)
   6.244 -        .fill 4*PAGE_SIZE,1,0
   6.245 -#else
   6.246 -.section ".bss.page_aligned","w"
   6.247 -ENTRY(idle_pg_table)
   6.248 -ENTRY(idle_pg_table_l2)
   6.249 -        .fill 1*PAGE_SIZE,1,0
   6.250 +        .long SYM_PHYS(idle_pg_table_l2) + 0*PAGE_SIZE + 0x01, 0
   6.251 +        .long SYM_PHYS(idle_pg_table_l2) + 1*PAGE_SIZE + 0x01, 0
   6.252 +        .long SYM_PHYS(idle_pg_table_l2) + 2*PAGE_SIZE + 0x01, 0
   6.253 +        .long SYM_PHYS(idle_pg_table_l2) + 3*PAGE_SIZE + 0x01, 0
   6.254  #endif
   6.255 -
   6.256 -.section ".bss.stack_aligned","w"
   6.257 -ENTRY(cpu0_stack)
   6.258 -        .fill STACK_SIZE,1,0
     7.1 --- a/xen/arch/x86/boot/x86_64.S	Thu May 10 16:22:27 2007 +0100
     7.2 +++ b/xen/arch/x86/boot/x86_64.S	Thu May 10 18:02:55 2007 +0100
     7.3 @@ -1,122 +1,13 @@
     7.4 -#include <xen/config.h>
     7.5 -#include <xen/multiboot.h>
     7.6 -#include <public/xen.h>
     7.7 -#include <asm/asm_defns.h>
     7.8 -#include <asm/desc.h>
     7.9 -#include <asm/page.h>
    7.10 -#include <asm/msr.h>
    7.11 -
    7.12 -#define  SECONDARY_CPU_FLAG 0xA5A5A5A5
    7.13 -                
    7.14 -        .text
    7.15 -        .code32
    7.16 -
    7.17 -#define SYM_PHYS(sym) (sym - __PAGE_OFFSET)
    7.18 -
    7.19 -ENTRY(start)
    7.20 -        jmp __start
    7.21 -
    7.22 -        .org    0x004
    7.23 -/*** MULTIBOOT HEADER ****/
    7.24 -#define MULTIBOOT_HEADER_FLAGS (MULTIBOOT_HEADER_MODS_ALIGNED | \
    7.25 -                                MULTIBOOT_HEADER_WANT_MEMORY)
    7.26 -        /* Magic number indicating a Multiboot header. */
    7.27 -        .long   MULTIBOOT_HEADER_MAGIC
    7.28 -        /* Flags to bootloader (see Multiboot spec). */
    7.29 -        .long   MULTIBOOT_HEADER_FLAGS
    7.30 -        /* Checksum: must be the negated sum of the first two fields. */
    7.31 -        .long   -(MULTIBOOT_HEADER_MAGIC + MULTIBOOT_HEADER_FLAGS)
    7.32 +        .code64
    7.33  
    7.34 -.Lbad_cpu_msg: .asciz "ERR: Not a 64-bit CPU!"
    7.35 -.Lbad_ldr_msg: .asciz "ERR: Not a Multiboot bootloader!"
    7.36 -
    7.37 -bad_cpu:
    7.38 -        mov     $(SYM_PHYS(.Lbad_cpu_msg)),%esi # Error message
    7.39 -        jmp     print_err
    7.40 -not_multiboot:
    7.41 -        mov     $(SYM_PHYS(.Lbad_ldr_msg)),%esi # Error message
    7.42 -print_err:
    7.43 -        mov     $0xB8000,%edi  # VGA framebuffer
    7.44 -1:      mov     (%esi),%bl
    7.45 -        test    %bl,%bl        # Terminate on '\0' sentinel
    7.46 -2:      je      2b
    7.47 -        mov     $0x3f8+5,%dx   # UART Line Status Register
    7.48 -3:      in      %dx,%al
    7.49 -        test    $0x20,%al      # Test THR Empty flag
    7.50 -        je      3b
    7.51 -        mov     $0x3f8+0,%dx   # UART Transmit Holding Register
    7.52 -        mov     %bl,%al
    7.53 -        out     %al,%dx        # Send a character over the serial line
    7.54 -        movsb                  # Write a character to the VGA framebuffer
    7.55 -        mov     $7,%al
    7.56 -        stosb                  # Write an attribute to the VGA framebuffer
    7.57 -        jmp     1b
    7.58 -
    7.59 -__start:
    7.60 -        cld
    7.61 -        cli
    7.62 -
    7.63 -        /* Set up a few descriptors: on entry only CS is guaranteed good. */
    7.64 -        lgdt    %cs:SYM_PHYS(nopaging_gdt_descr)
    7.65 -        mov     $(__HYPERVISOR_DS32),%ecx
    7.66 +        /* Install relocated data selectors. */
    7.67 +        lgdt    gdt_descr(%rip)
    7.68 +        mov     $(__HYPERVISOR_DS64),%ecx
    7.69          mov     %ecx,%ds
    7.70          mov     %ecx,%es
    7.71 -
    7.72 -        cmp     $(SECONDARY_CPU_FLAG),%ebx
    7.73 -        je      skip_boot_checks
    7.74 -
    7.75 -        /* Check for Multiboot bootloader */
    7.76 -        cmp     $0x2BADB002,%eax
    7.77 -        jne     not_multiboot
    7.78 -
    7.79 -        /* Save the Multiboot info structure for later use. */
    7.80 -        mov     %ebx,SYM_PHYS(multiboot_ptr)
    7.81 -
    7.82 -        /* We begin by interrogating the CPU for the presence of long mode. */
    7.83 -        mov     $0x80000000,%eax
    7.84 -        cpuid
    7.85 -        cmp     $0x80000000,%eax    # any function > 0x80000000?
    7.86 -        jbe     bad_cpu
    7.87 -        mov     $0x80000001,%eax
    7.88 -        cpuid
    7.89 -        bt      $29,%edx            # Long mode feature?
    7.90 -        jnc     bad_cpu
    7.91 -        mov     %edx,SYM_PHYS(cpuid_ext_features)
    7.92 -skip_boot_checks:
    7.93 -
    7.94 -        /* Set up FPU. */
    7.95 -        fninit
    7.96 -        
    7.97 -        /* Enable PAE in CR4. */
    7.98 -        mov     $0x20,%ecx # X86_CR4_PAE
    7.99 -        mov     %ecx,%cr4
   7.100 -
   7.101 -        /* Load pagetable base register. */
   7.102 -        mov     $SYM_PHYS(idle_pg_table),%eax
   7.103 -        mov     %eax,%cr3
   7.104 -
   7.105 -        /* Set up EFER (Extended Feature Enable Register). */
   7.106 -        movl    $MSR_EFER,%ecx
   7.107 -        rdmsr
   7.108 -        btsl    $_EFER_LME,%eax /* Long Mode      */
   7.109 -        btsl    $_EFER_SCE,%eax /* SYSCALL/SYSRET */
   7.110 -        mov     SYM_PHYS(cpuid_ext_features),%edi
   7.111 -        btl     $20,%edi        /* CPUID 0x80000001, EDX[20] */
   7.112 -        jnc     1f
   7.113 -        btsl    $_EFER_NX,%eax  /* No-Execute     */
   7.114 -1:      wrmsr
   7.115 -
   7.116 -        mov     $0x80050033,%eax /* hi-to-lo: PG,AM,WP,NE,ET,MP,PE */
   7.117 -        mov     %eax,%cr0
   7.118 -        jmp     1f
   7.119 -
   7.120 -1:      /* Now in compatibility mode. Long-jump into 64-bit mode. */
   7.121 -        ljmp    $(__HYPERVISOR_CS64),$SYM_PHYS(start64)
   7.122 -        
   7.123 -        .code64
   7.124 -start64:
   7.125 -        /* Install relocated selectors (FS/GS unused). */
   7.126 -        lgdt    gdt_descr(%rip)
   7.127 +        mov     %ecx,%fs
   7.128 +        mov     %ecx,%gs
   7.129 +        mov     %ecx,%ss
   7.130  
   7.131          /* Enable full CR4 features. */
   7.132          mov     mmu_cr4_features(%rip),%rcx
   7.133 @@ -129,30 +20,15 @@ start64:
   7.134          pushq   $0
   7.135          popf
   7.136  
   7.137 -        /* Jump to high mappings. */
   7.138 -        mov     high_start(%rip),%rax
   7.139 -        push    %rax
   7.140 -        ret
   7.141 -__high_start:
   7.142 -        
   7.143 -        mov     $(__HYPERVISOR_DS64),%ecx
   7.144 -        mov     %ecx,%ds
   7.145 -        mov     %ecx,%es
   7.146 -        mov     %ecx,%fs
   7.147 -        mov     %ecx,%gs
   7.148 -        mov     %ecx,%ss
   7.149 +        /* Reload code selector. */
   7.150 +        pushq   $(__HYPERVISOR_CS64)
   7.151 +        leaq    1f(%rip),%rax
   7.152 +        pushq   %rax
   7.153 +        lretq
   7.154 +1:      lidt    idt_descr(%rip)
   7.155  
   7.156 -        lidt    idt_descr(%rip)
   7.157 -
   7.158 -        cmp     $(SECONDARY_CPU_FLAG),%ebx
   7.159 -        je      start_secondary
   7.160 -
   7.161 -        /* Initialize BSS (no nasty surprises!) */
   7.162 -        lea     __bss_start(%rip),%rdi
   7.163 -        lea     _end(%rip),%rcx
   7.164 -        sub     %rdi,%rcx
   7.165 -        xor     %rax,%rax
   7.166 -        rep     stosb
   7.167 +        test    %ebx,%ebx
   7.168 +        jnz     start_secondary
   7.169  
   7.170          /* Initialise IDT with simple error defaults. */
   7.171          leaq    ignore_int(%rip),%rcx
   7.172 @@ -198,14 +74,6 @@ multiboot_ptr:
   7.173          .long   0
   7.174  
   7.175          .word   0
   7.176 -nopaging_gdt_descr:
   7.177 -        .word   LAST_RESERVED_GDT_BYTE
   7.178 -        .quad   gdt_table - FIRST_RESERVED_GDT_BYTE - __PAGE_OFFSET
   7.179 -
   7.180 -cpuid_ext_features:
   7.181 -        .long   0
   7.182 -        
   7.183 -        .word   0
   7.184  gdt_descr:
   7.185          .word   LAST_RESERVED_GDT_BYTE
   7.186          .quad   gdt_table - FIRST_RESERVED_GDT_BYTE
   7.187 @@ -218,9 +86,6 @@ idt_descr:
   7.188  ENTRY(stack_start)
   7.189          .quad   cpu0_stack
   7.190  
   7.191 -high_start:
   7.192 -        .quad   __high_start
   7.193 -
   7.194          .align PAGE_SIZE, 0
   7.195  ENTRY(gdt_table)
   7.196          .quad 0x0000000000000000     /* unused */
   7.197 @@ -234,7 +99,6 @@ ENTRY(gdt_table)
   7.198          .org gdt_table - FIRST_RESERVED_GDT_BYTE + __TSS(0) * 8
   7.199          .fill 4*NR_CPUS,8,0          /* space for TSS and LDT per CPU     */
   7.200  
   7.201 -#ifdef CONFIG_COMPAT
   7.202          .align PAGE_SIZE, 0
   7.203  /* NB. Even rings != 0 get access to the full 4Gb, as only the            */
   7.204  /*     (compatibility) machine->physical mapping table lives there.       */
   7.205 @@ -249,37 +113,3 @@ ENTRY(compat_gdt_table)
   7.206          .quad 0x00cf9a000000ffff     /* 0xe038 ring 0 code, compatibility */
   7.207          .org compat_gdt_table - FIRST_RESERVED_GDT_BYTE + __TSS(0) * 8
   7.208          .fill 4*NR_CPUS,8,0          /* space for TSS and LDT per CPU     */
   7.209 -# undef LIMIT
   7.210 -#endif
   7.211 -
   7.212 -/* Initial PML4 -- level-4 page table. */
   7.213 -        .align PAGE_SIZE, 0
   7.214 -ENTRY(idle_pg_table)
   7.215 -ENTRY(idle_pg_table_4)
   7.216 -        .quad idle_pg_table_l3 - __PAGE_OFFSET + 7 # PML4[0]
   7.217 -        .fill 261,8,0
   7.218 -        .quad idle_pg_table_l3 - __PAGE_OFFSET + 7 # PML4[262]
   7.219 -
   7.220 -/* Initial PDP -- level-3 page table. */
   7.221 -        .align PAGE_SIZE, 0
   7.222 -ENTRY(idle_pg_table_l3)
   7.223 -        .quad idle_pg_table_l2 - __PAGE_OFFSET + 7
   7.224 -
   7.225 -/* Initial PDE -- level-2 page table. Maps first 1GB physical memory. */
   7.226 -        .align PAGE_SIZE, 0
   7.227 -ENTRY(idle_pg_table_l2)
   7.228 -        .macro identmap from=0, count=512
   7.229 -        .if \count-1
   7.230 -        identmap "(\from+0)","(\count/2)"
   7.231 -        identmap "(\from+(0x200000*(\count/2)))","(\count/2)"
   7.232 -        .else
   7.233 -        .quad 0x00000000000001e3 + \from
   7.234 -        .endif
   7.235 -        .endm
   7.236 -        identmap
   7.237 -
   7.238 -        .align PAGE_SIZE, 0
   7.239 -
   7.240 -.section ".bss.stack_aligned","w"
   7.241 -ENTRY(cpu0_stack)
   7.242 -        .fill STACK_SIZE,1,0
     8.1 --- a/xen/arch/x86/e820.c	Thu May 10 16:22:27 2007 +0100
     8.2 +++ b/xen/arch/x86/e820.c	Thu May 10 18:02:55 2007 +0100
     8.3 @@ -32,7 +32,7 @@ static void __init add_memory_region(uns
     8.4      }
     8.5  } /* add_memory_region */
     8.6  
     8.7 -static void __init print_e820_memory_map(struct e820entry *map, int entries)
     8.8 +/*static*/ void __init print_e820_memory_map(struct e820entry *map, int entries)
     8.9  {
    8.10      int i;
    8.11  
     9.1 --- a/xen/arch/x86/mm.c	Thu May 10 16:22:27 2007 +0100
     9.2 +++ b/xen/arch/x86/mm.c	Thu May 10 18:02:55 2007 +0100
     9.3 @@ -3037,7 +3037,7 @@ long arch_memory_op(int op, XEN_GUEST_HA
     9.4          prev_mfn = gmfn_to_mfn(d, xatp.gpfn);
     9.5          if ( mfn_valid(prev_mfn) )
     9.6          {
     9.7 -            if ( IS_XEN_HEAP_FRAME(mfn_to_page(prev_mfn)) )
     9.8 +            if ( is_xen_heap_frame(mfn_to_page(prev_mfn)) )
     9.9                  /* Xen heap frames are simply unhooked from this phys slot. */
    9.10                  guest_physmap_remove_page(d, xatp.gpfn, prev_mfn);
    9.11              else
    9.12 @@ -3487,8 +3487,17 @@ void __set_fixmap(
    9.13  void memguard_init(void)
    9.14  {
    9.15      map_pages_to_xen(
    9.16 -        PAGE_OFFSET, 0, xenheap_phys_end >> PAGE_SHIFT,
    9.17 +        (unsigned long)__va(xen_phys_start),
    9.18 +        xen_phys_start >> PAGE_SHIFT,
    9.19 +        (xenheap_phys_end - xen_phys_start) >> PAGE_SHIFT,
    9.20          __PAGE_HYPERVISOR|MAP_SMALL_PAGES);
    9.21 +#ifdef __x86_64__
    9.22 +    map_pages_to_xen(
    9.23 +        XEN_VIRT_START,
    9.24 +        xen_phys_start >> PAGE_SHIFT,
    9.25 +        (__pa(&_end) + PAGE_SIZE - 1 - xen_phys_start) >> PAGE_SHIFT,
    9.26 +        __PAGE_HYPERVISOR|MAP_SMALL_PAGES);
    9.27 +#endif
    9.28  }
    9.29  
    9.30  static void __memguard_change_range(void *p, unsigned long l, int guard)
    10.1 --- a/xen/arch/x86/setup.c	Thu May 10 16:22:27 2007 +0100
    10.2 +++ b/xen/arch/x86/setup.c	Thu May 10 18:02:55 2007 +0100
    10.3 @@ -34,6 +34,14 @@
    10.4  #include <acm/acm_hooks.h>
    10.5  #include <xen/kexec.h>
    10.6  
    10.7 +#if defined(CONFIG_X86_64)
    10.8 +#define BOOTSTRAP_DIRECTMAP_END (1UL << 32)
    10.9 +#define maddr_to_bootstrap_virt(m) maddr_to_virt(m)
   10.10 +#else
   10.11 +#define BOOTSTRAP_DIRECTMAP_END HYPERVISOR_VIRT_START
   10.12 +#define maddr_to_bootstrap_virt(m) ((void *)(long)(m))
   10.13 +#endif
   10.14 +
   10.15  extern void dmi_scan_machine(void);
   10.16  extern void generic_apic_probe(void);
   10.17  extern void numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn);
   10.18 @@ -82,6 +90,8 @@ int early_boot = 1;
   10.19  
   10.20  cpumask_t cpu_present_map;
   10.21  
   10.22 +unsigned long xen_phys_start;
   10.23 +
   10.24  /* Limits of Xen heap, used to initialise the allocator. */
   10.25  unsigned long xenheap_phys_start, xenheap_phys_end;
   10.26  
   10.27 @@ -93,7 +103,7 @@ extern void early_cpu_init(void);
   10.28  
   10.29  struct tss_struct init_tss[NR_CPUS];
   10.30  
   10.31 -extern unsigned long cpu0_stack[];
   10.32 +char __attribute__ ((__section__(".bss.page_aligned"))) cpu0_stack[STACK_SIZE];
   10.33  
   10.34  struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
   10.35  
   10.36 @@ -108,7 +118,7 @@ int acpi_disabled;
   10.37  
   10.38  int acpi_force;
   10.39  char acpi_param[10] = "";
   10.40 -static void parse_acpi_param(char *s)
   10.41 +static void __init parse_acpi_param(char *s)
   10.42  {
   10.43      /* Save the parameter so it can be propagated to domain0. */
   10.44      safe_strcpy(acpi_param, s);
   10.45 @@ -147,20 +157,23 @@ static void __init do_initcalls(void)
   10.46          (*call)();
   10.47  }
   10.48  
   10.49 -#define EARLY_FAIL() for ( ; ; ) __asm__ __volatile__ ( "hlt" )
   10.50 -
   10.51 -static struct e820entry e820_raw[E820MAX];
   10.52 +#define EARLY_FAIL(f, a...) do {                \
   10.53 +    printk( f , ## a );                         \
   10.54 +    for ( ; ; ) __asm__ __volatile__ ( "hlt" ); \
   10.55 +} while (0)
   10.56  
   10.57 -static unsigned long initial_images_start, initial_images_end;
   10.58 +static struct e820entry __initdata e820_raw[E820MAX];
   10.59  
   10.60 -unsigned long initial_images_nrpages(void)
   10.61 +static unsigned long __initdata initial_images_start, initial_images_end;
   10.62 +
   10.63 +unsigned long __init initial_images_nrpages(void)
   10.64  {
   10.65      unsigned long s = initial_images_start + PAGE_SIZE - 1;
   10.66      unsigned long e = initial_images_end;
   10.67      return ((e >> PAGE_SHIFT) - (s >> PAGE_SHIFT));
   10.68  }
   10.69  
   10.70 -void discard_initial_images(void)
   10.71 +void __init discard_initial_images(void)
   10.72  {
   10.73      init_domheap_pages(initial_images_start, initial_images_end);
   10.74  }
   10.75 @@ -170,33 +183,15 @@ extern char __per_cpu_start[], __per_cpu
   10.76  static void __init percpu_init_areas(void)
   10.77  {
   10.78      unsigned int i, data_size = __per_cpu_data_end - __per_cpu_start;
   10.79 +    unsigned int first_unused;
   10.80  
   10.81      BUG_ON(data_size > PERCPU_SIZE);
   10.82  
   10.83 -    for_each_cpu ( i )
   10.84 -    {
   10.85 -        memguard_unguard_range(__per_cpu_start + (i << PERCPU_SHIFT),
   10.86 -                               1 << PERCPU_SHIFT);
   10.87 -        if ( i != 0 )
   10.88 -            memcpy(__per_cpu_start + (i << PERCPU_SHIFT),
   10.89 -                   __per_cpu_start,
   10.90 -                   data_size);
   10.91 -    }
   10.92 -}
   10.93 -
   10.94 -static void __init percpu_guard_areas(void)
   10.95 -{
   10.96 -    memguard_guard_range(__per_cpu_start, __per_cpu_end - __per_cpu_start);
   10.97 -}
   10.98 -
   10.99 -static void __init percpu_free_unused_areas(void)
  10.100 -{
  10.101 -    unsigned int i, first_unused;
  10.102 -
  10.103 -    /* Find first unused CPU number. */
  10.104 -    for ( i = 0; i < NR_CPUS; i++ )
  10.105 -        if ( !cpu_possible(i) )
  10.106 -            break;
  10.107 +    /* Initialise per-cpu data area for all possible secondary CPUs. */
  10.108 +    for ( i = 1; (i < NR_CPUS) && cpu_possible(i); i++ )
  10.109 +        memcpy(__per_cpu_start + (i << PERCPU_SHIFT),
  10.110 +               __per_cpu_start,
  10.111 +               data_size);
  10.112      first_unused = i;
  10.113  
  10.114      /* Check that there are no holes in cpu_possible_map. */
  10.115 @@ -210,7 +205,7 @@ static void __init percpu_free_unused_ar
  10.116  }
  10.117  
  10.118  /* Fetch acm policy module from multiboot modules. */
  10.119 -static void extract_acm_policy(
  10.120 +static void __init extract_acm_policy(
  10.121      multiboot_info_t *mbi,
  10.122      unsigned int *initrdidx,
  10.123      char **_policy_start,
  10.124 @@ -228,11 +223,7 @@ static void extract_acm_policy(
  10.125      for ( i = mbi->mods_count-1; i >= 1; i-- )
  10.126      {
  10.127          start = initial_images_start + (mod[i].mod_start-mod[0].mod_start);
  10.128 -#if defined(__i386__)
  10.129 -        policy_start = (char *)start;
  10.130 -#elif defined(__x86_64__)
  10.131 -        policy_start = __va(start);
  10.132 -#endif
  10.133 +        policy_start = maddr_to_bootstrap_virt(start);
  10.134          policy_len   = mod[i].mod_end - mod[i].mod_start;
  10.135          if ( acm_is_policy(policy_start, policy_len) )
  10.136          {
  10.137 @@ -264,7 +255,7 @@ static void __init init_idle_domain(void
  10.138      setup_idle_pagetable();
  10.139  }
  10.140  
  10.141 -static void srat_detect_node(int cpu)
  10.142 +static void __init srat_detect_node(int cpu)
  10.143  {
  10.144      unsigned node;
  10.145      u8 apicid = x86_cpu_to_apicid[cpu];
  10.146 @@ -278,18 +269,45 @@ static void srat_detect_node(int cpu)
  10.147          printk(KERN_INFO "CPU %d APIC %d -> Node %d\n", cpu, apicid, node);
  10.148  }
  10.149  
  10.150 -void __init move_memory(unsigned long dst,
  10.151 -                          unsigned long src_start, unsigned long src_end)
  10.152 +static void __init move_memory(
  10.153 +    unsigned long dst, unsigned long src_start, unsigned long src_end)
  10.154 +{
  10.155 +    memmove(maddr_to_bootstrap_virt(dst),
  10.156 +            maddr_to_bootstrap_virt(src_start),
  10.157 +            src_end - src_start);
  10.158 +}
  10.159 +
  10.160 +/* A temporary copy of the e820 map that we can mess with during bootstrap. */
  10.161 +static struct e820map __initdata boot_e820;
  10.162 +
  10.163 +/* Reserve area (@s,@e) in the temporary bootstrap e820 map. */
  10.164 +static void __init reserve_in_boot_e820(unsigned long s, unsigned long e)
  10.165  {
  10.166 -#if defined(CONFIG_X86_32)
  10.167 -    memmove((void *)dst,            /* use low mapping */
  10.168 -            (void *)src_start,      /* use low mapping */
  10.169 -            src_end - src_start);
  10.170 -#elif defined(CONFIG_X86_64)
  10.171 -    memmove(__va(dst),
  10.172 -            __va(src_start),
  10.173 -            src_end - src_start);
  10.174 -#endif
  10.175 +    unsigned long rs, re;
  10.176 +    int i;
  10.177 +
  10.178 +    for ( i = 0; i < boot_e820.nr_map; i++ )
  10.179 +    {
  10.180 +        /* Have we found the e820 region that includes the specified range? */
  10.181 +        rs = boot_e820.map[i].addr;
  10.182 +        re = boot_e820.map[i].addr + boot_e820.map[i].size;
  10.183 +        if ( (s < rs) || (e > re) )
  10.184 +            continue;
  10.185 +
  10.186 +        /* Start fragment. */
  10.187 +        boot_e820.map[i].size = s - rs;
  10.188 +
  10.189 +        /* End fragment. */
  10.190 +        if ( e < re )
  10.191 +        {
  10.192 +            memmove(&boot_e820.map[i+1], &boot_e820.map[i],
  10.193 +                    (boot_e820.nr_map-i) * sizeof(boot_e820.map[0]));
  10.194 +            boot_e820.nr_map++;
  10.195 +            i++;
  10.196 +            boot_e820.map[i].addr = e;
  10.197 +            boot_e820.map[i].size = re - e;
  10.198 +        }
  10.199 +    }
  10.200  }
  10.201  
  10.202  void __init __start_xen(multiboot_info_t *mbi)
  10.203 @@ -301,7 +319,6 @@ void __init __start_xen(multiboot_info_t
  10.204      unsigned long _policy_len = 0;
  10.205      module_t *mod = (module_t *)__va(mbi->mods_addr);
  10.206      unsigned long nr_pages, modules_length;
  10.207 -    paddr_t s, e;
  10.208      int i, e820_warn = 0, e820_raw_nr = 0, bytes = 0;
  10.209      struct ns16550_defaults ns16550 = {
  10.210          .data_bits = 8,
  10.211 @@ -338,17 +355,11 @@ void __init __start_xen(multiboot_info_t
  10.212  
  10.213      /* Check that we have at least one Multiboot module. */
  10.214      if ( !(mbi->flags & MBI_MODULES) || (mbi->mods_count == 0) )
  10.215 -    {
  10.216 -        printk("FATAL ERROR: dom0 kernel not specified."
  10.217 -               " Check bootloader configuration.\n");
  10.218 -        EARLY_FAIL();
  10.219 -    }
  10.220 +        EARLY_FAIL("dom0 kernel not specified. "
  10.221 +                   "Check bootloader configuration.\n");
  10.222  
  10.223      if ( ((unsigned long)cpu0_stack & (STACK_SIZE-1)) != 0 )
  10.224 -    {
  10.225 -        printk("FATAL ERROR: Misaligned CPU0 stack.\n");
  10.226 -        EARLY_FAIL();
  10.227 -    }
  10.228 +        EARLY_FAIL("Misaligned CPU0 stack.\n");
  10.229  
  10.230      /*
  10.231       * Since there are some stubs getting built on the stacks which use
  10.232 @@ -357,7 +368,6 @@ void __init __start_xen(multiboot_info_t
  10.233       */
  10.234      if ( opt_xenheap_megabytes > 2048 )
  10.235          opt_xenheap_megabytes = 2048;
  10.236 -    xenheap_phys_end = opt_xenheap_megabytes << 20;
  10.237  
  10.238      if ( mbi->flags & MBI_MEMMAP )
  10.239      {
  10.240 @@ -403,8 +413,7 @@ void __init __start_xen(multiboot_info_t
  10.241      }
  10.242      else
  10.243      {
  10.244 -        printk("FATAL ERROR: Bootloader provided no memory information.\n");
  10.245 -        for ( ; ; ) ;
  10.246 +        EARLY_FAIL("Bootloader provided no memory information.\n");
  10.247      }
  10.248  
  10.249      if ( e820_warn )
  10.250 @@ -430,80 +439,190 @@ void __init __start_xen(multiboot_info_t
  10.251      /* Sanitise the raw E820 map to produce a final clean version. */
  10.252      max_page = init_e820(e820_raw, &e820_raw_nr);
  10.253  
  10.254 -    modules_length = mod[mbi->mods_count-1].mod_end - mod[0].mod_start;
  10.255 -
  10.256 -    /* Find a large enough RAM extent to stash the DOM0 modules. */
  10.257 -    for ( i = 0; ; i++ )
  10.258 +    /*
  10.259 +     * Create a temporary copy of the E820 map. Truncate it to above 16MB
  10.260 +     * as anything below that is already mapped and has a statically-allocated
  10.261 +     * purpose.
  10.262 +     */
  10.263 +    memcpy(&boot_e820, &e820, sizeof(e820));
  10.264 +    for ( i = 0; i < boot_e820.nr_map; i++ )
  10.265      {
  10.266 -        if ( i == e820.nr_map )
  10.267 +        uint64_t s, e, min = 16 << 20; /* 16MB */
  10.268 +        s = boot_e820.map[i].addr;
  10.269 +        e = boot_e820.map[i].addr + boot_e820.map[i].size;
  10.270 +        if ( s >= min )
  10.271 +            continue;
  10.272 +        if ( e > min )
  10.273          {
  10.274 -            printk("Not enough memory to stash the DOM0 kernel image.\n");
  10.275 -            for ( ; ; ) ;
  10.276 +            boot_e820.map[i].addr = min;
  10.277 +            boot_e820.map[i].size = e - min;
  10.278          }
  10.279 -
  10.280 -        if ( (e820.map[i].type == E820_RAM) &&
  10.281 -             (e820.map[i].size >= modules_length) &&
  10.282 -             ((e820.map[i].addr + e820.map[i].size) >=
  10.283 -              (xenheap_phys_end + modules_length)) )
  10.284 -            break;
  10.285 +        else
  10.286 +            boot_e820.map[i].type = E820_RESERVED;
  10.287      }
  10.288  
  10.289 -    /* Stash as near as possible to the beginning of the RAM extent. */
  10.290 -    initial_images_start = e820.map[i].addr;
  10.291 -    if ( initial_images_start < xenheap_phys_end )
  10.292 -        initial_images_start = xenheap_phys_end;
  10.293 -    initial_images_end = initial_images_start + modules_length;
  10.294 +    /*
  10.295 +     * Iterate over all superpage-aligned RAM regions.
  10.296 +     * 
  10.297 +     * We require superpage alignment because the boot allocator is not yet
  10.298 +     * initialised. Hence we can only map superpages in the address range
  10.299 +     * 0 to BOOTSTRAP_DIRECTMAP_END, as this is guaranteed not to require
  10.300 +     * dynamic allocation of pagetables.
  10.301 +     * 
  10.302 +     * As well as mapping superpages in that range, in preparation for
  10.303 +     * initialising the boot allocator, we also look for a region to which
  10.304 +     * we can relocate the dom0 kernel and other multiboot modules. Also, on
  10.305 +     * x86/64, we relocate Xen to higher memory.
  10.306 +     */
  10.307 +    modules_length = mod[mbi->mods_count-1].mod_end - mod[0].mod_start;
  10.308 +    for ( i = 0; i < boot_e820.nr_map; i++ )
  10.309 +    {
  10.310 +        uint64_t s, e, mask = (1UL << L2_PAGETABLE_SHIFT) - 1;
  10.311  
  10.312 -    move_memory(initial_images_start, 
  10.313 -                mod[0].mod_start, mod[mbi->mods_count-1].mod_end);
  10.314 -
  10.315 -    /* Initialise boot-time allocator with all RAM situated after modules. */
  10.316 -    xenheap_phys_start = init_boot_allocator(__pa(&_end));
  10.317 -    nr_pages = 0;
  10.318 -    for ( i = 0; i < e820.nr_map; i++ )
  10.319 -    {
  10.320 -        if ( e820.map[i].type != E820_RAM )
  10.321 +        /* Superpage-aligned chunks up to BOOTSTRAP_DIRECTMAP_END, please. */
  10.322 +        s = (boot_e820.map[i].addr + mask) & ~mask;
  10.323 +        e = (boot_e820.map[i].addr + boot_e820.map[i].size) & ~mask;
  10.324 +        e = min_t(uint64_t, e, BOOTSTRAP_DIRECTMAP_END);
  10.325 +        if ( (boot_e820.map[i].type != E820_RAM) || (s >= e) )
  10.326              continue;
  10.327  
  10.328 -        nr_pages += e820.map[i].size >> PAGE_SHIFT;
  10.329 +        /* Map the chunk. No memory will need to be allocated to do this. */
  10.330 +        map_pages_to_xen(
  10.331 +            (unsigned long)maddr_to_bootstrap_virt(s),
  10.332 +            s >> PAGE_SHIFT, (e-s) >> PAGE_SHIFT, PAGE_HYPERVISOR);
  10.333  
  10.334 -        /* Initialise boot heap, skipping Xen heap and dom0 modules. */
  10.335 -        s = e820.map[i].addr;
  10.336 -        e = s + e820.map[i].size;
  10.337 -        if ( s < xenheap_phys_end )
  10.338 -            s = xenheap_phys_end;
  10.339 -        if ( (s < initial_images_end) && (e > initial_images_start) )
  10.340 -            s = initial_images_end;
  10.341 -        init_boot_pages(s, e);
  10.342 +        /* Is the region suitable for relocating the multiboot modules? */
  10.343 +        if ( !initial_images_start && ((e-s) >= modules_length) )
  10.344 +        {
  10.345 +            e -= modules_length;
  10.346 +            e &= ~mask;
  10.347 +            initial_images_start = e;
  10.348 +            initial_images_end = initial_images_start + modules_length;
  10.349 +            move_memory(initial_images_start, 
  10.350 +                        mod[0].mod_start, mod[mbi->mods_count-1].mod_end);
  10.351 +            if ( s >= e )
  10.352 +                continue;
  10.353 +        }
  10.354  
  10.355  #if defined(CONFIG_X86_64)
  10.356 -        /*
  10.357 -         * x86/64 maps all registered RAM. Points to note:
  10.358 -         *  1. The initial pagetable already maps low 1GB, so skip that.
  10.359 -         *  2. We must map *only* RAM areas, taking care to avoid I/O holes.
  10.360 -         *     Failure to do this can cause coherency problems and deadlocks
  10.361 -         *     due to cache-attribute mismatches (e.g., AMD/AGP Linux bug).
  10.362 -         */
  10.363 +        /* Is the region suitable for relocating Xen? */
  10.364 +        if ( !xen_phys_start && (((e-s) >> 20) >= opt_xenheap_megabytes) )
  10.365          {
  10.366 -            /* Calculate page-frame range, discarding partial frames. */
  10.367 -            unsigned long start, end;
  10.368 -            unsigned long init_mapped = 1UL << (30 - PAGE_SHIFT); /* 1GB */
  10.369 -            start = PFN_UP(e820.map[i].addr);
  10.370 -            end   = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
  10.371 -            /* Clip the range to exclude what the bootstrapper initialised. */
  10.372 -            if ( start < init_mapped )
  10.373 -                start = init_mapped;
  10.374 -            if ( end <= start )
  10.375 -                continue;
  10.376 -            /* Request the mapping. */
  10.377 -            map_pages_to_xen(
  10.378 -                PAGE_OFFSET + (start << PAGE_SHIFT),
  10.379 -                start, end-start, PAGE_HYPERVISOR);
  10.380 +            extern l2_pgentry_t l2_xenmap[];
  10.381 +            l4_pgentry_t *pl4e;
  10.382 +            l3_pgentry_t *pl3e;
  10.383 +            l2_pgentry_t *pl2e;
  10.384 +            int i, j;
  10.385 +
  10.386 +            /* Select relocation address. */
  10.387 +            e = (e - (opt_xenheap_megabytes << 20)) & ~mask;
  10.388 +            xen_phys_start = e;
  10.389 +            boot_trampoline_va(trampoline_xen_phys_start) = e;
  10.390 +
  10.391 +            /*
  10.392 +             * Perform relocation to new physical address.
  10.393 +             * Before doing so we must sync static/global data with main memory
  10.394 +             * with a barrier(). After this we must *not* modify static/global
  10.395 +             * data until after we have switched to the relocated pagetables!
  10.396 +             */
  10.397 +            barrier();
  10.398 +            move_memory(e, 0, __pa(&_end) - xen_phys_start);
  10.399 +
  10.400 +            /* Walk initial pagetables, relocating page directory entries. */
  10.401 +            pl4e = __va(__pa(idle_pg_table));
  10.402 +            for ( i = 0 ; i < L4_PAGETABLE_ENTRIES; i++, pl4e++ )
  10.403 +            {
  10.404 +                if ( !(l4e_get_flags(*pl4e) & _PAGE_PRESENT) )
  10.405 +                    continue;
  10.406 +                *pl4e = l4e_from_intpte(l4e_get_intpte(*pl4e) +
  10.407 +                                        xen_phys_start);
  10.408 +                pl3e = l4e_to_l3e(*pl4e);
  10.409 +                for ( j = 0; j < L3_PAGETABLE_ENTRIES; j++, pl3e++ )
  10.410 +                {
  10.411 +                    /* Not present or already relocated? */
  10.412 +                    if ( !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) ||
  10.413 +                         (l3e_get_pfn(*pl3e) > 0x1000) )
  10.414 +                        continue;
  10.415 +                    *pl3e = l3e_from_intpte(l3e_get_intpte(*pl3e) +
  10.416 +                                            xen_phys_start);
  10.417 +                }
  10.418 +            }
  10.419 +
  10.420 +            /* The only data mappings to be relocated are in the Xen area. */
  10.421 +            pl2e = __va(__pa(l2_xenmap));
  10.422 +            for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++, pl2e++ )
  10.423 +            {
  10.424 +                if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) )
  10.425 +                    continue;
  10.426 +                *pl2e = l2e_from_intpte(l2e_get_intpte(*pl2e) +
  10.427 +                                        xen_phys_start);
  10.428 +            }
  10.429 +
  10.430 +            /* Re-sync the stack and then switch to relocated pagetables. */
  10.431 +            asm volatile (
  10.432 +                "rep movsb        ; " /* re-sync the stack */
  10.433 +                "movq %%cr4,%%rsi ; "
  10.434 +                "andb $0x7f,%%sil ; "
  10.435 +                "movq %%rsi,%%cr4 ; " /* CR4.PGE == 0 */
  10.436 +                "movq %0,%%cr3    ; " /* CR3 == new pagetables */
  10.437 +                "orb $0x80,%%sil  ; "
  10.438 +                "movq %%rsi,%%cr4   " /* CR4.PGE == 1 */
  10.439 +                : : "r" (__pa(idle_pg_table)), "S" (cpu0_stack),
  10.440 +                "D" (__va(__pa(cpu0_stack))), "c" (STACK_SIZE) : "memory" );
  10.441          }
  10.442  #endif
  10.443      }
  10.444  
  10.445 -    if ( kexec_crash_area.size > 0 && kexec_crash_area.start > 0)
  10.446 +    if ( !initial_images_start )
  10.447 +        EARLY_FAIL("Not enough memory to relocate the dom0 kernel image.\n");
  10.448 +    reserve_in_boot_e820(initial_images_start, initial_images_end);
  10.449 +
  10.450 +    /*
  10.451 +     * With modules (and Xen itself, on x86/64) relocated out of the way, we
  10.452 +     * can now initialise the boot allocator with some memory.
  10.453 +     */
  10.454 +    xenheap_phys_start = init_boot_allocator(__pa(&_end));
  10.455 +    xenheap_phys_end   = opt_xenheap_megabytes << 20;
  10.456 +#if defined(CONFIG_X86_64)
  10.457 +    if ( !xen_phys_start )
  10.458 +        EARLY_FAIL("Not enough memory to relocate Xen.\n");
  10.459 +    xenheap_phys_end += xen_phys_start;
  10.460 +    reserve_in_boot_e820(xen_phys_start,
  10.461 +                         xen_phys_start + (opt_xenheap_megabytes<<20));
  10.462 +    init_boot_pages(1<<20, 16<<20); /* Initial seed: 15MB */
  10.463 +#else
  10.464 +    init_boot_pages(xenheap_phys_end, 16<<20); /* Initial seed: 4MB */
  10.465 +#endif
  10.466 +
  10.467 +    /*
  10.468 +     * With the boot allocator now seeded, we can walk every RAM region and
  10.469 +     * map it in its entirety (on x86/64, at least) and notify it to the
  10.470 +     * boot allocator.
  10.471 +     */
  10.472 +    for ( i = 0; i < boot_e820.nr_map; i++ )
  10.473 +    {
  10.474 +        uint64_t s, e, map_e, mask = PAGE_SIZE - 1;
  10.475 +
  10.476 +        /* Only page alignment required now. */
  10.477 +        s = (boot_e820.map[i].addr + mask) & ~mask;
  10.478 +        e = (boot_e820.map[i].addr + boot_e820.map[i].size) & ~mask;
  10.479 +        if ( (boot_e820.map[i].type != E820_RAM) || (s >= e) )
  10.480 +            continue;
  10.481 +
  10.482 +        /* Perform the mapping (truncated in 32-bit mode). */
  10.483 +        map_e = e;
  10.484 +#if defined(CONFIG_X86_32)
  10.485 +        map_e = min_t(uint64_t, map_e, BOOTSTRAP_DIRECTMAP_END);
  10.486 +#endif
  10.487 +        if ( s < map_e )
  10.488 +            map_pages_to_xen(
  10.489 +                (unsigned long)maddr_to_bootstrap_virt(s),
  10.490 +                s >> PAGE_SHIFT, (map_e-s) >> PAGE_SHIFT, PAGE_HYPERVISOR);
  10.491 +
  10.492 +        init_boot_pages(s, e);
  10.493 +    }
  10.494 +
  10.495 +    if ( (kexec_crash_area.size > 0) && (kexec_crash_area.start > 0) )
  10.496      {
  10.497          unsigned long kdump_start, kdump_size, k;
  10.498  
  10.499 @@ -534,7 +653,7 @@ void __init __start_xen(multiboot_info_t
  10.500  
  10.501  #if defined(CONFIG_X86_32)
  10.502          /* Must allocate within bootstrap 1:1 limits. */
  10.503 -        k = alloc_boot_low_pages(k, 1); /* 0x0 - HYPERVISOR_VIRT_START */
  10.504 +        k = alloc_boot_low_pages(k, 1); /* 0x0 - BOOTSTRAP_DIRECTMAP_END */
  10.505  #else
  10.506          k = alloc_boot_pages(k, 1);
  10.507  #endif
  10.508 @@ -549,8 +668,11 @@ void __init __start_xen(multiboot_info_t
  10.509      }
  10.510  
  10.511      memguard_init();
  10.512 -    percpu_guard_areas();
  10.513  
  10.514 +    nr_pages = 0;
  10.515 +    for ( i = 0; i < e820.nr_map; i++ )
  10.516 +        if ( e820.map[i].type == E820_RAM )
  10.517 +            nr_pages += e820.map[i].size >> PAGE_SHIFT;
  10.518      printk("System RAM: %luMB (%lukB)\n",
  10.519             nr_pages >> (20 - PAGE_SHIFT),
  10.520             nr_pages << (PAGE_SHIFT - 10));
  10.521 @@ -592,26 +714,13 @@ void __init __start_xen(multiboot_info_t
  10.522      numa_initmem_init(0, max_page);
  10.523  
  10.524      /* Initialise the Xen heap, skipping RAM holes. */
  10.525 -    nr_pages = 0;
  10.526 -    for ( i = 0; i < e820.nr_map; i++ )
  10.527 -    {
  10.528 -        if ( e820.map[i].type != E820_RAM )
  10.529 -            continue;
  10.530 -
  10.531 -        s = e820.map[i].addr;
  10.532 -        e = s + e820.map[i].size;
  10.533 -        if ( s < xenheap_phys_start )
  10.534 -            s = xenheap_phys_start;
  10.535 -        if ( e > xenheap_phys_end )
  10.536 -            e = xenheap_phys_end;
  10.537 - 
  10.538 -        if ( s < e )
  10.539 -        {
  10.540 -            nr_pages += (e - s) >> PAGE_SHIFT;
  10.541 -            init_xenheap_pages(s, e);
  10.542 -        }
  10.543 -    }
  10.544 -
  10.545 +    init_xenheap_pages(xenheap_phys_start, xenheap_phys_end);
  10.546 +    nr_pages = (xenheap_phys_end - xenheap_phys_start) >> PAGE_SHIFT;
  10.547 +#ifdef __x86_64__
  10.548 +    init_xenheap_pages(xen_phys_start, __pa(&_start));
  10.549 +    nr_pages += (__pa(&_start) - xen_phys_start) >> PAGE_SHIFT;
  10.550 +#endif
  10.551 +    xenheap_phys_start = xen_phys_start;
  10.552      printk("Xen heap: %luMB (%lukB)\n", 
  10.553             nr_pages >> (20 - PAGE_SHIFT),
  10.554             nr_pages << (PAGE_SHIFT - 10));
  10.555 @@ -636,8 +745,6 @@ void __init __start_xen(multiboot_info_t
  10.556  
  10.557      find_smp_config();
  10.558  
  10.559 -    smp_alloc_memory();
  10.560 -
  10.561      dmi_scan_machine();
  10.562  
  10.563      generic_apic_probe();
  10.564 @@ -711,8 +818,6 @@ void __init __start_xen(multiboot_info_t
  10.565      printk("Brought up %ld CPUs\n", (long)num_online_cpus());
  10.566      smp_cpus_done(max_cpus);
  10.567  
  10.568 -    percpu_free_unused_areas();
  10.569 -
  10.570      initialise_gdb(); /* could be moved earlier */
  10.571  
  10.572      do_initcalls();
    11.1 --- a/xen/arch/x86/smpboot.c	Thu May 10 16:22:27 2007 +0100
    11.2 +++ b/xen/arch/x86/smpboot.c	Thu May 10 18:02:55 2007 +0100
    11.3 @@ -54,8 +54,8 @@
    11.4  #include <mach_wakecpu.h>
    11.5  #include <smpboot_hooks.h>
    11.6  
    11.7 -static inline int set_kernel_exec(unsigned long x, int y) { return 0; }
    11.8 -#define alloc_bootmem_low_pages(x) __va(0x90000) /* trampoline address */
    11.9 +#define set_kernel_exec(x, y) (0)
   11.10 +#define setup_trampoline()    (boot_trampoline_pa(trampoline_realmode_entry))
   11.11  
   11.12  /* Set if we find a B stepping CPU */
   11.13  static int __devinitdata smp_b_stepping;
   11.14 @@ -109,51 +109,8 @@ u8 x86_cpu_to_apicid[NR_CPUS] __read_mos
   11.15  			{ [0 ... NR_CPUS-1] = 0xff };
   11.16  EXPORT_SYMBOL(x86_cpu_to_apicid);
   11.17  
   11.18 -/*
   11.19 - * Trampoline 80x86 program as an array.
   11.20 - */
   11.21 -
   11.22 -extern unsigned char trampoline_data [];
   11.23 -extern unsigned char trampoline_end  [];
   11.24 -static unsigned char *trampoline_base;
   11.25 -static int trampoline_exec;
   11.26 -
   11.27  static void map_cpu_to_logical_apicid(void);
   11.28  
   11.29 -/* State of each CPU. */
   11.30 -/*DEFINE_PER_CPU(int, cpu_state) = { 0 };*/
   11.31 -
   11.32 -/*
   11.33 - * Currently trivial. Write the real->protected mode
   11.34 - * bootstrap into the page concerned. The caller
   11.35 - * has made sure it's suitably aligned.
   11.36 - */
   11.37 -
   11.38 -static unsigned long __devinit setup_trampoline(void)
   11.39 -{
   11.40 -	memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data);
   11.41 -	return virt_to_maddr(trampoline_base);
   11.42 -}
   11.43 -
   11.44 -/*
   11.45 - * We are called very early to get the low memory for the
   11.46 - * SMP bootup trampoline page.
   11.47 - */
   11.48 -void __init smp_alloc_memory(void)
   11.49 -{
   11.50 -	trampoline_base = (void *) alloc_bootmem_low_pages(PAGE_SIZE);
   11.51 -	/*
   11.52 -	 * Has to be in very low memory so we can execute
   11.53 -	 * real-mode AP code.
   11.54 -	 */
   11.55 -	if (__pa(trampoline_base) >= 0x9F000)
   11.56 -		BUG();
   11.57 -	/*
   11.58 -	 * Make the SMP trampoline executable:
   11.59 -	 */
   11.60 -	trampoline_exec = set_kernel_exec((unsigned long)trampoline_base, 1);
   11.61 -}
   11.62 -
   11.63  /*
   11.64   * The bootstrap kernel entry code has set these up. Save them for
   11.65   * a given CPU
   11.66 @@ -950,9 +907,9 @@ static int __devinit do_boot_cpu(int api
   11.67  			print_cpu_info(&cpu_data[cpu]);
   11.68  			Dprintk("CPU has booted.\n");
   11.69  		} else {
   11.70 -			boot_error= 1;
   11.71 -			if (*((volatile unsigned char *)trampoline_base)
   11.72 -					== 0xA5)
   11.73 +			boot_error = 1;
   11.74 +			mb();
   11.75 +			if (boot_trampoline_va(trampoline_cpu_started) == 0xA5)
   11.76  				/* trampoline started but...? */
   11.77  				printk("Stuck ??\n");
   11.78  			else
   11.79 @@ -974,7 +931,8 @@ static int __devinit do_boot_cpu(int api
   11.80  	}
   11.81  
   11.82  	/* mark "stuck" area as not stuck */
   11.83 -	*((volatile unsigned long *)trampoline_base) = 0;
   11.84 +	boot_trampoline_va(trampoline_cpu_started) = 0;
   11.85 +	mb();
   11.86  
   11.87  	return boot_error;
   11.88  }
    12.1 --- a/xen/arch/x86/trampoline.S	Thu May 10 16:22:27 2007 +0100
    12.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    12.3 @@ -1,67 +0,0 @@
    12.4 -/*
    12.5 - *
    12.6 - *	Trampoline.S	Derived from Setup.S by Linus Torvalds
    12.7 - *
    12.8 - *	4 Jan 1997 Michael Chastain: changed to gnu as.
    12.9 - *
   12.10 - *	Entry: CS:IP point to the start of our code, we are 
   12.11 - *	in real mode with no stack, but the rest of the 
   12.12 - *	trampoline page to make our stack and everything else
   12.13 - *	is a mystery.
   12.14 - *
   12.15 - *	On entry to trampoline_data, the processor is in real mode
   12.16 - *	with 16-bit addressing and 16-bit data.  CS has some value
   12.17 - *	and IP is zero.  Thus, data addresses need to be absolute
   12.18 - *	(no relocation) and are taken with regard to r_base.
   12.19 - */
   12.20 -
   12.21 -#include <xen/config.h>
   12.22 -#include <public/xen.h>
   12.23 -#include <asm/desc.h>
   12.24 -#include <asm/page.h>
   12.25 -
   12.26 -#ifdef CONFIG_SMP
   12.27 -        
   12.28 -.data
   12.29 -
   12.30 -.code16
   12.31 -
   12.32 -ENTRY(trampoline_data)
   12.33 -r_base = .
   12.34 -        mov	%cs, %ax	# Code and data in the same place
   12.35 -	mov	%ax, %ds
   12.36 -
   12.37 -	movl	$0xA5A5A5A5, %ebx # Flag an SMP trampoline
   12.38 -	cli			# We should be safe anyway
   12.39 -
   12.40 -	movl	$0xA5A5A5A5, trampoline_data - r_base
   12.41 -
   12.42 -	lidt	idt_48 - r_base	# load idt with 0, 0
   12.43 -	lgdt	gdt_48 - r_base	# load gdt with whatever is appropriate
   12.44 -
   12.45 -	xor	%ax, %ax
   12.46 -	inc	%ax		# protected mode (PE) bit
   12.47 -	lmsw	%ax		# into protected mode
   12.48 -	jmp	flush_instr
   12.49 -flush_instr:
   12.50 -#if defined(__x86_64__)
   12.51 -	ljmpl	$__HYPERVISOR_CS32, $0x100000 # 1MB
   12.52 -#else        
   12.53 -	ljmpl	$__HYPERVISOR_CS,   $0x100000 # 1MB
   12.54 -#endif
   12.55 -
   12.56 -idt_48:
   12.57 -	.word	0			# idt limit = 0
   12.58 -	.word	0, 0			# idt base = 0L
   12.59 -
   12.60 -gdt_48:
   12.61 -	.word	LAST_RESERVED_GDT_BYTE
   12.62 -#ifdef __i386__
   12.63 -	.long	gdt_table - FIRST_RESERVED_GDT_BYTE - __PAGE_OFFSET
   12.64 -#else
   12.65 -	.long   0x101000 - FIRST_RESERVED_GDT_BYTE
   12.66 -#endif
   12.67 -
   12.68 -ENTRY(trampoline_end)
   12.69 -
   12.70 -#endif /* CONFIG_SMP */
    13.1 --- a/xen/arch/x86/traps.c	Thu May 10 16:22:27 2007 +0100
    13.2 +++ b/xen/arch/x86/traps.c	Thu May 10 18:02:55 2007 +0100
    13.3 @@ -1413,20 +1413,30 @@ static int emulate_privileged_op(struct 
    13.4       * GPR context. This is needed for some systems which (ab)use IN/OUT
    13.5       * to communicate with BIOS code in system-management mode.
    13.6       */
    13.7 +#ifdef __x86_64__
    13.8 +    /* movq $host_to_guest_gpr_switch,%rcx */
    13.9 +    io_emul_stub[0] = 0x48;
   13.10 +    io_emul_stub[1] = 0xb9;
   13.11 +    *(void **)&io_emul_stub[2] = (void *)host_to_guest_gpr_switch;
   13.12 +    /* callq *%rcx */
   13.13 +    io_emul_stub[10] = 0xff;
   13.14 +    io_emul_stub[11] = 0xd1;
   13.15 +#else
   13.16      /* call host_to_guest_gpr_switch */
   13.17      io_emul_stub[0] = 0xe8;
   13.18      *(s32 *)&io_emul_stub[1] =
   13.19          (char *)host_to_guest_gpr_switch - &io_emul_stub[5];
   13.20 +    /* 7 x nop */
   13.21 +    memset(&io_emul_stub[5], 0x90, 7);
   13.22 +#endif
   13.23      /* data16 or nop */
   13.24 -    io_emul_stub[5] = (op_bytes != 2) ? 0x90 : 0x66;
   13.25 +    io_emul_stub[12] = (op_bytes != 2) ? 0x90 : 0x66;
   13.26      /* <io-access opcode> */
   13.27 -    io_emul_stub[6] = opcode;
   13.28 +    io_emul_stub[13] = opcode;
   13.29      /* imm8 or nop */
   13.30 -    io_emul_stub[7] = 0x90;
   13.31 -    /* jmp guest_to_host_gpr_switch */
   13.32 -    io_emul_stub[8] = 0xe9;
   13.33 -    *(s32 *)&io_emul_stub[9] =
   13.34 -        (char *)guest_to_host_gpr_switch - &io_emul_stub[13];
   13.35 +    io_emul_stub[14] = 0x90;
   13.36 +    /* ret (jumps to guest_to_host_gpr_switch) */
   13.37 +    io_emul_stub[15] = 0xc3;
   13.38  
   13.39      /* Handy function-typed pointer to the stub. */
   13.40      io_emul = (void *)io_emul_stub;
   13.41 @@ -1438,7 +1448,7 @@ static int emulate_privileged_op(struct 
   13.42          op_bytes = 1;
   13.43      case 0xe5: /* IN imm8,%eax */
   13.44          port = insn_fetch(u8, code_base, eip, code_limit);
   13.45 -        io_emul_stub[7] = port; /* imm8 */
   13.46 +        io_emul_stub[14] = port; /* imm8 */
   13.47      exec_in:
   13.48          if ( !guest_io_okay(port, op_bytes, v, regs) )
   13.49              goto fail;
   13.50 @@ -1480,7 +1490,7 @@ static int emulate_privileged_op(struct 
   13.51          op_bytes = 1;
   13.52      case 0xe7: /* OUT %eax,imm8 */
   13.53          port = insn_fetch(u8, code_base, eip, code_limit);
   13.54 -        io_emul_stub[7] = port; /* imm8 */
   13.55 +        io_emul_stub[14] = port; /* imm8 */
   13.56      exec_out:
   13.57          if ( !guest_io_okay(port, op_bytes, v, regs) )
   13.58              goto fail;
    14.1 --- a/xen/arch/x86/x86_32/gpr_switch.S	Thu May 10 16:22:27 2007 +0100
    14.2 +++ b/xen/arch/x86/x86_32/gpr_switch.S	Thu May 10 18:02:55 2007 +0100
    14.3 @@ -20,6 +20,7 @@ ENTRY(host_to_guest_gpr_switch)
    14.4          movl  UREGS_esi(%eax), %esi
    14.5          pushl %edi
    14.6          movl  UREGS_edi(%eax), %edi
    14.7 +        pushl $guest_to_host_gpr_switch
    14.8          pushl %ecx
    14.9          movl  UREGS_ecx(%eax), %ecx
   14.10          movl  UREGS_eax(%eax), %eax
    15.1 --- a/xen/arch/x86/x86_32/mm.c	Thu May 10 16:22:27 2007 +0100
    15.2 +++ b/xen/arch/x86/x86_32/mm.c	Thu May 10 18:02:55 2007 +0100
    15.3 @@ -30,6 +30,14 @@
    15.4  #include <asm/fixmap.h>
    15.5  #include <public/memory.h>
    15.6  
    15.7 +#ifdef CONFIG_X86_PAE
    15.8 +l2_pgentry_t __attribute__ ((__section__ (".bss.page_aligned")))
    15.9 +    idle_pg_table_l2[4 * L2_PAGETABLE_ENTRIES];
   15.10 +#else
   15.11 +l2_pgentry_t __attribute__ ((__section__ (".bss.page_aligned")))
   15.12 +    idle_pg_table_l2[L2_PAGETABLE_ENTRIES];
   15.13 +#endif
   15.14 +
   15.15  unsigned int PAGE_HYPERVISOR         = __PAGE_HYPERVISOR;
   15.16  unsigned int PAGE_HYPERVISOR_NOCACHE = __PAGE_HYPERVISOR_NOCACHE;
   15.17  
    16.1 --- a/xen/arch/x86/x86_64/compat_kexec.S	Thu May 10 16:22:27 2007 +0100
    16.2 +++ b/xen/arch/x86/x86_64/compat_kexec.S	Thu May 10 18:02:55 2007 +0100
    16.3 @@ -8,7 +8,9 @@
    16.4  #include <asm/msr.h>
    16.5  #include <asm/page.h>
    16.6  
    16.7 -.text
    16.8 +#define SYM_PHYS(sym)       ((sym) - __XEN_VIRT_START)
    16.9 +
   16.10 +        .text
   16.11  
   16.12          .code64
   16.13  
   16.14 @@ -29,21 +31,19 @@ 1:      dec %r9
   16.15          test %r9,%r9
   16.16          jnz 1b
   16.17  
   16.18 -        movq %rbx,%rdx
   16.19 -        mov $__PAGE_OFFSET,%rbx
   16.20 -        sub %rbx, %rdx
   16.21 +        mov $SYM_PHYS(compat_page_list),%rdx
   16.22  
   16.23          /*
   16.24           * Setup an identity mapped region in PML4[0] of idle page
   16.25           * table.
   16.26           */
   16.27 -        lea idle_pg_table_l3(%rip),%rax
   16.28 +        lea l3_identmap(%rip),%rax
   16.29          sub %rbx,%rax
   16.30          or  $0x63,%rax
   16.31          mov %rax, idle_pg_table(%rip)
   16.32  
   16.33          /* Switch to idle page table. */
   16.34 -        movq $(idle_pg_table - __PAGE_OFFSET), %rax
   16.35 +        movq $SYM_PHYS(idle_pg_table), %rax
   16.36          movq %rax, %cr3
   16.37  
   16.38          /* Jump to low identity mapping in compatibility mode. */
   16.39 @@ -51,7 +51,7 @@ 1:      dec %r9
   16.40          ud2
   16.41  
   16.42  compatibility_mode_far:
   16.43 -        .long compatibility_mode - __PAGE_OFFSET
   16.44 +        .long SYM_PHYS(compatibility_mode)
   16.45          .long __HYPERVISOR_CS32
   16.46  
   16.47          .code32
   16.48 @@ -78,7 +78,7 @@ compatibility_mode:
   16.49          movl %eax, %cr0
   16.50  
   16.51          /* Switch to 32 bit page table. */
   16.52 -        movl  $compat_pg_table - __PAGE_OFFSET, %eax
   16.53 +        movl  $SYM_PHYS(compat_pg_table), %eax
   16.54          movl  %eax, %cr3
   16.55  
   16.56          /* Clear MSR_EFER[LME], disabling long mode */
   16.57 @@ -106,10 +106,10 @@ compat_page_list:
   16.58           * first 4G of the physical address space.
   16.59           */
   16.60  compat_pg_table:
   16.61 -        .long compat_pg_table_l2 + 0*PAGE_SIZE + 0x01 - __PAGE_OFFSET, 0
   16.62 -        .long compat_pg_table_l2 + 1*PAGE_SIZE + 0x01 - __PAGE_OFFSET, 0
   16.63 -        .long compat_pg_table_l2 + 2*PAGE_SIZE + 0x01 - __PAGE_OFFSET, 0
   16.64 -        .long compat_pg_table_l2 + 3*PAGE_SIZE + 0x01 - __PAGE_OFFSET, 0
   16.65 +        .long SYM_PHYS(compat_pg_table_l2) + 0*PAGE_SIZE + 0x01, 0
   16.66 +        .long SYM_PHYS(compat_pg_table_l2) + 1*PAGE_SIZE + 0x01, 0
   16.67 +        .long SYM_PHYS(compat_pg_table_l2) + 2*PAGE_SIZE + 0x01, 0
   16.68 +        .long SYM_PHYS(compat_pg_table_l2) + 3*PAGE_SIZE + 0x01, 0
   16.69  
   16.70          .align 4096,0
   16.71  
    17.1 --- a/xen/arch/x86/x86_64/gpr_switch.S	Thu May 10 16:22:27 2007 +0100
    17.2 +++ b/xen/arch/x86/x86_64/gpr_switch.S	Thu May 10 18:02:55 2007 +0100
    17.3 @@ -30,7 +30,10 @@ ENTRY(host_to_guest_gpr_switch)
    17.4          pushq %r15
    17.5          movq  UREGS_r11(%rdi), %r11
    17.6          movq  UREGS_r15(%rdi), %r15
    17.7 +        pushq %rcx /* dummy push, filled by guest_to_host_gpr_switch pointer */
    17.8          pushq %rcx
    17.9 +        leaq  guest_to_host_gpr_switch(%rip),%rcx
   17.10 +        movq  %rcx,8(%rsp)
   17.11          movq  UREGS_rcx(%rdi), %rcx
   17.12          movq  UREGS_rdi(%rdi), %rdi
   17.13          ret
    18.1 --- a/xen/arch/x86/x86_64/mm.c	Thu May 10 16:22:27 2007 +0100
    18.2 +++ b/xen/arch/x86/x86_64/mm.c	Thu May 10 18:02:55 2007 +0100
    18.3 @@ -36,6 +36,22 @@
    18.4  unsigned int m2p_compat_vstart = __HYPERVISOR_COMPAT_VIRT_START;
    18.5  #endif
    18.6  
    18.7 +/* Top-level master (and idle-domain) page directory. */
    18.8 +l4_pgentry_t __attribute__ ((__section__ (".bss.page_aligned")))
    18.9 +    idle_pg_table[L4_PAGETABLE_ENTRIES];
   18.10 +
   18.11 +/* Enough page directories to map bottom 4GB of the memory map. */
   18.12 +l3_pgentry_t __attribute__ ((__section__ (".bss.page_aligned")))
   18.13 +    l3_identmap[L3_PAGETABLE_ENTRIES];
   18.14 +l2_pgentry_t __attribute__ ((__section__ (".bss.page_aligned")))
   18.15 +    l2_identmap[4*L2_PAGETABLE_ENTRIES];
   18.16 +
   18.17 +/* Enough page directories to map the Xen text and static data. */
   18.18 +l3_pgentry_t __attribute__ ((__section__ (".bss.page_aligned")))
   18.19 +    l3_xenmap[L3_PAGETABLE_ENTRIES];
   18.20 +l2_pgentry_t __attribute__ ((__section__ (".bss.page_aligned")))
   18.21 +    l2_xenmap[L2_PAGETABLE_ENTRIES];
   18.22 +
   18.23  void *alloc_xen_pagetable(void)
   18.24  {
   18.25      extern int early_boot;
    19.1 --- a/xen/arch/x86/x86_64/traps.c	Thu May 10 16:22:27 2007 +0100
    19.2 +++ b/xen/arch/x86/x86_64/traps.c	Thu May 10 18:02:55 2007 +0100
    19.3 @@ -19,8 +19,11 @@
    19.4  #include <asm/shared.h>
    19.5  #include <asm/hvm/hvm.h>
    19.6  #include <asm/hvm/support.h>
    19.7 +#include <public/callback.h>
    19.8  
    19.9 -#include <public/callback.h>
   19.10 +asmlinkage void syscall_enter(void);
   19.11 +asmlinkage void compat_hypercall(void);
   19.12 +asmlinkage void int80_direct_trap(void);
   19.13  
   19.14  static void print_xen_info(void)
   19.15  {
   19.16 @@ -246,9 +249,42 @@ unsigned long do_iret(void)
   19.17      return 0;
   19.18  }
   19.19  
   19.20 -asmlinkage void syscall_enter(void);
   19.21 -asmlinkage void compat_hypercall(void);
   19.22 -asmlinkage void int80_direct_trap(void);
   19.23 +static int write_stack_trampoline(
   19.24 +    char *stack, char *stack_bottom, uint16_t cs_seg)
   19.25 +{
   19.26 +    /* movq %rsp, saversp(%rip) */
   19.27 +    stack[0] = 0x48;
   19.28 +    stack[1] = 0x89;
   19.29 +    stack[2] = 0x25;
   19.30 +    *(u32 *)&stack[3] = (stack_bottom - &stack[7]) - 16;
   19.31 +
   19.32 +    /* leaq saversp(%rip), %rsp */
   19.33 +    stack[7] = 0x48;
   19.34 +    stack[8] = 0x8d;
   19.35 +    stack[9] = 0x25;
   19.36 +    *(u32 *)&stack[10] = (stack_bottom - &stack[14]) - 16;
   19.37 +
   19.38 +    /* pushq %r11 */
   19.39 +    stack[14] = 0x41;
   19.40 +    stack[15] = 0x53;
   19.41 +
   19.42 +    /* pushq $<cs_seg> */
   19.43 +    stack[16] = 0x68;
   19.44 +    *(u32 *)&stack[17] = cs_seg;
   19.45 +
   19.46 +    /* movq $syscall_enter,%r11 */
   19.47 +    stack[21] = 0x49;
   19.48 +    stack[22] = 0xbb;
   19.49 +    *(void **)&stack[23] = (void *)syscall_enter;
   19.50 +
   19.51 +    /* jmpq *%r11 */
   19.52 +    stack[31] = 0x41;
   19.53 +    stack[32] = 0xff;
   19.54 +    stack[33] = 0xe3;
   19.55 +
   19.56 +    return 34;
   19.57 +}
   19.58 +
   19.59  void __init percpu_traps_init(void)
   19.60  {
   19.61      char *stack_bottom, *stack;
   19.62 @@ -280,74 +316,16 @@ void __init percpu_traps_init(void)
   19.63      /* NMI handler has its own per-CPU 1kB stack. */
   19.64      init_tss[cpu].ist[1] = (unsigned long)&stack[3072];
   19.65  
   19.66 -    /*
   19.67 -     * Trampoline for SYSCALL entry from long mode.
   19.68 -     */
   19.69 -
   19.70 -    /* Skip the NMI and DF stacks. */
   19.71 -    stack = &stack[3072];
   19.72 +    /* Trampoline for SYSCALL entry from long mode. */
   19.73 +    stack = &stack[3072]; /* Skip the NMI and DF stacks. */
   19.74      wrmsr(MSR_LSTAR, (unsigned long)stack, ((unsigned long)stack>>32));
   19.75 -
   19.76 -    /* movq %rsp, saversp(%rip) */
   19.77 -    stack[0] = 0x48;
   19.78 -    stack[1] = 0x89;
   19.79 -    stack[2] = 0x25;
   19.80 -    *(u32 *)&stack[3] = (stack_bottom - &stack[7]) - 16;
   19.81 -
   19.82 -    /* leaq saversp(%rip), %rsp */
   19.83 -    stack[7] = 0x48;
   19.84 -    stack[8] = 0x8d;
   19.85 -    stack[9] = 0x25;
   19.86 -    *(u32 *)&stack[10] = (stack_bottom - &stack[14]) - 16;
   19.87 -
   19.88 -    /* pushq %r11 */
   19.89 -    stack[14] = 0x41;
   19.90 -    stack[15] = 0x53;
   19.91 -
   19.92 -    /* pushq $FLAT_KERNEL_CS64 */
   19.93 -    stack[16] = 0x68;
   19.94 -    *(u32 *)&stack[17] = FLAT_KERNEL_CS64;
   19.95 -
   19.96 -    /* jmp syscall_enter */
   19.97 -    stack[21] = 0xe9;
   19.98 -    *(u32 *)&stack[22] = (char *)syscall_enter - &stack[26];
   19.99 +    stack += write_stack_trampoline(stack, stack_bottom, FLAT_KERNEL_CS64);
  19.100  
  19.101 -    /*
  19.102 -     * Trampoline for SYSCALL entry from compatibility mode.
  19.103 -     */
  19.104 -
  19.105 -    /* Skip the long-mode entry trampoline. */
  19.106 -    stack = &stack[26];
  19.107 +    /* Trampoline for SYSCALL entry from compatibility mode. */
  19.108      wrmsr(MSR_CSTAR, (unsigned long)stack, ((unsigned long)stack>>32));
  19.109 -
  19.110 -    /* movq %rsp, saversp(%rip) */
  19.111 -    stack[0] = 0x48;
  19.112 -    stack[1] = 0x89;
  19.113 -    stack[2] = 0x25;
  19.114 -    *(u32 *)&stack[3] = (stack_bottom - &stack[7]) - 16;
  19.115 +    stack += write_stack_trampoline(stack, stack_bottom, FLAT_KERNEL_CS32);
  19.116  
  19.117 -    /* leaq saversp(%rip), %rsp */
  19.118 -    stack[7] = 0x48;
  19.119 -    stack[8] = 0x8d;
  19.120 -    stack[9] = 0x25;
  19.121 -    *(u32 *)&stack[10] = (stack_bottom - &stack[14]) - 16;
  19.122 -
  19.123 -    /* pushq %r11 */
  19.124 -    stack[14] = 0x41;
  19.125 -    stack[15] = 0x53;
  19.126 -
  19.127 -    /* pushq $FLAT_KERNEL_CS32 */
  19.128 -    stack[16] = 0x68;
  19.129 -    *(u32 *)&stack[17] = FLAT_KERNEL_CS32;
  19.130 -
  19.131 -    /* jmp syscall_enter */
  19.132 -    stack[21] = 0xe9;
  19.133 -    *(u32 *)&stack[22] = (char *)syscall_enter - &stack[26];
  19.134 -
  19.135 -    /*
  19.136 -     * Common SYSCALL parameters.
  19.137 -     */
  19.138 -
  19.139 +    /* Common SYSCALL parameters. */
  19.140      wrmsr(MSR_STAR, 0, (FLAT_RING3_CS32<<16) | __HYPERVISOR_CS);
  19.141      wrmsr(MSR_SYSCALL_MASK, EF_VM|EF_RF|EF_NT|EF_DF|EF_IE|EF_TF, 0U);
  19.142  }
    20.1 --- a/xen/arch/x86/x86_64/xen.lds.S	Thu May 10 16:22:27 2007 +0100
    20.2 +++ b/xen/arch/x86/x86_64/xen.lds.S	Thu May 10 18:02:55 2007 +0100
    20.3 @@ -16,7 +16,7 @@ PHDRS
    20.4  }
    20.5  SECTIONS
    20.6  {
    20.7 -  . = 0xFFFF830000100000;
    20.8 +  . = __XEN_VIRT_START + 0x100000;
    20.9    _start = .;
   20.10    _stext = .;			/* Text and read-only data */
   20.11    .text : {
    21.1 --- a/xen/common/grant_table.c	Thu May 10 16:22:27 2007 +0100
    21.2 +++ b/xen/common/grant_table.c	Thu May 10 18:02:55 2007 +0100
    21.3 @@ -833,7 +833,7 @@ gnttab_transfer(
    21.4          }
    21.5  
    21.6          page = mfn_to_page(mfn);
    21.7 -        if ( unlikely(IS_XEN_HEAP_FRAME(page)) )
    21.8 +        if ( unlikely(is_xen_heap_frame(page)) )
    21.9          { 
   21.10              gdprintk(XENLOG_INFO, "gnttab_transfer: xen frame %lx\n",
   21.11                      (unsigned long)gop.mfn);
    22.1 --- a/xen/common/page_alloc.c	Thu May 10 16:22:27 2007 +0100
    22.2 +++ b/xen/common/page_alloc.c	Thu May 10 18:02:55 2007 +0100
    22.3 @@ -585,18 +585,20 @@ static unsigned long avail_heap_pages(
    22.4      return free_pages;
    22.5  }
    22.6  
    22.7 +#define avail_for_domheap(mfn) \
    22.8 +    (!allocated_in_map(mfn) && !is_xen_heap_frame(mfn_to_page(mfn)))
    22.9  void end_boot_allocator(void)
   22.10  {
   22.11      unsigned long i;
   22.12      int curr_free, next_free;
   22.13  
   22.14      /* Pages that are free now go to the domain sub-allocator. */
   22.15 -    if ( (curr_free = next_free = !allocated_in_map(first_valid_mfn)) )
   22.16 +    if ( (curr_free = next_free = avail_for_domheap(first_valid_mfn)) )
   22.17          map_alloc(first_valid_mfn, 1);
   22.18      for ( i = first_valid_mfn; i < max_page; i++ )
   22.19      {
   22.20          curr_free = next_free;
   22.21 -        next_free = !allocated_in_map(i+1);
   22.22 +        next_free = avail_for_domheap(i+1);
   22.23          if ( next_free )
   22.24              map_alloc(i+1, 1); /* prevent merging in free_heap_pages() */
   22.25          if ( curr_free )
   22.26 @@ -605,6 +607,7 @@ void end_boot_allocator(void)
   22.27  
   22.28      printk("Domain heap initialised: DMA width %u bits\n", dma_bitsize);
   22.29  }
   22.30 +#undef avail_for_domheap
   22.31  
   22.32  /*
   22.33   * Scrub all unallocated pages in all heap zones. This function is more
   22.34 @@ -635,7 +638,7 @@ void scrub_heap_pages(void)
   22.35          /* Re-check page status with lock held. */
   22.36          if ( !allocated_in_map(mfn) )
   22.37          {
   22.38 -            if ( IS_XEN_HEAP_FRAME(mfn_to_page(mfn)) )
   22.39 +            if ( is_xen_heap_frame(mfn_to_page(mfn)) )
   22.40              {
   22.41                  p = page_to_virt(mfn_to_page(mfn));
   22.42                  memguard_unguard_range(p, PAGE_SIZE);
   22.43 @@ -675,7 +678,9 @@ void init_xenheap_pages(paddr_t ps, padd
   22.44       * Yuk! Ensure there is a one-page buffer between Xen and Dom zones, to
   22.45       * prevent merging of power-of-two blocks across the zone boundary.
   22.46       */
   22.47 -    if ( !IS_XEN_HEAP_FRAME(maddr_to_page(pe)) )
   22.48 +    if ( ps && !is_xen_heap_frame(maddr_to_page(ps)-1) )
   22.49 +        ps += PAGE_SIZE;
   22.50 +    if ( !is_xen_heap_frame(maddr_to_page(pe)) )
   22.51          pe -= PAGE_SIZE;
   22.52  
   22.53      init_heap_pages(MEMZONE_XEN, maddr_to_page(ps), (pe - ps) >> PAGE_SHIFT);
   22.54 @@ -856,7 +861,7 @@ void free_domheap_pages(struct page_info
   22.55  
   22.56      ASSERT(!in_irq());
   22.57  
   22.58 -    if ( unlikely(IS_XEN_HEAP_FRAME(pg)) )
   22.59 +    if ( unlikely(is_xen_heap_frame(pg)) )
   22.60      {
   22.61          /* NB. May recursively lock from relinquish_memory(). */
   22.62          spin_lock_recursive(&d->page_alloc_lock);
    23.1 --- a/xen/include/asm-ia64/mm.h	Thu May 10 16:22:27 2007 +0100
    23.2 +++ b/xen/include/asm-ia64/mm.h	Thu May 10 18:02:55 2007 +0100
    23.3 @@ -115,8 +115,8 @@ struct page_info
    23.4   /* 29-bit count of references to this frame. */
    23.5  #define PGC_count_mask      ((1UL<<29)-1)
    23.6  
    23.7 -#define IS_XEN_HEAP_FRAME(_pfn) ((page_to_maddr(_pfn) < xenheap_phys_end) \
    23.8 -				 && (page_to_maddr(_pfn) >= xen_pstart))
    23.9 +#define is_xen_heap_frame(pfn) ((page_to_maddr(pfn) < xenheap_phys_end) \
   23.10 +				 && (page_to_maddr(pfn) >= xen_pstart))
   23.11  
   23.12  extern void *xen_heap_start;
   23.13  #define __pickle(a)	((unsigned long)a - (unsigned long)xen_heap_start)
    24.1 --- a/xen/include/asm-powerpc/mm.h	Thu May 10 16:22:27 2007 +0100
    24.2 +++ b/xen/include/asm-powerpc/mm.h	Thu May 10 18:02:55 2007 +0100
    24.3 @@ -112,7 +112,7 @@ struct page_info
    24.4   /* 29-bit count of references to this frame. */
    24.5  #define PGC_count_mask      ((1UL<<28)-1)
    24.6  
    24.7 -#define IS_XEN_HEAP_FRAME(_pfn) (page_to_maddr(_pfn) < xenheap_phys_end)
    24.8 +#define is_xen_heap_frame(pfn) (page_to_maddr(pfn) < xenheap_phys_end)
    24.9  
   24.10  static inline struct domain *unpickle_domptr(u32 _domain)
   24.11  { return ((_domain == 0) || (_domain & 1)) ? NULL : __va(_domain); }
    25.1 --- a/xen/include/asm-x86/config.h	Thu May 10 16:22:27 2007 +0100
    25.2 +++ b/xen/include/asm-x86/config.h	Thu May 10 18:02:55 2007 +0100
    25.3 @@ -84,6 +84,19 @@
    25.4  
    25.5  #define CONFIG_DMA_BITSIZE 32
    25.6  
    25.7 +#define BOOT_TRAMPOLINE 0x90000
    25.8 +#define boot_trampoline_pa(sym)                                 \
    25.9 +    (((unsigned long)&(sym)-(unsigned long)&trampoline_start)+BOOT_TRAMPOLINE)
   25.10 +#define boot_trampoline_va(sym)                                 \
   25.11 +    (*RELOC_HIDE((typeof(&(sym)))__va(__pa(&(sym))),            \
   25.12 +                 BOOT_TRAMPOLINE-__pa(trampoline_start)))
   25.13 +#ifndef __ASSEMBLY__
   25.14 +extern char trampoline_start[], trampoline_end[];
   25.15 +extern char trampoline_realmode_entry[];
   25.16 +extern unsigned int trampoline_xen_phys_start;
   25.17 +extern unsigned char trampoline_cpu_started;
   25.18 +#endif
   25.19 +
   25.20  #if defined(__x86_64__)
   25.21  
   25.22  #define CONFIG_X86_64 1
   25.23 @@ -116,7 +129,7 @@
   25.24   *  0xffff804000000000 - 0xffff807fffffffff [256GB, 2^38 bytes, PML4:256]
   25.25   *    Reserved for future shared info with the guest OS (GUEST ACCESSIBLE).
   25.26   *  0xffff808000000000 - 0xffff80ffffffffff [512GB, 2^39 bytes, PML4:257]
   25.27 - *    Read-only guest linear page table (GUEST ACCESSIBLE).
   25.28 + *    Reserved for future use.
   25.29   *  0xffff810000000000 - 0xffff817fffffffff [512GB, 2^39 bytes, PML4:258]
   25.30   *    Guest linear page table.
   25.31   *  0xffff818000000000 - 0xffff81ffffffffff [512GB, 2^39 bytes, PML4:259]
   25.32 @@ -133,10 +146,12 @@
   25.33   *    Compatibility machine-to-phys translation table.
   25.34   *  0xffff828c40000000 - 0xffff828c7fffffff [1GB,   2^30 bytes, PML4:261]
   25.35   *    High read-only compatibility machine-to-phys translation table.
   25.36 - *  0xffff828c80000000 - 0xffff82ffffffffff [462GB,             PML4:261]
   25.37 + *  0xffff828c80000000 - 0xffff828cbfffffff [1GB,   2^30 bytes, PML4:261]
   25.38 + *    Xen text, static data, bss.
   25.39 + *  0xffff828cc0000000 - 0xffff82ffffffffff [461GB,             PML4:261]
   25.40   *    Reserved for future use.
   25.41   *  0xffff830000000000 - 0xffff83ffffffffff [1TB,   2^40 bytes, PML4:262-263]
   25.42 - *    1:1 direct mapping of all physical memory. Xen and its heap live here.
   25.43 + *    1:1 direct mapping of all physical memory.
   25.44   *  0xffff840000000000 - 0xffff87ffffffffff [4TB,   2^42 bytes, PML4:264-271]
   25.45   *    Reserved for future use.
   25.46   *  0xffff880000000000 - 0xffffffffffffffff [120TB, PML4:272-511]
   25.47 @@ -167,14 +182,6 @@
   25.48  /* Slot 256: read-only guest-accessible machine-to-phys translation table. */
   25.49  #define RO_MPT_VIRT_START       (PML4_ADDR(256))
   25.50  #define RO_MPT_VIRT_END         (RO_MPT_VIRT_START + PML4_ENTRY_BYTES/2)
   25.51 -
   25.52 -// current unused?
   25.53 -#if 0
   25.54 -/* Slot 257: read-only guest-accessible linear page table. */
   25.55 -#define RO_LINEAR_PT_VIRT_START (PML4_ADDR(257))
   25.56 -#define RO_LINEAR_PT_VIRT_END   (RO_LINEAR_PT_VIRT_START + PML4_ENTRY_BYTES)
   25.57 -#endif
   25.58 -
   25.59  /* Slot 258: linear page table (guest table). */
   25.60  #define LINEAR_PT_VIRT_START    (PML4_ADDR(258))
   25.61  #define LINEAR_PT_VIRT_END      (LINEAR_PT_VIRT_START + PML4_ENTRY_BYTES)
   25.62 @@ -197,9 +204,12 @@
   25.63  /* Slot 261: compatibility machine-to-phys conversion table (1GB). */
   25.64  #define RDWR_COMPAT_MPT_VIRT_START IOREMAP_VIRT_END
   25.65  #define RDWR_COMPAT_MPT_VIRT_END (RDWR_COMPAT_MPT_VIRT_START + (1UL << 30))
   25.66 -/* Slot 261: high read-only compatibility machine-to-phys conversion table (1GB). */
   25.67 +/* Slot 261: high read-only compat machine-to-phys conversion table (1GB). */
   25.68  #define HIRO_COMPAT_MPT_VIRT_START RDWR_COMPAT_MPT_VIRT_END
   25.69  #define HIRO_COMPAT_MPT_VIRT_END (HIRO_COMPAT_MPT_VIRT_START + (1UL << 30))
   25.70 +/* Slot 261: xen text, static data and bss (1GB). */
   25.71 +#define XEN_VIRT_START          (HIRO_COMPAT_MPT_VIRT_END)
   25.72 +#define XEN_VIRT_END            (XEN_VIRT_START + (1UL << 30))
   25.73  /* Slot 262-263: A direct 1:1 mapping of all of physical memory. */
   25.74  #define DIRECTMAP_VIRT_START    (PML4_ADDR(262))
   25.75  #define DIRECTMAP_VIRT_END      (DIRECTMAP_VIRT_START + PML4_ENTRY_BYTES*2)
   25.76 @@ -340,7 +350,7 @@
   25.77  #endif /* __i386__ */
   25.78  
   25.79  #ifndef __ASSEMBLY__
   25.80 -extern unsigned long xenheap_phys_end; /* user-configurable */
   25.81 +extern unsigned long xen_phys_start, xenheap_phys_start, xenheap_phys_end;
   25.82  #endif
   25.83  
   25.84  /* GDT/LDT shadow mapping area. The first per-domain-mapping sub-area. */
    26.1 --- a/xen/include/asm-x86/mm.h	Thu May 10 16:22:27 2007 +0100
    26.2 +++ b/xen/include/asm-x86/mm.h	Thu May 10 18:02:55 2007 +0100
    26.3 @@ -104,7 +104,10 @@ struct page_info
    26.4  #define PageSetSlab(page)   ((void)0)
    26.5  #define PageClearSlab(page) ((void)0)
    26.6  
    26.7 -#define IS_XEN_HEAP_FRAME(_pfn) (page_to_maddr(_pfn) < xenheap_phys_end)
    26.8 +#define is_xen_heap_frame(pfn) ({                                       \
    26.9 +    paddr_t maddr = page_to_maddr(pfn);                                 \
   26.10 +    ((maddr >= xenheap_phys_start) && (maddr < xenheap_phys_end));      \
   26.11 +})
   26.12  
   26.13  #if defined(__i386__)
   26.14  #define pickle_domptr(_d)   ((u32)(unsigned long)(_d))
    27.1 --- a/xen/include/asm-x86/page.h	Thu May 10 16:22:27 2007 +0100
    27.2 +++ b/xen/include/asm-x86/page.h	Thu May 10 18:02:55 2007 +0100
    27.3 @@ -223,10 +223,6 @@ typedef struct { u64 pfn; } pagetable_t;
    27.4  #define mfn_valid(mfn)      ((mfn) < max_page)
    27.5  
    27.6  /* Convert between Xen-heap virtual addresses and machine addresses. */
    27.7 -#define PAGE_OFFSET         ((unsigned long)__PAGE_OFFSET)
    27.8 -#define virt_to_maddr(va)   ((unsigned long)(va)-PAGE_OFFSET)
    27.9 -#define maddr_to_virt(ma)   ((void *)((unsigned long)(ma)+PAGE_OFFSET))
   27.10 -/* Shorthand versions of the above functions. */
   27.11  #define __pa(x)             (virt_to_maddr(x))
   27.12  #define __va(x)             (maddr_to_virt(x))
   27.13  
   27.14 @@ -280,21 +276,19 @@ typedef struct { u64 pfn; } pagetable_t;
   27.15  
   27.16  
   27.17  #ifndef __ASSEMBLY__
   27.18 +extern root_pgentry_t idle_pg_table[ROOT_PAGETABLE_ENTRIES];
   27.19  #if CONFIG_PAGING_LEVELS == 3
   27.20 -extern root_pgentry_t idle_pg_table[ROOT_PAGETABLE_ENTRIES];
   27.21 -extern l3_pgentry_t   idle_pg_table_l3[ROOT_PAGETABLE_ENTRIES];
   27.22 -extern l2_pgentry_t   idle_pg_table_l2[ROOT_PAGETABLE_ENTRIES*L2_PAGETABLE_ENTRIES];
   27.23 -#else
   27.24 -extern root_pgentry_t idle_pg_table[ROOT_PAGETABLE_ENTRIES];
   27.25 -extern l2_pgentry_t   idle_pg_table_l2[ROOT_PAGETABLE_ENTRIES];
   27.26 -#ifdef CONFIG_COMPAT
   27.27 +extern l2_pgentry_t   idle_pg_table_l2[
   27.28 +    ROOT_PAGETABLE_ENTRIES * L2_PAGETABLE_ENTRIES];
   27.29 +#elif CONFIG_PAGING_LEVELS == 2
   27.30 +#define idle_pg_table_l2 idle_pg_table
   27.31 +#elif CONFIG_PAGING_LEVELS == 4
   27.32  extern l2_pgentry_t  *compat_idle_pg_table_l2;
   27.33  extern unsigned int   m2p_compat_vstart;
   27.34  #endif
   27.35 -#endif
   27.36  void paging_init(void);
   27.37  void setup_idle_pagetable(void);
   27.38 -#endif
   27.39 +#endif /* !defined(__ASSEMBLY__) */
   27.40  
   27.41  #define __pge_off()                                                     \
   27.42      do {                                                                \
    28.1 --- a/xen/include/asm-x86/x86_32/page.h	Thu May 10 16:22:27 2007 +0100
    28.2 +++ b/xen/include/asm-x86/x86_32/page.h	Thu May 10 18:02:55 2007 +0100
    28.3 @@ -3,6 +3,10 @@
    28.4  #define __X86_32_PAGE_H__
    28.5  
    28.6  #define __PAGE_OFFSET           (0xFF000000)
    28.7 +#define __XEN_VIRT_START        __PAGE_OFFSET
    28.8 +
    28.9 +#define virt_to_maddr(va) ((unsigned long)(va)-DIRECTMAP_VIRT_START)
   28.10 +#define maddr_to_virt(ma) ((void *)((unsigned long)(ma)+DIRECTMAP_VIRT_START))
   28.11  
   28.12  #define VADDR_BITS              32
   28.13  #define VADDR_MASK              (~0UL)
    29.1 --- a/xen/include/asm-x86/x86_64/page.h	Thu May 10 16:22:27 2007 +0100
    29.2 +++ b/xen/include/asm-x86/x86_64/page.h	Thu May 10 18:02:55 2007 +0100
    29.3 @@ -17,6 +17,7 @@
    29.4  #define ROOT_PAGETABLE_ENTRIES  L4_PAGETABLE_ENTRIES
    29.5  
    29.6  #define __PAGE_OFFSET           (0xFFFF830000000000)
    29.7 +#define __XEN_VIRT_START        (0xFFFF828C80000000)
    29.8  
    29.9  /* These are architectural limits. Current CPUs support only 40-bit phys. */
   29.10  #define PADDR_BITS              52
   29.11 @@ -31,6 +32,23 @@
   29.12  #include <xen/config.h>
   29.13  #include <asm/types.h>
   29.14  
   29.15 +/* Physical address where Xen was relocated to. */
   29.16 +extern unsigned long xen_phys_start;
   29.17 +
   29.18 +static inline unsigned long __virt_to_maddr(unsigned long va)
   29.19 +{
   29.20 +    ASSERT(va >= XEN_VIRT_START);
   29.21 +    ASSERT(va < DIRECTMAP_VIRT_END);
   29.22 +    ASSERT((va < XEN_VIRT_END) || (va >= DIRECTMAP_VIRT_START));
   29.23 +    if ( va > DIRECTMAP_VIRT_START )
   29.24 +        return va - DIRECTMAP_VIRT_START;
   29.25 +    return va - XEN_VIRT_START + xen_phys_start;
   29.26 +}
   29.27 +#define virt_to_maddr(va)       \
   29.28 +    (__virt_to_maddr((unsigned long)(va)))
   29.29 +#define maddr_to_virt(ma)       \
   29.30 +    ((void *)((unsigned long)(ma)+DIRECTMAP_VIRT_START))
   29.31 +
   29.32  /* read access (should only be used for debug printk's) */
   29.33  typedef u64 intpte_t;
   29.34  #define PRIpte "016lx"