direct-io.hg

changeset 5237:6ab1fc3f5598

bitkeeper revision 1.1615.1.1 (429c8530L_ZtVuxsQcKwKB3yPuljog)

Subject: PAE support

This patch adds initial support for PAE paging to Xen.
This patch:

* boots Xen itself with PAE paging enabled.
* adds PAE support to the dom0 domain builder.

Some notes on the design and the changes:

* There are two new config options: CONFIG_X86_PAE (a boolean,
with the same name Linux uses, to simplify things) and
CONFIG_PAGING_LEVELS (an int; possible values are 2, 3 and 4). I've
used #if CONFIG_PAGING_LEVELS for stuff which simply depends
on the number of paging levels in the code common to
x86-32/64, and CONFIG_X86_PAE for special PAE quirks or
i386-only stuff. I've tried to avoid ifdefs where possible,
though; often I rearranged code to make it work in both the
PAE and non-PAE case instead.
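
For illustration, a minimal sketch of the two conventions (the guard
style and the names are taken from this patch; the declarations are
abbreviated, not literal code):

    #if CONFIG_PAGING_LEVELS >= 3
    /* depends only on the number of paging levels: shared by
       PAE (3 levels) and x86-64 (4 levels) */
    static int get_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn,
                                 struct domain *d, unsigned long vaddr);
    #endif

    #ifdef CONFIG_X86_PAE
    /* i386 PAE-only quirk, compiled out everywhere else */
    static inline int fixup_pae_linear_mappings(l3_pgentry_t *pl3e);
    #else
    # define fixup_pae_linear_mappings(unused) (1)
    #endif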

* idle_pg_table: the 3rd level is statically initialized; the 2nd
level is contiguous in physical and virtual memory, so it can
be addressed linearly (the dom0 builder uses the same trick to
simplify things a bit, btw). There are two new symbols,
idle_pg_table_l3 and idle_pg_table_l2, for the two tables.
idle_pg_table is aliased to the top-level page table, i.e.
idle_pg_table_l3 in PAE mode and idle_pg_table_l2 in non-PAE
mode. The idle l3 table is never touched after
boot; the l2 table is accessed via idle_pg_table_l2 and
addressed linearly in both PAE and non-PAE mode.
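
A rough C-level view of the layout (the real tables are laid out in
assembly in xen/arch/x86/boot/x86_32.S; this sketch only illustrates
the aliasing and the linear addressing, it is not literal code from
the patch):

    #ifdef CONFIG_X86_PAE
    l3_pgentry_t idle_pg_table_l3[4];   /* static toplevel table      */
    l2_pgentry_t idle_pg_table_l2[4 * L2_PAGETABLE_ENTRIES];
                                        /* 4 contiguous l2 pages      */
    /* idle_pg_table aliases idle_pg_table_l3 */
    #else
    l2_pgentry_t idle_pg_table_l2[L2_PAGETABLE_ENTRIES];
    /* idle_pg_table aliases idle_pg_table_l2 */
    #endif

    /* contiguity means one flat index works in both modes: */
    idle_pg_table_l2[l2_linear_offset(vaddr)] = l2e_empty();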

* I've added a "intpte_t" type and a PRIpte define, modeled
after the C99 inttypes.h header, for page table entries.
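
A sketch of what this boils down to (the actual definitions live in
the new page-2level.h/page-3level.h headers; the exact widths and
format strings shown here are an assumption):

    #ifdef CONFIG_X86_PAE
    typedef u64 intpte_t;            /* 64-bit page table entries */
    #define PRIpte "016llx"
    #else
    typedef unsigned long intpte_t;  /* 32-bit page table entries */
    #define PRIpte "08lx"
    #endif

so format strings work unchanged in both modes, as used in mm.c below:

    MEM_LOG("Failed to update %" PRIpte " -> %" PRIpte ": saw %" PRIpte,
            l1e_get_value(ol1e), l1e_get_value(nl1e), o);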

Signed-off-by: Gerd Knorr <kraxel@bytesex.org>
author kaf24@firebug.cl.cam.ac.uk
date Tue May 31 15:39:28 2005 +0000 (2005-05-31)
parents 1af7f0708b54
children f85f27fa69ff
files .rootkeys xen/arch/x86/audit.c xen/arch/x86/boot/x86_32.S xen/arch/x86/dom0_ops.c xen/arch/x86/domain.c xen/arch/x86/domain_build.c xen/arch/x86/idle0_task.c xen/arch/x86/mm.c xen/arch/x86/setup.c xen/arch/x86/shadow.c xen/arch/x86/traps.c xen/arch/x86/vmx.c xen/arch/x86/vmx_io.c xen/arch/x86/vmx_vmcs.c xen/arch/x86/x86_32/domain_page.c xen/arch/x86/x86_32/mm.c xen/arch/x86/x86_32/traps.c xen/arch/x86/x86_64/mm.c xen/include/asm-x86/config.h xen/include/asm-x86/domain.h xen/include/asm-x86/mm.h xen/include/asm-x86/page.h xen/include/asm-x86/shadow.h xen/include/asm-x86/smp.h xen/include/asm-x86/types.h xen/include/asm-x86/x86_32/page-2level.h xen/include/asm-x86/x86_32/page-3level.h xen/include/asm-x86/x86_32/page.h xen/include/asm-x86/x86_64/page.h xen/include/public/arch-x86_32.h
line diff
     1.1 --- a/.rootkeys	Tue May 31 15:20:43 2005 +0000
     1.2 +++ b/.rootkeys	Tue May 31 15:39:28 2005 +0000
     1.3 @@ -1377,6 +1377,8 @@ 420951dcqyUCe_gXA_XJPu1ix_poKg xen/inclu
     1.4  41c0c412lQ0NVVN9PsOSznQ-qhOiPA xen/include/asm-x86/vmx_vmcs.h
     1.5  418fbcfe_WliJPToeVM-9VStvym-hw xen/include/asm-x86/x86_32/asm_defns.h
     1.6  3e20b82fl1jmQiKdLy7fxMcutfpjWA xen/include/asm-x86/x86_32/domain_page.h
     1.7 +429c852fi3pvfa9kIjryYK5AGBmXAg xen/include/asm-x86/x86_32/page-2level.h
     1.8 +429c852fskvSOgcD5EC25_m9um9t4g xen/include/asm-x86/x86_32/page-3level.h
     1.9  4208e2a3ZNFroNXbX9OYaOB-xtUyDQ xen/include/asm-x86/x86_32/page.h
    1.10  3ddb79c3mbqEM7QQr3zVq7NiBNhouA xen/include/asm-x86/x86_32/regs.h
    1.11  3e7f358aG11EvMI9VJ4_9hD4LUO7rQ xen/include/asm-x86/x86_32/string.h
     2.1 --- a/xen/arch/x86/audit.c	Tue May 31 15:20:43 2005 +0000
     2.2 +++ b/xen/arch/x86/audit.c	Tue May 31 15:39:28 2005 +0000
     2.3 @@ -408,9 +408,9 @@ int audit_adjust_pgtables(struct domain 
     2.4  
     2.5          for_each_exec_domain(d, ed)
     2.6          {
     2.7 -            if ( pagetable_val(ed->arch.guest_table) )
     2.8 +            if ( pagetable_get_phys(ed->arch.guest_table) )
     2.9                  adjust(&frame_table[pagetable_get_pfn(ed->arch.guest_table)], 1);
    2.10 -            if ( pagetable_val(ed->arch.shadow_table) )
    2.11 +            if ( pagetable_get_phys(ed->arch.shadow_table) )
    2.12                  adjust(&frame_table[pagetable_get_pfn(ed->arch.shadow_table)], 0);
    2.13              if ( ed->arch.monitor_shadow_ref )
    2.14                  adjust(&frame_table[ed->arch.monitor_shadow_ref], 0);
     3.1 --- a/xen/arch/x86/boot/x86_32.S	Tue May 31 15:20:43 2005 +0000
     3.2 +++ b/xen/arch/x86/boot/x86_32.S	Tue May 31 15:39:28 2005 +0000
     3.3 @@ -101,6 +101,22 @@ 1:      lss     stack_start-__PAGE_OFFSE
     3.4          xor     %eax,%eax
     3.5          rep     stosb
     3.6  
     3.7 +#ifdef CONFIG_X86_PAE
     3.8 +        /* Initialize low and high mappings of all memory with 2MB pages */
     3.9 +        mov     $idle_pg_table_l2-__PAGE_OFFSET,%edi
    3.10 +        mov     $0xe3,%eax                  /* PRESENT+RW+A+D+2MB */
    3.11 +1:      mov     %eax,__PAGE_OFFSET>>18(%edi) /* high mapping */
    3.12 +        stosl                                /* low mapping */
    3.13 +        add     $4,%edi
    3.14 +        add     $(1<<L2_PAGETABLE_SHIFT),%eax
    3.15 +        cmp     $DIRECTMAP_PHYS_END+0xe3,%eax
    3.16 +        jne     1b
    3.17 +1:      stosl   /* low mappings cover as much physmem as possible */
    3.18 +        add     $4,%edi
    3.19 +        add     $(1<<L2_PAGETABLE_SHIFT),%eax
    3.20 +        cmp     $__HYPERVISOR_VIRT_START+0xe3,%eax
    3.21 +        jne     1b
    3.22 +#else
    3.23          /* Initialize low and high mappings of all memory with 4MB pages */
    3.24          mov     $idle_pg_table-__PAGE_OFFSET,%edi
    3.25          mov     $0xe3,%eax                  /* PRESENT+RW+A+D+4MB */
    3.26 @@ -113,6 +129,7 @@ 1:      stosl   /* low mappings cover as
    3.27          add     $(1<<L2_PAGETABLE_SHIFT),%eax
    3.28          cmp     $__HYPERVISOR_VIRT_START+0xe3,%eax
    3.29          jne     1b
    3.30 +#endif
    3.31          
    3.32          /* Initialise IDT with simple error defaults. */
    3.33          lea     ignore_int,%edx
    3.34 @@ -204,10 +221,17 @@ ENTRY(gdt_table)
    3.35          .quad 0x0000000000000000     /* unused */
    3.36          .quad 0x00cf9a000000ffff     /* 0xe008 ring 0 4.00GB code at 0x0 */
    3.37          .quad 0x00cf92000000ffff     /* 0xe010 ring 0 4.00GB data at 0x0 */
    3.38 +#ifdef CONFIG_X86_PAE
    3.39 +        .quad 0x00cfba00000067ff
    3.40 +        .quad 0x00cfb200000067ff
    3.41 +        .quad 0x00cffa00000067ff
    3.42 +        .quad 0x00cff200000067ff
    3.43 +#else
    3.44          .quad 0x00cfba000000c3ff     /* 0xe019 ring 1 3.95GB code at 0x0 */
    3.45          .quad 0x00cfb2000000c3ff     /* 0xe021 ring 1 3.95GB data at 0x0 */
    3.46          .quad 0x00cffa000000c3ff     /* 0xe02b ring 3 3.95GB code at 0x0 */
    3.47          .quad 0x00cff2000000c3ff     /* 0xe033 ring 3 3.95GB data at 0x0 */
    3.48 +#endif
    3.49          .quad 0x0000000000000000     /* unused                           */
    3.50          .fill 2*NR_CPUS,8,0          /* space for TSS and LDT per CPU    */
    3.51  
    3.52 @@ -215,10 +239,27 @@ ENTRY(gdt_table)
    3.53  /* Maximum STACK_ORDER for x86/32 is 1. We must therefore ensure that the */
    3.54  /* CPU0 stack is aligned on an even page boundary!                        */
    3.55  ENTRY(cpu0_stack)
    3.56 +        .org 0x2000 + STACK_SIZE
    3.57  
    3.58 -        .org 0x2000 + STACK_SIZE
    3.59 +#ifdef CONFIG_X86_PAE
    3.60 +
    3.61  ENTRY(idle_pg_table)
    3.62 +ENTRY(idle_pg_table_l3)
    3.63 +        .quad 0x100000 + 0x2000 + STACK_SIZE + 1*PAGE_SIZE + 0x01
    3.64 +        .quad 0x100000 + 0x2000 + STACK_SIZE + 2*PAGE_SIZE + 0x01
    3.65 +        .quad 0x100000 + 0x2000 + STACK_SIZE + 3*PAGE_SIZE + 0x01
    3.66 +        .quad 0x100000 + 0x2000 + STACK_SIZE + 4*PAGE_SIZE + 0x01
    3.67 +        .org 0x2000 + STACK_SIZE + 1*PAGE_SIZE
    3.68 +ENTRY(idle_pg_table_l2)
    3.69 +        .org 0x2000 + STACK_SIZE + 5*PAGE_SIZE
    3.70  
    3.71 +#else /* CONFIG_X86_PAE */
    3.72 +
    3.73 +ENTRY(idle_pg_table)
    3.74 +ENTRY(idle_pg_table_l2) # Initial page directory is 4kB
    3.75          .org 0x2000 + STACK_SIZE + PAGE_SIZE
    3.76 +
    3.77 +#endif /* CONFIG_X86_PAE */
    3.78 +
    3.79  ENTRY(stext)
    3.80  ENTRY(_stext)
     4.1 --- a/xen/arch/x86/dom0_ops.c	Tue May 31 15:20:43 2005 +0000
     4.2 +++ b/xen/arch/x86/dom0_ops.c	Tue May 31 15:39:28 2005 +0000
     4.3 @@ -405,7 +405,7 @@ void arch_getdomaininfo_ctxt(
     4.4          c->flags |= VGCF_VMX_GUEST;
     4.5  #endif
     4.6  
     4.7 -    c->pt_base = pagetable_val(ed->arch.guest_table);
     4.8 +    c->pt_base = pagetable_get_phys(ed->arch.guest_table);
     4.9  
    4.10      c->vm_assist = ed->domain->vm_assist;
    4.11  }
     5.1 --- a/xen/arch/x86/domain.c	Tue May 31 15:20:43 2005 +0000
     5.2 +++ b/xen/arch/x86/domain.c	Tue May 31 15:39:28 2005 +0000
     5.3 @@ -460,7 +460,7 @@ int arch_set_info_guest(
     5.4          //      trust the VMX domain builder.  Xen should validate this
     5.5          //      page table, and/or build the table itself, or ???
     5.6          //
     5.7 -        if ( !pagetable_val(d->arch.phys_table) )
     5.8 +        if ( !pagetable_get_phys(d->arch.phys_table) )
     5.9              d->arch.phys_table = ed->arch.guest_table;
    5.10  
    5.11          if ( (error = vmx_final_setup_guest(ed, c)) )
    5.12 @@ -660,7 +660,7 @@ long do_switch_to_user(void)
    5.13      struct exec_domain    *ed = current;
    5.14  
    5.15      if ( unlikely(copy_from_user(&stu, (void *)regs->rsp, sizeof(stu))) ||
    5.16 -         unlikely(pagetable_val(ed->arch.guest_table_user) == 0) )
    5.17 +         unlikely(pagetable_get_phys(ed->arch.guest_table_user) == 0) )
    5.18          return -EFAULT;
    5.19  
    5.20      toggle_guest_mode(ed);
    5.21 @@ -978,7 +978,7 @@ void domain_relinquish_resources(struct 
    5.22      /* Drop the in-use references to page-table bases. */
    5.23      for_each_exec_domain ( d, ed )
    5.24      {
    5.25 -        if ( pagetable_val(ed->arch.guest_table) != 0 )
    5.26 +        if ( pagetable_get_phys(ed->arch.guest_table) != 0 )
    5.27          {
    5.28              if ( shadow_mode_refcounts(d) )
    5.29                  put_page(&frame_table[pagetable_get_pfn(ed->arch.guest_table)]);
    5.30 @@ -988,7 +988,7 @@ void domain_relinquish_resources(struct 
    5.31              ed->arch.guest_table = mk_pagetable(0);
    5.32          }
    5.33  
    5.34 -        if ( pagetable_val(ed->arch.guest_table_user) != 0 )
    5.35 +        if ( pagetable_get_phys(ed->arch.guest_table_user) != 0 )
    5.36          {
    5.37              if ( shadow_mode_refcounts(d) )
    5.38                  put_page(&frame_table[pagetable_get_pfn(ed->arch.guest_table_user)]);
     6.1 --- a/xen/arch/x86/domain_build.c	Tue May 31 15:20:43 2005 +0000
     6.2 +++ b/xen/arch/x86/domain_build.c	Tue May 31 15:39:28 2005 +0000
     6.3 @@ -44,15 +44,15 @@ boolean_param("dom0_translate", opt_dom0
     6.4  #if defined(__i386__)
     6.5  /* No ring-3 access in initial leaf page tables. */
     6.6  #define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
     6.7 +#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
     6.8 +#define L3_PROT (_PAGE_PRESENT)
     6.9  #elif defined(__x86_64__)
    6.10  /* Allow ring-3 access in long mode as guest cannot use ring 1. */
    6.11  #define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
    6.12 -#endif
    6.13 -/* Don't change these: Linux expects just these bits to be set. */
    6.14 -/* (And that includes the bogus _PAGE_DIRTY!) */
    6.15  #define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
    6.16  #define L3_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
    6.17  #define L4_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
    6.18 +#endif
    6.19  
    6.20  #define round_pgup(_p)    (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
    6.21  #define round_pgdown(_p)  ((_p)&PAGE_MASK)
    6.22 @@ -91,7 +91,11 @@ int construct_dom0(struct domain *d,
    6.23  #elif defined(__x86_64__)
    6.24      char *image_start  = __va(_image_start);
    6.25      char *initrd_start = __va(_initrd_start);
    6.26 +#endif
    6.27 +#if CONFIG_PAGING_LEVELS >= 4
    6.28      l4_pgentry_t *l4tab = NULL, *l4start = NULL;
    6.29 +#endif
    6.30 +#if CONFIG_PAGING_LEVELS >= 3
    6.31      l3_pgentry_t *l3tab = NULL, *l3start = NULL;
    6.32  #endif
    6.33      l2_pgentry_t *l2tab = NULL, *l2start = NULL;
    6.34 @@ -143,7 +147,7 @@ int construct_dom0(struct domain *d,
    6.35          panic("Not enough RAM for DOM0 reservation.\n");
    6.36      alloc_start = page_to_phys(page);
    6.37      alloc_end   = alloc_start + (d->tot_pages << PAGE_SHIFT);
    6.38 -    
    6.39 +
    6.40      if ( (rc = parseelfimage(&dsi)) != 0 )
    6.41          return rc;
    6.42  
    6.43 @@ -172,10 +176,15 @@ int construct_dom0(struct domain *d,
    6.44          v_end            = (vstack_end + (1UL<<22)-1) & ~((1UL<<22)-1);
    6.45          if ( (v_end - vstack_end) < (512UL << 10) )
    6.46              v_end += 1UL << 22; /* Add extra 4MB to get >= 512kB padding. */
    6.47 -#if defined(__i386__)
    6.48 +#if defined(__i386__) && !defined(CONFIG_X86_PAE)
    6.49          if ( (((v_end - dsi.v_start + ((1UL<<L2_PAGETABLE_SHIFT)-1)) >> 
    6.50                 L2_PAGETABLE_SHIFT) + 1) <= nr_pt_pages )
    6.51              break;
    6.52 +#elif defined(__i386__) && defined(CONFIG_X86_PAE)
    6.53 +        /* 5 pages: 1x 3rd + 4x 2nd level */
    6.54 +        if ( (((v_end - dsi.v_start + ((1UL<<L2_PAGETABLE_SHIFT)-1)) >> 
    6.55 +               L2_PAGETABLE_SHIFT) + 5) <= nr_pt_pages )
    6.56 +            break;
    6.57  #elif defined(__x86_64__)
    6.58  #define NR(_l,_h,_s) \
    6.59      (((((_h) + ((1UL<<(_s))-1)) & ~((1UL<<(_s))-1)) - \
    6.60 @@ -249,6 +258,24 @@ int construct_dom0(struct domain *d,
    6.61      }
    6.62  
    6.63      /* WARNING: The new domain must have its 'processor' field filled in! */
    6.64 +#if CONFIG_PAGING_LEVELS == 3
    6.65 +    l3start = l3tab = (l3_pgentry_t *)mpt_alloc; mpt_alloc += PAGE_SIZE;
    6.66 +    l2start = l2tab = (l2_pgentry_t *)mpt_alloc; mpt_alloc += 4*PAGE_SIZE;
    6.67 +    memcpy(l2tab, idle_pg_table_l2, 4*PAGE_SIZE);
    6.68 +    for (i = 0; i < 4; i++) {
    6.69 +        l3tab[i] = l3e_create_phys((u32)l2tab + i*PAGE_SIZE, L3_PROT);
    6.70 +        l2tab[(LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT)+i] =
    6.71 +            l2e_create_phys((u32)l2tab + i*PAGE_SIZE, __PAGE_HYPERVISOR);
    6.72 +    }
    6.73 +    unsigned long v;
    6.74 +    for (v = PERDOMAIN_VIRT_START; v < PERDOMAIN_VIRT_END;
    6.75 +         v += (1 << L2_PAGETABLE_SHIFT)) {
    6.76 +        l2tab[v >> L2_PAGETABLE_SHIFT] =
    6.77 +            l2e_create_phys(__pa(d->arch.mm_perdomain_pt) + (v-PERDOMAIN_VIRT_START),
    6.78 +                            __PAGE_HYPERVISOR);
    6.79 +    }
    6.80 +    ed->arch.guest_table = mk_pagetable((unsigned long)l3start);
    6.81 +#else
    6.82      l2start = l2tab = (l2_pgentry_t *)mpt_alloc; mpt_alloc += PAGE_SIZE;
    6.83      memcpy(l2tab, &idle_pg_table[0], PAGE_SIZE);
    6.84      l2tab[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
    6.85 @@ -256,8 +283,9 @@ int construct_dom0(struct domain *d,
    6.86      l2tab[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] =
    6.87          l2e_create_phys(__pa(d->arch.mm_perdomain_pt), __PAGE_HYPERVISOR);
    6.88      ed->arch.guest_table = mk_pagetable((unsigned long)l2start);
    6.89 +#endif
    6.90  
    6.91 -    l2tab += l2_table_offset(dsi.v_start);
    6.92 +    l2tab += l2_linear_offset(dsi.v_start);
    6.93      mfn = alloc_start >> PAGE_SHIFT;
    6.94      for ( count = 0; count < ((v_end-dsi.v_start)>>PAGE_SHIFT); count++ )
    6.95      {
    6.96 @@ -282,8 +310,8 @@ int construct_dom0(struct domain *d,
    6.97      }
    6.98  
    6.99      /* Pages that are part of page tables must be read only. */
   6.100 -    l2tab = l2start + l2_table_offset(vpt_start);
   6.101 -    l1start = l1tab = (l1_pgentry_t *)l2e_get_phys(*l2tab);
   6.102 +    l2tab = l2start + l2_linear_offset(vpt_start);
   6.103 +    l1start = l1tab = (l1_pgentry_t *)(u32)l2e_get_phys(*l2tab);
   6.104      l1tab += l1_table_offset(vpt_start);
   6.105      for ( count = 0; count < nr_pt_pages; count++ ) 
   6.106      {
   6.107 @@ -294,6 +322,34 @@ int construct_dom0(struct domain *d,
   6.108              if ( !get_page_type(page, PGT_writable_page) )
   6.109                  BUG();
   6.110  
   6.111 +#if CONFIG_PAGING_LEVELS == 3
   6.112 +        switch (count) {
   6.113 +        case 0:
   6.114 +            page->u.inuse.type_info &= ~PGT_type_mask;
   6.115 +            page->u.inuse.type_info |= PGT_l3_page_table;
   6.116 +            get_page(page, d); /* an extra ref because of readable mapping */
   6.117 +
   6.118 +            /* Get another ref to L3 page so that it can be pinned. */
   6.119 +            if ( !get_page_and_type(page, d, PGT_l3_page_table) )
   6.120 +                BUG();
   6.121 +            set_bit(_PGT_pinned, &page->u.inuse.type_info);
   6.122 +            break;
   6.123 +        case 1 ... 4:
   6.124 +            page->u.inuse.type_info &= ~PGT_type_mask;
   6.125 +            page->u.inuse.type_info |= PGT_l2_page_table;
   6.126 +            page->u.inuse.type_info |=
   6.127 +                (count-1) << PGT_va_shift;
   6.128 +            get_page(page, d); /* an extra ref because of readable mapping */
   6.129 +            break;
   6.130 +        default:
   6.131 +            page->u.inuse.type_info &= ~PGT_type_mask;
   6.132 +            page->u.inuse.type_info |= PGT_l1_page_table;
   6.133 +            page->u.inuse.type_info |= 
   6.134 +                ((dsi.v_start>>L2_PAGETABLE_SHIFT)+(count-5))<<PGT_va_shift;
   6.135 +            get_page(page, d); /* an extra ref because of readable mapping */
   6.136 +            break;
   6.137 +        }
   6.138 +#else
   6.139          if ( count == 0 )
   6.140          {
   6.141              page->u.inuse.type_info &= ~PGT_type_mask;
   6.142 @@ -326,8 +382,9 @@ int construct_dom0(struct domain *d,
   6.143               */
   6.144              get_page(page, d); /* an extra ref because of readable mapping */
   6.145          }
   6.146 +#endif
   6.147          if ( !((unsigned long)++l1tab & (PAGE_SIZE - 1)) )
   6.148 -            l1start = l1tab = (l1_pgentry_t *)l2e_get_phys(*++l2tab);
   6.149 +            l1start = l1tab = (l1_pgentry_t *)(u32)l2e_get_phys(*++l2tab);
   6.150      }
   6.151  
   6.152  #elif defined(__x86_64__)
   6.153 @@ -538,10 +595,8 @@ int construct_dom0(struct domain *d,
   6.154  
   6.155  #if defined(__i386__)
   6.156      /* Destroy low mappings - they were only for our convenience. */
   6.157 -    for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
   6.158 -        if ( l2e_get_flags(l2start[i]) & _PAGE_PSE )
   6.159 -            l2start[i] = l2e_empty();
   6.160 -    zap_low_mappings(); /* Do the same for the idle page tables. */
   6.161 +    zap_low_mappings(l2start);
   6.162 +    zap_low_mappings(idle_pg_table_l2);
   6.163  #endif
   6.164      
   6.165      /* DOM0 gets access to everything. */
   6.166 @@ -558,6 +613,12 @@ int construct_dom0(struct domain *d,
   6.167                                 : SHM_enable));
   6.168          if ( opt_dom0_translate )
   6.169          {
   6.170 +#if defined(__i386__) && defined(CONFIG_X86_PAE)
   6.171 +            printk("FIXME: PAE code needed here: %s:%d (%s)\n",
   6.172 +                   __FILE__, __LINE__, __FUNCTION__);
   6.173 +            for ( ; ; )
   6.174 +                __asm__ __volatile__ ( "hlt" );
   6.175 +#else
   6.176              /* Hmm, what does this?
   6.177                 Looks like isn't portable across 32/64 bit and pae/non-pae ...
   6.178                 -- kraxel */
   6.179 @@ -573,13 +634,14 @@ int construct_dom0(struct domain *d,
   6.180              // so that we can easily access it.
   6.181              //
   6.182              ASSERT( root_get_value(idle_pg_table[1]) == 0 );
   6.183 -            ASSERT( pagetable_val(d->arch.phys_table) );
   6.184 +            ASSERT( pagetable_get_phys(d->arch.phys_table) );
   6.185              idle_pg_table[1] = root_create_phys(
   6.186 -                pagetable_val(d->arch.phys_table), __PAGE_HYPERVISOR);
   6.187 +                pagetable_get_phys(d->arch.phys_table), __PAGE_HYPERVISOR);
   6.188              translate_l2pgtable(d, (l1_pgentry_t *)(1u << L2_PAGETABLE_SHIFT),
   6.189                                  pagetable_get_pfn(ed->arch.guest_table));
   6.190              idle_pg_table[1] = root_empty();
   6.191              local_flush_tlb();
   6.192 +#endif
   6.193          }
   6.194  
   6.195          update_pagetables(ed); /* XXX SMP */
     7.1 --- a/xen/arch/x86/idle0_task.c	Tue May 31 15:20:43 2005 +0000
     7.2 +++ b/xen/arch/x86/idle0_task.c	Tue May 31 15:39:28 2005 +0000
     7.3 @@ -11,8 +11,7 @@ struct domain idle0_domain = {
     7.4  
     7.5  struct exec_domain idle0_exec_domain = {
     7.6      processor:   0,
     7.7 -    domain:      &idle0_domain,
     7.8 -    arch:        IDLE0_ARCH_EXEC_DOMAIN
     7.9 +    domain:      &idle0_domain
    7.10  };
    7.11  
    7.12  struct tss_struct init_tss[NR_CPUS];
     8.1 --- a/xen/arch/x86/mm.c	Tue May 31 15:20:43 2005 +0000
     8.2 +++ b/xen/arch/x86/mm.c	Tue May 31 15:39:28 2005 +0000
     8.3 @@ -121,7 +121,8 @@
     8.4  static void free_l2_table(struct pfn_info *page);
     8.5  static void free_l1_table(struct pfn_info *page);
     8.6  
     8.7 -static int mod_l2_entry(l2_pgentry_t *, l2_pgentry_t, unsigned long);
     8.8 +static int mod_l2_entry(l2_pgentry_t *, l2_pgentry_t, unsigned long,
     8.9 +                        unsigned int type);
    8.10  static int mod_l1_entry(l1_pgentry_t *, l1_pgentry_t);
    8.11  
    8.12  /* Used to defer flushing of memory structures. */
    8.13 @@ -149,21 +150,22 @@ unsigned long max_page;
    8.14  
    8.15  void __init init_frametable(void)
    8.16  {
    8.17 -    unsigned long i, p;
    8.18 +    unsigned long i, p, step;
    8.19  
    8.20      frame_table      = (struct pfn_info *)FRAMETABLE_VIRT_START;
    8.21      frame_table_size = max_page * sizeof(struct pfn_info);
    8.22      frame_table_size = (frame_table_size + PAGE_SIZE - 1) & PAGE_MASK;
    8.23  
    8.24 -    for ( i = 0; i < frame_table_size; i += (4UL << 20) )
    8.25 +    step = (1 << L2_PAGETABLE_SHIFT);
    8.26 +    for ( i = 0; i < frame_table_size; i += step )
    8.27      {
    8.28 -        p = alloc_boot_pages(min(frame_table_size - i, 4UL << 20), 4UL << 20);
    8.29 +        p = alloc_boot_pages(min(frame_table_size - i, step), step);
    8.30          if ( p == 0 )
    8.31              panic("Not enough memory for frame table\n");
    8.32          map_pages_to_xen(
    8.33              FRAMETABLE_VIRT_START + i,
    8.34              p >> PAGE_SHIFT,
    8.35 -            4UL << (20-PAGE_SHIFT),
    8.36 +            step >> PAGE_SHIFT,
    8.37              PAGE_HYPERVISOR);
    8.38      }
    8.39  
    8.40 @@ -232,7 +234,7 @@ void arch_init_memory(void)
    8.41  
    8.42  void write_ptbase(struct exec_domain *ed)
    8.43  {
    8.44 -    write_cr3(pagetable_val(ed->arch.monitor_table));
    8.45 +    write_cr3(pagetable_get_phys(ed->arch.monitor_table));
    8.46  }
    8.47  
    8.48  void invalidate_shadow_ldt(struct exec_domain *d)
    8.49 @@ -375,7 +377,6 @@ static int get_page_and_type_from_pagenr
    8.50      return 1;
    8.51  }
    8.52  
    8.53 -
    8.54  /*
    8.55   * We allow root tables to map each other (a.k.a. linear page tables). It
    8.56   * needs some special care with reference counts and access permissions:
    8.57 @@ -432,7 +433,6 @@ get_linear_pagetable(
    8.58      return 1;
    8.59  }
    8.60  
    8.61 -
    8.62  int
    8.63  get_page_from_l1e(
    8.64      l1_pgentry_t l1e, struct domain *d)
    8.65 @@ -446,8 +446,7 @@ get_page_from_l1e(
    8.66  
    8.67      if ( unlikely(l1e_get_flags(l1e) & L1_DISALLOW_MASK) )
    8.68      {
    8.69 -        MEM_LOG("Bad L1 type settings %lx %lx", l1e_get_value(l1e),
    8.70 -                l1e_get_value(l1e) & L1_DISALLOW_MASK);
    8.71 +        MEM_LOG("Bad L1 flags %x\n", l1e_get_flags(l1e) & L1_DISALLOW_MASK);
    8.72          return 0;
    8.73      }
    8.74  
    8.75 @@ -482,7 +481,7 @@ get_page_from_l1e(
    8.76  static int 
    8.77  get_page_from_l2e(
    8.78      l2_pgentry_t l2e, unsigned long pfn,
    8.79 -    struct domain *d, unsigned long va_idx)
    8.80 +    struct domain *d, unsigned long vaddr)
    8.81  {
    8.82      int rc;
    8.83  
    8.84 @@ -493,45 +492,58 @@ get_page_from_l2e(
    8.85  
    8.86      if ( unlikely((l2e_get_flags(l2e) & L2_DISALLOW_MASK)) )
    8.87      {
    8.88 -        MEM_LOG("Bad L2 page type settings %lx",
    8.89 -                l2e_get_value(l2e) & L2_DISALLOW_MASK);
    8.90 +        MEM_LOG("Bad L2 flags %x\n", l2e_get_flags(l2e) & L2_DISALLOW_MASK);
    8.91          return 0;
    8.92      }
    8.93  
    8.94 +    vaddr >>= L2_PAGETABLE_SHIFT;
    8.95 +    vaddr <<= PGT_va_shift;
    8.96      rc = get_page_and_type_from_pagenr(
    8.97 -        l2e_get_pfn(l2e), 
    8.98 -        PGT_l1_page_table | (va_idx<<PGT_va_shift), d);
    8.99 -
   8.100 -#if defined(__i386__)
   8.101 -    return rc ? rc : get_linear_pagetable(l2e, pfn, d);
   8.102 -#elif defined(__x86_64__)
   8.103 +        l2e_get_pfn(l2e), PGT_l1_page_table | vaddr, d);
   8.104 +
   8.105 +#if CONFIG_PAGING_LEVELS == 2
   8.106 +    if (!rc)
   8.107 +        rc = get_linear_pagetable(l2e, pfn, d);
   8.108 +#endif
   8.109      return rc;
   8.110 -#endif
   8.111  }
   8.112  
   8.113  
   8.114 -#ifdef __x86_64__
   8.115 +#if CONFIG_PAGING_LEVELS >= 3
   8.116  
   8.117  static int 
   8.118  get_page_from_l3e(
   8.119 -    l3_pgentry_t l3e, unsigned long pfn, struct domain *d)
   8.120 +    l3_pgentry_t l3e, unsigned long pfn,
   8.121 +    struct domain *d, unsigned long vaddr)
   8.122  {
   8.123      ASSERT( !shadow_mode_refcounts(d) );
   8.124  
   8.125 +    int rc;
   8.126 +
   8.127      if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
   8.128          return 1;
   8.129  
   8.130      if ( unlikely((l3e_get_flags(l3e) & L3_DISALLOW_MASK)) )
   8.131      {
   8.132 -        MEM_LOG("Bad L3 page type settings %lx",
   8.133 -                l3e_get_value(l3e) & L3_DISALLOW_MASK);
   8.134 +        MEM_LOG("Bad L3 flags %x\n", l3e_get_flags(l3e) & L3_DISALLOW_MASK);
   8.135          return 0;
   8.136      }
   8.137  
   8.138 -    return get_page_and_type_from_pagenr(
   8.139 -        l3e_get_pfn(l3e), PGT_l2_page_table, d);
   8.140 +    vaddr >>= L3_PAGETABLE_SHIFT;
   8.141 +    vaddr <<= PGT_va_shift;
   8.142 +    rc = get_page_and_type_from_pagenr(
   8.143 +        l3e_get_pfn(l3e),
   8.144 +        PGT_l2_page_table | vaddr, d);
   8.145 +#if CONFIG_PAGING_LEVELS == 3
   8.146 +    if (!rc)
   8.147 +        rc = get_linear_pagetable(l3e, pfn, d);
   8.148 +#endif
   8.149 +    return rc;
   8.150  }
   8.151  
   8.152 +#endif /* 3 level */
   8.153 +
   8.154 +#if CONFIG_PAGING_LEVELS >= 4
   8.155  
   8.156  static int 
   8.157  get_page_from_l4e(
   8.158 @@ -546,8 +558,7 @@ get_page_from_l4e(
   8.159  
   8.160      if ( unlikely((l4e_get_flags(l4e) & L4_DISALLOW_MASK)) )
   8.161      {
   8.162 -        MEM_LOG("Bad L4 page type settings %lx",
   8.163 -                l4e_get_value(l4e) & L4_DISALLOW_MASK);
   8.164 +        MEM_LOG("Bad L4 flags %x\n", l4e_get_flags(l4e) & L4_DISALLOW_MASK);
   8.165          return 0;
   8.166      }
   8.167  
   8.168 @@ -560,7 +571,7 @@ get_page_from_l4e(
   8.169      return 1;
   8.170  }
   8.171  
   8.172 -#endif /* __x86_64__ */
   8.173 +#endif /* 4 level */
   8.174  
   8.175  
   8.176  void put_page_from_l1e(l1_pgentry_t l1e, struct domain *d)
   8.177 @@ -622,7 +633,7 @@ static void put_page_from_l2e(l2_pgentry
   8.178  }
   8.179  
   8.180  
   8.181 -#ifdef __x86_64__
   8.182 +#if CONFIG_PAGING_LEVELS >= 3
   8.183  
   8.184  static void put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn)
   8.185  {
   8.186 @@ -631,6 +642,9 @@ static void put_page_from_l3e(l3_pgentry
   8.187          put_page_and_type(&frame_table[l3e_get_pfn(l3e)]);
   8.188  }
   8.189  
   8.190 +#endif
   8.191 +
   8.192 +#if CONFIG_PAGING_LEVELS >= 4
   8.193  
   8.194  static void put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn)
   8.195  {
   8.196 @@ -639,7 +653,7 @@ static void put_page_from_l4e(l4_pgentry
   8.197          put_page_and_type(&frame_table[l4e_get_pfn(l4e)]);
   8.198  }
   8.199  
   8.200 -#endif /* __x86_64__ */
   8.201 +#endif
   8.202  
   8.203  
   8.204  static int alloc_l1_table(struct pfn_info *page)
   8.205 @@ -670,11 +684,61 @@ static int alloc_l1_table(struct pfn_inf
   8.206      return 0;
   8.207  }
   8.208  
   8.209 -
   8.210 -static int alloc_l2_table(struct pfn_info *page)
   8.211 +#ifdef CONFIG_X86_PAE
   8.212 +static inline int fixup_pae_linear_mappings(l3_pgentry_t *pl3e)
   8.213 +{
   8.214 +    l2_pgentry_t *pl2e;
   8.215 +    unsigned long vaddr;
   8.216 +    int i,idx;
   8.217 +
   8.218 +    while ((unsigned long)pl3e & ~PAGE_MASK)
   8.219 +        pl3e--;
   8.220 +
   8.221 +    if (!(l3e_get_flags(pl3e[3]) & _PAGE_PRESENT)) {
   8.222 +        printk("Installing a L3 PAE pt without L2 in slot #3 isn't going to fly ...\n");
   8.223 +        return 0;
   8.224 +    }
   8.225 +
   8.226 +    pl2e = map_domain_mem(l3e_get_phys(pl3e[3]));
   8.227 +    for (i = 0; i < 4; i++) {
   8.228 +        vaddr = LINEAR_PT_VIRT_START + (i << L2_PAGETABLE_SHIFT);
   8.229 +        idx = (vaddr >> L2_PAGETABLE_SHIFT) & (L2_PAGETABLE_ENTRIES-1);
   8.230 +        if (l3e_get_flags(pl3e[i]) & _PAGE_PRESENT) {
   8.231 +            pl2e[idx] = l2e_create_phys(l3e_get_phys(pl3e[i]),
   8.232 +                                        __PAGE_HYPERVISOR);
   8.233 +        } else
   8.234 +            pl2e[idx] = l2e_empty();
   8.235 +    }
   8.236 +    unmap_domain_mem(pl2e);
   8.237 +
   8.238 +    return 1;
   8.239 +}
   8.240 +
   8.241 +static inline unsigned long fixup_pae_vaddr(unsigned long l2vaddr,
   8.242 +                                            unsigned long l2type)
   8.243 +{
   8.244 +    unsigned long l3vaddr;
   8.245 +    
   8.246 +    if ((l2type & PGT_va_mask) == PGT_va_unknown) {
   8.247 +        printk("%s: hooking one l2 pt into multiple l3 slots isn't allowed, sorry\n",
   8.248 +               __FUNCTION__);
   8.249 +        domain_crash();
   8.250 +    }
   8.251 +    l3vaddr = ((l2type & PGT_va_mask) >> PGT_va_shift)
   8.252 +        << L3_PAGETABLE_SHIFT;
   8.253 +    return l3vaddr + l2vaddr;
   8.254 +}
   8.255 +
   8.256 +#else
   8.257 +# define fixup_pae_linear_mappings(unused) (1)
   8.258 +# define fixup_pae_vaddr(vaddr, type) (vaddr)
   8.259 +#endif
   8.260 +
   8.261 +static int alloc_l2_table(struct pfn_info *page, unsigned int type)
   8.262  {
   8.263      struct domain *d = page_get_owner(page);
   8.264      unsigned long  pfn = page_to_pfn(page);
   8.265 +    unsigned long  vaddr;
   8.266      l2_pgentry_t  *pl2e;
   8.267      int            i;
   8.268  
   8.269 @@ -682,21 +746,24 @@ static int alloc_l2_table(struct pfn_inf
   8.270      if ( (PGT_base_page_table == PGT_l2_page_table) &&
   8.271           unlikely(shadow_mode_refcounts(d)) )
   8.272          return 1;
   8.273 -
   8.274      ASSERT( !shadow_mode_refcounts(d) );
   8.275     
   8.276 +    
   8.277      pl2e = map_domain_mem(pfn << PAGE_SHIFT);
   8.278  
   8.279 -    for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
   8.280 -        if ( is_guest_l2_slot(i) &&
   8.281 -             unlikely(!get_page_from_l2e(pl2e[i], pfn, d, i)) )
   8.282 +    for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ ) {
   8.283 +        vaddr = i << L2_PAGETABLE_SHIFT;
   8.284 +        vaddr = fixup_pae_vaddr(vaddr,type);
   8.285 +        if ( is_guest_l2_slot(type, i) &&
   8.286 +             unlikely(!get_page_from_l2e(pl2e[i], pfn, d, vaddr)) )
   8.287              goto fail;
   8.288 -
   8.289 -#if defined(__i386__)
   8.290 +    }
   8.291 +
   8.292 +#if CONFIG_PAGING_LEVELS == 2
   8.293      /* Xen private mappings. */
   8.294 -    memcpy(&pl2e[ROOT_PAGETABLE_FIRST_XEN_SLOT],
   8.295 -           &idle_pg_table[ROOT_PAGETABLE_FIRST_XEN_SLOT],
   8.296 -           ROOT_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t));
   8.297 +    memcpy(&pl2e[L2_PAGETABLE_FIRST_XEN_SLOT],
   8.298 +           &idle_pg_table[L2_PAGETABLE_FIRST_XEN_SLOT],
   8.299 +           L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t));
   8.300      pl2e[l2_table_offset(LINEAR_PT_VIRT_START)] =
   8.301          l2e_create_pfn(pfn, __PAGE_HYPERVISOR);
   8.302      pl2e[l2_table_offset(PERDOMAIN_VIRT_START)] =
   8.303 @@ -704,13 +771,31 @@ static int alloc_l2_table(struct pfn_inf
   8.304              virt_to_page(page_get_owner(page)->arch.mm_perdomain_pt),
   8.305              __PAGE_HYPERVISOR);
   8.306  #endif
   8.307 +#if CONFIG_PAGING_LEVELS == 3
   8.308 +    if (3 == ((type & PGT_va_mask) >> PGT_va_shift)) {
   8.309 +        unsigned long v,src,dst;
   8.310 +        void *virt;
   8.311 +        /* Xen private mappings. */
   8.312 +        dst = L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES-1);
   8.313 +        src = L2_PAGETABLE_FIRST_XEN_SLOT;
   8.314 +        memcpy(&pl2e[dst], &idle_pg_table_l2[src],
   8.315 +               L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t));
   8.316 +        for (v = PERDOMAIN_VIRT_START; v < PERDOMAIN_VIRT_END;
   8.317 +             v += (1 << L2_PAGETABLE_SHIFT)) {
   8.318 +            dst = (v >> L2_PAGETABLE_SHIFT) & (L2_PAGETABLE_ENTRIES-1);
   8.319 +            virt = page_get_owner(page)->arch.mm_perdomain_pt + (v-PERDOMAIN_VIRT_START);
   8.320 +            pl2e[dst] = l2e_create_page(virt_to_page(virt), __PAGE_HYPERVISOR);
   8.321 +        }
   8.322 +        /* see fixup_pae_linear_mappings() for linear pagetables */
   8.323 +    }
   8.324 +#endif
   8.325  
   8.326      unmap_domain_mem(pl2e);
   8.327      return 1;
   8.328  
   8.329   fail:
   8.330      while ( i-- > 0 )
   8.331 -        if ( is_guest_l2_slot(i) )
   8.332 +        if ( is_guest_l2_slot(type, i) )
   8.333              put_page_from_l2e(pl2e[i], pfn);
   8.334  
   8.335      unmap_domain_mem(pl2e);
   8.336 @@ -718,22 +803,29 @@ static int alloc_l2_table(struct pfn_inf
   8.337  }
   8.338  
   8.339  
   8.340 -#ifdef __x86_64__
   8.341 +#if CONFIG_PAGING_LEVELS >= 3
   8.342  
   8.343  static int alloc_l3_table(struct pfn_info *page)
   8.344  {
   8.345      struct domain *d = page_get_owner(page);
   8.346      unsigned long  pfn = page_to_pfn(page);
   8.347 -    l3_pgentry_t  *pl3e = page_to_virt(page);
   8.348 +    unsigned long  vaddr;
   8.349 +    l3_pgentry_t  *pl3e;
   8.350      int            i;
   8.351  
   8.352      ASSERT( !shadow_mode_refcounts(d) );
   8.353  
   8.354 -    for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
   8.355 +    pl3e = map_domain_mem(pfn << PAGE_SHIFT);
   8.356 +    for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ ) {
   8.357 +        vaddr = i << L3_PAGETABLE_SHIFT;
   8.358          if ( is_guest_l3_slot(i) &&
   8.359 -             unlikely(!get_page_from_l3e(pl3e[i], pfn, d)) )
   8.360 +             unlikely(!get_page_from_l3e(pl3e[i], pfn, d, vaddr)) )
   8.361              goto fail;
   8.362 -
   8.363 +    }
   8.364 +
   8.365 +    if (!fixup_pae_linear_mappings(pl3e))
   8.366 +        goto fail;
   8.367 +    unmap_domain_mem(pl3e);
   8.368      return 1;
   8.369  
   8.370   fail:
   8.371 @@ -741,9 +833,13 @@ static int alloc_l3_table(struct pfn_inf
   8.372          if ( is_guest_l3_slot(i) )
   8.373              put_page_from_l3e(pl3e[i], pfn);
   8.374  
   8.375 +    unmap_domain_mem(pl3e);
   8.376      return 0;
   8.377  }
   8.378  
   8.379 +#endif
   8.380 +
   8.381 +#if CONFIG_PAGING_LEVELS >= 4
   8.382  
   8.383  static int alloc_l4_table(struct pfn_info *page)
   8.384  {
   8.385 @@ -813,27 +909,35 @@ static void free_l2_table(struct pfn_inf
   8.386  
   8.387      pl2e = map_domain_mem(pfn << PAGE_SHIFT);
   8.388  
   8.389 -    for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
   8.390 -        if ( is_guest_l2_slot(i) )
   8.391 +    for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ ) {
   8.392 +        if ( is_guest_l2_slot(page->u.inuse.type_info, i) )
   8.393              put_page_from_l2e(pl2e[i], pfn);
   8.394 +    }
   8.395  
   8.396      unmap_domain_mem(pl2e);
   8.397  }
   8.398  
   8.399  
   8.400 -#ifdef __x86_64__
   8.401 +#if CONFIG_PAGING_LEVELS >= 3
   8.402  
   8.403  static void free_l3_table(struct pfn_info *page)
   8.404  {
   8.405      unsigned long pfn = page_to_pfn(page);
   8.406 -    l3_pgentry_t *pl3e = page_to_virt(page);
   8.407 +    l3_pgentry_t *pl3e;
   8.408      int           i;
   8.409  
   8.410 +    pl3e = map_domain_mem(pfn << PAGE_SHIFT);
   8.411 +
   8.412      for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
   8.413          if ( is_guest_l3_slot(i) )
   8.414              put_page_from_l3e(pl3e[i], pfn);
   8.415 +
   8.416 +    unmap_domain_mem(pl3e);
   8.417  }
   8.418  
   8.419 +#endif
   8.420 +
   8.421 +#if CONFIG_PAGING_LEVELS >= 4
   8.422  
   8.423  static void free_l4_table(struct pfn_info *page)
   8.424  {
   8.425 @@ -846,25 +950,24 @@ static void free_l4_table(struct pfn_inf
   8.426              put_page_from_l4e(pl4e[i], pfn);
   8.427  }
   8.428  
   8.429 -#endif /* __x86_64__ */
   8.430 -
   8.431 +#endif
   8.432  
   8.433  static inline int update_l1e(l1_pgentry_t *pl1e, 
   8.434                               l1_pgentry_t  ol1e, 
   8.435                               l1_pgentry_t  nl1e)
   8.436  {
   8.437 -    /* FIXME: breaks with PAE */
   8.438 -    unsigned long o = l1e_get_value(ol1e);
   8.439 -    unsigned long n = l1e_get_value(nl1e);
   8.440 +    intpte_t o = l1e_get_value(ol1e);
   8.441 +    intpte_t n = l1e_get_value(nl1e);
   8.442  
   8.443      if ( unlikely(cmpxchg_user(pl1e, o, n) != 0) ||
   8.444           unlikely(o != l1e_get_value(ol1e)) )
   8.445      {
   8.446 -        MEM_LOG("Failed to update %lx -> %lx: saw %lx",
   8.447 -                l1e_get_value(ol1e), l1e_get_value(nl1e), o);
   8.448 +        MEM_LOG("Failed to update %" PRIpte " -> %" PRIpte ": saw %" PRIpte "\n",
   8.449 +                l1e_get_value(ol1e),
   8.450 +                l1e_get_value(nl1e),
   8.451 +                o);
   8.452          return 0;
   8.453      }
   8.454 -
   8.455      return 1;
   8.456  }
   8.457  
   8.458 @@ -885,8 +988,8 @@ static int mod_l1_entry(l1_pgentry_t *pl
   8.459      {
   8.460          if ( unlikely(l1e_get_flags(nl1e) & L1_DISALLOW_MASK) )
   8.461          {
   8.462 -            MEM_LOG("Bad L1 type settings %lx", 
   8.463 -                    l1e_get_value(nl1e) & L1_DISALLOW_MASK);
   8.464 +            MEM_LOG("Bad L1 type settings %" PRIpte "\n", 
   8.465 +                    (l1e_get_value(nl1e) & L1_DISALLOW_MASK));
   8.466              return 0;
   8.467          }
   8.468  
   8.469 @@ -913,25 +1016,27 @@ static int mod_l1_entry(l1_pgentry_t *pl
   8.470      return 1;
   8.471  }
   8.472  
   8.473 -
   8.474  #define UPDATE_ENTRY(_t,_p,_o,_n) ({                                    \
   8.475 -    unsigned long __o = cmpxchg((unsigned long *)(_p),                  \
   8.476 -                                _t ## e_get_value(_o),                  \
   8.477 -                                _t ## e_get_value(_n));                 \
   8.478 +    intpte_t __o = cmpxchg((intpte_t *)(_p),                            \
   8.479 +                           _t ## e_get_value(_o),                       \
   8.480 +                           _t ## e_get_value(_n));                      \
   8.481      if ( __o != _t ## e_get_value(_o) )                                 \
   8.482 -        MEM_LOG("Failed to update %lx -> %lx: saw %lx",                 \
   8.483 -                _t ## e_get_value(_o), _t ## e_get_value(_n), __o);     \
   8.484 +        MEM_LOG("Failed to update %" PRIpte " -> %" PRIpte ": saw %" PRIpte "", \
   8.485 +                (_t ## e_get_value(_o)),                                \
   8.486 +                (_t ## e_get_value(_n)),                                \
   8.487 +                (__o));                                                 \
   8.488      (__o == _t ## e_get_value(_o)); })
   8.489  
   8.490 -
   8.491  /* Update the L2 entry at pl2e to new value nl2e. pl2e is within frame pfn. */
   8.492  static int mod_l2_entry(l2_pgentry_t *pl2e, 
   8.493                          l2_pgentry_t nl2e, 
   8.494 -                        unsigned long pfn)
   8.495 +                        unsigned long pfn,
   8.496 +                        unsigned int type)
   8.497  {
   8.498      l2_pgentry_t ol2e;
   8.499 -
   8.500 -    if ( unlikely(!is_guest_l2_slot(pgentry_ptr_to_slot(pl2e))) )
   8.501 +    unsigned long vaddr;
   8.502 +
   8.503 +    if ( unlikely(!is_guest_l2_slot(type,pgentry_ptr_to_slot(pl2e))) )
   8.504      {
   8.505          MEM_LOG("Illegal L2 update attempt in Xen-private area %p", pl2e);
   8.506          return 0;
   8.507 @@ -944,8 +1049,8 @@ static int mod_l2_entry(l2_pgentry_t *pl
   8.508      {
   8.509          if ( unlikely(l2e_get_flags(nl2e) & L2_DISALLOW_MASK) )
   8.510          {
   8.511 -            MEM_LOG("Bad L2 type settings %lx", 
   8.512 -                    l2e_get_value(nl2e) & L2_DISALLOW_MASK);
   8.513 +            MEM_LOG("Bad L2 type settings %" PRIpte "\n", 
   8.514 +                    (l2e_get_value(nl2e) & L2_DISALLOW_MASK));
   8.515              return 0;
   8.516          }
   8.517  
   8.518 @@ -953,9 +1058,10 @@ static int mod_l2_entry(l2_pgentry_t *pl
   8.519          if ( !l2e_has_changed(&ol2e, &nl2e, _PAGE_PRESENT))
   8.520              return UPDATE_ENTRY(l2, pl2e, ol2e, nl2e);
   8.521  
   8.522 -        if ( unlikely(!get_page_from_l2e(nl2e, pfn, current->domain,
   8.523 -                                        ((unsigned long)pl2e & 
   8.524 -                                         ~PAGE_MASK) >> 2)) )
   8.525 +        vaddr = (((unsigned long)pl2e & ~PAGE_MASK) / sizeof(l2_pgentry_t))
   8.526 +            << L2_PAGETABLE_SHIFT;
   8.527 +        vaddr = fixup_pae_vaddr(vaddr,type);
   8.528 +        if ( unlikely(!get_page_from_l2e(nl2e, pfn, current->domain, vaddr)) )
   8.529              return 0;
   8.530  
   8.531          if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e)) )
   8.532 @@ -975,7 +1081,7 @@ static int mod_l2_entry(l2_pgentry_t *pl
   8.533  }
   8.534  
   8.535  
   8.536 -#ifdef __x86_64__
   8.537 +#if CONFIG_PAGING_LEVELS >= 3
   8.538  
   8.539  /* Update the L3 entry at pl3e to new value nl3e. pl3e is within frame pfn. */
   8.540  static int mod_l3_entry(l3_pgentry_t *pl3e, 
   8.541 @@ -983,6 +1089,7 @@ static int mod_l3_entry(l3_pgentry_t *pl
   8.542                          unsigned long pfn)
   8.543  {
   8.544      l3_pgentry_t ol3e;
   8.545 +    unsigned long vaddr;
   8.546  
   8.547      if ( unlikely(!is_guest_l3_slot(pgentry_ptr_to_slot(pl3e))) )
   8.548      {
   8.549 @@ -997,8 +1104,8 @@ static int mod_l3_entry(l3_pgentry_t *pl
   8.550      {
   8.551          if ( unlikely(l3e_get_flags(nl3e) & L3_DISALLOW_MASK) )
   8.552          {
   8.553 -            MEM_LOG("Bad L3 type settings %lx", 
   8.554 -                    l3e_get_value(nl3e) & L3_DISALLOW_MASK);
   8.555 +            MEM_LOG("Bad L3 type settings %" PRIpte "", 
   8.556 +                    (u64)(l3e_get_value(nl3e) & L3_DISALLOW_MASK));
   8.557              return 0;
   8.558          }
   8.559  
   8.560 @@ -1006,26 +1113,33 @@ static int mod_l3_entry(l3_pgentry_t *pl
   8.561          if (!l3e_has_changed(&ol3e, &nl3e, _PAGE_PRESENT))
   8.562              return UPDATE_ENTRY(l3, pl3e, ol3e, nl3e);
   8.563  
   8.564 -        if ( unlikely(!get_page_from_l3e(nl3e, pfn, current->domain)) )
   8.565 +        vaddr = (((unsigned long)pl3e & ~PAGE_MASK) / sizeof(l3_pgentry_t))
   8.566 +            << L3_PAGETABLE_SHIFT;
   8.567 +        if ( unlikely(!get_page_from_l3e(nl3e, pfn, current->domain, vaddr)) )
   8.568              return 0;
   8.569  
   8.570 -        if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e)) )
   8.571 +        if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e) ||
   8.572 +                      !fixup_pae_linear_mappings(pl3e)) )
   8.573          {
   8.574              put_page_from_l3e(nl3e, pfn);
   8.575              return 0;
   8.576          }
   8.577 -        
   8.578 +
   8.579          put_page_from_l3e(ol3e, pfn);
   8.580          return 1;
   8.581      }
   8.582  
   8.583 -    if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e)) )
   8.584 +    if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e) ||
   8.585 +                  !fixup_pae_linear_mappings(pl3e)) )
   8.586          return 0;
   8.587  
   8.588      put_page_from_l3e(ol3e, pfn);
   8.589      return 1;
   8.590  }
   8.591  
   8.592 +#endif
   8.593 +
   8.594 +#if CONFIG_PAGING_LEVELS >= 4
   8.595  
   8.596  /* Update the L4 entry at pl4e to new value nl4e. pl4e is within frame pfn. */
   8.597  static int mod_l4_entry(l4_pgentry_t *pl4e, 
   8.598 @@ -1076,20 +1190,21 @@ static int mod_l4_entry(l4_pgentry_t *pl
   8.599      return 1;
   8.600  }
   8.601  
   8.602 -#endif /* __x86_64__ */
   8.603 -
   8.604 +#endif
   8.605  
   8.606  int alloc_page_type(struct pfn_info *page, unsigned int type)
   8.607  {
   8.608 -    switch ( type )
   8.609 +    switch ( type & PGT_type_mask )
   8.610      {
   8.611      case PGT_l1_page_table:
   8.612          return alloc_l1_table(page);
   8.613      case PGT_l2_page_table:
   8.614 -        return alloc_l2_table(page);
   8.615 -#ifdef __x86_64__
   8.616 +        return alloc_l2_table(page, type);
   8.617 +#if CONFIG_PAGING_LEVELS >= 3
   8.618      case PGT_l3_page_table:
   8.619          return alloc_l3_table(page);
   8.620 +#endif
   8.621 +#if CONFIG_PAGING_LEVELS >= 4
   8.622      case PGT_l4_page_table:
   8.623          return alloc_l4_table(page);
   8.624  #endif
   8.625 @@ -1124,7 +1239,7 @@ void free_page_type(struct pfn_info *pag
   8.626          }
   8.627      }
   8.628  
   8.629 -    switch ( type )
   8.630 +    switch (type  & PGT_type_mask)
   8.631      {
   8.632      case PGT_l1_page_table:
   8.633          free_l1_table(page);
   8.634 @@ -1134,17 +1249,21 @@ void free_page_type(struct pfn_info *pag
   8.635          free_l2_table(page);
   8.636          break;
   8.637  
   8.638 -#ifdef __x86_64__
   8.639 +#if CONFIG_PAGING_LEVELS >= 3
   8.640      case PGT_l3_page_table:
   8.641          free_l3_table(page);
   8.642          break;
   8.643 -
   8.644 +#endif
   8.645 +
   8.646 +#if CONFIG_PAGING_LEVELS >= 4
   8.647      case PGT_l4_page_table:
   8.648          free_l4_table(page);
   8.649          break;
   8.650  #endif
   8.651  
   8.652      default:
   8.653 +        printk("%s: type %x pfn %lx\n",__FUNCTION__,
   8.654 +               type, page_to_pfn(page));
   8.655          BUG();
   8.656      }
   8.657  }
   8.658 @@ -1187,7 +1306,7 @@ void put_page_type(struct pfn_info *page
   8.659                                             x & ~PGT_validated)) != x) )
   8.660                      goto again;
   8.661                  /* We cleared the 'valid bit' so we do the clean up. */
   8.662 -                free_page_type(page, x & PGT_type_mask);
   8.663 +                free_page_type(page, x);
   8.664                  /* Carry on, but with the 'valid bit' now clear. */
   8.665                  x  &= ~PGT_validated;
   8.666                  nx &= ~PGT_validated;
   8.667 @@ -1270,6 +1389,10 @@ int get_page_type(struct pfn_info *page,
   8.668                      /* This table is may be mapped at multiple locations. */
   8.669                      nx &= ~PGT_va_mask;
   8.670                      nx |= PGT_va_unknown;
   8.671 +#if 0 /* debug */
   8.672 +                    printk("%s: pfn %lx type %x -> %x (tag as unknown)\n",
   8.673 +                           __FUNCTION__,page_to_pfn(page),x,nx);
   8.674 +#endif
   8.675                  }
   8.676              }
   8.677              if ( unlikely(!(x & PGT_validated)) )
   8.678 @@ -1286,7 +1409,7 @@ int get_page_type(struct pfn_info *page,
   8.679      if ( unlikely(!(nx & PGT_validated)) )
   8.680      {
   8.681          /* Try to validate page type; drop the new reference on failure. */
   8.682 -        if ( unlikely(!alloc_page_type(page, type & PGT_type_mask)) )
   8.683 +        if ( unlikely(!alloc_page_type(page, type)) )
   8.684          {
   8.685              MEM_LOG("Error while validating pfn %lx for type %08x."
   8.686                      " caf=%08x taf=%08x",
   8.687 @@ -1537,15 +1660,17 @@ int do_mmuext_op(
   8.688              type = PGT_l2_page_table;
   8.689              goto pin_page;
   8.690  
   8.691 -#ifdef __x86_64__
   8.692 +#if CONFIG_PAGING_LEVELS >= 3
   8.693          case MMUEXT_PIN_L3_TABLE:
   8.694              type = PGT_l3_page_table;
   8.695              goto pin_page;
   8.696 -
   8.697 +#endif
   8.698 +
   8.699 +#if CONFIG_PAGING_LEVELS >= 4
   8.700          case MMUEXT_PIN_L4_TABLE:
   8.701              type = PGT_l4_page_table;
   8.702              goto pin_page;
   8.703 -#endif /* __x86_64__ */
   8.704 +#endif
   8.705  
   8.706          case MMUEXT_UNPIN_TABLE:
   8.707              if ( unlikely(!(okay = get_page_from_pagenr(op.mfn, FOREIGNDOM))) )
   8.708 @@ -1912,19 +2037,20 @@ int do_mmu_update(
   8.709                  break;
   8.710              case PGT_l2_page_table:
   8.711                  ASSERT( !shadow_mode_refcounts(d) );
   8.712 -                if ( likely(get_page_type(page, PGT_l2_page_table)) )
   8.713 +                if ( likely(get_page_type(
   8.714 +                    page, type_info & (PGT_type_mask|PGT_va_mask))) )
   8.715                  {
   8.716                      l2_pgentry_t l2e;
   8.717  
   8.718                      /* FIXME: doesn't work with PAE */
   8.719                      l2e = l2e_create_phys(req.val, req.val);
   8.720 -                    okay = mod_l2_entry(va, l2e, mfn);
   8.721 +                    okay = mod_l2_entry((l2_pgentry_t *)va, l2e, mfn, type_info);
   8.722                      if ( okay && unlikely(shadow_mode_enabled(d)) )
   8.723                          shadow_l2_normal_pt_update(d, req.ptr, l2e, &sh_mapcache);
   8.724                      put_page_type(page);
   8.725                  }
   8.726                  break;
   8.727 -#ifdef __x86_64__
   8.728 +#if CONFIG_PAGING_LEVELS >= 3
   8.729              case PGT_l3_page_table:
   8.730                  ASSERT( !shadow_mode_refcounts(d) );
   8.731                  if ( likely(get_page_type(page, PGT_l3_page_table)) )
   8.732 @@ -1939,6 +2065,8 @@ int do_mmu_update(
   8.733                      put_page_type(page);
   8.734                  }
   8.735                  break;
   8.736 +#endif
   8.737 +#if CONFIG_PAGING_LEVELS >= 4
   8.738              case PGT_l4_page_table:
   8.739                  ASSERT( !shadow_mode_refcounts(d) );
   8.740                  if ( likely(get_page_type(page, PGT_l4_page_table)) )
   8.741 @@ -1952,7 +2080,7 @@ int do_mmu_update(
   8.742                      put_page_type(page);
   8.743                  }
   8.744                  break;
   8.745 -#endif /* __x86_64__ */
   8.746 +#endif
   8.747              default:
   8.748                  if ( likely(get_page_type(page, PGT_writable_page)) )
   8.749                  {
   8.750 @@ -2119,9 +2247,10 @@ int update_grant_va_mapping(unsigned lon
   8.751  
   8.752  
   8.753  int do_update_va_mapping(unsigned long va,
   8.754 -                         l1_pgentry_t  val, 
   8.755 +                         unsigned long val32,
   8.756                           unsigned long flags)
   8.757  {
   8.758 +    l1_pgentry_t       val  = l1e_create_phys(val32,val32);
   8.759      struct exec_domain *ed  = current;
   8.760      struct domain      *d   = ed->domain;
   8.761      unsigned int        cpu = ed->processor;
   8.762 @@ -2216,7 +2345,7 @@ int do_update_va_mapping(unsigned long v
   8.763  }
   8.764  
   8.765  int do_update_va_mapping_otherdomain(unsigned long va,
   8.766 -                                     l1_pgentry_t  val, 
   8.767 +                                     unsigned long val32,
   8.768                                       unsigned long flags,
   8.769                                       domid_t domid)
   8.770  {
   8.771 @@ -2234,7 +2363,7 @@ int do_update_va_mapping_otherdomain(uns
   8.772          return -ESRCH;
   8.773      }
   8.774  
   8.775 -    rc = do_update_va_mapping(va, val, flags);
   8.776 +    rc = do_update_va_mapping(va, val32, flags);
   8.777  
   8.778      return rc;
   8.779  }
   8.780 @@ -2551,8 +2680,8 @@ void ptwr_flush(struct domain *d, const 
   8.781  
   8.782  static int ptwr_emulated_update(
   8.783      unsigned long addr,
   8.784 -    unsigned long old,
   8.785 -    unsigned long val,
   8.786 +    physaddr_t old,
   8.787 +    physaddr_t val,
   8.788      unsigned int bytes,
   8.789      unsigned int do_cmpxchg)
   8.790  {
   8.791 @@ -2570,21 +2699,22 @@ static int ptwr_emulated_update(
   8.792      }
   8.793  
   8.794      /* Turn a sub-word access into a full-word access. */
   8.795 -    /* FIXME: needs tweaks for PAE */
   8.796 -    if ( (addr & ((BITS_PER_LONG/8)-1)) != 0 )
   8.797 +    if (bytes != sizeof(physaddr_t))
   8.798      {
   8.799          int           rc;
   8.800 -        unsigned long full;
   8.801 -        unsigned int  mask = addr & ((BITS_PER_LONG/8)-1);
   8.802 +        physaddr_t    full;
   8.803 +        unsigned int  offset = addr & (sizeof(physaddr_t)-1);
   8.804 +
   8.805          /* Align address; read full word. */
   8.806 -        addr &= ~((BITS_PER_LONG/8)-1);
   8.807 -        if ( (rc = x86_emulate_read_std(addr, &full, BITS_PER_LONG/8)) )
   8.808 -            return rc;
   8.809 +        addr &= ~(sizeof(physaddr_t)-1);
   8.810 +        if ( (rc = x86_emulate_read_std(addr, (unsigned long *)&full,
   8.811 +					sizeof(physaddr_t))) )
   8.812 +            return rc; 
   8.813          /* Mask out bits provided by caller. */
   8.814 -        full &= ~((1UL << (bytes*8)) - 1UL) << (mask*8);
   8.815 +        full &= ~((((physaddr_t)1 << (bytes*8)) - 1) << (offset*8));
   8.816          /* Shift the caller value and OR in the missing bits. */
   8.817 -        val  &= (1UL << (bytes*8)) - 1UL;
   8.818 -        val <<= mask*8;
   8.819 +        val  &= (((physaddr_t)1 << (bytes*8)) - 1);
   8.820 +        val <<= (offset)*8;
   8.821          val  |= full;
   8.822      }
   8.823  
     9.1 --- a/xen/arch/x86/setup.c	Tue May 31 15:20:43 2005 +0000
     9.2 +++ b/xen/arch/x86/setup.c	Tue May 31 15:39:28 2005 +0000
     9.3 @@ -87,7 +87,7 @@ extern unsigned long cpu0_stack[];
     9.4  
     9.5  struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
     9.6  
     9.7 -#if defined(CONFIG_X86_64)
     9.8 +#if CONFIG_PAGING_LEVELS > 2
     9.9  unsigned long mmu_cr4_features = X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE;
    9.10  #else
    9.11  unsigned long mmu_cr4_features = X86_CR4_PSE | X86_CR4_PGE;
    10.1 --- a/xen/arch/x86/shadow.c	Tue May 31 15:20:43 2005 +0000
    10.2 +++ b/xen/arch/x86/shadow.c	Tue May 31 15:39:28 2005 +0000
    10.3 @@ -358,13 +358,13 @@ free_shadow_hl2_table(struct domain *d, 
    10.4  }
    10.5  
    10.6  static void inline
    10.7 -free_shadow_l2_table(struct domain *d, unsigned long smfn)
    10.8 +free_shadow_l2_table(struct domain *d, unsigned long smfn, unsigned int type)
    10.9  {
   10.10      l2_pgentry_t *pl2e = map_domain_mem(smfn << PAGE_SHIFT);
   10.11      int i, external = shadow_mode_external(d);
   10.12  
   10.13      for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
   10.14 -        if ( external || is_guest_l2_slot(i) )
   10.15 +        if ( external || is_guest_l2_slot(type, i) )
   10.16              if ( l2e_get_flags(pl2e[i]) & _PAGE_PRESENT )
   10.17                  put_shadow_ref(l2e_get_pfn(pl2e[i]));
   10.18  
   10.19 @@ -404,7 +404,7 @@ void free_shadow_page(unsigned long smfn
   10.20      case PGT_l2_shadow:
   10.21          perfc_decr(shadow_l2_pages);
   10.22          shadow_demote(d, gpfn, gmfn);
   10.23 -        free_shadow_l2_table(d, smfn);
   10.24 +        free_shadow_l2_table(d, smfn, page->u.inuse.type_info);
   10.25          break;
   10.26  
   10.27      case PGT_hl2_shadow:
   10.28 @@ -573,7 +573,7 @@ static void free_shadow_pages(struct dom
   10.29      //
   10.30      for_each_exec_domain(d, ed)
   10.31      {
   10.32 -        if ( pagetable_val(ed->arch.shadow_table) )
   10.33 +        if ( pagetable_get_phys(ed->arch.shadow_table) )
   10.34          {
   10.35              put_shadow_ref(pagetable_get_pfn(ed->arch.shadow_table));
   10.36              ed->arch.shadow_table = mk_pagetable(0);
   10.37 @@ -684,7 +684,7 @@ static void alloc_monitor_pagetable(stru
   10.38      struct pfn_info *mmfn_info;
   10.39      struct domain *d = ed->domain;
   10.40  
   10.41 -    ASSERT(pagetable_val(ed->arch.monitor_table) == 0);
   10.42 +    ASSERT(pagetable_get_phys(ed->arch.monitor_table) == 0);
   10.43  
   10.44      mmfn_info = alloc_domheap_page(NULL);
   10.45      ASSERT(mmfn_info != NULL);
   10.46 @@ -705,7 +705,7 @@ static void alloc_monitor_pagetable(stru
   10.47  
   10.48      // map the phys_to_machine map into the Read-Only MPT space for this domain
   10.49      mpl2e[l2_table_offset(RO_MPT_VIRT_START)] =
   10.50 -        l2e_create_phys(pagetable_val(d->arch.phys_table),
   10.51 +        l2e_create_phys(pagetable_get_phys(d->arch.phys_table),
   10.52                          __PAGE_HYPERVISOR);
   10.53  
   10.54      // Don't (yet) have mappings for these...
   10.55 @@ -726,7 +726,7 @@ void free_monitor_pagetable(struct exec_
   10.56      l2_pgentry_t *mpl2e, hl2e, sl2e;
   10.57      unsigned long mfn;
   10.58  
   10.59 -    ASSERT( pagetable_val(ed->arch.monitor_table) );
   10.60 +    ASSERT( pagetable_get_phys(ed->arch.monitor_table) );
   10.61      
   10.62      mpl2e = ed->arch.monitor_vtable;
   10.63  
   10.64 @@ -766,7 +766,7 @@ set_p2m_entry(struct domain *d, unsigned
   10.65                struct map_dom_mem_cache *l2cache,
   10.66                struct map_dom_mem_cache *l1cache)
   10.67  {
   10.68 -    unsigned long phystab = pagetable_val(d->arch.phys_table);
   10.69 +    unsigned long phystab = pagetable_get_phys(d->arch.phys_table);
   10.70      l2_pgentry_t *l2, l2e;
   10.71      l1_pgentry_t *l1;
   10.72      struct pfn_info *l1page;
   10.73 @@ -965,7 +965,7 @@ int __shadow_mode_enable(struct domain *
   10.74      {
   10.75          if ( !(new_modes & SHM_external) )
   10.76          {
   10.77 -            ASSERT( !pagetable_val(d->arch.phys_table) );
   10.78 +            ASSERT( !pagetable_get_phys(d->arch.phys_table) );
   10.79              if ( !alloc_p2m_table(d) )
   10.80              {
   10.81                  printk("alloc_p2m_table failed (out-of-memory?)\n");
   10.82 @@ -1051,7 +1051,7 @@ int __shadow_mode_enable(struct domain *
   10.83          d->arch.shadow_dirty_bitmap = NULL;
   10.84      }
   10.85      if ( (new_modes & SHM_translate) && !(new_modes & SHM_external) &&
   10.86 -         pagetable_val(d->arch.phys_table) )
   10.87 +         pagetable_get_phys(d->arch.phys_table) )
   10.88      {
   10.89          free_p2m_table(d);
   10.90      }
   10.91 @@ -1093,7 +1093,8 @@ translate_l1pgtable(struct domain *d, l1
   10.92  // up dom0.
   10.93  //
   10.94  void
   10.95 -translate_l2pgtable(struct domain *d, l1_pgentry_t *p2m, unsigned long l2mfn)
   10.96 +translate_l2pgtable(struct domain *d, l1_pgentry_t *p2m, unsigned long l2mfn,
   10.97 +                    unsigned int type)
   10.98  {
   10.99      int i;
  10.100      l2_pgentry_t *l2;
  10.101 @@ -1103,7 +1104,7 @@ translate_l2pgtable(struct domain *d, l1
  10.102      l2 = map_domain_mem(l2mfn << PAGE_SHIFT);
  10.103      for (i = 0; i < L2_PAGETABLE_ENTRIES; i++)
  10.104      {
  10.105 -        if ( is_guest_l2_slot(i) &&
  10.106 +        if ( is_guest_l2_slot(type, i) &&
  10.107               (l2e_get_flags(l2[i]) & _PAGE_PRESENT) )
  10.108          {
  10.109              unsigned long mfn = l2e_get_pfn(l2[i]);
  10.110 @@ -1403,13 +1404,13 @@ gpfn_to_mfn_foreign(struct domain *d, un
  10.111      perfc_incrc(gpfn_to_mfn_foreign);
  10.112  
  10.113      unsigned long va = gpfn << PAGE_SHIFT;
  10.114 -    unsigned long phystab = pagetable_val(d->arch.phys_table);
  10.115 +    unsigned long phystab = pagetable_get_phys(d->arch.phys_table);
  10.116      l2_pgentry_t *l2 = map_domain_mem(phystab);
  10.117      l2_pgentry_t l2e = l2[l2_table_offset(va)];
  10.118      unmap_domain_mem(l2);
  10.119      if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
  10.120      {
  10.121 -        printk("gpfn_to_mfn_foreign(d->id=%d, gpfn=%lx) => 0 l2e=%lx\n",
  10.122 +        printk("gpfn_to_mfn_foreign(d->id=%d, gpfn=%lx) => 0 l2e=%" PRIpte "\n",
  10.123                 d->domain_id, gpfn, l2e_get_value(l2e));
  10.124          return INVALID_MFN;
  10.125      }
  10.126 @@ -1425,7 +1426,7 @@ gpfn_to_mfn_foreign(struct domain *d, un
  10.127  
  10.128      if ( !(l1e_get_flags(l1e) & _PAGE_PRESENT) )
  10.129      {
  10.130 -        printk("gpfn_to_mfn_foreign(d->id=%d, gpfn=%lx) => 0 l1e=%lx\n",
  10.131 +        printk("gpfn_to_mfn_foreign(d->id=%d, gpfn=%lx) => 0 l1e=%" PRIpte "\n",
  10.132                 d->domain_id, gpfn, l1e_get_value(l1e));
  10.133          return INVALID_MFN;
  10.134      }
  10.135 @@ -1540,7 +1541,7 @@ static unsigned long shadow_l2_table(
  10.136              unsigned long hl2mfn;
  10.137  
  10.138              spl2e[l2_table_offset(RO_MPT_VIRT_START)] =
  10.139 -                l2e_create_phys(pagetable_val(d->arch.phys_table),
  10.140 +                l2e_create_phys(pagetable_get_phys(d->arch.phys_table),
  10.141                                  __PAGE_HYPERVISOR);
  10.142  
  10.143              if ( unlikely(!(hl2mfn = __shadow_status(d, gpfn, PGT_hl2_shadow))) )
  10.144 @@ -2391,7 +2392,10 @@ static int resync_all(struct domain *d, 
  10.145              changed = 0;
  10.146              for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
  10.147              {
  10.148 -                if ( !is_guest_l2_slot(i) && !external )
   10.149 +#ifdef CONFIG_X86_PAE
  10.150 +                BUG();  /* FIXME: need type_info */
  10.151 +#endif
  10.152 +                if ( !is_guest_l2_slot(0,i) && !external )
  10.153                      continue;
  10.154  
  10.155                  l2_pgentry_t new_pde = guest2[i];
  10.156 @@ -2434,7 +2438,10 @@ static int resync_all(struct domain *d, 
  10.157              changed = 0;
  10.158              for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
  10.159              {
  10.160 -                if ( !is_guest_l2_slot(i) && !external )
   10.161 +#ifdef CONFIG_X86_PAE
  10.162 +                BUG();  /* FIXME: need type_info */
  10.163 +#endif
  10.164 +                if ( !is_guest_l2_slot(0, i) && !external )
  10.165                      continue;
  10.166  
  10.167                  l2_pgentry_t new_pde = guest2[i];
  10.168 @@ -2647,8 +2654,8 @@ int shadow_fault(unsigned long va, struc
  10.169                                       &gpte, sizeof(gpte))) )
  10.170          {
  10.171              printk("%s() failed, crashing domain %d "
  10.172 -                   "due to a read-only L2 page table (gpde=%lx), va=%lx\n",
  10.173 -                   __func__, d->domain_id, l2e_get_value(gpde), va);
  10.174 +                   "due to a read-only L2 page table (gpde=%" PRIpte "), va=%lx\n",
   10.175 +                   __func__, d->domain_id, l2e_get_value(gpde), va);
  10.176              domain_crash_synchronous();
  10.177          }
  10.178  
  10.179 @@ -2721,7 +2728,7 @@ void shadow_l2_normal_pt_update(
  10.180      shadow_unlock(d);
  10.181  }
  10.182  
  10.183 -#ifdef __x86_64__
  10.184 +#if CONFIG_PAGING_LEVELS >= 3
  10.185  void shadow_l3_normal_pt_update(
  10.186      struct domain *d,
  10.187      unsigned long pa, l3_pgentry_t gpde,
  10.188 @@ -2729,7 +2736,9 @@ void shadow_l3_normal_pt_update(
  10.189  {
  10.190      BUG(); // not yet implemented
  10.191  }
  10.192 -
  10.193 +#endif
  10.194 +
  10.195 +#if CONFIG_PAGING_LEVELS >= 4
  10.196  void shadow_l4_normal_pt_update(
  10.197      struct domain *d,
  10.198      unsigned long pa, l4_pgentry_t gpde,
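
The new type argument threaded through free_shadow_l2_table() and
translate_l2pgtable() carries the PGT_va backpointer: under PAE the guest l2
spans four page-sized tables and only the fourth (va index 3) contains
Xen-private slots, so the slot test needs to know which quarter it is looking
at. A minimal sketch of that test (constants assume the new 0xF5800000 layout
and 512-entry PAE tables; names are hypothetical, not from the patch):

    #include <assert.h>

    #define L2_SHIFT        21
    #define L2_ENTRIES      512
    #define FIRST_XEN_SLOT  (0xF5800000UL >> L2_SHIFT)  /* == 1964 */

    /* Mirrors is_guest_l2_slot(_t,_s) from page-3level.h, with the
     * PGT_va index already extracted from type_info. */
    static int pae_is_guest_l2_slot(unsigned int va_idx, unsigned int slot)
    {
        return (va_idx != 3) || (slot < (FIRST_XEN_SLOT & (L2_ENTRIES - 1)));
    }

    int main(void)
    {
        assert(pae_is_guest_l2_slot(0, 511));   /* va idx 0..2: all guest  */
        assert(pae_is_guest_l2_slot(3, 427));   /* top 1GB, below Xen area */
        assert(!pae_is_guest_l2_slot(3, 428));  /* 0xF5800000 up: Xen-only */
        return 0;
    }
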
    11.1 --- a/xen/arch/x86/traps.c	Tue May 31 15:20:43 2005 +0000
    11.2 +++ b/xen/arch/x86/traps.c	Tue May 31 15:39:28 2005 +0000
    11.3 @@ -797,7 +797,7 @@ static int emulate_privileged_op(struct 
    11.4              break;
    11.5              
    11.6          case 3: /* Read CR3 */
    11.7 -            *reg = pagetable_val(ed->arch.guest_table);
    11.8 +            *reg = pagetable_get_phys(ed->arch.guest_table);
    11.9              break;
   11.10  
   11.11          default:
    12.1 --- a/xen/arch/x86/vmx.c	Tue May 31 15:20:43 2005 +0000
    12.2 +++ b/xen/arch/x86/vmx.c	Tue May 31 15:39:28 2005 +0000
    12.3 @@ -567,7 +567,7 @@ vmx_world_restore(struct exec_domain *d,
    12.4  
    12.5      if (!vmx_paging_enabled(d)) {
    12.6  	VMX_DBG_LOG(DBG_LEVEL_VMMU, "switching to vmxassist. use phys table");
    12.7 -	__vmwrite(GUEST_CR3, pagetable_val(d->domain->arch.phys_table));
    12.8 +	__vmwrite(GUEST_CR3, pagetable_get_phys(d->domain->arch.phys_table));
    12.9          goto skip_cr3;
   12.10      }
   12.11  
   12.12 @@ -578,7 +578,7 @@ vmx_world_restore(struct exec_domain *d,
   12.13  	 * We simply invalidate the shadow.
   12.14  	 */
   12.15  	mfn = phys_to_machine_mapping(c->cr3 >> PAGE_SHIFT);
   12.16 -	if ((mfn << PAGE_SHIFT) != pagetable_val(d->arch.guest_table)) {
   12.17 +	if (mfn != pagetable_get_pfn(d->arch.guest_table)) {
   12.18  	    printk("Invalid CR3 value=%lx", c->cr3);
   12.19  	    domain_crash_synchronous();
   12.20  	    return 0;
   12.21 @@ -603,7 +603,7 @@ vmx_world_restore(struct exec_domain *d,
   12.22  	 */
   12.23  	d->arch.arch_vmx.cpu_cr3 = c->cr3;
   12.24  	VMX_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx", c->cr3);
   12.25 -	__vmwrite(GUEST_CR3, pagetable_val(d->arch.shadow_table));
   12.26 +	__vmwrite(GUEST_CR3, pagetable_get_phys(d->arch.shadow_table));
   12.27      }
   12.28  
   12.29  skip_cr3:
   12.30 @@ -769,7 +769,7 @@ static int vmx_set_cr0(unsigned long val
   12.31          VMX_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx", 
   12.32                  (unsigned long) (mfn << PAGE_SHIFT));
   12.33  
   12.34 -        __vmwrite(GUEST_CR3, pagetable_val(d->arch.shadow_table));
   12.35 +        __vmwrite(GUEST_CR3, pagetable_get_phys(d->arch.shadow_table));
   12.36          /* 
   12.37           * arch->shadow_table should hold the next CR3 for shadow
   12.38           */
   12.39 @@ -869,7 +869,7 @@ static int mov_to_cr(int gp, int cr, str
   12.40               * We simply invalidate the shadow.
   12.41               */
   12.42              mfn = phys_to_machine_mapping(value >> PAGE_SHIFT);
   12.43 -            if ((mfn << PAGE_SHIFT) != pagetable_val(d->arch.guest_table))
   12.44 +            if (mfn != pagetable_get_pfn(d->arch.guest_table))
   12.45                  __vmx_bug(regs);
   12.46              shadow_sync_all(d->domain);
   12.47          } else {
   12.48 @@ -896,7 +896,7 @@ static int mov_to_cr(int gp, int cr, str
   12.49              d->arch.arch_vmx.cpu_cr3 = value;
   12.50              VMX_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx",
   12.51                      value);
   12.52 -            __vmwrite(GUEST_CR3, pagetable_val(d->arch.shadow_table));
   12.53 +            __vmwrite(GUEST_CR3, pagetable_get_phys(d->arch.shadow_table));
   12.54          }
   12.55          break;
   12.56      }
    13.1 --- a/xen/arch/x86/vmx_io.c	Tue May 31 15:20:43 2005 +0000
    13.2 +++ b/xen/arch/x86/vmx_io.c	Tue May 31 15:39:28 2005 +0000
    13.3 @@ -466,12 +466,12 @@ void vmx_do_resume(struct exec_domain *d
    13.4  {
    13.5      vmx_stts();
    13.6      if ( vmx_paging_enabled(d) )
    13.7 -        __vmwrite(GUEST_CR3, pagetable_val(d->arch.shadow_table));
    13.8 +        __vmwrite(GUEST_CR3, pagetable_get_phys(d->arch.shadow_table));
    13.9      else
   13.10          // paging is not enabled in the guest
   13.11 -        __vmwrite(GUEST_CR3, pagetable_val(d->domain->arch.phys_table));
   13.12 +        __vmwrite(GUEST_CR3, pagetable_get_phys(d->domain->arch.phys_table));
   13.13  
   13.14 -    __vmwrite(HOST_CR3, pagetable_val(d->arch.monitor_table));
   13.15 +    __vmwrite(HOST_CR3, pagetable_get_phys(d->arch.monitor_table));
   13.16      __vmwrite(HOST_ESP, (unsigned long)get_stack_bottom());
   13.17  
   13.18      if (event_pending(d)) {
    14.1 --- a/xen/arch/x86/vmx_vmcs.c	Tue May 31 15:20:43 2005 +0000
    14.2 +++ b/xen/arch/x86/vmx_vmcs.c	Tue May 31 15:39:28 2005 +0000
    14.3 @@ -196,8 +196,8 @@ void vmx_do_launch(struct exec_domain *e
    14.4      error |= __vmwrite(GUEST_TR_BASE, 0);
    14.5      error |= __vmwrite(GUEST_TR_LIMIT, 0xff);
    14.6  
    14.7 -    __vmwrite(GUEST_CR3, pagetable_val(ed->arch.guest_table));
    14.8 -    __vmwrite(HOST_CR3, pagetable_val(ed->arch.monitor_table));
    14.9 +    __vmwrite(GUEST_CR3, pagetable_get_phys(ed->arch.guest_table));
   14.10 +    __vmwrite(HOST_CR3, pagetable_get_phys(ed->arch.monitor_table));
   14.11      __vmwrite(HOST_ESP, (unsigned long)get_stack_bottom());
   14.12  
   14.13      ed->arch.schedule_tail = arch_vmx_do_resume;
    15.1 --- a/xen/arch/x86/x86_32/domain_page.c	Tue May 31 15:20:43 2005 +0000
    15.2 +++ b/xen/arch/x86/x86_32/domain_page.c	Tue May 31 15:39:28 2005 +0000
    15.3 @@ -72,7 +72,7 @@ void *map_domain_mem(unsigned long pa)
    15.4              shadow_epoch[cpu] = ++epoch;
    15.5          }
    15.6      }
    15.7 -    while ( l1e_get_value(cache[idx]) != 0 );
    15.8 +    while ( l1e_get_flags(cache[idx]) & _PAGE_PRESENT );
    15.9  
   15.10      cache[idx] = l1e_create_phys(pa, __PAGE_HYPERVISOR);
   15.11  
    16.1 --- a/xen/arch/x86/x86_32/mm.c	Tue May 31 15:20:43 2005 +0000
    16.2 +++ b/xen/arch/x86/x86_32/mm.c	Tue May 31 15:39:28 2005 +0000
    16.3 @@ -27,6 +27,8 @@
    16.4  #include <asm/fixmap.h>
    16.5  #include <asm/domain_page.h>
    16.6  
    16.7 +static unsigned long mpt_size;
    16.8 +
    16.9  struct pfn_info *alloc_xen_pagetable(void)
   16.10  {
   16.11      extern int early_boot;
   16.12 @@ -51,69 +53,102 @@ void free_xen_pagetable(struct pfn_info 
   16.13  
   16.14  l2_pgentry_t *virt_to_xen_l2e(unsigned long v)
   16.15  {
   16.16 -    return &idle_pg_table[l2_table_offset(v)];
   16.17 +    return &idle_pg_table_l2[l2_linear_offset(v)];
   16.18  }
   16.19  
   16.20  void __init paging_init(void)
   16.21  {
   16.22      void *ioremap_pt;
   16.23 -    unsigned long v;
   16.24 -    struct pfn_info *m2p_pg;
   16.25 +    unsigned long v, v2, i;
   16.26 +    struct pfn_info *pg;
   16.27 +
   16.28 +#ifdef CONFIG_X86_PAE
   16.29 +    printk("PAE enabled, limit: %d GB\n", MACHPHYS_MBYTES);
   16.30 +#else
   16.31 +    printk("PAE disabled.\n");
   16.32 +#endif
   16.33 +
   16.34 +    idle0_exec_domain.arch.monitor_table = mk_pagetable(__pa(idle_pg_table));
   16.35  
   16.36 -    /* Allocate and map the machine-to-phys table. */
   16.37 -    if ( (m2p_pg = alloc_domheap_pages(NULL, 10)) == NULL )
   16.38 -        panic("Not enough memory to bootstrap Xen.\n");
   16.39 -    idle_pg_table[l2_table_offset(RDWR_MPT_VIRT_START)] =
   16.40 -        l2e_create_page(m2p_pg, __PAGE_HYPERVISOR | _PAGE_PSE);
   16.41 -    memset((void *)RDWR_MPT_VIRT_START, 0x55, 4UL << 20);
   16.42 +    /* Allocate and map the machine-to-phys table and create read-only
   16.43 +     * mapping of MPT for guest-OS use.  Without PAE we'll end up with
   16.44 +     * one 4MB page; with PAE we'll allocate as many 2MB pages as the
   16.45 +     * amount of installed memory requires, but at least 4MB to cover
   16.46 +     * the 4GB address space.  This is needed to make PCI I/O memory
   16.47 +     * address lookups work in guests. -- kraxel */
   16.48 +    mpt_size = max_page * 4;
   16.49 +    if (mpt_size < 4*1024*1024)
   16.50 +        mpt_size = 4*1024*1024;
   16.51 +    for (v  = RDWR_MPT_VIRT_START, v2 = RO_MPT_VIRT_START;
   16.52 +         v != RDWR_MPT_VIRT_END && mpt_size > (v - RDWR_MPT_VIRT_START);
   16.53 +         v += (1 << L2_PAGETABLE_SHIFT), v2 += (1 << L2_PAGETABLE_SHIFT)) {
   16.54 +        if ( (pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER)) == NULL )
   16.55 +            panic("Not enough memory to bootstrap Xen.\n");
   16.56 +        idle_pg_table_l2[l2_linear_offset(v)] =
   16.57 +            l2e_create_page(pg, __PAGE_HYPERVISOR | _PAGE_PSE);
   16.58 +        idle_pg_table_l2[l2_linear_offset(v2)] =
   16.59 +            l2e_create_page(pg, (__PAGE_HYPERVISOR | _PAGE_PSE) & ~_PAGE_RW);
   16.60 +    }
   16.61 +    memset((void *)RDWR_MPT_VIRT_START, 0x55, mpt_size);
   16.62  
   16.63 -    /* Xen 4MB mappings can all be GLOBAL. */
   16.64 +    /* Xen 2/4MB mappings can all be GLOBAL. */
   16.65      if ( cpu_has_pge )
   16.66      {
   16.67 -        for ( v = HYPERVISOR_VIRT_START; v; v += (1 << L2_PAGETABLE_SHIFT) )
   16.68 -        {
   16.69 -            if (l2e_get_flags(idle_pg_table[l2_table_offset(v)]) & _PAGE_PSE)
   16.70 -                l2e_add_flags(&idle_pg_table[l2_table_offset(v)],
   16.71 -                              _PAGE_GLOBAL);
   16.72 +        for ( v = HYPERVISOR_VIRT_START; v; v += (1 << L2_PAGETABLE_SHIFT) ) {
   16.73 +            if (!(l2e_get_flags(idle_pg_table_l2[l2_linear_offset(v)]) & _PAGE_PSE))
   16.74 +                continue;
   16.75 +            if (v >= RO_MPT_VIRT_START && v < RO_MPT_VIRT_END)
   16.76 +                continue;
   16.77 +            l2e_add_flags(&idle_pg_table_l2[l2_linear_offset(v)],
   16.78 +                          _PAGE_GLOBAL);
   16.79          }
   16.80      }
   16.81  
   16.82 -    /* Create page table for ioremap(). */
   16.83 -    ioremap_pt = (void *)alloc_xenheap_page();
   16.84 -    clear_page(ioremap_pt);
   16.85 -    idle_pg_table[l2_table_offset(IOREMAP_VIRT_START)] =
   16.86 -        l2e_create_page(virt_to_page(ioremap_pt), __PAGE_HYPERVISOR);
   16.87 -
   16.88 -    /*
   16.89 -     * Create read-only mapping of MPT for guest-OS use.
   16.90 -     * NB. Remove the global bit so that shadow_mode_translate()==true domains
   16.91 -     *     can reused this address space for their phys-to-machine mapping.
   16.92 -     */
   16.93 -    idle_pg_table[l2_table_offset(RO_MPT_VIRT_START)] =
   16.94 -        l2e_create_page(m2p_pg, (__PAGE_HYPERVISOR | _PAGE_PSE) & ~_PAGE_RW);
   16.95 +    /* Create page table(s) for ioremap(). */
   16.96 +    for (v = IOREMAP_VIRT_START; v != IOREMAP_VIRT_END; v += (1 << L2_PAGETABLE_SHIFT)) {
   16.97 +        ioremap_pt = (void *)alloc_xenheap_page();
   16.98 +        clear_page(ioremap_pt);
   16.99 +        idle_pg_table_l2[l2_linear_offset(v)] =
  16.100 +            l2e_create_page(virt_to_page(ioremap_pt), __PAGE_HYPERVISOR);
  16.101 +    }
  16.102  
  16.103      /* Set up mapping cache for domain pages. */
  16.104 -    mapcache = (l1_pgentry_t *)alloc_xenheap_page();
  16.105 -    clear_page(mapcache);
  16.106 -    idle_pg_table[l2_table_offset(MAPCACHE_VIRT_START)] =
  16.107 -        l2e_create_page(virt_to_page(mapcache), __PAGE_HYPERVISOR);
  16.108 +    mapcache = (l1_pgentry_t*)alloc_xenheap_pages(10-PAGETABLE_ORDER);
  16.109 +    for (v = MAPCACHE_VIRT_START, i = 0;
  16.110 +         v != MAPCACHE_VIRT_END;
  16.111 +         v += (1 << L2_PAGETABLE_SHIFT), i++) {
  16.112 +        clear_page(mapcache + i*L1_PAGETABLE_ENTRIES);
  16.113 +        idle_pg_table_l2[l2_linear_offset(v)] =
  16.114 +            l2e_create_page(virt_to_page(mapcache + i*L1_PAGETABLE_ENTRIES),
  16.115 +                            __PAGE_HYPERVISOR);
  16.116 +    }
  16.117  
  16.118 -    /* Set up linear page table mapping. */
  16.119 -    idle_pg_table[l2_table_offset(LINEAR_PT_VIRT_START)] =
  16.120 -        l2e_create_page(virt_to_page(idle_pg_table), __PAGE_HYPERVISOR);
  16.121 +    for (v = LINEAR_PT_VIRT_START; v != LINEAR_PT_VIRT_END; v += (1 << L2_PAGETABLE_SHIFT)) {
  16.122 +        idle_pg_table_l2[l2_linear_offset(v)] =
  16.123 +            l2e_create_page(virt_to_page(idle_pg_table_l2 + ((v-RDWR_MPT_VIRT_START) >> PAGETABLE_ORDER)),
  16.124 +                            __PAGE_HYPERVISOR);
  16.125 +    }
  16.126  }
  16.127  
  16.128 -void __init zap_low_mappings(void)
  16.129 +void __init zap_low_mappings(l2_pgentry_t *base)
  16.130  {
  16.131      int i;
  16.132 -    for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
  16.133 -        idle_pg_table[i] = l2e_empty();
  16.134 +    u32 addr;
  16.135 +
  16.136 +    for (i = 0; ; i++) {
  16.137 +        addr = (i << L2_PAGETABLE_SHIFT);
  16.138 +        if (addr >= HYPERVISOR_VIRT_START)
  16.139 +            break;
  16.140 +        if (l2e_get_phys(base[i]) != addr)
  16.141 +            continue;
  16.142 +        base[i] = l2e_empty();
  16.143 +    }
  16.144      flush_tlb_all_pge();
  16.145  }
  16.146  
  16.147  void subarch_init_memory(struct domain *dom_xen)
  16.148  {
  16.149 -    unsigned long i, m2p_start_mfn;
  16.150 +    unsigned long i, v, m2p_start_mfn;
  16.151  
  16.152      /*
  16.153       * We are rather picky about the layout of 'struct pfn_info'. The
  16.154 @@ -129,19 +164,24 @@ void subarch_init_memory(struct domain *
  16.155                 offsetof(struct pfn_info, count_info),
  16.156                 offsetof(struct pfn_info, u.inuse._domain),
  16.157                 sizeof(struct pfn_info));
  16.158 -        for ( ; ; ) ;
  16.159 +        for ( ; ; )
  16.160 +            __asm__ __volatile__ ( "hlt" );
  16.161      }
  16.162  
  16.163      /* M2P table is mappable read-only by privileged domains. */
  16.164 -    m2p_start_mfn = l2e_get_pfn(
  16.165 -        idle_pg_table[l2_table_offset(RDWR_MPT_VIRT_START)]);
  16.166 -    for ( i = 0; i < 1024; i++ )
  16.167 -    {
  16.168 -        frame_table[m2p_start_mfn+i].count_info = PGC_allocated | 1;
  16.169 -	/* gdt to make sure it's only mapped read-only by non-privileged
  16.170 -	   domains. */
  16.171 -        frame_table[m2p_start_mfn+i].u.inuse.type_info = PGT_gdt_page | 1;
  16.172 -        page_set_owner(&frame_table[m2p_start_mfn+i], dom_xen);
  16.173 +    for (v  = RDWR_MPT_VIRT_START;
  16.174 +         v != RDWR_MPT_VIRT_END && mpt_size > (v - RDWR_MPT_VIRT_START);
  16.175 +         v += (1 << L2_PAGETABLE_SHIFT)) {
  16.176 +        m2p_start_mfn = l2e_get_pfn(
  16.177 +            idle_pg_table_l2[l2_linear_offset(v)]);
  16.178 +        for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
  16.179 +        {
  16.180 +            frame_table[m2p_start_mfn+i].count_info = PGC_allocated | 1;
  16.181 +            /* gdt to make sure it's only mapped read-only by non-privileged
  16.182 +               domains. */
  16.183 +            frame_table[m2p_start_mfn+i].u.inuse.type_info = PGT_gdt_page | 1;
  16.184 +            page_set_owner(&frame_table[m2p_start_mfn+i], dom_xen);
  16.185 +        }
  16.186      }
  16.187  }
  16.188  
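
The sizing rule in paging_init() above is 4 bytes of machine-to-phys table per
machine page (1MB per GB of RAM), rounded up to a 4MB minimum so lookups cover
the whole 4GB PCI I/O range. A back-of-envelope sketch, assuming a hypothetical
2GB machine (not code from the patch):

    #include <stdio.h>

    int main(void)
    {
        unsigned long max_page = (2UL << 30) >> 12;  /* 2GB of 4KB pages */
        unsigned long mpt_size = max_page * 4;       /* 4 bytes per page */
        if (mpt_size < (4UL << 20))
            mpt_size = 4UL << 20;                    /* 4MB floor for 4GB I/O */
        printf("mpt_size = %lu MB\n", mpt_size >> 20);  /* prints 4 */
        return 0;
    }
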
    17.1 --- a/xen/arch/x86/x86_32/traps.c	Tue May 31 15:20:43 2005 +0000
    17.2 +++ b/xen/arch/x86/x86_32/traps.c	Tue May 31 15:39:28 2005 +0000
    17.3 @@ -92,21 +92,24 @@ void show_registers(struct cpu_user_regs
    17.4  
    17.5  void show_page_walk(unsigned long addr)
    17.6  {
    17.7 -    unsigned long page;
    17.8 +    l2_pgentry_t pmd;
    17.9 +    l1_pgentry_t *pte;
   17.10  
   17.11      if ( addr < PAGE_OFFSET )
   17.12          return;
   17.13  
   17.14      printk("Pagetable walk from %08lx:\n", addr);
   17.15      
   17.16 -    page = l2e_get_value(idle_pg_table[l2_table_offset(addr)]);
   17.17 -    printk(" L2 = %08lx %s\n", page, (page & _PAGE_PSE) ? "(4MB)" : "");
   17.18 -    if ( !(page & _PAGE_PRESENT) || (page & _PAGE_PSE) )
   17.19 +    pmd = idle_pg_table_l2[l2_linear_offset(addr)];
   17.20 +    printk(" L2 = %08llx %s\n", (u64)l2e_get_value(pmd),
   17.21 +           (l2e_get_flags(pmd) & _PAGE_PSE) ? "(2/4MB)" : "");
   17.22 +    if ( !(l2e_get_flags(pmd) & _PAGE_PRESENT) ||
   17.23 +         (l2e_get_flags(pmd) & _PAGE_PSE) )
   17.24          return;
   17.25  
   17.26 -    page &= PAGE_MASK;
   17.27 -    page = ((unsigned long *) __va(page))[l1_table_offset(addr)];
   17.28 -    printk("  L1 = %08lx\n", page);
   17.29 +    pte  = __va(l2e_get_phys(pmd));
   17.30 +    pte += l1_table_offset(addr);
   17.31 +    printk("  L1 = %08llx\n", (u64)l1e_get_value(*pte));
   17.32  }
   17.33  
   17.34  #define DOUBLEFAULT_STACK_SIZE 1024
    18.1 --- a/xen/arch/x86/x86_64/mm.c	Tue May 31 15:20:43 2005 +0000
    18.2 +++ b/xen/arch/x86/x86_64/mm.c	Tue May 31 15:39:28 2005 +0000
    18.3 @@ -79,6 +79,8 @@ void __init paging_init(void)
    18.4      l2_pgentry_t *l2_ro_mpt;
    18.5      struct pfn_info *pg;
    18.6  
    18.7 +    idle0_exec_domain.arch.monitor_table = mk_pagetable(__pa(idle_pg_table));
    18.8 +
    18.9      /* Create user-accessible L2 directory to map the MPT for guests. */
   18.10      l3_ro_mpt = (l3_pgentry_t *)alloc_xenheap_page();
   18.11      clear_page(l3_ro_mpt);
    19.1 --- a/xen/include/asm-x86/config.h	Tue May 31 15:20:43 2005 +0000
    19.2 +++ b/xen/include/asm-x86/config.h	Tue May 31 15:39:28 2005 +0000
    19.3 @@ -7,6 +7,19 @@
    19.4  #ifndef __X86_CONFIG_H__
    19.5  #define __X86_CONFIG_H__
    19.6  
    19.7 +#if defined(__i386__)
    19.8 +// # define CONFIG_X86_PAE 1   /* yes */
    19.9 + # undef CONFIG_X86_PAE      /* no  */
   19.10 +#endif
   19.11 +
   19.12 +#if defined(__x86_64__)
   19.13 +# define CONFIG_PAGING_LEVELS 4
   19.14 +#elif defined(CONFIG_X86_PAE)
   19.15 +# define CONFIG_PAGING_LEVELS 3
   19.16 +#else
   19.17 +# define CONFIG_PAGING_LEVELS 2
   19.18 +#endif
   19.19 +
   19.20  #define CONFIG_X86 1
   19.21  #define CONFIG_X86_HT 1
   19.22  #define CONFIG_SHADOW 1
   19.23 @@ -189,7 +202,7 @@ extern unsigned long _end; /* standard E
   19.24   *  Per-domain mappings                                  ( 4MB)
   19.25   *  Shadow linear pagetable                              ( 4MB) ( 8MB)
   19.26   *  Guest linear pagetable                               ( 4MB) ( 8MB)
   19.27 - *  Machine-to-physical translation table [writable]     ( 4MB)
   19.28 + *  Machine-to-physical translation table [writable]     ( 4MB) (16MB)
   19.29   *  Frame-info table                                     (24MB) (96MB)
   19.30   *   * Start of guest inaccessible area
   19.31   *  Machine-to-physical translation table [read-only]    ( 4MB)
   19.32 @@ -203,8 +216,8 @@ extern unsigned long _end; /* standard E
   19.33  
   19.34  #ifdef CONFIG_X86_PAE
   19.35  # define LINEARPT_MBYTES         8
   19.36 -# define MACHPHYS_MBYTES         4 /* KAF: This needs to be bigger */
   19.37 -# define FRAMETABLE_MBYTES	96 /* 16 GB mem limit (total)      */
   19.38 +# define MACHPHYS_MBYTES        16 /* 1 MB needed per 1 GB memory */
   19.39 +# define FRAMETABLE_MBYTES (MACHPHYS_MBYTES * 6)
   19.40  #else
   19.41  # define LINEARPT_MBYTES         4
   19.42  # define MACHPHYS_MBYTES         4
   19.43 @@ -237,21 +250,21 @@ extern unsigned long _end; /* standard E
   19.44  #define GUEST_SEGMENT_MAX_ADDR  RO_MPT_VIRT_END
   19.45  
   19.46  #ifdef CONFIG_X86_PAE
   19.47 -/* Hypervisor owns top 144MB of virtual address space. */
   19.48 -# define __HYPERVISOR_VIRT_START  0xF7000000
   19.49 -# define HYPERVISOR_VIRT_START   (0xF7000000UL)
   19.50 +/* Hypervisor owns top 168MB of virtual address space. */
   19.51 +# define __HYPERVISOR_VIRT_START  0xF5800000
   19.52 +# define HYPERVISOR_VIRT_START   (0xF5800000UL)
   19.53  #else
   19.54  /* Hypervisor owns top 64MB of virtual address space. */
   19.55  # define __HYPERVISOR_VIRT_START  0xFC000000
   19.56  # define HYPERVISOR_VIRT_START   (0xFC000000UL)
   19.57  #endif
   19.58  
   19.59 -#define ROOT_PAGETABLE_FIRST_XEN_SLOT \
   19.60 +#define L2_PAGETABLE_FIRST_XEN_SLOT \
   19.61      (HYPERVISOR_VIRT_START >> L2_PAGETABLE_SHIFT)
   19.62 -#define ROOT_PAGETABLE_LAST_XEN_SLOT  \
   19.63 +#define L2_PAGETABLE_LAST_XEN_SLOT  \
   19.64      (~0UL >> L2_PAGETABLE_SHIFT)
   19.65 -#define ROOT_PAGETABLE_XEN_SLOTS \
   19.66 -    (ROOT_PAGETABLE_LAST_XEN_SLOT - ROOT_PAGETABLE_FIRST_XEN_SLOT + 1)
   19.67 +#define L2_PAGETABLE_XEN_SLOTS \
   19.68 +    (L2_PAGETABLE_LAST_XEN_SLOT - L2_PAGETABLE_FIRST_XEN_SLOT + 1)
   19.69  
   19.70  #define PGT_base_page_table PGT_l2_page_table
   19.71  
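
A minimal sketch of how shared code is meant to key off the two new options:
branch on CONFIG_PAGING_LEVELS where only the number of levels matters, and on
CONFIG_X86_PAE for i386-specific quirks (illustrative placement only, not code
from the patch):

    #include <xen/config.h>

    #if CONFIG_PAGING_LEVELS >= 3
    /* 3- and 4-level builds: 64-bit PAE-format ptes. */
    #else
    /* 2-level build: classic 32-bit ptes. */
    #endif

    #ifdef CONFIG_X86_PAE
    /* Quirks specific to i386 PAE only. */
    #endif
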
    20.1 --- a/xen/include/asm-x86/domain.h	Tue May 31 15:20:43 2005 +0000
    20.2 +++ b/xen/include/asm-x86/domain.h	Tue May 31 15:39:28 2005 +0000
    20.3 @@ -119,12 +119,6 @@ struct arch_exec_domain
    20.4      unsigned long shadow_ldt_mapcnt;
    20.5  } __cacheline_aligned;
    20.6  
    20.7 -#define IDLE0_ARCH_EXEC_DOMAIN                                      \
    20.8 -{                                                                   \
    20.9 -    perdomain_ptes: 0,                                              \
   20.10 -    monitor_table:  mk_pagetable(__pa(idle_pg_table))               \
   20.11 -}
   20.12 -
   20.13  #endif /* __ASM_DOMAIN_H__ */
   20.14  
   20.15  /*
    21.1 --- a/xen/include/asm-x86/mm.h	Tue May 31 15:20:43 2005 +0000
    21.2 +++ b/xen/include/asm-x86/mm.h	Tue May 31 15:39:28 2005 +0000
    21.3 @@ -76,15 +76,15 @@ struct pfn_info
    21.4   /* Owning guest has pinned this page to its current type? */
    21.5  #define _PGT_pinned         27
    21.6  #define PGT_pinned          (1U<<_PGT_pinned)
    21.7 - /* The 10 most significant bits of virt address if this is a page table. */
    21.8 -#define PGT_va_shift        17
    21.9 -#define PGT_va_mask         (((1U<<10)-1)<<PGT_va_shift)
   21.10 + /* The 11 most significant bits of virt address if this is a page table. */
   21.11 +#define PGT_va_shift        16
   21.12 +#define PGT_va_mask         (((1U<<11)-1)<<PGT_va_shift)
   21.13   /* Is the back pointer still mutable (i.e. not fixed yet)? */
   21.14 -#define PGT_va_mutable      (((1U<<10)-1)<<PGT_va_shift)
   21.15 +#define PGT_va_mutable      (((1U<<11)-1)<<PGT_va_shift)
   21.16   /* Is the back pointer unknown (e.g., p.t. is mapped at multiple VAs)? */
   21.17 -#define PGT_va_unknown      (((1U<<10)-2)<<PGT_va_shift)
   21.18 - /* 17-bit count of uses of this frame as its current type. */
   21.19 -#define PGT_count_mask      ((1U<<17)-1)
   21.20 +#define PGT_va_unknown      (((1U<<11)-2)<<PGT_va_shift)
   21.21 + /* 16-bit count of uses of this frame as its current type. */
   21.22 +#define PGT_count_mask      ((1U<<16)-1)
   21.23  
   21.24  #define PGT_mfn_mask        ((1U<<20)-1) /* mfn mask for shadow types */
   21.25  
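
The va backpointer grows from 10 to 11 bits because it records the l2 slot at
which a page table is mapped: with 2-level paging that index is VA>>22 (10
bits), while under PAE it is VA>>21 (11 bits, 4 x 512 slots), so the type-use
count gives up a bit to compensate. A hedged sketch of the extraction (the
helper name is hypothetical):

    /* Revised type_info budget per the definitions above (bits not
     * shown are unchanged):
     *   bit 27       PGT_pinned
     *   bits 26..16  PGT_va    (11 bits)
     *   bits 15..0   PGT_count (16 bits, down from 17)
     */
    static inline unsigned int type_va_index(unsigned int type_info)
    {
        return (type_info & PGT_va_mask) >> PGT_va_shift;
    }
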
    22.1 --- a/xen/include/asm-x86/page.h	Tue May 31 15:20:43 2005 +0000
    22.2 +++ b/xen/include/asm-x86/page.h	Tue May 31 15:39:28 2005 +0000
    22.3 @@ -8,23 +8,132 @@
    22.4  #define PAGE_SIZE           (1 << PAGE_SHIFT)
    22.5  #endif
    22.6  #define PAGE_MASK           (~(PAGE_SIZE-1))
    22.7 +#define PAGE_FLAG_MASK      (~0U)
    22.8 +
    22.9 +#ifndef __ASSEMBLY__
   22.10 +# include <asm/types.h>
   22.11 +#endif
   22.12  
   22.13  #if defined(__i386__)
   22.14 -#include <asm/x86_32/page.h>
   22.15 +# include <asm/x86_32/page.h>
   22.16  #elif defined(__x86_64__)
   22.17 -#include <asm/x86_64/page.h>
   22.18 +# include <asm/x86_64/page.h>
   22.19  #endif
   22.20  
   22.21 +/* Get pte contents as an integer (intpte_t). */
   22.22 +#define l1e_get_value(x)           ((x).l1)
   22.23 +#define l2e_get_value(x)           ((x).l2)
   22.24 +#define l3e_get_value(x)           ((x).l3)
   22.25 +#define l4e_get_value(x)           ((x).l4)
   22.26 +
   22.27 +/* Get pfn mapped by pte (unsigned long). */
   22.28 +#define l1e_get_pfn(x)             \
   22.29 +    ((unsigned long)(((x).l1 & (PADDR_MASK&PAGE_MASK)) >> PAGE_SHIFT))
   22.30 +#define l2e_get_pfn(x)             \
   22.31 +    ((unsigned long)(((x).l2 & (PADDR_MASK&PAGE_MASK)) >> PAGE_SHIFT))
   22.32 +#define l3e_get_pfn(x)             \
   22.33 +    ((unsigned long)(((x).l3 & (PADDR_MASK&PAGE_MASK)) >> PAGE_SHIFT))
   22.34 +#define l4e_get_pfn(x)             \
   22.35 +    ((unsigned long)(((x).l4 & (PADDR_MASK&PAGE_MASK)) >> PAGE_SHIFT))
   22.36 +
   22.37 +/* Get physical address of page mapped by pte (physaddr_t). */
   22.38 +#define l1e_get_phys(x)            \
   22.39 +    ((physaddr_t)(((x).l1 & (PADDR_MASK&PAGE_MASK))))
   22.40 +#define l2e_get_phys(x)            \
   22.41 +    ((physaddr_t)(((x).l2 & (PADDR_MASK&PAGE_MASK))))
   22.42 +#define l3e_get_phys(x)            \
   22.43 +    ((physaddr_t)(((x).l3 & (PADDR_MASK&PAGE_MASK))))
   22.44 +#define l4e_get_phys(x)            \
   22.45 +    ((physaddr_t)(((x).l4 & (PADDR_MASK&PAGE_MASK))))
   22.46 +
   22.47 +/* Get pte access flags (unsigned int). */
   22.48 +#define l1e_get_flags(x)           (get_pte_flags((x).l1))
   22.49 +#define l2e_get_flags(x)           (get_pte_flags((x).l2))
   22.50 +#define l3e_get_flags(x)           (get_pte_flags((x).l3))
   22.51 +#define l4e_get_flags(x)           (get_pte_flags((x).l4))
   22.52 +
   22.53 +/* Construct an empty pte. */
   22.54 +#define l1e_empty()                ((l1_pgentry_t) { 0 })
   22.55 +#define l2e_empty()                ((l2_pgentry_t) { 0 })
   22.56 +#define l3e_empty()                ((l3_pgentry_t) { 0 })
   22.57 +#define l4e_empty()                ((l4_pgentry_t) { 0 })
   22.58 +
   22.59 +/* Construct a pte from a pfn and access flags. */
   22.60 +#define l1e_create_pfn(pfn, flags) \
   22.61 +    ((l1_pgentry_t) { ((intpte_t)(pfn) << PAGE_SHIFT) | put_pte_flags(flags) })
   22.62 +#define l2e_create_pfn(pfn, flags) \
   22.63 +    ((l2_pgentry_t) { ((intpte_t)(pfn) << PAGE_SHIFT) | put_pte_flags(flags) })
   22.64 +#define l3e_create_pfn(pfn, flags) \
   22.65 +    ((l3_pgentry_t) { ((intpte_t)(pfn) << PAGE_SHIFT) | put_pte_flags(flags) })
   22.66 +#define l4e_create_pfn(pfn, flags) \
   22.67 +    ((l4_pgentry_t) { ((intpte_t)(pfn) << PAGE_SHIFT) | put_pte_flags(flags) })
   22.68 +
   22.69 +/* Construct a pte from a physical address and access flags. */
   22.70 +#define l1e_create_phys(pa, flags) \
   22.71 +    ((l1_pgentry_t) { (pa) | put_pte_flags(flags) })
   22.72 +#define l2e_create_phys(pa, flags) \
   22.73 +    ((l2_pgentry_t) { (pa) | put_pte_flags(flags) })
   22.74 +#define l3e_create_phys(pa, flags) \
   22.75 +    ((l3_pgentry_t) { (pa) | put_pte_flags(flags) })
   22.76 +#define l4e_create_phys(pa, flags) \
   22.77 +    ((l4_pgentry_t) { (pa) | put_pte_flags(flags) })
   22.78 +
   22.79 +/* Add extra flags to an existing pte. */
   22.80 +#define l1e_add_flags(x, flags)    ((x)->l1 |= put_pte_flags(flags))
   22.81 +#define l2e_add_flags(x, flags)    ((x)->l2 |= put_pte_flags(flags))
   22.82 +#define l3e_add_flags(x, flags)    ((x)->l3 |= put_pte_flags(flags))
   22.83 +#define l4e_add_flags(x, flags)    ((x)->l4 |= put_pte_flags(flags))
   22.84 +
   22.85 +/* Remove flags from an existing pte. */
   22.86 +#define l1e_remove_flags(x, flags) ((x)->l1 &= ~put_pte_flags(flags))
   22.87 +#define l2e_remove_flags(x, flags) ((x)->l2 &= ~put_pte_flags(flags))
   22.88 +#define l3e_remove_flags(x, flags) ((x)->l3 &= ~put_pte_flags(flags))
   22.89 +#define l4e_remove_flags(x, flags) ((x)->l4 &= ~put_pte_flags(flags))
   22.90 +
   22.91 +/* Check if a pte's page mapping or significant access flags have changed. */
   22.92 +#define l1e_has_changed(x,y,flags) \
   22.93 +    ( !!(((x)->l1 ^ (y)->l1) & ((PADDR_MASK&PAGE_MASK)|put_pte_flags(flags))) )
   22.94 +#define l2e_has_changed(x,y,flags) \
   22.95 +    ( !!(((x)->l2 ^ (y)->l2) & ((PADDR_MASK&PAGE_MASK)|put_pte_flags(flags))) )
   22.96 +#define l3e_has_changed(x,y,flags) \
   22.97 +    ( !!(((x)->l3 ^ (y)->l3) & ((PADDR_MASK&PAGE_MASK)|put_pte_flags(flags))) )
   22.98 +#define l4e_has_changed(x,y,flags) \
   22.99 +    ( !!(((x)->l4 ^ (y)->l4) & ((PADDR_MASK&PAGE_MASK)|put_pte_flags(flags))) )
  22.100 +
  22.101 +/* Pagetable walking. */
  22.102 +#define l2e_to_l1e(x)              ((l1_pgentry_t *)__va(l2e_get_phys(x)))
  22.103 +#define l3e_to_l2e(x)              ((l2_pgentry_t *)__va(l3e_get_phys(x)))
  22.104 +#define l4e_to_l3e(x)              ((l3_pgentry_t *)__va(l4e_get_phys(x)))
  22.105 +
  22.106 +/* Given a virtual address, get an entry offset into a page table. */
  22.107 +#define l1_table_offset(a)         \
  22.108 +    (((a) >> L1_PAGETABLE_SHIFT) & (L1_PAGETABLE_ENTRIES - 1))
  22.109 +#define l2_table_offset(a)         \
  22.110 +    (((a) >> L2_PAGETABLE_SHIFT) & (L2_PAGETABLE_ENTRIES - 1))
  22.111 +#define l3_table_offset(a)         \
  22.112 +    (((a) >> L3_PAGETABLE_SHIFT) & (L3_PAGETABLE_ENTRIES - 1))
  22.113 +#define l4_table_offset(a)         \
  22.114 +    (((a) >> L4_PAGETABLE_SHIFT) & (L4_PAGETABLE_ENTRIES - 1))
  22.115 +
  22.116  /* Convert a pointer to a page-table entry into pagetable slot index. */
  22.117  #define pgentry_ptr_to_slot(_p) \
  22.118      (((unsigned long)(_p) & ~PAGE_MASK) / sizeof(*(_p)))
  22.119  
  22.120  /* Page-table type. */
  22.121  #ifndef __ASSEMBLY__
  22.122 -typedef struct { unsigned long pt_lo; } pagetable_t;
  22.123 -#define pagetable_val(_x)   ((_x).pt_lo)
  22.124 -#define pagetable_get_pfn(_x) ((_x).pt_lo >> PAGE_SHIFT)
  22.125 -#define mk_pagetable(_x)    ( (pagetable_t) { (_x) } )
  22.126 +#if CONFIG_PAGING_LEVELS == 2
  22.127 +/* x86_32 default */
  22.128 +typedef struct { u32 pfn; } pagetable_t;
  22.129 +#elif CONFIG_PAGING_LEVELS == 3
  22.130 +/* x86_32 PAE */
  22.131 +typedef struct { u32 pfn; } pagetable_t;
  22.132 +#elif CONFIG_PAGING_LEVELS == 4
  22.133 +/* x86_64 */
  22.134 +typedef struct { u64 pfn; } pagetable_t;
  22.135 +#endif
  22.136 +#define pagetable_get_phys(_x) ((physaddr_t)(_x).pfn << PAGE_SHIFT)
  22.137 +#define pagetable_get_pfn(_x)  ((_x).pfn)
   22.138 +#define mk_pagetable(_phys)    ({ pagetable_t __p; __p.pfn = (_phys) >> PAGE_SHIFT; __p; })
  22.139  #endif
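
pagetable_t now stores a frame number rather than a raw address, which is why
every pagetable_val() caller in this patch became pagetable_get_phys() or
pagetable_get_pfn(). A minimal usage sketch (hypothetical address; the demo
function is not part of the patch):

    static inline void pagetable_demo(void)
    {
        pagetable_t pt = mk_pagetable(0x12345000UL);
        unsigned long pfn = pagetable_get_pfn(pt);   /* 0x12345    */
        physaddr_t   phys = pagetable_get_phys(pt);  /* 0x12345000 */
        (void)pfn; (void)phys;
    }
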
  22.140  
  22.141  #define clear_page(_p)      memset((void *)(_p), 0, PAGE_SIZE)
  22.142 @@ -49,6 +158,7 @@ typedef struct { unsigned long pt_lo; } 
  22.143  #define l4e_create_page(_x,_y) (l4e_create_pfn(page_to_pfn(_x),(_y)))
  22.144  
  22.145  /* High table entries are reserved by the hypervisor. */
  22.146 +/* FIXME: this breaks with PAE -- kraxel */
  22.147  #define DOMAIN_ENTRIES_PER_L2_PAGETABLE     \
  22.148    (HYPERVISOR_VIRT_START >> L2_PAGETABLE_SHIFT)
  22.149  #define HYPERVISOR_ENTRIES_PER_L2_PAGETABLE \
  22.150 @@ -78,7 +188,14 @@ typedef struct { unsigned long pt_lo; } 
  22.151      (l2e_get_pfn(linear_l2_table(_ed)[_va>>L2_PAGETABLE_SHIFT]))
  22.152  
  22.153  #ifndef __ASSEMBLY__
  22.154 +#if CONFIG_PAGING_LEVELS == 3
  22.155  extern root_pgentry_t idle_pg_table[ROOT_PAGETABLE_ENTRIES];
  22.156 +extern l3_pgentry_t   idle_pg_table_l3[ROOT_PAGETABLE_ENTRIES];
  22.157 +extern l2_pgentry_t   idle_pg_table_l2[ROOT_PAGETABLE_ENTRIES*L2_PAGETABLE_ENTRIES];
  22.158 +#else
  22.159 +extern root_pgentry_t idle_pg_table[ROOT_PAGETABLE_ENTRIES];
  22.160 +extern l2_pgentry_t   idle_pg_table_l2[ROOT_PAGETABLE_ENTRIES];
  22.161 +#endif
  22.162  extern void paging_init(void);
  22.163  #endif
  22.164  
  22.165 @@ -96,17 +213,17 @@ extern void paging_init(void);
  22.166              : : "r" (mmu_cr4_features) );                               \
  22.167      } while ( 0 )
  22.168  
  22.169 -#define _PAGE_PRESENT  0x001UL
  22.170 -#define _PAGE_RW       0x002UL
  22.171 -#define _PAGE_USER     0x004UL
  22.172 -#define _PAGE_PWT      0x008UL
  22.173 -#define _PAGE_PCD      0x010UL
  22.174 -#define _PAGE_ACCESSED 0x020UL
  22.175 -#define _PAGE_DIRTY    0x040UL
  22.176 -#define _PAGE_PAT      0x080UL
  22.177 -#define _PAGE_PSE      0x080UL
  22.178 -#define _PAGE_GLOBAL   0x100UL
  22.179 -#define _PAGE_AVAIL    0xe00UL
  22.180 +#define _PAGE_PRESENT  0x001U
  22.181 +#define _PAGE_RW       0x002U
  22.182 +#define _PAGE_USER     0x004U
  22.183 +#define _PAGE_PWT      0x008U
  22.184 +#define _PAGE_PCD      0x010U
  22.185 +#define _PAGE_ACCESSED 0x020U
  22.186 +#define _PAGE_DIRTY    0x040U
  22.187 +#define _PAGE_PAT      0x080U
  22.188 +#define _PAGE_PSE      0x080U
  22.189 +#define _PAGE_GLOBAL   0x100U
  22.190 +#define _PAGE_AVAIL    0xE00U
  22.191  
  22.192  #define __PAGE_HYPERVISOR \
  22.193      (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED)
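
With the accessors hoisted into this common header, entries at every level are
built and inspected only through the typed helpers, and dumps use the
width-matched PRIpte. A short usage sketch (the demo function is hypothetical,
not code from the patch):

    static inline void pte_demo(void)
    {
        l1_pgentry_t e = l1e_create_pfn(0x1000, __PAGE_HYPERVISOR);

        if ( l1e_get_flags(e) & _PAGE_PRESENT )  /* set by __PAGE_HYPERVISOR */
            l1e_remove_flags(&e, _PAGE_RW);      /* downgrade to read-only   */

        printk("pte=%" PRIpte " pfn=%lx\n",
               l1e_get_value(e), l1e_get_pfn(e));
    }
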
    23.1 --- a/xen/include/asm-x86/shadow.h	Tue May 31 15:20:43 2005 +0000
    23.2 +++ b/xen/include/asm-x86/shadow.h	Tue May 31 15:39:28 2005 +0000
    23.3 @@ -130,10 +130,12 @@ extern void shadow_l1_normal_pt_update(s
    23.4  extern void shadow_l2_normal_pt_update(struct domain *d,
    23.5                                         unsigned long pa, l2_pgentry_t l2e,
    23.6                                         struct map_dom_mem_cache *cache);
    23.7 -#ifdef __x86_64__
    23.8 +#if CONFIG_PAGING_LEVELS >= 3
    23.9  extern void shadow_l3_normal_pt_update(struct domain *d,
   23.10                                         unsigned long pa, l3_pgentry_t l3e,
   23.11                                         struct map_dom_mem_cache *cache);
   23.12 +#endif
   23.13 +#if CONFIG_PAGING_LEVELS >= 4
   23.14  extern void shadow_l4_normal_pt_update(struct domain *d,
   23.15                                         unsigned long pa, l4_pgentry_t l4e,
   23.16                                         struct map_dom_mem_cache *cache);
   23.17 @@ -1682,7 +1684,7 @@ static inline void update_pagetables(str
   23.18          // HACK ALERT: there's currently no easy way to figure out if a domU
   23.19          // has set its arch.guest_table to zero, vs not yet initialized it.
   23.20          //
   23.21 -        paging_enabled = !!pagetable_val(ed->arch.guest_table);
   23.22 +        paging_enabled = !!pagetable_get_phys(ed->arch.guest_table);
   23.23  
   23.24      /*
   23.25       * We don't call __update_pagetables() when vmx guest paging is
    24.1 --- a/xen/include/asm-x86/smp.h	Tue May 31 15:20:43 2005 +0000
    24.2 +++ b/xen/include/asm-x86/smp.h	Tue May 31 15:39:28 2005 +0000
    24.3 @@ -39,7 +39,12 @@ extern cpumask_t cpu_sibling_map[];
    24.4  extern void smp_flush_tlb(void);
    24.5  extern void smp_invalidate_rcv(void);		/* Process an NMI */
    24.6  extern void (*mtrr_hook) (void);
    24.7 -extern void zap_low_mappings (void);
    24.8 +
    24.9 +#ifdef CONFIG_X86_64
   24.10 +extern void zap_low_mappings(void);
   24.11 +#else
   24.12 +extern void zap_low_mappings(l2_pgentry_t *base);
   24.13 +#endif
   24.14  
   24.15  #define MAX_APICID 256
   24.16  extern u8 x86_cpu_to_apicid[];
    25.1 --- a/xen/include/asm-x86/types.h	Tue May 31 15:20:43 2005 +0000
    25.2 +++ b/xen/include/asm-x86/types.h	Tue May 31 15:39:28 2005 +0000
    25.3 @@ -44,11 +44,17 @@ typedef signed long long s64;
    25.4  typedef unsigned long long u64;
    25.5  #define BITS_PER_LONG 32
    25.6  typedef unsigned int size_t;
    25.7 +#if defined(CONFIG_X86_PAE)
    25.8 +typedef u64 physaddr_t;
    25.9 +#else
   25.10 +typedef u32 physaddr_t;
   25.11 +#endif
   25.12  #elif defined(__x86_64__)
   25.13  typedef signed long s64;
   25.14  typedef unsigned long u64;
   25.15  #define BITS_PER_LONG 64
   25.16  typedef unsigned long size_t;
   25.17 +typedef u64 physaddr_t;
   25.18  #endif
   25.19  
   25.20  /* DMA addresses come in generic and 64-bit flavours.  */
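
The point of physaddr_t: under PAE a 32-bit machine can hold frames above 4GB,
so a physical address no longer fits in unsigned long even though the frame
number still does. A trivial sketch (hypothetical values, not from the patch):

    static inline void physaddr_demo(void)
    {
        physaddr_t pa = (physaddr_t)5 << 30;            /* 5GB: needs 64 bits */
        unsigned long pfn = (unsigned long)(pa >> 12);  /* pfn still fits 32  */
        (void)pfn;
    }
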
    26.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    26.2 +++ b/xen/include/asm-x86/x86_32/page-2level.h	Tue May 31 15:39:28 2005 +0000
    26.3 @@ -0,0 +1,49 @@
    26.4 +#ifndef __X86_32_PAGE_2L_H__
    26.5 +#define __X86_32_PAGE_2L_H__
    26.6 +
    26.7 +#define L1_PAGETABLE_SHIFT      12
    26.8 +#define L2_PAGETABLE_SHIFT      22
    26.9 +#define PAGE_SHIFT              L1_PAGETABLE_SHIFT
   26.10 +#define ROOT_PAGETABLE_SHIFT    L2_PAGETABLE_SHIFT
   26.11 +
   26.12 +#define PAGETABLE_ORDER         10
   26.13 +#define L1_PAGETABLE_ENTRIES    (1<<PAGETABLE_ORDER)
   26.14 +#define L2_PAGETABLE_ENTRIES    (1<<PAGETABLE_ORDER)
   26.15 +#define ROOT_PAGETABLE_ENTRIES  L2_PAGETABLE_ENTRIES
   26.16 +
   26.17 +#define PADDR_BITS              32
   26.18 +#define PADDR_MASK              (~0UL)
   26.19 +
   26.20 +#ifndef __ASSEMBLY__
   26.21 +
   26.22 +#include <asm/types.h>
   26.23 +
   26.24 +/* read access (should only be used for debug printk's) */
   26.25 +typedef u32 intpte_t;
   26.26 +#define PRIpte "08x"
   26.27 +
   26.28 +typedef struct { intpte_t l1; } l1_pgentry_t;
   26.29 +typedef struct { intpte_t l2; } l2_pgentry_t;
   26.30 +typedef l2_pgentry_t root_pgentry_t;
   26.31 +
   26.32 +#endif /* !__ASSEMBLY__ */
   26.33 +
   26.34 +/* root table */
   26.35 +#define root_get_pfn              l2e_get_pfn
   26.36 +#define root_get_flags            l2e_get_flags
   26.37 +#define root_get_value            l2e_get_value
   26.38 +#define root_empty                l2e_empty
   26.39 +#define root_create_phys          l2e_create_phys
   26.40 +#define PGT_root_page_table       PGT_l2_page_table
   26.41 +
   26.42 +/* misc */
   26.43 +#define is_guest_l1_slot(_s)    (1)
   26.44 +#define is_guest_l2_slot(_t,_s) ((_s) < L2_PAGETABLE_FIRST_XEN_SLOT)
   26.45 +
   26.46 +#define get_pte_flags(x) ((int)(x) & 0xFFF)
   26.47 +#define put_pte_flags(x) ((intpte_t)(x))
   26.48 +
   26.49 +#define L1_DISALLOW_MASK (0xFFFFF180U) /* PAT/GLOBAL */
   26.50 +#define L2_DISALLOW_MASK (0xFFFFF180U) /* PSE/GLOBAL */
   26.51 +
   26.52 +#endif /* __X86_32_PAGE_2L_H__ */
    27.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    27.2 +++ b/xen/include/asm-x86/x86_32/page-3level.h	Tue May 31 15:39:28 2005 +0000
    27.3 @@ -0,0 +1,56 @@
    27.4 +#ifndef __X86_32_PAGE_3L_H__
    27.5 +#define __X86_32_PAGE_3L_H__
    27.6 +
    27.7 +#define L1_PAGETABLE_SHIFT      12
    27.8 +#define L2_PAGETABLE_SHIFT      21
    27.9 +#define L3_PAGETABLE_SHIFT      30
   27.10 +#define PAGE_SHIFT              L1_PAGETABLE_SHIFT
   27.11 +#define ROOT_PAGETABLE_SHIFT    L3_PAGETABLE_SHIFT
   27.12 +
   27.13 +#define PAGETABLE_ORDER         9
   27.14 +#define L1_PAGETABLE_ENTRIES    (1<<PAGETABLE_ORDER)
   27.15 +#define L2_PAGETABLE_ENTRIES    (1<<PAGETABLE_ORDER)
   27.16 +#define L3_PAGETABLE_ENTRIES    4
   27.17 +#define ROOT_PAGETABLE_ENTRIES  L3_PAGETABLE_ENTRIES
   27.18 +
   27.19 +#define PADDR_BITS              52
   27.20 +#define PADDR_MASK              ((1ULL << PADDR_BITS)-1)
   27.21 +
   27.22 +#ifndef __ASSEMBLY__
   27.23 +
   27.24 +#include <asm/types.h>
   27.25 +
   27.26 +/* read access (should only be used for debug printk's) */
   27.27 +typedef u64 intpte_t;
   27.28 +#define PRIpte "016llx"
   27.29 +
   27.30 +typedef struct { intpte_t l1; } l1_pgentry_t;
   27.31 +typedef struct { intpte_t l2; } l2_pgentry_t;
   27.32 +typedef struct { intpte_t l3; } l3_pgentry_t;
   27.33 +typedef l3_pgentry_t root_pgentry_t;
   27.34 +
   27.35 +#endif /* !__ASSEMBLY__ */
   27.36 +
   27.37 +/* root table */
   27.38 +#define root_get_pfn              l3e_get_pfn
   27.39 +#define root_get_flags            l3e_get_flags
   27.40 +#define root_get_value            l3e_get_value
   27.41 +#define root_empty                l3e_empty
   27.42 +#define root_init_phys            l3e_create_phys
   27.43 +#define PGT_root_page_table       PGT_l3_page_table
   27.44 +
   27.45 +/* misc */
   27.46 +#define is_guest_l1_slot(_s)    (1)
   27.47 +#define is_guest_l2_slot(_t,_s) \
   27.48 +    ((3 != (((_t) & PGT_va_mask) >> PGT_va_shift)) || \
   27.49 +     ((_s) < (L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES-1))))
   27.50 +#define is_guest_l3_slot(_s)    (1)
   27.51 +
   27.52 +#define get_pte_flags(x) ((int)((x) >> 40) | ((int)(x) & 0xFFF))
   27.53 +#define put_pte_flags(x) ((((intpte_t)((x) & ~0xFFF)) << 40) | ((x) & 0xFFF))
   27.54 +
   27.55 +#define L1_DISALLOW_MASK (0xFFFFF180U & ~_PAGE_NX) /* PAT/GLOBAL */
   27.56 +#define L2_DISALLOW_MASK (0xFFFFF180U & ~_PAGE_NX) /* PSE/GLOBAL */
   27.57 +#define L3_DISALLOW_MASK (0xFFFFF1E6U)             /* must-be-zero */
   27.58 +
   27.59 +#endif /* __X86_32_PAGE_3L_H__ */
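
The flag packing is the subtle part of this 3-level header: the low 12 flag
bits stay in place, while high pte bits (NX, pte bit 63) appear in the int
flags view shifted down by 40, i.e. at bit 23. A worked round trip
(hypothetical values; the demo function is not part of the patch):

    static inline void pte_flags_demo(void)
    {
        unsigned int f  = 0x800023U;          /* NX(bit 23) | ACCESSED|RW|P */
        intpte_t    raw = put_pte_flags(f);   /* 0x8000000000000023         */
        unsigned int rt = get_pte_flags(raw); /* 0x800023 again             */
        (void)rt;
    }
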
    28.1 --- a/xen/include/asm-x86/x86_32/page.h	Tue May 31 15:20:43 2005 +0000
    28.2 +++ b/xen/include/asm-x86/x86_32/page.h	Tue May 31 15:39:28 2005 +0000
    28.3 @@ -2,134 +2,23 @@
    28.4  #ifndef __X86_32_PAGE_H__
    28.5  #define __X86_32_PAGE_H__
    28.6  
    28.7 -#define L1_PAGETABLE_SHIFT      12
    28.8 -#define L2_PAGETABLE_SHIFT      22
    28.9 -#define PAGE_SHIFT              L1_PAGETABLE_SHIFT
   28.10 -#define ROOT_PAGETABLE_SHIFT    L2_PAGETABLE_SHIFT
   28.11 -
   28.12 -#define PAGETABLE_ORDER         10
   28.13 -#define L1_PAGETABLE_ENTRIES    (1<<PAGETABLE_ORDER)
   28.14 -#define L2_PAGETABLE_ENTRIES    (1<<PAGETABLE_ORDER)
   28.15 -#define ROOT_PAGETABLE_ENTRIES  L2_PAGETABLE_ENTRIES
   28.16 -
   28.17  #define __PAGE_OFFSET           (0xFF000000)
   28.18  
   28.19 -#define PADDR_BITS              32
   28.20  #define VADDR_BITS              32
   28.21 -#define PADDR_MASK              (~0UL)
   28.22  #define VADDR_MASK              (~0UL)
   28.23  
   28.24 -#define _PAGE_NX                0UL
   28.25 -#define PAGE_FLAG_MASK          0xfff
   28.26 -
   28.27 -#ifndef __ASSEMBLY__
   28.28 -#include <xen/config.h>
   28.29 -#include <asm/types.h>
   28.30 -typedef struct { u32 l1_lo; } l1_pgentry_t;
   28.31 -typedef struct { u32 l2_lo; } l2_pgentry_t;
   28.32 -typedef l2_pgentry_t root_pgentry_t;
   28.33 -
   28.34 -/* read access (deprecated) */
   28.35 -#define l1e_get_value(_x)      ((unsigned long)((_x).l1_lo))
   28.36 -#define l2e_get_value(_x)      ((unsigned long)((_x).l2_lo))
   28.37 -
   28.38 -/* read access */
   28.39 -#define l1e_get_pfn(_x)        ((unsigned long)((_x).l1_lo >> PAGE_SHIFT))
   28.40 -#define l1e_get_phys(_x)       ((unsigned long)((_x).l1_lo &  PAGE_MASK))
   28.41 -#define l1e_get_flags(_x)      ((unsigned long)((_x).l1_lo &  PAGE_FLAG_MASK))
   28.42 -
   28.43 -#define l2e_get_pfn(_x)        ((unsigned long)((_x).l2_lo >> PAGE_SHIFT))
   28.44 -#define l2e_get_phys(_x)       ((unsigned long)((_x).l2_lo &  PAGE_MASK))
   28.45 -#define l2e_get_flags(_x)      ((unsigned long)((_x).l2_lo &  PAGE_FLAG_MASK))
   28.46 +#define _PAGE_NX                0U
   28.47  
   28.48 -/* write access */
   28.49 -static inline l1_pgentry_t l1e_empty(void)
   28.50 -{
   28.51 -    l1_pgentry_t e = { .l1_lo = 0 };
   28.52 -    return e;
   28.53 -}
   28.54 -static inline l1_pgentry_t l1e_create_pfn(u32 pfn, u32 flags)
   28.55 -{
   28.56 -    l1_pgentry_t e = { .l1_lo = (pfn << PAGE_SHIFT) | flags };
   28.57 -    return e;
   28.58 -}
   28.59 -static inline l1_pgentry_t l1e_create_phys(u32 addr, u32 flags)
   28.60 -{
   28.61 -    l1_pgentry_t e = { .l1_lo = (addr & PAGE_MASK) | flags };
   28.62 -    return e;
   28.63 -}
   28.64 -static inline void l1e_add_flags(l1_pgentry_t *e, u32 flags)
   28.65 -{
   28.66 -    e->l1_lo |= flags;
   28.67 -}
   28.68 -static inline void l1e_remove_flags(l1_pgentry_t *e, u32 flags)
   28.69 -{
   28.70 -    e->l1_lo &= ~flags;
   28.71 -}
   28.72 -
   28.73 -static inline l2_pgentry_t l2e_empty(void)
   28.74 -{
   28.75 -    l2_pgentry_t e = { .l2_lo = 0 };
   28.76 -    return e;
   28.77 -}
   28.78 -static inline l2_pgentry_t l2e_create_pfn(u32 pfn, u32 flags)
   28.79 -{
   28.80 -    l2_pgentry_t e = { .l2_lo = (pfn << PAGE_SHIFT) | flags };
   28.81 -    return e;
   28.82 -}
   28.83 -static inline l2_pgentry_t l2e_create_phys(u32 addr, u32 flags)
   28.84 -{
   28.85 -    l2_pgentry_t e = { .l2_lo = (addr & PAGE_MASK) | flags };
   28.86 -    return e;
   28.87 -}
   28.88 -static inline void l2e_add_flags(l2_pgentry_t *e, u32 flags)
   28.89 -{
   28.90 -    e->l2_lo |= flags;
   28.91 -}
   28.92 -static inline void l2e_remove_flags(l2_pgentry_t *e, u32 flags)
   28.93 -{
   28.94 -    e->l2_lo &= ~flags;
   28.95 -}
   28.96 -
   28.97 -/* check entries */
   28.98 -static inline int l1e_has_changed(l1_pgentry_t *e1, l1_pgentry_t *e2, u32 flags)
   28.99 -{
  28.100 -    return ((e1->l1_lo ^ e2->l1_lo) & (PAGE_MASK | flags)) != 0;
  28.101 -}
  28.102 -static inline int l2e_has_changed(l2_pgentry_t *e1, l2_pgentry_t *e2, u32 flags)
  28.103 -{
  28.104 -    return ((e1->l2_lo ^ e2->l2_lo) & (PAGE_MASK | flags)) != 0;
  28.105 -}
  28.106 -
  28.107 -#endif /* !__ASSEMBLY__ */
  28.108 -
  28.109 -/* Pagetable walking. */
  28.110 -#define l2e_to_l1e(_x) \
  28.111 -  ((l1_pgentry_t *)__va(l2e_get_phys(_x)))
  28.112 -
  28.113 -/* Given a virtual address, get an entry offset into a page table. */
  28.114 -#define l1_table_offset(_a) \
  28.115 -  (((_a) >> L1_PAGETABLE_SHIFT) & (L1_PAGETABLE_ENTRIES - 1))
  28.116 -#define l2_table_offset(_a) \
  28.117 -  ((_a) >> L2_PAGETABLE_SHIFT)
  28.118 +#include <xen/config.h>
  28.119 +#ifdef CONFIG_X86_PAE
  28.120 +# include <asm/x86_32/page-3level.h>
  28.121 +#else
  28.122 +# include <asm/x86_32/page-2level.h>
  28.123 +#endif
  28.124  
  28.125  /* Given a virtual address, get an entry offset into a linear page table. */
  28.126 -#define l1_linear_offset(_a) ((_a) >> PAGE_SHIFT)
  28.127 -
  28.128 -#define is_guest_l1_slot(_s) (1)
  28.129 -#define is_guest_l2_slot(_s) ((_s) < ROOT_PAGETABLE_FIRST_XEN_SLOT)
  28.130 -
  28.131 -#define root_get_pfn              l2e_get_pfn
  28.132 -#define root_get_flags            l2e_get_flags
  28.133 -#define root_get_value            l2e_get_value
  28.134 -#define root_empty                l2e_empty
  28.135 -#define root_create_phys          l2e_create_phys
  28.136 -#define PGT_root_page_table       PGT_l2_page_table
  28.137 -
  28.138 -#define L1_DISALLOW_MASK (3UL << 7)
  28.139 -#define L2_DISALLOW_MASK (7UL << 7)
  28.140 -#define L3_DISALLOW_MASK (7UL << 7)
  28.141 -#define L4_DISALLOW_MASK (7UL << 7)
  28.142 +#define l1_linear_offset(_a) ((_a) >> L1_PAGETABLE_SHIFT)
  28.143 +#define l2_linear_offset(_a) ((_a) >> L2_PAGETABLE_SHIFT)
  28.144  
  28.145  #endif /* __X86_32_PAGE_H__ */
  28.146  
    29.1 --- a/xen/include/asm-x86/x86_64/page.h	Tue May 31 15:20:43 2005 +0000
    29.2 +++ b/xen/include/asm-x86/x86_64/page.h	Tue May 31 15:39:28 2005 +0000
    29.3 @@ -24,181 +24,28 @@
    29.4  #define PADDR_MASK              ((1UL << PADDR_BITS)-1)
    29.5  #define VADDR_MASK              ((1UL << VADDR_BITS)-1)
    29.6  
    29.7 -#define _PAGE_NX                (cpu_has_nx ? (1UL<<63) : 0UL)
    29.8 -#define PAGE_FLAG_MASK          0xfff
    29.9 +#ifndef __ASSEMBLY__
   29.10  
   29.11 -#ifndef __ASSEMBLY__
   29.12  #include <xen/config.h>
   29.13  #include <asm/types.h>
   29.14 -typedef struct { u64 l1_lo; } l1_pgentry_t;
   29.15 -typedef struct { u64 l2_lo; } l2_pgentry_t;
   29.16 -typedef struct { u64 l3_lo; } l3_pgentry_t;
   29.17 -typedef struct { u64 l4_lo; } l4_pgentry_t;
   29.18 +
   29.19 +/* read access (should only be used for debug printk's) */
   29.20 +typedef u64 intpte_t;
   29.21 +#define PRIpte "016lx"
   29.22 +
   29.23 +typedef struct { intpte_t l1; } l1_pgentry_t;
   29.24 +typedef struct { intpte_t l2; } l2_pgentry_t;
   29.25 +typedef struct { intpte_t l3; } l3_pgentry_t;
   29.26 +typedef struct { intpte_t l4; } l4_pgentry_t;
   29.27  typedef l4_pgentry_t root_pgentry_t;
   29.28  
   29.29 -/* read access (depricated) */
   29.30 -#define l1e_get_value(_x)         ((_x).l1_lo)
   29.31 -#define l2e_get_value(_x)         ((_x).l2_lo)
   29.32 -#define l3e_get_value(_x)         ((_x).l3_lo)
   29.33 -#define l4e_get_value(_x)         ((_x).l4_lo)
   29.34 -
   29.35 -/* read access */
   29.36 -#define l1e_get_pfn(_x)           (((_x).l1_lo & (PADDR_MASK&PAGE_MASK)) >> PAGE_SHIFT)
   29.37 -#define l1e_get_phys(_x)          (((_x).l1_lo & (PADDR_MASK&PAGE_MASK)))
   29.38 -#define l1e_get_flags(_x)         ((_x).l1_lo  &  PAGE_FLAG_MASK)
   29.39 -
   29.40 -#define l2e_get_pfn(_x)           (((_x).l2_lo & (PADDR_MASK&PAGE_MASK)) >> PAGE_SHIFT)
   29.41 -#define l2e_get_phys(_x)          (((_x).l2_lo & (PADDR_MASK&PAGE_MASK)))
   29.42 -#define l2e_get_flags(_x)         ((_x).l2_lo  &  PAGE_FLAG_MASK)
   29.43 -
   29.44 -#define l3e_get_pfn(_x)           (((_x).l3_lo & (PADDR_MASK&PAGE_MASK)) >> PAGE_SHIFT)
   29.45 -#define l3e_get_phys(_x)          (((_x).l3_lo & (PADDR_MASK&PAGE_MASK)))
   29.46 -#define l3e_get_flags(_x)         ((_x).l3_lo  &  PAGE_FLAG_MASK)
   29.47 -
   29.48 -#define l4e_get_pfn(_x)           (((_x).l4_lo & (PADDR_MASK&PAGE_MASK)) >> PAGE_SHIFT)
   29.49 -#define l4e_get_phys(_x)          (((_x).l4_lo & (PADDR_MASK&PAGE_MASK)))
   29.50 -#define l4e_get_flags(_x)         ((_x).l4_lo  &  PAGE_FLAG_MASK)
   29.51 -
   29.52 -/* write access */
   29.53 -static inline l1_pgentry_t l1e_empty(void)
   29.54 -{
   29.55 -    l1_pgentry_t e = { .l1_lo = 0 };
   29.56 -    return e;
   29.57 -}
   29.58 -static inline l1_pgentry_t l1e_create_pfn(u64 pfn, u64 flags)
   29.59 -{
   29.60 -    l1_pgentry_t e = { .l1_lo = (pfn << PAGE_SHIFT) | flags };
   29.61 -    return e;
   29.62 -}
   29.63 -static inline l1_pgentry_t l1e_create_phys(u64 addr, u64 flags)
   29.64 -{
   29.65 -    l1_pgentry_t e = { .l1_lo = (addr & (PADDR_MASK&PAGE_MASK)) | flags };
   29.66 -    return e;
   29.67 -}
   29.68 -static inline void l1e_add_flags(l1_pgentry_t *e, u64 flags)
   29.69 -{
   29.70 -    e->l1_lo |= flags;
   29.71 -}
   29.72 -static inline void l1e_remove_flags(l1_pgentry_t *e, u64 flags)
   29.73 -{
   29.74 -    e->l1_lo &= ~flags;
   29.75 -}
   29.76 -
   29.77 -static inline l2_pgentry_t l2e_empty(void)
   29.78 -{
   29.79 -    l2_pgentry_t e = { .l2_lo = 0 };
   29.80 -    return e;
   29.81 -}
   29.82 -static inline l2_pgentry_t l2e_create_pfn(u64 pfn, u64 flags)
   29.83 -{
   29.84 -    l2_pgentry_t e = { .l2_lo = (pfn << PAGE_SHIFT) | flags };
   29.85 -    return e;
   29.86 -}
   29.87 -static inline l2_pgentry_t l2e_create_phys(u64 addr, u64 flags)
   29.88 -{
   29.89 -    l2_pgentry_t e = { .l2_lo = (addr & (PADDR_MASK&PAGE_MASK)) | flags };
   29.90 -    return e;
   29.91 -}
   29.92 -static inline void l2e_add_flags(l2_pgentry_t *e, u64 flags)
   29.93 -{
   29.94 -    e->l2_lo |= flags;
   29.95 -}
   29.96 -static inline void l2e_remove_flags(l2_pgentry_t *e, u64 flags)
   29.97 -{
   29.98 -    e->l2_lo &= ~flags;
   29.99 -}
  29.100 -
  29.101 -static inline l3_pgentry_t l3e_empty(void)
  29.102 -{
  29.103 -    l3_pgentry_t e = { .l3_lo = 0 };
  29.104 -    return e;
  29.105 -}
  29.106 -static inline l3_pgentry_t l3e_create_pfn(u64 pfn, u64 flags)
  29.107 -{
  29.108 -    l3_pgentry_t e = { .l3_lo = (pfn << PAGE_SHIFT) | flags };
  29.109 -    return e;
  29.110 -}
  29.111 -static inline l3_pgentry_t l3e_create_phys(u64 addr, u64 flags)
  29.112 -{
  29.113 -    l3_pgentry_t e = { .l3_lo = (addr & (PADDR_MASK&PAGE_MASK)) | flags };
  29.114 -    return e;
  29.115 -}
  29.116 -static inline void l3e_add_flags(l3_pgentry_t *e, u64 flags)
  29.117 -{
  29.118 -    e->l3_lo |= flags;
  29.119 -}
  29.120 -static inline void l3e_remove_flags(l3_pgentry_t *e, u64 flags)
  29.121 -{
  29.122 -    e->l3_lo &= ~flags;
  29.123 -}
  29.124 -
  29.125 -static inline l4_pgentry_t l4e_empty(void)
  29.126 -{
  29.127 -    l4_pgentry_t e = { .l4_lo = 0 };
  29.128 -    return e;
  29.129 -}
  29.130 -static inline l4_pgentry_t l4e_create_pfn(u64 pfn, u64 flags)
  29.131 -{
  29.132 -    l4_pgentry_t e = { .l4_lo = (pfn << PAGE_SHIFT) | flags };
  29.133 -    return e;
  29.134 -}
  29.135 -static inline l4_pgentry_t l4e_create_phys(u64 addr, u64 flags)
  29.136 -{
  29.137 -    l4_pgentry_t e = { .l4_lo = (addr & (PADDR_MASK&PAGE_MASK)) | flags };
  29.138 -    return e;
  29.139 -}
  29.140 -static inline void l4e_add_flags(l4_pgentry_t *e, u64 flags)
  29.141 -{
  29.142 -    e->l4_lo |= flags;
  29.143 -}
  29.144 -static inline void l4e_remove_flags(l4_pgentry_t *e, u64 flags)
  29.145 -{
  29.146 -    e->l4_lo &= ~flags;
  29.147 -}
  29.148 -
  29.149 -/* check entries */
  29.150 -static inline int l1e_has_changed(l1_pgentry_t *e1, l1_pgentry_t *e2, u32 flags)
  29.151 -{
  29.152 -    return ((e1->l1_lo ^ e2->l1_lo) & ((PADDR_MASK&PAGE_MASK) | flags)) != 0;
  29.153 -}
  29.154 -static inline int l2e_has_changed(l2_pgentry_t *e1, l2_pgentry_t *e2, u32 flags)
  29.155 -{
  29.156 -    return ((e1->l2_lo ^ e2->l2_lo) & ((PADDR_MASK&PAGE_MASK) | flags)) != 0;
  29.157 -}
  29.158 -static inline int l3e_has_changed(l3_pgentry_t *e1, l3_pgentry_t *e2, u32 flags)
  29.159 -{
  29.160 -    return ((e1->l3_lo ^ e2->l3_lo) & ((PADDR_MASK&PAGE_MASK) | flags)) != 0;
  29.161 -}
  29.162 -static inline int l4e_has_changed(l4_pgentry_t *e1, l4_pgentry_t *e2, u32 flags)
  29.163 -{
  29.164 -    return ((e1->l4_lo ^ e2->l4_lo) & ((PADDR_MASK&PAGE_MASK) | flags)) != 0;
  29.165 -}
  29.166 -
  29.167  #endif /* !__ASSEMBLY__ */
  29.168  
  29.169 -/* Pagetable walking. */
  29.170 -#define l2e_to_l1e(_x) \
  29.171 -  ((l1_pgentry_t *)__va(l2e_get_phys(_x)))
  29.172 -#define l3e_to_l2e(_x) \
  29.173 -  ((l2_pgentry_t *)__va(l3e_get_phys(_x)))
  29.174 -#define l4e_to_l3e(_x) \
  29.175 -  ((l3_pgentry_t *)__va(l4e_get_phys(_x)))
  29.176 -
  29.177 -/* Given a virtual address, get an entry offset into a page table. */
  29.178 -#define l1_table_offset(_a) \
  29.179 -  (((_a) >> L1_PAGETABLE_SHIFT) & (L1_PAGETABLE_ENTRIES - 1))
  29.180 -#define l2_table_offset(_a) \
  29.181 -  (((_a) >> L2_PAGETABLE_SHIFT) & (L2_PAGETABLE_ENTRIES - 1))
  29.182 -#define l3_table_offset(_a) \
  29.183 -  (((_a) >> L3_PAGETABLE_SHIFT) & (L3_PAGETABLE_ENTRIES - 1))
  29.184 -#define l4_table_offset(_a) \
  29.185 -  (((_a) >> L4_PAGETABLE_SHIFT) & (L4_PAGETABLE_ENTRIES - 1))
  29.186 -
  29.187  /* Given a virtual address, get an entry offset into a linear page table. */
  29.188  #define l1_linear_offset(_a) (((_a) & VADDR_MASK) >> PAGE_SHIFT)
  29.189  
  29.190  #define is_guest_l1_slot(_s) (1)
  29.191 -#define is_guest_l2_slot(_s) (1)
  29.192 +#define is_guest_l2_slot(_t, _s) (1)
  29.193  #define is_guest_l3_slot(_s) (1)
  29.194  #define is_guest_l4_slot(_s)                   \
  29.195      (((_s) < ROOT_PAGETABLE_FIRST_XEN_SLOT) || \
  29.196 @@ -211,10 +58,15 @@ static inline int l4e_has_changed(l4_pge
  29.197  #define root_create_phys          l4e_create_phys
  29.198  #define PGT_root_page_table PGT_l4_page_table
  29.199  
  29.200 -#define L1_DISALLOW_MASK ((cpu_has_nx?0:(1UL<<63)) | (3UL << 7))
  29.201 -#define L2_DISALLOW_MASK ((cpu_has_nx?0:(1UL<<63)) | (7UL << 7))
  29.202 -#define L3_DISALLOW_MASK ((cpu_has_nx?0:(1UL<<63)) | (7UL << 7))
  29.203 -#define L4_DISALLOW_MASK ((cpu_has_nx?0:(1UL<<63)) | (7UL << 7))
   29.204 +#define get_pte_flags(x) (((int)((x) >> 40) & ~0xFFF) | ((int)(x) & 0xFFF))
  29.205 +#define put_pte_flags(x) ((((intpte_t)((x) & ~0xFFF)) << 40) | ((x) & 0xFFF))
  29.206 +
  29.207 +#define _PAGE_NX                (cpu_has_nx ? (1U<<23) : 0U)
  29.208 +
  29.209 +#define L1_DISALLOW_MASK (0xFFFFF180U & ~_PAGE_NX) /* PAT/GLOBAL */
  29.210 +#define L2_DISALLOW_MASK (0xFFFFF180U & ~_PAGE_NX) /* PSE/GLOBAL */
  29.211 +#define L3_DISALLOW_MASK (0xFFFFF180U & ~_PAGE_NX) /* must-be-zero */
  29.212 +#define L4_DISALLOW_MASK (0xFFFFF180U & ~_PAGE_NX) /* must-be-zero */
  29.213  
  29.214  #endif /* __X86_64_PAGE_H__ */
  29.215  
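The intpte_t/PRIpte pair above works like the C99 PRIx64 family: the format macro splices into the printk format string, so entry-dumping code reads the same at every paging level and word size. A minimal usage sketch, assuming an l1e_get_intpte()-style raw-value accessor (hypothetical name, not shown in this hunk):

    /* sketch: dump a pagetable entry, for debugging only */
    printk("bad l1e = %" PRIpte "\n", l1e_get_intpte(l1e));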
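get_pte_flags()/put_pte_flags() pack the PTE attribute bits into a plain int: the low 12 architectural flags stay in bits 0-11, while PTE bits 52-63 (NX at bit 63) fold down to bits 12-23. That is why the packed _PAGE_NX becomes 1U<<23 and the DISALLOW masks can be 32-bit constants. A hedged round-trip sketch, borrowing the generic _PAGE_PRESENT/_PAGE_RW flag names from elsewhere in the tree:

    /* sketch: packed flags survive the pack/unpack round trip */
    int      flags = _PAGE_PRESENT | _PAGE_RW | _PAGE_NX; /* packed form */
    intpte_t pte   = put_pte_flags(flags);  /* NX moves to PTE bit 63  */
    ASSERT(get_pte_flags(pte) == flags);    /* bits 12-23 <-> 52-63    */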
    30.1 --- a/xen/include/public/arch-x86_32.h	Tue May 31 15:20:43 2005 +0000
    30.2 +++ b/xen/include/public/arch-x86_32.h	Tue May 31 15:39:28 2005 +0000
    30.3 @@ -64,7 +64,11 @@
    30.4   * Virtual addresses beyond this are not modifiable by guest OSes. The 
    30.5   * machine->physical mapping table starts at this address, read-only.
    30.6   */
    30.7 -#define HYPERVISOR_VIRT_START (0xFC000000UL)
    30.8 +#ifdef CONFIG_X86_PAE
    30.9 +# define HYPERVISOR_VIRT_START (0xF5800000UL)
   30.10 +#else
   30.11 +# define HYPERVISOR_VIRT_START (0xFC000000UL)
   30.12 +#endif
   30.13  #ifndef machine_to_phys_mapping
   30.14  #define machine_to_phys_mapping ((u32 *)HYPERVISOR_VIRT_START)
   30.15  #endif
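The PAE build moves HYPERVISOR_VIRT_START down from 0xFC000000 to 0xF5800000, i.e. the Xen-reserved hole at the top of the 32-bit address space grows from 64MB to 168MB, giving the PAE hypervisor more virtual-mapping room. Everything from the boundary upwards is read-only to guests, and the machine->physical table sits right at the start of the hole, as the context lines above state. A small guest-side sketch (illustrative helper names, not an interface defined by this patch):

    /* sketch: guest-side view of the Xen hole */
    static inline int va_is_guest_modifiable(unsigned long va)
    {
        return va < HYPERVISOR_VIRT_START;
    }

    static inline unsigned long mfn_to_pfn(unsigned long mfn)
    {
        /* the read-only M2P table starts exactly at the boundary */
        return machine_to_phys_mapping[mfn];
    }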