ia64/xen-unstable

changeset 8770:f030f4b565a5

Allows x86_32 PAE Xen to run VMX domains whose guests use 2-level page
tables. To support machines with more than 4GB of memory, we use PAE mode
for the shadow page tables; the guests still believe they are using
2-level page tables (see the sketch below).

Signed-off-by: Jun Nakajima <jun.nakajima@intel.com>
Signed-off-by: Xin B Li <xin.b.li@intel.com>

This should not break SVM; however, the SVM code will need some small
changes to enable support for non-PAE guests on PAE hosts.
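
As a rough illustration (not part of this changeset), the following
standalone C sketch shows the index split that the new SH_L1_HAS_NEXT_PAGE
paths in shadow.c handle: a 2-level guest L1 has 1024 4-byte entries, while
a PAE shadow L1 holds only 512 8-byte entries, so each guest L1 is shadowed
by two contiguous shadow L1 pages (spl1e and spl1e_next in the diff). The
constants and the guest_to_shadow() helper are illustrative names only, not
Xen symbols.

    /*
     * Illustration only (not Xen code): how a 32-bit guest L1 index maps
     * onto the pair of contiguous PAE shadow L1 pages used by this patch.
     */
    #include <stdio.h>

    #define GUEST_L1_ENTRIES  1024   /* 2-level (non-PAE) guest L1: 1024 x 4-byte entries */
    #define SHADOW_L1_ENTRIES  512   /* PAE shadow L1: 512 x 8-byte entries */

    /* Map a guest L1 index to (which shadow page, index within that page). */
    static void guest_to_shadow(unsigned int gi,
                                unsigned int *page, unsigned int *idx)
    {
        *page = (gi >= SHADOW_L1_ENTRIES) ? 1 : 0;   /* 0 = spl1e, 1 = spl1e_next */
        *idx  = gi % SHADOW_L1_ENTRIES;
    }

    int main(void)
    {
        unsigned int samples[] = { 0, 511, 512, 1023 };
        unsigned int page, idx, i;

        for ( i = 0; i < sizeof(samples) / sizeof(samples[0]); i++ )
        {
            guest_to_shadow(samples[i], &page, &idx);
            printf("guest L1 index %4u -> shadow L1 page %u, entry %3u\n",
                   samples[i], page, idx);
        }
        return 0;
    }

The same arithmetic appears in the patch wherever an index i >=
L1_PAGETABLE_ENTRIES is redirected to the second page (spl1e_next, pt_next,
shadow1_next, pl1e_next).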

Signed-off-by: Keir Fraser <keir@xensource.com>
author kaf24@firebug.cl.cam.ac.uk
date Mon Feb 06 23:25:31 2006 +0100 (2006-02-06)
parents 17b5d5cca484
children 707cb68a391f
files xen/arch/x86/Makefile xen/arch/x86/hvm/vmx/vmcs.c xen/arch/x86/hvm/vmx/vmx.c xen/arch/x86/setup.c xen/arch/x86/shadow.c xen/arch/x86/shadow_guest32.c xen/arch/x86/shadow_public.c xen/include/asm-x86/hvm/vmx/vmx.h xen/include/asm-x86/shadow_64.h
line diff
     1.1 --- a/xen/arch/x86/Makefile	Mon Feb 06 18:02:36 2006 +0000
     1.2 +++ b/xen/arch/x86/Makefile	Mon Feb 06 23:25:31 2006 +0100
     1.3 @@ -26,7 +26,7 @@ ifeq ($(TARGET_SUBARCH),x86_64)
     1.4  endif
     1.5  ifeq ($(TARGET_SUBARCH),x86_32) 
     1.6   ifneq ($(pae),n)
     1.7 -  OBJS += shadow.o shadow_public.o	# x86_32p: new code
     1.8 +  OBJS += shadow.o shadow_public.o shadow_guest32.o	# x86_32p: new code
     1.9   else
    1.10    OBJS += shadow32.o			# x86_32: old code
    1.11   endif
     2.1 --- a/xen/arch/x86/hvm/vmx/vmcs.c	Mon Feb 06 18:02:36 2006 +0000
     2.2 +++ b/xen/arch/x86/hvm/vmx/vmcs.c	Mon Feb 06 23:25:31 2006 +0100
     2.3 @@ -191,17 +191,9 @@ static void vmx_do_launch(struct vcpu *v
     2.4  
     2.5      __asm__ __volatile__ ("mov %%cr4,%0" : "=r" (cr4) : );
     2.6  
     2.7 -#ifdef __x86_64__
     2.8      error |= __vmwrite(GUEST_CR4, cr4 & ~X86_CR4_PSE);
     2.9 -#else
    2.10 -    error |= __vmwrite(GUEST_CR4, cr4);
    2.11 -#endif
    2.12 +    cr4 &= ~(X86_CR4_PGE | X86_CR4_VMXE | X86_CR4_PAE);
    2.13  
    2.14 -#ifdef __x86_64__
    2.15 -    cr4 &= ~(X86_CR4_PGE | X86_CR4_VMXE | X86_CR4_PAE);
    2.16 -#else
    2.17 -    cr4 &= ~(X86_CR4_PGE | X86_CR4_VMXE);
    2.18 -#endif
    2.19      error |= __vmwrite(CR4_READ_SHADOW, cr4);
    2.20  
    2.21      vmx_stts();
     3.1 --- a/xen/arch/x86/hvm/vmx/vmx.c	Mon Feb 06 18:02:36 2006 +0000
     3.2 +++ b/xen/arch/x86/hvm/vmx/vmx.c	Mon Feb 06 23:25:31 2006 +0100
     3.3 @@ -645,7 +645,7 @@ static void vmx_vmexit_do_cpuid(unsigned
     3.4                  !vlapic_global_enabled((VLAPIC(v))) )
     3.5              clear_bit(X86_FEATURE_APIC, &edx);
     3.6  
     3.7 -#ifdef __x86_64__
     3.8 +#if CONFIG_PAGING_LEVELS >= 3
     3.9          if ( v->domain->arch.ops->guest_paging_levels == PAGING_L2 )
    3.10  #endif
    3.11          {
    3.12 @@ -995,7 +995,7 @@ vmx_world_restore(struct vcpu *v, struct
    3.13          if(!get_page(mfn_to_page(mfn), v->domain))
    3.14                  return 0;
    3.15          old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
    3.16 -        v->arch.guest_table = mk_pagetable(mfn << PAGE_SHIFT);
    3.17 +        v->arch.guest_table = mk_pagetable((u64)mfn << PAGE_SHIFT);
    3.18          if (old_base_mfn)
    3.19               put_page(mfn_to_page(old_base_mfn));
    3.20          update_pagetables(v);
    3.21 @@ -1196,8 +1196,9 @@ static int vmx_set_cr0(unsigned long val
    3.22  #endif
    3.23          }
    3.24          else
    3.25 +#endif  /* __x86_64__ */
    3.26          {
    3.27 -#if CONFIG_PAGING_LEVELS >= 4
    3.28 +#if CONFIG_PAGING_LEVELS >= 3
    3.29              if(!shadow_set_guest_paging_levels(v->domain, 2)) {
    3.30                  printk("Unsupported guest paging levels\n");
    3.31                  domain_crash_synchronous(); /* need to take a clean path */
    3.32 @@ -1217,14 +1218,13 @@ static int vmx_set_cr0(unsigned long val
    3.33                  __vmwrite(GUEST_CR4, crn | X86_CR4_PAE);
    3.34              }
    3.35          }
    3.36 -#endif
    3.37  #if CONFIG_PAGING_LEVELS == 2
    3.38          shadow_direct_map_clean(v);
    3.39  #endif
    3.40          /*
    3.41           * Now arch.guest_table points to machine physical.
    3.42           */
    3.43 -        v->arch.guest_table = mk_pagetable(mfn << PAGE_SHIFT);
    3.44 +        v->arch.guest_table = mk_pagetable((u64)mfn << PAGE_SHIFT);
    3.45          update_pagetables(v);
    3.46  
    3.47          HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx",
    3.48 @@ -1392,7 +1392,7 @@ static int mov_to_cr(int gp, int cr, str
    3.49                  domain_crash_synchronous(); /* need to take a clean path */
    3.50              }
    3.51              old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
    3.52 -            v->arch.guest_table = mk_pagetable(mfn << PAGE_SHIFT);
    3.53 +            v->arch.guest_table = mk_pagetable((u64)mfn << PAGE_SHIFT);
    3.54              if (old_base_mfn)
    3.55                  put_page(mfn_to_page(old_base_mfn));
    3.56              update_pagetables(v);
     4.1 --- a/xen/arch/x86/setup.c	Mon Feb 06 18:02:36 2006 +0000
     4.2 +++ b/xen/arch/x86/setup.c	Mon Feb 06 23:25:31 2006 +0100
     4.3 @@ -575,7 +575,7 @@ void arch_get_xen_caps(xen_capabilities_
     4.4      p += sprintf(p, "xen-%d.%d-x86_32p ", XEN_VERSION, XEN_SUBVERSION);
     4.5      if ( hvm_enabled )
     4.6      {
     4.7 -        //p += sprintf(p, "hvm-%d.%d-x86_32 ", XEN_VERSION, XEN_SUBVERSION);
     4.8 +        p += sprintf(p, "hvm-%d.%d-x86_32 ", XEN_VERSION, XEN_SUBVERSION);
     4.9          //p += sprintf(p, "hvm-%d.%d-x86_32p ", XEN_VERSION, XEN_SUBVERSION);
    4.10      }
    4.11  
     5.1 --- a/xen/arch/x86/shadow.c	Mon Feb 06 18:02:36 2006 +0000
     5.2 +++ b/xen/arch/x86/shadow.c	Mon Feb 06 23:25:31 2006 +0100
     5.3 @@ -36,6 +36,9 @@
     5.4  #include <xen/trace.h>
     5.5  #include <asm/shadow_64.h>
     5.6  
     5.7 +/* Use this to have the compiler remove unnecessary branches */
     5.8 +#define SH_L1_HAS_NEXT_PAGE (GUEST_L1_PAGETABLE_ENTRIES - L1_PAGETABLE_ENTRIES)
     5.9 +
    5.10  extern void free_shadow_pages(struct domain *d);
    5.11  
    5.12  #if 0 // this code has not been updated for 32pae & 64 bit modes
    5.13 @@ -223,11 +226,16 @@ alloc_shadow_page(struct domain *d,
    5.14          }
    5.15          else
    5.16          {
    5.17 -            if (d->arch.ops->guest_paging_levels == PAGING_L2)
    5.18 +            if ( SH_L1_HAS_NEXT_PAGE &&
    5.19 +                 d->arch.ops->guest_paging_levels == PAGING_L2)
    5.20              {
    5.21  #if CONFIG_PAGING_LEVELS >= 3
    5.22 -                /* For 32-bit HVM guest, 2 shadow L1s to simulate 1 guest L1
    5.23 -                 * So need allocate 2 continues shadow L1 each time.
     5.24 +                /*
     5.25 +                 * For a 32-bit HVM guest, 2 shadow L1s are required to
     5.26 +                 * simulate 1 guest L1, so we need to allocate 2
     5.27 +                 * contiguous shadow L1 pages each time.
     5.28 +                 *
     5.29 +                 * --> Need to avoid alloc_domheap_pages.
    5.30                   */
    5.31                  page = alloc_domheap_pages(NULL, SL1_ORDER, 0);
    5.32                  if (!page)
    5.33 @@ -237,7 +245,7 @@ alloc_shadow_page(struct domain *d,
    5.34                  memset(l1, 0, PAGE_SIZE);
    5.35                  unmap_domain_page(l1);
    5.36  
    5.37 -                l1 = map_domain_page(page_to_mfn(page+1));
    5.38 +                l1 = map_domain_page(page_to_mfn(page + 1));
    5.39                  memset(l1, 0, PAGE_SIZE);
    5.40                  unmap_domain_page(l1);
    5.41  #else
    5.42 @@ -265,14 +273,12 @@ alloc_shadow_page(struct domain *d,
    5.43      else {
    5.44  #if CONFIG_PAGING_LEVELS == 2
    5.45          page = alloc_domheap_page(NULL);
    5.46 -#elif CONFIG_PAGING_LEVELS == 3
    5.47 -        if ( psh_type == PGT_l3_shadow )
    5.48 +#elif CONFIG_PAGING_LEVELS >= 3
    5.49 +        if ( d->arch.ops->guest_paging_levels == PAGING_L2 &&
    5.50 +             psh_type == PGT_l4_shadow )      /* allocated for PAE PDP page */
    5.51              page = alloc_domheap_pages(NULL, 0, ALLOC_DOM_DMA);
    5.52 -        else
    5.53 -            page = alloc_domheap_page(NULL);
    5.54 -#elif CONFIG_PAGING_LEVELS == 4
    5.55 -        if ( (psh_type == PGT_l4_shadow) &&
    5.56 -             (d->arch.ops->guest_paging_levels != PAGING_L4) )
    5.57 +        else if ( d->arch.ops->guest_paging_levels == PAGING_L3 &&
    5.58 +                  psh_type == PGT_l3_shadow ) /* allocated for PAE PDP page */
    5.59              page = alloc_domheap_pages(NULL, 0, ALLOC_DOM_DMA);
    5.60          else
    5.61              page = alloc_domheap_page(NULL);
    5.62 @@ -550,7 +556,7 @@ static void shadow_map_l1_into_current_l
    5.63  {
    5.64      struct vcpu *v = current;
    5.65      struct domain *d = v->domain;
    5.66 -    l1_pgentry_t *spl1e;
    5.67 +    l1_pgentry_t *spl1e, *spl1e_next = 0;
    5.68      l2_pgentry_t sl2e;
    5.69      guest_l1_pgentry_t *gpl1e;
    5.70      guest_l2_pgentry_t gl2e = {0};
    5.71 @@ -599,8 +605,9 @@ static void shadow_map_l1_into_current_l
    5.72      }
    5.73  #endif
    5.74  
    5.75 -#if CONFIG_PAGING_LEVELS >=3
    5.76 -    if (d->arch.ops->guest_paging_levels == PAGING_L2)
    5.77 +#if CONFIG_PAGING_LEVELS >= 3
    5.78 +    if ( SH_L1_HAS_NEXT_PAGE && 
    5.79 +         d->arch.ops->guest_paging_levels == PAGING_L2 )
    5.80      {
    5.81          /* for 32-bit HVM guest on 64-bit or PAE host,
    5.82           * need update two L2 entries each time
    5.83 @@ -639,15 +646,21 @@ static void shadow_map_l1_into_current_l
    5.84          tmp_gmfn = gmfn_to_mfn(d, l2e_get_pfn(tmp_gl2e));
    5.85          gpl1e = (guest_l1_pgentry_t *) map_domain_page(tmp_gmfn);
    5.86  
    5.87 -        /* If the PGT_l1_shadow has two continual pages */
    5.88 -#if CONFIG_PAGING_LEVELS >=3
    5.89 -        if (d->arch.ops->guest_paging_levels == PAGING_L2)
    5.90 -            __shadow_get_l2e(v,  va & ~((1<<L2_PAGETABLE_SHIFT_32) - 1), &tmp_sl2e);
    5.91 +        /* If the PGT_l1_shadow has two contiguous pages */
    5.92 +#if CONFIG_PAGING_LEVELS >= 3
    5.93 +        if ( SH_L1_HAS_NEXT_PAGE &&
    5.94 +             d->arch.ops->guest_paging_levels == PAGING_L2 )
    5.95 +            __shadow_get_l2e(v,  va & ~((1UL << L2_PAGETABLE_SHIFT_32) - 1), &tmp_sl2e);
    5.96          else
    5.97  #endif
    5.98          __shadow_get_l2e(v, va, &tmp_sl2e);
    5.99 +
   5.100          spl1e = (l1_pgentry_t *) map_domain_page(l2e_get_pfn(tmp_sl2e));
   5.101  
   5.102 +        if ( SH_L1_HAS_NEXT_PAGE )
   5.103 +            spl1e_next = (l1_pgentry_t *) map_domain_page(
   5.104 +                (l2e_get_pfn(tmp_sl2e) + 1UL));
   5.105 +
   5.106          for ( i = 0; i < GUEST_L1_PAGETABLE_ENTRIES; i++ )
   5.107          {
   5.108              l1pte_propagate_from_guest(d, gpl1e[i], &sl1e);
   5.109 @@ -665,7 +678,12 @@ static void shadow_map_l1_into_current_l
   5.110                  }
   5.111                  break;
   5.112              }
   5.113 -            spl1e[i] = sl1e;
   5.114 +
   5.115 +            if ( SH_L1_HAS_NEXT_PAGE && i >= L1_PAGETABLE_ENTRIES )
   5.116 +                spl1e_next[i - L1_PAGETABLE_ENTRIES] = sl1e;
   5.117 +            else 
   5.118 +                spl1e[i] = sl1e;
   5.119 +
   5.120              if ( unlikely(i < min) )
   5.121                  min = i;
   5.122              if ( likely(i > max) )
   5.123 @@ -678,6 +696,9 @@ static void shadow_map_l1_into_current_l
   5.124  
   5.125          unmap_domain_page(gpl1e);
   5.126          unmap_domain_page(spl1e);
   5.127 +
   5.128 +        if ( SH_L1_HAS_NEXT_PAGE )
   5.129 +            unmap_domain_page(spl1e_next);
   5.130      }
   5.131  }
   5.132  
   5.133 @@ -1032,7 +1053,7 @@ static void shadow_mark_va_out_of_sync(
   5.134      l2_pgentry_t sl2e;
   5.135      struct domain *d = v->domain;
   5.136  
   5.137 -#if CONFIG_PAGING_LEVELS >= 4
   5.138 +#if CONFIG_PAGING_LEVELS >= 3
   5.139      {
   5.140          l4_pgentry_t sl4e;
   5.141          l3_pgentry_t sl3e;
   5.142 @@ -1322,6 +1343,7 @@ static u32 remove_all_write_access_in_pt
   5.143      u32 max_refs_to_find, unsigned long prediction)
   5.144  {
   5.145      l1_pgentry_t *pt = map_domain_page(pt_mfn);
   5.146 +    l1_pgentry_t *pt_next = 0, *sl1e_p;
   5.147      l1_pgentry_t match;
   5.148      unsigned long flags = _PAGE_RW | _PAGE_PRESENT;
   5.149      int i;
   5.150 @@ -1335,28 +1357,46 @@ static u32 remove_all_write_access_in_pt
   5.151                  PGT_fl1_shadow);
   5.152  #endif
   5.153  
   5.154 +    if ( SH_L1_HAS_NEXT_PAGE )
   5.155 +        pt_next = map_domain_page(pt_mfn + 1);
   5.156 +
   5.157      match = l1e_from_pfn(readonly_gmfn, flags);
   5.158  
   5.159 -    if ( shadow_mode_external(d) ) {
   5.160 +    if ( shadow_mode_external(d) ) 
   5.161 +    {
   5.162          i = (mfn_to_page(readonly_gmfn)->u.inuse.type_info & PGT_va_mask)
   5.163              >> PGT_va_shift;
   5.164  
   5.165 -        if ( (i >= 0 && i < L1_PAGETABLE_ENTRIES) &&
   5.166 -             !l1e_has_changed(pt[i], match, flags) &&
   5.167 -             fix_entry(d, &pt[i], &found, is_l1_shadow, max_refs_to_find) &&
   5.168 +        if ( SH_L1_HAS_NEXT_PAGE &&
   5.169 +             i >= L1_PAGETABLE_ENTRIES )
   5.170 +            sl1e_p = &pt_next[i - L1_PAGETABLE_ENTRIES];
   5.171 +        else
   5.172 +            sl1e_p = &pt[i];
   5.173 +
   5.174 +        if ( (i >= 0 && i < GUEST_L1_PAGETABLE_ENTRIES) &&
   5.175 +             !l1e_has_changed(*sl1e_p, match, flags) &&
   5.176 +             fix_entry(d, sl1e_p, &found, is_l1_shadow, max_refs_to_find) &&
   5.177               !prediction )
   5.178              goto out;
   5.179      }
   5.180  
   5.181 -    for (i = 0; i < GUEST_L1_PAGETABLE_ENTRIES; i++)
   5.182 +    for ( i = 0; i < GUEST_L1_PAGETABLE_ENTRIES; i++ )
   5.183      {
   5.184 -        if ( unlikely(!l1e_has_changed(pt[i], match, flags)) &&
   5.185 -             fix_entry(d, &pt[i], &found, is_l1_shadow, max_refs_to_find) )
   5.186 +        if ( SH_L1_HAS_NEXT_PAGE &&
   5.187 +             i >= L1_PAGETABLE_ENTRIES )
   5.188 +            sl1e_p = &pt_next[i - L1_PAGETABLE_ENTRIES];
   5.189 +        else
   5.190 +            sl1e_p = &pt[i];
   5.191 +
   5.192 +        if ( unlikely(!l1e_has_changed(*sl1e_p, match, flags)) &&
   5.193 +             fix_entry(d, sl1e_p, &found, is_l1_shadow, max_refs_to_find) )
   5.194              break;
   5.195      }
   5.196  
   5.197  out:
   5.198      unmap_domain_page(pt);
   5.199 +    if ( SH_L1_HAS_NEXT_PAGE )
   5.200 +        unmap_domain_page(pt_next);
   5.201  
   5.202      return found;
   5.203  }
   5.204 @@ -1512,6 +1552,7 @@ static int resync_all(struct domain *d, 
   5.205          {
   5.206              guest_l1_pgentry_t *guest1 = guest;
   5.207              l1_pgentry_t *shadow1 = shadow;
   5.208 +            l1_pgentry_t *shadow1_next = 0, *sl1e_p;
   5.209              guest_l1_pgentry_t *snapshot1 = snapshot;
   5.210              int unshadow_l1 = 0;
   5.211  
   5.212 @@ -1525,19 +1566,28 @@ static int resync_all(struct domain *d, 
   5.213  
   5.214              changed = 0;
   5.215  
   5.216 +            if ( SH_L1_HAS_NEXT_PAGE && shadow1 )
   5.217 +                shadow1_next = map_domain_page(smfn + 1);
   5.218 +
   5.219              for ( i = min_shadow; i <= max_shadow; i++ )
   5.220              {
   5.221 +
   5.222 +                if ( SH_L1_HAS_NEXT_PAGE && i >= L1_PAGETABLE_ENTRIES )
   5.223 +                    sl1e_p = &shadow1_next[i - L1_PAGETABLE_ENTRIES];
   5.224 +                else
   5.225 +                    sl1e_p = &shadow1[i];
   5.226 +
   5.227                  if ( (i < min_snapshot) || (i > max_snapshot) ||
   5.228                       guest_l1e_has_changed(guest1[i], snapshot1[i], PAGE_FLAG_MASK) )
   5.229                  {
   5.230                      int error;
   5.231  
   5.232 -                    error = validate_pte_change(d, guest1[i], &shadow1[i]);
   5.233 +                    error = validate_pte_change(d, guest1[i], sl1e_p);
   5.234                      if ( error ==  -1 )
   5.235                          unshadow_l1 = 1;
   5.236                      else {
   5.237                          need_flush |= error;
   5.238 -                        set_guest_back_ptr(d, shadow1[i], smfn, i);
   5.239 +                        set_guest_back_ptr(d, *sl1e_p, smfn, i);
   5.240                      }
   5.241                      // can't update snapshots of linear page tables -- they
   5.242                      // are used multiple times...
   5.243 @@ -1547,6 +1597,10 @@ static int resync_all(struct domain *d, 
   5.244                      changed++;
   5.245                  }
   5.246              }
   5.247 +
   5.248 +            if ( shadow1_next )
   5.249 +                unmap_domain_page(shadow1_next);
   5.250 +
   5.251              perfc_incrc(resync_l1);
   5.252              perfc_incr_histo(wpt_updates, changed, PT_UPDATES);
   5.253              perfc_incr_histo(l1_entries_checked, max_shadow - min_shadow + 1, PT_UPDATES);
   5.254 @@ -1690,7 +1744,6 @@ static int resync_all(struct domain *d, 
   5.255          case PGT_l4_shadow:
   5.256          {
   5.257              guest_root_pgentry_t *guest_root = guest;
   5.258 -            l4_pgentry_t *shadow4 = shadow;
   5.259              guest_root_pgentry_t *snapshot_root = snapshot;
   5.260  
   5.261              changed = 0;
   5.262 @@ -1702,12 +1755,18 @@ static int resync_all(struct domain *d, 
   5.263                  if ( root_entry_has_changed(
   5.264                          new_root_e, snapshot_root[i], PAGE_FLAG_MASK))
   5.265                  {
   5.266 +#ifndef GUEST_PGENTRY_32
   5.267 +                    l4_pgentry_t *shadow4 = shadow;
   5.268 +
   5.269                      if ( d->arch.ops->guest_paging_levels == PAGING_L4 ) 
   5.270                      {
   5.271                          need_flush |= validate_entry_change(
   5.272                            d, (pgentry_64_t *)&new_root_e,
   5.273                            (pgentry_64_t *)&shadow4[i], shadow_type_to_level(stype));
   5.274 -                    } else {
   5.275 +                    }
   5.276 +                    else
   5.277 +#endif
   5.278 +                    {
   5.279                          validate_bl2e_change(d, &new_root_e, shadow, i);
   5.280                      }
   5.281                      changed++;
   5.282 @@ -1822,12 +1881,12 @@ static void sync_all(struct domain *d)
   5.283  #endif
   5.284  
   5.285  #if CONFIG_PAGING_LEVELS >= 3
   5.286 -    if (d->arch.ops->guest_paging_levels == PAGING_L2)
   5.287 +    if ( d->arch.ops->guest_paging_levels == PAGING_L2 )
   5.288          need_flush |= resync_all(d, PGT_l4_shadow);
   5.289      else
   5.290          need_flush |= resync_all(d, PGT_l2_shadow);
   5.291  
   5.292 -    if (d->arch.ops->guest_paging_levels >= PAGING_L3) 
   5.293 +    if ( d->arch.ops->guest_paging_levels >= PAGING_L3 )
   5.294      {
   5.295          need_flush |= resync_all(d, PGT_l3_shadow);
   5.296          need_flush |= resync_all(d, PGT_l4_shadow);
   5.297 @@ -2184,7 +2243,7 @@ static void shadow_update_pagetables(str
   5.298      if ( !get_shadow_ref(smfn) )
   5.299          BUG();
   5.300      old_smfn = pagetable_get_pfn(v->arch.shadow_table);
   5.301 -    v->arch.shadow_table = mk_pagetable(smfn << PAGE_SHIFT);
   5.302 +    v->arch.shadow_table = mk_pagetable((u64)smfn << PAGE_SHIFT);
   5.303      if ( old_smfn )
   5.304          put_shadow_ref(old_smfn);
   5.305  
   5.306 @@ -2251,12 +2310,36 @@ static void shadow_update_pagetables(str
   5.307      }
   5.308  #endif /* CONFIG_PAGING_LEVELS == 2 */
   5.309  
   5.310 +#if CONFIG_PAGING_LEVELS == 3
   5.311 +    /*
   5.312 +     * fixup pointers in monitor table, as necessary
   5.313 +     */
   5.314 +    if ( max_mode == SHM_external )
   5.315 +    {
   5.316 +        l3_pgentry_t *mpl3e = (l3_pgentry_t *) v->arch.monitor_vtable;
   5.317 +        l2_pgentry_t *spl2e;
   5.318 +        unsigned long s2mfn;
   5.319 +        int i;
   5.320 + 
   5.321 +        ASSERT( shadow_mode_translate(d) );
   5.322 +        s2mfn = l3e_get_pfn(mpl3e[L3_PAGETABLE_ENTRIES - 1]);
   5.323 + 
   5.324 +        ASSERT( s2mfn);
   5.325 +        spl2e = map_domain_page(s2mfn);
   5.326 + 
   5.327 +        for ( i = 0; i < (LINEARPT_MBYTES >> (L2_PAGETABLE_SHIFT - 20)); i++ )
   5.328 +            spl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START) + i] =
   5.329 +                (l3e_get_flags(mpl3e[i]) & _PAGE_PRESENT) ?
   5.330 +                l2e_from_pfn(l3e_get_pfn(mpl3e[i]), __PAGE_HYPERVISOR) :
   5.331 +                l2e_empty();
   5.332 + 
   5.333 +        unmap_domain_page(spl2e);
   5.334 +        local_flush_tlb();
   5.335 +    }
   5.336 +#endif
   5.337 +
   5.338      if(likely(need_sync))
   5.339          shadow_sync_all(d);
   5.340 -
   5.341 -#if CONFIG_PAGING_LEVELS == 3
   5.342 -    /* FIXME: PAE code to be written */
   5.343 -#endif
   5.344  }
   5.345  
   5.346  
   5.347 @@ -2733,6 +2816,55 @@ int _check_all_pagetables(struct vcpu *v
   5.348  #endif // SHADOW_DEBUG
   5.349  #endif // this code has not been updated for 32pae & 64 bit modes
   5.350  
   5.351 +#if CONFIG_PAGING_LEVELS >= 3
   5.352 +/****************************************************************************/
   5.353 +/* 64-bit shadow-mode code testing */
   5.354 +/****************************************************************************/
   5.355 +/*
    5.356 + * init_bl2() is for a 32-bit HVM guest on a PAE or 64-bit host,
    5.357 + * using 1 shadow L4 (L3) and 4 shadow L2s to simulate the guest L2
   5.358 + */
   5.359 +static inline unsigned long init_bl2(
   5.360 +    struct domain *d, unsigned long gpfn, unsigned long gmfn)
   5.361 +{
   5.362 +    unsigned int count;
   5.363 +    unsigned long sl2mfn;
   5.364 +    unsigned long smfn;
   5.365 +    struct page_info *page;
   5.366 +    l4_pgentry_t *spl4e;
   5.367 +    void *l2;
   5.368 +
   5.369 +    if ( unlikely(!(smfn = alloc_shadow_page(d, gpfn, gmfn, PGT_l4_shadow))) )
   5.370 +    {
   5.371 +        printk("Couldn't alloc an L4 shadow for pfn=%lx mfn=%lx\n", gpfn, gmfn);
   5.372 +        BUG(); /* XXX Deal gracefully with failure. */
   5.373 +    }
   5.374 +
   5.375 +    spl4e = (l4_pgentry_t *)map_domain_page(smfn);
   5.376 +
   5.377 +    /* Map the self entry, L4&L3 share the same page */
   5.378 +    spl4e[PAE_SHADOW_SELF_ENTRY] = l4e_from_pfn(smfn, __PAGE_HYPERVISOR);
   5.379 +
   5.380 +    /* Allocate 4 shadow L2s */
   5.381 +    page = alloc_domheap_pages(NULL, SL2_ORDER, 0);
   5.382 +    if ( !page )
   5.383 +        domain_crash_synchronous();
   5.384 +
   5.385 +    for ( count = 0; count < PAE_L3_PAGETABLE_ENTRIES; count++ )
   5.386 +    {
   5.387 +        sl2mfn = page_to_mfn(page+count);
   5.388 +        l2 = map_domain_page(sl2mfn);
   5.389 +        memset(l2, 0, PAGE_SIZE);
   5.390 +        unmap_domain_page(l2);
   5.391 +        spl4e[count] = l4e_from_pfn(sl2mfn, _PAGE_PRESENT);
   5.392 +    }
   5.393 +
   5.394 +    unmap_domain_page(spl4e);
   5.395 +
   5.396 +    return smfn;
   5.397 +}
   5.398 +#endif
   5.399 +
   5.400  #if CONFIG_PAGING_LEVELS == 3
   5.401  static unsigned long shadow_l3_table(
   5.402      struct domain *d, unsigned long gpfn, unsigned long gmfn)
   5.403 @@ -2742,10 +2874,18 @@ static unsigned long shadow_l3_table(
   5.404  
   5.405      perfc_incrc(shadow_l3_table_count);
   5.406  
    5.407 +    SH_VVLOG("shadow_l3_table(gpfn=%lx, gmfn=%lx)", gpfn, gmfn);
   5.408 +
   5.409 +    if ( SH_L1_HAS_NEXT_PAGE &&
   5.410 +         d->arch.ops->guest_paging_levels == PAGING_L2 )
   5.411 +    {
   5.412 +        return init_bl2(d, gpfn, gmfn);
   5.413 +    }
   5.414 +
   5.415      if ( unlikely(!(smfn = alloc_shadow_page(d, gpfn, gmfn, PGT_l3_shadow))) )
   5.416      {
   5.417 -        printk("Couldn't alloc an L4 shadow for pfn=%lx mfn=%lx\n", gpfn, gmfn);
   5.418 -        BUG(); /* XXX Deal gracefully with failure. */
    5.419 +        printk("Couldn't alloc an L3 shadow for pfn=%lx mfn=%lx\n", gpfn, gmfn);
    5.420 +        BUG(); /* XXX Deal gracefully with failure. */
   5.421      }
   5.422  
   5.423      spl3e = (l3_pgentry_t *)map_domain_page(smfn);
   5.424 @@ -2825,53 +2965,17 @@ static unsigned long shadow_l3_table(
   5.425  
   5.426      return smfn;
   5.427  }
   5.428 -
   5.429 +#endif /* CONFIG_PAGING_LEVELS == 3 */
   5.430 +
   5.431 +#ifndef GUEST_PGENTRY_32
   5.432  static unsigned long gva_to_gpa_pae(unsigned long gva)
   5.433  {
   5.434      BUG();
   5.435      return 43;
   5.436  }
   5.437 -#endif /* CONFIG_PAGING_LEVELS == 3 */
   5.438 +#endif
   5.439  
   5.440  #if CONFIG_PAGING_LEVELS == 4
   5.441 -/****************************************************************************/
   5.442 -/* 64-bit shadow-mode code testing */
   5.443 -/****************************************************************************/
   5.444 -/*
   5.445 - * init_bl2() is for 32-bit HVM guest on 64-bit host
   5.446 - * Using 1 shadow L4(l3) and 4 shadow L2s to simulate guest L2
   5.447 - */
   5.448 -static inline unsigned long init_bl2(l4_pgentry_t *spl4e, unsigned long smfn)
   5.449 -{
   5.450 -    unsigned int count;
   5.451 -    unsigned long sl2mfn;
   5.452 -    struct page_info *page;
   5.453 -    void *l2;
   5.454 -
   5.455 -    memset(spl4e, 0, PAGE_SIZE);
   5.456 -
   5.457 -    /* Map the self entry, L4&L3 share the same page */
   5.458 -    spl4e[PAE_SHADOW_SELF_ENTRY] = l4e_from_pfn(smfn, __PAGE_HYPERVISOR);
   5.459 -
   5.460 -    /* Allocate 4 shadow L2s */
   5.461 -    page = alloc_domheap_pages(NULL, SL2_ORDER, 0);
   5.462 -    if (!page)
   5.463 -        domain_crash_synchronous();
   5.464 -
   5.465 -    for ( count = 0; count < PAE_L3_PAGETABLE_ENTRIES; count++ )
   5.466 -    {
   5.467 -        sl2mfn = page_to_mfn(page+count);
   5.468 -        l2 = map_domain_page(sl2mfn);
   5.469 -        memset(l2, 0, PAGE_SIZE);
   5.470 -        unmap_domain_page(l2);
   5.471 -        spl4e[count] = l4e_from_pfn(sl2mfn, _PAGE_PRESENT);
   5.472 -    }
   5.473 -
   5.474 -    unmap_domain_page(spl4e);
   5.475 -
   5.476 -    return smfn;
   5.477 -}
   5.478 -
   5.479  static unsigned long shadow_l4_table(
   5.480    struct domain *d, unsigned long gpfn, unsigned long gmfn)
   5.481  {
   5.482 @@ -2882,6 +2986,11 @@ static unsigned long shadow_l4_table(
   5.483  
   5.484      perfc_incrc(shadow_l4_table_count);
   5.485  
   5.486 +    if ( d->arch.ops->guest_paging_levels == PAGING_L2 )
   5.487 +    {
   5.488 +        return init_bl2(d, gpfn, gmfn);
   5.489 +    }
   5.490 +
   5.491      if ( unlikely(!(smfn = alloc_shadow_page(d, gpfn, gmfn, PGT_l4_shadow))) )
   5.492      {
   5.493          printk("Couldn't alloc an L4 shadow for pfn=%lx mfn=%lx\n", gpfn, gmfn);
   5.494 @@ -2890,10 +2999,6 @@ static unsigned long shadow_l4_table(
   5.495  
   5.496      spl4e = (l4_pgentry_t *)map_domain_page(smfn);
   5.497  
   5.498 -    if (d->arch.ops->guest_paging_levels == PAGING_L2) {
   5.499 -        return init_bl2(spl4e, smfn);
   5.500 -    }
   5.501 -
   5.502      /* Install hypervisor and 4x linear p.t. mapings. */
   5.503      if ( (PGT_base_page_table == PGT_l4_page_table) &&
   5.504        !shadow_mode_external(d) )
   5.505 @@ -3568,8 +3673,6 @@ static void shadow_invlpg_64(struct vcpu
   5.506      shadow_unlock(d);
   5.507  }
   5.508  
   5.509 -
   5.510 -#if CONFIG_PAGING_LEVELS == 4
   5.511  static unsigned long gva_to_gpa_64(unsigned long gva)
   5.512  {
   5.513      struct vcpu *v = current;
   5.514 @@ -3588,8 +3691,25 @@ static unsigned long gva_to_gpa_64(unsig
   5.515      return gpa;
   5.516  }
   5.517  
   5.518 +/*
   5.519 + * The naming convention of the shadow_ops:
   5.520 + * MODE_<pgentry size>_<guest paging levels>_HANDLER
   5.521 + */
   5.522  #ifndef GUEST_PGENTRY_32
   5.523 -struct shadow_ops MODE_F_HANDLER = {
   5.524 +struct shadow_ops MODE_64_3_HANDLER = {
   5.525 +    .guest_paging_levels        = 3,
   5.526 +    .invlpg                     = shadow_invlpg_64,
   5.527 +    .fault                      = shadow_fault_64,
   5.528 +    .update_pagetables          = shadow_update_pagetables,
   5.529 +    .sync_all                   = sync_all,
   5.530 +    .remove_all_write_access    = remove_all_write_access,
   5.531 +    .do_update_va_mapping       = do_update_va_mapping,
   5.532 +    .mark_mfn_out_of_sync       = mark_mfn_out_of_sync,
   5.533 +    .is_out_of_sync             = is_out_of_sync,
   5.534 +    .gva_to_gpa                 = gva_to_gpa_pae,
   5.535 +};
   5.536 +
   5.537 +struct shadow_ops MODE_64_4_HANDLER = {
   5.538      .guest_paging_levels        = 4,
   5.539      .invlpg                     = shadow_invlpg_64,
   5.540      .fault                      = shadow_fault_64,
   5.541 @@ -3602,13 +3722,11 @@ struct shadow_ops MODE_F_HANDLER = {
   5.542      .gva_to_gpa                 = gva_to_gpa_64,
   5.543  };
   5.544  #endif /* GUEST_PGENTRY_32 */
   5.545 -#endif /* CONFIG_PAGING_LEVELS == 4 */
   5.546 -
   5.547  #endif /* CONFIG_PAGING_LEVELS >= 3 */
   5.548  
   5.549  
   5.550  #if CONFIG_PAGING_LEVELS == 2
   5.551 -struct shadow_ops MODE_A_HANDLER = {
   5.552 +struct shadow_ops MODE_32_2_HANDLER = {
   5.553      .guest_paging_levels        = 2,
   5.554      .invlpg                     = shadow_invlpg_32,
   5.555      .fault                      = shadow_fault_32,
   5.556 @@ -3620,25 +3738,9 @@ struct shadow_ops MODE_A_HANDLER = {
   5.557      .is_out_of_sync             = is_out_of_sync,
   5.558      .gva_to_gpa                 = gva_to_gpa_64,
   5.559  };
   5.560 -
   5.561 -#elif CONFIG_PAGING_LEVELS == 3
   5.562 -
   5.563 -struct shadow_ops MODE_B_HANDLER = {
   5.564 -    .guest_paging_levels        = 3,
   5.565 -    .invlpg                     = shadow_invlpg_64,
   5.566 -    .fault                      = shadow_fault_64,
   5.567 -    .update_pagetables          = shadow_update_pagetables,
   5.568 -    .sync_all                   = sync_all,
   5.569 -    .remove_all_write_access    = remove_all_write_access,
   5.570 -    .do_update_va_mapping       = do_update_va_mapping,
   5.571 -    .mark_mfn_out_of_sync       = mark_mfn_out_of_sync,
   5.572 -    .is_out_of_sync             = is_out_of_sync,
   5.573 -    .gva_to_gpa                 = gva_to_gpa_pae,
   5.574 -};
   5.575 -
   5.576  #endif
   5.577  
   5.578 -#if CONFIG_PAGING_LEVELS == 3 ||                                \
   5.579 +#if ( CONFIG_PAGING_LEVELS == 3 && !defined (GUEST_PGENTRY_32) ) ||  \
   5.580      ( CONFIG_PAGING_LEVELS == 4 && defined (GUEST_PGENTRY_32) )
   5.581  
   5.582  /* 
   5.583 @@ -3697,7 +3799,7 @@ int shadow_direct_map_fault(unsigned lon
   5.584      }
   5.585  
   5.586      __shadow_get_l1e(v, vpa, &sl1e);
   5.587 -        
   5.588 +
   5.589      if ( !(l1e_get_flags(sl1e) & _PAGE_PRESENT) ) 
   5.590      {
   5.591          sl1e = l1e_from_pfn(mfn, __PAGE_HYPERVISOR | _PAGE_USER);
     6.1 --- a/xen/arch/x86/shadow_guest32.c	Mon Feb 06 18:02:36 2006 +0000
     6.2 +++ b/xen/arch/x86/shadow_guest32.c	Mon Feb 06 23:25:31 2006 +0100
     6.3 @@ -1,9 +1,8 @@
     6.4  #define GUEST_PGENTRY_32
     6.5 -#if defined (__x86_64__)
     6.6  
     6.7  #include "shadow.c"
     6.8 -struct shadow_ops MODE_D_HANDLER = {
     6.9 -    .guest_paging_levels              = 2,
    6.10 +struct shadow_ops MODE_64_2_HANDLER = {
    6.11 +    .guest_paging_levels        = 2,
    6.12      .invlpg                     = shadow_invlpg_64,
    6.13      .fault                      = shadow_fault_64,
    6.14      .update_pagetables          = shadow_update_pagetables,
    6.15 @@ -15,4 +14,3 @@ struct shadow_ops MODE_D_HANDLER = {
    6.16      .gva_to_gpa                 = gva_to_gpa_64,
    6.17  };
    6.18  
    6.19 -#endif
     7.1 --- a/xen/arch/x86/shadow_public.c	Mon Feb 06 18:02:36 2006 +0000
     7.2 +++ b/xen/arch/x86/shadow_public.c	Mon Feb 06 23:25:31 2006 +0100
     7.3 @@ -29,19 +29,9 @@
     7.4  #include <xen/event.h>
     7.5  #include <xen/sched.h>
     7.6  #include <xen/trace.h>
     7.7 -
     7.8 -#if CONFIG_PAGING_LEVELS >= 3
     7.9  #include <asm/shadow_64.h>
    7.10  
    7.11 -#endif
    7.12 -#if CONFIG_PAGING_LEVELS == 4
    7.13 -extern struct shadow_ops MODE_F_HANDLER;
    7.14 -extern struct shadow_ops MODE_D_HANDLER;
    7.15 -
    7.16  static void free_p2m_table(struct vcpu *v);
    7.17 -#endif
    7.18 -
    7.19 -extern struct shadow_ops MODE_A_HANDLER;
    7.20  
    7.21  #define SHADOW_MAX_GUEST32(_encoded) ((L1_PAGETABLE_ENTRIES_32 - 1) - ((_encoded) >> 16))
    7.22  
    7.23 @@ -120,24 +110,27 @@ int shadow_set_guest_paging_levels(struc
    7.24           shadow_direct_map_clean(v);
    7.25  
    7.26      switch(levels) {
    7.27 -#if CONFIG_PAGING_LEVELS >= 4
    7.28 +#if CONFIG_PAGING_LEVELS == 4
    7.29      case 4:
    7.30 -        if ( d->arch.ops != &MODE_F_HANDLER )
    7.31 -            d->arch.ops = &MODE_F_HANDLER;
    7.32 +        if ( d->arch.ops != &MODE_64_4_HANDLER )
    7.33 +            d->arch.ops = &MODE_64_4_HANDLER;
    7.34          shadow_unlock(d);
    7.35          return 1;
    7.36  #endif
    7.37 +#if CONFIG_PAGING_LEVELS >= 3
    7.38      case 3:
    7.39 +        if ( d->arch.ops != &MODE_64_3_HANDLER )
    7.40 +            d->arch.ops = &MODE_64_3_HANDLER;
    7.41 +        shadow_unlock(d);
    7.42 +        return 1;
    7.43 +#endif
    7.44      case 2:
    7.45  #if CONFIG_PAGING_LEVELS == 2
    7.46 -        if ( d->arch.ops != &MODE_A_HANDLER )
    7.47 -            d->arch.ops = &MODE_A_HANDLER;
    7.48 -#elif CONFIG_PAGING_LEVELS == 3
    7.49 -        if ( d->arch.ops != &MODE_B_HANDLER )
    7.50 -            d->arch.ops = &MODE_B_HANDLER;
    7.51 -#elif CONFIG_PAGING_LEVELS == 4
    7.52 -        if ( d->arch.ops != &MODE_D_HANDLER )
    7.53 -            d->arch.ops = &MODE_D_HANDLER;
    7.54 +        if ( d->arch.ops != &MODE_32_2_HANDLER )
    7.55 +            d->arch.ops = &MODE_32_2_HANDLER;
    7.56 +#elif CONFIG_PAGING_LEVELS >= 3
    7.57 +        if ( d->arch.ops != &MODE_64_2_HANDLER )
    7.58 +            d->arch.ops = &MODE_64_2_HANDLER;
    7.59  #endif
    7.60          shadow_unlock(d);
    7.61          return 1;
    7.62 @@ -235,14 +228,14 @@ free_shadow_tables(struct domain *d, uns
    7.63      pgentry_64_t *ple = map_domain_page(smfn);
    7.64      int i, external = shadow_mode_external(d);
    7.65  
    7.66 -#if CONFIG_PAGING_LEVELS >=3
    7.67 +#if CONFIG_PAGING_LEVELS >= 3
    7.68      if ( d->arch.ops->guest_paging_levels == PAGING_L2 )
    7.69      {
    7.70          struct page_info *page = mfn_to_page(smfn);
    7.71          for ( i = 0; i < PAE_L3_PAGETABLE_ENTRIES; i++ )
    7.72          {
    7.73              if ( entry_get_flags(ple[i]) & _PAGE_PRESENT )
    7.74 -                free_fake_shadow_l2(d,entry_get_pfn(ple[i]));
    7.75 +                free_fake_shadow_l2(d, entry_get_pfn(ple[i]));
    7.76          }
    7.77  
    7.78          page = mfn_to_page(entry_get_pfn(ple[0]));
    7.79 @@ -346,15 +339,79 @@ void free_monitor_pagetable(struct vcpu 
    7.80      v->arch.monitor_vtable = 0;
    7.81  }
    7.82  #elif CONFIG_PAGING_LEVELS == 3
    7.83 -
    7.84  static void alloc_monitor_pagetable(struct vcpu *v)
    7.85  {
    7.86 -    BUG(); /* PAE not implemented yet */
    7.87 +    unsigned long m2mfn, m3mfn;
    7.88 +    l2_pgentry_t *mpl2e;
    7.89 +    l3_pgentry_t *mpl3e;
    7.90 +    struct page_info *m2mfn_info, *m3mfn_info, *page;
    7.91 +    struct domain *d = v->domain;
    7.92 +    int i;
    7.93 +
    7.94 +    ASSERT(!pagetable_get_paddr(v->arch.monitor_table)); /* we should only get called once */
    7.95 +
    7.96 +    m3mfn_info = alloc_domheap_pages(NULL, 0, ALLOC_DOM_DMA);
    7.97 +    ASSERT( m3mfn_info );
    7.98 +
    7.99 +    m3mfn = page_to_mfn(m3mfn_info);
   7.100 +    mpl3e = (l3_pgentry_t *) map_domain_page_global(m3mfn);
   7.101 +    memset(mpl3e, 0, L3_PAGETABLE_ENTRIES * sizeof(l3_pgentry_t));
   7.102 +
   7.103 +    m2mfn_info = alloc_domheap_page(NULL);
   7.104 +    ASSERT( m2mfn_info );
   7.105 +
   7.106 +    m2mfn = page_to_mfn(m2mfn_info);
   7.107 +    mpl2e = (l2_pgentry_t *) map_domain_page(m2mfn);
   7.108 +    memset(mpl2e, 0, L2_PAGETABLE_ENTRIES * sizeof(l2_pgentry_t));
   7.109 +
   7.110 +    memcpy(&mpl2e[L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES-1)],
   7.111 +           &idle_pg_table_l2[L2_PAGETABLE_FIRST_XEN_SLOT],
   7.112 +           L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t));
   7.113 +    /*
   7.114 +     * Map L2 page into L3
   7.115 +     */
   7.116 +    mpl3e[L3_PAGETABLE_ENTRIES - 1] = l3e_from_pfn(m2mfn, _PAGE_PRESENT);
   7.117 +    page = l3e_get_page(mpl3e[L3_PAGETABLE_ENTRIES - 1]);
   7.118 +
   7.119 +    for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
   7.120 +        mpl2e[l2_table_offset(PERDOMAIN_VIRT_START) + i] =
   7.121 +            l2e_from_page(
   7.122 +                virt_to_page(d->arch.mm_perdomain_pt) + i, 
   7.123 +                __PAGE_HYPERVISOR);
   7.124 +    for ( i = 0; i < (LINEARPT_MBYTES >> (L2_PAGETABLE_SHIFT - 20)); i++ )
   7.125 +        mpl2e[l2_table_offset(LINEAR_PT_VIRT_START) + i] =
   7.126 +            (l3e_get_flags(mpl3e[i]) & _PAGE_PRESENT) ?
   7.127 +            l2e_from_pfn(l3e_get_pfn(mpl3e[i]), __PAGE_HYPERVISOR) :
   7.128 +            l2e_empty();
   7.129 +    mpl2e[l2_table_offset(RO_MPT_VIRT_START)] = l2e_empty();
   7.130 +
   7.131 +    unmap_domain_page(mpl2e);
   7.132 +
   7.133 +    v->arch.monitor_table = mk_pagetable(m3mfn << PAGE_SHIFT); /* < 4GB */
   7.134 +    v->arch.monitor_vtable = (l2_pgentry_t *) mpl3e;
   7.135 +
   7.136 +    if ( v->vcpu_id == 0 )
   7.137 +        alloc_p2m_table(d);
   7.138  }
   7.139  
   7.140  void free_monitor_pagetable(struct vcpu *v)
   7.141  {
   7.142 -    BUG(); /* PAE not implemented yet */
   7.143 +    unsigned long m2mfn, m3mfn;
   7.144 +    /*
   7.145 +     * free monitor_table.
   7.146 +     */
   7.147 +    if ( v->vcpu_id == 0 )
   7.148 +        free_p2m_table(v);
   7.149 +
   7.150 +    m3mfn = pagetable_get_pfn(v->arch.monitor_table);
   7.151 +    m2mfn = l2e_get_pfn(v->arch.monitor_vtable[L3_PAGETABLE_ENTRIES - 1]);
   7.152 +
   7.153 +    free_domheap_page(mfn_to_page(m2mfn));
   7.154 +    unmap_domain_page_global(v->arch.monitor_vtable);
   7.155 +    free_domheap_page(mfn_to_page(m3mfn));
   7.156 +
   7.157 +    v->arch.monitor_table = mk_pagetable(0);
   7.158 +    v->arch.monitor_vtable = 0;
   7.159  }
   7.160  #endif
   7.161  
   7.162 @@ -475,24 +532,35 @@ static void inline
   7.163  free_shadow_l1_table(struct domain *d, unsigned long smfn)
   7.164  {
   7.165      l1_pgentry_t *pl1e = map_domain_page(smfn);
   7.166 +    l1_pgentry_t *pl1e_next = 0, *sl1e_p;
   7.167      int i;
   7.168      struct page_info *spage = mfn_to_page(smfn);
   7.169      u32 min_max = spage->tlbflush_timestamp;
   7.170      int min = SHADOW_MIN(min_max);
   7.171      int max;
   7.172      
   7.173 -    if (d->arch.ops->guest_paging_levels == PAGING_L2)
   7.174 +    if ( d->arch.ops->guest_paging_levels == PAGING_L2 )
   7.175 +    {
   7.176          max = SHADOW_MAX_GUEST32(min_max);
   7.177 +        pl1e_next = map_domain_page(smfn + 1);
   7.178 +    }
   7.179      else
   7.180          max = SHADOW_MAX(min_max);
   7.181  
   7.182      for ( i = min; i <= max; i++ )
   7.183      {
   7.184 -        shadow_put_page_from_l1e(pl1e[i], d);
   7.185 -        pl1e[i] = l1e_empty();
   7.186 +        if ( pl1e_next && i >= L1_PAGETABLE_ENTRIES )
   7.187 +            sl1e_p = &pl1e_next[i - L1_PAGETABLE_ENTRIES];
   7.188 +        else
   7.189 +            sl1e_p = &pl1e[i];
   7.190 +
   7.191 +        shadow_put_page_from_l1e(*sl1e_p, d);
   7.192 +        *sl1e_p = l1e_empty();
   7.193      }
   7.194  
   7.195      unmap_domain_page(pl1e);
   7.196 +    if ( pl1e_next )
   7.197 +        unmap_domain_page(pl1e_next);
   7.198  }
   7.199  
   7.200  static void inline
   7.201 @@ -547,10 +615,8 @@ void free_fake_shadow_l2(struct domain *
   7.202      int i;
   7.203  
   7.204      for ( i = 0; i < PAGETABLE_ENTRIES; i = i + 2 )
   7.205 -    {
   7.206          if ( entry_get_flags(ple[i]) & _PAGE_PRESENT )
   7.207              put_shadow_ref(entry_get_pfn(ple[i]));
   7.208 -    }
   7.209  
   7.210      unmap_domain_page(ple);
   7.211  }
   7.212 @@ -844,7 +910,7 @@ void free_shadow_pages(struct domain *d)
   7.213  
   7.214          if (d->arch.ops->guest_paging_levels == PAGING_L2)
   7.215          {
   7.216 -#if CONFIG_PAGING_LEVELS >=4
   7.217 +#if CONFIG_PAGING_LEVELS >=3
   7.218              free_domheap_pages(page, SL1_ORDER);
   7.219  #else
   7.220              free_domheap_page(page);
   7.221 @@ -1012,13 +1078,6 @@ int __shadow_mode_enable(struct domain *
   7.222                  goto nomem;
   7.223              }
   7.224          }
   7.225 -        else
   7.226 -        {
   7.227 -            // external guests provide their own memory for their P2M maps.
   7.228 -            //
   7.229 -            ASSERT(d == page_get_owner(mfn_to_page(pagetable_get_pfn(
   7.230 -                d->arch.phys_table))));
   7.231 -        }
   7.232      }
   7.233  
   7.234      // Get rid of any shadow pages from any previous shadow mode.
   7.235 @@ -1316,7 +1375,6 @@ alloc_p2m_table(struct domain *d)
   7.236  {
   7.237      struct list_head *list_ent;
   7.238      unsigned long va = RO_MPT_VIRT_START; /*  phys_to_machine_mapping */
   7.239 -//    unsigned long va = PML4_ADDR(264);
   7.240  
   7.241  #if CONFIG_PAGING_LEVELS >= 4
   7.242      l4_pgentry_t *l4tab = NULL;
   7.243 @@ -1360,10 +1418,6 @@ alloc_p2m_table(struct domain *d)
   7.244          if ( !(l4e_get_flags(l4e) & _PAGE_PRESENT) ) 
   7.245          {
   7.246              page = alloc_domheap_page(NULL);
   7.247 -
   7.248 -            if ( !l3tab )
   7.249 -                unmap_domain_page(l3tab);
   7.250 -
   7.251              l3tab = map_domain_page(page_to_mfn(page));
   7.252              memset(l3tab, 0, PAGE_SIZE);
   7.253              l4e = l4tab[l4_table_offset(va)] = 
   7.254 @@ -1376,9 +1430,6 @@ alloc_p2m_table(struct domain *d)
   7.255          if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) ) 
   7.256          {
   7.257              page = alloc_domheap_page(NULL);
   7.258 -            if ( !l2tab )
   7.259 -                unmap_domain_page(l2tab);
   7.260 -
   7.261              l2tab = map_domain_page(page_to_mfn(page));
   7.262              memset(l2tab, 0, PAGE_SIZE);
   7.263              l3e = l3tab[l3_table_offset(va)] = 
   7.264 @@ -1391,10 +1442,6 @@ alloc_p2m_table(struct domain *d)
   7.265          if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) ) 
   7.266          {
   7.267              page = alloc_domheap_page(NULL);
   7.268 -
   7.269 -            if ( !l1tab )
   7.270 -                unmap_domain_page(l1tab);
   7.271 -            
   7.272              l1tab = map_domain_page(page_to_mfn(page));
   7.273              memset(l1tab, 0, PAGE_SIZE);
   7.274              l2e = l2tab[l2_table_offset(va)] = 
   7.275 @@ -1407,9 +1454,6 @@ alloc_p2m_table(struct domain *d)
   7.276          if ( !(l1e_get_flags(l1e) & _PAGE_PRESENT) ) 
   7.277          {
   7.278              page = alloc_domheap_page(NULL);
   7.279 -            if ( !l0tab )
   7.280 -                unmap_domain_page(l0tab);
   7.281 -
   7.282              l0tab = map_domain_page(page_to_mfn(page));
   7.283              memset(l0tab, 0, PAGE_SIZE);
   7.284              l1e = l1tab[l1_table_offset(va)] = 
   7.285 @@ -1418,9 +1462,25 @@ alloc_p2m_table(struct domain *d)
   7.286          else if ( l0tab == NULL) 
   7.287              l0tab = map_domain_page(l1e_get_pfn(l1e));
   7.288  
   7.289 -        l0tab[i & ((1 << PAGETABLE_ORDER) - 1) ] = pfn;
   7.290 +        l0tab[i & ((PAGE_SIZE / sizeof (pfn)) - 1) ] = pfn;
   7.291          list_ent = frame_table[pfn].list.next;
   7.292          va += sizeof (pfn);
   7.293 +
   7.294 +        if ( l2tab )
   7.295 +        {
   7.296 +            unmap_domain_page(l2tab);
   7.297 +            l2tab = NULL;
   7.298 +        }
   7.299 +        if ( l1tab )
   7.300 +        {
   7.301 +            unmap_domain_page(l1tab);
   7.302 +            l1tab = NULL;
   7.303 +        }
   7.304 +        if ( l0tab )
   7.305 +        {
   7.306 +            unmap_domain_page(l0tab);
   7.307 +            l0tab = NULL;
   7.308 +        }
   7.309      }
   7.310  #if CONFIG_PAGING_LEVELS >= 4
   7.311      unmap_domain_page(l4tab);
   7.312 @@ -1428,14 +1488,10 @@ alloc_p2m_table(struct domain *d)
   7.313  #if CONFIG_PAGING_LEVELS >= 3
   7.314      unmap_domain_page(l3tab);
   7.315  #endif
   7.316 -    unmap_domain_page(l2tab);
   7.317 -    unmap_domain_page(l1tab);
   7.318 -    unmap_domain_page(l0tab);
   7.319 -
   7.320      return 1;
   7.321  }
   7.322  
   7.323 -#if CONFIG_PAGING_LEVELS == 4
   7.324 +#if CONFIG_PAGING_LEVELS >= 3
   7.325  static void
   7.326  free_p2m_table(struct vcpu *v)
   7.327  {
   7.328 @@ -1447,9 +1503,9 @@ free_p2m_table(struct vcpu *v)
   7.329  #if CONFIG_PAGING_LEVELS >= 3
   7.330      l3_pgentry_t *l3tab; 
   7.331      l3_pgentry_t l3e;
   7.332 -    int i3;
   7.333  #endif
   7.334  #if CONFIG_PAGING_LEVELS == 4
   7.335 +    int i3;
   7.336      l4_pgentry_t *l4tab; 
   7.337      l4_pgentry_t l4e;
   7.338  #endif
   7.339 @@ -1463,6 +1519,10 @@ free_p2m_table(struct vcpu *v)
   7.340  #if CONFIG_PAGING_LEVELS == 3
   7.341      l3tab = map_domain_page(
   7.342          pagetable_get_pfn(v->arch.monitor_table));
   7.343 +
   7.344 +    va = RO_MPT_VIRT_START;
   7.345 +    l3e = l3tab[l3_table_offset(va)];
   7.346 +    l2tab = map_domain_page(l3e_get_pfn(l3e));
   7.347  #endif
   7.348  
   7.349      for ( va = RO_MPT_VIRT_START; va < RO_MPT_VIRT_END; )
   7.350 @@ -1473,9 +1533,10 @@ free_p2m_table(struct vcpu *v)
   7.351          if ( l4e_get_flags(l4e) & _PAGE_PRESENT )
   7.352          {
   7.353              l3tab = map_domain_page(l4e_get_pfn(l4e));
   7.354 -#endif
   7.355 -            for ( i3 = 0; i3 < L1_PAGETABLE_ENTRIES; i3++ )
   7.356 +
   7.357 +            for ( i3 = 0; i3 < L3_PAGETABLE_ENTRIES; i3++ )
   7.358              {
   7.359 +
   7.360                  l3e = l3tab[l3_table_offset(va)];
   7.361                  if ( l3e_get_flags(l3e) & _PAGE_PRESENT )
   7.362                  {
   7.363 @@ -1483,15 +1544,19 @@ free_p2m_table(struct vcpu *v)
   7.364  
   7.365                      l2tab = map_domain_page(l3e_get_pfn(l3e));
   7.366  
   7.367 -                    for ( i2 = 0; i2 < L1_PAGETABLE_ENTRIES; i2++ )
   7.368 +                    for ( i2 = 0; i2 < L2_PAGETABLE_ENTRIES; i2++ )
   7.369                      {
   7.370 +#endif
   7.371                          l2e = l2tab[l2_table_offset(va)];
   7.372                          if ( l2e_get_flags(l2e) & _PAGE_PRESENT )
   7.373                          {
   7.374                              int i1;
   7.375  
   7.376                              l1tab = map_domain_page(l2e_get_pfn(l2e));
   7.377 -
   7.378 +                            
   7.379 +                            /*
   7.380 +                             * unsigned long phys_to_machine_mapping[]
   7.381 +                             */
   7.382                              for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++ )
   7.383                              {
   7.384                                  l1e = l1tab[l1_table_offset(va)];
   7.385 @@ -1499,26 +1564,28 @@ free_p2m_table(struct vcpu *v)
   7.386                                  if ( l1e_get_flags(l1e) & _PAGE_PRESENT )
   7.387                                      free_domheap_page(mfn_to_page(l1e_get_pfn(l1e)));
   7.388  
   7.389 -                                va += 1UL << L1_PAGETABLE_SHIFT;
   7.390 +                                va += PAGE_SIZE;
   7.391                              }
   7.392                              unmap_domain_page(l1tab);
   7.393                              free_domheap_page(mfn_to_page(l2e_get_pfn(l2e)));
   7.394                          }
   7.395                          else
   7.396 -                            va += 1UL << L2_PAGETABLE_SHIFT;
   7.397 +                            va += PAGE_SIZE * L1_PAGETABLE_ENTRIES;
   7.398 +
   7.399 +#if CONFIG_PAGING_LEVELS == 4                    
   7.400                      }
   7.401                      unmap_domain_page(l2tab);
   7.402                      free_domheap_page(mfn_to_page(l3e_get_pfn(l3e)));
   7.403                  }
   7.404                  else
   7.405 -                    va += 1UL << L3_PAGETABLE_SHIFT;
   7.406 +                    va += PAGE_SIZE * L1_PAGETABLE_ENTRIES * L2_PAGETABLE_ENTRIES;
   7.407              }
   7.408 -#if CONFIG_PAGING_LEVELS == 4
   7.409              unmap_domain_page(l3tab);
   7.410              free_domheap_page(mfn_to_page(l4e_get_pfn(l4e)));
   7.411          }
   7.412          else
   7.413 -            va += 1UL << L4_PAGETABLE_SHIFT;
   7.414 +            va += PAGE_SIZE * 
   7.415 +                L1_PAGETABLE_ENTRIES * L2_PAGETABLE_ENTRIES * L3_PAGETABLE_ENTRIES;
   7.416  #endif
   7.417      }
   7.418  
     8.1 --- a/xen/include/asm-x86/hvm/vmx/vmx.h	Mon Feb 06 18:02:36 2006 +0000
     8.2 +++ b/xen/include/asm-x86/hvm/vmx/vmx.h	Mon Feb 06 23:25:31 2006 +0100
     8.3 @@ -174,10 +174,10 @@ extern unsigned int cpu_rev;
     8.4  #define DEBUG_REG_ACCESS_REG            0xf00   /* 11:8, general purpose register */
     8.5   
     8.6  /* These bits in the CR4 are owned by the host */
     8.7 -#ifdef __i386__
     8.8 +#if CONFIG_PAGING_LEVELS >= 3
     8.9 +#define VMX_CR4_HOST_MASK (X86_CR4_VMXE | X86_CR4_PAE)
    8.10 +#else
    8.11  #define VMX_CR4_HOST_MASK (X86_CR4_VMXE)
    8.12 -#else
    8.13 -#define VMX_CR4_HOST_MASK (X86_CR4_VMXE | X86_CR4_PAE)
    8.14  #endif
    8.15  
    8.16  #define VMCALL_OPCODE   ".byte 0x0f,0x01,0xc1\n"
     9.1 --- a/xen/include/asm-x86/shadow_64.h	Mon Feb 06 18:02:36 2006 +0000
     9.2 +++ b/xen/include/asm-x86/shadow_64.h	Mon Feb 06 23:25:31 2006 +0100
     9.3 @@ -29,7 +29,15 @@
     9.4  #include <asm/shadow.h>
     9.5  #include <asm/shadow_ops.h>
     9.6  
     9.7 -extern struct shadow_ops MODE_B_HANDLER;
     9.8 +/*
     9.9 + * The naming convention of the shadow_ops:
    9.10 + * MODE_<pgentry size>_<guest paging levels>_HANDLER
    9.11 + */
    9.12 +extern struct shadow_ops MODE_64_2_HANDLER;
    9.13 +extern struct shadow_ops MODE_64_3_HANDLER;
    9.14 +#if CONFIG_PAGING_LEVELS == 4
    9.15 +extern struct shadow_ops MODE_64_4_HANDLER;
    9.16 +#endif
    9.17  
    9.18  #if CONFIG_PAGING_LEVELS == 3
    9.19  #define L4_PAGETABLE_SHIFT      39
    9.20 @@ -118,7 +126,6 @@ static inline int  table_offset_64(unsig
    9.21  #endif
    9.22  #endif
    9.23          default:
    9.24 -            //printk("<table_offset_64> level %d is too big\n", level);
    9.25              return -1;
    9.26      }
    9.27  }
    9.28 @@ -142,7 +149,7 @@ static inline void free_out_of_sync_stat
    9.29  }
    9.30  
    9.31  static inline int __entry(
    9.32 -    struct vcpu *v, u64 va, pgentry_64_t *e_p, u32 flag)
    9.33 +    struct vcpu *v, unsigned long va, pgentry_64_t *e_p, u32 flag)
    9.34  {
    9.35      int i;
    9.36      pgentry_64_t *le_e;
    9.37 @@ -197,7 +204,7 @@ static inline int __entry(
    9.38  }
    9.39  
    9.40  static inline int __rw_entry(
    9.41 -    struct vcpu *v, u64 va, void *e_p, u32 flag)
    9.42 +    struct vcpu *v, unsigned long va, void *e_p, u32 flag)
    9.43  {
    9.44      pgentry_64_t *e = (pgentry_64_t *)e_p;
    9.45  
    9.46 @@ -235,7 +242,7 @@ static inline int __rw_entry(
    9.47    __rw_entry(v, va, gl3e, GUEST_ENTRY | GET_ENTRY | PAGING_L3)
    9.48  
    9.49  static inline int  __guest_set_l2e(
    9.50 -    struct vcpu *v, u64 va, void *value, int size)
    9.51 +    struct vcpu *v, unsigned long va, void *value, int size)
    9.52  {
    9.53      switch(size) {
    9.54          case 4:
    9.55 @@ -258,10 +265,10 @@ static inline int  __guest_set_l2e(
    9.56  }
    9.57  
    9.58  #define __guest_set_l2e(v, va, value) \
    9.59 -    __guest_set_l2e(v, (u64)va, value, sizeof(*value))
    9.60 +    __guest_set_l2e(v, (unsigned long)va, value, sizeof(*value))
    9.61  
    9.62  static inline int  __guest_get_l2e(
    9.63 -  struct vcpu *v, u64 va, void *gl2e, int size)
    9.64 +  struct vcpu *v, unsigned long va, void *gl2e, int size)
    9.65  {
    9.66      switch(size) {
    9.67          case 4:
    9.68 @@ -283,10 +290,10 @@ static inline int  __guest_get_l2e(
    9.69  }
    9.70  
    9.71  #define __guest_get_l2e(v, va, gl2e) \
    9.72 -    __guest_get_l2e(v, (u64)va, gl2e, sizeof(*gl2e))
    9.73 +    __guest_get_l2e(v, (unsigned long)va, gl2e, sizeof(*gl2e))
    9.74  
    9.75  static inline int  __guest_set_l1e(
    9.76 -  struct vcpu *v, u64 va, void *value, int size)
    9.77 +  struct vcpu *v, unsigned long va, void *value, int size)
    9.78  {
    9.79      switch(size) {
    9.80          case 4:
    9.81 @@ -322,10 +329,10 @@ static inline int  __guest_set_l1e(
    9.82  }
    9.83  
    9.84  #define __guest_set_l1e(v, va, value) \
    9.85 -     __guest_set_l1e(v, (u64)va, value, sizeof(*value))
    9.86 +     __guest_set_l1e(v, (unsigned long)va, value, sizeof(*value))
    9.87  
    9.88  static inline int  __guest_get_l1e(
    9.89 -  struct vcpu *v, u64 va, void *gl1e, int size)
    9.90 +  struct vcpu *v, unsigned long va, void *gl1e, int size)
    9.91  {
    9.92      switch(size) {
    9.93          case 4:
    9.94 @@ -362,7 +369,7 @@ static inline int  __guest_get_l1e(
    9.95  }
    9.96  
    9.97  #define __guest_get_l1e(v, va, gl1e) \
    9.98 -    __guest_get_l1e(v, (u64)va, gl1e, sizeof(*gl1e))
    9.99 +    __guest_get_l1e(v, (unsigned long)va, gl1e, sizeof(*gl1e))
   9.100  
   9.101  static inline void entry_general(
   9.102    struct domain *d,