direct-io.hg

changeset 11794:37ee88ca1440

[XEN] Don't keep shadows of PAE guest l3 tables.

Holding pages readonly that have guest PAE l3 tables in them means
a performance hit and a potential bug if the guest puts other
data structures on the same page as an l3 table. Instead of shadowing
them, treat PAE guests as if they had four CR3 registers, and load all
four top-level entries when we handle a CR3 write. This also cuts about
500 lines of special-case refcounting and re-copying code.

Signed-off-by: Tim Deegan <Tim.Deegan@xensource.com>
author Tim Deegan <Tim.Deegan@xensource.com>
date Tue Oct 17 11:11:48 2006 +0100 (2006-10-17)
parents 22885e4c1275
children a0d33cc09333
files xen/arch/x86/hvm/svm/svm.c xen/arch/x86/hvm/svm/vmcb.c xen/arch/x86/mm/shadow/common.c xen/arch/x86/mm/shadow/multi.c xen/arch/x86/mm/shadow/multi.h xen/arch/x86/mm/shadow/private.h xen/arch/x86/mm/shadow/types.h xen/include/asm-x86/domain.h xen/include/asm-x86/hvm/vcpu.h xen/include/asm-x86/mm.h xen/include/asm-x86/shadow.h
line diff
     1.1 --- a/xen/arch/x86/hvm/svm/svm.c	Tue Oct 17 11:07:11 2006 +0100
     1.2 +++ b/xen/arch/x86/hvm/svm/svm.c	Tue Oct 17 11:11:48 2006 +0100
     1.3 @@ -1739,9 +1739,6 @@ static int mov_to_cr(int gpreg, int cr, 
     1.4              if (old_base_mfn)
     1.5                  put_page(mfn_to_page(old_base_mfn));
     1.6  
     1.7 -            /*
     1.8 -             * arch.shadow_table should now hold the next CR3 for shadow
     1.9 -             */
    1.10              v->arch.hvm_svm.cpu_cr3 = value;
    1.11              update_cr3(v);
    1.12              vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3; 
    1.13 @@ -1788,10 +1785,6 @@ static int mov_to_cr(int gpreg, int cr, 
    1.14  
    1.15                  vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3; 
    1.16  
    1.17 -                /*
    1.18 -                 * arch->shadow_table should hold the next CR3 for shadow
    1.19 -                 */
    1.20 -
    1.21                  HVM_DBG_LOG(DBG_LEVEL_VMMU, 
    1.22                              "Update CR3 value = %lx, mfn = %lx",
    1.23                              v->arch.hvm_svm.cpu_cr3, mfn);
    1.24 @@ -2355,7 +2348,7 @@ void svm_dump_regs(const char *from, str
    1.25  {
    1.26      struct vcpu *v = current;
    1.27      struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
    1.28 -    unsigned long pt = pagetable_get_paddr(v->arch.shadow_table);
    1.29 +    unsigned long pt = v->arch.hvm_vcpu.hw_cr3;
    1.30  
    1.31      printf("%s: guest registers from %s:\n", __func__, from);
    1.32  #if defined (__x86_64__)
    1.33 @@ -2681,11 +2674,11 @@ asmlinkage void svm_vmexit_handler(struc
    1.34          if (do_debug)
    1.35          {
    1.36              printk("%s:+ guest_table = 0x%08x, monitor_table = 0x%08x, "
    1.37 -                   "shadow_table = 0x%08x\n", 
    1.38 +                   "hw_cr3 = 0x%16lx\n", 
    1.39                     __func__,
    1.40                     (int) v->arch.guest_table.pfn,
    1.41                     (int) v->arch.monitor_table.pfn, 
    1.42 -                   (int) v->arch.shadow_table.pfn);
    1.43 +                   (long unsigned int) v->arch.hvm_vcpu.hw_cr3);
    1.44  
    1.45              svm_dump_vmcb(__func__, vmcb);
    1.46              svm_dump_regs(__func__, regs);
    1.47 @@ -2913,10 +2906,10 @@ asmlinkage void svm_vmexit_handler(struc
    1.48      if (do_debug) 
    1.49      {
    1.50          printk("vmexit_handler():- guest_table = 0x%08x, "
    1.51 -               "monitor_table = 0x%08x, shadow_table = 0x%08x\n",
    1.52 +               "monitor_table = 0x%08x, hw_cr3 = 0x%16x\n",
    1.53                 (int)v->arch.guest_table.pfn,
    1.54                 (int)v->arch.monitor_table.pfn, 
    1.55 -               (int)v->arch.shadow_table.pfn);
    1.56 +               (int)v->arch.hvm_vcpu.hw_cr3);
    1.57          printk("svm_vmexit_handler: Returning\n");
    1.58      }
    1.59  #endif
     2.1 --- a/xen/arch/x86/hvm/svm/vmcb.c	Tue Oct 17 11:07:11 2006 +0100
     2.2 +++ b/xen/arch/x86/hvm/svm/vmcb.c	Tue Oct 17 11:11:48 2006 +0100
     2.3 @@ -372,8 +372,8 @@ void svm_do_launch(struct vcpu *v)
     2.4      if (svm_dbg_on) 
     2.5      {
     2.6          unsigned long pt;
     2.7 -        pt = pagetable_get_paddr(v->arch.shadow_table);
     2.8 -        printk("%s: shadow_table = %lx\n", __func__, pt);
     2.9 +        printk("%s: hw_cr3 = %llx\n", __func__, 
    2.10 +               (unsigned long long) v->arch.hvm_vcpu.hw_cr3);
    2.11          pt = pagetable_get_paddr(v->arch.guest_table);
    2.12          printk("%s: guest_table  = %lx\n", __func__, pt);
    2.13          pt = pagetable_get_paddr(v->domain->arch.phys_table);
    2.14 @@ -387,8 +387,9 @@ void svm_do_launch(struct vcpu *v)
    2.15      {
    2.16          printk("%s: cr3 = %lx ", __func__, (unsigned long)vmcb->cr3);
    2.17          printk("init_guest_table: guest_table = 0x%08x, monitor_table = 0x%08x,"
    2.18 -                " shadow_table = 0x%08x\n", (int)v->arch.guest_table.pfn, 
    2.19 -                (int)v->arch.monitor_table.pfn, (int)v->arch.shadow_table.pfn);
    2.20 +                " hw_cr3 = 0x%16llx\n", (int)v->arch.guest_table.pfn, 
    2.21 +               (int)v->arch.monitor_table.pfn, 
    2.22 +               (unsigned long long) v->arch.hvm_vcpu.hw_cr3);
    2.23      }
    2.24  
    2.25      v->arch.schedule_tail = arch_svm_do_resume;
     3.1 --- a/xen/arch/x86/mm/shadow/common.c	Tue Oct 17 11:07:11 2006 +0100
     3.2 +++ b/xen/arch/x86/mm/shadow/common.c	Tue Oct 17 11:11:48 2006 +0100
     3.3 @@ -283,11 +283,8 @@ int
     3.4      if ( page->shadow_flags & SHF_L2H_PAE ) 
     3.5          result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl2he, 3, 3)
     3.6              (v, gmfn, entry, size);
     3.7 -    if ( page->shadow_flags & SHF_L3_PAE ) 
     3.8 -        result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl3e, 3, 3)
     3.9 -            (v, gmfn, entry, size);
    3.10  #else /* 32-bit non-PAE hypervisor does not support PAE guests */
    3.11 -    ASSERT((page->shadow_flags & (SHF_L3_PAE|SHF_L2_PAE|SHF_L1_PAE)) == 0);
    3.12 +    ASSERT((page->shadow_flags & (SHF_L2H_PAE|SHF_L2_PAE|SHF_L1_PAE)) == 0);
    3.13  #endif
    3.14  
    3.15  #if CONFIG_PAGING_LEVELS >= 4 
    3.16 @@ -427,22 +424,16 @@ shadow_validate_guest_pt_write(struct vc
    3.17  /* Allocating shadow pages
    3.18   * -----------------------
    3.19   *
    3.20 - * Most shadow pages are allocated singly, but there are two cases where we 
    3.21 - * need to allocate multiple pages together.
    3.22 - * 
    3.23 - * 1: Shadowing 32-bit guest tables on PAE or 64-bit shadows.
    3.24 - *    A 32-bit guest l1 table covers 4MB of virtuial address space,
    3.25 - *    and needs to be shadowed by two PAE/64-bit l1 tables (covering 2MB
    3.26 - *    of virtual address space each).  Similarly, a 32-bit guest l2 table 
    3.27 - *    (4GB va) needs to be shadowed by four PAE/64-bit l2 tables (1GB va 
    3.28 - *    each).  These multi-page shadows are contiguous and aligned; 
    3.29 - *    functions for handling offsets into them are defined in shadow.c 
    3.30 - *    (shadow_l1_index() etc.)
    3.31 + * Most shadow pages are allocated singly, but there is one case where
    3.32 + * we need to allocate multiple pages together: shadowing 32-bit guest
    3.33 + * tables on PAE or 64-bit shadows.  A 32-bit guest l1 table covers 4MB
    3.34 + * of virtuial address space, and needs to be shadowed by two PAE/64-bit
    3.35 + * l1 tables (covering 2MB of virtual address space each).  Similarly, a
    3.36 + * 32-bit guest l2 table (4GB va) needs to be shadowed by four
    3.37 + * PAE/64-bit l2 tables (1GB va each).  These multi-page shadows are
    3.38 + * contiguous and aligned; functions for handling offsets into them are
    3.39 + * defined in shadow.c (shadow_l1_index() etc.)
    3.40   *    
    3.41 - * 2: Shadowing PAE top-level pages.  Each guest page that contains
    3.42 - *    any PAE top-level pages requires two shadow pages to shadow it.
    3.43 - *    They contain alternating l3 tables and pae_l3_bookkeeping structs.
    3.44 - *
    3.45   * This table shows the allocation behaviour of the different modes:
    3.46   *
    3.47   * Xen paging      32b  pae  pae  64b  64b  64b
    3.48 @@ -452,7 +443,7 @@ shadow_validate_guest_pt_write(struct vc
    3.49   *
    3.50   * sl1 size         4k   8k   4k   8k   4k   4k
    3.51   * sl2 size         4k  16k   4k  16k   4k   4k
    3.52 - * sl3 size         -    -    8k   -    8k   4k
    3.53 + * sl3 size         -    -    -    -    -    4k
    3.54   * sl4 size         -    -    -    -    -    4k
    3.55   *
    3.56   * We allocate memory from xen in four-page units and break them down
    3.57 @@ -506,7 +497,6 @@ shadow_order(u32 shadow_type)
    3.58          0, /* PGC_SH_fl1_pae_shadow */
    3.59          0, /* PGC_SH_l2_pae_shadow  */
    3.60          0, /* PGC_SH_l2h_pae_shadow */
    3.61 -        1, /* PGC_SH_l3_pae_shadow  */
    3.62          0, /* PGC_SH_l1_64_shadow   */
    3.63          0, /* PGC_SH_fl1_64_shadow  */
    3.64          0, /* PGC_SH_l2_64_shadow   */
    3.65 @@ -549,7 +539,8 @@ void shadow_unhook_mappings(struct vcpu 
    3.66  #endif
    3.67          break;
    3.68  #if CONFIG_PAGING_LEVELS >= 3
    3.69 -    case PGC_SH_l3_pae_shadow >> PGC_SH_type_shift:
    3.70 +    case PGC_SH_l2_pae_shadow >> PGC_SH_type_shift:
    3.71 +    case PGC_SH_l2h_pae_shadow >> PGC_SH_type_shift:
    3.72          SHADOW_INTERNAL_NAME(sh_unhook_pae_mappings,3,3)(v,smfn);
    3.73          break;
    3.74  #endif
    3.75 @@ -590,18 +581,8 @@ void shadow_prealloc(struct domain *d, u
    3.76          pg = list_entry(l, struct page_info, list);
    3.77          smfn = page_to_mfn(pg);
    3.78  
    3.79 -#if CONFIG_PAGING_LEVELS >= 3
    3.80 -        if ( (pg->count_info & PGC_SH_type_mask) == PGC_SH_l3_pae_shadow )
    3.81 -        {
    3.82 -            /* For PAE, we need to unpin each subshadow on this shadow */
    3.83 -            SHADOW_INTERNAL_NAME(sh_unpin_all_l3_subshadows,3,3)(v, smfn);
    3.84 -        } 
    3.85 -        else 
    3.86 -#endif /* 32-bit code always takes this branch */
    3.87 -        {
    3.88 -            /* Unpin this top-level shadow */
    3.89 -            sh_unpin(v, smfn);
    3.90 -        }
    3.91 +        /* Unpin this top-level shadow */
    3.92 +        sh_unpin(v, smfn);
    3.93  
    3.94          /* See if that freed up a chunk of appropriate size */
    3.95          if ( chunk_is_available(d, order) ) return;
    3.96 @@ -623,8 +604,12 @@ void shadow_prealloc(struct domain *d, u
    3.97          shadow_unhook_mappings(v, smfn);
    3.98  
    3.99          /* Need to flush TLB if we've altered our own tables */
   3.100 -        if ( !shadow_mode_external(d) 
   3.101 -             && pagetable_get_pfn(current->arch.shadow_table) == mfn_x(smfn) )
   3.102 +        if ( !shadow_mode_external(d) &&
   3.103 +             (pagetable_get_pfn(current->arch.shadow_table[0]) == mfn_x(smfn)
   3.104 +              || pagetable_get_pfn(current->arch.shadow_table[1]) == mfn_x(smfn)
   3.105 +              || pagetable_get_pfn(current->arch.shadow_table[2]) == mfn_x(smfn)
   3.106 +              || pagetable_get_pfn(current->arch.shadow_table[3]) == mfn_x(smfn)
   3.107 +                 ) )
   3.108              local_flush_tlb();
   3.109          
   3.110          /* See if that freed up a chunk of appropriate size */
   3.111 @@ -923,9 +908,20 @@ p2m_next_level(struct domain *d, mfn_t *
   3.112  #if CONFIG_PAGING_LEVELS == 3
   3.113          if (type == PGT_l2_page_table)
   3.114          {
   3.115 +            struct vcpu *v;
   3.116              /* We have written to the p2m l3: need to sync the per-vcpu
   3.117               * copies of it in the monitor tables */
   3.118              p2m_install_entry_in_monitors(d, (l3_pgentry_t *)p2m_entry);
   3.119 +            /* Also, any vcpus running on shadows of the p2m need to 
   3.120 +             * reload their CR3s so the change propagates to the shadow */
   3.121 +            ASSERT(shadow_lock_is_acquired(d));
   3.122 +            for_each_vcpu(d, v) 
   3.123 +            {
   3.124 +                if ( pagetable_get_pfn(v->arch.guest_table) 
   3.125 +                     == pagetable_get_pfn(d->arch.phys_table) 
   3.126 +                     && v->arch.shadow.mode != NULL )
   3.127 +                    v->arch.shadow.mode->update_cr3(v);
   3.128 +            }
   3.129          }
   3.130  #endif
   3.131          /* The P2M can be shadowed: keep the shadows synced */
   3.132 @@ -1714,9 +1710,6 @@ void sh_destroy_shadow(struct vcpu *v, m
   3.133      case PGC_SH_l2h_pae_shadow >> PGC_SH_type_shift:
   3.134          SHADOW_INTERNAL_NAME(sh_destroy_l2_shadow, 3, 3)(v, smfn);
   3.135          break;
   3.136 -    case PGC_SH_l3_pae_shadow >> PGC_SH_type_shift:
   3.137 -        SHADOW_INTERNAL_NAME(sh_destroy_l3_shadow, 3, 3)(v, smfn);
   3.138 -        break;
   3.139  #endif
   3.140  
   3.141  #if CONFIG_PAGING_LEVELS >= 4
   3.142 @@ -1771,7 +1764,6 @@ int shadow_remove_write_access(struct vc
   3.143  #endif
   3.144          NULL, /* l2_pae  */
   3.145          NULL, /* l2h_pae */
   3.146 -        NULL, /* l3_pae  */
   3.147  #if CONFIG_PAGING_LEVELS >= 4
   3.148          SHADOW_INTERNAL_NAME(sh_remove_write_access,4,4), /* l1_64   */
   3.149          SHADOW_INTERNAL_NAME(sh_remove_write_access,4,4), /* fl1_64  */
   3.150 @@ -1935,7 +1927,6 @@ int shadow_remove_all_mappings(struct vc
   3.151  #endif
   3.152          NULL, /* l2_pae  */
   3.153          NULL, /* l2h_pae */
   3.154 -        NULL, /* l3_pae  */
   3.155  #if CONFIG_PAGING_LEVELS >= 4
   3.156          SHADOW_INTERNAL_NAME(sh_remove_all_mappings,4,4), /* l1_64   */
   3.157          SHADOW_INTERNAL_NAME(sh_remove_all_mappings,4,4), /* fl1_64  */
   3.158 @@ -2008,7 +1999,8 @@ static int sh_remove_shadow_via_pointer(
   3.159      ASSERT((pg->count_info & PGC_SH_type_mask) > 0);
   3.160      ASSERT((pg->count_info & PGC_SH_type_mask) < PGC_SH_max_shadow);
   3.161      ASSERT((pg->count_info & PGC_SH_type_mask) != PGC_SH_l2_32_shadow);
   3.162 -    ASSERT((pg->count_info & PGC_SH_type_mask) != PGC_SH_l3_pae_shadow);
   3.163 +    ASSERT((pg->count_info & PGC_SH_type_mask) != PGC_SH_l2_pae_shadow);
   3.164 +    ASSERT((pg->count_info & PGC_SH_type_mask) != PGC_SH_l2h_pae_shadow);
   3.165      ASSERT((pg->count_info & PGC_SH_type_mask) != PGC_SH_l4_64_shadow);
   3.166      
   3.167      if (pg->up == 0) return 0;
   3.168 @@ -2037,7 +2029,6 @@ static int sh_remove_shadow_via_pointer(
   3.169      case PGC_SH_l1_pae_shadow:
   3.170      case PGC_SH_l2_pae_shadow:
   3.171      case PGC_SH_l2h_pae_shadow:
   3.172 -    case PGC_SH_l3_pae_shadow:
   3.173          SHADOW_INTERNAL_NAME(sh_clear_shadow_entry,3,3)(v, vaddr, pmfn);
   3.174          break;
   3.175  #if CONFIG_PAGING_LEVELS >= 4
   3.176 @@ -2091,11 +2082,9 @@ void sh_remove_shadows(struct vcpu *v, m
   3.177  #if CONFIG_PAGING_LEVELS >= 3
   3.178          SHADOW_INTERNAL_NAME(sh_remove_l1_shadow,3,3), /* l2_pae  */
   3.179          SHADOW_INTERNAL_NAME(sh_remove_l1_shadow,3,3), /* l2h_pae */
   3.180 -        SHADOW_INTERNAL_NAME(sh_remove_l2_shadow,3,3), /* l3_pae  */
   3.181  #else 
   3.182          NULL, /* l2_pae  */
   3.183          NULL, /* l2h_pae */
   3.184 -        NULL, /* l3_pae  */
   3.185  #endif
   3.186          NULL, /* l1_64   */
   3.187          NULL, /* fl1_64  */
   3.188 @@ -2121,9 +2110,8 @@ void sh_remove_shadows(struct vcpu *v, m
   3.189          ((1 << (PGC_SH_l2h_pae_shadow >> PGC_SH_type_shift))
   3.190           | (1 << (PGC_SH_l2_pae_shadow >> PGC_SH_type_shift))), /* l1_pae  */
   3.191          0, /* fl1_pae */
   3.192 -        1 << (PGC_SH_l3_pae_shadow >> PGC_SH_type_shift), /* l2_pae  */
   3.193 -        1 << (PGC_SH_l3_pae_shadow >> PGC_SH_type_shift), /* l2h_pae  */
   3.194 -        0, /* l3_pae  */
   3.195 +        0, /* l2_pae  */
   3.196 +        0, /* l2h_pae  */
   3.197          1 << (PGC_SH_l2_64_shadow >> PGC_SH_type_shift), /* l1_64   */
   3.198          0, /* fl1_64  */
   3.199          1 << (PGC_SH_l3_64_shadow >> PGC_SH_type_shift), /* l2_64   */
   3.200 @@ -2166,17 +2154,14 @@ void sh_remove_shadows(struct vcpu *v, m
   3.201      smfn = shadow_hash_lookup(v, mfn_x(gmfn), t);                       \
   3.202      if ( mfn_to_page(smfn)->count_info & PGC_SH_pinned )                \
   3.203          sh_unpin(v, smfn);                                              \
   3.204 -    if ( (_type) == PGC_SH_l3_pae_shadow )                              \
   3.205 -        SHADOW_INTERNAL_NAME(sh_unpin_all_l3_subshadows,3,3)(v, smfn);  \
   3.206  } while (0)
   3.207  
   3.208      if ( sh_flags & SHF_L1_32 )   DO_UNSHADOW(PGC_SH_l1_32_shadow);
   3.209      if ( sh_flags & SHF_L2_32 )   DO_UNPIN(PGC_SH_l2_32_shadow);
   3.210  #if CONFIG_PAGING_LEVELS >= 3
   3.211      if ( sh_flags & SHF_L1_PAE )  DO_UNSHADOW(PGC_SH_l1_pae_shadow);
   3.212 -    if ( sh_flags & SHF_L2_PAE )  DO_UNSHADOW(PGC_SH_l2_pae_shadow);
   3.213 -    if ( sh_flags & SHF_L2H_PAE ) DO_UNSHADOW(PGC_SH_l2h_pae_shadow);
   3.214 -    if ( sh_flags & SHF_L3_PAE )  DO_UNPIN(PGC_SH_l3_pae_shadow);
   3.215 +    if ( sh_flags & SHF_L2_PAE )  DO_UNPIN(PGC_SH_l2_pae_shadow);
   3.216 +    if ( sh_flags & SHF_L2H_PAE ) DO_UNPIN(PGC_SH_l2h_pae_shadow);
   3.217  #if CONFIG_PAGING_LEVELS >= 4
   3.218      if ( sh_flags & SHF_L1_64 )   DO_UNSHADOW(PGC_SH_l1_64_shadow);
   3.219      if ( sh_flags & SHF_L2_64 )   DO_UNSHADOW(PGC_SH_l2_64_shadow);
   3.220 @@ -2188,14 +2173,6 @@ void sh_remove_shadows(struct vcpu *v, m
   3.221  #undef DO_UNSHADOW
   3.222  #undef DO_UNPIN
   3.223  
   3.224 -
   3.225 -#if CONFIG_PAGING_LEVELS > 2
   3.226 -    /* We may have caused some PAE l3 entries to change: need to 
   3.227 -     * fix up the copies of them in various places */
   3.228 -    if ( sh_flags & (SHF_L2_PAE|SHF_L2H_PAE) )
   3.229 -        sh_pae_recopy(v->domain);
   3.230 -#endif
   3.231 -
   3.232      /* If that didn't catch the shadows, something is wrong */
   3.233      if ( !fast && (pg->count_info & PGC_page_table) )
   3.234      {
   3.235 @@ -3127,7 +3104,6 @@ void shadow_audit_tables(struct vcpu *v)
   3.236          SHADOW_INTERNAL_NAME(sh_audit_fl1_table,3,3), /* fl1_pae */
   3.237          SHADOW_INTERNAL_NAME(sh_audit_l2_table,3,3),  /* l2_pae  */
   3.238          SHADOW_INTERNAL_NAME(sh_audit_l2_table,3,3),  /* l2h_pae */
   3.239 -        SHADOW_INTERNAL_NAME(sh_audit_l3_table,3,3),  /* l3_pae  */
   3.240  #if CONFIG_PAGING_LEVELS >= 4
   3.241          SHADOW_INTERNAL_NAME(sh_audit_l1_table,4,4),  /* l1_64   */
   3.242          SHADOW_INTERNAL_NAME(sh_audit_fl1_table,4,4), /* fl1_64  */
   3.243 @@ -3152,7 +3128,7 @@ void shadow_audit_tables(struct vcpu *v)
   3.244          {
   3.245          case 2: mask = (SHF_L1_32|SHF_FL1_32|SHF_L2_32); break;
   3.246          case 3: mask = (SHF_L1_PAE|SHF_FL1_PAE|SHF_L2_PAE
   3.247 -                        |SHF_L2H_PAE|SHF_L3_PAE); break;
   3.248 +                        |SHF_L2H_PAE); break;
   3.249          case 4: mask = (SHF_L1_64|SHF_FL1_64|SHF_L2_64  
   3.250                          |SHF_L3_64|SHF_L4_64); break;
   3.251          default: BUG();
     4.1 --- a/xen/arch/x86/mm/shadow/multi.c	Tue Oct 17 11:07:11 2006 +0100
     4.2 +++ b/xen/arch/x86/mm/shadow/multi.c	Tue Oct 17 11:11:48 2006 +0100
     4.3 @@ -21,20 +21,6 @@
     4.4   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
     4.5   */
     4.6  
     4.7 -// DESIGN QUESTIONS:
     4.8 -// Why use subshadows for PAE guests?
     4.9 -// - reduces pressure in the hash table
    4.10 -// - reduces shadow size (64-vs-4096 bytes of shadow for 32 bytes of guest L3)
    4.11 -// - would need to find space in the page_info to store 7 more bits of
    4.12 -//   backpointer
    4.13 -// - independent shadows of 32 byte chunks makes it non-obvious how to quickly
    4.14 -//   figure out when to demote the guest page from l3 status
    4.15 -//
    4.16 -// PAE Xen HVM guests are restricted to 8GB of pseudo-physical address space.
    4.17 -// - Want to map the P2M table into the 16MB RO_MPT hole in Xen's address
    4.18 -//   space for both PV and HVM guests.
    4.19 -//
    4.20 -
    4.21  #include <xen/config.h>
    4.22  #include <xen/types.h>
    4.23  #include <xen/mm.h>
    4.24 @@ -118,9 +104,6 @@ static char *fetch_type_names[] = {
    4.25  #endif
    4.26  
    4.27  /* XXX forward declarations */
    4.28 -#if (GUEST_PAGING_LEVELS == 3) && (SHADOW_PAGING_LEVELS == 3)
    4.29 -static unsigned long hvm_pae_copy_root(struct vcpu *v, l3_pgentry_t *l3tab, int clear_res);
    4.30 -#endif
    4.31  static inline void sh_update_linear_entries(struct vcpu *v);
    4.32  
    4.33  /**************************************************************************/
    4.34 @@ -129,8 +112,6 @@ static inline void sh_update_linear_entr
    4.35   * Normal case: maps the mfn of a guest page to the mfn of its shadow page.
    4.36   * FL1's:       maps the *gfn* of the start of a superpage to the mfn of a
    4.37   *              shadow L1 which maps its "splinters".
    4.38 - * PAE CR3s:    maps the 32-byte aligned, 32-bit CR3 value to the mfn of the
    4.39 - *              PAE L3 info page for that CR3 value.
    4.40   */
    4.41  
    4.42  static inline mfn_t 
    4.43 @@ -429,18 +410,16 @@ static void sh_audit_gw(struct vcpu *v, 
    4.44      if ( !(SHADOW_AUDIT_ENABLE) )
    4.45          return;
    4.46  
    4.47 -#if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
    4.48  #if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
    4.49      if ( valid_mfn(gw->l4mfn)
    4.50           && valid_mfn((smfn = get_shadow_status(v, gw->l4mfn, 
    4.51                                                  PGC_SH_l4_shadow))) )
    4.52          (void) sh_audit_l4_table(v, smfn, _mfn(INVALID_MFN));
    4.53 -#endif /* PAE or 64... */
    4.54      if ( valid_mfn(gw->l3mfn)
    4.55           && valid_mfn((smfn = get_shadow_status(v, gw->l3mfn, 
    4.56                                                  PGC_SH_l3_shadow))) )
    4.57          (void) sh_audit_l3_table(v, smfn, _mfn(INVALID_MFN));
    4.58 -#endif /* All levels... */
    4.59 +#endif /* PAE or 64... */
    4.60      if ( valid_mfn(gw->l2mfn) )
    4.61      {
    4.62          if ( valid_mfn((smfn = get_shadow_status(v, gw->l2mfn, 
    4.63 @@ -498,8 +477,7 @@ static u32 guest_set_ad_bits(struct vcpu
    4.64      flags = guest_l1e_get_flags(*ep);
    4.65  
    4.66      /* PAE l3s do not have A and D bits */
    4.67 -    if ( unlikely(GUEST_PAGING_LEVELS == 3 && level == 3) )
    4.68 -        return flags;
    4.69 +    ASSERT(GUEST_PAGING_LEVELS > 3 || level != 3);
    4.70  
    4.71      /* Need the D bit as well for writes, in L1es and PSE L2es. */
    4.72      if ( ft == ft_demand_write  
    4.73 @@ -646,38 +624,14 @@ shadow_l2_index(mfn_t *smfn, u32 guest_i
    4.74  #endif
    4.75  }
    4.76  
    4.77 -#if GUEST_PAGING_LEVELS >= 3
    4.78 +#if GUEST_PAGING_LEVELS >= 4
    4.79  
    4.80  static inline u32
    4.81  shadow_l3_index(mfn_t *smfn, u32 guest_index)
    4.82  {
    4.83 -#if GUEST_PAGING_LEVELS == 3
    4.84 -    u32 group_id;
    4.85 -
    4.86 -    // Because we use twice the space in L3 shadows as was consumed in guest
    4.87 -    // L3s, the number of guest entries per shadow page is
    4.88 -    // SHADOW_L2_PAGETABLE_ENTRIES/2.  (Note this is *not*
    4.89 -    // SHADOW_L3_PAGETABLE_ENTRIES, which in this case is 4...)
    4.90 -    //
    4.91 -    *smfn = _mfn(mfn_x(*smfn) +
    4.92 -                 (guest_index / (SHADOW_L2_PAGETABLE_ENTRIES / 2)));
    4.93 -
    4.94 -    // We store PAE L3 shadows in groups of 4, alternating shadows and
    4.95 -    // pae_l3_bookkeeping structs.  So the effective shadow index is
    4.96 -    // the the group_id * 8 + the offset within the group.
    4.97 -    //
    4.98 -    guest_index %= (SHADOW_L2_PAGETABLE_ENTRIES / 2);
    4.99 -    group_id = guest_index / 4;
   4.100 -    return (group_id * 8) + (guest_index % 4);
   4.101 -#else
   4.102      return guest_index;
   4.103 -#endif
   4.104  }
   4.105  
   4.106 -#endif // GUEST_PAGING_LEVELS >= 3
   4.107 -
   4.108 -#if GUEST_PAGING_LEVELS >= 4
   4.109 -
   4.110  static inline u32
   4.111  shadow_l4_index(mfn_t *smfn, u32 guest_index)
   4.112  {
   4.113 @@ -722,6 +676,9 @@ do {                                    
   4.114      u32 pass_thru_flags;
   4.115      u32 sflags;
   4.116  
   4.117 +    /* We don't shadow PAE l3s */
   4.118 +    ASSERT(GUEST_PAGING_LEVELS > 3 || level != 3);
   4.119 +
   4.120      // XXX -- might want to think about PAT support for HVM guests...
   4.121  
   4.122  #ifndef NDEBUG
   4.123 @@ -757,29 +714,16 @@ do {                                    
   4.124      if ( guest_entry_ptr && (ft & FETCH_TYPE_DEMAND) )
   4.125          gflags = guest_set_ad_bits(v, gmfn, guest_entry_ptr, level, ft);
   4.126      
   4.127 -    // PAE does not allow NX, RW, USER, ACCESSED, or DIRTY bits in its L3e's...
   4.128 -    //
   4.129 -    if ( (SHADOW_PAGING_LEVELS == 3) && (level == 3) )
   4.130 -        pass_thru_flags = _PAGE_PRESENT;
   4.131 -    else
   4.132 -    {
   4.133 -        pass_thru_flags = (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_USER |
   4.134 -                           _PAGE_RW | _PAGE_PRESENT);
   4.135 -        if ( guest_supports_nx(v) )
   4.136 -            pass_thru_flags |= _PAGE_NX_BIT;
   4.137 -    }
   4.138 -
   4.139 -    // PAE guests can not put NX, RW, USER, ACCESSED, or DIRTY bits into their
   4.140 -    // L3e's; they are all implied.  So we emulate them here.
   4.141 -    //
   4.142 -    if ( (GUEST_PAGING_LEVELS == 3) && (level == 3) )
   4.143 -        gflags = pass_thru_flags;
   4.144  
   4.145      // Propagate bits from the guest to the shadow.
   4.146      // Some of these may be overwritten, below.
   4.147      // Since we know the guest's PRESENT bit is set, we also set the shadow's
   4.148      // SHADOW_PRESENT bit.
   4.149      //
   4.150 +    pass_thru_flags = (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_USER |
   4.151 +                       _PAGE_RW | _PAGE_PRESENT);
   4.152 +    if ( guest_supports_nx(v) )
   4.153 +        pass_thru_flags |= _PAGE_NX_BIT;
   4.154      sflags = (gflags & pass_thru_flags) | _PAGE_SHADOW_PRESENT;
   4.155  
   4.156      // Copy the guest's RW bit into the SHADOW_RW bit.
   4.157 @@ -800,8 +744,7 @@ do {                                    
   4.158      // If the A or D bit has not yet been set in the guest, then we must
   4.159      // prevent the corresponding kind of access.
   4.160      //
   4.161 -    if ( unlikely(!((GUEST_PAGING_LEVELS == 3) && (level == 3)) &&
   4.162 -                  !(gflags & _PAGE_ACCESSED)) )
   4.163 +    if ( unlikely(!(gflags & _PAGE_ACCESSED)) )
   4.164          sflags &= ~_PAGE_PRESENT;
   4.165  
   4.166      /* D bits exist in L1es and PSE L2es */
   4.167 @@ -890,9 +833,7 @@ l4e_propagate_from_guest(struct vcpu *v,
   4.168                    fetch_type_names[ft], gl4e->l4, sl4p->l4);
   4.169      ASSERT(sflags != -1);
   4.170  }
   4.171 -#endif // GUEST_PAGING_LEVELS >= 4
   4.172 -
   4.173 -#if GUEST_PAGING_LEVELS >= 3
   4.174 +
   4.175  static void
   4.176  l3e_propagate_from_guest(struct vcpu *v,
   4.177                           guest_l3e_t *gl3e,
   4.178 @@ -912,7 +853,7 @@ l3e_propagate_from_guest(struct vcpu *v,
   4.179                    fetch_type_names[ft], gl3e->l3, sl3p->l3);
   4.180      ASSERT(sflags != -1);
   4.181  }
   4.182 -#endif // GUEST_PAGING_LEVELS >= 3
   4.183 +#endif // GUEST_PAGING_LEVELS >= 4
   4.184  
   4.185  static void
   4.186  l2e_propagate_from_guest(struct vcpu *v, 
   4.187 @@ -1081,9 +1022,6 @@ shadow_write_entries(void *d, void *s, i
   4.188          safe_write_entry(dst++, src++);
   4.189  
   4.190      if ( map != NULL ) sh_unmap_domain_page(map);
   4.191 -
   4.192 -    /* XXX TODO:
   4.193 -     * Update min/max field in page_info struct of this mfn */
   4.194  }
   4.195  
   4.196  static inline int
   4.197 @@ -1195,9 +1133,7 @@ static int shadow_set_l4e(struct vcpu *v
   4.198      }
   4.199      return flags;
   4.200  }
   4.201 -#endif /* GUEST_PAGING_LEVELS >= 4 */
   4.202 -
   4.203 -#if GUEST_PAGING_LEVELS >= 3
   4.204 +
   4.205  static int shadow_set_l3e(struct vcpu *v, 
   4.206                            shadow_l3e_t *sl3e, 
   4.207                            shadow_l3e_t new_sl3e, 
   4.208 @@ -1224,28 +1160,6 @@ static int shadow_set_l3e(struct vcpu *v
   4.209      shadow_write_entries(sl3e, &new_sl3e, 1, sl3mfn);
   4.210      flags |= SHADOW_SET_CHANGED;
   4.211  
   4.212 -#if GUEST_PAGING_LEVELS == 3 
   4.213 -    /* We wrote a guest l3e in a PAE pagetable.  This table is copied in
   4.214 -     * the linear pagetable entries of its l2s, and may also be copied
   4.215 -     * to a low memory location to make it fit in CR3.  Report that we
   4.216 -     * need to resync those copies (we can't wait for the guest to flush
   4.217 -     * the TLB because it might be an increase in rights). */
   4.218 -    {
   4.219 -        struct vcpu *vcpu;
   4.220 -
   4.221 -        struct pae_l3_bookkeeping *info = sl3p_to_info(sl3e);
   4.222 -        for_each_vcpu(v->domain, vcpu)
   4.223 -        {
   4.224 -            if (info->vcpus & (1 << vcpu->vcpu_id))
   4.225 -            {
   4.226 -                // Remember that this flip/update needs to occur.
   4.227 -                vcpu->arch.shadow.pae_flip_pending = 1;
   4.228 -                flags |= SHADOW_SET_L3PAE_RECOPY;
   4.229 -            }
   4.230 -        }
   4.231 -    }
   4.232 -#endif
   4.233 -
   4.234      if ( shadow_l3e_get_flags(old_sl3e) & _PAGE_PRESENT ) 
   4.235      {
   4.236          /* We lost a reference to an old mfn. */
   4.237 @@ -1260,7 +1174,7 @@ static int shadow_set_l3e(struct vcpu *v
   4.238      }
   4.239      return flags;
   4.240  }
   4.241 -#endif /* GUEST_PAGING_LEVELS >= 3 */ 
   4.242 +#endif /* GUEST_PAGING_LEVELS >= 4 */ 
   4.243  
   4.244  static int shadow_set_l2e(struct vcpu *v, 
   4.245                            shadow_l2e_t *sl2e, 
   4.246 @@ -1535,51 +1449,7 @@ do {                                    
   4.247  
   4.248  #endif /* different kinds of l2 */
   4.249  
   4.250 -#if GUEST_PAGING_LEVELS == 3
   4.251 -
   4.252 -/* PAE l3 subshadow: touch all entries (FOREACH_L2E will find Xen l2es). */
   4.253 -#define SHADOW_FOREACH_L3E_SUB(_sl3e, _gl3p, _done, _code)             \
   4.254 -do {                                                                    \
   4.255 -    int _i;                                                             \
   4.256 -    for ( _i = 0; _i < 4; _i++ )                                        \
   4.257 -    {                                                                   \
   4.258 -        if ( shadow_l3e_get_flags(*(_sl3e)) & _PAGE_PRESENT )           \
   4.259 -            {_code}                                                     \
   4.260 -        if ( _done ) break;                                             \
   4.261 -        _sl3e++;                                                        \
   4.262 -        increment_ptr_to_guest_entry(_gl3p);                            \
   4.263 -    }                                                                   \
   4.264 -} while (0)
   4.265 -
   4.266 -/* PAE l3 full shadow: call subshadow walk on all valid l3 subshadows */
   4.267 -#define SHADOW_FOREACH_L3E(_sl3mfn, _sl3e, _gl3p, _done, _code)        \
   4.268 -do {                                                                    \
   4.269 -    int _i, _j, _k, __done = 0;                                         \
   4.270 -    ASSERT((mfn_to_page(_sl3mfn)->count_info & PGC_SH_type_mask)       \
   4.271 -           == PGC_SH_l3_pae_shadow);                                   \
   4.272 -    /* The subshadows are split, 64 on each page of the shadow */       \
   4.273 -    for ( _j = 0; _j < 2 && !__done; _j++ )                             \
   4.274 -    {                                                                   \
   4.275 -        void *_sp = sh_map_domain_page(_sl3mfn);                       \
   4.276 -        for ( _i = 0; _i < 64; _i++ )                                   \
   4.277 -        {                                                               \
   4.278 -            /* Every second 32-byte region is a bookkeeping entry */    \
   4.279 -            _sl3e = (shadow_l3e_t *)(_sp + (64 * _i));                  \
   4.280 -            if ( (sl3p_to_info(_sl3e))->refcount > 0 )                  \
   4.281 -                SHADOW_FOREACH_L3E_SUB(_sl3e, _gl3p,                   \
   4.282 -                                        ({ __done = (_done); __done; }), \
   4.283 -                                        _code);                         \
   4.284 -            else                                                        \
   4.285 -                for ( _k = 0 ; _k < 4 ; _k++ )                          \
   4.286 -                    increment_ptr_to_guest_entry(_gl3p);                \
   4.287 -            if ( __done ) break;                                        \
   4.288 -        }                                                               \
   4.289 -        sh_unmap_domain_page(_sp);                                     \
   4.290 -        _sl3mfn = _mfn(mfn_x(_sl3mfn) + 1);                             \
   4.291 -    }                                                                   \
   4.292 -} while (0)
   4.293 -
   4.294 -#elif GUEST_PAGING_LEVELS == 4
   4.295 +#if GUEST_PAGING_LEVELS == 4
   4.296  
   4.297  /* 64-bit l3: touch all entries */
   4.298  #define SHADOW_FOREACH_L3E(_sl3mfn, _sl3e, _gl3p, _done, _code)        \
   4.299 @@ -1711,8 +1581,8 @@ void sh_install_xen_entries_in_l2h(struc
   4.300      
   4.301      /* We don't set up a linear mapping here because we can't until this
   4.302       * l2h is installed in an l3e.  sh_update_linear_entries() handles
   4.303 -     * the linear mappings when the l3 is loaded.  We zero them here, just as
   4.304 -     * a safety measure.
   4.305 +     * the linear mappings when CR3 (and so the fourth l3e) is loaded.  
   4.306 +     * We zero them here, just as a safety measure.
   4.307       */
   4.308      for ( i = 0; i < SHADOW_L3_PAGETABLE_ENTRIES; i++ )
   4.309          sl2e[shadow_l2_table_offset(LINEAR_PT_VIRT_START) + i] =
   4.310 @@ -1740,37 +1610,6 @@ void sh_install_xen_entries_in_l2h(struc
   4.311      
   4.312      sh_unmap_domain_page(sl2e);
   4.313  }
   4.314 -
   4.315 -void sh_install_xen_entries_in_l3(struct vcpu *v, mfn_t gl3mfn, mfn_t sl3mfn)
   4.316 -{
   4.317 -    shadow_l3e_t *sl3e;
   4.318 -    guest_l3e_t *gl3e = v->arch.guest_vtable;
   4.319 -    shadow_l3e_t new_sl3e;
   4.320 -    gfn_t l2gfn;
   4.321 -    mfn_t l2gmfn, l2smfn;
   4.322 -    int r;
   4.323 -
   4.324 -    ASSERT(!shadow_mode_external(v->domain));
   4.325 -    ASSERT(guest_l3e_get_flags(gl3e[3]) & _PAGE_PRESENT);
   4.326 -    l2gfn = guest_l3e_get_gfn(gl3e[3]);
   4.327 -    l2gmfn = sh_gfn_to_mfn(v->domain, gfn_x(l2gfn));
   4.328 -    l2smfn = get_shadow_status(v, l2gmfn, PGC_SH_l2h_shadow);
   4.329 -    if ( !valid_mfn(l2smfn) )
   4.330 -    {
   4.331 -        /* must remove write access to this page before shadowing it */
   4.332 -        // XXX -- should check to see whether this is better with level==0 or
   4.333 -        // level==2...
   4.334 -        if ( shadow_remove_write_access(v, l2gmfn, 2, 0xc0000000ul) != 0 )
   4.335 -            flush_tlb_mask(v->domain->domain_dirty_cpumask);
   4.336 - 
   4.337 -        l2smfn = sh_make_shadow(v, l2gmfn, PGC_SH_l2h_shadow);
   4.338 -    }
   4.339 -    l3e_propagate_from_guest(v, &gl3e[3], gl3mfn, l2smfn, &new_sl3e,
   4.340 -                             ft_prefetch);
   4.341 -    sl3e = sh_map_domain_page(sl3mfn);
   4.342 -    r = shadow_set_l3e(v, &sl3e[3], new_sl3e, sl3mfn);
   4.343 -    sh_unmap_domain_page(sl3e);
   4.344 -}
   4.345  #endif
   4.346  
   4.347  
   4.348 @@ -1827,8 +1666,6 @@ void sh_install_xen_entries_in_l2(struct
   4.349  
   4.350  
   4.351  
   4.352 -
   4.353 -
   4.354  /**************************************************************************/
   4.355  /* Create a shadow of a given guest page.
   4.356   */
   4.357 @@ -1839,7 +1676,10 @@ sh_make_shadow(struct vcpu *v, mfn_t gmf
   4.358      SHADOW_DEBUG(MAKE_SHADOW, "(%05lx, %u)=>%05lx\n",
   4.359                    mfn_x(gmfn), shadow_type, mfn_x(smfn));
   4.360  
   4.361 -    if ( shadow_type != PGC_SH_guest_root_type )
   4.362 +    if ( shadow_type != PGC_SH_l2_32_shadow 
   4.363 +         && shadow_type != PGC_SH_l2_pae_shadow 
   4.364 +         && shadow_type != PGC_SH_l2h_pae_shadow 
   4.365 +         && shadow_type != PGC_SH_l4_64_shadow )
   4.366          /* Lower-level shadow, not yet linked form a higher level */
   4.367          mfn_to_page(smfn)->up = 0;
   4.368  
   4.369 @@ -1853,8 +1693,6 @@ sh_make_shadow(struct vcpu *v, mfn_t gmf
   4.370              sh_install_xen_entries_in_l4(v, gmfn, smfn); break;
   4.371  #endif
   4.372  #if CONFIG_PAGING_LEVELS == 3 && GUEST_PAGING_LEVELS == 3
   4.373 -        case PGC_SH_l3_shadow:
   4.374 -            sh_install_xen_entries_in_l3(v, gmfn, smfn); break;
   4.375          case PGC_SH_l2h_shadow:
   4.376              sh_install_xen_entries_in_l2h(v, smfn); break;
   4.377  #endif
   4.378 @@ -1988,20 +1826,16 @@ static shadow_l4e_t * shadow_get_and_cre
   4.379                                                  mfn_t *sl4mfn)
   4.380  {
   4.381      /* There is always a shadow of the top level table.  Get it. */
   4.382 -    *sl4mfn = pagetable_get_mfn(v->arch.shadow_table);
   4.383 +    *sl4mfn = pagetable_get_mfn(v->arch.shadow_table[0]);
   4.384      /* Reading the top level table is always valid. */
   4.385      return sh_linear_l4_table(v) + shadow_l4_linear_offset(gw->va);
   4.386  }
   4.387 -#endif /* GUEST_PAGING_LEVELS >= 4 */
   4.388 -
   4.389 -
   4.390 -#if GUEST_PAGING_LEVELS >= 3
   4.391 +
   4.392  static shadow_l3e_t * shadow_get_and_create_l3e(struct vcpu *v, 
   4.393                                                  walk_t *gw, 
   4.394                                                  mfn_t *sl3mfn,
   4.395                                                  fetch_type_t ft)
   4.396  {
   4.397 -#if GUEST_PAGING_LEVELS >= 4 /* 64bit... */
   4.398      mfn_t sl4mfn;
   4.399      shadow_l4e_t *sl4e;
   4.400      if ( !valid_mfn(gw->l3mfn) ) return NULL; /* No guest page. */
   4.401 @@ -2032,19 +1866,8 @@ static shadow_l3e_t * shadow_get_and_cre
   4.402      }
   4.403      /* Now follow it down a level.  Guaranteed to succeed. */
   4.404      return sh_linear_l3_table(v) + shadow_l3_linear_offset(gw->va);
   4.405 -#else /* PAE... */
   4.406 -    /* There is always a shadow of the top level table.  Get it. */
   4.407 -    *sl3mfn = pagetable_get_mfn(v->arch.shadow_table);
   4.408 -    /* This next line is important: the shadow l3 table is in an 8k
   4.409 -     * shadow and we need to return the right mfn of the pair. This call
   4.410 -     * will set it for us as a side-effect. */
   4.411 -    (void) shadow_l3_index(sl3mfn, guest_index(gw->l3e));
   4.412 -    ASSERT(v->arch.shadow_vtable);
   4.413 -    return ((shadow_l3e_t *)v->arch.shadow_vtable) 
   4.414 -        + shadow_l3_table_offset(gw->va);
   4.415 +}
   4.416  #endif /* GUEST_PAGING_LEVELS >= 4 */
   4.417 -}
   4.418 -#endif /* GUEST_PAGING_LEVELS >= 3 */
   4.419  
   4.420  
   4.421  static shadow_l2e_t * shadow_get_and_create_l2e(struct vcpu *v, 
   4.422 @@ -2052,7 +1875,7 @@ static shadow_l2e_t * shadow_get_and_cre
   4.423                                                  mfn_t *sl2mfn,
   4.424                                                  fetch_type_t ft)
   4.425  {
   4.426 -#if GUEST_PAGING_LEVELS >= 3 /* PAE or 64bit... */
   4.427 +#if GUEST_PAGING_LEVELS >= 4 /* 64bit... */
   4.428      mfn_t sl3mfn = _mfn(INVALID_MFN);
   4.429      shadow_l3e_t *sl3e;
   4.430      if ( !valid_mfn(gw->l2mfn) ) return NULL; /* No guest page. */
   4.431 @@ -2080,17 +1903,22 @@ static shadow_l2e_t * shadow_get_and_cre
   4.432                                   *sl2mfn, &new_sl3e, ft);
   4.433          r = shadow_set_l3e(v, sl3e, new_sl3e, sl3mfn);
   4.434          ASSERT((r & SHADOW_SET_FLUSH) == 0);
   4.435 -#if GUEST_PAGING_LEVELS == 3 
   4.436 -        /* Need to sync up the linear maps, as we are about to use them */
   4.437 -        ASSERT( r & SHADOW_SET_L3PAE_RECOPY );
   4.438 -        sh_pae_recopy(v->domain);
   4.439 -#endif
   4.440      }
   4.441      /* Now follow it down a level.  Guaranteed to succeed. */
   4.442      return sh_linear_l2_table(v) + shadow_l2_linear_offset(gw->va);
   4.443 +#elif GUEST_PAGING_LEVELS == 3 /* PAE... */
   4.444 +    /* We never demand-shadow PAE l3es: they are only created in
   4.445 +     * sh_update_cr3().  Check if the relevant sl3e is present. */
   4.446 +    shadow_l3e_t *sl3e = ((shadow_l3e_t *)&v->arch.shadow.l3table) 
   4.447 +        + shadow_l3_linear_offset(gw->va);
   4.448 +    if ( !(shadow_l3e_get_flags(*sl3e) & _PAGE_PRESENT) ) 
   4.449 +        return NULL;
   4.450 +    *sl2mfn = shadow_l3e_get_mfn(*sl3e);
   4.451 +    ASSERT(valid_mfn(*sl2mfn));
   4.452 +    return sh_linear_l2_table(v) + shadow_l2_linear_offset(gw->va);
   4.453  #else /* 32bit... */
   4.454      /* There is always a shadow of the top level table.  Get it. */
   4.455 -    *sl2mfn = pagetable_get_mfn(v->arch.shadow_table);
   4.456 +    *sl2mfn = pagetable_get_mfn(v->arch.shadow_table[0]);
   4.457      /* This next line is important: the guest l2 has a 16k
   4.458       * shadow, we need to return the right mfn of the four. This
   4.459       * call will set it for us as a side-effect. */
   4.460 @@ -2213,9 +2041,7 @@ void sh_destroy_l4_shadow(struct vcpu *v
   4.461      /* Put the memory back in the pool */
   4.462      shadow_free(v->domain, smfn);
   4.463  }
   4.464 -#endif    
   4.465 -
   4.466 -#if GUEST_PAGING_LEVELS >= 3
   4.467 +
   4.468  void sh_destroy_l3_shadow(struct vcpu *v, mfn_t smfn)
   4.469  {
   4.470      shadow_l3e_t *sl3e;
   4.471 @@ -2230,10 +2056,6 @@ void sh_destroy_l3_shadow(struct vcpu *v
   4.472      gmfn = _mfn(mfn_to_page(smfn)->u.inuse.type_info);
   4.473      delete_shadow_status(v, gmfn, t, smfn);
   4.474      shadow_demote(v, gmfn, t);
   4.475 -#if GUEST_PAGING_LEVELS == 3
   4.476 -    /* Take this shadow off the list of root shadows */
   4.477 -    list_del_init(&mfn_to_page(smfn)->list);
   4.478 -#endif
   4.479  
   4.480      /* Decrement refcounts of all the old entries */
   4.481      sl3mfn = smfn; 
   4.482 @@ -2247,53 +2069,8 @@ void sh_destroy_l3_shadow(struct vcpu *v
   4.483      /* Put the memory back in the pool */
   4.484      shadow_free(v->domain, smfn);
   4.485  }
   4.486 -#endif    
   4.487 -
   4.488 -
   4.489 -#if GUEST_PAGING_LEVELS == 3
   4.490 -static void sh_destroy_l3_subshadow(struct vcpu *v, 
   4.491 -                                     shadow_l3e_t *sl3e)
   4.492 -/* Tear down just a single 4-entry l3 on a 2-page l3 shadow. */
   4.493 -{
   4.494 -    int i;
   4.495 -    mfn_t sl3mfn = _mfn(maddr_from_mapped_domain_page(sl3e) >> PAGE_SHIFT);
   4.496 -    ASSERT((unsigned long)sl3e % (4 * sizeof (shadow_l3e_t)) == 0); 
   4.497 -    for ( i = 0; i < GUEST_L3_PAGETABLE_ENTRIES; i++ ) 
   4.498 -        if ( shadow_l3e_get_flags(sl3e[i]) & _PAGE_PRESENT ) 
   4.499 -            shadow_set_l3e(v, &sl3e[i], shadow_l3e_empty(), sl3mfn);
   4.500 -}
   4.501 -#endif
   4.502 -
   4.503 -#if (GUEST_PAGING_LEVELS == 3) && (SHADOW_PAGING_LEVELS == 3)
   4.504 -void sh_unpin_all_l3_subshadows(struct vcpu *v, mfn_t smfn)
   4.505 -/* Walk a full PAE l3 shadow, unpinning all of the subshadows on it */
   4.506 -{
   4.507 -    int i, j;
   4.508 -    struct pae_l3_bookkeeping *bk;
   4.509 -    
   4.510 -    ASSERT((mfn_to_page(smfn)->count_info & PGC_SH_type_mask) 
   4.511 -           == PGC_SH_l3_pae_shadow);
   4.512 -    /* The subshadows are split, 64 on each page of the shadow */
   4.513 -    for ( i = 0; i < 2; i++ ) 
   4.514 -    {
   4.515 -        void *p = sh_map_domain_page(_mfn(mfn_x(smfn) + i));
   4.516 -        for ( j = 0; j < 64; j++ )
   4.517 -        {
   4.518 -            /* Every second 32-byte region is a bookkeeping entry */
   4.519 -            bk = (struct pae_l3_bookkeeping *)(p + (64 * j) + 32);
   4.520 -            if ( bk->pinned )
   4.521 -                sh_unpin_l3_subshadow(v, (shadow_l3e_t *)(p + (64*j)), smfn);
   4.522 -            /* Check whether we've just freed the whole shadow */
   4.523 -            if ( (mfn_to_page(smfn)->count_info & PGC_SH_count_mask) == 0 ) 
   4.524 -            {
   4.525 -                sh_unmap_domain_page(p);
   4.526 -                return;
   4.527 -            }
   4.528 -        }
   4.529 -        sh_unmap_domain_page(p);
   4.530 -    }
   4.531 -}
   4.532 -#endif
   4.533 +#endif /* GUEST_PAGING_LEVELS >= 4 */
   4.534 +
   4.535  
   4.536  void sh_destroy_l2_shadow(struct vcpu *v, mfn_t smfn)
   4.537  {
   4.538 @@ -2311,7 +2088,7 @@ void sh_destroy_l2_shadow(struct vcpu *v
   4.539      gmfn = _mfn(mfn_to_page(smfn)->u.inuse.type_info);
   4.540      delete_shadow_status(v, gmfn, t, smfn);
   4.541      shadow_demote(v, gmfn, t);
   4.542 -#if GUEST_PAGING_LEVELS == 2
   4.543 +#if (GUEST_PAGING_LEVELS == 2) || (GUEST_PAGING_LEVELS == 3)
   4.544      /* Take this shadow off the list of root shadows */
   4.545      list_del_init(&mfn_to_page(smfn)->list);
   4.546  #endif
   4.547 @@ -2421,31 +2198,14 @@ void sh_unhook_32b_mappings(struct vcpu 
   4.548  
   4.549  #elif GUEST_PAGING_LEVELS == 3
   4.550  
   4.551 -void sh_unhook_pae_mappings(struct vcpu *v, mfn_t sl3mfn)
   4.552 -/* Walk a full PAE l3 shadow, unhooking entries from all the subshadows */
   4.553 +void sh_unhook_pae_mappings(struct vcpu *v, mfn_t sl2mfn)
    4.554 +/* Walk a PAE l2 shadow, unhooking entries */
   4.555  {
   4.556 -    shadow_l3e_t *sl3e;
   4.557 -    SHADOW_FOREACH_L3E(sl3mfn, sl3e, 0, 0, {
   4.558 -        if ( (shadow_l3e_get_flags(*sl3e) & _PAGE_PRESENT) ) {
   4.559 -            mfn_t sl2mfn = shadow_l3e_get_mfn(*sl3e);
   4.560 -            if ( (mfn_to_page(sl2mfn)->count_info & PGC_SH_type_mask) 
   4.561 -                 == PGC_SH_l2h_pae_shadow ) 
   4.562 -            {
   4.563 -                /* High l2: need to pick particular l2es to unhook */
   4.564 -                shadow_l2e_t *sl2e;
   4.565 -                SHADOW_FOREACH_L2E(sl2mfn, sl2e, 0, 0, 1, {
   4.566 -                    (void) shadow_set_l2e(v, sl2e, shadow_l2e_empty(), sl2mfn);
   4.567 -                });
   4.568 -            }
   4.569 -            else
   4.570 -            {
   4.571 -                /* Normal l2: can safely unhook the whole l3e */
   4.572 -                (void) shadow_set_l3e(v, sl3e, shadow_l3e_empty(), sl3mfn);
   4.573 -            }
   4.574 -        }
   4.575 +    shadow_l2e_t *sl2e;
   4.576 +    int xen_mappings = !shadow_mode_external(v->domain);
   4.577 +    SHADOW_FOREACH_L2E(sl2mfn, sl2e, 0, 0, xen_mappings, {
   4.578 +        (void) shadow_set_l2e(v, sl2e, shadow_l2e_empty(), sl2mfn);
   4.579      });
   4.580 -    /* We've changed PAE L3 entries: must sync up various copies of them */
   4.581 -    sh_pae_recopy(v->domain);
   4.582  }
   4.583  
   4.584  #elif GUEST_PAGING_LEVELS == 4
   4.585 @@ -2523,9 +2283,8 @@ static int validate_gl4e(struct vcpu *v,
   4.586      result |= shadow_set_l4e(v, sl4p, new_sl4e, sl4mfn);
   4.587      return result;
   4.588  }
   4.589 -#endif // GUEST_PAGING_LEVELS >= 4
   4.590 -
   4.591 -#if GUEST_PAGING_LEVELS >= 3
   4.592 +
   4.593 +
   4.594  static int validate_gl3e(struct vcpu *v, void *new_ge, mfn_t sl3mfn, void *se)
   4.595  {
   4.596      shadow_l3e_t new_sl3e;
   4.597 @@ -2536,16 +2295,6 @@ static int validate_gl3e(struct vcpu *v,
   4.598  
   4.599      perfc_incrc(shadow_validate_gl3e_calls);
   4.600  
   4.601 -#if (SHADOW_PAGING_LEVELS == 3) && (GUEST_PAGING_LEVELS == 3)
   4.602 -    {
   4.603 -        /* If we've updated a subshadow which is unreferenced then 
   4.604 -           we don't care what value is being written - bail. */
   4.605 -        struct pae_l3_bookkeeping *info = sl3p_to_info(se); 
   4.606 -        if(!info->refcount)
   4.607 -            return result; 
   4.608 -    }
   4.609 -#endif
   4.610 -
   4.611      if ( guest_l3e_get_flags(*new_gl3e) & _PAGE_PRESENT )
   4.612      {
   4.613          gfn_t gl2gfn = guest_l3e_get_gfn(*new_gl3e);
   4.614 @@ -2559,16 +2308,9 @@ static int validate_gl3e(struct vcpu *v,
   4.615                               sl2mfn, &new_sl3e, ft_prefetch);
   4.616      result |= shadow_set_l3e(v, sl3p, new_sl3e, sl3mfn);
   4.617  
   4.618 -#if GUEST_PAGING_LEVELS == 3
   4.619 -    /* We have changed a PAE l3 entry: need to sync up the possible copies 
   4.620 -     * of it */
   4.621 -    if ( result & SHADOW_SET_L3PAE_RECOPY )
   4.622 -        sh_pae_recopy(v->domain);
   4.623 -#endif
   4.624 -
   4.625      return result;
   4.626  }
   4.627 -#endif // GUEST_PAGING_LEVELS >= 3
   4.628 +#endif // GUEST_PAGING_LEVELS >= 4
   4.629  
   4.630  static int validate_gl2e(struct vcpu *v, void *new_ge, mfn_t sl2mfn, void *se)
   4.631  {
   4.632 @@ -2755,12 +2497,12 @@ int
   4.633  sh_map_and_validate_gl3e(struct vcpu *v, mfn_t gl3mfn,
   4.634                            void *new_gl3p, u32 size)
   4.635  {
   4.636 -#if GUEST_PAGING_LEVELS >= 3
   4.637 +#if GUEST_PAGING_LEVELS >= 4
   4.638      return sh_map_and_validate(v, gl3mfn, new_gl3p, size, 
   4.639                                  PGC_SH_l3_shadow, 
   4.640                                  shadow_l3_index, 
   4.641                                  validate_gl3e);
   4.642 -#else // ! GUEST_PAGING_LEVELS >= 3
   4.643 +#else // ! GUEST_PAGING_LEVELS >= 4
   4.644      SHADOW_PRINTK("called in wrong paging mode!\n");
   4.645      BUG();
   4.646      return 0;
   4.647 @@ -2822,7 +2564,7 @@ static inline void check_for_early_unsha
   4.648      {
   4.649          u32 flags = mfn_to_page(gmfn)->shadow_flags;
   4.650          mfn_t smfn;
   4.651 -        if ( !(flags & (SHF_L2_32|SHF_L3_PAE|SHF_L4_64)) )
   4.652 +        if ( !(flags & (SHF_L2_32|SHF_L2_PAE|SHF_L2H_PAE|SHF_L4_64)) )
   4.653          {
   4.654              perfc_incrc(shadow_early_unshadow);
   4.655              sh_remove_shadows(v, gmfn, 1, 0 /* Fast, can fail to unshadow */ );
   4.656 @@ -2840,9 +2582,14 @@ static inline void check_for_early_unsha
   4.657                  smfn = get_shadow_status(v, gmfn, PGC_SH_l2_32_shadow);
   4.658                  shadow_unhook_mappings(v, smfn);
   4.659              }
   4.660 -            if ( flags & SHF_L3_PAE ) 
   4.661 +            if ( flags & SHF_L2_PAE ) 
   4.662              {
   4.663 -                smfn = get_shadow_status(v, gmfn, PGC_SH_l3_pae_shadow);
   4.664 +                smfn = get_shadow_status(v, gmfn, PGC_SH_l2_pae_shadow);
   4.665 +                shadow_unhook_mappings(v, smfn);
   4.666 +            }
   4.667 +            if ( flags & SHF_L2H_PAE ) 
   4.668 +            {
   4.669 +                smfn = get_shadow_status(v, gmfn, PGC_SH_l2h_pae_shadow);
   4.670                  shadow_unhook_mappings(v, smfn);
   4.671              }
   4.672              if ( flags & SHF_L4_64 ) 
   4.673 @@ -3183,8 +2930,7 @@ sh_invlpg(struct vcpu *v, unsigned long 
   4.674              return 0;
   4.675      }
   4.676  #elif SHADOW_PAGING_LEVELS == 3
   4.677 -    if ( !(shadow_l3e_get_flags(
   4.678 -          ((shadow_l3e_t *)v->arch.shadow_vtable)[shadow_l3_linear_offset(va)])
   4.679 +    if ( !(l3e_get_flags(v->arch.shadow.l3table[shadow_l3_linear_offset(va)])
   4.680             & _PAGE_PRESENT) )
   4.681          // no need to flush anything if there's no SL2...
   4.682          return 0;
   4.683 @@ -3249,34 +2995,6 @@ sh_gva_to_gpa(struct vcpu *v, unsigned l
   4.684  }
   4.685  
   4.686  
   4.687 -// XXX -- should this be in this file?
   4.688 -//        Or should it be moved to shadow-common.c?
   4.689 -//
   4.690 -/* returns a lowmem machine address of the copied HVM L3 root table
   4.691 - * If clear_res != 0, then clear the PAE-l3 reserved bits in the copy,
   4.692 - * otherwise blank out any entries with reserved bits in them.  */
   4.693 -#if (GUEST_PAGING_LEVELS == 3) && (SHADOW_PAGING_LEVELS == 3)
   4.694 -static unsigned long
   4.695 -hvm_pae_copy_root(struct vcpu *v, l3_pgentry_t *l3tab, int clear_res)
   4.696 -{
   4.697 -    int i, f;
   4.698 -    int res = (_PAGE_RW|_PAGE_NX_BIT|_PAGE_USER|_PAGE_ACCESSED|_PAGE_DIRTY);
   4.699 -    l3_pgentry_t new_l3e, *copy = v->arch.hvm_vcpu.hvm_lowmem_l3tab;
   4.700 -    memcpy(copy, l3tab, 4 * sizeof(l3_pgentry_t));
   4.701 -    for ( i = 0; i < 4; i++ )
   4.702 -    {
   4.703 -        f = l3e_get_flags(l3tab[i]);
   4.704 -        if ( (f & _PAGE_PRESENT) && (!(f & res) || clear_res) )
   4.705 -            new_l3e = l3e_from_pfn(l3e_get_pfn(l3tab[i]), f & ~res);
   4.706 -        else
   4.707 -            new_l3e = l3e_empty();
   4.708 -        safe_write_entry(&copy[i], &new_l3e);
   4.709 -    }
   4.710 -    return __pa(copy);
   4.711 -}
   4.712 -#endif
   4.713 -
   4.714 -
   4.715  static inline void
   4.716  sh_update_linear_entries(struct vcpu *v)
   4.717  /* Sync up all the linear mappings for this vcpu's pagetables */
   4.718 @@ -3330,7 +3048,7 @@ sh_update_linear_entries(struct vcpu *v)
   4.719          if ( v == current ) 
   4.720          {
   4.721              __linear_l4_table[l4_linear_offset(SH_LINEAR_PT_VIRT_START)] = 
   4.722 -                l4e_from_pfn(pagetable_get_pfn(v->arch.shadow_table),
   4.723 +                l4e_from_pfn(pagetable_get_pfn(v->arch.shadow_table[0]),
   4.724                               __PAGE_HYPERVISOR);
   4.725          } 
   4.726          else
   4.727 @@ -3338,7 +3056,7 @@ sh_update_linear_entries(struct vcpu *v)
   4.728              l4_pgentry_t *ml4e;
   4.729              ml4e = sh_map_domain_page(pagetable_get_mfn(v->arch.monitor_table));
   4.730              ml4e[l4_table_offset(SH_LINEAR_PT_VIRT_START)] = 
   4.731 -                l4e_from_pfn(pagetable_get_pfn(v->arch.shadow_table),
   4.732 +                l4e_from_pfn(pagetable_get_pfn(v->arch.shadow_table[0]),
   4.733                               __PAGE_HYPERVISOR);
   4.734              sh_unmap_domain_page(ml4e);
   4.735          }
   4.736 @@ -3379,13 +3097,8 @@ sh_update_linear_entries(struct vcpu *v)
   4.737              sh_unmap_domain_page(ml4e);
   4.738          }
   4.739  
   4.740 -#if GUEST_PAGING_LEVELS == 2
   4.741          /* Shadow l3 tables are made up by update_cr3 */
   4.742 -        sl3e = v->arch.hvm_vcpu.hvm_lowmem_l3tab;
   4.743 -#else
   4.744 -        /* Always safe to use shadow_vtable, because it's globally mapped */
   4.745 -        sl3e = v->arch.shadow_vtable;
   4.746 -#endif
   4.747 +        sl3e = v->arch.shadow.l3table;
   4.748  
   4.749          for ( i = 0; i < SHADOW_L3_PAGETABLE_ENTRIES; i++ )
   4.750          {
   4.751 @@ -3424,14 +3137,14 @@ sh_update_linear_entries(struct vcpu *v)
   4.752  #if GUEST_PAGING_LEVELS == 2
   4.753          /* Shadow l3 tables were built by update_cr3 */
   4.754          if ( shadow_mode_external(d) )
   4.755 -            shadow_l3e = v->arch.hvm_vcpu.hvm_lowmem_l3tab;
   4.756 +            shadow_l3e = (shadow_l3e_t *)&v->arch.shadow.l3table;
   4.757          else
   4.758              BUG(); /* PV 2-on-3 is not supported yet */
   4.759          
   4.760  #else /* GUEST_PAGING_LEVELS == 3 */
   4.761          
   4.762 -        /* Always safe to use *_vtable, because they're globally mapped */
   4.763 -        shadow_l3e = v->arch.shadow_vtable;
   4.764 +        shadow_l3e = (shadow_l3e_t *)&v->arch.shadow.l3table;
   4.765 +        /* Always safe to use guest_vtable, because it's globally mapped */
   4.766          guest_l3e = v->arch.guest_vtable;
   4.767  
   4.768  #endif /* GUEST_PAGING_LEVELS */
   4.769 @@ -3510,7 +3223,7 @@ sh_update_linear_entries(struct vcpu *v)
   4.770          if ( v == current ) 
   4.771          {
   4.772              __linear_l2_table[l2_linear_offset(SH_LINEAR_PT_VIRT_START)] = 
   4.773 -                l2e_from_pfn(pagetable_get_pfn(v->arch.shadow_table),
   4.774 +                l2e_from_pfn(pagetable_get_pfn(v->arch.shadow_table[0]),
   4.775                               __PAGE_HYPERVISOR);
   4.776          } 
   4.777          else
   4.778 @@ -3518,7 +3231,7 @@ sh_update_linear_entries(struct vcpu *v)
   4.779              l2_pgentry_t *ml2e;
   4.780              ml2e = sh_map_domain_page(pagetable_get_mfn(v->arch.monitor_table));
   4.781              ml2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)] = 
   4.782 -                l2e_from_pfn(pagetable_get_pfn(v->arch.shadow_table),
   4.783 +                l2e_from_pfn(pagetable_get_pfn(v->arch.shadow_table[0]),
   4.784                               __PAGE_HYPERVISOR);
   4.785              sh_unmap_domain_page(ml2e);
   4.786          }
   4.787 @@ -3530,69 +3243,7 @@ sh_update_linear_entries(struct vcpu *v)
   4.788  }
   4.789  
   4.790  
   4.791 -// XXX -- should this be in this file?
   4.792 -//        Or should it be moved to shadow-common.c?
   4.793 -//
   4.794 -#if (GUEST_PAGING_LEVELS == 3) && (SHADOW_PAGING_LEVELS == 3)
   4.795 -void sh_pae_recopy(struct domain *d)
   4.796 -/* Called whenever we write to the l3 entries of a PAE pagetable which 
   4.797 - * is currently in use.  Each vcpu that is using the table needs to 
   4.798 - * resync its copies of the l3s in linear maps and any low-memory
   4.799 - * copies it might have made for fitting into 32bit CR3.
   4.800 - * Since linear maps are also resynced when we change CR3, we don't
   4.801 - * need to worry about changes to PAE l3es that are not currently in use.*/
   4.802 -{
   4.803 -    struct vcpu *v;
   4.804 -    cpumask_t flush_mask = CPU_MASK_NONE;
   4.805 -    ASSERT(shadow_lock_is_acquired(d));
   4.806 -    
   4.807 -    for_each_vcpu(d, v)
   4.808 -    {
   4.809 -        if ( !v->arch.shadow.pae_flip_pending ) 
   4.810 -            continue;
   4.811 -
   4.812 -        cpu_set(v->processor, flush_mask);
   4.813 -        
   4.814 -        SHADOW_PRINTK("d=%u v=%u\n", v->domain->domain_id, v->vcpu_id);
   4.815 -
   4.816 -        /* This vcpu has a copy in its linear maps */
   4.817 -        sh_update_linear_entries(v);
   4.818 -        if ( hvm_guest(v) )
   4.819 -        {
   4.820 -            /* This vcpu has a copy in its HVM PAE l3 */
   4.821 -            v->arch.hvm_vcpu.hw_cr3 = 
   4.822 -                hvm_pae_copy_root(v, v->arch.shadow_vtable,
   4.823 -                                  !shadow_vcpu_mode_translate(v));
   4.824 -        }
   4.825 -#if CONFIG_PAGING_LEVELS == 3
   4.826 -        else 
   4.827 -        {
   4.828 -            /* This vcpu might have copied the l3 to below 4GB */
   4.829 -            if ( v->arch.cr3 >> PAGE_SHIFT 
   4.830 -                 != pagetable_get_pfn(v->arch.shadow_table) )
   4.831 -            {
   4.832 -                /* Recopy to where that copy is. */
   4.833 -                int i;
   4.834 -                l3_pgentry_t *dst, *src;
   4.835 -                dst = __va(v->arch.cr3 & ~0x1f); /* Mask cache control bits */
   4.836 -                src = v->arch.shadow_vtable;
   4.837 -                for ( i = 0 ; i < 4 ; i++ ) 
   4.838 -                    safe_write_entry(dst + i, src + i);
   4.839 -            }
   4.840 -        }
   4.841 -#endif
   4.842 -        v->arch.shadow.pae_flip_pending = 0;        
   4.843 -    }
   4.844 -
   4.845 -    flush_tlb_mask(flush_mask);
   4.846 -}
   4.847 -#endif /* (GUEST_PAGING_LEVELS == 3) && (SHADOW_PAGING_LEVELS == 3) */
   4.848 -
   4.849 -
   4.850 -/* removes:
   4.851 - *     vcpu->arch.guest_vtable
   4.852 - *     vcpu->arch.shadow_table
   4.853 - *     vcpu->arch.shadow_vtable
   4.854 +/* Removes vcpu->arch.guest_vtable and vcpu->arch.shadow_table[].
   4.855   * Does all appropriate management/bookkeeping/refcounting/etc...
   4.856   */
   4.857  static void
   4.858 @@ -3600,6 +3251,7 @@ sh_detach_old_tables(struct vcpu *v)
   4.859  {
   4.860      struct domain *d = v->domain;
   4.861      mfn_t smfn;
   4.862 +    int i = 0;
   4.863  
   4.864      ////
   4.865      //// vcpu->arch.guest_vtable
   4.866 @@ -3620,56 +3272,80 @@ sh_detach_old_tables(struct vcpu *v)
   4.867      }
   4.868  
   4.869      ////
   4.870 -    //// vcpu->arch.shadow_table
   4.871 +    //// vcpu->arch.shadow_table[]
   4.872      ////
   4.873 -    smfn = pagetable_get_mfn(v->arch.shadow_table);
   4.874 -    if ( mfn_x(smfn) )
   4.875 -    {
   4.876 -        ASSERT(v->arch.shadow_vtable);
   4.877 +
   4.878  
   4.879  #if GUEST_PAGING_LEVELS == 3
   4.880 -        // PAE guests do not (necessarily) use an entire page for their
   4.881 -        // 4-entry L3s, so we have to deal with them specially.
   4.882 -        //
   4.883 -        sh_put_ref_l3_subshadow(v, v->arch.shadow_vtable, smfn);
   4.884 -#else
   4.885 -        sh_put_ref(v, smfn, 0);
   4.886 +    /* PAE guests have four shadow_table entries */
   4.887 +    for ( i = 0 ; i < 4 ; i++ )
   4.888  #endif
   4.889 -
   4.890 -#if (SHADOW_PAGING_LEVELS == 3) && (GUEST_PAGING_LEVELS == 3)
   4.891 -        {
   4.892 -            struct pae_l3_bookkeeping *info =
   4.893 -                sl3p_to_info(v->arch.shadow_vtable);
   4.894 -            ASSERT(test_bit(v->vcpu_id, &info->vcpus));
   4.895 -            clear_bit(v->vcpu_id, &info->vcpus);
   4.896 -        }
   4.897 -#endif
   4.898 -        v->arch.shadow_table = pagetable_null();
   4.899 -    }
   4.900 -
   4.901 -    ////
   4.902 -    //// vcpu->arch.shadow_vtable
   4.903 -    ////
   4.904 -    if ( (shadow_mode_external(v->domain) || (GUEST_PAGING_LEVELS == 3)) &&
   4.905 -         v->arch.shadow_vtable )
   4.906      {
   4.907 -        // Q: why does this need to use (un)map_domain_page_*global* ?
   4.908 -        /* A: so sh_update_linear_entries can operate on other vcpus */
   4.909 -        sh_unmap_domain_page_global(v->arch.shadow_vtable);
   4.910 -        v->arch.shadow_vtable = NULL;
   4.911 +        smfn = pagetable_get_mfn(v->arch.shadow_table[i]);
   4.912 +        if ( mfn_x(smfn) )
   4.913 +            sh_put_ref(v, smfn, 0);
   4.914 +        v->arch.shadow_table[i] = pagetable_null();
   4.915      }
   4.916  }
   4.917  
   4.918 +/* Set up the top-level shadow and install it in slot 'slot' of shadow_table */
   4.919 +static void
   4.920 +sh_set_toplevel_shadow(struct vcpu *v, 
   4.921 +                       int slot,
   4.922 +                       mfn_t gmfn, 
   4.923 +                       unsigned int root_type) 
   4.924 +{
   4.925 +    mfn_t smfn = get_shadow_status(v, gmfn, root_type);
   4.926 +    struct domain *d = v->domain;
   4.927 +    ASSERT(pagetable_is_null(v->arch.shadow_table[slot]));
   4.928 +    if ( valid_mfn(smfn) )
   4.929 +    {
   4.930 +        /* Pull this root shadow to the front of the list of roots. */
   4.931 +        list_del(&mfn_to_page(smfn)->list);
   4.932 +        list_add(&mfn_to_page(smfn)->list, &d->arch.shadow.toplevel_shadows);
   4.933 +    }
   4.934 +    else
   4.935 +    {
   4.936 +        /* This guest MFN is a pagetable.  Must revoke write access. */
   4.937 +        if ( shadow_remove_write_access(v, gmfn, GUEST_PAGING_LEVELS, 0) != 0 )
   4.938 +            flush_tlb_mask(v->domain->domain_dirty_cpumask); 
   4.939 +        /* Make sure there's enough free shadow memory. */
   4.940 +        shadow_prealloc(d, SHADOW_MAX_ORDER); 
   4.941 +        /* Shadow the page. */
   4.942 +        smfn = sh_make_shadow(v, gmfn, root_type);
   4.943 +        list_add(&mfn_to_page(smfn)->list, &d->arch.shadow.toplevel_shadows);
   4.944 +    }
   4.945 +    ASSERT(valid_mfn(smfn));
   4.946 +    
   4.947 +#if SHADOW_OPTIMIZATIONS & SHOPT_EARLY_UNSHADOW
   4.948 +    /* Once again OK to unhook entries from this table if we see fork/exit */
   4.949 +    ASSERT(sh_mfn_is_a_page_table(gmfn));
   4.950 +    mfn_to_page(gmfn)->shadow_flags &= ~SHF_unhooked_mappings;
   4.951 +#endif
   4.952 +
   4.953 +    /* Take a ref to this page: it will be released in sh_detach_old_tables. */
   4.954 +    sh_get_ref(smfn, 0);
   4.955 +    sh_pin(smfn);
   4.956 +
   4.957 +    /* Done.  Install it */
   4.958 +    SHADOW_PRINTK("%u/%u [%u] gmfn %#"SH_PRI_mfn" smfn %#"SH_PRI_mfn"\n",
   4.959 +                  GUEST_PAGING_LEVELS, SHADOW_PAGING_LEVELS, slot,
   4.960 +                  mfn_x(gmfn), mfn_x(smfn));
   4.961 +    v->arch.shadow_table[slot] = pagetable_from_mfn(smfn);
   4.962 +}
   4.963 +
   4.964 +
   4.965  static void
   4.966  sh_update_cr3(struct vcpu *v)
   4.967 -/* Updates vcpu->arch.shadow_table after the guest has changed CR3.
   4.968 +/* Updates vcpu->arch.cr3 after the guest has changed CR3.
   4.969   * Paravirtual guests should set v->arch.guest_table (and guest_table_user,
   4.970   * if appropriate).
   4.971 - * HVM guests should also set hvm_get_guest_cntl_reg(v, 3)...
   4.972 + * HVM guests should also make sure hvm_get_guest_cntl_reg(v, 3) works,
   4.973 + * and read vcpu->arch.hvm_vcpu.hw_cr3 afterwards.
   4.974   */
   4.975  {
   4.976      struct domain *d = v->domain;
   4.977 -    mfn_t gmfn, smfn;
   4.978 +    mfn_t gmfn;
   4.979  #if GUEST_PAGING_LEVELS == 3
   4.980      u32 guest_idx=0;
   4.981  #endif
   4.982 @@ -3770,159 +3446,93 @@ sh_update_cr3(struct vcpu *v)
   4.983  #endif
   4.984  
   4.985      ////
   4.986 -    //// vcpu->arch.shadow_table
   4.987 +    //// vcpu->arch.shadow_table[]
   4.988      ////
   4.989 -    smfn = get_shadow_status(v, gmfn, PGC_SH_guest_root_type);
   4.990 -    if ( valid_mfn(smfn) )
   4.991 -    {
   4.992 -        /* Pull this root shadow to the front of the list of roots. */
   4.993 -        list_del(&mfn_to_page(smfn)->list);
   4.994 -        list_add(&mfn_to_page(smfn)->list, &d->arch.shadow.toplevel_shadows);
   4.995 -    }
   4.996 -    else
   4.997 +
   4.998 +#if GUEST_PAGING_LEVELS == 2
   4.999 +    sh_set_toplevel_shadow(v, 0, gmfn, PGC_SH_l2_shadow);
  4.1000 +#elif GUEST_PAGING_LEVELS == 3
  4.1001 +    /* PAE guests have four shadow_table entries, based on the 
  4.1002 +     * current values of the guest's four l3es. */
  4.1003      {
  4.1004 -        /* This guest MFN is a pagetable.  Must revoke write access. */
  4.1005 -        if ( shadow_remove_write_access(v, gmfn, GUEST_PAGING_LEVELS, 0) 
  4.1006 -             != 0 )
  4.1007 -            flush_tlb_mask(d->domain_dirty_cpumask); 
  4.1008 -        /* Make sure there's enough free shadow memory. */
  4.1009 -        shadow_prealloc(d, SHADOW_MAX_ORDER); 
  4.1010 -        /* Shadow the page. */
  4.1011 -        smfn = sh_make_shadow(v, gmfn, PGC_SH_guest_root_type);
  4.1012 -        list_add(&mfn_to_page(smfn)->list, &d->arch.shadow.toplevel_shadows);
  4.1013 +        int i;
  4.1014 +        guest_l3e_t *gl3e = (guest_l3e_t*)v->arch.guest_vtable;
  4.1015 +        for ( i = 0; i < 4; i++ ) 
  4.1016 +        {
  4.1017 +            ASSERT(pagetable_is_null(v->arch.shadow_table[i]));
  4.1018 +            if ( guest_l3e_get_flags(gl3e[i]) & _PAGE_PRESENT )
  4.1019 +            {
  4.1020 +                gfn_t gl2gfn = guest_l3e_get_gfn(gl3e[i]);
  4.1021 +                mfn_t gl2mfn = vcpu_gfn_to_mfn(v, gl2gfn);
  4.1022 +                if ( valid_mfn(gl2mfn) )                
  4.1023 +                    sh_set_toplevel_shadow(v, i, gl2mfn, (i == 3) 
  4.1024 +                                           ? PGC_SH_l2h_shadow 
  4.1025 +                                           : PGC_SH_l2_shadow);
  4.1026 +            }
  4.1027 +        }
  4.1028      }
  4.1029 -    ASSERT(valid_mfn(smfn));
  4.1030 -    v->arch.shadow_table = pagetable_from_mfn(smfn);
  4.1031 -
  4.1032 -#if SHADOW_OPTIMIZATIONS & SHOPT_EARLY_UNSHADOW
  4.1033 -    /* Once again OK to unhook entries from this table if we see fork/exit */
  4.1034 -    ASSERT(sh_mfn_is_a_page_table(gmfn));
  4.1035 -    mfn_to_page(gmfn)->shadow_flags &= ~SHF_unhooked_mappings;
  4.1036 +#elif GUEST_PAGING_LEVELS == 4
  4.1037 +    sh_set_toplevel_shadow(v, 0, gmfn, PGC_SH_l4_shadow);
  4.1038 +#else
  4.1039 +#error This should never happen 
  4.1040  #endif
  4.1041  
  4.1042 -
  4.1043 -    ////
  4.1044 -    //// vcpu->arch.shadow_vtable
  4.1045 -    ////
  4.1046 -    if ( shadow_mode_external(d) )
  4.1047 -    {
  4.1048 -#if (SHADOW_PAGING_LEVELS == 3) && (GUEST_PAGING_LEVELS == 3)
  4.1049 -        mfn_t adjusted_smfn = smfn;
  4.1050 -        u32 shadow_idx = shadow_l3_index(&adjusted_smfn, guest_idx);
  4.1051 -        // Q: why does this need to use (un)map_domain_page_*global* ?
  4.1052 -        v->arch.shadow_vtable =
  4.1053 -            (shadow_l3e_t *)sh_map_domain_page_global(adjusted_smfn) +
  4.1054 -            shadow_idx;
  4.1055 -#else
  4.1056 -        // Q: why does this need to use (un)map_domain_page_*global* ?
  4.1057 -        v->arch.shadow_vtable = sh_map_domain_page_global(smfn);
  4.1058 -#endif
  4.1059 -    }
  4.1060 -    else
  4.1061 -    {
  4.1062 -#if SHADOW_PAGING_LEVELS == 4
  4.1063 -        v->arch.shadow_vtable = __sh_linear_l4_table;
  4.1064 -#elif GUEST_PAGING_LEVELS == 3
  4.1065 -        // XXX - why does this need a global map?
  4.1066 -        v->arch.shadow_vtable = sh_map_domain_page_global(smfn);
  4.1067 -#else
  4.1068 -        v->arch.shadow_vtable = __sh_linear_l2_table;
  4.1069 -#endif
  4.1070 -    }
  4.1071 -
  4.1072  #if (CONFIG_PAGING_LEVELS == 3) && (GUEST_PAGING_LEVELS == 3)
  4.1073 -    // Now that shadow_vtable is in place, check that the sl3e[3] is properly
  4.1074 -    // shadowed and installed in PAE PV guests...
  4.1075 -    if ( !shadow_mode_external(d) &&
  4.1076 -         !(shadow_l3e_get_flags(((shadow_l3e_t *)v->arch.shadow_vtable)[3]) &
  4.1077 -           _PAGE_PRESENT) )
  4.1078 -    {
  4.1079 -        sh_install_xen_entries_in_l3(v, gmfn, smfn);
  4.1080 -    }
  4.1081  #endif
  4.1082  
  4.1083 -    ////
  4.1084 -    //// Take a ref to the new shadow table, and pin it.
  4.1085 -    ////
  4.1086 -    //
  4.1087 -    // This ref is logically "held" by v->arch.shadow_table entry itself.
  4.1088 -    // Release the old ref.
  4.1089 -    //
  4.1090 -#if GUEST_PAGING_LEVELS == 3
  4.1091 -    // PAE guests do not (necessarily) use an entire page for their
  4.1092 -    // 4-entry L3s, so we have to deal with them specially.
  4.1093 -    //
  4.1094 -    // XXX - might want to revisit this if/when we do multiple compilation for
  4.1095 -    //       HVM-vs-PV guests, as PAE PV guests could get away without doing
  4.1096 -    //       subshadows.
  4.1097 -    //
  4.1098 -    sh_get_ref_l3_subshadow(v->arch.shadow_vtable, smfn);
  4.1099 -    sh_pin_l3_subshadow(v->arch.shadow_vtable, smfn);
  4.1100 +    /// 
  4.1101 +    /// v->arch.shadow.l3table
  4.1102 +    ///
  4.1103 +#if SHADOW_PAGING_LEVELS == 3
  4.1104 +        {
  4.1105 +            mfn_t smfn;
  4.1106 +            int i;
  4.1107 +            for ( i = 0; i < 4; i++ )
  4.1108 +            {
  4.1109 +#if GUEST_PAGING_LEVELS == 2
  4.1110 +                /* 2-on-3: make a PAE l3 that points at the four-page l2 */
  4.1111 +                smfn = _mfn(pagetable_get_pfn(v->arch.shadow_table[0]) + i);
  4.1112  #else
  4.1113 -    sh_get_ref(smfn, 0);
  4.1114 -    sh_pin(smfn);
  4.1115 +                /* 3-on-3: make a PAE l3 that points at the four l2 pages */
  4.1116 +                smfn = pagetable_get_mfn(v->arch.shadow_table[i]);
  4.1117  #endif
  4.1118 -
  4.1119 -#if (SHADOW_PAGING_LEVELS == 3) && (GUEST_PAGING_LEVELS == 3)
  4.1120 -    // PAE 3-on-3 shadows have to keep track of which vcpu's are using
  4.1121 -    // which l3 subshadow, in order handle the SHADOW_SET_L3PAE_RECOPY
  4.1122 -    // case from validate_gl3e().  Search for SHADOW_SET_L3PAE_RECOPY
  4.1123 -    // in the code for more info.
  4.1124 -    //
  4.1125 -    {
  4.1126 -        struct pae_l3_bookkeeping *info =
  4.1127 -            sl3p_to_info(v->arch.shadow_vtable);
  4.1128 -        ASSERT(!test_bit(v->vcpu_id, &info->vcpus));
  4.1129 -        set_bit(v->vcpu_id, &info->vcpus);
  4.1130 -    }
  4.1131 -#endif
  4.1132 -
  4.1133 -    debugtrace_printk("%s cr3 gmfn=%05lx smfn=%05lx\n",
  4.1134 -                      __func__, gmfn, smfn);
  4.1135 +                v->arch.shadow.l3table[i] = 
  4.1136 +                    (mfn_x(smfn) == 0) 
  4.1137 +                    ? shadow_l3e_empty()
  4.1138 +                    : shadow_l3e_from_mfn(smfn, _PAGE_PRESENT);
  4.1139 +            }
  4.1140 +        }
  4.1141 +#endif /* SHADOW_PAGING_LEVELS == 3 */
  4.1142 +
  4.1143  
  4.1144      ///
  4.1145 -    /// v->arch.cr3 and, if appropriate, v->arch.hvm_vcpu.hw_cr3
  4.1146 +    /// v->arch.cr3
  4.1147      ///
  4.1148      if ( shadow_mode_external(d) )
  4.1149      {
  4.1150 -        ASSERT(hvm_guest(v));
  4.1151          make_cr3(v, pagetable_get_pfn(v->arch.monitor_table));
  4.1152 -
  4.1153 -#if (GUEST_PAGING_LEVELS == 2) && (SHADOW_PAGING_LEVELS != 2)
  4.1154 -#if SHADOW_PAGING_LEVELS != 3
  4.1155 -#error unexpected combination of GUEST and SHADOW paging levels
  4.1156 -#endif
  4.1157 -        /* 2-on-3: make a PAE l3 table that points at the four-page l2 */
  4.1158 -        {
  4.1159 -            mfn_t smfn = pagetable_get_mfn(v->arch.shadow_table);
  4.1160 -            int i;
  4.1161 -
  4.1162 -            ASSERT(v->arch.hvm_vcpu.hw_cr3 ==
  4.1163 -                   virt_to_maddr(v->arch.hvm_vcpu.hvm_lowmem_l3tab));
  4.1164 -            for (i = 0; i < 4; i++)
  4.1165 -            {
  4.1166 -                v->arch.hvm_vcpu.hvm_lowmem_l3tab[i] =
  4.1167 -                    shadow_l3e_from_mfn(_mfn(mfn_x(smfn)+i), _PAGE_PRESENT);
  4.1168 -            }
  4.1169 -        }
  4.1170 -#elif (GUEST_PAGING_LEVELS == 3) && (SHADOW_PAGING_LEVELS == 3)
  4.1171 -        /* 3-on-3: copy the shadow l3 to slots that are below 4GB.
  4.1172 -         * If paging is disabled, clear l3e reserved bits; otherwise 
  4.1173 -         * remove entries that have reserved bits set. */
  4.1174 -        v->arch.hvm_vcpu.hw_cr3 =
  4.1175 -            hvm_pae_copy_root(v, v->arch.shadow_vtable, 
  4.1176 -                              !shadow_vcpu_mode_translate(v));
  4.1177 -#else
  4.1178 -        /* 2-on-2 or 4-on-4: just put the shadow top-level into cr3 */
  4.1179 -        v->arch.hvm_vcpu.hw_cr3 =
  4.1180 -            pagetable_get_paddr(v->arch.shadow_table);
  4.1181 -#endif
  4.1182      }
  4.1183      else // not shadow_mode_external...
  4.1184      {
  4.1185          /* We don't support PV except guest == shadow == config levels */
  4.1186          BUG_ON(GUEST_PAGING_LEVELS != SHADOW_PAGING_LEVELS);
  4.1187 -        make_cr3(v, pagetable_get_pfn(v->arch.shadow_table));
  4.1188 +        make_cr3(v, pagetable_get_pfn(v->arch.shadow_table[0]));
  4.1189 +    }
  4.1190 +
  4.1191 +
  4.1192 +    ///
  4.1193 +    /// v->arch.hvm_vcpu.hw_cr3
  4.1194 +    ///
  4.1195 +    if ( shadow_mode_external(d) )
  4.1196 +    {
  4.1197 +        ASSERT(hvm_guest(v));
  4.1198 +#if SHADOW_PAGING_LEVELS == 3
  4.1199 +        /* 2-on-3 or 3-on-3: Use the PAE shadow l3 table we just fabricated */
  4.1200 +        v->arch.hvm_vcpu.hw_cr3 = virt_to_maddr(&v->arch.shadow.l3table);
  4.1201 +#else
  4.1202 +        /* 2-on-2 or 4-on-4: Just use the shadow top-level directly */
  4.1203 +        v->arch.hvm_vcpu.hw_cr3 = pagetable_get_paddr(v->arch.shadow_table[0]);
  4.1204 +#endif
  4.1205      }
  4.1206  
  4.1207      /* Fix up the linear pagetable mappings */
  4.1208 @@ -3950,7 +3560,6 @@ static int sh_guess_wrmap(struct vcpu *v
  4.1209  
  4.1210  
  4.1211      /* Carefully look in the shadow linear map for the l1e we expect */
  4.1212 -    if ( v->arch.shadow_vtable == NULL ) return 0;
  4.1213  #if GUEST_PAGING_LEVELS >= 4
  4.1214      sl4p = sh_linear_l4_table(v) + shadow_l4_linear_offset(vaddr);
  4.1215      if ( !(shadow_l4e_get_flags(*sl4p) & _PAGE_PRESENT) )
  4.1216 @@ -3959,7 +3568,7 @@ static int sh_guess_wrmap(struct vcpu *v
  4.1217      if ( !(shadow_l3e_get_flags(*sl3p) & _PAGE_PRESENT) )
  4.1218          return 0;
  4.1219  #elif GUEST_PAGING_LEVELS == 3
  4.1220 -    sl3p = ((shadow_l3e_t *) v->arch.shadow_vtable) 
  4.1221 +    sl3p = ((shadow_l3e_t *) v->arch.shadow.l3table) 
  4.1222          + shadow_l3_linear_offset(vaddr);
  4.1223      if ( !(shadow_l3e_get_flags(*sl3p) & _PAGE_PRESENT) )
  4.1224          return 0;
  4.1225 @@ -4044,14 +3653,12 @@ void sh_clear_shadow_entry(struct vcpu *
  4.1226      case PGC_SH_l2h_shadow:
  4.1227  #endif
  4.1228          shadow_set_l2e(v, ep, shadow_l2e_empty(), smfn); break;
  4.1229 -#if GUEST_PAGING_LEVELS >= 3
  4.1230 +#if GUEST_PAGING_LEVELS >= 4
  4.1231      case PGC_SH_l3_shadow:
  4.1232          shadow_set_l3e(v, ep, shadow_l3e_empty(), smfn); break;
  4.1233 -#if GUEST_PAGING_LEVELS >= 4
  4.1234      case PGC_SH_l4_shadow:
  4.1235          shadow_set_l4e(v, ep, shadow_l4e_empty(), smfn); break;
  4.1236  #endif
  4.1237 -#endif
  4.1238      default: BUG(); /* Called with the wrong kind of shadow. */
  4.1239      }
  4.1240  }
  4.1241 @@ -4081,7 +3688,7 @@ int sh_remove_l1_shadow(struct vcpu *v, 
  4.1242      return done;
  4.1243  }
  4.1244  
  4.1245 -#if GUEST_PAGING_LEVELS >= 3
  4.1246 +#if GUEST_PAGING_LEVELS >= 4
  4.1247  int sh_remove_l2_shadow(struct vcpu *v, mfn_t sl3mfn, mfn_t sl2mfn)
  4.1248  /* Remove all mappings of this l2 shadow from this l3 shadow */
  4.1249  {
  4.1250 @@ -4104,7 +3711,6 @@ int sh_remove_l2_shadow(struct vcpu *v, 
  4.1251      return done;
  4.1252  }
  4.1253  
  4.1254 -#if GUEST_PAGING_LEVELS >= 4
  4.1255  int sh_remove_l3_shadow(struct vcpu *v, mfn_t sl4mfn, mfn_t sl3mfn)
  4.1256  /* Remove all mappings of this l3 shadow from this l4 shadow */
  4.1257  {
  4.1258 @@ -4127,7 +3733,6 @@ int sh_remove_l3_shadow(struct vcpu *v, 
  4.1259      return done;
  4.1260  }
  4.1261  #endif /* 64bit guest */ 
  4.1262 -#endif /* PAE guest */
  4.1263  
  4.1264  /**************************************************************************/
  4.1265  /* Handling HVM guest writes to pagetables  */
  4.1266 @@ -4448,7 +4053,7 @@ int sh_audit_l2_table(struct vcpu *v, mf
  4.1267      return 0;
  4.1268  }
  4.1269  
  4.1270 -#if GUEST_PAGING_LEVELS >= 3
  4.1271 +#if GUEST_PAGING_LEVELS >= 4
  4.1272  int sh_audit_l3_table(struct vcpu *v, mfn_t sl3mfn, mfn_t x)
  4.1273  {
  4.1274      guest_l3e_t *gl3e, *gp;
  4.1275 @@ -4486,9 +4091,7 @@ int sh_audit_l3_table(struct vcpu *v, mf
  4.1276      sh_unmap_domain_page(gp);
  4.1277      return 0;
  4.1278  }
  4.1279 -#endif /* GUEST_PAGING_LEVELS >= 3 */
  4.1280 -
  4.1281 -#if GUEST_PAGING_LEVELS >= 4
  4.1282 +
  4.1283  int sh_audit_l4_table(struct vcpu *v, mfn_t sl4mfn, mfn_t x)
  4.1284  {
  4.1285      guest_l4e_t *gl4e, *gp;
     5.1 --- a/xen/arch/x86/mm/shadow/multi.h	Tue Oct 17 11:07:11 2006 +0100
     5.2 +++ b/xen/arch/x86/mm/shadow/multi.h	Tue Oct 17 11:11:48 2006 +0100
     5.3 @@ -50,10 +50,6 @@ extern void
     5.4  SHADOW_INTERNAL_NAME(sh_destroy_l4_shadow, SHADOW_LEVELS, GUEST_LEVELS)(
     5.5      struct vcpu *v, mfn_t smfn);
     5.6  
     5.7 -extern void
     5.8 -SHADOW_INTERNAL_NAME(sh_unpin_all_l3_subshadows, 3, 3)
     5.9 -    (struct vcpu *v, mfn_t smfn);
    5.10 -
    5.11  extern void 
    5.12  SHADOW_INTERNAL_NAME(sh_unhook_32b_mappings, SHADOW_LEVELS, GUEST_LEVELS)
    5.13      (struct vcpu *v, mfn_t sl2mfn);
     6.1 --- a/xen/arch/x86/mm/shadow/private.h	Tue Oct 17 11:07:11 2006 +0100
     6.2 +++ b/xen/arch/x86/mm/shadow/private.h	Tue Oct 17 11:11:48 2006 +0100
     6.3 @@ -336,13 +336,9 @@ void shadow_convert_to_log_dirty(struct 
     6.4   * non-Xen mappings in this top-level shadow mfn */
     6.5  void shadow_unhook_mappings(struct vcpu *v, mfn_t smfn);
     6.6  
     6.7 -/* Re-sync copies of PAE shadow L3 tables if they have been changed */
     6.8 -void sh_pae_recopy(struct domain *d);
     6.9 -
    6.10  /* Install the xen mappings in various flavours of shadow */
    6.11  void sh_install_xen_entries_in_l4(struct vcpu *v, mfn_t gl4mfn, mfn_t sl4mfn);
    6.12  void sh_install_xen_entries_in_l2h(struct vcpu *v, mfn_t sl2hmfn);
    6.13 -void sh_install_xen_entries_in_l3(struct vcpu *v, mfn_t gl3mfn, mfn_t sl3mfn);
    6.14  void sh_install_xen_entries_in_l2(struct vcpu *v, mfn_t gl2mfn, mfn_t sl2mfn);
    6.15  
    6.16  
     7.1 --- a/xen/arch/x86/mm/shadow/types.h	Tue Oct 17 11:07:11 2006 +0100
     7.2 +++ b/xen/arch/x86/mm/shadow/types.h	Tue Oct 17 11:11:48 2006 +0100
     7.3 @@ -215,8 +215,7 @@ static inline shadow_l4e_t shadow_l4e_fr
     7.4       shadow_l1_linear_offset(SH_LINEAR_PT_VIRT_START)); \
     7.5  })
     7.6  
     7.7 -// shadow linear L3 and L4 tables only exist in 4 level paging...
     7.8 -#if SHADOW_PAGING_LEVELS == 4
     7.9 +#if SHADOW_PAGING_LEVELS >= 4
    7.10  #define sh_linear_l3_table(v) ({ \
    7.11      ASSERT(current == (v)); \
    7.12      ((shadow_l3e_t *) \
    7.13 @@ -386,7 +385,6 @@ static inline guest_l4e_t guest_l4e_from
    7.14  #define PGC_SH_fl1_shadow PGC_SH_fl1_pae_shadow
    7.15  #define PGC_SH_l2_shadow  PGC_SH_l2_pae_shadow
    7.16  #define PGC_SH_l2h_shadow PGC_SH_l2h_pae_shadow
    7.17 -#define PGC_SH_l3_shadow  PGC_SH_l3_pae_shadow
    7.18  #else
    7.19  #define PGC_SH_l1_shadow  PGC_SH_l1_64_shadow
    7.20  #define PGC_SH_fl1_shadow PGC_SH_fl1_64_shadow
    7.21 @@ -405,14 +403,6 @@ valid_gfn(gfn_t m)
    7.22      return VALID_GFN(gfn_x(m));
    7.23  }
    7.24  
    7.25 -#if GUEST_PAGING_LEVELS == 2
    7.26 -#define PGC_SH_guest_root_type PGC_SH_l2_32_shadow
    7.27 -#elif GUEST_PAGING_LEVELS == 3
    7.28 -#define PGC_SH_guest_root_type PGC_SH_l3_pae_shadow
    7.29 -#else
    7.30 -#define PGC_SH_guest_root_type PGC_SH_l4_64_shadow
    7.31 -#endif
    7.32 -
    7.33  /* Translation between mfns and gfns */
    7.34  static inline mfn_t
    7.35  vcpu_gfn_to_mfn(struct vcpu *v, gfn_t gfn)
    7.36 @@ -490,8 +480,6 @@ struct shadow_walk_t
    7.37  #define sh_map_and_validate_gl1e   INTERNAL_NAME(sh_map_and_validate_gl1e)
    7.38  #define sh_destroy_l4_shadow       INTERNAL_NAME(sh_destroy_l4_shadow)
    7.39  #define sh_destroy_l3_shadow       INTERNAL_NAME(sh_destroy_l3_shadow)
    7.40 -#define sh_destroy_l3_subshadow    INTERNAL_NAME(sh_destroy_l3_subshadow)
    7.41 -#define sh_unpin_all_l3_subshadows INTERNAL_NAME(sh_unpin_all_l3_subshadows)
    7.42  #define sh_destroy_l2_shadow       INTERNAL_NAME(sh_destroy_l2_shadow)
    7.43  #define sh_destroy_l1_shadow       INTERNAL_NAME(sh_destroy_l1_shadow)
    7.44  #define sh_unhook_32b_mappings     INTERNAL_NAME(sh_unhook_32b_mappings)
    7.45 @@ -533,115 +521,6 @@ struct shadow_walk_t
    7.46                                SHADOW_PAGING_LEVELS)
    7.47  
    7.48  
    7.49 -#if GUEST_PAGING_LEVELS == 3
    7.50 -/*
    7.51 - * Accounting information stored in the shadow of PAE Guest L3 pages.
    7.52 - * Because these "L3 pages" are only 32-bytes, it is inconvenient to keep
    7.53 - * various refcounts, etc., on the page_info of their page.  We provide extra
    7.54 - * bookkeeping space in the shadow itself, and this is the structure
    7.55 - * definition for that bookkeeping information.
    7.56 - */
    7.57 -struct pae_l3_bookkeeping {
    7.58 -    u32 vcpus;                  /* bitmap of which vcpus are currently storing
    7.59 -                                 * copies of this 32-byte page */
    7.60 -    u32 refcount;               /* refcount for this 32-byte page */
    7.61 -    u8 pinned;                  /* is this 32-byte page pinned or not? */
    7.62 -};
    7.63 -
    7.64 -// Convert a shadow entry pointer into a pae_l3_bookkeeping pointer.
    7.65 -#define sl3p_to_info(_ptr) ((struct pae_l3_bookkeeping *)         \
    7.66 -                            (((unsigned long)(_ptr) & ~31) + 32))
    7.67 -
    7.68 -static void sh_destroy_l3_subshadow(struct vcpu *v, 
    7.69 -                                     shadow_l3e_t *sl3e);
    7.70 -
    7.71 -/* Increment a subshadow ref
    7.72 - * Called with a pointer to the subshadow, and the mfn of the
    7.73 - * *first* page of the overall shadow. */
    7.74 -static inline void sh_get_ref_l3_subshadow(shadow_l3e_t *sl3e, mfn_t smfn)
    7.75 -{
    7.76 -    struct pae_l3_bookkeeping *bk = sl3p_to_info(sl3e);
    7.77 -
    7.78 -    /* First ref to the subshadow takes a ref to the full shadow */
    7.79 -    if ( bk->refcount == 0 ) 
    7.80 -        sh_get_ref(smfn, 0);
    7.81 -    if ( unlikely(++(bk->refcount) == 0) )
    7.82 -    {
    7.83 -        SHADOW_PRINTK("shadow l3 subshadow ref overflow, smfn=%" SH_PRI_mfn " sh=%p\n", 
    7.84 -                       mfn_x(smfn), sl3e);
    7.85 -        domain_crash_synchronous();
    7.86 -    }
    7.87 -}
    7.88 -
    7.89 -/* Decrement a subshadow ref.
    7.90 - * Called with a pointer to the subshadow, and the mfn of the
    7.91 - * *first* page of the overall shadow.  Calling this may cause the 
    7.92 - * entire shadow to disappear, so the caller must immediately unmap 
    7.93 - * the pointer after calling. */ 
    7.94 -static inline void sh_put_ref_l3_subshadow(struct vcpu *v, 
    7.95 -                                            shadow_l3e_t *sl3e,
    7.96 -                                            mfn_t smfn)
    7.97 -{
    7.98 -    struct pae_l3_bookkeeping *bk;
    7.99 -
   7.100 -    bk = sl3p_to_info(sl3e);
   7.101 -
   7.102 -    ASSERT(bk->refcount > 0);
   7.103 -    if ( --(bk->refcount) == 0 )
   7.104 -    {
   7.105 -        /* Need to destroy this subshadow */
   7.106 -        sh_destroy_l3_subshadow(v, sl3e);
   7.107 -        /* Last ref to the subshadow had a ref to the full shadow */
   7.108 -        sh_put_ref(v, smfn, 0);
   7.109 -    }
   7.110 -}
   7.111 -
   7.112 -/* Pin a subshadow 
   7.113 - * Called with a pointer to the subshadow, and the mfn of the
   7.114 - * *first* page of the overall shadow. */
   7.115 -static inline void sh_pin_l3_subshadow(shadow_l3e_t *sl3e, mfn_t smfn)
   7.116 -{
   7.117 -    struct pae_l3_bookkeeping *bk = sl3p_to_info(sl3e);
   7.118 -
   7.119 -#if 0
   7.120 -    debugtrace_printk("%s smfn=%05lx offset=%ld\n",
   7.121 -                      __func__, mfn_x(smfn),
   7.122 -                      ((unsigned long)sl3e & ~PAGE_MASK) / 64);
   7.123 -#endif
   7.124 -
   7.125 -    if ( !bk->pinned )
   7.126 -    {
   7.127 -        bk->pinned = 1;
   7.128 -        sh_get_ref_l3_subshadow(sl3e, smfn);
   7.129 -    }
   7.130 -}
   7.131 -
   7.132 -/* Unpin a sub-shadow. 
   7.133 - * Called with a pointer to the subshadow, and the mfn of the
   7.134 - * *first* page of the overall shadow.  Calling this may cause the 
   7.135 - * entire shadow to disappear, so the caller must immediately unmap 
   7.136 - * the pointer after calling. */ 
   7.137 -static inline void sh_unpin_l3_subshadow(struct vcpu *v, 
   7.138 -                                          shadow_l3e_t *sl3e,
   7.139 -                                          mfn_t smfn)
   7.140 -{
   7.141 -    struct pae_l3_bookkeeping *bk = sl3p_to_info(sl3e);
   7.142 -
   7.143 -#if 0
   7.144 -    debugtrace_printk("%s smfn=%05lx offset=%ld\n",
   7.145 -                      __func__, mfn_x(smfn),
   7.146 -                      ((unsigned long)sl3e & ~PAGE_MASK) / 64);
   7.147 -#endif
   7.148 -
   7.149 -    if ( bk->pinned )
   7.150 -    {
   7.151 -        bk->pinned = 0;
   7.152 -        sh_put_ref_l3_subshadow(v, sl3e, smfn);
   7.153 -    }
   7.154 -}
   7.155 -
   7.156 -#endif /* GUEST_PAGING_LEVELS == 3 */
   7.157 -
   7.158  #if SHADOW_PAGING_LEVELS == 3
   7.159  #define MFN_FITS_IN_HVM_CR3(_MFN) !(mfn_x(_MFN) >> 20)
   7.160  #endif
     8.1 --- a/xen/include/asm-x86/domain.h	Tue Oct 17 11:07:11 2006 +0100
     8.2 +++ b/xen/include/asm-x86/domain.h	Tue Oct 17 11:11:48 2006 +0100
     8.3 @@ -134,6 +134,10 @@ struct pae_l3_cache { };
     8.4  #endif
     8.5  
     8.6  struct shadow_vcpu {
     8.7 +#if CONFIG_PAGING_LEVELS >= 3
     8.8 +    /* PAE guests: per-vcpu shadow top-level table */
     8.9 +    l3_pgentry_t l3table[4] __attribute__((__aligned__(32)));
    8.10 +#endif
    8.11      /* Pointers to mode-specific entry points. */
    8.12      struct shadow_paging_mode *mode;
    8.13      /* Last MFN that we emulated a write to. */
    8.14 @@ -142,10 +146,6 @@ struct shadow_vcpu {
    8.15      unsigned int translate_enabled:1;
    8.16      /* Emulated fault needs to be propagated to guest? */
    8.17      unsigned int propagate_fault:1;
    8.18 -#if CONFIG_PAGING_LEVELS >= 3
    8.19 -    /* Shadow update requires this PAE cpu to recopy/install its L3 table. */
    8.20 -    unsigned int pae_flip_pending:1;
    8.21 -#endif
    8.22  };
    8.23  
    8.24  struct arch_vcpu
    8.25 @@ -190,13 +190,12 @@ struct arch_vcpu
    8.26      pagetable_t guest_table;            /* (MFN) guest notion of cr3 */
    8.27      /* guest_table holds a ref to the page, and also a type-count unless
    8.28       * shadow refcounts are in use */
    8.29 -    pagetable_t shadow_table;           /* (MFN) shadow of guest */
    8.30 +    pagetable_t shadow_table[4];        /* (MFN) shadow(s) of guest */
    8.31      pagetable_t monitor_table;          /* (MFN) hypervisor PT (for HVM) */
    8.32      unsigned long cr3;           	    /* (MA) value to install in HW CR3 */
    8.33  
    8.34 -    void *guest_vtable;                 /* virtual address of pagetable */
    8.35 -    void *shadow_vtable;                /* virtual address of shadow_table */
    8.36 -    root_pgentry_t *monitor_vtable;		/* virtual address of monitor_table */
    8.37 +    void *guest_vtable;                 /* virtual addr of pagetable */
    8.38 +    root_pgentry_t *monitor_vtable;		/* virtual addr of monitor_table */
    8.39  
    8.40      /* Current LDT details. */
    8.41      unsigned long shadow_ldt_mapcnt;
     9.1 --- a/xen/include/asm-x86/hvm/vcpu.h	Tue Oct 17 11:07:11 2006 +0100
     9.2 +++ b/xen/include/asm-x86/hvm/vcpu.h	Tue Oct 17 11:11:48 2006 +0100
     9.3 @@ -41,11 +41,6 @@ struct hvm_vcpu {
     9.4  
     9.5      int                 xen_port;
     9.6  
     9.7 -#if CONFIG_PAGING_LEVELS >= 3
     9.8 -    l3_pgentry_t hvm_lowmem_l3tab[4]
     9.9 -    __attribute__((__aligned__(32)));
    9.10 -#endif
    9.11 -
    9.12      /* Flags */
    9.13      int                 flag_dr_dirty;
    9.14  
    10.1 --- a/xen/include/asm-x86/mm.h	Tue Oct 17 11:07:11 2006 +0100
    10.2 +++ b/xen/include/asm-x86/mm.h	Tue Oct 17 11:11:48 2006 +0100
    10.3 @@ -114,15 +114,14 @@ struct page_info
    10.4  #define PGC_SH_fl1_pae_shadow (5U<<28) /* L1 shadow for pae 2M superpg */
    10.5  #define PGC_SH_l2_pae_shadow  (6U<<28) /* shadowing a pae L2-low page */
    10.6  #define PGC_SH_l2h_pae_shadow (7U<<28) /* shadowing a pae L2-high page */
    10.7 -#define PGC_SH_l3_pae_shadow  (8U<<28) /* shadowing a pae L3 page */
    10.8 -#define PGC_SH_l1_64_shadow   (9U<<28) /* shadowing a 64-bit L1 page */
    10.9 -#define PGC_SH_fl1_64_shadow (10U<<28) /* L1 shadow for 64-bit 2M superpg */
   10.10 -#define PGC_SH_l2_64_shadow  (11U<<28) /* shadowing a 64-bit L2 page */
   10.11 -#define PGC_SH_l3_64_shadow  (12U<<28) /* shadowing a 64-bit L3 page */
   10.12 -#define PGC_SH_l4_64_shadow  (13U<<28) /* shadowing a 64-bit L4 page */
   10.13 -#define PGC_SH_max_shadow    (13U<<28)
   10.14 -#define PGC_SH_p2m_table     (14U<<28) /* in use as the p2m table */
   10.15 -#define PGC_SH_monitor_table (15U<<28) /* in use as a monitor table */
   10.16 +#define PGC_SH_l1_64_shadow   (8U<<28) /* shadowing a 64-bit L1 page */
   10.17 +#define PGC_SH_fl1_64_shadow  (9U<<28) /* L1 shadow for 64-bit 2M superpg */
   10.18 +#define PGC_SH_l2_64_shadow  (10U<<28) /* shadowing a 64-bit L2 page */
   10.19 +#define PGC_SH_l3_64_shadow  (11U<<28) /* shadowing a 64-bit L3 page */
   10.20 +#define PGC_SH_l4_64_shadow  (12U<<28) /* shadowing a 64-bit L4 page */
   10.21 +#define PGC_SH_max_shadow    (12U<<28)
   10.22 +#define PGC_SH_p2m_table     (13U<<28) /* in use as the p2m table */
   10.23 +#define PGC_SH_monitor_table (14U<<28) /* in use as a monitor table */
   10.24  #define PGC_SH_unused        (15U<<28)
   10.25  
   10.26  #define PGC_SH_type_mask     (15U<<28)
    11.1 --- a/xen/include/asm-x86/shadow.h	Tue Oct 17 11:07:11 2006 +0100
    11.2 +++ b/xen/include/asm-x86/shadow.h	Tue Oct 17 11:11:48 2006 +0100
    11.3 @@ -72,7 +72,6 @@
    11.4  #define SHADOW_SET_CHANGED            0x1
    11.5  #define SHADOW_SET_FLUSH              0x2
    11.6  #define SHADOW_SET_ERROR              0x4
    11.7 -#define SHADOW_SET_L3PAE_RECOPY       0x8
    11.8  
    11.9  // How do we tell that we have a 32-bit PV guest in a 64-bit Xen?
   11.10  #ifdef __x86_64__
   11.11 @@ -406,7 +405,6 @@ shadow_update_cr3(struct vcpu *v)
   11.12   * for HVM guests, arch.monitor_table and hvm's guest CR3.
   11.13   *
   11.14   * Update ref counts to shadow tables appropriately.
   11.15 - * For PAE, relocate L3 entries, if necessary, into low memory.
   11.16   */
   11.17  static inline void update_cr3(struct vcpu *v)
   11.18  {
   11.19 @@ -587,7 +585,6 @@ shadow_guest_physmap_remove_page(struct 
   11.20  #define SHF_FL1_PAE (1u << PGC_SH_type_to_index(PGC_SH_fl1_pae_shadow))
   11.21  #define SHF_L2_PAE  (1u << PGC_SH_type_to_index(PGC_SH_l2_pae_shadow))
   11.22  #define SHF_L2H_PAE (1u << PGC_SH_type_to_index(PGC_SH_l2h_pae_shadow))
   11.23 -#define SHF_L3_PAE  (1u << PGC_SH_type_to_index(PGC_SH_l3_pae_shadow))
   11.24  #define SHF_L1_64   (1u << PGC_SH_type_to_index(PGC_SH_l1_64_shadow))
   11.25  #define SHF_FL1_64  (1u << PGC_SH_type_to_index(PGC_SH_fl1_64_shadow))
   11.26  #define SHF_L2_64   (1u << PGC_SH_type_to_index(PGC_SH_l2_64_shadow))