ia64/xen-unstable

changeset 11871:5c029fda79dc

Merge
author Tim Deegan <Tim.Deegan@xensource.com>
date Wed Oct 18 14:36:20 2006 +0100 (2006-10-18)
parents bd207697f0c7 87fc080f555b
children 21f8c507da29
files xen/arch/x86/hvm/svm/svm.c xen/arch/x86/hvm/svm/vmcb.c xen/arch/x86/mm/shadow/common.c xen/arch/x86/mm/shadow/multi.c
     1.1 --- a/xen/arch/x86/hvm/svm/svm.c	Wed Oct 18 13:43:35 2006 +0100
     1.2 +++ b/xen/arch/x86/hvm/svm/svm.c	Wed Oct 18 14:36:20 2006 +0100
     1.3 @@ -1739,9 +1739,6 @@ static int mov_to_cr(int gpreg, int cr, 
     1.4              if (old_base_mfn)
     1.5                  put_page(mfn_to_page(old_base_mfn));
     1.6  
     1.7 -            /*
     1.8 -             * arch.shadow_table should now hold the next CR3 for shadow
     1.9 -             */
    1.10              v->arch.hvm_svm.cpu_cr3 = value;
    1.11              update_cr3(v);
    1.12              vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3; 
    1.13 @@ -1788,10 +1785,6 @@ static int mov_to_cr(int gpreg, int cr, 
    1.14  
    1.15                  vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3; 
    1.16  
    1.17 -                /*
    1.18 -                 * arch->shadow_table should hold the next CR3 for shadow
    1.19 -                 */
    1.20 -
    1.21                  HVM_DBG_LOG(DBG_LEVEL_VMMU, 
    1.22                              "Update CR3 value = %lx, mfn = %lx",
    1.23                              v->arch.hvm_svm.cpu_cr3, mfn);
    1.24 @@ -2355,7 +2348,7 @@ void svm_dump_regs(const char *from, str
    1.25  {
    1.26      struct vcpu *v = current;
    1.27      struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
    1.28 -    unsigned long pt = pagetable_get_paddr(v->arch.shadow_table);
    1.29 +    unsigned long pt = v->arch.hvm_vcpu.hw_cr3;
    1.30  
    1.31      printf("%s: guest registers from %s:\n", __func__, from);
    1.32  #if defined (__x86_64__)
    1.33 @@ -2681,11 +2674,11 @@ asmlinkage void svm_vmexit_handler(struc
    1.34          if (do_debug)
    1.35          {
    1.36              printk("%s:+ guest_table = 0x%08x, monitor_table = 0x%08x, "
    1.37 -                   "shadow_table = 0x%08x\n", 
    1.38 +                   "hw_cr3 = 0x%16lx\n", 
    1.39                     __func__,
    1.40                     (int) v->arch.guest_table.pfn,
    1.41                     (int) v->arch.monitor_table.pfn, 
    1.42 -                   (int) v->arch.shadow_table.pfn);
     1.43 +                   (unsigned long) v->arch.hvm_vcpu.hw_cr3);
    1.44  
    1.45              svm_dump_vmcb(__func__, vmcb);
    1.46              svm_dump_regs(__func__, regs);
    1.47 @@ -2913,10 +2906,10 @@ asmlinkage void svm_vmexit_handler(struc
    1.48      if (do_debug) 
    1.49      {
    1.50          printk("vmexit_handler():- guest_table = 0x%08x, "
    1.51 -               "monitor_table = 0x%08x, shadow_table = 0x%08x\n",
     1.52 +               "monitor_table = 0x%08x, hw_cr3 = 0x%16llx\n",
    1.53                 (int)v->arch.guest_table.pfn,
    1.54                 (int)v->arch.monitor_table.pfn, 
    1.55 -               (int)v->arch.shadow_table.pfn);
     1.56 +               (unsigned long long)v->arch.hvm_vcpu.hw_cr3);
    1.57          printk("svm_vmexit_handler: Returning\n");
    1.58      }
    1.59  #endif
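
The svm.c hunks above replace debug output of the old per-vcpu shadow_table with the new hvm_vcpu.hw_cr3 field. Since hw_cr3 can be wider than int on both PAE and 64-bit builds, any printk of it needs a widening cast; a minimal sketch of the safe pattern (matching the style the patch uses in vmcb.c below):

    /* Illustrative: print hw_cr3 without truncation on any build. */
    printk("hw_cr3 = 0x%016llx\n",
           (unsigned long long) v->arch.hvm_vcpu.hw_cr3);
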
     2.1 --- a/xen/arch/x86/hvm/svm/vmcb.c	Wed Oct 18 13:43:35 2006 +0100
     2.2 +++ b/xen/arch/x86/hvm/svm/vmcb.c	Wed Oct 18 14:36:20 2006 +0100
     2.3 @@ -372,8 +372,8 @@ void svm_do_launch(struct vcpu *v)
     2.4      if (svm_dbg_on) 
     2.5      {
     2.6          unsigned long pt;
     2.7 -        pt = pagetable_get_paddr(v->arch.shadow_table);
     2.8 -        printk("%s: shadow_table = %lx\n", __func__, pt);
     2.9 +        printk("%s: hw_cr3 = %llx\n", __func__, 
    2.10 +               (unsigned long long) v->arch.hvm_vcpu.hw_cr3);
    2.11          pt = pagetable_get_paddr(v->arch.guest_table);
    2.12          printk("%s: guest_table  = %lx\n", __func__, pt);
    2.13          pt = pagetable_get_paddr(v->domain->arch.phys_table);
    2.14 @@ -387,8 +387,9 @@ void svm_do_launch(struct vcpu *v)
    2.15      {
    2.16          printk("%s: cr3 = %lx ", __func__, (unsigned long)vmcb->cr3);
    2.17          printk("init_guest_table: guest_table = 0x%08x, monitor_table = 0x%08x,"
    2.18 -                " shadow_table = 0x%08x\n", (int)v->arch.guest_table.pfn, 
    2.19 -                (int)v->arch.monitor_table.pfn, (int)v->arch.shadow_table.pfn);
    2.20 +                " hw_cr3 = 0x%16llx\n", (int)v->arch.guest_table.pfn, 
    2.21 +               (int)v->arch.monitor_table.pfn, 
    2.22 +               (unsigned long long) v->arch.hvm_vcpu.hw_cr3);
    2.23      }
    2.24  
    2.25      v->arch.schedule_tail = arch_svm_do_resume;
     3.1 --- a/xen/arch/x86/mm/shadow/common.c	Wed Oct 18 13:43:35 2006 +0100
     3.2 +++ b/xen/arch/x86/mm/shadow/common.c	Wed Oct 18 14:36:20 2006 +0100
     3.3 @@ -283,11 +283,8 @@ int
     3.4      if ( page->shadow_flags & SHF_L2H_PAE ) 
     3.5          result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl2he, 3, 3)
     3.6              (v, gmfn, entry, size);
     3.7 -    if ( page->shadow_flags & SHF_L3_PAE ) 
     3.8 -        result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl3e, 3, 3)
     3.9 -            (v, gmfn, entry, size);
    3.10  #else /* 32-bit non-PAE hypervisor does not support PAE guests */
    3.11 -    ASSERT((page->shadow_flags & (SHF_L3_PAE|SHF_L2_PAE|SHF_L1_PAE)) == 0);
    3.12 +    ASSERT((page->shadow_flags & (SHF_L2H_PAE|SHF_L2_PAE|SHF_L1_PAE)) == 0);
    3.13  #endif
    3.14  
    3.15  #if CONFIG_PAGING_LEVELS >= 4 
    3.16 @@ -343,8 +340,11 @@ shadow_validate_guest_pt_write(struct vc
    3.17      if ( rc & SHADOW_SET_ERROR ) 
    3.18      {
    3.19          /* This page is probably not a pagetable any more: tear it out of the 
    3.20 -         * shadows, along with any tables that reference it */
    3.21 -        shadow_remove_all_shadows_and_parents(v, gmfn);
    3.22 +         * shadows, along with any tables that reference it.  
    3.23 +         * Since the validate call above will have made a "safe" (i.e. zero) 
    3.24 +         * shadow entry, we can let the domain live even if we can't fully 
    3.25 +         * unshadow the page. */
    3.26 +        sh_remove_shadows(v, gmfn, 0, 0);
    3.27      }
    3.28  }
    3.29  
    3.30 @@ -424,22 +424,16 @@ shadow_validate_guest_pt_write(struct vc
    3.31  /* Allocating shadow pages
    3.32   * -----------------------
    3.33   *
    3.34 - * Most shadow pages are allocated singly, but there are two cases where we 
    3.35 - * need to allocate multiple pages together.
    3.36 - * 
    3.37 - * 1: Shadowing 32-bit guest tables on PAE or 64-bit shadows.
    3.38 - *    A 32-bit guest l1 table covers 4MB of virtuial address space,
    3.39 - *    and needs to be shadowed by two PAE/64-bit l1 tables (covering 2MB
    3.40 - *    of virtual address space each).  Similarly, a 32-bit guest l2 table 
    3.41 - *    (4GB va) needs to be shadowed by four PAE/64-bit l2 tables (1GB va 
    3.42 - *    each).  These multi-page shadows are contiguous and aligned; 
    3.43 - *    functions for handling offsets into them are defined in shadow.c 
    3.44 - *    (shadow_l1_index() etc.)
    3.45 + * Most shadow pages are allocated singly, but there is one case where
    3.46 + * we need to allocate multiple pages together: shadowing 32-bit guest
    3.47 + * tables on PAE or 64-bit shadows.  A 32-bit guest l1 table covers 4MB
     3.48 + * of virtual address space, and needs to be shadowed by two PAE/64-bit
    3.49 + * l1 tables (covering 2MB of virtual address space each).  Similarly, a
    3.50 + * 32-bit guest l2 table (4GB va) needs to be shadowed by four
    3.51 + * PAE/64-bit l2 tables (1GB va each).  These multi-page shadows are
    3.52 + * contiguous and aligned; functions for handling offsets into them are
    3.53 + * defined in shadow.c (shadow_l1_index() etc.)
    3.54   *    
    3.55 - * 2: Shadowing PAE top-level pages.  Each guest page that contains
    3.56 - *    any PAE top-level pages requires two shadow pages to shadow it.
    3.57 - *    They contain alternating l3 tables and pae_l3_bookkeeping structs.
    3.58 - *
    3.59   * This table shows the allocation behaviour of the different modes:
    3.60   *
    3.61   * Xen paging      32b  pae  pae  64b  64b  64b
    3.62 @@ -449,7 +443,7 @@ shadow_validate_guest_pt_write(struct vc
    3.63   *
    3.64   * sl1 size         4k   8k   4k   8k   4k   4k
    3.65   * sl2 size         4k  16k   4k  16k   4k   4k
    3.66 - * sl3 size         -    -    8k   -    8k   4k
    3.67 + * sl3 size         -    -    -    -    -    4k
    3.68   * sl4 size         -    -    -    -    -    4k
    3.69   *
    3.70   * We allocate memory from xen in four-page units and break them down
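
The multi-page shadows described above rely on simple index arithmetic: a 32-bit guest l1 has 1024 entries while a PAE/64-bit shadow l1 holds 512, so guest indexes 0-511 land in the first page of the order-1 pair and 512-1023 in the second. A sketch in the spirit of the shadow_l1_index() helper the comment refers to (illustrative only; the real function lives in shadow.c):

    /* Map a 32-bit guest l1 index onto the right page of a two-page
     * shadow l1 pair, returning the offset within that page. */
    static inline u32 example_l1_index(mfn_t *smfn, u32 guest_index)
    {
        *smfn = _mfn(mfn_x(*smfn)
                     + (guest_index / SHADOW_L1_PAGETABLE_ENTRIES));
        return guest_index % SHADOW_L1_PAGETABLE_ENTRIES;
    }
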
    3.71 @@ -503,7 +497,6 @@ shadow_order(u32 shadow_type)
    3.72          0, /* PGC_SH_fl1_pae_shadow */
    3.73          0, /* PGC_SH_l2_pae_shadow  */
    3.74          0, /* PGC_SH_l2h_pae_shadow */
    3.75 -        1, /* PGC_SH_l3_pae_shadow  */
    3.76          0, /* PGC_SH_l1_64_shadow   */
    3.77          0, /* PGC_SH_fl1_64_shadow  */
    3.78          0, /* PGC_SH_l2_64_shadow   */
    3.79 @@ -546,7 +539,8 @@ void shadow_unhook_mappings(struct vcpu 
    3.80  #endif
    3.81          break;
    3.82  #if CONFIG_PAGING_LEVELS >= 3
    3.83 -    case PGC_SH_l3_pae_shadow >> PGC_SH_type_shift:
    3.84 +    case PGC_SH_l2_pae_shadow >> PGC_SH_type_shift:
    3.85 +    case PGC_SH_l2h_pae_shadow >> PGC_SH_type_shift:
    3.86          SHADOW_INTERNAL_NAME(sh_unhook_pae_mappings,3,3)(v,smfn);
    3.87          break;
    3.88  #endif
    3.89 @@ -587,18 +581,8 @@ void shadow_prealloc(struct domain *d, u
    3.90          pg = list_entry(l, struct page_info, list);
    3.91          smfn = page_to_mfn(pg);
    3.92  
    3.93 -#if CONFIG_PAGING_LEVELS >= 3
    3.94 -        if ( (pg->count_info & PGC_SH_type_mask) == PGC_SH_l3_pae_shadow )
    3.95 -        {
    3.96 -            /* For PAE, we need to unpin each subshadow on this shadow */
    3.97 -            SHADOW_INTERNAL_NAME(sh_unpin_all_l3_subshadows,3,3)(v, smfn);
    3.98 -        } 
    3.99 -        else 
   3.100 -#endif /* 32-bit code always takes this branch */
   3.101 -        {
   3.102 -            /* Unpin this top-level shadow */
   3.103 -            sh_unpin(v, smfn);
   3.104 -        }
   3.105 +        /* Unpin this top-level shadow */
   3.106 +        sh_unpin(v, smfn);
   3.107  
   3.108          /* See if that freed up a chunk of appropriate size */
   3.109          if ( chunk_is_available(d, order) ) return;
   3.110 @@ -620,8 +604,12 @@ void shadow_prealloc(struct domain *d, u
   3.111          shadow_unhook_mappings(v, smfn);
   3.112  
   3.113          /* Need to flush TLB if we've altered our own tables */
   3.114 -        if ( !shadow_mode_external(d) 
   3.115 -             && pagetable_get_pfn(current->arch.shadow_table) == mfn_x(smfn) )
   3.116 +        if ( !shadow_mode_external(d) &&
   3.117 +             (pagetable_get_pfn(current->arch.shadow_table[0]) == mfn_x(smfn)
   3.118 +              || pagetable_get_pfn(current->arch.shadow_table[1]) == mfn_x(smfn)
   3.119 +              || pagetable_get_pfn(current->arch.shadow_table[2]) == mfn_x(smfn)
   3.120 +              || pagetable_get_pfn(current->arch.shadow_table[3]) == mfn_x(smfn)
   3.121 +                 ) )
   3.122              local_flush_tlb();
   3.123          
   3.124          /* See if that freed up a chunk of appropriate size */
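
With shadow_table now an array of four pagetables (one per PAE l3 slot), the flush test above must consider every slot. An equivalent loop form, assuming unused slots read back as pfn 0, would be:

    /* Illustrative equivalent of the four-way test above. */
    if ( !shadow_mode_external(d) )
    {
        int i;
        for ( i = 0; i < 4; i++ )
            if ( pagetable_get_pfn(current->arch.shadow_table[i])
                 == mfn_x(smfn) )
            {
                local_flush_tlb();
                break;
            }
    }
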
   3.125 @@ -732,6 +720,15 @@ void shadow_free(struct domain *d, mfn_t
   3.126  
   3.127      for ( i = 0; i < 1<<order; i++ ) 
   3.128      {
   3.129 +#if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC
   3.130 +        struct vcpu *v;
   3.131 +        for_each_vcpu(d, v) 
   3.132 +        {
   3.133 +            /* No longer safe to look for a writeable mapping in this shadow */
   3.134 +            if ( v->arch.shadow.last_writeable_pte_smfn == mfn_x(smfn) + i ) 
   3.135 +                v->arch.shadow.last_writeable_pte_smfn = 0;
   3.136 +        }
   3.137 +#endif
   3.138          /* Strip out the type: this is now a free shadow page */
   3.139          pg[i].count_info = 0;
   3.140          /* Remember the TLB timestamp so we will know whether to flush 
   3.141 @@ -920,9 +917,20 @@ p2m_next_level(struct domain *d, mfn_t *
   3.142  #if CONFIG_PAGING_LEVELS == 3
   3.143          if (type == PGT_l2_page_table)
   3.144          {
   3.145 +            struct vcpu *v;
   3.146              /* We have written to the p2m l3: need to sync the per-vcpu
   3.147               * copies of it in the monitor tables */
   3.148              p2m_install_entry_in_monitors(d, (l3_pgentry_t *)p2m_entry);
   3.149 +            /* Also, any vcpus running on shadows of the p2m need to 
   3.150 +             * reload their CR3s so the change propagates to the shadow */
   3.151 +            ASSERT(shadow_lock_is_acquired(d));
   3.152 +            for_each_vcpu(d, v) 
   3.153 +            {
   3.154 +                if ( pagetable_get_pfn(v->arch.guest_table) 
   3.155 +                     == pagetable_get_pfn(d->arch.phys_table) 
   3.156 +                     && v->arch.shadow.mode != NULL )
   3.157 +                    v->arch.shadow.mode->update_cr3(v);
   3.158 +            }
   3.159          }
   3.160  #endif
   3.161          /* The P2M can be shadowed: keep the shadows synced */
   3.162 @@ -1711,9 +1719,6 @@ void sh_destroy_shadow(struct vcpu *v, m
   3.163      case PGC_SH_l2h_pae_shadow >> PGC_SH_type_shift:
   3.164          SHADOW_INTERNAL_NAME(sh_destroy_l2_shadow, 3, 3)(v, smfn);
   3.165          break;
   3.166 -    case PGC_SH_l3_pae_shadow >> PGC_SH_type_shift:
   3.167 -        SHADOW_INTERNAL_NAME(sh_destroy_l3_shadow, 3, 3)(v, smfn);
   3.168 -        break;
   3.169  #endif
   3.170  
   3.171  #if CONFIG_PAGING_LEVELS >= 4
   3.172 @@ -1768,7 +1773,6 @@ int shadow_remove_write_access(struct vc
   3.173  #endif
   3.174          NULL, /* l2_pae  */
   3.175          NULL, /* l2h_pae */
   3.176 -        NULL, /* l3_pae  */
   3.177  #if CONFIG_PAGING_LEVELS >= 4
   3.178          SHADOW_INTERNAL_NAME(sh_remove_write_access,4,4), /* l1_64   */
   3.179          SHADOW_INTERNAL_NAME(sh_remove_write_access,4,4), /* fl1_64  */
   3.180 @@ -1825,12 +1829,11 @@ int shadow_remove_write_access(struct vc
   3.181          unsigned long gfn;
   3.182          /* Heuristic: there is likely to be only one writeable mapping,
   3.183           * and that mapping is likely to be in the current pagetable,
   3.184 -         * either in the guest's linear map (linux, windows) or in a
   3.185 -         * magic slot used to map high memory regions (linux HIGHTPTE) */
    3.186 +         * in the guest's linear map (on non-HIGHPTE linux and windows) */
   3.187  
   3.188  #define GUESS(_a, _h) do {                                              \
   3.189 -            if ( v->arch.shadow.mode->guess_wrmap(v, (_a), gmfn) )          \
   3.190 -                perfc_incrc(shadow_writeable_h_ ## _h);                \
   3.191 +            if ( v->arch.shadow.mode->guess_wrmap(v, (_a), gmfn) )      \
   3.192 +                perfc_incrc(shadow_writeable_h_ ## _h);                 \
   3.193              if ( (pg->u.inuse.type_info & PGT_count_mask) == 0 )        \
   3.194                  return 1;                                               \
   3.195          } while (0)
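
GUESS tries a single candidate virtual address and returns as soon as the page's writeable reference count drops to zero. A hypothetical call site, since the real ones are outside this hunk (the address and heuristic number here are made up for illustration):

    /* Hypothetical: guess the mapping sits in the guest's linear map. */
    GUESS(0xC0000000UL + (gfn << PAGE_SHIFT), 1);
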
   3.196 @@ -1880,9 +1883,35 @@ int shadow_remove_write_access(struct vc
   3.197  #endif /* CONFIG_PAGING_LEVELS >= 3 */
   3.198  
   3.199  #undef GUESS
   3.200 -
   3.201      }
   3.202 -#endif
   3.203 +
   3.204 +    if ( (pg->u.inuse.type_info & PGT_count_mask) == 0 )
   3.205 +        return 1;
   3.206 +
   3.207 +    /* Second heuristic: on HIGHPTE linux, there are two particular PTEs
   3.208 +     * (entries in the fixmap) where linux maps its pagetables.  Since
   3.209 +     * we expect to hit them most of the time, we start the search for
   3.210 +     * the writeable mapping by looking at the same MFN where the last
   3.211 +     * brute-force search succeeded. */
   3.212 +
   3.213 +    if ( v->arch.shadow.last_writeable_pte_smfn != 0 )
   3.214 +    {
   3.215 +        unsigned long old_count = (pg->u.inuse.type_info & PGT_count_mask);
   3.216 +        mfn_t last_smfn = _mfn(v->arch.shadow.last_writeable_pte_smfn);
   3.217 +        int shtype = (mfn_to_page(last_smfn)->count_info & PGC_SH_type_mask) 
   3.218 +            >> PGC_SH_type_shift;
   3.219 +
   3.220 +        if ( callbacks[shtype] ) 
   3.221 +            callbacks[shtype](v, last_smfn, gmfn);
   3.222 +
   3.223 +        if ( (pg->u.inuse.type_info & PGT_count_mask) != old_count )
   3.224 +            perfc_incrc(shadow_writeable_h_5);
   3.225 +    }
   3.226 +
   3.227 +    if ( (pg->u.inuse.type_info & PGT_count_mask) == 0 )
   3.228 +        return 1;
   3.229 +
   3.230 +#endif /* SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC */
   3.231      
   3.232      /* Brute-force search of all the shadows, by walking the hash */
   3.233      perfc_incrc(shadow_writeable_bf);
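
For the second heuristic above to ever fire, something must record where the last brute-force search found the writeable mapping. That store is not visible in this hunk; a plausible sketch (placement assumed) is a single line in the per-shadow callback that strips _PAGE_RW:

    /* Assumed, not shown in this hunk: cache the shadow MFN whose
     * sl1e we just downgraded, for the next search to try first. */
    #if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC
        v->arch.shadow.last_writeable_pte_smfn = mfn_x(sl1mfn);
    #endif
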
   3.234 @@ -1932,7 +1961,6 @@ int shadow_remove_all_mappings(struct vc
   3.235  #endif
   3.236          NULL, /* l2_pae  */
   3.237          NULL, /* l2h_pae */
   3.238 -        NULL, /* l3_pae  */
   3.239  #if CONFIG_PAGING_LEVELS >= 4
   3.240          SHADOW_INTERNAL_NAME(sh_remove_all_mappings,4,4), /* l1_64   */
   3.241          SHADOW_INTERNAL_NAME(sh_remove_all_mappings,4,4), /* fl1_64  */
   3.242 @@ -2005,7 +2033,8 @@ static int sh_remove_shadow_via_pointer(
   3.243      ASSERT((pg->count_info & PGC_SH_type_mask) > 0);
   3.244      ASSERT((pg->count_info & PGC_SH_type_mask) < PGC_SH_max_shadow);
   3.245      ASSERT((pg->count_info & PGC_SH_type_mask) != PGC_SH_l2_32_shadow);
   3.246 -    ASSERT((pg->count_info & PGC_SH_type_mask) != PGC_SH_l3_pae_shadow);
   3.247 +    ASSERT((pg->count_info & PGC_SH_type_mask) != PGC_SH_l2_pae_shadow);
   3.248 +    ASSERT((pg->count_info & PGC_SH_type_mask) != PGC_SH_l2h_pae_shadow);
   3.249      ASSERT((pg->count_info & PGC_SH_type_mask) != PGC_SH_l4_64_shadow);
   3.250      
   3.251      if (pg->up == 0) return 0;
   3.252 @@ -2034,7 +2063,6 @@ static int sh_remove_shadow_via_pointer(
   3.253      case PGC_SH_l1_pae_shadow:
   3.254      case PGC_SH_l2_pae_shadow:
   3.255      case PGC_SH_l2h_pae_shadow:
   3.256 -    case PGC_SH_l3_pae_shadow:
   3.257          SHADOW_INTERNAL_NAME(sh_clear_shadow_entry,3,3)(v, vaddr, pmfn);
   3.258          break;
   3.259  #if CONFIG_PAGING_LEVELS >= 4
   3.260 @@ -2058,17 +2086,20 @@ static int sh_remove_shadow_via_pointer(
   3.261      return rc;
   3.262  }
   3.263  
   3.264 -void sh_remove_shadows(struct vcpu *v, mfn_t gmfn, int all)
   3.265 +void sh_remove_shadows(struct vcpu *v, mfn_t gmfn, int fast, int all)
   3.266  /* Remove the shadows of this guest page.  
   3.267 - * If all != 0, find all shadows, if necessary by walking the tables.
   3.268 - * Otherwise, just try the (much faster) heuristics, which will remove 
   3.269 - * at most one reference to each shadow of the page. */
   3.270 + * If fast != 0, just try the quick heuristic, which will remove 
   3.271 + * at most one reference to each shadow of the page.  Otherwise, walk
   3.272 + * all the shadow tables looking for refs to shadows of this gmfn.
   3.273 + * If all != 0, kill the domain if we can't find all the shadows.
   3.274 + * (all != 0 implies fast == 0)
   3.275 + */
   3.276  {
   3.277      struct page_info *pg;
   3.278      mfn_t smfn;
   3.279      u32 sh_flags;
   3.280      unsigned char t;
   3.281 -
   3.282 +    
   3.283      /* Dispatch table for getting per-type functions: each level must
   3.284       * be called with the function to remove a lower-level shadow. */
   3.285      static hash_callback_t callbacks[16] = {
   3.286 @@ -2085,11 +2116,9 @@ void sh_remove_shadows(struct vcpu *v, m
   3.287  #if CONFIG_PAGING_LEVELS >= 3
   3.288          SHADOW_INTERNAL_NAME(sh_remove_l1_shadow,3,3), /* l2_pae  */
   3.289          SHADOW_INTERNAL_NAME(sh_remove_l1_shadow,3,3), /* l2h_pae */
   3.290 -        SHADOW_INTERNAL_NAME(sh_remove_l2_shadow,3,3), /* l3_pae  */
   3.291  #else 
   3.292          NULL, /* l2_pae  */
   3.293          NULL, /* l2h_pae */
   3.294 -        NULL, /* l3_pae  */
   3.295  #endif
   3.296          NULL, /* l1_64   */
   3.297          NULL, /* fl1_64  */
   3.298 @@ -2115,9 +2144,8 @@ void sh_remove_shadows(struct vcpu *v, m
   3.299          ((1 << (PGC_SH_l2h_pae_shadow >> PGC_SH_type_shift))
   3.300           | (1 << (PGC_SH_l2_pae_shadow >> PGC_SH_type_shift))), /* l1_pae  */
   3.301          0, /* fl1_pae */
   3.302 -        1 << (PGC_SH_l3_pae_shadow >> PGC_SH_type_shift), /* l2_pae  */
   3.303 -        1 << (PGC_SH_l3_pae_shadow >> PGC_SH_type_shift), /* l2h_pae  */
   3.304 -        0, /* l3_pae  */
   3.305 +        0, /* l2_pae  */
   3.306 +        0, /* l2h_pae  */
   3.307          1 << (PGC_SH_l2_64_shadow >> PGC_SH_type_shift), /* l1_64   */
   3.308          0, /* fl1_64  */
   3.309          1 << (PGC_SH_l3_64_shadow >> PGC_SH_type_shift), /* l2_64   */
   3.310 @@ -2128,6 +2156,7 @@ void sh_remove_shadows(struct vcpu *v, m
   3.311      };
   3.312  
   3.313      ASSERT(shadow_lock_is_acquired(v->domain));
   3.314 +    ASSERT(!(all && fast));
   3.315  
   3.316      pg = mfn_to_page(gmfn);
   3.317  
   3.318 @@ -2147,29 +2176,26 @@ void sh_remove_shadows(struct vcpu *v, m
   3.319       * call will remove at most one shadow, and terminate immediately when
   3.320       * it does remove it, so we never walk the hash after doing a deletion.  */
   3.321  #define DO_UNSHADOW(_type) do {                                 \
   3.322 -    t = (_type) >> PGC_SH_type_shift;                          \
   3.323 -    smfn = shadow_hash_lookup(v, mfn_x(gmfn), t);              \
   3.324 -    if ( !sh_remove_shadow_via_pointer(v, smfn) && all )       \
   3.325 +    t = (_type) >> PGC_SH_type_shift;                           \
   3.326 +    smfn = shadow_hash_lookup(v, mfn_x(gmfn), t);               \
   3.327 +    if ( !sh_remove_shadow_via_pointer(v, smfn) && !fast )      \
   3.328          hash_foreach(v, masks[t], callbacks, smfn);             \
   3.329  } while (0)
   3.330  
   3.331      /* Top-level shadows need to be unpinned */
   3.332 -#define DO_UNPIN(_type) do {                                             \
   3.333 +#define DO_UNPIN(_type) do {                                            \
   3.334      t = (_type) >> PGC_SH_type_shift;                                   \
   3.335      smfn = shadow_hash_lookup(v, mfn_x(gmfn), t);                       \
   3.336      if ( mfn_to_page(smfn)->count_info & PGC_SH_pinned )                \
   3.337          sh_unpin(v, smfn);                                              \
   3.338 -    if ( (_type) == PGC_SH_l3_pae_shadow )                              \
   3.339 -        SHADOW_INTERNAL_NAME(sh_unpin_all_l3_subshadows,3,3)(v, smfn); \
   3.340  } while (0)
   3.341  
   3.342      if ( sh_flags & SHF_L1_32 )   DO_UNSHADOW(PGC_SH_l1_32_shadow);
   3.343      if ( sh_flags & SHF_L2_32 )   DO_UNPIN(PGC_SH_l2_32_shadow);
   3.344  #if CONFIG_PAGING_LEVELS >= 3
   3.345      if ( sh_flags & SHF_L1_PAE )  DO_UNSHADOW(PGC_SH_l1_pae_shadow);
   3.346 -    if ( sh_flags & SHF_L2_PAE )  DO_UNSHADOW(PGC_SH_l2_pae_shadow);
   3.347 -    if ( sh_flags & SHF_L2H_PAE ) DO_UNSHADOW(PGC_SH_l2h_pae_shadow);
   3.348 -    if ( sh_flags & SHF_L3_PAE )  DO_UNPIN(PGC_SH_l3_pae_shadow);
   3.349 +    if ( sh_flags & SHF_L2_PAE )  DO_UNPIN(PGC_SH_l2_pae_shadow);
   3.350 +    if ( sh_flags & SHF_L2H_PAE ) DO_UNPIN(PGC_SH_l2h_pae_shadow);
   3.351  #if CONFIG_PAGING_LEVELS >= 4
   3.352      if ( sh_flags & SHF_L1_64 )   DO_UNSHADOW(PGC_SH_l1_64_shadow);
   3.353      if ( sh_flags & SHF_L2_64 )   DO_UNSHADOW(PGC_SH_l2_64_shadow);
   3.354 @@ -2181,20 +2207,14 @@ void sh_remove_shadows(struct vcpu *v, m
   3.355  #undef DO_UNSHADOW
   3.356  #undef DO_UNPIN
   3.357  
   3.358 -
   3.359 -#if CONFIG_PAGING_LEVELS > 2
   3.360 -    /* We may have caused some PAE l3 entries to change: need to 
   3.361 -     * fix up the copies of them in various places */
   3.362 -    if ( sh_flags & (SHF_L2_PAE|SHF_L2H_PAE) )
   3.363 -        sh_pae_recopy(v->domain);
   3.364 -#endif
   3.365 -
   3.366      /* If that didn't catch the shadows, something is wrong */
   3.367 -    if ( all && (pg->count_info & PGC_page_table) )
   3.368 +    if ( !fast && (pg->count_info & PGC_page_table) )
   3.369      {
   3.370 -        SHADOW_ERROR("can't find all shadows of mfn %05lx (shadow_flags=%08x)\n",
   3.371 +        SHADOW_ERROR("can't find all shadows of mfn %05lx "
   3.372 +                     "(shadow_flags=%08x)\n",
   3.373                        mfn_x(gmfn), pg->shadow_flags);
   3.374 -        domain_crash(v->domain);
   3.375 +        if ( all ) 
   3.376 +            domain_crash(v->domain);
   3.377      }
   3.378  }
   3.379  
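
The new fast/all arguments give callers two distinct patterns: an opportunistic sh_remove_shadows(v, gmfn, 1, 0) that may leave shadows behind, and a must-succeed form that crashes the domain on failure. A convenience wrapper for the latter might look like this (an assumed shadow.h-style inline, not part of this hunk):

    /* Assumed wrapper: remove every shadow of gmfn, killing the
     * domain if any shadow cannot be found and removed. */
    static inline void shadow_remove_all_shadows(struct vcpu *v, mfn_t gmfn)
    {
        sh_remove_shadows(v, gmfn, 0 /* fast */, 1 /* all */);
    }
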
   3.380 @@ -3118,7 +3138,6 @@ void shadow_audit_tables(struct vcpu *v)
   3.381          SHADOW_INTERNAL_NAME(sh_audit_fl1_table,3,3), /* fl1_pae */
   3.382          SHADOW_INTERNAL_NAME(sh_audit_l2_table,3,3),  /* l2_pae  */
   3.383          SHADOW_INTERNAL_NAME(sh_audit_l2_table,3,3),  /* l2h_pae */
   3.384 -        SHADOW_INTERNAL_NAME(sh_audit_l3_table,3,3),  /* l3_pae  */
   3.385  #if CONFIG_PAGING_LEVELS >= 4
   3.386          SHADOW_INTERNAL_NAME(sh_audit_l1_table,4,4),  /* l1_64   */
   3.387          SHADOW_INTERNAL_NAME(sh_audit_fl1_table,4,4), /* fl1_64  */
   3.388 @@ -3143,7 +3162,7 @@ void shadow_audit_tables(struct vcpu *v)
   3.389          {
   3.390          case 2: mask = (SHF_L1_32|SHF_FL1_32|SHF_L2_32); break;
   3.391          case 3: mask = (SHF_L1_PAE|SHF_FL1_PAE|SHF_L2_PAE
   3.392 -                        |SHF_L2H_PAE|SHF_L3_PAE); break;
   3.393 +                        |SHF_L2H_PAE); break;
   3.394          case 4: mask = (SHF_L1_64|SHF_FL1_64|SHF_L2_64  
   3.395                          |SHF_L3_64|SHF_L4_64); break;
   3.396          default: BUG();
     4.1 --- a/xen/arch/x86/mm/shadow/multi.c	Wed Oct 18 13:43:35 2006 +0100
     4.2 +++ b/xen/arch/x86/mm/shadow/multi.c	Wed Oct 18 14:36:20 2006 +0100
     4.3 @@ -21,20 +21,6 @@
     4.4   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
     4.5   */
     4.6  
     4.7 -// DESIGN QUESTIONS:
     4.8 -// Why use subshadows for PAE guests?
     4.9 -// - reduces pressure in the hash table
    4.10 -// - reduces shadow size (64-vs-4096 bytes of shadow for 32 bytes of guest L3)
    4.11 -// - would need to find space in the page_info to store 7 more bits of
    4.12 -//   backpointer
    4.13 -// - independent shadows of 32 byte chunks makes it non-obvious how to quickly
    4.14 -//   figure out when to demote the guest page from l3 status
    4.15 -//
    4.16 -// PAE Xen HVM guests are restricted to 8GB of pseudo-physical address space.
    4.17 -// - Want to map the P2M table into the 16MB RO_MPT hole in Xen's address
    4.18 -//   space for both PV and HVM guests.
    4.19 -//
    4.20 -
    4.21  #include <xen/config.h>
    4.22  #include <xen/types.h>
    4.23  #include <xen/mm.h>
    4.24 @@ -118,9 +104,6 @@ static char *fetch_type_names[] = {
    4.25  #endif
    4.26  
    4.27  /* XXX forward declarations */
    4.28 -#if (GUEST_PAGING_LEVELS == 3) && (SHADOW_PAGING_LEVELS == 3)
    4.29 -static unsigned long hvm_pae_copy_root(struct vcpu *v, l3_pgentry_t *l3tab, int clear_res);
    4.30 -#endif
    4.31  static inline void sh_update_linear_entries(struct vcpu *v);
    4.32  
    4.33  /**************************************************************************/
    4.34 @@ -129,8 +112,6 @@ static inline void sh_update_linear_entr
    4.35   * Normal case: maps the mfn of a guest page to the mfn of its shadow page.
    4.36   * FL1's:       maps the *gfn* of the start of a superpage to the mfn of a
    4.37   *              shadow L1 which maps its "splinters".
    4.38 - * PAE CR3s:    maps the 32-byte aligned, 32-bit CR3 value to the mfn of the
    4.39 - *              PAE L3 info page for that CR3 value.
    4.40   */
    4.41  
    4.42  static inline mfn_t 
    4.43 @@ -215,7 +196,6 @@ delete_fl1_shadow_status(struct vcpu *v,
    4.44  {
    4.45      SHADOW_PRINTK("gfn=%"SH_PRI_gfn", type=%08x, smfn=%05lx\n",
    4.46                     gfn_x(gfn), PGC_SH_fl1_shadow, mfn_x(smfn));
    4.47 -
    4.48      shadow_hash_delete(v, gfn_x(gfn),
    4.49                          PGC_SH_fl1_shadow >> PGC_SH_type_shift, smfn);
    4.50  }
    4.51 @@ -429,18 +409,16 @@ static void sh_audit_gw(struct vcpu *v, 
    4.52      if ( !(SHADOW_AUDIT_ENABLE) )
    4.53          return;
    4.54  
    4.55 -#if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
    4.56  #if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
    4.57      if ( valid_mfn(gw->l4mfn)
    4.58           && valid_mfn((smfn = get_shadow_status(v, gw->l4mfn, 
    4.59                                                  PGC_SH_l4_shadow))) )
    4.60          (void) sh_audit_l4_table(v, smfn, _mfn(INVALID_MFN));
    4.61 -#endif /* PAE or 64... */
    4.62      if ( valid_mfn(gw->l3mfn)
    4.63           && valid_mfn((smfn = get_shadow_status(v, gw->l3mfn, 
    4.64                                                  PGC_SH_l3_shadow))) )
    4.65          (void) sh_audit_l3_table(v, smfn, _mfn(INVALID_MFN));
    4.66 -#endif /* All levels... */
    4.67 +#endif /* PAE or 64... */
    4.68      if ( valid_mfn(gw->l2mfn) )
    4.69      {
    4.70          if ( valid_mfn((smfn = get_shadow_status(v, gw->l2mfn, 
    4.71 @@ -498,8 +476,7 @@ static u32 guest_set_ad_bits(struct vcpu
    4.72      flags = guest_l1e_get_flags(*ep);
    4.73  
    4.74      /* PAE l3s do not have A and D bits */
    4.75 -    if ( unlikely(GUEST_PAGING_LEVELS == 3 && level == 3) )
    4.76 -        return flags;
    4.77 +    ASSERT(GUEST_PAGING_LEVELS > 3 || level != 3);
    4.78  
    4.79      /* Need the D bit as well for writes, in L1es and PSE L2es. */
    4.80      if ( ft == ft_demand_write  
    4.81 @@ -646,38 +623,14 @@ shadow_l2_index(mfn_t *smfn, u32 guest_i
    4.82  #endif
    4.83  }
    4.84  
    4.85 -#if GUEST_PAGING_LEVELS >= 3
    4.86 +#if GUEST_PAGING_LEVELS >= 4
    4.87  
    4.88  static inline u32
    4.89  shadow_l3_index(mfn_t *smfn, u32 guest_index)
    4.90  {
    4.91 -#if GUEST_PAGING_LEVELS == 3
    4.92 -    u32 group_id;
    4.93 -
    4.94 -    // Because we use twice the space in L3 shadows as was consumed in guest
    4.95 -    // L3s, the number of guest entries per shadow page is
    4.96 -    // SHADOW_L2_PAGETABLE_ENTRIES/2.  (Note this is *not*
    4.97 -    // SHADOW_L3_PAGETABLE_ENTRIES, which in this case is 4...)
    4.98 -    //
    4.99 -    *smfn = _mfn(mfn_x(*smfn) +
   4.100 -                 (guest_index / (SHADOW_L2_PAGETABLE_ENTRIES / 2)));
   4.101 -
   4.102 -    // We store PAE L3 shadows in groups of 4, alternating shadows and
   4.103 -    // pae_l3_bookkeeping structs.  So the effective shadow index is
   4.104 -    // the the group_id * 8 + the offset within the group.
   4.105 -    //
   4.106 -    guest_index %= (SHADOW_L2_PAGETABLE_ENTRIES / 2);
   4.107 -    group_id = guest_index / 4;
   4.108 -    return (group_id * 8) + (guest_index % 4);
   4.109 -#else
   4.110      return guest_index;
   4.111 -#endif
   4.112  }
   4.113  
   4.114 -#endif // GUEST_PAGING_LEVELS >= 3
   4.115 -
   4.116 -#if GUEST_PAGING_LEVELS >= 4
   4.117 -
   4.118  static inline u32
   4.119  shadow_l4_index(mfn_t *smfn, u32 guest_index)
   4.120  {
   4.121 @@ -722,6 +675,9 @@ do {                                    
   4.122      u32 pass_thru_flags;
   4.123      u32 sflags;
   4.124  
   4.125 +    /* We don't shadow PAE l3s */
   4.126 +    ASSERT(GUEST_PAGING_LEVELS > 3 || level != 3);
   4.127 +
   4.128      // XXX -- might want to think about PAT support for HVM guests...
   4.129  
   4.130  #ifndef NDEBUG
   4.131 @@ -757,29 +713,16 @@ do {                                    
   4.132      if ( guest_entry_ptr && (ft & FETCH_TYPE_DEMAND) )
   4.133          gflags = guest_set_ad_bits(v, gmfn, guest_entry_ptr, level, ft);
   4.134      
   4.135 -    // PAE does not allow NX, RW, USER, ACCESSED, or DIRTY bits in its L3e's...
   4.136 -    //
   4.137 -    if ( (SHADOW_PAGING_LEVELS == 3) && (level == 3) )
   4.138 -        pass_thru_flags = _PAGE_PRESENT;
   4.139 -    else
   4.140 -    {
   4.141 -        pass_thru_flags = (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_USER |
   4.142 -                           _PAGE_RW | _PAGE_PRESENT);
   4.143 -        if ( guest_supports_nx(v) )
   4.144 -            pass_thru_flags |= _PAGE_NX_BIT;
   4.145 -    }
   4.146 -
   4.147 -    // PAE guests can not put NX, RW, USER, ACCESSED, or DIRTY bits into their
   4.148 -    // L3e's; they are all implied.  So we emulate them here.
   4.149 -    //
   4.150 -    if ( (GUEST_PAGING_LEVELS == 3) && (level == 3) )
   4.151 -        gflags = pass_thru_flags;
   4.152  
   4.153      // Propagate bits from the guest to the shadow.
   4.154      // Some of these may be overwritten, below.
   4.155      // Since we know the guest's PRESENT bit is set, we also set the shadow's
   4.156      // SHADOW_PRESENT bit.
   4.157      //
   4.158 +    pass_thru_flags = (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_USER |
   4.159 +                       _PAGE_RW | _PAGE_PRESENT);
   4.160 +    if ( guest_supports_nx(v) )
   4.161 +        pass_thru_flags |= _PAGE_NX_BIT;
   4.162      sflags = (gflags & pass_thru_flags) | _PAGE_SHADOW_PRESENT;
   4.163  
   4.164      // Copy the guest's RW bit into the SHADOW_RW bit.
   4.165 @@ -800,8 +743,7 @@ do {                                    
   4.166      // If the A or D bit has not yet been set in the guest, then we must
   4.167      // prevent the corresponding kind of access.
   4.168      //
   4.169 -    if ( unlikely(!((GUEST_PAGING_LEVELS == 3) && (level == 3)) &&
   4.170 -                  !(gflags & _PAGE_ACCESSED)) )
   4.171 +    if ( unlikely(!(gflags & _PAGE_ACCESSED)) )
   4.172          sflags &= ~_PAGE_PRESENT;
   4.173  
   4.174      /* D bits exist in L1es and PSE L2es */
   4.175 @@ -890,9 +832,7 @@ l4e_propagate_from_guest(struct vcpu *v,
   4.176                    fetch_type_names[ft], gl4e->l4, sl4p->l4);
   4.177      ASSERT(sflags != -1);
   4.178  }
   4.179 -#endif // GUEST_PAGING_LEVELS >= 4
   4.180 -
   4.181 -#if GUEST_PAGING_LEVELS >= 3
   4.182 +
   4.183  static void
   4.184  l3e_propagate_from_guest(struct vcpu *v,
   4.185                           guest_l3e_t *gl3e,
   4.186 @@ -912,7 +852,7 @@ l3e_propagate_from_guest(struct vcpu *v,
   4.187                    fetch_type_names[ft], gl3e->l3, sl3p->l3);
   4.188      ASSERT(sflags != -1);
   4.189  }
   4.190 -#endif // GUEST_PAGING_LEVELS >= 3
   4.191 +#endif // GUEST_PAGING_LEVELS >= 4
   4.192  
   4.193  static void
   4.194  l2e_propagate_from_guest(struct vcpu *v, 
   4.195 @@ -1081,9 +1021,6 @@ shadow_write_entries(void *d, void *s, i
   4.196          safe_write_entry(dst++, src++);
   4.197  
   4.198      if ( map != NULL ) sh_unmap_domain_page(map);
   4.199 -
   4.200 -    /* XXX TODO:
   4.201 -     * Update min/max field in page_info struct of this mfn */
   4.202  }
   4.203  
   4.204  static inline int
   4.205 @@ -1195,9 +1132,7 @@ static int shadow_set_l4e(struct vcpu *v
   4.206      }
   4.207      return flags;
   4.208  }
   4.209 -#endif /* GUEST_PAGING_LEVELS >= 4 */
   4.210 -
   4.211 -#if GUEST_PAGING_LEVELS >= 3
   4.212 +
   4.213  static int shadow_set_l3e(struct vcpu *v, 
   4.214                            shadow_l3e_t *sl3e, 
   4.215                            shadow_l3e_t new_sl3e, 
   4.216 @@ -1224,28 +1159,6 @@ static int shadow_set_l3e(struct vcpu *v
   4.217      shadow_write_entries(sl3e, &new_sl3e, 1, sl3mfn);
   4.218      flags |= SHADOW_SET_CHANGED;
   4.219  
   4.220 -#if GUEST_PAGING_LEVELS == 3 
   4.221 -    /* We wrote a guest l3e in a PAE pagetable.  This table is copied in
   4.222 -     * the linear pagetable entries of its l2s, and may also be copied
   4.223 -     * to a low memory location to make it fit in CR3.  Report that we
   4.224 -     * need to resync those copies (we can't wait for the guest to flush
   4.225 -     * the TLB because it might be an increase in rights). */
   4.226 -    {
   4.227 -        struct vcpu *vcpu;
   4.228 -
   4.229 -        struct pae_l3_bookkeeping *info = sl3p_to_info(sl3e);
   4.230 -        for_each_vcpu(v->domain, vcpu)
   4.231 -        {
   4.232 -            if (info->vcpus & (1 << vcpu->vcpu_id))
   4.233 -            {
   4.234 -                // Remember that this flip/update needs to occur.
   4.235 -                vcpu->arch.shadow.pae_flip_pending = 1;
   4.236 -                flags |= SHADOW_SET_L3PAE_RECOPY;
   4.237 -            }
   4.238 -        }
   4.239 -    }
   4.240 -#endif
   4.241 -
   4.242      if ( shadow_l3e_get_flags(old_sl3e) & _PAGE_PRESENT ) 
   4.243      {
   4.244          /* We lost a reference to an old mfn. */
   4.245 @@ -1260,7 +1173,7 @@ static int shadow_set_l3e(struct vcpu *v
   4.246      }
   4.247      return flags;
   4.248  }
   4.249 -#endif /* GUEST_PAGING_LEVELS >= 3 */ 
   4.250 +#endif /* GUEST_PAGING_LEVELS >= 4 */ 
   4.251  
   4.252  static int shadow_set_l2e(struct vcpu *v, 
   4.253                            shadow_l2e_t *sl2e, 
   4.254 @@ -1535,51 +1448,7 @@ do {                                    
   4.255  
   4.256  #endif /* different kinds of l2 */
   4.257  
   4.258 -#if GUEST_PAGING_LEVELS == 3
   4.259 -
   4.260 -/* PAE l3 subshadow: touch all entries (FOREACH_L2E will find Xen l2es). */
   4.261 -#define SHADOW_FOREACH_L3E_SUB(_sl3e, _gl3p, _done, _code)             \
   4.262 -do {                                                                    \
   4.263 -    int _i;                                                             \
   4.264 -    for ( _i = 0; _i < 4; _i++ )                                        \
   4.265 -    {                                                                   \
   4.266 -        if ( shadow_l3e_get_flags(*(_sl3e)) & _PAGE_PRESENT )           \
   4.267 -            {_code}                                                     \
   4.268 -        if ( _done ) break;                                             \
   4.269 -        _sl3e++;                                                        \
   4.270 -        increment_ptr_to_guest_entry(_gl3p);                            \
   4.271 -    }                                                                   \
   4.272 -} while (0)
   4.273 -
   4.274 -/* PAE l3 full shadow: call subshadow walk on all valid l3 subshadows */
   4.275 -#define SHADOW_FOREACH_L3E(_sl3mfn, _sl3e, _gl3p, _done, _code)        \
   4.276 -do {                                                                    \
   4.277 -    int _i, _j, _k, __done = 0;                                         \
   4.278 -    ASSERT((mfn_to_page(_sl3mfn)->count_info & PGC_SH_type_mask)       \
   4.279 -           == PGC_SH_l3_pae_shadow);                                   \
   4.280 -    /* The subshadows are split, 64 on each page of the shadow */       \
   4.281 -    for ( _j = 0; _j < 2 && !__done; _j++ )                             \
   4.282 -    {                                                                   \
   4.283 -        void *_sp = sh_map_domain_page(_sl3mfn);                       \
   4.284 -        for ( _i = 0; _i < 64; _i++ )                                   \
   4.285 -        {                                                               \
   4.286 -            /* Every second 32-byte region is a bookkeeping entry */    \
   4.287 -            _sl3e = (shadow_l3e_t *)(_sp + (64 * _i));                  \
   4.288 -            if ( (sl3p_to_info(_sl3e))->refcount > 0 )                  \
   4.289 -                SHADOW_FOREACH_L3E_SUB(_sl3e, _gl3p,                   \
   4.290 -                                        ({ __done = (_done); __done; }), \
   4.291 -                                        _code);                         \
   4.292 -            else                                                        \
   4.293 -                for ( _k = 0 ; _k < 4 ; _k++ )                          \
   4.294 -                    increment_ptr_to_guest_entry(_gl3p);                \
   4.295 -            if ( __done ) break;                                        \
   4.296 -        }                                                               \
   4.297 -        sh_unmap_domain_page(_sp);                                     \
   4.298 -        _sl3mfn = _mfn(mfn_x(_sl3mfn) + 1);                             \
   4.299 -    }                                                                   \
   4.300 -} while (0)
   4.301 -
   4.302 -#elif GUEST_PAGING_LEVELS == 4
   4.303 +#if GUEST_PAGING_LEVELS == 4
   4.304  
   4.305  /* 64-bit l3: touch all entries */
   4.306  #define SHADOW_FOREACH_L3E(_sl3mfn, _sl3e, _gl3p, _done, _code)        \
   4.307 @@ -1711,8 +1580,8 @@ void sh_install_xen_entries_in_l2h(struc
   4.308      
   4.309      /* We don't set up a linear mapping here because we can't until this
   4.310       * l2h is installed in an l3e.  sh_update_linear_entries() handles
   4.311 -     * the linear mappings when the l3 is loaded.  We zero them here, just as
   4.312 -     * a safety measure.
   4.313 +     * the linear mappings when CR3 (and so the fourth l3e) is loaded.  
   4.314 +     * We zero them here, just as a safety measure.
   4.315       */
   4.316      for ( i = 0; i < SHADOW_L3_PAGETABLE_ENTRIES; i++ )
   4.317          sl2e[shadow_l2_table_offset(LINEAR_PT_VIRT_START) + i] =
   4.318 @@ -1740,37 +1609,6 @@ void sh_install_xen_entries_in_l2h(struc
   4.319      
   4.320      sh_unmap_domain_page(sl2e);
   4.321  }
   4.322 -
   4.323 -void sh_install_xen_entries_in_l3(struct vcpu *v, mfn_t gl3mfn, mfn_t sl3mfn)
   4.324 -{
   4.325 -    shadow_l3e_t *sl3e;
   4.326 -    guest_l3e_t *gl3e = v->arch.guest_vtable;
   4.327 -    shadow_l3e_t new_sl3e;
   4.328 -    gfn_t l2gfn;
   4.329 -    mfn_t l2gmfn, l2smfn;
   4.330 -    int r;
   4.331 -
   4.332 -    ASSERT(!shadow_mode_external(v->domain));
   4.333 -    ASSERT(guest_l3e_get_flags(gl3e[3]) & _PAGE_PRESENT);
   4.334 -    l2gfn = guest_l3e_get_gfn(gl3e[3]);
   4.335 -    l2gmfn = sh_gfn_to_mfn(v->domain, gfn_x(l2gfn));
   4.336 -    l2smfn = get_shadow_status(v, l2gmfn, PGC_SH_l2h_shadow);
   4.337 -    if ( !valid_mfn(l2smfn) )
   4.338 -    {
   4.339 -        /* must remove write access to this page before shadowing it */
   4.340 -        // XXX -- should check to see whether this is better with level==0 or
   4.341 -        // level==2...
   4.342 -        if ( shadow_remove_write_access(v, l2gmfn, 2, 0xc0000000ul) != 0 )
   4.343 -            flush_tlb_mask(v->domain->domain_dirty_cpumask);
   4.344 - 
   4.345 -        l2smfn = sh_make_shadow(v, l2gmfn, PGC_SH_l2h_shadow);
   4.346 -    }
   4.347 -    l3e_propagate_from_guest(v, &gl3e[3], gl3mfn, l2smfn, &new_sl3e,
   4.348 -                             ft_prefetch);
   4.349 -    sl3e = sh_map_domain_page(sl3mfn);
   4.350 -    r = shadow_set_l3e(v, &sl3e[3], new_sl3e, sl3mfn);
   4.351 -    sh_unmap_domain_page(sl3e);
   4.352 -}
   4.353  #endif
   4.354  
   4.355  
   4.356 @@ -1827,8 +1665,6 @@ void sh_install_xen_entries_in_l2(struct
   4.357  
   4.358  
   4.359  
   4.360 -
   4.361 -
   4.362  /**************************************************************************/
   4.363  /* Create a shadow of a given guest page.
   4.364   */
   4.365 @@ -1839,7 +1675,10 @@ sh_make_shadow(struct vcpu *v, mfn_t gmf
   4.366      SHADOW_DEBUG(MAKE_SHADOW, "(%05lx, %u)=>%05lx\n",
   4.367                    mfn_x(gmfn), shadow_type, mfn_x(smfn));
   4.368  
   4.369 -    if ( shadow_type != PGC_SH_guest_root_type )
   4.370 +    if ( shadow_type != PGC_SH_l2_32_shadow 
   4.371 +         && shadow_type != PGC_SH_l2_pae_shadow 
   4.372 +         && shadow_type != PGC_SH_l2h_pae_shadow 
   4.373 +         && shadow_type != PGC_SH_l4_64_shadow )
    4.374 +        /* Lower-level shadow, not yet linked from a higher level */
   4.375          mfn_to_page(smfn)->up = 0;
   4.376  
   4.377 @@ -1853,8 +1692,6 @@ sh_make_shadow(struct vcpu *v, mfn_t gmf
   4.378              sh_install_xen_entries_in_l4(v, gmfn, smfn); break;
   4.379  #endif
   4.380  #if CONFIG_PAGING_LEVELS == 3 && GUEST_PAGING_LEVELS == 3
   4.381 -        case PGC_SH_l3_shadow:
   4.382 -            sh_install_xen_entries_in_l3(v, gmfn, smfn); break;
   4.383          case PGC_SH_l2h_shadow:
   4.384              sh_install_xen_entries_in_l2h(v, smfn); break;
   4.385  #endif
   4.386 @@ -1988,20 +1825,16 @@ static shadow_l4e_t * shadow_get_and_cre
   4.387                                                  mfn_t *sl4mfn)
   4.388  {
   4.389      /* There is always a shadow of the top level table.  Get it. */
   4.390 -    *sl4mfn = pagetable_get_mfn(v->arch.shadow_table);
   4.391 +    *sl4mfn = pagetable_get_mfn(v->arch.shadow_table[0]);
   4.392      /* Reading the top level table is always valid. */
   4.393      return sh_linear_l4_table(v) + shadow_l4_linear_offset(gw->va);
   4.394  }
   4.395 -#endif /* GUEST_PAGING_LEVELS >= 4 */
   4.396 -
   4.397 -
   4.398 -#if GUEST_PAGING_LEVELS >= 3
   4.399 +
   4.400  static shadow_l3e_t * shadow_get_and_create_l3e(struct vcpu *v, 
   4.401                                                  walk_t *gw, 
   4.402                                                  mfn_t *sl3mfn,
   4.403                                                  fetch_type_t ft)
   4.404  {
   4.405 -#if GUEST_PAGING_LEVELS >= 4 /* 64bit... */
   4.406      mfn_t sl4mfn;
   4.407      shadow_l4e_t *sl4e;
   4.408      if ( !valid_mfn(gw->l3mfn) ) return NULL; /* No guest page. */
   4.409 @@ -2032,19 +1865,8 @@ static shadow_l3e_t * shadow_get_and_cre
   4.410      }
   4.411      /* Now follow it down a level.  Guaranteed to succeed. */
   4.412      return sh_linear_l3_table(v) + shadow_l3_linear_offset(gw->va);
   4.413 -#else /* PAE... */
   4.414 -    /* There is always a shadow of the top level table.  Get it. */
   4.415 -    *sl3mfn = pagetable_get_mfn(v->arch.shadow_table);
   4.416 -    /* This next line is important: the shadow l3 table is in an 8k
   4.417 -     * shadow and we need to return the right mfn of the pair. This call
   4.418 -     * will set it for us as a side-effect. */
   4.419 -    (void) shadow_l3_index(sl3mfn, guest_index(gw->l3e));
   4.420 -    ASSERT(v->arch.shadow_vtable);
   4.421 -    return ((shadow_l3e_t *)v->arch.shadow_vtable) 
   4.422 -        + shadow_l3_table_offset(gw->va);
   4.423 +}
   4.424  #endif /* GUEST_PAGING_LEVELS >= 4 */
   4.425 -}
   4.426 -#endif /* GUEST_PAGING_LEVELS >= 3 */
   4.427  
   4.428  
   4.429  static shadow_l2e_t * shadow_get_and_create_l2e(struct vcpu *v, 
   4.430 @@ -2052,7 +1874,7 @@ static shadow_l2e_t * shadow_get_and_cre
   4.431                                                  mfn_t *sl2mfn,
   4.432                                                  fetch_type_t ft)
   4.433  {
   4.434 -#if GUEST_PAGING_LEVELS >= 3 /* PAE or 64bit... */
   4.435 +#if GUEST_PAGING_LEVELS >= 4 /* 64bit... */
   4.436      mfn_t sl3mfn = _mfn(INVALID_MFN);
   4.437      shadow_l3e_t *sl3e;
   4.438      if ( !valid_mfn(gw->l2mfn) ) return NULL; /* No guest page. */
   4.439 @@ -2080,17 +1902,22 @@ static shadow_l2e_t * shadow_get_and_cre
   4.440                                   *sl2mfn, &new_sl3e, ft);
   4.441          r = shadow_set_l3e(v, sl3e, new_sl3e, sl3mfn);
   4.442          ASSERT((r & SHADOW_SET_FLUSH) == 0);
   4.443 -#if GUEST_PAGING_LEVELS == 3 
   4.444 -        /* Need to sync up the linear maps, as we are about to use them */
   4.445 -        ASSERT( r & SHADOW_SET_L3PAE_RECOPY );
   4.446 -        sh_pae_recopy(v->domain);
   4.447 -#endif
   4.448      }
   4.449      /* Now follow it down a level.  Guaranteed to succeed. */
   4.450      return sh_linear_l2_table(v) + shadow_l2_linear_offset(gw->va);
   4.451 +#elif GUEST_PAGING_LEVELS == 3 /* PAE... */
   4.452 +    /* We never demand-shadow PAE l3es: they are only created in
   4.453 +     * sh_update_cr3().  Check if the relevant sl3e is present. */
   4.454 +    shadow_l3e_t *sl3e = ((shadow_l3e_t *)&v->arch.shadow.l3table) 
   4.455 +        + shadow_l3_linear_offset(gw->va);
   4.456 +    if ( !(shadow_l3e_get_flags(*sl3e) & _PAGE_PRESENT) ) 
   4.457 +        return NULL;
   4.458 +    *sl2mfn = shadow_l3e_get_mfn(*sl3e);
   4.459 +    ASSERT(valid_mfn(*sl2mfn));
   4.460 +    return sh_linear_l2_table(v) + shadow_l2_linear_offset(gw->va);
   4.461  #else /* 32bit... */
   4.462      /* There is always a shadow of the top level table.  Get it. */
   4.463 -    *sl2mfn = pagetable_get_mfn(v->arch.shadow_table);
   4.464 +    *sl2mfn = pagetable_get_mfn(v->arch.shadow_table[0]);
   4.465      /* This next line is important: the guest l2 has a 16k
   4.466       * shadow, we need to return the right mfn of the four. This
   4.467       * call will set it for us as a side-effect. */
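
The PAE branch above can simply fail when the sl3e is absent because the four l3es are built at CR3-load time, not demand-faulted. A sketch of what the sh_update_cr3() side of that contract might do with the new shadow_table[] array (only the field names come from this patch; the helper calls are assumed):

    /* Assumed sketch: populate the per-vcpu PAE l3table from the
     * four top-level shadows; empty slots get an empty l3e. */
    shadow_l3e_t *sl3e = (shadow_l3e_t *)&v->arch.shadow.l3table;
    int i;
    for ( i = 0; i < 4; i++ )
    {
        mfn_t smfn = pagetable_get_mfn(v->arch.shadow_table[i]);
        sl3e[i] = mfn_x(smfn)
            ? shadow_l3e_from_mfn(smfn, _PAGE_PRESENT)
            : shadow_l3e_empty();
    }
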
   4.468 @@ -2213,9 +2040,7 @@ void sh_destroy_l4_shadow(struct vcpu *v
   4.469      /* Put the memory back in the pool */
   4.470      shadow_free(v->domain, smfn);
   4.471  }
   4.472 -#endif    
   4.473 -
   4.474 -#if GUEST_PAGING_LEVELS >= 3
   4.475 +
   4.476  void sh_destroy_l3_shadow(struct vcpu *v, mfn_t smfn)
   4.477  {
   4.478      shadow_l3e_t *sl3e;
   4.479 @@ -2230,10 +2055,6 @@ void sh_destroy_l3_shadow(struct vcpu *v
   4.480      gmfn = _mfn(mfn_to_page(smfn)->u.inuse.type_info);
   4.481      delete_shadow_status(v, gmfn, t, smfn);
   4.482      shadow_demote(v, gmfn, t);
   4.483 -#if GUEST_PAGING_LEVELS == 3
   4.484 -    /* Take this shadow off the list of root shadows */
   4.485 -    list_del_init(&mfn_to_page(smfn)->list);
   4.486 -#endif
   4.487  
   4.488      /* Decrement refcounts of all the old entries */
   4.489      sl3mfn = smfn; 
   4.490 @@ -2247,53 +2068,8 @@ void sh_destroy_l3_shadow(struct vcpu *v
   4.491      /* Put the memory back in the pool */
   4.492      shadow_free(v->domain, smfn);
   4.493  }
   4.494 -#endif    
   4.495 -
   4.496 -
   4.497 -#if GUEST_PAGING_LEVELS == 3
   4.498 -static void sh_destroy_l3_subshadow(struct vcpu *v, 
   4.499 -                                     shadow_l3e_t *sl3e)
   4.500 -/* Tear down just a single 4-entry l3 on a 2-page l3 shadow. */
   4.501 -{
   4.502 -    int i;
   4.503 -    mfn_t sl3mfn = _mfn(maddr_from_mapped_domain_page(sl3e) >> PAGE_SHIFT);
   4.504 -    ASSERT((unsigned long)sl3e % (4 * sizeof (shadow_l3e_t)) == 0); 
   4.505 -    for ( i = 0; i < GUEST_L3_PAGETABLE_ENTRIES; i++ ) 
   4.506 -        if ( shadow_l3e_get_flags(sl3e[i]) & _PAGE_PRESENT ) 
   4.507 -            shadow_set_l3e(v, &sl3e[i], shadow_l3e_empty(), sl3mfn);
   4.508 -}
   4.509 -#endif
   4.510 -
   4.511 -#if (GUEST_PAGING_LEVELS == 3) && (SHADOW_PAGING_LEVELS == 3)
   4.512 -void sh_unpin_all_l3_subshadows(struct vcpu *v, mfn_t smfn)
   4.513 -/* Walk a full PAE l3 shadow, unpinning all of the subshadows on it */
   4.514 -{
   4.515 -    int i, j;
   4.516 -    struct pae_l3_bookkeeping *bk;
   4.517 -    
   4.518 -    ASSERT((mfn_to_page(smfn)->count_info & PGC_SH_type_mask) 
   4.519 -           == PGC_SH_l3_pae_shadow);
   4.520 -    /* The subshadows are split, 64 on each page of the shadow */
   4.521 -    for ( i = 0; i < 2; i++ ) 
   4.522 -    {
   4.523 -        void *p = sh_map_domain_page(_mfn(mfn_x(smfn) + i));
   4.524 -        for ( j = 0; j < 64; j++ )
   4.525 -        {
   4.526 -            /* Every second 32-byte region is a bookkeeping entry */
   4.527 -            bk = (struct pae_l3_bookkeeping *)(p + (64 * j) + 32);
   4.528 -            if ( bk->pinned )
   4.529 -                sh_unpin_l3_subshadow(v, (shadow_l3e_t *)(p + (64*j)), smfn);
   4.530 -            /* Check whether we've just freed the whole shadow */
   4.531 -            if ( (mfn_to_page(smfn)->count_info & PGC_SH_count_mask) == 0 ) 
   4.532 -            {
   4.533 -                sh_unmap_domain_page(p);
   4.534 -                return;
   4.535 -            }
   4.536 -        }
   4.537 -        sh_unmap_domain_page(p);
   4.538 -    }
   4.539 -}
   4.540 -#endif
   4.541 +#endif /* GUEST_PAGING_LEVELS >= 4 */
   4.542 +
   4.543  
   4.544  void sh_destroy_l2_shadow(struct vcpu *v, mfn_t smfn)
   4.545  {
   4.546 @@ -2311,7 +2087,7 @@ void sh_destroy_l2_shadow(struct vcpu *v
   4.547      gmfn = _mfn(mfn_to_page(smfn)->u.inuse.type_info);
   4.548      delete_shadow_status(v, gmfn, t, smfn);
   4.549      shadow_demote(v, gmfn, t);
   4.550 -#if GUEST_PAGING_LEVELS == 2
   4.551 +#if (GUEST_PAGING_LEVELS == 2) || (GUEST_PAGING_LEVELS == 3)
   4.552      /* Take this shadow off the list of root shadows */
   4.553      list_del_init(&mfn_to_page(smfn)->list);
   4.554  #endif
   4.555 @@ -2421,31 +2197,14 @@ void sh_unhook_32b_mappings(struct vcpu 
   4.556  
   4.557  #elif GUEST_PAGING_LEVELS == 3
   4.558  
   4.559 -void sh_unhook_pae_mappings(struct vcpu *v, mfn_t sl3mfn)
   4.560 -/* Walk a full PAE l3 shadow, unhooking entries from all the subshadows */
   4.561 +void sh_unhook_pae_mappings(struct vcpu *v, mfn_t sl2mfn)
   4.562 +/* Walk a PAE l2 shadow, unhooking entries from all the subshadows */
   4.563  {
   4.564 -    shadow_l3e_t *sl3e;
   4.565 -    SHADOW_FOREACH_L3E(sl3mfn, sl3e, 0, 0, {
   4.566 -        if ( (shadow_l3e_get_flags(*sl3e) & _PAGE_PRESENT) ) {
   4.567 -            mfn_t sl2mfn = shadow_l3e_get_mfn(*sl3e);
   4.568 -            if ( (mfn_to_page(sl2mfn)->count_info & PGC_SH_type_mask) 
   4.569 -                 == PGC_SH_l2h_pae_shadow ) 
   4.570 -            {
   4.571 -                /* High l2: need to pick particular l2es to unhook */
   4.572 -                shadow_l2e_t *sl2e;
   4.573 -                SHADOW_FOREACH_L2E(sl2mfn, sl2e, 0, 0, 1, {
   4.574 -                    (void) shadow_set_l2e(v, sl2e, shadow_l2e_empty(), sl2mfn);
   4.575 -                });
   4.576 -            }
   4.577 -            else
   4.578 -            {
   4.579 -                /* Normal l2: can safely unhook the whole l3e */
   4.580 -                (void) shadow_set_l3e(v, sl3e, shadow_l3e_empty(), sl3mfn);
   4.581 -            }
   4.582 -        }
   4.583 +    shadow_l2e_t *sl2e;
   4.584 +    int xen_mappings = !shadow_mode_external(v->domain);
   4.585 +    SHADOW_FOREACH_L2E(sl2mfn, sl2e, 0, 0, xen_mappings, {
   4.586 +        (void) shadow_set_l2e(v, sl2e, shadow_l2e_empty(), sl2mfn);
   4.587      });
   4.588 -    /* We've changed PAE L3 entries: must sync up various copies of them */
   4.589 -    sh_pae_recopy(v->domain);
   4.590  }
   4.591  
   4.592  #elif GUEST_PAGING_LEVELS == 4
   4.593 @@ -2523,9 +2282,8 @@ static int validate_gl4e(struct vcpu *v,
   4.594      result |= shadow_set_l4e(v, sl4p, new_sl4e, sl4mfn);
   4.595      return result;
   4.596  }
   4.597 -#endif // GUEST_PAGING_LEVELS >= 4
   4.598 -
   4.599 -#if GUEST_PAGING_LEVELS >= 3
   4.600 +
   4.601 +
   4.602  static int validate_gl3e(struct vcpu *v, void *new_ge, mfn_t sl3mfn, void *se)
   4.603  {
   4.604      shadow_l3e_t new_sl3e;
   4.605 @@ -2536,16 +2294,6 @@ static int validate_gl3e(struct vcpu *v,
   4.606  
   4.607      perfc_incrc(shadow_validate_gl3e_calls);
   4.608  
   4.609 -#if (SHADOW_PAGING_LEVELS == 3) && (GUEST_PAGING_LEVELS == 3)
   4.610 -    {
   4.611 -        /* If we've updated a subshadow which is unreferenced then 
   4.612 -           we don't care what value is being written - bail. */
   4.613 -        struct pae_l3_bookkeeping *info = sl3p_to_info(se); 
   4.614 -        if(!info->refcount)
   4.615 -            return result; 
   4.616 -    }
   4.617 -#endif
   4.618 -
   4.619      if ( guest_l3e_get_flags(*new_gl3e) & _PAGE_PRESENT )
   4.620      {
   4.621          gfn_t gl2gfn = guest_l3e_get_gfn(*new_gl3e);
   4.622 @@ -2559,16 +2307,9 @@ static int validate_gl3e(struct vcpu *v,
   4.623                               sl2mfn, &new_sl3e, ft_prefetch);
   4.624      result |= shadow_set_l3e(v, sl3p, new_sl3e, sl3mfn);
   4.625  
   4.626 -#if GUEST_PAGING_LEVELS == 3
   4.627 -    /* We have changed a PAE l3 entry: need to sync up the possible copies 
   4.628 -     * of it */
   4.629 -    if ( result & SHADOW_SET_L3PAE_RECOPY )
   4.630 -        sh_pae_recopy(v->domain);
   4.631 -#endif
   4.632 -
   4.633      return result;
   4.634  }
   4.635 -#endif // GUEST_PAGING_LEVELS >= 3
   4.636 +#endif // GUEST_PAGING_LEVELS >= 4
   4.637  
   4.638  static int validate_gl2e(struct vcpu *v, void *new_ge, mfn_t sl2mfn, void *se)
   4.639  {
   4.640 @@ -2755,12 +2496,12 @@ int
   4.641  sh_map_and_validate_gl3e(struct vcpu *v, mfn_t gl3mfn,
   4.642                            void *new_gl3p, u32 size)
   4.643  {
   4.644 -#if GUEST_PAGING_LEVELS >= 3
   4.645 +#if GUEST_PAGING_LEVELS >= 4
   4.646      return sh_map_and_validate(v, gl3mfn, new_gl3p, size, 
   4.647                                  PGC_SH_l3_shadow, 
   4.648                                  shadow_l3_index, 
   4.649                                  validate_gl3e);
   4.650 -#else // ! GUEST_PAGING_LEVELS >= 3
   4.651 +#else // ! GUEST_PAGING_LEVELS >= 4
   4.652      SHADOW_PRINTK("called in wrong paging mode!\n");
   4.653      BUG();
   4.654      return 0;
   4.655 @@ -2822,10 +2563,10 @@ static inline void check_for_early_unsha
   4.656      {
   4.657          u32 flags = mfn_to_page(gmfn)->shadow_flags;
   4.658          mfn_t smfn;
   4.659 -        if ( !(flags & (SHF_L2_32|SHF_L3_PAE|SHF_L4_64)) )
   4.660 +        if ( !(flags & (SHF_L2_32|SHF_L2_PAE|SHF_L2H_PAE|SHF_L4_64)) )
   4.661          {
   4.662              perfc_incrc(shadow_early_unshadow);
   4.663 -            sh_remove_shadows(v, gmfn, 0 /* Can fail to unshadow */ );
   4.664 +            sh_remove_shadows(v, gmfn, 1, 0 /* Fast, can fail to unshadow */ );
   4.665              return;
   4.666          }
   4.667          /* SHF_unhooked_mappings is set to make sure we only unhook
   4.668 @@ -2840,9 +2581,14 @@ static inline void check_for_early_unsha
   4.669                  smfn = get_shadow_status(v, gmfn, PGC_SH_l2_32_shadow);
   4.670                  shadow_unhook_mappings(v, smfn);
   4.671              }
   4.672 -            if ( flags & SHF_L3_PAE ) 
   4.673 +            if ( flags & SHF_L2_PAE ) 
   4.674              {
   4.675 -                smfn = get_shadow_status(v, gmfn, PGC_SH_l3_pae_shadow);
   4.676 +                smfn = get_shadow_status(v, gmfn, PGC_SH_l2_pae_shadow);
   4.677 +                shadow_unhook_mappings(v, smfn);
   4.678 +            }
   4.679 +            if ( flags & SHF_L2H_PAE ) 
   4.680 +            {
   4.681 +                smfn = get_shadow_status(v, gmfn, PGC_SH_l2h_pae_shadow);
   4.682                  shadow_unhook_mappings(v, smfn);
   4.683              }
   4.684              if ( flags & SHF_L4_64 ) 
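
    [The early-unshadow test now treats the two PAE l2 shadow types as
    top-level shadows in place of the removed SHF_L3_PAE.  Under this
    changeset's flag definitions, a gmfn is eligible for early unshadowing
    only when it has no top-level shadow at all; the same predicate,
    factored into an illustrative helper:

        static inline int has_toplevel_shadow(u32 shadow_flags)
        {
            return (shadow_flags &
                    (SHF_L2_32 | SHF_L2_PAE | SHF_L2H_PAE | SHF_L4_64)) != 0;
        }
    ]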
   4.685 @@ -3134,7 +2880,6 @@ static int sh_page_fault(struct vcpu *v,
   4.686      shadow_audit_tables(v);
   4.687      reset_early_unshadow(v);
   4.688      shadow_unlock(d);
   4.689 -    sh_log_mmio(v, gpa);
   4.690      handle_mmio(va, gpa);
   4.691      return EXCRET_fault_fixed;
   4.692  
   4.693 @@ -3183,8 +2928,7 @@ sh_invlpg(struct vcpu *v, unsigned long 
   4.694              return 0;
   4.695      }
   4.696  #elif SHADOW_PAGING_LEVELS == 3
   4.697 -    if ( !(shadow_l3e_get_flags(
   4.698 -          ((shadow_l3e_t *)v->arch.shadow_vtable)[shadow_l3_linear_offset(va)])
   4.699 +    if ( !(l3e_get_flags(v->arch.shadow.l3table[shadow_l3_linear_offset(va)])
   4.700             & _PAGE_PRESENT) )
   4.701          // no need to flush anything if there's no SL2...
   4.702          return 0;
   4.703 @@ -3249,34 +2993,6 @@ sh_gva_to_gpa(struct vcpu *v, unsigned l
   4.704  }
   4.705  
   4.706  
   4.707 -// XXX -- should this be in this file?
   4.708 -//        Or should it be moved to shadow-common.c?
   4.709 -//
   4.710 -/* returns a lowmem machine address of the copied HVM L3 root table
   4.711 - * If clear_res != 0, then clear the PAE-l3 reserved bits in the copy,
   4.712 - * otherwise blank out any entries with reserved bits in them.  */
   4.713 -#if (GUEST_PAGING_LEVELS == 3) && (SHADOW_PAGING_LEVELS == 3)
   4.714 -static unsigned long
   4.715 -hvm_pae_copy_root(struct vcpu *v, l3_pgentry_t *l3tab, int clear_res)
   4.716 -{
   4.717 -    int i, f;
   4.718 -    int res = (_PAGE_RW|_PAGE_NX_BIT|_PAGE_USER|_PAGE_ACCESSED|_PAGE_DIRTY);
   4.719 -    l3_pgentry_t new_l3e, *copy = v->arch.hvm_vcpu.hvm_lowmem_l3tab;
   4.720 -    memcpy(copy, l3tab, 4 * sizeof(l3_pgentry_t));
   4.721 -    for ( i = 0; i < 4; i++ )
   4.722 -    {
   4.723 -        f = l3e_get_flags(l3tab[i]);
   4.724 -        if ( (f & _PAGE_PRESENT) && (!(f & res) || clear_res) )
   4.725 -            new_l3e = l3e_from_pfn(l3e_get_pfn(l3tab[i]), f & ~res);
   4.726 -        else
   4.727 -            new_l3e = l3e_empty();
   4.728 -        safe_write_entry(&copy[i], &new_l3e);
   4.729 -    }
   4.730 -    return __pa(copy);
   4.731 -}
   4.732 -#endif
   4.733 -
   4.734 -
   4.735  static inline void
   4.736  sh_update_linear_entries(struct vcpu *v)
   4.737  /* Sync up all the linear mappings for this vcpu's pagetables */
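
    [hvm_pae_copy_root() can be deleted because both of its jobs disappear:
    the per-vcpu PAE l3 now lives permanently in low memory (the new
    v->arch.shadow.l3table, embedded in struct vcpu), and the l3es are
    fabricated fresh in sh_update_cr3 rather than copied and filtered.  A
    sketch of the placement property this relies on, mirroring the ASSERT
    added in sh_update_cr3 below; the helper name is illustrative:

        /* PAE CR3 holds a 32-bit, 32-byte-aligned machine address. */
        static inline int pae_cr3_reachable(paddr_t maddr)
        {
            return (maddr <= 0xffffffe0ULL) && ((maddr & 31) == 0);
        }
    ]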
   4.738 @@ -3330,7 +3046,7 @@ sh_update_linear_entries(struct vcpu *v)
   4.739          if ( v == current ) 
   4.740          {
   4.741              __linear_l4_table[l4_linear_offset(SH_LINEAR_PT_VIRT_START)] = 
   4.742 -                l4e_from_pfn(pagetable_get_pfn(v->arch.shadow_table),
   4.743 +                l4e_from_pfn(pagetable_get_pfn(v->arch.shadow_table[0]),
   4.744                               __PAGE_HYPERVISOR);
   4.745          } 
   4.746          else
   4.747 @@ -3338,7 +3054,7 @@ sh_update_linear_entries(struct vcpu *v)
   4.748              l4_pgentry_t *ml4e;
   4.749              ml4e = sh_map_domain_page(pagetable_get_mfn(v->arch.monitor_table));
   4.750              ml4e[l4_table_offset(SH_LINEAR_PT_VIRT_START)] = 
   4.751 -                l4e_from_pfn(pagetable_get_pfn(v->arch.shadow_table),
   4.752 +                l4e_from_pfn(pagetable_get_pfn(v->arch.shadow_table[0]),
   4.753                               __PAGE_HYPERVISOR);
   4.754              sh_unmap_domain_page(ml4e);
   4.755          }
   4.756 @@ -3379,13 +3095,8 @@ sh_update_linear_entries(struct vcpu *v)
   4.757              sh_unmap_domain_page(ml4e);
   4.758          }
   4.759  
   4.760 -#if GUEST_PAGING_LEVELS == 2
   4.761          /* Shadow l3 tables are made up by update_cr3 */
   4.762 -        sl3e = v->arch.hvm_vcpu.hvm_lowmem_l3tab;
   4.763 -#else
   4.764 -        /* Always safe to use shadow_vtable, because it's globally mapped */
   4.765 -        sl3e = v->arch.shadow_vtable;
   4.766 -#endif
   4.767 +        sl3e = v->arch.shadow.l3table;
   4.768  
   4.769          for ( i = 0; i < SHADOW_L3_PAGETABLE_ENTRIES; i++ )
   4.770          {
   4.771 @@ -3424,14 +3135,14 @@ sh_update_linear_entries(struct vcpu *v)
   4.772  #if GUEST_PAGING_LEVELS == 2
   4.773          /* Shadow l3 tables were built by update_cr3 */
   4.774          if ( shadow_mode_external(d) )
   4.775 -            shadow_l3e = v->arch.hvm_vcpu.hvm_lowmem_l3tab;
   4.776 +            shadow_l3e = (shadow_l3e_t *)&v->arch.shadow.l3table;
   4.777          else
   4.778              BUG(); /* PV 2-on-3 is not supported yet */
   4.779          
   4.780  #else /* GUEST_PAGING_LEVELS == 3 */
   4.781          
   4.782 -        /* Always safe to use *_vtable, because they're globally mapped */
   4.783 -        shadow_l3e = v->arch.shadow_vtable;
   4.784 +        shadow_l3e = (shadow_l3e_t *)&v->arch.shadow.l3table;
   4.785 +        /* Always safe to use guest_vtable, because it's globally mapped */
   4.786          guest_l3e = v->arch.guest_vtable;
   4.787  
   4.788  #endif /* GUEST_PAGING_LEVELS */
   4.789 @@ -3510,7 +3221,7 @@ sh_update_linear_entries(struct vcpu *v)
   4.790          if ( v == current ) 
   4.791          {
   4.792              __linear_l2_table[l2_linear_offset(SH_LINEAR_PT_VIRT_START)] = 
   4.793 -                l2e_from_pfn(pagetable_get_pfn(v->arch.shadow_table),
   4.794 +                l2e_from_pfn(pagetable_get_pfn(v->arch.shadow_table[0]),
   4.795                               __PAGE_HYPERVISOR);
   4.796          } 
   4.797          else
   4.798 @@ -3518,7 +3229,7 @@ sh_update_linear_entries(struct vcpu *v)
   4.799              l2_pgentry_t *ml2e;
   4.800              ml2e = sh_map_domain_page(pagetable_get_mfn(v->arch.monitor_table));
   4.801              ml2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)] = 
   4.802 -                l2e_from_pfn(pagetable_get_pfn(v->arch.shadow_table),
   4.803 +                l2e_from_pfn(pagetable_get_pfn(v->arch.shadow_table[0]),
   4.804                               __PAGE_HYPERVISOR);
   4.805              sh_unmap_domain_page(ml2e);
   4.806          }
   4.807 @@ -3530,69 +3241,7 @@ sh_update_linear_entries(struct vcpu *v)
   4.808  }
   4.809  
   4.810  
   4.811 -// XXX -- should this be in this file?
   4.812 -//        Or should it be moved to shadow-common.c?
   4.813 -//
   4.814 -#if (GUEST_PAGING_LEVELS == 3) && (SHADOW_PAGING_LEVELS == 3)
   4.815 -void sh_pae_recopy(struct domain *d)
   4.816 -/* Called whenever we write to the l3 entries of a PAE pagetable which 
   4.817 - * is currently in use.  Each vcpu that is using the table needs to 
   4.818 - * resync its copies of the l3s in linear maps and any low-memory
   4.819 - * copies it might have made for fitting into 32bit CR3.
   4.820 - * Since linear maps are also resynced when we change CR3, we don't
   4.821 - * need to worry about changes to PAE l3es that are not currently in use.*/
   4.822 -{
   4.823 -    struct vcpu *v;
   4.824 -    cpumask_t flush_mask = CPU_MASK_NONE;
   4.825 -    ASSERT(shadow_lock_is_acquired(d));
   4.826 -    
   4.827 -    for_each_vcpu(d, v)
   4.828 -    {
   4.829 -        if ( !v->arch.shadow.pae_flip_pending ) 
   4.830 -            continue;
   4.831 -
   4.832 -        cpu_set(v->processor, flush_mask);
   4.833 -        
   4.834 -        SHADOW_PRINTK("d=%u v=%u\n", v->domain->domain_id, v->vcpu_id);
   4.835 -
   4.836 -        /* This vcpu has a copy in its linear maps */
   4.837 -        sh_update_linear_entries(v);
   4.838 -        if ( hvm_guest(v) )
   4.839 -        {
   4.840 -            /* This vcpu has a copy in its HVM PAE l3 */
   4.841 -            v->arch.hvm_vcpu.hw_cr3 = 
   4.842 -                hvm_pae_copy_root(v, v->arch.shadow_vtable,
   4.843 -                                  !shadow_vcpu_mode_translate(v));
   4.844 -        }
   4.845 -#if CONFIG_PAGING_LEVELS == 3
   4.846 -        else 
   4.847 -        {
   4.848 -            /* This vcpu might have copied the l3 to below 4GB */
   4.849 -            if ( v->arch.cr3 >> PAGE_SHIFT 
   4.850 -                 != pagetable_get_pfn(v->arch.shadow_table) )
   4.851 -            {
   4.852 -                /* Recopy to where that copy is. */
   4.853 -                int i;
   4.854 -                l3_pgentry_t *dst, *src;
   4.855 -                dst = __va(v->arch.cr3 & ~0x1f); /* Mask cache control bits */
   4.856 -                src = v->arch.shadow_vtable;
   4.857 -                for ( i = 0 ; i < 4 ; i++ ) 
   4.858 -                    safe_write_entry(dst + i, src + i);
   4.859 -            }
   4.860 -        }
   4.861 -#endif
   4.862 -        v->arch.shadow.pae_flip_pending = 0;        
   4.863 -    }
   4.864 -
   4.865 -    flush_tlb_mask(flush_mask);
   4.866 -}
   4.867 -#endif /* (GUEST_PAGING_LEVELS == 3) && (SHADOW_PAGING_LEVELS == 3) */
   4.868 -
   4.869 -
   4.870 -/* removes:
   4.871 - *     vcpu->arch.guest_vtable
   4.872 - *     vcpu->arch.shadow_table
   4.873 - *     vcpu->arch.shadow_vtable
   4.874 +/* Removes vcpu->arch.guest_vtable and vcpu->arch.shadow_table[].
   4.875   * Does all appropriate management/bookkeeping/refcounting/etc...
   4.876   */
   4.877  static void
   4.878 @@ -3600,6 +3249,7 @@ sh_detach_old_tables(struct vcpu *v)
   4.879  {
   4.880      struct domain *d = v->domain;
   4.881      mfn_t smfn;
   4.882 +    int i = 0;
   4.883  
   4.884      ////
   4.885      //// vcpu->arch.guest_vtable
   4.886 @@ -3620,56 +3270,80 @@ sh_detach_old_tables(struct vcpu *v)
   4.887      }
   4.888  
   4.889      ////
   4.890 -    //// vcpu->arch.shadow_table
   4.891 +    //// vcpu->arch.shadow_table[]
   4.892      ////
   4.893 -    smfn = pagetable_get_mfn(v->arch.shadow_table);
   4.894 -    if ( mfn_x(smfn) )
   4.895 -    {
   4.896 -        ASSERT(v->arch.shadow_vtable);
   4.897 +
   4.898  
   4.899  #if GUEST_PAGING_LEVELS == 3
   4.900 -        // PAE guests do not (necessarily) use an entire page for their
   4.901 -        // 4-entry L3s, so we have to deal with them specially.
   4.902 -        //
   4.903 -        sh_put_ref_l3_subshadow(v, v->arch.shadow_vtable, smfn);
   4.904 -#else
   4.905 -        sh_put_ref(v, smfn, 0);
   4.906 +    /* PAE guests have four shadow_table entries */
   4.907 +    for ( i = 0 ; i < 4 ; i++ )
   4.908  #endif
   4.909 -
   4.910 -#if (SHADOW_PAGING_LEVELS == 3) && (GUEST_PAGING_LEVELS == 3)
   4.911 -        {
   4.912 -            struct pae_l3_bookkeeping *info =
   4.913 -                sl3p_to_info(v->arch.shadow_vtable);
   4.914 -            ASSERT(test_bit(v->vcpu_id, &info->vcpus));
   4.915 -            clear_bit(v->vcpu_id, &info->vcpus);
   4.916 -        }
   4.917 -#endif
   4.918 -        v->arch.shadow_table = pagetable_null();
   4.919 -    }
   4.920 -
   4.921 -    ////
   4.922 -    //// vcpu->arch.shadow_vtable
   4.923 -    ////
   4.924 -    if ( (shadow_mode_external(v->domain) || (GUEST_PAGING_LEVELS == 3)) &&
   4.925 -         v->arch.shadow_vtable )
   4.926      {
   4.927 -        // Q: why does this need to use (un)map_domain_page_*global* ?
   4.928 -        /* A: so sh_update_linear_entries can operate on other vcpus */
   4.929 -        sh_unmap_domain_page_global(v->arch.shadow_vtable);
   4.930 -        v->arch.shadow_vtable = NULL;
   4.931 +        smfn = pagetable_get_mfn(v->arch.shadow_table[i]);
   4.932 +        if ( mfn_x(smfn) )
   4.933 +            sh_put_ref(v, smfn, 0);
   4.934 +        v->arch.shadow_table[i] = pagetable_null();
   4.935      }
   4.936  }
   4.937  
   4.938 +/* Set up the top-level shadow and install it in slot 'slot' of shadow_table */
   4.939 +static void
   4.940 +sh_set_toplevel_shadow(struct vcpu *v, 
   4.941 +                       int slot,
   4.942 +                       mfn_t gmfn, 
   4.943 +                       unsigned int root_type) 
   4.944 +{
   4.945 +    mfn_t smfn = get_shadow_status(v, gmfn, root_type);
   4.946 +    struct domain *d = v->domain;
   4.947 +    ASSERT(pagetable_is_null(v->arch.shadow_table[slot]));
   4.948 +    if ( valid_mfn(smfn) )
   4.949 +    {
   4.950 +        /* Pull this root shadow to the front of the list of roots. */
   4.951 +        list_del(&mfn_to_page(smfn)->list);
   4.952 +        list_add(&mfn_to_page(smfn)->list, &d->arch.shadow.toplevel_shadows);
   4.953 +    }
   4.954 +    else
   4.955 +    {
   4.956 +        /* This guest MFN is a pagetable.  Must revoke write access. */
   4.957 +        if ( shadow_remove_write_access(v, gmfn, GUEST_PAGING_LEVELS, 0) != 0 )
   4.958 +            flush_tlb_mask(v->domain->domain_dirty_cpumask); 
   4.959 +        /* Make sure there's enough free shadow memory. */
   4.960 +        shadow_prealloc(d, SHADOW_MAX_ORDER); 
   4.961 +        /* Shadow the page. */
   4.962 +        smfn = sh_make_shadow(v, gmfn, root_type);
   4.963 +        list_add(&mfn_to_page(smfn)->list, &d->arch.shadow.toplevel_shadows);
   4.964 +    }
   4.965 +    ASSERT(valid_mfn(smfn));
   4.966 +    
   4.967 +#if SHADOW_OPTIMIZATIONS & SHOPT_EARLY_UNSHADOW
   4.968 +    /* Once again OK to unhook entries from this table if we see fork/exit */
   4.969 +    ASSERT(sh_mfn_is_a_page_table(gmfn));
   4.970 +    mfn_to_page(gmfn)->shadow_flags &= ~SHF_unhooked_mappings;
   4.971 +#endif
   4.972 +
   4.973 +    /* Take a ref to this page: it will be released in sh_detach_old_tables. */
   4.974 +    sh_get_ref(smfn, 0);
   4.975 +    sh_pin(smfn);
   4.976 +
   4.977 +    /* Done.  Install it */
   4.978 +    SHADOW_PRINTK("%u/%u [%u] gmfn %#"SH_PRI_mfn" smfn %#"SH_PRI_mfn"\n",
   4.979 +                  GUEST_PAGING_LEVELS, SHADOW_PAGING_LEVELS, slot,
   4.980 +                  mfn_x(gmfn), mfn_x(smfn));
   4.981 +    v->arch.shadow_table[slot] = pagetable_from_mfn(smfn);
   4.982 +}
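
    [sh_set_toplevel_shadow() centralizes the find-or-make logic that
    sh_update_cr3 previously inlined, and every reference it takes is paired
    with the sh_put_ref() in sh_detach_old_tables() above.  A sketch of the
    intended call discipline for the single-root (64-bit) case; the wrapper
    name is illustrative:

        static void switch_toplevel_64(struct vcpu *v, mfn_t gmfn)
        {
            sh_detach_old_tables(v);   /* drops the ref(s) on the old roots */
            sh_set_toplevel_shadow(v, 0, gmfn, PGC_SH_l4_shadow);
        }
    ]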
   4.983 +
   4.984 +
   4.985  static void
   4.986  sh_update_cr3(struct vcpu *v)
   4.987 -/* Updates vcpu->arch.shadow_table after the guest has changed CR3.
   4.988 +/* Updates vcpu->arch.cr3 after the guest has changed CR3.
   4.989   * Paravirtual guests should set v->arch.guest_table (and guest_table_user,
   4.990   * if appropriate).
   4.991 - * HVM guests should also set hvm_get_guest_cntl_reg(v, 3)...
   4.992 + * HVM guests should also make sure hvm_get_guest_cntl_reg(v, 3) works,
   4.993 + * and read vcpu->arch.hvm_vcpu.hw_cr3 afterwards.
   4.994   */
   4.995  {
   4.996      struct domain *d = v->domain;
   4.997 -    mfn_t gmfn, smfn;
   4.998 +    mfn_t gmfn;
   4.999  #if GUEST_PAGING_LEVELS == 3
  4.1000      u32 guest_idx=0;
  4.1001  #endif
  4.1002 @@ -3770,159 +3444,102 @@ sh_update_cr3(struct vcpu *v)
  4.1003  #endif
  4.1004  
  4.1005      ////
  4.1006 -    //// vcpu->arch.shadow_table
  4.1007 +    //// vcpu->arch.shadow_table[]
  4.1008      ////
  4.1009 -    smfn = get_shadow_status(v, gmfn, PGC_SH_guest_root_type);
  4.1010 -    if ( valid_mfn(smfn) )
  4.1011 -    {
  4.1012 -        /* Pull this root shadow to the front of the list of roots. */
  4.1013 -        list_del(&mfn_to_page(smfn)->list);
  4.1014 -        list_add(&mfn_to_page(smfn)->list, &d->arch.shadow.toplevel_shadows);
  4.1015 -    }
  4.1016 -    else
  4.1017 +
  4.1018 +#if GUEST_PAGING_LEVELS == 2
  4.1019 +    sh_set_toplevel_shadow(v, 0, gmfn, PGC_SH_l2_shadow);
  4.1020 +#elif GUEST_PAGING_LEVELS == 3
  4.1021 +    /* PAE guests have four shadow_table entries, based on the 
  4.1022 +     * current values of the guest's four l3es. */
  4.1023      {
  4.1024 -        /* This guest MFN is a pagetable.  Must revoke write access. */
  4.1025 -        if ( shadow_remove_write_access(v, gmfn, GUEST_PAGING_LEVELS, 0) 
  4.1026 -             != 0 )
  4.1027 -            flush_tlb_mask(d->domain_dirty_cpumask); 
  4.1028 -        /* Make sure there's enough free shadow memory. */
  4.1029 -        shadow_prealloc(d, SHADOW_MAX_ORDER); 
  4.1030 -        /* Shadow the page. */
  4.1031 -        smfn = sh_make_shadow(v, gmfn, PGC_SH_guest_root_type);
  4.1032 -        list_add(&mfn_to_page(smfn)->list, &d->arch.shadow.toplevel_shadows);
  4.1033 +        int i;
  4.1034 +        guest_l3e_t *gl3e = (guest_l3e_t*)v->arch.guest_vtable;
  4.1035 +        for ( i = 0; i < 4; i++ ) 
  4.1036 +        {
  4.1037 +            ASSERT(pagetable_is_null(v->arch.shadow_table[i]));
  4.1038 +            if ( guest_l3e_get_flags(gl3e[i]) & _PAGE_PRESENT )
  4.1039 +            {
  4.1040 +                gfn_t gl2gfn = guest_l3e_get_gfn(gl3e[i]);
  4.1041 +                mfn_t gl2mfn = vcpu_gfn_to_mfn(v, gl2gfn);
  4.1042 +                if ( valid_mfn(gl2mfn) )                
  4.1043 +                    sh_set_toplevel_shadow(v, i, gl2mfn, (i == 3) 
  4.1044 +                                           ? PGC_SH_l2h_shadow 
  4.1045 +                                           : PGC_SH_l2_shadow);
  4.1046 +            }
  4.1047 +        }
  4.1048      }
  4.1049 -    ASSERT(valid_mfn(smfn));
  4.1050 -    v->arch.shadow_table = pagetable_from_mfn(smfn);
  4.1051 -
  4.1052 -#if SHADOW_OPTIMIZATIONS & SHOPT_EARLY_UNSHADOW
  4.1053 -    /* Once again OK to unhook entries from this table if we see fork/exit */
  4.1054 -    ASSERT(sh_mfn_is_a_page_table(gmfn));
  4.1055 -    mfn_to_page(gmfn)->shadow_flags &= ~SHF_unhooked_mappings;
  4.1056 +#elif GUEST_PAGING_LEVELS == 4
  4.1057 +    sh_set_toplevel_shadow(v, 0, gmfn, PGC_SH_l4_shadow);
  4.1058 +#else
  4.1059 +#error This should never happen 
  4.1060  #endif
  4.1061  
  4.1062 -
  4.1063 -    ////
  4.1064 -    //// vcpu->arch.shadow_vtable
  4.1065 -    ////
  4.1066 -    if ( shadow_mode_external(d) )
  4.1067 -    {
  4.1068 -#if (SHADOW_PAGING_LEVELS == 3) && (GUEST_PAGING_LEVELS == 3)
  4.1069 -        mfn_t adjusted_smfn = smfn;
  4.1070 -        u32 shadow_idx = shadow_l3_index(&adjusted_smfn, guest_idx);
  4.1071 -        // Q: why does this need to use (un)map_domain_page_*global* ?
  4.1072 -        v->arch.shadow_vtable =
  4.1073 -            (shadow_l3e_t *)sh_map_domain_page_global(adjusted_smfn) +
  4.1074 -            shadow_idx;
  4.1075 -#else
  4.1076 -        // Q: why does this need to use (un)map_domain_page_*global* ?
  4.1077 -        v->arch.shadow_vtable = sh_map_domain_page_global(smfn);
  4.1078 -#endif
  4.1079 -    }
  4.1080 -    else
  4.1081 -    {
  4.1082 -#if SHADOW_PAGING_LEVELS == 4
  4.1083 -        v->arch.shadow_vtable = __sh_linear_l4_table;
  4.1084 -#elif GUEST_PAGING_LEVELS == 3
  4.1085 -        // XXX - why does this need a global map?
  4.1086 -        v->arch.shadow_vtable = sh_map_domain_page_global(smfn);
  4.1087 -#else
  4.1088 -        v->arch.shadow_vtable = __sh_linear_l2_table;
  4.1089 -#endif
  4.1090 -    }
  4.1091 -
  4.1092  #if (CONFIG_PAGING_LEVELS == 3) && (GUEST_PAGING_LEVELS == 3)
  4.1093 -    // Now that shadow_vtable is in place, check that the sl3e[3] is properly
  4.1094 -    // shadowed and installed in PAE PV guests...
  4.1095 -    if ( !shadow_mode_external(d) &&
  4.1096 -         !(shadow_l3e_get_flags(((shadow_l3e_t *)v->arch.shadow_vtable)[3]) &
  4.1097 -           _PAGE_PRESENT) )
  4.1098 -    {
  4.1099 -        sh_install_xen_entries_in_l3(v, gmfn, smfn);
  4.1100 -    }
  4.1101  #endif
  4.1102  
  4.1103 -    ////
  4.1104 -    //// Take a ref to the new shadow table, and pin it.
  4.1105 -    ////
  4.1106 -    //
  4.1107 -    // This ref is logically "held" by v->arch.shadow_table entry itself.
  4.1108 -    // Release the old ref.
  4.1109 -    //
  4.1110 -#if GUEST_PAGING_LEVELS == 3
  4.1111 -    // PAE guests do not (necessarily) use an entire page for their
  4.1112 -    // 4-entry L3s, so we have to deal with them specially.
  4.1113 -    //
  4.1114 -    // XXX - might want to revisit this if/when we do multiple compilation for
  4.1115 -    //       HVM-vs-PV guests, as PAE PV guests could get away without doing
  4.1116 -    //       subshadows.
  4.1117 -    //
  4.1118 -    sh_get_ref_l3_subshadow(v->arch.shadow_vtable, smfn);
  4.1119 -    sh_pin_l3_subshadow(v->arch.shadow_vtable, smfn);
  4.1120 +    /// 
  4.1121 +    /// v->arch.shadow.l3table
  4.1122 +    ///
  4.1123 +#if SHADOW_PAGING_LEVELS == 3
  4.1124 +        {
  4.1125 +            mfn_t smfn;
  4.1126 +            int i;
  4.1127 +            for ( i = 0; i < 4; i++ )
  4.1128 +            {
  4.1129 +#if GUEST_PAGING_LEVELS == 2
  4.1130 +                /* 2-on-3: make a PAE l3 that points at the four-page l2 */
  4.1131 +                smfn = _mfn(pagetable_get_pfn(v->arch.shadow_table[0]) + i);
  4.1132  #else
  4.1133 -    sh_get_ref(smfn, 0);
  4.1134 -    sh_pin(smfn);
  4.1135 +                /* 3-on-3: make a PAE l3 that points at the four l2 pages */
  4.1136 +                smfn = pagetable_get_mfn(v->arch.shadow_table[i]);
  4.1137  #endif
  4.1138 -
  4.1139 -#if (SHADOW_PAGING_LEVELS == 3) && (GUEST_PAGING_LEVELS == 3)
  4.1140 -    // PAE 3-on-3 shadows have to keep track of which vcpu's are using
  4.1141 -    // which l3 subshadow, in order handle the SHADOW_SET_L3PAE_RECOPY
  4.1142 -    // case from validate_gl3e().  Search for SHADOW_SET_L3PAE_RECOPY
  4.1143 -    // in the code for more info.
  4.1144 -    //
  4.1145 -    {
  4.1146 -        struct pae_l3_bookkeeping *info =
  4.1147 -            sl3p_to_info(v->arch.shadow_vtable);
  4.1148 -        ASSERT(!test_bit(v->vcpu_id, &info->vcpus));
  4.1149 -        set_bit(v->vcpu_id, &info->vcpus);
  4.1150 -    }
  4.1151 -#endif
  4.1152 -
  4.1153 -    debugtrace_printk("%s cr3 gmfn=%05lx smfn=%05lx\n",
  4.1154 -                      __func__, gmfn, smfn);
  4.1155 +                v->arch.shadow.l3table[i] = 
  4.1156 +                    (mfn_x(smfn) == 0) 
  4.1157 +                    ? shadow_l3e_empty()
  4.1158 +                    : shadow_l3e_from_mfn(smfn, _PAGE_PRESENT);
  4.1159 +            }
  4.1160 +        }
  4.1161 +#endif /* SHADOW_PAGING_LEVELS == 3 */
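
    [The fabricated l3es carry only _PAGE_PRESENT because the other
    permission bits are reserved in PAE PDPTEs.  The 2-on-3 case relies on
    the four-page shadow of a 32-bit l2 being one physically contiguous
    order-2 allocation, which is what lets slot i be derived from
    shadow_table[0] alone; written out under that assumption (illustrative
    function, mirroring the loop above):

        static void fabricate_l3_2on3(struct vcpu *v)
        {
            int i;
            mfn_t base = pagetable_get_mfn(v->arch.shadow_table[0]);
            for ( i = 0; i < 4; i++ )
                v->arch.shadow.l3table[i] =
                    shadow_l3e_from_mfn(_mfn(mfn_x(base) + i), _PAGE_PRESENT);
        }
    ]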
  4.1162 +
  4.1163  
  4.1164      ///
  4.1165 -    /// v->arch.cr3 and, if appropriate, v->arch.hvm_vcpu.hw_cr3
  4.1166 +    /// v->arch.cr3
  4.1167      ///
  4.1168      if ( shadow_mode_external(d) )
  4.1169      {
  4.1170 -        ASSERT(hvm_guest(v));
  4.1171          make_cr3(v, pagetable_get_pfn(v->arch.monitor_table));
  4.1172 -
  4.1173 -#if (GUEST_PAGING_LEVELS == 2) && (SHADOW_PAGING_LEVELS != 2)
  4.1174 -#if SHADOW_PAGING_LEVELS != 3
  4.1175 -#error unexpected combination of GUEST and SHADOW paging levels
  4.1176 -#endif
  4.1177 -        /* 2-on-3: make a PAE l3 table that points at the four-page l2 */
  4.1178 -        {
  4.1179 -            mfn_t smfn = pagetable_get_mfn(v->arch.shadow_table);
  4.1180 -            int i;
  4.1181 -
  4.1182 -            ASSERT(v->arch.hvm_vcpu.hw_cr3 ==
  4.1183 -                   virt_to_maddr(v->arch.hvm_vcpu.hvm_lowmem_l3tab));
  4.1184 -            for (i = 0; i < 4; i++)
  4.1185 -            {
  4.1186 -                v->arch.hvm_vcpu.hvm_lowmem_l3tab[i] =
  4.1187 -                    shadow_l3e_from_mfn(_mfn(mfn_x(smfn)+i), _PAGE_PRESENT);
  4.1188 -            }
  4.1189 -        }
  4.1190 -#elif (GUEST_PAGING_LEVELS == 3) && (SHADOW_PAGING_LEVELS == 3)
  4.1191 -        /* 3-on-3: copy the shadow l3 to slots that are below 4GB.
  4.1192 -         * If paging is disabled, clear l3e reserved bits; otherwise 
  4.1193 -         * remove entries that have reserved bits set. */
  4.1194 -        v->arch.hvm_vcpu.hw_cr3 =
  4.1195 -            hvm_pae_copy_root(v, v->arch.shadow_vtable, 
  4.1196 -                              !shadow_vcpu_mode_translate(v));
  4.1197 -#else
  4.1198 -        /* 2-on-2 or 4-on-4: just put the shadow top-level into cr3 */
  4.1199 -        v->arch.hvm_vcpu.hw_cr3 =
  4.1200 -            pagetable_get_paddr(v->arch.shadow_table);
  4.1201 -#endif
  4.1202      }
  4.1203      else // not shadow_mode_external...
  4.1204      {
  4.1205          /* We don't support PV except guest == shadow == config levels */
  4.1206          BUG_ON(GUEST_PAGING_LEVELS != SHADOW_PAGING_LEVELS);
  4.1207 -        make_cr3(v, pagetable_get_pfn(v->arch.shadow_table));
  4.1208 +#if SHADOW_PAGING_LEVELS == 3
  4.1209 +        /* 2-on-3 or 3-on-3: Use the PAE shadow l3 table we just fabricated.
  4.1210 +         * Don't use make_cr3 because (a) we know it's below 4GB, and
  4.1211 +         * (b) it's not necessarily page-aligned, and make_cr3 takes a pfn */
  4.1212 +        ASSERT(virt_to_maddr(&v->arch.shadow.l3table) <= 0xffffffe0ULL);
  4.1213 +        v->arch.cr3 = virt_to_maddr(&v->arch.shadow.l3table);
  4.1214 +#else
  4.1215 +        /* 2-on-2 or 4-on-4: Just use the shadow top-level directly */
  4.1216 +        make_cr3(v, pagetable_get_pfn(v->arch.shadow_table[0]));
  4.1217 +#endif
  4.1218 +    }
  4.1219 +
  4.1220 +
  4.1221 +    ///
  4.1222 +    /// v->arch.hvm_vcpu.hw_cr3
  4.1223 +    ///
  4.1224 +    if ( shadow_mode_external(d) )
  4.1225 +    {
  4.1226 +        ASSERT(hvm_guest(v));
  4.1227 +#if SHADOW_PAGING_LEVELS == 3
  4.1228 +        /* 2-on-3 or 3-on-3: Use the PAE shadow l3 table we just fabricated */
  4.1229 +        v->arch.hvm_vcpu.hw_cr3 = virt_to_maddr(&v->arch.shadow.l3table);
  4.1230 +#else
  4.1231 +        /* 2-on-2 or 4-on-4: Just use the shadow top-level directly */
  4.1232 +        v->arch.hvm_vcpu.hw_cr3 = pagetable_get_paddr(v->arch.shadow_table[0]);
  4.1233 +#endif
  4.1234      }
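
    [After this reorganization, v->arch.cr3 (for PV) and hw_cr3 (for HVM)
    are derived the same way from the shadow configuration.  A hedged
    summary helper, mirroring the two #if branches above; the name is
    illustrative:

        static paddr_t shadow_root_maddr(struct vcpu *v)
        {
        #if SHADOW_PAGING_LEVELS == 3
            /* Per-vcpu PAE l3: below 4GB, 32-byte aligned, not a full page */
            return virt_to_maddr(&v->arch.shadow.l3table);
        #else
            /* 2-on-2 or 4-on-4: the single top-level shadow page */
            return pagetable_get_paddr(v->arch.shadow_table[0]);
        #endif
        }
    ]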
  4.1235  
  4.1236      /* Fix up the linear pagetable mappings */
  4.1237 @@ -3950,7 +3567,6 @@ static int sh_guess_wrmap(struct vcpu *v
  4.1238  
  4.1239  
  4.1240      /* Carefully look in the shadow linear map for the l1e we expect */
  4.1241 -    if ( v->arch.shadow_vtable == NULL ) return 0;
  4.1242  #if GUEST_PAGING_LEVELS >= 4
  4.1243      sl4p = sh_linear_l4_table(v) + shadow_l4_linear_offset(vaddr);
  4.1244      if ( !(shadow_l4e_get_flags(*sl4p) & _PAGE_PRESENT) )
  4.1245 @@ -3959,7 +3575,7 @@ static int sh_guess_wrmap(struct vcpu *v
  4.1246      if ( !(shadow_l3e_get_flags(*sl3p) & _PAGE_PRESENT) )
  4.1247          return 0;
  4.1248  #elif GUEST_PAGING_LEVELS == 3
  4.1249 -    sl3p = ((shadow_l3e_t *) v->arch.shadow_vtable) 
  4.1250 +    sl3p = ((shadow_l3e_t *) v->arch.shadow.l3table) 
  4.1251          + shadow_l3_linear_offset(vaddr);
  4.1252      if ( !(shadow_l3e_get_flags(*sl3p) & _PAGE_PRESENT) )
  4.1253          return 0;
  4.1254 @@ -3988,6 +3604,7 @@ int sh_remove_write_access(struct vcpu *
  4.1255      shadow_l1e_t *sl1e;
  4.1256      int done = 0;
  4.1257      int flags;
  4.1258 +    mfn_t base_sl1mfn = sl1mfn; /* Because sl1mfn changes in the foreach */
  4.1259      
  4.1260      SHADOW_FOREACH_L1E(sl1mfn, sl1e, 0, done, 
  4.1261      {
  4.1262 @@ -3997,6 +3614,10 @@ int sh_remove_write_access(struct vcpu *
  4.1263               && (mfn_x(shadow_l1e_get_mfn(*sl1e)) == mfn_x(readonly_mfn)) )
  4.1264          {
  4.1265              shadow_set_l1e(v, sl1e, shadow_l1e_empty(), sl1mfn);
  4.1266 +#if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC 
  4.1267 +            /* Remember the last shadow that we shot a writeable mapping in */
  4.1268 +            v->arch.shadow.last_writeable_pte_smfn = mfn_x(base_sl1mfn);
  4.1269 +#endif
  4.1270              if ( (mfn_to_page(readonly_mfn)->u.inuse.type_info
  4.1271                    & PGT_count_mask) == 0 )
  4.1272                  /* This breaks us cleanly out of the FOREACH macro */
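
    [Recording base_sl1mfn gives the writeable-mapping heuristic a starting
    point: the next search for a writeable mapping can try the remembered l1
    shadow before falling back to a brute-force scan of all shadows.  A
    sketch of that consumer pattern; do_one_l1() is a stand-in for the real
    per-shadow visitor, which lives outside this hunk:

        mfn_t guess = _mfn(v->arch.shadow.last_writeable_pte_smfn);
        if ( mfn_x(guess) && do_one_l1(v, guess, readonly_mfn) )
            return 1;   /* hit: brute-force scan avoided */
    ]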
  4.1273 @@ -4044,14 +3665,12 @@ void sh_clear_shadow_entry(struct vcpu *
  4.1274      case PGC_SH_l2h_shadow:
  4.1275  #endif
  4.1276          shadow_set_l2e(v, ep, shadow_l2e_empty(), smfn); break;
  4.1277 -#if GUEST_PAGING_LEVELS >= 3
  4.1278 +#if GUEST_PAGING_LEVELS >= 4
  4.1279      case PGC_SH_l3_shadow:
  4.1280          shadow_set_l3e(v, ep, shadow_l3e_empty(), smfn); break;
  4.1281 -#if GUEST_PAGING_LEVELS >= 4
  4.1282      case PGC_SH_l4_shadow:
  4.1283          shadow_set_l4e(v, ep, shadow_l4e_empty(), smfn); break;
  4.1284  #endif
  4.1285 -#endif
  4.1286      default: BUG(); /* Called with the wrong kind of shadow. */
  4.1287      }
  4.1288  }
  4.1289 @@ -4081,7 +3700,7 @@ int sh_remove_l1_shadow(struct vcpu *v, 
  4.1290      return done;
  4.1291  }
  4.1292  
  4.1293 -#if GUEST_PAGING_LEVELS >= 3
  4.1294 +#if GUEST_PAGING_LEVELS >= 4
  4.1295  int sh_remove_l2_shadow(struct vcpu *v, mfn_t sl3mfn, mfn_t sl2mfn)
  4.1296  /* Remove all mappings of this l2 shadow from this l3 shadow */
  4.1297  {
  4.1298 @@ -4104,7 +3723,6 @@ int sh_remove_l2_shadow(struct vcpu *v, 
  4.1299      return done;
  4.1300  }
  4.1301  
  4.1302 -#if GUEST_PAGING_LEVELS >= 4
  4.1303  int sh_remove_l3_shadow(struct vcpu *v, mfn_t sl4mfn, mfn_t sl3mfn)
  4.1304  /* Remove all mappings of this l3 shadow from this l4 shadow */
  4.1305  {
  4.1306 @@ -4127,7 +3745,6 @@ int sh_remove_l3_shadow(struct vcpu *v, 
  4.1307      return done;
  4.1308  }
  4.1309  #endif /* 64bit guest */ 
  4.1310 -#endif /* PAE guest */
  4.1311  
  4.1312  /**************************************************************************/
  4.1313  /* Handling HVM guest writes to pagetables  */
  4.1314 @@ -4448,7 +4065,7 @@ int sh_audit_l2_table(struct vcpu *v, mf
  4.1315      return 0;
  4.1316  }
  4.1317  
  4.1318 -#if GUEST_PAGING_LEVELS >= 3
  4.1319 +#if GUEST_PAGING_LEVELS >= 4
  4.1320  int sh_audit_l3_table(struct vcpu *v, mfn_t sl3mfn, mfn_t x)
  4.1321  {
  4.1322      guest_l3e_t *gl3e, *gp;
  4.1323 @@ -4486,9 +4103,7 @@ int sh_audit_l3_table(struct vcpu *v, mf
  4.1324      sh_unmap_domain_page(gp);
  4.1325      return 0;
  4.1326  }
  4.1327 -#endif /* GUEST_PAGING_LEVELS >= 3 */
  4.1328 -
  4.1329 -#if GUEST_PAGING_LEVELS >= 4
  4.1330 +
  4.1331  int sh_audit_l4_table(struct vcpu *v, mfn_t sl4mfn, mfn_t x)
  4.1332  {
  4.1333      guest_l4e_t *gl4e, *gp;
     5.1 --- a/xen/arch/x86/mm/shadow/multi.h	Wed Oct 18 13:43:35 2006 +0100
     5.2 +++ b/xen/arch/x86/mm/shadow/multi.h	Wed Oct 18 14:36:20 2006 +0100
     5.3 @@ -50,10 +50,6 @@ extern void
     5.4  SHADOW_INTERNAL_NAME(sh_destroy_l4_shadow, SHADOW_LEVELS, GUEST_LEVELS)(
     5.5      struct vcpu *v, mfn_t smfn);
     5.6  
     5.7 -extern void
     5.8 -SHADOW_INTERNAL_NAME(sh_unpin_all_l3_subshadows, 3, 3)
     5.9 -    (struct vcpu *v, mfn_t smfn);
    5.10 -
    5.11  extern void 
    5.12  SHADOW_INTERNAL_NAME(sh_unhook_32b_mappings, SHADOW_LEVELS, GUEST_LEVELS)
    5.13      (struct vcpu *v, mfn_t sl2mfn);
     6.1 --- a/xen/arch/x86/mm/shadow/private.h	Wed Oct 18 13:43:35 2006 +0100
     6.2 +++ b/xen/arch/x86/mm/shadow/private.h	Wed Oct 18 14:36:20 2006 +0100
     6.3 @@ -178,77 +178,6 @@ extern void shadow_audit_p2m(struct doma
     6.4  
     6.5  
     6.6  /******************************************************************************
     6.7 - * Mechanism for double-checking the optimized pagefault path: this
     6.8 - * structure contains a record of actions taken by the fault handling
     6.9 - * code.  In paranoid mode, the fast-path code fills out one of these
    6.10 - * structures (but doesn't take any actual action) and then the normal 
    6.11 - * path fills in another.  When the fault handler finishes, the 
    6.12 - * two are compared */
    6.13 -
    6.14 -#ifdef SHADOW_OPTIMIZATION_PARANOIA
    6.15 -
    6.16 -typedef struct shadow_action_log sh_log_t;
    6.17 -struct shadow_action_log {
    6.18 -    paddr_t ad[CONFIG_PAGING_LEVELS];  /* A & D bits propagated here */
    6.19 -    paddr_t mmio;                      /* Address of an mmio operation */
    6.20 -    int rv;                            /* Result of the fault handler */
    6.21 -};
    6.22 -
    6.23 -/* There are two logs, one for the fast path, one for the normal path */
    6.24 -enum sh_log_type { log_slow = 0, log_fast= 1 };
    6.25 -
    6.26 -/* Alloc and zero the logs */
    6.27 -static inline void sh_init_log(struct vcpu *v) 
    6.28 -{
    6.29 -    if ( unlikely(!v->arch.shadow.action_log) ) 
    6.30 -        v->arch.shadow.action_log = xmalloc_array(sh_log_t, 2);
    6.31 -    ASSERT(v->arch.shadow.action_log);
    6.32 -    memset(v->arch.shadow.action_log, 0, 2 * sizeof (sh_log_t));
    6.33 -}
    6.34 -
    6.35 -/* Log an A&D-bit update */
    6.36 -static inline void sh_log_ad(struct vcpu *v, paddr_t e, unsigned int level)
    6.37 -{
    6.38 -    v->arch.shadow.action_log[v->arch.shadow.action_index].ad[level] = e;
    6.39 -}
    6.40 -
    6.41 -/* Log an MMIO address */
    6.42 -static inline void sh_log_mmio(struct vcpu *v, paddr_t m)
    6.43 -{
    6.44 -    v->arch.shadow.action_log[v->arch.shadow.action_index].mmio = m;
    6.45 -}
    6.46 -
    6.47 -/* Log the result */
    6.48 -static inline void sh_log_rv(struct vcpu *v, int rv)
    6.49 -{
    6.50 -    v->arch.shadow.action_log[v->arch.shadow.action_index].rv = rv;
    6.51 -}
    6.52 -
    6.53 -/* Set which mode we're in */
    6.54 -static inline void sh_set_log_mode(struct vcpu *v, enum sh_log_type t) 
    6.55 -{
    6.56 -    v->arch.shadow.action_index = t;
    6.57 -}
    6.58 -
    6.59 -/* Know not to take action, because we're only checking the mechanism */
    6.60 -static inline int sh_take_no_action(struct vcpu *v) 
    6.61 -{
    6.62 -    return (v->arch.shadow.action_index == log_fast);
    6.63 -}
    6.64 -
    6.65 -#else /* Non-paranoid mode: these logs do not exist */
    6.66 -
    6.67 -#define sh_init_log(_v) do { (void)(_v); } while(0)
    6.68 -#define sh_set_log_mode(_v,_t) do { (void)(_v); } while(0)
    6.69 -#define sh_log_ad(_v,_e,_l) do { (void)(_v),(void)(_e),(void)(_l); } while (0)
    6.70 -#define sh_log_mmio(_v,_m) do { (void)(_v),(void)(_m); } while (0)
    6.71 -#define sh_log_rv(_v,_r) do { (void)(_v),(void)(_r); } while (0)
    6.72 -#define sh_take_no_action(_v) (((void)(_v)), 0)
    6.73 -
    6.74 -#endif /* SHADOW_OPTIMIZATION_PARANOIA */
    6.75 -
    6.76 -
    6.77 -/******************************************************************************
    6.78   * Macro for dealing with the naming of the internal names of the
    6.79   * shadow code's external entry points.
    6.80   */
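
    [Removing the fault-path action log also removes its disabled-case
    stubs; the sh_log_mmio(v, gpa) call deleted from sh_page_fault earlier
    in this changeset was its caller on the MMIO path.  The stubs followed
    the usual statement-shaped macro idiom, e.g.:

        /* no-op stub: evaluates its arguments once, generates no code */
        #define sh_log_mmio(_v,_m) do { (void)(_v), (void)(_m); } while (0)
    ]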
    6.81 @@ -336,13 +265,9 @@ void shadow_convert_to_log_dirty(struct 
    6.82   * non-Xen mappings in this top-level shadow mfn */
    6.83  void shadow_unhook_mappings(struct vcpu *v, mfn_t smfn);
    6.84  
    6.85 -/* Re-sync copies of PAE shadow L3 tables if they have been changed */
    6.86 -void sh_pae_recopy(struct domain *d);
    6.87 -
    6.88  /* Install the xen mappings in various flavours of shadow */
    6.89  void sh_install_xen_entries_in_l4(struct vcpu *v, mfn_t gl4mfn, mfn_t sl4mfn);
    6.90  void sh_install_xen_entries_in_l2h(struct vcpu *v, mfn_t sl2hmfn);
    6.91 -void sh_install_xen_entries_in_l3(struct vcpu *v, mfn_t gl3mfn, mfn_t sl3mfn);
    6.92  void sh_install_xen_entries_in_l2(struct vcpu *v, mfn_t gl2mfn, mfn_t sl2mfn);
    6.93  
    6.94  
     7.1 --- a/xen/arch/x86/mm/shadow/types.h	Wed Oct 18 13:43:35 2006 +0100
     7.2 +++ b/xen/arch/x86/mm/shadow/types.h	Wed Oct 18 14:36:20 2006 +0100
     7.3 @@ -215,8 +215,7 @@ static inline shadow_l4e_t shadow_l4e_fr
     7.4       shadow_l1_linear_offset(SH_LINEAR_PT_VIRT_START)); \
     7.5  })
     7.6  
     7.7 -// shadow linear L3 and L4 tables only exist in 4 level paging...
     7.8 -#if SHADOW_PAGING_LEVELS == 4
     7.9 +#if SHADOW_PAGING_LEVELS >= 4
    7.10  #define sh_linear_l3_table(v) ({ \
    7.11      ASSERT(current == (v)); \
    7.12      ((shadow_l3e_t *) \
    7.13 @@ -386,7 +385,6 @@ static inline guest_l4e_t guest_l4e_from
    7.14  #define PGC_SH_fl1_shadow PGC_SH_fl1_pae_shadow
    7.15  #define PGC_SH_l2_shadow  PGC_SH_l2_pae_shadow
    7.16  #define PGC_SH_l2h_shadow PGC_SH_l2h_pae_shadow
    7.17 -#define PGC_SH_l3_shadow  PGC_SH_l3_pae_shadow
    7.18  #else
    7.19  #define PGC_SH_l1_shadow  PGC_SH_l1_64_shadow
    7.20  #define PGC_SH_fl1_shadow PGC_SH_fl1_64_shadow
    7.21 @@ -405,14 +403,6 @@ valid_gfn(gfn_t m)
    7.22      return VALID_GFN(gfn_x(m));
    7.23  }
    7.24  
    7.25 -#if GUEST_PAGING_LEVELS == 2
    7.26 -#define PGC_SH_guest_root_type PGC_SH_l2_32_shadow
    7.27 -#elif GUEST_PAGING_LEVELS == 3
    7.28 -#define PGC_SH_guest_root_type PGC_SH_l3_pae_shadow
    7.29 -#else
    7.30 -#define PGC_SH_guest_root_type PGC_SH_l4_64_shadow
    7.31 -#endif
    7.32 -
    7.33  /* Translation between mfns and gfns */
    7.34  static inline mfn_t
    7.35  vcpu_gfn_to_mfn(struct vcpu *v, gfn_t gfn)
    7.36 @@ -490,8 +480,6 @@ struct shadow_walk_t
    7.37  #define sh_map_and_validate_gl1e   INTERNAL_NAME(sh_map_and_validate_gl1e)
    7.38  #define sh_destroy_l4_shadow       INTERNAL_NAME(sh_destroy_l4_shadow)
    7.39  #define sh_destroy_l3_shadow       INTERNAL_NAME(sh_destroy_l3_shadow)
    7.40 -#define sh_destroy_l3_subshadow    INTERNAL_NAME(sh_destroy_l3_subshadow)
    7.41 -#define sh_unpin_all_l3_subshadows INTERNAL_NAME(sh_unpin_all_l3_subshadows)
    7.42  #define sh_destroy_l2_shadow       INTERNAL_NAME(sh_destroy_l2_shadow)
    7.43  #define sh_destroy_l1_shadow       INTERNAL_NAME(sh_destroy_l1_shadow)
    7.44  #define sh_unhook_32b_mappings     INTERNAL_NAME(sh_unhook_32b_mappings)
    7.45 @@ -533,115 +521,6 @@ struct shadow_walk_t
    7.46                                SHADOW_PAGING_LEVELS)
    7.47  
    7.48  
    7.49 -#if GUEST_PAGING_LEVELS == 3
    7.50 -/*
    7.51 - * Accounting information stored in the shadow of PAE Guest L3 pages.
    7.52 - * Because these "L3 pages" are only 32-bytes, it is inconvenient to keep
    7.53 - * various refcounts, etc., on the page_info of their page.  We provide extra
    7.54 - * bookkeeping space in the shadow itself, and this is the structure
    7.55 - * definition for that bookkeeping information.
    7.56 - */
    7.57 -struct pae_l3_bookkeeping {
    7.58 -    u32 vcpus;                  /* bitmap of which vcpus are currently storing
    7.59 -                                 * copies of this 32-byte page */
    7.60 -    u32 refcount;               /* refcount for this 32-byte page */
    7.61 -    u8 pinned;                  /* is this 32-byte page pinned or not? */
    7.62 -};
    7.63 -
    7.64 -// Convert a shadow entry pointer into a pae_l3_bookkeeping pointer.
    7.65 -#define sl3p_to_info(_ptr) ((struct pae_l3_bookkeeping *)         \
    7.66 -                            (((unsigned long)(_ptr) & ~31) + 32))
    7.67 -
    7.68 -static void sh_destroy_l3_subshadow(struct vcpu *v, 
    7.69 -                                     shadow_l3e_t *sl3e);
    7.70 -
    7.71 -/* Increment a subshadow ref
    7.72 - * Called with a pointer to the subshadow, and the mfn of the
    7.73 - * *first* page of the overall shadow. */
    7.74 -static inline void sh_get_ref_l3_subshadow(shadow_l3e_t *sl3e, mfn_t smfn)
    7.75 -{
    7.76 -    struct pae_l3_bookkeeping *bk = sl3p_to_info(sl3e);
    7.77 -
    7.78 -    /* First ref to the subshadow takes a ref to the full shadow */
    7.79 -    if ( bk->refcount == 0 ) 
    7.80 -        sh_get_ref(smfn, 0);
    7.81 -    if ( unlikely(++(bk->refcount) == 0) )
    7.82 -    {
    7.83 -        SHADOW_PRINTK("shadow l3 subshadow ref overflow, smfn=%" SH_PRI_mfn " sh=%p\n", 
    7.84 -                       mfn_x(smfn), sl3e);
    7.85 -        domain_crash_synchronous();
    7.86 -    }
    7.87 -}
    7.88 -
    7.89 -/* Decrement a subshadow ref.
    7.90 - * Called with a pointer to the subshadow, and the mfn of the
    7.91 - * *first* page of the overall shadow.  Calling this may cause the 
    7.92 - * entire shadow to disappear, so the caller must immediately unmap 
    7.93 - * the pointer after calling. */ 
    7.94 -static inline void sh_put_ref_l3_subshadow(struct vcpu *v, 
    7.95 -                                            shadow_l3e_t *sl3e,
    7.96 -                                            mfn_t smfn)
    7.97 -{
    7.98 -    struct pae_l3_bookkeeping *bk;
    7.99 -
   7.100 -    bk = sl3p_to_info(sl3e);
   7.101 -
   7.102 -    ASSERT(bk->refcount > 0);
   7.103 -    if ( --(bk->refcount) == 0 )
   7.104 -    {
   7.105 -        /* Need to destroy this subshadow */
   7.106 -        sh_destroy_l3_subshadow(v, sl3e);
   7.107 -        /* Last ref to the subshadow had a ref to the full shadow */
   7.108 -        sh_put_ref(v, smfn, 0);
   7.109 -    }
   7.110 -}
   7.111 -
   7.112 -/* Pin a subshadow 
   7.113 - * Called with a pointer to the subshadow, and the mfn of the
   7.114 - * *first* page of the overall shadow. */
   7.115 -static inline void sh_pin_l3_subshadow(shadow_l3e_t *sl3e, mfn_t smfn)
   7.116 -{
   7.117 -    struct pae_l3_bookkeeping *bk = sl3p_to_info(sl3e);
   7.118 -
   7.119 -#if 0
   7.120 -    debugtrace_printk("%s smfn=%05lx offset=%ld\n",
   7.121 -                      __func__, mfn_x(smfn),
   7.122 -                      ((unsigned long)sl3e & ~PAGE_MASK) / 64);
   7.123 -#endif
   7.124 -
   7.125 -    if ( !bk->pinned )
   7.126 -    {
   7.127 -        bk->pinned = 1;
   7.128 -        sh_get_ref_l3_subshadow(sl3e, smfn);
   7.129 -    }
   7.130 -}
   7.131 -
   7.132 -/* Unpin a sub-shadow. 
   7.133 - * Called with a pointer to the subshadow, and the mfn of the
   7.134 - * *first* page of the overall shadow.  Calling this may cause the 
   7.135 - * entire shadow to disappear, so the caller must immediately unmap 
   7.136 - * the pointer after calling. */ 
   7.137 -static inline void sh_unpin_l3_subshadow(struct vcpu *v, 
   7.138 -                                          shadow_l3e_t *sl3e,
   7.139 -                                          mfn_t smfn)
   7.140 -{
   7.141 -    struct pae_l3_bookkeeping *bk = sl3p_to_info(sl3e);
   7.142 -
   7.143 -#if 0
   7.144 -    debugtrace_printk("%s smfn=%05lx offset=%ld\n",
   7.145 -                      __func__, mfn_x(smfn),
   7.146 -                      ((unsigned long)sl3e & ~PAGE_MASK) / 64);
   7.147 -#endif
   7.148 -
   7.149 -    if ( bk->pinned )
   7.150 -    {
   7.151 -        bk->pinned = 0;
   7.152 -        sh_put_ref_l3_subshadow(v, sl3e, smfn);
   7.153 -    }
   7.154 -}
   7.155 -
   7.156 -#endif /* GUEST_PAGING_LEVELS == 3 */
   7.157 -
   7.158  #if SHADOW_PAGING_LEVELS == 3
   7.159  #define MFN_FITS_IN_HVM_CR3(_MFN) !(mfn_x(_MFN) >> 20)
   7.160  #endif
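
    [MFN_FITS_IN_HVM_CR3 survives the cleanup: with 4KB pages, an MFN below
    2^20 corresponds to a machine address below 2^32, which is what a
    32-bit PAE CR3 can hold.  Spelled out:

        /* mfn < 2^20  <=>  (mfn << PAGE_SHIFT) < 2^32, PAGE_SHIFT == 12 */
        #define MFN_FITS_IN_HVM_CR3(_MFN) !(mfn_x(_MFN) >> 20)
    ]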
     8.1 --- a/xen/include/asm-x86/domain.h	Wed Oct 18 13:43:35 2006 +0100
     8.2 +++ b/xen/include/asm-x86/domain.h	Wed Oct 18 14:36:20 2006 +0100
     8.3 @@ -134,18 +134,20 @@ struct pae_l3_cache { };
     8.4  #endif
     8.5  
     8.6  struct shadow_vcpu {
     8.7 +#if CONFIG_PAGING_LEVELS >= 3
     8.8 +    /* PAE guests: per-vcpu shadow top-level table */
     8.9 +    l3_pgentry_t l3table[4] __attribute__((__aligned__(32)));
    8.10 +#endif
    8.11      /* Pointers to mode-specific entry points. */
    8.12      struct shadow_paging_mode *mode;
    8.13      /* Last MFN that we emulated a write to. */
    8.14      unsigned long last_emulated_mfn;
    8.15 +    /* MFN of the last shadow that we shot a writeable mapping in */
    8.16 +    unsigned long last_writeable_pte_smfn;
    8.17      /* HVM guest: paging enabled (CR0.PG)?  */
    8.18      unsigned int translate_enabled:1;
    8.19      /* Emulated fault needs to be propagated to guest? */
    8.20      unsigned int propagate_fault:1;
    8.21 -#if CONFIG_PAGING_LEVELS >= 3
    8.22 -    /* Shadow update requires this PAE cpu to recopy/install its L3 table. */
    8.23 -    unsigned int pae_flip_pending:1;
    8.24 -#endif
    8.25  };
    8.26  
    8.27  struct arch_vcpu
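
    [The new l3table field is the PAE top level itself, so its 32-byte
    alignment is a hardware requirement (in PAE mode, CR3 bits 4:0 are not
    address bits), not a tuning choice.  A compile-time check one could add
    under that assumption; it is not part of the changeset:

        /* four 8-byte PDPTEs == the 32-byte block PAE CR3 points at */
        typedef char l3table_is_32_bytes[
            (sizeof(l3_pgentry_t) * 4 == 32) ? 1 : -1];
    ]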
    8.28 @@ -190,13 +192,12 @@ struct arch_vcpu
    8.29      pagetable_t guest_table;            /* (MFN) guest notion of cr3 */
    8.30      /* guest_table holds a ref to the page, and also a type-count unless
    8.31       * shadow refcounts are in use */
    8.32 -    pagetable_t shadow_table;           /* (MFN) shadow of guest */
    8.33 +    pagetable_t shadow_table[4];        /* (MFN) shadow(s) of guest */
    8.34      pagetable_t monitor_table;          /* (MFN) hypervisor PT (for HVM) */
    8.35      unsigned long cr3;           	    /* (MA) value to install in HW CR3 */
    8.36  
    8.37 -    void *guest_vtable;                 /* virtual address of pagetable */
    8.38 -    void *shadow_vtable;                /* virtual address of shadow_table */
    8.39 -    root_pgentry_t *monitor_vtable;		/* virtual address of monitor_table */
    8.40 +    void *guest_vtable;                 /* virtual addr of pagetable */
    8.41 +    root_pgentry_t *monitor_vtable;		/* virtual addr of monitor_table */
    8.42  
    8.43      /* Current LDT details. */
    8.44      unsigned long shadow_ldt_mapcnt;
     9.1 --- a/xen/include/asm-x86/hvm/vcpu.h	Wed Oct 18 13:43:35 2006 +0100
     9.2 +++ b/xen/include/asm-x86/hvm/vcpu.h	Wed Oct 18 14:36:20 2006 +0100
     9.3 @@ -41,11 +41,6 @@ struct hvm_vcpu {
     9.4  
     9.5      int                 xen_port;
     9.6  
     9.7 -#if CONFIG_PAGING_LEVELS >= 3
     9.8 -    l3_pgentry_t hvm_lowmem_l3tab[4]
     9.9 -    __attribute__((__aligned__(32)));
    9.10 -#endif
    9.11 -
    9.12      /* Flags */
    9.13      int                 flag_dr_dirty;
    9.14  
    10.1 --- a/xen/include/asm-x86/mm.h	Wed Oct 18 13:43:35 2006 +0100
    10.2 +++ b/xen/include/asm-x86/mm.h	Wed Oct 18 14:36:20 2006 +0100
    10.3 @@ -114,15 +114,14 @@ struct page_info
    10.4  #define PGC_SH_fl1_pae_shadow (5U<<28) /* L1 shadow for pae 2M superpg */
    10.5  #define PGC_SH_l2_pae_shadow  (6U<<28) /* shadowing a pae L2-low page */
    10.6  #define PGC_SH_l2h_pae_shadow (7U<<28) /* shadowing a pae L2-high page */
    10.7 -#define PGC_SH_l3_pae_shadow  (8U<<28) /* shadowing a pae L3 page */
    10.8 -#define PGC_SH_l1_64_shadow   (9U<<28) /* shadowing a 64-bit L1 page */
    10.9 -#define PGC_SH_fl1_64_shadow (10U<<28) /* L1 shadow for 64-bit 2M superpg */
   10.10 -#define PGC_SH_l2_64_shadow  (11U<<28) /* shadowing a 64-bit L2 page */
   10.11 -#define PGC_SH_l3_64_shadow  (12U<<28) /* shadowing a 64-bit L3 page */
   10.12 -#define PGC_SH_l4_64_shadow  (13U<<28) /* shadowing a 64-bit L4 page */
   10.13 -#define PGC_SH_max_shadow    (13U<<28)
   10.14 -#define PGC_SH_p2m_table     (14U<<28) /* in use as the p2m table */
   10.15 -#define PGC_SH_monitor_table (15U<<28) /* in use as a monitor table */
   10.16 +#define PGC_SH_l1_64_shadow   (8U<<28) /* shadowing a 64-bit L1 page */
   10.17 +#define PGC_SH_fl1_64_shadow  (9U<<28) /* L1 shadow for 64-bit 2M superpg */
   10.18 +#define PGC_SH_l2_64_shadow  (10U<<28) /* shadowing a 64-bit L2 page */
   10.19 +#define PGC_SH_l3_64_shadow  (11U<<28) /* shadowing a 64-bit L3 page */
   10.20 +#define PGC_SH_l4_64_shadow  (12U<<28) /* shadowing a 64-bit L4 page */
   10.21 +#define PGC_SH_max_shadow    (12U<<28)
   10.22 +#define PGC_SH_p2m_table     (13U<<28) /* in use as the p2m table */
   10.23 +#define PGC_SH_monitor_table (14U<<28) /* in use as a monitor table */
   10.24  #define PGC_SH_unused        (15U<<28)
   10.25  
   10.26  #define PGC_SH_type_mask     (15U<<28)
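
    [Deleting PGC_SH_l3_pae_shadow renumbers every later shadow type down by
    one, keeping the 4-bit encoding dense; nothing may cache the old numeric
    values.  Decoding is unchanged, e.g. (helper name illustrative, shift
    taken from the (15U<<28) mask above):

        static inline u32 sh_type_of(struct page_info *pg)
        {
            return (pg->count_info & PGC_SH_type_mask) >> 28;
        }
    ]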
    11.1 --- a/xen/include/asm-x86/perfc_defn.h	Wed Oct 18 13:43:35 2006 +0100
    11.2 +++ b/xen/include/asm-x86/perfc_defn.h	Wed Oct 18 14:36:20 2006 +0100
    11.3 @@ -71,6 +71,7 @@ PERFCOUNTER_CPU(shadow_writeable_h_1,  "
    11.4  PERFCOUNTER_CPU(shadow_writeable_h_2,  "shadow writeable: 32pae w2k3")
    11.5  PERFCOUNTER_CPU(shadow_writeable_h_3,  "shadow writeable: 64b w2k3")
    11.6  PERFCOUNTER_CPU(shadow_writeable_h_4,  "shadow writeable: 32b linux low")
    11.7 +PERFCOUNTER_CPU(shadow_writeable_h_5,  "shadow writeable: 32b linux high")
    11.8  PERFCOUNTER_CPU(shadow_writeable_bf,   "shadow writeable brute-force")
    11.9  PERFCOUNTER_CPU(shadow_mappings,       "shadow removes all mappings")
   11.10  PERFCOUNTER_CPU(shadow_mappings_bf,    "shadow rm-mappings brute-force")
    12.1 --- a/xen/include/asm-x86/shadow.h	Wed Oct 18 13:43:35 2006 +0100
    12.2 +++ b/xen/include/asm-x86/shadow.h	Wed Oct 18 14:36:20 2006 +0100
    12.3 @@ -72,7 +72,6 @@
    12.4  #define SHADOW_SET_CHANGED            0x1
    12.5  #define SHADOW_SET_FLUSH              0x2
    12.6  #define SHADOW_SET_ERROR              0x4
    12.7 -#define SHADOW_SET_L3PAE_RECOPY       0x8
    12.8  
    12.9  // How do we tell that we have a 32-bit PV guest in a 64-bit Xen?
   12.10  #ifdef __x86_64__
   12.11 @@ -406,7 +405,6 @@ shadow_update_cr3(struct vcpu *v)
   12.12   * for HVM guests, arch.monitor_table and hvm's guest CR3.
   12.13   *
   12.14   * Update ref counts to shadow tables appropriately.
   12.15 - * For PAE, relocate L3 entries, if necessary, into low memory.
   12.16   */
   12.17  static inline void update_cr3(struct vcpu *v)
   12.18  {
   12.19 @@ -549,13 +547,13 @@ shadow_remove_all_shadows_and_parents(st
   12.20   * Unshadow it, and recursively unshadow pages that reference it. */
   12.21  
   12.22  /* Remove all shadows of the guest mfn. */
   12.23 -extern void sh_remove_shadows(struct vcpu *v, mfn_t gmfn, int all);
   12.24 +extern void sh_remove_shadows(struct vcpu *v, mfn_t gmfn, int fast, int all);
   12.25  static inline void shadow_remove_all_shadows(struct vcpu *v, mfn_t gmfn)
   12.26  {
   12.27      int was_locked = shadow_lock_is_acquired(v->domain);
   12.28      if ( !was_locked )
   12.29          shadow_lock(v->domain);
   12.30 -    sh_remove_shadows(v, gmfn, 1);
   12.31 +    sh_remove_shadows(v, gmfn, 0, 1);
   12.32      if ( !was_locked )
   12.33          shadow_unlock(v->domain);
   12.34  }
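
    [The extra parameter splits two previously conflated behaviours: fast=1
    permits giving up rather than brute-force searching, all=1 insists that
    every shadow of the gmfn goes.  The two call sites in this changeset use
    opposite corners of that matrix:

        sh_remove_shadows(v, gmfn, 1, 0); /* early-unshadow: fast, may fail */
        sh_remove_shadows(v, gmfn, 0, 1); /* full removal: slow, must work  */
    ]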
   12.35 @@ -587,7 +585,6 @@ shadow_guest_physmap_remove_page(struct 
   12.36  #define SHF_FL1_PAE (1u << PGC_SH_type_to_index(PGC_SH_fl1_pae_shadow))
   12.37  #define SHF_L2_PAE  (1u << PGC_SH_type_to_index(PGC_SH_l2_pae_shadow))
   12.38  #define SHF_L2H_PAE (1u << PGC_SH_type_to_index(PGC_SH_l2h_pae_shadow))
   12.39 -#define SHF_L3_PAE  (1u << PGC_SH_type_to_index(PGC_SH_l3_pae_shadow))
   12.40  #define SHF_L1_64   (1u << PGC_SH_type_to_index(PGC_SH_l1_64_shadow))
   12.41  #define SHF_FL1_64  (1u << PGC_SH_type_to_index(PGC_SH_fl1_64_shadow))
   12.42  #define SHF_L2_64   (1u << PGC_SH_type_to_index(PGC_SH_l2_64_shadow))