direct-io.hg

changeset 7776:776ef2fb6fd8

Merged.
author emellor@leeni.uk.xensource.com
date Fri Nov 11 21:59:33 2005 +0100 (2005-11-11)
parents bf7c16e761fc 995e94c4802e
children fa237e03d3e7
line diff
     1.1 --- a/linux-2.6-xen-sparse/arch/xen/i386/mm/pgtable.c	Fri Nov 11 21:59:05 2005 +0100
     1.2 +++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/pgtable.c	Fri Nov 11 21:59:33 2005 +0100
     1.3 @@ -398,14 +398,14 @@ void make_page_readonly(void *va)
     1.4  {
     1.5  	pte_t *pte = virt_to_ptep(va);
     1.6  	set_pte(pte, pte_wrprotect(*pte));
     1.7 -	if ( (unsigned long)va >= (unsigned long)high_memory )
     1.8 -	{
     1.9 -		unsigned long phys;
    1.10 -		phys = machine_to_phys(*(unsigned long *)pte & PAGE_MASK);
    1.11 +	if ((unsigned long)va >= (unsigned long)high_memory) {
    1.12 +		unsigned long pfn; 
    1.13 +		pfn = pte_pfn(*pte); 
    1.14  #ifdef CONFIG_HIGHMEM
    1.15 -		if ( (phys >> PAGE_SHIFT) < highstart_pfn )
    1.16 +		if (pfn < highstart_pfn)
    1.17  #endif
    1.18 -			make_lowmem_page_readonly(phys_to_virt(phys));
    1.19 +			make_lowmem_page_readonly(
    1.20 +				phys_to_virt(pfn << PAGE_SHIFT)); 
    1.21  	}
    1.22  }
    1.23  
    1.24 @@ -413,21 +413,20 @@ void make_page_writable(void *va)
    1.25  {
    1.26  	pte_t *pte = virt_to_ptep(va);
    1.27  	set_pte(pte, pte_mkwrite(*pte));
    1.28 -	if ( (unsigned long)va >= (unsigned long)high_memory )
    1.29 -	{
    1.30 -		unsigned long phys;
    1.31 -		phys = machine_to_phys(*(unsigned long *)pte & PAGE_MASK);
    1.32 +	if ((unsigned long)va >= (unsigned long)high_memory) {
    1.33 +		unsigned long pfn; 
    1.34 +		pfn = pte_pfn(*pte); 
    1.35  #ifdef CONFIG_HIGHMEM
    1.36 -		if ( (phys >> PAGE_SHIFT) < highstart_pfn )
    1.37 +		if (pfn < highstart_pfn)
    1.38  #endif
    1.39 -			make_lowmem_page_writable(phys_to_virt(phys));
    1.40 +			make_lowmem_page_writable(
    1.41 +				phys_to_virt(pfn << PAGE_SHIFT)); 
    1.42  	}
    1.43  }
    1.44  
    1.45  void make_pages_readonly(void *va, unsigned int nr)
    1.46  {
    1.47 -	while ( nr-- != 0 )
    1.48 -	{
    1.49 +	while (nr-- != 0) {
    1.50  		make_page_readonly(va);
    1.51  		va = (void *)((unsigned long)va + PAGE_SIZE);
    1.52  	}
    1.53 @@ -435,8 +434,7 @@ void make_pages_readonly(void *va, unsig
    1.54  
    1.55  void make_pages_writable(void *va, unsigned int nr)
    1.56  {
    1.57 -	while ( nr-- != 0 )
    1.58 -	{
    1.59 +	while (nr-- != 0) {
    1.60  		make_page_writable(va);
    1.61  		va = (void *)((unsigned long)va + PAGE_SIZE);
    1.62  	}
     2.1 --- a/xen/arch/x86/shadow.c	Fri Nov 11 21:59:05 2005 +0100
     2.2 +++ b/xen/arch/x86/shadow.c	Fri Nov 11 21:59:33 2005 +0100
     2.3 @@ -22,7 +22,7 @@
     2.4   * Jun Nakajima <jun.nakajima@intel.com>
     2.5   * Chengyuan Li <chengyuan.li@intel.com>
     2.6   *
     2.7 - * Extended to support 64-bit guests.
     2.8 + * Extended to support 32-bit PAE and 64-bit guests.
     2.9   */
    2.10  
    2.11  #include <xen/config.h>
    2.12 @@ -34,6 +34,7 @@
    2.13  #include <xen/event.h>
    2.14  #include <xen/sched.h>
    2.15  #include <xen/trace.h>
    2.16 +#include <asm/shadow_64.h>
    2.17  
    2.18  extern void free_shadow_pages(struct domain *d);
    2.19  
    2.20 @@ -44,13 +45,13 @@ static void mark_shadows_as_reflecting_s
    2.21  #endif
    2.22  
    2.23  #if CONFIG_PAGING_LEVELS == 3
    2.24 -#include <asm/shadow_64.h>
    2.25  static unsigned long shadow_l3_table(
    2.26      struct domain *d, unsigned long gpfn, unsigned long gmfn);
    2.27 +static inline void validate_bl2e_change( struct domain *d,
    2.28 +    guest_root_pgentry_t *new_gle_p, pgentry_64_t *shadow_l3, int index);
    2.29  #endif
    2.30  
    2.31  #if CONFIG_PAGING_LEVELS == 4
    2.32 -#include <asm/shadow_64.h>
    2.33  static unsigned long shadow_l4_table(
    2.34      struct domain *d, unsigned long gpfn, unsigned long gmfn);
    2.35  static void shadow_map_into_current(struct vcpu *v,
    2.36 @@ -222,7 +223,7 @@ alloc_shadow_page(struct domain *d,
    2.37          {
    2.38              if (d->arch.ops->guest_paging_levels == PAGING_L2)
    2.39              {
    2.40 -#if CONFIG_PAGING_LEVELS >= 4
    2.41 +#if CONFIG_PAGING_LEVELS >= 3
    2.42                  /* For 32-bit VMX guest, 2 shadow L1s to simulate 1 guest L1
    2.43                   * So need allocate 2 continues shadow L1 each time.
    2.44                   */
    2.45 @@ -313,6 +314,8 @@ alloc_shadow_page(struct domain *d,
    2.46              goto fail;
    2.47          perfc_incr(shadow_l3_pages);
    2.48          d->arch.shadow_page_count++;
    2.49 +        if ( PGT_l3_page_table == PGT_root_page_table )
    2.50 +            pin = 1;
    2.51          break;
    2.52  
    2.53      case PGT_l4_shadow:
    2.54 @@ -375,7 +378,7 @@ fail:
    2.55      {
    2.56          if (d->arch.ops->guest_paging_levels == PAGING_L2)
    2.57          {
    2.58 -#if CONFIG_PAGING_LEVELS >=4
    2.59 +#if CONFIG_PAGING_LEVELS >=3
    2.60              free_domheap_pages(page, SL1_ORDER);
    2.61  #else
    2.62              free_domheap_page(page);
    2.63 @@ -427,14 +430,10 @@ shadow_hl2_table(struct domain *d, unsig
    2.64  
    2.65      hl2 = map_domain_page(hl2mfn);
    2.66  
    2.67 -#ifdef __i386__
    2.68      if ( shadow_mode_external(d) )
    2.69          limit = L2_PAGETABLE_ENTRIES;
    2.70      else
    2.71          limit = DOMAIN_ENTRIES_PER_L2_PAGETABLE;
    2.72 -#else
    2.73 -    limit = 0; /* XXX x86/64 XXX */
    2.74 -#endif
    2.75  
    2.76      memset(hl2, 0, limit * sizeof(l1_pgentry_t));
    2.77  
    2.78 @@ -540,7 +539,7 @@ static unsigned long shadow_l2_table(
    2.79      SH_VLOG("shadow_l2_table(%lx -> %lx)", gmfn, smfn);
    2.80      return smfn;
    2.81  }
    2.82 -#endif
    2.83 +#endif /* CONFIG_PAGING_LEVELS == 2 */
    2.84  
    2.85  static void shadow_map_l1_into_current_l2(unsigned long va)
    2.86  {
    2.87 @@ -549,7 +548,7 @@ static void shadow_map_l1_into_current_l
    2.88      l1_pgentry_t *spl1e;
    2.89      l2_pgentry_t sl2e;
    2.90      guest_l1_pgentry_t *gpl1e;
    2.91 -    guest_l2_pgentry_t gl2e;
    2.92 +    guest_l2_pgentry_t gl2e = {0};
    2.93      unsigned long gl1pfn, gl1mfn, sl1mfn;
    2.94      int i, init_table = 0;
    2.95  
    2.96 @@ -593,14 +592,14 @@ static void shadow_map_l1_into_current_l
    2.97      ASSERT( !(l2e_get_flags(old_sl2e) & _PAGE_PRESENT) );
    2.98  #endif
    2.99  
   2.100 -#if CONFIG_PAGING_LEVELS >=4
   2.101 +#if CONFIG_PAGING_LEVELS >=3
   2.102      if (d->arch.ops->guest_paging_levels == PAGING_L2)
   2.103      {
   2.104 -        /* for 32-bit VMX guest on 64-bit host,
   2.105 +        /* for 32-bit VMX guest on 64-bit or PAE host,
   2.106           * need update two L2 entries each time
   2.107           */
   2.108          if ( !get_shadow_ref(sl1mfn))
   2.109 -                BUG();
   2.110 +            BUG();
   2.111          l2pde_general(d, &gl2e, &sl2e, sl1mfn);
   2.112          __guest_set_l2e(v, va, &gl2e);
   2.113          __shadow_set_l2e(v, va & ~((1<<L2_PAGETABLE_SHIFT_32) - 1), &sl2e);
   2.114 @@ -625,19 +624,17 @@ static void shadow_map_l1_into_current_l
   2.115          int index = guest_l1_table_offset(va);
   2.116          int min = 1, max = 0;
   2.117  
   2.118 -        unsigned long entries, pt_va;
   2.119 -        l1_pgentry_t tmp_sl1e;
   2.120 -        guest_l1_pgentry_t tmp_gl1e;//Prepare for double compile
   2.121 -
   2.122 -
   2.123 -        entries = PAGE_SIZE / sizeof(guest_l1_pgentry_t);
   2.124 -        pt_va = ((va >> L1_PAGETABLE_SHIFT) & ~(entries - 1)) << L1_PAGETABLE_SHIFT;
   2.125 -        gpl1e = (guest_l1_pgentry_t *) __guest_get_l1e(v, pt_va, &tmp_gl1e);
   2.126 +        unsigned long tmp_gmfn;
   2.127 +        l2_pgentry_t tmp_sl2e = {0};
   2.128 +        guest_l2_pgentry_t tmp_gl2e = {0};
   2.129 +
   2.130 +        __guest_get_l2e(v, va, &tmp_gl2e);
   2.131 +        tmp_gmfn = __gpfn_to_mfn(d, l2e_get_pfn(tmp_gl2e));
   2.132 +        gpl1e = (guest_l1_pgentry_t *) map_domain_page(tmp_gmfn);
   2.133  
   2.134          /* If the PGT_l1_shadow has two continual pages */
   2.135 -        entries = PAGE_SIZE / sizeof(guest_l1_pgentry_t); //1024 entry!!!
   2.136 -        pt_va = ((va >> L1_PAGETABLE_SHIFT) & ~(entries - 1)) << L1_PAGETABLE_SHIFT;
   2.137 -        spl1e = (l1_pgentry_t *) __shadow_get_l1e(v, pt_va, &tmp_sl1e);
   2.138 +        __shadow_get_l2e(v, va, &tmp_sl2e);
   2.139 +        spl1e = (l1_pgentry_t *) map_domain_page(l2e_get_pfn(tmp_sl2e));
   2.140  
   2.141          for ( i = 0; i < GUEST_L1_PAGETABLE_ENTRIES; i++ )
   2.142          {
   2.143 @@ -662,10 +659,13 @@ static void shadow_map_l1_into_current_l
   2.144              if ( likely(i > max) )
   2.145                  max = i;
   2.146              set_guest_back_ptr(d, sl1e, sl1mfn, i);
   2.147 -          }
   2.148 +        }
   2.149  
   2.150          frame_table[sl1mfn].tlbflush_timestamp =
   2.151              SHADOW_ENCODE_MIN_MAX(min, max);
   2.152 +
   2.153 +        unmap_domain_page(gpl1e);
   2.154 +        unmap_domain_page(spl1e);
   2.155      }
   2.156  }
   2.157  
   2.158 @@ -674,7 +674,7 @@ shadow_set_l1e(unsigned long va, l1_pgen
   2.159  {
   2.160      struct vcpu *v = current;
   2.161      struct domain *d = v->domain;
   2.162 -    l2_pgentry_t sl2e;
   2.163 +    l2_pgentry_t sl2e = {0};
   2.164  
   2.165      __shadow_get_l2e(v, va, &sl2e);
   2.166      if ( !(l2e_get_flags(sl2e) & _PAGE_PRESENT) )
   2.167 @@ -690,11 +690,23 @@ shadow_set_l1e(unsigned long va, l1_pgen
   2.168          }
   2.169          else /* check to see if it exists; if so, link it in */
   2.170          {
   2.171 -            l2_pgentry_t gpde = linear_l2_table(v)[l2_table_offset(va)];
   2.172 -            unsigned long gl1pfn = l2e_get_pfn(gpde);
   2.173 -            unsigned long sl1mfn = __shadow_status(d, gl1pfn, PGT_l1_shadow);
   2.174 -
   2.175 -            ASSERT( l2e_get_flags(gpde) & _PAGE_PRESENT );
   2.176 +            l2_pgentry_t gpde = {0};
   2.177 +            unsigned long gl1pfn;
   2.178 +            unsigned long sl1mfn;
   2.179 +
   2.180 +            __guest_get_l2e(v, va, &gpde);
   2.181 +
   2.182 +            if ( l2e_get_flags(gpde) & _PAGE_PRESENT )
   2.183 +            {
   2.184 +                gl1pfn = l2e_get_pfn(gpde);
   2.185 +                sl1mfn = __shadow_status(d, gl1pfn, PGT_l1_shadow);
   2.186 +            }
   2.187 +            else
   2.188 +            {
   2.189 +                // no shadow exists, so there's nothing to do.
   2.190 +                perfc_incrc(shadow_set_l1e_fail);
   2.191 +                return;
   2.192 +            }
   2.193  
   2.194              if ( sl1mfn )
   2.195              {
   2.196 @@ -738,7 +750,7 @@ shadow_set_l1e(unsigned long va, l1_pgen
   2.197      shadow_update_min_max(l2e_get_pfn(sl2e), l1_table_offset(va));
   2.198  }
   2.199  
   2.200 -#if CONFIG_PAGING_LEVELS <= 3
   2.201 +#if CONFIG_PAGING_LEVELS == 2
   2.202  static void shadow_invlpg_32(struct vcpu *v, unsigned long va)
   2.203  {
   2.204      struct domain *d = v->domain;
   2.205 @@ -767,7 +779,7 @@ static void shadow_invlpg_32(struct vcpu
   2.206  
   2.207      shadow_unlock(d);
   2.208  }
   2.209 -#endif
   2.210 +#endif /* CONFIG_PAGING_LEVELS == 2 */
   2.211  
   2.212  static struct out_of_sync_entry *
   2.213  shadow_alloc_oos_entry(struct domain *d)
   2.214 @@ -996,7 +1008,10 @@ static int snapshot_entry_matches(
   2.215  
   2.216      if (__copy_from_user(&gpte, &guest_pt[index],
   2.217                           sizeof(gpte)))
   2.218 +    {
   2.219 +        unmap_domain_page(snapshot);
   2.220          return 0;
   2.221 +    }
   2.222  
   2.223      // This could probably be smarter, but this is sufficent for
   2.224      // our current needs.
   2.225 @@ -1021,7 +1036,7 @@ static int snapshot_entry_matches(
   2.226  static int is_out_of_sync(struct vcpu *v, unsigned long va) /* __shadow_out_of_sync */
   2.227  {
   2.228      struct domain *d = v->domain;
   2.229 -#if defined (__x86_64__)
   2.230 +#if CONFIG_PAGING_LEVELS == 4
   2.231      unsigned long l2mfn = ((v->arch.flags & TF_kernel_mode)?
   2.232                            pagetable_get_pfn(v->arch.guest_table) :
   2.233                            pagetable_get_pfn(v->arch.guest_table_user));
   2.234 @@ -1032,16 +1047,21 @@ static int is_out_of_sync(struct vcpu *v
   2.235      guest_l2_pgentry_t l2e;
   2.236      unsigned long l1pfn, l1mfn;
   2.237      guest_l1_pgentry_t *guest_pt;
   2.238 -    guest_l1_pgentry_t tmp_gle;
   2.239 -    unsigned long pt_va;
   2.240  
   2.241      ASSERT(shadow_lock_is_acquired(d));
   2.242      ASSERT(VALID_M2P(l2pfn));
   2.243  
   2.244      perfc_incrc(shadow_out_of_sync_calls);
   2.245  
   2.246 -#if CONFIG_PAGING_LEVELS >= 4
   2.247 -    if (d->arch.ops->guest_paging_levels == PAGING_L4) { /* Mode F */
   2.248 +#if CONFIG_PAGING_LEVELS >= 3
   2.249 +
   2.250 +#define unmap_and_return(x)                                         \
   2.251 +    if ( guest_pt != (guest_l1_pgentry_t *) v->arch.guest_vtable )  \
   2.252 +        unmap_domain_page(guest_pt);                                \
   2.253 +    return (x);
   2.254 +
   2.255 +    if (d->arch.ops->guest_paging_levels >= PAGING_L3) 
   2.256 +    { 
   2.257          pgentry_64_t le;
   2.258          unsigned long gmfn;
   2.259          unsigned long gpfn;
   2.260 @@ -1051,37 +1071,57 @@ static int is_out_of_sync(struct vcpu *v
   2.261          gpfn = l2pfn;
   2.262          guest_pt = (guest_l1_pgentry_t *)v->arch.guest_vtable;
   2.263  
   2.264 -        for (i = PAGING_L4; i >= PAGING_L3; i--) {
   2.265 +        for ( i = PAGING_L4; i >= PAGING_L3; i-- ) 
   2.266 +        {
   2.267 +            if (d->arch.ops->guest_paging_levels == PAGING_L3 
   2.268 +                && i == PAGING_L4)
   2.269 +                continue;       /* skip the top-level for 3-level */
   2.270 +
   2.271              if ( page_out_of_sync(&frame_table[gmfn]) &&
   2.272 -              !snapshot_entry_matches(
   2.273 -                  d, guest_pt, gpfn, table_offset_64(va, i)) )
   2.274 -                return 1;
   2.275 -
   2.276 +                 !snapshot_entry_matches(
   2.277 +                     d, guest_pt, gpfn, table_offset_64(va, i)) )
   2.278 +            {
   2.279 +                unmap_and_return (1);
   2.280 +            }
   2.281 +
   2.282 +            le = entry_empty();
   2.283              __rw_entry(v, va, &le, GUEST_ENTRY | GET_ENTRY | i);
   2.284 +
   2.285              if ( !(entry_get_flags(le) & _PAGE_PRESENT) )
   2.286 -                return 0;
   2.287 +            {
   2.288 +                unmap_and_return (0);
   2.289 +            }
   2.290              gpfn = entry_get_pfn(le);
   2.291              gmfn = __gpfn_to_mfn(d, gpfn);
   2.292              if ( !VALID_MFN(gmfn) )
   2.293 -                return 0;
   2.294 -            /* Todo: check!*/
   2.295 +            {
   2.296 +                unmap_and_return (0);
   2.297 +            }
   2.298 +            if ( guest_pt != (guest_l1_pgentry_t *)v->arch.guest_vtable )
   2.299 +                unmap_domain_page(guest_pt);
   2.300              guest_pt = (guest_l1_pgentry_t *)map_domain_page(gmfn);
   2.301 -
   2.302          }
   2.303  
   2.304          /* L2 */
   2.305          if ( page_out_of_sync(&frame_table[gmfn]) &&
   2.306               !snapshot_entry_matches(d, guest_pt, gpfn, l2_table_offset(va)) )
   2.307 +        {
   2.308 +            unmap_and_return (1);
   2.309 +        }
   2.310 +
   2.311 +        if ( guest_pt != (guest_l1_pgentry_t *)v->arch.guest_vtable )
   2.312 +            unmap_domain_page(guest_pt);
   2.313 +
   2.314 +    } 
   2.315 +    else
   2.316 +#undef unmap_and_return
   2.317 +#endif /* CONFIG_PAGING_LEVELS >= 3 */
   2.318 +    {
   2.319 +        if ( page_out_of_sync(&frame_table[l2mfn]) &&
   2.320 +             !snapshot_entry_matches(d, (guest_l1_pgentry_t *)v->arch.guest_vtable,
   2.321 +                                     l2pfn, guest_l2_table_offset(va)) )
   2.322              return 1;
   2.323 -
   2.324 -
   2.325 -    } else
   2.326 -#endif
   2.327 -
   2.328 -    if ( page_out_of_sync(&frame_table[l2mfn]) &&
   2.329 -         !snapshot_entry_matches(d, (guest_l1_pgentry_t *)v->arch.guest_vtable,
   2.330 -                                 l2pfn, guest_l2_table_offset(va)) )
   2.331 -        return 1;
   2.332 +    }
   2.333  
   2.334      __guest_get_l2e(v, va, &l2e);
   2.335      if ( !(guest_l2e_get_flags(l2e) & _PAGE_PRESENT) ||
   2.336 @@ -1095,15 +1135,17 @@ static int is_out_of_sync(struct vcpu *v
   2.337      if ( !VALID_MFN(l1mfn) )
   2.338          return 0;
   2.339  
   2.340 -    pt_va = ((va >> L1_PAGETABLE_SHIFT) & ~(GUEST_L1_PAGETABLE_ENTRIES - 1))
   2.341 -      << L1_PAGETABLE_SHIFT;
   2.342 -    guest_pt = (guest_l1_pgentry_t *) __guest_get_l1e(v, pt_va, &tmp_gle);
   2.343 +    guest_pt = (guest_l1_pgentry_t *) map_domain_page(l1mfn);
   2.344  
   2.345      if ( page_out_of_sync(&frame_table[l1mfn]) &&
   2.346           !snapshot_entry_matches(
   2.347 -             d, guest_pt, l1pfn, guest_l1_table_offset(va)) )
   2.348 +             d, guest_pt, l1pfn, guest_l1_table_offset(va)) ) 
   2.349 +    {
   2.350 +        unmap_domain_page(guest_pt);
   2.351          return 1;
   2.352 -
   2.353 +    }
   2.354 +
   2.355 +    unmap_domain_page(guest_pt);
   2.356      return 0;
   2.357  }
   2.358  
   2.359 @@ -1257,7 +1299,7 @@ static int remove_all_write_access(
   2.360      }
   2.361  
   2.362      if ( shadow_mode_external(d) ) {
   2.363 -        if (write_refs-- == 0)
   2.364 +        if (--write_refs == 0)
   2.365              return 0;
   2.366  
   2.367           // Use the back pointer to locate the shadow page that can contain
   2.368 @@ -1314,6 +1356,8 @@ static int resync_all(struct domain *d, 
   2.369  
   2.370      for ( entry = d->arch.out_of_sync; entry; entry = entry->next)
   2.371      {
   2.372 +        int max = -1;
   2.373 +
   2.374          if ( entry->snapshot_mfn == SHADOW_SNAPSHOT_ELSEWHERE )
   2.375              continue;
   2.376  
   2.377 @@ -1335,7 +1379,7 @@ static int resync_all(struct domain *d, 
   2.378                  continue;
   2.379          }
   2.380  
   2.381 -        FSH_LOG("resyncing t=%08x gpfn=%lx gmfn=%lx smfn=%lx snapshot_mfn=%lx",
   2.382 +       FSH_LOG("resyncing t=%08x gpfn=%lx gmfn=%lx smfn=%lx snapshot_mfn=%lx",
   2.383                  stype, entry->gpfn, entry->gmfn, smfn, entry->snapshot_mfn);
   2.384  
   2.385          // Compare guest's new contents to its snapshot, validating
   2.386 @@ -1373,11 +1417,9 @@ static int resync_all(struct domain *d, 
   2.387  
   2.388              if ( !shadow_mode_refcounts(d) )
   2.389                  revalidate_l1(d, (l1_pgentry_t *)guest1, (l1_pgentry_t *)snapshot1);
   2.390 -
   2.391              if ( !smfn )
   2.392                  break;
   2.393  
   2.394 -
   2.395              changed = 0;
   2.396  
   2.397              for ( i = min_shadow; i <= max_shadow; i++ )
   2.398 @@ -1405,12 +1447,13 @@ static int resync_all(struct domain *d, 
   2.399              perfc_incrc(resync_l1);
   2.400              perfc_incr_histo(wpt_updates, changed, PT_UPDATES);
   2.401              perfc_incr_histo(l1_entries_checked, max_shadow - min_shadow + 1, PT_UPDATES);
   2.402 -            if ( d->arch.ops->guest_paging_levels == PAGING_L4 &&
   2.403 +            if ( d->arch.ops->guest_paging_levels >= PAGING_L3 &&
   2.404                   unshadow_l1 ) {
   2.405 -                pgentry_64_t l2e;
   2.406 +                pgentry_64_t l2e = {0};
   2.407  
   2.408                  __shadow_get_l2e(entry->v, entry->va, &l2e);
   2.409 -                if (entry_get_flags(l2e) & _PAGE_PRESENT) {
   2.410 +
   2.411 +                if ( entry_get_flags(l2e) & _PAGE_PRESENT ) {
   2.412                      entry_remove_flags(l2e, _PAGE_PRESENT);
   2.413                      __shadow_set_l2e(entry->v, entry->va, &l2e);
   2.414  
   2.415 @@ -1421,11 +1464,9 @@ static int resync_all(struct domain *d, 
   2.416  
   2.417              break;
   2.418          }
   2.419 -#if defined (__i386__)
   2.420 +#if CONFIG_PAGING_LEVELS == 2
   2.421          case PGT_l2_shadow:
   2.422          {
   2.423 -            int max = -1;
   2.424 -
   2.425              l2_pgentry_t *guest2 = guest;
   2.426              l2_pgentry_t *shadow2 = shadow;
   2.427              l2_pgentry_t *snapshot2 = snapshot;
   2.428 @@ -1436,9 +1477,6 @@ static int resync_all(struct domain *d, 
   2.429              changed = 0;
   2.430              for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
   2.431              {
   2.432 -#if CONFIG_X86_PAE
   2.433 -                BUG();  /* FIXME: need type_info */
   2.434 -#endif
   2.435                  if ( !is_guest_l2_slot(0,i) && !external )
   2.436                      continue;
   2.437  
   2.438 @@ -1482,9 +1520,6 @@ static int resync_all(struct domain *d, 
   2.439              changed = 0;
   2.440              for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
   2.441              {
   2.442 -#if CONFIG_X86_PAE
   2.443 -                BUG();  /* FIXME: need type_info */
   2.444 -#endif
   2.445                  if ( !is_guest_l2_slot(0, i) && !external )
   2.446                      continue;
   2.447  
   2.448 @@ -1505,7 +1540,7 @@ static int resync_all(struct domain *d, 
   2.449              perfc_incr_histo(shm_hl2_updates, changed, PT_UPDATES);
   2.450              break;
   2.451          }
   2.452 -#else
   2.453 +#elif CONFIG_PAGING_LEVELS >= 3
   2.454          case PGT_l2_shadow:
   2.455          case PGT_l3_shadow:
   2.456          {
   2.457 @@ -1521,19 +1556,35 @@ static int resync_all(struct domain *d, 
   2.458                        guest_pt[i], snapshot_pt[i], PAGE_FLAG_MASK) )
   2.459                  {
   2.460                      need_flush |= validate_entry_change(
   2.461 -                      d, &guest_pt[i], &shadow_pt[i],
   2.462 -                      shadow_type_to_level(stype));
   2.463 +                        d, &guest_pt[i], &shadow_pt[i],
   2.464 +                        shadow_type_to_level(stype));
   2.465                      changed++;
   2.466                  }
   2.467 +#if CONFIG_PAGING_LEVELS == 3
   2.468 +                if ( stype == PGT_l3_shadow ) 
   2.469 +                {
   2.470 +                    if ( entry_get_value(guest_pt[i]) != 0 ) 
   2.471 +                        max = i;
   2.472 +
   2.473 +                    if ( !(entry_get_flags(guest_pt[i]) & _PAGE_PRESENT) &&
   2.474 +                         unlikely(entry_get_value(guest_pt[i]) != 0) &&
   2.475 +                         !unshadow &&
   2.476 +                         (frame_table[smfn].u.inuse.type_info & PGT_pinned) )
   2.477 +                        unshadow = 1;
   2.478 +                }
   2.479 +#endif
   2.480              }
   2.481 +
   2.482 +            if ( d->arch.ops->guest_paging_levels == PAGING_L3
   2.483 +                 && max == -1 && stype == PGT_l3_shadow )
   2.484 +                unshadow = 1;
   2.485 +
   2.486 +            perfc_incrc(resync_l3);
   2.487 +            perfc_incr_histo(shm_l3_updates, changed, PT_UPDATES);
   2.488              break;
   2.489 -
   2.490 -
   2.491          }
   2.492          case PGT_l4_shadow:
   2.493          {
   2.494 -            int max = -1;
   2.495 -
   2.496              guest_root_pgentry_t *guest_root = guest;
   2.497              l4_pgentry_t *shadow4 = shadow;
   2.498              guest_root_pgentry_t *snapshot_root = snapshot;
   2.499 @@ -1547,7 +1598,8 @@ static int resync_all(struct domain *d, 
   2.500                  if ( root_entry_has_changed(
   2.501                          new_root_e, snapshot_root[i], PAGE_FLAG_MASK))
   2.502                  {
   2.503 -                    if (d->arch.ops->guest_paging_levels == PAGING_L4) {
   2.504 +                    if ( d->arch.ops->guest_paging_levels == PAGING_L4 ) 
   2.505 +                    {
   2.506                          need_flush |= validate_entry_change(
   2.507                            d, (pgentry_64_t *)&new_root_e,
   2.508                            (pgentry_64_t *)&shadow4[i], shadow_type_to_level(stype));
   2.509 @@ -1563,9 +1615,9 @@ static int resync_all(struct domain *d, 
   2.510  
   2.511                  //  Need a better solution in the long term.
   2.512                  if ( !(guest_root_get_flags(new_root_e) & _PAGE_PRESENT) &&
   2.513 -                  unlikely(guest_root_get_intpte(new_root_e) != 0) &&
   2.514 -                  !unshadow &&
   2.515 -                  (frame_table[smfn].u.inuse.type_info & PGT_pinned) )
   2.516 +                     unlikely(guest_root_get_intpte(new_root_e) != 0) &&
   2.517 +                     !unshadow &&
   2.518 +                     (frame_table[smfn].u.inuse.type_info & PGT_pinned) )
   2.519                      unshadow = 1;
   2.520              }
   2.521              if ( max == -1 )
   2.522 @@ -1575,7 +1627,7 @@ static int resync_all(struct domain *d, 
   2.523              break;
   2.524          }
   2.525  
   2.526 -#endif
   2.527 +#endif /* CONFIG_PAGING_LEVELS >= 3 */
   2.528          default:
   2.529              BUG();
   2.530          }
   2.531 @@ -1589,7 +1641,7 @@ static int resync_all(struct domain *d, 
   2.532          {
   2.533              perfc_incrc(unshadow_l2_count);
   2.534              shadow_unpin(smfn);
   2.535 -#if defined (__i386__)
   2.536 +#if CONFIG_PAGING_LEVELS == 2
   2.537              if ( unlikely(shadow_mode_external(d)) )
   2.538              {
   2.539                  unsigned long hl2mfn;
   2.540 @@ -1660,19 +1712,24 @@ static void sync_all(struct domain *d)
   2.541      // Second, resync all L1 pages, then L2 pages, etc...
   2.542      //
   2.543      need_flush |= resync_all(d, PGT_l1_shadow);
   2.544 -#if defined (__i386__)
   2.545 -    if ( shadow_mode_translate(d) )
   2.546 +
   2.547 +#if CONFIG_PAGING_LEVELS == 2
   2.548 +    if ( d->arch.ops->guest_paging_levels == PAGING_L2 &&
   2.549 +         shadow_mode_translate(d) )  
   2.550 +    {
   2.551          need_flush |= resync_all(d, PGT_hl2_shadow);
   2.552 +    }
   2.553  #endif
   2.554  
   2.555 -    /*
   2.556 -     * Fixme: for i386 host
   2.557 -     */
   2.558 -    if (d->arch.ops->guest_paging_levels == PAGING_L4) {
   2.559 -        need_flush |= resync_all(d, PGT_l2_shadow);
   2.560 +    need_flush |= resync_all(d, PGT_l2_shadow);
   2.561 +
   2.562 +#if CONFIG_PAGING_LEVELS >= 3
   2.563 +    if (d->arch.ops->guest_paging_levels >= PAGING_L3) 
   2.564 +    {
   2.565          need_flush |= resync_all(d, PGT_l3_shadow);
   2.566 +        need_flush |= resync_all(d, PGT_l4_shadow);
   2.567      }
   2.568 -    need_flush |= resync_all(d, PGT_l4_shadow);
   2.569 +#endif
   2.570  
   2.571      if ( need_flush && !unlikely(shadow_mode_external(d)) )
   2.572          local_flush_tlb();
   2.573 @@ -1749,7 +1806,7 @@ static inline int l1pte_read_fault(
   2.574  
   2.575      return 1;
   2.576  }
   2.577 -#if CONFIG_PAGING_LEVELS <= 3
   2.578 +#if CONFIG_PAGING_LEVELS == 2
   2.579  static int shadow_fault_32(unsigned long va, struct cpu_user_regs *regs)
   2.580  {
   2.581      l1_pgentry_t gpte, spte, orig_gpte;
   2.582 @@ -1888,7 +1945,20 @@ fail:
   2.583      shadow_unlock(d);
   2.584      return 0;
   2.585  }
   2.586 -#endif
   2.587 +#endif /* CONFIG_PAGING_LEVELS == 2 */
   2.588 +
   2.589 +static inline unsigned long va_to_l1mfn(struct vcpu *v, unsigned long va)
   2.590 +{
   2.591 +    struct domain *d = v->domain;
   2.592 +    guest_l2_pgentry_t gl2e = {0};
   2.593 +
   2.594 +    __guest_get_l2e(v, va, &gl2e);
   2.595 +    
   2.596 +    if ( unlikely(!(guest_l2e_get_flags(gl2e) & _PAGE_PRESENT)) )
   2.597 +        return INVALID_MFN;
   2.598 +
   2.599 +    return __gpfn_to_mfn(d, l2e_get_pfn(gl2e));
   2.600 +}
   2.601  
   2.602  static int do_update_va_mapping(unsigned long va,
   2.603                                  l1_pgentry_t val,
   2.604 @@ -1900,8 +1970,6 @@ static int do_update_va_mapping(unsigned
   2.605  
   2.606      shadow_lock(d);
   2.607  
   2.608 -    //printk("%s(va=%p, val=%p)\n", __func__, (void *)va, (void *)l1e_get_intpte(val));
   2.609 -
   2.610      // This is actually overkill - we don't need to sync the L1 itself,
   2.611      // just everything involved in getting to this L1 (i.e. we need
   2.612      // linear_pg_table[l1_linear_offset(va)] to be in sync)...
   2.613 @@ -1919,7 +1987,6 @@ static int do_update_va_mapping(unsigned
   2.614      if ( shadow_mode_log_dirty(d) )
   2.615          __mark_dirty(d, va_to_l1mfn(v, va));
   2.616  
   2.617 -// out:
   2.618      shadow_unlock(d);
   2.619  
   2.620      return rc;
   2.621 @@ -1955,7 +2022,7 @@ static int do_update_va_mapping(unsigned
   2.622  static void shadow_update_pagetables(struct vcpu *v)
   2.623  {
   2.624      struct domain *d = v->domain;
   2.625 -#if defined (__x86_64__)
   2.626 +#if CONFIG_PAGING_LEVELS == 4
   2.627      unsigned long gmfn = ((v->arch.flags & TF_kernel_mode)?
   2.628                            pagetable_get_pfn(v->arch.guest_table) :
   2.629                            pagetable_get_pfn(v->arch.guest_table_user));
   2.630 @@ -1991,7 +2058,8 @@ static void shadow_update_pagetables(str
   2.631      /*
   2.632       *  arch.shadow_table
   2.633       */
   2.634 -    if ( unlikely(!(smfn = __shadow_status(d, gpfn, PGT_base_page_table))) ) {
   2.635 +    if ( unlikely(!(smfn = __shadow_status(d, gpfn, PGT_base_page_table))) ) 
   2.636 +    {
   2.637  #if CONFIG_PAGING_LEVELS == 2
   2.638          smfn = shadow_l2_table(d, gpfn, gmfn);
   2.639  #elif CONFIG_PAGING_LEVELS == 3
   2.640 @@ -2013,7 +2081,7 @@ static void shadow_update_pagetables(str
   2.641       * arch.shadow_vtable
   2.642       */
   2.643      if ( max_mode == SHM_external
   2.644 -#if CONFIG_PAGING_LEVELS >=4
   2.645 +#if CONFIG_PAGING_LEVELS >=3
   2.646           || max_mode & SHM_enable
   2.647  #endif
   2.648          )
   2.649 @@ -2068,7 +2136,7 @@ static void shadow_update_pagetables(str
   2.650          // XXX - maybe this can be optimized somewhat??
   2.651          local_flush_tlb();
   2.652      }
   2.653 -#endif
   2.654 +#endif /* CONFIG_PAGING_LEVELS == 2 */
   2.655  
   2.656  #if CONFIG_PAGING_LEVELS == 3
   2.657      /* FIXME: PAE code to be written */
   2.658 @@ -2373,7 +2441,7 @@ static int check_l2_table(
   2.659                 l2e_get_intpte(match));
   2.660      }
   2.661  
   2.662 -#ifdef __i386__
   2.663 +#if CONFIG_PAGING_LEVELS == 2
   2.664      if ( shadow_mode_external(d) )
   2.665          limit = L2_PAGETABLE_ENTRIES;
   2.666      else
   2.667 @@ -2405,7 +2473,7 @@ static int check_l2_table(
   2.668  int _check_pagetable(struct vcpu *v, char *s)
   2.669  {
   2.670      struct domain *d = v->domain;
   2.671 -#if defined (__x86_64__)
   2.672 +#if CONFIG_PAGING_LEVELS == 4
   2.673      pagetable_t pt = ((v->arch.flags & TF_kernel_mode)?
   2.674                        v->arch.guest_table : v->arch.guest_table_user);
   2.675  #else
   2.676 @@ -2447,7 +2515,7 @@ int _check_pagetable(struct vcpu *v, cha
   2.677      spl2e = (l2_pgentry_t *) map_domain_page(smfn);
   2.678  
   2.679      /* Go back and recurse. */
   2.680 -#ifdef __i386__
   2.681 +#if CONFIG_PAGING_LEVELS == 2
   2.682      if ( shadow_mode_external(d) )
   2.683          limit = L2_PAGETABLE_ENTRIES;
   2.684      else
   2.685 @@ -2551,60 +2619,109 @@ int _check_all_pagetables(struct vcpu *v
   2.686  
   2.687  #if CONFIG_PAGING_LEVELS == 3
   2.688  static unsigned long shadow_l3_table(
   2.689 -  struct domain *d, unsigned long gpfn, unsigned long gmfn)
   2.690 +    struct domain *d, unsigned long gpfn, unsigned long gmfn)
   2.691  {
   2.692 -    BUG();                      /* not implemenated yet */
   2.693 -    return 42;
   2.694 +    unsigned long smfn;
   2.695 +    l3_pgentry_t *spl3e;
   2.696 +
   2.697 +    perfc_incrc(shadow_l3_table_count);
   2.698 +
   2.699 +    if ( unlikely(!(smfn = alloc_shadow_page(d, gpfn, gmfn, PGT_l3_shadow))) )
   2.700 +    {
   2.701 +        printk("Couldn't alloc an L4 shadow for pfn=%lx mfn=%lx\n", gpfn, gmfn);
   2.702 +        BUG(); /* XXX Deal gracefully with failure. */
   2.703 +    }
   2.704 +
   2.705 +    spl3e = (l3_pgentry_t *)map_domain_page(smfn);
   2.706 +
   2.707 +    /* Make the self entry */
   2.708 +    spl3e[PAE_SHADOW_SELF_ENTRY] = l3e_from_pfn(smfn, __PAGE_HYPERVISOR);
   2.709 +
   2.710 +    if ( (PGT_base_page_table == PGT_l3_page_table) &&
   2.711 +         !shadow_mode_external(d) ) {
   2.712 +        int i;
   2.713 +        unsigned long g2mfn, s2mfn;
   2.714 +        l2_pgentry_t *spl2e;
   2.715 +        l3_pgentry_t *gpl3e;
   2.716 +
   2.717 +        /* Get the top entry */
   2.718 +        gpl3e = (l3_pgentry_t *)map_domain_page(gmfn);
   2.719 +
   2.720 +        if ( !(l3e_get_flags(gpl3e[L3_PAGETABLE_ENTRIES - 1]) & _PAGE_PRESENT) )
   2.721 +        {
   2.722 +            BUG();
   2.723 +        }
   2.724 +
   2.725 +        g2mfn = l3e_get_pfn(gpl3e[L3_PAGETABLE_ENTRIES - 1]);
   2.726 +
   2.727 +        /* NB. g2mfn should be same as g2pfn */
   2.728 +        if (!(s2mfn = __shadow_status(d, g2mfn, PGT_l2_shadow))) {
   2.729 +            if ( unlikely(!(s2mfn =
   2.730 +                    alloc_shadow_page(d, g2mfn, g2mfn, PGT_l2_shadow))) ) {
   2.731 +                printk("Couldn't alloc an L2 shadow for pfn=%lx mfn=%lx\n",
   2.732 +                    g2mfn, g2mfn);
   2.733 +                BUG(); /* XXX Deal gracefully with failure. */
   2.734 +            }
   2.735 +
   2.736 +            if (!get_shadow_ref(s2mfn))
   2.737 +                BUG();
   2.738 +        } 
   2.739 +            
   2.740 +        /* Map shadow L2 into shadow L3 */
   2.741 +        spl3e[L3_PAGETABLE_ENTRIES - 1] = l3e_from_pfn(s2mfn, _PAGE_PRESENT);
   2.742 +        shadow_update_min_max(smfn, L3_PAGETABLE_ENTRIES -1);
   2.743 +
   2.744 +        /*  
   2.745 +         * Xen private mappings. Do the similar things as
   2.746 +         * create_pae_xen_mappings().
   2.747 +         */
   2.748 +        spl2e = (l2_pgentry_t *)map_domain_page(s2mfn);
   2.749 +
   2.750 +        /*
   2.751 +         * When we free L2 pages, we need to tell if the page contains
   2.752 +         * Xen private mappings. Use the va_mask part.
   2.753 +         */
   2.754 +        frame_table[s2mfn].u.inuse.type_info |= 
   2.755 +            (unsigned long) 3 << PGT_score_shift; 
   2.756 +
   2.757 +        memset(spl2e, 0, 
   2.758 +               (L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES-1)) * sizeof(l2_pgentry_t));
   2.759 +
   2.760 +        memcpy(&spl2e[L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES-1)],
   2.761 +           &idle_pg_table_l2[L2_PAGETABLE_FIRST_XEN_SLOT],
   2.762 +           L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t));       
   2.763 +
   2.764 +        for ( i = 0; i < (PERDOMAIN_MBYTES >> (L2_PAGETABLE_SHIFT - 20)); i++ )
   2.765 +            spl2e[l2_table_offset(PERDOMAIN_VIRT_START) + i] =
   2.766 +                l2e_from_page(
   2.767 +                    virt_to_page(page_get_owner(&frame_table[gmfn])->arch.mm_perdomain_pt) + i, 
   2.768 +                    __PAGE_HYPERVISOR);
   2.769 +        for ( i = 0; i < (LINEARPT_MBYTES >> (L2_PAGETABLE_SHIFT - 20)); i++ )
   2.770 +            spl2e[l2_table_offset(LINEAR_PT_VIRT_START) + i] =
   2.771 +                (l3e_get_flags(gpl3e[i]) & _PAGE_PRESENT) ?
   2.772 +                l2e_from_pfn(l3e_get_pfn(gpl3e[i]), __PAGE_HYPERVISOR) :
   2.773 +                l2e_empty();
   2.774 +       
   2.775 +        unmap_domain_page(spl2e);
   2.776 +        unmap_domain_page(gpl3e);
   2.777 +    }
   2.778 +    unmap_domain_page(spl3e);
   2.779 +
   2.780 +    return smfn;
   2.781  }
   2.782 +
   2.783  static unsigned long gva_to_gpa_pae(unsigned long gva)
   2.784  {
   2.785      BUG();
   2.786      return 43;
   2.787  }
   2.788 -#endif
   2.789 -
   2.790 -#if CONFIG_PAGING_LEVELS >= 4
   2.791 +#endif /* CONFIG_PAGING_LEVELS == 3 */
   2.792 +
   2.793 +#if CONFIG_PAGING_LEVELS == 4
   2.794  /****************************************************************************/
   2.795  /* 64-bit shadow-mode code testing */
   2.796  /****************************************************************************/
   2.797  /*
   2.798 - * validate_bl2e_change()
   2.799 - * The code is for 32-bit VMX gues on 64-bit host.
   2.800 - * To sync guest L2.
   2.801 - */
   2.802 -static inline void
   2.803 -validate_bl2e_change(
   2.804 -  struct domain *d,
   2.805 -  guest_root_pgentry_t *new_gle_p,
   2.806 -  pgentry_64_t *shadow_l3,
   2.807 -  int index)
   2.808 -{
   2.809 -    int sl3_idx, sl2_idx;
   2.810 -    unsigned long sl2mfn, sl1mfn;
   2.811 -    pgentry_64_t *sl2_p;
   2.812 -
   2.813 -    /* Using guest l2 pte index to get shadow l3&l2 index
   2.814 -     * index: 0 ~ 1023, PAGETABLE_ENTRIES: 512
   2.815 -     */
   2.816 -    sl3_idx = index / (PAGETABLE_ENTRIES / 2);
   2.817 -    sl2_idx = (index % (PAGETABLE_ENTRIES / 2)) * 2;
   2.818 -
   2.819 -    sl2mfn = entry_get_pfn(shadow_l3[sl3_idx]);
   2.820 -    sl2_p = (pgentry_64_t *)map_domain_page(sl2mfn);
   2.821 -
   2.822 -    validate_pde_change(
   2.823 -        d, *(guest_l2_pgentry_t *)new_gle_p, (l2_pgentry_t *)&sl2_p[sl2_idx]);
   2.824 -
   2.825 -    /* Mapping the second l1 shadow page */
   2.826 -    if (entry_get_flags(sl2_p[sl2_idx]) & _PAGE_PRESENT) {
   2.827 -       sl1mfn = entry_get_pfn(sl2_p[sl2_idx]);
   2.828 -       sl2_p[sl2_idx + 1] =
   2.829 -            entry_from_pfn(sl1mfn + 1, entry_get_flags(sl2_p[sl2_idx]));
   2.830 -    }
   2.831 -    unmap_domain_page(sl2_p);
   2.832 -}
   2.833 -
   2.834 -/*
   2.835   * init_bl2() is for 32-bit VMX guest on 64-bit host
   2.836   * Using 1 shadow L4(l3) and 4 shadow L2s to simulate guest L2
   2.837   */
   2.838 @@ -2699,6 +2816,47 @@ static unsigned long shadow_l4_table(
   2.839      ESH_LOG("shadow_l4_table(%lx -> %lx)", gmfn, smfn);
   2.840      return smfn;
   2.841  }
   2.842 +#endif /* CONFIG_PAGING_LEVELS == 4 */
   2.843 +
   2.844 +#if CONFIG_PAGING_LEVELS >= 3
   2.845 +/*
   2.846 + * validate_bl2e_change()
   2.847 + * The code is for 32-bit VMX gues on 64-bit host.
   2.848 + * To sync guest L2.
   2.849 + */
   2.850 +
   2.851 +static inline void
   2.852 +validate_bl2e_change(
   2.853 +    struct domain *d,
   2.854 +    guest_root_pgentry_t *new_gle_p,
   2.855 +    pgentry_64_t *shadow_l3,
   2.856 +    int index)
   2.857 +{
   2.858 +    int sl3_idx, sl2_idx;
   2.859 +    unsigned long sl2mfn, sl1mfn;
   2.860 +    pgentry_64_t *sl2_p;
   2.861 +
   2.862 +    /* Using guest l2 pte index to get shadow l3&l2 index
   2.863 +     * index: 0 ~ 1023, PAGETABLE_ENTRIES: 512
   2.864 +     */
   2.865 +    sl3_idx = index / (PAGETABLE_ENTRIES / 2);
   2.866 +    sl2_idx = (index % (PAGETABLE_ENTRIES / 2)) * 2;
   2.867 +
   2.868 +    sl2mfn = entry_get_pfn(shadow_l3[sl3_idx]);
   2.869 +    sl2_p = (pgentry_64_t *)map_domain_page(sl2mfn);
   2.870 +
   2.871 +    validate_pde_change(
   2.872 +        d, *(guest_l2_pgentry_t *)new_gle_p, (l2_pgentry_t *)&sl2_p[sl2_idx]);
   2.873 +
   2.874 +    /* Mapping the second l1 shadow page */
   2.875 +    if (entry_get_flags(sl2_p[sl2_idx]) & _PAGE_PRESENT) {
   2.876 +       sl1mfn = entry_get_pfn(sl2_p[sl2_idx]);
   2.877 +       sl2_p[sl2_idx + 1] =
   2.878 +            entry_from_pfn(sl1mfn + 1, entry_get_flags(sl2_p[sl2_idx]));
   2.879 +    }
   2.880 +    unmap_domain_page(sl2_p);
   2.881 +
   2.882 +}
   2.883  
   2.884  /*
   2.885   * This shadow_mark_va_out_of_sync() is for 2M page shadow
   2.886 @@ -2715,7 +2873,6 @@ static void shadow_mark_va_out_of_sync_2
   2.887          BUG();
   2.888  }
   2.889  
   2.890 -
   2.891  static int get_shadow_mfn(struct domain *d, unsigned long gpfn, unsigned long *spmfn, u32 flag)
   2.892  {
   2.893      unsigned long gmfn;
   2.894 @@ -2764,7 +2921,7 @@ static int get_shadow_mfn(struct domain 
   2.895  static void shadow_map_into_current(struct vcpu *v,
   2.896    unsigned long va, unsigned int from, unsigned int to)
   2.897  {
   2.898 -    pgentry_64_t gle, sle;
   2.899 +    pgentry_64_t gle = {0}, sle;
   2.900      unsigned long gpfn, smfn;
   2.901  
   2.902      if (from == PAGING_L1 && to == PAGING_L2) {
   2.903 @@ -2836,8 +2993,9 @@ static void shadow_set_l2e_64(unsigned l
   2.904  }
   2.905  
   2.906  
   2.907 -static void shadow_set_l1e_64(unsigned long va, pgentry_64_t *sl1e_p,
   2.908 -  int create_l1_shadow)
   2.909 +static void shadow_set_l1e_64(
   2.910 +    unsigned long va, pgentry_64_t *sl1e_p,
   2.911 +    int create_l1_shadow)
   2.912  {
   2.913      struct vcpu *v = current;
   2.914      struct domain *d = v->domain;
   2.915 @@ -2848,19 +3006,21 @@ static void shadow_set_l1e_64(unsigned l
   2.916      int i;
   2.917      unsigned long orig_va = 0;
   2.918  
   2.919 -    if (d->arch.ops->guest_paging_levels == PAGING_L2) {
   2.920 +    if ( d->arch.ops->guest_paging_levels == PAGING_L2 ) 
   2.921 +    {
   2.922          /* This is for 32-bit VMX guest on 64-bit host */
   2.923          orig_va = va;
   2.924          va = va & (~((1<<L2_PAGETABLE_SHIFT_32)-1));
   2.925      }
   2.926  
   2.927 -    for (i = PAGING_L4; i >= PAGING_L2; i--) {
   2.928 +    for (i = PAGING_L4; i >= PAGING_L2; i--) 
   2.929 +    {
   2.930          if (!__rw_entry(v, va, &sle, SHADOW_ENTRY | GET_ENTRY | i)) {
   2.931              printk("<%s> i = %d\n", __func__, i);
   2.932              BUG();
   2.933          }
   2.934 -        if (!(entry_get_flags(sle) & _PAGE_PRESENT)) {
   2.935 -            if (create_l1_shadow) {
   2.936 +        if ( !(entry_get_flags(sle) & _PAGE_PRESENT) ) {
   2.937 +            if ( create_l1_shadow ) {
   2.938                  perfc_incrc(shadow_set_l3e_force_map);
   2.939                  shadow_map_into_current(v, va, i-1, i);
   2.940                  __rw_entry(v, va, &sle, SHADOW_ENTRY | GET_ENTRY | i);
   2.941 @@ -2870,12 +3030,12 @@ static void shadow_set_l1e_64(unsigned l
   2.942  #endif
   2.943              }
   2.944          }
   2.945 -        if(i < PAGING_L4)
   2.946 +        if( i < PAGING_L4 )
   2.947              shadow_update_min_max(entry_get_pfn(sle_up), table_offset_64(va, i));
   2.948          sle_up = sle;
   2.949      }
   2.950  
   2.951 -    if (d->arch.ops->guest_paging_levels == PAGING_L2) {
   2.952 +    if ( d->arch.ops->guest_paging_levels == PAGING_L2 ) {
   2.953          va = orig_va;
   2.954      }
   2.955  
   2.956 @@ -2914,7 +3074,7 @@ static inline int l2e_rw_fault(
   2.957      l1_pgentry_t sl1e;
   2.958      l1_pgentry_t old_sl1e;
   2.959      l2_pgentry_t sl2e;
   2.960 -    unsigned long nx = 0;
   2.961 +    u64 nx = 0;
   2.962      int put_ref_check = 0;
   2.963      /* Check if gpfn is 2M aligned */
   2.964  
   2.965 @@ -2929,7 +3089,7 @@ static inline int l2e_rw_fault(
   2.966      l2e_remove_flags(tmp_l2e, _PAGE_PSE);
   2.967      if (l2e_get_flags(gl2e) & _PAGE_NX) {
   2.968          l2e_remove_flags(tmp_l2e, _PAGE_NX);
   2.969 -        nx = 1UL << 63;
   2.970 +        nx = 1ULL << 63;
   2.971      }
   2.972  
   2.973  
   2.974 @@ -3037,115 +3197,162 @@ static inline int l2e_rw_fault(
   2.975   * else return 0.
   2.976   */
   2.977  #if defined( GUEST_PGENTRY_32 )
   2.978 -static inline int guest_page_fault(struct vcpu *v,
   2.979 -  unsigned long va, unsigned int error_code,
   2.980 -  guest_l2_pgentry_t *gpl2e, guest_l1_pgentry_t *gpl1e)
   2.981 +static inline int guest_page_fault(
   2.982 +    struct vcpu *v,
   2.983 +    unsigned long va, unsigned int error_code,
   2.984 +    guest_l2_pgentry_t *gpl2e, guest_l1_pgentry_t *gpl1e)
   2.985  {
   2.986      /* The following check for 32-bit guest on 64-bit host */
   2.987  
   2.988      __guest_get_l2e(v, va, gpl2e);
   2.989  
   2.990      /* Check the guest L2 page-table entry first*/
   2.991 -    if (unlikely(!(guest_l2e_get_flags(*gpl2e) & _PAGE_PRESENT)))
   2.992 +    if ( unlikely(!(guest_l2e_get_flags(*gpl2e) & _PAGE_PRESENT)) )
   2.993          return 1;
   2.994  
   2.995 -    if (error_code & ERROR_W) {
   2.996 -        if (unlikely(!(guest_l2e_get_flags(*gpl2e) & _PAGE_RW)))
   2.997 +    if ( error_code & ERROR_W ) 
   2.998 +    {
   2.999 +        if ( unlikely(!(guest_l2e_get_flags(*gpl2e) & _PAGE_RW)) )
  2.1000              return 1;
  2.1001      }
  2.1002 -    if (error_code & ERROR_U) {
  2.1003 -        if (unlikely(!(guest_l2e_get_flags(*gpl2e) & _PAGE_USER)))
  2.1004 +
  2.1005 +    if ( error_code & ERROR_U ) 
  2.1006 +    {
  2.1007 +        if ( unlikely(!(guest_l2e_get_flags(*gpl2e) & _PAGE_USER)) )
  2.1008              return 1;
  2.1009      }
  2.1010  
  2.1011 -    if (guest_l2e_get_flags(*gpl2e) & _PAGE_PSE)
  2.1012 +    if ( guest_l2e_get_flags(*gpl2e) & _PAGE_PSE )
  2.1013          return 0;
  2.1014  
  2.1015      __guest_get_l1e(v, va, gpl1e);
  2.1016  
  2.1017      /* Then check the guest L1 page-table entry */
  2.1018 -    if (unlikely(!(guest_l1e_get_flags(*gpl1e) & _PAGE_PRESENT)))
  2.1019 +    if ( unlikely(!(guest_l1e_get_flags(*gpl1e) & _PAGE_PRESENT)) )
  2.1020          return 1;
  2.1021  
  2.1022 -    if (error_code & ERROR_W) {
  2.1023 -        if (unlikely(!(guest_l1e_get_flags(*gpl1e) & _PAGE_RW)))
  2.1024 +    if ( error_code & ERROR_W ) 
  2.1025 +    {
  2.1026 +        if ( unlikely(!(guest_l1e_get_flags(*gpl1e) & _PAGE_RW)) )
  2.1027              return 1;
  2.1028      }
  2.1029 -    if (error_code & ERROR_U) {
  2.1030 -        if (unlikely(!(guest_l1e_get_flags(*gpl1e) & _PAGE_USER)))
  2.1031 +
  2.1032 +    if ( error_code & ERROR_U ) 
  2.1033 +    {
  2.1034 +        if ( unlikely(!(guest_l1e_get_flags(*gpl1e) & _PAGE_USER)) )
  2.1035              return 1;
  2.1036      }
  2.1037  
  2.1038      return 0;
  2.1039  }
  2.1040  #else
  2.1041 -static inline int guest_page_fault(struct vcpu *v,
  2.1042 -  unsigned long va, unsigned int error_code,
  2.1043 -  guest_l2_pgentry_t *gpl2e, guest_l1_pgentry_t *gpl1e)
  2.1044 +static inline int guest_page_fault(
  2.1045 +    struct vcpu *v,
  2.1046 +    unsigned long va, unsigned int error_code,
  2.1047 +    guest_l2_pgentry_t *gpl2e, guest_l1_pgentry_t *gpl1e)
  2.1048  {
  2.1049      struct domain *d = v->domain;
  2.1050 -    pgentry_64_t gle, *lva;
  2.1051 -    unsigned long mfn;
  2.1052 +    pgentry_64_t gle;
  2.1053 +    unsigned long gpfn = 0, mfn;
  2.1054      int i;
  2.1055  
  2.1056 -    __rw_entry(v, va, &gle, GUEST_ENTRY | GET_ENTRY | PAGING_L4);
  2.1057 -    if (unlikely(!(entry_get_flags(gle) & _PAGE_PRESENT)))
  2.1058 -        return 1;
  2.1059 -
  2.1060 -    if (error_code & ERROR_W) {
  2.1061 -        if (unlikely(!(entry_get_flags(gle) & _PAGE_RW)))
  2.1062 +    ASSERT( d->arch.ops->guest_paging_levels >= PAGING_L3 );
  2.1063 +
  2.1064 +#if CONFIG_PAGING_LEVELS == 4
  2.1065 +    if ( d->arch.ops->guest_paging_levels == PAGING_L4 ) 
  2.1066 +    {
  2.1067 +        __rw_entry(v, va, &gle, GUEST_ENTRY | GET_ENTRY | PAGING_L4);
  2.1068 +        if ( unlikely(!(entry_get_flags(gle) & _PAGE_PRESENT)) )
  2.1069              return 1;
  2.1070 +
  2.1071 +        if ( error_code & ERROR_W )
  2.1072 +        {
  2.1073 +            if ( unlikely(!(entry_get_flags(gle) & _PAGE_RW)) )
  2.1074 +                return 1;
  2.1075 +        }
  2.1076 +
  2.1077 +        if ( error_code & ERROR_U )
  2.1078 +        {
  2.1079 +            if ( unlikely(!(entry_get_flags(gle) & _PAGE_USER)) )
  2.1080 +                return 1;
  2.1081 +        }
  2.1082 +        gpfn = entry_get_pfn(gle);
  2.1083      }
  2.1084 -    if (error_code & ERROR_U) {
  2.1085 -        if (unlikely(!(entry_get_flags(gle) & _PAGE_USER)))
  2.1086 -            return 1;
  2.1087 +#endif
  2.1088 +
  2.1089 +#if CONFIG_PAGING_LEVELS >= 3
  2.1090 +    if ( d->arch.ops->guest_paging_levels == PAGING_L3 ) 
  2.1091 +    {
  2.1092 +        gpfn = pagetable_get_pfn(v->arch.guest_table);
  2.1093      }
  2.1094 -    for (i = PAGING_L3; i >= PAGING_L1; i--) {
  2.1095 +#endif
  2.1096 +
  2.1097 +    for ( i = PAGING_L3; i >= PAGING_L1; i-- ) 
  2.1098 +    {
  2.1099 +        pgentry_64_t *lva;
  2.1100          /*
  2.1101           * If it's not external mode, then mfn should be machine physical.
  2.1102           */
  2.1103 -        mfn = __gpfn_to_mfn(d, (entry_get_value(gle) >> PAGE_SHIFT));
  2.1104 -
  2.1105 -        lva = (pgentry_64_t *) phys_to_virt(
  2.1106 -          mfn << PAGE_SHIFT);
  2.1107 +        mfn = __gpfn_to_mfn(d, gpfn);
  2.1108 +
  2.1109 +        lva = (pgentry_64_t *) map_domain_page(mfn);
  2.1110          gle = lva[table_offset_64(va, i)];
  2.1111 -
  2.1112 -        if (unlikely(!(entry_get_flags(gle) & _PAGE_PRESENT)))
  2.1113 +        unmap_domain_page(lva);
  2.1114 +
  2.1115 +        gpfn = entry_get_pfn(gle);
  2.1116 +
  2.1117 +        if ( unlikely(!(entry_get_flags(gle) & _PAGE_PRESENT)) )
  2.1118              return 1;
  2.1119  
  2.1120 -        if (error_code & ERROR_W) {
  2.1121 -            if (unlikely(!(entry_get_flags(gle) & _PAGE_RW)))
  2.1122 -                return 1;
  2.1123 +        if ( i < PAGING_L3 ) 
  2.1124 +        {
  2.1125 +            if ( error_code & ERROR_W ) 
  2.1126 +            {
  2.1127 +                if ( unlikely(!(entry_get_flags(gle) & _PAGE_RW)) ) 
  2.1128 +                {
  2.1129 +                    if ( i == PAGING_L1 )
  2.1130 +                        if ( gpl1e )
  2.1131 +                            gpl1e->l1 = gle.lo;
  2.1132 +                    return 1;
  2.1133 +                }
  2.1134 +            }
  2.1135 +            if ( error_code & ERROR_U ) 
  2.1136 +            {
  2.1137 +                if ( unlikely(!(entry_get_flags(gle) & _PAGE_USER)) )
  2.1138 +                    return 1;
  2.1139 +            }
  2.1140          }
  2.1141 -        if (error_code & ERROR_U) {
  2.1142 -            if (unlikely(!(entry_get_flags(gle) & _PAGE_USER)))
  2.1143 -                return 1;
  2.1144 -        }
  2.1145 -
  2.1146 -        if (i == PAGING_L2) {
  2.1147 -            if (gpl2e)
  2.1148 +
  2.1149 +        if ( i == PAGING_L2 ) 
  2.1150 +        {
  2.1151 +            if ( gpl2e )
  2.1152                  gpl2e->l2 = gle.lo;
  2.1153 -
  2.1154 -            if (likely(entry_get_flags(gle) & _PAGE_PSE))
  2.1155 +            if ( likely(entry_get_flags(gle) & _PAGE_PSE) )
  2.1156                  return 0;
  2.1157 -
  2.1158          }
  2.1159  
  2.1160 -        if (i == PAGING_L1)
  2.1161 -            if (gpl1e)
  2.1162 +        if ( i == PAGING_L1 )
  2.1163 +            if ( gpl1e )
  2.1164                  gpl1e->l1 = gle.lo;
  2.1165      }
  2.1166 +
  2.1167      return 0;
  2.1168 +
  2.1169  }
  2.1170  #endif
  2.1171 +
  2.1172  static int shadow_fault_64(unsigned long va, struct cpu_user_regs *regs)
  2.1173  {
  2.1174      struct vcpu *v = current;
  2.1175      struct domain *d = v->domain;
  2.1176      guest_l2_pgentry_t gl2e;
  2.1177 -    guest_l1_pgentry_t gl1e;
  2.1178 +    guest_l1_pgentry_t gl1e, orig_gl1e;
  2.1179      l1_pgentry_t sl1e;
  2.1180  
  2.1181 +    gl1e = guest_l1e_empty(); gl2e = guest_l2e_empty();
  2.1182 +
  2.1183 +    sl1e = l1e_empty();
  2.1184 +
  2.1185      perfc_incrc(shadow_fault_calls);
  2.1186  
  2.1187      ESH_LOG("<shadow_fault_64> va=%lx,  rip = %lx, error code = %x\n",
  2.1188 @@ -3156,7 +3363,7 @@ static int shadow_fault_64(unsigned long
  2.1189       */
  2.1190      shadow_lock(d);
  2.1191  
  2.1192 -    /* XXX - FIX THIS COMMENT!!!
  2.1193 +    /*
  2.1194       * STEP 1. Check to see if this fault might have been caused by an
  2.1195       *         out-of-sync table page entry, or if we should pass this
  2.1196       *         fault onto the guest.
  2.1197 @@ -3166,67 +3373,122 @@ static int shadow_fault_64(unsigned long
  2.1198      /*
  2.1199       * STEP 2. Check if the fault belongs to guest
  2.1200       */
  2.1201 -    if ( guest_page_fault(
  2.1202 -            v, va, regs->error_code, &gl2e, &gl1e) ) {
  2.1203 +    if ( guest_page_fault(v, va, regs->error_code, &gl2e, &gl1e) ) 
  2.1204 +    {
  2.1205 +        if ( unlikely(shadow_mode_log_dirty(d)) && l1e_get_intpte(gl1e) != 0 )
  2.1206 +            goto check_writeable;
  2.1207 +        
  2.1208          goto fail;
  2.1209      }
  2.1210  
  2.1211 -    if ( unlikely(!(guest_l2e_get_flags(gl2e) & _PAGE_PSE)) ) {
  2.1212 -        /*
  2.1213 -         * Handle 4K pages here
  2.1214 -         */
  2.1215 -
  2.1216 -        /* Write fault? */
  2.1217 -        if ( regs->error_code & 2 ) {
  2.1218 -            if ( !l1pte_write_fault(v, &gl1e, &sl1e, va) ) {
  2.1219 +    if ( unlikely((guest_l2e_get_flags(gl2e) & _PAGE_PSE)) ) 
  2.1220 +        goto pse;
  2.1221 +
  2.1222 +    /*
  2.1223 +     * Handle 4K pages here
  2.1224 +     */
  2.1225 +check_writeable:
  2.1226 +    orig_gl1e = gl1e;
  2.1227 +    
  2.1228 +    /* Write fault? */
  2.1229 +    if ( regs->error_code & 2 ) 
  2.1230 +    {
  2.1231 +        int allow_writes = 0;
  2.1232 +
  2.1233 +        if ( unlikely(!(guest_l1e_get_flags(gl1e) & _PAGE_RW)) )
  2.1234 +        {
  2.1235 +            if ( shadow_mode_page_writable(va, regs, l1e_get_pfn(gl1e)) )
  2.1236 +            {
  2.1237 +                allow_writes = 1;
  2.1238 +                l1e_add_flags(gl1e, _PAGE_RW);
  2.1239 +            }
  2.1240 +            else
  2.1241 +            {
  2.1242 +                /* Write fault on a read-only mapping. */
  2.1243 +                SH_VVLOG("shadow_fault - EXIT: wr fault on RO page (%" PRIpte ")", 
  2.1244 +                         l1e_get_intpte(gl1e));
  2.1245 +                perfc_incrc(shadow_fault_bail_ro_mapping);
  2.1246                  goto fail;
  2.1247              }
  2.1248 -        } else {
  2.1249 -            l1pte_read_fault(d, &gl1e, &sl1e);
  2.1250 +        }
  2.1251 +
  2.1252 +        if ( !l1pte_write_fault(v, &gl1e, &sl1e, va) ) 
  2.1253 +        {
  2.1254 +            SH_VVLOG("shadow_fault - EXIT: l1pte_write_fault failed");
  2.1255 +            perfc_incrc(write_fault_bail);
  2.1256 +            shadow_unlock(d);
  2.1257 +            return 0;
  2.1258          }
  2.1259 -        /*
  2.1260 -         * STEP 3. Write guest/shadow l2e back
  2.1261 -         */
  2.1262 -        if (unlikely(!__guest_set_l1e(v, va, &gl1e))) {
  2.1263 + 
  2.1264 +        if (allow_writes)
  2.1265 +            l1e_remove_flags(gl1e, _PAGE_RW);
  2.1266 +    }
  2.1267 +    else 
  2.1268 +    {
  2.1269 +        if ( !l1pte_read_fault(d, &gl1e, &sl1e) )
  2.1270 +        {
  2.1271 +            SH_VVLOG("shadow_fault - EXIT: l1pte_read_fault failed");
  2.1272 +            perfc_incrc(read_fault_bail);
  2.1273 +            shadow_unlock(d);
  2.1274 +            return 0;
  2.1275 +        }
  2.1276 +    }
  2.1277 +
  2.1278 +    /*
  2.1279 +     * STEP 3. Write the modified shadow PTE and guest PTE back to the tables
  2.1280 +     */
  2.1281 +    if ( l1e_has_changed(orig_gl1e, gl1e, PAGE_FLAG_MASK) )
  2.1282 +    {
  2.1283 +        if (unlikely(!__guest_set_l1e(v, va, &gl1e))) 
  2.1284              domain_crash_synchronous();
  2.1285 -        }
  2.1286 -
  2.1287 -        ESH_LOG("gl1e: %lx, sl1e: %lx\n", l1e_get_intpte(gl1e), l1e_get_intpte(sl1e));
  2.1288 -        shadow_set_l1e_64(va, (pgentry_64_t *)&sl1e, 1);
  2.1289 -        /*
  2.1290 -         *  if necessary, record the page table page as dirty
  2.1291 -         */
  2.1292 -         if ( unlikely(shadow_mode_log_dirty(d)) )
  2.1293 +
  2.1294 +        // if necessary, record the page table page as dirty
  2.1295 +        if ( unlikely(shadow_mode_log_dirty(d)) )
  2.1296              __mark_dirty(d, __gpfn_to_mfn(d, l2e_get_pfn(gl2e)));
  2.1297 -
  2.1298 -    } else {
  2.1299 -        /*
  2.1300 -         * Handle 2M pages here
  2.1301 -         */
  2.1302 -        /* Write fault? */
  2.1303 -        if ( regs->error_code & 2 ) {
  2.1304 -            if ( !l2e_rw_fault(v, (l2_pgentry_t *)&gl2e, va, WRITE_FAULT) ) {
  2.1305 -                goto fail;
  2.1306 -            }
  2.1307 -        } else {
  2.1308 -            l2e_rw_fault(v, (l2_pgentry_t *)&gl2e, va, READ_FAULT);
  2.1309 +    }
  2.1310 +
  2.1311 +    shadow_set_l1e_64(va, (pgentry_64_t *)&sl1e, 1);
  2.1312 +
  2.1313 +    perfc_incrc(shadow_fault_fixed);
  2.1314 +    d->arch.shadow_fault_count++;
  2.1315 +
  2.1316 +    shadow_unlock(d);
  2.1317 +
  2.1318 +    return EXCRET_fault_fixed;
  2.1319 +
  2.1320 +pse:
  2.1321 +    /*
  2.1322 +     * Handle 2M pages here
  2.1323 +     */
  2.1324 +    if ( unlikely(!shadow_mode_external(d)) )
  2.1325 +        BUG();
  2.1326 +
  2.1327 +    /* Write fault? */
  2.1328 +    if ( regs->error_code & 2 ) 
  2.1329 +    {
  2.1330 +        if ( !l2e_rw_fault(v, (l2_pgentry_t *)&gl2e, va, WRITE_FAULT) ) 
  2.1331 +        {
  2.1332 +            goto fail;
  2.1333          }
  2.1334 -
  2.1335 -        /*
  2.1336 -         * STEP 3. Write guest/shadow l2e back
  2.1337 -         */
  2.1338 -
  2.1339 -        if ( unlikely(!__guest_set_l2e(v, va, &gl2e)) ) {
  2.1340 -            domain_crash_synchronous();
  2.1341 -        }
  2.1342 -
  2.1343 -        /*
  2.1344 -         * Todo: if necessary, record the page table page as dirty
  2.1345 -         */
  2.1346 -
  2.1347 -
  2.1348 +    } 
  2.1349 +    else 
  2.1350 +    {
  2.1351 +        l2e_rw_fault(v, (l2_pgentry_t *)&gl2e, va, READ_FAULT);
  2.1352      }
  2.1353  
  2.1354 +    /*
  2.1355 +     * STEP 3. Write guest/shadow l2e back
  2.1356 +     */
  2.1357 +
  2.1358 +    if ( unlikely(!__guest_set_l2e(v, va, &gl2e)) ) 
  2.1359 +    {
  2.1360 +        domain_crash_synchronous();
  2.1361 +    }
  2.1362 +
  2.1363 +    /*
  2.1364 +     * Todo: if necessary, record the page table page as dirty
  2.1365 +     */
  2.1366 +
  2.1367      perfc_incrc(shadow_fault_fixed);
  2.1368      d->arch.shadow_fault_count++;
  2.1369  
  2.1370 @@ -3257,6 +3519,7 @@ static void shadow_invlpg_64(struct vcpu
  2.1371      shadow_unlock(d);
  2.1372  }
  2.1373  
  2.1374 +#if CONFIG_PAGING_LEVELS == 4
  2.1375  static unsigned long gva_to_gpa_64(unsigned long gva)
  2.1376  {
  2.1377      struct vcpu *v = current;
  2.1378 @@ -3273,13 +3536,11 @@ static unsigned long gva_to_gpa_64(unsig
  2.1379          gpa = guest_l1e_get_paddr(gl1e) + (gva & ~PAGE_MASK);
  2.1380  
  2.1381      return gpa;
  2.1382 -
  2.1383  }
  2.1384  
  2.1385  #ifndef GUEST_PGENTRY_32
  2.1386 -
  2.1387  struct shadow_ops MODE_F_HANDLER = {
  2.1388 -    .guest_paging_levels              = 4,
  2.1389 +    .guest_paging_levels        = 4,
  2.1390      .invlpg                     = shadow_invlpg_64,
  2.1391      .fault                      = shadow_fault_64,
  2.1392      .update_pagetables          = shadow_update_pagetables,
  2.1393 @@ -3290,9 +3551,11 @@ struct shadow_ops MODE_F_HANDLER = {
  2.1394      .is_out_of_sync             = is_out_of_sync,
  2.1395      .gva_to_gpa                 = gva_to_gpa_64,
  2.1396  };
  2.1397 -#endif
  2.1398 -
  2.1399 -#endif
  2.1400 +#endif /* GUEST_PGENTRY_32 */
  2.1401 +#endif /* CONFIG_PAGING_LEVELS == 4 */
  2.1402 +
  2.1403 +#endif /* CONFIG_PAGING_LEVELS >= 3 */
  2.1404 +
  2.1405  
  2.1406  #if CONFIG_PAGING_LEVELS == 2
  2.1407  struct shadow_ops MODE_A_HANDLER = {
  2.1408 @@ -3309,10 +3572,11 @@ struct shadow_ops MODE_A_HANDLER = {
  2.1409  };
  2.1410  
  2.1411  #elif CONFIG_PAGING_LEVELS == 3
  2.1412 +
  2.1413  struct shadow_ops MODE_B_HANDLER = {
  2.1414 -    .guest_paging_levels              = 3,
  2.1415 -    .invlpg                     = shadow_invlpg_32,
  2.1416 -    .fault                      = shadow_fault_32,
  2.1417 +    .guest_paging_levels        = 3,
  2.1418 +    .invlpg                     = shadow_invlpg_64,
  2.1419 +    .fault                      = shadow_fault_64,
  2.1420      .update_pagetables          = shadow_update_pagetables,
  2.1421      .sync_all                   = sync_all,
  2.1422      .remove_all_write_access    = remove_all_write_access,
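The reworked shadow_fault path above first splits on the PSE bit of the guest L2 entry (4K vs 2M pages) and then on bit 1 of the hardware page-fault error code, which is what the "regs->error_code & 2" tests select. A minimal standalone sketch of that error-code decoding, stated as architectural fact rather than code from this changeset:

/* Sketch only: x86 page-fault error-code bits the handler relies on.
 * Bit 0 = present, bit 1 = write access, bit 2 = user mode. */
static inline int pf_is_write_fault(unsigned long error_code)
{
    return (error_code & 2) != 0;   /* same test as "regs->error_code & 2" above */
}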
     3.1 --- a/xen/arch/x86/shadow32.c	Fri Nov 11 21:59:05 2005 +0100
     3.2 +++ b/xen/arch/x86/shadow32.c	Fri Nov 11 21:59:33 2005 +0100
     3.3 @@ -31,6 +31,8 @@
     3.4  #include <xen/trace.h>
     3.5  
     3.6  #define MFN_PINNED(_x) (frame_table[_x].u.inuse.type_info & PGT_pinned)
     3.7 +#define va_to_l1mfn(_ed, _va) \
     3.8 +    (l2e_get_pfn(linear_l2_table(_ed)[_va>>L2_PAGETABLE_SHIFT]))
     3.9  
    3.10  static void shadow_free_snapshot(struct domain *d,
    3.11                                   struct out_of_sync_entry *entry);
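The va_to_l1mfn() macro added above is relocated here from asm-x86/page.h (removed further below); it yields the machine frame of the L1 table currently mapping a virtual address by indexing the linear L2 table. A hedged usage sketch, with illustrative variable names only:

    /* Sketch: machine frame of the L1 page table backing 'va' for vcpu 'v'. */
    unsigned long l1mfn = va_to_l1mfn(v, va);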
     4.1 --- a/xen/arch/x86/shadow_public.c	Fri Nov 11 21:59:05 2005 +0100
     4.2 +++ b/xen/arch/x86/shadow_public.c	Fri Nov 11 21:59:33 2005 +0100
     4.3 @@ -64,6 +64,9 @@ int shadow_set_guest_paging_levels(struc
     4.4  #if CONFIG_PAGING_LEVELS == 2
     4.5          if ( d->arch.ops != &MODE_A_HANDLER )
     4.6              d->arch.ops = &MODE_A_HANDLER;
     4.7 +#elif CONFIG_PAGING_LEVELS == 3
     4.8 +        if ( d->arch.ops != &MODE_B_HANDLER )
     4.9 +            d->arch.ops = &MODE_B_HANDLER;
    4.10  #elif CONFIG_PAGING_LEVELS == 4
    4.11          if ( d->arch.ops != &MODE_D_HANDLER )
    4.12              d->arch.ops = &MODE_D_HANDLER;
    4.13 @@ -138,7 +141,92 @@ unsigned long gva_to_gpa(unsigned long g
    4.14  }
    4.15  /****************************************************************************/
    4.16  /****************************************************************************/
    4.17 -#if CONFIG_PAGING_LEVELS >= 4
    4.18 +#if CONFIG_PAGING_LEVELS >= 3
    4.19 +
    4.20 +static void inline
    4.21 +free_shadow_fl1_table(struct domain *d, unsigned long smfn)
    4.22 +{
    4.23 +    l1_pgentry_t *pl1e = map_domain_page(smfn);
    4.24 +    int i;
    4.25 +
    4.26 +    for (i = 0; i < L1_PAGETABLE_ENTRIES; i++)
    4.27 +        put_page_from_l1e(pl1e[i], d);
    4.28 +}
    4.29 +
    4.30 +/*
    4.31 + * Free l2, l3, l4 shadow tables
    4.32 + */
    4.33 +
    4.34 +void free_fake_shadow_l2(struct domain *d,unsigned long smfn);
    4.35 +
    4.36 +static void inline
    4.37 +free_shadow_tables(struct domain *d, unsigned long smfn, u32 level)
    4.38 +{
    4.39 +    pgentry_64_t *ple = map_domain_page(smfn);
    4.40 +    int i, external = shadow_mode_external(d);
    4.41 +
    4.42 +#if CONFIG_PAGING_LEVELS >=3
    4.43 +    if ( d->arch.ops->guest_paging_levels == PAGING_L2 )
    4.44 +    {
    4.45 +        struct pfn_info *page = &frame_table[smfn];
    4.46 +        for ( i = 0; i < PDP_ENTRIES; i++ )
    4.47 +        {
    4.48 +            if ( entry_get_flags(ple[i]) & _PAGE_PRESENT )
    4.49 +                free_fake_shadow_l2(d,entry_get_pfn(ple[i]));
    4.50 +        }
    4.51 +
    4.52 +        page = &frame_table[entry_get_pfn(ple[0])];
    4.53 +        free_domheap_pages(page, SL2_ORDER);
    4.54 +        unmap_domain_page(ple);
    4.55 +    }
    4.56 +    else
    4.57 +#endif
    4.58 +    {
    4.59 +        /*
    4.60 +         * No Xen mappings in external pages
    4.61 +         */
    4.62 +        if ( external )
    4.63 +        {
    4.64 +            for ( i = 0; i < PAGETABLE_ENTRIES; i++ )
    4.65 +                if ( entry_get_flags(ple[i]) & _PAGE_PRESENT )
    4.66 +                    put_shadow_ref(entry_get_pfn(ple[i]));
    4.67 +        } 
    4.68 +        else
    4.69 +        {
    4.70 +            for ( i = 0; i < PAGETABLE_ENTRIES; i++ )
    4.71 +            {
    4.72 +                /* 
    4.73 +                 * List the skip/break conditions to avoid freeing
    4.74 +                 * Xen private mappings.
    4.75 +                 */
    4.76 +#if CONFIG_PAGING_LEVELS == 2
    4.77 +                if ( level == PAGING_L2 && !is_guest_l2_slot(0, i) )
    4.78 +                    continue;
    4.79 +#endif
    4.80 +#if CONFIG_PAGING_LEVELS == 3
    4.81 +                if ( level == PAGING_L3 && i == L3_PAGETABLE_ENTRIES )
    4.82 +                    break;
    4.83 +                if ( level == PAGING_L2 )
    4.84 +                {
    4.85 +                    struct pfn_info *page = &frame_table[smfn]; 
    4.86 +                    if ( is_xen_l2_slot(page->u.inuse.type_info, i) )
    4.87 +                        continue;
    4.88 +                }
    4.89 +#endif
    4.90 +#if CONFIG_PAGING_LEVELS == 4
    4.91 +                if ( level == PAGING_L4 && !is_guest_l4_slot(i))
    4.92 +                    continue;
    4.93 +#endif
    4.94 +                if ( entry_get_flags(ple[i]) & _PAGE_PRESENT )
    4.95 +                    put_shadow_ref(entry_get_pfn(ple[i]));
    4.96 +            }
    4.97 +        }
    4.98 +        unmap_domain_page(ple);
    4.99 +    }
   4.100 +}
   4.101 +#endif
   4.102 +
   4.103 +#if CONFIG_PAGING_LEVELS == 4
   4.104  /*
   4.105   * Convert PAE 3-level page-table to 4-level page-table
   4.106   */
   4.107 @@ -203,55 +291,6 @@ static void alloc_monitor_pagetable(stru
   4.108      v->arch.monitor_vtable = (l2_pgentry_t *) mpl4e;
   4.109  }
   4.110  
   4.111 -static void inline
   4.112 -free_shadow_fl1_table(struct domain *d, unsigned long smfn)
   4.113 -{
   4.114 -    l1_pgentry_t *pl1e = map_domain_page(smfn);
   4.115 -    int i;
   4.116 -
   4.117 -    for (i = 0; i < L1_PAGETABLE_ENTRIES; i++)
   4.118 -        put_page_from_l1e(pl1e[i], d);
   4.119 -}
   4.120 -
   4.121 -/*
   4.122 - * Free l2, l3, l4 shadow tables
   4.123 - */
   4.124 -
   4.125 -void free_fake_shadow_l2(struct domain *d,unsigned long smfn);
   4.126 -
   4.127 -static void inline
   4.128 -free_shadow_tables(struct domain *d, unsigned long smfn, u32 level)
   4.129 -{
   4.130 -    pgentry_64_t *ple = map_domain_page(smfn);
   4.131 -    int i, external = shadow_mode_external(d);
   4.132 -    struct pfn_info *page = &frame_table[smfn];
   4.133 -
   4.134 -    if (d->arch.ops->guest_paging_levels == PAGING_L2)
   4.135 -    {
   4.136 -#if CONFIG_PAGING_LEVELS >=4
   4.137 -        for ( i = 0; i < PDP_ENTRIES; i++ )
   4.138 -        {
   4.139 -            if (entry_get_flags(ple[i]) & _PAGE_PRESENT )
   4.140 -                free_fake_shadow_l2(d,entry_get_pfn(ple[i]));
   4.141 -        }
   4.142 -   
   4.143 -        page = &frame_table[entry_get_pfn(ple[0])];
   4.144 -        free_domheap_pages(page, SL2_ORDER);
   4.145 -        unmap_domain_page(ple);
   4.146 -#endif
   4.147 -    }
   4.148 -    else
   4.149 -    {
   4.150 -        for ( i = 0; i < PAGETABLE_ENTRIES; i++ )
   4.151 -            if ( external || is_guest_l4_slot(i) )
   4.152 -                if ( entry_get_flags(ple[i]) & _PAGE_PRESENT )
   4.153 -                    put_shadow_ref(entry_get_pfn(ple[i]));
   4.154 -
   4.155 -        unmap_domain_page(ple);
   4.156 -    }
   4.157 -}
   4.158 -
   4.159 -
   4.160  void free_monitor_pagetable(struct vcpu *v)
   4.161  {
   4.162      unsigned long mfn;
   4.163 @@ -299,11 +338,9 @@ static void alloc_monitor_pagetable(stru
   4.164      mpl2e = (l2_pgentry_t *)map_domain_page(mmfn);
   4.165      memset(mpl2e, 0, PAGE_SIZE);
   4.166  
   4.167 -#ifdef __i386__ /* XXX screws x86/64 build */
   4.168      memcpy(&mpl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE], 
   4.169             &idle_pg_table[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
   4.170             HYPERVISOR_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));
   4.171 -#endif
   4.172  
   4.173      mpl2e[l2_table_offset(PERDOMAIN_VIRT_START)] =
   4.174          l2e_from_paddr(__pa(d->arch.mm_perdomain_pt),
   4.175 @@ -333,7 +370,7 @@ void free_monitor_pagetable(struct vcpu 
   4.176      unsigned long mfn;
   4.177  
   4.178      ASSERT( pagetable_get_paddr(v->arch.monitor_table) );
   4.179 -    
   4.180 +
   4.181      mpl2e = v->arch.monitor_vtable;
   4.182  
   4.183      /*
   4.184 @@ -517,13 +554,11 @@ free_shadow_hl2_table(struct domain *d, 
   4.185  
   4.186      SH_VVLOG("%s: smfn=%lx freed", __func__, smfn);
   4.187  
   4.188 -#ifdef __i386__
   4.189 +#if CONFIG_PAGING_LEVELS == 2
   4.190      if ( shadow_mode_external(d) )
   4.191          limit = L2_PAGETABLE_ENTRIES;
   4.192      else
   4.193          limit = DOMAIN_ENTRIES_PER_L2_PAGETABLE;
   4.194 -#else
   4.195 -    limit = 0; /* XXX x86/64 XXX */
   4.196  #endif
   4.197  
   4.198      for ( i = 0; i < limit; i++ )
   4.199 @@ -584,10 +619,11 @@ void free_shadow_page(unsigned long smfn
   4.200  
   4.201      ASSERT( ! IS_INVALID_M2P_ENTRY(gpfn) );
   4.202  #if CONFIG_PAGING_LEVELS >=4
   4.203 -    if (type == PGT_fl1_shadow) {
   4.204 +    if ( type == PGT_fl1_shadow ) 
   4.205 +    {
   4.206          unsigned long mfn;
   4.207          mfn = __shadow_status(d, gpfn, PGT_fl1_shadow);
   4.208 -        if (!mfn)
   4.209 +        if ( !mfn )
   4.210              gpfn |= (1UL << 63);
   4.211      }
   4.212  #endif
   4.213 @@ -602,7 +638,7 @@ void free_shadow_page(unsigned long smfn
   4.214          free_shadow_l1_table(d, smfn);
   4.215          d->arch.shadow_page_count--;
   4.216          break;
   4.217 -#if defined (__i386__)
   4.218 +#if CONFIG_PAGING_LEVELS == 2
   4.219      case PGT_l2_shadow:
   4.220          perfc_decr(shadow_l2_pages);
   4.221          shadow_demote(d, gpfn, gmfn);
   4.222 @@ -616,7 +652,8 @@ void free_shadow_page(unsigned long smfn
   4.223          free_shadow_hl2_table(d, smfn);
   4.224          d->arch.hl2_page_count--;
   4.225          break;
   4.226 -#else
   4.227 +#endif
   4.228 +#if CONFIG_PAGING_LEVELS >= 3
   4.229      case PGT_l2_shadow:
   4.230      case PGT_l3_shadow:
   4.231      case PGT_l4_shadow:
   4.232 @@ -630,7 +667,6 @@ void free_shadow_page(unsigned long smfn
   4.233          d->arch.shadow_page_count--;
   4.234          break;
   4.235  #endif
   4.236 -
   4.237      case PGT_snapshot:
    4.238          perfc_decr(snapshot_pages);
   4.239          break;
   4.240 @@ -782,7 +818,7 @@ void free_shadow_pages(struct domain *d)
   4.241          }
   4.242      }
   4.243  
   4.244 -#if defined (__i386__)
   4.245 +#if CONFIG_PAGING_LEVELS == 2
   4.246      // For external shadows, remove the monitor table's refs
   4.247      //
   4.248      if ( shadow_mode_external(d) )
   4.249 @@ -928,7 +964,7 @@ int __shadow_mode_enable(struct domain *
   4.250      ASSERT(!(d->arch.shadow_mode & ~mode));
   4.251  
   4.252  #if defined(CONFIG_PAGING_LEVELS)
   4.253 -    if(!shadow_set_guest_paging_levels(d, 
   4.254 +    if(!shadow_set_guest_paging_levels(d,
   4.255                                         CONFIG_PAGING_LEVELS)) {
   4.256          printk("Unsupported guest paging levels\n");
   4.257          domain_crash_synchronous(); /* need to take a clean path */
   4.258 @@ -968,7 +1004,7 @@ int __shadow_mode_enable(struct domain *
   4.259          else
   4.260              v->arch.shadow_vtable = NULL;
   4.261          
   4.262 -#if defined (__i386__)
   4.263 +#if CONFIG_PAGING_LEVELS == 2
   4.264          /*
   4.265           * arch.hl2_vtable
   4.266           */
   4.267 @@ -1408,7 +1444,7 @@ void shadow_l1_normal_pt_update(
   4.268      sl1mfn = __shadow_status(current->domain, pa >> PAGE_SHIFT, PGT_l1_shadow);
   4.269      if ( sl1mfn )
   4.270      {
   4.271 -        SH_VVLOG("shadow_l1_normal_pt_update pa=%p, gpte=%" PRIpte,
   4.272 +        SH_VVLOG("shadow_l1_normal_pt_update pa=%p, gpde=%" PRIpte,
   4.273                   (void *)pa, l1e_get_intpte(gpte));
   4.274          l1pte_propagate_from_guest(current->domain, gpte, &spte);
   4.275  
   4.276 @@ -1447,7 +1483,7 @@ void shadow_l2_normal_pt_update(
   4.277  #if CONFIG_PAGING_LEVELS >= 3
   4.278  void shadow_l3_normal_pt_update(
   4.279      struct domain *d,
   4.280 -    unsigned long pa, l3_pgentry_t gpde,
   4.281 +    unsigned long pa, l3_pgentry_t l3e,
   4.282      struct domain_mmap_cache *cache)
   4.283  {
   4.284      unsigned long sl3mfn;
   4.285 @@ -1458,11 +1494,10 @@ void shadow_l3_normal_pt_update(
   4.286      sl3mfn = __shadow_status(current->domain, pa >> PAGE_SHIFT, PGT_l3_shadow);
   4.287      if ( sl3mfn )
   4.288      {
   4.289 -        SH_VVLOG("shadow_l3_normal_pt_update pa=%p, gpde=%" PRIpte,
   4.290 -                 (void *)pa, l3e_get_intpte(gpde));
   4.291 -
   4.292 +        SH_VVLOG("shadow_l3_normal_pt_update pa=%p, l3e=%" PRIpte,
   4.293 +                 (void *)pa, l3e_get_intpte(l3e));
   4.294          spl3e = (pgentry_64_t *) map_domain_page_with_cache(sl3mfn, cache);
   4.295 -        validate_entry_change(d, (pgentry_64_t *) &gpde,
   4.296 +        validate_entry_change(d, (pgentry_64_t *) &l3e,
   4.297                                &spl3e[(pa & ~PAGE_MASK) / sizeof(l3_pgentry_t)], 
   4.298                                shadow_type_to_level(PGT_l3_shadow));
   4.299          unmap_domain_page_with_cache(spl3e, cache);
   4.300 @@ -1475,7 +1510,7 @@ void shadow_l3_normal_pt_update(
   4.301  #if CONFIG_PAGING_LEVELS >= 4
   4.302  void shadow_l4_normal_pt_update(
   4.303      struct domain *d,
   4.304 -    unsigned long pa, l4_pgentry_t gpde,
   4.305 +    unsigned long pa, l4_pgentry_t l4e,
   4.306      struct domain_mmap_cache *cache)
   4.307  {
   4.308      unsigned long sl4mfn;
   4.309 @@ -1486,11 +1521,10 @@ void shadow_l4_normal_pt_update(
   4.310      sl4mfn = __shadow_status(current->domain, pa >> PAGE_SHIFT, PGT_l4_shadow);
   4.311      if ( sl4mfn )
   4.312      {
   4.313 -        SH_VVLOG("shadow_l4_normal_pt_update pa=%p, gpde=%" PRIpte,
   4.314 -                 (void *)pa, l4e_get_intpte(gpde));
   4.315 -
   4.316 +        SH_VVLOG("shadow_l4_normal_pt_update pa=%p, l4e=%" PRIpte,
   4.317 +                 (void *)pa, l4e_get_intpte(l4e));
   4.318          spl4e = (pgentry_64_t *)map_domain_page_with_cache(sl4mfn, cache);
   4.319 -        validate_entry_change(d, (pgentry_64_t *)&gpde,
   4.320 +        validate_entry_change(d, (pgentry_64_t *)&l4e,
   4.321                                &spl4e[(pa & ~PAGE_MASK) / sizeof(l4_pgentry_t)], 
   4.322                                shadow_type_to_level(PGT_l4_shadow));
   4.323          unmap_domain_page_with_cache(spl4e, cache);
   4.324 @@ -1555,8 +1589,6 @@ remove_shadow(struct domain *d, unsigned
   4.325  {
   4.326      unsigned long smfn;
   4.327  
   4.328 -    //printk("%s(gpfn=%lx, type=%x)\n", __func__, gpfn, stype);
   4.329 -
   4.330      shadow_lock(d);
   4.331  
   4.332      while ( stype >= PGT_l1_shadow )
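The new free_shadow_tables() earlier in this file handles a shadow table at any level with one loop: skip the Xen-private slots, then drop one shadow reference per present entry. A condensed sketch of that pattern; skip_xen_private_slot() is a hypothetical stand-in for the level-specific tests shown above, not a real helper:

    /* Sketch of the per-slot loop for the non-external case. */
    for ( i = 0; i < PAGETABLE_ENTRIES; i++ )
    {
        if ( skip_xen_private_slot(level, i) )   /* hypothetical helper */
            continue;
        if ( entry_get_flags(ple[i]) & _PAGE_PRESENT )
            put_shadow_ref(entry_get_pfn(ple[i]));
    }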
     5.1 --- a/xen/arch/x86/x86_32/traps.c	Fri Nov 11 21:59:05 2005 +0100
     5.2 +++ b/xen/arch/x86/x86_32/traps.c	Fri Nov 11 21:59:33 2005 +0100
     5.3 @@ -84,32 +84,37 @@ void show_registers(struct cpu_user_regs
     5.4  
     5.5  void show_page_walk(unsigned long addr)
     5.6  {
     5.7 -    unsigned long pfn = read_cr3() >> PAGE_SHIFT;
     5.8 +    unsigned long mfn = read_cr3() >> PAGE_SHIFT;
     5.9      intpte_t *ptab, ent;
    5.10 +    unsigned long pfn; 
    5.11  
    5.12      printk("Pagetable walk from %08lx:\n", addr);
    5.13  
    5.14  #ifdef CONFIG_X86_PAE
    5.15 -    ptab = map_domain_page(pfn);
    5.16 -    ent = ptab[l3_table_offset(addr)];
    5.17 -    printk(" L3 = %"PRIpte"\n", ent);
    5.18 +    ptab = map_domain_page(mfn);
    5.19 +    ent  = ptab[l3_table_offset(addr)];
    5.20 +    pfn  = machine_to_phys_mapping[(u32)(ent >> PAGE_SHIFT)]; 
    5.21 +    printk(" L3 = %"PRIpte" %08lx\n", ent, pfn);
    5.22      unmap_domain_page(ptab);
    5.23      if ( !(ent & _PAGE_PRESENT) )
    5.24          return;
    5.25 -    pfn = ent >> PAGE_SHIFT;
    5.26 +    mfn = ent >> PAGE_SHIFT;
    5.27  #endif
    5.28  
    5.29 -    ptab = map_domain_page(pfn);
    5.30 -    ent = ptab[l2_table_offset(addr)];
    5.31 -    printk("  L2 = %"PRIpte" %s\n", ent, (ent & _PAGE_PSE) ? "(PSE)" : "");
    5.32 +    ptab = map_domain_page(mfn);
    5.33 +    ent  = ptab[l2_table_offset(addr)];
    5.34 +    pfn  = machine_to_phys_mapping[(u32)(ent >> PAGE_SHIFT)]; 
    5.35 +    printk("  L2 = %"PRIpte" %08lx %s\n", ent, pfn, 
    5.36 +           (ent & _PAGE_PSE) ? "(PSE)" : "");
    5.37      unmap_domain_page(ptab);
    5.38      if ( !(ent & _PAGE_PRESENT) || (ent & _PAGE_PSE) )
    5.39          return;
    5.40 -    pfn = ent >> PAGE_SHIFT;
    5.41 +    mfn = ent >> PAGE_SHIFT;
    5.42  
    5.43      ptab = map_domain_page(ent >> PAGE_SHIFT);
    5.44 -    ent = ptab[l2_table_offset(addr)];
    5.45 -    printk("   L1 = %"PRIpte"\n", ent);
    5.46 +    ent  = ptab[l1_table_offset(addr)];
    5.47 +    pfn  = machine_to_phys_mapping[(u32)(ent >> PAGE_SHIFT)]; 
    5.48 +    printk("   L1 = %"PRIpte" %08lx\n", ent, pfn);
    5.49      unmap_domain_page(ptab);
    5.50  }
    5.51  
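show_page_walk() now prints each raw (machine) page-table entry alongside its pseudo-physical frame; the extra column comes from a straight lookup in machine_to_phys_mapping, as in this sketch ('ent' is a raw entry as in the code above, and the u32 cast mirrors the x86_32-only code):

    unsigned long mfn = (unsigned long)(ent >> PAGE_SHIFT);  /* machine frame       */
    unsigned long pfn = machine_to_phys_mapping[(u32)mfn];   /* guest-visible frame */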
     6.1 --- a/xen/include/asm-x86/page.h	Fri Nov 11 21:59:05 2005 +0100
     6.2 +++ b/xen/include/asm-x86/page.h	Fri Nov 11 21:59:33 2005 +0100
     6.3 @@ -232,9 +232,6 @@ typedef struct { u64 pfn; } pagetable_t;
     6.4  #define linear_l3_table(_ed) ((_ed)->arch.guest_vl3table)
     6.5  #define linear_l4_table(_ed) ((_ed)->arch.guest_vl4table)
     6.6  
     6.7 -#define va_to_l1mfn(_ed, _va) \
     6.8 -    (l2e_get_pfn(linear_l2_table(_ed)[_va>>L2_PAGETABLE_SHIFT]))
     6.9 -
    6.10  #ifndef __ASSEMBLY__
    6.11  #if CONFIG_PAGING_LEVELS == 3
    6.12  extern root_pgentry_t idle_pg_table[ROOT_PAGETABLE_ENTRIES];
     7.1 --- a/xen/include/asm-x86/shadow.h	Fri Nov 11 21:59:05 2005 +0100
     7.2 +++ b/xen/include/asm-x86/shadow.h	Fri Nov 11 21:59:33 2005 +0100
     7.3 @@ -138,6 +138,14 @@ extern void shadow_l2_normal_pt_update(s
     7.4                                         struct domain_mmap_cache *cache);
     7.5  #if CONFIG_PAGING_LEVELS >= 3
     7.6  #include <asm/page-guest32.h>
     7.7 +/*
     7.8 + * va_mask cannot be used because it's used by the shadow hash.
      7.9 + * Use the score area for now.
    7.10 + */
    7.11 +#define is_xen_l2_slot(t,s)                                                    \
    7.12 +    ( ((((t) & PGT_score_mask) >> PGT_score_shift) == 3) &&                    \
    7.13 +      ((s) >= (L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES - 1))) )
    7.14 +
    7.15  extern unsigned long gva_to_gpa(unsigned long gva);
    7.16  extern void shadow_l3_normal_pt_update(struct domain *d,
    7.17                                         unsigned long pa, l3_pgentry_t l3e,
    7.18 @@ -458,7 +466,7 @@ static inline void shadow_put_page(struc
    7.19  
    7.20  /************************************************************************/
    7.21  
    7.22 -static inline int __mark_dirty(struct domain *d, unsigned int mfn)
    7.23 +static inline int __mark_dirty(struct domain *d, unsigned long mfn)
    7.24  {
    7.25      unsigned long pfn;
    7.26      int           rc = 0;
    7.27 @@ -906,7 +914,7 @@ static inline void l2pde_general(
    7.28          guest_l2e_add_flags(gpde, _PAGE_ACCESSED);
    7.29  
    7.30          *gpde_p = gpde;
    7.31 -    }
    7.32 +    } 
    7.33  
    7.34      if ( l2e_get_intpte(spde) || l2e_get_intpte(gpde) )
    7.35          SH_VVLOG("%s: gpde=%" PRIpte ", new spde=%" PRIpte, __func__,
    7.36 @@ -1355,7 +1363,7 @@ static inline void put_shadow_status(str
    7.37  }
    7.38  
    7.39  
    7.40 -static inline void delete_shadow_status( 
    7.41 +static inline void delete_shadow_status(
    7.42      struct domain *d, unsigned long gpfn, unsigned long gmfn, unsigned int stype)
    7.43  {
    7.44      struct shadow_status *p, *x, *n, *head;
    7.45 @@ -1454,7 +1462,7 @@ static inline void set_shadow_status(
    7.46      ASSERT(stype && !(stype & ~PGT_type_mask));
    7.47  
    7.48      x = head = hash_bucket(d, gpfn);
    7.49 -   
    7.50 +
    7.51      SH_VLOG("set gpfn=%lx smfn=%lx t=%lx bucket=%p(%p)",
    7.52               gpfn, smfn, stype, x, x->next);
    7.53      shadow_audit(d, 0);
    7.54 @@ -1584,7 +1592,7 @@ shadow_set_l1e(unsigned long va, l1_pgen
    7.55  {
    7.56      struct vcpu *v = current;
    7.57      struct domain *d = v->domain;
    7.58 -    l2_pgentry_t sl2e;
    7.59 +    l2_pgentry_t sl2e = {0};
    7.60  
    7.61      __shadow_get_l2e(v, va, &sl2e);
    7.62      if ( !(l2e_get_flags(sl2e) & _PAGE_PRESENT) )
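Zero-initialising sl2e above pairs with the reworked accessors in shadow_64.h below, which return a success flag and can leave their output untouched when the walk fails; starting from a clear entry means the _PAGE_PRESENT test never reads stack garbage. An illustrative restatement of the caller pattern (the rationale is an inference, not wording from the changeset):

    l2_pgentry_t sl2e = {0};         /* stays clear if the lookup below fails */
    __shadow_get_l2e(v, va, &sl2e);  /* may not touch sl2e on failure         */
    if ( !(l2e_get_flags(sl2e) & _PAGE_PRESENT) )
        /* no shadow L2 entry yet: attach an L1 shadow before setting sl1e */ ;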
    7.63 @@ -1731,7 +1739,7 @@ static inline void update_pagetables(str
    7.64  #ifdef CONFIG_VMX
    7.65      if ( VMX_DOMAIN(v) )
    7.66          paging_enabled = vmx_paging_enabled(v);
    7.67 -            
    7.68 +
    7.69      else
    7.70  #endif
    7.71          // HACK ALERT: there's currently no easy way to figure out if a domU
    7.72 @@ -1757,7 +1765,7 @@ static inline void update_pagetables(str
    7.73          if ( shadow_mode_enabled(d) )
    7.74              v->arch.monitor_table = v->arch.shadow_table;
    7.75          else
    7.76 -#ifdef __x86_64__
    7.77 +#if CONFIG_PAGING_LEVELS == 4
    7.78          if ( !(v->arch.flags & TF_kernel_mode) )
    7.79              v->arch.monitor_table = v->arch.guest_table_user;
    7.80          else
     8.1 --- a/xen/include/asm-x86/shadow_64.h	Fri Nov 11 21:59:05 2005 +0100
     8.2 +++ b/xen/include/asm-x86/shadow_64.h	Fri Nov 11 21:59:33 2005 +0100
     8.3 @@ -29,6 +29,15 @@
     8.4  #include <asm/shadow.h>
     8.5  #include <asm/shadow_ops.h>
     8.6  
     8.7 +extern struct shadow_ops MODE_B_HANDLER;
     8.8 +
     8.9 +#if CONFIG_PAGING_LEVELS == 3
    8.10 +#define L4_PAGETABLE_SHIFT      39
    8.11 +#define L4_PAGETABLE_ENTRIES    (1<<PAGETABLE_ORDER)
    8.12 +typedef struct { intpte_t l4; } l4_pgentry_t;
    8.13 +#define is_guest_l4_slot(_s) (1)
    8.14 +#endif
    8.15 +
    8.16  #define READ_FAULT  0
    8.17  #define WRITE_FAULT 1
    8.18  
    8.19 @@ -94,6 +103,11 @@ static inline int  table_offset_64(unsig
    8.20              return  (((va) >> L2_PAGETABLE_SHIFT) & (L2_PAGETABLE_ENTRIES - 1));
    8.21          case 3:
    8.22              return  (((va) >> L3_PAGETABLE_SHIFT) & (L3_PAGETABLE_ENTRIES - 1));
    8.23 +#if CONFIG_PAGING_LEVELS == 3
    8.24 +        case 4:
    8.25 +            return PAE_SHADOW_SELF_ENTRY;
    8.26 +#endif
    8.27 +
    8.28  #if CONFIG_PAGING_LEVELS >= 4
    8.29  #ifndef GUEST_PGENTRY_32
    8.30          case 4:
    8.31 @@ -127,57 +141,73 @@ static inline void free_out_of_sync_stat
    8.32      }
    8.33  }
    8.34  
    8.35 -static inline pgentry_64_t *__entry(
    8.36 -    struct vcpu *v, u64 va, u32 flag)
    8.37 +static inline int __entry(
    8.38 +    struct vcpu *v, u64 va, pgentry_64_t *e_p, u32 flag)
    8.39  {
    8.40      int i;
    8.41      pgentry_64_t *le_e;
    8.42 -    pgentry_64_t *le_p;
    8.43 +    pgentry_64_t *le_p = NULL;
    8.44      unsigned long mfn;
    8.45      int index;
    8.46      u32 level = flag & L_MASK;
    8.47      struct domain *d = v->domain;
    8.48 +    int root_level;
    8.49  
    8.50 -    index = table_offset_64(va, ROOT_LEVEL_64);
    8.51 -    if (flag & SHADOW_ENTRY)
    8.52 +    if ( flag & SHADOW_ENTRY )
    8.53 +    {
     8.54 +        root_level = ROOT_LEVEL_64;
     8.55 +        index = table_offset_64(va, root_level);
    8.56          le_e = (pgentry_64_t *)&v->arch.shadow_vtable[index];
    8.57 -    else
    8.58 +    }
    8.59 +    else /* guest entry */  
    8.60 +    {
    8.61 +        root_level = v->domain->arch.ops->guest_paging_levels;
     8.62 +        index = table_offset_64(va, root_level);
    8.63          le_e = (pgentry_64_t *)&v->arch.guest_vtable[index];
    8.64 -
    8.65 +    }
    8.66      /*
    8.67       * If it's not external mode, then mfn should be machine physical.
    8.68       */
    8.69 -    for (i = ROOT_LEVEL_64 - level; i > 0; i--) {
    8.70 -        if (unlikely(!(entry_get_flags(*le_e) & _PAGE_PRESENT)))
    8.71 -            return NULL;
    8.72 -        mfn = entry_get_value(*le_e) >> PAGE_SHIFT;
    8.73 -        if ((flag & GUEST_ENTRY) && shadow_mode_translate(d))
    8.74 +    for (i = root_level - level; i > 0; i--) {
    8.75 +        if ( unlikely(!(entry_get_flags(*le_e) & _PAGE_PRESENT)) ) {
    8.76 +            if ( le_p )
    8.77 +                unmap_domain_page(le_p);
    8.78 +            return 0;
    8.79 +        }
    8.80 +        mfn = entry_get_pfn(*le_e);
    8.81 +        if ( (flag & GUEST_ENTRY) && shadow_mode_translate(d) )
    8.82              mfn = get_mfn_from_pfn(mfn);
    8.83 -        le_p = (pgentry_64_t *)phys_to_virt(mfn << PAGE_SHIFT);
    8.84 +        if ( le_p )
    8.85 +            unmap_domain_page(le_p);
    8.86 +        le_p = (pgentry_64_t *)map_domain_page(mfn);
    8.87          index = table_offset_64(va, (level + i - 1));
    8.88          le_e = &le_p[index];
    8.89 +    }
    8.90  
    8.91 -    }
    8.92 -    return le_e;
    8.93 +    if ( flag & SET_ENTRY )
    8.94 +        *le_e = *e_p;
    8.95 +    else
    8.96 +        *e_p = *le_e;
    8.97 +
    8.98 +    if ( le_p )
    8.99 +        unmap_domain_page(le_p);
   8.100 +
   8.101 +    return 1;
   8.102  
   8.103  }
   8.104  
   8.105 -static inline pgentry_64_t *__rw_entry(
   8.106 -    struct vcpu *ed, u64 va, void *e_p, u32 flag)
   8.107 +static inline int __rw_entry(
   8.108 +    struct vcpu *v, u64 va, void *e_p, u32 flag)
   8.109  {
   8.110 -    pgentry_64_t *le_e = __entry(ed, va, flag);
   8.111      pgentry_64_t *e = (pgentry_64_t *)e_p;
   8.112 -    if (le_e == NULL)
   8.113 -        return NULL;
   8.114  
   8.115      if (e) {
   8.116 -        if (flag & SET_ENTRY)
   8.117 -            *le_e = *e;
   8.118 -        else
   8.119 -            *e = *le_e;
   8.120 +        return __entry(v, va, e, flag);
   8.121      }
   8.122 -    return le_e;
   8.123 +
   8.124 +    return 0;
   8.125  }
   8.126 +
   8.127  #define __shadow_set_l4e(v, va, value) \
   8.128    __rw_entry(v, va, value, SHADOW_ENTRY | SET_ENTRY | PAGING_L4)
   8.129  #define __shadow_get_l4e(v, va, sl4e) \
   8.130 @@ -204,7 +234,7 @@ static inline pgentry_64_t *__rw_entry(
   8.131  #define __guest_get_l3e(v, va, sl3e) \
   8.132    __rw_entry(v, va, gl3e, GUEST_ENTRY | GET_ENTRY | PAGING_L3)
   8.133  
   8.134 -static inline void *  __guest_set_l2e(
   8.135 +static inline int  __guest_set_l2e(
   8.136      struct vcpu *v, u64 va, void *value, int size)
   8.137  {
   8.138      switch(size) {
   8.139 @@ -216,21 +246,21 @@ static inline void *  __guest_set_l2e(
   8.140                  l2va = (l2_pgentry_32_t *)v->arch.guest_vtable;
   8.141                  if (value)
   8.142                      l2va[l2_table_offset_32(va)] = *(l2_pgentry_32_t *)value;
   8.143 -                return &l2va[l2_table_offset_32(va)];
   8.144 +                return 1;
   8.145              }
   8.146          case 8:
   8.147              return __rw_entry(v, va, value, GUEST_ENTRY | SET_ENTRY | PAGING_L2);
   8.148          default:
   8.149              BUG();
   8.150 -            return NULL;
   8.151 +            return 0;
   8.152      }
   8.153 -    return NULL;
   8.154 +    return 0;
   8.155  }
   8.156  
   8.157  #define __guest_set_l2e(v, va, value) \
   8.158 -  ( __typeof__(value) )__guest_set_l2e(v, (u64)va, value, sizeof(*value))
   8.159 +    __guest_set_l2e(v, (u64)va, value, sizeof(*value))
   8.160  
   8.161 -static inline void * __guest_get_l2e(
   8.162 +static inline int  __guest_get_l2e(
   8.163    struct vcpu *v, u64 va, void *gl2e, int size)
   8.164  {
   8.165      switch(size) {
   8.166 @@ -241,21 +271,21 @@ static inline void * __guest_get_l2e(
   8.167                  l2va = (l2_pgentry_32_t *)v->arch.guest_vtable;
   8.168                  if (gl2e)
   8.169                      *(l2_pgentry_32_t *)gl2e = l2va[l2_table_offset_32(va)];
   8.170 -                return &l2va[l2_table_offset_32(va)];
   8.171 +                return 1;
   8.172              }
   8.173          case 8:
   8.174              return __rw_entry(v, va, gl2e, GUEST_ENTRY | GET_ENTRY | PAGING_L2);
   8.175          default:
   8.176              BUG();
   8.177 -            return NULL;
   8.178 +            return 0;
   8.179      }
   8.180 -    return NULL;
   8.181 +    return 0;
   8.182  }
   8.183  
   8.184  #define __guest_get_l2e(v, va, gl2e) \
   8.185 -  (__typeof__ (gl2e))__guest_get_l2e(v, (u64)va, gl2e, sizeof(*gl2e))
   8.186 +    __guest_get_l2e(v, (u64)va, gl2e, sizeof(*gl2e))
   8.187  
   8.188 -static inline void *  __guest_set_l1e(
   8.189 +static inline int  __guest_set_l1e(
   8.190    struct vcpu *v, u64 va, void *value, int size)
   8.191  {
   8.192      switch(size) {
   8.193 @@ -267,34 +297,34 @@ static inline void *  __guest_set_l1e(
   8.194                  unsigned long l1mfn;
   8.195  
   8.196                  if (!__guest_get_l2e(v, va, &gl2e))
   8.197 -                    return NULL;
   8.198 +                    return 0;
   8.199                  if (unlikely(!(l2e_get_flags_32(gl2e) & _PAGE_PRESENT)))
   8.200 -                    return NULL;
   8.201 +                    return 0;
   8.202  
   8.203                  l1mfn = get_mfn_from_pfn(
   8.204                    l2e_get_pfn(gl2e));
   8.205  
   8.206 -                l1va = (l1_pgentry_32_t *)
   8.207 -                  phys_to_virt(l1mfn << L1_PAGETABLE_SHIFT);
   8.208 +                l1va = (l1_pgentry_32_t *)map_domain_page(l1mfn);
   8.209                  if (value)
   8.210                      l1va[l1_table_offset_32(va)] = *(l1_pgentry_32_t *)value;
   8.211 +                unmap_domain_page(l1va);
   8.212  
   8.213 -                return &l1va[l1_table_offset_32(va)];
   8.214 +                return 1;
   8.215              }
   8.216  
   8.217          case 8:
   8.218              return __rw_entry(v, va, value, GUEST_ENTRY | SET_ENTRY | PAGING_L1);
   8.219          default:
   8.220              BUG();
   8.221 -            return NULL;
   8.222 +            return 0;
   8.223      }
   8.224 -    return NULL;
   8.225 +    return 0;
   8.226  }
   8.227  
   8.228  #define __guest_set_l1e(v, va, value) \
   8.229 -  ( __typeof__(value) )__guest_set_l1e(v, (u64)va, value, sizeof(*value))
   8.230 +     __guest_set_l1e(v, (u64)va, value, sizeof(*value))
   8.231  
   8.232 -static inline void *  __guest_get_l1e(
   8.233 +static inline int  __guest_get_l1e(
   8.234    struct vcpu *v, u64 va, void *gl1e, int size)
   8.235  {
   8.236      switch(size) {
   8.237 @@ -306,34 +336,33 @@ static inline void *  __guest_get_l1e(
   8.238                  unsigned long l1mfn;
   8.239  
   8.240                  if (!(__guest_get_l2e(v, va, &gl2e)))
   8.241 -                    return NULL;
   8.242 +                    return 0;
   8.243  
   8.244  
   8.245                  if (unlikely(!(l2e_get_flags_32(gl2e) & _PAGE_PRESENT)))
   8.246 -                    return NULL;
   8.247 +                    return 0;
   8.248  
   8.249  
   8.250                  l1mfn = get_mfn_from_pfn(
   8.251                    l2e_get_pfn(gl2e));
   8.252 -                l1va = (l1_pgentry_32_t *) phys_to_virt(
   8.253 -                  l1mfn << L1_PAGETABLE_SHIFT);
   8.254 +                l1va = (l1_pgentry_32_t *) map_domain_page(l1mfn);
   8.255                  if (gl1e)
   8.256                      *(l1_pgentry_32_t *)gl1e = l1va[l1_table_offset_32(va)];
   8.257 -
   8.258 -                return &l1va[l1_table_offset_32(va)];
   8.259 +                unmap_domain_page(l1va);
   8.260 +                return 1;
   8.261              }
   8.262          case 8:
   8.263              // 64-bit guest
   8.264              return __rw_entry(v, va, gl1e, GUEST_ENTRY | GET_ENTRY | PAGING_L1);
   8.265          default:
   8.266              BUG();
   8.267 -            return NULL;
   8.268 +            return 0;
   8.269      }
   8.270 -    return NULL;
   8.271 +    return 0;
   8.272  }
   8.273  
   8.274  #define __guest_get_l1e(v, va, gl1e) \
   8.275 -  ( __typeof__(gl1e) )__guest_get_l1e(v, (u64)va, gl1e, sizeof(*gl1e))
   8.276 +    __guest_get_l1e(v, (u64)va, gl1e, sizeof(*gl1e))
   8.277  
   8.278  static inline void entry_general(
   8.279    struct domain *d,
   8.280 @@ -365,10 +394,16 @@ static inline void entry_general(
   8.281                  unmap_domain_page(l1_p);
   8.282              }
   8.283          } else {
   8.284 -            sle = entry_from_pfn(
   8.285 -                smfn,
   8.286 -                (entry_get_flags(gle) | _PAGE_RW | _PAGE_ACCESSED) & ~_PAGE_AVAIL);
   8.287 -            entry_add_flags(gle, _PAGE_ACCESSED);
   8.288 +            if (d->arch.ops->guest_paging_levels <= PAGING_L3
   8.289 +                    && level == PAGING_L3) {
   8.290 +                sle = entry_from_pfn(smfn, entry_get_flags(gle));
   8.291 +            } else {
   8.292 +
   8.293 +                sle = entry_from_pfn(
   8.294 +                  smfn,
   8.295 +                  (entry_get_flags(gle) | _PAGE_RW | _PAGE_ACCESSED) & ~_PAGE_AVAIL);
   8.296 +                entry_add_flags(gle, _PAGE_ACCESSED);
   8.297 +            }
   8.298          }
   8.299          // XXX mafetter: Hmm...
   8.300          //     Shouldn't the dirty log be checked/updated here?
   8.301 @@ -392,7 +427,7 @@ static inline void entry_propagate_from_
   8.302  
   8.303      if ( entry_get_flags(gle) & _PAGE_PRESENT ) {
   8.304          if ((entry_get_flags(gle) & _PAGE_PSE) && level == PAGING_L2) {
   8.305 -            smfn =  __shadow_status(d, entry_get_value(gle) >> PAGE_SHIFT, PGT_fl1_shadow);
   8.306 +            smfn =  __shadow_status(d, entry_get_pfn(gle), PGT_fl1_shadow);
   8.307          } else {
   8.308              smfn =  __shadow_status(d, entry_get_pfn(gle), 
   8.309                shadow_level_to_type((level -1 )));
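With __entry() and __rw_entry() reworked as above, every __guest_get_*/__guest_set_*/__shadow_set_* wrapper now returns a success flag and transfers the entry through its pointer argument, mapping each intermediate table with map_domain_page() instead of assuming a phys_to_virt() window. A usage sketch under that contract; the declaration and surrounding flow are illustrative, not verbatim source:

    l1_pgentry_t gl1e;
    if ( !__guest_get_l1e(v, va, &gl1e) )            /* walk failed / not present */
        return 0;
    if ( guest_l1e_get_flags(gl1e) & _PAGE_PRESENT )
        /* ... propagate the guest entry into the shadow ... */ ;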