direct-io.hg

changeset 7772:995e94c4802e

The attached patch allows PAE xenlinux to run in shadow mode using
log-dirty guest refcounting, which is required to support
save/restore/relocate. We can, for example, turn the mode on and off at
5-second intervals while doing a kernel build (make -j4), and it
survives for hours.
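
For reference, below is a minimal dom0-side sketch of the kind of on/off
stress loop described above. It assumes the libxc shadow-control
interface of this era (xc_interface_open(), xc_shadow_control() and the
DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY / DOM0_SHADOW_CONTROL_OP_OFF
operations); the exact function signature and constant names are
assumptions and are not part of this patch.

    /*
     * Hedged sketch only: toggle shadow log-dirty mode on a domain every
     * 5 seconds, roughly matching the test described in the changelog.
     * The libxc call and DOM0_SHADOW_CONTROL_OP_* names are assumed from
     * the tree of this period, not taken from this patch.
     */
    #include <stdio.h>
    #include <stdlib.h>
    #include <stdint.h>
    #include <unistd.h>
    #include <xenctrl.h>

    int main(int argc, char *argv[])
    {
        int xc, i;
        uint32_t domid;

        if ( argc != 2 )
        {
            fprintf(stderr, "usage: %s <domid>\n", argv[0]);
            return 1;
        }
        domid = (uint32_t)strtoul(argv[1], NULL, 0);

        xc = xc_interface_open();
        if ( xc < 0 )
        {
            perror("xc_interface_open");
            return 1;
        }

        /* Roughly 3600 iterations of 10s each: a multi-hour soak. */
        for ( i = 0; i < 3600; i++ )
        {
            /* Enable shadow mode with log-dirty tracking. */
            if ( xc_shadow_control(xc, domid,
                                   DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY,
                                   NULL, 0, NULL) < 0 )
                perror("enable log-dirty");
            sleep(5);

            /* Turn shadow mode back off again. */
            if ( xc_shadow_control(xc, domid,
                                   DOM0_SHADOW_CONTROL_OP_OFF,
                                   NULL, 0, NULL) < 0 )
                perror("disable shadow mode");
            sleep(5);
        }

        xc_interface_close(xc);
        return 0;
    }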

We are still restoring log-dirty mode for x86_64 xenlinux; it is not
very stable right now, but I believe it should be done very soon.

We also checked that it did not break 64-bit VMX domains, which use a
different sub-mode of shadow mode.

Signed-off-by: Jun Nakajima <jun.nakajima@intel.com>
Signed-off-by: Xiaohui Xin <xiaohui.xin@intel.com>
author smh22@firebug.cl.cam.ac.uk
date Fri Nov 11 19:11:13 2005 +0100 (2005-11-11)
parents e023e37b3c7a
children 776ef2fb6fd8
files xen/arch/x86/shadow.c xen/arch/x86/shadow32.c xen/arch/x86/shadow_public.c xen/include/asm-x86/page.h xen/include/asm-x86/shadow.h xen/include/asm-x86/shadow_64.h
line diff
     1.1 --- a/xen/arch/x86/shadow.c	Fri Nov 11 19:02:49 2005 +0100
     1.2 +++ b/xen/arch/x86/shadow.c	Fri Nov 11 19:11:13 2005 +0100
     1.3 @@ -22,7 +22,7 @@
     1.4   * Jun Nakajima <jun.nakajima@intel.com>
     1.5   * Chengyuan Li <chengyuan.li@intel.com>
     1.6   *
     1.7 - * Extended to support 64-bit guests.
     1.8 + * Extended to support 32-bit PAE and 64-bit guests.
     1.9   */
    1.10  
    1.11  #include <xen/config.h>
    1.12 @@ -34,6 +34,7 @@
    1.13  #include <xen/event.h>
    1.14  #include <xen/sched.h>
    1.15  #include <xen/trace.h>
    1.16 +#include <asm/shadow_64.h>
    1.17  
    1.18  extern void free_shadow_pages(struct domain *d);
    1.19  
    1.20 @@ -44,13 +45,13 @@ static void mark_shadows_as_reflecting_s
    1.21  #endif
    1.22  
    1.23  #if CONFIG_PAGING_LEVELS == 3
    1.24 -#include <asm/shadow_64.h>
    1.25  static unsigned long shadow_l3_table(
    1.26      struct domain *d, unsigned long gpfn, unsigned long gmfn);
    1.27 +static inline void validate_bl2e_change( struct domain *d,
    1.28 +    guest_root_pgentry_t *new_gle_p, pgentry_64_t *shadow_l3, int index);
    1.29  #endif
    1.30  
    1.31  #if CONFIG_PAGING_LEVELS == 4
    1.32 -#include <asm/shadow_64.h>
    1.33  static unsigned long shadow_l4_table(
    1.34      struct domain *d, unsigned long gpfn, unsigned long gmfn);
    1.35  static void shadow_map_into_current(struct vcpu *v,
    1.36 @@ -222,7 +223,7 @@ alloc_shadow_page(struct domain *d,
    1.37          {
    1.38              if (d->arch.ops->guest_paging_levels == PAGING_L2)
    1.39              {
    1.40 -#if CONFIG_PAGING_LEVELS >= 4
    1.41 +#if CONFIG_PAGING_LEVELS >= 3
    1.42                  /* For 32-bit VMX guest, 2 shadow L1s to simulate 1 guest L1
    1.43                   * So need allocate 2 continues shadow L1 each time.
    1.44                   */
    1.45 @@ -313,6 +314,8 @@ alloc_shadow_page(struct domain *d,
    1.46              goto fail;
    1.47          perfc_incr(shadow_l3_pages);
    1.48          d->arch.shadow_page_count++;
    1.49 +        if ( PGT_l3_page_table == PGT_root_page_table )
    1.50 +            pin = 1;
    1.51          break;
    1.52  
    1.53      case PGT_l4_shadow:
    1.54 @@ -375,7 +378,7 @@ fail:
    1.55      {
    1.56          if (d->arch.ops->guest_paging_levels == PAGING_L2)
    1.57          {
    1.58 -#if CONFIG_PAGING_LEVELS >=4
    1.59 +#if CONFIG_PAGING_LEVELS >=3
    1.60              free_domheap_pages(page, SL1_ORDER);
    1.61  #else
    1.62              free_domheap_page(page);
    1.63 @@ -427,14 +430,10 @@ shadow_hl2_table(struct domain *d, unsig
    1.64  
    1.65      hl2 = map_domain_page(hl2mfn);
    1.66  
    1.67 -#ifdef __i386__
    1.68      if ( shadow_mode_external(d) )
    1.69          limit = L2_PAGETABLE_ENTRIES;
    1.70      else
    1.71          limit = DOMAIN_ENTRIES_PER_L2_PAGETABLE;
    1.72 -#else
    1.73 -    limit = 0; /* XXX x86/64 XXX */
    1.74 -#endif
    1.75  
    1.76      memset(hl2, 0, limit * sizeof(l1_pgentry_t));
    1.77  
    1.78 @@ -540,7 +539,7 @@ static unsigned long shadow_l2_table(
    1.79      SH_VLOG("shadow_l2_table(%lx -> %lx)", gmfn, smfn);
    1.80      return smfn;
    1.81  }
    1.82 -#endif
    1.83 +#endif /* CONFIG_PAGING_LEVELS == 2 */
    1.84  
    1.85  static void shadow_map_l1_into_current_l2(unsigned long va)
    1.86  {
    1.87 @@ -549,7 +548,7 @@ static void shadow_map_l1_into_current_l
    1.88      l1_pgentry_t *spl1e;
    1.89      l2_pgentry_t sl2e;
    1.90      guest_l1_pgentry_t *gpl1e;
    1.91 -    guest_l2_pgentry_t gl2e;
    1.92 +    guest_l2_pgentry_t gl2e = {0};
    1.93      unsigned long gl1pfn, gl1mfn, sl1mfn;
    1.94      int i, init_table = 0;
    1.95  
    1.96 @@ -593,14 +592,14 @@ static void shadow_map_l1_into_current_l
    1.97      ASSERT( !(l2e_get_flags(old_sl2e) & _PAGE_PRESENT) );
    1.98  #endif
    1.99  
   1.100 -#if CONFIG_PAGING_LEVELS >=4
   1.101 +#if CONFIG_PAGING_LEVELS >=3
   1.102      if (d->arch.ops->guest_paging_levels == PAGING_L2)
   1.103      {
   1.104 -        /* for 32-bit VMX guest on 64-bit host,
   1.105 +        /* for 32-bit VMX guest on 64-bit or PAE host,
   1.106           * need update two L2 entries each time
   1.107           */
   1.108          if ( !get_shadow_ref(sl1mfn))
   1.109 -                BUG();
   1.110 +            BUG();
   1.111          l2pde_general(d, &gl2e, &sl2e, sl1mfn);
   1.112          __guest_set_l2e(v, va, &gl2e);
   1.113          __shadow_set_l2e(v, va & ~((1<<L2_PAGETABLE_SHIFT_32) - 1), &sl2e);
   1.114 @@ -625,19 +624,17 @@ static void shadow_map_l1_into_current_l
   1.115          int index = guest_l1_table_offset(va);
   1.116          int min = 1, max = 0;
   1.117  
   1.118 -        unsigned long entries, pt_va;
   1.119 -        l1_pgentry_t tmp_sl1e;
   1.120 -        guest_l1_pgentry_t tmp_gl1e;//Prepare for double compile
   1.121 -
   1.122 -
   1.123 -        entries = PAGE_SIZE / sizeof(guest_l1_pgentry_t);
   1.124 -        pt_va = ((va >> L1_PAGETABLE_SHIFT) & ~(entries - 1)) << L1_PAGETABLE_SHIFT;
   1.125 -        gpl1e = (guest_l1_pgentry_t *) __guest_get_l1e(v, pt_va, &tmp_gl1e);
   1.126 +        unsigned long tmp_gmfn;
   1.127 +        l2_pgentry_t tmp_sl2e = {0};
   1.128 +        guest_l2_pgentry_t tmp_gl2e = {0};
   1.129 +
   1.130 +        __guest_get_l2e(v, va, &tmp_gl2e);
   1.131 +        tmp_gmfn = __gpfn_to_mfn(d, l2e_get_pfn(tmp_gl2e));
   1.132 +        gpl1e = (guest_l1_pgentry_t *) map_domain_page(tmp_gmfn);
   1.133  
   1.134          /* If the PGT_l1_shadow has two continual pages */
   1.135 -        entries = PAGE_SIZE / sizeof(guest_l1_pgentry_t); //1024 entry!!!
   1.136 -        pt_va = ((va >> L1_PAGETABLE_SHIFT) & ~(entries - 1)) << L1_PAGETABLE_SHIFT;
   1.137 -        spl1e = (l1_pgentry_t *) __shadow_get_l1e(v, pt_va, &tmp_sl1e);
   1.138 +        __shadow_get_l2e(v, va, &tmp_sl2e);
   1.139 +        spl1e = (l1_pgentry_t *) map_domain_page(l2e_get_pfn(tmp_sl2e));
   1.140  
   1.141          for ( i = 0; i < GUEST_L1_PAGETABLE_ENTRIES; i++ )
   1.142          {
   1.143 @@ -662,10 +659,13 @@ static void shadow_map_l1_into_current_l
   1.144              if ( likely(i > max) )
   1.145                  max = i;
   1.146              set_guest_back_ptr(d, sl1e, sl1mfn, i);
   1.147 -          }
   1.148 +        }
   1.149  
   1.150          frame_table[sl1mfn].tlbflush_timestamp =
   1.151              SHADOW_ENCODE_MIN_MAX(min, max);
   1.152 +
   1.153 +        unmap_domain_page(gpl1e);
   1.154 +        unmap_domain_page(spl1e);
   1.155      }
   1.156  }
   1.157  
   1.158 @@ -674,7 +674,7 @@ shadow_set_l1e(unsigned long va, l1_pgen
   1.159  {
   1.160      struct vcpu *v = current;
   1.161      struct domain *d = v->domain;
   1.162 -    l2_pgentry_t sl2e;
   1.163 +    l2_pgentry_t sl2e = {0};
   1.164  
   1.165      __shadow_get_l2e(v, va, &sl2e);
   1.166      if ( !(l2e_get_flags(sl2e) & _PAGE_PRESENT) )
   1.167 @@ -690,11 +690,23 @@ shadow_set_l1e(unsigned long va, l1_pgen
   1.168          }
   1.169          else /* check to see if it exists; if so, link it in */
   1.170          {
   1.171 -            l2_pgentry_t gpde = linear_l2_table(v)[l2_table_offset(va)];
   1.172 -            unsigned long gl1pfn = l2e_get_pfn(gpde);
   1.173 -            unsigned long sl1mfn = __shadow_status(d, gl1pfn, PGT_l1_shadow);
   1.174 -
   1.175 -            ASSERT( l2e_get_flags(gpde) & _PAGE_PRESENT );
   1.176 +            l2_pgentry_t gpde = {0};
   1.177 +            unsigned long gl1pfn;
   1.178 +            unsigned long sl1mfn;
   1.179 +
   1.180 +            __guest_get_l2e(v, va, &gpde);
   1.181 +
   1.182 +            if ( l2e_get_flags(gpde) & _PAGE_PRESENT )
   1.183 +            {
   1.184 +                gl1pfn = l2e_get_pfn(gpde);
   1.185 +                sl1mfn = __shadow_status(d, gl1pfn, PGT_l1_shadow);
   1.186 +            }
   1.187 +            else
   1.188 +            {
   1.189 +                // no shadow exists, so there's nothing to do.
   1.190 +                perfc_incrc(shadow_set_l1e_fail);
   1.191 +                return;
   1.192 +            }
   1.193  
   1.194              if ( sl1mfn )
   1.195              {
   1.196 @@ -738,7 +750,7 @@ shadow_set_l1e(unsigned long va, l1_pgen
   1.197      shadow_update_min_max(l2e_get_pfn(sl2e), l1_table_offset(va));
   1.198  }
   1.199  
   1.200 -#if CONFIG_PAGING_LEVELS <= 3
   1.201 +#if CONFIG_PAGING_LEVELS == 2
   1.202  static void shadow_invlpg_32(struct vcpu *v, unsigned long va)
   1.203  {
   1.204      struct domain *d = v->domain;
   1.205 @@ -767,7 +779,7 @@ static void shadow_invlpg_32(struct vcpu
   1.206  
   1.207      shadow_unlock(d);
   1.208  }
   1.209 -#endif
   1.210 +#endif /* CONFIG_PAGING_LEVELS == 2 */
   1.211  
   1.212  static struct out_of_sync_entry *
   1.213  shadow_alloc_oos_entry(struct domain *d)
   1.214 @@ -996,7 +1008,10 @@ static int snapshot_entry_matches(
   1.215  
   1.216      if (__copy_from_user(&gpte, &guest_pt[index],
   1.217                           sizeof(gpte)))
   1.218 +    {
   1.219 +        unmap_domain_page(snapshot);
   1.220          return 0;
   1.221 +    }
   1.222  
   1.223      // This could probably be smarter, but this is sufficent for
   1.224      // our current needs.
   1.225 @@ -1021,7 +1036,7 @@ static int snapshot_entry_matches(
   1.226  static int is_out_of_sync(struct vcpu *v, unsigned long va) /* __shadow_out_of_sync */
   1.227  {
   1.228      struct domain *d = v->domain;
   1.229 -#if defined (__x86_64__)
   1.230 +#if CONFIG_PAGING_LEVELS == 4
   1.231      unsigned long l2mfn = ((v->arch.flags & TF_kernel_mode)?
   1.232                            pagetable_get_pfn(v->arch.guest_table) :
   1.233                            pagetable_get_pfn(v->arch.guest_table_user));
   1.234 @@ -1032,16 +1047,21 @@ static int is_out_of_sync(struct vcpu *v
   1.235      guest_l2_pgentry_t l2e;
   1.236      unsigned long l1pfn, l1mfn;
   1.237      guest_l1_pgentry_t *guest_pt;
   1.238 -    guest_l1_pgentry_t tmp_gle;
   1.239 -    unsigned long pt_va;
   1.240  
   1.241      ASSERT(shadow_lock_is_acquired(d));
   1.242      ASSERT(VALID_M2P(l2pfn));
   1.243  
   1.244      perfc_incrc(shadow_out_of_sync_calls);
   1.245  
   1.246 -#if CONFIG_PAGING_LEVELS >= 4
   1.247 -    if (d->arch.ops->guest_paging_levels == PAGING_L4) { /* Mode F */
   1.248 +#if CONFIG_PAGING_LEVELS >= 3
   1.249 +
   1.250 +#define unmap_and_return(x)                                         \
   1.251 +    if ( guest_pt != (guest_l1_pgentry_t *) v->arch.guest_vtable )  \
   1.252 +        unmap_domain_page(guest_pt);                                \
   1.253 +    return (x);
   1.254 +
   1.255 +    if (d->arch.ops->guest_paging_levels >= PAGING_L3) 
   1.256 +    { 
   1.257          pgentry_64_t le;
   1.258          unsigned long gmfn;
   1.259          unsigned long gpfn;
   1.260 @@ -1051,37 +1071,57 @@ static int is_out_of_sync(struct vcpu *v
   1.261          gpfn = l2pfn;
   1.262          guest_pt = (guest_l1_pgentry_t *)v->arch.guest_vtable;
   1.263  
   1.264 -        for (i = PAGING_L4; i >= PAGING_L3; i--) {
   1.265 +        for ( i = PAGING_L4; i >= PAGING_L3; i-- ) 
   1.266 +        {
   1.267 +            if (d->arch.ops->guest_paging_levels == PAGING_L3 
   1.268 +                && i == PAGING_L4)
   1.269 +                continue;       /* skip the top-level for 3-level */
   1.270 +
   1.271              if ( page_out_of_sync(&frame_table[gmfn]) &&
   1.272 -              !snapshot_entry_matches(
   1.273 -                  d, guest_pt, gpfn, table_offset_64(va, i)) )
   1.274 -                return 1;
   1.275 -
   1.276 +                 !snapshot_entry_matches(
   1.277 +                     d, guest_pt, gpfn, table_offset_64(va, i)) )
   1.278 +            {
   1.279 +                unmap_and_return (1);
   1.280 +            }
   1.281 +
   1.282 +            le = entry_empty();
   1.283              __rw_entry(v, va, &le, GUEST_ENTRY | GET_ENTRY | i);
   1.284 +
   1.285              if ( !(entry_get_flags(le) & _PAGE_PRESENT) )
   1.286 -                return 0;
   1.287 +            {
   1.288 +                unmap_and_return (0);
   1.289 +            }
   1.290              gpfn = entry_get_pfn(le);
   1.291              gmfn = __gpfn_to_mfn(d, gpfn);
   1.292              if ( !VALID_MFN(gmfn) )
   1.293 -                return 0;
   1.294 -            /* Todo: check!*/
   1.295 +            {
   1.296 +                unmap_and_return (0);
   1.297 +            }
   1.298 +            if ( guest_pt != (guest_l1_pgentry_t *)v->arch.guest_vtable )
   1.299 +                unmap_domain_page(guest_pt);
   1.300              guest_pt = (guest_l1_pgentry_t *)map_domain_page(gmfn);
   1.301 -
   1.302          }
   1.303  
   1.304          /* L2 */
   1.305          if ( page_out_of_sync(&frame_table[gmfn]) &&
   1.306               !snapshot_entry_matches(d, guest_pt, gpfn, l2_table_offset(va)) )
   1.307 +        {
   1.308 +            unmap_and_return (1);
   1.309 +        }
   1.310 +
   1.311 +        if ( guest_pt != (guest_l1_pgentry_t *)v->arch.guest_vtable )
   1.312 +            unmap_domain_page(guest_pt);
   1.313 +
   1.314 +    } 
   1.315 +    else
   1.316 +#undef unmap_and_return
   1.317 +#endif /* CONFIG_PAGING_LEVELS >= 3 */
   1.318 +    {
   1.319 +        if ( page_out_of_sync(&frame_table[l2mfn]) &&
   1.320 +             !snapshot_entry_matches(d, (guest_l1_pgentry_t *)v->arch.guest_vtable,
   1.321 +                                     l2pfn, guest_l2_table_offset(va)) )
   1.322              return 1;
   1.323 -
   1.324 -
   1.325 -    } else
   1.326 -#endif
   1.327 -
   1.328 -    if ( page_out_of_sync(&frame_table[l2mfn]) &&
   1.329 -         !snapshot_entry_matches(d, (guest_l1_pgentry_t *)v->arch.guest_vtable,
   1.330 -                                 l2pfn, guest_l2_table_offset(va)) )
   1.331 -        return 1;
   1.332 +    }
   1.333  
   1.334      __guest_get_l2e(v, va, &l2e);
   1.335      if ( !(guest_l2e_get_flags(l2e) & _PAGE_PRESENT) ||
   1.336 @@ -1095,15 +1135,17 @@ static int is_out_of_sync(struct vcpu *v
   1.337      if ( !VALID_MFN(l1mfn) )
   1.338          return 0;
   1.339  
   1.340 -    pt_va = ((va >> L1_PAGETABLE_SHIFT) & ~(GUEST_L1_PAGETABLE_ENTRIES - 1))
   1.341 -      << L1_PAGETABLE_SHIFT;
   1.342 -    guest_pt = (guest_l1_pgentry_t *) __guest_get_l1e(v, pt_va, &tmp_gle);
   1.343 +    guest_pt = (guest_l1_pgentry_t *) map_domain_page(l1mfn);
   1.344  
   1.345      if ( page_out_of_sync(&frame_table[l1mfn]) &&
   1.346           !snapshot_entry_matches(
   1.347 -             d, guest_pt, l1pfn, guest_l1_table_offset(va)) )
   1.348 +             d, guest_pt, l1pfn, guest_l1_table_offset(va)) ) 
   1.349 +    {
   1.350 +        unmap_domain_page(guest_pt);
   1.351          return 1;
   1.352 -
   1.353 +    }
   1.354 +
   1.355 +    unmap_domain_page(guest_pt);
   1.356      return 0;
   1.357  }
   1.358  
   1.359 @@ -1257,7 +1299,7 @@ static int remove_all_write_access(
   1.360      }
   1.361  
   1.362      if ( shadow_mode_external(d) ) {
   1.363 -        if (write_refs-- == 0)
   1.364 +        if (--write_refs == 0)
   1.365              return 0;
   1.366  
   1.367           // Use the back pointer to locate the shadow page that can contain
   1.368 @@ -1314,6 +1356,8 @@ static int resync_all(struct domain *d, 
   1.369  
   1.370      for ( entry = d->arch.out_of_sync; entry; entry = entry->next)
   1.371      {
   1.372 +        int max = -1;
   1.373 +
   1.374          if ( entry->snapshot_mfn == SHADOW_SNAPSHOT_ELSEWHERE )
   1.375              continue;
   1.376  
   1.377 @@ -1335,7 +1379,7 @@ static int resync_all(struct domain *d, 
   1.378                  continue;
   1.379          }
   1.380  
   1.381 -        FSH_LOG("resyncing t=%08x gpfn=%lx gmfn=%lx smfn=%lx snapshot_mfn=%lx",
   1.382 +       FSH_LOG("resyncing t=%08x gpfn=%lx gmfn=%lx smfn=%lx snapshot_mfn=%lx",
   1.383                  stype, entry->gpfn, entry->gmfn, smfn, entry->snapshot_mfn);
   1.384  
   1.385          // Compare guest's new contents to its snapshot, validating
   1.386 @@ -1373,11 +1417,9 @@ static int resync_all(struct domain *d, 
   1.387  
   1.388              if ( !shadow_mode_refcounts(d) )
   1.389                  revalidate_l1(d, (l1_pgentry_t *)guest1, (l1_pgentry_t *)snapshot1);
   1.390 -
   1.391              if ( !smfn )
   1.392                  break;
   1.393  
   1.394 -
   1.395              changed = 0;
   1.396  
   1.397              for ( i = min_shadow; i <= max_shadow; i++ )
   1.398 @@ -1405,12 +1447,13 @@ static int resync_all(struct domain *d, 
   1.399              perfc_incrc(resync_l1);
   1.400              perfc_incr_histo(wpt_updates, changed, PT_UPDATES);
   1.401              perfc_incr_histo(l1_entries_checked, max_shadow - min_shadow + 1, PT_UPDATES);
   1.402 -            if ( d->arch.ops->guest_paging_levels == PAGING_L4 &&
   1.403 +            if ( d->arch.ops->guest_paging_levels >= PAGING_L3 &&
   1.404                   unshadow_l1 ) {
   1.405 -                pgentry_64_t l2e;
   1.406 +                pgentry_64_t l2e = {0};
   1.407  
   1.408                  __shadow_get_l2e(entry->v, entry->va, &l2e);
   1.409 -                if (entry_get_flags(l2e) & _PAGE_PRESENT) {
   1.410 +
   1.411 +                if ( entry_get_flags(l2e) & _PAGE_PRESENT ) {
   1.412                      entry_remove_flags(l2e, _PAGE_PRESENT);
   1.413                      __shadow_set_l2e(entry->v, entry->va, &l2e);
   1.414  
   1.415 @@ -1421,11 +1464,9 @@ static int resync_all(struct domain *d, 
   1.416  
   1.417              break;
   1.418          }
   1.419 -#if defined (__i386__)
   1.420 +#if CONFIG_PAGING_LEVELS == 2
   1.421          case PGT_l2_shadow:
   1.422          {
   1.423 -            int max = -1;
   1.424 -
   1.425              l2_pgentry_t *guest2 = guest;
   1.426              l2_pgentry_t *shadow2 = shadow;
   1.427              l2_pgentry_t *snapshot2 = snapshot;
   1.428 @@ -1436,9 +1477,6 @@ static int resync_all(struct domain *d, 
   1.429              changed = 0;
   1.430              for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
   1.431              {
   1.432 -#if CONFIG_X86_PAE
   1.433 -                BUG();  /* FIXME: need type_info */
   1.434 -#endif
   1.435                  if ( !is_guest_l2_slot(0,i) && !external )
   1.436                      continue;
   1.437  
   1.438 @@ -1482,9 +1520,6 @@ static int resync_all(struct domain *d, 
   1.439              changed = 0;
   1.440              for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
   1.441              {
   1.442 -#if CONFIG_X86_PAE
   1.443 -                BUG();  /* FIXME: need type_info */
   1.444 -#endif
   1.445                  if ( !is_guest_l2_slot(0, i) && !external )
   1.446                      continue;
   1.447  
   1.448 @@ -1505,7 +1540,7 @@ static int resync_all(struct domain *d, 
   1.449              perfc_incr_histo(shm_hl2_updates, changed, PT_UPDATES);
   1.450              break;
   1.451          }
   1.452 -#else
   1.453 +#elif CONFIG_PAGING_LEVELS >= 3
   1.454          case PGT_l2_shadow:
   1.455          case PGT_l3_shadow:
   1.456          {
   1.457 @@ -1521,19 +1556,35 @@ static int resync_all(struct domain *d, 
   1.458                        guest_pt[i], snapshot_pt[i], PAGE_FLAG_MASK) )
   1.459                  {
   1.460                      need_flush |= validate_entry_change(
   1.461 -                      d, &guest_pt[i], &shadow_pt[i],
   1.462 -                      shadow_type_to_level(stype));
   1.463 +                        d, &guest_pt[i], &shadow_pt[i],
   1.464 +                        shadow_type_to_level(stype));
   1.465                      changed++;
   1.466                  }
   1.467 +#if CONFIG_PAGING_LEVELS == 3
   1.468 +                if ( stype == PGT_l3_shadow ) 
   1.469 +                {
   1.470 +                    if ( entry_get_value(guest_pt[i]) != 0 ) 
   1.471 +                        max = i;
   1.472 +
   1.473 +                    if ( !(entry_get_flags(guest_pt[i]) & _PAGE_PRESENT) &&
   1.474 +                         unlikely(entry_get_value(guest_pt[i]) != 0) &&
   1.475 +                         !unshadow &&
   1.476 +                         (frame_table[smfn].u.inuse.type_info & PGT_pinned) )
   1.477 +                        unshadow = 1;
   1.478 +                }
   1.479 +#endif
   1.480              }
   1.481 +
   1.482 +            if ( d->arch.ops->guest_paging_levels == PAGING_L3
   1.483 +                 && max == -1 && stype == PGT_l3_shadow )
   1.484 +                unshadow = 1;
   1.485 +
   1.486 +            perfc_incrc(resync_l3);
   1.487 +            perfc_incr_histo(shm_l3_updates, changed, PT_UPDATES);
   1.488              break;
   1.489 -
   1.490 -
   1.491          }
   1.492          case PGT_l4_shadow:
   1.493          {
   1.494 -            int max = -1;
   1.495 -
   1.496              guest_root_pgentry_t *guest_root = guest;
   1.497              l4_pgentry_t *shadow4 = shadow;
   1.498              guest_root_pgentry_t *snapshot_root = snapshot;
   1.499 @@ -1547,7 +1598,8 @@ static int resync_all(struct domain *d, 
   1.500                  if ( root_entry_has_changed(
   1.501                          new_root_e, snapshot_root[i], PAGE_FLAG_MASK))
   1.502                  {
   1.503 -                    if (d->arch.ops->guest_paging_levels == PAGING_L4) {
   1.504 +                    if ( d->arch.ops->guest_paging_levels == PAGING_L4 ) 
   1.505 +                    {
   1.506                          need_flush |= validate_entry_change(
   1.507                            d, (pgentry_64_t *)&new_root_e,
   1.508                            (pgentry_64_t *)&shadow4[i], shadow_type_to_level(stype));
   1.509 @@ -1563,9 +1615,9 @@ static int resync_all(struct domain *d, 
   1.510  
   1.511                  //  Need a better solution in the long term.
   1.512                  if ( !(guest_root_get_flags(new_root_e) & _PAGE_PRESENT) &&
   1.513 -                  unlikely(guest_root_get_intpte(new_root_e) != 0) &&
   1.514 -                  !unshadow &&
   1.515 -                  (frame_table[smfn].u.inuse.type_info & PGT_pinned) )
   1.516 +                     unlikely(guest_root_get_intpte(new_root_e) != 0) &&
   1.517 +                     !unshadow &&
   1.518 +                     (frame_table[smfn].u.inuse.type_info & PGT_pinned) )
   1.519                      unshadow = 1;
   1.520              }
   1.521              if ( max == -1 )
   1.522 @@ -1575,7 +1627,7 @@ static int resync_all(struct domain *d, 
   1.523              break;
   1.524          }
   1.525  
   1.526 -#endif
   1.527 +#endif /* CONFIG_PAGING_LEVELS >= 3 */
   1.528          default:
   1.529              BUG();
   1.530          }
   1.531 @@ -1589,7 +1641,7 @@ static int resync_all(struct domain *d, 
   1.532          {
   1.533              perfc_incrc(unshadow_l2_count);
   1.534              shadow_unpin(smfn);
   1.535 -#if defined (__i386__)
   1.536 +#if CONFIG_PAGING_LEVELS == 2
   1.537              if ( unlikely(shadow_mode_external(d)) )
   1.538              {
   1.539                  unsigned long hl2mfn;
   1.540 @@ -1660,19 +1712,24 @@ static void sync_all(struct domain *d)
   1.541      // Second, resync all L1 pages, then L2 pages, etc...
   1.542      //
   1.543      need_flush |= resync_all(d, PGT_l1_shadow);
   1.544 -#if defined (__i386__)
   1.545 -    if ( shadow_mode_translate(d) )
   1.546 +
   1.547 +#if CONFIG_PAGING_LEVELS == 2
   1.548 +    if ( d->arch.ops->guest_paging_levels == PAGING_L2 &&
   1.549 +         shadow_mode_translate(d) )  
   1.550 +    {
   1.551          need_flush |= resync_all(d, PGT_hl2_shadow);
   1.552 +    }
   1.553  #endif
   1.554  
   1.555 -    /*
   1.556 -     * Fixme: for i386 host
   1.557 -     */
   1.558 -    if (d->arch.ops->guest_paging_levels == PAGING_L4) {
   1.559 -        need_flush |= resync_all(d, PGT_l2_shadow);
   1.560 +    need_flush |= resync_all(d, PGT_l2_shadow);
   1.561 +
   1.562 +#if CONFIG_PAGING_LEVELS >= 3
   1.563 +    if (d->arch.ops->guest_paging_levels >= PAGING_L3) 
   1.564 +    {
   1.565          need_flush |= resync_all(d, PGT_l3_shadow);
   1.566 +        need_flush |= resync_all(d, PGT_l4_shadow);
   1.567      }
   1.568 -    need_flush |= resync_all(d, PGT_l4_shadow);
   1.569 +#endif
   1.570  
   1.571      if ( need_flush && !unlikely(shadow_mode_external(d)) )
   1.572          local_flush_tlb();
   1.573 @@ -1749,7 +1806,7 @@ static inline int l1pte_read_fault(
   1.574  
   1.575      return 1;
   1.576  }
   1.577 -#if CONFIG_PAGING_LEVELS <= 3
   1.578 +#if CONFIG_PAGING_LEVELS == 2
   1.579  static int shadow_fault_32(unsigned long va, struct cpu_user_regs *regs)
   1.580  {
   1.581      l1_pgentry_t gpte, spte, orig_gpte;
   1.582 @@ -1888,7 +1945,20 @@ fail:
   1.583      shadow_unlock(d);
   1.584      return 0;
   1.585  }
   1.586 -#endif
   1.587 +#endif /* CONFIG_PAGING_LEVELS == 2 */
   1.588 +
   1.589 +static inline unsigned long va_to_l1mfn(struct vcpu *v, unsigned long va)
   1.590 +{
   1.591 +    struct domain *d = v->domain;
   1.592 +    guest_l2_pgentry_t gl2e = {0};
   1.593 +
   1.594 +    __guest_get_l2e(v, va, &gl2e);
   1.595 +    
   1.596 +    if ( unlikely(!(guest_l2e_get_flags(gl2e) & _PAGE_PRESENT)) )
   1.597 +        return INVALID_MFN;
   1.598 +
   1.599 +    return __gpfn_to_mfn(d, l2e_get_pfn(gl2e));
   1.600 +}
   1.601  
   1.602  static int do_update_va_mapping(unsigned long va,
   1.603                                  l1_pgentry_t val,
   1.604 @@ -1900,8 +1970,6 @@ static int do_update_va_mapping(unsigned
   1.605  
   1.606      shadow_lock(d);
   1.607  
   1.608 -    //printk("%s(va=%p, val=%p)\n", __func__, (void *)va, (void *)l1e_get_intpte(val));
   1.609 -
   1.610      // This is actually overkill - we don't need to sync the L1 itself,
   1.611      // just everything involved in getting to this L1 (i.e. we need
   1.612      // linear_pg_table[l1_linear_offset(va)] to be in sync)...
   1.613 @@ -1919,7 +1987,6 @@ static int do_update_va_mapping(unsigned
   1.614      if ( shadow_mode_log_dirty(d) )
   1.615          __mark_dirty(d, va_to_l1mfn(v, va));
   1.616  
   1.617 -// out:
   1.618      shadow_unlock(d);
   1.619  
   1.620      return rc;
   1.621 @@ -1955,7 +2022,7 @@ static int do_update_va_mapping(unsigned
   1.622  static void shadow_update_pagetables(struct vcpu *v)
   1.623  {
   1.624      struct domain *d = v->domain;
   1.625 -#if defined (__x86_64__)
   1.626 +#if CONFIG_PAGING_LEVELS == 4
   1.627      unsigned long gmfn = ((v->arch.flags & TF_kernel_mode)?
   1.628                            pagetable_get_pfn(v->arch.guest_table) :
   1.629                            pagetable_get_pfn(v->arch.guest_table_user));
   1.630 @@ -1991,7 +2058,8 @@ static void shadow_update_pagetables(str
   1.631      /*
   1.632       *  arch.shadow_table
   1.633       */
   1.634 -    if ( unlikely(!(smfn = __shadow_status(d, gpfn, PGT_base_page_table))) ) {
   1.635 +    if ( unlikely(!(smfn = __shadow_status(d, gpfn, PGT_base_page_table))) ) 
   1.636 +    {
   1.637  #if CONFIG_PAGING_LEVELS == 2
   1.638          smfn = shadow_l2_table(d, gpfn, gmfn);
   1.639  #elif CONFIG_PAGING_LEVELS == 3
   1.640 @@ -2013,7 +2081,7 @@ static void shadow_update_pagetables(str
   1.641       * arch.shadow_vtable
   1.642       */
   1.643      if ( max_mode == SHM_external
   1.644 -#if CONFIG_PAGING_LEVELS >=4
   1.645 +#if CONFIG_PAGING_LEVELS >=3
   1.646           || max_mode & SHM_enable
   1.647  #endif
   1.648          )
   1.649 @@ -2068,7 +2136,7 @@ static void shadow_update_pagetables(str
   1.650          // XXX - maybe this can be optimized somewhat??
   1.651          local_flush_tlb();
   1.652      }
   1.653 -#endif
   1.654 +#endif /* CONFIG_PAGING_LEVELS == 2 */
   1.655  
   1.656  #if CONFIG_PAGING_LEVELS == 3
   1.657      /* FIXME: PAE code to be written */
   1.658 @@ -2373,7 +2441,7 @@ static int check_l2_table(
   1.659                 l2e_get_intpte(match));
   1.660      }
   1.661  
   1.662 -#ifdef __i386__
   1.663 +#if CONFIG_PAGING_LEVELS == 2
   1.664      if ( shadow_mode_external(d) )
   1.665          limit = L2_PAGETABLE_ENTRIES;
   1.666      else
   1.667 @@ -2405,7 +2473,7 @@ static int check_l2_table(
   1.668  int _check_pagetable(struct vcpu *v, char *s)
   1.669  {
   1.670      struct domain *d = v->domain;
   1.671 -#if defined (__x86_64__)
   1.672 +#if CONFIG_PAGING_LEVELS == 4
   1.673      pagetable_t pt = ((v->arch.flags & TF_kernel_mode)?
   1.674                        v->arch.guest_table : v->arch.guest_table_user);
   1.675  #else
   1.676 @@ -2447,7 +2515,7 @@ int _check_pagetable(struct vcpu *v, cha
   1.677      spl2e = (l2_pgentry_t *) map_domain_page(smfn);
   1.678  
   1.679      /* Go back and recurse. */
   1.680 -#ifdef __i386__
   1.681 +#if CONFIG_PAGING_LEVELS == 2
   1.682      if ( shadow_mode_external(d) )
   1.683          limit = L2_PAGETABLE_ENTRIES;
   1.684      else
   1.685 @@ -2551,60 +2619,109 @@ int _check_all_pagetables(struct vcpu *v
   1.686  
   1.687  #if CONFIG_PAGING_LEVELS == 3
   1.688  static unsigned long shadow_l3_table(
   1.689 -  struct domain *d, unsigned long gpfn, unsigned long gmfn)
   1.690 +    struct domain *d, unsigned long gpfn, unsigned long gmfn)
   1.691  {
   1.692 -    BUG();                      /* not implemenated yet */
   1.693 -    return 42;
   1.694 +    unsigned long smfn;
   1.695 +    l3_pgentry_t *spl3e;
   1.696 +
   1.697 +    perfc_incrc(shadow_l3_table_count);
   1.698 +
   1.699 +    if ( unlikely(!(smfn = alloc_shadow_page(d, gpfn, gmfn, PGT_l3_shadow))) )
   1.700 +    {
   1.701 +        printk("Couldn't alloc an L4 shadow for pfn=%lx mfn=%lx\n", gpfn, gmfn);
   1.702 +        BUG(); /* XXX Deal gracefully with failure. */
   1.703 +    }
   1.704 +
   1.705 +    spl3e = (l3_pgentry_t *)map_domain_page(smfn);
   1.706 +
   1.707 +    /* Make the self entry */
   1.708 +    spl3e[PAE_SHADOW_SELF_ENTRY] = l3e_from_pfn(smfn, __PAGE_HYPERVISOR);
   1.709 +
   1.710 +    if ( (PGT_base_page_table == PGT_l3_page_table) &&
   1.711 +         !shadow_mode_external(d) ) {
   1.712 +        int i;
   1.713 +        unsigned long g2mfn, s2mfn;
   1.714 +        l2_pgentry_t *spl2e;
   1.715 +        l3_pgentry_t *gpl3e;
   1.716 +
   1.717 +        /* Get the top entry */
   1.718 +        gpl3e = (l3_pgentry_t *)map_domain_page(gmfn);
   1.719 +
   1.720 +        if ( !(l3e_get_flags(gpl3e[L3_PAGETABLE_ENTRIES - 1]) & _PAGE_PRESENT) )
   1.721 +        {
   1.722 +            BUG();
   1.723 +        }
   1.724 +
   1.725 +        g2mfn = l3e_get_pfn(gpl3e[L3_PAGETABLE_ENTRIES - 1]);
   1.726 +
   1.727 +        /* NB. g2mfn should be same as g2pfn */
   1.728 +        if (!(s2mfn = __shadow_status(d, g2mfn, PGT_l2_shadow))) {
   1.729 +            if ( unlikely(!(s2mfn =
   1.730 +                    alloc_shadow_page(d, g2mfn, g2mfn, PGT_l2_shadow))) ) {
   1.731 +                printk("Couldn't alloc an L2 shadow for pfn=%lx mfn=%lx\n",
   1.732 +                    g2mfn, g2mfn);
   1.733 +                BUG(); /* XXX Deal gracefully with failure. */
   1.734 +            }
   1.735 +
   1.736 +            if (!get_shadow_ref(s2mfn))
   1.737 +                BUG();
   1.738 +        } 
   1.739 +            
   1.740 +        /* Map shadow L2 into shadow L3 */
   1.741 +        spl3e[L3_PAGETABLE_ENTRIES - 1] = l3e_from_pfn(s2mfn, _PAGE_PRESENT);
   1.742 +        shadow_update_min_max(smfn, L3_PAGETABLE_ENTRIES -1);
   1.743 +
   1.744 +        /*  
   1.745 +         * Xen private mappings. Do the similar things as
   1.746 +         * create_pae_xen_mappings().
   1.747 +         */
   1.748 +        spl2e = (l2_pgentry_t *)map_domain_page(s2mfn);
   1.749 +
   1.750 +        /*
   1.751 +         * When we free L2 pages, we need to tell if the page contains
   1.752 +         * Xen private mappings. Use the va_mask part.
   1.753 +         */
   1.754 +        frame_table[s2mfn].u.inuse.type_info |= 
   1.755 +            (unsigned long) 3 << PGT_score_shift; 
   1.756 +
   1.757 +        memset(spl2e, 0, 
   1.758 +               (L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES-1)) * sizeof(l2_pgentry_t));
   1.759 +
   1.760 +        memcpy(&spl2e[L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES-1)],
   1.761 +           &idle_pg_table_l2[L2_PAGETABLE_FIRST_XEN_SLOT],
   1.762 +           L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t));       
   1.763 +
   1.764 +        for ( i = 0; i < (PERDOMAIN_MBYTES >> (L2_PAGETABLE_SHIFT - 20)); i++ )
   1.765 +            spl2e[l2_table_offset(PERDOMAIN_VIRT_START) + i] =
   1.766 +                l2e_from_page(
   1.767 +                    virt_to_page(page_get_owner(&frame_table[gmfn])->arch.mm_perdomain_pt) + i, 
   1.768 +                    __PAGE_HYPERVISOR);
   1.769 +        for ( i = 0; i < (LINEARPT_MBYTES >> (L2_PAGETABLE_SHIFT - 20)); i++ )
   1.770 +            spl2e[l2_table_offset(LINEAR_PT_VIRT_START) + i] =
   1.771 +                (l3e_get_flags(gpl3e[i]) & _PAGE_PRESENT) ?
   1.772 +                l2e_from_pfn(l3e_get_pfn(gpl3e[i]), __PAGE_HYPERVISOR) :
   1.773 +                l2e_empty();
   1.774 +       
   1.775 +        unmap_domain_page(spl2e);
   1.776 +        unmap_domain_page(gpl3e);
   1.777 +    }
   1.778 +    unmap_domain_page(spl3e);
   1.779 +
   1.780 +    return smfn;
   1.781  }
   1.782 +
   1.783  static unsigned long gva_to_gpa_pae(unsigned long gva)
   1.784  {
   1.785      BUG();
   1.786      return 43;
   1.787  }
   1.788 -#endif
   1.789 -
   1.790 -#if CONFIG_PAGING_LEVELS >= 4
   1.791 +#endif /* CONFIG_PAGING_LEVELS == 3 */
   1.792 +
   1.793 +#if CONFIG_PAGING_LEVELS == 4
   1.794  /****************************************************************************/
   1.795  /* 64-bit shadow-mode code testing */
   1.796  /****************************************************************************/
   1.797  /*
   1.798 - * validate_bl2e_change()
   1.799 - * The code is for 32-bit VMX gues on 64-bit host.
   1.800 - * To sync guest L2.
   1.801 - */
   1.802 -static inline void
   1.803 -validate_bl2e_change(
   1.804 -  struct domain *d,
   1.805 -  guest_root_pgentry_t *new_gle_p,
   1.806 -  pgentry_64_t *shadow_l3,
   1.807 -  int index)
   1.808 -{
   1.809 -    int sl3_idx, sl2_idx;
   1.810 -    unsigned long sl2mfn, sl1mfn;
   1.811 -    pgentry_64_t *sl2_p;
   1.812 -
   1.813 -    /* Using guest l2 pte index to get shadow l3&l2 index
   1.814 -     * index: 0 ~ 1023, PAGETABLE_ENTRIES: 512
   1.815 -     */
   1.816 -    sl3_idx = index / (PAGETABLE_ENTRIES / 2);
   1.817 -    sl2_idx = (index % (PAGETABLE_ENTRIES / 2)) * 2;
   1.818 -
   1.819 -    sl2mfn = entry_get_pfn(shadow_l3[sl3_idx]);
   1.820 -    sl2_p = (pgentry_64_t *)map_domain_page(sl2mfn);
   1.821 -
   1.822 -    validate_pde_change(
   1.823 -        d, *(guest_l2_pgentry_t *)new_gle_p, (l2_pgentry_t *)&sl2_p[sl2_idx]);
   1.824 -
   1.825 -    /* Mapping the second l1 shadow page */
   1.826 -    if (entry_get_flags(sl2_p[sl2_idx]) & _PAGE_PRESENT) {
   1.827 -       sl1mfn = entry_get_pfn(sl2_p[sl2_idx]);
   1.828 -       sl2_p[sl2_idx + 1] =
   1.829 -            entry_from_pfn(sl1mfn + 1, entry_get_flags(sl2_p[sl2_idx]));
   1.830 -    }
   1.831 -    unmap_domain_page(sl2_p);
   1.832 -}
   1.833 -
   1.834 -/*
   1.835   * init_bl2() is for 32-bit VMX guest on 64-bit host
   1.836   * Using 1 shadow L4(l3) and 4 shadow L2s to simulate guest L2
   1.837   */
   1.838 @@ -2699,6 +2816,47 @@ static unsigned long shadow_l4_table(
   1.839      ESH_LOG("shadow_l4_table(%lx -> %lx)", gmfn, smfn);
   1.840      return smfn;
   1.841  }
   1.842 +#endif /* CONFIG_PAGING_LEVELS == 4 */
   1.843 +
   1.844 +#if CONFIG_PAGING_LEVELS >= 3
   1.845 +/*
   1.846 + * validate_bl2e_change()
    1.847 + * The code is for 32-bit VMX guest on 64-bit host.
   1.848 + * To sync guest L2.
   1.849 + */
   1.850 +
   1.851 +static inline void
   1.852 +validate_bl2e_change(
   1.853 +    struct domain *d,
   1.854 +    guest_root_pgentry_t *new_gle_p,
   1.855 +    pgentry_64_t *shadow_l3,
   1.856 +    int index)
   1.857 +{
   1.858 +    int sl3_idx, sl2_idx;
   1.859 +    unsigned long sl2mfn, sl1mfn;
   1.860 +    pgentry_64_t *sl2_p;
   1.861 +
   1.862 +    /* Using guest l2 pte index to get shadow l3&l2 index
   1.863 +     * index: 0 ~ 1023, PAGETABLE_ENTRIES: 512
   1.864 +     */
   1.865 +    sl3_idx = index / (PAGETABLE_ENTRIES / 2);
   1.866 +    sl2_idx = (index % (PAGETABLE_ENTRIES / 2)) * 2;
   1.867 +
   1.868 +    sl2mfn = entry_get_pfn(shadow_l3[sl3_idx]);
   1.869 +    sl2_p = (pgentry_64_t *)map_domain_page(sl2mfn);
   1.870 +
   1.871 +    validate_pde_change(
   1.872 +        d, *(guest_l2_pgentry_t *)new_gle_p, (l2_pgentry_t *)&sl2_p[sl2_idx]);
   1.873 +
   1.874 +    /* Mapping the second l1 shadow page */
   1.875 +    if (entry_get_flags(sl2_p[sl2_idx]) & _PAGE_PRESENT) {
   1.876 +       sl1mfn = entry_get_pfn(sl2_p[sl2_idx]);
   1.877 +       sl2_p[sl2_idx + 1] =
   1.878 +            entry_from_pfn(sl1mfn + 1, entry_get_flags(sl2_p[sl2_idx]));
   1.879 +    }
   1.880 +    unmap_domain_page(sl2_p);
   1.881 +
   1.882 +}
   1.883  
   1.884  /*
   1.885   * This shadow_mark_va_out_of_sync() is for 2M page shadow
   1.886 @@ -2715,7 +2873,6 @@ static void shadow_mark_va_out_of_sync_2
   1.887          BUG();
   1.888  }
   1.889  
   1.890 -
   1.891  static int get_shadow_mfn(struct domain *d, unsigned long gpfn, unsigned long *spmfn, u32 flag)
   1.892  {
   1.893      unsigned long gmfn;
   1.894 @@ -2764,7 +2921,7 @@ static int get_shadow_mfn(struct domain 
   1.895  static void shadow_map_into_current(struct vcpu *v,
   1.896    unsigned long va, unsigned int from, unsigned int to)
   1.897  {
   1.898 -    pgentry_64_t gle, sle;
   1.899 +    pgentry_64_t gle = {0}, sle;
   1.900      unsigned long gpfn, smfn;
   1.901  
   1.902      if (from == PAGING_L1 && to == PAGING_L2) {
   1.903 @@ -2836,8 +2993,9 @@ static void shadow_set_l2e_64(unsigned l
   1.904  }
   1.905  
   1.906  
   1.907 -static void shadow_set_l1e_64(unsigned long va, pgentry_64_t *sl1e_p,
   1.908 -  int create_l1_shadow)
   1.909 +static void shadow_set_l1e_64(
   1.910 +    unsigned long va, pgentry_64_t *sl1e_p,
   1.911 +    int create_l1_shadow)
   1.912  {
   1.913      struct vcpu *v = current;
   1.914      struct domain *d = v->domain;
   1.915 @@ -2848,19 +3006,21 @@ static void shadow_set_l1e_64(unsigned l
   1.916      int i;
   1.917      unsigned long orig_va = 0;
   1.918  
   1.919 -    if (d->arch.ops->guest_paging_levels == PAGING_L2) {
   1.920 +    if ( d->arch.ops->guest_paging_levels == PAGING_L2 ) 
   1.921 +    {
   1.922          /* This is for 32-bit VMX guest on 64-bit host */
   1.923          orig_va = va;
   1.924          va = va & (~((1<<L2_PAGETABLE_SHIFT_32)-1));
   1.925      }
   1.926  
   1.927 -    for (i = PAGING_L4; i >= PAGING_L2; i--) {
   1.928 +    for (i = PAGING_L4; i >= PAGING_L2; i--) 
   1.929 +    {
   1.930          if (!__rw_entry(v, va, &sle, SHADOW_ENTRY | GET_ENTRY | i)) {
   1.931              printk("<%s> i = %d\n", __func__, i);
   1.932              BUG();
   1.933          }
   1.934 -        if (!(entry_get_flags(sle) & _PAGE_PRESENT)) {
   1.935 -            if (create_l1_shadow) {
   1.936 +        if ( !(entry_get_flags(sle) & _PAGE_PRESENT) ) {
   1.937 +            if ( create_l1_shadow ) {
   1.938                  perfc_incrc(shadow_set_l3e_force_map);
   1.939                  shadow_map_into_current(v, va, i-1, i);
   1.940                  __rw_entry(v, va, &sle, SHADOW_ENTRY | GET_ENTRY | i);
   1.941 @@ -2870,12 +3030,12 @@ static void shadow_set_l1e_64(unsigned l
   1.942  #endif
   1.943              }
   1.944          }
   1.945 -        if(i < PAGING_L4)
   1.946 +        if( i < PAGING_L4 )
   1.947              shadow_update_min_max(entry_get_pfn(sle_up), table_offset_64(va, i));
   1.948          sle_up = sle;
   1.949      }
   1.950  
   1.951 -    if (d->arch.ops->guest_paging_levels == PAGING_L2) {
   1.952 +    if ( d->arch.ops->guest_paging_levels == PAGING_L2 ) {
   1.953          va = orig_va;
   1.954      }
   1.955  
   1.956 @@ -2914,7 +3074,7 @@ static inline int l2e_rw_fault(
   1.957      l1_pgentry_t sl1e;
   1.958      l1_pgentry_t old_sl1e;
   1.959      l2_pgentry_t sl2e;
   1.960 -    unsigned long nx = 0;
   1.961 +    u64 nx = 0;
   1.962      int put_ref_check = 0;
   1.963      /* Check if gpfn is 2M aligned */
   1.964  
   1.965 @@ -2929,7 +3089,7 @@ static inline int l2e_rw_fault(
   1.966      l2e_remove_flags(tmp_l2e, _PAGE_PSE);
   1.967      if (l2e_get_flags(gl2e) & _PAGE_NX) {
   1.968          l2e_remove_flags(tmp_l2e, _PAGE_NX);
   1.969 -        nx = 1UL << 63;
   1.970 +        nx = 1ULL << 63;
   1.971      }
   1.972  
   1.973  
   1.974 @@ -3037,115 +3197,162 @@ static inline int l2e_rw_fault(
   1.975   * else return 0.
   1.976   */
   1.977  #if defined( GUEST_PGENTRY_32 )
   1.978 -static inline int guest_page_fault(struct vcpu *v,
   1.979 -  unsigned long va, unsigned int error_code,
   1.980 -  guest_l2_pgentry_t *gpl2e, guest_l1_pgentry_t *gpl1e)
   1.981 +static inline int guest_page_fault(
   1.982 +    struct vcpu *v,
   1.983 +    unsigned long va, unsigned int error_code,
   1.984 +    guest_l2_pgentry_t *gpl2e, guest_l1_pgentry_t *gpl1e)
   1.985  {
   1.986      /* The following check for 32-bit guest on 64-bit host */
   1.987  
   1.988      __guest_get_l2e(v, va, gpl2e);
   1.989  
   1.990      /* Check the guest L2 page-table entry first*/
   1.991 -    if (unlikely(!(guest_l2e_get_flags(*gpl2e) & _PAGE_PRESENT)))
   1.992 +    if ( unlikely(!(guest_l2e_get_flags(*gpl2e) & _PAGE_PRESENT)) )
   1.993          return 1;
   1.994  
   1.995 -    if (error_code & ERROR_W) {
   1.996 -        if (unlikely(!(guest_l2e_get_flags(*gpl2e) & _PAGE_RW)))
   1.997 +    if ( error_code & ERROR_W ) 
   1.998 +    {
   1.999 +        if ( unlikely(!(guest_l2e_get_flags(*gpl2e) & _PAGE_RW)) )
  1.1000              return 1;
  1.1001      }
  1.1002 -    if (error_code & ERROR_U) {
  1.1003 -        if (unlikely(!(guest_l2e_get_flags(*gpl2e) & _PAGE_USER)))
  1.1004 +
  1.1005 +    if ( error_code & ERROR_U ) 
  1.1006 +    {
  1.1007 +        if ( unlikely(!(guest_l2e_get_flags(*gpl2e) & _PAGE_USER)) )
  1.1008              return 1;
  1.1009      }
  1.1010  
  1.1011 -    if (guest_l2e_get_flags(*gpl2e) & _PAGE_PSE)
  1.1012 +    if ( guest_l2e_get_flags(*gpl2e) & _PAGE_PSE )
  1.1013          return 0;
  1.1014  
  1.1015      __guest_get_l1e(v, va, gpl1e);
  1.1016  
  1.1017      /* Then check the guest L1 page-table entry */
  1.1018 -    if (unlikely(!(guest_l1e_get_flags(*gpl1e) & _PAGE_PRESENT)))
  1.1019 +    if ( unlikely(!(guest_l1e_get_flags(*gpl1e) & _PAGE_PRESENT)) )
  1.1020          return 1;
  1.1021  
  1.1022 -    if (error_code & ERROR_W) {
  1.1023 -        if (unlikely(!(guest_l1e_get_flags(*gpl1e) & _PAGE_RW)))
  1.1024 +    if ( error_code & ERROR_W ) 
  1.1025 +    {
  1.1026 +        if ( unlikely(!(guest_l1e_get_flags(*gpl1e) & _PAGE_RW)) )
  1.1027              return 1;
  1.1028      }
  1.1029 -    if (error_code & ERROR_U) {
  1.1030 -        if (unlikely(!(guest_l1e_get_flags(*gpl1e) & _PAGE_USER)))
  1.1031 +
  1.1032 +    if ( error_code & ERROR_U ) 
  1.1033 +    {
  1.1034 +        if ( unlikely(!(guest_l1e_get_flags(*gpl1e) & _PAGE_USER)) )
  1.1035              return 1;
  1.1036      }
  1.1037  
  1.1038      return 0;
  1.1039  }
  1.1040  #else
  1.1041 -static inline int guest_page_fault(struct vcpu *v,
  1.1042 -  unsigned long va, unsigned int error_code,
  1.1043 -  guest_l2_pgentry_t *gpl2e, guest_l1_pgentry_t *gpl1e)
  1.1044 +static inline int guest_page_fault(
  1.1045 +    struct vcpu *v,
  1.1046 +    unsigned long va, unsigned int error_code,
  1.1047 +    guest_l2_pgentry_t *gpl2e, guest_l1_pgentry_t *gpl1e)
  1.1048  {
  1.1049      struct domain *d = v->domain;
  1.1050 -    pgentry_64_t gle, *lva;
  1.1051 -    unsigned long mfn;
  1.1052 +    pgentry_64_t gle;
  1.1053 +    unsigned long gpfn = 0, mfn;
  1.1054      int i;
  1.1055  
  1.1056 -    __rw_entry(v, va, &gle, GUEST_ENTRY | GET_ENTRY | PAGING_L4);
  1.1057 -    if (unlikely(!(entry_get_flags(gle) & _PAGE_PRESENT)))
  1.1058 -        return 1;
  1.1059 -
  1.1060 -    if (error_code & ERROR_W) {
  1.1061 -        if (unlikely(!(entry_get_flags(gle) & _PAGE_RW)))
  1.1062 +    ASSERT( d->arch.ops->guest_paging_levels >= PAGING_L3 );
  1.1063 +
  1.1064 +#if CONFIG_PAGING_LEVELS == 4
  1.1065 +    if ( d->arch.ops->guest_paging_levels == PAGING_L4 ) 
  1.1066 +    {
  1.1067 +        __rw_entry(v, va, &gle, GUEST_ENTRY | GET_ENTRY | PAGING_L4);
  1.1068 +        if ( unlikely(!(entry_get_flags(gle) & _PAGE_PRESENT)) )
  1.1069              return 1;
  1.1070 +
  1.1071 +        if ( error_code & ERROR_W )
  1.1072 +        {
  1.1073 +            if ( unlikely(!(entry_get_flags(gle) & _PAGE_RW)) )
  1.1074 +                return 1;
  1.1075 +        }
  1.1076 +
  1.1077 +        if ( error_code & ERROR_U )
  1.1078 +        {
  1.1079 +            if ( unlikely(!(entry_get_flags(gle) & _PAGE_USER)) )
  1.1080 +                return 1;
  1.1081 +        }
  1.1082 +        gpfn = entry_get_pfn(gle);
  1.1083      }
  1.1084 -    if (error_code & ERROR_U) {
  1.1085 -        if (unlikely(!(entry_get_flags(gle) & _PAGE_USER)))
  1.1086 -            return 1;
  1.1087 +#endif
  1.1088 +
  1.1089 +#if CONFIG_PAGING_LEVELS >= 3
  1.1090 +    if ( d->arch.ops->guest_paging_levels == PAGING_L3 ) 
  1.1091 +    {
  1.1092 +        gpfn = pagetable_get_pfn(v->arch.guest_table);
  1.1093      }
  1.1094 -    for (i = PAGING_L3; i >= PAGING_L1; i--) {
  1.1095 +#endif
  1.1096 +
  1.1097 +    for ( i = PAGING_L3; i >= PAGING_L1; i-- ) 
  1.1098 +    {
  1.1099 +        pgentry_64_t *lva;
  1.1100          /*
  1.1101           * If it's not external mode, then mfn should be machine physical.
  1.1102           */
  1.1103 -        mfn = __gpfn_to_mfn(d, (entry_get_value(gle) >> PAGE_SHIFT));
  1.1104 -
  1.1105 -        lva = (pgentry_64_t *) phys_to_virt(
  1.1106 -          mfn << PAGE_SHIFT);
  1.1107 +        mfn = __gpfn_to_mfn(d, gpfn);
  1.1108 +
  1.1109 +        lva = (pgentry_64_t *) map_domain_page(mfn);
  1.1110          gle = lva[table_offset_64(va, i)];
  1.1111 -
  1.1112 -        if (unlikely(!(entry_get_flags(gle) & _PAGE_PRESENT)))
  1.1113 +        unmap_domain_page(lva);
  1.1114 +
  1.1115 +        gpfn = entry_get_pfn(gle);
  1.1116 +
  1.1117 +        if ( unlikely(!(entry_get_flags(gle) & _PAGE_PRESENT)) )
  1.1118              return 1;
  1.1119  
  1.1120 -        if (error_code & ERROR_W) {
  1.1121 -            if (unlikely(!(entry_get_flags(gle) & _PAGE_RW)))
  1.1122 -                return 1;
  1.1123 +        if ( i < PAGING_L3 ) 
  1.1124 +        {
  1.1125 +            if ( error_code & ERROR_W ) 
  1.1126 +            {
  1.1127 +                if ( unlikely(!(entry_get_flags(gle) & _PAGE_RW)) ) 
  1.1128 +                {
  1.1129 +                    if ( i == PAGING_L1 )
  1.1130 +                        if ( gpl1e )
  1.1131 +                            gpl1e->l1 = gle.lo;
  1.1132 +                    return 1;
  1.1133 +                }
  1.1134 +            }
  1.1135 +            if ( error_code & ERROR_U ) 
  1.1136 +            {
  1.1137 +                if ( unlikely(!(entry_get_flags(gle) & _PAGE_USER)) )
  1.1138 +                    return 1;
  1.1139 +            }
  1.1140          }
  1.1141 -        if (error_code & ERROR_U) {
  1.1142 -            if (unlikely(!(entry_get_flags(gle) & _PAGE_USER)))
  1.1143 -                return 1;
  1.1144 -        }
  1.1145 -
  1.1146 -        if (i == PAGING_L2) {
  1.1147 -            if (gpl2e)
  1.1148 +
  1.1149 +        if ( i == PAGING_L2 ) 
  1.1150 +        {
  1.1151 +            if ( gpl2e )
  1.1152                  gpl2e->l2 = gle.lo;
  1.1153 -
  1.1154 -            if (likely(entry_get_flags(gle) & _PAGE_PSE))
  1.1155 +            if ( likely(entry_get_flags(gle) & _PAGE_PSE) )
  1.1156                  return 0;
  1.1157 -
  1.1158          }
  1.1159  
  1.1160 -        if (i == PAGING_L1)
  1.1161 -            if (gpl1e)
  1.1162 +        if ( i == PAGING_L1 )
  1.1163 +            if ( gpl1e )
  1.1164                  gpl1e->l1 = gle.lo;
  1.1165      }
  1.1166 +
  1.1167      return 0;
  1.1168 +
  1.1169  }
  1.1170  #endif
  1.1171 +
  1.1172  static int shadow_fault_64(unsigned long va, struct cpu_user_regs *regs)
  1.1173  {
  1.1174      struct vcpu *v = current;
  1.1175      struct domain *d = v->domain;
  1.1176      guest_l2_pgentry_t gl2e;
  1.1177 -    guest_l1_pgentry_t gl1e;
  1.1178 +    guest_l1_pgentry_t gl1e, orig_gl1e;
  1.1179      l1_pgentry_t sl1e;
  1.1180  
  1.1181 +    gl1e = guest_l1e_empty(); gl2e = guest_l2e_empty();
  1.1182 +
  1.1183 +    sl1e = l1e_empty();
  1.1184 +
  1.1185      perfc_incrc(shadow_fault_calls);
  1.1186  
  1.1187      ESH_LOG("<shadow_fault_64> va=%lx,  rip = %lx, error code = %x\n",
  1.1188 @@ -3156,7 +3363,7 @@ static int shadow_fault_64(unsigned long
  1.1189       */
  1.1190      shadow_lock(d);
  1.1191  
  1.1192 -    /* XXX - FIX THIS COMMENT!!!
  1.1193 +    /*
  1.1194       * STEP 1. Check to see if this fault might have been caused by an
  1.1195       *         out-of-sync table page entry, or if we should pass this
  1.1196       *         fault onto the guest.
  1.1197 @@ -3166,67 +3373,122 @@ static int shadow_fault_64(unsigned long
  1.1198      /*
  1.1199       * STEP 2. Check if the fault belongs to guest
  1.1200       */
  1.1201 -    if ( guest_page_fault(
  1.1202 -            v, va, regs->error_code, &gl2e, &gl1e) ) {
  1.1203 +    if ( guest_page_fault(v, va, regs->error_code, &gl2e, &gl1e) ) 
  1.1204 +    {
  1.1205 +        if ( unlikely(shadow_mode_log_dirty(d)) && l1e_get_intpte(gl1e) != 0 )
  1.1206 +            goto check_writeable;
  1.1207 +        
  1.1208          goto fail;
  1.1209      }
  1.1210  
  1.1211 -    if ( unlikely(!(guest_l2e_get_flags(gl2e) & _PAGE_PSE)) ) {
  1.1212 -        /*
  1.1213 -         * Handle 4K pages here
  1.1214 -         */
  1.1215 -
  1.1216 -        /* Write fault? */
  1.1217 -        if ( regs->error_code & 2 ) {
  1.1218 -            if ( !l1pte_write_fault(v, &gl1e, &sl1e, va) ) {
  1.1219 +    if ( unlikely((guest_l2e_get_flags(gl2e) & _PAGE_PSE)) ) 
  1.1220 +        goto pse;
  1.1221 +
  1.1222 +    /*
  1.1223 +     * Handle 4K pages here
  1.1224 +     */
  1.1225 +check_writeable:
  1.1226 +    orig_gl1e = gl1e;
  1.1227 +    
  1.1228 +    /* Write fault? */
  1.1229 +    if ( regs->error_code & 2 ) 
  1.1230 +    {
  1.1231 +        int allow_writes = 0;
  1.1232 +
  1.1233 +        if ( unlikely(!(guest_l1e_get_flags(gl1e) & _PAGE_RW)) )
  1.1234 +        {
  1.1235 +            if ( shadow_mode_page_writable(va, regs, l1e_get_pfn(gl1e)) )
  1.1236 +            {
  1.1237 +                allow_writes = 1;
  1.1238 +                l1e_add_flags(gl1e, _PAGE_RW);
  1.1239 +            }
  1.1240 +            else
  1.1241 +            {
  1.1242 +                /* Write fault on a read-only mapping. */
  1.1243 +                SH_VVLOG("shadow_fault - EXIT: wr fault on RO page (%" PRIpte ")", 
  1.1244 +                         l1e_get_intpte(gl1e));
  1.1245 +                perfc_incrc(shadow_fault_bail_ro_mapping);
  1.1246                  goto fail;
  1.1247              }
  1.1248 -        } else {
  1.1249 -            l1pte_read_fault(d, &gl1e, &sl1e);
  1.1250 +        }
  1.1251 +
  1.1252 +        if ( !l1pte_write_fault(v, &gl1e, &sl1e, va) ) 
  1.1253 +        {
  1.1254 +            SH_VVLOG("shadow_fault - EXIT: l1pte_write_fault failed");
  1.1255 +            perfc_incrc(write_fault_bail);
  1.1256 +            shadow_unlock(d);
  1.1257 +            return 0;
  1.1258          }
  1.1259 -        /*
  1.1260 -         * STEP 3. Write guest/shadow l2e back
  1.1261 -         */
  1.1262 -        if (unlikely(!__guest_set_l1e(v, va, &gl1e))) {
  1.1263 + 
  1.1264 +        if (allow_writes)
  1.1265 +            l1e_remove_flags(gl1e, _PAGE_RW);
  1.1266 +    }
  1.1267 +    else 
  1.1268 +    {
  1.1269 +        if ( !l1pte_read_fault(d, &gl1e, &sl1e) )
  1.1270 +        {
  1.1271 +            SH_VVLOG("shadow_fault - EXIT: l1pte_read_fault failed");
  1.1272 +            perfc_incrc(read_fault_bail);
  1.1273 +            shadow_unlock(d);
  1.1274 +            return 0;
  1.1275 +        }
  1.1276 +    }
  1.1277 +
  1.1278 +    /*
  1.1279 +     * STEP 3. Write the modified shadow PTE and guest PTE back to the tables
  1.1280 +     */
  1.1281 +    if ( l1e_has_changed(orig_gl1e, gl1e, PAGE_FLAG_MASK) )
  1.1282 +    {
  1.1283 +        if (unlikely(!__guest_set_l1e(v, va, &gl1e))) 
  1.1284              domain_crash_synchronous();
  1.1285 -        }
  1.1286 -
  1.1287 -        ESH_LOG("gl1e: %lx, sl1e: %lx\n", l1e_get_intpte(gl1e), l1e_get_intpte(sl1e));
  1.1288 -        shadow_set_l1e_64(va, (pgentry_64_t *)&sl1e, 1);
  1.1289 -        /*
  1.1290 -         *  if necessary, record the page table page as dirty
  1.1291 -         */
  1.1292 -         if ( unlikely(shadow_mode_log_dirty(d)) )
  1.1293 +
  1.1294 +        // if necessary, record the page table page as dirty
  1.1295 +        if ( unlikely(shadow_mode_log_dirty(d)) )
  1.1296              __mark_dirty(d, __gpfn_to_mfn(d, l2e_get_pfn(gl2e)));
  1.1297 -
  1.1298 -    } else {
  1.1299 -        /*
  1.1300 -         * Handle 2M pages here
  1.1301 -         */
  1.1302 -        /* Write fault? */
  1.1303 -        if ( regs->error_code & 2 ) {
  1.1304 -            if ( !l2e_rw_fault(v, (l2_pgentry_t *)&gl2e, va, WRITE_FAULT) ) {
  1.1305 -                goto fail;
  1.1306 -            }
  1.1307 -        } else {
  1.1308 -            l2e_rw_fault(v, (l2_pgentry_t *)&gl2e, va, READ_FAULT);
  1.1309 +    }
  1.1310 +
  1.1311 +    shadow_set_l1e_64(va, (pgentry_64_t *)&sl1e, 1);
  1.1312 +
  1.1313 +    perfc_incrc(shadow_fault_fixed);
  1.1314 +    d->arch.shadow_fault_count++;
  1.1315 +
  1.1316 +    shadow_unlock(d);
  1.1317 +
  1.1318 +    return EXCRET_fault_fixed;
  1.1319 +
  1.1320 +pse:
  1.1321 +    /*
  1.1322 +     * Handle 2M pages here
  1.1323 +     */
  1.1324 +    if ( unlikely(!shadow_mode_external(d)) )
  1.1325 +        BUG();
  1.1326 +
  1.1327 +    /* Write fault? */
  1.1328 +    if ( regs->error_code & 2 ) 
  1.1329 +    {
  1.1330 +        if ( !l2e_rw_fault(v, (l2_pgentry_t *)&gl2e, va, WRITE_FAULT) ) 
  1.1331 +        {
  1.1332 +            goto fail;
  1.1333          }
  1.1334 -
  1.1335 -        /*
  1.1336 -         * STEP 3. Write guest/shadow l2e back
  1.1337 -         */
  1.1338 -
  1.1339 -        if ( unlikely(!__guest_set_l2e(v, va, &gl2e)) ) {
  1.1340 -            domain_crash_synchronous();
  1.1341 -        }
  1.1342 -
  1.1343 -        /*
  1.1344 -         * Todo: if necessary, record the page table page as dirty
  1.1345 -         */
  1.1346 -
  1.1347 -
  1.1348 +    } 
  1.1349 +    else 
  1.1350 +    {
  1.1351 +        l2e_rw_fault(v, (l2_pgentry_t *)&gl2e, va, READ_FAULT);
  1.1352      }
  1.1353  
  1.1354 +    /*
  1.1355 +     * STEP 3. Write guest/shadow l2e back
  1.1356 +     */
  1.1357 +
  1.1358 +    if ( unlikely(!__guest_set_l2e(v, va, &gl2e)) ) 
  1.1359 +    {
  1.1360 +        domain_crash_synchronous();
  1.1361 +    }
  1.1362 +
  1.1363 +    /*
  1.1364 +     * Todo: if necessary, record the page table page as dirty
  1.1365 +     */
  1.1366 +
  1.1367      perfc_incrc(shadow_fault_fixed);
  1.1368      d->arch.shadow_fault_count++;
  1.1369  
  1.1370 @@ -3257,6 +3519,7 @@ static void shadow_invlpg_64(struct vcpu
  1.1371      shadow_unlock(d);
  1.1372  }
  1.1373  
  1.1374 +#if CONFIG_PAGING_LEVELS == 4
  1.1375  static unsigned long gva_to_gpa_64(unsigned long gva)
  1.1376  {
  1.1377      struct vcpu *v = current;
  1.1378 @@ -3273,13 +3536,11 @@ static unsigned long gva_to_gpa_64(unsig
  1.1379          gpa = guest_l1e_get_paddr(gl1e) + (gva & ~PAGE_MASK);
  1.1380  
  1.1381      return gpa;
  1.1382 -
  1.1383  }
  1.1384  
  1.1385  #ifndef GUEST_PGENTRY_32
  1.1386 -
  1.1387  struct shadow_ops MODE_F_HANDLER = {
  1.1388 -    .guest_paging_levels              = 4,
  1.1389 +    .guest_paging_levels        = 4,
  1.1390      .invlpg                     = shadow_invlpg_64,
  1.1391      .fault                      = shadow_fault_64,
  1.1392      .update_pagetables          = shadow_update_pagetables,
  1.1393 @@ -3290,9 +3551,11 @@ struct shadow_ops MODE_F_HANDLER = {
  1.1394      .is_out_of_sync             = is_out_of_sync,
  1.1395      .gva_to_gpa                 = gva_to_gpa_64,
  1.1396  };
  1.1397 -#endif
  1.1398 -
  1.1399 -#endif
  1.1400 +#endif /* GUEST_PGENTRY_32 */
  1.1401 +#endif /* CONFIG_PAGING_LEVELS == 4 */
  1.1402 +
  1.1403 +#endif /* CONFIG_PAGING_LEVELS >= 3 */
  1.1404 +
  1.1405  
  1.1406  #if CONFIG_PAGING_LEVELS == 2
  1.1407  struct shadow_ops MODE_A_HANDLER = {
  1.1408 @@ -3309,10 +3572,11 @@ struct shadow_ops MODE_A_HANDLER = {
  1.1409  };
  1.1410  
  1.1411  #elif CONFIG_PAGING_LEVELS == 3
  1.1412 +
  1.1413  struct shadow_ops MODE_B_HANDLER = {
  1.1414 -    .guest_paging_levels              = 3,
  1.1415 -    .invlpg                     = shadow_invlpg_32,
  1.1416 -    .fault                      = shadow_fault_32,
  1.1417 +    .guest_paging_levels        = 3,
  1.1418 +    .invlpg                     = shadow_invlpg_64,
  1.1419 +    .fault                      = shadow_fault_64,
  1.1420      .update_pagetables          = shadow_update_pagetables,
  1.1421      .sync_all                   = sync_all,
  1.1422      .remove_all_write_access    = remove_all_write_access,
     2.1 --- a/xen/arch/x86/shadow32.c	Fri Nov 11 19:02:49 2005 +0100
     2.2 +++ b/xen/arch/x86/shadow32.c	Fri Nov 11 19:11:13 2005 +0100
     2.3 @@ -31,6 +31,8 @@
     2.4  #include <xen/trace.h>
     2.5  
     2.6  #define MFN_PINNED(_x) (frame_table[_x].u.inuse.type_info & PGT_pinned)
     2.7 +#define va_to_l1mfn(_ed, _va) \
     2.8 +    (l2e_get_pfn(linear_l2_table(_ed)[_va>>L2_PAGETABLE_SHIFT]))
     2.9  
    2.10  static void shadow_free_snapshot(struct domain *d,
    2.11                                   struct out_of_sync_entry *entry);
     3.1 --- a/xen/arch/x86/shadow_public.c	Fri Nov 11 19:02:49 2005 +0100
     3.2 +++ b/xen/arch/x86/shadow_public.c	Fri Nov 11 19:11:13 2005 +0100
     3.3 @@ -64,6 +64,9 @@ int shadow_set_guest_paging_levels(struc
     3.4  #if CONFIG_PAGING_LEVELS == 2
     3.5          if ( d->arch.ops != &MODE_A_HANDLER )
     3.6              d->arch.ops = &MODE_A_HANDLER;
     3.7 +#elif CONFIG_PAGING_LEVELS == 3
     3.8 +        if ( d->arch.ops != &MODE_B_HANDLER )
     3.9 +            d->arch.ops = &MODE_B_HANDLER;
    3.10  #elif CONFIG_PAGING_LEVELS == 4
    3.11          if ( d->arch.ops != &MODE_D_HANDLER )
    3.12              d->arch.ops = &MODE_D_HANDLER;
    3.13 @@ -138,7 +141,92 @@ unsigned long gva_to_gpa(unsigned long g
    3.14  }
    3.15  /****************************************************************************/
    3.16  /****************************************************************************/
    3.17 -#if CONFIG_PAGING_LEVELS >= 4
    3.18 +#if CONFIG_PAGING_LEVELS >= 3
    3.19 +
    3.20 +static void inline
    3.21 +free_shadow_fl1_table(struct domain *d, unsigned long smfn)
    3.22 +{
    3.23 +    l1_pgentry_t *pl1e = map_domain_page(smfn);
    3.24 +    int i;
    3.25 +
    3.26 +    for (i = 0; i < L1_PAGETABLE_ENTRIES; i++)
    3.27 +        put_page_from_l1e(pl1e[i], d);
    3.28 +}
    3.29 +
    3.30 +/*
    3.31 + * Free l2, l3, l4 shadow tables
    3.32 + */
    3.33 +
    3.34 +void free_fake_shadow_l2(struct domain *d,unsigned long smfn);
    3.35 +
    3.36 +static void inline
    3.37 +free_shadow_tables(struct domain *d, unsigned long smfn, u32 level)
    3.38 +{
    3.39 +    pgentry_64_t *ple = map_domain_page(smfn);
    3.40 +    int i, external = shadow_mode_external(d);
    3.41 +
    3.42 +#if CONFIG_PAGING_LEVELS >=3
    3.43 +    if ( d->arch.ops->guest_paging_levels == PAGING_L2 )
    3.44 +    {
    3.45 +        struct pfn_info *page = &frame_table[smfn];
    3.46 +        for ( i = 0; i < PDP_ENTRIES; i++ )
    3.47 +        {
    3.48 +            if ( entry_get_flags(ple[i]) & _PAGE_PRESENT )
    3.49 +                free_fake_shadow_l2(d,entry_get_pfn(ple[i]));
    3.50 +        }
    3.51 +
    3.52 +        page = &frame_table[entry_get_pfn(ple[0])];
    3.53 +        free_domheap_pages(page, SL2_ORDER);
    3.54 +        unmap_domain_page(ple);
    3.55 +    }
    3.56 +    else
    3.57 +#endif
    3.58 +    {
    3.59 +        /*
    3.60 +         * No Xen mappings in external pages
    3.61 +         */
    3.62 +        if ( external )
    3.63 +        {
    3.64 +            for ( i = 0; i < PAGETABLE_ENTRIES; i++ )
    3.65 +                if ( entry_get_flags(ple[i]) & _PAGE_PRESENT )
    3.66 +                    put_shadow_ref(entry_get_pfn(ple[i]));
    3.67 +        } 
    3.68 +        else
    3.69 +        {
    3.70 +            for ( i = 0; i < PAGETABLE_ENTRIES; i++ )
    3.71 +            {
    3.72 +                /* 
    3.73 +                 * List the skip/break conditions to avoid freeing
    3.74 +                 * Xen private mappings.
    3.75 +                 */
    3.76 +#if CONFIG_PAGING_LEVELS == 2
    3.77 +                if ( level == PAGING_L2 && !is_guest_l2_slot(0, i) )
    3.78 +                    continue;
    3.79 +#endif
    3.80 +#if CONFIG_PAGING_LEVELS == 3
    3.81 +                if ( level == PAGING_L3 && i == L3_PAGETABLE_ENTRIES )
    3.82 +                    break;
    3.83 +                if ( level == PAGING_L2 )
    3.84 +                {
    3.85 +                    struct pfn_info *page = &frame_table[smfn]; 
    3.86 +                    if ( is_xen_l2_slot(page->u.inuse.type_info, i) )
    3.87 +                        continue;
    3.88 +                }
    3.89 +#endif
    3.90 +#if CONFIG_PAGING_LEVELS == 4
    3.91 +                if ( level == PAGING_L4 && !is_guest_l4_slot(i))
    3.92 +                    continue;
    3.93 +#endif
    3.94 +                if ( entry_get_flags(ple[i]) & _PAGE_PRESENT )
    3.95 +                    put_shadow_ref(entry_get_pfn(ple[i]));
    3.96 +            }
    3.97 +        }
    3.98 +        unmap_domain_page(ple);
    3.99 +    }
   3.100 +}
   3.101 +#endif
   3.102 +
   3.103 +#if CONFIG_PAGING_LEVELS == 4
   3.104  /*
   3.105   * Convert PAE 3-level page-table to 4-level page-table
   3.106   */
   3.107 @@ -203,55 +291,6 @@ static void alloc_monitor_pagetable(stru
   3.108      v->arch.monitor_vtable = (l2_pgentry_t *) mpl4e;
   3.109  }
   3.110  
   3.111 -static void inline
   3.112 -free_shadow_fl1_table(struct domain *d, unsigned long smfn)
   3.113 -{
   3.114 -    l1_pgentry_t *pl1e = map_domain_page(smfn);
   3.115 -    int i;
   3.116 -
   3.117 -    for (i = 0; i < L1_PAGETABLE_ENTRIES; i++)
   3.118 -        put_page_from_l1e(pl1e[i], d);
   3.119 -}
   3.120 -
   3.121 -/*
   3.122 - * Free l2, l3, l4 shadow tables
   3.123 - */
   3.124 -
   3.125 -void free_fake_shadow_l2(struct domain *d,unsigned long smfn);
   3.126 -
   3.127 -static void inline
   3.128 -free_shadow_tables(struct domain *d, unsigned long smfn, u32 level)
   3.129 -{
   3.130 -    pgentry_64_t *ple = map_domain_page(smfn);
   3.131 -    int i, external = shadow_mode_external(d);
   3.132 -    struct pfn_info *page = &frame_table[smfn];
   3.133 -
   3.134 -    if (d->arch.ops->guest_paging_levels == PAGING_L2)
   3.135 -    {
   3.136 -#if CONFIG_PAGING_LEVELS >=4
   3.137 -        for ( i = 0; i < PDP_ENTRIES; i++ )
   3.138 -        {
   3.139 -            if (entry_get_flags(ple[i]) & _PAGE_PRESENT )
   3.140 -                free_fake_shadow_l2(d,entry_get_pfn(ple[i]));
   3.141 -        }
   3.142 -   
   3.143 -        page = &frame_table[entry_get_pfn(ple[0])];
   3.144 -        free_domheap_pages(page, SL2_ORDER);
   3.145 -        unmap_domain_page(ple);
   3.146 -#endif
   3.147 -    }
   3.148 -    else
   3.149 -    {
   3.150 -        for ( i = 0; i < PAGETABLE_ENTRIES; i++ )
   3.151 -            if ( external || is_guest_l4_slot(i) )
   3.152 -                if ( entry_get_flags(ple[i]) & _PAGE_PRESENT )
   3.153 -                    put_shadow_ref(entry_get_pfn(ple[i]));
   3.154 -
   3.155 -        unmap_domain_page(ple);
   3.156 -    }
   3.157 -}
   3.158 -
   3.159 -
   3.160  void free_monitor_pagetable(struct vcpu *v)
   3.161  {
   3.162      unsigned long mfn;
   3.163 @@ -299,11 +338,9 @@ static void alloc_monitor_pagetable(stru
   3.164      mpl2e = (l2_pgentry_t *)map_domain_page(mmfn);
   3.165      memset(mpl2e, 0, PAGE_SIZE);
   3.166  
   3.167 -#ifdef __i386__ /* XXX screws x86/64 build */
   3.168      memcpy(&mpl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE], 
   3.169             &idle_pg_table[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
   3.170             HYPERVISOR_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));
   3.171 -#endif
   3.172  
   3.173      mpl2e[l2_table_offset(PERDOMAIN_VIRT_START)] =
   3.174          l2e_from_paddr(__pa(d->arch.mm_perdomain_pt),
   3.175 @@ -333,7 +370,7 @@ void free_monitor_pagetable(struct vcpu 
   3.176      unsigned long mfn;
   3.177  
   3.178      ASSERT( pagetable_get_paddr(v->arch.monitor_table) );
   3.179 -    
   3.180 +
   3.181      mpl2e = v->arch.monitor_vtable;
   3.182  
   3.183      /*
   3.184 @@ -517,13 +554,11 @@ free_shadow_hl2_table(struct domain *d, 
   3.185  
   3.186      SH_VVLOG("%s: smfn=%lx freed", __func__, smfn);
   3.187  
   3.188 -#ifdef __i386__
   3.189 +#if CONFIG_PAGING_LEVELS == 2
   3.190      if ( shadow_mode_external(d) )
   3.191          limit = L2_PAGETABLE_ENTRIES;
   3.192      else
   3.193          limit = DOMAIN_ENTRIES_PER_L2_PAGETABLE;
   3.194 -#else
   3.195 -    limit = 0; /* XXX x86/64 XXX */
   3.196  #endif
   3.197  
   3.198      for ( i = 0; i < limit; i++ )
   3.199 @@ -584,10 +619,11 @@ void free_shadow_page(unsigned long smfn
   3.200  
   3.201      ASSERT( ! IS_INVALID_M2P_ENTRY(gpfn) );
   3.202  #if CONFIG_PAGING_LEVELS >=4
   3.203 -    if (type == PGT_fl1_shadow) {
   3.204 +    if ( type == PGT_fl1_shadow ) 
   3.205 +    {
   3.206          unsigned long mfn;
   3.207          mfn = __shadow_status(d, gpfn, PGT_fl1_shadow);
   3.208 -        if (!mfn)
   3.209 +        if ( !mfn )
   3.210              gpfn |= (1UL << 63);
   3.211      }
   3.212  #endif
   3.213 @@ -602,7 +638,7 @@ void free_shadow_page(unsigned long smfn
   3.214          free_shadow_l1_table(d, smfn);
   3.215          d->arch.shadow_page_count--;
   3.216          break;
   3.217 -#if defined (__i386__)
   3.218 +#if CONFIG_PAGING_LEVELS == 2
   3.219      case PGT_l2_shadow:
   3.220          perfc_decr(shadow_l2_pages);
   3.221          shadow_demote(d, gpfn, gmfn);
   3.222 @@ -616,7 +652,8 @@ void free_shadow_page(unsigned long smfn
   3.223          free_shadow_hl2_table(d, smfn);
   3.224          d->arch.hl2_page_count--;
   3.225          break;
   3.226 -#else
   3.227 +#endif
   3.228 +#if CONFIG_PAGING_LEVELS >= 3
   3.229      case PGT_l2_shadow:
   3.230      case PGT_l3_shadow:
   3.231      case PGT_l4_shadow:
   3.232 @@ -630,7 +667,6 @@ void free_shadow_page(unsigned long smfn
   3.233          d->arch.shadow_page_count--;
   3.234          break;
   3.235  #endif
   3.236 -
   3.237      case PGT_snapshot:
    3.238          perfc_decr(snapshot_pages);
   3.239          break;
   3.240 @@ -782,7 +818,7 @@ void free_shadow_pages(struct domain *d)
   3.241          }
   3.242      }
   3.243  
   3.244 -#if defined (__i386__)
   3.245 +#if CONFIG_PAGING_LEVELS == 2
   3.246      // For external shadows, remove the monitor table's refs
   3.247      //
   3.248      if ( shadow_mode_external(d) )
   3.249 @@ -928,7 +964,7 @@ int __shadow_mode_enable(struct domain *
   3.250      ASSERT(!(d->arch.shadow_mode & ~mode));
   3.251  
   3.252  #if defined(CONFIG_PAGING_LEVELS)
   3.253 -    if(!shadow_set_guest_paging_levels(d, 
   3.254 +    if(!shadow_set_guest_paging_levels(d,
   3.255                                         CONFIG_PAGING_LEVELS)) {
   3.256          printk("Unsupported guest paging levels\n");
   3.257          domain_crash_synchronous(); /* need to take a clean path */
   3.258 @@ -968,7 +1004,7 @@ int __shadow_mode_enable(struct domain *
   3.259          else
   3.260              v->arch.shadow_vtable = NULL;
   3.261          
   3.262 -#if defined (__i386__)
   3.263 +#if CONFIG_PAGING_LEVELS == 2
   3.264          /*
   3.265           * arch.hl2_vtable
   3.266           */
   3.267 @@ -1408,7 +1444,7 @@ void shadow_l1_normal_pt_update(
   3.268      sl1mfn = __shadow_status(current->domain, pa >> PAGE_SHIFT, PGT_l1_shadow);
   3.269      if ( sl1mfn )
   3.270      {
   3.271 -        SH_VVLOG("shadow_l1_normal_pt_update pa=%p, gpte=%" PRIpte,
   3.272 +        SH_VVLOG("shadow_l1_normal_pt_update pa=%p, gpde=%" PRIpte,
   3.273                   (void *)pa, l1e_get_intpte(gpte));
   3.274          l1pte_propagate_from_guest(current->domain, gpte, &spte);
   3.275  
   3.276 @@ -1447,7 +1483,7 @@ void shadow_l2_normal_pt_update(
   3.277  #if CONFIG_PAGING_LEVELS >= 3
   3.278  void shadow_l3_normal_pt_update(
   3.279      struct domain *d,
   3.280 -    unsigned long pa, l3_pgentry_t gpde,
   3.281 +    unsigned long pa, l3_pgentry_t l3e,
   3.282      struct domain_mmap_cache *cache)
   3.283  {
   3.284      unsigned long sl3mfn;
   3.285 @@ -1458,11 +1494,10 @@ void shadow_l3_normal_pt_update(
   3.286      sl3mfn = __shadow_status(current->domain, pa >> PAGE_SHIFT, PGT_l3_shadow);
   3.287      if ( sl3mfn )
   3.288      {
   3.289 -        SH_VVLOG("shadow_l3_normal_pt_update pa=%p, gpde=%" PRIpte,
   3.290 -                 (void *)pa, l3e_get_intpte(gpde));
   3.291 -
   3.292 +        SH_VVLOG("shadow_l3_normal_pt_update pa=%p, l3e=%" PRIpte,
   3.293 +                 (void *)pa, l3e_get_intpte(l3e));
   3.294          spl3e = (pgentry_64_t *) map_domain_page_with_cache(sl3mfn, cache);
   3.295 -        validate_entry_change(d, (pgentry_64_t *) &gpde,
   3.296 +        validate_entry_change(d, (pgentry_64_t *) &l3e,
   3.297                                &spl3e[(pa & ~PAGE_MASK) / sizeof(l3_pgentry_t)], 
   3.298                                shadow_type_to_level(PGT_l3_shadow));
   3.299          unmap_domain_page_with_cache(spl3e, cache);
   3.300 @@ -1475,7 +1510,7 @@ void shadow_l3_normal_pt_update(
   3.301  #if CONFIG_PAGING_LEVELS >= 4
   3.302  void shadow_l4_normal_pt_update(
   3.303      struct domain *d,
   3.304 -    unsigned long pa, l4_pgentry_t gpde,
   3.305 +    unsigned long pa, l4_pgentry_t l4e,
   3.306      struct domain_mmap_cache *cache)
   3.307  {
   3.308      unsigned long sl4mfn;
   3.309 @@ -1486,11 +1521,10 @@ void shadow_l4_normal_pt_update(
   3.310      sl4mfn = __shadow_status(current->domain, pa >> PAGE_SHIFT, PGT_l4_shadow);
   3.311      if ( sl4mfn )
   3.312      {
   3.313 -        SH_VVLOG("shadow_l4_normal_pt_update pa=%p, gpde=%" PRIpte,
   3.314 -                 (void *)pa, l4e_get_intpte(gpde));
   3.315 -
   3.316 +        SH_VVLOG("shadow_l4_normal_pt_update pa=%p, l4e=%" PRIpte,
   3.317 +                 (void *)pa, l4e_get_intpte(l4e));
   3.318          spl4e = (pgentry_64_t *)map_domain_page_with_cache(sl4mfn, cache);
   3.319 -        validate_entry_change(d, (pgentry_64_t *)&gpde,
   3.320 +        validate_entry_change(d, (pgentry_64_t *)&l4e,
   3.321                                &spl4e[(pa & ~PAGE_MASK) / sizeof(l4_pgentry_t)], 
   3.322                                shadow_type_to_level(PGT_l4_shadow));
   3.323          unmap_domain_page_with_cache(spl4e, cache);
   3.324 @@ -1555,8 +1589,6 @@ remove_shadow(struct domain *d, unsigned
   3.325  {
   3.326      unsigned long smfn;
   3.327  
   3.328 -    //printk("%s(gpfn=%lx, type=%x)\n", __func__, gpfn, stype);
   3.329 -
   3.330      shadow_lock(d);
   3.331  
   3.332      while ( stype >= PGT_l1_shadow )
     4.1 --- a/xen/include/asm-x86/page.h	Fri Nov 11 19:02:49 2005 +0100
     4.2 +++ b/xen/include/asm-x86/page.h	Fri Nov 11 19:11:13 2005 +0100
     4.3 @@ -232,9 +232,6 @@ typedef struct { u64 pfn; } pagetable_t;
     4.4  #define linear_l3_table(_ed) ((_ed)->arch.guest_vl3table)
     4.5  #define linear_l4_table(_ed) ((_ed)->arch.guest_vl4table)
     4.6  
     4.7 -#define va_to_l1mfn(_ed, _va) \
     4.8 -    (l2e_get_pfn(linear_l2_table(_ed)[_va>>L2_PAGETABLE_SHIFT]))
     4.9 -
    4.10  #ifndef __ASSEMBLY__
    4.11  #if CONFIG_PAGING_LEVELS == 3
    4.12  extern root_pgentry_t idle_pg_table[ROOT_PAGETABLE_ENTRIES];
     5.1 --- a/xen/include/asm-x86/shadow.h	Fri Nov 11 19:02:49 2005 +0100
     5.2 +++ b/xen/include/asm-x86/shadow.h	Fri Nov 11 19:11:13 2005 +0100
     5.3 @@ -138,6 +138,14 @@ extern void shadow_l2_normal_pt_update(s
     5.4                                         struct domain_mmap_cache *cache);
     5.5  #if CONFIG_PAGING_LEVELS >= 3
     5.6  #include <asm/page-guest32.h>
     5.7 +/*
     5.8 + * va_mask cannot be used because it's used by the shadow hash.
      5.9 + * Use the score area for now.
    5.10 + */
    5.11 +#define is_xen_l2_slot(t,s)                                                    \
    5.12 +    ( ((((t) & PGT_score_mask) >> PGT_score_shift) == 3) &&                    \
    5.13 +      ((s) >= (L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES - 1))) )
    5.14 +
    5.15  extern unsigned long gva_to_gpa(unsigned long gva);
    5.16  extern void shadow_l3_normal_pt_update(struct domain *d,
    5.17                                         unsigned long pa, l3_pgentry_t l3e,
    5.18 @@ -458,7 +466,7 @@ static inline void shadow_put_page(struc
    5.19  
    5.20  /************************************************************************/
    5.21  
    5.22 -static inline int __mark_dirty(struct domain *d, unsigned int mfn)
    5.23 +static inline int __mark_dirty(struct domain *d, unsigned long mfn)
    5.24  {
    5.25      unsigned long pfn;
    5.26      int           rc = 0;
    5.27 @@ -906,7 +914,7 @@ static inline void l2pde_general(
    5.28          guest_l2e_add_flags(gpde, _PAGE_ACCESSED);
    5.29  
    5.30          *gpde_p = gpde;
    5.31 -    }
    5.32 +    } 
    5.33  
    5.34      if ( l2e_get_intpte(spde) || l2e_get_intpte(gpde) )
    5.35          SH_VVLOG("%s: gpde=%" PRIpte ", new spde=%" PRIpte, __func__,
    5.36 @@ -1355,7 +1363,7 @@ static inline void put_shadow_status(str
    5.37  }
    5.38  
    5.39  
    5.40 -static inline void delete_shadow_status( 
    5.41 +static inline void delete_shadow_status(
    5.42      struct domain *d, unsigned long gpfn, unsigned long gmfn, unsigned int stype)
    5.43  {
    5.44      struct shadow_status *p, *x, *n, *head;
    5.45 @@ -1454,7 +1462,7 @@ static inline void set_shadow_status(
    5.46      ASSERT(stype && !(stype & ~PGT_type_mask));
    5.47  
    5.48      x = head = hash_bucket(d, gpfn);
    5.49 -   
    5.50 +
    5.51      SH_VLOG("set gpfn=%lx smfn=%lx t=%lx bucket=%p(%p)",
    5.52               gpfn, smfn, stype, x, x->next);
    5.53      shadow_audit(d, 0);
    5.54 @@ -1584,7 +1592,7 @@ shadow_set_l1e(unsigned long va, l1_pgen
    5.55  {
    5.56      struct vcpu *v = current;
    5.57      struct domain *d = v->domain;
    5.58 -    l2_pgentry_t sl2e;
    5.59 +    l2_pgentry_t sl2e = {0};
    5.60  
    5.61      __shadow_get_l2e(v, va, &sl2e);
    5.62      if ( !(l2e_get_flags(sl2e) & _PAGE_PRESENT) )
    5.63 @@ -1731,7 +1739,7 @@ static inline void update_pagetables(str
    5.64  #ifdef CONFIG_VMX
    5.65      if ( VMX_DOMAIN(v) )
    5.66          paging_enabled = vmx_paging_enabled(v);
    5.67 -            
    5.68 +
    5.69      else
    5.70  #endif
    5.71          // HACK ALERT: there's currently no easy way to figure out if a domU
    5.72 @@ -1757,7 +1765,7 @@ static inline void update_pagetables(str
    5.73          if ( shadow_mode_enabled(d) )
    5.74              v->arch.monitor_table = v->arch.shadow_table;
    5.75          else
    5.76 -#ifdef __x86_64__
    5.77 +#if CONFIG_PAGING_LEVELS == 4
    5.78          if ( !(v->arch.flags & TF_kernel_mode) )
    5.79              v->arch.monitor_table = v->arch.guest_table_user;
    5.80          else
     6.1 --- a/xen/include/asm-x86/shadow_64.h	Fri Nov 11 19:02:49 2005 +0100
     6.2 +++ b/xen/include/asm-x86/shadow_64.h	Fri Nov 11 19:11:13 2005 +0100
     6.3 @@ -29,6 +29,15 @@
     6.4  #include <asm/shadow.h>
     6.5  #include <asm/shadow_ops.h>
     6.6  
     6.7 +extern struct shadow_ops MODE_B_HANDLER;
     6.8 +
     6.9 +#if CONFIG_PAGING_LEVELS == 3
    6.10 +#define L4_PAGETABLE_SHIFT      39
    6.11 +#define L4_PAGETABLE_ENTRIES    (1<<PAGETABLE_ORDER)
    6.12 +typedef struct { intpte_t l4; } l4_pgentry_t;
    6.13 +#define is_guest_l4_slot(_s) (1)
    6.14 +#endif
    6.15 +
    6.16  #define READ_FAULT  0
    6.17  #define WRITE_FAULT 1
    6.18  
    6.19 @@ -94,6 +103,11 @@ static inline int  table_offset_64(unsig
    6.20              return  (((va) >> L2_PAGETABLE_SHIFT) & (L2_PAGETABLE_ENTRIES - 1));
    6.21          case 3:
    6.22              return  (((va) >> L3_PAGETABLE_SHIFT) & (L3_PAGETABLE_ENTRIES - 1));
    6.23 +#if CONFIG_PAGING_LEVELS == 3
    6.24 +        case 4:
    6.25 +            return PAE_SHADOW_SELF_ENTRY;
    6.26 +#endif
    6.27 +
    6.28  #if CONFIG_PAGING_LEVELS >= 4
    6.29  #ifndef GUEST_PGENTRY_32
    6.30          case 4:
    6.31 @@ -127,57 +141,73 @@ static inline void free_out_of_sync_stat
    6.32      }
    6.33  }
    6.34  
    6.35 -static inline pgentry_64_t *__entry(
    6.36 -    struct vcpu *v, u64 va, u32 flag)
    6.37 +static inline int __entry(
    6.38 +    struct vcpu *v, u64 va, pgentry_64_t *e_p, u32 flag)
    6.39  {
    6.40      int i;
    6.41      pgentry_64_t *le_e;
    6.42 -    pgentry_64_t *le_p;
    6.43 +    pgentry_64_t *le_p = NULL;
    6.44      unsigned long mfn;
    6.45      int index;
    6.46      u32 level = flag & L_MASK;
    6.47      struct domain *d = v->domain;
    6.48 +    int root_level;
    6.49  
    6.50 -    index = table_offset_64(va, ROOT_LEVEL_64);
    6.51 -    if (flag & SHADOW_ENTRY)
    6.52 +    if ( flag & SHADOW_ENTRY )
    6.53 +    {
     6.54 +        root_level = ROOT_LEVEL_64;
     6.55 +        index = table_offset_64(va, root_level);
    6.56          le_e = (pgentry_64_t *)&v->arch.shadow_vtable[index];
    6.57 -    else
    6.58 +    }
    6.59 +    else /* guest entry */  
    6.60 +    {
    6.61 +        root_level = v->domain->arch.ops->guest_paging_levels;
     6.62 +        index = table_offset_64(va, root_level);
    6.63          le_e = (pgentry_64_t *)&v->arch.guest_vtable[index];
    6.64 -
    6.65 +    }
    6.66      /*
    6.67       * If it's not external mode, then mfn should be machine physical.
    6.68       */
    6.69 -    for (i = ROOT_LEVEL_64 - level; i > 0; i--) {
    6.70 -        if (unlikely(!(entry_get_flags(*le_e) & _PAGE_PRESENT)))
    6.71 -            return NULL;
    6.72 -        mfn = entry_get_value(*le_e) >> PAGE_SHIFT;
    6.73 -        if ((flag & GUEST_ENTRY) && shadow_mode_translate(d))
    6.74 +    for (i = root_level - level; i > 0; i--) {
    6.75 +        if ( unlikely(!(entry_get_flags(*le_e) & _PAGE_PRESENT)) ) {
    6.76 +            if ( le_p )
    6.77 +                unmap_domain_page(le_p);
    6.78 +            return 0;
    6.79 +        }
    6.80 +        mfn = entry_get_pfn(*le_e);
    6.81 +        if ( (flag & GUEST_ENTRY) && shadow_mode_translate(d) )
    6.82              mfn = get_mfn_from_pfn(mfn);
    6.83 -        le_p = (pgentry_64_t *)phys_to_virt(mfn << PAGE_SHIFT);
    6.84 +        if ( le_p )
    6.85 +            unmap_domain_page(le_p);
    6.86 +        le_p = (pgentry_64_t *)map_domain_page(mfn);
    6.87          index = table_offset_64(va, (level + i - 1));
    6.88          le_e = &le_p[index];
    6.89 +    }
    6.90  
    6.91 -    }
    6.92 -    return le_e;
    6.93 +    if ( flag & SET_ENTRY )
    6.94 +        *le_e = *e_p;
    6.95 +    else
    6.96 +        *e_p = *le_e;
    6.97 +
    6.98 +    if ( le_p )
    6.99 +        unmap_domain_page(le_p);
   6.100 +
   6.101 +    return 1;
   6.102  
   6.103  }
   6.104  
   6.105 -static inline pgentry_64_t *__rw_entry(
   6.106 -    struct vcpu *ed, u64 va, void *e_p, u32 flag)
   6.107 +static inline int __rw_entry(
   6.108 +    struct vcpu *v, u64 va, void *e_p, u32 flag)
   6.109  {
   6.110 -    pgentry_64_t *le_e = __entry(ed, va, flag);
   6.111      pgentry_64_t *e = (pgentry_64_t *)e_p;
   6.112 -    if (le_e == NULL)
   6.113 -        return NULL;
   6.114  
   6.115      if (e) {
   6.116 -        if (flag & SET_ENTRY)
   6.117 -            *le_e = *e;
   6.118 -        else
   6.119 -            *e = *le_e;
   6.120 +        return __entry(v, va, e, flag);
   6.121      }
   6.122 -    return le_e;
   6.123 +
   6.124 +    return 0;
   6.125  }
   6.126 +
   6.127  #define __shadow_set_l4e(v, va, value) \
   6.128    __rw_entry(v, va, value, SHADOW_ENTRY | SET_ENTRY | PAGING_L4)
   6.129  #define __shadow_get_l4e(v, va, sl4e) \
   6.130 @@ -204,7 +234,7 @@ static inline pgentry_64_t *__rw_entry(
   6.131  #define __guest_get_l3e(v, va, sl3e) \
   6.132    __rw_entry(v, va, gl3e, GUEST_ENTRY | GET_ENTRY | PAGING_L3)
   6.133  
   6.134 -static inline void *  __guest_set_l2e(
   6.135 +static inline int  __guest_set_l2e(
   6.136      struct vcpu *v, u64 va, void *value, int size)
   6.137  {
   6.138      switch(size) {
   6.139 @@ -216,21 +246,21 @@ static inline void *  __guest_set_l2e(
   6.140                  l2va = (l2_pgentry_32_t *)v->arch.guest_vtable;
   6.141                  if (value)
   6.142                      l2va[l2_table_offset_32(va)] = *(l2_pgentry_32_t *)value;
   6.143 -                return &l2va[l2_table_offset_32(va)];
   6.144 +                return 1;
   6.145              }
   6.146          case 8:
   6.147              return __rw_entry(v, va, value, GUEST_ENTRY | SET_ENTRY | PAGING_L2);
   6.148          default:
   6.149              BUG();
   6.150 -            return NULL;
   6.151 +            return 0;
   6.152      }
   6.153 -    return NULL;
   6.154 +    return 0;
   6.155  }
   6.156  
   6.157  #define __guest_set_l2e(v, va, value) \
   6.158 -  ( __typeof__(value) )__guest_set_l2e(v, (u64)va, value, sizeof(*value))
   6.159 +    __guest_set_l2e(v, (u64)va, value, sizeof(*value))
   6.160  
   6.161 -static inline void * __guest_get_l2e(
   6.162 +static inline int  __guest_get_l2e(
   6.163    struct vcpu *v, u64 va, void *gl2e, int size)
   6.164  {
   6.165      switch(size) {
   6.166 @@ -241,21 +271,21 @@ static inline void * __guest_get_l2e(
   6.167                  l2va = (l2_pgentry_32_t *)v->arch.guest_vtable;
   6.168                  if (gl2e)
   6.169                      *(l2_pgentry_32_t *)gl2e = l2va[l2_table_offset_32(va)];
   6.170 -                return &l2va[l2_table_offset_32(va)];
   6.171 +                return 1;
   6.172              }
   6.173          case 8:
   6.174              return __rw_entry(v, va, gl2e, GUEST_ENTRY | GET_ENTRY | PAGING_L2);
   6.175          default:
   6.176              BUG();
   6.177 -            return NULL;
   6.178 +            return 0;
   6.179      }
   6.180 -    return NULL;
   6.181 +    return 0;
   6.182  }
   6.183  
   6.184  #define __guest_get_l2e(v, va, gl2e) \
   6.185 -  (__typeof__ (gl2e))__guest_get_l2e(v, (u64)va, gl2e, sizeof(*gl2e))
   6.186 +    __guest_get_l2e(v, (u64)va, gl2e, sizeof(*gl2e))
   6.187  
   6.188 -static inline void *  __guest_set_l1e(
   6.189 +static inline int  __guest_set_l1e(
   6.190    struct vcpu *v, u64 va, void *value, int size)
   6.191  {
   6.192      switch(size) {
   6.193 @@ -267,34 +297,34 @@ static inline void *  __guest_set_l1e(
   6.194                  unsigned long l1mfn;
   6.195  
   6.196                  if (!__guest_get_l2e(v, va, &gl2e))
   6.197 -                    return NULL;
   6.198 +                    return 0;
   6.199                  if (unlikely(!(l2e_get_flags_32(gl2e) & _PAGE_PRESENT)))
   6.200 -                    return NULL;
   6.201 +                    return 0;
   6.202  
   6.203                  l1mfn = get_mfn_from_pfn(
   6.204                    l2e_get_pfn(gl2e));
   6.205  
   6.206 -                l1va = (l1_pgentry_32_t *)
   6.207 -                  phys_to_virt(l1mfn << L1_PAGETABLE_SHIFT);
   6.208 +                l1va = (l1_pgentry_32_t *)map_domain_page(l1mfn);
   6.209                  if (value)
   6.210                      l1va[l1_table_offset_32(va)] = *(l1_pgentry_32_t *)value;
   6.211 +                unmap_domain_page(l1va);
   6.212  
   6.213 -                return &l1va[l1_table_offset_32(va)];
   6.214 +                return 1;
   6.215              }
   6.216  
   6.217          case 8:
   6.218              return __rw_entry(v, va, value, GUEST_ENTRY | SET_ENTRY | PAGING_L1);
   6.219          default:
   6.220              BUG();
   6.221 -            return NULL;
   6.222 +            return 0;
   6.223      }
   6.224 -    return NULL;
   6.225 +    return 0;
   6.226  }
   6.227  
   6.228  #define __guest_set_l1e(v, va, value) \
   6.229 -  ( __typeof__(value) )__guest_set_l1e(v, (u64)va, value, sizeof(*value))
   6.230 +     __guest_set_l1e(v, (u64)va, value, sizeof(*value))
   6.231  
   6.232 -static inline void *  __guest_get_l1e(
   6.233 +static inline int  __guest_get_l1e(
   6.234    struct vcpu *v, u64 va, void *gl1e, int size)
   6.235  {
   6.236      switch(size) {
   6.237 @@ -306,34 +336,33 @@ static inline void *  __guest_get_l1e(
   6.238                  unsigned long l1mfn;
   6.239  
   6.240                  if (!(__guest_get_l2e(v, va, &gl2e)))
   6.241 -                    return NULL;
   6.242 +                    return 0;
   6.243  
   6.244  
   6.245                  if (unlikely(!(l2e_get_flags_32(gl2e) & _PAGE_PRESENT)))
   6.246 -                    return NULL;
   6.247 +                    return 0;
   6.248  
   6.249  
   6.250                  l1mfn = get_mfn_from_pfn(
   6.251                    l2e_get_pfn(gl2e));
   6.252 -                l1va = (l1_pgentry_32_t *) phys_to_virt(
   6.253 -                  l1mfn << L1_PAGETABLE_SHIFT);
   6.254 +                l1va = (l1_pgentry_32_t *) map_domain_page(l1mfn);
   6.255                  if (gl1e)
   6.256                      *(l1_pgentry_32_t *)gl1e = l1va[l1_table_offset_32(va)];
   6.257 -
   6.258 -                return &l1va[l1_table_offset_32(va)];
   6.259 +                unmap_domain_page(l1va);
   6.260 +                return 1;
   6.261              }
   6.262          case 8:
   6.263              // 64-bit guest
   6.264              return __rw_entry(v, va, gl1e, GUEST_ENTRY | GET_ENTRY | PAGING_L1);
   6.265          default:
   6.266              BUG();
   6.267 -            return NULL;
   6.268 +            return 0;
   6.269      }
   6.270 -    return NULL;
   6.271 +    return 0;
   6.272  }
   6.273  
   6.274  #define __guest_get_l1e(v, va, gl1e) \
   6.275 -  ( __typeof__(gl1e) )__guest_get_l1e(v, (u64)va, gl1e, sizeof(*gl1e))
   6.276 +    __guest_get_l1e(v, (u64)va, gl1e, sizeof(*gl1e))
   6.277  
   6.278  static inline void entry_general(
   6.279    struct domain *d,
   6.280 @@ -365,10 +394,16 @@ static inline void entry_general(
   6.281                  unmap_domain_page(l1_p);
   6.282              }
   6.283          } else {
   6.284 -            sle = entry_from_pfn(
   6.285 -                smfn,
   6.286 -                (entry_get_flags(gle) | _PAGE_RW | _PAGE_ACCESSED) & ~_PAGE_AVAIL);
   6.287 -            entry_add_flags(gle, _PAGE_ACCESSED);
   6.288 +            if (d->arch.ops->guest_paging_levels <= PAGING_L3
   6.289 +                    && level == PAGING_L3) {
   6.290 +                sle = entry_from_pfn(smfn, entry_get_flags(gle));
   6.291 +            } else {
   6.292 +
   6.293 +                sle = entry_from_pfn(
   6.294 +                  smfn,
   6.295 +                  (entry_get_flags(gle) | _PAGE_RW | _PAGE_ACCESSED) & ~_PAGE_AVAIL);
   6.296 +                entry_add_flags(gle, _PAGE_ACCESSED);
   6.297 +            }
   6.298          }
   6.299          // XXX mafetter: Hmm...
   6.300          //     Shouldn't the dirty log be checked/updated here?
   6.301 @@ -392,7 +427,7 @@ static inline void entry_propagate_from_
   6.302  
   6.303      if ( entry_get_flags(gle) & _PAGE_PRESENT ) {
   6.304          if ((entry_get_flags(gle) & _PAGE_PSE) && level == PAGING_L2) {
   6.305 -            smfn =  __shadow_status(d, entry_get_value(gle) >> PAGE_SHIFT, PGT_fl1_shadow);
   6.306 +            smfn =  __shadow_status(d, entry_get_pfn(gle), PGT_fl1_shadow);
   6.307          } else {
   6.308              smfn =  __shadow_status(d, entry_get_pfn(gle), 
   6.309                shadow_level_to_type((level -1 )));