ia64/xen-unstable

changeset 5722:f261f14b9781

We have now extended the patch to support x86_64 domU as well. Shadow
mode survived with the domU running lmbench, ltp, kernbench, etc.,
while a script enabled shadow LOGDIRTY mode, issued CLEAN, and then
disabled shadow mode again at 5-second intervals in an infinite loop.
Thanks to Ian for providing the Python script. We tested x86 domU and
VMX domains as well. The big files are all new and are used for
64-bit builds only. Please apply.
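For reference, the stress loop looked roughly like the minimal sketch
below. This is only an assumption of how such a script could be
written; Ian's actual script is not part of this changeset, and the
shadow_op() helper is hypothetical (the real operations correspond to
the DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY / _CLEAN / _OFF ops visible
later in this patch).

    import time

    def stress_shadow(domid, shadow_op, interval=5):
        """Toggle shadow log-dirty state forever while the guest runs benchmarks."""
        while True:
            shadow_op(domid, "ENABLE_LOGDIRTY")  # start tracking dirty pages
            time.sleep(interval)
            shadow_op(domid, "CLEAN")            # read and clear the dirty bitmap
            time.sleep(interval)
            shadow_op(domid, "OFF")              # tear shadow mode back down
            time.sleep(interval)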

We also verified that shadow_64.c and shadow_public.c can be built for
x86 and that they work fine there. We can provide a small patch that
enables this (once the code is in ;-).

Signed-off-by: Jun Nakajima <jun.nakajima@intel.com>
Signed-off-by: Chengyuan Li <chengyuan.li@intel.com>
Signed-off-by: Yunhong Jiang <yunhong.jiang@intel.com>
author kaf24@firebug.cl.cam.ac.uk
date Mon Jul 11 09:14:11 2005 +0000 (2005-07-11)
parents c32cf5fef34c
children d332d4df452e
files xen/arch/x86/Makefile xen/arch/x86/audit.c xen/arch/x86/domain.c xen/arch/x86/shadow.c xen/arch/x86/traps.c xen/arch/x86/vmx.c xen/arch/x86/vmx_platform.c xen/arch/x86/vmx_vmcs.c xen/include/asm-x86/domain.h xen/include/asm-x86/mm.h xen/include/asm-x86/page.h xen/include/asm-x86/shadow.h
line diff
     1.1 --- a/xen/arch/x86/Makefile	Mon Jul 11 09:03:24 2005 +0000
     1.2 +++ b/xen/arch/x86/Makefile	Mon Jul 11 09:14:11 2005 +0000
     1.3 @@ -13,6 +13,11 @@ OBJS := $(subst cpu/centaur.o,,$(OBJS))
     1.4  OBJS := $(subst cpu/cyrix.o,,$(OBJS))
     1.5  OBJS := $(subst cpu/rise.o,,$(OBJS))
     1.6  OBJS := $(subst cpu/transmeta.o,,$(OBJS))
     1.7 +OBJS := $(subst shadow32.o,,$(OBJS))
     1.8 +else
     1.9 +OBJS := $(subst shadow.o,,$(OBJS))
    1.10 +OBJS := $(subst shadow_public.o,,$(OBJS))
    1.11 +OBJS := $(subst shadow_xxx.o,,$(OBJS))
    1.12  endif
    1.13  
    1.14  OBJS := $(subst $(TARGET_SUBARCH)/asm-offsets.o,,$(OBJS))
     2.1 --- a/xen/arch/x86/audit.c	Mon Jul 11 09:03:24 2005 +0000
     2.2 +++ b/xen/arch/x86/audit.c	Mon Jul 11 09:14:11 2005 +0000
     2.3 @@ -846,6 +846,8 @@ void _audit_domain(struct domain *d, int
     2.4          {
     2.5          case PGT_l1_page_table:
     2.6          case PGT_l2_page_table:
     2.7 +        case PGT_l3_page_table:
     2.8 +        case PGT_l4_page_table:
     2.9              if ( (page->u.inuse.type_info & PGT_count_mask) != 0 )
    2.10              {
    2.11                  printk("Audit %d: type count!=0 t=%x ot=%x c=%x mfn=%lx\n",
     3.1 --- a/xen/arch/x86/domain.c	Mon Jul 11 09:03:24 2005 +0000
     3.2 +++ b/xen/arch/x86/domain.c	Mon Jul 11 09:14:11 2005 +0000
     3.3 @@ -404,7 +404,7 @@ int arch_set_info_guest(
     3.4          set_bit(_VCPUF_fpu_initialised, &v->vcpu_flags);
     3.5  
     3.6      v->arch.flags &= ~TF_kernel_mode;
     3.7 -    if ( c->flags & VGCF_IN_KERNEL )
     3.8 +    if ( (c->flags & VGCF_IN_KERNEL) || (c->flags & VGCF_VMX_GUEST) )
     3.9          v->arch.flags |= TF_kernel_mode;
    3.10  
    3.11      memcpy(&v->arch.guest_context, c, sizeof(*c));
     4.1 --- a/xen/arch/x86/shadow.c	Mon Jul 11 09:03:24 2005 +0000
     4.2 +++ b/xen/arch/x86/shadow.c	Mon Jul 11 09:14:11 2005 +0000
     4.3 @@ -1,5 +1,5 @@
     4.4  /******************************************************************************
     4.5 - * arch/x86/shadow.c
     4.6 + * arch/x86/shadow_64.c
     4.7   * 
     4.8   * Copyright (c) 2005 Michael A Fetterman
     4.9   * Based on an earlier implementation by Ian Pratt et al
    4.10 @@ -18,7 +18,12 @@
    4.11   * along with this program; if not, write to the Free Software
    4.12   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
    4.13   */
    4.14 -
    4.15 +/*
    4.16 + * Jun Nakajima <jun.nakajima@intel.com>
    4.17 + * Chengyuan Li <chengyuan.li@intel.com>
    4.18 + *
    4.19 + * Extended to support 64-bit guests.
    4.20 + */
    4.21  
    4.22  #include <xen/config.h>
    4.23  #include <xen/types.h>
    4.24 @@ -30,17 +35,20 @@
    4.25  #include <xen/sched.h>
    4.26  #include <xen/trace.h>
    4.27  
    4.28 -#define MFN_PINNED(_x) (frame_table[_x].u.inuse.type_info & PGT_pinned)
    4.29 -
    4.30 -static void shadow_free_snapshot(struct domain *d,
    4.31 -                                 struct out_of_sync_entry *entry);
    4.32 -static void remove_out_of_sync_entries(struct domain *d, unsigned long smfn);
    4.33 -static void free_writable_pte_predictions(struct domain *d);
    4.34 +extern void free_shadow_pages(struct domain *d);
    4.35  
    4.36  #if SHADOW_DEBUG
    4.37  static void mark_shadows_as_reflecting_snapshot(struct domain *d, unsigned long gpfn);
    4.38  #endif
    4.39  
    4.40 +#if CONFIG_PAGING_LEVELS >= 4
    4.41 +#include <asm/shadow_64.h>
    4.42 +static unsigned long shadow_l4_table(
    4.43 +    struct domain *d, unsigned long gpfn, unsigned long gmfn);
    4.44 +static void shadow_map_into_current(struct vcpu *v,
    4.45 +    unsigned long va, unsigned int from, unsigned int to);
    4.46 +#endif
    4.47 +
    4.48  /********
    4.49  
    4.50  There's a per-domain shadow table spin lock which works fine for SMP
    4.51 @@ -136,24 +144,6 @@ shadow_promote(struct domain *d, unsigne
    4.52      return okay;
    4.53  }
    4.54  
    4.55 -static inline void
    4.56 -shadow_demote(struct domain *d, unsigned long gpfn, unsigned long gmfn)
    4.57 -{
    4.58 -    if ( !shadow_mode_refcounts(d) )
    4.59 -        return;
    4.60 -
    4.61 -    ASSERT(frame_table[gmfn].count_info & PGC_page_table);
    4.62 -
    4.63 -    if ( shadow_max_pgtable_type(d, gpfn, NULL) == PGT_none )
    4.64 -    {
    4.65 -        clear_bit(_PGC_page_table, &frame_table[gmfn].count_info);
    4.66 -
    4.67 -        if ( page_out_of_sync(pfn_to_page(gmfn)) )
    4.68 -        {
    4.69 -            remove_out_of_sync_entries(d, gmfn);
    4.70 -        }
    4.71 -    }
    4.72 -}
    4.73  
    4.74  /*
    4.75   * Things in shadow mode that collect get_page() refs to the domain's
    4.76 @@ -227,9 +217,13 @@ alloc_shadow_page(struct domain *d,
    4.77              unmap_domain_page(l1);
    4.78          }
    4.79      }
    4.80 -    else
    4.81 +    else {
    4.82          page = alloc_domheap_page(NULL);
    4.83 -
    4.84 +        void *lp = map_domain_page(page_to_pfn(page));
    4.85 +        memset(lp, 0, PAGE_SIZE);
    4.86 +        unmap_domain_page(lp);
    4.87 +
    4.88 +    }
    4.89      if ( unlikely(page == NULL) )
    4.90      {
    4.91          printk("Couldn't alloc shadow page! dom%d count=%d\n",
    4.92 @@ -268,6 +262,29 @@ alloc_shadow_page(struct domain *d,
    4.93  
    4.94          break;
    4.95  
    4.96 +    case PGT_l3_shadow:
    4.97 +        if ( !shadow_promote(d, gpfn, gmfn, psh_type) )
    4.98 +            goto fail;
    4.99 +        perfc_incr(shadow_l3_pages);
   4.100 +        d->arch.shadow_page_count++;
   4.101 +        break;
   4.102 +
   4.103 +    case PGT_l4_shadow:
   4.104 +        if ( !shadow_promote(d, gpfn, gmfn, psh_type) )
   4.105 +            goto fail;
   4.106 +        perfc_incr(shadow_l4_pages);
   4.107 +        d->arch.shadow_page_count++;
   4.108 +        if ( PGT_l4_page_table == PGT_root_page_table )
   4.109 +            pin = 1;
   4.110 +        break;
   4.111 +
   4.112 +#if CONFIG_PAGING_LEVELS >= 4
   4.113 +    case PGT_fl1_shadow:
   4.114 +        perfc_incr(shadow_l1_pages);
   4.115 +        d->arch.shadow_page_count++;
   4.116 +        break;
   4.117 +#else
   4.118 +
   4.119      case PGT_hl2_shadow:
   4.120          // Treat an hl2 as an L1 for purposes of promotion.
   4.121          // For external mode domains, treat them as an L2 for purposes of
   4.122 @@ -282,7 +299,7 @@ alloc_shadow_page(struct domain *d,
   4.123              pin = 1;
   4.124  
   4.125          break;
   4.126 -
   4.127 +#endif
   4.128      case PGT_snapshot:
   4.129          perfc_incr(snapshot_pages);
   4.130          d->arch.snapshot_page_count++;
   4.131 @@ -312,1127 +329,7 @@ alloc_shadow_page(struct domain *d,
   4.132      return 0;
   4.133  }
   4.134  
   4.135 -static void inline
   4.136 -free_shadow_l1_table(struct domain *d, unsigned long smfn)
   4.137 -{
   4.138 -    l1_pgentry_t *pl1e = map_domain_page(smfn);
   4.139 -    int i;
   4.140 -    struct pfn_info *spage = pfn_to_page(smfn);
   4.141 -    u32 min_max = spage->tlbflush_timestamp;
   4.142 -    int min = SHADOW_MIN(min_max);
   4.143 -    int max = SHADOW_MAX(min_max);
   4.144 -
   4.145 -    for ( i = min; i <= max; i++ )
   4.146 -    {
   4.147 -        shadow_put_page_from_l1e(pl1e[i], d);
   4.148 -        pl1e[i] = l1e_empty();
   4.149 -    }
   4.150 -
   4.151 -    unmap_domain_page(pl1e);
   4.152 -}
   4.153 -
   4.154 -static void inline
   4.155 -free_shadow_hl2_table(struct domain *d, unsigned long smfn)
   4.156 -{
   4.157 -    l1_pgentry_t *hl2 = map_domain_page(smfn);
   4.158 -    int i, limit;
   4.159 -
   4.160 -    SH_VVLOG("%s: smfn=%lx freed", __func__, smfn);
   4.161 -
   4.162 -#ifdef __i386__
   4.163 -    if ( shadow_mode_external(d) )
   4.164 -        limit = L2_PAGETABLE_ENTRIES;
   4.165 -    else
   4.166 -        limit = DOMAIN_ENTRIES_PER_L2_PAGETABLE;
   4.167 -#else
   4.168 -    limit = 0; /* XXX x86/64 XXX */
   4.169 -#endif
   4.170 -
   4.171 -    for ( i = 0; i < limit; i++ )
   4.172 -    {
   4.173 -        if ( l1e_get_flags(hl2[i]) & _PAGE_PRESENT )
   4.174 -            put_page(pfn_to_page(l1e_get_pfn(hl2[i])));
   4.175 -    }
   4.176 -
   4.177 -    unmap_domain_page(hl2);
   4.178 -}
   4.179 -
   4.180 -static void inline
   4.181 -free_shadow_l2_table(struct domain *d, unsigned long smfn, unsigned int type)
   4.182 -{
   4.183 -    l2_pgentry_t *pl2e = map_domain_page(smfn);
   4.184 -    int i, external = shadow_mode_external(d);
   4.185 -
   4.186 -    for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
   4.187 -        if ( external || is_guest_l2_slot(type, i) )
   4.188 -            if ( l2e_get_flags(pl2e[i]) & _PAGE_PRESENT )
   4.189 -                put_shadow_ref(l2e_get_pfn(pl2e[i]));
   4.190 -
   4.191 -    if ( (PGT_base_page_table == PGT_l2_page_table) &&
   4.192 -         shadow_mode_translate(d) && !external )
   4.193 -    {
   4.194 -        // free the ref to the hl2
   4.195 -        //
   4.196 -        put_shadow_ref(l2e_get_pfn(pl2e[l2_table_offset(LINEAR_PT_VIRT_START)]));
   4.197 -    }
   4.198 -
   4.199 -    unmap_domain_page(pl2e);
   4.200 -}
   4.201 -
   4.202 -void free_shadow_page(unsigned long smfn)
   4.203 -{
   4.204 -    struct pfn_info *page = &frame_table[smfn];
   4.205 -    unsigned long gmfn = page->u.inuse.type_info & PGT_mfn_mask;
   4.206 -    struct domain *d = page_get_owner(pfn_to_page(gmfn));
   4.207 -    unsigned long gpfn = __mfn_to_gpfn(d, gmfn);
   4.208 -    unsigned long type = page->u.inuse.type_info & PGT_type_mask;
   4.209 -
   4.210 -    SH_VVLOG("%s: free'ing smfn=%lx", __func__, smfn);
   4.211 -
   4.212 -    ASSERT( ! IS_INVALID_M2P_ENTRY(gpfn) );
   4.213 -
   4.214 -    delete_shadow_status(d, gpfn, gmfn, type);
   4.215 -
   4.216 -    switch ( type )
   4.217 -    {
   4.218 -    case PGT_l1_shadow:
   4.219 -        perfc_decr(shadow_l1_pages);
   4.220 -        shadow_demote(d, gpfn, gmfn);
   4.221 -        free_shadow_l1_table(d, smfn);
   4.222 -        break;
   4.223 -
   4.224 -    case PGT_l2_shadow:
   4.225 -        perfc_decr(shadow_l2_pages);
   4.226 -        shadow_demote(d, gpfn, gmfn);
   4.227 -        free_shadow_l2_table(d, smfn, page->u.inuse.type_info);
   4.228 -        break;
   4.229 -
   4.230 -    case PGT_hl2_shadow:
   4.231 -        perfc_decr(hl2_table_pages);
   4.232 -        shadow_demote(d, gpfn, gmfn);
   4.233 -        free_shadow_hl2_table(d, smfn);
   4.234 -        break;
   4.235 -
   4.236 -    case PGT_snapshot:
   4.237 -        perfc_decr(snapshot_pages);
   4.238 -        break;
   4.239 -
   4.240 -    default:
   4.241 -        printk("Free shadow weird page type mfn=%lx type=%08x\n",
   4.242 -               page_to_pfn(page), page->u.inuse.type_info);
   4.243 -        break;
   4.244 -    }
   4.245 -
   4.246 -    d->arch.shadow_page_count--;
   4.247 -
   4.248 -    // No TLB flushes are needed the next time this page gets allocated.
   4.249 -    //
   4.250 -    page->tlbflush_timestamp = 0;
   4.251 -    page->u.free.cpumask     = CPU_MASK_NONE;
   4.252 -
   4.253 -    if ( type == PGT_l1_shadow )
   4.254 -    {
   4.255 -        list_add(&page->list, &d->arch.free_shadow_frames);
   4.256 -        perfc_incr(free_l1_pages);
   4.257 -    }
   4.258 -    else
   4.259 -        free_domheap_page(page);
   4.260 -}
   4.261 -
   4.262 -void
   4.263 -remove_shadow(struct domain *d, unsigned long gpfn, u32 stype)
   4.264 -{
   4.265 -    unsigned long smfn;
   4.266 -
   4.267 -    //printk("%s(gpfn=%lx, type=%x)\n", __func__, gpfn, stype);
   4.268 -
   4.269 -    shadow_lock(d);
   4.270 -
   4.271 -    while ( stype >= PGT_l1_shadow )
   4.272 -    {
   4.273 -        smfn = __shadow_status(d, gpfn, stype);
   4.274 -        if ( smfn && MFN_PINNED(smfn) )
   4.275 -            shadow_unpin(smfn);
   4.276 -        stype -= PGT_l1_shadow;
   4.277 -    }
   4.278 -
   4.279 -    shadow_unlock(d);
   4.280 -}
   4.281 -
   4.282 -static void inline
   4.283 -release_out_of_sync_entry(struct domain *d, struct out_of_sync_entry *entry)
   4.284 -{
   4.285 -    struct pfn_info *page;
   4.286 -
   4.287 -    page = &frame_table[entry->gmfn];
   4.288 -        
   4.289 -    // Decrement ref count of guest & shadow pages
   4.290 -    //
   4.291 -    put_page(page);
   4.292 -
   4.293 -    // Only use entries that have low bits clear...
   4.294 -    //
   4.295 -    if ( !(entry->writable_pl1e & (sizeof(l1_pgentry_t)-1)) )
   4.296 -    {
   4.297 -        put_shadow_ref(entry->writable_pl1e >> PAGE_SHIFT);
   4.298 -        entry->writable_pl1e = -2;
   4.299 -    }
   4.300 -    else
   4.301 -        ASSERT( entry->writable_pl1e == -1 );
   4.302 -
   4.303 -    // Free the snapshot
   4.304 -    //
   4.305 -    shadow_free_snapshot(d, entry);
   4.306 -}
   4.307 -
   4.308 -static void remove_out_of_sync_entries(struct domain *d, unsigned long gmfn)
   4.309 -{
   4.310 -    struct out_of_sync_entry *entry = d->arch.out_of_sync;
   4.311 -    struct out_of_sync_entry **prev = &d->arch.out_of_sync;
   4.312 -    struct out_of_sync_entry *found = NULL;
   4.313 -
   4.314 -    // NB: Be careful not to call something that manipulates this list
   4.315 -    //     while walking it.  Collect the results into a separate list
   4.316 -    //     first, then walk that list.
   4.317 -    //
   4.318 -    while ( entry )
   4.319 -    {
   4.320 -        if ( entry->gmfn == gmfn )
   4.321 -        {
   4.322 -            // remove from out of sync list
   4.323 -            *prev = entry->next;
   4.324 -
   4.325 -            // add to found list
   4.326 -            entry->next = found;
   4.327 -            found = entry;
   4.328 -
   4.329 -            entry = *prev;
   4.330 -            continue;
   4.331 -        }
   4.332 -        prev = &entry->next;
   4.333 -        entry = entry->next;
   4.334 -    }
   4.335 -
   4.336 -    prev = NULL;
   4.337 -    entry = found;
   4.338 -    while ( entry )
   4.339 -    {
   4.340 -        release_out_of_sync_entry(d, entry);
   4.341 -
   4.342 -        prev = &entry->next;
   4.343 -        entry = entry->next;
   4.344 -    }
   4.345 -
   4.346 -    // Add found list to free list
   4.347 -    if ( prev )
   4.348 -    {
   4.349 -        *prev = d->arch.out_of_sync_free;
   4.350 -        d->arch.out_of_sync_free = found;
   4.351 -    }
   4.352 -}
   4.353 -
   4.354 -static void free_out_of_sync_state(struct domain *d)
   4.355 -{
   4.356 -    struct out_of_sync_entry *entry;
   4.357 -
   4.358 -    // NB: Be careful not to call something that manipulates this list
   4.359 -    //     while walking it.  Remove one item at a time, and always
   4.360 -    //     restart from start of list.
   4.361 -    //
   4.362 -    while ( (entry = d->arch.out_of_sync) )
   4.363 -    {
   4.364 -        d->arch.out_of_sync = entry->next;
   4.365 -        release_out_of_sync_entry(d, entry);
   4.366 -
   4.367 -        entry->next = d->arch.out_of_sync_free;
   4.368 -        d->arch.out_of_sync_free = entry;
   4.369 -    }
   4.370 -}
   4.371 -
   4.372 -static void free_shadow_pages(struct domain *d)
   4.373 -{
   4.374 -    int                   i;
   4.375 -    struct shadow_status *x;
   4.376 -    struct vcpu          *v;
   4.377 - 
   4.378 -    /*
   4.379 -     * WARNING! The shadow page table must not currently be in use!
   4.380 -     * e.g., You are expected to have paused the domain and synchronized CR3.
   4.381 -     */
   4.382 -
   4.383 -    if( !d->arch.shadow_ht ) return;
   4.384 -
   4.385 -    shadow_audit(d, 1);
   4.386 -
   4.387 -    // first, remove any outstanding refs from out_of_sync entries...
   4.388 -    //
   4.389 -    free_out_of_sync_state(d);
   4.390 -
   4.391 -    // second, remove any outstanding refs from v->arch.shadow_table
   4.392 -    // and CR3.
   4.393 -    //
   4.394 -    for_each_vcpu(d, v)
   4.395 -    {
   4.396 -        if ( pagetable_get_paddr(v->arch.shadow_table) )
   4.397 -        {
   4.398 -            put_shadow_ref(pagetable_get_pfn(v->arch.shadow_table));
   4.399 -            v->arch.shadow_table = mk_pagetable(0);
   4.400 -        }
   4.401 -
   4.402 -        if ( v->arch.monitor_shadow_ref )
   4.403 -        {
   4.404 -            put_shadow_ref(v->arch.monitor_shadow_ref);
   4.405 -            v->arch.monitor_shadow_ref = 0;
   4.406 -        }
   4.407 -    }
   4.408 -
   4.409 -    // For external shadows, remove the monitor table's refs
   4.410 -    //
   4.411 -    if ( shadow_mode_external(d) )
   4.412 -    {
   4.413 -        for_each_vcpu(d, v)
   4.414 -        {
   4.415 -            l2_pgentry_t *mpl2e = v->arch.monitor_vtable;
   4.416 -
   4.417 -            if ( mpl2e )
   4.418 -            {
   4.419 -                l2_pgentry_t hl2e = mpl2e[l2_table_offset(LINEAR_PT_VIRT_START)];
   4.420 -                l2_pgentry_t smfn = mpl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)];
   4.421 -
   4.422 -                if ( l2e_get_flags(hl2e) & _PAGE_PRESENT )
   4.423 -                {
   4.424 -                    put_shadow_ref(l2e_get_pfn(hl2e));
   4.425 -                    mpl2e[l2_table_offset(LINEAR_PT_VIRT_START)] = l2e_empty();
   4.426 -                }
   4.427 -                if ( l2e_get_flags(smfn) & _PAGE_PRESENT )
   4.428 -                {
   4.429 -                    put_shadow_ref(l2e_get_pfn(smfn));
   4.430 -                    mpl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)] = l2e_empty();
   4.431 -                }
   4.432 -            }
   4.433 -        }
   4.434 -    }
   4.435 -
   4.436 -    // Now, the only refs to shadow pages that are left are from the shadow
   4.437 -    // pages themselves.  We just unpin the pinned pages, and the rest
   4.438 -    // should automatically disappear.
   4.439 -    //
   4.440 -    // NB: Beware: each explicitly or implicit call to free_shadow_page
   4.441 -    // can/will result in the hash bucket getting rewritten out from
   4.442 -    // under us...  First, collect the list of pinned pages, then
   4.443 -    // free them.
   4.444 -    //
   4.445 -    for ( i = 0; i < shadow_ht_buckets; i++ )
   4.446 -    {
   4.447 -        u32 count;
   4.448 -        unsigned long *mfn_list;
   4.449 -
   4.450 -        /* Skip empty buckets. */
   4.451 -        if ( d->arch.shadow_ht[i].gpfn_and_flags == 0 )
   4.452 -            continue;
   4.453 -
   4.454 -        count = 0;
   4.455 -        for ( x = &d->arch.shadow_ht[i]; x != NULL; x = x->next )
   4.456 -            if ( MFN_PINNED(x->smfn) )
   4.457 -                count++;
   4.458 -        if ( !count )
   4.459 -            continue;
   4.460 -
   4.461 -        mfn_list = xmalloc_array(unsigned long, count);
   4.462 -        count = 0;
   4.463 -        for ( x = &d->arch.shadow_ht[i]; x != NULL; x = x->next )
   4.464 -            if ( MFN_PINNED(x->smfn) )
   4.465 -                mfn_list[count++] = x->smfn;
   4.466 -
   4.467 -        while ( count )
   4.468 -        {
   4.469 -            shadow_unpin(mfn_list[--count]);
   4.470 -        }
   4.471 -        xfree(mfn_list);
   4.472 -    }
   4.473 -
   4.474 -    // Now free the pre-zero'ed pages from the domain
   4.475 -    //
   4.476 -    struct list_head *list_ent, *tmp;
   4.477 -    list_for_each_safe(list_ent, tmp, &d->arch.free_shadow_frames)
   4.478 -    {
   4.479 -        list_del(list_ent);
   4.480 -        perfc_decr(free_l1_pages);
   4.481 -
   4.482 -        struct pfn_info *page = list_entry(list_ent, struct pfn_info, list);
   4.483 -        free_domheap_page(page);
   4.484 -    }
   4.485 -
   4.486 -    shadow_audit(d, 0);
   4.487 -
   4.488 -    SH_LOG("Free shadow table.");
   4.489 -}
   4.490 -
   4.491 -void shadow_mode_init(void)
   4.492 -{
   4.493 -}
   4.494 -
   4.495 -int _shadow_mode_refcounts(struct domain *d)
   4.496 -{
   4.497 -    return shadow_mode_refcounts(d);
   4.498 -}
   4.499 -
   4.500 -static void alloc_monitor_pagetable(struct vcpu *v)
   4.501 -{
   4.502 -    unsigned long mmfn;
   4.503 -    l2_pgentry_t *mpl2e;
   4.504 -    struct pfn_info *mmfn_info;
   4.505 -    struct domain *d = v->domain;
   4.506 -
   4.507 -    ASSERT(pagetable_get_paddr(v->arch.monitor_table) == 0);
   4.508 -
   4.509 -    mmfn_info = alloc_domheap_page(NULL);
   4.510 -    ASSERT(mmfn_info != NULL);
   4.511 -
   4.512 -    mmfn = page_to_pfn(mmfn_info);
   4.513 -    mpl2e = (l2_pgentry_t *)map_domain_page(mmfn);
   4.514 -    memset(mpl2e, 0, PAGE_SIZE);
   4.515 -
   4.516 -#ifdef __i386__ /* XXX screws x86/64 build */
   4.517 -    memcpy(&mpl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE], 
   4.518 -           &idle_pg_table[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
   4.519 -           HYPERVISOR_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));
   4.520 -#endif
   4.521 -
   4.522 -    mpl2e[l2_table_offset(PERDOMAIN_VIRT_START)] =
   4.523 -        l2e_from_paddr(__pa(d->arch.mm_perdomain_pt),
   4.524 -                        __PAGE_HYPERVISOR);
   4.525 -
   4.526 -    // map the phys_to_machine map into the Read-Only MPT space for this domain
   4.527 -    mpl2e[l2_table_offset(RO_MPT_VIRT_START)] =
   4.528 -        l2e_from_paddr(pagetable_get_paddr(d->arch.phys_table),
   4.529 -                        __PAGE_HYPERVISOR);
   4.530 -
   4.531 -    // Don't (yet) have mappings for these...
   4.532 -    // Don't want to accidentally see the idle_pg_table's linear mapping.
   4.533 -    //
   4.534 -    mpl2e[l2_table_offset(LINEAR_PT_VIRT_START)] = l2e_empty();
   4.535 -    mpl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)] = l2e_empty();
   4.536 -
   4.537 -    v->arch.monitor_table = mk_pagetable(mmfn << PAGE_SHIFT);
   4.538 -    v->arch.monitor_vtable = mpl2e;
   4.539 -}
   4.540 -
   4.541 -/*
   4.542 - * Free the pages for monitor_table and hl2_table
   4.543 - */
   4.544 -void free_monitor_pagetable(struct vcpu *v)
   4.545 -{
   4.546 -    l2_pgentry_t *mpl2e, hl2e, sl2e;
   4.547 -    unsigned long mfn;
   4.548 -
   4.549 -    ASSERT( pagetable_get_paddr(v->arch.monitor_table) );
   4.550 -    
   4.551 -    mpl2e = v->arch.monitor_vtable;
   4.552 -
   4.553 -    /*
   4.554 -     * First get the mfn for hl2_table by looking at monitor_table
   4.555 -     */
   4.556 -    hl2e = mpl2e[l2_table_offset(LINEAR_PT_VIRT_START)];
   4.557 -    if ( l2e_get_flags(hl2e) & _PAGE_PRESENT )
   4.558 -    {
   4.559 -        mfn = l2e_get_pfn(hl2e);
   4.560 -        ASSERT(mfn);
   4.561 -        put_shadow_ref(mfn);
   4.562 -    }
   4.563 -
   4.564 -    sl2e = mpl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)];
   4.565 -    if ( l2e_get_flags(sl2e) & _PAGE_PRESENT )
   4.566 -    {
   4.567 -        mfn = l2e_get_pfn(sl2e);
   4.568 -        ASSERT(mfn);
   4.569 -        put_shadow_ref(mfn);
   4.570 -    }
   4.571 -
   4.572 -    unmap_domain_page(mpl2e);
   4.573 -
   4.574 -    /*
   4.575 -     * Then free monitor_table.
   4.576 -     */
   4.577 -    mfn = pagetable_get_pfn(v->arch.monitor_table);
   4.578 -    free_domheap_page(&frame_table[mfn]);
   4.579 -
   4.580 -    v->arch.monitor_table = mk_pagetable(0);
   4.581 -    v->arch.monitor_vtable = 0;
   4.582 -}
   4.583 -
   4.584 -int
   4.585 -set_p2m_entry(struct domain *d, unsigned long pfn, unsigned long mfn,
   4.586 -              struct domain_mmap_cache *l2cache,
   4.587 -              struct domain_mmap_cache *l1cache)
   4.588 -{
   4.589 -    unsigned long tabpfn = pagetable_get_pfn(d->arch.phys_table);
   4.590 -    l2_pgentry_t *l2, l2e;
   4.591 -    l1_pgentry_t *l1;
   4.592 -    struct pfn_info *l1page;
   4.593 -    unsigned long va = pfn << PAGE_SHIFT;
   4.594 -
   4.595 -    ASSERT(tabpfn != 0);
   4.596 -
   4.597 -    l2 = map_domain_page_with_cache(tabpfn, l2cache);
   4.598 -    l2e = l2[l2_table_offset(va)];
   4.599 -    if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
   4.600 -    {
   4.601 -        l1page = alloc_domheap_page(NULL);
   4.602 -        if ( !l1page )
   4.603 -        {
   4.604 -            unmap_domain_page_with_cache(l2, l2cache);
   4.605 -            return 0;
   4.606 -        }
   4.607 -
   4.608 -        l1 = map_domain_page_with_cache(page_to_pfn(l1page), l1cache);
   4.609 -        memset(l1, 0, PAGE_SIZE);
   4.610 -        unmap_domain_page_with_cache(l1, l1cache);
   4.611 -
   4.612 -        l2e = l2e_from_page(l1page, __PAGE_HYPERVISOR);
   4.613 -        l2[l2_table_offset(va)] = l2e;
   4.614 -    }
   4.615 -    unmap_domain_page_with_cache(l2, l2cache);
   4.616 -
   4.617 -    l1 = map_domain_page_with_cache(l2e_get_pfn(l2e), l1cache);
   4.618 -    l1[l1_table_offset(va)] = l1e_from_pfn(mfn, __PAGE_HYPERVISOR);
   4.619 -    unmap_domain_page_with_cache(l1, l1cache);
   4.620 -
   4.621 -    return 1;
   4.622 -}
   4.623 -
   4.624 -static int
   4.625 -alloc_p2m_table(struct domain *d)
   4.626 -{
   4.627 -    struct list_head *list_ent;
   4.628 -    struct pfn_info *page, *l2page;
   4.629 -    l2_pgentry_t *l2;
   4.630 -    unsigned long mfn, pfn;
   4.631 -    struct domain_mmap_cache l1cache, l2cache;
   4.632 -
   4.633 -    l2page = alloc_domheap_page(NULL);
   4.634 -    if ( l2page == NULL )
   4.635 -        return 0;
   4.636 -
   4.637 -    domain_mmap_cache_init(&l1cache);
   4.638 -    domain_mmap_cache_init(&l2cache);
   4.639 -
   4.640 -    d->arch.phys_table = mk_pagetable(page_to_phys(l2page));
   4.641 -    l2 = map_domain_page_with_cache(page_to_pfn(l2page), &l2cache);
   4.642 -    memset(l2, 0, PAGE_SIZE);
   4.643 -    unmap_domain_page_with_cache(l2, &l2cache);
   4.644 -
   4.645 -    list_ent = d->page_list.next;
   4.646 -    while ( list_ent != &d->page_list )
   4.647 -    {
   4.648 -        page = list_entry(list_ent, struct pfn_info, list);
   4.649 -        mfn = page_to_pfn(page);
   4.650 -        pfn = machine_to_phys_mapping[mfn];
   4.651 -        ASSERT(pfn != INVALID_M2P_ENTRY);
   4.652 -        ASSERT(pfn < (1u<<20));
   4.653 -
   4.654 -        set_p2m_entry(d, pfn, mfn, &l2cache, &l1cache);
   4.655 -
   4.656 -        list_ent = page->list.next;
   4.657 -    }
   4.658 -
   4.659 -    list_ent = d->xenpage_list.next;
   4.660 -    while ( list_ent != &d->xenpage_list )
   4.661 -    {
   4.662 -        page = list_entry(list_ent, struct pfn_info, list);
   4.663 -        mfn = page_to_pfn(page);
   4.664 -        pfn = machine_to_phys_mapping[mfn];
   4.665 -        if ( (pfn != INVALID_M2P_ENTRY) &&
   4.666 -             (pfn < (1u<<20)) )
   4.667 -        {
   4.668 -            set_p2m_entry(d, pfn, mfn, &l2cache, &l1cache);
   4.669 -        }
   4.670 -
   4.671 -        list_ent = page->list.next;
   4.672 -    }
   4.673 -
   4.674 -    domain_mmap_cache_destroy(&l2cache);
   4.675 -    domain_mmap_cache_destroy(&l1cache);
   4.676 -
   4.677 -    return 1;
   4.678 -}
   4.679 -
   4.680 -static void
   4.681 -free_p2m_table(struct domain *d)
   4.682 -{
   4.683 -    // uh, this needs some work...  :)
   4.684 -    BUG();
   4.685 -}
   4.686 -
   4.687 -int __shadow_mode_enable(struct domain *d, unsigned int mode)
   4.688 -{
   4.689 -    struct vcpu *v;
   4.690 -    int new_modes = (mode & ~d->arch.shadow_mode);
   4.691 -
   4.692 -    // Gotta be adding something to call this function.
   4.693 -    ASSERT(new_modes);
   4.694 -
   4.695 -    // can't take anything away by calling this function.
   4.696 -    ASSERT(!(d->arch.shadow_mode & ~mode));
   4.697 -
   4.698 -    for_each_vcpu(d, v)
   4.699 -    {
   4.700 -        invalidate_shadow_ldt(v);
   4.701 -
   4.702 -        // We need to set these up for __update_pagetables().
   4.703 -        // See the comment there.
   4.704 -
   4.705 -        /*
   4.706 -         * arch.guest_vtable
   4.707 -         */
   4.708 -        if ( v->arch.guest_vtable &&
   4.709 -             (v->arch.guest_vtable != __linear_l2_table) )
   4.710 -        {
   4.711 -            unmap_domain_page(v->arch.guest_vtable);
   4.712 -        }
   4.713 -        if ( (mode & (SHM_translate | SHM_external)) == SHM_translate )
   4.714 -            v->arch.guest_vtable = __linear_l2_table;
   4.715 -        else
   4.716 -            v->arch.guest_vtable = NULL;
   4.717 -
   4.718 -        /*
   4.719 -         * arch.shadow_vtable
   4.720 -         */
   4.721 -        if ( v->arch.shadow_vtable &&
   4.722 -             (v->arch.shadow_vtable != __shadow_linear_l2_table) )
   4.723 -        {
   4.724 -            unmap_domain_page(v->arch.shadow_vtable);
   4.725 -        }
   4.726 -        if ( !(mode & SHM_external) )
   4.727 -            v->arch.shadow_vtable = __shadow_linear_l2_table;
   4.728 -        else
   4.729 -            v->arch.shadow_vtable = NULL;
   4.730 -
   4.731 -        /*
   4.732 -         * arch.hl2_vtable
   4.733 -         */
   4.734 -        if ( v->arch.hl2_vtable &&
   4.735 -             (v->arch.hl2_vtable != __linear_hl2_table) )
   4.736 -        {
   4.737 -            unmap_domain_page(v->arch.hl2_vtable);
   4.738 -        }
   4.739 -        if ( (mode & (SHM_translate | SHM_external)) == SHM_translate )
   4.740 -            v->arch.hl2_vtable = __linear_hl2_table;
   4.741 -        else
   4.742 -            v->arch.hl2_vtable = NULL;
   4.743 -
   4.744 -        /*
   4.745 -         * arch.monitor_table & arch.monitor_vtable
   4.746 -         */
   4.747 -        if ( v->arch.monitor_vtable )
   4.748 -        {
   4.749 -            free_monitor_pagetable(v);
   4.750 -        }
   4.751 -        if ( mode & SHM_external )
   4.752 -        {
   4.753 -            alloc_monitor_pagetable(v);
   4.754 -        }
   4.755 -    }
   4.756 -
   4.757 -    if ( new_modes & SHM_enable )
   4.758 -    {
   4.759 -        ASSERT( !d->arch.shadow_ht );
   4.760 -        d->arch.shadow_ht = xmalloc_array(struct shadow_status, shadow_ht_buckets);
   4.761 -        if ( d->arch.shadow_ht == NULL )
   4.762 -            goto nomem;
   4.763 -
   4.764 -        memset(d->arch.shadow_ht, 0,
   4.765 -           shadow_ht_buckets * sizeof(struct shadow_status));
   4.766 -    }
   4.767 -
   4.768 -    if ( new_modes & SHM_log_dirty )
   4.769 -    {
   4.770 -        ASSERT( !d->arch.shadow_dirty_bitmap );
   4.771 -        d->arch.shadow_dirty_bitmap_size = (d->max_pages + 63) & ~63;
   4.772 -        d->arch.shadow_dirty_bitmap = 
   4.773 -            xmalloc_array(unsigned long, d->arch.shadow_dirty_bitmap_size /
   4.774 -                                         (8 * sizeof(unsigned long)));
   4.775 -        if ( d->arch.shadow_dirty_bitmap == NULL )
   4.776 -        {
   4.777 -            d->arch.shadow_dirty_bitmap_size = 0;
   4.778 -            goto nomem;
   4.779 -        }
   4.780 -        memset(d->arch.shadow_dirty_bitmap, 0, 
   4.781 -               d->arch.shadow_dirty_bitmap_size/8);
   4.782 -    }
   4.783 -
   4.784 -    if ( new_modes & SHM_translate )
   4.785 -    {
   4.786 -        if ( !(new_modes & SHM_external) )
   4.787 -        {
   4.788 -            ASSERT( !pagetable_get_paddr(d->arch.phys_table) );
   4.789 -            if ( !alloc_p2m_table(d) )
   4.790 -            {
   4.791 -                printk("alloc_p2m_table failed (out-of-memory?)\n");
   4.792 -                goto nomem;
   4.793 -            }
   4.794 -        }
   4.795 -        else
   4.796 -        {
   4.797 -            // external guests provide their own memory for their P2M maps.
   4.798 -            //
   4.799 -            ASSERT( d == page_get_owner(
   4.800 -                        &frame_table[pagetable_get_pfn(d->arch.phys_table)]) );
   4.801 -        }
   4.802 -    }
   4.803 -
   4.804 -    printk("audit1\n");
   4.805 -    _audit_domain(d, AUDIT_SHADOW_ALREADY_LOCKED | AUDIT_ERRORS_OK);
   4.806 -    printk("audit1 done\n");
   4.807 -
   4.808 -    // Get rid of any shadow pages from any previous shadow mode.
   4.809 -    //
   4.810 -    free_shadow_pages(d);
   4.811 -
   4.812 -    printk("audit2\n");
   4.813 -    _audit_domain(d, AUDIT_SHADOW_ALREADY_LOCKED | AUDIT_ERRORS_OK);
   4.814 -    printk("audit2 done\n");
   4.815 -
   4.816 -    /*
   4.817 -     * Tear down it's counts by disassembling its page-table-based ref counts.
   4.818 -     * Also remove CR3's gcount/tcount.
   4.819 -     * That leaves things like GDTs and LDTs and external refs in tact.
   4.820 -     *
   4.821 -     * Most pages will be writable tcount=0.
   4.822 -     * Some will still be L1 tcount=0 or L2 tcount=0.
   4.823 -     * Maybe some pages will be type none tcount=0.
   4.824 -     * Pages granted external writable refs (via grant tables?) will
   4.825 -     * still have a non-zero tcount.  That's OK.
   4.826 -     *
   4.827 -     * gcounts will generally be 1 for PGC_allocated.
   4.828 -     * GDTs and LDTs will have additional gcounts.
   4.829 -     * Any grant-table based refs will still be in the gcount.
   4.830 -     *
   4.831 -     * We attempt to grab writable refs to each page (thus setting its type).
   4.832 -     * Immediately put back those type refs.
   4.833 -     *
   4.834 -     * Assert that no pages are left with L1/L2/L3/L4 type.
   4.835 -     */
   4.836 -    audit_adjust_pgtables(d, -1, 1);
   4.837 -
   4.838 -    d->arch.shadow_mode = mode;
   4.839 -
   4.840 -    if ( shadow_mode_refcounts(d) )
   4.841 -    {
   4.842 -        struct list_head *list_ent = d->page_list.next;
   4.843 -        while ( list_ent != &d->page_list )
   4.844 -        {
   4.845 -            struct pfn_info *page = list_entry(list_ent, struct pfn_info, list);
   4.846 -            if ( !get_page_type(page, PGT_writable_page) )
   4.847 -                BUG();
   4.848 -            put_page_type(page);
   4.849 -
   4.850 -            list_ent = page->list.next;
   4.851 -        }
   4.852 -    }
   4.853 -
   4.854 -    audit_adjust_pgtables(d, 1, 1);
   4.855 -
   4.856 -    printk("audit3\n");
   4.857 -    _audit_domain(d, AUDIT_SHADOW_ALREADY_LOCKED | AUDIT_ERRORS_OK);
   4.858 -    printk("audit3 done\n");
   4.859 -
   4.860 -    return 0;
   4.861 -
   4.862 - nomem:
   4.863 -    if ( (new_modes & SHM_enable) )
   4.864 -    {
   4.865 -        xfree(d->arch.shadow_ht);
   4.866 -        d->arch.shadow_ht = NULL;
   4.867 -    }
   4.868 -    if ( (new_modes & SHM_log_dirty) )
   4.869 -    {
   4.870 -        xfree(d->arch.shadow_dirty_bitmap);
   4.871 -        d->arch.shadow_dirty_bitmap = NULL;
   4.872 -    }
   4.873 -    if ( (new_modes & SHM_translate) && !(new_modes & SHM_external) &&
   4.874 -         pagetable_get_paddr(d->arch.phys_table) )
   4.875 -    {
   4.876 -        free_p2m_table(d);
   4.877 -    }
   4.878 -    return -ENOMEM;
   4.879 -}
   4.880 -
   4.881 -int shadow_mode_enable(struct domain *d, unsigned int mode)
   4.882 -{
   4.883 -    int rc;
   4.884 -    shadow_lock(d);
   4.885 -    rc = __shadow_mode_enable(d, mode);
   4.886 -    shadow_unlock(d);
   4.887 -    return rc;
   4.888 -}
   4.889 -
   4.890 -static void
   4.891 -translate_l1pgtable(struct domain *d, l1_pgentry_t *p2m, unsigned long l1mfn)
   4.892 -{
   4.893 -    int i;
   4.894 -    l1_pgentry_t *l1;
   4.895 -
   4.896 -    l1 = map_domain_page(l1mfn);
   4.897 -    for (i = 0; i < L1_PAGETABLE_ENTRIES; i++)
   4.898 -    {
   4.899 -        if ( is_guest_l1_slot(i) &&
   4.900 -             (l1e_get_flags(l1[i]) & _PAGE_PRESENT) )
   4.901 -        {
   4.902 -            unsigned long mfn = l1e_get_pfn(l1[i]);
   4.903 -            unsigned long gpfn = __mfn_to_gpfn(d, mfn);
   4.904 -            ASSERT(l1e_get_pfn(p2m[gpfn]) == mfn);
   4.905 -            l1[i] = l1e_from_pfn(gpfn, l1e_get_flags(l1[i]));
   4.906 -        }
   4.907 -    }
   4.908 -    unmap_domain_page(l1);
   4.909 -}
   4.910 -
   4.911 -// This is not general enough to handle arbitrary pagetables
   4.912 -// with shared L1 pages, etc., but it is sufficient for bringing
   4.913 -// up dom0.
   4.914 -//
   4.915 -void
   4.916 -translate_l2pgtable(struct domain *d, l1_pgentry_t *p2m, unsigned long l2mfn,
   4.917 -                    unsigned int type)
   4.918 -{
   4.919 -    int i;
   4.920 -    l2_pgentry_t *l2;
   4.921 -
   4.922 -    ASSERT(shadow_mode_translate(d) && !shadow_mode_external(d));
   4.923 -
   4.924 -    l2 = map_domain_page(l2mfn);
   4.925 -    for (i = 0; i < L2_PAGETABLE_ENTRIES; i++)
   4.926 -    {
   4.927 -        if ( is_guest_l2_slot(type, i) &&
   4.928 -             (l2e_get_flags(l2[i]) & _PAGE_PRESENT) )
   4.929 -        {
   4.930 -            unsigned long mfn = l2e_get_pfn(l2[i]);
   4.931 -            unsigned long gpfn = __mfn_to_gpfn(d, mfn);
   4.932 -            ASSERT(l1e_get_pfn(p2m[gpfn]) == mfn);
   4.933 -            l2[i] = l2e_from_pfn(gpfn, l2e_get_flags(l2[i]));
   4.934 -            translate_l1pgtable(d, p2m, mfn);
   4.935 -        }
   4.936 -    }
   4.937 -    unmap_domain_page(l2);
   4.938 -}
   4.939 -
   4.940 -static void free_shadow_ht_entries(struct domain *d)
   4.941 -{
   4.942 -    struct shadow_status *x, *n;
   4.943 -
   4.944 -    SH_VLOG("freed tables count=%d l1=%d l2=%d",
   4.945 -            d->arch.shadow_page_count, perfc_value(shadow_l1_pages), 
   4.946 -            perfc_value(shadow_l2_pages));
   4.947 -
   4.948 -    n = d->arch.shadow_ht_extras;
   4.949 -    while ( (x = n) != NULL )
   4.950 -    {
   4.951 -        d->arch.shadow_extras_count--;
   4.952 -        n = *((struct shadow_status **)(&x[shadow_ht_extra_size]));
   4.953 -        xfree(x);
   4.954 -    }
   4.955 -
   4.956 -    d->arch.shadow_ht_extras = NULL;
   4.957 -    d->arch.shadow_ht_free = NULL;
   4.958 -
   4.959 -    ASSERT(d->arch.shadow_extras_count == 0);
   4.960 -    SH_LOG("freed extras, now %d", d->arch.shadow_extras_count);
   4.961 -
   4.962 -    if ( d->arch.shadow_dirty_bitmap != NULL )
   4.963 -    {
   4.964 -        xfree(d->arch.shadow_dirty_bitmap);
   4.965 -        d->arch.shadow_dirty_bitmap = 0;
   4.966 -        d->arch.shadow_dirty_bitmap_size = 0;
   4.967 -    }
   4.968 -
   4.969 -    xfree(d->arch.shadow_ht);
   4.970 -    d->arch.shadow_ht = NULL;
   4.971 -}
   4.972 -
   4.973 -static void free_out_of_sync_entries(struct domain *d)
   4.974 -{
   4.975 -    struct out_of_sync_entry *x, *n;
   4.976 -
   4.977 -    n = d->arch.out_of_sync_extras;
   4.978 -    while ( (x = n) != NULL )
   4.979 -    {
   4.980 -        d->arch.out_of_sync_extras_count--;
   4.981 -        n = *((struct out_of_sync_entry **)(&x[out_of_sync_extra_size]));
   4.982 -        xfree(x);
   4.983 -    }
   4.984 -
   4.985 -    d->arch.out_of_sync_extras = NULL;
   4.986 -    d->arch.out_of_sync_free = NULL;
   4.987 -    d->arch.out_of_sync = NULL;
   4.988 -
   4.989 -    ASSERT(d->arch.out_of_sync_extras_count == 0);
   4.990 -    FSH_LOG("freed extra out_of_sync entries, now %d",
   4.991 -            d->arch.out_of_sync_extras_count);
   4.992 -}
   4.993 -
   4.994 -void __shadow_mode_disable(struct domain *d)
   4.995 -{
   4.996 -    if ( unlikely(!shadow_mode_enabled(d)) )
   4.997 -        return;
   4.998 -
   4.999 -    /*
  4.1000 -     * Currently this does not fix up page ref counts, so it is valid to call
  4.1001 -     * only when a domain is being destroyed.
  4.1002 -     */
  4.1003 -    BUG_ON(!test_bit(_DOMF_dying, &d->domain_flags) &&
  4.1004 -           shadow_mode_refcounts(d));
  4.1005 -    d->arch.shadow_tainted_refcnts = shadow_mode_refcounts(d);
  4.1006 -
  4.1007 -    free_shadow_pages(d);
  4.1008 -    free_writable_pte_predictions(d);
  4.1009 -
  4.1010 -#ifndef NDEBUG
  4.1011 -    int i;
  4.1012 -    for ( i = 0; i < shadow_ht_buckets; i++ )
  4.1013 -    {
  4.1014 -        if ( d->arch.shadow_ht[i].gpfn_and_flags != 0 )
  4.1015 -        {
  4.1016 -            printk("%s: d->arch.shadow_ht[%x].gpfn_and_flags=%lx\n",
  4.1017 -                   __FILE__, i, d->arch.shadow_ht[i].gpfn_and_flags);
  4.1018 -            BUG();
  4.1019 -        }
  4.1020 -    }
  4.1021 -#endif
  4.1022 -
  4.1023 -    d->arch.shadow_mode = 0;
  4.1024 -
  4.1025 -    free_shadow_ht_entries(d);
  4.1026 -    free_out_of_sync_entries(d);
  4.1027 -
  4.1028 -    struct vcpu *v;
  4.1029 -    for_each_vcpu(d, v)
  4.1030 -    {
  4.1031 -        update_pagetables(v);
  4.1032 -    }
  4.1033 -}
  4.1034 -
  4.1035 -static int shadow_mode_table_op(
  4.1036 -    struct domain *d, dom0_shadow_control_t *sc)
  4.1037 -{
  4.1038 -    unsigned int      op = sc->op;
  4.1039 -    int               i, rc = 0;
  4.1040 -    struct vcpu *v;
  4.1041 -
  4.1042 -    ASSERT(shadow_lock_is_acquired(d));
  4.1043 -
  4.1044 -    SH_VLOG("shadow mode table op %lx %lx count %d",
  4.1045 -            (unsigned long)pagetable_get_pfn(d->vcpu[0]->arch.guest_table),  /* XXX SMP */
  4.1046 -            (unsigned long)pagetable_get_pfn(d->vcpu[0]->arch.shadow_table), /* XXX SMP */
  4.1047 -            d->arch.shadow_page_count);
  4.1048 -
  4.1049 -    shadow_audit(d, 1);
  4.1050 -
  4.1051 -    switch ( op )
  4.1052 -    {
  4.1053 -    case DOM0_SHADOW_CONTROL_OP_FLUSH:
  4.1054 -        free_shadow_pages(d);
  4.1055 -
  4.1056 -        d->arch.shadow_fault_count       = 0;
  4.1057 -        d->arch.shadow_dirty_count       = 0;
  4.1058 -        d->arch.shadow_dirty_net_count   = 0;
  4.1059 -        d->arch.shadow_dirty_block_count = 0;
  4.1060 -
  4.1061 -        break;
  4.1062 -   
  4.1063 -    case DOM0_SHADOW_CONTROL_OP_CLEAN:
  4.1064 -        free_shadow_pages(d);
  4.1065 -
  4.1066 -        sc->stats.fault_count       = d->arch.shadow_fault_count;
  4.1067 -        sc->stats.dirty_count       = d->arch.shadow_dirty_count;
  4.1068 -        sc->stats.dirty_net_count   = d->arch.shadow_dirty_net_count;
  4.1069 -        sc->stats.dirty_block_count = d->arch.shadow_dirty_block_count;
  4.1070 -
  4.1071 -        d->arch.shadow_fault_count       = 0;
  4.1072 -        d->arch.shadow_dirty_count       = 0;
  4.1073 -        d->arch.shadow_dirty_net_count   = 0;
  4.1074 -        d->arch.shadow_dirty_block_count = 0;
  4.1075 - 
  4.1076 -        if ( (d->max_pages > sc->pages) || 
  4.1077 -             (sc->dirty_bitmap == NULL) || 
  4.1078 -             (d->arch.shadow_dirty_bitmap == NULL) )
  4.1079 -        {
  4.1080 -            rc = -EINVAL;
  4.1081 -            break;
  4.1082 -        }
  4.1083 - 
  4.1084 -        sc->pages = d->max_pages;
  4.1085 -
  4.1086 -#define chunk (8*1024) /* Transfer and clear in 1kB chunks for L1 cache. */
  4.1087 -        for ( i = 0; i < d->max_pages; i += chunk )
  4.1088 -        {
  4.1089 -            int bytes = ((((d->max_pages - i) > chunk) ?
  4.1090 -                          chunk : (d->max_pages - i)) + 7) / 8;
  4.1091 -     
  4.1092 -            if (copy_to_user(
  4.1093 -                    sc->dirty_bitmap + (i/(8*sizeof(unsigned long))),
  4.1094 -                    d->arch.shadow_dirty_bitmap +(i/(8*sizeof(unsigned long))),
  4.1095 -                    bytes))
  4.1096 -            {
  4.1097 -                // copy_to_user can fail when copying to guest app memory.
  4.1098 -                // app should zero buffer after mallocing, and pin it
  4.1099 -                rc = -EINVAL;
  4.1100 -                memset(
  4.1101 -                    d->arch.shadow_dirty_bitmap + 
  4.1102 -                    (i/(8*sizeof(unsigned long))),
  4.1103 -                    0, (d->max_pages/8) - (i/(8*sizeof(unsigned long))));
  4.1104 -                break;
  4.1105 -            }
  4.1106 -
  4.1107 -            memset(
  4.1108 -                d->arch.shadow_dirty_bitmap + (i/(8*sizeof(unsigned long))),
  4.1109 -                0, bytes);
  4.1110 -        }
  4.1111 -
  4.1112 -        break;
  4.1113 -
  4.1114 -    case DOM0_SHADOW_CONTROL_OP_PEEK:
  4.1115 -        sc->stats.fault_count       = d->arch.shadow_fault_count;
  4.1116 -        sc->stats.dirty_count       = d->arch.shadow_dirty_count;
  4.1117 -        sc->stats.dirty_net_count   = d->arch.shadow_dirty_net_count;
  4.1118 -        sc->stats.dirty_block_count = d->arch.shadow_dirty_block_count;
  4.1119 - 
  4.1120 -        if ( (d->max_pages > sc->pages) || 
  4.1121 -             (sc->dirty_bitmap == NULL) || 
  4.1122 -             (d->arch.shadow_dirty_bitmap == NULL) )
  4.1123 -        {
  4.1124 -            rc = -EINVAL;
  4.1125 -            break;
  4.1126 -        }
  4.1127 - 
  4.1128 -        sc->pages = d->max_pages;
  4.1129 -        if (copy_to_user(
  4.1130 -            sc->dirty_bitmap, d->arch.shadow_dirty_bitmap, (d->max_pages+7)/8))
  4.1131 -        {
  4.1132 -            rc = -EINVAL;
  4.1133 -            break;
  4.1134 -        }
  4.1135 -
  4.1136 -        break;
  4.1137 -
  4.1138 -    default:
  4.1139 -        rc = -EINVAL;
  4.1140 -        break;
  4.1141 -    }
  4.1142 -
  4.1143 -    SH_VLOG("shadow mode table op : page count %d", d->arch.shadow_page_count);
  4.1144 -    shadow_audit(d, 1);
  4.1145 -
  4.1146 -    for_each_vcpu(d,v)
  4.1147 -        __update_pagetables(v);
  4.1148 -
  4.1149 -    return rc;
  4.1150 -}
  4.1151 -
  4.1152 -int shadow_mode_control(struct domain *d, dom0_shadow_control_t *sc)
  4.1153 -{
  4.1154 -    unsigned int op = sc->op;
  4.1155 -    int          rc = 0;
  4.1156 -    struct vcpu *v;
  4.1157 -
  4.1158 -    if ( unlikely(d == current->domain) )
  4.1159 -    {
  4.1160 -        DPRINTK("Don't try to do a shadow op on yourself!\n");
  4.1161 -        return -EINVAL;
  4.1162 -    }   
  4.1163 -
  4.1164 -    domain_pause(d);
  4.1165 -
  4.1166 -    shadow_lock(d);
  4.1167 -
  4.1168 -    switch ( op )
  4.1169 -    {
  4.1170 -    case DOM0_SHADOW_CONTROL_OP_OFF:
  4.1171 -        __shadow_sync_all(d);
  4.1172 -        __shadow_mode_disable(d);
  4.1173 -        break;
  4.1174 -
  4.1175 -    case DOM0_SHADOW_CONTROL_OP_ENABLE_TEST:
  4.1176 -        free_shadow_pages(d);
  4.1177 -        rc = __shadow_mode_enable(d, SHM_enable);
  4.1178 -        break;
  4.1179 -
  4.1180 -    case DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY:
  4.1181 -        free_shadow_pages(d);
  4.1182 -        rc = __shadow_mode_enable(
  4.1183 -            d, d->arch.shadow_mode|SHM_enable|SHM_log_dirty);
  4.1184 -        break;
  4.1185 -
  4.1186 -    case DOM0_SHADOW_CONTROL_OP_ENABLE_TRANSLATE:
  4.1187 -        free_shadow_pages(d);
  4.1188 -        rc = __shadow_mode_enable(
  4.1189 -            d, d->arch.shadow_mode|SHM_enable|SHM_refcounts|SHM_translate);
  4.1190 -        break;
  4.1191 -
  4.1192 -    default:
  4.1193 -        rc = shadow_mode_enabled(d) ? shadow_mode_table_op(d, sc) : -EINVAL;
  4.1194 -        break;
  4.1195 -    }
  4.1196 -
  4.1197 -    shadow_unlock(d);
  4.1198 -
  4.1199 -    for_each_vcpu(d,v)
  4.1200 -        update_pagetables(v);
  4.1201 -
  4.1202 -    domain_unpause(d);
  4.1203 -
  4.1204 -    return rc;
  4.1205 -}
  4.1206 -
  4.1207 -/*
  4.1208 - * XXX KAF: Why is this VMX specific?
  4.1209 - */
  4.1210 -void vmx_shadow_clear_state(struct domain *d)
  4.1211 -{
  4.1212 -    SH_VVLOG("%s:", __func__);
  4.1213 -    shadow_lock(d);
  4.1214 -    free_shadow_pages(d);
  4.1215 -    shadow_unlock(d);
  4.1216 -    update_pagetables(d->vcpu[0]);
  4.1217 -}
  4.1218 -
  4.1219 -unsigned long
  4.1220 -gpfn_to_mfn_foreign(struct domain *d, unsigned long gpfn)
  4.1221 -{
  4.1222 -    ASSERT( shadow_mode_translate(d) );
  4.1223 -
  4.1224 -    perfc_incrc(gpfn_to_mfn_foreign);
  4.1225 -
  4.1226 -    unsigned long va = gpfn << PAGE_SHIFT;
  4.1227 -    unsigned long tabpfn = pagetable_get_pfn(d->arch.phys_table);
  4.1228 -    l2_pgentry_t *l2 = map_domain_page(tabpfn);
  4.1229 -    l2_pgentry_t l2e = l2[l2_table_offset(va)];
  4.1230 -    unmap_domain_page(l2);
  4.1231 -    if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
  4.1232 -    {
  4.1233 -        printk("gpfn_to_mfn_foreign(d->id=%d, gpfn=%lx) => 0 l2e=%" PRIpte "\n",
  4.1234 -               d->domain_id, gpfn, l2e_get_intpte(l2e));
  4.1235 -        return INVALID_MFN;
  4.1236 -    }
  4.1237 -    l1_pgentry_t *l1 = map_domain_page(l2e_get_pfn(l2e));
  4.1238 -    l1_pgentry_t l1e = l1[l1_table_offset(va)];
  4.1239 -    unmap_domain_page(l1);
  4.1240 -
  4.1241 -#if 0
  4.1242 -    printk("gpfn_to_mfn_foreign(d->id=%d, gpfn=%lx) => %lx tabpfn=%lx l2e=%lx l1tab=%lx, l1e=%lx\n",
  4.1243 -           d->domain_id, gpfn, l1_pgentry_val(l1e) >> PAGE_SHIFT, tabpfn, l2e, l1tab, l1e);
  4.1244 -#endif
  4.1245 -
  4.1246 -    if ( !(l1e_get_flags(l1e) & _PAGE_PRESENT) )
  4.1247 -    {
  4.1248 -        printk("gpfn_to_mfn_foreign(d->id=%d, gpfn=%lx) => 0 l1e=%" PRIpte "\n",
  4.1249 -               d->domain_id, gpfn, l1e_get_intpte(l1e));
  4.1250 -        return INVALID_MFN;
  4.1251 -    }
  4.1252 -
  4.1253 -    return l1e_get_pfn(l1e);
  4.1254 -}
  4.1255 -
  4.1256 +#if CONFIG_PAGING_LEVELS == 2
  4.1257  static unsigned long
  4.1258  shadow_hl2_table(struct domain *d, unsigned long gpfn, unsigned long gmfn,
  4.1259                  unsigned long smfn)
  4.1260 @@ -1569,8 +466,9 @@ static unsigned long shadow_l2_table(
  4.1261      SH_VLOG("shadow_l2_table(%lx -> %lx)", gmfn, smfn);
  4.1262      return smfn;
  4.1263  }
  4.1264 -
  4.1265 -void shadow_map_l1_into_current_l2(unsigned long va)
  4.1266 +#endif
  4.1267 +
  4.1268 +static void shadow_map_l1_into_current_l2(unsigned long va)
  4.1269  { 
  4.1270      struct vcpu *v = current;
  4.1271      struct domain *d = v->domain;
  4.1272 @@ -1622,20 +520,34 @@ void shadow_map_l1_into_current_l2(unsig
  4.1273      if ( !get_shadow_ref(sl1mfn) )
  4.1274          BUG();
  4.1275      l2pde_general(d, &gl2e, &sl2e, sl1mfn);
  4.1276 -    __guest_set_l2e(v, va, gl2e);
  4.1277 -    __shadow_set_l2e(v, va, sl2e);
  4.1278 +    __guest_set_l2e(v, va, &gl2e);
  4.1279 +    __shadow_set_l2e(v, va, &sl2e);
  4.1280  
  4.1281      if ( init_table )
  4.1282      {
  4.1283          l1_pgentry_t sl1e;
  4.1284          int index = l1_table_offset(va);
  4.1285          int min = 1, max = 0;
  4.1286 -
  4.1287 +        
  4.1288 +        unsigned long entries, pt_va;
  4.1289 +        l1_pgentry_t tmp_sl1e;
  4.1290 +        l1_pgentry_t tmp_gl1e;//Prepare for double compile
  4.1291 +
  4.1292 +
  4.1293 +        entries = PAGE_SIZE / sizeof(l1_pgentry_t);
  4.1294 +        pt_va = ((va >> L1_PAGETABLE_SHIFT) & ~(entries - 1)) << L1_PAGETABLE_SHIFT;
  4.1295 +        gpl1e = (l1_pgentry_t *) __guest_get_l1e(v, pt_va, &tmp_gl1e);
  4.1296 +
  4.1297 +        entries = PAGE_SIZE / sizeof(l1_pgentry_t);
  4.1298 +        pt_va = ((va >> L1_PAGETABLE_SHIFT) & ~(entries - 1)) << L1_PAGETABLE_SHIFT;
  4.1299 +        spl1e = (l1_pgentry_t *) __shadow_get_l1e(v, pt_va, &tmp_sl1e);
  4.1300 +
  4.1301 +        /*
  4.1302          gpl1e = &(linear_pg_table[l1_linear_offset(va) &
  4.1303                                ~(L1_PAGETABLE_ENTRIES-1)]);
  4.1304  
  4.1305          spl1e = &(shadow_linear_pg_table[l1_linear_offset(va) &
  4.1306 -                                     ~(L1_PAGETABLE_ENTRIES-1)]);
  4.1307 +                                     ~(L1_PAGETABLE_ENTRIES-1)]);*/
  4.1308  
  4.1309          for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
  4.1310          {
  4.1311 @@ -1666,7 +578,74 @@ void shadow_map_l1_into_current_l2(unsig
  4.1312      }
  4.1313  }
  4.1314  
  4.1315 -void shadow_invlpg(struct vcpu *v, unsigned long va)
  4.1316 +static void 
  4.1317 +shadow_set_l1e(unsigned long va, l1_pgentry_t new_spte, int create_l1_shadow)
  4.1318 +{
  4.1319 +    struct vcpu *v = current;
  4.1320 +    struct domain *d = v->domain;
  4.1321 +    l2_pgentry_t sl2e;
  4.1322 +
  4.1323 +    __shadow_get_l2e(v, va, &sl2e);
  4.1324 +    if ( !(l2e_get_flags(sl2e) & _PAGE_PRESENT) )
  4.1325 +    {
  4.1326 +        /*
  4.1327 +         * Either the L1 is not shadowed, or the shadow isn't linked into
  4.1328 +         * the current shadow L2.
  4.1329 +         */
  4.1330 +        if ( create_l1_shadow )
  4.1331 +        {
  4.1332 +            perfc_incrc(shadow_set_l1e_force_map);
  4.1333 +            shadow_map_l1_into_current_l2(va);
  4.1334 +        }
  4.1335 +        else /* check to see if it exists; if so, link it in */
  4.1336 +        {
  4.1337 +            l2_pgentry_t gpde = linear_l2_table(v)[l2_table_offset(va)];
  4.1338 +            unsigned long gl1pfn = l2e_get_pfn(gpde);
  4.1339 +            unsigned long sl1mfn = __shadow_status(d, gl1pfn, PGT_l1_shadow);
  4.1340 +
  4.1341 +            ASSERT( l2e_get_flags(gpde) & _PAGE_PRESENT );
  4.1342 +
  4.1343 +            if ( sl1mfn )
  4.1344 +            {
  4.1345 +                perfc_incrc(shadow_set_l1e_unlinked);
  4.1346 +                if ( !get_shadow_ref(sl1mfn) )
  4.1347 +                    BUG();
  4.1348 +                l2pde_general(d, &gpde, &sl2e, sl1mfn);
  4.1349 +                __guest_set_l2e(v, va, &gpde);
  4.1350 +                __shadow_set_l2e(v, va, &sl2e);
  4.1351 +            }
  4.1352 +            else
  4.1353 +            {
  4.1354 +                // no shadow exists, so there's nothing to do.
  4.1355 +                perfc_incrc(shadow_set_l1e_fail);
  4.1356 +                return;
  4.1357 +            }
  4.1358 +        }
  4.1359 +    }
  4.1360 +
  4.1361 +    if ( shadow_mode_refcounts(d) )
  4.1362 +    {
  4.1363 +        l1_pgentry_t old_spte;
  4.1364 +        __shadow_get_l1e(v, va, &old_spte);
  4.1365 +
  4.1366 +        // only do the ref counting if something important changed.
  4.1367 +        //
  4.1368 +        if ( l1e_has_changed(old_spte, new_spte, _PAGE_RW | _PAGE_PRESENT) )
  4.1369 +        {
  4.1370 +            if ( (l1e_get_flags(new_spte) & _PAGE_PRESENT) &&
  4.1371 +                 !shadow_get_page_from_l1e(new_spte, d) )
  4.1372 +                new_spte = l1e_empty();
  4.1373 +            if ( l1e_get_flags(old_spte) & _PAGE_PRESENT )
  4.1374 +                shadow_put_page_from_l1e(old_spte, d);
  4.1375 +        }
  4.1376 +    }
  4.1377 +
  4.1378 +    __shadow_set_l1e(v, va, &new_spte);
  4.1379 +
  4.1380 +    shadow_update_min_max(l2e_get_pfn(sl2e), l1_table_offset(va));
  4.1381 +}
  4.1382 +
  4.1383 +static void shadow_invlpg_32(struct vcpu *v, unsigned long va)
  4.1384  {
  4.1385      struct domain *d = v->domain;
  4.1386      l1_pgentry_t gpte, spte;
  4.1387 @@ -1682,8 +661,9 @@ void shadow_invlpg(struct vcpu *v, unsig
  4.1388      // It's not strictly necessary to update the shadow here,
  4.1389      // but it might save a fault later.
  4.1390      //
  4.1391 -    if (__copy_from_user(&gpte, &linear_pg_table[va >> PAGE_SHIFT],
  4.1392 -                         sizeof(gpte))) {
  4.1393 +    /*if (__copy_from_user(&gpte, &linear_pg_table[va >> PAGE_SHIFT],
  4.1394 +                         sizeof(gpte))) {*/
  4.1395 +    if (unlikely(!__guest_get_l1e(v, va, &gpte))) {
  4.1396          perfc_incrc(shadow_invlpg_faults);
  4.1397          return;
  4.1398      }
  4.1399 @@ -1693,7 +673,7 @@ void shadow_invlpg(struct vcpu *v, unsig
  4.1400      shadow_unlock(d);
  4.1401  }
  4.1402  
  4.1403 -struct out_of_sync_entry *
  4.1404 +static struct out_of_sync_entry *
  4.1405  shadow_alloc_oos_entry(struct domain *d)
  4.1406  {
  4.1407      struct out_of_sync_entry *f, *extra;
  4.1408 @@ -1784,30 +764,8 @@ shadow_make_snapshot(
  4.1409      return smfn;
  4.1410  }
  4.1411  
  4.1412 -static void
  4.1413 -shadow_free_snapshot(struct domain *d, struct out_of_sync_entry *entry)
  4.1414 -{
  4.1415 -    void *snapshot;
  4.1416 -
  4.1417 -    if ( entry->snapshot_mfn == SHADOW_SNAPSHOT_ELSEWHERE )
  4.1418 -        return;
  4.1419 -
  4.1420 -    // Clear the out_of_sync bit.
  4.1421 -    //
  4.1422 -    clear_bit(_PGC_out_of_sync, &frame_table[entry->gmfn].count_info);
  4.1423 -
  4.1424 -    // XXX Need to think about how to protect the domain's
  4.1425 -    // information less expensively.
  4.1426 -    //
  4.1427 -    snapshot = map_domain_page(entry->snapshot_mfn);
  4.1428 -    memset(snapshot, 0, PAGE_SIZE);
  4.1429 -    unmap_domain_page(snapshot);
  4.1430 -
  4.1431 -    put_shadow_ref(entry->snapshot_mfn);
  4.1432 -}
  4.1433 -
  4.1434 -struct out_of_sync_entry *
  4.1435 -shadow_mark_mfn_out_of_sync(struct vcpu *v, unsigned long gpfn,
  4.1436 +static struct out_of_sync_entry *
  4.1437 +mark_mfn_out_of_sync(struct vcpu *v, unsigned long gpfn,
  4.1438                               unsigned long mfn)
  4.1439  {
  4.1440      struct domain *d = v->domain;
  4.1441 @@ -1862,13 +820,33 @@ shadow_mark_mfn_out_of_sync(struct vcpu 
  4.1442      return entry;
  4.1443  }
  4.1444  
  4.1445 -void shadow_mark_va_out_of_sync(
  4.1446 +static void shadow_mark_va_out_of_sync(
  4.1447      struct vcpu *v, unsigned long gpfn, unsigned long mfn, unsigned long va)
  4.1448  {
  4.1449      struct out_of_sync_entry *entry =
  4.1450          shadow_mark_mfn_out_of_sync(v, gpfn, mfn);
  4.1451      l2_pgentry_t sl2e;
  4.1452  
  4.1453 +#if CONFIG_PAGING_LEVELS >= 4
  4.1454 +    {
  4.1455 +        l4_pgentry_t sl4e;
  4.1456 +        l3_pgentry_t sl3e;
  4.1457 +
  4.1458 +        __shadow_get_l4e(v, va, &sl4e);
  4.1459 +        if ( !(l4e_get_flags(sl4e) & _PAGE_PRESENT)) {
  4.1460 +            shadow_map_into_current(v, va, L3, L4);
  4.1461 +        }
  4.1462 +
  4.1463 +        if (!__shadow_get_l3e(v, va, &sl3e)) {
  4.1464 +            BUG();
  4.1465 +        }
  4.1466 +
  4.1467 +        if ( !(l3e_get_flags(sl3e) & _PAGE_PRESENT)) {
  4.1468 +            shadow_map_into_current(v, va, L2, L3);
  4.1469 +        }
  4.1470 +    }
  4.1471 +#endif
  4.1472 +
  4.1473      // We need the address of shadow PTE that maps @va.
  4.1474      // It might not exist yet.  Make sure it's there.
  4.1475      //
  4.1476 @@ -1940,26 +918,74 @@ static int snapshot_entry_matches(
  4.1477   * Returns 1 if va's shadow mapping is out-of-sync.
  4.1478   * Returns 0 otherwise.
  4.1479   */
  4.1480 -int __shadow_out_of_sync(struct vcpu *v, unsigned long va)
  4.1481 +static int is_out_of_sync(struct vcpu *v, unsigned long va) /* __shadow_out_of_sync */
  4.1482  {
  4.1483      struct domain *d = v->domain;
  4.1484 +#if defined (__x86_64__)
  4.1485 +    unsigned long l2mfn = ((v->arch.flags & TF_kernel_mode)? 
  4.1486 +                          pagetable_get_pfn(v->arch.guest_table) :
  4.1487 +                          pagetable_get_pfn(v->arch.guest_table_user));
  4.1488 +#else
  4.1489      unsigned long l2mfn = pagetable_get_pfn(v->arch.guest_table);
  4.1490 +#endif
  4.1491      unsigned long l2pfn = __mfn_to_gpfn(d, l2mfn);
  4.1492      l2_pgentry_t l2e;
  4.1493      unsigned long l1pfn, l1mfn;
  4.1494 +    l1_pgentry_t *guest_pt;
  4.1495 +    l1_pgentry_t tmp_gle;
  4.1496 +    unsigned long pt_va;
  4.1497  
  4.1498      ASSERT(shadow_lock_is_acquired(d));
  4.1499      ASSERT(VALID_M2P(l2pfn));
  4.1500  
  4.1501      perfc_incrc(shadow_out_of_sync_calls);
  4.1502  
  4.1503 +#if CONFIG_PAGING_LEVELS >= 4
  4.1504 +    if (d->arch.ops->guest_paging_levels == L4) { /* Mode F */
  4.1505 +        pgentry_64_t le;
  4.1506 +        unsigned long gmfn;
  4.1507 +        unsigned long gpfn;
  4.1508 +        int i;
  4.1509 +
  4.1510 +        gmfn = l2mfn;
  4.1511 +        gpfn = l2pfn;
  4.1512 +        guest_pt = (l1_pgentry_t *)v->arch.guest_vtable;
  4.1513 +
  4.1514 +        for (i = L4; i >= L3; i--) {
  4.1515 +            if ( page_out_of_sync(&frame_table[gmfn]) &&
  4.1516 +              !snapshot_entry_matches(
  4.1517 +                  d, guest_pt, gpfn, table_offset_64(va, i)) )
  4.1518 +                return 1;
  4.1519 +
  4.1520 +            __rw_entry(v, va, &le, GUEST_ENTRY | GET_ENTRY | i);
  4.1521 +            if ( !(entry_get_flags(le) & _PAGE_PRESENT) )
  4.1522 +                return 0;
  4.1523 +            gpfn = entry_get_pfn(le);
  4.1524 +            gmfn = __gpfn_to_mfn(d, gpfn);
  4.1525 +            if ( !VALID_MFN(gmfn) )
  4.1526 +                return 0;
  4.1527 +            /* TODO: check! */
  4.1528 +            guest_pt = (l1_pgentry_t *)map_domain_page(gmfn);
  4.1529 +
  4.1530 +        }
  4.1531 +
  4.1532 +        /* L2 */
  4.1533 +        if ( page_out_of_sync(&frame_table[gmfn]) &&
  4.1534 +             !snapshot_entry_matches(d, guest_pt, gpfn, l2_table_offset(va)) )
  4.1535 +            return 1;
  4.1536 +
  4.1537 +
  4.1538 +    } else
  4.1539 +#endif
  4.1540 +
  4.1541      if ( page_out_of_sync(&frame_table[l2mfn]) &&
  4.1542           !snapshot_entry_matches(d, (l1_pgentry_t *)v->arch.guest_vtable,
  4.1543                                   l2pfn, l2_table_offset(va)) )
  4.1544          return 1;
  4.1545  
  4.1546      __guest_get_l2e(v, va, &l2e);
  4.1547 -    if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
  4.1548 +    if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) || 
  4.1549 +         (l2e_get_flags(l2e) & _PAGE_PSE))
  4.1550          return 0;
  4.1551  
  4.1552      l1pfn = l2e_get_pfn(l2e);
  4.1553 @@ -1968,11 +994,14 @@ int __shadow_out_of_sync(struct vcpu *v,
  4.1554      // If the l1 pfn is invalid, it can't be out of sync...
  4.1555      if ( !VALID_MFN(l1mfn) )
  4.1556          return 0;
  4.1557 +    
  4.1558 +    pt_va = ((va >> L1_PAGETABLE_SHIFT) & ~(L1_PAGETABLE_ENTRIES - 1))
  4.1559 +      << L1_PAGETABLE_SHIFT;
  4.1560 +    guest_pt = (l1_pgentry_t *) __guest_get_l1e(v, pt_va, &tmp_gle);
  4.1561  
  4.1562      if ( page_out_of_sync(&frame_table[l1mfn]) &&
  4.1563           !snapshot_entry_matches(
  4.1564 -             d, &linear_pg_table[l1_linear_offset(va) & ~(L1_PAGETABLE_ENTRIES-1)],
  4.1565 -             l1pfn, l1_table_offset(va)) )
  4.1566 +             d, guest_pt, l1pfn, l1_table_offset(va)) )
  4.1567          return 1;
  4.1568  
  4.1569      return 0;
  4.1570 @@ -2027,42 +1056,6 @@ decrease_writable_pte_prediction(struct 
  4.1571      }
  4.1572  }
  4.1573  
  4.1574 -static void
  4.1575 -free_writable_pte_predictions(struct domain *d)
  4.1576 -{
  4.1577 -    int i;
  4.1578 -    struct shadow_status *x;
  4.1579 -
  4.1580 -    for ( i = 0; i < shadow_ht_buckets; i++ )
  4.1581 -    {
  4.1582 -        u32 count;
  4.1583 -        unsigned long *gpfn_list;
  4.1584 -
  4.1585 -        /* Skip empty buckets. */
  4.1586 -        if ( d->arch.shadow_ht[i].gpfn_and_flags == 0 )
  4.1587 -            continue;
  4.1588 -
  4.1589 -        count = 0;
  4.1590 -        for ( x = &d->arch.shadow_ht[i]; x != NULL; x = x->next )
  4.1591 -            if ( (x->gpfn_and_flags & PGT_type_mask) == PGT_writable_pred )
  4.1592 -                count++;
  4.1593 -
  4.1594 -        gpfn_list = xmalloc_array(unsigned long, count);
  4.1595 -        count = 0;
  4.1596 -        for ( x = &d->arch.shadow_ht[i]; x != NULL; x = x->next )
  4.1597 -            if ( (x->gpfn_and_flags & PGT_type_mask) == PGT_writable_pred )
  4.1598 -                gpfn_list[count++] = x->gpfn_and_flags & PGT_mfn_mask;
  4.1599 -
  4.1600 -        while ( count )
  4.1601 -        {
  4.1602 -            count--;
  4.1603 -            delete_shadow_status(d, gpfn_list[count], 0, PGT_writable_pred);
  4.1604 -        }
  4.1605 -
  4.1606 -        xfree(gpfn_list);
  4.1607 -    }
  4.1608 -}
  4.1609 -
  4.1610  static u32 remove_all_write_access_in_ptpage(
  4.1611      struct domain *d, unsigned long pt_pfn, unsigned long pt_mfn,
  4.1612      unsigned long readonly_gpfn, unsigned long readonly_gmfn,
  4.1613 @@ -2124,7 +1117,7 @@ static u32 remove_all_write_access_in_pt
  4.1614  #undef MATCH_ENTRY
  4.1615  }
  4.1616  
  4.1617 -int shadow_remove_all_write_access(
  4.1618 +static int remove_all_write_access(
  4.1619      struct domain *d, unsigned long readonly_gpfn, unsigned long readonly_gmfn)
  4.1620  {
  4.1621      int i;
  4.1622 @@ -2190,7 +1183,12 @@ int shadow_remove_all_write_access(
  4.1623          a = &d->arch.shadow_ht[i];
  4.1624          while ( a && a->gpfn_and_flags )
  4.1625          {
  4.1626 -            if ( (a->gpfn_and_flags & PGT_type_mask) == PGT_l1_shadow )
  4.1627 +            if ( (a->gpfn_and_flags & PGT_type_mask) == PGT_l1_shadow 
  4.1628 +#if CONFIG_PAGING_LEVELS >= 4
  4.1629 +              || (a->gpfn_and_flags & PGT_type_mask) == PGT_fl1_shadow
  4.1630 +#endif
  4.1631 +              )
  4.1632 +
  4.1633              {
  4.1634                  found += remove_all_write_access_in_ptpage(d, a->gpfn_and_flags & PGT_mfn_mask, a->smfn, readonly_gpfn, readonly_gmfn, write_refs - found, a->gpfn_and_flags & PGT_mfn_mask);
  4.1635                  if ( found == write_refs )
  4.1636 @@ -2207,80 +1205,6 @@ int shadow_remove_all_write_access(
  4.1637      return 0;
  4.1638  }
  4.1639  
  4.1640 -static u32 remove_all_access_in_page(
  4.1641 -    struct domain *d, unsigned long l1mfn, unsigned long forbidden_gmfn)
  4.1642 -{
  4.1643 -    l1_pgentry_t *pl1e = map_domain_page(l1mfn);
  4.1644 -    l1_pgentry_t match;
  4.1645 -    unsigned long flags  = _PAGE_PRESENT;
  4.1646 -    int i;
  4.1647 -    u32 count = 0;
  4.1648 -    int is_l1_shadow =
  4.1649 -        ((frame_table[l1mfn].u.inuse.type_info & PGT_type_mask) ==
  4.1650 -         PGT_l1_shadow);
  4.1651 -
  4.1652 -    match = l1e_from_pfn(forbidden_gmfn, flags);
  4.1653 -    
  4.1654 -    for (i = 0; i < L1_PAGETABLE_ENTRIES; i++)
  4.1655 -    {
  4.1656 -        if ( unlikely(!l1e_has_changed(pl1e[i], match, flags) == 0) )
  4.1657 -        {
  4.1658 -            l1_pgentry_t ol2e = pl1e[i];
  4.1659 -            pl1e[i] = l1e_empty();
  4.1660 -            count++;
  4.1661 -
  4.1662 -            if ( is_l1_shadow )
  4.1663 -                shadow_put_page_from_l1e(ol2e, d);
  4.1664 -            else /* must be an hl2 page */
  4.1665 -                put_page(&frame_table[forbidden_gmfn]);
  4.1666 -        }
  4.1667 -    }
  4.1668 -
  4.1669 -    unmap_domain_page(pl1e);
  4.1670 -
  4.1671 -    return count;
  4.1672 -}
  4.1673 -
  4.1674 -u32 shadow_remove_all_access(struct domain *d, unsigned long forbidden_gmfn)
  4.1675 -{
  4.1676 -    int i;
  4.1677 -    struct shadow_status *a;
  4.1678 -    u32 count = 0;
  4.1679 -
  4.1680 -    if ( unlikely(!shadow_mode_enabled(d)) )
  4.1681 -        return 0;
  4.1682 -
  4.1683 -    ASSERT(shadow_lock_is_acquired(d));
  4.1684 -    perfc_incrc(remove_all_access);
  4.1685 -
  4.1686 -    for (i = 0; i < shadow_ht_buckets; i++)
  4.1687 -    {
  4.1688 -        a = &d->arch.shadow_ht[i];
  4.1689 -        while ( a && a->gpfn_and_flags )
  4.1690 -        {
  4.1691 -            switch (a->gpfn_and_flags & PGT_type_mask)
  4.1692 -            {
  4.1693 -            case PGT_l1_shadow:
  4.1694 -            case PGT_l2_shadow:
  4.1695 -            case PGT_l3_shadow:
  4.1696 -            case PGT_l4_shadow:
  4.1697 -            case PGT_hl2_shadow:
  4.1698 -                count += remove_all_access_in_page(d, a->smfn, forbidden_gmfn);
  4.1699 -                break;
  4.1700 -            case PGT_snapshot:
  4.1701 -            case PGT_writable_pred:
  4.1702 -                // these can't hold refs to the forbidden page
  4.1703 -                break;
  4.1704 -            default:
  4.1705 -                BUG();
  4.1706 -            }
  4.1707 -
  4.1708 -            a = a->next;
  4.1709 -        }
  4.1710 -    }
  4.1711 -
  4.1712 -    return count;
  4.1713 -}    
  4.1714  
  4.1715  static int resync_all(struct domain *d, u32 stype)
  4.1716  {
  4.1717 @@ -2334,6 +1258,15 @@ static int resync_all(struct domain *d, 
  4.1718  
  4.1719          unshadow = 0;
  4.1720  
  4.1721 +        u32 min_max_shadow = pfn_to_page(smfn)->tlbflush_timestamp;
  4.1722 +        int min_shadow = SHADOW_MIN(min_max_shadow);
  4.1723 +        int max_shadow = SHADOW_MAX(min_max_shadow);
  4.1724 +
  4.1725 +        u32 min_max_snapshot =
  4.1726 +          pfn_to_page(entry->snapshot_mfn)->tlbflush_timestamp;
  4.1727 +        int min_snapshot = SHADOW_MIN(min_max_snapshot);
  4.1728 +        int max_snapshot = SHADOW_MAX(min_max_snapshot);
  4.1729 +
  4.1730          switch ( stype ) {
  4.1731          case PGT_l1_shadow:
  4.1732          {
  4.1733 @@ -2350,14 +1283,6 @@ static int resync_all(struct domain *d, 
  4.1734              if ( !smfn )
  4.1735                  break;
  4.1736  
  4.1737 -            u32 min_max_shadow = pfn_to_page(smfn)->tlbflush_timestamp;
  4.1738 -            int min_shadow = SHADOW_MIN(min_max_shadow);
  4.1739 -            int max_shadow = SHADOW_MAX(min_max_shadow);
  4.1740 -
  4.1741 -            u32 min_max_snapshot =
  4.1742 -                pfn_to_page(entry->snapshot_mfn)->tlbflush_timestamp;
  4.1743 -            int min_snapshot = SHADOW_MIN(min_max_snapshot);
  4.1744 -            int max_snapshot = SHADOW_MAX(min_max_snapshot);
  4.1745  
  4.1746              changed = 0;
  4.1747  
  4.1748 @@ -2381,6 +1306,7 @@ static int resync_all(struct domain *d, 
  4.1749              perfc_incr_histo(l1_entries_checked, max_shadow - min_shadow + 1, PT_UPDATES);
  4.1750              break;
  4.1751          }
  4.1752 +#if defined (__i386__)
  4.1753          case PGT_l2_shadow:
  4.1754          {
  4.1755              int max = -1;
  4.1756 @@ -2464,6 +1390,73 @@ static int resync_all(struct domain *d, 
  4.1757              perfc_incr_histo(shm_hl2_updates, changed, PT_UPDATES);
  4.1758              break;
  4.1759          }
  4.1760 +#else
  4.1761 +        case PGT_l2_shadow:
  4.1762 +        case PGT_l3_shadow:
  4.1763 +        {
  4.1764 +            pgentry_64_t *guest_pt = guest;
  4.1765 +            pgentry_64_t *shadow_pt = shadow;
  4.1766 +            pgentry_64_t *snapshot_pt = snapshot;
  4.1767 +
  4.1768 +            changed = 0;
  4.1769 +            for ( i = min_shadow; i <= max_shadow; i++ )
  4.1770 +            {
  4.1771 +                if ( (i < min_snapshot) || (i > max_snapshot) ||
  4.1772 +                  entry_has_changed(
  4.1773 +                      guest_pt[i], snapshot_pt[i], PAGE_FLAG_MASK) )
  4.1774 +                {
  4.1775 +                    need_flush |= validate_entry_change(
  4.1776 +                      d, &guest_pt[i], &shadow_pt[i],
  4.1777 +                      shadow_type_to_level(stype));
  4.1778 +                    changed++;
  4.1779 +                }
  4.1780 +            }
  4.1781 +            break;
  4.1782 +
  4.1783 +
  4.1784 +        }
  4.1785 +        case PGT_l4_shadow:
  4.1786 +        {
  4.1787 +            int max = -1;
  4.1788 +
  4.1789 +            l4_pgentry_t *guest4 = guest;
  4.1790 +            l4_pgentry_t *shadow4 = shadow;
  4.1791 +            l4_pgentry_t *snapshot4 = snapshot;
  4.1792 +
  4.1793 +            changed = 0;
  4.1794 +            for ( i = 0; i < L4_PAGETABLE_ENTRIES; i++ )
  4.1795 +            {
  4.1796 +                if ( !is_guest_l4_slot(i) && !external )
  4.1797 +                    continue;
  4.1798 +                l4_pgentry_t new_l4e = guest4[i];
  4.1799 +                if ( l4e_has_changed(new_l4e, snapshot4[i], PAGE_FLAG_MASK))
  4.1800 +                {
  4.1801 +                    need_flush |= validate_entry_change(
  4.1802 +                      d, (pgentry_64_t *)&new_l4e,
  4.1803 +                      (pgentry_64_t *)&shadow4[i], shadow_type_to_level(stype));
  4.1804 +
  4.1805 +                    changed++;
  4.1806 +                    ESH_LOG("%d: shadow4 mfn: %lx, shadow root: %lx\n", i,
  4.1807 +                      smfn, pagetable_get_paddr(current->arch.shadow_table));
  4.1808 +                }
  4.1809 +                if ( l4e_get_intpte(new_l4e) != 0 ) /* FIXME: check flags? */
  4.1810 +                    max = i;
  4.1811 +
  4.1812 +                //  Need a better solution in the long term.
  4.1813 +                if ( !(l4e_get_flags(new_l4e) & _PAGE_PRESENT) &&
  4.1814 +                  unlikely(l4e_get_intpte(new_l4e) != 0) &&
  4.1815 +                  !unshadow &&
  4.1816 +                  (frame_table[smfn].u.inuse.type_info & PGT_pinned) )
  4.1817 +                    unshadow = 1;
  4.1818 +            }
  4.1819 +            if ( max == -1 )
  4.1820 +                unshadow = 1;
  4.1821 +            perfc_incrc(resync_l4);
  4.1822 +            perfc_incr_histo(shm_l4_updates, changed, PT_UPDATES);
  4.1823 +            break;
  4.1824 +        }
  4.1825 +
  4.1826 +#endif
  4.1827          default:
  4.1828              BUG();
  4.1829          }
  4.1830 @@ -2477,6 +1470,7 @@ static int resync_all(struct domain *d, 
  4.1831          {
  4.1832              perfc_incrc(unshadow_l2_count);
  4.1833              shadow_unpin(smfn);
  4.1834 +#if defined (__i386__)
  4.1835              if ( unlikely(shadow_mode_external(d)) )
  4.1836              {
  4.1837                  unsigned long hl2mfn;
  4.1838 @@ -2485,13 +1479,14 @@ static int resync_all(struct domain *d, 
  4.1839                       MFN_PINNED(hl2mfn) )
  4.1840                      shadow_unpin(hl2mfn);
  4.1841              }
  4.1842 +#endif
  4.1843          }
  4.1844      }
  4.1845  
  4.1846      return need_flush;
  4.1847  }
  4.1848  
  4.1849 -void __shadow_sync_all(struct domain *d)
  4.1850 +static void sync_all(struct domain *d)
  4.1851  {
  4.1852      struct out_of_sync_entry *entry;
  4.1853      int need_flush = 0;
  4.1854 @@ -2544,9 +1539,13 @@ void __shadow_sync_all(struct domain *d)
  4.1855      // Second, resync all L1 pages, then L2 pages, etc...
  4.1856      //
  4.1857      need_flush |= resync_all(d, PGT_l1_shadow);
  4.1858 +#if defined (__i386__)
  4.1859      if ( shadow_mode_translate(d) )
  4.1860          need_flush |= resync_all(d, PGT_hl2_shadow);
  4.1861 +#endif
  4.1862      need_flush |= resync_all(d, PGT_l2_shadow);
  4.1863 +    need_flush |= resync_all(d, PGT_l3_shadow);
  4.1864 +    need_flush |= resync_all(d, PGT_l4_shadow);
  4.1865  
  4.1866      if ( need_flush && !unlikely(shadow_mode_external(d)) )
  4.1867          local_flush_tlb();
  4.1868 @@ -2554,7 +1553,77 @@ void __shadow_sync_all(struct domain *d)
  4.1869      free_out_of_sync_state(d);
  4.1870  }
  4.1871  
  4.1872 -int shadow_fault(unsigned long va, struct cpu_user_regs *regs)
  4.1873 +static inline int l1pte_write_fault(
  4.1874 +    struct vcpu *v, l1_pgentry_t *gpte_p, l1_pgentry_t *spte_p,
  4.1875 +    unsigned long va)
  4.1876 +{
  4.1877 +    struct domain *d = v->domain;
  4.1878 +    l1_pgentry_t gpte = *gpte_p;
  4.1879 +    l1_pgentry_t spte;
  4.1880 +    unsigned long gpfn = l1e_get_pfn(gpte);
  4.1881 +    unsigned long gmfn = __gpfn_to_mfn(d, gpfn);
  4.1882 +
  4.1883 +    //printk("l1pte_write_fault gmfn=%lx\n", gmfn);
  4.1884 +
  4.1885 +    if ( unlikely(!VALID_MFN(gmfn)) )
  4.1886 +    {
  4.1887 +        SH_LOG("l1pte_write_fault: invalid gpfn=%lx", gpfn);
  4.1888 +        *spte_p = l1e_empty();
  4.1889 +        return 0;
  4.1890 +    }
  4.1891 +
  4.1892 +    ASSERT(l1e_get_flags(gpte) & _PAGE_RW);
  4.1893 +    l1e_add_flags(gpte, _PAGE_DIRTY | _PAGE_ACCESSED);
  4.1894 +    spte = l1e_from_pfn(gmfn, l1e_get_flags(gpte) & ~_PAGE_GLOBAL);
  4.1895 +
  4.1896 +    SH_VVLOG("l1pte_write_fault: updating spte=0x%" PRIpte " gpte=0x%" PRIpte,
  4.1897 +             l1e_get_intpte(spte), l1e_get_intpte(gpte));
  4.1898 +
  4.1899 +    if ( shadow_mode_log_dirty(d) )
  4.1900 +        __mark_dirty(d, gmfn);
  4.1901 +
  4.1902 +    if ( mfn_is_page_table(gmfn) )
  4.1903 +        shadow_mark_va_out_of_sync(v, gpfn, gmfn, va);
  4.1904 +
  4.1905 +    *gpte_p = gpte;
  4.1906 +    *spte_p = spte;
  4.1907 +
  4.1908 +    return 1;
  4.1909 +}
  4.1910 +
  4.1911 +static inline int l1pte_read_fault(
  4.1912 +    struct domain *d, l1_pgentry_t *gpte_p, l1_pgentry_t *spte_p)
  4.1913 +{ 
  4.1914 +    l1_pgentry_t gpte = *gpte_p;
  4.1915 +    l1_pgentry_t spte = *spte_p;
  4.1916 +    unsigned long pfn = l1e_get_pfn(gpte);
  4.1917 +    unsigned long mfn = __gpfn_to_mfn(d, pfn);
  4.1918 +
  4.1919 +    if ( unlikely(!VALID_MFN(mfn)) )
  4.1920 +    {
  4.1921 +        SH_LOG("l1pte_read_fault: invalid gpfn=%lx", pfn);
  4.1922 +        *spte_p = l1e_empty();
  4.1923 +        return 0;
  4.1924 +    }
  4.1925 +
  4.1926 +    l1e_add_flags(gpte, _PAGE_ACCESSED);
  4.1927 +    spte = l1e_from_pfn(mfn, l1e_get_flags(gpte) & ~_PAGE_GLOBAL);
  4.1928 +
  4.1929 +    if ( shadow_mode_log_dirty(d) || !(l1e_get_flags(gpte) & _PAGE_DIRTY) ||
  4.1930 +         mfn_is_page_table(mfn) )
  4.1931 +    {
  4.1932 +        l1e_remove_flags(spte, _PAGE_RW);
  4.1933 +    }
  4.1934 +
  4.1935 +    SH_VVLOG("l1pte_read_fault: updating spte=0x%" PRIpte " gpte=0x%" PRIpte,
  4.1936 +             l1e_get_intpte(spte), l1e_get_intpte(gpte));
  4.1937 +    *gpte_p = gpte;
  4.1938 +    *spte_p = spte;
  4.1939 +
  4.1940 +    return 1;
  4.1941 +}
  4.1942 +
  4.1943 +static int shadow_fault_32(unsigned long va, struct cpu_user_regs *regs)
  4.1944  {
  4.1945      l1_pgentry_t gpte, spte, orig_gpte;
  4.1946      struct vcpu *v = current;
  4.1947 @@ -2596,7 +1665,10 @@ int shadow_fault(unsigned long va, struc
  4.1948      // the mapping is in-sync, so the check of the PDE's present bit, above,
  4.1949      // covers this access.
  4.1950      //
  4.1951 -    orig_gpte = gpte = linear_pg_table[l1_linear_offset(va)];
  4.1952 +    //orig_gpte = gpte = linear_pg_table[l1_linear_offset(va)];
  4.1953 +    __guest_get_l1e(v, va, &gpte);
  4.1954 +    orig_gpte = gpte;
  4.1955 +
  4.1956      if ( unlikely(!(l1e_get_flags(gpte) & _PAGE_PRESENT)) )
  4.1957      {
  4.1958          SH_VVLOG("shadow_fault - EXIT: gpte not present (%" PRIpte ")",
  4.1959 @@ -2655,8 +1727,9 @@ int shadow_fault(unsigned long va, struc
  4.1960      if ( l1e_has_changed(orig_gpte, gpte, PAGE_FLAG_MASK) )
  4.1961      {
  4.1962          /* XXX Watch out for read-only L2 entries! (not used in Linux). */
  4.1963 -        if ( unlikely(__copy_to_user(&linear_pg_table[l1_linear_offset(va)],
  4.1964 -                                     &gpte, sizeof(gpte))) )
  4.1965 +        /*if ( unlikely(__copy_to_user(&linear_pg_table[l1_linear_offset(va)],
  4.1966 +                                     &gpte, sizeof(gpte))) )*/
  4.1967 +        if ( unlikely(!__guest_set_l1e(v, va, &gpte)))
  4.1968          {
  4.1969              printk("%s() failed, crashing domain %d "
  4.1970                     "due to a read-only L2 page table (gpde=%" PRIpte "), va=%lx\n",
  4.1971 @@ -2684,76 +1757,7 @@ int shadow_fault(unsigned long va, struc
  4.1972      return 0;
  4.1973  }
  4.1974  
  4.1975 -void shadow_l1_normal_pt_update(
  4.1976 -    struct domain *d,
  4.1977 -    unsigned long pa, l1_pgentry_t gpte,
  4.1978 -    struct domain_mmap_cache *cache)
  4.1979 -{
  4.1980 -    unsigned long sl1mfn;    
  4.1981 -    l1_pgentry_t *spl1e, spte;
  4.1982 -
  4.1983 -    shadow_lock(d);
  4.1984 -
  4.1985 -    sl1mfn = __shadow_status(current->domain, pa >> PAGE_SHIFT, PGT_l1_shadow);
  4.1986 -    if ( sl1mfn )
  4.1987 -    {
  4.1988 -        SH_VVLOG("shadow_l1_normal_pt_update pa=%p, gpte=%" PRIpte,
  4.1989 -                 (void *)pa, l1e_get_intpte(gpte));
  4.1990 -        l1pte_propagate_from_guest(current->domain, gpte, &spte);
  4.1991 -
  4.1992 -        spl1e = map_domain_page_with_cache(sl1mfn, cache);
  4.1993 -        spl1e[(pa & ~PAGE_MASK) / sizeof(l1_pgentry_t)] = spte;
  4.1994 -        unmap_domain_page_with_cache(spl1e, cache);
  4.1995 -    }
  4.1996 -
  4.1997 -    shadow_unlock(d);
  4.1998 -}
  4.1999 -
  4.2000 -void shadow_l2_normal_pt_update(
  4.2001 -    struct domain *d,
  4.2002 -    unsigned long pa, l2_pgentry_t gpde,
  4.2003 -    struct domain_mmap_cache *cache)
  4.2004 -{
  4.2005 -    unsigned long sl2mfn;
  4.2006 -    l2_pgentry_t *spl2e;
  4.2007 -
  4.2008 -    shadow_lock(d);
  4.2009 -
  4.2010 -    sl2mfn = __shadow_status(current->domain, pa >> PAGE_SHIFT, PGT_l2_shadow);
  4.2011 -    if ( sl2mfn )
  4.2012 -    {
  4.2013 -        SH_VVLOG("shadow_l2_normal_pt_update pa=%p, gpde=%" PRIpte,
  4.2014 -                 (void *)pa, l2e_get_intpte(gpde));
  4.2015 -        spl2e = map_domain_page_with_cache(sl2mfn, cache);
  4.2016 -        validate_pde_change(d, gpde,
  4.2017 -                            &spl2e[(pa & ~PAGE_MASK) / sizeof(l2_pgentry_t)]);
  4.2018 -        unmap_domain_page_with_cache(spl2e, cache);
  4.2019 -    }
  4.2020 -
  4.2021 -    shadow_unlock(d);
  4.2022 -}
  4.2023 -
  4.2024 -#if CONFIG_PAGING_LEVELS >= 3
  4.2025 -void shadow_l3_normal_pt_update(
  4.2026 -    struct domain *d,
  4.2027 -    unsigned long pa, l3_pgentry_t gpde,
  4.2028 -    struct domain_mmap_cache *cache)
  4.2029 -{
  4.2030 -    BUG(); // not yet implemented
  4.2031 -}
  4.2032 -#endif
  4.2033 -
  4.2034 -#if CONFIG_PAGING_LEVELS >= 4
  4.2035 -void shadow_l4_normal_pt_update(
  4.2036 -    struct domain *d,
  4.2037 -    unsigned long pa, l4_pgentry_t gpde,
  4.2038 -    struct domain_mmap_cache *cache)
  4.2039 -{
  4.2040 -    BUG(); // not yet implemented
  4.2041 -}
  4.2042 -#endif
  4.2043 -
  4.2044 -int shadow_do_update_va_mapping(unsigned long va,
  4.2045 +static int do_update_va_mapping(unsigned long va,
  4.2046                                  l1_pgentry_t val,
  4.2047                                  struct vcpu *v)
  4.2048  {
  4.2049 @@ -2811,17 +1815,28 @@ int shadow_do_update_va_mapping(unsigned
  4.2050   * P2M               n/a            n/a           R/O M2P          R/O M2P
  4.2051   *
  4.2052   * NB:
  4.2053 - * update_pagetables(), __update_pagetables(), shadow_mode_enable(),
  4.2054 + * update_pagetables(), shadow_update_pagetables(), shadow_mode_enable(),
  4.2055   * shadow_l2_table(), shadow_hl2_table(), and alloc_monitor_pagetable()
  4.2056   * all play a part in maintaining these mappings.
  4.2057   */
  4.2058 -void __update_pagetables(struct vcpu *v)
  4.2059 +static void shadow_update_pagetables(struct vcpu *v)
  4.2060  {
  4.2061      struct domain *d = v->domain;
  4.2062 +#if defined (__x86_64__)
  4.2063 +    unsigned long gmfn = ((v->arch.flags & TF_kernel_mode)? 
  4.2064 +                          pagetable_get_pfn(v->arch.guest_table) :
  4.2065 +                          pagetable_get_pfn(v->arch.guest_table_user));
  4.2066 +#else
  4.2067      unsigned long gmfn = pagetable_get_pfn(v->arch.guest_table);
  4.2068 +#endif
  4.2069 +
  4.2070      unsigned long gpfn = __mfn_to_gpfn(d, gmfn);
  4.2071 -    unsigned long smfn, hl2mfn, old_smfn;
  4.2072 -
  4.2073 +    unsigned long smfn, old_smfn;
  4.2074 +
  4.2075 +#if defined (__i386__)
  4.2076 +    unsigned long hl2mfn;
  4.2077 +#endif
  4.2078 +  
  4.2079      int max_mode = ( shadow_mode_external(d) ? SHM_external
  4.2080                       : shadow_mode_translate(d) ? SHM_translate
  4.2081                       : shadow_mode_enabled(d) ? SHM_enable
  4.2082 @@ -2843,8 +1858,15 @@ void __update_pagetables(struct vcpu *v)
  4.2083      /*
  4.2084       *  arch.shadow_table
  4.2085       */
  4.2086 -    if ( unlikely(!(smfn = __shadow_status(d, gpfn, PGT_base_page_table))) )
  4.2087 +    if ( unlikely(!(smfn = __shadow_status(d, gpfn, PGT_base_page_table))) ) {
  4.2088 +#if CONFIG_PAGING_LEVELS == 2
  4.2089          smfn = shadow_l2_table(d, gpfn, gmfn);
  4.2090 +#elif CONFIG_PAGING_LEVELS == 3
  4.2091 +        smfn = shadow_l3_table(d, gpfn, gmfn);
  4.2092 +#elif CONFIG_PAGING_LEVELS == 4
  4.2093 +        smfn = shadow_l4_table(d, gpfn, gmfn);
  4.2094 +#endif
  4.2095 +    }
  4.2096      if ( !get_shadow_ref(smfn) )
  4.2097          BUG();
  4.2098      old_smfn = pagetable_get_pfn(v->arch.shadow_table);
  4.2099 @@ -2852,18 +1874,23 @@ void __update_pagetables(struct vcpu *v)
  4.2100      if ( old_smfn )
  4.2101          put_shadow_ref(old_smfn);
  4.2102  
  4.2103 -    SH_VVLOG("__update_pagetables(gmfn=%lx, smfn=%lx)", gmfn, smfn);
  4.2104 +    SH_VVLOG("shadow_update_pagetables(gmfn=%lx, smfn=%lx)", gmfn, smfn);
  4.2105  
  4.2106      /*
  4.2107       * arch.shadow_vtable
  4.2108       */
  4.2109 -    if ( max_mode == SHM_external )
  4.2110 +    if ( max_mode == SHM_external 
  4.2111 +#if CONFIG_PAGING_LEVELS >=4
  4.2112 +         || max_mode & SHM_enable
  4.2113 +#endif
  4.2114 +        )
  4.2115      {
  4.2116          if ( v->arch.shadow_vtable )
  4.2117              unmap_domain_page(v->arch.shadow_vtable);
  4.2118          v->arch.shadow_vtable = map_domain_page(smfn);
  4.2119      }
  4.2120  
  4.2121 +#if defined (__i386__)
  4.2122      /*
  4.2123       * arch.hl2_vtable
  4.2124       */
  4.2125 @@ -2908,8 +1935,20 @@ void __update_pagetables(struct vcpu *v)
  4.2126          // XXX - maybe this can be optimized somewhat??
  4.2127          local_flush_tlb();
  4.2128      }
  4.2129 +#endif
  4.2130  }
  4.2131  
  4.2132 +struct shadow_ops MODE_A_HANDLER = {
  4.2133 +    .guest_paging_levels        = 2,
  4.2134 +    .invlpg                     = shadow_invlpg_32,
  4.2135 +    .fault                      = shadow_fault_32,
  4.2136 +    .update_pagetables          = shadow_update_pagetables,
  4.2137 +    .sync_all                   = sync_all,
  4.2138 +    .remove_all_write_access    = remove_all_write_access,
  4.2139 +    .do_update_va_mapping       = do_update_va_mapping,
  4.2140 +    .mark_mfn_out_of_sync       = mark_mfn_out_of_sync,
  4.2141 +    .is_out_of_sync             = is_out_of_sync,
  4.2142 +};
  4.2143  
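Note: the ops table above is consumed by thin dispatch wrappers in the generic
shadow code; a minimal sketch of such a wrapper (illustrative only -- the real
wrappers live in the shadow headers, outside this hunk):

    static inline int shadow_fault_dispatch(unsigned long va,
                                            struct cpu_user_regs *regs)
    {
        struct vcpu *v = current;
        /* d->arch.ops is set to MODE_A_HANDLER or MODE_F_HANDLER elsewhere */
        return v->domain->arch.ops->fault(va, regs);
    }
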
  4.2144  /************************************************************************/
  4.2145  /************************************************************************/
  4.2146 @@ -3147,7 +2186,7 @@ static int check_l1_table(
  4.2147          errors++;                                                 \
  4.2148      } while ( 0 )
  4.2149  
  4.2150 -int check_l2_table(
  4.2151 +static int check_l2_table(
  4.2152      struct vcpu *v, unsigned long gmfn, unsigned long smfn, int oos_pdes)
  4.2153  {
  4.2154      struct domain *d = v->domain;
  4.2155 @@ -3236,10 +2275,16 @@ int check_l2_table(
  4.2156  }
  4.2157  #undef FAILPT
  4.2158  
  4.2159 -int _check_pagetable(struct vcpu *v, char *s)
  4.2160 +static int _check_pagetable(struct vcpu *v, char *s)
  4.2161  {
  4.2162      struct domain *d = v->domain;
  4.2163 +#if defined (__x86_64__)
  4.2164 +    pagetable_t pt = ((v->arch.flags & TF_kernel_mode)? 
  4.2165 +                      pagetable_get_pfn(v->arch.guest_table) :
  4.2166 +                      pagetable_get_pfn(v->arch.guest_table_user));
  4.2167 +#else
  4.2168      pagetable_t pt = v->arch.guest_table;
  4.2169 +#endif
  4.2170      unsigned long gptbase = pagetable_get_paddr(pt);
  4.2171      unsigned long ptbase_pfn, smfn;
  4.2172      unsigned long i;
  4.2173 @@ -3377,6 +2422,511 @@ int _check_all_pagetables(struct vcpu *v
  4.2174  
  4.2175  #endif // SHADOW_DEBUG
  4.2176  
  4.2177 +#if CONFIG_PAGING_LEVELS == 3
  4.2178 +static unsigned long shadow_l3_table(
  4.2179 +  struct domain *d, unsigned long gpfn, unsigned long gmfn)
  4.2180 +{
  4.2181 +    BUG();                      /* not implemented yet */
  4.2182 +}
  4.2183 +#endif
  4.2184 +
  4.2185 +#if CONFIG_PAGING_LEVELS >= 4
  4.2186 +/****************************************************************************/
  4.2187 +/* 64-bit shadow-mode code testing */
  4.2188 +/****************************************************************************/
  4.2189 +
  4.2190 +static unsigned long shadow_l4_table(
  4.2191 +  struct domain *d, unsigned long gpfn, unsigned long gmfn)
  4.2192 +{
  4.2193 +    unsigned long smfn;
  4.2194 +    l4_pgentry_t *spl4e;
  4.2195 +
  4.2196 +    SH_VVLOG("shadow_l4_table(gpfn=%lx, gmfn=%lx)", gpfn, gmfn);
  4.2197 +
  4.2198 +    perfc_incrc(shadow_l4_table_count);
  4.2199 +
  4.2200 +    if ( unlikely(!(smfn = alloc_shadow_page(d, gpfn, gmfn, PGT_l4_shadow))) )
  4.2201 +    {
  4.2202 +        printk("Couldn't alloc an L4 shadow for pfn=%lx mfn=%lx\n", gpfn, gmfn);
  4.2203 +        BUG(); /* XXX Deal gracefully with failure. */
  4.2204 +    }
  4.2205 +
  4.2206 +    spl4e = (l4_pgentry_t *)map_domain_page(smfn);
  4.2207 +    /* Install hypervisor and 4x linear p.t. mappings. */
  4.2208 +    if ( (PGT_base_page_table == PGT_l4_page_table) &&
  4.2209 +      !shadow_mode_external(d) )
  4.2210 +    {
  4.2211 +        /*
  4.2212 +         * We could proactively fill in PDEs for pages that are already
  4.2213 +         * shadowed *and* where the guest PDE has _PAGE_ACCESSED set
  4.2214 +         * (restriction required for coherence of the accessed bit). However,
  4.2215 +         * we tried it and it didn't help performance. This is simpler. 
  4.2216 +         */
  4.2217 +        memset(spl4e, 0, L4_PAGETABLE_ENTRIES*sizeof(l4_pgentry_t));
  4.2218 +
  4.2219 +        /* Install hypervisor and 2x linear p.t. mappings. */
  4.2220 +        memcpy(&spl4e[ROOT_PAGETABLE_FIRST_XEN_SLOT],
  4.2221 +           &idle_pg_table[ROOT_PAGETABLE_FIRST_XEN_SLOT],
  4.2222 +           ROOT_PAGETABLE_XEN_SLOTS * sizeof(l4_pgentry_t));
  4.2223 +
  4.2224 +        spl4e[l4_table_offset(PERDOMAIN_VIRT_START)] =
  4.2225 +            l4e_from_paddr(__pa(page_get_owner(&frame_table[gmfn])->arch.mm_perdomain_l3),
  4.2226 +                            __PAGE_HYPERVISOR);
  4.2227 +
  4.2228 +        if ( shadow_mode_translate(d) ) // NB: not external
  4.2229 +        {
  4.2230 +            spl4e[l4_table_offset(RO_MPT_VIRT_START)] =
  4.2231 +                l4e_from_paddr(pagetable_get_paddr(d->arch.phys_table),
  4.2232 +                                __PAGE_HYPERVISOR);
  4.2233 +        }
  4.2234 +        else
  4.2235 +            spl4e[l4_table_offset(LINEAR_PT_VIRT_START)] =
  4.2236 +                l4e_from_pfn(gmfn, __PAGE_HYPERVISOR);
  4.2237 +
  4.2238 +    } else
  4.2239 +        memset(spl4e, 0, L4_PAGETABLE_ENTRIES*sizeof(l4_pgentry_t));
  4.2240 +
  4.2241 +    unmap_domain_page(spl4e);
  4.2242 +
  4.2243 +    ESH_LOG("shadow_l4_table(%lx -> %lx)", gmfn, smfn);
  4.2244 +    return smfn;
  4.2245 +}
  4.2246 +
  4.2247 +/*
  4.2248 + * This variant of shadow_mark_va_out_of_sync() is for 2M page shadows.
  4.2249 + */
  4.2250 +static void shadow_mark_va_out_of_sync_2mp(
  4.2251 +  struct vcpu *v, unsigned long gpfn, unsigned long mfn, unsigned long writable_pl1e)
  4.2252 +{
  4.2253 +    struct out_of_sync_entry *entry =
  4.2254 +      shadow_mark_mfn_out_of_sync(v, gpfn, mfn);
  4.2255 +
  4.2256 +    entry->writable_pl1e = writable_pl1e;
  4.2257 +    ESH_LOG("<shadow_mark_va_out_of_sync_2mp> gpfn = %lx\n", gpfn);
  4.2258 +    if ( !get_shadow_ref(writable_pl1e >> L1_PAGETABLE_SHIFT) )
  4.2259 +        BUG();
  4.2260 +}
  4.2261 +
  4.2262 +
  4.2263 +static int get_shadow_mfn(struct domain *d, unsigned long gpfn, unsigned long *spmfn, u32 flag)
  4.2264 +{
  4.2265 +    unsigned long gmfn;
  4.2266 +    if ( !(*spmfn = __shadow_status(d, gpfn, flag)) )
  4.2267 +    {
  4.2268 +        /* This is NOT already shadowed so we need to shadow it. */
  4.2269 +        SH_VVLOG("<get_shadow_mfn>: not shadowed");
  4.2270 +
  4.2271 +        gmfn = __gpfn_to_mfn(d, gpfn);
  4.2272 +        if ( unlikely(!VALID_MFN(gmfn)) )
  4.2273 +        {
  4.2274 +            // Attempt to use an invalid pfn as a shadow page.
  4.2275 +            // XXX this needs to be more graceful!
  4.2276 +            BUG();
  4.2277 +        }
  4.2278 +
  4.2279 +        if ( unlikely(!(*spmfn =
  4.2280 +                  alloc_shadow_page(d, gpfn, gmfn, flag))) )
  4.2281 +        {
  4.2282 +            printk("<get_shadow_mfn> Couldn't alloc a shadow for pfn=%lx mfn=%lx\n", gpfn, gmfn);
  4.2283 +            BUG(); /* XXX Need to deal gracefully with failure. */
  4.2284 +        }
  4.2285 +        switch(flag) {
  4.2286 +            case PGT_l1_shadow:
  4.2287 +                perfc_incrc(shadow_l1_table_count);
  4.2288 +                break;
  4.2289 +            case PGT_l2_shadow:
  4.2290 +                perfc_incrc(shadow_l2_table_count);
  4.2291 +                break;
  4.2292 +            case PGT_l3_shadow:
  4.2293 +                perfc_incrc(shadow_l3_table_count);
  4.2294 +                break;
  4.2295 +            case PGT_hl2_shadow:
  4.2296 +                perfc_incrc(shadow_hl2_table_count);
  4.2297 +                break;
  4.2298 +        }
  4.2299 +
  4.2300 +        return 1;
  4.2301 +    } else {
  4.2302 +        /* This L1 is shadowed already, but the L2 entry is missing. */
  4.2303 +        SH_VVLOG("4b: was shadowed, l2 missing (%lx)", *spmfn);
  4.2304 +        return 0;
  4.2305 +    }
  4.2306 +}
  4.2307 +
  4.2308 +static void shadow_map_into_current(struct vcpu *v, 
  4.2309 +  unsigned long va, unsigned int from, unsigned int to)
  4.2310 +{
  4.2311 +    pgentry_64_t gle, sle;
  4.2312 +    unsigned long gpfn, smfn;
  4.2313 +
  4.2314 +    if (from == L1 && to == L2) {
  4.2315 +        shadow_map_l1_into_current_l2(va);
  4.2316 +        return;
  4.2317 +    }
  4.2318 +
  4.2319 +    __rw_entry(v, va, &gle, GUEST_ENTRY | GET_ENTRY | to); 
  4.2320 +    ASSERT(entry_get_flags(gle) & _PAGE_PRESENT);
  4.2321 +    gpfn = entry_get_pfn(gle);
  4.2322 +
  4.2323 +    get_shadow_mfn(v->domain, gpfn, &smfn, shadow_level_to_type(from));
  4.2324 +
  4.2325 +    if ( !get_shadow_ref(smfn) )
  4.2326 +        BUG();
  4.2327 +    entry_general(v->domain, &gle, &sle, smfn, to);
  4.2328 +    __rw_entry(v, va, &gle, GUEST_ENTRY | SET_ENTRY | to);
  4.2329 +    __rw_entry(v, va, &sle, SHADOW_ENTRY | SET_ENTRY | to);
  4.2330 +}
  4.2331 +
  4.2332 +/*
  4.2333 + * shadow_set_lxe should be put in shadow.h
  4.2334 + */
  4.2335 +static void shadow_set_l2e_64(unsigned long va, l2_pgentry_t sl2e, 
  4.2336 +  int create_l2_shadow)
  4.2337 +{
  4.2338 +    struct vcpu *v = current;
  4.2339 +    l4_pgentry_t sl4e;
  4.2340 +    l3_pgentry_t sl3e;
  4.2341 +
  4.2342 +    __shadow_get_l4e(v, va, &sl4e);
  4.2343 +    if (!(l4e_get_flags(sl4e) & _PAGE_PRESENT)) {
  4.2344 +        if (create_l2_shadow) {
  4.2345 +            perfc_incrc(shadow_set_l3e_force_map);
  4.2346 +            shadow_map_into_current(v, va, L3, L4);
  4.2347 +            __shadow_get_l4e(v, va, &sl4e);
  4.2348 +        } else {
  4.2349 +            printk("For non-VMX shadow, create_l2_shadow:%d\n", create_l2_shadow);
  4.2350 +        }
  4.2351 +    }
  4.2352 +
  4.2353 +    __shadow_get_l3e(v, va, &sl3e);
  4.2354 +    if (!(l3e_get_flags(sl3e) & _PAGE_PRESENT)) {
  4.2355 +         if (create_l2_shadow) {
  4.2356 +            perfc_incrc(shadow_set_l2e_force_map);
  4.2357 +            shadow_map_into_current(v, va, L2, L3);
  4.2358 +            __shadow_get_l3e(v, va, &sl3e);
  4.2359 +        } else {
  4.2360 +            printk("For non-VMX shadow, create_l2_shadow:%d\n", create_l2_shadow);
  4.2361 +        }
  4.2362 +         shadow_update_min_max(l4e_get_pfn(sl4e), l3_table_offset(va));
  4.2363 +
  4.2364 +    }
  4.2365 +
  4.2366 +    if (! __shadow_set_l2e(v, va, &sl2e))
  4.2367 +        BUG();
  4.2368 +    shadow_update_min_max(l3e_get_pfn(sl3e), l2_table_offset(va));
  4.2369 +}
  4.2370 +
  4.2371 +
  4.2372 +static void shadow_set_l1e_64(unsigned long va, pgentry_64_t *sl1e_p,
  4.2373 +  int create_l1_shadow)
  4.2374 +{
  4.2375 +    struct vcpu *v = current;
  4.2376 +    struct domain *d = v->domain;
  4.2377 +    pgentry_64_t sle;
  4.2378 +    pgentry_64_t sle_up;
  4.2379 +    l1_pgentry_t old_spte;
  4.2380 +    l1_pgentry_t sl1e = *(l1_pgentry_t *)sl1e_p;
  4.2381 +    int i;
  4.2382 +
  4.2383 +    for (i = L4; i >= L2; i--) {
  4.2384 +        if (!__rw_entry(v, va, &sle, SHADOW_ENTRY | GET_ENTRY | i)) {
  4.2385 +            printk("<%s> i = %d\n", __func__, i);
  4.2386 +            BUG();
  4.2387 +        }
  4.2388 +        if (!(entry_get_flags(sle) & _PAGE_PRESENT)) {
  4.2389 +            if (create_l1_shadow) {
  4.2390 +                perfc_incrc(shadow_set_l3e_force_map);
  4.2391 +                shadow_map_into_current(v, va, i-1, i);
  4.2392 +                __rw_entry(v, va, &sle, SHADOW_ENTRY | GET_ENTRY | i);
  4.2393 +            } else {
  4.2394 +#if 0
  4.2395 +                printk("For non VMX shadow, create_l1_shadow:%d\n", create_l1_shadow);
  4.2396 +#endif
  4.2397 +            }
  4.2398 +        }
  4.2399 +        if(i < L4)
  4.2400 +            shadow_update_min_max(entry_get_pfn(sle_up), table_offset_64(va, i));
  4.2401 +        sle_up = sle;
  4.2402 +    }
  4.2403 +
  4.2404 +    if ( shadow_mode_refcounts(d) )
  4.2405 +    {
  4.2406 +        __shadow_get_l1e(v, va, &old_spte);
  4.2407 +        ESH_LOG("old_sl1e: %lx, new_sl1e: %lx\n", l1e_get_intpte(old_spte), l1e_get_intpte(sl1e));
  4.2408 +        if ( l1e_has_changed(old_spte, sl1e, _PAGE_RW | _PAGE_PRESENT) )
  4.2409 +            {
  4.2410 +                if ( (l1e_get_flags(sl1e) & _PAGE_PRESENT) &&
  4.2411 +                     !shadow_get_page_from_l1e(sl1e, d) )
  4.2412 +                    sl1e = l1e_empty();
  4.2413 +                if ( l1e_get_flags(old_spte) & _PAGE_PRESENT )
  4.2414 +                    put_page_from_l1e(old_spte, d);
  4.2415 +            }
  4.2416 +    }
  4.2417 +
  4.2418 +    __shadow_set_l1e(v, va, &sl1e);
  4.2419 +    shadow_update_min_max(entry_get_pfn(sle_up), table_offset_64(va, L1));
  4.2420 +}
  4.2421 +
  4.2422 +static inline int l2e_rw_fault(
  4.2423 +    struct vcpu *v, l2_pgentry_t *gl2e_p, unsigned long va, int rw)
  4.2424 +{
  4.2425 +    struct domain *d = v->domain;
  4.2426 +    l2_pgentry_t gl2e = *gl2e_p;
  4.2427 +    l2_pgentry_t tmp_l2e = gl2e;
  4.2428 +    unsigned long start_gpfn = l2e_get_pfn(gl2e);
  4.2429 +    unsigned long gpfn, mfn;
  4.2430 +    unsigned long l1_mfn, gmfn;
  4.2431 +    l1_pgentry_t *l1_p;
  4.2432 +    l1_pgentry_t sl1e;
  4.2433 +    l1_pgentry_t old_sl1e;
  4.2434 +    l2_pgentry_t sl2e;
  4.2435 +    unsigned long nx = 0;
  4.2436 +
  4.2437 +    /* Check if gpfn is 2M aligned */
  4.2438 +
  4.2439 +    /* Update guest l2e */
  4.2440 +    if (rw) {
  4.2441 +        ASSERT(l2e_get_flags(gl2e) & _PAGE_RW);
  4.2442 +        l2e_add_flags(gl2e, _PAGE_DIRTY | _PAGE_ACCESSED);
  4.2443 +    } else {
  4.2444 +        l2e_add_flags(gl2e, _PAGE_ACCESSED);
  4.2445 +    }
  4.2446 +
  4.2447 +    l2e_remove_flags(tmp_l2e, _PAGE_PSE);
  4.2448 +    if (l2e_get_flags(gl2e) & _PAGE_NX) {
  4.2449 +        l2e_remove_flags(tmp_l2e, _PAGE_NX);
  4.2450 +        nx = 1UL << 63;
  4.2451 +    }
  4.2452 +
  4.2453 +
  4.2454 +    /* Get the shadow l2 first */
  4.2455 +    if ( !__shadow_get_l2e(v, va, &sl2e) )
  4.2456 +        sl2e = l2e_empty();
  4.2457 +
  4.2458 +    l1_mfn = ___shadow_status(d, start_gpfn | nx, PGT_fl1_shadow);
  4.2459 +    
  4.2460 +    /* Check the corresponding l2e */
  4.2461 +    if (l1_mfn) {
  4.2462 +        /* Why is it already PRESENT? */
  4.2463 +        if ((l2e_get_flags(sl2e) & _PAGE_PRESENT) && 
  4.2464 +                l2e_get_pfn(sl2e) == l1_mfn) {
  4.2465 +            ESH_LOG("sl2e PRESENT bit is set: %lx, l1_mfn = %lx\n", l2e_get_pfn(sl2e), l1_mfn);
  4.2466 +        } else {
  4.2467 +            if (!get_shadow_ref(l1_mfn))
  4.2468 +                BUG();
  4.2469 +        }
  4.2470 +        l1_p = (l1_pgentry_t *)map_domain_page(l1_mfn);
  4.2471 +        sl2e = l2e_from_pfn(l1_mfn, l2e_get_flags(tmp_l2e));
  4.2472 +    } else {
  4.2473 +        /* Allocate a new page as a shadow page table if needed */
  4.2474 +        gmfn = __gpfn_to_mfn(d, start_gpfn);
  4.2475 +        l1_mfn = alloc_shadow_page(d, start_gpfn | nx, gmfn, PGT_fl1_shadow);
  4.2476 +        if (unlikely(!l1_mfn)) {
  4.2477 +            BUG();
  4.2478 +        }
  4.2479 +
  4.2480 +        if (!get_shadow_ref(l1_mfn))
  4.2481 +            BUG();
  4.2482 +        l1_p = (l1_pgentry_t *)map_domain_page(l1_mfn );
  4.2483 +        sl2e = l2e_from_pfn(l1_mfn, l2e_get_flags(tmp_l2e));
  4.2484 +        memset(l1_p, 0, PAGE_SIZE);
  4.2485 +        ESH_LOG("Alloc a shadow page: %lx\n", l1_mfn);
  4.2486 +    }
  4.2487 +
  4.2488 +    ESH_LOG("<%s>: sl2e = %lx\n", __func__, l2e_get_intpte(sl2e));
  4.2489 +    /* Map the page to l2*/
  4.2490 +    shadow_set_l2e_64(va, sl2e, 1);
  4.2491 +
  4.2492 +    if (l2e_get_flags(gl2e) & _PAGE_NX)
  4.2493 +        l2e_add_flags(tmp_l2e, _PAGE_NX);
  4.2494 +
  4.2495 +    /* Propagate the shadow page table, i.e. setting sl1e */
  4.2496 +    for (gpfn = start_gpfn;
  4.2497 +      gpfn < (start_gpfn + L1_PAGETABLE_ENTRIES); gpfn++) {
  4.2498 +
  4.2499 +        mfn = __gpfn_to_mfn(d, gpfn);
  4.2500 +
  4.2501 +        if ( unlikely(!VALID_MFN(mfn)) )
  4.2502 +        {
  4.2503 +            continue;
  4.2504 +        }
  4.2505 +
  4.2506 +        sl1e = l1e_from_pfn(mfn, l2e_get_flags(tmp_l2e));
  4.2507 +
  4.2508 +        if (!rw) {
  4.2509 +            if ( shadow_mode_log_dirty(d) || 
  4.2510 +              !(l2e_get_flags(gl2e) & _PAGE_DIRTY) || mfn_is_page_table(mfn) )
  4.2511 +            {
  4.2512 +                l1e_remove_flags(sl1e, _PAGE_RW);
  4.2513 +            }
  4.2514 +        } else {
  4.2515 +            /* log dirty*/
  4.2516 +            /*
  4.2517 +               if ( shadow_mode_log_dirty(d) )
  4.2518 +               __mark_dirty(d, gmfn);
  4.2519 +             */
  4.2520 +        }
  4.2521 +       // printk("<%s> gpfn: %lx, mfn: %lx, sl1e: %lx\n", __func__, gpfn, mfn, l1e_get_intpte(sl1e));
  4.2522 +        /* The shadow entries need to be set up before shadow_mark_va_out_of_sync() */
  4.2523 +        old_sl1e = l1_p[gpfn - start_gpfn];
  4.2524 +
  4.2525 +        if ( l1e_has_changed(old_sl1e, sl1e, _PAGE_RW | _PAGE_PRESENT) )
  4.2526 +        {
  4.2527 +            if ( (l1e_get_flags(sl1e) & _PAGE_PRESENT) &&
  4.2528 +              !shadow_get_page_from_l1e(sl1e, d) ) {
  4.2529 +                ESH_LOG("emptying sl1e %lx, mfn: %lx, start_gpfn: %lx, gpfn: %lx\n", l1e_get_intpte(sl1e), mfn, start_gpfn, gpfn);
  4.2530 +                sl1e = l1e_empty();
  4.2531 +            }
  4.2532 +            if ( l1e_get_flags(old_sl1e) & _PAGE_PRESENT )
  4.2533 +                put_page_from_l1e(old_sl1e, d);
  4.2534 +        }
  4.2535 +
  4.2536 +        l1_p[gpfn - start_gpfn] = sl1e;
  4.2537 +
  4.2538 +        if (rw) {
  4.2539 +            /* shadow_mark_va_out_of_sync() needs modification for 2M pages */
  4.2540 +            if ( mfn_is_page_table(mfn) )
  4.2541 +                shadow_mark_va_out_of_sync_2mp(v, gpfn, mfn,
  4.2542 +                  l2e_get_paddr(sl2e) | (sizeof(l1_pgentry_t) * (gpfn - start_gpfn)));
  4.2543 +        }
  4.2544 +    }
  4.2545 +
  4.2546 +    unmap_domain_page(l1_p);
  4.2547 +    return 1;
  4.2548 +
  4.2549 +}
  4.2550 +
  4.2551 +static int shadow_fault_64(unsigned long va, struct cpu_user_regs *regs)
  4.2552 +{
  4.2553 +    struct vcpu *v = current;
  4.2554 +    struct domain *d = v->domain;
  4.2555 +    l2_pgentry_t gl2e;
  4.2556 +    l1_pgentry_t sl1e, gl1e;
  4.2557 +
  4.2558 +    perfc_incrc(shadow_fault_calls);
  4.2559 +
  4.2560 +    ESH_LOG("<shadow_fault_64> va=%lx,  rip = %lx, error code = %x\n", 
  4.2561 +            va, regs->eip, regs->error_code);
  4.2562 +
  4.2563 +    /*
  4.2564 +     * Don't let someone else take the guest's table pages out-of-sync.
  4.2565 +     */
  4.2566 +    shadow_lock(d);
  4.2567 +
  4.2568 +    /* XXX - FIX THIS COMMENT!!!
  4.2569 +     * STEP 1. Check to see if this fault might have been caused by an
  4.2570 +     *         out-of-sync table page entry, or if we should pass this
  4.2571 +     *         fault onto the guest.
  4.2572 +     */
  4.2573 +    __shadow_sync_va(v, va);
  4.2574 +
  4.2575 +    /*
  4.2576 +     * STEP 2. Check if the fault belongs to guest
  4.2577 +     */
  4.2578 +    if ( guest_page_fault(
  4.2579 +            v, va, regs->error_code, 
  4.2580 +            (pgentry_64_t *)&gl2e, (pgentry_64_t *)&gl1e) ) {
  4.2581 +        goto fail;
  4.2582 +    }
  4.2583 +    
  4.2584 +    if ( unlikely(!(l2e_get_flags(gl2e) & _PAGE_PSE)) ) {
  4.2585 +        /*
  4.2586 +         * Handle 4K pages here
  4.2587 +         */
  4.2588 +        
  4.2589 +        /* Write fault? */
  4.2590 +        if ( regs->error_code & 2 ) {
  4.2591 +            if ( !l1pte_write_fault(v, &gl1e, &sl1e, va) ) {
  4.2592 +                goto fail;
  4.2593 +            }
  4.2594 +        } else {
  4.2595 +            l1pte_read_fault(d, &gl1e, &sl1e);
  4.2596 +        }
  4.2597 +        /*
  4.2598 +         * STEP 3. Write guest/shadow l2e back
  4.2599 +         */
  4.2600 +        if (unlikely(!__guest_set_l1e(v, va, &gl1e))) {
  4.2601 +            domain_crash_synchronous();
  4.2602 +        }
  4.2603 +
  4.2604 +        ESH_LOG("gl1e: %lx, sl1e: %lx\n", l1e_get_intpte(gl1e), l1e_get_intpte(sl1e));
  4.2605 +        shadow_set_l1e_64(va, (pgentry_64_t *)&sl1e, 1);
  4.2606 +        /*
  4.2607 +         *  if necessary, record the page table page as dirty
  4.2608 +         */
  4.2609 +         if ( unlikely(shadow_mode_log_dirty(d)) )
  4.2610 +            __mark_dirty(d, __gpfn_to_mfn(d, l2e_get_pfn(gl2e)));
  4.2611 + 
  4.2612 +    } else {
  4.2613 +        /*
  4.2614 +         * Handle 2M pages here
  4.2615 +         */
  4.2616 +        /* Write fault? */
  4.2617 +        if ( regs->error_code & 2 ) {
  4.2618 +            if ( !l2e_rw_fault(v, &gl2e, va, WRITE_FAULT) ) {
  4.2619 +                goto fail;
  4.2620 +            }
  4.2621 +        } else {
  4.2622 +            l2e_rw_fault(v, &gl2e, va, READ_FAULT);
  4.2623 +        }
  4.2624 +
  4.2625 +        /*
  4.2626 +         * STEP 3. Write guest/shadow l2e back
  4.2627 +         */
  4.2628 +
  4.2629 +        if ( unlikely(!__guest_set_l2e(v, va, &gl2e)) ) {
  4.2630 +            domain_crash_synchronous();
  4.2631 +        }
  4.2632 +
  4.2633 +        /*
  4.2634 +         * Todo: if necessary, record the page table page as dirty
  4.2635 +         */
  4.2636 +
  4.2637 +
  4.2638 +    }
  4.2639 +
  4.2640 +    perfc_incrc(shadow_fault_fixed);
  4.2641 +    d->arch.shadow_fault_count++;
  4.2642 +
  4.2643 +    shadow_unlock(d);
  4.2644 +
  4.2645 +    return EXCRET_fault_fixed;
  4.2646 +fail:
  4.2647 +    shadow_unlock(d);
  4.2648 +    ESH_LOG("Guest fault~~~\n");
  4.2649 +    return 0;
  4.2650 +}
  4.2651 +
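Note: shadow_fault_64() above tests the raw page-fault error code
(regs->error_code & 2 for writes).  For reference, the standard x86
error-code bits are as follows (the names are illustrative, not
definitions from this tree):

    #define PFEC_page_present   (1U << 0)  /* fault on a present entry (protection) */
    #define PFEC_write_access   (1U << 1)  /* write access: the "& 2" test above */
    #define PFEC_user_mode      (1U << 2)  /* fault raised while in user mode */
    #define PFEC_reserved_bit   (1U << 3)  /* reserved bit set in a paging entry */
    #define PFEC_insn_fetch     (1U << 4)  /* instruction fetch (NX enabled) */
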
  4.2652 +static void shadow_invlpg_64(struct vcpu *v, unsigned long va)
  4.2653 +{
  4.2654 +    struct domain *d = v->domain;
  4.2655 +    //l1_pgentry_64_t  gl1e, sl1e;
  4.2656 +    l1_pgentry_t  sl1e;
  4.2657 +
  4.2658 +    shadow_lock(d);
  4.2659 +
  4.2660 +    sl1e = l1e_empty();
  4.2661 +    __shadow_set_l1e(v, va, &sl1e);
  4.2662 +
  4.2663 +    shadow_unlock(d);
  4.2664 +}
  4.2665 +
  4.2666 +#ifndef PGENTRY_32
  4.2667 +struct shadow_ops MODE_F_HANDLER = {
  4.2668 +    .guest_paging_levels              = 4,
  4.2669 +    .invlpg                     = shadow_invlpg_64,
  4.2670 +    .fault                      = shadow_fault_64,
  4.2671 +    .update_pagetables          = shadow_update_pagetables,
  4.2672 +    .sync_all                   = sync_all,
  4.2673 +    .remove_all_write_access    = remove_all_write_access,
  4.2674 +    .do_update_va_mapping       = do_update_va_mapping,
  4.2675 +    .mark_mfn_out_of_sync       = mark_mfn_out_of_sync,
  4.2676 +    .is_out_of_sync             = is_out_of_sync,
  4.2677 +};
  4.2678 +#endif
  4.2679 +
  4.2680 +#endif
  4.2681 +
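Note: MODE_F_HANDLER is installed via shadow_set_guest_paging_levels()
(see the vmx.c hunk below); the selection itself lives in shadow_public.c,
which is not part of this excerpt.  A plausible sketch matching the call
site's signature:

    int shadow_set_guest_paging_levels(struct domain *d, int levels)
    {
        switch ( levels )
        {
        case 4:
            d->arch.ops = &MODE_F_HANDLER;   /* 64-bit (4-level) guest */
            return 1;
        case 2:
            d->arch.ops = &MODE_A_HANDLER;   /* 32-bit non-PAE guest */
            return 1;
        default:
            return 0;                        /* unsupported; caller crashes domain */
        }
    }
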
  4.2682  /*
  4.2683   * Local variables:
  4.2684   * mode: C
     5.1 --- a/xen/arch/x86/traps.c	Mon Jul 11 09:03:24 2005 +0000
     5.2 +++ b/xen/arch/x86/traps.c	Mon Jul 11 09:14:11 2005 +0000
     5.3 @@ -438,6 +438,9 @@ asmlinkage int do_page_fault(struct cpu_
     5.4  
     5.5      if ( unlikely(shadow_mode_enabled(d)) &&
     5.6           ((addr < HYPERVISOR_VIRT_START) ||
     5.7 +#if defined(__x86_64__)
     5.8 +          (addr >= HYPERVISOR_VIRT_END) ||
     5.9 +#endif
    5.10            (shadow_mode_external(d) && GUEST_CONTEXT(v, regs))) &&
    5.11           shadow_fault(addr, regs) )
    5.12          return EXCRET_fault_fixed;
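Note: on x86_64 the Xen-reserved range sits in the middle of the canonical
address space rather than at the top, so guest-visible addresses also exist
at and above HYPERVISOR_VIRT_END; the extra test lets faults in that high
range reach shadow_fault().  The address part of the condition is equivalent
to this sketch:

    static inline int fault_addr_is_guest(unsigned long addr)
    {
    #if defined(__x86_64__)
        return (addr < HYPERVISOR_VIRT_START) || (addr >= HYPERVISOR_VIRT_END);
    #else
        return addr < HYPERVISOR_VIRT_START;  /* hole extends to the top on x86_32 */
    #endif
    }
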
     6.1 --- a/xen/arch/x86/vmx.c	Mon Jul 11 09:03:24 2005 +0000
     6.2 +++ b/xen/arch/x86/vmx.c	Mon Jul 11 09:14:11 2005 +0000
     6.3 @@ -38,6 +38,10 @@
     6.4  #include <asm/vmx_vmcs.h>
     6.5  #include <asm/vmx_intercept.h>
     6.6  #include <asm/shadow.h>
     6.7 +#if CONFIG_PAGING_LEVELS >= 4
     6.8 +#include <asm/shadow_64.h>
     6.9 +#endif
    6.10 +
    6.11  #include <public/io/ioreq.h>
    6.12  
    6.13  #ifdef CONFIG_VMX
    6.14 @@ -964,6 +968,12 @@ static int vmx_set_cr0(unsigned long val
    6.15              vm_entry_value |= VM_ENTRY_CONTROLS_IA_32E_MODE;
    6.16              __vmwrite(VM_ENTRY_CONTROLS, vm_entry_value);
    6.17  
    6.18 +#if CONFIG_PAGING_LEVELS >= 4 
    6.19 +            if(!shadow_set_guest_paging_levels(d->domain, 4)) {
    6.20 +                printk("Unsupported guest paging levels\n");
    6.21 +                domain_crash_synchronous(); /* need to take a clean path */
    6.22 +            }
    6.23 +#endif
    6.24          }
    6.25  
    6.26  	unsigned long crn;
     7.1 --- a/xen/arch/x86/vmx_platform.c	Mon Jul 11 09:03:24 2005 +0000
     7.2 +++ b/xen/arch/x86/vmx_platform.c	Mon Jul 11 09:14:11 2005 +0000
     7.3 @@ -32,7 +32,9 @@
     7.4  #include <xen/lib.h>
     7.5  #include <xen/sched.h>
     7.6  #include <asm/current.h>
     7.7 -
     7.8 +#if CONFIG_PAGING_LEVELS >= 4
     7.9 +#include <asm/shadow_64.h>
    7.10 +#endif
    7.11  #ifdef CONFIG_VMX
    7.12  
    7.13  #define DECODE_success  1
     8.1 --- a/xen/arch/x86/vmx_vmcs.c	Mon Jul 11 09:03:24 2005 +0000
     8.2 +++ b/xen/arch/x86/vmx_vmcs.c	Mon Jul 11 09:14:11 2005 +0000
     8.3 @@ -28,10 +28,13 @@
     8.4  #include <asm/processor.h>
     8.5  #include <asm/msr.h>
     8.6  #include <asm/vmx.h>
     8.7 +#include <asm/flushtlb.h>
     8.8  #include <xen/event.h>
     8.9  #include <xen/kernel.h>
    8.10  #include <public/io/ioreq.h>
    8.11 -
    8.12 +#if CONFIG_PAGING_LEVELS >= 4
    8.13 +#include <asm/shadow_64.h>
    8.14 +#endif
    8.15  #ifdef CONFIG_VMX
    8.16  
    8.17  struct vmcs_struct *alloc_vmcs(void) 
     9.1 --- a/xen/include/asm-x86/domain.h	Mon Jul 11 09:03:24 2005 +0000
     9.2 +++ b/xen/include/asm-x86/domain.h	Mon Jul 11 09:14:11 2005 +0000
     9.3 @@ -29,6 +29,7 @@ struct arch_domain
     9.4      u8 *iobmp_mask;       /* Address of IO bitmap mask, or NULL.      */
     9.5  
     9.6      /* Shadow mode status and controls. */
     9.7 +    struct shadow_ops *ops;
     9.8      unsigned int shadow_mode;  /* flags to control shadow table operation */
     9.9      unsigned int shadow_nest;  /* Recursive depth of shadow_lock() nesting */
    9.10      /* Shadow mode has tainted page reference counts? */
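Note: the new ops pointer refers to struct shadow_ops, declared in
asm-x86/shadow_public.h (not shown in this excerpt).  Reconstructed from the
MODE_A_HANDLER / MODE_F_HANDLER initializers in shadow.c, it looks roughly
like:

    struct shadow_ops {
        unsigned int guest_paging_levels;
        void (*invlpg)(struct vcpu *v, unsigned long va);
        int  (*fault)(unsigned long va, struct cpu_user_regs *regs);
        void (*update_pagetables)(struct vcpu *v);
        void (*sync_all)(struct domain *d);
        int  (*remove_all_write_access)(struct domain *d,
                 unsigned long readonly_gpfn, unsigned long readonly_gmfn);
        int  (*do_update_va_mapping)(unsigned long va, l1_pgentry_t val,
                 struct vcpu *v);
        struct out_of_sync_entry *
             (*mark_mfn_out_of_sync)(struct vcpu *v, unsigned long gpfn,
                 unsigned long mfn);
        int  (*is_out_of_sync)(struct vcpu *v, unsigned long va);
    };
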
    10.1 --- a/xen/include/asm-x86/mm.h	Mon Jul 11 09:03:24 2005 +0000
    10.2 +++ b/xen/include/asm-x86/mm.h	Mon Jul 11 09:14:11 2005 +0000
    10.3 @@ -51,24 +51,25 @@ struct pfn_info
    10.4  };
    10.5  
    10.6   /* The following page types are MUTUALLY EXCLUSIVE. */
    10.7 -#define PGT_none            (0<<29) /* no special uses of this page */
    10.8 -#define PGT_l1_page_table   (1<<29) /* using this page as an L1 page table? */
    10.9 -#define PGT_l2_page_table   (2<<29) /* using this page as an L2 page table? */
   10.10 -#define PGT_l3_page_table   (3<<29) /* using this page as an L3 page table? */
   10.11 -#define PGT_l4_page_table   (4<<29) /* using this page as an L4 page table? */
   10.12 -#define PGT_gdt_page        (5<<29) /* using this page in a GDT? */
   10.13 -#define PGT_ldt_page        (6<<29) /* using this page in an LDT? */
   10.14 -#define PGT_writable_page   (7<<29) /* has writable mappings of this page? */
   10.15 +#define PGT_none            (0U<<29) /* no special uses of this page */
   10.16 +#define PGT_l1_page_table   (1U<<29) /* using this page as an L1 page table? */
   10.17 +#define PGT_l2_page_table   (2U<<29) /* using this page as an L2 page table? */
   10.18 +#define PGT_l3_page_table   (3U<<29) /* using this page as an L3 page table? */
   10.19 +#define PGT_l4_page_table   (4U<<29) /* using this page as an L4 page table? */
   10.20 +#define PGT_gdt_page        (5U<<29) /* using this page in a GDT? */
   10.21 +#define PGT_ldt_page        (6U<<29) /* using this page in an LDT? */
   10.22 +#define PGT_writable_page   (7U<<29) /* has writable mappings of this page? */
   10.23  
   10.24  #define PGT_l1_shadow       PGT_l1_page_table
   10.25  #define PGT_l2_shadow       PGT_l2_page_table
   10.26  #define PGT_l3_shadow       PGT_l3_page_table
   10.27  #define PGT_l4_shadow       PGT_l4_page_table
   10.28 -#define PGT_hl2_shadow      (5<<29)
   10.29 -#define PGT_snapshot        (6<<29)
   10.30 -#define PGT_writable_pred   (7<<29) /* predicted gpfn with writable ref */
   10.31 +#define PGT_hl2_shadow      (5U<<29)
   10.32 +#define PGT_snapshot        (6U<<29)
   10.33 +#define PGT_writable_pred   (7U<<29) /* predicted gpfn with writable ref */
   10.34  
   10.35 -#define PGT_type_mask       (7<<29) /* Bits 29-31. */
   10.36 +#define PGT_fl1_shadow      (5U<<29)
   10.37 +#define PGT_type_mask       (7U<<29) /* Bits 29-31. */
   10.38  
   10.39   /* Has this page been validated for use as its current type? */
   10.40  #define _PGT_validated      28
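
The added U suffixes are not cosmetic. 7<<29 does not fit in a signed 32-bit int, so in practice it evaluates to a negative value, and converting that to a 64-bit unsigned type sign-extends it into the upper 32 bits; 7U<<29 stays at 0xe0000000. Because the 64-bit shadow code combines these constants with unsigned long quantities, the unsigned form is what keeps the masks confined to bits 29-31. A standalone illustration (the variable names are just for the example):

    #include <stdio.h>

    int main(void)
    {
        /* Signed shift: the result is negative and sign-extends when
         * widened to 64 bits. */
        unsigned long bad_mask  = (7 << 29);   /* 0xffffffffe0000000 */
        /* Unsigned shift: the value stays 0xe0000000 after widening. */
        unsigned long good_mask = (7U << 29);  /* 0x00000000e0000000 */

        printf("signed form:   %#lx\n", bad_mask);
        printf("unsigned form: %#lx\n", good_mask);
        return 0;
    }
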
    11.1 --- a/xen/include/asm-x86/page.h	Mon Jul 11 09:03:24 2005 +0000
    11.2 +++ b/xen/include/asm-x86/page.h	Mon Jul 11 09:14:11 2005 +0000
    11.3 @@ -198,6 +198,14 @@ typedef struct { u64 pfn; } pagetable_t;
    11.4  #else
    11.5  #define DOMAIN_ENTRIES_PER_L2_PAGETABLE     0
    11.6  #define HYPERVISOR_ENTRIES_PER_L2_PAGETABLE 0
    11.7 +
    11.8 +#define DOMAIN_ENTRIES_PER_L4_PAGETABLE     \
    11.9 +    (l4_table_offset(HYPERVISOR_VIRT_START))
   11.10 +#define GUEST_ENTRIES_PER_L4_PAGETABLE     \
   11.11 +    (l4_table_offset(HYPERVISOR_VIRT_END))
   11.12 +#define HYPERVISOR_ENTRIES_PER_L4_PAGETABLE \
   11.13 +    (L4_PAGETABLE_ENTRIES - GUEST_ENTRIES_PER_L4_PAGETABLE  \
   11.14 +     + DOMAIN_ENTRIES_PER_L4_PAGETABLE)
   11.15  #endif
   11.16  
   11.17  #define linear_l1_table                                                 \
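
These follow the same pattern as the L2 macros above them, but on x86_64 the guest's address space continues above HYPERVISOR_VIRT_END, so the guest/hypervisor split needs both a lower and an upper bound. A worked example of the arithmetic, assuming HYPERVISOR_VIRT_START = 0xFFFF800000000000 and HYPERVISOR_VIRT_END = 0xFFFF880000000000 (both constants are assumed for illustration, not taken from this patch):

    /* Illustrative arithmetic only; the virtual-address constants are assumptions. */
    #define L4_PAGETABLE_SHIFT    39
    #define L4_PAGETABLE_ENTRIES  512
    #define l4_table_offset(a)    (((a) >> L4_PAGETABLE_SHIFT) & (L4_PAGETABLE_ENTRIES - 1))

    /* DOMAIN_ENTRIES_PER_L4_PAGETABLE     = l4_table_offset(0xFFFF800000000000UL) = 256 */
    /* GUEST_ENTRIES_PER_L4_PAGETABLE      = l4_table_offset(0xFFFF880000000000UL) = 272 */
    /* HYPERVISOR_ENTRIES_PER_L4_PAGETABLE = 512 - 272 + 256                       = 496 */
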
    12.1 --- a/xen/include/asm-x86/shadow.h	Mon Jul 11 09:03:24 2005 +0000
    12.2 +++ b/xen/include/asm-x86/shadow.h	Mon Jul 11 09:14:11 2005 +0000
    12.3 @@ -33,6 +33,7 @@
    12.4  #include <asm/processor.h>
    12.5  #include <asm/vmx.h>
    12.6  #include <public/dom0_ops.h>
    12.7 +#include <asm/shadow_public.h>
    12.8  
    12.9  /* Shadow PT operation mode : shadow-mode variable in arch_domain. */
   12.10  
   12.11 @@ -135,6 +136,7 @@ extern void shadow_l3_normal_pt_update(s
   12.12                                         struct domain_mmap_cache *cache);
   12.13  #endif
   12.14  #if CONFIG_PAGING_LEVELS >= 4
   12.15 +#include <asm/page-guest32.h>
   12.16  extern void shadow_l4_normal_pt_update(struct domain *d,
   12.17                                         unsigned long pa, l4_pgentry_t l4e,
   12.18                                         struct domain_mmap_cache *cache);
   12.19 @@ -146,7 +148,10 @@ extern int shadow_do_update_va_mapping(u
   12.20  
   12.21  static inline unsigned long __shadow_status(
   12.22      struct domain *d, unsigned long gpfn, unsigned long stype);
   12.23 +
   12.24 +#if CONFIG_PAGING_LEVELS <= 2
   12.25  static inline void update_hl2e(struct vcpu *v, unsigned long va);
   12.26 +#endif
   12.27  
   12.28  extern void vmx_shadow_clear_state(struct domain *);
   12.29  
   12.30 @@ -209,11 +214,12 @@ static void inline
   12.31          //
   12.32          __shadow_sync_all(v->domain);
   12.33      }
   12.34 -
   12.35 +#if CONFIG_PAGING_LEVELS <= 2
   12.36      // Also make sure the HL2 is up-to-date for this address.
   12.37      //
   12.38      if ( unlikely(shadow_mode_translate(v->domain)) )
   12.39          update_hl2e(v, va);
   12.40 +#endif
   12.41  }
   12.42  
   12.43  static void inline
   12.44 @@ -501,7 +507,7 @@ static inline int mark_dirty(struct doma
   12.45  
   12.46  
   12.47  /************************************************************************/
   12.48 -
   12.49 +#if CONFIG_PAGING_LEVELS <= 2
   12.50  static inline void
   12.51  __shadow_get_l2e(
   12.52      struct vcpu *v, unsigned long va, l2_pgentry_t *psl2e)
   12.53 @@ -623,7 +629,7 @@ static inline void shadow_sync_and_drop_
   12.54  
   12.55      shadow_unlock(d);
   12.56  }
   12.57 -
   12.58 +#endif
   12.59  /************************************************************************/
   12.60  
   12.61  /*
   12.62 @@ -709,7 +715,7 @@ shadow_unpin(unsigned long smfn)
   12.63  
   12.64  
   12.65  /************************************************************************/
   12.66 -
   12.67 +#if CONFIG_PAGING_LEVELS <= 2
   12.68  extern void shadow_mark_va_out_of_sync(
   12.69      struct vcpu *v, unsigned long gpfn, unsigned long mfn,
   12.70      unsigned long va);
   12.71 @@ -783,6 +789,7 @@ static inline int l1pte_read_fault(
   12.72  
   12.73      return 1;
   12.74  }
   12.75 +#endif
   12.76  
   12.77  static inline void l1pte_propagate_from_guest(
   12.78      struct domain *d, l1_pgentry_t gpte, l1_pgentry_t *spte_p)
   12.79 @@ -1535,6 +1542,7 @@ shadow_update_min_max(unsigned long smfn
   12.80          sl1page->tlbflush_timestamp = SHADOW_ENCODE_MIN_MAX(min, max);
   12.81  }
   12.82  
   12.83 +#if CONFIG_PAGING_LEVELS <= 2
   12.84  extern void shadow_map_l1_into_current_l2(unsigned long va);
   12.85  
   12.86  void static inline
   12.87 @@ -1602,7 +1610,7 @@ shadow_set_l1e(unsigned long va, l1_pgen
   12.88  
   12.89      shadow_update_min_max(l2e_get_pfn(sl2e), l1_table_offset(va));
   12.90  }
   12.91 -
   12.92 +#endif
   12.93  /************************************************************************/
   12.94  
   12.95  static inline int
   12.96 @@ -1626,6 +1634,7 @@ shadow_mode_page_writable(struct domain 
   12.97      return 0;
   12.98  }
   12.99  
  12.100 +#if CONFIG_PAGING_LEVELS <= 2
  12.101  static inline l1_pgentry_t gva_to_gpte(unsigned long gva)
  12.102  {
  12.103      l2_pgentry_t gpde;
  12.104 @@ -1664,7 +1673,7 @@ static inline unsigned long gva_to_gpa(u
  12.105  
  12.106      return l1e_get_paddr(gpte) + (gva & ~PAGE_MASK); 
  12.107  }
  12.108 -
  12.109 +#endif
  12.110  /************************************************************************/
  12.111  
  12.112  extern void __update_pagetables(struct vcpu *v);
  12.113 @@ -1699,14 +1708,14 @@ static inline void update_pagetables(str
  12.114  
  12.115      if ( likely(!shadow_mode_external(d)) )
  12.116      {
  12.117 +        if ( shadow_mode_enabled(d) )
  12.118 +            v->arch.monitor_table = v->arch.shadow_table;
  12.119 +        else
  12.120  #ifdef __x86_64__
  12.121          if ( !(v->arch.flags & TF_kernel_mode) )
  12.122              v->arch.monitor_table = v->arch.guest_table_user;
  12.123          else
  12.124  #endif
  12.125 -        if ( shadow_mode_enabled(d) )
  12.126 -            v->arch.monitor_table = v->arch.shadow_table;
  12.127 -        else
  12.128              v->arch.monitor_table = v->arch.guest_table;
  12.129      }
  12.130  }