ia64/xen-unstable

changeset 13141:c75d6f2aad7a

[XEN] Clean up the shadow interface
Remove a lot of unnecessary things from shadow.h, and move the shadow lock
entirely inside the shadow code.
Signed-off-by: Tim Deegan <Tim.Deegan@xensource.com>
author Tim Deegan <Tim.Deegan@xensource.com>
date Wed Dec 20 12:03:07 2006 +0000 (2006-12-20)
parents b258c7587d8d
children 988d3a63d9be
files xen/arch/x86/domain.c xen/arch/x86/domain_build.c xen/arch/x86/mm.c xen/arch/x86/mm/shadow/common.c xen/arch/x86/mm/shadow/multi.c xen/arch/x86/mm/shadow/multi.h xen/arch/x86/mm/shadow/private.h xen/arch/x86/mm/shadow/types.h xen/include/asm-x86/mm.h xen/include/asm-x86/shadow.h
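
The central interface change is that callers outside xen/arch/x86/mm/shadow/
no longer take the shadow lock themselves.  The sketch below condenses the
do_mmu_update hunk from the mm.c section further down (all names as they
appear in the diff); it illustrates the before/after pattern and is not extra
code from the changeset:

    /* Before: callers bracketed guest-pagetable writes with the shadow lock. */
    if ( unlikely(shadow_mode_enabled(d)) )
        shadow_lock(d);
    *(intpte_t *)va = req.val;
    okay = 1;
    if ( unlikely(shadow_mode_enabled(d)) )
    {
        shadow_validate_guest_entry(v, _mfn(mfn), va);
        shadow_unlock(d);
    }

    /* After: one entry point; locking and validation happen inside the
     * shadow code. */
    if ( unlikely(shadow_mode_enabled(d)) )
        okay = shadow_write_guest_entry(v, va, req.val, _mfn(mfn));
    else
    {
        *(intpte_t *)va = req.val;
        okay = 1;
    }
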
line diff
     1.1 --- a/xen/arch/x86/domain.c	Wed Dec 20 11:59:54 2006 +0000
     1.2 +++ b/xen/arch/x86/domain.c	Wed Dec 20 12:03:07 2006 +0000
     1.3 @@ -172,10 +172,11 @@ int arch_domain_create(struct domain *d)
     1.4  {
     1.5  #ifdef __x86_64__
     1.6      struct page_info *pg;
     1.7 +    int i;
     1.8  #endif
     1.9      l1_pgentry_t gdt_l1e;
    1.10      int vcpuid, pdpt_order;
    1.11 -    int i, rc = -ENOMEM;
    1.12 +    int rc = -ENOMEM;
    1.13  
    1.14      pdpt_order = get_order_from_bytes(PDPT_L1_ENTRIES * sizeof(l1_pgentry_t));
    1.15      d->arch.mm_perdomain_pt = alloc_xenheap_pages(pdpt_order);
    1.16 @@ -218,12 +219,7 @@ int arch_domain_create(struct domain *d)
    1.17  
    1.18  #endif /* __x86_64__ */
    1.19  
    1.20 -    shadow_lock_init(d);
    1.21 -    for ( i = 0; i <= SHADOW_MAX_ORDER; i++ )
    1.22 -        INIT_LIST_HEAD(&d->arch.shadow.freelists[i]);
    1.23 -    INIT_LIST_HEAD(&d->arch.shadow.p2m_freelist);
    1.24 -    INIT_LIST_HEAD(&d->arch.shadow.p2m_inuse);
    1.25 -    INIT_LIST_HEAD(&d->arch.shadow.pinned_shadows);
    1.26 +    shadow_domain_init(d);
    1.27  
    1.28      if ( !is_idle_domain(d) )
    1.29      {
    1.30 @@ -366,15 +362,6 @@ int arch_set_info_guest(
    1.31          v->arch.guest_table = pagetable_from_pfn(cr3_pfn);
    1.32      }    
    1.33  
    1.34 -    /* Shadow: make sure the domain has enough shadow memory to
    1.35 -     * boot another vcpu */
    1.36 -    if ( shadow_mode_enabled(d) 
    1.37 -         && d->arch.shadow.total_pages < shadow_min_acceptable_pages(d) )
    1.38 -    {
    1.39 -        destroy_gdt(v);
    1.40 -        return -ENOMEM;
    1.41 -    }
    1.42 -
    1.43      if ( v->vcpu_id == 0 )
    1.44          update_domain_wallclock_time(d);
    1.45  
     2.1 --- a/xen/arch/x86/domain_build.c	Wed Dec 20 11:59:54 2006 +0000
     2.2 +++ b/xen/arch/x86/domain_build.c	Wed Dec 20 12:03:07 2006 +0000
     2.3 @@ -827,7 +827,7 @@ int construct_dom0(struct domain *d,
     2.4      regs->eflags = X86_EFLAGS_IF;
     2.5  
     2.6      if ( opt_dom0_shadow )
     2.7 -        if ( shadow_test_enable(d) == 0 ) 
     2.8 +        if ( shadow_enable(d, SHM2_enable) == 0 ) 
     2.9              shadow_update_paging_modes(v);
    2.10  
    2.11      if ( supervisor_mode_kernel )
     3.1 --- a/xen/arch/x86/mm.c	Wed Dec 20 11:59:54 2006 +0000
     3.2 +++ b/xen/arch/x86/mm.c	Wed Dec 20 12:03:07 2006 +0000
     3.3 @@ -365,6 +365,38 @@ void write_ptbase(struct vcpu *v)
     3.4      write_cr3(v->arch.cr3);
     3.5  }
     3.6  
     3.7 +/* Should be called after CR3 is updated.
     3.8 + * Updates vcpu->arch.cr3 and, for HVM guests, vcpu->arch.hvm_vcpu.cpu_cr3.
     3.9 + * 
    3.10 + * Also updates other state derived from CR3 (vcpu->arch.guest_vtable,
    3.11 + * shadow_vtable, etc).
    3.12 + *
    3.13 + * Uses values found in vcpu->arch.(guest_table and guest_table_user), and
    3.14 + * for HVM guests, arch.monitor_table and hvm's guest CR3.
    3.15 + *
     3.16 + * Updates ref counts to shadow tables appropriately.
    3.17 + */
    3.18 +void update_cr3(struct vcpu *v)
    3.19 +{
    3.20 +    unsigned long cr3_mfn=0;
    3.21 +
    3.22 +    if ( shadow_mode_enabled(v->domain) )
    3.23 +    {
    3.24 +        shadow_update_cr3(v);
    3.25 +        return;
    3.26 +    }
    3.27 +
    3.28 +#if CONFIG_PAGING_LEVELS == 4
    3.29 +    if ( !(v->arch.flags & TF_kernel_mode) )
    3.30 +        cr3_mfn = pagetable_get_pfn(v->arch.guest_table_user);
    3.31 +    else
    3.32 +#endif
    3.33 +        cr3_mfn = pagetable_get_pfn(v->arch.guest_table);
    3.34 +
    3.35 +    make_cr3(v, cr3_mfn);
    3.36 +}
    3.37 +
    3.38 +
    3.39  void invalidate_shadow_ldt(struct vcpu *v)
    3.40  {
    3.41      int i;
    3.42 @@ -1160,53 +1192,57 @@ static void free_l4_table(struct page_in
    3.43  
    3.44  #endif
    3.45  
    3.46 -static inline int update_l1e(l1_pgentry_t *pl1e, 
    3.47 -                             l1_pgentry_t  ol1e, 
    3.48 -                             l1_pgentry_t  nl1e,
    3.49 -                             unsigned long gl1mfn,
    3.50 -                             struct vcpu *v)
    3.51 +
    3.52 +/* How to write an entry to the guest pagetables.
    3.53 + * Returns 0 for failure (pointer not valid), 1 for success. */
    3.54 +static inline int update_intpte(intpte_t *p, 
    3.55 +                                intpte_t old, 
    3.56 +                                intpte_t new,
    3.57 +                                unsigned long mfn,
    3.58 +                                struct vcpu *v)
    3.59  {
    3.60      int rv = 1;
    3.61 +#ifndef PTE_UPDATE_WITH_CMPXCHG
    3.62      if ( unlikely(shadow_mode_enabled(v->domain)) )
    3.63 -        shadow_lock(v->domain);
    3.64 -#ifndef PTE_UPDATE_WITH_CMPXCHG
    3.65 -    rv = (!__copy_to_user(pl1e, &nl1e, sizeof(nl1e)));
    3.66 +        rv = shadow_write_guest_entry(v, p, new, _mfn(mfn));
    3.67 +    else
    3.68 +        rv = (!__copy_to_user(p, &new, sizeof(new)));
    3.69  #else
    3.70      {
    3.71 -        intpte_t o = l1e_get_intpte(ol1e);
    3.72 -        intpte_t n = l1e_get_intpte(nl1e);
    3.73 -        
    3.74 +        intpte_t t = old;
    3.75          for ( ; ; )
    3.76          {
    3.77 -            if ( unlikely(cmpxchg_user(pl1e, o, n) != 0) )
    3.78 +            if ( unlikely(shadow_mode_enabled(v->domain)) )
    3.79 +                rv = shadow_cmpxchg_guest_entry(v, p, &t, new, _mfn(mfn));
    3.80 +            else
    3.81 +                rv = (!cmpxchg_user(p, t, new));
    3.82 +
    3.83 +            if ( unlikely(rv == 0) )
    3.84              {
    3.85                  MEM_LOG("Failed to update %" PRIpte " -> %" PRIpte
    3.86 -                        ": saw %" PRIpte,
    3.87 -                        l1e_get_intpte(ol1e),
    3.88 -                        l1e_get_intpte(nl1e),
    3.89 -                        o);
    3.90 -                rv = 0;
    3.91 +                        ": saw %" PRIpte, old, new, t);
    3.92                  break;
    3.93              }
    3.94  
    3.95 -            if ( o == l1e_get_intpte(ol1e) )
    3.96 +            if ( t == old )
    3.97                  break;
    3.98  
    3.99              /* Allowed to change in Accessed/Dirty flags only. */
   3.100 -            BUG_ON((o ^ l1e_get_intpte(ol1e)) &
   3.101 -                   ~(int)(_PAGE_ACCESSED|_PAGE_DIRTY));
   3.102 -            ol1e = l1e_from_intpte(o);
   3.103 +            BUG_ON((t ^ old) & ~(intpte_t)(_PAGE_ACCESSED|_PAGE_DIRTY));
   3.104 +
   3.105 +            old = t;
   3.106          }
   3.107      }
   3.108  #endif
   3.109 -    if ( unlikely(shadow_mode_enabled(v->domain)) && rv )
   3.110 -    {
   3.111 -        shadow_validate_guest_entry(v, _mfn(gl1mfn), pl1e);
   3.112 -        shadow_unlock(v->domain);    
   3.113 -    }
   3.114      return rv;
   3.115  }
   3.116  
   3.117 +/* Macro that wraps the appropriate type-changes around update_intpte().
   3.118 + * Arguments are: type, ptr, old, new, mfn, vcpu */
   3.119 +#define UPDATE_ENTRY(_t,_p,_o,_n,_m,_v)                             \
   3.120 +    update_intpte((intpte_t *)(_p),                                 \
   3.121 +                  _t ## e_get_intpte(_o), _t ## e_get_intpte(_n),   \
   3.122 +                  (_m), (_v))
   3.123  
   3.124  /* Update the L1 entry at pl1e to new value nl1e. */
   3.125  static int mod_l1_entry(l1_pgentry_t *pl1e, l1_pgentry_t nl1e, 
   3.126 @@ -1219,7 +1255,7 @@ static int mod_l1_entry(l1_pgentry_t *pl
   3.127          return 0;
   3.128  
   3.129      if ( unlikely(shadow_mode_refcounts(d)) )
   3.130 -        return update_l1e(pl1e, ol1e, nl1e, gl1mfn, current);
   3.131 +        return UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, current);
   3.132  
   3.133      if ( l1e_get_flags(nl1e) & _PAGE_PRESENT )
   3.134      {
   3.135 @@ -1238,12 +1274,12 @@ static int mod_l1_entry(l1_pgentry_t *pl
   3.136  
   3.137          /* Fast path for identical mapping, r/w and presence. */
   3.138          if ( !l1e_has_changed(ol1e, nl1e, _PAGE_RW | _PAGE_PRESENT) )
   3.139 -            return update_l1e(pl1e, ol1e, nl1e, gl1mfn, current);
   3.140 +            return UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, current);
   3.141  
   3.142          if ( unlikely(!get_page_from_l1e(nl1e, FOREIGNDOM)) )
   3.143              return 0;
   3.144          
   3.145 -        if ( unlikely(!update_l1e(pl1e, ol1e, nl1e, gl1mfn, current)) )
   3.146 +        if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, current)) )
   3.147          {
   3.148              put_page_from_l1e(nl1e, d);
   3.149              return 0;
   3.150 @@ -1251,7 +1287,7 @@ static int mod_l1_entry(l1_pgentry_t *pl
   3.151      }
   3.152      else
   3.153      {
   3.154 -        if ( unlikely(!update_l1e(pl1e, ol1e, nl1e, gl1mfn, current)) )
   3.155 +        if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, current)) )
   3.156              return 0;
   3.157      }
   3.158  
   3.159 @@ -1259,36 +1295,6 @@ static int mod_l1_entry(l1_pgentry_t *pl
   3.160      return 1;
   3.161  }
   3.162  
   3.163 -#ifndef PTE_UPDATE_WITH_CMPXCHG
   3.164 -#define _UPDATE_ENTRY(_t,_p,_o,_n) ({ (*(_p) = (_n)); 1; })
   3.165 -#else
   3.166 -#define _UPDATE_ENTRY(_t,_p,_o,_n) ({                            \
   3.167 -    for ( ; ; )                                                 \
   3.168 -    {                                                           \
   3.169 -        intpte_t __o = cmpxchg((intpte_t *)(_p),                \
   3.170 -                               _t ## e_get_intpte(_o),          \
   3.171 -                               _t ## e_get_intpte(_n));         \
   3.172 -        if ( __o == _t ## e_get_intpte(_o) )                    \
   3.173 -            break;                                              \
   3.174 -        /* Allowed to change in Accessed/Dirty flags only. */   \
   3.175 -        BUG_ON((__o ^ _t ## e_get_intpte(_o)) &                 \
   3.176 -               ~(int)(_PAGE_ACCESSED|_PAGE_DIRTY));             \
   3.177 -        _o = _t ## e_from_intpte(__o);                          \
   3.178 -    }                                                           \
   3.179 -    1; })
   3.180 -#endif
   3.181 -#define UPDATE_ENTRY(_t,_p,_o,_n,_m)  ({                            \
   3.182 -    int rv;                                                         \
   3.183 -    if ( unlikely(shadow_mode_enabled(current->domain)) )          \
   3.184 -        shadow_lock(current->domain);                              \
   3.185 -    rv = _UPDATE_ENTRY(_t, _p, _o, _n);                             \
   3.186 -    if ( unlikely(shadow_mode_enabled(current->domain)) )          \
   3.187 -    {                                                               \
   3.188 -        shadow_validate_guest_entry(current, _mfn(_m), (_p));      \
   3.189 -        shadow_unlock(current->domain);                            \
   3.190 -    }                                                               \
   3.191 -    rv;                                                             \
   3.192 -})
   3.193  
   3.194  /* Update the L2 entry at pl2e to new value nl2e. pl2e is within frame pfn. */
   3.195  static int mod_l2_entry(l2_pgentry_t *pl2e, 
   3.196 @@ -1320,18 +1326,18 @@ static int mod_l2_entry(l2_pgentry_t *pl
   3.197  
   3.198          /* Fast path for identical mapping and presence. */
   3.199          if ( !l2e_has_changed(ol2e, nl2e, _PAGE_PRESENT))
   3.200 -            return UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn);
   3.201 +            return UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, current);
   3.202  
   3.203          if ( unlikely(!get_page_from_l2e(nl2e, pfn, current->domain)) )
   3.204              return 0;
   3.205  
   3.206 -        if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn)) )
   3.207 +        if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, current)) )
   3.208          {
   3.209              put_page_from_l2e(nl2e, pfn);
   3.210              return 0;
   3.211          }
   3.212      }
   3.213 -    else if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn)) )
   3.214 +    else if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, current)) )
   3.215      {
   3.216          return 0;
   3.217      }
   3.218 @@ -1381,18 +1387,18 @@ static int mod_l3_entry(l3_pgentry_t *pl
   3.219  
   3.220          /* Fast path for identical mapping and presence. */
   3.221          if (!l3e_has_changed(ol3e, nl3e, _PAGE_PRESENT))
   3.222 -            return UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn);
   3.223 +            return UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, current);
   3.224  
   3.225          if ( unlikely(!get_page_from_l3e(nl3e, pfn, current->domain)) )
   3.226              return 0;
   3.227  
   3.228 -        if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn)) )
   3.229 +        if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, current)) )
   3.230          {
   3.231              put_page_from_l3e(nl3e, pfn);
   3.232              return 0;
   3.233          }
   3.234      }
   3.235 -    else if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn)) )
   3.236 +    else if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, current)) )
   3.237      {
   3.238          return 0;
   3.239      }
   3.240 @@ -1439,18 +1445,18 @@ static int mod_l4_entry(l4_pgentry_t *pl
   3.241  
   3.242          /* Fast path for identical mapping and presence. */
   3.243          if (!l4e_has_changed(ol4e, nl4e, _PAGE_PRESENT))
   3.244 -            return UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn);
   3.245 +            return UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, current);
   3.246  
   3.247          if ( unlikely(!get_page_from_l4e(nl4e, pfn, current->domain)) )
   3.248              return 0;
   3.249  
   3.250 -        if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn)) )
   3.251 +        if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, current)) )
   3.252          {
   3.253              put_page_from_l4e(nl4e, pfn);
   3.254              return 0;
   3.255          }
   3.256      }
   3.257 -    else if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn)) )
   3.258 +    else if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, current)) )
   3.259      {
   3.260          return 0;
   3.261      }
   3.262 @@ -2292,15 +2298,11 @@ int do_mmu_update(
   3.263                      break;
   3.264  
   3.265                  if ( unlikely(shadow_mode_enabled(d)) )
   3.266 -                    shadow_lock(d);
   3.267 -
   3.268 -                *(intpte_t *)va = req.val;
   3.269 -                okay = 1;
   3.270 -
   3.271 -                if ( unlikely(shadow_mode_enabled(d)) )
   3.272 +                    okay = shadow_write_guest_entry(v, va, req.val, _mfn(mfn));
   3.273 +                else
   3.274                  {
   3.275 -                    shadow_validate_guest_entry(v, _mfn(mfn), va);
   3.276 -                    shadow_unlock(d);
   3.277 +                    *(intpte_t *)va = req.val;
   3.278 +                    okay = 1;
   3.279                  }
   3.280  
   3.281                  put_page_type(page);
   3.282 @@ -2409,7 +2411,7 @@ static int create_grant_pte_mapping(
   3.283      }
   3.284  
   3.285      ol1e = *(l1_pgentry_t *)va;
   3.286 -    if ( !update_l1e(va, ol1e, nl1e, mfn, v) )
   3.287 +    if ( !UPDATE_ENTRY(l1, va, ol1e, nl1e, mfn, v) )
   3.288      {
   3.289          put_page_type(page);
   3.290          rc = GNTST_general_error;
   3.291 @@ -2477,7 +2479,7 @@ static int destroy_grant_pte_mapping(
   3.292      }
   3.293  
   3.294      /* Delete pagetable entry. */
   3.295 -    if ( unlikely(!update_l1e(
   3.296 +    if ( unlikely(!UPDATE_ENTRY(l1, 
   3.297                        (l1_pgentry_t *)va, ol1e, l1e_empty(), mfn, 
   3.298                        d->vcpu[0] /* Change if we go to per-vcpu shadows. */)) )
   3.299      {
   3.300 @@ -2515,7 +2517,7 @@ static int create_grant_va_mapping(
   3.301          return GNTST_general_error;
   3.302      }
   3.303      ol1e = *pl1e;
   3.304 -    okay = update_l1e(pl1e, ol1e, nl1e, gl1mfn, v);
   3.305 +    okay = UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, v);
   3.306      guest_unmap_l1e(v, pl1e);
   3.307      pl1e = NULL;
   3.308  
   3.309 @@ -2553,7 +2555,7 @@ static int destroy_grant_va_mapping(
   3.310      }
   3.311  
   3.312      /* Delete pagetable entry. */
   3.313 -    if ( unlikely(!update_l1e(pl1e, ol1e, l1e_empty(), gl1mfn, v)) )
   3.314 +    if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, l1e_empty(), gl1mfn, v)) )
   3.315      {
   3.316          MEM_LOG("Cannot delete PTE entry at %p", (unsigned long *)pl1e);
   3.317          rc = GNTST_general_error;
   3.318 @@ -2952,16 +2954,6 @@ long arch_memory_op(int op, XEN_GUEST_HA
   3.319  
   3.320          UNLOCK_BIGLOCK(d);
   3.321  
   3.322 -        /* If we're doing FAST_FAULT_PATH, then shadow mode may have
   3.323 -           cached the fact that this is an mmio region in the shadow
   3.324 -           page tables.  Blow the tables away to remove the cache.
   3.325 -           This is pretty heavy handed, but this is a rare operation
   3.326 -           (it might happen a dozen times during boot and then never
   3.327 -           again), so it doesn't matter too much. */
   3.328 -        shadow_lock(d);
   3.329 -        shadow_blow_tables(d);
   3.330 -        shadow_unlock(d);
   3.331 -
   3.332          put_domain(d);
   3.333  
   3.334          break;
   3.335 @@ -3188,27 +3180,30 @@ static int ptwr_emulated_update(
   3.336      pl1e = (l1_pgentry_t *)((unsigned long)pl1e + (addr & ~PAGE_MASK));
   3.337      if ( do_cmpxchg )
   3.338      {
   3.339 -        if ( shadow_mode_enabled(d) )
   3.340 -            shadow_lock(d);
   3.341 +        int okay;
   3.342          ol1e = l1e_from_intpte(old);
   3.343 -        if ( cmpxchg((intpte_t *)pl1e, old, val) != old )
   3.344 +
   3.345 +        if ( shadow_mode_enabled(d) )
   3.346          {
   3.347 -            if ( shadow_mode_enabled(d) )
   3.348 -                shadow_unlock(d);
   3.349 +            intpte_t t = old;
   3.350 +            okay = shadow_cmpxchg_guest_entry(v, (intpte_t *) pl1e, 
   3.351 +                                              &t, val, _mfn(mfn));
   3.352 +            okay = (okay && t == old);
   3.353 +        }
   3.354 +        else 
   3.355 +            okay = (cmpxchg((intpte_t *)pl1e, old, val) == old);
   3.356 +
   3.357 +        if ( !okay )
   3.358 +        {
   3.359              unmap_domain_page(pl1e);
   3.360              put_page_from_l1e(gl1e_to_ml1e(d, nl1e), d);
   3.361              return X86EMUL_CMPXCHG_FAILED;
   3.362          }
   3.363 -        if ( unlikely(shadow_mode_enabled(d)) )
   3.364 -        {
   3.365 -            shadow_validate_guest_entry(v, _mfn(page_to_mfn(page)), pl1e);
   3.366 -            shadow_unlock(d);    
   3.367 -        }
   3.368      }
   3.369      else
   3.370      {
   3.371          ol1e = *pl1e;
   3.372 -        if ( !update_l1e(pl1e, ol1e, nl1e, page_to_mfn(page), v) )
   3.373 +        if ( !UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, page_to_mfn(page), v) )
   3.374              BUG();
   3.375      }
   3.376  
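
The UPDATE_ENTRY macro introduced above is a thin type adapter over
update_intpte(): the token-pasting turns the level prefix into the matching
entry accessor, so every pagetable level shares the single locking-aware
write path.  As an illustration (names as in the mm.c hunks above), a call
such as

    UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, current)

expands to

    update_intpte((intpte_t *)(pl1e),
                  l1e_get_intpte(ol1e), l1e_get_intpte(nl1e),
                  (gl1mfn), (current))
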
     4.1 --- a/xen/arch/x86/mm/shadow/common.c	Wed Dec 20 11:59:54 2006 +0000
     4.2 +++ b/xen/arch/x86/mm/shadow/common.c	Wed Dec 20 12:03:07 2006 +0000
     4.3 @@ -38,6 +38,21 @@
     4.4  #include <asm/shadow.h>
     4.5  #include "private.h"
     4.6  
     4.7 +
     4.8 +/* Set up the shadow-specific parts of a domain struct at start of day.
     4.9 + * Called for every domain from arch_domain_create() */
    4.10 +void shadow_domain_init(struct domain *d)
    4.11 +{
    4.12 +    int i;
    4.13 +    shadow_lock_init(d);
    4.14 +    for ( i = 0; i <= SHADOW_MAX_ORDER; i++ )
    4.15 +        INIT_LIST_HEAD(&d->arch.shadow.freelists[i]);
    4.16 +    INIT_LIST_HEAD(&d->arch.shadow.p2m_freelist);
    4.17 +    INIT_LIST_HEAD(&d->arch.shadow.p2m_inuse);
    4.18 +    INIT_LIST_HEAD(&d->arch.shadow.pinned_shadows);
    4.19 +}
    4.20 +
    4.21 +
    4.22  #if SHADOW_AUDIT
    4.23  int shadow_audit_enable = 0;
    4.24  
    4.25 @@ -434,7 +449,7 @@ void shadow_promote(struct vcpu *v, mfn_
    4.26      ASSERT(mfn_valid(gmfn));
    4.27  
    4.28      /* We should never try to promote a gmfn that has writeable mappings */
    4.29 -    ASSERT(shadow_remove_write_access(v, gmfn, 0, 0) == 0);
    4.30 +    ASSERT(sh_remove_write_access(v, gmfn, 0, 0) == 0);
    4.31  
    4.32      /* Is the page already shadowed? */
    4.33      if ( !test_and_set_bit(_PGC_page_table, &page->count_info) )
    4.34 @@ -466,8 +481,7 @@ void shadow_demote(struct vcpu *v, mfn_t
    4.35   * Returns a bitmask of SHADOW_SET_* flags. */
    4.36  
    4.37  int
    4.38 -__shadow_validate_guest_entry(struct vcpu *v, mfn_t gmfn, 
    4.39 -                               void *entry, u32 size)
    4.40 +sh_validate_guest_entry(struct vcpu *v, mfn_t gmfn, void *entry, u32 size)
    4.41  {
    4.42      int result = 0;
    4.43      struct page_info *page = mfn_to_page(gmfn);
    4.44 @@ -546,22 +560,9 @@ int
    4.45  }
    4.46  
    4.47  
    4.48 -int
    4.49 -shadow_validate_guest_entry(struct vcpu *v, mfn_t gmfn, void *entry)
    4.50 -/* This is the entry point from hypercalls. It returns a bitmask of all the 
    4.51 - * results of shadow_set_l*e() calls, so the caller knows to do TLB flushes. */
    4.52 -{
    4.53 -    int rc;
    4.54 -
    4.55 -    ASSERT(shadow_locked_by_me(v->domain));
    4.56 -    rc = __shadow_validate_guest_entry(v, gmfn, entry, sizeof(l1_pgentry_t));
    4.57 -    shadow_audit_tables(v);
    4.58 -    return rc;
    4.59 -}
    4.60 -
    4.61  void
    4.62 -shadow_validate_guest_pt_write(struct vcpu *v, mfn_t gmfn,
    4.63 -                                void *entry, u32 size)
    4.64 +sh_validate_guest_pt_write(struct vcpu *v, mfn_t gmfn,
    4.65 +                           void *entry, u32 size)
    4.66  /* This is the entry point for emulated writes to pagetables in HVM guests and
    4.67   * PV translated guests.
    4.68   */
    4.69 @@ -570,7 +571,7 @@ shadow_validate_guest_pt_write(struct vc
    4.70      int rc;
    4.71  
    4.72      ASSERT(shadow_locked_by_me(v->domain));
    4.73 -    rc = __shadow_validate_guest_entry(v, gmfn, entry, size);
    4.74 +    rc = sh_validate_guest_entry(v, gmfn, entry, size);
    4.75      if ( rc & SHADOW_SET_FLUSH )
    4.76          /* Need to flush TLBs to pick up shadow PT changes */
    4.77          flush_tlb_mask(d->domain_dirty_cpumask);
    4.78 @@ -585,6 +586,38 @@ shadow_validate_guest_pt_write(struct vc
    4.79      }
    4.80  }
    4.81  
    4.82 +int shadow_write_guest_entry(struct vcpu *v, intpte_t *p,
    4.83 +                             intpte_t new, mfn_t gmfn)
    4.84 +/* Write a new value into the guest pagetable, and update the shadows 
    4.85 + * appropriately.  Returns 0 if we page-faulted, 1 for success. */
    4.86 +{
    4.87 +    int failed;
    4.88 +    shadow_lock(v->domain);
    4.89 +    failed = __copy_to_user(p, &new, sizeof(new));
    4.90 +    if ( failed != sizeof(new) )
    4.91 +        sh_validate_guest_entry(v, gmfn, p, sizeof(new));
    4.92 +    shadow_unlock(v->domain);
    4.93 +    return (failed == 0);
    4.94 +}
    4.95 +
    4.96 +int shadow_cmpxchg_guest_entry(struct vcpu *v, intpte_t *p,
    4.97 +                               intpte_t *old, intpte_t new, mfn_t gmfn)
    4.98 +/* Cmpxchg a new value into the guest pagetable, and update the shadows 
    4.99 + * appropriately. Returns 0 if we page-faulted, 1 if not.
   4.100 + * N.B. caller should check the value of "old" to see if the
   4.101 + * cmpxchg itself was successful. */
   4.102 +{
   4.103 +    int failed;
   4.104 +    intpte_t t = *old;
   4.105 +    shadow_lock(v->domain);
   4.106 +    failed = cmpxchg_user(p, t, new);
   4.107 +    if ( t == *old )
   4.108 +        sh_validate_guest_entry(v, gmfn, p, sizeof(new));
   4.109 +    *old = t;
   4.110 +    shadow_unlock(v->domain);
   4.111 +    return (failed == 0);
   4.112 +}
   4.113 +
   4.114  
   4.115  /**************************************************************************/
   4.116  /* Memory management for shadow pages. */ 
   4.117 @@ -791,7 +824,7 @@ void shadow_prealloc(struct domain *d, u
   4.118  
   4.119  /* Deliberately free all the memory we can: this will tear down all of
   4.120   * this domain's shadows */
   4.121 -void shadow_blow_tables(struct domain *d) 
   4.122 +static void shadow_blow_tables(struct domain *d) 
   4.123  {
   4.124      struct list_head *l, *t;
   4.125      struct shadow_page_info *sp;
   4.126 @@ -989,7 +1022,7 @@ void shadow_free(struct domain *d, mfn_t
   4.127   * Also, we only ever allocate a max-order chunk, so as to preserve
   4.128   * the invariant that shadow_prealloc() always works.
   4.129   * Returns 0 iff it can't get a chunk (the caller should then
   4.130 - * free up some pages in domheap and call set_sh_allocation);
   4.131 + * free up some pages in domheap and call sh_set_allocation);
   4.132   * returns non-zero on success.
   4.133   */
   4.134  static int
   4.135 @@ -1149,14 +1182,14 @@ p2m_next_level(struct domain *d, mfn_t *
   4.136                  if ( pagetable_get_pfn(v->arch.guest_table) 
   4.137                       == pagetable_get_pfn(d->arch.phys_table) 
   4.138                       && v->arch.shadow.mode != NULL )
   4.139 -                    v->arch.shadow.mode->update_cr3(v);
   4.140 +                    v->arch.shadow.mode->update_cr3(v, 0);
   4.141              }
   4.142          }
   4.143  #endif
   4.144          /* The P2M can be shadowed: keep the shadows synced */
   4.145          if ( d->vcpu[0] != NULL )
   4.146 -            (void)__shadow_validate_guest_entry(d->vcpu[0], *table_mfn,
   4.147 -                                                p2m_entry, sizeof *p2m_entry);
   4.148 +            (void)sh_validate_guest_entry(d->vcpu[0], *table_mfn,
   4.149 +                                          p2m_entry, sizeof *p2m_entry);
   4.150      }
   4.151      *table_mfn = _mfn(l1e_get_pfn(*p2m_entry));
   4.152      next = sh_map_domain_page(*table_mfn);
   4.153 @@ -1216,8 +1249,8 @@ shadow_set_p2m_entry(struct domain *d, u
   4.154  
   4.155      /* The P2M can be shadowed: keep the shadows synced */
   4.156      if ( d->vcpu[0] != NULL )
   4.157 -        (void)__shadow_validate_guest_entry(
   4.158 -            d->vcpu[0], table_mfn, p2m_entry, sizeof(*p2m_entry));
   4.159 +        (void)sh_validate_guest_entry(d->vcpu[0], table_mfn, 
   4.160 +                                      p2m_entry, sizeof(*p2m_entry));
   4.161  
   4.162      /* Success */
   4.163      rv = 1;
   4.164 @@ -1427,9 +1460,9 @@ static void shadow_p2m_teardown(struct d
   4.165   * Input will be rounded up to at least shadow_min_acceptable_pages(),
   4.166   * plus space for the p2m table.
   4.167   * Returns 0 for success, non-zero for failure. */
   4.168 -static unsigned int set_sh_allocation(struct domain *d, 
   4.169 -                                       unsigned int pages,
   4.170 -                                       int *preempted)
   4.171 +static unsigned int sh_set_allocation(struct domain *d, 
   4.172 +                                      unsigned int pages,
   4.173 +                                      int *preempted)
   4.174  {
   4.175      struct shadow_page_info *sp;
   4.176      unsigned int lower_bound;
   4.177 @@ -1499,20 +1532,12 @@ static unsigned int set_sh_allocation(st
   4.178      return 0;
   4.179  }
   4.180  
   4.181 -unsigned int shadow_set_allocation(struct domain *d, 
   4.182 -                                    unsigned int megabytes,
   4.183 -                                    int *preempted)
   4.184 -/* Hypercall interface to set the shadow memory allocation */
   4.185 +/* Return the size of the shadow pool, rounded up to the nearest MB */
   4.186 +static unsigned int shadow_get_allocation(struct domain *d)
   4.187  {
   4.188 -    unsigned int rv;
   4.189 -    shadow_lock(d);
   4.190 -    rv = set_sh_allocation(d, megabytes << (20 - PAGE_SHIFT), preempted); 
   4.191 -    SHADOW_PRINTK("dom %u allocation now %u pages (%u MB)\n",
   4.192 -                   d->domain_id,
   4.193 -                   d->arch.shadow.total_pages,
   4.194 -                   shadow_get_allocation(d));
   4.195 -    shadow_unlock(d);
   4.196 -    return rv;
   4.197 +    unsigned int pg = d->arch.shadow.total_pages;
   4.198 +    return ((pg >> (20 - PAGE_SHIFT))
   4.199 +            + ((pg & ((1 << (20 - PAGE_SHIFT)) - 1)) ? 1 : 0));
   4.200  }
   4.201  
   4.202  /**************************************************************************/
   4.203 @@ -1889,24 +1914,24 @@ void sh_destroy_shadow(struct vcpu *v, m
    4.204   * level and fault_addr describe how we found this to be a pagetable;
   4.205   * level==0 means we have some other reason for revoking write access.*/
   4.206  
   4.207 -int shadow_remove_write_access(struct vcpu *v, mfn_t gmfn, 
   4.208 -                                unsigned int level,
   4.209 -                                unsigned long fault_addr)
   4.210 +int sh_remove_write_access(struct vcpu *v, mfn_t gmfn, 
   4.211 +                           unsigned int level,
   4.212 +                           unsigned long fault_addr)
   4.213  {
   4.214      /* Dispatch table for getting per-type functions */
   4.215      static hash_callback_t callbacks[16] = {
   4.216          NULL, /* none    */
   4.217  #if CONFIG_PAGING_LEVELS == 2
   4.218 -        SHADOW_INTERNAL_NAME(sh_remove_write_access,2,2), /* l1_32   */
   4.219 -        SHADOW_INTERNAL_NAME(sh_remove_write_access,2,2), /* fl1_32  */
   4.220 +        SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,2,2), /* l1_32   */
   4.221 +        SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,2,2), /* fl1_32  */
   4.222  #else 
   4.223 -        SHADOW_INTERNAL_NAME(sh_remove_write_access,3,2), /* l1_32   */
   4.224 -        SHADOW_INTERNAL_NAME(sh_remove_write_access,3,2), /* fl1_32  */
   4.225 +        SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,3,2), /* l1_32   */
   4.226 +        SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,3,2), /* fl1_32  */
   4.227  #endif
   4.228          NULL, /* l2_32   */
   4.229  #if CONFIG_PAGING_LEVELS >= 3
   4.230 -        SHADOW_INTERNAL_NAME(sh_remove_write_access,3,3), /* l1_pae  */
   4.231 -        SHADOW_INTERNAL_NAME(sh_remove_write_access,3,3), /* fl1_pae */
   4.232 +        SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,3,3), /* l1_pae  */
   4.233 +        SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,3,3), /* fl1_pae */
   4.234  #else 
   4.235          NULL, /* l1_pae  */
   4.236          NULL, /* fl1_pae */
   4.237 @@ -1914,8 +1939,8 @@ int shadow_remove_write_access(struct vc
   4.238          NULL, /* l2_pae  */
   4.239          NULL, /* l2h_pae */
   4.240  #if CONFIG_PAGING_LEVELS >= 4
   4.241 -        SHADOW_INTERNAL_NAME(sh_remove_write_access,4,4), /* l1_64   */
   4.242 -        SHADOW_INTERNAL_NAME(sh_remove_write_access,4,4), /* fl1_64  */
   4.243 +        SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,4,4), /* l1_64   */
   4.244 +        SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,4,4), /* fl1_64  */
   4.245  #else
   4.246          NULL, /* l1_64   */
   4.247          NULL, /* fl1_64  */
   4.248 @@ -2077,25 +2102,25 @@ int shadow_remove_write_access(struct vc
   4.249  /* Remove all mappings of a guest frame from the shadow tables.
   4.250   * Returns non-zero if we need to flush TLBs. */
   4.251  
   4.252 -int shadow_remove_all_mappings(struct vcpu *v, mfn_t gmfn)
   4.253 +int sh_remove_all_mappings(struct vcpu *v, mfn_t gmfn)
   4.254  {
   4.255      struct page_info *page = mfn_to_page(gmfn);
   4.256 -    int expected_count;
   4.257 +    int expected_count, do_locking;
   4.258  
   4.259      /* Dispatch table for getting per-type functions */
   4.260      static hash_callback_t callbacks[16] = {
   4.261          NULL, /* none    */
   4.262  #if CONFIG_PAGING_LEVELS == 2
   4.263 -        SHADOW_INTERNAL_NAME(sh_remove_all_mappings,2,2), /* l1_32   */
   4.264 -        SHADOW_INTERNAL_NAME(sh_remove_all_mappings,2,2), /* fl1_32  */
   4.265 +        SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,2,2), /* l1_32   */
   4.266 +        SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,2,2), /* fl1_32  */
   4.267  #else 
   4.268 -        SHADOW_INTERNAL_NAME(sh_remove_all_mappings,3,2), /* l1_32   */
   4.269 -        SHADOW_INTERNAL_NAME(sh_remove_all_mappings,3,2), /* fl1_32  */
   4.270 +        SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,3,2), /* l1_32   */
   4.271 +        SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,3,2), /* fl1_32  */
   4.272  #endif
   4.273          NULL, /* l2_32   */
   4.274  #if CONFIG_PAGING_LEVELS >= 3
   4.275 -        SHADOW_INTERNAL_NAME(sh_remove_all_mappings,3,3), /* l1_pae  */
   4.276 -        SHADOW_INTERNAL_NAME(sh_remove_all_mappings,3,3), /* fl1_pae */
   4.277 +        SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,3,3), /* l1_pae  */
   4.278 +        SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,3,3), /* fl1_pae */
   4.279  #else 
   4.280          NULL, /* l1_pae  */
   4.281          NULL, /* fl1_pae */
   4.282 @@ -2103,8 +2128,8 @@ int shadow_remove_all_mappings(struct vc
   4.283          NULL, /* l2_pae  */
   4.284          NULL, /* l2h_pae */
   4.285  #if CONFIG_PAGING_LEVELS >= 4
   4.286 -        SHADOW_INTERNAL_NAME(sh_remove_all_mappings,4,4), /* l1_64   */
   4.287 -        SHADOW_INTERNAL_NAME(sh_remove_all_mappings,4,4), /* fl1_64  */
   4.288 +        SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,4,4), /* l1_64   */
   4.289 +        SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,4,4), /* fl1_64  */
   4.290  #else
   4.291          NULL, /* l1_64   */
   4.292          NULL, /* fl1_64  */
   4.293 @@ -2129,7 +2154,12 @@ int shadow_remove_all_mappings(struct vc
   4.294      if ( (page->count_info & PGC_count_mask) == 0 )
   4.295          return 0;
   4.296  
   4.297 -    ASSERT(shadow_locked_by_me(v->domain));
   4.298 +    /* Although this is an externally visible function, we do not know
   4.299 +     * whether the shadow lock will be held when it is called (since it
   4.300 +     * can be called via put_page_type when we clear a shadow l1e).
   4.301 +     * If the lock isn't held, take it for the duration of the call. */
   4.302 +    do_locking = !shadow_locked_by_me(v->domain);
   4.303 +    if ( do_locking ) shadow_lock(v->domain);
   4.304  
   4.305      /* XXX TODO: 
   4.306       * Heuristics for finding the (probably) single mapping of this gmfn */
   4.307 @@ -2154,6 +2184,8 @@ int shadow_remove_all_mappings(struct vc
   4.308          }
   4.309      }
   4.310  
   4.311 +    if ( do_locking ) shadow_unlock(v->domain);
   4.312 +
   4.313      /* We killed at least one mapping, so must flush TLBs. */
   4.314      return 1;
   4.315  }
   4.316 @@ -2236,9 +2268,10 @@ void sh_remove_shadows(struct vcpu *v, m
   4.317   * (all != 0 implies fast == 0)
   4.318   */
   4.319  {
   4.320 -    struct page_info *pg;
   4.321 +    struct page_info *pg = mfn_to_page(gmfn);
   4.322      mfn_t smfn;
   4.323      u32 sh_flags;
   4.324 +    int do_locking;
   4.325      unsigned char t;
   4.326      
   4.327      /* Dispatch table for getting per-type functions: each level must
   4.328 @@ -2296,15 +2329,19 @@ void sh_remove_shadows(struct vcpu *v, m
   4.329          0  /* unused  */
   4.330      };
   4.331  
   4.332 -    ASSERT(shadow_locked_by_me(v->domain));
   4.333      ASSERT(!(all && fast));
   4.334  
   4.335 -    pg = mfn_to_page(gmfn);
   4.336 -
   4.337      /* Bail out now if the page is not shadowed */
   4.338      if ( (pg->count_info & PGC_page_table) == 0 )
   4.339          return;
   4.340  
   4.341 +    /* Although this is an externally visible function, we do not know
   4.342 +     * whether the shadow lock will be held when it is called (since it
   4.343 +     * can be called via put_page_type when we clear a shadow l1e).
   4.344 +     * If the lock isn't held, take it for the duration of the call. */
   4.345 +    do_locking = !shadow_locked_by_me(v->domain);
   4.346 +    if ( do_locking ) shadow_lock(v->domain);
   4.347 +
   4.348      SHADOW_PRINTK("d=%d, v=%d, gmfn=%05lx\n",
   4.349                     v->domain->domain_id, v->vcpu_id, mfn_x(gmfn));
   4.350  
   4.351 @@ -2356,14 +2393,16 @@ void sh_remove_shadows(struct vcpu *v, m
   4.352      /* Need to flush TLBs now, so that linear maps are safe next time we 
   4.353       * take a fault. */
   4.354      flush_tlb_mask(v->domain->domain_dirty_cpumask);
   4.355 +
   4.356 +    if ( do_locking ) shadow_unlock(v->domain);
   4.357  }
   4.358  
   4.359 -void
   4.360 -shadow_remove_all_shadows_and_parents(struct vcpu *v, mfn_t gmfn)
   4.361 +static void
   4.362 +sh_remove_all_shadows_and_parents(struct vcpu *v, mfn_t gmfn)
    4.363 +/* Even harsher: this is an HVM page that we think is no longer a pagetable.
   4.364   * Unshadow it, and recursively unshadow pages that reference it. */
   4.365  {
   4.366 -    shadow_remove_all_shadows(v, gmfn);
   4.367 +    sh_remove_shadows(v, gmfn, 0, 1);
   4.368      /* XXX TODO:
   4.369       * Rework this hashtable walker to return a linked-list of all 
   4.370       * the shadows it modified, then do breadth-first recursion 
   4.371 @@ -2376,7 +2415,7 @@ shadow_remove_all_shadows_and_parents(st
   4.372  
   4.373  /**************************************************************************/
   4.374  
   4.375 -void sh_update_paging_modes(struct vcpu *v)
   4.376 +static void sh_update_paging_modes(struct vcpu *v)
   4.377  {
   4.378      struct domain *d = v->domain;
   4.379      struct shadow_paging_mode *old_mode = v->arch.shadow.mode;
   4.380 @@ -2394,7 +2433,8 @@ void sh_update_paging_modes(struct vcpu 
   4.381  
   4.382      // First, tear down any old shadow tables held by this vcpu.
   4.383      //
   4.384 -    shadow_detach_old_tables(v);
   4.385 +    if ( v->arch.shadow.mode )
   4.386 +        v->arch.shadow.mode->detach_old_tables(v);
   4.387  
   4.388      if ( !is_hvm_domain(d) )
   4.389      {
   4.390 @@ -2402,10 +2442,9 @@ void sh_update_paging_modes(struct vcpu 
   4.391          /// PV guest
   4.392          ///
   4.393  #if CONFIG_PAGING_LEVELS == 4
   4.394 -        if ( pv_32bit_guest(v) )
   4.395 -            v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,4,3);
   4.396 -        else
   4.397 -            v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,4,4);
   4.398 +        /* When 32-on-64 PV guests are supported, they must choose 
   4.399 +         * a different mode here */
   4.400 +        v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,4,4);
   4.401  #elif CONFIG_PAGING_LEVELS == 3
   4.402          v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3);
   4.403  #elif CONFIG_PAGING_LEVELS == 2
   4.404 @@ -2493,7 +2532,7 @@ void sh_update_paging_modes(struct vcpu 
   4.405  
   4.406          if ( pagetable_is_null(v->arch.monitor_table) )
   4.407          {
   4.408 -            mfn_t mmfn = shadow_make_monitor_table(v);
   4.409 +            mfn_t mmfn = v->arch.shadow.mode->make_monitor_table(v);
   4.410              v->arch.monitor_table = pagetable_from_mfn(mmfn);
   4.411              make_cr3(v, mfn_x(mmfn));
   4.412              hvm_update_host_cr3(v);
   4.413 @@ -2528,7 +2567,7 @@ void sh_update_paging_modes(struct vcpu 
   4.414  
   4.415                  old_mfn = pagetable_get_mfn(v->arch.monitor_table);
   4.416                  v->arch.monitor_table = pagetable_null();
   4.417 -                new_mfn = v->arch.shadow.mode->make_monitor_table(v);            
   4.418 +                new_mfn = v->arch.shadow.mode->make_monitor_table(v);
   4.419                  v->arch.monitor_table = pagetable_from_mfn(new_mfn);
   4.420                  SHADOW_PRINTK("new monitor table %"SH_PRI_mfn "\n",
   4.421                                 mfn_x(new_mfn));
   4.422 @@ -2549,7 +2588,14 @@ void sh_update_paging_modes(struct vcpu 
   4.423          //        This *does* happen, at least for CR4.PGE...
   4.424      }
   4.425  
   4.426 -    v->arch.shadow.mode->update_cr3(v);
   4.427 +    v->arch.shadow.mode->update_cr3(v, 0);
   4.428 +}
   4.429 +
   4.430 +void shadow_update_paging_modes(struct vcpu *v)
   4.431 +{
   4.432 +    shadow_lock(v->domain);
   4.433 +    sh_update_paging_modes(v);
   4.434 +    shadow_unlock(v->domain);
   4.435  }
   4.436  
   4.437  /**************************************************************************/
   4.438 @@ -2610,9 +2656,9 @@ int shadow_enable(struct domain *d, u32 
   4.439      /* Init the shadow memory allocation if the user hasn't done so */
   4.440      old_pages = d->arch.shadow.total_pages;
   4.441      if ( old_pages == 0 )
   4.442 -        if ( set_sh_allocation(d, 256, NULL) != 0 ) /* Use at least 1MB */
   4.443 +        if ( sh_set_allocation(d, 256, NULL) != 0 ) /* Use at least 1MB */
   4.444          {
   4.445 -            set_sh_allocation(d, 0, NULL);
   4.446 +            sh_set_allocation(d, 0, NULL);
   4.447              rv = -ENOMEM;
   4.448              goto out;
   4.449          }
   4.450 @@ -2620,7 +2666,7 @@ int shadow_enable(struct domain *d, u32 
   4.451      /* Init the hash table */
   4.452      if ( shadow_hash_alloc(d) != 0 )
   4.453      {
   4.454 -        set_sh_allocation(d, old_pages, NULL);            
   4.455 +        sh_set_allocation(d, old_pages, NULL);            
   4.456          rv = -ENOMEM;
   4.457          goto out;
   4.458      }
   4.459 @@ -2630,7 +2676,7 @@ int shadow_enable(struct domain *d, u32 
   4.460          if ( !shadow_alloc_p2m_table(d) )
   4.461          {
   4.462              shadow_hash_teardown(d);
   4.463 -            set_sh_allocation(d, old_pages, NULL);
   4.464 +            sh_set_allocation(d, old_pages, NULL);
   4.465              shadow_p2m_teardown(d);
   4.466              rv = -ENOMEM;
   4.467              goto out;
   4.468 @@ -2669,13 +2715,16 @@ void shadow_teardown(struct domain *d)
   4.469          /* Release the shadow and monitor tables held by each vcpu */
   4.470          for_each_vcpu(d, v)
   4.471          {
   4.472 -            shadow_detach_old_tables(v);
   4.473 -            if ( shadow_mode_external(d) )
   4.474 +            if ( v->arch.shadow.mode )
   4.475              {
   4.476 -                mfn = pagetable_get_mfn(v->arch.monitor_table);
   4.477 -                if ( mfn_valid(mfn) && (mfn_x(mfn) != 0) )
   4.478 -                    shadow_destroy_monitor_table(v, mfn);
   4.479 -                v->arch.monitor_table = pagetable_null();
   4.480 +                v->arch.shadow.mode->detach_old_tables(v);
   4.481 +                if ( shadow_mode_external(d) )
   4.482 +                {
   4.483 +                    mfn = pagetable_get_mfn(v->arch.monitor_table);
   4.484 +                    if ( mfn_valid(mfn) && (mfn_x(mfn) != 0) )
   4.485 +                        v->arch.shadow.mode->destroy_monitor_table(v, mfn);
   4.486 +                    v->arch.monitor_table = pagetable_null();
   4.487 +                }
   4.488              }
   4.489          }
   4.490      }
   4.491 @@ -2689,7 +2738,7 @@ void shadow_teardown(struct domain *d)
   4.492                         d->arch.shadow.free_pages, 
   4.493                         d->arch.shadow.p2m_pages);
   4.494          /* Destroy all the shadows and release memory to domheap */
   4.495 -        set_sh_allocation(d, 0, NULL);
   4.496 +        sh_set_allocation(d, 0, NULL);
   4.497          /* Release the hash table back to xenheap */
   4.498          if (d->arch.shadow.hash_table) 
   4.499              shadow_hash_teardown(d);
   4.500 @@ -2755,10 +2804,10 @@ static int shadow_one_bit_enable(struct 
   4.501      if ( d->arch.shadow.mode == 0 )
   4.502      {
   4.503          /* Init the shadow memory allocation and the hash table */
   4.504 -        if ( set_sh_allocation(d, 1, NULL) != 0 
   4.505 +        if ( sh_set_allocation(d, 1, NULL) != 0 
   4.506               || shadow_hash_alloc(d) != 0 )
   4.507          {
   4.508 -            set_sh_allocation(d, 0, NULL);
   4.509 +            sh_set_allocation(d, 0, NULL);
   4.510              return -ENOMEM;
   4.511          }
   4.512      }
   4.513 @@ -2794,7 +2843,8 @@ static int shadow_one_bit_disable(struct
   4.514                         d->arch.shadow.p2m_pages);
   4.515          for_each_vcpu(d, v)
   4.516          {
   4.517 -            shadow_detach_old_tables(v);
   4.518 +            if ( v->arch.shadow.mode )
   4.519 +                v->arch.shadow.mode->detach_old_tables(v);
   4.520  #if CONFIG_PAGING_LEVELS == 4
   4.521              if ( !(v->arch.flags & TF_kernel_mode) )
   4.522                  make_cr3(v, pagetable_get_pfn(v->arch.guest_table_user));
   4.523 @@ -2805,7 +2855,7 @@ static int shadow_one_bit_disable(struct
   4.524          }
   4.525  
   4.526          /* Pull down the memory allocation */
   4.527 -        if ( set_sh_allocation(d, 0, NULL) != 0 )
   4.528 +        if ( sh_set_allocation(d, 0, NULL) != 0 )
   4.529          {
   4.530              // XXX - How can this occur?
   4.531              //       Seems like a bug to return an error now that we've
   4.532 @@ -2826,7 +2876,7 @@ static int shadow_one_bit_disable(struct
   4.533  }
   4.534  
   4.535  /* Enable/disable ops for the "test" and "log-dirty" modes */
   4.536 -int shadow_test_enable(struct domain *d)
   4.537 +static int shadow_test_enable(struct domain *d)
   4.538  {
   4.539      int ret;
   4.540  
   4.541 @@ -2849,7 +2899,7 @@ int shadow_test_enable(struct domain *d)
   4.542      return ret;
   4.543  }
   4.544  
   4.545 -int shadow_test_disable(struct domain *d)
   4.546 +static int shadow_test_disable(struct domain *d)
   4.547  {
   4.548      int ret;
   4.549  
   4.550 @@ -2968,8 +3018,8 @@ sh_p2m_remove_page(struct domain *d, uns
   4.551  
   4.552      if ( v != NULL )
   4.553      {
   4.554 -        shadow_remove_all_shadows_and_parents(v, _mfn(mfn));
   4.555 -        if ( shadow_remove_all_mappings(v, _mfn(mfn)) )
   4.556 +        sh_remove_all_shadows_and_parents(v, _mfn(mfn));
   4.557 +        if ( sh_remove_all_mappings(v, _mfn(mfn)) )
   4.558              flush_tlb_mask(d->domain_dirty_cpumask);
   4.559      }
   4.560  
   4.561 @@ -3012,8 +3062,8 @@ shadow_guest_physmap_add_page(struct dom
   4.562              v = d->vcpu[0];
   4.563          if ( v != NULL )
   4.564          {
   4.565 -            shadow_remove_all_shadows_and_parents(v, omfn);
   4.566 -            if ( shadow_remove_all_mappings(v, omfn) )
   4.567 +            sh_remove_all_shadows_and_parents(v, omfn);
   4.568 +            if ( sh_remove_all_mappings(v, omfn) )
   4.569                  flush_tlb_mask(d->domain_dirty_cpumask);
   4.570          }
   4.571          set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
   4.572 @@ -3043,6 +3093,17 @@ shadow_guest_physmap_add_page(struct dom
   4.573  
   4.574      shadow_set_p2m_entry(d, gfn, _mfn(mfn));
   4.575      set_gpfn_from_mfn(mfn, gfn);
   4.576 +
   4.577 +#if (SHADOW_OPTIMIZATIONS & SHOPT_FAST_FAULT_PATH)
   4.578 +    /* If we're doing FAST_FAULT_PATH, then shadow mode may have
   4.579 +       cached the fact that this is an mmio region in the shadow
   4.580 +       page tables.  Blow the tables away to remove the cache.
   4.581 +       This is pretty heavy handed, but this is a rare operation
   4.582 +       (it might happen a dozen times during boot and then never
   4.583 +       again), so it doesn't matter too much. */
   4.584 +    shadow_blow_tables(d);
   4.585 +#endif
   4.586 +
   4.587      shadow_audit_p2m(d);
   4.588      shadow_unlock(d);
   4.589  }
   4.590 @@ -3130,14 +3191,13 @@ static int shadow_log_dirty_op(
   4.591  
   4.592  
   4.593  /* Mark a page as dirty */
   4.594 -void sh_do_mark_dirty(struct domain *d, mfn_t gmfn)
   4.595 +void sh_mark_dirty(struct domain *d, mfn_t gmfn)
   4.596  {
   4.597      unsigned long pfn;
   4.598  
   4.599      ASSERT(shadow_locked_by_me(d));
   4.600 -    ASSERT(shadow_mode_log_dirty(d));
   4.601 -
   4.602 -    if ( !mfn_valid(gmfn) )
   4.603 +
   4.604 +    if ( !shadow_mode_log_dirty(d) || !mfn_valid(gmfn) )
   4.605          return;
   4.606  
   4.607      ASSERT(d->arch.shadow.dirty_bitmap != NULL);
   4.608 @@ -3181,13 +3241,19 @@ void sh_do_mark_dirty(struct domain *d, 
   4.609      }
   4.610  }
   4.611  
   4.612 +void shadow_mark_dirty(struct domain *d, mfn_t gmfn)
   4.613 +{
   4.614 +    shadow_lock(d);
   4.615 +    sh_mark_dirty(d, gmfn);
   4.616 +    shadow_unlock(d);
   4.617 +}
   4.618  
   4.619  /**************************************************************************/
   4.620  /* Shadow-control XEN_DOMCTL dispatcher */
   4.621  
   4.622  int shadow_domctl(struct domain *d, 
   4.623 -                   xen_domctl_shadow_op_t *sc,
   4.624 -                   XEN_GUEST_HANDLE(xen_domctl_t) u_domctl)
   4.625 +                  xen_domctl_shadow_op_t *sc,
   4.626 +                  XEN_GUEST_HANDLE(xen_domctl_t) u_domctl)
   4.627  {
   4.628      int rc, preempted = 0;
   4.629  
   4.630 @@ -3233,7 +3299,9 @@ int shadow_domctl(struct domain *d,
   4.631          return 0;
   4.632  
   4.633      case XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION:
   4.634 -        rc = shadow_set_allocation(d, sc->mb, &preempted);
   4.635 +        shadow_lock(d);
   4.636 +        rc = sh_set_allocation(d, sc->mb << (20 - PAGE_SHIFT), &preempted);
   4.637 +        shadow_unlock(d);
   4.638          if ( preempted )
   4.639              /* Not finished.  Set up to re-run the call. */
   4.640              rc = hypercall_create_continuation(
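
Note that shadow_cmpxchg_guest_entry(), added above, has a two-part contract:
its return value only says whether the guest access faulted, while the
outcome of the cmpxchg itself is reported through *old.  A caller therefore
checks both, as in this sketch condensed from the ptwr_emulated_update hunk
in the mm.c section above (names as in the diff):

    intpte_t t = old;
    okay = shadow_cmpxchg_guest_entry(v, (intpte_t *)pl1e, &t, val, _mfn(mfn));
    okay = (okay && t == old);  /* no fault, and the entry held the old value */
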
     5.1 --- a/xen/arch/x86/mm/shadow/multi.c	Wed Dec 20 11:59:54 2006 +0000
     5.2 +++ b/xen/arch/x86/mm/shadow/multi.c	Wed Dec 20 12:03:07 2006 +0000
     5.3 @@ -243,7 +243,7 @@ guest_walk_tables(struct vcpu *v, unsign
     5.4      gw->l3mfn = vcpu_gfn_to_mfn(v, guest_l4e_get_gfn(*gw->l4e));
     5.5      if ( !mfn_valid(gw->l3mfn) ) return 1;
     5.6      /* This mfn is a pagetable: make sure the guest can't write to it. */
     5.7 -    if ( guest_op && shadow_remove_write_access(v, gw->l3mfn, 3, va) != 0 )
     5.8 +    if ( guest_op && sh_remove_write_access(v, gw->l3mfn, 3, va) != 0 )
     5.9          flush_tlb_mask(v->domain->domain_dirty_cpumask); 
    5.10      gw->l3e = ((guest_l3e_t *)sh_map_domain_page(gw->l3mfn))
    5.11          + guest_l3_table_offset(va);
    5.12 @@ -257,7 +257,7 @@ guest_walk_tables(struct vcpu *v, unsign
    5.13      gw->l2mfn = vcpu_gfn_to_mfn(v, guest_l3e_get_gfn(*gw->l3e));
    5.14      if ( !mfn_valid(gw->l2mfn) ) return 1;
    5.15      /* This mfn is a pagetable: make sure the guest can't write to it. */
    5.16 -    if ( guest_op && shadow_remove_write_access(v, gw->l2mfn, 2, va) != 0 )
    5.17 +    if ( guest_op && sh_remove_write_access(v, gw->l2mfn, 2, va) != 0 )
    5.18          flush_tlb_mask(v->domain->domain_dirty_cpumask); 
    5.19      gw->l2e = ((guest_l2e_t *)sh_map_domain_page(gw->l2mfn))
    5.20          + guest_l2_table_offset(va);
    5.21 @@ -299,7 +299,7 @@ guest_walk_tables(struct vcpu *v, unsign
    5.22          if ( !mfn_valid(gw->l1mfn) ) return 1;
    5.23          /* This mfn is a pagetable: make sure the guest can't write to it. */
    5.24          if ( guest_op 
    5.25 -             && shadow_remove_write_access(v, gw->l1mfn, 1, va) != 0 )
    5.26 +             && sh_remove_write_access(v, gw->l1mfn, 1, va) != 0 )
    5.27              flush_tlb_mask(v->domain->domain_dirty_cpumask); 
    5.28          gw->l1e = ((guest_l1e_t *)sh_map_domain_page(gw->l1mfn))
    5.29              + guest_l1_table_offset(va);
    5.30 @@ -492,7 +492,7 @@ static u32 guest_set_ad_bits(struct vcpu
    5.31          u32 shflags = mfn_to_page(gmfn)->shadow_flags & SHF_page_type_mask;
    5.32          /* More than one type bit set in shadow-flags? */
    5.33          if ( shflags & ~(1UL << find_first_set_bit(shflags)) )
    5.34 -            res = __shadow_validate_guest_entry(v, gmfn, ep, sizeof(*ep));
    5.35 +            res = sh_validate_guest_entry(v, gmfn, ep, sizeof (*ep));
    5.36      }
    5.37  
    5.38      /* We should never need to flush the TLB or recopy PAE entries */
    5.39 @@ -2847,7 +2847,7 @@ static int sh_page_fault(struct vcpu *v,
    5.40          /* If this is actually a page table, then we have a bug, and need 
    5.41           * to support more operations in the emulator.  More likely, 
    5.42           * though, this is a hint that this page should not be shadowed. */
    5.43 -        shadow_remove_all_shadows(v, gmfn);
    5.44 +        sh_remove_shadows(v, gmfn, 0 /* thorough */, 1 /* must succeed */);
    5.45      }
    5.46  
    5.47      /* Emulator has changed the user registers: write back */
    5.48 @@ -3080,7 +3080,7 @@ sh_update_linear_entries(struct vcpu *v)
    5.49              sh_unmap_domain_page(ml4e);
    5.50          }
    5.51  
    5.52 -        /* Shadow l3 tables are made up by update_cr3 */
    5.53 +        /* Shadow l3 tables are made up by sh_update_cr3 */
    5.54          sl3e = v->arch.shadow.l3table;
    5.55  
    5.56          for ( i = 0; i < SHADOW_L3_PAGETABLE_ENTRIES; i++ )
    5.57 @@ -3118,7 +3118,7 @@ sh_update_linear_entries(struct vcpu *v)
    5.58          int unmap_l2e = 0;
    5.59  
    5.60  #if GUEST_PAGING_LEVELS == 2
    5.61 -        /* Shadow l3 tables were built by update_cr3 */
    5.62 +        /* Shadow l3 tables were built by sh_update_cr3 */
    5.63          if ( shadow_mode_external(d) )
    5.64              shadow_l3e = (shadow_l3e_t *)&v->arch.shadow.l3table;
    5.65          else
    5.66 @@ -3341,12 +3341,15 @@ sh_set_toplevel_shadow(struct vcpu *v,
    5.67  
    5.68  
    5.69  static void
    5.70 -sh_update_cr3(struct vcpu *v)
    5.71 +sh_update_cr3(struct vcpu *v, int do_locking)
    5.72  /* Updates vcpu->arch.cr3 after the guest has changed CR3.
    5.73   * Paravirtual guests should set v->arch.guest_table (and guest_table_user,
    5.74   * if appropriate).
    5.75   * HVM guests should also make sure hvm_get_guest_cntl_reg(v, 3) works,
    5.76   * and read vcpu->arch.hvm_vcpu.hw_cr3 afterwards.
    5.77 + * If do_locking != 0, assume we are being called from outside the 
    5.78 + * shadow code, and must take and release the shadow lock; otherwise 
     5.79 + * that is the caller's responsibility.
    5.80   */
    5.81  {
    5.82      struct domain *d = v->domain;
    5.83 @@ -3355,6 +3358,15 @@ sh_update_cr3(struct vcpu *v)
    5.84      u32 guest_idx=0;
    5.85  #endif
    5.86  
    5.87 +    /* Don't do anything on an uninitialised vcpu */
    5.88 +    if ( !is_hvm_domain(d) && !test_bit(_VCPUF_initialised, &v->vcpu_flags) )
    5.89 +    {
    5.90 +        ASSERT(v->arch.cr3 == 0);
    5.91 +        return;
    5.92 +    }
    5.93 +
    5.94 +    if ( do_locking ) shadow_lock(v->domain);
    5.95 +
    5.96      ASSERT(shadow_locked_by_me(v->domain));
    5.97      ASSERT(v->arch.shadow.mode);
    5.98  
    5.99 @@ -3400,11 +3412,6 @@ sh_update_cr3(struct vcpu *v)
   5.100  #endif
   5.101          gmfn = pagetable_get_mfn(v->arch.guest_table);
   5.102  
   5.103 -    if ( !is_hvm_domain(d) && !test_bit(_VCPUF_initialised, &v->vcpu_flags) )
   5.104 -    {
   5.105 -        ASSERT(v->arch.cr3 == 0);
   5.106 -        return;
   5.107 -    }
   5.108  
   5.109      ////
   5.110      //// vcpu->arch.guest_vtable
   5.111 @@ -3466,7 +3473,7 @@ sh_update_cr3(struct vcpu *v)
   5.112       * replace the old shadow pagetable(s), so that we can safely use the 
   5.113       * (old) shadow linear maps in the writeable mapping heuristics. */
   5.114  #if GUEST_PAGING_LEVELS == 2
   5.115 -    if ( shadow_remove_write_access(v, gmfn, 2, 0) != 0 )
   5.116 +    if ( sh_remove_write_access(v, gmfn, 2, 0) != 0 )
   5.117          flush_tlb_mask(v->domain->domain_dirty_cpumask); 
   5.118      sh_set_toplevel_shadow(v, 0, gmfn, SH_type_l2_shadow);
   5.119  #elif GUEST_PAGING_LEVELS == 3
   5.120 @@ -3484,7 +3491,7 @@ sh_update_cr3(struct vcpu *v)
   5.121              {
   5.122                  gl2gfn = guest_l3e_get_gfn(gl3e[i]);
   5.123                  gl2mfn = vcpu_gfn_to_mfn(v, gl2gfn);
   5.124 -                flush |= shadow_remove_write_access(v, gl2mfn, 2, 0); 
   5.125 +                flush |= sh_remove_write_access(v, gl2mfn, 2, 0); 
   5.126              }
   5.127          }
   5.128          if ( flush ) 
   5.129 @@ -3506,7 +3513,7 @@ sh_update_cr3(struct vcpu *v)
   5.130          }
   5.131      }
   5.132  #elif GUEST_PAGING_LEVELS == 4
   5.133 -    if ( shadow_remove_write_access(v, gmfn, 4, 0) != 0 )
   5.134 +    if ( sh_remove_write_access(v, gmfn, 4, 0) != 0 )
   5.135          flush_tlb_mask(v->domain->domain_dirty_cpumask);
   5.136      sh_set_toplevel_shadow(v, 0, gmfn, SH_type_l4_shadow);
   5.137  #else
   5.138 @@ -3582,6 +3589,9 @@ sh_update_cr3(struct vcpu *v)
   5.139  
   5.140      /* Fix up the linear pagetable mappings */
   5.141      sh_update_linear_entries(v);
   5.142 +
   5.143 +    /* Release the lock, if we took it (otherwise it's the caller's problem) */
   5.144 +    if ( do_locking ) shadow_unlock(v->domain);
   5.145  }
   5.146  
   5.147  
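[Editor's note: sh_update_cr3() can now be reached both from inside the shadow code (lock already held, do_locking=0) and from outside (do_locking=1). A minimal sketch of the conditional-locking idiom used above, with a hypothetical function name:

    /* Sketch only: take the shadow lock unless the caller already
     * holds it, and assert that it is held either way. */
    static void sh_example_op(struct vcpu *v, int do_locking)
    {
        if ( do_locking ) shadow_lock(v->domain);
        ASSERT(shadow_locked_by_me(v->domain));
        /* ... work that needs the shadow lock ... */
        if ( do_locking ) shadow_unlock(v->domain);
    }
]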
   5.148 @@ -3637,7 +3647,8 @@ static int sh_guess_wrmap(struct vcpu *v
   5.149  }
   5.150  #endif
   5.151  
   5.152 -int sh_remove_write_access(struct vcpu *v, mfn_t sl1mfn, mfn_t readonly_mfn)
   5.153 +int sh_rm_write_access_from_l1(struct vcpu *v, mfn_t sl1mfn,
   5.154 +                               mfn_t readonly_mfn)
   5.155  /* Excises all writeable mappings to readonly_mfn from this l1 shadow table */
   5.156  {
   5.157      shadow_l1e_t *sl1e;
   5.158 @@ -3668,7 +3679,7 @@ int sh_remove_write_access(struct vcpu *
   5.159  }
   5.160  
   5.161  
   5.162 -int sh_remove_all_mappings(struct vcpu *v, mfn_t sl1mfn, mfn_t target_mfn)
   5.163 +int sh_rm_mappings_from_l1(struct vcpu *v, mfn_t sl1mfn, mfn_t target_mfn)
   5.164  /* Excises all mappings to guest frame from this shadow l1 table */
   5.165  {
   5.166      shadow_l1e_t *sl1e;
   5.167 @@ -3888,7 +3899,7 @@ sh_x86_emulate_write(struct vcpu *v, uns
   5.168  
   5.169      skip = safe_not_to_verify_write(mfn, addr, src, bytes);
   5.170      memcpy(addr, src, bytes);
   5.171 -    if ( !skip ) shadow_validate_guest_pt_write(v, mfn, addr, bytes);
   5.172 +    if ( !skip ) sh_validate_guest_pt_write(v, mfn, addr, bytes);
   5.173  
   5.174      /* If we are writing zeros to this page, might want to unshadow */
   5.175      if ( likely(bytes >= 4) && (*(u32 *)addr == 0) )
   5.176 @@ -3933,7 +3944,7 @@ sh_x86_emulate_cmpxchg(struct vcpu *v, u
   5.177  
   5.178      if ( prev == old )
   5.179      {
   5.180 -        if ( !skip ) shadow_validate_guest_pt_write(v, mfn, addr, bytes);
   5.181 +        if ( !skip ) sh_validate_guest_pt_write(v, mfn, addr, bytes);
   5.182      }
   5.183      else
   5.184          rv = X86EMUL_CMPXCHG_FAILED;
   5.185 @@ -3977,7 +3988,7 @@ sh_x86_emulate_cmpxchg8b(struct vcpu *v,
   5.186  
   5.187      if ( prev == old )
   5.188      {
   5.189 -        if ( !skip ) shadow_validate_guest_pt_write(v, mfn, addr, 8);
   5.190 +        if ( !skip ) sh_validate_guest_pt_write(v, mfn, addr, 8);
   5.191      }
   5.192      else
   5.193          rv = X86EMUL_CMPXCHG_FAILED;
     6.1 --- a/xen/arch/x86/mm/shadow/multi.h	Wed Dec 20 11:59:54 2006 +0000
     6.2 +++ b/xen/arch/x86/mm/shadow/multi.h	Wed Dec 20 12:03:07 2006 +0000
     6.3 @@ -61,10 +61,10 @@ SHADOW_INTERNAL_NAME(sh_unhook_64b_mappi
     6.4      (struct vcpu *v, mfn_t sl4mfn);
     6.5  
     6.6  extern int
     6.7 -SHADOW_INTERNAL_NAME(sh_remove_write_access, SHADOW_LEVELS, GUEST_LEVELS)
     6.8 +SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1, SHADOW_LEVELS, GUEST_LEVELS)
     6.9      (struct vcpu *v, mfn_t sl1mfn, mfn_t readonly_mfn);
    6.10  extern int
    6.11 -SHADOW_INTERNAL_NAME(sh_remove_all_mappings, SHADOW_LEVELS, GUEST_LEVELS)
    6.12 +SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1, SHADOW_LEVELS, GUEST_LEVELS)
    6.13      (struct vcpu *v, mfn_t sl1mfn, mfn_t target_mfn);
    6.14  
    6.15  extern void
     7.1 --- a/xen/arch/x86/mm/shadow/private.h	Wed Dec 20 11:59:54 2006 +0000
     7.2 +++ b/xen/arch/x86/mm/shadow/private.h	Wed Dec 20 12:03:07 2006 +0000
     7.3 @@ -33,8 +33,43 @@
     7.4  
     7.5  
     7.6  /******************************************************************************
     7.7 + * Levels of self-test and paranoia
     7.8 + */
     7.9 +
    7.10 +#define SHADOW_AUDIT_HASH           0x01  /* Check current hash bucket */
    7.11 +#define SHADOW_AUDIT_HASH_FULL      0x02  /* Check every hash bucket */
    7.12 +#define SHADOW_AUDIT_ENTRIES        0x04  /* Check this walk's shadows */
    7.13 +#define SHADOW_AUDIT_ENTRIES_FULL   0x08  /* Check every shadow */
    7.14 +#define SHADOW_AUDIT_ENTRIES_MFNS   0x10  /* Check gfn-mfn map in shadows */
    7.15 +#define SHADOW_AUDIT_P2M            0x20  /* Check the p2m table */
    7.16 +
    7.17 +#ifdef NDEBUG
    7.18 +#define SHADOW_AUDIT                   0
    7.19 +#define SHADOW_AUDIT_ENABLE            0
    7.20 +#else
    7.21 +#define SHADOW_AUDIT                0x15  /* Basic audit of all except p2m. */
    7.22 +#define SHADOW_AUDIT_ENABLE         shadow_audit_enable
    7.23 +extern int shadow_audit_enable;
    7.24 +#endif
    7.25 +
    7.26 +/******************************************************************************
    7.27 + * Levels of optimization
    7.28 + */
    7.29 +
    7.30 +#define SHOPT_WRITABLE_HEURISTIC  0x01  /* Guess at RW PTEs via linear maps */
    7.31 +#define SHOPT_EARLY_UNSHADOW      0x02  /* Unshadow l1s on fork or exit */
    7.32 +#define SHOPT_FAST_FAULT_PATH     0x04  /* Fast-path MMIO and not-present */
    7.33 +#define SHOPT_PREFETCH            0x08  /* Shadow multiple entries per fault */
    7.34 +#define SHOPT_LINUX_L3_TOPLEVEL   0x10  /* Pin l3es on early 64bit linux */
    7.35 +#define SHOPT_SKIP_VERIFY         0x20  /* Skip PTE v'fy when safe to do so */
    7.36 +
    7.37 +#define SHADOW_OPTIMIZATIONS      0x3f
    7.38 +
    7.39 +
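[Editor's note: the optimization bits are tested at preprocessing time, so a cleared bit compiles its code out entirely. A minimal sketch of the intended guard style, matching existing uses elsewhere in the shadow code:

    #if SHADOW_OPTIMIZATIONS & SHOPT_PREFETCH
        /* ... code that shadows several entries per fault ... */
    #endif
]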
    7.40 +/******************************************************************************
    7.41   * Debug and error-message output
    7.42   */
    7.43 +
    7.44  #define SHADOW_PRINTK(_f, _a...)                                     \
    7.45      debugtrace_printk("sh: %s(): " _f, __func__, ##_a)
    7.46  #define SHADOW_ERROR(_f, _a...)                                      \
    7.47 @@ -54,6 +89,58 @@
    7.48  #define SHADOW_DEBUG_EMULATE           1
    7.49  #define SHADOW_DEBUG_LOGDIRTY          0
    7.50  
    7.51 +/******************************************************************************
    7.52 + * The shadow lock.
    7.53 + *
    7.54 + * This lock is per-domain.  It is intended to allow us to make atomic
    7.55 + * updates to the software TLB that the shadow tables provide.
    7.56 + * 
    7.57 + * Specifically, it protects:
    7.58 + *   - all changes to shadow page table pages
    7.59 + *   - the shadow hash table
    7.60 + *   - the shadow page allocator 
    7.61 + *   - all changes to guest page table pages
    7.62 + *   - all changes to the page_info->tlbflush_timestamp
    7.63 + *   - the page_info->count fields on shadow pages
    7.64 + *   - the shadow dirty bit array and count
    7.65 + */
    7.66 +#ifndef CONFIG_SMP
    7.67 +#error shadow.h currently requires CONFIG_SMP
    7.68 +#endif
    7.69 +
    7.70 +#define shadow_lock_init(_d)                            \
    7.71 +    do {                                                \
    7.72 +        spin_lock_init(&(_d)->arch.shadow.lock);        \
    7.73 +        (_d)->arch.shadow.locker = -1;                  \
    7.74 +        (_d)->arch.shadow.locker_function = "nobody";   \
    7.75 +    } while (0)
    7.76 +
    7.77 +#define shadow_locked_by_me(_d)                     \
    7.78 +    (current->processor == (_d)->arch.shadow.locker)
    7.79 +
    7.80 +#define shadow_lock(_d)                                                 \
    7.81 +    do {                                                                \
    7.82 +        if ( unlikely((_d)->arch.shadow.locker == current->processor) ) \
    7.83 +        {                                                               \
    7.84 +            printk("Error: shadow lock held by %s\n",                   \
    7.85 +                   (_d)->arch.shadow.locker_function);                  \
    7.86 +            BUG();                                                      \
    7.87 +        }                                                               \
    7.88 +        spin_lock(&(_d)->arch.shadow.lock);                             \
    7.89 +        ASSERT((_d)->arch.shadow.locker == -1);                         \
    7.90 +        (_d)->arch.shadow.locker = current->processor;                  \
    7.91 +        (_d)->arch.shadow.locker_function = __func__;                   \
    7.92 +    } while (0)
    7.93 +
    7.94 +#define shadow_unlock(_d)                                       \
    7.95 +    do {                                                        \
    7.96 +        ASSERT((_d)->arch.shadow.locker == current->processor); \
    7.97 +        (_d)->arch.shadow.locker = -1;                          \
    7.98 +        (_d)->arch.shadow.locker_function = "nobody";           \
    7.99 +        spin_unlock(&(_d)->arch.shadow.lock);                   \
   7.100 +    } while (0)
   7.101 +
   7.102 +
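[Editor's note: the lock is deliberately not recursive; re-taking it on the same CPU hits the BUG() in shadow_lock(). A minimal sketch of the expected discipline, with a hypothetical function name:

    /* Sketch only: bracket every update to shadow state with the
     * per-domain lock; never call back into a path that locks again. */
    static void sh_example_locked_update(struct domain *d)
    {
        shadow_lock(d);
        ASSERT(shadow_locked_by_me(d));
        /* ... modify shadows, hash table, or the page allocator ... */
        shadow_unlock(d);
    }
]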
   7.103  
   7.104  /******************************************************************************
   7.105   * Auditing routines 
   7.106 @@ -291,6 +378,21 @@ void sh_install_xen_entries_in_l4(struct
   7.107  void sh_install_xen_entries_in_l2h(struct vcpu *v, mfn_t sl2hmfn);
   7.108  void sh_install_xen_entries_in_l2(struct vcpu *v, mfn_t gl2mfn, mfn_t sl2mfn);
   7.109  
   7.110 +/* Update the shadows in response to a pagetable write from Xen */
   7.111 +extern int sh_validate_guest_entry(struct vcpu *v, mfn_t gmfn, 
   7.112 +                                   void *entry, u32 size);
   7.113 +
   7.114 +/* Update the shadows in response to a pagetable write from a HVM guest */
   7.115 +extern void sh_validate_guest_pt_write(struct vcpu *v, mfn_t gmfn, 
   7.116 +                                       void *entry, u32 size);
   7.117 +
   7.118 +/* Remove all writeable mappings of a guest frame from the shadows.
   7.119 + * Returns non-zero if we need to flush TLBs. 
    7.120 + * level and fault_addr describe how we found this to be a pagetable;
   7.121 + * level==0 means we have some other reason for revoking write access. */
   7.122 +extern int sh_remove_write_access(struct vcpu *v, mfn_t readonly_mfn,
   7.123 +                                  unsigned int level,
   7.124 +                                  unsigned long fault_addr);
   7.125  
   7.126  /******************************************************************************
   7.127   * Flags used in the return value of the shadow_set_lXe() functions...
   7.128 @@ -325,6 +427,26 @@ void sh_install_xen_entries_in_l2(struct
   7.129  #undef mfn_valid
   7.130  #define mfn_valid(_mfn) (mfn_x(_mfn) < max_page)
   7.131  
   7.132 +
   7.133 +static inline int
   7.134 +sh_mfn_is_a_page_table(mfn_t gmfn)
   7.135 +{
   7.136 +    struct page_info *page = mfn_to_page(gmfn);
   7.137 +    struct domain *owner;
   7.138 +    unsigned long type_info;
   7.139 +
   7.140 +    if ( !mfn_valid(gmfn) )
   7.141 +        return 0;
   7.142 +
   7.143 +    owner = page_get_owner(page);
   7.144 +    if ( owner && shadow_mode_refcounts(owner) 
   7.145 +         && (page->count_info & PGC_page_table) )
   7.146 +        return 1; 
   7.147 +
   7.148 +    type_info = page->u.inuse.type_info & PGT_type_mask;
   7.149 +    return type_info && (type_info <= PGT_l4_page_table);
   7.150 +}
   7.151 +
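[Editor's note: callers use this predicate to decide whether a guest write touches a frame whose shadows must be revalidated. A hedged sketch of such a check (hypothetical caller, using sh_validate_guest_pt_write() declared above):

    /* Sketch only: propagate a write into the shadows when the target
     * frame is (or recently was) a guest pagetable. */
    if ( sh_mfn_is_a_page_table(gmfn) )
        sh_validate_guest_pt_write(v, gmfn, addr, bytes);
]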
   7.152  // Provide mfn_t-aware versions of common xen functions
   7.153  static inline void *
   7.154  sh_map_domain_page(mfn_t mfn)
   7.155 @@ -350,6 +472,25 @@ sh_unmap_domain_page_global(void *p)
   7.156      unmap_domain_page_global(p);
   7.157  }
   7.158  
   7.159 +static inline mfn_t
   7.160 +pagetable_get_mfn(pagetable_t pt)
   7.161 +{
   7.162 +    return _mfn(pagetable_get_pfn(pt));
   7.163 +}
   7.164 +
   7.165 +static inline pagetable_t
   7.166 +pagetable_from_mfn(mfn_t mfn)
   7.167 +{
   7.168 +    return pagetable_from_pfn(mfn_x(mfn));
   7.169 +}
   7.170 +
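[Editor's note: these two helpers just bridge the type-safe mfn_t and the pagetable_t wrapper, e.g.:

    /* Sketch: round-trip a vcpu's guest table through mfn_t. */
    mfn_t gmfn = pagetable_get_mfn(v->arch.guest_table);
    v->arch.guest_table = pagetable_from_mfn(gmfn);
]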
   7.171 +
   7.172 +/******************************************************************************
   7.173 + * Log-dirty mode bitmap handling
   7.174 + */
   7.175 +
   7.176 +extern void sh_mark_dirty(struct domain *d, mfn_t gmfn);
   7.177 +
   7.178  static inline int
   7.179  sh_mfn_is_dirty(struct domain *d, mfn_t gmfn)
   7.180  /* Is this guest page dirty?  Call only in log-dirty mode. */
   7.181 @@ -368,25 +509,6 @@ sh_mfn_is_dirty(struct domain *d, mfn_t 
   7.182      return 0;
   7.183  }
   7.184  
   7.185 -static inline int
   7.186 -sh_mfn_is_a_page_table(mfn_t gmfn)
   7.187 -{
   7.188 -    struct page_info *page = mfn_to_page(gmfn);
   7.189 -    struct domain *owner;
   7.190 -    unsigned long type_info;
   7.191 -
   7.192 -    if ( !mfn_valid(gmfn) )
   7.193 -        return 0;
   7.194 -
   7.195 -    owner = page_get_owner(page);
   7.196 -    if ( owner && shadow_mode_refcounts(owner) 
   7.197 -         && (page->count_info & PGC_page_table) )
   7.198 -        return 1; 
   7.199 -
   7.200 -    type_info = page->u.inuse.type_info & PGT_type_mask;
   7.201 -    return type_info && (type_info <= PGT_l4_page_table);
   7.202 -}
   7.203 -
   7.204  
   7.205  /**************************************************************************/
   7.206  /* Shadow-page refcounting. */
     8.1 --- a/xen/arch/x86/mm/shadow/types.h	Wed Dec 20 11:59:54 2006 +0000
     8.2 +++ b/xen/arch/x86/mm/shadow/types.h	Wed Dec 20 12:03:07 2006 +0000
     8.3 @@ -477,8 +477,8 @@ struct shadow_walk_t
     8.4  #define sh_gva_to_gpa              INTERNAL_NAME(sh_gva_to_gpa)
     8.5  #define sh_gva_to_gfn              INTERNAL_NAME(sh_gva_to_gfn)
     8.6  #define sh_update_cr3              INTERNAL_NAME(sh_update_cr3)
     8.7 -#define sh_remove_write_access     INTERNAL_NAME(sh_remove_write_access)
     8.8 -#define sh_remove_all_mappings     INTERNAL_NAME(sh_remove_all_mappings)
     8.9 +#define sh_rm_write_access_from_l1 INTERNAL_NAME(sh_rm_write_access_from_l1)
    8.10 +#define sh_rm_mappings_from_l1     INTERNAL_NAME(sh_rm_mappings_from_l1)
    8.11  #define sh_remove_l1_shadow        INTERNAL_NAME(sh_remove_l1_shadow)
    8.12  #define sh_remove_l2_shadow        INTERNAL_NAME(sh_remove_l2_shadow)
    8.13  #define sh_remove_l3_shadow        INTERNAL_NAME(sh_remove_l3_shadow)
     9.1 --- a/xen/include/asm-x86/mm.h	Wed Dec 20 11:59:54 2006 +0000
     9.2 +++ b/xen/include/asm-x86/mm.h	Wed Dec 20 12:03:07 2006 +0000
     9.3 @@ -307,7 +307,7 @@ void audit_domains(void);
     9.4  
     9.5  int new_guest_cr3(unsigned long pfn);
     9.6  void make_cr3(struct vcpu *v, unsigned long mfn);
     9.7 -
     9.8 +void update_cr3(struct vcpu *v);
     9.9  void propagate_page_fault(unsigned long addr, u16 error_code);
    9.10  
    9.11  int __sync_lazy_execstate(void);
    10.1 --- a/xen/include/asm-x86/shadow.h	Wed Dec 20 11:59:54 2006 +0000
    10.2 +++ b/xen/include/asm-x86/shadow.h	Wed Dec 20 12:03:07 2006 +0000
    10.3 @@ -29,20 +29,8 @@
    10.4  #include <xen/domain_page.h>
    10.5  #include <asm/flushtlb.h>
    10.6  
    10.7 -/* How to make sure a page is not referred to in a shadow PT */
    10.8 -/* This will need to be a for_each_vcpu if we go to per-vcpu shadows */ 
    10.9 -#define shadow_drop_references(_d, _p)                      \
   10.10 -    shadow_remove_all_mappings((_d)->vcpu[0], _mfn(page_to_mfn(_p)))
   10.11 -#define shadow_sync_and_drop_references(_d, _p)             \
   10.12 -    shadow_remove_all_mappings((_d)->vcpu[0], _mfn(page_to_mfn(_p)))
   10.13 -
   10.14 -/* How to add and remove entries in the p2m mapping. */
   10.15 -#define guest_physmap_add_page(_d, _p, _m)                  \
   10.16 -    shadow_guest_physmap_add_page((_d), (_p), (_m))
   10.17 -#define guest_physmap_remove_page(_d, _p, _m   )            \
   10.18 -    shadow_guest_physmap_remove_page((_d), (_p), (_m))
   10.19 -
   10.20 -/* Shadow PT operation mode : shadow-mode variable in arch_domain. */
   10.21 +/*****************************************************************************
   10.22 + * Macros to tell which shadow paging mode a domain is in */
   10.23  
   10.24  #define SHM2_shift 10
   10.25  /* We're in one of the shadow modes */
   10.26 @@ -64,107 +52,24 @@
   10.27  #define shadow_mode_external(_d)  ((_d)->arch.shadow.mode & SHM2_external)
   10.28  
   10.29  /* Xen traps & emulates all reads of all page table pages:
   10.30 - * not yet supported
   10.31 - */
   10.32 + * not yet supported */
   10.33  #define shadow_mode_trap_reads(_d) ({ (void)(_d); 0; })
   10.34  
   10.35 -// How do we tell that we have a 32-bit PV guest in a 64-bit Xen?
   10.36 -#ifdef __x86_64__
   10.37 -#define pv_32bit_guest(_v) 0 // not yet supported
   10.38 -#else
   10.39 -#define pv_32bit_guest(_v) !is_hvm_vcpu(v)
   10.40 -#endif
   10.41  
   10.42 -/* The shadow lock.
   10.43 - *
   10.44 - * This lock is per-domain.  It is intended to allow us to make atomic
   10.45 - * updates to the software TLB that the shadow tables provide.
   10.46 - * 
   10.47 - * Specifically, it protects:
   10.48 - *   - all changes to shadow page table pages
   10.49 - *   - the shadow hash table
   10.50 - *   - the shadow page allocator 
   10.51 - *   - all changes to guest page table pages; if/when the notion of
   10.52 - *     out-of-sync pages is added to this code, then the shadow lock is
   10.53 - *     protecting all guest page table pages which are not listed as
   10.54 - *     currently as both guest-writable and out-of-sync...
   10.55 - *     XXX -- need to think about this relative to writable page tables.
   10.56 - *   - all changes to the page_info->tlbflush_timestamp
   10.57 - *   - the page_info->count fields on shadow pages
   10.58 - *   - the shadow dirty bit array and count
   10.59 - *   - XXX
   10.60 - */
   10.61 -#ifndef CONFIG_SMP
   10.62 -#error shadow.h currently requires CONFIG_SMP
   10.63 -#endif
   10.64 -
   10.65 -#define shadow_lock_init(_d)                            \
   10.66 -    do {                                                \
   10.67 -        spin_lock_init(&(_d)->arch.shadow.lock);        \
   10.68 -        (_d)->arch.shadow.locker = -1;                  \
   10.69 -        (_d)->arch.shadow.locker_function = "nobody";   \
   10.70 -    } while (0)
   10.71 -
   10.72 -#define shadow_locked_by_me(_d)                     \
   10.73 -    (current->processor == (_d)->arch.shadow.locker)
   10.74 +/******************************************************************************
   10.75 + * The equivalent tests for a particular vcpu of a shadowed domain. */
   10.76  
   10.77 -#define shadow_lock(_d)                                                 \
   10.78 -    do {                                                                \
   10.79 -        if ( unlikely((_d)->arch.shadow.locker == current->processor) ) \
   10.80 -        {                                                               \
   10.81 -            printk("Error: shadow lock held by %s\n",                   \
   10.82 -                   (_d)->arch.shadow.locker_function);                  \
   10.83 -            BUG();                                                      \
   10.84 -        }                                                               \
   10.85 -        spin_lock(&(_d)->arch.shadow.lock);                             \
   10.86 -        ASSERT((_d)->arch.shadow.locker == -1);                         \
   10.87 -        (_d)->arch.shadow.locker = current->processor;                  \
   10.88 -        (_d)->arch.shadow.locker_function = __func__;                   \
   10.89 -    } while (0)
   10.90 -
   10.91 -#define shadow_unlock(_d)                                       \
   10.92 -    do {                                                        \
   10.93 -        ASSERT((_d)->arch.shadow.locker == current->processor); \
   10.94 -        (_d)->arch.shadow.locker = -1;                          \
   10.95 -        (_d)->arch.shadow.locker_function = "nobody";           \
   10.96 -        spin_unlock(&(_d)->arch.shadow.lock);                   \
   10.97 -    } while (0)
   10.98 -
   10.99 -/* 
  10.100 - * Levels of self-test and paranoia
  10.101 - * XXX should go in config files somewhere?  
  10.102 - */
  10.103 -#define SHADOW_AUDIT_HASH           0x01  /* Check current hash bucket */
  10.104 -#define SHADOW_AUDIT_HASH_FULL      0x02  /* Check every hash bucket */
  10.105 -#define SHADOW_AUDIT_ENTRIES        0x04  /* Check this walk's shadows */
  10.106 -#define SHADOW_AUDIT_ENTRIES_FULL   0x08  /* Check every shadow */
  10.107 -#define SHADOW_AUDIT_ENTRIES_MFNS   0x10  /* Check gfn-mfn map in shadows */
  10.108 -#define SHADOW_AUDIT_P2M            0x20  /* Check the p2m table */
  10.109 -
  10.110 -#ifdef NDEBUG
  10.111 -#define SHADOW_AUDIT                   0
  10.112 -#define SHADOW_AUDIT_ENABLE            0
  10.113 -#else
  10.114 -#define SHADOW_AUDIT                0x15  /* Basic audit of all except p2m. */
  10.115 -#define SHADOW_AUDIT_ENABLE         shadow_audit_enable
  10.116 -extern int shadow_audit_enable;
  10.117 -#endif
  10.118 -
  10.119 -/* 
  10.120 - * Levels of optimization
  10.121 - * XXX should go in config files somewhere?  
  10.122 - */
  10.123 -#define SHOPT_WRITABLE_HEURISTIC  0x01  /* Guess at RW PTEs via linear maps */
  10.124 -#define SHOPT_EARLY_UNSHADOW      0x02  /* Unshadow l1s on fork or exit */
  10.125 -#define SHOPT_FAST_FAULT_PATH     0x04  /* Fast-path MMIO and not-present */
  10.126 -#define SHOPT_PREFETCH            0x08  /* Shadow multiple entries per fault */
  10.127 -#define SHOPT_LINUX_L3_TOPLEVEL   0x10  /* Pin l3es on early 64bit linux */
  10.128 -#define SHOPT_SKIP_VERIFY         0x20  /* Skip PTE v'fy when safe to do so */
  10.129 -
  10.130 -#define SHADOW_OPTIMIZATIONS      0x3f
  10.131 +/* Is this vcpu using the P2M table to translate between GFNs and MFNs?
  10.132 + *
  10.133 + * This is true of translated HVM domains on a vcpu which has paging
  10.134 + * enabled.  (HVM vcpus with paging disabled are using the p2m table as
   10.135 + * their paging table, so no translation occurs in this case.)
  10.136 + * It is also true for all vcpus of translated PV domains. */
  10.137 +#define shadow_vcpu_mode_translate(_v) ((_v)->arch.shadow.translate_enabled)
  10.138  
  10.139  
  10.140 -/* With shadow pagetables, the different kinds of address start 
  10.141 +/******************************************************************************
  10.142 + * With shadow pagetables, the different kinds of address start 
   10.143  * to get confusing.
  10.144   * 
  10.145   * Virtual addresses are what they usually are: the addresses that are used 
  10.146 @@ -214,38 +119,16 @@ static inline _type _name##_x(_name##_t 
  10.147  #endif
  10.148  
  10.149  TYPE_SAFE(unsigned long,mfn)
  10.150 +
  10.151 +/* Macro for printk formats: use as printk("%"SH_PRI_mfn"\n", mfn_x(foo)); */
  10.152  #define SH_PRI_mfn "05lx"
  10.153  
  10.154 -static inline mfn_t
  10.155 -pagetable_get_mfn(pagetable_t pt)
  10.156 -{
  10.157 -    return _mfn(pagetable_get_pfn(pt));
  10.158 -}
  10.159  
  10.160 -static inline pagetable_t
  10.161 -pagetable_from_mfn(mfn_t mfn)
  10.162 -{
  10.163 -    return pagetable_from_pfn(mfn_x(mfn));
  10.164 -}
  10.165 -
  10.166 -static inline int
  10.167 -shadow_vcpu_mode_translate(struct vcpu *v)
  10.168 -{
  10.169 -    // Returns true if this VCPU needs to be using the P2M table to translate
  10.170 -    // between GFNs and MFNs.
  10.171 -    //
  10.172 -    // This is true of translated HVM domains on a vcpu which has paging
  10.173 -    // enabled.  (HVM vcpu's with paging disabled are using the p2m table as
  10.174 -    // its paging table, so no translation occurs in this case.)
  10.175 -    //
  10.176 -    // It is also true for translated PV domains.
  10.177 -    //
  10.178 -    return v->arch.shadow.translate_enabled;
  10.179 -}
  10.180 -
  10.181 -
  10.182 -/**************************************************************************/
  10.183 -/* Mode-specific entry points into the shadow code */
  10.184 +/*****************************************************************************
  10.185 + * Mode-specific entry points into the shadow code.  
  10.186 + *
  10.187 + * These shouldn't be used directly by callers; rather use the functions
  10.188 + * below which will indirect through this table as appropriate. */
  10.189  
  10.190  struct sh_emulate_ctxt;
  10.191  struct shadow_paging_mode {
  10.192 @@ -254,7 +137,7 @@ struct shadow_paging_mode {
  10.193      int           (*invlpg                )(struct vcpu *v, unsigned long va);
  10.194      paddr_t       (*gva_to_gpa            )(struct vcpu *v, unsigned long va);
  10.195      unsigned long (*gva_to_gfn            )(struct vcpu *v, unsigned long va);
  10.196 -    void          (*update_cr3            )(struct vcpu *v);
  10.197 +    void          (*update_cr3            )(struct vcpu *v, int do_locking);
  10.198      int           (*map_and_validate_gl1e )(struct vcpu *v, mfn_t gmfn,
  10.199                                              void *new_guest_entry, u32 size);
  10.200      int           (*map_and_validate_gl2e )(struct vcpu *v, mfn_t gmfn,
  10.201 @@ -286,35 +169,30 @@ struct shadow_paging_mode {
  10.202                                              unsigned long *gl1mfn);
  10.203      void          (*guest_get_eff_l1e     )(struct vcpu *v, unsigned long va,
  10.204                                              void *eff_l1e);
  10.205 -#if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC
  10.206      int           (*guess_wrmap           )(struct vcpu *v, 
  10.207                                              unsigned long vaddr, mfn_t gmfn);
  10.208 -#endif
  10.209      /* For outsiders to tell what mode we're in */
  10.210      unsigned int shadow_levels;
  10.211      unsigned int guest_levels;
  10.212  };
  10.213  
  10.214 -static inline int shadow_guest_paging_levels(struct vcpu *v)
  10.215 -{
  10.216 -    ASSERT(v->arch.shadow.mode != NULL);
  10.217 -    return v->arch.shadow.mode->guest_levels;
  10.218 -}
  10.219 +
  10.220 +/*****************************************************************************
  10.221 + * Entry points into the shadow code */
  10.222  
  10.223 -/**************************************************************************/
  10.224 -/* Entry points into the shadow code */
  10.225 +/* Set up the shadow-specific parts of a domain struct at start of day.
   10.227 + * Called for every domain from arch_domain_create(). */
  10.227 +void shadow_domain_init(struct domain *d);
  10.228  
  10.229 -/* Enable arbitrary shadow mode. */
  10.230 +/* Enable an arbitrary shadow mode.  Call once at domain creation. */
  10.231  int shadow_enable(struct domain *d, u32 mode);
  10.232  
  10.233 -/* Turning on shadow test mode */
  10.234 -int shadow_test_enable(struct domain *d);
  10.235 -
  10.236 -/* Handler for shadow control ops: enabling and disabling shadow modes, 
  10.237 - * and log-dirty bitmap ops all happen through here. */
  10.238 +/* Handler for shadow control ops: operations from user-space to enable
  10.239 + * and disable ephemeral shadow modes (test mode and log-dirty mode) and
  10.240 + * manipulate the log-dirty bitmap. */
  10.241  int shadow_domctl(struct domain *d, 
  10.242 -                   xen_domctl_shadow_op_t *sc,
  10.243 -                   XEN_GUEST_HANDLE(xen_domctl_t) u_domctl);
  10.244 +                  xen_domctl_shadow_op_t *sc,
  10.245 +                  XEN_GUEST_HANDLE(xen_domctl_t) u_domctl);
  10.246  
  10.247  /* Call when destroying a domain */
  10.248  void shadow_teardown(struct domain *d);
  10.249 @@ -322,164 +200,96 @@ void shadow_teardown(struct domain *d);
  10.250  /* Call once all of the references to the domain have gone away */
  10.251  void shadow_final_teardown(struct domain *d);
  10.252  
  10.253 -
  10.254 -/* Mark a page as dirty in the bitmap */
  10.255 -void sh_do_mark_dirty(struct domain *d, mfn_t gmfn);
  10.256 +/* Mark a page as dirty in the log-dirty bitmap: called when Xen 
   10.257 + * makes changes to guest memory on the guest's behalf. */
  10.258 +void shadow_mark_dirty(struct domain *d, mfn_t gmfn);
  10.259 +/* Cleaner version so we don't pepper shadow_mode tests all over the place */
  10.260  static inline void mark_dirty(struct domain *d, unsigned long gmfn)
  10.261  {
  10.262 -    if ( likely(!shadow_mode_log_dirty(d)) )
  10.263 -        return;
  10.264 -
  10.265 -    shadow_lock(d);
  10.266 -    sh_do_mark_dirty(d, _mfn(gmfn));
  10.267 -    shadow_unlock(d);
  10.268 +    if ( unlikely(shadow_mode_log_dirty(d)) )
  10.269 +        shadow_mark_dirty(d, _mfn(gmfn));
  10.270  }
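[Editor's note: with this change common code can call mark_dirty() unconditionally; the shadow_mode_log_dirty() test keeps it almost free when log-dirty is off. A hedged sketch of a typical caller (hypothetical copy, not from this patch):

    /* Sketch only: after Xen writes a guest frame on the guest's
     * behalf, record it in the log-dirty bitmap if one is active;
     * mfn here is a raw unsigned long frame number. */
    memcpy(mapped_page, buffer, PAGE_SIZE);
    mark_dirty(d, mfn);
]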
  10.271  
  10.272 -/* Internal version, for when the shadow lock is already held */
  10.273 -static inline void sh_mark_dirty(struct domain *d, mfn_t gmfn)
  10.274 -{
  10.275 -    ASSERT(shadow_locked_by_me(d));
  10.276 -    if ( unlikely(shadow_mode_log_dirty(d)) )
  10.277 -        sh_do_mark_dirty(d, gmfn);
  10.278 -}
  10.279 -
  10.280 -static inline int
  10.281 -shadow_fault(unsigned long va, struct cpu_user_regs *regs)
  10.282 -/* Called from pagefault handler in Xen, and from the HVM trap handlers
  10.283 +/* Handle page-faults caused by the shadow pagetable mechanisms.
  10.284 + * Called from pagefault handler in Xen, and from the HVM trap handlers
  10.285   * for pagefaults.  Returns 1 if this fault was an artefact of the
  10.286   * shadow code (and the guest should retry) or 0 if it is not (and the
  10.287   * fault should be handled elsewhere or passed to the guest). */
  10.288 +static inline int shadow_fault(unsigned long va, struct cpu_user_regs *regs)
  10.289  {
  10.290      struct vcpu *v = current;
  10.291      perfc_incrc(shadow_fault);
  10.292      return v->arch.shadow.mode->page_fault(v, va, regs);
  10.293  }
  10.294  
  10.295 -static inline int
  10.296 -shadow_invlpg(struct vcpu *v, unsigned long va)
  10.297 -/* Called when the guest requests an invlpg.  Returns 1 if the invlpg
  10.298 - * instruction should be issued on the hardware, or 0 if it's safe not
  10.299 - * to do so. */
  10.300 +/* Handle invlpg requests on shadowed vcpus. 
  10.301 + * Returns 1 if the invlpg instruction should be issued on the hardware, 
  10.302 + * or 0 if it's safe not to do so. */
  10.303 +static inline int shadow_invlpg(struct vcpu *v, unsigned long va)
  10.304  {
  10.305      return v->arch.shadow.mode->invlpg(v, va);
  10.306  }
  10.307  
  10.308 -static inline paddr_t
  10.309 -shadow_gva_to_gpa(struct vcpu *v, unsigned long va)
  10.310 -/* Called to translate a guest virtual address to what the *guest*
  10.311 - * pagetables would map it to. */
  10.312 +/* Translate a guest virtual address to the physical address that the
  10.313 + * *guest* pagetables would map it to. */
  10.314 +static inline paddr_t shadow_gva_to_gpa(struct vcpu *v, unsigned long va)
  10.315  {
  10.316      if ( unlikely(!shadow_vcpu_mode_translate(v)) )
  10.317          return (paddr_t) va;
  10.318      return v->arch.shadow.mode->gva_to_gpa(v, va);
  10.319  }
  10.320  
  10.321 -static inline unsigned long
  10.322 -shadow_gva_to_gfn(struct vcpu *v, unsigned long va)
  10.323 -/* Called to translate a guest virtual address to what the *guest*
  10.324 - * pagetables would map it to. */
  10.325 +/* Translate a guest virtual address to the frame number that the
  10.326 + * *guest* pagetables would map it to. */
  10.327 +static inline unsigned long shadow_gva_to_gfn(struct vcpu *v, unsigned long va)
  10.328  {
  10.329      if ( unlikely(!shadow_vcpu_mode_translate(v)) )
  10.330          return va >> PAGE_SHIFT;
  10.331      return v->arch.shadow.mode->gva_to_gfn(v, va);
  10.332  }
  10.333  
  10.334 -static inline void
  10.335 -shadow_update_cr3(struct vcpu *v)
  10.336 -/* Updates all the things that are derived from the guest's CR3. 
  10.337 - * Called when the guest changes CR3. */
  10.338 +/* Update all the things that are derived from the guest's CR3. 
  10.339 + * Called when the guest changes CR3; the caller can then use 
  10.340 + * v->arch.cr3 as the value to load into the host CR3 to schedule this vcpu
  10.341 + * and v->arch.hvm_vcpu.hw_cr3 as the value to put in the vmcb/vmcs when 
  10.342 + * entering the HVM guest. */
  10.343 +static inline void shadow_update_cr3(struct vcpu *v)
  10.344  {
  10.345 -    shadow_lock(v->domain);
  10.346 -    v->arch.shadow.mode->update_cr3(v);
  10.347 -    shadow_unlock(v->domain);
  10.348 +    v->arch.shadow.mode->update_cr3(v, 1);
  10.349  }
  10.350  
  10.351 +/* Update all the things that are derived from the guest's CR0/CR3/CR4.
  10.352 + * Called to initialize paging structures if the paging mode
  10.353 + * has changed, and when bringing up a VCPU for the first time. */
  10.354 +void shadow_update_paging_modes(struct vcpu *v);
  10.355 +
  10.356  
  10.357 -/* Should be called after CR3 is updated.
  10.358 - * Updates vcpu->arch.cr3 and, for HVM guests, vcpu->arch.hvm_vcpu.cpu_cr3.
  10.359 - * 
  10.360 - * Also updates other state derived from CR3 (vcpu->arch.guest_vtable,
  10.361 - * shadow_vtable, etc).
  10.362 - *
  10.363 - * Uses values found in vcpu->arch.(guest_table and guest_table_user), and
  10.364 - * for HVM guests, arch.monitor_table and hvm's guest CR3.
  10.365 - *
  10.366 - * Update ref counts to shadow tables appropriately.
  10.367 - */
  10.368 -static inline void update_cr3(struct vcpu *v)
  10.369 -{
  10.370 -    unsigned long cr3_mfn=0;
  10.371 -
  10.372 -    if ( shadow_mode_enabled(v->domain) )
  10.373 -    {
  10.374 -        shadow_update_cr3(v);
  10.375 -        return;
  10.376 -    }
  10.377 -
  10.378 -#if CONFIG_PAGING_LEVELS == 4
  10.379 -    if ( !(v->arch.flags & TF_kernel_mode) )
  10.380 -        cr3_mfn = pagetable_get_pfn(v->arch.guest_table_user);
  10.381 -    else
  10.382 -#endif
  10.383 -        cr3_mfn = pagetable_get_pfn(v->arch.guest_table);
  10.384 -
  10.385 -    make_cr3(v, cr3_mfn);
  10.386 -}
  10.387 +/*****************************************************************************
  10.388 + * Access to the guest pagetables */
  10.389  
  10.390 -extern void sh_update_paging_modes(struct vcpu *v);
  10.391 -
  10.392 -/* Should be called to initialise paging structures if the paging mode
  10.393 - * has changed, and when bringing up a VCPU for the first time. */
  10.394 -static inline void shadow_update_paging_modes(struct vcpu *v)
  10.395 -{
  10.396 -    ASSERT(shadow_mode_enabled(v->domain));
  10.397 -    shadow_lock(v->domain);
  10.398 -    sh_update_paging_modes(v);
  10.399 -    shadow_unlock(v->domain);
  10.400 -}
  10.401 -
  10.402 -static inline void
  10.403 -shadow_detach_old_tables(struct vcpu *v)
  10.404 -{
  10.405 -    if ( v->arch.shadow.mode )
  10.406 -        v->arch.shadow.mode->detach_old_tables(v);
  10.407 -}
  10.408 -
  10.409 -static inline mfn_t
  10.410 -shadow_make_monitor_table(struct vcpu *v)
  10.411 -{
  10.412 -    return v->arch.shadow.mode->make_monitor_table(v);
  10.413 -}
  10.414 -
  10.415 -static inline void
  10.416 -shadow_destroy_monitor_table(struct vcpu *v, mfn_t mmfn)
  10.417 -{
  10.418 -    v->arch.shadow.mode->destroy_monitor_table(v, mmfn);
  10.419 -}
  10.420 -
  10.421 +/* Get a mapping of a PV guest's l1e for this virtual address. */
  10.422  static inline void *
  10.423  guest_map_l1e(struct vcpu *v, unsigned long addr, unsigned long *gl1mfn)
  10.424  {
  10.425 -    if ( likely(!shadow_mode_translate(v->domain)) )
  10.426 -    {
  10.427 -        l2_pgentry_t l2e;
  10.428 -        ASSERT(!shadow_mode_external(v->domain));
  10.429 -        /* Find this l1e and its enclosing l1mfn in the linear map */
  10.430 -        if ( __copy_from_user(&l2e, 
  10.431 -                              &__linear_l2_table[l2_linear_offset(addr)],
  10.432 -                              sizeof(l2_pgentry_t)) != 0 )
  10.433 -            return NULL;
  10.434 -        /* Check flags that it will be safe to read the l1e */
  10.435 -        if ( (l2e_get_flags(l2e) & (_PAGE_PRESENT | _PAGE_PSE)) 
  10.436 -             != _PAGE_PRESENT )
  10.437 -            return NULL;
  10.438 -        *gl1mfn = l2e_get_pfn(l2e);
  10.439 -        return &__linear_l1_table[l1_linear_offset(addr)];
  10.440 -    }
  10.441 +    l2_pgentry_t l2e;
  10.442 +
  10.443 +    if ( unlikely(shadow_mode_translate(v->domain)) )
  10.444 +        return v->arch.shadow.mode->guest_map_l1e(v, addr, gl1mfn);
  10.445  
  10.446 -    return v->arch.shadow.mode->guest_map_l1e(v, addr, gl1mfn);
  10.447 +    /* Find this l1e and its enclosing l1mfn in the linear map */
  10.448 +    if ( __copy_from_user(&l2e, 
  10.449 +                          &__linear_l2_table[l2_linear_offset(addr)],
  10.450 +                          sizeof(l2_pgentry_t)) != 0 )
  10.451 +        return NULL;
  10.452 +    /* Check flags that it will be safe to read the l1e */
  10.453 +    if ( (l2e_get_flags(l2e) & (_PAGE_PRESENT | _PAGE_PSE)) 
  10.454 +         != _PAGE_PRESENT )
  10.455 +        return NULL;
  10.456 +    *gl1mfn = l2e_get_pfn(l2e);
  10.457 +    return &__linear_l1_table[l1_linear_offset(addr)];
  10.458  }
  10.459  
  10.460 +/* Pull down the mapping we got from guest_map_l1e() */
  10.461  static inline void
  10.462  guest_unmap_l1e(struct vcpu *v, void *p)
  10.463  {
  10.464 @@ -487,6 +297,7 @@ guest_unmap_l1e(struct vcpu *v, void *p)
  10.465          unmap_domain_page(p);
  10.466  }
  10.467  
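[Editor's note: a typical PV caller pairs guest_map_l1e() and guest_unmap_l1e() and checks for a failed map. A minimal sketch with hypothetical variable names:

    /* Sketch only: read the guest l1e mapping addr, if one exists. */
    unsigned long gl1mfn;
    l1_pgentry_t *pl1e = guest_map_l1e(v, addr, &gl1mfn);
    if ( pl1e != NULL )
    {
        l1_pgentry_t gl1e = *pl1e;
        guest_unmap_l1e(v, pl1e);
        /* ... act on gl1e ... */
    }
]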
  10.468 +/* Read the guest's l1e that maps this address. */
  10.469  static inline void
  10.470  guest_get_eff_l1e(struct vcpu *v, unsigned long addr, void *eff_l1e)
  10.471  {
  10.472 @@ -503,6 +314,8 @@ guest_get_eff_l1e(struct vcpu *v, unsign
  10.473      v->arch.shadow.mode->guest_get_eff_l1e(v, addr, eff_l1e);
  10.474  }
  10.475  
  10.476 +/* Read the guest's l1e that maps this address, from the kernel-mode
  10.477 + * pagetables. */
  10.478  static inline void
  10.479  guest_get_eff_kern_l1e(struct vcpu *v, unsigned long addr, void *eff_l1e)
  10.480  {
  10.481 @@ -518,82 +331,36 @@ guest_get_eff_kern_l1e(struct vcpu *v, u
  10.482      TOGGLE_MODE();
  10.483  }
  10.484  
  10.485 -
  10.486 -/* Validate a pagetable change from the guest and update the shadows. */
  10.487 -extern int shadow_validate_guest_entry(struct vcpu *v, mfn_t gmfn,
  10.488 -                                        void *new_guest_entry);
  10.489 -extern int __shadow_validate_guest_entry(struct vcpu *v, mfn_t gmfn, 
  10.490 -                                         void *entry, u32 size);
  10.491 -
  10.492 -/* Update the shadows in response to a pagetable write from a HVM guest */
  10.493 -extern void shadow_validate_guest_pt_write(struct vcpu *v, mfn_t gmfn, 
  10.494 -                                            void *entry, u32 size);
  10.495 +/* Write a new value into the guest pagetable, and update the shadows 
  10.496 + * appropriately.  Returns 0 if we page-faulted, 1 for success. */
  10.497 +int shadow_write_guest_entry(struct vcpu *v, intpte_t *p,
  10.498 +                             intpte_t new, mfn_t gmfn);
  10.499  
  10.500 -/* Remove all writeable mappings of a guest frame from the shadows.
  10.501 - * Returns non-zero if we need to flush TLBs. 
  10.502 - * level and fault_addr desribe how we found this to be a pagetable;
  10.503 - * level==0 means we have some other reason for revoking write access. */
  10.504 -extern int shadow_remove_write_access(struct vcpu *v, mfn_t readonly_mfn,
  10.505 -                                       unsigned int level,
  10.506 -                                       unsigned long fault_addr);
  10.507 +/* Cmpxchg a new value into the guest pagetable, and update the shadows 
  10.508 + * appropriately. Returns 0 if we page-faulted, 1 if not.
  10.509 + * N.B. caller should check the value of "old" to see if the
  10.510 + * cmpxchg itself was successful. */
  10.511 +int shadow_cmpxchg_guest_entry(struct vcpu *v, intpte_t *p,
  10.512 +                               intpte_t *old, intpte_t new, mfn_t gmfn);
  10.513  
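[Editor's note: a nonzero return only means the access did not fault; the caller must compare the value returned in *old with what it expected, to learn whether the exchange actually happened. A hedged sketch (hypothetical caller and error handling):

    /* Sketch only: "expected" is the pte value we last read. */
    intpte_t old = expected;
    if ( !shadow_cmpxchg_guest_entry(v, p, &old, new, gmfn) )
        return 0;           /* the guest pte access page-faulted */
    if ( old != expected )
        return -EAGAIN;     /* pte changed under us; retry (hypothetical) */
]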
  10.514 -/* Remove all mappings of the guest mfn from the shadows. 
  10.515 - * Returns non-zero if we need to flush TLBs. */
  10.516 -extern int shadow_remove_all_mappings(struct vcpu *v, mfn_t target_mfn);
  10.517 -
  10.518 -/* Remove all mappings from the shadows. */
  10.519 -extern void shadow_blow_tables(struct domain *d);
  10.520 -
  10.521 -void
  10.522 -shadow_remove_all_shadows_and_parents(struct vcpu *v, mfn_t gmfn);
  10.523 -/* This is a HVM page that we thing is no longer a pagetable.
  10.524 - * Unshadow it, and recursively unshadow pages that reference it. */
  10.525 +/* Remove all mappings of the guest page from the shadows. 
  10.526 + * This is called from common code.  It does not flush TLBs. */
  10.527 +int sh_remove_all_mappings(struct vcpu *v, mfn_t target_mfn);
  10.528 +static inline void 
  10.529 +shadow_drop_references(struct domain *d, struct page_info *p)
  10.530 +{
  10.531 +    /* See the comment about locking in sh_remove_all_mappings */
  10.532 +    sh_remove_all_mappings(d->vcpu[0], _mfn(page_to_mfn(p)));
  10.533 +}
  10.534  
  10.535  /* Remove all shadows of the guest mfn. */
  10.536 -extern void sh_remove_shadows(struct vcpu *v, mfn_t gmfn, int fast, int all);
  10.537 +void sh_remove_shadows(struct vcpu *v, mfn_t gmfn, int fast, int all);
  10.538  static inline void shadow_remove_all_shadows(struct vcpu *v, mfn_t gmfn)
  10.539  {
  10.540 -    int was_locked = shadow_locked_by_me(v->domain);
  10.541 -    if ( !was_locked )
  10.542 -        shadow_lock(v->domain);
  10.543 -    sh_remove_shadows(v, gmfn, 0, 1);
  10.544 -    if ( !was_locked )
  10.545 -        shadow_unlock(v->domain);
  10.546 +    /* See the comment about locking in sh_remove_shadows */
  10.547 +    sh_remove_shadows(v, gmfn, 0 /* Be thorough */, 1 /* Must succeed */);
  10.548  }
  10.549  
  10.550 -/* Add a page to a domain */
  10.551 -void
  10.552 -shadow_guest_physmap_add_page(struct domain *d, unsigned long gfn,
  10.553 -                               unsigned long mfn);
  10.554 -
  10.555 -/* Remove a page from a domain */
  10.556 -void
  10.557 -shadow_guest_physmap_remove_page(struct domain *d, unsigned long gfn,
  10.558 -                                  unsigned long mfn);
  10.559 -
  10.560 -/* 
  10.561 - * Allocation of shadow pages 
  10.562 - */
  10.563 -
  10.564 -/* Return the minumum acceptable number of shadow pages a domain needs */
  10.565 -unsigned int shadow_min_acceptable_pages(struct domain *d);
  10.566 -
  10.567 -/* Set the pool of shadow pages to the required number of MB.
  10.568 - * Input will be rounded up to at least min_acceptable_shadow_pages().
  10.569 - * Returns 0 for success, 1 for failure. */
  10.570 -unsigned int shadow_set_allocation(struct domain *d, 
  10.571 -                                    unsigned int megabytes,
  10.572 -                                    int *preempted);
  10.573 -
  10.574 -/* Return the size of the shadow pool, rounded up to the nearest MB */
  10.575 -static inline unsigned int shadow_get_allocation(struct domain *d)
  10.576 -{
  10.577 -    unsigned int pg = d->arch.shadow.total_pages;
  10.578 -    return ((pg >> (20 - PAGE_SHIFT))
  10.579 -            + ((pg & ((1 << (20 - PAGE_SHIFT)) - 1)) ? 1 : 0));
  10.580 -}
  10.581 -
  10.582 -
  10.583  /**************************************************************************/
  10.584  /* Guest physmap (p2m) support 
  10.585   *
  10.586 @@ -602,8 +369,19 @@ static inline unsigned int shadow_get_al
  10.587   * guests, so we steal the address space that would have normally
  10.588   * been used by the read-only MPT map.
  10.589   */
  10.590 +#define phys_to_machine_mapping ((l1_pgentry_t *)RO_MPT_VIRT_START)
  10.591  
  10.592 -#define phys_to_machine_mapping ((l1_pgentry_t *)RO_MPT_VIRT_START)
  10.593 +/* Add a page to a domain's p2m table */
  10.594 +void shadow_guest_physmap_add_page(struct domain *d, unsigned long gfn,
  10.595 +                                   unsigned long mfn);
  10.596 +
  10.597 +/* Remove a page from a domain's p2m table */
  10.598 +void shadow_guest_physmap_remove_page(struct domain *d, unsigned long gfn,
  10.599 +                                      unsigned long mfn);
  10.600 +
  10.601 +/* Aliases, called from common code. */
  10.602 +#define guest_physmap_add_page    shadow_guest_physmap_add_page
  10.603 +#define guest_physmap_remove_page shadow_guest_physmap_remove_page
  10.604  
  10.605  /* Read the current domain's P2M table. */
  10.606  static inline mfn_t sh_gfn_to_mfn_current(unsigned long gfn)
  10.607 @@ -627,8 +405,8 @@ static inline mfn_t sh_gfn_to_mfn_curren
  10.608      return _mfn(INVALID_MFN);
  10.609  }
  10.610  
  10.611 -/* Walk another domain's P2M table, mapping pages as we go */
  10.612 -extern mfn_t sh_gfn_to_mfn_foreign(struct domain *d, unsigned long gpfn);
  10.613 +/* Read another domain's P2M table, mapping pages as we go */
  10.614 +mfn_t sh_gfn_to_mfn_foreign(struct domain *d, unsigned long gpfn);
  10.615  
  10.616  /* General conversion function from gfn to mfn */
  10.617  static inline mfn_t
  10.618 @@ -666,6 +444,7 @@ mmio_space(paddr_t gpa)
  10.619      return !mfn_valid(mfn_x(sh_gfn_to_mfn_current(gfn)));
  10.620  }
  10.621  
  10.622 +/* Translate the frame number held in an l1e from guest to machine */
  10.623  static inline l1_pgentry_t
  10.624  gl1e_to_ml1e(struct domain *d, l1_pgentry_t l1e)
  10.625  {
  10.626 @@ -685,4 +464,3 @@ gl1e_to_ml1e(struct domain *d, l1_pgentr
  10.627   * indent-tabs-mode: nil
  10.628   * End:
  10.629   */
  10.630 -