ia64/xen-unstable
changeset 13141:c75d6f2aad7a
[XEN] Clean up the shadow interface
Remove a lot of unnecessary things from shadow.h, and move the shadow lock
entirely inside the shadow code.
Signed-off-by: Tim Deegan <Tim.Deegan@xensource.com>
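The core of the interface change: callers outside the shadow code no longer take the shadow lock and validate guest entries themselves, but go through shadow_write_guest_entry() / shadow_cmpxchg_guest_entry(), which lock internally. A minimal sketch of the new caller pattern, based on the do_mmu_update() hunk below; write_guest_pte() is a hypothetical helper name, and Xen-internal types (struct vcpu, intpte_t, mfn_t) are assumed from the tree's headers:

```c
/* Sketch only: mirrors the pattern used in do_mmu_update() after this
 * changeset.  Assumes Xen's internal definitions of struct vcpu, intpte_t,
 * mfn_t, _mfn(), shadow_mode_enabled() and shadow_write_guest_entry(). */
static int write_guest_pte(struct vcpu *v, intpte_t *va,
                           intpte_t val, unsigned long mfn)
{
    int okay;

    if ( unlikely(shadow_mode_enabled(v->domain)) )
        /* The shadow code takes and releases its own lock, and keeps the
         * shadows in sync with the new guest entry. */
        okay = shadow_write_guest_entry(v, va, val, _mfn(mfn));
    else
    {
        /* No shadows in use: write the entry directly. */
        *va = val;
        okay = 1;
    }

    return okay;
}
```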
author | Tim Deegan <Tim.Deegan@xensource.com> |
---|---|
date | Wed Dec 20 12:03:07 2006 +0000 (2006-12-20) |
parents | b258c7587d8d |
children | 988d3a63d9be |
files | xen/arch/x86/domain.c xen/arch/x86/domain_build.c xen/arch/x86/mm.c xen/arch/x86/mm/shadow/common.c xen/arch/x86/mm/shadow/multi.c xen/arch/x86/mm/shadow/multi.h xen/arch/x86/mm/shadow/private.h xen/arch/x86/mm/shadow/types.h xen/include/asm-x86/mm.h xen/include/asm-x86/shadow.h |
line diff
1.1 --- a/xen/arch/x86/domain.c Wed Dec 20 11:59:54 2006 +0000 1.2 +++ b/xen/arch/x86/domain.c Wed Dec 20 12:03:07 2006 +0000 1.3 @@ -172,10 +172,11 @@ int arch_domain_create(struct domain *d) 1.4 { 1.5 #ifdef __x86_64__ 1.6 struct page_info *pg; 1.7 + int i; 1.8 #endif 1.9 l1_pgentry_t gdt_l1e; 1.10 int vcpuid, pdpt_order; 1.11 - int i, rc = -ENOMEM; 1.12 + int rc = -ENOMEM; 1.13 1.14 pdpt_order = get_order_from_bytes(PDPT_L1_ENTRIES * sizeof(l1_pgentry_t)); 1.15 d->arch.mm_perdomain_pt = alloc_xenheap_pages(pdpt_order); 1.16 @@ -218,12 +219,7 @@ int arch_domain_create(struct domain *d) 1.17 1.18 #endif /* __x86_64__ */ 1.19 1.20 - shadow_lock_init(d); 1.21 - for ( i = 0; i <= SHADOW_MAX_ORDER; i++ ) 1.22 - INIT_LIST_HEAD(&d->arch.shadow.freelists[i]); 1.23 - INIT_LIST_HEAD(&d->arch.shadow.p2m_freelist); 1.24 - INIT_LIST_HEAD(&d->arch.shadow.p2m_inuse); 1.25 - INIT_LIST_HEAD(&d->arch.shadow.pinned_shadows); 1.26 + shadow_domain_init(d); 1.27 1.28 if ( !is_idle_domain(d) ) 1.29 { 1.30 @@ -366,15 +362,6 @@ int arch_set_info_guest( 1.31 v->arch.guest_table = pagetable_from_pfn(cr3_pfn); 1.32 } 1.33 1.34 - /* Shadow: make sure the domain has enough shadow memory to 1.35 - * boot another vcpu */ 1.36 - if ( shadow_mode_enabled(d) 1.37 - && d->arch.shadow.total_pages < shadow_min_acceptable_pages(d) ) 1.38 - { 1.39 - destroy_gdt(v); 1.40 - return -ENOMEM; 1.41 - } 1.42 - 1.43 if ( v->vcpu_id == 0 ) 1.44 update_domain_wallclock_time(d); 1.45
2.1 --- a/xen/arch/x86/domain_build.c Wed Dec 20 11:59:54 2006 +0000 2.2 +++ b/xen/arch/x86/domain_build.c Wed Dec 20 12:03:07 2006 +0000 2.3 @@ -827,7 +827,7 @@ int construct_dom0(struct domain *d, 2.4 regs->eflags = X86_EFLAGS_IF; 2.5 2.6 if ( opt_dom0_shadow ) 2.7 - if ( shadow_test_enable(d) == 0 ) 2.8 + if ( shadow_enable(d, SHM2_enable) == 0 ) 2.9 shadow_update_paging_modes(v); 2.10 2.11 if ( supervisor_mode_kernel )
3.1 --- a/xen/arch/x86/mm.c Wed Dec 20 11:59:54 2006 +0000 3.2 +++ b/xen/arch/x86/mm.c Wed Dec 20 12:03:07 2006 +0000 3.3 @@ -365,6 +365,38 @@ void write_ptbase(struct vcpu *v) 3.4 write_cr3(v->arch.cr3); 3.5 } 3.6 3.7 +/* Should be called after CR3 is updated. 3.8 + * Updates vcpu->arch.cr3 and, for HVM guests, vcpu->arch.hvm_vcpu.cpu_cr3. 3.9 + * 3.10 + * Also updates other state derived from CR3 (vcpu->arch.guest_vtable, 3.11 + * shadow_vtable, etc). 3.12 + * 3.13 + * Uses values found in vcpu->arch.(guest_table and guest_table_user), and 3.14 + * for HVM guests, arch.monitor_table and hvm's guest CR3. 3.15 + * 3.16 + * Update ref counts to shadow tables appropriately. 3.17 + */ 3.18 +void update_cr3(struct vcpu *v) 3.19 +{ 3.20 + unsigned long cr3_mfn=0; 3.21 + 3.22 + if ( shadow_mode_enabled(v->domain) ) 3.23 + { 3.24 + shadow_update_cr3(v); 3.25 + return; 3.26 + } 3.27 + 3.28 +#if CONFIG_PAGING_LEVELS == 4 3.29 + if ( !(v->arch.flags & TF_kernel_mode) ) 3.30 + cr3_mfn = pagetable_get_pfn(v->arch.guest_table_user); 3.31 + else 3.32 +#endif 3.33 + cr3_mfn = pagetable_get_pfn(v->arch.guest_table); 3.34 + 3.35 + make_cr3(v, cr3_mfn); 3.36 +} 3.37 + 3.38 + 3.39 void invalidate_shadow_ldt(struct vcpu *v) 3.40 { 3.41 int i; 3.42 @@ -1160,53 +1192,57 @@ static void free_l4_table(struct page_in 3.43 3.44 #endif 3.45 3.46 -static inline int update_l1e(l1_pgentry_t *pl1e, 3.47 - l1_pgentry_t ol1e, 3.48 - l1_pgentry_t nl1e, 3.49 - unsigned long gl1mfn, 3.50 - struct vcpu *v) 3.51 + 3.52 +/* How to write an entry to the guest pagetables. 3.53 + * Returns 0 for failure (pointer not valid), 1 for success. */ 3.54 +static inline int update_intpte(intpte_t *p, 3.55 + intpte_t old, 3.56 + intpte_t new, 3.57 + unsigned long mfn, 3.58 + struct vcpu *v) 3.59 { 3.60 int rv = 1; 3.61 +#ifndef PTE_UPDATE_WITH_CMPXCHG 3.62 if ( unlikely(shadow_mode_enabled(v->domain)) ) 3.63 - shadow_lock(v->domain); 3.64 -#ifndef PTE_UPDATE_WITH_CMPXCHG 3.65 - rv = (!__copy_to_user(pl1e, &nl1e, sizeof(nl1e))); 3.66 + rv = shadow_write_guest_entry(v, p, new, _mfn(mfn)); 3.67 + else 3.68 + rv = (!__copy_to_user(p, &new, sizeof(new))); 3.69 #else 3.70 { 3.71 - intpte_t o = l1e_get_intpte(ol1e); 3.72 - intpte_t n = l1e_get_intpte(nl1e); 3.73 - 3.74 + intpte_t t = old; 3.75 for ( ; ; ) 3.76 { 3.77 - if ( unlikely(cmpxchg_user(pl1e, o, n) != 0) ) 3.78 + if ( unlikely(shadow_mode_enabled(v->domain)) ) 3.79 + rv = shadow_cmpxchg_guest_entry(v, p, &t, new, _mfn(mfn)); 3.80 + else 3.81 + rv = (!cmpxchg_user(p, t, new)); 3.82 + 3.83 + if ( unlikely(rv == 0) ) 3.84 { 3.85 MEM_LOG("Failed to update %" PRIpte " -> %" PRIpte 3.86 - ": saw %" PRIpte, 3.87 - l1e_get_intpte(ol1e), 3.88 - l1e_get_intpte(nl1e), 3.89 - o); 3.90 - rv = 0; 3.91 + ": saw %" PRIpte, old, new, t); 3.92 break; 3.93 } 3.94 3.95 - if ( o == l1e_get_intpte(ol1e) ) 3.96 + if ( t == old ) 3.97 break; 3.98 3.99 /* Allowed to change in Accessed/Dirty flags only. */ 3.100 - BUG_ON((o ^ l1e_get_intpte(ol1e)) & 3.101 - ~(int)(_PAGE_ACCESSED|_PAGE_DIRTY)); 3.102 - ol1e = l1e_from_intpte(o); 3.103 + BUG_ON((t ^ old) & ~(intpte_t)(_PAGE_ACCESSED|_PAGE_DIRTY)); 3.104 + 3.105 + old = t; 3.106 } 3.107 } 3.108 #endif 3.109 - if ( unlikely(shadow_mode_enabled(v->domain)) && rv ) 3.110 - { 3.111 - shadow_validate_guest_entry(v, _mfn(gl1mfn), pl1e); 3.112 - shadow_unlock(v->domain); 3.113 - } 3.114 return rv; 3.115 } 3.116 3.117 +/* Macro that wraps the appropriate type-changes around update_intpte(). 
3.118 + * Arguments are: type, ptr, old, new, mfn, vcpu */ 3.119 +#define UPDATE_ENTRY(_t,_p,_o,_n,_m,_v) \ 3.120 + update_intpte((intpte_t *)(_p), \ 3.121 + _t ## e_get_intpte(_o), _t ## e_get_intpte(_n), \ 3.122 + (_m), (_v)) 3.123 3.124 /* Update the L1 entry at pl1e to new value nl1e. */ 3.125 static int mod_l1_entry(l1_pgentry_t *pl1e, l1_pgentry_t nl1e, 3.126 @@ -1219,7 +1255,7 @@ static int mod_l1_entry(l1_pgentry_t *pl 3.127 return 0; 3.128 3.129 if ( unlikely(shadow_mode_refcounts(d)) ) 3.130 - return update_l1e(pl1e, ol1e, nl1e, gl1mfn, current); 3.131 + return UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, current); 3.132 3.133 if ( l1e_get_flags(nl1e) & _PAGE_PRESENT ) 3.134 { 3.135 @@ -1238,12 +1274,12 @@ static int mod_l1_entry(l1_pgentry_t *pl 3.136 3.137 /* Fast path for identical mapping, r/w and presence. */ 3.138 if ( !l1e_has_changed(ol1e, nl1e, _PAGE_RW | _PAGE_PRESENT) ) 3.139 - return update_l1e(pl1e, ol1e, nl1e, gl1mfn, current); 3.140 + return UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, current); 3.141 3.142 if ( unlikely(!get_page_from_l1e(nl1e, FOREIGNDOM)) ) 3.143 return 0; 3.144 3.145 - if ( unlikely(!update_l1e(pl1e, ol1e, nl1e, gl1mfn, current)) ) 3.146 + if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, current)) ) 3.147 { 3.148 put_page_from_l1e(nl1e, d); 3.149 return 0; 3.150 @@ -1251,7 +1287,7 @@ static int mod_l1_entry(l1_pgentry_t *pl 3.151 } 3.152 else 3.153 { 3.154 - if ( unlikely(!update_l1e(pl1e, ol1e, nl1e, gl1mfn, current)) ) 3.155 + if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, current)) ) 3.156 return 0; 3.157 } 3.158 3.159 @@ -1259,36 +1295,6 @@ static int mod_l1_entry(l1_pgentry_t *pl 3.160 return 1; 3.161 } 3.162 3.163 -#ifndef PTE_UPDATE_WITH_CMPXCHG 3.164 -#define _UPDATE_ENTRY(_t,_p,_o,_n) ({ (*(_p) = (_n)); 1; }) 3.165 -#else 3.166 -#define _UPDATE_ENTRY(_t,_p,_o,_n) ({ \ 3.167 - for ( ; ; ) \ 3.168 - { \ 3.169 - intpte_t __o = cmpxchg((intpte_t *)(_p), \ 3.170 - _t ## e_get_intpte(_o), \ 3.171 - _t ## e_get_intpte(_n)); \ 3.172 - if ( __o == _t ## e_get_intpte(_o) ) \ 3.173 - break; \ 3.174 - /* Allowed to change in Accessed/Dirty flags only. */ \ 3.175 - BUG_ON((__o ^ _t ## e_get_intpte(_o)) & \ 3.176 - ~(int)(_PAGE_ACCESSED|_PAGE_DIRTY)); \ 3.177 - _o = _t ## e_from_intpte(__o); \ 3.178 - } \ 3.179 - 1; }) 3.180 -#endif 3.181 -#define UPDATE_ENTRY(_t,_p,_o,_n,_m) ({ \ 3.182 - int rv; \ 3.183 - if ( unlikely(shadow_mode_enabled(current->domain)) ) \ 3.184 - shadow_lock(current->domain); \ 3.185 - rv = _UPDATE_ENTRY(_t, _p, _o, _n); \ 3.186 - if ( unlikely(shadow_mode_enabled(current->domain)) ) \ 3.187 - { \ 3.188 - shadow_validate_guest_entry(current, _mfn(_m), (_p)); \ 3.189 - shadow_unlock(current->domain); \ 3.190 - } \ 3.191 - rv; \ 3.192 -}) 3.193 3.194 /* Update the L2 entry at pl2e to new value nl2e. pl2e is within frame pfn. */ 3.195 static int mod_l2_entry(l2_pgentry_t *pl2e, 3.196 @@ -1320,18 +1326,18 @@ static int mod_l2_entry(l2_pgentry_t *pl 3.197 3.198 /* Fast path for identical mapping and presence. 
*/ 3.199 if ( !l2e_has_changed(ol2e, nl2e, _PAGE_PRESENT)) 3.200 - return UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn); 3.201 + return UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, current); 3.202 3.203 if ( unlikely(!get_page_from_l2e(nl2e, pfn, current->domain)) ) 3.204 return 0; 3.205 3.206 - if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn)) ) 3.207 + if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, current)) ) 3.208 { 3.209 put_page_from_l2e(nl2e, pfn); 3.210 return 0; 3.211 } 3.212 } 3.213 - else if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn)) ) 3.214 + else if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, current)) ) 3.215 { 3.216 return 0; 3.217 } 3.218 @@ -1381,18 +1387,18 @@ static int mod_l3_entry(l3_pgentry_t *pl 3.219 3.220 /* Fast path for identical mapping and presence. */ 3.221 if (!l3e_has_changed(ol3e, nl3e, _PAGE_PRESENT)) 3.222 - return UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn); 3.223 + return UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, current); 3.224 3.225 if ( unlikely(!get_page_from_l3e(nl3e, pfn, current->domain)) ) 3.226 return 0; 3.227 3.228 - if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn)) ) 3.229 + if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, current)) ) 3.230 { 3.231 put_page_from_l3e(nl3e, pfn); 3.232 return 0; 3.233 } 3.234 } 3.235 - else if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn)) ) 3.236 + else if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, current)) ) 3.237 { 3.238 return 0; 3.239 } 3.240 @@ -1439,18 +1445,18 @@ static int mod_l4_entry(l4_pgentry_t *pl 3.241 3.242 /* Fast path for identical mapping and presence. */ 3.243 if (!l4e_has_changed(ol4e, nl4e, _PAGE_PRESENT)) 3.244 - return UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn); 3.245 + return UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, current); 3.246 3.247 if ( unlikely(!get_page_from_l4e(nl4e, pfn, current->domain)) ) 3.248 return 0; 3.249 3.250 - if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn)) ) 3.251 + if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, current)) ) 3.252 { 3.253 put_page_from_l4e(nl4e, pfn); 3.254 return 0; 3.255 } 3.256 } 3.257 - else if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn)) ) 3.258 + else if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, current)) ) 3.259 { 3.260 return 0; 3.261 } 3.262 @@ -2292,15 +2298,11 @@ int do_mmu_update( 3.263 break; 3.264 3.265 if ( unlikely(shadow_mode_enabled(d)) ) 3.266 - shadow_lock(d); 3.267 - 3.268 - *(intpte_t *)va = req.val; 3.269 - okay = 1; 3.270 - 3.271 - if ( unlikely(shadow_mode_enabled(d)) ) 3.272 + okay = shadow_write_guest_entry(v, va, req.val, _mfn(mfn)); 3.273 + else 3.274 { 3.275 - shadow_validate_guest_entry(v, _mfn(mfn), va); 3.276 - shadow_unlock(d); 3.277 + *(intpte_t *)va = req.val; 3.278 + okay = 1; 3.279 } 3.280 3.281 put_page_type(page); 3.282 @@ -2409,7 +2411,7 @@ static int create_grant_pte_mapping( 3.283 } 3.284 3.285 ol1e = *(l1_pgentry_t *)va; 3.286 - if ( !update_l1e(va, ol1e, nl1e, mfn, v) ) 3.287 + if ( !UPDATE_ENTRY(l1, va, ol1e, nl1e, mfn, v) ) 3.288 { 3.289 put_page_type(page); 3.290 rc = GNTST_general_error; 3.291 @@ -2477,7 +2479,7 @@ static int destroy_grant_pte_mapping( 3.292 } 3.293 3.294 /* Delete pagetable entry. */ 3.295 - if ( unlikely(!update_l1e( 3.296 + if ( unlikely(!UPDATE_ENTRY(l1, 3.297 (l1_pgentry_t *)va, ol1e, l1e_empty(), mfn, 3.298 d->vcpu[0] /* Change if we go to per-vcpu shadows. 
*/)) ) 3.299 { 3.300 @@ -2515,7 +2517,7 @@ static int create_grant_va_mapping( 3.301 return GNTST_general_error; 3.302 } 3.303 ol1e = *pl1e; 3.304 - okay = update_l1e(pl1e, ol1e, nl1e, gl1mfn, v); 3.305 + okay = UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, v); 3.306 guest_unmap_l1e(v, pl1e); 3.307 pl1e = NULL; 3.308 3.309 @@ -2553,7 +2555,7 @@ static int destroy_grant_va_mapping( 3.310 } 3.311 3.312 /* Delete pagetable entry. */ 3.313 - if ( unlikely(!update_l1e(pl1e, ol1e, l1e_empty(), gl1mfn, v)) ) 3.314 + if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, l1e_empty(), gl1mfn, v)) ) 3.315 { 3.316 MEM_LOG("Cannot delete PTE entry at %p", (unsigned long *)pl1e); 3.317 rc = GNTST_general_error; 3.318 @@ -2952,16 +2954,6 @@ long arch_memory_op(int op, XEN_GUEST_HA 3.319 3.320 UNLOCK_BIGLOCK(d); 3.321 3.322 - /* If we're doing FAST_FAULT_PATH, then shadow mode may have 3.323 - cached the fact that this is an mmio region in the shadow 3.324 - page tables. Blow the tables away to remove the cache. 3.325 - This is pretty heavy handed, but this is a rare operation 3.326 - (it might happen a dozen times during boot and then never 3.327 - again), so it doesn't matter too much. */ 3.328 - shadow_lock(d); 3.329 - shadow_blow_tables(d); 3.330 - shadow_unlock(d); 3.331 - 3.332 put_domain(d); 3.333 3.334 break; 3.335 @@ -3188,27 +3180,30 @@ static int ptwr_emulated_update( 3.336 pl1e = (l1_pgentry_t *)((unsigned long)pl1e + (addr & ~PAGE_MASK)); 3.337 if ( do_cmpxchg ) 3.338 { 3.339 - if ( shadow_mode_enabled(d) ) 3.340 - shadow_lock(d); 3.341 + int okay; 3.342 ol1e = l1e_from_intpte(old); 3.343 - if ( cmpxchg((intpte_t *)pl1e, old, val) != old ) 3.344 + 3.345 + if ( shadow_mode_enabled(d) ) 3.346 { 3.347 - if ( shadow_mode_enabled(d) ) 3.348 - shadow_unlock(d); 3.349 + intpte_t t = old; 3.350 + okay = shadow_cmpxchg_guest_entry(v, (intpte_t *) pl1e, 3.351 + &t, val, _mfn(mfn)); 3.352 + okay = (okay && t == old); 3.353 + } 3.354 + else 3.355 + okay = (cmpxchg((intpte_t *)pl1e, old, val) == old); 3.356 + 3.357 + if ( !okay ) 3.358 + { 3.359 unmap_domain_page(pl1e); 3.360 put_page_from_l1e(gl1e_to_ml1e(d, nl1e), d); 3.361 return X86EMUL_CMPXCHG_FAILED; 3.362 } 3.363 - if ( unlikely(shadow_mode_enabled(d)) ) 3.364 - { 3.365 - shadow_validate_guest_entry(v, _mfn(page_to_mfn(page)), pl1e); 3.366 - shadow_unlock(d); 3.367 - } 3.368 } 3.369 else 3.370 { 3.371 ol1e = *pl1e; 3.372 - if ( !update_l1e(pl1e, ol1e, nl1e, page_to_mfn(page), v) ) 3.373 + if ( !UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, page_to_mfn(page), v) ) 3.374 BUG(); 3.375 } 3.376
4.1 --- a/xen/arch/x86/mm/shadow/common.c Wed Dec 20 11:59:54 2006 +0000 4.2 +++ b/xen/arch/x86/mm/shadow/common.c Wed Dec 20 12:03:07 2006 +0000 4.3 @@ -38,6 +38,21 @@ 4.4 #include <asm/shadow.h> 4.5 #include "private.h" 4.6 4.7 + 4.8 +/* Set up the shadow-specific parts of a domain struct at start of day. 4.9 + * Called for every domain from arch_domain_create() */ 4.10 +void shadow_domain_init(struct domain *d) 4.11 +{ 4.12 + int i; 4.13 + shadow_lock_init(d); 4.14 + for ( i = 0; i <= SHADOW_MAX_ORDER; i++ ) 4.15 + INIT_LIST_HEAD(&d->arch.shadow.freelists[i]); 4.16 + INIT_LIST_HEAD(&d->arch.shadow.p2m_freelist); 4.17 + INIT_LIST_HEAD(&d->arch.shadow.p2m_inuse); 4.18 + INIT_LIST_HEAD(&d->arch.shadow.pinned_shadows); 4.19 +} 4.20 + 4.21 + 4.22 #if SHADOW_AUDIT 4.23 int shadow_audit_enable = 0; 4.24 4.25 @@ -434,7 +449,7 @@ void shadow_promote(struct vcpu *v, mfn_ 4.26 ASSERT(mfn_valid(gmfn)); 4.27 4.28 /* We should never try to promote a gmfn that has writeable mappings */ 4.29 - ASSERT(shadow_remove_write_access(v, gmfn, 0, 0) == 0); 4.30 + ASSERT(sh_remove_write_access(v, gmfn, 0, 0) == 0); 4.31 4.32 /* Is the page already shadowed? */ 4.33 if ( !test_and_set_bit(_PGC_page_table, &page->count_info) ) 4.34 @@ -466,8 +481,7 @@ void shadow_demote(struct vcpu *v, mfn_t 4.35 * Returns a bitmask of SHADOW_SET_* flags. */ 4.36 4.37 int 4.38 -__shadow_validate_guest_entry(struct vcpu *v, mfn_t gmfn, 4.39 - void *entry, u32 size) 4.40 +sh_validate_guest_entry(struct vcpu *v, mfn_t gmfn, void *entry, u32 size) 4.41 { 4.42 int result = 0; 4.43 struct page_info *page = mfn_to_page(gmfn); 4.44 @@ -546,22 +560,9 @@ int 4.45 } 4.46 4.47 4.48 -int 4.49 -shadow_validate_guest_entry(struct vcpu *v, mfn_t gmfn, void *entry) 4.50 -/* This is the entry point from hypercalls. It returns a bitmask of all the 4.51 - * results of shadow_set_l*e() calls, so the caller knows to do TLB flushes. */ 4.52 -{ 4.53 - int rc; 4.54 - 4.55 - ASSERT(shadow_locked_by_me(v->domain)); 4.56 - rc = __shadow_validate_guest_entry(v, gmfn, entry, sizeof(l1_pgentry_t)); 4.57 - shadow_audit_tables(v); 4.58 - return rc; 4.59 -} 4.60 - 4.61 void 4.62 -shadow_validate_guest_pt_write(struct vcpu *v, mfn_t gmfn, 4.63 - void *entry, u32 size) 4.64 +sh_validate_guest_pt_write(struct vcpu *v, mfn_t gmfn, 4.65 + void *entry, u32 size) 4.66 /* This is the entry point for emulated writes to pagetables in HVM guests and 4.67 * PV translated guests. 4.68 */ 4.69 @@ -570,7 +571,7 @@ shadow_validate_guest_pt_write(struct vc 4.70 int rc; 4.71 4.72 ASSERT(shadow_locked_by_me(v->domain)); 4.73 - rc = __shadow_validate_guest_entry(v, gmfn, entry, size); 4.74 + rc = sh_validate_guest_entry(v, gmfn, entry, size); 4.75 if ( rc & SHADOW_SET_FLUSH ) 4.76 /* Need to flush TLBs to pick up shadow PT changes */ 4.77 flush_tlb_mask(d->domain_dirty_cpumask); 4.78 @@ -585,6 +586,38 @@ shadow_validate_guest_pt_write(struct vc 4.79 } 4.80 } 4.81 4.82 +int shadow_write_guest_entry(struct vcpu *v, intpte_t *p, 4.83 + intpte_t new, mfn_t gmfn) 4.84 +/* Write a new value into the guest pagetable, and update the shadows 4.85 + * appropriately. Returns 0 if we page-faulted, 1 for success. 
*/ 4.86 +{ 4.87 + int failed; 4.88 + shadow_lock(v->domain); 4.89 + failed = __copy_to_user(p, &new, sizeof(new)); 4.90 + if ( failed != sizeof(new) ) 4.91 + sh_validate_guest_entry(v, gmfn, p, sizeof(new)); 4.92 + shadow_unlock(v->domain); 4.93 + return (failed == 0); 4.94 +} 4.95 + 4.96 +int shadow_cmpxchg_guest_entry(struct vcpu *v, intpte_t *p, 4.97 + intpte_t *old, intpte_t new, mfn_t gmfn) 4.98 +/* Cmpxchg a new value into the guest pagetable, and update the shadows 4.99 + * appropriately. Returns 0 if we page-faulted, 1 if not. 4.100 + * N.B. caller should check the value of "old" to see if the 4.101 + * cmpxchg itself was successful. */ 4.102 +{ 4.103 + int failed; 4.104 + intpte_t t = *old; 4.105 + shadow_lock(v->domain); 4.106 + failed = cmpxchg_user(p, t, new); 4.107 + if ( t == *old ) 4.108 + sh_validate_guest_entry(v, gmfn, p, sizeof(new)); 4.109 + *old = t; 4.110 + shadow_unlock(v->domain); 4.111 + return (failed == 0); 4.112 +} 4.113 + 4.114 4.115 /**************************************************************************/ 4.116 /* Memory management for shadow pages. */ 4.117 @@ -791,7 +824,7 @@ void shadow_prealloc(struct domain *d, u 4.118 4.119 /* Deliberately free all the memory we can: this will tear down all of 4.120 * this domain's shadows */ 4.121 -void shadow_blow_tables(struct domain *d) 4.122 +static void shadow_blow_tables(struct domain *d) 4.123 { 4.124 struct list_head *l, *t; 4.125 struct shadow_page_info *sp; 4.126 @@ -989,7 +1022,7 @@ void shadow_free(struct domain *d, mfn_t 4.127 * Also, we only ever allocate a max-order chunk, so as to preserve 4.128 * the invariant that shadow_prealloc() always works. 4.129 * Returns 0 iff it can't get a chunk (the caller should then 4.130 - * free up some pages in domheap and call set_sh_allocation); 4.131 + * free up some pages in domheap and call sh_set_allocation); 4.132 * returns non-zero on success. 4.133 */ 4.134 static int 4.135 @@ -1149,14 +1182,14 @@ p2m_next_level(struct domain *d, mfn_t * 4.136 if ( pagetable_get_pfn(v->arch.guest_table) 4.137 == pagetable_get_pfn(d->arch.phys_table) 4.138 && v->arch.shadow.mode != NULL ) 4.139 - v->arch.shadow.mode->update_cr3(v); 4.140 + v->arch.shadow.mode->update_cr3(v, 0); 4.141 } 4.142 } 4.143 #endif 4.144 /* The P2M can be shadowed: keep the shadows synced */ 4.145 if ( d->vcpu[0] != NULL ) 4.146 - (void)__shadow_validate_guest_entry(d->vcpu[0], *table_mfn, 4.147 - p2m_entry, sizeof *p2m_entry); 4.148 + (void)sh_validate_guest_entry(d->vcpu[0], *table_mfn, 4.149 + p2m_entry, sizeof *p2m_entry); 4.150 } 4.151 *table_mfn = _mfn(l1e_get_pfn(*p2m_entry)); 4.152 next = sh_map_domain_page(*table_mfn); 4.153 @@ -1216,8 +1249,8 @@ shadow_set_p2m_entry(struct domain *d, u 4.154 4.155 /* The P2M can be shadowed: keep the shadows synced */ 4.156 if ( d->vcpu[0] != NULL ) 4.157 - (void)__shadow_validate_guest_entry( 4.158 - d->vcpu[0], table_mfn, p2m_entry, sizeof(*p2m_entry)); 4.159 + (void)sh_validate_guest_entry(d->vcpu[0], table_mfn, 4.160 + p2m_entry, sizeof(*p2m_entry)); 4.161 4.162 /* Success */ 4.163 rv = 1; 4.164 @@ -1427,9 +1460,9 @@ static void shadow_p2m_teardown(struct d 4.165 * Input will be rounded up to at least shadow_min_acceptable_pages(), 4.166 * plus space for the p2m table. 4.167 * Returns 0 for success, non-zero for failure. 
*/ 4.168 -static unsigned int set_sh_allocation(struct domain *d, 4.169 - unsigned int pages, 4.170 - int *preempted) 4.171 +static unsigned int sh_set_allocation(struct domain *d, 4.172 + unsigned int pages, 4.173 + int *preempted) 4.174 { 4.175 struct shadow_page_info *sp; 4.176 unsigned int lower_bound; 4.177 @@ -1499,20 +1532,12 @@ static unsigned int set_sh_allocation(st 4.178 return 0; 4.179 } 4.180 4.181 -unsigned int shadow_set_allocation(struct domain *d, 4.182 - unsigned int megabytes, 4.183 - int *preempted) 4.184 -/* Hypercall interface to set the shadow memory allocation */ 4.185 +/* Return the size of the shadow pool, rounded up to the nearest MB */ 4.186 +static unsigned int shadow_get_allocation(struct domain *d) 4.187 { 4.188 - unsigned int rv; 4.189 - shadow_lock(d); 4.190 - rv = set_sh_allocation(d, megabytes << (20 - PAGE_SHIFT), preempted); 4.191 - SHADOW_PRINTK("dom %u allocation now %u pages (%u MB)\n", 4.192 - d->domain_id, 4.193 - d->arch.shadow.total_pages, 4.194 - shadow_get_allocation(d)); 4.195 - shadow_unlock(d); 4.196 - return rv; 4.197 + unsigned int pg = d->arch.shadow.total_pages; 4.198 + return ((pg >> (20 - PAGE_SHIFT)) 4.199 + + ((pg & ((1 << (20 - PAGE_SHIFT)) - 1)) ? 1 : 0)); 4.200 } 4.201 4.202 /**************************************************************************/ 4.203 @@ -1889,24 +1914,24 @@ void sh_destroy_shadow(struct vcpu *v, m 4.204 * level and fault_addr desribe how we found this to be a pagetable; 4.205 * level==0 means we have some other reason for revoking write access.*/ 4.206 4.207 -int shadow_remove_write_access(struct vcpu *v, mfn_t gmfn, 4.208 - unsigned int level, 4.209 - unsigned long fault_addr) 4.210 +int sh_remove_write_access(struct vcpu *v, mfn_t gmfn, 4.211 + unsigned int level, 4.212 + unsigned long fault_addr) 4.213 { 4.214 /* Dispatch table for getting per-type functions */ 4.215 static hash_callback_t callbacks[16] = { 4.216 NULL, /* none */ 4.217 #if CONFIG_PAGING_LEVELS == 2 4.218 - SHADOW_INTERNAL_NAME(sh_remove_write_access,2,2), /* l1_32 */ 4.219 - SHADOW_INTERNAL_NAME(sh_remove_write_access,2,2), /* fl1_32 */ 4.220 + SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,2,2), /* l1_32 */ 4.221 + SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,2,2), /* fl1_32 */ 4.222 #else 4.223 - SHADOW_INTERNAL_NAME(sh_remove_write_access,3,2), /* l1_32 */ 4.224 - SHADOW_INTERNAL_NAME(sh_remove_write_access,3,2), /* fl1_32 */ 4.225 + SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,3,2), /* l1_32 */ 4.226 + SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,3,2), /* fl1_32 */ 4.227 #endif 4.228 NULL, /* l2_32 */ 4.229 #if CONFIG_PAGING_LEVELS >= 3 4.230 - SHADOW_INTERNAL_NAME(sh_remove_write_access,3,3), /* l1_pae */ 4.231 - SHADOW_INTERNAL_NAME(sh_remove_write_access,3,3), /* fl1_pae */ 4.232 + SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,3,3), /* l1_pae */ 4.233 + SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,3,3), /* fl1_pae */ 4.234 #else 4.235 NULL, /* l1_pae */ 4.236 NULL, /* fl1_pae */ 4.237 @@ -1914,8 +1939,8 @@ int shadow_remove_write_access(struct vc 4.238 NULL, /* l2_pae */ 4.239 NULL, /* l2h_pae */ 4.240 #if CONFIG_PAGING_LEVELS >= 4 4.241 - SHADOW_INTERNAL_NAME(sh_remove_write_access,4,4), /* l1_64 */ 4.242 - SHADOW_INTERNAL_NAME(sh_remove_write_access,4,4), /* fl1_64 */ 4.243 + SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,4,4), /* l1_64 */ 4.244 + SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,4,4), /* fl1_64 */ 4.245 #else 4.246 NULL, /* l1_64 */ 4.247 NULL, /* fl1_64 */ 4.248 @@ -2077,25 +2102,25 @@ int 
shadow_remove_write_access(struct vc 4.249 /* Remove all mappings of a guest frame from the shadow tables. 4.250 * Returns non-zero if we need to flush TLBs. */ 4.251 4.252 -int shadow_remove_all_mappings(struct vcpu *v, mfn_t gmfn) 4.253 +int sh_remove_all_mappings(struct vcpu *v, mfn_t gmfn) 4.254 { 4.255 struct page_info *page = mfn_to_page(gmfn); 4.256 - int expected_count; 4.257 + int expected_count, do_locking; 4.258 4.259 /* Dispatch table for getting per-type functions */ 4.260 static hash_callback_t callbacks[16] = { 4.261 NULL, /* none */ 4.262 #if CONFIG_PAGING_LEVELS == 2 4.263 - SHADOW_INTERNAL_NAME(sh_remove_all_mappings,2,2), /* l1_32 */ 4.264 - SHADOW_INTERNAL_NAME(sh_remove_all_mappings,2,2), /* fl1_32 */ 4.265 + SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,2,2), /* l1_32 */ 4.266 + SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,2,2), /* fl1_32 */ 4.267 #else 4.268 - SHADOW_INTERNAL_NAME(sh_remove_all_mappings,3,2), /* l1_32 */ 4.269 - SHADOW_INTERNAL_NAME(sh_remove_all_mappings,3,2), /* fl1_32 */ 4.270 + SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,3,2), /* l1_32 */ 4.271 + SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,3,2), /* fl1_32 */ 4.272 #endif 4.273 NULL, /* l2_32 */ 4.274 #if CONFIG_PAGING_LEVELS >= 3 4.275 - SHADOW_INTERNAL_NAME(sh_remove_all_mappings,3,3), /* l1_pae */ 4.276 - SHADOW_INTERNAL_NAME(sh_remove_all_mappings,3,3), /* fl1_pae */ 4.277 + SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,3,3), /* l1_pae */ 4.278 + SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,3,3), /* fl1_pae */ 4.279 #else 4.280 NULL, /* l1_pae */ 4.281 NULL, /* fl1_pae */ 4.282 @@ -2103,8 +2128,8 @@ int shadow_remove_all_mappings(struct vc 4.283 NULL, /* l2_pae */ 4.284 NULL, /* l2h_pae */ 4.285 #if CONFIG_PAGING_LEVELS >= 4 4.286 - SHADOW_INTERNAL_NAME(sh_remove_all_mappings,4,4), /* l1_64 */ 4.287 - SHADOW_INTERNAL_NAME(sh_remove_all_mappings,4,4), /* fl1_64 */ 4.288 + SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,4,4), /* l1_64 */ 4.289 + SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,4,4), /* fl1_64 */ 4.290 #else 4.291 NULL, /* l1_64 */ 4.292 NULL, /* fl1_64 */ 4.293 @@ -2129,7 +2154,12 @@ int shadow_remove_all_mappings(struct vc 4.294 if ( (page->count_info & PGC_count_mask) == 0 ) 4.295 return 0; 4.296 4.297 - ASSERT(shadow_locked_by_me(v->domain)); 4.298 + /* Although this is an externally visible function, we do not know 4.299 + * whether the shadow lock will be held when it is called (since it 4.300 + * can be called via put_page_type when we clear a shadow l1e). 4.301 + * If the lock isn't held, take it for the duration of the call. */ 4.302 + do_locking = !shadow_locked_by_me(v->domain); 4.303 + if ( do_locking ) shadow_lock(v->domain); 4.304 4.305 /* XXX TODO: 4.306 * Heuristics for finding the (probably) single mapping of this gmfn */ 4.307 @@ -2154,6 +2184,8 @@ int shadow_remove_all_mappings(struct vc 4.308 } 4.309 } 4.310 4.311 + if ( do_locking ) shadow_unlock(v->domain); 4.312 + 4.313 /* We killed at least one mapping, so must flush TLBs. 
*/ 4.314 return 1; 4.315 } 4.316 @@ -2236,9 +2268,10 @@ void sh_remove_shadows(struct vcpu *v, m 4.317 * (all != 0 implies fast == 0) 4.318 */ 4.319 { 4.320 - struct page_info *pg; 4.321 + struct page_info *pg = mfn_to_page(gmfn); 4.322 mfn_t smfn; 4.323 u32 sh_flags; 4.324 + int do_locking; 4.325 unsigned char t; 4.326 4.327 /* Dispatch table for getting per-type functions: each level must 4.328 @@ -2296,15 +2329,19 @@ void sh_remove_shadows(struct vcpu *v, m 4.329 0 /* unused */ 4.330 }; 4.331 4.332 - ASSERT(shadow_locked_by_me(v->domain)); 4.333 ASSERT(!(all && fast)); 4.334 4.335 - pg = mfn_to_page(gmfn); 4.336 - 4.337 /* Bail out now if the page is not shadowed */ 4.338 if ( (pg->count_info & PGC_page_table) == 0 ) 4.339 return; 4.340 4.341 + /* Although this is an externally visible function, we do not know 4.342 + * whether the shadow lock will be held when it is called (since it 4.343 + * can be called via put_page_type when we clear a shadow l1e). 4.344 + * If the lock isn't held, take it for the duration of the call. */ 4.345 + do_locking = !shadow_locked_by_me(v->domain); 4.346 + if ( do_locking ) shadow_lock(v->domain); 4.347 + 4.348 SHADOW_PRINTK("d=%d, v=%d, gmfn=%05lx\n", 4.349 v->domain->domain_id, v->vcpu_id, mfn_x(gmfn)); 4.350 4.351 @@ -2356,14 +2393,16 @@ void sh_remove_shadows(struct vcpu *v, m 4.352 /* Need to flush TLBs now, so that linear maps are safe next time we 4.353 * take a fault. */ 4.354 flush_tlb_mask(v->domain->domain_dirty_cpumask); 4.355 + 4.356 + if ( do_locking ) shadow_unlock(v->domain); 4.357 } 4.358 4.359 -void 4.360 -shadow_remove_all_shadows_and_parents(struct vcpu *v, mfn_t gmfn) 4.361 +static void 4.362 +sh_remove_all_shadows_and_parents(struct vcpu *v, mfn_t gmfn) 4.363 /* Even harsher: this is a HVM page that we thing is no longer a pagetable. 4.364 * Unshadow it, and recursively unshadow pages that reference it. */ 4.365 { 4.366 - shadow_remove_all_shadows(v, gmfn); 4.367 + sh_remove_shadows(v, gmfn, 0, 1); 4.368 /* XXX TODO: 4.369 * Rework this hashtable walker to return a linked-list of all 4.370 * the shadows it modified, then do breadth-first recursion 4.371 @@ -2376,7 +2415,7 @@ shadow_remove_all_shadows_and_parents(st 4.372 4.373 /**************************************************************************/ 4.374 4.375 -void sh_update_paging_modes(struct vcpu *v) 4.376 +static void sh_update_paging_modes(struct vcpu *v) 4.377 { 4.378 struct domain *d = v->domain; 4.379 struct shadow_paging_mode *old_mode = v->arch.shadow.mode; 4.380 @@ -2394,7 +2433,8 @@ void sh_update_paging_modes(struct vcpu 4.381 4.382 // First, tear down any old shadow tables held by this vcpu. 
4.383 // 4.384 - shadow_detach_old_tables(v); 4.385 + if ( v->arch.shadow.mode ) 4.386 + v->arch.shadow.mode->detach_old_tables(v); 4.387 4.388 if ( !is_hvm_domain(d) ) 4.389 { 4.390 @@ -2402,10 +2442,9 @@ void sh_update_paging_modes(struct vcpu 4.391 /// PV guest 4.392 /// 4.393 #if CONFIG_PAGING_LEVELS == 4 4.394 - if ( pv_32bit_guest(v) ) 4.395 - v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,4,3); 4.396 - else 4.397 - v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,4,4); 4.398 + /* When 32-on-64 PV guests are supported, they must choose 4.399 + * a different mode here */ 4.400 + v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,4,4); 4.401 #elif CONFIG_PAGING_LEVELS == 3 4.402 v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3); 4.403 #elif CONFIG_PAGING_LEVELS == 2 4.404 @@ -2493,7 +2532,7 @@ void sh_update_paging_modes(struct vcpu 4.405 4.406 if ( pagetable_is_null(v->arch.monitor_table) ) 4.407 { 4.408 - mfn_t mmfn = shadow_make_monitor_table(v); 4.409 + mfn_t mmfn = v->arch.shadow.mode->make_monitor_table(v); 4.410 v->arch.monitor_table = pagetable_from_mfn(mmfn); 4.411 make_cr3(v, mfn_x(mmfn)); 4.412 hvm_update_host_cr3(v); 4.413 @@ -2528,7 +2567,7 @@ void sh_update_paging_modes(struct vcpu 4.414 4.415 old_mfn = pagetable_get_mfn(v->arch.monitor_table); 4.416 v->arch.monitor_table = pagetable_null(); 4.417 - new_mfn = v->arch.shadow.mode->make_monitor_table(v); 4.418 + new_mfn = v->arch.shadow.mode->make_monitor_table(v); 4.419 v->arch.monitor_table = pagetable_from_mfn(new_mfn); 4.420 SHADOW_PRINTK("new monitor table %"SH_PRI_mfn "\n", 4.421 mfn_x(new_mfn)); 4.422 @@ -2549,7 +2588,14 @@ void sh_update_paging_modes(struct vcpu 4.423 // This *does* happen, at least for CR4.PGE... 4.424 } 4.425 4.426 - v->arch.shadow.mode->update_cr3(v); 4.427 + v->arch.shadow.mode->update_cr3(v, 0); 4.428 +} 4.429 + 4.430 +void shadow_update_paging_modes(struct vcpu *v) 4.431 +{ 4.432 + shadow_lock(v->domain); 4.433 + sh_update_paging_modes(v); 4.434 + shadow_unlock(v->domain); 4.435 } 4.436 4.437 /**************************************************************************/ 4.438 @@ -2610,9 +2656,9 @@ int shadow_enable(struct domain *d, u32 4.439 /* Init the shadow memory allocation if the user hasn't done so */ 4.440 old_pages = d->arch.shadow.total_pages; 4.441 if ( old_pages == 0 ) 4.442 - if ( set_sh_allocation(d, 256, NULL) != 0 ) /* Use at least 1MB */ 4.443 + if ( sh_set_allocation(d, 256, NULL) != 0 ) /* Use at least 1MB */ 4.444 { 4.445 - set_sh_allocation(d, 0, NULL); 4.446 + sh_set_allocation(d, 0, NULL); 4.447 rv = -ENOMEM; 4.448 goto out; 4.449 } 4.450 @@ -2620,7 +2666,7 @@ int shadow_enable(struct domain *d, u32 4.451 /* Init the hash table */ 4.452 if ( shadow_hash_alloc(d) != 0 ) 4.453 { 4.454 - set_sh_allocation(d, old_pages, NULL); 4.455 + sh_set_allocation(d, old_pages, NULL); 4.456 rv = -ENOMEM; 4.457 goto out; 4.458 } 4.459 @@ -2630,7 +2676,7 @@ int shadow_enable(struct domain *d, u32 4.460 if ( !shadow_alloc_p2m_table(d) ) 4.461 { 4.462 shadow_hash_teardown(d); 4.463 - set_sh_allocation(d, old_pages, NULL); 4.464 + sh_set_allocation(d, old_pages, NULL); 4.465 shadow_p2m_teardown(d); 4.466 rv = -ENOMEM; 4.467 goto out; 4.468 @@ -2669,13 +2715,16 @@ void shadow_teardown(struct domain *d) 4.469 /* Release the shadow and monitor tables held by each vcpu */ 4.470 for_each_vcpu(d, v) 4.471 { 4.472 - shadow_detach_old_tables(v); 4.473 - if ( shadow_mode_external(d) ) 4.474 + if ( v->arch.shadow.mode ) 4.475 { 4.476 - mfn = 
pagetable_get_mfn(v->arch.monitor_table); 4.477 - if ( mfn_valid(mfn) && (mfn_x(mfn) != 0) ) 4.478 - shadow_destroy_monitor_table(v, mfn); 4.479 - v->arch.monitor_table = pagetable_null(); 4.480 + v->arch.shadow.mode->detach_old_tables(v); 4.481 + if ( shadow_mode_external(d) ) 4.482 + { 4.483 + mfn = pagetable_get_mfn(v->arch.monitor_table); 4.484 + if ( mfn_valid(mfn) && (mfn_x(mfn) != 0) ) 4.485 + v->arch.shadow.mode->destroy_monitor_table(v, mfn); 4.486 + v->arch.monitor_table = pagetable_null(); 4.487 + } 4.488 } 4.489 } 4.490 } 4.491 @@ -2689,7 +2738,7 @@ void shadow_teardown(struct domain *d) 4.492 d->arch.shadow.free_pages, 4.493 d->arch.shadow.p2m_pages); 4.494 /* Destroy all the shadows and release memory to domheap */ 4.495 - set_sh_allocation(d, 0, NULL); 4.496 + sh_set_allocation(d, 0, NULL); 4.497 /* Release the hash table back to xenheap */ 4.498 if (d->arch.shadow.hash_table) 4.499 shadow_hash_teardown(d); 4.500 @@ -2755,10 +2804,10 @@ static int shadow_one_bit_enable(struct 4.501 if ( d->arch.shadow.mode == 0 ) 4.502 { 4.503 /* Init the shadow memory allocation and the hash table */ 4.504 - if ( set_sh_allocation(d, 1, NULL) != 0 4.505 + if ( sh_set_allocation(d, 1, NULL) != 0 4.506 || shadow_hash_alloc(d) != 0 ) 4.507 { 4.508 - set_sh_allocation(d, 0, NULL); 4.509 + sh_set_allocation(d, 0, NULL); 4.510 return -ENOMEM; 4.511 } 4.512 } 4.513 @@ -2794,7 +2843,8 @@ static int shadow_one_bit_disable(struct 4.514 d->arch.shadow.p2m_pages); 4.515 for_each_vcpu(d, v) 4.516 { 4.517 - shadow_detach_old_tables(v); 4.518 + if ( v->arch.shadow.mode ) 4.519 + v->arch.shadow.mode->detach_old_tables(v); 4.520 #if CONFIG_PAGING_LEVELS == 4 4.521 if ( !(v->arch.flags & TF_kernel_mode) ) 4.522 make_cr3(v, pagetable_get_pfn(v->arch.guest_table_user)); 4.523 @@ -2805,7 +2855,7 @@ static int shadow_one_bit_disable(struct 4.524 } 4.525 4.526 /* Pull down the memory allocation */ 4.527 - if ( set_sh_allocation(d, 0, NULL) != 0 ) 4.528 + if ( sh_set_allocation(d, 0, NULL) != 0 ) 4.529 { 4.530 // XXX - How can this occur? 
4.531 // Seems like a bug to return an error now that we've 4.532 @@ -2826,7 +2876,7 @@ static int shadow_one_bit_disable(struct 4.533 } 4.534 4.535 /* Enable/disable ops for the "test" and "log-dirty" modes */ 4.536 -int shadow_test_enable(struct domain *d) 4.537 +static int shadow_test_enable(struct domain *d) 4.538 { 4.539 int ret; 4.540 4.541 @@ -2849,7 +2899,7 @@ int shadow_test_enable(struct domain *d) 4.542 return ret; 4.543 } 4.544 4.545 -int shadow_test_disable(struct domain *d) 4.546 +static int shadow_test_disable(struct domain *d) 4.547 { 4.548 int ret; 4.549 4.550 @@ -2968,8 +3018,8 @@ sh_p2m_remove_page(struct domain *d, uns 4.551 4.552 if ( v != NULL ) 4.553 { 4.554 - shadow_remove_all_shadows_and_parents(v, _mfn(mfn)); 4.555 - if ( shadow_remove_all_mappings(v, _mfn(mfn)) ) 4.556 + sh_remove_all_shadows_and_parents(v, _mfn(mfn)); 4.557 + if ( sh_remove_all_mappings(v, _mfn(mfn)) ) 4.558 flush_tlb_mask(d->domain_dirty_cpumask); 4.559 } 4.560 4.561 @@ -3012,8 +3062,8 @@ shadow_guest_physmap_add_page(struct dom 4.562 v = d->vcpu[0]; 4.563 if ( v != NULL ) 4.564 { 4.565 - shadow_remove_all_shadows_and_parents(v, omfn); 4.566 - if ( shadow_remove_all_mappings(v, omfn) ) 4.567 + sh_remove_all_shadows_and_parents(v, omfn); 4.568 + if ( sh_remove_all_mappings(v, omfn) ) 4.569 flush_tlb_mask(d->domain_dirty_cpumask); 4.570 } 4.571 set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY); 4.572 @@ -3043,6 +3093,17 @@ shadow_guest_physmap_add_page(struct dom 4.573 4.574 shadow_set_p2m_entry(d, gfn, _mfn(mfn)); 4.575 set_gpfn_from_mfn(mfn, gfn); 4.576 + 4.577 +#if (SHADOW_OPTIMIZATIONS & SHOPT_FAST_FAULT_PATH) 4.578 + /* If we're doing FAST_FAULT_PATH, then shadow mode may have 4.579 + cached the fact that this is an mmio region in the shadow 4.580 + page tables. Blow the tables away to remove the cache. 4.581 + This is pretty heavy handed, but this is a rare operation 4.582 + (it might happen a dozen times during boot and then never 4.583 + again), so it doesn't matter too much. 
*/ 4.584 + shadow_blow_tables(d); 4.585 +#endif 4.586 + 4.587 shadow_audit_p2m(d); 4.588 shadow_unlock(d); 4.589 } 4.590 @@ -3130,14 +3191,13 @@ static int shadow_log_dirty_op( 4.591 4.592 4.593 /* Mark a page as dirty */ 4.594 -void sh_do_mark_dirty(struct domain *d, mfn_t gmfn) 4.595 +void sh_mark_dirty(struct domain *d, mfn_t gmfn) 4.596 { 4.597 unsigned long pfn; 4.598 4.599 ASSERT(shadow_locked_by_me(d)); 4.600 - ASSERT(shadow_mode_log_dirty(d)); 4.601 - 4.602 - if ( !mfn_valid(gmfn) ) 4.603 + 4.604 + if ( !shadow_mode_log_dirty(d) || !mfn_valid(gmfn) ) 4.605 return; 4.606 4.607 ASSERT(d->arch.shadow.dirty_bitmap != NULL); 4.608 @@ -3181,13 +3241,19 @@ void sh_do_mark_dirty(struct domain *d, 4.609 } 4.610 } 4.611 4.612 +void shadow_mark_dirty(struct domain *d, mfn_t gmfn) 4.613 +{ 4.614 + shadow_lock(d); 4.615 + sh_mark_dirty(d, gmfn); 4.616 + shadow_unlock(d); 4.617 +} 4.618 4.619 /**************************************************************************/ 4.620 /* Shadow-control XEN_DOMCTL dispatcher */ 4.621 4.622 int shadow_domctl(struct domain *d, 4.623 - xen_domctl_shadow_op_t *sc, 4.624 - XEN_GUEST_HANDLE(xen_domctl_t) u_domctl) 4.625 + xen_domctl_shadow_op_t *sc, 4.626 + XEN_GUEST_HANDLE(xen_domctl_t) u_domctl) 4.627 { 4.628 int rc, preempted = 0; 4.629 4.630 @@ -3233,7 +3299,9 @@ int shadow_domctl(struct domain *d, 4.631 return 0; 4.632 4.633 case XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION: 4.634 - rc = shadow_set_allocation(d, sc->mb, &preempted); 4.635 + shadow_lock(d); 4.636 + rc = sh_set_allocation(d, sc->mb << (20 - PAGE_SHIFT), &preempted); 4.637 + shadow_unlock(d); 4.638 if ( preempted ) 4.639 /* Not finished. Set up to re-run the call. */ 4.640 rc = hypercall_create_continuation(
5.1 --- a/xen/arch/x86/mm/shadow/multi.c Wed Dec 20 11:59:54 2006 +0000 5.2 +++ b/xen/arch/x86/mm/shadow/multi.c Wed Dec 20 12:03:07 2006 +0000 5.3 @@ -243,7 +243,7 @@ guest_walk_tables(struct vcpu *v, unsign 5.4 gw->l3mfn = vcpu_gfn_to_mfn(v, guest_l4e_get_gfn(*gw->l4e)); 5.5 if ( !mfn_valid(gw->l3mfn) ) return 1; 5.6 /* This mfn is a pagetable: make sure the guest can't write to it. */ 5.7 - if ( guest_op && shadow_remove_write_access(v, gw->l3mfn, 3, va) != 0 ) 5.8 + if ( guest_op && sh_remove_write_access(v, gw->l3mfn, 3, va) != 0 ) 5.9 flush_tlb_mask(v->domain->domain_dirty_cpumask); 5.10 gw->l3e = ((guest_l3e_t *)sh_map_domain_page(gw->l3mfn)) 5.11 + guest_l3_table_offset(va); 5.12 @@ -257,7 +257,7 @@ guest_walk_tables(struct vcpu *v, unsign 5.13 gw->l2mfn = vcpu_gfn_to_mfn(v, guest_l3e_get_gfn(*gw->l3e)); 5.14 if ( !mfn_valid(gw->l2mfn) ) return 1; 5.15 /* This mfn is a pagetable: make sure the guest can't write to it. */ 5.16 - if ( guest_op && shadow_remove_write_access(v, gw->l2mfn, 2, va) != 0 ) 5.17 + if ( guest_op && sh_remove_write_access(v, gw->l2mfn, 2, va) != 0 ) 5.18 flush_tlb_mask(v->domain->domain_dirty_cpumask); 5.19 gw->l2e = ((guest_l2e_t *)sh_map_domain_page(gw->l2mfn)) 5.20 + guest_l2_table_offset(va); 5.21 @@ -299,7 +299,7 @@ guest_walk_tables(struct vcpu *v, unsign 5.22 if ( !mfn_valid(gw->l1mfn) ) return 1; 5.23 /* This mfn is a pagetable: make sure the guest can't write to it. */ 5.24 if ( guest_op 5.25 - && shadow_remove_write_access(v, gw->l1mfn, 1, va) != 0 ) 5.26 + && sh_remove_write_access(v, gw->l1mfn, 1, va) != 0 ) 5.27 flush_tlb_mask(v->domain->domain_dirty_cpumask); 5.28 gw->l1e = ((guest_l1e_t *)sh_map_domain_page(gw->l1mfn)) 5.29 + guest_l1_table_offset(va); 5.30 @@ -492,7 +492,7 @@ static u32 guest_set_ad_bits(struct vcpu 5.31 u32 shflags = mfn_to_page(gmfn)->shadow_flags & SHF_page_type_mask; 5.32 /* More than one type bit set in shadow-flags? */ 5.33 if ( shflags & ~(1UL << find_first_set_bit(shflags)) ) 5.34 - res = __shadow_validate_guest_entry(v, gmfn, ep, sizeof(*ep)); 5.35 + res = sh_validate_guest_entry(v, gmfn, ep, sizeof (*ep)); 5.36 } 5.37 5.38 /* We should never need to flush the TLB or recopy PAE entries */ 5.39 @@ -2847,7 +2847,7 @@ static int sh_page_fault(struct vcpu *v, 5.40 /* If this is actually a page table, then we have a bug, and need 5.41 * to support more operations in the emulator. More likely, 5.42 * though, this is a hint that this page should not be shadowed. 
*/ 5.43 - shadow_remove_all_shadows(v, gmfn); 5.44 + sh_remove_shadows(v, gmfn, 0 /* thorough */, 1 /* must succeed */); 5.45 } 5.46 5.47 /* Emulator has changed the user registers: write back */ 5.48 @@ -3080,7 +3080,7 @@ sh_update_linear_entries(struct vcpu *v) 5.49 sh_unmap_domain_page(ml4e); 5.50 } 5.51 5.52 - /* Shadow l3 tables are made up by update_cr3 */ 5.53 + /* Shadow l3 tables are made up by sh_update_cr3 */ 5.54 sl3e = v->arch.shadow.l3table; 5.55 5.56 for ( i = 0; i < SHADOW_L3_PAGETABLE_ENTRIES; i++ ) 5.57 @@ -3118,7 +3118,7 @@ sh_update_linear_entries(struct vcpu *v) 5.58 int unmap_l2e = 0; 5.59 5.60 #if GUEST_PAGING_LEVELS == 2 5.61 - /* Shadow l3 tables were built by update_cr3 */ 5.62 + /* Shadow l3 tables were built by sh_update_cr3 */ 5.63 if ( shadow_mode_external(d) ) 5.64 shadow_l3e = (shadow_l3e_t *)&v->arch.shadow.l3table; 5.65 else 5.66 @@ -3341,12 +3341,15 @@ sh_set_toplevel_shadow(struct vcpu *v, 5.67 5.68 5.69 static void 5.70 -sh_update_cr3(struct vcpu *v) 5.71 +sh_update_cr3(struct vcpu *v, int do_locking) 5.72 /* Updates vcpu->arch.cr3 after the guest has changed CR3. 5.73 * Paravirtual guests should set v->arch.guest_table (and guest_table_user, 5.74 * if appropriate). 5.75 * HVM guests should also make sure hvm_get_guest_cntl_reg(v, 3) works, 5.76 * and read vcpu->arch.hvm_vcpu.hw_cr3 afterwards. 5.77 + * If do_locking != 0, assume we are being called from outside the 5.78 + * shadow code, and must take and release the shadow lock; otherwise 5.79 + * that is the caller's respnsibility. 5.80 */ 5.81 { 5.82 struct domain *d = v->domain; 5.83 @@ -3355,6 +3358,15 @@ sh_update_cr3(struct vcpu *v) 5.84 u32 guest_idx=0; 5.85 #endif 5.86 5.87 + /* Don't do anything on an uninitialised vcpu */ 5.88 + if ( !is_hvm_domain(d) && !test_bit(_VCPUF_initialised, &v->vcpu_flags) ) 5.89 + { 5.90 + ASSERT(v->arch.cr3 == 0); 5.91 + return; 5.92 + } 5.93 + 5.94 + if ( do_locking ) shadow_lock(v->domain); 5.95 + 5.96 ASSERT(shadow_locked_by_me(v->domain)); 5.97 ASSERT(v->arch.shadow.mode); 5.98 5.99 @@ -3400,11 +3412,6 @@ sh_update_cr3(struct vcpu *v) 5.100 #endif 5.101 gmfn = pagetable_get_mfn(v->arch.guest_table); 5.102 5.103 - if ( !is_hvm_domain(d) && !test_bit(_VCPUF_initialised, &v->vcpu_flags) ) 5.104 - { 5.105 - ASSERT(v->arch.cr3 == 0); 5.106 - return; 5.107 - } 5.108 5.109 //// 5.110 //// vcpu->arch.guest_vtable 5.111 @@ -3466,7 +3473,7 @@ sh_update_cr3(struct vcpu *v) 5.112 * replace the old shadow pagetable(s), so that we can safely use the 5.113 * (old) shadow linear maps in the writeable mapping heuristics. 
*/ 5.114 #if GUEST_PAGING_LEVELS == 2 5.115 - if ( shadow_remove_write_access(v, gmfn, 2, 0) != 0 ) 5.116 + if ( sh_remove_write_access(v, gmfn, 2, 0) != 0 ) 5.117 flush_tlb_mask(v->domain->domain_dirty_cpumask); 5.118 sh_set_toplevel_shadow(v, 0, gmfn, SH_type_l2_shadow); 5.119 #elif GUEST_PAGING_LEVELS == 3 5.120 @@ -3484,7 +3491,7 @@ sh_update_cr3(struct vcpu *v) 5.121 { 5.122 gl2gfn = guest_l3e_get_gfn(gl3e[i]); 5.123 gl2mfn = vcpu_gfn_to_mfn(v, gl2gfn); 5.124 - flush |= shadow_remove_write_access(v, gl2mfn, 2, 0); 5.125 + flush |= sh_remove_write_access(v, gl2mfn, 2, 0); 5.126 } 5.127 } 5.128 if ( flush ) 5.129 @@ -3506,7 +3513,7 @@ sh_update_cr3(struct vcpu *v) 5.130 } 5.131 } 5.132 #elif GUEST_PAGING_LEVELS == 4 5.133 - if ( shadow_remove_write_access(v, gmfn, 4, 0) != 0 ) 5.134 + if ( sh_remove_write_access(v, gmfn, 4, 0) != 0 ) 5.135 flush_tlb_mask(v->domain->domain_dirty_cpumask); 5.136 sh_set_toplevel_shadow(v, 0, gmfn, SH_type_l4_shadow); 5.137 #else 5.138 @@ -3582,6 +3589,9 @@ sh_update_cr3(struct vcpu *v) 5.139 5.140 /* Fix up the linear pagetable mappings */ 5.141 sh_update_linear_entries(v); 5.142 + 5.143 + /* Release the lock, if we took it (otherwise it's the caller's problem) */ 5.144 + if ( do_locking ) shadow_unlock(v->domain); 5.145 } 5.146 5.147 5.148 @@ -3637,7 +3647,8 @@ static int sh_guess_wrmap(struct vcpu *v 5.149 } 5.150 #endif 5.151 5.152 -int sh_remove_write_access(struct vcpu *v, mfn_t sl1mfn, mfn_t readonly_mfn) 5.153 +int sh_rm_write_access_from_l1(struct vcpu *v, mfn_t sl1mfn, 5.154 + mfn_t readonly_mfn) 5.155 /* Excises all writeable mappings to readonly_mfn from this l1 shadow table */ 5.156 { 5.157 shadow_l1e_t *sl1e; 5.158 @@ -3668,7 +3679,7 @@ int sh_remove_write_access(struct vcpu * 5.159 } 5.160 5.161 5.162 -int sh_remove_all_mappings(struct vcpu *v, mfn_t sl1mfn, mfn_t target_mfn) 5.163 +int sh_rm_mappings_from_l1(struct vcpu *v, mfn_t sl1mfn, mfn_t target_mfn) 5.164 /* Excises all mappings to guest frame from this shadow l1 table */ 5.165 { 5.166 shadow_l1e_t *sl1e; 5.167 @@ -3888,7 +3899,7 @@ sh_x86_emulate_write(struct vcpu *v, uns 5.168 5.169 skip = safe_not_to_verify_write(mfn, addr, src, bytes); 5.170 memcpy(addr, src, bytes); 5.171 - if ( !skip ) shadow_validate_guest_pt_write(v, mfn, addr, bytes); 5.172 + if ( !skip ) sh_validate_guest_pt_write(v, mfn, addr, bytes); 5.173 5.174 /* If we are writing zeros to this page, might want to unshadow */ 5.175 if ( likely(bytes >= 4) && (*(u32 *)addr == 0) ) 5.176 @@ -3933,7 +3944,7 @@ sh_x86_emulate_cmpxchg(struct vcpu *v, u 5.177 5.178 if ( prev == old ) 5.179 { 5.180 - if ( !skip ) shadow_validate_guest_pt_write(v, mfn, addr, bytes); 5.181 + if ( !skip ) sh_validate_guest_pt_write(v, mfn, addr, bytes); 5.182 } 5.183 else 5.184 rv = X86EMUL_CMPXCHG_FAILED; 5.185 @@ -3977,7 +3988,7 @@ sh_x86_emulate_cmpxchg8b(struct vcpu *v, 5.186 5.187 if ( prev == old ) 5.188 { 5.189 - if ( !skip ) shadow_validate_guest_pt_write(v, mfn, addr, 8); 5.190 + if ( !skip ) sh_validate_guest_pt_write(v, mfn, addr, 8); 5.191 } 5.192 else 5.193 rv = X86EMUL_CMPXCHG_FAILED;
6.1 --- a/xen/arch/x86/mm/shadow/multi.h Wed Dec 20 11:59:54 2006 +0000 6.2 +++ b/xen/arch/x86/mm/shadow/multi.h Wed Dec 20 12:03:07 2006 +0000 6.3 @@ -61,10 +61,10 @@ SHADOW_INTERNAL_NAME(sh_unhook_64b_mappi 6.4 (struct vcpu *v, mfn_t sl4mfn); 6.5 6.6 extern int 6.7 -SHADOW_INTERNAL_NAME(sh_remove_write_access, SHADOW_LEVELS, GUEST_LEVELS) 6.8 +SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1, SHADOW_LEVELS, GUEST_LEVELS) 6.9 (struct vcpu *v, mfn_t sl1mfn, mfn_t readonly_mfn); 6.10 extern int 6.11 -SHADOW_INTERNAL_NAME(sh_remove_all_mappings, SHADOW_LEVELS, GUEST_LEVELS) 6.12 +SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1, SHADOW_LEVELS, GUEST_LEVELS) 6.13 (struct vcpu *v, mfn_t sl1mfn, mfn_t target_mfn); 6.14 6.15 extern void
7.1 --- a/xen/arch/x86/mm/shadow/private.h Wed Dec 20 11:59:54 2006 +0000 7.2 +++ b/xen/arch/x86/mm/shadow/private.h Wed Dec 20 12:03:07 2006 +0000 7.3 @@ -33,8 +33,43 @@ 7.4 7.5 7.6 /****************************************************************************** 7.7 + * Levels of self-test and paranoia 7.8 + */ 7.9 + 7.10 +#define SHADOW_AUDIT_HASH 0x01 /* Check current hash bucket */ 7.11 +#define SHADOW_AUDIT_HASH_FULL 0x02 /* Check every hash bucket */ 7.12 +#define SHADOW_AUDIT_ENTRIES 0x04 /* Check this walk's shadows */ 7.13 +#define SHADOW_AUDIT_ENTRIES_FULL 0x08 /* Check every shadow */ 7.14 +#define SHADOW_AUDIT_ENTRIES_MFNS 0x10 /* Check gfn-mfn map in shadows */ 7.15 +#define SHADOW_AUDIT_P2M 0x20 /* Check the p2m table */ 7.16 + 7.17 +#ifdef NDEBUG 7.18 +#define SHADOW_AUDIT 0 7.19 +#define SHADOW_AUDIT_ENABLE 0 7.20 +#else 7.21 +#define SHADOW_AUDIT 0x15 /* Basic audit of all except p2m. */ 7.22 +#define SHADOW_AUDIT_ENABLE shadow_audit_enable 7.23 +extern int shadow_audit_enable; 7.24 +#endif 7.25 + 7.26 +/****************************************************************************** 7.27 + * Levels of optimization 7.28 + */ 7.29 + 7.30 +#define SHOPT_WRITABLE_HEURISTIC 0x01 /* Guess at RW PTEs via linear maps */ 7.31 +#define SHOPT_EARLY_UNSHADOW 0x02 /* Unshadow l1s on fork or exit */ 7.32 +#define SHOPT_FAST_FAULT_PATH 0x04 /* Fast-path MMIO and not-present */ 7.33 +#define SHOPT_PREFETCH 0x08 /* Shadow multiple entries per fault */ 7.34 +#define SHOPT_LINUX_L3_TOPLEVEL 0x10 /* Pin l3es on early 64bit linux */ 7.35 +#define SHOPT_SKIP_VERIFY 0x20 /* Skip PTE v'fy when safe to do so */ 7.36 + 7.37 +#define SHADOW_OPTIMIZATIONS 0x3f 7.38 + 7.39 + 7.40 +/****************************************************************************** 7.41 * Debug and error-message output 7.42 */ 7.43 + 7.44 #define SHADOW_PRINTK(_f, _a...) \ 7.45 debugtrace_printk("sh: %s(): " _f, __func__, ##_a) 7.46 #define SHADOW_ERROR(_f, _a...) \ 7.47 @@ -54,6 +89,58 @@ 7.48 #define SHADOW_DEBUG_EMULATE 1 7.49 #define SHADOW_DEBUG_LOGDIRTY 0 7.50 7.51 +/****************************************************************************** 7.52 + * The shadow lock. 7.53 + * 7.54 + * This lock is per-domain. It is intended to allow us to make atomic 7.55 + * updates to the software TLB that the shadow tables provide. 
7.56 + * 7.57 + * Specifically, it protects: 7.58 + * - all changes to shadow page table pages 7.59 + * - the shadow hash table 7.60 + * - the shadow page allocator 7.61 + * - all changes to guest page table pages 7.62 + * - all changes to the page_info->tlbflush_timestamp 7.63 + * - the page_info->count fields on shadow pages 7.64 + * - the shadow dirty bit array and count 7.65 + */ 7.66 +#ifndef CONFIG_SMP 7.67 +#error shadow.h currently requires CONFIG_SMP 7.68 +#endif 7.69 + 7.70 +#define shadow_lock_init(_d) \ 7.71 + do { \ 7.72 + spin_lock_init(&(_d)->arch.shadow.lock); \ 7.73 + (_d)->arch.shadow.locker = -1; \ 7.74 + (_d)->arch.shadow.locker_function = "nobody"; \ 7.75 + } while (0) 7.76 + 7.77 +#define shadow_locked_by_me(_d) \ 7.78 + (current->processor == (_d)->arch.shadow.locker) 7.79 + 7.80 +#define shadow_lock(_d) \ 7.81 + do { \ 7.82 + if ( unlikely((_d)->arch.shadow.locker == current->processor) ) \ 7.83 + { \ 7.84 + printk("Error: shadow lock held by %s\n", \ 7.85 + (_d)->arch.shadow.locker_function); \ 7.86 + BUG(); \ 7.87 + } \ 7.88 + spin_lock(&(_d)->arch.shadow.lock); \ 7.89 + ASSERT((_d)->arch.shadow.locker == -1); \ 7.90 + (_d)->arch.shadow.locker = current->processor; \ 7.91 + (_d)->arch.shadow.locker_function = __func__; \ 7.92 + } while (0) 7.93 + 7.94 +#define shadow_unlock(_d) \ 7.95 + do { \ 7.96 + ASSERT((_d)->arch.shadow.locker == current->processor); \ 7.97 + (_d)->arch.shadow.locker = -1; \ 7.98 + (_d)->arch.shadow.locker_function = "nobody"; \ 7.99 + spin_unlock(&(_d)->arch.shadow.lock); \ 7.100 + } while (0) 7.101 + 7.102 + 7.103 7.104 /****************************************************************************** 7.105 * Auditing routines 7.106 @@ -291,6 +378,21 @@ void sh_install_xen_entries_in_l4(struct 7.107 void sh_install_xen_entries_in_l2h(struct vcpu *v, mfn_t sl2hmfn); 7.108 void sh_install_xen_entries_in_l2(struct vcpu *v, mfn_t gl2mfn, mfn_t sl2mfn); 7.109 7.110 +/* Update the shadows in response to a pagetable write from Xen */ 7.111 +extern int sh_validate_guest_entry(struct vcpu *v, mfn_t gmfn, 7.112 + void *entry, u32 size); 7.113 + 7.114 +/* Update the shadows in response to a pagetable write from a HVM guest */ 7.115 +extern void sh_validate_guest_pt_write(struct vcpu *v, mfn_t gmfn, 7.116 + void *entry, u32 size); 7.117 + 7.118 +/* Remove all writeable mappings of a guest frame from the shadows. 7.119 + * Returns non-zero if we need to flush TLBs. 7.120 + * level and fault_addr desribe how we found this to be a pagetable; 7.121 + * level==0 means we have some other reason for revoking write access. */ 7.122 +extern int sh_remove_write_access(struct vcpu *v, mfn_t readonly_mfn, 7.123 + unsigned int level, 7.124 + unsigned long fault_addr); 7.125 7.126 /****************************************************************************** 7.127 * Flags used in the return value of the shadow_set_lXe() functions... 
7.128 @@ -325,6 +427,26 @@ void sh_install_xen_entries_in_l2(struct 7.129 #undef mfn_valid 7.130 #define mfn_valid(_mfn) (mfn_x(_mfn) < max_page) 7.131 7.132 + 7.133 +static inline int 7.134 +sh_mfn_is_a_page_table(mfn_t gmfn) 7.135 +{ 7.136 + struct page_info *page = mfn_to_page(gmfn); 7.137 + struct domain *owner; 7.138 + unsigned long type_info; 7.139 + 7.140 + if ( !mfn_valid(gmfn) ) 7.141 + return 0; 7.142 + 7.143 + owner = page_get_owner(page); 7.144 + if ( owner && shadow_mode_refcounts(owner) 7.145 + && (page->count_info & PGC_page_table) ) 7.146 + return 1; 7.147 + 7.148 + type_info = page->u.inuse.type_info & PGT_type_mask; 7.149 + return type_info && (type_info <= PGT_l4_page_table); 7.150 +} 7.151 + 7.152 // Provide mfn_t-aware versions of common xen functions 7.153 static inline void * 7.154 sh_map_domain_page(mfn_t mfn) 7.155 @@ -350,6 +472,25 @@ sh_unmap_domain_page_global(void *p) 7.156 unmap_domain_page_global(p); 7.157 } 7.158 7.159 +static inline mfn_t 7.160 +pagetable_get_mfn(pagetable_t pt) 7.161 +{ 7.162 + return _mfn(pagetable_get_pfn(pt)); 7.163 +} 7.164 + 7.165 +static inline pagetable_t 7.166 +pagetable_from_mfn(mfn_t mfn) 7.167 +{ 7.168 + return pagetable_from_pfn(mfn_x(mfn)); 7.169 +} 7.170 + 7.171 + 7.172 +/****************************************************************************** 7.173 + * Log-dirty mode bitmap handling 7.174 + */ 7.175 + 7.176 +extern void sh_mark_dirty(struct domain *d, mfn_t gmfn); 7.177 + 7.178 static inline int 7.179 sh_mfn_is_dirty(struct domain *d, mfn_t gmfn) 7.180 /* Is this guest page dirty? Call only in log-dirty mode. */ 7.181 @@ -368,25 +509,6 @@ sh_mfn_is_dirty(struct domain *d, mfn_t 7.182 return 0; 7.183 } 7.184 7.185 -static inline int 7.186 -sh_mfn_is_a_page_table(mfn_t gmfn) 7.187 -{ 7.188 - struct page_info *page = mfn_to_page(gmfn); 7.189 - struct domain *owner; 7.190 - unsigned long type_info; 7.191 - 7.192 - if ( !mfn_valid(gmfn) ) 7.193 - return 0; 7.194 - 7.195 - owner = page_get_owner(page); 7.196 - if ( owner && shadow_mode_refcounts(owner) 7.197 - && (page->count_info & PGC_page_table) ) 7.198 - return 1; 7.199 - 7.200 - type_info = page->u.inuse.type_info & PGT_type_mask; 7.201 - return type_info && (type_info <= PGT_l4_page_table); 7.202 -} 7.203 - 7.204 7.205 /**************************************************************************/ 7.206 /* Shadow-page refcounting. */
8.1 --- a/xen/arch/x86/mm/shadow/types.h Wed Dec 20 11:59:54 2006 +0000 8.2 +++ b/xen/arch/x86/mm/shadow/types.h Wed Dec 20 12:03:07 2006 +0000 8.3 @@ -477,8 +477,8 @@ struct shadow_walk_t 8.4 #define sh_gva_to_gpa INTERNAL_NAME(sh_gva_to_gpa) 8.5 #define sh_gva_to_gfn INTERNAL_NAME(sh_gva_to_gfn) 8.6 #define sh_update_cr3 INTERNAL_NAME(sh_update_cr3) 8.7 -#define sh_remove_write_access INTERNAL_NAME(sh_remove_write_access) 8.8 -#define sh_remove_all_mappings INTERNAL_NAME(sh_remove_all_mappings) 8.9 +#define sh_rm_write_access_from_l1 INTERNAL_NAME(sh_rm_write_access_from_l1) 8.10 +#define sh_rm_mappings_from_l1 INTERNAL_NAME(sh_rm_mappings_from_l1) 8.11 #define sh_remove_l1_shadow INTERNAL_NAME(sh_remove_l1_shadow) 8.12 #define sh_remove_l2_shadow INTERNAL_NAME(sh_remove_l2_shadow) 8.13 #define sh_remove_l3_shadow INTERNAL_NAME(sh_remove_l3_shadow)
9.1 --- a/xen/include/asm-x86/mm.h Wed Dec 20 11:59:54 2006 +0000 9.2 +++ b/xen/include/asm-x86/mm.h Wed Dec 20 12:03:07 2006 +0000 9.3 @@ -307,7 +307,7 @@ void audit_domains(void); 9.4 9.5 int new_guest_cr3(unsigned long pfn); 9.6 void make_cr3(struct vcpu *v, unsigned long mfn); 9.7 - 9.8 +void update_cr3(struct vcpu *v); 9.9 void propagate_page_fault(unsigned long addr, u16 error_code); 9.10 9.11 int __sync_lazy_execstate(void);
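Illustrative sketch (not part of the changeset): a rough caller-side pattern for the new update_cr3() declaration, assuming the usual sequence of installing a guest table and then loading CR3; example_switch_toplevel() is a hypothetical name, write_ptbase() is an assumed existing helper, and reference counting/validation of the new table are omitted:

/* Hypothetical caller: after pointing the vcpu at a new top-level
 * guest table, recompute v->arch.cr3 and load it. */
static void example_switch_toplevel(struct vcpu *v, unsigned long mfn)
{
    v->arch.guest_table = pagetable_from_pfn(mfn);
    update_cr3(v);      /* shadow or direct path, as appropriate */
    write_ptbase(v);    /* assumed helper that writes the hardware CR3 */
}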
10.1 --- a/xen/include/asm-x86/shadow.h Wed Dec 20 11:59:54 2006 +0000 10.2 +++ b/xen/include/asm-x86/shadow.h Wed Dec 20 12:03:07 2006 +0000 10.3 @@ -29,20 +29,8 @@ 10.4 #include <xen/domain_page.h> 10.5 #include <asm/flushtlb.h> 10.6 10.7 -/* How to make sure a page is not referred to in a shadow PT */ 10.8 -/* This will need to be a for_each_vcpu if we go to per-vcpu shadows */ 10.9 -#define shadow_drop_references(_d, _p) \ 10.10 - shadow_remove_all_mappings((_d)->vcpu[0], _mfn(page_to_mfn(_p))) 10.11 -#define shadow_sync_and_drop_references(_d, _p) \ 10.12 - shadow_remove_all_mappings((_d)->vcpu[0], _mfn(page_to_mfn(_p))) 10.13 - 10.14 -/* How to add and remove entries in the p2m mapping. */ 10.15 -#define guest_physmap_add_page(_d, _p, _m) \ 10.16 - shadow_guest_physmap_add_page((_d), (_p), (_m)) 10.17 -#define guest_physmap_remove_page(_d, _p, _m ) \ 10.18 - shadow_guest_physmap_remove_page((_d), (_p), (_m)) 10.19 - 10.20 -/* Shadow PT operation mode : shadow-mode variable in arch_domain. */ 10.21 +/***************************************************************************** 10.22 + * Macros to tell which shadow paging mode a domain is in */ 10.23 10.24 #define SHM2_shift 10 10.25 /* We're in one of the shadow modes */ 10.26 @@ -64,107 +52,24 @@ 10.27 #define shadow_mode_external(_d) ((_d)->arch.shadow.mode & SHM2_external) 10.28 10.29 /* Xen traps & emulates all reads of all page table pages: 10.30 - * not yet supported 10.31 - */ 10.32 + * not yet supported */ 10.33 #define shadow_mode_trap_reads(_d) ({ (void)(_d); 0; }) 10.34 10.35 -// How do we tell that we have a 32-bit PV guest in a 64-bit Xen? 10.36 -#ifdef __x86_64__ 10.37 -#define pv_32bit_guest(_v) 0 // not yet supported 10.38 -#else 10.39 -#define pv_32bit_guest(_v) !is_hvm_vcpu(v) 10.40 -#endif 10.41 10.42 -/* The shadow lock. 10.43 - * 10.44 - * This lock is per-domain. It is intended to allow us to make atomic 10.45 - * updates to the software TLB that the shadow tables provide. 10.46 - * 10.47 - * Specifically, it protects: 10.48 - * - all changes to shadow page table pages 10.49 - * - the shadow hash table 10.50 - * - the shadow page allocator 10.51 - * - all changes to guest page table pages; if/when the notion of 10.52 - * out-of-sync pages is added to this code, then the shadow lock is 10.53 - * protecting all guest page table pages which are not listed as 10.54 - * currently as both guest-writable and out-of-sync... 10.55 - * XXX -- need to think about this relative to writable page tables. 10.56 - * - all changes to the page_info->tlbflush_timestamp 10.57 - * - the page_info->count fields on shadow pages 10.58 - * - the shadow dirty bit array and count 10.59 - * - XXX 10.60 - */ 10.61 -#ifndef CONFIG_SMP 10.62 -#error shadow.h currently requires CONFIG_SMP 10.63 -#endif 10.64 - 10.65 -#define shadow_lock_init(_d) \ 10.66 - do { \ 10.67 - spin_lock_init(&(_d)->arch.shadow.lock); \ 10.68 - (_d)->arch.shadow.locker = -1; \ 10.69 - (_d)->arch.shadow.locker_function = "nobody"; \ 10.70 - } while (0) 10.71 - 10.72 -#define shadow_locked_by_me(_d) \ 10.73 - (current->processor == (_d)->arch.shadow.locker) 10.74 +/****************************************************************************** 10.75 + * The equivalent for a particular vcpu of a shadowed domain. 
*/ 10.76 10.77 -#define shadow_lock(_d) \ 10.78 - do { \ 10.79 - if ( unlikely((_d)->arch.shadow.locker == current->processor) ) \ 10.80 - { \ 10.81 - printk("Error: shadow lock held by %s\n", \ 10.82 - (_d)->arch.shadow.locker_function); \ 10.83 - BUG(); \ 10.84 - } \ 10.85 - spin_lock(&(_d)->arch.shadow.lock); \ 10.86 - ASSERT((_d)->arch.shadow.locker == -1); \ 10.87 - (_d)->arch.shadow.locker = current->processor; \ 10.88 - (_d)->arch.shadow.locker_function = __func__; \ 10.89 - } while (0) 10.90 - 10.91 -#define shadow_unlock(_d) \ 10.92 - do { \ 10.93 - ASSERT((_d)->arch.shadow.locker == current->processor); \ 10.94 - (_d)->arch.shadow.locker = -1; \ 10.95 - (_d)->arch.shadow.locker_function = "nobody"; \ 10.96 - spin_unlock(&(_d)->arch.shadow.lock); \ 10.97 - } while (0) 10.98 - 10.99 -/* 10.100 - * Levels of self-test and paranoia 10.101 - * XXX should go in config files somewhere? 10.102 - */ 10.103 -#define SHADOW_AUDIT_HASH 0x01 /* Check current hash bucket */ 10.104 -#define SHADOW_AUDIT_HASH_FULL 0x02 /* Check every hash bucket */ 10.105 -#define SHADOW_AUDIT_ENTRIES 0x04 /* Check this walk's shadows */ 10.106 -#define SHADOW_AUDIT_ENTRIES_FULL 0x08 /* Check every shadow */ 10.107 -#define SHADOW_AUDIT_ENTRIES_MFNS 0x10 /* Check gfn-mfn map in shadows */ 10.108 -#define SHADOW_AUDIT_P2M 0x20 /* Check the p2m table */ 10.109 - 10.110 -#ifdef NDEBUG 10.111 -#define SHADOW_AUDIT 0 10.112 -#define SHADOW_AUDIT_ENABLE 0 10.113 -#else 10.114 -#define SHADOW_AUDIT 0x15 /* Basic audit of all except p2m. */ 10.115 -#define SHADOW_AUDIT_ENABLE shadow_audit_enable 10.116 -extern int shadow_audit_enable; 10.117 -#endif 10.118 - 10.119 -/* 10.120 - * Levels of optimization 10.121 - * XXX should go in config files somewhere? 10.122 - */ 10.123 -#define SHOPT_WRITABLE_HEURISTIC 0x01 /* Guess at RW PTEs via linear maps */ 10.124 -#define SHOPT_EARLY_UNSHADOW 0x02 /* Unshadow l1s on fork or exit */ 10.125 -#define SHOPT_FAST_FAULT_PATH 0x04 /* Fast-path MMIO and not-present */ 10.126 -#define SHOPT_PREFETCH 0x08 /* Shadow multiple entries per fault */ 10.127 -#define SHOPT_LINUX_L3_TOPLEVEL 0x10 /* Pin l3es on early 64bit linux */ 10.128 -#define SHOPT_SKIP_VERIFY 0x20 /* Skip PTE v'fy when safe to do so */ 10.129 - 10.130 -#define SHADOW_OPTIMIZATIONS 0x3f 10.131 +/* Is this vcpu using the P2M table to translate between GFNs and MFNs? 10.132 + * 10.133 + * This is true of translated HVM domains on a vcpu which has paging 10.134 + * enabled. (HVM vcpus with paging disabled are using the p2m table as 10.135 + * its paging table, so no translation occurs in this case.) 10.136 + * It is also true for all vcpus of translated PV domains. */ 10.137 +#define shadow_vcpu_mode_translate(_v) ((_v)->arch.shadow.translate_enabled) 10.138 10.139 10.140 -/* With shadow pagetables, the different kinds of address start 10.141 +/****************************************************************************** 10.142 + * With shadow pagetables, the different kinds of address start 10.143 * to get get confusing. 
10.144 * 10.145 * Virtual addresses are what they usually are: the addresses that are used 10.146 @@ -214,38 +119,16 @@ static inline _type _name##_x(_name##_t 10.147 #endif 10.148 10.149 TYPE_SAFE(unsigned long,mfn) 10.150 + 10.151 +/* Macro for printk formats: use as printk("%"SH_PRI_mfn"\n", mfn_x(foo)); */ 10.152 #define SH_PRI_mfn "05lx" 10.153 10.154 -static inline mfn_t 10.155 -pagetable_get_mfn(pagetable_t pt) 10.156 -{ 10.157 - return _mfn(pagetable_get_pfn(pt)); 10.158 -} 10.159 10.160 -static inline pagetable_t 10.161 -pagetable_from_mfn(mfn_t mfn) 10.162 -{ 10.163 - return pagetable_from_pfn(mfn_x(mfn)); 10.164 -} 10.165 - 10.166 -static inline int 10.167 -shadow_vcpu_mode_translate(struct vcpu *v) 10.168 -{ 10.169 - // Returns true if this VCPU needs to be using the P2M table to translate 10.170 - // between GFNs and MFNs. 10.171 - // 10.172 - // This is true of translated HVM domains on a vcpu which has paging 10.173 - // enabled. (HVM vcpu's with paging disabled are using the p2m table as 10.174 - // its paging table, so no translation occurs in this case.) 10.175 - // 10.176 - // It is also true for translated PV domains. 10.177 - // 10.178 - return v->arch.shadow.translate_enabled; 10.179 -} 10.180 - 10.181 - 10.182 -/**************************************************************************/ 10.183 -/* Mode-specific entry points into the shadow code */ 10.184 +/***************************************************************************** 10.185 + * Mode-specific entry points into the shadow code. 10.186 + * 10.187 + * These shouldn't be used directly by callers; rather use the functions 10.188 + * below which will indirect through this table as appropriate. */ 10.189 10.190 struct sh_emulate_ctxt; 10.191 struct shadow_paging_mode { 10.192 @@ -254,7 +137,7 @@ struct shadow_paging_mode { 10.193 int (*invlpg )(struct vcpu *v, unsigned long va); 10.194 paddr_t (*gva_to_gpa )(struct vcpu *v, unsigned long va); 10.195 unsigned long (*gva_to_gfn )(struct vcpu *v, unsigned long va); 10.196 - void (*update_cr3 )(struct vcpu *v); 10.197 + void (*update_cr3 )(struct vcpu *v, int do_locking); 10.198 int (*map_and_validate_gl1e )(struct vcpu *v, mfn_t gmfn, 10.199 void *new_guest_entry, u32 size); 10.200 int (*map_and_validate_gl2e )(struct vcpu *v, mfn_t gmfn, 10.201 @@ -286,35 +169,30 @@ struct shadow_paging_mode { 10.202 unsigned long *gl1mfn); 10.203 void (*guest_get_eff_l1e )(struct vcpu *v, unsigned long va, 10.204 void *eff_l1e); 10.205 -#if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC 10.206 int (*guess_wrmap )(struct vcpu *v, 10.207 unsigned long vaddr, mfn_t gmfn); 10.208 -#endif 10.209 /* For outsiders to tell what mode we're in */ 10.210 unsigned int shadow_levels; 10.211 unsigned int guest_levels; 10.212 }; 10.213 10.214 -static inline int shadow_guest_paging_levels(struct vcpu *v) 10.215 -{ 10.216 - ASSERT(v->arch.shadow.mode != NULL); 10.217 - return v->arch.shadow.mode->guest_levels; 10.218 -} 10.219 + 10.220 +/***************************************************************************** 10.221 + * Entry points into the shadow code */ 10.222 10.223 -/**************************************************************************/ 10.224 -/* Entry points into the shadow code */ 10.225 +/* Set up the shadow-specific parts of a domain struct at start of day. 10.226 + * Called for every domain from arch_domain_create() */ 10.227 +void shadow_domain_init(struct domain *d); 10.228 10.229 -/* Enable arbitrary shadow mode. */ 10.230 +/* Enable an arbitrary shadow mode. 
Call once at domain creation. */ 10.231 int shadow_enable(struct domain *d, u32 mode); 10.232 10.233 -/* Turning on shadow test mode */ 10.234 -int shadow_test_enable(struct domain *d); 10.235 - 10.236 -/* Handler for shadow control ops: enabling and disabling shadow modes, 10.237 - * and log-dirty bitmap ops all happen through here. */ 10.238 +/* Handler for shadow control ops: operations from user-space to enable 10.239 + * and disable ephemeral shadow modes (test mode and log-dirty mode) and 10.240 + * manipulate the log-dirty bitmap. */ 10.241 int shadow_domctl(struct domain *d, 10.242 - xen_domctl_shadow_op_t *sc, 10.243 - XEN_GUEST_HANDLE(xen_domctl_t) u_domctl); 10.244 + xen_domctl_shadow_op_t *sc, 10.245 + XEN_GUEST_HANDLE(xen_domctl_t) u_domctl); 10.246 10.247 /* Call when destroying a domain */ 10.248 void shadow_teardown(struct domain *d); 10.249 @@ -322,164 +200,96 @@ void shadow_teardown(struct domain *d); 10.250 /* Call once all of the references to the domain have gone away */ 10.251 void shadow_final_teardown(struct domain *d); 10.252 10.253 - 10.254 -/* Mark a page as dirty in the bitmap */ 10.255 -void sh_do_mark_dirty(struct domain *d, mfn_t gmfn); 10.256 +/* Mark a page as dirty in the log-dirty bitmap: called when Xen 10.257 + * makes changes to guest memory on its behalf. */ 10.258 +void shadow_mark_dirty(struct domain *d, mfn_t gmfn); 10.259 +/* Cleaner version so we don't pepper shadow_mode tests all over the place */ 10.260 static inline void mark_dirty(struct domain *d, unsigned long gmfn) 10.261 { 10.262 - if ( likely(!shadow_mode_log_dirty(d)) ) 10.263 - return; 10.264 - 10.265 - shadow_lock(d); 10.266 - sh_do_mark_dirty(d, _mfn(gmfn)); 10.267 - shadow_unlock(d); 10.268 + if ( unlikely(shadow_mode_log_dirty(d)) ) 10.269 + shadow_mark_dirty(d, _mfn(gmfn)); 10.270 } 10.271 10.272 -/* Internal version, for when the shadow lock is already held */ 10.273 -static inline void sh_mark_dirty(struct domain *d, mfn_t gmfn) 10.274 -{ 10.275 - ASSERT(shadow_locked_by_me(d)); 10.276 - if ( unlikely(shadow_mode_log_dirty(d)) ) 10.277 - sh_do_mark_dirty(d, gmfn); 10.278 -} 10.279 - 10.280 -static inline int 10.281 -shadow_fault(unsigned long va, struct cpu_user_regs *regs) 10.282 -/* Called from pagefault handler in Xen, and from the HVM trap handlers 10.283 +/* Handle page-faults caused by the shadow pagetable mechanisms. 10.284 + * Called from pagefault handler in Xen, and from the HVM trap handlers 10.285 * for pagefaults. Returns 1 if this fault was an artefact of the 10.286 * shadow code (and the guest should retry) or 0 if it is not (and the 10.287 * fault should be handled elsewhere or passed to the guest). */ 10.288 +static inline int shadow_fault(unsigned long va, struct cpu_user_regs *regs) 10.289 { 10.290 struct vcpu *v = current; 10.291 perfc_incrc(shadow_fault); 10.292 return v->arch.shadow.mode->page_fault(v, va, regs); 10.293 } 10.294 10.295 -static inline int 10.296 -shadow_invlpg(struct vcpu *v, unsigned long va) 10.297 -/* Called when the guest requests an invlpg. Returns 1 if the invlpg 10.298 - * instruction should be issued on the hardware, or 0 if it's safe not 10.299 - * to do so. */ 10.300 +/* Handle invlpg requests on shadowed vcpus. 10.301 + * Returns 1 if the invlpg instruction should be issued on the hardware, 10.302 + * or 0 if it's safe not to do so. 
*/ 10.303 +static inline int shadow_invlpg(struct vcpu *v, unsigned long va) 10.304 { 10.305 return v->arch.shadow.mode->invlpg(v, va); 10.306 } 10.307 10.308 -static inline paddr_t 10.309 -shadow_gva_to_gpa(struct vcpu *v, unsigned long va) 10.310 -/* Called to translate a guest virtual address to what the *guest* 10.311 - * pagetables would map it to. */ 10.312 +/* Translate a guest virtual address to the physical address that the 10.313 + * *guest* pagetables would map it to. */ 10.314 +static inline paddr_t shadow_gva_to_gpa(struct vcpu *v, unsigned long va) 10.315 { 10.316 if ( unlikely(!shadow_vcpu_mode_translate(v)) ) 10.317 return (paddr_t) va; 10.318 return v->arch.shadow.mode->gva_to_gpa(v, va); 10.319 } 10.320 10.321 -static inline unsigned long 10.322 -shadow_gva_to_gfn(struct vcpu *v, unsigned long va) 10.323 -/* Called to translate a guest virtual address to what the *guest* 10.324 - * pagetables would map it to. */ 10.325 +/* Translate a guest virtual address to the frame number that the 10.326 + * *guest* pagetables would map it to. */ 10.327 +static inline unsigned long shadow_gva_to_gfn(struct vcpu *v, unsigned long va) 10.328 { 10.329 if ( unlikely(!shadow_vcpu_mode_translate(v)) ) 10.330 return va >> PAGE_SHIFT; 10.331 return v->arch.shadow.mode->gva_to_gfn(v, va); 10.332 } 10.333 10.334 -static inline void 10.335 -shadow_update_cr3(struct vcpu *v) 10.336 -/* Updates all the things that are derived from the guest's CR3. 10.337 - * Called when the guest changes CR3. */ 10.338 +/* Update all the things that are derived from the guest's CR3. 10.339 + * Called when the guest changes CR3; the caller can then use 10.340 + * v->arch.cr3 as the value to load into the host CR3 to schedule this vcpu 10.341 + * and v->arch.hvm_vcpu.hw_cr3 as the value to put in the vmcb/vmcs when 10.342 + * entering the HVM guest. */ 10.343 +static inline void shadow_update_cr3(struct vcpu *v) 10.344 { 10.345 - shadow_lock(v->domain); 10.346 - v->arch.shadow.mode->update_cr3(v); 10.347 - shadow_unlock(v->domain); 10.348 + v->arch.shadow.mode->update_cr3(v, 1); 10.349 } 10.350 10.351 +/* Update all the things that are derived from the guest's CR0/CR3/CR4. 10.352 + * Called to initialize paging structures if the paging mode 10.353 + * has changed, and when bringing up a VCPU for the first time. */ 10.354 +void shadow_update_paging_modes(struct vcpu *v); 10.355 + 10.356 10.357 -/* Should be called after CR3 is updated. 10.358 - * Updates vcpu->arch.cr3 and, for HVM guests, vcpu->arch.hvm_vcpu.cpu_cr3. 10.359 - * 10.360 - * Also updates other state derived from CR3 (vcpu->arch.guest_vtable, 10.361 - * shadow_vtable, etc). 10.362 - * 10.363 - * Uses values found in vcpu->arch.(guest_table and guest_table_user), and 10.364 - * for HVM guests, arch.monitor_table and hvm's guest CR3. 10.365 - * 10.366 - * Update ref counts to shadow tables appropriately. 
10.367 - */ 10.368 -static inline void update_cr3(struct vcpu *v) 10.369 -{ 10.370 - unsigned long cr3_mfn=0; 10.371 - 10.372 - if ( shadow_mode_enabled(v->domain) ) 10.373 - { 10.374 - shadow_update_cr3(v); 10.375 - return; 10.376 - } 10.377 - 10.378 -#if CONFIG_PAGING_LEVELS == 4 10.379 - if ( !(v->arch.flags & TF_kernel_mode) ) 10.380 - cr3_mfn = pagetable_get_pfn(v->arch.guest_table_user); 10.381 - else 10.382 -#endif 10.383 - cr3_mfn = pagetable_get_pfn(v->arch.guest_table); 10.384 - 10.385 - make_cr3(v, cr3_mfn); 10.386 -} 10.387 +/***************************************************************************** 10.388 + * Access to the guest pagetables */ 10.389 10.390 -extern void sh_update_paging_modes(struct vcpu *v); 10.391 - 10.392 -/* Should be called to initialise paging structures if the paging mode 10.393 - * has changed, and when bringing up a VCPU for the first time. */ 10.394 -static inline void shadow_update_paging_modes(struct vcpu *v) 10.395 -{ 10.396 - ASSERT(shadow_mode_enabled(v->domain)); 10.397 - shadow_lock(v->domain); 10.398 - sh_update_paging_modes(v); 10.399 - shadow_unlock(v->domain); 10.400 -} 10.401 - 10.402 -static inline void 10.403 -shadow_detach_old_tables(struct vcpu *v) 10.404 -{ 10.405 - if ( v->arch.shadow.mode ) 10.406 - v->arch.shadow.mode->detach_old_tables(v); 10.407 -} 10.408 - 10.409 -static inline mfn_t 10.410 -shadow_make_monitor_table(struct vcpu *v) 10.411 -{ 10.412 - return v->arch.shadow.mode->make_monitor_table(v); 10.413 -} 10.414 - 10.415 -static inline void 10.416 -shadow_destroy_monitor_table(struct vcpu *v, mfn_t mmfn) 10.417 -{ 10.418 - v->arch.shadow.mode->destroy_monitor_table(v, mmfn); 10.419 -} 10.420 - 10.421 +/* Get a mapping of a PV guest's l1e for this virtual address. */ 10.422 static inline void * 10.423 guest_map_l1e(struct vcpu *v, unsigned long addr, unsigned long *gl1mfn) 10.424 { 10.425 - if ( likely(!shadow_mode_translate(v->domain)) ) 10.426 - { 10.427 - l2_pgentry_t l2e; 10.428 - ASSERT(!shadow_mode_external(v->domain)); 10.429 - /* Find this l1e and its enclosing l1mfn in the linear map */ 10.430 - if ( __copy_from_user(&l2e, 10.431 - &__linear_l2_table[l2_linear_offset(addr)], 10.432 - sizeof(l2_pgentry_t)) != 0 ) 10.433 - return NULL; 10.434 - /* Check flags that it will be safe to read the l1e */ 10.435 - if ( (l2e_get_flags(l2e) & (_PAGE_PRESENT | _PAGE_PSE)) 10.436 - != _PAGE_PRESENT ) 10.437 - return NULL; 10.438 - *gl1mfn = l2e_get_pfn(l2e); 10.439 - return &__linear_l1_table[l1_linear_offset(addr)]; 10.440 - } 10.441 + l2_pgentry_t l2e; 10.442 + 10.443 + if ( unlikely(shadow_mode_translate(v->domain)) ) 10.444 + return v->arch.shadow.mode->guest_map_l1e(v, addr, gl1mfn); 10.445 10.446 - return v->arch.shadow.mode->guest_map_l1e(v, addr, gl1mfn); 10.447 + /* Find this l1e and its enclosing l1mfn in the linear map */ 10.448 + if ( __copy_from_user(&l2e, 10.449 + &__linear_l2_table[l2_linear_offset(addr)], 10.450 + sizeof(l2_pgentry_t)) != 0 ) 10.451 + return NULL; 10.452 + /* Check flags that it will be safe to read the l1e */ 10.453 + if ( (l2e_get_flags(l2e) & (_PAGE_PRESENT | _PAGE_PSE)) 10.454 + != _PAGE_PRESENT ) 10.455 + return NULL; 10.456 + *gl1mfn = l2e_get_pfn(l2e); 10.457 + return &__linear_l1_table[l1_linear_offset(addr)]; 10.458 } 10.459 10.460 +/* Pull down the mapping we got from guest_map_l1e() */ 10.461 static inline void 10.462 guest_unmap_l1e(struct vcpu *v, void *p) 10.463 { 10.464 @@ -487,6 +297,7 @@ guest_unmap_l1e(struct vcpu *v, void *p) 10.465 unmap_domain_page(p); 10.466 } 10.467 
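Illustrative sketch (not part of the changeset): the guest_map_l1e()/guest_unmap_l1e() pair above is meant to be used bracket-style; example_read_guest_l1e() is a hypothetical name:

/* Hypothetical caller: map the guest l1e for 'addr', copy it out, and
 * drop the mapping again.  Returns 0 if the entry could not be reached. */
static int example_read_guest_l1e(struct vcpu *v, unsigned long addr,
                                  l1_pgentry_t *out)
{
    unsigned long gl1mfn;
    l1_pgentry_t *pl1e = guest_map_l1e(v, addr, &gl1mfn);

    if ( pl1e == NULL )   /* l2e not present, or not safe to follow */
        return 0;
    *out = *pl1e;
    guest_unmap_l1e(v, pl1e);
    return 1;
}

On translated domains the call indirects through the per-mode guest_map_l1e hook; on ordinary PV domains it walks the linear pagetable mapping directly, as shown in the hunk above.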
10.468 +/* Read the guest's l1e that maps this address. */ 10.469 static inline void 10.470 guest_get_eff_l1e(struct vcpu *v, unsigned long addr, void *eff_l1e) 10.471 { 10.472 @@ -503,6 +314,8 @@ guest_get_eff_l1e(struct vcpu *v, unsign 10.473 v->arch.shadow.mode->guest_get_eff_l1e(v, addr, eff_l1e); 10.474 } 10.475 10.476 +/* Read the guest's l1e that maps this address, from the kernel-mode 10.477 + * pagetables. */ 10.478 static inline void 10.479 guest_get_eff_kern_l1e(struct vcpu *v, unsigned long addr, void *eff_l1e) 10.480 { 10.481 @@ -518,82 +331,36 @@ guest_get_eff_kern_l1e(struct vcpu *v, u 10.482 TOGGLE_MODE(); 10.483 } 10.484 10.485 - 10.486 -/* Validate a pagetable change from the guest and update the shadows. */ 10.487 -extern int shadow_validate_guest_entry(struct vcpu *v, mfn_t gmfn, 10.488 - void *new_guest_entry); 10.489 -extern int __shadow_validate_guest_entry(struct vcpu *v, mfn_t gmfn, 10.490 - void *entry, u32 size); 10.491 - 10.492 -/* Update the shadows in response to a pagetable write from a HVM guest */ 10.493 -extern void shadow_validate_guest_pt_write(struct vcpu *v, mfn_t gmfn, 10.494 - void *entry, u32 size); 10.495 +/* Write a new value into the guest pagetable, and update the shadows 10.496 + * appropriately. Returns 0 if we page-faulted, 1 for success. */ 10.497 +int shadow_write_guest_entry(struct vcpu *v, intpte_t *p, 10.498 + intpte_t new, mfn_t gmfn); 10.499 10.500 -/* Remove all writeable mappings of a guest frame from the shadows. 10.501 - * Returns non-zero if we need to flush TLBs. 10.502 - * level and fault_addr desribe how we found this to be a pagetable; 10.503 - * level==0 means we have some other reason for revoking write access. */ 10.504 -extern int shadow_remove_write_access(struct vcpu *v, mfn_t readonly_mfn, 10.505 - unsigned int level, 10.506 - unsigned long fault_addr); 10.507 +/* Cmpxchg a new value into the guest pagetable, and update the shadows 10.508 + * appropriately. Returns 0 if we page-faulted, 1 if not. 10.509 + * N.B. caller should check the value of "old" to see if the 10.510 + * cmpxchg itself was successful. */ 10.511 +int shadow_cmpxchg_guest_entry(struct vcpu *v, intpte_t *p, 10.512 + intpte_t *old, intpte_t new, mfn_t gmfn); 10.513 10.514 -/* Remove all mappings of the guest mfn from the shadows. 10.515 - * Returns non-zero if we need to flush TLBs. */ 10.516 -extern int shadow_remove_all_mappings(struct vcpu *v, mfn_t target_mfn); 10.517 - 10.518 -/* Remove all mappings from the shadows. */ 10.519 -extern void shadow_blow_tables(struct domain *d); 10.520 - 10.521 -void 10.522 -shadow_remove_all_shadows_and_parents(struct vcpu *v, mfn_t gmfn); 10.523 -/* This is a HVM page that we thing is no longer a pagetable. 10.524 - * Unshadow it, and recursively unshadow pages that reference it. */ 10.525 +/* Remove all mappings of the guest page from the shadows. 10.526 + * This is called from common code. It does not flush TLBs. */ 10.527 +int sh_remove_all_mappings(struct vcpu *v, mfn_t target_mfn); 10.528 +static inline void 10.529 +shadow_drop_references(struct domain *d, struct page_info *p) 10.530 +{ 10.531 + /* See the comment about locking in sh_remove_all_mappings */ 10.532 + sh_remove_all_mappings(d->vcpu[0], _mfn(page_to_mfn(p))); 10.533 +} 10.534 10.535 /* Remove all shadows of the guest mfn. 
*/ 10.536 -extern void sh_remove_shadows(struct vcpu *v, mfn_t gmfn, int fast, int all); 10.537 +void sh_remove_shadows(struct vcpu *v, mfn_t gmfn, int fast, int all); 10.538 static inline void shadow_remove_all_shadows(struct vcpu *v, mfn_t gmfn) 10.539 { 10.540 - int was_locked = shadow_locked_by_me(v->domain); 10.541 - if ( !was_locked ) 10.542 - shadow_lock(v->domain); 10.543 - sh_remove_shadows(v, gmfn, 0, 1); 10.544 - if ( !was_locked ) 10.545 - shadow_unlock(v->domain); 10.546 + /* See the comment about locking in sh_remove_shadows */ 10.547 + sh_remove_shadows(v, gmfn, 0 /* Be thorough */, 1 /* Must succeed */); 10.548 } 10.549 10.550 -/* Add a page to a domain */ 10.551 -void 10.552 -shadow_guest_physmap_add_page(struct domain *d, unsigned long gfn, 10.553 - unsigned long mfn); 10.554 - 10.555 -/* Remove a page from a domain */ 10.556 -void 10.557 -shadow_guest_physmap_remove_page(struct domain *d, unsigned long gfn, 10.558 - unsigned long mfn); 10.559 - 10.560 -/* 10.561 - * Allocation of shadow pages 10.562 - */ 10.563 - 10.564 -/* Return the minumum acceptable number of shadow pages a domain needs */ 10.565 -unsigned int shadow_min_acceptable_pages(struct domain *d); 10.566 - 10.567 -/* Set the pool of shadow pages to the required number of MB. 10.568 - * Input will be rounded up to at least min_acceptable_shadow_pages(). 10.569 - * Returns 0 for success, 1 for failure. */ 10.570 -unsigned int shadow_set_allocation(struct domain *d, 10.571 - unsigned int megabytes, 10.572 - int *preempted); 10.573 - 10.574 -/* Return the size of the shadow pool, rounded up to the nearest MB */ 10.575 -static inline unsigned int shadow_get_allocation(struct domain *d) 10.576 -{ 10.577 - unsigned int pg = d->arch.shadow.total_pages; 10.578 - return ((pg >> (20 - PAGE_SHIFT)) 10.579 - + ((pg & ((1 << (20 - PAGE_SHIFT)) - 1)) ? 1 : 0)); 10.580 -} 10.581 - 10.582 - 10.583 /**************************************************************************/ 10.584 /* Guest physmap (p2m) support 10.585 * 10.586 @@ -602,8 +369,19 @@ static inline unsigned int shadow_get_al 10.587 * guests, so we steal the address space that would have normally 10.588 * been used by the read-only MPT map. 10.589 */ 10.590 +#define phys_to_machine_mapping ((l1_pgentry_t *)RO_MPT_VIRT_START) 10.591 10.592 -#define phys_to_machine_mapping ((l1_pgentry_t *)RO_MPT_VIRT_START) 10.593 +/* Add a page to a domain's p2m table */ 10.594 +void shadow_guest_physmap_add_page(struct domain *d, unsigned long gfn, 10.595 + unsigned long mfn); 10.596 + 10.597 +/* Remove a page from a domain's p2m table */ 10.598 +void shadow_guest_physmap_remove_page(struct domain *d, unsigned long gfn, 10.599 + unsigned long mfn); 10.600 + 10.601 +/* Aliases, called from common code. */ 10.602 +#define guest_physmap_add_page shadow_guest_physmap_add_page 10.603 +#define guest_physmap_remove_page shadow_guest_physmap_remove_page 10.604 10.605 /* Read the current domain's P2M table. 
*/ 10.606 static inline mfn_t sh_gfn_to_mfn_current(unsigned long gfn) 10.607 @@ -627,8 +405,8 @@ static inline mfn_t sh_gfn_to_mfn_curren 10.608 return _mfn(INVALID_MFN); 10.609 } 10.610 10.611 -/* Walk another domain's P2M table, mapping pages as we go */ 10.612 -extern mfn_t sh_gfn_to_mfn_foreign(struct domain *d, unsigned long gpfn); 10.613 +/* Read another domain's P2M table, mapping pages as we go */ 10.614 +mfn_t sh_gfn_to_mfn_foreign(struct domain *d, unsigned long gpfn); 10.615 10.616 /* General conversion function from gfn to mfn */ 10.617 static inline mfn_t 10.618 @@ -666,6 +444,7 @@ mmio_space(paddr_t gpa) 10.619 return !mfn_valid(mfn_x(sh_gfn_to_mfn_current(gfn))); 10.620 } 10.621 10.622 +/* Translate the frame number held in an l1e from guest to machine */ 10.623 static inline l1_pgentry_t 10.624 gl1e_to_ml1e(struct domain *d, l1_pgentry_t l1e) 10.625 { 10.626 @@ -685,4 +464,3 @@ gl1e_to_ml1e(struct domain *d, l1_pgentr 10.627 * indent-tabs-mode: nil 10.628 * End: 10.629 */ 10.630 -
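Illustrative sketch (not part of the changeset): how the p2m helpers in this final hunk combine; example_gpa_is_ram() is a hypothetical name, and the check mirrors the mmio_space() logic shown above:

/* Hypothetical helper: translate a guest physical address through the
 * current domain's p2m and report whether a real frame backs it. */
static int example_gpa_is_ram(paddr_t gpa)
{
    unsigned long gfn = gpa >> PAGE_SHIFT;
    mfn_t mfn = sh_gfn_to_mfn_current(gfn);   /* _mfn(INVALID_MFN) if unmapped */

    return mfn_valid(mfn_x(mfn));             /* i.e. !mmio_space(gpa) */
}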