ia64/xen-unstable
changeset 12564:2fd223c64fc6
[XEN] Pin l3 shadows of older x86_64 linux guests.
Older x86_64 Linux kernels use one l4 table per cpu and context switch by
changing an l4 entry that points to an l3 table. If we're shadowing such a
guest, we need to pin its l3 shadows so they are not torn down on every
context switch. (But don't do this for normal 64-bit guests.)
Signed-off-by: Tim Deegan <Tim.Deegan@xensource.com>
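The guest behaviour being worked around is, in outline, the following. This is an illustrative sketch only: the type and function names are hypothetical and not taken from any Linux release.

```c
/* Sketch of the "one l4 per cpu" pattern described above (hypothetical
 * names; not from any real kernel).  Each physical CPU owns one l4
 * (PML4) page, and a context switch does not reload CR3: it rewrites a
 * fixed l4 slot to point at the next process's l3 (PDPT) and flushes
 * the TLB.  Under shadow paging, that single l4 write would tear down
 * the l3 shadow on every switch unless the l3 shadow is pinned, which
 * is what this changeset adds. */

typedef unsigned long l4e_t;            /* an l4 (PML4) entry */

#define USERSPACE_L4_SLOT 0             /* the slot the kernel rewrites */

struct percpu_pgtable {
    l4e_t *l4;                          /* this CPU's private l4 page */
};

static void context_switch_mm(struct percpu_pgtable *cpu, l4e_t next_l3e)
{
    cpu->l4[USERSPACE_L4_SLOT] = next_l3e;  /* point at the next l3 */
    /* ...TLB flush would follow; CR3 keeps pointing at the same l4... */
}
```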
author:   Tim Deegan <Tim.Deegan@xensource.com>
date:     Thu Nov 23 17:46:52 2006 +0000 (2006-11-23)
parents:  47a8bb3cd123
children: cd40792968cb
files:    xen/arch/x86/domain.c xen/arch/x86/mm/shadow/common.c xen/arch/x86/mm/shadow/multi.c xen/arch/x86/mm/shadow/private.h xen/include/asm-x86/domain.h xen/include/asm-x86/shadow.h
line diff
--- a/xen/arch/x86/domain.c	Thu Nov 23 17:44:12 2006 +0000
+++ b/xen/arch/x86/domain.c	Thu Nov 23 17:46:52 2006 +0000
@@ -219,7 +219,7 @@ int arch_domain_create(struct domain *d)
         INIT_LIST_HEAD(&d->arch.shadow.freelists[i]);
     INIT_LIST_HEAD(&d->arch.shadow.p2m_freelist);
     INIT_LIST_HEAD(&d->arch.shadow.p2m_inuse);
-    INIT_LIST_HEAD(&d->arch.shadow.toplevel_shadows);
+    INIT_LIST_HEAD(&d->arch.shadow.pinned_shadows);

     if ( !is_idle_domain(d) )
     {
--- a/xen/arch/x86/mm/shadow/common.c	Thu Nov 23 17:44:12 2006 +0000
+++ b/xen/arch/x86/mm/shadow/common.c	Thu Nov 23 17:46:52 2006 +0000
@@ -495,6 +495,7 @@ void shadow_prealloc(struct domain *d, u
     struct shadow_page_info *sp;
     cpumask_t flushmask = CPU_MASK_NONE;
     mfn_t smfn;
+    int i;

     if ( chunk_is_available(d, order) ) return;

@@ -503,9 +504,9 @@ void shadow_prealloc(struct domain *d, u
     v = d->vcpu[0];
     ASSERT(v != NULL);

-    /* Stage one: walk the list of top-level pages, unpinning them */
+    /* Stage one: walk the list of pinned pages, unpinning them */
     perfc_incrc(shadow_prealloc_1);
-    list_for_each_backwards_safe(l, t, &d->arch.shadow.toplevel_shadows)
+    list_for_each_backwards_safe(l, t, &d->arch.shadow.pinned_shadows)
     {
         sp = list_entry(l, struct shadow_page_info, list);
         smfn = shadow_page_to_mfn(sp);
@@ -521,31 +522,24 @@ void shadow_prealloc(struct domain *d, u
      * loaded in cr3 on some vcpu.  Walk them, unhooking the non-Xen
      * mappings. */
     perfc_incrc(shadow_prealloc_2);
-    list_for_each_backwards_safe(l, t, &d->arch.shadow.toplevel_shadows)
-    {
-        sp = list_entry(l, struct shadow_page_info, list);
-        smfn = shadow_page_to_mfn(sp);
-        shadow_unhook_mappings(v, smfn);
-
-        /* Remember to flush TLBs: we have removed shadow entries that
-         * were in use by some vcpu(s). */
-        for_each_vcpu(d, v2)
+
+    for_each_vcpu(d, v2)
+        for ( i = 0 ; i < 4 ; i++ )
         {
-            if ( pagetable_get_pfn(v2->arch.shadow_table[0]) == mfn_x(smfn)
-                 || pagetable_get_pfn(v2->arch.shadow_table[1]) == mfn_x(smfn)
-                 || pagetable_get_pfn(v2->arch.shadow_table[2]) == mfn_x(smfn)
-                 || pagetable_get_pfn(v2->arch.shadow_table[3]) == mfn_x(smfn)
-                 )
+            if ( !pagetable_is_null(v2->arch.shadow_table[i]) )
+            {
+                shadow_unhook_mappings(v,
+                               pagetable_get_mfn(v2->arch.shadow_table[i]));
                 cpus_or(flushmask, v2->vcpu_dirty_cpumask, flushmask);
+
+                /* See if that freed up a chunk of appropriate size */
+                if ( chunk_is_available(d, order) )
+                {
+                    flush_tlb_mask(flushmask);
+                    return;
+                }
+            }
         }
-
-        /* See if that freed up a chunk of appropriate size */
-        if ( chunk_is_available(d, order) )
-        {
-            flush_tlb_mask(flushmask);
-            return;
-        }
-    }

     /* Nothing more we can do: all remaining shadows are of pages that
      * hold Xen mappings for some vcpu.  This can never happen. */
@@ -558,52 +552,57 @@ void shadow_prealloc(struct domain *d, u
     BUG();
 }

-#ifndef NDEBUG
-/* Deliberately free all the memory we can: this can be used to cause the
- * guest's pagetables to be re-shadowed if we suspect that the shadows
- * have somehow got out of sync */
-static void shadow_blow_tables(unsigned char c)
+/* Deliberately free all the memory we can: this will tear down all of
+ * this domain's shadows */
+static void shadow_blow_tables(struct domain *d)
 {
     struct list_head *l, *t;
     struct shadow_page_info *sp;
-    struct domain *d;
-    struct vcpu *v;
+    struct vcpu *v = d->vcpu[0];
     mfn_t smfn;
-
+    int i;
+
+    /* Pass one: unpin all pinned pages */
+    list_for_each_backwards_safe(l,t, &d->arch.shadow.pinned_shadows)
+    {
+        sp = list_entry(l, struct shadow_page_info, list);
+        smfn = shadow_page_to_mfn(sp);
+        sh_unpin(v, smfn);
+    }
+
+    /* Second pass: unhook entries of in-use shadows */
+    for_each_vcpu(d, v)
+        for ( i = 0 ; i < 4 ; i++ )
+            if ( !pagetable_is_null(v->arch.shadow_table[i]) )
+                shadow_unhook_mappings(v,
+                               pagetable_get_mfn(v->arch.shadow_table[i]));
+
+    /* Make sure everyone sees the unshadowings */
+    flush_tlb_mask(d->domain_dirty_cpumask);
+}
+
+
+#ifndef NDEBUG
+/* Blow all shadows of all shadowed domains: this can be used to cause the
+ * guest's pagetables to be re-shadowed if we suspect that the shadows
+ * have somehow got out of sync */
+static void shadow_blow_all_tables(unsigned char c)
+{
+    struct domain *d;
+    printk("'%c' pressed -> blowing all shadow tables\n", c);
     for_each_domain(d)
-    {
-        if ( shadow_mode_enabled(d) && (v = d->vcpu[0]) != NULL)
+        if ( shadow_mode_enabled(d) && d->vcpu[0] != NULL )
         {
             shadow_lock(d);
-            printk("Blowing shadow tables for domain %u\n", d->domain_id);
-
-            /* Pass one: unpin all top-level pages */
-            list_for_each_backwards_safe(l,t, &d->arch.shadow.toplevel_shadows)
-            {
-                sp = list_entry(l, struct shadow_page_info, list);
-                smfn = shadow_page_to_mfn(sp);
-                sh_unpin(v, smfn);
-            }
-
-            /* Second pass: unhook entries of in-use shadows */
-            list_for_each_backwards_safe(l,t, &d->arch.shadow.toplevel_shadows)
-            {
-                sp = list_entry(l, struct shadow_page_info, list);
-                smfn = shadow_page_to_mfn(sp);
-                shadow_unhook_mappings(v, smfn);
-            }
-
-            /* Make sure everyone sees the unshadowings */
-            flush_tlb_mask(d->domain_dirty_cpumask);
+            shadow_blow_tables(d);
             shadow_unlock(d);
         }
-    }
 }

 /* Register this function in the Xen console keypress table */
 static __init int shadow_blow_tables_keyhandler_init(void)
 {
-    register_keyhandler('S', shadow_blow_tables, "reset shadow pagetables");
+    register_keyhandler('S', shadow_blow_all_tables,"reset shadow pagetables");
     return 0;
 }
 __initcall(shadow_blow_tables_keyhandler_init);
@@ -789,9 +788,9 @@ mfn_t
 shadow_alloc_p2m_page(struct domain *d)
 {
     struct list_head *entry;
+    struct page_info *pg;
     mfn_t mfn;
     void *p;
-    int ok;

     if ( list_empty(&d->arch.shadow.p2m_freelist) &&
          !shadow_alloc_p2m_pages(d) )
@@ -799,9 +798,9 @@ shadow_alloc_p2m_page(struct domain *d)
     entry = d->arch.shadow.p2m_freelist.next;
     list_del(entry);
     list_add_tail(entry, &d->arch.shadow.p2m_inuse);
-    mfn = page_to_mfn(list_entry(entry, struct page_info, list));
-    ok = sh_get_ref(mfn, 0);
-    ASSERT(ok); /* First sh_get_ref() can't possibly overflow */
+    pg = list_entry(entry, struct page_info, list);
+    pg->count_info = 1;
+    mfn = page_to_mfn(pg);
     p = sh_map_domain_page(mfn);
     clear_page(p);
     sh_unmap_domain_page(p);
@@ -2067,37 +2066,32 @@ void sh_remove_shadows(struct vcpu *v, m
      * This call to hash_foreach() looks dangerous but is in fact OK: each
      * call will remove at most one shadow, and terminate immediately when
      * it does remove it, so we never walk the hash after doing a deletion. */
-#define DO_UNSHADOW(_type) do {                            \
-    t = (_type);                                           \
-    smfn = shadow_hash_lookup(v, mfn_x(gmfn), t);          \
-    if ( !sh_remove_shadow_via_pointer(v, smfn) && !fast ) \
-        hash_foreach(v, masks[t], callbacks, smfn);        \
-} while (0)
-
-    /* Top-level shadows need to be unpinned */
-#define DO_UNPIN(_type) do {                               \
+#define DO_UNSHADOW(_type) do {                            \
     t = (_type);                                           \
     smfn = shadow_hash_lookup(v, mfn_x(gmfn), t);          \
-    if ( mfn_to_shadow_page(smfn)->pinned )                \
+    if ( sh_type_is_pinnable(v, t) )                       \
         sh_unpin(v, smfn);                                 \
+    else                                                   \
+        sh_remove_shadow_via_pointer(v, smfn);             \
+    if ( (pg->count_info & PGC_page_table) && !fast )      \
+        hash_foreach(v, masks[t], callbacks, smfn);        \
 } while (0)

     if ( sh_flags & SHF_L1_32 )   DO_UNSHADOW(SH_type_l1_32_shadow);
-    if ( sh_flags & SHF_L2_32 )   DO_UNPIN(SH_type_l2_32_shadow);
+    if ( sh_flags & SHF_L2_32 )   DO_UNSHADOW(SH_type_l2_32_shadow);
 #if CONFIG_PAGING_LEVELS >= 3
     if ( sh_flags & SHF_L1_PAE )  DO_UNSHADOW(SH_type_l1_pae_shadow);
-    if ( sh_flags & SHF_L2_PAE )  DO_UNPIN(SH_type_l2_pae_shadow);
-    if ( sh_flags & SHF_L2H_PAE ) DO_UNPIN(SH_type_l2h_pae_shadow);
+    if ( sh_flags & SHF_L2_PAE )  DO_UNSHADOW(SH_type_l2_pae_shadow);
+    if ( sh_flags & SHF_L2H_PAE ) DO_UNSHADOW(SH_type_l2h_pae_shadow);
 #if CONFIG_PAGING_LEVELS >= 4
     if ( sh_flags & SHF_L1_64 )   DO_UNSHADOW(SH_type_l1_64_shadow);
     if ( sh_flags & SHF_L2_64 )   DO_UNSHADOW(SH_type_l2_64_shadow);
     if ( sh_flags & SHF_L3_64 )   DO_UNSHADOW(SH_type_l3_64_shadow);
-    if ( sh_flags & SHF_L4_64 )   DO_UNPIN(SH_type_l4_64_shadow);
+    if ( sh_flags & SHF_L4_64 )   DO_UNSHADOW(SH_type_l4_64_shadow);
 #endif
 #endif

 #undef DO_UNSHADOW
-#undef DO_UNPIN

     /* If that didn't catch the shadows, something is wrong */
     if ( !fast && (pg->count_info & PGC_page_table) )
@@ -2393,6 +2387,12 @@ int shadow_enable(struct domain *d, u32
         goto out;
     }

+#if (SHADOW_OPTIMIZATIONS & SHOPT_LINUX_L3_TOPLEVEL)
+    /* We assume we're dealing with an older 64bit linux guest until we
+     * see the guest use more than one l4 per vcpu. */
+    d->arch.shadow.opt_flags = SHOPT_LINUX_L3_TOPLEVEL;
+#endif
+
     /* Update the bits */
     sh_new_mode(d, mode);
     shadow_audit_p2m(d);
@@ -2831,18 +2831,10 @@ static int shadow_log_dirty_op(

     if ( clean )
     {
-        struct list_head *l, *t;
-        struct shadow_page_info *sp;
-
         /* Need to revoke write access to the domain's pages again.
          * In future, we'll have a less heavy-handed approach to this,
          * but for now, we just unshadow everything except Xen. */
-        list_for_each_safe(l, t, &d->arch.shadow.toplevel_shadows)
-        {
-            sp = list_entry(l, struct shadow_page_info, list);
-            if ( d->vcpu[0] != NULL )
-                shadow_unhook_mappings(d->vcpu[0], shadow_page_to_mfn(sp));
-        }
+        shadow_blow_tables(d);

         d->arch.shadow.fault_count = 0;
         d->arch.shadow.dirty_count = 0;
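The reworked DO_UNSHADOW above folds the old DO_UNPIN case into a single path: pinnable shadow types are unpinned, everything else is removed through its recorded up-pointer, and the hash walk remains only as a fallback. A minimal sketch of that control flow, using hypothetical callback names in place of the real Xen internals:

```c
/* Sketch only: the decision made by the unified DO_UNSHADOW macro in the
 * hunk above, written out over hypothetical callbacks. */
#include <stdbool.h>

struct unshadow_ops {
    bool (*type_is_pinnable)(unsigned int type);    /* cf. sh_type_is_pinnable() */
    void (*unpin)(unsigned long smfn);              /* cf. sh_unpin() */
    void (*remove_via_pointer)(unsigned long smfn); /* cf. sh_remove_shadow_via_pointer() */
    void (*hash_walk)(unsigned int type);           /* cf. the hash_foreach() fallback */
};

static void do_unshadow(const struct unshadow_ops *ops, unsigned int type,
                        unsigned long smfn, bool still_shadowed, bool fast)
{
    /* Pinnable shadows hold their own "pin" reference, so dropping it is
     * enough; everything else is reached through the single up-pointer
     * remembered by sh_get_ref(). */
    if ( ops->type_is_pinnable(type) )
        ops->unpin(smfn);
    else
        ops->remove_via_pointer(smfn);

    /* Fall back to the brute-force hash walk only if the guest page is
     * still marked as a pagetable and we are not on the fast path. */
    if ( still_shadowed && !fast )
        ops->hash_walk(type);
}
```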
--- a/xen/arch/x86/mm/shadow/multi.c	Thu Nov 23 17:44:12 2006 +0000
+++ b/xen/arch/x86/mm/shadow/multi.c	Thu Nov 23 17:46:52 2006 +0000
@@ -964,7 +964,7 @@ static int shadow_set_l4e(struct vcpu *v
                           shadow_l4e_t new_sl4e,
                           mfn_t sl4mfn)
 {
-    int flags = 0;
+    int flags = 0, ok;
     shadow_l4e_t old_sl4e;
     paddr_t paddr;
     ASSERT(sl4e != NULL);
@@ -976,12 +976,19 @@ static int shadow_set_l4e(struct vcpu *v
              | (((unsigned long)sl4e) & ~PAGE_MASK));

     if ( shadow_l4e_get_flags(new_sl4e) & _PAGE_PRESENT )
+    {
         /* About to install a new reference */
-        if ( !sh_get_ref(shadow_l4e_get_mfn(new_sl4e), paddr) )
+        mfn_t sl3mfn = shadow_l4e_get_mfn(new_sl4e);
+        ok = sh_get_ref(v, sl3mfn, paddr);
+        /* Are we pinning l3 shadows to handle wierd linux behaviour? */
+        if ( sh_type_is_pinnable(v, SH_type_l3_64_shadow) )
+            ok |= sh_pin(v, sl3mfn);
+        if ( !ok )
         {
             domain_crash(v->domain);
             return SHADOW_SET_ERROR;
         }
+    }

     /* Write the new entry */
     shadow_write_entries(sl4e, &new_sl4e, 1, sl4mfn);
@@ -1020,7 +1027,7 @@ static int shadow_set_l3e(struct vcpu *v

     if ( shadow_l3e_get_flags(new_sl3e) & _PAGE_PRESENT )
         /* About to install a new reference */
-        if ( !sh_get_ref(shadow_l3e_get_mfn(new_sl3e), paddr) )
+        if ( !sh_get_ref(v, shadow_l3e_get_mfn(new_sl3e), paddr) )
         {
             domain_crash(v->domain);
             return SHADOW_SET_ERROR;
@@ -1076,7 +1083,7 @@ static int shadow_set_l2e(struct vcpu *v

     if ( shadow_l2e_get_flags(new_sl2e) & _PAGE_PRESENT )
         /* About to install a new reference */
-        if ( !sh_get_ref(shadow_l2e_get_mfn(new_sl2e), paddr) )
+        if ( !sh_get_ref(v, shadow_l2e_get_mfn(new_sl2e), paddr) )
         {
             domain_crash(v->domain);
             return SHADOW_SET_ERROR;
@@ -1361,8 +1368,6 @@ do {
 /**************************************************************************/
 /* Functions to install Xen mappings and linear mappings in shadow pages */

-static mfn_t sh_make_shadow(struct vcpu *v, mfn_t gmfn, u32 shadow_type);
-
 // XXX -- this function should probably be moved to shadow-common.c, but that
 // probably wants to wait until the shadow types have been moved from
 // shadow-types.h to shadow-private.h
@@ -1547,6 +1552,44 @@ sh_make_shadow(struct vcpu *v, mfn_t gmf
         /* Lower-level shadow, not yet linked form a higher level */
         mfn_to_shadow_page(smfn)->up = 0;

+#if GUEST_PAGING_LEVELS == 4
+#if (SHADOW_OPTIMIZATIONS & SHOPT_LINUX_L3_TOPLEVEL)
+    if ( shadow_type == SH_type_l4_64_shadow &&
+         unlikely(v->domain->arch.shadow.opt_flags & SHOPT_LINUX_L3_TOPLEVEL) )
+    {
+        /* We're shadowing a new l4, but we've been assuming the guest uses
+         * only one l4 per vcpu and context switches using an l4 entry.
+         * Count the number of active l4 shadows.  If there are enough
+         * of them, decide that this isn't an old linux guest, and stop
+         * pinning l3es.  This is not very quick but it doesn't happen
+         * very often. */
+        struct list_head *l, *t;
+        struct shadow_page_info *sp;
+        struct vcpu *v2;
+        int l4count = 0, vcpus = 0;
+        list_for_each(l, &v->domain->arch.shadow.pinned_shadows)
+        {
+            sp = list_entry(l, struct shadow_page_info, list);
+            if ( sp->type == SH_type_l4_64_shadow )
+                l4count++;
+        }
+        for_each_vcpu ( v->domain, v2 )
+            vcpus++;
+        if ( l4count > 2 * vcpus )
+        {
+            /* Unpin all the pinned l3 tables, and don't pin any more. */
+            list_for_each_safe(l, t, &v->domain->arch.shadow.pinned_shadows)
+            {
+                sp = list_entry(l, struct shadow_page_info, list);
+                if ( sp->type == SH_type_l3_64_shadow )
+                    sh_unpin(v, shadow_page_to_mfn(sp));
+            }
+            v->domain->arch.shadow.opt_flags &= ~SHOPT_LINUX_L3_TOPLEVEL;
+        }
+    }
+#endif
+#endif
+
     // Create the Xen mappings...
     if ( !shadow_mode_external(v->domain) )
     {
@@ -1893,9 +1936,6 @@ void sh_destroy_l4_shadow(struct vcpu *v
     gmfn = _mfn(mfn_to_shadow_page(smfn)->backpointer);
     delete_shadow_status(v, gmfn, t, smfn);
     shadow_demote(v, gmfn, t);
-    /* Take this shadow off the list of root shadows */
-    list_del_init(&mfn_to_shadow_page(smfn)->list);
-
     /* Decrement refcounts of all the old entries */
     xen_mappings = (!shadow_mode_external(v->domain));
     sl4mfn = smfn;
@@ -1903,8 +1943,8 @@ void sh_destroy_l4_shadow(struct vcpu *v
         if ( shadow_l4e_get_flags(*sl4e) & _PAGE_PRESENT )
         {
             sh_put_ref(v, shadow_l4e_get_mfn(*sl4e),
-                        (((paddr_t)mfn_x(sl4mfn)) << PAGE_SHIFT)
-                        | ((unsigned long)sl4e & ~PAGE_MASK));
+                       (((paddr_t)mfn_x(sl4mfn)) << PAGE_SHIFT)
+                       | ((unsigned long)sl4e & ~PAGE_MASK));
         }
     });

@@ -1958,10 +1998,6 @@ void sh_destroy_l2_shadow(struct vcpu *v
     gmfn = _mfn(mfn_to_shadow_page(smfn)->backpointer);
     delete_shadow_status(v, gmfn, t, smfn);
     shadow_demote(v, gmfn, t);
-#if (GUEST_PAGING_LEVELS == 2) || (GUEST_PAGING_LEVELS == 3)
-    /* Take this shadow off the list of root shadows */
-    list_del_init(&mfn_to_shadow_page(smfn)->list);
-#endif

     /* Decrement refcounts of all the old entries */
     sl2mfn = smfn;
@@ -3276,13 +3312,7 @@ sh_set_toplevel_shadow(struct vcpu *v,

     /* Guest mfn is valid: shadow it and install the shadow */
     smfn = get_shadow_status(v, gmfn, root_type);
-    if ( valid_mfn(smfn) )
-    {
-        /* Pull this root shadow out of the list of roots (we will put
-         * it back in at the head). */
-        list_del(&mfn_to_shadow_page(smfn)->list);
-    }
-    else
+    if ( !valid_mfn(smfn) )
     {
         /* Make sure there's enough free shadow memory. */
         shadow_prealloc(d, SHADOW_MAX_ORDER);
@@ -3298,17 +3328,15 @@ sh_set_toplevel_shadow(struct vcpu *v,
 #endif

     /* Pin the shadow and put it (back) on the list of top-level shadows */
-    if ( sh_pin(smfn) )
-        list_add(&mfn_to_shadow_page(smfn)->list,
-                 &d->arch.shadow.toplevel_shadows);
-    else
+    if ( sh_pin(v, smfn) == 0 )
     {
         SHADOW_ERROR("can't pin %#lx as toplevel shadow\n", mfn_x(smfn));
         domain_crash(v->domain);
-    }
-
-    /* Take a ref to this page: it will be released in sh_detach_old_tables. */
-    if ( !sh_get_ref(smfn, 0) )
+    }
+
+    /* Take a ref to this page: it will be released in sh_detach_old_tables()
+     * or the next call to set_toplevel_shadow() */
+    if ( !sh_get_ref(v, smfn, 0) )
     {
         SHADOW_ERROR("can't install %#lx as toplevel shadow\n", mfn_x(smfn));
         domain_crash(v->domain);
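The heuristic added to sh_make_shadow() above boils down to a single threshold test; a minimal sketch, with a hypothetical helper name:

```c
/* Sketch only: the threshold test used by sh_make_shadow() above.  An
 * "old Linux" guest keeps one l4 per (v)cpu, so seeing clearly more
 * pinned l4 shadows than vcpus means the guest allocates one l4 per
 * process like a normal 64-bit OS, and l3 pinning can be switched off. */
static int still_looks_like_old_linux(int pinned_l4_shadows, int nr_vcpus)
{
    /* The patch allows a factor-of-two margin before giving up. */
    return pinned_l4_shadows <= 2 * nr_vcpus;
}
```

For example, with two vcpus the optimisation stays on until more than four l4 shadows are pinned at once; at that point every pinned l3 shadow is unpinned and SHOPT_LINUX_L3_TOPLEVEL is cleared from the domain's opt_flags.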
--- a/xen/arch/x86/mm/shadow/private.h	Thu Nov 23 17:44:12 2006 +0000
+++ b/xen/arch/x86/mm/shadow/private.h	Thu Nov 23 17:46:52 2006 +0000
@@ -157,9 +157,11 @@ struct shadow_page_info
     } __attribute__((packed));
     union {
         /* For unused shadow pages, a list of pages of this order;
-         * for top-level shadows, a list of other top-level shadows */
+         * for pinnable shadows, if pinned, a list of other pinned shadows
+         * (see sh_type_is_pinnable() below for the definition of
+         * "pinnable" shadow types). */
         struct list_head list;
-        /* For lower-level shadows, a higher entry that points at us */
+        /* For non-pinnable shadows, a higher entry that points at us */
         paddr_t up;
     };
 };
@@ -195,6 +197,36 @@ static inline void shadow_check_page_str
 #define SH_type_monitor_table (14U) /* in use as a monitor table */
 #define SH_type_unused        (15U)

+/*
+ * What counts as a pinnable shadow?
+ */
+
+static inline int sh_type_is_pinnable(struct vcpu *v, unsigned int t)
+{
+    /* Top-level shadow types in each mode can be pinned, so that they
+     * persist even when not currently in use in a guest CR3 */
+    if ( t == SH_type_l2_32_shadow
+         || t == SH_type_l2_pae_shadow
+         || t == SH_type_l2h_pae_shadow
+         || t == SH_type_l4_64_shadow )
+        return 1;
+
+#if (SHADOW_OPTIMIZATIONS & SHOPT_LINUX_L3_TOPLEVEL)
+    /* Early 64-bit linux used three levels of pagetables for the guest
+     * and context switched by changing one l4 entry in a per-cpu l4
+     * page.  When we're shadowing those kernels, we have to pin l3
+     * shadows so they don't just evaporate on every context switch.
+     * For all other guests, we'd rather use the up-pointer field in l3s. */
+    if ( unlikely((v->domain->arch.shadow.opt_flags & SHOPT_LINUX_L3_TOPLEVEL)
+                  && CONFIG_PAGING_LEVELS >= 4
+                  && t == SH_type_l3_64_shadow) )
+        return 1;
+#endif
+
+    /* Everything else is not pinnable, and can use the "up" pointer */
+    return 0;
+}
+
 /*
  * Definitions for the shadow_flags field in page_info.
  * These flags are stored on *guest* pages...
@@ -364,7 +396,7 @@ void sh_destroy_shadow(struct vcpu *v, m
 * and the physical address of the shadow entry that holds the ref (or zero
 * if the ref is held by something else).
 * Returns 0 for failure, 1 for success. */
-static inline int sh_get_ref(mfn_t smfn, paddr_t entry_pa)
+static inline int sh_get_ref(struct vcpu *v, mfn_t smfn, paddr_t entry_pa)
 {
     u32 x, nx;
     struct shadow_page_info *sp = mfn_to_shadow_page(smfn);
@@ -385,7 +417,9 @@ static inline int sh_get_ref(mfn_t smfn,
     sp->count = nx;

     /* We remember the first shadow entry that points to each shadow. */
-    if ( entry_pa != 0 && sp->up == 0 )
+    if ( entry_pa != 0
+         && sh_type_is_pinnable(v, sp->type)
+         && sp->up == 0 )
         sp->up = entry_pa;

     return 1;
@@ -403,7 +437,9 @@ static inline void sh_put_ref(struct vcp
     ASSERT(sp->mbz == 0);

     /* If this is the entry in the up-pointer, remove it */
-    if ( entry_pa != 0 && sp->up == entry_pa )
+    if ( entry_pa != 0
+         && sh_type_is_pinnable(v, sp->type)
+         && sp->up == entry_pa )
         sp->up = 0;

     x = sp->count;
@@ -424,33 +460,48 @@ static inline void sh_put_ref(struct vcp
 }


-/* Pin a shadow page: take an extra refcount and set the pin bit.
+/* Pin a shadow page: take an extra refcount, set the pin bit,
+ * and put the shadow at the head of the list of pinned shadows.
  * Returns 0 for failure, 1 for success. */
-static inline int sh_pin(mfn_t smfn)
+static inline int sh_pin(struct vcpu *v, mfn_t smfn)
 {
     struct shadow_page_info *sp;

     ASSERT(mfn_valid(smfn));
     sp = mfn_to_shadow_page(smfn);
-    if ( !(sp->pinned) )
+    ASSERT(sh_type_is_pinnable(v, sp->type));
+    if ( sp->pinned )
     {
-        if ( !sh_get_ref(smfn, 0) )
+        /* Already pinned: take it out of the pinned-list so it can go
+         * at the front */
+        list_del(&sp->list);
+    }
+    else
+    {
+        /* Not pinned: pin it! */
+        if ( !sh_get_ref(v, smfn, 0) )
             return 0;
         sp->pinned = 1;
     }
+    /* Put it at the head of the list of pinned shadows */
+    list_add(&sp->list, &v->domain->arch.shadow.pinned_shadows);
     return 1;
 }

-/* Unpin a shadow page: unset the pin bit and release the extra ref. */
+/* Unpin a shadow page: unset the pin bit, take the shadow off the list
+ * of pinned shadows, and release the extra ref. */
 static inline void sh_unpin(struct vcpu *v, mfn_t smfn)
 {
     struct shadow_page_info *sp;

     ASSERT(mfn_valid(smfn));
     sp = mfn_to_shadow_page(smfn);
+    ASSERT(sh_type_is_pinnable(v, sp->type));
     if ( sp->pinned )
     {
         sp->pinned = 0;
+        list_del(&sp->list);
+        sp->up = 0; /* in case this stops being a pinnable type in future */
         sh_put_ref(v, smfn, 0);
     }
 }
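A consequence of the new sh_pin()/sh_unpin() above is that pinned_shadows stays in most-recently-pinned order: re-pinning an already-pinned shadow moves it back to the head of the list, and shadow_prealloc() walks the list backwards so the least recently pinned shadows are reclaimed first. A sketch of the list behaviour (illustrative only; the mfn arguments stand for two hypothetical pinnable shadows):

```c
/* Sketch only: how the pinned-shadow list evolves under the sh_pin() and
 * sh_unpin() helpers above.  'v' is a vcpu of a shadowed domain; smfn_a
 * and smfn_b are purely illustrative pinnable shadow pages. */
static void pin_order_example(struct vcpu *v, mfn_t smfn_a, mfn_t smfn_b)
{
    sh_pin(v, smfn_a);    /* pinned_shadows: a */
    sh_pin(v, smfn_b);    /* pinned_shadows: b, a */
    sh_pin(v, smfn_a);    /* already pinned: moved back to head -> a, b */

    /* shadow_prealloc() walks this list backwards, so smfn_b (least
     * recently pinned) would be unpinned before smfn_a. */
    sh_unpin(v, smfn_b);  /* pinned_shadows: a */
}
```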
--- a/xen/include/asm-x86/domain.h	Thu Nov 23 17:44:12 2006 +0000
+++ b/xen/include/asm-x86/domain.h	Thu Nov 23 17:46:52 2006 +0000
@@ -65,10 +65,11 @@ struct shadow_domain {
     struct list_head freelists[SHADOW_MAX_ORDER + 1];
     struct list_head p2m_freelist;
     struct list_head p2m_inuse;
-    struct list_head toplevel_shadows;
+    struct list_head pinned_shadows;
     unsigned int total_pages;  /* number of pages allocated */
     unsigned int free_pages;   /* number of pages on freelists */
     unsigned int p2m_pages;    /* number of pages in p2m map */
+    unsigned int opt_flags;    /* runtime tunable optimizations on/off */

     /* Shadow hashtable */
     struct shadow_page_info **hash_table;
--- a/xen/include/asm-x86/shadow.h	Thu Nov 23 17:44:12 2006 +0000
+++ b/xen/include/asm-x86/shadow.h	Thu Nov 23 17:46:52 2006 +0000
@@ -158,8 +158,9 @@ extern int shadow_audit_enable;
 #define SHOPT_EARLY_UNSHADOW      0x02  /* Unshadow l1s on fork or exit */
 #define SHOPT_FAST_FAULT_PATH     0x04  /* Fast-path MMIO and not-present */
 #define SHOPT_PREFETCH            0x08  /* Shadow multiple entries per fault */
+#define SHOPT_LINUX_L3_TOPLEVEL   0x10  /* Pin l3es on early 64bit linux */

-#define SHADOW_OPTIMIZATIONS      0x0f
+#define SHADOW_OPTIMIZATIONS      0x1f


 /* With shadow pagetables, the different kinds of address start
@@ -594,24 +595,6 @@ static inline unsigned int shadow_get_al
             + ((pg & ((1 << (20 - PAGE_SHIFT)) - 1)) ? 1 : 0));
 }

-#if SHADOW_OPTIMIZATIONS & SHOPT_CACHE_WALKS
-/* Optimization: cache the results of guest walks.  This helps with MMIO
- * and emulated writes, which tend to issue very similar walk requests
- * repeatedly.  We keep the results of the last few walks, and blow
- * away the cache on guest cr3 write, mode change, or page fault. */
-
-#define SH_WALK_CACHE_ENTRIES 4
-
-/* Rather than cache a guest walk, which would include mapped pointers
- * to pages, we cache what a TLB would remember about the walk: the
- * permissions and the l1 gfn */
-struct shadow_walk_cache {
-    unsigned long va;        /* The virtual address (or 0 == unused) */
-    unsigned long gfn;       /* The gfn from the effective l1e */
-    u32 permissions;         /* The aggregated permission bits */
-};
-#endif
-

 /**************************************************************************/
 /* Guest physmap (p2m) support
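The new flag is gated twice: it has to be compiled into SHADOW_OPTIMIZATIONS, and it must still be set in the domain's runtime opt_flags, which shadow_enable() initialises and sh_make_shadow() may later clear. A minimal sketch of the combined test (the helper name is hypothetical):

```c
/* Sketch only: compile-time plus runtime gating of the new flag.  The
 * helper name is made up; the flag and the opt_flags field are the ones
 * added by this changeset. */
static inline int pinning_l3_shadows(struct domain *d)
{
#if (SHADOW_OPTIMIZATIONS & SHOPT_LINUX_L3_TOPLEVEL)
    return (d->arch.shadow.opt_flags & SHOPT_LINUX_L3_TOPLEVEL) != 0;
#else
    return 0;
#endif
}
```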