ia64/xen-unstable
changeset 12204:9b553a9787cf
Merge.
Signed-off-by: Ewan Mellor <ewan@xensource.com>
| author | Ewan Mellor <ewan@xensource.com> |
|---|---|
| date | Wed Nov 01 10:41:44 2006 +0000 (2006-11-01) |
| parents | 33e9c88aab02 0b6f49d25d4f |
| children | 444496ecb14e |
| files | xen/arch/x86/mm/shadow/common.c, xen/arch/x86/mm/shadow/multi.c, xen/arch/x86/mm/shadow/private.h, xen/arch/x86/mm/shadow/types.h, xen/arch/x86/x86_32/seg_fixup.c, xen/include/asm-x86/perfc_defn.h, xen/include/asm-x86/shadow.h |
line diff
--- a/xen/arch/x86/mm/shadow/common.c	Wed Nov 01 10:40:46 2006 +0000
+++ b/xen/arch/x86/mm/shadow/common.c	Wed Nov 01 10:41:44 2006 +0000
@@ -1327,8 +1327,18 @@ static void sh_hash_audit_bucket(struct
              && e->t != (PGC_SH_fl1_pae_shadow >> PGC_SH_type_shift)
              && e->t != (PGC_SH_fl1_64_shadow >> PGC_SH_type_shift) )
         {
+            struct page_info *gpg = mfn_to_page(_mfn(e->n));
             /* Bad shadow flags on guest page? */
-            BUG_ON( !(mfn_to_page(_mfn(e->n))->shadow_flags & (1<<e->t)) );
+            BUG_ON( !(gpg->shadow_flags & (1<<e->t)) );
+            /* Bad type count on guest page? */
+            if ( (gpg->u.inuse.type_info & PGT_type_mask) == PGT_writable_page
+                 && (gpg->u.inuse.type_info & PGT_count_mask) != 0 )
+            {
+                SHADOW_ERROR("MFN %#"SH_PRI_mfn" shadowed (by %#"SH_PRI_mfn")"
+                             " but has typecount %#lx\n",
+                             e->n, mfn_x(e->smfn), gpg->u.inuse.type_info);
+                BUG();
+            }
         }
         /* That entry was OK; on we go */
         e = e->next;
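The new audit in sh_hash_audit_bucket() checks that a guest page which is currently shadowed does not still hold outstanding writable type references. A minimal standalone sketch of that bitmask test follows; the PGT_* layouts are illustrative placeholders, not Xen's real definitions.

```c
#include <assert.h>
#include <stdio.h>

/* Illustrative layouts only -- Xen's real PGT_* constants differ. */
#define PGT_TYPE_MASK      0xe0000000UL   /* top bits: page type            */
#define PGT_WRITABLE_PAGE  0x20000000UL   /* "has writable mappings" type   */
#define PGT_COUNT_MASK     0x0fffffffUL   /* low bits: type reference count */

/* Non-zero if a shadowed page still has writable refs -- the condition
 * the audit code now reports with SHADOW_ERROR() and BUG(). */
static int shadowed_page_has_writable_refs(unsigned long type_info)
{
    return (type_info & PGT_TYPE_MASK) == PGT_WRITABLE_PAGE
        && (type_info & PGT_COUNT_MASK) != 0;
}

int main(void)
{
    unsigned long ok  = PGT_WRITABLE_PAGE | 0;   /* writable type, count 0 */
    unsigned long bad = PGT_WRITABLE_PAGE | 3;   /* three writable refs    */

    assert(!shadowed_page_has_writable_refs(ok));
    assert(shadowed_page_has_writable_refs(bad));
    printf("audit invariant holds only for the zero-count case\n");
    return 0;
}
```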
2.1 --- a/xen/arch/x86/mm/shadow/multi.c Wed Nov 01 10:40:46 2006 +0000 2.2 +++ b/xen/arch/x86/mm/shadow/multi.c Wed Nov 01 10:41:44 2006 +0000 2.3 @@ -36,10 +36,7 @@ 2.4 #include "private.h" 2.5 #include "types.h" 2.6 2.7 -/* The first cut: an absolutely synchronous, trap-and-emulate version, 2.8 - * supporting only HVM guests (and so only "external" shadow mode). 2.9 - * 2.10 - * THINGS TO DO LATER: 2.11 +/* THINGS TO DO LATER: 2.12 * 2.13 * TEARDOWN HEURISTICS 2.14 * Also: have a heuristic for when to destroy a previous paging-mode's 2.15 @@ -56,14 +53,6 @@ 2.16 * l3-and-l2h-only shadow mode for PAE PV guests that would allow them 2.17 * to share l2h pages again. 2.18 * 2.19 - * PAE L3 COPYING 2.20 - * In this code, we copy all 32 bytes of a PAE L3 every time we change an 2.21 - * entry in it, and every time we change CR3. We copy it for the linear 2.22 - * mappings (ugh! PAE linear mappings) and we copy it to the low-memory 2.23 - * buffer so it fits in CR3. Maybe we can avoid some of this recopying 2.24 - * by using the shadow directly in some places. 2.25 - * Also, for SMP, need to actually respond to seeing shadow.pae_flip_pending. 2.26 - * 2.27 * GUEST_WALK_TABLES TLB FLUSH COALESCE 2.28 * guest_walk_tables can do up to three remote TLB flushes as it walks to 2.29 * the first l1 of a new pagetable. Should coalesce the flushes to the end, 2.30 @@ -99,9 +88,6 @@ static char *fetch_type_names[] = { 2.31 }; 2.32 #endif 2.33 2.34 -/* XXX forward declarations */ 2.35 -static inline void sh_update_linear_entries(struct vcpu *v); 2.36 - 2.37 /**************************************************************************/ 2.38 /* Hash table mapping from guest pagetables to shadows 2.39 * 2.40 @@ -460,16 +446,20 @@ static u32 guest_set_ad_bits(struct vcpu 2.41 u32 flags; 2.42 int res = 0; 2.43 2.44 + ASSERT(ep && !(((unsigned long)ep) & ((sizeof *ep) - 1))); 2.45 + ASSERT(level <= GUEST_PAGING_LEVELS); 2.46 + ASSERT(shadow_lock_is_acquired(v->domain)); 2.47 + 2.48 + flags = guest_l1e_get_flags(*ep); 2.49 + 2.50 + /* Only set A and D bits for guest-initiated accesses */ 2.51 + if ( !(ft & FETCH_TYPE_DEMAND) ) 2.52 + return flags; 2.53 + 2.54 ASSERT(valid_mfn(gmfn) 2.55 && (sh_mfn_is_a_page_table(gmfn) 2.56 || ((mfn_to_page(gmfn)->u.inuse.type_info & PGT_count_mask) 2.57 == 0))); 2.58 - ASSERT(ep && !(((unsigned long)ep) & ((sizeof *ep) - 1))); 2.59 - ASSERT(level <= GUEST_PAGING_LEVELS); 2.60 - ASSERT(ft == ft_demand_read || ft == ft_demand_write); 2.61 - ASSERT(shadow_lock_is_acquired(v->domain)); 2.62 - 2.63 - flags = guest_l1e_get_flags(*ep); 2.64 2.65 /* PAE l3s do not have A and D bits */ 2.66 ASSERT(GUEST_PAGING_LEVELS > 3 || level != 3); 2.67 @@ -496,12 +486,20 @@ static u32 guest_set_ad_bits(struct vcpu 2.68 /* Set the bit(s) */ 2.69 sh_mark_dirty(v->domain, gmfn); 2.70 SHADOW_DEBUG(A_AND_D, "gfn = %" SH_PRI_gfn ", " 2.71 - "old flags = %#x, new flags = %#x\n", 2.72 - gfn_x(guest_l1e_get_gfn(*ep)), guest_l1e_get_flags(*ep), flags); 2.73 + "old flags = %#x, new flags = %#x\n", 2.74 + gfn_x(guest_l1e_get_gfn(*ep)), guest_l1e_get_flags(*ep), 2.75 + flags); 2.76 *ep = guest_l1e_from_gfn(guest_l1e_get_gfn(*ep), flags); 2.77 2.78 - /* Propagate this change to any existing shadows */ 2.79 - res = __shadow_validate_guest_entry(v, gmfn, ep, sizeof(*ep)); 2.80 + /* Propagate this change to any other shadows of the page 2.81 + * (only necessary if there is more than one shadow) */ 2.82 + if ( mfn_to_page(gmfn)->count_info & PGC_page_table ) 2.83 + { 2.84 + u32 shflags = 
mfn_to_page(gmfn)->shadow_flags & SHF_page_type_mask; 2.85 + /* More than one type bit set in shadow-flags? */ 2.86 + if ( shflags & ~(1UL << find_first_set_bit(shflags)) ) 2.87 + res = __shadow_validate_guest_entry(v, gmfn, ep, sizeof(*ep)); 2.88 + } 2.89 2.90 /* We should never need to flush the TLB or recopy PAE entries */ 2.91 ASSERT((res == 0) || (res == SHADOW_SET_CHANGED)); 2.92 @@ -637,79 +635,70 @@ shadow_l4_index(mfn_t *smfn, u32 guest_i 2.93 2.94 2.95 /**************************************************************************/ 2.96 -/* Functions which compute shadow entries from their corresponding guest 2.97 - * entries. 2.98 - * 2.99 - * These are the "heart" of the shadow code. 2.100 - * 2.101 - * There are two sets of these: those that are called on demand faults (read 2.102 - * faults and write faults), and those that are essentially called to 2.103 - * "prefetch" (or propagate) entries from the guest into the shadow. The read 2.104 - * fault and write fault are handled as two separate cases for L1 entries (due 2.105 - * to the _PAGE_DIRTY bit handling), but for L[234], they are grouped together 2.106 - * into the respective demand_fault functions. 2.107 +/* Function which computes shadow entries from their corresponding guest 2.108 + * entries. This is the "heart" of the shadow code. It operates using 2.109 + * level-1 shadow types, but handles all levels of entry. 2.110 + * Don't call it directly, but use the four wrappers below. 2.111 */ 2.112 -// The function below tries to capture all of the flag manipulation for the 2.113 -// demand and propagate functions into one place. 2.114 -// 2.115 -static always_inline u32 2.116 -sh_propagate_flags(struct vcpu *v, mfn_t target_mfn, 2.117 - u32 gflags, guest_l1e_t *guest_entry_ptr, mfn_t gmfn, 2.118 - int mmio, int level, fetch_type_t ft) 2.119 + 2.120 +static always_inline void 2.121 +_sh_propagate(struct vcpu *v, 2.122 + void *guest_entry_ptr, 2.123 + mfn_t guest_table_mfn, 2.124 + mfn_t target_mfn, 2.125 + void *shadow_entry_ptr, 2.126 + int level, 2.127 + fetch_type_t ft, 2.128 + int mmio) 2.129 { 2.130 -#define CHECK(_cond) \ 2.131 -do { \ 2.132 - if (unlikely(!(_cond))) \ 2.133 - { \ 2.134 - printk("%s %s %d ASSERTION (%s) FAILED\n", \ 2.135 - __func__, __FILE__, __LINE__, #_cond); \ 2.136 - domain_crash(d); \ 2.137 - } \ 2.138 -} while (0); 2.139 - 2.140 + guest_l1e_t *gp = guest_entry_ptr; 2.141 + shadow_l1e_t *sp = shadow_entry_ptr; 2.142 struct domain *d = v->domain; 2.143 u32 pass_thru_flags; 2.144 - u32 sflags; 2.145 + u32 gflags, sflags; 2.146 2.147 /* We don't shadow PAE l3s */ 2.148 ASSERT(GUEST_PAGING_LEVELS > 3 || level != 3); 2.149 2.150 - // XXX -- might want to think about PAT support for HVM guests... 2.151 - 2.152 -#ifndef NDEBUG 2.153 - // MMIO can only occur from L1e's 2.154 - // 2.155 - if ( mmio ) 2.156 - CHECK(level == 1); 2.157 - 2.158 - // We should always have a pointer to the guest entry if it's a non-PSE 2.159 - // non-MMIO demand access. 2.160 - if ( ft & FETCH_TYPE_DEMAND ) 2.161 - CHECK(guest_entry_ptr || level == 1); 2.162 -#endif 2.163 - 2.164 - // A not-present guest entry has a special signature in the shadow table, 2.165 - // so that we do not have to consult the guest tables multiple times... 
2.166 - // 2.167 + if ( valid_mfn(guest_table_mfn) ) 2.168 + /* Handle A and D bit propagation into the guest */ 2.169 + gflags = guest_set_ad_bits(v, guest_table_mfn, gp, level, ft); 2.170 + else 2.171 + { 2.172 + /* Must be an fl1e or a prefetch */ 2.173 + ASSERT(level==1 || !(ft & FETCH_TYPE_DEMAND)); 2.174 + gflags = guest_l1e_get_flags(*gp); 2.175 + } 2.176 + 2.177 if ( unlikely(!(gflags & _PAGE_PRESENT)) ) 2.178 - return _PAGE_SHADOW_GUEST_NOT_PRESENT; 2.179 - 2.180 - // Must have a valid target_mfn, unless this is mmio, or unless this is a 2.181 - // prefetch. In the case of a prefetch, an invalid mfn means that we can 2.182 - // not usefully shadow anything, and so we return early. 2.183 + { 2.184 + /* If a guest l1 entry is not present, shadow with the magic 2.185 + * guest-not-present entry. */ 2.186 + if ( level == 1 ) 2.187 + *sp = sh_l1e_gnp(); 2.188 + else 2.189 + *sp = shadow_l1e_empty(); 2.190 + goto done; 2.191 + } 2.192 + 2.193 + if ( level == 1 && mmio ) 2.194 + { 2.195 + /* Guest l1e maps MMIO space */ 2.196 + *sp = sh_l1e_mmio(guest_l1e_get_gfn(*gp), gflags); 2.197 + goto done; 2.198 + } 2.199 + 2.200 + // Must have a valid target_mfn, unless this is a prefetch. In the 2.201 + // case of a prefetch, an invalid mfn means that we can not usefully 2.202 + // shadow anything, and so we return early. 2.203 // 2.204 if ( !valid_mfn(target_mfn) ) 2.205 { 2.206 - CHECK((ft == ft_prefetch) || mmio); 2.207 - if ( !mmio ) 2.208 - return 0; 2.209 + ASSERT((ft == ft_prefetch)); 2.210 + *sp = shadow_l1e_empty(); 2.211 + goto done; 2.212 } 2.213 2.214 - // Set the A and D bits in the guest entry, if we need to. 2.215 - if ( guest_entry_ptr && (ft & FETCH_TYPE_DEMAND) ) 2.216 - gflags = guest_set_ad_bits(v, gmfn, guest_entry_ptr, level, ft); 2.217 - 2.218 - 2.219 // Propagate bits from the guest to the shadow. 2.220 // Some of these may be overwritten, below. 2.221 // Since we know the guest's PRESENT bit is set, we also set the shadow's 2.222 @@ -719,12 +708,7 @@ do { 2.223 _PAGE_RW | _PAGE_PRESENT); 2.224 if ( guest_supports_nx(v) ) 2.225 pass_thru_flags |= _PAGE_NX_BIT; 2.226 - sflags = (gflags & pass_thru_flags) | _PAGE_SHADOW_PRESENT; 2.227 - 2.228 - // Copy the guest's RW bit into the SHADOW_RW bit. 2.229 - // 2.230 - if ( gflags & _PAGE_RW ) 2.231 - sflags |= _PAGE_SHADOW_RW; 2.232 + sflags = gflags & pass_thru_flags; 2.233 2.234 // Set the A&D bits for higher level shadows. 2.235 // Higher level entries do not, strictly speaking, have dirty bits, but 2.236 @@ -750,49 +734,35 @@ do { 2.237 && !(gflags & _PAGE_DIRTY)) ) 2.238 sflags &= ~_PAGE_RW; 2.239 2.240 - // MMIO caching 2.241 + // shadow_mode_log_dirty support 2.242 // 2.243 - // MMIO mappings are marked as not present, but we set the SHADOW_MMIO bit 2.244 - // to cache the fact that this entry is in MMIO space. 2.245 + // Only allow the guest write access to a page a) on a demand fault, 2.246 + // or b) if the page is already marked as dirty. 
2.247 // 2.248 - if ( (level == 1) && mmio ) 2.249 + if ( unlikely((level == 1) && shadow_mode_log_dirty(d)) ) 2.250 { 2.251 - sflags &= ~(_PAGE_PRESENT); 2.252 - sflags |= _PAGE_SHADOW_MMIO; 2.253 + if ( ft & FETCH_TYPE_WRITE ) 2.254 + sh_mark_dirty(d, target_mfn); 2.255 + else if ( !sh_mfn_is_dirty(d, target_mfn) ) 2.256 + sflags &= ~_PAGE_RW; 2.257 } 2.258 - else 2.259 + 2.260 + // protect guest page tables 2.261 + // 2.262 + if ( unlikely((level == 1) && sh_mfn_is_a_page_table(target_mfn)) ) 2.263 { 2.264 - // shadow_mode_log_dirty support 2.265 - // 2.266 - // Only allow the guest write access to a page a) on a demand fault, 2.267 - // or b) if the page is already marked as dirty. 2.268 - // 2.269 - if ( unlikely((level == 1) && 2.270 - !(ft & FETCH_TYPE_WRITE) && 2.271 - shadow_mode_log_dirty(d) && 2.272 - !sh_mfn_is_dirty(d, target_mfn)) ) 2.273 + if ( shadow_mode_trap_reads(d) ) 2.274 { 2.275 - sflags &= ~_PAGE_RW; 2.276 + // if we are trapping both reads & writes, then mark this page 2.277 + // as not present... 2.278 + // 2.279 + sflags &= ~_PAGE_PRESENT; 2.280 } 2.281 - 2.282 - // protect guest page tables 2.283 - // 2.284 - if ( unlikely((level == 1) && 2.285 - sh_mfn_is_a_page_table(target_mfn)) ) 2.286 + else 2.287 { 2.288 - if ( shadow_mode_trap_reads(d) ) 2.289 - { 2.290 - // if we are trapping both reads & writes, then mark this page 2.291 - // as not present... 2.292 - // 2.293 - sflags &= ~_PAGE_PRESENT; 2.294 - } 2.295 - else 2.296 - { 2.297 - // otherwise, just prevent any writes... 2.298 - // 2.299 - sflags &= ~_PAGE_RW; 2.300 - } 2.301 + // otherwise, just prevent any writes... 2.302 + // 2.303 + sflags &= ~_PAGE_RW; 2.304 } 2.305 } 2.306 2.307 @@ -804,29 +774,28 @@ do { 2.308 sflags |= _PAGE_USER; 2.309 } 2.310 2.311 - return sflags; 2.312 -#undef CHECK 2.313 + *sp = shadow_l1e_from_mfn(target_mfn, sflags); 2.314 + done: 2.315 + SHADOW_DEBUG(PROPAGATE, 2.316 + "%s level %u guest %" SH_PRI_gpte " shadow %" SH_PRI_pte "\n", 2.317 + fetch_type_names[ft], level, gp->l1, sp->l1); 2.318 } 2.319 2.320 + 2.321 +/* These four wrappers give us a little bit of type-safety back around the 2.322 + * use of void-* pointers in _sh_propagate(), and allow the compiler to 2.323 + * optimize out some level checks. 
*/ 2.324 + 2.325 #if GUEST_PAGING_LEVELS >= 4 2.326 static void 2.327 l4e_propagate_from_guest(struct vcpu *v, 2.328 guest_l4e_t *gl4e, 2.329 mfn_t gl4mfn, 2.330 mfn_t sl3mfn, 2.331 - shadow_l4e_t *sl4p, 2.332 + shadow_l4e_t *sl4e, 2.333 fetch_type_t ft) 2.334 { 2.335 - u32 gflags = guest_l4e_get_flags(*gl4e); 2.336 - u32 sflags = sh_propagate_flags(v, sl3mfn, gflags, (guest_l1e_t *) gl4e, 2.337 - gl4mfn, 0, 4, ft); 2.338 - 2.339 - *sl4p = shadow_l4e_from_mfn(sl3mfn, sflags); 2.340 - 2.341 - SHADOW_DEBUG(PROPAGATE, 2.342 - "%s gl4e=%" SH_PRI_gpte " sl4e=%" SH_PRI_pte "\n", 2.343 - fetch_type_names[ft], gl4e->l4, sl4p->l4); 2.344 - ASSERT(sflags != -1); 2.345 + _sh_propagate(v, gl4e, gl4mfn, sl3mfn, sl4e, 4, ft, 0); 2.346 } 2.347 2.348 static void 2.349 @@ -834,19 +803,10 @@ l3e_propagate_from_guest(struct vcpu *v, 2.350 guest_l3e_t *gl3e, 2.351 mfn_t gl3mfn, 2.352 mfn_t sl2mfn, 2.353 - shadow_l3e_t *sl3p, 2.354 + shadow_l3e_t *sl3e, 2.355 fetch_type_t ft) 2.356 { 2.357 - u32 gflags = guest_l3e_get_flags(*gl3e); 2.358 - u32 sflags = sh_propagate_flags(v, sl2mfn, gflags, (guest_l1e_t *) gl3e, 2.359 - gl3mfn, 0, 3, ft); 2.360 - 2.361 - *sl3p = shadow_l3e_from_mfn(sl2mfn, sflags); 2.362 - 2.363 - SHADOW_DEBUG(PROPAGATE, 2.364 - "%s gl3e=%" SH_PRI_gpte " sl3e=%" SH_PRI_pte "\n", 2.365 - fetch_type_names[ft], gl3e->l3, sl3p->l3); 2.366 - ASSERT(sflags != -1); 2.367 + _sh_propagate(v, gl3e, gl3mfn, sl2mfn, sl3e, 3, ft, 0); 2.368 } 2.369 #endif // GUEST_PAGING_LEVELS >= 4 2.370 2.371 @@ -854,95 +814,23 @@ static void 2.372 l2e_propagate_from_guest(struct vcpu *v, 2.373 guest_l2e_t *gl2e, 2.374 mfn_t gl2mfn, 2.375 - mfn_t sl1mfn, 2.376 - shadow_l2e_t *sl2p, 2.377 + mfn_t sl1mfn, 2.378 + shadow_l2e_t *sl2e, 2.379 fetch_type_t ft) 2.380 { 2.381 - u32 gflags = guest_l2e_get_flags(*gl2e); 2.382 - u32 sflags = sh_propagate_flags(v, sl1mfn, gflags, (guest_l1e_t *) gl2e, 2.383 - gl2mfn, 0, 2, ft); 2.384 - 2.385 - *sl2p = shadow_l2e_from_mfn(sl1mfn, sflags); 2.386 - 2.387 - SHADOW_DEBUG(PROPAGATE, 2.388 - "%s gl2e=%" SH_PRI_gpte " sl2e=%" SH_PRI_pte "\n", 2.389 - fetch_type_names[ft], gl2e->l2, sl2p->l2); 2.390 - ASSERT(sflags != -1); 2.391 + _sh_propagate(v, gl2e, gl2mfn, sl1mfn, sl2e, 2, ft, 0); 2.392 } 2.393 2.394 -static inline int 2.395 -l1e_read_fault(struct vcpu *v, walk_t *gw, mfn_t gmfn, shadow_l1e_t *sl1p, 2.396 - int mmio) 2.397 -/* returns 1 if emulation is required, and 0 otherwise */ 2.398 -{ 2.399 - struct domain *d = v->domain; 2.400 - u32 gflags = guest_l1e_get_flags(gw->eff_l1e); 2.401 - u32 sflags = sh_propagate_flags(v, gmfn, gflags, gw->l1e, gw->l1mfn, 2.402 - mmio, 1, ft_demand_read); 2.403 - 2.404 - if ( shadow_mode_trap_reads(d) && !mmio && sh_mfn_is_a_page_table(gmfn) ) 2.405 - { 2.406 - // emulation required! 
2.407 - *sl1p = shadow_l1e_empty(); 2.408 - return 1; 2.409 - } 2.410 - 2.411 - *sl1p = shadow_l1e_from_mfn(gmfn, sflags); 2.412 - 2.413 - SHADOW_DEBUG(PROPAGATE, 2.414 - "va=%p eff_gl1e=%" SH_PRI_gpte " sl1e=%" SH_PRI_pte "\n", 2.415 - (void *)gw->va, gw->eff_l1e.l1, sl1p->l1); 2.416 - 2.417 - ASSERT(sflags != -1); 2.418 - return 0; 2.419 -} 2.420 - 2.421 -static inline int 2.422 -l1e_write_fault(struct vcpu *v, walk_t *gw, mfn_t gmfn, shadow_l1e_t *sl1p, 2.423 - int mmio) 2.424 -/* returns 1 if emulation is required, and 0 otherwise */ 2.425 -{ 2.426 - struct domain *d = v->domain; 2.427 - u32 gflags = guest_l1e_get_flags(gw->eff_l1e); 2.428 - u32 sflags = sh_propagate_flags(v, gmfn, gflags, gw->l1e, gw->l1mfn, 2.429 - mmio, 1, ft_demand_write); 2.430 - 2.431 - sh_mark_dirty(d, gmfn); 2.432 - 2.433 - if ( !mmio && sh_mfn_is_a_page_table(gmfn) ) 2.434 - { 2.435 - // emulation required! 2.436 - *sl1p = shadow_l1e_empty(); 2.437 - return 1; 2.438 - } 2.439 - 2.440 - *sl1p = shadow_l1e_from_mfn(gmfn, sflags); 2.441 - 2.442 - SHADOW_DEBUG(PROPAGATE, 2.443 - "va=%p eff_gl1e=%" SH_PRI_gpte " sl1e=%" SH_PRI_pte "\n", 2.444 - (void *)gw->va, gw->eff_l1e.l1, sl1p->l1); 2.445 - 2.446 - ASSERT(sflags != -1); 2.447 - return 0; 2.448 -} 2.449 - 2.450 -static inline void 2.451 -l1e_propagate_from_guest(struct vcpu *v, guest_l1e_t gl1e, shadow_l1e_t *sl1p, 2.452 +static void 2.453 +l1e_propagate_from_guest(struct vcpu *v, 2.454 + guest_l1e_t *gl1e, 2.455 + mfn_t gl1mfn, 2.456 + mfn_t gmfn, 2.457 + shadow_l1e_t *sl1e, 2.458 + fetch_type_t ft, 2.459 int mmio) 2.460 { 2.461 - gfn_t gfn = guest_l1e_get_gfn(gl1e); 2.462 - mfn_t gmfn = (mmio) ? _mfn(gfn_x(gfn)) : vcpu_gfn_to_mfn(v, gfn); 2.463 - u32 gflags = guest_l1e_get_flags(gl1e); 2.464 - u32 sflags = sh_propagate_flags(v, gmfn, gflags, 0, _mfn(INVALID_MFN), 2.465 - mmio, 1, ft_prefetch); 2.466 - 2.467 - *sl1p = shadow_l1e_from_mfn(gmfn, sflags); 2.468 - 2.469 - SHADOW_DEBUG(PROPAGATE, 2.470 - "gl1e=%" SH_PRI_gpte " sl1e=%" SH_PRI_pte "\n", 2.471 - gl1e.l1, sl1p->l1); 2.472 - 2.473 - ASSERT(sflags != -1); 2.474 + _sh_propagate(v, gl1e, gl1mfn, gmfn, sl1e, 1, ft, mmio); 2.475 } 2.476 2.477 2.478 @@ -956,8 +844,6 @@ l1e_propagate_from_guest(struct vcpu *v, 2.479 * SHADOW_SET_FLUSH -- the caller must cause a TLB flush. 2.480 * SHADOW_SET_ERROR -- the input is not a valid entry (for example, if 2.481 * shadow_get_page_from_l1e() fails). 2.482 - * SHADOW_SET_L3PAE_RECOPY -- one or more vcpu's need to have their local 2.483 - * copies of their PAE L3 entries re-copied. 2.484 */ 2.485 2.486 static inline void safe_write_entry(void *dst, void *src) 2.487 @@ -1041,16 +927,13 @@ shadow_get_page_from_l1e(shadow_l1e_t sl 2.488 int res; 2.489 mfn_t mfn; 2.490 struct domain *owner; 2.491 - shadow_l1e_t sanitized_sl1e = 2.492 - shadow_l1e_remove_flags(sl1e, _PAGE_SHADOW_RW | _PAGE_SHADOW_PRESENT); 2.493 - 2.494 - //ASSERT(shadow_l1e_get_flags(sl1e) & _PAGE_PRESENT); 2.495 - //ASSERT((shadow_l1e_get_flags(sl1e) & L1_DISALLOW_MASK) == 0); 2.496 + 2.497 + ASSERT(!sh_l1e_is_magic(sl1e)); 2.498 2.499 if ( !shadow_mode_refcounts(d) ) 2.500 return 1; 2.501 2.502 - res = get_page_from_l1e(sanitized_sl1e, d); 2.503 + res = get_page_from_l1e(sl1e, d); 2.504 2.505 // If a privileged domain is attempting to install a map of a page it does 2.506 // not own, we let it succeed anyway. 
2.507 @@ -1062,7 +945,7 @@ shadow_get_page_from_l1e(shadow_l1e_t sl 2.508 (owner = page_get_owner(mfn_to_page(mfn))) && 2.509 (d != owner) ) 2.510 { 2.511 - res = get_page_from_l1e(sanitized_sl1e, owner); 2.512 + res = get_page_from_l1e(sl1e, owner); 2.513 SHADOW_PRINTK("privileged domain %d installs map of mfn %05lx " 2.514 "which is owned by domain %d: %s\n", 2.515 d->domain_id, mfn_x(mfn), owner->domain_id, 2.516 @@ -1250,7 +1133,8 @@ static int shadow_set_l1e(struct vcpu *v 2.517 2.518 if ( old_sl1e.l1 == new_sl1e.l1 ) return 0; /* Nothing to do */ 2.519 2.520 - if ( shadow_l1e_get_flags(new_sl1e) & _PAGE_PRESENT ) 2.521 + if ( (shadow_l1e_get_flags(new_sl1e) & _PAGE_PRESENT) 2.522 + && !sh_l1e_is_magic(new_sl1e) ) 2.523 { 2.524 /* About to install a new reference */ 2.525 if ( shadow_mode_refcounts(d) ) { 2.526 @@ -1267,7 +1151,8 @@ static int shadow_set_l1e(struct vcpu *v 2.527 shadow_write_entries(sl1e, &new_sl1e, 1, sl1mfn); 2.528 flags |= SHADOW_SET_CHANGED; 2.529 2.530 - if ( shadow_l1e_get_flags(old_sl1e) & _PAGE_PRESENT ) 2.531 + if ( (shadow_l1e_get_flags(old_sl1e) & _PAGE_PRESENT) 2.532 + && !sh_l1e_is_magic(old_sl1e) ) 2.533 { 2.534 /* We lost a reference to an old mfn. */ 2.535 /* N.B. Unlike higher-level sets, never need an extra flush 2.536 @@ -2133,7 +2018,8 @@ void sh_destroy_l1_shadow(struct vcpu *v 2.537 /* Decrement refcounts of all the old entries */ 2.538 mfn_t sl1mfn = smfn; 2.539 SHADOW_FOREACH_L1E(sl1mfn, sl1e, 0, 0, { 2.540 - if ( shadow_l1e_get_flags(*sl1e) & _PAGE_PRESENT ) 2.541 + if ( (shadow_l1e_get_flags(*sl1e) & _PAGE_PRESENT) 2.542 + && !sh_l1e_is_magic(*sl1e) ) 2.543 shadow_put_page_from_l1e(*sl1e, d); 2.544 }); 2.545 } 2.546 @@ -2399,16 +2285,17 @@ static int validate_gl1e(struct vcpu *v, 2.547 guest_l1e_t *new_gl1e = new_ge; 2.548 shadow_l1e_t *sl1p = se; 2.549 gfn_t gfn; 2.550 - mfn_t mfn; 2.551 - int result = 0; 2.552 + mfn_t gmfn; 2.553 + int result = 0, mmio; 2.554 2.555 perfc_incrc(shadow_validate_gl1e_calls); 2.556 2.557 gfn = guest_l1e_get_gfn(*new_gl1e); 2.558 - mfn = vcpu_gfn_to_mfn(v, gfn); 2.559 - 2.560 - l1e_propagate_from_guest(v, *new_gl1e, &new_sl1e, 2.561 - /* mmio? */ !valid_mfn(mfn)); 2.562 + gmfn = vcpu_gfn_to_mfn(v, gfn); 2.563 + 2.564 + mmio = (hvm_guest(v) && shadow_vcpu_mode_translate(v) && !valid_mfn(gmfn)); 2.565 + l1e_propagate_from_guest(v, new_gl1e, _mfn(INVALID_MFN), gmfn, &new_sl1e, 2.566 + ft_prefetch, mmio); 2.567 2.568 result |= shadow_set_l1e(v, sl1p, new_sl1e, sl1mfn); 2.569 return result; 2.570 @@ -2579,6 +2466,80 @@ static inline void reset_early_unshadow( 2.571 2.572 2.573 /**************************************************************************/ 2.574 +/* Optimization: Prefetch multiple L1 entries. This is called after we have 2.575 + * demand-faulted a shadow l1e in the fault handler, to see if it's 2.576 + * worth fetching some more. 
2.577 + */ 2.578 + 2.579 +#if SHADOW_OPTIMIZATIONS & SHOPT_PREFETCH 2.580 + 2.581 +/* XXX magic number */ 2.582 +#define PREFETCH_DISTANCE 32 2.583 + 2.584 +static void sh_prefetch(struct vcpu *v, walk_t *gw, 2.585 + shadow_l1e_t *ptr_sl1e, mfn_t sl1mfn) 2.586 +{ 2.587 + int i, dist, mmio; 2.588 + gfn_t gfn; 2.589 + mfn_t gmfn; 2.590 + guest_l1e_t gl1e; 2.591 + shadow_l1e_t sl1e; 2.592 + u32 gflags; 2.593 + 2.594 + /* Prefetch no further than the end of the _shadow_ l1 MFN */ 2.595 + dist = (PAGE_SIZE - ((unsigned long)ptr_sl1e & ~PAGE_MASK)) / sizeof sl1e; 2.596 + /* And no more than a maximum fetches-per-fault */ 2.597 + if ( dist > PREFETCH_DISTANCE ) 2.598 + dist = PREFETCH_DISTANCE; 2.599 + 2.600 + for ( i = 1; i < dist ; i++ ) 2.601 + { 2.602 + /* No point in prefetching if there's already a shadow */ 2.603 + if ( ptr_sl1e[i].l1 != 0 ) 2.604 + break; 2.605 + 2.606 + if ( gw->l1e ) 2.607 + { 2.608 + /* Normal guest page; grab the next guest entry */ 2.609 + gl1e = gw->l1e[i]; 2.610 + /* Not worth continuing if we hit an entry that will need another 2.611 + * fault for A/D-bit propagation anyway */ 2.612 + gflags = guest_l1e_get_flags(gl1e); 2.613 + if ( (gflags & _PAGE_PRESENT) 2.614 + && (!(gflags & _PAGE_ACCESSED) 2.615 + || ((gflags & _PAGE_RW) && !(gflags & _PAGE_DIRTY))) ) 2.616 + break; 2.617 + } 2.618 + else 2.619 + { 2.620 + /* Fragmented superpage, unless we've been called wrongly */ 2.621 + ASSERT(guest_l2e_get_flags(*gw->l2e) & _PAGE_PSE); 2.622 + /* Increment the l1e's GFN by the right number of guest pages */ 2.623 + gl1e = guest_l1e_from_gfn( 2.624 + _gfn(gfn_x(guest_l1e_get_gfn(gw->eff_l1e)) + i), 2.625 + guest_l1e_get_flags(gw->eff_l1e)); 2.626 + } 2.627 + 2.628 + /* Look at the gfn that the l1e is pointing at */ 2.629 + gfn = guest_l1e_get_gfn(gl1e); 2.630 + gmfn = vcpu_gfn_to_mfn(v, gfn); 2.631 + mmio = ( hvm_guest(v) 2.632 + && shadow_vcpu_mode_translate(v) 2.633 + && mmio_space(gfn_to_paddr(gfn)) ); 2.634 + 2.635 + /* Propagate the entry. Safe to use a pointer to our local 2.636 + * gl1e, since this is not a demand-fetch so there will be no 2.637 + * write-back to the guest. */ 2.638 + l1e_propagate_from_guest(v, &gl1e, _mfn(INVALID_MFN), 2.639 + gmfn, &sl1e, ft_prefetch, mmio); 2.640 + (void) shadow_set_l1e(v, ptr_sl1e + i, sl1e, sl1mfn); 2.641 + } 2.642 +} 2.643 + 2.644 +#endif /* SHADOW_OPTIMIZATIONS & SHOPT_PREFETCH */ 2.645 + 2.646 + 2.647 +/**************************************************************************/ 2.648 /* Entry points into the shadow code */ 2.649 2.650 /* Called from pagefault handler in Xen, and from the HVM trap handlers 2.651 @@ -2602,16 +2563,70 @@ static int sh_page_fault(struct vcpu *v, 2.652 int r, mmio; 2.653 fetch_type_t ft = 0; 2.654 2.655 + SHADOW_PRINTK("d:v=%u:%u va=%#lx err=%u\n", 2.656 + v->domain->domain_id, v->vcpu_id, va, regs->error_code); 2.657 + 2.658 // 2.659 // XXX: Need to think about eventually mapping superpages directly in the 2.660 // shadow (when possible), as opposed to splintering them into a 2.661 // bunch of 4K maps. 2.662 // 2.663 2.664 +#if (SHADOW_OPTIMIZATIONS & SHOPT_FAST_FAULT_PATH) && SHADOW_PAGING_LEVELS > 2 2.665 + if ( (regs->error_code & PFEC_reserved_bit) ) 2.666 + { 2.667 + /* The only reasons for reserved bits to be set in shadow entries 2.668 + * are the two "magic" shadow_l1e entries. 
*/ 2.669 + if ( likely((__copy_from_user(&sl1e, 2.670 + (sh_linear_l1_table(v) 2.671 + + shadow_l1_linear_offset(va)), 2.672 + sizeof(sl1e)) == 0) 2.673 + && sh_l1e_is_magic(sl1e)) ) 2.674 + { 2.675 + if ( sh_l1e_is_gnp(sl1e) ) 2.676 + { 2.677 + if ( likely(!hvm_guest(v) || shadow_vcpu_mode_translate(v)) ) 2.678 + { 2.679 + /* Not-present in a guest PT: pass to the guest as 2.680 + * a not-present fault (by flipping two bits). */ 2.681 + ASSERT(regs->error_code & PFEC_page_present); 2.682 + regs->error_code ^= (PFEC_reserved_bit|PFEC_page_present); 2.683 + perfc_incrc(shadow_fault_fast_gnp); 2.684 + SHADOW_PRINTK("fast path not-present\n"); 2.685 + return 0; 2.686 + } 2.687 + else 2.688 + { 2.689 + /* Not-present in the P2M: MMIO */ 2.690 + gpa = va; 2.691 + } 2.692 + } 2.693 + else 2.694 + { 2.695 + /* Magic MMIO marker: extract gfn for MMIO address */ 2.696 + ASSERT(sh_l1e_is_mmio(sl1e)); 2.697 + gpa = (((paddr_t)(gfn_x(sh_l1e_mmio_get_gfn(sl1e)))) 2.698 + << PAGE_SHIFT) 2.699 + | (va & ~PAGE_MASK); 2.700 + } 2.701 + perfc_incrc(shadow_fault_fast_mmio); 2.702 + SHADOW_PRINTK("fast path mmio %#"PRIpaddr"\n", gpa); 2.703 + reset_early_unshadow(v); 2.704 + handle_mmio(gpa); 2.705 + return EXCRET_fault_fixed; 2.706 + } 2.707 + else 2.708 + { 2.709 + /* This should be exceptionally rare: another vcpu has fixed 2.710 + * the tables between the fault and our reading the l1e. 2.711 + * Fall through to the normal fault handing logic */ 2.712 + perfc_incrc(shadow_fault_fast_fail); 2.713 + SHADOW_PRINTK("fast path false alarm!\n"); 2.714 + } 2.715 + } 2.716 +#endif /* SHOPT_FAST_FAULT_PATH */ 2.717 + 2.718 shadow_lock(d); 2.719 - 2.720 - SHADOW_PRINTK("d:v=%u:%u va=%#lx err=%u\n", 2.721 - v->domain->domain_id, v->vcpu_id, va, regs->error_code); 2.722 2.723 shadow_audit_tables(v); 2.724 2.725 @@ -2659,8 +2674,9 @@ static int sh_page_fault(struct vcpu *v, 2.726 } 2.727 2.728 // Was it a write fault? 2.729 - // 2.730 - if ( regs->error_code & PFEC_write_access ) 2.731 + ft = ((regs->error_code & PFEC_write_access) 2.732 + ? ft_demand_write : ft_demand_read); 2.733 + if ( ft == ft_demand_write ) 2.734 { 2.735 if ( unlikely(!(accumulated_gflags & _PAGE_RW)) ) 2.736 { 2.737 @@ -2685,26 +2701,19 @@ static int sh_page_fault(struct vcpu *v, 2.738 } 2.739 } 2.740 2.741 - /* Is this an MMIO access? */ 2.742 + /* What mfn is the guest trying to access? */ 2.743 gfn = guest_l1e_get_gfn(gw.eff_l1e); 2.744 + gmfn = vcpu_gfn_to_mfn(v, gfn); 2.745 mmio = ( hvm_guest(v) 2.746 && shadow_vcpu_mode_translate(v) 2.747 && mmio_space(gfn_to_paddr(gfn)) ); 2.748 2.749 - /* For MMIO, the shadow holds the *gfn*; for normal accesses, it holds 2.750 - * the equivalent mfn. */ 2.751 - if ( mmio ) 2.752 - gmfn = _mfn(gfn_x(gfn)); 2.753 - else 2.754 + if ( !mmio && !valid_mfn(gmfn) ) 2.755 { 2.756 - gmfn = vcpu_gfn_to_mfn(v, gfn); 2.757 - if ( !valid_mfn(gmfn) ) 2.758 - { 2.759 - perfc_incrc(shadow_fault_bail_bad_gfn); 2.760 - SHADOW_PRINTK("BAD gfn=%"SH_PRI_gfn" gmfn=%"SH_PRI_mfn"\n", 2.761 - gfn_x(gfn), mfn_x(gmfn)); 2.762 - goto not_a_shadow_fault; 2.763 - } 2.764 + perfc_incrc(shadow_fault_bail_bad_gfn); 2.765 + SHADOW_PRINTK("BAD gfn=%"SH_PRI_gfn" gmfn=%"SH_PRI_mfn"\n", 2.766 + gfn_x(gfn), mfn_x(gmfn)); 2.767 + goto not_a_shadow_fault; 2.768 } 2.769 2.770 /* Make sure there is enough free shadow memory to build a chain of 2.771 @@ -2717,45 +2726,40 @@ static int sh_page_fault(struct vcpu *v, 2.772 * for the shadow entry, since we might promote a page here. 
*/ 2.773 // XXX -- this code will need to change somewhat if/when the shadow code 2.774 // can directly map superpages... 2.775 - ft = ((regs->error_code & PFEC_write_access) ? 2.776 - ft_demand_write : ft_demand_read); 2.777 ptr_sl1e = shadow_get_and_create_l1e(v, &gw, &sl1mfn, ft); 2.778 ASSERT(ptr_sl1e); 2.779 2.780 - /* Calculate the shadow entry */ 2.781 - if ( ft == ft_demand_write ) 2.782 + /* Calculate the shadow entry and write it */ 2.783 + l1e_propagate_from_guest(v, (gw.l1e) ? gw.l1e : &gw.eff_l1e, gw.l1mfn, 2.784 + gmfn, &sl1e, ft, mmio); 2.785 + r = shadow_set_l1e(v, ptr_sl1e, sl1e, sl1mfn); 2.786 + 2.787 +#if SHADOW_OPTIMIZATIONS & SHOPT_PREFETCH 2.788 + /* Prefetch some more shadow entries */ 2.789 + sh_prefetch(v, &gw, ptr_sl1e, sl1mfn); 2.790 +#endif 2.791 + 2.792 + /* Need to emulate accesses to page tables */ 2.793 + if ( sh_mfn_is_a_page_table(gmfn) ) 2.794 { 2.795 - if ( l1e_write_fault(v, &gw, gmfn, &sl1e, mmio) ) 2.796 + if ( ft == ft_demand_write ) 2.797 { 2.798 perfc_incrc(shadow_fault_emulate_write); 2.799 goto emulate; 2.800 } 2.801 - } 2.802 - else if ( l1e_read_fault(v, &gw, gmfn, &sl1e, mmio) ) 2.803 - { 2.804 - perfc_incrc(shadow_fault_emulate_read); 2.805 - goto emulate; 2.806 + else if ( shadow_mode_trap_reads(d) && ft == ft_demand_read ) 2.807 + { 2.808 + perfc_incrc(shadow_fault_emulate_read); 2.809 + goto emulate; 2.810 + } 2.811 } 2.812 2.813 - /* Quick sanity check: we never make an MMIO entry that's got the 2.814 - * _PAGE_PRESENT flag set in it. */ 2.815 - ASSERT(!mmio || !(shadow_l1e_get_flags(sl1e) & _PAGE_PRESENT)); 2.816 - 2.817 - r = shadow_set_l1e(v, ptr_sl1e, sl1e, sl1mfn); 2.818 - 2.819 if ( mmio ) 2.820 { 2.821 gpa = guest_walk_to_gpa(&gw); 2.822 goto mmio; 2.823 } 2.824 2.825 -#if 0 2.826 - if ( !(r & SHADOW_SET_CHANGED) ) 2.827 - debugtrace_printk("%s: shadow_set_l1e(va=%p, sl1e=%" SH_PRI_pte 2.828 - ") did not change anything\n", 2.829 - __func__, gw.va, l1e_get_intpte(sl1e)); 2.830 -#endif 2.831 - 2.832 perfc_incrc(shadow_fault_fixed); 2.833 d->arch.shadow.fault_count++; 2.834 reset_early_unshadow(v); 2.835 @@ -2769,7 +2773,6 @@ static int sh_page_fault(struct vcpu *v, 2.836 return EXCRET_fault_fixed; 2.837 2.838 emulate: 2.839 - 2.840 /* Take the register set we were called with */ 2.841 emul_regs = *regs; 2.842 if ( hvm_guest(v) ) 2.843 @@ -3932,25 +3935,48 @@ int sh_audit_l1_table(struct vcpu *v, mf 2.844 gfn_t gfn; 2.845 char *s; 2.846 int done = 0; 2.847 - 2.848 + 2.849 /* Follow the backpointer */ 2.850 gl1mfn = _mfn(mfn_to_page(sl1mfn)->u.inuse.type_info); 2.851 gl1e = gp = sh_map_domain_page(gl1mfn); 2.852 SHADOW_FOREACH_L1E(sl1mfn, sl1e, &gl1e, done, { 2.853 2.854 - s = sh_audit_flags(v, 1, guest_l1e_get_flags(*gl1e), 2.855 - shadow_l1e_get_flags(*sl1e)); 2.856 - if ( s ) AUDIT_FAIL(1, "%s", s); 2.857 - 2.858 - if ( SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES_MFNS ) 2.859 + if ( sh_l1e_is_magic(*sl1e) ) 2.860 { 2.861 - gfn = guest_l1e_get_gfn(*gl1e); 2.862 - mfn = shadow_l1e_get_mfn(*sl1e); 2.863 - gmfn = audit_gfn_to_mfn(v, gfn, gl1mfn); 2.864 - if ( mfn_x(gmfn) != mfn_x(mfn) ) 2.865 - AUDIT_FAIL(1, "bad translation: gfn %" SH_PRI_gfn 2.866 - " --> %" SH_PRI_mfn " != mfn %" SH_PRI_mfn "\n", 2.867 - gfn_x(gfn), mfn_x(gmfn), mfn_x(mfn)); 2.868 +#if (SHADOW_OPTIMIZATIONS & SHOPT_FAST_FAULT_PATH) && SHADOW_PAGING_LEVELS > 2 2.869 + if ( sh_l1e_is_gnp(*sl1e) ) 2.870 + { 2.871 + if ( guest_l1e_get_flags(*gl1e) & _PAGE_PRESENT ) 2.872 + AUDIT_FAIL(1, "shadow is GNP magic but guest is present"); 2.873 + } 2.874 + else 2.875 + { 2.876 + 
ASSERT(sh_l1e_is_mmio(*sl1e)); 2.877 + gfn = sh_l1e_mmio_get_gfn(*sl1e); 2.878 + if ( gfn_x(gfn) != gfn_x(guest_l1e_get_gfn(*gl1e)) ) 2.879 + AUDIT_FAIL(1, "shadow MMIO gfn is %" SH_PRI_gfn 2.880 + " but guest gfn is %" SH_PRI_gfn, 2.881 + gfn_x(gfn), 2.882 + gfn_x(guest_l1e_get_gfn(*gl1e))); 2.883 + } 2.884 +#endif 2.885 + } 2.886 + else 2.887 + { 2.888 + s = sh_audit_flags(v, 1, guest_l1e_get_flags(*gl1e), 2.889 + shadow_l1e_get_flags(*sl1e)); 2.890 + if ( s ) AUDIT_FAIL(1, "%s", s); 2.891 + 2.892 + if ( SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES_MFNS ) 2.893 + { 2.894 + gfn = guest_l1e_get_gfn(*gl1e); 2.895 + mfn = shadow_l1e_get_mfn(*sl1e); 2.896 + gmfn = audit_gfn_to_mfn(v, gfn, gl1mfn); 2.897 + if ( mfn_x(gmfn) != mfn_x(mfn) ) 2.898 + AUDIT_FAIL(1, "bad translation: gfn %" SH_PRI_gfn 2.899 + " --> %" SH_PRI_mfn " != mfn %" SH_PRI_mfn, 2.900 + gfn_x(gfn), mfn_x(gmfn), mfn_x(mfn)); 2.901 + } 2.902 } 2.903 }); 2.904 sh_unmap_domain_page(gp); 2.905 @@ -3973,7 +3999,8 @@ int sh_audit_fl1_table(struct vcpu *v, m 2.906 if ( !(f == 0 2.907 || f == (_PAGE_PRESENT|_PAGE_USER|_PAGE_RW| 2.908 _PAGE_ACCESSED|_PAGE_DIRTY) 2.909 - || f == (_PAGE_PRESENT|_PAGE_USER|_PAGE_ACCESSED|_PAGE_DIRTY)) ) 2.910 + || f == (_PAGE_PRESENT|_PAGE_USER|_PAGE_ACCESSED|_PAGE_DIRTY) 2.911 + || sh_l1e_is_magic(*sl1e)) ) 2.912 AUDIT_FAIL(1, "fl1e has bad flags"); 2.913 }); 2.914 return 0; 2.915 @@ -4011,7 +4038,7 @@ int sh_audit_l2_table(struct vcpu *v, mf 2.916 if ( mfn_x(gmfn) != mfn_x(mfn) ) 2.917 AUDIT_FAIL(2, "bad translation: gfn %" SH_PRI_gfn 2.918 " (--> %" SH_PRI_mfn ")" 2.919 - " --> %" SH_PRI_mfn " != mfn %" SH_PRI_mfn "\n", 2.920 + " --> %" SH_PRI_mfn " != mfn %" SH_PRI_mfn, 2.921 gfn_x(gfn), 2.922 (guest_l2e_get_flags(*gl2e) & _PAGE_PSE) ? 0 2.923 : mfn_x(audit_gfn_to_mfn(v, gfn, gl2mfn)), 2.924 @@ -4053,7 +4080,7 @@ int sh_audit_l3_table(struct vcpu *v, mf 2.925 : PGC_SH_l2_shadow); 2.926 if ( mfn_x(gmfn) != mfn_x(mfn) ) 2.927 AUDIT_FAIL(3, "bad translation: gfn %" SH_PRI_gfn 2.928 - " --> %" SH_PRI_mfn " != mfn %" SH_PRI_mfn "\n", 2.929 + " --> %" SH_PRI_mfn " != mfn %" SH_PRI_mfn, 2.930 gfn_x(gfn), mfn_x(gmfn), mfn_x(mfn)); 2.931 } 2.932 }); 2.933 @@ -4088,7 +4115,7 @@ int sh_audit_l4_table(struct vcpu *v, mf 2.934 PGC_SH_l3_shadow); 2.935 if ( mfn_x(gmfn) != mfn_x(mfn) ) 2.936 AUDIT_FAIL(4, "bad translation: gfn %" SH_PRI_gfn 2.937 - " --> %" SH_PRI_mfn " != mfn %" SH_PRI_mfn "\n", 2.938 + " --> %" SH_PRI_mfn " != mfn %" SH_PRI_mfn, 2.939 gfn_x(gfn), mfn_x(gmfn), mfn_x(mfn)); 2.940 } 2.941 });
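The fast path added to sh_page_fault() relies on the hardware reporting a reserved-bit fault (PFEC_reserved_bit) whenever it walks one of the magic shadow l1es; for a guest-not-present entry, the handler converts that into the not-present fault the guest expects by flipping two bits of the error code, exactly as the hunk above shows. A standalone sketch of that conversion, using the standard x86 page-fault error-code bit positions (assumed here rather than copied from Xen's headers):

```c
#include <assert.h>
#include <stdint.h>

/* Standard x86 page-fault error-code bits (assumed values). */
#define PFEC_page_present  (1u << 0)  /* fault on a present entry  */
#define PFEC_write_access  (1u << 1)
#define PFEC_user_mode     (1u << 2)
#define PFEC_reserved_bit  (1u << 3)  /* reserved bit set in a PTE */

/* A magic "guest not present" shadow entry faults with RSVD and P set.
 * To forward it to the guest as an ordinary not-present fault, clear
 * both bits -- i.e. XOR them, as the fast path does. */
static uint32_t gnp_error_code_for_guest(uint32_t error_code)
{
    assert(error_code & PFEC_reserved_bit);
    assert(error_code & PFEC_page_present);
    return error_code ^ (PFEC_reserved_bit | PFEC_page_present);
}

int main(void)
{
    uint32_t hw    = PFEC_reserved_bit | PFEC_page_present | PFEC_write_access;
    uint32_t guest = gnp_error_code_for_guest(hw);

    assert(!(guest & PFEC_reserved_bit));   /* guest never sees RSVD   */
    assert(!(guest & PFEC_page_present));   /* reported as not-present */
    assert(guest & PFEC_write_access);      /* other bits preserved    */
    return 0;
}
```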
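The new sh_prefetch() fetches at most PREFETCH_DISTANCE extra entries per fault and never walks past the end of the shadow l1 page; the distance is computed from the faulting entry's offset within that page. A small sketch of just that arithmetic (PAGE_SIZE, PAGE_MASK and the 8-byte entry size are assumptions for illustration):

```c
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE          4096UL
#define PAGE_MASK          (~(PAGE_SIZE - 1))
#define PREFETCH_DISTANCE  32               /* same magic number as the patch */

typedef struct { uint64_t l1; } shadow_l1e_t;   /* 8-byte shadow entry */

/* How many entries may we touch, starting at the given address, without
 * leaving the shadow l1 page and without exceeding the per-fault budget? */
static unsigned long prefetch_dist(uintptr_t ptr_sl1e)
{
    unsigned long dist =
        (PAGE_SIZE - (ptr_sl1e & ~PAGE_MASK)) / sizeof(shadow_l1e_t);
    return dist > PREFETCH_DISTANCE ? PREFETCH_DISTANCE : dist;
}

int main(void)
{
    uintptr_t page = 0x100000UL;                 /* page-aligned base */
    /* Entry 500 of a 512-entry page: only 12 slots remain. */
    assert(prefetch_dist(page + 500 * sizeof(shadow_l1e_t)) == 12);
    /* Near the start of the page the budget, not the page end, limits us. */
    assert(prefetch_dist(page + 4 * sizeof(shadow_l1e_t)) == PREFETCH_DISTANCE);
    printf("prefetch distances check out\n");
    return 0;
}
```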
3.1 --- a/xen/arch/x86/mm/shadow/private.h Wed Nov 01 10:40:46 2006 +0000 3.2 +++ b/xen/arch/x86/mm/shadow/private.h Wed Nov 01 10:41:44 2006 +0000 3.3 @@ -33,111 +33,6 @@ 3.4 3.5 3.6 /****************************************************************************** 3.7 - * Definitions for the use of the "available" bits in the shadow PTEs. 3.8 - * 3.9 - * Review of the low 12 bits of a shadow page table entry: 3.10 - * 3.11 - * in a guest: in a shadow: 3.12 - * Bit 11: _PAGE_AVAIL2, aka _PAGE_GNTTAB 3.13 - * Bit 10: _PAGE_AVAIL1 _PAGE_SHADOW_RW ("SW" below) 3.14 - * Bit 9: _PAGE_AVAIL0 _PAGE_SHADOW_PRESENT ("SP" below) 3.15 - * Bit 8: _PAGE_GLOBAL _PAGE_SHADOW_MMIO ("MMIO" below), 3.16 - * aka _PAGE_SHADOW_GUEST_NOT_PRESENT 3.17 - * Bit 7: _PAGE_PSE, aka _PAGE_PAT 3.18 - * Bit 6: _PAGE_DIRTY 3.19 - * Bit 5: _PAGE_ACCESSED 3.20 - * Bit 4: _PAGE_PCD 3.21 - * Bit 3: _PAGE_PWT 3.22 - * Bit 2: _PAGE_USER 3.23 - * Bit 1: _PAGE_RW ("GW" below) 3.24 - * Bit 0: _PAGE_PRESENT ("GP" below) 3.25 - * 3.26 - * Given a guest entry, as shown below, we can expect the following in the 3.27 - * corresponding shadow entry: 3.28 - * 3.29 - * Guest entry Shadow entry Commentary 3.30 - * ----------- ---------------- --------------------------------------------- 3.31 - * Maps 3.32 - * GP GW IO GP SP GW SW MMIO 3.33 - * -- -- ---- -- -- -- -- ---- 3.34 - * - - - 0 0 0 0 0 The guest entry has not yet been shadowed. 3.35 - * 0 - - 0 0 0 0 1 The guest entry is marked not-present. 3.36 - * 1 1 no ? 1 ? 1 0 Writable entry in the guest. 3.37 - * 1 0 no ? 1 0 0 0 Read-only entry in the guest. 3.38 - * 1 1 yes 0 1 ? 1 1 Writable MMIO mapping in the guest. 3.39 - * 1 0 yes 0 1 0 0 1 Read-only MMIO mapping in the guest. 3.40 - * 3.41 - * Normally, we would expect that GP=1 in the guest to imply GP=1 in the 3.42 - * shadow, and similarly for GW=1. However, various functionality that may be 3.43 - * implemented via the shadow can cause GP or GW to be cleared in such cases. 3.44 - * A & D bit emulation is a prime example of such functionality. 3.45 - * 3.46 - * If _PAGE_SHADOW_PRESENT is zero, then the _PAGE_PRESENT bit in that same 3.47 - * entry will always be zero, too. 3.48 - 3.49 - * Bit 11 is used in debug builds as the _PAGE_GNTTAB bit in PV guests. It is 3.50 - * currently available for random (ab)use in shadow entries. 3.51 - * 3.52 - * Bit 8 (the global bit) could be propagated from an HVM guest to the shadow, 3.53 - * but currently there is no benefit, as the guest's TLB is flushed on every 3.54 - * transition of CR3 anyway due to the HVM exit/re-entry. 3.55 - * 3.56 - * In shadow entries in which the _PAGE_SHADOW_PRESENT is set, bit 8 is used 3.57 - * as the _PAGE_SHADOW_MMIO bit. In such entries, if _PAGE_SHADOW_MMIO is 3.58 - * set, then the entry contains the *gfn* directly from the corresponding 3.59 - * guest entry (not an mfn!!). 3.60 - * 3.61 - * Bit 7 is set in a guest L2 to signify a superpage entry. The current 3.62 - * shadow code splinters superpage mappings into 512 or 1024 4K mappings; the 3.63 - * resulting shadow L1 table is called an FL1. Note that there is no guest 3.64 - * page that corresponds to an FL1. 3.65 - * 3.66 - * Bit 7 in a guest L1 is the PAT2 bit. Currently we do not support PAT in 3.67 - * this shadow code. 3.68 - * 3.69 - * Bit 6 is the dirty bit. 3.70 - * 3.71 - * Bit 5 is the accessed bit. 3.72 - * 3.73 - * Bit 4 is the cache disable bit. If set in a guest, the hardware is 3.74 - * supposed to refuse to cache anything found via this entry. 
It can be set 3.75 - * in an L4e, L3e, L2e, or L1e. This shadow code currently does not support 3.76 - * cache disable bits. They are silently ignored. 3.77 - * 3.78 - * Bit 4 is a guest L1 is also the PAT1 bit. Currently we do not support PAT 3.79 - * in this shadow code. 3.80 - * 3.81 - * Bit 3 is the cache write-thru bit. If set in a guest, the hardware is 3.82 - * supposed to use write-thru instead of write-back caching for anything found 3.83 - * via this entry. It can be set in an L4e, L3e, L2e, or L1e. This shadow 3.84 - * code currently does not support cache write-thru bits. They are silently 3.85 - * ignored. 3.86 - * 3.87 - * Bit 3 is a guest L1 is also the PAT0 bit. Currently we do not support PAT 3.88 - * in this shadow code. 3.89 - * 3.90 - * Bit 2 is the user bit. 3.91 - * 3.92 - * Bit 1 is the read-write bit. 3.93 - * 3.94 - * Bit 0 is the present bit. 3.95 - */ 3.96 - 3.97 -// Copy of the _PAGE_RW bit from the guest's PTE, appropriately zero'ed by 3.98 -// the appropriate shadow rules. 3.99 -#define _PAGE_SHADOW_RW _PAGE_AVAIL1 3.100 - 3.101 -// Copy of the _PAGE_PRESENT bit from the guest's PTE 3.102 -#define _PAGE_SHADOW_PRESENT _PAGE_AVAIL0 3.103 - 3.104 -// The matching guest entry maps MMIO space 3.105 -#define _PAGE_SHADOW_MMIO _PAGE_GLOBAL 3.106 - 3.107 -// Shadow flags value used when the guest is not present 3.108 -#define _PAGE_SHADOW_GUEST_NOT_PRESENT _PAGE_GLOBAL 3.109 - 3.110 - 3.111 -/****************************************************************************** 3.112 * Debug and error-message output 3.113 */ 3.114 #define SHADOW_PRINTK(_f, _a...) \ 3.115 @@ -151,13 +46,13 @@ 3.116 } while (0) 3.117 3.118 // The flags for use with SHADOW_DEBUG: 3.119 -#define SHADOW_DEBUG_PROPAGATE 0 3.120 -#define SHADOW_DEBUG_MAKE_SHADOW 0 3.121 -#define SHADOW_DEBUG_DESTROY_SHADOW 0 3.122 +#define SHADOW_DEBUG_PROPAGATE 1 3.123 +#define SHADOW_DEBUG_MAKE_SHADOW 1 3.124 +#define SHADOW_DEBUG_DESTROY_SHADOW 1 3.125 #define SHADOW_DEBUG_P2M 0 3.126 -#define SHADOW_DEBUG_A_AND_D 0 3.127 -#define SHADOW_DEBUG_EMULATE 0 3.128 -#define SHADOW_DEBUG_LOGDIRTY 1 3.129 +#define SHADOW_DEBUG_A_AND_D 1 3.130 +#define SHADOW_DEBUG_EMULATE 1 3.131 +#define SHADOW_DEBUG_LOGDIRTY 0 3.132 3.133 3.134 /******************************************************************************
4.1 --- a/xen/arch/x86/mm/shadow/types.h Wed Nov 01 10:40:46 2006 +0000 4.2 +++ b/xen/arch/x86/mm/shadow/types.h Wed Nov 01 10:41:44 2006 +0000 4.3 @@ -591,6 +591,77 @@ accumulate_guest_flags(struct vcpu *v, w 4.4 return accumulated_flags; 4.5 } 4.6 4.7 + 4.8 +#if (SHADOW_OPTIMIZATIONS & SHOPT_FAST_FAULT_PATH) && SHADOW_PAGING_LEVELS > 2 4.9 +/****************************************************************************** 4.10 + * We implement a "fast path" for two special cases: faults that require 4.11 + * MMIO emulation, and faults where the guest PTE is not present. We 4.12 + * record these as shadow l1 entries that have reserved bits set in 4.13 + * them, so we can spot them immediately in the fault handler and handle 4.14 + * them without needing to hold the shadow lock or walk the guest 4.15 + * pagetables. 4.16 + * 4.17 + * This is only feasible for PAE and 64bit Xen: 32-bit non-PAE PTEs don't 4.18 + * have reserved bits that we can use for this. 4.19 + */ 4.20 + 4.21 +#define SH_L1E_MAGIC 0xffffffff00000000ULL 4.22 +static inline int sh_l1e_is_magic(shadow_l1e_t sl1e) 4.23 +{ 4.24 + return ((sl1e.l1 & SH_L1E_MAGIC) == SH_L1E_MAGIC); 4.25 +} 4.26 + 4.27 +/* Guest not present: a single magic value */ 4.28 +static inline shadow_l1e_t sh_l1e_gnp(void) 4.29 +{ 4.30 + return (shadow_l1e_t){ -1ULL }; 4.31 +} 4.32 + 4.33 +static inline int sh_l1e_is_gnp(shadow_l1e_t sl1e) 4.34 +{ 4.35 + return (sl1e.l1 == sh_l1e_gnp().l1); 4.36 +} 4.37 + 4.38 +/* MMIO: an invalid PTE that contains the GFN of the equivalent guest l1e. 4.39 + * We store 28 bits of GFN in bits 4:32 of the entry. 4.40 + * The present bit is set, and the U/S and R/W bits are taken from the guest. 4.41 + * Bit 3 is always 0, to differentiate from gnp above. */ 4.42 +#define SH_L1E_MMIO_MAGIC 0xffffffff00000001ULL 4.43 +#define SH_L1E_MMIO_MAGIC_MASK 0xffffffff00000009ULL 4.44 +#define SH_L1E_MMIO_GFN_MASK 0x00000000fffffff0ULL 4.45 +#define SH_L1E_MMIO_GFN_SHIFT 4 4.46 + 4.47 +static inline shadow_l1e_t sh_l1e_mmio(gfn_t gfn, u32 gflags) 4.48 +{ 4.49 + return (shadow_l1e_t) { (SH_L1E_MMIO_MAGIC 4.50 + | (gfn_x(gfn) << SH_L1E_MMIO_GFN_SHIFT) 4.51 + | (gflags & (_PAGE_USER|_PAGE_RW))) }; 4.52 +} 4.53 + 4.54 +static inline int sh_l1e_is_mmio(shadow_l1e_t sl1e) 4.55 +{ 4.56 + return ((sl1e.l1 & SH_L1E_MMIO_MAGIC_MASK) == SH_L1E_MMIO_MAGIC); 4.57 +} 4.58 + 4.59 +static inline gfn_t sh_l1e_mmio_get_gfn(shadow_l1e_t sl1e) 4.60 +{ 4.61 + return _gfn((sl1e.l1 & SH_L1E_MMIO_GFN_MASK) >> SH_L1E_MMIO_GFN_SHIFT); 4.62 +} 4.63 + 4.64 +static inline u32 sh_l1e_mmio_get_flags(shadow_l1e_t sl1e) 4.65 +{ 4.66 + return (u32)((sl1e.l1 & (_PAGE_USER|_PAGE_RW))); 4.67 +} 4.68 + 4.69 +#else 4.70 + 4.71 +#define sh_l1e_gnp() shadow_l1e_empty() 4.72 +#define sh_l1e_mmio(_gfn, _flags) shadow_l1e_empty() 4.73 +#define sh_l1e_is_magic(_e) (0) 4.74 + 4.75 +#endif /* SHOPT_FAST_FAULT_PATH */ 4.76 + 4.77 + 4.78 #endif /* _XEN_SHADOW_TYPES_H */ 4.79 4.80 /*
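The constants added to types.h pack an MMIO mapping's gfn, plus the guest's U/S and R/W bits, into a shadow entry whose high 32 bits are all ones, so a hardware walk of it raises a reserved-bit fault that the fast path can recognise. The following standalone round-trip reuses the SH_L1E_* constants from the hunk above on a plain uint64_t; only the _PAGE_USER/_PAGE_RW values are assumed rather than copied from the patch.

```c
#include <assert.h>
#include <stdint.h>

#define _PAGE_RW    (1u << 1)   /* assumed x86 PTE flag values */
#define _PAGE_USER  (1u << 2)

#define SH_L1E_MMIO_MAGIC       0xffffffff00000001ULL
#define SH_L1E_MMIO_MAGIC_MASK  0xffffffff00000009ULL
#define SH_L1E_MMIO_GFN_MASK    0x00000000fffffff0ULL
#define SH_L1E_MMIO_GFN_SHIFT   4

/* Encode: 28 bits of gfn in bits 4..31, guest U/S and R/W in the low bits. */
static uint64_t sh_l1e_mmio(uint64_t gfn, uint32_t gflags)
{
    return SH_L1E_MMIO_MAGIC
         | (gfn << SH_L1E_MMIO_GFN_SHIFT)
         | (gflags & (_PAGE_USER | _PAGE_RW));
}

static int sh_l1e_is_mmio(uint64_t sl1e)
{
    return (sl1e & SH_L1E_MMIO_MAGIC_MASK) == SH_L1E_MMIO_MAGIC;
}

static uint64_t sh_l1e_mmio_get_gfn(uint64_t sl1e)
{
    return (sl1e & SH_L1E_MMIO_GFN_MASK) >> SH_L1E_MMIO_GFN_SHIFT;
}

int main(void)
{
    uint64_t gnp = ~0ULL;   /* the "guest not present" magic value */
    uint64_t e   = sh_l1e_mmio(0x0abcdefULL, _PAGE_USER | _PAGE_RW);

    assert(sh_l1e_is_mmio(e));
    assert(!sh_l1e_is_mmio(gnp));               /* bit 3 distinguishes gnp */
    assert(sh_l1e_mmio_get_gfn(e) == 0x0abcdef);
    assert(e & _PAGE_RW);                       /* guest flags preserved   */
    return 0;
}
```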
--- a/xen/arch/x86/x86_32/seg_fixup.c	Wed Nov 01 10:40:46 2006 +0000
+++ b/xen/arch/x86/x86_32/seg_fixup.c	Wed Nov 01 10:41:44 2006 +0000
@@ -296,8 +296,8 @@ int gpf_emulate_4gb(struct cpu_user_regs
     if ( get_user(b, pb) )
     {
         dprintk(XENLOG_DEBUG,
-                "Fault while accessing byte %d of instruction\n",
-                pb-eip);
+                "Fault while accessing byte %ld of instruction\n",
+                (long)(pb-eip));
         goto page_fault;
     }
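This hunk is purely a format-string fix: pb - eip is a pointer difference (ptrdiff_t), which %d does not necessarily match, so the patch casts to long and prints with %ld. A tiny standalone illustration, not tied to Xen's dprintk(); the variable names are reused from the hunk only for familiarity.

```c
#include <stdio.h>

int main(void)
{
    unsigned char insn[16];
    unsigned char *eip = insn;
    unsigned char *pb  = insn + 5;       /* faulting byte within the insn */

    /* pb - eip has type ptrdiff_t; keep format and argument in agreement. */
    printf("Fault while accessing byte %ld of instruction\n", (long)(pb - eip));
    /* C99 also provides a dedicated length modifier for ptrdiff_t:        */
    printf("or, with %%td: byte %td\n", pb - eip);
    return 0;
}
```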
--- a/xen/include/asm-x86/perfc_defn.h	Wed Nov 01 10:40:46 2006 +0000
+++ b/xen/include/asm-x86/perfc_defn.h	Wed Nov 01 10:41:44 2006 +0000
@@ -43,6 +43,9 @@ PERFCOUNTER_CPU(shadow_linear_map_failed
 PERFCOUNTER_CPU(shadow_a_update,       "shadow A bit update")
 PERFCOUNTER_CPU(shadow_ad_update,      "shadow A&D bit update")
 PERFCOUNTER_CPU(shadow_fault,          "calls to shadow_fault")
+PERFCOUNTER_CPU(shadow_fault_fast_gnp, "shadow_fault fast path n/p")
+PERFCOUNTER_CPU(shadow_fault_fast_mmio, "shadow_fault fast path mmio")
+PERFCOUNTER_CPU(shadow_fault_fast_fail, "shadow_fault fast path error")
 PERFCOUNTER_CPU(shadow_fault_bail_bad_gfn, "shadow_fault guest bad gfn")
 PERFCOUNTER_CPU(shadow_fault_bail_not_present,
                 "shadow_fault guest not-present")
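perfc_defn.h is a pure definition list: each PERFCOUNTER_CPU(name, "description") line is expanded by whatever macro the including file defines, which is how the three new fast-path counters become both counter slots and printable descriptions without further plumbing. A generic sketch of that X-macro pattern with hypothetical counter names; it illustrates the idea rather than Xen's exact perfc machinery.

```c
#include <stdio.h>

/* One line per counter, expanded several ways by the includer. */
#define COUNTER_LIST(C)                                  \
    C(fault_fast_gnp,  "fast path not-present")          \
    C(fault_fast_mmio, "fast path mmio")                 \
    C(fault_fast_fail, "fast path error")

/* First expansion: an enum of counter indices. */
#define AS_ENUM(name, desc) PERFC_##name,
enum perf_counter { COUNTER_LIST(AS_ENUM) PERFC_NR };

/* Second expansion: a parallel table of human-readable descriptions. */
#define AS_DESC(name, desc) [PERFC_##name] = desc,
static const char *perf_desc[PERFC_NR] = { COUNTER_LIST(AS_DESC) };

static unsigned long perf_count[PERFC_NR];

int main(void)
{
    perf_count[PERFC_fault_fast_gnp]++;
    for (int i = 0; i < PERFC_NR; i++)
        printf("%-24s %lu\n", perf_desc[i], perf_count[i]);
    return 0;
}
```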
--- a/xen/include/asm-x86/shadow.h	Wed Nov 01 10:40:46 2006 +0000
+++ b/xen/include/asm-x86/shadow.h	Wed Nov 01 10:41:44 2006 +0000
@@ -161,8 +161,10 @@ extern int shadow_audit_enable;
  */
 #define SHOPT_WRITABLE_HEURISTIC 0x01   /* Guess at RW PTEs via linear maps */
 #define SHOPT_EARLY_UNSHADOW     0x02   /* Unshadow l1s on fork or exit */
+#define SHOPT_FAST_FAULT_PATH    0x04   /* Fast-path MMIO and not-present */
+#define SHOPT_PREFETCH           0x08   /* Shadow multiple entries per fault */
 
-#define SHADOW_OPTIMIZATIONS     0x03
+#define SHADOW_OPTIMIZATIONS     0x0f
 
 
 /* With shadow pagetables, the different kinds of address start
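SHADOW_OPTIMIZATIONS is the OR of the SHOPT_* bits that are compiled in, so raising it from 0x03 to 0x0f enables the two new optimizations alongside the existing ones; the `#if SHADOW_OPTIMIZATIONS & SHOPT_PREFETCH` tests seen in multi.c then compile the optional code in or out. A tiny self-contained illustration of that gating, with the flag values copied from the hunk above:

```c
#include <stdio.h>

#define SHOPT_WRITABLE_HEURISTIC 0x01  /* Guess at RW PTEs via linear maps  */
#define SHOPT_EARLY_UNSHADOW     0x02  /* Unshadow l1s on fork or exit      */
#define SHOPT_FAST_FAULT_PATH    0x04  /* Fast-path MMIO and not-present    */
#define SHOPT_PREFETCH           0x08  /* Shadow multiple entries per fault */

#define SHADOW_OPTIMIZATIONS     0x0f  /* all four of the above enabled     */

int main(void)
{
#if SHADOW_OPTIMIZATIONS & SHOPT_PREFETCH
    puts("prefetch code compiled in");
#else
    puts("prefetch code compiled out");
#endif
    return 0;
}
```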