direct-io.hg
changeset 4160:7a281bd1e0f5
bitkeeper revision 1.1236.34.3 (4237063cE2rat5RdEGCsTzuaC6XCcA)
Tidy the x86 emulator interface, and use it from within the
writable pagetable algorithm to deal with otherwise unhandleable cases.
For example: L1 mapped at multiple L2 slots; L1 that maps itself; L1
that also maps the code making the update, or the kernel stack.
This provides a proof-of-concept for the emulator that can be picked
up for the VMX code to improve the device-model emulation.
Signed-off-by: Keir Fraser <keir@xensource.com>
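
For readers wiring up the tidied interface elsewhere, a minimal sketch of a caller follows. It is modelled on the test harness and the ptwr hookup in the diffs below, and assumes that environment (the x86_emulate.h definitions, linking against x86_emulate.c). The flat-memory callbacks are illustrative stand-ins that treat emulated addresses as host pointers, much as the harness's own handlers do.

    #include <string.h>
    #include <asm/x86_emulate.h>

    static int my_read(unsigned long addr, unsigned long *val,
                       unsigned int bytes)
    {
        *val = 0;
        memcpy(val, (void *)addr, bytes); /* little-endian host assumed */
        return X86EMUL_CONTINUE;
    }

    static int my_write(unsigned long addr, unsigned long val,
                        unsigned int bytes)
    {
        memcpy((void *)addr, &val, bytes);
        return X86EMUL_CONTINUE;
    }

    /* Note the tidied signature: no 'seen' out-parameter any more; a failed
     * compare-and-swap is now reported via a return code instead. */
    static int my_cmpxchg(unsigned long addr, unsigned long old,
                          unsigned long new, unsigned int bytes)
    {
        memcpy((void *)addr, &new, bytes); /* unconditional, as in the harness */
        return X86EMUL_CONTINUE;
    }

    static struct x86_mem_emulator my_ops = {
        .read_std         = my_read,
        .write_std        = my_write,
        .read_emulated    = my_read,
        .write_emulated   = my_write,
        .cmpxchg_emulated = my_cmpxchg
    };

    /* Emulate the faulting memory instruction; 'cr2' is the faulting
     * address, and the final argument selects 32-bit mode (4 bytes). */
    int emulate_one(struct xen_regs *regs, unsigned long cr2)
    {
        return x86_emulate_memop(regs, cr2, &my_ops, 4);
    }

A return of 0 means the instruction was emulated (or a fault was propagated to the guest out of band); -1 means the access was unhandleable and the caller must fall back to another path, exactly as ptwr_do_page_fault does below.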
author   | kaf24@firebug.cl.cam.ac.uk
date     | Tue Mar 15 15:58:52 2005 +0000 (2005-03-15)
parents  | 5c5ca35a900a
children | fc1e5218f616
files    | tools/tests/test_x86_emulator.c xen/arch/x86/mm.c xen/arch/x86/x86_emulate.c xen/include/asm-x86/mm.h xen/include/asm-x86/x86_emulate.h xen/include/xen/perfc_defn.h
line diff
--- a/tools/tests/test_x86_emulator.c	Tue Mar 15 14:31:42 2005 +0000
+++ b/tools/tests/test_x86_emulator.c	Tue Mar 15 15:58:52 2005 +0000
@@ -26,7 +26,7 @@ static int read_any(
     case 4: *val = *(u32 *)addr; break;
     case 8: *val = *(unsigned long *)addr; break;
     }
-    return 0;
+    return X86EMUL_CONTINUE;
 }
 
 static int write_any(
@@ -41,17 +41,15 @@ static int write_any(
     case 4: *(u32 *)addr = (u32)val; break;
     case 8: *(unsigned long *)addr = val; break;
     }
-    return 0;
+    return X86EMUL_CONTINUE;
 }
 
 static int cmpxchg_any(
     unsigned long addr,
     unsigned long old,
     unsigned long new,
-    unsigned long *seen,
     unsigned int bytes)
 {
-    *seen = old;
     switch ( bytes )
     {
     case 1: *(u8 *)addr = (u8)new; break;
@@ -59,7 +57,7 @@ static int cmpxchg_any(
     case 4: *(u32 *)addr = (u32)new; break;
     case 8: *(unsigned long *)addr = new; break;
     }
-    return 0;
+    return X86EMUL_CONTINUE;
 }
 
 static struct x86_mem_emulator emulops = {
--- a/xen/arch/x86/mm.c	Tue Mar 15 14:31:42 2005 +0000
+++ b/xen/arch/x86/mm.c	Tue Mar 15 15:58:52 2005 +0000
@@ -101,6 +101,7 @@
 #include <asm/uaccess.h>
 #include <asm/domain_page.h>
 #include <asm/ldt.h>
+#include <asm/x86_emulate.h>
 
 #ifdef VERBOSE
 #define MEM_LOG(_f, _a...) \
@@ -265,8 +266,7 @@ int map_ldt_shadow_page(unsigned int off
 #define TOGGLE_MODE() ((void)0)
 #endif
 
-    if ( unlikely(in_irq()) )
-        BUG();
+    BUG_ON(unlikely(in_irq()));
 
     TOGGLE_MODE();
     __get_user(l1e, (unsigned long *)
@@ -1939,12 +1939,13 @@ void update_shadow_va_mapping(unsigned l
                      &shadow_linear_pg_table[l1_linear_offset(va)])))) )
     {
         /*
-         * Since L2's are guranteed RW, failure indicates either that the
+         * Since L2's are guaranteed RW, failure indicates either that the
          * page was not shadowed, or that the L2 entry has not yet been
         * updated to reflect the shadow.
         */
-        if ( shadow_mode_external(current->domain) )
-            BUG(); // can't use linear_l2_table with external tables.
+
+        /* Can't use linear_l2_table with external tables. */
+        BUG_ON(shadow_mode_external(current->domain));
 
         l2_pgentry_t gpde = linear_l2_table[l2_table_offset(va)];
         unsigned long gpfn = l2_pgentry_val(gpde) >> PAGE_SHIFT;
@@ -2294,9 +2295,7 @@ void ptwr_flush(const int which)
     int            i, cpu = smp_processor_id();
     struct exec_domain *ed = current;
     struct domain *d = ed->domain;
-#ifdef PERF_COUNTERS
     unsigned int   modified = 0;
-#endif
 
     l1va = ptwr_info[cpu].ptinfo[which].l1va;
     ptep = (unsigned long *)&linear_pg_table[l1_linear_offset(l1va)];
@@ -2344,11 +2343,7 @@ void ptwr_flush(const int which)
 
     /* Ensure that there are no stale writable mappings in any TLB. */
     /* NB. INVLPG is a serialising instruction: flushes pending updates. */
-#if 1
     __flush_tlb_one(l1va); /* XXX Multi-CPU guests? */
-#else
-    flush_tlb_all();
-#endif
     PTWR_PRINTK("[%c] disconnected_l1va at %p now %p\n",
                 PTWR_PRINT_WHICH, ptep, pte);
 
@@ -2365,10 +2360,8 @@ void ptwr_flush(const int which)
         if ( likely(l1_pgentry_val(ol1e) == l1_pgentry_val(nl1e)) )
             continue;
 
-#ifdef PERF_COUNTERS
         /* Update number of entries modified. */
         modified++;
-#endif
 
         /*
          * Fast path for PTEs that have merely been write-protected
@@ -2411,6 +2404,8 @@ void ptwr_flush(const int which)
     unmap_domain_mem(pl1e);
 
     perfc_incr_histo(wpt_updates, modified, PT_UPDATES);
+    ptwr_info[cpu].ptinfo[which].prev_exec_domain = ed;
+    ptwr_info[cpu].ptinfo[which].prev_nr_updates  = modified;
 
     /*
      * STEP 3. Reattach the L1 p.t. page into the current address space.
@@ -2435,6 +2430,133 @@ void ptwr_flush(const int which)
     }
 }
 
+static int ptwr_emulated_update(
+    unsigned long addr,
+    unsigned long old,
+    unsigned long val,
+    unsigned int bytes,
+    unsigned int do_cmpxchg)
+{
+    unsigned long sstat, pte, pfn;
+    struct pfn_info *page;
+    l1_pgentry_t ol1e, nl1e, *pl1e, *sl1e;
+    struct domain *d = current->domain;
+
+    /* Aligned access only, thank you. */
+    if ( (addr & (bytes-1)) != 0 )
+    {
+        MEM_LOG("ptwr_emulate: Unaligned or bad size ptwr access (%d, %p)\n",
+                bytes, addr);
+        return X86EMUL_UNHANDLEABLE;
+    }
+
+    /* Turn a sub-word access into a full-word access. */
+    if ( (addr & ((BITS_PER_LONG/8)-1)) != 0 )
+    {
+        int           rc;
+        unsigned long full;
+        unsigned int  mask = addr & ((BITS_PER_LONG/8)-1);
+        /* Align address; read full word. */
+        addr &= ~((BITS_PER_LONG/8)-1);
+        if ( (rc = x86_emulate_read_std(addr, &full, BITS_PER_LONG/8)) )
+            return rc;
+        /* Mask out bits provided by caller. */
+        full &= ~((1UL << (bytes*8)) - 1UL) << (mask*8);
+        /* Shift the caller value and OR in the missing bits. */
+        val  &= (1UL << (bytes*8)) - 1UL;
+        val <<= mask*8;
+        val  |= full;
+    }
+
+    /* Read the PTE that maps the page being updated. */
+    if ( __get_user(pte, (unsigned long *)
+                    &linear_pg_table[l1_linear_offset(addr)]) )
+    {
+        MEM_LOG("ptwr_emulate: Cannot read thru linear_pg_table\n");
+        return X86EMUL_UNHANDLEABLE;
+    }
+
+    pfn  = pte >> PAGE_SHIFT;
+    page = &frame_table[pfn];
+
+    /* We are looking only for read-only mappings of p.t. pages. */
+    if ( ((pte & (_PAGE_RW | _PAGE_PRESENT)) != _PAGE_PRESENT) ||
+         ((page->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table) )
+    {
+        MEM_LOG("ptwr_emulate: Page is mistyped or bad pte (%p, %x)\n",
+                pte, page->u.inuse.type_info);
+        return X86EMUL_UNHANDLEABLE;
+    }
+
+    /* Check the new PTE. */
+    nl1e = mk_l1_pgentry(val);
+    if ( unlikely(!get_page_from_l1e(nl1e, d)) )
+        return X86EMUL_UNHANDLEABLE;
+
+    /* Checked successfully: do the update (write or cmpxchg). */
+    pl1e = map_domain_mem(page_to_phys(page) + (addr & ~PAGE_MASK));
+    if ( do_cmpxchg )
+    {
+        ol1e = mk_l1_pgentry(old);
+        if ( cmpxchg((unsigned long *)pl1e, old, val) != old )
+        {
+            unmap_domain_mem(pl1e);
+            return X86EMUL_CMPXCHG_FAILED;
+        }
+    }
+    else
+    {
+        ol1e  = *pl1e;
+        *pl1e = nl1e;
+    }
+    unmap_domain_mem(pl1e);
+
+    /* Propagate update to shadow cache. */
+    if ( unlikely(shadow_mode_enabled(d)) )
+    {
+        sstat = get_shadow_status(d, page_to_pfn(page));
+        if ( sstat & PSH_shadowed )
+        {
+            sl1e = map_domain_mem(
+                ((sstat & PSH_pfn_mask) << PAGE_SHIFT) + (addr & ~PAGE_MASK));
+            l1pte_propagate_from_guest(
+                d, &l1_pgentry_val(nl1e), &l1_pgentry_val(*sl1e));
+            unmap_domain_mem(sl1e);
+        }
+    }
+
+    /* Finally, drop the old PTE. */
+    if ( unlikely(l1_pgentry_val(ol1e) & _PAGE_PRESENT) )
+        put_page_from_l1e(ol1e, d);
+
+    return X86EMUL_CONTINUE;
+}
+
+static int ptwr_emulated_write(
+    unsigned long addr,
+    unsigned long val,
+    unsigned int bytes)
+{
+    return ptwr_emulated_update(addr, 0, val, bytes, 0);
+}
+
+static int ptwr_emulated_cmpxchg(
+    unsigned long addr,
+    unsigned long old,
+    unsigned long new,
+    unsigned int bytes)
+{
+    return ptwr_emulated_update(addr, old, new, bytes, 1);
+}
+
+static struct x86_mem_emulator ptwr_mem_emulator = {
+    .read_std         = x86_emulate_read_std,
+    .write_std        = x86_emulate_write_std,
+    .read_emulated    = x86_emulate_read_std,
+    .write_emulated   = ptwr_emulated_write,
+    .cmpxchg_emulated = ptwr_emulated_cmpxchg
+};
+
 /* Write page fault handler: check if guest is trying to modify a PTE. */
 int ptwr_do_page_fault(unsigned long addr)
 {
@@ -2448,13 +2570,13 @@ int ptwr_do_page_fault(unsigned long add
     return 0; /* Writable pagetables need fixing for x86_64. */
 #endif
 
+    /* Can't use linear_l2_table with external tables. */
+    BUG_ON(shadow_mode_external(current->domain));
+
     /*
      * Attempt to read the PTE that maps the VA being accessed. By checking for
      * PDE validity in the L2 we avoid many expensive fixups in __get_user().
      */
-    if ( shadow_mode_external(current->domain) )
-        BUG(); // can't use linear_l2_table with external tables.
-
     if ( !(l2_pgentry_val(linear_l2_table[addr>>L2_PAGETABLE_SHIFT]) &
            _PAGE_PRESENT) ||
          __get_user(pte, (unsigned long *)
@@ -2472,47 +2594,35 @@ int ptwr_do_page_fault(unsigned long add
     {
         return 0;
     }
-    
+
     /* Get the L2 index at which this L1 p.t. is always mapped. */
     l2_idx = page->u.inuse.type_info & PGT_va_mask;
     if ( unlikely(l2_idx >= PGT_va_unknown) )
-    {
-        domain_crash(); /* Urk! This L1 is mapped in multiple L2 slots! */
-    }
+        goto emulate; /* Urk! This L1 is mapped in multiple L2 slots! */
     l2_idx >>= PGT_va_shift;
 
-    if ( l2_idx == (addr >> L2_PAGETABLE_SHIFT) )
-    {
-        MEM_LOG("PTWR failure! Pagetable maps itself at %p\n", addr);
-        domain_crash();
-    }
+    if ( unlikely(l2_idx == (addr >> L2_PAGETABLE_SHIFT)) )
+        goto emulate; /* Urk! Pagetable maps itself! */
 
     /*
      * Is the L1 p.t. mapped into the current address space? If so we call it
     * an ACTIVE p.t., otherwise it is INACTIVE.
     */
-    if ( shadow_mode_external(current->domain) )
-        BUG(); // can't use linear_l2_table with external tables.
-
     pl2e = &linear_l2_table[l2_idx];
     l2e  = l2_pgentry_val(*pl2e);
     which = PTWR_PT_INACTIVE;
     if ( (l2e >> PAGE_SHIFT) == pfn )
     {
-        /* Check the PRESENT bit to set ACTIVE. */
-        if ( likely(l2e & _PAGE_PRESENT) )
+        /*
+         * Check the PRESENT bit to set ACTIVE mode.
+         * If the PRESENT bit is clear, we may be conflicting with the current
+         * ACTIVE p.t. (it may be the same p.t. mapped at another virt addr).
+         * The ptwr_flush call below will restore the PRESENT bit.
+         */
+        if ( likely(l2e & _PAGE_PRESENT) ||
+             (ptwr_info[cpu].ptinfo[PTWR_PT_ACTIVE].l1va &&
+              (l2_idx == ptwr_info[cpu].ptinfo[PTWR_PT_ACTIVE].l2_idx)) )
             which = PTWR_PT_ACTIVE;
-        else {
-            /*
-             * If the PRESENT bit is clear, we may be conflicting with
-             * the current ACTIVE p.t. (it may be the same p.t. mapped
-             * at another virt addr).
-             * The ptwr_flush call below will restore the PRESENT bit.
-             */
-            if ( ptwr_info[cpu].ptinfo[PTWR_PT_ACTIVE].l1va &&
-                 l2_idx == ptwr_info[cpu].ptinfo[PTWR_PT_ACTIVE].l2_idx )
-                which = PTWR_PT_ACTIVE;
-        }
     }
 
     PTWR_PRINTK("[%c] page_fault on l1 pt at va %p, pt for %08x, "
@@ -2526,6 +2636,18 @@ int ptwr_do_page_fault(unsigned long add
     if ( ptwr_info[cpu].ptinfo[which].l1va )
         ptwr_flush(which);
 
+    /*
+     * If last batch made no updates then we are probably stuck. Emulate this
+     * update to ensure we make progress.
+     */
+    if ( (ptwr_info[cpu].ptinfo[which].prev_exec_domain == current) &&
+         (ptwr_info[cpu].ptinfo[which].prev_nr_updates  == 0) )
+    {
+        /* Force non-emul next time, or we can get stuck emulating forever. */
+        ptwr_info[cpu].ptinfo[which].prev_exec_domain = NULL;
+        goto emulate;
+    }
+
     ptwr_info[cpu].ptinfo[which].l1va   = addr | 1;
     ptwr_info[cpu].ptinfo[which].l2_idx = l2_idx;
 
@@ -2534,11 +2656,7 @@ int ptwr_do_page_fault(unsigned long add
          likely(!shadow_mode_enabled(current->domain)) )
     {
         *pl2e = mk_l2_pgentry(l2e & ~_PAGE_PRESENT);
-#if 1
         flush_tlb(); /* XXX Multi-CPU guests? */
-#else
-        flush_tlb_all();
-#endif
     }
 
     /* Temporarily map the L1 page, and make a copy of it. */
@@ -2563,6 +2681,13 @@ int ptwr_do_page_fault(unsigned long add
     }
 
     return EXCRET_fault_fixed;
+
+ emulate:
+    if ( x86_emulate_memop(get_execution_context(), addr,
+                           &ptwr_mem_emulator, BITS_PER_LONG/8) )
+        return 0;
+    perfc_incrc(ptwr_emulations);
+    return EXCRET_fault_fixed;
 }
 
 static __init int ptwr_init(void)
@@ -2762,8 +2887,7 @@ void audit_domain(struct domain *d)
         pfn = list_entry(list_ent, struct pfn_info, list) - frame_table;
         page = &frame_table[pfn];
 
-        if ( page_get_owner(page) != d )
-            BUG();
+        BUG_ON(page_get_owner(page) != d);
 
         if ( (page->u.inuse.type_info & PGT_count_mask) >
              (page->count_info & PGC_count_mask) )
@@ -2809,8 +2933,7 @@ void audit_domain(struct domain *d)
         pfn = list_entry(list_ent, struct pfn_info, list) - frame_table;
         page = &frame_table[pfn];
 
-        if ( page_get_owner(page) != d )
-            BUG();
+        BUG_ON(page_get_owner(page) != d);
 
         switch ( page->u.inuse.type_info & PGT_type_mask )
         {
@@ -3060,7 +3183,10 @@ void audit_domain(struct domain *d)
                    d->exec_domain[0]->arch.guest_table)>>PAGE_SHIFT], 1, 1);
 
     spin_unlock(&d->page_alloc_lock);
-    printk("Audit %d: Done. ref=%d xenpages=%d pages=%d l1=%d l2=%d ctot=%d ttot=%d\n", d->id, atomic_read(&d->refcnt), d->xenheap_pages, d->tot_pages, l1, l2, ctot, ttot );
+    printk("Audit %d: Done. ref=%d xenpages=%d pages=%d l1=%d"
+           " l2=%d ctot=%d ttot=%d\n",
+           d->id, atomic_read(&d->refcnt), d->xenheap_pages, d->tot_pages,
+           l1, l2, ctot, ttot );
 
     if ( d != current->domain )
         domain_unpause(d);
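
The sub-word widening in ptwr_emulated_update above merits a worked example: the emulator may hand over a 1-, 2- or 4-byte write, but a PTE update must be applied as a full word, so the written bytes are spliced into the word read back from memory. Below is a standalone sketch of the intended merge, with the shifted byte-mask fully parenthesised, plus one concrete case:

    #include <assert.h>
    #include <stdio.h>

    /*
     * Merge a 'bytes'-wide write of 'val' at byte offset 'off' into the
     * containing word 'full'. Assumes bytes*8 < BITS_PER_LONG, which holds
     * here because a word-aligned access never takes the widening path.
     */
    static unsigned long merge_subword(unsigned long full, unsigned long val,
                                       unsigned int bytes, unsigned int off)
    {
        unsigned long keep = ~(((1UL << (bytes * 8)) - 1UL) << (off * 8));
        full &= keep;                       /* clear the bytes being written */
        val  &= (1UL << (bytes * 8)) - 1UL; /* truncate the caller's value */
        val <<= off * 8;                    /* shift it into position */
        return full | val;
    }

    int main(void)
    {
        /* A 2-byte write of 0xbeef at byte offset 2 within 0x11223344. */
        unsigned long merged = merge_subword(0x11223344UL, 0xbeefUL, 2, 2);
        assert(merged == 0xbeef3344UL);
        printf("merged word: %#lx\n", merged);
        return 0;
    }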
--- a/xen/arch/x86/x86_emulate.c	Tue Mar 15 14:31:42 2005 +0000
+++ b/xen/arch/x86/x86_emulate.c	Tue Mar 15 15:58:52 2005 +0000
@@ -363,7 +363,7 @@ do{ __asm__ __volatile__ (
 /* Fetch next part of the instruction being emulated. */
 #define insn_fetch(_type, _size, _eip) \
 ({ unsigned long _x; \
-   if ( ops->read_std((unsigned long)(_eip), &_x, (_size)) ) \
+   if ( (rc = ops->read_std((unsigned long)(_eip), &_x, (_size))) != 0 ) \
        goto done; \
    (_eip) += (_size); \
    (_type)_x; \
@@ -422,6 +422,7 @@ x86_emulate_memop(
     u8 modrm, modrm_mod = 0, modrm_reg = 0, modrm_rm = 0;
     unsigned int op_bytes = (mode == 8) ? 4 : mode, ad_bytes = mode;
     unsigned int lock_prefix = 0, rep_prefix = 0, i;
+    int rc = 0;
     struct operand src, dst;
 
     /* Shadow copy of register state. Committed on successful emulation. */
@@ -556,7 +557,8 @@ x86_emulate_memop(
         dst.ptr   = (unsigned long *)cr2;
         dst.bytes = (d & ByteOp) ? 1 : op_bytes;
         if ( !(d & Mov) && /* optimisation - avoid slow emulated read */
-             ops->read_emulated((unsigned long)dst.ptr, &dst.val, dst.bytes) )
+             ((rc = ops->read_emulated((unsigned long)dst.ptr,
+                                       &dst.val, dst.bytes)) != 0) )
             goto done;
         break;
     }
@@ -590,7 +592,8 @@ x86_emulate_memop(
         src.type  = OP_MEM;
         src.ptr   = (unsigned long *)cr2;
         src.bytes = (d & ByteOp) ? 1 : op_bytes;
-        if ( ops->read_emulated((unsigned long)src.ptr, &src.val, src.bytes) )
+        if ( (rc = ops->read_emulated((unsigned long)src.ptr,
+                                      &src.val, src.bytes)) != 0 )
             goto done;
         src.orig_val = src.val;
         break;
@@ -664,6 +667,7 @@ x86_emulate_memop(
         src.val ^= dst.val;
         dst.val ^= src.val;
         src.val ^= dst.val;
+        lock_prefix = 1;
         break;
     case 0xa0 ... 0xa1: /* mov */
         dst.ptr = (unsigned long *)&_regs.eax;
@@ -682,7 +686,7 @@ x86_emulate_memop(
         /* 64-bit mode: POP defaults to 64-bit operands. */
         if ( (mode == 8) && (dst.bytes == 4) )
             dst.bytes = 8;
-        if ( ops->read_std(_regs.esp, &dst.val, dst.bytes) )
+        if ( (rc = ops->read_std(_regs.esp, &dst.val, dst.bytes)) != 0 )
             goto done;
         _regs.esp += dst.bytes;
         break;
@@ -759,11 +763,12 @@ x86_emulate_memop(
         if ( (mode == 8) && (dst.bytes == 4) )
         {
             dst.bytes = 8;
-            if ( ops->read_std((unsigned long)dst.ptr, &dst.val, 8) )
+            if ( (rc = ops->read_std((unsigned long)dst.ptr,
+                                     &dst.val, 8)) != 0 )
                 goto done;
         }
         _regs.esp -= dst.bytes;
-        if ( ops->write_std(_regs.esp, dst.val, dst.bytes) )
+        if ( (rc = ops->write_std(_regs.esp, dst.val, dst.bytes)) != 0 )
             goto done;
         dst.val = dst.orig_val; /* skanky: disable writeback */
         break;
@@ -790,22 +795,13 @@ x86_emulate_memop(
         break;
     case OP_MEM:
         if ( lock_prefix )
-        {
-            unsigned long seen;
-            if ( ops->cmpxchg_emulated((unsigned long)dst.ptr,
-                                       dst.orig_val, dst.val,
-                                       &seen, dst.bytes) )
-                goto done;
-            if ( seen != dst.orig_val )
-                goto done; /* Try again... */
-        }
+            rc = ops->cmpxchg_emulated(
+                (unsigned long)dst.ptr, dst.orig_val, dst.val, dst.bytes);
         else
-        {
-            if ( ops->write_emulated((unsigned long)dst.ptr,
-                                     dst.val, dst.bytes) )
-                goto done;
-        }
-        break;
+            rc = ops->write_emulated(
+                (unsigned long)dst.ptr, dst.val, dst.bytes);
+        if ( rc != 0 )
+            goto done;
     default:
         break;
     }
@@ -815,7 +811,7 @@ x86_emulate_memop(
     *regs = _regs;
 
 done:
-    return 0;
+    return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0;
 
 special_insn:
     if ( twobyte )
@@ -839,15 +835,15 @@ x86_emulate_memop(
     {
         /* Write fault: destination is special memory. */
         dst.ptr = (unsigned long *)cr2;
-        if ( ops->read_std(_regs.esi - _regs.edi + cr2,
-                           &dst.val, dst.bytes) )
+        if ( (rc = ops->read_std(_regs.esi - _regs.edi + cr2,
+                                 &dst.val, dst.bytes)) != 0 )
             goto done;
     }
     else
     {
         /* Read fault: source is special memory. */
         dst.ptr = (unsigned long *)(_regs.edi - _regs.esi + cr2);
-        if ( ops->read_emulated(cr2, &dst.val, dst.bytes) )
+        if ( (rc = ops->read_emulated(cr2, &dst.val, dst.bytes)) != 0 )
             goto done;
     }
     _regs.esi += (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes;
@@ -867,7 +863,7 @@ x86_emulate_memop(
     dst.type  = OP_REG;
     dst.bytes = (d & ByteOp) ? 1 : op_bytes;
     dst.ptr   = (unsigned long *)&_regs.eax;
-    if ( ops->read_emulated(cr2, &dst.val, dst.bytes) )
+    if ( (rc = ops->read_emulated(cr2, &dst.val, dst.bytes)) != 0 )
         goto done;
     _regs.esi += (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes;
     break;
@@ -971,3 +967,39 @@ x86_emulate_memop(
     DPRINTF("Cannot emulate %02x\n", b);
     return -1;
 }
+
+#ifndef __TEST_HARNESS__
+
+#include <asm/mm.h>
+#include <asm/uaccess.h>
+
+int
+x86_emulate_read_std(
+    unsigned long addr,
+    unsigned long *val,
+    unsigned int bytes)
+{
+    *val = 0;
+    if ( copy_from_user((void *)val, (void *)addr, bytes) )
+    {
+        propagate_page_fault(addr, 4); /* user mode, read fault */
+        return X86EMUL_PROPAGATE_FAULT;
+    }
+    return X86EMUL_CONTINUE;
+}
+
+int
+x86_emulate_write_std(
+    unsigned long addr,
+    unsigned long val,
+    unsigned int bytes)
+{
+    if ( copy_to_user((void *)addr, (void *)&val, bytes) )
+    {
+        propagate_page_fault(addr, 6); /* user mode, write fault */
+        return X86EMUL_PROPAGATE_FAULT;
+    }
+    return X86EMUL_CONTINUE;
+}
+
+#endif
--- a/xen/include/asm-x86/mm.h	Tue Mar 15 14:31:42 2005 +0000
+++ b/xen/include/asm-x86/mm.h	Tue Mar 15 15:58:52 2005 +0000
@@ -289,6 +289,9 @@ typedef struct {
     l1_pgentry_t *pl1e;
     /* Index in L2 page table where this L1 p.t. is always hooked. */
     unsigned int l2_idx; /* NB. Only used for PTWR_PT_ACTIVE. */
+    /* Info about last ptwr update batch. */
+    struct exec_domain *prev_exec_domain; /* domain making the update */
+    unsigned int        prev_nr_updates;  /* size of update batch */
 } ptwr_ptinfo_t;
 
 typedef struct {
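
A sketch of the progress heuristic these two new fields support (the real logic lives in ptwr_do_page_fault in the mm.c hunk above); 'info' is a stand-in for ptwr_info[cpu].ptinfo[which]:

    /* Returns nonzero if the fault should be emulated rather than batched. */
    static int should_emulate(ptwr_ptinfo_t *info, struct exec_domain *ed)
    {
        if ( (info->prev_exec_domain == ed) && (info->prev_nr_updates == 0) )
        {
            /* The last batch fixed nothing, so we are probably stuck:
             * emulate once to guarantee progress, but clear the marker
             * so that we do not end up emulating forever. */
            info->prev_exec_domain = NULL;
            return 1;
        }
        return 0;
    }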
--- a/xen/include/asm-x86/x86_emulate.h	Tue Mar 15 14:31:42 2005 +0000
+++ b/xen/include/asm-x86/x86_emulate.h	Tue Mar 15 15:58:52 2005 +0000
@@ -32,9 +32,17 @@
  * 2. If the access fails (cannot emulate, or a standard access faults) then
  *    it is up to the memop to propagate the fault to the guest VM via
  *    some out-of-band mechanism, unknown to the emulator. The memop signals
- *    failure by returning a non-zero value to the emulator, which will then
- *    immediately bail.
+ *    failure by returning X86EMUL_PROPAGATE_FAULT to the emulator, which will
+ *    then immediately bail.
  */
+/* Access completed successfully: continue emulation as normal. */
+#define X86EMUL_CONTINUE        0
+/* Access is unhandleable: bail from emulation and return error to caller. */
+#define X86EMUL_UNHANDLEABLE    1
+/* Terminate emulation but return success to the caller. */
+#define X86EMUL_PROPAGATE_FAULT 2 /* propagate a generated fault to guest */
+#define X86EMUL_RETRY_INSTR     2 /* retry the instruction for some reason */
+#define X86EMUL_CMPXCHG_FAILED  2 /* cmpxchg did not see expected value */
 struct x86_mem_emulator
 {
     /*
@@ -89,17 +97,26 @@ struct x86_mem_emulator
      * @addr:  [IN ] Linear address to access.
      * @old:   [IN ] Value expected to be current at @addr.
      * @new:   [IN ] Value to write to @addr.
-     * @seen:  [OUT] Value actually seen at @addr, zero-extended to 'u_long'.
      * @bytes: [IN ] Number of bytes to access using CMPXCHG.
      */
     int (*cmpxchg_emulated)(
         unsigned long addr,
-        unsigned long old, 
+        unsigned long old,
         unsigned long new,
-        unsigned long *seen, 
         unsigned int bytes);
 };
 
+/* Standard reader/writer functions that callers may wish to use. */
+extern int
+x86_emulate_read_std(
+    unsigned long addr,
+    unsigned long *val,
+    unsigned int bytes);
+extern int
+x86_emulate_write_std(
+    unsigned long addr,
+    unsigned long val,
+    unsigned int bytes);
 
 struct xen_regs;
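
To illustrate how the new return codes drive the emulator, here is a hedged sketch of a device-model style read callback of the kind the VMX code might register; the toy aperture and register file are stand-ins, and only the X86EMUL_* constants and the callback signature come from this header:

    #include <asm/x86_emulate.h>

    #define TOY_MMIO_BASE 0xfee00000UL    /* hypothetical device aperture */
    #define TOY_MMIO_SIZE 0x1000UL

    static unsigned char toy_regs[TOY_MMIO_SIZE]; /* hypothetical registers */

    static int toy_read(unsigned long addr, unsigned long *val,
                        unsigned int bytes)
    {
        unsigned int i;

        if ( (addr < TOY_MMIO_BASE) ||
             ((addr + bytes) > (TOY_MMIO_BASE + TOY_MMIO_SIZE)) )
            return X86EMUL_UNHANDLEABLE;  /* memop bails; caller sees -1 */

        /* Assemble the little-endian result byte by byte. */
        *val = 0;
        for ( i = 0; i < bytes; i++ )
            *val |= (unsigned long)toy_regs[addr - TOY_MMIO_BASE + i] << (8*i);
        return X86EMUL_CONTINUE;          /* emulation proceeds normally */
    }

X86EMUL_PROPAGATE_FAULT is the third option: the callback injects the fault into the guest itself (as x86_emulate_read_std does via propagate_page_fault) and the emulator bails while still returning success to its caller.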
--- a/xen/include/xen/perfc_defn.h	Tue Mar 15 14:31:42 2005 +0000
+++ b/xen/include/xen/perfc_defn.h	Tue Mar 15 15:58:52 2005 +0000
@@ -20,6 +20,7 @@ PERFCOUNTER_CPU( calls_to_update_va, "ca
 PERFCOUNTER_CPU( page_faults, "page faults" )
 PERFCOUNTER_CPU( copy_user_faults, "copy_user faults" )
 PERFCOUNTER_CPU( map_domain_mem_count, "map_domain_mem count" )
+PERFCOUNTER_CPU( ptwr_emulations, "writable pt emulations" )
 
 PERFCOUNTER_CPU( shadow_l2_table_count, "shadow_l2_table count" )
 PERFCOUNTER_CPU( shadow_l1_table_count, "shadow_l1_table count" )