ia64/xen-unstable
changeset 11666:b6ee084892da
[XEN] Support lightweight shadow-translate PV guests, for paravirt-ops.
This is a modified subset of Michael Fetterman's shadow-translate work.
Signed-off-by: Tim Deegan <Tim.Deegan@xensource.com>
author    Tim Deegan <tim.deegan@xensource.com>
date      Thu Sep 28 17:10:54 2006 +0100 (2006-09-28)
parents   5f42b4824e45
children  f9929b7e009e
files     xen/arch/x86/domain.c xen/arch/x86/mm.c xen/arch/x86/mm/shadow/common.c xen/arch/x86/mm/shadow/multi.c xen/arch/x86/mm/shadow/multi.h xen/arch/x86/mm/shadow/private.h xen/arch/x86/mm/shadow/types.h xen/arch/x86/traps.c xen/include/asm-x86/domain.h xen/include/asm-x86/guest_access.h xen/include/asm-x86/mm.h xen/include/asm-x86/shadow.h
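To make the intent of the patch easier to follow, here is a rough sketch of the access pattern it introduces for PV guest page-table walks (a hypothetical caller, not part of the changeset; the real call sites are in the xen/arch/x86/mm.c hunks below). Instead of reading guest L1 entries through linear_pg_table, callers go through the new guest_map_l1e()/guest_unmap_l1e() helpers, which use the linear map for ordinary PV domains and fall back to a full guest walk for shadow-translated ones:

    /* Hypothetical helper, for illustration only: install a new guest L1e
     * at virtual address 'va' using the mapping helpers added by this
     * changeset.  Mirrors the reworked create_grant_va_mapping(). */
    static int write_guest_l1e(struct vcpu *v, unsigned long va,
                               l1_pgentry_t nl1e)
    {
        l1_pgentry_t *pl1e, ol1e;
        unsigned long gl1mfn;
        int okay;

        pl1e = guest_map_l1e(v, va, &gl1mfn);  /* NULL if no usable L1 here */
        if ( pl1e == NULL )
            return 0;

        ol1e = *pl1e;
        okay = update_l1e(pl1e, ol1e, nl1e, gl1mfn, v); /* also revalidates shadows */
        guest_unmap_l1e(v, pl1e);              /* no-op unless translate mode */

        return okay;
    }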
line diff
1.1 --- a/xen/arch/x86/domain.c Thu Sep 28 17:09:11 2006 +0100
1.2 +++ b/xen/arch/x86/domain.c Thu Sep 28 17:10:54 2006 +0100
1.3 @@ -334,8 +334,10 @@ int arch_set_info_guest(
1.4 }
1.5 else
1.6 {
1.7 - if ( !get_page_and_type(mfn_to_page(cr3_pfn), d,
1.8 - PGT_base_page_table) )
1.9 + if ( shadow_mode_refcounts(d)
1.10 + ? !get_page(mfn_to_page(cr3_pfn), d)
1.11 + : !get_page_and_type(mfn_to_page(cr3_pfn), d,
1.12 + PGT_base_page_table) )
1.13 {
1.14 destroy_gdt(v);
1.15 return -EINVAL;
1.16 @@ -952,7 +954,10 @@ void domain_relinquish_resources(struct
1.17 pfn = pagetable_get_pfn(v->arch.guest_table_user);
1.18 if ( pfn != 0 )
1.19 {
1.20 - put_page_and_type(mfn_to_page(pfn));
1.21 + if ( shadow_mode_refcounts(d) )
1.22 + put_page(mfn_to_page(pfn));
1.23 + else
1.24 + put_page_and_type(mfn_to_page(pfn));
1.25 v->arch.guest_table_user = pagetable_null();
1.26 }
1.27 #endif
2.1 --- a/xen/arch/x86/mm.c Thu Sep 28 17:09:11 2006 +0100 2.2 +++ b/xen/arch/x86/mm.c Thu Sep 28 17:10:54 2006 +0100 2.3 @@ -427,23 +427,11 @@ int map_ldt_shadow_page(unsigned int off 2.4 unsigned long gmfn, mfn; 2.5 l1_pgentry_t l1e, nl1e; 2.6 unsigned long gva = v->arch.guest_context.ldt_base + (off << PAGE_SHIFT); 2.7 - int res; 2.8 - 2.9 -#if defined(__x86_64__) 2.10 - /* If in user mode, switch to kernel mode just to read LDT mapping. */ 2.11 - int user_mode = !(v->arch.flags & TF_kernel_mode); 2.12 -#define TOGGLE_MODE() if ( user_mode ) toggle_guest_mode(v) 2.13 -#elif defined(__i386__) 2.14 -#define TOGGLE_MODE() ((void)0) 2.15 -#endif 2.16 + int okay; 2.17 2.18 BUG_ON(unlikely(in_irq())); 2.19 2.20 - TOGGLE_MODE(); 2.21 - __copy_from_user(&l1e, &linear_pg_table[l1_linear_offset(gva)], 2.22 - sizeof(l1e)); 2.23 - TOGGLE_MODE(); 2.24 - 2.25 + guest_get_eff_kern_l1e(v, gva, &l1e); 2.26 if ( unlikely(!(l1e_get_flags(l1e) & _PAGE_PRESENT)) ) 2.27 return 0; 2.28 2.29 @@ -452,17 +440,17 @@ int map_ldt_shadow_page(unsigned int off 2.30 if ( unlikely(!VALID_MFN(mfn)) ) 2.31 return 0; 2.32 2.33 - res = get_page_and_type(mfn_to_page(mfn), d, PGT_ldt_page); 2.34 - 2.35 - if ( !res && unlikely(shadow_mode_refcounts(d)) ) 2.36 + okay = get_page_and_type(mfn_to_page(mfn), d, PGT_ldt_page); 2.37 + 2.38 + if ( !okay && unlikely(shadow_mode_refcounts(d)) ) 2.39 { 2.40 shadow_lock(d); 2.41 shadow_remove_write_access(d->vcpu[0], _mfn(mfn), 0, 0); 2.42 - res = get_page_and_type(mfn_to_page(mfn), d, PGT_ldt_page); 2.43 + okay = get_page_and_type(mfn_to_page(mfn), d, PGT_ldt_page); 2.44 shadow_unlock(d); 2.45 } 2.46 2.47 - if ( unlikely(!res) ) 2.48 + if ( unlikely(!okay) ) 2.49 return 0; 2.50 2.51 nl1e = l1e_from_pfn(mfn, l1e_get_flags(l1e) | _PAGE_RW); 2.52 @@ -1233,7 +1221,7 @@ static inline int update_l1e(l1_pgentry_ 2.53 } 2.54 } 2.55 #endif 2.56 - if ( unlikely(shadow_mode_enabled(v->domain)) ) 2.57 + if ( unlikely(shadow_mode_enabled(v->domain)) && rv ) 2.58 { 2.59 shadow_validate_guest_entry(v, _mfn(gl1mfn), pl1e); 2.60 shadow_unlock(v->domain); 2.61 @@ -1252,6 +1240,9 @@ static int mod_l1_entry(l1_pgentry_t *pl 2.62 if ( unlikely(__copy_from_user(&ol1e, pl1e, sizeof(ol1e)) != 0) ) 2.63 return 0; 2.64 2.65 + if ( unlikely(shadow_mode_refcounts(d)) ) 2.66 + return update_l1e(pl1e, ol1e, nl1e, gl1mfn, current); 2.67 + 2.68 if ( l1e_get_flags(nl1e) & _PAGE_PRESENT ) 2.69 { 2.70 if ( unlikely(l1e_get_flags(nl1e) & L1_DISALLOW_MASK) ) 2.71 @@ -1871,6 +1862,14 @@ static int set_foreigndom(domid_t domid) 2.72 } 2.73 } 2.74 2.75 + if ( unlikely(shadow_mode_translate(d)) ) 2.76 + { 2.77 + MEM_LOG("%s: can not mix foreign mappings with translated domains", 2.78 + __func__); 2.79 + info->foreign = NULL; 2.80 + okay = 0; 2.81 + } 2.82 + 2.83 out: 2.84 return okay; 2.85 } 2.86 @@ -1902,7 +1901,7 @@ int do_mmuext_op( 2.87 { 2.88 struct mmuext_op op; 2.89 int rc = 0, i = 0, okay; 2.90 - unsigned long mfn, type; 2.91 + unsigned long mfn = 0, gmfn = 0, type; 2.92 unsigned int done = 0; 2.93 struct page_info *page; 2.94 struct vcpu *v = current; 2.95 @@ -1947,7 +1946,8 @@ int do_mmuext_op( 2.96 } 2.97 2.98 okay = 1; 2.99 - mfn = op.arg1.mfn; 2.100 + gmfn = op.arg1.mfn; 2.101 + mfn = gmfn_to_mfn(FOREIGNDOM, gmfn); 2.102 page = mfn_to_page(mfn); 2.103 2.104 switch ( op.cmd ) 2.105 @@ -2022,7 +2022,6 @@ int do_mmuext_op( 2.106 break; 2.107 2.108 case MMUEXT_NEW_BASEPTR: 2.109 - mfn = gmfn_to_mfn(current->domain, mfn); 2.110 okay = new_guest_cr3(mfn); 2.111 this_cpu(percpu_mm_info).deferred_ops &= ~DOP_FLUSH_TLB; 
2.112 break; 2.113 @@ -2031,8 +2030,13 @@ int do_mmuext_op( 2.114 case MMUEXT_NEW_USER_BASEPTR: 2.115 okay = 1; 2.116 if (likely(mfn != 0)) 2.117 - okay = get_page_and_type_from_pagenr( 2.118 - mfn, PGT_root_page_table, d); 2.119 + { 2.120 + if ( shadow_mode_refcounts(d) ) 2.121 + okay = get_page_from_pagenr(mfn, d); 2.122 + else 2.123 + okay = get_page_and_type_from_pagenr( 2.124 + mfn, PGT_root_page_table, d); 2.125 + } 2.126 if ( unlikely(!okay) ) 2.127 { 2.128 MEM_LOG("Error while installing new mfn %lx", mfn); 2.129 @@ -2043,7 +2047,12 @@ int do_mmuext_op( 2.130 pagetable_get_pfn(v->arch.guest_table_user); 2.131 v->arch.guest_table_user = pagetable_from_pfn(mfn); 2.132 if ( old_mfn != 0 ) 2.133 - put_page_and_type(mfn_to_page(old_mfn)); 2.134 + { 2.135 + if ( shadow_mode_refcounts(d) ) 2.136 + put_page(mfn_to_page(old_mfn)); 2.137 + else 2.138 + put_page_and_type(mfn_to_page(old_mfn)); 2.139 + } 2.140 } 2.141 break; 2.142 #endif 2.143 @@ -2504,17 +2513,26 @@ static int create_grant_va_mapping( 2.144 { 2.145 l1_pgentry_t *pl1e, ol1e; 2.146 struct domain *d = v->domain; 2.147 + unsigned long gl1mfn; 2.148 + int okay; 2.149 2.150 ASSERT(spin_is_locked(&d->big_lock)); 2.151 2.152 adjust_guest_l1e(nl1e); 2.153 2.154 - pl1e = &linear_pg_table[l1_linear_offset(va)]; 2.155 - 2.156 - if ( unlikely(__copy_from_user(&ol1e, pl1e, sizeof(ol1e)) != 0) || 2.157 - !update_l1e(pl1e, ol1e, nl1e, 2.158 - l2e_get_pfn(__linear_l2_table[l2_linear_offset(va)]), v) ) 2.159 + pl1e = guest_map_l1e(v, va, &gl1mfn); 2.160 + if ( !pl1e ) 2.161 + { 2.162 + MEM_LOG("Could not find L1 PTE for address %lx", va); 2.163 return GNTST_general_error; 2.164 + } 2.165 + ol1e = *pl1e; 2.166 + okay = update_l1e(pl1e, ol1e, nl1e, gl1mfn, v); 2.167 + guest_unmap_l1e(v, pl1e); 2.168 + pl1e = NULL; 2.169 + 2.170 + if ( !okay ) 2.171 + return GNTST_general_error; 2.172 2.173 if ( !shadow_mode_refcounts(d) ) 2.174 put_page_from_l1e(ol1e, d); 2.175 @@ -2523,17 +2541,19 @@ static int create_grant_va_mapping( 2.176 } 2.177 2.178 static int destroy_grant_va_mapping( 2.179 - unsigned long addr, unsigned long frame, struct domain *d) 2.180 + unsigned long addr, unsigned long frame, struct vcpu *v) 2.181 { 2.182 l1_pgentry_t *pl1e, ol1e; 2.183 + unsigned long gl1mfn; 2.184 + int rc = 0; 2.185 2.186 - pl1e = &linear_pg_table[l1_linear_offset(addr)]; 2.187 - 2.188 - if ( unlikely(__get_user(ol1e.l1, &pl1e->l1) != 0) ) 2.189 + pl1e = guest_map_l1e(v, addr, &gl1mfn); 2.190 + if ( !pl1e ) 2.191 { 2.192 - MEM_LOG("Could not find PTE entry for address %lx", addr); 2.193 + MEM_LOG("Could not find L1 PTE for address %lx", addr); 2.194 return GNTST_general_error; 2.195 } 2.196 + ol1e = *pl1e; 2.197 2.198 /* 2.199 * Check that the virtual address supplied is actually mapped to 2.200 @@ -2543,19 +2563,21 @@ static int destroy_grant_va_mapping( 2.201 { 2.202 MEM_LOG("PTE entry %lx for address %lx doesn't match frame %lx", 2.203 l1e_get_pfn(ol1e), addr, frame); 2.204 - return GNTST_general_error; 2.205 + rc = GNTST_general_error; 2.206 + goto out; 2.207 } 2.208 2.209 /* Delete pagetable entry. 
*/ 2.210 - if ( unlikely(!update_l1e(pl1e, ol1e, l1e_empty(), 2.211 - l2e_get_pfn(__linear_l2_table[l2_linear_offset(addr)]), 2.212 - d->vcpu[0] /* Change for per-vcpu shadows */)) ) 2.213 + if ( unlikely(!update_l1e(pl1e, ol1e, l1e_empty(), gl1mfn, v)) ) 2.214 { 2.215 MEM_LOG("Cannot delete PTE entry at %p", (unsigned long *)pl1e); 2.216 - return GNTST_general_error; 2.217 + rc = GNTST_general_error; 2.218 + goto out; // this is redundant & unnecessary, but informative 2.219 } 2.220 2.221 - return 0; 2.222 + out: 2.223 + guest_unmap_l1e(v, pl1e); 2.224 + return rc; 2.225 } 2.226 2.227 int create_grant_host_mapping( 2.228 @@ -2578,7 +2600,7 @@ int destroy_grant_host_mapping( 2.229 { 2.230 if ( flags & GNTMAP_contains_pte ) 2.231 return destroy_grant_pte_mapping(addr, frame, current->domain); 2.232 - return destroy_grant_va_mapping(addr, frame, current->domain); 2.233 + return destroy_grant_va_mapping(addr, frame, current); 2.234 } 2.235 2.236 int steal_page( 2.237 @@ -2634,7 +2656,8 @@ int do_update_va_mapping(unsigned long v 2.238 l1_pgentry_t val = l1e_from_intpte(val64); 2.239 struct vcpu *v = current; 2.240 struct domain *d = v->domain; 2.241 - unsigned long vmask, bmap_ptr; 2.242 + l1_pgentry_t *pl1e; 2.243 + unsigned long vmask, bmap_ptr, gl1mfn; 2.244 cpumask_t pmask; 2.245 int rc = 0; 2.246 2.247 @@ -2643,35 +2666,17 @@ int do_update_va_mapping(unsigned long v 2.248 if ( unlikely(!__addr_ok(va) && !shadow_mode_external(d)) ) 2.249 return -EINVAL; 2.250 2.251 - if ( unlikely(shadow_mode_refcounts(d)) ) 2.252 - { 2.253 - DPRINTK("Grant op on a shadow-refcounted domain\n"); 2.254 - return -EINVAL; 2.255 - } 2.256 - 2.257 LOCK_BIGLOCK(d); 2.258 2.259 - if ( likely(rc == 0) && unlikely(shadow_mode_enabled(d)) ) 2.260 - { 2.261 - if ( unlikely(this_cpu(percpu_mm_info).foreign && 2.262 - (shadow_mode_translate(d) || 2.263 - shadow_mode_translate( 2.264 - this_cpu(percpu_mm_info).foreign))) ) 2.265 - { 2.266 - /* 2.267 - * The foreign domain's pfn's are in a different namespace. There's 2.268 - * not enough information in just a gpte to figure out how to 2.269 - * (re-)shadow this entry. 2.270 - */ 2.271 - domain_crash(d); 2.272 - } 2.273 - } 2.274 - 2.275 - if ( unlikely(!mod_l1_entry( 2.276 - &linear_pg_table[l1_linear_offset(va)], val, 2.277 - l2e_get_pfn(__linear_l2_table[l2_linear_offset(va)]))) ) 2.278 + pl1e = guest_map_l1e(v, va, &gl1mfn); 2.279 + 2.280 + if ( unlikely(!pl1e || !mod_l1_entry(pl1e, val, gl1mfn)) ) 2.281 rc = -EINVAL; 2.282 - 2.283 + 2.284 + if ( pl1e ) 2.285 + guest_unmap_l1e(v, pl1e); 2.286 + pl1e = NULL; 2.287 + 2.288 switch ( flags & UVMF_FLUSHTYPE_MASK ) 2.289 { 2.290 case UVMF_TLB_FLUSH: 2.291 @@ -3033,7 +3038,7 @@ static int ptwr_emulated_update( 2.292 unsigned int bytes, 2.293 unsigned int do_cmpxchg) 2.294 { 2.295 - unsigned long pfn; 2.296 + unsigned long gmfn, mfn; 2.297 struct page_info *page; 2.298 l1_pgentry_t pte, ol1e, nl1e, *pl1e; 2.299 struct vcpu *v = current; 2.300 @@ -3073,15 +3078,17 @@ static int ptwr_emulated_update( 2.301 } 2.302 2.303 /* Read the PTE that maps the page being updated. 
*/ 2.304 - if ( __copy_from_user(&pte, &linear_pg_table[l1_linear_offset(addr)], 2.305 - sizeof(pte)) ) 2.306 + guest_get_eff_l1e(v, addr, &pte); 2.307 + if ( unlikely(!(l1e_get_flags(pte) & _PAGE_PRESENT)) ) 2.308 { 2.309 - MEM_LOG("ptwr_emulate: Cannot read thru linear_pg_table"); 2.310 + MEM_LOG("%s: Cannot get L1 PTE for guest address %lx", 2.311 + __func__, addr); 2.312 return X86EMUL_UNHANDLEABLE; 2.313 } 2.314 2.315 - pfn = l1e_get_pfn(pte); 2.316 - page = mfn_to_page(pfn); 2.317 + gmfn = l1e_get_pfn(pte); 2.318 + mfn = gmfn_to_mfn(d, gmfn); 2.319 + page = mfn_to_page(mfn); 2.320 2.321 /* We are looking only for read-only mappings of p.t. pages. */ 2.322 ASSERT((l1e_get_flags(pte) & (_PAGE_RW|_PAGE_PRESENT)) == _PAGE_PRESENT); 2.323 @@ -3091,7 +3098,7 @@ static int ptwr_emulated_update( 2.324 2.325 /* Check the new PTE. */ 2.326 nl1e = l1e_from_intpte(val); 2.327 - if ( unlikely(!get_page_from_l1e(nl1e, d)) ) 2.328 + if ( unlikely(!get_page_from_l1e(gl1e_to_ml1e(d, nl1e), d)) ) 2.329 { 2.330 if ( (CONFIG_PAGING_LEVELS == 3) && 2.331 (bytes == 4) && 2.332 @@ -3130,13 +3137,13 @@ static int ptwr_emulated_update( 2.333 if ( shadow_mode_enabled(d) ) 2.334 shadow_unlock(d); 2.335 unmap_domain_page(pl1e); 2.336 - put_page_from_l1e(nl1e, d); 2.337 + put_page_from_l1e(gl1e_to_ml1e(d, nl1e), d); 2.338 return X86EMUL_CMPXCHG_FAILED; 2.339 } 2.340 - if ( unlikely(shadow_mode_enabled(v->domain)) ) 2.341 + if ( unlikely(shadow_mode_enabled(d)) ) 2.342 { 2.343 shadow_validate_guest_entry(v, _mfn(page_to_mfn(page)), pl1e); 2.344 - shadow_unlock(v->domain); 2.345 + shadow_unlock(d); 2.346 } 2.347 } 2.348 else 2.349 @@ -3149,7 +3156,7 @@ static int ptwr_emulated_update( 2.350 unmap_domain_page(pl1e); 2.351 2.352 /* Finally, drop the old PTE. */ 2.353 - put_page_from_l1e(ol1e, d); 2.354 + put_page_from_l1e(gl1e_to_ml1e(d, ol1e), d); 2.355 2.356 return X86EMUL_CONTINUE; 2.357 } 2.358 @@ -3198,13 +3205,13 @@ static struct x86_emulate_ops ptwr_emula 2.359 }; 2.360 2.361 /* Write page fault handler: check if guest is trying to modify a PTE. */ 2.362 -int ptwr_do_page_fault(struct domain *d, unsigned long addr, 2.363 +int ptwr_do_page_fault(struct vcpu *v, unsigned long addr, 2.364 struct cpu_user_regs *regs) 2.365 { 2.366 + struct domain *d = v->domain; 2.367 unsigned long pfn; 2.368 struct page_info *page; 2.369 l1_pgentry_t pte; 2.370 - l2_pgentry_t *pl2e, l2e; 2.371 struct x86_emulate_ctxt emul_ctxt; 2.372 2.373 LOCK_BIGLOCK(d); 2.374 @@ -3213,13 +3220,9 @@ int ptwr_do_page_fault(struct domain *d, 2.375 * Attempt to read the PTE that maps the VA being accessed. By checking for 2.376 * PDE validity in the L2 we avoid many expensive fixups in __get_user(). 2.377 */ 2.378 - pl2e = &__linear_l2_table[l2_linear_offset(addr)]; 2.379 - if ( __copy_from_user(&l2e, pl2e, sizeof(l2e)) || 2.380 - !(l2e_get_flags(l2e) & _PAGE_PRESENT) || 2.381 - __copy_from_user(&pte, &linear_pg_table[l1_linear_offset(addr)], 2.382 - sizeof(pte)) ) 2.383 + guest_get_eff_l1e(v, addr, &pte); 2.384 + if ( !(l1e_get_flags(pte) & _PAGE_PRESENT) ) 2.385 goto bail; 2.386 - 2.387 pfn = l1e_get_pfn(pte); 2.388 page = mfn_to_page(pfn); 2.389
3.1 --- a/xen/arch/x86/mm/shadow/common.c Thu Sep 28 17:09:11 2006 +0100 3.2 +++ b/xen/arch/x86/mm/shadow/common.c Thu Sep 28 17:10:54 2006 +0100 3.3 @@ -75,35 +75,27 @@ sh_x86_emulate_read_std(unsigned long ad 3.4 unsigned int bytes, 3.5 struct x86_emulate_ctxt *ctxt) 3.6 { 3.7 - struct vcpu *v = current; 3.8 - if ( hvm_guest(v) ) 3.9 + *val = 0; 3.10 + // XXX -- this is WRONG. 3.11 + // It entirely ignores the permissions in the page tables. 3.12 + // In this case, that is only a user vs supervisor access check. 3.13 + // 3.14 + if ( hvm_copy(val, addr, bytes, HVM_COPY_IN) ) 3.15 { 3.16 - *val = 0; 3.17 - // XXX -- this is WRONG. 3.18 - // It entirely ignores the permissions in the page tables. 3.19 - // In this case, that is only a user vs supervisor access check. 3.20 - // 3.21 - if ( hvm_copy(val, addr, bytes, HVM_COPY_IN) ) 3.22 - { 3.23 #if 0 3.24 - SHADOW_PRINTK("d=%u v=%u a=%#lx v=%#lx bytes=%u\n", 3.25 - v->domain->domain_id, v->vcpu_id, 3.26 - addr, *val, bytes); 3.27 + struct vcpu *v = current; 3.28 + SHADOW_PRINTK("d=%u v=%u a=%#lx v=%#lx bytes=%u\n", 3.29 + v->domain->domain_id, v->vcpu_id, 3.30 + addr, *val, bytes); 3.31 #endif 3.32 - return X86EMUL_CONTINUE; 3.33 - } 3.34 - 3.35 - /* If we got here, there was nothing mapped here, or a bad GFN 3.36 - * was mapped here. This should never happen: we're here because 3.37 - * of a write fault at the end of the instruction we're emulating. */ 3.38 - SHADOW_PRINTK("read failed to va %#lx\n", addr); 3.39 - return X86EMUL_PROPAGATE_FAULT; 3.40 + return X86EMUL_CONTINUE; 3.41 } 3.42 - else 3.43 - { 3.44 - SHADOW_PRINTK("this operation is not emulated yet\n"); 3.45 - return X86EMUL_UNHANDLEABLE; 3.46 - } 3.47 + 3.48 + /* If we got here, there was nothing mapped here, or a bad GFN 3.49 + * was mapped here. This should never happen: we're here because 3.50 + * of a write fault at the end of the instruction we're emulating. */ 3.51 + SHADOW_PRINTK("read failed to va %#lx\n", addr); 3.52 + return X86EMUL_PROPAGATE_FAULT; 3.53 } 3.54 3.55 static int 3.56 @@ -112,33 +104,26 @@ sh_x86_emulate_write_std(unsigned long a 3.57 unsigned int bytes, 3.58 struct x86_emulate_ctxt *ctxt) 3.59 { 3.60 +#if 0 3.61 struct vcpu *v = current; 3.62 -#if 0 3.63 SHADOW_PRINTK("d=%u v=%u a=%#lx v=%#lx bytes=%u\n", 3.64 v->domain->domain_id, v->vcpu_id, addr, val, bytes); 3.65 #endif 3.66 - if ( hvm_guest(v) ) 3.67 - { 3.68 - // XXX -- this is WRONG. 3.69 - // It entirely ignores the permissions in the page tables. 3.70 - // In this case, that includes user vs supervisor, and 3.71 - // write access. 3.72 - // 3.73 - if ( hvm_copy(&val, addr, bytes, HVM_COPY_OUT) ) 3.74 - return X86EMUL_CONTINUE; 3.75 - 3.76 - /* If we got here, there was nothing mapped here, or a bad GFN 3.77 - * was mapped here. This should never happen: we're here because 3.78 - * of a write fault at the end of the instruction we're emulating, 3.79 - * which should be handled by sh_x86_emulate_write_emulated. */ 3.80 - SHADOW_PRINTK("write failed to va %#lx\n", addr); 3.81 - return X86EMUL_PROPAGATE_FAULT; 3.82 - } 3.83 - else 3.84 - { 3.85 - SHADOW_PRINTK("this operation is not emulated yet\n"); 3.86 - return X86EMUL_UNHANDLEABLE; 3.87 - } 3.88 + 3.89 + // XXX -- this is WRONG. 3.90 + // It entirely ignores the permissions in the page tables. 3.91 + // In this case, that includes user vs supervisor, and 3.92 + // write access. 
3.93 + // 3.94 + if ( hvm_copy(&val, addr, bytes, HVM_COPY_OUT) ) 3.95 + return X86EMUL_CONTINUE; 3.96 + 3.97 + /* If we got here, there was nothing mapped here, or a bad GFN 3.98 + * was mapped here. This should never happen: we're here because 3.99 + * of a write fault at the end of the instruction we're emulating, 3.100 + * which should be handled by sh_x86_emulate_write_emulated. */ 3.101 + SHADOW_PRINTK("write failed to va %#lx\n", addr); 3.102 + return X86EMUL_PROPAGATE_FAULT; 3.103 } 3.104 3.105 static int 3.106 @@ -152,15 +137,7 @@ sh_x86_emulate_write_emulated(unsigned l 3.107 SHADOW_PRINTK("d=%u v=%u a=%#lx v=%#lx bytes=%u\n", 3.108 v->domain->domain_id, v->vcpu_id, addr, val, bytes); 3.109 #endif 3.110 - if ( hvm_guest(v) ) 3.111 - { 3.112 - return v->arch.shadow.mode->x86_emulate_write(v, addr, &val, bytes, ctxt); 3.113 - } 3.114 - else 3.115 - { 3.116 - SHADOW_PRINTK("this operation is not emulated yet\n"); 3.117 - return X86EMUL_UNHANDLEABLE; 3.118 - } 3.119 + return v->arch.shadow.mode->x86_emulate_write(v, addr, &val, bytes, ctxt); 3.120 } 3.121 3.122 static int 3.123 @@ -175,16 +152,8 @@ sh_x86_emulate_cmpxchg_emulated(unsigned 3.124 SHADOW_PRINTK("d=%u v=%u a=%#lx o?=%#lx n:=%#lx bytes=%u\n", 3.125 v->domain->domain_id, v->vcpu_id, addr, old, new, bytes); 3.126 #endif 3.127 - if ( hvm_guest(v) ) 3.128 - { 3.129 - return v->arch.shadow.mode->x86_emulate_cmpxchg(v, addr, old, new, 3.130 - bytes, ctxt); 3.131 - } 3.132 - else 3.133 - { 3.134 - SHADOW_PRINTK("this operation is not emulated yet\n"); 3.135 - return X86EMUL_UNHANDLEABLE; 3.136 - } 3.137 + return v->arch.shadow.mode->x86_emulate_cmpxchg(v, addr, old, new, 3.138 + bytes, ctxt); 3.139 } 3.140 3.141 static int 3.142 @@ -201,16 +170,8 @@ sh_x86_emulate_cmpxchg8b_emulated(unsign 3.143 v->domain->domain_id, v->vcpu_id, addr, old_hi, old_lo, 3.144 new_hi, new_lo, ctxt); 3.145 #endif 3.146 - if ( hvm_guest(v) ) 3.147 - { 3.148 - return v->arch.shadow.mode->x86_emulate_cmpxchg8b(v, addr, old_lo, old_hi, 3.149 - new_lo, new_hi, ctxt); 3.150 - } 3.151 - else 3.152 - { 3.153 - SHADOW_PRINTK("this operation is not emulated yet\n"); 3.154 - return X86EMUL_UNHANDLEABLE; 3.155 - } 3.156 + return v->arch.shadow.mode->x86_emulate_cmpxchg8b(v, addr, old_lo, old_hi, 3.157 + new_lo, new_hi, ctxt); 3.158 } 3.159 3.160 3.161 @@ -267,7 +228,7 @@ void shadow_demote(struct vcpu *v, mfn_t 3.162 /* Validate a pagetable change from the guest and update the shadows. 3.163 * Returns a bitmask of SHADOW_SET_* flags. */ 3.164 3.165 -static int 3.166 +int 3.167 __shadow_validate_guest_entry(struct vcpu *v, mfn_t gmfn, 3.168 void *entry, u32 size) 3.169 { 3.170 @@ -367,7 +328,9 @@ shadow_validate_guest_entry(struct vcpu 3.171 void 3.172 shadow_validate_guest_pt_write(struct vcpu *v, mfn_t gmfn, 3.173 void *entry, u32 size) 3.174 -/* This is the entry point for emulated writes to pagetables in HVM guests */ 3.175 +/* This is the entry point for emulated writes to pagetables in HVM guests and 3.176 + * PV translated guests. 3.177 + */ 3.178 { 3.179 struct domain *d = v->domain; 3.180 int rc; 3.181 @@ -806,7 +769,7 @@ void shadow_free(struct domain *d, mfn_t 3.182 3.183 /* Divert some memory from the pool to be used by the p2m mapping. 3.184 * This action is irreversible: the p2m mapping only ever grows. 3.185 - * That's OK because the p2m table only exists for external domains, 3.186 + * That's OK because the p2m table only exists for translated domains, 3.187 * and those domains can't ever turn off shadow mode. 
3.188 * Also, we only ever allocate a max-order chunk, so as to preserve 3.189 * the invariant that shadow_prealloc() always works. 3.190 @@ -830,7 +793,12 @@ shadow_alloc_p2m_pages(struct domain *d) 3.191 d->arch.shadow.total_pages -= (1<<SHADOW_MAX_ORDER); 3.192 for (i = 0; i < (1<<SHADOW_MAX_ORDER); i++) 3.193 { 3.194 - /* Unlike shadow pages, mark p2m pages as owned by the domain */ 3.195 + /* Unlike shadow pages, mark p2m pages as owned by the domain. 3.196 + * Marking the domain as the owner would normally allow the guest to 3.197 + * create mappings of these pages, but these p2m pages will never be 3.198 + * in the domain's guest-physical address space, and so that is not 3.199 + * believed to be a concern. 3.200 + */ 3.201 page_set_owner(&pg[i], d); 3.202 list_add_tail(&pg[i].list, &d->arch.shadow.p2m_freelist); 3.203 } 3.204 @@ -2269,7 +2237,7 @@ void sh_update_paging_modes(struct vcpu 3.205 // 3.206 if ( !test_bit(_VCPUF_initialised, &v->vcpu_flags) ) 3.207 { 3.208 - printk("%s: postponing determination of shadow mode\n", __func__); 3.209 + SHADOW_PRINTK("%s: postponing determination of shadow mode\n", __func__); 3.210 return; 3.211 } 3.212 3.213 @@ -2294,6 +2262,7 @@ void sh_update_paging_modes(struct vcpu 3.214 #else 3.215 #error unexpected paging mode 3.216 #endif 3.217 + v->arch.shadow.translate_enabled = !!shadow_mode_translate(d); 3.218 } 3.219 else 3.220 { 3.221 @@ -2303,8 +2272,8 @@ void sh_update_paging_modes(struct vcpu 3.222 ASSERT(shadow_mode_translate(d)); 3.223 ASSERT(shadow_mode_external(d)); 3.224 3.225 - v->arch.shadow.hvm_paging_enabled = !!hvm_paging_enabled(v); 3.226 - if ( !v->arch.shadow.hvm_paging_enabled ) 3.227 + v->arch.shadow.translate_enabled = !!hvm_paging_enabled(v); 3.228 + if ( !v->arch.shadow.translate_enabled ) 3.229 { 3.230 3.231 /* Set v->arch.guest_table to use the p2m map, and choose 3.232 @@ -2381,13 +2350,14 @@ void sh_update_paging_modes(struct vcpu 3.233 3.234 if ( v->arch.shadow.mode != old_mode ) 3.235 { 3.236 - SHADOW_PRINTK("new paging mode: d=%u v=%u g=%u s=%u " 3.237 - "(was g=%u s=%u)\n", 3.238 - d->domain_id, v->vcpu_id, 3.239 - v->arch.shadow.mode->guest_levels, 3.240 - v->arch.shadow.mode->shadow_levels, 3.241 - old_mode ? old_mode->guest_levels : 0, 3.242 - old_mode ? old_mode->shadow_levels : 0); 3.243 + SHADOW_PRINTK("new paging mode: d=%u v=%u pe=%d g=%u s=%u " 3.244 + "(was g=%u s=%u)\n", 3.245 + d->domain_id, v->vcpu_id, 3.246 + hvm_guest(v) ? !!hvm_paging_enabled(v) : 1, 3.247 + v->arch.shadow.mode->guest_levels, 3.248 + v->arch.shadow.mode->shadow_levels, 3.249 + old_mode ? old_mode->guest_levels : 0, 3.250 + old_mode ? old_mode->shadow_levels : 0); 3.251 if ( old_mode && 3.252 (v->arch.shadow.mode->shadow_levels != 3.253 old_mode->shadow_levels) ) 3.254 @@ -2467,6 +2437,7 @@ static int shadow_enable(struct domain * 3.255 /* Sanity check the arguments */ 3.256 if ( (d == current->domain) || 3.257 shadow_mode_enabled(d) || 3.258 + ((mode & SHM2_translate) && !(mode & SHM2_refcounts)) || 3.259 ((mode & SHM2_external) && !(mode & SHM2_translate)) ) 3.260 { 3.261 rv = -EINVAL; 3.262 @@ -2522,7 +2493,7 @@ static int shadow_enable(struct domain * 3.263 out: 3.264 shadow_unlock(d); 3.265 domain_unpause(d); 3.266 - return 0; 3.267 + return rv; 3.268 } 3.269 3.270 void shadow_teardown(struct domain *d)
4.1 --- a/xen/arch/x86/mm/shadow/multi.c Thu Sep 28 17:09:11 2006 +0100 4.2 +++ b/xen/arch/x86/mm/shadow/multi.c Thu Sep 28 17:10:54 2006 +0100 4.3 @@ -483,8 +483,7 @@ static u32 guest_set_ad_bits(struct vcpu 4.4 unsigned int level, 4.5 fetch_type_t ft) 4.6 { 4.7 - u32 flags, shflags, bit; 4.8 - struct page_info *pg; 4.9 + u32 flags; 4.10 int res = 0; 4.11 4.12 ASSERT(valid_mfn(gmfn) 4.13 @@ -502,11 +501,10 @@ static u32 guest_set_ad_bits(struct vcpu 4.14 if ( unlikely(GUEST_PAGING_LEVELS == 3 && level == 3) ) 4.15 return flags; 4.16 4.17 - /* Need the D bit as well for writes, in l1es and 32bit/PAE PSE l2es. */ 4.18 + /* Need the D bit as well for writes, in L1es and PSE L2es. */ 4.19 if ( ft == ft_demand_write 4.20 - && (level == 1 || 4.21 - (level == 2 && GUEST_PAGING_LEVELS < 4 4.22 - && (flags & _PAGE_PSE) && guest_supports_superpages(v))) ) 4.23 + && (level == 1 || 4.24 + (level == 2 && (flags & _PAGE_PSE) && guest_supports_superpages(v))) ) 4.25 { 4.26 if ( (flags & (_PAGE_DIRTY | _PAGE_ACCESSED)) 4.27 == (_PAGE_DIRTY | _PAGE_ACCESSED) ) 4.28 @@ -524,77 +522,70 @@ static u32 guest_set_ad_bits(struct vcpu 4.29 4.30 /* Set the bit(s) */ 4.31 sh_mark_dirty(v->domain, gmfn); 4.32 - SHADOW_DEBUG(A_AND_D, "gfn = %"SH_PRI_gfn", " 4.33 + SHADOW_DEBUG(A_AND_D, "gfn = %" SH_PRI_gfn ", " 4.34 "old flags = %#x, new flags = %#x\n", 4.35 - guest_l1e_get_gfn(*ep), guest_l1e_get_flags(*ep), flags); 4.36 + gfn_x(guest_l1e_get_gfn(*ep)), guest_l1e_get_flags(*ep), flags); 4.37 *ep = guest_l1e_from_gfn(guest_l1e_get_gfn(*ep), flags); 4.38 4.39 - /* May need to propagate this change forward to other kinds of shadow */ 4.40 - pg = mfn_to_page(gmfn); 4.41 - if ( !sh_mfn_is_a_page_table(gmfn) ) 4.42 - { 4.43 - /* This guest pagetable is not yet shadowed at all. */ 4.44 - // MAF: I think this assert is busted... If this gmfn has not yet 4.45 - // been promoted, then it seems perfectly reasonable for there to be 4.46 - // outstanding type refs to it... 4.47 - /* TJD: No. If the gmfn has not been promoted, we must at least 4.48 - * have recognised that it is a pagetable, and pulled write access. 4.49 - * The type count should only be non-zero if it is actually a page 4.50 - * table. The test above was incorrect, though, so I've fixed it. */ 4.51 - ASSERT((pg->u.inuse.type_info & PGT_count_mask) == 0); 4.52 - return flags; 4.53 - } 4.54 - 4.55 - shflags = pg->shadow_flags & SHF_page_type_mask; 4.56 - while ( shflags ) 4.57 - { 4.58 - bit = find_first_set_bit(shflags); 4.59 - ASSERT(shflags & (1u << bit)); 4.60 - shflags &= ~(1u << bit); 4.61 - if ( !(pg->shadow_flags & (1u << bit)) ) 4.62 - continue; 4.63 - switch ( bit ) 4.64 - { 4.65 - case PGC_SH_type_to_index(PGC_SH_l1_shadow): 4.66 - if (level != 1) 4.67 - res |= sh_map_and_validate_gl1e(v, gmfn, ep, sizeof (*ep)); 4.68 - break; 4.69 - case PGC_SH_type_to_index(PGC_SH_l2_shadow): 4.70 - if (level != 2) 4.71 - res |= sh_map_and_validate_gl2e(v, gmfn, ep, sizeof (*ep)); 4.72 - break; 4.73 -#if GUEST_PAGING_LEVELS == 3 /* PAE only */ 4.74 - case PGC_SH_type_to_index(PGC_SH_l2h_shadow): 4.75 - if (level != 2) 4.76 - res |= sh_map_and_validate_gl2he(v, gmfn, ep, sizeof (*ep)); 4.77 - break; 4.78 -#endif 4.79 -#if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */ 4.80 - case PGC_SH_type_to_index(PGC_SH_l3_shadow): 4.81 - if (level != 3) 4.82 - res |= sh_map_and_validate_gl3e(v, gmfn, ep, sizeof (*ep)); 4.83 - break; 4.84 -#if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... 
*/ 4.85 - case PGC_SH_type_to_index(PGC_SH_l4_shadow): 4.86 - if (level != 4) 4.87 - res |= sh_map_and_validate_gl4e(v, gmfn, ep, sizeof (*ep)); 4.88 - break; 4.89 -#endif 4.90 -#endif 4.91 - default: 4.92 - SHADOW_ERROR("mfn %"SH_PRI_mfn" is shadowed in multiple " 4.93 - "modes: A&D bits may be out of sync (flags=%#x).\n", 4.94 - mfn_x(gmfn), pg->shadow_flags); 4.95 - /* XXX Shadows in other modes will not be updated, so will 4.96 - * have their A and D bits out of sync. */ 4.97 - } 4.98 - } 4.99 - 4.100 + /* Propagate this change to any existing shadows */ 4.101 + res = __shadow_validate_guest_entry(v, gmfn, ep, sizeof(*ep)); 4.102 + 4.103 /* We should never need to flush the TLB or recopy PAE entries */ 4.104 - ASSERT( res == 0 || res == SHADOW_SET_CHANGED ); 4.105 + ASSERT((res == 0) || (res == SHADOW_SET_CHANGED)); 4.106 + 4.107 return flags; 4.108 } 4.109 4.110 +#if (CONFIG_PAGING_LEVELS == GUEST_PAGING_LEVELS) && (CONFIG_PAGING_LEVELS == SHADOW_PAGING_LEVELS) 4.111 +void * 4.112 +sh_guest_map_l1e(struct vcpu *v, unsigned long addr, 4.113 + unsigned long *gl1mfn) 4.114 +{ 4.115 + void *pl1e = NULL; 4.116 + walk_t gw; 4.117 + 4.118 + ASSERT(shadow_mode_translate(v->domain)); 4.119 + 4.120 + // XXX -- this is expensive, but it's easy to cobble together... 4.121 + // FIXME! 4.122 + 4.123 + shadow_lock(v->domain); 4.124 + guest_walk_tables(v, addr, &gw, 1); 4.125 + 4.126 + if ( gw.l2e && 4.127 + (guest_l2e_get_flags(*gw.l2e) & _PAGE_PRESENT) && 4.128 + !(guest_supports_superpages(v) && (guest_l2e_get_flags(*gw.l2e) & _PAGE_PSE)) ) 4.129 + { 4.130 + if ( gl1mfn ) 4.131 + *gl1mfn = mfn_x(gw.l1mfn); 4.132 + pl1e = map_domain_page(mfn_x(gw.l1mfn)) + 4.133 + (guest_l1_table_offset(addr) * sizeof(guest_l1e_t)); 4.134 + } 4.135 + 4.136 + unmap_walk(v, &gw); 4.137 + shadow_unlock(v->domain); 4.138 + 4.139 + return pl1e; 4.140 +} 4.141 + 4.142 +void 4.143 +sh_guest_get_eff_l1e(struct vcpu *v, unsigned long addr, void *eff_l1e) 4.144 +{ 4.145 + walk_t gw; 4.146 + 4.147 + ASSERT(shadow_mode_translate(v->domain)); 4.148 + 4.149 + // XXX -- this is expensive, but it's easy to cobble together... 4.150 + // FIXME! 4.151 + 4.152 + shadow_lock(v->domain); 4.153 + guest_walk_tables(v, addr, &gw, 1); 4.154 + *(guest_l1e_t *)eff_l1e = gw.eff_l1e; 4.155 + unmap_walk(v, &gw); 4.156 + shadow_unlock(v->domain); 4.157 +} 4.158 +#endif /* CONFIG==SHADOW==GUEST */ 4.159 + 4.160 /**************************************************************************/ 4.161 /* Functions to compute the correct index into a shadow page, given an 4.162 * index into the guest page (as returned by guest_get_index()). 4.163 @@ -709,17 +700,6 @@ shadow_l4_index(mfn_t *smfn, u32 guest_i 4.164 * to the _PAGE_DIRTY bit handling), but for L[234], they are grouped together 4.165 * into the respective demand_fault functions. 4.166 */ 4.167 - 4.168 -#define CHECK(_cond) \ 4.169 -do { \ 4.170 - if (unlikely(!(_cond))) \ 4.171 - { \ 4.172 - printk("%s %s %d ASSERTION (%s) FAILED\n", \ 4.173 - __func__, __FILE__, __LINE__, #_cond); \ 4.174 - return -1; \ 4.175 - } \ 4.176 -} while (0); 4.177 - 4.178 // The function below tries to capture all of the flag manipulation for the 4.179 // demand and propagate functions into one place. 
4.180 // 4.181 @@ -728,6 +708,16 @@ sh_propagate_flags(struct vcpu *v, mfn_t 4.182 u32 gflags, guest_l1e_t *guest_entry_ptr, mfn_t gmfn, 4.183 int mmio, int level, fetch_type_t ft) 4.184 { 4.185 +#define CHECK(_cond) \ 4.186 +do { \ 4.187 + if (unlikely(!(_cond))) \ 4.188 + { \ 4.189 + printk("%s %s %d ASSERTION (%s) FAILED\n", \ 4.190 + __func__, __FILE__, __LINE__, #_cond); \ 4.191 + domain_crash(d); \ 4.192 + } \ 4.193 +} while (0); 4.194 + 4.195 struct domain *d = v->domain; 4.196 u32 pass_thru_flags; 4.197 u32 sflags; 4.198 @@ -763,6 +753,10 @@ sh_propagate_flags(struct vcpu *v, mfn_t 4.199 return 0; 4.200 } 4.201 4.202 + // Set the A and D bits in the guest entry, if we need to. 4.203 + if ( guest_entry_ptr && (ft & FETCH_TYPE_DEMAND) ) 4.204 + gflags = guest_set_ad_bits(v, gmfn, guest_entry_ptr, level, ft); 4.205 + 4.206 // PAE does not allow NX, RW, USER, ACCESSED, or DIRTY bits in its L3e's... 4.207 // 4.208 if ( (SHADOW_PAGING_LEVELS == 3) && (level == 3) ) 4.209 @@ -797,17 +791,12 @@ sh_propagate_flags(struct vcpu *v, mfn_t 4.210 // Higher level entries do not, strictly speaking, have dirty bits, but 4.211 // since we use shadow linear tables, each of these entries may, at some 4.212 // point in time, also serve as a shadow L1 entry. 4.213 - // By setting both the A&D bits in each of these, we eliminate the burden 4.214 + // By setting both the A&D bits in each of these, we eliminate the burden 4.215 // on the hardware to update these bits on initial accesses. 4.216 // 4.217 if ( (level > 1) && !((SHADOW_PAGING_LEVELS == 3) && (level == 3)) ) 4.218 sflags |= _PAGE_ACCESSED | _PAGE_DIRTY; 4.219 4.220 - 4.221 - // Set the A and D bits in the guest entry, if we need to. 4.222 - if ( guest_entry_ptr && (ft & FETCH_TYPE_DEMAND) ) 4.223 - gflags = guest_set_ad_bits(v, gmfn, guest_entry_ptr, level, ft); 4.224 - 4.225 // If the A or D bit has not yet been set in the guest, then we must 4.226 // prevent the corresponding kind of access. 4.227 // 4.228 @@ -815,12 +804,12 @@ sh_propagate_flags(struct vcpu *v, mfn_t 4.229 !(gflags & _PAGE_ACCESSED)) ) 4.230 sflags &= ~_PAGE_PRESENT; 4.231 4.232 - /* D bits exist in l1es, and 32bit/PAE PSE l2es, but not 64bit PSE l2es */ 4.233 - if ( unlikely( ((level == 1) 4.234 - || ((level == 2) && (GUEST_PAGING_LEVELS < 4) 4.235 - && guest_supports_superpages(v) && 4.236 - (gflags & _PAGE_PSE))) 4.237 - && !(gflags & _PAGE_DIRTY)) ) 4.238 + /* D bits exist in L1es and PSE L2es */ 4.239 + if ( unlikely(((level == 1) || 4.240 + ((level == 2) && 4.241 + (gflags & _PAGE_PSE) && 4.242 + guest_supports_superpages(v))) 4.243 + && !(gflags & _PAGE_DIRTY)) ) 4.244 sflags &= ~_PAGE_RW; 4.245 4.246 // MMIO caching 4.247 @@ -869,11 +858,18 @@ sh_propagate_flags(struct vcpu *v, mfn_t 4.248 } 4.249 } 4.250 4.251 + // PV guests in 64-bit mode use two different page tables for user vs 4.252 + // supervisor permissions, making the guest's _PAGE_USER bit irrelevant. 4.253 + // It is always shadowed as present... 
4.254 + if ( (GUEST_PAGING_LEVELS == 4) && !hvm_guest(v) ) 4.255 + { 4.256 + sflags |= _PAGE_USER; 4.257 + } 4.258 + 4.259 return sflags; 4.260 +#undef CHECK 4.261 } 4.262 4.263 -#undef CHECK 4.264 - 4.265 #if GUEST_PAGING_LEVELS >= 4 4.266 static void 4.267 l4e_propagate_from_guest(struct vcpu *v, 4.268 @@ -1732,11 +1728,21 @@ void sh_install_xen_entries_in_l4(struct 4.269 __PAGE_HYPERVISOR); 4.270 4.271 /* Linear mapping */ 4.272 - sl4e[shadow_l4_table_offset(LINEAR_PT_VIRT_START)] = 4.273 - shadow_l4e_from_mfn(gl4mfn, __PAGE_HYPERVISOR); 4.274 sl4e[shadow_l4_table_offset(SH_LINEAR_PT_VIRT_START)] = 4.275 shadow_l4e_from_mfn(sl4mfn, __PAGE_HYPERVISOR); 4.276 4.277 + if ( shadow_mode_translate(v->domain) && !shadow_mode_external(v->domain) ) 4.278 + { 4.279 + // linear tables may not be used with translated PV guests 4.280 + sl4e[shadow_l4_table_offset(LINEAR_PT_VIRT_START)] = 4.281 + shadow_l4e_empty(); 4.282 + } 4.283 + else 4.284 + { 4.285 + sl4e[shadow_l4_table_offset(LINEAR_PT_VIRT_START)] = 4.286 + shadow_l4e_from_mfn(gl4mfn, __PAGE_HYPERVISOR); 4.287 + } 4.288 + 4.289 if ( shadow_mode_translate(v->domain) ) 4.290 { 4.291 /* install domain-specific P2M table */ 4.292 @@ -1779,7 +1785,15 @@ void sh_install_xen_entries_in_l2h(struc 4.293 4.294 /* We don't set up a linear mapping here because we can't until this 4.295 * l2h is installed in an l3e. sh_update_linear_entries() handles 4.296 - * the linear mappings when the l3 is loaded. */ 4.297 + * the linear mappings when the l3 is loaded. We zero them here, just as 4.298 + * a safety measure. 4.299 + */ 4.300 + for ( i = 0; i < SHADOW_L3_PAGETABLE_ENTRIES; i++ ) 4.301 + sl2e[shadow_l2_table_offset(LINEAR_PT_VIRT_START) + i] = 4.302 + shadow_l2e_empty(); 4.303 + for ( i = 0; i < SHADOW_L3_PAGETABLE_ENTRIES; i++ ) 4.304 + sl2e[shadow_l2_table_offset(SH_LINEAR_PT_VIRT_START) + i] = 4.305 + shadow_l2e_empty(); 4.306 4.307 if ( shadow_mode_translate(d) ) 4.308 { 4.309 @@ -1817,6 +1831,12 @@ void sh_install_xen_entries_in_l3(struct 4.310 l2smfn = get_shadow_status(v, l2gmfn, PGC_SH_l2h_shadow); 4.311 if ( !valid_mfn(l2smfn) ) 4.312 { 4.313 + /* must remove write access to this page before shadowing it */ 4.314 + // XXX -- should check to see whether this is better with level==0 or 4.315 + // level==2... 
4.316 + if ( shadow_remove_write_access(v, l2gmfn, 2, 0xc0000000ul) != 0 ) 4.317 + flush_tlb_mask(v->domain->domain_dirty_cpumask); 4.318 + 4.319 l2smfn = sh_make_shadow(v, l2gmfn, PGC_SH_l2h_shadow); 4.320 } 4.321 l3e_propagate_from_guest(v, &gl3e[3], gl3mfn, l2smfn, &new_sl3e, 4.322 @@ -1852,11 +1872,21 @@ void sh_install_xen_entries_in_l2(struct 4.323 __PAGE_HYPERVISOR); 4.324 4.325 /* Linear mapping */ 4.326 - sl2e[shadow_l2_table_offset(LINEAR_PT_VIRT_START)] = 4.327 - shadow_l2e_from_mfn(gl2mfn, __PAGE_HYPERVISOR); 4.328 sl2e[shadow_l2_table_offset(SH_LINEAR_PT_VIRT_START)] = 4.329 shadow_l2e_from_mfn(sl2mfn, __PAGE_HYPERVISOR); 4.330 4.331 + if ( shadow_mode_translate(v->domain) && !shadow_mode_external(v->domain) ) 4.332 + { 4.333 + // linear tables may not be used with translated PV guests 4.334 + sl2e[shadow_l2_table_offset(LINEAR_PT_VIRT_START)] = 4.335 + shadow_l2e_empty(); 4.336 + } 4.337 + else 4.338 + { 4.339 + sl2e[shadow_l2_table_offset(LINEAR_PT_VIRT_START)] = 4.340 + shadow_l2e_from_mfn(gl2mfn, __PAGE_HYPERVISOR); 4.341 + } 4.342 + 4.343 if ( shadow_mode_translate(d) ) 4.344 { 4.345 /* install domain-specific P2M table */ 4.346 @@ -2527,6 +2557,32 @@ static int validate_gl4e(struct vcpu *v, 4.347 } 4.348 l4e_propagate_from_guest(v, new_gl4e, _mfn(INVALID_MFN), 4.349 sl3mfn, &new_sl4e, ft_prefetch); 4.350 + 4.351 + // check for updates to xen reserved slots 4.352 + if ( !shadow_mode_external(v->domain) ) 4.353 + { 4.354 + int shadow_index = (((unsigned long)sl4p & ~PAGE_MASK) / 4.355 + sizeof(shadow_l4e_t)); 4.356 + int reserved_xen_slot = !is_guest_l4_slot(shadow_index); 4.357 + 4.358 + if ( unlikely(reserved_xen_slot) ) 4.359 + { 4.360 + // attempt by the guest to write to a xen reserved slot 4.361 + // 4.362 + SHADOW_PRINTK("%s out-of-range update " 4.363 + "sl4mfn=%05lx index=0x%x val=%" SH_PRI_pte "\n", 4.364 + __func__, mfn_x(sl4mfn), shadow_index, new_sl4e.l4); 4.365 + if ( shadow_l4e_get_flags(new_sl4e) & _PAGE_PRESENT ) 4.366 + { 4.367 + SHADOW_ERROR("out-of-range l4e update\n"); 4.368 + result |= SHADOW_SET_ERROR; 4.369 + } 4.370 + 4.371 + // do not call shadow_set_l4e... 4.372 + return result; 4.373 + } 4.374 + } 4.375 + 4.376 result |= shadow_set_l4e(v, sl4p, new_sl4e, sl4mfn); 4.377 return result; 4.378 } 4.379 @@ -2616,6 +2672,48 @@ static int validate_gl2e(struct vcpu *v, 4.380 } 4.381 l2e_propagate_from_guest(v, new_gl2e, _mfn(INVALID_MFN), 4.382 sl1mfn, &new_sl2e, ft_prefetch); 4.383 + 4.384 + // check for updates to xen reserved slots in PV guests... 4.385 + // XXX -- need to revisit this for PV 3-on-4 guests. 
4.386 + // 4.387 +#if SHADOW_PAGING_LEVELS < 4 4.388 +#if CONFIG_PAGING_LEVELS == SHADOW_PAGING_LEVELS 4.389 + if ( !shadow_mode_external(v->domain) ) 4.390 + { 4.391 + int shadow_index = (((unsigned long)sl2p & ~PAGE_MASK) / 4.392 + sizeof(shadow_l2e_t)); 4.393 + int reserved_xen_slot; 4.394 + 4.395 +#if SHADOW_PAGING_LEVELS == 3 4.396 + reserved_xen_slot = 4.397 + (((mfn_to_page(sl2mfn)->count_info & PGC_SH_type_mask) 4.398 + == PGC_SH_l2h_pae_shadow) && 4.399 + (shadow_index 4.400 + >= (L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES-1)))); 4.401 +#else /* SHADOW_PAGING_LEVELS == 2 */ 4.402 + reserved_xen_slot = (shadow_index >= L2_PAGETABLE_FIRST_XEN_SLOT); 4.403 +#endif 4.404 + 4.405 + if ( unlikely(reserved_xen_slot) ) 4.406 + { 4.407 + // attempt by the guest to write to a xen reserved slot 4.408 + // 4.409 + SHADOW_PRINTK("%s out-of-range update " 4.410 + "sl2mfn=%05lx index=0x%x val=%" SH_PRI_pte "\n", 4.411 + __func__, mfn_x(sl2mfn), shadow_index, new_sl2e.l2); 4.412 + if ( shadow_l2e_get_flags(new_sl2e) & _PAGE_PRESENT ) 4.413 + { 4.414 + SHADOW_ERROR("out-of-range l2e update\n"); 4.415 + result |= SHADOW_SET_ERROR; 4.416 + } 4.417 + 4.418 + // do not call shadow_set_l2e... 4.419 + return result; 4.420 + } 4.421 + } 4.422 +#endif /* CONFIG_PAGING_LEVELS == SHADOW_PAGING_LEVELS */ 4.423 +#endif /* SHADOW_PAGING_LEVELS < 4 */ 4.424 + 4.425 result |= shadow_set_l2e(v, sl2p, new_sl2e, sl2mfn); 4.426 4.427 return result; 4.428 @@ -2897,7 +2995,7 @@ static int sh_page_fault(struct vcpu *v, 4.429 } 4.430 4.431 // All levels of the guest page table are now known to be present. 4.432 - accumulated_gflags = accumulate_guest_flags(&gw); 4.433 + accumulated_gflags = accumulate_guest_flags(v, &gw); 4.434 4.435 // Check for attempts to access supervisor-only pages from user mode, 4.436 // i.e. ring 3. 
Such errors are not caused or dealt with by the shadow 4.437 @@ -3348,6 +3446,7 @@ sh_update_linear_entries(struct vcpu *v) 4.438 l2_pgentry_t *l2e, new_l2e; 4.439 shadow_l3e_t *guest_l3e = NULL, *shadow_l3e; 4.440 int i; 4.441 + int unmap_l2e = 0; 4.442 4.443 #if GUEST_PAGING_LEVELS == 2 4.444 /* Shadow l3 tables were built by update_cr3 */ 4.445 @@ -3365,39 +3464,45 @@ sh_update_linear_entries(struct vcpu *v) 4.446 #endif /* GUEST_PAGING_LEVELS */ 4.447 4.448 /* Choose where to write the entries, using linear maps if possible */ 4.449 - if ( v == current && shadow_mode_external(d) ) 4.450 - { 4.451 - /* From the monitor tables, it's safe to use linear maps to update 4.452 - * monitor l2s */ 4.453 - l2e = __linear_l2_table + (3 * L2_PAGETABLE_ENTRIES); 4.454 - } 4.455 - else if ( shadow_mode_external(d) ) 4.456 + if ( shadow_mode_external(d) ) 4.457 { 4.458 - /* Map the monitor table's high l2 */ 4.459 - l3_pgentry_t *l3e; 4.460 - l3e = sh_map_domain_page( 4.461 - pagetable_get_mfn(v->arch.monitor_table)); 4.462 - ASSERT(l3e_get_flags(l3e[3]) & _PAGE_PRESENT); 4.463 - l2e = sh_map_domain_page(_mfn(l3e_get_pfn(l3e[3]))); 4.464 - sh_unmap_domain_page(l3e); 4.465 - } 4.466 + if ( v == current ) 4.467 + { 4.468 + /* From the monitor tables, it's safe to use linear maps 4.469 + * to update monitor l2s */ 4.470 + l2e = __linear_l2_table + (3 * L2_PAGETABLE_ENTRIES); 4.471 + } 4.472 + else 4.473 + { 4.474 + /* Map the monitor table's high l2 */ 4.475 + l3_pgentry_t *l3e; 4.476 + l3e = sh_map_domain_page( 4.477 + pagetable_get_mfn(v->arch.monitor_table)); 4.478 + ASSERT(l3e_get_flags(l3e[3]) & _PAGE_PRESENT); 4.479 + l2e = sh_map_domain_page(_mfn(l3e_get_pfn(l3e[3]))); 4.480 + unmap_l2e = 1; 4.481 + sh_unmap_domain_page(l3e); 4.482 + } 4.483 + } 4.484 else 4.485 { 4.486 /* Map the shadow table's high l2 */ 4.487 ASSERT(shadow_l3e_get_flags(shadow_l3e[3]) & _PAGE_PRESENT); 4.488 l2e = sh_map_domain_page(shadow_l3e_get_mfn(shadow_l3e[3])); 4.489 + unmap_l2e = 1; 4.490 } 4.491 4.492 - 4.493 - if ( !shadow_mode_external(d) ) 4.494 + /* Write linear mapping of guest (only in PV, and only when 4.495 + * not translated). */ 4.496 + if ( !shadow_mode_translate(d) ) 4.497 { 4.498 - /* Write linear mapping of guest. */ 4.499 for ( i = 0; i < SHADOW_L3_PAGETABLE_ENTRIES; i++ ) 4.500 - { 4.501 - new_l2e = (shadow_l3e_get_flags(guest_l3e[i]) & _PAGE_PRESENT) 4.502 - ? l2e_from_pfn(mfn_x(shadow_l3e_get_mfn(guest_l3e[i])), 4.503 - __PAGE_HYPERVISOR) 4.504 - : l2e_empty(); 4.505 + { 4.506 + new_l2e = 4.507 + ((shadow_l3e_get_flags(guest_l3e[i]) & _PAGE_PRESENT) 4.508 + ? l2e_from_pfn(mfn_x(shadow_l3e_get_mfn(guest_l3e[i])), 4.509 + __PAGE_HYPERVISOR) 4.510 + : l2e_empty()); 4.511 safe_write_entry( 4.512 &l2e[l2_table_offset(LINEAR_PT_VIRT_START) + i], 4.513 &new_l2e); 4.514 @@ -3416,9 +3521,8 @@ sh_update_linear_entries(struct vcpu *v) 4.515 &new_l2e); 4.516 } 4.517 4.518 - if ( v != current || !shadow_mode_external(d) ) 4.519 + if ( unmap_l2e ) 4.520 sh_unmap_domain_page(l2e); 4.521 - 4.522 } 4.523 4.524 #elif CONFIG_PAGING_LEVELS == 2 4.525 @@ -3521,16 +3625,24 @@ void sh_pae_recopy(struct domain *d) 4.526 static void 4.527 sh_detach_old_tables(struct vcpu *v) 4.528 { 4.529 + struct domain *d = v->domain; 4.530 mfn_t smfn; 4.531 4.532 //// 4.533 //// vcpu->arch.guest_vtable 4.534 //// 4.535 - if ( (shadow_mode_external(v->domain) || (GUEST_PAGING_LEVELS == 3)) && 4.536 - v->arch.guest_vtable ) 4.537 + if ( v->arch.guest_vtable ) 4.538 { 4.539 - // Q: why does this need to use (un)map_domain_page_*global* ? 
4.540 - sh_unmap_domain_page_global(v->arch.guest_vtable); 4.541 +#if GUEST_PAGING_LEVELS == 4 4.542 + if ( shadow_mode_external(d) || shadow_mode_translate(d) ) 4.543 + sh_unmap_domain_page_global(v->arch.guest_vtable); 4.544 +#elif GUEST_PAGING_LEVELS == 3 4.545 + if ( 1 || shadow_mode_external(d) || shadow_mode_translate(d) ) 4.546 + sh_unmap_domain_page_global(v->arch.guest_vtable); 4.547 +#elif GUEST_PAGING_LEVELS == 2 4.548 + if ( shadow_mode_external(d) || shadow_mode_translate(d) ) 4.549 + sh_unmap_domain_page_global(v->arch.guest_vtable); 4.550 +#endif 4.551 v->arch.guest_vtable = NULL; 4.552 } 4.553 4.554 @@ -3645,9 +3757,14 @@ sh_update_cr3(struct vcpu *v) 4.555 //// 4.556 //// vcpu->arch.guest_vtable 4.557 //// 4.558 +#if GUEST_PAGING_LEVELS == 4 4.559 + if ( shadow_mode_external(d) || shadow_mode_translate(d) ) 4.560 + v->arch.guest_vtable = sh_map_domain_page_global(gmfn); 4.561 + else 4.562 + v->arch.guest_vtable = __linear_l4_table; 4.563 +#elif GUEST_PAGING_LEVELS == 3 4.564 if ( shadow_mode_external(d) ) 4.565 { 4.566 -#if GUEST_PAGING_LEVELS == 3 4.567 if ( shadow_vcpu_mode_translate(v) ) 4.568 /* Paging enabled: find where in the page the l3 table is */ 4.569 guest_idx = guest_index((void *)hvm_get_guest_ctrl_reg(v, 3)); 4.570 @@ -3658,25 +3775,21 @@ sh_update_cr3(struct vcpu *v) 4.571 // Ignore the low 2 bits of guest_idx -- they are really just 4.572 // cache control. 4.573 guest_idx &= ~3; 4.574 + 4.575 // XXX - why does this need a global map? 4.576 v->arch.guest_vtable = 4.577 (guest_l3e_t *)sh_map_domain_page_global(gmfn) + guest_idx; 4.578 -#else 4.579 - // XXX - why does this need a global map? 4.580 - v->arch.guest_vtable = sh_map_domain_page_global(gmfn); 4.581 -#endif 4.582 } 4.583 else 4.584 - { 4.585 -#ifdef __x86_64__ 4.586 - v->arch.guest_vtable = __linear_l4_table; 4.587 -#elif GUEST_PAGING_LEVELS == 3 4.588 - // XXX - why does this need a global map? 4.589 + v->arch.guest_vtable = sh_map_domain_page_global(gmfn); 4.590 +#elif GUEST_PAGING_LEVELS == 2 4.591 + if ( shadow_mode_external(d) || shadow_mode_translate(d) ) 4.592 v->arch.guest_vtable = sh_map_domain_page_global(gmfn); 4.593 -#else 4.594 + else 4.595 v->arch.guest_vtable = __linear_l2_table; 4.596 +#else 4.597 +#error this should never happen 4.598 #endif 4.599 - } 4.600 4.601 #if 0 4.602 printk("%s %s %d gmfn=%05lx guest_vtable=%p\n", 4.603 @@ -3744,6 +3857,17 @@ sh_update_cr3(struct vcpu *v) 4.604 #endif 4.605 } 4.606 4.607 +#if (CONFIG_PAGING_LEVELS == 3) && (GUEST_PAGING_LEVELS == 3) 4.608 + // Now that shadow_vtable is in place, check that the sl3e[3] is properly 4.609 + // shadowed and installed in PAE PV guests... 4.610 + if ( !shadow_mode_external(d) && 4.611 + !(shadow_l3e_get_flags(((shadow_l3e_t *)v->arch.shadow_vtable)[3]) & 4.612 + _PAGE_PRESENT) ) 4.613 + { 4.614 + sh_install_xen_entries_in_l3(v, gmfn, smfn); 4.615 + } 4.616 +#endif 4.617 + 4.618 //// 4.619 //// Take a ref to the new shadow table, and pin it. 
4.620 //// 4.621 @@ -4049,7 +4173,7 @@ static inline void * emulate_map_dest(st 4.622 mfn_t mfn; 4.623 4.624 guest_walk_tables(v, vaddr, &gw, 1); 4.625 - flags = accumulate_guest_flags(&gw); 4.626 + flags = accumulate_guest_flags(v, &gw); 4.627 gfn = guest_l1e_get_gfn(gw.eff_l1e); 4.628 mfn = vcpu_gfn_to_mfn(v, gfn); 4.629 sh_audit_gw(v, &gw); 4.630 @@ -4453,6 +4577,8 @@ struct shadow_paging_mode sh_paging_mode 4.631 .x86_emulate_cmpxchg8b = sh_x86_emulate_cmpxchg8b, 4.632 .make_monitor_table = sh_make_monitor_table, 4.633 .destroy_monitor_table = sh_destroy_monitor_table, 4.634 + .guest_map_l1e = sh_guest_map_l1e, 4.635 + .guest_get_eff_l1e = sh_guest_get_eff_l1e, 4.636 #if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC 4.637 .guess_wrmap = sh_guess_wrmap, 4.638 #endif
5.1 --- a/xen/arch/x86/mm/shadow/multi.h Thu Sep 28 17:09:11 2006 +0100
5.2 +++ b/xen/arch/x86/mm/shadow/multi.h Thu Sep 28 17:10:54 2006 +0100
5.3 @@ -103,6 +103,13 @@ SHADOW_INTERNAL_NAME(sh_audit_l4_table,
5.4 (struct vcpu *v, mfn_t sl4mfn, mfn_t x);
5.5 #endif
5.6
5.7 +extern void *
5.8 +SHADOW_INTERNAL_NAME(sh_guest_map_l1e, CONFIG_PAGING_LEVELS, CONFIG_PAGING_LEVELS)
5.9 + (struct vcpu *v, unsigned long va, unsigned long *gl1mfn);
5.10 +extern void
5.11 +SHADOW_INTERNAL_NAME(sh_guest_get_eff_l1e, CONFIG_PAGING_LEVELS, CONFIG_PAGING_LEVELS)
5.12 + (struct vcpu *v, unsigned long va, void *eff_l1e);
5.13 +
5.14 #if SHADOW_LEVELS == GUEST_LEVELS
5.15 extern mfn_t
5.16 SHADOW_INTERNAL_NAME(sh_make_monitor_table, SHADOW_LEVELS, GUEST_LEVELS)
6.1 --- a/xen/arch/x86/mm/shadow/private.h Thu Sep 28 17:09:11 2006 +0100 6.2 +++ b/xen/arch/x86/mm/shadow/private.h Thu Sep 28 17:10:54 2006 +0100 6.3 @@ -532,55 +532,6 @@ static inline void sh_unpin(struct vcpu 6.4 } 6.5 } 6.6 6.7 -/**************************************************************************/ 6.8 -/* Guest physmap (p2m) support */ 6.9 - 6.10 -/* Read our own P2M table, checking in the linear pagetables first to be 6.11 - * sure that we will succeed. Call this function if you expect it to 6.12 - * fail often, as it avoids page faults. If you expect to succeed, use 6.13 - * vcpu_gfn_to_mfn, which copy_from_user()s the entry */ 6.14 -static inline mfn_t 6.15 -vcpu_gfn_to_mfn_nofault(struct vcpu *v, unsigned long gfn) 6.16 -{ 6.17 - unsigned long entry_addr = (unsigned long) &phys_to_machine_mapping[gfn]; 6.18 -#if CONFIG_PAGING_LEVELS >= 4 6.19 - l4_pgentry_t *l4e; 6.20 - l3_pgentry_t *l3e; 6.21 -#endif 6.22 - l2_pgentry_t *l2e; 6.23 - l1_pgentry_t *l1e; 6.24 - 6.25 - ASSERT(current == v); 6.26 - if ( !shadow_vcpu_mode_translate(v) ) 6.27 - return _mfn(gfn); 6.28 - 6.29 -#if CONFIG_PAGING_LEVELS > 2 6.30 - if ( gfn >= (RO_MPT_VIRT_END - RO_MPT_VIRT_START) / sizeof(l1_pgentry_t) ) 6.31 - /* This pfn is higher than the p2m map can hold */ 6.32 - return _mfn(INVALID_MFN); 6.33 -#endif 6.34 - 6.35 - /* Walk the linear pagetables. Note that this is *not* the same as 6.36 - * the walk in sh_gfn_to_mfn_foreign, which is walking the p2m map */ 6.37 -#if CONFIG_PAGING_LEVELS >= 4 6.38 - l4e = __linear_l4_table + l4_linear_offset(entry_addr); 6.39 - if ( !(l4e_get_flags(*l4e) & _PAGE_PRESENT) ) return _mfn(INVALID_MFN); 6.40 - l3e = __linear_l3_table + l3_linear_offset(entry_addr); 6.41 - if ( !(l3e_get_flags(*l3e) & _PAGE_PRESENT) ) return _mfn(INVALID_MFN); 6.42 -#endif 6.43 - l2e = __linear_l2_table + l2_linear_offset(entry_addr); 6.44 - if ( !(l2e_get_flags(*l2e) & _PAGE_PRESENT) ) return _mfn(INVALID_MFN); 6.45 - l1e = __linear_l1_table + l1_linear_offset(entry_addr); 6.46 - if ( !(l1e_get_flags(*l1e) & _PAGE_PRESENT) ) return _mfn(INVALID_MFN); 6.47 - 6.48 - /* Safe to look at this part of the table */ 6.49 - if ( l1e_get_flags(phys_to_machine_mapping[gfn]) & _PAGE_PRESENT ) 6.50 - return _mfn(l1e_get_pfn(phys_to_machine_mapping[gfn])); 6.51 - 6.52 - return _mfn(INVALID_MFN); 6.53 -} 6.54 - 6.55 - 6.56 #endif /* _XEN_SHADOW_PRIVATE_H */ 6.57 6.58 /*
7.1 --- a/xen/arch/x86/mm/shadow/types.h Thu Sep 28 17:09:11 2006 +0100 7.2 +++ b/xen/arch/x86/mm/shadow/types.h Thu Sep 28 17:10:54 2006 +0100 7.3 @@ -205,6 +205,9 @@ static inline shadow_l4e_t shadow_l4e_fr 7.4 __sh_linear_l1_table; \ 7.5 }) 7.6 7.7 +// XXX -- these should not be conditional on hvm_guest(v), but rather on 7.8 +// shadow_mode_external(d)... 7.9 +// 7.10 #define sh_linear_l2_table(v) ({ \ 7.11 ASSERT(current == (v)); \ 7.12 ((shadow_l2e_t *) \ 7.13 @@ -507,10 +510,22 @@ struct shadow_walk_t 7.14 #define sh_guess_wrmap INTERNAL_NAME(sh_guess_wrmap) 7.15 #define sh_clear_shadow_entry INTERNAL_NAME(sh_clear_shadow_entry) 7.16 7.17 +/* The sh_guest_(map|get)_* functions only depends on the number of config 7.18 + * levels 7.19 + */ 7.20 +#define sh_guest_map_l1e \ 7.21 + SHADOW_INTERNAL_NAME(sh_guest_map_l1e, \ 7.22 + CONFIG_PAGING_LEVELS, \ 7.23 + CONFIG_PAGING_LEVELS) 7.24 +#define sh_guest_get_eff_l1e \ 7.25 + SHADOW_INTERNAL_NAME(sh_guest_get_eff_l1e, \ 7.26 + CONFIG_PAGING_LEVELS, \ 7.27 + CONFIG_PAGING_LEVELS) 7.28 + 7.29 /* sh_make_monitor_table only depends on the number of shadow levels */ 7.30 -#define sh_make_monitor_table \ 7.31 - SHADOW_INTERNAL_NAME(sh_make_monitor_table, \ 7.32 - SHADOW_PAGING_LEVELS, \ 7.33 +#define sh_make_monitor_table \ 7.34 + SHADOW_INTERNAL_NAME(sh_make_monitor_table, \ 7.35 + SHADOW_PAGING_LEVELS, \ 7.36 SHADOW_PAGING_LEVELS) 7.37 #define sh_destroy_monitor_table \ 7.38 SHADOW_INTERNAL_NAME(sh_destroy_monitor_table, \ 7.39 @@ -652,7 +667,7 @@ static inline void sh_unpin_l3_subshadow 7.40 #endif /* GUEST_PAGING_LEVELS >= 3 */ 7.41 7.42 static inline u32 7.43 -accumulate_guest_flags(walk_t *gw) 7.44 +accumulate_guest_flags(struct vcpu *v, walk_t *gw) 7.45 { 7.46 u32 accumulated_flags; 7.47 7.48 @@ -674,9 +689,15 @@ accumulate_guest_flags(walk_t *gw) 7.49 accumulated_flags &= guest_l4e_get_flags(*gw->l4e) ^ _PAGE_NX_BIT; 7.50 #endif 7.51 7.52 - // Finally, revert the NX bit back to its original polarity 7.53 + // Revert the NX bit back to its original polarity 7.54 accumulated_flags ^= _PAGE_NX_BIT; 7.55 7.56 + // In 64-bit PV guests, the _PAGE_USER bit is implied in all guest 7.57 + // entries (since even the guest kernel runs in ring 3). 7.58 + // 7.59 + if ( (GUEST_PAGING_LEVELS == 4) && !hvm_guest(v) ) 7.60 + accumulated_flags |= _PAGE_USER; 7.61 + 7.62 return accumulated_flags; 7.63 } 7.64
8.1 --- a/xen/arch/x86/traps.c Thu Sep 28 17:09:11 2006 +0100
8.2 +++ b/xen/arch/x86/traps.c Thu Sep 28 17:10:54 2006 +0100
8.3 @@ -886,7 +886,7 @@ static int fixup_page_fault(unsigned lon
8.4 /* Do not check if access-protection fault since the page may
8.5 legitimately be not present in shadow page tables */
8.6 ((regs->error_code & PFEC_write_access) == PFEC_write_access) &&
8.7 - ptwr_do_page_fault(d, addr, regs) )
8.8 + ptwr_do_page_fault(v, addr, regs) )
8.9 return EXCRET_fault_fixed;
8.10
8.11 if ( shadow_mode_enabled(d) )
9.1 --- a/xen/include/asm-x86/domain.h Thu Sep 28 17:09:11 2006 +0100
9.2 +++ b/xen/include/asm-x86/domain.h Thu Sep 28 17:10:54 2006 +0100
9.3 @@ -139,7 +139,7 @@ struct shadow_vcpu {
9.4 /* Last MFN that we emulated a write to. */
9.5 unsigned long last_emulated_mfn;
9.6 /* HVM guest: paging enabled (CR0.PG)? */
9.7 - unsigned int hvm_paging_enabled:1;
9.8 + unsigned int translate_enabled:1;
9.9 /* Emulated fault needs to be propagated to guest? */
9.10 unsigned int propagate_fault:1;
9.11 #if CONFIG_PAGING_LEVELS >= 3
10.1 --- a/xen/include/asm-x86/guest_access.h Thu Sep 28 17:09:11 2006 +0100 10.2 +++ b/xen/include/asm-x86/guest_access.h Thu Sep 28 17:10:54 2006 +0100 10.3 @@ -8,6 +8,7 @@ 10.4 #define __ASM_X86_GUEST_ACCESS_H__ 10.5 10.6 #include <asm/uaccess.h> 10.7 +#include <asm/shadow.h> 10.8 #include <asm/hvm/support.h> 10.9 #include <asm/hvm/guest_access.h> 10.10 10.11 @@ -33,7 +34,7 @@ 10.12 #define copy_to_guest_offset(hnd, off, ptr, nr) ({ \ 10.13 const typeof(ptr) _x = (hnd).p; \ 10.14 const typeof(ptr) _y = (ptr); \ 10.15 - hvm_guest(current) ? \ 10.16 + shadow_mode_translate(current->domain) ? \ 10.17 copy_to_user_hvm(_x+(off), _y, sizeof(*_x)*(nr)) : \ 10.18 copy_to_user(_x+(off), _y, sizeof(*_x)*(nr)); \ 10.19 }) 10.20 @@ -45,7 +46,7 @@ 10.21 #define copy_from_guest_offset(ptr, hnd, off, nr) ({ \ 10.22 const typeof(ptr) _x = (hnd).p; \ 10.23 const typeof(ptr) _y = (ptr); \ 10.24 - hvm_guest(current) ? \ 10.25 + shadow_mode_translate(current->domain) ? \ 10.26 copy_from_user_hvm(_y, _x+(off), sizeof(*_x)*(nr)) :\ 10.27 copy_from_user(_y, _x+(off), sizeof(*_x)*(nr)); \ 10.28 }) 10.29 @@ -54,7 +55,7 @@ 10.30 #define copy_field_to_guest(hnd, ptr, field) ({ \ 10.31 const typeof(&(ptr)->field) _x = &(hnd).p->field; \ 10.32 const typeof(&(ptr)->field) _y = &(ptr)->field; \ 10.33 - hvm_guest(current) ? \ 10.34 + shadow_mode_translate(current->domain) ? \ 10.35 copy_to_user_hvm(_x, _y, sizeof(*_x)) : \ 10.36 copy_to_user(_x, _y, sizeof(*_x)); \ 10.37 }) 10.38 @@ -63,7 +64,7 @@ 10.39 #define copy_field_from_guest(ptr, hnd, field) ({ \ 10.40 const typeof(&(ptr)->field) _x = &(hnd).p->field; \ 10.41 const typeof(&(ptr)->field) _y = &(ptr)->field; \ 10.42 - hvm_guest(current) ? \ 10.43 + shadow_mode_translate(current->domain) ? \ 10.44 copy_from_user_hvm(_y, _x, sizeof(*_x)) : \ 10.45 copy_from_user(_y, _x, sizeof(*_x)); \ 10.46 }) 10.47 @@ -73,12 +74,13 @@ 10.48 * Allows use of faster __copy_* functions. 10.49 */ 10.50 #define guest_handle_okay(hnd, nr) \ 10.51 - (hvm_guest(current) || array_access_ok((hnd).p, (nr), sizeof(*(hnd).p))) 10.52 + (shadow_mode_external(current->domain) || \ 10.53 + array_access_ok((hnd).p, (nr), sizeof(*(hnd).p))) 10.54 10.55 #define __copy_to_guest_offset(hnd, off, ptr, nr) ({ \ 10.56 const typeof(ptr) _x = (hnd).p; \ 10.57 const typeof(ptr) _y = (ptr); \ 10.58 - hvm_guest(current) ? \ 10.59 + shadow_mode_translate(current->domain) ? \ 10.60 copy_to_user_hvm(_x+(off), _y, sizeof(*_x)*(nr)) : \ 10.61 __copy_to_user(_x+(off), _y, sizeof(*_x)*(nr)); \ 10.62 }) 10.63 @@ -86,7 +88,7 @@ 10.64 #define __copy_from_guest_offset(ptr, hnd, off, nr) ({ \ 10.65 const typeof(ptr) _x = (hnd).p; \ 10.66 const typeof(ptr) _y = (ptr); \ 10.67 - hvm_guest(current) ? \ 10.68 + shadow_mode_translate(current->domain) ? \ 10.69 copy_from_user_hvm(_y, _x+(off),sizeof(*_x)*(nr)) : \ 10.70 __copy_from_user(_y, _x+(off), sizeof(*_x)*(nr)); \ 10.71 }) 10.72 @@ -94,7 +96,7 @@ 10.73 #define __copy_field_to_guest(hnd, ptr, field) ({ \ 10.74 const typeof(&(ptr)->field) _x = &(hnd).p->field; \ 10.75 const typeof(&(ptr)->field) _y = &(ptr)->field; \ 10.76 - hvm_guest(current) ? \ 10.77 + shadow_mode_translate(current->domain) ? \ 10.78 copy_to_user_hvm(_x, _y, sizeof(*_x)) : \ 10.79 __copy_to_user(_x, _y, sizeof(*_x)); \ 10.80 }) 10.81 @@ -102,7 +104,7 @@ 10.82 #define __copy_field_from_guest(ptr, hnd, field) ({ \ 10.83 const typeof(&(ptr)->field) _x = &(hnd).p->field; \ 10.84 const typeof(&(ptr)->field) _y = &(ptr)->field; \ 10.85 - hvm_guest(current) ? \ 10.86 + shadow_mode_translate(current->domain) ? 
\ 10.87 copy_from_user_hvm(_x, _y, sizeof(*_x)) : \ 10.88 __copy_from_user(_y, _x, sizeof(*_x)); \ 10.89 })
11.1 --- a/xen/include/asm-x86/mm.h Thu Sep 28 17:09:11 2006 +0100
11.2 +++ b/xen/include/asm-x86/mm.h Thu Sep 28 17:10:54 2006 +0100
11.3 @@ -348,7 +348,7 @@ void memguard_unguard_range(void *p, uns
11.4
11.5 void memguard_guard_stack(void *p);
11.6
11.7 -int ptwr_do_page_fault(struct domain *, unsigned long,
11.8 +int ptwr_do_page_fault(struct vcpu *, unsigned long,
11.9 struct cpu_user_regs *);
11.10
11.11 int audit_adjust_pgtables(struct domain *d, int dir, int noisy);
12.1 --- a/xen/include/asm-x86/shadow.h Thu Sep 28 17:09:11 2006 +0100 12.2 +++ b/xen/include/asm-x86/shadow.h Thu Sep 28 17:10:54 2006 +0100 12.3 @@ -26,6 +26,7 @@ 12.4 #include <public/domctl.h> 12.5 #include <xen/sched.h> 12.6 #include <xen/perfc.h> 12.7 +#include <xen/domain_page.h> 12.8 #include <asm/flushtlb.h> 12.9 12.10 /* How to make sure a page is not referred to in a shadow PT */ 12.11 @@ -245,7 +246,9 @@ shadow_vcpu_mode_translate(struct vcpu * 12.12 // enabled. (HVM vcpu's with paging disabled are using the p2m table as 12.13 // its paging table, so no translation occurs in this case.) 12.14 // 12.15 - return v->arch.shadow.hvm_paging_enabled; 12.16 + // It is also true for translated PV domains. 12.17 + // 12.18 + return v->arch.shadow.translate_enabled; 12.19 } 12.20 12.21 12.22 @@ -287,6 +290,10 @@ struct shadow_paging_mode { 12.23 struct x86_emulate_ctxt *ctxt); 12.24 mfn_t (*make_monitor_table )(struct vcpu *v); 12.25 void (*destroy_monitor_table )(struct vcpu *v, mfn_t mmfn); 12.26 + void * (*guest_map_l1e )(struct vcpu *v, unsigned long va, 12.27 + unsigned long *gl1mfn); 12.28 + void (*guest_get_eff_l1e )(struct vcpu *v, unsigned long va, 12.29 + void *eff_l1e); 12.30 #if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC 12.31 int (*guess_wrmap )(struct vcpu *v, 12.32 unsigned long vaddr, mfn_t gmfn); 12.33 @@ -452,9 +459,73 @@ shadow_destroy_monitor_table(struct vcpu 12.34 v->arch.shadow.mode->destroy_monitor_table(v, mmfn); 12.35 } 12.36 12.37 +static inline void * 12.38 +guest_map_l1e(struct vcpu *v, unsigned long addr, unsigned long *gl1mfn) 12.39 +{ 12.40 + if ( likely(!shadow_mode_translate(v->domain)) ) 12.41 + { 12.42 + l2_pgentry_t l2e; 12.43 + ASSERT(!shadow_mode_external(v->domain)); 12.44 + /* Find this l1e and its enclosing l1mfn in the linear map */ 12.45 + if ( __copy_from_user(&l2e, 12.46 + &__linear_l2_table[l2_linear_offset(addr)], 12.47 + sizeof(l2_pgentry_t)) != 0 ) 12.48 + return NULL; 12.49 + /* Check flags that it will be safe to read the l1e */ 12.50 + if ( (l2e_get_flags(l2e) & (_PAGE_PRESENT | _PAGE_PSE)) 12.51 + != _PAGE_PRESENT ) 12.52 + return NULL; 12.53 + *gl1mfn = l2e_get_pfn(l2e); 12.54 + return &__linear_l1_table[l1_linear_offset(addr)]; 12.55 + } 12.56 + 12.57 + return v->arch.shadow.mode->guest_map_l1e(v, addr, gl1mfn); 12.58 +} 12.59 + 12.60 +static inline void 12.61 +guest_unmap_l1e(struct vcpu *v, void *p) 12.62 +{ 12.63 + if ( unlikely(shadow_mode_translate(v->domain)) ) 12.64 + unmap_domain_page(p); 12.65 +} 12.66 + 12.67 +static inline void 12.68 +guest_get_eff_l1e(struct vcpu *v, unsigned long addr, void *eff_l1e) 12.69 +{ 12.70 + if ( likely(!shadow_mode_translate(v->domain)) ) 12.71 + { 12.72 + ASSERT(!shadow_mode_external(v->domain)); 12.73 + if ( __copy_from_user(eff_l1e, 12.74 + &__linear_l1_table[l1_linear_offset(addr)], 12.75 + sizeof(l1_pgentry_t)) != 0 ) 12.76 + *(l1_pgentry_t *)eff_l1e = l1e_empty(); 12.77 + return; 12.78 + } 12.79 + 12.80 + v->arch.shadow.mode->guest_get_eff_l1e(v, addr, eff_l1e); 12.81 +} 12.82 + 12.83 +static inline void 12.84 +guest_get_eff_kern_l1e(struct vcpu *v, unsigned long addr, void *eff_l1e) 12.85 +{ 12.86 +#if defined(__x86_64__) 12.87 + int user_mode = !(v->arch.flags & TF_kernel_mode); 12.88 +#define TOGGLE_MODE() if ( user_mode ) toggle_guest_mode(v) 12.89 +#else 12.90 +#define TOGGLE_MODE() ((void)0) 12.91 +#endif 12.92 + 12.93 + TOGGLE_MODE(); 12.94 + guest_get_eff_l1e(v, addr, eff_l1e); 12.95 + TOGGLE_MODE(); 12.96 +} 12.97 + 12.98 + 12.99 /* Validate a pagetable change from the 
guest and update the shadows. */ 12.100 extern int shadow_validate_guest_entry(struct vcpu *v, mfn_t gmfn, 12.101 void *new_guest_entry); 12.102 +extern int __shadow_validate_guest_entry(struct vcpu *v, mfn_t gmfn, 12.103 + void *entry, u32 size); 12.104 12.105 /* Update the shadows in response to a pagetable write from a HVM guest */ 12.106 extern void shadow_validate_guest_pt_write(struct vcpu *v, mfn_t gmfn, 12.107 @@ -629,7 +700,14 @@ sh_mfn_to_gfn(struct domain *d, mfn_t mf 12.108 return mfn_x(mfn); 12.109 } 12.110 12.111 - 12.112 +static inline l1_pgentry_t 12.113 +gl1e_to_ml1e(struct domain *d, l1_pgentry_t l1e) 12.114 +{ 12.115 + if ( unlikely(shadow_mode_translate(d)) ) 12.116 + l1e = l1e_from_pfn(gmfn_to_mfn(d, l1e_get_pfn(l1e)), 12.117 + l1e_get_flags(l1e)); 12.118 + return l1e; 12.119 +} 12.120 12.121 #endif /* _XEN_SHADOW_H */ 12.122
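As a usage note for the gl1e_to_ml1e() helper added at the end of the shadow.h hunk above, here is a minimal sketch (hypothetical fragment, not part of the changeset; the real users are the ptwr_emulated_update() hunks in xen/arch/x86/mm.c). In a translated domain the guest's L1 entries carry gfns, so they must be rewritten to carry mfns before the reference-counting routines inspect them; in non-translated domains the helper returns the entry unchanged:

    /* Hypothetical fragment, for illustration only: swap the references held
     * on the frames named by guest-supplied L1 entries. */
    static int swap_l1e_refs(struct domain *d, l1_pgentry_t nl1e,
                             l1_pgentry_t ol1e)
    {
        /* Take a ref on the new frame; gl1e_to_ml1e() maps gfn -> mfn
         * when shadow_mode_translate(d) is set. */
        if ( !get_page_from_l1e(gl1e_to_ml1e(d, nl1e), d) )
            return 0;   /* new entry points at an unusable frame */

        /* Drop the ref that was held via the old entry. */
        put_page_from_l1e(gl1e_to_ml1e(d, ol1e), d);
        return 1;
    }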