ia64/xen-unstable
changeset 9703:9849bd4a86dd
SVM patch to add PAE support.
Signed-off-by: Tom Woller <thomas.woller@amd.com>
author      kaf24@firebug.cl.cam.ac.uk
date        Thu Apr 13 11:06:43 2006 +0100 (2006-04-13)
parents     cff23e96eae7
children    137b60a47d13
files       xen/arch/x86/hvm/svm/svm.c xen/arch/x86/hvm/svm/vmcb.c
--- a/xen/arch/x86/hvm/svm/svm.c    Thu Apr 13 11:00:54 2006 +0100
+++ b/xen/arch/x86/hvm/svm/svm.c    Thu Apr 13 11:06:43 2006 +0100
@@ -315,20 +315,30 @@ static inline int long_mode_do_msr_write
     {
     case MSR_EFER:
 #ifdef __x86_64__
-        if ((msr_content & EFER_LME) ^ test_bit(SVM_CPU_STATE_LME_ENABLED,
-                                                &vc->arch.hvm_svm.cpu_state))
+        /* offending reserved bit will cause #GP */
+        if ( msr_content & ~(EFER_LME | EFER_LMA | EFER_NX | EFER_SCE) )
         {
-            if (test_bit(SVM_CPU_STATE_PG_ENABLED, &vc->arch.hvm_svm.cpu_state)
-                    || !test_bit(SVM_CPU_STATE_PAE_ENABLED,
-                                 &vc->arch.hvm_svm.cpu_state))
+            printk("trying to set reserved bit in EFER\n");
+            svm_inject_exception(vmcb, TRAP_gp_fault, 1, 0);
+            return 0;
+        }
+
+        /* LME: 0 -> 1 */
+        if ( msr_content & EFER_LME &&
+             !test_bit(SVM_CPU_STATE_LME_ENABLED, &vc->arch.hvm_svm.cpu_state) )
+        {
+            if ( svm_paging_enabled(vc) ||
+                 !test_bit(SVM_CPU_STATE_PAE_ENABLED,
+                           &vc->arch.hvm_svm.cpu_state) )
             {
+                printk("trying to set LME bit when "
+                       "in paging mode or PAE bit is not set\n");
                 svm_inject_exception(vmcb, TRAP_gp_fault, 1, 0);
+                return 0;
             }
+            set_bit(SVM_CPU_STATE_LME_ENABLED, &vc->arch.hvm_svm.cpu_state);
         }
 
-        if (msr_content & EFER_LME)
-            set_bit(SVM_CPU_STATE_LME_ENABLED, &vc->arch.hvm_svm.cpu_state);
-
         /* We have already recorded that we want LME, so it will be set
          * next time CR0 gets updated. So we clear that bit and continue.
          */
@@ -757,7 +767,8 @@ void arch_svm_do_resume(struct vcpu *v)
         reset_stack_and_jump( svm_asm_do_resume );
     }
     else {
-        printk("VCPU core pinned: %d to %d\n", v->arch.hvm_svm.launch_core, smp_processor_id() );
+        printk("VCPU core pinned: %d to %d\n",
+               v->arch.hvm_svm.launch_core, smp_processor_id() );
         v->arch.hvm_svm.launch_core = smp_processor_id();
         svm_migrate_timers( v );
         svm_do_resume( v );
@@ -922,6 +933,7 @@ static void svm_vmexit_do_cpuid(struct v
         clear_bit(X86_FEATURE_APIC, &edx);
 
 #if CONFIG_PAGING_LEVELS < 3
+        clear_bit(X86_FEATURE_NX, &edx);
         clear_bit(X86_FEATURE_PAE, &edx);
         clear_bit(X86_FEATURE_PSE, &edx);
         clear_bit(X86_FEATURE_PSE36, &edx);
@@ -929,12 +941,14 @@ static void svm_vmexit_do_cpuid(struct v
         if ( v->domain->arch.ops->guest_paging_levels == PAGING_L2 )
         {
             if ( !v->domain->arch.hvm_domain.pae_enabled )
-                clear_bit(X86_FEATURE_PAE, &edx);
+            {
+                clear_bit(X86_FEATURE_PAE, &edx);
+                clear_bit(X86_FEATURE_NX, &edx);
+            }
             clear_bit(X86_FEATURE_PSE, &edx);
             clear_bit(X86_FEATURE_PSE36, &edx);
         }
-#endif
-
+#endif
         /* Clear out reserved bits. */
         ecx &= ~SVM_VCPU_CPUID_L1_RESERVED; /* mask off reserved bits */
         clear_bit(X86_FEATURE_MWAIT & 31, &ecx);
@@ -1312,8 +1326,7 @@ static int svm_set_cr0(unsigned long val
     unsigned long mfn;
     int paging_enabled;
     struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
-    unsigned long crn;
-
+
     ASSERT(vmcb);
 
     /* We don't want to lose PG. ET is reserved and should be always be 1*/
@@ -1358,37 +1371,39 @@ static int svm_set_cr0(unsigned long val
             set_bit(SVM_CPU_STATE_LMA_ENABLED,
                     &v->arch.hvm_svm.cpu_state);
             vmcb->efer |= (EFER_LMA | EFER_LME);
-
-#if CONFIG_PAGING_LEVELS >= 4
-            if (!shadow_set_guest_paging_levels(v->domain, 4))
+            if (!shadow_set_guest_paging_levels(v->domain, PAGING_L4) )
             {
                 printk("Unsupported guest paging levels\n");
                 domain_crash_synchronous(); /* need to take a clean path */
             }
-#endif
         }
         else
 #endif  /* __x86_64__ */
         {
 #if CONFIG_PAGING_LEVELS >= 3
-            if (!shadow_set_guest_paging_levels(v->domain, 2))
+            /* seems it's a 32-bit or 32-bit PAE guest */
+            if ( test_bit(SVM_CPU_STATE_PAE_ENABLED,
+                          &v->arch.hvm_svm.cpu_state) )
             {
-                printk("Unsupported guest paging levels\n");
-                domain_crash_synchronous(); /* need to take a clean path */
+                /* The guest enables PAE first and then it enables PG, it is
+                 * really a PAE guest */
+                if ( !shadow_set_guest_paging_levels(v->domain, PAGING_L3) )
+                {
+                    printk("Unsupported guest paging levels\n");
+                    domain_crash_synchronous();
+                }
+            }
+            else
+            {
+                if ( !shadow_set_guest_paging_levels(v->domain, PAGING_L2) )
+                {
+                    printk("Unsupported guest paging levels\n");
+                    domain_crash_synchronous(); /* need to take a clean path */
+                }
             }
 #endif
         }
 
-        /* update CR4's PAE if needed */
-        crn = vmcb->cr4;
-        if ((!(crn & X86_CR4_PAE))
-                && test_bit(SVM_CPU_STATE_PAE_ENABLED,
-                            &v->arch.hvm_svm.cpu_state))
-        {
-            HVM_DBG_LOG(DBG_LEVEL_1, "enable PAE on cr4\n");
-            vmcb->cr4 |= X86_CR4_PAE;
-        }
-
         /* Now arch.guest_table points to machine physical. */
         v->arch.guest_table = mk_pagetable((u64)mfn << PAGE_SHIFT);
         update_pagetables(v);
@@ -1402,8 +1417,17 @@ static int svm_set_cr0(unsigned long val
         /* arch->shadow_table should hold the next CR3 for shadow */
         HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx\n",
                     v->arch.hvm_svm.cpu_cr3, mfn);
+
+        return 1;
     }
 
+    if ( !((value & X86_CR0_PE) && (value & X86_CR0_PG)) && paging_enabled )
+        if ( v->arch.hvm_svm.cpu_cr3 ) {
+            put_page(mfn_to_page(get_mfn_from_gpfn(
+                      v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT)));
+            v->arch.guest_table = mk_pagetable(0);
+        }
+
     /*
      * SVM implements paged real-mode and when we return to real-mode
      * we revert back to the physical mappings that the domain builder
@@ -1415,6 +1439,14 @@ static int svm_set_cr0(unsigned long val
             return 0;
         }
 
+        clear_all_shadow_status( v->domain );
+        set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
+        vmcb->cr3 = pagetable_get_paddr(v->domain->arch.phys_table);
+    }
+    else if ( (value & (X86_CR0_PE | X86_CR0_PG)) == X86_CR0_PE )
+    {
+        /* we should take care of this kind of situation */
+        clear_all_shadow_status(v->domain);
         set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
         vmcb->cr3 = pagetable_get_paddr(v->domain->arch.phys_table);
     }
@@ -1438,15 +1470,21 @@ static void mov_from_cr(int cr, int gp,
     {
     case 0:
         value = v->arch.hvm_svm.cpu_shadow_cr0;
-        break;
+        if (svm_dbg_on)
+            printk("CR0 read =%lx \n", value );
+        break;
    case 2:
        value = vmcb->cr2;
        break;
    case 3:
        value = (unsigned long) v->arch.hvm_svm.cpu_cr3;
-        break;
+        if (svm_dbg_on)
+            printk("CR3 read =%lx \n", value );
+        break;
    case 4:
        value = (unsigned long) v->arch.hvm_svm.cpu_shadow_cr4;
+        if (svm_dbg_on)
+            printk( "CR4 read=%lx\n", value );
        break;
    case 8:
 #if 0
@@ -1466,6 +1504,12 @@ static void mov_from_cr(int cr, int gp,
 }
 
 
+static inline int svm_pgbit_test(struct vcpu *v)
+{
+    return v->arch.hvm_svm.cpu_shadow_cr0 & X86_CR0_PG;
+}
+
+
 /*
  * Write to control registers
  */
@@ -1486,12 +1530,15 @@ static int mov_to_cr(int gpreg, int cr,
     switch (cr)
     {
     case 0:
+        if (svm_dbg_on)
+            printk("CR0 write =%lx \n", value );
        return svm_set_cr0(value);

    case 3:
    {
        unsigned long old_base_mfn, mfn;
-
+        if (svm_dbg_on)
+            printk("CR3 write =%lx \n", value );
        /* If paging is not enabled yet, simply copy the value to CR3. */
        if (!svm_paging_enabled(v)) {
            v->arch.hvm_svm.cpu_cr3 = value;
@@ -1533,19 +1580,104 @@ static int mov_to_cr(int gpreg, int cr,
            if (old_base_mfn)
                put_page(mfn_to_page(old_base_mfn));

+            /*
+             * arch.shadow_table should now hold the next CR3 for shadow
+             */
+#if CONFIG_PAGING_LEVELS >= 3
+            if ( v->domain->arch.ops->guest_paging_levels == PAGING_L3 )
+                shadow_sync_all(v->domain);
+#endif
+            v->arch.hvm_svm.cpu_cr3 = value;
            update_pagetables(v);
-
-            /* arch.shadow_table should now hold the next CR3 for shadow*/
-            v->arch.hvm_svm.cpu_cr3 = value;
            HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx", value);
            vmcb->cr3 = pagetable_get_paddr(v->arch.shadow_table);
        }
        break;
    }

-    case 4:
-        /* CR4 */
-        if (value & X86_CR4_PAE) {
+    case 4: /* CR4 */
+    {
+        if (svm_dbg_on)
+            printk( "write cr4=%lx, cr0=%lx\n",
+                    value, v->arch.hvm_svm.cpu_shadow_cr0 );
+        old_cr = v->arch.hvm_svm.cpu_shadow_cr4;
+        if ( value & X86_CR4_PAE && !(old_cr & X86_CR4_PAE) )
+        {
+            set_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
+            if ( svm_pgbit_test(v) )
+            {
+                /* The guest is a 32-bit PAE guest. */
+#if CONFIG_PAGING_LEVELS >= 4
+                unsigned long mfn, old_base_mfn;
+
+                if( !shadow_set_guest_paging_levels(v->domain, PAGING_L3) )
+                {
+                    printk("Unsupported guest paging levels\n");
+                    domain_crash_synchronous(); /* need to take a clean path */
+                }
+
+                if ( !VALID_MFN(mfn = get_mfn_from_gpfn(
+                                    v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT)) ||
+                     !get_page(mfn_to_page(mfn), v->domain) )
+                {
+                    printk("Invalid CR3 value = %lx", v->arch.hvm_svm.cpu_cr3);
+                    domain_crash_synchronous(); /* need to take a clean path */
+                }
+
+                old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
+                if ( old_base_mfn )
+                    put_page(mfn_to_page(old_base_mfn));
+
+                /*
+                 * Now arch.guest_table points to machine physical.
+                 */
+
+                v->arch.guest_table = mk_pagetable((u64)mfn << PAGE_SHIFT);
+                update_pagetables(v);
+
+                HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx",
+                            (unsigned long) (mfn << PAGE_SHIFT));
+
+                vmcb->cr3 = pagetable_get_paddr(v->arch.shadow_table);
+
+                /*
+                 * arch->shadow_table should hold the next CR3 for shadow
+                 */
+
+                HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx",
+                            v->arch.hvm_svm.cpu_cr3, mfn);
+#endif
+            }
+            else
+            {
+                /* The guest is a 64 bit or 32-bit PAE guest. */
+#if CONFIG_PAGING_LEVELS >= 4
+                if ( (v->domain->arch.ops != NULL) &&
+                     v->domain->arch.ops->guest_paging_levels == PAGING_L2)
+                {
+                    /* Seems the guest first enables PAE without enabling PG,
+                     * it must enable PG after that, and it is a 32-bit PAE
+                     * guest */
+
+                    if ( !shadow_set_guest_paging_levels(v->domain, PAGING_L3) )
+                    {
+                        printk("Unsupported guest paging levels\n");
+                        domain_crash_synchronous();
+                    }
+                }
+                else
+                {
+                    if ( !shadow_set_guest_paging_levels(v->domain,
+                                                         PAGING_L4) )
+                    {
+                        printk("Unsupported guest paging levels\n");
+                        domain_crash_synchronous();
+                    }
+                }
+#endif
+            }
+        }
+        else if (value & X86_CR4_PAE) {
            set_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
        } else {
            if (test_bit(SVM_CPU_STATE_LMA_ENABLED,
@@ -1555,7 +1687,6 @@ static int mov_to_cr(int gpreg, int cr,
            clear_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
        }

-        old_cr = v->arch.hvm_svm.cpu_shadow_cr4;
        v->arch.hvm_svm.cpu_shadow_cr4 = value;
        vmcb->cr4 = value | SVM_CR4_HOST_MASK;

@@ -1569,6 +1700,7 @@ static int mov_to_cr(int gpreg, int cr,
            shadow_sync_all(v->domain);
        }
        break;
+    }

    default:
        printk("invalid cr: %d\n", cr);
@@ -1933,6 +2065,7 @@ static int svm_do_vmmcall_reset_to_realm

    vmcb->cr4 = SVM_CR4_HOST_MASK;
    v->arch.hvm_svm.cpu_shadow_cr4 = 0;
+    clear_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);

    /* This will jump to ROMBIOS */
    vmcb->rip = 0xFFF0;
@@ -2280,7 +2413,8 @@ void walk_shadow_and_guest_pt(unsigned l
    gpte.l1 = 0;
    __copy_from_user(&gpte, &linear_pg_table[ l1_linear_offset(gva) ], sizeof(gpte) );
    printk( "G-PTE = %x, flags=%x\n", gpte.l1, l1e_get_flags(gpte) );
-    __copy_from_user( &spte, &phys_to_machine_mapping[ l1e_get_pfn( gpte ) ], sizeof(spte) );
+    __copy_from_user( &spte, &phys_to_machine_mapping[ l1e_get_pfn( gpte ) ],
+                      sizeof(spte) );
    printk( "S-PTE = %x, flags=%x\n", spte.l1, l1e_get_flags(spte));
 }
 #endif /* SVM_WALK_GUEST_PAGES */
@@ -2314,18 +2448,29 @@ asmlinkage void svm_vmexit_handler(struc
    {
        if (svm_paging_enabled(v) && !mmio_space(gva_to_gpa(vmcb->exitinfo2)))
        {
+            printk("I%08ld,ExC=%s(%d),IP=%x:%llx,I1=%llx,I2=%llx,INT=%llx, gpa=%llx\n",
+                   intercepts_counter,
+                   exit_reasons[exit_reason], exit_reason, regs.cs,
+                   (unsigned long long) regs.rip,
+                   (unsigned long long) vmcb->exitinfo1,
+                   (unsigned long long) vmcb->exitinfo2,
+                   (unsigned long long) vmcb->exitintinfo.bytes,
+                   (unsigned long long) gva_to_gpa( vmcb->exitinfo2 ) );
+        }
+        else
+        {
            printk("I%08ld,ExC=%s(%d),IP=%x:%llx,I1=%llx,I2=%llx,INT=%llx\n",
                   intercepts_counter,
                   exit_reasons[exit_reason], exit_reason, regs.cs,
                   (unsigned long long) regs.rip,
                   (unsigned long long) vmcb->exitinfo1,
                   (unsigned long long) vmcb->exitinfo2,
-                   (unsigned long long) vmcb->exitintinfo.bytes);
+                   (unsigned long long) vmcb->exitintinfo.bytes );
        }
    }
-    else if (svm_dbg_on
-             && exit_reason != VMEXIT_IOIO
-             && exit_reason != VMEXIT_INTR)
+    else if ( svm_dbg_on
+              && exit_reason != VMEXIT_IOIO
+              && exit_reason != VMEXIT_INTR)
    {

        if (exit_reasons[exit_reason])
@@ -2350,7 +2495,9 @@ asmlinkage void svm_vmexit_handler(struc
    }

 #ifdef SVM_WALK_GUEST_PAGES
-    if( exit_reason == VMEXIT_EXCEPTION_PF && ( ( vmcb->exitinfo2 == vmcb->rip )|| vmcb->exitintinfo.bytes) )
+    if( exit_reason == VMEXIT_EXCEPTION_PF
+        && ( ( vmcb->exitinfo2 == vmcb->rip )
+        || vmcb->exitintinfo.bytes) )
    {
        if (svm_paging_enabled(v) && !mmio_space(gva_to_gpa(vmcb->exitinfo2)))
            walk_shadow_and_guest_pt( vmcb->exitinfo2 );
--- a/xen/arch/x86/hvm/svm/vmcb.c    Thu Apr 13 11:00:54 2006 +0100
+++ b/xen/arch/x86/hvm/svm/vmcb.c    Thu Apr 13 11:06:43 2006 +0100
@@ -257,7 +257,8 @@ static int construct_init_vmcb_guest(str
     /* CR3 is set in svm_final_setup_guest */
 
     __asm__ __volatile__ ("mov %%cr4,%0" : "=r" (crn) :);
-    arch_svm->cpu_shadow_cr4 = crn & ~(X86_CR4_PGE | X86_CR4_PSE);
+    crn &= ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE);
+    arch_svm->cpu_shadow_cr4 = crn;
     vmcb->cr4 = crn | SVM_CR4_HOST_MASK;
 
     vmcb->rsp = 0;
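
For readers following the MSR_EFER hunk in svm.c above, the sketch below distils the two checks the patch introduces: a write that sets any bit outside LME/LMA/NX/SCE gets a #GP, and the LME 0-to-1 transition is only accepted while guest paging is off and CR4.PAE is already set. The struct and helper names here (guest_state, inject_gp, handle_efer_write) are hypothetical stand-ins for illustration, not the Xen structures or the real svm_inject_exception() call.

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define EFER_SCE (1ULL << 0)    /* syscall/sysret enable */
#define EFER_LME (1ULL << 8)    /* long mode enable */
#define EFER_LMA (1ULL << 10)   /* long mode active */
#define EFER_NX  (1ULL << 11)   /* no-execute enable */

/* Hypothetical snapshot of the guest state the checks depend on. */
struct guest_state {
    bool paging_enabled;        /* guest CR0.PG */
    bool pae_enabled;           /* guest CR4.PAE */
    bool lme_enabled;           /* EFER.LME as recorded by the VMM */
};

static void inject_gp(void)
{
    /* Stand-in for svm_inject_exception(vmcb, TRAP_gp_fault, 1, 0). */
    printf("#GP injected into guest\n");
}

/* Returns 1 if the write is accepted, 0 if it was rejected with #GP. */
static int handle_efer_write(struct guest_state *g, uint64_t msr_content)
{
    /* Setting any reserved bit raises #GP and drops the write. */
    if (msr_content & ~(EFER_LME | EFER_LMA | EFER_NX | EFER_SCE)) {
        inject_gp();
        return 0;
    }

    /* LME 0 -> 1 is only legal while paging is off and CR4.PAE is on. */
    if ((msr_content & EFER_LME) && !g->lme_enabled) {
        if (g->paging_enabled || !g->pae_enabled) {
            inject_gp();
            return 0;
        }
        g->lme_enabled = true; /* LMA is set later, when CR0.PG is enabled */
    }
    return 1;
}

int main(void)
{
    struct guest_state g = { .paging_enabled = false, .pae_enabled = false };

    handle_efer_write(&g, EFER_LME);            /* rejected: PAE not set yet */
    g.pae_enabled = true;
    handle_efer_write(&g, EFER_LME | EFER_NX);  /* accepted */
    printf("LME recorded: %d\n", (int)g.lme_enabled);
    return 0;
}
```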
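
The CR0 and CR4 hunks converge on one decision: which shadow paging depth to request once the guest finally turns paging on. The helper below is an illustrative assumption, not code from the tree (which calls shadow_set_guest_paging_levels() inline at each site), and the numeric values for PAGING_L2/L3/L4 are likewise assumed for the example.

```c
#include <stdbool.h>
#include <stdio.h>

/* Assumed values, mirroring the PAGING_L2/L3/L4 names used in the diff. */
enum paging_levels { PAGING_L2 = 2, PAGING_L3 = 3, PAGING_L4 = 4 };

/* Hypothetical distillation of the choice made when the guest sets CR0.PG. */
static enum paging_levels choose_shadow_levels(bool guest_lme, bool guest_pae)
{
    if (guest_lme)
        return PAGING_L4;   /* long-mode guest: 4-level shadow */
    if (guest_pae)
        return PAGING_L3;   /* 32-bit PAE guest: 3-level shadow */
    return PAGING_L2;       /* classic 32-bit, non-PAE guest */
}

int main(void)
{
    printf("64-bit guest   -> %d levels\n", choose_shadow_levels(true,  true));
    printf("32-bit PAE     -> %d levels\n", choose_shadow_levels(false, true));
    printf("32-bit non-PAE -> %d levels\n", choose_shadow_levels(false, false));
    return 0;
}
```

The reason both svm_set_cr0() and the CR4 write path make this choice in the patch is ordering: a guest may set CR4.PAE before CR0.PG or enable PG with PAE already latched, and either path must end up asking the shadow code for the same level.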