ia64/xen-unstable

view xen/arch/x86/vmx.c @ 8387:dc8122d90670

Fix the case where a guest OS clears the TS bit with a mov to cr0
instead of a clts instruction when saving and restoring floating-point
context. The clts instruction is already handled in the VMX exit
handler, but vmx_set_cr0 did not handle this yet (a sketch of the added
check follows the changeset metadata below).

Signed-off-by: Xiaofeng Ling <xiaofeng.ling@intel.com>
author kaf24@firebug.cl.cam.ac.uk
date Wed Dec 14 19:47:16 2005 +0100 (2005-12-14)
parents c9929c7c63f6
children 37cafca1539e
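
For context, here is a minimal sketch of the logic this patch adds to vmx_set_cr0
(it mirrors the hunk at source lines 1104-1109 in the listing below; clts(),
setup_fpu() and the CR0 read shadow are existing helpers in this file, while the
standalone wrapper function and its name are purely illustrative and assume the
includes already present in vmx.c):

    /*
     * Sketch only, not the literal patch: when the guest clears CR0.TS with a
     * "mov to cr0" (rather than CLTS, which already has its own exit handler),
     * the hypervisor must clear TS in the real CR0 and reload the guest's FPU
     * state, exactly as the CLTS path does.
     */
    static void sketch_handle_mov_to_cr0_ts_clear(struct vcpu *v,
                                                  unsigned long old_cr0,
                                                  unsigned long new_cr0)
    {
        /* TS was set before and the new CR0 value clears it */
        if ((old_cr0 & X86_CR0_TS) && !(new_cr0 & X86_CR0_TS)) {
            clts();          /* clear TS in the host's CR0 */
            setup_fpu(v);    /* restore the guest's floating-point context */
        }
    }

In the file itself this check is inlined near the top of vmx_set_cr0, right
after the CR0 read shadow is fetched.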
line source
1 /*
2 * vmx.c: handling VMX architecture-related VM exits
3 * Copyright (c) 2004, Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
16 * Place - Suite 330, Boston, MA 02111-1307 USA.
17 *
18 */
20 #include <xen/config.h>
21 #include <xen/init.h>
22 #include <xen/lib.h>
23 #include <xen/trace.h>
24 #include <xen/sched.h>
25 #include <xen/irq.h>
26 #include <xen/softirq.h>
27 #include <xen/domain_page.h>
28 #include <asm/current.h>
29 #include <asm/io.h>
30 #include <asm/shadow.h>
31 #include <asm/regs.h>
32 #include <asm/cpufeature.h>
33 #include <asm/processor.h>
34 #include <asm/types.h>
35 #include <asm/msr.h>
36 #include <asm/spinlock.h>
37 #include <asm/vmx.h>
38 #include <asm/vmx_vmcs.h>
39 #include <asm/vmx_intercept.h>
40 #include <asm/shadow.h>
41 #if CONFIG_PAGING_LEVELS >= 3
42 #include <asm/shadow_64.h>
43 #endif
44 #include <public/sched.h>
45 #include <public/io/ioreq.h>
46 #include <asm/vmx_vpic.h>
47 #include <asm/vmx_vlapic.h>
49 int hvm_enabled;
51 #ifdef CONFIG_VMX
52 unsigned int opt_vmx_debug_level = 0;
53 integer_param("vmx_debug", opt_vmx_debug_level);
55 static unsigned long trace_values[NR_CPUS][5]; /* indices 0..4 are used by TRACE_VMEXIT/trace_vmentry below */
56 #define TRACE_VMEXIT(index,value) trace_values[current->processor][index]=value
58 static int vmx_switch_on;
60 void vmx_final_setup_guest(struct vcpu *v)
61 {
62 v->arch.schedule_tail = arch_vmx_do_launch;
64 if ( v->vcpu_id == 0 )
65 {
66 struct domain *d = v->domain;
67 struct vcpu *vc;
69 d->arch.vmx_platform.lapic_enable = v->arch.guest_context.user_regs.ecx;
70 v->arch.guest_context.user_regs.ecx = 0;
71 VMX_DBG_LOG(DBG_LEVEL_VLAPIC, "lapic enable is %d.\n",
72 d->arch.vmx_platform.lapic_enable);
74 /* Initialize monitor page table */
75 for_each_vcpu(d, vc)
76 vc->arch.monitor_table = mk_pagetable(0);
78 /*
79 * Required to do this once per domain
80 * XXX todo: add a separate function to do these.
81 */
82 memset(&d->shared_info->evtchn_mask[0], 0xff,
83 sizeof(d->shared_info->evtchn_mask));
85 /* Put the domain in shadow mode even though we're going to be using
86 * the shared 1:1 page table initially. It shouldn't hurt */
87 shadow_mode_enable(d,
88 SHM_enable|SHM_refcounts|
89 SHM_translate|SHM_external|SHM_wr_pt_pte);
90 }
92 vmx_switch_on = 1;
93 }
95 void vmx_relinquish_resources(struct vcpu *v)
96 {
97 struct vmx_virpit *vpit;
99 if ( !VMX_DOMAIN(v) )
100 return;
102 if (v->vcpu_id == 0) {
103 /* unmap IO shared page */
104 struct domain *d = v->domain;
105 unmap_domain_page((void *)d->arch.vmx_platform.shared_page_va);
106 }
108 destroy_vmcs(&v->arch.arch_vmx);
109 free_monitor_pagetable(v);
110 vpit = &v->domain->arch.vmx_platform.vmx_pit;
111 if ( active_ac_timer(&(vpit->pit_timer)) )
112 rem_ac_timer(&vpit->pit_timer);
113 if ( active_ac_timer(&v->arch.arch_vmx.hlt_timer) ) {
114 rem_ac_timer(&v->arch.arch_vmx.hlt_timer);
115 }
116 if ( vmx_apic_support(v->domain) ) {
117 rem_ac_timer( &(VLAPIC(v)->vlapic_timer) );
118 xfree( VLAPIC(v) );
119 }
120 }
122 #ifdef __x86_64__
123 static struct msr_state percpu_msr[NR_CPUS];
125 static u32 msr_data_index[VMX_MSR_COUNT] =
126 {
127 MSR_LSTAR, MSR_STAR, MSR_CSTAR,
128 MSR_SYSCALL_MASK, MSR_EFER,
129 };
131 /*
132 * To avoid MSR save/restore at every VM exit/entry time, we restore
133 * the x86_64 specific MSRs at domain switch time. Since those MSRs
134 * are not modified once set for generic domains, we don't save them,
135 * but simply reset them to the values set at percpu_traps_init().
136 */
137 void vmx_load_msrs(struct vcpu *n)
138 {
139 struct msr_state *host_state = &percpu_msr[smp_processor_id()];
140 int i;
142 if ( !vmx_switch_on )
143 return;
145 while ( host_state->flags )
146 {
147 i = find_first_set_bit(host_state->flags);
148 wrmsrl(msr_data_index[i], host_state->msr_items[i]);
149 clear_bit(i, &host_state->flags);
150 }
151 }
153 static void vmx_save_init_msrs(void)
154 {
155 struct msr_state *host_state = &percpu_msr[smp_processor_id()];
156 int i;
158 for ( i = 0; i < VMX_MSR_COUNT; i++ )
159 rdmsrl(msr_data_index[i], host_state->msr_items[i]);
160 }
162 #define CASE_READ_MSR(address) \
163 case MSR_ ## address: \
164 msr_content = msr->msr_items[VMX_INDEX_MSR_ ## address]; \
165 break
167 #define CASE_WRITE_MSR(address) \
168 case MSR_ ## address: \
169 { \
170 msr->msr_items[VMX_INDEX_MSR_ ## address] = msr_content; \
171 if (!test_bit(VMX_INDEX_MSR_ ## address, &msr->flags)) { \
172 set_bit(VMX_INDEX_MSR_ ## address, &msr->flags); \
173 } \
174 wrmsrl(MSR_ ## address, msr_content); \
175 set_bit(VMX_INDEX_MSR_ ## address, &host_state->flags); \
176 } \
177 break
179 #define IS_CANO_ADDRESS(add) 1
180 static inline int long_mode_do_msr_read(struct cpu_user_regs *regs)
181 {
182 u64 msr_content = 0;
183 struct vcpu *vc = current;
184 struct msr_state * msr = &vc->arch.arch_vmx.msr_content;
185 switch(regs->ecx){
186 case MSR_EFER:
187 msr_content = msr->msr_items[VMX_INDEX_MSR_EFER];
188 VMX_DBG_LOG(DBG_LEVEL_2, "EFER msr_content %llx\n", (unsigned long long)msr_content);
189 if (test_bit(VMX_CPU_STATE_LME_ENABLED,
190 &vc->arch.arch_vmx.cpu_state))
191 msr_content |= 1 << _EFER_LME;
193 if (VMX_LONG_GUEST(vc))
194 msr_content |= 1 << _EFER_LMA;
195 break;
196 case MSR_FS_BASE:
197 if (!(VMX_LONG_GUEST(vc)))
198 /* XXX should it be GP fault */
199 domain_crash_synchronous();
200 __vmread(GUEST_FS_BASE, &msr_content);
201 break;
202 case MSR_GS_BASE:
203 if (!(VMX_LONG_GUEST(vc)))
204 domain_crash_synchronous();
205 __vmread(GUEST_GS_BASE, &msr_content);
206 break;
207 case MSR_SHADOW_GS_BASE:
208 msr_content = msr->shadow_gs;
209 break;
211 CASE_READ_MSR(STAR);
212 CASE_READ_MSR(LSTAR);
213 CASE_READ_MSR(CSTAR);
214 CASE_READ_MSR(SYSCALL_MASK);
215 default:
216 return 0;
217 }
218 VMX_DBG_LOG(DBG_LEVEL_2, "mode_do_msr_read: msr_content: %lx\n", msr_content);
219 regs->eax = msr_content & 0xffffffff;
220 regs->edx = msr_content >> 32;
221 return 1;
222 }
224 static inline int long_mode_do_msr_write(struct cpu_user_regs *regs)
225 {
226 u64 msr_content = regs->eax | ((u64)regs->edx << 32);
227 struct vcpu *vc = current;
228 struct msr_state * msr = &vc->arch.arch_vmx.msr_content;
229 struct msr_state * host_state =
230 &percpu_msr[smp_processor_id()];
232 VMX_DBG_LOG(DBG_LEVEL_1, " mode_do_msr_write msr %lx msr_content %lx\n",
233 regs->ecx, msr_content);
235 switch (regs->ecx){
236 case MSR_EFER:
237 if ((msr_content & EFER_LME) ^
238 test_bit(VMX_CPU_STATE_LME_ENABLED,
239 &vc->arch.arch_vmx.cpu_state)){
240 if (test_bit(VMX_CPU_STATE_PG_ENABLED,
241 &vc->arch.arch_vmx.cpu_state) ||
242 !test_bit(VMX_CPU_STATE_PAE_ENABLED,
243 &vc->arch.arch_vmx.cpu_state)){
244 vmx_inject_exception(vc, TRAP_gp_fault, 0);
245 }
246 }
247 if (msr_content & EFER_LME)
248 set_bit(VMX_CPU_STATE_LME_ENABLED,
249 &vc->arch.arch_vmx.cpu_state);
250 /* No update for LME/LMA since they have no effect */
251 msr->msr_items[VMX_INDEX_MSR_EFER] =
252 msr_content;
253 if (msr_content & ~(EFER_LME | EFER_LMA)){
254 msr->msr_items[VMX_INDEX_MSR_EFER] = msr_content;
255 if (!test_bit(VMX_INDEX_MSR_EFER, &msr->flags)){
256 rdmsrl(MSR_EFER,
257 host_state->msr_items[VMX_INDEX_MSR_EFER]);
258 set_bit(VMX_INDEX_MSR_EFER, &host_state->flags);
259 set_bit(VMX_INDEX_MSR_EFER, &msr->flags);
260 wrmsrl(MSR_EFER, msr_content);
261 }
262 }
263 break;
265 case MSR_FS_BASE:
266 case MSR_GS_BASE:
267 if (!(VMX_LONG_GUEST(vc)))
268 domain_crash_synchronous();
269 if (!IS_CANO_ADDRESS(msr_content)){
270 VMX_DBG_LOG(DBG_LEVEL_1, "Non-canonical address for MSR write\n");
271 vmx_inject_exception(vc, TRAP_gp_fault, 0);
272 }
273 if (regs->ecx == MSR_FS_BASE)
274 __vmwrite(GUEST_FS_BASE, msr_content);
275 else
276 __vmwrite(GUEST_GS_BASE, msr_content);
277 break;
279 case MSR_SHADOW_GS_BASE:
280 if (!(VMX_LONG_GUEST(vc)))
281 domain_crash_synchronous();
282 vc->arch.arch_vmx.msr_content.shadow_gs = msr_content;
283 wrmsrl(MSR_SHADOW_GS_BASE, msr_content);
284 break;
286 CASE_WRITE_MSR(STAR);
287 CASE_WRITE_MSR(LSTAR);
288 CASE_WRITE_MSR(CSTAR);
289 CASE_WRITE_MSR(SYSCALL_MASK);
290 default:
291 return 0;
292 }
293 return 1;
294 }
296 void
297 vmx_restore_msrs(struct vcpu *v)
298 {
299 int i = 0;
300 struct msr_state *guest_state;
301 struct msr_state *host_state;
302 unsigned long guest_flags ;
304 guest_state = &v->arch.arch_vmx.msr_content;
305 host_state = &percpu_msr[smp_processor_id()];
307 wrmsrl(MSR_SHADOW_GS_BASE, guest_state->shadow_gs);
308 guest_flags = guest_state->flags;
309 if (!guest_flags)
310 return;
312 while (guest_flags){
313 i = find_first_set_bit(guest_flags);
315 VMX_DBG_LOG(DBG_LEVEL_2,
316 "restore guest's index %d msr %lx with %lx\n",
317 i, (unsigned long) msr_data_index[i], (unsigned long) guest_state->msr_items[i]);
318 set_bit(i, &host_state->flags);
319 wrmsrl(msr_data_index[i], guest_state->msr_items[i]);
320 clear_bit(i, &guest_flags);
321 }
322 }
324 #else /* __i386__ */
325 #define vmx_save_init_msrs() ((void)0)
327 static inline int long_mode_do_msr_read(struct cpu_user_regs *regs){
328 return 0;
329 }
330 static inline int long_mode_do_msr_write(struct cpu_user_regs *regs){
331 return 0;
332 }
333 #endif
335 extern long evtchn_send(int lport);
336 extern long do_block(void);
337 void do_nmi(struct cpu_user_regs *, unsigned long);
339 static int check_vmx_controls(u32 ctrls, u32 msr)
340 {
341 u32 vmx_msr_low, vmx_msr_high;
343 rdmsr(msr, vmx_msr_low, vmx_msr_high);
344 if (ctrls < vmx_msr_low || ctrls > vmx_msr_high) {
345 printk("Insufficient VMX capability 0x%x, "
346 "msr=0x%x,low=0x%8x,high=0x%x\n",
347 ctrls, msr, vmx_msr_low, vmx_msr_high);
348 return 0;
349 }
350 return 1;
351 }
353 int start_vmx(void)
354 {
355 struct vmcs_struct *vmcs;
356 u32 ecx;
357 u32 eax, edx;
358 u64 phys_vmcs; /* debugging */
360 /*
361 * Xen does not fill x86_capability words except 0.
362 */
363 ecx = cpuid_ecx(1);
364 boot_cpu_data.x86_capability[4] = ecx;
366 if (!(test_bit(X86_FEATURE_VMXE, &boot_cpu_data.x86_capability)))
367 return 0;
369 rdmsr(IA32_FEATURE_CONTROL_MSR, eax, edx);
371 if (eax & IA32_FEATURE_CONTROL_MSR_LOCK) {
372 if ((eax & IA32_FEATURE_CONTROL_MSR_ENABLE_VMXON) == 0x0) {
373 printk("VMX disabled by Feature Control MSR.\n");
374 return 0;
375 }
376 }
377 else {
378 wrmsr(IA32_FEATURE_CONTROL_MSR,
379 IA32_FEATURE_CONTROL_MSR_LOCK |
380 IA32_FEATURE_CONTROL_MSR_ENABLE_VMXON, 0);
381 }
383 if (!check_vmx_controls(MONITOR_PIN_BASED_EXEC_CONTROLS,
384 MSR_IA32_VMX_PINBASED_CTLS_MSR))
385 return 0;
386 if (!check_vmx_controls(MONITOR_CPU_BASED_EXEC_CONTROLS,
387 MSR_IA32_VMX_PROCBASED_CTLS_MSR))
388 return 0;
389 if (!check_vmx_controls(MONITOR_VM_EXIT_CONTROLS,
390 MSR_IA32_VMX_EXIT_CTLS_MSR))
391 return 0;
392 if (!check_vmx_controls(MONITOR_VM_ENTRY_CONTROLS,
393 MSR_IA32_VMX_ENTRY_CTLS_MSR))
394 return 0;
396 set_in_cr4(X86_CR4_VMXE); /* Enable VMXE */
398 if (!(vmcs = alloc_vmcs())) {
399 printk("Failed to allocate VMCS\n");
400 return 0;
401 }
403 phys_vmcs = (u64) virt_to_phys(vmcs);
405 if (!(__vmxon(phys_vmcs))) {
406 printk("VMXON is done\n");
407 }
409 vmx_save_init_msrs();
411 hvm_enabled = 1;
413 return 1;
414 }
416 void stop_vmx(void)
417 {
418 if (read_cr4() & X86_CR4_VMXE)
419 __vmxoff();
420 }
422 /*
423 * Not all cases receive valid value in the VM-exit instruction length field.
424 */
425 #define __get_instruction_length(len) \
426 __vmread(VM_EXIT_INSTRUCTION_LEN, &(len)); \
427 if ((len) < 1 || (len) > 15) \
428 __vmx_bug(&regs);
430 static void inline __update_guest_eip(unsigned long inst_len)
431 {
432 unsigned long current_eip;
434 __vmread(GUEST_RIP, &current_eip);
435 __vmwrite(GUEST_RIP, current_eip + inst_len);
436 }
439 static int vmx_do_page_fault(unsigned long va, struct cpu_user_regs *regs)
440 {
441 unsigned long gpa; /* FIXME: PAE */
442 int result;
444 #if 0 /* keep for debugging */
445 {
446 unsigned long eip;
448 __vmread(GUEST_RIP, &eip);
449 VMX_DBG_LOG(DBG_LEVEL_VMMU,
450 "vmx_do_page_fault = 0x%lx, eip = %lx, error_code = %lx",
451 va, eip, (unsigned long)regs->error_code);
452 }
453 #endif
455 if (!vmx_paging_enabled(current)){
456 handle_mmio(va, va);
457 TRACE_VMEXIT (2,2);
458 return 1;
459 }
460 gpa = gva_to_gpa(va);
462 /* Use 1:1 page table to identify MMIO address space */
463 if ( mmio_space(gpa) ){
464 struct vcpu *v = current;
465 /* No support for APIC */
466 if (!vmx_apic_support(v->domain) && gpa >= 0xFEC00000) {
467 u32 inst_len;
468 __vmread(VM_EXIT_INSTRUCTION_LEN, &(inst_len));
469 __update_guest_eip(inst_len);
470 return 1;
471 }
472 TRACE_VMEXIT (2,2);
473 handle_mmio(va, gpa);
474 return 1;
475 }
477 result = shadow_fault(va, regs);
478 TRACE_VMEXIT (2,result);
479 #if 0
480 if ( !result )
481 {
482 __vmread(GUEST_RIP, &eip);
483 printk("vmx pgfault to guest va=%lx eip=%lx\n", va, eip);
484 }
485 #endif
487 return result;
488 }
490 static void vmx_do_no_device_fault(void)
491 {
492 unsigned long cr0;
493 struct vcpu *v = current;
495 clts();
496 setup_fpu(current);
497 __vmread_vcpu(v, CR0_READ_SHADOW, &cr0);
498 if (!(cr0 & X86_CR0_TS)) {
499 __vmread_vcpu(v, GUEST_CR0, &cr0);
500 cr0 &= ~X86_CR0_TS;
501 __vmwrite(GUEST_CR0, cr0);
502 }
503 __vm_clear_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_NM);
504 }
506 /* Reserved bits: [31:15], [12:11], [9], [6], [2:1] */
507 #define VMX_VCPU_CPUID_L1_RESERVED 0xffff9a46
509 static void vmx_vmexit_do_cpuid(unsigned long input, struct cpu_user_regs *regs)
510 {
511 unsigned int eax, ebx, ecx, edx;
512 unsigned long eip;
513 struct vcpu *v = current;
515 __vmread(GUEST_RIP, &eip);
517 VMX_DBG_LOG(DBG_LEVEL_1,
518 "do_cpuid: (eax) %lx, (ebx) %lx, (ecx) %lx, (edx) %lx,"
519 " (esi) %lx, (edi) %lx",
520 (unsigned long)regs->eax, (unsigned long)regs->ebx,
521 (unsigned long)regs->ecx, (unsigned long)regs->edx,
522 (unsigned long)regs->esi, (unsigned long)regs->edi);
524 cpuid(input, &eax, &ebx, &ecx, &edx);
526 if ( input == 1 )
527 {
528 if ( vmx_apic_support(v->domain) &&
529 !vlapic_global_enabled((VLAPIC(v))) )
530 clear_bit(X86_FEATURE_APIC, &edx);
532 #ifdef __x86_64__
533 if ( v->domain->arch.ops->guest_paging_levels == PAGING_L2 )
534 #endif
535 {
536 clear_bit(X86_FEATURE_PSE, &edx);
537 clear_bit(X86_FEATURE_PAE, &edx);
538 clear_bit(X86_FEATURE_PSE36, &edx);
539 }
541 /* Unsupportable for virtualised CPUs. */
542 ecx &= ~VMX_VCPU_CPUID_L1_RESERVED; /* mask off reserved bits */
543 clear_bit(X86_FEATURE_VMXE & 31, &ecx);
544 clear_bit(X86_FEATURE_MWAIT & 31, &ecx);
545 }
547 regs->eax = (unsigned long) eax;
548 regs->ebx = (unsigned long) ebx;
549 regs->ecx = (unsigned long) ecx;
550 regs->edx = (unsigned long) edx;
552 VMX_DBG_LOG(DBG_LEVEL_1,
553 "vmx_vmexit_do_cpuid: eip: %lx, input: %lx, out:eax=%x, ebx=%x, ecx=%x, edx=%x",
554 eip, input, eax, ebx, ecx, edx);
556 }
558 #define CASE_GET_REG_P(REG, reg) \
559 case REG_ ## REG: reg_p = (unsigned long *)&(regs->reg); break
561 static void vmx_dr_access (unsigned long exit_qualification, struct cpu_user_regs *regs)
562 {
563 unsigned int reg;
564 unsigned long *reg_p = 0;
565 struct vcpu *v = current;
566 unsigned long eip;
568 __vmread(GUEST_RIP, &eip);
570 reg = exit_qualification & DEBUG_REG_ACCESS_NUM;
572 VMX_DBG_LOG(DBG_LEVEL_1,
573 "vmx_dr_access : eip=%lx, reg=%d, exit_qualification = %lx",
574 eip, reg, exit_qualification);
576 switch(exit_qualification & DEBUG_REG_ACCESS_REG) {
577 CASE_GET_REG_P(EAX, eax);
578 CASE_GET_REG_P(ECX, ecx);
579 CASE_GET_REG_P(EDX, edx);
580 CASE_GET_REG_P(EBX, ebx);
581 CASE_GET_REG_P(EBP, ebp);
582 CASE_GET_REG_P(ESI, esi);
583 CASE_GET_REG_P(EDI, edi);
584 case REG_ESP:
585 break;
586 default:
587 __vmx_bug(regs);
588 }
590 switch (exit_qualification & DEBUG_REG_ACCESS_TYPE) {
591 case TYPE_MOV_TO_DR:
592 /* don't need to check the range */
593 if (reg != REG_ESP)
594 v->arch.guest_context.debugreg[reg] = *reg_p;
595 else {
596 unsigned long value;
597 __vmread(GUEST_RSP, &value);
598 v->arch.guest_context.debugreg[reg] = value;
599 }
600 break;
601 case TYPE_MOV_FROM_DR:
602 if (reg != REG_ESP)
603 *reg_p = v->arch.guest_context.debugreg[reg];
604 else {
605 __vmwrite(GUEST_RSP, v->arch.guest_context.debugreg[reg]);
606 }
607 break;
608 }
609 }
611 /*
612 * Invalidate the TLB for va. Invalidate the shadow page corresponding
613 * to the address va.
614 */
615 static void vmx_vmexit_do_invlpg(unsigned long va)
616 {
617 unsigned long eip;
618 struct vcpu *v = current;
620 __vmread(GUEST_RIP, &eip);
622 VMX_DBG_LOG(DBG_LEVEL_VMMU, "vmx_vmexit_do_invlpg: eip=%lx, va=%lx",
623 eip, va);
625 /*
626 * We do the safest thing first, then try to update the shadow
627 * by copying from the guest.
628 */
629 shadow_invlpg(v, va);
630 }
632 static int check_for_null_selector(unsigned long eip)
633 {
634 unsigned char inst[MAX_INST_LEN];
635 unsigned long sel;
636 int i, inst_len;
637 int inst_copy_from_guest(unsigned char *, unsigned long, int);
639 __vmread(VM_EXIT_INSTRUCTION_LEN, &inst_len);
640 memset(inst, 0, MAX_INST_LEN);
641 if (inst_copy_from_guest(inst, eip, inst_len) != inst_len) {
642 printf("check_for_null_selector: get guest instruction failed\n");
643 domain_crash_synchronous();
644 }
646 for (i = 0; i < inst_len; i++) {
647 switch (inst[i]) {
648 case 0xf3: /* REPZ */
649 case 0xf2: /* REPNZ */
650 case 0xf0: /* LOCK */
651 case 0x66: /* data32 */
652 case 0x67: /* addr32 */
653 continue;
654 case 0x2e: /* CS */
655 __vmread(GUEST_CS_SELECTOR, &sel);
656 break;
657 case 0x36: /* SS */
658 __vmread(GUEST_SS_SELECTOR, &sel);
659 break;
660 case 0x26: /* ES */
661 __vmread(GUEST_ES_SELECTOR, &sel);
662 break;
663 case 0x64: /* FS */
664 __vmread(GUEST_FS_SELECTOR, &sel);
665 break;
666 case 0x65: /* GS */
667 __vmread(GUEST_GS_SELECTOR, &sel);
668 break;
669 case 0x3e: /* DS */
670 /* FALLTHROUGH */
671 default:
672 /* DS is the default */
673 __vmread(GUEST_DS_SELECTOR, &sel);
674 }
675 return sel == 0 ? 1 : 0;
676 }
678 return 0;
679 }
681 void send_pio_req(struct cpu_user_regs *regs, unsigned long port,
682 unsigned long count, int size, long value, int dir, int pvalid)
683 {
684 struct vcpu *v = current;
685 vcpu_iodata_t *vio;
686 ioreq_t *p;
688 vio = get_vio(v->domain, v->vcpu_id);
689 if (vio == NULL) {
690 printk("bad shared page: %lx\n", (unsigned long) vio);
691 domain_crash_synchronous();
692 }
694 if (test_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags)) {
695 printf("VMX I/O has not yet completed\n");
696 domain_crash_synchronous();
697 }
698 set_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags);
700 p = &vio->vp_ioreq;
701 p->dir = dir;
702 p->pdata_valid = pvalid;
704 p->type = IOREQ_TYPE_PIO;
705 p->size = size;
706 p->addr = port;
707 p->count = count;
708 p->df = regs->eflags & EF_DF ? 1 : 0;
710 if (pvalid) {
711 if (vmx_paging_enabled(current))
712 p->u.pdata = (void *) gva_to_gpa(value);
713 else
714 p->u.pdata = (void *) value; /* guest VA == guest PA */
715 } else
716 p->u.data = value;
718 if (vmx_portio_intercept(p)) {
719 p->state = STATE_IORESP_READY;
720 vmx_io_assist(v);
721 return;
722 }
724 p->state = STATE_IOREQ_READY;
726 evtchn_send(iopacket_port(v->domain));
727 vmx_wait_io();
728 }
730 static void vmx_io_instruction(struct cpu_user_regs *regs,
731 unsigned long exit_qualification, unsigned long inst_len)
732 {
733 struct mmio_op *mmio_opp;
734 unsigned long eip, cs, eflags;
735 unsigned long port, size, dir;
736 int vm86;
738 mmio_opp = &current->arch.arch_vmx.mmio_op;
739 mmio_opp->instr = INSTR_PIO;
740 mmio_opp->flags = 0;
742 __vmread(GUEST_RIP, &eip);
743 __vmread(GUEST_CS_SELECTOR, &cs);
744 __vmread(GUEST_RFLAGS, &eflags);
745 vm86 = eflags & X86_EFLAGS_VM ? 1 : 0;
747 VMX_DBG_LOG(DBG_LEVEL_1,
748 "vmx_io_instruction: vm86 %d, eip=%lx:%lx, "
749 "exit_qualification = %lx",
750 vm86, cs, eip, exit_qualification);
752 if (test_bit(6, &exit_qualification))
753 port = (exit_qualification >> 16) & 0xFFFF;
754 else
755 port = regs->edx & 0xffff;
756 TRACE_VMEXIT(2, port);
757 size = (exit_qualification & 7) + 1;
758 dir = test_bit(3, &exit_qualification); /* direction */
760 if (test_bit(4, &exit_qualification)) { /* string instruction */
761 unsigned long addr, count = 1;
762 int sign = regs->eflags & EF_DF ? -1 : 1;
764 __vmread(GUEST_LINEAR_ADDRESS, &addr);
766 /*
767 * In protected mode, guest linear address is invalid if the
768 * selector is null.
769 */
770 if (!vm86 && check_for_null_selector(eip))
771 addr = dir == IOREQ_WRITE ? regs->esi : regs->edi;
773 if (test_bit(5, &exit_qualification)) { /* "rep" prefix */
774 mmio_opp->flags |= REPZ;
775 count = vm86 ? regs->ecx & 0xFFFF : regs->ecx;
776 }
778 /*
779 * Handle string pio instructions that cross pages or that
780 * are unaligned. See the comments in vmx_platform.c/handle_mmio()
781 */
782 if ((addr & PAGE_MASK) != ((addr + size - 1) & PAGE_MASK)) {
783 unsigned long value = 0;
785 mmio_opp->flags |= OVERLAP;
786 if (dir == IOREQ_WRITE)
787 vmx_copy(&value, addr, size, VMX_COPY_IN);
788 send_pio_req(regs, port, 1, size, value, dir, 0);
789 } else {
790 if ((addr & PAGE_MASK) != ((addr + count * size - 1) & PAGE_MASK)) {
791 if (sign > 0)
792 count = (PAGE_SIZE - (addr & ~PAGE_MASK)) / size;
793 else
794 count = (addr & ~PAGE_MASK) / size;
795 } else
796 __update_guest_eip(inst_len);
798 send_pio_req(regs, port, count, size, addr, dir, 1);
799 }
800 } else {
801 __update_guest_eip(inst_len);
802 send_pio_req(regs, port, 1, size, regs->eax, dir, 0);
803 }
804 }
806 int
807 vmx_copy(void *buf, unsigned long laddr, int size, int dir)
808 {
809 unsigned long gpa, mfn;
810 char *addr;
811 int count;
813 while (size > 0) {
814 count = PAGE_SIZE - (laddr & ~PAGE_MASK);
815 if (count > size)
816 count = size;
818 if (vmx_paging_enabled(current)) {
819 gpa = gva_to_gpa(laddr);
820 mfn = get_mfn_from_pfn(gpa >> PAGE_SHIFT);
821 } else
822 mfn = get_mfn_from_pfn(laddr >> PAGE_SHIFT);
823 if (mfn == INVALID_MFN)
824 return 0;
826 addr = (char *)map_domain_page(mfn) + (laddr & ~PAGE_MASK);
828 if (dir == VMX_COPY_IN)
829 memcpy(buf, addr, count);
830 else
831 memcpy(addr, buf, count);
833 unmap_domain_page(addr);
835 laddr += count;
836 buf += count;
837 size -= count;
838 }
840 return 1;
841 }
843 int
844 vmx_world_save(struct vcpu *v, struct vmx_assist_context *c)
845 {
846 unsigned long inst_len;
847 int error = 0;
849 error |= __vmread(VM_EXIT_INSTRUCTION_LEN, &inst_len);
850 error |= __vmread(GUEST_RIP, &c->eip);
851 c->eip += inst_len; /* skip transition instruction */
852 error |= __vmread(GUEST_RSP, &c->esp);
853 error |= __vmread(GUEST_RFLAGS, &c->eflags);
855 error |= __vmread(CR0_READ_SHADOW, &c->cr0);
856 c->cr3 = v->arch.arch_vmx.cpu_cr3;
857 error |= __vmread(CR4_READ_SHADOW, &c->cr4);
859 error |= __vmread(GUEST_IDTR_LIMIT, &c->idtr_limit);
860 error |= __vmread(GUEST_IDTR_BASE, &c->idtr_base);
862 error |= __vmread(GUEST_GDTR_LIMIT, &c->gdtr_limit);
863 error |= __vmread(GUEST_GDTR_BASE, &c->gdtr_base);
865 error |= __vmread(GUEST_CS_SELECTOR, &c->cs_sel);
866 error |= __vmread(GUEST_CS_LIMIT, &c->cs_limit);
867 error |= __vmread(GUEST_CS_BASE, &c->cs_base);
868 error |= __vmread(GUEST_CS_AR_BYTES, &c->cs_arbytes.bytes);
870 error |= __vmread(GUEST_DS_SELECTOR, &c->ds_sel);
871 error |= __vmread(GUEST_DS_LIMIT, &c->ds_limit);
872 error |= __vmread(GUEST_DS_BASE, &c->ds_base);
873 error |= __vmread(GUEST_DS_AR_BYTES, &c->ds_arbytes.bytes);
875 error |= __vmread(GUEST_ES_SELECTOR, &c->es_sel);
876 error |= __vmread(GUEST_ES_LIMIT, &c->es_limit);
877 error |= __vmread(GUEST_ES_BASE, &c->es_base);
878 error |= __vmread(GUEST_ES_AR_BYTES, &c->es_arbytes.bytes);
880 error |= __vmread(GUEST_SS_SELECTOR, &c->ss_sel);
881 error |= __vmread(GUEST_SS_LIMIT, &c->ss_limit);
882 error |= __vmread(GUEST_SS_BASE, &c->ss_base);
883 error |= __vmread(GUEST_SS_AR_BYTES, &c->ss_arbytes.bytes);
885 error |= __vmread(GUEST_FS_SELECTOR, &c->fs_sel);
886 error |= __vmread(GUEST_FS_LIMIT, &c->fs_limit);
887 error |= __vmread(GUEST_FS_BASE, &c->fs_base);
888 error |= __vmread(GUEST_FS_AR_BYTES, &c->fs_arbytes.bytes);
890 error |= __vmread(GUEST_GS_SELECTOR, &c->gs_sel);
891 error |= __vmread(GUEST_GS_LIMIT, &c->gs_limit);
892 error |= __vmread(GUEST_GS_BASE, &c->gs_base);
893 error |= __vmread(GUEST_GS_AR_BYTES, &c->gs_arbytes.bytes);
895 error |= __vmread(GUEST_TR_SELECTOR, &c->tr_sel);
896 error |= __vmread(GUEST_TR_LIMIT, &c->tr_limit);
897 error |= __vmread(GUEST_TR_BASE, &c->tr_base);
898 error |= __vmread(GUEST_TR_AR_BYTES, &c->tr_arbytes.bytes);
900 error |= __vmread(GUEST_LDTR_SELECTOR, &c->ldtr_sel);
901 error |= __vmread(GUEST_LDTR_LIMIT, &c->ldtr_limit);
902 error |= __vmread(GUEST_LDTR_BASE, &c->ldtr_base);
903 error |= __vmread(GUEST_LDTR_AR_BYTES, &c->ldtr_arbytes.bytes);
905 return !error;
906 }
908 int
909 vmx_world_restore(struct vcpu *v, struct vmx_assist_context *c)
910 {
911 unsigned long mfn, old_cr4, old_base_mfn;
912 int error = 0;
914 error |= __vmwrite(GUEST_RIP, c->eip);
915 error |= __vmwrite(GUEST_RSP, c->esp);
916 error |= __vmwrite(GUEST_RFLAGS, c->eflags);
918 error |= __vmwrite(CR0_READ_SHADOW, c->cr0);
920 if (!vmx_paging_enabled(v)) {
921 VMX_DBG_LOG(DBG_LEVEL_VMMU, "switching to vmxassist. use phys table");
922 __vmwrite(GUEST_CR3, pagetable_get_paddr(v->domain->arch.phys_table));
923 goto skip_cr3;
924 }
926 if (c->cr3 == v->arch.arch_vmx.cpu_cr3) {
927 /*
928 * This is a simple TLB flush, implying the guest has
929 * removed some translation or changed page attributes.
930 * We simply invalidate the shadow.
931 */
932 mfn = get_mfn_from_pfn(c->cr3 >> PAGE_SHIFT);
933 if (mfn != pagetable_get_pfn(v->arch.guest_table)) {
934 printk("Invalid CR3 value=%x", c->cr3);
935 domain_crash_synchronous();
936 return 0;
937 }
938 shadow_sync_all(v->domain);
939 } else {
940 /*
941 * If different, make a shadow. Check if the PDBR is valid
942 * first.
943 */
944 VMX_DBG_LOG(DBG_LEVEL_VMMU, "CR3 c->cr3 = %x", c->cr3);
945 if ((c->cr3 >> PAGE_SHIFT) > v->domain->max_pages) {
946 printk("Invalid CR3 value=%x", c->cr3);
947 domain_crash_synchronous();
948 return 0;
949 }
950 mfn = get_mfn_from_pfn(c->cr3 >> PAGE_SHIFT);
951 if(!get_page(pfn_to_page(mfn), v->domain))
952 return 0;
953 old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
954 v->arch.guest_table = mk_pagetable(mfn << PAGE_SHIFT);
955 if (old_base_mfn)
956 put_page(pfn_to_page(old_base_mfn));
957 update_pagetables(v);
958 /*
959 * arch.shadow_table should now hold the next CR3 for shadow
960 */
961 v->arch.arch_vmx.cpu_cr3 = c->cr3;
962 VMX_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %x", c->cr3);
963 __vmwrite(GUEST_CR3, pagetable_get_paddr(v->arch.shadow_table));
964 }
966 skip_cr3:
968 error |= __vmread(CR4_READ_SHADOW, &old_cr4);
969 error |= __vmwrite(GUEST_CR4, (c->cr4 | VMX_CR4_HOST_MASK));
970 error |= __vmwrite(CR4_READ_SHADOW, c->cr4);
972 error |= __vmwrite(GUEST_IDTR_LIMIT, c->idtr_limit);
973 error |= __vmwrite(GUEST_IDTR_BASE, c->idtr_base);
975 error |= __vmwrite(GUEST_GDTR_LIMIT, c->gdtr_limit);
976 error |= __vmwrite(GUEST_GDTR_BASE, c->gdtr_base);
978 error |= __vmwrite(GUEST_CS_SELECTOR, c->cs_sel);
979 error |= __vmwrite(GUEST_CS_LIMIT, c->cs_limit);
980 error |= __vmwrite(GUEST_CS_BASE, c->cs_base);
981 error |= __vmwrite(GUEST_CS_AR_BYTES, c->cs_arbytes.bytes);
983 error |= __vmwrite(GUEST_DS_SELECTOR, c->ds_sel);
984 error |= __vmwrite(GUEST_DS_LIMIT, c->ds_limit);
985 error |= __vmwrite(GUEST_DS_BASE, c->ds_base);
986 error |= __vmwrite(GUEST_DS_AR_BYTES, c->ds_arbytes.bytes);
988 error |= __vmwrite(GUEST_ES_SELECTOR, c->es_sel);
989 error |= __vmwrite(GUEST_ES_LIMIT, c->es_limit);
990 error |= __vmwrite(GUEST_ES_BASE, c->es_base);
991 error |= __vmwrite(GUEST_ES_AR_BYTES, c->es_arbytes.bytes);
993 error |= __vmwrite(GUEST_SS_SELECTOR, c->ss_sel);
994 error |= __vmwrite(GUEST_SS_LIMIT, c->ss_limit);
995 error |= __vmwrite(GUEST_SS_BASE, c->ss_base);
996 error |= __vmwrite(GUEST_SS_AR_BYTES, c->ss_arbytes.bytes);
998 error |= __vmwrite(GUEST_FS_SELECTOR, c->fs_sel);
999 error |= __vmwrite(GUEST_FS_LIMIT, c->fs_limit);
1000 error |= __vmwrite(GUEST_FS_BASE, c->fs_base);
1001 error |= __vmwrite(GUEST_FS_AR_BYTES, c->fs_arbytes.bytes);
1003 error |= __vmwrite(GUEST_GS_SELECTOR, c->gs_sel);
1004 error |= __vmwrite(GUEST_GS_LIMIT, c->gs_limit);
1005 error |= __vmwrite(GUEST_GS_BASE, c->gs_base);
1006 error |= __vmwrite(GUEST_GS_AR_BYTES, c->gs_arbytes.bytes);
1008 error |= __vmwrite(GUEST_TR_SELECTOR, c->tr_sel);
1009 error |= __vmwrite(GUEST_TR_LIMIT, c->tr_limit);
1010 error |= __vmwrite(GUEST_TR_BASE, c->tr_base);
1011 error |= __vmwrite(GUEST_TR_AR_BYTES, c->tr_arbytes.bytes);
1013 error |= __vmwrite(GUEST_LDTR_SELECTOR, c->ldtr_sel);
1014 error |= __vmwrite(GUEST_LDTR_LIMIT, c->ldtr_limit);
1015 error |= __vmwrite(GUEST_LDTR_BASE, c->ldtr_base);
1016 error |= __vmwrite(GUEST_LDTR_AR_BYTES, c->ldtr_arbytes.bytes);
1018 return !error;
1021 enum { VMX_ASSIST_INVOKE = 0, VMX_ASSIST_RESTORE };
1023 int
1024 vmx_assist(struct vcpu *v, int mode)
1026 struct vmx_assist_context c;
1027 u32 magic;
1028 u32 cp;
1030 /* make sure vmxassist exists (this is not an error) */
1031 if (!vmx_copy(&magic, VMXASSIST_MAGIC_OFFSET, sizeof(magic), VMX_COPY_IN))
1032 return 0;
1033 if (magic != VMXASSIST_MAGIC)
1034 return 0;
1036 switch (mode) {
1037 /*
1038 * Transfer control to vmxassist.
1039 * Store the current context in VMXASSIST_OLD_CONTEXT and load
1040 * the new VMXASSIST_NEW_CONTEXT context. This context was created
1041 * by vmxassist and will transfer control to it.
1042 */
1043 case VMX_ASSIST_INVOKE:
1044 /* save the old context */
1045 if (!vmx_copy(&cp, VMXASSIST_OLD_CONTEXT, sizeof(cp), VMX_COPY_IN))
1046 goto error;
1047 if (cp != 0) {
1048 if (!vmx_world_save(v, &c))
1049 goto error;
1050 if (!vmx_copy(&c, cp, sizeof(c), VMX_COPY_OUT))
1051 goto error;
1054 /* restore the new context, this should activate vmxassist */
1055 if (!vmx_copy(&cp, VMXASSIST_NEW_CONTEXT, sizeof(cp), VMX_COPY_IN))
1056 goto error;
1057 if (cp != 0) {
1058 if (!vmx_copy(&c, cp, sizeof(c), VMX_COPY_IN))
1059 goto error;
1060 if (!vmx_world_restore(v, &c))
1061 goto error;
1062 return 1;
1064 break;
1066 /*
1067 * Restore the VMXASSIST_OLD_CONTEXT that was saved by VMX_ASSIST_INVOKE
1068 * above.
1069 */
1070 case VMX_ASSIST_RESTORE:
1071 /* save the old context */
1072 if (!vmx_copy(&cp, VMXASSIST_OLD_CONTEXT, sizeof(cp), VMX_COPY_IN))
1073 goto error;
1074 if (cp != 0) {
1075 if (!vmx_copy(&c, cp, sizeof(c), VMX_COPY_IN))
1076 goto error;
1077 if (!vmx_world_restore(v, &c))
1078 goto error;
1079 return 1;
1081 break;
1084 error:
1085 printf("Failed to transfer to vmxassist\n");
1086 domain_crash_synchronous();
1087 return 0;
1090 static int vmx_set_cr0(unsigned long value)
1092 struct vcpu *v = current;
1093 unsigned long mfn;
1094 unsigned long eip;
1095 int paging_enabled;
1096 unsigned long vm_entry_value;
1097 unsigned long old_cr0;
1099 /*
1100 * CR0: We don't want to lose PE and PG.
1101 */
1102 __vmread_vcpu(v, CR0_READ_SHADOW, &old_cr0);
1103 paging_enabled = (old_cr0 & X86_CR0_PE) && (old_cr0 & X86_CR0_PG);
1104 /* If the OS doesn't use clts to clear the TS bit... */
1105 if((old_cr0 & X86_CR0_TS) && !(value & X86_CR0_TS))
1106 {
1107 clts();
1108 setup_fpu(v);
1109 }
1112 __vmwrite(GUEST_CR0, value | X86_CR0_PE | X86_CR0_PG | X86_CR0_NE);
1113 __vmwrite(CR0_READ_SHADOW, value);
1115 VMX_DBG_LOG(DBG_LEVEL_VMMU, "Update CR0 value = %lx\n", value);
1117 if ((value & X86_CR0_PE) && (value & X86_CR0_PG) && !paging_enabled) {
1118 /*
1119 * The guest CR3 must be pointing to the guest physical.
1120 */
1121 if ( !VALID_MFN(mfn = get_mfn_from_pfn(
1122 v->arch.arch_vmx.cpu_cr3 >> PAGE_SHIFT)) ||
1123 !get_page(pfn_to_page(mfn), v->domain) )
1125 printk("Invalid CR3 value = %lx", v->arch.arch_vmx.cpu_cr3);
1126 domain_crash_synchronous(); /* need to take a clean path */
1129 #if defined(__x86_64__)
1130 if (test_bit(VMX_CPU_STATE_LME_ENABLED,
1131 &v->arch.arch_vmx.cpu_state) &&
1132 !test_bit(VMX_CPU_STATE_PAE_ENABLED,
1133 &v->arch.arch_vmx.cpu_state)){
1134 VMX_DBG_LOG(DBG_LEVEL_1, "Enable paging before PAE enable\n");
1135 vmx_inject_exception(v, TRAP_gp_fault, 0);
1137 if (test_bit(VMX_CPU_STATE_LME_ENABLED,
1138 &v->arch.arch_vmx.cpu_state)){
1139 /* PAE should already be enabled at this point */
1140 VMX_DBG_LOG(DBG_LEVEL_1, "Enable the Long mode\n");
1141 set_bit(VMX_CPU_STATE_LMA_ENABLED,
1142 &v->arch.arch_vmx.cpu_state);
1143 __vmread(VM_ENTRY_CONTROLS, &vm_entry_value);
1144 vm_entry_value |= VM_ENTRY_CONTROLS_IA32E_MODE;
1145 __vmwrite(VM_ENTRY_CONTROLS, vm_entry_value);
1147 #if CONFIG_PAGING_LEVELS >= 4
1148 if(!shadow_set_guest_paging_levels(v->domain, 4)) {
1149 printk("Unsupported guest paging levels\n");
1150 domain_crash_synchronous(); /* need to take a clean path */
1152 #endif
1154 else
1156 #if CONFIG_PAGING_LEVELS >= 4
1157 if(!shadow_set_guest_paging_levels(v->domain, 2)) {
1158 printk("Unsupported guest paging levels\n");
1159 domain_crash_synchronous(); /* need to take a clean path */
1161 #endif
1165 unsigned long crn;
1166 /* update CR4's PAE if needed */
1167 __vmread(GUEST_CR4, &crn);
1168 if ( (!(crn & X86_CR4_PAE)) &&
1169 test_bit(VMX_CPU_STATE_PAE_ENABLED,
1170 &v->arch.arch_vmx.cpu_state) )
1172 VMX_DBG_LOG(DBG_LEVEL_1, "enable PAE on cr4\n");
1173 __vmwrite(GUEST_CR4, crn | X86_CR4_PAE);
1176 #endif
1177 /*
1178 * Now arch.guest_table points to machine physical.
1179 */
1180 v->arch.guest_table = mk_pagetable(mfn << PAGE_SHIFT);
1181 update_pagetables(v);
1183 VMX_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx",
1184 (unsigned long) (mfn << PAGE_SHIFT));
1186 __vmwrite(GUEST_CR3, pagetable_get_paddr(v->arch.shadow_table));
1187 /*
1188 * arch->shadow_table should hold the next CR3 for shadow
1189 */
1190 VMX_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx",
1191 v->arch.arch_vmx.cpu_cr3, mfn);
1194 if(!((value & X86_CR0_PE) && (value & X86_CR0_PG)) && paging_enabled)
1195 if(v->arch.arch_vmx.cpu_cr3){
1196 put_page(pfn_to_page(get_mfn_from_pfn(
1197 v->arch.arch_vmx.cpu_cr3 >> PAGE_SHIFT)));
1198 v->arch.guest_table = mk_pagetable(0);
1201 /*
1202 * VMX does not implement real-mode virtualization. We emulate
1203 * real-mode by performing a world switch to VMXAssist whenever
1204 * a partition disables the CR0.PE bit.
1205 */
1206 if ((value & X86_CR0_PE) == 0) {
1207 if ( value & X86_CR0_PG ) {
1208 /* inject GP here */
1209 vmx_inject_exception(v, TRAP_gp_fault, 0);
1210 return 0;
1211 } else {
1212 /*
1213 * Disable paging here.
1214 * Same as PE == 1 && PG == 0
1215 */
1216 if (test_bit(VMX_CPU_STATE_LMA_ENABLED,
1217 &v->arch.arch_vmx.cpu_state)){
1218 clear_bit(VMX_CPU_STATE_LMA_ENABLED,
1219 &v->arch.arch_vmx.cpu_state);
1220 __vmread(VM_ENTRY_CONTROLS, &vm_entry_value);
1221 vm_entry_value &= ~VM_ENTRY_CONTROLS_IA32E_MODE;
1222 __vmwrite(VM_ENTRY_CONTROLS, vm_entry_value);
1226 if (vmx_assist(v, VMX_ASSIST_INVOKE)) {
1227 set_bit(VMX_CPU_STATE_ASSIST_ENABLED, &v->arch.arch_vmx.cpu_state);
1228 __vmread(GUEST_RIP, &eip);
1229 VMX_DBG_LOG(DBG_LEVEL_1,
1230 "Transfering control to vmxassist %%eip 0x%lx\n", eip);
1231 return 0; /* do not update eip! */
1233 } else if (test_bit(VMX_CPU_STATE_ASSIST_ENABLED,
1234 &v->arch.arch_vmx.cpu_state)) {
1235 __vmread(GUEST_RIP, &eip);
1236 VMX_DBG_LOG(DBG_LEVEL_1,
1237 "Enabling CR0.PE at %%eip 0x%lx\n", eip);
1238 if (vmx_assist(v, VMX_ASSIST_RESTORE)) {
1239 clear_bit(VMX_CPU_STATE_ASSIST_ENABLED,
1240 &v->arch.arch_vmx.cpu_state);
1241 __vmread(GUEST_RIP, &eip);
1242 VMX_DBG_LOG(DBG_LEVEL_1,
1243 "Restoring to %%eip 0x%lx\n", eip);
1244 return 0; /* do not update eip! */
1248 return 1;
1251 #define CASE_GET_REG(REG, reg) \
1252 case REG_ ## REG: value = regs->reg; break
1254 #define CASE_EXTEND_SET_REG \
1255 CASE_EXTEND_REG(S)
1256 #define CASE_EXTEND_GET_REG \
1257 CASE_EXTEND_REG(G)
1259 #ifdef __i386__
1260 #define CASE_EXTEND_REG(T)
1261 #else
1262 #define CASE_EXTEND_REG(T) \
1263 CASE_ ## T ## ET_REG(R8, r8); \
1264 CASE_ ## T ## ET_REG(R9, r9); \
1265 CASE_ ## T ## ET_REG(R10, r10); \
1266 CASE_ ## T ## ET_REG(R11, r11); \
1267 CASE_ ## T ## ET_REG(R12, r12); \
1268 CASE_ ## T ## ET_REG(R13, r13); \
1269 CASE_ ## T ## ET_REG(R14, r14); \
1270 CASE_ ## T ## ET_REG(R15, r15);
1271 #endif
1274 /*
1275 * Write to control registers
1276 */
1277 static int mov_to_cr(int gp, int cr, struct cpu_user_regs *regs)
1279 unsigned long value;
1280 unsigned long old_cr;
1281 struct vcpu *v = current;
1283 switch (gp) {
1284 CASE_GET_REG(EAX, eax);
1285 CASE_GET_REG(ECX, ecx);
1286 CASE_GET_REG(EDX, edx);
1287 CASE_GET_REG(EBX, ebx);
1288 CASE_GET_REG(EBP, ebp);
1289 CASE_GET_REG(ESI, esi);
1290 CASE_GET_REG(EDI, edi);
1291 CASE_EXTEND_GET_REG
1292 case REG_ESP:
1293 __vmread(GUEST_RSP, &value);
1294 break;
1295 default:
1296 printk("invalid gp: %d\n", gp);
1297 __vmx_bug(regs);
1300 VMX_DBG_LOG(DBG_LEVEL_1, "mov_to_cr: CR%d, value = %lx,", cr, value);
1301 VMX_DBG_LOG(DBG_LEVEL_1, "current = %lx,", (unsigned long) current);
1303 switch(cr) {
1304 case 0:
1306 return vmx_set_cr0(value);
1308 case 3:
1310 unsigned long old_base_mfn, mfn;
1312 /*
1313 * If paging is not enabled yet, simply copy the value to CR3.
1314 */
1315 if (!vmx_paging_enabled(v)) {
1316 v->arch.arch_vmx.cpu_cr3 = value;
1317 break;
1320 /*
1321 * We make a new one if the shadow does not exist.
1322 */
1323 if (value == v->arch.arch_vmx.cpu_cr3) {
1324 /*
1325 * This is a simple TLB flush, implying the guest has
1326 * removed some translation or changed page attributes.
1327 * We simply invalidate the shadow.
1328 */
1329 mfn = get_mfn_from_pfn(value >> PAGE_SHIFT);
1330 if (mfn != pagetable_get_pfn(v->arch.guest_table))
1331 __vmx_bug(regs);
1332 shadow_sync_all(v->domain);
1333 } else {
1334 /*
1335 * If different, make a shadow. Check if the PDBR is valid
1336 * first.
1337 */
1338 VMX_DBG_LOG(DBG_LEVEL_VMMU, "CR3 value = %lx", value);
1339 if ( ((value >> PAGE_SHIFT) > v->domain->max_pages ) ||
1340 !VALID_MFN(mfn = get_mfn_from_pfn(value >> PAGE_SHIFT)) ||
1341 !get_page(pfn_to_page(mfn), v->domain) )
1343 printk("Invalid CR3 value=%lx", value);
1344 domain_crash_synchronous(); /* need to take a clean path */
1346 old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
1347 v->arch.guest_table = mk_pagetable(mfn << PAGE_SHIFT);
1348 if (old_base_mfn)
1349 put_page(pfn_to_page(old_base_mfn));
1350 update_pagetables(v);
1351 /*
1352 * arch.shadow_table should now hold the next CR3 for shadow
1353 */
1354 v->arch.arch_vmx.cpu_cr3 = value;
1355 VMX_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx",
1356 value);
1357 __vmwrite(GUEST_CR3, pagetable_get_paddr(v->arch.shadow_table));
1359 break;
1361 case 4: /* CR4 */
1363 if (value & X86_CR4_PAE){
1364 set_bit(VMX_CPU_STATE_PAE_ENABLED, &v->arch.arch_vmx.cpu_state);
1365 } else {
1366 if (test_bit(VMX_CPU_STATE_LMA_ENABLED,
1367 &v->arch.arch_vmx.cpu_state)){
1368 vmx_inject_exception(v, TRAP_gp_fault, 0);
1370 clear_bit(VMX_CPU_STATE_PAE_ENABLED, &v->arch.arch_vmx.cpu_state);
1373 __vmread(CR4_READ_SHADOW, &old_cr);
1375 __vmwrite(GUEST_CR4, value| VMX_CR4_HOST_MASK);
1376 __vmwrite(CR4_READ_SHADOW, value);
1378 /*
1379 * Writing to CR4 to modify the PSE, PGE, or PAE flag invalidates
1380 * all TLB entries except global entries.
1381 */
1382 if ((old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE)) {
1383 shadow_sync_all(v->domain);
1385 break;
1387 default:
1388 printk("invalid cr: %d\n", gp);
1389 __vmx_bug(regs);
1392 return 1;
1395 #define CASE_SET_REG(REG, reg) \
1396 case REG_ ## REG: \
1397 regs->reg = value; \
1398 break
1400 /*
1401 * Read from control registers. CR0 and CR4 are read from the shadow.
1402 */
1403 static void mov_from_cr(int cr, int gp, struct cpu_user_regs *regs)
1405 unsigned long value;
1406 struct vcpu *v = current;
1408 if (cr != 3)
1409 __vmx_bug(regs);
1411 value = (unsigned long) v->arch.arch_vmx.cpu_cr3;
1413 switch (gp) {
1414 CASE_SET_REG(EAX, eax);
1415 CASE_SET_REG(ECX, ecx);
1416 CASE_SET_REG(EDX, edx);
1417 CASE_SET_REG(EBX, ebx);
1418 CASE_SET_REG(EBP, ebp);
1419 CASE_SET_REG(ESI, esi);
1420 CASE_SET_REG(EDI, edi);
1421 CASE_EXTEND_SET_REG
1422 case REG_ESP:
1423 __vmwrite(GUEST_RSP, value);
1424 regs->esp = value;
1425 break;
1426 default:
1427 printk("invalid gp: %d\n", gp);
1428 __vmx_bug(regs);
1431 VMX_DBG_LOG(DBG_LEVEL_VMMU, "mov_from_cr: CR%d, value = %lx,", cr, value);
1434 static int vmx_cr_access(unsigned long exit_qualification, struct cpu_user_regs *regs)
1436 unsigned int gp, cr;
1437 unsigned long value;
1438 struct vcpu *v = current;
1440 switch (exit_qualification & CONTROL_REG_ACCESS_TYPE) {
1441 case TYPE_MOV_TO_CR:
1442 gp = exit_qualification & CONTROL_REG_ACCESS_REG;
1443 cr = exit_qualification & CONTROL_REG_ACCESS_NUM;
1444 TRACE_VMEXIT(1,TYPE_MOV_TO_CR);
1445 TRACE_VMEXIT(2,cr);
1446 TRACE_VMEXIT(3,gp);
1447 return mov_to_cr(gp, cr, regs);
1448 case TYPE_MOV_FROM_CR:
1449 gp = exit_qualification & CONTROL_REG_ACCESS_REG;
1450 cr = exit_qualification & CONTROL_REG_ACCESS_NUM;
1451 TRACE_VMEXIT(1,TYPE_MOV_FROM_CR);
1452 TRACE_VMEXIT(2,cr);
1453 TRACE_VMEXIT(3,gp);
1454 mov_from_cr(cr, gp, regs);
1455 break;
1456 case TYPE_CLTS:
1457 TRACE_VMEXIT(1,TYPE_CLTS);
1458 clts();
1459 setup_fpu(current);
1461 __vmread_vcpu(v, GUEST_CR0, &value);
1462 value &= ~X86_CR0_TS; /* clear TS */
1463 __vmwrite(GUEST_CR0, value);
1465 __vmread_vcpu(v, CR0_READ_SHADOW, &value);
1466 value &= ~X86_CR0_TS; /* clear TS */
1467 __vmwrite(CR0_READ_SHADOW, value);
1468 break;
1469 case TYPE_LMSW:
1470 TRACE_VMEXIT(1,TYPE_LMSW);
1471 __vmread_vcpu(v, CR0_READ_SHADOW, &value);
1472 value = (value & ~0xF) |
1473 (((exit_qualification & LMSW_SOURCE_DATA) >> 16) & 0xF);
1474 return vmx_set_cr0(value);
1475 break;
1476 default:
1477 __vmx_bug(regs);
1478 break;
1480 return 1;
1483 static inline void vmx_do_msr_read(struct cpu_user_regs *regs)
1485 u64 msr_content = 0;
1486 struct vcpu *v = current;
1488 VMX_DBG_LOG(DBG_LEVEL_1, "vmx_do_msr_read: ecx=%lx, eax=%lx, edx=%lx",
1489 (unsigned long)regs->ecx, (unsigned long)regs->eax,
1490 (unsigned long)regs->edx);
1491 switch (regs->ecx) {
1492 case MSR_IA32_TIME_STAMP_COUNTER:
1494 struct vmx_virpit *vpit;
1496 rdtscll(msr_content);
1497 vpit = &(v->domain->arch.vmx_platform.vmx_pit);
1498 msr_content += vpit->shift;
1499 break;
1501 case MSR_IA32_SYSENTER_CS:
1502 __vmread(GUEST_SYSENTER_CS, (u32 *)&msr_content);
1503 break;
1504 case MSR_IA32_SYSENTER_ESP:
1505 __vmread(GUEST_SYSENTER_ESP, &msr_content);
1506 break;
1507 case MSR_IA32_SYSENTER_EIP:
1508 __vmread(GUEST_SYSENTER_EIP, &msr_content);
1509 break;
1510 case MSR_IA32_APICBASE:
1511 msr_content = VLAPIC(v) ? VLAPIC(v)->apic_base_msr : 0;
1512 break;
1513 default:
1514 if(long_mode_do_msr_read(regs))
1515 return;
1516 rdmsr_user(regs->ecx, regs->eax, regs->edx);
1517 break;
1520 regs->eax = msr_content & 0xFFFFFFFF;
1521 regs->edx = msr_content >> 32;
1523 VMX_DBG_LOG(DBG_LEVEL_1, "vmx_do_msr_read returns: "
1524 "ecx=%lx, eax=%lx, edx=%lx",
1525 (unsigned long)regs->ecx, (unsigned long)regs->eax,
1526 (unsigned long)regs->edx);
1529 static inline void vmx_do_msr_write(struct cpu_user_regs *regs)
1531 u64 msr_content;
1532 struct vcpu *v = current;
1534 VMX_DBG_LOG(DBG_LEVEL_1, "vmx_do_msr_write: ecx=%lx, eax=%lx, edx=%lx",
1535 (unsigned long)regs->ecx, (unsigned long)regs->eax,
1536 (unsigned long)regs->edx);
1538 msr_content = (regs->eax & 0xFFFFFFFF) | ((u64)regs->edx << 32);
1540 switch (regs->ecx) {
1541 case MSR_IA32_TIME_STAMP_COUNTER:
1543 struct vmx_virpit *vpit;
1544 u64 host_tsc, drift;
1546 rdtscll(host_tsc);
1547 vpit = &(v->domain->arch.vmx_platform.vmx_pit);
1548 drift = v->arch.arch_vmx.tsc_offset - vpit->shift;
1549 vpit->shift = msr_content - host_tsc;
1550 v->arch.arch_vmx.tsc_offset = vpit->shift + drift;
1551 __vmwrite(TSC_OFFSET, vpit->shift);
1553 #if defined (__i386__)
1554 __vmwrite(TSC_OFFSET_HIGH, ((vpit->shift)>>32));
1555 #endif
1556 break;
1558 case MSR_IA32_SYSENTER_CS:
1559 __vmwrite(GUEST_SYSENTER_CS, msr_content);
1560 break;
1561 case MSR_IA32_SYSENTER_ESP:
1562 __vmwrite(GUEST_SYSENTER_ESP, msr_content);
1563 break;
1564 case MSR_IA32_SYSENTER_EIP:
1565 __vmwrite(GUEST_SYSENTER_EIP, msr_content);
1566 break;
1567 case MSR_IA32_APICBASE:
1568 vlapic_msr_set(VLAPIC(v), msr_content);
1569 break;
1570 default:
1571 long_mode_do_msr_write(regs);
1572 break;
1575 VMX_DBG_LOG(DBG_LEVEL_1, "vmx_do_msr_write returns: "
1576 "ecx=%lx, eax=%lx, edx=%lx",
1577 (unsigned long)regs->ecx, (unsigned long)regs->eax,
1578 (unsigned long)regs->edx);
1581 /*
1582 * Need to use this exit to reschedule
1583 */
1584 void vmx_vmexit_do_hlt(void)
1586 struct vcpu *v=current;
1587 struct vmx_virpit *vpit = &(v->domain->arch.vmx_platform.vmx_pit);
1588 s_time_t next_pit=-1,next_wakeup;
1590 if ( !v->vcpu_id ) {
1591 next_pit = get_pit_scheduled(v,vpit);
1593 next_wakeup = get_apictime_scheduled(v);
1594 if ( (next_pit != -1 && next_pit < next_wakeup) || next_wakeup == -1 ) {
1595 next_wakeup = next_pit;
1597 if ( next_wakeup != - 1 )
1598 set_ac_timer(&current->arch.arch_vmx.hlt_timer, next_wakeup);
1599 do_block();
1602 static inline void vmx_vmexit_do_extint(struct cpu_user_regs *regs)
1604 unsigned int vector;
1605 int error;
1607 asmlinkage void do_IRQ(struct cpu_user_regs *);
1608 void smp_apic_timer_interrupt(struct cpu_user_regs *);
1609 void timer_interrupt(int, void *, struct cpu_user_regs *);
1610 void smp_event_check_interrupt(void);
1611 void smp_invalidate_interrupt(void);
1612 void smp_call_function_interrupt(void);
1613 void smp_spurious_interrupt(struct cpu_user_regs *regs);
1614 void smp_error_interrupt(struct cpu_user_regs *regs);
1616 if ((error = __vmread(VM_EXIT_INTR_INFO, &vector))
1617 && !(vector & INTR_INFO_VALID_MASK))
1618 __vmx_bug(regs);
1620 vector &= 0xff;
1621 local_irq_disable();
1623 switch(vector) {
1624 case LOCAL_TIMER_VECTOR:
1625 smp_apic_timer_interrupt(regs);
1626 break;
1627 case EVENT_CHECK_VECTOR:
1628 smp_event_check_interrupt();
1629 break;
1630 case INVALIDATE_TLB_VECTOR:
1631 smp_invalidate_interrupt();
1632 break;
1633 case CALL_FUNCTION_VECTOR:
1634 smp_call_function_interrupt();
1635 break;
1636 case SPURIOUS_APIC_VECTOR:
1637 smp_spurious_interrupt(regs);
1638 break;
1639 case ERROR_APIC_VECTOR:
1640 smp_error_interrupt(regs);
1641 break;
1642 default:
1643 regs->entry_vector = vector;
1644 do_IRQ(regs);
1645 break;
1649 #define BUF_SIZ 256
1650 #define MAX_LINE 80
1651 char print_buf[BUF_SIZ];
1652 static int index;
1654 static void vmx_print_line(const char c, struct vcpu *v)
1657 if (index == MAX_LINE || c == '\n') {
1658 if (index == MAX_LINE) {
1659 print_buf[index++] = c;
1661 print_buf[index] = '\0';
1662 printk("(GUEST: %u) %s\n", v->domain->domain_id, (char *) &print_buf);
1663 index = 0;
1665 else
1666 print_buf[index++] = c;
1669 void save_vmx_cpu_user_regs(struct cpu_user_regs *ctxt)
1671 __vmread(GUEST_SS_SELECTOR, &ctxt->ss);
1672 __vmread(GUEST_RSP, &ctxt->esp);
1673 __vmread(GUEST_RFLAGS, &ctxt->eflags);
1674 __vmread(GUEST_CS_SELECTOR, &ctxt->cs);
1675 __vmread(GUEST_RIP, &ctxt->eip);
1677 __vmread(GUEST_GS_SELECTOR, &ctxt->gs);
1678 __vmread(GUEST_FS_SELECTOR, &ctxt->fs);
1679 __vmread(GUEST_ES_SELECTOR, &ctxt->es);
1680 __vmread(GUEST_DS_SELECTOR, &ctxt->ds);
1683 #ifdef XEN_DEBUGGER
1684 void save_cpu_user_regs(struct cpu_user_regs *regs)
1686 __vmread(GUEST_SS_SELECTOR, &regs->xss);
1687 __vmread(GUEST_RSP, &regs->esp);
1688 __vmread(GUEST_RFLAGS, &regs->eflags);
1689 __vmread(GUEST_CS_SELECTOR, &regs->xcs);
1690 __vmread(GUEST_RIP, &regs->eip);
1692 __vmread(GUEST_GS_SELECTOR, &regs->xgs);
1693 __vmread(GUEST_FS_SELECTOR, &regs->xfs);
1694 __vmread(GUEST_ES_SELECTOR, &regs->xes);
1695 __vmread(GUEST_DS_SELECTOR, &regs->xds);
1698 void restore_cpu_user_regs(struct cpu_user_regs *regs)
1700 __vmwrite(GUEST_SS_SELECTOR, regs->xss);
1701 __vmwrite(GUEST_RSP, regs->esp);
1702 __vmwrite(GUEST_RFLAGS, regs->eflags);
1703 __vmwrite(GUEST_CS_SELECTOR, regs->xcs);
1704 __vmwrite(GUEST_RIP, regs->eip);
1706 __vmwrite(GUEST_GS_SELECTOR, regs->xgs);
1707 __vmwrite(GUEST_FS_SELECTOR, regs->xfs);
1708 __vmwrite(GUEST_ES_SELECTOR, regs->xes);
1709 __vmwrite(GUEST_DS_SELECTOR, regs->xds);
1711 #endif
1713 asmlinkage void vmx_vmexit_handler(struct cpu_user_regs regs)
1715 unsigned int exit_reason, idtv_info_field;
1716 unsigned long exit_qualification, eip, inst_len = 0;
1717 struct vcpu *v = current;
1718 int error;
1720 if ((error = __vmread(VM_EXIT_REASON, &exit_reason)))
1721 __vmx_bug(&regs);
1723 perfc_incra(vmexits, exit_reason);
1725 __vmread(IDT_VECTORING_INFO_FIELD, &idtv_info_field);
1726 if (idtv_info_field & INTR_INFO_VALID_MASK) {
1727 __vmwrite(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field);
1729 __vmread(VM_EXIT_INSTRUCTION_LEN, &inst_len);
1730 if (inst_len >= 1 && inst_len <= 15)
1731 __vmwrite(VM_ENTRY_INSTRUCTION_LEN, inst_len);
1733 if (idtv_info_field & 0x800) { /* valid error code */
1734 unsigned long error_code;
1735 __vmread(IDT_VECTORING_ERROR_CODE, &error_code);
1736 __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
1739 VMX_DBG_LOG(DBG_LEVEL_1, "idtv_info_field=%x", idtv_info_field);
1742 /* don't bother with H/W interrupts */
1743 if (exit_reason != EXIT_REASON_EXTERNAL_INTERRUPT &&
1744 exit_reason != EXIT_REASON_VMCALL &&
1745 exit_reason != EXIT_REASON_IO_INSTRUCTION)
1746 VMX_DBG_LOG(DBG_LEVEL_0, "exit reason = %x", exit_reason);
1748 if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) {
1749 printk("Failed vm entry\n");
1750 domain_crash_synchronous();
1751 return;
1755 __vmread(GUEST_RIP, &eip);
1756 TRACE_3D(TRC_VMX_VMEXIT, v->domain->domain_id, eip, exit_reason);
1757 TRACE_VMEXIT(0,exit_reason);
1760 switch (exit_reason) {
1761 case EXIT_REASON_EXCEPTION_NMI:
1763 /*
1764 * We don't set the software-interrupt exiting (INT n).
1765 * (1) We can get an exception (e.g. #PG) in the guest, or
1766 * (2) NMI
1767 */
1768 int error;
1769 unsigned int vector;
1770 unsigned long va;
1772 if ((error = __vmread(VM_EXIT_INTR_INFO, &vector))
1773 || !(vector & INTR_INFO_VALID_MASK))
1774 __vmx_bug(&regs);
1775 vector &= 0xff;
1777 TRACE_VMEXIT(1,vector);
1778 perfc_incra(cause_vector, vector);
1780 TRACE_3D(TRC_VMX_VECTOR, v->domain->domain_id, eip, vector);
1781 switch (vector) {
1782 #ifdef XEN_DEBUGGER
1783 case TRAP_debug:
1785 save_cpu_user_regs(&regs);
1786 pdb_handle_exception(1, &regs, 1);
1787 restore_cpu_user_regs(&regs);
1788 break;
1790 case TRAP_int3:
1792 save_cpu_user_regs(&regs);
1793 pdb_handle_exception(3, &regs, 1);
1794 restore_cpu_user_regs(&regs);
1795 break;
1797 #else
1798 case TRAP_debug:
1800 void store_cpu_user_regs(struct cpu_user_regs *regs);
1801 long do_sched_op(unsigned long op);
1804 store_cpu_user_regs(&regs);
1805 __vm_clear_bit(GUEST_PENDING_DBG_EXCEPTIONS, PENDING_DEBUG_EXC_BS);
1807 domain_pause_for_debugger();
1808 do_sched_op(SCHEDOP_yield);
1810 break;
1812 #endif
1813 case TRAP_no_device:
1815 vmx_do_no_device_fault();
1816 break;
1818 case TRAP_page_fault:
1820 __vmread(EXIT_QUALIFICATION, &va);
1821 __vmread(VM_EXIT_INTR_ERROR_CODE, &regs.error_code);
1823 TRACE_VMEXIT(3,regs.error_code);
1824 TRACE_VMEXIT(4,va);
1826 VMX_DBG_LOG(DBG_LEVEL_VMMU,
1827 "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx",
1828 (unsigned long)regs.eax, (unsigned long)regs.ebx,
1829 (unsigned long)regs.ecx, (unsigned long)regs.edx,
1830 (unsigned long)regs.esi, (unsigned long)regs.edi);
1831 v->arch.arch_vmx.mmio_op.inst_decoder_regs = &regs;
1833 if (!(error = vmx_do_page_fault(va, &regs))) {
1834 /*
1835 * Inject #PG using Interruption-Information Fields
1836 */
1837 vmx_inject_exception(v, TRAP_page_fault, regs.error_code);
1838 v->arch.arch_vmx.cpu_cr2 = va;
1839 TRACE_3D(TRC_VMX_INT, v->domain->domain_id, TRAP_page_fault, va);
1841 break;
1843 case TRAP_nmi:
1844 do_nmi(&regs, 0);
1845 break;
1846 default:
1847 vmx_reflect_exception(v);
1848 break;
1850 break;
1852 case EXIT_REASON_EXTERNAL_INTERRUPT:
1853 vmx_vmexit_do_extint(&regs);
1854 break;
1855 case EXIT_REASON_PENDING_INTERRUPT:
1856 __vmwrite(CPU_BASED_VM_EXEC_CONTROL,
1857 MONITOR_CPU_BASED_EXEC_CONTROLS);
1858 break;
1859 case EXIT_REASON_TASK_SWITCH:
1860 __vmx_bug(&regs);
1861 break;
1862 case EXIT_REASON_CPUID:
1863 __get_instruction_length(inst_len);
1864 vmx_vmexit_do_cpuid(regs.eax, &regs);
1865 __update_guest_eip(inst_len);
1866 break;
1867 case EXIT_REASON_HLT:
1868 __get_instruction_length(inst_len);
1869 __update_guest_eip(inst_len);
1870 vmx_vmexit_do_hlt();
1871 break;
1872 case EXIT_REASON_INVLPG:
1874 unsigned long va;
1876 __vmread(EXIT_QUALIFICATION, &va);
1877 vmx_vmexit_do_invlpg(va);
1878 __get_instruction_length(inst_len);
1879 __update_guest_eip(inst_len);
1880 break;
1882 case EXIT_REASON_VMCALL:
1883 __get_instruction_length(inst_len);
1884 __vmread(GUEST_RIP, &eip);
1885 __vmread(EXIT_QUALIFICATION, &exit_qualification);
1887 vmx_print_line(regs.eax, v); /* provides the current domain */
1888 __update_guest_eip(inst_len);
1889 break;
1890 case EXIT_REASON_CR_ACCESS:
1892 __vmread(GUEST_RIP, &eip);
1893 __get_instruction_length(inst_len);
1894 __vmread(EXIT_QUALIFICATION, &exit_qualification);
1896 VMX_DBG_LOG(DBG_LEVEL_1, "eip = %lx, inst_len =%lx, exit_qualification = %lx",
1897 eip, inst_len, exit_qualification);
1898 if (vmx_cr_access(exit_qualification, &regs))
1899 __update_guest_eip(inst_len);
1900 TRACE_VMEXIT(3,regs.error_code);
1901 TRACE_VMEXIT(4,exit_qualification);
1902 break;
1904 case EXIT_REASON_DR_ACCESS:
1905 __vmread(EXIT_QUALIFICATION, &exit_qualification);
1906 vmx_dr_access(exit_qualification, &regs);
1907 __get_instruction_length(inst_len);
1908 __update_guest_eip(inst_len);
1909 break;
1910 case EXIT_REASON_IO_INSTRUCTION:
1911 __vmread(EXIT_QUALIFICATION, &exit_qualification);
1912 __get_instruction_length(inst_len);
1913 vmx_io_instruction(&regs, exit_qualification, inst_len);
1914 TRACE_VMEXIT(4,exit_qualification);
1915 break;
1916 case EXIT_REASON_MSR_READ:
1917 __get_instruction_length(inst_len);
1918 vmx_do_msr_read(&regs);
1919 __update_guest_eip(inst_len);
1920 break;
1921 case EXIT_REASON_MSR_WRITE:
1922 __vmread(GUEST_RIP, &eip);
1923 vmx_do_msr_write(&regs);
1924 __get_instruction_length(inst_len);
1925 __update_guest_eip(inst_len);
1926 break;
1927 case EXIT_REASON_MWAIT_INSTRUCTION:
1928 __vmx_bug(&regs);
1929 break;
1930 default:
1931 __vmx_bug(&regs); /* should not happen */
1935 asmlinkage void load_cr2(void)
1937 struct vcpu *v = current;
1939 local_irq_disable();
1940 #ifdef __i386__
1941 asm volatile("movl %0,%%cr2": :"r" (v->arch.arch_vmx.cpu_cr2));
1942 #else
1943 asm volatile("movq %0,%%cr2": :"r" (v->arch.arch_vmx.cpu_cr2));
1944 #endif
1947 asmlinkage void trace_vmentry (void)
1949 TRACE_5D(TRC_VMENTRY,trace_values[current->processor][0],
1950 trace_values[current->processor][1],trace_values[current->processor][2],
1951 trace_values[current->processor][3],trace_values[current->processor][4]);
1952 TRACE_VMEXIT(0,9);
1953 TRACE_VMEXIT(1,9);
1954 TRACE_VMEXIT(2,9);
1955 TRACE_VMEXIT(3,9);
1956 TRACE_VMEXIT(4,9);
1957 return;
1959 asmlinkage void trace_vmexit (void)
1961 TRACE_3D(TRC_VMEXIT,0,0,0);
1962 return;
1964 #endif /* CONFIG_VMX */
1966 /*
1967 * Local variables:
1968 * mode: C
1969 * c-set-style: "BSD"
1970 * c-basic-offset: 4
1971 * tab-width: 4
1972 * indent-tabs-mode: nil
1973 * End:
1974 */