direct-io.hg

view xen/arch/x86/vmx.c @ 8379:e7977d6aa6ba

Unmap io shared page only if it is properly initialized.

Signed-off-by: Xin Li <xin.b.li@intel.com>
author kaf24@firebug.cl.cam.ac.uk
date Thu Dec 15 20:43:44 2005 +0100 (2005-12-15)
parents fb1f2498ce1e
children 6ee4c16bfdec
1 /*
2 * vmx.c: handling VMX architecture-related VM exits
3 * Copyright (c) 2004, Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
16 * Place - Suite 330, Boston, MA 02111-1307 USA.
17 *
18 */
20 #include <xen/config.h>
21 #include <xen/init.h>
22 #include <xen/lib.h>
23 #include <xen/trace.h>
24 #include <xen/sched.h>
25 #include <xen/irq.h>
26 #include <xen/softirq.h>
27 #include <xen/domain_page.h>
28 #include <asm/current.h>
29 #include <asm/io.h>
30 #include <asm/shadow.h>
31 #include <asm/regs.h>
32 #include <asm/cpufeature.h>
33 #include <asm/processor.h>
34 #include <asm/types.h>
35 #include <asm/msr.h>
36 #include <asm/spinlock.h>
37 #include <asm/vmx.h>
38 #include <asm/vmx_vmcs.h>
39 #include <asm/vmx_intercept.h>
40 #include <asm/shadow.h>
41 #if CONFIG_PAGING_LEVELS >= 3
42 #include <asm/shadow_64.h>
43 #endif
44 #include <public/sched.h>
45 #include <public/io/ioreq.h>
46 #include <asm/vmx_vpic.h>
47 #include <asm/vmx_vlapic.h>
49 int hvm_enabled;
51 #ifdef CONFIG_VMX
52 unsigned int opt_vmx_debug_level = 0;
53 integer_param("vmx_debug", opt_vmx_debug_level);
55 static unsigned long trace_values[NR_CPUS][4];
56 #define TRACE_VMEXIT(index,value) trace_values[current->processor][index]=value
58 static int vmx_switch_on;
60 void vmx_final_setup_guest(struct vcpu *v)
61 {
62 v->arch.schedule_tail = arch_vmx_do_launch;
64 if ( v->vcpu_id == 0 )
65 {
66 struct domain *d = v->domain;
67 struct vcpu *vc;
69 d->arch.vmx_platform.lapic_enable = v->arch.guest_context.user_regs.ecx;
70 v->arch.guest_context.user_regs.ecx = 0;
71 VMX_DBG_LOG(DBG_LEVEL_VLAPIC, "lapic enable is %d.\n",
72 d->arch.vmx_platform.lapic_enable);
74 /* Initialize monitor page table */
75 for_each_vcpu(d, vc)
76 vc->arch.monitor_table = mk_pagetable(0);
78 /*
79 * Required to do this once per domain
80 * XXX todo: add a separate function to do these.
81 */
82 memset(&d->shared_info->evtchn_mask[0], 0xff,
83 sizeof(d->shared_info->evtchn_mask));
85 /* Put the domain in shadow mode even though we're going to be using
86 * the shared 1:1 page table initially. It shouldn't hurt */
87 shadow_mode_enable(d,
88 SHM_enable|SHM_refcounts|
89 SHM_translate|SHM_external|SHM_wr_pt_pte);
90 }
92 vmx_switch_on = 1;
93 }
95 void vmx_relinquish_resources(struct vcpu *v)
96 {
97 struct vmx_virpit *vpit;
99 if ( !VMX_DOMAIN(v) )
100 return;
102 if (v->vcpu_id == 0) {
103 /* unmap IO shared page */
104 struct domain *d = v->domain;
105 if ( d->arch.vmx_platform.shared_page_va )
106 unmap_domain_page((void *)d->arch.vmx_platform.shared_page_va);
107 }
109 destroy_vmcs(&v->arch.arch_vmx);
110 free_monitor_pagetable(v);
111 vpit = &v->domain->arch.vmx_platform.vmx_pit;
112 if ( active_ac_timer(&(vpit->pit_timer)) )
113 rem_ac_timer(&vpit->pit_timer);
114 if ( active_ac_timer(&v->arch.arch_vmx.hlt_timer) )
115 rem_ac_timer(&v->arch.arch_vmx.hlt_timer);
116 if ( vmx_apic_support(v->domain) && (VLAPIC(v) != NULL) )
117 {
118 rem_ac_timer(&VLAPIC(v)->vlapic_timer);
119 xfree(VLAPIC(v));
120 }
121 }
123 #ifdef __x86_64__
124 static struct msr_state percpu_msr[NR_CPUS];
126 static u32 msr_data_index[VMX_MSR_COUNT] =
127 {
128 MSR_LSTAR, MSR_STAR, MSR_CSTAR,
129 MSR_SYSCALL_MASK, MSR_EFER,
130 };
132 /*
133 * To avoid MSR save/restore at every VM exit/entry time, we restore
134 * the x86_64 specific MSRs at domain switch time. Since those MSRs
135 * are not modified once set for generic domains, we don't save them,
136 * but simply reset them to the values set at percpu_traps_init().
137 */
138 void vmx_load_msrs(struct vcpu *n)
139 {
140 struct msr_state *host_state = &percpu_msr[smp_processor_id()];
141 int i;
143 if ( !vmx_switch_on )
144 return;
146 while ( host_state->flags )
147 {
148 i = find_first_set_bit(host_state->flags);
149 wrmsrl(msr_data_index[i], host_state->msr_items[i]);
150 clear_bit(i, &host_state->flags);
151 }
152 }
154 static void vmx_save_init_msrs(void)
155 {
156 struct msr_state *host_state = &percpu_msr[smp_processor_id()];
157 int i;
159 for ( i = 0; i < VMX_MSR_COUNT; i++ )
160 rdmsrl(msr_data_index[i], host_state->msr_items[i]);
161 }
163 #define CASE_READ_MSR(address) \
164 case MSR_ ## address: \
165 msr_content = msr->msr_items[VMX_INDEX_MSR_ ## address]; \
166 break
168 #define CASE_WRITE_MSR(address) \
169 case MSR_ ## address: \
170 { \
171 msr->msr_items[VMX_INDEX_MSR_ ## address] = msr_content; \
172 if (!test_bit(VMX_INDEX_MSR_ ## address, &msr->flags)) { \
173 set_bit(VMX_INDEX_MSR_ ## address, &msr->flags); \
174 } \
175 wrmsrl(MSR_ ## address, msr_content); \
176 set_bit(VMX_INDEX_MSR_ ## address, &host_state->flags); \
177 } \
178 break
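/*
 * The CASE_READ_MSR/CASE_WRITE_MSR macros above implement lazy MSR
 * switching: a guest WRMSR updates the cached guest value, writes the
 * hardware MSR immediately, and flags the MSR as dirty in both the
 * guest and host msr_state so that vmx_load_msrs()/vmx_restore_msrs()
 * can put the right value back at the next context switch.
 * Note: IS_CANO_ADDRESS below is a stub; the canonical-address check
 * is not actually performed.
 */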
180 #define IS_CANO_ADDRESS(add) 1
181 static inline int long_mode_do_msr_read(struct cpu_user_regs *regs)
182 {
183 u64 msr_content = 0;
184 struct vcpu *vc = current;
185 struct msr_state * msr = &vc->arch.arch_vmx.msr_content;
186 switch(regs->ecx){
187 case MSR_EFER:
188 msr_content = msr->msr_items[VMX_INDEX_MSR_EFER];
189 VMX_DBG_LOG(DBG_LEVEL_2, "EFER msr_content %llx\n", (unsigned long long)msr_content);
190 if (test_bit(VMX_CPU_STATE_LME_ENABLED,
191 &vc->arch.arch_vmx.cpu_state))
192 msr_content |= 1 << _EFER_LME;
194 if (VMX_LONG_GUEST(vc))
195 msr_content |= 1 << _EFER_LMA;
196 break;
197 case MSR_FS_BASE:
198 if (!(VMX_LONG_GUEST(vc)))
199 /* XXX should this be a #GP fault? */
200 domain_crash_synchronous();
201 __vmread(GUEST_FS_BASE, &msr_content);
202 break;
203 case MSR_GS_BASE:
204 if (!(VMX_LONG_GUEST(vc)))
205 domain_crash_synchronous();
206 __vmread(GUEST_GS_BASE, &msr_content);
207 break;
208 case MSR_SHADOW_GS_BASE:
209 msr_content = msr->shadow_gs;
210 break;
212 CASE_READ_MSR(STAR);
213 CASE_READ_MSR(LSTAR);
214 CASE_READ_MSR(CSTAR);
215 CASE_READ_MSR(SYSCALL_MASK);
216 default:
217 return 0;
218 }
219 VMX_DBG_LOG(DBG_LEVEL_2, "mode_do_msr_read: msr_content: %lx\n", msr_content);
220 regs->eax = msr_content & 0xffffffff;
221 regs->edx = msr_content >> 32;
222 return 1;
223 }
225 static inline int long_mode_do_msr_write(struct cpu_user_regs *regs)
226 {
227 u64 msr_content = regs->eax | ((u64)regs->edx << 32);
228 struct vcpu *vc = current;
229 struct msr_state * msr = &vc->arch.arch_vmx.msr_content;
230 struct msr_state * host_state =
231 &percpu_msr[smp_processor_id()];
233 VMX_DBG_LOG(DBG_LEVEL_1, " mode_do_msr_write msr %lx msr_content %lx\n",
234 regs->ecx, msr_content);
236 switch (regs->ecx){
237 case MSR_EFER:
238 if ((msr_content & EFER_LME) ^
239 test_bit(VMX_CPU_STATE_LME_ENABLED,
240 &vc->arch.arch_vmx.cpu_state)){
241 if (test_bit(VMX_CPU_STATE_PG_ENABLED,
242 &vc->arch.arch_vmx.cpu_state) ||
243 !test_bit(VMX_CPU_STATE_PAE_ENABLED,
244 &vc->arch.arch_vmx.cpu_state)){
245 vmx_inject_exception(vc, TRAP_gp_fault, 0);
246 }
247 }
248 if (msr_content & EFER_LME)
249 set_bit(VMX_CPU_STATE_LME_ENABLED,
250 &vc->arch.arch_vmx.cpu_state);
251 /* No update for LME/LMA since they have no effect */
252 msr->msr_items[VMX_INDEX_MSR_EFER] =
253 msr_content;
254 if (msr_content & ~(EFER_LME | EFER_LMA)){
255 msr->msr_items[VMX_INDEX_MSR_EFER] = msr_content;
256 if (!test_bit(VMX_INDEX_MSR_EFER, &msr->flags)){
257 rdmsrl(MSR_EFER,
258 host_state->msr_items[VMX_INDEX_MSR_EFER]);
259 set_bit(VMX_INDEX_MSR_EFER, &host_state->flags);
260 set_bit(VMX_INDEX_MSR_EFER, &msr->flags);
261 wrmsrl(MSR_EFER, msr_content);
262 }
263 }
264 break;
266 case MSR_FS_BASE:
267 case MSR_GS_BASE:
268 if (!(VMX_LONG_GUEST(vc)))
269 domain_crash_synchronous();
270 if (!IS_CANO_ADDRESS(msr_content)){
271 VMX_DBG_LOG(DBG_LEVEL_1, "Non-canonical address in MSR write\n");
272 vmx_inject_exception(vc, TRAP_gp_fault, 0);
273 }
274 if (regs->ecx == MSR_FS_BASE)
275 __vmwrite(GUEST_FS_BASE, msr_content);
276 else
277 __vmwrite(GUEST_GS_BASE, msr_content);
278 break;
280 case MSR_SHADOW_GS_BASE:
281 if (!(VMX_LONG_GUEST(vc)))
282 domain_crash_synchronous();
283 vc->arch.arch_vmx.msr_content.shadow_gs = msr_content;
284 wrmsrl(MSR_SHADOW_GS_BASE, msr_content);
285 break;
287 CASE_WRITE_MSR(STAR);
288 CASE_WRITE_MSR(LSTAR);
289 CASE_WRITE_MSR(CSTAR);
290 CASE_WRITE_MSR(SYSCALL_MASK);
291 default:
292 return 0;
293 }
294 return 1;
295 }
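/*
 * Restore the guest's x86_64 MSRs when switching to this VCPU: reload
 * the shadow GS base unconditionally, then write back every MSR the
 * guest has modified (tracked in guest_state->flags), marking each one
 * in host_state->flags so vmx_load_msrs() can restore the host value
 * later.
 */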
297 void
298 vmx_restore_msrs(struct vcpu *v)
299 {
300 int i = 0;
301 struct msr_state *guest_state;
302 struct msr_state *host_state;
303 unsigned long guest_flags ;
305 guest_state = &v->arch.arch_vmx.msr_content;
306 host_state = &percpu_msr[smp_processor_id()];
308 wrmsrl(MSR_SHADOW_GS_BASE, guest_state->shadow_gs);
309 guest_flags = guest_state->flags;
310 if (!guest_flags)
311 return;
313 while (guest_flags){
314 i = find_first_set_bit(guest_flags);
316 VMX_DBG_LOG(DBG_LEVEL_2,
317 "restore guest's index %d msr %lx with %lx\n",
318 i, (unsigned long) msr_data_index[i], (unsigned long) guest_state->msr_items[i]);
319 set_bit(i, &host_state->flags);
320 wrmsrl(msr_data_index[i], guest_state->msr_items[i]);
321 clear_bit(i, &guest_flags);
322 }
323 }
325 #else /* __i386__ */
326 #define vmx_save_init_msrs() ((void)0)
328 static inline int long_mode_do_msr_read(struct cpu_user_regs *regs){
329 return 0;
330 }
331 static inline int long_mode_do_msr_write(struct cpu_user_regs *regs){
332 return 0;
333 }
334 #endif
336 extern long evtchn_send(int lport);
337 extern long do_block(void);
338 void do_nmi(struct cpu_user_regs *, unsigned long);
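/*
 * Sanity-check the execution controls we intend to use against the
 * corresponding VMX capability MSR.  This appears to be a simple range
 * check against the MSR's low/high halves rather than a per-bit
 * allowed-0/allowed-1 test.
 */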
340 static int check_vmx_controls(u32 ctrls, u32 msr)
341 {
342 u32 vmx_msr_low, vmx_msr_high;
344 rdmsr(msr, vmx_msr_low, vmx_msr_high);
345 if (ctrls < vmx_msr_low || ctrls > vmx_msr_high) {
346 printk("Insufficient VMX capability 0x%x, "
347 "msr=0x%x,low=0x%8x,high=0x%x\n",
348 ctrls, msr, vmx_msr_low, vmx_msr_high);
349 return 0;
350 }
351 return 1;
352 }
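/*
 * Bring VMX up on this CPU: check the CPUID VMX feature bit and the
 * IA32_FEATURE_CONTROL lock/enable bits, verify the pin-based,
 * CPU-based, exit and entry control settings, set CR4.VMXE, allocate
 * the VMXON region and execute VMXON, then snapshot the host MSRs.
 */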
354 int start_vmx(void)
355 {
356 struct vmcs_struct *vmcs;
357 u32 ecx;
358 u32 eax, edx;
359 u64 phys_vmcs; /* debugging */
361 /*
362 * Xen does not fill x86_capability words except 0.
363 */
364 ecx = cpuid_ecx(1);
365 boot_cpu_data.x86_capability[4] = ecx;
367 if (!(test_bit(X86_FEATURE_VMXE, &boot_cpu_data.x86_capability)))
368 return 0;
370 rdmsr(IA32_FEATURE_CONTROL_MSR, eax, edx);
372 if (eax & IA32_FEATURE_CONTROL_MSR_LOCK) {
373 if ((eax & IA32_FEATURE_CONTROL_MSR_ENABLE_VMXON) == 0x0) {
374 printk("VMX disabled by Feature Control MSR.\n");
375 return 0;
376 }
377 }
378 else {
379 wrmsr(IA32_FEATURE_CONTROL_MSR,
380 IA32_FEATURE_CONTROL_MSR_LOCK |
381 IA32_FEATURE_CONTROL_MSR_ENABLE_VMXON, 0);
382 }
384 if (!check_vmx_controls(MONITOR_PIN_BASED_EXEC_CONTROLS,
385 MSR_IA32_VMX_PINBASED_CTLS_MSR))
386 return 0;
387 if (!check_vmx_controls(MONITOR_CPU_BASED_EXEC_CONTROLS,
388 MSR_IA32_VMX_PROCBASED_CTLS_MSR))
389 return 0;
390 if (!check_vmx_controls(MONITOR_VM_EXIT_CONTROLS,
391 MSR_IA32_VMX_EXIT_CTLS_MSR))
392 return 0;
393 if (!check_vmx_controls(MONITOR_VM_ENTRY_CONTROLS,
394 MSR_IA32_VMX_ENTRY_CTLS_MSR))
395 return 0;
397 set_in_cr4(X86_CR4_VMXE); /* Enable VMXE */
399 if (!(vmcs = alloc_vmcs())) {
400 printk("Failed to allocate VMCS\n");
401 return 0;
402 }
404 phys_vmcs = (u64) virt_to_phys(vmcs);
406 if (!(__vmxon(phys_vmcs))) {
407 printk("VMXON is done\n");
408 }
410 vmx_save_init_msrs();
412 hvm_enabled = 1;
414 return 1;
415 }
417 void stop_vmx(void)
418 {
419 if (read_cr4() & X86_CR4_VMXE)
420 __vmxoff();
421 }
423 /*
424 * Not all cases receive valid value in the VM-exit instruction length field.
425 */
426 #define __get_instruction_length(len) \
427 __vmread(VM_EXIT_INSTRUCTION_LEN, &(len)); \
428 if ((len) < 1 || (len) > 15) \
429 __vmx_bug(&regs);
431 static void inline __update_guest_eip(unsigned long inst_len)
432 {
433 unsigned long current_eip;
435 __vmread(GUEST_RIP, &current_eip);
436 __vmwrite(GUEST_RIP, current_eip + inst_len);
437 }
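/*
 * Guest page-fault handling.  If the faulting guest physical address
 * falls in MMIO space it is forwarded to the device model via
 * handle_mmio(); otherwise the shadow page-fault handler resolves it.
 * With guest paging disabled the guest VA equals the guest PA, so the
 * fault is presumably an MMIO access and is forwarded directly.
 */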
440 static int vmx_do_page_fault(unsigned long va, struct cpu_user_regs *regs)
441 {
442 unsigned long gpa; /* FIXME: PAE */
443 int result;
445 #if 0 /* keep for debugging */
446 {
447 unsigned long eip;
449 __vmread(GUEST_RIP, &eip);
450 VMX_DBG_LOG(DBG_LEVEL_VMMU,
451 "vmx_do_page_fault = 0x%lx, eip = %lx, error_code = %lx",
452 va, eip, (unsigned long)regs->error_code);
453 }
454 #endif
456 if (!vmx_paging_enabled(current)){
457 handle_mmio(va, va);
458 TRACE_VMEXIT (2,2);
459 return 1;
460 }
461 gpa = gva_to_gpa(va);
463 /* Use 1:1 page table to identify MMIO address space */
464 if ( mmio_space(gpa) ){
465 struct vcpu *v = current;
466 /* No support for APIC */
467 if (!vmx_apic_support(v->domain) && gpa >= 0xFEC00000) {
468 u32 inst_len;
469 __vmread(VM_EXIT_INSTRUCTION_LEN, &(inst_len));
470 __update_guest_eip(inst_len);
471 return 1;
472 }
473 TRACE_VMEXIT (2,2);
474 handle_mmio(va, gpa);
475 return 1;
476 }
478 result = shadow_fault(va, regs);
479 TRACE_VMEXIT (2,result);
480 #if 0
481 if ( !result )
482 {
483 __vmread(GUEST_RIP, &eip);
484 printk("vmx pgfault to guest va=%lx eip=%lx\n", va, eip);
485 }
486 #endif
488 return result;
489 }
491 static void vmx_do_no_device_fault(void)
492 {
493 unsigned long cr0;
494 struct vcpu *v = current;
496 clts();
497 setup_fpu(current);
498 __vmread_vcpu(v, CR0_READ_SHADOW, &cr0);
499 if (!(cr0 & X86_CR0_TS)) {
500 __vmread_vcpu(v, GUEST_CR0, &cr0);
501 cr0 &= ~X86_CR0_TS;
502 __vmwrite(GUEST_CR0, cr0);
503 }
504 __vm_clear_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_NM);
505 }
507 /* Reserved bits: [31:15], [12:11], [9], [6], [2:1] */
508 #define VMX_VCPU_CPUID_L1_RESERVED 0xffff9a46
510 static void vmx_vmexit_do_cpuid(unsigned long input, struct cpu_user_regs *regs)
511 {
512 unsigned int eax, ebx, ecx, edx;
513 unsigned long eip;
514 struct vcpu *v = current;
516 __vmread(GUEST_RIP, &eip);
518 VMX_DBG_LOG(DBG_LEVEL_1,
519 "do_cpuid: (eax) %lx, (ebx) %lx, (ecx) %lx, (edx) %lx,"
520 " (esi) %lx, (edi) %lx",
521 (unsigned long)regs->eax, (unsigned long)regs->ebx,
522 (unsigned long)regs->ecx, (unsigned long)regs->edx,
523 (unsigned long)regs->esi, (unsigned long)regs->edi);
525 cpuid(input, &eax, &ebx, &ecx, &edx);
527 if ( input == 1 )
528 {
529 if ( vmx_apic_support(v->domain) &&
530 !vlapic_global_enabled((VLAPIC(v))) )
531 clear_bit(X86_FEATURE_APIC, &edx);
533 #ifdef __x86_64__
534 if ( v->domain->arch.ops->guest_paging_levels == PAGING_L2 )
535 #endif
536 {
537 clear_bit(X86_FEATURE_PSE, &edx);
538 clear_bit(X86_FEATURE_PAE, &edx);
539 clear_bit(X86_FEATURE_PSE36, &edx);
540 }
542 /* Unsupportable for virtualised CPUs. */
543 ecx &= ~VMX_VCPU_CPUID_L1_RESERVED; /* mask off reserved bits */
544 clear_bit(X86_FEATURE_VMXE & 31, &ecx);
545 clear_bit(X86_FEATURE_MWAIT & 31, &ecx);
546 }
548 regs->eax = (unsigned long) eax;
549 regs->ebx = (unsigned long) ebx;
550 regs->ecx = (unsigned long) ecx;
551 regs->edx = (unsigned long) edx;
553 VMX_DBG_LOG(DBG_LEVEL_1,
554 "vmx_vmexit_do_cpuid: eip: %lx, input: %lx, out:eax=%x, ebx=%x, ecx=%x, edx=%x",
555 eip, input, eax, ebx, ecx, edx);
557 }
559 #define CASE_GET_REG_P(REG, reg) \
560 case REG_ ## REG: reg_p = (unsigned long *)&(regs->reg); break
562 static void vmx_dr_access (unsigned long exit_qualification, struct cpu_user_regs *regs)
563 {
564 unsigned int reg;
565 unsigned long *reg_p = 0;
566 struct vcpu *v = current;
567 unsigned long eip;
569 __vmread(GUEST_RIP, &eip);
571 reg = exit_qualification & DEBUG_REG_ACCESS_NUM;
573 VMX_DBG_LOG(DBG_LEVEL_1,
574 "vmx_dr_access : eip=%lx, reg=%d, exit_qualification = %lx",
575 eip, reg, exit_qualification);
577 switch(exit_qualification & DEBUG_REG_ACCESS_REG) {
578 CASE_GET_REG_P(EAX, eax);
579 CASE_GET_REG_P(ECX, ecx);
580 CASE_GET_REG_P(EDX, edx);
581 CASE_GET_REG_P(EBX, ebx);
582 CASE_GET_REG_P(EBP, ebp);
583 CASE_GET_REG_P(ESI, esi);
584 CASE_GET_REG_P(EDI, edi);
585 case REG_ESP:
586 break;
587 default:
588 __vmx_bug(regs);
589 }
591 switch (exit_qualification & DEBUG_REG_ACCESS_TYPE) {
592 case TYPE_MOV_TO_DR:
593 /* don't need to check the range */
594 if (reg != REG_ESP)
595 v->arch.guest_context.debugreg[reg] = *reg_p;
596 else {
597 unsigned long value;
598 __vmread(GUEST_RSP, &value);
599 v->arch.guest_context.debugreg[reg] = value;
600 }
601 break;
602 case TYPE_MOV_FROM_DR:
603 if (reg != REG_ESP)
604 *reg_p = v->arch.guest_context.debugreg[reg];
605 else {
606 __vmwrite(GUEST_RSP, v->arch.guest_context.debugreg[reg]);
607 }
608 break;
609 }
610 }
612 /*
613 * Invalidate the TLB entry for va. Invalidate the shadow page
614 * corresponding to the address va.
615 */
616 static void vmx_vmexit_do_invlpg(unsigned long va)
617 {
618 unsigned long eip;
619 struct vcpu *v = current;
621 __vmread(GUEST_RIP, &eip);
623 VMX_DBG_LOG(DBG_LEVEL_VMMU, "vmx_vmexit_do_invlpg: eip=%lx, va=%lx",
624 eip, va);
626 /*
627 * We do the safest things first, then try to update the shadow
628 * by copying from the guest.
629 */
630 shadow_invlpg(v, va);
631 }
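/*
 * Scan the prefix bytes of the current string I/O instruction for a
 * segment override and return whether the selected segment register
 * holds a null selector, in which case the linear address reported by
 * hardware cannot be used.
 */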
633 static int check_for_null_selector(unsigned long eip)
634 {
635 unsigned char inst[MAX_INST_LEN];
636 unsigned long sel;
637 int i, inst_len;
638 int inst_copy_from_guest(unsigned char *, unsigned long, int);
640 __vmread(VM_EXIT_INSTRUCTION_LEN, &inst_len);
641 memset(inst, 0, MAX_INST_LEN);
642 if (inst_copy_from_guest(inst, eip, inst_len) != inst_len) {
643 printf("check_for_null_selector: failed to fetch guest instruction\n");
644 domain_crash_synchronous();
645 }
647 for (i = 0; i < inst_len; i++) {
648 switch (inst[i]) {
649 case 0xf3: /* REPZ */
650 case 0xf2: /* REPNZ */
651 case 0xf0: /* LOCK */
652 case 0x66: /* data32 */
653 case 0x67: /* addr32 */
654 continue;
655 case 0x2e: /* CS */
656 __vmread(GUEST_CS_SELECTOR, &sel);
657 break;
658 case 0x36: /* SS */
659 __vmread(GUEST_SS_SELECTOR, &sel);
660 break;
661 case 0x26: /* ES */
662 __vmread(GUEST_ES_SELECTOR, &sel);
663 break;
664 case 0x64: /* FS */
665 __vmread(GUEST_FS_SELECTOR, &sel);
666 break;
667 case 0x65: /* GS */
668 __vmread(GUEST_GS_SELECTOR, &sel);
669 break;
670 case 0x3e: /* DS */
671 /* FALLTHROUGH */
672 default:
673 /* DS is the default */
674 __vmread(GUEST_DS_SELECTOR, &sel);
675 }
676 return sel == 0 ? 1 : 0;
677 }
679 return 0;
680 }
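/*
 * Build a port I/O request in the vcpu's shared ioreq page.  Requests
 * that the built-in port intercepts can satisfy are completed
 * immediately; everything else is sent to the device model over the
 * event channel, and the vcpu then waits for the response.
 */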
682 void send_pio_req(struct cpu_user_regs *regs, unsigned long port,
683 unsigned long count, int size, long value, int dir, int pvalid)
684 {
685 struct vcpu *v = current;
686 vcpu_iodata_t *vio;
687 ioreq_t *p;
689 vio = get_vio(v->domain, v->vcpu_id);
690 if (vio == NULL) {
691 printk("bad shared page: %lx\n", (unsigned long) vio);
692 domain_crash_synchronous();
693 }
695 if (test_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags)) {
696 printf("VMX I/O has not yet completed\n");
697 domain_crash_synchronous();
698 }
699 set_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags);
701 p = &vio->vp_ioreq;
702 p->dir = dir;
703 p->pdata_valid = pvalid;
705 p->type = IOREQ_TYPE_PIO;
706 p->size = size;
707 p->addr = port;
708 p->count = count;
709 p->df = regs->eflags & EF_DF ? 1 : 0;
711 if (pvalid) {
712 if (vmx_paging_enabled(current))
713 p->u.pdata = (void *) gva_to_gpa(value);
714 else
715 p->u.pdata = (void *) value; /* guest VA == guest PA */
716 } else
717 p->u.data = value;
719 if (vmx_portio_intercept(p)) {
720 p->state = STATE_IORESP_READY;
721 vmx_io_assist(v);
722 return;
723 }
725 p->state = STATE_IOREQ_READY;
727 evtchn_send(iopacket_port(v->domain));
728 vmx_wait_io();
729 }
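/*
 * Decode an I/O-instruction VM exit: extract port, operand size and
 * direction from the exit qualification, handle REP/string forms, and
 * split string operations that would cross a page boundary before
 * handing the request to send_pio_req().
 */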
731 static void vmx_io_instruction(struct cpu_user_regs *regs,
732 unsigned long exit_qualification, unsigned long inst_len)
733 {
734 struct mmio_op *mmio_opp;
735 unsigned long eip, cs, eflags;
736 unsigned long port, size, dir;
737 int vm86;
739 mmio_opp = &current->arch.arch_vmx.mmio_op;
740 mmio_opp->instr = INSTR_PIO;
741 mmio_opp->flags = 0;
743 __vmread(GUEST_RIP, &eip);
744 __vmread(GUEST_CS_SELECTOR, &cs);
745 __vmread(GUEST_RFLAGS, &eflags);
746 vm86 = eflags & X86_EFLAGS_VM ? 1 : 0;
748 VMX_DBG_LOG(DBG_LEVEL_1,
749 "vmx_io_instruction: vm86 %d, eip=%lx:%lx, "
750 "exit_qualification = %lx",
751 vm86, cs, eip, exit_qualification);
753 if (test_bit(6, &exit_qualification))
754 port = (exit_qualification >> 16) & 0xFFFF;
755 else
756 port = regs->edx & 0xffff;
757 TRACE_VMEXIT(2, port);
758 size = (exit_qualification & 7) + 1;
759 dir = test_bit(3, &exit_qualification); /* direction */
761 if (test_bit(4, &exit_qualification)) { /* string instruction */
762 unsigned long addr, count = 1;
763 int sign = regs->eflags & EF_DF ? -1 : 1;
765 __vmread(GUEST_LINEAR_ADDRESS, &addr);
767 /*
768 * In protected mode, guest linear address is invalid if the
769 * selector is null.
770 */
771 if (!vm86 && check_for_null_selector(eip))
772 addr = dir == IOREQ_WRITE ? regs->esi : regs->edi;
774 if (test_bit(5, &exit_qualification)) { /* "rep" prefix */
775 mmio_opp->flags |= REPZ;
776 count = vm86 ? regs->ecx & 0xFFFF : regs->ecx;
777 }
779 /*
780 * Handle string pio instructions that cross pages or that
781 * are unaligned. See the comments in vmx_platform.c/handle_mmio()
782 */
783 if ((addr & PAGE_MASK) != ((addr + size - 1) & PAGE_MASK)) {
784 unsigned long value = 0;
786 mmio_opp->flags |= OVERLAP;
787 if (dir == IOREQ_WRITE)
788 vmx_copy(&value, addr, size, VMX_COPY_IN);
789 send_pio_req(regs, port, 1, size, value, dir, 0);
790 } else {
791 if ((addr & PAGE_MASK) != ((addr + count * size - 1) & PAGE_MASK)) {
792 if (sign > 0)
793 count = (PAGE_SIZE - (addr & ~PAGE_MASK)) / size;
794 else
795 count = (addr & ~PAGE_MASK) / size;
796 } else
797 __update_guest_eip(inst_len);
799 send_pio_req(regs, port, count, size, addr, dir, 1);
800 }
801 } else {
802 __update_guest_eip(inst_len);
803 send_pio_req(regs, port, 1, size, regs->eax, dir, 0);
804 }
805 }
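/*
 * Copy 'size' bytes between a hypervisor buffer and a guest linear
 * address, one page at a time, translating through the guest page
 * tables when guest paging is enabled.  Returns 1 on success, 0 if a
 * guest frame could not be resolved.
 */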
807 int
808 vmx_copy(void *buf, unsigned long laddr, int size, int dir)
809 {
810 unsigned long gpa, mfn;
811 char *addr;
812 int count;
814 while (size > 0) {
815 count = PAGE_SIZE - (laddr & ~PAGE_MASK);
816 if (count > size)
817 count = size;
819 if (vmx_paging_enabled(current)) {
820 gpa = gva_to_gpa(laddr);
821 mfn = get_mfn_from_pfn(gpa >> PAGE_SHIFT);
822 } else
823 mfn = get_mfn_from_pfn(laddr >> PAGE_SHIFT);
824 if (mfn == INVALID_MFN)
825 return 0;
827 addr = (char *)map_domain_page(mfn) + (laddr & ~PAGE_MASK);
829 if (dir == VMX_COPY_IN)
830 memcpy(buf, addr, count);
831 else
832 memcpy(addr, buf, count);
834 unmap_domain_page(addr);
836 laddr += count;
837 buf += count;
838 size -= count;
839 }
841 return 1;
842 }
844 int
845 vmx_world_save(struct vcpu *v, struct vmx_assist_context *c)
846 {
847 unsigned long inst_len;
848 int error = 0;
850 error |= __vmread(VM_EXIT_INSTRUCTION_LEN, &inst_len);
851 error |= __vmread(GUEST_RIP, &c->eip);
852 c->eip += inst_len; /* skip transition instruction */
853 error |= __vmread(GUEST_RSP, &c->esp);
854 error |= __vmread(GUEST_RFLAGS, &c->eflags);
856 error |= __vmread(CR0_READ_SHADOW, &c->cr0);
857 c->cr3 = v->arch.arch_vmx.cpu_cr3;
858 error |= __vmread(CR4_READ_SHADOW, &c->cr4);
860 error |= __vmread(GUEST_IDTR_LIMIT, &c->idtr_limit);
861 error |= __vmread(GUEST_IDTR_BASE, &c->idtr_base);
863 error |= __vmread(GUEST_GDTR_LIMIT, &c->gdtr_limit);
864 error |= __vmread(GUEST_GDTR_BASE, &c->gdtr_base);
866 error |= __vmread(GUEST_CS_SELECTOR, &c->cs_sel);
867 error |= __vmread(GUEST_CS_LIMIT, &c->cs_limit);
868 error |= __vmread(GUEST_CS_BASE, &c->cs_base);
869 error |= __vmread(GUEST_CS_AR_BYTES, &c->cs_arbytes.bytes);
871 error |= __vmread(GUEST_DS_SELECTOR, &c->ds_sel);
872 error |= __vmread(GUEST_DS_LIMIT, &c->ds_limit);
873 error |= __vmread(GUEST_DS_BASE, &c->ds_base);
874 error |= __vmread(GUEST_DS_AR_BYTES, &c->ds_arbytes.bytes);
876 error |= __vmread(GUEST_ES_SELECTOR, &c->es_sel);
877 error |= __vmread(GUEST_ES_LIMIT, &c->es_limit);
878 error |= __vmread(GUEST_ES_BASE, &c->es_base);
879 error |= __vmread(GUEST_ES_AR_BYTES, &c->es_arbytes.bytes);
881 error |= __vmread(GUEST_SS_SELECTOR, &c->ss_sel);
882 error |= __vmread(GUEST_SS_LIMIT, &c->ss_limit);
883 error |= __vmread(GUEST_SS_BASE, &c->ss_base);
884 error |= __vmread(GUEST_SS_AR_BYTES, &c->ss_arbytes.bytes);
886 error |= __vmread(GUEST_FS_SELECTOR, &c->fs_sel);
887 error |= __vmread(GUEST_FS_LIMIT, &c->fs_limit);
888 error |= __vmread(GUEST_FS_BASE, &c->fs_base);
889 error |= __vmread(GUEST_FS_AR_BYTES, &c->fs_arbytes.bytes);
891 error |= __vmread(GUEST_GS_SELECTOR, &c->gs_sel);
892 error |= __vmread(GUEST_GS_LIMIT, &c->gs_limit);
893 error |= __vmread(GUEST_GS_BASE, &c->gs_base);
894 error |= __vmread(GUEST_GS_AR_BYTES, &c->gs_arbytes.bytes);
896 error |= __vmread(GUEST_TR_SELECTOR, &c->tr_sel);
897 error |= __vmread(GUEST_TR_LIMIT, &c->tr_limit);
898 error |= __vmread(GUEST_TR_BASE, &c->tr_base);
899 error |= __vmread(GUEST_TR_AR_BYTES, &c->tr_arbytes.bytes);
901 error |= __vmread(GUEST_LDTR_SELECTOR, &c->ldtr_sel);
902 error |= __vmread(GUEST_LDTR_LIMIT, &c->ldtr_limit);
903 error |= __vmread(GUEST_LDTR_BASE, &c->ldtr_base);
904 error |= __vmread(GUEST_LDTR_AR_BYTES, &c->ldtr_arbytes.bytes);
906 return !error;
907 }
909 int
910 vmx_world_restore(struct vcpu *v, struct vmx_assist_context *c)
911 {
912 unsigned long mfn, old_cr4, old_base_mfn;
913 int error = 0;
915 error |= __vmwrite(GUEST_RIP, c->eip);
916 error |= __vmwrite(GUEST_RSP, c->esp);
917 error |= __vmwrite(GUEST_RFLAGS, c->eflags);
919 error |= __vmwrite(CR0_READ_SHADOW, c->cr0);
921 if (!vmx_paging_enabled(v)) {
922 VMX_DBG_LOG(DBG_LEVEL_VMMU, "switching to vmxassist. use phys table");
923 __vmwrite(GUEST_CR3, pagetable_get_paddr(v->domain->arch.phys_table));
924 goto skip_cr3;
925 }
927 if (c->cr3 == v->arch.arch_vmx.cpu_cr3) {
928 /*
929 * This is a simple TLB flush, implying the guest has
930 * removed some translation or changed page attributes.
931 * We simply invalidate the shadow.
932 */
933 mfn = get_mfn_from_pfn(c->cr3 >> PAGE_SHIFT);
934 if (mfn != pagetable_get_pfn(v->arch.guest_table)) {
935 printk("Invalid CR3 value=%x", c->cr3);
936 domain_crash_synchronous();
937 return 0;
938 }
939 shadow_sync_all(v->domain);
940 } else {
941 /*
942 * If different, make a shadow. Check if the PDBR is valid
943 * first.
944 */
945 VMX_DBG_LOG(DBG_LEVEL_VMMU, "CR3 c->cr3 = %x", c->cr3);
946 if ((c->cr3 >> PAGE_SHIFT) > v->domain->max_pages) {
947 printk("Invalid CR3 value=%x", c->cr3);
948 domain_crash_synchronous();
949 return 0;
950 }
951 mfn = get_mfn_from_pfn(c->cr3 >> PAGE_SHIFT);
952 if(!get_page(pfn_to_page(mfn), v->domain))
953 return 0;
954 old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
955 v->arch.guest_table = mk_pagetable(mfn << PAGE_SHIFT);
956 if (old_base_mfn)
957 put_page(pfn_to_page(old_base_mfn));
958 update_pagetables(v);
959 /*
960 * arch.shadow_table should now hold the next CR3 for shadow
961 */
962 v->arch.arch_vmx.cpu_cr3 = c->cr3;
963 VMX_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %x", c->cr3);
964 __vmwrite(GUEST_CR3, pagetable_get_paddr(v->arch.shadow_table));
965 }
967 skip_cr3:
969 error |= __vmread(CR4_READ_SHADOW, &old_cr4);
970 error |= __vmwrite(GUEST_CR4, (c->cr4 | VMX_CR4_HOST_MASK));
971 error |= __vmwrite(CR4_READ_SHADOW, c->cr4);
973 error |= __vmwrite(GUEST_IDTR_LIMIT, c->idtr_limit);
974 error |= __vmwrite(GUEST_IDTR_BASE, c->idtr_base);
976 error |= __vmwrite(GUEST_GDTR_LIMIT, c->gdtr_limit);
977 error |= __vmwrite(GUEST_GDTR_BASE, c->gdtr_base);
979 error |= __vmwrite(GUEST_CS_SELECTOR, c->cs_sel);
980 error |= __vmwrite(GUEST_CS_LIMIT, c->cs_limit);
981 error |= __vmwrite(GUEST_CS_BASE, c->cs_base);
982 error |= __vmwrite(GUEST_CS_AR_BYTES, c->cs_arbytes.bytes);
984 error |= __vmwrite(GUEST_DS_SELECTOR, c->ds_sel);
985 error |= __vmwrite(GUEST_DS_LIMIT, c->ds_limit);
986 error |= __vmwrite(GUEST_DS_BASE, c->ds_base);
987 error |= __vmwrite(GUEST_DS_AR_BYTES, c->ds_arbytes.bytes);
989 error |= __vmwrite(GUEST_ES_SELECTOR, c->es_sel);
990 error |= __vmwrite(GUEST_ES_LIMIT, c->es_limit);
991 error |= __vmwrite(GUEST_ES_BASE, c->es_base);
992 error |= __vmwrite(GUEST_ES_AR_BYTES, c->es_arbytes.bytes);
994 error |= __vmwrite(GUEST_SS_SELECTOR, c->ss_sel);
995 error |= __vmwrite(GUEST_SS_LIMIT, c->ss_limit);
996 error |= __vmwrite(GUEST_SS_BASE, c->ss_base);
997 error |= __vmwrite(GUEST_SS_AR_BYTES, c->ss_arbytes.bytes);
999 error |= __vmwrite(GUEST_FS_SELECTOR, c->fs_sel);
1000 error |= __vmwrite(GUEST_FS_LIMIT, c->fs_limit);
1001 error |= __vmwrite(GUEST_FS_BASE, c->fs_base);
1002 error |= __vmwrite(GUEST_FS_AR_BYTES, c->fs_arbytes.bytes);
1004 error |= __vmwrite(GUEST_GS_SELECTOR, c->gs_sel);
1005 error |= __vmwrite(GUEST_GS_LIMIT, c->gs_limit);
1006 error |= __vmwrite(GUEST_GS_BASE, c->gs_base);
1007 error |= __vmwrite(GUEST_GS_AR_BYTES, c->gs_arbytes.bytes);
1009 error |= __vmwrite(GUEST_TR_SELECTOR, c->tr_sel);
1010 error |= __vmwrite(GUEST_TR_LIMIT, c->tr_limit);
1011 error |= __vmwrite(GUEST_TR_BASE, c->tr_base);
1012 error |= __vmwrite(GUEST_TR_AR_BYTES, c->tr_arbytes.bytes);
1014 error |= __vmwrite(GUEST_LDTR_SELECTOR, c->ldtr_sel);
1015 error |= __vmwrite(GUEST_LDTR_LIMIT, c->ldtr_limit);
1016 error |= __vmwrite(GUEST_LDTR_BASE, c->ldtr_base);
1017 error |= __vmwrite(GUEST_LDTR_AR_BYTES, c->ldtr_arbytes.bytes);
1019 return !error;
1022 enum { VMX_ASSIST_INVOKE = 0, VMX_ASSIST_RESTORE };
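/*
 * World switch to/from vmxassist, the real-mode emulation helper that
 * resides in the guest's memory.  VMX_ASSIST_INVOKE saves the current
 * guest state into VMXASSIST_OLD_CONTEXT and loads
 * VMXASSIST_NEW_CONTEXT; VMX_ASSIST_RESTORE undoes the switch.
 */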
1024 int
1025 vmx_assist(struct vcpu *v, int mode)
1027 struct vmx_assist_context c;
1028 u32 magic;
1029 u32 cp;
1031 /* make sure vmxassist exists (this is not an error) */
1032 if (!vmx_copy(&magic, VMXASSIST_MAGIC_OFFSET, sizeof(magic), VMX_COPY_IN))
1033 return 0;
1034 if (magic != VMXASSIST_MAGIC)
1035 return 0;
1037 switch (mode) {
1038 /*
1039 * Transfer control to vmxassist.
1040 * Store the current context in VMXASSIST_OLD_CONTEXT and load
1041 * the new VMXASSIST_NEW_CONTEXT context. This context was created
1042 * by vmxassist and will transfer control to it.
1043 */
1044 case VMX_ASSIST_INVOKE:
1045 /* save the old context */
1046 if (!vmx_copy(&cp, VMXASSIST_OLD_CONTEXT, sizeof(cp), VMX_COPY_IN))
1047 goto error;
1048 if (cp != 0) {
1049 if (!vmx_world_save(v, &c))
1050 goto error;
1051 if (!vmx_copy(&c, cp, sizeof(c), VMX_COPY_OUT))
1052 goto error;
1055 /* restore the new context, this should activate vmxassist */
1056 if (!vmx_copy(&cp, VMXASSIST_NEW_CONTEXT, sizeof(cp), VMX_COPY_IN))
1057 goto error;
1058 if (cp != 0) {
1059 if (!vmx_copy(&c, cp, sizeof(c), VMX_COPY_IN))
1060 goto error;
1061 if (!vmx_world_restore(v, &c))
1062 goto error;
1063 return 1;
1065 break;
1067 /*
1068 * Restore the VMXASSIST_OLD_CONTEXT that was saved by VMX_ASSIST_INVOKE
1069 * above.
1070 */
1071 case VMX_ASSIST_RESTORE:
1072 /* save the old context */
1073 if (!vmx_copy(&cp, VMXASSIST_OLD_CONTEXT, sizeof(cp), VMX_COPY_IN))
1074 goto error;
1075 if (cp != 0) {
1076 if (!vmx_copy(&c, cp, sizeof(c), VMX_COPY_IN))
1077 goto error;
1078 if (!vmx_world_restore(v, &c))
1079 goto error;
1080 return 1;
1082 break;
1085 error:
1086 printf("Failed to transfer to vmxassist\n");
1087 domain_crash_synchronous();
1088 return 0;
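/*
 * Handle a guest write to CR0.  PE, PG and NE are kept set in the real
 * CR0; the requested value is only reflected in the read shadow.  When
 * the guest turns paging on we build shadow page tables for its CR3
 * (and enter long mode if LME is set); when it clears PE we switch the
 * world to vmxassist to emulate real mode.
 */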
1091 static int vmx_set_cr0(unsigned long value)
1093 struct vcpu *v = current;
1094 unsigned long mfn;
1095 unsigned long eip;
1096 int paging_enabled;
1097 unsigned long vm_entry_value;
1098 unsigned long old_cr0;
1100 /*
1101 * CR0: We don't want to lose PE and PG.
1102 */
1103 __vmread_vcpu(v, CR0_READ_SHADOW, &old_cr0);
1104 paging_enabled = (old_cr0 & X86_CR0_PE) && (old_cr0 & X86_CR0_PG);
1105 /* If the OS doesn't use CLTS to clear the TS bit... */
1106 if((old_cr0 & X86_CR0_TS) && !(value & X86_CR0_TS))
1108 clts();
1109 setup_fpu(v);
1113 __vmwrite(GUEST_CR0, value | X86_CR0_PE | X86_CR0_PG | X86_CR0_NE);
1114 __vmwrite(CR0_READ_SHADOW, value);
1116 VMX_DBG_LOG(DBG_LEVEL_VMMU, "Update CR0 value = %lx\n", value);
1118 if ((value & X86_CR0_PE) && (value & X86_CR0_PG) && !paging_enabled) {
1119 /*
1120 * The guest CR3 must be pointing to a guest physical page.
1121 */
1122 if ( !VALID_MFN(mfn = get_mfn_from_pfn(
1123 v->arch.arch_vmx.cpu_cr3 >> PAGE_SHIFT)) ||
1124 !get_page(pfn_to_page(mfn), v->domain) )
1126 printk("Invalid CR3 value = %lx", v->arch.arch_vmx.cpu_cr3);
1127 domain_crash_synchronous(); /* need to take a clean path */
1130 #if defined(__x86_64__)
1131 if (test_bit(VMX_CPU_STATE_LME_ENABLED,
1132 &v->arch.arch_vmx.cpu_state) &&
1133 !test_bit(VMX_CPU_STATE_PAE_ENABLED,
1134 &v->arch.arch_vmx.cpu_state)){
1135 VMX_DBG_LOG(DBG_LEVEL_1, "Guest enabled paging before enabling PAE\n");
1136 vmx_inject_exception(v, TRAP_gp_fault, 0);
1138 if (test_bit(VMX_CPU_STATE_LME_ENABLED,
1139 &v->arch.arch_vmx.cpu_state)){
1140 /* PAE must already be enabled at this point */
1141 VMX_DBG_LOG(DBG_LEVEL_1, "Enable the Long mode\n");
1142 set_bit(VMX_CPU_STATE_LMA_ENABLED,
1143 &v->arch.arch_vmx.cpu_state);
1144 __vmread(VM_ENTRY_CONTROLS, &vm_entry_value);
1145 vm_entry_value |= VM_ENTRY_CONTROLS_IA32E_MODE;
1146 __vmwrite(VM_ENTRY_CONTROLS, vm_entry_value);
1148 #if CONFIG_PAGING_LEVELS >= 4
1149 if(!shadow_set_guest_paging_levels(v->domain, 4)) {
1150 printk("Unsupported guest paging levels\n");
1151 domain_crash_synchronous(); /* need to take a clean path */
1153 #endif
1155 else
1157 #if CONFIG_PAGING_LEVELS >= 4
1158 if(!shadow_set_guest_paging_levels(v->domain, 2)) {
1159 printk("Unsupported guest paging levels\n");
1160 domain_crash_synchronous(); /* need to take a clean path */
1162 #endif
1166 unsigned long crn;
1167 /* update CR4's PAE if needed */
1168 __vmread(GUEST_CR4, &crn);
1169 if ( (!(crn & X86_CR4_PAE)) &&
1170 test_bit(VMX_CPU_STATE_PAE_ENABLED,
1171 &v->arch.arch_vmx.cpu_state) )
1173 VMX_DBG_LOG(DBG_LEVEL_1, "enable PAE on cr4\n");
1174 __vmwrite(GUEST_CR4, crn | X86_CR4_PAE);
1177 #endif
1178 /*
1179 * Now arch.guest_table points to machine physical.
1180 */
1181 v->arch.guest_table = mk_pagetable(mfn << PAGE_SHIFT);
1182 update_pagetables(v);
1184 VMX_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx",
1185 (unsigned long) (mfn << PAGE_SHIFT));
1187 __vmwrite(GUEST_CR3, pagetable_get_paddr(v->arch.shadow_table));
1188 /*
1189 * arch->shadow_table should hold the next CR3 for shadow
1190 */
1191 VMX_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx",
1192 v->arch.arch_vmx.cpu_cr3, mfn);
1195 if(!((value & X86_CR0_PE) && (value & X86_CR0_PG)) && paging_enabled)
1196 if(v->arch.arch_vmx.cpu_cr3){
1197 put_page(pfn_to_page(get_mfn_from_pfn(
1198 v->arch.arch_vmx.cpu_cr3 >> PAGE_SHIFT)));
1199 v->arch.guest_table = mk_pagetable(0);
1202 /*
1203 * VMX does not implement real-mode virtualization. We emulate
1204 * real-mode by performing a world switch to VMXAssist whenever
1205 * a partition disables the CR0.PE bit.
1206 */
1207 if ((value & X86_CR0_PE) == 0) {
1208 if ( value & X86_CR0_PG ) {
1209 /* inject GP here */
1210 vmx_inject_exception(v, TRAP_gp_fault, 0);
1211 return 0;
1212 } else {
1213 /*
1214 * Disable paging here.
1215 * Same as the PE == 1 && PG == 0 case.
1216 */
1217 if (test_bit(VMX_CPU_STATE_LMA_ENABLED,
1218 &v->arch.arch_vmx.cpu_state)){
1219 clear_bit(VMX_CPU_STATE_LMA_ENABLED,
1220 &v->arch.arch_vmx.cpu_state);
1221 __vmread(VM_ENTRY_CONTROLS, &vm_entry_value);
1222 vm_entry_value &= ~VM_ENTRY_CONTROLS_IA32E_MODE;
1223 __vmwrite(VM_ENTRY_CONTROLS, vm_entry_value);
1227 clear_all_shadow_status(v->domain);
1228 if (vmx_assist(v, VMX_ASSIST_INVOKE)) {
1229 set_bit(VMX_CPU_STATE_ASSIST_ENABLED, &v->arch.arch_vmx.cpu_state);
1230 __vmread(GUEST_RIP, &eip);
1231 VMX_DBG_LOG(DBG_LEVEL_1,
1232 "Transfering control to vmxassist %%eip 0x%lx\n", eip);
1233 return 0; /* do not update eip! */
1235 } else if (test_bit(VMX_CPU_STATE_ASSIST_ENABLED,
1236 &v->arch.arch_vmx.cpu_state)) {
1237 __vmread(GUEST_RIP, &eip);
1238 VMX_DBG_LOG(DBG_LEVEL_1,
1239 "Enabling CR0.PE at %%eip 0x%lx\n", eip);
1240 if (vmx_assist(v, VMX_ASSIST_RESTORE)) {
1241 clear_bit(VMX_CPU_STATE_ASSIST_ENABLED,
1242 &v->arch.arch_vmx.cpu_state);
1243 __vmread(GUEST_RIP, &eip);
1244 VMX_DBG_LOG(DBG_LEVEL_1,
1245 "Restoring to %%eip 0x%lx\n", eip);
1246 return 0; /* do not update eip! */
1250 return 1;
1253 #define CASE_GET_REG(REG, reg) \
1254 case REG_ ## REG: value = regs->reg; break
1256 #define CASE_EXTEND_SET_REG \
1257 CASE_EXTEND_REG(S)
1258 #define CASE_EXTEND_GET_REG \
1259 CASE_EXTEND_REG(G)
1261 #ifdef __i386__
1262 #define CASE_EXTEND_REG(T)
1263 #else
1264 #define CASE_EXTEND_REG(T) \
1265 CASE_ ## T ## ET_REG(R8, r8); \
1266 CASE_ ## T ## ET_REG(R9, r9); \
1267 CASE_ ## T ## ET_REG(R10, r10); \
1268 CASE_ ## T ## ET_REG(R11, r11); \
1269 CASE_ ## T ## ET_REG(R12, r12); \
1270 CASE_ ## T ## ET_REG(R13, r13); \
1271 CASE_ ## T ## ET_REG(R14, r14); \
1272 CASE_ ## T ## ET_REG(R15, r15);
1273 #endif
1276 /*
1277 * Write to control registers
1278 */
1279 static int mov_to_cr(int gp, int cr, struct cpu_user_regs *regs)
1281 unsigned long value;
1282 unsigned long old_cr;
1283 struct vcpu *v = current;
1285 switch (gp) {
1286 CASE_GET_REG(EAX, eax);
1287 CASE_GET_REG(ECX, ecx);
1288 CASE_GET_REG(EDX, edx);
1289 CASE_GET_REG(EBX, ebx);
1290 CASE_GET_REG(EBP, ebp);
1291 CASE_GET_REG(ESI, esi);
1292 CASE_GET_REG(EDI, edi);
1293 CASE_EXTEND_GET_REG
1294 case REG_ESP:
1295 __vmread(GUEST_RSP, &value);
1296 break;
1297 default:
1298 printk("invalid gp: %d\n", gp);
1299 __vmx_bug(regs);
1302 VMX_DBG_LOG(DBG_LEVEL_1, "mov_to_cr: CR%d, value = %lx,", cr, value);
1303 VMX_DBG_LOG(DBG_LEVEL_1, "current = %lx,", (unsigned long) current);
1305 switch(cr) {
1306 case 0:
1308 return vmx_set_cr0(value);
1310 case 3:
1312 unsigned long old_base_mfn, mfn;
1314 /*
1315 * If paging is not enabled yet, simply copy the value to CR3.
1316 */
1317 if (!vmx_paging_enabled(v)) {
1318 v->arch.arch_vmx.cpu_cr3 = value;
1319 break;
1322 /*
1323 * We make a new one if the shadow does not exist.
1324 */
1325 if (value == v->arch.arch_vmx.cpu_cr3) {
1326 /*
1327 * This is a simple TLB flush, implying the guest has
1328 * removed some translation or changed page attributes.
1329 * We simply invalidate the shadow.
1330 */
1331 mfn = get_mfn_from_pfn(value >> PAGE_SHIFT);
1332 if (mfn != pagetable_get_pfn(v->arch.guest_table))
1333 __vmx_bug(regs);
1334 shadow_sync_all(v->domain);
1335 } else {
1336 /*
1337 * If different, make a shadow. Check if the PDBR is valid
1338 * first.
1339 */
1340 VMX_DBG_LOG(DBG_LEVEL_VMMU, "CR3 value = %lx", value);
1341 if ( ((value >> PAGE_SHIFT) > v->domain->max_pages ) ||
1342 !VALID_MFN(mfn = get_mfn_from_pfn(value >> PAGE_SHIFT)) ||
1343 !get_page(pfn_to_page(mfn), v->domain) )
1345 printk("Invalid CR3 value=%lx", value);
1346 domain_crash_synchronous(); /* need to take a clean path */
1348 old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
1349 v->arch.guest_table = mk_pagetable(mfn << PAGE_SHIFT);
1350 if (old_base_mfn)
1351 put_page(pfn_to_page(old_base_mfn));
1352 update_pagetables(v);
1353 /*
1354 * arch.shadow_table should now hold the next CR3 for shadow
1355 */
1356 v->arch.arch_vmx.cpu_cr3 = value;
1357 VMX_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx",
1358 value);
1359 __vmwrite(GUEST_CR3, pagetable_get_paddr(v->arch.shadow_table));
1361 break;
1363 case 4: /* CR4 */
1365 if (value & X86_CR4_PAE){
1366 set_bit(VMX_CPU_STATE_PAE_ENABLED, &v->arch.arch_vmx.cpu_state);
1367 } else {
1368 if (test_bit(VMX_CPU_STATE_LMA_ENABLED,
1369 &v->arch.arch_vmx.cpu_state)){
1370 vmx_inject_exception(v, TRAP_gp_fault, 0);
1372 clear_bit(VMX_CPU_STATE_PAE_ENABLED, &v->arch.arch_vmx.cpu_state);
1375 __vmread(CR4_READ_SHADOW, &old_cr);
1377 __vmwrite(GUEST_CR4, value| VMX_CR4_HOST_MASK);
1378 __vmwrite(CR4_READ_SHADOW, value);
1380 /*
1381 * Writing to CR4 to modify the PSE, PGE, or PAE flag invalidates
1382 * all TLB entries except global entries.
1383 */
1384 if ((old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE)) {
1385 shadow_sync_all(v->domain);
1387 break;
1389 default:
1390 printk("invalid cr: %d\n", cr);
1391 __vmx_bug(regs);
1394 return 1;
1397 #define CASE_SET_REG(REG, reg) \
1398 case REG_ ## REG: \
1399 regs->reg = value; \
1400 break
1402 /*
1403 * Read from control registers. CR0 and CR4 are read from the shadow.
1404 */
1405 static void mov_from_cr(int cr, int gp, struct cpu_user_regs *regs)
1407 unsigned long value;
1408 struct vcpu *v = current;
1410 if (cr != 3)
1411 __vmx_bug(regs);
1413 value = (unsigned long) v->arch.arch_vmx.cpu_cr3;
1415 switch (gp) {
1416 CASE_SET_REG(EAX, eax);
1417 CASE_SET_REG(ECX, ecx);
1418 CASE_SET_REG(EDX, edx);
1419 CASE_SET_REG(EBX, ebx);
1420 CASE_SET_REG(EBP, ebp);
1421 CASE_SET_REG(ESI, esi);
1422 CASE_SET_REG(EDI, edi);
1423 CASE_EXTEND_SET_REG
1424 case REG_ESP:
1425 __vmwrite(GUEST_RSP, value);
1426 regs->esp = value;
1427 break;
1428 default:
1429 printk("invalid gp: %d\n", gp);
1430 __vmx_bug(regs);
1433 VMX_DBG_LOG(DBG_LEVEL_VMMU, "mov_from_cr: CR%d, value = %lx,", cr, value);
1436 static int vmx_cr_access(unsigned long exit_qualification, struct cpu_user_regs *regs)
1438 unsigned int gp, cr;
1439 unsigned long value;
1440 struct vcpu *v = current;
1442 switch (exit_qualification & CONTROL_REG_ACCESS_TYPE) {
1443 case TYPE_MOV_TO_CR:
1444 gp = exit_qualification & CONTROL_REG_ACCESS_REG;
1445 cr = exit_qualification & CONTROL_REG_ACCESS_NUM;
1446 TRACE_VMEXIT(1,TYPE_MOV_TO_CR);
1447 TRACE_VMEXIT(2,cr);
1448 TRACE_VMEXIT(3,gp);
1449 return mov_to_cr(gp, cr, regs);
1450 case TYPE_MOV_FROM_CR:
1451 gp = exit_qualification & CONTROL_REG_ACCESS_REG;
1452 cr = exit_qualification & CONTROL_REG_ACCESS_NUM;
1453 TRACE_VMEXIT(1,TYPE_MOV_FROM_CR);
1454 TRACE_VMEXIT(2,cr);
1455 TRACE_VMEXIT(3,gp);
1456 mov_from_cr(cr, gp, regs);
1457 break;
1458 case TYPE_CLTS:
1459 TRACE_VMEXIT(1,TYPE_CLTS);
1460 clts();
1461 setup_fpu(current);
1463 __vmread_vcpu(v, GUEST_CR0, &value);
1464 value &= ~X86_CR0_TS; /* clear TS */
1465 __vmwrite(GUEST_CR0, value);
1467 __vmread_vcpu(v, CR0_READ_SHADOW, &value);
1468 value &= ~X86_CR0_TS; /* clear TS */
1469 __vmwrite(CR0_READ_SHADOW, value);
1470 break;
1471 case TYPE_LMSW:
1472 TRACE_VMEXIT(1,TYPE_LMSW);
1473 __vmread_vcpu(v, CR0_READ_SHADOW, &value);
1474 value = (value & ~0xF) |
1475 (((exit_qualification & LMSW_SOURCE_DATA) >> 16) & 0xF);
1476 return vmx_set_cr0(value);
1477 break;
1478 default:
1479 __vmx_bug(regs);
1480 break;
1482 return 1;
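/*
 * Emulate RDMSR: the TSC, SYSENTER and APIC-base MSRs are virtualized
 * here, long-mode MSRs are handled by long_mode_do_msr_read(), and
 * anything else falls through to a real rdmsr on the host.
 */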
1485 static inline void vmx_do_msr_read(struct cpu_user_regs *regs)
1487 u64 msr_content = 0;
1488 struct vcpu *v = current;
1490 VMX_DBG_LOG(DBG_LEVEL_1, "vmx_do_msr_read: ecx=%lx, eax=%lx, edx=%lx",
1491 (unsigned long)regs->ecx, (unsigned long)regs->eax,
1492 (unsigned long)regs->edx);
1493 switch (regs->ecx) {
1494 case MSR_IA32_TIME_STAMP_COUNTER:
1496 struct vmx_virpit *vpit;
1498 rdtscll(msr_content);
1499 vpit = &(v->domain->arch.vmx_platform.vmx_pit);
1500 msr_content += vpit->shift;
1501 break;
1503 case MSR_IA32_SYSENTER_CS:
1504 __vmread(GUEST_SYSENTER_CS, (u32 *)&msr_content);
1505 break;
1506 case MSR_IA32_SYSENTER_ESP:
1507 __vmread(GUEST_SYSENTER_ESP, &msr_content);
1508 break;
1509 case MSR_IA32_SYSENTER_EIP:
1510 __vmread(GUEST_SYSENTER_EIP, &msr_content);
1511 break;
1512 case MSR_IA32_APICBASE:
1513 msr_content = VLAPIC(v) ? VLAPIC(v)->apic_base_msr : 0;
1514 break;
1515 default:
1516 if(long_mode_do_msr_read(regs))
1517 return;
1518 rdmsr_user(regs->ecx, regs->eax, regs->edx);
1519 break;
1522 regs->eax = msr_content & 0xFFFFFFFF;
1523 regs->edx = msr_content >> 32;
1525 VMX_DBG_LOG(DBG_LEVEL_1, "vmx_do_msr_read returns: "
1526 "ecx=%lx, eax=%lx, edx=%lx",
1527 (unsigned long)regs->ecx, (unsigned long)regs->eax,
1528 (unsigned long)regs->edx);
1531 static inline void vmx_do_msr_write(struct cpu_user_regs *regs)
1533 u64 msr_content;
1534 struct vcpu *v = current;
1536 VMX_DBG_LOG(DBG_LEVEL_1, "vmx_do_msr_write: ecx=%lx, eax=%lx, edx=%lx",
1537 (unsigned long)regs->ecx, (unsigned long)regs->eax,
1538 (unsigned long)regs->edx);
1540 msr_content = (regs->eax & 0xFFFFFFFF) | ((u64)regs->edx << 32);
1542 switch (regs->ecx) {
1543 case MSR_IA32_TIME_STAMP_COUNTER:
1545 struct vmx_virpit *vpit;
1546 u64 host_tsc, drift;
1548 rdtscll(host_tsc);
1549 vpit = &(v->domain->arch.vmx_platform.vmx_pit);
1550 drift = v->arch.arch_vmx.tsc_offset - vpit->shift;
1551 vpit->shift = msr_content - host_tsc;
1552 v->arch.arch_vmx.tsc_offset = vpit->shift + drift;
1553 __vmwrite(TSC_OFFSET, vpit->shift);
1555 #if defined (__i386__)
1556 __vmwrite(TSC_OFFSET_HIGH, ((vpit->shift)>>32));
1557 #endif
1558 break;
1560 case MSR_IA32_SYSENTER_CS:
1561 __vmwrite(GUEST_SYSENTER_CS, msr_content);
1562 break;
1563 case MSR_IA32_SYSENTER_ESP:
1564 __vmwrite(GUEST_SYSENTER_ESP, msr_content);
1565 break;
1566 case MSR_IA32_SYSENTER_EIP:
1567 __vmwrite(GUEST_SYSENTER_EIP, msr_content);
1568 break;
1569 case MSR_IA32_APICBASE:
1570 vlapic_msr_set(VLAPIC(v), msr_content);
1571 break;
1572 default:
1573 long_mode_do_msr_write(regs);
1574 break;
1577 VMX_DBG_LOG(DBG_LEVEL_1, "vmx_do_msr_write returns: "
1578 "ecx=%lx, eax=%lx, edx=%lx",
1579 (unsigned long)regs->ecx, (unsigned long)regs->eax,
1580 (unsigned long)regs->edx);
1583 /*
1584 * We need to use this exit to reschedule.
1585 */
1586 void vmx_vmexit_do_hlt(void)
1588 struct vcpu *v=current;
1589 struct vmx_virpit *vpit = &(v->domain->arch.vmx_platform.vmx_pit);
1590 s_time_t next_pit = -1, next_wakeup;
1592 if ( !v->vcpu_id ) {
1593 next_pit = get_pit_scheduled(v,vpit);
1595 next_wakeup = get_apictime_scheduled(v);
1596 if ( (next_pit != -1 && next_pit < next_wakeup) || next_wakeup == -1 ) {
1597 next_wakeup = next_pit;
1599 if ( next_wakeup != -1 )
1600 set_ac_timer(&current->arch.arch_vmx.hlt_timer, next_wakeup);
1601 do_block();
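/*
 * Dispatch a host interrupt that arrived while the guest was running:
 * read the interrupt vector from the exit information and call the
 * corresponding Xen interrupt handler directly.
 */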
1604 static inline void vmx_vmexit_do_extint(struct cpu_user_regs *regs)
1606 unsigned int vector;
1607 int error;
1609 asmlinkage void do_IRQ(struct cpu_user_regs *);
1610 void smp_apic_timer_interrupt(struct cpu_user_regs *);
1611 void timer_interrupt(int, void *, struct cpu_user_regs *);
1612 void smp_event_check_interrupt(void);
1613 void smp_invalidate_interrupt(void);
1614 void smp_call_function_interrupt(void);
1615 void smp_spurious_interrupt(struct cpu_user_regs *regs);
1616 void smp_error_interrupt(struct cpu_user_regs *regs);
1618 if ((error = __vmread(VM_EXIT_INTR_INFO, &vector))
1619 && !(vector & INTR_INFO_VALID_MASK))
1620 __vmx_bug(regs);
1622 vector &= 0xff;
1623 local_irq_disable();
1625 switch(vector) {
1626 case LOCAL_TIMER_VECTOR:
1627 smp_apic_timer_interrupt(regs);
1628 break;
1629 case EVENT_CHECK_VECTOR:
1630 smp_event_check_interrupt();
1631 break;
1632 case INVALIDATE_TLB_VECTOR:
1633 smp_invalidate_interrupt();
1634 break;
1635 case CALL_FUNCTION_VECTOR:
1636 smp_call_function_interrupt();
1637 break;
1638 case SPURIOUS_APIC_VECTOR:
1639 smp_spurious_interrupt(regs);
1640 break;
1641 case ERROR_APIC_VECTOR:
1642 smp_error_interrupt(regs);
1643 break;
1644 default:
1645 regs->entry_vector = vector;
1646 do_IRQ(regs);
1647 break;
1651 #define BUF_SIZ 256
1652 #define MAX_LINE 80
1653 char print_buf[BUF_SIZ];
1654 static int index;
1656 static void vmx_print_line(const char c, struct vcpu *v)
1659 if (index == MAX_LINE || c == '\n') {
1660 if (index == MAX_LINE) {
1661 print_buf[index++] = c;
1663 print_buf[index] = '\0';
1664 printk("(GUEST: %u) %s\n", v->domain->domain_id, (char *) &print_buf);
1665 index = 0;
1667 else
1668 print_buf[index++] = c;
1671 void save_vmx_cpu_user_regs(struct cpu_user_regs *ctxt)
1673 __vmread(GUEST_SS_SELECTOR, &ctxt->ss);
1674 __vmread(GUEST_RSP, &ctxt->esp);
1675 __vmread(GUEST_RFLAGS, &ctxt->eflags);
1676 __vmread(GUEST_CS_SELECTOR, &ctxt->cs);
1677 __vmread(GUEST_RIP, &ctxt->eip);
1679 __vmread(GUEST_GS_SELECTOR, &ctxt->gs);
1680 __vmread(GUEST_FS_SELECTOR, &ctxt->fs);
1681 __vmread(GUEST_ES_SELECTOR, &ctxt->es);
1682 __vmread(GUEST_DS_SELECTOR, &ctxt->ds);
1685 #ifdef XEN_DEBUGGER
1686 void save_cpu_user_regs(struct cpu_user_regs *regs)
1688 __vmread(GUEST_SS_SELECTOR, &regs->xss);
1689 __vmread(GUEST_RSP, &regs->esp);
1690 __vmread(GUEST_RFLAGS, &regs->eflags);
1691 __vmread(GUEST_CS_SELECTOR, &regs->xcs);
1692 __vmread(GUEST_RIP, &regs->eip);
1694 __vmread(GUEST_GS_SELECTOR, &regs->xgs);
1695 __vmread(GUEST_FS_SELECTOR, &regs->xfs);
1696 __vmread(GUEST_ES_SELECTOR, &regs->xes);
1697 __vmread(GUEST_DS_SELECTOR, &regs->xds);
1700 void restore_cpu_user_regs(struct cpu_user_regs *regs)
1702 __vmwrite(GUEST_SS_SELECTOR, regs->xss);
1703 __vmwrite(GUEST_RSP, regs->esp);
1704 __vmwrite(GUEST_RFLAGS, regs->eflags);
1705 __vmwrite(GUEST_CS_SELECTOR, regs->xcs);
1706 __vmwrite(GUEST_RIP, regs->eip);
1708 __vmwrite(GUEST_GS_SELECTOR, regs->xgs);
1709 __vmwrite(GUEST_FS_SELECTOR, regs->xfs);
1710 __vmwrite(GUEST_ES_SELECTOR, regs->xes);
1711 __vmwrite(GUEST_DS_SELECTOR, regs->xds);
1713 #endif
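/*
 * Top-level VM-exit dispatcher.  Reads the exit reason, re-injects any
 * event that was being delivered when the exit occurred (IDT-vectoring
 * information), and then branches to the per-exit-reason handlers
 * below.
 */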
1715 asmlinkage void vmx_vmexit_handler(struct cpu_user_regs regs)
1717 unsigned int exit_reason, idtv_info_field;
1718 unsigned long exit_qualification, eip, inst_len = 0;
1719 struct vcpu *v = current;
1720 int error;
1722 if ((error = __vmread(VM_EXIT_REASON, &exit_reason)))
1723 __vmx_bug(&regs);
1725 perfc_incra(vmexits, exit_reason);
1727 __vmread(IDT_VECTORING_INFO_FIELD, &idtv_info_field);
1728 if (idtv_info_field & INTR_INFO_VALID_MASK) {
1729 __vmwrite(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field);
1731 __vmread(VM_EXIT_INSTRUCTION_LEN, &inst_len);
1732 if (inst_len >= 1 && inst_len <= 15)
1733 __vmwrite(VM_ENTRY_INSTRUCTION_LEN, inst_len);
1735 if (idtv_info_field & 0x800) { /* valid error code */
1736 unsigned long error_code;
1737 __vmread(IDT_VECTORING_ERROR_CODE, &error_code);
1738 __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
1741 VMX_DBG_LOG(DBG_LEVEL_1, "idtv_info_field=%x", idtv_info_field);
1744 /* don't bother logging hardware interrupts */
1745 if (exit_reason != EXIT_REASON_EXTERNAL_INTERRUPT &&
1746 exit_reason != EXIT_REASON_VMCALL &&
1747 exit_reason != EXIT_REASON_IO_INSTRUCTION)
1748 VMX_DBG_LOG(DBG_LEVEL_0, "exit reason = %x", exit_reason);
1750 if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) {
1751 printk("Failed vm entry\n");
1752 domain_crash_synchronous();
1753 return;
1757 __vmread(GUEST_RIP, &eip);
1758 TRACE_3D(TRC_VMX_VMEXIT, v->domain->domain_id, eip, exit_reason);
1759 TRACE_VMEXIT(0,exit_reason);
1762 switch (exit_reason) {
1763 case EXIT_REASON_EXCEPTION_NMI:
1765 /*
1766 * We don't enable software-interrupt exiting (INT n), so a valid
1767 * vector here is either (1) an exception (e.g. #PF) raised in the
1768 * guest, or (2) an NMI.
1769 */
1770 int error;
1771 unsigned int vector;
1772 unsigned long va;
1774 if ((error = __vmread(VM_EXIT_INTR_INFO, &vector))
1775 || !(vector & INTR_INFO_VALID_MASK))
1776 __vmx_bug(&regs);
1777 vector &= 0xff;
1779 TRACE_VMEXIT(1,vector);
1780 perfc_incra(cause_vector, vector);
1782 TRACE_3D(TRC_VMX_VECTOR, v->domain->domain_id, eip, vector);
1783 switch (vector) {
1784 #ifdef XEN_DEBUGGER
1785 case TRAP_debug:
1787 save_cpu_user_regs(&regs);
1788 pdb_handle_exception(1, &regs, 1);
1789 restore_cpu_user_regs(&regs);
1790 break;
1792 case TRAP_int3:
1794 save_cpu_user_regs(&regs);
1795 pdb_handle_exception(3, &regs, 1);
1796 restore_cpu_user_regs(&regs);
1797 break;
1799 #else
1800 case TRAP_debug:
1802 void store_cpu_user_regs(struct cpu_user_regs *regs);
1803 long do_sched_op(unsigned long op);
1806 store_cpu_user_regs(&regs);
1807 __vm_clear_bit(GUEST_PENDING_DBG_EXCEPTIONS, PENDING_DEBUG_EXC_BS);
1809 domain_pause_for_debugger();
1810 do_sched_op(SCHEDOP_yield);
1812 break;
1814 #endif
1815 case TRAP_no_device:
1817 vmx_do_no_device_fault();
1818 break;
1820 case TRAP_page_fault:
1822 __vmread(EXIT_QUALIFICATION, &va);
1823 __vmread(VM_EXIT_INTR_ERROR_CODE, &regs.error_code);
1825 TRACE_VMEXIT(3,regs.error_code);
1826 TRACE_VMEXIT(4,va);
1828 VMX_DBG_LOG(DBG_LEVEL_VMMU,
1829 "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx",
1830 (unsigned long)regs.eax, (unsigned long)regs.ebx,
1831 (unsigned long)regs.ecx, (unsigned long)regs.edx,
1832 (unsigned long)regs.esi, (unsigned long)regs.edi);
1833 v->arch.arch_vmx.mmio_op.inst_decoder_regs = &regs;
1835 if (!(error = vmx_do_page_fault(va, &regs))) {
1836 /*
1837 * Inject #PG using Interruption-Information Fields
1838 */
1839 vmx_inject_exception(v, TRAP_page_fault, regs.error_code);
1840 v->arch.arch_vmx.cpu_cr2 = va;
1841 TRACE_3D(TRC_VMX_INT, v->domain->domain_id, TRAP_page_fault, va);
1843 break;
1845 case TRAP_nmi:
1846 do_nmi(&regs, 0);
1847 break;
1848 default:
1849 vmx_reflect_exception(v);
1850 break;
1852 break;
1854 case EXIT_REASON_EXTERNAL_INTERRUPT:
1855 vmx_vmexit_do_extint(&regs);
1856 break;
1857 case EXIT_REASON_PENDING_INTERRUPT:
1858 __vmwrite(CPU_BASED_VM_EXEC_CONTROL,
1859 MONITOR_CPU_BASED_EXEC_CONTROLS);
1860 break;
1861 case EXIT_REASON_TASK_SWITCH:
1862 __vmx_bug(&regs);
1863 break;
1864 case EXIT_REASON_CPUID:
1865 __get_instruction_length(inst_len);
1866 vmx_vmexit_do_cpuid(regs.eax, &regs);
1867 __update_guest_eip(inst_len);
1868 break;
1869 case EXIT_REASON_HLT:
1870 __get_instruction_length(inst_len);
1871 __update_guest_eip(inst_len);
1872 vmx_vmexit_do_hlt();
1873 break;
1874 case EXIT_REASON_INVLPG:
1876 unsigned long va;
1878 __vmread(EXIT_QUALIFICATION, &va);
1879 vmx_vmexit_do_invlpg(va);
1880 __get_instruction_length(inst_len);
1881 __update_guest_eip(inst_len);
1882 break;
1884 case EXIT_REASON_VMCALL:
1885 __get_instruction_length(inst_len);
1886 __vmread(GUEST_RIP, &eip);
1887 __vmread(EXIT_QUALIFICATION, &exit_qualification);
1889 vmx_print_line(regs.eax, v); /* provides the current domain */
1890 __update_guest_eip(inst_len);
1891 break;
1892 case EXIT_REASON_CR_ACCESS:
1894 __vmread(GUEST_RIP, &eip);
1895 __get_instruction_length(inst_len);
1896 __vmread(EXIT_QUALIFICATION, &exit_qualification);
1898 VMX_DBG_LOG(DBG_LEVEL_1, "eip = %lx, inst_len =%lx, exit_qualification = %lx",
1899 eip, inst_len, exit_qualification);
1900 if (vmx_cr_access(exit_qualification, &regs))
1901 __update_guest_eip(inst_len);
1902 TRACE_VMEXIT(3,regs.error_code);
1903 TRACE_VMEXIT(4,exit_qualification);
1904 break;
1906 case EXIT_REASON_DR_ACCESS:
1907 __vmread(EXIT_QUALIFICATION, &exit_qualification);
1908 vmx_dr_access(exit_qualification, &regs);
1909 __get_instruction_length(inst_len);
1910 __update_guest_eip(inst_len);
1911 break;
1912 case EXIT_REASON_IO_INSTRUCTION:
1913 __vmread(EXIT_QUALIFICATION, &exit_qualification);
1914 __get_instruction_length(inst_len);
1915 vmx_io_instruction(&regs, exit_qualification, inst_len);
1916 TRACE_VMEXIT(4,exit_qualification);
1917 break;
1918 case EXIT_REASON_MSR_READ:
1919 __get_instruction_length(inst_len);
1920 vmx_do_msr_read(&regs);
1921 __update_guest_eip(inst_len);
1922 break;
1923 case EXIT_REASON_MSR_WRITE:
1924 __vmread(GUEST_RIP, &eip);
1925 vmx_do_msr_write(&regs);
1926 __get_instruction_length(inst_len);
1927 __update_guest_eip(inst_len);
1928 break;
1929 case EXIT_REASON_MWAIT_INSTRUCTION:
1930 __vmx_bug(&regs);
1931 break;
1932 default:
1933 __vmx_bug(&regs); /* should not happen */
1937 asmlinkage void load_cr2(void)
1939 struct vcpu *v = current;
1941 local_irq_disable();
1942 #ifdef __i386__
1943 asm volatile("movl %0,%%cr2": :"r" (v->arch.arch_vmx.cpu_cr2));
1944 #else
1945 asm volatile("movq %0,%%cr2": :"r" (v->arch.arch_vmx.cpu_cr2));
1946 #endif
1949 asmlinkage void trace_vmentry (void)
1951 TRACE_5D(TRC_VMENTRY,trace_values[current->processor][0],
1952 trace_values[current->processor][1],trace_values[current->processor][2],
1953 trace_values[current->processor][3],trace_values[current->processor][4]);
1954 TRACE_VMEXIT(0,9);
1955 TRACE_VMEXIT(1,9);
1956 TRACE_VMEXIT(2,9);
1957 TRACE_VMEXIT(3,9);
1958 TRACE_VMEXIT(4,9);
1959 return;
1961 asmlinkage void trace_vmexit (void)
1963 TRACE_3D(TRC_VMEXIT,0,0,0);
1964 return;
1966 #endif /* CONFIG_VMX */
1968 /*
1969 * Local variables:
1970 * mode: C
1971 * c-set-style: "BSD"
1972 * c-basic-offset: 4
1973 * tab-width: 4
1974 * indent-tabs-mode: nil
1975 * End:
1976 */