ia64/xen-unstable

view xen/arch/x86/vmx.c @ 8396:fb1f2498ce1e

Check for existence of VLAPIC before relinquishing it,
or a vmx guest can die very early.

Signed-off-by: Xin Li <xin.b.li@intel.com>
author kaf24@firebug.cl.cam.ac.uk
date Thu Dec 15 20:42:09 2005 +0100 (2005-12-15)
parents 37cafca1539e
children e7977d6aa6ba
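The change itself is a one-line guard: vmx_relinquish_resources() must not stop the VLAPIC timer or free the VLAPIC structure unless one was actually allocated for the VCPU. A minimal sketch of the guarded teardown, mirroring source lines 115-119 below (VLAPIC(), vmx_apic_support(), rem_ac_timer() and xfree() are the existing Xen helpers used there):

    /* Only tear down the virtual LAPIC if this VCPU ever had one. */
    if ( vmx_apic_support(v->domain) && (VLAPIC(v) != NULL) )
    {
        rem_ac_timer(&VLAPIC(v)->vlapic_timer);
        xfree(VLAPIC(v));
    }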
line source
1 /*
2 * vmx.c: handling VMX architecture-related VM exits
3 * Copyright (c) 2004, Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
16 * Place - Suite 330, Boston, MA 02111-1307 USA.
17 *
18 */
20 #include <xen/config.h>
21 #include <xen/init.h>
22 #include <xen/lib.h>
23 #include <xen/trace.h>
24 #include <xen/sched.h>
25 #include <xen/irq.h>
26 #include <xen/softirq.h>
27 #include <xen/domain_page.h>
28 #include <asm/current.h>
29 #include <asm/io.h>
30 #include <asm/shadow.h>
31 #include <asm/regs.h>
32 #include <asm/cpufeature.h>
33 #include <asm/processor.h>
34 #include <asm/types.h>
35 #include <asm/msr.h>
36 #include <asm/spinlock.h>
37 #include <asm/vmx.h>
38 #include <asm/vmx_vmcs.h>
39 #include <asm/vmx_intercept.h>
40 #include <asm/shadow.h>
41 #if CONFIG_PAGING_LEVELS >= 3
42 #include <asm/shadow_64.h>
43 #endif
44 #include <public/sched.h>
45 #include <public/io/ioreq.h>
46 #include <asm/vmx_vpic.h>
47 #include <asm/vmx_vlapic.h>
49 int hvm_enabled;
51 #ifdef CONFIG_VMX
52 unsigned int opt_vmx_debug_level = 0;
53 integer_param("vmx_debug", opt_vmx_debug_level);
55 static unsigned long trace_values[NR_CPUS][5];
56 #define TRACE_VMEXIT(index,value) trace_values[current->processor][index]=value
58 static int vmx_switch_on;
60 void vmx_final_setup_guest(struct vcpu *v)
61 {
62 v->arch.schedule_tail = arch_vmx_do_launch;
64 if ( v->vcpu_id == 0 )
65 {
66 struct domain *d = v->domain;
67 struct vcpu *vc;
69 d->arch.vmx_platform.lapic_enable = v->arch.guest_context.user_regs.ecx;
70 v->arch.guest_context.user_regs.ecx = 0;
71 VMX_DBG_LOG(DBG_LEVEL_VLAPIC, "lapic enable is %d.\n",
72 d->arch.vmx_platform.lapic_enable);
74 /* Initialize monitor page table */
75 for_each_vcpu(d, vc)
76 vc->arch.monitor_table = mk_pagetable(0);
78 /*
79 * Required to do this once per domain
80 * XXX todo: add a separate function to do these.
81 */
82 memset(&d->shared_info->evtchn_mask[0], 0xff,
83 sizeof(d->shared_info->evtchn_mask));
85 /* Put the domain in shadow mode even though we're going to be using
86 * the shared 1:1 page table initially. It shouldn't hurt */
87 shadow_mode_enable(d,
88 SHM_enable|SHM_refcounts|
89 SHM_translate|SHM_external|SHM_wr_pt_pte);
90 }
92 vmx_switch_on = 1;
93 }
95 void vmx_relinquish_resources(struct vcpu *v)
96 {
97 struct vmx_virpit *vpit;
99 if ( !VMX_DOMAIN(v) )
100 return;
102 if (v->vcpu_id == 0) {
103 /* unmap IO shared page */
104 struct domain *d = v->domain;
105 unmap_domain_page((void *)d->arch.vmx_platform.shared_page_va);
106 }
108 destroy_vmcs(&v->arch.arch_vmx);
109 free_monitor_pagetable(v);
110 vpit = &v->domain->arch.vmx_platform.vmx_pit;
111 if ( active_ac_timer(&(vpit->pit_timer)) )
112 rem_ac_timer(&vpit->pit_timer);
113 if ( active_ac_timer(&v->arch.arch_vmx.hlt_timer) )
114 rem_ac_timer(&v->arch.arch_vmx.hlt_timer);
115 if ( vmx_apic_support(v->domain) && (VLAPIC(v) != NULL) )
116 {
117 rem_ac_timer(&VLAPIC(v)->vlapic_timer);
118 xfree(VLAPIC(v));
119 }
120 }
122 #ifdef __x86_64__
123 static struct msr_state percpu_msr[NR_CPUS];
125 static u32 msr_data_index[VMX_MSR_COUNT] =
126 {
127 MSR_LSTAR, MSR_STAR, MSR_CSTAR,
128 MSR_SYSCALL_MASK, MSR_EFER,
129 };
131 /*
132 * To avoid MSR save/restore at every VM exit/entry time, we restore
133 * the x86_64 specific MSRs at domain switch time. Since those MSRs
134 * are not modified once set for generic domains, we don't save them,
135 * but simply reset them to the values set at percpu_traps_init().
136 */
137 void vmx_load_msrs(struct vcpu *n)
138 {
139 struct msr_state *host_state = &percpu_msr[smp_processor_id()];
140 int i;
142 if ( !vmx_switch_on )
143 return;
145 while ( host_state->flags )
146 {
147 i = find_first_set_bit(host_state->flags);
148 wrmsrl(msr_data_index[i], host_state->msr_items[i]);
149 clear_bit(i, &host_state->flags);
150 }
151 }
153 static void vmx_save_init_msrs(void)
154 {
155 struct msr_state *host_state = &percpu_msr[smp_processor_id()];
156 int i;
158 for ( i = 0; i < VMX_MSR_COUNT; i++ )
159 rdmsrl(msr_data_index[i], host_state->msr_items[i]);
160 }
162 #define CASE_READ_MSR(address) \
163 case MSR_ ## address: \
164 msr_content = msr->msr_items[VMX_INDEX_MSR_ ## address]; \
165 break
167 #define CASE_WRITE_MSR(address) \
168 case MSR_ ## address: \
169 { \
170 msr->msr_items[VMX_INDEX_MSR_ ## address] = msr_content; \
171 if (!test_bit(VMX_INDEX_MSR_ ## address, &msr->flags)) { \
172 set_bit(VMX_INDEX_MSR_ ## address, &msr->flags); \
173 } \
174 wrmsrl(MSR_ ## address, msr_content); \
175 set_bit(VMX_INDEX_MSR_ ## address, &host_state->flags); \
176 } \
177 break
179 #define IS_CANO_ADDRESS(add) 1
180 static inline int long_mode_do_msr_read(struct cpu_user_regs *regs)
181 {
182 u64 msr_content = 0;
183 struct vcpu *vc = current;
184 struct msr_state * msr = &vc->arch.arch_vmx.msr_content;
185 switch(regs->ecx){
186 case MSR_EFER:
187 msr_content = msr->msr_items[VMX_INDEX_MSR_EFER];
188 VMX_DBG_LOG(DBG_LEVEL_2, "EFER msr_content %llx\n", (unsigned long long)msr_content);
189 if (test_bit(VMX_CPU_STATE_LME_ENABLED,
190 &vc->arch.arch_vmx.cpu_state))
191 msr_content |= 1 << _EFER_LME;
193 if (VMX_LONG_GUEST(vc))
194 msr_content |= 1 << _EFER_LMA;
195 break;
196 case MSR_FS_BASE:
197 if (!(VMX_LONG_GUEST(vc)))
198 /* XXX should it be GP fault */
199 domain_crash_synchronous();
200 __vmread(GUEST_FS_BASE, &msr_content);
201 break;
202 case MSR_GS_BASE:
203 if (!(VMX_LONG_GUEST(vc)))
204 domain_crash_synchronous();
205 __vmread(GUEST_GS_BASE, &msr_content);
206 break;
207 case MSR_SHADOW_GS_BASE:
208 msr_content = msr->shadow_gs;
209 break;
211 CASE_READ_MSR(STAR);
212 CASE_READ_MSR(LSTAR);
213 CASE_READ_MSR(CSTAR);
214 CASE_READ_MSR(SYSCALL_MASK);
215 default:
216 return 0;
217 }
218 VMX_DBG_LOG(DBG_LEVEL_2, "mode_do_msr_read: msr_content: %lx\n", msr_content);
219 regs->eax = msr_content & 0xffffffff;
220 regs->edx = msr_content >> 32;
221 return 1;
222 }
224 static inline int long_mode_do_msr_write(struct cpu_user_regs *regs)
225 {
226 u64 msr_content = regs->eax | ((u64)regs->edx << 32);
227 struct vcpu *vc = current;
228 struct msr_state * msr = &vc->arch.arch_vmx.msr_content;
229 struct msr_state * host_state =
230 &percpu_msr[smp_processor_id()];
232 VMX_DBG_LOG(DBG_LEVEL_1, " mode_do_msr_write msr %lx msr_content %lx\n",
233 regs->ecx, msr_content);
235 switch (regs->ecx){
236 case MSR_EFER:
237 if ((msr_content & EFER_LME) ^
238 test_bit(VMX_CPU_STATE_LME_ENABLED,
239 &vc->arch.arch_vmx.cpu_state)){
240 if (test_bit(VMX_CPU_STATE_PG_ENABLED,
241 &vc->arch.arch_vmx.cpu_state) ||
242 !test_bit(VMX_CPU_STATE_PAE_ENABLED,
243 &vc->arch.arch_vmx.cpu_state)){
244 vmx_inject_exception(vc, TRAP_gp_fault, 0);
245 }
246 }
247 if (msr_content & EFER_LME)
248 set_bit(VMX_CPU_STATE_LME_ENABLED,
249 &vc->arch.arch_vmx.cpu_state);
250 /* No update for LME/LMA since it has no effect */
251 msr->msr_items[VMX_INDEX_MSR_EFER] =
252 msr_content;
253 if (msr_content & ~(EFER_LME | EFER_LMA)){
254 msr->msr_items[VMX_INDEX_MSR_EFER] = msr_content;
255 if (!test_bit(VMX_INDEX_MSR_EFER, &msr->flags)){
256 rdmsrl(MSR_EFER,
257 host_state->msr_items[VMX_INDEX_MSR_EFER]);
258 set_bit(VMX_INDEX_MSR_EFER, &host_state->flags);
259 set_bit(VMX_INDEX_MSR_EFER, &msr->flags);
260 wrmsrl(MSR_EFER, msr_content);
261 }
262 }
263 break;
265 case MSR_FS_BASE:
266 case MSR_GS_BASE:
267 if (!(VMX_LONG_GUEST(vc)))
268 domain_crash_synchronous();
269 if (!IS_CANO_ADDRESS(msr_content)){
270 VMX_DBG_LOG(DBG_LEVEL_1, "Not a canonical address for MSR write\n");
271 vmx_inject_exception(vc, TRAP_gp_fault, 0);
272 }
273 if (regs->ecx == MSR_FS_BASE)
274 __vmwrite(GUEST_FS_BASE, msr_content);
275 else
276 __vmwrite(GUEST_GS_BASE, msr_content);
277 break;
279 case MSR_SHADOW_GS_BASE:
280 if (!(VMX_LONG_GUEST(vc)))
281 domain_crash_synchronous();
282 vc->arch.arch_vmx.msr_content.shadow_gs = msr_content;
283 wrmsrl(MSR_SHADOW_GS_BASE, msr_content);
284 break;
286 CASE_WRITE_MSR(STAR);
287 CASE_WRITE_MSR(LSTAR);
288 CASE_WRITE_MSR(CSTAR);
289 CASE_WRITE_MSR(SYSCALL_MASK);
290 default:
291 return 0;
292 }
293 return 1;
294 }
296 void
297 vmx_restore_msrs(struct vcpu *v)
298 {
299 int i = 0;
300 struct msr_state *guest_state;
301 struct msr_state *host_state;
302 unsigned long guest_flags;
304 guest_state = &v->arch.arch_vmx.msr_content;
305 host_state = &percpu_msr[smp_processor_id()];
307 wrmsrl(MSR_SHADOW_GS_BASE, guest_state->shadow_gs);
308 guest_flags = guest_state->flags;
309 if (!guest_flags)
310 return;
312 while (guest_flags){
313 i = find_first_set_bit(guest_flags);
315 VMX_DBG_LOG(DBG_LEVEL_2,
316 "restore guest's index %d msr %lx with %lx\n",
317 i, (unsigned long) msr_data_index[i], (unsigned long) guest_state->msr_items[i]);
318 set_bit(i, &host_state->flags);
319 wrmsrl(msr_data_index[i], guest_state->msr_items[i]);
320 clear_bit(i, &guest_flags);
321 }
322 }
324 #else /* __i386__ */
325 #define vmx_save_init_msrs() ((void)0)
327 static inline int long_mode_do_msr_read(struct cpu_user_regs *regs){
328 return 0;
329 }
330 static inline int long_mode_do_msr_write(struct cpu_user_regs *regs){
331 return 0;
332 }
333 #endif
335 extern long evtchn_send(int lport);
336 extern long do_block(void);
337 void do_nmi(struct cpu_user_regs *, unsigned long);
339 static int check_vmx_controls(u32 ctrls, u32 msr)
340 {
341 u32 vmx_msr_low, vmx_msr_high;
343 rdmsr(msr, vmx_msr_low, vmx_msr_high);
344 if (ctrls < vmx_msr_low || ctrls > vmx_msr_high) {
345 printk("Insufficient VMX capability 0x%x, "
346 "msr=0x%x,low=0x%8x,high=0x%x\n",
347 ctrls, msr, vmx_msr_low, vmx_msr_high);
348 return 0;
349 }
350 return 1;
351 }
353 int start_vmx(void)
354 {
355 struct vmcs_struct *vmcs;
356 u32 ecx;
357 u32 eax, edx;
358 u64 phys_vmcs; /* debugging */
360 /*
361 * Xen does not fill x86_capability words except 0.
362 */
363 ecx = cpuid_ecx(1);
364 boot_cpu_data.x86_capability[4] = ecx;
366 if (!(test_bit(X86_FEATURE_VMXE, &boot_cpu_data.x86_capability)))
367 return 0;
369 rdmsr(IA32_FEATURE_CONTROL_MSR, eax, edx);
371 if (eax & IA32_FEATURE_CONTROL_MSR_LOCK) {
372 if ((eax & IA32_FEATURE_CONTROL_MSR_ENABLE_VMXON) == 0x0) {
373 printk("VMX disabled by Feature Control MSR.\n");
374 return 0;
375 }
376 }
377 else {
378 wrmsr(IA32_FEATURE_CONTROL_MSR,
379 IA32_FEATURE_CONTROL_MSR_LOCK |
380 IA32_FEATURE_CONTROL_MSR_ENABLE_VMXON, 0);
381 }
383 if (!check_vmx_controls(MONITOR_PIN_BASED_EXEC_CONTROLS,
384 MSR_IA32_VMX_PINBASED_CTLS_MSR))
385 return 0;
386 if (!check_vmx_controls(MONITOR_CPU_BASED_EXEC_CONTROLS,
387 MSR_IA32_VMX_PROCBASED_CTLS_MSR))
388 return 0;
389 if (!check_vmx_controls(MONITOR_VM_EXIT_CONTROLS,
390 MSR_IA32_VMX_EXIT_CTLS_MSR))
391 return 0;
392 if (!check_vmx_controls(MONITOR_VM_ENTRY_CONTROLS,
393 MSR_IA32_VMX_ENTRY_CTLS_MSR))
394 return 0;
396 set_in_cr4(X86_CR4_VMXE); /* Enable VMXE */
398 if (!(vmcs = alloc_vmcs())) {
399 printk("Failed to allocate VMCS\n");
400 return 0;
401 }
403 phys_vmcs = (u64) virt_to_phys(vmcs);
405 if (!(__vmxon(phys_vmcs))) {
406 printk("VMXON is done\n");
407 }
409 vmx_save_init_msrs();
411 hvm_enabled = 1;
413 return 1;
414 }
416 void stop_vmx(void)
417 {
418 if (read_cr4() & X86_CR4_VMXE)
419 __vmxoff();
420 }
422 /*
423 * Not all cases receive valid value in the VM-exit instruction length field.
424 */
425 #define __get_instruction_length(len) \
426 __vmread(VM_EXIT_INSTRUCTION_LEN, &(len)); \
427 if ((len) < 1 || (len) > 15) \
428 __vmx_bug(&regs);
430 static void inline __update_guest_eip(unsigned long inst_len)
431 {
432 unsigned long current_eip;
434 __vmread(GUEST_RIP, &current_eip);
435 __vmwrite(GUEST_RIP, current_eip + inst_len);
436 }
439 static int vmx_do_page_fault(unsigned long va, struct cpu_user_regs *regs)
440 {
441 unsigned long gpa; /* FIXME: PAE */
442 int result;
444 #if 0 /* keep for debugging */
445 {
446 unsigned long eip;
448 __vmread(GUEST_RIP, &eip);
449 VMX_DBG_LOG(DBG_LEVEL_VMMU,
450 "vmx_do_page_fault = 0x%lx, eip = %lx, error_code = %lx",
451 va, eip, (unsigned long)regs->error_code);
452 }
453 #endif
455 if (!vmx_paging_enabled(current)){
456 handle_mmio(va, va);
457 TRACE_VMEXIT (2,2);
458 return 1;
459 }
460 gpa = gva_to_gpa(va);
462 /* Use 1:1 page table to identify MMIO address space */
463 if ( mmio_space(gpa) ){
464 struct vcpu *v = current;
465 /* No support for APIC */
466 if (!vmx_apic_support(v->domain) && gpa >= 0xFEC00000) {
467 u32 inst_len;
468 __vmread(VM_EXIT_INSTRUCTION_LEN, &(inst_len));
469 __update_guest_eip(inst_len);
470 return 1;
471 }
472 TRACE_VMEXIT (2,2);
473 handle_mmio(va, gpa);
474 return 1;
475 }
477 result = shadow_fault(va, regs);
478 TRACE_VMEXIT (2,result);
479 #if 0
480 if ( !result )
481 {
482 __vmread(GUEST_RIP, &eip);
483 printk("vmx pgfault to guest va=%lx eip=%lx\n", va, eip);
484 }
485 #endif
487 return result;
488 }
490 static void vmx_do_no_device_fault(void)
491 {
492 unsigned long cr0;
493 struct vcpu *v = current;
495 clts();
496 setup_fpu(current);
497 __vmread_vcpu(v, CR0_READ_SHADOW, &cr0);
498 if (!(cr0 & X86_CR0_TS)) {
499 __vmread_vcpu(v, GUEST_CR0, &cr0);
500 cr0 &= ~X86_CR0_TS;
501 __vmwrite(GUEST_CR0, cr0);
502 }
503 __vm_clear_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_NM);
504 }
506 /* Reserved bits: [31:15], [12:11], [9], [6], [2:1] */
507 #define VMX_VCPU_CPUID_L1_RESERVED 0xffff9a46
509 static void vmx_vmexit_do_cpuid(unsigned long input, struct cpu_user_regs *regs)
510 {
511 unsigned int eax, ebx, ecx, edx;
512 unsigned long eip;
513 struct vcpu *v = current;
515 __vmread(GUEST_RIP, &eip);
517 VMX_DBG_LOG(DBG_LEVEL_1,
518 "do_cpuid: (eax) %lx, (ebx) %lx, (ecx) %lx, (edx) %lx,"
519 " (esi) %lx, (edi) %lx",
520 (unsigned long)regs->eax, (unsigned long)regs->ebx,
521 (unsigned long)regs->ecx, (unsigned long)regs->edx,
522 (unsigned long)regs->esi, (unsigned long)regs->edi);
524 cpuid(input, &eax, &ebx, &ecx, &edx);
526 if ( input == 1 )
527 {
528 if ( vmx_apic_support(v->domain) &&
529 !vlapic_global_enabled((VLAPIC(v))) )
530 clear_bit(X86_FEATURE_APIC, &edx);
532 #ifdef __x86_64__
533 if ( v->domain->arch.ops->guest_paging_levels == PAGING_L2 )
534 #endif
535 {
536 clear_bit(X86_FEATURE_PSE, &edx);
537 clear_bit(X86_FEATURE_PAE, &edx);
538 clear_bit(X86_FEATURE_PSE36, &edx);
539 }
541 /* Unsupportable for virtualised CPUs. */
542 ecx &= ~VMX_VCPU_CPUID_L1_RESERVED; /* mask off reserved bits */
543 clear_bit(X86_FEATURE_VMXE & 31, &ecx);
544 clear_bit(X86_FEATURE_MWAIT & 31, &ecx);
545 }
547 regs->eax = (unsigned long) eax;
548 regs->ebx = (unsigned long) ebx;
549 regs->ecx = (unsigned long) ecx;
550 regs->edx = (unsigned long) edx;
552 VMX_DBG_LOG(DBG_LEVEL_1,
553 "vmx_vmexit_do_cpuid: eip: %lx, input: %lx, out:eax=%x, ebx=%x, ecx=%x, edx=%x",
554 eip, input, eax, ebx, ecx, edx);
556 }
558 #define CASE_GET_REG_P(REG, reg) \
559 case REG_ ## REG: reg_p = (unsigned long *)&(regs->reg); break
561 static void vmx_dr_access (unsigned long exit_qualification, struct cpu_user_regs *regs)
562 {
563 unsigned int reg;
564 unsigned long *reg_p = 0;
565 struct vcpu *v = current;
566 unsigned long eip;
568 __vmread(GUEST_RIP, &eip);
570 reg = exit_qualification & DEBUG_REG_ACCESS_NUM;
572 VMX_DBG_LOG(DBG_LEVEL_1,
573 "vmx_dr_access : eip=%lx, reg=%d, exit_qualification = %lx",
574 eip, reg, exit_qualification);
576 switch(exit_qualification & DEBUG_REG_ACCESS_REG) {
577 CASE_GET_REG_P(EAX, eax);
578 CASE_GET_REG_P(ECX, ecx);
579 CASE_GET_REG_P(EDX, edx);
580 CASE_GET_REG_P(EBX, ebx);
581 CASE_GET_REG_P(EBP, ebp);
582 CASE_GET_REG_P(ESI, esi);
583 CASE_GET_REG_P(EDI, edi);
584 case REG_ESP:
585 break;
586 default:
587 __vmx_bug(regs);
588 }
590 switch (exit_qualification & DEBUG_REG_ACCESS_TYPE) {
591 case TYPE_MOV_TO_DR:
592 /* don't need to check the range */
593 if (reg != REG_ESP)
594 v->arch.guest_context.debugreg[reg] = *reg_p;
595 else {
596 unsigned long value;
597 __vmread(GUEST_RSP, &value);
598 v->arch.guest_context.debugreg[reg] = value;
599 }
600 break;
601 case TYPE_MOV_FROM_DR:
602 if (reg != REG_ESP)
603 *reg_p = v->arch.guest_context.debugreg[reg];
604 else {
605 __vmwrite(GUEST_RSP, v->arch.guest_context.debugreg[reg]);
606 }
607 break;
608 }
609 }
611 /*
612 * Invalidate the TLB for va. Invalidate the shadow page corresponding
613 * to the address va.
614 */
615 static void vmx_vmexit_do_invlpg(unsigned long va)
616 {
617 unsigned long eip;
618 struct vcpu *v = current;
620 __vmread(GUEST_RIP, &eip);
622 VMX_DBG_LOG(DBG_LEVEL_VMMU, "vmx_vmexit_do_invlpg: eip=%lx, va=%lx",
623 eip, va);
625 /*
626 * We do the safest things first, then try to update the shadow,
627 * copying from the guest.
628 */
629 shadow_invlpg(v, va);
630 }
632 static int check_for_null_selector(unsigned long eip)
633 {
634 unsigned char inst[MAX_INST_LEN];
635 unsigned long sel;
636 int i, inst_len;
637 int inst_copy_from_guest(unsigned char *, unsigned long, int);
639 __vmread(VM_EXIT_INSTRUCTION_LEN, &inst_len);
640 memset(inst, 0, MAX_INST_LEN);
641 if (inst_copy_from_guest(inst, eip, inst_len) != inst_len) {
642 printf("check_for_null_selector: get guest instruction failed\n");
643 domain_crash_synchronous();
644 }
646 for (i = 0; i < inst_len; i++) {
647 switch (inst[i]) {
648 case 0xf3: /* REPZ */
649 case 0xf2: /* REPNZ */
650 case 0xf0: /* LOCK */
651 case 0x66: /* data32 */
652 case 0x67: /* addr32 */
653 continue;
654 case 0x2e: /* CS */
655 __vmread(GUEST_CS_SELECTOR, &sel);
656 break;
657 case 0x36: /* SS */
658 __vmread(GUEST_SS_SELECTOR, &sel);
659 break;
660 case 0x26: /* ES */
661 __vmread(GUEST_ES_SELECTOR, &sel);
662 break;
663 case 0x64: /* FS */
664 __vmread(GUEST_FS_SELECTOR, &sel);
665 break;
666 case 0x65: /* GS */
667 __vmread(GUEST_GS_SELECTOR, &sel);
668 break;
669 case 0x3e: /* DS */
670 /* FALLTHROUGH */
671 default:
672 /* DS is the default */
673 __vmread(GUEST_DS_SELECTOR, &sel);
674 }
675 return sel == 0 ? 1 : 0;
676 }
678 return 0;
679 }
681 void send_pio_req(struct cpu_user_regs *regs, unsigned long port,
682 unsigned long count, int size, long value, int dir, int pvalid)
683 {
684 struct vcpu *v = current;
685 vcpu_iodata_t *vio;
686 ioreq_t *p;
688 vio = get_vio(v->domain, v->vcpu_id);
689 if (vio == NULL) {
690 printk("bad shared page: %lx\n", (unsigned long) vio);
691 domain_crash_synchronous();
692 }
694 if (test_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags)) {
695 printf("VMX I/O has not yet completed\n");
696 domain_crash_synchronous();
697 }
698 set_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags);
700 p = &vio->vp_ioreq;
701 p->dir = dir;
702 p->pdata_valid = pvalid;
704 p->type = IOREQ_TYPE_PIO;
705 p->size = size;
706 p->addr = port;
707 p->count = count;
708 p->df = regs->eflags & EF_DF ? 1 : 0;
710 if (pvalid) {
711 if (vmx_paging_enabled(current))
712 p->u.pdata = (void *) gva_to_gpa(value);
713 else
714 p->u.pdata = (void *) value; /* guest VA == guest PA */
715 } else
716 p->u.data = value;
718 if (vmx_portio_intercept(p)) {
719 p->state = STATE_IORESP_READY;
720 vmx_io_assist(v);
721 return;
722 }
724 p->state = STATE_IOREQ_READY;
726 evtchn_send(iopacket_port(v->domain));
727 vmx_wait_io();
728 }
730 static void vmx_io_instruction(struct cpu_user_regs *regs,
731 unsigned long exit_qualification, unsigned long inst_len)
732 {
733 struct mmio_op *mmio_opp;
734 unsigned long eip, cs, eflags;
735 unsigned long port, size, dir;
736 int vm86;
738 mmio_opp = &current->arch.arch_vmx.mmio_op;
739 mmio_opp->instr = INSTR_PIO;
740 mmio_opp->flags = 0;
742 __vmread(GUEST_RIP, &eip);
743 __vmread(GUEST_CS_SELECTOR, &cs);
744 __vmread(GUEST_RFLAGS, &eflags);
745 vm86 = eflags & X86_EFLAGS_VM ? 1 : 0;
747 VMX_DBG_LOG(DBG_LEVEL_1,
748 "vmx_io_instruction: vm86 %d, eip=%lx:%lx, "
749 "exit_qualification = %lx",
750 vm86, cs, eip, exit_qualification);
752 if (test_bit(6, &exit_qualification))
753 port = (exit_qualification >> 16) & 0xFFFF;
754 else
755 port = regs->edx & 0xffff;
756 TRACE_VMEXIT(2, port);
757 size = (exit_qualification & 7) + 1;
758 dir = test_bit(3, &exit_qualification); /* direction */
760 if (test_bit(4, &exit_qualification)) { /* string instruction */
761 unsigned long addr, count = 1;
762 int sign = regs->eflags & EF_DF ? -1 : 1;
764 __vmread(GUEST_LINEAR_ADDRESS, &addr);
766 /*
767 * In protected mode, guest linear address is invalid if the
768 * selector is null.
769 */
770 if (!vm86 && check_for_null_selector(eip))
771 addr = dir == IOREQ_WRITE ? regs->esi : regs->edi;
773 if (test_bit(5, &exit_qualification)) { /* "rep" prefix */
774 mmio_opp->flags |= REPZ;
775 count = vm86 ? regs->ecx & 0xFFFF : regs->ecx;
776 }
778 /*
779 * Handle string pio instructions that cross pages or that
780 * are unaligned. See the comments in vmx_platform.c/handle_mmio()
781 */
782 if ((addr & PAGE_MASK) != ((addr + size - 1) & PAGE_MASK)) {
783 unsigned long value = 0;
785 mmio_opp->flags |= OVERLAP;
786 if (dir == IOREQ_WRITE)
787 vmx_copy(&value, addr, size, VMX_COPY_IN);
788 send_pio_req(regs, port, 1, size, value, dir, 0);
789 } else {
790 if ((addr & PAGE_MASK) != ((addr + count * size - 1) & PAGE_MASK)) {
791 if (sign > 0)
792 count = (PAGE_SIZE - (addr & ~PAGE_MASK)) / size;
793 else
794 count = (addr & ~PAGE_MASK) / size;
795 } else
796 __update_guest_eip(inst_len);
798 send_pio_req(regs, port, count, size, addr, dir, 1);
799 }
800 } else {
801 __update_guest_eip(inst_len);
802 send_pio_req(regs, port, 1, size, regs->eax, dir, 0);
803 }
804 }
806 int
807 vmx_copy(void *buf, unsigned long laddr, int size, int dir)
808 {
809 unsigned long gpa, mfn;
810 char *addr;
811 int count;
813 while (size > 0) {
814 count = PAGE_SIZE - (laddr & ~PAGE_MASK);
815 if (count > size)
816 count = size;
818 if (vmx_paging_enabled(current)) {
819 gpa = gva_to_gpa(laddr);
820 mfn = get_mfn_from_pfn(gpa >> PAGE_SHIFT);
821 } else
822 mfn = get_mfn_from_pfn(laddr >> PAGE_SHIFT);
823 if (mfn == INVALID_MFN)
824 return 0;
826 addr = (char *)map_domain_page(mfn) + (laddr & ~PAGE_MASK);
828 if (dir == VMX_COPY_IN)
829 memcpy(buf, addr, count);
830 else
831 memcpy(addr, buf, count);
833 unmap_domain_page(addr);
835 laddr += count;
836 buf += count;
837 size -= count;
838 }
840 return 1;
841 }
843 int
844 vmx_world_save(struct vcpu *v, struct vmx_assist_context *c)
845 {
846 unsigned long inst_len;
847 int error = 0;
849 error |= __vmread(VM_EXIT_INSTRUCTION_LEN, &inst_len);
850 error |= __vmread(GUEST_RIP, &c->eip);
851 c->eip += inst_len; /* skip transition instruction */
852 error |= __vmread(GUEST_RSP, &c->esp);
853 error |= __vmread(GUEST_RFLAGS, &c->eflags);
855 error |= __vmread(CR0_READ_SHADOW, &c->cr0);
856 c->cr3 = v->arch.arch_vmx.cpu_cr3;
857 error |= __vmread(CR4_READ_SHADOW, &c->cr4);
859 error |= __vmread(GUEST_IDTR_LIMIT, &c->idtr_limit);
860 error |= __vmread(GUEST_IDTR_BASE, &c->idtr_base);
862 error |= __vmread(GUEST_GDTR_LIMIT, &c->gdtr_limit);
863 error |= __vmread(GUEST_GDTR_BASE, &c->gdtr_base);
865 error |= __vmread(GUEST_CS_SELECTOR, &c->cs_sel);
866 error |= __vmread(GUEST_CS_LIMIT, &c->cs_limit);
867 error |= __vmread(GUEST_CS_BASE, &c->cs_base);
868 error |= __vmread(GUEST_CS_AR_BYTES, &c->cs_arbytes.bytes);
870 error |= __vmread(GUEST_DS_SELECTOR, &c->ds_sel);
871 error |= __vmread(GUEST_DS_LIMIT, &c->ds_limit);
872 error |= __vmread(GUEST_DS_BASE, &c->ds_base);
873 error |= __vmread(GUEST_DS_AR_BYTES, &c->ds_arbytes.bytes);
875 error |= __vmread(GUEST_ES_SELECTOR, &c->es_sel);
876 error |= __vmread(GUEST_ES_LIMIT, &c->es_limit);
877 error |= __vmread(GUEST_ES_BASE, &c->es_base);
878 error |= __vmread(GUEST_ES_AR_BYTES, &c->es_arbytes.bytes);
880 error |= __vmread(GUEST_SS_SELECTOR, &c->ss_sel);
881 error |= __vmread(GUEST_SS_LIMIT, &c->ss_limit);
882 error |= __vmread(GUEST_SS_BASE, &c->ss_base);
883 error |= __vmread(GUEST_SS_AR_BYTES, &c->ss_arbytes.bytes);
885 error |= __vmread(GUEST_FS_SELECTOR, &c->fs_sel);
886 error |= __vmread(GUEST_FS_LIMIT, &c->fs_limit);
887 error |= __vmread(GUEST_FS_BASE, &c->fs_base);
888 error |= __vmread(GUEST_FS_AR_BYTES, &c->fs_arbytes.bytes);
890 error |= __vmread(GUEST_GS_SELECTOR, &c->gs_sel);
891 error |= __vmread(GUEST_GS_LIMIT, &c->gs_limit);
892 error |= __vmread(GUEST_GS_BASE, &c->gs_base);
893 error |= __vmread(GUEST_GS_AR_BYTES, &c->gs_arbytes.bytes);
895 error |= __vmread(GUEST_TR_SELECTOR, &c->tr_sel);
896 error |= __vmread(GUEST_TR_LIMIT, &c->tr_limit);
897 error |= __vmread(GUEST_TR_BASE, &c->tr_base);
898 error |= __vmread(GUEST_TR_AR_BYTES, &c->tr_arbytes.bytes);
900 error |= __vmread(GUEST_LDTR_SELECTOR, &c->ldtr_sel);
901 error |= __vmread(GUEST_LDTR_LIMIT, &c->ldtr_limit);
902 error |= __vmread(GUEST_LDTR_BASE, &c->ldtr_base);
903 error |= __vmread(GUEST_LDTR_AR_BYTES, &c->ldtr_arbytes.bytes);
905 return !error;
906 }
908 int
909 vmx_world_restore(struct vcpu *v, struct vmx_assist_context *c)
910 {
911 unsigned long mfn, old_cr4, old_base_mfn;
912 int error = 0;
914 error |= __vmwrite(GUEST_RIP, c->eip);
915 error |= __vmwrite(GUEST_RSP, c->esp);
916 error |= __vmwrite(GUEST_RFLAGS, c->eflags);
918 error |= __vmwrite(CR0_READ_SHADOW, c->cr0);
920 if (!vmx_paging_enabled(v)) {
921 VMX_DBG_LOG(DBG_LEVEL_VMMU, "switching to vmxassist. use phys table");
922 __vmwrite(GUEST_CR3, pagetable_get_paddr(v->domain->arch.phys_table));
923 goto skip_cr3;
924 }
926 if (c->cr3 == v->arch.arch_vmx.cpu_cr3) {
927 /*
928 * This is a simple TLB flush, implying the guest has
929 * removed some translation or changed page attributes.
930 * We simply invalidate the shadow.
931 */
932 mfn = get_mfn_from_pfn(c->cr3 >> PAGE_SHIFT);
933 if (mfn != pagetable_get_pfn(v->arch.guest_table)) {
934 printk("Invalid CR3 value=%x", c->cr3);
935 domain_crash_synchronous();
936 return 0;
937 }
938 shadow_sync_all(v->domain);
939 } else {
940 /*
941 * If different, make a shadow. Check if the PDBR is valid
942 * first.
943 */
944 VMX_DBG_LOG(DBG_LEVEL_VMMU, "CR3 c->cr3 = %x", c->cr3);
945 if ((c->cr3 >> PAGE_SHIFT) > v->domain->max_pages) {
946 printk("Invalid CR3 value=%x", c->cr3);
947 domain_crash_synchronous();
948 return 0;
949 }
950 mfn = get_mfn_from_pfn(c->cr3 >> PAGE_SHIFT);
951 if(!get_page(pfn_to_page(mfn), v->domain))
952 return 0;
953 old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
954 v->arch.guest_table = mk_pagetable(mfn << PAGE_SHIFT);
955 if (old_base_mfn)
956 put_page(pfn_to_page(old_base_mfn));
957 update_pagetables(v);
958 /*
959 * arch.shadow_table should now hold the next CR3 for shadow
960 */
961 v->arch.arch_vmx.cpu_cr3 = c->cr3;
962 VMX_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %x", c->cr3);
963 __vmwrite(GUEST_CR3, pagetable_get_paddr(v->arch.shadow_table));
964 }
966 skip_cr3:
968 error |= __vmread(CR4_READ_SHADOW, &old_cr4);
969 error |= __vmwrite(GUEST_CR4, (c->cr4 | VMX_CR4_HOST_MASK));
970 error |= __vmwrite(CR4_READ_SHADOW, c->cr4);
972 error |= __vmwrite(GUEST_IDTR_LIMIT, c->idtr_limit);
973 error |= __vmwrite(GUEST_IDTR_BASE, c->idtr_base);
975 error |= __vmwrite(GUEST_GDTR_LIMIT, c->gdtr_limit);
976 error |= __vmwrite(GUEST_GDTR_BASE, c->gdtr_base);
978 error |= __vmwrite(GUEST_CS_SELECTOR, c->cs_sel);
979 error |= __vmwrite(GUEST_CS_LIMIT, c->cs_limit);
980 error |= __vmwrite(GUEST_CS_BASE, c->cs_base);
981 error |= __vmwrite(GUEST_CS_AR_BYTES, c->cs_arbytes.bytes);
983 error |= __vmwrite(GUEST_DS_SELECTOR, c->ds_sel);
984 error |= __vmwrite(GUEST_DS_LIMIT, c->ds_limit);
985 error |= __vmwrite(GUEST_DS_BASE, c->ds_base);
986 error |= __vmwrite(GUEST_DS_AR_BYTES, c->ds_arbytes.bytes);
988 error |= __vmwrite(GUEST_ES_SELECTOR, c->es_sel);
989 error |= __vmwrite(GUEST_ES_LIMIT, c->es_limit);
990 error |= __vmwrite(GUEST_ES_BASE, c->es_base);
991 error |= __vmwrite(GUEST_ES_AR_BYTES, c->es_arbytes.bytes);
993 error |= __vmwrite(GUEST_SS_SELECTOR, c->ss_sel);
994 error |= __vmwrite(GUEST_SS_LIMIT, c->ss_limit);
995 error |= __vmwrite(GUEST_SS_BASE, c->ss_base);
996 error |= __vmwrite(GUEST_SS_AR_BYTES, c->ss_arbytes.bytes);
998 error |= __vmwrite(GUEST_FS_SELECTOR, c->fs_sel);
999 error |= __vmwrite(GUEST_FS_LIMIT, c->fs_limit);
1000 error |= __vmwrite(GUEST_FS_BASE, c->fs_base);
1001 error |= __vmwrite(GUEST_FS_AR_BYTES, c->fs_arbytes.bytes);
1003 error |= __vmwrite(GUEST_GS_SELECTOR, c->gs_sel);
1004 error |= __vmwrite(GUEST_GS_LIMIT, c->gs_limit);
1005 error |= __vmwrite(GUEST_GS_BASE, c->gs_base);
1006 error |= __vmwrite(GUEST_GS_AR_BYTES, c->gs_arbytes.bytes);
1008 error |= __vmwrite(GUEST_TR_SELECTOR, c->tr_sel);
1009 error |= __vmwrite(GUEST_TR_LIMIT, c->tr_limit);
1010 error |= __vmwrite(GUEST_TR_BASE, c->tr_base);
1011 error |= __vmwrite(GUEST_TR_AR_BYTES, c->tr_arbytes.bytes);
1013 error |= __vmwrite(GUEST_LDTR_SELECTOR, c->ldtr_sel);
1014 error |= __vmwrite(GUEST_LDTR_LIMIT, c->ldtr_limit);
1015 error |= __vmwrite(GUEST_LDTR_BASE, c->ldtr_base);
1016 error |= __vmwrite(GUEST_LDTR_AR_BYTES, c->ldtr_arbytes.bytes);
1018 return !error;
1019 }
1021 enum { VMX_ASSIST_INVOKE = 0, VMX_ASSIST_RESTORE };
1023 int
1024 vmx_assist(struct vcpu *v, int mode)
1025 {
1026 struct vmx_assist_context c;
1027 u32 magic;
1028 u32 cp;
1030 /* make sure vmxassist exists (this is not an error) */
1031 if (!vmx_copy(&magic, VMXASSIST_MAGIC_OFFSET, sizeof(magic), VMX_COPY_IN))
1032 return 0;
1033 if (magic != VMXASSIST_MAGIC)
1034 return 0;
1036 switch (mode) {
1037 /*
1038 * Transfer control to vmxassist.
1039 * Store the current context in VMXASSIST_OLD_CONTEXT and load
1040 * the new VMXASSIST_NEW_CONTEXT context. This context was created
1041 * by vmxassist and will transfer control to it.
1042 */
1043 case VMX_ASSIST_INVOKE:
1044 /* save the old context */
1045 if (!vmx_copy(&cp, VMXASSIST_OLD_CONTEXT, sizeof(cp), VMX_COPY_IN))
1046 goto error;
1047 if (cp != 0) {
1048 if (!vmx_world_save(v, &c))
1049 goto error;
1050 if (!vmx_copy(&c, cp, sizeof(c), VMX_COPY_OUT))
1051 goto error;
1052 }
1054 /* restore the new context, this should activate vmxassist */
1055 if (!vmx_copy(&cp, VMXASSIST_NEW_CONTEXT, sizeof(cp), VMX_COPY_IN))
1056 goto error;
1057 if (cp != 0) {
1058 if (!vmx_copy(&c, cp, sizeof(c), VMX_COPY_IN))
1059 goto error;
1060 if (!vmx_world_restore(v, &c))
1061 goto error;
1062 return 1;
1063 }
1064 break;
1066 /*
1067 * Restore the VMXASSIST_OLD_CONTEXT that was saved by VMX_ASSIST_INVOKE
1068 * above.
1069 */
1070 case VMX_ASSIST_RESTORE:
1071 /* save the old context */
1072 if (!vmx_copy(&cp, VMXASSIST_OLD_CONTEXT, sizeof(cp), VMX_COPY_IN))
1073 goto error;
1074 if (cp != 0) {
1075 if (!vmx_copy(&c, cp, sizeof(c), VMX_COPY_IN))
1076 goto error;
1077 if (!vmx_world_restore(v, &c))
1078 goto error;
1079 return 1;
1080 }
1081 break;
1082 }
1084 error:
1085 printf("Failed to transfer to vmxassist\n");
1086 domain_crash_synchronous();
1087 return 0;
1088 }
1090 static int vmx_set_cr0(unsigned long value)
1091 {
1092 struct vcpu *v = current;
1093 unsigned long mfn;
1094 unsigned long eip;
1095 int paging_enabled;
1096 unsigned long vm_entry_value;
1097 unsigned long old_cr0;
1099 /*
1100 * CR0: We don't want to lose PE and PG.
1101 */
1102 __vmread_vcpu(v, CR0_READ_SHADOW, &old_cr0);
1103 paging_enabled = (old_cr0 & X86_CR0_PE) && (old_cr0 & X86_CR0_PG);
1104 /* If the OS doesn't use clts to clear the TS bit... */
1105 if((old_cr0 & X86_CR0_TS) && !(value & X86_CR0_TS))
1106 {
1107 clts();
1108 setup_fpu(v);
1109 }
1112 __vmwrite(GUEST_CR0, value | X86_CR0_PE | X86_CR0_PG | X86_CR0_NE);
1113 __vmwrite(CR0_READ_SHADOW, value);
1115 VMX_DBG_LOG(DBG_LEVEL_VMMU, "Update CR0 value = %lx\n", value);
1117 if ((value & X86_CR0_PE) && (value & X86_CR0_PG) && !paging_enabled) {
1118 /*
1119 * The guest CR3 must be pointing to the guest physical.
1120 */
1121 if ( !VALID_MFN(mfn = get_mfn_from_pfn(
1122 v->arch.arch_vmx.cpu_cr3 >> PAGE_SHIFT)) ||
1123 !get_page(pfn_to_page(mfn), v->domain) )
1124 {
1125 printk("Invalid CR3 value = %lx", v->arch.arch_vmx.cpu_cr3);
1126 domain_crash_synchronous(); /* need to take a clean path */
1127 }
1129 #if defined(__x86_64__)
1130 if (test_bit(VMX_CPU_STATE_LME_ENABLED,
1131 &v->arch.arch_vmx.cpu_state) &&
1132 !test_bit(VMX_CPU_STATE_PAE_ENABLED,
1133 &v->arch.arch_vmx.cpu_state)){
1134 VMX_DBG_LOG(DBG_LEVEL_1, "Enable paging before PAE enable\n");
1135 vmx_inject_exception(v, TRAP_gp_fault, 0);
1136 }
1137 if (test_bit(VMX_CPU_STATE_LME_ENABLED,
1138 &v->arch.arch_vmx.cpu_state)){
1139 /* PAE should already be enabled at this point */
1140 VMX_DBG_LOG(DBG_LEVEL_1, "Enable the Long mode\n");
1141 set_bit(VMX_CPU_STATE_LMA_ENABLED,
1142 &v->arch.arch_vmx.cpu_state);
1143 __vmread(VM_ENTRY_CONTROLS, &vm_entry_value);
1144 vm_entry_value |= VM_ENTRY_CONTROLS_IA32E_MODE;
1145 __vmwrite(VM_ENTRY_CONTROLS, vm_entry_value);
1147 #if CONFIG_PAGING_LEVELS >= 4
1148 if(!shadow_set_guest_paging_levels(v->domain, 4)) {
1149 printk("Unsupported guest paging levels\n");
1150 domain_crash_synchronous(); /* need to take a clean path */
1151 }
1152 #endif
1153 }
1154 else
1155 {
1156 #if CONFIG_PAGING_LEVELS >= 4
1157 if(!shadow_set_guest_paging_levels(v->domain, 2)) {
1158 printk("Unsupported guest paging levels\n");
1159 domain_crash_synchronous(); /* need to take a clean path */
1160 }
1161 #endif
1162 }
1164 {
1165 unsigned long crn;
1166 /* update CR4's PAE if needed */
1167 __vmread(GUEST_CR4, &crn);
1168 if ( (!(crn & X86_CR4_PAE)) &&
1169 test_bit(VMX_CPU_STATE_PAE_ENABLED,
1170 &v->arch.arch_vmx.cpu_state) )
1171 {
1172 VMX_DBG_LOG(DBG_LEVEL_1, "enable PAE on cr4\n");
1173 __vmwrite(GUEST_CR4, crn | X86_CR4_PAE);
1174 }
1175 }
1176 #endif
1177 /*
1178 * Now arch.guest_table points to machine physical.
1179 */
1180 v->arch.guest_table = mk_pagetable(mfn << PAGE_SHIFT);
1181 update_pagetables(v);
1183 VMX_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx",
1184 (unsigned long) (mfn << PAGE_SHIFT));
1186 __vmwrite(GUEST_CR3, pagetable_get_paddr(v->arch.shadow_table));
1187 /*
1188 * arch->shadow_table should hold the next CR3 for shadow
1189 */
1190 VMX_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx",
1191 v->arch.arch_vmx.cpu_cr3, mfn);
1192 }
1194 if(!((value & X86_CR0_PE) && (value & X86_CR0_PG)) && paging_enabled)
1195 if(v->arch.arch_vmx.cpu_cr3){
1196 put_page(pfn_to_page(get_mfn_from_pfn(
1197 v->arch.arch_vmx.cpu_cr3 >> PAGE_SHIFT)));
1198 v->arch.guest_table = mk_pagetable(0);
1199 }
1201 /*
1202 * VMX does not implement real-mode virtualization. We emulate
1203 * real-mode by performing a world switch to VMXAssist whenever
1204 * a partition disables the CR0.PE bit.
1205 */
1206 if ((value & X86_CR0_PE) == 0) {
1207 if ( value & X86_CR0_PG ) {
1208 /* inject GP here */
1209 vmx_inject_exception(v, TRAP_gp_fault, 0);
1210 return 0;
1211 } else {
1212 /*
1213 * Disable paging here.
1214 * Same as PE == 1 && PG == 0
1215 */
1216 if (test_bit(VMX_CPU_STATE_LMA_ENABLED,
1217 &v->arch.arch_vmx.cpu_state)){
1218 clear_bit(VMX_CPU_STATE_LMA_ENABLED,
1219 &v->arch.arch_vmx.cpu_state);
1220 __vmread(VM_ENTRY_CONTROLS, &vm_entry_value);
1221 vm_entry_value &= ~VM_ENTRY_CONTROLS_IA32E_MODE;
1222 __vmwrite(VM_ENTRY_CONTROLS, vm_entry_value);
1223 }
1224 }
1226 clear_all_shadow_status(v->domain);
1227 if (vmx_assist(v, VMX_ASSIST_INVOKE)) {
1228 set_bit(VMX_CPU_STATE_ASSIST_ENABLED, &v->arch.arch_vmx.cpu_state);
1229 __vmread(GUEST_RIP, &eip);
1230 VMX_DBG_LOG(DBG_LEVEL_1,
1231 "Transferring control to vmxassist %%eip 0x%lx\n", eip);
1232 return 0; /* do not update eip! */
1233 }
1234 } else if (test_bit(VMX_CPU_STATE_ASSIST_ENABLED,
1235 &v->arch.arch_vmx.cpu_state)) {
1236 __vmread(GUEST_RIP, &eip);
1237 VMX_DBG_LOG(DBG_LEVEL_1,
1238 "Enabling CR0.PE at %%eip 0x%lx\n", eip);
1239 if (vmx_assist(v, VMX_ASSIST_RESTORE)) {
1240 clear_bit(VMX_CPU_STATE_ASSIST_ENABLED,
1241 &v->arch.arch_vmx.cpu_state);
1242 __vmread(GUEST_RIP, &eip);
1243 VMX_DBG_LOG(DBG_LEVEL_1,
1244 "Restoring to %%eip 0x%lx\n", eip);
1245 return 0; /* do not update eip! */
1246 }
1247 }
1249 return 1;
1250 }
1252 #define CASE_GET_REG(REG, reg) \
1253 case REG_ ## REG: value = regs->reg; break
1255 #define CASE_EXTEND_SET_REG \
1256 CASE_EXTEND_REG(S)
1257 #define CASE_EXTEND_GET_REG \
1258 CASE_EXTEND_REG(G)
1260 #ifdef __i386__
1261 #define CASE_EXTEND_REG(T)
1262 #else
1263 #define CASE_EXTEND_REG(T) \
1264 CASE_ ## T ## ET_REG(R8, r8); \
1265 CASE_ ## T ## ET_REG(R9, r9); \
1266 CASE_ ## T ## ET_REG(R10, r10); \
1267 CASE_ ## T ## ET_REG(R11, r11); \
1268 CASE_ ## T ## ET_REG(R12, r12); \
1269 CASE_ ## T ## ET_REG(R13, r13); \
1270 CASE_ ## T ## ET_REG(R14, r14); \
1271 CASE_ ## T ## ET_REG(R15, r15);
1272 #endif
1275 /*
1276 * Write to control registers
1277 */
1278 static int mov_to_cr(int gp, int cr, struct cpu_user_regs *regs)
1279 {
1280 unsigned long value;
1281 unsigned long old_cr;
1282 struct vcpu *v = current;
1284 switch (gp) {
1285 CASE_GET_REG(EAX, eax);
1286 CASE_GET_REG(ECX, ecx);
1287 CASE_GET_REG(EDX, edx);
1288 CASE_GET_REG(EBX, ebx);
1289 CASE_GET_REG(EBP, ebp);
1290 CASE_GET_REG(ESI, esi);
1291 CASE_GET_REG(EDI, edi);
1292 CASE_EXTEND_GET_REG
1293 case REG_ESP:
1294 __vmread(GUEST_RSP, &value);
1295 break;
1296 default:
1297 printk("invalid gp: %d\n", gp);
1298 __vmx_bug(regs);
1299 }
1301 VMX_DBG_LOG(DBG_LEVEL_1, "mov_to_cr: CR%d, value = %lx,", cr, value);
1302 VMX_DBG_LOG(DBG_LEVEL_1, "current = %lx,", (unsigned long) current);
1304 switch(cr) {
1305 case 0:
1306 {
1307 return vmx_set_cr0(value);
1308 }
1309 case 3:
1310 {
1311 unsigned long old_base_mfn, mfn;
1313 /*
1314 * If paging is not enabled yet, simply copy the value to CR3.
1315 */
1316 if (!vmx_paging_enabled(v)) {
1317 v->arch.arch_vmx.cpu_cr3 = value;
1318 break;
1319 }
1321 /*
1322 * We make a new one if the shadow does not exist.
1323 */
1324 if (value == v->arch.arch_vmx.cpu_cr3) {
1325 /*
1326 * This is a simple TLB flush, implying the guest has
1327 * removed some translation or changed page attributes.
1328 * We simply invalidate the shadow.
1329 */
1330 mfn = get_mfn_from_pfn(value >> PAGE_SHIFT);
1331 if (mfn != pagetable_get_pfn(v->arch.guest_table))
1332 __vmx_bug(regs);
1333 shadow_sync_all(v->domain);
1334 } else {
1335 /*
1336 * If different, make a shadow. Check if the PDBR is valid
1337 * first.
1338 */
1339 VMX_DBG_LOG(DBG_LEVEL_VMMU, "CR3 value = %lx", value);
1340 if ( ((value >> PAGE_SHIFT) > v->domain->max_pages ) ||
1341 !VALID_MFN(mfn = get_mfn_from_pfn(value >> PAGE_SHIFT)) ||
1342 !get_page(pfn_to_page(mfn), v->domain) )
1343 {
1344 printk("Invalid CR3 value=%lx", value);
1345 domain_crash_synchronous(); /* need to take a clean path */
1346 }
1347 old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
1348 v->arch.guest_table = mk_pagetable(mfn << PAGE_SHIFT);
1349 if (old_base_mfn)
1350 put_page(pfn_to_page(old_base_mfn));
1351 update_pagetables(v);
1352 /*
1353 * arch.shadow_table should now hold the next CR3 for shadow
1354 */
1355 v->arch.arch_vmx.cpu_cr3 = value;
1356 VMX_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx",
1357 value);
1358 __vmwrite(GUEST_CR3, pagetable_get_paddr(v->arch.shadow_table));
1359 }
1360 break;
1361 }
1362 case 4: /* CR4 */
1363 {
1364 if (value & X86_CR4_PAE){
1365 set_bit(VMX_CPU_STATE_PAE_ENABLED, &v->arch.arch_vmx.cpu_state);
1366 } else {
1367 if (test_bit(VMX_CPU_STATE_LMA_ENABLED,
1368 &v->arch.arch_vmx.cpu_state)){
1369 vmx_inject_exception(v, TRAP_gp_fault, 0);
1370 }
1371 clear_bit(VMX_CPU_STATE_PAE_ENABLED, &v->arch.arch_vmx.cpu_state);
1372 }
1374 __vmread(CR4_READ_SHADOW, &old_cr);
1376 __vmwrite(GUEST_CR4, value| VMX_CR4_HOST_MASK);
1377 __vmwrite(CR4_READ_SHADOW, value);
1379 /*
1380 * Writing to CR4 to modify the PSE, PGE, or PAE flag invalidates
1381 * all TLB entries except global entries.
1382 */
1383 if ((old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE)) {
1384 shadow_sync_all(v->domain);
1385 }
1386 break;
1387 }
1388 default:
1389 printk("invalid cr: %d\n", cr);
1390 __vmx_bug(regs);
1391 }
1393 return 1;
1394 }
1396 #define CASE_SET_REG(REG, reg) \
1397 case REG_ ## REG: \
1398 regs->reg = value; \
1399 break
1401 /*
1402 * Read from control registers. CR0 and CR4 are read from the shadow.
1403 */
1404 static void mov_from_cr(int cr, int gp, struct cpu_user_regs *regs)
1405 {
1406 unsigned long value;
1407 struct vcpu *v = current;
1409 if (cr != 3)
1410 __vmx_bug(regs);
1412 value = (unsigned long) v->arch.arch_vmx.cpu_cr3;
1414 switch (gp) {
1415 CASE_SET_REG(EAX, eax);
1416 CASE_SET_REG(ECX, ecx);
1417 CASE_SET_REG(EDX, edx);
1418 CASE_SET_REG(EBX, ebx);
1419 CASE_SET_REG(EBP, ebp);
1420 CASE_SET_REG(ESI, esi);
1421 CASE_SET_REG(EDI, edi);
1422 CASE_EXTEND_SET_REG
1423 case REG_ESP:
1424 __vmwrite(GUEST_RSP, value);
1425 regs->esp = value;
1426 break;
1427 default:
1428 printk("invalid gp: %d\n", gp);
1429 __vmx_bug(regs);
1430 }
1432 VMX_DBG_LOG(DBG_LEVEL_VMMU, "mov_from_cr: CR%d, value = %lx,", cr, value);
1433 }
1435 static int vmx_cr_access(unsigned long exit_qualification, struct cpu_user_regs *regs)
1436 {
1437 unsigned int gp, cr;
1438 unsigned long value;
1439 struct vcpu *v = current;
1441 switch (exit_qualification & CONTROL_REG_ACCESS_TYPE) {
1442 case TYPE_MOV_TO_CR:
1443 gp = exit_qualification & CONTROL_REG_ACCESS_REG;
1444 cr = exit_qualification & CONTROL_REG_ACCESS_NUM;
1445 TRACE_VMEXIT(1,TYPE_MOV_TO_CR);
1446 TRACE_VMEXIT(2,cr);
1447 TRACE_VMEXIT(3,gp);
1448 return mov_to_cr(gp, cr, regs);
1449 case TYPE_MOV_FROM_CR:
1450 gp = exit_qualification & CONTROL_REG_ACCESS_REG;
1451 cr = exit_qualification & CONTROL_REG_ACCESS_NUM;
1452 TRACE_VMEXIT(1,TYPE_MOV_FROM_CR);
1453 TRACE_VMEXIT(2,cr);
1454 TRACE_VMEXIT(3,gp);
1455 mov_from_cr(cr, gp, regs);
1456 break;
1457 case TYPE_CLTS:
1458 TRACE_VMEXIT(1,TYPE_CLTS);
1459 clts();
1460 setup_fpu(current);
1462 __vmread_vcpu(v, GUEST_CR0, &value);
1463 value &= ~X86_CR0_TS; /* clear TS */
1464 __vmwrite(GUEST_CR0, value);
1466 __vmread_vcpu(v, CR0_READ_SHADOW, &value);
1467 value &= ~X86_CR0_TS; /* clear TS */
1468 __vmwrite(CR0_READ_SHADOW, value);
1469 break;
1470 case TYPE_LMSW:
1471 TRACE_VMEXIT(1,TYPE_LMSW);
1472 __vmread_vcpu(v, CR0_READ_SHADOW, &value);
1473 value = (value & ~0xF) |
1474 (((exit_qualification & LMSW_SOURCE_DATA) >> 16) & 0xF);
1475 return vmx_set_cr0(value);
1476 break;
1477 default:
1478 __vmx_bug(regs);
1479 break;
1480 }
1481 return 1;
1482 }
1484 static inline void vmx_do_msr_read(struct cpu_user_regs *regs)
1485 {
1486 u64 msr_content = 0;
1487 struct vcpu *v = current;
1489 VMX_DBG_LOG(DBG_LEVEL_1, "vmx_do_msr_read: ecx=%lx, eax=%lx, edx=%lx",
1490 (unsigned long)regs->ecx, (unsigned long)regs->eax,
1491 (unsigned long)regs->edx);
1492 switch (regs->ecx) {
1493 case MSR_IA32_TIME_STAMP_COUNTER:
1494 {
1495 struct vmx_virpit *vpit;
1497 rdtscll(msr_content);
1498 vpit = &(v->domain->arch.vmx_platform.vmx_pit);
1499 msr_content += vpit->shift;
1500 break;
1501 }
1502 case MSR_IA32_SYSENTER_CS:
1503 __vmread(GUEST_SYSENTER_CS, (u32 *)&msr_content);
1504 break;
1505 case MSR_IA32_SYSENTER_ESP:
1506 __vmread(GUEST_SYSENTER_ESP, &msr_content);
1507 break;
1508 case MSR_IA32_SYSENTER_EIP:
1509 __vmread(GUEST_SYSENTER_EIP, &msr_content);
1510 break;
1511 case MSR_IA32_APICBASE:
1512 msr_content = VLAPIC(v) ? VLAPIC(v)->apic_base_msr : 0;
1513 break;
1514 default:
1515 if(long_mode_do_msr_read(regs))
1516 return;
1517 rdmsr_user(regs->ecx, regs->eax, regs->edx);
1518 break;
1519 }
1521 regs->eax = msr_content & 0xFFFFFFFF;
1522 regs->edx = msr_content >> 32;
1524 VMX_DBG_LOG(DBG_LEVEL_1, "vmx_do_msr_read returns: "
1525 "ecx=%lx, eax=%lx, edx=%lx",
1526 (unsigned long)regs->ecx, (unsigned long)regs->eax,
1527 (unsigned long)regs->edx);
1528 }
1530 static inline void vmx_do_msr_write(struct cpu_user_regs *regs)
1531 {
1532 u64 msr_content;
1533 struct vcpu *v = current;
1535 VMX_DBG_LOG(DBG_LEVEL_1, "vmx_do_msr_write: ecx=%lx, eax=%lx, edx=%lx",
1536 (unsigned long)regs->ecx, (unsigned long)regs->eax,
1537 (unsigned long)regs->edx);
1539 msr_content = (regs->eax & 0xFFFFFFFF) | ((u64)regs->edx << 32);
1541 switch (regs->ecx) {
1542 case MSR_IA32_TIME_STAMP_COUNTER:
1543 {
1544 struct vmx_virpit *vpit;
1545 u64 host_tsc, drift;
1547 rdtscll(host_tsc);
1548 vpit = &(v->domain->arch.vmx_platform.vmx_pit);
1549 drift = v->arch.arch_vmx.tsc_offset - vpit->shift;
1550 vpit->shift = msr_content - host_tsc;
1551 v->arch.arch_vmx.tsc_offset = vpit->shift + drift;
1552 __vmwrite(TSC_OFFSET, vpit->shift);
1554 #if defined (__i386__)
1555 __vmwrite(TSC_OFFSET_HIGH, ((vpit->shift)>>32));
1556 #endif
1557 break;
1558 }
1559 case MSR_IA32_SYSENTER_CS:
1560 __vmwrite(GUEST_SYSENTER_CS, msr_content);
1561 break;
1562 case MSR_IA32_SYSENTER_ESP:
1563 __vmwrite(GUEST_SYSENTER_ESP, msr_content);
1564 break;
1565 case MSR_IA32_SYSENTER_EIP:
1566 __vmwrite(GUEST_SYSENTER_EIP, msr_content);
1567 break;
1568 case MSR_IA32_APICBASE:
1569 vlapic_msr_set(VLAPIC(v), msr_content);
1570 break;
1571 default:
1572 long_mode_do_msr_write(regs);
1573 break;
1574 }
1576 VMX_DBG_LOG(DBG_LEVEL_1, "vmx_do_msr_write returns: "
1577 "ecx=%lx, eax=%lx, edx=%lx",
1578 (unsigned long)regs->ecx, (unsigned long)regs->eax,
1579 (unsigned long)regs->edx);
1580 }
1582 /*
1583 * Need to use this exit to reschedule
1584 */
1585 void vmx_vmexit_do_hlt(void)
1586 {
1587 struct vcpu *v=current;
1588 struct vmx_virpit *vpit = &(v->domain->arch.vmx_platform.vmx_pit);
1589 s_time_t next_pit=-1,next_wakeup;
1591 if ( !v->vcpu_id ) {
1592 next_pit = get_pit_scheduled(v,vpit);
1593 }
1594 next_wakeup = get_apictime_scheduled(v);
1595 if ( (next_pit != -1 && next_pit < next_wakeup) || next_wakeup == -1 ) {
1596 next_wakeup = next_pit;
1597 }
1598 if ( next_wakeup != - 1 )
1599 set_ac_timer(&current->arch.arch_vmx.hlt_timer, next_wakeup);
1600 do_block();
1601 }
1603 static inline void vmx_vmexit_do_extint(struct cpu_user_regs *regs)
1604 {
1605 unsigned int vector;
1606 int error;
1608 asmlinkage void do_IRQ(struct cpu_user_regs *);
1609 void smp_apic_timer_interrupt(struct cpu_user_regs *);
1610 void timer_interrupt(int, void *, struct cpu_user_regs *);
1611 void smp_event_check_interrupt(void);
1612 void smp_invalidate_interrupt(void);
1613 void smp_call_function_interrupt(void);
1614 void smp_spurious_interrupt(struct cpu_user_regs *regs);
1615 void smp_error_interrupt(struct cpu_user_regs *regs);
1617 if ((error = __vmread(VM_EXIT_INTR_INFO, &vector))
1618 && !(vector & INTR_INFO_VALID_MASK))
1619 __vmx_bug(regs);
1621 vector &= 0xff;
1622 local_irq_disable();
1624 switch(vector) {
1625 case LOCAL_TIMER_VECTOR:
1626 smp_apic_timer_interrupt(regs);
1627 break;
1628 case EVENT_CHECK_VECTOR:
1629 smp_event_check_interrupt();
1630 break;
1631 case INVALIDATE_TLB_VECTOR:
1632 smp_invalidate_interrupt();
1633 break;
1634 case CALL_FUNCTION_VECTOR:
1635 smp_call_function_interrupt();
1636 break;
1637 case SPURIOUS_APIC_VECTOR:
1638 smp_spurious_interrupt(regs);
1639 break;
1640 case ERROR_APIC_VECTOR:
1641 smp_error_interrupt(regs);
1642 break;
1643 default:
1644 regs->entry_vector = vector;
1645 do_IRQ(regs);
1646 break;
1647 }
1648 }
1650 #define BUF_SIZ 256
1651 #define MAX_LINE 80
1652 char print_buf[BUF_SIZ];
1653 static int index;
1655 static void vmx_print_line(const char c, struct vcpu *v)
1656 {
1658 if (index == MAX_LINE || c == '\n') {
1659 if (index == MAX_LINE) {
1660 print_buf[index++] = c;
1661 }
1662 print_buf[index] = '\0';
1663 printk("(GUEST: %u) %s\n", v->domain->domain_id, (char *) &print_buf);
1664 index = 0;
1665 }
1666 else
1667 print_buf[index++] = c;
1668 }
1670 void save_vmx_cpu_user_regs(struct cpu_user_regs *ctxt)
1671 {
1672 __vmread(GUEST_SS_SELECTOR, &ctxt->ss);
1673 __vmread(GUEST_RSP, &ctxt->esp);
1674 __vmread(GUEST_RFLAGS, &ctxt->eflags);
1675 __vmread(GUEST_CS_SELECTOR, &ctxt->cs);
1676 __vmread(GUEST_RIP, &ctxt->eip);
1678 __vmread(GUEST_GS_SELECTOR, &ctxt->gs);
1679 __vmread(GUEST_FS_SELECTOR, &ctxt->fs);
1680 __vmread(GUEST_ES_SELECTOR, &ctxt->es);
1681 __vmread(GUEST_DS_SELECTOR, &ctxt->ds);
1682 }
1684 #ifdef XEN_DEBUGGER
1685 void save_cpu_user_regs(struct cpu_user_regs *regs)
1686 {
1687 __vmread(GUEST_SS_SELECTOR, &regs->xss);
1688 __vmread(GUEST_RSP, &regs->esp);
1689 __vmread(GUEST_RFLAGS, &regs->eflags);
1690 __vmread(GUEST_CS_SELECTOR, &regs->xcs);
1691 __vmread(GUEST_RIP, &regs->eip);
1693 __vmread(GUEST_GS_SELECTOR, &regs->xgs);
1694 __vmread(GUEST_FS_SELECTOR, &regs->xfs);
1695 __vmread(GUEST_ES_SELECTOR, &regs->xes);
1696 __vmread(GUEST_DS_SELECTOR, &regs->xds);
1697 }
1699 void restore_cpu_user_regs(struct cpu_user_regs *regs)
1700 {
1701 __vmwrite(GUEST_SS_SELECTOR, regs->xss);
1702 __vmwrite(GUEST_RSP, regs->esp);
1703 __vmwrite(GUEST_RFLAGS, regs->eflags);
1704 __vmwrite(GUEST_CS_SELECTOR, regs->xcs);
1705 __vmwrite(GUEST_RIP, regs->eip);
1707 __vmwrite(GUEST_GS_SELECTOR, regs->xgs);
1708 __vmwrite(GUEST_FS_SELECTOR, regs->xfs);
1709 __vmwrite(GUEST_ES_SELECTOR, regs->xes);
1710 __vmwrite(GUEST_DS_SELECTOR, regs->xds);
1711 }
1712 #endif
1714 asmlinkage void vmx_vmexit_handler(struct cpu_user_regs regs)
1715 {
1716 unsigned int exit_reason, idtv_info_field;
1717 unsigned long exit_qualification, eip, inst_len = 0;
1718 struct vcpu *v = current;
1719 int error;
1721 if ((error = __vmread(VM_EXIT_REASON, &exit_reason)))
1722 __vmx_bug(&regs);
1724 perfc_incra(vmexits, exit_reason);
1726 __vmread(IDT_VECTORING_INFO_FIELD, &idtv_info_field);
1727 if (idtv_info_field & INTR_INFO_VALID_MASK) {
1728 __vmwrite(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field);
1730 __vmread(VM_EXIT_INSTRUCTION_LEN, &inst_len);
1731 if (inst_len >= 1 && inst_len <= 15)
1732 __vmwrite(VM_ENTRY_INSTRUCTION_LEN, inst_len);
1734 if (idtv_info_field & 0x800) { /* valid error code */
1735 unsigned long error_code;
1736 __vmread(IDT_VECTORING_ERROR_CODE, &error_code);
1737 __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
1738 }
1740 VMX_DBG_LOG(DBG_LEVEL_1, "idtv_info_field=%x", idtv_info_field);
1741 }
1743 /* don't bother logging H/W interrupts */
1744 if (exit_reason != EXIT_REASON_EXTERNAL_INTERRUPT &&
1745 exit_reason != EXIT_REASON_VMCALL &&
1746 exit_reason != EXIT_REASON_IO_INSTRUCTION)
1747 VMX_DBG_LOG(DBG_LEVEL_0, "exit reason = %x", exit_reason);
1749 if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) {
1750 printk("Failed vm entry\n");
1751 domain_crash_synchronous();
1752 return;
1753 }
1756 __vmread(GUEST_RIP, &eip);
1757 TRACE_3D(TRC_VMX_VMEXIT, v->domain->domain_id, eip, exit_reason);
1758 TRACE_VMEXIT(0,exit_reason);
1761 switch (exit_reason) {
1762 case EXIT_REASON_EXCEPTION_NMI:
1763 {
1764 /*
1765 * We don't set the software-interrupt exiting (INT n).
1766 * (1) We can get an exception (e.g. #PG) in the guest, or
1767 * (2) NMI
1768 */
1769 int error;
1770 unsigned int vector;
1771 unsigned long va;
1773 if ((error = __vmread(VM_EXIT_INTR_INFO, &vector))
1774 || !(vector & INTR_INFO_VALID_MASK))
1775 __vmx_bug(&regs);
1776 vector &= 0xff;
1778 TRACE_VMEXIT(1,vector);
1779 perfc_incra(cause_vector, vector);
1781 TRACE_3D(TRC_VMX_VECTOR, v->domain->domain_id, eip, vector);
1782 switch (vector) {
1783 #ifdef XEN_DEBUGGER
1784 case TRAP_debug:
1785 {
1786 save_cpu_user_regs(&regs);
1787 pdb_handle_exception(1, &regs, 1);
1788 restore_cpu_user_regs(&regs);
1789 break;
1790 }
1791 case TRAP_int3:
1792 {
1793 save_cpu_user_regs(&regs);
1794 pdb_handle_exception(3, &regs, 1);
1795 restore_cpu_user_regs(&regs);
1796 break;
1797 }
1798 #else
1799 case TRAP_debug:
1800 {
1801 void store_cpu_user_regs(struct cpu_user_regs *regs);
1802 long do_sched_op(unsigned long op);
1805 store_cpu_user_regs(&regs);
1806 __vm_clear_bit(GUEST_PENDING_DBG_EXCEPTIONS, PENDING_DEBUG_EXC_BS);
1808 domain_pause_for_debugger();
1809 do_sched_op(SCHEDOP_yield);
1811 break;
1812 }
1813 #endif
1814 case TRAP_no_device:
1815 {
1816 vmx_do_no_device_fault();
1817 break;
1818 }
1819 case TRAP_page_fault:
1820 {
1821 __vmread(EXIT_QUALIFICATION, &va);
1822 __vmread(VM_EXIT_INTR_ERROR_CODE, &regs.error_code);
1824 TRACE_VMEXIT(3,regs.error_code);
1825 TRACE_VMEXIT(4,va);
1827 VMX_DBG_LOG(DBG_LEVEL_VMMU,
1828 "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx",
1829 (unsigned long)regs.eax, (unsigned long)regs.ebx,
1830 (unsigned long)regs.ecx, (unsigned long)regs.edx,
1831 (unsigned long)regs.esi, (unsigned long)regs.edi);
1832 v->arch.arch_vmx.mmio_op.inst_decoder_regs = &regs;
1834 if (!(error = vmx_do_page_fault(va, &regs))) {
1835 /*
1836 * Inject #PG using Interruption-Information Fields
1837 */
1838 vmx_inject_exception(v, TRAP_page_fault, regs.error_code);
1839 v->arch.arch_vmx.cpu_cr2 = va;
1840 TRACE_3D(TRC_VMX_INT, v->domain->domain_id, TRAP_page_fault, va);
1842 break;
1843 }
1844 case TRAP_nmi:
1845 do_nmi(&regs, 0);
1846 break;
1847 default:
1848 vmx_reflect_exception(v);
1849 break;
1850 }
1851 break;
1852 }
1853 case EXIT_REASON_EXTERNAL_INTERRUPT:
1854 vmx_vmexit_do_extint(&regs);
1855 break;
1856 case EXIT_REASON_PENDING_INTERRUPT:
1857 __vmwrite(CPU_BASED_VM_EXEC_CONTROL,
1858 MONITOR_CPU_BASED_EXEC_CONTROLS);
1859 break;
1860 case EXIT_REASON_TASK_SWITCH:
1861 __vmx_bug(&regs);
1862 break;
1863 case EXIT_REASON_CPUID:
1864 __get_instruction_length(inst_len);
1865 vmx_vmexit_do_cpuid(regs.eax, &regs);
1866 __update_guest_eip(inst_len);
1867 break;
1868 case EXIT_REASON_HLT:
1869 __get_instruction_length(inst_len);
1870 __update_guest_eip(inst_len);
1871 vmx_vmexit_do_hlt();
1872 break;
1873 case EXIT_REASON_INVLPG:
1874 {
1875 unsigned long va;
1877 __vmread(EXIT_QUALIFICATION, &va);
1878 vmx_vmexit_do_invlpg(va);
1879 __get_instruction_length(inst_len);
1880 __update_guest_eip(inst_len);
1881 break;
1882 }
1883 case EXIT_REASON_VMCALL:
1884 __get_instruction_length(inst_len);
1885 __vmread(GUEST_RIP, &eip);
1886 __vmread(EXIT_QUALIFICATION, &exit_qualification);
1888 vmx_print_line(regs.eax, v); /* provides the current domain */
1889 __update_guest_eip(inst_len);
1890 break;
1891 case EXIT_REASON_CR_ACCESS:
1892 {
1893 __vmread(GUEST_RIP, &eip);
1894 __get_instruction_length(inst_len);
1895 __vmread(EXIT_QUALIFICATION, &exit_qualification);
1897 VMX_DBG_LOG(DBG_LEVEL_1, "eip = %lx, inst_len =%lx, exit_qualification = %lx",
1898 eip, inst_len, exit_qualification);
1899 if (vmx_cr_access(exit_qualification, &regs))
1900 __update_guest_eip(inst_len);
1901 TRACE_VMEXIT(3,regs.error_code);
1902 TRACE_VMEXIT(4,exit_qualification);
1903 break;
1904 }
1905 case EXIT_REASON_DR_ACCESS:
1906 __vmread(EXIT_QUALIFICATION, &exit_qualification);
1907 vmx_dr_access(exit_qualification, &regs);
1908 __get_instruction_length(inst_len);
1909 __update_guest_eip(inst_len);
1910 break;
1911 case EXIT_REASON_IO_INSTRUCTION:
1912 __vmread(EXIT_QUALIFICATION, &exit_qualification);
1913 __get_instruction_length(inst_len);
1914 vmx_io_instruction(&regs, exit_qualification, inst_len);
1915 TRACE_VMEXIT(4,exit_qualification);
1916 break;
1917 case EXIT_REASON_MSR_READ:
1918 __get_instruction_length(inst_len);
1919 vmx_do_msr_read(&regs);
1920 __update_guest_eip(inst_len);
1921 break;
1922 case EXIT_REASON_MSR_WRITE:
1923 __vmread(GUEST_RIP, &eip);
1924 vmx_do_msr_write(&regs);
1925 __get_instruction_length(inst_len);
1926 __update_guest_eip(inst_len);
1927 break;
1928 case EXIT_REASON_MWAIT_INSTRUCTION:
1929 __vmx_bug(&regs);
1930 break;
1931 default:
1932 __vmx_bug(&regs); /* should not happen */
1933 }
1934 }
1936 asmlinkage void load_cr2(void)
1937 {
1938 struct vcpu *v = current;
1940 local_irq_disable();
1941 #ifdef __i386__
1942 asm volatile("movl %0,%%cr2": :"r" (v->arch.arch_vmx.cpu_cr2));
1943 #else
1944 asm volatile("movq %0,%%cr2": :"r" (v->arch.arch_vmx.cpu_cr2));
1945 #endif
1946 }
1948 asmlinkage void trace_vmentry (void)
1949 {
1950 TRACE_5D(TRC_VMENTRY,trace_values[current->processor][0],
1951 trace_values[current->processor][1],trace_values[current->processor][2],
1952 trace_values[current->processor][3],trace_values[current->processor][4]);
1953 TRACE_VMEXIT(0,9);
1954 TRACE_VMEXIT(1,9);
1955 TRACE_VMEXIT(2,9);
1956 TRACE_VMEXIT(3,9);
1957 TRACE_VMEXIT(4,9);
1958 return;
1959 }
1960 asmlinkage void trace_vmexit (void)
1961 {
1962 TRACE_3D(TRC_VMEXIT,0,0,0);
1963 return;
1964 }
1965 #endif /* CONFIG_VMX */
1967 /*
1968 * Local variables:
1969 * mode: C
1970 * c-set-style: "BSD"
1971 * c-basic-offset: 4
1972 * tab-width: 4
1973 * indent-tabs-mode: nil
1974 * End:
1975 */