ia64/xen-unstable

view xen/arch/x86/hvm/vmx/vmx.c @ 9295:96ba0a2bc9de

Remove unnecessary cr4 handling in vmx_set_cr0.
1) on x86_32, PAE should never be enabled in guest cr4, since we only
support pure IA32 VMX guests.
2) on x86_32p or x86_64, PAE should always be enabled in guest cr4,
since even a pure IA32 guest actually uses PAE paging mode.

Signed-off-by: Xin Li <xin.b.li@intel.com>
Signed-off-by: Yunhong Jiang <yunhong.jiang@intel.com>
author kaf24@firebug.cl.cam.ac.uk
date Wed Mar 15 13:35:43 2006 +0100 (2006-03-15)
parents c097485037f7
children 796ac2386a24
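The change described above can be summarised in a small standalone sketch (illustrative only, not code from this patch; the helper name and the reduced constants are made up for the example): the CR4 value the guest observes lives in the CR4 read shadow, while the CR4 actually loaded for the guest always carries the PAE setting that the shadow pagetables of the given build require.

#include <stdio.h>

#define X86_CR4_PAE 0x20UL    /* architectural CR4.PAE bit */

/* Build flavours, standing in for x86_32, x86_32p and x86_64. */
enum build { BUILD_X86_32, BUILD_X86_32P, BUILD_X86_64 };

/* CR4 value to load for the guest, given the value in the read shadow. */
static unsigned long guest_cr4_to_load(enum build b, unsigned long shadow_cr4)
{
    if (b == BUILD_X86_32)
        /* Only pure IA32 guests are supported: PAE is never enabled. */
        return shadow_cr4 & ~X86_CR4_PAE;
    /* x86_32p / x86_64: shadow paging uses PAE (or long-mode) pagetables,
     * so PAE is always enabled in the CR4 the guest runs with. */
    return shadow_cr4 | X86_CR4_PAE;
}

int main(void)
{
    printf("x86_32 loads %#lx\n", guest_cr4_to_load(BUILD_X86_32, 0));
    printf("x86_64 loads %#lx\n", guest_cr4_to_load(BUILD_X86_64, 0));
    return 0;
}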
line source
1 /*
2 * vmx.c: handling VMX architecture-related VM exits
3 * Copyright (c) 2004, Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
16 * Place - Suite 330, Boston, MA 02111-1307 USA.
17 *
18 */
20 #include <xen/config.h>
21 #include <xen/init.h>
22 #include <xen/lib.h>
23 #include <xen/trace.h>
24 #include <xen/sched.h>
25 #include <xen/irq.h>
26 #include <xen/softirq.h>
27 #include <xen/domain_page.h>
28 #include <asm/current.h>
29 #include <asm/io.h>
30 #include <asm/shadow.h>
31 #include <asm/regs.h>
32 #include <asm/cpufeature.h>
33 #include <asm/processor.h>
34 #include <asm/types.h>
35 #include <asm/msr.h>
36 #include <asm/spinlock.h>
37 #include <asm/hvm/hvm.h>
38 #include <asm/hvm/support.h>
39 #include <asm/hvm/vmx/vmx.h>
40 #include <asm/hvm/vmx/vmcs.h>
41 #include <asm/shadow.h>
42 #if CONFIG_PAGING_LEVELS >= 3
43 #include <asm/shadow_64.h>
44 #endif
45 #include <public/sched.h>
46 #include <public/hvm/ioreq.h>
47 #include <asm/hvm/vpic.h>
48 #include <asm/hvm/vlapic.h>
50 static unsigned long trace_values[NR_CPUS][5];
51 #define TRACE_VMEXIT(index,value) trace_values[smp_processor_id()][index]=value
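/* The two definitions above are per-CPU scratch slots: they are filled in
 * on the VM-exit path via TRACE_VMEXIT() and flushed as a single trace
 * record by vmx_trace_vmentry() on the next VM entry. */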
53 void vmx_final_setup_guest(struct vcpu *v)
54 {
55 v->arch.schedule_tail = arch_vmx_do_launch;
57 if ( v->vcpu_id == 0 )
58 {
59 struct domain *d = v->domain;
60 struct vcpu *vc;
62 /* Initialize monitor page table */
63 for_each_vcpu(d, vc)
64 vc->arch.monitor_table = mk_pagetable(0);
66 /*
67 * Required to do this once per domain
68 * XXX todo: add a separate function to do these.
69 */
70 memset(&d->shared_info->evtchn_mask[0], 0xff,
71 sizeof(d->shared_info->evtchn_mask));
73 /* Put the domain in shadow mode even though we're going to be using
74 * the shared 1:1 page table initially. It shouldn't hurt */
75 shadow_mode_enable(d,
76 SHM_enable|SHM_refcounts|
77 SHM_translate|SHM_external|SHM_wr_pt_pte);
78 }
79 }
81 void vmx_relinquish_resources(struct vcpu *v)
82 {
83 struct hvm_virpit *vpit;
85 if (v->vcpu_id == 0) {
86 /* unmap IO shared page */
87 struct domain *d = v->domain;
88 if ( d->arch.hvm_domain.shared_page_va )
89 unmap_domain_page_global(
90 (void *)d->arch.hvm_domain.shared_page_va);
91 shadow_direct_map_clean(d);
92 }
94 vmx_request_clear_vmcs(v);
95 destroy_vmcs(&v->arch.hvm_vmx);
96 free_monitor_pagetable(v);
97 vpit = &v->domain->arch.hvm_domain.vpit;
98 kill_timer(&vpit->pit_timer);
99 kill_timer(&v->arch.hvm_vmx.hlt_timer);
100 if ( hvm_apic_support(v->domain) && (VLAPIC(v) != NULL) )
101 {
102 kill_timer(&VLAPIC(v)->vlapic_timer);
103 xfree(VLAPIC(v));
104 }
105 }
107 #ifdef __x86_64__
108 static struct vmx_msr_state percpu_msr[NR_CPUS];
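/* msr_data_index[] below gives the architectural MSR number for each
 * VMX_INDEX_MSR_* slot; percpu_msr[] above caches the host values
 * (captured in vmx_save_init_msrs()) so vmx_load_msrs() can restore
 * them lazily, one flagged entry at a time. */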
110 static u32 msr_data_index[VMX_MSR_COUNT] =
111 {
112 MSR_LSTAR, MSR_STAR, MSR_CSTAR,
113 MSR_SYSCALL_MASK, MSR_EFER,
114 };
116 void vmx_save_segments(struct vcpu *v)
117 {
118 rdmsrl(MSR_SHADOW_GS_BASE, v->arch.hvm_vmx.msr_content.shadow_gs);
119 }
121 /*
122 * To avoid MSR save/restore at every VM exit/entry time, we restore
123 * the x86_64 specific MSRs at domain switch time. Since those MSRs
124 * are not modified once set for generic domains, we don't save them,
125 * but simply reset them to the values set at percpu_traps_init().
126 */
127 void vmx_load_msrs(void)
128 {
129 struct vmx_msr_state *host_state = &percpu_msr[smp_processor_id()];
130 int i;
132 while ( host_state->flags )
133 {
134 i = find_first_set_bit(host_state->flags);
135 wrmsrl(msr_data_index[i], host_state->msr_items[i]);
136 clear_bit(i, &host_state->flags);
137 }
138 }
140 static void vmx_save_init_msrs(void)
141 {
142 struct vmx_msr_state *host_state = &percpu_msr[smp_processor_id()];
143 int i;
145 for ( i = 0; i < VMX_MSR_COUNT; i++ )
146 rdmsrl(msr_data_index[i], host_state->msr_items[i]);
147 }
149 #define CASE_READ_MSR(address) \
150 case MSR_ ## address: \
151 msr_content = msr->msr_items[VMX_INDEX_MSR_ ## address]; \
152 break
154 #define CASE_WRITE_MSR(address) \
155 case MSR_ ## address: \
156 { \
157 msr->msr_items[VMX_INDEX_MSR_ ## address] = msr_content; \
158 if (!test_bit(VMX_INDEX_MSR_ ## address, &msr->flags)) { \
159 set_bit(VMX_INDEX_MSR_ ## address, &msr->flags); \
160 } \
161 wrmsrl(MSR_ ## address, msr_content); \
162 set_bit(VMX_INDEX_MSR_ ## address, &host_state->flags); \
163 } \
164 break
166 #define IS_CANO_ADDRESS(add) 1
167 static inline int long_mode_do_msr_read(struct cpu_user_regs *regs)
168 {
169 u64 msr_content = 0;
170 struct vcpu *vc = current;
171 struct vmx_msr_state * msr = &vc->arch.hvm_vmx.msr_content;
172 switch(regs->ecx){
173 case MSR_EFER:
174 msr_content = msr->msr_items[VMX_INDEX_MSR_EFER];
175 HVM_DBG_LOG(DBG_LEVEL_2, "EFER msr_content %"PRIx64"\n", msr_content);
176 if (test_bit(VMX_CPU_STATE_LME_ENABLED,
177 &vc->arch.hvm_vmx.cpu_state))
178 msr_content |= 1 << _EFER_LME;
180 if (VMX_LONG_GUEST(vc))
181 msr_content |= 1 << _EFER_LMA;
182 break;
183 case MSR_FS_BASE:
184 if (!(VMX_LONG_GUEST(vc)))
185 /* XXX should this be a #GP fault? */
186 domain_crash_synchronous();
187 __vmread(GUEST_FS_BASE, &msr_content);
188 break;
189 case MSR_GS_BASE:
190 if (!(VMX_LONG_GUEST(vc)))
191 domain_crash_synchronous();
192 __vmread(GUEST_GS_BASE, &msr_content);
193 break;
194 case MSR_SHADOW_GS_BASE:
195 msr_content = msr->shadow_gs;
196 break;
198 CASE_READ_MSR(STAR);
199 CASE_READ_MSR(LSTAR);
200 CASE_READ_MSR(CSTAR);
201 CASE_READ_MSR(SYSCALL_MASK);
202 default:
203 return 0;
204 }
205 HVM_DBG_LOG(DBG_LEVEL_2, "mode_do_msr_read: msr_content: %"PRIx64"\n",
206 msr_content);
207 regs->eax = msr_content & 0xffffffff;
208 regs->edx = msr_content >> 32;
209 return 1;
210 }
212 static inline int long_mode_do_msr_write(struct cpu_user_regs *regs)
213 {
214 u64 msr_content = regs->eax | ((u64)regs->edx << 32);
215 struct vcpu *vc = current;
216 struct vmx_msr_state * msr = &vc->arch.hvm_vmx.msr_content;
217 struct vmx_msr_state * host_state =
218 &percpu_msr[smp_processor_id()];
220 HVM_DBG_LOG(DBG_LEVEL_1, " mode_do_msr_write msr %lx "
221 "msr_content %"PRIx64"\n",
222 (unsigned long)regs->ecx, msr_content);
224 switch (regs->ecx){
225 case MSR_EFER:
226 /* offending reserved bit will cause #GP */
227 if ( msr_content &
228 ~( EFER_LME | EFER_LMA | EFER_NX | EFER_SCE ) )
229 vmx_inject_exception(vc, TRAP_gp_fault, 0);
231 if ((msr_content & EFER_LME) ^
232 test_bit(VMX_CPU_STATE_LME_ENABLED,
233 &vc->arch.hvm_vmx.cpu_state)){
234 if ( vmx_paging_enabled(vc) ||
235 !test_bit(VMX_CPU_STATE_PAE_ENABLED,
236 &vc->arch.hvm_vmx.cpu_state)) {
237 vmx_inject_exception(vc, TRAP_gp_fault, 0);
238 }
239 }
240 if (msr_content & EFER_LME)
241 set_bit(VMX_CPU_STATE_LME_ENABLED,
242 &vc->arch.hvm_vmx.cpu_state);
244 msr->msr_items[VMX_INDEX_MSR_EFER] =
245 msr_content;
246 break;
248 case MSR_FS_BASE:
249 case MSR_GS_BASE:
250 if (!(VMX_LONG_GUEST(vc)))
251 domain_crash_synchronous();
252 if (!IS_CANO_ADDRESS(msr_content)){
253 HVM_DBG_LOG(DBG_LEVEL_1, "Not cano address of msr write\n");
254 vmx_inject_exception(vc, TRAP_gp_fault, 0);
255 }
256 if (regs->ecx == MSR_FS_BASE)
257 __vmwrite(GUEST_FS_BASE, msr_content);
258 else
259 __vmwrite(GUEST_GS_BASE, msr_content);
260 break;
262 case MSR_SHADOW_GS_BASE:
263 if (!(VMX_LONG_GUEST(vc)))
264 domain_crash_synchronous();
265 vc->arch.hvm_vmx.msr_content.shadow_gs = msr_content;
266 wrmsrl(MSR_SHADOW_GS_BASE, msr_content);
267 break;
269 CASE_WRITE_MSR(STAR);
270 CASE_WRITE_MSR(LSTAR);
271 CASE_WRITE_MSR(CSTAR);
272 CASE_WRITE_MSR(SYSCALL_MASK);
273 default:
274 return 0;
275 }
276 return 1;
277 }
279 void
280 vmx_restore_msrs(struct vcpu *v)
281 {
282 int i = 0;
283 struct vmx_msr_state *guest_state;
284 struct vmx_msr_state *host_state;
285 unsigned long guest_flags;
287 guest_state = &v->arch.hvm_vmx.msr_content;
288 host_state = &percpu_msr[smp_processor_id()];
290 wrmsrl(MSR_SHADOW_GS_BASE, guest_state->shadow_gs);
291 guest_flags = guest_state->flags;
292 if (!guest_flags)
293 return;
295 while (guest_flags){
296 i = find_first_set_bit(guest_flags);
298 HVM_DBG_LOG(DBG_LEVEL_2,
299 "restore guest's index %d msr %lx with %lx\n",
300 i, (unsigned long) msr_data_index[i], (unsigned long) guest_state->msr_items[i]);
301 set_bit(i, &host_state->flags);
302 wrmsrl(msr_data_index[i], guest_state->msr_items[i]);
303 clear_bit(i, &guest_flags);
304 }
305 }
306 #else /* __i386__ */
307 #define vmx_save_init_msrs() ((void)0)
309 static inline int long_mode_do_msr_read(struct cpu_user_regs *regs){
310 return 0;
311 }
312 static inline int long_mode_do_msr_write(struct cpu_user_regs *regs){
313 return 0;
314 }
315 #endif
317 void stop_vmx(void)
318 {
319 if (read_cr4() & X86_CR4_VMXE)
320 __vmxoff();
321 }
323 int vmx_initialize_guest_resources(struct vcpu *v)
324 {
325 vmx_final_setup_guest(v);
326 return 1;
327 }
329 int vmx_relinquish_guest_resources(struct vcpu *v)
330 {
331 vmx_relinquish_resources(v);
332 return 1;
333 }
335 void vmx_migrate_timers(struct vcpu *v)
336 {
337 struct hvm_virpit *vpit = &(v->domain->arch.hvm_domain.vpit);
339 migrate_timer(&vpit->pit_timer, v->processor);
340 migrate_timer(&v->arch.hvm_vmx.hlt_timer, v->processor);
341 if ( hvm_apic_support(v->domain) && VLAPIC(v))
342 migrate_timer(&(VLAPIC(v)->vlapic_timer), v->processor);
343 }
345 void vmx_store_cpu_guest_regs(struct vcpu *v, struct cpu_user_regs *regs)
346 {
347 #if defined (__x86_64__)
348 __vmread(GUEST_RFLAGS, &regs->rflags);
349 __vmread(GUEST_SS_SELECTOR, &regs->ss);
350 __vmread(GUEST_CS_SELECTOR, &regs->cs);
351 __vmread(GUEST_DS_SELECTOR, &regs->ds);
352 __vmread(GUEST_ES_SELECTOR, &regs->es);
353 __vmread(GUEST_GS_SELECTOR, &regs->gs);
354 __vmread(GUEST_FS_SELECTOR, &regs->fs);
355 __vmread(GUEST_RIP, &regs->rip);
356 __vmread(GUEST_RSP, &regs->rsp);
357 #elif defined (__i386__)
358 __vmread(GUEST_RFLAGS, &regs->eflags);
359 __vmread(GUEST_SS_SELECTOR, &regs->ss);
360 __vmread(GUEST_CS_SELECTOR, &regs->cs);
361 __vmread(GUEST_DS_SELECTOR, &regs->ds);
362 __vmread(GUEST_ES_SELECTOR, &regs->es);
363 __vmread(GUEST_GS_SELECTOR, &regs->gs);
364 __vmread(GUEST_FS_SELECTOR, &regs->fs);
365 __vmread(GUEST_RIP, &regs->eip);
366 __vmread(GUEST_RSP, &regs->esp);
367 #else
368 #error Unsupported architecture
369 #endif
370 }
372 void vmx_load_cpu_guest_regs(struct vcpu *v, struct cpu_user_regs *regs)
373 {
374 #if defined (__x86_64__)
375 __vmwrite(GUEST_SS_SELECTOR, regs->ss);
376 __vmwrite(GUEST_RSP, regs->rsp);
378 __vmwrite(GUEST_RFLAGS, regs->rflags);
379 if (regs->rflags & EF_TF)
380 __vm_set_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_DB);
381 else
382 __vm_clear_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_DB);
384 __vmwrite(GUEST_CS_SELECTOR, regs->cs);
385 __vmwrite(GUEST_RIP, regs->rip);
386 #elif defined (__i386__)
387 __vmwrite(GUEST_SS_SELECTOR, regs->ss);
388 __vmwrite(GUEST_RSP, regs->esp);
390 __vmwrite(GUEST_RFLAGS, regs->eflags);
391 if (regs->eflags & EF_TF)
392 __vm_set_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_DB);
393 else
394 __vm_clear_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_DB);
396 __vmwrite(GUEST_CS_SELECTOR, regs->cs);
397 __vmwrite(GUEST_RIP, regs->eip);
398 #else
399 #error Unsupported architecture
400 #endif
401 }
403 void vmx_store_cpu_guest_ctrl_regs(struct vcpu *v, unsigned long crs[8])
404 {
405 __vmread(CR0_READ_SHADOW, &crs[0]);
406 __vmread(GUEST_CR3, &crs[3]);
407 __vmread(CR4_READ_SHADOW, &crs[4]);
408 }
410 void vmx_modify_guest_state(struct vcpu *v)
411 {
412 modify_vmcs(&v->arch.hvm_vmx, &v->arch.guest_context.user_regs);
413 }
415 int vmx_realmode(struct vcpu *v)
416 {
417 unsigned long rflags;
419 __vmread(GUEST_RFLAGS, &rflags);
420 return rflags & X86_EFLAGS_VM;
421 }
423 int vmx_instruction_length(struct vcpu *v)
424 {
425 unsigned long inst_len;
427 if (__vmread(VM_EXIT_INSTRUCTION_LEN, &inst_len))
428 return 0;
429 return inst_len;
430 }
432 unsigned long vmx_get_ctrl_reg(struct vcpu *v, unsigned int num)
433 {
434 switch ( num )
435 {
436 case 0:
437 return v->arch.hvm_vmx.cpu_cr0;
438 case 2:
439 return v->arch.hvm_vmx.cpu_cr2;
440 case 3:
441 return v->arch.hvm_vmx.cpu_cr3;
442 default:
443 BUG();
444 }
445 return 0; /* dummy */
446 }
448 /* SMP VMX guest support */
449 void vmx_init_ap_context(struct vcpu_guest_context *ctxt,
450 int vcpuid, int trampoline_vector)
451 {
452 int i;
454 memset(ctxt, 0, sizeof(*ctxt));
456 /*
457 * Initial register values:
458 */
459 ctxt->user_regs.eip = VMXASSIST_BASE;
460 ctxt->user_regs.edx = vcpuid;
461 ctxt->user_regs.ebx = trampoline_vector;
463 ctxt->flags = VGCF_HVM_GUEST;
465 /* Virtual IDT is empty at start-of-day. */
466 for ( i = 0; i < 256; i++ )
467 {
468 ctxt->trap_ctxt[i].vector = i;
469 ctxt->trap_ctxt[i].cs = FLAT_KERNEL_CS;
470 }
472 /* No callback handlers. */
473 #if defined(__i386__)
474 ctxt->event_callback_cs = FLAT_KERNEL_CS;
475 ctxt->failsafe_callback_cs = FLAT_KERNEL_CS;
476 #endif
477 }
479 void do_nmi(struct cpu_user_regs *);
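/* The IA32_VMX_*_CTLS capability MSRs describe which control bits may be
 * cleared (low word: bits that must be 1) and which may be set (high word:
 * bits that may be 1). The numeric range comparison below is only a coarse
 * sanity check of our fixed control words against those bounds. */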
481 static int check_vmx_controls(u32 ctrls, u32 msr)
482 {
483 u32 vmx_msr_low, vmx_msr_high;
485 rdmsr(msr, vmx_msr_low, vmx_msr_high);
486 if ( (ctrls < vmx_msr_low) || (ctrls > vmx_msr_high) )
487 {
488 printk("Insufficient VMX capability 0x%x, "
489 "msr=0x%x,low=0x%8x,high=0x%x\n",
490 ctrls, msr, vmx_msr_low, vmx_msr_high);
491 return 0;
492 }
493 return 1;
494 }
496 int start_vmx(void)
497 {
498 struct vmcs_struct *vmcs;
499 u32 ecx;
500 u32 eax, edx;
501 u64 phys_vmcs; /* debugging */
503 /*
504 * Xen does not fill x86_capability words except 0.
505 */
506 ecx = cpuid_ecx(1);
507 boot_cpu_data.x86_capability[4] = ecx;
509 if (!(test_bit(X86_FEATURE_VMXE, &boot_cpu_data.x86_capability)))
510 return 0;
512 rdmsr(IA32_FEATURE_CONTROL_MSR, eax, edx);
514 if (eax & IA32_FEATURE_CONTROL_MSR_LOCK) {
515 if ((eax & IA32_FEATURE_CONTROL_MSR_ENABLE_VMXON) == 0x0) {
516 printk("VMX disabled by Feature Control MSR.\n");
517 return 0;
518 }
519 }
520 else {
521 wrmsr(IA32_FEATURE_CONTROL_MSR,
522 IA32_FEATURE_CONTROL_MSR_LOCK |
523 IA32_FEATURE_CONTROL_MSR_ENABLE_VMXON, 0);
524 }
526 if (!check_vmx_controls(MONITOR_PIN_BASED_EXEC_CONTROLS,
527 MSR_IA32_VMX_PINBASED_CTLS_MSR))
528 return 0;
529 if (!check_vmx_controls(MONITOR_CPU_BASED_EXEC_CONTROLS,
530 MSR_IA32_VMX_PROCBASED_CTLS_MSR))
531 return 0;
532 if (!check_vmx_controls(MONITOR_VM_EXIT_CONTROLS,
533 MSR_IA32_VMX_EXIT_CTLS_MSR))
534 return 0;
535 if (!check_vmx_controls(MONITOR_VM_ENTRY_CONTROLS,
536 MSR_IA32_VMX_ENTRY_CTLS_MSR))
537 return 0;
539 set_in_cr4(X86_CR4_VMXE); /* Enable VMXE */
541 if (!(vmcs = alloc_vmcs())) {
542 printk("Failed to allocate VMCS\n");
543 return 0;
544 }
546 phys_vmcs = (u64) virt_to_maddr(vmcs);
548 if (!(__vmxon(phys_vmcs))) {
549 printk("VMXON is done\n");
550 }
552 vmx_save_init_msrs();
554 /* Setup HVM interfaces */
555 hvm_funcs.disable = stop_vmx;
557 hvm_funcs.initialize_guest_resources = vmx_initialize_guest_resources;
558 hvm_funcs.relinquish_guest_resources = vmx_relinquish_guest_resources;
560 hvm_funcs.store_cpu_guest_regs = vmx_store_cpu_guest_regs;
561 hvm_funcs.load_cpu_guest_regs = vmx_load_cpu_guest_regs;
563 #ifdef __x86_64__
564 hvm_funcs.save_segments = vmx_save_segments;
565 hvm_funcs.load_msrs = vmx_load_msrs;
566 hvm_funcs.restore_msrs = vmx_restore_msrs;
567 #endif
569 hvm_funcs.store_cpu_guest_ctrl_regs = vmx_store_cpu_guest_ctrl_regs;
570 hvm_funcs.modify_guest_state = vmx_modify_guest_state;
572 hvm_funcs.realmode = vmx_realmode;
573 hvm_funcs.paging_enabled = vmx_paging_enabled;
574 hvm_funcs.instruction_length = vmx_instruction_length;
575 hvm_funcs.get_guest_ctrl_reg = vmx_get_ctrl_reg;
577 hvm_funcs.init_ap_context = vmx_init_ap_context;
579 hvm_enabled = 1;
581 return 1;
582 }
584 /*
585 * Not all cases receive a valid value in the VM-exit instruction length field.
586 */
587 #define __get_instruction_length(len) \
588 __vmread(VM_EXIT_INSTRUCTION_LEN, &(len)); \
589 if ((len) < 1 || (len) > 15) \
590 __hvm_bug(&regs);
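/* Note: the macro above expands to two statements and refers to a local
 * 'regs', so it is only usable as a standalone statement inside
 * vmx_vmexit_handler(). A length outside 1..15 means this exit did not
 * supply a valid instruction length and is treated as a bug. */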
592 static void inline __update_guest_eip(unsigned long inst_len)
593 {
594 unsigned long current_eip;
596 __vmread(GUEST_RIP, &current_eip);
597 __vmwrite(GUEST_RIP, current_eip + inst_len);
598 __vmwrite(GUEST_INTERRUPTIBILITY_INFO, 0);
599 }
602 static int vmx_do_page_fault(unsigned long va, struct cpu_user_regs *regs)
603 {
604 unsigned long gpa; /* FIXME: PAE */
605 int result;
607 #if 0 /* keep for debugging */
608 {
609 unsigned long eip;
611 __vmread(GUEST_RIP, &eip);
612 HVM_DBG_LOG(DBG_LEVEL_VMMU,
613 "vmx_do_page_fault = 0x%lx, eip = %lx, error_code = %lx",
614 va, eip, (unsigned long)regs->error_code);
615 }
616 #endif
618 if ( !vmx_paging_enabled(current) )
619 {
620 /* construct 1-to-1 direct mapping */
621 if ( shadow_direct_map_fault(va, regs) )
622 return 1;
624 handle_mmio(va, va);
625 TRACE_VMEXIT (2,2);
626 return 1;
627 }
628 gpa = gva_to_gpa(va);
630 /* Use 1:1 page table to identify MMIO address space */
631 if ( mmio_space(gpa) ){
632 struct vcpu *v = current;
633 /* No support for APIC */
634 if (!hvm_apic_support(v->domain) && gpa >= 0xFEC00000) {
635 u32 inst_len;
636 __vmread(VM_EXIT_INSTRUCTION_LEN, &(inst_len));
637 __update_guest_eip(inst_len);
638 return 1;
639 }
640 TRACE_VMEXIT (2,2);
641 handle_mmio(va, gpa);
642 return 1;
643 }
645 result = shadow_fault(va, regs);
646 TRACE_VMEXIT (2,result);
647 #if 0
648 if ( !result )
649 {
650 __vmread(GUEST_RIP, &eip);
651 printk("vmx pgfault to guest va=%lx eip=%lx\n", va, eip);
652 }
653 #endif
655 return result;
656 }
658 static void vmx_do_no_device_fault(void)
659 {
660 unsigned long cr0;
661 struct vcpu *v = current;
663 setup_fpu(current);
664 __vm_clear_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_NM);
666 /* Disable TS in guest CR0 unless the guest wants the exception too. */
667 __vmread_vcpu(v, CR0_READ_SHADOW, &cr0);
668 if ( !(cr0 & X86_CR0_TS) )
669 {
670 __vmread_vcpu(v, GUEST_CR0, &cr0);
671 cr0 &= ~X86_CR0_TS;
672 __vmwrite(GUEST_CR0, cr0);
673 }
674 }
676 /* Reserved bits: [31:15], [12:11], [9], [6], [2:1] */
677 #define VMX_VCPU_CPUID_L1_RESERVED 0xffff9a46
679 static void vmx_vmexit_do_cpuid(unsigned long input, struct cpu_user_regs *regs)
680 {
681 unsigned int eax, ebx, ecx, edx;
682 unsigned long eip;
683 struct vcpu *v = current;
685 __vmread(GUEST_RIP, &eip);
687 HVM_DBG_LOG(DBG_LEVEL_1,
688 "do_cpuid: (eax) %lx, (ebx) %lx, (ecx) %lx, (edx) %lx,"
689 " (esi) %lx, (edi) %lx",
690 (unsigned long)regs->eax, (unsigned long)regs->ebx,
691 (unsigned long)regs->ecx, (unsigned long)regs->edx,
692 (unsigned long)regs->esi, (unsigned long)regs->edi);
694 cpuid(input, &eax, &ebx, &ecx, &edx);
696 if ( input == 1 )
697 {
698 if ( hvm_apic_support(v->domain) &&
699 !vlapic_global_enabled((VLAPIC(v))) )
700 clear_bit(X86_FEATURE_APIC, &edx);
702 #if CONFIG_PAGING_LEVELS < 3
703 clear_bit(X86_FEATURE_PAE, &edx);
704 clear_bit(X86_FEATURE_PSE, &edx);
705 clear_bit(X86_FEATURE_PSE36, &edx);
706 #else
707 if ( v->domain->arch.ops->guest_paging_levels == PAGING_L2 )
708 {
709 if ( !v->domain->arch.hvm_domain.pae_enabled )
710 clear_bit(X86_FEATURE_PAE, &edx);
711 clear_bit(X86_FEATURE_PSE, &edx);
712 clear_bit(X86_FEATURE_PSE36, &edx);
713 }
714 #endif
716 /* Unsupportable for virtualised CPUs. */
717 ecx &= ~VMX_VCPU_CPUID_L1_RESERVED; /* mask off reserved bits */
718 clear_bit(X86_FEATURE_VMXE & 31, &ecx);
719 clear_bit(X86_FEATURE_MWAIT & 31, &ecx);
720 }
721 #ifdef __i386__
722 else if ( input == 0x80000001 )
723 {
724 /* Mask feature for Intel ia32e or AMD long mode. */
725 clear_bit(X86_FEATURE_LM & 31, &edx);
726 }
727 #endif
729 regs->eax = (unsigned long) eax;
730 regs->ebx = (unsigned long) ebx;
731 regs->ecx = (unsigned long) ecx;
732 regs->edx = (unsigned long) edx;
734 HVM_DBG_LOG(DBG_LEVEL_1,
735 "vmx_vmexit_do_cpuid: eip: %lx, input: %lx, out:eax=%x, ebx=%x, ecx=%x, edx=%x",
736 eip, input, eax, ebx, ecx, edx);
738 }
740 #define CASE_GET_REG_P(REG, reg) \
741 case REG_ ## REG: reg_p = (unsigned long *)&(regs->reg); break
743 static void vmx_dr_access (unsigned long exit_qualification, struct cpu_user_regs *regs)
744 {
745 unsigned int reg;
746 unsigned long *reg_p = 0;
747 struct vcpu *v = current;
748 unsigned long eip;
750 __vmread(GUEST_RIP, &eip);
752 reg = exit_qualification & DEBUG_REG_ACCESS_NUM;
754 HVM_DBG_LOG(DBG_LEVEL_1,
755 "vmx_dr_access : eip=%lx, reg=%d, exit_qualification = %lx",
756 eip, reg, exit_qualification);
758 switch(exit_qualification & DEBUG_REG_ACCESS_REG) {
759 CASE_GET_REG_P(EAX, eax);
760 CASE_GET_REG_P(ECX, ecx);
761 CASE_GET_REG_P(EDX, edx);
762 CASE_GET_REG_P(EBX, ebx);
763 CASE_GET_REG_P(EBP, ebp);
764 CASE_GET_REG_P(ESI, esi);
765 CASE_GET_REG_P(EDI, edi);
766 case REG_ESP:
767 break;
768 default:
769 __hvm_bug(regs);
770 }
772 switch (exit_qualification & DEBUG_REG_ACCESS_TYPE) {
773 case TYPE_MOV_TO_DR:
774 /* don't need to check the range */
775 if (reg != REG_ESP)
776 v->arch.guest_context.debugreg[reg] = *reg_p;
777 else {
778 unsigned long value;
779 __vmread(GUEST_RSP, &value);
780 v->arch.guest_context.debugreg[reg] = value;
781 }
782 break;
783 case TYPE_MOV_FROM_DR:
784 if (reg != REG_ESP)
785 *reg_p = v->arch.guest_context.debugreg[reg];
786 else {
787 __vmwrite(GUEST_RSP, v->arch.guest_context.debugreg[reg]);
788 }
789 break;
790 }
791 }
793 /*
794 * Invalidate the TLB for va. Invalidate the shadow page corresponding
795 * the address va.
796 */
797 static void vmx_vmexit_do_invlpg(unsigned long va)
798 {
799 unsigned long eip;
800 struct vcpu *v = current;
802 __vmread(GUEST_RIP, &eip);
804 HVM_DBG_LOG(DBG_LEVEL_VMMU, "vmx_vmexit_do_invlpg: eip=%lx, va=%lx",
805 eip, va);
807 /*
808 * We do the safest thing first, then try to update the shadow
809 * copy from the guest.
810 */
811 shadow_invlpg(v, va);
812 }
814 static int check_for_null_selector(unsigned long eip)
815 {
816 unsigned char inst[MAX_INST_LEN];
817 unsigned long sel;
818 int i, inst_len;
819 int inst_copy_from_guest(unsigned char *, unsigned long, int);
821 __vmread(VM_EXIT_INSTRUCTION_LEN, &inst_len);
822 memset(inst, 0, MAX_INST_LEN);
823 if (inst_copy_from_guest(inst, eip, inst_len) != inst_len) {
824 printf("check_for_null_selector: get guest instruction failed\n");
825 domain_crash_synchronous();
826 }
828 for (i = 0; i < inst_len; i++) {
829 switch (inst[i]) {
830 case 0xf3: /* REPZ */
831 case 0xf2: /* REPNZ */
832 case 0xf0: /* LOCK */
833 case 0x66: /* data32 */
834 case 0x67: /* addr32 */
835 continue;
836 case 0x2e: /* CS */
837 __vmread(GUEST_CS_SELECTOR, &sel);
838 break;
839 case 0x36: /* SS */
840 __vmread(GUEST_SS_SELECTOR, &sel);
841 break;
842 case 0x26: /* ES */
843 __vmread(GUEST_ES_SELECTOR, &sel);
844 break;
845 case 0x64: /* FS */
846 __vmread(GUEST_FS_SELECTOR, &sel);
847 break;
848 case 0x65: /* GS */
849 __vmread(GUEST_GS_SELECTOR, &sel);
850 break;
851 case 0x3e: /* DS */
852 /* FALLTHROUGH */
853 default:
854 /* DS is the default */
855 __vmread(GUEST_DS_SELECTOR, &sel);
856 }
857 return sel == 0 ? 1 : 0;
858 }
860 return 0;
861 }
863 extern void send_pio_req(struct cpu_user_regs *regs, unsigned long port,
864 unsigned long count, int size, long value,
865 int dir, int pvalid);
867 static void vmx_io_instruction(struct cpu_user_regs *regs,
868 unsigned long exit_qualification, unsigned long inst_len)
869 {
870 struct mmio_op *mmio_opp;
871 unsigned long eip, cs, eflags;
872 unsigned long port, size, dir;
873 int vm86;
875 mmio_opp = &current->arch.hvm_vcpu.mmio_op;
876 mmio_opp->instr = INSTR_PIO;
877 mmio_opp->flags = 0;
879 __vmread(GUEST_RIP, &eip);
880 __vmread(GUEST_CS_SELECTOR, &cs);
881 __vmread(GUEST_RFLAGS, &eflags);
882 vm86 = eflags & X86_EFLAGS_VM ? 1 : 0;
884 HVM_DBG_LOG(DBG_LEVEL_IO,
885 "vmx_io_instruction: vm86 %d, eip=%lx:%lx, "
886 "exit_qualification = %lx",
887 vm86, cs, eip, exit_qualification);
889 if (test_bit(6, &exit_qualification))
890 port = (exit_qualification >> 16) & 0xFFFF;
891 else
892 port = regs->edx & 0xffff;
893 TRACE_VMEXIT(2, port);
894 size = (exit_qualification & 7) + 1;
895 dir = test_bit(3, &exit_qualification); /* direction */
897 if (test_bit(4, &exit_qualification)) { /* string instruction */
898 unsigned long addr, count = 1;
899 int sign = regs->eflags & EF_DF ? -1 : 1;
901 __vmread(GUEST_LINEAR_ADDRESS, &addr);
903 /*
904 * In protected mode, guest linear address is invalid if the
905 * selector is null.
906 */
907 if (!vm86 && check_for_null_selector(eip))
908 addr = dir == IOREQ_WRITE ? regs->esi : regs->edi;
910 if (test_bit(5, &exit_qualification)) { /* "rep" prefix */
911 mmio_opp->flags |= REPZ;
912 count = vm86 ? regs->ecx & 0xFFFF : regs->ecx;
913 }
915 /*
916 * Handle string pio instructions that cross pages or that
917 * are unaligned. See the comments in hvm_domain.c/handle_mmio()
918 */
919 if ((addr & PAGE_MASK) != ((addr + size - 1) & PAGE_MASK)) {
920 unsigned long value = 0;
922 mmio_opp->flags |= OVERLAP;
923 if (dir == IOREQ_WRITE)
924 hvm_copy(&value, addr, size, HVM_COPY_IN);
925 send_pio_req(regs, port, 1, size, value, dir, 0);
926 } else {
927 if ((addr & PAGE_MASK) != ((addr + count * size - 1) & PAGE_MASK)) {
928 if (sign > 0)
929 count = (PAGE_SIZE - (addr & ~PAGE_MASK)) / size;
930 else
931 count = (addr & ~PAGE_MASK) / size;
932 } else
933 __update_guest_eip(inst_len);
935 send_pio_req(regs, port, count, size, addr, dir, 1);
936 }
937 } else {
938 if (port == 0xe9 && dir == IOREQ_WRITE && size == 1)
939 hvm_print_line(current, regs->eax); /* guest debug output */
941 __update_guest_eip(inst_len);
942 send_pio_req(regs, port, 1, size, regs->eax, dir, 0);
943 }
944 }
946 int
947 vmx_world_save(struct vcpu *v, struct vmx_assist_context *c)
948 {
949 unsigned long inst_len;
950 int error = 0;
952 error |= __vmread(VM_EXIT_INSTRUCTION_LEN, &inst_len);
953 error |= __vmread(GUEST_RIP, &c->eip);
954 c->eip += inst_len; /* skip transition instruction */
955 error |= __vmread(GUEST_RSP, &c->esp);
956 error |= __vmread(GUEST_RFLAGS, &c->eflags);
958 error |= __vmread(CR0_READ_SHADOW, &c->cr0);
959 c->cr3 = v->arch.hvm_vmx.cpu_cr3;
960 error |= __vmread(CR4_READ_SHADOW, &c->cr4);
962 error |= __vmread(GUEST_IDTR_LIMIT, &c->idtr_limit);
963 error |= __vmread(GUEST_IDTR_BASE, &c->idtr_base);
965 error |= __vmread(GUEST_GDTR_LIMIT, &c->gdtr_limit);
966 error |= __vmread(GUEST_GDTR_BASE, &c->gdtr_base);
968 error |= __vmread(GUEST_CS_SELECTOR, &c->cs_sel);
969 error |= __vmread(GUEST_CS_LIMIT, &c->cs_limit);
970 error |= __vmread(GUEST_CS_BASE, &c->cs_base);
971 error |= __vmread(GUEST_CS_AR_BYTES, &c->cs_arbytes.bytes);
973 error |= __vmread(GUEST_DS_SELECTOR, &c->ds_sel);
974 error |= __vmread(GUEST_DS_LIMIT, &c->ds_limit);
975 error |= __vmread(GUEST_DS_BASE, &c->ds_base);
976 error |= __vmread(GUEST_DS_AR_BYTES, &c->ds_arbytes.bytes);
978 error |= __vmread(GUEST_ES_SELECTOR, &c->es_sel);
979 error |= __vmread(GUEST_ES_LIMIT, &c->es_limit);
980 error |= __vmread(GUEST_ES_BASE, &c->es_base);
981 error |= __vmread(GUEST_ES_AR_BYTES, &c->es_arbytes.bytes);
983 error |= __vmread(GUEST_SS_SELECTOR, &c->ss_sel);
984 error |= __vmread(GUEST_SS_LIMIT, &c->ss_limit);
985 error |= __vmread(GUEST_SS_BASE, &c->ss_base);
986 error |= __vmread(GUEST_SS_AR_BYTES, &c->ss_arbytes.bytes);
988 error |= __vmread(GUEST_FS_SELECTOR, &c->fs_sel);
989 error |= __vmread(GUEST_FS_LIMIT, &c->fs_limit);
990 error |= __vmread(GUEST_FS_BASE, &c->fs_base);
991 error |= __vmread(GUEST_FS_AR_BYTES, &c->fs_arbytes.bytes);
993 error |= __vmread(GUEST_GS_SELECTOR, &c->gs_sel);
994 error |= __vmread(GUEST_GS_LIMIT, &c->gs_limit);
995 error |= __vmread(GUEST_GS_BASE, &c->gs_base);
996 error |= __vmread(GUEST_GS_AR_BYTES, &c->gs_arbytes.bytes);
998 error |= __vmread(GUEST_TR_SELECTOR, &c->tr_sel);
999 error |= __vmread(GUEST_TR_LIMIT, &c->tr_limit);
1000 error |= __vmread(GUEST_TR_BASE, &c->tr_base);
1001 error |= __vmread(GUEST_TR_AR_BYTES, &c->tr_arbytes.bytes);
1003 error |= __vmread(GUEST_LDTR_SELECTOR, &c->ldtr_sel);
1004 error |= __vmread(GUEST_LDTR_LIMIT, &c->ldtr_limit);
1005 error |= __vmread(GUEST_LDTR_BASE, &c->ldtr_base);
1006 error |= __vmread(GUEST_LDTR_AR_BYTES, &c->ldtr_arbytes.bytes);
1008 return !error;
1011 int
1012 vmx_world_restore(struct vcpu *v, struct vmx_assist_context *c)
1014 unsigned long mfn, old_cr4, old_base_mfn;
1015 int error = 0;
1017 error |= __vmwrite(GUEST_RIP, c->eip);
1018 error |= __vmwrite(GUEST_RSP, c->esp);
1019 error |= __vmwrite(GUEST_RFLAGS, c->eflags);
1021 error |= __vmwrite(CR0_READ_SHADOW, c->cr0);
1023 if (!vmx_paging_enabled(v)) {
1024 HVM_DBG_LOG(DBG_LEVEL_VMMU, "switching to vmxassist. use phys table");
1025 __vmwrite(GUEST_CR3, pagetable_get_paddr(v->domain->arch.phys_table));
1026 goto skip_cr3;
1029 if (c->cr3 == v->arch.hvm_vmx.cpu_cr3) {
1030 /*
1031 * This is a simple TLB flush, implying the guest has
1032 * removed some translation or changed page attributes.
1033 * We simply invalidate the shadow.
1034 */
1035 mfn = get_mfn_from_gpfn(c->cr3 >> PAGE_SHIFT);
1036 if (mfn != pagetable_get_pfn(v->arch.guest_table)) {
1037 printk("Invalid CR3 value=%x", c->cr3);
1038 domain_crash_synchronous();
1039 return 0;
1041 shadow_sync_all(v->domain);
1042 } else {
1043 /*
1044 * If different, make a shadow. Check if the PDBR is valid
1045 * first.
1046 */
1047 HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 c->cr3 = %x", c->cr3);
1048 if ((c->cr3 >> PAGE_SHIFT) > v->domain->max_pages) {
1049 printk("Invalid CR3 value=%x", c->cr3);
1050 domain_crash_synchronous();
1051 return 0;
1053 mfn = get_mfn_from_gpfn(c->cr3 >> PAGE_SHIFT);
1054 if(!get_page(mfn_to_page(mfn), v->domain))
1055 return 0;
1056 old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
1057 v->arch.guest_table = mk_pagetable((u64)mfn << PAGE_SHIFT);
1058 if (old_base_mfn)
1059 put_page(mfn_to_page(old_base_mfn));
1060 /*
1061 * arch.shadow_table should now hold the next CR3 for shadow
1062 */
1063 v->arch.hvm_vmx.cpu_cr3 = c->cr3;
1064 update_pagetables(v);
1065 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %x", c->cr3);
1066 __vmwrite(GUEST_CR3, pagetable_get_paddr(v->arch.shadow_table));
1069 skip_cr3:
1071 error |= __vmread(CR4_READ_SHADOW, &old_cr4);
1072 error |= __vmwrite(GUEST_CR4, (c->cr4 | VMX_CR4_HOST_MASK));
1073 error |= __vmwrite(CR4_READ_SHADOW, c->cr4);
1075 error |= __vmwrite(GUEST_IDTR_LIMIT, c->idtr_limit);
1076 error |= __vmwrite(GUEST_IDTR_BASE, c->idtr_base);
1078 error |= __vmwrite(GUEST_GDTR_LIMIT, c->gdtr_limit);
1079 error |= __vmwrite(GUEST_GDTR_BASE, c->gdtr_base);
1081 error |= __vmwrite(GUEST_CS_SELECTOR, c->cs_sel);
1082 error |= __vmwrite(GUEST_CS_LIMIT, c->cs_limit);
1083 error |= __vmwrite(GUEST_CS_BASE, c->cs_base);
1084 error |= __vmwrite(GUEST_CS_AR_BYTES, c->cs_arbytes.bytes);
1086 error |= __vmwrite(GUEST_DS_SELECTOR, c->ds_sel);
1087 error |= __vmwrite(GUEST_DS_LIMIT, c->ds_limit);
1088 error |= __vmwrite(GUEST_DS_BASE, c->ds_base);
1089 error |= __vmwrite(GUEST_DS_AR_BYTES, c->ds_arbytes.bytes);
1091 error |= __vmwrite(GUEST_ES_SELECTOR, c->es_sel);
1092 error |= __vmwrite(GUEST_ES_LIMIT, c->es_limit);
1093 error |= __vmwrite(GUEST_ES_BASE, c->es_base);
1094 error |= __vmwrite(GUEST_ES_AR_BYTES, c->es_arbytes.bytes);
1096 error |= __vmwrite(GUEST_SS_SELECTOR, c->ss_sel);
1097 error |= __vmwrite(GUEST_SS_LIMIT, c->ss_limit);
1098 error |= __vmwrite(GUEST_SS_BASE, c->ss_base);
1099 error |= __vmwrite(GUEST_SS_AR_BYTES, c->ss_arbytes.bytes);
1101 error |= __vmwrite(GUEST_FS_SELECTOR, c->fs_sel);
1102 error |= __vmwrite(GUEST_FS_LIMIT, c->fs_limit);
1103 error |= __vmwrite(GUEST_FS_BASE, c->fs_base);
1104 error |= __vmwrite(GUEST_FS_AR_BYTES, c->fs_arbytes.bytes);
1106 error |= __vmwrite(GUEST_GS_SELECTOR, c->gs_sel);
1107 error |= __vmwrite(GUEST_GS_LIMIT, c->gs_limit);
1108 error |= __vmwrite(GUEST_GS_BASE, c->gs_base);
1109 error |= __vmwrite(GUEST_GS_AR_BYTES, c->gs_arbytes.bytes);
1111 error |= __vmwrite(GUEST_TR_SELECTOR, c->tr_sel);
1112 error |= __vmwrite(GUEST_TR_LIMIT, c->tr_limit);
1113 error |= __vmwrite(GUEST_TR_BASE, c->tr_base);
1114 error |= __vmwrite(GUEST_TR_AR_BYTES, c->tr_arbytes.bytes);
1116 error |= __vmwrite(GUEST_LDTR_SELECTOR, c->ldtr_sel);
1117 error |= __vmwrite(GUEST_LDTR_LIMIT, c->ldtr_limit);
1118 error |= __vmwrite(GUEST_LDTR_BASE, c->ldtr_base);
1119 error |= __vmwrite(GUEST_LDTR_AR_BYTES, c->ldtr_arbytes.bytes);
1121 return !error;
1124 enum { VMX_ASSIST_INVOKE = 0, VMX_ASSIST_RESTORE };
1126 int
1127 vmx_assist(struct vcpu *v, int mode)
1129 struct vmx_assist_context c;
1130 u32 magic;
1131 u32 cp;
1133 /* make sure vmxassist exists (this is not an error) */
1134 if (!hvm_copy(&magic, VMXASSIST_MAGIC_OFFSET, sizeof(magic), HVM_COPY_IN))
1135 return 0;
1136 if (magic != VMXASSIST_MAGIC)
1137 return 0;
1139 switch (mode) {
1140 /*
1141 * Transfer control to vmxassist.
1142 * Store the current context in VMXASSIST_OLD_CONTEXT and load
1143 * the new VMXASSIST_NEW_CONTEXT context. This context was created
1144 * by vmxassist and will transfer control to it.
1145 */
1146 case VMX_ASSIST_INVOKE:
1147 /* save the old context */
1148 if (!hvm_copy(&cp, VMXASSIST_OLD_CONTEXT, sizeof(cp), HVM_COPY_IN))
1149 goto error;
1150 if (cp != 0) {
1151 if (!vmx_world_save(v, &c))
1152 goto error;
1153 if (!hvm_copy(&c, cp, sizeof(c), HVM_COPY_OUT))
1154 goto error;
1157 /* restore the new context, this should activate vmxassist */
1158 if (!hvm_copy(&cp, VMXASSIST_NEW_CONTEXT, sizeof(cp), HVM_COPY_IN))
1159 goto error;
1160 if (cp != 0) {
1161 if (!hvm_copy(&c, cp, sizeof(c), HVM_COPY_IN))
1162 goto error;
1163 if (!vmx_world_restore(v, &c))
1164 goto error;
1165 return 1;
1167 break;
1169 /*
1170 * Restore the VMXASSIST_OLD_CONTEXT that was saved by VMX_ASSIST_INVOKE
1171 * above.
1172 */
1173 case VMX_ASSIST_RESTORE:
1174 /* save the old context */
1175 if (!hvm_copy(&cp, VMXASSIST_OLD_CONTEXT, sizeof(cp), HVM_COPY_IN))
1176 goto error;
1177 if (cp != 0) {
1178 if (!hvm_copy(&c, cp, sizeof(c), HVM_COPY_IN))
1179 goto error;
1180 if (!vmx_world_restore(v, &c))
1181 goto error;
1182 return 1;
1184 break;
1187 error:
1188 printf("Failed to transfer to vmxassist\n");
1189 domain_crash_synchronous();
1190 return 0;
1193 static int vmx_set_cr0(unsigned long value)
1195 struct vcpu *v = current;
1196 unsigned long mfn;
1197 unsigned long eip;
1198 int paging_enabled;
1199 unsigned long vm_entry_value;
1200 unsigned long old_cr0;
1202 /*
1203 * CR0: We don't want to lose PE and PG.
1204 */
1205 __vmread_vcpu(v, CR0_READ_SHADOW, &old_cr0);
1206 paging_enabled = (old_cr0 & X86_CR0_PE) && (old_cr0 & X86_CR0_PG);
1208 /* TS cleared? Then initialise FPU now. */
1209 if ( !(value & X86_CR0_TS) )
1211 setup_fpu(v);
1212 __vm_clear_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_NM);
1215 __vmwrite(GUEST_CR0, value | X86_CR0_PE | X86_CR0_PG | X86_CR0_NE);
1216 __vmwrite(CR0_READ_SHADOW, value);
1218 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR0 value = %lx\n", value);
1220 if ( (value & X86_CR0_PE) && (value & X86_CR0_PG) && !paging_enabled )
1222 /*
1223 * Trying to enable guest paging.
1224 * The guest CR3 must be pointing to the guest physical.
1225 */
1226 if ( !VALID_MFN(mfn = get_mfn_from_gpfn(
1227 v->arch.hvm_vmx.cpu_cr3 >> PAGE_SHIFT)) ||
1228 !get_page(mfn_to_page(mfn), v->domain) )
1230 printk("Invalid CR3 value = %lx", v->arch.hvm_vmx.cpu_cr3);
1231 domain_crash_synchronous(); /* need to take a clean path */
1234 #if defined(__x86_64__)
1235 if ( test_bit(VMX_CPU_STATE_LME_ENABLED,
1236 &v->arch.hvm_vmx.cpu_state) &&
1237 !test_bit(VMX_CPU_STATE_PAE_ENABLED,
1238 &v->arch.hvm_vmx.cpu_state) )
1240 HVM_DBG_LOG(DBG_LEVEL_1, "Enable paging before PAE enabled\n");
1241 vmx_inject_exception(v, TRAP_gp_fault, 0);
1244 if ( test_bit(VMX_CPU_STATE_LME_ENABLED,
1245 &v->arch.hvm_vmx.cpu_state) )
1247 /* PAE should already be enabled at this point */
1248 HVM_DBG_LOG(DBG_LEVEL_1, "Enable long mode\n");
1249 set_bit(VMX_CPU_STATE_LMA_ENABLED,
1250 &v->arch.hvm_vmx.cpu_state);
1252 __vmread(VM_ENTRY_CONTROLS, &vm_entry_value);
1253 vm_entry_value |= VM_ENTRY_CONTROLS_IA32E_MODE;
1254 __vmwrite(VM_ENTRY_CONTROLS, vm_entry_value);
1256 if ( !shadow_set_guest_paging_levels(v->domain, 4) ) {
1257 printk("Unsupported guest paging levels\n");
1258 domain_crash_synchronous(); /* need to take a clean path */
1261 else
1262 #endif /* __x86_64__ */
1264 #if CONFIG_PAGING_LEVELS >= 3
1265 if ( !shadow_set_guest_paging_levels(v->domain, 2) ) {
1266 printk("Unsupported guest paging levels\n");
1267 domain_crash_synchronous(); /* need to take a clean path */
1269 #endif
1272 /*
1273 * Now arch.guest_table points to machine physical.
1274 */
1275 v->arch.guest_table = mk_pagetable((u64)mfn << PAGE_SHIFT);
1276 update_pagetables(v);
1278 HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx",
1279 (unsigned long) (mfn << PAGE_SHIFT));
1281 __vmwrite(GUEST_CR3, pagetable_get_paddr(v->arch.shadow_table));
1282 /*
1283 * arch->shadow_table should hold the next CR3 for shadow
1284 */
1285 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx",
1286 v->arch.hvm_vmx.cpu_cr3, mfn);
1289 if ( !((value & X86_CR0_PE) && (value & X86_CR0_PG)) && paging_enabled )
1290 if ( v->arch.hvm_vmx.cpu_cr3 ) {
1291 put_page(mfn_to_page(get_mfn_from_gpfn(
1292 v->arch.hvm_vmx.cpu_cr3 >> PAGE_SHIFT)));
1293 v->arch.guest_table = mk_pagetable(0);
1296 /*
1297 * VMX does not implement real-mode virtualization. We emulate
1298 * real-mode by performing a world switch to VMXAssist whenever
1299 * a partition disables the CR0.PE bit.
1300 */
1301 if ( (value & X86_CR0_PE) == 0 )
1303 if ( value & X86_CR0_PG ) {
1304 /* inject GP here */
1305 vmx_inject_exception(v, TRAP_gp_fault, 0);
1306 return 0;
1307 } else {
1308 /*
1309 * Disable paging here.
1310 * Same as the PE == 1 && PG == 0 case.
1311 */
1312 if ( test_bit(VMX_CPU_STATE_LMA_ENABLED,
1313 &v->arch.hvm_vmx.cpu_state) )
1315 clear_bit(VMX_CPU_STATE_LMA_ENABLED,
1316 &v->arch.hvm_vmx.cpu_state);
1317 __vmread(VM_ENTRY_CONTROLS, &vm_entry_value);
1318 vm_entry_value &= ~VM_ENTRY_CONTROLS_IA32E_MODE;
1319 __vmwrite(VM_ENTRY_CONTROLS, vm_entry_value);
1323 clear_all_shadow_status(v->domain);
1324 if ( vmx_assist(v, VMX_ASSIST_INVOKE) ) {
1325 set_bit(VMX_CPU_STATE_ASSIST_ENABLED, &v->arch.hvm_vmx.cpu_state);
1326 __vmread(GUEST_RIP, &eip);
1327 HVM_DBG_LOG(DBG_LEVEL_1,
1328 "Transfering control to vmxassist %%eip 0x%lx\n", eip);
1329 return 0; /* do not update eip! */
1331 } else if ( test_bit(VMX_CPU_STATE_ASSIST_ENABLED,
1332 &v->arch.hvm_vmx.cpu_state) )
1334 __vmread(GUEST_RIP, &eip);
1335 HVM_DBG_LOG(DBG_LEVEL_1,
1336 "Enabling CR0.PE at %%eip 0x%lx\n", eip);
1337 if ( vmx_assist(v, VMX_ASSIST_RESTORE) )
1339 clear_bit(VMX_CPU_STATE_ASSIST_ENABLED,
1340 &v->arch.hvm_vmx.cpu_state);
1341 __vmread(GUEST_RIP, &eip);
1342 HVM_DBG_LOG(DBG_LEVEL_1,
1343 "Restoring to %%eip 0x%lx\n", eip);
1344 return 0; /* do not update eip! */
1348 return 1;
1351 #define CASE_GET_REG(REG, reg) \
1352 case REG_ ## REG: value = regs->reg; break
1354 #define CASE_EXTEND_SET_REG \
1355 CASE_EXTEND_REG(S)
1356 #define CASE_EXTEND_GET_REG \
1357 CASE_EXTEND_REG(G)
1359 #ifdef __i386__
1360 #define CASE_EXTEND_REG(T)
1361 #else
1362 #define CASE_EXTEND_REG(T) \
1363 CASE_ ## T ## ET_REG(R8, r8); \
1364 CASE_ ## T ## ET_REG(R9, r9); \
1365 CASE_ ## T ## ET_REG(R10, r10); \
1366 CASE_ ## T ## ET_REG(R11, r11); \
1367 CASE_ ## T ## ET_REG(R12, r12); \
1368 CASE_ ## T ## ET_REG(R13, r13); \
1369 CASE_ ## T ## ET_REG(R14, r14); \
1370 CASE_ ## T ## ET_REG(R15, r15);
1371 #endif
1374 /*
1375 * Write to control registers
1376 */
1377 static int mov_to_cr(int gp, int cr, struct cpu_user_regs *regs)
1379 unsigned long value;
1380 unsigned long old_cr;
1381 struct vcpu *v = current;
1383 switch (gp) {
1384 CASE_GET_REG(EAX, eax);
1385 CASE_GET_REG(ECX, ecx);
1386 CASE_GET_REG(EDX, edx);
1387 CASE_GET_REG(EBX, ebx);
1388 CASE_GET_REG(EBP, ebp);
1389 CASE_GET_REG(ESI, esi);
1390 CASE_GET_REG(EDI, edi);
1391 CASE_EXTEND_GET_REG
1392 case REG_ESP:
1393 __vmread(GUEST_RSP, &value);
1394 break;
1395 default:
1396 printk("invalid gp: %d\n", gp);
1397 __hvm_bug(regs);
1400 HVM_DBG_LOG(DBG_LEVEL_1, "mov_to_cr: CR%d, value = %lx,", cr, value);
1401 HVM_DBG_LOG(DBG_LEVEL_1, "current = %lx,", (unsigned long) current);
1403 switch(cr) {
1404 case 0:
1406 return vmx_set_cr0(value);
1408 case 3:
1410 unsigned long old_base_mfn, mfn;
1412 /*
1413 * If paging is not enabled yet, simply copy the value to CR3.
1414 */
1415 if (!vmx_paging_enabled(v)) {
1416 v->arch.hvm_vmx.cpu_cr3 = value;
1417 break;
1420 /*
1421 * We make a new one if the shadow does not exist.
1422 */
1423 if (value == v->arch.hvm_vmx.cpu_cr3) {
1424 /*
1425 * This is a simple TLB flush, implying the guest has
1426 * removed some translation or changed page attributes.
1427 * We simply invalidate the shadow.
1428 */
1429 mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT);
1430 if (mfn != pagetable_get_pfn(v->arch.guest_table))
1431 __hvm_bug(regs);
1432 shadow_sync_all(v->domain);
1433 } else {
1434 /*
1435 * If different, make a shadow. Check if the PDBR is valid
1436 * first.
1437 */
1438 HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 value = %lx", value);
1439 if ( ((value >> PAGE_SHIFT) > v->domain->max_pages ) ||
1440 !VALID_MFN(mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT)) ||
1441 !get_page(mfn_to_page(mfn), v->domain) )
1443 printk("Invalid CR3 value=%lx", value);
1444 domain_crash_synchronous(); /* need to take a clean path */
1446 old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
1447 v->arch.guest_table = mk_pagetable((u64)mfn << PAGE_SHIFT);
1448 if (old_base_mfn)
1449 put_page(mfn_to_page(old_base_mfn));
1450 /*
1451 * arch.shadow_table should now hold the next CR3 for shadow
1452 */
1453 #if CONFIG_PAGING_LEVELS >= 3
1454 if ( v->domain->arch.ops->guest_paging_levels == PAGING_L3 )
1455 shadow_sync_all(v->domain);
1456 #endif
1458 v->arch.hvm_vmx.cpu_cr3 = value;
1459 update_pagetables(v);
1460 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx",
1461 value);
1462 __vmwrite(GUEST_CR3, pagetable_get_paddr(v->arch.shadow_table));
1464 break;
1466 case 4: /* CR4 */
1468 __vmread(CR4_READ_SHADOW, &old_cr);
1470 if ( value & X86_CR4_PAE && !(old_cr & X86_CR4_PAE) )
1472 set_bit(VMX_CPU_STATE_PAE_ENABLED, &v->arch.hvm_vmx.cpu_state);
1474 if ( vmx_pgbit_test(v) )
1476 /* The guest is 32 bit. */
1477 #if CONFIG_PAGING_LEVELS >= 4
1478 unsigned long mfn, old_base_mfn;
1480 if( !shadow_set_guest_paging_levels(v->domain, 3) )
1482 printk("Unsupported guest paging levels\n");
1483 domain_crash_synchronous(); /* need to take a clean path */
1486 if ( !VALID_MFN(mfn = get_mfn_from_gpfn(
1487 v->arch.hvm_vmx.cpu_cr3 >> PAGE_SHIFT)) ||
1488 !get_page(mfn_to_page(mfn), v->domain) )
1490 printk("Invalid CR3 value = %lx", v->arch.hvm_vmx.cpu_cr3);
1491 domain_crash_synchronous(); /* need to take a clean path */
1494 old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
1495 if ( old_base_mfn )
1496 put_page(mfn_to_page(old_base_mfn));
1498 /*
1499 * Now arch.guest_table points to machine physical.
1500 */
1502 v->arch.guest_table = mk_pagetable((u64)mfn << PAGE_SHIFT);
1503 update_pagetables(v);
1505 HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx",
1506 (unsigned long) (mfn << PAGE_SHIFT));
1508 __vmwrite(GUEST_CR3, pagetable_get_paddr(v->arch.shadow_table));
1510 /*
1511 * arch->shadow_table should hold the next CR3 for shadow
1512 */
1514 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx",
1515 v->arch.hvm_vmx.cpu_cr3, mfn);
1516 #endif
1518 else
1520 /* The guest is 64 bit. */
1521 #if CONFIG_PAGING_LEVELS >= 4
1522 if ( !shadow_set_guest_paging_levels(v->domain, 4) )
1524 printk("Unsupported guest paging levels\n");
1525 domain_crash_synchronous(); /* need to take a clean path */
1527 #endif
1530 else if ( value & X86_CR4_PAE )
1531 set_bit(VMX_CPU_STATE_PAE_ENABLED, &v->arch.hvm_vmx.cpu_state);
1532 else
1534 if ( test_bit(VMX_CPU_STATE_LMA_ENABLED, &v->arch.hvm_vmx.cpu_state) )
1535 vmx_inject_exception(v, TRAP_gp_fault, 0);
1537 clear_bit(VMX_CPU_STATE_PAE_ENABLED, &v->arch.hvm_vmx.cpu_state);
1540 __vmwrite(GUEST_CR4, value| VMX_CR4_HOST_MASK);
1541 __vmwrite(CR4_READ_SHADOW, value);
1543 /*
1544 * Writing to CR4 to modify the PSE, PGE, or PAE flag invalidates
1545 * all TLB entries except global entries.
1546 */
1547 if ( (old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE) )
1548 shadow_sync_all(v->domain);
1550 break;
1552 default:
1553 printk("invalid cr: %d\n", gp);
1554 __hvm_bug(regs);
1557 return 1;
1560 #define CASE_SET_REG(REG, reg) \
1561 case REG_ ## REG: \
1562 regs->reg = value; \
1563 break
1565 /*
1566 * Read from control registers. CR0 and CR4 are read from the shadow.
1567 */
1568 static void mov_from_cr(int cr, int gp, struct cpu_user_regs *regs)
1570 unsigned long value;
1571 struct vcpu *v = current;
1573 if (cr != 3)
1574 __hvm_bug(regs);
1576 value = (unsigned long) v->arch.hvm_vmx.cpu_cr3;
1578 switch (gp) {
1579 CASE_SET_REG(EAX, eax);
1580 CASE_SET_REG(ECX, ecx);
1581 CASE_SET_REG(EDX, edx);
1582 CASE_SET_REG(EBX, ebx);
1583 CASE_SET_REG(EBP, ebp);
1584 CASE_SET_REG(ESI, esi);
1585 CASE_SET_REG(EDI, edi);
1586 CASE_EXTEND_SET_REG
1587 case REG_ESP:
1588 __vmwrite(GUEST_RSP, value);
1589 regs->esp = value;
1590 break;
1591 default:
1592 printk("invalid gp: %d\n", gp);
1593 __hvm_bug(regs);
1596 HVM_DBG_LOG(DBG_LEVEL_VMMU, "mov_from_cr: CR%d, value = %lx,", cr, value);
1599 static int vmx_cr_access(unsigned long exit_qualification, struct cpu_user_regs *regs)
1601 unsigned int gp, cr;
1602 unsigned long value;
1603 struct vcpu *v = current;
1605 switch (exit_qualification & CONTROL_REG_ACCESS_TYPE) {
1606 case TYPE_MOV_TO_CR:
1607 gp = exit_qualification & CONTROL_REG_ACCESS_REG;
1608 cr = exit_qualification & CONTROL_REG_ACCESS_NUM;
1609 TRACE_VMEXIT(1,TYPE_MOV_TO_CR);
1610 TRACE_VMEXIT(2,cr);
1611 TRACE_VMEXIT(3,gp);
1612 return mov_to_cr(gp, cr, regs);
1613 case TYPE_MOV_FROM_CR:
1614 gp = exit_qualification & CONTROL_REG_ACCESS_REG;
1615 cr = exit_qualification & CONTROL_REG_ACCESS_NUM;
1616 TRACE_VMEXIT(1,TYPE_MOV_FROM_CR);
1617 TRACE_VMEXIT(2,cr);
1618 TRACE_VMEXIT(3,gp);
1619 mov_from_cr(cr, gp, regs);
1620 break;
1621 case TYPE_CLTS:
1622 TRACE_VMEXIT(1,TYPE_CLTS);
1624 /* We initialise the FPU now, to avoid needing another vmexit. */
1625 setup_fpu(v);
1626 __vm_clear_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_NM);
1628 __vmread_vcpu(v, GUEST_CR0, &value);
1629 value &= ~X86_CR0_TS; /* clear TS */
1630 __vmwrite(GUEST_CR0, value);
1632 __vmread_vcpu(v, CR0_READ_SHADOW, &value);
1633 value &= ~X86_CR0_TS; /* clear TS */
1634 __vmwrite(CR0_READ_SHADOW, value);
1635 break;
1636 case TYPE_LMSW:
1637 TRACE_VMEXIT(1,TYPE_LMSW);
1638 __vmread_vcpu(v, CR0_READ_SHADOW, &value);
1639 value = (value & ~0xF) |
1640 (((exit_qualification & LMSW_SOURCE_DATA) >> 16) & 0xF);
1641 return vmx_set_cr0(value);
1642 break;
1643 default:
1644 __hvm_bug(regs);
1645 break;
1647 return 1;
1650 static inline void vmx_do_msr_read(struct cpu_user_regs *regs)
1652 u64 msr_content = 0;
1653 struct vcpu *v = current;
1655 HVM_DBG_LOG(DBG_LEVEL_1, "vmx_do_msr_read: ecx=%lx, eax=%lx, edx=%lx",
1656 (unsigned long)regs->ecx, (unsigned long)regs->eax,
1657 (unsigned long)regs->edx);
1658 switch (regs->ecx) {
1659 case MSR_IA32_TIME_STAMP_COUNTER:
1661 struct hvm_virpit *vpit;
1663 rdtscll(msr_content);
1664 vpit = &(v->domain->arch.hvm_domain.vpit);
1665 msr_content += vpit->shift;
1666 break;
1668 case MSR_IA32_SYSENTER_CS:
1669 __vmread(GUEST_SYSENTER_CS, (u32 *)&msr_content);
1670 break;
1671 case MSR_IA32_SYSENTER_ESP:
1672 __vmread(GUEST_SYSENTER_ESP, &msr_content);
1673 break;
1674 case MSR_IA32_SYSENTER_EIP:
1675 __vmread(GUEST_SYSENTER_EIP, &msr_content);
1676 break;
1677 case MSR_IA32_APICBASE:
1678 msr_content = VLAPIC(v) ? VLAPIC(v)->apic_base_msr : 0;
1679 break;
1680 default:
1681 if(long_mode_do_msr_read(regs))
1682 return;
1683 rdmsr_safe(regs->ecx, regs->eax, regs->edx);
1684 break;
1687 regs->eax = msr_content & 0xFFFFFFFF;
1688 regs->edx = msr_content >> 32;
1690 HVM_DBG_LOG(DBG_LEVEL_1, "vmx_do_msr_read returns: "
1691 "ecx=%lx, eax=%lx, edx=%lx",
1692 (unsigned long)regs->ecx, (unsigned long)regs->eax,
1693 (unsigned long)regs->edx);
1696 static inline void vmx_do_msr_write(struct cpu_user_regs *regs)
1698 u64 msr_content;
1699 struct vcpu *v = current;
1701 HVM_DBG_LOG(DBG_LEVEL_1, "vmx_do_msr_write: ecx=%lx, eax=%lx, edx=%lx",
1702 (unsigned long)regs->ecx, (unsigned long)regs->eax,
1703 (unsigned long)regs->edx);
1705 msr_content = (regs->eax & 0xFFFFFFFF) | ((u64)regs->edx << 32);
1707 switch (regs->ecx) {
1708 case MSR_IA32_TIME_STAMP_COUNTER:
1710 struct hvm_virpit *vpit;
1711 u64 host_tsc, drift;
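/* Guest write to its TSC: recompute the guest/host TSC delta (vpit->shift)
 * so the guest now reads msr_content, and carry the previously accumulated
 * difference between the VMCS TSC offset and that delta across as 'drift'. */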
1713 rdtscll(host_tsc);
1714 vpit = &(v->domain->arch.hvm_domain.vpit);
1715 drift = v->arch.hvm_vmx.tsc_offset - vpit->shift;
1716 vpit->shift = msr_content - host_tsc;
1717 v->arch.hvm_vmx.tsc_offset = vpit->shift + drift;
1718 __vmwrite(TSC_OFFSET, vpit->shift);
1720 #if defined (__i386__)
1721 __vmwrite(TSC_OFFSET_HIGH, ((vpit->shift)>>32));
1722 #endif
1723 break;
1725 case MSR_IA32_SYSENTER_CS:
1726 __vmwrite(GUEST_SYSENTER_CS, msr_content);
1727 break;
1728 case MSR_IA32_SYSENTER_ESP:
1729 __vmwrite(GUEST_SYSENTER_ESP, msr_content);
1730 break;
1731 case MSR_IA32_SYSENTER_EIP:
1732 __vmwrite(GUEST_SYSENTER_EIP, msr_content);
1733 break;
1734 case MSR_IA32_APICBASE:
1735 vlapic_msr_set(VLAPIC(v), msr_content);
1736 break;
1737 default:
1738 long_mode_do_msr_write(regs);
1739 break;
1742 HVM_DBG_LOG(DBG_LEVEL_1, "vmx_do_msr_write returns: "
1743 "ecx=%lx, eax=%lx, edx=%lx",
1744 (unsigned long)regs->ecx, (unsigned long)regs->eax,
1745 (unsigned long)regs->edx);
1748 /*
1749 * Need to use this exit to reschedule
1750 */
1751 void vmx_vmexit_do_hlt(void)
1753 struct vcpu *v=current;
1754 struct hvm_virpit *vpit = &(v->domain->arch.hvm_domain.vpit);
1755 s_time_t next_pit=-1,next_wakeup;
1757 if ( !v->vcpu_id )
1758 next_pit = get_pit_scheduled(v,vpit);
1759 next_wakeup = get_apictime_scheduled(v);
1760 if ( (next_pit != -1 && next_pit < next_wakeup) || next_wakeup == -1 )
1761 next_wakeup = next_pit;
1762 if ( next_wakeup != - 1 )
1763 set_timer(&current->arch.hvm_vmx.hlt_timer, next_wakeup);
1764 hvm_safe_block();
1767 static inline void vmx_vmexit_do_extint(struct cpu_user_regs *regs)
1769 unsigned int vector;
1770 int error;
1772 asmlinkage void do_IRQ(struct cpu_user_regs *);
1773 fastcall void smp_apic_timer_interrupt(struct cpu_user_regs *);
1774 fastcall void smp_event_check_interrupt(void);
1775 fastcall void smp_invalidate_interrupt(void);
1776 fastcall void smp_call_function_interrupt(void);
1777 fastcall void smp_spurious_interrupt(struct cpu_user_regs *regs);
1778 fastcall void smp_error_interrupt(struct cpu_user_regs *regs);
1779 #ifdef CONFIG_X86_MCE_P4THERMAL
1780 fastcall void smp_thermal_interrupt(struct cpu_user_regs *regs);
1781 #endif
1783 if ((error = __vmread(VM_EXIT_INTR_INFO, &vector))
1784 && !(vector & INTR_INFO_VALID_MASK))
1785 __hvm_bug(regs);
1787 vector &= 0xff;
1788 local_irq_disable();
1790 switch(vector) {
1791 case LOCAL_TIMER_VECTOR:
1792 smp_apic_timer_interrupt(regs);
1793 break;
1794 case EVENT_CHECK_VECTOR:
1795 smp_event_check_interrupt();
1796 break;
1797 case INVALIDATE_TLB_VECTOR:
1798 smp_invalidate_interrupt();
1799 break;
1800 case CALL_FUNCTION_VECTOR:
1801 smp_call_function_interrupt();
1802 break;
1803 case SPURIOUS_APIC_VECTOR:
1804 smp_spurious_interrupt(regs);
1805 break;
1806 case ERROR_APIC_VECTOR:
1807 smp_error_interrupt(regs);
1808 break;
1809 #ifdef CONFIG_X86_MCE_P4THERMAL
1810 case THERMAL_APIC_VECTOR:
1811 smp_thermal_interrupt(regs);
1812 break;
1813 #endif
1814 default:
1815 regs->entry_vector = vector;
1816 do_IRQ(regs);
1817 break;
1821 #if defined (__x86_64__)
1822 void store_cpu_user_regs(struct cpu_user_regs *regs)
1824 __vmread(GUEST_SS_SELECTOR, &regs->ss);
1825 __vmread(GUEST_RSP, &regs->rsp);
1826 __vmread(GUEST_RFLAGS, &regs->rflags);
1827 __vmread(GUEST_CS_SELECTOR, &regs->cs);
1828 __vmread(GUEST_DS_SELECTOR, &regs->ds);
1829 __vmread(GUEST_ES_SELECTOR, &regs->es);
1830 __vmread(GUEST_RIP, &regs->rip);
1832 #elif defined (__i386__)
1833 void store_cpu_user_regs(struct cpu_user_regs *regs)
1835 __vmread(GUEST_SS_SELECTOR, &regs->ss);
1836 __vmread(GUEST_RSP, &regs->esp);
1837 __vmread(GUEST_RFLAGS, &regs->eflags);
1838 __vmread(GUEST_CS_SELECTOR, &regs->cs);
1839 __vmread(GUEST_DS_SELECTOR, &regs->ds);
1840 __vmread(GUEST_ES_SELECTOR, &regs->es);
1841 __vmread(GUEST_RIP, &regs->eip);
1843 #endif
1845 #ifdef XEN_DEBUGGER
1846 void save_cpu_user_regs(struct cpu_user_regs *regs)
1848 __vmread(GUEST_SS_SELECTOR, &regs->xss);
1849 __vmread(GUEST_RSP, &regs->esp);
1850 __vmread(GUEST_RFLAGS, &regs->eflags);
1851 __vmread(GUEST_CS_SELECTOR, &regs->xcs);
1852 __vmread(GUEST_RIP, &regs->eip);
1854 __vmread(GUEST_GS_SELECTOR, &regs->xgs);
1855 __vmread(GUEST_FS_SELECTOR, &regs->xfs);
1856 __vmread(GUEST_ES_SELECTOR, &regs->xes);
1857 __vmread(GUEST_DS_SELECTOR, &regs->xds);
1860 void restore_cpu_user_regs(struct cpu_user_regs *regs)
1862 __vmwrite(GUEST_SS_SELECTOR, regs->xss);
1863 __vmwrite(GUEST_RSP, regs->esp);
1864 __vmwrite(GUEST_RFLAGS, regs->eflags);
1865 __vmwrite(GUEST_CS_SELECTOR, regs->xcs);
1866 __vmwrite(GUEST_RIP, regs->eip);
1868 __vmwrite(GUEST_GS_SELECTOR, regs->xgs);
1869 __vmwrite(GUEST_FS_SELECTOR, regs->xfs);
1870 __vmwrite(GUEST_ES_SELECTOR, regs->xes);
1871 __vmwrite(GUEST_DS_SELECTOR, regs->xds);
1873 #endif
1875 asmlinkage void vmx_vmexit_handler(struct cpu_user_regs regs)
1877 unsigned int exit_reason, idtv_info_field;
1878 unsigned long exit_qualification, eip, inst_len = 0;
1879 struct vcpu *v = current;
1880 int error;
1882 if ((error = __vmread(VM_EXIT_REASON, &exit_reason)))
1883 __hvm_bug(&regs);
1885 perfc_incra(vmexits, exit_reason);
1887 __vmread(IDT_VECTORING_INFO_FIELD, &idtv_info_field);
1888 if (idtv_info_field & INTR_INFO_VALID_MASK) {
1889 __vmwrite(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field);
1891 __vmread(VM_EXIT_INSTRUCTION_LEN, &inst_len);
1892 if (inst_len >= 1 && inst_len <= 15)
1893 __vmwrite(VM_ENTRY_INSTRUCTION_LEN, inst_len);
1895 if (idtv_info_field & 0x800) { /* valid error code */
1896 unsigned long error_code;
1897 __vmread(IDT_VECTORING_ERROR_CODE, &error_code);
1898 __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
1901 HVM_DBG_LOG(DBG_LEVEL_1, "idtv_info_field=%x", idtv_info_field);
1904 /* don't bother logging H/W interrupts */
1905 if (exit_reason != EXIT_REASON_EXTERNAL_INTERRUPT &&
1906 exit_reason != EXIT_REASON_VMCALL &&
1907 exit_reason != EXIT_REASON_IO_INSTRUCTION)
1908 HVM_DBG_LOG(DBG_LEVEL_0, "exit reason = %x", exit_reason);
1910 if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) {
1911 printk("Failed vm entry\n");
1912 domain_crash_synchronous();
1913 return;
1917 __vmread(GUEST_RIP, &eip);
1918 TRACE_3D(TRC_VMX_VMEXIT, v->domain->domain_id, eip, exit_reason);
1919 TRACE_VMEXIT(0,exit_reason);
1922 switch (exit_reason) {
1923 case EXIT_REASON_EXCEPTION_NMI:
1925 /*
1926 * We don't set the software-interrupt exiting (INT n).
1927 * (1) We can get an exception (e.g. #PG) in the guest, or
1928 * (2) NMI
1929 */
1930 int error;
1931 unsigned int vector;
1932 unsigned long va;
1934 if ((error = __vmread(VM_EXIT_INTR_INFO, &vector))
1935 || !(vector & INTR_INFO_VALID_MASK))
1936 __hvm_bug(&regs);
1937 vector &= 0xff;
1939 TRACE_VMEXIT(1,vector);
1940 perfc_incra(cause_vector, vector);
1942 TRACE_3D(TRC_VMX_VECTOR, v->domain->domain_id, eip, vector);
1943 switch (vector) {
1944 #ifdef XEN_DEBUGGER
1945 case TRAP_debug:
1947 save_cpu_user_regs(&regs);
1948 pdb_handle_exception(1, &regs, 1);
1949 restore_cpu_user_regs(&regs);
1950 break;
1952 case TRAP_int3:
1954 save_cpu_user_regs(&regs);
1955 pdb_handle_exception(3, &regs, 1);
1956 restore_cpu_user_regs(&regs);
1957 break;
1959 #else
1960 case TRAP_debug:
1962 void store_cpu_user_regs(struct cpu_user_regs *regs);
1964 store_cpu_user_regs(&regs);
1965 __vm_clear_bit(GUEST_PENDING_DBG_EXCEPTIONS, PENDING_DEBUG_EXC_BS);
1967 domain_pause_for_debugger();
1969 break;
1971 #endif
1972 case TRAP_no_device:
1974 vmx_do_no_device_fault();
1975 break;
1977 case TRAP_page_fault:
1979 __vmread(EXIT_QUALIFICATION, &va);
1980 __vmread(VM_EXIT_INTR_ERROR_CODE, &regs.error_code);
1982 TRACE_VMEXIT(3,regs.error_code);
1983 TRACE_VMEXIT(4,va);
1985 HVM_DBG_LOG(DBG_LEVEL_VMMU,
1986 "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx",
1987 (unsigned long)regs.eax, (unsigned long)regs.ebx,
1988 (unsigned long)regs.ecx, (unsigned long)regs.edx,
1989 (unsigned long)regs.esi, (unsigned long)regs.edi);
1990 v->arch.hvm_vcpu.mmio_op.inst_decoder_regs = &regs;
1992 if (!(error = vmx_do_page_fault(va, &regs))) {
1993 /*
1994 * Inject #PG using Interruption-Information Fields
1995 */
1996 vmx_inject_exception(v, TRAP_page_fault, regs.error_code);
1997 v->arch.hvm_vmx.cpu_cr2 = va;
1998 TRACE_3D(TRC_VMX_INT, v->domain->domain_id, TRAP_page_fault, va);
2000 break;
2002 case TRAP_nmi:
2003 do_nmi(&regs);
2004 break;
2005 default:
2006 vmx_reflect_exception(v);
2007 break;
2009 break;
2011 case EXIT_REASON_EXTERNAL_INTERRUPT:
2012 vmx_vmexit_do_extint(&regs);
2013 break;
2014 case EXIT_REASON_PENDING_INTERRUPT:
2015 __vmwrite(CPU_BASED_VM_EXEC_CONTROL,
2016 MONITOR_CPU_BASED_EXEC_CONTROLS);
2017 v->arch.hvm_vcpu.u.vmx.exec_control = MONITOR_CPU_BASED_EXEC_CONTROLS;
2018 break;
2019 case EXIT_REASON_TASK_SWITCH:
2020 __hvm_bug(&regs);
2021 break;
2022 case EXIT_REASON_CPUID:
2023 __get_instruction_length(inst_len);
2024 vmx_vmexit_do_cpuid(regs.eax, &regs);
2025 __update_guest_eip(inst_len);
2026 break;
2027 case EXIT_REASON_HLT:
2028 __get_instruction_length(inst_len);
2029 __update_guest_eip(inst_len);
2030 vmx_vmexit_do_hlt();
2031 break;
2032 case EXIT_REASON_INVLPG:
2034 unsigned long va;
2036 __vmread(EXIT_QUALIFICATION, &va);
2037 vmx_vmexit_do_invlpg(va);
2038 __get_instruction_length(inst_len);
2039 __update_guest_eip(inst_len);
2040 break;
2042 #if 0 /* keep this for debugging */
2043 case EXIT_REASON_VMCALL:
2044 __get_instruction_length(inst_len);
2045 __vmread(GUEST_RIP, &eip);
2046 __vmread(EXIT_QUALIFICATION, &exit_qualification);
2048 hvm_print_line(v, regs.eax); /* provides the current domain */
2049 __update_guest_eip(inst_len);
2050 break;
2051 #endif
2052 case EXIT_REASON_CR_ACCESS:
2054 __vmread(GUEST_RIP, &eip);
2055 __get_instruction_length(inst_len);
2056 __vmread(EXIT_QUALIFICATION, &exit_qualification);
2058 HVM_DBG_LOG(DBG_LEVEL_1, "eip = %lx, inst_len =%lx, exit_qualification = %lx",
2059 eip, inst_len, exit_qualification);
2060 if (vmx_cr_access(exit_qualification, &regs))
2061 __update_guest_eip(inst_len);
2062 TRACE_VMEXIT(3,regs.error_code);
2063 TRACE_VMEXIT(4,exit_qualification);
2064 break;
2066 case EXIT_REASON_DR_ACCESS:
2067 __vmread(EXIT_QUALIFICATION, &exit_qualification);
2068 vmx_dr_access(exit_qualification, &regs);
2069 __get_instruction_length(inst_len);
2070 __update_guest_eip(inst_len);
2071 break;
2072 case EXIT_REASON_IO_INSTRUCTION:
2073 __vmread(EXIT_QUALIFICATION, &exit_qualification);
2074 __get_instruction_length(inst_len);
2075 vmx_io_instruction(&regs, exit_qualification, inst_len);
2076 TRACE_VMEXIT(4,exit_qualification);
2077 break;
2078 case EXIT_REASON_MSR_READ:
2079 __get_instruction_length(inst_len);
2080 vmx_do_msr_read(&regs);
2081 __update_guest_eip(inst_len);
2082 break;
2083 case EXIT_REASON_MSR_WRITE:
2084 __vmread(GUEST_RIP, &eip);
2085 vmx_do_msr_write(&regs);
2086 __get_instruction_length(inst_len);
2087 __update_guest_eip(inst_len);
2088 break;
2089 case EXIT_REASON_MWAIT_INSTRUCTION:
2090 __hvm_bug(&regs);
2091 break;
2092 case EXIT_REASON_VMCALL:
2093 case EXIT_REASON_VMCLEAR:
2094 case EXIT_REASON_VMLAUNCH:
2095 case EXIT_REASON_VMPTRLD:
2096 case EXIT_REASON_VMPTRST:
2097 case EXIT_REASON_VMREAD:
2098 case EXIT_REASON_VMRESUME:
2099 case EXIT_REASON_VMWRITE:
2100 case EXIT_REASON_VMOFF:
2101 case EXIT_REASON_VMON:
2102 /* Report invalid opcode exception when a VMX guest tries to execute
2103 any of the VMX instructions */
2104 vmx_inject_exception(v, TRAP_invalid_op, VMX_DELIVER_NO_ERROR_CODE);
2105 break;
2107 default:
2108 __hvm_bug(&regs); /* should not happen */
2112 asmlinkage void vmx_load_cr2(void)
2114 struct vcpu *v = current;
2116 local_irq_disable();
2117 #ifdef __i386__
2118 asm volatile("movl %0,%%cr2": :"r" (v->arch.hvm_vmx.cpu_cr2));
2119 #else
2120 asm volatile("movq %0,%%cr2": :"r" (v->arch.hvm_vmx.cpu_cr2));
2121 #endif
2124 asmlinkage void vmx_trace_vmentry (void)
2126 TRACE_5D(TRC_VMENTRY,
2127 trace_values[smp_processor_id()][0],
2128 trace_values[smp_processor_id()][1],
2129 trace_values[smp_processor_id()][2],
2130 trace_values[smp_processor_id()][3],
2131 trace_values[smp_processor_id()][4]);
2132 TRACE_VMEXIT(0,9);
2133 TRACE_VMEXIT(1,9);
2134 TRACE_VMEXIT(2,9);
2135 TRACE_VMEXIT(3,9);
2136 TRACE_VMEXIT(4,9);
2137 return;
2140 asmlinkage void vmx_trace_vmexit (void)
2142 TRACE_3D(TRC_VMEXIT,0,0,0);
2143 return;
2146 /*
2147 * Local variables:
2148 * mode: C
2149 * c-set-style: "BSD"
2150 * c-basic-offset: 4
2151 * tab-width: 4
2152 * indent-tabs-mode: nil
2153 * End:
2154 */