ia64/xen-unstable

view xen/arch/x86/hvm/vmx/vmx.c @ 9016:cf1c1bb9f6d2

Bring up the APs of a VMX domain.
1) Add INIT-SIPI-SIPI IPI sequence handling code to the HVM virtual lapic
code.
2) Add a new interface, init_ap_context, to hvm_funcs, and implement the
VMX side.
3) Add an HVM-generic function, hvm_bringup_ap, which in turn calls
init_ap_context (a rough sketch of this call path follows below).

Signed-off-by: Xin Li <xin.b.li@intel.com>
Signed-off-by: Yunhong Jiang <yunhong.jiang@intel.com>
author kaf24@firebug.cl.cam.ac.uk
date Fri Feb 24 17:32:58 2006 +0100 (2006-02-24)
parents 0349fb4de335
children 7edd64c8bb36
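
The changeset description above references hvm_bringup_ap, which is added to the generic HVM layer rather than to this file; the VMX-specific piece shown in the source below is vmx_init_ap_context, wired up through hvm_funcs.init_ap_context in start_vmx. The following is only a rough sketch of how such a bring-up helper could invoke the new hook once the virtual lapic has decoded an INIT-SIPI-SIPI sequence; the helper name sketch_hvm_bringup_ap, the use of boot_vcpu/vcpu_wake, and the error handling are illustrative assumptions, not the changeset's actual implementation.

/* Hypothetical sketch (not this changeset's code). Assumes the usual Xen
 * headers: <xen/sched.h>, <xen/lib.h>, <xen/errno.h>, <asm/hvm/hvm.h>. */
static int sketch_hvm_bringup_ap(int vcpuid, int trampoline_vector)
{
    struct domain *d = current->domain;
    struct vcpu *v;
    struct vcpu_guest_context *ctxt;
    int rc;

    /* The vcpuid and the SIPI vector would come from the guest's ICR write. */
    if ( (vcpuid < 0) || (vcpuid >= MAX_VIRT_CPUS) ||
         ((v = d->vcpu[vcpuid]) == NULL) )
        return -ENOENT;

    if ( (ctxt = xmalloc(struct vcpu_guest_context)) == NULL )
        return -ENOMEM;

    /* Architecture-specific start-of-day state; for VMX this is
     * vmx_init_ap_context(), which sets eip/edx/ebx and an empty virtual IDT. */
    hvm_funcs.init_ap_context(ctxt, vcpuid, trampoline_vector);

    /* Assumed helpers: initialise the vcpu from the context, then wake it. */
    rc = boot_vcpu(d, vcpuid, ctxt);
    if ( rc == 0 )
        vcpu_wake(v);

    xfree(ctxt);
    return rc;
}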
line source
1 /*
2 * vmx.c: handling VMX architecture-related VM exits
3 * Copyright (c) 2004, Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
16 * Place - Suite 330, Boston, MA 02111-1307 USA.
17 *
18 */
20 #include <xen/config.h>
21 #include <xen/init.h>
22 #include <xen/lib.h>
23 #include <xen/trace.h>
24 #include <xen/sched.h>
25 #include <xen/irq.h>
26 #include <xen/softirq.h>
27 #include <xen/domain_page.h>
28 #include <asm/current.h>
29 #include <asm/io.h>
30 #include <asm/shadow.h>
31 #include <asm/regs.h>
32 #include <asm/cpufeature.h>
33 #include <asm/processor.h>
34 #include <asm/types.h>
35 #include <asm/msr.h>
36 #include <asm/spinlock.h>
37 #include <asm/hvm/hvm.h>
38 #include <asm/hvm/support.h>
39 #include <asm/hvm/vmx/vmx.h>
40 #include <asm/hvm/vmx/vmcs.h>
41 #include <asm/shadow.h>
42 #if CONFIG_PAGING_LEVELS >= 3
43 #include <asm/shadow_64.h>
44 #endif
45 #include <public/sched.h>
46 #include <public/hvm/ioreq.h>
47 #include <asm/hvm/vpic.h>
48 #include <asm/hvm/vlapic.h>
50 static unsigned long trace_values[NR_CPUS][5]; /* indices 0-4 used by TRACE_VMEXIT/TRACE_5D */
51 #define TRACE_VMEXIT(index,value) trace_values[smp_processor_id()][index]=value
53 void vmx_final_setup_guest(struct vcpu *v)
54 {
55 v->arch.schedule_tail = arch_vmx_do_launch;
57 if ( v->vcpu_id == 0 )
58 {
59 struct domain *d = v->domain;
60 struct vcpu *vc;
62 /* Initialize monitor page table */
63 for_each_vcpu(d, vc)
64 vc->arch.monitor_table = mk_pagetable(0);
66 /*
67 * Required to do this once per domain
68 * XXX todo: add a separate function to do these.
69 */
70 memset(&d->shared_info->evtchn_mask[0], 0xff,
71 sizeof(d->shared_info->evtchn_mask));
73 /* Put the domain in shadow mode even though we're going to be using
74 * the shared 1:1 page table initially. It shouldn't hurt */
75 shadow_mode_enable(d,
76 SHM_enable|SHM_refcounts|
77 SHM_translate|SHM_external|SHM_wr_pt_pte);
78 }
79 }
81 void vmx_relinquish_resources(struct vcpu *v)
82 {
83 struct hvm_virpit *vpit;
85 if (v->vcpu_id == 0) {
86 /* unmap IO shared page */
87 struct domain *d = v->domain;
88 if ( d->arch.hvm_domain.shared_page_va )
89 unmap_domain_page_global(
90 (void *)d->arch.hvm_domain.shared_page_va);
91 shadow_direct_map_clean(d);
92 }
94 vmx_request_clear_vmcs(v);
95 destroy_vmcs(&v->arch.hvm_vmx);
96 free_monitor_pagetable(v);
97 vpit = &v->domain->arch.hvm_domain.vpit;
98 kill_timer(&vpit->pit_timer);
99 kill_timer(&v->arch.hvm_vmx.hlt_timer);
100 if ( hvm_apic_support(v->domain) && (VLAPIC(v) != NULL) )
101 {
102 kill_timer(&VLAPIC(v)->vlapic_timer);
103 xfree(VLAPIC(v));
104 }
105 }
107 #ifdef __x86_64__
108 static struct vmx_msr_state percpu_msr[NR_CPUS];
110 static u32 msr_data_index[VMX_MSR_COUNT] =
111 {
112 MSR_LSTAR, MSR_STAR, MSR_CSTAR,
113 MSR_SYSCALL_MASK, MSR_EFER,
114 };
116 void vmx_save_segments(struct vcpu *v)
117 {
118 rdmsrl(MSR_SHADOW_GS_BASE, v->arch.hvm_vmx.msr_content.shadow_gs);
119 }
121 /*
122 * To avoid MSR save/restore at every VM exit/entry time, we restore
123 * the x86_64-specific MSRs at domain switch time. Since those MSRs
124 * are not modified once set for generic domains, we don't save them,
125 * but simply reset them to the values set at percpu_traps_init().
126 */
127 void vmx_load_msrs(void)
128 {
129 struct vmx_msr_state *host_state = &percpu_msr[smp_processor_id()];
130 int i;
132 while ( host_state->flags )
133 {
134 i = find_first_set_bit(host_state->flags);
135 wrmsrl(msr_data_index[i], host_state->msr_items[i]);
136 clear_bit(i, &host_state->flags);
137 }
138 }
140 static void vmx_save_init_msrs(void)
141 {
142 struct vmx_msr_state *host_state = &percpu_msr[smp_processor_id()];
143 int i;
145 for ( i = 0; i < VMX_MSR_COUNT; i++ )
146 rdmsrl(msr_data_index[i], host_state->msr_items[i]);
147 }
149 #define CASE_READ_MSR(address) \
150 case MSR_ ## address: \
151 msr_content = msr->msr_items[VMX_INDEX_MSR_ ## address]; \
152 break
154 #define CASE_WRITE_MSR(address) \
155 case MSR_ ## address: \
156 { \
157 msr->msr_items[VMX_INDEX_MSR_ ## address] = msr_content; \
158 if (!test_bit(VMX_INDEX_MSR_ ## address, &msr->flags)) { \
159 set_bit(VMX_INDEX_MSR_ ## address, &msr->flags); \
160 } \
161 wrmsrl(MSR_ ## address, msr_content); \
162 set_bit(VMX_INDEX_MSR_ ## address, &host_state->flags); \
163 } \
164 break
166 #define IS_CANO_ADDRESS(add) 1 /* XXX: canonical-address check not implemented yet */
167 static inline int long_mode_do_msr_read(struct cpu_user_regs *regs)
168 {
169 u64 msr_content = 0;
170 struct vcpu *vc = current;
171 struct vmx_msr_state * msr = &vc->arch.hvm_vmx.msr_content;
172 switch(regs->ecx){
173 case MSR_EFER:
174 msr_content = msr->msr_items[VMX_INDEX_MSR_EFER];
175 HVM_DBG_LOG(DBG_LEVEL_2, "EFER msr_content %llx\n", (unsigned long long)msr_content);
176 if (test_bit(VMX_CPU_STATE_LME_ENABLED,
177 &vc->arch.hvm_vmx.cpu_state))
178 msr_content |= 1 << _EFER_LME;
180 if (VMX_LONG_GUEST(vc))
181 msr_content |= 1 << _EFER_LMA;
182 break;
183 case MSR_FS_BASE:
184 if (!(VMX_LONG_GUEST(vc)))
185 /* XXX should it be GP fault */
186 domain_crash_synchronous();
187 __vmread(GUEST_FS_BASE, &msr_content);
188 break;
189 case MSR_GS_BASE:
190 if (!(VMX_LONG_GUEST(vc)))
191 domain_crash_synchronous();
192 __vmread(GUEST_GS_BASE, &msr_content);
193 break;
194 case MSR_SHADOW_GS_BASE:
195 msr_content = msr->shadow_gs;
196 break;
198 CASE_READ_MSR(STAR);
199 CASE_READ_MSR(LSTAR);
200 CASE_READ_MSR(CSTAR);
201 CASE_READ_MSR(SYSCALL_MASK);
202 default:
203 return 0;
204 }
205 HVM_DBG_LOG(DBG_LEVEL_2, "mode_do_msr_read: msr_content: %lx\n", msr_content);
206 regs->eax = msr_content & 0xffffffff;
207 regs->edx = msr_content >> 32;
208 return 1;
209 }
211 static inline int long_mode_do_msr_write(struct cpu_user_regs *regs)
212 {
213 u64 msr_content = regs->eax | ((u64)regs->edx << 32);
214 struct vcpu *vc = current;
215 struct vmx_msr_state * msr = &vc->arch.hvm_vmx.msr_content;
216 struct vmx_msr_state * host_state =
217 &percpu_msr[smp_processor_id()];
219 HVM_DBG_LOG(DBG_LEVEL_1, " mode_do_msr_write msr %lx msr_content %lx\n",
220 regs->ecx, msr_content);
222 switch (regs->ecx){
223 case MSR_EFER:
224 if ((msr_content & EFER_LME) ^
225 test_bit(VMX_CPU_STATE_LME_ENABLED,
226 &vc->arch.hvm_vmx.cpu_state)){
227 if (test_bit(VMX_CPU_STATE_PG_ENABLED,
228 &vc->arch.hvm_vmx.cpu_state) ||
229 !test_bit(VMX_CPU_STATE_PAE_ENABLED,
230 &vc->arch.hvm_vmx.cpu_state)){
231 vmx_inject_exception(vc, TRAP_gp_fault, 0);
232 }
233 }
234 if (msr_content & EFER_LME)
235 set_bit(VMX_CPU_STATE_LME_ENABLED,
236 &vc->arch.hvm_vmx.cpu_state);
237 /* No update for LME/LMA since they have no effect. */
238 msr->msr_items[VMX_INDEX_MSR_EFER] =
239 msr_content;
240 if (msr_content & ~(EFER_LME | EFER_LMA)){
241 msr->msr_items[VMX_INDEX_MSR_EFER] = msr_content;
242 if (!test_bit(VMX_INDEX_MSR_EFER, &msr->flags)){
243 rdmsrl(MSR_EFER,
244 host_state->msr_items[VMX_INDEX_MSR_EFER]);
245 set_bit(VMX_INDEX_MSR_EFER, &host_state->flags);
246 set_bit(VMX_INDEX_MSR_EFER, &msr->flags);
247 }
248 }
249 break;
251 case MSR_FS_BASE:
252 case MSR_GS_BASE:
253 if (!(VMX_LONG_GUEST(vc)))
254 domain_crash_synchronous();
255 if (!IS_CANO_ADDRESS(msr_content)){
256 HVM_DBG_LOG(DBG_LEVEL_1, "Non-canonical address in MSR write\n");
257 vmx_inject_exception(vc, TRAP_gp_fault, 0);
258 }
259 if (regs->ecx == MSR_FS_BASE)
260 __vmwrite(GUEST_FS_BASE, msr_content);
261 else
262 __vmwrite(GUEST_GS_BASE, msr_content);
263 break;
265 case MSR_SHADOW_GS_BASE:
266 if (!(VMX_LONG_GUEST(vc)))
267 domain_crash_synchronous();
268 vc->arch.hvm_vmx.msr_content.shadow_gs = msr_content;
269 wrmsrl(MSR_SHADOW_GS_BASE, msr_content);
270 break;
272 CASE_WRITE_MSR(STAR);
273 CASE_WRITE_MSR(LSTAR);
274 CASE_WRITE_MSR(CSTAR);
275 CASE_WRITE_MSR(SYSCALL_MASK);
276 default:
277 return 0;
278 }
279 return 1;
280 }
282 void
283 vmx_restore_msrs(struct vcpu *v)
284 {
285 int i = 0;
286 struct vmx_msr_state *guest_state;
287 struct vmx_msr_state *host_state;
288 unsigned long guest_flags;
290 guest_state = &v->arch.hvm_vmx.msr_content;
291 host_state = &percpu_msr[smp_processor_id()];
293 wrmsrl(MSR_SHADOW_GS_BASE, guest_state->shadow_gs);
294 guest_flags = guest_state->flags;
295 if (!guest_flags)
296 return;
298 while (guest_flags){
299 i = find_first_set_bit(guest_flags);
301 HVM_DBG_LOG(DBG_LEVEL_2,
302 "restore guest's index %d msr %lx with %lx\n",
303 i, (unsigned long) msr_data_index[i], (unsigned long) guest_state->msr_items[i]);
304 set_bit(i, &host_state->flags);
305 wrmsrl(msr_data_index[i], guest_state->msr_items[i]);
306 clear_bit(i, &guest_flags);
307 }
308 }
309 #else /* __i386__ */
310 #define vmx_save_init_msrs() ((void)0)
312 static inline int long_mode_do_msr_read(struct cpu_user_regs *regs){
313 return 0;
314 }
315 static inline int long_mode_do_msr_write(struct cpu_user_regs *regs){
316 return 0;
317 }
318 #endif
320 void stop_vmx(void)
321 {
322 if (read_cr4() & X86_CR4_VMXE)
323 __vmxoff();
324 }
326 int vmx_initialize_guest_resources(struct vcpu *v)
327 {
328 vmx_final_setup_guest(v);
329 return 1;
330 }
332 int vmx_relinquish_guest_resources(struct vcpu *v)
333 {
334 vmx_relinquish_resources(v);
335 return 1;
336 }
338 void vmx_migrate_timers(struct vcpu *v)
339 {
340 struct hvm_virpit *vpit = &(v->domain->arch.hvm_domain.vpit);
342 migrate_timer(&vpit->pit_timer, v->processor);
343 migrate_timer(&v->arch.hvm_vmx.hlt_timer, v->processor);
344 if ( hvm_apic_support(v->domain) && VLAPIC(v))
345 migrate_timer(&(VLAPIC(v)->vlapic_timer), v->processor);
346 }
348 void vmx_store_cpu_guest_regs(struct vcpu *v, struct cpu_user_regs *regs)
349 {
350 #if defined (__x86_64__)
351 __vmread(GUEST_RFLAGS, &regs->rflags);
352 __vmread(GUEST_SS_SELECTOR, &regs->ss);
353 __vmread(GUEST_CS_SELECTOR, &regs->cs);
354 __vmread(GUEST_DS_SELECTOR, &regs->ds);
355 __vmread(GUEST_ES_SELECTOR, &regs->es);
356 __vmread(GUEST_GS_SELECTOR, &regs->gs);
357 __vmread(GUEST_FS_SELECTOR, &regs->fs);
358 __vmread(GUEST_RIP, &regs->rip);
359 __vmread(GUEST_RSP, &regs->rsp);
360 #elif defined (__i386__)
361 __vmread(GUEST_RFLAGS, &regs->eflags);
362 __vmread(GUEST_SS_SELECTOR, &regs->ss);
363 __vmread(GUEST_CS_SELECTOR, &regs->cs);
364 __vmread(GUEST_DS_SELECTOR, &regs->ds);
365 __vmread(GUEST_ES_SELECTOR, &regs->es);
366 __vmread(GUEST_GS_SELECTOR, &regs->gs);
367 __vmread(GUEST_FS_SELECTOR, &regs->fs);
368 __vmread(GUEST_RIP, &regs->eip);
369 __vmread(GUEST_RSP, &regs->esp);
370 #else
371 #error Unsupported architecture
372 #endif
373 }
375 void vmx_load_cpu_guest_regs(struct vcpu *v, struct cpu_user_regs *regs)
376 {
377 #if defined (__x86_64__)
378 __vmwrite(GUEST_SS_SELECTOR, regs->ss);
379 __vmwrite(GUEST_RSP, regs->rsp);
381 __vmwrite(GUEST_RFLAGS, regs->rflags);
382 if (regs->rflags & EF_TF)
383 __vm_set_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_DB);
384 else
385 __vm_clear_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_DB);
387 __vmwrite(GUEST_CS_SELECTOR, regs->cs);
388 __vmwrite(GUEST_RIP, regs->rip);
389 #elif defined (__i386__)
390 __vmwrite(GUEST_SS_SELECTOR, regs->ss);
391 __vmwrite(GUEST_RSP, regs->esp);
393 __vmwrite(GUEST_RFLAGS, regs->eflags);
394 if (regs->eflags & EF_TF)
395 __vm_set_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_DB);
396 else
397 __vm_clear_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_DB);
399 __vmwrite(GUEST_CS_SELECTOR, regs->cs);
400 __vmwrite(GUEST_RIP, regs->eip);
401 #else
402 #error Unsupported architecture
403 #endif
404 }
406 void vmx_store_cpu_guest_ctrl_regs(struct vcpu *v, unsigned long crs[8])
407 {
408 __vmread(CR0_READ_SHADOW, &crs[0]);
409 __vmread(GUEST_CR3, &crs[3]);
410 __vmread(CR4_READ_SHADOW, &crs[4]);
411 }
413 void vmx_modify_guest_state(struct vcpu *v)
414 {
415 modify_vmcs(&v->arch.hvm_vmx, &v->arch.guest_context.user_regs);
416 }
418 int vmx_realmode(struct vcpu *v)
419 {
420 unsigned long rflags;
422 __vmread(GUEST_RFLAGS, &rflags);
423 return rflags & X86_EFLAGS_VM;
424 }
426 int vmx_instruction_length(struct vcpu *v)
427 {
428 unsigned long inst_len;
430 if (__vmread(VM_EXIT_INSTRUCTION_LEN, &inst_len))
431 return 0;
432 return inst_len;
433 }
435 unsigned long vmx_get_ctrl_reg(struct vcpu *v, unsigned int num)
436 {
437 switch ( num )
438 {
439 case 0:
440 return v->arch.hvm_vmx.cpu_cr0;
441 case 2:
442 return v->arch.hvm_vmx.cpu_cr2;
443 case 3:
444 return v->arch.hvm_vmx.cpu_cr3;
445 default:
446 BUG();
447 }
448 return 0; /* dummy */
449 }
451 /* SMP VMX guest support */
452 void vmx_init_ap_context(struct vcpu_guest_context *ctxt,
453 int vcpuid, int trampoline_vector)
454 {
455 int i;
457 memset(ctxt, 0, sizeof(*ctxt));
459 /*
460 * Initial register values:
461 */
462 ctxt->user_regs.eip = VMXASSIST_BASE;
463 ctxt->user_regs.edx = vcpuid;
464 ctxt->user_regs.ebx = trampoline_vector;
466 ctxt->flags = VGCF_HVM_GUEST;
468 /* Virtual IDT is empty at start-of-day. */
469 for ( i = 0; i < 256; i++ )
470 {
471 ctxt->trap_ctxt[i].vector = i;
472 ctxt->trap_ctxt[i].cs = FLAT_KERNEL_CS;
473 }
475 /* No callback handlers. */
476 #if defined(__i386__)
477 ctxt->event_callback_cs = FLAT_KERNEL_CS;
478 ctxt->failsafe_callback_cs = FLAT_KERNEL_CS;
479 #endif
480 }
482 void do_nmi(struct cpu_user_regs *);
484 static int check_vmx_controls(u32 ctrls, u32 msr)
485 {
486 u32 vmx_msr_low, vmx_msr_high;
488 rdmsr(msr, vmx_msr_low, vmx_msr_high);
489 if (ctrls < vmx_msr_low || ctrls > vmx_msr_high) {
490 printk("Insufficient VMX capability 0x%x, "
491 "msr=0x%x,low=0x%8x,high=0x%x\n",
492 ctrls, msr, vmx_msr_low, vmx_msr_high);
493 return 0;
494 }
495 return 1;
496 }
498 int start_vmx(void)
499 {
500 struct vmcs_struct *vmcs;
501 u32 ecx;
502 u32 eax, edx;
503 u64 phys_vmcs; /* debugging */
505 /*
506 * Xen does not fill x86_capability words except 0.
507 */
508 ecx = cpuid_ecx(1);
509 boot_cpu_data.x86_capability[4] = ecx;
511 if (!(test_bit(X86_FEATURE_VMXE, &boot_cpu_data.x86_capability)))
512 return 0;
514 rdmsr(IA32_FEATURE_CONTROL_MSR, eax, edx);
516 if (eax & IA32_FEATURE_CONTROL_MSR_LOCK) {
517 if ((eax & IA32_FEATURE_CONTROL_MSR_ENABLE_VMXON) == 0x0) {
518 printk("VMX disabled by Feature Control MSR.\n");
519 return 0;
520 }
521 }
522 else {
523 wrmsr(IA32_FEATURE_CONTROL_MSR,
524 IA32_FEATURE_CONTROL_MSR_LOCK |
525 IA32_FEATURE_CONTROL_MSR_ENABLE_VMXON, 0);
526 }
528 if (!check_vmx_controls(MONITOR_PIN_BASED_EXEC_CONTROLS,
529 MSR_IA32_VMX_PINBASED_CTLS_MSR))
530 return 0;
531 if (!check_vmx_controls(MONITOR_CPU_BASED_EXEC_CONTROLS,
532 MSR_IA32_VMX_PROCBASED_CTLS_MSR))
533 return 0;
534 if (!check_vmx_controls(MONITOR_VM_EXIT_CONTROLS,
535 MSR_IA32_VMX_EXIT_CTLS_MSR))
536 return 0;
537 if (!check_vmx_controls(MONITOR_VM_ENTRY_CONTROLS,
538 MSR_IA32_VMX_ENTRY_CTLS_MSR))
539 return 0;
541 set_in_cr4(X86_CR4_VMXE); /* Enable VMXE */
543 if (!(vmcs = alloc_vmcs())) {
544 printk("Failed to allocate VMCS\n");
545 return 0;
546 }
548 phys_vmcs = (u64) virt_to_maddr(vmcs);
550 if (!(__vmxon(phys_vmcs))) {
551 printk("VMXON is done\n");
552 }
554 vmx_save_init_msrs();
556 /* Setup HVM interfaces */
557 hvm_funcs.disable = stop_vmx;
559 hvm_funcs.initialize_guest_resources = vmx_initialize_guest_resources;
560 hvm_funcs.relinquish_guest_resources = vmx_relinquish_guest_resources;
562 hvm_funcs.store_cpu_guest_regs = vmx_store_cpu_guest_regs;
563 hvm_funcs.load_cpu_guest_regs = vmx_load_cpu_guest_regs;
565 #ifdef __x86_64__
566 hvm_funcs.save_segments = vmx_save_segments;
567 hvm_funcs.load_msrs = vmx_load_msrs;
568 hvm_funcs.restore_msrs = vmx_restore_msrs;
569 #endif
571 hvm_funcs.store_cpu_guest_ctrl_regs = vmx_store_cpu_guest_ctrl_regs;
572 hvm_funcs.modify_guest_state = vmx_modify_guest_state;
574 hvm_funcs.realmode = vmx_realmode;
575 hvm_funcs.paging_enabled = vmx_paging_enabled;
576 hvm_funcs.instruction_length = vmx_instruction_length;
577 hvm_funcs.get_guest_ctrl_reg = vmx_get_ctrl_reg;
579 hvm_funcs.init_ap_context = vmx_init_ap_context;
581 hvm_enabled = 1;
583 return 1;
584 }
586 /*
587 * Not all cases receive valid value in the VM-exit instruction length field.
588 */
589 #define __get_instruction_length(len) \
590 __vmread(VM_EXIT_INSTRUCTION_LEN, &(len)); \
591 if ((len) < 1 || (len) > 15) \
592 __hvm_bug(&regs);
594 static inline void __update_guest_eip(unsigned long inst_len)
595 {
596 unsigned long current_eip;
598 __vmread(GUEST_RIP, &current_eip);
599 __vmwrite(GUEST_RIP, current_eip + inst_len);
600 }
603 static int vmx_do_page_fault(unsigned long va, struct cpu_user_regs *regs)
604 {
605 unsigned long gpa; /* FIXME: PAE */
606 int result;
608 #if 0 /* keep for debugging */
609 {
610 unsigned long eip;
612 __vmread(GUEST_RIP, &eip);
613 HVM_DBG_LOG(DBG_LEVEL_VMMU,
614 "vmx_do_page_fault = 0x%lx, eip = %lx, error_code = %lx",
615 va, eip, (unsigned long)regs->error_code);
616 }
617 #endif
619 if ( !vmx_paging_enabled(current) )
620 {
621 /* construct 1-to-1 direct mapping */
622 if ( shadow_direct_map_fault(va, regs) )
623 return 1;
625 handle_mmio(va, va);
626 TRACE_VMEXIT (2,2);
627 return 1;
628 }
629 gpa = gva_to_gpa(va);
631 /* Use 1:1 page table to identify MMIO address space */
632 if ( mmio_space(gpa) ){
633 struct vcpu *v = current;
634 /* No support for APIC */
635 if (!hvm_apic_support(v->domain) && gpa >= 0xFEC00000) {
636 u32 inst_len;
637 __vmread(VM_EXIT_INSTRUCTION_LEN, &(inst_len));
638 __update_guest_eip(inst_len);
639 return 1;
640 }
641 TRACE_VMEXIT (2,2);
642 handle_mmio(va, gpa);
643 return 1;
644 }
646 result = shadow_fault(va, regs);
647 TRACE_VMEXIT (2,result);
648 #if 0
649 if ( !result )
650 {
651 __vmread(GUEST_RIP, &eip);
652 printk("vmx pgfault to guest va=%lx eip=%lx\n", va, eip);
653 }
654 #endif
656 return result;
657 }
659 static void vmx_do_no_device_fault(void)
660 {
661 unsigned long cr0;
662 struct vcpu *v = current;
664 setup_fpu(current);
665 __vm_clear_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_NM);
667 /* Disable TS in guest CR0 unless the guest wants the exception too. */
668 __vmread_vcpu(v, CR0_READ_SHADOW, &cr0);
669 if ( !(cr0 & X86_CR0_TS) )
670 {
671 __vmread_vcpu(v, GUEST_CR0, &cr0);
672 cr0 &= ~X86_CR0_TS;
673 __vmwrite(GUEST_CR0, cr0);
674 }
675 }
677 /* Reserved bits: [31:15], [12:11], [9], [6], [2:1] */
678 #define VMX_VCPU_CPUID_L1_RESERVED 0xffff9a46
680 static void vmx_vmexit_do_cpuid(unsigned long input, struct cpu_user_regs *regs)
681 {
682 unsigned int eax, ebx, ecx, edx;
683 unsigned long eip;
684 struct vcpu *v = current;
686 __vmread(GUEST_RIP, &eip);
688 HVM_DBG_LOG(DBG_LEVEL_1,
689 "do_cpuid: (eax) %lx, (ebx) %lx, (ecx) %lx, (edx) %lx,"
690 " (esi) %lx, (edi) %lx",
691 (unsigned long)regs->eax, (unsigned long)regs->ebx,
692 (unsigned long)regs->ecx, (unsigned long)regs->edx,
693 (unsigned long)regs->esi, (unsigned long)regs->edi);
695 cpuid(input, &eax, &ebx, &ecx, &edx);
697 if ( input == 1 )
698 {
699 if ( hvm_apic_support(v->domain) &&
700 !vlapic_global_enabled((VLAPIC(v))) )
701 clear_bit(X86_FEATURE_APIC, &edx);
703 #if CONFIG_PAGING_LEVELS < 3
704 clear_bit(X86_FEATURE_PAE, &edx);
705 clear_bit(X86_FEATURE_PSE, &edx);
706 clear_bit(X86_FEATURE_PSE36, &edx);
707 #else
708 if ( v->domain->arch.ops->guest_paging_levels == PAGING_L2 )
709 {
710 if ( !v->domain->arch.hvm_domain.pae_enabled )
711 clear_bit(X86_FEATURE_PAE, &edx);
712 clear_bit(X86_FEATURE_PSE, &edx);
713 clear_bit(X86_FEATURE_PSE36, &edx);
714 }
715 #endif
717 /* Unsupportable for virtualised CPUs. */
718 ecx &= ~VMX_VCPU_CPUID_L1_RESERVED; /* mask off reserved bits */
719 clear_bit(X86_FEATURE_VMXE & 31, &ecx);
720 clear_bit(X86_FEATURE_MWAIT & 31, &ecx);
721 }
722 #ifdef __i386__
723 else if ( input == 0x80000001 )
724 {
725 /* Mask feature for Intel ia32e or AMD long mode. */
726 clear_bit(X86_FEATURE_LM & 31, &edx);
727 }
728 #endif
730 regs->eax = (unsigned long) eax;
731 regs->ebx = (unsigned long) ebx;
732 regs->ecx = (unsigned long) ecx;
733 regs->edx = (unsigned long) edx;
735 HVM_DBG_LOG(DBG_LEVEL_1,
736 "vmx_vmexit_do_cpuid: eip: %lx, input: %lx, out:eax=%x, ebx=%x, ecx=%x, edx=%x",
737 eip, input, eax, ebx, ecx, edx);
739 }
741 #define CASE_GET_REG_P(REG, reg) \
742 case REG_ ## REG: reg_p = (unsigned long *)&(regs->reg); break
744 static void vmx_dr_access (unsigned long exit_qualification, struct cpu_user_regs *regs)
745 {
746 unsigned int reg;
747 unsigned long *reg_p = 0;
748 struct vcpu *v = current;
749 unsigned long eip;
751 __vmread(GUEST_RIP, &eip);
753 reg = exit_qualification & DEBUG_REG_ACCESS_NUM;
755 HVM_DBG_LOG(DBG_LEVEL_1,
756 "vmx_dr_access : eip=%lx, reg=%d, exit_qualification = %lx",
757 eip, reg, exit_qualification);
759 switch(exit_qualification & DEBUG_REG_ACCESS_REG) {
760 CASE_GET_REG_P(EAX, eax);
761 CASE_GET_REG_P(ECX, ecx);
762 CASE_GET_REG_P(EDX, edx);
763 CASE_GET_REG_P(EBX, ebx);
764 CASE_GET_REG_P(EBP, ebp);
765 CASE_GET_REG_P(ESI, esi);
766 CASE_GET_REG_P(EDI, edi);
767 case REG_ESP:
768 break;
769 default:
770 __hvm_bug(regs);
771 }
773 switch (exit_qualification & DEBUG_REG_ACCESS_TYPE) {
774 case TYPE_MOV_TO_DR:
775 /* don't need to check the range */
776 if (reg != REG_ESP)
777 v->arch.guest_context.debugreg[reg] = *reg_p;
778 else {
779 unsigned long value;
780 __vmread(GUEST_RSP, &value);
781 v->arch.guest_context.debugreg[reg] = value;
782 }
783 break;
784 case TYPE_MOV_FROM_DR:
785 if (reg != REG_ESP)
786 *reg_p = v->arch.guest_context.debugreg[reg];
787 else {
788 __vmwrite(GUEST_RSP, v->arch.guest_context.debugreg[reg]);
789 }
790 break;
791 }
792 }
794 /*
795 * Invalidate the TLB for va. Invalidate the shadow page corresponding
796 * to the address va.
797 */
798 static void vmx_vmexit_do_invlpg(unsigned long va)
799 {
800 unsigned long eip;
801 struct vcpu *v = current;
803 __vmread(GUEST_RIP, &eip);
805 HVM_DBG_LOG(DBG_LEVEL_VMMU, "vmx_vmexit_do_invlpg: eip=%lx, va=%lx",
806 eip, va);
808 /*
809 * We do the safest things first, then try to update the shadow,
810 * copying from the guest.
811 */
812 shadow_invlpg(v, va);
813 }
815 static int check_for_null_selector(unsigned long eip)
816 {
817 unsigned char inst[MAX_INST_LEN];
818 unsigned long sel;
819 int i, inst_len;
820 int inst_copy_from_guest(unsigned char *, unsigned long, int);
822 __vmread(VM_EXIT_INSTRUCTION_LEN, &inst_len);
823 memset(inst, 0, MAX_INST_LEN);
824 if (inst_copy_from_guest(inst, eip, inst_len) != inst_len) {
825 printf("check_for_null_selector: get guest instruction failed\n");
826 domain_crash_synchronous();
827 }
829 for (i = 0; i < inst_len; i++) {
830 switch (inst[i]) {
831 case 0xf3: /* REPZ */
832 case 0xf2: /* REPNZ */
833 case 0xf0: /* LOCK */
834 case 0x66: /* data32 */
835 case 0x67: /* addr32 */
836 continue;
837 case 0x2e: /* CS */
838 __vmread(GUEST_CS_SELECTOR, &sel);
839 break;
840 case 0x36: /* SS */
841 __vmread(GUEST_SS_SELECTOR, &sel);
842 break;
843 case 0x26: /* ES */
844 __vmread(GUEST_ES_SELECTOR, &sel);
845 break;
846 case 0x64: /* FS */
847 __vmread(GUEST_FS_SELECTOR, &sel);
848 break;
849 case 0x65: /* GS */
850 __vmread(GUEST_GS_SELECTOR, &sel);
851 break;
852 case 0x3e: /* DS */
853 /* FALLTHROUGH */
854 default:
855 /* DS is the default */
856 __vmread(GUEST_DS_SELECTOR, &sel);
857 }
858 return sel == 0 ? 1 : 0;
859 }
861 return 0;
862 }
864 extern void send_pio_req(struct cpu_user_regs *regs, unsigned long port,
865 unsigned long count, int size, long value,
866 int dir, int pvalid);
868 static void vmx_io_instruction(struct cpu_user_regs *regs,
869 unsigned long exit_qualification, unsigned long inst_len)
870 {
871 struct mmio_op *mmio_opp;
872 unsigned long eip, cs, eflags;
873 unsigned long port, size, dir;
874 int vm86;
876 mmio_opp = &current->arch.hvm_vcpu.mmio_op;
877 mmio_opp->instr = INSTR_PIO;
878 mmio_opp->flags = 0;
880 __vmread(GUEST_RIP, &eip);
881 __vmread(GUEST_CS_SELECTOR, &cs);
882 __vmread(GUEST_RFLAGS, &eflags);
883 vm86 = eflags & X86_EFLAGS_VM ? 1 : 0;
885 HVM_DBG_LOG(DBG_LEVEL_1,
886 "vmx_io_instruction: vm86 %d, eip=%lx:%lx, "
887 "exit_qualification = %lx",
888 vm86, cs, eip, exit_qualification);
890 if (test_bit(6, &exit_qualification))
891 port = (exit_qualification >> 16) & 0xFFFF;
892 else
893 port = regs->edx & 0xffff;
894 TRACE_VMEXIT(2, port);
895 size = (exit_qualification & 7) + 1;
896 dir = test_bit(3, &exit_qualification); /* direction */
898 if (test_bit(4, &exit_qualification)) { /* string instruction */
899 unsigned long addr, count = 1;
900 int sign = regs->eflags & EF_DF ? -1 : 1;
902 __vmread(GUEST_LINEAR_ADDRESS, &addr);
904 /*
905 * In protected mode, guest linear address is invalid if the
906 * selector is null.
907 */
908 if (!vm86 && check_for_null_selector(eip))
909 addr = dir == IOREQ_WRITE ? regs->esi : regs->edi;
911 if (test_bit(5, &exit_qualification)) { /* "rep" prefix */
912 mmio_opp->flags |= REPZ;
913 count = vm86 ? regs->ecx & 0xFFFF : regs->ecx;
914 }
916 /*
917 * Handle string pio instructions that cross pages or that
918 * are unaligned. See the comments in hvm_domain.c/handle_mmio()
919 */
920 if ((addr & PAGE_MASK) != ((addr + size - 1) & PAGE_MASK)) {
921 unsigned long value = 0;
923 mmio_opp->flags |= OVERLAP;
924 if (dir == IOREQ_WRITE)
925 hvm_copy(&value, addr, size, HVM_COPY_IN);
926 send_pio_req(regs, port, 1, size, value, dir, 0);
927 } else {
928 if ((addr & PAGE_MASK) != ((addr + count * size - 1) & PAGE_MASK)) {
929 if (sign > 0)
930 count = (PAGE_SIZE - (addr & ~PAGE_MASK)) / size;
931 else
932 count = (addr & ~PAGE_MASK) / size;
933 } else
934 __update_guest_eip(inst_len);
936 send_pio_req(regs, port, count, size, addr, dir, 1);
937 }
938 } else {
939 if (port == 0xe9 && dir == IOREQ_WRITE && size == 1)
940 hvm_print_line(current, regs->eax); /* guest debug output */
942 __update_guest_eip(inst_len);
943 send_pio_req(regs, port, 1, size, regs->eax, dir, 0);
944 }
945 }
947 int
948 vmx_world_save(struct vcpu *v, struct vmx_assist_context *c)
949 {
950 unsigned long inst_len;
951 int error = 0;
953 error |= __vmread(VM_EXIT_INSTRUCTION_LEN, &inst_len);
954 error |= __vmread(GUEST_RIP, &c->eip);
955 c->eip += inst_len; /* skip transition instruction */
956 error |= __vmread(GUEST_RSP, &c->esp);
957 error |= __vmread(GUEST_RFLAGS, &c->eflags);
959 error |= __vmread(CR0_READ_SHADOW, &c->cr0);
960 c->cr3 = v->arch.hvm_vmx.cpu_cr3;
961 error |= __vmread(CR4_READ_SHADOW, &c->cr4);
963 error |= __vmread(GUEST_IDTR_LIMIT, &c->idtr_limit);
964 error |= __vmread(GUEST_IDTR_BASE, &c->idtr_base);
966 error |= __vmread(GUEST_GDTR_LIMIT, &c->gdtr_limit);
967 error |= __vmread(GUEST_GDTR_BASE, &c->gdtr_base);
969 error |= __vmread(GUEST_CS_SELECTOR, &c->cs_sel);
970 error |= __vmread(GUEST_CS_LIMIT, &c->cs_limit);
971 error |= __vmread(GUEST_CS_BASE, &c->cs_base);
972 error |= __vmread(GUEST_CS_AR_BYTES, &c->cs_arbytes.bytes);
974 error |= __vmread(GUEST_DS_SELECTOR, &c->ds_sel);
975 error |= __vmread(GUEST_DS_LIMIT, &c->ds_limit);
976 error |= __vmread(GUEST_DS_BASE, &c->ds_base);
977 error |= __vmread(GUEST_DS_AR_BYTES, &c->ds_arbytes.bytes);
979 error |= __vmread(GUEST_ES_SELECTOR, &c->es_sel);
980 error |= __vmread(GUEST_ES_LIMIT, &c->es_limit);
981 error |= __vmread(GUEST_ES_BASE, &c->es_base);
982 error |= __vmread(GUEST_ES_AR_BYTES, &c->es_arbytes.bytes);
984 error |= __vmread(GUEST_SS_SELECTOR, &c->ss_sel);
985 error |= __vmread(GUEST_SS_LIMIT, &c->ss_limit);
986 error |= __vmread(GUEST_SS_BASE, &c->ss_base);
987 error |= __vmread(GUEST_SS_AR_BYTES, &c->ss_arbytes.bytes);
989 error |= __vmread(GUEST_FS_SELECTOR, &c->fs_sel);
990 error |= __vmread(GUEST_FS_LIMIT, &c->fs_limit);
991 error |= __vmread(GUEST_FS_BASE, &c->fs_base);
992 error |= __vmread(GUEST_FS_AR_BYTES, &c->fs_arbytes.bytes);
994 error |= __vmread(GUEST_GS_SELECTOR, &c->gs_sel);
995 error |= __vmread(GUEST_GS_LIMIT, &c->gs_limit);
996 error |= __vmread(GUEST_GS_BASE, &c->gs_base);
997 error |= __vmread(GUEST_GS_AR_BYTES, &c->gs_arbytes.bytes);
999 error |= __vmread(GUEST_TR_SELECTOR, &c->tr_sel);
1000 error |= __vmread(GUEST_TR_LIMIT, &c->tr_limit);
1001 error |= __vmread(GUEST_TR_BASE, &c->tr_base);
1002 error |= __vmread(GUEST_TR_AR_BYTES, &c->tr_arbytes.bytes);
1004 error |= __vmread(GUEST_LDTR_SELECTOR, &c->ldtr_sel);
1005 error |= __vmread(GUEST_LDTR_LIMIT, &c->ldtr_limit);
1006 error |= __vmread(GUEST_LDTR_BASE, &c->ldtr_base);
1007 error |= __vmread(GUEST_LDTR_AR_BYTES, &c->ldtr_arbytes.bytes);
1009 return !error;
1012 int
1013 vmx_world_restore(struct vcpu *v, struct vmx_assist_context *c)
1015 unsigned long mfn, old_cr4, old_base_mfn;
1016 int error = 0;
1018 error |= __vmwrite(GUEST_RIP, c->eip);
1019 error |= __vmwrite(GUEST_RSP, c->esp);
1020 error |= __vmwrite(GUEST_RFLAGS, c->eflags);
1022 error |= __vmwrite(CR0_READ_SHADOW, c->cr0);
1024 if (!vmx_paging_enabled(v)) {
1025 HVM_DBG_LOG(DBG_LEVEL_VMMU, "switching to vmxassist. use phys table");
1026 __vmwrite(GUEST_CR3, pagetable_get_paddr(v->domain->arch.phys_table));
1027 goto skip_cr3;
1030 if (c->cr3 == v->arch.hvm_vmx.cpu_cr3) {
1031 /*
1032 * This is a simple TLB flush, implying the guest has
1033 * removed some translation or changed page attributes.
1034 * We simply invalidate the shadow.
1035 */
1036 mfn = get_mfn_from_gpfn(c->cr3 >> PAGE_SHIFT);
1037 if (mfn != pagetable_get_pfn(v->arch.guest_table)) {
1038 printk("Invalid CR3 value=%x", c->cr3);
1039 domain_crash_synchronous();
1040 return 0;
1042 shadow_sync_all(v->domain);
1043 } else {
1044 /*
1045 * If different, make a shadow. Check if the PDBR is valid
1046 * first.
1047 */
1048 HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 c->cr3 = %x", c->cr3);
1049 if ((c->cr3 >> PAGE_SHIFT) > v->domain->max_pages) {
1050 printk("Invalid CR3 value=%x", c->cr3);
1051 domain_crash_synchronous();
1052 return 0;
1054 mfn = get_mfn_from_gpfn(c->cr3 >> PAGE_SHIFT);
1055 if(!get_page(mfn_to_page(mfn), v->domain))
1056 return 0;
1057 old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
1058 v->arch.guest_table = mk_pagetable((u64)mfn << PAGE_SHIFT);
1059 if (old_base_mfn)
1060 put_page(mfn_to_page(old_base_mfn));
1061 /*
1062 * arch.shadow_table should now hold the next CR3 for shadow
1063 */
1064 v->arch.hvm_vmx.cpu_cr3 = c->cr3;
1065 update_pagetables(v);
1066 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %x", c->cr3);
1067 __vmwrite(GUEST_CR3, pagetable_get_paddr(v->arch.shadow_table));
1070 skip_cr3:
1072 error |= __vmread(CR4_READ_SHADOW, &old_cr4);
1073 error |= __vmwrite(GUEST_CR4, (c->cr4 | VMX_CR4_HOST_MASK));
1074 error |= __vmwrite(CR4_READ_SHADOW, c->cr4);
1076 error |= __vmwrite(GUEST_IDTR_LIMIT, c->idtr_limit);
1077 error |= __vmwrite(GUEST_IDTR_BASE, c->idtr_base);
1079 error |= __vmwrite(GUEST_GDTR_LIMIT, c->gdtr_limit);
1080 error |= __vmwrite(GUEST_GDTR_BASE, c->gdtr_base);
1082 error |= __vmwrite(GUEST_CS_SELECTOR, c->cs_sel);
1083 error |= __vmwrite(GUEST_CS_LIMIT, c->cs_limit);
1084 error |= __vmwrite(GUEST_CS_BASE, c->cs_base);
1085 error |= __vmwrite(GUEST_CS_AR_BYTES, c->cs_arbytes.bytes);
1087 error |= __vmwrite(GUEST_DS_SELECTOR, c->ds_sel);
1088 error |= __vmwrite(GUEST_DS_LIMIT, c->ds_limit);
1089 error |= __vmwrite(GUEST_DS_BASE, c->ds_base);
1090 error |= __vmwrite(GUEST_DS_AR_BYTES, c->ds_arbytes.bytes);
1092 error |= __vmwrite(GUEST_ES_SELECTOR, c->es_sel);
1093 error |= __vmwrite(GUEST_ES_LIMIT, c->es_limit);
1094 error |= __vmwrite(GUEST_ES_BASE, c->es_base);
1095 error |= __vmwrite(GUEST_ES_AR_BYTES, c->es_arbytes.bytes);
1097 error |= __vmwrite(GUEST_SS_SELECTOR, c->ss_sel);
1098 error |= __vmwrite(GUEST_SS_LIMIT, c->ss_limit);
1099 error |= __vmwrite(GUEST_SS_BASE, c->ss_base);
1100 error |= __vmwrite(GUEST_SS_AR_BYTES, c->ss_arbytes.bytes);
1102 error |= __vmwrite(GUEST_FS_SELECTOR, c->fs_sel);
1103 error |= __vmwrite(GUEST_FS_LIMIT, c->fs_limit);
1104 error |= __vmwrite(GUEST_FS_BASE, c->fs_base);
1105 error |= __vmwrite(GUEST_FS_AR_BYTES, c->fs_arbytes.bytes);
1107 error |= __vmwrite(GUEST_GS_SELECTOR, c->gs_sel);
1108 error |= __vmwrite(GUEST_GS_LIMIT, c->gs_limit);
1109 error |= __vmwrite(GUEST_GS_BASE, c->gs_base);
1110 error |= __vmwrite(GUEST_GS_AR_BYTES, c->gs_arbytes.bytes);
1112 error |= __vmwrite(GUEST_TR_SELECTOR, c->tr_sel);
1113 error |= __vmwrite(GUEST_TR_LIMIT, c->tr_limit);
1114 error |= __vmwrite(GUEST_TR_BASE, c->tr_base);
1115 error |= __vmwrite(GUEST_TR_AR_BYTES, c->tr_arbytes.bytes);
1117 error |= __vmwrite(GUEST_LDTR_SELECTOR, c->ldtr_sel);
1118 error |= __vmwrite(GUEST_LDTR_LIMIT, c->ldtr_limit);
1119 error |= __vmwrite(GUEST_LDTR_BASE, c->ldtr_base);
1120 error |= __vmwrite(GUEST_LDTR_AR_BYTES, c->ldtr_arbytes.bytes);
1122 return !error;
1125 enum { VMX_ASSIST_INVOKE = 0, VMX_ASSIST_RESTORE };
1127 int
1128 vmx_assist(struct vcpu *v, int mode)
1130 struct vmx_assist_context c;
1131 u32 magic;
1132 u32 cp;
1134 /* make sure vmxassist exists (this is not an error) */
1135 if (!hvm_copy(&magic, VMXASSIST_MAGIC_OFFSET, sizeof(magic), HVM_COPY_IN))
1136 return 0;
1137 if (magic != VMXASSIST_MAGIC)
1138 return 0;
1140 switch (mode) {
1141 /*
1142 * Transfer control to vmxassist.
1143 * Store the current context in VMXASSIST_OLD_CONTEXT and load
1144 * the new VMXASSIST_NEW_CONTEXT context. This context was created
1145 * by vmxassist and will transfer control to it.
1146 */
1147 case VMX_ASSIST_INVOKE:
1148 /* save the old context */
1149 if (!hvm_copy(&cp, VMXASSIST_OLD_CONTEXT, sizeof(cp), HVM_COPY_IN))
1150 goto error;
1151 if (cp != 0) {
1152 if (!vmx_world_save(v, &c))
1153 goto error;
1154 if (!hvm_copy(&c, cp, sizeof(c), HVM_COPY_OUT))
1155 goto error;
1158 /* restore the new context, this should activate vmxassist */
1159 if (!hvm_copy(&cp, VMXASSIST_NEW_CONTEXT, sizeof(cp), HVM_COPY_IN))
1160 goto error;
1161 if (cp != 0) {
1162 if (!hvm_copy(&c, cp, sizeof(c), HVM_COPY_IN))
1163 goto error;
1164 if (!vmx_world_restore(v, &c))
1165 goto error;
1166 return 1;
1168 break;
1170 /*
1171 * Restore the VMXASSIST_OLD_CONTEXT that was saved by VMX_ASSIST_INVOKE
1172 * above.
1173 */
1174 case VMX_ASSIST_RESTORE:
1175 /* fetch the context saved by VMX_ASSIST_INVOKE */
1176 if (!hvm_copy(&cp, VMXASSIST_OLD_CONTEXT, sizeof(cp), HVM_COPY_IN))
1177 goto error;
1178 if (cp != 0) {
1179 if (!hvm_copy(&c, cp, sizeof(c), HVM_COPY_IN))
1180 goto error;
1181 if (!vmx_world_restore(v, &c))
1182 goto error;
1183 return 1;
1185 break;
1188 error:
1189 printf("Failed to transfer to vmxassist\n");
1190 domain_crash_synchronous();
1191 return 0;
1194 static int vmx_set_cr0(unsigned long value)
1196 struct vcpu *v = current;
1197 unsigned long mfn;
1198 unsigned long eip;
1199 int paging_enabled;
1200 unsigned long vm_entry_value;
1201 unsigned long old_cr0;
1203 /*
1204 * CR0: We don't want to lose PE and PG.
1205 */
1206 __vmread_vcpu(v, CR0_READ_SHADOW, &old_cr0);
1207 paging_enabled = (old_cr0 & X86_CR0_PE) && (old_cr0 & X86_CR0_PG);
1209 /* TS cleared? Then initialise FPU now. */
1210 if ( !(value & X86_CR0_TS) )
1212 setup_fpu(v);
1213 __vm_clear_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_NM);
1216 __vmwrite(GUEST_CR0, value | X86_CR0_PE | X86_CR0_PG | X86_CR0_NE);
1217 __vmwrite(CR0_READ_SHADOW, value);
1219 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR0 value = %lx\n", value);
1221 if ( (value & X86_CR0_PE) && (value & X86_CR0_PG) && !paging_enabled )
1223 unsigned long cr4;
1225 /*
1226 * Trying to enable guest paging.
1227 * The guest CR3 must point to guest physical memory.
1228 */
1229 if ( !VALID_MFN(mfn = get_mfn_from_gpfn(
1230 v->arch.hvm_vmx.cpu_cr3 >> PAGE_SHIFT)) ||
1231 !get_page(mfn_to_page(mfn), v->domain) )
1233 printk("Invalid CR3 value = %lx", v->arch.hvm_vmx.cpu_cr3);
1234 domain_crash_synchronous(); /* need to take a clean path */
1237 #if defined(__x86_64__)
1238 if ( test_bit(VMX_CPU_STATE_LME_ENABLED,
1239 &v->arch.hvm_vmx.cpu_state) &&
1240 !test_bit(VMX_CPU_STATE_PAE_ENABLED,
1241 &v->arch.hvm_vmx.cpu_state) )
1243 HVM_DBG_LOG(DBG_LEVEL_1, "Enable paging before PAE enabled\n");
1244 vmx_inject_exception(v, TRAP_gp_fault, 0);
1247 if ( test_bit(VMX_CPU_STATE_LME_ENABLED,
1248 &v->arch.hvm_vmx.cpu_state) )
1250 /* At this point PAE should already be enabled. */
1251 HVM_DBG_LOG(DBG_LEVEL_1, "Enable long mode\n");
1252 set_bit(VMX_CPU_STATE_LMA_ENABLED,
1253 &v->arch.hvm_vmx.cpu_state);
1255 __vmread(VM_ENTRY_CONTROLS, &vm_entry_value);
1256 vm_entry_value |= VM_ENTRY_CONTROLS_IA32E_MODE;
1257 __vmwrite(VM_ENTRY_CONTROLS, vm_entry_value);
1259 if ( !shadow_set_guest_paging_levels(v->domain, 4) ) {
1260 printk("Unsupported guest paging levels\n");
1261 domain_crash_synchronous(); /* need to take a clean path */
1264 else
1265 #endif /* __x86_64__ */
1267 #if CONFIG_PAGING_LEVELS >= 3
1268 if ( !shadow_set_guest_paging_levels(v->domain, 2) ) {
1269 printk("Unsupported guest paging levels\n");
1270 domain_crash_synchronous(); /* need to take a clean path */
1272 #endif
1275 /* update CR4's PAE if needed */
1276 __vmread(GUEST_CR4, &cr4);
1277 if ( (!(cr4 & X86_CR4_PAE)) &&
1278 test_bit(VMX_CPU_STATE_PAE_ENABLED,
1279 &v->arch.hvm_vmx.cpu_state) )
1281 HVM_DBG_LOG(DBG_LEVEL_1, "enable PAE in cr4\n");
1282 __vmwrite(GUEST_CR4, cr4 | X86_CR4_PAE);
1285 /*
1286 * Now arch.guest_table points to machine physical.
1287 */
1288 v->arch.guest_table = mk_pagetable((u64)mfn << PAGE_SHIFT);
1289 update_pagetables(v);
1291 HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx",
1292 (unsigned long) (mfn << PAGE_SHIFT));
1294 __vmwrite(GUEST_CR3, pagetable_get_paddr(v->arch.shadow_table));
1295 /*
1296 * arch->shadow_table should hold the next CR3 for shadow
1297 */
1298 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx",
1299 v->arch.hvm_vmx.cpu_cr3, mfn);
1302 if ( !((value & X86_CR0_PE) && (value & X86_CR0_PG)) && paging_enabled )
1303 if ( v->arch.hvm_vmx.cpu_cr3 ) {
1304 put_page(mfn_to_page(get_mfn_from_gpfn(
1305 v->arch.hvm_vmx.cpu_cr3 >> PAGE_SHIFT)));
1306 v->arch.guest_table = mk_pagetable(0);
1309 /*
1310 * VMX does not implement real-mode virtualization. We emulate
1311 * real-mode by performing a world switch to VMXAssist whenever
1312 * a partition disables the CR0.PE bit.
1313 */
1314 if ( (value & X86_CR0_PE) == 0 )
1316 if ( value & X86_CR0_PG ) {
1317 /* inject GP here */
1318 vmx_inject_exception(v, TRAP_gp_fault, 0);
1319 return 0;
1320 } else {
1321 /*
1322 * Disable paging here.
1323 * Same as the PE == 1 && PG == 0 case.
1324 */
1325 if ( test_bit(VMX_CPU_STATE_LMA_ENABLED,
1326 &v->arch.hvm_vmx.cpu_state) )
1328 clear_bit(VMX_CPU_STATE_LMA_ENABLED,
1329 &v->arch.hvm_vmx.cpu_state);
1330 __vmread(VM_ENTRY_CONTROLS, &vm_entry_value);
1331 vm_entry_value &= ~VM_ENTRY_CONTROLS_IA32E_MODE;
1332 __vmwrite(VM_ENTRY_CONTROLS, vm_entry_value);
1336 clear_all_shadow_status(v->domain);
1337 if ( vmx_assist(v, VMX_ASSIST_INVOKE) ) {
1338 set_bit(VMX_CPU_STATE_ASSIST_ENABLED, &v->arch.hvm_vmx.cpu_state);
1339 __vmread(GUEST_RIP, &eip);
1340 HVM_DBG_LOG(DBG_LEVEL_1,
1341 "Transferring control to vmxassist %%eip 0x%lx\n", eip);
1342 return 0; /* do not update eip! */
1344 } else if ( test_bit(VMX_CPU_STATE_ASSIST_ENABLED,
1345 &v->arch.hvm_vmx.cpu_state) )
1347 __vmread(GUEST_RIP, &eip);
1348 HVM_DBG_LOG(DBG_LEVEL_1,
1349 "Enabling CR0.PE at %%eip 0x%lx\n", eip);
1350 if ( vmx_assist(v, VMX_ASSIST_RESTORE) )
1352 clear_bit(VMX_CPU_STATE_ASSIST_ENABLED,
1353 &v->arch.hvm_vmx.cpu_state);
1354 __vmread(GUEST_RIP, &eip);
1355 HVM_DBG_LOG(DBG_LEVEL_1,
1356 "Restoring to %%eip 0x%lx\n", eip);
1357 return 0; /* do not update eip! */
1361 return 1;
1364 #define CASE_GET_REG(REG, reg) \
1365 case REG_ ## REG: value = regs->reg; break
1367 #define CASE_EXTEND_SET_REG \
1368 CASE_EXTEND_REG(S)
1369 #define CASE_EXTEND_GET_REG \
1370 CASE_EXTEND_REG(G)
1372 #ifdef __i386__
1373 #define CASE_EXTEND_REG(T)
1374 #else
1375 #define CASE_EXTEND_REG(T) \
1376 CASE_ ## T ## ET_REG(R8, r8); \
1377 CASE_ ## T ## ET_REG(R9, r9); \
1378 CASE_ ## T ## ET_REG(R10, r10); \
1379 CASE_ ## T ## ET_REG(R11, r11); \
1380 CASE_ ## T ## ET_REG(R12, r12); \
1381 CASE_ ## T ## ET_REG(R13, r13); \
1382 CASE_ ## T ## ET_REG(R14, r14); \
1383 CASE_ ## T ## ET_REG(R15, r15);
1384 #endif
1387 /*
1388 * Write to control registers
1389 */
1390 static int mov_to_cr(int gp, int cr, struct cpu_user_regs *regs)
1392 unsigned long value;
1393 unsigned long old_cr;
1394 struct vcpu *v = current;
1396 switch (gp) {
1397 CASE_GET_REG(EAX, eax);
1398 CASE_GET_REG(ECX, ecx);
1399 CASE_GET_REG(EDX, edx);
1400 CASE_GET_REG(EBX, ebx);
1401 CASE_GET_REG(EBP, ebp);
1402 CASE_GET_REG(ESI, esi);
1403 CASE_GET_REG(EDI, edi);
1404 CASE_EXTEND_GET_REG
1405 case REG_ESP:
1406 __vmread(GUEST_RSP, &value);
1407 break;
1408 default:
1409 printk("invalid gp: %d\n", gp);
1410 __hvm_bug(regs);
1413 HVM_DBG_LOG(DBG_LEVEL_1, "mov_to_cr: CR%d, value = %lx,", cr, value);
1414 HVM_DBG_LOG(DBG_LEVEL_1, "current = %lx,", (unsigned long) current);
1416 switch(cr) {
1417 case 0:
1419 return vmx_set_cr0(value);
1421 case 3:
1423 unsigned long old_base_mfn, mfn;
1425 /*
1426 * If paging is not enabled yet, simply copy the value to CR3.
1427 */
1428 if (!vmx_paging_enabled(v)) {
1429 v->arch.hvm_vmx.cpu_cr3 = value;
1430 break;
1433 /*
1434 * We make a new one if the shadow does not exist.
1435 */
1436 if (value == v->arch.hvm_vmx.cpu_cr3) {
1437 /*
1438 * This is a simple TLB flush, implying the guest has
1439 * removed some translation or changed page attributes.
1440 * We simply invalidate the shadow.
1441 */
1442 mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT);
1443 if (mfn != pagetable_get_pfn(v->arch.guest_table))
1444 __hvm_bug(regs);
1445 shadow_sync_all(v->domain);
1446 } else {
1447 /*
1448 * If different, make a shadow. Check if the PDBR is valid
1449 * first.
1450 */
1451 HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 value = %lx", value);
1452 if ( ((value >> PAGE_SHIFT) > v->domain->max_pages ) ||
1453 !VALID_MFN(mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT)) ||
1454 !get_page(mfn_to_page(mfn), v->domain) )
1456 printk("Invalid CR3 value=%lx", value);
1457 domain_crash_synchronous(); /* need to take a clean path */
1459 old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
1460 v->arch.guest_table = mk_pagetable((u64)mfn << PAGE_SHIFT);
1461 if (old_base_mfn)
1462 put_page(mfn_to_page(old_base_mfn));
1463 /*
1464 * arch.shadow_table should now hold the next CR3 for shadow
1465 */
1466 #if CONFIG_PAGING_LEVELS >= 3
1467 if ( v->domain->arch.ops->guest_paging_levels == PAGING_L3 )
1468 shadow_sync_all(v->domain);
1469 #endif
1471 v->arch.hvm_vmx.cpu_cr3 = value;
1472 update_pagetables(v);
1473 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx",
1474 value);
1475 __vmwrite(GUEST_CR3, pagetable_get_paddr(v->arch.shadow_table));
1477 break;
1479 case 4: /* CR4 */
1481 __vmread(CR4_READ_SHADOW, &old_cr);
1483 if ( value & X86_CR4_PAE && !(old_cr & X86_CR4_PAE) )
1485 set_bit(VMX_CPU_STATE_PAE_ENABLED, &v->arch.hvm_vmx.cpu_state);
1487 if ( vmx_pgbit_test(v) )
1489 /* The guest is 32 bit. */
1490 #if CONFIG_PAGING_LEVELS >= 4
1491 unsigned long mfn, old_base_mfn;
1493 if( !shadow_set_guest_paging_levels(v->domain, 3) )
1495 printk("Unsupported guest paging levels\n");
1496 domain_crash_synchronous(); /* need to take a clean path */
1499 if ( !VALID_MFN(mfn = get_mfn_from_gpfn(
1500 v->arch.hvm_vmx.cpu_cr3 >> PAGE_SHIFT)) ||
1501 !get_page(mfn_to_page(mfn), v->domain) )
1503 printk("Invalid CR3 value = %lx", v->arch.hvm_vmx.cpu_cr3);
1504 domain_crash_synchronous(); /* need to take a clean path */
1507 old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
1508 if ( old_base_mfn )
1509 put_page(mfn_to_page(old_base_mfn));
1511 /*
1512 * Now arch.guest_table points to machine physical.
1513 */
1515 v->arch.guest_table = mk_pagetable((u64)mfn << PAGE_SHIFT);
1516 update_pagetables(v);
1518 HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx",
1519 (unsigned long) (mfn << PAGE_SHIFT));
1521 __vmwrite(GUEST_CR3, pagetable_get_paddr(v->arch.shadow_table));
1523 /*
1524 * arch->shadow_table should hold the next CR3 for shadow
1525 */
1527 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx",
1528 v->arch.hvm_vmx.cpu_cr3, mfn);
1529 #endif
1531 else
1533 /* The guest is 64 bit. */
1534 #if CONFIG_PAGING_LEVELS >= 4
1535 if ( !shadow_set_guest_paging_levels(v->domain, 4) )
1537 printk("Unsupported guest paging levels\n");
1538 domain_crash_synchronous(); /* need to take a clean path */
1540 #endif
1543 else if ( value & X86_CR4_PAE )
1544 set_bit(VMX_CPU_STATE_PAE_ENABLED, &v->arch.hvm_vmx.cpu_state);
1545 else
1547 if ( test_bit(VMX_CPU_STATE_LMA_ENABLED, &v->arch.hvm_vmx.cpu_state) )
1548 vmx_inject_exception(v, TRAP_gp_fault, 0);
1550 clear_bit(VMX_CPU_STATE_PAE_ENABLED, &v->arch.hvm_vmx.cpu_state);
1553 __vmwrite(GUEST_CR4, value| VMX_CR4_HOST_MASK);
1554 __vmwrite(CR4_READ_SHADOW, value);
1556 /*
1557 * Writing to CR4 to modify the PSE, PGE, or PAE flag invalidates
1558 * all TLB entries except global entries.
1559 */
1560 if ( (old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE) )
1561 shadow_sync_all(v->domain);
1563 break;
1565 default:
1566 printk("invalid cr: %d\n", cr);
1567 __hvm_bug(regs);
1570 return 1;
1573 #define CASE_SET_REG(REG, reg) \
1574 case REG_ ## REG: \
1575 regs->reg = value; \
1576 break
1578 /*
1579 * Read from control registers. CR0 and CR4 are read from the shadow.
1580 */
1581 static void mov_from_cr(int cr, int gp, struct cpu_user_regs *regs)
1583 unsigned long value;
1584 struct vcpu *v = current;
1586 if (cr != 3)
1587 __hvm_bug(regs);
1589 value = (unsigned long) v->arch.hvm_vmx.cpu_cr3;
1591 switch (gp) {
1592 CASE_SET_REG(EAX, eax);
1593 CASE_SET_REG(ECX, ecx);
1594 CASE_SET_REG(EDX, edx);
1595 CASE_SET_REG(EBX, ebx);
1596 CASE_SET_REG(EBP, ebp);
1597 CASE_SET_REG(ESI, esi);
1598 CASE_SET_REG(EDI, edi);
1599 CASE_EXTEND_SET_REG
1600 case REG_ESP:
1601 __vmwrite(GUEST_RSP, value);
1602 regs->esp = value;
1603 break;
1604 default:
1605 printk("invalid gp: %d\n", gp);
1606 __hvm_bug(regs);
1609 HVM_DBG_LOG(DBG_LEVEL_VMMU, "mov_from_cr: CR%d, value = %lx,", cr, value);
1612 static int vmx_cr_access(unsigned long exit_qualification, struct cpu_user_regs *regs)
1614 unsigned int gp, cr;
1615 unsigned long value;
1616 struct vcpu *v = current;
1618 switch (exit_qualification & CONTROL_REG_ACCESS_TYPE) {
1619 case TYPE_MOV_TO_CR:
1620 gp = exit_qualification & CONTROL_REG_ACCESS_REG;
1621 cr = exit_qualification & CONTROL_REG_ACCESS_NUM;
1622 TRACE_VMEXIT(1,TYPE_MOV_TO_CR);
1623 TRACE_VMEXIT(2,cr);
1624 TRACE_VMEXIT(3,gp);
1625 return mov_to_cr(gp, cr, regs);
1626 case TYPE_MOV_FROM_CR:
1627 gp = exit_qualification & CONTROL_REG_ACCESS_REG;
1628 cr = exit_qualification & CONTROL_REG_ACCESS_NUM;
1629 TRACE_VMEXIT(1,TYPE_MOV_FROM_CR);
1630 TRACE_VMEXIT(2,cr);
1631 TRACE_VMEXIT(3,gp);
1632 mov_from_cr(cr, gp, regs);
1633 break;
1634 case TYPE_CLTS:
1635 TRACE_VMEXIT(1,TYPE_CLTS);
1637 /* We initialise the FPU now, to avoid needing another vmexit. */
1638 setup_fpu(v);
1639 __vm_clear_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_NM);
1641 __vmread_vcpu(v, GUEST_CR0, &value);
1642 value &= ~X86_CR0_TS; /* clear TS */
1643 __vmwrite(GUEST_CR0, value);
1645 __vmread_vcpu(v, CR0_READ_SHADOW, &value);
1646 value &= ~X86_CR0_TS; /* clear TS */
1647 __vmwrite(CR0_READ_SHADOW, value);
1648 break;
1649 case TYPE_LMSW:
1650 TRACE_VMEXIT(1,TYPE_LMSW);
1651 __vmread_vcpu(v, CR0_READ_SHADOW, &value);
1652 value = (value & ~0xF) |
1653 (((exit_qualification & LMSW_SOURCE_DATA) >> 16) & 0xF);
1654 return vmx_set_cr0(value);
1655 break;
1656 default:
1657 __hvm_bug(regs);
1658 break;
1660 return 1;
1663 static inline void vmx_do_msr_read(struct cpu_user_regs *regs)
1665 u64 msr_content = 0;
1666 struct vcpu *v = current;
1668 HVM_DBG_LOG(DBG_LEVEL_1, "vmx_do_msr_read: ecx=%lx, eax=%lx, edx=%lx",
1669 (unsigned long)regs->ecx, (unsigned long)regs->eax,
1670 (unsigned long)regs->edx);
1671 switch (regs->ecx) {
1672 case MSR_IA32_TIME_STAMP_COUNTER:
1674 struct hvm_virpit *vpit;
1676 rdtscll(msr_content);
1677 vpit = &(v->domain->arch.hvm_domain.vpit);
1678 msr_content += vpit->shift;
1679 break;
1681 case MSR_IA32_SYSENTER_CS:
1682 __vmread(GUEST_SYSENTER_CS, (u32 *)&msr_content);
1683 break;
1684 case MSR_IA32_SYSENTER_ESP:
1685 __vmread(GUEST_SYSENTER_ESP, &msr_content);
1686 break;
1687 case MSR_IA32_SYSENTER_EIP:
1688 __vmread(GUEST_SYSENTER_EIP, &msr_content);
1689 break;
1690 case MSR_IA32_APICBASE:
1691 msr_content = VLAPIC(v) ? VLAPIC(v)->apic_base_msr : 0;
1692 break;
1693 default:
1694 if(long_mode_do_msr_read(regs))
1695 return;
1696 rdmsr_safe(regs->ecx, regs->eax, regs->edx);
1697 break;
1700 regs->eax = msr_content & 0xFFFFFFFF;
1701 regs->edx = msr_content >> 32;
1703 HVM_DBG_LOG(DBG_LEVEL_1, "vmx_do_msr_read returns: "
1704 "ecx=%lx, eax=%lx, edx=%lx",
1705 (unsigned long)regs->ecx, (unsigned long)regs->eax,
1706 (unsigned long)regs->edx);
1709 static inline void vmx_do_msr_write(struct cpu_user_regs *regs)
1711 u64 msr_content;
1712 struct vcpu *v = current;
1714 HVM_DBG_LOG(DBG_LEVEL_1, "vmx_do_msr_write: ecx=%lx, eax=%lx, edx=%lx",
1715 (unsigned long)regs->ecx, (unsigned long)regs->eax,
1716 (unsigned long)regs->edx);
1718 msr_content = (regs->eax & 0xFFFFFFFF) | ((u64)regs->edx << 32);
1720 switch (regs->ecx) {
1721 case MSR_IA32_TIME_STAMP_COUNTER:
1723 struct hvm_virpit *vpit;
1724 u64 host_tsc, drift;
1726 rdtscll(host_tsc);
1727 vpit = &(v->domain->arch.hvm_domain.vpit);
1728 drift = v->arch.hvm_vmx.tsc_offset - vpit->shift;
1729 vpit->shift = msr_content - host_tsc;
1730 v->arch.hvm_vmx.tsc_offset = vpit->shift + drift;
1731 __vmwrite(TSC_OFFSET, vpit->shift);
1733 #if defined (__i386__)
1734 __vmwrite(TSC_OFFSET_HIGH, ((vpit->shift)>>32));
1735 #endif
1736 break;
1738 case MSR_IA32_SYSENTER_CS:
1739 __vmwrite(GUEST_SYSENTER_CS, msr_content);
1740 break;
1741 case MSR_IA32_SYSENTER_ESP:
1742 __vmwrite(GUEST_SYSENTER_ESP, msr_content);
1743 break;
1744 case MSR_IA32_SYSENTER_EIP:
1745 __vmwrite(GUEST_SYSENTER_EIP, msr_content);
1746 break;
1747 case MSR_IA32_APICBASE:
1748 vlapic_msr_set(VLAPIC(v), msr_content);
1749 break;
1750 default:
1751 long_mode_do_msr_write(regs);
1752 break;
1755 HVM_DBG_LOG(DBG_LEVEL_1, "vmx_do_msr_write returns: "
1756 "ecx=%lx, eax=%lx, edx=%lx",
1757 (unsigned long)regs->ecx, (unsigned long)regs->eax,
1758 (unsigned long)regs->edx);
1761 /*
1762 * Need to use this exit to reschedule
1763 */
1764 void vmx_vmexit_do_hlt(void)
1765 {
1766 struct vcpu *v = current;
1767 struct hvm_virpit *vpit = &(v->domain->arch.hvm_domain.vpit);
1768 s_time_t next_pit = -1, next_wakeup;
1770 if ( !v->vcpu_id )
1771 next_pit = get_pit_scheduled(v, vpit);
1772 next_wakeup = get_apictime_scheduled(v);
1773 if ( (next_pit != -1 && next_pit < next_wakeup) || next_wakeup == -1 )
1774 next_wakeup = next_pit;
1775 if ( next_wakeup != -1 )
1776 set_timer(&current->arch.hvm_vmx.hlt_timer, next_wakeup);
1777 hvm_safe_block();
1778 }
1780 static inline void vmx_vmexit_do_extint(struct cpu_user_regs *regs)
1782 unsigned int vector;
1783 int error;
1785 asmlinkage void do_IRQ(struct cpu_user_regs *);
1786 fastcall void smp_apic_timer_interrupt(struct cpu_user_regs *);
1787 fastcall void smp_event_check_interrupt(void);
1788 fastcall void smp_invalidate_interrupt(void);
1789 fastcall void smp_call_function_interrupt(void);
1790 fastcall void smp_spurious_interrupt(struct cpu_user_regs *regs);
1791 fastcall void smp_error_interrupt(struct cpu_user_regs *regs);
1792 #ifdef CONFIG_X86_MCE_P4THERMAL
1793 fastcall void smp_thermal_interrupt(struct cpu_user_regs *regs);
1794 #endif
1796 if ((error = __vmread(VM_EXIT_INTR_INFO, &vector))
1797 || !(vector & INTR_INFO_VALID_MASK))
1798 __hvm_bug(regs);
1800 vector &= 0xff;
1801 local_irq_disable();
1803 switch(vector) {
1804 case LOCAL_TIMER_VECTOR:
1805 smp_apic_timer_interrupt(regs);
1806 break;
1807 case EVENT_CHECK_VECTOR:
1808 smp_event_check_interrupt();
1809 break;
1810 case INVALIDATE_TLB_VECTOR:
1811 smp_invalidate_interrupt();
1812 break;
1813 case CALL_FUNCTION_VECTOR:
1814 smp_call_function_interrupt();
1815 break;
1816 case SPURIOUS_APIC_VECTOR:
1817 smp_spurious_interrupt(regs);
1818 break;
1819 case ERROR_APIC_VECTOR:
1820 smp_error_interrupt(regs);
1821 break;
1822 #ifdef CONFIG_X86_MCE_P4THERMAL
1823 case THERMAL_APIC_VECTOR:
1824 smp_thermal_interrupt(regs);
1825 break;
1826 #endif
1827 default:
1828 regs->entry_vector = vector;
1829 do_IRQ(regs);
1830 break;
1834 #if defined (__x86_64__)
1835 void store_cpu_user_regs(struct cpu_user_regs *regs)
1837 __vmread(GUEST_SS_SELECTOR, &regs->ss);
1838 __vmread(GUEST_RSP, &regs->rsp);
1839 __vmread(GUEST_RFLAGS, &regs->rflags);
1840 __vmread(GUEST_CS_SELECTOR, &regs->cs);
1841 __vmread(GUEST_DS_SELECTOR, &regs->ds);
1842 __vmread(GUEST_ES_SELECTOR, &regs->es);
1843 __vmread(GUEST_RIP, &regs->rip);
1845 #elif defined (__i386__)
1846 void store_cpu_user_regs(struct cpu_user_regs *regs)
1848 __vmread(GUEST_SS_SELECTOR, &regs->ss);
1849 __vmread(GUEST_RSP, &regs->esp);
1850 __vmread(GUEST_RFLAGS, &regs->eflags);
1851 __vmread(GUEST_CS_SELECTOR, &regs->cs);
1852 __vmread(GUEST_DS_SELECTOR, &regs->ds);
1853 __vmread(GUEST_ES_SELECTOR, &regs->es);
1854 __vmread(GUEST_RIP, &regs->eip);
1856 #endif
1858 #ifdef XEN_DEBUGGER
1859 void save_cpu_user_regs(struct cpu_user_regs *regs)
1861 __vmread(GUEST_SS_SELECTOR, &regs->xss);
1862 __vmread(GUEST_RSP, &regs->esp);
1863 __vmread(GUEST_RFLAGS, &regs->eflags);
1864 __vmread(GUEST_CS_SELECTOR, &regs->xcs);
1865 __vmread(GUEST_RIP, &regs->eip);
1867 __vmread(GUEST_GS_SELECTOR, &regs->xgs);
1868 __vmread(GUEST_FS_SELECTOR, &regs->xfs);
1869 __vmread(GUEST_ES_SELECTOR, &regs->xes);
1870 __vmread(GUEST_DS_SELECTOR, &regs->xds);
1873 void restore_cpu_user_regs(struct cpu_user_regs *regs)
1875 __vmwrite(GUEST_SS_SELECTOR, regs->xss);
1876 __vmwrite(GUEST_RSP, regs->esp);
1877 __vmwrite(GUEST_RFLAGS, regs->eflags);
1878 __vmwrite(GUEST_CS_SELECTOR, regs->xcs);
1879 __vmwrite(GUEST_RIP, regs->eip);
1881 __vmwrite(GUEST_GS_SELECTOR, regs->xgs);
1882 __vmwrite(GUEST_FS_SELECTOR, regs->xfs);
1883 __vmwrite(GUEST_ES_SELECTOR, regs->xes);
1884 __vmwrite(GUEST_DS_SELECTOR, regs->xds);
1886 #endif
1888 asmlinkage void vmx_vmexit_handler(struct cpu_user_regs regs)
1890 unsigned int exit_reason, idtv_info_field;
1891 unsigned long exit_qualification, eip, inst_len = 0;
1892 struct vcpu *v = current;
1893 int error;
1895 if ((error = __vmread(VM_EXIT_REASON, &exit_reason)))
1896 __hvm_bug(&regs);
1898 perfc_incra(vmexits, exit_reason);
1900 __vmread(IDT_VECTORING_INFO_FIELD, &idtv_info_field);
1901 if (idtv_info_field & INTR_INFO_VALID_MASK) {
1902 __vmwrite(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field);
1904 __vmread(VM_EXIT_INSTRUCTION_LEN, &inst_len);
1905 if (inst_len >= 1 && inst_len <= 15)
1906 __vmwrite(VM_ENTRY_INSTRUCTION_LEN, inst_len);
1908 if (idtv_info_field & 0x800) { /* valid error code */
1909 unsigned long error_code;
1910 __vmread(IDT_VECTORING_ERROR_CODE, &error_code);
1911 __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
1914 HVM_DBG_LOG(DBG_LEVEL_1, "idtv_info_field=%x", idtv_info_field);
1917 /* don't bother logging H/W interrupts */
1918 if (exit_reason != EXIT_REASON_EXTERNAL_INTERRUPT &&
1919 exit_reason != EXIT_REASON_VMCALL &&
1920 exit_reason != EXIT_REASON_IO_INSTRUCTION)
1921 HVM_DBG_LOG(DBG_LEVEL_0, "exit reason = %x", exit_reason);
1923 if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) {
1924 printk("Failed vm entry\n");
1925 domain_crash_synchronous();
1926 return;
1930 __vmread(GUEST_RIP, &eip);
1931 TRACE_3D(TRC_VMX_VMEXIT, v->domain->domain_id, eip, exit_reason);
1932 TRACE_VMEXIT(0,exit_reason);
1935 switch (exit_reason) {
1936 case EXIT_REASON_EXCEPTION_NMI:
1938 /*
1939 * We don't set the software-interrupt exiting (INT n).
1940 * (1) We can get an exception (e.g. #PF) in the guest, or
1941 * (2) NMI
1942 */
1943 int error;
1944 unsigned int vector;
1945 unsigned long va;
1947 if ((error = __vmread(VM_EXIT_INTR_INFO, &vector))
1948 || !(vector & INTR_INFO_VALID_MASK))
1949 __hvm_bug(&regs);
1950 vector &= 0xff;
1952 TRACE_VMEXIT(1,vector);
1953 perfc_incra(cause_vector, vector);
1955 TRACE_3D(TRC_VMX_VECTOR, v->domain->domain_id, eip, vector);
1956 switch (vector) {
1957 #ifdef XEN_DEBUGGER
1958 case TRAP_debug:
1960 save_cpu_user_regs(&regs);
1961 pdb_handle_exception(1, &regs, 1);
1962 restore_cpu_user_regs(&regs);
1963 break;
1965 case TRAP_int3:
1967 save_cpu_user_regs(&regs);
1968 pdb_handle_exception(3, &regs, 1);
1969 restore_cpu_user_regs(&regs);
1970 break;
1972 #else
1973 case TRAP_debug:
1975 void store_cpu_user_regs(struct cpu_user_regs *regs);
1977 store_cpu_user_regs(&regs);
1978 __vm_clear_bit(GUEST_PENDING_DBG_EXCEPTIONS, PENDING_DEBUG_EXC_BS);
1980 domain_pause_for_debugger();
1982 break;
1984 #endif
1985 case TRAP_no_device:
1987 vmx_do_no_device_fault();
1988 break;
1990 case TRAP_page_fault:
1992 __vmread(EXIT_QUALIFICATION, &va);
1993 __vmread(VM_EXIT_INTR_ERROR_CODE, &regs.error_code);
1995 TRACE_VMEXIT(3,regs.error_code);
1996 TRACE_VMEXIT(4,va);
1998 HVM_DBG_LOG(DBG_LEVEL_VMMU,
1999 "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx",
2000 (unsigned long)regs.eax, (unsigned long)regs.ebx,
2001 (unsigned long)regs.ecx, (unsigned long)regs.edx,
2002 (unsigned long)regs.esi, (unsigned long)regs.edi);
2003 v->arch.hvm_vcpu.mmio_op.inst_decoder_regs = &regs;
2005 if (!(error = vmx_do_page_fault(va, &regs))) {
2006 /*
2007 * Inject #PG using Interruption-Information Fields
2008 */
2009 vmx_inject_exception(v, TRAP_page_fault, regs.error_code);
2010 v->arch.hvm_vmx.cpu_cr2 = va;
2011 TRACE_3D(TRC_VMX_INT, v->domain->domain_id, TRAP_page_fault, va);
2013 break;
2015 case TRAP_nmi:
2016 do_nmi(&regs);
2017 break;
2018 default:
2019 vmx_reflect_exception(v);
2020 break;
2022 break;
2024 case EXIT_REASON_EXTERNAL_INTERRUPT:
2025 vmx_vmexit_do_extint(&regs);
2026 break;
2027 case EXIT_REASON_PENDING_INTERRUPT:
2028 __vmwrite(CPU_BASED_VM_EXEC_CONTROL,
2029 MONITOR_CPU_BASED_EXEC_CONTROLS);
2030 break;
2031 case EXIT_REASON_TASK_SWITCH:
2032 __hvm_bug(&regs);
2033 break;
2034 case EXIT_REASON_CPUID:
2035 __get_instruction_length(inst_len);
2036 vmx_vmexit_do_cpuid(regs.eax, &regs);
2037 __update_guest_eip(inst_len);
2038 break;
2039 case EXIT_REASON_HLT:
2040 __get_instruction_length(inst_len);
2041 __update_guest_eip(inst_len);
2042 vmx_vmexit_do_hlt();
2043 break;
2044 case EXIT_REASON_INVLPG:
2046 unsigned long va;
2048 __vmread(EXIT_QUALIFICATION, &va);
2049 vmx_vmexit_do_invlpg(va);
2050 __get_instruction_length(inst_len);
2051 __update_guest_eip(inst_len);
2052 break;
2054 case EXIT_REASON_VMCALL:
2055 __get_instruction_length(inst_len);
2056 __vmread(GUEST_RIP, &eip);
2057 __vmread(EXIT_QUALIFICATION, &exit_qualification);
2059 hvm_print_line(v, regs.eax); /* provides the current domain */
2060 __update_guest_eip(inst_len);
2061 break;
2062 case EXIT_REASON_CR_ACCESS:
2064 __vmread(GUEST_RIP, &eip);
2065 __get_instruction_length(inst_len);
2066 __vmread(EXIT_QUALIFICATION, &exit_qualification);
2068 HVM_DBG_LOG(DBG_LEVEL_1, "eip = %lx, inst_len =%lx, exit_qualification = %lx",
2069 eip, inst_len, exit_qualification);
2070 if (vmx_cr_access(exit_qualification, &regs))
2071 __update_guest_eip(inst_len);
2072 TRACE_VMEXIT(3,regs.error_code);
2073 TRACE_VMEXIT(4,exit_qualification);
2074 break;
2076 case EXIT_REASON_DR_ACCESS:
2077 __vmread(EXIT_QUALIFICATION, &exit_qualification);
2078 vmx_dr_access(exit_qualification, &regs);
2079 __get_instruction_length(inst_len);
2080 __update_guest_eip(inst_len);
2081 break;
2082 case EXIT_REASON_IO_INSTRUCTION:
2083 __vmread(EXIT_QUALIFICATION, &exit_qualification);
2084 __get_instruction_length(inst_len);
2085 vmx_io_instruction(&regs, exit_qualification, inst_len);
2086 TRACE_VMEXIT(4,exit_qualification);
2087 break;
2088 case EXIT_REASON_MSR_READ:
2089 __get_instruction_length(inst_len);
2090 vmx_do_msr_read(&regs);
2091 __update_guest_eip(inst_len);
2092 break;
2093 case EXIT_REASON_MSR_WRITE:
2094 __vmread(GUEST_RIP, &eip);
2095 vmx_do_msr_write(&regs);
2096 __get_instruction_length(inst_len);
2097 __update_guest_eip(inst_len);
2098 break;
2099 case EXIT_REASON_MWAIT_INSTRUCTION:
2100 __hvm_bug(&regs);
2101 break;
2102 default:
2103 __hvm_bug(&regs); /* should not happen */
2107 asmlinkage void vmx_load_cr2(void)
2108 {
2109 struct vcpu *v = current;
2111 local_irq_disable();
2112 #ifdef __i386__
2113 asm volatile("movl %0,%%cr2": :"r" (v->arch.hvm_vmx.cpu_cr2));
2114 #else
2115 asm volatile("movq %0,%%cr2": :"r" (v->arch.hvm_vmx.cpu_cr2));
2116 #endif
2117 }
2119 asmlinkage void vmx_trace_vmentry (void)
2120 {
2121 TRACE_5D(TRC_VMENTRY,
2122 trace_values[smp_processor_id()][0],
2123 trace_values[smp_processor_id()][1],
2124 trace_values[smp_processor_id()][2],
2125 trace_values[smp_processor_id()][3],
2126 trace_values[smp_processor_id()][4]);
2127 TRACE_VMEXIT(0,9);
2128 TRACE_VMEXIT(1,9);
2129 TRACE_VMEXIT(2,9);
2130 TRACE_VMEXIT(3,9);
2131 TRACE_VMEXIT(4,9);
2132 return;
2133 }
2135 asmlinkage void vmx_trace_vmexit (void)
2136 {
2137 TRACE_3D(TRC_VMEXIT,0,0,0);
2138 return;
2139 }
2141 /*
2142 * Local variables:
2143 * mode: C
2144 * c-set-style: "BSD"
2145 * c-basic-offset: 4
2146 * tab-width: 4
2147 * indent-tabs-mode: nil
2148 * End:
2149 */