Repository: ia64/xen-unstable
File: xen/arch/x86/hvm/vmx/vmx.c @ changeset 15447:5eec9a8825d4

Fix VMX guests failing to boot after MCE is enabled.
Signed-off-by: Xin Li <xin.b.li@intel.com>

Author: Keir Fraser <keir@xensource.com>
Date: Wed Jun 27 20:08:21 2007 +0100 (2007-06-27)
Parents: b14bbd41e9dc
Children: 87d34c8c2fe1
1 /*
2 * vmx.c: handling VMX architecture-related VM exits
3 * Copyright (c) 2004, Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
16 * Place - Suite 330, Boston, MA 02111-1307 USA.
17 *
18 */
20 #include <xen/config.h>
21 #include <xen/init.h>
22 #include <xen/lib.h>
23 #include <xen/trace.h>
24 #include <xen/sched.h>
25 #include <xen/irq.h>
26 #include <xen/softirq.h>
27 #include <xen/domain_page.h>
28 #include <xen/hypercall.h>
29 #include <xen/perfc.h>
30 #include <asm/current.h>
31 #include <asm/io.h>
32 #include <asm/regs.h>
33 #include <asm/cpufeature.h>
34 #include <asm/processor.h>
35 #include <asm/types.h>
36 #include <asm/msr.h>
37 #include <asm/spinlock.h>
38 #include <asm/paging.h>
39 #include <asm/p2m.h>
40 #include <asm/hvm/hvm.h>
41 #include <asm/hvm/support.h>
42 #include <asm/hvm/vmx/vmx.h>
43 #include <asm/hvm/vmx/vmcs.h>
44 #include <asm/hvm/vmx/cpu.h>
45 #include <public/sched.h>
46 #include <public/hvm/ioreq.h>
47 #include <asm/hvm/vpic.h>
48 #include <asm/hvm/vlapic.h>
49 #include <asm/x86_emulate.h>
50 #include <asm/hvm/vpt.h>
51 #include <public/hvm/save.h>
52 #include <asm/hvm/trace.h>
54 char *vmx_msr_bitmap;
56 static void vmx_ctxt_switch_from(struct vcpu *v);
57 static void vmx_ctxt_switch_to(struct vcpu *v);
59 static int vmx_alloc_vlapic_mapping(struct domain *d);
60 static void vmx_free_vlapic_mapping(struct domain *d);
61 static void vmx_install_vlapic_mapping(struct vcpu *v);
63 static int vmx_domain_initialise(struct domain *d)
64 {
65 return vmx_alloc_vlapic_mapping(d);
66 }
68 static void vmx_domain_destroy(struct domain *d)
69 {
70 vmx_free_vlapic_mapping(d);
71 }
73 static int vmx_vcpu_initialise(struct vcpu *v)
74 {
75 int rc;
77 spin_lock_init(&v->arch.hvm_vmx.vmcs_lock);
79 v->arch.schedule_tail = vmx_do_resume;
80 v->arch.ctxt_switch_from = vmx_ctxt_switch_from;
81 v->arch.ctxt_switch_to = vmx_ctxt_switch_to;
83 if ( (rc = vmx_create_vmcs(v)) != 0 )
84 {
85 dprintk(XENLOG_WARNING,
86 "Failed to create VMCS for vcpu %d: err=%d.\n",
87 v->vcpu_id, rc);
88 return rc;
89 }
91 vmx_install_vlapic_mapping(v);
93 return 0;
94 }
96 static void vmx_vcpu_destroy(struct vcpu *v)
97 {
98 vmx_destroy_vmcs(v);
99 }
101 #ifdef __x86_64__
103 static DEFINE_PER_CPU(struct vmx_msr_state, host_msr_state);
105 static u32 msr_index[VMX_MSR_COUNT] =
106 {
107 MSR_LSTAR, MSR_STAR, MSR_SYSCALL_MASK
108 };
110 static void vmx_save_host_msrs(void)
111 {
112 struct vmx_msr_state *host_msr_state = &this_cpu(host_msr_state);
113 int i;
115 for ( i = 0; i < VMX_MSR_COUNT; i++ )
116 rdmsrl(msr_index[i], host_msr_state->msrs[i]);
117 }
119 #define WRITE_MSR(address) \
120 guest_msr_state->msrs[VMX_INDEX_MSR_ ## address] = msr_content; \
121 set_bit(VMX_INDEX_MSR_ ## address, &guest_msr_state->flags); \
122 wrmsrl(MSR_ ## address, msr_content); \
123 set_bit(VMX_INDEX_MSR_ ## address, &host_msr_state->flags); \
124 break
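/*
 * For reference, WRITE_MSR(LSTAR) expands to the following: the value is
 * cached in the guest's MSR state, flagged in both the guest and host flag
 * bitmaps, and written straight through to the hardware MSR:
 *
 *     guest_msr_state->msrs[VMX_INDEX_MSR_LSTAR] = msr_content;
 *     set_bit(VMX_INDEX_MSR_LSTAR, &guest_msr_state->flags);
 *     wrmsrl(MSR_LSTAR, msr_content);
 *     set_bit(VMX_INDEX_MSR_LSTAR, &host_msr_state->flags);
 *     break;
 */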
126 static inline int long_mode_do_msr_read(struct cpu_user_regs *regs)
127 {
128 u64 msr_content = 0;
129 u32 ecx = regs->ecx;
130 struct vcpu *v = current;
131 struct vmx_msr_state *guest_msr_state = &v->arch.hvm_vmx.msr_state;
133 switch ( ecx ) {
134 case MSR_EFER:
135 msr_content = v->arch.hvm_vmx.efer;
136 break;
138 case MSR_FS_BASE:
139 msr_content = __vmread(GUEST_FS_BASE);
140 goto check_long_mode;
142 case MSR_GS_BASE:
143 msr_content = __vmread(GUEST_GS_BASE);
144 goto check_long_mode;
146 case MSR_SHADOW_GS_BASE:
147 msr_content = v->arch.hvm_vmx.shadow_gs;
148 check_long_mode:
149 if ( !(vmx_long_mode_enabled(v)) )
150 {
151 vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
152 return 0;
153 }
154 break;
156 case MSR_STAR:
157 msr_content = guest_msr_state->msrs[VMX_INDEX_MSR_STAR];
158 break;
160 case MSR_LSTAR:
161 msr_content = guest_msr_state->msrs[VMX_INDEX_MSR_LSTAR];
162 break;
164 case MSR_CSTAR:
165 msr_content = v->arch.hvm_vmx.cstar;
166 break;
168 case MSR_SYSCALL_MASK:
169 msr_content = guest_msr_state->msrs[VMX_INDEX_MSR_SYSCALL_MASK];
170 break;
172 default:
173 return 0;
174 }
176 HVM_DBG_LOG(DBG_LEVEL_0, "msr 0x%x content 0x%"PRIx64, ecx, msr_content);
178 regs->eax = (u32)(msr_content >> 0);
179 regs->edx = (u32)(msr_content >> 32);
181 return 1;
182 }
184 static inline int long_mode_do_msr_write(struct cpu_user_regs *regs)
185 {
186 u64 msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
187 u32 ecx = regs->ecx;
188 struct vcpu *v = current;
189 struct vmx_msr_state *guest_msr_state = &v->arch.hvm_vmx.msr_state;
190 struct vmx_msr_state *host_msr_state = &this_cpu(host_msr_state);
192 HVM_DBG_LOG(DBG_LEVEL_0, "msr 0x%x content 0x%"PRIx64, ecx, msr_content);
194 switch ( ecx )
195 {
196 case MSR_EFER:
197 /* offending reserved bit will cause #GP */
198 if ( (msr_content & ~(EFER_LME | EFER_LMA | EFER_NX | EFER_SCE)) ||
199 (!cpu_has_nx && (msr_content & EFER_NX)) ||
200 (!cpu_has_syscall && (msr_content & EFER_SCE)) )
201 {
202 gdprintk(XENLOG_WARNING, "Trying to set reserved bit in "
203 "EFER: %"PRIx64"\n", msr_content);
204 goto gp_fault;
205 }
207 if ( (msr_content & EFER_LME)
208 && !(v->arch.hvm_vmx.efer & EFER_LME) )
209 {
210 if ( unlikely(vmx_paging_enabled(v)) )
211 {
212 gdprintk(XENLOG_WARNING,
213 "Trying to set EFER.LME with paging enabled\n");
214 goto gp_fault;
215 }
216 }
217 else if ( !(msr_content & EFER_LME)
218 && (v->arch.hvm_vmx.efer & EFER_LME) )
219 {
220 if ( unlikely(vmx_paging_enabled(v)) )
221 {
222 gdprintk(XENLOG_WARNING,
223 "Trying to clear EFER.LME with paging enabled\n");
224 goto gp_fault;
225 }
226 }
228 if ( (msr_content ^ v->arch.hvm_vmx.efer) & (EFER_NX|EFER_SCE) )
229 write_efer((read_efer() & ~(EFER_NX|EFER_SCE)) |
230 (msr_content & (EFER_NX|EFER_SCE)));
232 v->arch.hvm_vmx.efer = msr_content;
233 break;
235 case MSR_FS_BASE:
236 case MSR_GS_BASE:
237 case MSR_SHADOW_GS_BASE:
238 if ( !vmx_long_mode_enabled(v) )
239 goto gp_fault;
241 if ( !is_canonical_address(msr_content) )
242 goto uncanonical_address;
244 if ( ecx == MSR_FS_BASE )
245 __vmwrite(GUEST_FS_BASE, msr_content);
246 else if ( ecx == MSR_GS_BASE )
247 __vmwrite(GUEST_GS_BASE, msr_content);
248 else
249 {
250 v->arch.hvm_vmx.shadow_gs = msr_content;
251 wrmsrl(MSR_SHADOW_GS_BASE, msr_content);
252 }
254 break;
256 case MSR_STAR:
257 WRITE_MSR(STAR);
259 case MSR_LSTAR:
260 if ( !is_canonical_address(msr_content) )
261 goto uncanonical_address;
262 WRITE_MSR(LSTAR);
264 case MSR_CSTAR:
265 if ( !is_canonical_address(msr_content) )
266 goto uncanonical_address;
267 v->arch.hvm_vmx.cstar = msr_content;
268 break;
270 case MSR_SYSCALL_MASK:
271 WRITE_MSR(SYSCALL_MASK);
273 default:
274 return 0;
275 }
277 return 1;
279 uncanonical_address:
280 HVM_DBG_LOG(DBG_LEVEL_0, "Non-canonical address for MSR 0x%x write", ecx);
281 gp_fault:
282 vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
283 return 0;
284 }
286 /*
287 * To avoid MSR save/restore at every VM exit/entry time, we restore
288 * the x86_64 specific MSRs at domain switch time. Since these MSRs
289 * are not modified once set for para domains, we don't save them,
290 * but simply reset them to values set in percpu_traps_init().
291 */
292 static void vmx_restore_host_msrs(void)
293 {
294 struct vmx_msr_state *host_msr_state = &this_cpu(host_msr_state);
295 int i;
297 while ( host_msr_state->flags )
298 {
299 i = find_first_set_bit(host_msr_state->flags);
300 wrmsrl(msr_index[i], host_msr_state->msrs[i]);
301 clear_bit(i, &host_msr_state->flags);
302 }
303 if ( cpu_has_nx && !(read_efer() & EFER_NX) )
304 write_efer(read_efer() | EFER_NX);
305 }
307 static void vmx_save_guest_msrs(struct vcpu *v)
308 {
309 /* MSR_SHADOW_GS_BASE may have been changed by swapgs instruction. */
310 rdmsrl(MSR_SHADOW_GS_BASE, v->arch.hvm_vmx.shadow_gs);
311 }
313 static void vmx_restore_guest_msrs(struct vcpu *v)
314 {
315 struct vmx_msr_state *guest_msr_state, *host_msr_state;
316 unsigned long guest_flags;
317 int i;
319 guest_msr_state = &v->arch.hvm_vmx.msr_state;
320 host_msr_state = &this_cpu(host_msr_state);
322 wrmsrl(MSR_SHADOW_GS_BASE, v->arch.hvm_vmx.shadow_gs);
324 guest_flags = guest_msr_state->flags;
326 while ( guest_flags ) {
327 i = find_first_set_bit(guest_flags);
329 HVM_DBG_LOG(DBG_LEVEL_2,
330 "restore guest's index %d msr %x with value %lx",
331 i, msr_index[i], guest_msr_state->msrs[i]);
332 set_bit(i, &host_msr_state->flags);
333 wrmsrl(msr_index[i], guest_msr_state->msrs[i]);
334 clear_bit(i, &guest_flags);
335 }
337 if ( (v->arch.hvm_vmx.efer ^ read_efer()) & (EFER_NX|EFER_SCE) )
338 {
339 HVM_DBG_LOG(DBG_LEVEL_2,
340 "restore guest's EFER with value %lx",
341 v->arch.hvm_vmx.efer);
342 write_efer((read_efer() & ~(EFER_NX|EFER_SCE)) |
343 (v->arch.hvm_vmx.efer & (EFER_NX|EFER_SCE)));
344 }
345 }
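/*
 * Illustrative note: these helpers implement lazy MSR switching and are
 * wired into the context-switch path via vmx_ctxt_switch_from() and
 * vmx_ctxt_switch_to() further down.  For example, if a guest has only ever
 * written MSR_LSTAR, guest_msr_state->flags == (1 << VMX_INDEX_MSR_LSTAR),
 * so vmx_restore_guest_msrs() reloads just that one MSR (and marks it in
 * host_msr_state->flags); vmx_restore_host_msrs() later restores only the
 * corresponding host value captured by vmx_save_host_msrs().
 */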
347 #else /* __i386__ */
349 #define vmx_save_host_msrs() ((void)0)
351 static void vmx_restore_host_msrs(void)
352 {
353 if ( cpu_has_nx && !(read_efer() & EFER_NX) )
354 write_efer(read_efer() | EFER_NX);
355 }
357 #define vmx_save_guest_msrs(v) ((void)0)
359 static void vmx_restore_guest_msrs(struct vcpu *v)
360 {
361 if ( (v->arch.hvm_vmx.efer ^ read_efer()) & EFER_NX )
362 {
363 HVM_DBG_LOG(DBG_LEVEL_2,
364 "restore guest's EFER with value %lx",
365 v->arch.hvm_vmx.efer);
366 write_efer((read_efer() & ~EFER_NX) |
367 (v->arch.hvm_vmx.efer & EFER_NX));
368 }
369 }
371 static inline int long_mode_do_msr_read(struct cpu_user_regs *regs)
372 {
373 u64 msr_content = 0;
374 struct vcpu *v = current;
376 switch ( regs->ecx ) {
377 case MSR_EFER:
378 msr_content = v->arch.hvm_vmx.efer;
379 break;
381 default:
382 return 0;
383 }
385 regs->eax = msr_content >> 0;
386 regs->edx = msr_content >> 32;
388 return 1;
389 }
391 static inline int long_mode_do_msr_write(struct cpu_user_regs *regs)
392 {
393 u64 msr_content = regs->eax | ((u64)regs->edx << 32);
394 struct vcpu *v = current;
396 switch ( regs->ecx )
397 {
398 case MSR_EFER:
399 /* offending reserved bit will cause #GP */
400 if ( (msr_content & ~EFER_NX) ||
401 (!cpu_has_nx && (msr_content & EFER_NX)) )
402 {
403 gdprintk(XENLOG_WARNING, "Trying to set reserved bit in "
404 "EFER: %"PRIx64"\n", msr_content);
405 vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
406 return 0;
407 }
409 if ( (msr_content ^ v->arch.hvm_vmx.efer) & EFER_NX )
410 write_efer((read_efer() & ~EFER_NX) | (msr_content & EFER_NX));
412 v->arch.hvm_vmx.efer = msr_content;
413 break;
415 default:
416 return 0;
417 }
419 return 1;
420 }
422 #endif /* __i386__ */
424 #define loaddebug(_v,_reg) \
425 __asm__ __volatile__ ("mov %0,%%db" #_reg : : "r" ((_v)->debugreg[_reg]))
426 #define savedebug(_v,_reg) \
427 __asm__ __volatile__ ("mov %%db" #_reg ",%0" : : "r" ((_v)->debugreg[_reg]))
429 static inline void vmx_save_dr(struct vcpu *v)
430 {
431 if ( !v->arch.hvm_vcpu.flag_dr_dirty )
432 return;
434 /* Clear the DR dirty flag and re-enable intercepts for DR accesses. */
435 v->arch.hvm_vcpu.flag_dr_dirty = 0;
436 v->arch.hvm_vcpu.u.vmx.exec_control |= CPU_BASED_MOV_DR_EXITING;
437 __vmwrite(CPU_BASED_VM_EXEC_CONTROL, v->arch.hvm_vcpu.u.vmx.exec_control);
439 savedebug(&v->arch.guest_context, 0);
440 savedebug(&v->arch.guest_context, 1);
441 savedebug(&v->arch.guest_context, 2);
442 savedebug(&v->arch.guest_context, 3);
443 savedebug(&v->arch.guest_context, 6);
444 v->arch.guest_context.debugreg[7] = __vmread(GUEST_DR7);
445 }
447 static inline void __restore_debug_registers(struct vcpu *v)
448 {
449 loaddebug(&v->arch.guest_context, 0);
450 loaddebug(&v->arch.guest_context, 1);
451 loaddebug(&v->arch.guest_context, 2);
452 loaddebug(&v->arch.guest_context, 3);
453 /* No 4 and 5 */
454 loaddebug(&v->arch.guest_context, 6);
455 /* DR7 is loaded from the VMCS. */
456 }
458 int vmx_vmcs_save(struct vcpu *v, struct hvm_hw_cpu *c)
459 {
460 uint32_t ev;
462 c->rip = __vmread(GUEST_RIP);
463 c->rsp = __vmread(GUEST_RSP);
464 c->rflags = __vmread(GUEST_RFLAGS);
466 c->cr0 = v->arch.hvm_vmx.cpu_shadow_cr0;
467 c->cr2 = v->arch.hvm_vmx.cpu_cr2;
468 c->cr3 = v->arch.hvm_vmx.cpu_cr3;
469 c->cr4 = v->arch.hvm_vmx.cpu_shadow_cr4;
471 #ifdef HVM_DEBUG_SUSPEND
472 printk("vmx_vmcs_save: cr3=0x%"PRIx64", cr0=0x%"PRIx64", cr4=0x%"PRIx64".\n",
473 c->cr3,
474 c->cr0,
475 c->cr4);
476 #endif
478 c->idtr_limit = __vmread(GUEST_IDTR_LIMIT);
479 c->idtr_base = __vmread(GUEST_IDTR_BASE);
481 c->gdtr_limit = __vmread(GUEST_GDTR_LIMIT);
482 c->gdtr_base = __vmread(GUEST_GDTR_BASE);
484 c->cs_sel = __vmread(GUEST_CS_SELECTOR);
485 c->cs_limit = __vmread(GUEST_CS_LIMIT);
486 c->cs_base = __vmread(GUEST_CS_BASE);
487 c->cs_arbytes = __vmread(GUEST_CS_AR_BYTES);
489 c->ds_sel = __vmread(GUEST_DS_SELECTOR);
490 c->ds_limit = __vmread(GUEST_DS_LIMIT);
491 c->ds_base = __vmread(GUEST_DS_BASE);
492 c->ds_arbytes = __vmread(GUEST_DS_AR_BYTES);
494 c->es_sel = __vmread(GUEST_ES_SELECTOR);
495 c->es_limit = __vmread(GUEST_ES_LIMIT);
496 c->es_base = __vmread(GUEST_ES_BASE);
497 c->es_arbytes = __vmread(GUEST_ES_AR_BYTES);
499 c->ss_sel = __vmread(GUEST_SS_SELECTOR);
500 c->ss_limit = __vmread(GUEST_SS_LIMIT);
501 c->ss_base = __vmread(GUEST_SS_BASE);
502 c->ss_arbytes = __vmread(GUEST_SS_AR_BYTES);
504 c->fs_sel = __vmread(GUEST_FS_SELECTOR);
505 c->fs_limit = __vmread(GUEST_FS_LIMIT);
506 c->fs_base = __vmread(GUEST_FS_BASE);
507 c->fs_arbytes = __vmread(GUEST_FS_AR_BYTES);
509 c->gs_sel = __vmread(GUEST_GS_SELECTOR);
510 c->gs_limit = __vmread(GUEST_GS_LIMIT);
511 c->gs_base = __vmread(GUEST_GS_BASE);
512 c->gs_arbytes = __vmread(GUEST_GS_AR_BYTES);
514 c->tr_sel = __vmread(GUEST_TR_SELECTOR);
515 c->tr_limit = __vmread(GUEST_TR_LIMIT);
516 c->tr_base = __vmread(GUEST_TR_BASE);
517 c->tr_arbytes = __vmread(GUEST_TR_AR_BYTES);
519 c->ldtr_sel = __vmread(GUEST_LDTR_SELECTOR);
520 c->ldtr_limit = __vmread(GUEST_LDTR_LIMIT);
521 c->ldtr_base = __vmread(GUEST_LDTR_BASE);
522 c->ldtr_arbytes = __vmread(GUEST_LDTR_AR_BYTES);
524 c->sysenter_cs = __vmread(GUEST_SYSENTER_CS);
525 c->sysenter_esp = __vmread(GUEST_SYSENTER_ESP);
526 c->sysenter_eip = __vmread(GUEST_SYSENTER_EIP);
528 /* Save any event/interrupt that was being injected when we last
529 * exited. IDT_VECTORING_INFO_FIELD has priority, as anything in
530 * VM_ENTRY_INTR_INFO_FIELD is either a fault caused by the first
531 * event, which will happen the next time, or an interrupt, which we
532 * never inject when IDT_VECTORING_INFO_FIELD is valid.*/
533 if ( (ev = __vmread(IDT_VECTORING_INFO_FIELD)) & INTR_INFO_VALID_MASK )
534 {
535 c->pending_event = ev;
536 c->error_code = __vmread(IDT_VECTORING_ERROR_CODE);
537 }
538 else if ( (ev = __vmread(VM_ENTRY_INTR_INFO_FIELD))
539 & INTR_INFO_VALID_MASK )
540 {
541 c->pending_event = ev;
542 c->error_code = __vmread(VM_ENTRY_EXCEPTION_ERROR_CODE);
543 }
544 else
545 {
546 c->pending_event = 0;
547 c->error_code = 0;
548 }
550 return 1;
551 }
553 int vmx_vmcs_restore(struct vcpu *v, struct hvm_hw_cpu *c)
554 {
555 unsigned long mfn, old_base_mfn;
557 vmx_vmcs_enter(v);
559 __vmwrite(GUEST_RIP, c->rip);
560 __vmwrite(GUEST_RSP, c->rsp);
561 __vmwrite(GUEST_RFLAGS, c->rflags);
563 v->arch.hvm_vmx.cpu_cr0 = (c->cr0 | X86_CR0_PE | X86_CR0_PG
564 | X86_CR0_NE | X86_CR0_WP | X86_CR0_ET);
565 __vmwrite(GUEST_CR0, v->arch.hvm_vmx.cpu_cr0);
566 v->arch.hvm_vmx.cpu_shadow_cr0 = c->cr0;
567 __vmwrite(CR0_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr0);
569 v->arch.hvm_vmx.cpu_cr2 = c->cr2;
571 #ifdef HVM_DEBUG_SUSPEND
572 printk("vmx_vmcs_restore: cr3=0x%"PRIx64", cr0=0x%"PRIx64", cr4=0x%"PRIx64".\n",
573 c->cr3,
574 c->cr0,
575 c->cr4);
576 #endif
578 if (!vmx_paging_enabled(v)) {
579 printk("vmx_vmcs_restore: paging not enabled.\n");
580 goto skip_cr3;
581 }
583 HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 c->cr3 = %"PRIx64, c->cr3);
584 /* current!=vcpu as not called by arch_vmx_do_launch */
585 mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
586 if( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain)) {
587 goto bad_cr3;
588 }
589 old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
590 v->arch.guest_table = pagetable_from_pfn(mfn);
591 if (old_base_mfn)
592 put_page(mfn_to_page(old_base_mfn));
593 v->arch.hvm_vmx.cpu_cr3 = c->cr3;
595 skip_cr3:
596 #if defined(__x86_64__)
597 if (vmx_long_mode_enabled(v)) {
598 unsigned long vm_entry_value;
599 vm_entry_value = __vmread(VM_ENTRY_CONTROLS);
600 vm_entry_value |= VM_ENTRY_IA32E_MODE;
601 __vmwrite(VM_ENTRY_CONTROLS, vm_entry_value);
602 }
603 #endif
605 __vmwrite(GUEST_CR4, (c->cr4 | HVM_CR4_HOST_MASK));
606 v->arch.hvm_vmx.cpu_shadow_cr4 = c->cr4;
607 __vmwrite(CR4_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr4);
609 __vmwrite(GUEST_IDTR_LIMIT, c->idtr_limit);
610 __vmwrite(GUEST_IDTR_BASE, c->idtr_base);
612 __vmwrite(GUEST_GDTR_LIMIT, c->gdtr_limit);
613 __vmwrite(GUEST_GDTR_BASE, c->gdtr_base);
615 __vmwrite(GUEST_CS_SELECTOR, c->cs_sel);
616 __vmwrite(GUEST_CS_LIMIT, c->cs_limit);
617 __vmwrite(GUEST_CS_BASE, c->cs_base);
618 __vmwrite(GUEST_CS_AR_BYTES, c->cs_arbytes);
620 __vmwrite(GUEST_DS_SELECTOR, c->ds_sel);
621 __vmwrite(GUEST_DS_LIMIT, c->ds_limit);
622 __vmwrite(GUEST_DS_BASE, c->ds_base);
623 __vmwrite(GUEST_DS_AR_BYTES, c->ds_arbytes);
625 __vmwrite(GUEST_ES_SELECTOR, c->es_sel);
626 __vmwrite(GUEST_ES_LIMIT, c->es_limit);
627 __vmwrite(GUEST_ES_BASE, c->es_base);
628 __vmwrite(GUEST_ES_AR_BYTES, c->es_arbytes);
630 __vmwrite(GUEST_SS_SELECTOR, c->ss_sel);
631 __vmwrite(GUEST_SS_LIMIT, c->ss_limit);
632 __vmwrite(GUEST_SS_BASE, c->ss_base);
633 __vmwrite(GUEST_SS_AR_BYTES, c->ss_arbytes);
635 __vmwrite(GUEST_FS_SELECTOR, c->fs_sel);
636 __vmwrite(GUEST_FS_LIMIT, c->fs_limit);
637 __vmwrite(GUEST_FS_BASE, c->fs_base);
638 __vmwrite(GUEST_FS_AR_BYTES, c->fs_arbytes);
640 __vmwrite(GUEST_GS_SELECTOR, c->gs_sel);
641 __vmwrite(GUEST_GS_LIMIT, c->gs_limit);
642 __vmwrite(GUEST_GS_BASE, c->gs_base);
643 __vmwrite(GUEST_GS_AR_BYTES, c->gs_arbytes);
645 __vmwrite(GUEST_TR_SELECTOR, c->tr_sel);
646 __vmwrite(GUEST_TR_LIMIT, c->tr_limit);
647 __vmwrite(GUEST_TR_BASE, c->tr_base);
648 __vmwrite(GUEST_TR_AR_BYTES, c->tr_arbytes);
650 __vmwrite(GUEST_LDTR_SELECTOR, c->ldtr_sel);
651 __vmwrite(GUEST_LDTR_LIMIT, c->ldtr_limit);
652 __vmwrite(GUEST_LDTR_BASE, c->ldtr_base);
653 __vmwrite(GUEST_LDTR_AR_BYTES, c->ldtr_arbytes);
655 __vmwrite(GUEST_SYSENTER_CS, c->sysenter_cs);
656 __vmwrite(GUEST_SYSENTER_ESP, c->sysenter_esp);
657 __vmwrite(GUEST_SYSENTER_EIP, c->sysenter_eip);
659 __vmwrite(GUEST_DR7, c->dr7);
661 vmx_vmcs_exit(v);
663 paging_update_paging_modes(v);
665 if ( c->pending_valid )
666 {
667 vmx_vmcs_enter(v);
668 gdprintk(XENLOG_INFO, "Re-injecting 0x%"PRIx32", 0x%"PRIx32"\n",
669 c->pending_event, c->error_code);
671 /* SVM uses type 3 ("Exception") for #OF and #BP; VMX uses type 6 */
672 if ( c->pending_type == 3
673 && (c->pending_vector == 3 || c->pending_vector == 4) )
674 c->pending_type = 6;
676 /* For software exceptions, we need to tell the hardware the
677 * instruction length as well (hmmm). */
678 if ( c->pending_type > 4 )
679 {
680 int addrbytes, ilen;
681 if ( (c->cs_arbytes & (1u<<13)) && (c->msr_efer & EFER_LMA) )
682 addrbytes = 8;
683 else if ( (c->cs_arbytes & (1u<<14)) )
684 addrbytes = 4;
685 else
686 addrbytes = 2;
687 ilen = hvm_instruction_length(c->rip, addrbytes);
688 __vmwrite(VM_ENTRY_INSTRUCTION_LEN, ilen);
689 }
691 /* Sanity check */
692 if ( c->pending_type == 1 || c->pending_type > 6
693 || c->pending_reserved != 0 )
694 {
695 gdprintk(XENLOG_ERR, "Invalid pending event 0x%"PRIx32"\n",
696 c->pending_event);
697 return -EINVAL;
698 }
699 /* Re-inject the exception */
700 __vmwrite(VM_ENTRY_INTR_INFO_FIELD, c->pending_event);
701 __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE, c->error_code);
702 v->arch.hvm_vmx.vector_injected = 1;
703 vmx_vmcs_exit(v);
704 }
706 return 0;
708 bad_cr3:
709 gdprintk(XENLOG_ERR, "Invalid CR3 value=0x%"PRIx64"", c->cr3);
710 vmx_vmcs_exit(v);
711 return -EINVAL;
712 }
714 #if defined(__x86_64__) && defined(HVM_DEBUG_SUSPEND)
715 static void dump_msr_state(struct vmx_msr_state *m)
716 {
717 int i = 0;
718 printk("**** msr state ****\n");
719 printk("shadow_gs=0x%lx, flags=0x%lx, msr_items:", m->shadow_gs, m->flags);
720 for (i = 0; i < VMX_MSR_COUNT; i++)
721 printk("0x%lx,", m->msrs[i]);
722 printk("\n");
723 }
724 #else
725 #define dump_msr_state(m) ((void)0)
726 #endif
728 static void vmx_save_cpu_state(struct vcpu *v, struct hvm_hw_cpu *data)
729 {
730 #ifdef __x86_64__
731 struct vmx_msr_state *guest_state = &v->arch.hvm_vmx.msr_state;
732 unsigned long guest_flags = guest_state->flags;
734 data->shadow_gs = v->arch.hvm_vmx.shadow_gs;
735 data->msr_cstar = v->arch.hvm_vmx.cstar;
737 /* save msrs */
738 data->msr_flags = guest_flags;
739 data->msr_lstar = guest_state->msrs[VMX_INDEX_MSR_LSTAR];
740 data->msr_star = guest_state->msrs[VMX_INDEX_MSR_STAR];
741 data->msr_syscall_mask = guest_state->msrs[VMX_INDEX_MSR_SYSCALL_MASK];
742 #endif
744 data->msr_efer = v->arch.hvm_vmx.efer;
746 data->tsc = hvm_get_guest_time(v);
748 dump_msr_state(guest_state);
749 }
751 static void vmx_load_cpu_state(struct vcpu *v, struct hvm_hw_cpu *data)
752 {
753 #ifdef __x86_64__
754 struct vmx_msr_state *guest_state = &v->arch.hvm_vmx.msr_state;
756 /* restore msrs */
757 guest_state->flags = data->msr_flags;
758 guest_state->msrs[VMX_INDEX_MSR_LSTAR] = data->msr_lstar;
759 guest_state->msrs[VMX_INDEX_MSR_STAR] = data->msr_star;
760 guest_state->msrs[VMX_INDEX_MSR_SYSCALL_MASK] = data->msr_syscall_mask;
762 v->arch.hvm_vmx.cstar = data->msr_cstar;
763 v->arch.hvm_vmx.shadow_gs = data->shadow_gs;
764 #endif
766 v->arch.hvm_vmx.efer = data->msr_efer;
768 v->arch.hvm_vmx.vmxassist_enabled = !(data->cr0 & X86_CR0_PE);
770 hvm_set_guest_time(v, data->tsc);
772 dump_msr_state(guest_state);
773 }
776 static void vmx_save_vmcs_ctxt(struct vcpu *v, struct hvm_hw_cpu *ctxt)
777 {
778 vmx_save_cpu_state(v, ctxt);
779 vmx_vmcs_enter(v);
780 vmx_vmcs_save(v, ctxt);
781 vmx_vmcs_exit(v);
782 }
784 static int vmx_load_vmcs_ctxt(struct vcpu *v, struct hvm_hw_cpu *ctxt)
785 {
786 vmx_load_cpu_state(v, ctxt);
787 if (vmx_vmcs_restore(v, ctxt)) {
788 printk("vmx_vmcs restore failed!\n");
789 domain_crash(v->domain);
790 return -EINVAL;
791 }
793 return 0;
794 }
796 /*
797 * DR7 is saved and restored on every vmexit. Other debug registers only
798 * need to be restored if their value is going to affect execution -- i.e.,
799 * if one of the breakpoints is enabled. So mask out all bits that don't
800 * enable some breakpoint functionality.
801 */
802 #define DR7_ACTIVE_MASK 0xff
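/*
 * Worked example: a guest that enables hardware breakpoint 0 sets DR7.L0
 * (bit 0), so (debugreg[7] & DR7_ACTIVE_MASK) != 0 and vmx_restore_dr()
 * below reloads DR0-DR3 and DR6.  A DR7 with none of the L0-L3/G0-G3
 * enable bits (bits 0-7) set leaves the mask zero and the reload is skipped.
 */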
804 static inline void vmx_restore_dr(struct vcpu *v)
805 {
806 /* NB. __vmread() is not usable here, so we cannot read from the VMCS. */
807 if ( unlikely(v->arch.guest_context.debugreg[7] & DR7_ACTIVE_MASK) )
808 __restore_debug_registers(v);
809 }
811 static void vmx_ctxt_switch_from(struct vcpu *v)
812 {
813 vmx_save_guest_msrs(v);
814 vmx_restore_host_msrs();
815 vmx_save_dr(v);
816 }
818 static void vmx_ctxt_switch_to(struct vcpu *v)
819 {
820 vmx_restore_guest_msrs(v);
821 vmx_restore_dr(v);
822 }
824 static void stop_vmx(void)
825 {
826 if ( !(read_cr4() & X86_CR4_VMXE) )
827 return;
829 __vmxoff();
830 clear_in_cr4(X86_CR4_VMXE);
831 }
833 static void vmx_store_cpu_guest_regs(
834 struct vcpu *v, struct cpu_user_regs *regs, unsigned long *crs)
835 {
836 vmx_vmcs_enter(v);
838 if ( regs != NULL )
839 {
840 regs->eflags = __vmread(GUEST_RFLAGS);
841 regs->ss = __vmread(GUEST_SS_SELECTOR);
842 regs->cs = __vmread(GUEST_CS_SELECTOR);
843 regs->eip = __vmread(GUEST_RIP);
844 regs->esp = __vmread(GUEST_RSP);
845 }
847 if ( crs != NULL )
848 {
849 crs[0] = v->arch.hvm_vmx.cpu_shadow_cr0;
850 crs[2] = v->arch.hvm_vmx.cpu_cr2;
851 crs[3] = v->arch.hvm_vmx.cpu_cr3;
852 crs[4] = v->arch.hvm_vmx.cpu_shadow_cr4;
853 }
855 vmx_vmcs_exit(v);
856 }
858 static void vmx_load_cpu_guest_regs(struct vcpu *v, struct cpu_user_regs *regs)
859 {
860 unsigned long base;
862 vmx_vmcs_enter(v);
864 __vmwrite(GUEST_SS_SELECTOR, regs->ss);
865 __vmwrite(GUEST_RSP, regs->esp);
867 /* NB. Bit 1 of RFLAGS must be set for VMENTRY to succeed. */
868 __vmwrite(GUEST_RFLAGS, regs->eflags | 2UL);
870 if ( regs->eflags & EF_VM )
871 {
872 /*
873 * The VMX spec (section 4.3.1.2, Checks on Guest Segment
874 * Registers) says that virtual-8086 mode guests' segment
875 * base-address fields in the VMCS must be equal to their
876 * corresponding segment selector field shifted right by
877 * four bits upon vmentry.
878 */
879 base = __vmread(GUEST_CS_BASE);
880 if ( (regs->cs << 4) != base )
881 __vmwrite(GUEST_CS_BASE, regs->cs << 4);
882 base = __vmread(GUEST_SS_BASE);
883 if ( (regs->ss << 4) != base )
884 __vmwrite(GUEST_SS_BASE, regs->ss << 4);
885 }
887 __vmwrite(GUEST_CS_SELECTOR, regs->cs);
888 __vmwrite(GUEST_RIP, regs->eip);
890 vmx_vmcs_exit(v);
891 }
893 static unsigned long vmx_get_ctrl_reg(struct vcpu *v, unsigned int num)
894 {
895 switch ( num )
896 {
897 case 0:
898 return v->arch.hvm_vmx.cpu_cr0;
899 case 2:
900 return v->arch.hvm_vmx.cpu_cr2;
901 case 3:
902 return v->arch.hvm_vmx.cpu_cr3;
903 case 4:
904 return v->arch.hvm_vmx.cpu_shadow_cr4;
905 default:
906 BUG();
907 }
908 return 0; /* dummy */
909 }
911 static unsigned long vmx_get_segment_base(struct vcpu *v, enum x86_segment seg)
912 {
913 unsigned long base = 0;
914 int long_mode = 0;
916 ASSERT(v == current);
918 #ifdef __x86_64__
919 if ( vmx_long_mode_enabled(v) &&
920 (__vmread(GUEST_CS_AR_BYTES) & X86_SEG_AR_CS_LM_ACTIVE) )
921 long_mode = 1;
922 #endif
924 switch ( seg )
925 {
926 case x86_seg_cs: if ( !long_mode ) base = __vmread(GUEST_CS_BASE); break;
927 case x86_seg_ds: if ( !long_mode ) base = __vmread(GUEST_DS_BASE); break;
928 case x86_seg_es: if ( !long_mode ) base = __vmread(GUEST_ES_BASE); break;
929 case x86_seg_fs: base = __vmread(GUEST_FS_BASE); break;
930 case x86_seg_gs: base = __vmread(GUEST_GS_BASE); break;
931 case x86_seg_ss: if ( !long_mode ) base = __vmread(GUEST_SS_BASE); break;
932 case x86_seg_tr: base = __vmread(GUEST_TR_BASE); break;
933 case x86_seg_gdtr: base = __vmread(GUEST_GDTR_BASE); break;
934 case x86_seg_idtr: base = __vmread(GUEST_IDTR_BASE); break;
935 case x86_seg_ldtr: base = __vmread(GUEST_LDTR_BASE); break;
936 default: BUG(); break;
937 }
939 return base;
940 }
942 static void vmx_get_segment_register(struct vcpu *v, enum x86_segment seg,
943 struct segment_register *reg)
944 {
945 u16 attr = 0;
947 ASSERT(v == current);
949 switch ( seg )
950 {
951 case x86_seg_cs:
952 reg->sel = __vmread(GUEST_CS_SELECTOR);
953 reg->limit = __vmread(GUEST_CS_LIMIT);
954 reg->base = __vmread(GUEST_CS_BASE);
955 attr = __vmread(GUEST_CS_AR_BYTES);
956 break;
957 case x86_seg_ds:
958 reg->sel = __vmread(GUEST_DS_SELECTOR);
959 reg->limit = __vmread(GUEST_DS_LIMIT);
960 reg->base = __vmread(GUEST_DS_BASE);
961 attr = __vmread(GUEST_DS_AR_BYTES);
962 break;
963 case x86_seg_es:
964 reg->sel = __vmread(GUEST_ES_SELECTOR);
965 reg->limit = __vmread(GUEST_ES_LIMIT);
966 reg->base = __vmread(GUEST_ES_BASE);
967 attr = __vmread(GUEST_ES_AR_BYTES);
968 break;
969 case x86_seg_fs:
970 reg->sel = __vmread(GUEST_FS_SELECTOR);
971 reg->limit = __vmread(GUEST_FS_LIMIT);
972 reg->base = __vmread(GUEST_FS_BASE);
973 attr = __vmread(GUEST_FS_AR_BYTES);
974 break;
975 case x86_seg_gs:
976 reg->sel = __vmread(GUEST_GS_SELECTOR);
977 reg->limit = __vmread(GUEST_GS_LIMIT);
978 reg->base = __vmread(GUEST_GS_BASE);
979 attr = __vmread(GUEST_GS_AR_BYTES);
980 break;
981 case x86_seg_ss:
982 reg->sel = __vmread(GUEST_SS_SELECTOR);
983 reg->limit = __vmread(GUEST_SS_LIMIT);
984 reg->base = __vmread(GUEST_SS_BASE);
985 attr = __vmread(GUEST_SS_AR_BYTES);
986 break;
987 case x86_seg_tr:
988 reg->sel = __vmread(GUEST_TR_SELECTOR);
989 reg->limit = __vmread(GUEST_TR_LIMIT);
990 reg->base = __vmread(GUEST_TR_BASE);
991 attr = __vmread(GUEST_TR_AR_BYTES);
992 break;
993 case x86_seg_gdtr:
994 reg->limit = __vmread(GUEST_GDTR_LIMIT);
995 reg->base = __vmread(GUEST_GDTR_BASE);
996 break;
997 case x86_seg_idtr:
998 reg->limit = __vmread(GUEST_IDTR_LIMIT);
999 reg->base = __vmread(GUEST_IDTR_BASE);
1000 break;
1001 case x86_seg_ldtr:
1002 reg->sel = __vmread(GUEST_LDTR_SELECTOR);
1003 reg->limit = __vmread(GUEST_LDTR_LIMIT);
1004 reg->base = __vmread(GUEST_LDTR_BASE);
1005 attr = __vmread(GUEST_LDTR_AR_BYTES);
1006 break;
1007 default:
1008 BUG();
1009 }
1011 reg->attr.bytes = (attr & 0xff) | ((attr >> 4) & 0xf00);
1012 }
1014 /* Make sure that Xen intercepts any FP accesses from the current vcpu */
1015 static void vmx_stts(struct vcpu *v)
1016 {
1017 /* VMX depends on operating on the current vcpu */
1018 ASSERT(v == current);
1020 /*
1021 * If the guest does not have TS enabled then we must cause and handle an
1022 * exception on first use of the FPU. If the guest *does* have TS enabled
1023 * then this is not necessary: no FPU activity can occur until the guest
1024 * clears CR0.TS, and we will initialise the FPU when that happens.
1025 */
1026 if ( !(v->arch.hvm_vmx.cpu_shadow_cr0 & X86_CR0_TS) )
1027 {
1028 v->arch.hvm_vmx.cpu_cr0 |= X86_CR0_TS;
1029 __vmwrite(GUEST_CR0, v->arch.hvm_vmx.cpu_cr0);
1030 __vm_set_bit(EXCEPTION_BITMAP, TRAP_no_device);
1031 }
1032 }
1034 static void vmx_set_tsc_offset(struct vcpu *v, u64 offset)
1035 {
1036 vmx_vmcs_enter(v);
1037 __vmwrite(TSC_OFFSET, offset);
1038 #if defined (__i386__)
1039 __vmwrite(TSC_OFFSET_HIGH, offset >> 32);
1040 #endif
1041 vmx_vmcs_exit(v);
1042 }
1044 static void vmx_init_ap_context(
1045 struct vcpu_guest_context *ctxt, int vcpuid, int trampoline_vector)
1046 {
1047 memset(ctxt, 0, sizeof(*ctxt));
1048 ctxt->user_regs.eip = VMXASSIST_BASE;
1049 ctxt->user_regs.edx = vcpuid;
1050 ctxt->user_regs.ebx = trampoline_vector;
1051 }
1053 void do_nmi(struct cpu_user_regs *);
1055 static void vmx_init_hypercall_page(struct domain *d, void *hypercall_page)
1056 {
1057 char *p;
1058 int i;
1060 for ( i = 0; i < (PAGE_SIZE / 32); i++ )
1061 {
1062 p = (char *)(hypercall_page + (i * 32));
1063 *(u8 *)(p + 0) = 0xb8; /* mov imm32, %eax */
1064 *(u32 *)(p + 1) = i;
1065 *(u8 *)(p + 5) = 0x0f; /* vmcall */
1066 *(u8 *)(p + 6) = 0x01;
1067 *(u8 *)(p + 7) = 0xc1;
1068 *(u8 *)(p + 8) = 0xc3; /* ret */
1069 }
1071 /* Don't support HYPERVISOR_iret at the moment */
1072 *(u16 *)(hypercall_page + (__HYPERVISOR_iret * 32)) = 0x0b0f; /* ud2 */
1073 }
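/*
 * For reference, each 32-byte stub written above disassembles to
 * (hypercall number i as the immediate):
 *
 *     b8 xx xx xx xx          mov  $i, %eax
 *     0f 01 c1                vmcall
 *     c3                      ret
 *
 * The HYPERVISOR_iret slot is then overwritten with 0f 0b (ud2), since
 * that hypercall is not supported here.
 */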
1075 static int vmx_guest_x86_mode(struct vcpu *v)
1076 {
1077 unsigned int cs_ar_bytes;
1079 ASSERT(v == current);
1081 if ( unlikely(!(v->arch.hvm_vmx.cpu_shadow_cr0 & X86_CR0_PE)) )
1082 return 0;
1083 if ( unlikely(__vmread(GUEST_RFLAGS) & X86_EFLAGS_VM) )
1084 return 1;
1085 cs_ar_bytes = __vmread(GUEST_CS_AR_BYTES);
1086 if ( vmx_long_mode_enabled(v) && likely(cs_ar_bytes &
1087 X86_SEG_AR_CS_LM_ACTIVE) )
1088 return 8;
1089 return (likely(cs_ar_bytes & X86_SEG_AR_DEF_OP_SIZE) ? 4 : 2);
1090 }
1092 static int vmx_pae_enabled(struct vcpu *v)
1093 {
1094 unsigned long cr4 = v->arch.hvm_vmx.cpu_shadow_cr4;
1095 return (vmx_paging_enabled(v) && (cr4 & X86_CR4_PAE));
1096 }
1098 static int vmx_nx_enabled(struct vcpu *v)
1099 {
1100 return v->arch.hvm_vmx.efer & EFER_NX;
1101 }
1103 static int vmx_interrupts_enabled(struct vcpu *v, enum hvm_intack type)
1104 {
1105 unsigned long intr_shadow, eflags;
1107 ASSERT(v == current);
1109 intr_shadow = __vmread(GUEST_INTERRUPTIBILITY_INFO);
1110 intr_shadow &= VMX_INTR_SHADOW_STI|VMX_INTR_SHADOW_MOV_SS;
1112 if ( type == hvm_intack_nmi )
1113 return !intr_shadow;
1115 ASSERT((type == hvm_intack_pic) || (type == hvm_intack_lapic));
1116 eflags = __vmread(GUEST_RFLAGS);
1117 return !irq_masked(eflags) && !intr_shadow;
1118 }
1120 static void vmx_update_host_cr3(struct vcpu *v)
1121 {
1122 ASSERT((v == current) || !vcpu_runnable(v));
1123 vmx_vmcs_enter(v);
1124 __vmwrite(HOST_CR3, v->arch.cr3);
1125 vmx_vmcs_exit(v);
1126 }
1128 static void vmx_update_guest_cr3(struct vcpu *v)
1129 {
1130 ASSERT((v == current) || !vcpu_runnable(v));
1131 vmx_vmcs_enter(v);
1132 __vmwrite(GUEST_CR3, v->arch.hvm_vcpu.hw_cr3);
1133 vmx_vmcs_exit(v);
1134 }
1136 static void vmx_flush_guest_tlbs(void)
1137 {
1138 /* No tagged TLB support on VMX yet. The fact that we're in Xen
1139 * at all means any guest will have a clean TLB when it's next run,
1140 * because VMRESUME will flush it for us. */
1141 }
1143 static void vmx_inject_exception(
1144 unsigned int trapnr, int errcode, unsigned long cr2)
1145 {
1146 struct vcpu *v = current;
1147 vmx_inject_hw_exception(v, trapnr, errcode);
1148 if ( trapnr == TRAP_page_fault )
1149 v->arch.hvm_vmx.cpu_cr2 = cr2;
1150 }
1152 static void vmx_update_vtpr(struct vcpu *v, unsigned long value)
1153 {
1154 /* VMX doesn't have a V_TPR field */
1155 }
1157 static int vmx_event_injection_faulted(struct vcpu *v)
1158 {
1159 unsigned int idtv_info_field;
1161 ASSERT(v == current);
1163 idtv_info_field = __vmread(IDT_VECTORING_INFO_FIELD);
1164 return (idtv_info_field & INTR_INFO_VALID_MASK);
1165 }
1167 static void disable_intercept_for_msr(u32 msr)
1168 {
1169 /*
1170 * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
1171 * have the write-low and read-high bitmap offsets the wrong way round.
1172 * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
1173 */
1174 if ( msr <= 0x1fff )
1175 {
1176 __clear_bit(msr, vmx_msr_bitmap + 0x000); /* read-low */
1177 __clear_bit(msr, vmx_msr_bitmap + 0x800); /* write-low */
1178 }
1179 else if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) )
1180 {
1181 msr &= 0x1fff;
1182 __clear_bit(msr, vmx_msr_bitmap + 0x400); /* read-high */
1183 __clear_bit(msr, vmx_msr_bitmap + 0xc00); /* write-high */
1184 }
1185 }
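/*
 * Worked example: MSR_FS_BASE is 0xc0000100, so it falls in the
 * 0xc0000000-0xc0001fff range; after "msr &= 0x1fff" it becomes 0x100, and
 * bit 0x100 is cleared in both the read-high (offset 0x400) and write-high
 * (offset 0xc00) parts of the bitmap, letting the guest access that MSR
 * without a VM exit.
 */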
1187 static struct hvm_function_table vmx_function_table = {
1188 .name = "VMX",
1189 .disable = stop_vmx,
1190 .domain_initialise = vmx_domain_initialise,
1191 .domain_destroy = vmx_domain_destroy,
1192 .vcpu_initialise = vmx_vcpu_initialise,
1193 .vcpu_destroy = vmx_vcpu_destroy,
1194 .store_cpu_guest_regs = vmx_store_cpu_guest_regs,
1195 .load_cpu_guest_regs = vmx_load_cpu_guest_regs,
1196 .save_cpu_ctxt = vmx_save_vmcs_ctxt,
1197 .load_cpu_ctxt = vmx_load_vmcs_ctxt,
1198 .paging_enabled = vmx_paging_enabled,
1199 .long_mode_enabled = vmx_long_mode_enabled,
1200 .pae_enabled = vmx_pae_enabled,
1201 .nx_enabled = vmx_nx_enabled,
1202 .interrupts_enabled = vmx_interrupts_enabled,
1203 .guest_x86_mode = vmx_guest_x86_mode,
1204 .get_guest_ctrl_reg = vmx_get_ctrl_reg,
1205 .get_segment_base = vmx_get_segment_base,
1206 .get_segment_register = vmx_get_segment_register,
1207 .update_host_cr3 = vmx_update_host_cr3,
1208 .update_guest_cr3 = vmx_update_guest_cr3,
1209 .flush_guest_tlbs = vmx_flush_guest_tlbs,
1210 .update_vtpr = vmx_update_vtpr,
1211 .stts = vmx_stts,
1212 .set_tsc_offset = vmx_set_tsc_offset,
1213 .inject_exception = vmx_inject_exception,
1214 .init_ap_context = vmx_init_ap_context,
1215 .init_hypercall_page = vmx_init_hypercall_page,
1216 .event_injection_faulted = vmx_event_injection_faulted
1217 };
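/*
 * Note: start_vmx() below hands this table to the generic HVM layer via
 * hvm_enable(&vmx_function_table); common HVM code then reaches the
 * VMX-specific handlers through it (typically as hvm_funcs.<op>(...)).
 */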
1219 int start_vmx(void)
1220 {
1221 u32 eax, edx;
1222 struct vmcs_struct *vmcs;
1224 /*
1225 * Xen does not fill x86_capability words except 0.
1226 */
1227 boot_cpu_data.x86_capability[4] = cpuid_ecx(1);
1229 if ( !test_bit(X86_FEATURE_VMXE, &boot_cpu_data.x86_capability) )
1230 return 0;
1232 rdmsr(IA32_FEATURE_CONTROL_MSR, eax, edx);
1234 if ( eax & IA32_FEATURE_CONTROL_MSR_LOCK )
1235 {
1236 if ( (eax & IA32_FEATURE_CONTROL_MSR_ENABLE_VMXON) == 0x0 )
1237 {
1238 printk("VMX disabled by Feature Control MSR.\n");
1239 return 0;
1240 }
1241 }
1242 else
1243 {
1244 wrmsr(IA32_FEATURE_CONTROL_MSR,
1245 IA32_FEATURE_CONTROL_MSR_LOCK |
1246 IA32_FEATURE_CONTROL_MSR_ENABLE_VMXON, 0);
1247 }
1249 set_in_cr4(X86_CR4_VMXE);
1251 vmx_init_vmcs_config();
1253 if ( smp_processor_id() == 0 )
1254 setup_vmcs_dump();
1256 if ( (vmcs = vmx_alloc_host_vmcs()) == NULL )
1257 {
1258 clear_in_cr4(X86_CR4_VMXE);
1259 printk("Failed to allocate host VMCS\n");
1260 return 0;
1261 }
1263 if ( __vmxon(virt_to_maddr(vmcs)) )
1264 {
1265 clear_in_cr4(X86_CR4_VMXE);
1266 printk("VMXON failed\n");
1267 vmx_free_host_vmcs(vmcs);
1268 return 0;
1269 }
1271 vmx_save_host_msrs();
1273 if ( smp_processor_id() != 0 )
1274 return 1;
1276 hvm_enable(&vmx_function_table);
1278 if ( cpu_has_vmx_msr_bitmap )
1279 {
1280 printk("VMX: MSR intercept bitmap enabled\n");
1281 vmx_msr_bitmap = alloc_xenheap_page();
1282 BUG_ON(vmx_msr_bitmap == NULL);
1283 memset(vmx_msr_bitmap, ~0, PAGE_SIZE);
1284 disable_intercept_for_msr(MSR_FS_BASE);
1285 disable_intercept_for_msr(MSR_GS_BASE);
1286 }
1288 return 1;
1289 }
1291 /*
1292 * Not all cases receive valid value in the VM-exit instruction length field.
1293 * Callers must know what they're doing!
1294 */
1295 static int __get_instruction_length(void)
1297 int len;
1298 len = __vmread(VM_EXIT_INSTRUCTION_LEN); /* Safe: callers audited */
1299 BUG_ON((len < 1) || (len > 15));
1300 return len;
1303 static void inline __update_guest_eip(unsigned long inst_len)
1305 unsigned long x;
1307 x = __vmread(GUEST_RIP);
1308 __vmwrite(GUEST_RIP, x + inst_len);
1310 x = __vmread(GUEST_RFLAGS);
1311 if ( x & X86_EFLAGS_RF )
1312 __vmwrite(GUEST_RFLAGS, x & ~X86_EFLAGS_RF);
1314 x = __vmread(GUEST_INTERRUPTIBILITY_INFO);
1315 if ( x & (VMX_INTR_SHADOW_STI | VMX_INTR_SHADOW_MOV_SS) )
1317 x &= ~(VMX_INTR_SHADOW_STI | VMX_INTR_SHADOW_MOV_SS);
1318 __vmwrite(GUEST_INTERRUPTIBILITY_INFO, x);
1322 static void vmx_do_no_device_fault(void)
1324 struct vcpu *v = current;
1326 setup_fpu(current);
1327 __vm_clear_bit(EXCEPTION_BITMAP, TRAP_no_device);
1329 /* Disable TS in guest CR0 unless the guest wants the exception too. */
1330 if ( !(v->arch.hvm_vmx.cpu_shadow_cr0 & X86_CR0_TS) )
1332 v->arch.hvm_vmx.cpu_cr0 &= ~X86_CR0_TS;
1333 __vmwrite(GUEST_CR0, v->arch.hvm_vmx.cpu_cr0);
1337 #define bitmaskof(idx) (1U << ((idx) & 31))
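/*
 * Example (assuming the usual cpufeature.h word layout): X86_FEATURE_VMXE
 * lives in capability word 4 (CPUID.1 ECX) at bit 5, so
 * bitmaskof(X86_FEATURE_VMXE) == 1U << 5 == 0x20 -- the mask used below to
 * hide VMX from guest-visible CPUID.
 */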
1338 static void vmx_do_cpuid(struct cpu_user_regs *regs)
1340 unsigned int input = (unsigned int)regs->eax;
1341 unsigned int count = (unsigned int)regs->ecx;
1342 unsigned int eax, ebx, ecx, edx;
1344 if ( input == 0x00000004 )
1346 cpuid_count(input, count, &eax, &ebx, &ecx, &edx);
1347 eax &= NUM_CORES_RESET_MASK;
1349 else if ( input == 0x40000003 )
1351 /*
1352 * NB. Unsupported interface for private use of VMXASSIST only.
1353 * Note that this leaf lives at <max-hypervisor-leaf> + 1.
1354 */
1355 u64 value = ((u64)regs->edx << 32) | (u32)regs->ecx;
1356 unsigned long mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT);
1357 struct vcpu *v = current;
1358 char *p;
1360 gdprintk(XENLOG_INFO, "Input address is 0x%"PRIx64".\n", value);
1362 /* 8-byte aligned valid pseudophys address from vmxassist, please. */
1363 if ( (value & 7) || (mfn == INVALID_MFN) ||
1364 !v->arch.hvm_vmx.vmxassist_enabled )
1366 domain_crash(v->domain);
1367 return;
1370 p = map_domain_page(mfn);
1371 value = *((uint64_t *)(p + (value & (PAGE_SIZE - 1))));
1372 unmap_domain_page(p);
1374 gdprintk(XENLOG_INFO, "Output value is 0x%"PRIx64".\n", value);
1375 ecx = (u32)value;
1376 edx = (u32)(value >> 32);
1377 } else {
1378 hvm_cpuid(input, &eax, &ebx, &ecx, &edx);
1380 if ( input == 0x00000001 )
1382 /* Mask off reserved bits. */
1383 ecx &= ~VMX_VCPU_CPUID_L1_ECX_RESERVED;
1385 ebx &= NUM_THREADS_RESET_MASK;
1387 /* Unsupportable for virtualised CPUs. */
1388 ecx &= ~(bitmaskof(X86_FEATURE_VMXE) |
1389 bitmaskof(X86_FEATURE_EST) |
1390 bitmaskof(X86_FEATURE_TM2) |
1391 bitmaskof(X86_FEATURE_CID));
1393 edx &= ~(bitmaskof(X86_FEATURE_HT) |
1394 bitmaskof(X86_FEATURE_ACPI) |
1395 bitmaskof(X86_FEATURE_ACC));
1398 if ( input == 0x00000006 || input == 0x00000009 || input == 0x0000000A )
1399 eax = ebx = ecx = edx = 0x0;
1402 regs->eax = (unsigned long)eax;
1403 regs->ebx = (unsigned long)ebx;
1404 regs->ecx = (unsigned long)ecx;
1405 regs->edx = (unsigned long)edx;
1407 HVMTRACE_3D(CPUID, current, input,
1408 ((uint64_t)eax << 32) | ebx, ((uint64_t)ecx << 32) | edx);
1411 #define CASE_GET_REG_P(REG, reg) \
1412 case REG_ ## REG: reg_p = (unsigned long *)&(regs->reg); break
1414 #ifdef __i386__
1415 #define CASE_EXTEND_GET_REG_P
1416 #else
1417 #define CASE_EXTEND_GET_REG_P \
1418 CASE_GET_REG_P(R8, r8); \
1419 CASE_GET_REG_P(R9, r9); \
1420 CASE_GET_REG_P(R10, r10); \
1421 CASE_GET_REG_P(R11, r11); \
1422 CASE_GET_REG_P(R12, r12); \
1423 CASE_GET_REG_P(R13, r13); \
1424 CASE_GET_REG_P(R14, r14); \
1425 CASE_GET_REG_P(R15, r15)
1426 #endif
1428 static void vmx_dr_access(unsigned long exit_qualification,
1429 struct cpu_user_regs *regs)
1431 struct vcpu *v = current;
1433 HVMTRACE_0D(DR_WRITE, v);
1435 v->arch.hvm_vcpu.flag_dr_dirty = 1;
1437 /* We could probably be smarter about this */
1438 __restore_debug_registers(v);
1440 /* Allow guest direct access to DR registers */
1441 v->arch.hvm_vcpu.u.vmx.exec_control &= ~CPU_BASED_MOV_DR_EXITING;
1442 __vmwrite(CPU_BASED_VM_EXEC_CONTROL,
1443 v->arch.hvm_vcpu.u.vmx.exec_control);
1446 /*
1447 * Invalidate the TLB for va. Invalidate the shadow page corresponding
1448 * the address va.
1449 */
1450 static void vmx_do_invlpg(unsigned long va)
1452 unsigned long eip;
1453 struct vcpu *v = current;
1455 HVMTRACE_2D(INVLPG, v, /*invlpga=*/ 0, va);
1457 eip = __vmread(GUEST_RIP);
1459 HVM_DBG_LOG(DBG_LEVEL_VMMU, "eip=%lx, va=%lx",
1460 eip, va);
1462 /*
1463 * We do the safest thing first, then try to update the shadow by
1464 * copying from the guest.
1465 */
1466 paging_invlpg(v, va);
1469 /*
1470 * Get the segment used by a string PIO instruction by decoding its prefixes.
1471 */
1472 static void vmx_str_pio_get_segment(int long_mode, unsigned long eip,
1473 int inst_len, enum x86_segment *seg)
1475 unsigned char inst[MAX_INST_LEN];
1476 int i;
1477 extern int inst_copy_from_guest(unsigned char *, unsigned long, int);
1479 if ( !long_mode )
1480 eip += __vmread(GUEST_CS_BASE);
1482 memset(inst, 0, MAX_INST_LEN);
1483 if ( inst_copy_from_guest(inst, eip, inst_len) != inst_len )
1485 gdprintk(XENLOG_ERR, "Get guest instruction failed\n");
1486 domain_crash(current->domain);
1487 return;
1490 for ( i = 0; i < inst_len; i++ )
1492 switch ( inst[i] )
1494 case 0xf3: /* REPZ */
1495 case 0xf2: /* REPNZ */
1496 case 0xf0: /* LOCK */
1497 case 0x66: /* data32 */
1498 case 0x67: /* addr32 */
1499 #ifdef __x86_64__
1500 case 0x40 ... 0x4f: /* REX */
1501 #endif
1502 continue;
1503 case 0x2e: /* CS */
1504 *seg = x86_seg_cs;
1505 continue;
1506 case 0x36: /* SS */
1507 *seg = x86_seg_ss;
1508 continue;
1509 case 0x26: /* ES */
1510 *seg = x86_seg_es;
1511 continue;
1512 case 0x64: /* FS */
1513 *seg = x86_seg_fs;
1514 continue;
1515 case 0x65: /* GS */
1516 *seg = x86_seg_gs;
1517 continue;
1518 case 0x3e: /* DS */
1519 *seg = x86_seg_ds;
1520 continue;
1525 static int vmx_str_pio_check_descriptor(int long_mode, unsigned long eip,
1526 int inst_len, enum x86_segment seg,
1527 unsigned long *base, u32 *limit,
1528 u32 *ar_bytes)
1530 enum vmcs_field ar_field, base_field, limit_field;
1532 *base = 0;
1533 *limit = 0;
1534 if ( seg != x86_seg_es )
1535 vmx_str_pio_get_segment(long_mode, eip, inst_len, &seg);
1537 switch ( seg )
1539 case x86_seg_cs:
1540 ar_field = GUEST_CS_AR_BYTES;
1541 base_field = GUEST_CS_BASE;
1542 limit_field = GUEST_CS_LIMIT;
1543 break;
1544 case x86_seg_ds:
1545 ar_field = GUEST_DS_AR_BYTES;
1546 base_field = GUEST_DS_BASE;
1547 limit_field = GUEST_DS_LIMIT;
1548 break;
1549 case x86_seg_es:
1550 ar_field = GUEST_ES_AR_BYTES;
1551 base_field = GUEST_ES_BASE;
1552 limit_field = GUEST_ES_LIMIT;
1553 break;
1554 case x86_seg_fs:
1555 ar_field = GUEST_FS_AR_BYTES;
1556 base_field = GUEST_FS_BASE;
1557 limit_field = GUEST_FS_LIMIT;
1558 break;
1559 case x86_seg_gs:
1560 ar_field = GUEST_GS_AR_BYTES;
1561 base_field = GUEST_GS_BASE;
1562 limit_field = GUEST_GS_LIMIT;
1563 break;
1564 case x86_seg_ss:
1565 ar_field = GUEST_SS_AR_BYTES;
1566 base_field = GUEST_SS_BASE;
1567 limit_field = GUEST_SS_LIMIT;
1568 break;
1569 default:
1570 BUG();
1571 return 0;
1574 if ( !long_mode || seg == x86_seg_fs || seg == x86_seg_gs )
1576 *base = __vmread(base_field);
1577 *limit = __vmread(limit_field);
1579 *ar_bytes = __vmread(ar_field);
1581 return !(*ar_bytes & 0x10000);
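/*
 * Note: bit 16 of the VMX access-rights field is the "segment unusable"
 * flag, so a null/unusable selector makes this return 0; the caller then
 * injects #GP for protected-mode guests (see vmx_str_pio_handler).
 */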
1585 static inline void vmx_str_pio_check_limit(u32 limit, unsigned int size,
1586 u32 ar_bytes, unsigned long addr,
1587 unsigned long base, int df,
1588 unsigned long *count)
1590 unsigned long ea = addr - base;
1592 /* Offset must be within limits. */
1593 ASSERT(ea == (u32)ea);
1594 if ( (u32)(ea + size - 1) < (u32)ea ||
1595 (ar_bytes & 0xc) != 0x4 ? ea + size - 1 > limit
1596 : ea <= limit )
1598 vmx_inject_hw_exception(current, TRAP_gp_fault, 0);
1599 return;
1602 /* Check the limit for repeated instructions, as above we checked
1603 only the first instance. Truncate the count if a limit violation
1604 would occur. Note that the checking is not necessary for page
1605 granular segments as transfers crossing page boundaries will be
1606 broken up anyway. */
1607 if ( !(ar_bytes & X86_SEG_AR_GRANULARITY) && *count > 1 )
1609 if ( (ar_bytes & 0xc) != 0x4 )
1611 /* expand-up */
1612 if ( !df )
1614 if ( ea + *count * size - 1 < ea ||
1615 ea + *count * size - 1 > limit )
1616 *count = (limit + 1UL - ea) / size;
1618 else
1620 if ( *count - 1 > ea / size )
1621 *count = ea / size + 1;
1624 else
1626 /* expand-down */
1627 if ( !df )
1629 if ( *count - 1 > -(s32)ea / size )
1630 *count = -(s32)ea / size + 1UL;
1632 else
1634 if ( ea < (*count - 1) * size ||
1635 ea - (*count - 1) * size <= limit )
1636 *count = (ea - limit - 1) / size + 1;
1639 ASSERT(*count);
1643 #ifdef __x86_64__
1644 static inline void vmx_str_pio_lm_check_limit(struct cpu_user_regs *regs,
1645 unsigned int size,
1646 unsigned long addr,
1647 unsigned long *count)
1649 if ( !is_canonical_address(addr) ||
1650 !is_canonical_address(addr + size - 1) )
1652 vmx_inject_hw_exception(current, TRAP_gp_fault, 0);
1653 return;
1655 if ( *count > (1UL << 48) / size )
1656 *count = (1UL << 48) / size;
1657 if ( !(regs->eflags & EF_DF) )
1659 if ( addr + *count * size - 1 < addr ||
1660 !is_canonical_address(addr + *count * size - 1) )
1661 *count = (addr & ~((1UL << 48) - 1)) / size;
1663 else
1665 if ( (*count - 1) * size > addr ||
1666 !is_canonical_address(addr + (*count - 1) * size) )
1667 *count = (addr & ~((1UL << 48) - 1)) / size + 1;
1669 ASSERT(*count);
1671 #endif
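/*
 * Note: in 64-bit mode a linear address is canonical when bits 63:47 all
 * equal bit 47.  The check above rejects buffers that start or end outside
 * that range with #GP, and the clamping keeps addr + *count * size inside
 * the 48-bit window so the repeat count cannot walk past it.
 */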
1673 static inline void vmx_send_str_pio(struct cpu_user_regs *regs,
1674 struct hvm_io_op *pio_opp,
1675 unsigned long inst_len, unsigned int port,
1676 int sign, unsigned int size, int dir,
1677 int df, unsigned long addr,
1678 unsigned long paddr, unsigned long count)
1680 /*
1681 * Handle string pio instructions that cross pages or that
1682 * are unaligned. See the comments in hvm_domain.c/handle_mmio()
1683 */
1684 if ( (addr & PAGE_MASK) != ((addr + size - 1) & PAGE_MASK) ) {
1685 unsigned long value = 0;
1687 pio_opp->flags |= OVERLAP;
1689 if ( dir == IOREQ_WRITE ) /* OUTS */
1691 if ( hvm_paging_enabled(current) )
1693 int rv = hvm_copy_from_guest_virt(&value, addr, size);
1694 if ( rv != 0 )
1696 /* Failed on the page-spanning copy. Inject PF into
1697 * the guest for the address where we failed. */
1698 addr += size - rv;
1699 gdprintk(XENLOG_DEBUG, "Pagefault reading non-io side "
1700 "of a page-spanning PIO: va=%#lx\n", addr);
1701 vmx_inject_exception(TRAP_page_fault, 0, addr);
1702 return;
1705 else
1706 (void) hvm_copy_from_guest_phys(&value, addr, size);
1707 } else /* dir != IOREQ_WRITE */
1708 /* Remember where to write the result, as a *VA*.
1709 * Must be a VA so we can handle the page overlap
1710 * correctly in hvm_pio_assist() */
1711 pio_opp->addr = addr;
1713 if ( count == 1 )
1714 regs->eip += inst_len;
1716 send_pio_req(port, 1, size, value, dir, df, 0);
1717 } else {
1718 unsigned long last_addr = sign > 0 ? addr + count * size - 1
1719 : addr - (count - 1) * size;
1721 if ( (addr & PAGE_MASK) != (last_addr & PAGE_MASK) )
1723 if ( sign > 0 )
1724 count = (PAGE_SIZE - (addr & ~PAGE_MASK)) / size;
1725 else
1726 count = (addr & ~PAGE_MASK) / size + 1;
1727 } else
1728 regs->eip += inst_len;
1730 send_pio_req(port, count, size, paddr, dir, df, 1);
1734 static void vmx_str_pio_handler(unsigned long exit_qualification,
1735 unsigned long inst_len,
1736 struct cpu_user_regs *regs,
1737 struct hvm_io_op *pio_opp)
1739 unsigned int port, size;
1740 int dir, df, vm86;
1741 unsigned long addr, count = 1, base;
1742 paddr_t paddr;
1743 unsigned long gfn;
1744 u32 ar_bytes, limit;
1745 int sign;
1746 int long_mode = 0;
1748 vm86 = regs->eflags & X86_EFLAGS_VM ? 1 : 0;
1749 df = regs->eflags & X86_EFLAGS_DF ? 1 : 0;
1751 if ( test_bit(6, &exit_qualification) )
1752 port = (exit_qualification >> 16) & 0xFFFF;
1753 else
1754 port = regs->edx & 0xffff;
1756 size = (exit_qualification & 7) + 1;
1757 dir = test_bit(3, &exit_qualification); /* direction */
1759 if ( dir == IOREQ_READ )
1760 HVMTRACE_2D(IO_READ, current, port, size);
1761 else
1762 HVMTRACE_2D(IO_WRITE, current, port, size);
1764 sign = regs->eflags & X86_EFLAGS_DF ? -1 : 1;
1765 ar_bytes = __vmread(GUEST_CS_AR_BYTES);
1766 #ifdef __x86_64__
1767 if ( vmx_long_mode_enabled(current) &&
1768 (ar_bytes & X86_SEG_AR_CS_LM_ACTIVE) )
1769 long_mode = 1;
1770 #endif
1771 addr = __vmread(GUEST_LINEAR_ADDRESS);
1773 if ( test_bit(5, &exit_qualification) ) { /* "rep" prefix */
1774 pio_opp->flags |= REPZ;
1775 count = regs->ecx;
1776 if ( !long_mode &&
1777 (vm86 || !(ar_bytes & X86_SEG_AR_DEF_OP_SIZE)) )
1778 count &= 0xFFFF;
1781 /*
1782 * In protected mode, guest linear address is invalid if the
1783 * selector is null.
1784 */
1785 if ( !vmx_str_pio_check_descriptor(long_mode, regs->eip, inst_len,
1786 dir==IOREQ_WRITE ? x86_seg_ds :
1787 x86_seg_es, &base, &limit,
1788 &ar_bytes) ) {
1789 if ( !long_mode ) {
1790 vmx_inject_hw_exception(current, TRAP_gp_fault, 0);
1791 return;
1793 addr = dir == IOREQ_WRITE ? base + regs->esi : regs->edi;
1796 if ( !long_mode )
1798 /* Segment must be readable for outs and writeable for ins. */
1799 if ( dir == IOREQ_WRITE ? (ar_bytes & 0xa) == 0x8
1800 : (ar_bytes & 0xa) != 0x2 ) {
1801 vmx_inject_hw_exception(current, TRAP_gp_fault, 0);
1802 return;
1805 vmx_str_pio_check_limit(limit, size, ar_bytes, addr, base, df, &count);
1807 #ifdef __x86_64__
1808 else
1810 vmx_str_pio_lm_check_limit(regs, size, addr, &count);
1812 #endif
1814 /* Translate the address to a physical address */
1815 gfn = paging_gva_to_gfn(current, addr);
1816 if ( gfn == INVALID_GFN )
1818 /* The guest does not have the RAM address mapped;
1819 * inject a page fault into the guest. */
1820 int errcode = 0;
1821 /* IO read --> memory write */
1822 if ( dir == IOREQ_READ ) errcode |= PFEC_write_access;
1823 vmx_inject_exception(TRAP_page_fault, errcode, addr);
1824 return;
1826 paddr = (paddr_t)gfn << PAGE_SHIFT | (addr & ~PAGE_MASK);
1828 vmx_send_str_pio(regs, pio_opp, inst_len, port, sign,
1829 size, dir, df, addr, paddr, count);
1832 static void vmx_io_instruction(unsigned long exit_qualification,
1833 unsigned long inst_len)
1835 struct cpu_user_regs *regs;
1836 struct hvm_io_op *pio_opp;
1838 pio_opp = &current->arch.hvm_vcpu.io_op;
1839 pio_opp->instr = INSTR_PIO;
1840 pio_opp->flags = 0;
1842 regs = &pio_opp->io_context;
1844 /* Copy current guest state into io instruction state structure. */
1845 memcpy(regs, guest_cpu_user_regs(), HVM_CONTEXT_STACK_BYTES);
1846 vmx_store_cpu_guest_regs(current, regs, NULL);
1848 HVM_DBG_LOG(DBG_LEVEL_IO, "vm86 %d, eip=%x:%lx, "
1849 "exit_qualification = %lx",
1850 regs->eflags & X86_EFLAGS_VM ? 1 : 0,
1851 regs->cs, (unsigned long)regs->eip, exit_qualification);
1853 if ( test_bit(4, &exit_qualification) ) /* string instruction */
1854 vmx_str_pio_handler(exit_qualification, inst_len, regs, pio_opp);
1855 else
1857 unsigned int port, size;
1858 int dir, df;
1860 df = regs->eflags & X86_EFLAGS_DF ? 1 : 0;
1862 if ( test_bit(6, &exit_qualification) )
1863 port = (exit_qualification >> 16) & 0xFFFF;
1864 else
1865 port = regs->edx & 0xffff;
1867 size = (exit_qualification & 7) + 1;
1868 dir = test_bit(3, &exit_qualification); /* direction */
1870 if ( dir == IOREQ_READ )
1871 HVMTRACE_2D(IO_READ, current, port, size);
1872 else
1873 HVMTRACE_2D(IO_WRITE, current, port, size);
1875 if ( port == 0xe9 && dir == IOREQ_WRITE && size == 1 )
1876 hvm_print_line(current, regs->eax); /* guest debug output */
1878 regs->eip += inst_len;
1879 send_pio_req(port, 1, size, regs->eax, dir, df, 0);
1883 static void vmx_world_save(struct vcpu *v, struct vmx_assist_context *c)
1885 /* NB. Skip transition instruction. */
1886 c->eip = __vmread(GUEST_RIP);
1887 c->eip += __get_instruction_length(); /* Safe: MOV Cn, LMSW, CLTS */
1889 c->esp = __vmread(GUEST_RSP);
1890 c->eflags = __vmread(GUEST_RFLAGS) & ~X86_EFLAGS_RF;
1892 c->cr0 = v->arch.hvm_vmx.cpu_shadow_cr0;
1893 c->cr3 = v->arch.hvm_vmx.cpu_cr3;
1894 c->cr4 = v->arch.hvm_vmx.cpu_shadow_cr4;
1896 c->idtr_limit = __vmread(GUEST_IDTR_LIMIT);
1897 c->idtr_base = __vmread(GUEST_IDTR_BASE);
1899 c->gdtr_limit = __vmread(GUEST_GDTR_LIMIT);
1900 c->gdtr_base = __vmread(GUEST_GDTR_BASE);
1902 c->cs_sel = __vmread(GUEST_CS_SELECTOR);
1903 c->cs_limit = __vmread(GUEST_CS_LIMIT);
1904 c->cs_base = __vmread(GUEST_CS_BASE);
1905 c->cs_arbytes.bytes = __vmread(GUEST_CS_AR_BYTES);
1907 c->ds_sel = __vmread(GUEST_DS_SELECTOR);
1908 c->ds_limit = __vmread(GUEST_DS_LIMIT);
1909 c->ds_base = __vmread(GUEST_DS_BASE);
1910 c->ds_arbytes.bytes = __vmread(GUEST_DS_AR_BYTES);
1912 c->es_sel = __vmread(GUEST_ES_SELECTOR);
1913 c->es_limit = __vmread(GUEST_ES_LIMIT);
1914 c->es_base = __vmread(GUEST_ES_BASE);
1915 c->es_arbytes.bytes = __vmread(GUEST_ES_AR_BYTES);
1917 c->ss_sel = __vmread(GUEST_SS_SELECTOR);
1918 c->ss_limit = __vmread(GUEST_SS_LIMIT);
1919 c->ss_base = __vmread(GUEST_SS_BASE);
1920 c->ss_arbytes.bytes = __vmread(GUEST_SS_AR_BYTES);
1922 c->fs_sel = __vmread(GUEST_FS_SELECTOR);
1923 c->fs_limit = __vmread(GUEST_FS_LIMIT);
1924 c->fs_base = __vmread(GUEST_FS_BASE);
1925 c->fs_arbytes.bytes = __vmread(GUEST_FS_AR_BYTES);
1927 c->gs_sel = __vmread(GUEST_GS_SELECTOR);
1928 c->gs_limit = __vmread(GUEST_GS_LIMIT);
1929 c->gs_base = __vmread(GUEST_GS_BASE);
1930 c->gs_arbytes.bytes = __vmread(GUEST_GS_AR_BYTES);
1932 c->tr_sel = __vmread(GUEST_TR_SELECTOR);
1933 c->tr_limit = __vmread(GUEST_TR_LIMIT);
1934 c->tr_base = __vmread(GUEST_TR_BASE);
1935 c->tr_arbytes.bytes = __vmread(GUEST_TR_AR_BYTES);
1937 c->ldtr_sel = __vmread(GUEST_LDTR_SELECTOR);
1938 c->ldtr_limit = __vmread(GUEST_LDTR_LIMIT);
1939 c->ldtr_base = __vmread(GUEST_LDTR_BASE);
1940 c->ldtr_arbytes.bytes = __vmread(GUEST_LDTR_AR_BYTES);
1943 static int vmx_world_restore(struct vcpu *v, struct vmx_assist_context *c)
1945 unsigned long mfn, old_base_mfn;
1947 __vmwrite(GUEST_RIP, c->eip);
1948 __vmwrite(GUEST_RSP, c->esp);
1949 __vmwrite(GUEST_RFLAGS, c->eflags);
1951 v->arch.hvm_vmx.cpu_shadow_cr0 = c->cr0;
1952 __vmwrite(CR0_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr0);
1954 if ( !vmx_paging_enabled(v) )
1955 goto skip_cr3;
1957 if ( c->cr3 == v->arch.hvm_vmx.cpu_cr3 )
1959 /*
1960 * This is simple TLB flush, implying the guest has
1961 * removed some translation or changed page attributes.
1962 * We simply invalidate the shadow.
1963 */
1964 mfn = get_mfn_from_gpfn(c->cr3 >> PAGE_SHIFT);
1965 if ( mfn != pagetable_get_pfn(v->arch.guest_table) )
1966 goto bad_cr3;
1968 else
1970 /*
1971 * If different, make a shadow. Check if the PDBR is valid
1972 * first.
1973 */
1974 HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 c->cr3 = %x", c->cr3);
1975 mfn = get_mfn_from_gpfn(c->cr3 >> PAGE_SHIFT);
1976 if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) )
1977 goto bad_cr3;
1978 old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
1979 v->arch.guest_table = pagetable_from_pfn(mfn);
1980 if (old_base_mfn)
1981 put_page(mfn_to_page(old_base_mfn));
1982 v->arch.hvm_vmx.cpu_cr3 = c->cr3;
1985 skip_cr3:
1986 if ( !vmx_paging_enabled(v) )
1987 HVM_DBG_LOG(DBG_LEVEL_VMMU, "switching to vmxassist. use phys table");
1988 else
1989 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %x", c->cr3);
1991 __vmwrite(GUEST_CR4, (c->cr4 | HVM_CR4_HOST_MASK));
1992 v->arch.hvm_vmx.cpu_shadow_cr4 = c->cr4;
1993 __vmwrite(CR4_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr4);
1995 __vmwrite(GUEST_IDTR_LIMIT, c->idtr_limit);
1996 __vmwrite(GUEST_IDTR_BASE, c->idtr_base);
1998 __vmwrite(GUEST_GDTR_LIMIT, c->gdtr_limit);
1999 __vmwrite(GUEST_GDTR_BASE, c->gdtr_base);
2001 __vmwrite(GUEST_CS_SELECTOR, c->cs_sel);
2002 __vmwrite(GUEST_CS_LIMIT, c->cs_limit);
2003 __vmwrite(GUEST_CS_BASE, c->cs_base);
2004 __vmwrite(GUEST_CS_AR_BYTES, c->cs_arbytes.bytes);
2006 __vmwrite(GUEST_DS_SELECTOR, c->ds_sel);
2007 __vmwrite(GUEST_DS_LIMIT, c->ds_limit);
2008 __vmwrite(GUEST_DS_BASE, c->ds_base);
2009 __vmwrite(GUEST_DS_AR_BYTES, c->ds_arbytes.bytes);
2011 __vmwrite(GUEST_ES_SELECTOR, c->es_sel);
2012 __vmwrite(GUEST_ES_LIMIT, c->es_limit);
2013 __vmwrite(GUEST_ES_BASE, c->es_base);
2014 __vmwrite(GUEST_ES_AR_BYTES, c->es_arbytes.bytes);
2016 __vmwrite(GUEST_SS_SELECTOR, c->ss_sel);
2017 __vmwrite(GUEST_SS_LIMIT, c->ss_limit);
2018 __vmwrite(GUEST_SS_BASE, c->ss_base);
2019 __vmwrite(GUEST_SS_AR_BYTES, c->ss_arbytes.bytes);
2021 __vmwrite(GUEST_FS_SELECTOR, c->fs_sel);
2022 __vmwrite(GUEST_FS_LIMIT, c->fs_limit);
2023 __vmwrite(GUEST_FS_BASE, c->fs_base);
2024 __vmwrite(GUEST_FS_AR_BYTES, c->fs_arbytes.bytes);
2026 __vmwrite(GUEST_GS_SELECTOR, c->gs_sel);
2027 __vmwrite(GUEST_GS_LIMIT, c->gs_limit);
2028 __vmwrite(GUEST_GS_BASE, c->gs_base);
2029 __vmwrite(GUEST_GS_AR_BYTES, c->gs_arbytes.bytes);
2031 __vmwrite(GUEST_TR_SELECTOR, c->tr_sel);
2032 __vmwrite(GUEST_TR_LIMIT, c->tr_limit);
2033 __vmwrite(GUEST_TR_BASE, c->tr_base);
2034 __vmwrite(GUEST_TR_AR_BYTES, c->tr_arbytes.bytes);
2036 __vmwrite(GUEST_LDTR_SELECTOR, c->ldtr_sel);
2037 __vmwrite(GUEST_LDTR_LIMIT, c->ldtr_limit);
2038 __vmwrite(GUEST_LDTR_BASE, c->ldtr_base);
2039 __vmwrite(GUEST_LDTR_AR_BYTES, c->ldtr_arbytes.bytes);
2041 paging_update_paging_modes(v);
2042 return 0;
2044 bad_cr3:
2045 gdprintk(XENLOG_ERR, "Invalid CR3 value=%x\n", c->cr3);
2046 return -EINVAL;
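/*
 * vmxassist world switching: VMX_ASSIST_INVOKE stashes the current guest
 * context at VMXASSIST_OLD_CONTEXT and loads the context published by
 * vmxassist at VMXASSIST_NEW_CONTEXT; VMX_ASSIST_RESTORE reloads the saved
 * context. Both return 1 on a successful switch.
 */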
2049 enum { VMX_ASSIST_INVOKE = 0, VMX_ASSIST_RESTORE };
2051 static int vmx_assist(struct vcpu *v, int mode)
2053 struct vmx_assist_context c;
2054 struct hvm_hw_vpic *vpic = v->domain->arch.hvm_domain.vpic;
2055 u32 magic, cp;
2057 /* Make sure vmxassist is present (its absence is not an error). */
2058 if (hvm_copy_from_guest_phys(&magic, VMXASSIST_MAGIC_OFFSET,
2059 sizeof(magic)))
2060 return 0;
2061 if (magic != VMXASSIST_MAGIC)
2062 return 0;
2064 switch (mode) {
2065 /*
2066 * Transfer control to vmxassist.
2067 * Store the current context in VMXASSIST_OLD_CONTEXT and load
2068 * the context found at VMXASSIST_NEW_CONTEXT. That context was created
2069 * by vmxassist, and loading it transfers control to vmxassist.
2070 */
2071 case VMX_ASSIST_INVOKE:
2072 /* save the old context */
2073 if (hvm_copy_from_guest_phys(&cp, VMXASSIST_OLD_CONTEXT, sizeof(cp)))
2074 goto error;
2075 if (cp != 0) {
2076 vmx_world_save(v, &c);
2077 if (hvm_copy_to_guest_phys(cp, &c, sizeof(c)))
2078 goto error;
2081 /* Restore the new context; this should activate vmxassist. */
2082 if (hvm_copy_from_guest_phys(&cp, VMXASSIST_NEW_CONTEXT, sizeof(cp)))
2083 goto error;
2084 if (cp != 0) {
2085 if (hvm_copy_from_guest_phys(&c, cp, sizeof(c)))
2086 goto error;
2087 if ( vmx_world_restore(v, &c) != 0 )
2088 goto error;
2089 v->arch.hvm_vmx.pm_irqbase[0] = vpic[0].irq_base;
2090 v->arch.hvm_vmx.pm_irqbase[1] = vpic[1].irq_base;
2091 vpic[0].irq_base = NR_EXCEPTION_HANDLER;
2092 vpic[1].irq_base = NR_EXCEPTION_HANDLER + 8;
2093 v->arch.hvm_vmx.vmxassist_enabled = 1;
2094 return 1;
2096 break;
2098 /*
2099 * Restore the VMXASSIST_OLD_CONTEXT that was saved by
2100 * VMX_ASSIST_INVOKE above.
2101 */
2102 case VMX_ASSIST_RESTORE:
2103 /* fetch the context saved at VMX_ASSIST_INVOKE time */
2104 if (hvm_copy_from_guest_phys(&cp, VMXASSIST_OLD_CONTEXT, sizeof(cp)))
2105 goto error;
2106 if (cp != 0) {
2107 if (hvm_copy_from_guest_phys(&c, cp, sizeof(c)))
2108 goto error;
2109 if ( vmx_world_restore(v, &c) != 0 )
2110 goto error;
2111 if ( v->arch.hvm_vmx.irqbase_mode ) {
2112 vpic[0].irq_base = c.rm_irqbase[0] & 0xf8;
2113 vpic[1].irq_base = c.rm_irqbase[1] & 0xf8;
2114 } else {
2115 vpic[0].irq_base = v->arch.hvm_vmx.pm_irqbase[0];
2116 vpic[1].irq_base = v->arch.hvm_vmx.pm_irqbase[1];
2118 v->arch.hvm_vmx.vmxassist_enabled = 0;
2119 return 1;
2121 break;
2124 error:
2125 gdprintk(XENLOG_ERR, "Failed to transfer to vmxassist\n");
2126 domain_crash(v->domain);
2127 return 0;
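/*
 * Handle a guest write to CR0. The real CR0 always keeps PE, PG, NE and WP
 * set; the guest-visible value lives in the CR0 read shadow. Enabling
 * paging validates and adopts the guest CR3 (and enters IA-32e mode if
 * EFER.LME is set); clearing PE triggers the world switch to vmxassist.
 */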
2130 static int vmx_set_cr0(unsigned long value)
2132 struct vcpu *v = current;
2133 unsigned long mfn;
2134 unsigned long eip;
2135 int paging_enabled;
2136 unsigned long vm_entry_value;
2137 unsigned long old_cr0;
2138 unsigned long old_base_mfn;
2140 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR0 value = %lx", value);
2142 /* ET is reserved and should always be 1. */
2143 value |= X86_CR0_ET;
2145 if ( (value & (X86_CR0_PE|X86_CR0_PG)) == X86_CR0_PG )
2147 vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
2148 return 0;
2151 /* TS cleared? Then initialise FPU now. */
2152 if ( !(value & X86_CR0_TS) )
2154 setup_fpu(v);
2155 __vm_clear_bit(EXCEPTION_BITMAP, TRAP_no_device);
2158 old_cr0 = v->arch.hvm_vmx.cpu_shadow_cr0;
2159 paging_enabled = old_cr0 & X86_CR0_PG;
2161 v->arch.hvm_vmx.cpu_cr0 = (value | X86_CR0_PE | X86_CR0_PG
2162 | X86_CR0_NE | X86_CR0_WP);
2163 __vmwrite(GUEST_CR0, v->arch.hvm_vmx.cpu_cr0);
2165 v->arch.hvm_vmx.cpu_shadow_cr0 = value;
2166 __vmwrite(CR0_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr0);
2168 if ( (value & X86_CR0_PE) && (value & X86_CR0_PG) && !paging_enabled )
2170 /*
2171 * Trying to enable guest paging.
2172 * The guest CR3 must point to a valid guest-physical page.
2173 */
2174 mfn = get_mfn_from_gpfn(v->arch.hvm_vmx.cpu_cr3 >> PAGE_SHIFT);
2175 if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) )
2177 gdprintk(XENLOG_ERR, "Invalid CR3 value = %lx (mfn=%lx)\n",
2178 v->arch.hvm_vmx.cpu_cr3, mfn);
2179 domain_crash(v->domain);
2180 return 0;
2183 #if defined(__x86_64__)
2184 if ( vmx_lme_is_set(v) )
2186 if ( !(v->arch.hvm_vmx.cpu_shadow_cr4 & X86_CR4_PAE) )
2188 HVM_DBG_LOG(DBG_LEVEL_1, "Guest enabled paging "
2189 "with EFER.LME set but not CR4.PAE");
2190 vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
2192 else
2194 HVM_DBG_LOG(DBG_LEVEL_1, "Enabling long mode");
2195 v->arch.hvm_vmx.efer |= EFER_LMA;
2196 vm_entry_value = __vmread(VM_ENTRY_CONTROLS);
2197 vm_entry_value |= VM_ENTRY_IA32E_MODE;
2198 __vmwrite(VM_ENTRY_CONTROLS, vm_entry_value);
2201 #endif
2203 /*
2204 * arch.guest_table now points to the machine-physical frame.
2205 */
2206 old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
2207 v->arch.guest_table = pagetable_from_pfn(mfn);
2208 if (old_base_mfn)
2209 put_page(mfn_to_page(old_base_mfn));
2210 paging_update_paging_modes(v);
2212 HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx",
2213 (unsigned long) (mfn << PAGE_SHIFT));
2215 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx",
2216 v->arch.hvm_vmx.cpu_cr3, mfn);
2219 if ( !((value & X86_CR0_PE) && (value & X86_CR0_PG)) && paging_enabled )
2220 if ( v->arch.hvm_vmx.cpu_cr3 ) {
2221 put_page(mfn_to_page(get_mfn_from_gpfn(
2222 v->arch.hvm_vmx.cpu_cr3 >> PAGE_SHIFT)));
2223 v->arch.guest_table = pagetable_null();
2226 /*
2227 * VMX does not implement real-mode virtualization. We emulate
2228 * real mode by performing a world switch to VMXAssist whenever
2229 * the guest clears the CR0.PE bit.
2230 */
2231 if ( (value & X86_CR0_PE) == 0 )
2233 if ( value & X86_CR0_PG ) {
2234 /* inject GP here */
2235 vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
2236 return 0;
2237 } else {
2238 /*
2239 * Disable paging here.
2240 * Same as the PE == 1 && PG == 0 case.
2241 */
2242 if ( vmx_long_mode_enabled(v) )
2244 v->arch.hvm_vmx.efer &= ~EFER_LMA;
2245 vm_entry_value = __vmread(VM_ENTRY_CONTROLS);
2246 vm_entry_value &= ~VM_ENTRY_IA32E_MODE;
2247 __vmwrite(VM_ENTRY_CONTROLS, vm_entry_value);
2251 if ( vmx_assist(v, VMX_ASSIST_INVOKE) )
2253 eip = __vmread(GUEST_RIP);
2254 HVM_DBG_LOG(DBG_LEVEL_1,
2255 "Transfering control to vmxassist %%eip 0x%lx", eip);
2256 return 0; /* do not update eip! */
2259 else if ( v->arch.hvm_vmx.vmxassist_enabled )
2261 eip = __vmread(GUEST_RIP);
2262 HVM_DBG_LOG(DBG_LEVEL_1,
2263 "Enabling CR0.PE at %%eip 0x%lx", eip);
2264 if ( vmx_assist(v, VMX_ASSIST_RESTORE) )
2266 HVM_DBG_LOG(DBG_LEVEL_1,
2267 "Restoring to %%eip 0x%lx", eip);
2268 return 0; /* do not update eip! */
2271 else if ( (value & (X86_CR0_PE | X86_CR0_PG)) == X86_CR0_PE )
2273 if ( vmx_long_mode_enabled(v) )
2275 v->arch.hvm_vmx.efer &= ~EFER_LMA;
2276 vm_entry_value = __vmread(VM_ENTRY_CONTROLS);
2277 vm_entry_value &= ~VM_ENTRY_IA32E_MODE;
2278 __vmwrite(VM_ENTRY_CONTROLS, vm_entry_value);
2280 paging_update_paging_modes(v);
2283 return 1;
2286 #define CASE_SET_REG(REG, reg) \
2287 case REG_ ## REG: regs->reg = value; break
2288 #define CASE_GET_REG(REG, reg) \
2289 case REG_ ## REG: value = regs->reg; break
2291 #define CASE_EXTEND_SET_REG \
2292 CASE_EXTEND_REG(S)
2293 #define CASE_EXTEND_GET_REG \
2294 CASE_EXTEND_REG(G)
2296 #ifdef __i386__
2297 #define CASE_EXTEND_REG(T)
2298 #else
2299 #define CASE_EXTEND_REG(T) \
2300 CASE_ ## T ## ET_REG(R8, r8); \
2301 CASE_ ## T ## ET_REG(R9, r9); \
2302 CASE_ ## T ## ET_REG(R10, r10); \
2303 CASE_ ## T ## ET_REG(R11, r11); \
2304 CASE_ ## T ## ET_REG(R12, r12); \
2305 CASE_ ## T ## ET_REG(R13, r13); \
2306 CASE_ ## T ## ET_REG(R14, r14); \
2307 CASE_ ## T ## ET_REG(R15, r15)
2308 #endif
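/*
 * For illustration, CASE_GET_REG(EAX, eax) expands to
 *     case REG_EAX: value = regs->eax; break;
 * and, on x86-64, CASE_EXTEND_GET_REG adds the equivalent cases for
 * r8..r15; on i386 it expands to nothing.
 */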
2310 /*
2311 * Write to control registers
2312 */
2313 static int mov_to_cr(int gp, int cr, struct cpu_user_regs *regs)
2315 unsigned long value, old_cr, old_base_mfn, mfn;
2316 struct vcpu *v = current;
2317 struct vlapic *vlapic = vcpu_vlapic(v);
2319 switch ( gp )
2321 CASE_GET_REG(EAX, eax);
2322 CASE_GET_REG(ECX, ecx);
2323 CASE_GET_REG(EDX, edx);
2324 CASE_GET_REG(EBX, ebx);
2325 CASE_GET_REG(EBP, ebp);
2326 CASE_GET_REG(ESI, esi);
2327 CASE_GET_REG(EDI, edi);
2328 CASE_EXTEND_GET_REG;
2329 case REG_ESP:
2330 value = __vmread(GUEST_RSP);
2331 break;
2332 default:
2333 gdprintk(XENLOG_ERR, "invalid gp: %d\n", gp);
2334 goto exit_and_crash;
2337 HVMTRACE_2D(CR_WRITE, v, cr, value);
2339 HVM_DBG_LOG(DBG_LEVEL_1, "CR%d, value = %lx", cr, value);
2341 switch ( cr )
2343 case 0:
2344 return vmx_set_cr0(value);
2346 case 3:
2347 /*
2348 * If paging is not enabled yet, simply copy the value to CR3.
2349 */
2350 if (!vmx_paging_enabled(v)) {
2351 v->arch.hvm_vmx.cpu_cr3 = value;
2352 break;
2355 /*
2356 * Make a new shadow if one does not exist for this CR3.
2357 */
2358 if (value == v->arch.hvm_vmx.cpu_cr3) {
2359 /*
2360 * This is a simple TLB flush, implying the guest has
2361 * removed some translation or changed page attributes.
2362 * We simply invalidate the shadow.
2363 */
2364 mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT);
2365 if (mfn != pagetable_get_pfn(v->arch.guest_table))
2366 goto bad_cr3;
2367 paging_update_cr3(v);
2368 } else {
2369 /*
2370 * If different, make a shadow. Check if the PDBR is valid
2371 * first.
2372 */
2373 HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 value = %lx", value);
2374 mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT);
2375 if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) )
2376 goto bad_cr3;
2377 old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
2378 v->arch.guest_table = pagetable_from_pfn(mfn);
2379 if (old_base_mfn)
2380 put_page(mfn_to_page(old_base_mfn));
2381 v->arch.hvm_vmx.cpu_cr3 = value;
2382 update_cr3(v);
2383 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx", value);
2385 break;
2387 case 4: /* CR4 */
2388 old_cr = v->arch.hvm_vmx.cpu_shadow_cr4;
2390 if ( value & X86_CR4_RESERVED_BITS )
2392 HVM_DBG_LOG(DBG_LEVEL_1,
2393 "Guest attempts to set reserved bit in CR4: %lx",
2394 value);
2395 vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
2396 break;
2399 if ( (value & X86_CR4_PAE) && !(old_cr & X86_CR4_PAE) )
2401 if ( vmx_pgbit_test(v) )
2403 /* The guest is a 32-bit PAE guest. */
2404 #if CONFIG_PAGING_LEVELS >= 3
2405 unsigned long mfn, old_base_mfn;
2406 mfn = get_mfn_from_gpfn(v->arch.hvm_vmx.cpu_cr3 >> PAGE_SHIFT);
2407 if ( !mfn_valid(mfn) ||
2408 !get_page(mfn_to_page(mfn), v->domain) )
2409 goto bad_cr3;
2411 /*
2412 * arch.guest_table now points to the machine-physical frame.
2413 */
2415 old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
2416 v->arch.guest_table = pagetable_from_pfn(mfn);
2417 if ( old_base_mfn )
2418 put_page(mfn_to_page(old_base_mfn));
2420 HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx",
2421 (unsigned long) (mfn << PAGE_SHIFT));
2423 HVM_DBG_LOG(DBG_LEVEL_VMMU,
2424 "Update CR3 value = %lx, mfn = %lx",
2425 v->arch.hvm_vmx.cpu_cr3, mfn);
2426 #endif
2429 else if ( !(value & X86_CR4_PAE) )
2431 if ( unlikely(vmx_long_mode_enabled(v)) )
2433 HVM_DBG_LOG(DBG_LEVEL_1, "Guest cleared CR4.PAE while "
2434 "EFER.LMA is set");
2435 vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
2439 __vmwrite(GUEST_CR4, value | HVM_CR4_HOST_MASK);
2440 v->arch.hvm_vmx.cpu_shadow_cr4 = value;
2441 __vmwrite(CR4_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr4);
2443 /*
2444 * Writing to CR4 to modify the PSE, PGE, or PAE flag invalidates
2445 * all TLB entries except global entries.
2446 */
2447 if ( (old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE) )
2448 paging_update_paging_modes(v);
2449 break;
2451 case 8:
2452 vlapic_set_reg(vlapic, APIC_TASKPRI, ((value & 0x0F) << 4));
2453 break;
2455 default:
2456 gdprintk(XENLOG_ERR, "invalid cr: %d\n", cr);
2457 domain_crash(v->domain);
2458 return 0;
2461 return 1;
2463 bad_cr3:
2464 gdprintk(XENLOG_ERR, "Invalid CR3\n");
2465 exit_and_crash:
2466 domain_crash(v->domain);
2467 return 0;
2470 /*
2471 * Read from control registers. CR0 and CR4 are read from the shadow.
2472 */
2473 static void mov_from_cr(int cr, int gp, struct cpu_user_regs *regs)
2475 unsigned long value = 0;
2476 struct vcpu *v = current;
2477 struct vlapic *vlapic = vcpu_vlapic(v);
2479 switch ( cr )
2481 case 3:
2482 value = (unsigned long)v->arch.hvm_vmx.cpu_cr3;
2483 break;
2484 case 8:
2485 value = (unsigned long)vlapic_get_reg(vlapic, APIC_TASKPRI);
2486 value = (value & 0xF0) >> 4;
2487 break;
2488 default:
2489 gdprintk(XENLOG_ERR, "invalid cr: %d\n", cr);
2490 domain_crash(v->domain);
2491 break;
2494 switch ( gp ) {
2495 CASE_SET_REG(EAX, eax);
2496 CASE_SET_REG(ECX, ecx);
2497 CASE_SET_REG(EDX, edx);
2498 CASE_SET_REG(EBX, ebx);
2499 CASE_SET_REG(EBP, ebp);
2500 CASE_SET_REG(ESI, esi);
2501 CASE_SET_REG(EDI, edi);
2502 CASE_EXTEND_SET_REG;
2503 case REG_ESP:
2504 __vmwrite(GUEST_RSP, value);
2505 regs->esp = value;
2506 break;
2507 default:
2508 gdprintk(XENLOG_ERR, "invalid gp: %d\n", gp);
2509 domain_crash(v->domain);
2510 break;
2513 HVMTRACE_2D(CR_READ, v, cr, value);
2515 HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR%d, value = %lx", cr, value);
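/*
 * Decode a CR-access VM exit from its exit qualification: MOV to/from CRn
 * is forwarded to mov_to_cr()/mov_from_cr(), CLTS clears CR0.TS in both the
 * real CR0 and the read shadow, and LMSW merges the low four bits into the
 * shadow CR0 via vmx_set_cr0(). A zero return tells the caller not to
 * advance the guest RIP.
 */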
2518 static int vmx_cr_access(unsigned long exit_qualification,
2519 struct cpu_user_regs *regs)
2521 unsigned int gp, cr;
2522 unsigned long value;
2523 struct vcpu *v = current;
2525 switch (exit_qualification & CONTROL_REG_ACCESS_TYPE) {
2526 case TYPE_MOV_TO_CR:
2527 gp = exit_qualification & CONTROL_REG_ACCESS_REG;
2528 cr = exit_qualification & CONTROL_REG_ACCESS_NUM;
2529 return mov_to_cr(gp, cr, regs);
2530 case TYPE_MOV_FROM_CR:
2531 gp = exit_qualification & CONTROL_REG_ACCESS_REG;
2532 cr = exit_qualification & CONTROL_REG_ACCESS_NUM;
2533 mov_from_cr(cr, gp, regs);
2534 break;
2535 case TYPE_CLTS:
2536 /* We initialise the FPU now, to avoid needing another vmexit. */
2537 setup_fpu(v);
2538 __vm_clear_bit(EXCEPTION_BITMAP, TRAP_no_device);
2540 v->arch.hvm_vmx.cpu_cr0 &= ~X86_CR0_TS; /* clear TS */
2541 __vmwrite(GUEST_CR0, v->arch.hvm_vmx.cpu_cr0);
2543 v->arch.hvm_vmx.cpu_shadow_cr0 &= ~X86_CR0_TS; /* clear TS */
2544 __vmwrite(CR0_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr0);
2545 break;
2546 case TYPE_LMSW:
2547 value = v->arch.hvm_vmx.cpu_shadow_cr0;
2548 value = (value & ~0xF) |
2549 (((exit_qualification & LMSW_SOURCE_DATA) >> 16) & 0xF);
2550 return vmx_set_cr0(value);
2551 default:
2552 BUG();
2555 return 1;
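/*
 * RDMSR handling: the TSC, SYSENTER MSRs and the APIC base are served from
 * HVM state; reads of the MSR_IA32_VMX_* capability range inject #GP.
 * Anything else is offered to the long-mode MSR code, then to the Xen
 * hypervisor-register interface, and finally to a safe host rdmsr.
 */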
2558 static inline int vmx_do_msr_read(struct cpu_user_regs *regs)
2560 u64 msr_content = 0;
2561 u32 ecx = regs->ecx, eax, edx;
2562 struct vcpu *v = current;
2564 HVM_DBG_LOG(DBG_LEVEL_1, "ecx=%x", ecx);
2566 switch ( ecx )
2568 case MSR_IA32_TIME_STAMP_COUNTER:
2569 msr_content = hvm_get_guest_time(v);
2570 break;
2571 case MSR_IA32_SYSENTER_CS:
2572 msr_content = (u32)__vmread(GUEST_SYSENTER_CS);
2573 break;
2574 case MSR_IA32_SYSENTER_ESP:
2575 msr_content = __vmread(GUEST_SYSENTER_ESP);
2576 break;
2577 case MSR_IA32_SYSENTER_EIP:
2578 msr_content = __vmread(GUEST_SYSENTER_EIP);
2579 break;
2580 case MSR_IA32_APICBASE:
2581 msr_content = vcpu_vlapic(v)->hw.apic_base_msr;
2582 break;
2583 case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_CR4_FIXED1:
2584 goto gp_fault;
2585 default:
2586 if ( long_mode_do_msr_read(regs) )
2587 goto done;
2589 if ( rdmsr_hypervisor_regs(ecx, &eax, &edx) ||
2590 rdmsr_safe(ecx, eax, edx) == 0 )
2592 regs->eax = eax;
2593 regs->edx = edx;
2594 goto done;
2597 goto gp_fault;
2600 regs->eax = msr_content & 0xFFFFFFFF;
2601 regs->edx = msr_content >> 32;
2603 done:
2604 HVMTRACE_2D(MSR_READ, v, ecx, msr_content);
2605 HVM_DBG_LOG(DBG_LEVEL_1, "returns: ecx=%x, eax=%lx, edx=%lx",
2606 ecx, (unsigned long)regs->eax,
2607 (unsigned long)regs->edx);
2608 return 1;
2610 gp_fault:
2611 vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
2612 return 0;
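/*
 * APIC access page: when the CPU supports "virtualize APIC accesses", a
 * Xen-owned page is shared with the guest and mapped at
 * APIC_DEFAULT_PHYS_BASE, and its MFN is recorded in vmx_apic_access_mfn
 * so that each VMCS can point APIC_ACCESS_ADDR at it.
 */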
2615 static int vmx_alloc_vlapic_mapping(struct domain *d)
2617 void *apic_va;
2619 if ( !cpu_has_vmx_virtualize_apic_accesses )
2620 return 0;
2622 apic_va = alloc_xenheap_page();
2623 if ( apic_va == NULL )
2624 return -ENOMEM;
2625 share_xen_page_with_guest(virt_to_page(apic_va), d, XENSHARE_writable);
2626 guest_physmap_add_page(
2627 d, paddr_to_pfn(APIC_DEFAULT_PHYS_BASE), virt_to_mfn(apic_va));
2628 d->arch.hvm_domain.vmx_apic_access_mfn = virt_to_mfn(apic_va);
2630 return 0;
2633 static void vmx_free_vlapic_mapping(struct domain *d)
2635 unsigned long mfn = d->arch.hvm_domain.vmx_apic_access_mfn;
2636 if ( mfn != 0 )
2637 free_xenheap_page(mfn_to_virt(mfn));
2640 static void vmx_install_vlapic_mapping(struct vcpu *v)
2642 paddr_t virt_page_ma, apic_page_ma;
2644 if ( !cpu_has_vmx_virtualize_apic_accesses )
2645 return;
2647 virt_page_ma = page_to_maddr(vcpu_vlapic(v)->regs_page);
2648 apic_page_ma = v->domain->arch.hvm_domain.vmx_apic_access_mfn;
2649 apic_page_ma <<= PAGE_SHIFT;
2651 vmx_vmcs_enter(v);
2652 __vmwrite(VIRTUAL_APIC_PAGE_ADDR, virt_page_ma);
2653 __vmwrite(APIC_ACCESS_ADDR, apic_page_ma);
2654 #if defined (CONFIG_X86_PAE)
2655 __vmwrite(VIRTUAL_APIC_PAGE_ADDR_HIGH, virt_page_ma >> 32);
2656 __vmwrite(APIC_ACCESS_ADDR_HIGH, apic_page_ma >> 32);
2657 #endif
2658 vmx_vmcs_exit(v);
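/*
 * Recompute SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES after the vLAPIC base
 * MSR changes: the control stays set only while the vLAPIC is hardware
 * enabled and still located at APIC_DEFAULT_PHYS_BASE.
 */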
2661 void vmx_vlapic_msr_changed(struct vcpu *v)
2663 struct vlapic *vlapic = vcpu_vlapic(v);
2664 uint32_t ctl;
2666 if ( !cpu_has_vmx_virtualize_apic_accesses )
2667 return;
2669 vmx_vmcs_enter(v);
2670 ctl = __vmread(SECONDARY_VM_EXEC_CONTROL);
2671 ctl &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
2672 if ( !vlapic_hw_disabled(vlapic) &&
2673 (vlapic_base_address(vlapic) == APIC_DEFAULT_PHYS_BASE) )
2674 ctl |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
2675 __vmwrite(SECONDARY_VM_EXEC_CONTROL, ctl);
2676 vmx_vmcs_exit(v);
2679 static inline int vmx_do_msr_write(struct cpu_user_regs *regs)
2681 u32 ecx = regs->ecx;
2682 u64 msr_content;
2683 struct vcpu *v = current;
2685 HVM_DBG_LOG(DBG_LEVEL_1, "ecx=%x, eax=%x, edx=%x",
2686 ecx, (u32)regs->eax, (u32)regs->edx);
2688 msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
2689 HVMTRACE_2D(MSR_WRITE, v, ecx, msr_content);
2691 switch ( ecx )
2693 case MSR_IA32_TIME_STAMP_COUNTER:
2694 hvm_set_guest_time(v, msr_content);
2695 pt_reset(v);
2696 break;
2697 case MSR_IA32_SYSENTER_CS:
2698 __vmwrite(GUEST_SYSENTER_CS, msr_content);
2699 break;
2700 case MSR_IA32_SYSENTER_ESP:
2701 __vmwrite(GUEST_SYSENTER_ESP, msr_content);
2702 break;
2703 case MSR_IA32_SYSENTER_EIP:
2704 __vmwrite(GUEST_SYSENTER_EIP, msr_content);
2705 break;
2706 case MSR_IA32_APICBASE:
2707 vlapic_msr_set(vcpu_vlapic(v), msr_content);
2708 break;
2709 case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_CR4_FIXED1:
2710 goto gp_fault;
2711 default:
2712 if ( !long_mode_do_msr_write(regs) )
2713 wrmsr_hypervisor_regs(ecx, regs->eax, regs->edx);
2714 break;
2717 return 1;
2719 gp_fault:
2720 vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
2721 return 0;
2724 static void vmx_do_hlt(void)
2726 unsigned long rflags;
2727 HVMTRACE_0D(HLT, current);
2728 rflags = __vmread(GUEST_RFLAGS);
2729 hvm_hlt(rflags);
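/*
 * A physical interrupt arrived while the guest was running. Read the vector
 * from the exit interruption information and dispatch Xen's own IPI/APIC
 * vectors directly; anything else is handed to do_IRQ().
 */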
2732 static inline void vmx_do_extint(struct cpu_user_regs *regs)
2734 unsigned int vector;
2736 asmlinkage void do_IRQ(struct cpu_user_regs *);
2737 fastcall void smp_apic_timer_interrupt(struct cpu_user_regs *);
2738 fastcall void smp_event_check_interrupt(void);
2739 fastcall void smp_invalidate_interrupt(void);
2740 fastcall void smp_call_function_interrupt(void);
2741 fastcall void smp_spurious_interrupt(struct cpu_user_regs *regs);
2742 fastcall void smp_error_interrupt(struct cpu_user_regs *regs);
2743 #ifdef CONFIG_X86_MCE_P4THERMAL
2744 fastcall void smp_thermal_interrupt(struct cpu_user_regs *regs);
2745 #endif
2747 vector = __vmread(VM_EXIT_INTR_INFO);
2748 BUG_ON(!(vector & INTR_INFO_VALID_MASK));
2750 vector &= INTR_INFO_VECTOR_MASK;
2751 HVMTRACE_1D(INTR, current, vector);
2753 switch(vector) {
2754 case LOCAL_TIMER_VECTOR:
2755 smp_apic_timer_interrupt(regs);
2756 break;
2757 case EVENT_CHECK_VECTOR:
2758 smp_event_check_interrupt();
2759 break;
2760 case INVALIDATE_TLB_VECTOR:
2761 smp_invalidate_interrupt();
2762 break;
2763 case CALL_FUNCTION_VECTOR:
2764 smp_call_function_interrupt();
2765 break;
2766 case SPURIOUS_APIC_VECTOR:
2767 smp_spurious_interrupt(regs);
2768 break;
2769 case ERROR_APIC_VECTOR:
2770 smp_error_interrupt(regs);
2771 break;
2772 #ifdef CONFIG_X86_MCE_P4THERMAL
2773 case THERMAL_APIC_VECTOR:
2774 smp_thermal_interrupt(regs);
2775 break;
2776 #endif
2777 default:
2778 regs->entry_vector = vector;
2779 do_IRQ(regs);
2780 break;
2784 static void vmx_reflect_exception(struct vcpu *v)
2786 int error_code, intr_info, vector;
2788 intr_info = __vmread(VM_EXIT_INTR_INFO);
2789 vector = intr_info & 0xff;
2790 if ( intr_info & INTR_INFO_DELIVER_CODE_MASK )
2791 error_code = __vmread(VM_EXIT_INTR_ERROR_CODE);
2792 else
2793 error_code = VMX_DELIVER_NO_ERROR_CODE;
2795 #ifndef NDEBUG
2797 unsigned long rip;
2799 rip = __vmread(GUEST_RIP);
2800 HVM_DBG_LOG(DBG_LEVEL_1, "rip = %lx, error_code = %x",
2801 rip, error_code);
2803 #endif /* NDEBUG */
2805 /*
2806 * According to Intel Virtualization Technology Specification for
2807 * the IA-32 Intel Architecture (C97063-002 April 2005), section
2808 * 2.8.3, SW_EXCEPTION should be used for #BP and #OF, and
2809 * HW_EXCEPTION used for everything else. The main difference
2810 * appears to be that for SW_EXCEPTION, the EIP/RIP is incremented
2811 * by VM_ENTER_INSTRUCTION_LEN bytes, whereas for HW_EXCEPTION,
2812 * it is not.
2813 */
2814 if ( (intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_SW_EXCEPTION )
2816 int ilen = __get_instruction_length(); /* Safe: software exception */
2817 vmx_inject_sw_exception(v, vector, ilen);
2819 else
2821 vmx_inject_hw_exception(v, vector, error_code);
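/*
 * A VM entry failed. Report why (invalid guest state, MSR loading, or a
 * machine check during entry), dump the VMCS for debugging, and crash the
 * offending domain.
 */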
2825 static void vmx_failed_vmentry(unsigned int exit_reason,
2826 struct cpu_user_regs *regs)
2828 unsigned int failed_vmentry_reason = (uint16_t)exit_reason;
2829 unsigned long exit_qualification;
2831 exit_qualification = __vmread(EXIT_QUALIFICATION);
2832 printk("Failed vm entry (exit reason 0x%x) ", exit_reason);
2833 switch ( failed_vmentry_reason )
2835 case EXIT_REASON_INVALID_GUEST_STATE:
2836 printk("caused by invalid guest state (%ld).\n", exit_qualification);
2837 break;
2838 case EXIT_REASON_MSR_LOADING:
2839 printk("caused by MSR entry %ld loading.\n", exit_qualification);
2840 break;
2841 case EXIT_REASON_MACHINE_CHECK:
2842 printk("caused by machine check.\n");
2843 HVMTRACE_0D(MCE, current);
2844 vmx_store_cpu_guest_regs(current, regs, NULL);
2845 do_machine_check(regs);
2846 break;
2847 default:
2848 printk("reason not known yet!\n");
2849 break;
2852 printk("************* VMCS Area **************\n");
2853 vmcs_dump_vcpu();
2854 printk("**************************************\n");
2856 domain_crash(current->domain);
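/*
 * Main VM-exit dispatcher. Local IRQs are re-enabled unless the exit itself
 * was caused by an external interrupt; failed VM entries are diagnosed
 * separately, and every other exit reason is handled (or the domain is
 * crashed) in the switch below.
 */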
2859 asmlinkage void vmx_vmexit_handler(struct cpu_user_regs *regs)
2861 unsigned int exit_reason;
2862 unsigned long exit_qualification, inst_len = 0;
2863 struct vcpu *v = current;
2865 exit_reason = __vmread(VM_EXIT_REASON);
2867 HVMTRACE_2D(VMEXIT, v, __vmread(GUEST_RIP), exit_reason);
2869 perfc_incra(vmexits, exit_reason);
2871 if ( exit_reason != EXIT_REASON_EXTERNAL_INTERRUPT )
2872 local_irq_enable();
2874 if ( unlikely(exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) )
2875 return vmx_failed_vmentry(exit_reason, regs);
2877 switch ( exit_reason )
2879 case EXIT_REASON_EXCEPTION_NMI:
2881 /*
2882 * We don't enable software-interrupt (INT n) exiting, so we can only
2883 * get here for (1) a hardware exception (e.g. #PF) raised in the
2884 * guest, or (2) an NMI.
2885 */
2886 unsigned int intr_info, vector;
2888 intr_info = __vmread(VM_EXIT_INTR_INFO);
2889 BUG_ON(!(intr_info & INTR_INFO_VALID_MASK));
2891 vector = intr_info & INTR_INFO_VECTOR_MASK;
2893 perfc_incra(cause_vector, vector);
2895 switch ( vector )
2897 case TRAP_debug:
2898 case TRAP_int3:
2899 if ( !v->domain->debugger_attached )
2900 goto exit_and_crash;
2901 domain_pause_for_debugger();
2902 break;
2903 case TRAP_no_device:
2904 vmx_do_no_device_fault();
2905 break;
2906 case TRAP_page_fault:
2907 exit_qualification = __vmread(EXIT_QUALIFICATION);
2908 regs->error_code = __vmread(VM_EXIT_INTR_ERROR_CODE);
2910 HVM_DBG_LOG(DBG_LEVEL_VMMU,
2911 "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx",
2912 (unsigned long)regs->eax, (unsigned long)regs->ebx,
2913 (unsigned long)regs->ecx, (unsigned long)regs->edx,
2914 (unsigned long)regs->esi, (unsigned long)regs->edi);
2916 if ( paging_fault(exit_qualification, regs) )
2918 HVMTRACE_2D(PF_XEN, v, exit_qualification, regs->error_code);
2919 break;
2922 v->arch.hvm_vmx.cpu_cr2 = exit_qualification;
2923 vmx_inject_hw_exception(v, TRAP_page_fault, regs->error_code);
2924 break;
2925 case TRAP_nmi:
2926 if ( (intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI )
2928 HVMTRACE_0D(NMI, v);
2929 vmx_store_cpu_guest_regs(v, regs, NULL);
2930 do_nmi(regs); /* Real NMI, vector 2: normal processing. */
2932 else
2933 vmx_reflect_exception(v);
2934 break;
2935 case TRAP_machine_check:
2936 HVMTRACE_0D(MCE, v);
2937 vmx_store_cpu_guest_regs(v, regs, NULL);
2938 do_machine_check(regs);
2939 break;
2940 default:
2941 goto exit_and_crash;
2943 break;
2945 case EXIT_REASON_EXTERNAL_INTERRUPT:
2946 vmx_do_extint(regs);
2947 break;
2948 case EXIT_REASON_TRIPLE_FAULT:
2949 hvm_triple_fault();
2950 break;
2951 case EXIT_REASON_PENDING_VIRT_INTR:
2952 /* Disable the interrupt window. */
2953 v->arch.hvm_vcpu.u.vmx.exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING;
2954 __vmwrite(CPU_BASED_VM_EXEC_CONTROL,
2955 v->arch.hvm_vcpu.u.vmx.exec_control);
2956 break;
2957 case EXIT_REASON_PENDING_VIRT_NMI:
2958 /* Disable the NMI window. */
2959 v->arch.hvm_vcpu.u.vmx.exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING;
2960 __vmwrite(CPU_BASED_VM_EXEC_CONTROL,
2961 v->arch.hvm_vcpu.u.vmx.exec_control);
2962 break;
2963 case EXIT_REASON_TASK_SWITCH:
2964 goto exit_and_crash;
2965 case EXIT_REASON_CPUID:
2966 inst_len = __get_instruction_length(); /* Safe: CPUID */
2967 __update_guest_eip(inst_len);
2968 vmx_do_cpuid(regs);
2969 break;
2970 case EXIT_REASON_HLT:
2971 inst_len = __get_instruction_length(); /* Safe: HLT */
2972 __update_guest_eip(inst_len);
2973 vmx_do_hlt();
2974 break;
2975 case EXIT_REASON_INVLPG:
2977 inst_len = __get_instruction_length(); /* Safe: INVLPG */
2978 __update_guest_eip(inst_len);
2979 exit_qualification = __vmread(EXIT_QUALIFICATION);
2980 vmx_do_invlpg(exit_qualification);
2981 break;
2983 case EXIT_REASON_VMCALL:
2985 int rc;
2986 HVMTRACE_1D(VMMCALL, v, regs->eax);
2987 inst_len = __get_instruction_length(); /* Safe: VMCALL */
2988 rc = hvm_do_hypercall(regs);
2989 if ( rc != HVM_HCALL_preempted )
2991 __update_guest_eip(inst_len);
2992 if ( rc == HVM_HCALL_invalidate )
2993 send_invalidate_req();
2995 break;
2997 case EXIT_REASON_CR_ACCESS:
2999 exit_qualification = __vmread(EXIT_QUALIFICATION);
3000 inst_len = __get_instruction_length(); /* Safe: MOV Cn, LMSW, CLTS */
3001 if ( vmx_cr_access(exit_qualification, regs) )
3002 __update_guest_eip(inst_len);
3003 break;
3005 case EXIT_REASON_DR_ACCESS:
3006 exit_qualification = __vmread(EXIT_QUALIFICATION);
3007 vmx_dr_access(exit_qualification, regs);
3008 break;
3009 case EXIT_REASON_IO_INSTRUCTION:
3010 exit_qualification = __vmread(EXIT_QUALIFICATION);
3011 inst_len = __get_instruction_length(); /* Safe: IN, INS, OUT, OUTS */
3012 vmx_io_instruction(exit_qualification, inst_len);
3013 break;
3014 case EXIT_REASON_MSR_READ:
3015 inst_len = __get_instruction_length(); /* Safe: RDMSR */
3016 if ( vmx_do_msr_read(regs) )
3017 __update_guest_eip(inst_len);
3018 break;
3019 case EXIT_REASON_MSR_WRITE:
3020 inst_len = __get_instruction_length(); /* Safe: WRMSR */
3021 if ( vmx_do_msr_write(regs) )
3022 __update_guest_eip(inst_len);
3023 break;
3024 case EXIT_REASON_MWAIT_INSTRUCTION:
3025 case EXIT_REASON_MONITOR_INSTRUCTION:
3026 case EXIT_REASON_PAUSE_INSTRUCTION:
3027 goto exit_and_crash;
3028 case EXIT_REASON_VMCLEAR:
3029 case EXIT_REASON_VMLAUNCH:
3030 case EXIT_REASON_VMPTRLD:
3031 case EXIT_REASON_VMPTRST:
3032 case EXIT_REASON_VMREAD:
3033 case EXIT_REASON_VMRESUME:
3034 case EXIT_REASON_VMWRITE:
3035 case EXIT_REASON_VMXOFF:
3036 case EXIT_REASON_VMXON:
3037 /* Report an invalid-opcode exception when a VMX guest tries to
3038 * execute any of the VMX instructions. */
3039 vmx_inject_hw_exception(v, TRAP_invalid_op, VMX_DELIVER_NO_ERROR_CODE);
3040 break;
3042 case EXIT_REASON_TPR_BELOW_THRESHOLD:
3043 break;
3045 case EXIT_REASON_APIC_ACCESS:
3047 unsigned long offset;
3048 exit_qualification = __vmread(EXIT_QUALIFICATION);
3049 offset = exit_qualification & 0x0fffUL;
3050 handle_mmio(APIC_DEFAULT_PHYS_BASE | offset);
3051 break;
3054 default:
3055 exit_and_crash:
3056 gdprintk(XENLOG_ERR, "Bad vmexit (reason %x)\n", exit_reason);
3057 domain_crash(v->domain);
3058 break;
3062 asmlinkage void vmx_trace_vmentry(void)
3064 struct vcpu *v = current;
3065 HVMTRACE_0D(VMENTRY, v);
3068 /*
3069 * Local variables:
3070 * mode: C
3071 * c-set-style: "BSD"
3072 * c-basic-offset: 4
3073 * tab-width: 4
3074 * indent-tabs-mode: nil
3075 * End:
3076 */