direct-io.hg: view xen/arch/x86/hvm/vmx/vmx.c @ 15388:50358c4b37f4

hvm: Support injection of virtual NMIs and clean up ExtInt handling in general.
Signed-off-by: Keir Fraser <keir@xensource.com>

author   kfraser@localhost.localdomain
date     Wed Jun 20 11:50:16 2007 +0100 (2007-06-20)
parents  739d698986e9
children 3624ba0caccc
1 /*
2 * vmx.c: handling VMX architecture-related VM exits
3 * Copyright (c) 2004, Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
16 * Place - Suite 330, Boston, MA 02111-1307 USA.
17 *
18 */
20 #include <xen/config.h>
21 #include <xen/init.h>
22 #include <xen/lib.h>
23 #include <xen/trace.h>
24 #include <xen/sched.h>
25 #include <xen/irq.h>
26 #include <xen/softirq.h>
27 #include <xen/domain_page.h>
28 #include <xen/hypercall.h>
29 #include <xen/perfc.h>
30 #include <asm/current.h>
31 #include <asm/io.h>
32 #include <asm/regs.h>
33 #include <asm/cpufeature.h>
34 #include <asm/processor.h>
35 #include <asm/types.h>
36 #include <asm/msr.h>
37 #include <asm/spinlock.h>
38 #include <asm/paging.h>
39 #include <asm/p2m.h>
40 #include <asm/hvm/hvm.h>
41 #include <asm/hvm/support.h>
42 #include <asm/hvm/vmx/vmx.h>
43 #include <asm/hvm/vmx/vmcs.h>
44 #include <asm/hvm/vmx/cpu.h>
45 #include <public/sched.h>
46 #include <public/hvm/ioreq.h>
47 #include <asm/hvm/vpic.h>
48 #include <asm/hvm/vlapic.h>
49 #include <asm/x86_emulate.h>
50 #include <asm/hvm/vpt.h>
51 #include <public/hvm/save.h>
52 #include <asm/hvm/trace.h>
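/* Global MSR-intercept bitmap, allocated in start_vmx() when the CPU supports
 * it. A set bit forces a VM exit on the corresponding RDMSR/WRMSR; bits are
 * cleared selectively by disable_intercept_for_msr() below. */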
54 char *vmx_msr_bitmap;
56 static void vmx_ctxt_switch_from(struct vcpu *v);
57 static void vmx_ctxt_switch_to(struct vcpu *v);
59 static int vmx_alloc_vlapic_mapping(struct domain *d);
60 static void vmx_free_vlapic_mapping(struct domain *d);
61 static void vmx_install_vlapic_mapping(struct vcpu *v);
63 static int vmx_domain_initialise(struct domain *d)
64 {
65 return vmx_alloc_vlapic_mapping(d);
66 }
68 static void vmx_domain_destroy(struct domain *d)
69 {
70 vmx_free_vlapic_mapping(d);
71 }
73 static int vmx_vcpu_initialise(struct vcpu *v)
74 {
75 int rc;
77 spin_lock_init(&v->arch.hvm_vmx.vmcs_lock);
79 v->arch.schedule_tail = vmx_do_resume;
80 v->arch.ctxt_switch_from = vmx_ctxt_switch_from;
81 v->arch.ctxt_switch_to = vmx_ctxt_switch_to;
83 if ( (rc = vmx_create_vmcs(v)) != 0 )
84 {
85 dprintk(XENLOG_WARNING,
86 "Failed to create VMCS for vcpu %d: err=%d.\n",
87 v->vcpu_id, rc);
88 return rc;
89 }
91 vmx_install_vlapic_mapping(v);
93 return 0;
94 }
96 static void vmx_vcpu_destroy(struct vcpu *v)
97 {
98 vmx_destroy_vmcs(v);
99 }
101 #ifdef __x86_64__
103 static DEFINE_PER_CPU(struct vmx_msr_state, host_msr_state);
105 static u32 msr_index[VMX_MSR_COUNT] =
106 {
107 MSR_LSTAR, MSR_STAR, MSR_SYSCALL_MASK
108 };
110 static void vmx_save_host_msrs(void)
111 {
112 struct vmx_msr_state *host_msr_state = &this_cpu(host_msr_state);
113 int i;
115 for ( i = 0; i < VMX_MSR_COUNT; i++ )
116 rdmsrl(msr_index[i], host_msr_state->msrs[i]);
117 }
119 #define WRITE_MSR(address) \
120 guest_msr_state->msrs[VMX_INDEX_MSR_ ## address] = msr_content; \
121 set_bit(VMX_INDEX_MSR_ ## address, &guest_msr_state->flags); \
122 wrmsrl(MSR_ ## address, msr_content); \
123 set_bit(VMX_INDEX_MSR_ ## address, &host_msr_state->flags); \
124 break
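/* WRITE_MSR records the value in the guest MSR state, marks the MSR dirty in
 * both the guest and host flag masks (so the host value is restored on the
 * next context switch away), and writes it straight through to hardware.
 * Note the trailing 'break': it must be the last statement of its case. */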
126 static inline int long_mode_do_msr_read(struct cpu_user_regs *regs)
127 {
128 u64 msr_content = 0;
129 u32 ecx = regs->ecx;
130 struct vcpu *v = current;
131 struct vmx_msr_state *guest_msr_state = &v->arch.hvm_vmx.msr_state;
133 switch ( ecx ) {
134 case MSR_EFER:
135 msr_content = v->arch.hvm_vmx.efer;
136 break;
138 case MSR_FS_BASE:
139 msr_content = __vmread(GUEST_FS_BASE);
140 goto check_long_mode;
142 case MSR_GS_BASE:
143 msr_content = __vmread(GUEST_GS_BASE);
144 goto check_long_mode;
146 case MSR_SHADOW_GS_BASE:
147 msr_content = v->arch.hvm_vmx.shadow_gs;
148 check_long_mode:
149 if ( !(vmx_long_mode_enabled(v)) )
150 {
151 vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
152 return 0;
153 }
154 break;
156 case MSR_STAR:
157 msr_content = guest_msr_state->msrs[VMX_INDEX_MSR_STAR];
158 break;
160 case MSR_LSTAR:
161 msr_content = guest_msr_state->msrs[VMX_INDEX_MSR_LSTAR];
162 break;
164 case MSR_CSTAR:
165 msr_content = v->arch.hvm_vmx.cstar;
166 break;
168 case MSR_SYSCALL_MASK:
169 msr_content = guest_msr_state->msrs[VMX_INDEX_MSR_SYSCALL_MASK];
170 break;
172 default:
173 return 0;
174 }
176 HVM_DBG_LOG(DBG_LEVEL_0, "msr 0x%x content 0x%"PRIx64, ecx, msr_content);
178 regs->eax = (u32)(msr_content >> 0);
179 regs->edx = (u32)(msr_content >> 32);
181 return 1;
182 }
184 static inline int long_mode_do_msr_write(struct cpu_user_regs *regs)
185 {
186 u64 msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
187 u32 ecx = regs->ecx;
188 struct vcpu *v = current;
189 struct vmx_msr_state *guest_msr_state = &v->arch.hvm_vmx.msr_state;
190 struct vmx_msr_state *host_msr_state = &this_cpu(host_msr_state);
192 HVM_DBG_LOG(DBG_LEVEL_0, "msr 0x%x content 0x%"PRIx64, ecx, msr_content);
194 switch ( ecx )
195 {
196 case MSR_EFER:
197 /* offending reserved bit will cause #GP */
198 if ( (msr_content & ~(EFER_LME | EFER_LMA | EFER_NX | EFER_SCE)) ||
199 (!cpu_has_nx && (msr_content & EFER_NX)) ||
200 (!cpu_has_syscall && (msr_content & EFER_SCE)) )
201 {
202 gdprintk(XENLOG_WARNING, "Trying to set reserved bit in "
203 "EFER: %"PRIx64"\n", msr_content);
204 goto gp_fault;
205 }
207 if ( (msr_content & EFER_LME)
208 && !(v->arch.hvm_vmx.efer & EFER_LME) )
209 {
210 if ( unlikely(vmx_paging_enabled(v)) )
211 {
212 gdprintk(XENLOG_WARNING,
213 "Trying to set EFER.LME with paging enabled\n");
214 goto gp_fault;
215 }
216 }
217 else if ( !(msr_content & EFER_LME)
218 && (v->arch.hvm_vmx.efer & EFER_LME) )
219 {
220 if ( unlikely(vmx_paging_enabled(v)) )
221 {
222 gdprintk(XENLOG_WARNING,
223 "Trying to clear EFER.LME with paging enabled\n");
224 goto gp_fault;
225 }
226 }
228 if ( (msr_content ^ v->arch.hvm_vmx.efer) & (EFER_NX|EFER_SCE) )
229 write_efer((read_efer() & ~(EFER_NX|EFER_SCE)) |
230 (msr_content & (EFER_NX|EFER_SCE)));
232 v->arch.hvm_vmx.efer = msr_content;
233 break;
235 case MSR_FS_BASE:
236 case MSR_GS_BASE:
237 case MSR_SHADOW_GS_BASE:
238 if ( !vmx_long_mode_enabled(v) )
239 goto gp_fault;
241 if ( !is_canonical_address(msr_content) )
242 goto uncanonical_address;
244 if ( ecx == MSR_FS_BASE )
245 __vmwrite(GUEST_FS_BASE, msr_content);
246 else if ( ecx == MSR_GS_BASE )
247 __vmwrite(GUEST_GS_BASE, msr_content);
248 else
249 {
250 v->arch.hvm_vmx.shadow_gs = msr_content;
251 wrmsrl(MSR_SHADOW_GS_BASE, msr_content);
252 }
254 break;
256 case MSR_STAR:
257 WRITE_MSR(STAR);
259 case MSR_LSTAR:
260 if ( !is_canonical_address(msr_content) )
261 goto uncanonical_address;
262 WRITE_MSR(LSTAR);
264 case MSR_CSTAR:
265 if ( !is_canonical_address(msr_content) )
266 goto uncanonical_address;
267 v->arch.hvm_vmx.cstar = msr_content;
268 break;
270 case MSR_SYSCALL_MASK:
271 WRITE_MSR(SYSCALL_MASK);
273 default:
274 return 0;
275 }
277 return 1;
279 uncanonical_address:
280 HVM_DBG_LOG(DBG_LEVEL_0, "Non-canonical address for msr write %x", ecx);
281 gp_fault:
282 vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
283 return 0;
284 }
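/* Note that only EFER.NX and EFER.SCE are propagated to the physical EFER
 * here; EFER.LME/LMA take effect through the VM-entry controls
 * (VM_ENTRY_IA32E_MODE) once the guest subsequently enables paging. */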
286 /*
287 * To avoid MSR save/restore at every VM exit/entry time, we restore
288 * the x86_64 specific MSRs at domain switch time. Since these MSRs
289 * are not modified once set for para domains, we don't save them,
290 * but simply reset them to values set in percpu_traps_init().
291 */
292 static void vmx_restore_host_msrs(void)
293 {
294 struct vmx_msr_state *host_msr_state = &this_cpu(host_msr_state);
295 int i;
297 while ( host_msr_state->flags )
298 {
299 i = find_first_set_bit(host_msr_state->flags);
300 wrmsrl(msr_index[i], host_msr_state->msrs[i]);
301 clear_bit(i, &host_msr_state->flags);
302 }
303 if ( cpu_has_nx && !(read_efer() & EFER_NX) )
304 write_efer(read_efer() | EFER_NX);
305 }
307 static void vmx_save_guest_msrs(struct vcpu *v)
308 {
309 /* MSR_SHADOW_GS_BASE may have been changed by swapgs instruction. */
310 rdmsrl(MSR_SHADOW_GS_BASE, v->arch.hvm_vmx.shadow_gs);
311 }
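/* Lazily reload the guest's syscall-related MSRs: only the entries flagged
 * dirty in guest_msr_state->flags are written to hardware, and each one is
 * also flagged in host_msr_state so vmx_restore_host_msrs() knows to put the
 * host value back on the next context switch away from this vcpu. */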
313 static void vmx_restore_guest_msrs(struct vcpu *v)
314 {
315 struct vmx_msr_state *guest_msr_state, *host_msr_state;
316 unsigned long guest_flags;
317 int i;
319 guest_msr_state = &v->arch.hvm_vmx.msr_state;
320 host_msr_state = &this_cpu(host_msr_state);
322 wrmsrl(MSR_SHADOW_GS_BASE, v->arch.hvm_vmx.shadow_gs);
324 guest_flags = guest_msr_state->flags;
326 while ( guest_flags ) {
327 i = find_first_set_bit(guest_flags);
329 HVM_DBG_LOG(DBG_LEVEL_2,
330 "restore guest's index %d msr %x with value %lx",
331 i, msr_index[i], guest_msr_state->msrs[i]);
332 set_bit(i, &host_msr_state->flags);
333 wrmsrl(msr_index[i], guest_msr_state->msrs[i]);
334 clear_bit(i, &guest_flags);
335 }
337 if ( (v->arch.hvm_vmx.efer ^ read_efer()) & (EFER_NX|EFER_SCE) )
338 {
339 HVM_DBG_LOG(DBG_LEVEL_2,
340 "restore guest's EFER with value %lx",
341 v->arch.hvm_vmx.efer);
342 write_efer((read_efer() & ~(EFER_NX|EFER_SCE)) |
343 (v->arch.hvm_vmx.efer & (EFER_NX|EFER_SCE)));
344 }
345 }
347 #else /* __i386__ */
349 #define vmx_save_host_msrs() ((void)0)
351 static void vmx_restore_host_msrs(void)
352 {
353 if ( cpu_has_nx && !(read_efer() & EFER_NX) )
354 write_efer(read_efer() | EFER_NX);
355 }
357 #define vmx_save_guest_msrs(v) ((void)0)
359 static void vmx_restore_guest_msrs(struct vcpu *v)
360 {
361 if ( (v->arch.hvm_vmx.efer ^ read_efer()) & EFER_NX )
362 {
363 HVM_DBG_LOG(DBG_LEVEL_2,
364 "restore guest's EFER with value %lx",
365 v->arch.hvm_vmx.efer);
366 write_efer((read_efer() & ~EFER_NX) |
367 (v->arch.hvm_vmx.efer & EFER_NX));
368 }
369 }
371 static inline int long_mode_do_msr_read(struct cpu_user_regs *regs)
372 {
373 u64 msr_content = 0;
374 struct vcpu *v = current;
376 switch ( regs->ecx ) {
377 case MSR_EFER:
378 msr_content = v->arch.hvm_vmx.efer;
379 break;
381 default:
382 return 0;
383 }
385 regs->eax = msr_content >> 0;
386 regs->edx = msr_content >> 32;
388 return 1;
389 }
391 static inline int long_mode_do_msr_write(struct cpu_user_regs *regs)
392 {
393 u64 msr_content = regs->eax | ((u64)regs->edx << 32);
394 struct vcpu *v = current;
396 switch ( regs->ecx )
397 {
398 case MSR_EFER:
399 /* offending reserved bit will cause #GP */
400 if ( (msr_content & ~EFER_NX) ||
401 (!cpu_has_nx && (msr_content & EFER_NX)) )
402 {
403 gdprintk(XENLOG_WARNING, "Trying to set reserved bit in "
404 "EFER: %"PRIx64"\n", msr_content);
405 vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
406 return 0;
407 }
409 if ( (msr_content ^ v->arch.hvm_vmx.efer) & EFER_NX )
410 write_efer((read_efer() & ~EFER_NX) | (msr_content & EFER_NX));
412 v->arch.hvm_vmx.efer = msr_content;
413 break;
415 default:
416 return 0;
417 }
419 return 1;
420 }
422 #endif /* __i386__ */
424 #define loaddebug(_v,_reg) \
425 __asm__ __volatile__ ("mov %0,%%db" #_reg : : "r" ((_v)->debugreg[_reg]))
426 #define savedebug(_v,_reg) \
427 __asm__ __volatile__ ("mov %%db" #_reg ",%0" : "=r" ((_v)->debugreg[_reg]))
429 static inline void vmx_save_dr(struct vcpu *v)
430 {
431 if ( !v->arch.hvm_vcpu.flag_dr_dirty )
432 return;
434 /* Clear the DR dirty flag and re-enable intercepts for DR accesses. */
435 v->arch.hvm_vcpu.flag_dr_dirty = 0;
436 v->arch.hvm_vcpu.u.vmx.exec_control |= CPU_BASED_MOV_DR_EXITING;
437 __vmwrite(CPU_BASED_VM_EXEC_CONTROL, v->arch.hvm_vcpu.u.vmx.exec_control);
439 savedebug(&v->arch.guest_context, 0);
440 savedebug(&v->arch.guest_context, 1);
441 savedebug(&v->arch.guest_context, 2);
442 savedebug(&v->arch.guest_context, 3);
443 savedebug(&v->arch.guest_context, 6);
444 v->arch.guest_context.debugreg[7] = __vmread(GUEST_DR7);
445 }
447 static inline void __restore_debug_registers(struct vcpu *v)
448 {
449 loaddebug(&v->arch.guest_context, 0);
450 loaddebug(&v->arch.guest_context, 1);
451 loaddebug(&v->arch.guest_context, 2);
452 loaddebug(&v->arch.guest_context, 3);
453 /* No 4 and 5 */
454 loaddebug(&v->arch.guest_context, 6);
455 /* DR7 is loaded from the VMCS. */
456 }
458 int vmx_vmcs_save(struct vcpu *v, struct hvm_hw_cpu *c)
459 {
460 uint32_t ev;
462 c->rip = __vmread(GUEST_RIP);
463 c->rsp = __vmread(GUEST_RSP);
464 c->rflags = __vmread(GUEST_RFLAGS);
466 c->cr0 = v->arch.hvm_vmx.cpu_shadow_cr0;
467 c->cr2 = v->arch.hvm_vmx.cpu_cr2;
468 c->cr3 = v->arch.hvm_vmx.cpu_cr3;
469 c->cr4 = v->arch.hvm_vmx.cpu_shadow_cr4;
471 #ifdef HVM_DEBUG_SUSPEND
472 printk("vmx_vmcs_save: cr3=0x%"PRIx64", cr0=0x%"PRIx64", cr4=0x%"PRIx64".\n",
473 c->cr3,
474 c->cr0,
475 c->cr4);
476 #endif
478 c->idtr_limit = __vmread(GUEST_IDTR_LIMIT);
479 c->idtr_base = __vmread(GUEST_IDTR_BASE);
481 c->gdtr_limit = __vmread(GUEST_GDTR_LIMIT);
482 c->gdtr_base = __vmread(GUEST_GDTR_BASE);
484 c->cs_sel = __vmread(GUEST_CS_SELECTOR);
485 c->cs_limit = __vmread(GUEST_CS_LIMIT);
486 c->cs_base = __vmread(GUEST_CS_BASE);
487 c->cs_arbytes = __vmread(GUEST_CS_AR_BYTES);
489 c->ds_sel = __vmread(GUEST_DS_SELECTOR);
490 c->ds_limit = __vmread(GUEST_DS_LIMIT);
491 c->ds_base = __vmread(GUEST_DS_BASE);
492 c->ds_arbytes = __vmread(GUEST_DS_AR_BYTES);
494 c->es_sel = __vmread(GUEST_ES_SELECTOR);
495 c->es_limit = __vmread(GUEST_ES_LIMIT);
496 c->es_base = __vmread(GUEST_ES_BASE);
497 c->es_arbytes = __vmread(GUEST_ES_AR_BYTES);
499 c->ss_sel = __vmread(GUEST_SS_SELECTOR);
500 c->ss_limit = __vmread(GUEST_SS_LIMIT);
501 c->ss_base = __vmread(GUEST_SS_BASE);
502 c->ss_arbytes = __vmread(GUEST_SS_AR_BYTES);
504 c->fs_sel = __vmread(GUEST_FS_SELECTOR);
505 c->fs_limit = __vmread(GUEST_FS_LIMIT);
506 c->fs_base = __vmread(GUEST_FS_BASE);
507 c->fs_arbytes = __vmread(GUEST_FS_AR_BYTES);
509 c->gs_sel = __vmread(GUEST_GS_SELECTOR);
510 c->gs_limit = __vmread(GUEST_GS_LIMIT);
511 c->gs_base = __vmread(GUEST_GS_BASE);
512 c->gs_arbytes = __vmread(GUEST_GS_AR_BYTES);
514 c->tr_sel = __vmread(GUEST_TR_SELECTOR);
515 c->tr_limit = __vmread(GUEST_TR_LIMIT);
516 c->tr_base = __vmread(GUEST_TR_BASE);
517 c->tr_arbytes = __vmread(GUEST_TR_AR_BYTES);
519 c->ldtr_sel = __vmread(GUEST_LDTR_SELECTOR);
520 c->ldtr_limit = __vmread(GUEST_LDTR_LIMIT);
521 c->ldtr_base = __vmread(GUEST_LDTR_BASE);
522 c->ldtr_arbytes = __vmread(GUEST_LDTR_AR_BYTES);
524 c->sysenter_cs = __vmread(GUEST_SYSENTER_CS);
525 c->sysenter_esp = __vmread(GUEST_SYSENTER_ESP);
526 c->sysenter_eip = __vmread(GUEST_SYSENTER_EIP);
528 /* Save any event/interrupt that was being injected when we last
529 * exited. IDT_VECTORING_INFO_FIELD has priority, as anything in
530 * VM_ENTRY_INTR_INFO_FIELD is either a fault caused by the first
531 * event, which will happen the next time, or an interrupt, which we
532 * never inject when IDT_VECTORING_INFO_FIELD is valid.*/
533 if ( (ev = __vmread(IDT_VECTORING_INFO_FIELD)) & INTR_INFO_VALID_MASK )
534 {
535 c->pending_event = ev;
536 c->error_code = __vmread(IDT_VECTORING_ERROR_CODE);
537 }
538 else if ( (ev = __vmread(VM_ENTRY_INTR_INFO_FIELD))
539 & INTR_INFO_VALID_MASK )
540 {
541 c->pending_event = ev;
542 c->error_code = __vmread(VM_ENTRY_EXCEPTION_ERROR_CODE);
543 }
544 else
545 {
546 c->pending_event = 0;
547 c->error_code = 0;
548 }
550 return 1;
551 }
553 int vmx_vmcs_restore(struct vcpu *v, struct hvm_hw_cpu *c)
554 {
555 unsigned long mfn, old_base_mfn;
557 vmx_vmcs_enter(v);
559 __vmwrite(GUEST_RIP, c->rip);
560 __vmwrite(GUEST_RSP, c->rsp);
561 __vmwrite(GUEST_RFLAGS, c->rflags);
563 v->arch.hvm_vmx.cpu_shadow_cr0 = c->cr0;
564 __vmwrite(CR0_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr0);
566 v->arch.hvm_vmx.cpu_cr2 = c->cr2;
568 #ifdef HVM_DEBUG_SUSPEND
569 printk("vmx_vmcs_restore: cr3=0x%"PRIx64", cr0=0x%"PRIx64", cr4=0x%"PRIx64".\n",
570 c->cr3,
571 c->cr0,
572 c->cr4);
573 #endif
575 if (!vmx_paging_enabled(v)) {
576 printk("vmx_vmcs_restore: paging not enabled.");
577 goto skip_cr3;
578 }
580 if (c->cr3 == v->arch.hvm_vmx.cpu_cr3) {
581 /*
582 * This is simple TLB flush, implying the guest has
583 * removed some translation or changed page attributes.
584 * We simply invalidate the shadow.
585 */
586 mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
587 if (mfn != pagetable_get_pfn(v->arch.guest_table)) {
588 goto bad_cr3;
589 }
590 } else {
591 /*
592 * If different, make a shadow. Check if the PDBR is valid
593 * first.
594 */
595 HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 c->cr3 = %"PRIx64, c->cr3);
596 /* current!=vcpu as not called by arch_vmx_do_launch */
597 mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
598 if( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain)) {
599 goto bad_cr3;
600 }
601 old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
602 v->arch.guest_table = pagetable_from_pfn(mfn);
603 if (old_base_mfn)
604 put_page(mfn_to_page(old_base_mfn));
605 v->arch.hvm_vmx.cpu_cr3 = c->cr3;
606 }
608 skip_cr3:
609 #if defined(__x86_64__)
610 if (vmx_long_mode_enabled(v)) {
611 unsigned long vm_entry_value;
612 vm_entry_value = __vmread(VM_ENTRY_CONTROLS);
613 vm_entry_value |= VM_ENTRY_IA32E_MODE;
614 __vmwrite(VM_ENTRY_CONTROLS, vm_entry_value);
615 }
616 #endif
618 __vmwrite(GUEST_CR4, (c->cr4 | VMX_CR4_HOST_MASK));
619 v->arch.hvm_vmx.cpu_shadow_cr4 = c->cr4;
620 __vmwrite(CR4_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr4);
622 __vmwrite(GUEST_IDTR_LIMIT, c->idtr_limit);
623 __vmwrite(GUEST_IDTR_BASE, c->idtr_base);
625 __vmwrite(GUEST_GDTR_LIMIT, c->gdtr_limit);
626 __vmwrite(GUEST_GDTR_BASE, c->gdtr_base);
628 __vmwrite(GUEST_CS_SELECTOR, c->cs_sel);
629 __vmwrite(GUEST_CS_LIMIT, c->cs_limit);
630 __vmwrite(GUEST_CS_BASE, c->cs_base);
631 __vmwrite(GUEST_CS_AR_BYTES, c->cs_arbytes);
633 __vmwrite(GUEST_DS_SELECTOR, c->ds_sel);
634 __vmwrite(GUEST_DS_LIMIT, c->ds_limit);
635 __vmwrite(GUEST_DS_BASE, c->ds_base);
636 __vmwrite(GUEST_DS_AR_BYTES, c->ds_arbytes);
638 __vmwrite(GUEST_ES_SELECTOR, c->es_sel);
639 __vmwrite(GUEST_ES_LIMIT, c->es_limit);
640 __vmwrite(GUEST_ES_BASE, c->es_base);
641 __vmwrite(GUEST_ES_AR_BYTES, c->es_arbytes);
643 __vmwrite(GUEST_SS_SELECTOR, c->ss_sel);
644 __vmwrite(GUEST_SS_LIMIT, c->ss_limit);
645 __vmwrite(GUEST_SS_BASE, c->ss_base);
646 __vmwrite(GUEST_SS_AR_BYTES, c->ss_arbytes);
648 __vmwrite(GUEST_FS_SELECTOR, c->fs_sel);
649 __vmwrite(GUEST_FS_LIMIT, c->fs_limit);
650 __vmwrite(GUEST_FS_BASE, c->fs_base);
651 __vmwrite(GUEST_FS_AR_BYTES, c->fs_arbytes);
653 __vmwrite(GUEST_GS_SELECTOR, c->gs_sel);
654 __vmwrite(GUEST_GS_LIMIT, c->gs_limit);
655 __vmwrite(GUEST_GS_BASE, c->gs_base);
656 __vmwrite(GUEST_GS_AR_BYTES, c->gs_arbytes);
658 __vmwrite(GUEST_TR_SELECTOR, c->tr_sel);
659 __vmwrite(GUEST_TR_LIMIT, c->tr_limit);
660 __vmwrite(GUEST_TR_BASE, c->tr_base);
661 __vmwrite(GUEST_TR_AR_BYTES, c->tr_arbytes);
663 __vmwrite(GUEST_LDTR_SELECTOR, c->ldtr_sel);
664 __vmwrite(GUEST_LDTR_LIMIT, c->ldtr_limit);
665 __vmwrite(GUEST_LDTR_BASE, c->ldtr_base);
666 __vmwrite(GUEST_LDTR_AR_BYTES, c->ldtr_arbytes);
668 __vmwrite(GUEST_SYSENTER_CS, c->sysenter_cs);
669 __vmwrite(GUEST_SYSENTER_ESP, c->sysenter_esp);
670 __vmwrite(GUEST_SYSENTER_EIP, c->sysenter_eip);
672 __vmwrite(GUEST_DR7, c->dr7);
674 vmx_vmcs_exit(v);
676 paging_update_paging_modes(v);
678 if ( c->pending_valid )
679 {
680 vmx_vmcs_enter(v);
681 gdprintk(XENLOG_INFO, "Re-injecting 0x%"PRIx32", 0x%"PRIx32"\n",
682 c->pending_event, c->error_code);
684 /* SVM uses type 3 ("Exception") for #OF and #BP; VMX uses type 6 */
685 if ( c->pending_type == 3
686 && (c->pending_vector == 3 || c->pending_vector == 4) )
687 c->pending_type = 6;
689 /* For software exceptions, we need to tell the hardware the
690 * instruction length as well (hmmm). */
691 if ( c->pending_type > 4 )
692 {
693 int addrbytes, ilen;
694 if ( (c->cs_arbytes & (1u<<13)) && (c->msr_efer & EFER_LMA) )
695 addrbytes = 8;
696 else if ( (c->cs_arbytes & (1u<<14)) )
697 addrbytes = 4;
698 else
699 addrbytes = 2;
700 ilen = hvm_instruction_length(c->rip, addrbytes);
701 __vmwrite(VM_ENTRY_INSTRUCTION_LEN, ilen);
702 }
704 /* Sanity check */
705 if ( c->pending_type == 1 || c->pending_type > 6
706 || c->pending_reserved != 0 )
707 {
708 gdprintk(XENLOG_ERR, "Invalid pending event 0x%"PRIx32"\n",
709 c->pending_event);
710 return -EINVAL;
711 }
712 /* Re-inject the exception */
713 __vmwrite(VM_ENTRY_INTR_INFO_FIELD, c->pending_event);
714 __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE, c->error_code);
715 v->arch.hvm_vmx.vector_injected = 1;
716 vmx_vmcs_exit(v);
717 }
719 return 0;
721 bad_cr3:
722 gdprintk(XENLOG_ERR, "Invalid CR3 value=0x%"PRIx64"", c->cr3);
723 vmx_vmcs_exit(v);
724 return -EINVAL;
725 }
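/* vmx_vmcs_save()/vmx_vmcs_restore() implement the VMCS half of the HVM
 * save/restore record used by save, restore and migration;
 * vmx_save_cpu_state()/vmx_load_cpu_state() below handle the MSR state that
 * lives outside the VMCS. */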
727 #if defined(__x86_64__) && defined(HVM_DEBUG_SUSPEND)
728 static void dump_msr_state(struct vmx_msr_state *m)
729 {
730 int i = 0;
731 printk("**** msr state ****\n");
732 printk("shadow_gs=0x%lx, flags=0x%lx, msr_items:", m->shadow_gs, m->flags);
733 for (i = 0; i < VMX_MSR_COUNT; i++)
734 printk("0x%lx,", m->msrs[i]);
735 printk("\n");
736 }
737 #else
738 #define dump_msr_state(m) ((void)0)
739 #endif
741 static void vmx_save_cpu_state(struct vcpu *v, struct hvm_hw_cpu *data)
742 {
743 #ifdef __x86_64__
744 struct vmx_msr_state *guest_state = &v->arch.hvm_vmx.msr_state;
745 unsigned long guest_flags = guest_state->flags;
747 data->shadow_gs = v->arch.hvm_vmx.shadow_gs;
748 data->msr_cstar = v->arch.hvm_vmx.cstar;
750 /* save msrs */
751 data->msr_flags = guest_flags;
752 data->msr_lstar = guest_state->msrs[VMX_INDEX_MSR_LSTAR];
753 data->msr_star = guest_state->msrs[VMX_INDEX_MSR_STAR];
754 data->msr_syscall_mask = guest_state->msrs[VMX_INDEX_MSR_SYSCALL_MASK];
755 #endif
757 data->msr_efer = v->arch.hvm_vmx.efer;
759 data->tsc = hvm_get_guest_time(v);
761 dump_msr_state(guest_state);
762 }
764 static void vmx_load_cpu_state(struct vcpu *v, struct hvm_hw_cpu *data)
765 {
766 #ifdef __x86_64__
767 struct vmx_msr_state *guest_state = &v->arch.hvm_vmx.msr_state;
769 /* restore msrs */
770 guest_state->flags = data->msr_flags;
771 guest_state->msrs[VMX_INDEX_MSR_LSTAR] = data->msr_lstar;
772 guest_state->msrs[VMX_INDEX_MSR_STAR] = data->msr_star;
773 guest_state->msrs[VMX_INDEX_MSR_SYSCALL_MASK] = data->msr_syscall_mask;
775 v->arch.hvm_vmx.cstar = data->msr_cstar;
776 v->arch.hvm_vmx.shadow_gs = data->shadow_gs;
777 #endif
779 v->arch.hvm_vmx.efer = data->msr_efer;
781 v->arch.hvm_vmx.vmxassist_enabled = !(data->cr0 & X86_CR0_PE);
783 hvm_set_guest_time(v, data->tsc);
785 dump_msr_state(guest_state);
786 }
789 static void vmx_save_vmcs_ctxt(struct vcpu *v, struct hvm_hw_cpu *ctxt)
790 {
791 vmx_save_cpu_state(v, ctxt);
792 vmx_vmcs_enter(v);
793 vmx_vmcs_save(v, ctxt);
794 vmx_vmcs_exit(v);
795 }
797 static int vmx_load_vmcs_ctxt(struct vcpu *v, struct hvm_hw_cpu *ctxt)
798 {
799 vmx_load_cpu_state(v, ctxt);
800 if (vmx_vmcs_restore(v, ctxt)) {
801 printk("vmx_vmcs restore failed!\n");
802 domain_crash(v->domain);
803 return -EINVAL;
804 }
806 return 0;
807 }
809 /*
810 * DR7 is saved and restored on every vmexit. Other debug registers only
811 * need to be restored if their value is going to affect execution -- i.e.,
812 * if one of the breakpoints is enabled. So mask out all bits that don't
813 * enable some breakpoint functionality.
814 */
815 #define DR7_ACTIVE_MASK 0xff
817 static inline void vmx_restore_dr(struct vcpu *v)
818 {
819 /* NB. __vmread() is not usable here, so we cannot read from the VMCS. */
820 if ( unlikely(v->arch.guest_context.debugreg[7] & DR7_ACTIVE_MASK) )
821 __restore_debug_registers(v);
822 }
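/* Scheduler context-switch hooks: the 'from' path spills guest MSR and debug
 * register state and restores Xen's MSRs; the 'to' path reloads the guest
 * values (debug registers only if a breakpoint is actually enabled). */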
824 static void vmx_ctxt_switch_from(struct vcpu *v)
825 {
826 vmx_save_guest_msrs(v);
827 vmx_restore_host_msrs();
828 vmx_save_dr(v);
829 }
831 static void vmx_ctxt_switch_to(struct vcpu *v)
832 {
833 vmx_restore_guest_msrs(v);
834 vmx_restore_dr(v);
835 }
837 static void stop_vmx(void)
838 {
839 if ( !(read_cr4() & X86_CR4_VMXE) )
840 return;
842 __vmxoff();
843 clear_in_cr4(X86_CR4_VMXE);
844 }
846 static void vmx_store_cpu_guest_regs(
847 struct vcpu *v, struct cpu_user_regs *regs, unsigned long *crs)
848 {
849 vmx_vmcs_enter(v);
851 if ( regs != NULL )
852 {
853 regs->eflags = __vmread(GUEST_RFLAGS);
854 regs->ss = __vmread(GUEST_SS_SELECTOR);
855 regs->cs = __vmread(GUEST_CS_SELECTOR);
856 regs->eip = __vmread(GUEST_RIP);
857 regs->esp = __vmread(GUEST_RSP);
858 }
860 if ( crs != NULL )
861 {
862 crs[0] = v->arch.hvm_vmx.cpu_shadow_cr0;
863 crs[2] = v->arch.hvm_vmx.cpu_cr2;
864 crs[3] = v->arch.hvm_vmx.cpu_cr3;
865 crs[4] = v->arch.hvm_vmx.cpu_shadow_cr4;
866 }
868 vmx_vmcs_exit(v);
869 }
871 static void vmx_load_cpu_guest_regs(struct vcpu *v, struct cpu_user_regs *regs)
872 {
873 unsigned long base;
875 vmx_vmcs_enter(v);
877 __vmwrite(GUEST_SS_SELECTOR, regs->ss);
878 __vmwrite(GUEST_RSP, regs->esp);
880 /* NB. Bit 1 of RFLAGS must be set for VMENTRY to succeed. */
881 __vmwrite(GUEST_RFLAGS, regs->eflags | 2UL);
883 if ( regs->eflags & EF_VM )
884 {
885 /*
886 * The VMX spec (section 4.3.1.2, Checks on Guest Segment
887 * Registers) says that virtual-8086 mode guests' segment
888 * base-address fields in the VMCS must be equal to their
889 * corresponding segment selector field shifted right by
890 * four bits upon vmentry.
891 */
892 base = __vmread(GUEST_CS_BASE);
893 if ( (regs->cs << 4) != base )
894 __vmwrite(GUEST_CS_BASE, regs->cs << 4);
895 base = __vmread(GUEST_SS_BASE);
896 if ( (regs->ss << 4) != base )
897 __vmwrite(GUEST_SS_BASE, regs->ss << 4);
898 }
900 __vmwrite(GUEST_CS_SELECTOR, regs->cs);
901 __vmwrite(GUEST_RIP, regs->eip);
903 vmx_vmcs_exit(v);
904 }
906 static unsigned long vmx_get_ctrl_reg(struct vcpu *v, unsigned int num)
907 {
908 switch ( num )
909 {
910 case 0:
911 return v->arch.hvm_vmx.cpu_cr0;
912 case 2:
913 return v->arch.hvm_vmx.cpu_cr2;
914 case 3:
915 return v->arch.hvm_vmx.cpu_cr3;
916 case 4:
917 return v->arch.hvm_vmx.cpu_shadow_cr4;
918 default:
919 BUG();
920 }
921 return 0; /* dummy */
922 }
924 static unsigned long vmx_get_segment_base(struct vcpu *v, enum x86_segment seg)
925 {
926 unsigned long base = 0;
927 int long_mode = 0;
929 ASSERT(v == current);
931 #ifdef __x86_64__
932 if ( vmx_long_mode_enabled(v) &&
933 (__vmread(GUEST_CS_AR_BYTES) & X86_SEG_AR_CS_LM_ACTIVE) )
934 long_mode = 1;
935 #endif
937 switch ( seg )
938 {
939 case x86_seg_cs: if ( !long_mode ) base = __vmread(GUEST_CS_BASE); break;
940 case x86_seg_ds: if ( !long_mode ) base = __vmread(GUEST_DS_BASE); break;
941 case x86_seg_es: if ( !long_mode ) base = __vmread(GUEST_ES_BASE); break;
942 case x86_seg_fs: base = __vmread(GUEST_FS_BASE); break;
943 case x86_seg_gs: base = __vmread(GUEST_GS_BASE); break;
944 case x86_seg_ss: if ( !long_mode ) base = __vmread(GUEST_SS_BASE); break;
945 case x86_seg_tr: base = __vmread(GUEST_TR_BASE); break;
946 case x86_seg_gdtr: base = __vmread(GUEST_GDTR_BASE); break;
947 case x86_seg_idtr: base = __vmread(GUEST_IDTR_BASE); break;
948 case x86_seg_ldtr: base = __vmread(GUEST_LDTR_BASE); break;
949 default: BUG(); break;
950 }
952 return base;
953 }
955 static void vmx_get_segment_register(struct vcpu *v, enum x86_segment seg,
956 struct segment_register *reg)
957 {
958 u16 attr = 0;
960 ASSERT(v == current);
962 switch ( seg )
963 {
964 case x86_seg_cs:
965 reg->sel = __vmread(GUEST_CS_SELECTOR);
966 reg->limit = __vmread(GUEST_CS_LIMIT);
967 reg->base = __vmread(GUEST_CS_BASE);
968 attr = __vmread(GUEST_CS_AR_BYTES);
969 break;
970 case x86_seg_ds:
971 reg->sel = __vmread(GUEST_DS_SELECTOR);
972 reg->limit = __vmread(GUEST_DS_LIMIT);
973 reg->base = __vmread(GUEST_DS_BASE);
974 attr = __vmread(GUEST_DS_AR_BYTES);
975 break;
976 case x86_seg_es:
977 reg->sel = __vmread(GUEST_ES_SELECTOR);
978 reg->limit = __vmread(GUEST_ES_LIMIT);
979 reg->base = __vmread(GUEST_ES_BASE);
980 attr = __vmread(GUEST_ES_AR_BYTES);
981 break;
982 case x86_seg_fs:
983 reg->sel = __vmread(GUEST_FS_SELECTOR);
984 reg->limit = __vmread(GUEST_FS_LIMIT);
985 reg->base = __vmread(GUEST_FS_BASE);
986 attr = __vmread(GUEST_FS_AR_BYTES);
987 break;
988 case x86_seg_gs:
989 reg->sel = __vmread(GUEST_GS_SELECTOR);
990 reg->limit = __vmread(GUEST_GS_LIMIT);
991 reg->base = __vmread(GUEST_GS_BASE);
992 attr = __vmread(GUEST_GS_AR_BYTES);
993 break;
994 case x86_seg_ss:
995 reg->sel = __vmread(GUEST_SS_SELECTOR);
996 reg->limit = __vmread(GUEST_SS_LIMIT);
997 reg->base = __vmread(GUEST_SS_BASE);
998 attr = __vmread(GUEST_SS_AR_BYTES);
999 break;
1000 case x86_seg_tr:
1001 reg->sel = __vmread(GUEST_TR_SELECTOR);
1002 reg->limit = __vmread(GUEST_TR_LIMIT);
1003 reg->base = __vmread(GUEST_TR_BASE);
1004 attr = __vmread(GUEST_TR_AR_BYTES);
1005 break;
1006 case x86_seg_gdtr:
1007 reg->limit = __vmread(GUEST_GDTR_LIMIT);
1008 reg->base = __vmread(GUEST_GDTR_BASE);
1009 break;
1010 case x86_seg_idtr:
1011 reg->limit = __vmread(GUEST_IDTR_LIMIT);
1012 reg->base = __vmread(GUEST_IDTR_BASE);
1013 break;
1014 case x86_seg_ldtr:
1015 reg->sel = __vmread(GUEST_LDTR_SELECTOR);
1016 reg->limit = __vmread(GUEST_LDTR_LIMIT);
1017 reg->base = __vmread(GUEST_LDTR_BASE);
1018 attr = __vmread(GUEST_LDTR_AR_BYTES);
1019 break;
1020 default:
1021 BUG();
1022 }
1024 reg->attr.bytes = (attr & 0xff) | ((attr >> 4) & 0xf00);
1025 }
1027 /* Make sure that xen intercepts any FP accesses from current */
1028 static void vmx_stts(struct vcpu *v)
1029 {
1030 /* VMX depends on operating on the current vcpu */
1031 ASSERT(v == current);
1033 /*
1034 * If the guest does not have TS enabled then we must cause and handle an
1035 * exception on first use of the FPU. If the guest *does* have TS enabled
1036 * then this is not necessary: no FPU activity can occur until the guest
1037 * clears CR0.TS, and we will initialise the FPU when that happens.
1038 */
1039 if ( !(v->arch.hvm_vmx.cpu_shadow_cr0 & X86_CR0_TS) )
1040 {
1041 v->arch.hvm_vmx.cpu_cr0 |= X86_CR0_TS;
1042 __vmwrite(GUEST_CR0, v->arch.hvm_vmx.cpu_cr0);
1043 __vm_set_bit(EXCEPTION_BITMAP, TRAP_no_device);
1044 }
1045 }
1047 static void vmx_set_tsc_offset(struct vcpu *v, u64 offset)
1048 {
1049 vmx_vmcs_enter(v);
1050 __vmwrite(TSC_OFFSET, offset);
1051 #if defined (__i386__)
1052 __vmwrite(TSC_OFFSET_HIGH, offset >> 32);
1053 #endif
1054 vmx_vmcs_exit(v);
1055 }
1057 static void vmx_init_ap_context(
1058 struct vcpu_guest_context *ctxt, int vcpuid, int trampoline_vector)
1059 {
1060 memset(ctxt, 0, sizeof(*ctxt));
1061 ctxt->user_regs.eip = VMXASSIST_BASE;
1062 ctxt->user_regs.edx = vcpuid;
1063 ctxt->user_regs.ebx = trampoline_vector;
1064 }
1066 void do_nmi(struct cpu_user_regs *);
1068 static void vmx_init_hypercall_page(struct domain *d, void *hypercall_page)
1069 {
1070 char *p;
1071 int i;
1073 memset(hypercall_page, 0, PAGE_SIZE);
1075 for ( i = 0; i < (PAGE_SIZE / 32); i++ )
1076 {
1077 p = (char *)(hypercall_page + (i * 32));
1078 *(u8 *)(p + 0) = 0xb8; /* mov imm32, %eax */
1079 *(u32 *)(p + 1) = i;
1080 *(u8 *)(p + 5) = 0x0f; /* vmcall */
1081 *(u8 *)(p + 6) = 0x01;
1082 *(u8 *)(p + 7) = 0xc1;
1083 *(u8 *)(p + 8) = 0xc3; /* ret */
1084 }
1086 /* Don't support HYPERVISOR_iret at the moment */
1087 *(u16 *)(hypercall_page + (__HYPERVISOR_iret * 32)) = 0x0b0f; /* ud2 */
1088 }
1090 static int vmx_guest_x86_mode(struct vcpu *v)
1091 {
1092 unsigned int cs_ar_bytes;
1094 ASSERT(v == current);
1096 if ( unlikely(!(v->arch.hvm_vmx.cpu_shadow_cr0 & X86_CR0_PE)) )
1097 return 0;
1098 if ( unlikely(__vmread(GUEST_RFLAGS) & X86_EFLAGS_VM) )
1099 return 1;
1100 cs_ar_bytes = __vmread(GUEST_CS_AR_BYTES);
1101 if ( vmx_long_mode_enabled(v) && likely(cs_ar_bytes &
1102 X86_SEG_AR_CS_LM_ACTIVE) )
1103 return 8;
1104 return (likely(cs_ar_bytes & X86_SEG_AR_DEF_OP_SIZE) ? 4 : 2);
1105 }
1107 static int vmx_pae_enabled(struct vcpu *v)
1108 {
1109 unsigned long cr4 = v->arch.hvm_vmx.cpu_shadow_cr4;
1110 return (vmx_paging_enabled(v) && (cr4 & X86_CR4_PAE));
1111 }
1113 static int vmx_nx_enabled(struct vcpu *v)
1114 {
1115 return v->arch.hvm_vmx.efer & EFER_NX;
1116 }
1118 static int vmx_interrupts_enabled(struct vcpu *v, enum hvm_intack type)
1119 {
1120 unsigned long intr_shadow, eflags;
1122 ASSERT(v == current);
1124 intr_shadow = __vmread(GUEST_INTERRUPTIBILITY_INFO);
1125 intr_shadow &= VMX_INTR_SHADOW_STI|VMX_INTR_SHADOW_MOV_SS;
1127 if ( type == hvm_intack_nmi )
1128 return !intr_shadow;
1130 ASSERT((type == hvm_intack_pic) || (type == hvm_intack_lapic));
1131 eflags = __vmread(GUEST_RFLAGS);
1132 return !irq_masked(eflags) && !intr_shadow;
1133 }
1135 static void vmx_update_host_cr3(struct vcpu *v)
1136 {
1137 ASSERT((v == current) || !vcpu_runnable(v));
1138 vmx_vmcs_enter(v);
1139 __vmwrite(HOST_CR3, v->arch.cr3);
1140 vmx_vmcs_exit(v);
1141 }
1143 static void vmx_update_guest_cr3(struct vcpu *v)
1144 {
1145 ASSERT((v == current) || !vcpu_runnable(v));
1146 vmx_vmcs_enter(v);
1147 __vmwrite(GUEST_CR3, v->arch.hvm_vcpu.hw_cr3);
1148 vmx_vmcs_exit(v);
1149 }
1151 static void vmx_flush_guest_tlbs(void)
1152 {
1153 /* No tagged TLB support on VMX yet. The fact that we're in Xen
1154 * at all means any guest will have a clean TLB when it's next run,
1155 * because VMRESUME will flush it for us. */
1156 }
1158 static void vmx_inject_exception(
1159 unsigned int trapnr, int errcode, unsigned long cr2)
1160 {
1161 struct vcpu *v = current;
1162 vmx_inject_hw_exception(v, trapnr, errcode);
1163 if ( trapnr == TRAP_page_fault )
1164 v->arch.hvm_vmx.cpu_cr2 = cr2;
1165 }
1167 static void vmx_update_vtpr(struct vcpu *v, unsigned long value)
1168 {
1169 /* VMX doesn't have a V_TPR field */
1170 }
1172 static int vmx_event_injection_faulted(struct vcpu *v)
1173 {
1174 unsigned int idtv_info_field;
1176 ASSERT(v == current);
1178 idtv_info_field = __vmread(IDT_VECTORING_INFO_FIELD);
1179 return (idtv_info_field & INTR_INFO_VALID_MASK);
1180 }
1182 static void disable_intercept_for_msr(u32 msr)
1183 {
1184 /*
1185 * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
1186 * have the write-low and read-high bitmap offsets the wrong way round.
1187 * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
1188 */
1189 if ( msr <= 0x1fff )
1190 {
1191 __clear_bit(msr, vmx_msr_bitmap + 0x000); /* read-low */
1192 __clear_bit(msr, vmx_msr_bitmap + 0x800); /* write-low */
1193 }
1194 else if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) )
1195 {
1196 msr &= 0x1fff;
1197 __clear_bit(msr, vmx_msr_bitmap + 0x400); /* read-high */
1198 __clear_bit(msr, vmx_msr_bitmap + 0xc00); /* write-high */
1199 }
1200 }
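/* For example, MSR_FS_BASE (0xc0000100) falls in the high range, so the call
 * made from start_vmx() clears its bit in the read-high (+0x400) and
 * write-high (+0xc00) parts of the bitmap, letting the guest read and write
 * that MSR without a VM exit. */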
1202 static struct hvm_function_table vmx_function_table = {
1203 .name = "VMX",
1204 .disable = stop_vmx,
1205 .domain_initialise = vmx_domain_initialise,
1206 .domain_destroy = vmx_domain_destroy,
1207 .vcpu_initialise = vmx_vcpu_initialise,
1208 .vcpu_destroy = vmx_vcpu_destroy,
1209 .store_cpu_guest_regs = vmx_store_cpu_guest_regs,
1210 .load_cpu_guest_regs = vmx_load_cpu_guest_regs,
1211 .save_cpu_ctxt = vmx_save_vmcs_ctxt,
1212 .load_cpu_ctxt = vmx_load_vmcs_ctxt,
1213 .paging_enabled = vmx_paging_enabled,
1214 .long_mode_enabled = vmx_long_mode_enabled,
1215 .pae_enabled = vmx_pae_enabled,
1216 .nx_enabled = vmx_nx_enabled,
1217 .interrupts_enabled = vmx_interrupts_enabled,
1218 .guest_x86_mode = vmx_guest_x86_mode,
1219 .get_guest_ctrl_reg = vmx_get_ctrl_reg,
1220 .get_segment_base = vmx_get_segment_base,
1221 .get_segment_register = vmx_get_segment_register,
1222 .update_host_cr3 = vmx_update_host_cr3,
1223 .update_guest_cr3 = vmx_update_guest_cr3,
1224 .flush_guest_tlbs = vmx_flush_guest_tlbs,
1225 .update_vtpr = vmx_update_vtpr,
1226 .stts = vmx_stts,
1227 .set_tsc_offset = vmx_set_tsc_offset,
1228 .inject_exception = vmx_inject_exception,
1229 .init_ap_context = vmx_init_ap_context,
1230 .init_hypercall_page = vmx_init_hypercall_page,
1231 .event_injection_faulted = vmx_event_injection_faulted
1232 };
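/* This table is handed to the generic HVM layer via hvm_enable() in
 * start_vmx() below; all HVM callers reach the VMX code through it. */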
1234 int start_vmx(void)
1235 {
1236 u32 eax, edx;
1237 struct vmcs_struct *vmcs;
1239 /*
1240 * Xen does not fill x86_capability words except 0.
1241 */
1242 boot_cpu_data.x86_capability[4] = cpuid_ecx(1);
1244 if ( !test_bit(X86_FEATURE_VMXE, &boot_cpu_data.x86_capability) )
1245 return 0;
1247 rdmsr(IA32_FEATURE_CONTROL_MSR, eax, edx);
1249 if ( eax & IA32_FEATURE_CONTROL_MSR_LOCK )
1250 {
1251 if ( (eax & IA32_FEATURE_CONTROL_MSR_ENABLE_VMXON) == 0x0 )
1252 {
1253 printk("VMX disabled by Feature Control MSR.\n");
1254 return 0;
1255 }
1256 }
1257 else
1258 {
1259 wrmsr(IA32_FEATURE_CONTROL_MSR,
1260 IA32_FEATURE_CONTROL_MSR_LOCK |
1261 IA32_FEATURE_CONTROL_MSR_ENABLE_VMXON, 0);
1262 }
1264 set_in_cr4(X86_CR4_VMXE);
1266 vmx_init_vmcs_config();
1268 if ( smp_processor_id() == 0 )
1269 setup_vmcs_dump();
1271 if ( (vmcs = vmx_alloc_host_vmcs()) == NULL )
1272 {
1273 clear_in_cr4(X86_CR4_VMXE);
1274 printk("Failed to allocate host VMCS\n");
1275 return 0;
1276 }
1278 if ( __vmxon(virt_to_maddr(vmcs)) )
1279 {
1280 clear_in_cr4(X86_CR4_VMXE);
1281 printk("VMXON failed\n");
1282 vmx_free_host_vmcs(vmcs);
1283 return 0;
1284 }
1286 vmx_save_host_msrs();
1288 if ( smp_processor_id() != 0 )
1289 return 1;
1291 hvm_enable(&vmx_function_table);
1293 if ( cpu_has_vmx_msr_bitmap )
1294 {
1295 printk("VMX: MSR intercept bitmap enabled\n");
1296 vmx_msr_bitmap = alloc_xenheap_page();
1297 BUG_ON(vmx_msr_bitmap == NULL);
1298 memset(vmx_msr_bitmap, ~0, PAGE_SIZE);
1299 disable_intercept_for_msr(MSR_FS_BASE);
1300 disable_intercept_for_msr(MSR_GS_BASE);
1301 }
1303 return 1;
1304 }
1306 /*
1307 * Not all cases receive valid value in the VM-exit instruction length field.
1308 * Callers must know what they're doing!
1309 */
1310 static int __get_instruction_length(void)
1311 {
1312 int len;
1313 len = __vmread(VM_EXIT_INSTRUCTION_LEN); /* Safe: callers audited */
1314 BUG_ON((len < 1) || (len > 15));
1315 return len;
1316 }
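/* __update_guest_eip() advances RIP past the emulated instruction and drops
 * any STI/MOV-SS interrupt shadow, since the instruction that created the
 * shadow has effectively completed under emulation. */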
1318 static void inline __update_guest_eip(unsigned long inst_len)
1319 {
1320 unsigned long current_eip, intr_shadow;
1322 current_eip = __vmread(GUEST_RIP);
1323 __vmwrite(GUEST_RIP, current_eip + inst_len);
1325 intr_shadow = __vmread(GUEST_INTERRUPTIBILITY_INFO);
1326 if ( intr_shadow & (VMX_INTR_SHADOW_STI | VMX_INTR_SHADOW_MOV_SS) )
1327 {
1328 intr_shadow &= ~(VMX_INTR_SHADOW_STI | VMX_INTR_SHADOW_MOV_SS);
1329 __vmwrite(GUEST_INTERRUPTIBILITY_INFO, intr_shadow);
1330 }
1331 }
1333 static void vmx_do_no_device_fault(void)
1334 {
1335 struct vcpu *v = current;
1337 setup_fpu(current);
1338 __vm_clear_bit(EXCEPTION_BITMAP, TRAP_no_device);
1340 /* Disable TS in guest CR0 unless the guest wants the exception too. */
1341 if ( !(v->arch.hvm_vmx.cpu_shadow_cr0 & X86_CR0_TS) )
1342 {
1343 v->arch.hvm_vmx.cpu_cr0 &= ~X86_CR0_TS;
1344 __vmwrite(GUEST_CR0, v->arch.hvm_vmx.cpu_cr0);
1345 }
1346 }
1348 #define bitmaskof(idx) (1U << ((idx) & 31))
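/* bitmaskof() selects a feature's bit position within its 32-bit CPUID word;
 * vmx_do_cpuid() uses it below to mask out features Xen cannot virtualise. */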
1349 static void vmx_do_cpuid(struct cpu_user_regs *regs)
1351 unsigned int input = (unsigned int)regs->eax;
1352 unsigned int count = (unsigned int)regs->ecx;
1353 unsigned int eax, ebx, ecx, edx;
1355 if ( input == 0x00000004 )
1357 cpuid_count(input, count, &eax, &ebx, &ecx, &edx);
1358 eax &= NUM_CORES_RESET_MASK;
1360 else if ( input == 0x40000003 )
1362 /*
1363 * NB. Unsupported interface for private use of VMXASSIST only.
1364 * Note that this leaf lives at <max-hypervisor-leaf> + 1.
1365 */
1366 u64 value = ((u64)regs->edx << 32) | (u32)regs->ecx;
1367 unsigned long mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT);
1368 struct vcpu *v = current;
1369 char *p;
1371 gdprintk(XENLOG_INFO, "Input address is 0x%"PRIx64".\n", value);
1373 /* 8-byte aligned valid pseudophys address from vmxassist, please. */
1374 if ( (value & 7) || (mfn == INVALID_MFN) ||
1375 !v->arch.hvm_vmx.vmxassist_enabled )
1377 domain_crash(v->domain);
1378 return;
1381 p = map_domain_page(mfn);
1382 value = *((uint64_t *)(p + (value & (PAGE_SIZE - 1))));
1383 unmap_domain_page(p);
1385 gdprintk(XENLOG_INFO, "Output value is 0x%"PRIx64".\n", value);
1386 ecx = (u32)value;
1387 edx = (u32)(value >> 32);
1388 } else {
1389 hvm_cpuid(input, &eax, &ebx, &ecx, &edx);
1391 if ( input == 0x00000001 )
1393 /* Mask off reserved bits. */
1394 ecx &= ~VMX_VCPU_CPUID_L1_ECX_RESERVED;
1396 ebx &= NUM_THREADS_RESET_MASK;
1398 /* Unsupportable for virtualised CPUs. */
1399 ecx &= ~(bitmaskof(X86_FEATURE_VMXE) |
1400 bitmaskof(X86_FEATURE_EST) |
1401 bitmaskof(X86_FEATURE_TM2) |
1402 bitmaskof(X86_FEATURE_CID));
1404 edx &= ~(bitmaskof(X86_FEATURE_HT) |
1405 bitmaskof(X86_FEATURE_ACPI) |
1406 bitmaskof(X86_FEATURE_ACC));
1409 if ( input == 0x00000006 || input == 0x00000009 || input == 0x0000000A )
1410 eax = ebx = ecx = edx = 0x0;
1413 regs->eax = (unsigned long)eax;
1414 regs->ebx = (unsigned long)ebx;
1415 regs->ecx = (unsigned long)ecx;
1416 regs->edx = (unsigned long)edx;
1418 HVMTRACE_3D(CPUID, current, input,
1419 ((uint64_t)eax << 32) | ebx, ((uint64_t)ecx << 32) | edx);
1422 #define CASE_GET_REG_P(REG, reg) \
1423 case REG_ ## REG: reg_p = (unsigned long *)&(regs->reg); break
1425 #ifdef __i386__
1426 #define CASE_EXTEND_GET_REG_P
1427 #else
1428 #define CASE_EXTEND_GET_REG_P \
1429 CASE_GET_REG_P(R8, r8); \
1430 CASE_GET_REG_P(R9, r9); \
1431 CASE_GET_REG_P(R10, r10); \
1432 CASE_GET_REG_P(R11, r11); \
1433 CASE_GET_REG_P(R12, r12); \
1434 CASE_GET_REG_P(R13, r13); \
1435 CASE_GET_REG_P(R14, r14); \
1436 CASE_GET_REG_P(R15, r15)
1437 #endif
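/* First guest access to a debug register: mark DR state dirty, load the saved
 * guest values, and stop intercepting MOV DR until vmx_save_dr() re-arms the
 * intercept on the next context switch. */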
1439 static void vmx_dr_access(unsigned long exit_qualification,
1440 struct cpu_user_regs *regs)
1441 {
1442 struct vcpu *v = current;
1444 HVMTRACE_0D(DR_WRITE, v);
1446 v->arch.hvm_vcpu.flag_dr_dirty = 1;
1448 /* We could probably be smarter about this */
1449 __restore_debug_registers(v);
1451 /* Allow guest direct access to DR registers */
1452 v->arch.hvm_vcpu.u.vmx.exec_control &= ~CPU_BASED_MOV_DR_EXITING;
1453 __vmwrite(CPU_BASED_VM_EXEC_CONTROL,
1454 v->arch.hvm_vcpu.u.vmx.exec_control);
1455 }
1457 /*
1458 * Invalidate the TLB for va. Invalidate the shadow page corresponding
1459 * the address va.
1460 */
1461 static void vmx_do_invlpg(unsigned long va)
1462 {
1463 unsigned long eip;
1464 struct vcpu *v = current;
1466 HVMTRACE_2D(INVLPG, v, /*invlpga=*/ 0, va);
1468 eip = __vmread(GUEST_RIP);
1470 HVM_DBG_LOG(DBG_LEVEL_VMMU, "eip=%lx, va=%lx",
1471 eip, va);
1473 /*
1474 * We do the safest things first, then try to update the shadow
1475 * copying from guest
1476 */
1477 paging_invlpg(v, va);
1478 }
1480 /*
1481 * get segment for string pio according to guest instruction
1482 */
1483 static void vmx_str_pio_get_segment(int long_mode, unsigned long eip,
1484 int inst_len, enum x86_segment *seg)
1486 unsigned char inst[MAX_INST_LEN];
1487 int i;
1488 extern int inst_copy_from_guest(unsigned char *, unsigned long, int);
1490 if ( !long_mode )
1491 eip += __vmread(GUEST_CS_BASE);
1493 memset(inst, 0, MAX_INST_LEN);
1494 if ( inst_copy_from_guest(inst, eip, inst_len) != inst_len )
1496 gdprintk(XENLOG_ERR, "Get guest instruction failed\n");
1497 domain_crash(current->domain);
1498 return;
1501 for ( i = 0; i < inst_len; i++ )
1503 switch ( inst[i] )
1505 case 0xf3: /* REPZ */
1506 case 0xf2: /* REPNZ */
1507 case 0xf0: /* LOCK */
1508 case 0x66: /* data32 */
1509 case 0x67: /* addr32 */
1510 #ifdef __x86_64__
1511 case 0x40 ... 0x4f: /* REX */
1512 #endif
1513 continue;
1514 case 0x2e: /* CS */
1515 *seg = x86_seg_cs;
1516 continue;
1517 case 0x36: /* SS */
1518 *seg = x86_seg_ss;
1519 continue;
1520 case 0x26: /* ES */
1521 *seg = x86_seg_es;
1522 continue;
1523 case 0x64: /* FS */
1524 *seg = x86_seg_fs;
1525 continue;
1526 case 0x65: /* GS */
1527 *seg = x86_seg_gs;
1528 continue;
1529 case 0x3e: /* DS */
1530 *seg = x86_seg_ds;
1531 continue;
1536 static int vmx_str_pio_check_descriptor(int long_mode, unsigned long eip,
1537 int inst_len, enum x86_segment seg,
1538 unsigned long *base, u32 *limit,
1539 u32 *ar_bytes)
1541 enum vmcs_field ar_field, base_field, limit_field;
1543 *base = 0;
1544 *limit = 0;
1545 if ( seg != x86_seg_es )
1546 vmx_str_pio_get_segment(long_mode, eip, inst_len, &seg);
1548 switch ( seg )
1550 case x86_seg_cs:
1551 ar_field = GUEST_CS_AR_BYTES;
1552 base_field = GUEST_CS_BASE;
1553 limit_field = GUEST_CS_LIMIT;
1554 break;
1555 case x86_seg_ds:
1556 ar_field = GUEST_DS_AR_BYTES;
1557 base_field = GUEST_DS_BASE;
1558 limit_field = GUEST_DS_LIMIT;
1559 break;
1560 case x86_seg_es:
1561 ar_field = GUEST_ES_AR_BYTES;
1562 base_field = GUEST_ES_BASE;
1563 limit_field = GUEST_ES_LIMIT;
1564 break;
1565 case x86_seg_fs:
1566 ar_field = GUEST_FS_AR_BYTES;
1567 base_field = GUEST_FS_BASE;
1568 limit_field = GUEST_FS_LIMIT;
1569 break;
1570 case x86_seg_gs:
1571 ar_field = GUEST_GS_AR_BYTES;
1572 base_field = GUEST_GS_BASE;
1573 limit_field = GUEST_GS_LIMIT;
1574 break;
1575 case x86_seg_ss:
1576 ar_field = GUEST_SS_AR_BYTES;
1577 base_field = GUEST_SS_BASE;
1578 limit_field = GUEST_SS_LIMIT;
1579 break;
1580 default:
1581 BUG();
1582 return 0;
1585 if ( !long_mode || seg == x86_seg_fs || seg == x86_seg_gs )
1587 *base = __vmread(base_field);
1588 *limit = __vmread(limit_field);
1590 *ar_bytes = __vmread(ar_field);
1592 return !(*ar_bytes & 0x10000);
1596 static inline void vmx_str_pio_check_limit(u32 limit, unsigned int size,
1597 u32 ar_bytes, unsigned long addr,
1598 unsigned long base, int df,
1599 unsigned long *count)
1601 unsigned long ea = addr - base;
1603 /* Offset must be within limits. */
1604 ASSERT(ea == (u32)ea);
1605 if ( (u32)(ea + size - 1) < (u32)ea ||
1606 (ar_bytes & 0xc) != 0x4 ? ea + size - 1 > limit
1607 : ea <= limit )
1609 vmx_inject_hw_exception(current, TRAP_gp_fault, 0);
1610 return;
1613 /* Check the limit for repeated instructions, as above we checked
1614 only the first instance. Truncate the count if a limit violation
1615 would occur. Note that the checking is not necessary for page
1616 granular segments as transfers crossing page boundaries will be
1617 broken up anyway. */
1618 if ( !(ar_bytes & X86_SEG_AR_GRANULARITY) && *count > 1 )
1620 if ( (ar_bytes & 0xc) != 0x4 )
1622 /* expand-up */
1623 if ( !df )
1625 if ( ea + *count * size - 1 < ea ||
1626 ea + *count * size - 1 > limit )
1627 *count = (limit + 1UL - ea) / size;
1629 else
1631 if ( *count - 1 > ea / size )
1632 *count = ea / size + 1;
1635 else
1637 /* expand-down */
1638 if ( !df )
1640 if ( *count - 1 > -(s32)ea / size )
1641 *count = -(s32)ea / size + 1UL;
1643 else
1645 if ( ea < (*count - 1) * size ||
1646 ea - (*count - 1) * size <= limit )
1647 *count = (ea - limit - 1) / size + 1;
1650 ASSERT(*count);
1654 #ifdef __x86_64__
1655 static inline void vmx_str_pio_lm_check_limit(struct cpu_user_regs *regs,
1656 unsigned int size,
1657 unsigned long addr,
1658 unsigned long *count)
1660 if ( !is_canonical_address(addr) ||
1661 !is_canonical_address(addr + size - 1) )
1663 vmx_inject_hw_exception(current, TRAP_gp_fault, 0);
1664 return;
1666 if ( *count > (1UL << 48) / size )
1667 *count = (1UL << 48) / size;
1668 if ( !(regs->eflags & EF_DF) )
1670 if ( addr + *count * size - 1 < addr ||
1671 !is_canonical_address(addr + *count * size - 1) )
1672 *count = (addr & ~((1UL << 48) - 1)) / size;
1674 else
1676 if ( (*count - 1) * size > addr ||
1677 !is_canonical_address(addr + (*count - 1) * size) )
1678 *count = (addr & ~((1UL << 48) - 1)) / size + 1;
1680 ASSERT(*count);
1682 #endif
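/* vmx_send_str_pio() builds and issues the ioreq for a string PIO: transfers
 * that are unaligned or cross a page boundary are flagged OVERLAP and handled
 * one element at a time (reading the value here for OUTS); otherwise the
 * whole rep count, possibly truncated at the page boundary, is sent at once. */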
1684 static inline void vmx_send_str_pio(struct cpu_user_regs *regs,
1685 struct hvm_io_op *pio_opp,
1686 unsigned long inst_len, unsigned int port,
1687 int sign, unsigned int size, int dir,
1688 int df, unsigned long addr,
1689 unsigned long paddr, unsigned long count)
1691 /*
1692 * Handle string pio instructions that cross pages or that
1693 * are unaligned. See the comments in hvm_domain.c/handle_mmio()
1694 */
1695 if ( (addr & PAGE_MASK) != ((addr + size - 1) & PAGE_MASK) ) {
1696 unsigned long value = 0;
1698 pio_opp->flags |= OVERLAP;
1700 if ( dir == IOREQ_WRITE ) /* OUTS */
1702 if ( hvm_paging_enabled(current) )
1704 int rv = hvm_copy_from_guest_virt(&value, addr, size);
1705 if ( rv != 0 )
1707 /* Failed on the page-spanning copy. Inject PF into
1708 * the guest for the address where we failed. */
1709 addr += size - rv;
1710 gdprintk(XENLOG_DEBUG, "Pagefault reading non-io side "
1711 "of a page-spanning PIO: va=%#lx\n", addr);
1712 vmx_inject_exception(TRAP_page_fault, 0, addr);
1713 return;
1716 else
1717 (void) hvm_copy_from_guest_phys(&value, addr, size);
1718 } else /* dir != IOREQ_WRITE */
1719 /* Remember where to write the result, as a *VA*.
1720 * Must be a VA so we can handle the page overlap
1721 * correctly in hvm_pio_assist() */
1722 pio_opp->addr = addr;
1724 if ( count == 1 )
1725 regs->eip += inst_len;
1727 send_pio_req(port, 1, size, value, dir, df, 0);
1728 } else {
1729 unsigned long last_addr = sign > 0 ? addr + count * size - 1
1730 : addr - (count - 1) * size;
1732 if ( (addr & PAGE_MASK) != (last_addr & PAGE_MASK) )
1734 if ( sign > 0 )
1735 count = (PAGE_SIZE - (addr & ~PAGE_MASK)) / size;
1736 else
1737 count = (addr & ~PAGE_MASK) / size + 1;
1738 } else
1739 regs->eip += inst_len;
1741 send_pio_req(port, count, size, paddr, dir, df, 1);
1745 static void vmx_str_pio_handler(unsigned long exit_qualification,
1746 unsigned long inst_len,
1747 struct cpu_user_regs *regs,
1748 struct hvm_io_op *pio_opp)
1750 unsigned int port, size;
1751 int dir, df, vm86;
1752 unsigned long addr, count = 1, base;
1753 paddr_t paddr;
1754 unsigned long gfn;
1755 u32 ar_bytes, limit;
1756 int sign;
1757 int long_mode = 0;
1759 vm86 = regs->eflags & X86_EFLAGS_VM ? 1 : 0;
1760 df = regs->eflags & X86_EFLAGS_DF ? 1 : 0;
1762 if ( test_bit(6, &exit_qualification) )
1763 port = (exit_qualification >> 16) & 0xFFFF;
1764 else
1765 port = regs->edx & 0xffff;
1767 size = (exit_qualification & 7) + 1;
1768 dir = test_bit(3, &exit_qualification); /* direction */
1770 if ( dir == IOREQ_READ )
1771 HVMTRACE_2D(IO_READ, current, port, size);
1772 else
1773 HVMTRACE_2D(IO_WRITE, current, port, size);
1775 sign = regs->eflags & X86_EFLAGS_DF ? -1 : 1;
1776 ar_bytes = __vmread(GUEST_CS_AR_BYTES);
1777 #ifdef __x86_64__
1778 if ( vmx_long_mode_enabled(current) &&
1779 (ar_bytes & X86_SEG_AR_CS_LM_ACTIVE) )
1780 long_mode = 1;
1781 #endif
1782 addr = __vmread(GUEST_LINEAR_ADDRESS);
1784 if ( test_bit(5, &exit_qualification) ) { /* "rep" prefix */
1785 pio_opp->flags |= REPZ;
1786 count = regs->ecx;
1787 if ( !long_mode &&
1788 (vm86 || !(ar_bytes & X86_SEG_AR_DEF_OP_SIZE)) )
1789 count &= 0xFFFF;
1792 /*
1793 * In protected mode, guest linear address is invalid if the
1794 * selector is null.
1795 */
1796 if ( !vmx_str_pio_check_descriptor(long_mode, regs->eip, inst_len,
1797 dir==IOREQ_WRITE ? x86_seg_ds :
1798 x86_seg_es, &base, &limit,
1799 &ar_bytes) ) {
1800 if ( !long_mode ) {
1801 vmx_inject_hw_exception(current, TRAP_gp_fault, 0);
1802 return;
1804 addr = dir == IOREQ_WRITE ? base + regs->esi : regs->edi;
1807 if ( !long_mode )
1809 /* Segment must be readable for outs and writeable for ins. */
1810 if ( dir == IOREQ_WRITE ? (ar_bytes & 0xa) == 0x8
1811 : (ar_bytes & 0xa) != 0x2 ) {
1812 vmx_inject_hw_exception(current, TRAP_gp_fault, 0);
1813 return;
1816 vmx_str_pio_check_limit(limit, size, ar_bytes, addr, base, df, &count);
1818 #ifdef __x86_64__
1819 else
1821 vmx_str_pio_lm_check_limit(regs, size, addr, &count);
1823 #endif
1825 /* Translate the address to a physical address */
1826 gfn = paging_gva_to_gfn(current, addr);
1827 if ( gfn == INVALID_GFN )
1829 /* The guest does not have the RAM address mapped.
1830 * Need to send in a page fault */
1831 int errcode = 0;
1832 /* IO read --> memory write */
1833 if ( dir == IOREQ_READ ) errcode |= PFEC_write_access;
1834 vmx_inject_exception(TRAP_page_fault, errcode, addr);
1835 return;
1837 paddr = (paddr_t)gfn << PAGE_SHIFT | (addr & ~PAGE_MASK);
1839 vmx_send_str_pio(regs, pio_opp, inst_len, port, sign,
1840 size, dir, df, addr, paddr, count);
1843 static void vmx_io_instruction(unsigned long exit_qualification,
1844 unsigned long inst_len)
1846 struct cpu_user_regs *regs;
1847 struct hvm_io_op *pio_opp;
1849 pio_opp = &current->arch.hvm_vcpu.io_op;
1850 pio_opp->instr = INSTR_PIO;
1851 pio_opp->flags = 0;
1853 regs = &pio_opp->io_context;
1855 /* Copy current guest state into io instruction state structure. */
1856 memcpy(regs, guest_cpu_user_regs(), HVM_CONTEXT_STACK_BYTES);
1857 hvm_store_cpu_guest_regs(current, regs, NULL);
1859 HVM_DBG_LOG(DBG_LEVEL_IO, "vm86 %d, eip=%x:%lx, "
1860 "exit_qualification = %lx",
1861 regs->eflags & X86_EFLAGS_VM ? 1 : 0,
1862 regs->cs, (unsigned long)regs->eip, exit_qualification);
1864 if ( test_bit(4, &exit_qualification) ) /* string instruction */
1865 vmx_str_pio_handler(exit_qualification, inst_len, regs, pio_opp);
1866 else
1868 unsigned int port, size;
1869 int dir, df;
1871 df = regs->eflags & X86_EFLAGS_DF ? 1 : 0;
1873 if ( test_bit(6, &exit_qualification) )
1874 port = (exit_qualification >> 16) & 0xFFFF;
1875 else
1876 port = regs->edx & 0xffff;
1878 size = (exit_qualification & 7) + 1;
1879 dir = test_bit(3, &exit_qualification); /* direction */
1881 if ( dir == IOREQ_READ )
1882 HVMTRACE_2D(IO_READ, current, port, size);
1883 else
1884 HVMTRACE_2D(IO_WRITE, current, port, size);
1886 if ( port == 0xe9 && dir == IOREQ_WRITE && size == 1 )
1887 hvm_print_line(current, regs->eax); /* guest debug output */
1889 regs->eip += inst_len;
1890 send_pio_req(port, 1, size, regs->eax, dir, df, 0);
1894 static void vmx_world_save(struct vcpu *v, struct vmx_assist_context *c)
1896 /* NB. Skip transition instruction. */
1897 c->eip = __vmread(GUEST_RIP);
1898 c->eip += __get_instruction_length(); /* Safe: MOV Cn, LMSW, CLTS */
1900 c->esp = __vmread(GUEST_RSP);
1901 c->eflags = __vmread(GUEST_RFLAGS);
1903 c->cr0 = v->arch.hvm_vmx.cpu_shadow_cr0;
1904 c->cr3 = v->arch.hvm_vmx.cpu_cr3;
1905 c->cr4 = v->arch.hvm_vmx.cpu_shadow_cr4;
1907 c->idtr_limit = __vmread(GUEST_IDTR_LIMIT);
1908 c->idtr_base = __vmread(GUEST_IDTR_BASE);
1910 c->gdtr_limit = __vmread(GUEST_GDTR_LIMIT);
1911 c->gdtr_base = __vmread(GUEST_GDTR_BASE);
1913 c->cs_sel = __vmread(GUEST_CS_SELECTOR);
1914 c->cs_limit = __vmread(GUEST_CS_LIMIT);
1915 c->cs_base = __vmread(GUEST_CS_BASE);
1916 c->cs_arbytes.bytes = __vmread(GUEST_CS_AR_BYTES);
1918 c->ds_sel = __vmread(GUEST_DS_SELECTOR);
1919 c->ds_limit = __vmread(GUEST_DS_LIMIT);
1920 c->ds_base = __vmread(GUEST_DS_BASE);
1921 c->ds_arbytes.bytes = __vmread(GUEST_DS_AR_BYTES);
1923 c->es_sel = __vmread(GUEST_ES_SELECTOR);
1924 c->es_limit = __vmread(GUEST_ES_LIMIT);
1925 c->es_base = __vmread(GUEST_ES_BASE);
1926 c->es_arbytes.bytes = __vmread(GUEST_ES_AR_BYTES);
1928 c->ss_sel = __vmread(GUEST_SS_SELECTOR);
1929 c->ss_limit = __vmread(GUEST_SS_LIMIT);
1930 c->ss_base = __vmread(GUEST_SS_BASE);
1931 c->ss_arbytes.bytes = __vmread(GUEST_SS_AR_BYTES);
1933 c->fs_sel = __vmread(GUEST_FS_SELECTOR);
1934 c->fs_limit = __vmread(GUEST_FS_LIMIT);
1935 c->fs_base = __vmread(GUEST_FS_BASE);
1936 c->fs_arbytes.bytes = __vmread(GUEST_FS_AR_BYTES);
1938 c->gs_sel = __vmread(GUEST_GS_SELECTOR);
1939 c->gs_limit = __vmread(GUEST_GS_LIMIT);
1940 c->gs_base = __vmread(GUEST_GS_BASE);
1941 c->gs_arbytes.bytes = __vmread(GUEST_GS_AR_BYTES);
1943 c->tr_sel = __vmread(GUEST_TR_SELECTOR);
1944 c->tr_limit = __vmread(GUEST_TR_LIMIT);
1945 c->tr_base = __vmread(GUEST_TR_BASE);
1946 c->tr_arbytes.bytes = __vmread(GUEST_TR_AR_BYTES);
1948 c->ldtr_sel = __vmread(GUEST_LDTR_SELECTOR);
1949 c->ldtr_limit = __vmread(GUEST_LDTR_LIMIT);
1950 c->ldtr_base = __vmread(GUEST_LDTR_BASE);
1951 c->ldtr_arbytes.bytes = __vmread(GUEST_LDTR_AR_BYTES);
1954 static int vmx_world_restore(struct vcpu *v, struct vmx_assist_context *c)
1956 unsigned long mfn, old_base_mfn;
1958 __vmwrite(GUEST_RIP, c->eip);
1959 __vmwrite(GUEST_RSP, c->esp);
1960 __vmwrite(GUEST_RFLAGS, c->eflags);
1962 v->arch.hvm_vmx.cpu_shadow_cr0 = c->cr0;
1963 __vmwrite(CR0_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr0);
1965 if ( !vmx_paging_enabled(v) )
1966 goto skip_cr3;
1968 if ( c->cr3 == v->arch.hvm_vmx.cpu_cr3 )
1970 /*
1971 * This is simple TLB flush, implying the guest has
1972 * removed some translation or changed page attributes.
1973 * We simply invalidate the shadow.
1974 */
1975 mfn = get_mfn_from_gpfn(c->cr3 >> PAGE_SHIFT);
1976 if ( mfn != pagetable_get_pfn(v->arch.guest_table) )
1977 goto bad_cr3;
1979 else
1981 /*
1982 * If different, make a shadow. Check if the PDBR is valid
1983 * first.
1984 */
1985 HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 c->cr3 = %x", c->cr3);
1986 mfn = get_mfn_from_gpfn(c->cr3 >> PAGE_SHIFT);
1987 if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) )
1988 goto bad_cr3;
1989 old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
1990 v->arch.guest_table = pagetable_from_pfn(mfn);
1991 if (old_base_mfn)
1992 put_page(mfn_to_page(old_base_mfn));
1993 v->arch.hvm_vmx.cpu_cr3 = c->cr3;
1996 skip_cr3:
1997 if ( !vmx_paging_enabled(v) )
1998 HVM_DBG_LOG(DBG_LEVEL_VMMU, "switching to vmxassist. use phys table");
1999 else
2000 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %x", c->cr3);
2002 __vmwrite(GUEST_CR4, (c->cr4 | VMX_CR4_HOST_MASK));
2003 v->arch.hvm_vmx.cpu_shadow_cr4 = c->cr4;
2004 __vmwrite(CR4_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr4);
2006 __vmwrite(GUEST_IDTR_LIMIT, c->idtr_limit);
2007 __vmwrite(GUEST_IDTR_BASE, c->idtr_base);
2009 __vmwrite(GUEST_GDTR_LIMIT, c->gdtr_limit);
2010 __vmwrite(GUEST_GDTR_BASE, c->gdtr_base);
2012 __vmwrite(GUEST_CS_SELECTOR, c->cs_sel);
2013 __vmwrite(GUEST_CS_LIMIT, c->cs_limit);
2014 __vmwrite(GUEST_CS_BASE, c->cs_base);
2015 __vmwrite(GUEST_CS_AR_BYTES, c->cs_arbytes.bytes);
2017 __vmwrite(GUEST_DS_SELECTOR, c->ds_sel);
2018 __vmwrite(GUEST_DS_LIMIT, c->ds_limit);
2019 __vmwrite(GUEST_DS_BASE, c->ds_base);
2020 __vmwrite(GUEST_DS_AR_BYTES, c->ds_arbytes.bytes);
2022 __vmwrite(GUEST_ES_SELECTOR, c->es_sel);
2023 __vmwrite(GUEST_ES_LIMIT, c->es_limit);
2024 __vmwrite(GUEST_ES_BASE, c->es_base);
2025 __vmwrite(GUEST_ES_AR_BYTES, c->es_arbytes.bytes);
2027 __vmwrite(GUEST_SS_SELECTOR, c->ss_sel);
2028 __vmwrite(GUEST_SS_LIMIT, c->ss_limit);
2029 __vmwrite(GUEST_SS_BASE, c->ss_base);
2030 __vmwrite(GUEST_SS_AR_BYTES, c->ss_arbytes.bytes);
2032 __vmwrite(GUEST_FS_SELECTOR, c->fs_sel);
2033 __vmwrite(GUEST_FS_LIMIT, c->fs_limit);
2034 __vmwrite(GUEST_FS_BASE, c->fs_base);
2035 __vmwrite(GUEST_FS_AR_BYTES, c->fs_arbytes.bytes);
2037 __vmwrite(GUEST_GS_SELECTOR, c->gs_sel);
2038 __vmwrite(GUEST_GS_LIMIT, c->gs_limit);
2039 __vmwrite(GUEST_GS_BASE, c->gs_base);
2040 __vmwrite(GUEST_GS_AR_BYTES, c->gs_arbytes.bytes);
2042 __vmwrite(GUEST_TR_SELECTOR, c->tr_sel);
2043 __vmwrite(GUEST_TR_LIMIT, c->tr_limit);
2044 __vmwrite(GUEST_TR_BASE, c->tr_base);
2045 __vmwrite(GUEST_TR_AR_BYTES, c->tr_arbytes.bytes);
2047 __vmwrite(GUEST_LDTR_SELECTOR, c->ldtr_sel);
2048 __vmwrite(GUEST_LDTR_LIMIT, c->ldtr_limit);
2049 __vmwrite(GUEST_LDTR_BASE, c->ldtr_base);
2050 __vmwrite(GUEST_LDTR_AR_BYTES, c->ldtr_arbytes.bytes);
2052 paging_update_paging_modes(v);
2053 return 0;
2055 bad_cr3:
2056 gdprintk(XENLOG_ERR, "Invalid CR3 value=%x", c->cr3);
2057 return -EINVAL;
2060 enum { VMX_ASSIST_INVOKE = 0, VMX_ASSIST_RESTORE };
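/*
 * Informal sketch of the protocol, as inferred from the code below:
 * vmxassist publishes VMXASSIST_MAGIC at VMXASSIST_MAGIC_OFFSET in guest
 * physical memory, and VMXASSIST_OLD_CONTEXT / VMXASSIST_NEW_CONTEXT each
 * hold a guest-physical pointer to a struct vmx_assist_context.  INVOKE
 * saves the current world through the OLD pointer and loads the world
 * referenced by the NEW pointer; RESTORE reloads the world referenced by
 * the OLD pointer and re-installs the saved PIC irq_base values.
 */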
2062 static int vmx_assist(struct vcpu *v, int mode)
2064 struct vmx_assist_context c;
2065 struct hvm_hw_vpic *vpic = v->domain->arch.hvm_domain.vpic;
2066 u32 magic, cp;
2068 /* make sure vmxassist exists (this is not an error) */
2069 if (hvm_copy_from_guest_phys(&magic, VMXASSIST_MAGIC_OFFSET,
2070 sizeof(magic)))
2071 return 0;
2072 if (magic != VMXASSIST_MAGIC)
2073 return 0;
2075 switch (mode) {
2076 /*
2077 * Transfer control to vmxassist.
2078 * Store the current context in VMXASSIST_OLD_CONTEXT and load
2079 * the new VMXASSIST_NEW_CONTEXT context. This context was created
2080 * by vmxassist and will transfer control to it.
2081 */
2082 case VMX_ASSIST_INVOKE:
2083 /* save the old context */
2084 if (hvm_copy_from_guest_phys(&cp, VMXASSIST_OLD_CONTEXT, sizeof(cp)))
2085 goto error;
2086 if (cp != 0) {
2087 vmx_world_save(v, &c);
2088 if (hvm_copy_to_guest_phys(cp, &c, sizeof(c)))
2089 goto error;
2092 /* restore the new context, this should activate vmxassist */
2093 if (hvm_copy_from_guest_phys(&cp, VMXASSIST_NEW_CONTEXT, sizeof(cp)))
2094 goto error;
2095 if (cp != 0) {
2096 if (hvm_copy_from_guest_phys(&c, cp, sizeof(c)))
2097 goto error;
2098 if ( vmx_world_restore(v, &c) != 0 )
2099 goto error;
2100 v->arch.hvm_vmx.pm_irqbase[0] = vpic[0].irq_base;
2101 v->arch.hvm_vmx.pm_irqbase[1] = vpic[1].irq_base;
2102 vpic[0].irq_base = NR_EXCEPTION_HANDLER;
2103 vpic[1].irq_base = NR_EXCEPTION_HANDLER + 8;
2104 v->arch.hvm_vmx.vmxassist_enabled = 1;
2105 return 1;
2107 break;
2109 /*
2110 * Restore the VMXASSIST_OLD_CONTEXT that was saved by
2111 * VMX_ASSIST_INVOKE above.
2112 */
2113 case VMX_ASSIST_RESTORE:
2114 /* restore the previously saved context */
2115 if (hvm_copy_from_guest_phys(&cp, VMXASSIST_OLD_CONTEXT, sizeof(cp)))
2116 goto error;
2117 if (cp != 0) {
2118 if (hvm_copy_from_guest_phys(&c, cp, sizeof(c)))
2119 goto error;
2120 if ( vmx_world_restore(v, &c) != 0 )
2121 goto error;
2122 if ( v->arch.hvm_vmx.irqbase_mode ) {
2123 vpic[0].irq_base = c.rm_irqbase[0] & 0xf8;
2124 vpic[1].irq_base = c.rm_irqbase[1] & 0xf8;
2125 } else {
2126 vpic[0].irq_base = v->arch.hvm_vmx.pm_irqbase[0];
2127 vpic[1].irq_base = v->arch.hvm_vmx.pm_irqbase[1];
2129 v->arch.hvm_vmx.vmxassist_enabled = 0;
2130 return 1;
2132 break;
2135 error:
2136 gdprintk(XENLOG_ERR, "Failed to transfer to vmxassist\n");
2137 domain_crash(v->domain);
2138 return 0;
2141 static int vmx_set_cr0(unsigned long value)
2143 struct vcpu *v = current;
2144 unsigned long mfn;
2145 unsigned long eip;
2146 int paging_enabled;
2147 unsigned long vm_entry_value;
2148 unsigned long old_cr0;
2149 unsigned long old_base_mfn;
2151 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR0 value = %lx", value);
2153 /* ET is reserved and should always be 1. */
2154 value |= X86_CR0_ET;
2156 if ( (value & (X86_CR0_PE|X86_CR0_PG)) == X86_CR0_PG )
2158 vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
2159 return 0;
2162 /* TS cleared? Then initialise FPU now. */
2163 if ( !(value & X86_CR0_TS) )
2165 setup_fpu(v);
2166 __vm_clear_bit(EXCEPTION_BITMAP, TRAP_no_device);
2169 old_cr0 = v->arch.hvm_vmx.cpu_shadow_cr0;
2170 paging_enabled = old_cr0 & X86_CR0_PG;
2172 v->arch.hvm_vmx.cpu_cr0 = (value | X86_CR0_PE | X86_CR0_PG
2173 | X86_CR0_NE | X86_CR0_WP);
2174 __vmwrite(GUEST_CR0, v->arch.hvm_vmx.cpu_cr0);
2176 v->arch.hvm_vmx.cpu_shadow_cr0 = value;
2177 __vmwrite(CR0_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr0);
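/*
 * Note the split maintained here: hardware GUEST_CR0 always has PE, PG,
 * NE and WP forced on, while CR0_READ_SHADOW carries the unmodified
 * value the guest wrote, so the guest never observes the forced bits.
 */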
2179 if ( (value & X86_CR0_PE) && (value & X86_CR0_PG) && !paging_enabled )
2181 /*
2182 * Trying to enable guest paging.
2183 * The guest CR3 must be pointing to the guest physical.
2184 */
2185 mfn = get_mfn_from_gpfn(v->arch.hvm_vmx.cpu_cr3 >> PAGE_SHIFT);
2186 if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) )
2188 gdprintk(XENLOG_ERR, "Invalid CR3 value = %lx (mfn=%lx)\n",
2189 v->arch.hvm_vmx.cpu_cr3, mfn);
2190 domain_crash(v->domain);
2191 return 0;
2194 #if defined(__x86_64__)
2195 if ( vmx_lme_is_set(v) )
2197 if ( !(v->arch.hvm_vmx.cpu_shadow_cr4 & X86_CR4_PAE) )
2199 HVM_DBG_LOG(DBG_LEVEL_1, "Guest enabled paging "
2200 "with EFER.LME set but not CR4.PAE");
2201 vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
2203 else
2205 HVM_DBG_LOG(DBG_LEVEL_1, "Enabling long mode");
2206 v->arch.hvm_vmx.efer |= EFER_LMA;
2207 vm_entry_value = __vmread(VM_ENTRY_CONTROLS);
2208 vm_entry_value |= VM_ENTRY_IA32E_MODE;
2209 __vmwrite(VM_ENTRY_CONTROLS, vm_entry_value);
2212 #endif
2214 /*
2215 * Now arch.guest_table points to machine physical.
2216 */
2217 old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
2218 v->arch.guest_table = pagetable_from_pfn(mfn);
2219 if (old_base_mfn)
2220 put_page(mfn_to_page(old_base_mfn));
2221 paging_update_paging_modes(v);
2223 HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx",
2224 (unsigned long) (mfn << PAGE_SHIFT));
2226 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx",
2227 v->arch.hvm_vmx.cpu_cr3, mfn);
2230 if ( !((value & X86_CR0_PE) && (value & X86_CR0_PG)) && paging_enabled )
2231 if ( v->arch.hvm_vmx.cpu_cr3 ) {
2232 put_page(mfn_to_page(get_mfn_from_gpfn(
2233 v->arch.hvm_vmx.cpu_cr3 >> PAGE_SHIFT)));
2234 v->arch.guest_table = pagetable_null();
2237 /*
2238 * VMX does not implement real-mode virtualization. We emulate
2239 * real-mode by performing a world switch to VMXAssist whenever
2240 * a partition disables the CR0.PE bit.
2241 */
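/*
 * Informal summary of the path below: clearing CR0.PE invokes
 * vmx_assist(v, VMX_ASSIST_INVOKE), which world-switches into vmxassist;
 * when the guest later sets CR0.PE again with vmxassist_enabled,
 * vmx_assist(v, VMX_ASSIST_RESTORE) switches back to the saved
 * protected-mode context.
 */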
2242 if ( (value & X86_CR0_PE) == 0 )
2244 if ( value & X86_CR0_PG ) {
2245 /* inject GP here */
2246 vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
2247 return 0;
2248 } else {
2249 /*
2250 * Disable paging here.
2251 * Same as the PE == 1 && PG == 0 case.
2252 */
2253 if ( vmx_long_mode_enabled(v) )
2255 v->arch.hvm_vmx.efer &= ~EFER_LMA;
2256 vm_entry_value = __vmread(VM_ENTRY_CONTROLS);
2257 vm_entry_value &= ~VM_ENTRY_IA32E_MODE;
2258 __vmwrite(VM_ENTRY_CONTROLS, vm_entry_value);
2262 if ( vmx_assist(v, VMX_ASSIST_INVOKE) )
2264 eip = __vmread(GUEST_RIP);
2265 HVM_DBG_LOG(DBG_LEVEL_1,
2266 "Transfering control to vmxassist %%eip 0x%lx", eip);
2267 return 0; /* do not update eip! */
2270 else if ( v->arch.hvm_vmx.vmxassist_enabled )
2272 eip = __vmread(GUEST_RIP);
2273 HVM_DBG_LOG(DBG_LEVEL_1,
2274 "Enabling CR0.PE at %%eip 0x%lx", eip);
2275 if ( vmx_assist(v, VMX_ASSIST_RESTORE) )
2277 eip = __vmread(GUEST_RIP);
2278 HVM_DBG_LOG(DBG_LEVEL_1,
2279 "Restoring to %%eip 0x%lx", eip);
2280 return 0; /* do not update eip! */
2283 else if ( (value & (X86_CR0_PE | X86_CR0_PG)) == X86_CR0_PE )
2285 if ( vmx_long_mode_enabled(v) )
2287 v->arch.hvm_vmx.efer &= ~EFER_LMA;
2288 vm_entry_value = __vmread(VM_ENTRY_CONTROLS);
2289 vm_entry_value &= ~VM_ENTRY_IA32E_MODE;
2290 __vmwrite(VM_ENTRY_CONTROLS, vm_entry_value);
2292 paging_update_paging_modes(v);
2295 return 1;
2298 #define CASE_SET_REG(REG, reg) \
2299 case REG_ ## REG: regs->reg = value; break
2300 #define CASE_GET_REG(REG, reg) \
2301 case REG_ ## REG: value = regs->reg; break
2303 #define CASE_EXTEND_SET_REG \
2304 CASE_EXTEND_REG(S)
2305 #define CASE_EXTEND_GET_REG \
2306 CASE_EXTEND_REG(G)
2308 #ifdef __i386__
2309 #define CASE_EXTEND_REG(T)
2310 #else
2311 #define CASE_EXTEND_REG(T) \
2312 CASE_ ## T ## ET_REG(R8, r8); \
2313 CASE_ ## T ## ET_REG(R9, r9); \
2314 CASE_ ## T ## ET_REG(R10, r10); \
2315 CASE_ ## T ## ET_REG(R11, r11); \
2316 CASE_ ## T ## ET_REG(R12, r12); \
2317 CASE_ ## T ## ET_REG(R13, r13); \
2318 CASE_ ## T ## ET_REG(R14, r14); \
2319 CASE_ ## T ## ET_REG(R15, r15)
2320 #endif
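/*
 * For reference, the macros above expand as, for example:
 *
 *   CASE_GET_REG(EAX, eax);  =>  case REG_EAX: value = regs->eax; break;
 *   CASE_SET_REG(EAX, eax);  =>  case REG_EAX: regs->eax = value; break;
 */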
2322 /*
2323 * Write to control registers
2324 */
2325 static int mov_to_cr(int gp, int cr, struct cpu_user_regs *regs)
2327 unsigned long value, old_cr, old_base_mfn, mfn;
2328 struct vcpu *v = current;
2329 struct vlapic *vlapic = vcpu_vlapic(v);
2331 switch ( gp )
2333 CASE_GET_REG(EAX, eax);
2334 CASE_GET_REG(ECX, ecx);
2335 CASE_GET_REG(EDX, edx);
2336 CASE_GET_REG(EBX, ebx);
2337 CASE_GET_REG(EBP, ebp);
2338 CASE_GET_REG(ESI, esi);
2339 CASE_GET_REG(EDI, edi);
2340 CASE_EXTEND_GET_REG;
2341 case REG_ESP:
2342 value = __vmread(GUEST_RSP);
2343 break;
2344 default:
2345 gdprintk(XENLOG_ERR, "invalid gp: %d\n", gp);
2346 goto exit_and_crash;
2349 HVMTRACE_2D(CR_WRITE, v, cr, value);
2351 HVM_DBG_LOG(DBG_LEVEL_1, "CR%d, value = %lx", cr, value);
2353 switch ( cr )
2355 case 0:
2356 return vmx_set_cr0(value);
2358 case 3:
2359 /*
2360 * If paging is not enabled yet, simply copy the value to CR3.
2361 */
2362 if (!vmx_paging_enabled(v)) {
2363 v->arch.hvm_vmx.cpu_cr3 = value;
2364 break;
2367 /*
2368 * We make a new one if the shadow does not exist.
2369 */
2370 if (value == v->arch.hvm_vmx.cpu_cr3) {
2371 /*
2372 * This is a simple TLB flush, implying the guest has
2373 * removed some translation or changed page attributes.
2374 * We simply invalidate the shadow.
2375 */
2376 mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT);
2377 if (mfn != pagetable_get_pfn(v->arch.guest_table))
2378 goto bad_cr3;
2379 paging_update_cr3(v);
2380 } else {
2381 /*
2382 * If different, make a shadow. Check if the PDBR is valid
2383 * first.
2384 */
2385 HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 value = %lx", value);
2386 mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT);
2387 if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) )
2388 goto bad_cr3;
2389 old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
2390 v->arch.guest_table = pagetable_from_pfn(mfn);
2391 if (old_base_mfn)
2392 put_page(mfn_to_page(old_base_mfn));
2393 v->arch.hvm_vmx.cpu_cr3 = value;
2394 update_cr3(v);
2395 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx", value);
2397 break;
2399 case 4: /* CR4 */
2400 old_cr = v->arch.hvm_vmx.cpu_shadow_cr4;
2402 if ( (value & X86_CR4_PAE) && !(old_cr & X86_CR4_PAE) )
2404 if ( vmx_pgbit_test(v) )
2406 /* The guest is a 32-bit PAE guest. */
2407 #if CONFIG_PAGING_LEVELS >= 3
2408 unsigned long mfn, old_base_mfn;
2409 mfn = get_mfn_from_gpfn(v->arch.hvm_vmx.cpu_cr3 >> PAGE_SHIFT);
2410 if ( !mfn_valid(mfn) ||
2411 !get_page(mfn_to_page(mfn), v->domain) )
2412 goto bad_cr3;
2414 /*
2415 * Now arch.guest_table points to machine physical.
2416 */
2418 old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
2419 v->arch.guest_table = pagetable_from_pfn(mfn);
2420 if ( old_base_mfn )
2421 put_page(mfn_to_page(old_base_mfn));
2423 HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx",
2424 (unsigned long) (mfn << PAGE_SHIFT));
2426 HVM_DBG_LOG(DBG_LEVEL_VMMU,
2427 "Update CR3 value = %lx, mfn = %lx",
2428 v->arch.hvm_vmx.cpu_cr3, mfn);
2429 #endif
2432 else if ( !(value & X86_CR4_PAE) )
2434 if ( unlikely(vmx_long_mode_enabled(v)) )
2436 HVM_DBG_LOG(DBG_LEVEL_1, "Guest cleared CR4.PAE while "
2437 "EFER.LMA is set");
2438 vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
2442 __vmwrite(GUEST_CR4, value | VMX_CR4_HOST_MASK);
2443 v->arch.hvm_vmx.cpu_shadow_cr4 = value;
2444 __vmwrite(CR4_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr4);
2446 /*
2447 * Writing to CR4 to modify the PSE, PGE, or PAE flag invalidates
2448 * all TLB entries except global entries.
2449 */
2450 if ( (old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE) )
2451 paging_update_paging_modes(v);
2452 break;
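/*
 * CR8 is the architectural alias for the task-priority register: the low
 * four bits written here become TPR bits 7:4, and mov_from_cr() below
 * applies the inverse shift when the guest reads CR8.
 */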
2454 case 8:
2455 vlapic_set_reg(vlapic, APIC_TASKPRI, ((value & 0x0F) << 4));
2456 break;
2458 default:
2459 gdprintk(XENLOG_ERR, "invalid cr: %d\n", cr);
2460 domain_crash(v->domain);
2461 return 0;
2464 return 1;
2466 bad_cr3:
2467 gdprintk(XENLOG_ERR, "Invalid CR3\n");
2468 exit_and_crash:
2469 domain_crash(v->domain);
2470 return 0;
2473 /*
2474 * Read from control registers. CR0 and CR4 are read from the shadow.
2475 */
2476 static void mov_from_cr(int cr, int gp, struct cpu_user_regs *regs)
2478 unsigned long value = 0;
2479 struct vcpu *v = current;
2480 struct vlapic *vlapic = vcpu_vlapic(v);
2482 switch ( cr )
2484 case 3:
2485 value = (unsigned long)v->arch.hvm_vmx.cpu_cr3;
2486 break;
2487 case 8:
2488 value = (unsigned long)vlapic_get_reg(vlapic, APIC_TASKPRI);
2489 value = (value & 0xF0) >> 4;
2490 break;
2491 default:
2492 gdprintk(XENLOG_ERR, "invalid cr: %d\n", cr);
2493 domain_crash(v->domain);
2494 break;
2497 switch ( gp ) {
2498 CASE_SET_REG(EAX, eax);
2499 CASE_SET_REG(ECX, ecx);
2500 CASE_SET_REG(EDX, edx);
2501 CASE_SET_REG(EBX, ebx);
2502 CASE_SET_REG(EBP, ebp);
2503 CASE_SET_REG(ESI, esi);
2504 CASE_SET_REG(EDI, edi);
2505 CASE_EXTEND_SET_REG;
2506 case REG_ESP:
2507 __vmwrite(GUEST_RSP, value);
2508 regs->esp = value;
2509 break;
2510 default:
2511 printk("invalid gp: %d\n", gp);
2512 domain_crash(v->domain);
2513 break;
2516 HVMTRACE_2D(CR_READ, v, cr, value);
2518 HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR%d, value = %lx", cr, value);
2521 static int vmx_cr_access(unsigned long exit_qualification,
2522 struct cpu_user_regs *regs)
2524 unsigned int gp, cr;
2525 unsigned long value;
2526 struct vcpu *v = current;
2528 switch (exit_qualification & CONTROL_REG_ACCESS_TYPE) {
2529 case TYPE_MOV_TO_CR:
2530 gp = exit_qualification & CONTROL_REG_ACCESS_REG;
2531 cr = exit_qualification & CONTROL_REG_ACCESS_NUM;
2532 return mov_to_cr(gp, cr, regs);
2533 case TYPE_MOV_FROM_CR:
2534 gp = exit_qualification & CONTROL_REG_ACCESS_REG;
2535 cr = exit_qualification & CONTROL_REG_ACCESS_NUM;
2536 mov_from_cr(cr, gp, regs);
2537 break;
2538 case TYPE_CLTS:
2539 /* We initialise the FPU now, to avoid needing another vmexit. */
2540 setup_fpu(v);
2541 __vm_clear_bit(EXCEPTION_BITMAP, TRAP_no_device);
2543 v->arch.hvm_vmx.cpu_cr0 &= ~X86_CR0_TS; /* clear TS */
2544 __vmwrite(GUEST_CR0, v->arch.hvm_vmx.cpu_cr0);
2546 v->arch.hvm_vmx.cpu_shadow_cr0 &= ~X86_CR0_TS; /* clear TS */
2547 __vmwrite(CR0_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr0);
2548 break;
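/*
 * LMSW only affects the low four CR0 bits (PE, MP, EM, TS), hence the
 * 0xF masks below: the bits supplied in the exit qualification are
 * merged into the current shadow CR0 and passed through vmx_set_cr0().
 */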
2549 case TYPE_LMSW:
2550 value = v->arch.hvm_vmx.cpu_shadow_cr0;
2551 value = (value & ~0xF) |
2552 (((exit_qualification & LMSW_SOURCE_DATA) >> 16) & 0xF);
2553 return vmx_set_cr0(value);
2554 default:
2555 BUG();
2558 return 1;
2561 static inline int vmx_do_msr_read(struct cpu_user_regs *regs)
2563 u64 msr_content = 0;
2564 u32 ecx = regs->ecx, eax, edx;
2565 struct vcpu *v = current;
2567 HVM_DBG_LOG(DBG_LEVEL_1, "ecx=%x", ecx);
2569 switch ( ecx )
2571 case MSR_IA32_TIME_STAMP_COUNTER:
2572 msr_content = hvm_get_guest_time(v);
2573 break;
2574 case MSR_IA32_SYSENTER_CS:
2575 msr_content = (u32)__vmread(GUEST_SYSENTER_CS);
2576 break;
2577 case MSR_IA32_SYSENTER_ESP:
2578 msr_content = __vmread(GUEST_SYSENTER_ESP);
2579 break;
2580 case MSR_IA32_SYSENTER_EIP:
2581 msr_content = __vmread(GUEST_SYSENTER_EIP);
2582 break;
2583 case MSR_IA32_APICBASE:
2584 msr_content = vcpu_vlapic(v)->hw.apic_base_msr;
2585 break;
2586 case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_CR4_FIXED1:
2587 goto gp_fault;
2588 default:
2589 if ( long_mode_do_msr_read(regs) )
2590 goto done;
2592 if ( rdmsr_hypervisor_regs(ecx, &eax, &edx) ||
2593 rdmsr_safe(ecx, eax, edx) == 0 )
2595 regs->eax = eax;
2596 regs->edx = edx;
2597 goto done;
2600 goto gp_fault;
2603 regs->eax = msr_content & 0xFFFFFFFF;
2604 regs->edx = msr_content >> 32;
2606 done:
2607 HVMTRACE_2D(MSR_READ, v, ecx, msr_content);
2608 HVM_DBG_LOG(DBG_LEVEL_1, "returns: ecx=%x, eax=%lx, edx=%lx",
2609 ecx, (unsigned long)regs->eax,
2610 (unsigned long)regs->edx);
2611 return 1;
2613 gp_fault:
2614 vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
2615 return 0;
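/*
 * The helpers below manage the page backing the "virtualize APIC
 * accesses" feature: a xenheap page is added to the guest physmap at
 * APIC_DEFAULT_PHYS_BASE and programmed into APIC_ACCESS_ADDR, so guest
 * accesses to that frame exit with EXIT_REASON_APIC_ACCESS and are
 * forwarded to handle_mmio() by vmx_vmexit_handler().
 */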
2618 static int vmx_alloc_vlapic_mapping(struct domain *d)
2620 void *apic_va;
2622 if ( !cpu_has_vmx_virtualize_apic_accesses )
2623 return 0;
2625 apic_va = alloc_xenheap_page();
2626 if ( apic_va == NULL )
2627 return -ENOMEM;
2628 share_xen_page_with_guest(virt_to_page(apic_va), d, XENSHARE_writable);
2629 guest_physmap_add_page(
2630 d, paddr_to_pfn(APIC_DEFAULT_PHYS_BASE), virt_to_mfn(apic_va));
2631 d->arch.hvm_domain.vmx_apic_access_mfn = virt_to_mfn(apic_va);
2633 return 0;
2636 static void vmx_free_vlapic_mapping(struct domain *d)
2638 unsigned long mfn = d->arch.hvm_domain.vmx_apic_access_mfn;
2639 if ( mfn != 0 )
2640 free_xenheap_page(mfn_to_virt(mfn));
2643 static void vmx_install_vlapic_mapping(struct vcpu *v)
2645 paddr_t virt_page_ma, apic_page_ma;
2647 if ( !cpu_has_vmx_virtualize_apic_accesses )
2648 return;
2650 virt_page_ma = page_to_maddr(vcpu_vlapic(v)->regs_page);
2651 apic_page_ma = v->domain->arch.hvm_domain.vmx_apic_access_mfn;
2652 apic_page_ma <<= PAGE_SHIFT;
2654 vmx_vmcs_enter(v);
2655 __vmwrite(VIRTUAL_APIC_PAGE_ADDR, virt_page_ma);
2656 __vmwrite(APIC_ACCESS_ADDR, apic_page_ma);
2657 #if defined (CONFIG_X86_PAE)
2658 __vmwrite(VIRTUAL_APIC_PAGE_ADDR_HIGH, virt_page_ma >> 32);
2659 __vmwrite(APIC_ACCESS_ADDR_HIGH, apic_page_ma >> 32);
2660 #endif
2661 vmx_vmcs_exit(v);
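/*
 * Re-evaluates the "virtualize APIC accesses" control whenever the vlapic
 * state changes: it stays enabled only while the local APIC is
 * hardware-enabled and still based at APIC_DEFAULT_PHYS_BASE.
 */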
2664 void vmx_vlapic_msr_changed(struct vcpu *v)
2666 struct vlapic *vlapic = vcpu_vlapic(v);
2667 uint32_t ctl;
2669 if ( !cpu_has_vmx_virtualize_apic_accesses )
2670 return;
2672 vmx_vmcs_enter(v);
2673 ctl = __vmread(SECONDARY_VM_EXEC_CONTROL);
2674 ctl &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
2675 if ( !vlapic_hw_disabled(vlapic) &&
2676 (vlapic_base_address(vlapic) == APIC_DEFAULT_PHYS_BASE) )
2677 ctl |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
2678 __vmwrite(SECONDARY_VM_EXEC_CONTROL, ctl);
2679 vmx_vmcs_exit(v);
2682 static inline int vmx_do_msr_write(struct cpu_user_regs *regs)
2684 u32 ecx = regs->ecx;
2685 u64 msr_content;
2686 struct vcpu *v = current;
2688 HVM_DBG_LOG(DBG_LEVEL_1, "ecx=%x, eax=%x, edx=%x",
2689 ecx, (u32)regs->eax, (u32)regs->edx);
2691 msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
2692 HVMTRACE_2D(MSR_WRITE, v, ecx, msr_content);
2694 switch ( ecx )
2696 case MSR_IA32_TIME_STAMP_COUNTER:
2697 hvm_set_guest_time(v, msr_content);
2698 pt_reset(v);
2699 break;
2700 case MSR_IA32_SYSENTER_CS:
2701 __vmwrite(GUEST_SYSENTER_CS, msr_content);
2702 break;
2703 case MSR_IA32_SYSENTER_ESP:
2704 __vmwrite(GUEST_SYSENTER_ESP, msr_content);
2705 break;
2706 case MSR_IA32_SYSENTER_EIP:
2707 __vmwrite(GUEST_SYSENTER_EIP, msr_content);
2708 break;
2709 case MSR_IA32_APICBASE:
2710 vlapic_msr_set(vcpu_vlapic(v), msr_content);
2711 break;
2712 case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_CR4_FIXED1:
2713 goto gp_fault;
2714 default:
2715 if ( !long_mode_do_msr_write(regs) )
2716 wrmsr_hypervisor_regs(ecx, regs->eax, regs->edx);
2717 break;
2720 return 1;
2722 gp_fault:
2723 vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
2724 return 0;
2727 static void vmx_do_hlt(void)
2729 unsigned long rflags;
2730 HVMTRACE_0D(HLT, current);
2731 rflags = __vmread(GUEST_RFLAGS);
2732 hvm_hlt(rflags);
2735 static inline void vmx_do_extint(struct cpu_user_regs *regs)
2737 unsigned int vector;
2739 asmlinkage void do_IRQ(struct cpu_user_regs *);
2740 fastcall void smp_apic_timer_interrupt(struct cpu_user_regs *);
2741 fastcall void smp_event_check_interrupt(void);
2742 fastcall void smp_invalidate_interrupt(void);
2743 fastcall void smp_call_function_interrupt(void);
2744 fastcall void smp_spurious_interrupt(struct cpu_user_regs *regs);
2745 fastcall void smp_error_interrupt(struct cpu_user_regs *regs);
2746 #ifdef CONFIG_X86_MCE_P4THERMAL
2747 fastcall void smp_thermal_interrupt(struct cpu_user_regs *regs);
2748 #endif
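/*
 * These are host interrupt vectors: an external interrupt arriving while
 * the guest runs causes this VM exit, and the corresponding Xen handler
 * is dispatched here directly rather than via the normal interrupt entry
 * path.
 */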
2750 vector = __vmread(VM_EXIT_INTR_INFO);
2751 BUG_ON(!(vector & INTR_INFO_VALID_MASK));
2753 vector &= INTR_INFO_VECTOR_MASK;
2754 HVMTRACE_1D(INTR, current, vector);
2756 switch(vector) {
2757 case LOCAL_TIMER_VECTOR:
2758 smp_apic_timer_interrupt(regs);
2759 break;
2760 case EVENT_CHECK_VECTOR:
2761 smp_event_check_interrupt();
2762 break;
2763 case INVALIDATE_TLB_VECTOR:
2764 smp_invalidate_interrupt();
2765 break;
2766 case CALL_FUNCTION_VECTOR:
2767 smp_call_function_interrupt();
2768 break;
2769 case SPURIOUS_APIC_VECTOR:
2770 smp_spurious_interrupt(regs);
2771 break;
2772 case ERROR_APIC_VECTOR:
2773 smp_error_interrupt(regs);
2774 break;
2775 #ifdef CONFIG_X86_MCE_P4THERMAL
2776 case THERMAL_APIC_VECTOR:
2777 smp_thermal_interrupt(regs);
2778 break;
2779 #endif
2780 default:
2781 regs->entry_vector = vector;
2782 do_IRQ(regs);
2783 break;
2787 static void vmx_reflect_exception(struct vcpu *v)
2789 int error_code, intr_info, vector;
2791 intr_info = __vmread(VM_EXIT_INTR_INFO);
2792 vector = intr_info & 0xff;
2793 if ( intr_info & INTR_INFO_DELIVER_CODE_MASK )
2794 error_code = __vmread(VM_EXIT_INTR_ERROR_CODE);
2795 else
2796 error_code = VMX_DELIVER_NO_ERROR_CODE;
2798 #ifndef NDEBUG
2800 unsigned long rip;
2802 rip = __vmread(GUEST_RIP);
2803 HVM_DBG_LOG(DBG_LEVEL_1, "rip = %lx, error_code = %x",
2804 rip, error_code);
2806 #endif /* NDEBUG */
2808 /*
2809 * According to Intel Virtualization Technology Specification for
2810 * the IA-32 Intel Architecture (C97063-002 April 2005), section
2811 * 2.8.3, SW_EXCEPTION should be used for #BP and #OF, and
2812 * HW_EXCEPTION used for everything else. The main difference
2813 * appears to be that for SW_EXCEPTION, the EIP/RIP is incremented
2814 * by VM_ENTER_INSTRUCTION_LEN bytes, whereas for HW_EXCEPTION,
2815 * it is not.
2816 */
2817 if ( (intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_SW_EXCEPTION )
2819 int ilen = __get_instruction_length(); /* Safe: software exception */
2820 vmx_inject_sw_exception(v, vector, ilen);
2822 else
2824 vmx_inject_hw_exception(v, vector, error_code);
2828 static void vmx_failed_vmentry(unsigned int exit_reason)
2830 unsigned int failed_vmentry_reason = (uint16_t)exit_reason;
2831 unsigned long exit_qualification;
2833 exit_qualification = __vmread(EXIT_QUALIFICATION);
2834 printk("Failed vm entry (exit reason 0x%x) ", exit_reason);
2835 switch ( failed_vmentry_reason )
2837 case EXIT_REASON_INVALID_GUEST_STATE:
2838 printk("caused by invalid guest state (%ld).\n", exit_qualification);
2839 break;
2840 case EXIT_REASON_MSR_LOADING:
2841 printk("caused by MSR entry %ld loading.\n", exit_qualification);
2842 break;
2843 case EXIT_REASON_MACHINE_CHECK:
2844 printk("caused by machine check.\n");
2845 break;
2846 default:
2847 printk("reason not known yet!");
2848 break;
2851 printk("************* VMCS Area **************\n");
2852 vmcs_dump_vcpu();
2853 printk("**************************************\n");
2855 domain_crash(current->domain);
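/*
 * Top-level VM exit dispatcher: reads VM_EXIT_REASON and fans out to the
 * handlers above.  Exits that consume an instruction (CPUID, HLT, INVLPG,
 * VMCALL, CR/MSR accesses, I/O) advance the guest RIP by the
 * VMCS-reported instruction length via __update_guest_eip().
 */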
2858 asmlinkage void vmx_vmexit_handler(struct cpu_user_regs *regs)
2860 unsigned int exit_reason;
2861 unsigned long exit_qualification, inst_len = 0;
2862 struct vcpu *v = current;
2864 exit_reason = __vmread(VM_EXIT_REASON);
2866 HVMTRACE_2D(VMEXIT, v, __vmread(GUEST_RIP), exit_reason);
2868 perfc_incra(vmexits, exit_reason);
2870 if ( exit_reason != EXIT_REASON_EXTERNAL_INTERRUPT )
2871 local_irq_enable();
2873 if ( unlikely(exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) )
2874 return vmx_failed_vmentry(exit_reason);
2876 switch ( exit_reason )
2878 case EXIT_REASON_EXCEPTION_NMI:
2880 /*
2881 * We do not enable software-interrupt (INT n) exiting, so this exit
2882 * means either (1) an exception (e.g. #PF) raised in the guest, or
2883 * (2) an NMI.
2884 */
2885 unsigned int intr_info, vector;
2887 intr_info = __vmread(VM_EXIT_INTR_INFO);
2888 BUG_ON(!(intr_info & INTR_INFO_VALID_MASK));
2890 vector = intr_info & INTR_INFO_VECTOR_MASK;
2892 perfc_incra(cause_vector, vector);
2894 switch ( vector )
2896 case TRAP_debug:
2897 case TRAP_int3:
2898 if ( !v->domain->debugger_attached )
2899 goto exit_and_crash;
2900 domain_pause_for_debugger();
2901 break;
2902 case TRAP_no_device:
2903 vmx_do_no_device_fault();
2904 break;
2905 case TRAP_page_fault:
2906 exit_qualification = __vmread(EXIT_QUALIFICATION);
2907 regs->error_code = __vmread(VM_EXIT_INTR_ERROR_CODE);
2909 HVM_DBG_LOG(DBG_LEVEL_VMMU,
2910 "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx",
2911 (unsigned long)regs->eax, (unsigned long)regs->ebx,
2912 (unsigned long)regs->ecx, (unsigned long)regs->edx,
2913 (unsigned long)regs->esi, (unsigned long)regs->edi);
2915 if ( paging_fault(exit_qualification, regs) )
2917 HVMTRACE_2D(PF_XEN, v, exit_qualification, regs->error_code);
2918 break;
2921 v->arch.hvm_vmx.cpu_cr2 = exit_qualification;
2922 vmx_inject_hw_exception(v, TRAP_page_fault, regs->error_code);
2923 break;
2924 case TRAP_nmi:
2925 HVMTRACE_0D(NMI, v);
2926 if ( (intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI )
2927 do_nmi(regs); /* Real NMI, vector 2: normal processing. */
2928 else
2929 vmx_reflect_exception(v);
2930 break;
2931 default:
2932 goto exit_and_crash;
2934 break;
2936 case EXIT_REASON_EXTERNAL_INTERRUPT:
2937 vmx_do_extint(regs);
2938 break;
2939 case EXIT_REASON_TRIPLE_FAULT:
2940 hvm_triple_fault();
2941 break;
2942 case EXIT_REASON_PENDING_VIRT_INTR:
2943 /* Disable the interrupt window. */
2944 v->arch.hvm_vcpu.u.vmx.exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING;
2945 __vmwrite(CPU_BASED_VM_EXEC_CONTROL,
2946 v->arch.hvm_vcpu.u.vmx.exec_control);
2947 break;
2948 case EXIT_REASON_PENDING_VIRT_NMI:
2949 /* Disable the NMI window. */
2950 v->arch.hvm_vcpu.u.vmx.exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING;
2951 __vmwrite(CPU_BASED_VM_EXEC_CONTROL,
2952 v->arch.hvm_vcpu.u.vmx.exec_control);
2953 break;
2954 case EXIT_REASON_TASK_SWITCH:
2955 goto exit_and_crash;
2956 case EXIT_REASON_CPUID:
2957 inst_len = __get_instruction_length(); /* Safe: CPUID */
2958 __update_guest_eip(inst_len);
2959 vmx_do_cpuid(regs);
2960 break;
2961 case EXIT_REASON_HLT:
2962 inst_len = __get_instruction_length(); /* Safe: HLT */
2963 __update_guest_eip(inst_len);
2964 vmx_do_hlt();
2965 break;
2966 case EXIT_REASON_INVLPG:
2968 inst_len = __get_instruction_length(); /* Safe: INVLPG */
2969 __update_guest_eip(inst_len);
2970 exit_qualification = __vmread(EXIT_QUALIFICATION);
2971 vmx_do_invlpg(exit_qualification);
2972 break;
2974 case EXIT_REASON_VMCALL:
2976 int rc;
2977 HVMTRACE_1D(VMMCALL, v, regs->eax);
2978 inst_len = __get_instruction_length(); /* Safe: VMCALL */
2979 rc = hvm_do_hypercall(regs);
2980 if ( rc != HVM_HCALL_preempted )
2982 __update_guest_eip(inst_len);
2983 if ( rc == HVM_HCALL_invalidate )
2984 send_invalidate_req();
2986 break;
2988 case EXIT_REASON_CR_ACCESS:
2990 exit_qualification = __vmread(EXIT_QUALIFICATION);
2991 inst_len = __get_instruction_length(); /* Safe: MOV Cn, LMSW, CLTS */
2992 if ( vmx_cr_access(exit_qualification, regs) )
2993 __update_guest_eip(inst_len);
2994 break;
2996 case EXIT_REASON_DR_ACCESS:
2997 exit_qualification = __vmread(EXIT_QUALIFICATION);
2998 vmx_dr_access(exit_qualification, regs);
2999 break;
3000 case EXIT_REASON_IO_INSTRUCTION:
3001 exit_qualification = __vmread(EXIT_QUALIFICATION);
3002 inst_len = __get_instruction_length(); /* Safe: IN, INS, OUT, OUTS */
3003 vmx_io_instruction(exit_qualification, inst_len);
3004 break;
3005 case EXIT_REASON_MSR_READ:
3006 inst_len = __get_instruction_length(); /* Safe: RDMSR */
3007 if ( vmx_do_msr_read(regs) )
3008 __update_guest_eip(inst_len);
3009 break;
3010 case EXIT_REASON_MSR_WRITE:
3011 inst_len = __get_instruction_length(); /* Safe: WRMSR */
3012 if ( vmx_do_msr_write(regs) )
3013 __update_guest_eip(inst_len);
3014 break;
3015 case EXIT_REASON_MWAIT_INSTRUCTION:
3016 case EXIT_REASON_MONITOR_INSTRUCTION:
3017 case EXIT_REASON_PAUSE_INSTRUCTION:
3018 goto exit_and_crash;
3019 case EXIT_REASON_VMCLEAR:
3020 case EXIT_REASON_VMLAUNCH:
3021 case EXIT_REASON_VMPTRLD:
3022 case EXIT_REASON_VMPTRST:
3023 case EXIT_REASON_VMREAD:
3024 case EXIT_REASON_VMRESUME:
3025 case EXIT_REASON_VMWRITE:
3026 case EXIT_REASON_VMXOFF:
3027 case EXIT_REASON_VMXON:
3028 /* Report an invalid-opcode exception when a VMX guest tries to execute
3029 any of the VMX instructions. */
3030 vmx_inject_hw_exception(v, TRAP_invalid_op, VMX_DELIVER_NO_ERROR_CODE);
3031 break;
3033 case EXIT_REASON_TPR_BELOW_THRESHOLD:
3034 break;
3036 case EXIT_REASON_APIC_ACCESS:
3038 unsigned long offset;
3039 exit_qualification = __vmread(EXIT_QUALIFICATION);
3040 offset = exit_qualification & 0x0fffUL;
3041 handle_mmio(APIC_DEFAULT_PHYS_BASE | offset);
3042 break;
3045 default:
3046 exit_and_crash:
3047 gdprintk(XENLOG_ERR, "Bad vmexit (reason %x)\n", exit_reason);
3048 domain_crash(v->domain);
3049 break;
3053 asmlinkage void vmx_trace_vmentry(void)
3055 struct vcpu *v = current;
3056 HVMTRACE_0D(VMENTRY, v);
3059 /*
3060 * Local variables:
3061 * mode: C
3062 * c-set-style: "BSD"
3063 * c-basic-offset: 4
3064 * tab-width: 4
3065 * indent-tabs-mode: nil
3066 * End:
3067 */