xen/arch/x86/hvm/vmx/vmx.c @ 15708:52e5c110aadb (ia64/xen-unstable)

[HVM] Yet another MCA/MCE MSR.
Signed-off-by: Tim Deegan <Tim.Deegan@xensource.com>

author   Tim Deegan <Tim.Deegan@xensource.com>
date     Fri Aug 03 12:10:35 2007 +0100
parents  0636f262ecd8
children 0f541efbb6d6
1 /*
2 * vmx.c: handling VMX architecture-related VM exits
3 * Copyright (c) 2004, Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
16 * Place - Suite 330, Boston, MA 02111-1307 USA.
17 *
18 */
20 #include <xen/config.h>
21 #include <xen/init.h>
22 #include <xen/lib.h>
23 #include <xen/trace.h>
24 #include <xen/sched.h>
25 #include <xen/irq.h>
26 #include <xen/softirq.h>
27 #include <xen/domain_page.h>
28 #include <xen/hypercall.h>
29 #include <xen/perfc.h>
30 #include <asm/current.h>
31 #include <asm/io.h>
32 #include <asm/regs.h>
33 #include <asm/cpufeature.h>
34 #include <asm/processor.h>
35 #include <asm/types.h>
36 #include <asm/msr.h>
37 #include <asm/spinlock.h>
38 #include <asm/paging.h>
39 #include <asm/p2m.h>
40 #include <asm/hvm/hvm.h>
41 #include <asm/hvm/support.h>
42 #include <asm/hvm/vmx/vmx.h>
43 #include <asm/hvm/vmx/vmcs.h>
44 #include <asm/hvm/vmx/cpu.h>
45 #include <public/sched.h>
46 #include <public/hvm/ioreq.h>
47 #include <asm/hvm/vpic.h>
48 #include <asm/hvm/vlapic.h>
49 #include <asm/x86_emulate.h>
50 #include <asm/hvm/vpt.h>
51 #include <public/hvm/save.h>
52 #include <asm/hvm/trace.h>
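/* Result of the MSR access handlers below: the access was handled here,
 * was not handled (fall back to the generic HVM MSR path), or an exception
 * was injected into the guest. */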
54 enum handler_return { HNDL_done, HNDL_unhandled, HNDL_exception_raised };
56 char *vmx_msr_bitmap;
58 static void vmx_ctxt_switch_from(struct vcpu *v);
59 static void vmx_ctxt_switch_to(struct vcpu *v);
61 static int vmx_alloc_vlapic_mapping(struct domain *d);
62 static void vmx_free_vlapic_mapping(struct domain *d);
63 static void vmx_install_vlapic_mapping(struct vcpu *v);
65 static int vmx_domain_initialise(struct domain *d)
66 {
67 return vmx_alloc_vlapic_mapping(d);
68 }
70 static void vmx_domain_destroy(struct domain *d)
71 {
72 vmx_free_vlapic_mapping(d);
73 }
75 static int vmx_vcpu_initialise(struct vcpu *v)
76 {
77 int rc;
79 spin_lock_init(&v->arch.hvm_vmx.vmcs_lock);
81 v->arch.schedule_tail = vmx_do_resume;
82 v->arch.ctxt_switch_from = vmx_ctxt_switch_from;
83 v->arch.ctxt_switch_to = vmx_ctxt_switch_to;
85 if ( (rc = vmx_create_vmcs(v)) != 0 )
86 {
87 dprintk(XENLOG_WARNING,
88 "Failed to create VMCS for vcpu %d: err=%d.\n",
89 v->vcpu_id, rc);
90 return rc;
91 }
93 vmx_install_vlapic_mapping(v);
95 return 0;
96 }
98 static void vmx_vcpu_destroy(struct vcpu *v)
99 {
100 vmx_destroy_vmcs(v);
101 }
103 static int vmx_paging_enabled(struct vcpu *v)
104 {
105 unsigned long cr0 = v->arch.hvm_vmx.cpu_shadow_cr0;
106 return (cr0 & (X86_CR0_PE | X86_CR0_PG)) == (X86_CR0_PE | X86_CR0_PG);
107 }
109 static int vmx_pgbit_test(struct vcpu *v)
110 {
111 unsigned long cr0 = v->arch.hvm_vmx.cpu_shadow_cr0;
112 return cr0 & X86_CR0_PG;
113 }
115 static int vmx_pae_enabled(struct vcpu *v)
116 {
117 unsigned long cr4 = v->arch.hvm_vmx.cpu_shadow_cr4;
118 return vmx_paging_enabled(v) && (cr4 & X86_CR4_PAE);
119 }
121 static int vmx_nx_enabled(struct vcpu *v)
122 {
123 return v->arch.hvm_vmx.efer & EFER_NX;
124 }
126 #ifdef __x86_64__
128 static int vmx_lme_is_set(struct vcpu *v)
129 {
130 return v->arch.hvm_vmx.efer & EFER_LME;
131 }
133 static int vmx_long_mode_enabled(struct vcpu *v)
134 {
135 return v->arch.hvm_vmx.efer & EFER_LMA;
136 }
138 static void vmx_enable_long_mode(struct vcpu *v)
139 {
140 unsigned long vm_entry_value;
142 vm_entry_value = __vmread(VM_ENTRY_CONTROLS);
143 vm_entry_value |= VM_ENTRY_IA32E_MODE;
144 __vmwrite(VM_ENTRY_CONTROLS, vm_entry_value);
146 v->arch.hvm_vmx.efer |= EFER_LMA;
147 }
149 static void vmx_disable_long_mode(struct vcpu *v)
150 {
151 unsigned long vm_entry_value;
153 vm_entry_value = __vmread(VM_ENTRY_CONTROLS);
154 vm_entry_value &= ~VM_ENTRY_IA32E_MODE;
155 __vmwrite(VM_ENTRY_CONTROLS, vm_entry_value);
157 v->arch.hvm_vmx.efer &= ~EFER_LMA;
158 }
160 static DEFINE_PER_CPU(struct vmx_msr_state, host_msr_state);
162 static u32 msr_index[VMX_MSR_COUNT] =
163 {
164 MSR_LSTAR, MSR_STAR, MSR_SYSCALL_MASK
165 };
167 static void vmx_save_host_msrs(void)
168 {
169 struct vmx_msr_state *host_msr_state = &this_cpu(host_msr_state);
170 int i;
172 for ( i = 0; i < VMX_MSR_COUNT; i++ )
173 rdmsrl(msr_index[i], host_msr_state->msrs[i]);
174 }
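/* Update the guest's cached copy of an MSR, flag it for reload on the next
 * context switch in, write the new value to hardware, and flag the host
 * value for restoration on the next switch out. */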
176 #define WRITE_MSR(address) \
177 guest_msr_state->msrs[VMX_INDEX_MSR_ ## address] = msr_content; \
178 set_bit(VMX_INDEX_MSR_ ## address, &guest_msr_state->flags); \
179 wrmsrl(MSR_ ## address, msr_content); \
180 set_bit(VMX_INDEX_MSR_ ## address, &host_msr_state->flags); \
181 break
183 static enum handler_return long_mode_do_msr_read(struct cpu_user_regs *regs)
184 {
185 u64 msr_content = 0;
186 u32 ecx = regs->ecx;
187 struct vcpu *v = current;
188 struct vmx_msr_state *guest_msr_state = &v->arch.hvm_vmx.msr_state;
190 switch ( ecx )
191 {
192 case MSR_EFER:
193 msr_content = v->arch.hvm_vmx.efer;
194 break;
196 case MSR_FS_BASE:
197 msr_content = __vmread(GUEST_FS_BASE);
198 goto check_long_mode;
200 case MSR_GS_BASE:
201 msr_content = __vmread(GUEST_GS_BASE);
202 goto check_long_mode;
204 case MSR_SHADOW_GS_BASE:
205 msr_content = v->arch.hvm_vmx.shadow_gs;
206 check_long_mode:
207 if ( !(vmx_long_mode_enabled(v)) )
208 {
209 vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
210 return HNDL_exception_raised;
211 }
212 break;
214 case MSR_STAR:
215 msr_content = guest_msr_state->msrs[VMX_INDEX_MSR_STAR];
216 break;
218 case MSR_LSTAR:
219 msr_content = guest_msr_state->msrs[VMX_INDEX_MSR_LSTAR];
220 break;
222 case MSR_CSTAR:
223 msr_content = v->arch.hvm_vmx.cstar;
224 break;
226 case MSR_SYSCALL_MASK:
227 msr_content = guest_msr_state->msrs[VMX_INDEX_MSR_SYSCALL_MASK];
228 break;
230 default:
231 return HNDL_unhandled;
232 }
234 HVM_DBG_LOG(DBG_LEVEL_0, "msr 0x%x content 0x%"PRIx64, ecx, msr_content);
236 regs->eax = (u32)(msr_content >> 0);
237 regs->edx = (u32)(msr_content >> 32);
239 return HNDL_done;
240 }
242 static enum handler_return long_mode_do_msr_write(struct cpu_user_regs *regs)
243 {
244 u64 msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
245 u32 ecx = regs->ecx;
246 struct vcpu *v = current;
247 struct vmx_msr_state *guest_msr_state = &v->arch.hvm_vmx.msr_state;
248 struct vmx_msr_state *host_msr_state = &this_cpu(host_msr_state);
250 HVM_DBG_LOG(DBG_LEVEL_0, "msr 0x%x content 0x%"PRIx64, ecx, msr_content);
252 switch ( ecx )
253 {
254 case MSR_EFER:
255 /* offending reserved bit will cause #GP */
256 if ( (msr_content & ~(EFER_LME | EFER_LMA | EFER_NX | EFER_SCE)) ||
257 (!cpu_has_nx && (msr_content & EFER_NX)) ||
258 (!cpu_has_syscall && (msr_content & EFER_SCE)) )
259 {
260 gdprintk(XENLOG_WARNING, "Trying to set reserved bit in "
261 "EFER: %"PRIx64"\n", msr_content);
262 goto gp_fault;
263 }
265 if ( (msr_content & EFER_LME)
266 && !(v->arch.hvm_vmx.efer & EFER_LME) )
267 {
268 if ( unlikely(vmx_paging_enabled(v)) )
269 {
270 gdprintk(XENLOG_WARNING,
271 "Trying to set EFER.LME with paging enabled\n");
272 goto gp_fault;
273 }
274 }
275 else if ( !(msr_content & EFER_LME)
276 && (v->arch.hvm_vmx.efer & EFER_LME) )
277 {
278 if ( unlikely(vmx_paging_enabled(v)) )
279 {
280 gdprintk(XENLOG_WARNING,
281 "Trying to clear EFER.LME with paging enabled\n");
282 goto gp_fault;
283 }
284 }
286 if ( (msr_content ^ v->arch.hvm_vmx.efer) & (EFER_NX|EFER_SCE) )
287 write_efer((read_efer() & ~(EFER_NX|EFER_SCE)) |
288 (msr_content & (EFER_NX|EFER_SCE)));
290 v->arch.hvm_vmx.efer = msr_content;
291 break;
293 case MSR_FS_BASE:
294 case MSR_GS_BASE:
295 case MSR_SHADOW_GS_BASE:
296 if ( !vmx_long_mode_enabled(v) )
297 goto gp_fault;
299 if ( !is_canonical_address(msr_content) )
300 goto uncanonical_address;
302 if ( ecx == MSR_FS_BASE )
303 __vmwrite(GUEST_FS_BASE, msr_content);
304 else if ( ecx == MSR_GS_BASE )
305 __vmwrite(GUEST_GS_BASE, msr_content);
306 else
307 {
308 v->arch.hvm_vmx.shadow_gs = msr_content;
309 wrmsrl(MSR_SHADOW_GS_BASE, msr_content);
310 }
312 break;
314 case MSR_STAR:
315 WRITE_MSR(STAR);
317 case MSR_LSTAR:
318 if ( !is_canonical_address(msr_content) )
319 goto uncanonical_address;
320 WRITE_MSR(LSTAR);
322 case MSR_CSTAR:
323 if ( !is_canonical_address(msr_content) )
324 goto uncanonical_address;
325 v->arch.hvm_vmx.cstar = msr_content;
326 break;
328 case MSR_SYSCALL_MASK:
329 WRITE_MSR(SYSCALL_MASK);
331 default:
332 return HNDL_unhandled;
333 }
335 return HNDL_done;
337 uncanonical_address:
338 HVM_DBG_LOG(DBG_LEVEL_0, "Non-canonical address for MSR write 0x%x", ecx);
339 gp_fault:
340 vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
341 return HNDL_exception_raised;
342 }
344 /*
345 * To avoid MSR save/restore at every VM exit/entry time, we restore
346 * the x86_64 specific MSRs at domain switch time. Since these MSRs
347 * are not modified once set for para domains, we don't save them,
348 * but simply reset them to values set in percpu_traps_init().
349 */
350 static void vmx_restore_host_msrs(void)
351 {
352 struct vmx_msr_state *host_msr_state = &this_cpu(host_msr_state);
353 int i;
355 while ( host_msr_state->flags )
356 {
357 i = find_first_set_bit(host_msr_state->flags);
358 wrmsrl(msr_index[i], host_msr_state->msrs[i]);
359 clear_bit(i, &host_msr_state->flags);
360 }
362 if ( cpu_has_nx && !(read_efer() & EFER_NX) )
363 write_efer(read_efer() | EFER_NX);
364 }
366 static void vmx_save_guest_msrs(struct vcpu *v)
367 {
368 /* MSR_SHADOW_GS_BASE may have been changed by swapgs instruction. */
369 rdmsrl(MSR_SHADOW_GS_BASE, v->arch.hvm_vmx.shadow_gs);
370 }
372 static void vmx_restore_guest_msrs(struct vcpu *v)
373 {
374 struct vmx_msr_state *guest_msr_state, *host_msr_state;
375 unsigned long guest_flags;
376 int i;
378 guest_msr_state = &v->arch.hvm_vmx.msr_state;
379 host_msr_state = &this_cpu(host_msr_state);
381 wrmsrl(MSR_SHADOW_GS_BASE, v->arch.hvm_vmx.shadow_gs);
383 guest_flags = guest_msr_state->flags;
385 while ( guest_flags )
386 {
387 i = find_first_set_bit(guest_flags);
389 HVM_DBG_LOG(DBG_LEVEL_2,
390 "restore guest's index %d msr %x with value %lx",
391 i, msr_index[i], guest_msr_state->msrs[i]);
392 set_bit(i, &host_msr_state->flags);
393 wrmsrl(msr_index[i], guest_msr_state->msrs[i]);
394 clear_bit(i, &guest_flags);
395 }
397 if ( (v->arch.hvm_vmx.efer ^ read_efer()) & (EFER_NX | EFER_SCE) )
398 {
399 HVM_DBG_LOG(DBG_LEVEL_2,
400 "restore guest's EFER with value %lx",
401 v->arch.hvm_vmx.efer);
402 write_efer((read_efer() & ~(EFER_NX | EFER_SCE)) |
403 (v->arch.hvm_vmx.efer & (EFER_NX | EFER_SCE)));
404 }
405 }
407 #else /* __i386__ */
409 static int vmx_lme_is_set(struct vcpu *v)
410 { return 0; }
411 static int vmx_long_mode_enabled(struct vcpu *v)
412 { return 0; }
413 static void vmx_enable_long_mode(struct vcpu *v)
414 { BUG(); }
415 static void vmx_disable_long_mode(struct vcpu *v)
416 { BUG(); }
418 #define vmx_save_host_msrs() ((void)0)
420 static void vmx_restore_host_msrs(void)
421 {
422 if ( cpu_has_nx && !(read_efer() & EFER_NX) )
423 write_efer(read_efer() | EFER_NX);
424 }
426 #define vmx_save_guest_msrs(v) ((void)0)
428 static void vmx_restore_guest_msrs(struct vcpu *v)
429 {
430 if ( (v->arch.hvm_vmx.efer ^ read_efer()) & EFER_NX )
431 {
432 HVM_DBG_LOG(DBG_LEVEL_2,
433 "restore guest's EFER with value %lx",
434 v->arch.hvm_vmx.efer);
435 write_efer((read_efer() & ~EFER_NX) |
436 (v->arch.hvm_vmx.efer & EFER_NX));
437 }
438 }
440 static enum handler_return long_mode_do_msr_read(struct cpu_user_regs *regs)
441 {
442 u64 msr_content = 0;
443 struct vcpu *v = current;
445 switch ( regs->ecx ) {
446 case MSR_EFER:
447 msr_content = v->arch.hvm_vmx.efer;
448 break;
450 default:
451 return HNDL_unhandled;
452 }
454 regs->eax = msr_content >> 0;
455 regs->edx = msr_content >> 32;
457 return HNDL_done;
458 }
460 static enum handler_return long_mode_do_msr_write(struct cpu_user_regs *regs)
461 {
462 u64 msr_content = regs->eax | ((u64)regs->edx << 32);
463 struct vcpu *v = current;
465 switch ( regs->ecx )
466 {
467 case MSR_EFER:
468 /* offending reserved bit will cause #GP */
469 if ( (msr_content & ~EFER_NX) ||
470 (!cpu_has_nx && (msr_content & EFER_NX)) )
471 {
472 gdprintk(XENLOG_WARNING, "Trying to set reserved bit in "
473 "EFER: %"PRIx64"\n", msr_content);
474 vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
475 return HNDL_exception_raised;
476 }
478 if ( (msr_content ^ v->arch.hvm_vmx.efer) & EFER_NX )
479 write_efer((read_efer() & ~EFER_NX) | (msr_content & EFER_NX));
481 v->arch.hvm_vmx.efer = msr_content;
482 break;
484 default:
485 return HNDL_unhandled;
486 }
488 return HNDL_done;
489 }
491 #endif /* __i386__ */
493 #define loaddebug(_v,_reg) \
494 __asm__ __volatile__ ("mov %0,%%db" #_reg : : "r" ((_v)->debugreg[_reg]))
495 #define savedebug(_v,_reg) \
496 __asm__ __volatile__ ("mov %%db" #_reg ",%0" : "=r" ((_v)->debugreg[_reg]))
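/* Current guest execution mode: 0 = real mode, 1 = virtual-8086,
 * 2 = 16-bit protected, 4 = 32-bit protected, 8 = 64-bit (long mode). */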
498 static int vmx_guest_x86_mode(struct vcpu *v)
499 {
500 unsigned int cs_ar_bytes;
502 ASSERT(v == current);
504 if ( unlikely(!(v->arch.hvm_vmx.cpu_shadow_cr0 & X86_CR0_PE)) )
505 return 0;
506 if ( unlikely(__vmread(GUEST_RFLAGS) & X86_EFLAGS_VM) )
507 return 1;
508 cs_ar_bytes = __vmread(GUEST_CS_AR_BYTES);
509 if ( vmx_long_mode_enabled(v) &&
510 likely(cs_ar_bytes & X86_SEG_AR_CS_LM_ACTIVE) )
511 return 8;
512 return (likely(cs_ar_bytes & X86_SEG_AR_DEF_OP_SIZE) ? 4 : 2);
513 }
515 static void vmx_save_dr(struct vcpu *v)
516 {
517 if ( !v->arch.hvm_vcpu.flag_dr_dirty )
518 return;
520 /* Clear the DR dirty flag and re-enable intercepts for DR accesses. */
521 v->arch.hvm_vcpu.flag_dr_dirty = 0;
522 v->arch.hvm_vmx.exec_control |= CPU_BASED_MOV_DR_EXITING;
523 __vmwrite(CPU_BASED_VM_EXEC_CONTROL, v->arch.hvm_vmx.exec_control);
525 savedebug(&v->arch.guest_context, 0);
526 savedebug(&v->arch.guest_context, 1);
527 savedebug(&v->arch.guest_context, 2);
528 savedebug(&v->arch.guest_context, 3);
529 savedebug(&v->arch.guest_context, 6);
530 v->arch.guest_context.debugreg[7] = __vmread(GUEST_DR7);
531 }
533 static void __restore_debug_registers(struct vcpu *v)
534 {
535 loaddebug(&v->arch.guest_context, 0);
536 loaddebug(&v->arch.guest_context, 1);
537 loaddebug(&v->arch.guest_context, 2);
538 loaddebug(&v->arch.guest_context, 3);
539 /* No 4 and 5 */
540 loaddebug(&v->arch.guest_context, 6);
541 /* DR7 is loaded from the VMCS. */
542 }
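/* Dump the guest state held in the VMCS into the hvm_hw_cpu record used by
 * HVM save/restore and live migration. */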
544 void vmx_vmcs_save(struct vcpu *v, struct hvm_hw_cpu *c)
545 {
546 uint32_t ev;
548 vmx_vmcs_enter(v);
550 c->rip = __vmread(GUEST_RIP);
551 c->rsp = __vmread(GUEST_RSP);
552 c->rflags = __vmread(GUEST_RFLAGS);
554 c->cr0 = v->arch.hvm_vmx.cpu_shadow_cr0;
555 c->cr2 = v->arch.hvm_vmx.cpu_cr2;
556 c->cr3 = v->arch.hvm_vmx.cpu_cr3;
557 c->cr4 = v->arch.hvm_vmx.cpu_shadow_cr4;
559 c->msr_efer = v->arch.hvm_vmx.efer;
561 #ifdef HVM_DEBUG_SUSPEND
562 printk("%s: cr3=0x%"PRIx64", cr0=0x%"PRIx64", cr4=0x%"PRIx64".\n",
563 __func__, c->cr3, c->cr0, c->cr4);
564 #endif
566 c->idtr_limit = __vmread(GUEST_IDTR_LIMIT);
567 c->idtr_base = __vmread(GUEST_IDTR_BASE);
569 c->gdtr_limit = __vmread(GUEST_GDTR_LIMIT);
570 c->gdtr_base = __vmread(GUEST_GDTR_BASE);
572 c->cs_sel = __vmread(GUEST_CS_SELECTOR);
573 c->cs_limit = __vmread(GUEST_CS_LIMIT);
574 c->cs_base = __vmread(GUEST_CS_BASE);
575 c->cs_arbytes = __vmread(GUEST_CS_AR_BYTES);
577 c->ds_sel = __vmread(GUEST_DS_SELECTOR);
578 c->ds_limit = __vmread(GUEST_DS_LIMIT);
579 c->ds_base = __vmread(GUEST_DS_BASE);
580 c->ds_arbytes = __vmread(GUEST_DS_AR_BYTES);
582 c->es_sel = __vmread(GUEST_ES_SELECTOR);
583 c->es_limit = __vmread(GUEST_ES_LIMIT);
584 c->es_base = __vmread(GUEST_ES_BASE);
585 c->es_arbytes = __vmread(GUEST_ES_AR_BYTES);
587 c->ss_sel = __vmread(GUEST_SS_SELECTOR);
588 c->ss_limit = __vmread(GUEST_SS_LIMIT);
589 c->ss_base = __vmread(GUEST_SS_BASE);
590 c->ss_arbytes = __vmread(GUEST_SS_AR_BYTES);
592 c->fs_sel = __vmread(GUEST_FS_SELECTOR);
593 c->fs_limit = __vmread(GUEST_FS_LIMIT);
594 c->fs_base = __vmread(GUEST_FS_BASE);
595 c->fs_arbytes = __vmread(GUEST_FS_AR_BYTES);
597 c->gs_sel = __vmread(GUEST_GS_SELECTOR);
598 c->gs_limit = __vmread(GUEST_GS_LIMIT);
599 c->gs_base = __vmread(GUEST_GS_BASE);
600 c->gs_arbytes = __vmread(GUEST_GS_AR_BYTES);
602 c->tr_sel = __vmread(GUEST_TR_SELECTOR);
603 c->tr_limit = __vmread(GUEST_TR_LIMIT);
604 c->tr_base = __vmread(GUEST_TR_BASE);
605 c->tr_arbytes = __vmread(GUEST_TR_AR_BYTES);
607 c->ldtr_sel = __vmread(GUEST_LDTR_SELECTOR);
608 c->ldtr_limit = __vmread(GUEST_LDTR_LIMIT);
609 c->ldtr_base = __vmread(GUEST_LDTR_BASE);
610 c->ldtr_arbytes = __vmread(GUEST_LDTR_AR_BYTES);
612 c->sysenter_cs = __vmread(GUEST_SYSENTER_CS);
613 c->sysenter_esp = __vmread(GUEST_SYSENTER_ESP);
614 c->sysenter_eip = __vmread(GUEST_SYSENTER_EIP);
616 c->pending_event = 0;
617 c->error_code = 0;
618 if ( ((ev = __vmread(VM_ENTRY_INTR_INFO)) & INTR_INFO_VALID_MASK) &&
619 hvm_event_needs_reinjection((ev >> 8) & 7, ev & 0xff) )
620 {
621 c->pending_event = ev;
622 c->error_code = __vmread(VM_ENTRY_EXCEPTION_ERROR_CODE);
623 }
625 vmx_vmcs_exit(v);
626 }
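/* Load guest state from a saved hvm_hw_cpu record back into the VMCS,
 * validating CR3 and any pending event before accepting them. */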
628 int vmx_vmcs_restore(struct vcpu *v, struct hvm_hw_cpu *c)
629 {
630 unsigned long mfn, old_base_mfn;
632 vmx_vmcs_enter(v);
634 __vmwrite(GUEST_RIP, c->rip);
635 __vmwrite(GUEST_RSP, c->rsp);
636 __vmwrite(GUEST_RFLAGS, c->rflags);
638 v->arch.hvm_vmx.cpu_cr0 = (c->cr0 | X86_CR0_PE | X86_CR0_PG |
639 X86_CR0_NE | X86_CR0_WP | X86_CR0_ET);
640 __vmwrite(GUEST_CR0, v->arch.hvm_vmx.cpu_cr0);
641 v->arch.hvm_vmx.cpu_shadow_cr0 = c->cr0;
642 __vmwrite(CR0_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr0);
644 v->arch.hvm_vmx.cpu_cr2 = c->cr2;
646 v->arch.hvm_vmx.efer = c->msr_efer;
648 #ifdef HVM_DEBUG_SUSPEND
649 printk("%s: cr3=0x%"PRIx64", cr0=0x%"PRIx64", cr4=0x%"PRIx64".\n",
650 __func__, c->cr3, c->cr0, c->cr4);
651 #endif
653 if ( !vmx_paging_enabled(v) )
654 {
655 HVM_DBG_LOG(DBG_LEVEL_VMMU, "%s: paging not enabled.", __func__);
656 goto skip_cr3;
657 }
659 HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 = %"PRIx64, c->cr3);
660 /* current != vcpu here, as this is not called from arch_vmx_do_launch. */
661 mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
662 if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) )
663 {
664 gdprintk(XENLOG_ERR, "Invalid CR3 value=0x%"PRIx64".\n", c->cr3);
665 vmx_vmcs_exit(v);
666 return -EINVAL;
667 }
669 old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
670 v->arch.guest_table = pagetable_from_pfn(mfn);
671 if ( old_base_mfn )
672 put_page(mfn_to_page(old_base_mfn));
674 skip_cr3:
675 v->arch.hvm_vmx.cpu_cr3 = c->cr3;
677 if ( vmx_long_mode_enabled(v) )
678 vmx_enable_long_mode(v);
680 __vmwrite(GUEST_CR4, (c->cr4 | HVM_CR4_HOST_MASK));
681 v->arch.hvm_vmx.cpu_shadow_cr4 = c->cr4;
682 __vmwrite(CR4_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr4);
684 __vmwrite(GUEST_IDTR_LIMIT, c->idtr_limit);
685 __vmwrite(GUEST_IDTR_BASE, c->idtr_base);
687 __vmwrite(GUEST_GDTR_LIMIT, c->gdtr_limit);
688 __vmwrite(GUEST_GDTR_BASE, c->gdtr_base);
690 __vmwrite(GUEST_CS_SELECTOR, c->cs_sel);
691 __vmwrite(GUEST_CS_LIMIT, c->cs_limit);
692 __vmwrite(GUEST_CS_BASE, c->cs_base);
693 __vmwrite(GUEST_CS_AR_BYTES, c->cs_arbytes);
695 __vmwrite(GUEST_DS_SELECTOR, c->ds_sel);
696 __vmwrite(GUEST_DS_LIMIT, c->ds_limit);
697 __vmwrite(GUEST_DS_BASE, c->ds_base);
698 __vmwrite(GUEST_DS_AR_BYTES, c->ds_arbytes);
700 __vmwrite(GUEST_ES_SELECTOR, c->es_sel);
701 __vmwrite(GUEST_ES_LIMIT, c->es_limit);
702 __vmwrite(GUEST_ES_BASE, c->es_base);
703 __vmwrite(GUEST_ES_AR_BYTES, c->es_arbytes);
705 __vmwrite(GUEST_SS_SELECTOR, c->ss_sel);
706 __vmwrite(GUEST_SS_LIMIT, c->ss_limit);
707 __vmwrite(GUEST_SS_BASE, c->ss_base);
708 __vmwrite(GUEST_SS_AR_BYTES, c->ss_arbytes);
710 __vmwrite(GUEST_FS_SELECTOR, c->fs_sel);
711 __vmwrite(GUEST_FS_LIMIT, c->fs_limit);
712 __vmwrite(GUEST_FS_BASE, c->fs_base);
713 __vmwrite(GUEST_FS_AR_BYTES, c->fs_arbytes);
715 __vmwrite(GUEST_GS_SELECTOR, c->gs_sel);
716 __vmwrite(GUEST_GS_LIMIT, c->gs_limit);
717 __vmwrite(GUEST_GS_BASE, c->gs_base);
718 __vmwrite(GUEST_GS_AR_BYTES, c->gs_arbytes);
720 __vmwrite(GUEST_TR_SELECTOR, c->tr_sel);
721 __vmwrite(GUEST_TR_LIMIT, c->tr_limit);
722 __vmwrite(GUEST_TR_BASE, c->tr_base);
723 __vmwrite(GUEST_TR_AR_BYTES, c->tr_arbytes);
725 __vmwrite(GUEST_LDTR_SELECTOR, c->ldtr_sel);
726 __vmwrite(GUEST_LDTR_LIMIT, c->ldtr_limit);
727 __vmwrite(GUEST_LDTR_BASE, c->ldtr_base);
728 __vmwrite(GUEST_LDTR_AR_BYTES, c->ldtr_arbytes);
730 __vmwrite(GUEST_SYSENTER_CS, c->sysenter_cs);
731 __vmwrite(GUEST_SYSENTER_ESP, c->sysenter_esp);
732 __vmwrite(GUEST_SYSENTER_EIP, c->sysenter_eip);
734 __vmwrite(GUEST_DR7, c->dr7);
736 vmx_vmcs_exit(v);
738 paging_update_paging_modes(v);
740 if ( c->pending_valid )
741 {
742 gdprintk(XENLOG_INFO, "Re-injecting 0x%"PRIx32", 0x%"PRIx32"\n",
743 c->pending_event, c->error_code);
745 if ( (c->pending_type == 1) || (c->pending_type > 6) ||
746 (c->pending_reserved != 0) )
747 {
748 gdprintk(XENLOG_ERR, "Invalid pending event 0x%"PRIx32".\n",
749 c->pending_event);
750 return -EINVAL;
751 }
753 if ( hvm_event_needs_reinjection(c->pending_type, c->pending_vector) )
754 {
755 vmx_vmcs_enter(v);
756 __vmwrite(VM_ENTRY_INTR_INFO, c->pending_event);
757 __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE, c->error_code);
758 vmx_vmcs_exit(v);
759 }
760 }
762 return 0;
763 }
765 #if defined(__x86_64__) && defined(HVM_DEBUG_SUSPEND)
766 static void dump_msr_state(struct vmx_msr_state *m)
767 {
768 int i = 0;
769 printk("**** msr state ****\n");
770 printk("shadow_gs=0x%lx, flags=0x%lx, msr_items:", m->shadow_gs, m->flags);
771 for ( i = 0; i < VMX_MSR_COUNT; i++ )
772 printk("0x%lx,", m->msrs[i]);
773 printk("\n");
774 }
775 #else
776 #define dump_msr_state(m) ((void)0)
777 #endif
779 static void vmx_save_cpu_state(struct vcpu *v, struct hvm_hw_cpu *data)
780 {
781 #ifdef __x86_64__
782 struct vmx_msr_state *guest_state = &v->arch.hvm_vmx.msr_state;
783 unsigned long guest_flags = guest_state->flags;
785 data->shadow_gs = v->arch.hvm_vmx.shadow_gs;
786 data->msr_cstar = v->arch.hvm_vmx.cstar;
788 /* save msrs */
789 data->msr_flags = guest_flags;
790 data->msr_lstar = guest_state->msrs[VMX_INDEX_MSR_LSTAR];
791 data->msr_star = guest_state->msrs[VMX_INDEX_MSR_STAR];
792 data->msr_syscall_mask = guest_state->msrs[VMX_INDEX_MSR_SYSCALL_MASK];
793 #endif
795 data->tsc = hvm_get_guest_time(v);
797 dump_msr_state(guest_state);
798 }
800 static void vmx_load_cpu_state(struct vcpu *v, struct hvm_hw_cpu *data)
801 {
802 #ifdef __x86_64__
803 struct vmx_msr_state *guest_state = &v->arch.hvm_vmx.msr_state;
805 /* restore msrs */
806 guest_state->flags = data->msr_flags;
807 guest_state->msrs[VMX_INDEX_MSR_LSTAR] = data->msr_lstar;
808 guest_state->msrs[VMX_INDEX_MSR_STAR] = data->msr_star;
809 guest_state->msrs[VMX_INDEX_MSR_SYSCALL_MASK] = data->msr_syscall_mask;
811 v->arch.hvm_vmx.cstar = data->msr_cstar;
812 v->arch.hvm_vmx.shadow_gs = data->shadow_gs;
813 #endif
815 v->arch.hvm_vmx.vmxassist_enabled = !(data->cr0 & X86_CR0_PE);
817 hvm_set_guest_time(v, data->tsc);
819 dump_msr_state(guest_state);
820 }
823 static void vmx_save_vmcs_ctxt(struct vcpu *v, struct hvm_hw_cpu *ctxt)
824 {
825 vmx_save_cpu_state(v, ctxt);
826 vmx_vmcs_save(v, ctxt);
827 }
829 static int vmx_load_vmcs_ctxt(struct vcpu *v, struct hvm_hw_cpu *ctxt)
830 {
831 vmx_load_cpu_state(v, ctxt);
833 if ( vmx_vmcs_restore(v, ctxt) )
834 {
835 gdprintk(XENLOG_ERR, "vmx_vmcs restore failed!\n");
836 domain_crash(v->domain);
837 return -EINVAL;
838 }
840 return 0;
841 }
843 /*
844 * DR7 is saved and restored on every vmexit. Other debug registers only
845 * need to be restored if their value is going to affect execution -- i.e.,
846 * if one of the breakpoints is enabled. So mask out all bits that don't
847 * enable some breakpoint functionality.
848 */
849 #define DR7_ACTIVE_MASK 0xff
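/* Bits 0-7 of DR7 are the L0-G3 breakpoint enable bits: if none are set, no
 * breakpoint is armed and the other debug registers need not be reloaded. */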
851 static void vmx_restore_dr(struct vcpu *v)
852 {
853 /* NB. __vmread() is not usable here, so we cannot read from the VMCS. */
854 if ( unlikely(v->arch.guest_context.debugreg[7] & DR7_ACTIVE_MASK) )
855 __restore_debug_registers(v);
856 }
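/* Per-vcpu context switch hooks: save guest MSRs and debug registers when
 * switching away from a VMX vcpu, and restore them when switching back in. */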
858 static void vmx_ctxt_switch_from(struct vcpu *v)
859 {
860 vmx_save_guest_msrs(v);
861 vmx_restore_host_msrs();
862 vmx_save_dr(v);
863 }
865 static void vmx_ctxt_switch_to(struct vcpu *v)
866 {
867 vmx_restore_guest_msrs(v);
868 vmx_restore_dr(v);
869 }
871 static void vmx_store_cpu_guest_regs(
872 struct vcpu *v, struct cpu_user_regs *regs, unsigned long *crs)
873 {
874 vmx_vmcs_enter(v);
876 if ( regs != NULL )
877 {
878 regs->eflags = __vmread(GUEST_RFLAGS);
879 regs->ss = __vmread(GUEST_SS_SELECTOR);
880 regs->cs = __vmread(GUEST_CS_SELECTOR);
881 regs->eip = __vmread(GUEST_RIP);
882 regs->esp = __vmread(GUEST_RSP);
883 }
885 if ( crs != NULL )
886 {
887 crs[0] = v->arch.hvm_vmx.cpu_shadow_cr0;
888 crs[2] = v->arch.hvm_vmx.cpu_cr2;
889 crs[3] = v->arch.hvm_vmx.cpu_cr3;
890 crs[4] = v->arch.hvm_vmx.cpu_shadow_cr4;
891 }
893 vmx_vmcs_exit(v);
894 }
896 static void vmx_load_cpu_guest_regs(struct vcpu *v, struct cpu_user_regs *regs)
897 {
898 unsigned long base;
900 vmx_vmcs_enter(v);
902 __vmwrite(GUEST_SS_SELECTOR, regs->ss);
903 __vmwrite(GUEST_RSP, regs->esp);
905 /* NB. Bit 1 of RFLAGS must be set for VMENTRY to succeed. */
906 __vmwrite(GUEST_RFLAGS, regs->eflags | 2UL);
908 if ( regs->eflags & EF_VM )
909 {
910 /*
911 * The VMX spec (section 4.3.1.2, Checks on Guest Segment
912 * Registers) says that virtual-8086 mode guests' segment
913 * base-address fields in the VMCS must be equal to their
914 * corresponding segment selector field shifted left by
915 * four bits upon vmentry.
916 */
917 base = __vmread(GUEST_CS_BASE);
918 if ( (regs->cs << 4) != base )
919 __vmwrite(GUEST_CS_BASE, regs->cs << 4);
920 base = __vmread(GUEST_SS_BASE);
921 if ( (regs->ss << 4) != base )
922 __vmwrite(GUEST_SS_BASE, regs->ss << 4);
923 }
925 __vmwrite(GUEST_CS_SELECTOR, regs->cs);
926 __vmwrite(GUEST_RIP, regs->eip);
928 vmx_vmcs_exit(v);
929 }
931 static unsigned long vmx_get_ctrl_reg(struct vcpu *v, unsigned int num)
932 {
933 switch ( num )
934 {
935 case 0:
936 return v->arch.hvm_vmx.cpu_cr0;
937 case 2:
938 return v->arch.hvm_vmx.cpu_cr2;
939 case 3:
940 return v->arch.hvm_vmx.cpu_cr3;
941 case 4:
942 return v->arch.hvm_vmx.cpu_shadow_cr4;
943 default:
944 BUG();
945 }
946 return 0; /* dummy */
947 }
949 static unsigned long vmx_get_segment_base(struct vcpu *v, enum x86_segment seg)
950 {
951 unsigned long base = 0;
952 int long_mode = 0;
954 ASSERT(v == current);
956 if ( vmx_long_mode_enabled(v) &&
957 (__vmread(GUEST_CS_AR_BYTES) & X86_SEG_AR_CS_LM_ACTIVE) )
958 long_mode = 1;
960 switch ( seg )
961 {
962 case x86_seg_cs: if ( !long_mode ) base = __vmread(GUEST_CS_BASE); break;
963 case x86_seg_ds: if ( !long_mode ) base = __vmread(GUEST_DS_BASE); break;
964 case x86_seg_es: if ( !long_mode ) base = __vmread(GUEST_ES_BASE); break;
965 case x86_seg_fs: base = __vmread(GUEST_FS_BASE); break;
966 case x86_seg_gs: base = __vmread(GUEST_GS_BASE); break;
967 case x86_seg_ss: if ( !long_mode ) base = __vmread(GUEST_SS_BASE); break;
968 case x86_seg_tr: base = __vmread(GUEST_TR_BASE); break;
969 case x86_seg_gdtr: base = __vmread(GUEST_GDTR_BASE); break;
970 case x86_seg_idtr: base = __vmread(GUEST_IDTR_BASE); break;
971 case x86_seg_ldtr: base = __vmread(GUEST_LDTR_BASE); break;
972 default: BUG(); break;
973 }
975 return base;
976 }
978 static void vmx_get_segment_register(struct vcpu *v, enum x86_segment seg,
979 struct segment_register *reg)
980 {
981 u16 attr = 0;
983 ASSERT(v == current);
985 switch ( seg )
986 {
987 case x86_seg_cs:
988 reg->sel = __vmread(GUEST_CS_SELECTOR);
989 reg->limit = __vmread(GUEST_CS_LIMIT);
990 reg->base = __vmread(GUEST_CS_BASE);
991 attr = __vmread(GUEST_CS_AR_BYTES);
992 break;
993 case x86_seg_ds:
994 reg->sel = __vmread(GUEST_DS_SELECTOR);
995 reg->limit = __vmread(GUEST_DS_LIMIT);
996 reg->base = __vmread(GUEST_DS_BASE);
997 attr = __vmread(GUEST_DS_AR_BYTES);
998 break;
999 case x86_seg_es:
1000 reg->sel = __vmread(GUEST_ES_SELECTOR);
1001 reg->limit = __vmread(GUEST_ES_LIMIT);
1002 reg->base = __vmread(GUEST_ES_BASE);
1003 attr = __vmread(GUEST_ES_AR_BYTES);
1004 break;
1005 case x86_seg_fs:
1006 reg->sel = __vmread(GUEST_FS_SELECTOR);
1007 reg->limit = __vmread(GUEST_FS_LIMIT);
1008 reg->base = __vmread(GUEST_FS_BASE);
1009 attr = __vmread(GUEST_FS_AR_BYTES);
1010 break;
1011 case x86_seg_gs:
1012 reg->sel = __vmread(GUEST_GS_SELECTOR);
1013 reg->limit = __vmread(GUEST_GS_LIMIT);
1014 reg->base = __vmread(GUEST_GS_BASE);
1015 attr = __vmread(GUEST_GS_AR_BYTES);
1016 break;
1017 case x86_seg_ss:
1018 reg->sel = __vmread(GUEST_SS_SELECTOR);
1019 reg->limit = __vmread(GUEST_SS_LIMIT);
1020 reg->base = __vmread(GUEST_SS_BASE);
1021 attr = __vmread(GUEST_SS_AR_BYTES);
1022 break;
1023 case x86_seg_tr:
1024 reg->sel = __vmread(GUEST_TR_SELECTOR);
1025 reg->limit = __vmread(GUEST_TR_LIMIT);
1026 reg->base = __vmread(GUEST_TR_BASE);
1027 attr = __vmread(GUEST_TR_AR_BYTES);
1028 break;
1029 case x86_seg_gdtr:
1030 reg->limit = __vmread(GUEST_GDTR_LIMIT);
1031 reg->base = __vmread(GUEST_GDTR_BASE);
1032 break;
1033 case x86_seg_idtr:
1034 reg->limit = __vmread(GUEST_IDTR_LIMIT);
1035 reg->base = __vmread(GUEST_IDTR_BASE);
1036 break;
1037 case x86_seg_ldtr:
1038 reg->sel = __vmread(GUEST_LDTR_SELECTOR);
1039 reg->limit = __vmread(GUEST_LDTR_LIMIT);
1040 reg->base = __vmread(GUEST_LDTR_BASE);
1041 attr = __vmread(GUEST_LDTR_AR_BYTES);
1042 break;
1043 default:
1044 BUG();
1045 }
1047 reg->attr.bytes = (attr & 0xff) | ((attr >> 4) & 0xf00);
1048 }
1050 /* Make sure that xen intercepts any FP accesses from current */
1051 static void vmx_stts(struct vcpu *v)
1052 {
1053 /* VMX depends on operating on the current vcpu */
1054 ASSERT(v == current);
1056 /*
1057 * If the guest does not have TS enabled then we must cause and handle an
1058 * exception on first use of the FPU. If the guest *does* have TS enabled
1059 * then this is not necessary: no FPU activity can occur until the guest
1060 * clears CR0.TS, and we will initialise the FPU when that happens.
1061 */
1062 if ( !(v->arch.hvm_vmx.cpu_shadow_cr0 & X86_CR0_TS) )
1063 {
1064 v->arch.hvm_vmx.cpu_cr0 |= X86_CR0_TS;
1065 __vmwrite(GUEST_CR0, v->arch.hvm_vmx.cpu_cr0);
1066 __vm_set_bit(EXCEPTION_BITMAP, TRAP_no_device);
1067 }
1068 }
1070 static void vmx_set_tsc_offset(struct vcpu *v, u64 offset)
1071 {
1072 vmx_vmcs_enter(v);
1073 __vmwrite(TSC_OFFSET, offset);
1074 #if defined (__i386__)
1075 __vmwrite(TSC_OFFSET_HIGH, offset >> 32);
1076 #endif
1077 vmx_vmcs_exit(v);
1078 }
1080 static void vmx_init_ap_context(
1081 struct vcpu_guest_context *ctxt, int vcpuid, int trampoline_vector)
1082 {
1083 memset(ctxt, 0, sizeof(*ctxt));
1084 ctxt->user_regs.eip = VMXASSIST_BASE;
1085 ctxt->user_regs.edx = vcpuid;
1086 ctxt->user_regs.ebx = trampoline_vector;
1087 }
1089 void do_nmi(struct cpu_user_regs *);
1091 static void vmx_init_hypercall_page(struct domain *d, void *hypercall_page)
1092 {
1093 char *p;
1094 int i;
1096 for ( i = 0; i < (PAGE_SIZE / 32); i++ )
1097 {
1098 p = (char *)(hypercall_page + (i * 32));
1099 *(u8 *)(p + 0) = 0xb8; /* mov imm32, %eax */
1100 *(u32 *)(p + 1) = i;
1101 *(u8 *)(p + 5) = 0x0f; /* vmcall */
1102 *(u8 *)(p + 6) = 0x01;
1103 *(u8 *)(p + 7) = 0xc1;
1104 *(u8 *)(p + 8) = 0xc3; /* ret */
1105 }
1107 /* Don't support HYPERVISOR_iret at the moment */
1108 *(u16 *)(hypercall_page + (__HYPERVISOR_iret * 32)) = 0x0b0f; /* ud2 */
1109 }
1111 static int vmx_interrupts_enabled(struct vcpu *v, enum hvm_intack type)
1112 {
1113 unsigned long intr_shadow, eflags;
1115 ASSERT(v == current);
1117 intr_shadow = __vmread(GUEST_INTERRUPTIBILITY_INFO);
1119 if ( type == hvm_intack_nmi )
1120 return !(intr_shadow & (VMX_INTR_SHADOW_STI|
1121 VMX_INTR_SHADOW_MOV_SS|
1122 VMX_INTR_SHADOW_NMI));
1124 ASSERT((type == hvm_intack_pic) || (type == hvm_intack_lapic));
1125 eflags = __vmread(GUEST_RFLAGS);
1126 return (!irq_masked(eflags) &&
1127 !(intr_shadow & (VMX_INTR_SHADOW_STI|VMX_INTR_SHADOW_MOV_SS)));
1128 }
1130 static void vmx_update_host_cr3(struct vcpu *v)
1131 {
1132 ASSERT((v == current) || !vcpu_runnable(v));
1133 vmx_vmcs_enter(v);
1134 __vmwrite(HOST_CR3, v->arch.cr3);
1135 vmx_vmcs_exit(v);
1136 }
1138 static void vmx_update_guest_cr3(struct vcpu *v)
1139 {
1140 ASSERT((v == current) || !vcpu_runnable(v));
1141 vmx_vmcs_enter(v);
1142 __vmwrite(GUEST_CR3, v->arch.hvm_vcpu.hw_cr3);
1143 vmx_vmcs_exit(v);
1144 }
1146 static void vmx_flush_guest_tlbs(void)
1147 {
1148 /* No tagged TLB support on VMX yet. The fact that we're in Xen
1149 * at all means any guest will have a clean TLB when it's next run,
1150 * because VMRESUME will flush it for us. */
1151 }
1153 static void vmx_inject_exception(
1154 unsigned int trapnr, int errcode, unsigned long cr2)
1155 {
1156 struct vcpu *v = current;
1157 vmx_inject_hw_exception(v, trapnr, errcode);
1158 if ( trapnr == TRAP_page_fault )
1159 v->arch.hvm_vmx.cpu_cr2 = cr2;
1160 }
1162 static void vmx_update_vtpr(struct vcpu *v, unsigned long value)
1163 {
1164 /* VMX doesn't have a V_TPR field */
1165 }
1167 static int vmx_event_pending(struct vcpu *v)
1168 {
1169 ASSERT(v == current);
1170 return (__vmread(VM_ENTRY_INTR_INFO) & INTR_INFO_VALID_MASK);
1171 }
1173 static void disable_intercept_for_msr(u32 msr)
1174 {
1175 /*
1176 * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
1177 * have the write-low and read-high bitmap offsets the wrong way round.
1178 * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
1179 */
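/* For example, MSR_FS_BASE (0xc0000100) falls in the high range: masking
 * with 0x1fff selects bit 0x100 of the read-high and write-high bitmaps. */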
1180 if ( msr <= 0x1fff )
1181 {
1182 __clear_bit(msr, vmx_msr_bitmap + 0x000); /* read-low */
1183 __clear_bit(msr, vmx_msr_bitmap + 0x800); /* write-low */
1184 }
1185 else if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) )
1186 {
1187 msr &= 0x1fff;
1188 __clear_bit(msr, vmx_msr_bitmap + 0x400); /* read-high */
1189 __clear_bit(msr, vmx_msr_bitmap + 0xc00); /* write-high */
1190 }
1191 }
1193 static struct hvm_function_table vmx_function_table = {
1194 .name = "VMX",
1195 .domain_initialise = vmx_domain_initialise,
1196 .domain_destroy = vmx_domain_destroy,
1197 .vcpu_initialise = vmx_vcpu_initialise,
1198 .vcpu_destroy = vmx_vcpu_destroy,
1199 .store_cpu_guest_regs = vmx_store_cpu_guest_regs,
1200 .load_cpu_guest_regs = vmx_load_cpu_guest_regs,
1201 .save_cpu_ctxt = vmx_save_vmcs_ctxt,
1202 .load_cpu_ctxt = vmx_load_vmcs_ctxt,
1203 .paging_enabled = vmx_paging_enabled,
1204 .long_mode_enabled = vmx_long_mode_enabled,
1205 .pae_enabled = vmx_pae_enabled,
1206 .nx_enabled = vmx_nx_enabled,
1207 .interrupts_enabled = vmx_interrupts_enabled,
1208 .guest_x86_mode = vmx_guest_x86_mode,
1209 .get_guest_ctrl_reg = vmx_get_ctrl_reg,
1210 .get_segment_base = vmx_get_segment_base,
1211 .get_segment_register = vmx_get_segment_register,
1212 .update_host_cr3 = vmx_update_host_cr3,
1213 .update_guest_cr3 = vmx_update_guest_cr3,
1214 .flush_guest_tlbs = vmx_flush_guest_tlbs,
1215 .update_vtpr = vmx_update_vtpr,
1216 .stts = vmx_stts,
1217 .set_tsc_offset = vmx_set_tsc_offset,
1218 .inject_exception = vmx_inject_exception,
1219 .init_ap_context = vmx_init_ap_context,
1220 .init_hypercall_page = vmx_init_hypercall_page,
1221 .event_pending = vmx_event_pending,
1222 .cpu_up = vmx_cpu_up,
1223 .cpu_down = vmx_cpu_down,
1224 };
1226 void start_vmx(void)
1227 {
1228 static int bootstrapped;
1230 vmx_save_host_msrs();
1232 if ( bootstrapped )
1233 {
1234 if ( hvm_enabled && !vmx_cpu_up() )
1235 {
1236 printk("VMX: FATAL: failed to initialise CPU%d!\n",
1237 smp_processor_id());
1238 BUG();
1239 }
1240 return;
1241 }
1243 bootstrapped = 1;
1245 /* Xen does not fill x86_capability words except 0. */
1246 boot_cpu_data.x86_capability[4] = cpuid_ecx(1);
1248 if ( !test_bit(X86_FEATURE_VMXE, &boot_cpu_data.x86_capability) )
1249 return;
1251 set_in_cr4(X86_CR4_VMXE);
1253 if ( !vmx_cpu_up() )
1254 {
1255 printk("VMX: failed to initialise.\n");
1256 return;
1257 }
1259 setup_vmcs_dump();
1261 hvm_enable(&vmx_function_table);
1263 if ( cpu_has_vmx_msr_bitmap )
1264 {
1265 printk("VMX: MSR intercept bitmap enabled\n");
1266 vmx_msr_bitmap = alloc_xenheap_page();
1267 BUG_ON(vmx_msr_bitmap == NULL);
1268 memset(vmx_msr_bitmap, ~0, PAGE_SIZE);
1270 disable_intercept_for_msr(MSR_FS_BASE);
1271 disable_intercept_for_msr(MSR_GS_BASE);
1273 disable_intercept_for_msr(MSR_IA32_SYSENTER_CS);
1274 disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP);
1275 disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP);
1276 }
1277 }
1279 /*
1280 * Not all cases receive valid value in the VM-exit instruction length field.
1281 * Callers must know what they're doing!
1282 */
1283 static int __get_instruction_length(void)
1284 {
1285 int len;
1286 len = __vmread(VM_EXIT_INSTRUCTION_LEN); /* Safe: callers audited */
1287 BUG_ON((len < 1) || (len > 15));
1288 return len;
1289 }
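/* Advance the guest RIP past the current instruction, clear RF, and drop
 * any STI/MOV-SS interrupt shadow so pending interrupts can be delivered. */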
1291 static void __update_guest_eip(unsigned long inst_len)
1292 {
1293 unsigned long x;
1295 x = __vmread(GUEST_RIP);
1296 __vmwrite(GUEST_RIP, x + inst_len);
1298 x = __vmread(GUEST_RFLAGS);
1299 if ( x & X86_EFLAGS_RF )
1300 __vmwrite(GUEST_RFLAGS, x & ~X86_EFLAGS_RF);
1302 x = __vmread(GUEST_INTERRUPTIBILITY_INFO);
1303 if ( x & (VMX_INTR_SHADOW_STI | VMX_INTR_SHADOW_MOV_SS) )
1304 {
1305 x &= ~(VMX_INTR_SHADOW_STI | VMX_INTR_SHADOW_MOV_SS);
1306 __vmwrite(GUEST_INTERRUPTIBILITY_INFO, x);
1307 }
1308 }
1310 static void vmx_do_no_device_fault(void)
1311 {
1312 struct vcpu *v = current;
1314 setup_fpu(current);
1315 __vm_clear_bit(EXCEPTION_BITMAP, TRAP_no_device);
1317 /* Disable TS in guest CR0 unless the guest wants the exception too. */
1318 if ( !(v->arch.hvm_vmx.cpu_shadow_cr0 & X86_CR0_TS) )
1319 {
1320 v->arch.hvm_vmx.cpu_cr0 &= ~X86_CR0_TS;
1321 __vmwrite(GUEST_CR0, v->arch.hvm_vmx.cpu_cr0);
1322 }
1323 }
1325 #define bitmaskof(idx) (1U << ((idx) & 31))
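/* Handle CPUID vmexits: mask out features that cannot be exposed to an HVM
 * guest (e.g. VMXE, EST, TM2) and service the private 0x40000003 leaf used
 * by vmxassist. */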
1326 static void vmx_do_cpuid(struct cpu_user_regs *regs)
1328 unsigned int input = (unsigned int)regs->eax;
1329 unsigned int count = (unsigned int)regs->ecx;
1330 unsigned int eax, ebx, ecx, edx;
1332 if ( input == 0x00000004 )
1334 cpuid_count(input, count, &eax, &ebx, &ecx, &edx);
1335 eax &= NUM_CORES_RESET_MASK;
1337 else if ( input == 0x40000003 )
1339 /*
1340 * NB. Unsupported interface for private use of VMXASSIST only.
1341 * Note that this leaf lives at <max-hypervisor-leaf> + 1.
1342 */
1343 u64 value = ((u64)regs->edx << 32) | (u32)regs->ecx;
1344 unsigned long mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT);
1345 struct vcpu *v = current;
1346 char *p;
1348 gdprintk(XENLOG_INFO, "Input address is 0x%"PRIx64".\n", value);
1350 /* 8-byte aligned valid pseudophys address from vmxassist, please. */
1351 if ( (value & 7) || (mfn == INVALID_MFN) ||
1352 !v->arch.hvm_vmx.vmxassist_enabled )
1354 domain_crash(v->domain);
1355 return;
1358 p = map_domain_page(mfn);
1359 value = *((uint64_t *)(p + (value & (PAGE_SIZE - 1))));
1360 unmap_domain_page(p);
1362 gdprintk(XENLOG_INFO, "Output value is 0x%"PRIx64".\n", value);
1363 ecx = (u32)value;
1364 edx = (u32)(value >> 32);
1365 } else {
1366 hvm_cpuid(input, &eax, &ebx, &ecx, &edx);
1368 if ( input == 0x00000001 )
1370 /* Mask off reserved bits. */
1371 ecx &= ~VMX_VCPU_CPUID_L1_ECX_RESERVED;
1373 ebx &= NUM_THREADS_RESET_MASK;
1375 /* Unsupportable for virtualised CPUs. */
1376 ecx &= ~(bitmaskof(X86_FEATURE_VMXE) |
1377 bitmaskof(X86_FEATURE_EST) |
1378 bitmaskof(X86_FEATURE_TM2) |
1379 bitmaskof(X86_FEATURE_CID));
1381 edx &= ~(bitmaskof(X86_FEATURE_HT) |
1382 bitmaskof(X86_FEATURE_ACPI) |
1383 bitmaskof(X86_FEATURE_ACC));
1386 if ( input == 0x00000006 || input == 0x00000009 || input == 0x0000000A )
1387 eax = ebx = ecx = edx = 0x0;
1390 regs->eax = (unsigned long)eax;
1391 regs->ebx = (unsigned long)ebx;
1392 regs->ecx = (unsigned long)ecx;
1393 regs->edx = (unsigned long)edx;
1395 HVMTRACE_3D(CPUID, current, input,
1396 ((uint64_t)eax << 32) | ebx, ((uint64_t)ecx << 32) | edx);
1399 #define CASE_GET_REG_P(REG, reg) \
1400 case REG_ ## REG: reg_p = (unsigned long *)&(regs->reg); break
1402 #ifdef __i386__
1403 #define CASE_EXTEND_GET_REG_P
1404 #else
1405 #define CASE_EXTEND_GET_REG_P \
1406 CASE_GET_REG_P(R8, r8); \
1407 CASE_GET_REG_P(R9, r9); \
1408 CASE_GET_REG_P(R10, r10); \
1409 CASE_GET_REG_P(R11, r11); \
1410 CASE_GET_REG_P(R12, r12); \
1411 CASE_GET_REG_P(R13, r13); \
1412 CASE_GET_REG_P(R14, r14); \
1413 CASE_GET_REG_P(R15, r15)
1414 #endif
1416 static void vmx_dr_access(unsigned long exit_qualification,
1417 struct cpu_user_regs *regs)
1419 struct vcpu *v = current;
1421 HVMTRACE_0D(DR_WRITE, v);
1423 v->arch.hvm_vcpu.flag_dr_dirty = 1;
1425 /* We could probably be smarter about this */
1426 __restore_debug_registers(v);
1428 /* Allow guest direct access to DR registers */
1429 v->arch.hvm_vmx.exec_control &= ~CPU_BASED_MOV_DR_EXITING;
1430 __vmwrite(CPU_BASED_VM_EXEC_CONTROL,
1431 v->arch.hvm_vmx.exec_control);
1434 /*
1435 * Invalidate the TLB entry for va, and the shadow page table entry
1436 * corresponding to the address va.
1437 */
1438 static void vmx_do_invlpg(unsigned long va)
1440 unsigned long eip;
1441 struct vcpu *v = current;
1443 HVMTRACE_2D(INVLPG, v, /*invlpga=*/ 0, va);
1445 eip = __vmread(GUEST_RIP);
1447 HVM_DBG_LOG(DBG_LEVEL_VMMU, "eip=%lx, va=%lx",
1448 eip, va);
1450 /*
1451 * We do the safest things first, then try to update the shadow
1452 * copying from guest
1453 */
1454 paging_invlpg(v, va);
1457 /* Get segment for OUTS according to guest instruction. */
1458 static enum x86_segment vmx_outs_get_segment(
1459 int long_mode, unsigned long eip, int inst_len)
1461 unsigned char inst[MAX_INST_LEN];
1462 enum x86_segment seg = x86_seg_ds;
1463 int i;
1464 extern int inst_copy_from_guest(unsigned char *, unsigned long, int);
1466 if ( likely(cpu_has_vmx_ins_outs_instr_info) )
1468 unsigned int instr_info = __vmread(VMX_INSTRUCTION_INFO);
1470 /* Get segment register according to bits 17:15. */
1471 switch ( (instr_info >> 15) & 7 )
1473 case 0: seg = x86_seg_es; break;
1474 case 1: seg = x86_seg_cs; break;
1475 case 2: seg = x86_seg_ss; break;
1476 case 3: seg = x86_seg_ds; break;
1477 case 4: seg = x86_seg_fs; break;
1478 case 5: seg = x86_seg_gs; break;
1479 default: BUG();
1482 goto out;
1485 if ( !long_mode )
1486 eip += __vmread(GUEST_CS_BASE);
1488 memset(inst, 0, MAX_INST_LEN);
1489 if ( inst_copy_from_guest(inst, eip, inst_len) != inst_len )
1491 gdprintk(XENLOG_ERR, "Get guest instruction failed\n");
1492 domain_crash(current->domain);
1493 goto out;
1496 for ( i = 0; i < inst_len; i++ )
1498 switch ( inst[i] )
1500 case 0xf3: /* REPZ */
1501 case 0xf2: /* REPNZ */
1502 case 0xf0: /* LOCK */
1503 case 0x66: /* data32 */
1504 case 0x67: /* addr32 */
1505 #ifdef __x86_64__
1506 case 0x40 ... 0x4f: /* REX */
1507 #endif
1508 continue;
1509 case 0x2e: /* CS */
1510 seg = x86_seg_cs;
1511 continue;
1512 case 0x36: /* SS */
1513 seg = x86_seg_ss;
1514 continue;
1515 case 0x26: /* ES */
1516 seg = x86_seg_es;
1517 continue;
1518 case 0x64: /* FS */
1519 seg = x86_seg_fs;
1520 continue;
1521 case 0x65: /* GS */
1522 seg = x86_seg_gs;
1523 continue;
1524 case 0x3e: /* DS */
1525 seg = x86_seg_ds;
1526 continue;
1530 out:
1531 return seg;
1534 static int vmx_str_pio_check_descriptor(int long_mode, unsigned long eip,
1535 int inst_len, enum x86_segment seg,
1536 unsigned long *base, u32 *limit,
1537 u32 *ar_bytes)
1539 enum vmcs_field ar_field, base_field, limit_field;
1541 *base = 0;
1542 *limit = 0;
1543 if ( seg != x86_seg_es )
1544 seg = vmx_outs_get_segment(long_mode, eip, inst_len);
1546 switch ( seg )
1548 case x86_seg_cs:
1549 ar_field = GUEST_CS_AR_BYTES;
1550 base_field = GUEST_CS_BASE;
1551 limit_field = GUEST_CS_LIMIT;
1552 break;
1553 case x86_seg_ds:
1554 ar_field = GUEST_DS_AR_BYTES;
1555 base_field = GUEST_DS_BASE;
1556 limit_field = GUEST_DS_LIMIT;
1557 break;
1558 case x86_seg_es:
1559 ar_field = GUEST_ES_AR_BYTES;
1560 base_field = GUEST_ES_BASE;
1561 limit_field = GUEST_ES_LIMIT;
1562 break;
1563 case x86_seg_fs:
1564 ar_field = GUEST_FS_AR_BYTES;
1565 base_field = GUEST_FS_BASE;
1566 limit_field = GUEST_FS_LIMIT;
1567 break;
1568 case x86_seg_gs:
1569 ar_field = GUEST_GS_AR_BYTES;
1570 base_field = GUEST_GS_BASE;
1571 limit_field = GUEST_GS_LIMIT;
1572 break;
1573 case x86_seg_ss:
1574 ar_field = GUEST_SS_AR_BYTES;
1575 base_field = GUEST_SS_BASE;
1576 limit_field = GUEST_SS_LIMIT;
1577 break;
1578 default:
1579 BUG();
1580 return 0;
1583 if ( !long_mode || seg == x86_seg_fs || seg == x86_seg_gs )
1585 *base = __vmread(base_field);
1586 *limit = __vmread(limit_field);
1588 *ar_bytes = __vmread(ar_field);
1590 return !(*ar_bytes & X86_SEG_AR_SEG_UNUSABLE);
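/* Check a string PIO access against the segment limit, truncating the
 * repeat count where a later iteration would fault; expand-up and
 * expand-down segments are handled separately. */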
1594 static int vmx_str_pio_check_limit(u32 limit, unsigned int size,
1595 u32 ar_bytes, unsigned long addr,
1596 unsigned long base, int df,
1597 unsigned long *count)
1599 unsigned long ea = addr - base;
1601 /* Offset must be within limits. */
1602 ASSERT(ea == (u32)ea);
1603 if ( (u32)(ea + size - 1) < (u32)ea ||
1604 (ar_bytes & 0xc) != 0x4 ? ea + size - 1 > limit
1605 : ea <= limit )
1606 return 0;
1608 /* Check the limit for repeated instructions, as above we checked
1609 only the first instance. Truncate the count if a limit violation
1610 would occur. Note that the checking is not necessary for page
1611 granular segments as transfers crossing page boundaries will be
1612 broken up anyway. */
1613 if ( !(ar_bytes & X86_SEG_AR_GRANULARITY) && *count > 1 )
1615 if ( (ar_bytes & 0xc) != 0x4 )
1617 /* expand-up */
1618 if ( !df )
1620 if ( ea + *count * size - 1 < ea ||
1621 ea + *count * size - 1 > limit )
1622 *count = (limit + 1UL - ea) / size;
1624 else
1626 if ( *count - 1 > ea / size )
1627 *count = ea / size + 1;
1630 else
1632 /* expand-down */
1633 if ( !df )
1635 if ( *count - 1 > -(s32)ea / size )
1636 *count = -(s32)ea / size + 1UL;
1638 else
1640 if ( ea < (*count - 1) * size ||
1641 ea - (*count - 1) * size <= limit )
1642 *count = (ea - limit - 1) / size + 1;
1645 ASSERT(*count);
1648 return 1;
1651 #ifdef __x86_64__
1652 static int vmx_str_pio_lm_check_limit(struct cpu_user_regs *regs,
1653 unsigned int size,
1654 unsigned long addr,
1655 unsigned long *count)
1657 if ( !is_canonical_address(addr) ||
1658 !is_canonical_address(addr + size - 1) )
1659 return 0;
1661 if ( *count > (1UL << 48) / size )
1662 *count = (1UL << 48) / size;
1664 if ( !(regs->eflags & EF_DF) )
1666 if ( addr + *count * size - 1 < addr ||
1667 !is_canonical_address(addr + *count * size - 1) )
1668 *count = (addr & ~((1UL << 48) - 1)) / size;
1670 else
1672 if ( (*count - 1) * size > addr ||
1673 !is_canonical_address(addr + (*count - 1) * size) )
1674 *count = (addr & ~((1UL << 48) - 1)) / size + 1;
1677 ASSERT(*count);
1679 return 1;
1681 #endif
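/* Build and issue the string PIO request to the device model; accesses that
 * straddle a page boundary are flagged OVERLAP and handled one element at a
 * time. */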
1683 static void vmx_send_str_pio(struct cpu_user_regs *regs,
1684 struct hvm_io_op *pio_opp,
1685 unsigned long inst_len, unsigned int port,
1686 int sign, unsigned int size, int dir,
1687 int df, unsigned long addr,
1688 unsigned long paddr, unsigned long count)
1690 /*
1691 * Handle string pio instructions that cross pages or that
1692 * are unaligned. See the comments in hvm_domain.c/handle_mmio()
1693 */
1694 if ( (addr & PAGE_MASK) != ((addr + size - 1) & PAGE_MASK) ) {
1695 unsigned long value = 0;
1697 pio_opp->flags |= OVERLAP;
1699 if ( dir == IOREQ_WRITE ) /* OUTS */
1701 if ( hvm_paging_enabled(current) )
1703 int rv = hvm_copy_from_guest_virt(&value, addr, size);
1704 if ( rv != 0 )
1706 /* Failed on the page-spanning copy. Inject PF into
1707 * the guest for the address where we failed. */
1708 addr += size - rv;
1709 gdprintk(XENLOG_DEBUG, "Pagefault reading non-io side "
1710 "of a page-spanning PIO: va=%#lx\n", addr);
1711 vmx_inject_exception(TRAP_page_fault, 0, addr);
1712 return;
1715 else
1716 (void) hvm_copy_from_guest_phys(&value, addr, size);
1717 } else /* dir != IOREQ_WRITE */
1718 /* Remember where to write the result, as a *VA*.
1719 * Must be a VA so we can handle the page overlap
1720 * correctly in hvm_pio_assist() */
1721 pio_opp->addr = addr;
1723 if ( count == 1 )
1724 regs->eip += inst_len;
1726 send_pio_req(port, 1, size, value, dir, df, 0);
1727 } else {
1728 unsigned long last_addr = sign > 0 ? addr + count * size - 1
1729 : addr - (count - 1) * size;
1731 if ( (addr & PAGE_MASK) != (last_addr & PAGE_MASK) )
1733 if ( sign > 0 )
1734 count = (PAGE_SIZE - (addr & ~PAGE_MASK)) / size;
1735 else
1736 count = (addr & ~PAGE_MASK) / size + 1;
1737 } else
1738 regs->eip += inst_len;
1740 send_pio_req(port, count, size, paddr, dir, df, 1);
1744 static void vmx_do_str_pio(unsigned long exit_qualification,
1745 unsigned long inst_len,
1746 struct cpu_user_regs *regs,
1747 struct hvm_io_op *pio_opp)
1749 unsigned int port, size;
1750 int dir, df, vm86;
1751 unsigned long addr, count = 1, base;
1752 paddr_t paddr;
1753 unsigned long gfn;
1754 u32 ar_bytes, limit;
1755 int sign;
1756 int long_mode = 0;
1758 vm86 = regs->eflags & X86_EFLAGS_VM ? 1 : 0;
1759 df = regs->eflags & X86_EFLAGS_DF ? 1 : 0;
1761 if ( test_bit(6, &exit_qualification) )
1762 port = (exit_qualification >> 16) & 0xFFFF;
1763 else
1764 port = regs->edx & 0xffff;
1766 size = (exit_qualification & 7) + 1;
1767 dir = test_bit(3, &exit_qualification); /* direction */
1769 if ( dir == IOREQ_READ )
1770 HVMTRACE_2D(IO_READ, current, port, size);
1771 else
1772 HVMTRACE_2D(IO_WRITE, current, port, size);
1774 sign = regs->eflags & X86_EFLAGS_DF ? -1 : 1;
1775 ar_bytes = __vmread(GUEST_CS_AR_BYTES);
1776 if ( vmx_long_mode_enabled(current) &&
1777 (ar_bytes & X86_SEG_AR_CS_LM_ACTIVE) )
1778 long_mode = 1;
1779 addr = __vmread(GUEST_LINEAR_ADDRESS);
1781 if ( test_bit(5, &exit_qualification) ) { /* "rep" prefix */
1782 pio_opp->flags |= REPZ;
1783 count = regs->ecx;
1784 if ( !long_mode &&
1785 (vm86 || !(ar_bytes & X86_SEG_AR_DEF_OP_SIZE)) )
1786 count &= 0xFFFF;
1789 /*
1790 * In protected mode, guest linear address is invalid if the
1791 * selector is null.
1792 */
1793 if ( !vmx_str_pio_check_descriptor(long_mode, regs->eip, inst_len,
1794 dir==IOREQ_WRITE ? x86_seg_ds :
1795 x86_seg_es, &base, &limit,
1796 &ar_bytes) ) {
1797 if ( !long_mode ) {
1798 vmx_inject_hw_exception(current, TRAP_gp_fault, 0);
1799 return;
1801 addr = dir == IOREQ_WRITE ? base + regs->esi : regs->edi;
1804 if ( !long_mode )
1806 /* Segment must be readable for outs and writeable for ins. */
1807 if ( ((dir == IOREQ_WRITE)
1808 ? ((ar_bytes & 0xa) == 0x8)
1809 : ((ar_bytes & 0xa) != 0x2)) ||
1810 !vmx_str_pio_check_limit(limit, size, ar_bytes,
1811 addr, base, df, &count) )
1813 vmx_inject_hw_exception(current, TRAP_gp_fault, 0);
1814 return;
1817 #ifdef __x86_64__
1818 else if ( !vmx_str_pio_lm_check_limit(regs, size, addr, &count) )
1820 vmx_inject_hw_exception(current, TRAP_gp_fault, 0);
1821 return;
1823 #endif
1825 /* Translate the address to a physical address */
1826 gfn = paging_gva_to_gfn(current, addr);
1827 if ( gfn == INVALID_GFN )
1829 /* The guest does not have the RAM address mapped.
1830 * Need to send in a page fault */
1831 int errcode = 0;
1832 /* IO read --> memory write */
1833 if ( dir == IOREQ_READ ) errcode |= PFEC_write_access;
1834 vmx_inject_exception(TRAP_page_fault, errcode, addr);
1835 return;
1837 paddr = (paddr_t)gfn << PAGE_SHIFT | (addr & ~PAGE_MASK);
1839 vmx_send_str_pio(regs, pio_opp, inst_len, port, sign,
1840 size, dir, df, addr, paddr, count);
1843 static void vmx_io_instruction(unsigned long exit_qualification,
1844 unsigned long inst_len)
1846 struct cpu_user_regs *regs;
1847 struct hvm_io_op *pio_opp;
1849 pio_opp = &current->arch.hvm_vcpu.io_op;
1850 pio_opp->instr = INSTR_PIO;
1851 pio_opp->flags = 0;
1853 regs = &pio_opp->io_context;
1855 /* Copy current guest state into io instruction state structure. */
1856 memcpy(regs, guest_cpu_user_regs(), HVM_CONTEXT_STACK_BYTES);
1857 vmx_store_cpu_guest_regs(current, regs, NULL);
1859 HVM_DBG_LOG(DBG_LEVEL_IO, "vm86 %d, eip=%x:%lx, "
1860 "exit_qualification = %lx",
1861 regs->eflags & X86_EFLAGS_VM ? 1 : 0,
1862 regs->cs, (unsigned long)regs->eip, exit_qualification);
1864 if ( test_bit(4, &exit_qualification) ) /* string instruction */
1865 vmx_do_str_pio(exit_qualification, inst_len, regs, pio_opp);
1866 else
1868 unsigned int port, size;
1869 int dir, df;
1871 df = regs->eflags & X86_EFLAGS_DF ? 1 : 0;
1873 if ( test_bit(6, &exit_qualification) )
1874 port = (exit_qualification >> 16) & 0xFFFF;
1875 else
1876 port = regs->edx & 0xffff;
1878 size = (exit_qualification & 7) + 1;
1879 dir = test_bit(3, &exit_qualification); /* direction */
1881 if ( dir == IOREQ_READ )
1882 HVMTRACE_2D(IO_READ, current, port, size);
1883 else
1884 HVMTRACE_2D(IO_WRITE, current, port, size);
1886 if ( port == 0xe9 && dir == IOREQ_WRITE && size == 1 )
1887 hvm_print_line(current, regs->eax); /* guest debug output */
1889 regs->eip += inst_len;
1890 send_pio_req(port, 1, size, regs->eax, dir, df, 0);
1894 static void vmx_world_save(struct vcpu *v, struct vmx_assist_context *c)
1896 /* NB. Skip transition instruction. */
1897 c->eip = __vmread(GUEST_RIP);
1898 c->eip += __get_instruction_length(); /* Safe: MOV Cn, LMSW, CLTS */
1900 c->esp = __vmread(GUEST_RSP);
1901 c->eflags = __vmread(GUEST_RFLAGS) & ~X86_EFLAGS_RF;
1903 c->cr0 = v->arch.hvm_vmx.cpu_shadow_cr0;
1904 c->cr3 = v->arch.hvm_vmx.cpu_cr3;
1905 c->cr4 = v->arch.hvm_vmx.cpu_shadow_cr4;
1907 c->idtr_limit = __vmread(GUEST_IDTR_LIMIT);
1908 c->idtr_base = __vmread(GUEST_IDTR_BASE);
1910 c->gdtr_limit = __vmread(GUEST_GDTR_LIMIT);
1911 c->gdtr_base = __vmread(GUEST_GDTR_BASE);
1913 c->cs_sel = __vmread(GUEST_CS_SELECTOR);
1914 c->cs_limit = __vmread(GUEST_CS_LIMIT);
1915 c->cs_base = __vmread(GUEST_CS_BASE);
1916 c->cs_arbytes.bytes = __vmread(GUEST_CS_AR_BYTES);
1918 c->ds_sel = __vmread(GUEST_DS_SELECTOR);
1919 c->ds_limit = __vmread(GUEST_DS_LIMIT);
1920 c->ds_base = __vmread(GUEST_DS_BASE);
1921 c->ds_arbytes.bytes = __vmread(GUEST_DS_AR_BYTES);
1923 c->es_sel = __vmread(GUEST_ES_SELECTOR);
1924 c->es_limit = __vmread(GUEST_ES_LIMIT);
1925 c->es_base = __vmread(GUEST_ES_BASE);
1926 c->es_arbytes.bytes = __vmread(GUEST_ES_AR_BYTES);
1928 c->ss_sel = __vmread(GUEST_SS_SELECTOR);
1929 c->ss_limit = __vmread(GUEST_SS_LIMIT);
1930 c->ss_base = __vmread(GUEST_SS_BASE);
1931 c->ss_arbytes.bytes = __vmread(GUEST_SS_AR_BYTES);
1933 c->fs_sel = __vmread(GUEST_FS_SELECTOR);
1934 c->fs_limit = __vmread(GUEST_FS_LIMIT);
1935 c->fs_base = __vmread(GUEST_FS_BASE);
1936 c->fs_arbytes.bytes = __vmread(GUEST_FS_AR_BYTES);
1938 c->gs_sel = __vmread(GUEST_GS_SELECTOR);
1939 c->gs_limit = __vmread(GUEST_GS_LIMIT);
1940 c->gs_base = __vmread(GUEST_GS_BASE);
1941 c->gs_arbytes.bytes = __vmread(GUEST_GS_AR_BYTES);
1943 c->tr_sel = __vmread(GUEST_TR_SELECTOR);
1944 c->tr_limit = __vmread(GUEST_TR_LIMIT);
1945 c->tr_base = __vmread(GUEST_TR_BASE);
1946 c->tr_arbytes.bytes = __vmread(GUEST_TR_AR_BYTES);
1948 c->ldtr_sel = __vmread(GUEST_LDTR_SELECTOR);
1949 c->ldtr_limit = __vmread(GUEST_LDTR_LIMIT);
1950 c->ldtr_base = __vmread(GUEST_LDTR_BASE);
1951 c->ldtr_arbytes.bytes = __vmread(GUEST_LDTR_AR_BYTES);
1954 static int vmx_world_restore(struct vcpu *v, struct vmx_assist_context *c)
1956 unsigned long mfn, old_base_mfn;
1958 __vmwrite(GUEST_RIP, c->eip);
1959 __vmwrite(GUEST_RSP, c->esp);
1960 __vmwrite(GUEST_RFLAGS, c->eflags);
1962 v->arch.hvm_vmx.cpu_shadow_cr0 = c->cr0;
1963 __vmwrite(CR0_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr0);
1965 if ( !vmx_paging_enabled(v) )
1966 goto skip_cr3;
1968 if ( c->cr3 == v->arch.hvm_vmx.cpu_cr3 )
1970 /*
1971 * This is simple TLB flush, implying the guest has
1972 * removed some translation or changed page attributes.
1973 * We simply invalidate the shadow.
1974 */
1975 mfn = get_mfn_from_gpfn(c->cr3 >> PAGE_SHIFT);
1976 if ( mfn != pagetable_get_pfn(v->arch.guest_table) )
1977 goto bad_cr3;
1979 else
1981 /*
1982 * If different, make a shadow. Check if the PDBR is valid
1983 * first.
1984 */
1985 HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 c->cr3 = %x", c->cr3);
1986 mfn = get_mfn_from_gpfn(c->cr3 >> PAGE_SHIFT);
1987 if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) )
1988 goto bad_cr3;
1989 old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
1990 v->arch.guest_table = pagetable_from_pfn(mfn);
1991 if ( old_base_mfn )
1992 put_page(mfn_to_page(old_base_mfn));
1993 v->arch.hvm_vmx.cpu_cr3 = c->cr3;
1996 skip_cr3:
1997 if ( !vmx_paging_enabled(v) )
1998 HVM_DBG_LOG(DBG_LEVEL_VMMU, "switching to vmxassist. use phys table");
1999 else
2000 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %x", c->cr3);
2002 __vmwrite(GUEST_CR4, (c->cr4 | HVM_CR4_HOST_MASK));
2003 v->arch.hvm_vmx.cpu_shadow_cr4 = c->cr4;
2004 __vmwrite(CR4_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr4);
2006 __vmwrite(GUEST_IDTR_LIMIT, c->idtr_limit);
2007 __vmwrite(GUEST_IDTR_BASE, c->idtr_base);
2009 __vmwrite(GUEST_GDTR_LIMIT, c->gdtr_limit);
2010 __vmwrite(GUEST_GDTR_BASE, c->gdtr_base);
2012 __vmwrite(GUEST_CS_SELECTOR, c->cs_sel);
2013 __vmwrite(GUEST_CS_LIMIT, c->cs_limit);
2014 __vmwrite(GUEST_CS_BASE, c->cs_base);
2015 __vmwrite(GUEST_CS_AR_BYTES, c->cs_arbytes.bytes);
2017 __vmwrite(GUEST_DS_SELECTOR, c->ds_sel);
2018 __vmwrite(GUEST_DS_LIMIT, c->ds_limit);
2019 __vmwrite(GUEST_DS_BASE, c->ds_base);
2020 __vmwrite(GUEST_DS_AR_BYTES, c->ds_arbytes.bytes);
2022 __vmwrite(GUEST_ES_SELECTOR, c->es_sel);
2023 __vmwrite(GUEST_ES_LIMIT, c->es_limit);
2024 __vmwrite(GUEST_ES_BASE, c->es_base);
2025 __vmwrite(GUEST_ES_AR_BYTES, c->es_arbytes.bytes);
2027 __vmwrite(GUEST_SS_SELECTOR, c->ss_sel);
2028 __vmwrite(GUEST_SS_LIMIT, c->ss_limit);
2029 __vmwrite(GUEST_SS_BASE, c->ss_base);
2030 __vmwrite(GUEST_SS_AR_BYTES, c->ss_arbytes.bytes);
2032 __vmwrite(GUEST_FS_SELECTOR, c->fs_sel);
2033 __vmwrite(GUEST_FS_LIMIT, c->fs_limit);
2034 __vmwrite(GUEST_FS_BASE, c->fs_base);
2035 __vmwrite(GUEST_FS_AR_BYTES, c->fs_arbytes.bytes);
2037 __vmwrite(GUEST_GS_SELECTOR, c->gs_sel);
2038 __vmwrite(GUEST_GS_LIMIT, c->gs_limit);
2039 __vmwrite(GUEST_GS_BASE, c->gs_base);
2040 __vmwrite(GUEST_GS_AR_BYTES, c->gs_arbytes.bytes);
2042 __vmwrite(GUEST_TR_SELECTOR, c->tr_sel);
2043 __vmwrite(GUEST_TR_LIMIT, c->tr_limit);
2044 __vmwrite(GUEST_TR_BASE, c->tr_base);
2045 __vmwrite(GUEST_TR_AR_BYTES, c->tr_arbytes.bytes);
2047 __vmwrite(GUEST_LDTR_SELECTOR, c->ldtr_sel);
2048 __vmwrite(GUEST_LDTR_LIMIT, c->ldtr_limit);
2049 __vmwrite(GUEST_LDTR_BASE, c->ldtr_base);
2050 __vmwrite(GUEST_LDTR_AR_BYTES, c->ldtr_arbytes.bytes);
2052 paging_update_paging_modes(v);
2053 return 0;
2055 bad_cr3:
2056 gdprintk(XENLOG_ERR, "Invalid CR3 value=%x", c->cr3);
2057 return -EINVAL;
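/*
 * vmxassist world switch: VMX_ASSIST_INVOKE saves the current guest context
 * at the address recorded in VMXASSIST_OLD_CONTEXT and loads the context
 * found via VMXASSIST_NEW_CONTEXT, handing control to the vmxassist
 * real-mode emulator; VMX_ASSIST_RESTORE reverses the switch by reloading
 * the saved old context.  Both paths crash the domain if the vmxassist
 * magic number or the context pointers cannot be read from guest memory.
 */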
2060 enum { VMX_ASSIST_INVOKE = 0, VMX_ASSIST_RESTORE };
2062 static int vmx_assist(struct vcpu *v, int mode)
2064 struct vmx_assist_context c;
2065 struct hvm_hw_vpic *vpic = v->domain->arch.hvm_domain.vpic;
2066 u32 magic, cp;
2068 if ( hvm_copy_from_guest_phys(&magic, VMXASSIST_MAGIC_OFFSET,
2069 sizeof(magic)) )
2071 gdprintk(XENLOG_ERR, "No vmxassist: can't execute real mode code\n");
2072 domain_crash(v->domain);
2073 return 0;
2076 if ( magic != VMXASSIST_MAGIC )
2078 gdprintk(XENLOG_ERR, "vmxassist magic number does not match\n");
2079 domain_crash(v->domain);
2080 return 0;
2083 switch ( mode ) {
2084 /*
2085 * Transfer control to vmxassist.
2086 * Store the current context in VMXASSIST_OLD_CONTEXT and load
2087 * the new VMXASSIST_NEW_CONTEXT context. This context was created
2088 * by vmxassist and will transfer control to it.
2089 */
2090 case VMX_ASSIST_INVOKE:
2091 /* save the old context */
2092 if ( hvm_copy_from_guest_phys(&cp, VMXASSIST_OLD_CONTEXT, sizeof(cp)) )
2093 goto error;
2094 if ( cp != 0 ) {
2095 vmx_world_save(v, &c);
2096 if ( hvm_copy_to_guest_phys(cp, &c, sizeof(c)) )
2097 goto error;
2100 /* restore the new context; this should activate vmxassist */
2101 if ( hvm_copy_from_guest_phys(&cp, VMXASSIST_NEW_CONTEXT, sizeof(cp)) )
2102 goto error;
2103 if ( cp != 0 ) {
2104 if ( hvm_copy_from_guest_phys(&c, cp, sizeof(c)) )
2105 goto error;
2106 if ( vmx_world_restore(v, &c) != 0 )
2107 goto error;
2108 v->arch.hvm_vmx.pm_irqbase[0] = vpic[0].irq_base;
2109 v->arch.hvm_vmx.pm_irqbase[1] = vpic[1].irq_base;
2110 vpic[0].irq_base = NR_EXCEPTION_HANDLER;
2111 vpic[1].irq_base = NR_EXCEPTION_HANDLER + 8;
2112 v->arch.hvm_vmx.vmxassist_enabled = 1;
2113 return 1;
2115 break;
2117 /*
2118 * Restore the VMXASSIST_OLD_CONTEXT that was saved by
2119 * VMX_ASSIST_INVOKE above.
2120 */
2121 case VMX_ASSIST_RESTORE:
2122 /* reload the old context saved by VMX_ASSIST_INVOKE */
2123 if ( hvm_copy_from_guest_phys(&cp, VMXASSIST_OLD_CONTEXT, sizeof(cp)) )
2124 goto error;
2125 if ( cp != 0 ) {
2126 if ( hvm_copy_from_guest_phys(&c, cp, sizeof(c)) )
2127 goto error;
2128 if ( vmx_world_restore(v, &c) != 0 )
2129 goto error;
2130 if ( v->arch.hvm_vmx.irqbase_mode ) {
2131 vpic[0].irq_base = c.rm_irqbase[0] & 0xf8;
2132 vpic[1].irq_base = c.rm_irqbase[1] & 0xf8;
2133 } else {
2134 vpic[0].irq_base = v->arch.hvm_vmx.pm_irqbase[0];
2135 vpic[1].irq_base = v->arch.hvm_vmx.pm_irqbase[1];
2137 v->arch.hvm_vmx.vmxassist_enabled = 0;
2138 return 1;
2140 break;
2143 error:
2144 gdprintk(XENLOG_ERR, "Failed to transfer to vmxassist\n");
2145 domain_crash(v->domain);
2146 return 0;
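/*
 * vmx_set_cr0: validate and apply a guest write to CR0.  Handles the
 * paging-enable and paging-disable transitions (including long-mode
 * activation when EFER.LME is set), injects #GP for illegal combinations,
 * and switches to/from vmxassist when the guest toggles CR0.PE.  Returns 1
 * if the trapped instruction should be retired (EIP advanced), 0 otherwise.
 */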
2149 static int vmx_set_cr0(unsigned long value)
2151 struct vcpu *v = current;
2152 unsigned long mfn;
2153 unsigned long eip;
2154 int paging_enabled;
2155 unsigned long old_cr0;
2156 unsigned long old_base_mfn;
2158 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR0 value = %lx", value);
2160 if ( (u32)value != value )
2162 HVM_DBG_LOG(DBG_LEVEL_1,
2163 "Guest attempts to set upper 32 bits in CR0: %lx",
2164 value);
2165 vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
2166 return 0;
2169 value &= ~HVM_CR0_GUEST_RESERVED_BITS;
2171 /* ET is reserved and should always be 1. */
2172 value |= X86_CR0_ET;
2174 if ( (value & (X86_CR0_PE | X86_CR0_PG)) == X86_CR0_PG )
2176 vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
2177 return 0;
2180 /* TS cleared? Then initialise FPU now. */
2181 if ( !(value & X86_CR0_TS) )
2183 setup_fpu(v);
2184 __vm_clear_bit(EXCEPTION_BITMAP, TRAP_no_device);
2187 old_cr0 = v->arch.hvm_vmx.cpu_shadow_cr0;
2188 paging_enabled = old_cr0 & X86_CR0_PG;
2190 v->arch.hvm_vmx.cpu_cr0 = (value | X86_CR0_PE | X86_CR0_PG
2191 | X86_CR0_NE | X86_CR0_WP);
2192 __vmwrite(GUEST_CR0, v->arch.hvm_vmx.cpu_cr0);
2194 v->arch.hvm_vmx.cpu_shadow_cr0 = value;
2195 __vmwrite(CR0_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr0);
2197 /* Trying to enable paging. */
2198 if ( (value & X86_CR0_PE) && (value & X86_CR0_PG) && !paging_enabled )
2200 if ( vmx_lme_is_set(v) && !vmx_long_mode_enabled(v) )
2202 if ( !(v->arch.hvm_vmx.cpu_shadow_cr4 & X86_CR4_PAE) )
2204 HVM_DBG_LOG(DBG_LEVEL_1, "Guest enabled paging "
2205 "with EFER.LME set but not CR4.PAE");
2206 vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
2207 return 0;
2210 HVM_DBG_LOG(DBG_LEVEL_1, "Enabling long mode");
2211 vmx_enable_long_mode(v);
2214 /*
2215 * The guest CR3 must point to a valid guest physical address.
2216 */
2217 mfn = get_mfn_from_gpfn(v->arch.hvm_vmx.cpu_cr3 >> PAGE_SHIFT);
2218 if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) )
2220 gdprintk(XENLOG_ERR, "Invalid CR3 value = %lx (mfn=%lx)\n",
2221 v->arch.hvm_vmx.cpu_cr3, mfn);
2222 domain_crash(v->domain);
2223 return 0;
2226 /*
2227 * arch.guest_table now points to the machine-physical frame.
2228 */
2229 old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
2230 v->arch.guest_table = pagetable_from_pfn(mfn);
2231 if ( old_base_mfn )
2232 put_page(mfn_to_page(old_base_mfn));
2234 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx",
2235 v->arch.hvm_vmx.cpu_cr3, mfn);
2237 paging_update_paging_modes(v);
2240 /* Trying to disable paging. */
2241 if ( ((value & (X86_CR0_PE | X86_CR0_PG)) != (X86_CR0_PE | X86_CR0_PG)) &&
2242 paging_enabled )
2244 /* When CR0.PG is cleared, LMA is cleared immediately. */
2245 if ( vmx_long_mode_enabled(v) )
2246 vmx_disable_long_mode(v);
2248 if ( v->arch.hvm_vmx.cpu_cr3 )
2250 put_page(mfn_to_page(get_mfn_from_gpfn(
2251 v->arch.hvm_vmx.cpu_cr3 >> PAGE_SHIFT)));
2252 v->arch.guest_table = pagetable_null();
2256 /*
2257 * VMX does not implement real-mode virtualization. We emulate
2258 * real-mode by performing a world switch to VMXAssist whenever
2259 * the guest disables the CR0.PE bit.
2260 */
2261 if ( (value & X86_CR0_PE) == 0 )
2263 if ( value & X86_CR0_PG )
2265 vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
2266 return 0;
2269 if ( vmx_assist(v, VMX_ASSIST_INVOKE) )
2271 eip = __vmread(GUEST_RIP);
2272 HVM_DBG_LOG(DBG_LEVEL_1,
2273 "Transfering control to vmxassist %%eip 0x%lx", eip);
2274 return 0; /* do not update eip! */
2277 else if ( v->arch.hvm_vmx.vmxassist_enabled )
2279 eip = __vmread(GUEST_RIP);
2280 HVM_DBG_LOG(DBG_LEVEL_1,
2281 "Enabling CR0.PE at %%eip 0x%lx", eip);
2282 if ( vmx_assist(v, VMX_ASSIST_RESTORE) )
2284 HVM_DBG_LOG(DBG_LEVEL_1,
2285 "Restoring to %%eip 0x%lx", eip);
2286 return 0; /* do not update eip! */
2289 else if ( (value & (X86_CR0_PE | X86_CR0_PG)) == X86_CR0_PE )
2290 paging_update_paging_modes(v);
2292 return 1;
2295 #define CASE_SET_REG(REG, reg) \
2296 case REG_ ## REG: regs->reg = value; break
2297 #define CASE_GET_REG(REG, reg) \
2298 case REG_ ## REG: value = regs->reg; break
2300 #define CASE_EXTEND_SET_REG \
2301 CASE_EXTEND_REG(S)
2302 #define CASE_EXTEND_GET_REG \
2303 CASE_EXTEND_REG(G)
2305 #ifdef __i386__
2306 #define CASE_EXTEND_REG(T)
2307 #else
2308 #define CASE_EXTEND_REG(T) \
2309 CASE_ ## T ## ET_REG(R8, r8); \
2310 CASE_ ## T ## ET_REG(R9, r9); \
2311 CASE_ ## T ## ET_REG(R10, r10); \
2312 CASE_ ## T ## ET_REG(R11, r11); \
2313 CASE_ ## T ## ET_REG(R12, r12); \
2314 CASE_ ## T ## ET_REG(R13, r13); \
2315 CASE_ ## T ## ET_REG(R14, r14); \
2316 CASE_ ## T ## ET_REG(R15, r15)
2317 #endif
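/*
 * Example expansion: CASE_GET_REG(EAX, eax) becomes
 *     case REG_EAX: value = regs->eax; break;
 * letting mov_to_cr()/mov_from_cr() below map the decoded general-purpose
 * register number onto the matching cpu_user_regs field.  On x86_64 the
 * CASE_EXTEND_* variants add the same cases for r8-r15.
 */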
2319 /*
2320 * Write to control registers
2321 */
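/* Returns 1 if the guest instruction should be retired (EIP advanced),
 * 0 otherwise (fault injected, domain crashed, or a vmxassist world
 * switch took place). */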
2322 static int mov_to_cr(int gp, int cr, struct cpu_user_regs *regs)
2324 unsigned long value, old_cr, old_base_mfn, mfn;
2325 struct vcpu *v = current;
2326 struct vlapic *vlapic = vcpu_vlapic(v);
2328 switch ( gp )
2330 CASE_GET_REG(EAX, eax);
2331 CASE_GET_REG(ECX, ecx);
2332 CASE_GET_REG(EDX, edx);
2333 CASE_GET_REG(EBX, ebx);
2334 CASE_GET_REG(EBP, ebp);
2335 CASE_GET_REG(ESI, esi);
2336 CASE_GET_REG(EDI, edi);
2337 CASE_EXTEND_GET_REG;
2338 case REG_ESP:
2339 value = __vmread(GUEST_RSP);
2340 break;
2341 default:
2342 gdprintk(XENLOG_ERR, "invalid gp: %d\n", gp);
2343 goto exit_and_crash;
2346 HVMTRACE_2D(CR_WRITE, v, cr, value);
2348 HVM_DBG_LOG(DBG_LEVEL_1, "CR%d, value = %lx", cr, value);
2350 switch ( cr )
2352 case 0:
2353 return vmx_set_cr0(value);
2355 case 3:
2356 /*
2357 * If paging is not enabled yet, simply copy the value to CR3.
2358 */
2359 if ( !vmx_paging_enabled(v) )
2361 v->arch.hvm_vmx.cpu_cr3 = value;
2362 break;
2365 /*
2366 * Make a new shadow if one does not already exist for this CR3.
2367 */
2368 if ( value == v->arch.hvm_vmx.cpu_cr3 ) {
2369 /*
2370 * This is a simple TLB flush, implying the guest has
2371 * removed some translation or changed page attributes.
2372 * We simply invalidate the shadow.
2373 */
2374 mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT);
2375 if ( mfn != pagetable_get_pfn(v->arch.guest_table) )
2376 goto bad_cr3;
2377 paging_update_cr3(v);
2378 } else {
2379 /*
2380 * If different, make a shadow. Check if the PDBR is valid
2381 * first.
2382 */
2383 HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 value = %lx", value);
2384 mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT);
2385 if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) )
2386 goto bad_cr3;
2387 old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
2388 v->arch.guest_table = pagetable_from_pfn(mfn);
2389 if ( old_base_mfn )
2390 put_page(mfn_to_page(old_base_mfn));
2391 v->arch.hvm_vmx.cpu_cr3 = value;
2392 update_cr3(v);
2393 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx", value);
2395 break;
2397 case 4: /* CR4 */
2398 old_cr = v->arch.hvm_vmx.cpu_shadow_cr4;
2400 if ( value & HVM_CR4_GUEST_RESERVED_BITS )
2402 HVM_DBG_LOG(DBG_LEVEL_1,
2403 "Guest attempts to set reserved bit in CR4: %lx",
2404 value);
2405 vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
2406 return 0;
2409 if ( (value & X86_CR4_PAE) && !(old_cr & X86_CR4_PAE) )
2411 if ( vmx_pgbit_test(v) )
2413 #if CONFIG_PAGING_LEVELS >= 3
2414 /* The guest is a 32-bit PAE guest. */
2415 unsigned long mfn, old_base_mfn;
2416 mfn = get_mfn_from_gpfn(v->arch.hvm_vmx.cpu_cr3 >> PAGE_SHIFT);
2417 if ( !mfn_valid(mfn) ||
2418 !get_page(mfn_to_page(mfn), v->domain) )
2419 goto bad_cr3;
2421 /*
2422 * arch.guest_table now points to the machine-physical frame.
2423 */
2424 old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
2425 v->arch.guest_table = pagetable_from_pfn(mfn);
2426 if ( old_base_mfn )
2427 put_page(mfn_to_page(old_base_mfn));
2429 HVM_DBG_LOG(DBG_LEVEL_VMMU,
2430 "Update CR3 value = %lx, mfn = %lx",
2431 v->arch.hvm_vmx.cpu_cr3, mfn);
2432 #endif
2435 else if ( !(value & X86_CR4_PAE) )
2437 if ( unlikely(vmx_long_mode_enabled(v)) )
2439 HVM_DBG_LOG(DBG_LEVEL_1, "Guest cleared CR4.PAE while "
2440 "EFER.LMA is set");
2441 vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
2442 return 0;
2446 __vmwrite(GUEST_CR4, value | HVM_CR4_HOST_MASK);
2447 v->arch.hvm_vmx.cpu_shadow_cr4 = value;
2448 __vmwrite(CR4_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr4);
2450 /*
2451 * Writing to CR4 to modify the PSE, PGE, or PAE flag invalidates
2452 * all TLB entries except global entries.
2453 */
2454 if ( (old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE) )
2455 paging_update_paging_modes(v);
2457 break;
2459 case 8:
2460 vlapic_set_reg(vlapic, APIC_TASKPRI, ((value & 0x0F) << 4));
2461 break;
2463 default:
2464 gdprintk(XENLOG_ERR, "invalid cr: %d\n", cr);
2465 domain_crash(v->domain);
2466 return 0;
2469 return 1;
2471 bad_cr3:
2472 gdprintk(XENLOG_ERR, "Invalid CR3\n");
2473 exit_and_crash:
2474 domain_crash(v->domain);
2475 return 0;
2478 /*
2479 * Read from control registers. CR0 and CR4 are read from the shadow.
2480 */
2481 static void mov_from_cr(int cr, int gp, struct cpu_user_regs *regs)
2483 unsigned long value = 0;
2484 struct vcpu *v = current;
2485 struct vlapic *vlapic = vcpu_vlapic(v);
2487 switch ( cr )
2489 case 3:
2490 value = (unsigned long)v->arch.hvm_vmx.cpu_cr3;
2491 break;
2492 case 8:
2493 value = (unsigned long)vlapic_get_reg(vlapic, APIC_TASKPRI);
2494 value = (value & 0xF0) >> 4;
2495 break;
2496 default:
2497 gdprintk(XENLOG_ERR, "invalid cr: %d\n", cr);
2498 domain_crash(v->domain);
2499 break;
2502 switch ( gp ) {
2503 CASE_SET_REG(EAX, eax);
2504 CASE_SET_REG(ECX, ecx);
2505 CASE_SET_REG(EDX, edx);
2506 CASE_SET_REG(EBX, ebx);
2507 CASE_SET_REG(EBP, ebp);
2508 CASE_SET_REG(ESI, esi);
2509 CASE_SET_REG(EDI, edi);
2510 CASE_EXTEND_SET_REG;
2511 case REG_ESP:
2512 __vmwrite(GUEST_RSP, value);
2513 regs->esp = value;
2514 break;
2515 default:
2516 printk("invalid gp: %d\n", gp);
2517 domain_crash(v->domain);
2518 break;
2521 HVMTRACE_2D(CR_READ, v, cr, value);
2523 HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR%d, value = %lx", cr, value);
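/*
 * vmx_cr_access: decode a CR-access exit qualification into the access type
 * (MOV to CR, MOV from CR, CLTS or LMSW) plus the CR and GP register
 * numbers, and dispatch to the handlers above.  Returns 1 if the
 * instruction should be retired.
 */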
2526 static int vmx_cr_access(unsigned long exit_qualification,
2527 struct cpu_user_regs *regs)
2529 unsigned int gp, cr;
2530 unsigned long value;
2531 struct vcpu *v = current;
2533 switch ( exit_qualification & CONTROL_REG_ACCESS_TYPE ) {
2534 case TYPE_MOV_TO_CR:
2535 gp = exit_qualification & CONTROL_REG_ACCESS_REG;
2536 cr = exit_qualification & CONTROL_REG_ACCESS_NUM;
2537 return mov_to_cr(gp, cr, regs);
2538 case TYPE_MOV_FROM_CR:
2539 gp = exit_qualification & CONTROL_REG_ACCESS_REG;
2540 cr = exit_qualification & CONTROL_REG_ACCESS_NUM;
2541 mov_from_cr(cr, gp, regs);
2542 break;
2543 case TYPE_CLTS:
2544 /* We initialise the FPU now, to avoid needing another vmexit. */
2545 setup_fpu(v);
2546 __vm_clear_bit(EXCEPTION_BITMAP, TRAP_no_device);
2548 v->arch.hvm_vmx.cpu_cr0 &= ~X86_CR0_TS; /* clear TS */
2549 __vmwrite(GUEST_CR0, v->arch.hvm_vmx.cpu_cr0);
2551 v->arch.hvm_vmx.cpu_shadow_cr0 &= ~X86_CR0_TS; /* clear TS */
2552 __vmwrite(CR0_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr0);
2553 break;
2554 case TYPE_LMSW:
2555 value = v->arch.hvm_vmx.cpu_shadow_cr0;
2556 value = (value & ~0xF) |
2557 (((exit_qualification & LMSW_SOURCE_DATA) >> 16) & 0xF);
2558 return vmx_set_cr0(value);
2559 default:
2560 BUG();
2563 return 1;
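/*
 * RDMSR exit handler.  MSRs handled inline here are returned in EAX:EDX;
 * anything else is offered to long_mode_do_msr_read(), then to the Xen
 * hypervisor registers and finally to a safe hardware read, with #GP
 * injected if every path fails.  Returns 1 to retire the instruction,
 * 0 if an exception was raised.
 */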
2566 static int vmx_do_msr_read(struct cpu_user_regs *regs)
2568 u64 msr_content = 0;
2569 u32 ecx = regs->ecx, eax, edx;
2570 struct vcpu *v = current;
2572 HVM_DBG_LOG(DBG_LEVEL_1, "ecx=%x", ecx);
2574 switch ( ecx )
2576 case MSR_IA32_TIME_STAMP_COUNTER:
2577 msr_content = hvm_get_guest_time(v);
2578 break;
2579 case MSR_IA32_SYSENTER_CS:
2580 msr_content = (u32)__vmread(GUEST_SYSENTER_CS);
2581 break;
2582 case MSR_IA32_SYSENTER_ESP:
2583 msr_content = __vmread(GUEST_SYSENTER_ESP);
2584 break;
2585 case MSR_IA32_SYSENTER_EIP:
2586 msr_content = __vmread(GUEST_SYSENTER_EIP);
2587 break;
2588 case MSR_IA32_APICBASE:
2589 msr_content = vcpu_vlapic(v)->hw.apic_base_msr;
2590 break;
2591 case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_PROCBASED_CTLS2:
2592 goto gp_fault;
2593 case MSR_IA32_MCG_STATUS:
2594 case MSR_IA32_MC0_STATUS:
2595 case MSR_K8_MC1_STATUS:
2596 case MSR_K8_MC2_STATUS:
2597 case MSR_K8_MC3_STATUS:
2598 case MSR_K8_MC4_STATUS:
2599 case MSR_K8_MC5_STATUS:
2600 /* No point in letting the guest see real MCEs */
2601 msr_content = 0;
2602 break;
2603 default:
2604 switch ( long_mode_do_msr_read(regs) )
2606 case HNDL_unhandled:
2607 break;
2608 case HNDL_exception_raised:
2609 return 0;
2610 case HNDL_done:
2611 goto done;
2614 if ( rdmsr_hypervisor_regs(ecx, &eax, &edx) ||
2615 rdmsr_safe(ecx, eax, edx) == 0 )
2617 regs->eax = eax;
2618 regs->edx = edx;
2619 goto done;
2622 goto gp_fault;
2625 regs->eax = msr_content & 0xFFFFFFFF;
2626 regs->edx = msr_content >> 32;
2628 done:
2629 HVMTRACE_2D(MSR_READ, v, ecx, msr_content);
2630 HVM_DBG_LOG(DBG_LEVEL_1, "returns: ecx=%x, eax=%lx, edx=%lx",
2631 ecx, (unsigned long)regs->eax,
2632 (unsigned long)regs->edx);
2633 return 1;
2635 gp_fault:
2636 vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
2637 return 0;
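/*
 * vmx_alloc_vlapic_mapping: when the CPU supports APIC-access
 * virtualization, allocate a xenheap page, share it with the guest and map
 * it at the default APIC base so that guest APIC accesses can be handled
 * via the APIC_ACCESS_ADDR mechanism.
 */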
2640 static int vmx_alloc_vlapic_mapping(struct domain *d)
2642 void *apic_va;
2644 if ( !cpu_has_vmx_virtualize_apic_accesses )
2645 return 0;
2647 apic_va = alloc_xenheap_page();
2648 if ( apic_va == NULL )
2649 return -ENOMEM;
2650 share_xen_page_with_guest(virt_to_page(apic_va), d, XENSHARE_writable);
2651 guest_physmap_add_page(
2652 d, paddr_to_pfn(APIC_DEFAULT_PHYS_BASE), virt_to_mfn(apic_va));
2653 d->arch.hvm_domain.vmx_apic_access_mfn = virt_to_mfn(apic_va);
2655 return 0;
2658 static void vmx_free_vlapic_mapping(struct domain *d)
2660 unsigned long mfn = d->arch.hvm_domain.vmx_apic_access_mfn;
2661 if ( mfn != 0 )
2662 free_xenheap_page(mfn_to_virt(mfn));
2665 static void vmx_install_vlapic_mapping(struct vcpu *v)
2667 unsigned long virt_page_ma, apic_page_ma;
2669 if ( !cpu_has_vmx_virtualize_apic_accesses )
2670 return;
2672 virt_page_ma = page_to_maddr(vcpu_vlapic(v)->regs_page);
2673 apic_page_ma = v->domain->arch.hvm_domain.vmx_apic_access_mfn;
2674 apic_page_ma <<= PAGE_SHIFT;
2676 vmx_vmcs_enter(v);
2677 __vmwrite(VIRTUAL_APIC_PAGE_ADDR, virt_page_ma);
2678 __vmwrite(APIC_ACCESS_ADDR, apic_page_ma);
2679 vmx_vmcs_exit(v);
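/*
 * Called when the guest writes the APIC base MSR: enable the
 * virtualize-APIC-accesses execution control only while the vLAPIC is
 * hardware-enabled and still located at the default physical base.
 */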
2682 void vmx_vlapic_msr_changed(struct vcpu *v)
2684 struct vlapic *vlapic = vcpu_vlapic(v);
2685 uint32_t ctl;
2687 if ( !cpu_has_vmx_virtualize_apic_accesses )
2688 return;
2690 vmx_vmcs_enter(v);
2691 ctl = __vmread(SECONDARY_VM_EXEC_CONTROL);
2692 ctl &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
2693 if ( !vlapic_hw_disabled(vlapic) &&
2694 (vlapic_base_address(vlapic) == APIC_DEFAULT_PHYS_BASE) )
2695 ctl |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
2696 __vmwrite(SECONDARY_VM_EXEC_CONTROL, ctl);
2697 vmx_vmcs_exit(v);
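/*
 * WRMSR exit handler.  EDX:EAX is assembled into msr_content; MSRs not
 * handled inline are passed to long_mode_do_msr_write(), and anything left
 * unhandled falls through to wrmsr_hypervisor_regs().  Returns 1 to retire
 * the instruction, 0 if an exception was raised.
 */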
2700 static int vmx_do_msr_write(struct cpu_user_regs *regs)
2702 u32 ecx = regs->ecx;
2703 u64 msr_content;
2704 struct vcpu *v = current;
2706 HVM_DBG_LOG(DBG_LEVEL_1, "ecx=%x, eax=%x, edx=%x",
2707 ecx, (u32)regs->eax, (u32)regs->edx);
2709 msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
2710 HVMTRACE_2D(MSR_WRITE, v, ecx, msr_content);
2712 switch ( ecx )
2714 case MSR_IA32_TIME_STAMP_COUNTER:
2715 hvm_set_guest_time(v, msr_content);
2716 pt_reset(v);
2717 break;
2718 case MSR_IA32_SYSENTER_CS:
2719 __vmwrite(GUEST_SYSENTER_CS, msr_content);
2720 break;
2721 case MSR_IA32_SYSENTER_ESP:
2722 __vmwrite(GUEST_SYSENTER_ESP, msr_content);
2723 break;
2724 case MSR_IA32_SYSENTER_EIP:
2725 __vmwrite(GUEST_SYSENTER_EIP, msr_content);
2726 break;
2727 case MSR_IA32_APICBASE:
2728 vlapic_msr_set(vcpu_vlapic(v), msr_content);
2729 break;
2730 case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_PROCBASED_CTLS2:
2731 goto gp_fault;
2732 default:
2733 switch ( long_mode_do_msr_write(regs) )
2735 case HNDL_unhandled:
2736 wrmsr_hypervisor_regs(ecx, regs->eax, regs->edx);
2737 break;
2738 case HNDL_exception_raised:
2739 return 0;
2740 case HNDL_done:
2741 break;
2743 break;
2746 return 1;
2748 gp_fault:
2749 vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
2750 return 0;
2753 static void vmx_do_hlt(void)
2755 unsigned long rflags;
2756 HVMTRACE_0D(HLT, current);
2757 rflags = __vmread(GUEST_RFLAGS);
2758 hvm_hlt(rflags);
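/*
 * A host interrupt arrived while the guest was running.  Read the vector
 * from the exit information and dispatch it directly to Xen's own handler
 * (timer, IPIs, spurious/error APIC, or do_IRQ for everything else).
 */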
2761 static void vmx_do_extint(struct cpu_user_regs *regs)
2763 unsigned int vector;
2765 asmlinkage void do_IRQ(struct cpu_user_regs *);
2766 fastcall void smp_apic_timer_interrupt(struct cpu_user_regs *);
2767 fastcall void smp_event_check_interrupt(void);
2768 fastcall void smp_invalidate_interrupt(void);
2769 fastcall void smp_call_function_interrupt(void);
2770 fastcall void smp_spurious_interrupt(struct cpu_user_regs *regs);
2771 fastcall void smp_error_interrupt(struct cpu_user_regs *regs);
2772 #ifdef CONFIG_X86_MCE_P4THERMAL
2773 fastcall void smp_thermal_interrupt(struct cpu_user_regs *regs);
2774 #endif
2776 vector = __vmread(VM_EXIT_INTR_INFO);
2777 BUG_ON(!(vector & INTR_INFO_VALID_MASK));
2779 vector &= INTR_INFO_VECTOR_MASK;
2780 HVMTRACE_1D(INTR, current, vector);
2782 switch ( vector )
2784 case LOCAL_TIMER_VECTOR:
2785 smp_apic_timer_interrupt(regs);
2786 break;
2787 case EVENT_CHECK_VECTOR:
2788 smp_event_check_interrupt();
2789 break;
2790 case INVALIDATE_TLB_VECTOR:
2791 smp_invalidate_interrupt();
2792 break;
2793 case CALL_FUNCTION_VECTOR:
2794 smp_call_function_interrupt();
2795 break;
2796 case SPURIOUS_APIC_VECTOR:
2797 smp_spurious_interrupt(regs);
2798 break;
2799 case ERROR_APIC_VECTOR:
2800 smp_error_interrupt(regs);
2801 break;
2802 #ifdef CONFIG_X86_MCE_P4THERMAL
2803 case THERMAL_APIC_VECTOR:
2804 smp_thermal_interrupt(regs);
2805 break;
2806 #endif
2807 default:
2808 regs->entry_vector = vector;
2809 do_IRQ(regs);
2810 break;
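/*
 * Diagnose a VM entry that failed while loading guest state: print the
 * reason (invalid guest state, MSR loading, machine check), dump the VMCS
 * and crash the offending domain.
 */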
2814 static void vmx_failed_vmentry(unsigned int exit_reason,
2815 struct cpu_user_regs *regs)
2817 unsigned int failed_vmentry_reason = (uint16_t)exit_reason;
2818 unsigned long exit_qualification;
2820 exit_qualification = __vmread(EXIT_QUALIFICATION);
2821 printk("Failed vm entry (exit reason 0x%x) ", exit_reason);
2822 switch ( failed_vmentry_reason )
2824 case EXIT_REASON_INVALID_GUEST_STATE:
2825 printk("caused by invalid guest state (%ld).\n", exit_qualification);
2826 break;
2827 case EXIT_REASON_MSR_LOADING:
2828 printk("caused by MSR entry %ld loading.\n", exit_qualification);
2829 break;
2830 case EXIT_REASON_MACHINE_CHECK:
2831 printk("caused by machine check.\n");
2832 HVMTRACE_0D(MCE, current);
2833 vmx_store_cpu_guest_regs(current, regs, NULL);
2834 do_machine_check(regs);
2835 break;
2836 default:
2837 printk("reason not known yet!");
2838 break;
2841 printk("************* VMCS Area **************\n");
2842 vmcs_dump_vcpu();
2843 printk("**************************************\n");
2845 domain_crash(current->domain);
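/*
 * Top-level VM-exit dispatcher.  Re-queues any event whose delivery caused
 * the exit, then switches on the exit reason to the handlers above;
 * unrecognised or forbidden exits crash the domain.
 */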
2848 asmlinkage void vmx_vmexit_handler(struct cpu_user_regs *regs)
2850 unsigned int exit_reason, idtv_info;
2851 unsigned long exit_qualification, inst_len = 0;
2852 struct vcpu *v = current;
2854 exit_reason = __vmread(VM_EXIT_REASON);
2856 HVMTRACE_2D(VMEXIT, v, __vmread(GUEST_RIP), exit_reason);
2858 perfc_incra(vmexits, exit_reason);
2860 if ( exit_reason != EXIT_REASON_EXTERNAL_INTERRUPT )
2861 local_irq_enable();
2863 if ( unlikely(exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) )
2864 return vmx_failed_vmentry(exit_reason, regs);
2866 /* Event delivery caused this intercept? Queue for redelivery. */
2867 idtv_info = __vmread(IDT_VECTORING_INFO);
2868 if ( unlikely(idtv_info & INTR_INFO_VALID_MASK) )
2870 if ( hvm_event_needs_reinjection((idtv_info>>8)&7, idtv_info&0xff) )
2872 /* See SDM 3B 25.7.1.1 and .2 for info about masking resvd bits. */
2873 __vmwrite(VM_ENTRY_INTR_INFO,
2874 idtv_info & ~INTR_INFO_RESVD_BITS_MASK);
2875 if ( idtv_info & INTR_INFO_DELIVER_CODE_MASK )
2876 __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE,
2877 __vmread(IDT_VECTORING_ERROR_CODE));
2880 /*
2881 * Clear NMI-blocking interruptibility info if an NMI delivery faulted.
2882 * Re-delivery will re-set it (see SDM 3B 25.7.1.2).
2883 */
2884 if ( (idtv_info & INTR_INFO_INTR_TYPE_MASK) == (X86_EVENTTYPE_NMI<<8) )
2885 __vmwrite(GUEST_INTERRUPTIBILITY_INFO,
2886 __vmread(GUEST_INTERRUPTIBILITY_INFO) &
2887 ~VMX_INTR_SHADOW_NMI);
2890 switch ( exit_reason )
2892 case EXIT_REASON_EXCEPTION_NMI:
2894 /*
2895 * We do not enable software-interrupt exiting (INT n), so this
2896 * vmexit was caused by either (1) an exception (e.g. #PF) raised
2897 * in the guest, or (2) an NMI.
2898 */
2899 unsigned int intr_info, vector;
2901 intr_info = __vmread(VM_EXIT_INTR_INFO);
2902 BUG_ON(!(intr_info & INTR_INFO_VALID_MASK));
2904 vector = intr_info & INTR_INFO_VECTOR_MASK;
2906 /*
2907 * Re-set the NMI shadow if vmexit caused by a guest IRET fault (see 3B
2908 * 25.7.1.2, "Resuming Guest Software after Handling an Exception").
2909 * (NB. If we emulate this IRET for any reason, we should re-clear!)
2910 */
2911 if ( unlikely(intr_info & INTR_INFO_NMI_UNBLOCKED_BY_IRET) &&
2912 !(__vmread(IDT_VECTORING_INFO) & INTR_INFO_VALID_MASK) &&
2913 (vector != TRAP_double_fault) )
2914 __vmwrite(GUEST_INTERRUPTIBILITY_INFO,
2915 __vmread(GUEST_INTERRUPTIBILITY_INFO)|VMX_INTR_SHADOW_NMI);
2917 perfc_incra(cause_vector, vector);
2919 switch ( vector )
2921 case TRAP_debug:
2922 case TRAP_int3:
2923 if ( !v->domain->debugger_attached )
2924 goto exit_and_crash;
2925 domain_pause_for_debugger();
2926 break;
2927 case TRAP_no_device:
2928 vmx_do_no_device_fault();
2929 break;
2930 case TRAP_page_fault:
2931 exit_qualification = __vmread(EXIT_QUALIFICATION);
2932 regs->error_code = __vmread(VM_EXIT_INTR_ERROR_CODE);
2934 HVM_DBG_LOG(DBG_LEVEL_VMMU,
2935 "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx",
2936 (unsigned long)regs->eax, (unsigned long)regs->ebx,
2937 (unsigned long)regs->ecx, (unsigned long)regs->edx,
2938 (unsigned long)regs->esi, (unsigned long)regs->edi);
2940 if ( paging_fault(exit_qualification, regs) )
2942 HVMTRACE_2D(PF_XEN, v, exit_qualification, regs->error_code);
2943 break;
2946 v->arch.hvm_vmx.cpu_cr2 = exit_qualification;
2947 vmx_inject_hw_exception(v, TRAP_page_fault, regs->error_code);
2948 break;
2949 case TRAP_nmi:
2950 if ( (intr_info & INTR_INFO_INTR_TYPE_MASK) !=
2951 (X86_EVENTTYPE_NMI << 8) )
2952 goto exit_and_crash;
2953 HVMTRACE_0D(NMI, v);
2954 vmx_store_cpu_guest_regs(v, regs, NULL);
2955 do_nmi(regs); /* Real NMI, vector 2: normal processing. */
2956 break;
2957 case TRAP_machine_check:
2958 HVMTRACE_0D(MCE, v);
2959 vmx_store_cpu_guest_regs(v, regs, NULL);
2960 do_machine_check(regs);
2961 break;
2962 default:
2963 goto exit_and_crash;
2965 break;
2967 case EXIT_REASON_EXTERNAL_INTERRUPT:
2968 vmx_do_extint(regs);
2969 break;
2970 case EXIT_REASON_TRIPLE_FAULT:
2971 hvm_triple_fault();
2972 break;
2973 case EXIT_REASON_PENDING_VIRT_INTR:
2974 /* Disable the interrupt window. */
2975 v->arch.hvm_vmx.exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING;
2976 __vmwrite(CPU_BASED_VM_EXEC_CONTROL,
2977 v->arch.hvm_vmx.exec_control);
2978 break;
2979 case EXIT_REASON_PENDING_VIRT_NMI:
2980 /* Disable the NMI window. */
2981 v->arch.hvm_vmx.exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING;
2982 __vmwrite(CPU_BASED_VM_EXEC_CONTROL,
2983 v->arch.hvm_vmx.exec_control);
2984 break;
2985 case EXIT_REASON_TASK_SWITCH:
2986 goto exit_and_crash;
2987 case EXIT_REASON_CPUID:
2988 inst_len = __get_instruction_length(); /* Safe: CPUID */
2989 __update_guest_eip(inst_len);
2990 vmx_do_cpuid(regs);
2991 break;
2992 case EXIT_REASON_HLT:
2993 inst_len = __get_instruction_length(); /* Safe: HLT */
2994 __update_guest_eip(inst_len);
2995 vmx_do_hlt();
2996 break;
2997 case EXIT_REASON_INVLPG:
2999 inst_len = __get_instruction_length(); /* Safe: INVLPG */
3000 __update_guest_eip(inst_len);
3001 exit_qualification = __vmread(EXIT_QUALIFICATION);
3002 vmx_do_invlpg(exit_qualification);
3003 break;
3005 case EXIT_REASON_VMCALL:
3007 int rc;
3008 HVMTRACE_1D(VMMCALL, v, regs->eax);
3009 inst_len = __get_instruction_length(); /* Safe: VMCALL */
3010 rc = hvm_do_hypercall(regs);
3011 if ( rc != HVM_HCALL_preempted )
3013 __update_guest_eip(inst_len);
3014 if ( rc == HVM_HCALL_invalidate )
3015 send_invalidate_req();
3017 break;
3019 case EXIT_REASON_CR_ACCESS:
3021 exit_qualification = __vmread(EXIT_QUALIFICATION);
3022 inst_len = __get_instruction_length(); /* Safe: MOV Cn, LMSW, CLTS */
3023 if ( vmx_cr_access(exit_qualification, regs) )
3024 __update_guest_eip(inst_len);
3025 break;
3027 case EXIT_REASON_DR_ACCESS:
3028 exit_qualification = __vmread(EXIT_QUALIFICATION);
3029 vmx_dr_access(exit_qualification, regs);
3030 break;
3031 case EXIT_REASON_IO_INSTRUCTION:
3032 exit_qualification = __vmread(EXIT_QUALIFICATION);
3033 inst_len = __get_instruction_length(); /* Safe: IN, INS, OUT, OUTS */
3034 vmx_io_instruction(exit_qualification, inst_len);
3035 break;
3036 case EXIT_REASON_MSR_READ:
3037 inst_len = __get_instruction_length(); /* Safe: RDMSR */
3038 if ( vmx_do_msr_read(regs) )
3039 __update_guest_eip(inst_len);
3040 break;
3041 case EXIT_REASON_MSR_WRITE:
3042 inst_len = __get_instruction_length(); /* Safe: WRMSR */
3043 if ( vmx_do_msr_write(regs) )
3044 __update_guest_eip(inst_len);
3045 break;
3046 case EXIT_REASON_MWAIT_INSTRUCTION:
3047 case EXIT_REASON_MONITOR_INSTRUCTION:
3048 case EXIT_REASON_PAUSE_INSTRUCTION:
3049 goto exit_and_crash;
3050 case EXIT_REASON_VMCLEAR:
3051 case EXIT_REASON_VMLAUNCH:
3052 case EXIT_REASON_VMPTRLD:
3053 case EXIT_REASON_VMPTRST:
3054 case EXIT_REASON_VMREAD:
3055 case EXIT_REASON_VMRESUME:
3056 case EXIT_REASON_VMWRITE:
3057 case EXIT_REASON_VMXOFF:
3058 case EXIT_REASON_VMXON:
3059 /* Report an invalid-opcode exception when a VMX guest tries to
3060 execute any of the VMX instructions. */
3061 vmx_inject_hw_exception(v, TRAP_invalid_op, VMX_DELIVER_NO_ERROR_CODE);
3062 break;
3064 case EXIT_REASON_TPR_BELOW_THRESHOLD:
3065 break;
3067 case EXIT_REASON_APIC_ACCESS:
3069 unsigned long offset;
3070 exit_qualification = __vmread(EXIT_QUALIFICATION);
3071 offset = exit_qualification & 0x0fffUL;
3072 handle_mmio(APIC_DEFAULT_PHYS_BASE | offset);
3073 break;
3076 default:
3077 exit_and_crash:
3078 gdprintk(XENLOG_ERR, "Bad vmexit (reason %x)\n", exit_reason);
3079 domain_crash(v->domain);
3080 break;
3084 asmlinkage void vmx_trace_vmentry(void)
3086 struct vcpu *v = current;
3087 HVMTRACE_0D(VMENTRY, v);
3090 /*
3091 * Local variables:
3092 * mode: C
3093 * c-set-style: "BSD"
3094 * c-basic-offset: 4
3095 * tab-width: 4
3096 * indent-tabs-mode: nil
3097 * End:
3098 */