ia64/xen-unstable

view xen/arch/x86/hvm/vmx/vmx.c @ 17053:4c64376d439d

x86 vmx: Streamline vmx_interrupt_blocked() to avoid a VMREAD if
interrupt delivery is blocked by EFLAGS.IF. This speeds up real-mode
emulation in some cases (where we are currently executing
hvm_local_events_need_delivery() after every instruction).

Signed-off-by: Keir Fraser <keir.fraser@citrix.com>

author    Keir Fraser <keir.fraser@citrix.com>
date      Wed Feb 13 16:28:38 2008 +0000
parents   5e1df44d406e
children  3f1cf03826fe
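The change described above reorders the checks in vmx_interrupt_blocked()
(defined further down in this file) so that the EFLAGS.IF test, which needs
no VMREAD, runs before the interruptibility-state read. A minimal sketch of
that fast path (a simplified, hypothetical condensation; the real function
below also handles the NMI interrupt-shadow case):

    static enum hvm_intblk interrupt_blocked_sketch(
        struct vcpu *v, struct hvm_intack intack)
    {
        /* Cheap test first: EFLAGS.IF needs no VMCS access. */
        if ( (intack.source != hvm_intsrc_nmi) &&
             !(guest_cpu_user_regs()->eflags & X86_EFLAGS_IF) )
            return hvm_intblk_rflags_ie;

        /* Only now pay for the VMREAD of the interruptibility state. */
        if ( __vmread(GUEST_INTERRUPTIBILITY_INFO) &
             (VMX_INTR_SHADOW_STI | VMX_INTR_SHADOW_MOV_SS) )
            return hvm_intblk_shadow;

        return hvm_intblk_none;
    }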
line source
1 /*
2 * vmx.c: handling VMX architecture-related VM exits
3 * Copyright (c) 2004, Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
16 * Place - Suite 330, Boston, MA 02111-1307 USA.
17 */
19 #include <xen/config.h>
20 #include <xen/init.h>
21 #include <xen/lib.h>
22 #include <xen/trace.h>
23 #include <xen/sched.h>
24 #include <xen/irq.h>
25 #include <xen/softirq.h>
26 #include <xen/domain_page.h>
27 #include <xen/hypercall.h>
28 #include <xen/perfc.h>
29 #include <asm/current.h>
30 #include <asm/io.h>
31 #include <asm/regs.h>
32 #include <asm/cpufeature.h>
33 #include <asm/processor.h>
34 #include <asm/types.h>
35 #include <asm/debugreg.h>
36 #include <asm/msr.h>
37 #include <asm/spinlock.h>
38 #include <asm/paging.h>
39 #include <asm/p2m.h>
40 #include <asm/hvm/hvm.h>
41 #include <asm/hvm/support.h>
42 #include <asm/hvm/vmx/vmx.h>
43 #include <asm/hvm/vmx/vmcs.h>
44 #include <public/sched.h>
45 #include <public/hvm/ioreq.h>
46 #include <asm/hvm/vpic.h>
47 #include <asm/hvm/vlapic.h>
48 #include <asm/x86_emulate.h>
49 #include <asm/hvm/vpt.h>
50 #include <public/hvm/save.h>
51 #include <asm/hvm/trace.h>
53 enum handler_return { HNDL_done, HNDL_unhandled, HNDL_exception_raised };
55 static void vmx_ctxt_switch_from(struct vcpu *v);
56 static void vmx_ctxt_switch_to(struct vcpu *v);
58 static int vmx_alloc_vlapic_mapping(struct domain *d);
59 static void vmx_free_vlapic_mapping(struct domain *d);
60 static void vmx_install_vlapic_mapping(struct vcpu *v);
61 static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr);
62 static void vmx_update_guest_efer(struct vcpu *v);
64 static int vmx_domain_initialise(struct domain *d)
65 {
66 return vmx_alloc_vlapic_mapping(d);
67 }
69 static void vmx_domain_destroy(struct domain *d)
70 {
71 vmx_free_vlapic_mapping(d);
72 }
74 static int vmx_vcpu_initialise(struct vcpu *v)
75 {
76 int rc;
78 spin_lock_init(&v->arch.hvm_vmx.vmcs_lock);
80 v->arch.schedule_tail = vmx_do_resume;
81 v->arch.ctxt_switch_from = vmx_ctxt_switch_from;
82 v->arch.ctxt_switch_to = vmx_ctxt_switch_to;
84 if ( (rc = vmx_create_vmcs(v)) != 0 )
85 {
86 dprintk(XENLOG_WARNING,
87 "Failed to create VMCS for vcpu %d: err=%d.\n",
88 v->vcpu_id, rc);
89 return rc;
90 }
92 vpmu_initialise(v);
94 vmx_install_vlapic_mapping(v);
96 /* %eax == 1 signals full real-mode support to the guest loader. */
97 if ( v->vcpu_id == 0 )
98 v->arch.guest_context.user_regs.eax = 1;
99 v->arch.hvm_vcpu.io_complete = vmx_realmode_io_complete;
101 return 0;
102 }
104 static void vmx_vcpu_destroy(struct vcpu *v)
105 {
106 vmx_destroy_vmcs(v);
107 vpmu_destroy(v);
108 }
110 #ifdef __x86_64__
112 static DEFINE_PER_CPU(struct vmx_msr_state, host_msr_state);
114 static u32 msr_index[VMX_MSR_COUNT] =
115 {
116 MSR_LSTAR, MSR_STAR, MSR_SYSCALL_MASK
117 };
119 static void vmx_save_host_msrs(void)
120 {
121 struct vmx_msr_state *host_msr_state = &this_cpu(host_msr_state);
122 int i;
124 for ( i = 0; i < VMX_MSR_COUNT; i++ )
125 rdmsrl(msr_index[i], host_msr_state->msrs[i]);
126 }
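/*
 * WRITE_MSR(X): record msr_content in the guest's saved copy of MSR_X,
 * mark that MSR dirty in both the guest and host flag bitmaps (so that
 * vmx_restore_guest_msrs()/vmx_restore_host_msrs() know to reload it on
 * the next context switch), and write the value to the hardware MSR
 * immediately.
 */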
128 #define WRITE_MSR(address) \
129 guest_msr_state->msrs[VMX_INDEX_MSR_ ## address] = msr_content; \
130 set_bit(VMX_INDEX_MSR_ ## address, &guest_msr_state->flags); \
131 wrmsrl(MSR_ ## address, msr_content); \
132 set_bit(VMX_INDEX_MSR_ ## address, &host_msr_state->flags); \
133 break
135 static enum handler_return long_mode_do_msr_read(struct cpu_user_regs *regs)
136 {
137 u64 msr_content = 0;
138 u32 ecx = regs->ecx;
139 struct vcpu *v = current;
140 struct vmx_msr_state *guest_msr_state = &v->arch.hvm_vmx.msr_state;
142 switch ( ecx )
143 {
144 case MSR_EFER:
145 msr_content = v->arch.hvm_vcpu.guest_efer;
146 break;
148 case MSR_FS_BASE:
149 msr_content = __vmread(GUEST_FS_BASE);
150 goto check_long_mode;
152 case MSR_GS_BASE:
153 msr_content = __vmread(GUEST_GS_BASE);
154 goto check_long_mode;
156 case MSR_SHADOW_GS_BASE:
157 msr_content = v->arch.hvm_vmx.shadow_gs;
158 check_long_mode:
159 if ( !(hvm_long_mode_enabled(v)) )
160 {
161 vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
162 return HNDL_exception_raised;
163 }
164 break;
166 case MSR_STAR:
167 msr_content = guest_msr_state->msrs[VMX_INDEX_MSR_STAR];
168 break;
170 case MSR_LSTAR:
171 msr_content = guest_msr_state->msrs[VMX_INDEX_MSR_LSTAR];
172 break;
174 case MSR_CSTAR:
175 msr_content = v->arch.hvm_vmx.cstar;
176 break;
178 case MSR_SYSCALL_MASK:
179 msr_content = guest_msr_state->msrs[VMX_INDEX_MSR_SYSCALL_MASK];
180 break;
182 default:
183 return HNDL_unhandled;
184 }
186 HVM_DBG_LOG(DBG_LEVEL_0, "msr 0x%x content 0x%"PRIx64, ecx, msr_content);
188 regs->eax = (u32)(msr_content >> 0);
189 regs->edx = (u32)(msr_content >> 32);
191 return HNDL_done;
192 }
194 static enum handler_return long_mode_do_msr_write(struct cpu_user_regs *regs)
195 {
196 u64 msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
197 u32 ecx = regs->ecx;
198 struct vcpu *v = current;
199 struct vmx_msr_state *guest_msr_state = &v->arch.hvm_vmx.msr_state;
200 struct vmx_msr_state *host_msr_state = &this_cpu(host_msr_state);
202 HVM_DBG_LOG(DBG_LEVEL_0, "msr 0x%x content 0x%"PRIx64, ecx, msr_content);
204 switch ( ecx )
205 {
206 case MSR_EFER:
207 if ( !hvm_set_efer(msr_content) )
208 goto exception_raised;
209 break;
211 case MSR_FS_BASE:
212 case MSR_GS_BASE:
213 case MSR_SHADOW_GS_BASE:
214 if ( !hvm_long_mode_enabled(v) )
215 goto gp_fault;
217 if ( !is_canonical_address(msr_content) )
218 goto uncanonical_address;
220 if ( ecx == MSR_FS_BASE )
221 __vmwrite(GUEST_FS_BASE, msr_content);
222 else if ( ecx == MSR_GS_BASE )
223 __vmwrite(GUEST_GS_BASE, msr_content);
224 else
225 {
226 v->arch.hvm_vmx.shadow_gs = msr_content;
227 wrmsrl(MSR_SHADOW_GS_BASE, msr_content);
228 }
230 break;
232 case MSR_STAR:
233 WRITE_MSR(STAR);
235 case MSR_LSTAR:
236 if ( !is_canonical_address(msr_content) )
237 goto uncanonical_address;
238 WRITE_MSR(LSTAR);
240 case MSR_CSTAR:
241 if ( !is_canonical_address(msr_content) )
242 goto uncanonical_address;
243 v->arch.hvm_vmx.cstar = msr_content;
244 break;
246 case MSR_SYSCALL_MASK:
247 WRITE_MSR(SYSCALL_MASK);
249 default:
250 return HNDL_unhandled;
251 }
253 return HNDL_done;
255 uncanonical_address:
256 HVM_DBG_LOG(DBG_LEVEL_0, "Not canonical address of MSR write %x", ecx);
257 gp_fault:
258 vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
259 exception_raised:
260 return HNDL_exception_raised;
261 }
263 /*
264 * To avoid MSR save/restore at every VM exit/entry time, we restore
265 * the x86_64 specific MSRs at domain switch time. Since these MSRs
266 * are not modified once set for para domains, we don't save them,
267 * but simply reset them to values set in percpu_traps_init().
268 */
269 static void vmx_restore_host_msrs(void)
270 {
271 struct vmx_msr_state *host_msr_state = &this_cpu(host_msr_state);
272 int i;
274 while ( host_msr_state->flags )
275 {
276 i = find_first_set_bit(host_msr_state->flags);
277 wrmsrl(msr_index[i], host_msr_state->msrs[i]);
278 clear_bit(i, &host_msr_state->flags);
279 }
281 if ( cpu_has_nx && !(read_efer() & EFER_NX) )
282 write_efer(read_efer() | EFER_NX);
283 }
285 static void vmx_save_guest_msrs(struct vcpu *v)
286 {
287 /* MSR_SHADOW_GS_BASE may have been changed by swapgs instruction. */
288 rdmsrl(MSR_SHADOW_GS_BASE, v->arch.hvm_vmx.shadow_gs);
289 }
291 static void vmx_restore_guest_msrs(struct vcpu *v)
292 {
293 struct vmx_msr_state *guest_msr_state, *host_msr_state;
294 unsigned long guest_flags;
295 int i;
297 guest_msr_state = &v->arch.hvm_vmx.msr_state;
298 host_msr_state = &this_cpu(host_msr_state);
300 wrmsrl(MSR_SHADOW_GS_BASE, v->arch.hvm_vmx.shadow_gs);
302 guest_flags = guest_msr_state->flags;
304 while ( guest_flags )
305 {
306 i = find_first_set_bit(guest_flags);
308 HVM_DBG_LOG(DBG_LEVEL_2,
309 "restore guest's index %d msr %x with value %lx",
310 i, msr_index[i], guest_msr_state->msrs[i]);
311 set_bit(i, &host_msr_state->flags);
312 wrmsrl(msr_index[i], guest_msr_state->msrs[i]);
313 clear_bit(i, &guest_flags);
314 }
316 if ( (v->arch.hvm_vcpu.guest_efer ^ read_efer()) & (EFER_NX | EFER_SCE) )
317 {
318 HVM_DBG_LOG(DBG_LEVEL_2,
319 "restore guest's EFER with value %lx",
320 v->arch.hvm_vcpu.guest_efer);
321 write_efer((read_efer() & ~(EFER_NX | EFER_SCE)) |
322 (v->arch.hvm_vcpu.guest_efer & (EFER_NX | EFER_SCE)));
323 }
324 }
326 #else /* __i386__ */
328 #define vmx_save_host_msrs() ((void)0)
330 static void vmx_restore_host_msrs(void)
331 {
332 if ( cpu_has_nx && !(read_efer() & EFER_NX) )
333 write_efer(read_efer() | EFER_NX);
334 }
336 #define vmx_save_guest_msrs(v) ((void)0)
338 static void vmx_restore_guest_msrs(struct vcpu *v)
339 {
340 if ( (v->arch.hvm_vcpu.guest_efer ^ read_efer()) & EFER_NX )
341 {
342 HVM_DBG_LOG(DBG_LEVEL_2,
343 "restore guest's EFER with value %lx",
344 v->arch.hvm_vcpu.guest_efer);
345 write_efer((read_efer() & ~EFER_NX) |
346 (v->arch.hvm_vcpu.guest_efer & EFER_NX));
347 }
348 }
350 static enum handler_return long_mode_do_msr_read(struct cpu_user_regs *regs)
351 {
352 u64 msr_content = 0;
353 struct vcpu *v = current;
355 switch ( regs->ecx )
356 {
357 case MSR_EFER:
358 msr_content = v->arch.hvm_vcpu.guest_efer;
359 break;
361 default:
362 return HNDL_unhandled;
363 }
365 regs->eax = msr_content >> 0;
366 regs->edx = msr_content >> 32;
368 return HNDL_done;
369 }
371 static enum handler_return long_mode_do_msr_write(struct cpu_user_regs *regs)
372 {
373 u64 msr_content = regs->eax | ((u64)regs->edx << 32);
375 switch ( regs->ecx )
376 {
377 case MSR_EFER:
378 if ( !hvm_set_efer(msr_content) )
379 return HNDL_exception_raised;
380 break;
382 default:
383 return HNDL_unhandled;
384 }
386 return HNDL_done;
387 }
389 #endif /* __i386__ */
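/*
 * vmx_guest_x86_mode() encodes the current guest execution mode: 0 for
 * real mode (CR0.PE clear), 1 for virtual-8086 mode, 2 for 16-bit and
 * 4 for 32-bit protected mode (from CS.D), and 8 for 64-bit mode
 * (CS.L set while long mode is active).
 */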
391 static int vmx_guest_x86_mode(struct vcpu *v)
392 {
393 unsigned int cs_ar_bytes;
395 if ( unlikely(!(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE)) )
396 return 0;
397 if ( unlikely(guest_cpu_user_regs()->eflags & X86_EFLAGS_VM) )
398 return 1;
399 cs_ar_bytes = __vmread(GUEST_CS_AR_BYTES);
400 if ( hvm_long_mode_enabled(v) &&
401 likely(cs_ar_bytes & X86_SEG_AR_CS_LM_ACTIVE) )
402 return 8;
403 return (likely(cs_ar_bytes & X86_SEG_AR_DEF_OP_SIZE) ? 4 : 2);
404 }
406 static void vmx_save_dr(struct vcpu *v)
407 {
408 if ( !v->arch.hvm_vcpu.flag_dr_dirty )
409 return;
411 /* Clear the DR dirty flag and re-enable intercepts for DR accesses. */
412 v->arch.hvm_vcpu.flag_dr_dirty = 0;
413 v->arch.hvm_vmx.exec_control |= CPU_BASED_MOV_DR_EXITING;
414 __vmwrite(CPU_BASED_VM_EXEC_CONTROL, v->arch.hvm_vmx.exec_control);
416 v->arch.guest_context.debugreg[0] = read_debugreg(0);
417 v->arch.guest_context.debugreg[1] = read_debugreg(1);
418 v->arch.guest_context.debugreg[2] = read_debugreg(2);
419 v->arch.guest_context.debugreg[3] = read_debugreg(3);
420 v->arch.guest_context.debugreg[6] = read_debugreg(6);
421 /* DR7 must be saved as it is used by vmx_restore_dr(). */
422 v->arch.guest_context.debugreg[7] = __vmread(GUEST_DR7);
423 }
425 static void __restore_debug_registers(struct vcpu *v)
426 {
427 if ( v->arch.hvm_vcpu.flag_dr_dirty )
428 return;
430 v->arch.hvm_vcpu.flag_dr_dirty = 1;
432 write_debugreg(0, v->arch.guest_context.debugreg[0]);
433 write_debugreg(1, v->arch.guest_context.debugreg[1]);
434 write_debugreg(2, v->arch.guest_context.debugreg[2]);
435 write_debugreg(3, v->arch.guest_context.debugreg[3]);
436 write_debugreg(6, v->arch.guest_context.debugreg[6]);
437 /* DR7 is loaded from the VMCS. */
438 }
440 /*
441 * DR7 is saved and restored on every vmexit. Other debug registers only
442 * need to be restored if their value is going to affect execution -- i.e.,
443 * if one of the breakpoints is enabled. So mask out all bits that don't
444 * enable some breakpoint functionality.
445 */
446 static void vmx_restore_dr(struct vcpu *v)
447 {
448 /* NB. __vmread() is not usable here, so we cannot read from the VMCS. */
449 if ( unlikely(v->arch.guest_context.debugreg[7] & DR7_ACTIVE_MASK) )
450 __restore_debug_registers(v);
451 }
453 static void vmx_vmcs_save(struct vcpu *v, struct hvm_hw_cpu *c)
454 {
455 uint32_t ev;
457 vmx_vmcs_enter(v);
459 c->cr0 = v->arch.hvm_vcpu.guest_cr[0];
460 c->cr2 = v->arch.hvm_vcpu.guest_cr[2];
461 c->cr3 = v->arch.hvm_vcpu.guest_cr[3];
462 c->cr4 = v->arch.hvm_vcpu.guest_cr[4];
464 c->msr_efer = v->arch.hvm_vcpu.guest_efer;
466 c->sysenter_cs = __vmread(GUEST_SYSENTER_CS);
467 c->sysenter_esp = __vmread(GUEST_SYSENTER_ESP);
468 c->sysenter_eip = __vmread(GUEST_SYSENTER_EIP);
470 c->pending_event = 0;
471 c->error_code = 0;
472 if ( ((ev = __vmread(VM_ENTRY_INTR_INFO)) & INTR_INFO_VALID_MASK) &&
473 hvm_event_needs_reinjection((ev >> 8) & 7, ev & 0xff) )
474 {
475 c->pending_event = ev;
476 c->error_code = __vmread(VM_ENTRY_EXCEPTION_ERROR_CODE);
477 }
479 vmx_vmcs_exit(v);
480 }
482 static int vmx_restore_cr0_cr3(
483 struct vcpu *v, unsigned long cr0, unsigned long cr3)
484 {
485 unsigned long mfn = 0;
486 p2m_type_t p2mt;
488 if ( cr0 & X86_CR0_PG )
489 {
490 mfn = mfn_x(gfn_to_mfn(v->domain, cr3 >> PAGE_SHIFT, &p2mt));
491 if ( !p2m_is_ram(p2mt) || !get_page(mfn_to_page(mfn), v->domain) )
492 {
493 gdprintk(XENLOG_ERR, "Invalid CR3 value=0x%lx\n", cr3);
494 return -EINVAL;
495 }
496 }
498 if ( v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PG )
499 put_page(pagetable_get_page(v->arch.guest_table));
501 v->arch.guest_table = pagetable_from_pfn(mfn);
503 v->arch.hvm_vcpu.guest_cr[0] = cr0 | X86_CR0_ET;
504 v->arch.hvm_vcpu.guest_cr[3] = cr3;
506 return 0;
507 }
509 static int vmx_vmcs_restore(struct vcpu *v, struct hvm_hw_cpu *c)
510 {
511 int rc;
513 if ( c->pending_valid &&
514 ((c->pending_type == 1) || (c->pending_type > 6) ||
515 (c->pending_reserved != 0)) )
516 {
517 gdprintk(XENLOG_ERR, "Invalid pending event 0x%"PRIx32".\n",
518 c->pending_event);
519 return -EINVAL;
520 }
522 rc = vmx_restore_cr0_cr3(v, c->cr0, c->cr3);
523 if ( rc )
524 return rc;
526 vmx_vmcs_enter(v);
528 v->arch.hvm_vcpu.guest_cr[2] = c->cr2;
529 v->arch.hvm_vcpu.guest_cr[4] = c->cr4;
530 vmx_update_guest_cr(v, 0);
531 vmx_update_guest_cr(v, 2);
532 vmx_update_guest_cr(v, 4);
534 #ifdef HVM_DEBUG_SUSPEND
535 printk("%s: cr3=0x%"PRIx64", cr0=0x%"PRIx64", cr4=0x%"PRIx64".\n",
536 __func__, c->cr3, c->cr0, c->cr4);
537 #endif
539 v->arch.hvm_vcpu.guest_efer = c->msr_efer;
540 vmx_update_guest_efer(v);
542 __vmwrite(GUEST_SYSENTER_CS, c->sysenter_cs);
543 __vmwrite(GUEST_SYSENTER_ESP, c->sysenter_esp);
544 __vmwrite(GUEST_SYSENTER_EIP, c->sysenter_eip);
546 __vmwrite(GUEST_DR7, c->dr7);
548 vmx_vmcs_exit(v);
550 paging_update_paging_modes(v);
552 if ( c->pending_valid )
553 {
554 gdprintk(XENLOG_INFO, "Re-injecting 0x%"PRIx32", 0x%"PRIx32"\n",
555 c->pending_event, c->error_code);
557 if ( hvm_event_needs_reinjection(c->pending_type, c->pending_vector) )
558 {
559 vmx_vmcs_enter(v);
560 __vmwrite(VM_ENTRY_INTR_INFO, c->pending_event);
561 __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE, c->error_code);
562 vmx_vmcs_exit(v);
563 }
564 }
566 return 0;
567 }
569 #if defined(__x86_64__) && defined(HVM_DEBUG_SUSPEND)
570 static void dump_msr_state(struct vmx_msr_state *m)
571 {
572 int i = 0;
573 printk("**** msr state ****\n");
574 printk("shadow_gs=0x%lx, flags=0x%lx, msr_items:", m->shadow_gs, m->flags);
575 for ( i = 0; i < VMX_MSR_COUNT; i++ )
576 printk("0x%lx,", m->msrs[i]);
577 printk("\n");
578 }
579 #else
580 #define dump_msr_state(m) ((void)0)
581 #endif
583 static void vmx_save_cpu_state(struct vcpu *v, struct hvm_hw_cpu *data)
584 {
585 #ifdef __x86_64__
586 struct vmx_msr_state *guest_state = &v->arch.hvm_vmx.msr_state;
587 unsigned long guest_flags = guest_state->flags;
589 data->shadow_gs = v->arch.hvm_vmx.shadow_gs;
590 data->msr_cstar = v->arch.hvm_vmx.cstar;
592 /* save msrs */
593 data->msr_flags = guest_flags;
594 data->msr_lstar = guest_state->msrs[VMX_INDEX_MSR_LSTAR];
595 data->msr_star = guest_state->msrs[VMX_INDEX_MSR_STAR];
596 data->msr_syscall_mask = guest_state->msrs[VMX_INDEX_MSR_SYSCALL_MASK];
597 #endif
599 data->tsc = hvm_get_guest_time(v);
601 dump_msr_state(guest_state);
602 }
604 static void vmx_load_cpu_state(struct vcpu *v, struct hvm_hw_cpu *data)
605 {
606 #ifdef __x86_64__
607 struct vmx_msr_state *guest_state = &v->arch.hvm_vmx.msr_state;
609 /* restore msrs */
610 guest_state->flags = data->msr_flags;
611 guest_state->msrs[VMX_INDEX_MSR_LSTAR] = data->msr_lstar;
612 guest_state->msrs[VMX_INDEX_MSR_STAR] = data->msr_star;
613 guest_state->msrs[VMX_INDEX_MSR_SYSCALL_MASK] = data->msr_syscall_mask;
615 v->arch.hvm_vmx.cstar = data->msr_cstar;
616 v->arch.hvm_vmx.shadow_gs = data->shadow_gs;
617 #endif
619 hvm_set_guest_time(v, data->tsc);
621 dump_msr_state(guest_state);
622 }
625 static void vmx_save_vmcs_ctxt(struct vcpu *v, struct hvm_hw_cpu *ctxt)
626 {
627 vmx_save_cpu_state(v, ctxt);
628 vmx_vmcs_save(v, ctxt);
629 }
631 static int vmx_load_vmcs_ctxt(struct vcpu *v, struct hvm_hw_cpu *ctxt)
632 {
633 vmx_load_cpu_state(v, ctxt);
635 if ( vmx_vmcs_restore(v, ctxt) )
636 {
637 gdprintk(XENLOG_ERR, "vmx_vmcs restore failed!\n");
638 domain_crash(v->domain);
639 return -EINVAL;
640 }
642 return 0;
643 }
645 static void vmx_fpu_enter(struct vcpu *v)
646 {
647 setup_fpu(v);
648 __vm_clear_bit(EXCEPTION_BITMAP, TRAP_no_device);
649 v->arch.hvm_vmx.host_cr0 &= ~X86_CR0_TS;
650 __vmwrite(HOST_CR0, v->arch.hvm_vmx.host_cr0);
651 }
653 static void vmx_fpu_leave(struct vcpu *v)
654 {
655 ASSERT(!v->fpu_dirtied);
656 ASSERT(read_cr0() & X86_CR0_TS);
658 if ( !(v->arch.hvm_vmx.host_cr0 & X86_CR0_TS) )
659 {
660 v->arch.hvm_vmx.host_cr0 |= X86_CR0_TS;
661 __vmwrite(HOST_CR0, v->arch.hvm_vmx.host_cr0);
662 }
664 /*
665 * If the guest does not have TS enabled then we must cause and handle an
666 * exception on first use of the FPU. If the guest *does* have TS enabled
667 * then this is not necessary: no FPU activity can occur until the guest
668 * clears CR0.TS, and we will initialise the FPU when that happens.
669 */
670 if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS) )
671 {
672 v->arch.hvm_vcpu.hw_cr[0] |= X86_CR0_TS;
673 __vmwrite(GUEST_CR0, v->arch.hvm_vcpu.hw_cr[0]);
674 __vm_set_bit(EXCEPTION_BITMAP, TRAP_no_device);
675 }
676 }
678 static void vmx_ctxt_switch_from(struct vcpu *v)
679 {
680 vmx_fpu_leave(v);
681 vmx_save_guest_msrs(v);
682 vmx_restore_host_msrs();
683 vmx_save_dr(v);
684 vpmu_save(v);
685 }
687 static void vmx_ctxt_switch_to(struct vcpu *v)
688 {
689 /* HOST_CR4 in VMCS is always mmu_cr4_features. Sync CR4 now. */
690 if ( unlikely(read_cr4() != mmu_cr4_features) )
691 write_cr4(mmu_cr4_features);
693 vmx_restore_guest_msrs(v);
694 vmx_restore_dr(v);
695 vpmu_load(v);
696 }
698 static unsigned long vmx_get_segment_base(struct vcpu *v, enum x86_segment seg)
699 {
700 unsigned long base = 0;
701 int long_mode = 0;
703 ASSERT(v == current);
705 if ( hvm_long_mode_enabled(v) &&
706 (__vmread(GUEST_CS_AR_BYTES) & X86_SEG_AR_CS_LM_ACTIVE) )
707 long_mode = 1;
709 switch ( seg )
710 {
711 case x86_seg_cs: if ( !long_mode ) base = __vmread(GUEST_CS_BASE); break;
712 case x86_seg_ds: if ( !long_mode ) base = __vmread(GUEST_DS_BASE); break;
713 case x86_seg_es: if ( !long_mode ) base = __vmread(GUEST_ES_BASE); break;
714 case x86_seg_fs: base = __vmread(GUEST_FS_BASE); break;
715 case x86_seg_gs: base = __vmread(GUEST_GS_BASE); break;
716 case x86_seg_ss: if ( !long_mode ) base = __vmread(GUEST_SS_BASE); break;
717 case x86_seg_tr: base = __vmread(GUEST_TR_BASE); break;
718 case x86_seg_gdtr: base = __vmread(GUEST_GDTR_BASE); break;
719 case x86_seg_idtr: base = __vmread(GUEST_IDTR_BASE); break;
720 case x86_seg_ldtr: base = __vmread(GUEST_LDTR_BASE); break;
721 default: BUG(); break;
722 }
724 return base;
725 }
727 static void vmx_get_segment_register(struct vcpu *v, enum x86_segment seg,
728 struct segment_register *reg)
729 {
730 uint32_t attr = 0;
732 vmx_vmcs_enter(v);
734 switch ( seg )
735 {
736 case x86_seg_cs:
737 reg->sel = __vmread(GUEST_CS_SELECTOR);
738 reg->limit = __vmread(GUEST_CS_LIMIT);
739 reg->base = __vmread(GUEST_CS_BASE);
740 attr = __vmread(GUEST_CS_AR_BYTES);
741 break;
742 case x86_seg_ds:
743 reg->sel = __vmread(GUEST_DS_SELECTOR);
744 reg->limit = __vmread(GUEST_DS_LIMIT);
745 reg->base = __vmread(GUEST_DS_BASE);
746 attr = __vmread(GUEST_DS_AR_BYTES);
747 break;
748 case x86_seg_es:
749 reg->sel = __vmread(GUEST_ES_SELECTOR);
750 reg->limit = __vmread(GUEST_ES_LIMIT);
751 reg->base = __vmread(GUEST_ES_BASE);
752 attr = __vmread(GUEST_ES_AR_BYTES);
753 break;
754 case x86_seg_fs:
755 reg->sel = __vmread(GUEST_FS_SELECTOR);
756 reg->limit = __vmread(GUEST_FS_LIMIT);
757 reg->base = __vmread(GUEST_FS_BASE);
758 attr = __vmread(GUEST_FS_AR_BYTES);
759 break;
760 case x86_seg_gs:
761 reg->sel = __vmread(GUEST_GS_SELECTOR);
762 reg->limit = __vmread(GUEST_GS_LIMIT);
763 reg->base = __vmread(GUEST_GS_BASE);
764 attr = __vmread(GUEST_GS_AR_BYTES);
765 break;
766 case x86_seg_ss:
767 reg->sel = __vmread(GUEST_SS_SELECTOR);
768 reg->limit = __vmread(GUEST_SS_LIMIT);
769 reg->base = __vmread(GUEST_SS_BASE);
770 attr = __vmread(GUEST_SS_AR_BYTES);
771 break;
772 case x86_seg_tr:
773 reg->sel = __vmread(GUEST_TR_SELECTOR);
774 reg->limit = __vmread(GUEST_TR_LIMIT);
775 reg->base = __vmread(GUEST_TR_BASE);
776 attr = __vmread(GUEST_TR_AR_BYTES);
777 break;
778 case x86_seg_gdtr:
779 reg->limit = __vmread(GUEST_GDTR_LIMIT);
780 reg->base = __vmread(GUEST_GDTR_BASE);
781 break;
782 case x86_seg_idtr:
783 reg->limit = __vmread(GUEST_IDTR_LIMIT);
784 reg->base = __vmread(GUEST_IDTR_BASE);
785 break;
786 case x86_seg_ldtr:
787 reg->sel = __vmread(GUEST_LDTR_SELECTOR);
788 reg->limit = __vmread(GUEST_LDTR_LIMIT);
789 reg->base = __vmread(GUEST_LDTR_BASE);
790 attr = __vmread(GUEST_LDTR_AR_BYTES);
791 break;
792 default:
793 BUG();
794 }
796 vmx_vmcs_exit(v);
798 reg->attr.bytes = (attr & 0xff) | ((attr >> 4) & 0xf00);
799 /* Unusable flag is folded into Present flag. */
800 if ( attr & (1u<<16) )
801 reg->attr.fields.p = 0;
802 }
804 static void vmx_set_segment_register(struct vcpu *v, enum x86_segment seg,
805 struct segment_register *reg)
806 {
807 uint32_t attr;
809 attr = reg->attr.bytes;
810 attr = ((attr & 0xf00) << 4) | (attr & 0xff);
812 /* Not-present must mean unusable. */
813 if ( !reg->attr.fields.p )
814 attr |= (1u << 16);
816 vmx_vmcs_enter(v);
818 switch ( seg )
819 {
820 case x86_seg_cs:
821 __vmwrite(GUEST_CS_SELECTOR, reg->sel);
822 __vmwrite(GUEST_CS_LIMIT, reg->limit);
823 __vmwrite(GUEST_CS_BASE, reg->base);
824 __vmwrite(GUEST_CS_AR_BYTES, attr);
825 break;
826 case x86_seg_ds:
827 __vmwrite(GUEST_DS_SELECTOR, reg->sel);
828 __vmwrite(GUEST_DS_LIMIT, reg->limit);
829 __vmwrite(GUEST_DS_BASE, reg->base);
830 __vmwrite(GUEST_DS_AR_BYTES, attr);
831 break;
832 case x86_seg_es:
833 __vmwrite(GUEST_ES_SELECTOR, reg->sel);
834 __vmwrite(GUEST_ES_LIMIT, reg->limit);
835 __vmwrite(GUEST_ES_BASE, reg->base);
836 __vmwrite(GUEST_ES_AR_BYTES, attr);
837 break;
838 case x86_seg_fs:
839 __vmwrite(GUEST_FS_SELECTOR, reg->sel);
840 __vmwrite(GUEST_FS_LIMIT, reg->limit);
841 __vmwrite(GUEST_FS_BASE, reg->base);
842 __vmwrite(GUEST_FS_AR_BYTES, attr);
843 break;
844 case x86_seg_gs:
845 __vmwrite(GUEST_GS_SELECTOR, reg->sel);
846 __vmwrite(GUEST_GS_LIMIT, reg->limit);
847 __vmwrite(GUEST_GS_BASE, reg->base);
848 __vmwrite(GUEST_GS_AR_BYTES, attr);
849 break;
850 case x86_seg_ss:
851 __vmwrite(GUEST_SS_SELECTOR, reg->sel);
852 __vmwrite(GUEST_SS_LIMIT, reg->limit);
853 __vmwrite(GUEST_SS_BASE, reg->base);
854 __vmwrite(GUEST_SS_AR_BYTES, attr);
855 break;
856 case x86_seg_tr:
857 __vmwrite(GUEST_TR_SELECTOR, reg->sel);
858 __vmwrite(GUEST_TR_LIMIT, reg->limit);
859 __vmwrite(GUEST_TR_BASE, reg->base);
860 __vmwrite(GUEST_TR_AR_BYTES, attr);
861 break;
862 case x86_seg_gdtr:
863 __vmwrite(GUEST_GDTR_LIMIT, reg->limit);
864 __vmwrite(GUEST_GDTR_BASE, reg->base);
865 break;
866 case x86_seg_idtr:
867 __vmwrite(GUEST_IDTR_LIMIT, reg->limit);
868 __vmwrite(GUEST_IDTR_BASE, reg->base);
869 break;
870 case x86_seg_ldtr:
871 __vmwrite(GUEST_LDTR_SELECTOR, reg->sel);
872 __vmwrite(GUEST_LDTR_LIMIT, reg->limit);
873 __vmwrite(GUEST_LDTR_BASE, reg->base);
874 __vmwrite(GUEST_LDTR_AR_BYTES, attr);
875 break;
876 default:
877 BUG();
878 }
880 vmx_vmcs_exit(v);
881 }
883 static void vmx_set_tsc_offset(struct vcpu *v, u64 offset)
884 {
885 vmx_vmcs_enter(v);
886 __vmwrite(TSC_OFFSET, offset);
887 #if defined (__i386__)
888 __vmwrite(TSC_OFFSET_HIGH, offset >> 32);
889 #endif
890 vmx_vmcs_exit(v);
891 }
893 void do_nmi(struct cpu_user_regs *);
895 static void vmx_init_hypercall_page(struct domain *d, void *hypercall_page)
896 {
897 char *p;
898 int i;
900 for ( i = 0; i < (PAGE_SIZE / 32); i++ )
901 {
902 p = (char *)(hypercall_page + (i * 32));
903 *(u8 *)(p + 0) = 0xb8; /* mov imm32, %eax */
904 *(u32 *)(p + 1) = i;
905 *(u8 *)(p + 5) = 0x0f; /* vmcall */
906 *(u8 *)(p + 6) = 0x01;
907 *(u8 *)(p + 7) = 0xc1;
908 *(u8 *)(p + 8) = 0xc3; /* ret */
909 }
911 /* Don't support HYPERVISOR_iret at the moment */
912 *(u16 *)(hypercall_page + (__HYPERVISOR_iret * 32)) = 0x0b0f; /* ud2 */
913 }
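/*
 * Each 32-byte slot written above is a complete hypercall stub
 * ("mov $nr, %eax; vmcall; ret"), so a guest invokes hypercall nr by
 * calling into the page at offset nr*32. Hypothetical guest-side
 * illustration (not part of this file; the two-argument form and the
 * ebx/ecx argument registers follow the 32-bit hypercall convention and
 * are assumptions here):
 *
 *     static inline long hypercall2_sketch(void *page, unsigned int nr,
 *                                          unsigned long a1, unsigned long a2)
 *     {
 *         long ret;
 *         void *stub = (char *)page + (nr * 32);
 *         asm volatile ( "call *%[stub]"
 *                        : "=a" (ret)
 *                        : [stub] "r" (stub), "b" (a1), "c" (a2)
 *                        : "memory" );
 *         return ret;
 *     }
 */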
915 static enum hvm_intblk vmx_interrupt_blocked(
916 struct vcpu *v, struct hvm_intack intack)
917 {
918 unsigned long intr_shadow;
920 /*
921 * Test EFLAGS.IF first. It is often the most likely reason for interrupt
922 * blockage, and is the cheapest to test (because no VMREAD is required).
923 */
924 if ( (intack.source != hvm_intsrc_nmi) &&
925 !(guest_cpu_user_regs()->eflags & X86_EFLAGS_IF) )
926 return hvm_intblk_rflags_ie;
928 intr_shadow = __vmread(GUEST_INTERRUPTIBILITY_INFO);
930 if ( intr_shadow & (VMX_INTR_SHADOW_STI|VMX_INTR_SHADOW_MOV_SS) )
931 return hvm_intblk_shadow;
933 if ( intack.source == hvm_intsrc_nmi )
934 return ((intr_shadow & VMX_INTR_SHADOW_NMI) ?
935 hvm_intblk_nmi_iret : hvm_intblk_none);
937 ASSERT((intack.source == hvm_intsrc_pic) ||
938 (intack.source == hvm_intsrc_lapic));
940 return hvm_intblk_none;
941 }
943 static void vmx_update_host_cr3(struct vcpu *v)
944 {
945 vmx_vmcs_enter(v);
946 __vmwrite(HOST_CR3, v->arch.cr3);
947 vmx_vmcs_exit(v);
948 }
950 static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr)
951 {
952 vmx_vmcs_enter(v);
954 switch ( cr )
955 {
956 case 0: {
957 unsigned long hw_cr0_mask =
958 X86_CR0_NE | X86_CR0_PG | X86_CR0_WP | X86_CR0_PE;
960 if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS) )
961 {
962 if ( v != current )
963 hw_cr0_mask |= X86_CR0_TS;
964 else if ( v->arch.hvm_vcpu.hw_cr[0] & X86_CR0_TS )
965 vmx_fpu_enter(v);
966 }
968 v->arch.hvm_vmx.vmxemul &= ~VMXEMUL_REALMODE;
969 if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE) )
970 v->arch.hvm_vmx.vmxemul |= VMXEMUL_REALMODE;
972 v->arch.hvm_vcpu.hw_cr[0] =
973 v->arch.hvm_vcpu.guest_cr[0] | hw_cr0_mask;
974 __vmwrite(GUEST_CR0, v->arch.hvm_vcpu.hw_cr[0]);
975 __vmwrite(CR0_READ_SHADOW, v->arch.hvm_vcpu.guest_cr[0]);
976 break;
977 }
978 case 2:
979 /* CR2 is updated in exit stub. */
980 break;
981 case 3:
982 __vmwrite(GUEST_CR3, v->arch.hvm_vcpu.hw_cr[3]);
983 break;
984 case 4:
985 v->arch.hvm_vcpu.hw_cr[4] =
986 v->arch.hvm_vcpu.guest_cr[4] | HVM_CR4_HOST_MASK;
987 __vmwrite(GUEST_CR4, v->arch.hvm_vcpu.hw_cr[4]);
988 __vmwrite(CR4_READ_SHADOW, v->arch.hvm_vcpu.guest_cr[4]);
989 break;
990 default:
991 BUG();
992 }
994 vmx_vmcs_exit(v);
995 }
997 static void vmx_update_guest_efer(struct vcpu *v)
998 {
999 #ifdef __x86_64__
1000 unsigned long vm_entry_value;
1002 vmx_vmcs_enter(v);
1004 vm_entry_value = __vmread(VM_ENTRY_CONTROLS);
1005 if ( v->arch.hvm_vcpu.guest_efer & EFER_LMA )
1006 vm_entry_value |= VM_ENTRY_IA32E_MODE;
1007 else
1008 vm_entry_value &= ~VM_ENTRY_IA32E_MODE;
1009 __vmwrite(VM_ENTRY_CONTROLS, vm_entry_value);
1011 vmx_vmcs_exit(v);
1012 #endif
1014 if ( v == current )
1015 write_efer((read_efer() & ~(EFER_NX|EFER_SCE)) |
1016 (v->arch.hvm_vcpu.guest_efer & (EFER_NX|EFER_SCE)));
1017 }
1019 static void vmx_flush_guest_tlbs(void)
1020 {
1021 /* No tagged TLB support on VMX yet. The fact that we're in Xen
1022 * at all means any guest will have a clean TLB when it's next run,
1023 * because VMRESUME will flush it for us. */
1024 }
1026 static void vmx_inject_exception(
1027 unsigned int trapnr, int errcode, unsigned long cr2)
1028 {
1029 struct vcpu *curr = current;
1031 vmx_inject_hw_exception(curr, trapnr, errcode);
1033 if ( trapnr == TRAP_page_fault )
1034 curr->arch.hvm_vcpu.guest_cr[2] = cr2;
1036 if ( (trapnr == TRAP_debug) &&
1037 (guest_cpu_user_regs()->eflags & X86_EFLAGS_TF) )
1038 {
1039 __restore_debug_registers(curr);
1040 write_debugreg(6, read_debugreg(6) | 0x4000);
1041 }
1042 }
1044 static int vmx_event_pending(struct vcpu *v)
1045 {
1046 ASSERT(v == current);
1047 return (__vmread(VM_ENTRY_INTR_INFO) & INTR_INFO_VALID_MASK);
1048 }
1050 static int vmx_do_pmu_interrupt(struct cpu_user_regs *regs)
1051 {
1052 return vpmu_do_interrupt(regs);
1053 }
1055 static struct hvm_function_table vmx_function_table = {
1056 .name = "VMX",
1057 .domain_initialise = vmx_domain_initialise,
1058 .domain_destroy = vmx_domain_destroy,
1059 .vcpu_initialise = vmx_vcpu_initialise,
1060 .vcpu_destroy = vmx_vcpu_destroy,
1061 .save_cpu_ctxt = vmx_save_vmcs_ctxt,
1062 .load_cpu_ctxt = vmx_load_vmcs_ctxt,
1063 .interrupt_blocked = vmx_interrupt_blocked,
1064 .guest_x86_mode = vmx_guest_x86_mode,
1065 .get_segment_base = vmx_get_segment_base,
1066 .get_segment_register = vmx_get_segment_register,
1067 .set_segment_register = vmx_set_segment_register,
1068 .update_host_cr3 = vmx_update_host_cr3,
1069 .update_guest_cr = vmx_update_guest_cr,
1070 .update_guest_efer = vmx_update_guest_efer,
1071 .flush_guest_tlbs = vmx_flush_guest_tlbs,
1072 .set_tsc_offset = vmx_set_tsc_offset,
1073 .inject_exception = vmx_inject_exception,
1074 .init_hypercall_page = vmx_init_hypercall_page,
1075 .event_pending = vmx_event_pending,
1076 .do_pmu_interrupt = vmx_do_pmu_interrupt,
1077 .cpu_up = vmx_cpu_up,
1078 .cpu_down = vmx_cpu_down,
1079 };
1081 void start_vmx(void)
1082 {
1083 static int bootstrapped;
1085 vmx_save_host_msrs();
1087 if ( bootstrapped )
1088 {
1089 if ( hvm_enabled && !vmx_cpu_up() )
1090 {
1091 printk("VMX: FATAL: failed to initialise CPU%d!\n",
1092 smp_processor_id());
1093 BUG();
1094 }
1095 return;
1096 }
1098 bootstrapped = 1;
1100 /* Xen does not fill x86_capability words except 0. */
1101 boot_cpu_data.x86_capability[4] = cpuid_ecx(1);
1103 if ( !test_bit(X86_FEATURE_VMXE, &boot_cpu_data.x86_capability) )
1104 return;
1106 set_in_cr4(X86_CR4_VMXE);
1108 if ( !vmx_cpu_up() )
1109 {
1110 printk("VMX: failed to initialise.\n");
1111 return;
1112 }
1114 setup_vmcs_dump();
1116 hvm_enable(&vmx_function_table);
1117 }
1119 /*
1120 * Not all cases receive valid value in the VM-exit instruction length field.
1121 * Callers must know what they're doing!
1122 */
1123 static int __get_instruction_length(void)
1124 {
1125 int len;
1126 len = __vmread(VM_EXIT_INSTRUCTION_LEN); /* Safe: callers audited */
1127 BUG_ON((len < 1) || (len > 15));
1128 return len;
1129 }
1131 static void __update_guest_eip(unsigned long inst_len)
1132 {
1133 struct cpu_user_regs *regs = guest_cpu_user_regs();
1134 unsigned long x;
1136 regs->eip += inst_len;
1137 regs->eflags &= ~X86_EFLAGS_RF;
1139 x = __vmread(GUEST_INTERRUPTIBILITY_INFO);
1140 if ( x & (VMX_INTR_SHADOW_STI | VMX_INTR_SHADOW_MOV_SS) )
1141 {
1142 x &= ~(VMX_INTR_SHADOW_STI | VMX_INTR_SHADOW_MOV_SS);
1143 __vmwrite(GUEST_INTERRUPTIBILITY_INFO, x);
1144 }
1146 if ( regs->eflags & X86_EFLAGS_TF )
1147 vmx_inject_exception(TRAP_debug, HVM_DELIVER_NO_ERROR_CODE, 0);
1148 }
1150 void vmx_do_no_device_fault(void)
1151 {
1152 struct vcpu *curr = current;
1154 vmx_fpu_enter(curr);
1156 /* Disable TS in guest CR0 unless the guest wants the exception too. */
1157 if ( !(curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS) )
1158 {
1159 curr->arch.hvm_vcpu.hw_cr[0] &= ~X86_CR0_TS;
1160 __vmwrite(GUEST_CR0, curr->arch.hvm_vcpu.hw_cr[0]);
1161 }
1162 }
1164 #define bitmaskof(idx) (1U << ((idx) & 31))
1165 void vmx_cpuid_intercept(
1166 unsigned int *eax, unsigned int *ebx,
1167 unsigned int *ecx, unsigned int *edx)
1169 unsigned int input = *eax;
1170 unsigned int count = *ecx;
1172 hvm_cpuid(input, eax, ebx, ecx, edx);
1174 switch ( input )
1176 case 0x00000001:
1177 /* Mask AMD-only features. */
1178 *ecx &= ~(bitmaskof(X86_FEATURE_POPCNT));
1179 break;
1181 case 0x00000004:
1182 cpuid_count(input, count, eax, ebx, ecx, edx);
1183 *eax &= 0x3FFF; /* one core */
1184 break;
1186 case 0x00000006:
1187 case 0x00000009:
1188 *eax = *ebx = *ecx = *edx = 0;
1189 break;
1191 case 0x80000001:
1192 /* Only a few features are advertised in Intel's 0x80000001. */
1193 *ecx &= (bitmaskof(X86_FEATURE_LAHF_LM));
1194 *edx &= (bitmaskof(X86_FEATURE_NX) |
1195 bitmaskof(X86_FEATURE_LM) |
1196 bitmaskof(X86_FEATURE_SYSCALL));
1197 break;
1200 HVMTRACE_3D(CPUID, current, input,
1201 ((uint64_t)*eax << 32) | *ebx, ((uint64_t)*ecx << 32) | *edx);
1204 static void vmx_do_cpuid(struct cpu_user_regs *regs)
1206 unsigned int eax, ebx, ecx, edx;
1208 eax = regs->eax;
1209 ebx = regs->ebx;
1210 ecx = regs->ecx;
1211 edx = regs->edx;
1213 vmx_cpuid_intercept(&eax, &ebx, &ecx, &edx);
1215 regs->eax = eax;
1216 regs->ebx = ebx;
1217 regs->ecx = ecx;
1218 regs->edx = edx;
1221 #define CASE_GET_REG_P(REG, reg) \
1222 case REG_ ## REG: reg_p = (unsigned long *)&(regs->reg); break
1224 #ifdef __i386__
1225 #define CASE_EXTEND_GET_REG_P
1226 #else
1227 #define CASE_EXTEND_GET_REG_P \
1228 CASE_GET_REG_P(R8, r8); \
1229 CASE_GET_REG_P(R9, r9); \
1230 CASE_GET_REG_P(R10, r10); \
1231 CASE_GET_REG_P(R11, r11); \
1232 CASE_GET_REG_P(R12, r12); \
1233 CASE_GET_REG_P(R13, r13); \
1234 CASE_GET_REG_P(R14, r14); \
1235 CASE_GET_REG_P(R15, r15)
1236 #endif
1238 static void vmx_dr_access(unsigned long exit_qualification,
1239 struct cpu_user_regs *regs)
1240 {
1241 struct vcpu *v = current;
1243 HVMTRACE_0D(DR_WRITE, v);
1245 if ( !v->arch.hvm_vcpu.flag_dr_dirty )
1246 __restore_debug_registers(v);
1248 /* Allow guest direct access to DR registers */
1249 v->arch.hvm_vmx.exec_control &= ~CPU_BASED_MOV_DR_EXITING;
1250 __vmwrite(CPU_BASED_VM_EXEC_CONTROL, v->arch.hvm_vmx.exec_control);
1251 }
1253 /*
1254 * Invalidate the TLB entry for va, and the shadow page corresponding
1255 * to the address va.
1256 */
1257 static void vmx_do_invlpg(unsigned long va)
1258 {
1259 struct vcpu *v = current;
1261 HVMTRACE_2D(INVLPG, v, /*invlpga=*/ 0, va);
1263 /*
1264 * We do the safest thing first, then try to update the shadow by
1265 * copying from the guest.
1266 */
1267 paging_invlpg(v, va);
1268 }
1270 /* Get segment for OUTS according to guest instruction. */
1271 static enum x86_segment vmx_outs_get_segment(
1272 int long_mode, unsigned long eip, int inst_len)
1274 unsigned char inst[MAX_INST_LEN];
1275 enum x86_segment seg = x86_seg_ds;
1276 int i;
1277 extern int inst_copy_from_guest(unsigned char *, unsigned long, int);
1279 if ( likely(cpu_has_vmx_ins_outs_instr_info) )
1281 unsigned int instr_info = __vmread(VMX_INSTRUCTION_INFO);
1283 /* Get segment register according to bits 17:15. */
1284 switch ( (instr_info >> 15) & 7 )
1286 case 0: seg = x86_seg_es; break;
1287 case 1: seg = x86_seg_cs; break;
1288 case 2: seg = x86_seg_ss; break;
1289 case 3: seg = x86_seg_ds; break;
1290 case 4: seg = x86_seg_fs; break;
1291 case 5: seg = x86_seg_gs; break;
1292 default: BUG();
1295 goto out;
1298 if ( !long_mode )
1299 eip += __vmread(GUEST_CS_BASE);
1301 memset(inst, 0, MAX_INST_LEN);
1302 if ( inst_copy_from_guest(inst, eip, inst_len) != inst_len )
1304 gdprintk(XENLOG_ERR, "Get guest instruction failed\n");
1305 domain_crash(current->domain);
1306 goto out;
1309 for ( i = 0; i < inst_len; i++ )
1311 switch ( inst[i] )
1313 case 0xf3: /* REPZ */
1314 case 0xf2: /* REPNZ */
1315 case 0xf0: /* LOCK */
1316 case 0x66: /* data32 */
1317 case 0x67: /* addr32 */
1318 #ifdef __x86_64__
1319 case 0x40 ... 0x4f: /* REX */
1320 #endif
1321 continue;
1322 case 0x2e: /* CS */
1323 seg = x86_seg_cs;
1324 continue;
1325 case 0x36: /* SS */
1326 seg = x86_seg_ss;
1327 continue;
1328 case 0x26: /* ES */
1329 seg = x86_seg_es;
1330 continue;
1331 case 0x64: /* FS */
1332 seg = x86_seg_fs;
1333 continue;
1334 case 0x65: /* GS */
1335 seg = x86_seg_gs;
1336 continue;
1337 case 0x3e: /* DS */
1338 seg = x86_seg_ds;
1339 continue;
1343 out:
1344 return seg;
1347 static int vmx_str_pio_check_descriptor(int long_mode, unsigned long eip,
1348 int inst_len, enum x86_segment seg,
1349 unsigned long *base, u32 *limit,
1350 u32 *ar_bytes)
1352 enum vmcs_field ar_field, base_field, limit_field;
1354 *base = 0;
1355 *limit = 0;
1356 if ( seg != x86_seg_es )
1357 seg = vmx_outs_get_segment(long_mode, eip, inst_len);
1359 switch ( seg )
1361 case x86_seg_cs:
1362 ar_field = GUEST_CS_AR_BYTES;
1363 base_field = GUEST_CS_BASE;
1364 limit_field = GUEST_CS_LIMIT;
1365 break;
1366 case x86_seg_ds:
1367 ar_field = GUEST_DS_AR_BYTES;
1368 base_field = GUEST_DS_BASE;
1369 limit_field = GUEST_DS_LIMIT;
1370 break;
1371 case x86_seg_es:
1372 ar_field = GUEST_ES_AR_BYTES;
1373 base_field = GUEST_ES_BASE;
1374 limit_field = GUEST_ES_LIMIT;
1375 break;
1376 case x86_seg_fs:
1377 ar_field = GUEST_FS_AR_BYTES;
1378 base_field = GUEST_FS_BASE;
1379 limit_field = GUEST_FS_LIMIT;
1380 break;
1381 case x86_seg_gs:
1382 ar_field = GUEST_GS_AR_BYTES;
1383 base_field = GUEST_GS_BASE;
1384 limit_field = GUEST_GS_LIMIT;
1385 break;
1386 case x86_seg_ss:
1387 ar_field = GUEST_SS_AR_BYTES;
1388 base_field = GUEST_SS_BASE;
1389 limit_field = GUEST_SS_LIMIT;
1390 break;
1391 default:
1392 BUG();
1393 return 0;
1396 if ( !long_mode || seg == x86_seg_fs || seg == x86_seg_gs )
1398 *base = __vmread(base_field);
1399 *limit = __vmread(limit_field);
1401 *ar_bytes = __vmread(ar_field);
1403 return !(*ar_bytes & X86_SEG_AR_SEG_UNUSABLE);
1407 static int vmx_str_pio_check_limit(u32 limit, unsigned int size,
1408 u32 ar_bytes, unsigned long addr,
1409 unsigned long base, int df,
1410 unsigned long *count)
1412 unsigned long ea = addr - base;
1414 /* Offset must be within limits. */
1415 ASSERT(ea == (u32)ea);
1416 if ( (u32)(ea + size - 1) < (u32)ea ||
1417 (ar_bytes & 0xc) != 0x4 ? ea + size - 1 > limit
1418 : ea <= limit )
1419 return 0;
1421 /* Check the limit for repeated instructions, as above we checked
1422 only the first instance. Truncate the count if a limit violation
1423 would occur. Note that the checking is not necessary for page
1424 granular segments as transfers crossing page boundaries will be
1425 broken up anyway. */
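/*
 * Worked example (illustrative): for an expand-up, byte-granular
 * segment with limit 0xFFFF, ea = 0xFFF0, size = 2, DF clear and an
 * initial count of 20, ea + count*size - 1 = 0x10017 exceeds the limit,
 * so the count is truncated to (limit + 1 - ea) / size = 0x10 / 2 = 8.
 */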
1426 if ( !(ar_bytes & X86_SEG_AR_GRANULARITY) && *count > 1 )
1428 if ( (ar_bytes & 0xc) != 0x4 )
1430 /* expand-up */
1431 if ( !df )
1433 if ( ea + *count * size - 1 < ea ||
1434 ea + *count * size - 1 > limit )
1435 *count = (limit + 1UL - ea) / size;
1437 else
1439 if ( *count - 1 > ea / size )
1440 *count = ea / size + 1;
1443 else
1445 /* expand-down */
1446 if ( !df )
1448 if ( *count - 1 > -(s32)ea / size )
1449 *count = -(s32)ea / size + 1UL;
1451 else
1453 if ( ea < (*count - 1) * size ||
1454 ea - (*count - 1) * size <= limit )
1455 *count = (ea - limit - 1) / size + 1;
1458 ASSERT(*count);
1461 return 1;
1464 #ifdef __x86_64__
1465 static int vmx_str_pio_lm_check_limit(struct cpu_user_regs *regs,
1466 unsigned int size,
1467 unsigned long addr,
1468 unsigned long *count)
1470 if ( !is_canonical_address(addr) ||
1471 !is_canonical_address(addr + size - 1) )
1472 return 0;
1474 if ( *count > (1UL << 48) / size )
1475 *count = (1UL << 48) / size;
1477 if ( !(regs->eflags & EF_DF) )
1479 if ( addr + *count * size - 1 < addr ||
1480 !is_canonical_address(addr + *count * size - 1) )
1481 *count = (addr & ~((1UL << 48) - 1)) / size;
1483 else
1485 if ( (*count - 1) * size > addr ||
1486 !is_canonical_address(addr + (*count - 1) * size) )
1487 *count = (addr & ~((1UL << 48) - 1)) / size + 1;
1490 ASSERT(*count);
1492 return 1;
1494 #endif
1496 static void vmx_send_str_pio(struct cpu_user_regs *regs,
1497 struct hvm_io_op *pio_opp,
1498 unsigned long inst_len, unsigned int port,
1499 int sign, unsigned int size, int dir,
1500 int df, unsigned long addr,
1501 paddr_t paddr, unsigned long count)
1503 /*
1504 * Handle string pio instructions that cross pages or that
1505 * are unaligned. See the comments in hvm_domain.c/handle_mmio()
1506 */
1507 if ( (addr & PAGE_MASK) != ((addr + size - 1) & PAGE_MASK) ) {
1508 unsigned long value = 0;
1510 pio_opp->flags |= OVERLAP;
1512 if ( dir == IOREQ_WRITE ) /* OUTS */
1514 if ( hvm_paging_enabled(current) )
1516 int rv = hvm_copy_from_guest_virt(&value, addr, size);
1517 if ( rv == HVMCOPY_bad_gva_to_gfn )
1518 return; /* exception already injected */
1520 else
1521 (void)hvm_copy_from_guest_phys(&value, addr, size);
1523 else /* dir != IOREQ_WRITE */
1524 /* Remember where to write the result, as a *VA*.
1525 * Must be a VA so we can handle the page overlap
1526 * correctly in hvm_pio_assist() */
1527 pio_opp->addr = addr;
1529 if ( count == 1 )
1530 regs->eip += inst_len;
1532 send_pio_req(port, 1, size, value, dir, df, 0);
1533 } else {
1534 unsigned long last_addr = sign > 0 ? addr + count * size - 1
1535 : addr - (count - 1) * size;
1537 if ( (addr & PAGE_MASK) != (last_addr & PAGE_MASK) )
1539 if ( sign > 0 )
1540 count = (PAGE_SIZE - (addr & ~PAGE_MASK)) / size;
1541 else
1542 count = (addr & ~PAGE_MASK) / size + 1;
1543 } else
1544 regs->eip += inst_len;
1546 send_pio_req(port, count, size, paddr, dir, df, 1);
1550 static void vmx_do_str_pio(unsigned long exit_qualification,
1551 unsigned long inst_len,
1552 struct cpu_user_regs *regs,
1553 struct hvm_io_op *pio_opp)
1555 unsigned int port, size;
1556 int dir, df, vm86;
1557 unsigned long addr, count = 1, base;
1558 paddr_t paddr;
1559 unsigned long gfn;
1560 u32 ar_bytes, limit, pfec;
1561 int sign;
1562 int long_mode = 0;
1564 vm86 = regs->eflags & X86_EFLAGS_VM ? 1 : 0;
1565 df = regs->eflags & X86_EFLAGS_DF ? 1 : 0;
1567 if ( test_bit(6, &exit_qualification) )
1568 port = (exit_qualification >> 16) & 0xFFFF;
1569 else
1570 port = regs->edx & 0xffff;
1572 size = (exit_qualification & 7) + 1;
1573 dir = test_bit(3, &exit_qualification); /* direction */
1575 if ( dir == IOREQ_READ )
1576 HVMTRACE_2D(IO_READ, current, port, size);
1577 else
1578 HVMTRACE_2D(IO_WRITE, current, port, size);
1580 sign = regs->eflags & X86_EFLAGS_DF ? -1 : 1;
1581 ar_bytes = __vmread(GUEST_CS_AR_BYTES);
1582 if ( hvm_long_mode_enabled(current) &&
1583 (ar_bytes & X86_SEG_AR_CS_LM_ACTIVE) )
1584 long_mode = 1;
1585 addr = __vmread(GUEST_LINEAR_ADDRESS);
1587 if ( test_bit(5, &exit_qualification) ) { /* "rep" prefix */
1588 pio_opp->flags |= REPZ;
1589 count = regs->ecx;
1590 if ( !long_mode &&
1591 (vm86 || !(ar_bytes & X86_SEG_AR_DEF_OP_SIZE)) )
1592 count &= 0xFFFF;
1595 /*
1596 * In protected mode, guest linear address is invalid if the
1597 * selector is null.
1598 */
1599 if ( !vmx_str_pio_check_descriptor(long_mode, regs->eip, inst_len,
1600 dir==IOREQ_WRITE ? x86_seg_ds :
1601 x86_seg_es, &base, &limit,
1602 &ar_bytes) ) {
1603 if ( !long_mode ) {
1604 vmx_inject_hw_exception(current, TRAP_gp_fault, 0);
1605 return;
1607 addr = dir == IOREQ_WRITE ? base + regs->esi : regs->edi;
1610 if ( !long_mode )
1612 /* Segment must be readable for outs and writeable for ins. */
1613 if ( ((dir == IOREQ_WRITE)
1614 ? ((ar_bytes & 0xa) == 0x8)
1615 : ((ar_bytes & 0xa) != 0x2)) ||
1616 !vmx_str_pio_check_limit(limit, size, ar_bytes,
1617 addr, base, df, &count) )
1619 vmx_inject_hw_exception(current, TRAP_gp_fault, 0);
1620 return;
1623 #ifdef __x86_64__
1624 else if ( !vmx_str_pio_lm_check_limit(regs, size, addr, &count) )
1626 vmx_inject_hw_exception(current, TRAP_gp_fault, 0);
1627 return;
1629 #endif
1631 /* Translate the address to a physical address */
1632 pfec = PFEC_page_present;
1633 if ( dir == IOREQ_READ ) /* Read from PIO --> write to RAM */
1634 pfec |= PFEC_write_access;
1635 if ( ((__vmread(GUEST_SS_AR_BYTES) >> 5) & 3) == 3 )
1636 pfec |= PFEC_user_mode;
1637 gfn = paging_gva_to_gfn(current, addr, &pfec);
1638 if ( gfn == INVALID_GFN )
1640 /* The guest does not have the RAM address mapped.
1641 * Need to send in a page fault */
1642 vmx_inject_exception(TRAP_page_fault, pfec, addr);
1643 return;
1645 paddr = (paddr_t)gfn << PAGE_SHIFT | (addr & ~PAGE_MASK);
1647 vmx_send_str_pio(regs, pio_opp, inst_len, port, sign,
1648 size, dir, df, addr, paddr, count);
1651 static void vmx_io_instruction(unsigned long exit_qualification,
1652 unsigned long inst_len)
1654 struct cpu_user_regs *regs;
1655 struct hvm_io_op *pio_opp;
1657 pio_opp = &current->arch.hvm_vcpu.io_op;
1658 pio_opp->instr = INSTR_PIO;
1659 pio_opp->flags = 0;
1661 regs = &pio_opp->io_context;
1663 /* Copy current guest state into io instruction state structure. */
1664 memcpy(regs, guest_cpu_user_regs(), HVM_CONTEXT_STACK_BYTES);
1666 HVM_DBG_LOG(DBG_LEVEL_IO, "vm86 %d, eip=%x:%lx, "
1667 "exit_qualification = %lx",
1668 regs->eflags & X86_EFLAGS_VM ? 1 : 0,
1669 regs->cs, (unsigned long)regs->eip, exit_qualification);
1671 if ( test_bit(4, &exit_qualification) ) /* string instruction */
1672 vmx_do_str_pio(exit_qualification, inst_len, regs, pio_opp);
1673 else
1675 unsigned int port, size;
1676 int dir, df;
1678 df = regs->eflags & X86_EFLAGS_DF ? 1 : 0;
1680 if ( test_bit(6, &exit_qualification) )
1681 port = (exit_qualification >> 16) & 0xFFFF;
1682 else
1683 port = regs->edx & 0xffff;
1685 size = (exit_qualification & 7) + 1;
1686 dir = test_bit(3, &exit_qualification); /* direction */
1688 if ( dir == IOREQ_READ )
1689 HVMTRACE_2D(IO_READ, current, port, size);
1690 else
1691 HVMTRACE_3D(IO_WRITE, current, port, size, regs->eax);
1693 if ( port == 0xe9 && dir == IOREQ_WRITE && size == 1 )
1694 hvm_print_line(current, regs->eax); /* guest debug output */
1696 regs->eip += inst_len;
1697 send_pio_req(port, 1, size, regs->eax, dir, df, 0);
1701 #define CASE_SET_REG(REG, reg) \
1702 case REG_ ## REG: regs->reg = value; break
1703 #define CASE_GET_REG(REG, reg) \
1704 case REG_ ## REG: value = regs->reg; break
1706 #define CASE_EXTEND_SET_REG \
1707 CASE_EXTEND_REG(S)
1708 #define CASE_EXTEND_GET_REG \
1709 CASE_EXTEND_REG(G)
1711 #ifdef __i386__
1712 #define CASE_EXTEND_REG(T)
1713 #else
1714 #define CASE_EXTEND_REG(T) \
1715 CASE_ ## T ## ET_REG(R8, r8); \
1716 CASE_ ## T ## ET_REG(R9, r9); \
1717 CASE_ ## T ## ET_REG(R10, r10); \
1718 CASE_ ## T ## ET_REG(R11, r11); \
1719 CASE_ ## T ## ET_REG(R12, r12); \
1720 CASE_ ## T ## ET_REG(R13, r13); \
1721 CASE_ ## T ## ET_REG(R14, r14); \
1722 CASE_ ## T ## ET_REG(R15, r15)
1723 #endif
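/*
 * CASE_GET_REG/CASE_SET_REG expand to case arms that copy a value
 * between the general-purpose register selected by the CR-access exit
 * qualification (REG_*) and the corresponding field of struct
 * cpu_user_regs; the CASE_EXTEND_* variants add r8-r15 on 64-bit builds.
 */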
1725 static int mov_to_cr(int gp, int cr, struct cpu_user_regs *regs)
1727 unsigned long value;
1728 struct vcpu *v = current;
1729 struct vlapic *vlapic = vcpu_vlapic(v);
1731 switch ( gp )
1733 CASE_GET_REG(EAX, eax);
1734 CASE_GET_REG(ECX, ecx);
1735 CASE_GET_REG(EDX, edx);
1736 CASE_GET_REG(EBX, ebx);
1737 CASE_GET_REG(EBP, ebp);
1738 CASE_GET_REG(ESI, esi);
1739 CASE_GET_REG(EDI, edi);
1740 CASE_GET_REG(ESP, esp);
1741 CASE_EXTEND_GET_REG;
1742 default:
1743 gdprintk(XENLOG_ERR, "invalid gp: %d\n", gp);
1744 goto exit_and_crash;
1747 HVMTRACE_2D(CR_WRITE, v, cr, value);
1749 HVM_DBG_LOG(DBG_LEVEL_1, "CR%d, value = %lx", cr, value);
1751 switch ( cr )
1753 case 0:
1754 return hvm_set_cr0(value);
1756 case 3:
1757 return hvm_set_cr3(value);
1759 case 4:
1760 return hvm_set_cr4(value);
1762 case 8:
1763 vlapic_set_reg(vlapic, APIC_TASKPRI, ((value & 0x0F) << 4));
1764 break;
1766 default:
1767 gdprintk(XENLOG_ERR, "invalid cr: %d\n", cr);
1768 goto exit_and_crash;
1771 return 1;
1773 exit_and_crash:
1774 domain_crash(v->domain);
1775 return 0;
1778 /*
1779 * Read from control registers. CR0 and CR4 are read from the shadow.
1780 */
1781 static void mov_from_cr(int cr, int gp, struct cpu_user_regs *regs)
1783 unsigned long value = 0;
1784 struct vcpu *v = current;
1785 struct vlapic *vlapic = vcpu_vlapic(v);
1787 switch ( cr )
1789 case 3:
1790 value = (unsigned long)v->arch.hvm_vcpu.guest_cr[3];
1791 break;
1792 case 8:
1793 value = (unsigned long)vlapic_get_reg(vlapic, APIC_TASKPRI);
1794 value = (value & 0xF0) >> 4;
1795 break;
1796 default:
1797 gdprintk(XENLOG_ERR, "invalid cr: %d\n", cr);
1798 domain_crash(v->domain);
1799 break;
1802 switch ( gp ) {
1803 CASE_SET_REG(EAX, eax);
1804 CASE_SET_REG(ECX, ecx);
1805 CASE_SET_REG(EDX, edx);
1806 CASE_SET_REG(EBX, ebx);
1807 CASE_SET_REG(EBP, ebp);
1808 CASE_SET_REG(ESI, esi);
1809 CASE_SET_REG(EDI, edi);
1810 CASE_SET_REG(ESP, esp);
1811 CASE_EXTEND_SET_REG;
1812 default:
1813 printk("invalid gp: %d\n", gp);
1814 domain_crash(v->domain);
1815 break;
1818 HVMTRACE_2D(CR_READ, v, cr, value);
1820 HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR%d, value = %lx", cr, value);
1823 static int vmx_cr_access(unsigned long exit_qualification,
1824 struct cpu_user_regs *regs)
1826 unsigned int gp, cr;
1827 unsigned long value;
1828 struct vcpu *v = current;
1830 switch ( exit_qualification & CONTROL_REG_ACCESS_TYPE )
1832 case TYPE_MOV_TO_CR:
1833 gp = exit_qualification & CONTROL_REG_ACCESS_REG;
1834 cr = exit_qualification & CONTROL_REG_ACCESS_NUM;
1835 return mov_to_cr(gp, cr, regs);
1836 case TYPE_MOV_FROM_CR:
1837 gp = exit_qualification & CONTROL_REG_ACCESS_REG;
1838 cr = exit_qualification & CONTROL_REG_ACCESS_NUM;
1839 mov_from_cr(cr, gp, regs);
1840 break;
1841 case TYPE_CLTS:
1842 v->arch.hvm_vcpu.guest_cr[0] &= ~X86_CR0_TS;
1843 vmx_update_guest_cr(v, 0);
1844 HVMTRACE_0D(CLTS, current);
1845 break;
1846 case TYPE_LMSW:
1847 value = v->arch.hvm_vcpu.guest_cr[0];
1848 value = (value & ~0xF) |
1849 (((exit_qualification & LMSW_SOURCE_DATA) >> 16) & 0xF);
1850 HVMTRACE_1D(LMSW, current, value);
1851 return hvm_set_cr0(value);
1852 default:
1853 BUG();
1856 return 1;
1859 static const struct lbr_info {
1860 u32 base, count;
1861 } p4_lbr[] = {
1862 { MSR_P4_LER_FROM_LIP, 1 },
1863 { MSR_P4_LER_TO_LIP, 1 },
1864 { MSR_P4_LASTBRANCH_TOS, 1 },
1865 { MSR_P4_LASTBRANCH_0_FROM_LIP, NUM_MSR_P4_LASTBRANCH_FROM_TO },
1866 { MSR_P4_LASTBRANCH_0_TO_LIP, NUM_MSR_P4_LASTBRANCH_FROM_TO },
1867 { 0, 0 }
1868 }, c2_lbr[] = {
1869 { MSR_IA32_LASTINTFROMIP, 1 },
1870 { MSR_IA32_LASTINTTOIP, 1 },
1871 { MSR_C2_LASTBRANCH_TOS, 1 },
1872 { MSR_C2_LASTBRANCH_0_FROM_IP, NUM_MSR_C2_LASTBRANCH_FROM_TO },
1873 { MSR_C2_LASTBRANCH_0_TO_IP, NUM_MSR_C2_LASTBRANCH_FROM_TO },
1874 { 0, 0 }
1875 #ifdef __i386__
1876 }, pm_lbr[] = {
1877 { MSR_IA32_LASTINTFROMIP, 1 },
1878 { MSR_IA32_LASTINTTOIP, 1 },
1879 { MSR_PM_LASTBRANCH_TOS, 1 },
1880 { MSR_PM_LASTBRANCH_0, NUM_MSR_PM_LASTBRANCH },
1881 { 0, 0 }
1882 #endif
1883 };
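/*
 * last_branch_msr_get() below picks one of these tables based on the
 * host CPU family/model; is_last_branch_msr() then treats each entry as
 * the MSR range [base, base + count) of last-branch-record registers.
 */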
1885 static const struct lbr_info *last_branch_msr_get(void)
1887 switch ( boot_cpu_data.x86 )
1889 case 6:
1890 switch ( boot_cpu_data.x86_model )
1892 #ifdef __i386__
1893 /* PentiumM */
1894 case 9: case 13:
1895 /* Core Solo/Duo */
1896 case 14:
1897 return pm_lbr;
1898 break;
1899 #endif
1900 /* Core2 Duo */
1901 case 15:
1902 return c2_lbr;
1903 break;
1905 break;
1907 case 15:
1908 switch ( boot_cpu_data.x86_model )
1910 /* Pentium4/Xeon with em64t */
1911 case 3: case 4: case 6:
1912 return p4_lbr;
1913 break;
1915 break;
1918 return NULL;
1921 static int is_last_branch_msr(u32 ecx)
1923 const struct lbr_info *lbr = last_branch_msr_get();
1925 if ( lbr == NULL )
1926 return 0;
1928 for ( ; lbr->count; lbr++ )
1929 if ( (ecx >= lbr->base) && (ecx < (lbr->base + lbr->count)) )
1930 return 1;
1932 return 0;
1935 int vmx_msr_read_intercept(struct cpu_user_regs *regs)
1937 u64 msr_content = 0;
1938 u32 ecx = regs->ecx, eax, edx;
1939 struct vcpu *v = current;
1940 int index;
1941 u64 *var_range_base = (u64*)v->arch.hvm_vcpu.mtrr.var_ranges;
1942 u64 *fixed_range_base = (u64*)v->arch.hvm_vcpu.mtrr.fixed_ranges;
1944 HVM_DBG_LOG(DBG_LEVEL_1, "ecx=%x", ecx);
1946 switch ( ecx )
1948 case MSR_IA32_TSC:
1949 msr_content = hvm_get_guest_time(v);
1950 break;
1951 case MSR_IA32_SYSENTER_CS:
1952 msr_content = (u32)__vmread(GUEST_SYSENTER_CS);
1953 break;
1954 case MSR_IA32_SYSENTER_ESP:
1955 msr_content = __vmread(GUEST_SYSENTER_ESP);
1956 break;
1957 case MSR_IA32_SYSENTER_EIP:
1958 msr_content = __vmread(GUEST_SYSENTER_EIP);
1959 break;
1960 case MSR_IA32_APICBASE:
1961 msr_content = vcpu_vlapic(v)->hw.apic_base_msr;
1962 break;
1963 case MSR_IA32_CR_PAT:
1964 msr_content = v->arch.hvm_vcpu.pat_cr;
1965 break;
1966 case MSR_MTRRcap:
1967 msr_content = v->arch.hvm_vcpu.mtrr.mtrr_cap;
1968 break;
1969 case MSR_MTRRdefType:
1970 msr_content = v->arch.hvm_vcpu.mtrr.def_type
1971 | (v->arch.hvm_vcpu.mtrr.enabled << 10);
1972 break;
1973 case MSR_MTRRfix64K_00000:
1974 msr_content = fixed_range_base[0];
1975 break;
1976 case MSR_MTRRfix16K_80000:
1977 case MSR_MTRRfix16K_A0000:
1978 index = regs->ecx - MSR_MTRRfix16K_80000;
1979 msr_content = fixed_range_base[index + 1];
1980 break;
1981 case MSR_MTRRfix4K_C0000...MSR_MTRRfix4K_F8000:
1982 index = regs->ecx - MSR_MTRRfix4K_C0000;
1983 msr_content = fixed_range_base[index + 3];
1984 break;
1985 case MSR_IA32_MTRR_PHYSBASE0...MSR_IA32_MTRR_PHYSMASK7:
1986 index = regs->ecx - MSR_IA32_MTRR_PHYSBASE0;
1987 msr_content = var_range_base[index];
1988 break;
1989 case MSR_IA32_DEBUGCTLMSR:
1990 if ( vmx_read_guest_msr(v, ecx, &msr_content) != 0 )
1991 msr_content = 0;
1992 break;
1993 case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_PROCBASED_CTLS2:
1994 goto gp_fault;
1995 case MSR_IA32_MCG_CAP:
1996 case MSR_IA32_MCG_STATUS:
1997 case MSR_IA32_MC0_STATUS:
1998 case MSR_IA32_MC1_STATUS:
1999 case MSR_IA32_MC2_STATUS:
2000 case MSR_IA32_MC3_STATUS:
2001 case MSR_IA32_MC4_STATUS:
2002 case MSR_IA32_MC5_STATUS:
2003 /* No point in letting the guest see real MCEs */
2004 msr_content = 0;
2005 break;
2006 case MSR_IA32_MISC_ENABLE:
2007 rdmsrl(MSR_IA32_MISC_ENABLE, msr_content);
2008 /* Debug Trace Store is not supported. */
2009 msr_content |= MSR_IA32_MISC_ENABLE_BTS_UNAVAIL |
2010 MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL;
2011 break;
2012 default:
2013 if ( vpmu_do_rdmsr(regs) )
2014 goto done;
2015 switch ( long_mode_do_msr_read(regs) )
2017 case HNDL_unhandled:
2018 break;
2019 case HNDL_exception_raised:
2020 return 0;
2021 case HNDL_done:
2022 goto done;
2025 if ( vmx_read_guest_msr(v, ecx, &msr_content) == 0 )
2026 break;
2028 if ( is_last_branch_msr(ecx) )
2030 msr_content = 0;
2031 break;
2034 if ( rdmsr_hypervisor_regs(ecx, &eax, &edx) ||
2035 rdmsr_safe(ecx, eax, edx) == 0 )
2037 regs->eax = eax;
2038 regs->edx = edx;
2039 goto done;
2042 goto gp_fault;
2045 regs->eax = msr_content & 0xFFFFFFFF;
2046 regs->edx = msr_content >> 32;
2048 done:
2049 hvmtrace_msr_read(v, ecx, msr_content);
2050 HVM_DBG_LOG(DBG_LEVEL_1, "returns: ecx=%x, eax=%lx, edx=%lx",
2051 ecx, (unsigned long)regs->eax,
2052 (unsigned long)regs->edx);
2053 return 1;
2055 gp_fault:
2056 vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
2057 return 0;
2060 static int vmx_alloc_vlapic_mapping(struct domain *d)
2061 {
2062 void *apic_va;
2064 if ( !cpu_has_vmx_virtualize_apic_accesses )
2065 return 0;
2067 apic_va = alloc_xenheap_page();
2068 if ( apic_va == NULL )
2069 return -ENOMEM;
2070 share_xen_page_with_guest(virt_to_page(apic_va), d, XENSHARE_writable);
2071 set_mmio_p2m_entry(
2072 d, paddr_to_pfn(APIC_DEFAULT_PHYS_BASE), _mfn(virt_to_mfn(apic_va)));
2073 d->arch.hvm_domain.vmx_apic_access_mfn = virt_to_mfn(apic_va);
2075 return 0;
2076 }
2078 static void vmx_free_vlapic_mapping(struct domain *d)
2079 {
2080 unsigned long mfn = d->arch.hvm_domain.vmx_apic_access_mfn;
2081 if ( mfn != 0 )
2082 free_xenheap_page(mfn_to_virt(mfn));
2083 }
2085 static void vmx_install_vlapic_mapping(struct vcpu *v)
2087 paddr_t virt_page_ma, apic_page_ma;
2089 if ( !cpu_has_vmx_virtualize_apic_accesses )
2090 return;
2092 virt_page_ma = page_to_maddr(vcpu_vlapic(v)->regs_page);
2093 apic_page_ma = v->domain->arch.hvm_domain.vmx_apic_access_mfn;
2094 apic_page_ma <<= PAGE_SHIFT;
2096 vmx_vmcs_enter(v);
2097 __vmwrite(VIRTUAL_APIC_PAGE_ADDR, virt_page_ma);
2098 __vmwrite(APIC_ACCESS_ADDR, apic_page_ma);
2099 vmx_vmcs_exit(v);
2102 void vmx_vlapic_msr_changed(struct vcpu *v)
2104 struct vlapic *vlapic = vcpu_vlapic(v);
2105 uint32_t ctl;
2107 if ( !cpu_has_vmx_virtualize_apic_accesses )
2108 return;
2110 vmx_vmcs_enter(v);
2111 ctl = __vmread(SECONDARY_VM_EXEC_CONTROL);
2112 ctl &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
2113 if ( !vlapic_hw_disabled(vlapic) &&
2114 (vlapic_base_address(vlapic) == APIC_DEFAULT_PHYS_BASE) )
2115 ctl |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
2116 __vmwrite(SECONDARY_VM_EXEC_CONTROL, ctl);
2117 vmx_vmcs_exit(v);
2118 }
2120 extern bool_t mtrr_var_range_msr_set(struct mtrr_state *v,
2121 u32 msr, u64 msr_content);
2122 extern bool_t mtrr_fix_range_msr_set(struct mtrr_state *v,
2123 int row, u64 msr_content);
2124 extern bool_t mtrr_def_type_msr_set(struct mtrr_state *v, u64 msr_content);
2125 extern bool_t pat_msr_set(u64 *pat, u64 msr);
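/*
 * The MTRR/PAT setters declared above are implemented by the HVM MTRR
 * emulation code. Each returns 0 to reject a value the guest may not load,
 * in which case the write intercept below injects #GP rather than updating
 * the cached MTRR/PAT state.
 */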
2127 int vmx_msr_write_intercept(struct cpu_user_regs *regs)
2128 {
2129 u32 ecx = regs->ecx;
2130 u64 msr_content;
2131 struct vcpu *v = current;
2132 int index;
2134 HVM_DBG_LOG(DBG_LEVEL_1, "ecx=%x, eax=%x, edx=%x",
2135 ecx, (u32)regs->eax, (u32)regs->edx);
2137 msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
2139 hvmtrace_msr_write(v, ecx, msr_content);
2141 switch ( ecx )
2142 {
2143 case MSR_IA32_TSC:
2144 hvm_set_guest_time(v, msr_content);
2145 pt_reset(v);
2146 break;
2147 case MSR_IA32_SYSENTER_CS:
2148 __vmwrite(GUEST_SYSENTER_CS, msr_content);
2149 break;
2150 case MSR_IA32_SYSENTER_ESP:
2151 __vmwrite(GUEST_SYSENTER_ESP, msr_content);
2152 break;
2153 case MSR_IA32_SYSENTER_EIP:
2154 __vmwrite(GUEST_SYSENTER_EIP, msr_content);
2155 break;
2156 case MSR_IA32_APICBASE:
2157 vlapic_msr_set(vcpu_vlapic(v), msr_content);
2158 break;
2159 case MSR_IA32_CR_PAT:
2160 if ( !pat_msr_set(&v->arch.hvm_vcpu.pat_cr, msr_content) )
2161 goto gp_fault;
2162 break;
2163 case MSR_MTRRdefType:
2164 if ( !mtrr_def_type_msr_set(&v->arch.hvm_vcpu.mtrr, msr_content) )
2165 goto gp_fault;
2166 break;
2167 case MSR_MTRRfix64K_00000:
2168 if ( !mtrr_fix_range_msr_set(&v->arch.hvm_vcpu.mtrr, 0, msr_content) )
2169 goto gp_fault;
2170 break;
2171 case MSR_MTRRfix16K_80000:
2172 case MSR_MTRRfix16K_A0000:
2173 index = regs->ecx - MSR_MTRRfix16K_80000 + 1;
2174 if ( !mtrr_fix_range_msr_set(&v->arch.hvm_vcpu.mtrr,
2175 index, msr_content) )
2176 goto gp_fault;
2177 break;
2178 case MSR_MTRRfix4K_C0000...MSR_MTRRfix4K_F8000:
2179 index = regs->ecx - MSR_MTRRfix4K_C0000 + 3;
2180 if ( !mtrr_fix_range_msr_set(&v->arch.hvm_vcpu.mtrr,
2181 index, msr_content) )
2182 goto gp_fault;
2183 break;
2184 case MSR_IA32_MTRR_PHYSBASE0...MSR_IA32_MTRR_PHYSMASK7:
2185 if ( !mtrr_var_range_msr_set(&v->arch.hvm_vcpu.mtrr,
2186 regs->ecx, msr_content) )
2187 goto gp_fault;
2188 break;
2189 case MSR_MTRRcap:
2190 goto gp_fault;
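/*
 * A DEBUGCTL write with the LBR bit set pulls in the model-specific
 * last-branch-record MSRs reported by last_branch_msr_get(): each one is
 * added to the guest MSR area and its intercept is dropped, so later guest
 * accesses go straight to the hardware MSRs. DEBUGCTL itself is tracked in
 * both the guest and host MSR lists.
 */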
2191 case MSR_IA32_DEBUGCTLMSR: {
2192 int i, rc = 0;
2194 if ( !msr_content || (msr_content & ~3) )
2195 break;
2197 if ( msr_content & 1 )
2198 {
2199 const struct lbr_info *lbr = last_branch_msr_get();
2200 if ( lbr == NULL )
2201 break;
2203 for ( ; (rc == 0) && lbr->count; lbr++ )
2204 for ( i = 0; (rc == 0) && (i < lbr->count); i++ )
2205 if ( (rc = vmx_add_guest_msr(v, lbr->base + i)) == 0 )
2206 vmx_disable_intercept_for_msr(v, lbr->base + i);
2207 }
2209 if ( (rc < 0) ||
2210 (vmx_add_guest_msr(v, ecx) < 0) ||
2211 (vmx_add_host_load_msr(v, ecx) < 0) )
2212 vmx_inject_hw_exception(v, TRAP_machine_check, 0);
2213 else
2214 vmx_write_guest_msr(v, ecx, msr_content);
2216 break;
2217 }
2218 case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_PROCBASED_CTLS2:
2219 goto gp_fault;
2220 default:
2221 if ( vpmu_do_wrmsr(regs) )
2222 return 1;
2223 switch ( long_mode_do_msr_write(regs) )
2224 {
2225 case HNDL_unhandled:
2226 if ( (vmx_write_guest_msr(v, ecx, msr_content) != 0) &&
2227 !is_last_branch_msr(ecx) )
2228 wrmsr_hypervisor_regs(ecx, regs->eax, regs->edx);
2229 break;
2230 case HNDL_exception_raised:
2231 return 0;
2232 case HNDL_done:
2233 break;
2234 }
2235 break;
2236 }
2238 return 1;
2240 gp_fault:
2241 vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
2242 return 0;
2243 }
2245 static void vmx_do_hlt(struct cpu_user_regs *regs)
2246 {
2247 unsigned long intr_info = __vmread(VM_ENTRY_INTR_INFO);
2248 struct vcpu *curr = current;
2250 /* Check for pending exception. */
2251 if ( intr_info & INTR_INFO_VALID_MASK )
2252 {
2253 HVMTRACE_1D(HLT, curr, /*int pending=*/ 1);
2254 return;
2255 }
2257 HVMTRACE_1D(HLT, curr, /*int pending=*/ 0);
2258 hvm_hlt(regs->eflags);
2259 }
2261 static void vmx_do_extint(struct cpu_user_regs *regs)
2262 {
2263 unsigned int vector;
2265 asmlinkage void do_IRQ(struct cpu_user_regs *);
2266 fastcall void smp_apic_timer_interrupt(struct cpu_user_regs *);
2267 fastcall void smp_event_check_interrupt(void);
2268 fastcall void smp_invalidate_interrupt(void);
2269 fastcall void smp_call_function_interrupt(void);
2270 fastcall void smp_spurious_interrupt(struct cpu_user_regs *regs);
2271 fastcall void smp_error_interrupt(struct cpu_user_regs *regs);
2272 fastcall void smp_pmu_apic_interrupt(struct cpu_user_regs *regs);
2273 #ifdef CONFIG_X86_MCE_P4THERMAL
2274 fastcall void smp_thermal_interrupt(struct cpu_user_regs *regs);
2275 #endif
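/*
 * External interrupts stay owned by Xen: the vector that forced this vmexit
 * is recovered from VM_EXIT_INTR_INFO and dispatched to the hypervisor's own
 * handlers below; it is never reflected directly into the guest.
 */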
2277 vector = __vmread(VM_EXIT_INTR_INFO);
2278 BUG_ON(!(vector & INTR_INFO_VALID_MASK));
2280 vector &= INTR_INFO_VECTOR_MASK;
2281 HVMTRACE_1D(INTR, current, vector);
2283 switch ( vector )
2284 {
2285 case LOCAL_TIMER_VECTOR:
2286 smp_apic_timer_interrupt(regs);
2287 break;
2288 case EVENT_CHECK_VECTOR:
2289 smp_event_check_interrupt();
2290 break;
2291 case INVALIDATE_TLB_VECTOR:
2292 smp_invalidate_interrupt();
2293 break;
2294 case CALL_FUNCTION_VECTOR:
2295 smp_call_function_interrupt();
2296 break;
2297 case SPURIOUS_APIC_VECTOR:
2298 smp_spurious_interrupt(regs);
2299 break;
2300 case ERROR_APIC_VECTOR:
2301 smp_error_interrupt(regs);
2302 break;
2303 case PMU_APIC_VECTOR:
2304 smp_pmu_apic_interrupt(regs);
2305 break;
2306 #ifdef CONFIG_X86_MCE_P4THERMAL
2307 case THERMAL_APIC_VECTOR:
2308 smp_thermal_interrupt(regs);
2309 break;
2310 #endif
2311 default:
2312 regs->entry_vector = vector;
2313 do_IRQ(regs);
2314 break;
2315 }
2316 }
2318 static void wbinvd_ipi(void *info)
2319 {
2320 wbinvd();
2321 }
2323 void vmx_wbinvd_intercept(void)
2324 {
2325 if ( list_empty(&(domain_hvm_iommu(current->domain)->pdev_list)) )
2326 return;
2328 if ( cpu_has_wbinvd_exiting )
2329 on_each_cpu(wbinvd_ipi, NULL, 1, 1);
2330 else
2331 wbinvd();
2332 }
2334 static void vmx_failed_vmentry(unsigned int exit_reason,
2335 struct cpu_user_regs *regs)
2336 {
2337 unsigned int failed_vmentry_reason = (uint16_t)exit_reason;
2338 unsigned long exit_qualification = __vmread(EXIT_QUALIFICATION);
2339 struct vcpu *curr = current;
2341 printk("Failed vm entry (exit reason 0x%x) ", exit_reason);
2342 switch ( failed_vmentry_reason )
2343 {
2344 case EXIT_REASON_INVALID_GUEST_STATE:
2345 printk("caused by invalid guest state (%ld).\n", exit_qualification);
2346 break;
2347 case EXIT_REASON_MSR_LOADING:
2348 printk("caused by MSR entry %ld loading.\n", exit_qualification);
2349 break;
2350 case EXIT_REASON_MACHINE_CHECK:
2351 printk("caused by machine check.\n");
2352 HVMTRACE_0D(MCE, curr);
2353 do_machine_check(regs);
2354 break;
2355 default:
2356 printk("reason not known yet!\n");
2357 break;
2358 }
2360 printk("************* VMCS Area **************\n");
2361 vmcs_dump_vcpu(curr);
2362 printk("**************************************\n");
2364 domain_crash(curr->domain);
2365 }
2367 asmlinkage void vmx_vmexit_handler(struct cpu_user_regs *regs)
2368 {
2369 unsigned int exit_reason, idtv_info;
2370 unsigned long exit_qualification, inst_len = 0;
2371 struct vcpu *v = current;
2373 exit_reason = __vmread(VM_EXIT_REASON);
2375 hvmtrace_vmexit(v, regs->eip, exit_reason);
2377 perfc_incra(vmexits, exit_reason);
2379 if ( exit_reason != EXIT_REASON_EXTERNAL_INTERRUPT )
2380 local_irq_enable();
2382 if ( unlikely(exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) )
2383 return vmx_failed_vmentry(exit_reason, regs);
2385 hvm_maybe_deassert_evtchn_irq();
2387 /* Event delivery caused this intercept? Queue for redelivery. */
2388 idtv_info = __vmread(IDT_VECTORING_INFO);
2389 if ( unlikely(idtv_info & INTR_INFO_VALID_MASK) &&
2390 (exit_reason != EXIT_REASON_TASK_SWITCH) )
2391 {
2392 if ( hvm_event_needs_reinjection((idtv_info>>8)&7, idtv_info&0xff) )
2393 {
2394 /* See SDM 3B 25.7.1.1 and .2 for info about masking resvd bits. */
2395 __vmwrite(VM_ENTRY_INTR_INFO,
2396 idtv_info & ~INTR_INFO_RESVD_BITS_MASK);
2397 if ( idtv_info & INTR_INFO_DELIVER_CODE_MASK )
2398 __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE,
2399 __vmread(IDT_VECTORING_ERROR_CODE));
2400 }
2402 /*
2403 * Clear NMI-blocking interruptibility info if an NMI delivery faulted.
2404 * Re-delivery will re-set it (see SDM 3B 25.7.1.2).
2405 */
2406 if ( (idtv_info & INTR_INFO_INTR_TYPE_MASK) == (X86_EVENTTYPE_NMI<<8) )
2407 __vmwrite(GUEST_INTERRUPTIBILITY_INFO,
2408 __vmread(GUEST_INTERRUPTIBILITY_INFO) &
2409 ~VMX_INTR_SHADOW_NMI);
2410 }
2412 switch ( exit_reason )
2413 {
2414 case EXIT_REASON_EXCEPTION_NMI:
2415 {
2416 /*
2417 * Software-interrupt (INT n) exiting is not enabled, so this exit is
2418 * caused either by (1) an exception raised in the guest (e.g. #PF),
2419 * or (2) an NMI.
2420 */
2421 unsigned int intr_info, vector;
2423 intr_info = __vmread(VM_EXIT_INTR_INFO);
2424 BUG_ON(!(intr_info & INTR_INFO_VALID_MASK));
2426 vector = intr_info & INTR_INFO_VECTOR_MASK;
2428 /*
2429 * Re-set the NMI shadow if vmexit caused by a guest IRET fault (see 3B
2430 * 25.7.1.2, "Resuming Guest Software after Handling an Exception").
2431 * (NB. If we emulate this IRET for any reason, we should re-clear!)
2432 */
2433 if ( unlikely(intr_info & INTR_INFO_NMI_UNBLOCKED_BY_IRET) &&
2434 !(__vmread(IDT_VECTORING_INFO) & INTR_INFO_VALID_MASK) &&
2435 (vector != TRAP_double_fault) )
2436 __vmwrite(GUEST_INTERRUPTIBILITY_INFO,
2437 __vmread(GUEST_INTERRUPTIBILITY_INFO)|VMX_INTR_SHADOW_NMI);
2439 perfc_incra(cause_vector, vector);
2441 switch ( vector )
2442 {
2443 case TRAP_debug:
2444 case TRAP_int3:
2445 if ( !v->domain->debugger_attached )
2446 goto exit_and_crash;
2447 domain_pause_for_debugger();
2448 break;
2449 case TRAP_no_device:
2450 vmx_do_no_device_fault();
2451 break;
2452 case TRAP_page_fault:
2453 exit_qualification = __vmread(EXIT_QUALIFICATION);
2454 regs->error_code = __vmread(VM_EXIT_INTR_ERROR_CODE);
2456 HVM_DBG_LOG(DBG_LEVEL_VMMU,
2457 "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx",
2458 (unsigned long)regs->eax, (unsigned long)regs->ebx,
2459 (unsigned long)regs->ecx, (unsigned long)regs->edx,
2460 (unsigned long)regs->esi, (unsigned long)regs->edi);
2462 if ( paging_fault(exit_qualification, regs) )
2463 {
2464 hvmtrace_pf_xen(v, exit_qualification, regs->error_code);
2465 break;
2466 }
2468 v->arch.hvm_vcpu.guest_cr[2] = exit_qualification;
2469 vmx_inject_hw_exception(v, TRAP_page_fault, regs->error_code);
2470 break;
2471 case TRAP_nmi:
2472 if ( (intr_info & INTR_INFO_INTR_TYPE_MASK) !=
2473 (X86_EVENTTYPE_NMI << 8) )
2474 goto exit_and_crash;
2475 HVMTRACE_0D(NMI, v);
2476 do_nmi(regs); /* Real NMI, vector 2: normal processing. */
2477 break;
2478 case TRAP_machine_check:
2479 HVMTRACE_0D(MCE, v);
2480 do_machine_check(regs);
2481 break;
2482 default:
2483 goto exit_and_crash;
2484 }
2485 break;
2486 }
2487 case EXIT_REASON_EXTERNAL_INTERRUPT:
2488 vmx_do_extint(regs);
2489 break;
2490 case EXIT_REASON_TRIPLE_FAULT:
2491 hvm_triple_fault();
2492 break;
2493 case EXIT_REASON_PENDING_VIRT_INTR:
2494 /* Disable the interrupt window. */
2495 v->arch.hvm_vmx.exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING;
2496 __vmwrite(CPU_BASED_VM_EXEC_CONTROL,
2497 v->arch.hvm_vmx.exec_control);
2498 break;
2499 case EXIT_REASON_PENDING_VIRT_NMI:
2500 /* Disable the NMI window. */
2501 v->arch.hvm_vmx.exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING;
2502 __vmwrite(CPU_BASED_VM_EXEC_CONTROL,
2503 v->arch.hvm_vmx.exec_control);
2504 break;
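/*
 * For a task-switch exit the qualification encodes the target TSS selector
 * in bits 15:0 and the switch source in bits 31:30 (CALL/INT, IRET, JMP, or
 * an IDT task gate), which the table below maps onto hvm_task_switch()'s
 * TSW_* reasons; any error code pending from IDT vectoring is forwarded too.
 */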
2505 case EXIT_REASON_TASK_SWITCH: {
2506 const enum hvm_task_switch_reason reasons[] = {
2507 TSW_call_or_int, TSW_iret, TSW_jmp, TSW_call_or_int };
2508 int32_t errcode = -1;
2509 exit_qualification = __vmread(EXIT_QUALIFICATION);
2510 if ( (idtv_info & INTR_INFO_VALID_MASK) &&
2511 (idtv_info & INTR_INFO_DELIVER_CODE_MASK) )
2512 errcode = __vmread(IDT_VECTORING_ERROR_CODE);
2513 hvm_task_switch((uint16_t)exit_qualification,
2514 reasons[(exit_qualification >> 30) & 3],
2515 errcode);
2516 break;
2517 }
2518 case EXIT_REASON_CPUID:
2519 inst_len = __get_instruction_length(); /* Safe: CPUID */
2520 __update_guest_eip(inst_len);
2521 vmx_do_cpuid(regs);
2522 break;
2523 case EXIT_REASON_HLT:
2524 inst_len = __get_instruction_length(); /* Safe: HLT */
2525 __update_guest_eip(inst_len);
2526 vmx_do_hlt(regs);
2527 break;
2528 case EXIT_REASON_INVLPG:
2529 {
2530 inst_len = __get_instruction_length(); /* Safe: INVLPG */
2531 __update_guest_eip(inst_len);
2532 exit_qualification = __vmread(EXIT_QUALIFICATION);
2533 vmx_do_invlpg(exit_qualification);
2534 break;
2535 }
2536 case EXIT_REASON_VMCALL:
2537 {
2538 int rc;
2539 HVMTRACE_1D(VMMCALL, v, regs->eax);
2540 inst_len = __get_instruction_length(); /* Safe: VMCALL */
2541 rc = hvm_do_hypercall(regs);
2542 if ( rc != HVM_HCALL_preempted )
2543 {
2544 __update_guest_eip(inst_len);
2545 if ( rc == HVM_HCALL_invalidate )
2546 send_invalidate_req();
2547 }
2548 break;
2549 }
2550 case EXIT_REASON_CR_ACCESS:
2551 {
2552 exit_qualification = __vmread(EXIT_QUALIFICATION);
2553 inst_len = __get_instruction_length(); /* Safe: MOV Cn, LMSW, CLTS */
2554 if ( vmx_cr_access(exit_qualification, regs) )
2555 __update_guest_eip(inst_len);
2556 break;
2557 }
2558 case EXIT_REASON_DR_ACCESS:
2559 exit_qualification = __vmread(EXIT_QUALIFICATION);
2560 vmx_dr_access(exit_qualification, regs);
2561 break;
2562 case EXIT_REASON_IO_INSTRUCTION:
2563 exit_qualification = __vmread(EXIT_QUALIFICATION);
2564 inst_len = __get_instruction_length(); /* Safe: IN, INS, OUT, OUTS */
2565 vmx_io_instruction(exit_qualification, inst_len);
2566 break;
2567 case EXIT_REASON_MSR_READ:
2568 inst_len = __get_instruction_length(); /* Safe: RDMSR */
2569 if ( vmx_msr_read_intercept(regs) )
2570 __update_guest_eip(inst_len);
2571 break;
2572 case EXIT_REASON_MSR_WRITE:
2573 inst_len = __get_instruction_length(); /* Safe: WRMSR */
2574 if ( vmx_msr_write_intercept(regs) )
2575 __update_guest_eip(inst_len);
2576 break;
2578 case EXIT_REASON_MWAIT_INSTRUCTION:
2579 case EXIT_REASON_MONITOR_INSTRUCTION:
2580 case EXIT_REASON_VMCLEAR:
2581 case EXIT_REASON_VMLAUNCH:
2582 case EXIT_REASON_VMPTRLD:
2583 case EXIT_REASON_VMPTRST:
2584 case EXIT_REASON_VMREAD:
2585 case EXIT_REASON_VMRESUME:
2586 case EXIT_REASON_VMWRITE:
2587 case EXIT_REASON_VMXOFF:
2588 case EXIT_REASON_VMXON:
2589 vmx_inject_hw_exception(v, TRAP_invalid_op, HVM_DELIVER_NO_ERROR_CODE);
2590 break;
2592 case EXIT_REASON_TPR_BELOW_THRESHOLD:
2593 break;
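/*
 * An APIC-access exit means the guest touched the virtualized APIC page:
 * the low 12 bits of the qualification give the offset into the 4K APIC
 * MMIO window, and the access is replayed through the normal MMIO
 * emulation path at APIC_DEFAULT_PHYS_BASE plus that offset.
 */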
2595 case EXIT_REASON_APIC_ACCESS:
2596 {
2597 unsigned long offset;
2598 exit_qualification = __vmread(EXIT_QUALIFICATION);
2599 offset = exit_qualification & 0x0fffUL;
2600 handle_mmio(APIC_DEFAULT_PHYS_BASE | offset);
2601 break;
2602 }
2604 case EXIT_REASON_INVD:
2605 case EXIT_REASON_WBINVD:
2606 {
2607 inst_len = __get_instruction_length(); /* Safe: INVD, WBINVD */
2608 __update_guest_eip(inst_len);
2609 vmx_wbinvd_intercept();
2610 break;
2611 }
2613 default:
2614 exit_and_crash:
2615 gdprintk(XENLOG_ERR, "Bad vmexit (reason %x)\n", exit_reason);
2616 domain_crash(v->domain);
2617 break;
2618 }
2619 }
2621 asmlinkage void vmx_trace_vmentry(void)
2622 {
2623 struct vcpu *v = current;
2625 hvmtrace_vmentry(v);
2626 }
2628 /*
2629 * Local variables:
2630 * mode: C
2631 * c-set-style: "BSD"
2632 * c-basic-offset: 4
2633 * tab-width: 4
2634 * indent-tabs-mode: nil
2635 * End:
2636 */