ia64/xen-unstable

view xen/arch/x86/hvm/svm/svm.c @ 18788:07d0be88571f

hvm: fix single stepping on debugger

The debuggee domain would die with an unexpected trap
when single-stepping over an emulated instruction.
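
(The TF check at the end of __update_guest_eip() below appears to be the
relevant change: after an emulated instruction is skipped, a #DB trap is
re-injected if the guest was single-stepping with EFLAGS.TF set, matching
what the hardware would have delivered.)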

Signed-off-by: Kouya Shimura <kouya@jp.fujitsu.com>
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Tue Nov 11 11:47:03 2008 +0000
parents 8d993552673a
children 629f028d22f9
line source
1 /*
2 * svm.c: handling SVM architecture-related VM exits
3 * Copyright (c) 2004, Intel Corporation.
4 * Copyright (c) 2005-2007, Advanced Micro Devices, Inc.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
17 * Place - Suite 330, Boston, MA 02111-1307 USA.
18 */
20 #include <xen/config.h>
21 #include <xen/init.h>
22 #include <xen/lib.h>
23 #include <xen/trace.h>
24 #include <xen/sched.h>
25 #include <xen/irq.h>
26 #include <xen/softirq.h>
27 #include <xen/hypercall.h>
28 #include <xen/domain_page.h>
29 #include <asm/current.h>
30 #include <asm/io.h>
31 #include <asm/paging.h>
32 #include <asm/p2m.h>
33 #include <asm/regs.h>
34 #include <asm/cpufeature.h>
35 #include <asm/processor.h>
36 #include <asm/types.h>
37 #include <asm/debugreg.h>
38 #include <asm/msr.h>
39 #include <asm/spinlock.h>
40 #include <asm/hvm/hvm.h>
41 #include <asm/hvm/support.h>
42 #include <asm/hvm/io.h>
43 #include <asm/hvm/svm/asid.h>
44 #include <asm/hvm/svm/svm.h>
45 #include <asm/hvm/svm/vmcb.h>
46 #include <asm/hvm/svm/emulate.h>
47 #include <asm/hvm/svm/intr.h>
48 #include <asm/x86_emulate.h>
49 #include <public/sched.h>
50 #include <asm/hvm/vpt.h>
51 #include <asm/hvm/trace.h>
52 #include <asm/hap.h>
54 u32 svm_feature_flags;
56 #define set_segment_register(name, value) \
57 asm volatile ( "movw %%ax ,%%" STR(name) "" : : "a" (value) )
59 enum handler_return { HNDL_done, HNDL_unhandled, HNDL_exception_raised };
61 asmlinkage void do_IRQ(struct cpu_user_regs *);
63 static void svm_update_guest_cr(struct vcpu *v, unsigned int cr);
64 static void svm_update_guest_efer(struct vcpu *v);
65 static void svm_inject_exception(
66 unsigned int trapnr, int errcode, unsigned long cr2);
67 static void svm_cpuid_intercept(
68 unsigned int *eax, unsigned int *ebx,
69 unsigned int *ecx, unsigned int *edx);
70 static void svm_wbinvd_intercept(void);
71 static void svm_fpu_dirty_intercept(void);
72 static int svm_msr_read_intercept(struct cpu_user_regs *regs);
73 static int svm_msr_write_intercept(struct cpu_user_regs *regs);
74 static void svm_invlpg_intercept(unsigned long vaddr);
76 /* va of hardware host save area */
77 static void *hsa[NR_CPUS] __read_mostly;
79 /* vmcb used for extended host state */
80 static void *root_vmcb[NR_CPUS] __read_mostly;
82 static void inline __update_guest_eip(
83 struct cpu_user_regs *regs, unsigned int inst_len)
84 {
85 struct vcpu *curr = current;
87 if ( unlikely(inst_len == 0) )
88 return;
90 if ( unlikely(inst_len > 15) )
91 {
92 gdprintk(XENLOG_ERR, "Bad instruction length %u\n", inst_len);
93 domain_crash(curr->domain);
94 return;
95 }
97 ASSERT(regs == guest_cpu_user_regs());
99 regs->eip += inst_len;
100 regs->eflags &= ~X86_EFLAGS_RF;
102 curr->arch.hvm_svm.vmcb->interrupt_shadow = 0;
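/*
* If the guest was single-stepping (EFLAGS.TF), hardware would have raised
* #DB right after the instruction we just emulated; re-inject it here so an
* attached debugger still sees the expected trap (the fix described in the
* changeset header).
*/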
104 if ( regs->eflags & X86_EFLAGS_TF )
105 svm_inject_exception(TRAP_debug, HVM_DELIVER_NO_ERROR_CODE, 0);
106 }
108 static void svm_cpu_down(void)
109 {
110 write_efer(read_efer() & ~EFER_SVME);
111 }
113 static enum handler_return long_mode_do_msr_write(struct cpu_user_regs *regs)
114 {
115 u64 msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
116 u32 ecx = regs->ecx;
118 HVM_DBG_LOG(DBG_LEVEL_0, "msr %x msr_content %"PRIx64,
119 ecx, msr_content);
121 switch ( ecx )
122 {
123 case MSR_EFER:
124 if ( hvm_set_efer(msr_content) )
125 return HNDL_exception_raised;
126 break;
128 case MSR_IA32_MC4_MISC: /* Threshold register */
129 case MSR_F10_MC4_MISC1 ... MSR_F10_MC4_MISC3:
130 /*
131 * MCA/MCE: Threshold register is reported to be locked, so we ignore
132 * all write accesses. This behaviour matches real HW, so guests should
133 * have no problem with this.
134 */
135 break;
137 default:
138 return HNDL_unhandled;
139 }
141 return HNDL_done;
142 }
144 static void svm_save_dr(struct vcpu *v)
145 {
146 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
148 if ( !v->arch.hvm_vcpu.flag_dr_dirty )
149 return;
151 /* Clear the DR dirty flag and re-enable intercepts for DR accesses. */
152 v->arch.hvm_vcpu.flag_dr_dirty = 0;
153 v->arch.hvm_svm.vmcb->dr_intercepts = ~0u;
155 v->arch.guest_context.debugreg[0] = read_debugreg(0);
156 v->arch.guest_context.debugreg[1] = read_debugreg(1);
157 v->arch.guest_context.debugreg[2] = read_debugreg(2);
158 v->arch.guest_context.debugreg[3] = read_debugreg(3);
159 v->arch.guest_context.debugreg[6] = vmcb->dr6;
160 v->arch.guest_context.debugreg[7] = vmcb->dr7;
161 }
163 static void __restore_debug_registers(struct vcpu *v)
164 {
165 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
167 if ( v->arch.hvm_vcpu.flag_dr_dirty )
168 return;
170 v->arch.hvm_vcpu.flag_dr_dirty = 1;
171 vmcb->dr_intercepts = 0;
173 write_debugreg(0, v->arch.guest_context.debugreg[0]);
174 write_debugreg(1, v->arch.guest_context.debugreg[1]);
175 write_debugreg(2, v->arch.guest_context.debugreg[2]);
176 write_debugreg(3, v->arch.guest_context.debugreg[3]);
177 vmcb->dr6 = v->arch.guest_context.debugreg[6];
178 vmcb->dr7 = v->arch.guest_context.debugreg[7];
179 }
181 /*
182 * DR7 is saved and restored on every vmexit. Other debug registers only
183 * need to be restored if their value is going to affect execution -- i.e.,
184 * if one of the breakpoints is enabled. So mask out all bits that don't
185 * enable some breakpoint functionality.
186 */
187 static void svm_restore_dr(struct vcpu *v)
188 {
189 if ( unlikely(v->arch.guest_context.debugreg[7] & DR7_ACTIVE_MASK) )
190 __restore_debug_registers(v);
191 }
193 static int svm_vmcb_save(struct vcpu *v, struct hvm_hw_cpu *c)
194 {
195 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
197 c->cr0 = v->arch.hvm_vcpu.guest_cr[0];
198 c->cr2 = v->arch.hvm_vcpu.guest_cr[2];
199 c->cr3 = v->arch.hvm_vcpu.guest_cr[3];
200 c->cr4 = v->arch.hvm_vcpu.guest_cr[4];
202 c->sysenter_cs = vmcb->sysenter_cs;
203 c->sysenter_esp = vmcb->sysenter_esp;
204 c->sysenter_eip = vmcb->sysenter_eip;
206 c->pending_event = 0;
207 c->error_code = 0;
208 if ( vmcb->eventinj.fields.v &&
209 hvm_event_needs_reinjection(vmcb->eventinj.fields.type,
210 vmcb->eventinj.fields.vector) )
211 {
212 c->pending_event = (uint32_t)vmcb->eventinj.bytes;
213 c->error_code = vmcb->eventinj.fields.errorcode;
214 }
216 return 1;
217 }
219 static int svm_vmcb_restore(struct vcpu *v, struct hvm_hw_cpu *c)
220 {
221 unsigned long mfn = 0;
222 p2m_type_t p2mt;
223 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
225 if ( c->pending_valid &&
226 ((c->pending_type == 1) || (c->pending_type > 6) ||
227 (c->pending_reserved != 0)) )
228 {
229 gdprintk(XENLOG_ERR, "Invalid pending event 0x%"PRIx32".\n",
230 c->pending_event);
231 return -EINVAL;
232 }
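/*
* Shadow paging only: validate the saved CR3, take a reference on its
* top-level page and drop the reference held on the previous guest_table.
* Under HAP no reference is taken here, as Xen does not walk the guest
* page tables itself.
*/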
234 if ( !paging_mode_hap(v->domain) )
235 {
236 if ( c->cr0 & X86_CR0_PG )
237 {
238 mfn = mfn_x(gfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT, &p2mt));
239 if ( !p2m_is_ram(p2mt) || !get_page(mfn_to_page(mfn), v->domain) )
240 {
241 gdprintk(XENLOG_ERR, "Invalid CR3 value=0x%"PRIx64"\n",
242 c->cr3);
243 return -EINVAL;
244 }
245 }
247 if ( v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PG )
248 put_page(pagetable_get_page(v->arch.guest_table));
250 v->arch.guest_table = pagetable_from_pfn(mfn);
251 }
253 v->arch.hvm_vcpu.guest_cr[0] = c->cr0 | X86_CR0_ET;
254 v->arch.hvm_vcpu.guest_cr[2] = c->cr2;
255 v->arch.hvm_vcpu.guest_cr[3] = c->cr3;
256 v->arch.hvm_vcpu.guest_cr[4] = c->cr4;
257 svm_update_guest_cr(v, 0);
258 svm_update_guest_cr(v, 2);
259 svm_update_guest_cr(v, 4);
261 vmcb->sysenter_cs = c->sysenter_cs;
262 vmcb->sysenter_esp = c->sysenter_esp;
263 vmcb->sysenter_eip = c->sysenter_eip;
265 if ( paging_mode_hap(v->domain) )
266 {
267 vmcb->np_enable = 1;
268 vmcb->g_pat = 0x0007040600070406ULL; /* guest PAT */
269 vmcb->h_cr3 = pagetable_get_paddr(v->domain->arch.phys_table);
270 }
272 if ( c->pending_valid )
273 {
274 gdprintk(XENLOG_INFO, "Re-injecting 0x%"PRIx32", 0x%"PRIx32"\n",
275 c->pending_event, c->error_code);
277 if ( hvm_event_needs_reinjection(c->pending_type, c->pending_vector) )
278 {
279 vmcb->eventinj.bytes = c->pending_event;
280 vmcb->eventinj.fields.errorcode = c->error_code;
281 }
282 }
284 paging_update_paging_modes(v);
286 return 0;
287 }
290 static void svm_save_cpu_state(struct vcpu *v, struct hvm_hw_cpu *data)
291 {
292 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
294 data->shadow_gs = vmcb->kerngsbase;
295 data->msr_lstar = vmcb->lstar;
296 data->msr_star = vmcb->star;
297 data->msr_cstar = vmcb->cstar;
298 data->msr_syscall_mask = vmcb->sfmask;
299 data->msr_efer = v->arch.hvm_vcpu.guest_efer;
300 data->msr_flags = -1ULL;
302 data->tsc = hvm_get_guest_tsc(v);
303 }
306 static void svm_load_cpu_state(struct vcpu *v, struct hvm_hw_cpu *data)
307 {
308 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
310 vmcb->kerngsbase = data->shadow_gs;
311 vmcb->lstar = data->msr_lstar;
312 vmcb->star = data->msr_star;
313 vmcb->cstar = data->msr_cstar;
314 vmcb->sfmask = data->msr_syscall_mask;
315 v->arch.hvm_vcpu.guest_efer = data->msr_efer;
316 svm_update_guest_efer(v);
318 hvm_set_guest_tsc(v, data->tsc);
319 }
321 static void svm_save_vmcb_ctxt(struct vcpu *v, struct hvm_hw_cpu *ctxt)
322 {
323 svm_save_cpu_state(v, ctxt);
324 svm_vmcb_save(v, ctxt);
325 }
327 static int svm_load_vmcb_ctxt(struct vcpu *v, struct hvm_hw_cpu *ctxt)
328 {
329 svm_load_cpu_state(v, ctxt);
330 if (svm_vmcb_restore(v, ctxt)) {
331 printk("svm_vmcb restore failed!\n");
332 domain_crash(v->domain);
333 return -EINVAL;
334 }
336 return 0;
337 }
339 static void svm_fpu_enter(struct vcpu *v)
340 {
341 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
343 setup_fpu(v);
344 vmcb->exception_intercepts &= ~(1U << TRAP_no_device);
345 }
347 static void svm_fpu_leave(struct vcpu *v)
348 {
349 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
351 ASSERT(!v->fpu_dirtied);
352 ASSERT(read_cr0() & X86_CR0_TS);
354 /*
355 * If the guest does not have TS enabled then we must cause and handle an
356 * exception on first use of the FPU. If the guest *does* have TS enabled
357 * then this is not necessary: no FPU activity can occur until the guest
358 * clears CR0.TS, and we will initialise the FPU when that happens.
359 */
360 if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS) )
361 {
362 v->arch.hvm_svm.vmcb->exception_intercepts |= 1U << TRAP_no_device;
363 vmcb->cr0 |= X86_CR0_TS;
364 }
365 }
367 static unsigned int svm_get_interrupt_shadow(struct vcpu *v)
368 {
369 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
370 unsigned int intr_shadow = 0;
372 if ( vmcb->interrupt_shadow )
373 intr_shadow |= HVM_INTR_SHADOW_MOV_SS | HVM_INTR_SHADOW_STI;
375 if ( vmcb->general1_intercepts & GENERAL1_INTERCEPT_IRET )
376 intr_shadow |= HVM_INTR_SHADOW_NMI;
378 return intr_shadow;
379 }
381 static void svm_set_interrupt_shadow(struct vcpu *v, unsigned int intr_shadow)
382 {
383 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
385 vmcb->interrupt_shadow =
386 !!(intr_shadow & (HVM_INTR_SHADOW_MOV_SS|HVM_INTR_SHADOW_STI));
388 vmcb->general1_intercepts &= ~GENERAL1_INTERCEPT_IRET;
389 if ( intr_shadow & HVM_INTR_SHADOW_NMI )
390 vmcb->general1_intercepts |= GENERAL1_INTERCEPT_IRET;
391 }
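/*
* Emulator view of the guest execution mode: 0 = real mode, 1 = virtual
* 8086, 2 or 4 = 16/32-bit protected mode (from CS.D/B), 8 = 64-bit mode.
*/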
393 static int svm_guest_x86_mode(struct vcpu *v)
394 {
395 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
397 if ( unlikely(!(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE)) )
398 return 0;
399 if ( unlikely(guest_cpu_user_regs()->eflags & X86_EFLAGS_VM) )
400 return 1;
401 if ( hvm_long_mode_enabled(v) && likely(vmcb->cs.attr.fields.l) )
402 return 8;
403 return (likely(vmcb->cs.attr.fields.db) ? 4 : 2);
404 }
406 static void svm_update_host_cr3(struct vcpu *v)
407 {
408 /* SVM doesn't have a HOST_CR3 equivalent to update. */
409 }
411 static void svm_update_guest_cr(struct vcpu *v, unsigned int cr)
412 {
413 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
415 switch ( cr )
416 {
417 case 0: {
418 unsigned long hw_cr0_mask = 0;
420 if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS) )
421 {
422 if ( v != current )
423 hw_cr0_mask |= X86_CR0_TS;
424 else if ( vmcb->cr0 & X86_CR0_TS )
425 svm_fpu_enter(v);
426 }
428 vmcb->cr0 = v->arch.hvm_vcpu.guest_cr[0] | hw_cr0_mask;
429 if ( !paging_mode_hap(v->domain) )
430 vmcb->cr0 |= X86_CR0_PG | X86_CR0_WP;
431 break;
432 }
433 case 2:
434 vmcb->cr2 = v->arch.hvm_vcpu.guest_cr[2];
435 break;
436 case 3:
437 vmcb->cr3 = v->arch.hvm_vcpu.hw_cr[3];
438 svm_asid_inv_asid(v);
439 break;
440 case 4:
441 vmcb->cr4 = HVM_CR4_HOST_MASK;
442 if ( paging_mode_hap(v->domain) )
443 vmcb->cr4 &= ~X86_CR4_PAE;
444 vmcb->cr4 |= v->arch.hvm_vcpu.guest_cr[4];
445 break;
446 default:
447 BUG();
448 }
449 }
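/*
* EFER handling: SVME must stay set while the vcpu runs under SVM, and
* LME is only made visible to hardware once long mode is actually active
* (LMA set), rather than as soon as the guest sets it.
*/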
451 static void svm_update_guest_efer(struct vcpu *v)
452 {
453 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
455 vmcb->efer = (v->arch.hvm_vcpu.guest_efer | EFER_SVME) & ~EFER_LME;
456 if ( vmcb->efer & EFER_LMA )
457 vmcb->efer |= EFER_LME;
458 }
460 static void svm_flush_guest_tlbs(void)
461 {
462 /* Roll over the CPU's ASID generation, so it gets a clean TLB when we
463 * next VMRUN. (If ASIDs are disabled, the whole TLB is flushed on
464 * VMRUN anyway). */
465 svm_asid_inc_generation();
466 }
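/*
* VMSAVE captures the hidden state that VMRUN/#VMEXIT do not switch
* (FS/GS/TR/LDTR, KernelGSBase, STAR/LSTAR/CSTAR/SFMASK, SYSENTER MSRs).
* It is done lazily, only when that state is about to be read or reused.
*/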
468 static void svm_sync_vmcb(struct vcpu *v)
469 {
470 struct arch_svm_struct *arch_svm = &v->arch.hvm_svm;
472 if ( arch_svm->vmcb_in_sync )
473 return;
475 arch_svm->vmcb_in_sync = 1;
477 svm_vmsave(arch_svm->vmcb);
478 }
480 static void svm_get_segment_register(struct vcpu *v, enum x86_segment seg,
481 struct segment_register *reg)
482 {
483 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
485 ASSERT((v == current) || !vcpu_runnable(v));
487 switch ( seg )
488 {
489 case x86_seg_cs:
490 memcpy(reg, &vmcb->cs, sizeof(*reg));
491 break;
492 case x86_seg_ds:
493 memcpy(reg, &vmcb->ds, sizeof(*reg));
494 break;
495 case x86_seg_es:
496 memcpy(reg, &vmcb->es, sizeof(*reg));
497 break;
498 case x86_seg_fs:
499 svm_sync_vmcb(v);
500 memcpy(reg, &vmcb->fs, sizeof(*reg));
501 break;
502 case x86_seg_gs:
503 svm_sync_vmcb(v);
504 memcpy(reg, &vmcb->gs, sizeof(*reg));
505 break;
506 case x86_seg_ss:
507 memcpy(reg, &vmcb->ss, sizeof(*reg));
508 reg->attr.fields.dpl = vmcb->cpl;
509 break;
510 case x86_seg_tr:
511 svm_sync_vmcb(v);
512 memcpy(reg, &vmcb->tr, sizeof(*reg));
513 break;
514 case x86_seg_gdtr:
515 memcpy(reg, &vmcb->gdtr, sizeof(*reg));
516 break;
517 case x86_seg_idtr:
518 memcpy(reg, &vmcb->idtr, sizeof(*reg));
519 break;
520 case x86_seg_ldtr:
521 svm_sync_vmcb(v);
522 memcpy(reg, &vmcb->ldtr, sizeof(*reg));
523 break;
524 default:
525 BUG();
526 }
527 }
529 static void svm_set_segment_register(struct vcpu *v, enum x86_segment seg,
530 struct segment_register *reg)
531 {
532 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
533 int sync = 0;
535 ASSERT((v == current) || !vcpu_runnable(v));
537 switch ( seg )
538 {
539 case x86_seg_fs:
540 case x86_seg_gs:
541 case x86_seg_tr:
542 case x86_seg_ldtr:
543 sync = (v == current);
544 break;
545 default:
546 break;
547 }
549 if ( sync )
550 svm_sync_vmcb(v);
552 switch ( seg )
553 {
554 case x86_seg_cs:
555 memcpy(&vmcb->cs, reg, sizeof(*reg));
556 break;
557 case x86_seg_ds:
558 memcpy(&vmcb->ds, reg, sizeof(*reg));
559 break;
560 case x86_seg_es:
561 memcpy(&vmcb->es, reg, sizeof(*reg));
562 break;
563 case x86_seg_fs:
564 memcpy(&vmcb->fs, reg, sizeof(*reg));
565 break;
566 case x86_seg_gs:
567 memcpy(&vmcb->gs, reg, sizeof(*reg));
568 break;
569 case x86_seg_ss:
570 memcpy(&vmcb->ss, reg, sizeof(*reg));
571 vmcb->cpl = vmcb->ss.attr.fields.dpl;
572 break;
573 case x86_seg_tr:
574 memcpy(&vmcb->tr, reg, sizeof(*reg));
575 break;
576 case x86_seg_gdtr:
577 vmcb->gdtr.base = reg->base;
578 vmcb->gdtr.limit = (uint16_t)reg->limit;
579 break;
580 case x86_seg_idtr:
581 vmcb->idtr.base = reg->base;
582 vmcb->idtr.limit = (uint16_t)reg->limit;
583 break;
584 case x86_seg_ldtr:
585 memcpy(&vmcb->ldtr, reg, sizeof(*reg));
586 break;
587 default:
588 BUG();
589 }
591 if ( sync )
592 svm_vmload(vmcb);
593 }
595 static void svm_set_tsc_offset(struct vcpu *v, u64 offset)
596 {
597 v->arch.hvm_svm.vmcb->tsc_offset = offset;
598 }
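/*
* Each hypercall gets a 32-byte stub in the hypercall page:
* "mov $nr, %eax; vmmcall; ret" (0f 01 d9 encodes VMMCALL).
*/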
600 static void svm_init_hypercall_page(struct domain *d, void *hypercall_page)
601 {
602 char *p;
603 int i;
605 for ( i = 0; i < (PAGE_SIZE / 32); i++ )
606 {
607 p = (char *)(hypercall_page + (i * 32));
608 *(u8 *)(p + 0) = 0xb8; /* mov imm32, %eax */
609 *(u32 *)(p + 1) = i;
610 *(u8 *)(p + 5) = 0x0f; /* vmmcall */
611 *(u8 *)(p + 6) = 0x01;
612 *(u8 *)(p + 7) = 0xd9;
613 *(u8 *)(p + 8) = 0xc3; /* ret */
614 }
616 /* Don't support HYPERVISOR_iret at the moment */
617 *(u16 *)(hypercall_page + (__HYPERVISOR_iret * 32)) = 0x0b0f; /* ud2 */
618 }
620 static void svm_ctxt_switch_from(struct vcpu *v)
621 {
622 int cpu = smp_processor_id();
624 svm_fpu_leave(v);
626 svm_save_dr(v);
628 svm_sync_vmcb(v);
629 svm_vmload(root_vmcb[cpu]);
631 #ifdef __x86_64__
632 /* Resume use of ISTs now that the host TR is reinstated. */
633 idt_tables[cpu][TRAP_double_fault].a |= IST_DF << 32;
634 idt_tables[cpu][TRAP_nmi].a |= IST_NMI << 32;
635 idt_tables[cpu][TRAP_machine_check].a |= IST_MCE << 32;
636 #endif
637 }
639 static void svm_ctxt_switch_to(struct vcpu *v)
640 {
641 int cpu = smp_processor_id();
643 #ifdef __x86_64__
644 /*
645 * This is required because VMRUN performs consistency checks, and some
646 * of the DOM0 selectors point to invalid GDT locations, which would
647 * cause AMD processors to shut down; loading null selectors avoids
648 * this.
649 */
650 set_segment_register(ds, 0);
651 set_segment_register(es, 0);
652 set_segment_register(ss, 0);
654 /*
655 * Cannot use ISTs for NMI/#MC/#DF while we are running with the guest TR.
656 * But this doesn't matter: the IST is only req'd to handle SYSCALL/SYSRET.
657 */
658 idt_tables[cpu][TRAP_double_fault].a &= ~(7UL << 32);
659 idt_tables[cpu][TRAP_nmi].a &= ~(7UL << 32);
660 idt_tables[cpu][TRAP_machine_check].a &= ~(7UL << 32);
661 #endif
663 svm_restore_dr(v);
665 svm_vmsave(root_vmcb[cpu]);
666 svm_vmload(v->arch.hvm_svm.vmcb);
667 }
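/*
* Install or remove the #DB/#BP intercepts when a debugger attaches to or
* detaches from the domain, and migrate timers/ASID state if the vcpu has
* moved to a different physical CPU since it last ran.
*/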
669 static void svm_do_resume(struct vcpu *v)
670 {
671 bool_t debug_state = v->domain->debugger_attached;
673 if ( unlikely(v->arch.hvm_vcpu.debug_state_latch != debug_state) )
674 {
675 uint32_t mask = (1U << TRAP_debug) | (1U << TRAP_int3);
676 v->arch.hvm_vcpu.debug_state_latch = debug_state;
677 if ( debug_state )
678 v->arch.hvm_svm.vmcb->exception_intercepts |= mask;
679 else
680 v->arch.hvm_svm.vmcb->exception_intercepts &= ~mask;
681 }
683 if ( v->arch.hvm_svm.launch_core != smp_processor_id() )
684 {
685 v->arch.hvm_svm.launch_core = smp_processor_id();
686 hvm_migrate_timers(v);
688 /* Migrating to another ASID domain. Request a new ASID. */
689 svm_asid_init_vcpu(v);
690 }
692 /* Reflect the vlapic's TPR in the hardware vtpr */
693 v->arch.hvm_svm.vmcb->vintr.fields.tpr =
694 (vlapic_get_reg(vcpu_vlapic(v), APIC_TASKPRI) & 0xFF) >> 4;
696 hvm_do_resume(v);
697 reset_stack_and_jump(svm_asm_do_resume);
698 }
700 static int svm_domain_initialise(struct domain *d)
701 {
702 return 0;
703 }
705 static void svm_domain_destroy(struct domain *d)
706 {
707 }
709 static int svm_vcpu_initialise(struct vcpu *v)
710 {
711 int rc;
713 v->arch.schedule_tail = svm_do_resume;
714 v->arch.ctxt_switch_from = svm_ctxt_switch_from;
715 v->arch.ctxt_switch_to = svm_ctxt_switch_to;
717 v->arch.hvm_svm.launch_core = -1;
719 if ( (rc = svm_create_vmcb(v)) != 0 )
720 {
721 dprintk(XENLOG_WARNING,
722 "Failed to create VMCB for vcpu %d: err=%d.\n",
723 v->vcpu_id, rc);
724 return rc;
725 }
727 return 0;
728 }
730 static void svm_vcpu_destroy(struct vcpu *v)
731 {
732 svm_destroy_vmcb(v);
733 }
735 static void svm_inject_exception(
736 unsigned int trapnr, int errcode, unsigned long cr2)
737 {
738 struct vcpu *curr = current;
739 struct vmcb_struct *vmcb = curr->arch.hvm_svm.vmcb;
740 eventinj_t event = vmcb->eventinj;
742 switch ( trapnr )
743 {
744 case TRAP_debug:
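/*
* For a single-step #DB, set DR6.BS (bit 14) in the guest's view of DR6,
* as hardware would, before either handing the trap to an attached
* debugger or injecting it into the guest.
*/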
745 if ( guest_cpu_user_regs()->eflags & X86_EFLAGS_TF )
746 {
747 __restore_debug_registers(curr);
748 vmcb->dr6 |= 0x4000;
749 }
750 case TRAP_int3:
751 if ( curr->domain->debugger_attached )
752 {
753 /* Debug/Int3: Trap to debugger. */
754 domain_pause_for_debugger();
755 return;
756 }
757 }
759 if ( unlikely(event.fields.v) &&
760 (event.fields.type == X86_EVENTTYPE_HW_EXCEPTION) )
761 {
762 trapnr = hvm_combine_hw_exceptions(event.fields.vector, trapnr);
763 if ( trapnr == TRAP_double_fault )
764 errcode = 0;
765 }
767 event.bytes = 0;
768 event.fields.v = 1;
769 event.fields.type = X86_EVENTTYPE_HW_EXCEPTION;
770 event.fields.vector = trapnr;
771 event.fields.ev = (errcode != HVM_DELIVER_NO_ERROR_CODE);
772 event.fields.errorcode = errcode;
774 vmcb->eventinj = event;
776 if ( trapnr == TRAP_page_fault )
777 {
778 vmcb->cr2 = curr->arch.hvm_vcpu.guest_cr[2] = cr2;
779 HVMTRACE_LONG_2D(PF_INJECT, errcode, TRC_PAR_LONG(cr2));
780 }
781 else
782 {
783 HVMTRACE_2D(INJ_EXC, trapnr, errcode);
784 }
785 }
787 static int svm_event_pending(struct vcpu *v)
788 {
789 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
790 return vmcb->eventinj.fields.v;
791 }
793 static int svm_do_pmu_interrupt(struct cpu_user_regs *regs)
794 {
795 return 0;
796 }
798 static struct hvm_function_table svm_function_table = {
799 .name = "SVM",
800 .cpu_down = svm_cpu_down,
801 .domain_initialise = svm_domain_initialise,
802 .domain_destroy = svm_domain_destroy,
803 .vcpu_initialise = svm_vcpu_initialise,
804 .vcpu_destroy = svm_vcpu_destroy,
805 .save_cpu_ctxt = svm_save_vmcb_ctxt,
806 .load_cpu_ctxt = svm_load_vmcb_ctxt,
807 .get_interrupt_shadow = svm_get_interrupt_shadow,
808 .set_interrupt_shadow = svm_set_interrupt_shadow,
809 .guest_x86_mode = svm_guest_x86_mode,
810 .get_segment_register = svm_get_segment_register,
811 .set_segment_register = svm_set_segment_register,
812 .update_host_cr3 = svm_update_host_cr3,
813 .update_guest_cr = svm_update_guest_cr,
814 .update_guest_efer = svm_update_guest_efer,
815 .flush_guest_tlbs = svm_flush_guest_tlbs,
816 .set_tsc_offset = svm_set_tsc_offset,
817 .inject_exception = svm_inject_exception,
818 .init_hypercall_page = svm_init_hypercall_page,
819 .event_pending = svm_event_pending,
820 .do_pmu_interrupt = svm_do_pmu_interrupt,
821 .cpuid_intercept = svm_cpuid_intercept,
822 .wbinvd_intercept = svm_wbinvd_intercept,
823 .fpu_dirty_intercept = svm_fpu_dirty_intercept,
824 .msr_read_intercept = svm_msr_read_intercept,
825 .msr_write_intercept = svm_msr_write_intercept,
826 .invlpg_intercept = svm_invlpg_intercept
827 };
829 int start_svm(struct cpuinfo_x86 *c)
830 {
831 u32 eax, ecx, edx;
832 u32 phys_hsa_lo, phys_hsa_hi;
833 u64 phys_hsa;
834 int cpu = smp_processor_id();
836 /* Xen does not fill x86_capability words except 0. */
837 ecx = cpuid_ecx(0x80000001);
838 boot_cpu_data.x86_capability[5] = ecx;
840 if ( !(test_bit(X86_FEATURE_SVME, &boot_cpu_data.x86_capability)) )
841 return 0;
843 /* Check whether SVM feature is disabled in BIOS */
844 rdmsr(MSR_K8_VM_CR, eax, edx);
845 if ( eax & K8_VMCR_SVME_DISABLE )
846 {
847 printk("AMD SVM Extension is disabled in BIOS.\n");
848 return 0;
849 }
851 if ( ((hsa[cpu] = alloc_host_save_area()) == NULL) ||
852 ((root_vmcb[cpu] = alloc_vmcb()) == NULL) )
853 return 0;
855 write_efer(read_efer() | EFER_SVME);
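/*
* The host save area (HSA) is the per-CPU region in which the processor
* saves host state across VMRUN; MSR_K8_VM_HSAVE_PA holds its physical
* address.
*/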
857 /* Initialize the HSA for this core. */
858 phys_hsa = (u64) virt_to_maddr(hsa[cpu]);
859 phys_hsa_lo = (u32) phys_hsa;
860 phys_hsa_hi = (u32) (phys_hsa >> 32);
861 wrmsr(MSR_K8_VM_HSAVE_PA, phys_hsa_lo, phys_hsa_hi);
863 /* Initialize core's ASID handling. */
864 svm_asid_init(c);
866 if ( cpu != 0 )
867 return 1;
869 setup_vmcb_dump();
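/* CPUID leaf 0x8000000A:EDX advertises optional SVM features
* (e.g. nested paging, LBR virtualisation). */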
871 svm_feature_flags = ((cpuid_eax(0x80000000) >= 0x8000000A) ?
872 cpuid_edx(0x8000000A) : 0);
874 svm_function_table.hap_supported = cpu_has_svm_npt;
876 hvm_enable(&svm_function_table);
878 return 1;
879 }
881 static void svm_do_nested_pgfault(paddr_t gpa, struct cpu_user_regs *regs)
882 {
883 p2m_type_t p2mt;
884 mfn_t mfn;
885 unsigned long gfn = gpa >> PAGE_SHIFT;
887 /*
888 * If this GFN is emulated MMIO or marked as read-only, pass the fault
889 * to the mmio handler.
890 */
891 mfn = gfn_to_mfn_current(gfn, &p2mt);
892 if ( (p2mt == p2m_mmio_dm) || (p2mt == p2m_ram_ro) )
893 {
894 if ( !handle_mmio() )
895 hvm_inject_exception(TRAP_gp_fault, 0, 0);
896 return;
897 }
899 /* Log-dirty: mark the page dirty and let the guest write it again */
900 paging_mark_dirty(current->domain, mfn_x(mfn));
901 p2m_change_type(current->domain, gfn, p2m_ram_logdirty, p2m_ram_rw);
902 }
904 static void svm_fpu_dirty_intercept(void)
905 {
906 struct vcpu *curr = current;
907 struct vmcb_struct *vmcb = curr->arch.hvm_svm.vmcb;
909 svm_fpu_enter(curr);
911 if ( !(curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS) )
912 vmcb->cr0 &= ~X86_CR0_TS;
913 }
915 #define bitmaskof(idx) (1U << ((idx) & 31))
916 static void svm_cpuid_intercept(
917 unsigned int *eax, unsigned int *ebx,
918 unsigned int *ecx, unsigned int *edx)
919 {
920 unsigned int input = *eax;
921 struct vcpu *v = current;
923 hvm_cpuid(input, eax, ebx, ecx, edx);
925 if ( input == 0x80000001 )
926 {
927 /* Fix up VLAPIC details. */
928 if ( vlapic_hw_disabled(vcpu_vlapic(v)) )
929 __clear_bit(X86_FEATURE_APIC & 31, edx);
930 }
932 HVMTRACE_5D (CPUID, input, *eax, *ebx, *ecx, *edx);
933 }
935 static void svm_vmexit_do_cpuid(struct cpu_user_regs *regs)
936 {
937 unsigned int eax, ebx, ecx, edx, inst_len;
939 if ( (inst_len = __get_instruction_length(current, INSTR_CPUID)) == 0 )
940 return;
942 eax = regs->eax;
943 ebx = regs->ebx;
944 ecx = regs->ecx;
945 edx = regs->edx;
947 svm_cpuid_intercept(&eax, &ebx, &ecx, &edx);
949 regs->eax = eax;
950 regs->ebx = ebx;
951 regs->ecx = ecx;
952 regs->edx = edx;
954 __update_guest_eip(regs, inst_len);
955 }
957 static void svm_dr_access(struct vcpu *v, struct cpu_user_regs *regs)
958 {
959 HVMTRACE_0D(DR_WRITE);
960 __restore_debug_registers(v);
961 }
963 static int svm_msr_read_intercept(struct cpu_user_regs *regs)
964 {
965 u64 msr_content = 0;
966 u32 ecx = regs->ecx, eax, edx;
967 struct vcpu *v = current;
968 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
970 switch ( ecx )
971 {
972 case MSR_EFER:
973 msr_content = v->arch.hvm_vcpu.guest_efer;
974 break;
976 case MSR_IA32_MC4_MISC: /* Threshold register */
977 case MSR_F10_MC4_MISC1 ... MSR_F10_MC4_MISC3:
978 /*
979 * MCA/MCE: We report that the threshold register is unavailable
980 * for OS use (locked by the BIOS).
981 */
982 msr_content = 1ULL << 61; /* MC4_MISC.Locked */
983 break;
985 case MSR_IA32_EBC_FREQUENCY_ID:
986 /*
987 * This Intel-only register may be accessed if this HVM guest
988 * has been migrated from an Intel host. The value zero is not
989 * particularly meaningful, but at least avoids the guest crashing!
990 */
991 msr_content = 0;
992 break;
994 case MSR_K8_VM_HSAVE_PA:
995 goto gpf;
997 case MSR_IA32_DEBUGCTLMSR:
998 msr_content = vmcb->debugctlmsr;
999 break;
1001 case MSR_IA32_LASTBRANCHFROMIP:
1002 msr_content = vmcb->lastbranchfromip;
1003 break;
1005 case MSR_IA32_LASTBRANCHTOIP:
1006 msr_content = vmcb->lastbranchtoip;
1007 break;
1009 case MSR_IA32_LASTINTFROMIP:
1010 msr_content = vmcb->lastintfromip;
1011 break;
1013 case MSR_IA32_LASTINTTOIP:
1014 msr_content = vmcb->lastinttoip;
1015 break;
1017 default:
1018 if ( rdmsr_viridian_regs(ecx, &eax, &edx) ||
1019 rdmsr_hypervisor_regs(ecx, &eax, &edx) ||
1020 rdmsr_safe(ecx, eax, edx) == 0 )
1021 {
1022 regs->eax = eax;
1023 regs->edx = edx;
1024 goto done;
1025 }
1026 goto gpf;
1027 }
1028 regs->eax = msr_content & 0xFFFFFFFF;
1029 regs->edx = msr_content >> 32;
1031 done:
1032 HVMTRACE_3D (MSR_READ, ecx, regs->eax, regs->edx);
1033 HVM_DBG_LOG(DBG_LEVEL_1, "returns: ecx=%x, eax=%lx, edx=%lx",
1034 ecx, (unsigned long)regs->eax, (unsigned long)regs->edx);
1035 return X86EMUL_OKAY;
1037 gpf:
1038 svm_inject_exception(TRAP_gp_fault, 0, 0);
1039 return X86EMUL_EXCEPTION;
1040 }
1042 static int svm_msr_write_intercept(struct cpu_user_regs *regs)
1043 {
1044 u64 msr_content = 0;
1045 u32 ecx = regs->ecx;
1046 struct vcpu *v = current;
1047 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1049 msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
1051 HVMTRACE_3D (MSR_WRITE, ecx, regs->eax, regs->edx);
1053 switch ( ecx )
1054 {
1055 case MSR_K8_VM_HSAVE_PA:
1056 goto gpf;
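/*
* Writing DEBUGCTL with LBR enabled (on hardware with LBR virtualisation)
* turns on LBR recording in the VMCB and stops intercepting the LBR MSRs,
* so the guest can read them directly from then on.
*/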
1058 case MSR_IA32_DEBUGCTLMSR:
1059 vmcb->debugctlmsr = msr_content;
1060 if ( !msr_content || !cpu_has_svm_lbrv )
1061 break;
1062 vmcb->lbr_control.fields.enable = 1;
1063 svm_disable_intercept_for_msr(v, MSR_IA32_DEBUGCTLMSR);
1064 svm_disable_intercept_for_msr(v, MSR_IA32_LASTBRANCHFROMIP);
1065 svm_disable_intercept_for_msr(v, MSR_IA32_LASTBRANCHTOIP);
1066 svm_disable_intercept_for_msr(v, MSR_IA32_LASTINTFROMIP);
1067 svm_disable_intercept_for_msr(v, MSR_IA32_LASTINTTOIP);
1068 break;
1070 case MSR_IA32_LASTBRANCHFROMIP:
1071 vmcb->lastbranchfromip = msr_content;
1072 break;
1074 case MSR_IA32_LASTBRANCHTOIP:
1075 vmcb->lastbranchtoip = msr_content;
1076 break;
1078 case MSR_IA32_LASTINTFROMIP:
1079 vmcb->lastintfromip = msr_content;
1080 break;
1082 case MSR_IA32_LASTINTTOIP:
1083 vmcb->lastinttoip = msr_content;
1084 break;
1086 default:
1087 if ( wrmsr_viridian_regs(ecx, regs->eax, regs->edx) )
1088 break;
1090 switch ( long_mode_do_msr_write(regs) )
1091 {
1092 case HNDL_unhandled:
1093 wrmsr_hypervisor_regs(ecx, regs->eax, regs->edx);
1094 break;
1095 case HNDL_exception_raised:
1096 return X86EMUL_EXCEPTION;
1097 case HNDL_done:
1098 break;
1099 }
1100 break;
1101 }
1103 return X86EMUL_OKAY;
1105 gpf:
1106 svm_inject_exception(TRAP_gp_fault, 0, 0);
1107 return X86EMUL_EXCEPTION;
1108 }
1110 static void svm_do_msr_access(struct cpu_user_regs *regs)
1111 {
1112 int rc, inst_len;
1113 struct vcpu *v = current;
1114 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1116 if ( vmcb->exitinfo1 == 0 )
1117 {
1118 if ( (inst_len = __get_instruction_length(v, INSTR_RDMSR)) == 0 )
1119 return;
1120 rc = hvm_msr_read_intercept(regs);
1121 }
1122 else
1123 {
1124 if ( (inst_len = __get_instruction_length(v, INSTR_WRMSR)) == 0 )
1125 return;
1126 rc = hvm_msr_write_intercept(regs);
1127 }
1129 if ( rc == X86EMUL_OKAY )
1130 __update_guest_eip(regs, inst_len);
1131 }
1133 static void svm_vmexit_do_hlt(struct vmcb_struct *vmcb,
1134 struct cpu_user_regs *regs)
1135 {
1136 unsigned int inst_len;
1138 if ( (inst_len = __get_instruction_length(current, INSTR_HLT)) == 0 )
1139 return;
1140 __update_guest_eip(regs, inst_len);
1142 hvm_hlt(regs->eflags);
1143 }
1145 static void svm_vmexit_do_rdtsc(struct cpu_user_regs *regs)
1146 {
1147 unsigned int inst_len;
1149 if ( (inst_len = __get_instruction_length(current, INSTR_RDTSC)) == 0 )
1150 return;
1151 __update_guest_eip(regs, inst_len);
1153 hvm_rdtsc_intercept(regs);
1154 }
1156 static void wbinvd_ipi(void *info)
1157 {
1158 wbinvd();
1159 }
1161 static void svm_wbinvd_intercept(void)
1162 {
1163 if ( has_arch_pdevs(current->domain) )
1164 on_each_cpu(wbinvd_ipi, NULL, 1, 1);
1165 }
1167 static void svm_vmexit_do_invalidate_cache(struct cpu_user_regs *regs)
1168 {
1169 enum instruction_index list[] = { INSTR_INVD, INSTR_WBINVD };
1170 int inst_len;
1172 inst_len = __get_instruction_length_from_list(
1173 current, list, ARRAY_SIZE(list));
1174 if ( inst_len == 0 )
1175 return;
1177 svm_wbinvd_intercept();
1179 __update_guest_eip(regs, inst_len);
1180 }
1182 static void svm_invlpg_intercept(unsigned long vaddr)
1183 {
1184 struct vcpu *curr = current;
1185 HVMTRACE_LONG_2D(INVLPG, 0, TRC_PAR_LONG(vaddr));
1186 paging_invlpg(curr, vaddr);
1187 svm_asid_g_invlpg(curr, vaddr);
1188 }
1190 asmlinkage void svm_vmexit_handler(struct cpu_user_regs *regs)
1191 {
1192 unsigned int exit_reason;
1193 struct vcpu *v = current;
1194 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1195 eventinj_t eventinj;
1196 int inst_len, rc;
1198 /*
1199 * Before doing anything else, we need to sync up the VLAPIC's TPR with
1200 * SVM's vTPR. It's OK if the guest doesn't touch CR8 (e.g. 32-bit Windows)
1201 * because we update the vTPR on MMIO writes to the TPR.
1202 */
1203 vlapic_set_reg(vcpu_vlapic(v), APIC_TASKPRI,
1204 (vmcb->vintr.fields.tpr & 0x0F) << 4);
1206 exit_reason = vmcb->exitcode;
1208 HVMTRACE_ND(VMEXIT64, 1/*cycles*/, 3, exit_reason,
1209 (uint32_t)regs->eip, (uint32_t)((uint64_t)regs->eip >> 32),
1210 0, 0, 0);
1212 if ( unlikely(exit_reason == VMEXIT_INVALID) )
1213 {
1214 svm_dump_vmcb(__func__, vmcb);
1215 goto exit_and_crash;
1216 }
1218 perfc_incra(svmexits, exit_reason);
1220 hvm_maybe_deassert_evtchn_irq();
1222 /* Event delivery caused this intercept? Queue for redelivery. */
1223 eventinj = vmcb->exitintinfo;
1224 if ( unlikely(eventinj.fields.v) &&
1225 hvm_event_needs_reinjection(eventinj.fields.type,
1226 eventinj.fields.vector) )
1227 vmcb->eventinj = eventinj;
1229 switch ( exit_reason )
1230 {
1231 case VMEXIT_INTR:
1232 /* Asynchronous event, handled when we STGI'd after the VMEXIT. */
1233 HVMTRACE_0D(INTR);
1234 break;
1236 case VMEXIT_NMI:
1237 /* Asynchronous event, handled when we STGI'd after the VMEXIT. */
1238 HVMTRACE_0D(NMI);
1239 break;
1241 case VMEXIT_SMI:
1242 /* Asynchronous event, handled when we STGI'd after the VMEXIT. */
1243 HVMTRACE_0D(SMI);
1244 break;
1246 case VMEXIT_EXCEPTION_DB:
1247 if ( !v->domain->debugger_attached )
1248 goto exit_and_crash;
1249 domain_pause_for_debugger();
1250 break;
1252 case VMEXIT_EXCEPTION_BP:
1253 if ( !v->domain->debugger_attached )
1254 goto exit_and_crash;
1255 /* AMD Vol2, 15.11: INT3, INTO, BOUND intercepts do not update RIP. */
1256 if ( (inst_len = __get_instruction_length(v, INSTR_INT3)) == 0 )
1257 break;
1258 __update_guest_eip(regs, inst_len);
1259 domain_pause_for_debugger();
1260 break;
1262 case VMEXIT_EXCEPTION_NM:
1263 svm_fpu_dirty_intercept();
1264 break;
1266 case VMEXIT_EXCEPTION_PF: {
1267 unsigned long va;
1268 va = vmcb->exitinfo2;
1269 regs->error_code = vmcb->exitinfo1;
1270 HVM_DBG_LOG(DBG_LEVEL_VMMU,
1271 "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx",
1272 (unsigned long)regs->eax, (unsigned long)regs->ebx,
1273 (unsigned long)regs->ecx, (unsigned long)regs->edx,
1274 (unsigned long)regs->esi, (unsigned long)regs->edi);
1276 if ( paging_fault(va, regs) )
1277 {
1278 if ( trace_will_trace_event(TRC_SHADOW) )
1279 break;
1280 if ( hvm_long_mode_enabled(v) )
1281 HVMTRACE_LONG_2D(PF_XEN, regs->error_code, TRC_PAR_LONG(va));
1282 else
1283 HVMTRACE_2D(PF_XEN, regs->error_code, va);
1284 break;
1285 }
1287 svm_inject_exception(TRAP_page_fault, regs->error_code, va);
1288 break;
1289 }
1291 /* Asynchronous event, handled when we STGI'd after the VMEXIT. */
1292 case VMEXIT_EXCEPTION_MC:
1293 HVMTRACE_0D(MCE);
1294 break;
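/*
* The virtual interrupt window we asked for has opened: clear the pending
* V_IRQ request and stop intercepting VINTR until it is wanted again.
*/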
1296 case VMEXIT_VINTR:
1297 vmcb->vintr.fields.irq = 0;
1298 vmcb->general1_intercepts &= ~GENERAL1_INTERCEPT_VINTR;
1299 break;
1301 case VMEXIT_INVD:
1302 case VMEXIT_WBINVD:
1303 svm_vmexit_do_invalidate_cache(regs);
1304 break;
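/*
* Task-switch EXITINFO2 decoding (AMD APM vol. 2): bit 36 = caused by
* IRET, bit 38 = caused by far JMP, bit 44 = error code valid, with the
* error code itself in bits 31:0.
*/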
1306 case VMEXIT_TASK_SWITCH: {
1307 enum hvm_task_switch_reason reason;
1308 int32_t errcode = -1;
1309 if ( (vmcb->exitinfo2 >> 36) & 1 )
1310 reason = TSW_iret;
1311 else if ( (vmcb->exitinfo2 >> 38) & 1 )
1312 reason = TSW_jmp;
1313 else
1314 reason = TSW_call_or_int;
1315 if ( (vmcb->exitinfo2 >> 44) & 1 )
1316 errcode = (uint32_t)vmcb->exitinfo2;
1318 /*
1319 * Some processors set the EXITINTINFO field when the task switch
1320 * is caused by a task gate in the IDT. In this case we will be
1321 * emulating the event injection, so we do not want the processor
1322 * to re-inject the original event!
1323 */
1324 vmcb->eventinj.bytes = 0;
1326 hvm_task_switch((uint16_t)vmcb->exitinfo1, reason, errcode);
1327 break;
1328 }
1330 case VMEXIT_CPUID:
1331 svm_vmexit_do_cpuid(regs);
1332 break;
1334 case VMEXIT_HLT:
1335 svm_vmexit_do_hlt(vmcb, regs);
1336 break;
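/*
* CR accesses, INVLPG/INVLPGA and port I/O are all handled by punting to
* the x86 emulator via handle_mmio(); emulation failure injects #GP.
*/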
1338 case VMEXIT_CR0_READ ... VMEXIT_CR15_READ:
1339 case VMEXIT_CR0_WRITE ... VMEXIT_CR15_WRITE:
1340 case VMEXIT_INVLPG:
1341 case VMEXIT_INVLPGA:
1342 case VMEXIT_IOIO:
1343 if ( !handle_mmio() )
1344 hvm_inject_exception(TRAP_gp_fault, 0, 0);
1345 break;
1347 case VMEXIT_VMMCALL:
1348 if ( (inst_len = __get_instruction_length(v, INSTR_VMCALL)) == 0 )
1349 break;
1350 HVMTRACE_1D(VMMCALL, regs->eax);
1351 rc = hvm_do_hypercall(regs);
1352 if ( rc != HVM_HCALL_preempted )
1353 {
1354 __update_guest_eip(regs, inst_len);
1355 if ( rc == HVM_HCALL_invalidate )
1356 send_invalidate_req();
1357 }
1358 break;
1360 case VMEXIT_DR0_READ ... VMEXIT_DR7_READ:
1361 case VMEXIT_DR0_WRITE ... VMEXIT_DR7_WRITE:
1362 svm_dr_access(v, regs);
1363 break;
1365 case VMEXIT_MSR:
1366 svm_do_msr_access(regs);
1367 break;
1369 case VMEXIT_SHUTDOWN:
1370 hvm_triple_fault();
1371 break;
1373 case VMEXIT_RDTSC:
1374 svm_vmexit_do_rdtsc(regs);
1375 break;
1377 case VMEXIT_RDTSCP:
1378 case VMEXIT_MONITOR:
1379 case VMEXIT_MWAIT:
1380 case VMEXIT_VMRUN:
1381 case VMEXIT_VMLOAD:
1382 case VMEXIT_VMSAVE:
1383 case VMEXIT_STGI:
1384 case VMEXIT_CLGI:
1385 case VMEXIT_SKINIT:
1386 svm_inject_exception(TRAP_invalid_op, HVM_DELIVER_NO_ERROR_CODE, 0);
1387 break;
1389 case VMEXIT_NPF:
1390 perfc_incra(svmexits, VMEXIT_NPF_PERFC);
1391 regs->error_code = vmcb->exitinfo1;
1392 svm_do_nested_pgfault(vmcb->exitinfo2, regs);
1393 break;
1395 case VMEXIT_IRET:
1396 /*
1397 * IRET clears the NMI mask. However because we clear the mask
1398 * /before/ executing IRET, we set the interrupt shadow to prevent
1399 * a pending NMI from being injected immediately. This will work
1400 * perfectly unless the IRET instruction faults: in that case we
1401 * may inject an NMI before the NMI handler's IRET instruction is
1402 * retired.
1403 */
1404 vmcb->general1_intercepts &= ~GENERAL1_INTERCEPT_IRET;
1405 vmcb->interrupt_shadow = 1;
1406 break;
1408 default:
1409 exit_and_crash:
1410 gdprintk(XENLOG_ERR, "unexpected VMEXIT: exit reason = 0x%x, "
1411 "exitinfo1 = %"PRIx64", exitinfo2 = %"PRIx64"\n",
1412 exit_reason,
1413 (u64)vmcb->exitinfo1, (u64)vmcb->exitinfo2);
1414 domain_crash(v->domain);
1415 break;
1416 }
1418 /* The exit may have updated the TPR: reflect this in the hardware vtpr */
1419 vmcb->vintr.fields.tpr =
1420 (vlapic_get_reg(vcpu_vlapic(v), APIC_TASKPRI) & 0xFF) >> 4;
1421 }
1423 asmlinkage void svm_trace_vmentry(void)
1424 {
1425 HVMTRACE_ND (VMENTRY, 1/*cycles*/, 0, 0, 0, 0, 0, 0, 0);
1426 }
1428 /*
1429 * Local variables:
1430 * mode: C
1431 * c-set-style: "BSD"
1432 * c-basic-offset: 4
1433 * tab-width: 4
1434 * indent-tabs-mode: nil
1435 * End:
1436 */