ia64/xen-unstable: xen/arch/x86/hvm/svm/svm.c @ changeset 19798:af06333d4c5d

x86 hvm: Fix bootstrapped boolean check in start_{svm,vmx}().

Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
Author:  Keir Fraser <keir.fraser@citrix.com>
Date:    Thu Jun 18 10:46:21 2009 +0100
Parent:  44fe7ad6fee8

/*
 * svm.c: handling SVM architecture-related VM exits
 * Copyright (c) 2004, Intel Corporation.
 * Copyright (c) 2005-2007, Advanced Micro Devices, Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 */

#include <xen/config.h>
#include <xen/init.h>
#include <xen/lib.h>
#include <xen/trace.h>
#include <xen/sched.h>
#include <xen/irq.h>
#include <xen/softirq.h>
#include <xen/hypercall.h>
#include <xen/domain_page.h>
#include <asm/current.h>
#include <asm/io.h>
#include <asm/paging.h>
#include <asm/p2m.h>
#include <asm/regs.h>
#include <asm/cpufeature.h>
#include <asm/processor.h>
#include <asm/types.h>
#include <asm/debugreg.h>
#include <asm/msr.h>
#include <asm/spinlock.h>
#include <asm/hvm/emulate.h>
#include <asm/hvm/hvm.h>
#include <asm/hvm/support.h>
#include <asm/hvm/io.h>
#include <asm/hvm/svm/asid.h>
#include <asm/hvm/svm/svm.h>
#include <asm/hvm/svm/vmcb.h>
#include <asm/hvm/svm/emulate.h>
#include <asm/hvm/svm/intr.h>
#include <asm/x86_emulate.h>
#include <public/sched.h>
#include <asm/hvm/vpt.h>
#include <asm/hvm/trace.h>
#include <asm/hap.h>

u32 svm_feature_flags;

#define set_segment_register(name, value) \
    asm volatile ( "movw %%ax ,%%" STR(name) "" : : "a" (value) )

enum handler_return { HNDL_done, HNDL_unhandled, HNDL_exception_raised };

asmlinkage void do_IRQ(struct cpu_user_regs *);

static void svm_update_guest_cr(struct vcpu *v, unsigned int cr);
static void svm_update_guest_efer(struct vcpu *v);
static void svm_inject_exception(
    unsigned int trapnr, int errcode, unsigned long cr2);
static void svm_cpuid_intercept(
    unsigned int *eax, unsigned int *ebx,
    unsigned int *ecx, unsigned int *edx);
static void svm_wbinvd_intercept(void);
static void svm_fpu_dirty_intercept(void);
static int svm_msr_read_intercept(struct cpu_user_regs *regs);
static int svm_msr_write_intercept(struct cpu_user_regs *regs);
static void svm_invlpg_intercept(unsigned long vaddr);

/* va of hardware host save area */
static void *hsa[NR_CPUS] __read_mostly;

/* vmcb used for extended host state */
static void *root_vmcb[NR_CPUS] __read_mostly;
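
/*
 * Descriptive note: advance the guest RIP past an instruction that Xen has
 * handled on the guest's behalf.  A zero inst_len (failed decode) is simply
 * ignored; anything above 15 bytes, the architectural maximum instruction
 * length, indicates corrupted state and crashes the domain.  Completing an
 * instruction also clears the interrupt shadow and EFLAGS.RF, and delivers
 * a single-step #DB if the guest is running with EFLAGS.TF set.
 */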
static void inline __update_guest_eip(
    struct cpu_user_regs *regs, unsigned int inst_len)
{
    struct vcpu *curr = current;

    if ( unlikely(inst_len == 0) )
        return;

    if ( unlikely(inst_len > 15) )
    {
        gdprintk(XENLOG_ERR, "Bad instruction length %u\n", inst_len);
        domain_crash(curr->domain);
        return;
    }

    ASSERT(regs == guest_cpu_user_regs());

    regs->eip += inst_len;
    regs->eflags &= ~X86_EFLAGS_RF;

    curr->arch.hvm_svm.vmcb->interrupt_shadow = 0;

    if ( regs->eflags & X86_EFLAGS_TF )
        svm_inject_exception(TRAP_debug, HVM_DELIVER_NO_ERROR_CODE, 0);
}

static void svm_cpu_down(void)
{
    write_efer(read_efer() & ~EFER_SVME);
}

static enum handler_return long_mode_do_msr_write(struct cpu_user_regs *regs)
{
    u64 msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
    u32 ecx = regs->ecx;

    HVM_DBG_LOG(DBG_LEVEL_0, "msr %x msr_content %"PRIx64,
                ecx, msr_content);

    switch ( ecx )
    {
    case MSR_EFER:
        if ( hvm_set_efer(msr_content) )
            return HNDL_exception_raised;
        break;

    case MSR_IA32_MC4_MISC: /* Threshold register */
    case MSR_F10_MC4_MISC1 ... MSR_F10_MC4_MISC3:
        /*
         * MCA/MCE: Threshold register is reported to be locked, so we ignore
         * all write accesses. This behaviour matches real HW, so guests should
         * have no problem with this.
         */
        break;

    default:
        return HNDL_unhandled;
    }

    return HNDL_done;
}
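
/*
 * Descriptive note: debug registers are switched lazily.  DR accesses are
 * intercepted until the guest first touches a debug register, at which
 * point __restore_debug_registers() loads the guest values into hardware
 * and drops the intercepts.  On the next context switch svm_save_dr()
 * writes the hardware values back into the guest context and re-arms the
 * intercepts, so guests that never use DR0-DR7 pay nothing for them.
 */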
static void svm_save_dr(struct vcpu *v)
{
    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;

    if ( !v->arch.hvm_vcpu.flag_dr_dirty )
        return;

    /* Clear the DR dirty flag and re-enable intercepts for DR accesses. */
    v->arch.hvm_vcpu.flag_dr_dirty = 0;
    v->arch.hvm_svm.vmcb->dr_intercepts = ~0u;

    v->arch.guest_context.debugreg[0] = read_debugreg(0);
    v->arch.guest_context.debugreg[1] = read_debugreg(1);
    v->arch.guest_context.debugreg[2] = read_debugreg(2);
    v->arch.guest_context.debugreg[3] = read_debugreg(3);
    v->arch.guest_context.debugreg[6] = vmcb->dr6;
    v->arch.guest_context.debugreg[7] = vmcb->dr7;
}

static void __restore_debug_registers(struct vcpu *v)
{
    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;

    if ( v->arch.hvm_vcpu.flag_dr_dirty )
        return;

    v->arch.hvm_vcpu.flag_dr_dirty = 1;
    vmcb->dr_intercepts = 0;

    write_debugreg(0, v->arch.guest_context.debugreg[0]);
    write_debugreg(1, v->arch.guest_context.debugreg[1]);
    write_debugreg(2, v->arch.guest_context.debugreg[2]);
    write_debugreg(3, v->arch.guest_context.debugreg[3]);
    vmcb->dr6 = v->arch.guest_context.debugreg[6];
    vmcb->dr7 = v->arch.guest_context.debugreg[7];
}

/*
 * DR7 is saved and restored on every vmexit. Other debug registers only
 * need to be restored if their value is going to affect execution -- i.e.,
 * if one of the breakpoints is enabled. So mask out all bits that don't
 * enable some breakpoint functionality.
 */
static void svm_restore_dr(struct vcpu *v)
{
    if ( unlikely(v->arch.guest_context.debugreg[7] & DR7_ACTIVE_MASK) )
        __restore_debug_registers(v);
}

static int svm_vmcb_save(struct vcpu *v, struct hvm_hw_cpu *c)
{
    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;

    c->cr0 = v->arch.hvm_vcpu.guest_cr[0];
    c->cr2 = v->arch.hvm_vcpu.guest_cr[2];
    c->cr3 = v->arch.hvm_vcpu.guest_cr[3];
    c->cr4 = v->arch.hvm_vcpu.guest_cr[4];

    c->sysenter_cs = v->arch.hvm_svm.guest_sysenter_cs;
    c->sysenter_esp = v->arch.hvm_svm.guest_sysenter_esp;
    c->sysenter_eip = v->arch.hvm_svm.guest_sysenter_eip;

    c->pending_event = 0;
    c->error_code = 0;
    if ( vmcb->eventinj.fields.v &&
         hvm_event_needs_reinjection(vmcb->eventinj.fields.type,
                                     vmcb->eventinj.fields.vector) )
    {
        c->pending_event = (uint32_t)vmcb->eventinj.bytes;
        c->error_code = vmcb->eventinj.fields.errorcode;
    }

    return 1;
}
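
/*
 * Descriptive note: rebuild a vCPU's SVM state from a saved hvm_hw_cpu
 * record (domain restore or migration).  The saved pending-event fields are
 * sanity checked first, rejecting reserved event types and nonzero reserved
 * bits.  Under shadow paging the saved CR3 must also be translated to an
 * MFN and a reference taken on the new top-level pagetable page before it
 * is installed.
 */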
static int svm_vmcb_restore(struct vcpu *v, struct hvm_hw_cpu *c)
{
    unsigned long mfn = 0;
    p2m_type_t p2mt;
    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;

    if ( c->pending_valid &&
         ((c->pending_type == 1) || (c->pending_type > 6) ||
          (c->pending_reserved != 0)) )
    {
        gdprintk(XENLOG_ERR, "Invalid pending event 0x%"PRIx32".\n",
                 c->pending_event);
        return -EINVAL;
    }

    if ( !paging_mode_hap(v->domain) )
    {
        if ( c->cr0 & X86_CR0_PG )
        {
            mfn = mfn_x(gfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT, &p2mt));
            if ( !p2m_is_ram(p2mt) || !get_page(mfn_to_page(mfn), v->domain) )
            {
                gdprintk(XENLOG_ERR, "Invalid CR3 value=0x%"PRIx64"\n",
                         c->cr3);
                return -EINVAL;
            }
        }

        if ( v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PG )
            put_page(pagetable_get_page(v->arch.guest_table));

        v->arch.guest_table = pagetable_from_pfn(mfn);
    }

    v->arch.hvm_vcpu.guest_cr[0] = c->cr0 | X86_CR0_ET;
    v->arch.hvm_vcpu.guest_cr[2] = c->cr2;
    v->arch.hvm_vcpu.guest_cr[3] = c->cr3;
    v->arch.hvm_vcpu.guest_cr[4] = c->cr4;
    svm_update_guest_cr(v, 0);
    svm_update_guest_cr(v, 2);
    svm_update_guest_cr(v, 4);

    v->arch.hvm_svm.guest_sysenter_cs = c->sysenter_cs;
    v->arch.hvm_svm.guest_sysenter_esp = c->sysenter_esp;
    v->arch.hvm_svm.guest_sysenter_eip = c->sysenter_eip;

    if ( paging_mode_hap(v->domain) )
    {
        vmcb->np_enable = 1;
        vmcb->g_pat = 0x0007040600070406ULL; /* guest PAT */
        vmcb->h_cr3 = pagetable_get_paddr(v->domain->arch.phys_table);
    }

    if ( c->pending_valid )
    {
        gdprintk(XENLOG_INFO, "Re-injecting 0x%"PRIx32", 0x%"PRIx32"\n",
                 c->pending_event, c->error_code);

        if ( hvm_event_needs_reinjection(c->pending_type, c->pending_vector) )
        {
            vmcb->eventinj.bytes = c->pending_event;
            vmcb->eventinj.fields.errorcode = c->error_code;
        }
    }

    paging_update_paging_modes(v);

    return 0;
}

static void svm_save_cpu_state(struct vcpu *v, struct hvm_hw_cpu *data)
{
    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;

    data->shadow_gs = vmcb->kerngsbase;
    data->msr_lstar = vmcb->lstar;
    data->msr_star = vmcb->star;
    data->msr_cstar = vmcb->cstar;
    data->msr_syscall_mask = vmcb->sfmask;
    data->msr_efer = v->arch.hvm_vcpu.guest_efer;
    data->msr_flags = -1ULL;

    data->tsc = hvm_get_guest_tsc(v);
}

static void svm_load_cpu_state(struct vcpu *v, struct hvm_hw_cpu *data)
{
    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;

    vmcb->kerngsbase = data->shadow_gs;
    vmcb->lstar = data->msr_lstar;
    vmcb->star = data->msr_star;
    vmcb->cstar = data->msr_cstar;
    vmcb->sfmask = data->msr_syscall_mask;
    v->arch.hvm_vcpu.guest_efer = data->msr_efer;
    svm_update_guest_efer(v);

    hvm_set_guest_tsc(v, data->tsc);
}

static void svm_save_vmcb_ctxt(struct vcpu *v, struct hvm_hw_cpu *ctxt)
{
    svm_save_cpu_state(v, ctxt);
    svm_vmcb_save(v, ctxt);
}

static int svm_load_vmcb_ctxt(struct vcpu *v, struct hvm_hw_cpu *ctxt)
{
    svm_load_cpu_state(v, ctxt);
    if (svm_vmcb_restore(v, ctxt)) {
        printk("svm_vmcb restore failed!\n");
        domain_crash(v->domain);
        return -EINVAL;
    }

    return 0;
}

static void svm_fpu_enter(struct vcpu *v)
{
    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;

    setup_fpu(v);
    vmcb->exception_intercepts &= ~(1U << TRAP_no_device);
}

static void svm_fpu_leave(struct vcpu *v)
{
    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;

    ASSERT(!v->fpu_dirtied);
    ASSERT(read_cr0() & X86_CR0_TS);

    /*
     * If the guest does not have TS enabled then we must cause and handle an
     * exception on first use of the FPU. If the guest *does* have TS enabled
     * then this is not necessary: no FPU activity can occur until the guest
     * clears CR0.TS, and we will initialise the FPU when that happens.
     */
    if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS) )
    {
        v->arch.hvm_svm.vmcb->exception_intercepts |= 1U << TRAP_no_device;
        vmcb->cr0 |= X86_CR0_TS;
    }
}
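
/*
 * Descriptive note: SVM exposes a single 'interrupt_shadow' bit, so both of
 * the generic HVM shadow kinds (MOV-SS and STI blocking) are folded into
 * it.  NMI blocking has no architectural flag at all; it is tracked by
 * intercepting IRET, since executing IRET is what re-enables NMI delivery.
 */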
static unsigned int svm_get_interrupt_shadow(struct vcpu *v)
{
    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
    unsigned int intr_shadow = 0;

    if ( vmcb->interrupt_shadow )
        intr_shadow |= HVM_INTR_SHADOW_MOV_SS | HVM_INTR_SHADOW_STI;

    if ( vmcb->general1_intercepts & GENERAL1_INTERCEPT_IRET )
        intr_shadow |= HVM_INTR_SHADOW_NMI;

    return intr_shadow;
}

static void svm_set_interrupt_shadow(struct vcpu *v, unsigned int intr_shadow)
{
    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;

    vmcb->interrupt_shadow =
        !!(intr_shadow & (HVM_INTR_SHADOW_MOV_SS|HVM_INTR_SHADOW_STI));

    vmcb->general1_intercepts &= ~GENERAL1_INTERCEPT_IRET;
    if ( intr_shadow & HVM_INTR_SHADOW_NMI )
        vmcb->general1_intercepts |= GENERAL1_INTERCEPT_IRET;
}
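
/*
 * Descriptive note (return value encoding, as used by callers such as the
 * emulator): 0 = real mode, 1 = virtual-8086 mode, and otherwise the
 * default address size in bytes -- 2 for 16-bit protected mode, 4 for
 * 32-bit protected mode, 8 for 64-bit (long) mode.
 */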
static int svm_guest_x86_mode(struct vcpu *v)
{
    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;

    if ( unlikely(!(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE)) )
        return 0;
    if ( unlikely(guest_cpu_user_regs()->eflags & X86_EFLAGS_VM) )
        return 1;
    if ( hvm_long_mode_enabled(v) && likely(vmcb->cs.attr.fields.l) )
        return 8;
    return (likely(vmcb->cs.attr.fields.db) ? 4 : 2);
}
static void svm_update_host_cr3(struct vcpu *v)
{
    /* SVM doesn't have a HOST_CR3 equivalent to update. */
}

static void svm_update_guest_cr(struct vcpu *v, unsigned int cr)
{
    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;

    switch ( cr )
    {
    case 0: {
        unsigned long hw_cr0_mask = 0;

        if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS) )
        {
            if ( v != current )
                hw_cr0_mask |= X86_CR0_TS;
            else if ( vmcb->cr0 & X86_CR0_TS )
                svm_fpu_enter(v);
        }

        vmcb->cr0 = v->arch.hvm_vcpu.guest_cr[0] | hw_cr0_mask;
        if ( !paging_mode_hap(v->domain) )
            vmcb->cr0 |= X86_CR0_PG | X86_CR0_WP;
        break;
    }
    case 2:
        vmcb->cr2 = v->arch.hvm_vcpu.guest_cr[2];
        break;
    case 3:
        vmcb->cr3 = v->arch.hvm_vcpu.hw_cr[3];
        svm_asid_inv_asid(v);
        break;
    case 4:
        vmcb->cr4 = HVM_CR4_HOST_MASK;
        if ( paging_mode_hap(v->domain) )
            vmcb->cr4 &= ~X86_CR4_PAE;
        vmcb->cr4 |= v->arch.hvm_vcpu.guest_cr[4];
        break;
    default:
        BUG();
    }
}
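
/*
 * Descriptive note: the EFER the guest sees and the EFER the hardware runs
 * with differ.  EFER.SVME must always be set while in guest context, and
 * EFER.LME is only made visible to the hardware once EFER.LMA is set,
 * i.e. once the guest has actually activated long mode by enabling paging.
 */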
static void svm_update_guest_efer(struct vcpu *v)
{
    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;

    vmcb->efer = (v->arch.hvm_vcpu.guest_efer | EFER_SVME) & ~EFER_LME;
    if ( vmcb->efer & EFER_LMA )
        vmcb->efer |= EFER_LME;
}

static void svm_flush_guest_tlbs(void)
{
    /* Roll over the CPU's ASID generation, so it gets a clean TLB when we
     * next VMRUN. (If ASIDs are disabled, the whole TLB is flushed on
     * VMRUN anyway). */
    svm_asid_inc_generation();
}
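
/*
 * Descriptive note: some guest state (FS/GS/TR/LDTR, KernelGSbase and the
 * SYSCALL/SYSENTER MSRs) lives in hidden processor registers while the
 * vCPU is running and is only written back to the VMCB by VMSAVE.
 * svm_sync_vmcb() performs that VMSAVE on demand; the vmcb_in_sync flag
 * avoids redundant VMSAVEs while the cached copy remains valid.
 */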
static void svm_sync_vmcb(struct vcpu *v)
{
    struct arch_svm_struct *arch_svm = &v->arch.hvm_svm;

    if ( arch_svm->vmcb_in_sync )
        return;

    arch_svm->vmcb_in_sync = 1;

    svm_vmsave(arch_svm->vmcb);
}

static void svm_get_segment_register(struct vcpu *v, enum x86_segment seg,
                                     struct segment_register *reg)
{
    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;

    ASSERT((v == current) || !vcpu_runnable(v));

    switch ( seg )
    {
    case x86_seg_cs:
        memcpy(reg, &vmcb->cs, sizeof(*reg));
        reg->attr.fields.g = reg->limit > 0xFFFFF;
        break;
    case x86_seg_ds:
        memcpy(reg, &vmcb->ds, sizeof(*reg));
        if ( reg->attr.fields.type != 0 )
            reg->attr.fields.type |= 0x1;
        break;
    case x86_seg_es:
        memcpy(reg, &vmcb->es, sizeof(*reg));
        if ( reg->attr.fields.type != 0 )
            reg->attr.fields.type |= 0x1;
        break;
    case x86_seg_fs:
        svm_sync_vmcb(v);
        memcpy(reg, &vmcb->fs, sizeof(*reg));
        if ( reg->attr.fields.type != 0 )
            reg->attr.fields.type |= 0x1;
        break;
    case x86_seg_gs:
        svm_sync_vmcb(v);
        memcpy(reg, &vmcb->gs, sizeof(*reg));
        if ( reg->attr.fields.type != 0 )
            reg->attr.fields.type |= 0x1;
        break;
    case x86_seg_ss:
        memcpy(reg, &vmcb->ss, sizeof(*reg));
        reg->attr.fields.dpl = vmcb->cpl;
        if ( reg->attr.fields.type == 0 )
            reg->attr.fields.db = 0;
        break;
    case x86_seg_tr:
        svm_sync_vmcb(v);
        memcpy(reg, &vmcb->tr, sizeof(*reg));
        reg->attr.fields.type |= 0x2;
        break;
    case x86_seg_gdtr:
        memcpy(reg, &vmcb->gdtr, sizeof(*reg));
        break;
    case x86_seg_idtr:
        memcpy(reg, &vmcb->idtr, sizeof(*reg));
        break;
    case x86_seg_ldtr:
        svm_sync_vmcb(v);
        memcpy(reg, &vmcb->ldtr, sizeof(*reg));
        break;
    default:
        BUG();
    }
}

static void svm_set_segment_register(struct vcpu *v, enum x86_segment seg,
                                     struct segment_register *reg)
{
    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
    int sync = 0;

    ASSERT((v == current) || !vcpu_runnable(v));

    switch ( seg )
    {
    case x86_seg_fs:
    case x86_seg_gs:
    case x86_seg_tr:
    case x86_seg_ldtr:
        sync = (v == current);
        break;
    default:
        break;
    }

    if ( sync )
        svm_sync_vmcb(v);

    switch ( seg )
    {
    case x86_seg_cs:
        memcpy(&vmcb->cs, reg, sizeof(*reg));
        break;
    case x86_seg_ds:
        memcpy(&vmcb->ds, reg, sizeof(*reg));
        break;
    case x86_seg_es:
        memcpy(&vmcb->es, reg, sizeof(*reg));
        break;
    case x86_seg_fs:
        memcpy(&vmcb->fs, reg, sizeof(*reg));
        break;
    case x86_seg_gs:
        memcpy(&vmcb->gs, reg, sizeof(*reg));
        break;
    case x86_seg_ss:
        memcpy(&vmcb->ss, reg, sizeof(*reg));
        vmcb->cpl = vmcb->ss.attr.fields.dpl;
        break;
    case x86_seg_tr:
        memcpy(&vmcb->tr, reg, sizeof(*reg));
        break;
    case x86_seg_gdtr:
        vmcb->gdtr.base = reg->base;
        vmcb->gdtr.limit = (uint16_t)reg->limit;
        break;
    case x86_seg_idtr:
        vmcb->idtr.base = reg->base;
        vmcb->idtr.limit = (uint16_t)reg->limit;
        break;
    case x86_seg_ldtr:
        memcpy(&vmcb->ldtr, reg, sizeof(*reg));
        break;
    default:
        BUG();
    }

    if ( sync )
        svm_vmload(vmcb);
}

static void svm_set_tsc_offset(struct vcpu *v, u64 offset)
{
    v->arch.hvm_svm.vmcb->tsc_offset = offset;
}
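
/*
 * Descriptive note: the hypercall page handed to the guest consists of
 * 32-byte stubs, one per hypercall number; stub i is
 * "mov $i, %eax; vmmcall; ret" (0x0f 0x01 0xd9 is the VMMCALL encoding).
 * A guest invokes hypercall N by calling offset N*32 into the page, for
 * example (illustrative only, assuming the usual Xen hypercall ABI in
 * which arguments are passed in registers):
 *
 *     call hypercall_page + __HYPERVISOR_sched_op * 32
 *
 * HYPERVISOR_iret cannot be expressed this way, so its slot is filled with
 * UD2 below.
 */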
static void svm_init_hypercall_page(struct domain *d, void *hypercall_page)
{
    char *p;
    int i;

    for ( i = 0; i < (PAGE_SIZE / 32); i++ )
    {
        p = (char *)(hypercall_page + (i * 32));
        *(u8 *)(p + 0) = 0xb8; /* mov imm32, %eax */
        *(u32 *)(p + 1) = i;
        *(u8 *)(p + 5) = 0x0f; /* vmmcall */
        *(u8 *)(p + 6) = 0x01;
        *(u8 *)(p + 7) = 0xd9;
        *(u8 *)(p + 8) = 0xc3; /* ret */
    }

    /* Don't support HYPERVISOR_iret at the moment */
    *(u16 *)(hypercall_page + (__HYPERVISOR_iret * 32)) = 0x0b0f; /* ud2 */
}

static void svm_ctxt_switch_from(struct vcpu *v)
{
    int cpu = smp_processor_id();

    svm_fpu_leave(v);

    svm_save_dr(v);

    svm_sync_vmcb(v);
    svm_vmload(root_vmcb[cpu]);

#ifdef __x86_64__
    /* Resume use of ISTs now that the host TR is reinstated. */
    idt_tables[cpu][TRAP_double_fault].a |= IST_DF << 32;
    idt_tables[cpu][TRAP_nmi].a |= IST_NMI << 32;
    idt_tables[cpu][TRAP_machine_check].a |= IST_MCE << 32;
#endif
}

static void svm_ctxt_switch_to(struct vcpu *v)
{
    int cpu = smp_processor_id();

#ifdef __x86_64__
    /*
     * This is required because VMRUN performs a consistency check and some
     * of the DOM0 selectors point to invalid GDT locations, which would
     * cause AMD processors to shut down.
     */
    set_segment_register(ds, 0);
    set_segment_register(es, 0);
    set_segment_register(ss, 0);

    /*
     * Cannot use ISTs for NMI/#MC/#DF while we are running with the guest TR.
     * But this doesn't matter: the IST is only required to handle
     * SYSCALL/SYSRET.
     */
    idt_tables[cpu][TRAP_double_fault].a &= ~(7UL << 32);
    idt_tables[cpu][TRAP_nmi].a &= ~(7UL << 32);
    idt_tables[cpu][TRAP_machine_check].a &= ~(7UL << 32);
#endif

    svm_restore_dr(v);

    svm_vmsave(root_vmcb[cpu]);
    svm_vmload(v->arch.hvm_svm.vmcb);
}

static void svm_do_resume(struct vcpu *v)
{
    bool_t debug_state = v->domain->debugger_attached;

    if ( unlikely(v->arch.hvm_vcpu.debug_state_latch != debug_state) )
    {
        uint32_t mask = (1U << TRAP_debug) | (1U << TRAP_int3);
        v->arch.hvm_vcpu.debug_state_latch = debug_state;
        if ( debug_state )
            v->arch.hvm_svm.vmcb->exception_intercepts |= mask;
        else
            v->arch.hvm_svm.vmcb->exception_intercepts &= ~mask;
    }

    if ( v->arch.hvm_svm.launch_core != smp_processor_id() )
    {
        v->arch.hvm_svm.launch_core = smp_processor_id();
        hvm_migrate_timers(v);

        /* Migrating to another ASID domain. Request a new ASID. */
        svm_asid_init_vcpu(v);
    }

    /* Reflect the vlapic's TPR in the hardware vtpr */
    v->arch.hvm_svm.vmcb->vintr.fields.tpr =
        (vlapic_get_reg(vcpu_vlapic(v), APIC_TASKPRI) & 0xFF) >> 4;

    hvm_do_resume(v);
    reset_stack_and_jump(svm_asm_do_resume);
}

static int svm_domain_initialise(struct domain *d)
{
    return 0;
}

static void svm_domain_destroy(struct domain *d)
{
}

static int svm_vcpu_initialise(struct vcpu *v)
{
    int rc;

    v->arch.schedule_tail = svm_do_resume;
    v->arch.ctxt_switch_from = svm_ctxt_switch_from;
    v->arch.ctxt_switch_to = svm_ctxt_switch_to;

    v->arch.hvm_svm.launch_core = -1;

    if ( (rc = svm_create_vmcb(v)) != 0 )
    {
        dprintk(XENLOG_WARNING,
                "Failed to create VMCB for vcpu %d: err=%d.\n",
                v->vcpu_id, rc);
        return rc;
    }

    return 0;
}

static void svm_vcpu_destroy(struct vcpu *v)
{
    svm_destroy_vmcb(v);
}
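
/*
 * Descriptive note: queue a hardware exception for injection on the next
 * VMRUN.  Debug and breakpoint traps are diverted to an attached debugger
 * instead of the guest.  If another hardware exception is already pending,
 * the two are combined via hvm_combine_hw_exceptions(), which may escalate
 * the pair to a double fault (in which case the error code is forced to
 * zero).
 */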
static void svm_inject_exception(
    unsigned int trapnr, int errcode, unsigned long cr2)
{
    struct vcpu *curr = current;
    struct vmcb_struct *vmcb = curr->arch.hvm_svm.vmcb;
    eventinj_t event = vmcb->eventinj;

    switch ( trapnr )
    {
    case TRAP_debug:
        if ( guest_cpu_user_regs()->eflags & X86_EFLAGS_TF )
        {
            __restore_debug_registers(curr);
            vmcb->dr6 |= 0x4000;
        }
    case TRAP_int3:
        if ( curr->domain->debugger_attached )
        {
            /* Debug/Int3: Trap to debugger. */
            domain_pause_for_debugger();
            return;
        }
    }

    if ( unlikely(event.fields.v) &&
         (event.fields.type == X86_EVENTTYPE_HW_EXCEPTION) )
    {
        trapnr = hvm_combine_hw_exceptions(event.fields.vector, trapnr);
        if ( trapnr == TRAP_double_fault )
            errcode = 0;
    }

    event.bytes = 0;
    event.fields.v = 1;
    event.fields.type = X86_EVENTTYPE_HW_EXCEPTION;
    event.fields.vector = trapnr;
    event.fields.ev = (errcode != HVM_DELIVER_NO_ERROR_CODE);
    event.fields.errorcode = errcode;

    vmcb->eventinj = event;

    if ( trapnr == TRAP_page_fault )
    {
        vmcb->cr2 = curr->arch.hvm_vcpu.guest_cr[2] = cr2;
        HVMTRACE_LONG_2D(PF_INJECT, errcode, TRC_PAR_LONG(cr2));
    }
    else
    {
        HVMTRACE_2D(INJ_EXC, trapnr, errcode);
    }
}

static int svm_event_pending(struct vcpu *v)
{
    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
    return vmcb->eventinj.fields.v;
}

static int svm_do_pmu_interrupt(struct cpu_user_regs *regs)
{
    return 0;
}

static struct hvm_function_table svm_function_table = {
    .name = "SVM",
    .cpu_down = svm_cpu_down,
    .domain_initialise = svm_domain_initialise,
    .domain_destroy = svm_domain_destroy,
    .vcpu_initialise = svm_vcpu_initialise,
    .vcpu_destroy = svm_vcpu_destroy,
    .save_cpu_ctxt = svm_save_vmcb_ctxt,
    .load_cpu_ctxt = svm_load_vmcb_ctxt,
    .get_interrupt_shadow = svm_get_interrupt_shadow,
    .set_interrupt_shadow = svm_set_interrupt_shadow,
    .guest_x86_mode = svm_guest_x86_mode,
    .get_segment_register = svm_get_segment_register,
    .set_segment_register = svm_set_segment_register,
    .update_host_cr3 = svm_update_host_cr3,
    .update_guest_cr = svm_update_guest_cr,
    .update_guest_efer = svm_update_guest_efer,
    .flush_guest_tlbs = svm_flush_guest_tlbs,
    .set_tsc_offset = svm_set_tsc_offset,
    .inject_exception = svm_inject_exception,
    .init_hypercall_page = svm_init_hypercall_page,
    .event_pending = svm_event_pending,
    .do_pmu_interrupt = svm_do_pmu_interrupt,
    .cpuid_intercept = svm_cpuid_intercept,
    .wbinvd_intercept = svm_wbinvd_intercept,
    .fpu_dirty_intercept = svm_fpu_dirty_intercept,
    .msr_read_intercept = svm_msr_read_intercept,
    .msr_write_intercept = svm_msr_write_intercept,
    .invlpg_intercept = svm_invlpg_intercept
};
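
/*
 * Descriptive note: per-CPU SVM bring-up.  Refuse to continue if the BIOS
 * has locked the feature out via VM_CR.SVMDIS, allocate this CPU's host
 * save area and "root" VMCB, turn on EFER.SVME, point MSR_K8_VM_HSAVE_PA
 * at the host save area, and initialise ASID bookkeeping.  Returns 1 on
 * success, 0 on failure.
 */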
static int svm_cpu_up(struct cpuinfo_x86 *c)
{
    u32 eax, edx, phys_hsa_lo, phys_hsa_hi;
    u64 phys_hsa;
    int cpu = smp_processor_id();

    /* Check whether SVM feature is disabled in BIOS */
    rdmsr(MSR_K8_VM_CR, eax, edx);
    if ( eax & K8_VMCR_SVME_DISABLE )
    {
        printk("CPU%d: AMD SVM Extension is disabled in BIOS.\n", cpu);
        return 0;
    }

    if ( ((hsa[cpu] == NULL) &&
          ((hsa[cpu] = alloc_host_save_area()) == NULL)) ||
         ((root_vmcb[cpu] == NULL) &&
          ((root_vmcb[cpu] = alloc_vmcb()) == NULL)) )
        return 0;

    write_efer(read_efer() | EFER_SVME);

    /* Initialize the HSA for this core. */
    phys_hsa = (u64)virt_to_maddr(hsa[cpu]);
    phys_hsa_lo = (u32)phys_hsa;
    phys_hsa_hi = (u32)(phys_hsa >> 32);
    wrmsr(MSR_K8_VM_HSAVE_PA, phys_hsa_lo, phys_hsa_hi);

    /* Initialize core's ASID handling. */
    svm_asid_init(c);

    return 1;
}
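
/*
 * Descriptive note: start_svm() is called on every CPU as it comes up.  The
 * first caller wins the test_and_set_bool() race and performs the global
 * setup (capability detection, VMCB dump support, registering
 * svm_function_table); later callers only need the per-CPU svm_cpu_up()
 * work.  This is the 'bootstrapped' check referred to in the changeset
 * description above.
 */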
void start_svm(struct cpuinfo_x86 *c)
{
    static bool_t bootstrapped;

    if ( test_and_set_bool(bootstrapped) )
    {
        if ( hvm_enabled && !svm_cpu_up(c) )
        {
            printk("SVM: FATAL: failed to initialise CPU%d!\n",
                   smp_processor_id());
            BUG();
        }
        return;
    }

    /* Xen does not fill x86_capability words except 0. */
    boot_cpu_data.x86_capability[5] = cpuid_ecx(0x80000001);

    if ( !test_bit(X86_FEATURE_SVME, &boot_cpu_data.x86_capability) )
        return;

    if ( !svm_cpu_up(c) )
    {
        printk("SVM: failed to initialise.\n");
        return;
    }

    setup_vmcb_dump();

    svm_feature_flags = ((cpuid_eax(0x80000000) >= 0x8000000A) ?
                         cpuid_edx(0x8000000A) : 0);

    svm_function_table.hap_supported = cpu_has_svm_npt;

    hvm_enable(&svm_function_table);
}
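
/*
 * Descriptive note: handle a nested (NPT) page fault.  Faults on emulated
 * MMIO or read-only p2m entries are forwarded to the MMIO/emulation path;
 * anything else that reaches this point is assumed to be a log-dirty
 * write, so the page is marked dirty and its p2m entry flipped back to
 * read-write.
 */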
static void svm_do_nested_pgfault(paddr_t gpa, struct cpu_user_regs *regs)
{
    p2m_type_t p2mt;
    mfn_t mfn;
    unsigned long gfn = gpa >> PAGE_SHIFT;

    /*
     * If this GFN is emulated MMIO or marked as read-only, pass the fault
     * to the mmio handler.
     */
    mfn = gfn_to_mfn_type_current(gfn, &p2mt, p2m_guest);
    if ( (p2mt == p2m_mmio_dm) || (p2mt == p2m_ram_ro) )
    {
        if ( !handle_mmio() )
            hvm_inject_exception(TRAP_gp_fault, 0, 0);
        return;
    }

    /* Log-dirty: mark the page dirty and let the guest write it again */
    paging_mark_dirty(current->domain, mfn_x(mfn));
    p2m_change_type(current->domain, gfn, p2m_ram_logdirty, p2m_ram_rw);
}

static void svm_fpu_dirty_intercept(void)
{
    struct vcpu *curr = current;
    struct vmcb_struct *vmcb = curr->arch.hvm_svm.vmcb;

    svm_fpu_enter(curr);

    if ( !(curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS) )
        vmcb->cr0 &= ~X86_CR0_TS;
}

#define bitmaskof(idx) (1U << ((idx) & 31))
static void svm_cpuid_intercept(
    unsigned int *eax, unsigned int *ebx,
    unsigned int *ecx, unsigned int *edx)
{
    unsigned int input = *eax;
    struct vcpu *v = current;

    hvm_cpuid(input, eax, ebx, ecx, edx);

    if ( input == 0x80000001 )
    {
        /* Fix up VLAPIC details. */
        if ( vlapic_hw_disabled(vcpu_vlapic(v)) )
            __clear_bit(X86_FEATURE_APIC & 31, edx);
    }

    HVMTRACE_5D (CPUID, input, *eax, *ebx, *ecx, *edx);
}

static void svm_vmexit_do_cpuid(struct cpu_user_regs *regs)
{
    unsigned int eax, ebx, ecx, edx, inst_len;

    if ( (inst_len = __get_instruction_length(current, INSTR_CPUID)) == 0 )
        return;

    eax = regs->eax;
    ebx = regs->ebx;
    ecx = regs->ecx;
    edx = regs->edx;

    svm_cpuid_intercept(&eax, &ebx, &ecx, &edx);

    regs->eax = eax;
    regs->ebx = ebx;
    regs->ecx = ecx;
    regs->edx = edx;

    __update_guest_eip(regs, inst_len);
}

static void svm_dr_access(struct vcpu *v, struct cpu_user_regs *regs)
{
    HVMTRACE_0D(DR_WRITE);
    __restore_debug_registers(v);
}
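
/*
 * Descriptive note: MSR reads that are not handled explicitly below fall
 * through to, in order, the Viridian (Hyper-V compatible) MSR range, Xen's
 * own hypervisor MSR range, and finally a real rdmsr_safe() on the host;
 * only if all of those decline is #GP injected into the guest.
 */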
static int svm_msr_read_intercept(struct cpu_user_regs *regs)
{
    u64 msr_content = 0;
    u32 ecx = regs->ecx, eax, edx;
    struct vcpu *v = current;
    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;

    switch ( ecx )
    {
    case MSR_EFER:
        msr_content = v->arch.hvm_vcpu.guest_efer;
        break;

    case MSR_IA32_SYSENTER_CS:
        msr_content = v->arch.hvm_svm.guest_sysenter_cs;
        break;
    case MSR_IA32_SYSENTER_ESP:
        msr_content = v->arch.hvm_svm.guest_sysenter_esp;
        break;
    case MSR_IA32_SYSENTER_EIP:
        msr_content = v->arch.hvm_svm.guest_sysenter_eip;
        break;

    case MSR_IA32_MC4_MISC: /* Threshold register */
    case MSR_F10_MC4_MISC1 ... MSR_F10_MC4_MISC3:
        /*
         * MCA/MCE: We report that the threshold register is unavailable
         * for OS use (locked by the BIOS).
         */
        msr_content = 1ULL << 61; /* MC4_MISC.Locked */
        break;

    case MSR_IA32_EBC_FREQUENCY_ID:
        /*
         * This Intel-only register may be accessed if this HVM guest
         * has been migrated from an Intel host. The value zero is not
         * particularly meaningful, but at least avoids the guest crashing!
         */
        msr_content = 0;
        break;

    case MSR_K8_VM_HSAVE_PA:
        goto gpf;

    case MSR_IA32_DEBUGCTLMSR:
        msr_content = vmcb->debugctlmsr;
        break;

    case MSR_IA32_LASTBRANCHFROMIP:
        msr_content = vmcb->lastbranchfromip;
        break;

    case MSR_IA32_LASTBRANCHTOIP:
        msr_content = vmcb->lastbranchtoip;
        break;

    case MSR_IA32_LASTINTFROMIP:
        msr_content = vmcb->lastintfromip;
        break;

    case MSR_IA32_LASTINTTOIP:
        msr_content = vmcb->lastinttoip;
        break;

    default:
        if ( rdmsr_viridian_regs(ecx, &eax, &edx) ||
             rdmsr_hypervisor_regs(ecx, &eax, &edx) ||
             rdmsr_safe(ecx, eax, edx) == 0 )
        {
            regs->eax = eax;
            regs->edx = edx;
            goto done;
        }
        goto gpf;
    }

    regs->eax = msr_content & 0xFFFFFFFF;
    regs->edx = msr_content >> 32;

 done:
    HVMTRACE_3D (MSR_READ, ecx, regs->eax, regs->edx);
    HVM_DBG_LOG(DBG_LEVEL_1, "returns: ecx=%x, eax=%lx, edx=%lx",
                ecx, (unsigned long)regs->eax, (unsigned long)regs->edx);
    return X86EMUL_OKAY;

 gpf:
    svm_inject_exception(TRAP_gp_fault, 0, 0);
    return X86EMUL_EXCEPTION;
}
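
/*
 * Descriptive note: writes to DEBUGCTL enable last-branch-record (LBR)
 * virtualisation when the hardware supports it (cpu_has_svm_lbrv): the LBR
 * control bit is set in the VMCB and the intercepts on the LBR MSRs are
 * dropped, so subsequent guest accesses to those MSRs hit the virtualised
 * copies directly rather than exiting to Xen.
 */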
static int svm_msr_write_intercept(struct cpu_user_regs *regs)
{
    u64 msr_content = 0;
    u32 ecx = regs->ecx;
    struct vcpu *v = current;
    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;

    msr_content = (u32)regs->eax | ((u64)regs->edx << 32);

    HVMTRACE_3D (MSR_WRITE, ecx, regs->eax, regs->edx);

    switch ( ecx )
    {
    case MSR_K8_VM_HSAVE_PA:
        goto gpf;

    case MSR_IA32_SYSENTER_CS:
        v->arch.hvm_svm.guest_sysenter_cs = msr_content;
        break;
    case MSR_IA32_SYSENTER_ESP:
        v->arch.hvm_svm.guest_sysenter_esp = msr_content;
        break;
    case MSR_IA32_SYSENTER_EIP:
        v->arch.hvm_svm.guest_sysenter_eip = msr_content;
        break;

    case MSR_IA32_DEBUGCTLMSR:
        vmcb->debugctlmsr = msr_content;
        if ( !msr_content || !cpu_has_svm_lbrv )
            break;
        vmcb->lbr_control.fields.enable = 1;
        svm_disable_intercept_for_msr(v, MSR_IA32_DEBUGCTLMSR);
        svm_disable_intercept_for_msr(v, MSR_IA32_LASTBRANCHFROMIP);
        svm_disable_intercept_for_msr(v, MSR_IA32_LASTBRANCHTOIP);
        svm_disable_intercept_for_msr(v, MSR_IA32_LASTINTFROMIP);
        svm_disable_intercept_for_msr(v, MSR_IA32_LASTINTTOIP);
        break;

    case MSR_IA32_LASTBRANCHFROMIP:
        vmcb->lastbranchfromip = msr_content;
        break;

    case MSR_IA32_LASTBRANCHTOIP:
        vmcb->lastbranchtoip = msr_content;
        break;

    case MSR_IA32_LASTINTFROMIP:
        vmcb->lastintfromip = msr_content;
        break;

    case MSR_IA32_LASTINTTOIP:
        vmcb->lastinttoip = msr_content;
        break;

    default:
        if ( wrmsr_viridian_regs(ecx, regs->eax, regs->edx) )
            break;

        switch ( long_mode_do_msr_write(regs) )
        {
        case HNDL_unhandled:
            wrmsr_hypervisor_regs(ecx, regs->eax, regs->edx);
            break;
        case HNDL_exception_raised:
            return X86EMUL_EXCEPTION;
        case HNDL_done:
            break;
        }
        break;
    }

    return X86EMUL_OKAY;

 gpf:
    svm_inject_exception(TRAP_gp_fault, 0, 0);
    return X86EMUL_EXCEPTION;
}
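
/*
 * Descriptive note: MSR-intercept VMEXIT.  EXITINFO1 is 0 for RDMSR and 1
 * for WRMSR; the instruction is only skipped (RIP advanced) if the generic
 * HVM layer reports that it handled the access without raising an
 * exception.
 */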
static void svm_do_msr_access(struct cpu_user_regs *regs)
{
    int rc, inst_len;
    struct vcpu *v = current;
    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;

    if ( vmcb->exitinfo1 == 0 )
    {
        if ( (inst_len = __get_instruction_length(v, INSTR_RDMSR)) == 0 )
            return;
        rc = hvm_msr_read_intercept(regs);
    }
    else
    {
        if ( (inst_len = __get_instruction_length(v, INSTR_WRMSR)) == 0 )
            return;
        rc = hvm_msr_write_intercept(regs);
    }

    if ( rc == X86EMUL_OKAY )
        __update_guest_eip(regs, inst_len);
}

static void svm_vmexit_do_hlt(struct vmcb_struct *vmcb,
                              struct cpu_user_regs *regs)
{
    unsigned int inst_len;

    if ( (inst_len = __get_instruction_length(current, INSTR_HLT)) == 0 )
        return;
    __update_guest_eip(regs, inst_len);

    hvm_hlt(regs->eflags);
}

static void svm_vmexit_do_rdtsc(struct cpu_user_regs *regs)
{
    unsigned int inst_len;

    if ( (inst_len = __get_instruction_length(current, INSTR_RDTSC)) == 0 )
        return;
    __update_guest_eip(regs, inst_len);

    hvm_rdtsc_intercept(regs);
}

static void svm_vmexit_ud_intercept(struct cpu_user_regs *regs)
{
    struct hvm_emulate_ctxt ctxt;
    int rc;

    hvm_emulate_prepare(&ctxt, regs);

    rc = hvm_emulate_one(&ctxt);

    switch ( rc )
    {
    case X86EMUL_UNHANDLEABLE:
        gdprintk(XENLOG_WARNING,
                 "instruction emulation failed @ %04x:%lx: "
                 "%02x %02x %02x %02x %02x %02x\n",
                 hvmemul_get_seg_reg(x86_seg_cs, &ctxt)->sel,
                 ctxt.insn_buf_eip,
                 ctxt.insn_buf[0], ctxt.insn_buf[1],
                 ctxt.insn_buf[2], ctxt.insn_buf[3],
                 ctxt.insn_buf[4], ctxt.insn_buf[5]);
        return;
    case X86EMUL_EXCEPTION:
        if ( ctxt.exn_pending )
            hvm_inject_exception(ctxt.exn_vector, ctxt.exn_error_code, 0);
        break;
    default:
        break;
    }

    hvm_emulate_writeback(&ctxt);
}

static void wbinvd_ipi(void *info)
{
    wbinvd();
}

static void svm_wbinvd_intercept(void)
{
    if ( has_arch_pdevs(current->domain) )
        on_each_cpu(wbinvd_ipi, NULL, 1);
}

static void svm_vmexit_do_invalidate_cache(struct cpu_user_regs *regs)
{
    enum instruction_index list[] = { INSTR_INVD, INSTR_WBINVD };
    int inst_len;

    inst_len = __get_instruction_length_from_list(
        current, list, ARRAY_SIZE(list));
    if ( inst_len == 0 )
        return;

    svm_wbinvd_intercept();

    __update_guest_eip(regs, inst_len);
}

static void svm_invlpg_intercept(unsigned long vaddr)
{
    struct vcpu *curr = current;
    HVMTRACE_LONG_2D(INVLPG, 0, TRC_PAR_LONG(vaddr));
    paging_invlpg(curr, vaddr);
    svm_asid_g_invlpg(curr, vaddr);
}
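
/*
 * Descriptive note: main VMEXIT dispatcher.  On every exit the hardware
 * vTPR is propagated back into the virtual LAPIC's TPR before anything
 * else, any event that was being delivered when the intercept fired
 * (EXITINTINFO) is re-queued for injection, and the exit code is then
 * dispatched below.  On the way out, the (possibly updated) LAPIC TPR is
 * copied back into the VMCB vTPR.
 */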
asmlinkage void svm_vmexit_handler(struct cpu_user_regs *regs)
{
    unsigned int exit_reason;
    struct vcpu *v = current;
    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
    eventinj_t eventinj;
    int inst_len, rc;

    /*
     * Before doing anything else, we need to sync up the VLAPIC's TPR with
     * SVM's vTPR. It's OK if the guest doesn't touch CR8 (e.g. 32-bit Windows)
     * because we update the vTPR on MMIO writes to the TPR.
     */
    vlapic_set_reg(vcpu_vlapic(v), APIC_TASKPRI,
                   (vmcb->vintr.fields.tpr & 0x0F) << 4);

    exit_reason = vmcb->exitcode;

    if ( hvm_long_mode_enabled(v) )
        HVMTRACE_ND(VMEXIT64, 1/*cycles*/, 3, exit_reason,
                    (uint32_t)regs->eip, (uint32_t)((uint64_t)regs->eip >> 32),
                    0, 0, 0);
    else
        HVMTRACE_ND(VMEXIT, 1/*cycles*/, 2, exit_reason,
                    (uint32_t)regs->eip,
                    0, 0, 0, 0);

    if ( unlikely(exit_reason == VMEXIT_INVALID) )
    {
        svm_dump_vmcb(__func__, vmcb);
        goto exit_and_crash;
    }

    perfc_incra(svmexits, exit_reason);

    hvm_maybe_deassert_evtchn_irq();

    /* Event delivery caused this intercept? Queue for redelivery. */
    eventinj = vmcb->exitintinfo;
    if ( unlikely(eventinj.fields.v) &&
         hvm_event_needs_reinjection(eventinj.fields.type,
                                     eventinj.fields.vector) )
        vmcb->eventinj = eventinj;

    switch ( exit_reason )
    {
    case VMEXIT_INTR:
        /* Asynchronous event, handled when we STGI'd after the VMEXIT. */
        HVMTRACE_0D(INTR);
        break;

    case VMEXIT_NMI:
        /* Asynchronous event, handled when we STGI'd after the VMEXIT. */
        HVMTRACE_0D(NMI);
        break;

    case VMEXIT_SMI:
        /* Asynchronous event, handled when we STGI'd after the VMEXIT. */
        HVMTRACE_0D(SMI);
        break;

    case VMEXIT_EXCEPTION_DB:
        if ( !v->domain->debugger_attached )
            goto exit_and_crash;
        domain_pause_for_debugger();
        break;

    case VMEXIT_EXCEPTION_BP:
        if ( !v->domain->debugger_attached )
            goto exit_and_crash;
        /* AMD Vol2, 15.11: INT3, INTO, BOUND intercepts do not update RIP. */
        if ( (inst_len = __get_instruction_length(v, INSTR_INT3)) == 0 )
            break;
        __update_guest_eip(regs, inst_len);
        domain_pause_for_debugger();
        break;

    case VMEXIT_EXCEPTION_NM:
        svm_fpu_dirty_intercept();
        break;

    case VMEXIT_EXCEPTION_PF: {
        unsigned long va;
        va = vmcb->exitinfo2;
        regs->error_code = vmcb->exitinfo1;
        HVM_DBG_LOG(DBG_LEVEL_VMMU,
                    "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx",
                    (unsigned long)regs->eax, (unsigned long)regs->ebx,
                    (unsigned long)regs->ecx, (unsigned long)regs->edx,
                    (unsigned long)regs->esi, (unsigned long)regs->edi);

        if ( paging_fault(va, regs) )
        {
            if ( trace_will_trace_event(TRC_SHADOW) )
                break;
            if ( hvm_long_mode_enabled(v) )
                HVMTRACE_LONG_2D(PF_XEN, regs->error_code, TRC_PAR_LONG(va));
            else
                HVMTRACE_2D(PF_XEN, regs->error_code, va);
            break;
        }

        svm_inject_exception(TRAP_page_fault, regs->error_code, va);
        break;
    }

    case VMEXIT_EXCEPTION_UD:
        svm_vmexit_ud_intercept(regs);
        break;

    /* Asynchronous event, handled when we STGI'd after the VMEXIT. */
    case VMEXIT_EXCEPTION_MC:
        HVMTRACE_0D(MCE);
        break;

    case VMEXIT_VINTR:
        vmcb->vintr.fields.irq = 0;
        vmcb->general1_intercepts &= ~GENERAL1_INTERCEPT_VINTR;
        break;

    case VMEXIT_INVD:
    case VMEXIT_WBINVD:
        svm_vmexit_do_invalidate_cache(regs);
        break;

    case VMEXIT_TASK_SWITCH: {
        enum hvm_task_switch_reason reason;
        int32_t errcode = -1;
        if ( (vmcb->exitinfo2 >> 36) & 1 )
            reason = TSW_iret;
        else if ( (vmcb->exitinfo2 >> 38) & 1 )
            reason = TSW_jmp;
        else
            reason = TSW_call_or_int;
        if ( (vmcb->exitinfo2 >> 44) & 1 )
            errcode = (uint32_t)vmcb->exitinfo2;

        /*
         * Some processors set the EXITINTINFO field when the task switch
         * is caused by a task gate in the IDT. In this case we will be
         * emulating the event injection, so we do not want the processor
         * to re-inject the original event!
         */
        vmcb->eventinj.bytes = 0;

        hvm_task_switch((uint16_t)vmcb->exitinfo1, reason, errcode);
        break;
    }

    case VMEXIT_CPUID:
        svm_vmexit_do_cpuid(regs);
        break;

    case VMEXIT_HLT:
        svm_vmexit_do_hlt(vmcb, regs);
        break;

    case VMEXIT_CR0_READ ... VMEXIT_CR15_READ:
    case VMEXIT_CR0_WRITE ... VMEXIT_CR15_WRITE:
    case VMEXIT_INVLPG:
    case VMEXIT_INVLPGA:
    case VMEXIT_IOIO:
        if ( !handle_mmio() )
            hvm_inject_exception(TRAP_gp_fault, 0, 0);
        break;

    case VMEXIT_VMMCALL:
        if ( (inst_len = __get_instruction_length(v, INSTR_VMCALL)) == 0 )
            break;
        HVMTRACE_1D(VMMCALL, regs->eax);
        rc = hvm_do_hypercall(regs);
        if ( rc != HVM_HCALL_preempted )
        {
            __update_guest_eip(regs, inst_len);
            if ( rc == HVM_HCALL_invalidate )
                send_invalidate_req();
        }
        break;

    case VMEXIT_DR0_READ ... VMEXIT_DR7_READ:
    case VMEXIT_DR0_WRITE ... VMEXIT_DR7_WRITE:
        svm_dr_access(v, regs);
        break;

    case VMEXIT_MSR:
        svm_do_msr_access(regs);
        break;

    case VMEXIT_SHUTDOWN:
        hvm_triple_fault();
        break;

    case VMEXIT_RDTSC:
        svm_vmexit_do_rdtsc(regs);
        break;

    case VMEXIT_RDTSCP:
    case VMEXIT_MONITOR:
    case VMEXIT_MWAIT:
    case VMEXIT_VMRUN:
    case VMEXIT_VMLOAD:
    case VMEXIT_VMSAVE:
    case VMEXIT_STGI:
    case VMEXIT_CLGI:
    case VMEXIT_SKINIT:
        svm_inject_exception(TRAP_invalid_op, HVM_DELIVER_NO_ERROR_CODE, 0);
        break;

    case VMEXIT_NPF:
        perfc_incra(svmexits, VMEXIT_NPF_PERFC);
        regs->error_code = vmcb->exitinfo1;
        svm_do_nested_pgfault(vmcb->exitinfo2, regs);
        break;

    case VMEXIT_IRET:
        /*
         * IRET clears the NMI mask. However because we clear the mask
         * /before/ executing IRET, we set the interrupt shadow to prevent
         * a pending NMI from being injected immediately. This will work
         * perfectly unless the IRET instruction faults: in that case we
         * may inject an NMI before the NMI handler's IRET instruction is
         * retired.
         */
        vmcb->general1_intercepts &= ~GENERAL1_INTERCEPT_IRET;
        vmcb->interrupt_shadow = 1;
        break;

    case VMEXIT_PAUSE:
        /*
         * The guest is running a contended spinlock and we've detected it.
         * Do something useful, like rescheduling the guest.
         */
        do_sched_op_compat(SCHEDOP_yield, 0);
        break;

    default:
    exit_and_crash:
        gdprintk(XENLOG_ERR, "unexpected VMEXIT: exit reason = 0x%x, "
                 "exitinfo1 = %"PRIx64", exitinfo2 = %"PRIx64"\n",
                 exit_reason,
                 (u64)vmcb->exitinfo1, (u64)vmcb->exitinfo2);
        domain_crash(v->domain);
        break;
    }

    /* The exit may have updated the TPR: reflect this in the hardware vtpr */
    vmcb->vintr.fields.tpr =
        (vlapic_get_reg(vcpu_vlapic(v), APIC_TASKPRI) & 0xFF) >> 4;
}

asmlinkage void svm_trace_vmentry(void)
{
    HVMTRACE_ND (VMENTRY, 1/*cycles*/, 0, 0, 0, 0, 0, 0, 0);
}

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */