ia64/xen-unstable: xen/arch/x86/hvm/svm/svm.c @ 19648:f0e2df69a8eb

x86 hvm: Allow cross-vendor migration

Intercept #UD and emulate SYSCALL/SYSENTER/SYSEXIT as necessary.

Signed-off-by: Christoph Egger <Christoph.Egger@amd.com>
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>

author    Keir Fraser <keir.fraser@citrix.com>
date      Tue May 26 15:01:36 2009 +0100
parents   ac21e640cc83
children  7dfc0a20fa59
1 /*
2 * svm.c: handling SVM architecture-related VM exits
3 * Copyright (c) 2004, Intel Corporation.
4 * Copyright (c) 2005-2007, Advanced Micro Devices, Inc.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
17 * Place - Suite 330, Boston, MA 02111-1307 USA.
18 */
20 #include <xen/config.h>
21 #include <xen/init.h>
22 #include <xen/lib.h>
23 #include <xen/trace.h>
24 #include <xen/sched.h>
25 #include <xen/irq.h>
26 #include <xen/softirq.h>
27 #include <xen/hypercall.h>
28 #include <xen/domain_page.h>
29 #include <asm/current.h>
30 #include <asm/io.h>
31 #include <asm/paging.h>
32 #include <asm/p2m.h>
33 #include <asm/regs.h>
34 #include <asm/cpufeature.h>
35 #include <asm/processor.h>
36 #include <asm/types.h>
37 #include <asm/debugreg.h>
38 #include <asm/msr.h>
39 #include <asm/spinlock.h>
40 #include <asm/hvm/emulate.h>
41 #include <asm/hvm/hvm.h>
42 #include <asm/hvm/support.h>
43 #include <asm/hvm/io.h>
44 #include <asm/hvm/svm/asid.h>
45 #include <asm/hvm/svm/svm.h>
46 #include <asm/hvm/svm/vmcb.h>
47 #include <asm/hvm/svm/emulate.h>
48 #include <asm/hvm/svm/intr.h>
49 #include <asm/x86_emulate.h>
50 #include <public/sched.h>
51 #include <asm/hvm/vpt.h>
52 #include <asm/hvm/trace.h>
53 #include <asm/hap.h>
55 u32 svm_feature_flags;
57 #define set_segment_register(name, value) \
58 asm volatile ( "movw %%ax ,%%" STR(name) "" : : "a" (value) )
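/*
 * Loads a selector value into the named segment register; used in
 * svm_ctxt_switch_to() below to put null selectors in %ds/%es/%ss before
 * VMRUN.
 */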
60 enum handler_return { HNDL_done, HNDL_unhandled, HNDL_exception_raised };
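/*
 * Return values of the MSR-write helpers: HNDL_done means the access was
 * handled, HNDL_unhandled means the caller should fall back to generic
 * handling, and HNDL_exception_raised means a fault has already been
 * injected so the instruction must not be completed.
 */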
62 asmlinkage void do_IRQ(struct cpu_user_regs *);
64 static void svm_update_guest_cr(struct vcpu *v, unsigned int cr);
65 static void svm_update_guest_efer(struct vcpu *v);
66 static void svm_inject_exception(
67 unsigned int trapnr, int errcode, unsigned long cr2);
68 static void svm_cpuid_intercept(
69 unsigned int *eax, unsigned int *ebx,
70 unsigned int *ecx, unsigned int *edx);
71 static void svm_wbinvd_intercept(void);
72 static void svm_fpu_dirty_intercept(void);
73 static int svm_msr_read_intercept(struct cpu_user_regs *regs);
74 static int svm_msr_write_intercept(struct cpu_user_regs *regs);
75 static void svm_invlpg_intercept(unsigned long vaddr);
77 /* va of hardware host save area */
78 static void *hsa[NR_CPUS] __read_mostly;
80 /* vmcb used for extended host state */
81 static void *root_vmcb[NR_CPUS] __read_mostly;
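/*
 * Advance the guest RIP past an intercepted instruction. A zero length
 * means decoding already failed; anything above 15 bytes is impossible on
 * x86 and crashes the domain. Completing the instruction also clears
 * EFLAGS.RF and the interrupt shadow, and injects #DB if the guest is
 * single-stepping (EFLAGS.TF).
 */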
83 static void inline __update_guest_eip(
84 struct cpu_user_regs *regs, unsigned int inst_len)
85 {
86 struct vcpu *curr = current;
88 if ( unlikely(inst_len == 0) )
89 return;
91 if ( unlikely(inst_len > 15) )
92 {
93 gdprintk(XENLOG_ERR, "Bad instruction length %u\n", inst_len);
94 domain_crash(curr->domain);
95 return;
96 }
98 ASSERT(regs == guest_cpu_user_regs());
100 regs->eip += inst_len;
101 regs->eflags &= ~X86_EFLAGS_RF;
103 curr->arch.hvm_svm.vmcb->interrupt_shadow = 0;
105 if ( regs->eflags & X86_EFLAGS_TF )
106 svm_inject_exception(TRAP_debug, HVM_DELIVER_NO_ERROR_CODE, 0);
107 }
109 static void svm_cpu_down(void)
110 {
111 write_efer(read_efer() & ~EFER_SVME);
112 }
114 static enum handler_return long_mode_do_msr_write(struct cpu_user_regs *regs)
115 {
116 u64 msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
117 u32 ecx = regs->ecx;
119 HVM_DBG_LOG(DBG_LEVEL_0, "msr %x msr_content %"PRIx64,
120 ecx, msr_content);
122 switch ( ecx )
123 {
124 case MSR_EFER:
125 if ( hvm_set_efer(msr_content) )
126 return HNDL_exception_raised;
127 break;
129 case MSR_IA32_MC4_MISC: /* Threshold register */
130 case MSR_F10_MC4_MISC1 ... MSR_F10_MC4_MISC3:
131 /*
132 * MCA/MCE: Threshold register is reported to be locked, so we ignore
133 * all write accesses. This behaviour matches real HW, so guests should
134 * have no problem with this.
135 */
136 break;
138 default:
139 return HNDL_unhandled;
140 }
142 return HNDL_done;
143 }
145 static void svm_save_dr(struct vcpu *v)
146 {
147 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
149 if ( !v->arch.hvm_vcpu.flag_dr_dirty )
150 return;
152 /* Clear the DR dirty flag and re-enable intercepts for DR accesses. */
153 v->arch.hvm_vcpu.flag_dr_dirty = 0;
154 v->arch.hvm_svm.vmcb->dr_intercepts = ~0u;
156 v->arch.guest_context.debugreg[0] = read_debugreg(0);
157 v->arch.guest_context.debugreg[1] = read_debugreg(1);
158 v->arch.guest_context.debugreg[2] = read_debugreg(2);
159 v->arch.guest_context.debugreg[3] = read_debugreg(3);
160 v->arch.guest_context.debugreg[6] = vmcb->dr6;
161 v->arch.guest_context.debugreg[7] = vmcb->dr7;
162 }
164 static void __restore_debug_registers(struct vcpu *v)
165 {
166 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
168 if ( v->arch.hvm_vcpu.flag_dr_dirty )
169 return;
171 v->arch.hvm_vcpu.flag_dr_dirty = 1;
172 vmcb->dr_intercepts = 0;
174 write_debugreg(0, v->arch.guest_context.debugreg[0]);
175 write_debugreg(1, v->arch.guest_context.debugreg[1]);
176 write_debugreg(2, v->arch.guest_context.debugreg[2]);
177 write_debugreg(3, v->arch.guest_context.debugreg[3]);
178 vmcb->dr6 = v->arch.guest_context.debugreg[6];
179 vmcb->dr7 = v->arch.guest_context.debugreg[7];
180 }
182 /*
183 * DR7 is saved and restored on every vmexit. Other debug registers only
184 * need to be restored if their value is going to affect execution -- i.e.,
185 * if one of the breakpoints is enabled. So mask out all bits that don't
186 * enable some breakpoint functionality.
187 */
188 static void svm_restore_dr(struct vcpu *v)
189 {
190 if ( unlikely(v->arch.guest_context.debugreg[7] & DR7_ACTIVE_MASK) )
191 __restore_debug_registers(v);
192 }
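/*
 * Save the architectural state needed for save/restore and migration:
 * control registers, the software copies of the SYSENTER MSRs (kept as
 * per-vcpu software state; see the cross-vendor changeset description),
 * and any event that still needs to be re-injected.
 */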
194 static int svm_vmcb_save(struct vcpu *v, struct hvm_hw_cpu *c)
195 {
196 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
198 c->cr0 = v->arch.hvm_vcpu.guest_cr[0];
199 c->cr2 = v->arch.hvm_vcpu.guest_cr[2];
200 c->cr3 = v->arch.hvm_vcpu.guest_cr[3];
201 c->cr4 = v->arch.hvm_vcpu.guest_cr[4];
203 c->sysenter_cs = v->arch.hvm_svm.guest_sysenter_cs;
204 c->sysenter_esp = v->arch.hvm_svm.guest_sysenter_esp;
205 c->sysenter_eip = v->arch.hvm_svm.guest_sysenter_eip;
207 c->pending_event = 0;
208 c->error_code = 0;
209 if ( vmcb->eventinj.fields.v &&
210 hvm_event_needs_reinjection(vmcb->eventinj.fields.type,
211 vmcb->eventinj.fields.vector) )
212 {
213 c->pending_event = (uint32_t)vmcb->eventinj.bytes;
214 c->error_code = vmcb->eventinj.fields.errorcode;
215 }
217 return 1;
218 }
220 static int svm_vmcb_restore(struct vcpu *v, struct hvm_hw_cpu *c)
221 {
222 unsigned long mfn = 0;
223 p2m_type_t p2mt;
224 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
226 if ( c->pending_valid &&
227 ((c->pending_type == 1) || (c->pending_type > 6) ||
228 (c->pending_reserved != 0)) )
229 {
230 gdprintk(XENLOG_ERR, "Invalid pending event 0x%"PRIx32".\n",
231 c->pending_event);
232 return -EINVAL;
233 }
235 if ( !paging_mode_hap(v->domain) )
236 {
237 if ( c->cr0 & X86_CR0_PG )
238 {
239 mfn = mfn_x(gfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT, &p2mt));
240 if ( !p2m_is_ram(p2mt) || !get_page(mfn_to_page(mfn), v->domain) )
241 {
242 gdprintk(XENLOG_ERR, "Invalid CR3 value=0x%"PRIx64"\n",
243 c->cr3);
244 return -EINVAL;
245 }
246 }
248 if ( v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PG )
249 put_page(pagetable_get_page(v->arch.guest_table));
251 v->arch.guest_table = pagetable_from_pfn(mfn);
252 }
254 v->arch.hvm_vcpu.guest_cr[0] = c->cr0 | X86_CR0_ET;
255 v->arch.hvm_vcpu.guest_cr[2] = c->cr2;
256 v->arch.hvm_vcpu.guest_cr[3] = c->cr3;
257 v->arch.hvm_vcpu.guest_cr[4] = c->cr4;
258 svm_update_guest_cr(v, 0);
259 svm_update_guest_cr(v, 2);
260 svm_update_guest_cr(v, 4);
262 v->arch.hvm_svm.guest_sysenter_cs = c->sysenter_cs;
263 v->arch.hvm_svm.guest_sysenter_esp = c->sysenter_esp;
264 v->arch.hvm_svm.guest_sysenter_eip = c->sysenter_eip;
266 if ( paging_mode_hap(v->domain) )
267 {
268 vmcb->np_enable = 1;
269 vmcb->g_pat = 0x0007040600070406ULL; /* guest PAT */
270 vmcb->h_cr3 = pagetable_get_paddr(v->domain->arch.phys_table);
271 }
273 if ( c->pending_valid )
274 {
275 gdprintk(XENLOG_INFO, "Re-injecting 0x%"PRIx32", 0x%"PRIx32"\n",
276 c->pending_event, c->error_code);
278 if ( hvm_event_needs_reinjection(c->pending_type, c->pending_vector) )
279 {
280 vmcb->eventinj.bytes = c->pending_event;
281 vmcb->eventinj.fields.errorcode = c->error_code;
282 }
283 }
285 paging_update_paging_modes(v);
287 return 0;
288 }
291 static void svm_save_cpu_state(struct vcpu *v, struct hvm_hw_cpu *data)
292 {
293 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
295 data->shadow_gs = vmcb->kerngsbase;
296 data->msr_lstar = vmcb->lstar;
297 data->msr_star = vmcb->star;
298 data->msr_cstar = vmcb->cstar;
299 data->msr_syscall_mask = vmcb->sfmask;
300 data->msr_efer = v->arch.hvm_vcpu.guest_efer;
301 data->msr_flags = -1ULL;
303 data->tsc = hvm_get_guest_tsc(v);
304 }
307 static void svm_load_cpu_state(struct vcpu *v, struct hvm_hw_cpu *data)
308 {
309 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
311 vmcb->kerngsbase = data->shadow_gs;
312 vmcb->lstar = data->msr_lstar;
313 vmcb->star = data->msr_star;
314 vmcb->cstar = data->msr_cstar;
315 vmcb->sfmask = data->msr_syscall_mask;
316 v->arch.hvm_vcpu.guest_efer = data->msr_efer;
317 svm_update_guest_efer(v);
319 hvm_set_guest_tsc(v, data->tsc);
320 }
322 static void svm_save_vmcb_ctxt(struct vcpu *v, struct hvm_hw_cpu *ctxt)
323 {
324 svm_save_cpu_state(v, ctxt);
325 svm_vmcb_save(v, ctxt);
326 }
328 static int svm_load_vmcb_ctxt(struct vcpu *v, struct hvm_hw_cpu *ctxt)
329 {
330 svm_load_cpu_state(v, ctxt);
331 if (svm_vmcb_restore(v, ctxt)) {
332 printk("svm_vmcb restore failed!\n");
333 domain_crash(v->domain);
334 return -EINVAL;
335 }
337 return 0;
338 }
340 static void svm_fpu_enter(struct vcpu *v)
341 {
342 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
344 setup_fpu(v);
345 vmcb->exception_intercepts &= ~(1U << TRAP_no_device);
346 }
348 static void svm_fpu_leave(struct vcpu *v)
349 {
350 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
352 ASSERT(!v->fpu_dirtied);
353 ASSERT(read_cr0() & X86_CR0_TS);
355 /*
356 * If the guest does not have TS enabled then we must cause and handle an
357 * exception on first use of the FPU. If the guest *does* have TS enabled
358 * then this is not necessary: no FPU activity can occur until the guest
359 * clears CR0.TS, and we will initialise the FPU when that happens.
360 */
361 if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS) )
362 {
363 v->arch.hvm_svm.vmcb->exception_intercepts |= 1U << TRAP_no_device;
364 vmcb->cr0 |= X86_CR0_TS;
365 }
366 }
368 static unsigned int svm_get_interrupt_shadow(struct vcpu *v)
369 {
370 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
371 unsigned int intr_shadow = 0;
373 if ( vmcb->interrupt_shadow )
374 intr_shadow |= HVM_INTR_SHADOW_MOV_SS | HVM_INTR_SHADOW_STI;
376 if ( vmcb->general1_intercepts & GENERAL1_INTERCEPT_IRET )
377 intr_shadow |= HVM_INTR_SHADOW_NMI;
379 return intr_shadow;
380 }
382 static void svm_set_interrupt_shadow(struct vcpu *v, unsigned int intr_shadow)
383 {
384 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
386 vmcb->interrupt_shadow =
387 !!(intr_shadow & (HVM_INTR_SHADOW_MOV_SS|HVM_INTR_SHADOW_STI));
389 vmcb->general1_intercepts &= ~GENERAL1_INTERCEPT_IRET;
390 if ( intr_shadow & HVM_INTR_SHADOW_NMI )
391 vmcb->general1_intercepts |= GENERAL1_INTERCEPT_IRET;
392 }
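/*
 * Returns the guest execution mode: 0 for real mode, 1 for virtual-8086,
 * 2 for 16-bit protected mode, 4 for 32-bit protected mode, 8 for 64-bit
 * long mode.
 */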
394 static int svm_guest_x86_mode(struct vcpu *v)
395 {
396 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
398 if ( unlikely(!(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE)) )
399 return 0;
400 if ( unlikely(guest_cpu_user_regs()->eflags & X86_EFLAGS_VM) )
401 return 1;
402 if ( hvm_long_mode_enabled(v) && likely(vmcb->cs.attr.fields.l) )
403 return 8;
404 return (likely(vmcb->cs.attr.fields.db) ? 4 : 2);
405 }
407 static void svm_update_host_cr3(struct vcpu *v)
408 {
409 /* SVM doesn't have a HOST_CR3 equivalent to update. */
410 }
412 static void svm_update_guest_cr(struct vcpu *v, unsigned int cr)
413 {
414 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
416 switch ( cr )
417 {
418 case 0: {
419 unsigned long hw_cr0_mask = 0;
421 if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS) )
422 {
423 if ( v != current )
424 hw_cr0_mask |= X86_CR0_TS;
425 else if ( vmcb->cr0 & X86_CR0_TS )
426 svm_fpu_enter(v);
427 }
429 vmcb->cr0 = v->arch.hvm_vcpu.guest_cr[0] | hw_cr0_mask;
430 if ( !paging_mode_hap(v->domain) )
431 vmcb->cr0 |= X86_CR0_PG | X86_CR0_WP;
432 break;
433 }
434 case 2:
435 vmcb->cr2 = v->arch.hvm_vcpu.guest_cr[2];
436 break;
437 case 3:
438 vmcb->cr3 = v->arch.hvm_vcpu.hw_cr[3];
439 svm_asid_inv_asid(v);
440 break;
441 case 4:
442 vmcb->cr4 = HVM_CR4_HOST_MASK;
443 if ( paging_mode_hap(v->domain) )
444 vmcb->cr4 &= ~X86_CR4_PAE;
445 vmcb->cr4 |= v->arch.hvm_vcpu.guest_cr[4];
446 break;
447 default:
448 BUG();
449 }
450 }
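/*
 * The VMCB copy of EFER always has SVME set while the guest runs; LME is
 * only propagated once LMA is set, i.e. once the guest has actually
 * activated long mode.
 */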
452 static void svm_update_guest_efer(struct vcpu *v)
453 {
454 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
456 vmcb->efer = (v->arch.hvm_vcpu.guest_efer | EFER_SVME) & ~EFER_LME;
457 if ( vmcb->efer & EFER_LMA )
458 vmcb->efer |= EFER_LME;
459 }
461 static void svm_flush_guest_tlbs(void)
462 {
463 /* Roll over the CPU's ASID generation, so it gets a clean TLB when we
464 * next VMRUN. (If ASIDs are disabled, the whole TLB is flushed on
465 * VMRUN anyway). */
466 svm_asid_inc_generation();
467 }
469 static void svm_sync_vmcb(struct vcpu *v)
470 {
471 struct arch_svm_struct *arch_svm = &v->arch.hvm_svm;
473 if ( arch_svm->vmcb_in_sync )
474 return;
476 arch_svm->vmcb_in_sync = 1;
478 svm_vmsave(arch_svm->vmcb);
479 }
481 static void svm_get_segment_register(struct vcpu *v, enum x86_segment seg,
482 struct segment_register *reg)
483 {
484 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
486 ASSERT((v == current) || !vcpu_runnable(v));
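/*
 * The cases below also patch up attribute bits as seen by callers: data
 * segments get the accessed bit set, CS granularity is derived from the
 * limit, SS.DPL mirrors the CPL field, and the TSS type is reported busy.
 */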
488 switch ( seg )
489 {
490 case x86_seg_cs:
491 memcpy(reg, &vmcb->cs, sizeof(*reg));
492 reg->attr.fields.g = reg->limit > 0xFFFFF;
493 break;
494 case x86_seg_ds:
495 memcpy(reg, &vmcb->ds, sizeof(*reg));
496 if ( reg->attr.fields.type != 0 )
497 reg->attr.fields.type |= 0x1;
498 break;
499 case x86_seg_es:
500 memcpy(reg, &vmcb->es, sizeof(*reg));
501 if ( reg->attr.fields.type != 0 )
502 reg->attr.fields.type |= 0x1;
503 break;
504 case x86_seg_fs:
505 svm_sync_vmcb(v);
506 memcpy(reg, &vmcb->fs, sizeof(*reg));
507 if ( reg->attr.fields.type != 0 )
508 reg->attr.fields.type |= 0x1;
509 break;
510 case x86_seg_gs:
511 svm_sync_vmcb(v);
512 memcpy(reg, &vmcb->gs, sizeof(*reg));
513 if ( reg->attr.fields.type != 0 )
514 reg->attr.fields.type |= 0x1;
515 break;
516 case x86_seg_ss:
517 memcpy(reg, &vmcb->ss, sizeof(*reg));
518 reg->attr.fields.dpl = vmcb->cpl;
519 if ( reg->attr.fields.type == 0 )
520 reg->attr.fields.db = 0;
521 break;
522 case x86_seg_tr:
523 svm_sync_vmcb(v);
524 memcpy(reg, &vmcb->tr, sizeof(*reg));
525 reg->attr.fields.type |= 0x2;
526 break;
527 case x86_seg_gdtr:
528 memcpy(reg, &vmcb->gdtr, sizeof(*reg));
529 break;
530 case x86_seg_idtr:
531 memcpy(reg, &vmcb->idtr, sizeof(*reg));
532 break;
533 case x86_seg_ldtr:
534 svm_sync_vmcb(v);
535 memcpy(reg, &vmcb->ldtr, sizeof(*reg));
536 break;
537 default:
538 BUG();
539 }
540 }
542 static void svm_set_segment_register(struct vcpu *v, enum x86_segment seg,
543 struct segment_register *reg)
544 {
545 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
546 int sync = 0;
548 ASSERT((v == current) || !vcpu_runnable(v));
550 switch ( seg )
551 {
552 case x86_seg_fs:
553 case x86_seg_gs:
554 case x86_seg_tr:
555 case x86_seg_ldtr:
556 sync = (v == current);
557 break;
558 default:
559 break;
560 }
562 if ( sync )
563 svm_sync_vmcb(v);
565 switch ( seg )
566 {
567 case x86_seg_cs:
568 memcpy(&vmcb->cs, reg, sizeof(*reg));
569 break;
570 case x86_seg_ds:
571 memcpy(&vmcb->ds, reg, sizeof(*reg));
572 break;
573 case x86_seg_es:
574 memcpy(&vmcb->es, reg, sizeof(*reg));
575 break;
576 case x86_seg_fs:
577 memcpy(&vmcb->fs, reg, sizeof(*reg));
578 break;
579 case x86_seg_gs:
580 memcpy(&vmcb->gs, reg, sizeof(*reg));
581 break;
582 case x86_seg_ss:
583 memcpy(&vmcb->ss, reg, sizeof(*reg));
584 vmcb->cpl = vmcb->ss.attr.fields.dpl;
585 break;
586 case x86_seg_tr:
587 memcpy(&vmcb->tr, reg, sizeof(*reg));
588 break;
589 case x86_seg_gdtr:
590 vmcb->gdtr.base = reg->base;
591 vmcb->gdtr.limit = (uint16_t)reg->limit;
592 break;
593 case x86_seg_idtr:
594 vmcb->idtr.base = reg->base;
595 vmcb->idtr.limit = (uint16_t)reg->limit;
596 break;
597 case x86_seg_ldtr:
598 memcpy(&vmcb->ldtr, reg, sizeof(*reg));
599 break;
600 default:
601 BUG();
602 }
604 if ( sync )
605 svm_vmload(vmcb);
606 }
608 static void svm_set_tsc_offset(struct vcpu *v, u64 offset)
609 {
610 v->arch.hvm_svm.vmcb->tsc_offset = offset;
611 }
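/*
 * Each hypercall stub written below occupies 32 bytes and consists of:
 *     b8 <imm32>   mov  $hypercall_nr, %eax
 *     0f 01 d9     vmmcall
 *     c3           ret
 */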
613 static void svm_init_hypercall_page(struct domain *d, void *hypercall_page)
614 {
615 char *p;
616 int i;
618 for ( i = 0; i < (PAGE_SIZE / 32); i++ )
619 {
620 p = (char *)(hypercall_page + (i * 32));
621 *(u8 *)(p + 0) = 0xb8; /* mov imm32, %eax */
622 *(u32 *)(p + 1) = i;
623 *(u8 *)(p + 5) = 0x0f; /* vmmcall */
624 *(u8 *)(p + 6) = 0x01;
625 *(u8 *)(p + 7) = 0xd9;
626 *(u8 *)(p + 8) = 0xc3; /* ret */
627 }
629 /* Don't support HYPERVISOR_iret at the moment */
630 *(u16 *)(hypercall_page + (__HYPERVISOR_iret * 32)) = 0x0b0f; /* ud2 */
631 }
633 static void svm_ctxt_switch_from(struct vcpu *v)
634 {
635 int cpu = smp_processor_id();
637 svm_fpu_leave(v);
639 svm_save_dr(v);
641 svm_sync_vmcb(v);
642 svm_vmload(root_vmcb[cpu]);
644 #ifdef __x86_64__
645 /* Resume use of ISTs now that the host TR is reinstated. */
646 idt_tables[cpu][TRAP_double_fault].a |= IST_DF << 32;
647 idt_tables[cpu][TRAP_nmi].a |= IST_NMI << 32;
648 idt_tables[cpu][TRAP_machine_check].a |= IST_MCE << 32;
649 #endif
650 }
652 static void svm_ctxt_switch_to(struct vcpu *v)
653 {
654 int cpu = smp_processor_id();
656 #ifdef __x86_64__
657 /*
658 * This is required, because VMRUN does consistency check
659 * and some of the DOM0 selectors are pointing to
660 * invalid GDT locations, and cause AMD processors
661 * to shutdown.
662 */
663 set_segment_register(ds, 0);
664 set_segment_register(es, 0);
665 set_segment_register(ss, 0);
667 /*
668 * Cannot use ISTs for NMI/#MC/#DF while we are running with the guest TR.
669 * But this doesn't matter: the IST is only req'd to handle SYSCALL/SYSRET.
670 */
671 idt_tables[cpu][TRAP_double_fault].a &= ~(7UL << 32);
672 idt_tables[cpu][TRAP_nmi].a &= ~(7UL << 32);
673 idt_tables[cpu][TRAP_machine_check].a &= ~(7UL << 32);
674 #endif
676 svm_restore_dr(v);
678 svm_vmsave(root_vmcb[cpu]);
679 svm_vmload(v->arch.hvm_svm.vmcb);
680 }
682 static void svm_do_resume(struct vcpu *v)
683 {
684 bool_t debug_state = v->domain->debugger_attached;
686 if ( unlikely(v->arch.hvm_vcpu.debug_state_latch != debug_state) )
687 {
688 uint32_t mask = (1U << TRAP_debug) | (1U << TRAP_int3);
689 v->arch.hvm_vcpu.debug_state_latch = debug_state;
690 if ( debug_state )
691 v->arch.hvm_svm.vmcb->exception_intercepts |= mask;
692 else
693 v->arch.hvm_svm.vmcb->exception_intercepts &= ~mask;
694 }
696 if ( v->arch.hvm_svm.launch_core != smp_processor_id() )
697 {
698 v->arch.hvm_svm.launch_core = smp_processor_id();
699 hvm_migrate_timers(v);
701 /* Migrating to another ASID domain. Request a new ASID. */
702 svm_asid_init_vcpu(v);
703 }
705 /* Reflect the vlapic's TPR in the hardware vtpr */
706 v->arch.hvm_svm.vmcb->vintr.fields.tpr =
707 (vlapic_get_reg(vcpu_vlapic(v), APIC_TASKPRI) & 0xFF) >> 4;
709 hvm_do_resume(v);
710 reset_stack_and_jump(svm_asm_do_resume);
711 }
713 static int svm_domain_initialise(struct domain *d)
714 {
715 return 0;
716 }
718 static void svm_domain_destroy(struct domain *d)
719 {
720 }
722 static int svm_vcpu_initialise(struct vcpu *v)
723 {
724 int rc;
726 v->arch.schedule_tail = svm_do_resume;
727 v->arch.ctxt_switch_from = svm_ctxt_switch_from;
728 v->arch.ctxt_switch_to = svm_ctxt_switch_to;
730 v->arch.hvm_svm.launch_core = -1;
732 if ( (rc = svm_create_vmcb(v)) != 0 )
733 {
734 dprintk(XENLOG_WARNING,
735 "Failed to create VMCB for vcpu %d: err=%d.\n",
736 v->vcpu_id, rc);
737 return rc;
738 }
740 return 0;
741 }
743 static void svm_vcpu_destroy(struct vcpu *v)
744 {
745 svm_destroy_vmcb(v);
746 }
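/*
 * Queue a hardware exception for injection on the next VMRUN. #DB/#BP are
 * handed to an attached debugger instead, and an exception colliding with
 * one already being injected is combined, escalating to #DF where the
 * architecture requires it.
 */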
748 static void svm_inject_exception(
749 unsigned int trapnr, int errcode, unsigned long cr2)
750 {
751 struct vcpu *curr = current;
752 struct vmcb_struct *vmcb = curr->arch.hvm_svm.vmcb;
753 eventinj_t event = vmcb->eventinj;
755 switch ( trapnr )
756 {
757 case TRAP_debug:
758 if ( guest_cpu_user_regs()->eflags & X86_EFLAGS_TF )
759 {
760 __restore_debug_registers(curr);
761 vmcb->dr6 |= 0x4000;
762 }
763 case TRAP_int3:
764 if ( curr->domain->debugger_attached )
765 {
766 /* Debug/Int3: Trap to debugger. */
767 domain_pause_for_debugger();
768 return;
769 }
770 }
772 if ( unlikely(event.fields.v) &&
773 (event.fields.type == X86_EVENTTYPE_HW_EXCEPTION) )
774 {
775 trapnr = hvm_combine_hw_exceptions(event.fields.vector, trapnr);
776 if ( trapnr == TRAP_double_fault )
777 errcode = 0;
778 }
780 event.bytes = 0;
781 event.fields.v = 1;
782 event.fields.type = X86_EVENTTYPE_HW_EXCEPTION;
783 event.fields.vector = trapnr;
784 event.fields.ev = (errcode != HVM_DELIVER_NO_ERROR_CODE);
785 event.fields.errorcode = errcode;
787 vmcb->eventinj = event;
789 if ( trapnr == TRAP_page_fault )
790 {
791 vmcb->cr2 = curr->arch.hvm_vcpu.guest_cr[2] = cr2;
792 HVMTRACE_LONG_2D(PF_INJECT, errcode, TRC_PAR_LONG(cr2));
793 }
794 else
795 {
796 HVMTRACE_2D(INJ_EXC, trapnr, errcode);
797 }
798 }
800 static int svm_event_pending(struct vcpu *v)
801 {
802 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
803 return vmcb->eventinj.fields.v;
804 }
806 static int svm_do_pmu_interrupt(struct cpu_user_regs *regs)
807 {
808 return 0;
809 }
811 static struct hvm_function_table svm_function_table = {
812 .name = "SVM",
813 .cpu_down = svm_cpu_down,
814 .domain_initialise = svm_domain_initialise,
815 .domain_destroy = svm_domain_destroy,
816 .vcpu_initialise = svm_vcpu_initialise,
817 .vcpu_destroy = svm_vcpu_destroy,
818 .save_cpu_ctxt = svm_save_vmcb_ctxt,
819 .load_cpu_ctxt = svm_load_vmcb_ctxt,
820 .get_interrupt_shadow = svm_get_interrupt_shadow,
821 .set_interrupt_shadow = svm_set_interrupt_shadow,
822 .guest_x86_mode = svm_guest_x86_mode,
823 .get_segment_register = svm_get_segment_register,
824 .set_segment_register = svm_set_segment_register,
825 .update_host_cr3 = svm_update_host_cr3,
826 .update_guest_cr = svm_update_guest_cr,
827 .update_guest_efer = svm_update_guest_efer,
828 .flush_guest_tlbs = svm_flush_guest_tlbs,
829 .set_tsc_offset = svm_set_tsc_offset,
830 .inject_exception = svm_inject_exception,
831 .init_hypercall_page = svm_init_hypercall_page,
832 .event_pending = svm_event_pending,
833 .do_pmu_interrupt = svm_do_pmu_interrupt,
834 .cpuid_intercept = svm_cpuid_intercept,
835 .wbinvd_intercept = svm_wbinvd_intercept,
836 .fpu_dirty_intercept = svm_fpu_dirty_intercept,
837 .msr_read_intercept = svm_msr_read_intercept,
838 .msr_write_intercept = svm_msr_write_intercept,
839 .invlpg_intercept = svm_invlpg_intercept
840 };
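/*
 * Per-CPU SVM bring-up: check the SVME CPUID feature and that the BIOS has
 * not disabled SVM, set EFER.SVME, register this CPU's host save area via
 * MSR_K8_VM_HSAVE_PA, and initialise ASID handling. The boot CPU
 * additionally reads the SVM feature leaf (0x8000000A) and registers the
 * function table with hvm_enable().
 */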
842 int start_svm(struct cpuinfo_x86 *c)
843 {
844 u32 eax, ecx, edx;
845 u32 phys_hsa_lo, phys_hsa_hi;
846 u64 phys_hsa;
847 int cpu = smp_processor_id();
849 /* Xen does not fill x86_capability words except 0. */
850 ecx = cpuid_ecx(0x80000001);
851 boot_cpu_data.x86_capability[5] = ecx;
853 if ( !(test_bit(X86_FEATURE_SVME, &boot_cpu_data.x86_capability)) )
854 return 0;
856 /* Check whether SVM feature is disabled in BIOS */
857 rdmsr(MSR_K8_VM_CR, eax, edx);
858 if ( eax & K8_VMCR_SVME_DISABLE )
859 {
860 printk("AMD SVM Extension is disabled in BIOS.\n");
861 return 0;
862 }
864 if ( ((hsa[cpu] = alloc_host_save_area()) == NULL) ||
865 ((root_vmcb[cpu] = alloc_vmcb()) == NULL) )
866 return 0;
868 write_efer(read_efer() | EFER_SVME);
870 /* Initialize the HSA for this core. */
871 phys_hsa = (u64) virt_to_maddr(hsa[cpu]);
872 phys_hsa_lo = (u32) phys_hsa;
873 phys_hsa_hi = (u32) (phys_hsa >> 32);
874 wrmsr(MSR_K8_VM_HSAVE_PA, phys_hsa_lo, phys_hsa_hi);
876 /* Initialize core's ASID handling. */
877 svm_asid_init(c);
879 if ( cpu != 0 )
880 return 1;
882 setup_vmcb_dump();
884 svm_feature_flags = ((cpuid_eax(0x80000000) >= 0x8000000A) ?
885 cpuid_edx(0x8000000A) : 0);
887 svm_function_table.hap_supported = cpu_has_svm_npt;
889 hvm_enable(&svm_function_table);
891 return 1;
892 }
894 static void svm_do_nested_pgfault(paddr_t gpa, struct cpu_user_regs *regs)
895 {
896 p2m_type_t p2mt;
897 mfn_t mfn;
898 unsigned long gfn = gpa >> PAGE_SHIFT;
900 /*
901 * If this GFN is emulated MMIO or marked as read-only, pass the fault
902 * to the mmio handler.
903 */
904 mfn = gfn_to_mfn_type_current(gfn, &p2mt, p2m_guest);
905 if ( (p2mt == p2m_mmio_dm) || (p2mt == p2m_ram_ro) )
906 {
907 if ( !handle_mmio() )
908 hvm_inject_exception(TRAP_gp_fault, 0, 0);
909 return;
910 }
912 /* Log-dirty: mark the page dirty and let the guest write it again */
913 paging_mark_dirty(current->domain, mfn_x(mfn));
914 p2m_change_type(current->domain, gfn, p2m_ram_logdirty, p2m_ram_rw);
915 }
917 static void svm_fpu_dirty_intercept(void)
918 {
919 struct vcpu *curr = current;
920 struct vmcb_struct *vmcb = curr->arch.hvm_svm.vmcb;
922 svm_fpu_enter(curr);
924 if ( !(curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS) )
925 vmcb->cr0 &= ~X86_CR0_TS;
926 }
928 #define bitmaskof(idx) (1U << ((idx) & 31))
929 static void svm_cpuid_intercept(
930 unsigned int *eax, unsigned int *ebx,
931 unsigned int *ecx, unsigned int *edx)
932 {
933 unsigned int input = *eax;
934 struct vcpu *v = current;
936 hvm_cpuid(input, eax, ebx, ecx, edx);
938 if ( input == 0x80000001 )
939 {
940 /* Fix up VLAPIC details. */
941 if ( vlapic_hw_disabled(vcpu_vlapic(v)) )
942 __clear_bit(X86_FEATURE_APIC & 31, edx);
943 }
945 HVMTRACE_5D (CPUID, input, *eax, *ebx, *ecx, *edx);
946 }
948 static void svm_vmexit_do_cpuid(struct cpu_user_regs *regs)
949 {
950 unsigned int eax, ebx, ecx, edx, inst_len;
952 if ( (inst_len = __get_instruction_length(current, INSTR_CPUID)) == 0 )
953 return;
955 eax = regs->eax;
956 ebx = regs->ebx;
957 ecx = regs->ecx;
958 edx = regs->edx;
960 svm_cpuid_intercept(&eax, &ebx, &ecx, &edx);
962 regs->eax = eax;
963 regs->ebx = ebx;
964 regs->ecx = ecx;
965 regs->edx = edx;
967 __update_guest_eip(regs, inst_len);
968 }
970 static void svm_dr_access(struct vcpu *v, struct cpu_user_regs *regs)
971 {
972 HVMTRACE_0D(DR_WRITE);
973 __restore_debug_registers(v);
974 }
976 static int svm_msr_read_intercept(struct cpu_user_regs *regs)
977 {
978 u64 msr_content = 0;
979 u32 ecx = regs->ecx, eax, edx;
980 struct vcpu *v = current;
981 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
983 switch ( ecx )
984 {
985 case MSR_EFER:
986 msr_content = v->arch.hvm_vcpu.guest_efer;
987 break;
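/*
 * The SYSENTER MSRs live in per-vcpu software state (guest_sysenter_*)
 * rather than in the VMCB, so they can be saved/restored and used to
 * emulate SYSENTER/SYSEXIT after a cross-vendor migration (see the #UD
 * intercept).
 */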
989 case MSR_IA32_SYSENTER_CS:
990 msr_content = v->arch.hvm_svm.guest_sysenter_cs;
991 break;
992 case MSR_IA32_SYSENTER_ESP:
993 msr_content = v->arch.hvm_svm.guest_sysenter_esp;
994 break;
995 case MSR_IA32_SYSENTER_EIP:
996 msr_content = v->arch.hvm_svm.guest_sysenter_eip;
997 break;
999 case MSR_IA32_MC4_MISC: /* Threshold register */
1000 case MSR_F10_MC4_MISC1 ... MSR_F10_MC4_MISC3:
1001 /*
1002 * MCA/MCE: We report that the threshold register is unavailable
1003 * for OS use (locked by the BIOS).
1004 */
1005 msr_content = 1ULL << 61; /* MC4_MISC.Locked */
1006 break;
1008 case MSR_IA32_EBC_FREQUENCY_ID:
1009 /*
1010 * This Intel-only register may be accessed if this HVM guest
1011 * has been migrated from an Intel host. The value zero is not
1012 * particularly meaningful, but at least avoids the guest crashing!
1013 */
1014 msr_content = 0;
1015 break;
1017 case MSR_K8_VM_HSAVE_PA:
1018 goto gpf;
1020 case MSR_IA32_DEBUGCTLMSR:
1021 msr_content = vmcb->debugctlmsr;
1022 break;
1024 case MSR_IA32_LASTBRANCHFROMIP:
1025 msr_content = vmcb->lastbranchfromip;
1026 break;
1028 case MSR_IA32_LASTBRANCHTOIP:
1029 msr_content = vmcb->lastbranchtoip;
1030 break;
1032 case MSR_IA32_LASTINTFROMIP:
1033 msr_content = vmcb->lastintfromip;
1034 break;
1036 case MSR_IA32_LASTINTTOIP:
1037 msr_content = vmcb->lastinttoip;
1038 break;
1040 default:
1041 if ( rdmsr_viridian_regs(ecx, &eax, &edx) ||
1042 rdmsr_hypervisor_regs(ecx, &eax, &edx) ||
1043 rdmsr_safe(ecx, eax, edx) == 0 )
1044 {
1045 regs->eax = eax;
1046 regs->edx = edx;
1047 goto done;
1048 }
1049 goto gpf;
1050 }
1051 regs->eax = msr_content & 0xFFFFFFFF;
1052 regs->edx = msr_content >> 32;
1054 done:
1055 HVMTRACE_3D (MSR_READ, ecx, regs->eax, regs->edx);
1056 HVM_DBG_LOG(DBG_LEVEL_1, "returns: ecx=%x, eax=%lx, edx=%lx",
1057 ecx, (unsigned long)regs->eax, (unsigned long)regs->edx);
1058 return X86EMUL_OKAY;
1060 gpf:
1061 svm_inject_exception(TRAP_gp_fault, 0, 0);
1062 return X86EMUL_EXCEPTION;
1063 }
1065 static int svm_msr_write_intercept(struct cpu_user_regs *regs)
1066 {
1067 u64 msr_content = 0;
1068 u32 ecx = regs->ecx;
1069 struct vcpu *v = current;
1070 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1072 msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
1074 HVMTRACE_3D (MSR_WRITE, ecx, regs->eax, regs->edx);
1076 switch ( ecx )
1077 {
1078 case MSR_K8_VM_HSAVE_PA:
1079 goto gpf;
1081 case MSR_IA32_SYSENTER_CS:
1082 v->arch.hvm_svm.guest_sysenter_cs = msr_content;
1083 break;
1084 case MSR_IA32_SYSENTER_ESP:
1085 v->arch.hvm_svm.guest_sysenter_esp = msr_content;
1086 break;
1087 case MSR_IA32_SYSENTER_EIP:
1088 v->arch.hvm_svm.guest_sysenter_eip = msr_content;
1089 break;
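/*
 * A non-zero DEBUGCTL write enables the VMCB's LBR virtualisation and
 * stops intercepting the LBR MSRs, provided the CPU supports LBR
 * virtualisation (cpu_has_svm_lbrv).
 */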
1091 case MSR_IA32_DEBUGCTLMSR:
1092 vmcb->debugctlmsr = msr_content;
1093 if ( !msr_content || !cpu_has_svm_lbrv )
1094 break;
1095 vmcb->lbr_control.fields.enable = 1;
1096 svm_disable_intercept_for_msr(v, MSR_IA32_DEBUGCTLMSR);
1097 svm_disable_intercept_for_msr(v, MSR_IA32_LASTBRANCHFROMIP);
1098 svm_disable_intercept_for_msr(v, MSR_IA32_LASTBRANCHTOIP);
1099 svm_disable_intercept_for_msr(v, MSR_IA32_LASTINTFROMIP);
1100 svm_disable_intercept_for_msr(v, MSR_IA32_LASTINTTOIP);
1101 break;
1103 case MSR_IA32_LASTBRANCHFROMIP:
1104 vmcb->lastbranchfromip = msr_content;
1105 break;
1107 case MSR_IA32_LASTBRANCHTOIP:
1108 vmcb->lastbranchtoip = msr_content;
1109 break;
1111 case MSR_IA32_LASTINTFROMIP:
1112 vmcb->lastintfromip = msr_content;
1113 break;
1115 case MSR_IA32_LASTINTTOIP:
1116 vmcb->lastinttoip = msr_content;
1117 break;
1119 default:
1120 if ( wrmsr_viridian_regs(ecx, regs->eax, regs->edx) )
1121 break;
1123 switch ( long_mode_do_msr_write(regs) )
1124 {
1125 case HNDL_unhandled:
1126 wrmsr_hypervisor_regs(ecx, regs->eax, regs->edx);
1127 break;
1128 case HNDL_exception_raised:
1129 return X86EMUL_EXCEPTION;
1130 case HNDL_done:
1131 break;
1132 }
1133 break;
1134 }
1136 return X86EMUL_OKAY;
1138 gpf:
1139 svm_inject_exception(TRAP_gp_fault, 0, 0);
1140 return X86EMUL_EXCEPTION;
1141 }
1143 static void svm_do_msr_access(struct cpu_user_regs *regs)
1144 {
1145 int rc, inst_len;
1146 struct vcpu *v = current;
1147 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1149 if ( vmcb->exitinfo1 == 0 )
1150 {
1151 if ( (inst_len = __get_instruction_length(v, INSTR_RDMSR)) == 0 )
1152 return;
1153 rc = hvm_msr_read_intercept(regs);
1154 }
1155 else
1156 {
1157 if ( (inst_len = __get_instruction_length(v, INSTR_WRMSR)) == 0 )
1158 return;
1159 rc = hvm_msr_write_intercept(regs);
1160 }
1162 if ( rc == X86EMUL_OKAY )
1163 __update_guest_eip(regs, inst_len);
1164 }
1166 static void svm_vmexit_do_hlt(struct vmcb_struct *vmcb,
1167 struct cpu_user_regs *regs)
1168 {
1169 unsigned int inst_len;
1171 if ( (inst_len = __get_instruction_length(current, INSTR_HLT)) == 0 )
1172 return;
1173 __update_guest_eip(regs, inst_len);
1175 hvm_hlt(regs->eflags);
1176 }
1178 static void svm_vmexit_do_rdtsc(struct cpu_user_regs *regs)
1179 {
1180 unsigned int inst_len;
1182 if ( (inst_len = __get_instruction_length(current, INSTR_RDTSC)) == 0 )
1183 return;
1184 __update_guest_eip(regs, inst_len);
1186 hvm_rdtsc_intercept(regs);
1187 }
1189 static void svm_dump_regs(const char *from, struct cpu_user_regs *regs)
1190 {
1191 printk("Dumping guest's current registers at %s...\n", from);
1192 printk("Size of regs = 0x%lx, address = %p\n",
1193 sizeof(struct cpu_user_regs), regs);
1195 printk("r15 = 0x%016"PRIx64", r14 = 0x%016"PRIx64"\n",
1196 regs->r15, regs->r14);
1197 printk("r13 = 0x%016"PRIx64", r12 = 0x%016"PRIx64"\n",
1198 regs->r13, regs->r12);
1199 printk("rbp = 0x%016"PRIx64", rbx = 0x%016"PRIx64"\n",
1200 regs->rbp, regs->rbx);
1201 printk("r11 = 0x%016"PRIx64", r10 = 0x%016"PRIx64"\n",
1202 regs->r11, regs->r10);
1203 printk("r9 = 0x%016"PRIx64", r8 = 0x%016"PRIx64"\n",
1204 regs->r9, regs->r8);
1205 printk("rax = 0x%016"PRIx64", rcx = 0x%016"PRIx64"\n",
1206 regs->rax, regs->rcx);
1207 printk("rdx = 0x%016"PRIx64", rsi = 0x%016"PRIx64"\n",
1208 regs->rdx, regs->rsi);
1209 printk("rdi = 0x%016"PRIx64", rsp = 0x%016"PRIx64"\n",
1210 regs->rdi, regs->rsp);
1211 printk("error code = 0x%08"PRIx32", entry_vector = 0x%08"PRIx32"\n",
1212 regs->error_code, regs->entry_vector);
1213 printk("rip = 0x%016"PRIx64", rflags = 0x%016"PRIx64"\n",
1214 regs->rip, regs->rflags);
1215 }
1217 static void svm_vmexit_ud_intercept(struct cpu_user_regs *regs)
1218 {
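/*
 * This path is what makes cross-vendor migration work: SYSCALL, SYSENTER
 * and SYSEXIT executed on the "wrong" vendor's CPU raise #UD, which is
 * intercepted here and handed to the instruction emulator.
 */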
1219 struct hvm_emulate_ctxt ctxt;
1220 int rc;
1222 hvm_emulate_prepare(&ctxt, regs);
1224 rc = hvm_emulate_one(&ctxt);
1226 switch ( rc )
1227 {
1228 case X86EMUL_UNHANDLEABLE:
1229 gdprintk(XENLOG_WARNING,
1230 "instruction emulation failed @ %04x:%lx: "
1231 "%02x %02x %02x %02x %02x %02x\n",
1232 hvmemul_get_seg_reg(x86_seg_cs, &ctxt)->sel,
1233 ctxt.insn_buf_eip,
1234 ctxt.insn_buf[0], ctxt.insn_buf[1],
1235 ctxt.insn_buf[2], ctxt.insn_buf[3],
1236 ctxt.insn_buf[4], ctxt.insn_buf[5]);
1237 return;
1238 case X86EMUL_EXCEPTION:
1239 if ( ctxt.exn_pending )
1240 hvm_inject_exception(ctxt.exn_vector, ctxt.exn_error_code, 0);
1241 break;
1242 default:
1243 break;
1244 }
1246 hvm_emulate_writeback(&ctxt);
1247 }
1249 static void wbinvd_ipi(void *info)
1250 {
1251 wbinvd();
1252 }
1254 static void svm_wbinvd_intercept(void)
1255 {
1256 if ( has_arch_pdevs(current->domain) )
1257 on_each_cpu(wbinvd_ipi, NULL, 1, 1);
1258 }
1260 static void svm_vmexit_do_invalidate_cache(struct cpu_user_regs *regs)
1261 {
1262 enum instruction_index list[] = { INSTR_INVD, INSTR_WBINVD };
1263 int inst_len;
1265 inst_len = __get_instruction_length_from_list(
1266 current, list, ARRAY_SIZE(list));
1267 if ( inst_len == 0 )
1268 return;
1270 svm_wbinvd_intercept();
1272 __update_guest_eip(regs, inst_len);
1273 }
1275 static void svm_invlpg_intercept(unsigned long vaddr)
1276 {
1277 struct vcpu *curr = current;
1278 HVMTRACE_LONG_2D(INVLPG, 0, TRC_PAR_LONG(vaddr));
1279 paging_invlpg(curr, vaddr);
1280 svm_asid_g_invlpg(curr, vaddr);
1281 }
1283 asmlinkage void svm_vmexit_handler(struct cpu_user_regs *regs)
1284 {
1285 unsigned int exit_reason;
1286 struct vcpu *v = current;
1287 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1288 eventinj_t eventinj;
1289 int inst_len, rc;
1291 /*
1292 * Before doing anything else, we need to sync up the VLAPIC's TPR with
1293 * SVM's vTPR. It's OK if the guest doesn't touch CR8 (e.g. 32-bit Windows)
1294 * because we update the vTPR on MMIO writes to the TPR.
1295 */
1296 vlapic_set_reg(vcpu_vlapic(v), APIC_TASKPRI,
1297 (vmcb->vintr.fields.tpr & 0x0F) << 4);
1299 exit_reason = vmcb->exitcode;
1301 if ( hvm_long_mode_enabled(v) )
1302 HVMTRACE_ND(VMEXIT64, 1/*cycles*/, 3, exit_reason,
1303 (uint32_t)regs->eip, (uint32_t)((uint64_t)regs->eip >> 32),
1304 0, 0, 0);
1305 else
1306 HVMTRACE_ND(VMEXIT, 1/*cycles*/, 2, exit_reason,
1307 (uint32_t)regs->eip,
1308 0, 0, 0, 0);
1310 if ( unlikely(exit_reason == VMEXIT_INVALID) )
1311 {
1312 svm_dump_vmcb(__func__, vmcb);
1313 svm_dump_regs(__func__, regs);
1314 goto exit_and_crash;
1315 }
1317 perfc_incra(svmexits, exit_reason);
1319 hvm_maybe_deassert_evtchn_irq();
1321 /* Event delivery caused this intercept? Queue for redelivery. */
1322 eventinj = vmcb->exitintinfo;
1323 if ( unlikely(eventinj.fields.v) &&
1324 hvm_event_needs_reinjection(eventinj.fields.type,
1325 eventinj.fields.vector) )
1326 vmcb->eventinj = eventinj;
1328 switch ( exit_reason )
1329 {
1330 case VMEXIT_INTR:
1331 /* Asynchronous event, handled when we STGI'd after the VMEXIT. */
1332 HVMTRACE_0D(INTR);
1333 break;
1335 case VMEXIT_NMI:
1336 /* Asynchronous event, handled when we STGI'd after the VMEXIT. */
1337 HVMTRACE_0D(NMI);
1338 break;
1340 case VMEXIT_SMI:
1341 /* Asynchronous event, handled when we STGI'd after the VMEXIT. */
1342 HVMTRACE_0D(SMI);
1343 break;
1345 case VMEXIT_EXCEPTION_DB:
1346 if ( !v->domain->debugger_attached )
1347 goto exit_and_crash;
1348 domain_pause_for_debugger();
1349 break;
1351 case VMEXIT_EXCEPTION_BP:
1352 if ( !v->domain->debugger_attached )
1353 goto exit_and_crash;
1354 /* AMD Vol2, 15.11: INT3, INTO, BOUND intercepts do not update RIP. */
1355 if ( (inst_len = __get_instruction_length(v, INSTR_INT3)) == 0 )
1356 break;
1357 __update_guest_eip(regs, inst_len);
1358 domain_pause_for_debugger();
1359 break;
1361 case VMEXIT_EXCEPTION_NM:
1362 svm_fpu_dirty_intercept();
1363 break;
1365 case VMEXIT_EXCEPTION_PF: {
1366 unsigned long va;
1367 va = vmcb->exitinfo2;
1368 regs->error_code = vmcb->exitinfo1;
1369 HVM_DBG_LOG(DBG_LEVEL_VMMU,
1370 "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx",
1371 (unsigned long)regs->eax, (unsigned long)regs->ebx,
1372 (unsigned long)regs->ecx, (unsigned long)regs->edx,
1373 (unsigned long)regs->esi, (unsigned long)regs->edi);
1375 if ( paging_fault(va, regs) )
1376 {
1377 if ( trace_will_trace_event(TRC_SHADOW) )
1378 break;
1379 if ( hvm_long_mode_enabled(v) )
1380 HVMTRACE_LONG_2D(PF_XEN, regs->error_code, TRC_PAR_LONG(va));
1381 else
1382 HVMTRACE_2D(PF_XEN, regs->error_code, va);
1383 break;
1384 }
1386 svm_inject_exception(TRAP_page_fault, regs->error_code, va);
1387 break;
1388 }
1390 case VMEXIT_EXCEPTION_UD:
1391 svm_vmexit_ud_intercept(regs);
1392 break;
1394 /* Asynchronous event, handled when we STGI'd after the VMEXIT. */
1395 case VMEXIT_EXCEPTION_MC:
1396 HVMTRACE_0D(MCE);
1397 break;
1399 case VMEXIT_VINTR:
1400 vmcb->vintr.fields.irq = 0;
1401 vmcb->general1_intercepts &= ~GENERAL1_INTERCEPT_VINTR;
1402 break;
1404 case VMEXIT_INVD:
1405 case VMEXIT_WBINVD:
1406 svm_vmexit_do_invalidate_cache(regs);
1407 break;
1409 case VMEXIT_TASK_SWITCH: {
1410 enum hvm_task_switch_reason reason;
1411 int32_t errcode = -1;
1412 if ( (vmcb->exitinfo2 >> 36) & 1 )
1413 reason = TSW_iret;
1414 else if ( (vmcb->exitinfo2 >> 38) & 1 )
1415 reason = TSW_jmp;
1416 else
1417 reason = TSW_call_or_int;
1418 if ( (vmcb->exitinfo2 >> 44) & 1 )
1419 errcode = (uint32_t)vmcb->exitinfo2;
1421 /*
1422 * Some processors set the EXITINTINFO field when the task switch
1423 * is caused by a task gate in the IDT. In this case we will be
1424 * emulating the event injection, so we do not want the processor
1425 * to re-inject the original event!
1426 */
1427 vmcb->eventinj.bytes = 0;
1429 hvm_task_switch((uint16_t)vmcb->exitinfo1, reason, errcode);
1430 break;
1431 }
1433 case VMEXIT_CPUID:
1434 svm_vmexit_do_cpuid(regs);
1435 break;
1437 case VMEXIT_HLT:
1438 svm_vmexit_do_hlt(vmcb, regs);
1439 break;
1441 case VMEXIT_CR0_READ ... VMEXIT_CR15_READ:
1442 case VMEXIT_CR0_WRITE ... VMEXIT_CR15_WRITE:
1443 case VMEXIT_INVLPG:
1444 case VMEXIT_INVLPGA:
1445 case VMEXIT_IOIO:
1446 if ( !handle_mmio() )
1447 hvm_inject_exception(TRAP_gp_fault, 0, 0);
1448 break;
1450 case VMEXIT_VMMCALL:
1451 if ( (inst_len = __get_instruction_length(v, INSTR_VMCALL)) == 0 )
1452 break;
1453 HVMTRACE_1D(VMMCALL, regs->eax);
1454 rc = hvm_do_hypercall(regs);
1455 if ( rc != HVM_HCALL_preempted )
1456 {
1457 __update_guest_eip(regs, inst_len);
1458 if ( rc == HVM_HCALL_invalidate )
1459 send_invalidate_req();
1460 }
1461 break;
1463 case VMEXIT_DR0_READ ... VMEXIT_DR7_READ:
1464 case VMEXIT_DR0_WRITE ... VMEXIT_DR7_WRITE:
1465 svm_dr_access(v, regs);
1466 break;
1468 case VMEXIT_MSR:
1469 svm_do_msr_access(regs);
1470 break;
1472 case VMEXIT_SHUTDOWN:
1473 hvm_triple_fault();
1474 break;
1476 case VMEXIT_RDTSC:
1477 svm_vmexit_do_rdtsc(regs);
1478 break;
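/*
 * All of the following intercepts inject #UD: the features are not offered
 * to HVM guests here, and the SVM instructions themselves (VMRUN, VMLOAD,
 * VMSAVE, STGI, CLGI, SKINIT) must not be executed by a guest.
 */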
1480 case VMEXIT_RDTSCP:
1481 case VMEXIT_MONITOR:
1482 case VMEXIT_MWAIT:
1483 case VMEXIT_VMRUN:
1484 case VMEXIT_VMLOAD:
1485 case VMEXIT_VMSAVE:
1486 case VMEXIT_STGI:
1487 case VMEXIT_CLGI:
1488 case VMEXIT_SKINIT:
1489 svm_inject_exception(TRAP_invalid_op, HVM_DELIVER_NO_ERROR_CODE, 0);
1490 break;
1492 case VMEXIT_NPF:
1493 perfc_incra(svmexits, VMEXIT_NPF_PERFC);
1494 regs->error_code = vmcb->exitinfo1;
1495 svm_do_nested_pgfault(vmcb->exitinfo2, regs);
1496 break;
1498 case VMEXIT_IRET:
1499 /*
1500 * IRET clears the NMI mask. However because we clear the mask
1501 * /before/ executing IRET, we set the interrupt shadow to prevent
1502 * a pending NMI from being injected immediately. This will work
1503 * perfectly unless the IRET instruction faults: in that case we
1504 * may inject an NMI before the NMI handler's IRET instruction is
1505 * retired.
1506 */
1507 vmcb->general1_intercepts &= ~GENERAL1_INTERCEPT_IRET;
1508 vmcb->interrupt_shadow = 1;
1509 break;
1511 default:
1512 exit_and_crash:
1513 gdprintk(XENLOG_ERR, "unexpected VMEXIT: exit reason = 0x%x, "
1514 "exitinfo1 = %"PRIx64", exitinfo2 = %"PRIx64"\n",
1515 exit_reason,
1516 (u64)vmcb->exitinfo1, (u64)vmcb->exitinfo2);
1517 domain_crash(v->domain);
1518 break;
1519 }
1521 /* The exit may have updated the TPR: reflect this in the hardware vtpr */
1522 vmcb->vintr.fields.tpr =
1523 (vlapic_get_reg(vcpu_vlapic(v), APIC_TASKPRI) & 0xFF) >> 4;
1524 }
1526 asmlinkage void svm_trace_vmentry(void)
1527 {
1528 HVMTRACE_ND (VMENTRY, 1/*cycles*/, 0, 0, 0, 0, 0, 0, 0);
1529 }
1531 /*
1532 * Local variables:
1533 * mode: C
1534 * c-set-style: "BSD"
1535 * c-basic-offset: 4
1536 * tab-width: 4
1537 * indent-tabs-mode: nil
1538 * End:
1539 */