ia64/xen-unstable

view xen/arch/x86/hvm/svm/svm.c @ 16989:92734271810a

vmx realmode: Emulate protected-mode transition while CS and SS have
bad selector values (bottom two bits non-zero).

Allows the openSUSE 10.3 install CD to boot. Unfortunately, the SUSE
Linux 10.1 install CD still fails to work.

Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Tue Feb 05 15:45:10 2008 +0000 (2008-02-05)
parents e4edc310e949
children 199f81c4b882
1 /*
2 * svm.c: handling SVM architecture-related VM exits
3 * Copyright (c) 2004, Intel Corporation.
4 * Copyright (c) 2005-2007, Advanced Micro Devices, Inc.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
17 * Place - Suite 330, Boston, MA 02111-1307 USA.
18 */
20 #include <xen/config.h>
21 #include <xen/init.h>
22 #include <xen/lib.h>
23 #include <xen/trace.h>
24 #include <xen/sched.h>
25 #include <xen/irq.h>
26 #include <xen/softirq.h>
27 #include <xen/hypercall.h>
28 #include <xen/domain_page.h>
29 #include <asm/current.h>
30 #include <asm/io.h>
31 #include <asm/paging.h>
32 #include <asm/p2m.h>
33 #include <asm/regs.h>
34 #include <asm/cpufeature.h>
35 #include <asm/processor.h>
36 #include <asm/types.h>
37 #include <asm/debugreg.h>
38 #include <asm/msr.h>
39 #include <asm/spinlock.h>
40 #include <asm/hvm/hvm.h>
41 #include <asm/hvm/support.h>
42 #include <asm/hvm/io.h>
43 #include <asm/hvm/svm/asid.h>
44 #include <asm/hvm/svm/svm.h>
45 #include <asm/hvm/svm/vmcb.h>
46 #include <asm/hvm/svm/emulate.h>
47 #include <asm/hvm/svm/intr.h>
48 #include <asm/x86_emulate.h>
49 #include <public/sched.h>
50 #include <asm/hvm/vpt.h>
51 #include <asm/hvm/trace.h>
52 #include <asm/hap.h>
54 u32 svm_feature_flags;
56 #define set_segment_register(name, value) \
57 asm volatile ( "movw %%ax ,%%" STR(name) "" : : "a" (value) )
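/* Load 'value' into the named x86 segment register via inline asm; used in
 * svm_ctxt_switch_to() to sanitise %ds/%es/%ss before running a guest. */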
59 enum handler_return { HNDL_done, HNDL_unhandled, HNDL_exception_raised };
61 int inst_copy_from_guest(unsigned char *buf, unsigned long guest_eip,
62 int inst_len);
63 asmlinkage void do_IRQ(struct cpu_user_regs *);
65 static void svm_update_guest_cr(struct vcpu *v, unsigned int cr);
66 static void svm_update_guest_efer(struct vcpu *v);
67 static void svm_inject_exception(
68 unsigned int trapnr, int errcode, unsigned long cr2);
70 /* va of hardware host save area */
71 static void *hsa[NR_CPUS] __read_mostly;
73 /* vmcb used for extended host state */
74 static void *root_vmcb[NR_CPUS] __read_mostly;
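/* Advance the guest RIP past an emulated instruction, drop the interrupt
 * shadow, and re-inject #DB if the guest is single-stepping (EFLAGS.TF). */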
76 static void inline __update_guest_eip(
77 struct cpu_user_regs *regs, unsigned int inst_len)
78 {
79 struct vcpu *curr = current;
81 if ( unlikely((inst_len == 0) || (inst_len > 15)) )
82 {
83 gdprintk(XENLOG_ERR, "Bad instruction length %u\n", inst_len);
84 domain_crash(curr->domain);
85 return;
86 }
88 ASSERT(regs == guest_cpu_user_regs());
90 regs->eip += inst_len;
91 regs->eflags &= ~X86_EFLAGS_RF;
93 curr->arch.hvm_svm.vmcb->interrupt_shadow = 0;
95 if ( regs->eflags & X86_EFLAGS_TF )
96 svm_inject_exception(TRAP_debug, HVM_DELIVER_NO_ERROR_CODE, 0);
97 }
99 static void svm_cpu_down(void)
100 {
101 write_efer(read_efer() & ~EFER_SVME);
102 }
104 static enum handler_return long_mode_do_msr_write(struct cpu_user_regs *regs)
105 {
106 u64 msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
107 u32 ecx = regs->ecx;
109 HVM_DBG_LOG(DBG_LEVEL_0, "msr %x msr_content %"PRIx64,
110 ecx, msr_content);
112 switch ( ecx )
113 {
114 case MSR_EFER:
115 if ( !hvm_set_efer(msr_content) )
116 return HNDL_exception_raised;
117 break;
119 case MSR_IA32_MC4_MISC: /* Threshold register */
120 case MSR_F10_MC4_MISC1 ... MSR_F10_MC4_MISC3:
121 /*
122 * MCA/MCE: Threshold register is reported to be locked, so we ignore
123 * all write accesses. This behaviour matches real HW, so guests should
124 * have no problem with this.
125 */
126 break;
128 default:
129 return HNDL_unhandled;
130 }
132 return HNDL_done;
133 }
135 static void svm_save_dr(struct vcpu *v)
136 {
137 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
139 if ( !v->arch.hvm_vcpu.flag_dr_dirty )
140 return;
142 /* Clear the DR dirty flag and re-enable intercepts for DR accesses. */
143 v->arch.hvm_vcpu.flag_dr_dirty = 0;
144 v->arch.hvm_svm.vmcb->dr_intercepts = ~0u;
146 v->arch.guest_context.debugreg[0] = read_debugreg(0);
147 v->arch.guest_context.debugreg[1] = read_debugreg(1);
148 v->arch.guest_context.debugreg[2] = read_debugreg(2);
149 v->arch.guest_context.debugreg[3] = read_debugreg(3);
150 v->arch.guest_context.debugreg[6] = vmcb->dr6;
151 v->arch.guest_context.debugreg[7] = vmcb->dr7;
152 }
154 static void __restore_debug_registers(struct vcpu *v)
155 {
156 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
158 if ( v->arch.hvm_vcpu.flag_dr_dirty )
159 return;
161 v->arch.hvm_vcpu.flag_dr_dirty = 1;
162 vmcb->dr_intercepts = 0;
164 write_debugreg(0, v->arch.guest_context.debugreg[0]);
165 write_debugreg(1, v->arch.guest_context.debugreg[1]);
166 write_debugreg(2, v->arch.guest_context.debugreg[2]);
167 write_debugreg(3, v->arch.guest_context.debugreg[3]);
168 vmcb->dr6 = v->arch.guest_context.debugreg[6];
169 vmcb->dr7 = v->arch.guest_context.debugreg[7];
170 }
172 /*
173 * DR7 is saved and restored on every vmexit. Other debug registers only
174 * need to be restored if their value is going to affect execution -- i.e.,
175 * if one of the breakpoints is enabled. So mask out all bits that don't
176 * enable some breakpoint functionality.
177 */
178 static void svm_restore_dr(struct vcpu *v)
179 {
180 if ( unlikely(v->arch.guest_context.debugreg[7] & DR7_ACTIVE_MASK) )
181 __restore_debug_registers(v);
182 }
184 int svm_vmcb_save(struct vcpu *v, struct hvm_hw_cpu *c)
185 {
186 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
188 c->cr0 = v->arch.hvm_vcpu.guest_cr[0];
189 c->cr2 = v->arch.hvm_vcpu.guest_cr[2];
190 c->cr3 = v->arch.hvm_vcpu.guest_cr[3];
191 c->cr4 = v->arch.hvm_vcpu.guest_cr[4];
193 c->idtr_limit = vmcb->idtr.limit;
194 c->idtr_base = vmcb->idtr.base;
196 c->gdtr_limit = vmcb->gdtr.limit;
197 c->gdtr_base = vmcb->gdtr.base;
199 c->cs_sel = vmcb->cs.sel;
200 c->cs_limit = vmcb->cs.limit;
201 c->cs_base = vmcb->cs.base;
202 c->cs_arbytes = vmcb->cs.attr.bytes;
204 c->ds_sel = vmcb->ds.sel;
205 c->ds_limit = vmcb->ds.limit;
206 c->ds_base = vmcb->ds.base;
207 c->ds_arbytes = vmcb->ds.attr.bytes;
209 c->es_sel = vmcb->es.sel;
210 c->es_limit = vmcb->es.limit;
211 c->es_base = vmcb->es.base;
212 c->es_arbytes = vmcb->es.attr.bytes;
214 c->ss_sel = vmcb->ss.sel;
215 c->ss_limit = vmcb->ss.limit;
216 c->ss_base = vmcb->ss.base;
217 c->ss_arbytes = vmcb->ss.attr.bytes;
219 c->fs_sel = vmcb->fs.sel;
220 c->fs_limit = vmcb->fs.limit;
221 c->fs_base = vmcb->fs.base;
222 c->fs_arbytes = vmcb->fs.attr.bytes;
224 c->gs_sel = vmcb->gs.sel;
225 c->gs_limit = vmcb->gs.limit;
226 c->gs_base = vmcb->gs.base;
227 c->gs_arbytes = vmcb->gs.attr.bytes;
229 c->tr_sel = vmcb->tr.sel;
230 c->tr_limit = vmcb->tr.limit;
231 c->tr_base = vmcb->tr.base;
232 c->tr_arbytes = vmcb->tr.attr.bytes;
234 c->ldtr_sel = vmcb->ldtr.sel;
235 c->ldtr_limit = vmcb->ldtr.limit;
236 c->ldtr_base = vmcb->ldtr.base;
237 c->ldtr_arbytes = vmcb->ldtr.attr.bytes;
239 c->sysenter_cs = vmcb->sysenter_cs;
240 c->sysenter_esp = vmcb->sysenter_esp;
241 c->sysenter_eip = vmcb->sysenter_eip;
243 c->pending_event = 0;
244 c->error_code = 0;
245 if ( vmcb->eventinj.fields.v &&
246 hvm_event_needs_reinjection(vmcb->eventinj.fields.type,
247 vmcb->eventinj.fields.vector) )
248 {
249 c->pending_event = (uint32_t)vmcb->eventinj.bytes;
250 c->error_code = vmcb->eventinj.fields.errorcode;
251 }
253 return 1;
254 }
257 int svm_vmcb_restore(struct vcpu *v, struct hvm_hw_cpu *c)
258 {
259 unsigned long mfn = 0;
260 p2m_type_t p2mt;
261 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
263 if ( c->pending_valid &&
264 ((c->pending_type == 1) || (c->pending_type > 6) ||
265 (c->pending_reserved != 0)) )
266 {
267 gdprintk(XENLOG_ERR, "Invalid pending event 0x%"PRIx32".\n",
268 c->pending_event);
269 return -EINVAL;
270 }
272 if ( !paging_mode_hap(v->domain) )
273 {
274 if ( c->cr0 & X86_CR0_PG )
275 {
276 mfn = mfn_x(gfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT, &p2mt));
277 if ( !p2m_is_ram(p2mt) || !get_page(mfn_to_page(mfn), v->domain) )
278 {
279 gdprintk(XENLOG_ERR, "Invalid CR3 value=0x%"PRIx64"\n",
280 c->cr3);
281 return -EINVAL;
282 }
283 }
285 if ( v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PG )
286 put_page(pagetable_get_page(v->arch.guest_table));
288 v->arch.guest_table = pagetable_from_pfn(mfn);
289 }
291 v->arch.hvm_vcpu.guest_cr[0] = c->cr0 | X86_CR0_ET;
292 v->arch.hvm_vcpu.guest_cr[2] = c->cr2;
293 v->arch.hvm_vcpu.guest_cr[3] = c->cr3;
294 v->arch.hvm_vcpu.guest_cr[4] = c->cr4;
295 svm_update_guest_cr(v, 0);
296 svm_update_guest_cr(v, 2);
297 svm_update_guest_cr(v, 4);
299 #ifdef HVM_DEBUG_SUSPEND
300 printk("%s: cr3=0x%"PRIx64", cr0=0x%"PRIx64", cr4=0x%"PRIx64".\n",
301 __func__, c->cr3, c->cr0, c->cr4);
302 #endif
304 vmcb->idtr.limit = c->idtr_limit;
305 vmcb->idtr.base = c->idtr_base;
307 vmcb->gdtr.limit = c->gdtr_limit;
308 vmcb->gdtr.base = c->gdtr_base;
310 vmcb->cs.sel = c->cs_sel;
311 vmcb->cs.limit = c->cs_limit;
312 vmcb->cs.base = c->cs_base;
313 vmcb->cs.attr.bytes = c->cs_arbytes;
315 vmcb->ds.sel = c->ds_sel;
316 vmcb->ds.limit = c->ds_limit;
317 vmcb->ds.base = c->ds_base;
318 vmcb->ds.attr.bytes = c->ds_arbytes;
320 vmcb->es.sel = c->es_sel;
321 vmcb->es.limit = c->es_limit;
322 vmcb->es.base = c->es_base;
323 vmcb->es.attr.bytes = c->es_arbytes;
325 vmcb->ss.sel = c->ss_sel;
326 vmcb->ss.limit = c->ss_limit;
327 vmcb->ss.base = c->ss_base;
328 vmcb->ss.attr.bytes = c->ss_arbytes;
329 vmcb->cpl = vmcb->ss.attr.fields.dpl;
331 vmcb->fs.sel = c->fs_sel;
332 vmcb->fs.limit = c->fs_limit;
333 vmcb->fs.base = c->fs_base;
334 vmcb->fs.attr.bytes = c->fs_arbytes;
336 vmcb->gs.sel = c->gs_sel;
337 vmcb->gs.limit = c->gs_limit;
338 vmcb->gs.base = c->gs_base;
339 vmcb->gs.attr.bytes = c->gs_arbytes;
341 vmcb->tr.sel = c->tr_sel;
342 vmcb->tr.limit = c->tr_limit;
343 vmcb->tr.base = c->tr_base;
344 vmcb->tr.attr.bytes = c->tr_arbytes;
346 vmcb->ldtr.sel = c->ldtr_sel;
347 vmcb->ldtr.limit = c->ldtr_limit;
348 vmcb->ldtr.base = c->ldtr_base;
349 vmcb->ldtr.attr.bytes = c->ldtr_arbytes;
351 vmcb->sysenter_cs = c->sysenter_cs;
352 vmcb->sysenter_esp = c->sysenter_esp;
353 vmcb->sysenter_eip = c->sysenter_eip;
355 if ( paging_mode_hap(v->domain) )
356 {
357 vmcb->np_enable = 1;
358 vmcb->g_pat = 0x0007040600070406ULL; /* guest PAT */
359 vmcb->h_cr3 = pagetable_get_paddr(v->domain->arch.phys_table);
360 }
362 if ( c->pending_valid )
363 {
364 gdprintk(XENLOG_INFO, "Re-injecting 0x%"PRIx32", 0x%"PRIx32"\n",
365 c->pending_event, c->error_code);
367 if ( hvm_event_needs_reinjection(c->pending_type, c->pending_vector) )
368 {
369 vmcb->eventinj.bytes = c->pending_event;
370 vmcb->eventinj.fields.errorcode = c->error_code;
371 }
372 }
374 paging_update_paging_modes(v);
376 return 0;
377 }
380 static void svm_save_cpu_state(struct vcpu *v, struct hvm_hw_cpu *data)
381 {
382 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
384 data->shadow_gs = vmcb->kerngsbase;
385 data->msr_lstar = vmcb->lstar;
386 data->msr_star = vmcb->star;
387 data->msr_cstar = vmcb->cstar;
388 data->msr_syscall_mask = vmcb->sfmask;
389 data->msr_efer = v->arch.hvm_vcpu.guest_efer;
390 data->msr_flags = -1ULL;
392 data->tsc = hvm_get_guest_time(v);
393 }
396 static void svm_load_cpu_state(struct vcpu *v, struct hvm_hw_cpu *data)
397 {
398 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
400 vmcb->kerngsbase = data->shadow_gs;
401 vmcb->lstar = data->msr_lstar;
402 vmcb->star = data->msr_star;
403 vmcb->cstar = data->msr_cstar;
404 vmcb->sfmask = data->msr_syscall_mask;
405 v->arch.hvm_vcpu.guest_efer = data->msr_efer;
406 svm_update_guest_efer(v);
408 hvm_set_guest_time(v, data->tsc);
409 }
411 static void svm_save_vmcb_ctxt(struct vcpu *v, struct hvm_hw_cpu *ctxt)
412 {
413 svm_save_cpu_state(v, ctxt);
414 svm_vmcb_save(v, ctxt);
415 }
417 static int svm_load_vmcb_ctxt(struct vcpu *v, struct hvm_hw_cpu *ctxt)
418 {
419 svm_load_cpu_state(v, ctxt);
420 if (svm_vmcb_restore(v, ctxt)) {
421 printk("svm_vmcb restore failed!\n");
422 domain_crash(v->domain);
423 return -EINVAL;
424 }
426 return 0;
427 }
429 static void svm_fpu_enter(struct vcpu *v)
430 {
431 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
433 setup_fpu(v);
434 vmcb->exception_intercepts &= ~(1U << TRAP_no_device);
435 }
437 static void svm_fpu_leave(struct vcpu *v)
438 {
439 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
441 ASSERT(!v->fpu_dirtied);
442 ASSERT(read_cr0() & X86_CR0_TS);
444 /*
445 * If the guest does not have TS enabled then we must cause and handle an
446 * exception on first use of the FPU. If the guest *does* have TS enabled
447 * then this is not necessary: no FPU activity can occur until the guest
448 * clears CR0.TS, and we will initialise the FPU when that happens.
449 */
450 if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS) )
451 {
452 v->arch.hvm_svm.vmcb->exception_intercepts |= 1U << TRAP_no_device;
453 vmcb->cr0 |= X86_CR0_TS;
454 }
455 }
457 static enum hvm_intblk svm_interrupt_blocked(
458 struct vcpu *v, struct hvm_intack intack)
459 {
460 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
462 if ( vmcb->interrupt_shadow )
463 return hvm_intblk_shadow;
465 if ( intack.source == hvm_intsrc_nmi )
466 return hvm_intblk_none;
468 ASSERT((intack.source == hvm_intsrc_pic) ||
469 (intack.source == hvm_intsrc_lapic));
471 if ( !(guest_cpu_user_regs()->eflags & X86_EFLAGS_IF) )
472 return hvm_intblk_rflags_ie;
474 return hvm_intblk_none;
475 }
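/* Report the guest's current execution mode: 0 = real mode, 1 = virtual-8086,
 * 2 = 16-bit protected, 4 = 32-bit protected, 8 = 64-bit long mode. */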
477 static int svm_guest_x86_mode(struct vcpu *v)
478 {
479 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
481 if ( unlikely(!(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE)) )
482 return 0;
483 if ( unlikely(guest_cpu_user_regs()->eflags & X86_EFLAGS_VM) )
484 return 1;
485 if ( hvm_long_mode_enabled(v) && likely(vmcb->cs.attr.fields.l) )
486 return 8;
487 return (likely(vmcb->cs.attr.fields.db) ? 4 : 2);
488 }
490 static void svm_update_host_cr3(struct vcpu *v)
491 {
492 /* SVM doesn't have a HOST_CR3 equivalent to update. */
493 }
495 static void svm_update_guest_cr(struct vcpu *v, unsigned int cr)
496 {
497 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
499 switch ( cr )
500 {
501 case 0: {
502 unsigned long hw_cr0_mask = 0;
504 if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS) )
505 {
506 if ( v != current )
507 hw_cr0_mask |= X86_CR0_TS;
508 else if ( vmcb->cr0 & X86_CR0_TS )
509 svm_fpu_enter(v);
510 }
512 vmcb->cr0 = v->arch.hvm_vcpu.guest_cr[0] | hw_cr0_mask;
513 if ( !paging_mode_hap(v->domain) )
514 vmcb->cr0 |= X86_CR0_PG | X86_CR0_WP;
515 break;
516 }
517 case 2:
518 vmcb->cr2 = v->arch.hvm_vcpu.guest_cr[2];
519 break;
520 case 3:
521 vmcb->cr3 = v->arch.hvm_vcpu.hw_cr[3];
522 svm_asid_inv_asid(v);
523 break;
524 case 4:
525 vmcb->cr4 = HVM_CR4_HOST_MASK;
526 if ( paging_mode_hap(v->domain) )
527 vmcb->cr4 &= ~X86_CR4_PAE;
528 vmcb->cr4 |= v->arch.hvm_vcpu.guest_cr[4];
529 break;
530 default:
531 BUG();
532 }
533 }
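/* The VMCB copy of EFER always has SVME set, and LME is only exposed to
 * hardware once the guest has actually activated long mode (LMA set). */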
535 static void svm_update_guest_efer(struct vcpu *v)
536 {
537 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
539 vmcb->efer = (v->arch.hvm_vcpu.guest_efer | EFER_SVME) & ~EFER_LME;
540 if ( vmcb->efer & EFER_LMA )
541 vmcb->efer |= EFER_LME;
542 }
544 static void svm_flush_guest_tlbs(void)
545 {
546 /* Roll over the CPU's ASID generation, so it gets a clean TLB when we
547 * next VMRUN. (If ASIDs are disabled, the whole TLB is flushed on
548 * VMRUN anyway). */
549 svm_asid_inc_generation();
550 }
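/* VMSAVE the state that VMRUN/VMLOAD manage outside the normal #VMEXIT path
 * (fs, gs, tr, ldtr, kerngsbase and the syscall/sysenter MSRs) back into the
 * in-memory VMCB so those fields can be read or modified safely. */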
552 static void svm_sync_vmcb(struct vcpu *v)
553 {
554 struct arch_svm_struct *arch_svm = &v->arch.hvm_svm;
556 if ( arch_svm->vmcb_in_sync )
557 return;
559 arch_svm->vmcb_in_sync = 1;
561 svm_vmsave(arch_svm->vmcb);
562 }
564 static unsigned long svm_get_segment_base(struct vcpu *v, enum x86_segment seg)
565 {
566 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
567 int long_mode = vmcb->cs.attr.fields.l && hvm_long_mode_enabled(v);
569 switch ( seg )
570 {
571 case x86_seg_cs: return long_mode ? 0 : vmcb->cs.base;
572 case x86_seg_ds: return long_mode ? 0 : vmcb->ds.base;
573 case x86_seg_es: return long_mode ? 0 : vmcb->es.base;
574 case x86_seg_fs: svm_sync_vmcb(v); return vmcb->fs.base;
575 case x86_seg_gs: svm_sync_vmcb(v); return vmcb->gs.base;
576 case x86_seg_ss: return long_mode ? 0 : vmcb->ss.base;
577 case x86_seg_tr: svm_sync_vmcb(v); return vmcb->tr.base;
578 case x86_seg_gdtr: return vmcb->gdtr.base;
579 case x86_seg_idtr: return vmcb->idtr.base;
580 case x86_seg_ldtr: svm_sync_vmcb(v); return vmcb->ldtr.base;
581 default: BUG();
582 }
583 return 0;
584 }
586 static void svm_get_segment_register(struct vcpu *v, enum x86_segment seg,
587 struct segment_register *reg)
588 {
589 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
591 ASSERT(v == current);
593 switch ( seg )
594 {
595 case x86_seg_cs:
596 memcpy(reg, &vmcb->cs, sizeof(*reg));
597 break;
598 case x86_seg_ds:
599 memcpy(reg, &vmcb->ds, sizeof(*reg));
600 break;
601 case x86_seg_es:
602 memcpy(reg, &vmcb->es, sizeof(*reg));
603 break;
604 case x86_seg_fs:
605 svm_sync_vmcb(v);
606 memcpy(reg, &vmcb->fs, sizeof(*reg));
607 break;
608 case x86_seg_gs:
609 svm_sync_vmcb(v);
610 memcpy(reg, &vmcb->gs, sizeof(*reg));
611 break;
612 case x86_seg_ss:
613 memcpy(reg, &vmcb->ss, sizeof(*reg));
614 break;
615 case x86_seg_tr:
616 svm_sync_vmcb(v);
617 memcpy(reg, &vmcb->tr, sizeof(*reg));
618 break;
619 case x86_seg_gdtr:
620 memcpy(reg, &vmcb->gdtr, sizeof(*reg));
621 break;
622 case x86_seg_idtr:
623 memcpy(reg, &vmcb->idtr, sizeof(*reg));
624 break;
625 case x86_seg_ldtr:
626 svm_sync_vmcb(v);
627 memcpy(reg, &vmcb->ldtr, sizeof(*reg));
628 break;
629 default:
630 BUG();
631 }
632 }
634 static void svm_set_segment_register(struct vcpu *v, enum x86_segment seg,
635 struct segment_register *reg)
636 {
637 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
638 int sync = 0;
640 ASSERT((v == current) || !vcpu_runnable(v));
642 switch ( seg )
643 {
644 case x86_seg_fs:
645 case x86_seg_gs:
646 case x86_seg_tr:
647 case x86_seg_ldtr:
648 sync = (v == current);
649 break;
650 default:
651 break;
652 }
654 if ( sync )
655 svm_sync_vmcb(v);
657 switch ( seg )
658 {
659 case x86_seg_cs:
660 memcpy(&vmcb->cs, reg, sizeof(*reg));
661 break;
662 case x86_seg_ds:
663 memcpy(&vmcb->ds, reg, sizeof(*reg));
664 break;
665 case x86_seg_es:
666 memcpy(&vmcb->es, reg, sizeof(*reg));
667 break;
668 case x86_seg_fs:
669 memcpy(&vmcb->fs, reg, sizeof(*reg));
670 break;
671 case x86_seg_gs:
672 memcpy(&vmcb->gs, reg, sizeof(*reg));
673 break;
674 case x86_seg_ss:
675 memcpy(&vmcb->ss, reg, sizeof(*reg));
676 vmcb->cpl = vmcb->ss.attr.fields.dpl;
677 break;
678 case x86_seg_tr:
679 memcpy(&vmcb->tr, reg, sizeof(*reg));
680 break;
681 case x86_seg_gdtr:
682 memcpy(&vmcb->gdtr, reg, sizeof(*reg));
683 break;
684 case x86_seg_idtr:
685 memcpy(&vmcb->idtr, reg, sizeof(*reg));
686 break;
687 case x86_seg_ldtr:
688 memcpy(&vmcb->ldtr, reg, sizeof(*reg));
689 break;
690 default:
691 BUG();
692 }
694 if ( sync )
695 svm_vmload(vmcb);
696 }
698 static void svm_set_tsc_offset(struct vcpu *v, u64 offset)
699 {
700 v->arch.hvm_svm.vmcb->tsc_offset = offset;
701 }
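/* Each hypercall entry point is a 32-byte stub: load the hypercall number
 * into %eax, VMMCALL into Xen, then return. HYPERVISOR_iret is stubbed out
 * with UD2 as it is not supported here. */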
703 static void svm_init_hypercall_page(struct domain *d, void *hypercall_page)
704 {
705 char *p;
706 int i;
708 for ( i = 0; i < (PAGE_SIZE / 32); i++ )
709 {
710 p = (char *)(hypercall_page + (i * 32));
711 *(u8 *)(p + 0) = 0xb8; /* mov imm32, %eax */
712 *(u32 *)(p + 1) = i;
713 *(u8 *)(p + 5) = 0x0f; /* vmmcall */
714 *(u8 *)(p + 6) = 0x01;
715 *(u8 *)(p + 7) = 0xd9;
716 *(u8 *)(p + 8) = 0xc3; /* ret */
717 }
719 /* Don't support HYPERVISOR_iret at the moment */
720 *(u16 *)(hypercall_page + (__HYPERVISOR_iret * 32)) = 0x0b0f; /* ud2 */
721 }
723 static void svm_ctxt_switch_from(struct vcpu *v)
724 {
725 int cpu = smp_processor_id();
727 svm_fpu_leave(v);
729 svm_save_dr(v);
731 svm_sync_vmcb(v);
732 svm_vmload(root_vmcb[cpu]);
734 #ifdef __x86_64__
735 /* Resume use of ISTs now that the host TR is reinstated. */
736 idt_tables[cpu][TRAP_double_fault].a |= IST_DF << 32;
737 idt_tables[cpu][TRAP_nmi].a |= IST_NMI << 32;
738 idt_tables[cpu][TRAP_machine_check].a |= IST_MCE << 32;
739 #endif
740 }
742 static void svm_ctxt_switch_to(struct vcpu *v)
743 {
744 int cpu = smp_processor_id();
746 #ifdef __x86_64__
747 /*
748 * This is required because VMRUN performs a consistency check
749 * and some of the DOM0 selectors point to
750 * invalid GDT locations, which causes AMD processors
751 * to shut down.
752 */
753 set_segment_register(ds, 0);
754 set_segment_register(es, 0);
755 set_segment_register(ss, 0);
757 /*
758 * Cannot use ISTs for NMI/#MC/#DF while we are running with the guest TR.
759 * But this doesn't matter: the IST is only req'd to handle SYSCALL/SYSRET.
760 */
761 idt_tables[cpu][TRAP_double_fault].a &= ~(7UL << 32);
762 idt_tables[cpu][TRAP_nmi].a &= ~(7UL << 32);
763 idt_tables[cpu][TRAP_machine_check].a &= ~(7UL << 32);
764 #endif
766 svm_restore_dr(v);
768 svm_vmsave(root_vmcb[cpu]);
769 svm_vmload(v->arch.hvm_svm.vmcb);
770 }
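/* Per-VCPU resume path: latch debugger-driven #DB/#BP intercepts, refresh
 * timers and the ASID if the VCPU has moved to another physical CPU, and
 * mirror the vlapic TPR into the VMCB vTPR before re-entering the guest. */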
772 static void svm_do_resume(struct vcpu *v)
773 {
774 bool_t debug_state = v->domain->debugger_attached;
776 if ( unlikely(v->arch.hvm_vcpu.debug_state_latch != debug_state) )
777 {
778 uint32_t mask = (1U << TRAP_debug) | (1U << TRAP_int3);
779 v->arch.hvm_vcpu.debug_state_latch = debug_state;
780 if ( debug_state )
781 v->arch.hvm_svm.vmcb->exception_intercepts |= mask;
782 else
783 v->arch.hvm_svm.vmcb->exception_intercepts &= ~mask;
784 }
786 if ( v->arch.hvm_svm.launch_core != smp_processor_id() )
787 {
788 v->arch.hvm_svm.launch_core = smp_processor_id();
789 hvm_migrate_timers(v);
791 /* Migrating to another ASID domain. Request a new ASID. */
792 svm_asid_init_vcpu(v);
793 }
795 /* Reflect the vlapic's TPR in the hardware vtpr */
796 v->arch.hvm_svm.vmcb->vintr.fields.tpr =
797 (vlapic_get_reg(vcpu_vlapic(v), APIC_TASKPRI) & 0xFF) >> 4;
799 hvm_do_resume(v);
800 reset_stack_and_jump(svm_asm_do_resume);
801 }
803 static int svm_domain_initialise(struct domain *d)
804 {
805 return 0;
806 }
808 static void svm_domain_destroy(struct domain *d)
809 {
810 }
812 static int svm_vcpu_initialise(struct vcpu *v)
813 {
814 int rc;
816 v->arch.schedule_tail = svm_do_resume;
817 v->arch.ctxt_switch_from = svm_ctxt_switch_from;
818 v->arch.ctxt_switch_to = svm_ctxt_switch_to;
820 v->arch.hvm_svm.launch_core = -1;
822 if ( (rc = svm_create_vmcb(v)) != 0 )
823 {
824 dprintk(XENLOG_WARNING,
825 "Failed to create VMCB for vcpu %d: err=%d.\n",
826 v->vcpu_id, rc);
827 return rc;
828 }
830 return 0;
831 }
833 static void svm_vcpu_destroy(struct vcpu *v)
834 {
835 svm_destroy_vmcb(v);
836 }
838 static void svm_inject_exception(
839 unsigned int trapnr, int errcode, unsigned long cr2)
840 {
841 struct vcpu *curr = current;
842 struct vmcb_struct *vmcb = curr->arch.hvm_svm.vmcb;
843 eventinj_t event;
845 event.bytes = 0;
846 event.fields.v = 1;
847 event.fields.type = X86_EVENTTYPE_HW_EXCEPTION;
848 event.fields.vector = trapnr;
849 event.fields.ev = (errcode != HVM_DELIVER_NO_ERROR_CODE);
850 event.fields.errorcode = errcode;
852 vmcb->eventinj = event;
854 if ( trapnr == TRAP_page_fault )
855 {
856 vmcb->cr2 = curr->arch.hvm_vcpu.guest_cr[2] = cr2;
857 HVMTRACE_2D(PF_INJECT, curr, curr->arch.hvm_vcpu.guest_cr[2], errcode);
858 }
859 else
860 {
861 HVMTRACE_2D(INJ_EXC, curr, trapnr, errcode);
862 }
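/* When injecting #DB while the guest is single-stepping, make the debug
 * registers live and set DR6.BS (bit 14), as the hardware would. */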
864 if ( (trapnr == TRAP_debug) &&
865 (guest_cpu_user_regs()->eflags & X86_EFLAGS_TF) )
866 {
867 __restore_debug_registers(curr);
868 vmcb->dr6 |= 0x4000;
869 }
870 }
872 static int svm_event_pending(struct vcpu *v)
873 {
874 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
875 return vmcb->eventinj.fields.v;
876 }
878 static int svm_do_pmu_interrupt(struct cpu_user_regs *regs)
879 {
880 return 0;
881 }
883 static struct hvm_function_table svm_function_table = {
884 .name = "SVM",
885 .cpu_down = svm_cpu_down,
886 .domain_initialise = svm_domain_initialise,
887 .domain_destroy = svm_domain_destroy,
888 .vcpu_initialise = svm_vcpu_initialise,
889 .vcpu_destroy = svm_vcpu_destroy,
890 .save_cpu_ctxt = svm_save_vmcb_ctxt,
891 .load_cpu_ctxt = svm_load_vmcb_ctxt,
892 .interrupt_blocked = svm_interrupt_blocked,
893 .guest_x86_mode = svm_guest_x86_mode,
894 .get_segment_base = svm_get_segment_base,
895 .get_segment_register = svm_get_segment_register,
896 .set_segment_register = svm_set_segment_register,
897 .update_host_cr3 = svm_update_host_cr3,
898 .update_guest_cr = svm_update_guest_cr,
899 .update_guest_efer = svm_update_guest_efer,
900 .flush_guest_tlbs = svm_flush_guest_tlbs,
901 .set_tsc_offset = svm_set_tsc_offset,
902 .inject_exception = svm_inject_exception,
903 .init_hypercall_page = svm_init_hypercall_page,
904 .event_pending = svm_event_pending,
905 .do_pmu_interrupt = svm_do_pmu_interrupt
906 };
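/* Per-CPU SVM bring-up: check CPUID/BIOS enablement, allocate the host save
 * area and a host VMCB, set EFER.SVME and MSR_K8_VM_HSAVE_PA, and (on the
 * boot CPU only) register the SVM function table with the HVM layer. */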
908 int start_svm(struct cpuinfo_x86 *c)
909 {
910 u32 eax, ecx, edx;
911 u32 phys_hsa_lo, phys_hsa_hi;
912 u64 phys_hsa;
913 int cpu = smp_processor_id();
915 /* Xen does not fill x86_capability words except 0. */
916 ecx = cpuid_ecx(0x80000001);
917 boot_cpu_data.x86_capability[5] = ecx;
919 if ( !(test_bit(X86_FEATURE_SVME, &boot_cpu_data.x86_capability)) )
920 return 0;
922 /* Check whether SVM feature is disabled in BIOS */
923 rdmsr(MSR_K8_VM_CR, eax, edx);
924 if ( eax & K8_VMCR_SVME_DISABLE )
925 {
926 printk("AMD SVM Extension is disabled in BIOS.\n");
927 return 0;
928 }
930 if ( ((hsa[cpu] = alloc_host_save_area()) == NULL) ||
931 ((root_vmcb[cpu] = alloc_vmcb()) == NULL) )
932 return 0;
934 write_efer(read_efer() | EFER_SVME);
936 /* Initialize the HSA for this core. */
937 phys_hsa = (u64) virt_to_maddr(hsa[cpu]);
938 phys_hsa_lo = (u32) phys_hsa;
939 phys_hsa_hi = (u32) (phys_hsa >> 32);
940 wrmsr(MSR_K8_VM_HSAVE_PA, phys_hsa_lo, phys_hsa_hi);
942 /* Initialize core's ASID handling. */
943 svm_asid_init(c);
945 if ( cpu != 0 )
946 return 1;
948 setup_vmcb_dump();
950 svm_feature_flags = ((cpuid_eax(0x80000000) >= 0x8000000A) ?
951 cpuid_edx(0x8000000A) : 0);
953 svm_function_table.hap_supported = cpu_has_svm_npt;
955 hvm_enable(&svm_function_table);
957 return 1;
958 }
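/* Nested (NPT) page fault handler: forward emulated-MMIO frames to the MMIO
 * handler; otherwise treat the fault as a log-dirty write and switch the
 * page back to p2m_ram_rw. */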
960 static void svm_do_nested_pgfault(paddr_t gpa, struct cpu_user_regs *regs)
961 {
962 p2m_type_t p2mt;
963 mfn_t mfn;
964 unsigned long gfn = gpa >> PAGE_SHIFT;
966 /* If this GFN is emulated MMIO, pass the fault to the mmio handler */
967 mfn = gfn_to_mfn_current(gfn, &p2mt);
968 if ( p2mt == p2m_mmio_dm )
969 {
970 handle_mmio(gpa);
971 return;
972 }
974 /* Log-dirty: mark the page dirty and let the guest write it again */
975 paging_mark_dirty(current->domain, mfn_x(mfn));
976 p2m_change_type(current->domain, gfn, p2m_ram_logdirty, p2m_ram_rw);
977 }
979 static void svm_do_no_device_fault(struct vmcb_struct *vmcb)
980 {
981 struct vcpu *curr = current;
983 svm_fpu_enter(curr);
985 if ( !(curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS) )
986 vmcb->cr0 &= ~X86_CR0_TS;
987 }
989 /* Reserved bits ECX: [31:14], [12:4], [2:1]*/
990 #define SVM_VCPU_CPUID_L1_ECX_RESERVED 0xffffdff6
991 /* Reserved bits EDX: [31:29], [27], [22:20], [18], [10] */
992 #define SVM_VCPU_CPUID_L1_EDX_RESERVED 0xe8740400
994 #define bitmaskof(idx) (1U << ((idx) & 31))
995 static void svm_vmexit_do_cpuid(struct vmcb_struct *vmcb,
996 struct cpu_user_regs *regs)
997 {
998 unsigned long input = regs->eax;
999 unsigned int eax, ebx, ecx, edx;
1000 struct vcpu *v = current;
1001 int inst_len;
1003 hvm_cpuid(input, &eax, &ebx, &ecx, &edx);
1005 switch ( input )
1006 {
1007 case 0x00000001:
1008 /* Clear out reserved bits. */
1009 ecx &= ~SVM_VCPU_CPUID_L1_ECX_RESERVED;
1010 edx &= ~SVM_VCPU_CPUID_L1_EDX_RESERVED;
1012 /* Guest should only see one logical processor.
1013 * See details on page 23 of AMD CPUID Specification.
1014 */
1015 __clear_bit(X86_FEATURE_HT & 31, &edx);
1016 ebx &= 0xFF00FFFF; /* clear the logical processor count when HTT=0 */
1017 ebx |= 0x00010000; /* set to 1 just for precaution */
1018 break;
1020 case 0x80000001:
1021 /* Filter features which are shared with 0x00000001:EDX. */
1022 if ( vlapic_hw_disabled(vcpu_vlapic(v)) )
1023 __clear_bit(X86_FEATURE_APIC & 31, &edx);
1024 #if CONFIG_PAGING_LEVELS >= 3
1025 if ( !v->domain->arch.hvm_domain.params[HVM_PARAM_PAE_ENABLED] )
1026 #endif
1027 __clear_bit(X86_FEATURE_PAE & 31, &edx);
1028 __clear_bit(X86_FEATURE_PSE36 & 31, &edx);
1030 /* Filter all other features according to a whitelist. */
1031 ecx &= (bitmaskof(X86_FEATURE_LAHF_LM) |
1032 bitmaskof(X86_FEATURE_ALTMOVCR) |
1033 bitmaskof(X86_FEATURE_ABM) |
1034 bitmaskof(X86_FEATURE_SSE4A) |
1035 bitmaskof(X86_FEATURE_MISALIGNSSE) |
1036 bitmaskof(X86_FEATURE_3DNOWPF));
1037 edx &= (0x0183f3ff | /* features shared with 0x00000001:EDX */
1038 bitmaskof(X86_FEATURE_NX) |
1039 bitmaskof(X86_FEATURE_LM) |
1040 bitmaskof(X86_FEATURE_SYSCALL) |
1041 bitmaskof(X86_FEATURE_MP) |
1042 bitmaskof(X86_FEATURE_MMXEXT) |
1043 bitmaskof(X86_FEATURE_FFXSR));
1044 break;
1046 case 0x80000007:
1047 case 0x8000000A:
1048 /* Mask out features of power management and SVM extension. */
1049 eax = ebx = ecx = edx = 0;
1050 break;
1052 case 0x80000008:
1053 /* Make sure the number of CPU cores is 1 when HTT=0 */
1054 ecx &= 0xFFFFFF00;
1055 break;
1056 }
1058 regs->eax = eax;
1059 regs->ebx = ebx;
1060 regs->ecx = ecx;
1061 regs->edx = edx;
1063 HVMTRACE_3D(CPUID, v, input,
1064 ((uint64_t)eax << 32) | ebx, ((uint64_t)ecx << 32) | edx);
1066 inst_len = __get_instruction_length(v, INSTR_CPUID, NULL);
1067 __update_guest_eip(regs, inst_len);
1068 }
1070 static unsigned long *get_reg_p(
1071 unsigned int gpreg,
1072 struct cpu_user_regs *regs, struct vmcb_struct *vmcb)
1073 {
1074 unsigned long *reg_p = NULL;
1075 switch (gpreg)
1076 {
1077 case SVM_REG_EAX:
1078 reg_p = (unsigned long *)&regs->eax;
1079 break;
1080 case SVM_REG_EBX:
1081 reg_p = (unsigned long *)&regs->ebx;
1082 break;
1083 case SVM_REG_ECX:
1084 reg_p = (unsigned long *)&regs->ecx;
1085 break;
1086 case SVM_REG_EDX:
1087 reg_p = (unsigned long *)&regs->edx;
1088 break;
1089 case SVM_REG_EDI:
1090 reg_p = (unsigned long *)&regs->edi;
1091 break;
1092 case SVM_REG_ESI:
1093 reg_p = (unsigned long *)&regs->esi;
1094 break;
1095 case SVM_REG_EBP:
1096 reg_p = (unsigned long *)&regs->ebp;
1097 break;
1098 case SVM_REG_ESP:
1099 reg_p = (unsigned long *)&regs->esp;
1100 break;
1101 #ifdef __x86_64__
1102 case SVM_REG_R8:
1103 reg_p = (unsigned long *)&regs->r8;
1104 break;
1105 case SVM_REG_R9:
1106 reg_p = (unsigned long *)&regs->r9;
1107 break;
1108 case SVM_REG_R10:
1109 reg_p = (unsigned long *)&regs->r10;
1110 break;
1111 case SVM_REG_R11:
1112 reg_p = (unsigned long *)&regs->r11;
1113 break;
1114 case SVM_REG_R12:
1115 reg_p = (unsigned long *)&regs->r12;
1116 break;
1117 case SVM_REG_R13:
1118 reg_p = (unsigned long *)&regs->r13;
1119 break;
1120 case SVM_REG_R14:
1121 reg_p = (unsigned long *)&regs->r14;
1122 break;
1123 case SVM_REG_R15:
1124 reg_p = (unsigned long *)&regs->r15;
1125 break;
1126 #endif
1127 default:
1128 BUG();
1129 }
1131 return reg_p;
1132 }
1135 static unsigned long get_reg(
1136 unsigned int gpreg, struct cpu_user_regs *regs, struct vmcb_struct *vmcb)
1137 {
1138 unsigned long *gp;
1139 gp = get_reg_p(gpreg, regs, vmcb);
1140 return *gp;
1141 }
1144 static void set_reg(
1145 unsigned int gpreg, unsigned long value,
1146 struct cpu_user_regs *regs, struct vmcb_struct *vmcb)
1147 {
1148 unsigned long *gp;
1149 gp = get_reg_p(gpreg, regs, vmcb);
1150 *gp = value;
1151 }
1154 static void svm_dr_access(struct vcpu *v, struct cpu_user_regs *regs)
1155 {
1156 HVMTRACE_0D(DR_WRITE, v);
1157 __restore_debug_registers(v);
1158 }
1161 static void svm_get_prefix_info(struct vcpu *v, unsigned int dir,
1162 svm_segment_register_t **seg,
1163 unsigned int *asize)
1164 {
1165 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1166 unsigned char inst[MAX_INST_LEN];
1167 int i;
1169 memset(inst, 0, MAX_INST_LEN);
1170 if (inst_copy_from_guest(inst, svm_rip2pointer(v), sizeof(inst))
1171 != MAX_INST_LEN)
1172 {
1173 gdprintk(XENLOG_ERR, "get guest instruction failed\n");
1174 domain_crash(current->domain);
1175 return;
1176 }
1178 for (i = 0; i < MAX_INST_LEN; i++)
1179 {
1180 switch (inst[i])
1181 {
1182 case 0xf3: /* REPZ */
1183 case 0xf2: /* REPNZ */
1184 case 0xf0: /* LOCK */
1185 case 0x66: /* data32 */
1186 #ifdef __x86_64__
1187 /* REX prefixes */
1188 case 0x40:
1189 case 0x41:
1190 case 0x42:
1191 case 0x43:
1192 case 0x44:
1193 case 0x45:
1194 case 0x46:
1195 case 0x47:
1197 case 0x48:
1198 case 0x49:
1199 case 0x4a:
1200 case 0x4b:
1201 case 0x4c:
1202 case 0x4d:
1203 case 0x4e:
1204 case 0x4f:
1205 #endif
1206 continue;
1207 case 0x67: /* addr32 */
1208 *asize ^= 48; /* Switch 16/32 bits */
1209 continue;
1210 case 0x2e: /* CS */
1211 *seg = &vmcb->cs;
1212 continue;
1213 case 0x36: /* SS */
1214 *seg = &vmcb->ss;
1215 continue;
1216 case 0x26: /* ES */
1217 *seg = &vmcb->es;
1218 continue;
1219 case 0x64: /* FS */
1220 svm_sync_vmcb(v);
1221 *seg = &vmcb->fs;
1222 continue;
1223 case 0x65: /* GS */
1224 svm_sync_vmcb(v);
1225 *seg = &vmcb->gs;
1226 continue;
1227 case 0x3e: /* DS */
1228 *seg = &vmcb->ds;
1229 continue;
1230 default:
1231 break;
1232 }
1233 return;
1234 }
1235 }
1238 /* Get the address of INS/OUTS instruction */
1239 static int svm_get_io_address(
1240 struct vcpu *v, struct cpu_user_regs *regs,
1241 unsigned int size, ioio_info_t info,
1242 unsigned long *count, unsigned long *addr)
1244 unsigned long reg;
1245 unsigned int asize, isize;
1246 int long_mode = 0;
1247 svm_segment_register_t *seg = NULL;
1248 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1250 /* If we're in long mode, don't check the segment presence & limit */
1251 long_mode = vmcb->cs.attr.fields.l && hvm_long_mode_enabled(v);
1253 /* d field of cs.attr is 1 for 32-bit, 0 for 16 or 64 bit.
1254 * l field combined with EFER_LMA says whether it's 16 or 64 bit.
1255 */
1256 asize = (long_mode)?64:((vmcb->cs.attr.fields.db)?32:16);
1259 /* The ins/outs instructions are single byte, so if we have got more
1260 * than one byte (+ maybe rep-prefix), we have some prefix so we need
1261 * to figure out what it is...
1262 */
1263 isize = vmcb->exitinfo2 - regs->eip;
1265 if (info.fields.rep)
1266 isize --;
1268 if (isize > 1)
1269 svm_get_prefix_info(v, info.fields.type, &seg, &asize);
1271 if (info.fields.type == IOREQ_WRITE)
1273 reg = regs->esi;
1274 if (!seg) /* If no prefix, used DS. */
1275 seg = &vmcb->ds;
1276 if (!long_mode && (seg->attr.fields.type & 0xa) == 0x8) {
1277 svm_inject_exception(TRAP_gp_fault, 0, 0);
1278 return 0;
1281 else
1283 reg = regs->edi;
1284 seg = &vmcb->es; /* Note: This is ALWAYS ES. */
1285 if (!long_mode && (seg->attr.fields.type & 0xa) != 0x2) {
1286 svm_inject_exception(TRAP_gp_fault, 0, 0);
1287 return 0;
1291 /* If the segment isn't present, give GP fault! */
1292 if (!long_mode && !seg->attr.fields.p)
1294 svm_inject_exception(TRAP_gp_fault, 0, 0);
1295 return 0;
1298 if (asize == 16)
1300 *addr = (reg & 0xFFFF);
1301 *count = regs->ecx & 0xffff;
1303 else
1305 *addr = reg;
1306 *count = regs->ecx;
1308 if (!info.fields.rep)
1309 *count = 1;
1311 if (!long_mode)
1313 ASSERT(*addr == (u32)*addr);
1314 if ((u32)(*addr + size - 1) < (u32)*addr ||
1315 (seg->attr.fields.type & 0xc) != 0x4 ?
1316 *addr + size - 1 > seg->limit :
1317 *addr <= seg->limit)
1319 svm_inject_exception(TRAP_gp_fault, 0, 0);
1320 return 0;
1323 /* Check the limit for repeated instructions, as above we checked only
1324 the first instance. Truncate the count if a limit violation would
1325 occur. Note that the checking is not necessary for page granular
1326 segments as transfers crossing page boundaries will be broken up
1327 anyway. */
1328 if (!seg->attr.fields.g && *count > 1)
1330 if ((seg->attr.fields.type & 0xc) != 0x4)
1332 /* expand-up */
1333 if (!(regs->eflags & EF_DF))
1335 if (*addr + *count * size - 1 < *addr ||
1336 *addr + *count * size - 1 > seg->limit)
1337 *count = (seg->limit + 1UL - *addr) / size;
1339 else
1341 if (*count - 1 > *addr / size)
1342 *count = *addr / size + 1;
1345 else
1347 /* expand-down */
1348 if (!(regs->eflags & EF_DF))
1350 if (*count - 1 > -(s32)*addr / size)
1351 *count = -(s32)*addr / size + 1UL;
1353 else
1355 if (*addr < (*count - 1) * size ||
1356 *addr - (*count - 1) * size <= seg->limit)
1357 *count = (*addr - seg->limit - 1) / size + 1;
1360 ASSERT(*count);
1363 *addr += seg->base;
1365 #ifdef __x86_64__
1366 else
1368 if (seg == &vmcb->fs || seg == &vmcb->gs)
1369 *addr += seg->base;
1371 if (!is_canonical_address(*addr) ||
1372 !is_canonical_address(*addr + size - 1))
1374 svm_inject_exception(TRAP_gp_fault, 0, 0);
1375 return 0;
1377 if (*count > (1UL << 48) / size)
1378 *count = (1UL << 48) / size;
1379 if (!(regs->eflags & EF_DF))
1381 if (*addr + *count * size - 1 < *addr ||
1382 !is_canonical_address(*addr + *count * size - 1))
1383 *count = (*addr & ~((1UL << 48) - 1)) / size;
1385 else
1387 if ((*count - 1) * size > *addr ||
1388 !is_canonical_address(*addr + (*count - 1) * size))
1389 *count = (*addr & ~((1UL << 48) - 1)) / size + 1;
1391 ASSERT(*count);
1393 #endif
1395 return 1;
1399 static void svm_io_instruction(struct vcpu *v)
1401 struct cpu_user_regs *regs;
1402 struct hvm_io_op *pio_opp;
1403 unsigned int port;
1404 unsigned int size, dir, df;
1405 ioio_info_t info;
1406 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1408 pio_opp = &current->arch.hvm_vcpu.io_op;
1409 pio_opp->instr = INSTR_PIO;
1410 pio_opp->flags = 0;
1412 regs = &pio_opp->io_context;
1414 /* Copy current guest state into io instruction state structure. */
1415 memcpy(regs, guest_cpu_user_regs(), HVM_CONTEXT_STACK_BYTES);
1417 info.bytes = vmcb->exitinfo1;
1419 port = info.fields.port; /* port used to be addr */
1420 dir = info.fields.type; /* direction */
1421 df = regs->eflags & X86_EFLAGS_DF ? 1 : 0;
1423 if (info.fields.sz32)
1424 size = 4;
1425 else if (info.fields.sz16)
1426 size = 2;
1427 else
1428 size = 1;
1430 if (dir==IOREQ_READ)
1431 HVMTRACE_2D(IO_READ, v, port, size);
1432 else
1433 HVMTRACE_3D(IO_WRITE, v, port, size, regs->eax);
1435 HVM_DBG_LOG(DBG_LEVEL_IO,
1436 "svm_io_instruction: port 0x%x eip=%x:%"PRIx64", "
1437 "exit_qualification = %"PRIx64,
1438 port, vmcb->cs.sel, (uint64_t)regs->eip, info.bytes);
1440 /* string instruction */
1441 if (info.fields.str)
1443 unsigned long addr, count;
1444 paddr_t paddr;
1445 unsigned long gfn;
1446 uint32_t pfec;
1447 int sign = regs->eflags & X86_EFLAGS_DF ? -1 : 1;
1449 if (!svm_get_io_address(v, regs, size, info, &count, &addr))
1451 /* We failed to get a valid address, so don't do the IO operation -
1452 * it would just get worse if we do! Hopefully the guest is handing
1453 * gp-faults...
1454 */
1455 return;
1458 /* "rep" prefix */
1459 if (info.fields.rep)
1461 pio_opp->flags |= REPZ;
1464 /* Translate the address to a physical address */
1465 pfec = PFEC_page_present;
1466 if ( dir == IOREQ_READ ) /* Read from PIO --> write to RAM */
1467 pfec |= PFEC_write_access;
1468 if ( vmcb->cpl == 3 )
1469 pfec |= PFEC_user_mode;
1470 gfn = paging_gva_to_gfn(v, addr, &pfec);
1471 if ( gfn == INVALID_GFN )
1473 /* The guest does not have the RAM address mapped.
1474 * Need to send in a page fault */
1475 svm_inject_exception(TRAP_page_fault, pfec, addr);
1476 return;
1478 paddr = (paddr_t)gfn << PAGE_SHIFT | (addr & ~PAGE_MASK);
1480 /*
1481 * Handle string pio instructions that cross pages or that
1482 * are unaligned. See the comments in hvm_platform.c/handle_mmio()
1483 */
1484 if ((addr & PAGE_MASK) != ((addr + size - 1) & PAGE_MASK))
1486 unsigned long value = 0;
1488 pio_opp->flags |= OVERLAP;
1489 pio_opp->addr = addr;
1491 if (dir == IOREQ_WRITE) /* OUTS */
1493 if ( hvm_paging_enabled(current) )
1495 int rv = hvm_copy_from_guest_virt(&value, addr, size);
1496 if ( rv == HVMCOPY_bad_gva_to_gfn )
1497 return; /* exception already injected */
1499 else
1500 (void)hvm_copy_from_guest_phys(&value, addr, size);
1502 else /* dir != IOREQ_WRITE */
1503 /* Remember where to write the result, as a *VA*.
1504 * Must be a VA so we can handle the page overlap
1505 * correctly in hvm_pio_assist() */
1506 pio_opp->addr = addr;
1508 if (count == 1)
1509 regs->eip = vmcb->exitinfo2;
1511 send_pio_req(port, 1, size, value, dir, df, 0);
1513 else
1515 unsigned long last_addr = sign > 0 ? addr + count * size - 1
1516 : addr - (count - 1) * size;
1518 if ((addr & PAGE_MASK) != (last_addr & PAGE_MASK))
1520 if (sign > 0)
1521 count = (PAGE_SIZE - (addr & ~PAGE_MASK)) / size;
1522 else
1523 count = (addr & ~PAGE_MASK) / size + 1;
1525 else
1526 regs->eip = vmcb->exitinfo2;
1528 send_pio_req(port, count, size, paddr, dir, df, 1);
1531 else
1533 /*
1534 * On SVM, the RIP of the instruction following the IN/OUT is saved in
1535 * ExitInfo2
1536 */
1537 regs->eip = vmcb->exitinfo2;
1539 if (port == 0xe9 && dir == IOREQ_WRITE && size == 1)
1540 hvm_print_line(v, regs->eax); /* guest debug output */
1542 send_pio_req(port, 1, size, regs->eax, dir, df, 0);
1546 static void mov_from_cr(int cr, int gp, struct cpu_user_regs *regs)
1547 {
1548 unsigned long value = 0;
1549 struct vcpu *v = current;
1550 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1552 switch ( cr )
1553 {
1554 case 0:
1555 value = v->arch.hvm_vcpu.guest_cr[0];
1556 break;
1557 case 3:
1558 value = (unsigned long)v->arch.hvm_vcpu.guest_cr[3];
1559 break;
1560 case 4:
1561 value = (unsigned long)v->arch.hvm_vcpu.guest_cr[4];
1562 break;
1563 default:
1564 gdprintk(XENLOG_ERR, "invalid cr: %d\n", cr);
1565 domain_crash(v->domain);
1566 return;
1567 }
1569 HVMTRACE_2D(CR_READ, v, cr, value);
1571 set_reg(gp, value, regs, vmcb);
1573 HVM_DBG_LOG(DBG_LEVEL_VMMU, "mov_from_cr: CR%d, value = %lx", cr, value);
1574 }
1576 static int mov_to_cr(int gpreg, int cr, struct cpu_user_regs *regs)
1577 {
1578 unsigned long value;
1579 struct vcpu *v = current;
1580 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1582 value = get_reg(gpreg, regs, vmcb);
1584 HVMTRACE_2D(CR_WRITE, v, cr, value);
1586 HVM_DBG_LOG(DBG_LEVEL_1, "mov_to_cr: CR%d, value = %lx, current = %p",
1587 cr, value, v);
1589 switch ( cr )
1590 {
1591 case 0:
1592 return hvm_set_cr0(value);
1593 case 3:
1594 return hvm_set_cr3(value);
1595 case 4:
1596 return hvm_set_cr4(value);
1597 default:
1598 gdprintk(XENLOG_ERR, "invalid cr: %d\n", cr);
1599 domain_crash(v->domain);
1600 return 0;
1601 }
1603 return 1;
1604 }
1606 static void svm_cr_access(
1607 struct vcpu *v, unsigned int cr, unsigned int type,
1608 struct cpu_user_regs *regs)
1610 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1611 int inst_len = 0;
1612 int index,addr_size,i;
1613 unsigned int gpreg,offset;
1614 unsigned long value,addr;
1615 u8 buffer[MAX_INST_LEN];
1616 u8 prefix = 0;
1617 u8 modrm;
1618 enum x86_segment seg;
1619 int result = 1;
1620 enum instruction_index list_a[] = {INSTR_MOV2CR, INSTR_CLTS, INSTR_LMSW};
1621 enum instruction_index list_b[] = {INSTR_MOVCR2, INSTR_SMSW};
1622 enum instruction_index match;
1624 inst_copy_from_guest(buffer, svm_rip2pointer(v), sizeof(buffer));
1626 /* get index to first actual instruction byte - as we will need to know
1627 where the prefix lives later on */
1628 index = skip_prefix_bytes(buffer, sizeof(buffer));
1630 if ( type == TYPE_MOV_TO_CR )
1632 inst_len = __get_instruction_length_from_list(
1633 v, list_a, ARRAY_SIZE(list_a), &buffer[index], &match);
1635 else /* type == TYPE_MOV_FROM_CR */
1637 inst_len = __get_instruction_length_from_list(
1638 v, list_b, ARRAY_SIZE(list_b), &buffer[index], &match);
1641 inst_len += index;
1643 /* Check for REX prefix - it's ALWAYS the last byte of any prefix bytes */
1644 if (index > 0 && (buffer[index-1] & 0xF0) == 0x40)
1645 prefix = buffer[index-1];
1647 HVM_DBG_LOG(DBG_LEVEL_1, "eip = %lx", (unsigned long)regs->eip);
1649 switch ( match )
1652 case INSTR_MOV2CR:
1653 gpreg = decode_src_reg(prefix, buffer[index+2]);
1654 result = mov_to_cr(gpreg, cr, regs);
1655 break;
1657 case INSTR_MOVCR2:
1658 gpreg = decode_src_reg(prefix, buffer[index+2]);
1659 mov_from_cr(cr, gpreg, regs);
1660 break;
1662 case INSTR_CLTS:
1663 v->arch.hvm_vcpu.guest_cr[0] &= ~X86_CR0_TS;
1664 svm_update_guest_cr(v, 0);
1665 HVMTRACE_0D(CLTS, current);
1666 break;
1668 case INSTR_LMSW:
1669 gpreg = decode_src_reg(prefix, buffer[index+2]);
1670 value = get_reg(gpreg, regs, vmcb) & 0xF;
1671 value = (v->arch.hvm_vcpu.guest_cr[0] & ~0xF) | value;
1672 result = hvm_set_cr0(value);
1673 HVMTRACE_1D(LMSW, current, value);
1674 break;
1676 case INSTR_SMSW:
1677 value = v->arch.hvm_vcpu.guest_cr[0] & 0xFFFF;
1678 modrm = buffer[index+2];
1679 addr_size = svm_guest_x86_mode(v);
1680 if ( addr_size < 2 )
1681 addr_size = 2;
1682 if ( likely((modrm & 0xC0) >> 6 == 3) )
1684 gpreg = decode_src_reg(prefix, modrm);
1685 set_reg(gpreg, value, regs, vmcb);
1687 /*
1688 * For now, only implement decode of the offset mode, since that's the
1689 * only mode observed in a real-world OS. This code is also making the
1690 * assumption that we'll never hit this code in long mode.
1691 */
1692 else if ( (modrm == 0x26) || (modrm == 0x25) )
1694 seg = x86_seg_ds;
1695 i = index;
1696 /* Segment or address size overrides? */
1697 while ( i-- )
1699 switch ( buffer[i] )
1701 case 0x26: seg = x86_seg_es; break;
1702 case 0x2e: seg = x86_seg_cs; break;
1703 case 0x36: seg = x86_seg_ss; break;
1704 case 0x64: seg = x86_seg_fs; break;
1705 case 0x65: seg = x86_seg_gs; break;
1706 case 0x67: addr_size ^= 6; break;
1709 /* Bail unless this really is a seg_base + offset case */
1710 if ( ((modrm == 0x26) && (addr_size == 4)) ||
1711 ((modrm == 0x25) && (addr_size == 2)) )
1713 gdprintk(XENLOG_ERR, "SMSW emulation at guest address: "
1714 "%lx failed due to unhandled addressing mode."
1715 "ModRM byte was: %x \n", svm_rip2pointer(v), modrm);
1716 domain_crash(v->domain);
1718 inst_len += addr_size;
1719 offset = *(( unsigned int *) ( void *) &buffer[index + 3]);
1720 offset = ( addr_size == 4 ) ? offset : ( offset & 0xFFFF );
1721 addr = hvm_get_segment_base(v, seg);
1722 addr += offset;
1723 result = (hvm_copy_to_guest_virt(addr, &value, 2)
1724 != HVMCOPY_bad_gva_to_gfn);
1726 else
1728 gdprintk(XENLOG_ERR, "SMSW emulation at guest address: %lx "
1729 "failed due to unhandled addressing mode!"
1730 "ModRM byte was: %x \n", svm_rip2pointer(v), modrm);
1731 domain_crash(v->domain);
1733 break;
1735 default:
1736 BUG();
1739 if ( result )
1740 __update_guest_eip(regs, inst_len);
1743 static void svm_do_msr_access(
1744 struct vcpu *v, struct cpu_user_regs *regs)
1746 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1747 int inst_len;
1748 u64 msr_content=0;
1749 u32 ecx = regs->ecx, eax, edx;
1751 HVM_DBG_LOG(DBG_LEVEL_1, "ecx=%x, eax=%x, edx=%x, exitinfo = %lx",
1752 ecx, (u32)regs->eax, (u32)regs->edx,
1753 (unsigned long)vmcb->exitinfo1);
1755 /* is it a read? */
1756 if (vmcb->exitinfo1 == 0)
1758 switch (ecx) {
1759 case MSR_IA32_TSC:
1760 msr_content = hvm_get_guest_time(v);
1761 break;
1763 case MSR_IA32_APICBASE:
1764 msr_content = vcpu_vlapic(v)->hw.apic_base_msr;
1765 break;
1767 case MSR_EFER:
1768 msr_content = v->arch.hvm_vcpu.guest_efer;
1769 break;
1771 case MSR_IA32_MC4_MISC: /* Threshold register */
1772 case MSR_F10_MC4_MISC1 ... MSR_F10_MC4_MISC3:
1773 /*
1774 * MCA/MCE: We report that the threshold register is unavailable
1775 * for OS use (locked by the BIOS).
1776 */
1777 msr_content = 1ULL << 61; /* MC4_MISC.Locked */
1778 break;
1780 case MSR_IA32_EBC_FREQUENCY_ID:
1781 /*
1782 * This Intel-only register may be accessed if this HVM guest
1783 * has been migrated from an Intel host. The value zero is not
1784 * particularly meaningful, but at least avoids the guest crashing!
1785 */
1786 msr_content = 0;
1787 break;
1789 case MSR_K8_VM_HSAVE_PA:
1790 svm_inject_exception(TRAP_gp_fault, 0, 0);
1791 break;
1793 case MSR_IA32_MCG_CAP:
1794 case MSR_IA32_MCG_STATUS:
1795 case MSR_IA32_MC0_STATUS:
1796 case MSR_IA32_MC1_STATUS:
1797 case MSR_IA32_MC2_STATUS:
1798 case MSR_IA32_MC3_STATUS:
1799 case MSR_IA32_MC4_STATUS:
1800 case MSR_IA32_MC5_STATUS:
1801 /* No point in letting the guest see real MCEs */
1802 msr_content = 0;
1803 break;
1805 case MSR_IA32_DEBUGCTLMSR:
1806 msr_content = vmcb->debugctlmsr;
1807 break;
1809 case MSR_IA32_LASTBRANCHFROMIP:
1810 msr_content = vmcb->lastbranchfromip;
1811 break;
1813 case MSR_IA32_LASTBRANCHTOIP:
1814 msr_content = vmcb->lastbranchtoip;
1815 break;
1817 case MSR_IA32_LASTINTFROMIP:
1818 msr_content = vmcb->lastintfromip;
1819 break;
1821 case MSR_IA32_LASTINTTOIP:
1822 msr_content = vmcb->lastinttoip;
1823 break;
1825 default:
1826 if ( rdmsr_hypervisor_regs(ecx, &eax, &edx) ||
1827 rdmsr_safe(ecx, eax, edx) == 0 )
1829 regs->eax = eax;
1830 regs->edx = edx;
1831 goto done;
1833 svm_inject_exception(TRAP_gp_fault, 0, 0);
1834 return;
1836 regs->eax = msr_content & 0xFFFFFFFF;
1837 regs->edx = msr_content >> 32;
1839 done:
1840 hvmtrace_msr_read(v, ecx, msr_content);
1841 HVM_DBG_LOG(DBG_LEVEL_1, "returns: ecx=%x, eax=%lx, edx=%lx",
1842 ecx, (unsigned long)regs->eax, (unsigned long)regs->edx);
1844 inst_len = __get_instruction_length(v, INSTR_RDMSR, NULL);
1846 else
1848 msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
1850 hvmtrace_msr_write(v, ecx, msr_content);
1852 switch (ecx)
1854 case MSR_IA32_TSC:
1855 hvm_set_guest_time(v, msr_content);
1856 pt_reset(v);
1857 break;
1859 case MSR_IA32_APICBASE:
1860 vlapic_msr_set(vcpu_vlapic(v), msr_content);
1861 break;
1863 case MSR_K8_VM_HSAVE_PA:
1864 svm_inject_exception(TRAP_gp_fault, 0, 0);
1865 break;
1867 case MSR_IA32_DEBUGCTLMSR:
1868 vmcb->debugctlmsr = msr_content;
1869 if ( !msr_content || !cpu_has_svm_lbrv )
1870 break;
1871 vmcb->lbr_control.fields.enable = 1;
1872 svm_disable_intercept_for_msr(v, MSR_IA32_DEBUGCTLMSR);
1873 svm_disable_intercept_for_msr(v, MSR_IA32_LASTBRANCHFROMIP);
1874 svm_disable_intercept_for_msr(v, MSR_IA32_LASTBRANCHTOIP);
1875 svm_disable_intercept_for_msr(v, MSR_IA32_LASTINTFROMIP);
1876 svm_disable_intercept_for_msr(v, MSR_IA32_LASTINTTOIP);
1877 break;
1879 case MSR_IA32_LASTBRANCHFROMIP:
1880 vmcb->lastbranchfromip = msr_content;
1881 break;
1883 case MSR_IA32_LASTBRANCHTOIP:
1884 vmcb->lastbranchtoip = msr_content;
1885 break;
1887 case MSR_IA32_LASTINTFROMIP:
1888 vmcb->lastintfromip = msr_content;
1889 break;
1891 case MSR_IA32_LASTINTTOIP:
1892 vmcb->lastinttoip = msr_content;
1893 break;
1895 default:
1896 switch ( long_mode_do_msr_write(regs) )
1898 case HNDL_unhandled:
1899 wrmsr_hypervisor_regs(ecx, regs->eax, regs->edx);
1900 break;
1901 case HNDL_exception_raised:
1902 return;
1903 case HNDL_done:
1904 break;
1906 break;
1909 inst_len = __get_instruction_length(v, INSTR_WRMSR, NULL);
1912 __update_guest_eip(regs, inst_len);
1915 static void svm_vmexit_do_hlt(struct vmcb_struct *vmcb,
1916 struct cpu_user_regs *regs)
1917 {
1918 struct vcpu *curr = current;
1919 struct hvm_intack intack = hvm_vcpu_has_pending_irq(curr);
1920 unsigned int inst_len;
1922 inst_len = __get_instruction_length(curr, INSTR_HLT, NULL);
1923 __update_guest_eip(regs, inst_len);
1925 /* Check for pending exception or new interrupt. */
1926 if ( vmcb->eventinj.fields.v ||
1927 ((intack.source != hvm_intsrc_none) &&
1928 !svm_interrupt_blocked(current, intack)) )
1929 {
1930 HVMTRACE_1D(HLT, curr, /*int pending=*/ 1);
1931 return;
1932 }
1934 HVMTRACE_1D(HLT, curr, /*int pending=*/ 0);
1935 hvm_hlt(regs->eflags);
1936 }
1938 static void svm_vmexit_do_invalidate_cache(struct cpu_user_regs *regs)
1939 {
1940 enum instruction_index list[] = { INSTR_INVD, INSTR_WBINVD };
1941 struct vcpu *curr = current;
1942 struct vmcb_struct *vmcb = curr->arch.hvm_svm.vmcb;
1943 int inst_len;
1945 if ( !list_empty(&(domain_hvm_iommu(curr->domain)->pdev_list)) )
1946 {
1947 vmcb->general2_intercepts &= ~GENERAL2_INTERCEPT_WBINVD;
1948 wbinvd();
1949 }
1951 inst_len = __get_instruction_length_from_list(
1952 curr, list, ARRAY_SIZE(list), NULL, NULL);
1953 __update_guest_eip(regs, inst_len);
1954 }
1956 void svm_handle_invlpg(const short invlpga, struct cpu_user_regs *regs)
1957 {
1958 struct vcpu *v = current;
1959 u8 opcode[MAX_INST_LEN], prefix, length = MAX_INST_LEN;
1960 unsigned long g_vaddr;
1961 int inst_len;
1963 /*
1964 * Unknown how many bytes the invlpg instruction will take. Use the
1965 * maximum instruction length here
1966 */
1967 if ( inst_copy_from_guest(opcode, svm_rip2pointer(v), length) < length )
1968 {
1969 gdprintk(XENLOG_ERR, "Error reading memory %d bytes\n", length);
1970 goto crash;
1971 }
1973 if ( invlpga )
1974 {
1975 inst_len = __get_instruction_length(v, INSTR_INVLPGA, opcode);
1976 __update_guest_eip(regs, inst_len);
1978 /*
1979 * The address is implicit on this instruction. At the moment, we don't
1980 * use ecx (ASID) to identify individual guests' pages
1981 */
1982 g_vaddr = regs->eax;
1983 }
1984 else
1985 {
1986 /* What about multiple prefix codes? */
1987 prefix = (is_prefix(opcode[0]) ? opcode[0] : 0);
1988 inst_len = __get_instruction_length(v, INSTR_INVLPG, opcode);
1989 if ( inst_len <= 0 )
1990 {
1991 gdprintk(XENLOG_ERR, "Error getting invlpg instr len\n");
1992 goto crash;
1993 }
1995 inst_len--;
1996 length -= inst_len;
1998 /*
1999 * Decode memory operand of the instruction including ModRM, SIB, and
2000 * displacement to get effective address and length in bytes. Assume
2001 * the system in either 32- or 64-bit mode.
2002 */
2003 g_vaddr = get_effective_addr_modrm64(regs, prefix, inst_len,
2004 &opcode[inst_len], &length);
2006 inst_len += length;
2007 __update_guest_eip(regs, inst_len);
2008 }
2010 HVMTRACE_3D(INVLPG, v, !!invlpga, g_vaddr, (invlpga ? regs->ecx : 0));
2012 paging_invlpg(v, g_vaddr);
2013 svm_asid_g_invlpg(v, g_vaddr);
2014 return;
2016 crash:
2017 domain_crash(v->domain);
2018 }
2020 asmlinkage void svm_vmexit_handler(struct cpu_user_regs *regs)
2022 unsigned int exit_reason;
2023 struct vcpu *v = current;
2024 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2025 eventinj_t eventinj;
2026 int inst_len, rc;
2028 /*
2029 * Before doing anything else, we need to sync up the VLAPIC's TPR with
2030 * SVM's vTPR. It's OK if the guest doesn't touch CR8 (e.g. 32-bit Windows)
2031 * because we update the vTPR on MMIO writes to the TPR.
2032 */
2033 vlapic_set_reg(vcpu_vlapic(v), APIC_TASKPRI,
2034 (vmcb->vintr.fields.tpr & 0x0F) << 4);
2036 exit_reason = vmcb->exitcode;
2038 hvmtrace_vmexit(v, regs->eip, exit_reason);
2040 if ( unlikely(exit_reason == VMEXIT_INVALID) )
2042 svm_dump_vmcb(__func__, vmcb);
2043 goto exit_and_crash;
2046 perfc_incra(svmexits, exit_reason);
2048 hvm_maybe_deassert_evtchn_irq();
2050 /* Event delivery caused this intercept? Queue for redelivery. */
2051 eventinj = vmcb->exitintinfo;
2052 if ( unlikely(eventinj.fields.v) &&
2053 hvm_event_needs_reinjection(eventinj.fields.type,
2054 eventinj.fields.vector) )
2055 vmcb->eventinj = eventinj;
2057 switch ( exit_reason )
2059 case VMEXIT_INTR:
2060 /* Asynchronous event, handled when we STGI'd after the VMEXIT. */
2061 HVMTRACE_0D(INTR, v);
2062 break;
2064 case VMEXIT_NMI:
2065 /* Asynchronous event, handled when we STGI'd after the VMEXIT. */
2066 HVMTRACE_0D(NMI, v);
2067 break;
2069 case VMEXIT_SMI:
2070 /* Asynchronous event, handled when we STGI'd after the VMEXIT. */
2071 HVMTRACE_0D(SMI, v);
2072 break;
2074 case VMEXIT_EXCEPTION_DB:
2075 if ( !v->domain->debugger_attached )
2076 goto exit_and_crash;
2077 domain_pause_for_debugger();
2078 break;
2080 case VMEXIT_EXCEPTION_BP:
2081 if ( !v->domain->debugger_attached )
2082 goto exit_and_crash;
2083 /* AMD Vol2, 15.11: INT3, INTO, BOUND intercepts do not update RIP. */
2084 inst_len = __get_instruction_length(v, INSTR_INT3, NULL);
2085 __update_guest_eip(regs, inst_len);
2086 domain_pause_for_debugger();
2087 break;
2089 case VMEXIT_EXCEPTION_NM:
2090 svm_do_no_device_fault(vmcb);
2091 break;
2093 case VMEXIT_EXCEPTION_PF: {
2094 unsigned long va;
2095 va = vmcb->exitinfo2;
2096 regs->error_code = vmcb->exitinfo1;
2097 HVM_DBG_LOG(DBG_LEVEL_VMMU,
2098 "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx",
2099 (unsigned long)regs->eax, (unsigned long)regs->ebx,
2100 (unsigned long)regs->ecx, (unsigned long)regs->edx,
2101 (unsigned long)regs->esi, (unsigned long)regs->edi);
2103 if ( paging_fault(va, regs) )
2105 HVMTRACE_2D(PF_XEN, v, va, regs->error_code);
2106 break;
2109 svm_inject_exception(TRAP_page_fault, regs->error_code, va);
2110 break;
2113 /* Asynchronous event, handled when we STGI'd after the VMEXIT. */
2114 case VMEXIT_EXCEPTION_MC:
2115 HVMTRACE_0D(MCE, v);
2116 break;
2118 case VMEXIT_VINTR:
2119 vmcb->vintr.fields.irq = 0;
2120 vmcb->general1_intercepts &= ~GENERAL1_INTERCEPT_VINTR;
2121 break;
2123 case VMEXIT_INVD:
2124 case VMEXIT_WBINVD:
2125 svm_vmexit_do_invalidate_cache(regs);
2126 break;
2128 case VMEXIT_TASK_SWITCH: {
2129 enum hvm_task_switch_reason reason;
2130 int32_t errcode = -1;
2131 if ( (vmcb->exitinfo2 >> 36) & 1 )
2132 reason = TSW_iret;
2133 else if ( (vmcb->exitinfo2 >> 38) & 1 )
2134 reason = TSW_jmp;
2135 else
2136 reason = TSW_call_or_int;
2137 if ( (vmcb->exitinfo2 >> 44) & 1 )
2138 errcode = (uint32_t)vmcb->exitinfo2;
2139 hvm_task_switch((uint16_t)vmcb->exitinfo1, reason, errcode);
2140 break;
2143 case VMEXIT_CPUID:
2144 svm_vmexit_do_cpuid(vmcb, regs);
2145 break;
2147 case VMEXIT_HLT:
2148 svm_vmexit_do_hlt(vmcb, regs);
2149 break;
2151 case VMEXIT_INVLPG:
2152 svm_handle_invlpg(0, regs);
2153 break;
2155 case VMEXIT_INVLPGA:
2156 svm_handle_invlpg(1, regs);
2157 break;
2159 case VMEXIT_VMMCALL:
2160 inst_len = __get_instruction_length(v, INSTR_VMCALL, NULL);
2161 HVMTRACE_1D(VMMCALL, v, regs->eax);
2162 rc = hvm_do_hypercall(regs);
2163 if ( rc != HVM_HCALL_preempted )
2165 __update_guest_eip(regs, inst_len);
2166 if ( rc == HVM_HCALL_invalidate )
2167 send_invalidate_req();
2169 break;
2171 case VMEXIT_CR0_READ ... VMEXIT_CR15_READ:
2172 svm_cr_access(v, exit_reason - VMEXIT_CR0_READ,
2173 TYPE_MOV_FROM_CR, regs);
2174 break;
2176 case VMEXIT_CR0_WRITE ... VMEXIT_CR15_WRITE:
2177 svm_cr_access(v, exit_reason - VMEXIT_CR0_WRITE,
2178 TYPE_MOV_TO_CR, regs);
2179 break;
2181 case VMEXIT_DR0_READ ... VMEXIT_DR7_READ:
2182 case VMEXIT_DR0_WRITE ... VMEXIT_DR7_WRITE:
2183 svm_dr_access(v, regs);
2184 break;
2186 case VMEXIT_IOIO:
2187 svm_io_instruction(v);
2188 break;
2190 case VMEXIT_MSR:
2191 svm_do_msr_access(v, regs);
2192 break;
2194 case VMEXIT_SHUTDOWN:
2195 hvm_triple_fault();
2196 break;
2198 case VMEXIT_RDTSCP:
2199 case VMEXIT_MONITOR:
2200 case VMEXIT_MWAIT:
2201 case VMEXIT_VMRUN:
2202 case VMEXIT_VMLOAD:
2203 case VMEXIT_VMSAVE:
2204 case VMEXIT_STGI:
2205 case VMEXIT_CLGI:
2206 case VMEXIT_SKINIT:
2207 svm_inject_exception(TRAP_invalid_op, HVM_DELIVER_NO_ERROR_CODE, 0);
2208 break;
2210 case VMEXIT_NPF:
2211 perfc_incra(svmexits, VMEXIT_NPF_PERFC);
2212 regs->error_code = vmcb->exitinfo1;
2213 svm_do_nested_pgfault(vmcb->exitinfo2, regs);
2214 break;
2216 default:
2217 exit_and_crash:
2218 gdprintk(XENLOG_ERR, "unexpected VMEXIT: exit reason = 0x%x, "
2219 "exitinfo1 = %"PRIx64", exitinfo2 = %"PRIx64"\n",
2220 exit_reason,
2221 (u64)vmcb->exitinfo1, (u64)vmcb->exitinfo2);
2222 domain_crash(v->domain);
2223 break;
2226 /* The exit may have updated the TPR: reflect this in the hardware vtpr */
2227 vmcb->vintr.fields.tpr =
2228 (vlapic_get_reg(vcpu_vlapic(v), APIC_TASKPRI) & 0xFF) >> 4;
2231 asmlinkage void svm_trace_vmentry(void)
2232 {
2233 struct vcpu *v = current;
2235 /* This is the last C code before the VMRUN instruction. */
2236 hvmtrace_vmentry(v);
2237 }
2239 /*
2240 * Local variables:
2241 * mode: C
2242 * c-set-style: "BSD"
2243 * c-basic-offset: 4
2244 * tab-width: 4
2245 * indent-tabs-mode: nil
2246 * End:
2247 */