ia64/xen-unstable: xen/arch/x86/hvm/svm/svm.c @ changeset 16620:966a6d3b7408

SVM: Treat the vlapic's tpr as the master copy and sync the vtpr to it
before every vm entry. This fixes HVM save/restore/migrate, as the
vtpr value was only being synced on guest TPR writes before.

Signed-off-by: Tim Deegan <Tim.Deegan@citrix.com>
Author:   Keir Fraser <keir.fraser@citrix.com>
Date:     Fri Dec 14 11:50:24 2007 +0000
Parents:  98e9485d8fcf
Children: 1e3e30670ce4
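
The fix boils down to two sync points that appear later in this file, in
svm_vmexit_handler() and svm_do_resume()/the tail of the exit handler. As a
reading aid only (not part of the changeset), this minimal sketch restates
the direction of each copy, using the same names as the code below:

    /* After a VMEXIT: the guest may have changed the hardware vTPR (e.g.
     * via CR8), so fold it into the vlapic's TPR, which this change
     * treats as the master copy. */
    vlapic_set_reg(vcpu_vlapic(v), APIC_TASKPRI,
                   (vmcb->vintr.fields.tpr & 0x0F) << 4);

    /* Before every VM entry: reflect the vlapic's TPR back into the
     * hardware vtpr, so a value restored by save/restore/migrate is
     * honoured rather than lost. */
    v->arch.hvm_svm.vmcb->vintr.fields.tpr =
        (vlapic_get_reg(vcpu_vlapic(v), APIC_TASKPRI) & 0xFF) >> 4;
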
1 /*
2 * svm.c: handling SVM architecture-related VM exits
3 * Copyright (c) 2004, Intel Corporation.
4 * Copyright (c) 2005-2007, Advanced Micro Devices, Inc.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
17 * Place - Suite 330, Boston, MA 02111-1307 USA.
18 */
20 #include <xen/config.h>
21 #include <xen/init.h>
22 #include <xen/lib.h>
23 #include <xen/trace.h>
24 #include <xen/sched.h>
25 #include <xen/irq.h>
26 #include <xen/softirq.h>
27 #include <xen/hypercall.h>
28 #include <xen/domain_page.h>
29 #include <asm/current.h>
30 #include <asm/io.h>
31 #include <asm/paging.h>
32 #include <asm/p2m.h>
33 #include <asm/regs.h>
34 #include <asm/cpufeature.h>
35 #include <asm/processor.h>
36 #include <asm/types.h>
37 #include <asm/debugreg.h>
38 #include <asm/msr.h>
39 #include <asm/spinlock.h>
40 #include <asm/hvm/hvm.h>
41 #include <asm/hvm/support.h>
42 #include <asm/hvm/io.h>
43 #include <asm/hvm/svm/asid.h>
44 #include <asm/hvm/svm/svm.h>
45 #include <asm/hvm/svm/vmcb.h>
46 #include <asm/hvm/svm/emulate.h>
47 #include <asm/hvm/svm/intr.h>
48 #include <asm/x86_emulate.h>
49 #include <public/sched.h>
50 #include <asm/hvm/vpt.h>
51 #include <asm/hvm/trace.h>
52 #include <asm/hap.h>
54 u32 svm_feature_flags;
56 #define set_segment_register(name, value) \
57 asm volatile ( "movw %%ax ,%%" STR(name) "" : : "a" (value) )
59 enum handler_return { HNDL_done, HNDL_unhandled, HNDL_exception_raised };
61 int inst_copy_from_guest(unsigned char *buf, unsigned long guest_eip,
62 int inst_len);
63 asmlinkage void do_IRQ(struct cpu_user_regs *);
65 static void svm_update_guest_cr(struct vcpu *v, unsigned int cr);
66 static void svm_update_guest_efer(struct vcpu *v);
67 static void svm_inject_exception(
68 unsigned int trapnr, int errcode, unsigned long cr2);
70 /* va of hardware host save area */
71 static void *hsa[NR_CPUS] __read_mostly;
73 /* vmcb used for extended host state */
74 static void *root_vmcb[NR_CPUS] __read_mostly;
76 static void inline __update_guest_eip(
77 struct cpu_user_regs *regs, unsigned int inst_len)
78 {
79 struct vcpu *curr = current;
81 if ( unlikely((inst_len == 0) || (inst_len > 15)) )
82 {
83 gdprintk(XENLOG_ERR, "Bad instruction length %u\n", inst_len);
84 domain_crash(curr->domain);
85 return;
86 }
88 ASSERT(regs == guest_cpu_user_regs());
90 regs->eip += inst_len;
91 regs->eflags &= ~X86_EFLAGS_RF;
93 curr->arch.hvm_svm.vmcb->interrupt_shadow = 0;
95 if ( regs->eflags & X86_EFLAGS_TF )
96 svm_inject_exception(TRAP_debug, HVM_DELIVER_NO_ERROR_CODE, 0);
97 }
99 static void svm_cpu_down(void)
100 {
101 write_efer(read_efer() & ~EFER_SVME);
102 }
104 static enum handler_return long_mode_do_msr_write(struct cpu_user_regs *regs)
105 {
106 u64 msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
107 u32 ecx = regs->ecx;
109 HVM_DBG_LOG(DBG_LEVEL_0, "msr %x msr_content %"PRIx64,
110 ecx, msr_content);
112 switch ( ecx )
113 {
114 case MSR_EFER:
115 if ( !hvm_set_efer(msr_content) )
116 return HNDL_exception_raised;
117 break;
119 case MSR_IA32_MC4_MISC: /* Threshold register */
120 case MSR_F10_MC4_MISC1 ... MSR_F10_MC4_MISC3:
121 /*
122 * MCA/MCE: Threshold register is reported to be locked, so we ignore
123 * all write accesses. This behaviour matches real HW, so guests should
124 * have no problem with this.
125 */
126 break;
128 default:
129 return HNDL_unhandled;
130 }
132 return HNDL_done;
133 }
135 static void svm_save_dr(struct vcpu *v)
136 {
137 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
139 if ( !v->arch.hvm_vcpu.flag_dr_dirty )
140 return;
142 /* Clear the DR dirty flag and re-enable intercepts for DR accesses. */
143 v->arch.hvm_vcpu.flag_dr_dirty = 0;
144 v->arch.hvm_svm.vmcb->dr_intercepts = ~0u;
146 v->arch.guest_context.debugreg[0] = read_debugreg(0);
147 v->arch.guest_context.debugreg[1] = read_debugreg(1);
148 v->arch.guest_context.debugreg[2] = read_debugreg(2);
149 v->arch.guest_context.debugreg[3] = read_debugreg(3);
150 v->arch.guest_context.debugreg[6] = vmcb->dr6;
151 v->arch.guest_context.debugreg[7] = vmcb->dr7;
152 }
154 static void __restore_debug_registers(struct vcpu *v)
155 {
156 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
158 if ( v->arch.hvm_vcpu.flag_dr_dirty )
159 return;
161 v->arch.hvm_vcpu.flag_dr_dirty = 1;
162 vmcb->dr_intercepts = 0;
164 write_debugreg(0, v->arch.guest_context.debugreg[0]);
165 write_debugreg(1, v->arch.guest_context.debugreg[1]);
166 write_debugreg(2, v->arch.guest_context.debugreg[2]);
167 write_debugreg(3, v->arch.guest_context.debugreg[3]);
168 vmcb->dr6 = v->arch.guest_context.debugreg[6];
169 vmcb->dr7 = v->arch.guest_context.debugreg[7];
170 }
172 /*
173 * DR7 is saved and restored on every vmexit. Other debug registers only
174 * need to be restored if their value is going to affect execution -- i.e.,
175 * if one of the breakpoints is enabled. So mask out all bits that don't
176 * enable some breakpoint functionality.
177 */
178 static void svm_restore_dr(struct vcpu *v)
179 {
180 if ( unlikely(v->arch.guest_context.debugreg[7] & DR7_ACTIVE_MASK) )
181 __restore_debug_registers(v);
182 }
184 int svm_vmcb_save(struct vcpu *v, struct hvm_hw_cpu *c)
185 {
186 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
188 c->cr0 = v->arch.hvm_vcpu.guest_cr[0];
189 c->cr2 = v->arch.hvm_vcpu.guest_cr[2];
190 c->cr3 = v->arch.hvm_vcpu.guest_cr[3];
191 c->cr4 = v->arch.hvm_vcpu.guest_cr[4];
193 c->idtr_limit = vmcb->idtr.limit;
194 c->idtr_base = vmcb->idtr.base;
196 c->gdtr_limit = vmcb->gdtr.limit;
197 c->gdtr_base = vmcb->gdtr.base;
199 c->cs_sel = vmcb->cs.sel;
200 c->cs_limit = vmcb->cs.limit;
201 c->cs_base = vmcb->cs.base;
202 c->cs_arbytes = vmcb->cs.attr.bytes;
204 c->ds_sel = vmcb->ds.sel;
205 c->ds_limit = vmcb->ds.limit;
206 c->ds_base = vmcb->ds.base;
207 c->ds_arbytes = vmcb->ds.attr.bytes;
209 c->es_sel = vmcb->es.sel;
210 c->es_limit = vmcb->es.limit;
211 c->es_base = vmcb->es.base;
212 c->es_arbytes = vmcb->es.attr.bytes;
214 c->ss_sel = vmcb->ss.sel;
215 c->ss_limit = vmcb->ss.limit;
216 c->ss_base = vmcb->ss.base;
217 c->ss_arbytes = vmcb->ss.attr.bytes;
219 c->fs_sel = vmcb->fs.sel;
220 c->fs_limit = vmcb->fs.limit;
221 c->fs_base = vmcb->fs.base;
222 c->fs_arbytes = vmcb->fs.attr.bytes;
224 c->gs_sel = vmcb->gs.sel;
225 c->gs_limit = vmcb->gs.limit;
226 c->gs_base = vmcb->gs.base;
227 c->gs_arbytes = vmcb->gs.attr.bytes;
229 c->tr_sel = vmcb->tr.sel;
230 c->tr_limit = vmcb->tr.limit;
231 c->tr_base = vmcb->tr.base;
232 c->tr_arbytes = vmcb->tr.attr.bytes;
234 c->ldtr_sel = vmcb->ldtr.sel;
235 c->ldtr_limit = vmcb->ldtr.limit;
236 c->ldtr_base = vmcb->ldtr.base;
237 c->ldtr_arbytes = vmcb->ldtr.attr.bytes;
239 c->sysenter_cs = vmcb->sysenter_cs;
240 c->sysenter_esp = vmcb->sysenter_esp;
241 c->sysenter_eip = vmcb->sysenter_eip;
243 c->pending_event = 0;
244 c->error_code = 0;
245 if ( vmcb->eventinj.fields.v &&
246 hvm_event_needs_reinjection(vmcb->eventinj.fields.type,
247 vmcb->eventinj.fields.vector) )
248 {
249 c->pending_event = (uint32_t)vmcb->eventinj.bytes;
250 c->error_code = vmcb->eventinj.fields.errorcode;
251 }
253 return 1;
254 }
257 int svm_vmcb_restore(struct vcpu *v, struct hvm_hw_cpu *c)
258 {
259 unsigned long mfn = 0;
260 p2m_type_t p2mt;
261 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
263 if ( c->pending_valid &&
264 ((c->pending_type == 1) || (c->pending_type > 6) ||
265 (c->pending_reserved != 0)) )
266 {
267 gdprintk(XENLOG_ERR, "Invalid pending event 0x%"PRIx32".\n",
268 c->pending_event);
269 return -EINVAL;
270 }
272 if ( !paging_mode_hap(v->domain) )
273 {
274 if ( c->cr0 & X86_CR0_PG )
275 {
276 mfn = mfn_x(gfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT, &p2mt));
277 if ( !p2m_is_ram(p2mt) || !get_page(mfn_to_page(mfn), v->domain) )
278 {
279 gdprintk(XENLOG_ERR, "Invalid CR3 value=0x%"PRIx64"\n",
280 c->cr3);
281 return -EINVAL;
282 }
283 }
285 if ( v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PG )
286 put_page(pagetable_get_page(v->arch.guest_table));
288 v->arch.guest_table = pagetable_from_pfn(mfn);
289 }
291 v->arch.hvm_vcpu.guest_cr[0] = c->cr0 | X86_CR0_ET;
292 v->arch.hvm_vcpu.guest_cr[2] = c->cr2;
293 v->arch.hvm_vcpu.guest_cr[3] = c->cr3;
294 v->arch.hvm_vcpu.guest_cr[4] = c->cr4;
295 svm_update_guest_cr(v, 0);
296 svm_update_guest_cr(v, 2);
297 svm_update_guest_cr(v, 4);
299 #ifdef HVM_DEBUG_SUSPEND
300 printk("%s: cr3=0x%"PRIx64", cr0=0x%"PRIx64", cr4=0x%"PRIx64".\n",
301 __func__, c->cr3, c->cr0, c->cr4);
302 #endif
304 vmcb->idtr.limit = c->idtr_limit;
305 vmcb->idtr.base = c->idtr_base;
307 vmcb->gdtr.limit = c->gdtr_limit;
308 vmcb->gdtr.base = c->gdtr_base;
310 vmcb->cs.sel = c->cs_sel;
311 vmcb->cs.limit = c->cs_limit;
312 vmcb->cs.base = c->cs_base;
313 vmcb->cs.attr.bytes = c->cs_arbytes;
315 vmcb->ds.sel = c->ds_sel;
316 vmcb->ds.limit = c->ds_limit;
317 vmcb->ds.base = c->ds_base;
318 vmcb->ds.attr.bytes = c->ds_arbytes;
320 vmcb->es.sel = c->es_sel;
321 vmcb->es.limit = c->es_limit;
322 vmcb->es.base = c->es_base;
323 vmcb->es.attr.bytes = c->es_arbytes;
325 vmcb->ss.sel = c->ss_sel;
326 vmcb->ss.limit = c->ss_limit;
327 vmcb->ss.base = c->ss_base;
328 vmcb->ss.attr.bytes = c->ss_arbytes;
329 vmcb->cpl = vmcb->ss.attr.fields.dpl;
331 vmcb->fs.sel = c->fs_sel;
332 vmcb->fs.limit = c->fs_limit;
333 vmcb->fs.base = c->fs_base;
334 vmcb->fs.attr.bytes = c->fs_arbytes;
336 vmcb->gs.sel = c->gs_sel;
337 vmcb->gs.limit = c->gs_limit;
338 vmcb->gs.base = c->gs_base;
339 vmcb->gs.attr.bytes = c->gs_arbytes;
341 vmcb->tr.sel = c->tr_sel;
342 vmcb->tr.limit = c->tr_limit;
343 vmcb->tr.base = c->tr_base;
344 vmcb->tr.attr.bytes = c->tr_arbytes;
346 vmcb->ldtr.sel = c->ldtr_sel;
347 vmcb->ldtr.limit = c->ldtr_limit;
348 vmcb->ldtr.base = c->ldtr_base;
349 vmcb->ldtr.attr.bytes = c->ldtr_arbytes;
351 vmcb->sysenter_cs = c->sysenter_cs;
352 vmcb->sysenter_esp = c->sysenter_esp;
353 vmcb->sysenter_eip = c->sysenter_eip;
355 if ( paging_mode_hap(v->domain) )
356 {
357 vmcb->np_enable = 1;
358 vmcb->g_pat = 0x0007040600070406ULL; /* guest PAT */
359 vmcb->h_cr3 = pagetable_get_paddr(v->domain->arch.phys_table);
360 }
362 if ( c->pending_valid )
363 {
364 gdprintk(XENLOG_INFO, "Re-injecting 0x%"PRIx32", 0x%"PRIx32"\n",
365 c->pending_event, c->error_code);
367 if ( hvm_event_needs_reinjection(c->pending_type, c->pending_vector) )
368 {
369 vmcb->eventinj.bytes = c->pending_event;
370 vmcb->eventinj.fields.errorcode = c->error_code;
371 }
372 }
374 paging_update_paging_modes(v);
376 return 0;
377 }
380 static void svm_save_cpu_state(struct vcpu *v, struct hvm_hw_cpu *data)
381 {
382 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
384 data->shadow_gs = vmcb->kerngsbase;
385 data->msr_lstar = vmcb->lstar;
386 data->msr_star = vmcb->star;
387 data->msr_cstar = vmcb->cstar;
388 data->msr_syscall_mask = vmcb->sfmask;
389 data->msr_efer = v->arch.hvm_vcpu.guest_efer;
390 data->msr_flags = -1ULL;
392 data->tsc = hvm_get_guest_time(v);
393 }
396 static void svm_load_cpu_state(struct vcpu *v, struct hvm_hw_cpu *data)
397 {
398 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
400 vmcb->kerngsbase = data->shadow_gs;
401 vmcb->lstar = data->msr_lstar;
402 vmcb->star = data->msr_star;
403 vmcb->cstar = data->msr_cstar;
404 vmcb->sfmask = data->msr_syscall_mask;
405 v->arch.hvm_vcpu.guest_efer = data->msr_efer;
406 svm_update_guest_efer(v);
408 hvm_set_guest_time(v, data->tsc);
409 }
411 static void svm_save_vmcb_ctxt(struct vcpu *v, struct hvm_hw_cpu *ctxt)
412 {
413 svm_save_cpu_state(v, ctxt);
414 svm_vmcb_save(v, ctxt);
415 }
417 static int svm_load_vmcb_ctxt(struct vcpu *v, struct hvm_hw_cpu *ctxt)
418 {
419 svm_load_cpu_state(v, ctxt);
420 if (svm_vmcb_restore(v, ctxt)) {
421 printk("svm_vmcb restore failed!\n");
422 domain_crash(v->domain);
423 return -EINVAL;
424 }
426 return 0;
427 }
429 static enum hvm_intblk svm_interrupt_blocked(
430 struct vcpu *v, struct hvm_intack intack)
431 {
432 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
434 if ( vmcb->interrupt_shadow )
435 return hvm_intblk_shadow;
437 if ( intack.source == hvm_intsrc_nmi )
438 return hvm_intblk_none;
440 ASSERT((intack.source == hvm_intsrc_pic) ||
441 (intack.source == hvm_intsrc_lapic));
443 if ( !(guest_cpu_user_regs()->eflags & X86_EFLAGS_IF) )
444 return hvm_intblk_rflags_ie;
446 return hvm_intblk_none;
447 }
449 static int svm_guest_x86_mode(struct vcpu *v)
450 {
451 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
453 if ( unlikely(!(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE)) )
454 return 0;
455 if ( unlikely(guest_cpu_user_regs()->eflags & X86_EFLAGS_VM) )
456 return 1;
457 if ( hvm_long_mode_enabled(v) && likely(vmcb->cs.attr.fields.l) )
458 return 8;
459 return (likely(vmcb->cs.attr.fields.db) ? 4 : 2);
460 }
462 static void svm_update_host_cr3(struct vcpu *v)
463 {
464 /* SVM doesn't have a HOST_CR3 equivalent to update. */
465 }
467 static void svm_update_guest_cr(struct vcpu *v, unsigned int cr)
468 {
469 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
471 switch ( cr )
472 {
473 case 0:
474 /* TS cleared? Then initialise FPU now. */
475 if ( (v == current) && !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS) &&
476 (vmcb->cr0 & X86_CR0_TS) )
477 {
478 setup_fpu(v);
479 vmcb->exception_intercepts &= ~(1U << TRAP_no_device);
480 }
482 vmcb->cr0 = v->arch.hvm_vcpu.guest_cr[0];
483 if ( !paging_mode_hap(v->domain) )
484 vmcb->cr0 |= X86_CR0_PG | X86_CR0_WP;
485 break;
486 case 2:
487 vmcb->cr2 = v->arch.hvm_vcpu.guest_cr[2];
488 break;
489 case 3:
490 vmcb->cr3 = v->arch.hvm_vcpu.hw_cr[3];
491 svm_asid_inv_asid(v);
492 break;
493 case 4:
494 vmcb->cr4 = HVM_CR4_HOST_MASK;
495 if ( paging_mode_hap(v->domain) )
496 vmcb->cr4 &= ~X86_CR4_PAE;
497 vmcb->cr4 |= v->arch.hvm_vcpu.guest_cr[4];
498 break;
499 default:
500 BUG();
501 }
502 }
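/*
 * Keep the VMCB's EFER in sync with the guest-visible EFER held in
 * v->arch.hvm_vcpu.guest_efer: SVME is always set in the hardware copy,
 * and LME is only propagated once LMA is set, i.e. once the guest has
 * actually activated long mode.
 */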
504 static void svm_update_guest_efer(struct vcpu *v)
505 {
506 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
508 vmcb->efer = (v->arch.hvm_vcpu.guest_efer | EFER_SVME) & ~EFER_LME;
509 if ( vmcb->efer & EFER_LMA )
510 vmcb->efer |= EFER_LME;
511 }
513 static void svm_flush_guest_tlbs(void)
514 {
515 /* Roll over the CPU's ASID generation, so it gets a clean TLB when we
516 * next VMRUN. (If ASIDs are disabled, the whole TLB is flushed on
517 * VMRUN anyway). */
518 svm_asid_inc_generation();
519 }
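/*
 * VMSAVE copies the lazily-switched processor state (FS, GS, TR, LDTR,
 * KernelGSbase and the SYSCALL/SYSENTER MSRs) from the hardware back
 * into the in-memory VMCB; callers that need to read those VMCB fields
 * sync first, and vmcb_in_sync records that the copy has been done.
 */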
521 static void svm_sync_vmcb(struct vcpu *v)
522 {
523 struct arch_svm_struct *arch_svm = &v->arch.hvm_svm;
525 if ( arch_svm->vmcb_in_sync )
526 return;
528 arch_svm->vmcb_in_sync = 1;
530 svm_vmsave(arch_svm->vmcb);
531 }
533 static unsigned long svm_get_segment_base(struct vcpu *v, enum x86_segment seg)
534 {
535 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
536 int long_mode = vmcb->cs.attr.fields.l && hvm_long_mode_enabled(v);
538 switch ( seg )
539 {
540 case x86_seg_cs: return long_mode ? 0 : vmcb->cs.base;
541 case x86_seg_ds: return long_mode ? 0 : vmcb->ds.base;
542 case x86_seg_es: return long_mode ? 0 : vmcb->es.base;
543 case x86_seg_fs: svm_sync_vmcb(v); return vmcb->fs.base;
544 case x86_seg_gs: svm_sync_vmcb(v); return vmcb->gs.base;
545 case x86_seg_ss: return long_mode ? 0 : vmcb->ss.base;
546 case x86_seg_tr: svm_sync_vmcb(v); return vmcb->tr.base;
547 case x86_seg_gdtr: return vmcb->gdtr.base;
548 case x86_seg_idtr: return vmcb->idtr.base;
549 case x86_seg_ldtr: svm_sync_vmcb(v); return vmcb->ldtr.base;
550 }
551 BUG();
552 return 0;
553 }
555 static void svm_get_segment_register(struct vcpu *v, enum x86_segment seg,
556 struct segment_register *reg)
557 {
558 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
560 ASSERT(v == current);
562 switch ( seg )
563 {
564 case x86_seg_cs:
565 memcpy(reg, &vmcb->cs, sizeof(*reg));
566 break;
567 case x86_seg_ds:
568 memcpy(reg, &vmcb->ds, sizeof(*reg));
569 break;
570 case x86_seg_es:
571 memcpy(reg, &vmcb->es, sizeof(*reg));
572 break;
573 case x86_seg_fs:
574 svm_sync_vmcb(v);
575 memcpy(reg, &vmcb->fs, sizeof(*reg));
576 break;
577 case x86_seg_gs:
578 svm_sync_vmcb(v);
579 memcpy(reg, &vmcb->gs, sizeof(*reg));
580 break;
581 case x86_seg_ss:
582 memcpy(reg, &vmcb->ss, sizeof(*reg));
583 break;
584 case x86_seg_tr:
585 svm_sync_vmcb(v);
586 memcpy(reg, &vmcb->tr, sizeof(*reg));
587 break;
588 case x86_seg_gdtr:
589 memcpy(reg, &vmcb->gdtr, sizeof(*reg));
590 break;
591 case x86_seg_idtr:
592 memcpy(reg, &vmcb->idtr, sizeof(*reg));
593 break;
594 case x86_seg_ldtr:
595 svm_sync_vmcb(v);
596 memcpy(reg, &vmcb->ldtr, sizeof(*reg));
597 break;
598 default:
599 BUG();
600 }
601 }
603 static void svm_set_segment_register(struct vcpu *v, enum x86_segment seg,
604 struct segment_register *reg)
605 {
606 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
607 int sync = 0;
609 ASSERT((v == current) || !vcpu_runnable(v));
611 switch ( seg )
612 {
613 case x86_seg_fs:
614 case x86_seg_gs:
615 case x86_seg_tr:
616 case x86_seg_ldtr:
617 sync = (v == current);
618 break;
619 default:
620 break;
621 }
623 if ( sync )
624 svm_sync_vmcb(v);
626 switch ( seg )
627 {
628 case x86_seg_cs:
629 memcpy(&vmcb->cs, reg, sizeof(*reg));
630 break;
631 case x86_seg_ds:
632 memcpy(&vmcb->ds, reg, sizeof(*reg));
633 break;
634 case x86_seg_es:
635 memcpy(&vmcb->es, reg, sizeof(*reg));
636 break;
637 case x86_seg_fs:
638 memcpy(&vmcb->fs, reg, sizeof(*reg));
639 break;
640 case x86_seg_gs:
641 memcpy(&vmcb->gs, reg, sizeof(*reg));
642 break;
643 case x86_seg_ss:
644 memcpy(&vmcb->ss, reg, sizeof(*reg));
645 vmcb->cpl = vmcb->ss.attr.fields.dpl;
646 break;
647 case x86_seg_tr:
648 memcpy(&vmcb->tr, reg, sizeof(*reg));
649 break;
650 case x86_seg_gdtr:
651 memcpy(&vmcb->gdtr, reg, sizeof(*reg));
652 break;
653 case x86_seg_idtr:
654 memcpy(&vmcb->idtr, reg, sizeof(*reg));
655 break;
656 case x86_seg_ldtr:
657 memcpy(&vmcb->ldtr, reg, sizeof(*reg));
658 break;
659 default:
660 BUG();
661 }
663 if ( sync )
664 svm_vmload(vmcb);
665 }
667 /* Make sure that xen intercepts any FP accesses from current */
668 static void svm_stts(struct vcpu *v)
669 {
670 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
672 /*
673 * If the guest does not have TS enabled then we must cause and handle an
674 * exception on first use of the FPU. If the guest *does* have TS enabled
675 * then this is not necessary: no FPU activity can occur until the guest
676 * clears CR0.TS, and we will initialise the FPU when that happens.
677 */
678 if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS) )
679 {
680 v->arch.hvm_svm.vmcb->exception_intercepts |= 1U << TRAP_no_device;
681 vmcb->cr0 |= X86_CR0_TS;
682 }
683 }
685 static void svm_set_tsc_offset(struct vcpu *v, u64 offset)
686 {
687 v->arch.hvm_svm.vmcb->tsc_offset = offset;
688 }
690 static void svm_init_hypercall_page(struct domain *d, void *hypercall_page)
691 {
692 char *p;
693 int i;
695 for ( i = 0; i < (PAGE_SIZE / 32); i++ )
696 {
697 p = (char *)(hypercall_page + (i * 32));
698 *(u8 *)(p + 0) = 0xb8; /* mov imm32, %eax */
699 *(u32 *)(p + 1) = i;
700 *(u8 *)(p + 5) = 0x0f; /* vmmcall */
701 *(u8 *)(p + 6) = 0x01;
702 *(u8 *)(p + 7) = 0xd9;
703 *(u8 *)(p + 8) = 0xc3; /* ret */
704 }
706 /* Don't support HYPERVISOR_iret at the moment */
707 *(u16 *)(hypercall_page + (__HYPERVISOR_iret * 32)) = 0x0b0f; /* ud2 */
708 }
710 static void svm_ctxt_switch_from(struct vcpu *v)
711 {
712 int cpu = smp_processor_id();
714 svm_save_dr(v);
716 svm_sync_vmcb(v);
717 svm_vmload(root_vmcb[cpu]);
719 #ifdef __x86_64__
720 /* Resume use of ISTs now that the host TR is reinstated. */
721 idt_tables[cpu][TRAP_double_fault].a |= IST_DF << 32;
722 idt_tables[cpu][TRAP_nmi].a |= IST_NMI << 32;
723 idt_tables[cpu][TRAP_machine_check].a |= IST_MCE << 32;
724 #endif
725 }
727 static void svm_ctxt_switch_to(struct vcpu *v)
728 {
729 int cpu = smp_processor_id();
731 #ifdef __x86_64__
732 /*
733 * This is required because VMRUN performs consistency checks,
734 * and some of the DOM0 selectors point to invalid GDT
735 * locations, which would cause AMD processors to shut
736 * down.
737 */
738 set_segment_register(ds, 0);
739 set_segment_register(es, 0);
740 set_segment_register(ss, 0);
742 /*
743 * Cannot use ISTs for NMI/#MC/#DF while we are running with the guest TR.
744 * But this doesn't matter: the IST is only req'd to handle SYSCALL/SYSRET.
745 */
746 idt_tables[cpu][TRAP_double_fault].a &= ~(7UL << 32);
747 idt_tables[cpu][TRAP_nmi].a &= ~(7UL << 32);
748 idt_tables[cpu][TRAP_machine_check].a &= ~(7UL << 32);
749 #endif
751 svm_restore_dr(v);
753 svm_vmsave(root_vmcb[cpu]);
754 svm_vmload(v->arch.hvm_svm.vmcb);
755 }
757 static void svm_do_resume(struct vcpu *v)
758 {
759 bool_t debug_state = v->domain->debugger_attached;
761 if ( unlikely(v->arch.hvm_vcpu.debug_state_latch != debug_state) )
762 {
763 uint32_t mask = (1U << TRAP_debug) | (1U << TRAP_int3);
764 v->arch.hvm_vcpu.debug_state_latch = debug_state;
765 if ( debug_state )
766 v->arch.hvm_svm.vmcb->exception_intercepts |= mask;
767 else
768 v->arch.hvm_svm.vmcb->exception_intercepts &= ~mask;
769 }
771 if ( v->arch.hvm_svm.launch_core != smp_processor_id() )
772 {
773 v->arch.hvm_svm.launch_core = smp_processor_id();
774 hvm_migrate_timers(v);
776 /* Migrating to another ASID domain. Request a new ASID. */
777 svm_asid_init_vcpu(v);
778 }
780 /* Reflect the vlapic's TPR in the hardware vtpr */
781 v->arch.hvm_svm.vmcb->vintr.fields.tpr =
782 (vlapic_get_reg(vcpu_vlapic(v), APIC_TASKPRI) & 0xFF) >> 4;
784 hvm_do_resume(v);
785 reset_stack_and_jump(svm_asm_do_resume);
786 }
788 static int svm_domain_initialise(struct domain *d)
789 {
790 return 0;
791 }
793 static void svm_domain_destroy(struct domain *d)
794 {
795 }
797 static int svm_vcpu_initialise(struct vcpu *v)
798 {
799 int rc;
801 v->arch.schedule_tail = svm_do_resume;
802 v->arch.ctxt_switch_from = svm_ctxt_switch_from;
803 v->arch.ctxt_switch_to = svm_ctxt_switch_to;
805 v->arch.hvm_svm.launch_core = -1;
807 if ( (rc = svm_create_vmcb(v)) != 0 )
808 {
809 dprintk(XENLOG_WARNING,
810 "Failed to create VMCB for vcpu %d: err=%d.\n",
811 v->vcpu_id, rc);
812 return rc;
813 }
815 return 0;
816 }
818 static void svm_vcpu_destroy(struct vcpu *v)
819 {
820 svm_destroy_vmcb(v);
821 }
823 static void svm_inject_exception(
824 unsigned int trapnr, int errcode, unsigned long cr2)
825 {
826 struct vcpu *curr = current;
827 struct vmcb_struct *vmcb = curr->arch.hvm_svm.vmcb;
828 eventinj_t event;
830 event.bytes = 0;
831 event.fields.v = 1;
832 event.fields.type = X86_EVENTTYPE_HW_EXCEPTION;
833 event.fields.vector = trapnr;
834 event.fields.ev = (errcode != HVM_DELIVER_NO_ERROR_CODE);
835 event.fields.errorcode = errcode;
837 vmcb->eventinj = event;
839 if ( trapnr == TRAP_page_fault )
840 {
841 vmcb->cr2 = curr->arch.hvm_vcpu.guest_cr[2] = cr2;
842 HVMTRACE_2D(PF_INJECT, curr, curr->arch.hvm_vcpu.guest_cr[2], errcode);
843 }
844 else
845 {
846 HVMTRACE_2D(INJ_EXC, curr, trapnr, errcode);
847 }
849 if ( (trapnr == TRAP_debug) &&
850 (guest_cpu_user_regs()->eflags & X86_EFLAGS_TF) )
851 {
852 __restore_debug_registers(curr);
853 vmcb->dr6 |= 0x4000;
854 }
855 }
857 static int svm_event_pending(struct vcpu *v)
858 {
859 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
860 return vmcb->eventinj.fields.v;
861 }
863 static struct hvm_function_table svm_function_table = {
864 .name = "SVM",
865 .cpu_down = svm_cpu_down,
866 .domain_initialise = svm_domain_initialise,
867 .domain_destroy = svm_domain_destroy,
868 .vcpu_initialise = svm_vcpu_initialise,
869 .vcpu_destroy = svm_vcpu_destroy,
870 .save_cpu_ctxt = svm_save_vmcb_ctxt,
871 .load_cpu_ctxt = svm_load_vmcb_ctxt,
872 .interrupt_blocked = svm_interrupt_blocked,
873 .guest_x86_mode = svm_guest_x86_mode,
874 .get_segment_base = svm_get_segment_base,
875 .get_segment_register = svm_get_segment_register,
876 .set_segment_register = svm_set_segment_register,
877 .update_host_cr3 = svm_update_host_cr3,
878 .update_guest_cr = svm_update_guest_cr,
879 .update_guest_efer = svm_update_guest_efer,
880 .flush_guest_tlbs = svm_flush_guest_tlbs,
881 .stts = svm_stts,
882 .set_tsc_offset = svm_set_tsc_offset,
883 .inject_exception = svm_inject_exception,
884 .init_hypercall_page = svm_init_hypercall_page,
885 .event_pending = svm_event_pending
886 };
888 int start_svm(struct cpuinfo_x86 *c)
889 {
890 u32 eax, ecx, edx;
891 u32 phys_hsa_lo, phys_hsa_hi;
892 u64 phys_hsa;
893 int cpu = smp_processor_id();
895 /* Xen does not fill x86_capability words except 0. */
896 ecx = cpuid_ecx(0x80000001);
897 boot_cpu_data.x86_capability[5] = ecx;
899 if ( !(test_bit(X86_FEATURE_SVME, &boot_cpu_data.x86_capability)) )
900 return 0;
902 /* Check whether SVM feature is disabled in BIOS */
903 rdmsr(MSR_K8_VM_CR, eax, edx);
904 if ( eax & K8_VMCR_SVME_DISABLE )
905 {
906 printk("AMD SVM Extension is disabled in BIOS.\n");
907 return 0;
908 }
910 if ( ((hsa[cpu] = alloc_host_save_area()) == NULL) ||
911 ((root_vmcb[cpu] = alloc_vmcb()) == NULL) )
912 return 0;
914 write_efer(read_efer() | EFER_SVME);
916 /* Initialize the HSA for this core. */
917 phys_hsa = (u64) virt_to_maddr(hsa[cpu]);
918 phys_hsa_lo = (u32) phys_hsa;
919 phys_hsa_hi = (u32) (phys_hsa >> 32);
920 wrmsr(MSR_K8_VM_HSAVE_PA, phys_hsa_lo, phys_hsa_hi);
922 /* Initialize core's ASID handling. */
923 svm_asid_init(c);
925 if ( cpu != 0 )
926 return 1;
928 setup_vmcb_dump();
930 svm_feature_flags = ((cpuid_eax(0x80000000) >= 0x8000000A) ?
931 cpuid_edx(0x8000000A) : 0);
933 svm_function_table.hap_supported = cpu_has_svm_npt;
935 hvm_enable(&svm_function_table);
937 return 1;
938 }
940 static void svm_do_nested_pgfault(paddr_t gpa, struct cpu_user_regs *regs)
941 {
942 p2m_type_t p2mt;
943 mfn_t mfn;
944 unsigned long gfn = gpa >> PAGE_SHIFT;
946 /* If this GFN is emulated MMIO, pass the fault to the mmio handler */
947 mfn = gfn_to_mfn_current(gfn, &p2mt);
948 if ( p2mt == p2m_mmio_dm )
949 {
950 handle_mmio(gpa);
951 return;
952 }
954 /* Log-dirty: mark the page dirty and let the guest write it again */
955 paging_mark_dirty(current->domain, mfn_x(mfn));
956 p2m_change_type(current->domain, gfn, p2m_ram_logdirty, p2m_ram_rw);
957 }
959 static void svm_do_no_device_fault(struct vmcb_struct *vmcb)
960 {
961 struct vcpu *v = current;
963 setup_fpu(v);
964 vmcb->exception_intercepts &= ~(1U << TRAP_no_device);
966 if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS) )
967 vmcb->cr0 &= ~X86_CR0_TS;
968 }
970 /* Reserved bits ECX: [31:14], [12:4], [2:1]*/
971 #define SVM_VCPU_CPUID_L1_ECX_RESERVED 0xffffdff6
972 /* Reserved bits EDX: [31:29], [27], [22:20], [18], [10] */
973 #define SVM_VCPU_CPUID_L1_EDX_RESERVED 0xe8740400
975 #define bitmaskof(idx) (1U << ((idx) & 31))
976 static void svm_vmexit_do_cpuid(struct vmcb_struct *vmcb,
977 struct cpu_user_regs *regs)
978 {
979 unsigned long input = regs->eax;
980 unsigned int eax, ebx, ecx, edx;
981 struct vcpu *v = current;
982 int inst_len;
984 hvm_cpuid(input, &eax, &ebx, &ecx, &edx);
986 switch ( input )
987 {
988 case 0x00000001:
989 /* Clear out reserved bits. */
990 ecx &= ~SVM_VCPU_CPUID_L1_ECX_RESERVED;
991 edx &= ~SVM_VCPU_CPUID_L1_EDX_RESERVED;
993 /* Guest should only see one logical processor.
994 * See details on page 23 of AMD CPUID Specification.
995 */
996 __clear_bit(X86_FEATURE_HT & 31, &edx);
997 ebx &= 0xFF00FFFF; /* clear the logical processor count when HTT=0 */
998 ebx |= 0x00010000; /* set to 1 just as a precaution */
999 break;
1001 case 0x80000001:
1002 /* Filter features which are shared with 0x00000001:EDX. */
1003 if ( vlapic_hw_disabled(vcpu_vlapic(v)) )
1004 __clear_bit(X86_FEATURE_APIC & 31, &edx);
1005 #if CONFIG_PAGING_LEVELS >= 3
1006 if ( !v->domain->arch.hvm_domain.params[HVM_PARAM_PAE_ENABLED] )
1007 #endif
1008 __clear_bit(X86_FEATURE_PAE & 31, &edx);
1009 __clear_bit(X86_FEATURE_PSE36 & 31, &edx);
1011 /* Filter all other features according to a whitelist. */
1012 edx &= (0x0183f3ff | /* features shared with 0x00000001:EDX */
1013 bitmaskof(X86_FEATURE_NX) |
1014 bitmaskof(X86_FEATURE_LM) |
1015 bitmaskof(X86_FEATURE_SYSCALL) |
1016 bitmaskof(X86_FEATURE_MP) |
1017 bitmaskof(X86_FEATURE_MMXEXT) |
1018 bitmaskof(X86_FEATURE_FFXSR));
1019 break;
1021 case 0x80000007:
1022 case 0x8000000A:
1023 /* Mask out features of power management and SVM extension. */
1024 eax = ebx = ecx = edx = 0;
1025 break;
1027 case 0x80000008:
1028 /* Make sure the number of CPU cores is 1 when HTT=0 */
1029 ecx &= 0xFFFFFF00;
1030 break;
1033 regs->eax = eax;
1034 regs->ebx = ebx;
1035 regs->ecx = ecx;
1036 regs->edx = edx;
1038 HVMTRACE_3D(CPUID, v, input,
1039 ((uint64_t)eax << 32) | ebx, ((uint64_t)ecx << 32) | edx);
1041 inst_len = __get_instruction_length(v, INSTR_CPUID, NULL);
1042 __update_guest_eip(regs, inst_len);
1045 static unsigned long *get_reg_p(
1046 unsigned int gpreg,
1047 struct cpu_user_regs *regs, struct vmcb_struct *vmcb)
1049 unsigned long *reg_p = NULL;
1050 switch (gpreg)
1052 case SVM_REG_EAX:
1053 reg_p = (unsigned long *)&regs->eax;
1054 break;
1055 case SVM_REG_EBX:
1056 reg_p = (unsigned long *)&regs->ebx;
1057 break;
1058 case SVM_REG_ECX:
1059 reg_p = (unsigned long *)&regs->ecx;
1060 break;
1061 case SVM_REG_EDX:
1062 reg_p = (unsigned long *)&regs->edx;
1063 break;
1064 case SVM_REG_EDI:
1065 reg_p = (unsigned long *)&regs->edi;
1066 break;
1067 case SVM_REG_ESI:
1068 reg_p = (unsigned long *)&regs->esi;
1069 break;
1070 case SVM_REG_EBP:
1071 reg_p = (unsigned long *)&regs->ebp;
1072 break;
1073 case SVM_REG_ESP:
1074 reg_p = (unsigned long *)&regs->esp;
1075 break;
1076 #ifdef __x86_64__
1077 case SVM_REG_R8:
1078 reg_p = (unsigned long *)&regs->r8;
1079 break;
1080 case SVM_REG_R9:
1081 reg_p = (unsigned long *)&regs->r9;
1082 break;
1083 case SVM_REG_R10:
1084 reg_p = (unsigned long *)&regs->r10;
1085 break;
1086 case SVM_REG_R11:
1087 reg_p = (unsigned long *)&regs->r11;
1088 break;
1089 case SVM_REG_R12:
1090 reg_p = (unsigned long *)&regs->r12;
1091 break;
1092 case SVM_REG_R13:
1093 reg_p = (unsigned long *)&regs->r13;
1094 break;
1095 case SVM_REG_R14:
1096 reg_p = (unsigned long *)&regs->r14;
1097 break;
1098 case SVM_REG_R15:
1099 reg_p = (unsigned long *)&regs->r15;
1100 break;
1101 #endif
1102 default:
1103 BUG();
1106 return reg_p;
1110 static unsigned long get_reg(
1111 unsigned int gpreg, struct cpu_user_regs *regs, struct vmcb_struct *vmcb)
1113 unsigned long *gp;
1114 gp = get_reg_p(gpreg, regs, vmcb);
1115 return *gp;
1119 static void set_reg(
1120 unsigned int gpreg, unsigned long value,
1121 struct cpu_user_regs *regs, struct vmcb_struct *vmcb)
1123 unsigned long *gp;
1124 gp = get_reg_p(gpreg, regs, vmcb);
1125 *gp = value;
1129 static void svm_dr_access(struct vcpu *v, struct cpu_user_regs *regs)
1131 HVMTRACE_0D(DR_WRITE, v);
1132 __restore_debug_registers(v);
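/*
 * Scan the bytes of the faulting INS/OUTS instruction for prefixes:
 * segment-override prefixes select the segment used for the memory
 * operand, and the 0x67 prefix toggles the address size. Used by
 * svm_get_io_address() below when decoding string I/O.
 */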
1136 static void svm_get_prefix_info(struct vcpu *v, unsigned int dir,
1137 svm_segment_register_t **seg,
1138 unsigned int *asize)
1140 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1141 unsigned char inst[MAX_INST_LEN];
1142 int i;
1144 memset(inst, 0, MAX_INST_LEN);
1145 if (inst_copy_from_guest(inst, svm_rip2pointer(v), sizeof(inst))
1146 != MAX_INST_LEN)
1148 gdprintk(XENLOG_ERR, "get guest instruction failed\n");
1149 domain_crash(current->domain);
1150 return;
1153 for (i = 0; i < MAX_INST_LEN; i++)
1155 switch (inst[i])
1157 case 0xf3: /* REPZ */
1158 case 0xf2: /* REPNZ */
1159 case 0xf0: /* LOCK */
1160 case 0x66: /* data32 */
1161 #ifdef __x86_64__
1162 /* REX prefixes */
1163 case 0x40:
1164 case 0x41:
1165 case 0x42:
1166 case 0x43:
1167 case 0x44:
1168 case 0x45:
1169 case 0x46:
1170 case 0x47:
1172 case 0x48:
1173 case 0x49:
1174 case 0x4a:
1175 case 0x4b:
1176 case 0x4c:
1177 case 0x4d:
1178 case 0x4e:
1179 case 0x4f:
1180 #endif
1181 continue;
1182 case 0x67: /* addr32 */
1183 *asize ^= 48; /* Switch 16/32 bits */
1184 continue;
1185 case 0x2e: /* CS */
1186 *seg = &vmcb->cs;
1187 continue;
1188 case 0x36: /* SS */
1189 *seg = &vmcb->ss;
1190 continue;
1191 case 0x26: /* ES */
1192 *seg = &vmcb->es;
1193 continue;
1194 case 0x64: /* FS */
1195 svm_sync_vmcb(v);
1196 *seg = &vmcb->fs;
1197 continue;
1198 case 0x65: /* GS */
1199 svm_sync_vmcb(v);
1200 *seg = &vmcb->gs;
1201 continue;
1202 case 0x3e: /* DS */
1203 *seg = &vmcb->ds;
1204 continue;
1205 default:
1206 break;
1208 return;
1213 /* Get the address of INS/OUTS instruction */
1214 static int svm_get_io_address(
1215 struct vcpu *v, struct cpu_user_regs *regs,
1216 unsigned int size, ioio_info_t info,
1217 unsigned long *count, unsigned long *addr)
1219 unsigned long reg;
1220 unsigned int asize, isize;
1221 int long_mode = 0;
1222 svm_segment_register_t *seg = NULL;
1223 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1225 /* If we're in long mode, don't check the segment presence & limit */
1226 long_mode = vmcb->cs.attr.fields.l && hvm_long_mode_enabled(v);
1228 /* d field of cs.attr is 1 for 32-bit, 0 for 16 or 64 bit.
1229 * l field combined with EFER_LMA says whether it's 16 or 64 bit.
1230 */
1231 asize = (long_mode)?64:((vmcb->cs.attr.fields.db)?32:16);
1234 /* The INS/OUTS instructions are a single byte long, so if we got
1235 * more than one byte (plus perhaps a REP prefix), there is some
1236 * other prefix that we need to decode...
1237 */
1238 isize = vmcb->exitinfo2 - regs->eip;
1240 if (info.fields.rep)
1241 isize --;
1243 if (isize > 1)
1244 svm_get_prefix_info(v, info.fields.type, &seg, &asize);
1246 if (info.fields.type == IOREQ_WRITE)
1248 reg = regs->esi;
1249 if (!seg) /* If no prefix, use DS. */
1250 seg = &vmcb->ds;
1251 if (!long_mode && (seg->attr.fields.type & 0xa) == 0x8) {
1252 svm_inject_exception(TRAP_gp_fault, 0, 0);
1253 return 0;
1256 else
1258 reg = regs->edi;
1259 seg = &vmcb->es; /* Note: This is ALWAYS ES. */
1260 if (!long_mode && (seg->attr.fields.type & 0xa) != 0x2) {
1261 svm_inject_exception(TRAP_gp_fault, 0, 0);
1262 return 0;
1266 /* If the segment isn't present, give GP fault! */
1267 if (!long_mode && !seg->attr.fields.p)
1269 svm_inject_exception(TRAP_gp_fault, 0, 0);
1270 return 0;
1273 if (asize == 16)
1275 *addr = (reg & 0xFFFF);
1276 *count = regs->ecx & 0xffff;
1278 else
1280 *addr = reg;
1281 *count = regs->ecx;
1283 if (!info.fields.rep)
1284 *count = 1;
1286 if (!long_mode)
1288 ASSERT(*addr == (u32)*addr);
1289 if ((u32)(*addr + size - 1) < (u32)*addr ||
1290 (seg->attr.fields.type & 0xc) != 0x4 ?
1291 *addr + size - 1 > seg->limit :
1292 *addr <= seg->limit)
1294 svm_inject_exception(TRAP_gp_fault, 0, 0);
1295 return 0;
1298 /* Check the limit for repeated instructions, as above we checked only
1299 the first instance. Truncate the count if a limit violation would
1300 occur. Note that the checking is not necessary for page granular
1301 segments as transfers crossing page boundaries will be broken up
1302 anyway. */
1303 if (!seg->attr.fields.g && *count > 1)
1305 if ((seg->attr.fields.type & 0xc) != 0x4)
1307 /* expand-up */
1308 if (!(regs->eflags & EF_DF))
1310 if (*addr + *count * size - 1 < *addr ||
1311 *addr + *count * size - 1 > seg->limit)
1312 *count = (seg->limit + 1UL - *addr) / size;
1314 else
1316 if (*count - 1 > *addr / size)
1317 *count = *addr / size + 1;
1320 else
1322 /* expand-down */
1323 if (!(regs->eflags & EF_DF))
1325 if (*count - 1 > -(s32)*addr / size)
1326 *count = -(s32)*addr / size + 1UL;
1328 else
1330 if (*addr < (*count - 1) * size ||
1331 *addr - (*count - 1) * size <= seg->limit)
1332 *count = (*addr - seg->limit - 1) / size + 1;
1335 ASSERT(*count);
1338 *addr += seg->base;
1340 #ifdef __x86_64__
1341 else
1343 if (seg == &vmcb->fs || seg == &vmcb->gs)
1344 *addr += seg->base;
1346 if (!is_canonical_address(*addr) ||
1347 !is_canonical_address(*addr + size - 1))
1349 svm_inject_exception(TRAP_gp_fault, 0, 0);
1350 return 0;
1352 if (*count > (1UL << 48) / size)
1353 *count = (1UL << 48) / size;
1354 if (!(regs->eflags & EF_DF))
1356 if (*addr + *count * size - 1 < *addr ||
1357 !is_canonical_address(*addr + *count * size - 1))
1358 *count = (*addr & ~((1UL << 48) - 1)) / size;
1360 else
1362 if ((*count - 1) * size > *addr ||
1363 !is_canonical_address(*addr + (*count - 1) * size))
1364 *count = (*addr & ~((1UL << 48) - 1)) / size + 1;
1366 ASSERT(*count);
1368 #endif
1370 return 1;
1374 static void svm_io_instruction(struct vcpu *v)
1376 struct cpu_user_regs *regs;
1377 struct hvm_io_op *pio_opp;
1378 unsigned int port;
1379 unsigned int size, dir, df;
1380 ioio_info_t info;
1381 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1383 pio_opp = &current->arch.hvm_vcpu.io_op;
1384 pio_opp->instr = INSTR_PIO;
1385 pio_opp->flags = 0;
1387 regs = &pio_opp->io_context;
1389 /* Copy current guest state into io instruction state structure. */
1390 memcpy(regs, guest_cpu_user_regs(), HVM_CONTEXT_STACK_BYTES);
1392 info.bytes = vmcb->exitinfo1;
1394 port = info.fields.port; /* port used to be addr */
1395 dir = info.fields.type; /* direction */
1396 df = regs->eflags & X86_EFLAGS_DF ? 1 : 0;
1398 if (info.fields.sz32)
1399 size = 4;
1400 else if (info.fields.sz16)
1401 size = 2;
1402 else
1403 size = 1;
1405 if (dir==IOREQ_READ)
1406 HVMTRACE_2D(IO_READ, v, port, size);
1407 else
1408 HVMTRACE_3D(IO_WRITE, v, port, size, regs->eax);
1410 HVM_DBG_LOG(DBG_LEVEL_IO,
1411 "svm_io_instruction: port 0x%x eip=%x:%"PRIx64", "
1412 "exit_qualification = %"PRIx64,
1413 port, vmcb->cs.sel, (uint64_t)regs->eip, info.bytes);
1415 /* string instruction */
1416 if (info.fields.str)
1418 unsigned long addr, count;
1419 paddr_t paddr;
1420 unsigned long gfn;
1421 uint32_t pfec;
1422 int sign = regs->eflags & X86_EFLAGS_DF ? -1 : 1;
1424 if (!svm_get_io_address(v, regs, size, info, &count, &addr))
1426 /* We failed to get a valid address, so don't do the IO operation -
1427 * it would just get worse if we do! Hopefully the guest is handling
1428 * gp-faults...
1429 */
1430 return;
1433 /* "rep" prefix */
1434 if (info.fields.rep)
1436 pio_opp->flags |= REPZ;
1439 /* Translate the address to a physical address */
1440 pfec = PFEC_page_present;
1441 if ( dir == IOREQ_READ ) /* Read from PIO --> write to RAM */
1442 pfec |= PFEC_write_access;
1443 if ( ring_3(regs) )
1444 pfec |= PFEC_user_mode;
1445 gfn = paging_gva_to_gfn(v, addr, &pfec);
1446 if ( gfn == INVALID_GFN )
1448 /* The guest does not have the RAM address mapped.
1449 * Need to send in a page fault */
1450 svm_inject_exception(TRAP_page_fault, pfec, addr);
1451 return;
1453 paddr = (paddr_t)gfn << PAGE_SHIFT | (addr & ~PAGE_MASK);
1455 /*
1456 * Handle string pio instructions that cross pages or that
1457 * are unaligned. See the comments in hvm_platform.c/handle_mmio()
1458 */
1459 if ((addr & PAGE_MASK) != ((addr + size - 1) & PAGE_MASK))
1461 unsigned long value = 0;
1463 pio_opp->flags |= OVERLAP;
1464 pio_opp->addr = addr;
1466 if (dir == IOREQ_WRITE) /* OUTS */
1468 if ( hvm_paging_enabled(current) )
1470 int rv = hvm_copy_from_guest_virt(&value, addr, size);
1471 if ( rv != 0 )
1473 /* Failed on the page-spanning copy. Inject PF into
1474 * the guest for the address where we failed. */
1475 addr += size - rv;
1476 gdprintk(XENLOG_DEBUG, "Pagefault reading non-io side "
1477 "of a page-spanning PIO: va=%#lx\n", addr);
1478 svm_inject_exception(TRAP_page_fault, 0, addr);
1479 return;
1482 else
1483 (void) hvm_copy_from_guest_phys(&value, addr, size);
1484 } else /* dir != IOREQ_WRITE */
1485 /* Remember where to write the result, as a *VA*.
1486 * Must be a VA so we can handle the page overlap
1487 * correctly in hvm_pio_assist() */
1488 pio_opp->addr = addr;
1490 if (count == 1)
1491 regs->eip = vmcb->exitinfo2;
1493 send_pio_req(port, 1, size, value, dir, df, 0);
1495 else
1497 unsigned long last_addr = sign > 0 ? addr + count * size - 1
1498 : addr - (count - 1) * size;
1500 if ((addr & PAGE_MASK) != (last_addr & PAGE_MASK))
1502 if (sign > 0)
1503 count = (PAGE_SIZE - (addr & ~PAGE_MASK)) / size;
1504 else
1505 count = (addr & ~PAGE_MASK) / size + 1;
1507 else
1508 regs->eip = vmcb->exitinfo2;
1510 send_pio_req(port, count, size, paddr, dir, df, 1);
1513 else
1515 /*
1516 * On SVM, the RIP of the instruction following the IN/OUT is saved in
1517 * ExitInfo2
1518 */
1519 regs->eip = vmcb->exitinfo2;
1521 if (port == 0xe9 && dir == IOREQ_WRITE && size == 1)
1522 hvm_print_line(v, regs->eax); /* guest debug output */
1524 send_pio_req(port, 1, size, regs->eax, dir, df, 0);
1528 static void mov_from_cr(int cr, int gp, struct cpu_user_regs *regs)
1530 unsigned long value = 0;
1531 struct vcpu *v = current;
1532 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1534 switch ( cr )
1536 case 0:
1537 value = v->arch.hvm_vcpu.guest_cr[0];
1538 break;
1539 case 3:
1540 value = (unsigned long)v->arch.hvm_vcpu.guest_cr[3];
1541 break;
1542 case 4:
1543 value = (unsigned long)v->arch.hvm_vcpu.guest_cr[4];
1544 break;
1545 default:
1546 gdprintk(XENLOG_ERR, "invalid cr: %d\n", cr);
1547 domain_crash(v->domain);
1548 return;
1551 HVMTRACE_2D(CR_READ, v, cr, value);
1553 set_reg(gp, value, regs, vmcb);
1555 HVM_DBG_LOG(DBG_LEVEL_VMMU, "mov_from_cr: CR%d, value = %lx", cr, value);
1558 static int mov_to_cr(int gpreg, int cr, struct cpu_user_regs *regs)
1560 unsigned long value;
1561 struct vcpu *v = current;
1562 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1564 value = get_reg(gpreg, regs, vmcb);
1566 HVMTRACE_2D(CR_WRITE, v, cr, value);
1568 HVM_DBG_LOG(DBG_LEVEL_1, "mov_to_cr: CR%d, value = %lx, current = %p",
1569 cr, value, v);
1571 switch ( cr )
1573 case 0:
1574 return hvm_set_cr0(value);
1575 case 3:
1576 return hvm_set_cr3(value);
1577 case 4:
1578 return hvm_set_cr4(value);
1579 default:
1580 gdprintk(XENLOG_ERR, "invalid cr: %d\n", cr);
1581 domain_crash(v->domain);
1582 return 0;
1585 return 1;
1588 static void svm_cr_access(
1589 struct vcpu *v, unsigned int cr, unsigned int type,
1590 struct cpu_user_regs *regs)
1592 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1593 int inst_len = 0;
1594 int index,addr_size,i;
1595 unsigned int gpreg,offset;
1596 unsigned long value,addr;
1597 u8 buffer[MAX_INST_LEN];
1598 u8 prefix = 0;
1599 u8 modrm;
1600 enum x86_segment seg;
1601 int result = 1;
1602 enum instruction_index list_a[] = {INSTR_MOV2CR, INSTR_CLTS, INSTR_LMSW};
1603 enum instruction_index list_b[] = {INSTR_MOVCR2, INSTR_SMSW};
1604 enum instruction_index match;
1606 inst_copy_from_guest(buffer, svm_rip2pointer(v), sizeof(buffer));
1608 /* get index to first actual instruction byte - as we will need to know
1609 where the prefix lives later on */
1610 index = skip_prefix_bytes(buffer, sizeof(buffer));
1612 if ( type == TYPE_MOV_TO_CR )
1614 inst_len = __get_instruction_length_from_list(
1615 v, list_a, ARRAY_SIZE(list_a), &buffer[index], &match);
1617 else /* type == TYPE_MOV_FROM_CR */
1619 inst_len = __get_instruction_length_from_list(
1620 v, list_b, ARRAY_SIZE(list_b), &buffer[index], &match);
1623 inst_len += index;
1625 /* Check for REX prefix - it's ALWAYS the last byte of any prefix bytes */
1626 if (index > 0 && (buffer[index-1] & 0xF0) == 0x40)
1627 prefix = buffer[index-1];
1629 HVM_DBG_LOG(DBG_LEVEL_1, "eip = %lx", (unsigned long)regs->eip);
1631 switch ( match )
1634 case INSTR_MOV2CR:
1635 gpreg = decode_src_reg(prefix, buffer[index+2]);
1636 result = mov_to_cr(gpreg, cr, regs);
1637 break;
1639 case INSTR_MOVCR2:
1640 gpreg = decode_src_reg(prefix, buffer[index+2]);
1641 mov_from_cr(cr, gpreg, regs);
1642 break;
1644 case INSTR_CLTS:
1645 /* TS being cleared means that it's time to restore fpu state. */
1646 setup_fpu(current);
1647 vmcb->exception_intercepts &= ~(1U << TRAP_no_device);
1648 vmcb->cr0 &= ~X86_CR0_TS; /* clear TS */
1649 v->arch.hvm_vcpu.guest_cr[0] &= ~X86_CR0_TS; /* clear TS */
1650 HVMTRACE_0D(CLTS, current);
1651 break;
1653 case INSTR_LMSW:
1654 gpreg = decode_src_reg(prefix, buffer[index+2]);
1655 value = get_reg(gpreg, regs, vmcb) & 0xF;
1656 value = (v->arch.hvm_vcpu.guest_cr[0] & ~0xF) | value;
1657 result = hvm_set_cr0(value);
1658 HVMTRACE_1D(LMSW, current, value);
1659 break;
1661 case INSTR_SMSW:
1662 value = v->arch.hvm_vcpu.guest_cr[0] & 0xFFFF;
1663 modrm = buffer[index+2];
1664 addr_size = svm_guest_x86_mode(v);
1665 if ( addr_size < 2 )
1666 addr_size = 2;
1667 if ( likely((modrm & 0xC0) >> 6 == 3) )
1669 gpreg = decode_src_reg(prefix, modrm);
1670 set_reg(gpreg, value, regs, vmcb);
1672 /*
1673 * For now, only implement decode of the offset mode, since that's the
1674 * only mode observed in a real-world OS. This code is also making the
1675 * assumption that we'll never hit this code in long mode.
1676 */
1677 else if ( (modrm == 0x26) || (modrm == 0x25) )
1679 seg = x86_seg_ds;
1680 i = index;
1681 /* Segment or address size overrides? */
1682 while ( i-- )
1684 switch ( buffer[i] )
1686 case 0x26: seg = x86_seg_es; break;
1687 case 0x2e: seg = x86_seg_cs; break;
1688 case 0x36: seg = x86_seg_ss; break;
1689 case 0x64: seg = x86_seg_fs; break;
1690 case 0x65: seg = x86_seg_gs; break;
1691 case 0x67: addr_size ^= 6; break;
1694 /* Bail unless this really is a seg_base + offset case */
1695 if ( ((modrm == 0x26) && (addr_size == 4)) ||
1696 ((modrm == 0x25) && (addr_size == 2)) )
1698 gdprintk(XENLOG_ERR, "SMSW emulation at guest address: "
1699 "%lx failed due to unhandled addressing mode."
1700 "ModRM byte was: %x \n", svm_rip2pointer(v), modrm);
1701 domain_crash(v->domain);
1703 inst_len += addr_size;
1704 offset = *(( unsigned int *) ( void *) &buffer[index + 3]);
1705 offset = ( addr_size == 4 ) ? offset : ( offset & 0xFFFF );
1706 addr = hvm_get_segment_base(v, seg);
1707 addr += offset;
1708 hvm_copy_to_guest_virt(addr,&value,2);
1710 else
1712 gdprintk(XENLOG_ERR, "SMSW emulation at guest address: %lx "
1713 "failed due to unhandled addressing mode!"
1714 "ModRM byte was: %x \n", svm_rip2pointer(v), modrm);
1715 domain_crash(v->domain);
1717 break;
1719 default:
1720 BUG();
1723 if ( result )
1724 __update_guest_eip(regs, inst_len);
1727 static void svm_do_msr_access(
1728 struct vcpu *v, struct cpu_user_regs *regs)
1730 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1731 int inst_len;
1732 u64 msr_content=0;
1733 u32 ecx = regs->ecx, eax, edx;
1735 HVM_DBG_LOG(DBG_LEVEL_1, "ecx=%x, eax=%x, edx=%x, exitinfo = %lx",
1736 ecx, (u32)regs->eax, (u32)regs->edx,
1737 (unsigned long)vmcb->exitinfo1);
1739 /* is it a read? */
1740 if (vmcb->exitinfo1 == 0)
1742 switch (ecx) {
1743 case MSR_IA32_TSC:
1744 msr_content = hvm_get_guest_time(v);
1745 break;
1747 case MSR_IA32_APICBASE:
1748 msr_content = vcpu_vlapic(v)->hw.apic_base_msr;
1749 break;
1751 case MSR_EFER:
1752 msr_content = v->arch.hvm_vcpu.guest_efer;
1753 break;
1755 case MSR_IA32_MC4_MISC: /* Threshold register */
1756 case MSR_F10_MC4_MISC1 ... MSR_F10_MC4_MISC3:
1757 /*
1758 * MCA/MCE: We report that the threshold register is unavailable
1759 * for OS use (locked by the BIOS).
1760 */
1761 msr_content = 1ULL << 61; /* MC4_MISC.Locked */
1762 break;
1764 case MSR_IA32_EBC_FREQUENCY_ID:
1765 /*
1766 * This Intel-only register may be accessed if this HVM guest
1767 * has been migrated from an Intel host. The value zero is not
1768 * particularly meaningful, but at least avoids the guest crashing!
1769 */
1770 msr_content = 0;
1771 break;
1773 case MSR_K8_VM_HSAVE_PA:
1774 svm_inject_exception(TRAP_gp_fault, 0, 0);
1775 break;
1777 case MSR_IA32_MCG_CAP:
1778 case MSR_IA32_MCG_STATUS:
1779 case MSR_IA32_MC0_STATUS:
1780 case MSR_IA32_MC1_STATUS:
1781 case MSR_IA32_MC2_STATUS:
1782 case MSR_IA32_MC3_STATUS:
1783 case MSR_IA32_MC4_STATUS:
1784 case MSR_IA32_MC5_STATUS:
1785 /* No point in letting the guest see real MCEs */
1786 msr_content = 0;
1787 break;
1789 case MSR_IA32_DEBUGCTLMSR:
1790 msr_content = vmcb->debugctlmsr;
1791 break;
1793 case MSR_IA32_LASTBRANCHFROMIP:
1794 msr_content = vmcb->lastbranchfromip;
1795 break;
1797 case MSR_IA32_LASTBRANCHTOIP:
1798 msr_content = vmcb->lastbranchtoip;
1799 break;
1801 case MSR_IA32_LASTINTFROMIP:
1802 msr_content = vmcb->lastintfromip;
1803 break;
1805 case MSR_IA32_LASTINTTOIP:
1806 msr_content = vmcb->lastinttoip;
1807 break;
1809 default:
1810 if ( rdmsr_hypervisor_regs(ecx, &eax, &edx) ||
1811 rdmsr_safe(ecx, eax, edx) == 0 )
1813 regs->eax = eax;
1814 regs->edx = edx;
1815 goto done;
1817 svm_inject_exception(TRAP_gp_fault, 0, 0);
1818 return;
1820 regs->eax = msr_content & 0xFFFFFFFF;
1821 regs->edx = msr_content >> 32;
1823 done:
1824 hvmtrace_msr_read(v, ecx, msr_content);
1825 HVM_DBG_LOG(DBG_LEVEL_1, "returns: ecx=%x, eax=%lx, edx=%lx",
1826 ecx, (unsigned long)regs->eax, (unsigned long)regs->edx);
1828 inst_len = __get_instruction_length(v, INSTR_RDMSR, NULL);
1830 else
1832 msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
1834 hvmtrace_msr_write(v, ecx, msr_content);
1836 switch (ecx)
1838 case MSR_IA32_TSC:
1839 hvm_set_guest_time(v, msr_content);
1840 pt_reset(v);
1841 break;
1843 case MSR_IA32_APICBASE:
1844 vlapic_msr_set(vcpu_vlapic(v), msr_content);
1845 break;
1847 case MSR_K8_VM_HSAVE_PA:
1848 svm_inject_exception(TRAP_gp_fault, 0, 0);
1849 break;
1851 case MSR_IA32_DEBUGCTLMSR:
1852 vmcb->debugctlmsr = msr_content;
1853 if ( !msr_content || !cpu_has_svm_lbrv )
1854 break;
1855 vmcb->lbr_control.fields.enable = 1;
1856 svm_disable_intercept_for_msr(v, MSR_IA32_DEBUGCTLMSR);
1857 svm_disable_intercept_for_msr(v, MSR_IA32_LASTBRANCHFROMIP);
1858 svm_disable_intercept_for_msr(v, MSR_IA32_LASTBRANCHTOIP);
1859 svm_disable_intercept_for_msr(v, MSR_IA32_LASTINTFROMIP);
1860 svm_disable_intercept_for_msr(v, MSR_IA32_LASTINTTOIP);
1861 break;
1863 case MSR_IA32_LASTBRANCHFROMIP:
1864 vmcb->lastbranchfromip = msr_content;
1865 break;
1867 case MSR_IA32_LASTBRANCHTOIP:
1868 vmcb->lastbranchtoip = msr_content;
1869 break;
1871 case MSR_IA32_LASTINTFROMIP:
1872 vmcb->lastintfromip = msr_content;
1873 break;
1875 case MSR_IA32_LASTINTTOIP:
1876 vmcb->lastinttoip = msr_content;
1877 break;
1879 default:
1880 switch ( long_mode_do_msr_write(regs) )
1882 case HNDL_unhandled:
1883 wrmsr_hypervisor_regs(ecx, regs->eax, regs->edx);
1884 break;
1885 case HNDL_exception_raised:
1886 return;
1887 case HNDL_done:
1888 break;
1890 break;
1893 inst_len = __get_instruction_length(v, INSTR_WRMSR, NULL);
1896 __update_guest_eip(regs, inst_len);
1899 static void svm_vmexit_do_hlt(struct vmcb_struct *vmcb,
1900 struct cpu_user_regs *regs)
1902 struct vcpu *curr = current;
1903 struct hvm_intack intack = hvm_vcpu_has_pending_irq(curr);
1904 unsigned int inst_len;
1906 inst_len = __get_instruction_length(curr, INSTR_HLT, NULL);
1907 __update_guest_eip(regs, inst_len);
1909 /* Check for pending exception or new interrupt. */
1910 if ( vmcb->eventinj.fields.v ||
1911 ((intack.source != hvm_intsrc_none) &&
1912 !svm_interrupt_blocked(current, intack)) )
1914 HVMTRACE_1D(HLT, curr, /*int pending=*/ 1);
1915 return;
1918 HVMTRACE_1D(HLT, curr, /*int pending=*/ 0);
1919 hvm_hlt(regs->eflags);
1922 static void svm_vmexit_do_invalidate_cache(struct cpu_user_regs *regs)
1924 enum instruction_index list[] = { INSTR_INVD, INSTR_WBINVD };
1925 struct vcpu *curr = current;
1926 struct vmcb_struct *vmcb = curr->arch.hvm_svm.vmcb;
1927 int inst_len;
1929 if ( !list_empty(&(domain_hvm_iommu(curr->domain)->pdev_list)) )
1931 vmcb->general2_intercepts &= ~GENERAL2_INTERCEPT_WBINVD;
1932 wbinvd();
1935 inst_len = __get_instruction_length_from_list(
1936 curr, list, ARRAY_SIZE(list), NULL, NULL);
1937 __update_guest_eip(regs, inst_len);
1940 void svm_handle_invlpg(const short invlpga, struct cpu_user_regs *regs)
1942 struct vcpu *v = current;
1943 u8 opcode[MAX_INST_LEN], prefix, length = MAX_INST_LEN;
1944 unsigned long g_vaddr;
1945 int inst_len;
1947 /*
1948 * We do not know in advance how many bytes the INVLPG instruction
1949 * takes, so copy the maximum instruction length here.
1950 */
1951 if ( inst_copy_from_guest(opcode, svm_rip2pointer(v), length) < length )
1953 gdprintk(XENLOG_ERR, "Error reading memory %d bytes\n", length);
1954 goto crash;
1957 if ( invlpga )
1959 inst_len = __get_instruction_length(v, INSTR_INVLPGA, opcode);
1960 __update_guest_eip(regs, inst_len);
1962 /*
1963 * The address is implicit in this instruction. At the moment, we don't
1964 * use ecx (ASID) to identify individual guest pages
1965 */
1966 g_vaddr = regs->eax;
1968 else
1970 /* What about multiple prefix codes? */
1971 prefix = (is_prefix(opcode[0]) ? opcode[0] : 0);
1972 inst_len = __get_instruction_length(v, INSTR_INVLPG, opcode);
1973 if ( inst_len <= 0 )
1975 gdprintk(XENLOG_ERR, "Error getting invlpg instr len\n");
1976 goto crash;
1979 inst_len--;
1980 length -= inst_len;
1982 /*
1983 * Decode memory operand of the instruction including ModRM, SIB, and
1984 * displacement to get effective address and length in bytes. Assume
1985 * the system is in either 32- or 64-bit mode.
1986 */
1987 g_vaddr = get_effective_addr_modrm64(regs, prefix, inst_len,
1988 &opcode[inst_len], &length);
1990 inst_len += length;
1991 __update_guest_eip(regs, inst_len);
1994 HVMTRACE_3D(INVLPG, v, !!invlpga, g_vaddr, (invlpga ? regs->ecx : 0));
1996 paging_invlpg(v, g_vaddr);
1997 svm_asid_g_invlpg(v, g_vaddr);
1998 return;
2000 crash:
2001 domain_crash(v->domain);
2004 asmlinkage void svm_vmexit_handler(struct cpu_user_regs *regs)
2006 unsigned int exit_reason;
2007 struct vcpu *v = current;
2008 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2009 eventinj_t eventinj;
2010 int inst_len, rc;
2012 /*
2013 * Before doing anything else, we need to sync up the VLAPIC's TPR with
2014 * SVM's vTPR. It's OK if the guest doesn't touch CR8 (e.g. 32-bit Windows)
2015 * because we update the vTPR on MMIO writes to the TPR.
2016 */
2017 vlapic_set_reg(vcpu_vlapic(v), APIC_TASKPRI,
2018 (vmcb->vintr.fields.tpr & 0x0F) << 4);
2020 exit_reason = vmcb->exitcode;
2022 hvmtrace_vmexit(v, regs->eip, exit_reason);
2024 if ( unlikely(exit_reason == VMEXIT_INVALID) )
2026 svm_dump_vmcb(__func__, vmcb);
2027 goto exit_and_crash;
2030 perfc_incra(svmexits, exit_reason);
2032 /* Event delivery caused this intercept? Queue for redelivery. */
2033 eventinj = vmcb->exitintinfo;
2034 if ( unlikely(eventinj.fields.v) &&
2035 hvm_event_needs_reinjection(eventinj.fields.type,
2036 eventinj.fields.vector) )
2037 vmcb->eventinj = eventinj;
2039 switch ( exit_reason )
2041 case VMEXIT_INTR:
2042 /* Asynchronous event, handled when we STGI'd after the VMEXIT. */
2043 HVMTRACE_0D(INTR, v);
2044 break;
2046 case VMEXIT_NMI:
2047 /* Asynchronous event, handled when we STGI'd after the VMEXIT. */
2048 HVMTRACE_0D(NMI, v);
2049 break;
2051 case VMEXIT_SMI:
2052 /* Asynchronous event, handled when we STGI'd after the VMEXIT. */
2053 HVMTRACE_0D(SMI, v);
2054 break;
2056 case VMEXIT_EXCEPTION_DB:
2057 if ( !v->domain->debugger_attached )
2058 goto exit_and_crash;
2059 domain_pause_for_debugger();
2060 break;
2062 case VMEXIT_EXCEPTION_BP:
2063 if ( !v->domain->debugger_attached )
2064 goto exit_and_crash;
2065 /* AMD Vol2, 15.11: INT3, INTO, BOUND intercepts do not update RIP. */
2066 inst_len = __get_instruction_length(v, INSTR_INT3, NULL);
2067 __update_guest_eip(regs, inst_len);
2068 domain_pause_for_debugger();
2069 break;
2071 case VMEXIT_EXCEPTION_NM:
2072 svm_do_no_device_fault(vmcb);
2073 break;
2075 case VMEXIT_EXCEPTION_PF: {
2076 unsigned long va;
2077 va = vmcb->exitinfo2;
2078 regs->error_code = vmcb->exitinfo1;
2079 HVM_DBG_LOG(DBG_LEVEL_VMMU,
2080 "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx",
2081 (unsigned long)regs->eax, (unsigned long)regs->ebx,
2082 (unsigned long)regs->ecx, (unsigned long)regs->edx,
2083 (unsigned long)regs->esi, (unsigned long)regs->edi);
2085 if ( paging_fault(va, regs) )
2087 HVMTRACE_2D(PF_XEN, v, va, regs->error_code);
2088 break;
2091 svm_inject_exception(TRAP_page_fault, regs->error_code, va);
2092 break;
2095 /* Asynchronous event, handled when we STGI'd after the VMEXIT. */
2096 case VMEXIT_EXCEPTION_MC:
2097 HVMTRACE_0D(MCE, v);
2098 break;
2100 case VMEXIT_VINTR:
2101 vmcb->vintr.fields.irq = 0;
2102 vmcb->general1_intercepts &= ~GENERAL1_INTERCEPT_VINTR;
2103 break;
2105 case VMEXIT_INVD:
2106 case VMEXIT_WBINVD:
2107 svm_vmexit_do_invalidate_cache(regs);
2108 break;
2110 case VMEXIT_TASK_SWITCH: {
2111 enum hvm_task_switch_reason reason;
2112 int32_t errcode = -1;
2113 if ( (vmcb->exitinfo2 >> 36) & 1 )
2114 reason = TSW_iret;
2115 else if ( (vmcb->exitinfo2 >> 38) & 1 )
2116 reason = TSW_jmp;
2117 else
2118 reason = TSW_call_or_int;
2119 if ( (vmcb->exitinfo2 >> 44) & 1 )
2120 errcode = (uint32_t)vmcb->exitinfo2;
2121 hvm_task_switch((uint16_t)vmcb->exitinfo1, reason, errcode);
2122 break;
2125 case VMEXIT_CPUID:
2126 svm_vmexit_do_cpuid(vmcb, regs);
2127 break;
2129 case VMEXIT_HLT:
2130 svm_vmexit_do_hlt(vmcb, regs);
2131 break;
2133 case VMEXIT_INVLPG:
2134 svm_handle_invlpg(0, regs);
2135 break;
2137 case VMEXIT_INVLPGA:
2138 svm_handle_invlpg(1, regs);
2139 break;
2141 case VMEXIT_VMMCALL:
2142 inst_len = __get_instruction_length(v, INSTR_VMCALL, NULL);
2143 HVMTRACE_1D(VMMCALL, v, regs->eax);
2144 rc = hvm_do_hypercall(regs);
2145 if ( rc != HVM_HCALL_preempted )
2147 __update_guest_eip(regs, inst_len);
2148 if ( rc == HVM_HCALL_invalidate )
2149 send_invalidate_req();
2151 break;
2153 case VMEXIT_CR0_READ ... VMEXIT_CR15_READ:
2154 svm_cr_access(v, exit_reason - VMEXIT_CR0_READ,
2155 TYPE_MOV_FROM_CR, regs);
2156 break;
2158 case VMEXIT_CR0_WRITE ... VMEXIT_CR15_WRITE:
2159 svm_cr_access(v, exit_reason - VMEXIT_CR0_WRITE,
2160 TYPE_MOV_TO_CR, regs);
2161 break;
2163 case VMEXIT_DR0_READ ... VMEXIT_DR7_READ:
2164 case VMEXIT_DR0_WRITE ... VMEXIT_DR7_WRITE:
2165 svm_dr_access(v, regs);
2166 break;
2168 case VMEXIT_IOIO:
2169 svm_io_instruction(v);
2170 break;
2172 case VMEXIT_MSR:
2173 svm_do_msr_access(v, regs);
2174 break;
2176 case VMEXIT_SHUTDOWN:
2177 hvm_triple_fault();
2178 break;
2180 case VMEXIT_RDTSCP:
2181 case VMEXIT_MONITOR:
2182 case VMEXIT_MWAIT:
2183 case VMEXIT_VMRUN:
2184 case VMEXIT_VMLOAD:
2185 case VMEXIT_VMSAVE:
2186 case VMEXIT_STGI:
2187 case VMEXIT_CLGI:
2188 case VMEXIT_SKINIT:
2189 svm_inject_exception(TRAP_invalid_op, HVM_DELIVER_NO_ERROR_CODE, 0);
2190 break;
2192 case VMEXIT_NPF:
2193 perfc_incra(svmexits, VMEXIT_NPF_PERFC);
2194 regs->error_code = vmcb->exitinfo1;
2195 svm_do_nested_pgfault(vmcb->exitinfo2, regs);
2196 break;
2198 default:
2199 exit_and_crash:
2200 gdprintk(XENLOG_ERR, "unexpected VMEXIT: exit reason = 0x%x, "
2201 "exitinfo1 = %"PRIx64", exitinfo2 = %"PRIx64"\n",
2202 exit_reason,
2203 (u64)vmcb->exitinfo1, (u64)vmcb->exitinfo2);
2204 domain_crash(v->domain);
2205 break;
2208 /* The exit may have updated the TPR: reflect this in the hardware vtpr */
2209 vmcb->vintr.fields.tpr =
2210 (vlapic_get_reg(vcpu_vlapic(v), APIC_TASKPRI) & 0xFF) >> 4;
2213 asmlinkage void svm_trace_vmentry(void)
2215 struct vcpu *v = current;
2217 /* This is the last C code before the VMRUN instruction. */
2218 hvmtrace_vmentry(v);
2221 /*
2222 * Local variables:
2223 * mode: C
2224 * c-set-style: "BSD"
2225 * c-basic-offset: 4
2226 * tab-width: 4
2227 * indent-tabs-mode: nil
2228 * End:
2229 */