ia64/xen-unstable: xen/arch/x86/hvm/svm/svm.c @ 16427:fd3f6d814f6d

x86: single step after instruction emulation

Inject single step trap after emulating instructions if guest's
EFLAGS.TF is set.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Keir Fraser <keir.fraser@eu.citrix.com>
Author: Keir Fraser <keir.fraser@citrix.com>
Date: Thu Nov 22 18:28:47 2007 +0000 (2007-11-22)
Parents: 5e85709e998b
Children: 69b56d3289f5
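
The effect of the change is visible in __update_guest_eip() in the listing
below; condensed, the pattern applied after each emulated instruction is
(a sketch taken from the source shown here, not a separate patch hunk):

    regs->eip += inst_len;
    regs->eflags &= ~X86_EFLAGS_RF;
    curr->arch.hvm_svm.vmcb->interrupt_shadow = 0;

    /* Guest was single-stepping the emulated instruction: deliver the
     * #DB trap it would have received had the instruction run natively. */
    if ( regs->eflags & X86_EFLAGS_TF )
        svm_inject_exception(TRAP_debug, HVM_DELIVER_NO_ERROR_CODE, 0);
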
line source
1 /*
2 * svm.c: handling SVM architecture-related VM exits
3 * Copyright (c) 2004, Intel Corporation.
4 * Copyright (c) 2005-2007, Advanced Micro Devices, Inc.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
17 * Place - Suite 330, Boston, MA 02111-1307 USA.
18 */
20 #include <xen/config.h>
21 #include <xen/init.h>
22 #include <xen/lib.h>
23 #include <xen/trace.h>
24 #include <xen/sched.h>
25 #include <xen/irq.h>
26 #include <xen/softirq.h>
27 #include <xen/hypercall.h>
28 #include <xen/domain_page.h>
29 #include <asm/current.h>
30 #include <asm/io.h>
31 #include <asm/paging.h>
32 #include <asm/p2m.h>
33 #include <asm/regs.h>
34 #include <asm/cpufeature.h>
35 #include <asm/processor.h>
36 #include <asm/types.h>
37 #include <asm/msr.h>
38 #include <asm/spinlock.h>
39 #include <asm/hvm/hvm.h>
40 #include <asm/hvm/support.h>
41 #include <asm/hvm/io.h>
42 #include <asm/hvm/svm/asid.h>
43 #include <asm/hvm/svm/svm.h>
44 #include <asm/hvm/svm/vmcb.h>
45 #include <asm/hvm/svm/emulate.h>
46 #include <asm/hvm/svm/intr.h>
47 #include <asm/x86_emulate.h>
48 #include <public/sched.h>
49 #include <asm/hvm/vpt.h>
50 #include <asm/hvm/trace.h>
51 #include <asm/hap.h>
53 u32 svm_feature_flags;
55 #define set_segment_register(name, value) \
56 asm volatile ( "movw %%ax, %%" STR(name) "" : : "a" (value) )
58 enum handler_return { HNDL_done, HNDL_unhandled, HNDL_exception_raised };
60 int inst_copy_from_guest(unsigned char *buf, unsigned long guest_eip,
61 int inst_len);
62 asmlinkage void do_IRQ(struct cpu_user_regs *);
64 static int svm_reset_to_realmode(
65 struct vcpu *v, struct cpu_user_regs *regs);
66 static void svm_update_guest_cr(struct vcpu *v, unsigned int cr);
67 static void svm_update_guest_efer(struct vcpu *v);
68 static void svm_inject_exception(
69 unsigned int trapnr, int errcode, unsigned long cr2);
71 /* va of hardware host save area */
72 static void *hsa[NR_CPUS] __read_mostly;
74 /* vmcb used for extended host state */
75 static void *root_vmcb[NR_CPUS] __read_mostly;
77 static inline void __update_guest_eip(
78 struct cpu_user_regs *regs, unsigned int inst_len)
79 {
80 struct vcpu *curr = current;
82 if ( unlikely((inst_len == 0) || (inst_len > 15)) )
83 {
84 gdprintk(XENLOG_ERR, "Bad instruction length %u\n", inst_len);
85 domain_crash(curr->domain);
86 return;
87 }
89 ASSERT(regs == guest_cpu_user_regs());
91 regs->eip += inst_len;
92 regs->eflags &= ~X86_EFLAGS_RF;
94 curr->arch.hvm_svm.vmcb->interrupt_shadow = 0;
96 if ( regs->eflags & X86_EFLAGS_TF )
97 svm_inject_exception(TRAP_debug, HVM_DELIVER_NO_ERROR_CODE, 0);
98 }
100 static void svm_cpu_down(void)
101 {
102 write_efer(read_efer() & ~EFER_SVME);
103 }
105 static enum handler_return long_mode_do_msr_write(struct cpu_user_regs *regs)
106 {
107 u64 msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
108 u32 ecx = regs->ecx;
110 HVM_DBG_LOG(DBG_LEVEL_0, "msr %x msr_content %"PRIx64,
111 ecx, msr_content);
113 switch ( ecx )
114 {
115 case MSR_EFER:
116 if ( !hvm_set_efer(msr_content) )
117 return HNDL_exception_raised;
118 break;
120 case MSR_IA32_MC4_MISC: /* Threshold register */
121 case MSR_F10_MC4_MISC1 ... MSR_F10_MC4_MISC3:
122 /*
123 * MCA/MCE: Threshold register is reported to be locked, so we ignore
124 * all write accesses. This behaviour matches real HW, so guests should
125 * have no problem with this.
126 */
127 break;
129 default:
130 return HNDL_unhandled;
131 }
133 return HNDL_done;
134 }
136 static void svm_save_dr(struct vcpu *v)
137 {
138 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
140 if ( !v->arch.hvm_vcpu.flag_dr_dirty )
141 return;
143 /* Clear the DR dirty flag and re-enable intercepts for DR accesses. */
144 v->arch.hvm_vcpu.flag_dr_dirty = 0;
145 v->arch.hvm_svm.vmcb->dr_intercepts = ~0u;
147 v->arch.guest_context.debugreg[0] = read_debugreg(0);
148 v->arch.guest_context.debugreg[1] = read_debugreg(1);
149 v->arch.guest_context.debugreg[2] = read_debugreg(2);
150 v->arch.guest_context.debugreg[3] = read_debugreg(3);
151 v->arch.guest_context.debugreg[6] = vmcb->dr6;
152 v->arch.guest_context.debugreg[7] = vmcb->dr7;
153 }
155 static void __restore_debug_registers(struct vcpu *v)
156 {
157 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
159 if ( v->arch.hvm_vcpu.flag_dr_dirty )
160 return;
162 v->arch.hvm_vcpu.flag_dr_dirty = 1;
163 vmcb->dr_intercepts = 0;
165 write_debugreg(0, v->arch.guest_context.debugreg[0]);
166 write_debugreg(1, v->arch.guest_context.debugreg[1]);
167 write_debugreg(2, v->arch.guest_context.debugreg[2]);
168 write_debugreg(3, v->arch.guest_context.debugreg[3]);
169 vmcb->dr6 = v->arch.guest_context.debugreg[6];
170 vmcb->dr7 = v->arch.guest_context.debugreg[7];
171 }
173 /*
174 * DR7 is saved and restored on every vmexit. Other debug registers only
175 * need to be restored if their value is going to affect execution -- i.e.,
176 * if one of the breakpoints is enabled. So mask out all bits that don't
177 * enable some breakpoint functionality.
178 */
179 #define DR7_ACTIVE_MASK 0xff
181 static void svm_restore_dr(struct vcpu *v)
182 {
183 if ( unlikely(v->arch.guest_context.debugreg[7] & DR7_ACTIVE_MASK) )
184 __restore_debug_registers(v);
185 }
187 int svm_vmcb_save(struct vcpu *v, struct hvm_hw_cpu *c)
188 {
189 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
191 c->cr0 = v->arch.hvm_vcpu.guest_cr[0];
192 c->cr2 = v->arch.hvm_vcpu.guest_cr[2];
193 c->cr3 = v->arch.hvm_vcpu.guest_cr[3];
194 c->cr4 = v->arch.hvm_vcpu.guest_cr[4];
196 c->idtr_limit = vmcb->idtr.limit;
197 c->idtr_base = vmcb->idtr.base;
199 c->gdtr_limit = vmcb->gdtr.limit;
200 c->gdtr_base = vmcb->gdtr.base;
202 c->cs_sel = vmcb->cs.sel;
203 c->cs_limit = vmcb->cs.limit;
204 c->cs_base = vmcb->cs.base;
205 c->cs_arbytes = vmcb->cs.attr.bytes;
207 c->ds_sel = vmcb->ds.sel;
208 c->ds_limit = vmcb->ds.limit;
209 c->ds_base = vmcb->ds.base;
210 c->ds_arbytes = vmcb->ds.attr.bytes;
212 c->es_sel = vmcb->es.sel;
213 c->es_limit = vmcb->es.limit;
214 c->es_base = vmcb->es.base;
215 c->es_arbytes = vmcb->es.attr.bytes;
217 c->ss_sel = vmcb->ss.sel;
218 c->ss_limit = vmcb->ss.limit;
219 c->ss_base = vmcb->ss.base;
220 c->ss_arbytes = vmcb->ss.attr.bytes;
222 c->fs_sel = vmcb->fs.sel;
223 c->fs_limit = vmcb->fs.limit;
224 c->fs_base = vmcb->fs.base;
225 c->fs_arbytes = vmcb->fs.attr.bytes;
227 c->gs_sel = vmcb->gs.sel;
228 c->gs_limit = vmcb->gs.limit;
229 c->gs_base = vmcb->gs.base;
230 c->gs_arbytes = vmcb->gs.attr.bytes;
232 c->tr_sel = vmcb->tr.sel;
233 c->tr_limit = vmcb->tr.limit;
234 c->tr_base = vmcb->tr.base;
235 c->tr_arbytes = vmcb->tr.attr.bytes;
237 c->ldtr_sel = vmcb->ldtr.sel;
238 c->ldtr_limit = vmcb->ldtr.limit;
239 c->ldtr_base = vmcb->ldtr.base;
240 c->ldtr_arbytes = vmcb->ldtr.attr.bytes;
242 c->sysenter_cs = vmcb->sysenter_cs;
243 c->sysenter_esp = vmcb->sysenter_esp;
244 c->sysenter_eip = vmcb->sysenter_eip;
246 c->pending_event = 0;
247 c->error_code = 0;
248 if ( vmcb->eventinj.fields.v &&
249 hvm_event_needs_reinjection(vmcb->eventinj.fields.type,
250 vmcb->eventinj.fields.vector) )
251 {
252 c->pending_event = (uint32_t)vmcb->eventinj.bytes;
253 c->error_code = vmcb->eventinj.fields.errorcode;
254 }
256 return 1;
257 }
260 int svm_vmcb_restore(struct vcpu *v, struct hvm_hw_cpu *c)
261 {
262 unsigned long mfn = 0;
263 p2m_type_t p2mt;
264 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
266 if ( c->pending_valid &&
267 ((c->pending_type == 1) || (c->pending_type > 6) ||
268 (c->pending_reserved != 0)) )
269 {
270 gdprintk(XENLOG_ERR, "Invalid pending event 0x%"PRIx32".\n",
271 c->pending_event);
272 return -EINVAL;
273 }
275 if ( !paging_mode_hap(v->domain) )
276 {
277 if ( c->cr0 & X86_CR0_PG )
278 {
279 mfn = mfn_x(gfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT, &p2mt));
280 if ( !p2m_is_ram(p2mt) || !get_page(mfn_to_page(mfn), v->domain) )
281 {
282 gdprintk(XENLOG_ERR, "Invalid CR3 value=0x%"PRIx64"\n",
283 c->cr3);
284 return -EINVAL;
285 }
286 }
288 if ( v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PG )
289 put_page(pagetable_get_page(v->arch.guest_table));
291 v->arch.guest_table = pagetable_from_pfn(mfn);
292 }
294 v->arch.hvm_vcpu.guest_cr[0] = c->cr0 | X86_CR0_ET;
295 v->arch.hvm_vcpu.guest_cr[2] = c->cr2;
296 v->arch.hvm_vcpu.guest_cr[3] = c->cr3;
297 v->arch.hvm_vcpu.guest_cr[4] = c->cr4;
298 svm_update_guest_cr(v, 0);
299 svm_update_guest_cr(v, 2);
300 svm_update_guest_cr(v, 4);
302 #ifdef HVM_DEBUG_SUSPEND
303 printk("%s: cr3=0x%"PRIx64", cr0=0x%"PRIx64", cr4=0x%"PRIx64".\n",
304 __func__, c->cr3, c->cr0, c->cr4);
305 #endif
307 vmcb->idtr.limit = c->idtr_limit;
308 vmcb->idtr.base = c->idtr_base;
310 vmcb->gdtr.limit = c->gdtr_limit;
311 vmcb->gdtr.base = c->gdtr_base;
313 vmcb->cs.sel = c->cs_sel;
314 vmcb->cs.limit = c->cs_limit;
315 vmcb->cs.base = c->cs_base;
316 vmcb->cs.attr.bytes = c->cs_arbytes;
318 vmcb->ds.sel = c->ds_sel;
319 vmcb->ds.limit = c->ds_limit;
320 vmcb->ds.base = c->ds_base;
321 vmcb->ds.attr.bytes = c->ds_arbytes;
323 vmcb->es.sel = c->es_sel;
324 vmcb->es.limit = c->es_limit;
325 vmcb->es.base = c->es_base;
326 vmcb->es.attr.bytes = c->es_arbytes;
328 vmcb->ss.sel = c->ss_sel;
329 vmcb->ss.limit = c->ss_limit;
330 vmcb->ss.base = c->ss_base;
331 vmcb->ss.attr.bytes = c->ss_arbytes;
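    /* SVM keeps a cached CPL in the VMCB; keep it consistent with the
     * reloaded SS.DPL. */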
332 vmcb->cpl = vmcb->ss.attr.fields.dpl;
334 vmcb->fs.sel = c->fs_sel;
335 vmcb->fs.limit = c->fs_limit;
336 vmcb->fs.base = c->fs_base;
337 vmcb->fs.attr.bytes = c->fs_arbytes;
339 vmcb->gs.sel = c->gs_sel;
340 vmcb->gs.limit = c->gs_limit;
341 vmcb->gs.base = c->gs_base;
342 vmcb->gs.attr.bytes = c->gs_arbytes;
344 vmcb->tr.sel = c->tr_sel;
345 vmcb->tr.limit = c->tr_limit;
346 vmcb->tr.base = c->tr_base;
347 vmcb->tr.attr.bytes = c->tr_arbytes;
349 vmcb->ldtr.sel = c->ldtr_sel;
350 vmcb->ldtr.limit = c->ldtr_limit;
351 vmcb->ldtr.base = c->ldtr_base;
352 vmcb->ldtr.attr.bytes = c->ldtr_arbytes;
354 vmcb->sysenter_cs = c->sysenter_cs;
355 vmcb->sysenter_esp = c->sysenter_esp;
356 vmcb->sysenter_eip = c->sysenter_eip;
358 if ( paging_mode_hap(v->domain) )
359 {
360 vmcb->np_enable = 1;
361 vmcb->g_pat = 0x0007040600070406ULL; /* guest PAT */
362 vmcb->h_cr3 = pagetable_get_paddr(v->domain->arch.phys_table);
363 }
365 if ( c->pending_valid )
366 {
367 gdprintk(XENLOG_INFO, "Re-injecting 0x%"PRIx32", 0x%"PRIx32"\n",
368 c->pending_event, c->error_code);
370 if ( hvm_event_needs_reinjection(c->pending_type, c->pending_vector) )
371 {
372 vmcb->eventinj.bytes = c->pending_event;
373 vmcb->eventinj.fields.errorcode = c->error_code;
374 }
375 }
377 paging_update_paging_modes(v);
379 return 0;
380 }
383 static void svm_save_cpu_state(struct vcpu *v, struct hvm_hw_cpu *data)
384 {
385 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
387 data->shadow_gs = vmcb->kerngsbase;
388 data->msr_lstar = vmcb->lstar;
389 data->msr_star = vmcb->star;
390 data->msr_cstar = vmcb->cstar;
391 data->msr_syscall_mask = vmcb->sfmask;
392 data->msr_efer = v->arch.hvm_vcpu.guest_efer;
393 data->msr_flags = -1ULL;
395 data->tsc = hvm_get_guest_time(v);
396 }
399 static void svm_load_cpu_state(struct vcpu *v, struct hvm_hw_cpu *data)
400 {
401 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
403 vmcb->kerngsbase = data->shadow_gs;
404 vmcb->lstar = data->msr_lstar;
405 vmcb->star = data->msr_star;
406 vmcb->cstar = data->msr_cstar;
407 vmcb->sfmask = data->msr_syscall_mask;
408 v->arch.hvm_vcpu.guest_efer = data->msr_efer;
409 svm_update_guest_efer(v);
411 hvm_set_guest_time(v, data->tsc);
412 }
414 static void svm_save_vmcb_ctxt(struct vcpu *v, struct hvm_hw_cpu *ctxt)
415 {
416 svm_save_cpu_state(v, ctxt);
417 svm_vmcb_save(v, ctxt);
418 }
420 static int svm_load_vmcb_ctxt(struct vcpu *v, struct hvm_hw_cpu *ctxt)
421 {
422 svm_load_cpu_state(v, ctxt);
423 if (svm_vmcb_restore(v, ctxt)) {
424 printk("svm_vmcb restore failed!\n");
425 domain_crash(v->domain);
426 return -EINVAL;
427 }
429 return 0;
430 }
432 static enum hvm_intblk svm_interrupt_blocked(
433 struct vcpu *v, struct hvm_intack intack)
434 {
435 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
437 if ( vmcb->interrupt_shadow )
438 return hvm_intblk_shadow;
440 if ( intack.source == hvm_intsrc_nmi )
441 return hvm_intblk_none;
443 ASSERT((intack.source == hvm_intsrc_pic) ||
444 (intack.source == hvm_intsrc_lapic));
446 if ( !(guest_cpu_user_regs()->eflags & X86_EFLAGS_IF) )
447 return hvm_intblk_rflags_ie;
449 if ( (intack.source == hvm_intsrc_lapic) &&
450 ((vmcb->vintr.fields.tpr & 0xf) >= (intack.vector >> 4)) )
451 return hvm_intblk_tpr;
453 return hvm_intblk_none;
454 }
456 static int svm_guest_x86_mode(struct vcpu *v)
457 {
458 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
460 if ( unlikely(!(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE)) )
461 return 0;
462 if ( unlikely(guest_cpu_user_regs()->eflags & X86_EFLAGS_VM) )
463 return 1;
464 if ( hvm_long_mode_enabled(v) && likely(vmcb->cs.attr.fields.l) )
465 return 8;
466 return (likely(vmcb->cs.attr.fields.db) ? 4 : 2);
467 }
469 static void svm_update_host_cr3(struct vcpu *v)
470 {
471 /* SVM doesn't have a HOST_CR3 equivalent to update. */
472 }
474 static void svm_update_guest_cr(struct vcpu *v, unsigned int cr)
475 {
476 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
478 switch ( cr )
479 {
480 case 0:
481 /* TS cleared? Then initialise FPU now. */
482 if ( (v == current) && !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS) &&
483 (vmcb->cr0 & X86_CR0_TS) )
484 {
485 setup_fpu(v);
486 vmcb->exception_intercepts &= ~(1U << TRAP_no_device);
487 }
489 vmcb->cr0 = v->arch.hvm_vcpu.guest_cr[0];
490 if ( !paging_mode_hap(v->domain) )
491 vmcb->cr0 |= X86_CR0_PG | X86_CR0_WP;
492 break;
493 case 2:
494 vmcb->cr2 = v->arch.hvm_vcpu.guest_cr[2];
495 break;
496 case 3:
497 vmcb->cr3 = v->arch.hvm_vcpu.hw_cr[3];
498 svm_asid_inv_asid(v);
499 break;
500 case 4:
501 vmcb->cr4 = HVM_CR4_HOST_MASK;
502 if ( paging_mode_hap(v->domain) )
503 vmcb->cr4 &= ~X86_CR4_PAE;
504 vmcb->cr4 |= v->arch.hvm_vcpu.guest_cr[4];
505 break;
506 default:
507 BUG();
508 }
509 }
511 static void svm_update_guest_efer(struct vcpu *v)
512 {
513 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
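    /*
     * EFER_SVME must remain set while the guest is running. EFER.LME is only
     * exposed to hardware once EFER.LMA is set, i.e. once the guest has
     * actually activated long mode; until then (notably under shadow paging,
     * where hardware CR0.PG is forced on) advertising LME could switch the
     * guest into long mode prematurely.
     */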
515 vmcb->efer = (v->arch.hvm_vcpu.guest_efer | EFER_SVME) & ~EFER_LME;
516 if ( vmcb->efer & EFER_LMA )
517 vmcb->efer |= EFER_LME;
518 }
520 static void svm_flush_guest_tlbs(void)
521 {
522 /* Roll over the CPU's ASID generation, so it gets a clean TLB when we
523 * next VMRUN. (If ASIDs are disabled, the whole TLB is flushed on
524 * VMRUN anyway). */
525 svm_asid_inc_generation();
526 }
528 static void svm_update_vtpr(struct vcpu *v, unsigned long value)
529 {
530 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
532 vmcb->vintr.fields.tpr = value & 0x0f;
533 }
535 static void svm_sync_vmcb(struct vcpu *v)
536 {
537 struct arch_svm_struct *arch_svm = &v->arch.hvm_svm;
539 if ( arch_svm->vmcb_in_sync )
540 return;
542 arch_svm->vmcb_in_sync = 1;
544 svm_vmsave(arch_svm->vmcb);
545 }
547 static unsigned long svm_get_segment_base(struct vcpu *v, enum x86_segment seg)
548 {
549 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
550 int long_mode = vmcb->cs.attr.fields.l && hvm_long_mode_enabled(v);
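    /* In long mode the CS, DS, ES and SS bases are treated as zero; FS, GS
     * and the system segment registers keep their bases. */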
552 switch ( seg )
553 {
554 case x86_seg_cs: return long_mode ? 0 : vmcb->cs.base;
555 case x86_seg_ds: return long_mode ? 0 : vmcb->ds.base;
556 case x86_seg_es: return long_mode ? 0 : vmcb->es.base;
557 case x86_seg_fs: svm_sync_vmcb(v); return vmcb->fs.base;
558 case x86_seg_gs: svm_sync_vmcb(v); return vmcb->gs.base;
559 case x86_seg_ss: return long_mode ? 0 : vmcb->ss.base;
560 case x86_seg_tr: svm_sync_vmcb(v); return vmcb->tr.base;
561 case x86_seg_gdtr: return vmcb->gdtr.base;
562 case x86_seg_idtr: return vmcb->idtr.base;
563 case x86_seg_ldtr: svm_sync_vmcb(v); return vmcb->ldtr.base;
564 }
565 BUG();
566 return 0;
567 }
569 static void svm_get_segment_register(struct vcpu *v, enum x86_segment seg,
570 struct segment_register *reg)
571 {
572 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
574 ASSERT(v == current);
576 switch ( seg )
577 {
578 case x86_seg_cs:
579 memcpy(reg, &vmcb->cs, sizeof(*reg));
580 break;
581 case x86_seg_ds:
582 memcpy(reg, &vmcb->ds, sizeof(*reg));
583 break;
584 case x86_seg_es:
585 memcpy(reg, &vmcb->es, sizeof(*reg));
586 break;
587 case x86_seg_fs:
588 svm_sync_vmcb(v);
589 memcpy(reg, &vmcb->fs, sizeof(*reg));
590 break;
591 case x86_seg_gs:
592 svm_sync_vmcb(v);
593 memcpy(reg, &vmcb->gs, sizeof(*reg));
594 break;
595 case x86_seg_ss:
596 memcpy(reg, &vmcb->ss, sizeof(*reg));
597 break;
598 case x86_seg_tr:
599 svm_sync_vmcb(v);
600 memcpy(reg, &vmcb->tr, sizeof(*reg));
601 break;
602 case x86_seg_gdtr:
603 memcpy(reg, &vmcb->gdtr, sizeof(*reg));
604 break;
605 case x86_seg_idtr:
606 memcpy(reg, &vmcb->idtr, sizeof(*reg));
607 break;
608 case x86_seg_ldtr:
609 svm_sync_vmcb(v);
610 memcpy(reg, &vmcb->ldtr, sizeof(*reg));
611 break;
612 default:
613 BUG();
614 }
615 }
617 static void svm_set_segment_register(struct vcpu *v, enum x86_segment seg,
618 struct segment_register *reg)
619 {
620 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
622 ASSERT(v == current);
624 switch ( seg )
625 {
626 case x86_seg_cs:
627 memcpy(&vmcb->cs, reg, sizeof(*reg));
628 break;
629 case x86_seg_ds:
630 memcpy(&vmcb->ds, reg, sizeof(*reg));
631 break;
632 case x86_seg_es:
633 memcpy(&vmcb->es, reg, sizeof(*reg));
634 break;
635 case x86_seg_fs:
636 svm_sync_vmcb(v);
637 memcpy(&vmcb->fs, reg, sizeof(*reg));
638 svm_vmload(vmcb);
639 break;
640 case x86_seg_gs:
641 svm_sync_vmcb(v);
642 memcpy(&vmcb->gs, reg, sizeof(*reg));
643 svm_vmload(vmcb);
644 break;
645 case x86_seg_ss:
646 memcpy(&vmcb->ss, reg, sizeof(*reg));
647 vmcb->cpl = vmcb->ss.attr.fields.dpl;
648 break;
649 case x86_seg_tr:
650 svm_sync_vmcb(v);
651 memcpy(&vmcb->tr, reg, sizeof(*reg));
652 svm_vmload(vmcb);
653 break;
654 case x86_seg_gdtr:
655 memcpy(&vmcb->gdtr, reg, sizeof(*reg));
656 break;
657 case x86_seg_idtr:
658 memcpy(&vmcb->idtr, reg, sizeof(*reg));
659 break;
660 case x86_seg_ldtr:
661 svm_sync_vmcb(v);
662 memcpy(&vmcb->ldtr, reg, sizeof(*reg));
663 svm_vmload(vmcb);
664 break;
665 default:
666 BUG();
667 }
668 }
670 /* Make sure that Xen intercepts any FP accesses from current. */
671 static void svm_stts(struct vcpu *v)
672 {
673 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
675 /*
676 * If the guest does not have TS enabled then we must cause and handle an
677 * exception on first use of the FPU. If the guest *does* have TS enabled
678 * then this is not necessary: no FPU activity can occur until the guest
679 * clears CR0.TS, and we will initialise the FPU when that happens.
680 */
681 if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS) )
682 {
683 v->arch.hvm_svm.vmcb->exception_intercepts |= 1U << TRAP_no_device;
684 vmcb->cr0 |= X86_CR0_TS;
685 }
686 }
689 static void svm_set_tsc_offset(struct vcpu *v, u64 offset)
690 {
691 v->arch.hvm_svm.vmcb->tsc_offset = offset;
692 }
695 static void svm_init_ap_context(
696 struct vcpu_guest_context *ctxt, int vcpuid, int trampoline_vector)
697 {
698 struct vcpu *v;
699 struct vmcb_struct *vmcb;
700 cpu_user_regs_t *regs;
701 u16 cs_sel;
703 /* We know this is safe because hvm_bringup_ap() does it */
704 v = current->domain->vcpu[vcpuid];
705 vmcb = v->arch.hvm_svm.vmcb;
706 regs = &v->arch.guest_context.user_regs;
708 memset(ctxt, 0, sizeof(*ctxt));
710 /*
711 * We execute the trampoline code in real mode. The trampoline vector
712 * passed to us is page aligned and is the physical frame number for
713 * the code.
714 */
715 cs_sel = trampoline_vector << 8;
716 ctxt->user_regs.eip = 0x0;
717 ctxt->user_regs.cs = cs_sel;
719 /*
720 * This is the launch of an AP; set state so that we begin executing
721 * the trampoline code in real-mode.
722 */
723 svm_reset_to_realmode(v, regs);
724 /* Adjust the vmcb's hidden register state. */
725 vmcb->cs.sel = cs_sel;
726 vmcb->cs.base = (cs_sel << 4);
727 }
729 static void svm_init_hypercall_page(struct domain *d, void *hypercall_page)
730 {
731 char *p;
732 int i;
734 for ( i = 0; i < (PAGE_SIZE / 32); i++ )
735 {
736 p = (char *)(hypercall_page + (i * 32));
737 *(u8 *)(p + 0) = 0xb8; /* mov imm32, %eax */
738 *(u32 *)(p + 1) = i;
739 *(u8 *)(p + 5) = 0x0f; /* vmmcall */
740 *(u8 *)(p + 6) = 0x01;
741 *(u8 *)(p + 7) = 0xd9;
742 *(u8 *)(p + 8) = 0xc3; /* ret */
743 }
745 /* Don't support HYPERVISOR_iret at the moment */
746 *(u16 *)(hypercall_page + (__HYPERVISOR_iret * 32)) = 0x0b0f; /* ud2 */
747 }
749 static void svm_ctxt_switch_from(struct vcpu *v)
750 {
751 int cpu = smp_processor_id();
753 svm_save_dr(v);
755 svm_sync_vmcb(v);
756 svm_vmload(root_vmcb[cpu]);
758 #ifdef __x86_64__
759 /* Resume use of ISTs now that the host TR is reinstated. */
760 idt_tables[cpu][TRAP_double_fault].a |= IST_DF << 32;
761 idt_tables[cpu][TRAP_nmi].a |= IST_NMI << 32;
762 idt_tables[cpu][TRAP_machine_check].a |= IST_MCE << 32;
763 #endif
764 }
766 static void svm_ctxt_switch_to(struct vcpu *v)
767 {
768 int cpu = smp_processor_id();
770 #ifdef __x86_64__
771 /*
772 * This is required because VMRUN performs consistency checks,
773 * and some of the DOM0 selectors point to
774 * invalid GDT locations, causing AMD processors
775 * to shut down.
776 */
777 set_segment_register(ds, 0);
778 set_segment_register(es, 0);
779 set_segment_register(ss, 0);
781 /*
782 * Cannot use ISTs for NMI/#MC/#DF while we are running with the guest TR.
783 * But this doesn't matter: the IST is only req'd to handle SYSCALL/SYSRET.
784 */
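    /* Bits 34:32 of a 64-bit IDT entry hold the IST index; clearing them
     * disables IST use for these vectors. */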
785 idt_tables[cpu][TRAP_double_fault].a &= ~(7UL << 32);
786 idt_tables[cpu][TRAP_nmi].a &= ~(7UL << 32);
787 idt_tables[cpu][TRAP_machine_check].a &= ~(7UL << 32);
788 #endif
790 svm_restore_dr(v);
792 svm_vmsave(root_vmcb[cpu]);
793 svm_vmload(v->arch.hvm_svm.vmcb);
794 }
796 static void svm_do_resume(struct vcpu *v)
797 {
798 bool_t debug_state = v->domain->debugger_attached;
800 if ( unlikely(v->arch.hvm_vcpu.debug_state_latch != debug_state) )
801 {
802 uint32_t mask = (1U << TRAP_debug) | (1U << TRAP_int3);
803 v->arch.hvm_vcpu.debug_state_latch = debug_state;
804 if ( debug_state )
805 v->arch.hvm_svm.vmcb->exception_intercepts |= mask;
806 else
807 v->arch.hvm_svm.vmcb->exception_intercepts &= ~mask;
808 }
810 if ( v->arch.hvm_svm.launch_core != smp_processor_id() )
811 {
812 v->arch.hvm_svm.launch_core = smp_processor_id();
813 hvm_migrate_timers(v);
815 /* Migrating to another ASID domain. Request a new ASID. */
816 svm_asid_init_vcpu(v);
817 }
819 hvm_do_resume(v);
820 reset_stack_and_jump(svm_asm_do_resume);
821 }
823 static int svm_domain_initialise(struct domain *d)
824 {
825 return 0;
826 }
828 static void svm_domain_destroy(struct domain *d)
829 {
830 }
832 static int svm_vcpu_initialise(struct vcpu *v)
833 {
834 int rc;
836 v->arch.schedule_tail = svm_do_resume;
837 v->arch.ctxt_switch_from = svm_ctxt_switch_from;
838 v->arch.ctxt_switch_to = svm_ctxt_switch_to;
840 v->arch.hvm_svm.launch_core = -1;
842 if ( (rc = svm_create_vmcb(v)) != 0 )
843 {
844 dprintk(XENLOG_WARNING,
845 "Failed to create VMCB for vcpu %d: err=%d.\n",
846 v->vcpu_id, rc);
847 return rc;
848 }
850 return 0;
851 }
853 static void svm_vcpu_destroy(struct vcpu *v)
854 {
855 svm_destroy_vmcb(v);
856 }
858 static void svm_inject_exception(
859 unsigned int trapnr, int errcode, unsigned long cr2)
860 {
861 struct vcpu *curr = current;
862 struct vmcb_struct *vmcb = curr->arch.hvm_svm.vmcb;
863 eventinj_t event;
865 event.bytes = 0;
866 event.fields.v = 1;
867 event.fields.type = X86_EVENTTYPE_HW_EXCEPTION;
868 event.fields.vector = trapnr;
869 event.fields.ev = (errcode != HVM_DELIVER_NO_ERROR_CODE);
870 event.fields.errorcode = errcode;
872 vmcb->eventinj = event;
874 if ( trapnr == TRAP_page_fault )
875 {
876 vmcb->cr2 = curr->arch.hvm_vcpu.guest_cr[2] = cr2;
877 HVMTRACE_2D(PF_INJECT, curr, curr->arch.hvm_vcpu.guest_cr[2], errcode);
878 }
879 else
880 {
881 HVMTRACE_2D(INJ_EXC, curr, trapnr, errcode);
882 }
884 if ( (trapnr == TRAP_debug) &&
885 (guest_cpu_user_regs()->eflags & X86_EFLAGS_TF) )
886 {
887 __restore_debug_registers(curr);
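    /* Bit 14 of DR6 is the BS (single-step) flag; set it so the guest's
     * #DB handler sees a single-step trap. */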
888 vmcb->dr6 |= 0x4000;
889 }
890 }
892 static int svm_event_pending(struct vcpu *v)
893 {
894 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
895 return vmcb->eventinj.fields.v;
896 }
898 static struct hvm_function_table svm_function_table = {
899 .name = "SVM",
900 .cpu_down = svm_cpu_down,
901 .domain_initialise = svm_domain_initialise,
902 .domain_destroy = svm_domain_destroy,
903 .vcpu_initialise = svm_vcpu_initialise,
904 .vcpu_destroy = svm_vcpu_destroy,
905 .save_cpu_ctxt = svm_save_vmcb_ctxt,
906 .load_cpu_ctxt = svm_load_vmcb_ctxt,
907 .interrupt_blocked = svm_interrupt_blocked,
908 .guest_x86_mode = svm_guest_x86_mode,
909 .get_segment_base = svm_get_segment_base,
910 .get_segment_register = svm_get_segment_register,
911 .set_segment_register = svm_set_segment_register,
912 .update_host_cr3 = svm_update_host_cr3,
913 .update_guest_cr = svm_update_guest_cr,
914 .update_guest_efer = svm_update_guest_efer,
915 .flush_guest_tlbs = svm_flush_guest_tlbs,
916 .update_vtpr = svm_update_vtpr,
917 .stts = svm_stts,
918 .set_tsc_offset = svm_set_tsc_offset,
919 .inject_exception = svm_inject_exception,
920 .init_ap_context = svm_init_ap_context,
921 .init_hypercall_page = svm_init_hypercall_page,
922 .event_pending = svm_event_pending
923 };
925 int start_svm(struct cpuinfo_x86 *c)
926 {
927 u32 eax, ecx, edx;
928 u32 phys_hsa_lo, phys_hsa_hi;
929 u64 phys_hsa;
930 int cpu = smp_processor_id();
932 /* Xen does not fill x86_capability words except 0. */
933 ecx = cpuid_ecx(0x80000001);
934 boot_cpu_data.x86_capability[5] = ecx;
936 if ( !(test_bit(X86_FEATURE_SVME, &boot_cpu_data.x86_capability)) )
937 return 0;
939 /* Check whether SVM feature is disabled in BIOS */
940 rdmsr(MSR_K8_VM_CR, eax, edx);
941 if ( eax & K8_VMCR_SVME_DISABLE )
942 {
943 printk("AMD SVM Extension is disabled in BIOS.\n");
944 return 0;
945 }
947 if ( ((hsa[cpu] = alloc_host_save_area()) == NULL) ||
948 ((root_vmcb[cpu] = alloc_vmcb()) == NULL) )
949 return 0;
951 write_efer(read_efer() | EFER_SVME);
953 /* Initialize the HSA for this core. */
954 phys_hsa = (u64) virt_to_maddr(hsa[cpu]);
955 phys_hsa_lo = (u32) phys_hsa;
956 phys_hsa_hi = (u32) (phys_hsa >> 32);
957 wrmsr(MSR_K8_VM_HSAVE_PA, phys_hsa_lo, phys_hsa_hi);
959 /* Initialize core's ASID handling. */
960 svm_asid_init(c);
962 if ( cpu != 0 )
963 return 1;
965 setup_vmcb_dump();
967 svm_feature_flags = ((cpuid_eax(0x80000000) >= 0x8000000A) ?
968 cpuid_edx(0x8000000A) : 0);
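    /* CPUID leaf 0x8000000A:EDX enumerates the SVM sub-features (nested
     * paging, LBR virtualisation, etc.), when that leaf exists at all. */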
970 svm_function_table.hap_supported = cpu_has_svm_npt;
972 hvm_enable(&svm_function_table);
974 return 1;
975 }
977 static void svm_do_nested_pgfault(paddr_t gpa, struct cpu_user_regs *regs)
978 {
979 p2m_type_t p2mt;
980 mfn_t mfn;
981 unsigned long gfn = gpa >> PAGE_SHIFT;
983 /* If this GFN is emulated MMIO, pass the fault to the mmio handler */
984 mfn = gfn_to_mfn_current(gfn, &p2mt);
985 if ( p2mt == p2m_mmio_dm )
986 {
987 handle_mmio(gpa);
988 return;
989 }
991 /* Log-dirty: mark the page dirty and let the guest write it again */
992 paging_mark_dirty(current->domain, mfn_x(mfn));
993 p2m_change_type(current->domain, gfn, p2m_ram_logdirty, p2m_ram_rw);
994 }
996 static void svm_do_no_device_fault(struct vmcb_struct *vmcb)
997 {
998 struct vcpu *v = current;
1000 setup_fpu(v);
1001 vmcb->exception_intercepts &= ~(1U << TRAP_no_device);
1003 if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS) )
1004 vmcb->cr0 &= ~X86_CR0_TS;
1007 /* Reserved bits ECX: [31:14], [12:4], [2:1]*/
1008 #define SVM_VCPU_CPUID_L1_ECX_RESERVED 0xffffdff6
1009 /* Reserved bits EDX: [31:29], [27], [22:20], [18], [10] */
1010 #define SVM_VCPU_CPUID_L1_EDX_RESERVED 0xe8740400
1012 #define bitmaskof(idx) (1U << ((idx) & 31))
1013 static void svm_vmexit_do_cpuid(struct vmcb_struct *vmcb,
1014 struct cpu_user_regs *regs)
1016 unsigned long input = regs->eax;
1017 unsigned int eax, ebx, ecx, edx;
1018 struct vcpu *v = current;
1019 int inst_len;
1021 hvm_cpuid(input, &eax, &ebx, &ecx, &edx);
1023 switch ( input )
1025 case 0x00000001:
1026 /* Clear out reserved bits. */
1027 ecx &= ~SVM_VCPU_CPUID_L1_ECX_RESERVED;
1028 edx &= ~SVM_VCPU_CPUID_L1_EDX_RESERVED;
1030 /* Guest should only see one logical processor.
1031 * See details on page 23 of AMD CPUID Specification.
1032 */
1033 __clear_bit(X86_FEATURE_HT & 31, &edx);
1034 ebx &= 0xFF00FFFF; /* clear the logical processor count when HTT=0 */
1035 ebx |= 0x00010000; /* set to 1 just as a precaution */
1036 break;
1038 case 0x80000001:
1039 /* Filter features which are shared with 0x00000001:EDX. */
1040 if ( vlapic_hw_disabled(vcpu_vlapic(v)) )
1041 __clear_bit(X86_FEATURE_APIC & 31, &edx);
1042 #if CONFIG_PAGING_LEVELS >= 3
1043 if ( !v->domain->arch.hvm_domain.params[HVM_PARAM_PAE_ENABLED] )
1044 #endif
1045 __clear_bit(X86_FEATURE_PAE & 31, &edx);
1046 __clear_bit(X86_FEATURE_PSE36 & 31, &edx);
1048 /* Filter all other features according to a whitelist. */
1049 edx &= (0x0183f3ff | /* features shared with 0x00000001:EDX */
1050 bitmaskof(X86_FEATURE_NX) |
1051 bitmaskof(X86_FEATURE_LM) |
1052 bitmaskof(X86_FEATURE_SYSCALL) |
1053 bitmaskof(X86_FEATURE_MP) |
1054 bitmaskof(X86_FEATURE_MMXEXT) |
1055 bitmaskof(X86_FEATURE_FFXSR));
1056 break;
1058 case 0x80000007:
1059 case 0x8000000A:
1060 /* Mask out features of power management and SVM extension. */
1061 eax = ebx = ecx = edx = 0;
1062 break;
1064 case 0x80000008:
1065 /* Make sure Number of CPU core is 1 when HTT=0 */
1066 ecx &= 0xFFFFFF00;
1067 break;
1070 regs->eax = eax;
1071 regs->ebx = ebx;
1072 regs->ecx = ecx;
1073 regs->edx = edx;
1075 HVMTRACE_3D(CPUID, v, input,
1076 ((uint64_t)eax << 32) | ebx, ((uint64_t)ecx << 32) | edx);
1078 inst_len = __get_instruction_length(v, INSTR_CPUID, NULL);
1079 __update_guest_eip(regs, inst_len);
1082 static unsigned long *get_reg_p(
1083 unsigned int gpreg,
1084 struct cpu_user_regs *regs, struct vmcb_struct *vmcb)
1086 unsigned long *reg_p = NULL;
1087 switch (gpreg)
1089 case SVM_REG_EAX:
1090 reg_p = (unsigned long *)&regs->eax;
1091 break;
1092 case SVM_REG_EBX:
1093 reg_p = (unsigned long *)&regs->ebx;
1094 break;
1095 case SVM_REG_ECX:
1096 reg_p = (unsigned long *)&regs->ecx;
1097 break;
1098 case SVM_REG_EDX:
1099 reg_p = (unsigned long *)&regs->edx;
1100 break;
1101 case SVM_REG_EDI:
1102 reg_p = (unsigned long *)&regs->edi;
1103 break;
1104 case SVM_REG_ESI:
1105 reg_p = (unsigned long *)&regs->esi;
1106 break;
1107 case SVM_REG_EBP:
1108 reg_p = (unsigned long *)&regs->ebp;
1109 break;
1110 case SVM_REG_ESP:
1111 reg_p = (unsigned long *)&regs->esp;
1112 break;
1113 #ifdef __x86_64__
1114 case SVM_REG_R8:
1115 reg_p = (unsigned long *)&regs->r8;
1116 break;
1117 case SVM_REG_R9:
1118 reg_p = (unsigned long *)&regs->r9;
1119 break;
1120 case SVM_REG_R10:
1121 reg_p = (unsigned long *)&regs->r10;
1122 break;
1123 case SVM_REG_R11:
1124 reg_p = (unsigned long *)&regs->r11;
1125 break;
1126 case SVM_REG_R12:
1127 reg_p = (unsigned long *)&regs->r12;
1128 break;
1129 case SVM_REG_R13:
1130 reg_p = (unsigned long *)&regs->r13;
1131 break;
1132 case SVM_REG_R14:
1133 reg_p = (unsigned long *)&regs->r14;
1134 break;
1135 case SVM_REG_R15:
1136 reg_p = (unsigned long *)&regs->r15;
1137 break;
1138 #endif
1139 default:
1140 BUG();
1143 return reg_p;
1147 static unsigned long get_reg(
1148 unsigned int gpreg, struct cpu_user_regs *regs, struct vmcb_struct *vmcb)
1150 unsigned long *gp;
1151 gp = get_reg_p(gpreg, regs, vmcb);
1152 return *gp;
1156 static void set_reg(
1157 unsigned int gpreg, unsigned long value,
1158 struct cpu_user_regs *regs, struct vmcb_struct *vmcb)
1160 unsigned long *gp;
1161 gp = get_reg_p(gpreg, regs, vmcb);
1162 *gp = value;
1166 static void svm_dr_access(struct vcpu *v, struct cpu_user_regs *regs)
1168 HVMTRACE_0D(DR_WRITE, v);
1169 __restore_debug_registers(v);
1173 static void svm_get_prefix_info(struct vcpu *v, unsigned int dir,
1174 svm_segment_register_t **seg,
1175 unsigned int *asize)
1177 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1178 unsigned char inst[MAX_INST_LEN];
1179 int i;
1181 memset(inst, 0, MAX_INST_LEN);
1182 if (inst_copy_from_guest(inst, svm_rip2pointer(v), sizeof(inst))
1183 != MAX_INST_LEN)
1185 gdprintk(XENLOG_ERR, "get guest instruction failed\n");
1186 domain_crash(current->domain);
1187 return;
1190 for (i = 0; i < MAX_INST_LEN; i++)
1192 switch (inst[i])
1194 case 0xf3: /* REPZ */
1195 case 0xf2: /* REPNZ */
1196 case 0xf0: /* LOCK */
1197 case 0x66: /* data32 */
1198 #ifdef __x86_64__
1199 /* REX prefixes */
1200 case 0x40:
1201 case 0x41:
1202 case 0x42:
1203 case 0x43:
1204 case 0x44:
1205 case 0x45:
1206 case 0x46:
1207 case 0x47:
1209 case 0x48:
1210 case 0x49:
1211 case 0x4a:
1212 case 0x4b:
1213 case 0x4c:
1214 case 0x4d:
1215 case 0x4e:
1216 case 0x4f:
1217 #endif
1218 continue;
1219 case 0x67: /* addr32 */
1220 *asize ^= 48; /* Switch 16/32 bits */
1221 continue;
1222 case 0x2e: /* CS */
1223 *seg = &vmcb->cs;
1224 continue;
1225 case 0x36: /* SS */
1226 *seg = &vmcb->ss;
1227 continue;
1228 case 0x26: /* ES */
1229 *seg = &vmcb->es;
1230 continue;
1231 case 0x64: /* FS */
1232 svm_sync_vmcb(v);
1233 *seg = &vmcb->fs;
1234 continue;
1235 case 0x65: /* GS */
1236 svm_sync_vmcb(v);
1237 *seg = &vmcb->gs;
1238 continue;
1239 case 0x3e: /* DS */
1240 *seg = &vmcb->ds;
1241 continue;
1242 default:
1243 break;
1245 return;
1250 /* Get the address of INS/OUTS instruction */
1251 static int svm_get_io_address(
1252 struct vcpu *v, struct cpu_user_regs *regs,
1253 unsigned int size, ioio_info_t info,
1254 unsigned long *count, unsigned long *addr)
1256 unsigned long reg;
1257 unsigned int asize, isize;
1258 int long_mode = 0;
1259 svm_segment_register_t *seg = NULL;
1260 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1262 /* If we're in long mode, don't check the segment presence & limit */
1263 long_mode = vmcb->cs.attr.fields.l && hvm_long_mode_enabled(v);
1265 /* d field of cs.attr is 1 for 32-bit, 0 for 16 or 64 bit.
1266 * l field combined with EFER_LMA says whether it's 16 or 64 bit.
1267 */
1268 asize = (long_mode)?64:((vmcb->cs.attr.fields.db)?32:16);
1271 /* The ins/outs instructions are single byte, so if we have got more
1272 * than one byte (+ maybe rep-prefix), we have some prefix so we need
1273 * to figure out what it is...
1274 */
1275 isize = vmcb->exitinfo2 - regs->eip;
1277 if (info.fields.rep)
1278 isize --;
1280 if (isize > 1)
1281 svm_get_prefix_info(v, info.fields.type, &seg, &asize);
1283 if (info.fields.type == IOREQ_WRITE)
1285 reg = regs->esi;
1286 if (!seg) /* If no prefix, use DS. */
1287 seg = &vmcb->ds;
1288 if (!long_mode && (seg->attr.fields.type & 0xa) == 0x8) {
1289 svm_inject_exception(TRAP_gp_fault, 0, 0);
1290 return 0;
1293 else
1295 reg = regs->edi;
1296 seg = &vmcb->es; /* Note: This is ALWAYS ES. */
1297 if (!long_mode && (seg->attr.fields.type & 0xa) != 0x2) {
1298 svm_inject_exception(TRAP_gp_fault, 0, 0);
1299 return 0;
1303 /* If the segment isn't present, give GP fault! */
1304 if (!long_mode && !seg->attr.fields.p)
1306 svm_inject_exception(TRAP_gp_fault, 0, 0);
1307 return 0;
1310 if (asize == 16)
1312 *addr = (reg & 0xFFFF);
1313 *count = regs->ecx & 0xffff;
1315 else
1317 *addr = reg;
1318 *count = regs->ecx;
1320 if (!info.fields.rep)
1321 *count = 1;
1323 if (!long_mode)
1325 ASSERT(*addr == (u32)*addr);
1326 if ((u32)(*addr + size - 1) < (u32)*addr ||
1327 (seg->attr.fields.type & 0xc) != 0x4 ?
1328 *addr + size - 1 > seg->limit :
1329 *addr <= seg->limit)
1331 svm_inject_exception(TRAP_gp_fault, 0, 0);
1332 return 0;
1335 /* Check the limit for repeated instructions, as above we checked only
1336 the first instance. Truncate the count if a limit violation would
1337 occur. Note that the checking is not necessary for page granular
1338 segments as transfers crossing page boundaries will be broken up
1339 anyway. */
1340 if (!seg->attr.fields.g && *count > 1)
1342 if ((seg->attr.fields.type & 0xc) != 0x4)
1344 /* expand-up */
1345 if (!(regs->eflags & EF_DF))
1347 if (*addr + *count * size - 1 < *addr ||
1348 *addr + *count * size - 1 > seg->limit)
1349 *count = (seg->limit + 1UL - *addr) / size;
1351 else
1353 if (*count - 1 > *addr / size)
1354 *count = *addr / size + 1;
1357 else
1359 /* expand-down */
1360 if (!(regs->eflags & EF_DF))
1362 if (*count - 1 > -(s32)*addr / size)
1363 *count = -(s32)*addr / size + 1UL;
1365 else
1367 if (*addr < (*count - 1) * size ||
1368 *addr - (*count - 1) * size <= seg->limit)
1369 *count = (*addr - seg->limit - 1) / size + 1;
1372 ASSERT(*count);
1375 *addr += seg->base;
1377 #ifdef __x86_64__
1378 else
1380 if (seg == &vmcb->fs || seg == &vmcb->gs)
1381 *addr += seg->base;
1383 if (!is_canonical_address(*addr) ||
1384 !is_canonical_address(*addr + size - 1))
1386 svm_inject_exception(TRAP_gp_fault, 0, 0);
1387 return 0;
1389 if (*count > (1UL << 48) / size)
1390 *count = (1UL << 48) / size;
1391 if (!(regs->eflags & EF_DF))
1393 if (*addr + *count * size - 1 < *addr ||
1394 !is_canonical_address(*addr + *count * size - 1))
1395 *count = (*addr & ~((1UL << 48) - 1)) / size;
1397 else
1399 if ((*count - 1) * size > *addr ||
1400 !is_canonical_address(*addr + (*count - 1) * size))
1401 *count = (*addr & ~((1UL << 48) - 1)) / size + 1;
1403 ASSERT(*count);
1405 #endif
1407 return 1;
1411 static void svm_io_instruction(struct vcpu *v)
1413 struct cpu_user_regs *regs;
1414 struct hvm_io_op *pio_opp;
1415 unsigned int port;
1416 unsigned int size, dir, df;
1417 ioio_info_t info;
1418 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1420 pio_opp = &current->arch.hvm_vcpu.io_op;
1421 pio_opp->instr = INSTR_PIO;
1422 pio_opp->flags = 0;
1424 regs = &pio_opp->io_context;
1426 /* Copy current guest state into io instruction state structure. */
1427 memcpy(regs, guest_cpu_user_regs(), HVM_CONTEXT_STACK_BYTES);
1429 info.bytes = vmcb->exitinfo1;
1431 port = info.fields.port; /* port used to be addr */
1432 dir = info.fields.type; /* direction */
1433 df = regs->eflags & X86_EFLAGS_DF ? 1 : 0;
1435 if (info.fields.sz32)
1436 size = 4;
1437 else if (info.fields.sz16)
1438 size = 2;
1439 else
1440 size = 1;
1442 if (dir==IOREQ_READ)
1443 HVMTRACE_2D(IO_READ, v, port, size);
1444 else
1445 HVMTRACE_3D(IO_WRITE, v, port, size, regs->eax);
1447 HVM_DBG_LOG(DBG_LEVEL_IO,
1448 "svm_io_instruction: port 0x%x eip=%x:%"PRIx64", "
1449 "exit_qualification = %"PRIx64,
1450 port, vmcb->cs.sel, (uint64_t)regs->eip, info.bytes);
1452 /* string instruction */
1453 if (info.fields.str)
1455 unsigned long addr, count;
1456 paddr_t paddr;
1457 unsigned long gfn;
1458 uint32_t pfec;
1459 int sign = regs->eflags & X86_EFLAGS_DF ? -1 : 1;
1461 if (!svm_get_io_address(v, regs, size, info, &count, &addr))
1463 /* We failed to get a valid address, so don't do the IO operation -
1464 * it would just get worse if we do! Hopefully the guest is handling
1465 * gp-faults...
1466 */
1467 return;
1470 /* "rep" prefix */
1471 if (info.fields.rep)
1473 pio_opp->flags |= REPZ;
1476 /* Translate the address to a physical address */
1477 pfec = PFEC_page_present;
1478 if ( dir == IOREQ_READ ) /* Read from PIO --> write to RAM */
1479 pfec |= PFEC_write_access;
1480 if ( ring_3(regs) )
1481 pfec |= PFEC_user_mode;
1482 gfn = paging_gva_to_gfn(v, addr, &pfec);
1483 if ( gfn == INVALID_GFN )
1485 /* The guest does not have the RAM address mapped.
1486 * Need to send in a page fault */
1487 svm_inject_exception(TRAP_page_fault, pfec, addr);
1488 return;
1490 paddr = (paddr_t)gfn << PAGE_SHIFT | (addr & ~PAGE_MASK);
1492 /*
1493 * Handle string pio instructions that cross pages or that
1494 * are unaligned. See the comments in hvm_platform.c/handle_mmio()
1495 */
1496 if ((addr & PAGE_MASK) != ((addr + size - 1) & PAGE_MASK))
1498 unsigned long value = 0;
1500 pio_opp->flags |= OVERLAP;
1501 pio_opp->addr = addr;
1503 if (dir == IOREQ_WRITE) /* OUTS */
1505 if ( hvm_paging_enabled(current) )
1507 int rv = hvm_copy_from_guest_virt(&value, addr, size);
1508 if ( rv != 0 )
1510 /* Failed on the page-spanning copy. Inject PF into
1511 * the guest for the address where we failed. */
1512 addr += size - rv;
1513 gdprintk(XENLOG_DEBUG, "Pagefault reading non-io side "
1514 "of a page-spanning PIO: va=%#lx\n", addr);
1515 svm_inject_exception(TRAP_page_fault, 0, addr);
1516 return;
1519 else
1520 (void) hvm_copy_from_guest_phys(&value, addr, size);
1521 } else /* dir != IOREQ_WRITE */
1522 /* Remember where to write the result, as a *VA*.
1523 * Must be a VA so we can handle the page overlap
1524 * correctly in hvm_pio_assist() */
1525 pio_opp->addr = addr;
1527 if (count == 1)
1528 regs->eip = vmcb->exitinfo2;
1530 send_pio_req(port, 1, size, value, dir, df, 0);
1532 else
1534 unsigned long last_addr = sign > 0 ? addr + count * size - 1
1535 : addr - (count - 1) * size;
1537 if ((addr & PAGE_MASK) != (last_addr & PAGE_MASK))
1539 if (sign > 0)
1540 count = (PAGE_SIZE - (addr & ~PAGE_MASK)) / size;
1541 else
1542 count = (addr & ~PAGE_MASK) / size + 1;
1544 else
1545 regs->eip = vmcb->exitinfo2;
1547 send_pio_req(port, count, size, paddr, dir, df, 1);
1550 else
1552 /*
1553 * On SVM, the RIP of the instruction following the IN/OUT is saved in
1554 * ExitInfo2
1555 */
1556 regs->eip = vmcb->exitinfo2;
1558 if (port == 0xe9 && dir == IOREQ_WRITE && size == 1)
1559 hvm_print_line(v, regs->eax); /* guest debug output */
1561 send_pio_req(port, 1, size, regs->eax, dir, df, 0);
1565 static void mov_from_cr(int cr, int gp, struct cpu_user_regs *regs)
1567 unsigned long value = 0;
1568 struct vcpu *v = current;
1569 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1571 switch ( cr )
1573 case 0:
1574 value = v->arch.hvm_vcpu.guest_cr[0];
1575 break;
1576 case 3:
1577 value = (unsigned long)v->arch.hvm_vcpu.guest_cr[3];
1578 break;
1579 case 4:
1580 value = (unsigned long)v->arch.hvm_vcpu.guest_cr[4];
1581 break;
1582 default:
1583 gdprintk(XENLOG_ERR, "invalid cr: %d\n", cr);
1584 domain_crash(v->domain);
1585 return;
1588 HVMTRACE_2D(CR_READ, v, cr, value);
1590 set_reg(gp, value, regs, vmcb);
1592 HVM_DBG_LOG(DBG_LEVEL_VMMU, "mov_from_cr: CR%d, value = %lx", cr, value);
1595 static int mov_to_cr(int gpreg, int cr, struct cpu_user_regs *regs)
1597 unsigned long value;
1598 struct vcpu *v = current;
1599 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1601 value = get_reg(gpreg, regs, vmcb);
1603 HVMTRACE_2D(CR_WRITE, v, cr, value);
1605 HVM_DBG_LOG(DBG_LEVEL_1, "mov_to_cr: CR%d, value = %lx, current = %p",
1606 cr, value, v);
1608 switch ( cr )
1610 case 0:
1611 return hvm_set_cr0(value);
1612 case 3:
1613 return hvm_set_cr3(value);
1614 case 4:
1615 return hvm_set_cr4(value);
1616 default:
1617 gdprintk(XENLOG_ERR, "invalid cr: %d\n", cr);
1618 domain_crash(v->domain);
1619 return 0;
1622 return 1;
1625 static void svm_cr_access(
1626 struct vcpu *v, unsigned int cr, unsigned int type,
1627 struct cpu_user_regs *regs)
1629 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1630 int inst_len = 0;
1631 int index,addr_size,i;
1632 unsigned int gpreg,offset;
1633 unsigned long value,addr;
1634 u8 buffer[MAX_INST_LEN];
1635 u8 prefix = 0;
1636 u8 modrm;
1637 enum x86_segment seg;
1638 int result = 1;
1639 enum instruction_index list_a[] = {INSTR_MOV2CR, INSTR_CLTS, INSTR_LMSW};
1640 enum instruction_index list_b[] = {INSTR_MOVCR2, INSTR_SMSW};
1641 enum instruction_index match;
1643 inst_copy_from_guest(buffer, svm_rip2pointer(v), sizeof(buffer));
1645 /* get index to first actual instruction byte - as we will need to know
1646 where the prefix lives later on */
1647 index = skip_prefix_bytes(buffer, sizeof(buffer));
1649 if ( type == TYPE_MOV_TO_CR )
1651 inst_len = __get_instruction_length_from_list(
1652 v, list_a, ARRAY_SIZE(list_a), &buffer[index], &match);
1654 else /* type == TYPE_MOV_FROM_CR */
1656 inst_len = __get_instruction_length_from_list(
1657 v, list_b, ARRAY_SIZE(list_b), &buffer[index], &match);
1660 inst_len += index;
1662 /* Check for REX prefix - it's ALWAYS the last byte of any prefix bytes */
1663 if (index > 0 && (buffer[index-1] & 0xF0) == 0x40)
1664 prefix = buffer[index-1];
1666 HVM_DBG_LOG(DBG_LEVEL_1, "eip = %lx", (unsigned long)regs->eip);
1668 switch ( match )
1671 case INSTR_MOV2CR:
1672 gpreg = decode_src_reg(prefix, buffer[index+2]);
1673 result = mov_to_cr(gpreg, cr, regs);
1674 break;
1676 case INSTR_MOVCR2:
1677 gpreg = decode_src_reg(prefix, buffer[index+2]);
1678 mov_from_cr(cr, gpreg, regs);
1679 break;
1681 case INSTR_CLTS:
1682 /* TS being cleared means that it's time to restore fpu state. */
1683 setup_fpu(current);
1684 vmcb->exception_intercepts &= ~(1U << TRAP_no_device);
1685 vmcb->cr0 &= ~X86_CR0_TS; /* clear TS */
1686 v->arch.hvm_vcpu.guest_cr[0] &= ~X86_CR0_TS; /* clear TS */
1687 HVMTRACE_0D(CLTS, current);
1688 break;
1690 case INSTR_LMSW:
1691 gpreg = decode_src_reg(prefix, buffer[index+2]);
1692 value = get_reg(gpreg, regs, vmcb) & 0xF;
1693 value = (v->arch.hvm_vcpu.guest_cr[0] & ~0xF) | value;
1694 result = hvm_set_cr0(value);
1695 HVMTRACE_1D(LMSW, current, value);
1696 break;
1698 case INSTR_SMSW:
1699 value = v->arch.hvm_vcpu.guest_cr[0] & 0xFFFF;
1700 modrm = buffer[index+2];
1701 addr_size = svm_guest_x86_mode(v);
1702 if ( addr_size < 2 )
1703 addr_size = 2;
1704 if ( likely((modrm & 0xC0) >> 6 == 3) )
1706 gpreg = decode_src_reg(prefix, modrm);
1707 set_reg(gpreg, value, regs, vmcb);
1709 /*
1710 * For now, only implement decode of the offset mode, since that's the
1711 * only mode observed in a real-world OS. This code is also making the
1712 * assumption that we'll never hit this code in long mode.
1713 */
1714 else if ( (modrm == 0x26) || (modrm == 0x25) )
1716 seg = x86_seg_ds;
1717 i = index;
1718 /* Segment or address size overrides? */
1719 while ( i-- )
1721 switch ( buffer[i] )
1723 case 0x26: seg = x86_seg_es; break;
1724 case 0x2e: seg = x86_seg_cs; break;
1725 case 0x36: seg = x86_seg_ss; break;
1726 case 0x64: seg = x86_seg_fs; break;
1727 case 0x65: seg = x86_seg_gs; break;
1728 case 0x67: addr_size ^= 6; break;
1731 /* Bail unless this really is a seg_base + offset case */
1732 if ( ((modrm == 0x26) && (addr_size == 4)) ||
1733 ((modrm == 0x25) && (addr_size == 2)) )
1735 gdprintk(XENLOG_ERR, "SMSW emulation at guest address: "
1736 "%lx failed due to unhandled addressing mode."
1737 "ModRM byte was: %x \n", svm_rip2pointer(v), modrm);
1738 domain_crash(v->domain);
1740 inst_len += addr_size;
1741 offset = *(( unsigned int *) ( void *) &buffer[index + 3]);
1742 offset = ( addr_size == 4 ) ? offset : ( offset & 0xFFFF );
1743 addr = hvm_get_segment_base(v, seg);
1744 addr += offset;
1745 hvm_copy_to_guest_virt(addr,&value,2);
1747 else
1749 gdprintk(XENLOG_ERR, "SMSW emulation at guest address: %lx "
1750 "failed due to unhandled addressing mode!"
1751 "ModRM byte was: %x \n", svm_rip2pointer(v), modrm);
1752 domain_crash(v->domain);
1754 break;
1756 default:
1757 BUG();
1760 if ( result )
1761 __update_guest_eip(regs, inst_len);
1764 static void svm_do_msr_access(
1765 struct vcpu *v, struct cpu_user_regs *regs)
1767 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1768 int inst_len;
1769 u64 msr_content=0;
1770 u32 ecx = regs->ecx, eax, edx;
1772 HVM_DBG_LOG(DBG_LEVEL_1, "ecx=%x, eax=%x, edx=%x, exitinfo = %lx",
1773 ecx, (u32)regs->eax, (u32)regs->edx,
1774 (unsigned long)vmcb->exitinfo1);
1776 /* is it a read? */
1777 if (vmcb->exitinfo1 == 0)
1779 switch (ecx) {
1780 case MSR_IA32_TSC:
1781 msr_content = hvm_get_guest_time(v);
1782 break;
1784 case MSR_IA32_APICBASE:
1785 msr_content = vcpu_vlapic(v)->hw.apic_base_msr;
1786 break;
1788 case MSR_EFER:
1789 msr_content = v->arch.hvm_vcpu.guest_efer;
1790 break;
1792 case MSR_IA32_MC4_MISC: /* Threshold register */
1793 case MSR_F10_MC4_MISC1 ... MSR_F10_MC4_MISC3:
1794 /*
1795 * MCA/MCE: We report that the threshold register is unavailable
1796 * for OS use (locked by the BIOS).
1797 */
1798 msr_content = 1ULL << 61; /* MC4_MISC.Locked */
1799 break;
1801 case MSR_IA32_EBC_FREQUENCY_ID:
1802 /*
1803 * This Intel-only register may be accessed if this HVM guest
1804 * has been migrated from an Intel host. The value zero is not
1805 * particularly meaningful, but at least avoids the guest crashing!
1806 */
1807 msr_content = 0;
1808 break;
1810 case MSR_K8_VM_HSAVE_PA:
1811 svm_inject_exception(TRAP_gp_fault, 0, 0);
1812 break;
1814 case MSR_IA32_MCG_CAP:
1815 case MSR_IA32_MCG_STATUS:
1816 case MSR_IA32_MC0_STATUS:
1817 case MSR_IA32_MC1_STATUS:
1818 case MSR_IA32_MC2_STATUS:
1819 case MSR_IA32_MC3_STATUS:
1820 case MSR_IA32_MC4_STATUS:
1821 case MSR_IA32_MC5_STATUS:
1822 /* No point in letting the guest see real MCEs */
1823 msr_content = 0;
1824 break;
1826 case MSR_IA32_DEBUGCTLMSR:
1827 msr_content = vmcb->debugctlmsr;
1828 break;
1830 case MSR_IA32_LASTBRANCHFROMIP:
1831 msr_content = vmcb->lastbranchfromip;
1832 break;
1834 case MSR_IA32_LASTBRANCHTOIP:
1835 msr_content = vmcb->lastbranchtoip;
1836 break;
1838 case MSR_IA32_LASTINTFROMIP:
1839 msr_content = vmcb->lastintfromip;
1840 break;
1842 case MSR_IA32_LASTINTTOIP:
1843 msr_content = vmcb->lastinttoip;
1844 break;
1846 default:
1847 if ( rdmsr_hypervisor_regs(ecx, &eax, &edx) ||
1848 rdmsr_safe(ecx, eax, edx) == 0 )
1850 regs->eax = eax;
1851 regs->edx = edx;
1852 goto done;
1854 svm_inject_exception(TRAP_gp_fault, 0, 0);
1855 return;
1857 regs->eax = msr_content & 0xFFFFFFFF;
1858 regs->edx = msr_content >> 32;
1860 done:
1861 hvmtrace_msr_read(v, ecx, msr_content);
1862 HVM_DBG_LOG(DBG_LEVEL_1, "returns: ecx=%x, eax=%lx, edx=%lx",
1863 ecx, (unsigned long)regs->eax, (unsigned long)regs->edx);
1865 inst_len = __get_instruction_length(v, INSTR_RDMSR, NULL);
1867 else
1869 msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
1871 hvmtrace_msr_write(v, ecx, msr_content);
1873 switch (ecx)
1875 case MSR_IA32_TSC:
1876 hvm_set_guest_time(v, msr_content);
1877 pt_reset(v);
1878 break;
1880 case MSR_IA32_APICBASE:
1881 vlapic_msr_set(vcpu_vlapic(v), msr_content);
1882 break;
1884 case MSR_K8_VM_HSAVE_PA:
1885 svm_inject_exception(TRAP_gp_fault, 0, 0);
1886 break;
1888 case MSR_IA32_DEBUGCTLMSR:
1889 vmcb->debugctlmsr = msr_content;
1890 if ( !msr_content || !cpu_has_svm_lbrv )
1891 break;
1892 vmcb->lbr_control.fields.enable = 1;
1893 svm_disable_intercept_for_msr(v, MSR_IA32_DEBUGCTLMSR);
1894 svm_disable_intercept_for_msr(v, MSR_IA32_LASTBRANCHFROMIP);
1895 svm_disable_intercept_for_msr(v, MSR_IA32_LASTBRANCHTOIP);
1896 svm_disable_intercept_for_msr(v, MSR_IA32_LASTINTFROMIP);
1897 svm_disable_intercept_for_msr(v, MSR_IA32_LASTINTTOIP);
1898 break;
1900 case MSR_IA32_LASTBRANCHFROMIP:
1901 vmcb->lastbranchfromip = msr_content;
1902 break;
1904 case MSR_IA32_LASTBRANCHTOIP:
1905 vmcb->lastbranchtoip = msr_content;
1906 break;
1908 case MSR_IA32_LASTINTFROMIP:
1909 vmcb->lastintfromip = msr_content;
1910 break;
1912 case MSR_IA32_LASTINTTOIP:
1913 vmcb->lastinttoip = msr_content;
1914 break;
1916 default:
1917 switch ( long_mode_do_msr_write(regs) )
1919 case HNDL_unhandled:
1920 wrmsr_hypervisor_regs(ecx, regs->eax, regs->edx);
1921 break;
1922 case HNDL_exception_raised:
1923 return;
1924 case HNDL_done:
1925 break;
1927 break;
1930 inst_len = __get_instruction_length(v, INSTR_WRMSR, NULL);
1933 __update_guest_eip(regs, inst_len);
1936 static void svm_vmexit_do_hlt(struct vmcb_struct *vmcb,
1937 struct cpu_user_regs *regs)
1939 struct vcpu *curr = current;
1940 struct hvm_intack intack = hvm_vcpu_has_pending_irq(curr);
1941 unsigned int inst_len;
1943 inst_len = __get_instruction_length(curr, INSTR_HLT, NULL);
1944 __update_guest_eip(regs, inst_len);
1946 /* Check for pending exception or new interrupt. */
1947 if ( vmcb->eventinj.fields.v ||
1948 ((intack.source != hvm_intsrc_none) &&
1949 !svm_interrupt_blocked(current, intack)) )
1951 HVMTRACE_1D(HLT, curr, /*int pending=*/ 1);
1952 return;
1955 HVMTRACE_1D(HLT, curr, /*int pending=*/ 0);
1956 hvm_hlt(regs->eflags);
1959 static void svm_vmexit_do_invalidate_cache(struct cpu_user_regs *regs)
1961 enum instruction_index list[] = { INSTR_INVD, INSTR_WBINVD };
1962 struct vcpu *curr = current;
1963 struct vmcb_struct *vmcb = curr->arch.hvm_svm.vmcb;
1964 int inst_len;
1966 if ( !list_empty(&(domain_hvm_iommu(curr->domain)->pdev_list)) )
1968 vmcb->general2_intercepts &= ~GENERAL2_INTERCEPT_WBINVD;
1969 wbinvd();
1972 inst_len = __get_instruction_length_from_list(
1973 curr, list, ARRAY_SIZE(list), NULL, NULL);
1974 __update_guest_eip(regs, inst_len);
1977 void svm_handle_invlpg(const short invlpga, struct cpu_user_regs *regs)
1979 struct vcpu *v = current;
1980 u8 opcode[MAX_INST_LEN], prefix, length = MAX_INST_LEN;
1981 unsigned long g_vaddr;
1982 int inst_len;
1984 /*
1985 * Unknown how many bytes the invlpg instruction will take. Use the
1986 * maximum instruction length here
1987 */
1988 if ( inst_copy_from_guest(opcode, svm_rip2pointer(v), length) < length )
1990 gdprintk(XENLOG_ERR, "Error reading memory %d bytes\n", length);
1991 goto crash;
1994 if ( invlpga )
1996 inst_len = __get_instruction_length(v, INSTR_INVLPGA, opcode);
1997 __update_guest_eip(regs, inst_len);
1999 /*
2000 * The address is implicit on this instruction. At the moment, we don't
2001 * use ecx (ASID) to identify individual guest pages
2002 */
2003 g_vaddr = regs->eax;
2005 else
2007 /* What about multiple prefix codes? */
2008 prefix = (is_prefix(opcode[0]) ? opcode[0] : 0);
2009 inst_len = __get_instruction_length(v, INSTR_INVLPG, opcode);
2010 if ( inst_len <= 0 )
2012 gdprintk(XENLOG_ERR, "Error getting invlpg instr len\n");
2013 goto crash;
2016 inst_len--;
2017 length -= inst_len;
2019 /*
2020 * Decode memory operand of the instruction including ModRM, SIB, and
2021 * displacement to get effective address and length in bytes. Assume
2022 * the system in either 32- or 64-bit mode.
2023 */
2024 g_vaddr = get_effective_addr_modrm64(regs, prefix, inst_len,
2025 &opcode[inst_len], &length);
2027 inst_len += length;
2028 __update_guest_eip(regs, inst_len);
2031 HVMTRACE_3D(INVLPG, v, !!invlpga, g_vaddr, (invlpga ? regs->ecx : 0));
2033 paging_invlpg(v, g_vaddr);
2034 svm_asid_g_invlpg(v, g_vaddr);
2035 return;
2037 crash:
2038 domain_crash(v->domain);
2042 /*
2043 * Reset to realmode causes execution to start at 0xF000:0xFFF0 in
2044 * 16-bit realmode. Basically, this mimics a processor reset.
2046 * returns 0 on success, non-zero otherwise
2047 */
2048 static int svm_reset_to_realmode(struct vcpu *v,
2049 struct cpu_user_regs *regs)
2051 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2053 memset(regs, 0, sizeof(struct cpu_user_regs));
2055 regs->eflags = 2;
2057 v->arch.hvm_vcpu.guest_cr[0] = X86_CR0_ET;
2058 svm_update_guest_cr(v, 0);
2060 v->arch.hvm_vcpu.guest_cr[2] = 0;
2061 svm_update_guest_cr(v, 2);
2063 v->arch.hvm_vcpu.guest_cr[4] = 0;
2064 svm_update_guest_cr(v, 4);
2066 vmcb->efer = EFER_SVME;
2068 /* This will jump to ROMBIOS */
2069 regs->eip = 0xFFF0;
2071 /* Set up the segment registers and all their hidden states. */
2072 vmcb->cs.sel = 0xF000;
2073 vmcb->cs.attr.bytes = 0x089b;
2074 vmcb->cs.limit = 0xffff;
2075 vmcb->cs.base = 0x000F0000;
2077 vmcb->ss.sel = 0x00;
2078 vmcb->ss.attr.bytes = 0x0893;
2079 vmcb->ss.limit = 0xffff;
2080 vmcb->ss.base = 0x00;
2082 vmcb->ds.sel = 0x00;
2083 vmcb->ds.attr.bytes = 0x0893;
2084 vmcb->ds.limit = 0xffff;
2085 vmcb->ds.base = 0x00;
2087 vmcb->es.sel = 0x00;
2088 vmcb->es.attr.bytes = 0x0893;
2089 vmcb->es.limit = 0xffff;
2090 vmcb->es.base = 0x00;
2092 vmcb->fs.sel = 0x00;
2093 vmcb->fs.attr.bytes = 0x0893;
2094 vmcb->fs.limit = 0xffff;
2095 vmcb->fs.base = 0x00;
2097 vmcb->gs.sel = 0x00;
2098 vmcb->gs.attr.bytes = 0x0893;
2099 vmcb->gs.limit = 0xffff;
2100 vmcb->gs.base = 0x00;
2102 vmcb->ldtr.sel = 0x00;
2103 vmcb->ldtr.attr.bytes = 0x0000;
2104 vmcb->ldtr.limit = 0x0;
2105 vmcb->ldtr.base = 0x00;
2107 vmcb->gdtr.sel = 0x00;
2108 vmcb->gdtr.attr.bytes = 0x0000;
2109 vmcb->gdtr.limit = 0x0;
2110 vmcb->gdtr.base = 0x00;
2112 vmcb->tr.sel = 0;
2113 vmcb->tr.attr.bytes = 0;
2114 vmcb->tr.limit = 0x0;
2115 vmcb->tr.base = 0;
2117 vmcb->idtr.sel = 0x00;
2118 vmcb->idtr.attr.bytes = 0x0000;
2119 vmcb->idtr.limit = 0x3ff;
2120 vmcb->idtr.base = 0x00;
2122 return 0;
2125 asmlinkage void svm_vmexit_handler(struct cpu_user_regs *regs)
2127 unsigned int exit_reason;
2128 struct vcpu *v = current;
2129 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2130 eventinj_t eventinj;
2131 int inst_len, rc;
2133 /*
2134 * Before doing anything else, we need to sync up the VLAPIC's TPR with
2135 * SVM's vTPR. It's OK if the guest doesn't touch CR8 (e.g. 32-bit Windows)
2136 * because we update the vTPR on MMIO writes to the TPR.
2137 */
2138 vlapic_set_reg(vcpu_vlapic(v), APIC_TASKPRI,
2139 (vmcb->vintr.fields.tpr & 0x0F) << 4);
2141 exit_reason = vmcb->exitcode;
2143 hvmtrace_vmexit(v, regs->eip, exit_reason);
2145 if ( unlikely(exit_reason == VMEXIT_INVALID) )
2147 svm_dump_vmcb(__func__, vmcb);
2148 goto exit_and_crash;
2151 perfc_incra(svmexits, exit_reason);
2153 /* Event delivery caused this intercept? Queue for redelivery. */
2154 eventinj = vmcb->exitintinfo;
2155 if ( unlikely(eventinj.fields.v) &&
2156 hvm_event_needs_reinjection(eventinj.fields.type,
2157 eventinj.fields.vector) )
2158 vmcb->eventinj = eventinj;
2160 switch ( exit_reason )
2162 case VMEXIT_INTR:
2163 /* Asynchronous event, handled when we STGI'd after the VMEXIT. */
2164 HVMTRACE_0D(INTR, v);
2165 break;
2167 case VMEXIT_NMI:
2168 /* Asynchronous event, handled when we STGI'd after the VMEXIT. */
2169 HVMTRACE_0D(NMI, v);
2170 break;
2172 case VMEXIT_SMI:
2173 /* Asynchronous event, handled when we STGI'd after the VMEXIT. */
2174 HVMTRACE_0D(SMI, v);
2175 break;
2177 case VMEXIT_EXCEPTION_DB:
2178 if ( !v->domain->debugger_attached )
2179 goto exit_and_crash;
2180 domain_pause_for_debugger();
2181 break;
2183 case VMEXIT_EXCEPTION_BP:
2184 if ( !v->domain->debugger_attached )
2185 goto exit_and_crash;
2186 /* AMD Vol2, 15.11: INT3, INTO, BOUND intercepts do not update RIP. */
2187 inst_len = __get_instruction_length(v, INSTR_INT3, NULL);
2188 __update_guest_eip(regs, inst_len);
2189 domain_pause_for_debugger();
2190 break;
2192 case VMEXIT_EXCEPTION_NM:
2193 svm_do_no_device_fault(vmcb);
2194 break;
2196 case VMEXIT_EXCEPTION_PF: {
2197 unsigned long va;
2198 va = vmcb->exitinfo2;
2199 regs->error_code = vmcb->exitinfo1;
2200 HVM_DBG_LOG(DBG_LEVEL_VMMU,
2201 "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx",
2202 (unsigned long)regs->eax, (unsigned long)regs->ebx,
2203 (unsigned long)regs->ecx, (unsigned long)regs->edx,
2204 (unsigned long)regs->esi, (unsigned long)regs->edi);
2206 if ( paging_fault(va, regs) )
2208 HVMTRACE_2D(PF_XEN, v, va, regs->error_code);
2209 break;
2212 svm_inject_exception(TRAP_page_fault, regs->error_code, va);
2213 break;
2216 /* Asynchronous event, handled when we STGI'd after the VMEXIT. */
2217 case VMEXIT_EXCEPTION_MC:
2218 HVMTRACE_0D(MCE, v);
2219 break;
2221 case VMEXIT_VINTR:
2222 vmcb->vintr.fields.irq = 0;
2223 vmcb->general1_intercepts &= ~GENERAL1_INTERCEPT_VINTR;
2224 break;
2226 case VMEXIT_INVD:
2227 case VMEXIT_WBINVD:
2228 svm_vmexit_do_invalidate_cache(regs);
2229 break;
2231 case VMEXIT_TASK_SWITCH: {
2232 enum hvm_task_switch_reason reason;
2233 int32_t errcode = -1;
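        /* EXITINFO2 describes the task switch: bit 36 is set for IRET, bit 38
         * for a far jump, otherwise it was a call, interrupt or exception;
         * bit 44 indicates that the low 32 bits hold an error code. */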
2234 if ( (vmcb->exitinfo2 >> 36) & 1 )
2235 reason = TSW_iret;
2236 else if ( (vmcb->exitinfo2 >> 38) & 1 )
2237 reason = TSW_jmp;
2238 else
2239 reason = TSW_call_or_int;
2240 if ( (vmcb->exitinfo2 >> 44) & 1 )
2241 errcode = (uint32_t)vmcb->exitinfo2;
2242 hvm_task_switch((uint16_t)vmcb->exitinfo1, reason, errcode);
2243 break;
2246 case VMEXIT_CPUID:
2247 svm_vmexit_do_cpuid(vmcb, regs);
2248 break;
2250 case VMEXIT_HLT:
2251 svm_vmexit_do_hlt(vmcb, regs);
2252 break;
2254 case VMEXIT_INVLPG:
2255 svm_handle_invlpg(0, regs);
2256 break;
2258 case VMEXIT_INVLPGA:
2259 svm_handle_invlpg(1, regs);
2260 break;
2262 case VMEXIT_VMMCALL:
2263 inst_len = __get_instruction_length(v, INSTR_VMCALL, NULL);
2264 HVMTRACE_1D(VMMCALL, v, regs->eax);
2265 rc = hvm_do_hypercall(regs);
2266 if ( rc != HVM_HCALL_preempted )
2268 __update_guest_eip(regs, inst_len);
2269 if ( rc == HVM_HCALL_invalidate )
2270 send_invalidate_req();
2272 break;
2274 case VMEXIT_CR0_READ ... VMEXIT_CR15_READ:
2275 svm_cr_access(v, exit_reason - VMEXIT_CR0_READ,
2276 TYPE_MOV_FROM_CR, regs);
2277 break;
2279 case VMEXIT_CR0_WRITE ... VMEXIT_CR15_WRITE:
2280 svm_cr_access(v, exit_reason - VMEXIT_CR0_WRITE,
2281 TYPE_MOV_TO_CR, regs);
2282 break;
2284 case VMEXIT_DR0_READ ... VMEXIT_DR7_READ:
2285 case VMEXIT_DR0_WRITE ... VMEXIT_DR7_WRITE:
2286 svm_dr_access(v, regs);
2287 break;
2289 case VMEXIT_IOIO:
2290 svm_io_instruction(v);
2291 break;
2293 case VMEXIT_MSR:
2294 svm_do_msr_access(v, regs);
2295 break;
2297 case VMEXIT_SHUTDOWN:
2298 hvm_triple_fault();
2299 break;
2301 case VMEXIT_RDTSCP:
2302 case VMEXIT_MONITOR:
2303 case VMEXIT_MWAIT:
2304 case VMEXIT_VMRUN:
2305 case VMEXIT_VMLOAD:
2306 case VMEXIT_VMSAVE:
2307 case VMEXIT_STGI:
2308 case VMEXIT_CLGI:
2309 case VMEXIT_SKINIT:
2310 svm_inject_exception(TRAP_invalid_op, HVM_DELIVER_NO_ERROR_CODE, 0);
2311 break;
2313 case VMEXIT_NPF:
2314 perfc_incra(svmexits, VMEXIT_NPF_PERFC);
2315 regs->error_code = vmcb->exitinfo1;
2316 svm_do_nested_pgfault(vmcb->exitinfo2, regs);
2317 break;
2319 default:
2320 exit_and_crash:
2321 gdprintk(XENLOG_ERR, "unexpected VMEXIT: exit reason = 0x%x, "
2322 "exitinfo1 = %"PRIx64", exitinfo2 = %"PRIx64"\n",
2323 exit_reason,
2324 (u64)vmcb->exitinfo1, (u64)vmcb->exitinfo2);
2325 domain_crash(v->domain);
2326 break;
2330 asmlinkage void svm_trace_vmentry(void)
2332 struct vcpu *v = current;
2334 /* This is the last C code before the VMRUN instruction. */
2335 hvmtrace_vmentry(v);
2338 /*
2339 * Local variables:
2340 * mode: C
2341 * c-set-style: "BSD"
2342 * c-basic-offset: 4
2343 * tab-width: 4
2344 * indent-tabs-mode: nil
2345 * End:
2346 */