
xen/arch/x86/hvm/svm/svm.c @ 16311:e11b24680480

x86, svm: Add hunk I missed from Jan's debug-register handling
patch. We need to handle SVM debug-register read access intercepts.
Signed-off-by: Keir Fraser <keir@xensource.com>
author:   Keir Fraser <keir@xensource.com>
date:     Fri Nov 02 16:06:06 2007 +0000 (2007-11-02)
parents:  3582a7a52c10
children: 650cadd1b283
1 /*
2 * svm.c: handling SVM architecture-related VM exits
3 * Copyright (c) 2004, Intel Corporation.
4 * Copyright (c) 2005-2007, Advanced Micro Devices, Inc.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
17 * Place - Suite 330, Boston, MA 02111-1307 USA.
18 */
20 #include <xen/config.h>
21 #include <xen/init.h>
22 #include <xen/lib.h>
23 #include <xen/trace.h>
24 #include <xen/sched.h>
25 #include <xen/irq.h>
26 #include <xen/softirq.h>
27 #include <xen/hypercall.h>
28 #include <xen/domain_page.h>
29 #include <asm/current.h>
30 #include <asm/io.h>
31 #include <asm/paging.h>
32 #include <asm/p2m.h>
33 #include <asm/regs.h>
34 #include <asm/cpufeature.h>
35 #include <asm/processor.h>
36 #include <asm/types.h>
37 #include <asm/msr.h>
38 #include <asm/spinlock.h>
39 #include <asm/hvm/hvm.h>
40 #include <asm/hvm/support.h>
41 #include <asm/hvm/io.h>
42 #include <asm/hvm/svm/asid.h>
43 #include <asm/hvm/svm/svm.h>
44 #include <asm/hvm/svm/vmcb.h>
45 #include <asm/hvm/svm/emulate.h>
46 #include <asm/hvm/svm/intr.h>
47 #include <asm/x86_emulate.h>
48 #include <public/sched.h>
49 #include <asm/hvm/vpt.h>
50 #include <asm/hvm/trace.h>
51 #include <asm/hap.h>
53 u32 svm_feature_flags;
55 #define set_segment_register(name, value) \
56 asm volatile ( "movw %%ax ,%%" STR(name) "" : : "a" (value) )
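/*
 * For illustration, set_segment_register(ds, 0) expands (roughly) to
 *     asm volatile ( "movw %%ax ,%%ds" : : "a" (0) );
 * i.e. the selector value is loaded via %ax into the named segment register.
 */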
58 enum handler_return { HNDL_done, HNDL_unhandled, HNDL_exception_raised };
60 int inst_copy_from_guest(unsigned char *buf, unsigned long guest_eip,
61 int inst_len);
62 asmlinkage void do_IRQ(struct cpu_user_regs *);
64 static int svm_reset_to_realmode(
65 struct vcpu *v, struct cpu_user_regs *regs);
66 static void svm_update_guest_cr(struct vcpu *v, unsigned int cr);
68 /* va of hardware host save area */
69 static void *hsa[NR_CPUS] __read_mostly;
71 /* vmcb used for extended host state */
72 static void *root_vmcb[NR_CPUS] __read_mostly;
74 static void svm_update_guest_efer(struct vcpu *v);
76 static void inline __update_guest_eip(
77 struct cpu_user_regs *regs, int inst_len)
78 {
79 ASSERT(inst_len > 0);
80 regs->eip += inst_len;
81 regs->eflags &= ~X86_EFLAGS_RF;
82 }
84 static void svm_inject_exception(
85 struct vcpu *v, int trap, int ev, int error_code)
86 {
87 eventinj_t event;
88 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
90 if ( trap == TRAP_page_fault )
91 HVMTRACE_2D(PF_INJECT, v, v->arch.hvm_vcpu.guest_cr[2], error_code);
92 else
93 HVMTRACE_2D(INJ_EXC, v, trap, error_code);
95 event.bytes = 0;
96 event.fields.v = 1;
97 event.fields.type = X86_EVENTTYPE_HW_EXCEPTION;
98 event.fields.vector = trap;
99 event.fields.ev = ev;
100 event.fields.errorcode = error_code;
102 vmcb->eventinj = event;
103 }
105 static void svm_cpu_down(void)
106 {
107 write_efer(read_efer() & ~EFER_SVME);
108 }
110 static enum handler_return long_mode_do_msr_write(struct cpu_user_regs *regs)
111 {
112 u64 msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
113 u32 ecx = regs->ecx;
115 HVM_DBG_LOG(DBG_LEVEL_0, "msr %x msr_content %"PRIx64,
116 ecx, msr_content);
118 switch ( ecx )
119 {
120 case MSR_EFER:
121 if ( !hvm_set_efer(msr_content) )
122 return HNDL_exception_raised;
123 break;
125 case MSR_IA32_MC4_MISC: /* Threshold register */
126 /*
127 * MCA/MCE: Threshold register is reported to be locked, so we ignore
128 * all write accesses. This behaviour matches real HW, so guests should
129 * have no problem with this.
130 */
131 break;
133 default:
134 return HNDL_unhandled;
135 }
137 return HNDL_done;
138 }
140 static void svm_save_dr(struct vcpu *v)
141 {
142 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
144 if ( !v->arch.hvm_vcpu.flag_dr_dirty )
145 return;
147 /* Clear the DR dirty flag and re-enable intercepts for DR accesses. */
148 v->arch.hvm_vcpu.flag_dr_dirty = 0;
149 v->arch.hvm_svm.vmcb->dr_intercepts = ~0u;
151 v->arch.guest_context.debugreg[0] = read_debugreg(0);
152 v->arch.guest_context.debugreg[1] = read_debugreg(1);
153 v->arch.guest_context.debugreg[2] = read_debugreg(2);
154 v->arch.guest_context.debugreg[3] = read_debugreg(3);
155 v->arch.guest_context.debugreg[6] = vmcb->dr6;
156 v->arch.guest_context.debugreg[7] = vmcb->dr7;
157 }
159 static void __restore_debug_registers(struct vcpu *v)
160 {
161 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
163 ASSERT(!v->arch.hvm_vcpu.flag_dr_dirty);
164 v->arch.hvm_vcpu.flag_dr_dirty = 1;
165 vmcb->dr_intercepts = 0;
167 write_debugreg(0, v->arch.guest_context.debugreg[0]);
168 write_debugreg(1, v->arch.guest_context.debugreg[1]);
169 write_debugreg(2, v->arch.guest_context.debugreg[2]);
170 write_debugreg(3, v->arch.guest_context.debugreg[3]);
171 vmcb->dr6 = v->arch.guest_context.debugreg[6];
172 vmcb->dr7 = v->arch.guest_context.debugreg[7];
173 }
175 /*
176 * DR7 is saved and restored on every vmexit. Other debug registers only
177 * need to be restored if their value is going to affect execution -- i.e.,
178 * if one of the breakpoints is enabled. So mask out all bits that don't
179 * enable some breakpoint functionality.
180 */
181 #define DR7_ACTIVE_MASK 0xff
183 static void svm_restore_dr(struct vcpu *v)
184 {
185 if ( unlikely(v->arch.guest_context.debugreg[7] & DR7_ACTIVE_MASK) )
186 __restore_debug_registers(v);
187 }
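/*
 * Example: the architectural reset value of DR7 is 0x00000400, which has
 * none of the L0-G3 enable bits (bits 0-7) set, so it is masked out by
 * DR7_ACTIVE_MASK and the restore above is skipped. A guest DR7 such as
 * 0x00000401 (L0 set) would trigger __restore_debug_registers().
 */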
189 int svm_vmcb_save(struct vcpu *v, struct hvm_hw_cpu *c)
190 {
191 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
193 c->cr0 = v->arch.hvm_vcpu.guest_cr[0];
194 c->cr2 = v->arch.hvm_vcpu.guest_cr[2];
195 c->cr3 = v->arch.hvm_vcpu.guest_cr[3];
196 c->cr4 = v->arch.hvm_vcpu.guest_cr[4];
198 c->idtr_limit = vmcb->idtr.limit;
199 c->idtr_base = vmcb->idtr.base;
201 c->gdtr_limit = vmcb->gdtr.limit;
202 c->gdtr_base = vmcb->gdtr.base;
204 c->cs_sel = vmcb->cs.sel;
205 c->cs_limit = vmcb->cs.limit;
206 c->cs_base = vmcb->cs.base;
207 c->cs_arbytes = vmcb->cs.attr.bytes;
209 c->ds_sel = vmcb->ds.sel;
210 c->ds_limit = vmcb->ds.limit;
211 c->ds_base = vmcb->ds.base;
212 c->ds_arbytes = vmcb->ds.attr.bytes;
214 c->es_sel = vmcb->es.sel;
215 c->es_limit = vmcb->es.limit;
216 c->es_base = vmcb->es.base;
217 c->es_arbytes = vmcb->es.attr.bytes;
219 c->ss_sel = vmcb->ss.sel;
220 c->ss_limit = vmcb->ss.limit;
221 c->ss_base = vmcb->ss.base;
222 c->ss_arbytes = vmcb->ss.attr.bytes;
224 c->fs_sel = vmcb->fs.sel;
225 c->fs_limit = vmcb->fs.limit;
226 c->fs_base = vmcb->fs.base;
227 c->fs_arbytes = vmcb->fs.attr.bytes;
229 c->gs_sel = vmcb->gs.sel;
230 c->gs_limit = vmcb->gs.limit;
231 c->gs_base = vmcb->gs.base;
232 c->gs_arbytes = vmcb->gs.attr.bytes;
234 c->tr_sel = vmcb->tr.sel;
235 c->tr_limit = vmcb->tr.limit;
236 c->tr_base = vmcb->tr.base;
237 c->tr_arbytes = vmcb->tr.attr.bytes;
239 c->ldtr_sel = vmcb->ldtr.sel;
240 c->ldtr_limit = vmcb->ldtr.limit;
241 c->ldtr_base = vmcb->ldtr.base;
242 c->ldtr_arbytes = vmcb->ldtr.attr.bytes;
244 c->sysenter_cs = vmcb->sysenter_cs;
245 c->sysenter_esp = vmcb->sysenter_esp;
246 c->sysenter_eip = vmcb->sysenter_eip;
248 c->pending_event = 0;
249 c->error_code = 0;
250 if ( vmcb->eventinj.fields.v &&
251 hvm_event_needs_reinjection(vmcb->eventinj.fields.type,
252 vmcb->eventinj.fields.vector) )
253 {
254 c->pending_event = (uint32_t)vmcb->eventinj.bytes;
255 c->error_code = vmcb->eventinj.fields.errorcode;
256 }
258 return 1;
259 }
262 int svm_vmcb_restore(struct vcpu *v, struct hvm_hw_cpu *c)
263 {
264 unsigned long mfn = 0;
265 p2m_type_t p2mt;
266 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
268 if ( c->pending_valid &&
269 ((c->pending_type == 1) || (c->pending_type > 6) ||
270 (c->pending_reserved != 0)) )
271 {
272 gdprintk(XENLOG_ERR, "Invalid pending event 0x%"PRIx32".\n",
273 c->pending_event);
274 return -EINVAL;
275 }
277 if ( !paging_mode_hap(v->domain) )
278 {
279 if ( c->cr0 & X86_CR0_PG )
280 {
281 mfn = mfn_x(gfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT, &p2mt));
282 if ( !p2m_is_ram(p2mt) || !get_page(mfn_to_page(mfn), v->domain) )
283 {
284 gdprintk(XENLOG_ERR, "Invalid CR3 value=0x%"PRIx64"\n",
285 c->cr3);
286 return -EINVAL;
287 }
288 }
290 if ( v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PG )
291 put_page(pagetable_get_page(v->arch.guest_table));
293 v->arch.guest_table = pagetable_from_pfn(mfn);
294 }
296 v->arch.hvm_vcpu.guest_cr[0] = c->cr0 | X86_CR0_ET;
297 v->arch.hvm_vcpu.guest_cr[2] = c->cr2;
298 v->arch.hvm_vcpu.guest_cr[3] = c->cr3;
299 v->arch.hvm_vcpu.guest_cr[4] = c->cr4;
300 svm_update_guest_cr(v, 0);
301 svm_update_guest_cr(v, 2);
302 svm_update_guest_cr(v, 4);
304 #ifdef HVM_DEBUG_SUSPEND
305 printk("%s: cr3=0x%"PRIx64", cr0=0x%"PRIx64", cr4=0x%"PRIx64".\n",
306 __func__, c->cr3, c->cr0, c->cr4);
307 #endif
309 vmcb->idtr.limit = c->idtr_limit;
310 vmcb->idtr.base = c->idtr_base;
312 vmcb->gdtr.limit = c->gdtr_limit;
313 vmcb->gdtr.base = c->gdtr_base;
315 vmcb->cs.sel = c->cs_sel;
316 vmcb->cs.limit = c->cs_limit;
317 vmcb->cs.base = c->cs_base;
318 vmcb->cs.attr.bytes = c->cs_arbytes;
320 vmcb->ds.sel = c->ds_sel;
321 vmcb->ds.limit = c->ds_limit;
322 vmcb->ds.base = c->ds_base;
323 vmcb->ds.attr.bytes = c->ds_arbytes;
325 vmcb->es.sel = c->es_sel;
326 vmcb->es.limit = c->es_limit;
327 vmcb->es.base = c->es_base;
328 vmcb->es.attr.bytes = c->es_arbytes;
330 vmcb->ss.sel = c->ss_sel;
331 vmcb->ss.limit = c->ss_limit;
332 vmcb->ss.base = c->ss_base;
333 vmcb->ss.attr.bytes = c->ss_arbytes;
334 vmcb->cpl = vmcb->ss.attr.fields.dpl;
336 vmcb->fs.sel = c->fs_sel;
337 vmcb->fs.limit = c->fs_limit;
338 vmcb->fs.base = c->fs_base;
339 vmcb->fs.attr.bytes = c->fs_arbytes;
341 vmcb->gs.sel = c->gs_sel;
342 vmcb->gs.limit = c->gs_limit;
343 vmcb->gs.base = c->gs_base;
344 vmcb->gs.attr.bytes = c->gs_arbytes;
346 vmcb->tr.sel = c->tr_sel;
347 vmcb->tr.limit = c->tr_limit;
348 vmcb->tr.base = c->tr_base;
349 vmcb->tr.attr.bytes = c->tr_arbytes;
351 vmcb->ldtr.sel = c->ldtr_sel;
352 vmcb->ldtr.limit = c->ldtr_limit;
353 vmcb->ldtr.base = c->ldtr_base;
354 vmcb->ldtr.attr.bytes = c->ldtr_arbytes;
356 vmcb->sysenter_cs = c->sysenter_cs;
357 vmcb->sysenter_esp = c->sysenter_esp;
358 vmcb->sysenter_eip = c->sysenter_eip;
360 if ( paging_mode_hap(v->domain) )
361 {
362 vmcb->np_enable = 1;
363 vmcb->g_pat = 0x0007040600070406ULL; /* guest PAT */
364 vmcb->h_cr3 = pagetable_get_paddr(v->domain->arch.phys_table);
365 }
367 if ( c->pending_valid )
368 {
369 gdprintk(XENLOG_INFO, "Re-injecting 0x%"PRIx32", 0x%"PRIx32"\n",
370 c->pending_event, c->error_code);
372 if ( hvm_event_needs_reinjection(c->pending_type, c->pending_vector) )
373 {
374 vmcb->eventinj.bytes = c->pending_event;
375 vmcb->eventinj.fields.errorcode = c->error_code;
376 }
377 }
379 paging_update_paging_modes(v);
381 return 0;
382 }
385 static void svm_save_cpu_state(struct vcpu *v, struct hvm_hw_cpu *data)
386 {
387 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
389 data->shadow_gs = vmcb->kerngsbase;
390 data->msr_lstar = vmcb->lstar;
391 data->msr_star = vmcb->star;
392 data->msr_cstar = vmcb->cstar;
393 data->msr_syscall_mask = vmcb->sfmask;
394 data->msr_efer = v->arch.hvm_vcpu.guest_efer;
395 data->msr_flags = -1ULL;
397 data->tsc = hvm_get_guest_time(v);
398 }
401 static void svm_load_cpu_state(struct vcpu *v, struct hvm_hw_cpu *data)
402 {
403 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
405 vmcb->kerngsbase = data->shadow_gs;
406 vmcb->lstar = data->msr_lstar;
407 vmcb->star = data->msr_star;
408 vmcb->cstar = data->msr_cstar;
409 vmcb->sfmask = data->msr_syscall_mask;
410 v->arch.hvm_vcpu.guest_efer = data->msr_efer;
411 svm_update_guest_efer(v);
413 hvm_set_guest_time(v, data->tsc);
414 }
416 static void svm_save_vmcb_ctxt(struct vcpu *v, struct hvm_hw_cpu *ctxt)
417 {
418 svm_save_cpu_state(v, ctxt);
419 svm_vmcb_save(v, ctxt);
420 }
422 static int svm_load_vmcb_ctxt(struct vcpu *v, struct hvm_hw_cpu *ctxt)
423 {
424 svm_load_cpu_state(v, ctxt);
425 if (svm_vmcb_restore(v, ctxt)) {
426 printk("svm_vmcb restore failed!\n");
427 domain_crash(v->domain);
428 return -EINVAL;
429 }
431 return 0;
432 }
434 static enum hvm_intblk svm_interrupt_blocked(
435 struct vcpu *v, struct hvm_intack intack)
436 {
437 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
439 if ( vmcb->interrupt_shadow )
440 return hvm_intblk_shadow;
442 if ( intack.source == hvm_intsrc_nmi )
443 return hvm_intblk_none;
445 ASSERT((intack.source == hvm_intsrc_pic) ||
446 (intack.source == hvm_intsrc_lapic));
448 if ( !(guest_cpu_user_regs()->eflags & X86_EFLAGS_IF) )
449 return hvm_intblk_rflags_ie;
451 if ( (intack.source == hvm_intsrc_lapic) &&
452 ((vmcb->vintr.fields.tpr & 0xf) >= (intack.vector >> 4)) )
453 return hvm_intblk_tpr;
455 return hvm_intblk_none;
456 }
458 static int svm_guest_x86_mode(struct vcpu *v)
459 {
460 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
462 if ( unlikely(!(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE)) )
463 return 0;
464 if ( unlikely(guest_cpu_user_regs()->eflags & X86_EFLAGS_VM) )
465 return 1;
466 if ( hvm_long_mode_enabled(v) && likely(vmcb->cs.attr.fields.l) )
467 return 8;
468 return (likely(vmcb->cs.attr.fields.db) ? 4 : 2);
469 }
471 static void svm_update_host_cr3(struct vcpu *v)
472 {
473 /* SVM doesn't have a HOST_CR3 equivalent to update. */
474 }
476 static void svm_update_guest_cr(struct vcpu *v, unsigned int cr)
477 {
478 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
480 switch ( cr )
481 {
482 case 0:
483 /* TS cleared? Then initialise FPU now. */
484 if ( (v == current) && !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS) &&
485 (vmcb->cr0 & X86_CR0_TS) )
486 {
487 setup_fpu(v);
488 vmcb->exception_intercepts &= ~(1U << TRAP_no_device);
489 }
491 vmcb->cr0 = v->arch.hvm_vcpu.guest_cr[0];
492 if ( !paging_mode_hap(v->domain) )
493 vmcb->cr0 |= X86_CR0_PG | X86_CR0_WP;
494 break;
495 case 2:
496 vmcb->cr2 = v->arch.hvm_vcpu.guest_cr[2];
497 break;
498 case 3:
499 vmcb->cr3 = v->arch.hvm_vcpu.hw_cr[3];
500 svm_asid_inv_asid(v);
501 break;
502 case 4:
503 vmcb->cr4 = HVM_CR4_HOST_MASK;
504 if ( paging_mode_hap(v->domain) )
505 vmcb->cr4 &= ~X86_CR4_PAE;
506 vmcb->cr4 |= v->arch.hvm_vcpu.guest_cr[4];
507 break;
508 default:
509 BUG();
510 }
511 }
513 static void svm_update_guest_efer(struct vcpu *v)
514 {
515 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
517 vmcb->efer = (v->arch.hvm_vcpu.guest_efer | EFER_SVME) & ~EFER_LME;
518 if ( vmcb->efer & EFER_LMA )
519 vmcb->efer |= EFER_LME;
520 }
522 static void svm_flush_guest_tlbs(void)
523 {
524 /* Roll over the CPU's ASID generation, so it gets a clean TLB when we
525 * next VMRUN. (If ASIDs are disabled, the whole TLB is flushed on
526 * VMRUN anyway). */
527 svm_asid_inc_generation();
528 }
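/*
 * In other words, no explicit TLB-flush instruction is issued here: bumping
 * the generation makes every vcpu's cached ASID stale, so the ASID code
 * hands out a fresh ASID on the next VMRUN (falling back to a full flush
 * when the ASID space wraps), and that is what actually discards the stale
 * guest translations.
 */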
530 static void svm_update_vtpr(struct vcpu *v, unsigned long value)
531 {
532 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
534 vmcb->vintr.fields.tpr = value & 0x0f;
535 }
537 static void svm_sync_vmcb(struct vcpu *v)
538 {
539 struct arch_svm_struct *arch_svm = &v->arch.hvm_svm;
541 if ( arch_svm->vmcb_in_sync )
542 return;
544 arch_svm->vmcb_in_sync = 1;
546 svm_vmsave(arch_svm->vmcb);
547 }
549 static unsigned long svm_get_segment_base(struct vcpu *v, enum x86_segment seg)
550 {
551 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
552 int long_mode = vmcb->cs.attr.fields.l && hvm_long_mode_enabled(v);
554 switch ( seg )
555 {
556 case x86_seg_cs: return long_mode ? 0 : vmcb->cs.base;
557 case x86_seg_ds: return long_mode ? 0 : vmcb->ds.base;
558 case x86_seg_es: return long_mode ? 0 : vmcb->es.base;
559 case x86_seg_fs: svm_sync_vmcb(v); return vmcb->fs.base;
560 case x86_seg_gs: svm_sync_vmcb(v); return vmcb->gs.base;
561 case x86_seg_ss: return long_mode ? 0 : vmcb->ss.base;
562 case x86_seg_tr: svm_sync_vmcb(v); return vmcb->tr.base;
563 case x86_seg_gdtr: return vmcb->gdtr.base;
564 case x86_seg_idtr: return vmcb->idtr.base;
565 case x86_seg_ldtr: svm_sync_vmcb(v); return vmcb->ldtr.base;
566 }
567 BUG();
568 return 0;
569 }
571 static void svm_get_segment_register(struct vcpu *v, enum x86_segment seg,
572 struct segment_register *reg)
573 {
574 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
576 ASSERT(v == current);
578 switch ( seg )
579 {
580 case x86_seg_cs:
581 memcpy(reg, &vmcb->cs, sizeof(*reg));
582 break;
583 case x86_seg_ds:
584 memcpy(reg, &vmcb->ds, sizeof(*reg));
585 break;
586 case x86_seg_es:
587 memcpy(reg, &vmcb->es, sizeof(*reg));
588 break;
589 case x86_seg_fs:
590 svm_sync_vmcb(v);
591 memcpy(reg, &vmcb->fs, sizeof(*reg));
592 break;
593 case x86_seg_gs:
594 svm_sync_vmcb(v);
595 memcpy(reg, &vmcb->gs, sizeof(*reg));
596 break;
597 case x86_seg_ss:
598 memcpy(reg, &vmcb->ss, sizeof(*reg));
599 break;
600 case x86_seg_tr:
601 svm_sync_vmcb(v);
602 memcpy(reg, &vmcb->tr, sizeof(*reg));
603 break;
604 case x86_seg_gdtr:
605 memcpy(reg, &vmcb->gdtr, sizeof(*reg));
606 break;
607 case x86_seg_idtr:
608 memcpy(reg, &vmcb->idtr, sizeof(*reg));
609 break;
610 case x86_seg_ldtr:
611 svm_sync_vmcb(v);
612 memcpy(reg, &vmcb->ldtr, sizeof(*reg));
613 break;
614 default:
615 BUG();
616 }
617 }
619 static void svm_set_segment_register(struct vcpu *v, enum x86_segment seg,
620 struct segment_register *reg)
621 {
622 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
624 ASSERT(v == current);
626 switch ( seg )
627 {
628 case x86_seg_cs:
629 memcpy(&vmcb->cs, reg, sizeof(*reg));
630 break;
631 case x86_seg_ds:
632 memcpy(&vmcb->ds, reg, sizeof(*reg));
633 break;
634 case x86_seg_es:
635 memcpy(&vmcb->es, reg, sizeof(*reg));
636 break;
637 case x86_seg_fs:
638 svm_sync_vmcb(v);
639 memcpy(&vmcb->fs, reg, sizeof(*reg));
640 svm_vmload(vmcb);
641 break;
642 case x86_seg_gs:
643 svm_sync_vmcb(v);
644 memcpy(&vmcb->gs, reg, sizeof(*reg));
645 svm_vmload(vmcb);
646 break;
647 case x86_seg_ss:
648 memcpy(&vmcb->ss, reg, sizeof(*reg));
649 vmcb->cpl = vmcb->ss.attr.fields.dpl;
650 break;
651 case x86_seg_tr:
652 svm_sync_vmcb(v);
653 memcpy(&vmcb->tr, reg, sizeof(*reg));
654 svm_vmload(vmcb);
655 break;
656 case x86_seg_gdtr:
657 memcpy(&vmcb->gdtr, reg, sizeof(*reg));
658 break;
659 case x86_seg_idtr:
660 memcpy(&vmcb->idtr, reg, sizeof(*reg));
661 break;
662 case x86_seg_ldtr:
663 svm_sync_vmcb(v);
664 memcpy(&vmcb->ldtr, reg, sizeof(*reg));
665 svm_vmload(vmcb);
666 break;
667 default:
668 BUG();
669 }
670 }
672 /* Make sure that xen intercepts any FP accesses from current */
673 static void svm_stts(struct vcpu *v)
674 {
675 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
677 /*
678 * If the guest does not have TS enabled then we must cause and handle an
679 * exception on first use of the FPU. If the guest *does* have TS enabled
680 * then this is not necessary: no FPU activity can occur until the guest
681 * clears CR0.TS, and we will initialise the FPU when that happens.
682 */
683 if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS) )
684 {
685 v->arch.hvm_svm.vmcb->exception_intercepts |= 1U << TRAP_no_device;
686 vmcb->cr0 |= X86_CR0_TS;
687 }
688 }
691 static void svm_set_tsc_offset(struct vcpu *v, u64 offset)
692 {
693 v->arch.hvm_svm.vmcb->tsc_offset = offset;
694 }
697 static void svm_init_ap_context(
698 struct vcpu_guest_context *ctxt, int vcpuid, int trampoline_vector)
699 {
700 struct vcpu *v;
701 struct vmcb_struct *vmcb;
702 cpu_user_regs_t *regs;
703 u16 cs_sel;
705 /* We know this is safe because hvm_bringup_ap() does it */
706 v = current->domain->vcpu[vcpuid];
707 vmcb = v->arch.hvm_svm.vmcb;
708 regs = &v->arch.guest_context.user_regs;
710 memset(ctxt, 0, sizeof(*ctxt));
712 /*
713 * We execute the trampoline code in real mode. The trampoline vector
714 * passed to us is page aligned and is the physical frame number of
715 * the code page.
716 */
717 cs_sel = trampoline_vector << 8;
718 ctxt->user_regs.eip = 0x0;
719 ctxt->user_regs.cs = cs_sel;
721 /*
722 * This is the launch of an AP; set state so that we begin executing
723 * the trampoline code in real-mode.
724 */
725 svm_reset_to_realmode(v, regs);
726 /* Adjust the vmcb's hidden register state. */
727 vmcb->cs.sel = cs_sel;
728 vmcb->cs.base = (cs_sel << 4);
729 }
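/*
 * Worked example of the arithmetic above: a trampoline in, for instance,
 * physical frame 0x9 gives cs_sel = 0x9 << 8 = 0x0900, hence
 * cs.base = 0x0900 << 4 = 0x9000, so CS:IP = 0900:0000 points at the start
 * of that page, as real-mode segmentation requires.
 */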
731 static void svm_init_hypercall_page(struct domain *d, void *hypercall_page)
732 {
733 char *p;
734 int i;
736 for ( i = 0; i < (PAGE_SIZE / 32); i++ )
737 {
738 p = (char *)(hypercall_page + (i * 32));
739 *(u8 *)(p + 0) = 0xb8; /* mov imm32, %eax */
740 *(u32 *)(p + 1) = i;
741 *(u8 *)(p + 5) = 0x0f; /* vmmcall */
742 *(u8 *)(p + 6) = 0x01;
743 *(u8 *)(p + 7) = 0xd9;
744 *(u8 *)(p + 8) = 0xc3; /* ret */
745 }
747 /* Don't support HYPERVISOR_iret at the moment */
748 *(u16 *)(hypercall_page + (__HYPERVISOR_iret * 32)) = 0x0b0f; /* ud2 */
749 }
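/*
 * Each 32-byte stub written above disassembles as:
 *     b8 <imm32>      mov  $<hypercall nr>, %eax
 *     0f 01 d9        vmmcall
 *     c3              ret
 * so a guest issues hypercall N by calling hypercall_page + N*32 with the
 * usual argument registers; the VMMCALL intercept (VMEXIT_VMMCALL below)
 * then routes it to hvm_do_hypercall().
 */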
751 static void svm_ctxt_switch_from(struct vcpu *v)
752 {
753 int cpu = smp_processor_id();
755 svm_save_dr(v);
757 svm_sync_vmcb(v);
758 svm_vmload(root_vmcb[cpu]);
760 #ifdef __x86_64__
761 /* Resume use of ISTs now that the host TR is reinstated. */
762 idt_tables[cpu][TRAP_double_fault].a |= IST_DF << 32;
763 idt_tables[cpu][TRAP_nmi].a |= IST_NMI << 32;
764 idt_tables[cpu][TRAP_machine_check].a |= IST_MCE << 32;
765 #endif
766 }
768 static void svm_ctxt_switch_to(struct vcpu *v)
769 {
770 int cpu = smp_processor_id();
772 #ifdef __x86_64__
773 /*
774 * This is required because VMRUN performs consistency checks,
775 * and some of the DOM0 selectors point to invalid GDT
776 * locations, which would cause AMD processors
777 * to shut down.
778 */
779 set_segment_register(ds, 0);
780 set_segment_register(es, 0);
781 set_segment_register(ss, 0);
783 /*
784 * Cannot use ISTs for NMI/#MC/#DF while we are running with the guest TR.
785 * But this doesn't matter: the IST is only required to handle SYSCALL/SYSRET.
786 */
787 idt_tables[cpu][TRAP_double_fault].a &= ~(7UL << 32);
788 idt_tables[cpu][TRAP_nmi].a &= ~(7UL << 32);
789 idt_tables[cpu][TRAP_machine_check].a &= ~(7UL << 32);
790 #endif
792 svm_restore_dr(v);
794 svm_vmsave(root_vmcb[cpu]);
795 svm_vmload(v->arch.hvm_svm.vmcb);
796 }
798 static void svm_do_resume(struct vcpu *v)
799 {
800 bool_t debug_state = v->domain->debugger_attached;
802 if ( unlikely(v->arch.hvm_vcpu.debug_state_latch != debug_state) )
803 {
804 uint32_t mask = (1U << TRAP_debug) | (1U << TRAP_int3);
805 v->arch.hvm_vcpu.debug_state_latch = debug_state;
806 if ( debug_state )
807 v->arch.hvm_svm.vmcb->exception_intercepts |= mask;
808 else
809 v->arch.hvm_svm.vmcb->exception_intercepts &= ~mask;
810 }
812 if ( v->arch.hvm_svm.launch_core != smp_processor_id() )
813 {
814 v->arch.hvm_svm.launch_core = smp_processor_id();
815 hvm_migrate_timers(v);
817 /* Migrating to another ASID domain. Request a new ASID. */
818 svm_asid_init_vcpu(v);
819 }
821 hvm_do_resume(v);
822 reset_stack_and_jump(svm_asm_do_resume);
823 }
825 static int svm_domain_initialise(struct domain *d)
826 {
827 return 0;
828 }
830 static void svm_domain_destroy(struct domain *d)
831 {
832 }
834 static int svm_vcpu_initialise(struct vcpu *v)
835 {
836 int rc;
838 v->arch.schedule_tail = svm_do_resume;
839 v->arch.ctxt_switch_from = svm_ctxt_switch_from;
840 v->arch.ctxt_switch_to = svm_ctxt_switch_to;
842 v->arch.hvm_svm.launch_core = -1;
844 if ( (rc = svm_create_vmcb(v)) != 0 )
845 {
846 dprintk(XENLOG_WARNING,
847 "Failed to create VMCB for vcpu %d: err=%d.\n",
848 v->vcpu_id, rc);
849 return rc;
850 }
852 return 0;
853 }
855 static void svm_vcpu_destroy(struct vcpu *v)
856 {
857 svm_destroy_vmcb(v);
858 }
860 static void svm_hvm_inject_exception(
861 unsigned int trapnr, int errcode, unsigned long cr2)
862 {
863 struct vcpu *v = current;
864 if ( trapnr == TRAP_page_fault )
865 v->arch.hvm_svm.vmcb->cr2 = v->arch.hvm_vcpu.guest_cr[2] = cr2;
866 svm_inject_exception(v, trapnr, (errcode != -1), errcode);
867 }
869 static int svm_event_pending(struct vcpu *v)
870 {
871 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
872 return vmcb->eventinj.fields.v;
873 }
875 static struct hvm_function_table svm_function_table = {
876 .name = "SVM",
877 .cpu_down = svm_cpu_down,
878 .domain_initialise = svm_domain_initialise,
879 .domain_destroy = svm_domain_destroy,
880 .vcpu_initialise = svm_vcpu_initialise,
881 .vcpu_destroy = svm_vcpu_destroy,
882 .save_cpu_ctxt = svm_save_vmcb_ctxt,
883 .load_cpu_ctxt = svm_load_vmcb_ctxt,
884 .interrupt_blocked = svm_interrupt_blocked,
885 .guest_x86_mode = svm_guest_x86_mode,
886 .get_segment_base = svm_get_segment_base,
887 .get_segment_register = svm_get_segment_register,
888 .set_segment_register = svm_set_segment_register,
889 .update_host_cr3 = svm_update_host_cr3,
890 .update_guest_cr = svm_update_guest_cr,
891 .update_guest_efer = svm_update_guest_efer,
892 .flush_guest_tlbs = svm_flush_guest_tlbs,
893 .update_vtpr = svm_update_vtpr,
894 .stts = svm_stts,
895 .set_tsc_offset = svm_set_tsc_offset,
896 .inject_exception = svm_hvm_inject_exception,
897 .init_ap_context = svm_init_ap_context,
898 .init_hypercall_page = svm_init_hypercall_page,
899 .event_pending = svm_event_pending
900 };
902 int start_svm(struct cpuinfo_x86 *c)
903 {
904 u32 eax, ecx, edx;
905 u32 phys_hsa_lo, phys_hsa_hi;
906 u64 phys_hsa;
907 int cpu = smp_processor_id();
909 /* Xen does not fill x86_capability words except 0. */
910 ecx = cpuid_ecx(0x80000001);
911 boot_cpu_data.x86_capability[5] = ecx;
913 if ( !(test_bit(X86_FEATURE_SVME, &boot_cpu_data.x86_capability)) )
914 return 0;
916 /* Check whether SVM feature is disabled in BIOS */
917 rdmsr(MSR_K8_VM_CR, eax, edx);
918 if ( eax & K8_VMCR_SVME_DISABLE )
919 {
920 printk("AMD SVM Extension is disabled in BIOS.\n");
921 return 0;
922 }
924 if ( ((hsa[cpu] = alloc_host_save_area()) == NULL) ||
925 ((root_vmcb[cpu] = alloc_vmcb()) == NULL) )
926 return 0;
928 write_efer(read_efer() | EFER_SVME);
930 /* Initialize the HSA for this core. */
931 phys_hsa = (u64) virt_to_maddr(hsa[cpu]);
932 phys_hsa_lo = (u32) phys_hsa;
933 phys_hsa_hi = (u32) (phys_hsa >> 32);
934 wrmsr(MSR_K8_VM_HSAVE_PA, phys_hsa_lo, phys_hsa_hi);
936 /* Initialize core's ASID handling. */
937 svm_asid_init(c);
939 if ( cpu != 0 )
940 return 1;
942 setup_vmcb_dump();
944 svm_feature_flags = ((cpuid_eax(0x80000000) >= 0x8000000A) ?
945 cpuid_edx(0x8000000A) : 0);
947 svm_function_table.hap_supported = cpu_has_svm_npt;
949 hvm_enable(&svm_function_table);
951 return 1;
952 }
954 static void svm_do_nested_pgfault(paddr_t gpa, struct cpu_user_regs *regs)
955 {
956 p2m_type_t p2mt;
957 mfn_t mfn;
958 unsigned long gfn = gpa >> PAGE_SHIFT;
960 /* If this GFN is emulated MMIO, pass the fault to the mmio handler */
961 mfn = gfn_to_mfn_current(gfn, &p2mt);
962 if ( p2mt == p2m_mmio_dm )
963 {
964 handle_mmio(gpa);
965 return;
966 }
968 /* Log-dirty: mark the page dirty and let the guest write it again */
969 paging_mark_dirty(current->domain, mfn_x(mfn));
970 p2m_change_type(current->domain, gfn, p2m_ram_logdirty, p2m_ram_rw);
971 }
973 static void svm_do_no_device_fault(struct vmcb_struct *vmcb)
974 {
975 struct vcpu *v = current;
977 setup_fpu(v);
978 vmcb->exception_intercepts &= ~(1U << TRAP_no_device);
980 if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS) )
981 vmcb->cr0 &= ~X86_CR0_TS;
982 }
984 /* Reserved bits ECX: [31:14], [12:4], [2:1]*/
985 #define SVM_VCPU_CPUID_L1_ECX_RESERVED 0xffffdff6
986 /* Reserved bits EDX: [31:29], [27], [22:20], [18], [10] */
987 #define SVM_VCPU_CPUID_L1_EDX_RESERVED 0xe8740400
989 static void svm_vmexit_do_cpuid(struct vmcb_struct *vmcb,
990 struct cpu_user_regs *regs)
991 {
992 unsigned long input = regs->eax;
993 unsigned int eax, ebx, ecx, edx;
994 struct vcpu *v = current;
995 int inst_len;
997 hvm_cpuid(input, &eax, &ebx, &ecx, &edx);
999 switch ( input )
1000 {
1001 case 0x00000001:
1002 /* Clear out reserved bits. */
1003 ecx &= ~SVM_VCPU_CPUID_L1_ECX_RESERVED;
1004 edx &= ~SVM_VCPU_CPUID_L1_EDX_RESERVED;
1006 /* Guest should only see one logical processor.
1007 * See details on page 23 of AMD CPUID Specification.
1008 */
1009 __clear_bit(X86_FEATURE_HT & 31, &edx);
1010 ebx &= 0xFF00FFFF; /* clear the logical processor count when HTT=0 */
1011 ebx |= 0x00010000; /* set to 1 just for precaution */
1012 break;
1014 case 0x80000001:
1015 if ( vlapic_hw_disabled(vcpu_vlapic(v)) )
1016 __clear_bit(X86_FEATURE_APIC & 31, &edx);
1018 #if CONFIG_PAGING_LEVELS >= 3
1019 if ( !v->domain->arch.hvm_domain.params[HVM_PARAM_PAE_ENABLED] )
1020 #endif
1021 __clear_bit(X86_FEATURE_PAE & 31, &edx);
1023 __clear_bit(X86_FEATURE_PSE36 & 31, &edx);
1025 /* Clear the Cmp_Legacy bit
1026 * This bit is supposed to be zero when HTT = 0.
1027 * See details on page 23 of AMD CPUID Specification.
1028 */
1029 __clear_bit(X86_FEATURE_CMP_LEGACY & 31, &ecx);
1031 /* Make SVM feature invisible to the guest. */
1032 __clear_bit(X86_FEATURE_SVME & 31, &ecx);
1033 __clear_bit(X86_FEATURE_SKINIT & 31, &ecx);
1035 __clear_bit(X86_FEATURE_OSVW & 31, &ecx);
1036 __clear_bit(X86_FEATURE_WDT & 31, &ecx);
1038 /* So far, we do not support 3DNow for the guest. */
1039 __clear_bit(X86_FEATURE_3DNOW & 31, &edx);
1040 __clear_bit(X86_FEATURE_3DNOWEXT & 31, &edx);
1041 break;
1043 case 0x80000007:
1044 case 0x8000000A:
1045 /* Mask out features of power management and SVM extension. */
1046 eax = ebx = ecx = edx = 0;
1047 break;
1049 case 0x80000008:
1050 /* Make sure Number of CPU core is 1 when HTT=0 */
1051 ecx &= 0xFFFFFF00;
1052 break;
1053 }
1055 regs->eax = eax;
1056 regs->ebx = ebx;
1057 regs->ecx = ecx;
1058 regs->edx = edx;
1060 HVMTRACE_3D(CPUID, v, input,
1061 ((uint64_t)eax << 32) | ebx, ((uint64_t)ecx << 32) | edx);
1063 inst_len = __get_instruction_length(v, INSTR_CPUID, NULL);
1064 ASSERT(inst_len > 0);
1065 __update_guest_eip(regs, inst_len);
1066 }
1068 static unsigned long *get_reg_p(
1069 unsigned int gpreg,
1070 struct cpu_user_regs *regs, struct vmcb_struct *vmcb)
1071 {
1072 unsigned long *reg_p = NULL;
1073 switch (gpreg)
1074 {
1075 case SVM_REG_EAX:
1076 reg_p = (unsigned long *)&regs->eax;
1077 break;
1078 case SVM_REG_EBX:
1079 reg_p = (unsigned long *)&regs->ebx;
1080 break;
1081 case SVM_REG_ECX:
1082 reg_p = (unsigned long *)&regs->ecx;
1083 break;
1084 case SVM_REG_EDX:
1085 reg_p = (unsigned long *)&regs->edx;
1086 break;
1087 case SVM_REG_EDI:
1088 reg_p = (unsigned long *)&regs->edi;
1089 break;
1090 case SVM_REG_ESI:
1091 reg_p = (unsigned long *)&regs->esi;
1092 break;
1093 case SVM_REG_EBP:
1094 reg_p = (unsigned long *)&regs->ebp;
1095 break;
1096 case SVM_REG_ESP:
1097 reg_p = (unsigned long *)&regs->esp;
1098 break;
1099 #ifdef __x86_64__
1100 case SVM_REG_R8:
1101 reg_p = (unsigned long *)&regs->r8;
1102 break;
1103 case SVM_REG_R9:
1104 reg_p = (unsigned long *)&regs->r9;
1105 break;
1106 case SVM_REG_R10:
1107 reg_p = (unsigned long *)&regs->r10;
1108 break;
1109 case SVM_REG_R11:
1110 reg_p = (unsigned long *)&regs->r11;
1111 break;
1112 case SVM_REG_R12:
1113 reg_p = (unsigned long *)&regs->r12;
1114 break;
1115 case SVM_REG_R13:
1116 reg_p = (unsigned long *)&regs->r13;
1117 break;
1118 case SVM_REG_R14:
1119 reg_p = (unsigned long *)&regs->r14;
1120 break;
1121 case SVM_REG_R15:
1122 reg_p = (unsigned long *)&regs->r15;
1123 break;
1124 #endif
1125 default:
1126 BUG();
1127 }
1129 return reg_p;
1130 }
1133 static unsigned long get_reg(
1134 unsigned int gpreg, struct cpu_user_regs *regs, struct vmcb_struct *vmcb)
1135 {
1136 unsigned long *gp;
1137 gp = get_reg_p(gpreg, regs, vmcb);
1138 return *gp;
1139 }
1142 static void set_reg(
1143 unsigned int gpreg, unsigned long value,
1144 struct cpu_user_regs *regs, struct vmcb_struct *vmcb)
1145 {
1146 unsigned long *gp;
1147 gp = get_reg_p(gpreg, regs, vmcb);
1148 *gp = value;
1149 }
1152 static void svm_dr_access(struct vcpu *v, struct cpu_user_regs *regs)
1153 {
1154 HVMTRACE_0D(DR_WRITE, v);
1155 __restore_debug_registers(v);
1156 }
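/*
 * Both DR read and DR write intercepts land here (see the
 * VMEXIT_DR0_READ/WRITE cases in svm_vmexit_handler): on the guest's first
 * debug-register access we drop the DR intercepts and load its debug
 * registers, so subsequent accesses run unintercepted until svm_save_dr()
 * re-arms the intercepts on the next context switch away from this vcpu.
 */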
1159 static void svm_get_prefix_info(struct vcpu *v, unsigned int dir,
1160 svm_segment_register_t **seg,
1161 unsigned int *asize)
1163 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1164 unsigned char inst[MAX_INST_LEN];
1165 int i;
1167 memset(inst, 0, MAX_INST_LEN);
1168 if (inst_copy_from_guest(inst, svm_rip2pointer(v), sizeof(inst))
1169 != MAX_INST_LEN)
1171 gdprintk(XENLOG_ERR, "get guest instruction failed\n");
1172 domain_crash(current->domain);
1173 return;
1176 for (i = 0; i < MAX_INST_LEN; i++)
1178 switch (inst[i])
1180 case 0xf3: /* REPZ */
1181 case 0xf2: /* REPNZ */
1182 case 0xf0: /* LOCK */
1183 case 0x66: /* data32 */
1184 #ifdef __x86_64__
1185 /* REX prefixes */
1186 case 0x40:
1187 case 0x41:
1188 case 0x42:
1189 case 0x43:
1190 case 0x44:
1191 case 0x45:
1192 case 0x46:
1193 case 0x47:
1195 case 0x48:
1196 case 0x49:
1197 case 0x4a:
1198 case 0x4b:
1199 case 0x4c:
1200 case 0x4d:
1201 case 0x4e:
1202 case 0x4f:
1203 #endif
1204 continue;
1205 case 0x67: /* addr32 */
1206 *asize ^= 48; /* Switch 16/32 bits */
1207 continue;
1208 case 0x2e: /* CS */
1209 *seg = &vmcb->cs;
1210 continue;
1211 case 0x36: /* SS */
1212 *seg = &vmcb->ss;
1213 continue;
1214 case 0x26: /* ES */
1215 *seg = &vmcb->es;
1216 continue;
1217 case 0x64: /* FS */
1218 svm_sync_vmcb(v);
1219 *seg = &vmcb->fs;
1220 continue;
1221 case 0x65: /* GS */
1222 svm_sync_vmcb(v);
1223 *seg = &vmcb->gs;
1224 continue;
1225 case 0x3e: /* DS */
1226 *seg = &vmcb->ds;
1227 continue;
1228 default:
1229 break;
1231 return;
1236 /* Get the address of INS/OUTS instruction */
1237 static int svm_get_io_address(
1238 struct vcpu *v, struct cpu_user_regs *regs,
1239 unsigned int size, ioio_info_t info,
1240 unsigned long *count, unsigned long *addr)
1242 unsigned long reg;
1243 unsigned int asize, isize;
1244 int long_mode = 0;
1245 svm_segment_register_t *seg = NULL;
1246 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1248 /* If we're in long mode, don't check the segment presence & limit */
1249 long_mode = vmcb->cs.attr.fields.l && hvm_long_mode_enabled(v);
1251 /* d field of cs.attr is 1 for 32-bit, 0 for 16 or 64 bit.
1252 * l field combined with EFER_LMA says whether it's 16 or 64 bit.
1253 */
1254 asize = (long_mode)?64:((vmcb->cs.attr.fields.db)?32:16);
1257 /* The ins/outs instructions are a single byte, so if the instruction is
1258 * longer than one byte (not counting a possible rep prefix), it carries
1259 * some other prefix that we need to decode.
1260 */
1261 isize = vmcb->exitinfo2 - regs->eip;
1263 if (info.fields.rep)
1264 isize --;
1266 if (isize > 1)
1267 svm_get_prefix_info(v, info.fields.type, &seg, &asize);
1269 if (info.fields.type == IOREQ_WRITE)
1271 reg = regs->esi;
1272 if (!seg) /* If no prefix, use DS. */
1273 seg = &vmcb->ds;
1274 if (!long_mode && (seg->attr.fields.type & 0xa) == 0x8) {
1275 svm_inject_exception(v, TRAP_gp_fault, 1, 0);
1276 return 0;
1279 else
1281 reg = regs->edi;
1282 seg = &vmcb->es; /* Note: This is ALWAYS ES. */
1283 if (!long_mode && (seg->attr.fields.type & 0xa) != 0x2) {
1284 svm_inject_exception(v, TRAP_gp_fault, 1, 0);
1285 return 0;
1289 /* If the segment isn't present, give GP fault! */
1290 if (!long_mode && !seg->attr.fields.p)
1292 svm_inject_exception(v, TRAP_gp_fault, 1, 0);
1293 return 0;
1296 if (asize == 16)
1298 *addr = (reg & 0xFFFF);
1299 *count = regs->ecx & 0xffff;
1301 else
1303 *addr = reg;
1304 *count = regs->ecx;
1306 if (!info.fields.rep)
1307 *count = 1;
1309 if (!long_mode)
1311 ASSERT(*addr == (u32)*addr);
1312 if ((u32)(*addr + size - 1) < (u32)*addr ||
1313 (seg->attr.fields.type & 0xc) != 0x4 ?
1314 *addr + size - 1 > seg->limit :
1315 *addr <= seg->limit)
1317 svm_inject_exception(v, TRAP_gp_fault, 1, 0);
1318 return 0;
1321 /* Check the limit for repeated instructions, as above we checked only
1322 the first instance. Truncate the count if a limit violation would
1323 occur. Note that the checking is not necessary for page granular
1324 segments as transfers crossing page boundaries will be broken up
1325 anyway. */
1326 if (!seg->attr.fields.g && *count > 1)
1328 if ((seg->attr.fields.type & 0xc) != 0x4)
1330 /* expand-up */
1331 if (!(regs->eflags & EF_DF))
1333 if (*addr + *count * size - 1 < *addr ||
1334 *addr + *count * size - 1 > seg->limit)
1335 *count = (seg->limit + 1UL - *addr) / size;
1337 else
1339 if (*count - 1 > *addr / size)
1340 *count = *addr / size + 1;
1343 else
1345 /* expand-down */
1346 if (!(regs->eflags & EF_DF))
1348 if (*count - 1 > -(s32)*addr / size)
1349 *count = -(s32)*addr / size + 1UL;
1351 else
1353 if (*addr < (*count - 1) * size ||
1354 *addr - (*count - 1) * size <= seg->limit)
1355 *count = (*addr - seg->limit - 1) / size + 1;
1358 ASSERT(*count);
1361 *addr += seg->base;
1363 #ifdef __x86_64__
1364 else
1366 if (seg == &vmcb->fs || seg == &vmcb->gs)
1367 *addr += seg->base;
1369 if (!is_canonical_address(*addr) ||
1370 !is_canonical_address(*addr + size - 1))
1372 svm_inject_exception(v, TRAP_gp_fault, 1, 0);
1373 return 0;
1375 if (*count > (1UL << 48) / size)
1376 *count = (1UL << 48) / size;
1377 if (!(regs->eflags & EF_DF))
1379 if (*addr + *count * size - 1 < *addr ||
1380 !is_canonical_address(*addr + *count * size - 1))
1381 *count = (*addr & ~((1UL << 48) - 1)) / size;
1383 else
1385 if ((*count - 1) * size > *addr ||
1386 !is_canonical_address(*addr + (*count - 1) * size))
1387 *count = (*addr & ~((1UL << 48) - 1)) / size + 1;
1389 ASSERT(*count);
1391 #endif
1393 return 1;
1397 static void svm_io_instruction(struct vcpu *v)
1399 struct cpu_user_regs *regs;
1400 struct hvm_io_op *pio_opp;
1401 unsigned int port;
1402 unsigned int size, dir, df;
1403 ioio_info_t info;
1404 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1406 pio_opp = &current->arch.hvm_vcpu.io_op;
1407 pio_opp->instr = INSTR_PIO;
1408 pio_opp->flags = 0;
1410 regs = &pio_opp->io_context;
1412 /* Copy current guest state into io instruction state structure. */
1413 memcpy(regs, guest_cpu_user_regs(), HVM_CONTEXT_STACK_BYTES);
1415 info.bytes = vmcb->exitinfo1;
1417 port = info.fields.port; /* port used to be addr */
1418 dir = info.fields.type; /* direction */
1419 df = regs->eflags & X86_EFLAGS_DF ? 1 : 0;
1421 if (info.fields.sz32)
1422 size = 4;
1423 else if (info.fields.sz16)
1424 size = 2;
1425 else
1426 size = 1;
1428 if (dir==IOREQ_READ)
1429 HVMTRACE_2D(IO_READ, v, port, size);
1430 else
1431 HVMTRACE_3D(IO_WRITE, v, port, size, regs->eax);
1433 HVM_DBG_LOG(DBG_LEVEL_IO,
1434 "svm_io_instruction: port 0x%x eip=%x:%"PRIx64", "
1435 "exit_qualification = %"PRIx64,
1436 port, vmcb->cs.sel, (uint64_t)regs->eip, info.bytes);
1438 /* string instruction */
1439 if (info.fields.str)
1441 unsigned long addr, count;
1442 paddr_t paddr;
1443 unsigned long gfn;
1444 int sign = regs->eflags & X86_EFLAGS_DF ? -1 : 1;
1446 if (!svm_get_io_address(v, regs, size, info, &count, &addr))
1448 /* We failed to get a valid address, so don't do the IO operation -
1449 * it would just get worse if we did! Hopefully the guest is handling
1450 * the injected GP faults...
1451 */
1452 return;
1455 /* "rep" prefix */
1456 if (info.fields.rep)
1458 pio_opp->flags |= REPZ;
1461 /* Translate the address to a physical address */
1462 gfn = paging_gva_to_gfn(v, addr);
1463 if ( gfn == INVALID_GFN )
1465 /* The guest does not have the RAM address mapped.
1466 * Need to send in a page fault */
1467 int errcode = 0;
1468 /* IO read --> memory write */
1469 if ( dir == IOREQ_READ ) errcode |= PFEC_write_access;
1470 svm_hvm_inject_exception(TRAP_page_fault, errcode, addr);
1471 return;
1473 paddr = (paddr_t)gfn << PAGE_SHIFT | (addr & ~PAGE_MASK);
1475 /*
1476 * Handle string pio instructions that cross pages or that
1477 * are unaligned. See the comments in hvm_platform.c/handle_mmio()
1478 */
1479 if ((addr & PAGE_MASK) != ((addr + size - 1) & PAGE_MASK))
1481 unsigned long value = 0;
1483 pio_opp->flags |= OVERLAP;
1484 pio_opp->addr = addr;
1486 if (dir == IOREQ_WRITE) /* OUTS */
1488 if ( hvm_paging_enabled(current) )
1490 int rv = hvm_copy_from_guest_virt(&value, addr, size);
1491 if ( rv != 0 )
1493 /* Failed on the page-spanning copy. Inject PF into
1494 * the guest for the address where we failed. */
1495 addr += size - rv;
1496 gdprintk(XENLOG_DEBUG, "Pagefault reading non-io side "
1497 "of a page-spanning PIO: va=%#lx\n", addr);
1498 svm_hvm_inject_exception(TRAP_page_fault, 0, addr);
1499 return;
1502 else
1503 (void) hvm_copy_from_guest_phys(&value, addr, size);
1504 } else /* dir != IOREQ_WRITE */
1505 /* Remember where to write the result, as a *VA*.
1506 * Must be a VA so we can handle the page overlap
1507 * correctly in hvm_pio_assist() */
1508 pio_opp->addr = addr;
1510 if (count == 1)
1511 regs->eip = vmcb->exitinfo2;
1513 send_pio_req(port, 1, size, value, dir, df, 0);
1515 else
1517 unsigned long last_addr = sign > 0 ? addr + count * size - 1
1518 : addr - (count - 1) * size;
1520 if ((addr & PAGE_MASK) != (last_addr & PAGE_MASK))
1522 if (sign > 0)
1523 count = (PAGE_SIZE - (addr & ~PAGE_MASK)) / size;
1524 else
1525 count = (addr & ~PAGE_MASK) / size + 1;
1527 else
1528 regs->eip = vmcb->exitinfo2;
1530 send_pio_req(port, count, size, paddr, dir, df, 1);
1533 else
1535 /*
1536 * On SVM, the RIP of the instruction following the IN/OUT is saved in
1537 * ExitInfo2.
1538 */
1539 regs->eip = vmcb->exitinfo2;
1541 if (port == 0xe9 && dir == IOREQ_WRITE && size == 1)
1542 hvm_print_line(v, regs->eax); /* guest debug output */
1544 send_pio_req(port, 1, size, regs->eax, dir, df, 0);
1548 static void mov_from_cr(int cr, int gp, struct cpu_user_regs *regs)
1550 unsigned long value = 0;
1551 struct vcpu *v = current;
1552 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1554 switch ( cr )
1556 case 0:
1557 value = v->arch.hvm_vcpu.guest_cr[0];
1558 break;
1559 case 3:
1560 value = (unsigned long)v->arch.hvm_vcpu.guest_cr[3];
1561 break;
1562 case 4:
1563 value = (unsigned long)v->arch.hvm_vcpu.guest_cr[4];
1564 break;
1565 default:
1566 gdprintk(XENLOG_ERR, "invalid cr: %d\n", cr);
1567 domain_crash(v->domain);
1568 return;
1571 HVMTRACE_2D(CR_READ, v, cr, value);
1573 set_reg(gp, value, regs, vmcb);
1575 HVM_DBG_LOG(DBG_LEVEL_VMMU, "mov_from_cr: CR%d, value = %lx", cr, value);
1578 static int mov_to_cr(int gpreg, int cr, struct cpu_user_regs *regs)
1580 unsigned long value;
1581 struct vcpu *v = current;
1582 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1584 value = get_reg(gpreg, regs, vmcb);
1586 HVMTRACE_2D(CR_WRITE, v, cr, value);
1588 HVM_DBG_LOG(DBG_LEVEL_1, "mov_to_cr: CR%d, value = %lx, current = %p",
1589 cr, value, v);
1591 switch ( cr )
1593 case 0:
1594 return hvm_set_cr0(value);
1595 case 3:
1596 return hvm_set_cr3(value);
1597 case 4:
1598 return hvm_set_cr4(value);
1599 default:
1600 gdprintk(XENLOG_ERR, "invalid cr: %d\n", cr);
1601 domain_crash(v->domain);
1602 return 0;
1605 return 1;
1608 static void svm_cr_access(
1609 struct vcpu *v, unsigned int cr, unsigned int type,
1610 struct cpu_user_regs *regs)
1612 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1613 int inst_len = 0;
1614 int index,addr_size,i;
1615 unsigned int gpreg,offset;
1616 unsigned long value,addr;
1617 u8 buffer[MAX_INST_LEN];
1618 u8 prefix = 0;
1619 u8 modrm;
1620 enum x86_segment seg;
1621 int result = 1;
1622 enum instruction_index list_a[] = {INSTR_MOV2CR, INSTR_CLTS, INSTR_LMSW};
1623 enum instruction_index list_b[] = {INSTR_MOVCR2, INSTR_SMSW};
1624 enum instruction_index match;
1626 inst_copy_from_guest(buffer, svm_rip2pointer(v), sizeof(buffer));
1628 /* Get the index of the first actual instruction byte - we will need to
1629 know where the prefix lives later on. */
1630 index = skip_prefix_bytes(buffer, sizeof(buffer));
1632 if ( type == TYPE_MOV_TO_CR )
1634 inst_len = __get_instruction_length_from_list(
1635 v, list_a, ARRAY_SIZE(list_a), &buffer[index], &match);
1637 else /* type == TYPE_MOV_FROM_CR */
1639 inst_len = __get_instruction_length_from_list(
1640 v, list_b, ARRAY_SIZE(list_b), &buffer[index], &match);
1643 ASSERT(inst_len > 0);
1645 inst_len += index;
1647 /* Check for REX prefix - it's ALWAYS the last byte of any prefix bytes */
1648 if (index > 0 && (buffer[index-1] & 0xF0) == 0x40)
1649 prefix = buffer[index-1];
1651 HVM_DBG_LOG(DBG_LEVEL_1, "eip = %lx", (unsigned long)regs->eip);
1653 switch ( match )
1656 case INSTR_MOV2CR:
1657 gpreg = decode_src_reg(prefix, buffer[index+2]);
1658 result = mov_to_cr(gpreg, cr, regs);
1659 break;
1661 case INSTR_MOVCR2:
1662 gpreg = decode_src_reg(prefix, buffer[index+2]);
1663 mov_from_cr(cr, gpreg, regs);
1664 break;
1666 case INSTR_CLTS:
1667 /* TS being cleared means that it's time to restore fpu state. */
1668 setup_fpu(current);
1669 vmcb->exception_intercepts &= ~(1U << TRAP_no_device);
1670 vmcb->cr0 &= ~X86_CR0_TS; /* clear TS */
1671 v->arch.hvm_vcpu.guest_cr[0] &= ~X86_CR0_TS; /* clear TS */
1672 HVMTRACE_0D(CLTS, current);
1673 break;
1675 case INSTR_LMSW:
1676 gpreg = decode_src_reg(prefix, buffer[index+2]);
1677 value = get_reg(gpreg, regs, vmcb) & 0xF;
1678 value = (v->arch.hvm_vcpu.guest_cr[0] & ~0xF) | value;
1679 result = hvm_set_cr0(value);
1680 HVMTRACE_1D(LMSW, current, value);
1681 break;
1683 case INSTR_SMSW:
1684 value = v->arch.hvm_vcpu.guest_cr[0] & 0xFFFF;
1685 modrm = buffer[index+2];
1686 addr_size = svm_guest_x86_mode(v);
1687 if ( addr_size < 2 )
1688 addr_size = 2;
1689 if ( likely((modrm & 0xC0) >> 6 == 3) )
1691 gpreg = decode_src_reg(prefix, modrm);
1692 set_reg(gpreg, value, regs, vmcb);
1694 /*
1695 * For now, only implement decode of the offset mode, since that's the
1696 * only mode observed in a real-world OS. This code is also making the
1697 * assumption that we'll never hit this code in long mode.
1698 */
1699 else if ( (modrm == 0x26) || (modrm == 0x25) )
1701 seg = x86_seg_ds;
1702 i = index;
1703 /* Segment or address size overrides? */
1704 while ( i-- )
1706 switch ( buffer[i] )
1708 case 0x26: seg = x86_seg_es; break;
1709 case 0x2e: seg = x86_seg_cs; break;
1710 case 0x36: seg = x86_seg_ss; break;
1711 case 0x64: seg = x86_seg_fs; break;
1712 case 0x65: seg = x86_seg_gs; break;
1713 case 0x67: addr_size ^= 6; break;
1716 /* Bail unless this really is a seg_base + offset case */
1717 if ( ((modrm == 0x26) && (addr_size == 4)) ||
1718 ((modrm == 0x25) && (addr_size == 2)) )
1720 gdprintk(XENLOG_ERR, "SMSW emulation at guest address: "
1721 "%lx failed due to unhandled addressing mode."
1722 "ModRM byte was: %x \n", svm_rip2pointer(v), modrm);
1723 domain_crash(v->domain);
1725 inst_len += addr_size;
1726 offset = *(( unsigned int *) ( void *) &buffer[index + 3]);
1727 offset = ( addr_size == 4 ) ? offset : ( offset & 0xFFFF );
1728 addr = hvm_get_segment_base(v, seg);
1729 addr += offset;
1730 hvm_copy_to_guest_virt(addr,&value,2);
1732 else
1734 gdprintk(XENLOG_ERR, "SMSW emulation at guest address: %lx "
1735 "failed due to unhandled addressing mode!"
1736 "ModRM byte was: %x \n", svm_rip2pointer(v), modrm);
1737 domain_crash(v->domain);
1739 break;
1741 default:
1742 BUG();
1745 ASSERT(inst_len);
1747 if ( result )
1748 __update_guest_eip(regs, inst_len);
1751 static void svm_do_msr_access(
1752 struct vcpu *v, struct cpu_user_regs *regs)
1754 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1755 int inst_len;
1756 u64 msr_content=0;
1757 u32 ecx = regs->ecx, eax, edx;
1759 HVM_DBG_LOG(DBG_LEVEL_1, "ecx=%x, eax=%x, edx=%x, exitinfo = %lx",
1760 ecx, (u32)regs->eax, (u32)regs->edx,
1761 (unsigned long)vmcb->exitinfo1);
1763 /* is it a read? */
1764 if (vmcb->exitinfo1 == 0)
1766 switch (ecx) {
1767 case MSR_IA32_TSC:
1768 msr_content = hvm_get_guest_time(v);
1769 break;
1771 case MSR_IA32_APICBASE:
1772 msr_content = vcpu_vlapic(v)->hw.apic_base_msr;
1773 break;
1775 case MSR_EFER:
1776 msr_content = v->arch.hvm_vcpu.guest_efer;
1777 break;
1779 case MSR_IA32_MC4_MISC: /* Threshold register */
1780 /*
1781 * MCA/MCE: We report that the threshold register is unavailable
1782 * for OS use (locked by the BIOS).
1783 */
1784 msr_content = 1ULL << 61; /* MC4_MISC.Locked */
1785 break;
1787 case MSR_IA32_EBC_FREQUENCY_ID:
1788 /*
1789 * This Intel-only register may be accessed if this HVM guest
1790 * has been migrated from an Intel host. The value zero is not
1791 * particularly meaningful, but at least avoids the guest crashing!
1792 */
1793 msr_content = 0;
1794 break;
1796 case MSR_K8_VM_HSAVE_PA:
1797 svm_inject_exception(v, TRAP_gp_fault, 1, 0);
1798 break;
1800 case MSR_IA32_MCG_CAP:
1801 case MSR_IA32_MCG_STATUS:
1802 case MSR_IA32_MC0_STATUS:
1803 case MSR_IA32_MC1_STATUS:
1804 case MSR_IA32_MC2_STATUS:
1805 case MSR_IA32_MC3_STATUS:
1806 case MSR_IA32_MC4_STATUS:
1807 case MSR_IA32_MC5_STATUS:
1808 /* No point in letting the guest see real MCEs */
1809 msr_content = 0;
1810 break;
1812 case MSR_IA32_DEBUGCTLMSR:
1813 msr_content = vmcb->debugctlmsr;
1814 break;
1816 case MSR_IA32_LASTBRANCHFROMIP:
1817 msr_content = vmcb->lastbranchfromip;
1818 break;
1820 case MSR_IA32_LASTBRANCHTOIP:
1821 msr_content = vmcb->lastbranchtoip;
1822 break;
1824 case MSR_IA32_LASTINTFROMIP:
1825 msr_content = vmcb->lastintfromip;
1826 break;
1828 case MSR_IA32_LASTINTTOIP:
1829 msr_content = vmcb->lastinttoip;
1830 break;
1832 default:
1833 if ( rdmsr_hypervisor_regs(ecx, &eax, &edx) ||
1834 rdmsr_safe(ecx, eax, edx) == 0 )
1836 regs->eax = eax;
1837 regs->edx = edx;
1838 goto done;
1840 svm_inject_exception(v, TRAP_gp_fault, 1, 0);
1841 return;
1843 regs->eax = msr_content & 0xFFFFFFFF;
1844 regs->edx = msr_content >> 32;
1846 done:
1847 hvmtrace_msr_read(v, ecx, msr_content);
1848 HVM_DBG_LOG(DBG_LEVEL_1, "returns: ecx=%x, eax=%lx, edx=%lx",
1849 ecx, (unsigned long)regs->eax, (unsigned long)regs->edx);
1851 inst_len = __get_instruction_length(v, INSTR_RDMSR, NULL);
1853 else
1855 msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
1857 hvmtrace_msr_write(v, ecx, msr_content);
1859 switch (ecx)
1861 case MSR_IA32_TSC:
1862 hvm_set_guest_time(v, msr_content);
1863 pt_reset(v);
1864 break;
1866 case MSR_IA32_APICBASE:
1867 vlapic_msr_set(vcpu_vlapic(v), msr_content);
1868 break;
1870 case MSR_K8_VM_HSAVE_PA:
1871 svm_inject_exception(v, TRAP_gp_fault, 1, 0);
1872 break;
1874 case MSR_IA32_DEBUGCTLMSR:
1875 vmcb->debugctlmsr = msr_content;
1876 if ( !msr_content || !cpu_has_svm_lbrv )
1877 break;
1878 vmcb->lbr_control.fields.enable = 1;
1879 svm_disable_intercept_for_msr(v, MSR_IA32_DEBUGCTLMSR);
1880 svm_disable_intercept_for_msr(v, MSR_IA32_LASTBRANCHFROMIP);
1881 svm_disable_intercept_for_msr(v, MSR_IA32_LASTBRANCHTOIP);
1882 svm_disable_intercept_for_msr(v, MSR_IA32_LASTINTFROMIP);
1883 svm_disable_intercept_for_msr(v, MSR_IA32_LASTINTTOIP);
1884 break;
1886 case MSR_IA32_LASTBRANCHFROMIP:
1887 vmcb->lastbranchfromip = msr_content;
1888 break;
1890 case MSR_IA32_LASTBRANCHTOIP:
1891 vmcb->lastbranchtoip = msr_content;
1892 break;
1894 case MSR_IA32_LASTINTFROMIP:
1895 vmcb->lastintfromip = msr_content;
1896 break;
1898 case MSR_IA32_LASTINTTOIP:
1899 vmcb->lastinttoip = msr_content;
1900 break;
1902 default:
1903 switch ( long_mode_do_msr_write(regs) )
1905 case HNDL_unhandled:
1906 wrmsr_hypervisor_regs(ecx, regs->eax, regs->edx);
1907 break;
1908 case HNDL_exception_raised:
1909 return;
1910 case HNDL_done:
1911 break;
1913 break;
1916 inst_len = __get_instruction_length(v, INSTR_WRMSR, NULL);
1919 __update_guest_eip(regs, inst_len);
1922 static void svm_vmexit_do_hlt(struct vmcb_struct *vmcb,
1923 struct cpu_user_regs *regs)
1925 struct hvm_intack intack = hvm_vcpu_has_pending_irq(current);
1927 __update_guest_eip(regs, 1);
1929 /* Check for interrupt not handled or new interrupt. */
1930 if ( vmcb->eventinj.fields.v ||
1931 ((intack.source != hvm_intsrc_none) &&
1932 !svm_interrupt_blocked(current, intack)) )
1934 HVMTRACE_1D(HLT, current, /*int pending=*/ 1);
1935 return;
1938 HVMTRACE_1D(HLT, current, /*int pending=*/ 0);
1939 hvm_hlt(regs->eflags);
1942 static void svm_vmexit_do_invd(struct cpu_user_regs *regs)
1944 int inst_len;
1946 /* Invalidate the cache - we can't really do that safely - maybe we should
1947 * WBINVD, but I think it's just fine to completely ignore it - we should
1948 * have cache-snooping that solves it anyways. -- Mats P.
1949 */
1951 /* Tell the user that we did this - just in case someone runs some really
1952 * weird operating system and wants to know why it's not working...
1953 */
1954 gdprintk(XENLOG_WARNING, "INVD instruction intercepted - ignored\n");
1956 inst_len = __get_instruction_length(current, INSTR_INVD, NULL);
1957 __update_guest_eip(regs, inst_len);
1960 void svm_handle_invlpg(const short invlpga, struct cpu_user_regs *regs)
1962 struct vcpu *v = current;
1963 u8 opcode[MAX_INST_LEN], prefix, length = MAX_INST_LEN;
1964 unsigned long g_vaddr;
1965 int inst_len;
1967 /*
1968 * It is unknown how many bytes the invlpg instruction will take, so use
1969 * the maximum instruction length here.
1970 */
1971 if (inst_copy_from_guest(opcode, svm_rip2pointer(v), length) < length)
1973 gdprintk(XENLOG_ERR, "Error reading memory %d bytes\n", length);
1974 domain_crash(v->domain);
1975 return;
1978 if (invlpga)
1980 inst_len = __get_instruction_length(v, INSTR_INVLPGA, opcode);
1981 ASSERT(inst_len > 0);
1982 __update_guest_eip(regs, inst_len);
1984 /*
1985 * The address is implicit on this instruction. At the moment, we don't
1986 * use ecx (ASID) to identify individual guest pages.
1987 */
1988 g_vaddr = regs->eax;
1990 else
1992 /* What about multiple prefix codes? */
1993 prefix = (is_prefix(opcode[0])?opcode[0]:0);
1994 inst_len = __get_instruction_length(v, INSTR_INVLPG, opcode);
1995 ASSERT(inst_len > 0);
1997 inst_len--;
1998 length -= inst_len;
2000 /*
2001 * Decode memory operand of the instruction including ModRM, SIB, and
2002 * displacement to get effective address and length in bytes. Assume
2003 * the system in either 32- or 64-bit mode.
2004 */
2005 g_vaddr = get_effective_addr_modrm64(regs, prefix, inst_len,
2006 &opcode[inst_len], &length);
2008 inst_len += length;
2009 __update_guest_eip(regs, inst_len);
2012 HVMTRACE_3D(INVLPG, v, (invlpga?1:0), g_vaddr, (invlpga?regs->ecx:0));
2014 paging_invlpg(v, g_vaddr);
2015 svm_asid_g_invlpg(v, g_vaddr);
2019 /*
2020 * Reset to realmode causes execution to start at 0xF000:0xFFF0 in
2021 * 16-bit realmode. Basically, this mimics a processor reset.
2023 * returns 0 on success, non-zero otherwise
2024 */
2025 static int svm_reset_to_realmode(struct vcpu *v,
2026 struct cpu_user_regs *regs)
2027 {
2028 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2030 memset(regs, 0, sizeof(struct cpu_user_regs));
2032 regs->eflags = 2;
2034 v->arch.hvm_vcpu.guest_cr[0] = X86_CR0_ET;
2035 svm_update_guest_cr(v, 0);
2037 v->arch.hvm_vcpu.guest_cr[2] = 0;
2038 svm_update_guest_cr(v, 2);
2040 v->arch.hvm_vcpu.guest_cr[4] = 0;
2041 svm_update_guest_cr(v, 4);
2043 vmcb->efer = EFER_SVME;
2045 /* This will jump to ROMBIOS */
2046 regs->eip = 0xFFF0;
2048 /* Set up the segment registers and all their hidden states. */
2049 vmcb->cs.sel = 0xF000;
2050 vmcb->cs.attr.bytes = 0x089b;
2051 vmcb->cs.limit = 0xffff;
2052 vmcb->cs.base = 0x000F0000;
2054 vmcb->ss.sel = 0x00;
2055 vmcb->ss.attr.bytes = 0x0893;
2056 vmcb->ss.limit = 0xffff;
2057 vmcb->ss.base = 0x00;
2059 vmcb->ds.sel = 0x00;
2060 vmcb->ds.attr.bytes = 0x0893;
2061 vmcb->ds.limit = 0xffff;
2062 vmcb->ds.base = 0x00;
2064 vmcb->es.sel = 0x00;
2065 vmcb->es.attr.bytes = 0x0893;
2066 vmcb->es.limit = 0xffff;
2067 vmcb->es.base = 0x00;
2069 vmcb->fs.sel = 0x00;
2070 vmcb->fs.attr.bytes = 0x0893;
2071 vmcb->fs.limit = 0xffff;
2072 vmcb->fs.base = 0x00;
2074 vmcb->gs.sel = 0x00;
2075 vmcb->gs.attr.bytes = 0x0893;
2076 vmcb->gs.limit = 0xffff;
2077 vmcb->gs.base = 0x00;
2079 vmcb->ldtr.sel = 0x00;
2080 vmcb->ldtr.attr.bytes = 0x0000;
2081 vmcb->ldtr.limit = 0x0;
2082 vmcb->ldtr.base = 0x00;
2084 vmcb->gdtr.sel = 0x00;
2085 vmcb->gdtr.attr.bytes = 0x0000;
2086 vmcb->gdtr.limit = 0x0;
2087 vmcb->gdtr.base = 0x00;
2089 vmcb->tr.sel = 0;
2090 vmcb->tr.attr.bytes = 0;
2091 vmcb->tr.limit = 0x0;
2092 vmcb->tr.base = 0;
2094 vmcb->idtr.sel = 0x00;
2095 vmcb->idtr.attr.bytes = 0x0000;
2096 vmcb->idtr.limit = 0x3ff;
2097 vmcb->idtr.base = 0x00;
2099 return 0;
2100 }
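/*
 * With the hidden CS state above, the first instruction is fetched from
 * linear address 0x000F0000 + 0xFFF0 = 0xFFFF0, the traditional real-mode
 * reset vector inside the BIOS region, where the guest BIOS places its
 * startup jump.
 */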
2102 asmlinkage void svm_vmexit_handler(struct cpu_user_regs *regs)
2104 unsigned int exit_reason;
2105 struct vcpu *v = current;
2106 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2107 eventinj_t eventinj;
2108 int inst_len, rc;
2110 /*
2111 * Before doing anything else, we need to sync up the VLAPIC's TPR with
2112 * SVM's vTPR. It's OK if the guest doesn't touch CR8 (e.g. 32-bit Windows)
2113 * because we update the vTPR on MMIO writes to the TPR.
2114 */
2115 vlapic_set_reg(vcpu_vlapic(v), APIC_TASKPRI,
2116 (vmcb->vintr.fields.tpr & 0x0F) << 4);
2118 exit_reason = vmcb->exitcode;
2120 hvmtrace_vmexit(v, regs->eip, exit_reason);
2122 if ( unlikely(exit_reason == VMEXIT_INVALID) )
2124 svm_dump_vmcb(__func__, vmcb);
2125 goto exit_and_crash;
2128 perfc_incra(svmexits, exit_reason);
2130 /* Event delivery caused this intercept? Queue for redelivery. */
2131 eventinj = vmcb->exitintinfo;
2132 if ( unlikely(eventinj.fields.v) &&
2133 hvm_event_needs_reinjection(eventinj.fields.type,
2134 eventinj.fields.vector) )
2135 vmcb->eventinj = eventinj;
2137 switch ( exit_reason )
2139 case VMEXIT_INTR:
2140 /* Asynchronous event, handled when we STGI'd after the VMEXIT. */
2141 HVMTRACE_0D(INTR, v);
2142 break;
2144 case VMEXIT_NMI:
2145 /* Asynchronous event, handled when we STGI'd after the VMEXIT. */
2146 HVMTRACE_0D(NMI, v);
2147 break;
2149 case VMEXIT_SMI:
2150 /* Asynchronous event, handled when we STGI'd after the VMEXIT. */
2151 HVMTRACE_0D(SMI, v);
2152 break;
2154 case VMEXIT_EXCEPTION_DB:
2155 if ( !v->domain->debugger_attached )
2156 goto exit_and_crash;
2157 domain_pause_for_debugger();
2158 break;
2160 case VMEXIT_EXCEPTION_BP:
2161 if ( !v->domain->debugger_attached )
2162 goto exit_and_crash;
2163 /* AMD Vol2, 15.11: INT3, INTO, BOUND intercepts do not update RIP. */
2164 inst_len = __get_instruction_length(v, INSTR_INT3, NULL);
2165 __update_guest_eip(regs, inst_len);
2166 domain_pause_for_debugger();
2167 break;
2169 case VMEXIT_EXCEPTION_NM:
2170 svm_do_no_device_fault(vmcb);
2171 break;
2173 case VMEXIT_EXCEPTION_PF: {
2174 unsigned long va;
2175 va = vmcb->exitinfo2;
2176 regs->error_code = vmcb->exitinfo1;
2177 HVM_DBG_LOG(DBG_LEVEL_VMMU,
2178 "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx",
2179 (unsigned long)regs->eax, (unsigned long)regs->ebx,
2180 (unsigned long)regs->ecx, (unsigned long)regs->edx,
2181 (unsigned long)regs->esi, (unsigned long)regs->edi);
2183 if ( paging_fault(va, regs) )
2185 HVMTRACE_2D(PF_XEN, v, va, regs->error_code);
2186 break;
2189 v->arch.hvm_vcpu.guest_cr[2] = vmcb->cr2 = va;
2190 svm_inject_exception(v, TRAP_page_fault, 1, regs->error_code);
2191 break;
2194 /* Asynchronous event, handled when we STGI'd after the VMEXIT. */
2195 case VMEXIT_EXCEPTION_MC:
2196 HVMTRACE_0D(MCE, v);
2197 break;
2199 case VMEXIT_VINTR:
2200 vmcb->vintr.fields.irq = 0;
2201 vmcb->general1_intercepts &= ~GENERAL1_INTERCEPT_VINTR;
2202 break;
2204 case VMEXIT_INVD:
2205 svm_vmexit_do_invd(regs);
2206 break;
2208 case VMEXIT_TASK_SWITCH: {
2209 enum hvm_task_switch_reason reason;
2210 int32_t errcode = -1;
2211 if ( (vmcb->exitinfo2 >> 36) & 1 )
2212 reason = TSW_iret;
2213 else if ( (vmcb->exitinfo2 >> 38) & 1 )
2214 reason = TSW_jmp;
2215 else
2216 reason = TSW_call_or_int;
2217 if ( (vmcb->exitinfo2 >> 44) & 1 )
2218 errcode = (uint32_t)vmcb->exitinfo2;
2219 hvm_task_switch((uint16_t)vmcb->exitinfo1, reason, errcode);
2220 break;
2223 case VMEXIT_CPUID:
2224 svm_vmexit_do_cpuid(vmcb, regs);
2225 break;
2227 case VMEXIT_HLT:
2228 svm_vmexit_do_hlt(vmcb, regs);
2229 break;
2231 case VMEXIT_INVLPG:
2232 svm_handle_invlpg(0, regs);
2233 break;
2235 case VMEXIT_INVLPGA:
2236 svm_handle_invlpg(1, regs);
2237 break;
2239 case VMEXIT_VMMCALL:
2240 inst_len = __get_instruction_length(v, INSTR_VMCALL, NULL);
2241 ASSERT(inst_len > 0);
2242 HVMTRACE_1D(VMMCALL, v, regs->eax);
2243 rc = hvm_do_hypercall(regs);
2244 if ( rc != HVM_HCALL_preempted )
2246 __update_guest_eip(regs, inst_len);
2247 if ( rc == HVM_HCALL_invalidate )
2248 send_invalidate_req();
2250 break;
2252 case VMEXIT_CR0_READ ... VMEXIT_CR15_READ:
2253 svm_cr_access(v, exit_reason - VMEXIT_CR0_READ,
2254 TYPE_MOV_FROM_CR, regs);
2255 break;
2257 case VMEXIT_CR0_WRITE ... VMEXIT_CR15_WRITE:
2258 svm_cr_access(v, exit_reason - VMEXIT_CR0_WRITE,
2259 TYPE_MOV_TO_CR, regs);
2260 break;
2262 case VMEXIT_DR0_READ ... VMEXIT_DR7_READ:
2263 case VMEXIT_DR0_WRITE ... VMEXIT_DR7_WRITE:
2264 svm_dr_access(v, regs);
2265 break;
2267 case VMEXIT_IOIO:
2268 svm_io_instruction(v);
2269 break;
2271 case VMEXIT_MSR:
2272 svm_do_msr_access(v, regs);
2273 break;
2275 case VMEXIT_SHUTDOWN:
2276 hvm_triple_fault();
2277 break;
2279 case VMEXIT_VMRUN:
2280 case VMEXIT_VMLOAD:
2281 case VMEXIT_VMSAVE:
2282 case VMEXIT_STGI:
2283 case VMEXIT_CLGI:
2284 case VMEXIT_SKINIT:
2285 /* Report "Invalid opcode" on any VM-operation except VMMCALL */
2286 svm_inject_exception(v, TRAP_invalid_op, 0, 0);
2287 break;
2289 case VMEXIT_NPF:
2290 perfc_incra(svmexits, VMEXIT_NPF_PERFC);
2291 regs->error_code = vmcb->exitinfo1;
2292 svm_do_nested_pgfault(vmcb->exitinfo2, regs);
2293 break;
2295 default:
2296 exit_and_crash:
2297 gdprintk(XENLOG_ERR, "unexpected VMEXIT: exit reason = 0x%x, "
2298 "exitinfo1 = %"PRIx64", exitinfo2 = %"PRIx64"\n",
2299 exit_reason,
2300 (u64)vmcb->exitinfo1, (u64)vmcb->exitinfo2);
2301 domain_crash(v->domain);
2302 break;
2306 asmlinkage void svm_trace_vmentry(void)
2307 {
2308 struct vcpu *v = current;
2310 /* This is the last C code before the VMRUN instruction. */
2311 hvmtrace_vmentry(v);
2312 }
2314 /*
2315 * Local variables:
2316 * mode: C
2317 * c-set-style: "BSD"
2318 * c-basic-offset: 4
2319 * tab-width: 4
2320 * indent-tabs-mode: nil
2321 * End:
2322 */