ia64/xen-unstable

xen/arch/x86/hvm/svm/svm.c @ 14090:cdc765772f69

hvm: Clean up initialisation of hvm_funcs.
Signed-off-by: Keir Fraser <keir@xensource.com>

author   kfraser@localhost.localdomain
date     Fri Feb 23 11:32:25 2007 +0000 (2007-02-23)
parents  e8470a1a01af
children d2a91b73899a

line source
1 /*
2 * svm.c: handling SVM architecture-related VM exits
3 * Copyright (c) 2004, Intel Corporation.
4 * Copyright (c) 2005, AMD Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
17 * Place - Suite 330, Boston, MA 02111-1307 USA.
18 *
19 */
21 #include <xen/config.h>
22 #include <xen/init.h>
23 #include <xen/lib.h>
24 #include <xen/trace.h>
25 #include <xen/sched.h>
26 #include <xen/irq.h>
27 #include <xen/softirq.h>
28 #include <xen/hypercall.h>
29 #include <xen/domain_page.h>
30 #include <asm/current.h>
31 #include <asm/io.h>
32 #include <asm/paging.h>
33 #include <asm/p2m.h>
34 #include <asm/regs.h>
35 #include <asm/cpufeature.h>
36 #include <asm/processor.h>
37 #include <asm/types.h>
38 #include <asm/msr.h>
39 #include <asm/spinlock.h>
40 #include <asm/hvm/hvm.h>
41 #include <asm/hvm/support.h>
42 #include <asm/hvm/io.h>
43 #include <asm/hvm/svm/svm.h>
44 #include <asm/hvm/svm/vmcb.h>
45 #include <asm/hvm/svm/emulate.h>
46 #include <asm/hvm/svm/vmmcall.h>
47 #include <asm/hvm/svm/intr.h>
48 #include <asm/x86_emulate.h>
49 #include <public/sched.h>
50 #include <asm/hvm/vpt.h>
52 #define SVM_EXTRA_DEBUG
54 #define set_segment_register(name, value) \
55 __asm__ __volatile__ ( "movw %%ax ,%%" STR(name) "" : : "a" (value) )
57 /* External functions. We should move these to some suitable header file(s) */
59 extern int inst_copy_from_guest(unsigned char *buf, unsigned long guest_eip,
60 int inst_len);
61 extern asmlinkage void do_IRQ(struct cpu_user_regs *);
62 extern void svm_dump_inst(unsigned long eip);
63 extern int svm_dbg_on;
64 void svm_dump_regs(const char *from, struct cpu_user_regs *regs);
66 static int svm_do_vmmcall_reset_to_realmode(struct vcpu *v,
67 struct cpu_user_regs *regs);
69 /* va of hardware host save area */
70 static void *hsa[NR_CPUS] __read_mostly;
72 /* vmcb used for extended host state */
73 static void *root_vmcb[NR_CPUS] __read_mostly;
75 /* physical address of above for host VMSAVE/VMLOAD */
76 u64 root_vmcb_pa[NR_CPUS] __read_mostly;
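/*
 * Queue an exception for injection into the guest: build an EVENTINJ
 * record (valid bit, exception type, vector, optional error code) and
 * store it in the VMCB so the processor delivers it on the next VMRUN.
 */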
78 static inline void svm_inject_exception(struct vcpu *v, int trap,
79 int ev, int error_code)
80 {
81 eventinj_t event;
82 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
84 event.bytes = 0;
85 event.fields.v = 1;
86 event.fields.type = EVENTTYPE_EXCEPTION;
87 event.fields.vector = trap;
88 event.fields.ev = ev;
89 event.fields.errorcode = error_code;
91 ASSERT(vmcb->eventinj.fields.v == 0);
93 vmcb->eventinj = event;
94 }
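/*
 * Disable SVM on this CPU: clear EFER.SVME, release the per-CPU host
 * save area (and clear the VM_HSAVE_PA MSR), and free the per-CPU
 * root VMCB.
 */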
96 static void stop_svm(void)
97 {
98 u32 eax, edx;
99 int cpu = smp_processor_id();
101 /* We turn off the EFER_SVME bit. */
102 rdmsr(MSR_EFER, eax, edx);
103 eax &= ~EFER_SVME;
104 wrmsr(MSR_EFER, eax, edx);
106 /* release the HSA */
107 free_host_save_area(hsa[cpu]);
108 hsa[cpu] = NULL;
109 wrmsr(MSR_K8_VM_HSAVE_PA, 0, 0 );
111 /* free up the root vmcb */
112 free_vmcb(root_vmcb[cpu]);
113 root_vmcb[cpu] = NULL;
114 root_vmcb_pa[cpu] = 0;
115 }
117 static void svm_store_cpu_guest_regs(
118 struct vcpu *v, struct cpu_user_regs *regs, unsigned long *crs)
119 {
120 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
122 if ( regs != NULL )
123 {
124 regs->eip = vmcb->rip;
125 regs->esp = vmcb->rsp;
126 regs->eflags = vmcb->rflags;
127 regs->cs = vmcb->cs.sel;
128 regs->ds = vmcb->ds.sel;
129 regs->es = vmcb->es.sel;
130 regs->ss = vmcb->ss.sel;
131 regs->gs = vmcb->gs.sel;
132 regs->fs = vmcb->fs.sel;
133 }
135 if ( crs != NULL )
136 {
137 /* Return the guest's control registers. */
138 crs[0] = v->arch.hvm_svm.cpu_shadow_cr0;
139 crs[2] = v->arch.hvm_svm.cpu_cr2;
140 crs[3] = v->arch.hvm_svm.cpu_cr3;
141 crs[4] = v->arch.hvm_svm.cpu_shadow_cr4;
142 }
143 }
145 static int svm_paging_enabled(struct vcpu *v)
146 {
147 unsigned long cr0;
149 cr0 = v->arch.hvm_svm.cpu_shadow_cr0;
151 return (cr0 & X86_CR0_PE) && (cr0 & X86_CR0_PG);
152 }
154 static int svm_pae_enabled(struct vcpu *v)
155 {
156 unsigned long cr4;
158 if(!svm_paging_enabled(v))
159 return 0;
161 cr4 = v->arch.hvm_svm.cpu_shadow_cr4;
163 return (cr4 & X86_CR4_PAE);
164 }
166 static int svm_long_mode_enabled(struct vcpu *v)
167 {
168 return test_bit(SVM_CPU_STATE_LMA_ENABLED, &v->arch.hvm_svm.cpu_state);
169 }
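/*
 * Handle a guest RDMSR of the long-mode/syscall MSRs (EFER, FS/GS base,
 * KernelGSBase, STAR/LSTAR/CSTAR, SFMASK) from the values cached in the
 * VMCB; EFER is reported with the SVME bit hidden from the guest.
 */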
171 static inline int long_mode_do_msr_read(struct cpu_user_regs *regs)
172 {
173 u64 msr_content = 0;
174 struct vcpu *v = current;
175 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
177 switch ((u32)regs->ecx)
178 {
179 case MSR_EFER:
180 msr_content = vmcb->efer;
181 msr_content &= ~EFER_SVME;
182 break;
184 #ifdef __x86_64__
185 case MSR_FS_BASE:
186 msr_content = vmcb->fs.base;
187 goto check_long_mode;
189 case MSR_GS_BASE:
190 msr_content = vmcb->gs.base;
191 goto check_long_mode;
193 case MSR_SHADOW_GS_BASE:
194 msr_content = vmcb->kerngsbase;
195 check_long_mode:
196 if ( !svm_long_mode_enabled(v) )
197 {
198 svm_inject_exception(v, TRAP_gp_fault, 1, 0);
199 return 0;
200 }
201 break;
202 #endif
204 case MSR_STAR:
205 msr_content = vmcb->star;
206 break;
208 case MSR_LSTAR:
209 msr_content = vmcb->lstar;
210 break;
212 case MSR_CSTAR:
213 msr_content = vmcb->cstar;
214 break;
216 case MSR_SYSCALL_MASK:
217 msr_content = vmcb->sfmask;
218 break;
219 default:
220 return 0;
221 }
223 HVM_DBG_LOG(DBG_LEVEL_2, "msr_content: %"PRIx64"\n",
224 msr_content);
226 regs->eax = (u32)(msr_content >> 0);
227 regs->edx = (u32)(msr_content >> 32);
228 return 1;
229 }
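/*
 * Handle a guest WRMSR of the same MSR set: validate EFER reserved bits
 * and LME transitions, require canonical addresses for the base MSRs,
 * and inject #GP on any violation before updating the VMCB fields.
 */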
231 static inline int long_mode_do_msr_write(struct cpu_user_regs *regs)
232 {
233 u64 msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
234 u32 ecx = regs->ecx;
235 struct vcpu *v = current;
236 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
238 HVM_DBG_LOG(DBG_LEVEL_1, "msr %x msr_content %"PRIx64"\n",
239 ecx, msr_content);
241 switch ( ecx )
242 {
243 case MSR_EFER:
244 /* offending reserved bit will cause #GP */
245 if ( msr_content & ~(EFER_LME | EFER_LMA | EFER_NX | EFER_SCE) )
246 {
247 gdprintk(XENLOG_WARNING, "Trying to set reserved bit in "
248 "EFER: %"PRIx64"\n", msr_content);
249 goto gp_fault;
250 }
252 #ifdef __x86_64__
253 /* LME: 0 -> 1 */
254 if ( msr_content & EFER_LME &&
255 !test_bit(SVM_CPU_STATE_LME_ENABLED, &v->arch.hvm_svm.cpu_state))
256 {
257 if ( svm_paging_enabled(v) ||
258 !test_bit(SVM_CPU_STATE_PAE_ENABLED,
259 &v->arch.hvm_svm.cpu_state) )
260 {
261 gdprintk(XENLOG_WARNING, "Trying to set LME bit when "
262 "in paging mode or PAE bit is not set\n");
263 goto gp_fault;
264 }
265 set_bit(SVM_CPU_STATE_LME_ENABLED, &v->arch.hvm_svm.cpu_state);
266 }
268 /* We have already recorded that we want LME, so it will be set
269 * next time CR0 gets updated. So we clear that bit and continue.
270 */
271 if ((msr_content ^ vmcb->efer) & EFER_LME)
272 msr_content &= ~EFER_LME;
273 /* No update for LME/LMA since it has no effect. */
274 #endif
275 vmcb->efer = msr_content | EFER_SVME;
276 break;
278 #ifdef __x86_64__
279 case MSR_FS_BASE:
280 case MSR_GS_BASE:
281 case MSR_SHADOW_GS_BASE:
282 if ( !svm_long_mode_enabled(v) )
283 goto gp_fault;
285 if ( !is_canonical_address(msr_content) )
286 goto uncanonical_address;
288 if ( ecx == MSR_FS_BASE )
289 vmcb->fs.base = msr_content;
290 else if ( ecx == MSR_GS_BASE )
291 vmcb->gs.base = msr_content;
292 else
293 vmcb->kerngsbase = msr_content;
294 break;
295 #endif
297 case MSR_STAR:
298 vmcb->star = msr_content;
299 break;
301 case MSR_LSTAR:
302 case MSR_CSTAR:
303 if ( !is_canonical_address(msr_content) )
304 goto uncanonical_address;
306 if ( ecx == MSR_LSTAR )
307 vmcb->lstar = msr_content;
308 else
309 vmcb->cstar = msr_content;
310 break;
312 case MSR_SYSCALL_MASK:
313 vmcb->sfmask = msr_content;
314 break;
316 default:
317 return 0;
318 }
320 return 1;
322 uncanonical_address:
323 HVM_DBG_LOG(DBG_LEVEL_1, "Non-canonical address for MSR write %x\n", ecx);
324 gp_fault:
325 svm_inject_exception(v, TRAP_gp_fault, 1, 0);
326 return 0;
327 }
330 #define loaddebug(_v,_reg) \
331 __asm__ __volatile__ ("mov %0,%%db" #_reg : : "r" ((_v)->debugreg[_reg]))
332 #define savedebug(_v,_reg) \
333 __asm__ __volatile__ ("mov %%db" #_reg ",%0" : "=r" ((_v)->debugreg[_reg]))
335 static inline void svm_save_dr(struct vcpu *v)
336 {
337 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
339 if ( !v->arch.hvm_vcpu.flag_dr_dirty )
340 return;
342 /* Clear the DR dirty flag and re-enable intercepts for DR accesses. */
343 v->arch.hvm_vcpu.flag_dr_dirty = 0;
344 v->arch.hvm_svm.vmcb->dr_intercepts = DR_INTERCEPT_ALL_WRITES;
346 savedebug(&v->arch.guest_context, 0);
347 savedebug(&v->arch.guest_context, 1);
348 savedebug(&v->arch.guest_context, 2);
349 savedebug(&v->arch.guest_context, 3);
350 v->arch.guest_context.debugreg[6] = vmcb->dr6;
351 v->arch.guest_context.debugreg[7] = vmcb->dr7;
352 }
355 static inline void __restore_debug_registers(struct vcpu *v)
356 {
357 loaddebug(&v->arch.guest_context, 0);
358 loaddebug(&v->arch.guest_context, 1);
359 loaddebug(&v->arch.guest_context, 2);
360 loaddebug(&v->arch.guest_context, 3);
361 /* DR6 and DR7 are loaded from the VMCB. */
362 }
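/*
 * Copy architectural guest state out of the VMCB (and the shadowed
 * control registers) into an hvm_hw_cpu record for save/restore.
 */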
365 int svm_vmcb_save(struct vcpu *v, struct hvm_hw_cpu *c)
366 {
367 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
369 c->eip = vmcb->rip;
371 #ifdef HVM_DEBUG_SUSPEND
372 printk("%s: eip=0x%"PRIx64".\n",
373 __func__, c->eip);
375 #endif
377 c->esp = vmcb->rsp;
378 c->eflags = vmcb->rflags;
380 c->cr0 = v->arch.hvm_svm.cpu_shadow_cr0;
381 c->cr3 = v->arch.hvm_svm.cpu_cr3;
382 c->cr4 = v->arch.hvm_svm.cpu_shadow_cr4;
384 #ifdef HVM_DEBUG_SUSPEND
385 printk("%s: cr3=0x%"PRIx64", cr0=0x%"PRIx64", cr4=0x%"PRIx64".\n",
386 __func__,
387 c->cr3,
388 c->cr0,
389 c->cr4);
390 #endif
392 c->idtr_limit = vmcb->idtr.limit;
393 c->idtr_base = vmcb->idtr.base;
395 c->gdtr_limit = vmcb->gdtr.limit;
396 c->gdtr_base = vmcb->gdtr.base;
398 c->cs_sel = vmcb->cs.sel;
399 c->cs_limit = vmcb->cs.limit;
400 c->cs_base = vmcb->cs.base;
401 c->cs_arbytes = vmcb->cs.attr.bytes;
403 c->ds_sel = vmcb->ds.sel;
404 c->ds_limit = vmcb->ds.limit;
405 c->ds_base = vmcb->ds.base;
406 c->ds_arbytes = vmcb->ds.attr.bytes;
408 c->es_sel = vmcb->es.sel;
409 c->es_limit = vmcb->es.limit;
410 c->es_base = vmcb->es.base;
411 c->es_arbytes = vmcb->es.attr.bytes;
413 c->ss_sel = vmcb->ss.sel;
414 c->ss_limit = vmcb->ss.limit;
415 c->ss_base = vmcb->ss.base;
416 c->ss_arbytes = vmcb->ss.attr.bytes;
418 c->fs_sel = vmcb->fs.sel;
419 c->fs_limit = vmcb->fs.limit;
420 c->fs_base = vmcb->fs.base;
421 c->fs_arbytes = vmcb->fs.attr.bytes;
423 c->gs_sel = vmcb->gs.sel;
424 c->gs_limit = vmcb->gs.limit;
425 c->gs_base = vmcb->gs.base;
426 c->gs_arbytes = vmcb->gs.attr.bytes;
428 c->tr_sel = vmcb->tr.sel;
429 c->tr_limit = vmcb->tr.limit;
430 c->tr_base = vmcb->tr.base;
431 c->tr_arbytes = vmcb->tr.attr.bytes;
433 c->ldtr_sel = vmcb->ldtr.sel;
434 c->ldtr_limit = vmcb->ldtr.limit;
435 c->ldtr_base = vmcb->ldtr.base;
436 c->ldtr_arbytes = vmcb->ldtr.attr.bytes;
438 c->sysenter_cs = vmcb->sysenter_cs;
439 c->sysenter_esp = vmcb->sysenter_esp;
440 c->sysenter_eip = vmcb->sysenter_eip;
442 return 1;
443 }
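/*
 * Load an hvm_hw_cpu record back into the VMCB. CR3 handling mirrors a
 * guest CR3 write: reuse the current guest table if CR3 is unchanged,
 * otherwise take a reference on the new top-level page before switching.
 */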
446 int svm_vmcb_restore(struct vcpu *v, struct hvm_hw_cpu *c)
447 {
448 unsigned long mfn, old_base_mfn;
449 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
451 vmcb->rip = c->eip;
452 vmcb->rsp = c->esp;
453 vmcb->rflags = c->eflags;
455 v->arch.hvm_svm.cpu_shadow_cr0 = c->cr0;
457 #ifdef HVM_DEBUG_SUSPEND
458 printk("%s: cr3=0x%"PRIx64", cr0=0x%"PRIx64", cr4=0x%"PRIx64".\n",
459 __func__,
460 c->cr3,
461 c->cr0,
462 c->cr4);
463 #endif
465 if (!svm_paging_enabled(v)) {
466 printk("%s: paging not enabled.\n", __func__);
467 goto skip_cr3;
468 }
470 if (c->cr3 == v->arch.hvm_svm.cpu_cr3) {
471 /*
472 * This is a simple TLB flush, implying the guest has
473 * removed some translation or changed page attributes.
474 * We simply invalidate the shadow.
475 */
476 mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
477 if (mfn != pagetable_get_pfn(v->arch.guest_table)) {
478 goto bad_cr3;
479 }
480 } else {
481 /*
482 * If different, make a shadow. Check if the PDBR is valid
483 * first.
484 */
485 HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 c->cr3 = %"PRIx64"", c->cr3);
486 /* current!=vcpu as not called by arch_vmx_do_launch */
487 mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
488 if( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain)) {
489 goto bad_cr3;
490 }
491 old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
492 v->arch.guest_table = pagetable_from_pfn(mfn);
493 if (old_base_mfn)
494 put_page(mfn_to_page(old_base_mfn));
495 v->arch.hvm_svm.cpu_cr3 = c->cr3;
496 }
498 skip_cr3:
499 #if defined(__x86_64__) && 0
500 if (vmx_long_mode_enabled(v)) {
501 unsigned long vm_entry_value;
502 vm_entry_value = __vmread(VM_ENTRY_CONTROLS);
503 vm_entry_value |= VM_ENTRY_IA32E_MODE;
504 __vmwrite(VM_ENTRY_CONTROLS, vm_entry_value);
505 }
506 #endif
508 vmcb->cr4 = c->cr4 | SVM_CR4_HOST_MASK;
509 v->arch.hvm_svm.cpu_shadow_cr4 = c->cr4;
511 vmcb->idtr.limit = c->idtr_limit;
512 vmcb->idtr.base = c->idtr_base;
514 vmcb->gdtr.limit = c->gdtr_limit;
515 vmcb->gdtr.base = c->gdtr_base;
517 vmcb->cs.sel = c->cs_sel;
518 vmcb->cs.limit = c->cs_limit;
519 vmcb->cs.base = c->cs_base;
520 vmcb->cs.attr.bytes = c->cs_arbytes;
522 vmcb->ds.sel = c->ds_sel;
523 vmcb->ds.limit = c->ds_limit;
524 vmcb->ds.base = c->ds_base;
525 vmcb->ds.attr.bytes = c->ds_arbytes;
527 vmcb->es.sel = c->es_sel;
528 vmcb->es.limit = c->es_limit;
529 vmcb->es.base = c->es_base;
530 vmcb->es.attr.bytes = c->es_arbytes;
532 vmcb->ss.sel = c->ss_sel;
533 vmcb->ss.limit = c->ss_limit;
534 vmcb->ss.base = c->ss_base;
535 vmcb->ss.attr.bytes = c->ss_arbytes;
537 vmcb->fs.sel = c->fs_sel;
538 vmcb->fs.limit = c->fs_limit;
539 vmcb->fs.base = c->fs_base;
540 vmcb->fs.attr.bytes = c->fs_arbytes;
542 vmcb->gs.sel = c->gs_sel;
543 vmcb->gs.limit = c->gs_limit;
544 vmcb->gs.base = c->gs_base;
545 vmcb->gs.attr.bytes = c->gs_arbytes;
547 vmcb->tr.sel = c->tr_sel;
548 vmcb->tr.limit = c->tr_limit;
549 vmcb->tr.base = c->tr_base;
550 vmcb->tr.attr.bytes = c->tr_arbytes;
552 vmcb->ldtr.sel = c->ldtr_sel;
553 vmcb->ldtr.limit = c->ldtr_limit;
554 vmcb->ldtr.base = c->ldtr_base;
555 vmcb->ldtr.attr.bytes = c->ldtr_arbytes;
557 vmcb->sysenter_cs = c->sysenter_cs;
558 vmcb->sysenter_esp = c->sysenter_esp;
559 vmcb->sysenter_eip = c->sysenter_eip;
561 paging_update_paging_modes(v);
562 return 0;
564 bad_cr3:
565 gdprintk(XENLOG_ERR, "Invalid CR3 value=0x%"PRIx64"", c->cr3);
566 return -EINVAL;
567 }
570 void svm_save_cpu_state(struct vcpu *v, struct hvm_hw_cpu *data)
571 {
572 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
574 data->shadow_gs = vmcb->kerngsbase;
575 data->msr_lstar = vmcb->lstar;
576 data->msr_star = vmcb->star;
577 data->msr_cstar = vmcb->cstar;
578 data->msr_syscall_mask = vmcb->sfmask;
579 data->msr_efer = vmcb->efer;
581 data->tsc = hvm_get_guest_time(v);
582 }
585 void svm_load_cpu_state(struct vcpu *v, struct hvm_hw_cpu *data)
586 {
587 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
589 vmcb->kerngsbase = data->shadow_gs;
590 vmcb->lstar = data->msr_lstar;
591 vmcb->star = data->msr_star;
592 vmcb->cstar = data->msr_cstar;
593 vmcb->sfmask = data->msr_syscall_mask;
594 vmcb->efer = data->msr_efer;
596 hvm_set_guest_time(v, data->tsc);
597 }
599 void svm_save_vmcb_ctxt(struct vcpu *v, struct hvm_hw_cpu *ctxt)
600 {
601 svm_save_cpu_state(v, ctxt);
602 svm_vmcb_save(v, ctxt);
603 }
605 int svm_load_vmcb_ctxt(struct vcpu *v, struct hvm_hw_cpu *ctxt)
606 {
607 svm_load_cpu_state(v, ctxt);
608 if (svm_vmcb_restore(v, ctxt)) {
609 printk("svm_vmcb restore failed!\n");
610 domain_crash(v->domain);
611 return -EINVAL;
612 }
614 return 0;
615 }
618 static inline void svm_restore_dr(struct vcpu *v)
619 {
620 if ( unlikely(v->arch.guest_context.debugreg[7] & 0xFF) )
621 __restore_debug_registers(v);
622 }
625 static int svm_realmode(struct vcpu *v)
626 {
627 unsigned long cr0 = v->arch.hvm_svm.cpu_shadow_cr0;
628 unsigned long eflags = v->arch.hvm_svm.vmcb->rflags;
630 return (eflags & X86_EFLAGS_VM) || !(cr0 & X86_CR0_PE);
631 }
633 static int svm_guest_x86_mode(struct vcpu *v)
634 {
635 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
637 if ( (vmcb->efer & EFER_LMA) && vmcb->cs.attr.fields.l )
638 return 8;
640 if ( svm_realmode(v) )
641 return 2;
643 return (vmcb->cs.attr.fields.db ? 4 : 2);
644 }
646 void svm_update_host_cr3(struct vcpu *v)
647 {
648 /* SVM doesn't have a HOST_CR3 equivalent to update. */
649 }
651 void svm_update_guest_cr3(struct vcpu *v)
652 {
653 v->arch.hvm_svm.vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3;
654 }
656 static void svm_update_vtpr(struct vcpu *v, unsigned long value)
657 {
658 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
660 vmcb->vintr.fields.tpr = value & 0x0f;
661 }
663 unsigned long svm_get_ctrl_reg(struct vcpu *v, unsigned int num)
664 {
665 switch ( num )
666 {
667 case 0:
668 return v->arch.hvm_svm.cpu_shadow_cr0;
669 case 2:
670 return v->arch.hvm_svm.cpu_cr2;
671 case 3:
672 return v->arch.hvm_svm.cpu_cr3;
673 case 4:
674 return v->arch.hvm_svm.cpu_shadow_cr4;
675 default:
676 BUG();
677 }
678 return 0; /* dummy */
679 }
681 static unsigned long svm_get_segment_base(struct vcpu *v, enum x86_segment seg)
682 {
683 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
684 int long_mode = 0;
686 #ifdef __x86_64__
687 long_mode = vmcb->cs.attr.fields.l && (vmcb->efer & EFER_LMA);
688 #endif
689 switch ( seg )
690 {
691 case x86_seg_cs: return long_mode ? 0 : vmcb->cs.base;
692 case x86_seg_ds: return long_mode ? 0 : vmcb->ds.base;
693 case x86_seg_es: return long_mode ? 0 : vmcb->es.base;
694 case x86_seg_fs: return vmcb->fs.base;
695 case x86_seg_gs: return vmcb->gs.base;
696 case x86_seg_ss: return long_mode ? 0 : vmcb->ss.base;
697 case x86_seg_tr: return vmcb->tr.base;
698 case x86_seg_gdtr: return vmcb->gdtr.base;
699 case x86_seg_idtr: return vmcb->idtr.base;
700 case x86_seg_ldtr: return vmcb->ldtr.base;
701 }
702 BUG();
703 return 0;
704 }
706 static void svm_get_segment_register(struct vcpu *v, enum x86_segment seg,
707 struct segment_register *reg)
708 {
709 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
710 switch ( seg )
711 {
712 case x86_seg_cs: memcpy(reg, &vmcb->cs, sizeof(*reg)); break;
713 case x86_seg_ds: memcpy(reg, &vmcb->ds, sizeof(*reg)); break;
714 case x86_seg_es: memcpy(reg, &vmcb->es, sizeof(*reg)); break;
715 case x86_seg_fs: memcpy(reg, &vmcb->fs, sizeof(*reg)); break;
716 case x86_seg_gs: memcpy(reg, &vmcb->gs, sizeof(*reg)); break;
717 case x86_seg_ss: memcpy(reg, &vmcb->ss, sizeof(*reg)); break;
718 case x86_seg_tr: memcpy(reg, &vmcb->tr, sizeof(*reg)); break;
719 case x86_seg_gdtr: memcpy(reg, &vmcb->gdtr, sizeof(*reg)); break;
720 case x86_seg_idtr: memcpy(reg, &vmcb->idtr, sizeof(*reg)); break;
721 case x86_seg_ldtr: memcpy(reg, &vmcb->ldtr, sizeof(*reg)); break;
722 default: BUG();
723 }
724 }
726 /* Make sure that Xen intercepts any FP accesses from the current vcpu. */
727 static void svm_stts(struct vcpu *v)
728 {
729 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
731 /*
732 * If the guest does not have TS enabled then we must cause and handle an
733 * exception on first use of the FPU. If the guest *does* have TS enabled
734 * then this is not necessary: no FPU activity can occur until the guest
735 * clears CR0.TS, and we will initialise the FPU when that happens.
736 */
737 if ( !(v->arch.hvm_svm.cpu_shadow_cr0 & X86_CR0_TS) )
738 {
739 v->arch.hvm_svm.vmcb->exception_intercepts |= EXCEPTION_BITMAP_NM;
740 vmcb->cr0 |= X86_CR0_TS;
741 }
742 }
745 static void svm_set_tsc_offset(struct vcpu *v, u64 offset)
746 {
747 v->arch.hvm_svm.vmcb->tsc_offset = offset;
748 }
751 static void svm_init_ap_context(
752 struct vcpu_guest_context *ctxt, int vcpuid, int trampoline_vector)
753 {
754 memset(ctxt, 0, sizeof(*ctxt));
756 /*
757 * We execute the trampoline code in real mode. The trampoline vector
758 * passed to us is page aligned and is the physical frame number for
759 * the code.
760 */
761 ctxt->user_regs.eip = 0x0;
762 ctxt->user_regs.cs = (trampoline_vector << 8);
763 }
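/*
 * Fill the guest-visible hypercall page: each 32-byte slot i contains
 *   b8 <i>          mov  $i, %eax
 *   0f 01 d9        vmmcall
 *   c3              ret
 * The HYPERVISOR_iret slot is overwritten with ud2 as it is unsupported.
 */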
765 static void svm_init_hypercall_page(struct domain *d, void *hypercall_page)
766 {
767 char *p;
768 int i;
770 memset(hypercall_page, 0, PAGE_SIZE);
772 for ( i = 0; i < (PAGE_SIZE / 32); i++ )
773 {
774 p = (char *)(hypercall_page + (i * 32));
775 *(u8 *)(p + 0) = 0xb8; /* mov imm32, %eax */
776 *(u32 *)(p + 1) = i;
777 *(u8 *)(p + 5) = 0x0f; /* vmmcall */
778 *(u8 *)(p + 6) = 0x01;
779 *(u8 *)(p + 7) = 0xd9;
780 *(u8 *)(p + 8) = 0xc3; /* ret */
781 }
783 /* Don't support HYPERVISOR_iret at the moment */
784 *(u16 *)(hypercall_page + (__HYPERVISOR_iret * 32)) = 0x0b0f; /* ud2 */
785 }
788 int svm_dbg_on = 0;
790 static inline int svm_do_debugout(unsigned long exit_code)
791 {
792 int i;
794 static unsigned long counter = 0;
795 static unsigned long works[] =
796 {
797 VMEXIT_IOIO,
798 VMEXIT_HLT,
799 VMEXIT_CPUID,
800 VMEXIT_DR0_READ,
801 VMEXIT_DR1_READ,
802 VMEXIT_DR2_READ,
803 VMEXIT_DR3_READ,
804 VMEXIT_DR6_READ,
805 VMEXIT_DR7_READ,
806 VMEXIT_DR0_WRITE,
807 VMEXIT_DR1_WRITE,
808 VMEXIT_DR2_WRITE,
809 VMEXIT_DR3_WRITE,
810 VMEXIT_CR0_READ,
811 VMEXIT_CR0_WRITE,
812 VMEXIT_CR3_READ,
813 VMEXIT_CR4_READ,
814 VMEXIT_MSR,
815 VMEXIT_CR0_WRITE,
816 VMEXIT_CR3_WRITE,
817 VMEXIT_CR4_WRITE,
818 VMEXIT_EXCEPTION_PF,
819 VMEXIT_INTR,
820 VMEXIT_INVLPG,
821 VMEXIT_EXCEPTION_NM
822 };
825 #if 0
826 if (svm_dbg_on && exit_code != 0x7B)
827 return 1;
828 #endif
830 counter++;
832 #if 0
833 if ((exit_code == 0x4E
834 || exit_code == VMEXIT_CR0_READ
835 || exit_code == VMEXIT_CR0_WRITE)
836 && counter < 200000)
837 return 0;
839 if ((exit_code == 0x4E) && counter < 500000)
840 return 0;
841 #endif
843 for (i = 0; i < sizeof(works) / sizeof(works[0]); i++)
844 if (exit_code == works[i])
845 return 0;
847 return 1;
848 }
850 static void save_svm_cpu_user_regs(struct vcpu *v, struct cpu_user_regs *ctxt)
851 {
852 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
854 ASSERT(vmcb);
856 ctxt->eax = vmcb->rax;
857 ctxt->ss = vmcb->ss.sel;
858 ctxt->esp = vmcb->rsp;
859 ctxt->eflags = vmcb->rflags;
860 ctxt->cs = vmcb->cs.sel;
861 ctxt->eip = vmcb->rip;
863 ctxt->gs = vmcb->gs.sel;
864 ctxt->fs = vmcb->fs.sel;
865 ctxt->es = vmcb->es.sel;
866 ctxt->ds = vmcb->ds.sel;
867 }
869 static void svm_store_cpu_user_regs(struct cpu_user_regs *regs, struct vcpu *v)
870 {
871 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
873 regs->eip = vmcb->rip;
874 regs->esp = vmcb->rsp;
875 regs->eflags = vmcb->rflags;
876 regs->cs = vmcb->cs.sel;
877 regs->ds = vmcb->ds.sel;
878 regs->es = vmcb->es.sel;
879 regs->ss = vmcb->ss.sel;
880 }
882 /* XXX Use svm_load_cpu_guest_regs instead */
883 static void svm_load_cpu_user_regs(struct vcpu *v, struct cpu_user_regs *regs)
884 {
885 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
886 u32 *intercepts = &v->arch.hvm_svm.vmcb->exception_intercepts;
888 /* Write the guest register value into VMCB */
889 vmcb->rax = regs->eax;
890 vmcb->ss.sel = regs->ss;
891 vmcb->rsp = regs->esp;
892 vmcb->rflags = regs->eflags | 2UL;
893 vmcb->cs.sel = regs->cs;
894 vmcb->rip = regs->eip;
895 if (regs->eflags & EF_TF)
896 *intercepts |= EXCEPTION_BITMAP_DB;
897 else
898 *intercepts &= ~EXCEPTION_BITMAP_DB;
899 }
901 static void svm_load_cpu_guest_regs(
902 struct vcpu *v, struct cpu_user_regs *regs)
903 {
904 svm_load_cpu_user_regs(v, regs);
905 }
907 static void arch_svm_do_launch(struct vcpu *v)
908 {
909 svm_do_launch(v);
911 if ( v->vcpu_id != 0 )
912 {
913 cpu_user_regs_t *regs = &current->arch.guest_context.user_regs;
914 u16 cs_sel = regs->cs;
915 /*
916 * This is the launch of an AP; set state so that we begin executing
917 * the trampoline code in real-mode.
918 */
919 svm_do_vmmcall_reset_to_realmode(v, regs);
920 /* Adjust the state to execute the trampoline code.*/
921 v->arch.hvm_svm.vmcb->rip = 0;
922 v->arch.hvm_svm.vmcb->cs.sel= cs_sel;
923 v->arch.hvm_svm.vmcb->cs.base = (cs_sel << 4);
924 }
926 reset_stack_and_jump(svm_asm_do_launch);
927 }
929 static void svm_ctxt_switch_from(struct vcpu *v)
930 {
931 svm_save_dr(v);
932 }
934 static void svm_ctxt_switch_to(struct vcpu *v)
935 {
936 #ifdef __x86_64__
937 /*
938 * This is required because VMRUN performs consistency checks,
939 * and some of the DOM0 selectors point to invalid GDT
940 * locations, which causes AMD processors
941 * to shut down.
942 */
943 set_segment_register(ds, 0);
944 set_segment_register(es, 0);
945 set_segment_register(ss, 0);
946 #endif
947 svm_restore_dr(v);
948 }
950 static int svm_vcpu_initialise(struct vcpu *v)
951 {
952 int rc;
954 v->arch.schedule_tail = arch_svm_do_launch;
955 v->arch.ctxt_switch_from = svm_ctxt_switch_from;
956 v->arch.ctxt_switch_to = svm_ctxt_switch_to;
958 v->arch.hvm_svm.saved_irq_vector = -1;
960 if ( (rc = svm_create_vmcb(v)) != 0 )
961 {
962 dprintk(XENLOG_WARNING,
963 "Failed to create VMCB for vcpu %d: err=%d.\n",
964 v->vcpu_id, rc);
965 return rc;
966 }
968 return 0;
969 }
971 static void svm_vcpu_destroy(struct vcpu *v)
972 {
973 svm_destroy_vmcb(v);
974 }
976 static void svm_hvm_inject_exception(
977 unsigned int trapnr, int errcode, unsigned long cr2)
978 {
979 struct vcpu *v = current;
980 svm_inject_exception(v, trapnr, (errcode != -1), errcode);
981 if ( trapnr == TRAP_page_fault )
982 v->arch.hvm_svm.vmcb->cr2 = v->arch.hvm_svm.cpu_cr2 = cr2;
983 }
985 static int svm_event_injection_faulted(struct vcpu *v)
986 {
987 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
988 return vmcb->exitintinfo.fields.v;
989 }
991 static struct hvm_function_table svm_function_table = {
992 .disable = stop_svm,
993 .vcpu_initialise = svm_vcpu_initialise,
994 .vcpu_destroy = svm_vcpu_destroy,
995 .store_cpu_guest_regs = svm_store_cpu_guest_regs,
996 .load_cpu_guest_regs = svm_load_cpu_guest_regs,
997 .save_cpu_ctxt = svm_save_vmcb_ctxt,
998 .load_cpu_ctxt = svm_load_vmcb_ctxt,
999 .paging_enabled = svm_paging_enabled,
1000 .long_mode_enabled = svm_long_mode_enabled,
1001 .pae_enabled = svm_pae_enabled,
1002 .guest_x86_mode = svm_guest_x86_mode,
1003 .get_guest_ctrl_reg = svm_get_ctrl_reg,
1004 .get_segment_base = svm_get_segment_base,
1005 .get_segment_register = svm_get_segment_register,
1006 .update_host_cr3 = svm_update_host_cr3,
1007 .update_guest_cr3 = svm_update_guest_cr3,
1008 .update_vtpr = svm_update_vtpr,
1009 .stts = svm_stts,
1010 .set_tsc_offset = svm_set_tsc_offset,
1011 .inject_exception = svm_hvm_inject_exception,
1012 .init_ap_context = svm_init_ap_context,
1013 .init_hypercall_page = svm_init_hypercall_page,
1014 .event_injection_faulted = svm_event_injection_faulted
1015 };
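/*
 * Per-CPU SVM bring-up: check CPUID and VM_CR for SVM support, set
 * EFER.SVME, allocate and register the host save area via
 * MSR_K8_VM_HSAVE_PA, allocate the per-CPU root VMCB, and finally
 * register svm_function_table with the generic HVM layer.
 */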
1017 int start_svm(void)
1019 u32 eax, ecx, edx;
1020 u32 phys_hsa_lo, phys_hsa_hi;
1021 u64 phys_hsa;
1022 int cpu = smp_processor_id();
1024 /* Xen does not fill x86_capability words except 0. */
1025 ecx = cpuid_ecx(0x80000001);
1026 boot_cpu_data.x86_capability[5] = ecx;
1028 if (!(test_bit(X86_FEATURE_SVME, &boot_cpu_data.x86_capability)))
1029 return 0;
1031 /* check whether SVM feature is disabled in BIOS */
1032 rdmsr(MSR_K8_VM_CR, eax, edx);
1033 if ( eax & K8_VMCR_SVME_DISABLE )
1034 {
1035 printk("AMD SVM Extension is disabled in BIOS.\n");
1036 return 0;
1037 }
1039 if (!(hsa[cpu] = alloc_host_save_area()))
1040 return 0;
1042 rdmsr(MSR_EFER, eax, edx);
1043 eax |= EFER_SVME;
1044 wrmsr(MSR_EFER, eax, edx);
1045 printk("AMD SVM Extension is enabled for cpu %d.\n", cpu );
1047 /* Initialize the HSA for this core */
1048 phys_hsa = (u64) virt_to_maddr(hsa[cpu]);
1049 phys_hsa_lo = (u32) phys_hsa;
1050 phys_hsa_hi = (u32) (phys_hsa >> 32);
1051 wrmsr(MSR_K8_VM_HSAVE_PA, phys_hsa_lo, phys_hsa_hi);
1053 if (!(root_vmcb[cpu] = alloc_vmcb()))
1054 return 0;
1055 root_vmcb_pa[cpu] = virt_to_maddr(root_vmcb[cpu]);
1057 if (cpu == 0)
1058 setup_vmcb_dump();
1060 hvm_enable(&svm_function_table);
1062 return 1;
1065 void arch_svm_do_resume(struct vcpu *v)
1067 /* pinning VCPU to a different core? */
1068 if ( v->arch.hvm_svm.launch_core == smp_processor_id()) {
1069 hvm_do_resume( v );
1070 reset_stack_and_jump( svm_asm_do_resume );
1071 }
1072 else {
1073 if (svm_dbg_on)
1074 printk("VCPU core pinned: %d to %d\n",
1075 v->arch.hvm_svm.launch_core, smp_processor_id() );
1076 v->arch.hvm_svm.launch_core = smp_processor_id();
1077 hvm_migrate_timers( v );
1078 hvm_do_resume( v );
1079 reset_stack_and_jump( svm_asm_do_resume );
1080 }
1081 }
1083 static int svm_do_page_fault(unsigned long va, struct cpu_user_regs *regs)
1085 HVM_DBG_LOG(DBG_LEVEL_VMMU,
1086 "svm_do_page_fault = 0x%lx, eip = %lx, error_code = %lx",
1087 va, (unsigned long)current->arch.hvm_svm.vmcb->rip,
1088 (unsigned long)regs->error_code);
1089 return paging_fault(va, regs);
1093 static void svm_do_no_device_fault(struct vmcb_struct *vmcb)
1095 struct vcpu *v = current;
1097 setup_fpu(v);
1098 vmcb->exception_intercepts &= ~EXCEPTION_BITMAP_NM;
1100 if ( !(v->arch.hvm_svm.cpu_shadow_cr0 & X86_CR0_TS) )
1101 vmcb->cr0 &= ~X86_CR0_TS;
1105 static void svm_do_general_protection_fault(struct vcpu *v,
1106 struct cpu_user_regs *regs)
1108 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1109 unsigned long eip, error_code;
1111 ASSERT(vmcb);
1113 eip = vmcb->rip;
1114 error_code = vmcb->exitinfo1;
1116 if (vmcb->idtr.limit == 0) {
1117 printk("Huh? We got a GP Fault with an invalid IDTR!\n");
1118 svm_dump_vmcb(__func__, vmcb);
1119 svm_dump_regs(__func__, regs);
1120 svm_dump_inst(svm_rip2pointer(vmcb));
1121 domain_crash(v->domain);
1122 return;
1123 }
1125 HVM_DBG_LOG(DBG_LEVEL_1,
1126 "svm_general_protection_fault: eip = %lx, erro_code = %lx",
1127 eip, error_code);
1129 HVM_DBG_LOG(DBG_LEVEL_1,
1130 "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx",
1131 (unsigned long)regs->eax, (unsigned long)regs->ebx,
1132 (unsigned long)regs->ecx, (unsigned long)regs->edx,
1133 (unsigned long)regs->esi, (unsigned long)regs->edi);
1135 /* Reflect it back into the guest */
1136 svm_inject_exception(v, TRAP_gp_fault, 1, error_code);
1139 /* Reserved bits ECX: [31:14], [12:4], [2:1]*/
1140 #define SVM_VCPU_CPUID_L1_ECX_RESERVED 0xffffdff6
1141 /* Reserved bits EDX: [31:29], [27], [22:20], [18], [10] */
1142 #define SVM_VCPU_CPUID_L1_EDX_RESERVED 0xe8740400
1144 static void svm_vmexit_do_cpuid(struct vmcb_struct *vmcb,
1145 struct cpu_user_regs *regs)
1147 unsigned long input = regs->eax;
1148 unsigned int eax, ebx, ecx, edx;
1149 struct vcpu *v = current;
1150 int inst_len;
1152 ASSERT(vmcb);
1154 hvm_cpuid(input, &eax, &ebx, &ecx, &edx);
1156 if ( input == 0x00000001 )
1158 /* Clear out reserved bits. */
1159 ecx &= ~SVM_VCPU_CPUID_L1_ECX_RESERVED;
1160 edx &= ~SVM_VCPU_CPUID_L1_EDX_RESERVED;
1162 /* Guest should only see one logical processor.
1163 * See details on page 23 of AMD CPUID Specification.
1164 */
1165 clear_bit(X86_FEATURE_HT & 31, &edx); /* clear the hyperthread bit */
1166 ebx &= 0xFF00FFFF; /* clear the logical processor count when HTT=0 */
1167 ebx |= 0x00010000; /* set to 1 just for precaution */
1169 else if ( input == 0x80000001 )
1171 if ( vlapic_hw_disabled(vcpu_vlapic(v)) )
1172 clear_bit(X86_FEATURE_APIC & 31, &edx);
1174 #if CONFIG_PAGING_LEVELS >= 3
1175 if ( !v->domain->arch.hvm_domain.params[HVM_PARAM_PAE_ENABLED] )
1176 #endif
1177 clear_bit(X86_FEATURE_PAE & 31, &edx);
1179 clear_bit(X86_FEATURE_PSE36 & 31, &edx);
1181 /* Clear the Cmp_Legacy bit
1182 * This bit is supposed to be zero when HTT = 0.
1183 * See details on page 23 of AMD CPUID Specification.
1184 */
1185 clear_bit(X86_FEATURE_CMP_LEGACY & 31, &ecx);
1187 /* Make SVM feature invisible to the guest. */
1188 clear_bit(X86_FEATURE_SVME & 31, &ecx);
1190 /* So far, we do not support 3DNow for the guest. */
1191 clear_bit(X86_FEATURE_3DNOW & 31, &edx);
1192 clear_bit(X86_FEATURE_3DNOWEXT & 31, &edx);
1193 /* no FFXSR instructions feature. */
1194 clear_bit(X86_FEATURE_FFXSR & 31, &edx);
1196 else if ( input == 0x80000007 || input == 0x8000000A )
1198 /* Mask out features of power management and SVM extension. */
1199 eax = ebx = ecx = edx = 0;
1201 else if ( input == 0x80000008 )
1203 /* Make sure Number of CPU core is 1 when HTT=0 */
1204 ecx &= 0xFFFFFF00;
1207 regs->eax = (unsigned long)eax;
1208 regs->ebx = (unsigned long)ebx;
1209 regs->ecx = (unsigned long)ecx;
1210 regs->edx = (unsigned long)edx;
1212 inst_len = __get_instruction_length(vmcb, INSTR_CPUID, NULL);
1213 ASSERT(inst_len > 0);
1214 __update_guest_eip(vmcb, inst_len);
1217 static inline unsigned long *get_reg_p(unsigned int gpreg,
1218 struct cpu_user_regs *regs, struct vmcb_struct *vmcb)
1220 unsigned long *reg_p = NULL;
1221 switch (gpreg)
1223 case SVM_REG_EAX:
1224 reg_p = (unsigned long *)&regs->eax;
1225 break;
1226 case SVM_REG_EBX:
1227 reg_p = (unsigned long *)&regs->ebx;
1228 break;
1229 case SVM_REG_ECX:
1230 reg_p = (unsigned long *)&regs->ecx;
1231 break;
1232 case SVM_REG_EDX:
1233 reg_p = (unsigned long *)&regs->edx;
1234 break;
1235 case SVM_REG_EDI:
1236 reg_p = (unsigned long *)&regs->edi;
1237 break;
1238 case SVM_REG_ESI:
1239 reg_p = (unsigned long *)&regs->esi;
1240 break;
1241 case SVM_REG_EBP:
1242 reg_p = (unsigned long *)&regs->ebp;
1243 break;
1244 case SVM_REG_ESP:
1245 reg_p = (unsigned long *)&vmcb->rsp;
1246 break;
1247 #ifdef __x86_64__
1248 case SVM_REG_R8:
1249 reg_p = (unsigned long *)&regs->r8;
1250 break;
1251 case SVM_REG_R9:
1252 reg_p = (unsigned long *)&regs->r9;
1253 break;
1254 case SVM_REG_R10:
1255 reg_p = (unsigned long *)&regs->r10;
1256 break;
1257 case SVM_REG_R11:
1258 reg_p = (unsigned long *)&regs->r11;
1259 break;
1260 case SVM_REG_R12:
1261 reg_p = (unsigned long *)&regs->r12;
1262 break;
1263 case SVM_REG_R13:
1264 reg_p = (unsigned long *)&regs->r13;
1265 break;
1266 case SVM_REG_R14:
1267 reg_p = (unsigned long *)&regs->r14;
1268 break;
1269 case SVM_REG_R15:
1270 reg_p = (unsigned long *)&regs->r15;
1271 break;
1272 #endif
1273 default:
1274 BUG();
1277 return reg_p;
1281 static inline unsigned long get_reg(unsigned int gpreg,
1282 struct cpu_user_regs *regs, struct vmcb_struct *vmcb)
1284 unsigned long *gp;
1285 gp = get_reg_p(gpreg, regs, vmcb);
1286 return *gp;
1290 static inline void set_reg(unsigned int gpreg, unsigned long value,
1291 struct cpu_user_regs *regs, struct vmcb_struct *vmcb)
1293 unsigned long *gp;
1294 gp = get_reg_p(gpreg, regs, vmcb);
1295 *gp = value;
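/*
 * First guest access to a debug register: mark the DR state dirty,
 * load the guest's debug registers and drop the DR intercepts so
 * subsequent accesses run without exiting.
 */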
1299 static void svm_dr_access(struct vcpu *v, struct cpu_user_regs *regs)
1301 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1303 v->arch.hvm_vcpu.flag_dr_dirty = 1;
1305 __restore_debug_registers(v);
1307 /* allow the guest full access to the debug registers */
1308 vmcb->dr_intercepts = 0;
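/*
 * Decode any instruction prefixes in front of an INS/OUTS instruction
 * to pick up segment overrides and address-size changes before the
 * I/O string address is computed.
 */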
1312 static void svm_get_prefix_info(
1313 struct vmcb_struct *vmcb,
1314 unsigned int dir, svm_segment_register_t **seg, unsigned int *asize)
1316 unsigned char inst[MAX_INST_LEN];
1317 int i;
1319 memset(inst, 0, MAX_INST_LEN);
1320 if (inst_copy_from_guest(inst, svm_rip2pointer(vmcb), sizeof(inst))
1321 != MAX_INST_LEN)
1323 gdprintk(XENLOG_ERR, "get guest instruction failed\n");
1324 domain_crash(current->domain);
1325 return;
1328 for (i = 0; i < MAX_INST_LEN; i++)
1330 switch (inst[i])
1332 case 0xf3: /* REPZ */
1333 case 0xf2: /* REPNZ */
1334 case 0xf0: /* LOCK */
1335 case 0x66: /* data32 */
1336 #ifdef __x86_64__
1337 /* REX prefixes */
1338 case 0x40:
1339 case 0x41:
1340 case 0x42:
1341 case 0x43:
1342 case 0x44:
1343 case 0x45:
1344 case 0x46:
1345 case 0x47:
1347 case 0x48:
1348 case 0x49:
1349 case 0x4a:
1350 case 0x4b:
1351 case 0x4c:
1352 case 0x4d:
1353 case 0x4e:
1354 case 0x4f:
1355 #endif
1356 continue;
1357 case 0x67: /* addr32 */
1358 *asize ^= 48; /* Toggle between 16 and 32 bits (16 ^ 32 == 48). */
1359 continue;
1360 case 0x2e: /* CS */
1361 *seg = &vmcb->cs;
1362 continue;
1363 case 0x36: /* SS */
1364 *seg = &vmcb->ss;
1365 continue;
1366 case 0x26: /* ES */
1367 *seg = &vmcb->es;
1368 continue;
1369 case 0x64: /* FS */
1370 *seg = &vmcb->fs;
1371 continue;
1372 case 0x65: /* GS */
1373 *seg = &vmcb->gs;
1374 continue;
1375 case 0x3e: /* DS */
1376 *seg = &vmcb->ds;
1377 continue;
1378 default:
1379 break;
1381 return;
1386 /* Get the address of INS/OUTS instruction */
1387 static inline int svm_get_io_address(
1388 struct vcpu *v, struct cpu_user_regs *regs,
1389 unsigned int size, ioio_info_t info,
1390 unsigned long *count, unsigned long *addr)
1392 unsigned long reg;
1393 unsigned int asize, isize;
1394 int long_mode = 0;
1395 svm_segment_register_t *seg = NULL;
1396 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1398 #ifdef __x86_64__
1399 /* If we're in long mode, we shouldn't check the segment presence & limit */
1400 long_mode = vmcb->cs.attr.fields.l && vmcb->efer & EFER_LMA;
1401 #endif
1403 /* d field of cs.attr is 1 for 32-bit, 0 for 16 or 64 bit.
1404 * l field combined with EFER_LMA says whether it's 16 or 64 bit.
1405 */
1406 asize = (long_mode)?64:((vmcb->cs.attr.fields.db)?32:16);
1409 /* The ins/outs instructions are a single byte, so if we got more
1410 * than one byte (plus a possible rep-prefix), there is some prefix
1411 * that we need to decode...
1412 */
1413 isize = vmcb->exitinfo2 - vmcb->rip;
1415 if (info.fields.rep)
1416 isize --;
1418 if (isize > 1)
1419 svm_get_prefix_info(vmcb, info.fields.type, &seg, &asize);
1421 if (info.fields.type == IOREQ_WRITE)
1422 {
1423 reg = regs->esi;
1424 if (!seg) /* If no prefix, use DS. */
1425 seg = &vmcb->ds;
1426 if (!long_mode && (seg->attr.fields.type & 0xa) == 0x8) {
1427 svm_inject_exception(v, TRAP_gp_fault, 1, 0);
1428 return 0;
1429 }
1430 }
1431 else
1432 {
1433 reg = regs->edi;
1434 seg = &vmcb->es; /* Note: This is ALWAYS ES. */
1435 if (!long_mode && (seg->attr.fields.type & 0xa) != 0x2) {
1436 svm_inject_exception(v, TRAP_gp_fault, 1, 0);
1437 return 0;
1438 }
1439 }
1441 /* If the segment isn't present, give GP fault! */
1442 if (!long_mode && !seg->attr.fields.p)
1443 {
1444 svm_inject_exception(v, TRAP_gp_fault, 1, 0);
1445 return 0;
1446 }
1448 if (asize == 16)
1449 {
1450 *addr = (reg & 0xFFFF);
1451 *count = regs->ecx & 0xffff;
1452 }
1453 else
1454 {
1455 *addr = reg;
1456 *count = regs->ecx;
1457 }
1458 if (!info.fields.rep)
1459 *count = 1;
1461 if (!long_mode)
1463 ASSERT(*addr == (u32)*addr);
1464 if ((u32)(*addr + size - 1) < (u32)*addr ||
1465 (seg->attr.fields.type & 0xc) != 0x4 ?
1466 *addr + size - 1 > seg->limit :
1467 *addr <= seg->limit)
1469 svm_inject_exception(v, TRAP_gp_fault, 1, 0);
1470 return 0;
1473 /* Check the limit for repeated instructions, as above we checked only
1474 the first instance. Truncate the count if a limit violation would
1475 occur. Note that the checking is not necessary for page granular
1476 segments as transfers crossing page boundaries will be broken up
1477 anyway. */
1478 if (!seg->attr.fields.g && *count > 1)
1480 if ((seg->attr.fields.type & 0xc) != 0x4)
1482 /* expand-up */
1483 if (!(regs->eflags & EF_DF))
1485 if (*addr + *count * size - 1 < *addr ||
1486 *addr + *count * size - 1 > seg->limit)
1487 *count = (seg->limit + 1UL - *addr) / size;
1489 else
1491 if (*count - 1 > *addr / size)
1492 *count = *addr / size + 1;
1495 else
1497 /* expand-down */
1498 if (!(regs->eflags & EF_DF))
1500 if (*count - 1 > -(s32)*addr / size)
1501 *count = -(s32)*addr / size + 1UL;
1503 else
1505 if (*addr < (*count - 1) * size ||
1506 *addr - (*count - 1) * size <= seg->limit)
1507 *count = (*addr - seg->limit - 1) / size + 1;
1510 ASSERT(*count);
1513 *addr += seg->base;
1515 #ifdef __x86_64__
1516 else
1518 if (seg == &vmcb->fs || seg == &vmcb->gs)
1519 *addr += seg->base;
1521 if (!is_canonical_address(*addr) ||
1522 !is_canonical_address(*addr + size - 1))
1524 svm_inject_exception(v, TRAP_gp_fault, 1, 0);
1525 return 0;
1527 if (*count > (1UL << 48) / size)
1528 *count = (1UL << 48) / size;
1529 if (!(regs->eflags & EF_DF))
1531 if (*addr + *count * size - 1 < *addr ||
1532 !is_canonical_address(*addr + *count * size - 1))
1533 *count = (*addr & ~((1UL << 48) - 1)) / size;
1535 else
1537 if ((*count - 1) * size > *addr ||
1538 !is_canonical_address(*addr + (*count - 1) * size))
1539 *count = (*addr & ~((1UL << 48) - 1)) / size + 1;
1541 ASSERT(*count);
1543 #endif
1545 return 1;
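/*
 * Handle an IOIO intercept: decode port, size, direction and any string
 * or REP semantics from exitinfo1, translate string operands to guest
 * physical addresses, and forward the request to the device model via
 * send_pio_req().
 */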
1549 static void svm_io_instruction(struct vcpu *v)
1551 struct cpu_user_regs *regs;
1552 struct hvm_io_op *pio_opp;
1553 unsigned int port;
1554 unsigned int size, dir, df;
1555 ioio_info_t info;
1556 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1558 ASSERT(vmcb);
1559 pio_opp = &current->arch.hvm_vcpu.io_op;
1560 pio_opp->instr = INSTR_PIO;
1561 pio_opp->flags = 0;
1563 regs = &pio_opp->io_context;
1565 /* Copy current guest state into io instruction state structure. */
1566 memcpy(regs, guest_cpu_user_regs(), HVM_CONTEXT_STACK_BYTES);
1567 hvm_store_cpu_guest_regs(v, regs, NULL);
1569 info.bytes = vmcb->exitinfo1;
1571 port = info.fields.port; /* port used to be addr */
1572 dir = info.fields.type; /* direction */
1573 df = regs->eflags & X86_EFLAGS_DF ? 1 : 0;
1575 if (info.fields.sz32)
1576 size = 4;
1577 else if (info.fields.sz16)
1578 size = 2;
1579 else
1580 size = 1;
1582 HVM_DBG_LOG(DBG_LEVEL_IO,
1583 "svm_io_instruction: port 0x%x eip=%x:%"PRIx64", "
1584 "exit_qualification = %"PRIx64,
1585 port, vmcb->cs.sel, vmcb->rip, info.bytes);
1587 /* string instruction */
1588 if (info.fields.str)
1590 unsigned long addr, count;
1591 paddr_t paddr;
1592 unsigned long gfn;
1593 int sign = regs->eflags & X86_EFLAGS_DF ? -1 : 1;
1595 if (!svm_get_io_address(v, regs, size, info, &count, &addr))
1597 /* We failed to get a valid address, so don't do the IO operation -
1598 * it would just get worse if we do! Hopefully the guest is handling
1599 * GP faults...
1600 */
1601 return;
1604 /* "rep" prefix */
1605 if (info.fields.rep)
1607 pio_opp->flags |= REPZ;
1610 /* Translate the address to a physical address */
1611 gfn = paging_gva_to_gfn(v, addr);
1612 if ( gfn == INVALID_GFN )
1614 /* The guest does not have the RAM address mapped.
1615 * Need to send in a page fault */
1616 int errcode = 0;
1617 /* IO read --> memory write */
1618 if ( dir == IOREQ_READ ) errcode |= PFEC_write_access;
1619 svm_hvm_inject_exception(TRAP_page_fault, errcode, addr);
1620 return;
1622 paddr = (paddr_t)gfn << PAGE_SHIFT | (addr & ~PAGE_MASK);
1624 /*
1625 * Handle string pio instructions that cross pages or that
1626 * are unaligned. See the comments in hvm_platform.c/handle_mmio()
1627 */
1628 if ((addr & PAGE_MASK) != ((addr + size - 1) & PAGE_MASK))
1630 unsigned long value = 0;
1632 pio_opp->flags |= OVERLAP;
1633 pio_opp->addr = addr;
1635 if (dir == IOREQ_WRITE) /* OUTS */
1637 if ( hvm_paging_enabled(current) )
1639 int rv = hvm_copy_from_guest_virt(&value, addr, size);
1640 if ( rv != 0 )
1642 /* Failed on the page-spanning copy. Inject PF into
1643 * the guest for the address where we failed. */
1644 addr += size - rv;
1645 gdprintk(XENLOG_DEBUG, "Pagefault reading non-io side "
1646 "of a page-spanning PIO: va=%#lx\n", addr);
1647 svm_hvm_inject_exception(TRAP_page_fault, 0, addr);
1648 return;
1651 else
1652 (void) hvm_copy_from_guest_phys(&value, addr, size);
1653 } else /* dir != IOREQ_WRITE */
1654 /* Remember where to write the result, as a *VA*.
1655 * Must be a VA so we can handle the page overlap
1656 * correctly in hvm_pio_assist() */
1657 pio_opp->addr = addr;
1659 if (count == 1)
1660 regs->eip = vmcb->exitinfo2;
1662 send_pio_req(port, 1, size, value, dir, df, 0);
1664 else
1666 unsigned long last_addr = sign > 0 ? addr + count * size - 1
1667 : addr - (count - 1) * size;
1669 if ((addr & PAGE_MASK) != (last_addr & PAGE_MASK))
1671 if (sign > 0)
1672 count = (PAGE_SIZE - (addr & ~PAGE_MASK)) / size;
1673 else
1674 count = (addr & ~PAGE_MASK) / size + 1;
1676 else
1677 regs->eip = vmcb->exitinfo2;
1679 send_pio_req(port, count, size, paddr, dir, df, 1);
1682 else
1684 /*
1685 * On SVM, the RIP of the instruction following the IN/OUT is saved in
1686 * ExitInfo2
1687 */
1688 regs->eip = vmcb->exitinfo2;
1690 if (port == 0xe9 && dir == IOREQ_WRITE && size == 1)
1691 hvm_print_line(v, regs->eax); /* guest debug output */
1693 send_pio_req(port, 1, size, regs->eax, dir, df, 0);
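/*
 * Emulate a guest write to CR0: keep the shadow copy, force PG/WP in
 * the real CR0, initialise the FPU when TS is cleared, handle the
 * transitions into and out of paging (including entering long mode
 * when LME was requested), and refresh the paging mode.
 */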
1697 static int svm_set_cr0(unsigned long value)
1699 struct vcpu *v = current;
1700 unsigned long mfn;
1701 int paging_enabled;
1702 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1703 unsigned long old_base_mfn;
1705 ASSERT(vmcb);
1707 /* We don't want to lose PG. ET is reserved and should always be 1. */
1708 paging_enabled = svm_paging_enabled(v);
1709 value |= X86_CR0_ET;
1710 vmcb->cr0 = value | X86_CR0_PG | X86_CR0_WP;
1711 v->arch.hvm_svm.cpu_shadow_cr0 = value;
1713 /* TS cleared? Then initialise FPU now. */
1714 if ( !(value & X86_CR0_TS) )
1716 setup_fpu(v);
1717 vmcb->exception_intercepts &= ~EXCEPTION_BITMAP_NM;
1720 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR0 value = %lx\n", value);
1722 if ((value & X86_CR0_PE) && (value & X86_CR0_PG) && !paging_enabled)
1724 /* The guest CR3 must be pointing to the guest physical. */
1725 mfn = get_mfn_from_gpfn(v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT);
1726 if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain))
1728 gdprintk(XENLOG_ERR, "Invalid CR3 value = %lx (mfn=%lx)\n",
1729 v->arch.hvm_svm.cpu_cr3, mfn);
1730 domain_crash(v->domain);
1731 return 0;
1734 #if defined(__x86_64__)
1735 if (test_bit(SVM_CPU_STATE_LME_ENABLED, &v->arch.hvm_svm.cpu_state)
1736 && !test_bit(SVM_CPU_STATE_PAE_ENABLED,
1737 &v->arch.hvm_svm.cpu_state))
1738 {
1739 HVM_DBG_LOG(DBG_LEVEL_1, "Enable paging before PAE enable\n");
1740 svm_inject_exception(v, TRAP_gp_fault, 1, 0);
1741 }
1743 if (test_bit(SVM_CPU_STATE_LME_ENABLED, &v->arch.hvm_svm.cpu_state))
1744 {
1745 HVM_DBG_LOG(DBG_LEVEL_1, "Enable the Long mode\n");
1746 set_bit(SVM_CPU_STATE_LMA_ENABLED, &v->arch.hvm_svm.cpu_state);
1747 vmcb->efer |= EFER_LMA | EFER_LME;
1748 }
1749 #endif /* __x86_64__ */
1751 /* Now arch.guest_table points to machine physical. */
1752 old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
1753 v->arch.guest_table = pagetable_from_pfn(mfn);
1754 if ( old_base_mfn )
1755 put_page(mfn_to_page(old_base_mfn));
1756 paging_update_paging_modes(v);
1758 HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx",
1759 (unsigned long) (mfn << PAGE_SHIFT));
1762 if ( !((value & X86_CR0_PE) && (value & X86_CR0_PG)) && paging_enabled )
1763 if ( v->arch.hvm_svm.cpu_cr3 ) {
1764 put_page(mfn_to_page(get_mfn_from_gpfn(
1765 v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT)));
1766 v->arch.guest_table = pagetable_null();
1769 /*
1770 * SVM implements paged real-mode and when we return to real-mode
1771 * we revert back to the physical mappings that the domain builder
1772 * created.
1773 */
1774 if ((value & X86_CR0_PE) == 0) {
1775 if (value & X86_CR0_PG) {
1776 svm_inject_exception(v, TRAP_gp_fault, 1, 0);
1777 return 0;
1778 }
1779 paging_update_paging_modes(v);
1780 }
1781 else if ( (value & (X86_CR0_PE | X86_CR0_PG)) == X86_CR0_PE )
1782 {
1783 if ( svm_long_mode_enabled(v) )
1784 {
1785 vmcb->efer &= ~EFER_LMA;
1786 clear_bit(SVM_CPU_STATE_LMA_ENABLED, &v->arch.hvm_svm.cpu_state);
1787 }
1788 /* we should take care of this kind of situation */
1789 paging_update_paging_modes(v);
1790 }
1792 return 1;
1795 /*
1796 * Read from control registers. CR0 and CR4 are read from the shadow.
1797 */
1798 static void mov_from_cr(int cr, int gp, struct cpu_user_regs *regs)
1800 unsigned long value = 0;
1801 struct vcpu *v = current;
1802 struct vlapic *vlapic = vcpu_vlapic(v);
1803 struct vmcb_struct *vmcb;
1805 vmcb = v->arch.hvm_svm.vmcb;
1806 ASSERT(vmcb);
1808 switch ( cr )
1810 case 0:
1811 value = v->arch.hvm_svm.cpu_shadow_cr0;
1812 if (svm_dbg_on)
1813 printk("CR0 read =%lx \n", value );
1814 break;
1815 case 2:
1816 value = vmcb->cr2;
1817 break;
1818 case 3:
1819 value = (unsigned long) v->arch.hvm_svm.cpu_cr3;
1820 if (svm_dbg_on)
1821 printk("CR3 read =%lx \n", value );
1822 break;
1823 case 4:
1824 value = (unsigned long) v->arch.hvm_svm.cpu_shadow_cr4;
1825 if (svm_dbg_on)
1826 printk("CR4 read=%lx\n", value);
1827 break;
1828 case 8:
1829 value = (unsigned long)vlapic_get_reg(vlapic, APIC_TASKPRI);
1830 value = (value & 0xF0) >> 4;
1831 break;
1833 default:
1834 domain_crash(v->domain);
1835 return;
1838 set_reg(gp, value, regs, vmcb);
1840 HVM_DBG_LOG(DBG_LEVEL_VMMU, "mov_from_cr: CR%d, value = %lx,", cr, value);
1844 static inline int svm_pgbit_test(struct vcpu *v)
1846 return v->arch.hvm_svm.cpu_shadow_cr0 & X86_CR0_PG;
1850 /*
1851 * Write to control registers
1852 */
1853 static int mov_to_cr(int gpreg, int cr, struct cpu_user_regs *regs)
1855 unsigned long value, old_cr, old_base_mfn, mfn;
1856 struct vcpu *v = current;
1857 struct vlapic *vlapic = vcpu_vlapic(v);
1858 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1860 value = get_reg(gpreg, regs, vmcb);
1862 HVM_DBG_LOG(DBG_LEVEL_1, "mov_to_cr: CR%d, value = %lx,", cr, value);
1863 HVM_DBG_LOG(DBG_LEVEL_1, "current = %lx,", (unsigned long) current);
1865 switch (cr)
1867 case 0:
1868 if (svm_dbg_on)
1869 printk("CR0 write =%lx \n", value );
1870 return svm_set_cr0(value);
1872 case 3:
1873 if (svm_dbg_on)
1874 printk("CR3 write =%lx \n", value );
1875 /* If paging is not enabled yet, simply copy the value to CR3. */
1876 if (!svm_paging_enabled(v)) {
1877 v->arch.hvm_svm.cpu_cr3 = value;
1878 break;
1881 /* We make a new one if the shadow does not exist. */
1882 if (value == v->arch.hvm_svm.cpu_cr3)
1884 /*
1885 * This is a simple TLB flush, implying the guest has
1886 * removed some translation or changed page attributes.
1887 * We simply invalidate the shadow.
1888 */
1889 mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT);
1890 if (mfn != pagetable_get_pfn(v->arch.guest_table))
1891 goto bad_cr3;
1892 paging_update_cr3(v);
1894 else
1896 /*
1897 * If different, make a shadow. Check if the PDBR is valid
1898 * first.
1899 */
1900 HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 value = %lx", value);
1901 mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT);
1902 if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain))
1903 goto bad_cr3;
1905 old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
1906 v->arch.guest_table = pagetable_from_pfn(mfn);
1908 if (old_base_mfn)
1909 put_page(mfn_to_page(old_base_mfn));
1911 v->arch.hvm_svm.cpu_cr3 = value;
1912 update_cr3(v);
1913 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx", value);
1915 break;
1917 case 4: /* CR4 */
1918 if (svm_dbg_on)
1919 printk( "write cr4=%lx, cr0=%lx\n",
1920 value, v->arch.hvm_svm.cpu_shadow_cr0 );
1921 old_cr = v->arch.hvm_svm.cpu_shadow_cr4;
1922 if ( value & X86_CR4_PAE && !(old_cr & X86_CR4_PAE) )
1924 set_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
1925 if ( svm_pgbit_test(v) )
1927 /* The guest is a 32-bit PAE guest. */
1928 #if CONFIG_PAGING_LEVELS >= 3
1929 unsigned long mfn, old_base_mfn;
1930 mfn = get_mfn_from_gpfn(v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT);
1931 if ( !mfn_valid(mfn) ||
1932 !get_page(mfn_to_page(mfn), v->domain) )
1933 goto bad_cr3;
1935 /*
1936 * Now arch.guest_table points to machine physical.
1937 */
1939 old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
1940 v->arch.guest_table = pagetable_from_pfn(mfn);
1941 if ( old_base_mfn )
1942 put_page(mfn_to_page(old_base_mfn));
1943 paging_update_paging_modes(v);
1945 HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx",
1946 (unsigned long) (mfn << PAGE_SHIFT));
1948 HVM_DBG_LOG(DBG_LEVEL_VMMU,
1949 "Update CR3 value = %lx, mfn = %lx",
1950 v->arch.hvm_svm.cpu_cr3, mfn);
1951 #endif
1954 else if (value & X86_CR4_PAE) {
1955 set_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
1956 } else {
1957 if (test_bit(SVM_CPU_STATE_LMA_ENABLED,
1958 &v->arch.hvm_svm.cpu_state)) {
1959 svm_inject_exception(v, TRAP_gp_fault, 1, 0);
1961 clear_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
1964 v->arch.hvm_svm.cpu_shadow_cr4 = value;
1965 vmcb->cr4 = value | SVM_CR4_HOST_MASK;
1967 /*
1968 * Writing to CR4 to modify the PSE, PGE, or PAE flag invalidates
1969 * all TLB entries except global entries.
1970 */
1971 if ((old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE))
1972 paging_update_paging_modes(v);
1973 break;
1975 case 8:
1976 vlapic_set_reg(vlapic, APIC_TASKPRI, ((value & 0x0F) << 4));
1977 vmcb->vintr.fields.tpr = value & 0x0F;
1978 break;
1980 default:
1981 gdprintk(XENLOG_ERR, "invalid cr: %d\n", cr);
1982 domain_crash(v->domain);
1983 return 0;
1986 return 1;
1988 bad_cr3:
1989 gdprintk(XENLOG_ERR, "Invalid CR3\n");
1990 domain_crash(v->domain);
1991 return 0;
1995 #define ARR_SIZE(x) (sizeof(x) / sizeof(x[0]))
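/*
 * Dispatch a CRx intercept: fetch and decode the instruction bytes to
 * distinguish MOV to/from CRn, CLTS, LMSW and SMSW, emulate it, then
 * advance the guest RIP past the instruction.
 */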
1998 static int svm_cr_access(struct vcpu *v, unsigned int cr, unsigned int type,
1999 struct cpu_user_regs *regs)
2001 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2002 int inst_len = 0;
2003 int index;
2004 unsigned int gpreg;
2005 unsigned long value;
2006 u8 buffer[MAX_INST_LEN];
2007 u8 prefix = 0;
2008 int result = 1;
2009 enum instruction_index list_a[] = {INSTR_MOV2CR, INSTR_CLTS, INSTR_LMSW};
2010 enum instruction_index list_b[] = {INSTR_MOVCR2, INSTR_SMSW};
2011 enum instruction_index match;
2013 ASSERT(vmcb);
2015 inst_copy_from_guest(buffer, svm_rip2pointer(vmcb), sizeof(buffer));
2017 /* get index to first actual instruction byte - as we will need to know
2018 where the prefix lives later on */
2019 index = skip_prefix_bytes(buffer, sizeof(buffer));
2021 if ( type == TYPE_MOV_TO_CR )
2023 inst_len = __get_instruction_length_from_list(
2024 vmcb, list_a, ARR_SIZE(list_a), &buffer[index], &match);
2026 else /* type == TYPE_MOV_FROM_CR */
2028 inst_len = __get_instruction_length_from_list(
2029 vmcb, list_b, ARR_SIZE(list_b), &buffer[index], &match);
2032 ASSERT(inst_len > 0);
2034 inst_len += index;
2036 /* Check for REX prefix - it's ALWAYS the last byte of any prefix bytes */
2037 if (index > 0 && (buffer[index-1] & 0xF0) == 0x40)
2038 prefix = buffer[index-1];
2040 HVM_DBG_LOG(DBG_LEVEL_1, "eip = %lx", (unsigned long) vmcb->rip);
2042 switch (match)
2044 case INSTR_MOV2CR:
2045 gpreg = decode_src_reg(prefix, buffer[index+2]);
2046 result = mov_to_cr(gpreg, cr, regs);
2047 break;
2049 case INSTR_MOVCR2:
2050 gpreg = decode_src_reg(prefix, buffer[index+2]);
2051 mov_from_cr(cr, gpreg, regs);
2052 break;
2054 case INSTR_CLTS:
2055 /* TS being cleared means that it's time to restore fpu state. */
2056 setup_fpu(current);
2057 vmcb->exception_intercepts &= ~EXCEPTION_BITMAP_NM;
2058 vmcb->cr0 &= ~X86_CR0_TS; /* clear TS */
2059 v->arch.hvm_svm.cpu_shadow_cr0 &= ~X86_CR0_TS; /* clear TS */
2060 break;
2062 case INSTR_LMSW:
2063 if (svm_dbg_on)
2064 svm_dump_inst(svm_rip2pointer(vmcb));
2066 gpreg = decode_src_reg(prefix, buffer[index+2]);
2067 value = get_reg(gpreg, regs, vmcb) & 0xF;
2069 if (svm_dbg_on)
2070 printk("CR0-LMSW value=%lx, reg=%d, inst_len=%d\n", value, gpreg,
2071 inst_len);
2073 value = (v->arch.hvm_svm.cpu_shadow_cr0 & ~0xF) | value;
2075 if (svm_dbg_on)
2076 printk("CR0-LMSW CR0 - New value=%lx\n", value);
2078 result = svm_set_cr0(value);
2079 break;
2081 case INSTR_SMSW:
2082 if (svm_dbg_on)
2083 svm_dump_inst(svm_rip2pointer(vmcb));
2084 value = v->arch.hvm_svm.cpu_shadow_cr0;
2085 gpreg = decode_src_reg(prefix, buffer[index+2]);
2086 set_reg(gpreg, value, regs, vmcb);
2088 if (svm_dbg_on)
2089 printk("CR0-SMSW value=%lx, reg=%d, inst_len=%d\n", value, gpreg,
2090 inst_len);
2091 break;
2093 default:
2094 BUG();
2097 ASSERT(inst_len);
2099 __update_guest_eip(vmcb, inst_len);
2101 return result;
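/*
 * Handle RDMSR/WRMSR intercepts: exitinfo1 distinguishes reads from
 * writes; a few MSRs (TSC, SYSENTER, APIC base) are handled inline and
 * the rest go through the long-mode helpers or the generic Xen paths.
 */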
2104 static inline void svm_do_msr_access(
2105 struct vcpu *v, struct cpu_user_regs *regs)
2107 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2108 int inst_len;
2109 u64 msr_content=0;
2110 u32 ecx = regs->ecx, eax, edx;
2112 ASSERT(vmcb);
2114 HVM_DBG_LOG(DBG_LEVEL_1, "ecx=%x, eax=%x, edx=%x, exitinfo = %lx",
2115 ecx, (u32)regs->eax, (u32)regs->edx,
2116 (unsigned long)vmcb->exitinfo1);
2118 /* is it a read? */
2119 if (vmcb->exitinfo1 == 0)
2121 switch (ecx) {
2122 case MSR_IA32_TIME_STAMP_COUNTER:
2123 msr_content = hvm_get_guest_time(v);
2124 break;
2125 case MSR_IA32_SYSENTER_CS:
2126 msr_content = vmcb->sysenter_cs;
2127 break;
2128 case MSR_IA32_SYSENTER_ESP:
2129 msr_content = vmcb->sysenter_esp;
2130 break;
2131 case MSR_IA32_SYSENTER_EIP:
2132 msr_content = vmcb->sysenter_eip;
2133 break;
2134 case MSR_IA32_APICBASE:
2135 msr_content = vcpu_vlapic(v)->hw.apic_base_msr;
2136 break;
2137 default:
2138 if (long_mode_do_msr_read(regs))
2139 goto done;
2141 if ( rdmsr_hypervisor_regs(ecx, &eax, &edx) ||
2142 rdmsr_safe(ecx, eax, edx) == 0 )
2144 regs->eax = eax;
2145 regs->edx = edx;
2146 goto done;
2148 svm_inject_exception(v, TRAP_gp_fault, 1, 0);
2149 return;
2151 regs->eax = msr_content & 0xFFFFFFFF;
2152 regs->edx = msr_content >> 32;
2154 done:
2155 HVM_DBG_LOG(DBG_LEVEL_1, "returns: ecx=%x, eax=%lx, edx=%lx",
2156 ecx, (unsigned long)regs->eax, (unsigned long)regs->edx);
2158 inst_len = __get_instruction_length(vmcb, INSTR_RDMSR, NULL);
2160 else
2162 msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
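/* WRMSR path: the 64-bit value reassembled from EDX:EAX above is either used
 * to adjust the guest time offset (TSC), stored directly in the VMCB save
 * area (SYSENTER MSRs), handed to the vlapic (APICBASE), or offered to the
 * long-mode and Xen hypervisor-range MSR handlers. */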
2164 switch (ecx)
2166 case MSR_IA32_TIME_STAMP_COUNTER:
2167 hvm_set_guest_time(v, msr_content);
2168 pt_reset(v);
2169 break;
2170 case MSR_IA32_SYSENTER_CS:
2171 vmcb->sysenter_cs = msr_content;
2172 break;
2173 case MSR_IA32_SYSENTER_ESP:
2174 vmcb->sysenter_esp = msr_content;
2175 break;
2176 case MSR_IA32_SYSENTER_EIP:
2177 vmcb->sysenter_eip = msr_content;
2178 break;
2179 case MSR_IA32_APICBASE:
2180 vlapic_msr_set(vcpu_vlapic(v), msr_content);
2181 break;
2182 default:
2183 if ( !long_mode_do_msr_write(regs) )
2184 wrmsr_hypervisor_regs(ecx, regs->eax, regs->edx);
2185 break;
2188 inst_len = __get_instruction_length(vmcb, INSTR_WRMSR, NULL);
2191 __update_guest_eip(vmcb, inst_len);
2195 static inline void svm_vmexit_do_hlt(struct vmcb_struct *vmcb)
2197 __update_guest_eip(vmcb, 1);
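/* HLT is a single-byte opcode (0xF4), so the guest RIP is simply advanced by
 * one instead of decoding the instruction. */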
2199 /* If interrupts are enabled and one is pending or newly arrived, do not block. */
2200 if ( (vmcb->rflags & X86_EFLAGS_IF) &&
2201 (vmcb->vintr.fields.irq || cpu_has_pending_irq(current)) )
2202 return;
2204 hvm_hlt(vmcb->rflags);
2208 static void svm_vmexit_do_invd(struct vmcb_struct *vmcb)
2210 int inst_len;
2212 /* Invalidating the cache on the guest's behalf cannot be done safely here.
2213 * We could issue WBINVD, but it is reasonable to ignore the request
2214 * entirely: hardware cache snooping keeps things coherent anyway. -- Mats P.
2215 */
2217 /* Log that the instruction was ignored, in case someone runs a really
2218 * unusual operating system and wants to know why it is not working.
2219 */
2220 printk("INVD instruction intercepted - ignored\n");
2222 inst_len = __get_instruction_length(vmcb, INSTR_INVD, NULL);
2223 __update_guest_eip(vmcb, inst_len);
2229 #ifdef XEN_DEBUGGER
2230 static void svm_debug_save_cpu_user_regs(struct vmcb_struct *vmcb,
2231 struct cpu_user_regs *regs)
2233 regs->eip = vmcb->rip;
2234 regs->esp = vmcb->rsp;
2235 regs->eflags = vmcb->rflags;
2237 regs->xcs = vmcb->cs.sel;
2238 regs->xds = vmcb->ds.sel;
2239 regs->xes = vmcb->es.sel;
2240 regs->xfs = vmcb->fs.sel;
2241 regs->xgs = vmcb->gs.sel;
2242 regs->xss = vmcb->ss.sel;
2246 static void svm_debug_restore_cpu_user_regs(struct cpu_user_regs *regs)
/* Compiled only when XEN_DEBUGGER is defined; the VMCB is taken from the
 * current vcpu since only the register frame is passed in. */
struct vmcb_struct *vmcb = current->arch.hvm_svm.vmcb;
2248 vmcb->ss.sel = regs->xss;
2249 vmcb->rsp = regs->esp;
2250 vmcb->rflags = regs->eflags;
2251 vmcb->cs.sel = regs->xcs;
2252 vmcb->rip = regs->eip;
2254 vmcb->gs.sel = regs->xgs;
2255 vmcb->fs.sel = regs->xfs;
2256 vmcb->es.sel = regs->xes;
2257 vmcb->ds.sel = regs->xds;
2259 #endif
2262 void svm_handle_invlpg(const short invlpga, struct cpu_user_regs *regs)
2264 struct vcpu *v = current;
2265 u8 opcode[MAX_INST_LEN], prefix, length = MAX_INST_LEN;
2266 unsigned long g_vaddr;
2267 int inst_len;
2268 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2270 /*
2271 * We do not know in advance how many bytes the INVLPG instruction will
2272 * take, so fetch the maximum instruction length and decode from there.
2273 */
2274 if (inst_copy_from_guest(opcode, svm_rip2pointer(vmcb), length) < length)
2276 gdprintk(XENLOG_ERR, "Error reading memory %d bytes\n", length);
2277 domain_crash(v->domain);
2278 return;
2281 if (invlpga)
2283 inst_len = __get_instruction_length(vmcb, INSTR_INVLPGA, opcode);
2284 ASSERT(inst_len > 0);
2285 __update_guest_eip(vmcb, inst_len);
2287 /*
2288 * The address is implicit in this instruction (INVLPGA takes it in rAX).
2289 * At the moment we don't use ECX (the ASID) to identify individual guests' pages.
2290 */
2291 g_vaddr = regs->eax;
2293 else
2295 /* What about multiple prefix codes? */
2296 prefix = (is_prefix(opcode[0])?opcode[0]:0);
2297 inst_len = __get_instruction_length(vmcb, INSTR_INVLPG, opcode);
2298 ASSERT(inst_len > 0);
2300 inst_len--;
2301 length -= inst_len;
2303 /*
2304 * Decode the memory operand of the instruction, including ModRM, SIB, and
2305 * displacement, to get the effective address and its length in bytes.
2306 * Assume the system is in either 32- or 64-bit mode.
2307 */
2308 g_vaddr = get_effective_addr_modrm64(vmcb, regs, prefix, inst_len,
2309 &opcode[inst_len], &length);
2311 inst_len += length;
2312 __update_guest_eip (vmcb, inst_len);
2315 paging_invlpg(v, g_vaddr);
2319 /*
2320 * Reset to realmode causes execution to start at 0xF000:0xFFF0 in
2321 * 16-bit realmode. Basically, this mimics a processor reset.
2323 * returns 0 on success, non-zero otherwise
2324 */
2325 static int svm_do_vmmcall_reset_to_realmode(struct vcpu *v,
2326 struct cpu_user_regs *regs)
2328 struct vmcb_struct *vmcb;
2330 ASSERT(v);
2331 ASSERT(regs);
2333 vmcb = v->arch.hvm_svm.vmcb;
2335 ASSERT(vmcb);
2337 /* clear the vmcb and user regs */
2338 memset(regs, 0, sizeof(struct cpu_user_regs));
2340 /* VMCB Control */
2341 vmcb->tsc_offset = 0;
2343 /* VMCB State */
2344 vmcb->cr0 = X86_CR0_ET | X86_CR0_PG | X86_CR0_WP;
2345 v->arch.hvm_svm.cpu_shadow_cr0 = X86_CR0_ET;
2347 vmcb->cr2 = 0;
2348 vmcb->efer = EFER_SVME;
2350 vmcb->cr4 = SVM_CR4_HOST_MASK;
2351 v->arch.hvm_svm.cpu_shadow_cr4 = 0;
2352 clear_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
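/* The guest's view of CR4 (cpu_shadow_cr4) is cleared, while the live VMCB
 * CR4 keeps SVM_CR4_HOST_MASK, presumably the bits (e.g. PAE on PAE and
 * 64-bit builds) that Xen's own paging of the guest still requires. */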
2354 /* This will jump to ROMBIOS */
2355 vmcb->rip = 0xFFF0;
2357 /* Set up the segment registers and all of their hidden state. */
2358 vmcb->cs.sel = 0xF000;
2359 vmcb->cs.attr.bytes = 0x089b;
2360 vmcb->cs.limit = 0xffff;
2361 vmcb->cs.base = 0x000F0000;
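/* CS.base 0x000F0000 plus RIP 0xFFF0 makes the first fetch come from physical
 * address 0x000FFFF0, i.e. the ROMBIOS entry just below 1MB, rather than the
 * architectural reset vector at 0xFFFFFFF0. */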
2363 vmcb->ss.sel = 0x00;
2364 vmcb->ss.attr.bytes = 0x0893;
2365 vmcb->ss.limit = 0xffff;
2366 vmcb->ss.base = 0x00;
2368 vmcb->ds.sel = 0x00;
2369 vmcb->ds.attr.bytes = 0x0893;
2370 vmcb->ds.limit = 0xffff;
2371 vmcb->ds.base = 0x00;
2373 vmcb->es.sel = 0x00;
2374 vmcb->es.attr.bytes = 0x0893;
2375 vmcb->es.limit = 0xffff;
2376 vmcb->es.base = 0x00;
2378 vmcb->fs.sel = 0x00;
2379 vmcb->fs.attr.bytes = 0x0893;
2380 vmcb->fs.limit = 0xffff;
2381 vmcb->fs.base = 0x00;
2383 vmcb->gs.sel = 0x00;
2384 vmcb->gs.attr.bytes = 0x0893;
2385 vmcb->gs.limit = 0xffff;
2386 vmcb->gs.base = 0x00;
2388 vmcb->ldtr.sel = 0x00;
2389 vmcb->ldtr.attr.bytes = 0x0000;
2390 vmcb->ldtr.limit = 0x0;
2391 vmcb->ldtr.base = 0x00;
2393 vmcb->gdtr.sel = 0x00;
2394 vmcb->gdtr.attr.bytes = 0x0000;
2395 vmcb->gdtr.limit = 0x0;
2396 vmcb->gdtr.base = 0x00;
2398 vmcb->tr.sel = 0;
2399 vmcb->tr.attr.bytes = 0;
2400 vmcb->tr.limit = 0x0;
2401 vmcb->tr.base = 0;
2403 vmcb->idtr.sel = 0x00;
2404 vmcb->idtr.attr.bytes = 0x0000;
2405 vmcb->idtr.limit = 0x3ff;
2406 vmcb->idtr.base = 0x00;
2408 vmcb->rax = 0;
2409 vmcb->rsp = 0;
2411 return 0;
2415 /*
2416 * svm_do_vmmcall - SVM VMMCALL handler
2418 * returns 0 on success, non-zero otherwise
2419 */
2420 static int svm_do_vmmcall(struct vcpu *v, struct cpu_user_regs *regs)
2422 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2423 int inst_len;
2425 ASSERT(vmcb);
2426 ASSERT(regs);
2428 inst_len = __get_instruction_length(vmcb, INSTR_VMCALL, NULL);
2429 ASSERT(inst_len > 0);
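/* VMMCALL dispatch: if bit 31 of EAX is set the request is one of the
 * Xen-internal commands handled below (subject to a CPL check); otherwise it
 * is treated as an ordinary HVM hypercall via hvm_do_hypercall(). */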
2431 if ( regs->eax & 0x80000000 )
2433 /* VMMCALL sanity check */
2434 if ( vmcb->cpl > get_vmmcall_cpl(regs->edi) )
2436 printk("VMMCALL CPL check failed\n");
2437 return -1;
2440 /* handle the request */
2441 switch ( regs->eax )
2443 case VMMCALL_RESET_TO_REALMODE:
2444 if ( svm_do_vmmcall_reset_to_realmode(v, regs) )
2446 printk("svm_do_vmmcall_reset_to_realmode() failed\n");
2447 return -1;
2449 /* since we just reset the VMCB, return without adjusting
2450 * the eip */
2451 return 0;
2453 case VMMCALL_DEBUG:
2454 printk("DEBUG features not implemented yet\n");
2455 break;
2456 default:
2457 break;
2460 hvm_print_line(v, regs->eax); /* provides the current domain */
2462 else
2464 hvm_do_hypercall(regs);
2467 __update_guest_eip(vmcb, inst_len);
2468 return 0;
2472 void svm_dump_inst(unsigned long eip)
2474 u8 opcode[256];
2475 unsigned long ptr;
2476 int len;
2477 int i;
2479 ptr = eip & ~0xff;
2480 len = 0;
2482 if (hvm_copy_from_guest_virt(opcode, ptr, sizeof(opcode)) == 0)
2483 len = sizeof(opcode);
2485 printk("Code bytes around(len=%d) %lx:", len, eip);
2486 for (i = 0; i < len; i++)
2488 if ((i & 0x0f) == 0)
2489 printk("\n%08lx:", ptr+i);
2491 printk("%02x ", opcode[i]);
2494 printk("\n");
2498 void svm_dump_regs(const char *from, struct cpu_user_regs *regs)
2500 struct vcpu *v = current;
2501 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2502 unsigned long pt = v->arch.hvm_vcpu.hw_cr3;
2504 printk("%s: guest registers from %s:\n", __func__, from);
2505 #if defined (__x86_64__)
2506 printk("rax: %016lx rbx: %016lx rcx: %016lx\n",
2507 regs->rax, regs->rbx, regs->rcx);
2508 printk("rdx: %016lx rsi: %016lx rdi: %016lx\n",
2509 regs->rdx, regs->rsi, regs->rdi);
2510 printk("rbp: %016lx rsp: %016lx r8: %016lx\n",
2511 regs->rbp, regs->rsp, regs->r8);
2512 printk("r9: %016lx r10: %016lx r11: %016lx\n",
2513 regs->r9, regs->r10, regs->r11);
2514 printk("r12: %016lx r13: %016lx r14: %016lx\n",
2515 regs->r12, regs->r13, regs->r14);
2516 printk("r15: %016lx cr0: %016lx cr3: %016lx\n",
2517 regs->r15, v->arch.hvm_svm.cpu_shadow_cr0, vmcb->cr3);
2518 #else
2519 printk("eax: %08x, ebx: %08x, ecx: %08x, edx: %08x\n",
2520 regs->eax, regs->ebx, regs->ecx, regs->edx);
2521 printk("edi: %08x, esi: %08x, ebp: %08x, esp: %08x\n",
2522 regs->edi, regs->esi, regs->ebp, regs->esp);
2523 printk("%s: guest cr0: %lx\n", __func__,
2524 v->arch.hvm_svm.cpu_shadow_cr0);
2525 printk("guest CR3 = %llx\n", vmcb->cr3);
2526 #endif
2527 printk("%s: pt = %lx\n", __func__, pt);
2531 void svm_dump_host_regs(const char *from)
2533 struct vcpu *v = current;
2534 unsigned long pt = pagetable_get_paddr(v->arch.monitor_table);
2535 unsigned long cr3, cr0;
2536 printk("Host registers at %s\n", from);
2538 __asm__ __volatile__ ("\tmov %%cr0,%0\n"
2539 "\tmov %%cr3,%1\n"
2540 : "=r" (cr0), "=r"(cr3));
2541 printk("%s: pt = %lx, cr3 = %lx, cr0 = %lx\n", __func__, pt, cr3, cr0);
2544 #ifdef SVM_EXTRA_DEBUG
2545 static char *exit_reasons[] = {
2546 [VMEXIT_CR0_READ] = "CR0_READ",
2547 [VMEXIT_CR1_READ] = "CR1_READ",
2548 [VMEXIT_CR2_READ] = "CR2_READ",
2549 [VMEXIT_CR3_READ] = "CR3_READ",
2550 [VMEXIT_CR4_READ] = "CR4_READ",
2551 [VMEXIT_CR5_READ] = "CR5_READ",
2552 [VMEXIT_CR6_READ] = "CR6_READ",
2553 [VMEXIT_CR7_READ] = "CR7_READ",
2554 [VMEXIT_CR8_READ] = "CR8_READ",
2555 [VMEXIT_CR9_READ] = "CR9_READ",
2556 [VMEXIT_CR10_READ] = "CR10_READ",
2557 [VMEXIT_CR11_READ] = "CR11_READ",
2558 [VMEXIT_CR12_READ] = "CR12_READ",
2559 [VMEXIT_CR13_READ] = "CR13_READ",
2560 [VMEXIT_CR14_READ] = "CR14_READ",
2561 [VMEXIT_CR15_READ] = "CR15_READ",
2562 [VMEXIT_CR0_WRITE] = "CR0_WRITE",
2563 [VMEXIT_CR1_WRITE] = "CR1_WRITE",
2564 [VMEXIT_CR2_WRITE] = "CR2_WRITE",
2565 [VMEXIT_CR3_WRITE] = "CR3_WRITE",
2566 [VMEXIT_CR4_WRITE] = "CR4_WRITE",
2567 [VMEXIT_CR5_WRITE] = "CR5_WRITE",
2568 [VMEXIT_CR6_WRITE] = "CR6_WRITE",
2569 [VMEXIT_CR7_WRITE] = "CR7_WRITE",
2570 [VMEXIT_CR8_WRITE] = "CR8_WRITE",
2571 [VMEXIT_CR9_WRITE] = "CR9_WRITE",
2572 [VMEXIT_CR10_WRITE] = "CR10_WRITE",
2573 [VMEXIT_CR11_WRITE] = "CR11_WRITE",
2574 [VMEXIT_CR12_WRITE] = "CR12_WRITE",
2575 [VMEXIT_CR13_WRITE] = "CR13_WRITE",
2576 [VMEXIT_CR14_WRITE] = "CR14_WRITE",
2577 [VMEXIT_CR15_WRITE] = "CR15_WRITE",
2578 [VMEXIT_DR0_READ] = "DR0_READ",
2579 [VMEXIT_DR1_READ] = "DR1_READ",
2580 [VMEXIT_DR2_READ] = "DR2_READ",
2581 [VMEXIT_DR3_READ] = "DR3_READ",
2582 [VMEXIT_DR4_READ] = "DR4_READ",
2583 [VMEXIT_DR5_READ] = "DR5_READ",
2584 [VMEXIT_DR6_READ] = "DR6_READ",
2585 [VMEXIT_DR7_READ] = "DR7_READ",
2586 [VMEXIT_DR8_READ] = "DR8_READ",
2587 [VMEXIT_DR9_READ] = "DR9_READ",
2588 [VMEXIT_DR10_READ] = "DR10_READ",
2589 [VMEXIT_DR11_READ] = "DR11_READ",
2590 [VMEXIT_DR12_READ] = "DR12_READ",
2591 [VMEXIT_DR13_READ] = "DR13_READ",
2592 [VMEXIT_DR14_READ] = "DR14_READ",
2593 [VMEXIT_DR15_READ] = "DR15_READ",
2594 [VMEXIT_DR0_WRITE] = "DR0_WRITE",
2595 [VMEXIT_DR1_WRITE] = "DR1_WRITE",
2596 [VMEXIT_DR2_WRITE] = "DR2_WRITE",
2597 [VMEXIT_DR3_WRITE] = "DR3_WRITE",
2598 [VMEXIT_DR4_WRITE] = "DR4_WRITE",
2599 [VMEXIT_DR5_WRITE] = "DR5_WRITE",
2600 [VMEXIT_DR6_WRITE] = "DR6_WRITE",
2601 [VMEXIT_DR7_WRITE] = "DR7_WRITE",
2602 [VMEXIT_DR8_WRITE] = "DR8_WRITE",
2603 [VMEXIT_DR9_WRITE] = "DR9_WRITE",
2604 [VMEXIT_DR10_WRITE] = "DR10_WRITE",
2605 [VMEXIT_DR11_WRITE] = "DR11_WRITE",
2606 [VMEXIT_DR12_WRITE] = "DR12_WRITE",
2607 [VMEXIT_DR13_WRITE] = "DR13_WRITE",
2608 [VMEXIT_DR14_WRITE] = "DR14_WRITE",
2609 [VMEXIT_DR15_WRITE] = "DR15_WRITE",
2610 [VMEXIT_EXCEPTION_DE] = "EXCEPTION_DE",
2611 [VMEXIT_EXCEPTION_DB] = "EXCEPTION_DB",
2612 [VMEXIT_EXCEPTION_NMI] = "EXCEPTION_NMI",
2613 [VMEXIT_EXCEPTION_BP] = "EXCEPTION_BP",
2614 [VMEXIT_EXCEPTION_OF] = "EXCEPTION_OF",
2615 [VMEXIT_EXCEPTION_BR] = "EXCEPTION_BR",
2616 [VMEXIT_EXCEPTION_UD] = "EXCEPTION_UD",
2617 [VMEXIT_EXCEPTION_NM] = "EXCEPTION_NM",
2618 [VMEXIT_EXCEPTION_DF] = "EXCEPTION_DF",
2619 [VMEXIT_EXCEPTION_09] = "EXCEPTION_09",
2620 [VMEXIT_EXCEPTION_TS] = "EXCEPTION_TS",
2621 [VMEXIT_EXCEPTION_NP] = "EXCEPTION_NP",
2622 [VMEXIT_EXCEPTION_SS] = "EXCEPTION_SS",
2623 [VMEXIT_EXCEPTION_GP] = "EXCEPTION_GP",
2624 [VMEXIT_EXCEPTION_PF] = "EXCEPTION_PF",
2625 [VMEXIT_EXCEPTION_15] = "EXCEPTION_15",
2626 [VMEXIT_EXCEPTION_MF] = "EXCEPTION_MF",
2627 [VMEXIT_EXCEPTION_AC] = "EXCEPTION_AC",
2628 [VMEXIT_EXCEPTION_MC] = "EXCEPTION_MC",
2629 [VMEXIT_EXCEPTION_XF] = "EXCEPTION_XF",
2630 [VMEXIT_INTR] = "INTR",
2631 [VMEXIT_NMI] = "NMI",
2632 [VMEXIT_SMI] = "SMI",
2633 [VMEXIT_INIT] = "INIT",
2634 [VMEXIT_VINTR] = "VINTR",
2635 [VMEXIT_CR0_SEL_WRITE] = "CR0_SEL_WRITE",
2636 [VMEXIT_IDTR_READ] = "IDTR_READ",
2637 [VMEXIT_GDTR_READ] = "GDTR_READ",
2638 [VMEXIT_LDTR_READ] = "LDTR_READ",
2639 [VMEXIT_TR_READ] = "TR_READ",
2640 [VMEXIT_IDTR_WRITE] = "IDTR_WRITE",
2641 [VMEXIT_GDTR_WRITE] = "GDTR_WRITE",
2642 [VMEXIT_LDTR_WRITE] = "LDTR_WRITE",
2643 [VMEXIT_TR_WRITE] = "TR_WRITE",
2644 [VMEXIT_RDTSC] = "RDTSC",
2645 [VMEXIT_RDPMC] = "RDPMC",
2646 [VMEXIT_PUSHF] = "PUSHF",
2647 [VMEXIT_POPF] = "POPF",
2648 [VMEXIT_CPUID] = "CPUID",
2649 [VMEXIT_RSM] = "RSM",
2650 [VMEXIT_IRET] = "IRET",
2651 [VMEXIT_SWINT] = "SWINT",
2652 [VMEXIT_INVD] = "INVD",
2653 [VMEXIT_PAUSE] = "PAUSE",
2654 [VMEXIT_HLT] = "HLT",
2655 [VMEXIT_INVLPG] = "INVLPG",
2656 [VMEXIT_INVLPGA] = "INVLPGA",
2657 [VMEXIT_IOIO] = "IOIO",
2658 [VMEXIT_MSR] = "MSR",
2659 [VMEXIT_TASK_SWITCH] = "TASK_SWITCH",
2660 [VMEXIT_FERR_FREEZE] = "FERR_FREEZE",
2661 [VMEXIT_SHUTDOWN] = "SHUTDOWN",
2662 [VMEXIT_VMRUN] = "VMRUN",
2663 [VMEXIT_VMMCALL] = "VMMCALL",
2664 [VMEXIT_VMLOAD] = "VMLOAD",
2665 [VMEXIT_VMSAVE] = "VMSAVE",
2666 [VMEXIT_STGI] = "STGI",
2667 [VMEXIT_CLGI] = "CLGI",
2668 [VMEXIT_SKINIT] = "SKINIT",
2669 [VMEXIT_RDTSCP] = "RDTSCP",
2670 [VMEXIT_ICEBP] = "ICEBP",
2671 [VMEXIT_NPF] = "NPF"
2672 };
2673 #endif /* SVM_EXTRA_DEBUG */
2675 #ifdef SVM_WALK_GUEST_PAGES
2676 void walk_shadow_and_guest_pt(unsigned long gva)
2678 l2_pgentry_t gpde;
2679 l2_pgentry_t spde;
2680 l1_pgentry_t gpte;
2681 l1_pgentry_t spte;
2682 struct vcpu *v = current;
2683 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2684 paddr_t gpa;
2686 gpa = paging_gva_to_gpa(current, gva);
2687 printk("gva = %lx, gpa=%"PRIpaddr", gCR3=%x\n", gva, gpa, (u32)vmcb->cr3);
2688 if( !svm_paging_enabled(v) || mmio_space(gpa) )
2689 return;
2691 /* let's dump the guest and shadow page info */
2693 __guest_get_l2e(v, gva, &gpde);
2694 printk( "G-PDE = %x, flags=%x\n", gpde.l2, l2e_get_flags(gpde) );
2695 __shadow_get_l2e( v, gva, &spde );
2696 printk( "S-PDE = %x, flags=%x\n", spde.l2, l2e_get_flags(spde) );
2698 if ( unlikely(!(l2e_get_flags(gpde) & _PAGE_PRESENT)) )
2699 return;
2701 spte = l1e_empty();
2703 /* This is actually overkill; we only need to ensure the hl2 is in sync. */
2704 shadow_sync_va(v, gva);
2706 gpte.l1 = 0;
2707 __copy_from_user(&gpte, &__linear_l1_table[ l1_linear_offset(gva) ],
2708 sizeof(gpte) );
2709 printk( "G-PTE = %x, flags=%x\n", gpte.l1, l1e_get_flags(gpte) );
2711 BUG(); // need to think about this, and convert usage of
2712 // phys_to_machine_mapping to use pagetable format...
2713 __copy_from_user( &spte, &phys_to_machine_mapping[ l1e_get_pfn( gpte ) ],
2714 sizeof(spte) );
2716 printk( "S-PTE = %x, flags=%x\n", spte.l1, l1e_get_flags(spte));
2718 #endif /* SVM_WALK_GUEST_PAGES */
2721 asmlinkage void svm_vmexit_handler(struct cpu_user_regs *regs)
2723 unsigned int exit_reason;
2724 unsigned long eip;
2725 struct vcpu *v = current;
2726 int error;
2727 int do_debug = 0;
2728 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2730 ASSERT(vmcb);
2732 exit_reason = vmcb->exitcode;
2733 save_svm_cpu_user_regs(v, regs);
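/* VMEXIT_INVALID means VMRUN rejected the VMCB because of illegal guest
 * state or control fields; nothing can be emulated, so dump the VMCB and
 * crash the domain. */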
2735 if (exit_reason == VMEXIT_INVALID)
2737 svm_dump_vmcb(__func__, vmcb);
2738 goto exit_and_crash;
2741 #ifdef SVM_EXTRA_DEBUG
2743 #if defined(__i386__)
2744 #define rip eip
2745 #endif
2747 static unsigned long intercepts_counter = 0;
2749 if (svm_dbg_on && exit_reason == VMEXIT_EXCEPTION_PF)
2751 if (svm_paging_enabled(v) &&
2752 !mmio_space(
2753 paging_gva_to_gfn(current, vmcb->exitinfo2) << PAGE_SHIFT))
2755 printk("I%08ld,ExC=%s(%d),IP=%x:%"PRIx64","
2756 "I1=%"PRIx64",I2=%"PRIx64",INT=%"PRIx64", "
2757 "gpa=%"PRIx64"\n", intercepts_counter,
2758 exit_reasons[exit_reason], exit_reason, regs->cs,
2759 (u64)regs->rip,
2760 (u64)vmcb->exitinfo1,
2761 (u64)vmcb->exitinfo2,
2762 (u64)vmcb->exitintinfo.bytes,
2763 (((u64)paging_gva_to_gfn(current, vmcb->exitinfo2)
2764 << PAGE_SHIFT) | (vmcb->exitinfo2 & ~PAGE_MASK)));
2766 else
2768 printk("I%08ld,ExC=%s(%d),IP=%x:%"PRIx64","
2769 "I1=%"PRIx64",I2=%"PRIx64",INT=%"PRIx64"\n",
2770 intercepts_counter,
2771 exit_reasons[exit_reason], exit_reason, regs->cs,
2772 (u64)regs->rip,
2773 (u64)vmcb->exitinfo1,
2774 (u64)vmcb->exitinfo2,
2775 (u64)vmcb->exitintinfo.bytes );
2778 else if ( svm_dbg_on
2779 && exit_reason != VMEXIT_IOIO
2780 && exit_reason != VMEXIT_INTR)
2783 if (exit_reasons[exit_reason])
2785 printk("I%08ld,ExC=%s(%d),IP=%x:%"PRIx64","
2786 "I1=%"PRIx64",I2=%"PRIx64",INT=%"PRIx64"\n",
2787 intercepts_counter,
2788 exit_reasons[exit_reason], exit_reason, regs->cs,
2789 (u64)regs->rip,
2790 (u64)vmcb->exitinfo1,
2791 (u64)vmcb->exitinfo2,
2792 (u64)vmcb->exitintinfo.bytes);
2794 else
2796 printk("I%08ld,ExC=%d(0x%x),IP=%x:%"PRIx64","
2797 "I1=%"PRIx64",I2=%"PRIx64",INT=%"PRIx64"\n",
2798 intercepts_counter, exit_reason, exit_reason, regs->cs,
2799 (u64)regs->rip,
2800 (u64)vmcb->exitinfo1,
2801 (u64)vmcb->exitinfo2,
2802 (u64)vmcb->exitintinfo.bytes);
2806 #ifdef SVM_WALK_GUEST_PAGES
2807 if( exit_reason == VMEXIT_EXCEPTION_PF
2808 && ( ( vmcb->exitinfo2 == vmcb->rip )
2809 || vmcb->exitintinfo.bytes) )
2811 if ( svm_paging_enabled(v) &&
2812 !mmio_space(gva_to_gpa(vmcb->exitinfo2)) )
2813 walk_shadow_and_guest_pt(vmcb->exitinfo2);
2815 #endif
2817 intercepts_counter++;
2819 #if 0
2820 if (svm_dbg_on)
2821 do_debug = svm_do_debugout(exit_reason);
2822 #endif
2824 if (do_debug)
2826 printk("%s:+ guest_table = 0x%08x, monitor_table = 0x%08x, "
2827 "hw_cr3 = 0x%16lx\n",
2828 __func__,
2829 (int) v->arch.guest_table.pfn,
2830 (int) v->arch.monitor_table.pfn,
2831 (long unsigned int) v->arch.hvm_vcpu.hw_cr3);
2833 svm_dump_vmcb(__func__, vmcb);
2834 svm_dump_regs(__func__, regs);
2835 svm_dump_inst(svm_rip2pointer(vmcb));
2838 #if defined(__i386__)
2839 #undef rip
2840 #endif
2843 #endif /* SVM_EXTRA_DEBUG */
2846 perfc_incra(svmexits, exit_reason);
2847 eip = vmcb->rip;
2849 #ifdef SVM_EXTRA_DEBUG
2850 if (do_debug)
2852 printk("eip = %lx, exit_reason = %d (0x%x)\n",
2853 eip, exit_reason, exit_reason);
2855 #endif /* SVM_EXTRA_DEBUG */
2857 TRACE_3D(TRC_VMX_VMEXIT, v->domain->domain_id, eip, exit_reason);
2859 switch (exit_reason)
2861 case VMEXIT_EXCEPTION_DB:
2863 #ifdef XEN_DEBUGGER
2864 svm_debug_save_cpu_user_regs(vmcb, regs);
2865 pdb_handle_exception(1, regs, 1);
2866 svm_debug_restore_cpu_user_regs(regs);
2867 #else
2868 svm_store_cpu_user_regs(regs, v);
2869 domain_pause_for_debugger();
2870 #endif
2872 break;
2874 case VMEXIT_INTR:
2875 case VMEXIT_NMI:
2876 case VMEXIT_SMI:
2877 /* Asynchronous events, handled when we STGI'd after the VMEXIT. */
2878 break;
2880 case VMEXIT_INIT:
2881 BUG(); /* unreachable */
2883 case VMEXIT_EXCEPTION_BP:
2884 #ifdef XEN_DEBUGGER
2885 svm_debug_save_cpu_user_regs(vmcb, regs);
2886 pdb_handle_exception(3, regs, 1);
2887 svm_debug_restore_cpu_user_regs(regs);
2888 #else
2889 if ( test_bit(_DOMF_debugging, &v->domain->domain_flags) )
2890 domain_pause_for_debugger();
2891 else
2892 svm_inject_exception(v, TRAP_int3, 0, 0);
2893 #endif
2894 break;
2896 case VMEXIT_EXCEPTION_NM:
2897 svm_do_no_device_fault(vmcb);
2898 break;
2900 case VMEXIT_EXCEPTION_GP:
2901 /* This should probably not be trapped in the future */
2902 regs->error_code = vmcb->exitinfo1;
2903 svm_do_general_protection_fault(v, regs);
2904 break;
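/* Page-fault exits: svm_do_page_fault() returns non-zero when the fault was
 * resolved by the paging code; otherwise the #PF is reflected back to the
 * guest with its CR2 set to the faulting address. */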
2906 case VMEXIT_EXCEPTION_PF:
2908 unsigned long va;
2909 va = vmcb->exitinfo2;
2910 regs->error_code = vmcb->exitinfo1;
2911 HVM_DBG_LOG(DBG_LEVEL_VMMU,
2912 "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx",
2913 (unsigned long)regs->eax, (unsigned long)regs->ebx,
2914 (unsigned long)regs->ecx, (unsigned long)regs->edx,
2915 (unsigned long)regs->esi, (unsigned long)regs->edi);
2917 if (!(error = svm_do_page_fault(va, regs)))
2919 /* Inject #PF using the interruption-information fields. */
2920 svm_inject_exception(v, TRAP_page_fault, 1, regs->error_code);
2922 v->arch.hvm_svm.cpu_cr2 = va;
2923 vmcb->cr2 = va;
2924 TRACE_3D(TRC_VMX_INTR, v->domain->domain_id,
2925 VMEXIT_EXCEPTION_PF, va);
2927 break;
2930 case VMEXIT_EXCEPTION_DF:
2931 /* Debug info to hopefully help debug WHY the guest double-faulted. */
2932 svm_dump_vmcb(__func__, vmcb);
2933 svm_dump_regs(__func__, regs);
2934 svm_dump_inst(svm_rip2pointer(vmcb));
2935 svm_inject_exception(v, TRAP_double_fault, 1, 0);
2936 break;
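/* A VINTR exit indicates that the virtual interrupt window being waited for
 * has opened: the guest can now accept an interrupt, so clear the pending
 * virtual IRQ and drop the VINTR intercept. */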
2938 case VMEXIT_VINTR:
2939 vmcb->vintr.fields.irq = 0;
2940 vmcb->general1_intercepts &= ~GENERAL1_INTERCEPT_VINTR;
2941 break;
2943 case VMEXIT_INVD:
2944 svm_vmexit_do_invd(vmcb);
2945 break;
2947 case VMEXIT_GDTR_WRITE:
2948 printk("WRITE to GDTR\n");
2949 break;
2951 case VMEXIT_TASK_SWITCH:
2952 goto exit_and_crash;
2954 case VMEXIT_CPUID:
2955 svm_vmexit_do_cpuid(vmcb, regs);
2956 break;
2958 case VMEXIT_HLT:
2959 svm_vmexit_do_hlt(vmcb);
2960 break;
2962 case VMEXIT_INVLPG:
2963 svm_handle_invlpg(0, regs);
2964 break;
2966 case VMEXIT_INVLPGA:
2967 svm_handle_invlpg(1, regs);
2968 break;
2970 case VMEXIT_VMMCALL:
2971 svm_do_vmmcall(v, regs);
2972 break;
2974 case VMEXIT_CR0_READ:
2975 svm_cr_access(v, 0, TYPE_MOV_FROM_CR, regs);
2976 break;
2978 case VMEXIT_CR2_READ:
2979 svm_cr_access(v, 2, TYPE_MOV_FROM_CR, regs);
2980 break;
2982 case VMEXIT_CR3_READ:
2983 svm_cr_access(v, 3, TYPE_MOV_FROM_CR, regs);
2984 break;
2986 case VMEXIT_CR4_READ:
2987 svm_cr_access(v, 4, TYPE_MOV_FROM_CR, regs);
2988 break;
2990 case VMEXIT_CR8_READ:
2991 svm_cr_access(v, 8, TYPE_MOV_FROM_CR, regs);
2992 break;
2994 case VMEXIT_CR0_WRITE:
2995 svm_cr_access(v, 0, TYPE_MOV_TO_CR, regs);
2996 break;
2998 case VMEXIT_CR2_WRITE:
2999 svm_cr_access(v, 2, TYPE_MOV_TO_CR, regs);
3000 break;
3002 case VMEXIT_CR3_WRITE:
3003 svm_cr_access(v, 3, TYPE_MOV_TO_CR, regs);
3004 local_flush_tlb();
3005 break;
3007 case VMEXIT_CR4_WRITE:
3008 svm_cr_access(v, 4, TYPE_MOV_TO_CR, regs);
3009 break;
3011 case VMEXIT_CR8_WRITE:
3012 svm_cr_access(v, 8, TYPE_MOV_TO_CR, regs);
3013 break;
3015 case VMEXIT_DR0_WRITE ... VMEXIT_DR7_WRITE:
3016 svm_dr_access(v, regs);
3017 break;
3019 case VMEXIT_IOIO:
3020 svm_io_instruction(v);
3021 break;
3023 case VMEXIT_MSR:
3024 svm_do_msr_access(v, regs);
3025 break;
3027 case VMEXIT_SHUTDOWN:
3028 hvm_triple_fault();
3029 break;
3031 default:
3032 exit_and_crash:
3033 gdprintk(XENLOG_ERR, "unexpected VMEXIT: exit reason = 0x%x, "
3034 "exitinfo1 = %"PRIx64", exitinfo2 = %"PRIx64"\n",
3035 exit_reason,
3036 (u64)vmcb->exitinfo1, (u64)vmcb->exitinfo2);
3037 domain_crash(v->domain);
3038 break;
3041 #ifdef SVM_EXTRA_DEBUG
3042 if (do_debug)
3044 printk("%s: Done switch on vmexit_code\n", __func__);
3045 svm_dump_regs(__func__, regs);
3048 if (do_debug)
3050 printk("vmexit_handler():- guest_table = 0x%08x, "
3051 "monitor_table = 0x%08x, hw_cr3 = 0x%16x\n",
3052 (int)v->arch.guest_table.pfn,
3053 (int)v->arch.monitor_table.pfn,
3054 (int)v->arch.hvm_vcpu.hw_cr3);
3055 printk("svm_vmexit_handler: Returning\n");
3057 #endif
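/*
 * svm_load_cr2 below is called from the assembly VMRUN path (hence the
 * asmlinkage attribute, as far as can be told from this file) to install the
 * guest's virtual CR2 into the real CR2 register, with interrupts disabled so
 * that the value cannot be clobbered before the VMRUN.
 */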
3060 asmlinkage void svm_load_cr2(void)
3062 struct vcpu *v = current;
3064 local_irq_disable();
3065 asm volatile("mov %0,%%cr2": :"r" (v->arch.hvm_svm.cpu_cr2));
3068 /*
3069 * Local variables:
3070 * mode: C
3071 * c-set-style: "BSD"
3072 * c-basic-offset: 4
3073 * tab-width: 4
3074 * indent-tabs-mode: nil
3075 * End:
3076 */