
xen/arch/x86/hvm/svm/svm.c @ 11797:d845c9522d9e

[HVM][SVM] Check if SVM is disabled by the BIOS before enabling it.

Newer BIOS implementations will be able to disable the SVM feature,
so an additional test of an MSR (VM_CR, 0xC0010114, bit 4) is
necessary: a set bit means SVM is disabled. Bit 4 of MSR 0xC0010114
reads as 0 (SVM enabled) on machines with older BIOSes that lack the
SVM-disable feature.

Signed-off-by: Wei Huang <wei.huang2@amd.com>
Signed-off-by: Tom Woller <thomas.woller@amd.com>
author kfraser@localhost.localdomain
date Thu Oct 12 16:12:10 2006 +0100 (2006-10-12)
parents 82983c636549
children f14a67a35bec
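
For reference, a minimal, self-contained sketch of the BIOS-disable check this
changeset describes. This is illustrative only, not the in-tree code: start_svm()
below uses Xen's rdmsr() macro with the raw constant 0xC0010114 and mask 0x10,
whereas the MSR_VM_CR / VM_CR_SVMDIS names and the rdmsr64() helper here are
assumptions, and RDMSR must be executed at CPL0.

#include <stdint.h>

#define MSR_VM_CR     0xC0010114u   /* AMD VM_CR MSR */
#define VM_CR_SVMDIS  (1u << 4)     /* set => SVM disabled by the BIOS */

/* Read an MSR via the RDMSR instruction (requires ring 0). */
static inline uint64_t rdmsr64(uint32_t msr)
{
    uint32_t lo, hi;
    __asm__ __volatile__ ( "rdmsr" : "=a" (lo), "=d" (hi) : "c" (msr) );
    return ((uint64_t)hi << 32) | lo;
}

/* Returns non-zero if the BIOS has disabled SVM on this core. */
static inline int svm_disabled_by_bios(void)
{
    return (rdmsr64(MSR_VM_CR) & VM_CR_SVMDIS) != 0;
}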
line source
1 /*
2 * svm.c: handling SVM architecture-related VM exits
3 * Copyright (c) 2004, Intel Corporation.
4 * Copyright (c) 2005, AMD Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
17 * Place - Suite 330, Boston, MA 02111-1307 USA.
18 *
19 */
21 #include <xen/config.h>
22 #include <xen/init.h>
23 #include <xen/lib.h>
24 #include <xen/trace.h>
25 #include <xen/sched.h>
26 #include <xen/irq.h>
27 #include <xen/softirq.h>
28 #include <xen/hypercall.h>
29 #include <xen/domain_page.h>
30 #include <asm/current.h>
31 #include <asm/io.h>
32 #include <asm/shadow.h>
33 #include <asm/regs.h>
34 #include <asm/cpufeature.h>
35 #include <asm/processor.h>
36 #include <asm/types.h>
37 #include <asm/msr.h>
38 #include <asm/spinlock.h>
39 #include <asm/hvm/hvm.h>
40 #include <asm/hvm/support.h>
41 #include <asm/hvm/io.h>
42 #include <asm/hvm/svm/svm.h>
43 #include <asm/hvm/svm/vmcb.h>
44 #include <asm/hvm/svm/emulate.h>
45 #include <asm/hvm/svm/vmmcall.h>
46 #include <asm/hvm/svm/intr.h>
47 #include <asm/x86_emulate.h>
48 #include <public/sched.h>
50 #define SVM_EXTRA_DEBUG
52 #define set_segment_register(name, value) \
53 __asm__ __volatile__ ( "movw %%ax ,%%" STR(name) "" : : "a" (value) )
55 /* External functions. We should move these to some suitable header file(s) */
57 extern void do_nmi(struct cpu_user_regs *, unsigned long);
58 extern int inst_copy_from_guest(unsigned char *buf, unsigned long guest_eip,
59 int inst_len);
60 extern uint32_t vlapic_update_ppr(struct vlapic *vlapic);
61 extern asmlinkage void do_IRQ(struct cpu_user_regs *);
62 extern void send_pio_req(struct cpu_user_regs *regs, unsigned long port,
63 unsigned long count, int size, long value, int dir, int pvalid);
64 extern void svm_dump_inst(unsigned long eip);
65 extern int svm_dbg_on;
66 void svm_dump_regs(const char *from, struct cpu_user_regs *regs);
68 static void svm_relinquish_guest_resources(struct domain *d);
69 static int svm_do_vmmcall_reset_to_realmode(struct vcpu *v,
70 struct cpu_user_regs *regs);
72 /* va of hardware host save area */
73 static void *hsa[NR_CPUS] __read_mostly;
75 /* vmcb used for extended host state */
76 static void *root_vmcb[NR_CPUS] __read_mostly;
78 /* physical address of above for host VMSAVE/VMLOAD */
79 u64 root_vmcb_pa[NR_CPUS] __read_mostly;
82 /* ASID API */
83 enum {
84 ASID_AVAILABLE = 0,
85 ASID_INUSE,
86 ASID_RETIRED
87 };
88 #define INITIAL_ASID 0
89 #define ASID_MAX 64
91 struct asid_pool {
92 spinlock_t asid_lock;
93 u32 asid[ASID_MAX];
94 };
96 static DEFINE_PER_CPU(struct asid_pool, asid_pool);
99 /*
100 * Initializes the pool of ASIDs used by the guests, per core.
101 */
102 void asidpool_init(int core)
103 {
104 int i;
106 spin_lock_init(&per_cpu(asid_pool,core).asid_lock);
108 /* Host ASID is always in use */
109 per_cpu(asid_pool,core).asid[INITIAL_ASID] = ASID_INUSE;
110 for ( i = 1; i < ASID_MAX; i++ )
111 per_cpu(asid_pool,core).asid[i] = ASID_AVAILABLE;
112 }
115 /* internal function to get the next available ASID */
116 static int asidpool_fetch_next(struct vmcb_struct *vmcb, int core)
117 {
118 int i;
119 for ( i = 1; i < ASID_MAX; i++ )
120 {
121 if ( per_cpu(asid_pool,core).asid[i] == ASID_AVAILABLE )
122 {
123 vmcb->guest_asid = i;
124 per_cpu(asid_pool,core).asid[i] = ASID_INUSE;
125 return i;
126 }
127 }
128 return -1;
129 }
132 /*
133 * This function assigns the next available ASID number
134 * to the passed VMCB. If none are available, the
135 * TLB flush flag is set, and all retired ASIDs
136 * are made available.
137 *
138 * Returns: 1 -- success;
139 * 0 -- failure -- no more ASID numbers
140 * available.
141 */
142 int asidpool_assign_next( struct vmcb_struct *vmcb, int retire_current,
143 int oldcore, int newcore )
144 {
145 int i;
146 int res = 1;
147 static unsigned long cnt=0;
149 spin_lock(&per_cpu(asid_pool,oldcore).asid_lock);
150 if( retire_current && vmcb->guest_asid ) {
151 per_cpu(asid_pool,oldcore).asid[vmcb->guest_asid & (ASID_MAX-1)] =
152 ASID_RETIRED;
153 }
154 spin_unlock(&per_cpu(asid_pool,oldcore).asid_lock);
155 spin_lock(&per_cpu(asid_pool,newcore).asid_lock);
156 if( asidpool_fetch_next( vmcb, newcore ) < 0 ) {
157 if (svm_dbg_on)
158 printk( "SVM: tlb(%ld)\n", cnt++ );
159 /* Flush the TLB and make all retired slots available */
160 vmcb->tlb_control = 1;
161 for( i = 1; i < ASID_MAX; i++ ) {
162 if( per_cpu(asid_pool,newcore).asid[i] == ASID_RETIRED ) {
163 per_cpu(asid_pool,newcore).asid[i] = ASID_AVAILABLE;
164 }
165 }
166 /* Get the First slot available */
167 res = asidpool_fetch_next( vmcb, newcore ) > 0;
168 }
169 spin_unlock(&per_cpu(asid_pool,newcore).asid_lock);
170 return res;
171 }
173 void asidpool_retire( struct vmcb_struct *vmcb, int core )
174 {
175 spin_lock(&per_cpu(asid_pool,core).asid_lock);
176 if( vmcb->guest_asid ) {
177 per_cpu(asid_pool,core).asid[vmcb->guest_asid & (ASID_MAX-1)] =
178 ASID_RETIRED;
179 }
180 spin_unlock(&per_cpu(asid_pool,core).asid_lock);
181 }
183 static inline void svm_inject_exception(struct vcpu *v, int trap,
184 int ev, int error_code)
185 {
186 eventinj_t event;
187 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
189 event.bytes = 0;
190 event.fields.v = 1;
191 event.fields.type = EVENTTYPE_EXCEPTION;
192 event.fields.vector = trap;
193 event.fields.ev = ev;
194 event.fields.errorcode = error_code;
196 ASSERT(vmcb->eventinj.fields.v == 0);
198 vmcb->eventinj = event;
199 }
201 static void stop_svm(void)
202 {
203 u32 eax, edx;
204 int cpu = smp_processor_id();
206 /* We turn off the EFER_SVME bit. */
207 rdmsr(MSR_EFER, eax, edx);
208 eax &= ~EFER_SVME;
209 wrmsr(MSR_EFER, eax, edx);
211 /* release the HSA */
212 free_host_save_area(hsa[cpu]);
213 hsa[cpu] = NULL;
214 wrmsr(MSR_K8_VM_HSAVE_PA, 0, 0 );
216 /* free up the root vmcb */
217 free_vmcb(root_vmcb[cpu]);
218 root_vmcb[cpu] = NULL;
219 root_vmcb_pa[cpu] = 0;
221 printk("AMD SVM Extension is disabled.\n");
222 }
225 static void svm_store_cpu_guest_regs(
226 struct vcpu *v, struct cpu_user_regs *regs, unsigned long *crs)
227 {
228 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
230 if ( regs != NULL )
231 {
232 regs->eip = vmcb->rip;
233 regs->esp = vmcb->rsp;
234 regs->eflags = vmcb->rflags;
235 regs->cs = vmcb->cs.sel;
236 regs->ds = vmcb->ds.sel;
237 regs->es = vmcb->es.sel;
238 regs->ss = vmcb->ss.sel;
239 regs->gs = vmcb->gs.sel;
240 regs->fs = vmcb->fs.sel;
241 }
243 if ( crs != NULL )
244 {
245 /* Returning the guest's regs */
246 crs[0] = v->arch.hvm_svm.cpu_shadow_cr0;
247 crs[2] = v->arch.hvm_svm.cpu_cr2;
248 crs[3] = v->arch.hvm_svm.cpu_cr3;
249 crs[4] = v->arch.hvm_svm.cpu_shadow_cr4;
250 }
251 }
253 static int svm_paging_enabled(struct vcpu *v)
254 {
255 unsigned long cr0;
257 cr0 = v->arch.hvm_svm.cpu_shadow_cr0;
259 return (cr0 & X86_CR0_PE) && (cr0 & X86_CR0_PG);
260 }
262 static int svm_pae_enabled(struct vcpu *v)
263 {
264 unsigned long cr4;
266 if(!svm_paging_enabled(v))
267 return 0;
269 cr4 = v->arch.hvm_svm.cpu_shadow_cr4;
271 return (cr4 & X86_CR4_PAE);
272 }
274 #define IS_CANO_ADDRESS(add) 1
276 static inline int long_mode_do_msr_read(struct cpu_user_regs *regs)
277 {
278 u64 msr_content = 0;
279 struct vcpu *vc = current;
280 struct vmcb_struct *vmcb = vc->arch.hvm_svm.vmcb;
282 switch (regs->ecx)
283 {
284 case MSR_EFER:
285 msr_content = vmcb->efer;
286 msr_content &= ~EFER_SVME;
287 break;
289 case MSR_FS_BASE:
290 msr_content = vmcb->fs.base;
291 break;
293 case MSR_GS_BASE:
294 msr_content = vmcb->gs.base;
295 break;
297 case MSR_SHADOW_GS_BASE:
298 msr_content = vmcb->kerngsbase;
299 break;
301 case MSR_STAR:
302 msr_content = vmcb->star;
303 break;
305 case MSR_LSTAR:
306 msr_content = vmcb->lstar;
307 break;
309 case MSR_CSTAR:
310 msr_content = vmcb->cstar;
311 break;
313 case MSR_SYSCALL_MASK:
314 msr_content = vmcb->sfmask;
315 break;
316 default:
317 return 0;
318 }
320 HVM_DBG_LOG(DBG_LEVEL_2, "mode_do_msr_read: msr_content: %"PRIx64"\n",
321 msr_content);
323 regs->eax = (u32)(msr_content >> 0);
324 regs->edx = (u32)(msr_content >> 32);
325 return 1;
326 }
328 static inline int long_mode_do_msr_write(struct cpu_user_regs *regs)
329 {
330 u64 msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
331 struct vcpu *vc = current;
332 struct vmcb_struct *vmcb = vc->arch.hvm_svm.vmcb;
334 HVM_DBG_LOG(DBG_LEVEL_1, "mode_do_msr_write msr %lx "
335 "msr_content %"PRIx64"\n",
336 (unsigned long)regs->ecx, msr_content);
338 switch (regs->ecx)
339 {
340 case MSR_EFER:
341 #ifdef __x86_64__
342 /* offending reserved bit will cause #GP */
343 if ( msr_content & ~(EFER_LME | EFER_LMA | EFER_NX | EFER_SCE) )
344 {
345 printk("Trying to set reserved bit in EFER: %"PRIx64"\n",
346 msr_content);
347 svm_inject_exception(vc, TRAP_gp_fault, 1, 0);
348 return 0;
349 }
351 /* LME: 0 -> 1 */
352 if ( msr_content & EFER_LME &&
353 !test_bit(SVM_CPU_STATE_LME_ENABLED, &vc->arch.hvm_svm.cpu_state))
354 {
355 if ( svm_paging_enabled(vc) ||
356 !test_bit(SVM_CPU_STATE_PAE_ENABLED,
357 &vc->arch.hvm_svm.cpu_state) )
358 {
359 printk("Trying to set LME bit when "
360 "in paging mode or PAE bit is not set\n");
361 svm_inject_exception(vc, TRAP_gp_fault, 1, 0);
362 return 0;
363 }
364 set_bit(SVM_CPU_STATE_LME_ENABLED, &vc->arch.hvm_svm.cpu_state);
365 }
367 /* We have already recorded that we want LME, so it will be set
368 * next time CR0 gets updated. So we clear that bit and continue.
369 */
370 if ((msr_content ^ vmcb->efer) & EFER_LME)
371 msr_content &= ~EFER_LME;
372 /* No update for LME/LMA since it has no effect */
373 #endif
374 vmcb->efer = msr_content | EFER_SVME;
375 break;
377 case MSR_FS_BASE:
378 case MSR_GS_BASE:
379 if (!(SVM_LONG_GUEST(vc)))
380 domain_crash_synchronous();
382 if (!IS_CANO_ADDRESS(msr_content))
383 {
384 HVM_DBG_LOG(DBG_LEVEL_1, "Not cano address of msr write\n");
385 svm_inject_exception(vc, TRAP_gp_fault, 1, 0);
386 }
388 if (regs->ecx == MSR_FS_BASE)
389 vmcb->fs.base = msr_content;
390 else
391 vmcb->gs.base = msr_content;
392 break;
394 case MSR_SHADOW_GS_BASE:
395 vmcb->kerngsbase = msr_content;
396 break;
398 case MSR_STAR:
399 vmcb->star = msr_content;
400 break;
402 case MSR_LSTAR:
403 vmcb->lstar = msr_content;
404 break;
406 case MSR_CSTAR:
407 vmcb->cstar = msr_content;
408 break;
410 case MSR_SYSCALL_MASK:
411 vmcb->sfmask = msr_content;
412 break;
414 default:
415 return 0;
416 }
417 return 1;
418 }
421 #define loaddebug(_v,_reg) \
422 __asm__ __volatile__ ("mov %0,%%db" #_reg : : "r" ((_v)->debugreg[_reg]))
423 #define savedebug(_v,_reg) \
424 __asm__ __volatile__ ("mov %%db" #_reg ",%0" : : "r" ((_v)->debugreg[_reg]))
427 static inline void svm_save_dr(struct vcpu *v)
428 {
429 if (v->arch.hvm_vcpu.flag_dr_dirty)
430 {
431 /* clear the DR dirty flag and re-enable intercepts for DR accesses */
432 v->arch.hvm_vcpu.flag_dr_dirty = 0;
433 v->arch.hvm_svm.vmcb->dr_intercepts = DR_INTERCEPT_ALL_WRITES;
435 savedebug(&v->arch.guest_context, 0);
436 savedebug(&v->arch.guest_context, 1);
437 savedebug(&v->arch.guest_context, 2);
438 savedebug(&v->arch.guest_context, 3);
439 }
440 }
443 static inline void __restore_debug_registers(struct vcpu *v)
444 {
445 loaddebug(&v->arch.guest_context, 0);
446 loaddebug(&v->arch.guest_context, 1);
447 loaddebug(&v->arch.guest_context, 2);
448 loaddebug(&v->arch.guest_context, 3);
449 }
452 static inline void svm_restore_dr(struct vcpu *v)
453 {
454 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
456 if (!vmcb)
457 return;
459 if (unlikely(vmcb->dr7 & 0xFF))
460 __restore_debug_registers(v);
461 }
464 static int svm_realmode(struct vcpu *v)
465 {
466 unsigned long cr0 = v->arch.hvm_svm.cpu_shadow_cr0;
467 unsigned long eflags = v->arch.hvm_svm.vmcb->rflags;
469 return (eflags & X86_EFLAGS_VM) || !(cr0 & X86_CR0_PE);
470 }
472 static int svm_guest_x86_mode(struct vcpu *v)
473 {
474 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
476 if ( vmcb->efer & EFER_LMA )
477 return (vmcb->cs.attributes.fields.l ?
478 X86EMUL_MODE_PROT64 : X86EMUL_MODE_PROT32);
480 if ( svm_realmode(v) )
481 return X86EMUL_MODE_REAL;
483 return (vmcb->cs.attributes.fields.db ?
484 X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16);
485 }
487 void svm_update_host_cr3(struct vcpu *v)
488 {
489 /* SVM doesn't have a HOST_CR3 equivalent to update. */
490 }
492 unsigned long svm_get_ctrl_reg(struct vcpu *v, unsigned int num)
493 {
494 switch ( num )
495 {
496 case 0:
497 return v->arch.hvm_svm.cpu_shadow_cr0;
498 case 2:
499 return v->arch.hvm_svm.cpu_cr2;
500 case 3:
501 return v->arch.hvm_svm.cpu_cr3;
502 case 4:
503 return v->arch.hvm_svm.cpu_shadow_cr4;
504 default:
505 BUG();
506 }
507 return 0; /* dummy */
508 }
511 /* Make sure that xen intercepts any FP accesses from current */
512 static void svm_stts(struct vcpu *v)
513 {
514 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
516 /*
517 * If the guest does not have TS enabled then we must cause and handle an
518 * exception on first use of the FPU. If the guest *does* have TS enabled
519 * then this is not necessary: no FPU activity can occur until the guest
520 * clears CR0.TS, and we will initialise the FPU when that happens.
521 */
522 if ( !(v->arch.hvm_svm.cpu_shadow_cr0 & X86_CR0_TS) )
523 {
524 v->arch.hvm_svm.vmcb->exception_intercepts |= EXCEPTION_BITMAP_NM;
525 vmcb->cr0 |= X86_CR0_TS;
526 }
527 }
530 static void svm_set_tsc_offset(struct vcpu *v, u64 offset)
531 {
532 v->arch.hvm_svm.vmcb->tsc_offset = offset;
533 }
536 /* SVM-specific initialization code for VCPU application processors */
537 static void svm_init_ap_context(struct vcpu_guest_context *ctxt,
538 int vcpuid, int trampoline_vector)
539 {
540 int i;
541 struct vcpu *v, *bsp = current;
542 struct domain *d = bsp->domain;
543 cpu_user_regs_t *regs;
546 if ((v = d->vcpu[vcpuid]) == NULL)
547 {
548 printk("vcpuid %d is invalid! good-bye.\n", vcpuid);
549 domain_crash_synchronous();
550 }
551 regs = &v->arch.guest_context.user_regs;
553 memset(ctxt, 0, sizeof(*ctxt));
554 for (i = 0; i < 256; ++i)
555 {
556 ctxt->trap_ctxt[i].vector = i;
557 ctxt->trap_ctxt[i].cs = FLAT_KERNEL_CS;
558 }
561 /*
562 * We execute the trampoline code in real mode. The trampoline vector
563 * passed to us is page aligned and is the physical frame number for
564 * the code. We will execute this code in real mode.
565 */
566 ctxt->user_regs.eip = 0x0;
567 ctxt->user_regs.cs = (trampoline_vector << 8);
568 ctxt->flags = VGCF_HVM_GUEST;
569 }
571 static void svm_init_hypercall_page(struct domain *d, void *hypercall_page)
572 {
573 char *p;
574 int i;
576 memset(hypercall_page, 0, PAGE_SIZE);
578 for ( i = 0; i < (PAGE_SIZE / 32); i++ )
579 {
580 p = (char *)(hypercall_page + (i * 32));
581 *(u8 *)(p + 0) = 0xb8; /* mov imm32, %eax */
582 *(u32 *)(p + 1) = i;
583 *(u8 *)(p + 5) = 0x0f; /* vmmcall */
584 *(u8 *)(p + 6) = 0x01;
585 *(u8 *)(p + 7) = 0xd9;
586 *(u8 *)(p + 8) = 0xc3; /* ret */
587 }
589 /* Don't support HYPERVISOR_iret at the moment */
590 *(u16 *)(hypercall_page + (__HYPERVISOR_iret * 32)) = 0x0b0f; /* ud2 */
591 }
594 int svm_dbg_on = 0;
596 static inline int svm_do_debugout(unsigned long exit_code)
597 {
598 int i;
600 static unsigned long counter = 0;
601 static unsigned long works[] =
602 {
603 VMEXIT_IOIO,
604 VMEXIT_HLT,
605 VMEXIT_CPUID,
606 VMEXIT_DR0_READ,
607 VMEXIT_DR1_READ,
608 VMEXIT_DR2_READ,
609 VMEXIT_DR3_READ,
610 VMEXIT_DR6_READ,
611 VMEXIT_DR7_READ,
612 VMEXIT_DR0_WRITE,
613 VMEXIT_DR1_WRITE,
614 VMEXIT_DR2_WRITE,
615 VMEXIT_DR3_WRITE,
616 VMEXIT_CR0_READ,
617 VMEXIT_CR0_WRITE,
618 VMEXIT_CR3_READ,
619 VMEXIT_CR4_READ,
620 VMEXIT_MSR,
621 VMEXIT_CR0_WRITE,
622 VMEXIT_CR3_WRITE,
623 VMEXIT_CR4_WRITE,
624 VMEXIT_EXCEPTION_PF,
625 VMEXIT_INTR,
626 VMEXIT_INVLPG,
627 VMEXIT_EXCEPTION_NM
628 };
631 #if 0
632 if (svm_dbg_on && exit_code != 0x7B)
633 return 1;
634 #endif
636 counter++;
638 #if 0
639 if ((exit_code == 0x4E
640 || exit_code == VMEXIT_CR0_READ
641 || exit_code == VMEXIT_CR0_WRITE)
642 && counter < 200000)
643 return 0;
645 if ((exit_code == 0x4E) && counter < 500000)
646 return 0;
647 #endif
649 for (i = 0; i < sizeof(works) / sizeof(works[0]); i++)
650 if (exit_code == works[i])
651 return 0;
653 return 1;
654 }
656 static void save_svm_cpu_user_regs(struct vcpu *v, struct cpu_user_regs *ctxt)
657 {
658 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
660 ASSERT(vmcb);
662 ctxt->eax = vmcb->rax;
663 ctxt->ss = vmcb->ss.sel;
664 ctxt->esp = vmcb->rsp;
665 ctxt->eflags = vmcb->rflags;
666 ctxt->cs = vmcb->cs.sel;
667 ctxt->eip = vmcb->rip;
669 ctxt->gs = vmcb->gs.sel;
670 ctxt->fs = vmcb->fs.sel;
671 ctxt->es = vmcb->es.sel;
672 ctxt->ds = vmcb->ds.sel;
673 }
675 static void svm_store_cpu_user_regs(struct cpu_user_regs *regs, struct vcpu *v)
676 {
677 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
679 regs->eip = vmcb->rip;
680 regs->esp = vmcb->rsp;
681 regs->eflags = vmcb->rflags;
682 regs->cs = vmcb->cs.sel;
683 regs->ds = vmcb->ds.sel;
684 regs->es = vmcb->es.sel;
685 regs->ss = vmcb->ss.sel;
686 }
688 /* XXX Use svm_load_cpu_guest_regs instead */
689 static void svm_load_cpu_user_regs(struct vcpu *v, struct cpu_user_regs *regs)
690 {
691 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
692 u32 *intercepts = &v->arch.hvm_svm.vmcb->exception_intercepts;
694 /* Write the guest register value into VMCB */
695 vmcb->rax = regs->eax;
696 vmcb->ss.sel = regs->ss;
697 vmcb->rsp = regs->esp;
698 vmcb->rflags = regs->eflags;
699 vmcb->cs.sel = regs->cs;
700 vmcb->rip = regs->eip;
701 if (regs->eflags & EF_TF)
702 *intercepts |= EXCEPTION_BITMAP_DB;
703 else
704 *intercepts &= ~EXCEPTION_BITMAP_DB;
705 }
707 static void svm_load_cpu_guest_regs(
708 struct vcpu *v, struct cpu_user_regs *regs)
709 {
710 svm_load_cpu_user_regs(v, regs);
711 }
713 int svm_long_mode_enabled(struct vcpu *v)
714 {
715 return SVM_LONG_GUEST(v);
716 }
720 static void arch_svm_do_launch(struct vcpu *v)
721 {
722 cpu_user_regs_t *regs = &current->arch.guest_context.user_regs;
723 int error;
725 #if 0
726 if (svm_dbg_on)
727 printk("Do launch\n");
728 #endif
729 error = construct_vmcb(&v->arch.hvm_svm, regs);
730 if ( error < 0 )
731 {
732 if (v->vcpu_id == 0) {
733 printk("Failed to construct a new VMCB for BSP.\n");
734 } else {
735 printk("Failed to construct a new VMCB for AP %d\n", v->vcpu_id);
736 }
737 domain_crash_synchronous();
738 }
740 svm_do_launch(v);
741 #if 0
742 if (svm_dbg_on)
743 svm_dump_host_regs(__func__);
744 #endif
745 if (v->vcpu_id != 0)
746 {
747 u16 cs_sel = regs->cs;
748 /*
749 * This is the launch of an AP; set state so that we begin executing
750 * the trampoline code in real-mode.
751 */
752 svm_do_vmmcall_reset_to_realmode(v, regs);
753 /* Adjust the state to execute the trampoline code.*/
754 v->arch.hvm_svm.vmcb->rip = 0;
755 v->arch.hvm_svm.vmcb->cs.sel= cs_sel;
756 v->arch.hvm_svm.vmcb->cs.base = (cs_sel << 4);
757 }
759 reset_stack_and_jump(svm_asm_do_launch);
760 }
762 static void svm_freeze_time(struct vcpu *v)
763 {
764 struct periodic_time *pt=&v->domain->arch.hvm_domain.pl_time.periodic_tm;
766 if ( pt->enabled && pt->first_injected && !v->arch.hvm_vcpu.guest_time ) {
767 v->arch.hvm_vcpu.guest_time = hvm_get_guest_time(v);
768 stop_timer(&(pt->timer));
769 }
770 }
773 static void svm_ctxt_switch_from(struct vcpu *v)
774 {
775 svm_freeze_time(v);
776 svm_save_dr(v);
777 }
779 static void svm_ctxt_switch_to(struct vcpu *v)
780 {
781 #ifdef __x86_64__
782 /*
783 * This is required, because VMRUN does consistency check
784 * and some of the DOM0 selectors are pointing to
785 * invalid GDT locations, and cause AMD processors
786 * to shutdown.
787 */
788 set_segment_register(ds, 0);
789 set_segment_register(es, 0);
790 set_segment_register(ss, 0);
791 #endif
792 svm_restore_dr(v);
793 }
796 static void svm_final_setup_guest(struct vcpu *v)
797 {
798 struct domain *d = v->domain;
800 v->arch.schedule_tail = arch_svm_do_launch;
801 v->arch.ctxt_switch_from = svm_ctxt_switch_from;
802 v->arch.ctxt_switch_to = svm_ctxt_switch_to;
804 if ( v != d->vcpu[0] )
805 return;
807 if ( !shadow_mode_external(d) )
808 {
809 DPRINTK("Can't init HVM for dom %u vcpu %u: "
810 "not in shadow external mode\n", d->domain_id, v->vcpu_id);
811 domain_crash(d);
812 }
814 /*
815 * Required to do this once per domain
816 * TODO: add a separate function to do these.
817 */
818 memset(&d->shared_info->evtchn_mask[0], 0xff,
819 sizeof(d->shared_info->evtchn_mask));
820 }
823 static int svm_initialize_guest_resources(struct vcpu *v)
824 {
825 svm_final_setup_guest(v);
826 return 1;
827 }
830 int start_svm(void)
831 {
832 u32 eax, ecx, edx;
833 u32 phys_hsa_lo, phys_hsa_hi;
834 u64 phys_hsa;
835 int cpu = smp_processor_id();
837 /* Xen does not fill x86_capability words except 0. */
838 ecx = cpuid_ecx(0x80000001);
839 boot_cpu_data.x86_capability[5] = ecx;
841 if (!(test_bit(X86_FEATURE_SVME, &boot_cpu_data.x86_capability)))
842 return 0;
844 /* check whether SVM feature is disabled in BIOS */
845 rdmsr(0xC0010114, eax, edx);
846 if ( eax & 0x00000010 ) {
847 printk("AMD SVM Extension is disabled in BIOS.\n");
848 return 0;
849 }
851 if (!(hsa[cpu] = alloc_host_save_area()))
852 return 0;
854 rdmsr(MSR_EFER, eax, edx);
855 eax |= EFER_SVME;
856 wrmsr(MSR_EFER, eax, edx);
857 asidpool_init( cpu );
858 printk("AMD SVM Extension is enabled for cpu %d.\n", cpu );
860 /* Initialize the HSA for this core */
861 phys_hsa = (u64) virt_to_maddr(hsa[cpu]);
862 phys_hsa_lo = (u32) phys_hsa;
863 phys_hsa_hi = (u32) (phys_hsa >> 32);
864 wrmsr(MSR_K8_VM_HSAVE_PA, phys_hsa_lo, phys_hsa_hi);
866 if (!(root_vmcb[cpu] = alloc_vmcb()))
867 return 0;
868 root_vmcb_pa[cpu] = virt_to_maddr(root_vmcb[cpu]);
870 if (cpu == 0)
871 setup_vmcb_dump();
873 /* Setup HVM interfaces */
874 hvm_funcs.disable = stop_svm;
876 hvm_funcs.initialize_guest_resources = svm_initialize_guest_resources;
877 hvm_funcs.relinquish_guest_resources = svm_relinquish_guest_resources;
879 hvm_funcs.store_cpu_guest_regs = svm_store_cpu_guest_regs;
880 hvm_funcs.load_cpu_guest_regs = svm_load_cpu_guest_regs;
882 hvm_funcs.realmode = svm_realmode;
883 hvm_funcs.paging_enabled = svm_paging_enabled;
884 hvm_funcs.long_mode_enabled = svm_long_mode_enabled;
885 hvm_funcs.pae_enabled = svm_pae_enabled;
886 hvm_funcs.guest_x86_mode = svm_guest_x86_mode;
887 hvm_funcs.get_guest_ctrl_reg = svm_get_ctrl_reg;
889 hvm_funcs.update_host_cr3 = svm_update_host_cr3;
891 hvm_funcs.stts = svm_stts;
892 hvm_funcs.set_tsc_offset = svm_set_tsc_offset;
894 hvm_funcs.init_ap_context = svm_init_ap_context;
895 hvm_funcs.init_hypercall_page = svm_init_hypercall_page;
897 hvm_enabled = 1;
899 return 1;
900 }
903 static void svm_relinquish_guest_resources(struct domain *d)
904 {
905 struct vcpu *v;
907 for_each_vcpu ( d, v )
908 {
909 if ( !test_bit(_VCPUF_initialised, &v->vcpu_flags) )
910 continue;
912 destroy_vmcb(&v->arch.hvm_svm);
913 kill_timer(&v->arch.hvm_vcpu.hlt_timer);
914 if ( VLAPIC(v) != NULL )
915 {
916 kill_timer(&VLAPIC(v)->vlapic_timer);
917 unmap_domain_page_global(VLAPIC(v)->regs);
918 free_domheap_page(VLAPIC(v)->regs_page);
919 xfree(VLAPIC(v));
920 }
921 hvm_release_assist_channel(v);
922 }
924 kill_timer(&d->arch.hvm_domain.pl_time.periodic_tm.timer);
926 if ( d->arch.hvm_domain.shared_page_va )
927 unmap_domain_page_global(
928 (void *)d->arch.hvm_domain.shared_page_va);
930 if ( d->arch.hvm_domain.buffered_io_va )
931 unmap_domain_page_global((void *)d->arch.hvm_domain.buffered_io_va);
932 }
935 static void svm_migrate_timers(struct vcpu *v)
936 {
937 struct periodic_time *pt =
938 &(v->domain->arch.hvm_domain.pl_time.periodic_tm);
940 if ( pt->enabled )
941 {
942 migrate_timer(&pt->timer, v->processor);
943 migrate_timer(&v->arch.hvm_vcpu.hlt_timer, v->processor);
944 }
945 if ( VLAPIC(v) != NULL )
946 migrate_timer(&VLAPIC(v)->vlapic_timer, v->processor);
947 }
950 void arch_svm_do_resume(struct vcpu *v)
951 {
952 /* pinning VCPU to a different core? */
953 if ( v->arch.hvm_svm.launch_core == smp_processor_id()) {
954 hvm_do_resume( v );
955 reset_stack_and_jump( svm_asm_do_resume );
956 }
957 else {
958 if (svm_dbg_on)
959 printk("VCPU core pinned: %d to %d\n",
960 v->arch.hvm_svm.launch_core, smp_processor_id() );
961 v->arch.hvm_svm.launch_core = smp_processor_id();
962 svm_migrate_timers( v );
963 hvm_do_resume( v );
964 reset_stack_and_jump( svm_asm_do_resume );
965 }
966 }
970 static int svm_do_page_fault(unsigned long va, struct cpu_user_regs *regs)
971 {
972 struct vcpu *v = current;
973 unsigned long eip;
974 int result;
975 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
977 ASSERT(vmcb);
979 //#if HVM_DEBUG
980 eip = vmcb->rip;
981 HVM_DBG_LOG(DBG_LEVEL_VMMU,
982 "svm_do_page_fault = 0x%lx, eip = %lx, error_code = %lx",
983 va, eip, (unsigned long)regs->error_code);
984 //#endif
986 result = shadow_fault(va, regs);
988 if( result ) {
989 /* Let's make sure that the Guest TLB is flushed */
990 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
991 }
993 return result;
994 }
997 static void svm_do_no_device_fault(struct vmcb_struct *vmcb)
998 {
999 struct vcpu *v = current;
1001 setup_fpu(v);
1002 vmcb->exception_intercepts &= ~EXCEPTION_BITMAP_NM;
1004 if ( !(v->arch.hvm_svm.cpu_shadow_cr0 & X86_CR0_TS) )
1005 vmcb->cr0 &= ~X86_CR0_TS;
1009 static void svm_do_general_protection_fault(struct vcpu *v,
1010 struct cpu_user_regs *regs)
1012 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1013 unsigned long eip, error_code;
1015 ASSERT(vmcb);
1017 eip = vmcb->rip;
1018 error_code = vmcb->exitinfo1;
1020 if (vmcb->idtr.limit == 0) {
1021 printf("Huh? We got a GP Fault with an invalid IDTR!\n");
1022 svm_dump_vmcb(__func__, vmcb);
1023 svm_dump_regs(__func__, regs);
1024 svm_dump_inst(vmcb->rip);
1025 __hvm_bug(regs);
1028 HVM_DBG_LOG(DBG_LEVEL_1,
1029 "svm_general_protection_fault: eip = %lx, erro_code = %lx",
1030 eip, error_code);
1032 HVM_DBG_LOG(DBG_LEVEL_1,
1033 "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx",
1034 (unsigned long)regs->eax, (unsigned long)regs->ebx,
1035 (unsigned long)regs->ecx, (unsigned long)regs->edx,
1036 (unsigned long)regs->esi, (unsigned long)regs->edi);
1038 /* Reflect it back into the guest */
1039 svm_inject_exception(v, TRAP_gp_fault, 1, error_code);
1042 /* Reserved bits ECX: [31:14], [12:4], [2:1]*/
1043 #define SVM_VCPU_CPUID_L1_ECX_RESERVED 0xffffdff6
1044 /* Reserved bits EDX: [31:29], [27], [22:20], [18], [10] */
1045 #define SVM_VCPU_CPUID_L1_EDX_RESERVED 0xe8740400
1047 static void svm_vmexit_do_cpuid(struct vmcb_struct *vmcb, unsigned long input,
1048 struct cpu_user_regs *regs)
1050 unsigned int eax, ebx, ecx, edx;
1051 unsigned long eip;
1052 struct vcpu *v = current;
1053 int inst_len;
1055 ASSERT(vmcb);
1057 eip = vmcb->rip;
1059 HVM_DBG_LOG(DBG_LEVEL_1,
1060 "do_cpuid: (eax) %lx, (ebx) %lx, (ecx) %lx, (edx) %lx,"
1061 " (esi) %lx, (edi) %lx",
1062 (unsigned long)regs->eax, (unsigned long)regs->ebx,
1063 (unsigned long)regs->ecx, (unsigned long)regs->edx,
1064 (unsigned long)regs->esi, (unsigned long)regs->edi);
1066 if ( !cpuid_hypervisor_leaves(input, &eax, &ebx, &ecx, &edx) )
1068 cpuid(input, &eax, &ebx, &ecx, &edx);
1069 if (input == 0x00000001 || input == 0x80000001 )
1071 if ( !hvm_apic_support(v->domain) ||
1072 !vlapic_global_enabled((VLAPIC(v))) )
1074 /* Since the apic is disabled, avoid any confusion
1075 about SMP cpus being available */
1076 clear_bit(X86_FEATURE_APIC, &edx);
1078 #if CONFIG_PAGING_LEVELS >= 3
1079 if ( !v->domain->arch.hvm_domain.params[HVM_PARAM_PAE_ENABLED] )
1080 #endif
1082 clear_bit(X86_FEATURE_PAE, &edx);
1083 if (input == 0x80000001 )
1084 clear_bit(X86_FEATURE_NX & 31, &edx);
1086 clear_bit(X86_FEATURE_PSE36, &edx);
1087 if (input == 0x00000001 )
1089 /* Clear out reserved bits. */
1090 ecx &= ~SVM_VCPU_CPUID_L1_ECX_RESERVED;
1091 edx &= ~SVM_VCPU_CPUID_L1_EDX_RESERVED;
1093 clear_bit(X86_FEATURE_MWAIT & 31, &ecx);
1095 /* Guest should only see one logical processor.
1096 * See details on page 23 of AMD CPUID Specification.
1097 */
1098 clear_bit(X86_FEATURE_HT, &edx); /* clear the hyperthread bit */
1099 ebx &= 0xFF00FFFF; /* clear the logical processor count when HTT=0 */
1100 ebx |= 0x00010000; /* set to 1 just for precaution */
1102 else
1104 /* Clear the Cmp_Legacy bit
1105 * This bit is supposed to be zero when HTT = 0.
1106 * See details on page 23 of AMD CPUID Specification.
1107 */
1108 clear_bit(X86_FEATURE_CMP_LEGACY & 31, &ecx);
1109 /* Make SVM feature invisible to the guest. */
1110 clear_bit(X86_FEATURE_SVME & 31, &ecx);
1111 #ifdef __i386__
1112 /* Mask feature for Intel ia32e or AMD long mode. */
1113 clear_bit(X86_FEATURE_LAHF_LM & 31, &ecx);
1115 clear_bit(X86_FEATURE_LM & 31, &edx);
1116 clear_bit(X86_FEATURE_SYSCALL & 31, &edx);
1117 #endif
1118 /* So far, we do not support 3DNow for the guest. */
1119 clear_bit(X86_FEATURE_3DNOW & 31, &edx);
1120 clear_bit(X86_FEATURE_3DNOWEXT & 31, &edx);
1123 else if ( ( input == 0x80000007 ) || ( input == 0x8000000A ) )
1125 /* Mask out features of power management and SVM extension. */
1126 eax = ebx = ecx = edx = 0;
1128 else if ( input == 0x80000008 )
1130 /* Make sure the number of CPU cores is 1 when HTT=0 */
1131 ecx &= 0xFFFFFF00;
1135 regs->eax = (unsigned long)eax;
1136 regs->ebx = (unsigned long)ebx;
1137 regs->ecx = (unsigned long)ecx;
1138 regs->edx = (unsigned long)edx;
1140 HVM_DBG_LOG(DBG_LEVEL_1,
1141 "svm_vmexit_do_cpuid: eip: %lx, input: %lx, out:eax=%x, "
1142 "ebx=%x, ecx=%x, edx=%x",
1143 eip, input, eax, ebx, ecx, edx);
1145 inst_len = __get_instruction_length(vmcb, INSTR_CPUID, NULL);
1146 ASSERT(inst_len > 0);
1147 __update_guest_eip(vmcb, inst_len);
1151 static inline unsigned long *get_reg_p(unsigned int gpreg,
1152 struct cpu_user_regs *regs, struct vmcb_struct *vmcb)
1154 unsigned long *reg_p = NULL;
1155 switch (gpreg)
1157 case SVM_REG_EAX:
1158 reg_p = (unsigned long *)&regs->eax;
1159 break;
1160 case SVM_REG_EBX:
1161 reg_p = (unsigned long *)&regs->ebx;
1162 break;
1163 case SVM_REG_ECX:
1164 reg_p = (unsigned long *)&regs->ecx;
1165 break;
1166 case SVM_REG_EDX:
1167 reg_p = (unsigned long *)&regs->edx;
1168 break;
1169 case SVM_REG_EDI:
1170 reg_p = (unsigned long *)&regs->edi;
1171 break;
1172 case SVM_REG_ESI:
1173 reg_p = (unsigned long *)&regs->esi;
1174 break;
1175 case SVM_REG_EBP:
1176 reg_p = (unsigned long *)&regs->ebp;
1177 break;
1178 case SVM_REG_ESP:
1179 reg_p = (unsigned long *)&vmcb->rsp;
1180 break;
1181 #ifdef __x86_64__
1182 case SVM_REG_R8:
1183 reg_p = (unsigned long *)&regs->r8;
1184 break;
1185 case SVM_REG_R9:
1186 reg_p = (unsigned long *)&regs->r9;
1187 break;
1188 case SVM_REG_R10:
1189 reg_p = (unsigned long *)&regs->r10;
1190 break;
1191 case SVM_REG_R11:
1192 reg_p = (unsigned long *)&regs->r11;
1193 break;
1194 case SVM_REG_R12:
1195 reg_p = (unsigned long *)&regs->r12;
1196 break;
1197 case SVM_REG_R13:
1198 reg_p = (unsigned long *)&regs->r13;
1199 break;
1200 case SVM_REG_R14:
1201 reg_p = (unsigned long *)&regs->r14;
1202 break;
1203 case SVM_REG_R15:
1204 reg_p = (unsigned long *)&regs->r15;
1205 break;
1206 #endif
1207 default:
1208 BUG();
1211 return reg_p;
1215 static inline unsigned long get_reg(unsigned int gpreg,
1216 struct cpu_user_regs *regs, struct vmcb_struct *vmcb)
1218 unsigned long *gp;
1219 gp = get_reg_p(gpreg, regs, vmcb);
1220 return *gp;
1224 static inline void set_reg(unsigned int gpreg, unsigned long value,
1225 struct cpu_user_regs *regs, struct vmcb_struct *vmcb)
1227 unsigned long *gp;
1228 gp = get_reg_p(gpreg, regs, vmcb);
1229 *gp = value;
1233 static void svm_dr_access(struct vcpu *v, struct cpu_user_regs *regs)
1235 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1237 v->arch.hvm_vcpu.flag_dr_dirty = 1;
1239 __restore_debug_registers(v);
1241 /* allow the guest full access to the debug registers */
1242 vmcb->dr_intercepts = 0;
1246 static void svm_get_prefix_info(
1247 struct vmcb_struct *vmcb,
1248 unsigned int dir, segment_selector_t **seg, unsigned int *asize)
1250 unsigned char inst[MAX_INST_LEN];
1251 int i;
1253 memset(inst, 0, MAX_INST_LEN);
1254 if (inst_copy_from_guest(inst, svm_rip2pointer(vmcb), sizeof(inst))
1255 != MAX_INST_LEN)
1257 printk("%s: get guest instruction failed\n", __func__);
1258 domain_crash_synchronous();
1261 for (i = 0; i < MAX_INST_LEN; i++)
1263 switch (inst[i])
1265 case 0xf3: /* REPZ */
1266 case 0xf2: /* REPNZ */
1267 case 0xf0: /* LOCK */
1268 case 0x66: /* data32 */
1269 #ifdef __x86_64__
1270 /* REX prefixes */
1271 case 0x40:
1272 case 0x41:
1273 case 0x42:
1274 case 0x43:
1275 case 0x44:
1276 case 0x45:
1277 case 0x46:
1278 case 0x47:
1280 case 0x48:
1281 case 0x49:
1282 case 0x4a:
1283 case 0x4b:
1284 case 0x4c:
1285 case 0x4d:
1286 case 0x4e:
1287 case 0x4f:
1288 #endif
1289 continue;
1290 case 0x67: /* addr32 */
1291 *asize ^= 48; /* Switch 16/32 bits */
1292 continue;
1293 case 0x2e: /* CS */
1294 *seg = &vmcb->cs;
1295 continue;
1296 case 0x36: /* SS */
1297 *seg = &vmcb->ss;
1298 continue;
1299 case 0x26: /* ES */
1300 *seg = &vmcb->es;
1301 continue;
1302 case 0x64: /* FS */
1303 *seg = &vmcb->fs;
1304 continue;
1305 case 0x65: /* GS */
1306 *seg = &vmcb->gs;
1307 continue;
1308 case 0x3e: /* DS */
1309 *seg = &vmcb->ds;
1310 continue;
1311 default:
1312 break;
1314 return;
1319 /* Get the address of INS/OUTS instruction */
1320 static inline int svm_get_io_address(
1321 struct vcpu *v,
1322 struct cpu_user_regs *regs, unsigned int dir,
1323 unsigned long *count, unsigned long *addr)
1325 unsigned long reg;
1326 unsigned int asize = 0;
1327 unsigned int isize;
1328 int long_mode;
1329 ioio_info_t info;
1330 segment_selector_t *seg = NULL;
1331 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1333 info.bytes = vmcb->exitinfo1;
1335 /* If we're in long mode, we shouldn't check the segment presence & limit */
1336 long_mode = vmcb->cs.attributes.fields.l && vmcb->efer & EFER_LMA;
1338 /* d field of cs.attributes is 1 for 32-bit, 0 for 16 or 64 bit.
1339 * l field combined with EFER_LMA -> longmode says whether it's 16 or 64 bit.
1340 */
1341 asize = (long_mode)?64:((vmcb->cs.attributes.fields.db)?32:16);
1344 /* The ins/outs instructions are single byte, so if we have got more
1345 * than one byte (+ maybe rep-prefix), we have some prefix so we need
1346 * to figure out what it is...
1347 */
1348 isize = vmcb->exitinfo2 - vmcb->rip;
1350 if (info.fields.rep)
1351 isize --;
1353 if (isize > 1)
1355 svm_get_prefix_info(vmcb, dir, &seg, &asize);
1358 ASSERT(dir == IOREQ_READ || dir == IOREQ_WRITE);
1360 if (dir == IOREQ_WRITE)
1362 reg = regs->esi;
1363 if (!seg) /* If no prefix, use DS. */
1364 seg = &vmcb->ds;
1366 else
1368 reg = regs->edi;
1369 seg = &vmcb->es; /* Note: This is ALWAYS ES. */
1372 /* If the segment isn't present, give GP fault! */
1373 if (!long_mode && !seg->attributes.fields.p)
1375 svm_inject_exception(v, TRAP_gp_fault, 1, seg->sel);
1376 return 0;
1379 if (asize == 16)
1381 *addr = (reg & 0xFFFF);
1382 *count = regs->ecx & 0xffff;
1384 else
1386 *addr = reg;
1387 *count = regs->ecx;
1390 if (!long_mode) {
1391 if (*addr > seg->limit)
1393 svm_inject_exception(v, TRAP_gp_fault, 1, seg->sel);
1394 return 0;
1396 else
1398 *addr += seg->base;
1403 return 1;
1407 static void svm_io_instruction(struct vcpu *v)
1409 struct cpu_user_regs *regs;
1410 struct hvm_io_op *pio_opp;
1411 unsigned int port;
1412 unsigned int size, dir;
1413 ioio_info_t info;
1414 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1416 ASSERT(vmcb);
1417 pio_opp = &current->arch.hvm_vcpu.io_op;
1418 pio_opp->instr = INSTR_PIO;
1419 pio_opp->flags = 0;
1421 regs = &pio_opp->io_context;
1423 /* Copy current guest state into io instruction state structure. */
1424 memcpy(regs, guest_cpu_user_regs(), HVM_CONTEXT_STACK_BYTES);
1425 hvm_store_cpu_guest_regs(v, regs, NULL);
1427 info.bytes = vmcb->exitinfo1;
1429 port = info.fields.port; /* port used to be addr */
1430 dir = info.fields.type; /* direction */
1431 if (info.fields.sz32)
1432 size = 4;
1433 else if (info.fields.sz16)
1434 size = 2;
1435 else
1436 size = 1;
1438 HVM_DBG_LOG(DBG_LEVEL_IO,
1439 "svm_io_instruction: port 0x%x eip=%x:%"PRIx64", "
1440 "exit_qualification = %"PRIx64,
1441 port, vmcb->cs.sel, vmcb->rip, info.bytes);
1443 /* string instruction */
1444 if (info.fields.str)
1446 unsigned long addr, count;
1447 int sign = regs->eflags & EF_DF ? -1 : 1;
1449 if (!svm_get_io_address(v, regs, dir, &count, &addr))
1451 /* We failed to get a valid address, so don't do the IO operation -
1452 * it would just get worse if we do! Hopefully the guest is handling
1453 * gp-faults...
1454 */
1455 return;
1458 /* "rep" prefix */
1459 if (info.fields.rep)
1461 pio_opp->flags |= REPZ;
1463 else
1465 count = 1;
1468 /*
1469 * Handle string pio instructions that cross pages or that
1470 * are unaligned. See the comments in hvm_platform.c/handle_mmio()
1471 */
1472 if ((addr & PAGE_MASK) != ((addr + size - 1) & PAGE_MASK))
1474 unsigned long value = 0;
1476 pio_opp->flags |= OVERLAP;
1478 if (dir == IOREQ_WRITE)
1479 (void)hvm_copy_from_guest_virt(&value, addr, size);
1481 send_pio_req(regs, port, 1, size, value, dir, 0);
1483 else
1485 if ((addr & PAGE_MASK) != ((addr + count * size - 1) & PAGE_MASK))
1487 if (sign > 0)
1488 count = (PAGE_SIZE - (addr & ~PAGE_MASK)) / size;
1489 else
1490 count = (addr & ~PAGE_MASK) / size;
1492 else
1493 regs->eip = vmcb->exitinfo2;
1495 send_pio_req(regs, port, count, size, addr, dir, 1);
1498 else
1500 /*
1501 * On SVM, the RIP of the instruction following the IN/OUT is saved in
1502 * ExitInfo2
1503 */
1504 regs->eip = vmcb->exitinfo2;
1506 if (port == 0xe9 && dir == IOREQ_WRITE && size == 1)
1507 hvm_print_line(v, regs->eax); /* guest debug output */
1509 send_pio_req(regs, port, 1, size, regs->eax, dir, 0);
1513 static int svm_set_cr0(unsigned long value)
1515 struct vcpu *v = current;
1516 unsigned long mfn;
1517 int paging_enabled;
1518 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1519 unsigned long old_base_mfn;
1521 ASSERT(vmcb);
1523 /* We don't want to lose PG. ET is reserved and should always be 1. */
1524 paging_enabled = svm_paging_enabled(v);
1525 value |= X86_CR0_ET;
1526 vmcb->cr0 = value | X86_CR0_PG;
1527 v->arch.hvm_svm.cpu_shadow_cr0 = value;
1529 /* TS cleared? Then initialise FPU now. */
1530 if ( !(value & X86_CR0_TS) )
1532 setup_fpu(v);
1533 vmcb->exception_intercepts &= ~EXCEPTION_BITMAP_NM;
1536 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR0 value = %lx\n", value);
1538 if ((value & X86_CR0_PE) && (value & X86_CR0_PG) && !paging_enabled)
1540 /* The guest CR3 must be pointing to the guest physical. */
1541 if (!VALID_MFN(mfn =
1542 get_mfn_from_gpfn(v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT))
1543 || !get_page(mfn_to_page(mfn), v->domain))
1545 printk("Invalid CR3 value = %lx\n", v->arch.hvm_svm.cpu_cr3);
1546 domain_crash_synchronous(); /* need to take a clean path */
1549 #if defined(__x86_64__)
1550 if (test_bit(SVM_CPU_STATE_LME_ENABLED, &v->arch.hvm_svm.cpu_state)
1551 && !test_bit(SVM_CPU_STATE_PAE_ENABLED,
1552 &v->arch.hvm_svm.cpu_state))
1554 HVM_DBG_LOG(DBG_LEVEL_1, "Enable paging before PAE enable\n");
1555 svm_inject_exception(v, TRAP_gp_fault, 1, 0);
1558 if (test_bit(SVM_CPU_STATE_LME_ENABLED, &v->arch.hvm_svm.cpu_state))
1560 /* PAE should already be enabled at this point */
1561 HVM_DBG_LOG(DBG_LEVEL_1, "Enable the Long mode\n");
1562 set_bit(SVM_CPU_STATE_LMA_ENABLED,
1563 &v->arch.hvm_svm.cpu_state);
1564 vmcb->efer |= (EFER_LMA | EFER_LME);
1566 #endif /* __x86_64__ */
1568 /* Now arch.guest_table points to machine physical. */
1569 old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
1570 v->arch.guest_table = pagetable_from_pfn(mfn);
1571 if ( old_base_mfn )
1572 put_page(mfn_to_page(old_base_mfn));
1573 shadow_update_paging_modes(v);
1575 HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx",
1576 (unsigned long) (mfn << PAGE_SHIFT));
1578 vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3;
1579 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
1582 if ( !((value & X86_CR0_PE) && (value & X86_CR0_PG)) && paging_enabled )
1583 if ( v->arch.hvm_svm.cpu_cr3 ) {
1584 put_page(mfn_to_page(get_mfn_from_gpfn(
1585 v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT)));
1586 v->arch.guest_table = pagetable_null();
1589 /*
1590 * SVM implements paged real-mode and when we return to real-mode
1591 * we revert back to the physical mappings that the domain builder
1592 * created.
1593 */
1594 if ((value & X86_CR0_PE) == 0) {
1595 if (value & X86_CR0_PG) {
1596 svm_inject_exception(v, TRAP_gp_fault, 1, 0);
1597 return 0;
1599 shadow_update_paging_modes(v);
1600 vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3;
1601 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
1603 else if ( (value & (X86_CR0_PE | X86_CR0_PG)) == X86_CR0_PE )
1605 /* we should take care of this kind of situation */
1606 shadow_update_paging_modes(v);
1607 vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3;
1608 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
1611 return 1;
1614 /*
1615 * Read from control registers. CR0 and CR4 are read from the shadow.
1616 */
1617 static void mov_from_cr(int cr, int gp, struct cpu_user_regs *regs)
1619 unsigned long value = 0;
1620 struct vcpu *v = current;
1621 struct vlapic *vlapic = VLAPIC(v);
1622 struct vmcb_struct *vmcb;
1624 vmcb = v->arch.hvm_svm.vmcb;
1625 ASSERT(vmcb);
1627 switch (cr)
1629 case 0:
1630 value = v->arch.hvm_svm.cpu_shadow_cr0;
1631 if (svm_dbg_on)
1632 printk("CR0 read =%lx \n", value );
1633 break;
1634 case 2:
1635 value = vmcb->cr2;
1636 break;
1637 case 3:
1638 value = (unsigned long) v->arch.hvm_svm.cpu_cr3;
1639 if (svm_dbg_on)
1640 printk("CR3 read =%lx \n", value );
1641 break;
1642 case 4:
1643 value = (unsigned long) v->arch.hvm_svm.cpu_shadow_cr4;
1644 if (svm_dbg_on)
1645 printk("CR4 read=%lx\n", value);
1646 break;
1647 case 8:
1648 if ( vlapic == NULL )
1649 break;
1650 value = (unsigned long)vlapic_get_reg(vlapic, APIC_TASKPRI);
1651 value = (value & 0xF0) >> 4;
1652 break;
1654 default:
1655 __hvm_bug(regs);
1658 set_reg(gp, value, regs, vmcb);
1660 HVM_DBG_LOG(DBG_LEVEL_VMMU, "mov_from_cr: CR%d, value = %lx,", cr, value);
1664 static inline int svm_pgbit_test(struct vcpu *v)
1666 return v->arch.hvm_svm.cpu_shadow_cr0 & X86_CR0_PG;
1670 /*
1671 * Write to control registers
1672 */
1673 static int mov_to_cr(int gpreg, int cr, struct cpu_user_regs *regs)
1675 unsigned long value;
1676 unsigned long old_cr;
1677 struct vcpu *v = current;
1678 struct vlapic *vlapic = VLAPIC(v);
1679 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1681 ASSERT(vmcb);
1683 value = get_reg(gpreg, regs, vmcb);
1685 HVM_DBG_LOG(DBG_LEVEL_1, "mov_to_cr: CR%d, value = %lx,", cr, value);
1686 HVM_DBG_LOG(DBG_LEVEL_1, "current = %lx,", (unsigned long) current);
1688 switch (cr)
1690 case 0:
1691 if (svm_dbg_on)
1692 printk("CR0 write =%lx \n", value );
1693 return svm_set_cr0(value);
1695 case 3:
1697 unsigned long old_base_mfn, mfn;
1698 if (svm_dbg_on)
1699 printk("CR3 write =%lx \n", value );
1700 /* If paging is not enabled yet, simply copy the value to CR3. */
1701 if (!svm_paging_enabled(v)) {
1702 v->arch.hvm_svm.cpu_cr3 = value;
1703 break;
1705 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
1707 /* We make a new one if the shadow does not exist. */
1708 if (value == v->arch.hvm_svm.cpu_cr3)
1710 /*
1711 * This is a simple TLB flush, implying the guest has
1712 * removed some translation or changed page attributes.
1713 * We simply invalidate the shadow.
1714 */
1715 mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT);
1716 if (mfn != pagetable_get_pfn(v->arch.guest_table))
1717 __hvm_bug(regs);
1718 shadow_update_cr3(v);
1720 else
1722 /*
1723 * If different, make a shadow. Check if the PDBR is valid
1724 * first.
1725 */
1726 HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 value = %lx", value);
1727 if (((value >> PAGE_SHIFT) > v->domain->max_pages)
1728 || !VALID_MFN(mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT))
1729 || !get_page(mfn_to_page(mfn), v->domain))
1731 printk("Invalid CR3 value=%lx\n", value);
1732 domain_crash_synchronous(); /* need to take a clean path */
1735 old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
1736 v->arch.guest_table = pagetable_from_pfn(mfn);
1738 if (old_base_mfn)
1739 put_page(mfn_to_page(old_base_mfn));
1741 /*
1742 * arch.shadow_table should now hold the next CR3 for shadow
1743 */
1744 v->arch.hvm_svm.cpu_cr3 = value;
1745 update_cr3(v);
1746 vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3;
1747 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx", value);
1749 break;
1752 case 4: /* CR4 */
1754 if (svm_dbg_on)
1755 printk( "write cr4=%lx, cr0=%lx\n",
1756 value, v->arch.hvm_svm.cpu_shadow_cr0 );
1757 old_cr = v->arch.hvm_svm.cpu_shadow_cr4;
1758 if ( value & X86_CR4_PAE && !(old_cr & X86_CR4_PAE) )
1760 set_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
1761 if ( svm_pgbit_test(v) )
1763 /* The guest is a 32-bit PAE guest. */
1764 #if CONFIG_PAGING_LEVELS >= 3
1765 unsigned long mfn, old_base_mfn;
1767 if ( !VALID_MFN(mfn = get_mfn_from_gpfn(
1768 v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT)) ||
1769 !get_page(mfn_to_page(mfn), v->domain) )
1771 printk("Invalid CR3 value = %lx", v->arch.hvm_svm.cpu_cr3);
1772 domain_crash_synchronous(); /* need to take a clean path */
1775 /*
1776 * Now arch.guest_table points to machine physical.
1777 */
1779 old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
1780 v->arch.guest_table = pagetable_from_pfn(mfn);
1781 if ( old_base_mfn )
1782 put_page(mfn_to_page(old_base_mfn));
1783 shadow_update_paging_modes(v);
1785 HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx",
1786 (unsigned long) (mfn << PAGE_SHIFT));
1788 vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3;
1790 /*
1791 * arch->shadow_table should hold the next CR3 for shadow
1792 */
1794 HVM_DBG_LOG(DBG_LEVEL_VMMU,
1795 "Update CR3 value = %lx, mfn = %lx",
1796 v->arch.hvm_svm.cpu_cr3, mfn);
1797 #endif
1800 else if (value & X86_CR4_PAE) {
1801 set_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
1802 } else {
1803 if (test_bit(SVM_CPU_STATE_LMA_ENABLED,
1804 &v->arch.hvm_svm.cpu_state)) {
1805 svm_inject_exception(v, TRAP_gp_fault, 1, 0);
1807 clear_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
1810 v->arch.hvm_svm.cpu_shadow_cr4 = value;
1811 vmcb->cr4 = value | SVM_CR4_HOST_MASK;
1813 /*
1814 * Writing to CR4 to modify the PSE, PGE, or PAE flag invalidates
1815 * all TLB entries except global entries.
1816 */
1817 if ((old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE))
1819 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
1820 shadow_update_paging_modes(v);
1822 break;
1825 case 8:
1827 if ( vlapic == NULL )
1828 break;
1829 vlapic_set_reg(vlapic, APIC_TASKPRI, ((value & 0x0F) << 4));
1830 vlapic_update_ppr(vlapic);
1831 break;
1834 default:
1835 printk("invalid cr: %d\n", cr);
1836 __hvm_bug(regs);
1839 return 1;
1843 #define ARR_SIZE(x) (sizeof(x) / sizeof(x[0]))
1846 static int svm_cr_access(struct vcpu *v, unsigned int cr, unsigned int type,
1847 struct cpu_user_regs *regs)
1849 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1850 int inst_len = 0;
1851 int index;
1852 unsigned int gpreg;
1853 unsigned long value;
1854 u8 buffer[MAX_INST_LEN];
1855 u8 prefix = 0;
1856 int result = 1;
1857 enum instruction_index list_a[] = {INSTR_MOV2CR, INSTR_CLTS, INSTR_LMSW};
1858 enum instruction_index list_b[] = {INSTR_MOVCR2, INSTR_SMSW};
1859 enum instruction_index match;
1861 ASSERT(vmcb);
1863 inst_copy_from_guest(buffer, svm_rip2pointer(vmcb), sizeof(buffer));
1865 /* get index to first actual instruction byte - as we will need to know
1866 where the prefix lives later on */
1867 index = skip_prefix_bytes(buffer, sizeof(buffer));
1869 if ( type == TYPE_MOV_TO_CR )
1871 inst_len = __get_instruction_length_from_list(
1872 vmcb, list_a, ARR_SIZE(list_a), &buffer[index], &match);
1874 else /* type == TYPE_MOV_FROM_CR */
1876 inst_len = __get_instruction_length_from_list(
1877 vmcb, list_b, ARR_SIZE(list_b), &buffer[index], &match);
1880 ASSERT(inst_len > 0);
1882 inst_len += index;
1884 /* Check for REX prefix - it's ALWAYS the last byte of any prefix bytes */
1885 if (index > 0 && (buffer[index-1] & 0xF0) == 0x40)
1886 prefix = buffer[index-1];
1888 HVM_DBG_LOG(DBG_LEVEL_1, "eip = %lx", (unsigned long) vmcb->rip);
1890 switch (match)
1892 case INSTR_MOV2CR:
1893 gpreg = decode_src_reg(prefix, buffer[index+2]);
1894 result = mov_to_cr(gpreg, cr, regs);
1895 break;
1897 case INSTR_MOVCR2:
1898 gpreg = decode_src_reg(prefix, buffer[index+2]);
1899 mov_from_cr(cr, gpreg, regs);
1900 break;
1902 case INSTR_CLTS:
1903 /* TS being cleared means that it's time to restore fpu state. */
1904 setup_fpu(current);
1905 vmcb->exception_intercepts &= ~EXCEPTION_BITMAP_NM;
1906 vmcb->cr0 &= ~X86_CR0_TS; /* clear TS */
1907 v->arch.hvm_svm.cpu_shadow_cr0 &= ~X86_CR0_TS; /* clear TS */
1908 break;
1910 case INSTR_LMSW:
1911 if (svm_dbg_on)
1912 svm_dump_inst(svm_rip2pointer(vmcb));
1914 gpreg = decode_src_reg(prefix, buffer[index+2]);
1915 value = get_reg(gpreg, regs, vmcb) & 0xF;
1917 if (svm_dbg_on)
1918 printk("CR0-LMSW value=%lx, reg=%d, inst_len=%d\n", value, gpreg,
1919 inst_len);
1921 value = (v->arch.hvm_svm.cpu_shadow_cr0 & ~0xF) | value;
1923 if (svm_dbg_on)
1924 printk("CR0-LMSW CR0 - New value=%lx\n", value);
1926 result = svm_set_cr0(value);
1927 break;
1929 case INSTR_SMSW:
1930 if (svm_dbg_on)
1931 svm_dump_inst(svm_rip2pointer(vmcb));
1932 value = v->arch.hvm_svm.cpu_shadow_cr0;
1933 gpreg = decode_src_reg(prefix, buffer[index+2]);
1934 set_reg(gpreg, value, regs, vmcb);
1936 if (svm_dbg_on)
1937 printk("CR0-SMSW value=%lx, reg=%d, inst_len=%d\n", value, gpreg,
1938 inst_len);
1939 break;
1941 default:
1942 __hvm_bug(regs);
1943 break;
1946 ASSERT(inst_len);
1948 __update_guest_eip(vmcb, inst_len);
1950 return result;
1953 static inline void svm_do_msr_access(
1954 struct vcpu *v, struct cpu_user_regs *regs)
1956 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1957 int inst_len;
1958 u64 msr_content=0;
1959 u32 eax, edx;
1961 ASSERT(vmcb);
1963 HVM_DBG_LOG(DBG_LEVEL_1, "svm_do_msr_access: ecx=%lx, eax=%lx, edx=%lx, "
1964 "exitinfo = %lx", (unsigned long)regs->ecx,
1965 (unsigned long)regs->eax, (unsigned long)regs->edx,
1966 (unsigned long)vmcb->exitinfo1);
1968 /* is it a read? */
1969 if (vmcb->exitinfo1 == 0)
1971 inst_len = __get_instruction_length(vmcb, INSTR_RDMSR, NULL);
1973 regs->edx = 0;
1974 switch (regs->ecx) {
1975 case MSR_IA32_TIME_STAMP_COUNTER:
1976 msr_content = hvm_get_guest_time(v);
1977 break;
1978 case MSR_IA32_SYSENTER_CS:
1979 msr_content = vmcb->sysenter_cs;
1980 break;
1981 case MSR_IA32_SYSENTER_ESP:
1982 msr_content = vmcb->sysenter_esp;
1983 break;
1984 case MSR_IA32_SYSENTER_EIP:
1985 msr_content = vmcb->sysenter_eip;
1986 break;
1987 case MSR_IA32_APICBASE:
1988 msr_content = VLAPIC(v) ? VLAPIC(v)->apic_base_msr : 0;
1989 break;
1990 default:
1991 if (long_mode_do_msr_read(regs))
1992 goto done;
1994 if ( rdmsr_hypervisor_regs(regs->ecx, &eax, &edx) )
1996 regs->eax = eax;
1997 regs->edx = edx;
1998 goto done;
2001 rdmsr_safe(regs->ecx, regs->eax, regs->edx);
2002 break;
2004 regs->eax = msr_content & 0xFFFFFFFF;
2005 regs->edx = msr_content >> 32;
2007 else
2009 inst_len = __get_instruction_length(vmcb, INSTR_WRMSR, NULL);
2010 msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
2012 switch (regs->ecx)
2014 case MSR_IA32_TIME_STAMP_COUNTER:
2015 hvm_set_guest_time(v, msr_content);
2016 break;
2017 case MSR_IA32_SYSENTER_CS:
2018 vmcb->sysenter_cs = msr_content;
2019 break;
2020 case MSR_IA32_SYSENTER_ESP:
2021 vmcb->sysenter_esp = msr_content;
2022 break;
2023 case MSR_IA32_SYSENTER_EIP:
2024 vmcb->sysenter_eip = msr_content;
2025 break;
2026 case MSR_IA32_APICBASE:
2027 vlapic_msr_set(VLAPIC(v), msr_content);
2028 break;
2029 default:
2030 if ( !long_mode_do_msr_write(regs) )
2031 wrmsr_hypervisor_regs(regs->ecx, regs->eax, regs->edx);
2032 break;
2036 done:
2038 HVM_DBG_LOG(DBG_LEVEL_1, "svm_do_msr_access returns: "
2039 "ecx=%lx, eax=%lx, edx=%lx",
2040 (unsigned long)regs->ecx, (unsigned long)regs->eax,
2041 (unsigned long)regs->edx);
2043 __update_guest_eip(vmcb, inst_len);
2047 static inline void svm_vmexit_do_hlt(struct vmcb_struct *vmcb)
2049 __update_guest_eip(vmcb, 1);
2051 /* Check for interrupt not handled or new interrupt. */
2052 if ( (vmcb->rflags & X86_EFLAGS_IF) &&
2053 (vmcb->vintr.fields.irq || cpu_has_pending_irq(current)) )
2054 return;
2056 hvm_hlt(vmcb->rflags);
2060 static void svm_vmexit_do_invd(struct vmcb_struct *vmcb)
2062 int inst_len;
2064 /* Invalidate the cache - we can't really do that safely - maybe we should
2065 * WBINVD, but I think it's just fine to completely ignore it - we should
2066 * have cache-snooping that solves it anyways. -- Mats P.
2067 */
2069 /* Tell the user that we did this - just in case someone runs some really
2070 * weird operating system and wants to know why it's not working...
2071 */
2072 printk("INVD instruction intercepted - ignored\n");
2074 inst_len = __get_instruction_length(vmcb, INSTR_INVD, NULL);
2075 __update_guest_eip(vmcb, inst_len);
2081 #ifdef XEN_DEBUGGER
2082 static void svm_debug_save_cpu_user_regs(struct vmcb_struct *vmcb,
2083 struct cpu_user_regs *regs)
2085 regs->eip = vmcb->rip;
2086 regs->esp = vmcb->rsp;
2087 regs->eflags = vmcb->rflags;
2089 regs->xcs = vmcb->cs.sel;
2090 regs->xds = vmcb->ds.sel;
2091 regs->xes = vmcb->es.sel;
2092 regs->xfs = vmcb->fs.sel;
2093 regs->xgs = vmcb->gs.sel;
2094 regs->xss = vmcb->ss.sel;
2098 static void svm_debug_restore_cpu_user_regs(struct cpu_user_regs *regs)
2100 vmcb->ss.sel = regs->xss;
2101 vmcb->rsp = regs->esp;
2102 vmcb->rflags = regs->eflags;
2103 vmcb->cs.sel = regs->xcs;
2104 vmcb->rip = regs->eip;
2106 vmcb->gs.sel = regs->xgs;
2107 vmcb->fs.sel = regs->xfs;
2108 vmcb->es.sel = regs->xes;
2109 vmcb->ds.sel = regs->xds;
2111 #endif
2114 void svm_handle_invlpg(const short invlpga, struct cpu_user_regs *regs)
2116 struct vcpu *v = current;
2117 u8 opcode[MAX_INST_LEN], prefix, length = MAX_INST_LEN;
2118 unsigned long g_vaddr;
2119 int inst_len;
2120 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2122 ASSERT(vmcb);
2123 /*
2124 * Unknown how many bytes the invlpg instruction will take. Use the
2125 * maximum instruction length here
2126 */
2127 if (inst_copy_from_guest(opcode, svm_rip2pointer(vmcb), length) < length)
2129 printk("svm_handle_invlpg (): Error reading memory %d bytes\n",
2130 length);
2131 __hvm_bug(regs);
2134 if (invlpga)
2136 inst_len = __get_instruction_length(vmcb, INSTR_INVLPGA, opcode);
2137 ASSERT(inst_len > 0);
2138 __update_guest_eip(vmcb, inst_len);
2140 /*
2141 * The address is implicit in this instruction. At the moment, we don't
2142 * use ecx (ASID) to identify individual guest pages
2143 */
2144 g_vaddr = regs->eax;
2146 else
2148 /* What about multiple prefix codes? */
2149 prefix = (is_prefix(opcode[0])?opcode[0]:0);
2150 inst_len = __get_instruction_length(vmcb, INSTR_INVLPG, opcode);
2151 ASSERT(inst_len > 0);
2153 inst_len--;
2154 length -= inst_len;
2156 /*
2157 * Decode memory operand of the instruction including ModRM, SIB, and
2158 * displacement to get effective address and length in bytes. Assume
2159 * the system is in either 32- or 64-bit mode.
2160 */
2161 g_vaddr = get_effective_addr_modrm64(vmcb, regs, prefix,
2162 &opcode[inst_len], &length);
2164 inst_len += length;
2165 __update_guest_eip (vmcb, inst_len);
2168 /* Overkill, we may not need this */
2169 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
2170 shadow_invlpg(v, g_vaddr);
2174 /*
2175 * Reset to realmode causes execution to start at 0xF000:0xFFF0 in
2176 * 16-bit realmode. Basically, this mimics a processor reset.
2178 * returns 0 on success, non-zero otherwise
2179 */
2180 static int svm_do_vmmcall_reset_to_realmode(struct vcpu *v,
2181 struct cpu_user_regs *regs)
2183 struct vmcb_struct *vmcb;
2185 ASSERT(v);
2186 ASSERT(regs);
2188 vmcb = v->arch.hvm_svm.vmcb;
2190 ASSERT(vmcb);
2192 /* clear the vmcb and user regs */
2193 memset(regs, 0, sizeof(struct cpu_user_regs));
2195 /* VMCB Control */
2196 vmcb->tsc_offset = 0;
2198 /* VMCB State */
2199 vmcb->cr0 = X86_CR0_ET | X86_CR0_PG;
2200 v->arch.hvm_svm.cpu_shadow_cr0 = X86_CR0_ET;
2202 vmcb->cr2 = 0;
2203 vmcb->efer = EFER_SVME;
2205 vmcb->cr4 = SVM_CR4_HOST_MASK;
2206 v->arch.hvm_svm.cpu_shadow_cr4 = 0;
2207 clear_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
2209 /* This will jump to ROMBIOS */
2210 vmcb->rip = 0xFFF0;
2212 /* setup the segment registers and all their hidden states */
2213 vmcb->cs.sel = 0xF000;
2214 vmcb->cs.attributes.bytes = 0x089b;
2215 vmcb->cs.limit = 0xffff;
2216 vmcb->cs.base = 0x000F0000;
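/*
 * Descriptive note: with CS.base = 0x000F0000 and rip = 0xFFF0 (set above),
 * the first instruction is fetched from physical address 0xFFFF0, the
 * traditional ROMBIOS reset entry point.  The low byte of each attribute
 * word used here is the segment descriptor access byte: 0x9b denotes a
 * present, ring-0, accessed code segment, and 0x93 a present, ring-0,
 * read/write, accessed data segment.
 */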
2218 vmcb->ss.sel = 0x00;
2219 vmcb->ss.attributes.bytes = 0x0893;
2220 vmcb->ss.limit = 0xffff;
2221 vmcb->ss.base = 0x00;
2223 vmcb->ds.sel = 0x00;
2224 vmcb->ds.attributes.bytes = 0x0893;
2225 vmcb->ds.limit = 0xffff;
2226 vmcb->ds.base = 0x00;
2228 vmcb->es.sel = 0x00;
2229 vmcb->es.attributes.bytes = 0x0893;
2230 vmcb->es.limit = 0xffff;
2231 vmcb->es.base = 0x00;
2233 vmcb->fs.sel = 0x00;
2234 vmcb->fs.attributes.bytes = 0x0893;
2235 vmcb->fs.limit = 0xffff;
2236 vmcb->fs.base = 0x00;
2238 vmcb->gs.sel = 0x00;
2239 vmcb->gs.attributes.bytes = 0x0893;
2240 vmcb->gs.limit = 0xffff;
2241 vmcb->gs.base = 0x00;
2243 vmcb->ldtr.sel = 0x00;
2244 vmcb->ldtr.attributes.bytes = 0x0000;
2245 vmcb->ldtr.limit = 0x0;
2246 vmcb->ldtr.base = 0x00;
2248 vmcb->gdtr.sel = 0x00;
2249 vmcb->gdtr.attributes.bytes = 0x0000;
2250 vmcb->gdtr.limit = 0x0;
2251 vmcb->gdtr.base = 0x00;
2253 vmcb->tr.sel = 0;
2254 vmcb->tr.attributes.bytes = 0;
2255 vmcb->tr.limit = 0x0;
2256 vmcb->tr.base = 0;
2258 vmcb->idtr.sel = 0x00;
2259 vmcb->idtr.attributes.bytes = 0x0000;
2260 vmcb->idtr.limit = 0x3ff;
2261 vmcb->idtr.base = 0x00;
2263 vmcb->rax = 0;
2264 vmcb->rsp = 0;
2266 return 0;
2270 /*
2271 * svm_do_vmmcall - SVM VMMCALL handler
2273 * returns 0 on success, non-zero otherwise
2274 */
2275 static int svm_do_vmmcall(struct vcpu *v, struct cpu_user_regs *regs)
2277 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2278 int inst_len;
2280 ASSERT(vmcb);
2281 ASSERT(regs);
2283 inst_len = __get_instruction_length(vmcb, INSTR_VMCALL, NULL);
2284 ASSERT(inst_len > 0);
2286 if ( regs->eax & 0x80000000 )
2288 /* VMMCALL sanity check */
2289 if ( vmcb->cpl > get_vmmcall_cpl(regs->edi) )
2291 printf("VMMCALL CPL check failed\n");
2292 return -1;
2295 /* handle the request */
2296 switch ( regs->eax )
2298 case VMMCALL_RESET_TO_REALMODE:
2299 if ( svm_do_vmmcall_reset_to_realmode(v, regs) )
2301 printf("svm_do_vmmcall_reset_to_realmode() failed\n");
2302 return -1;
2304 /* since we just reset the VMCB, return without adjusting
2305 * the eip */
2306 return 0;
2308 case VMMCALL_DEBUG:
2309 printf("DEBUG features not implemented yet\n");
2310 break;
2311 default:
2312 break;
2315 hvm_print_line(v, regs->eax); /* provides the current domain */
2317 else
2319 hvm_do_hypercall(regs);
2322 __update_guest_eip(vmcb, inst_len);
2323 return 0;
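/*
 * Illustrative guest-side sketch.  The VMMCALL_* numbers (presumably defined
 * in <asm/hvm/svm/vmmcall.h>) must have bit 31 set for the eax test above to
 * route them here:
 *
 *     mov  eax, VMMCALL_RESET_TO_REALMODE
 *     mov  edi, <value consulted by get_vmmcall_cpl()>
 *     vmmcall
 *
 * An eax value with bit 31 clear is treated as an ordinary Xen hypercall
 * number and handed to hvm_do_hypercall() instead.
 */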
2327 void svm_dump_inst(unsigned long eip)
2329 u8 opcode[256];
2330 unsigned long ptr;
2331 int len;
2332 int i;
2334 ptr = eip & ~0xff;
2335 len = 0;
2337 if (hvm_copy_from_guest_virt(opcode, ptr, sizeof(opcode)) == 0)
2338 len = sizeof(opcode);
2340 printf("Code bytes around (len=%d) %lx:", len, eip);
2341 for (i = 0; i < len; i++)
2343 if ((i & 0x0f) == 0)
2344 printf("\n%08lx:", ptr+i);
2346 printf("%02x ", opcode[i]);
2349 printf("\n");
2353 void svm_dump_regs(const char *from, struct cpu_user_regs *regs)
2355 struct vcpu *v = current;
2356 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2357 unsigned long pt = pagetable_get_paddr(v->arch.shadow_table);
2359 printf("%s: guest registers from %s:\n", __func__, from);
2360 #if defined (__x86_64__)
2361 printk("rax: %016lx rbx: %016lx rcx: %016lx\n",
2362 regs->rax, regs->rbx, regs->rcx);
2363 printk("rdx: %016lx rsi: %016lx rdi: %016lx\n",
2364 regs->rdx, regs->rsi, regs->rdi);
2365 printk("rbp: %016lx rsp: %016lx r8: %016lx\n",
2366 regs->rbp, regs->rsp, regs->r8);
2367 printk("r9: %016lx r10: %016lx r11: %016lx\n",
2368 regs->r9, regs->r10, regs->r11);
2369 printk("r12: %016lx r13: %016lx r14: %016lx\n",
2370 regs->r12, regs->r13, regs->r14);
2371 printk("r15: %016lx cr0: %016lx cr3: %016lx\n",
2372 regs->r15, v->arch.hvm_svm.cpu_shadow_cr0, vmcb->cr3);
2373 #else
2374 printf("eax: %08x, ebx: %08x, ecx: %08x, edx: %08x\n",
2375 regs->eax, regs->ebx, regs->ecx, regs->edx);
2376 printf("edi: %08x, esi: %08x, ebp: %08x, esp: %08x\n",
2377 regs->edi, regs->esi, regs->ebp, regs->esp);
2378 printf("%s: guest cr0: %lx\n", __func__,
2379 v->arch.hvm_svm.cpu_shadow_cr0);
2380 printf("guest CR3 = %llx\n", vmcb->cr3);
2381 #endif
2382 printf("%s: pt = %lx\n", __func__, pt);
2386 void svm_dump_host_regs(const char *from)
2388 struct vcpu *v = current;
2389 unsigned long pt = pagetable_get_paddr(v->arch.monitor_table);
2390 unsigned long cr3, cr0;
2391 printf("Host registers at %s\n", from);
2393 __asm__ __volatile__ ("\tmov %%cr0,%0\n"
2394 "\tmov %%cr3,%1\n"
2395 : "=r" (cr0), "=r"(cr3));
2396 printf("%s: pt = %lx, cr3 = %lx, cr0 = %lx\n", __func__, pt, cr3, cr0);
2399 #ifdef SVM_EXTRA_DEBUG
2400 static char *exit_reasons[] = {
2401 [VMEXIT_CR0_READ] = "CR0_READ",
2402 [VMEXIT_CR1_READ] = "CR1_READ",
2403 [VMEXIT_CR2_READ] = "CR2_READ",
2404 [VMEXIT_CR3_READ] = "CR3_READ",
2405 [VMEXIT_CR4_READ] = "CR4_READ",
2406 [VMEXIT_CR5_READ] = "CR5_READ",
2407 [VMEXIT_CR6_READ] = "CR6_READ",
2408 [VMEXIT_CR7_READ] = "CR7_READ",
2409 [VMEXIT_CR8_READ] = "CR8_READ",
2410 [VMEXIT_CR9_READ] = "CR9_READ",
2411 [VMEXIT_CR10_READ] = "CR10_READ",
2412 [VMEXIT_CR11_READ] = "CR11_READ",
2413 [VMEXIT_CR12_READ] = "CR12_READ",
2414 [VMEXIT_CR13_READ] = "CR13_READ",
2415 [VMEXIT_CR14_READ] = "CR14_READ",
2416 [VMEXIT_CR15_READ] = "CR15_READ",
2417 [VMEXIT_CR0_WRITE] = "CR0_WRITE",
2418 [VMEXIT_CR1_WRITE] = "CR1_WRITE",
2419 [VMEXIT_CR2_WRITE] = "CR2_WRITE",
2420 [VMEXIT_CR3_WRITE] = "CR3_WRITE",
2421 [VMEXIT_CR4_WRITE] = "CR4_WRITE",
2422 [VMEXIT_CR5_WRITE] = "CR5_WRITE",
2423 [VMEXIT_CR6_WRITE] = "CR6_WRITE",
2424 [VMEXIT_CR7_WRITE] = "CR7_WRITE",
2425 [VMEXIT_CR8_WRITE] = "CR8_WRITE",
2426 [VMEXIT_CR9_WRITE] = "CR9_WRITE",
2427 [VMEXIT_CR10_WRITE] = "CR10_WRITE",
2428 [VMEXIT_CR11_WRITE] = "CR11_WRITE",
2429 [VMEXIT_CR12_WRITE] = "CR12_WRITE",
2430 [VMEXIT_CR13_WRITE] = "CR13_WRITE",
2431 [VMEXIT_CR14_WRITE] = "CR14_WRITE",
2432 [VMEXIT_CR15_WRITE] = "CR15_WRITE",
2433 [VMEXIT_DR0_READ] = "DR0_READ",
2434 [VMEXIT_DR1_READ] = "DR1_READ",
2435 [VMEXIT_DR2_READ] = "DR2_READ",
2436 [VMEXIT_DR3_READ] = "DR3_READ",
2437 [VMEXIT_DR4_READ] = "DR4_READ",
2438 [VMEXIT_DR5_READ] = "DR5_READ",
2439 [VMEXIT_DR6_READ] = "DR6_READ",
2440 [VMEXIT_DR7_READ] = "DR7_READ",
2441 [VMEXIT_DR8_READ] = "DR8_READ",
2442 [VMEXIT_DR9_READ] = "DR9_READ",
2443 [VMEXIT_DR10_READ] = "DR10_READ",
2444 [VMEXIT_DR11_READ] = "DR11_READ",
2445 [VMEXIT_DR12_READ] = "DR12_READ",
2446 [VMEXIT_DR13_READ] = "DR13_READ",
2447 [VMEXIT_DR14_READ] = "DR14_READ",
2448 [VMEXIT_DR15_READ] = "DR15_READ",
2449 [VMEXIT_DR0_WRITE] = "DR0_WRITE",
2450 [VMEXIT_DR1_WRITE] = "DR1_WRITE",
2451 [VMEXIT_DR2_WRITE] = "DR2_WRITE",
2452 [VMEXIT_DR3_WRITE] = "DR3_WRITE",
2453 [VMEXIT_DR4_WRITE] = "DR4_WRITE",
2454 [VMEXIT_DR5_WRITE] = "DR5_WRITE",
2455 [VMEXIT_DR6_WRITE] = "DR6_WRITE",
2456 [VMEXIT_DR7_WRITE] = "DR7_WRITE",
2457 [VMEXIT_DR8_WRITE] = "DR8_WRITE",
2458 [VMEXIT_DR9_WRITE] = "DR9_WRITE",
2459 [VMEXIT_DR10_WRITE] = "DR10_WRITE",
2460 [VMEXIT_DR11_WRITE] = "DR11_WRITE",
2461 [VMEXIT_DR12_WRITE] = "DR12_WRITE",
2462 [VMEXIT_DR13_WRITE] = "DR13_WRITE",
2463 [VMEXIT_DR14_WRITE] = "DR14_WRITE",
2464 [VMEXIT_DR15_WRITE] = "DR15_WRITE",
2465 [VMEXIT_EXCEPTION_DE] = "EXCEPTION_DE",
2466 [VMEXIT_EXCEPTION_DB] = "EXCEPTION_DB",
2467 [VMEXIT_EXCEPTION_NMI] = "EXCEPTION_NMI",
2468 [VMEXIT_EXCEPTION_BP] = "EXCEPTION_BP",
2469 [VMEXIT_EXCEPTION_OF] = "EXCEPTION_OF",
2470 [VMEXIT_EXCEPTION_BR] = "EXCEPTION_BR",
2471 [VMEXIT_EXCEPTION_UD] = "EXCEPTION_UD",
2472 [VMEXIT_EXCEPTION_NM] = "EXCEPTION_NM",
2473 [VMEXIT_EXCEPTION_DF] = "EXCEPTION_DF",
2474 [VMEXIT_EXCEPTION_09] = "EXCEPTION_09",
2475 [VMEXIT_EXCEPTION_TS] = "EXCEPTION_TS",
2476 [VMEXIT_EXCEPTION_NP] = "EXCEPTION_NP",
2477 [VMEXIT_EXCEPTION_SS] = "EXCEPTION_SS",
2478 [VMEXIT_EXCEPTION_GP] = "EXCEPTION_GP",
2479 [VMEXIT_EXCEPTION_PF] = "EXCEPTION_PF",
2480 [VMEXIT_EXCEPTION_15] = "EXCEPTION_15",
2481 [VMEXIT_EXCEPTION_MF] = "EXCEPTION_MF",
2482 [VMEXIT_EXCEPTION_AC] = "EXCEPTION_AC",
2483 [VMEXIT_EXCEPTION_MC] = "EXCEPTION_MC",
2484 [VMEXIT_EXCEPTION_XF] = "EXCEPTION_XF",
2485 [VMEXIT_INTR] = "INTR",
2486 [VMEXIT_NMI] = "NMI",
2487 [VMEXIT_SMI] = "SMI",
2488 [VMEXIT_INIT] = "INIT",
2489 [VMEXIT_VINTR] = "VINTR",
2490 [VMEXIT_CR0_SEL_WRITE] = "CR0_SEL_WRITE",
2491 [VMEXIT_IDTR_READ] = "IDTR_READ",
2492 [VMEXIT_GDTR_READ] = "GDTR_READ",
2493 [VMEXIT_LDTR_READ] = "LDTR_READ",
2494 [VMEXIT_TR_READ] = "TR_READ",
2495 [VMEXIT_IDTR_WRITE] = "IDTR_WRITE",
2496 [VMEXIT_GDTR_WRITE] = "GDTR_WRITE",
2497 [VMEXIT_LDTR_WRITE] = "LDTR_WRITE",
2498 [VMEXIT_TR_WRITE] = "TR_WRITE",
2499 [VMEXIT_RDTSC] = "RDTSC",
2500 [VMEXIT_RDPMC] = "RDPMC",
2501 [VMEXIT_PUSHF] = "PUSHF",
2502 [VMEXIT_POPF] = "POPF",
2503 [VMEXIT_CPUID] = "CPUID",
2504 [VMEXIT_RSM] = "RSM",
2505 [VMEXIT_IRET] = "IRET",
2506 [VMEXIT_SWINT] = "SWINT",
2507 [VMEXIT_INVD] = "INVD",
2508 [VMEXIT_PAUSE] = "PAUSE",
2509 [VMEXIT_HLT] = "HLT",
2510 [VMEXIT_INVLPG] = "INVLPG",
2511 [VMEXIT_INVLPGA] = "INVLPGA",
2512 [VMEXIT_IOIO] = "IOIO",
2513 [VMEXIT_MSR] = "MSR",
2514 [VMEXIT_TASK_SWITCH] = "TASK_SWITCH",
2515 [VMEXIT_FERR_FREEZE] = "FERR_FREEZE",
2516 [VMEXIT_SHUTDOWN] = "SHUTDOWN",
2517 [VMEXIT_VMRUN] = "VMRUN",
2518 [VMEXIT_VMMCALL] = "VMMCALL",
2519 [VMEXIT_VMLOAD] = "VMLOAD",
2520 [VMEXIT_VMSAVE] = "VMSAVE",
2521 [VMEXIT_STGI] = "STGI",
2522 [VMEXIT_CLGI] = "CLGI",
2523 [VMEXIT_SKINIT] = "SKINIT",
2524 [VMEXIT_RDTSCP] = "RDTSCP",
2525 [VMEXIT_ICEBP] = "ICEBP",
2526 [VMEXIT_NPF] = "NPF"
2527 };
2528 #endif /* SVM_EXTRA_DEBUG */
2530 #ifdef SVM_WALK_GUEST_PAGES
2531 void walk_shadow_and_guest_pt(unsigned long gva)
2533 l2_pgentry_t gpde;
2534 l2_pgentry_t spde;
2535 l1_pgentry_t gpte;
2536 l1_pgentry_t spte;
2537 struct vcpu *v = current;
2538 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2539 unsigned long gpa;
2541 gpa = shadow_gva_to_gpa(current, gva);
2542 printk( "gva = %lx, gpa=%lx, gCR3=%x\n", gva, gpa, (u32)vmcb->cr3 );
2543 if( !svm_paging_enabled(v) || mmio_space(gpa) )
2544 return;
2546 /* let's dump the guest and shadow page info */
2548 __guest_get_l2e(v, gva, &gpde);
2549 printk( "G-PDE = %x, flags=%x\n", gpde.l2, l2e_get_flags(gpde) );
2550 __shadow_get_l2e( v, gva, &spde );
2551 printk( "S-PDE = %x, flags=%x\n", spde.l2, l2e_get_flags(spde) );
2553 if ( unlikely(!(l2e_get_flags(gpde) & _PAGE_PRESENT)) )
2554 return;
2556 spte = l1e_empty();
2558 /* This is actually overkill - we only need to ensure the hl2 is in-sync.*/
2559 shadow_sync_va(v, gva);
2561 gpte.l1 = 0;
2562 __copy_from_user(&gpte, &linear_pg_table[ l1_linear_offset(gva) ],
2563 sizeof(gpte) );
2564 printk( "G-PTE = %x, flags=%x\n", gpte.l1, l1e_get_flags(gpte) );
2566 BUG(); // need to think about this, and convert usage of
2567 // phys_to_machine_mapping to use pagetable format...
2568 __copy_from_user( &spte, &phys_to_machine_mapping[ l1e_get_pfn( gpte ) ],
2569 sizeof(spte) );
2571 printk( "S-PTE = %x, flags=%x\n", spte.l1, l1e_get_flags(spte));
2573 #endif /* SVM_WALK_GUEST_PAGES */
2576 asmlinkage void svm_vmexit_handler(struct cpu_user_regs *regs)
2578 unsigned int exit_reason;
2579 unsigned long eip;
2580 struct vcpu *v = current;
2581 int error;
2582 int do_debug = 0;
2583 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2585 ASSERT(vmcb);
2587 exit_reason = vmcb->exitcode;
2588 save_svm_cpu_user_regs(v, regs);
2590 vmcb->tlb_control = 1;
2593 if (exit_reason == VMEXIT_INVALID)
2595 svm_dump_vmcb(__func__, vmcb);
2596 domain_crash_synchronous();
2599 #ifdef SVM_EXTRA_DEBUG
2601 #if defined(__i386__)
2602 #define rip eip
2603 #endif
2605 static unsigned long intercepts_counter = 0;
2607 if (svm_dbg_on && exit_reason == VMEXIT_EXCEPTION_PF)
2609 if (svm_paging_enabled(v) &&
2610 !mmio_space(shadow_gva_to_gpa(current, vmcb->exitinfo2)))
2612 printk("I%08ld,ExC=%s(%d),IP=%x:%"PRIx64","
2613 "I1=%"PRIx64",I2=%"PRIx64",INT=%"PRIx64", "
2614 "gpa=%"PRIx64"\n", intercepts_counter,
2615 exit_reasons[exit_reason], exit_reason, regs->cs,
2616 (u64)regs->rip,
2617 (u64)vmcb->exitinfo1,
2618 (u64)vmcb->exitinfo2,
2619 (u64)vmcb->exitintinfo.bytes,
2620 (u64)shadow_gva_to_gpa(current, vmcb->exitinfo2));
2622 else
2624 printk("I%08ld,ExC=%s(%d),IP=%x:%"PRIx64","
2625 "I1=%"PRIx64",I2=%"PRIx64",INT=%"PRIx64"\n",
2626 intercepts_counter,
2627 exit_reasons[exit_reason], exit_reason, regs->cs,
2628 (u64)regs->rip,
2629 (u64)vmcb->exitinfo1,
2630 (u64)vmcb->exitinfo2,
2631 (u64)vmcb->exitintinfo.bytes );
2634 else if ( svm_dbg_on
2635 && exit_reason != VMEXIT_IOIO
2636 && exit_reason != VMEXIT_INTR)
2639 if (exit_reasons[exit_reason])
2641 printk("I%08ld,ExC=%s(%d),IP=%x:%"PRIx64","
2642 "I1=%"PRIx64",I2=%"PRIx64",INT=%"PRIx64"\n",
2643 intercepts_counter,
2644 exit_reasons[exit_reason], exit_reason, regs->cs,
2645 (u64)regs->rip,
2646 (u64)vmcb->exitinfo1,
2647 (u64)vmcb->exitinfo2,
2648 (u64)vmcb->exitintinfo.bytes);
2650 else
2652 printk("I%08ld,ExC=%d(0x%x),IP=%x:%"PRIx64","
2653 "I1=%"PRIx64",I2=%"PRIx64",INT=%"PRIx64"\n",
2654 intercepts_counter, exit_reason, exit_reason, regs->cs,
2655 (u64)regs->rip,
2656 (u64)vmcb->exitinfo1,
2657 (u64)vmcb->exitinfo2,
2658 (u64)vmcb->exitintinfo.bytes);
2662 #ifdef SVM_WALK_GUEST_PAGES
2663 if( exit_reason == VMEXIT_EXCEPTION_PF
2664 && ( ( vmcb->exitinfo2 == vmcb->rip )
2665 || vmcb->exitintinfo.bytes) )
2667 if ( svm_paging_enabled(v) &&
2668 !mmio_space(gva_to_gpa(vmcb->exitinfo2)) )
2669 walk_shadow_and_guest_pt(vmcb->exitinfo2);
2671 #endif
2673 intercepts_counter++;
2675 #if 0
2676 if (svm_dbg_on)
2677 do_debug = svm_do_debugout(exit_reason);
2678 #endif
2680 if (do_debug)
2682 printk("%s:+ guest_table = 0x%08x, monitor_table = 0x%08x, "
2683 "shadow_table = 0x%08x\n",
2684 __func__,
2685 (int) v->arch.guest_table.pfn,
2686 (int) v->arch.monitor_table.pfn,
2687 (int) v->arch.shadow_table.pfn);
2689 svm_dump_vmcb(__func__, vmcb);
2690 svm_dump_regs(__func__, regs);
2691 svm_dump_inst(svm_rip2pointer(vmcb));
2694 #if defined(__i386__)
2695 #undef rip
2696 #endif
2699 #endif /* SVM_EXTRA_DEBUG */
2702 perfc_incra(svmexits, exit_reason);
2703 eip = vmcb->rip;
2705 #ifdef SVM_EXTRA_DEBUG
2706 if (do_debug)
2708 printk("eip = %lx, exit_reason = %d (0x%x)\n",
2709 eip, exit_reason, exit_reason);
2711 #endif /* SVM_EXTRA_DEBUG */
2713 TRACE_3D(TRC_VMX_VMEXIT, v->domain->domain_id, eip, exit_reason);
2715 switch (exit_reason)
2717 case VMEXIT_EXCEPTION_DB:
2719 #ifdef XEN_DEBUGGER
2720 svm_debug_save_cpu_user_regs(regs);
2721 pdb_handle_exception(1, regs, 1);
2722 svm_debug_restore_cpu_user_regs(regs);
2723 #else
2724 svm_store_cpu_user_regs(regs, v);
2725 domain_pause_for_debugger();
2726 #endif
2728 break;
2730 case VMEXIT_NMI:
2731 do_nmi(regs, 0);
2732 break;
2734 case VMEXIT_SMI:
2735 /*
2736 * For asynchronous SMIs, we just need to allow global interrupts
2737 * so that the SMI is taken properly in the context of the host. The
2738 * standard code does a STGI after the VMEXIT which should accomplish
2739 * this task. Continue as normal and restart the guest.
2740 */
2741 break;
2743 case VMEXIT_INIT:
2744 /*
2745 * Nothing to do, in fact we should never get to this point.
2746 */
2747 break;
2749 case VMEXIT_EXCEPTION_BP:
2750 #ifdef XEN_DEBUGGER
2751 svm_debug_save_cpu_user_regs(regs);
2752 pdb_handle_exception(3, regs, 1);
2753 svm_debug_restore_cpu_user_regs(regs);
2754 #else
2755 if ( test_bit(_DOMF_debugging, &v->domain->domain_flags) )
2756 domain_pause_for_debugger();
2757 else
2758 svm_inject_exception(v, TRAP_int3, 0, 0);
2759 #endif
2760 break;
2762 case VMEXIT_EXCEPTION_NM:
2763 svm_do_no_device_fault(vmcb);
2764 break;
2766 case VMEXIT_EXCEPTION_GP:
2767 /* This should probably not be trapped in the future */
2768 regs->error_code = vmcb->exitinfo1;
2769 svm_do_general_protection_fault(v, regs);
2770 break;
2772 case VMEXIT_EXCEPTION_PF:
2774 unsigned long va;
2775 va = vmcb->exitinfo2;
2776 regs->error_code = vmcb->exitinfo1;
2777 HVM_DBG_LOG(DBG_LEVEL_VMMU,
2778 "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx",
2779 (unsigned long)regs->eax, (unsigned long)regs->ebx,
2780 (unsigned long)regs->ecx, (unsigned long)regs->edx,
2781 (unsigned long)regs->esi, (unsigned long)regs->edi);
2783 if (!(error = svm_do_page_fault(va, regs)))
2785 /* Inject #PF into the guest using the VMCB event injection fields */
2786 svm_inject_exception(v, TRAP_page_fault, 1, regs->error_code);
2788 v->arch.hvm_svm.cpu_cr2 = va;
2789 vmcb->cr2 = va;
2790 TRACE_3D(TRC_VMX_INT, v->domain->domain_id,
2791 VMEXIT_EXCEPTION_PF, va);
2793 break;
2796 case VMEXIT_EXCEPTION_DF:
2797 /* Dump state to help determine why the guest double-faulted. */
2798 svm_dump_vmcb(__func__, vmcb);
2799 svm_dump_regs(__func__, regs);
2800 svm_dump_inst(svm_rip2pointer(vmcb));
2801 svm_inject_exception(v, TRAP_double_fault, 1, 0);
2802 break;
2804 case VMEXIT_INTR:
2805 break;
2807 case VMEXIT_INVD:
2808 svm_vmexit_do_invd(vmcb);
2809 break;
2811 case VMEXIT_GDTR_WRITE:
2812 printk("WRITE to GDTR\n");
2813 break;
2815 case VMEXIT_TASK_SWITCH:
2816 __hvm_bug(regs);
2817 break;
2819 case VMEXIT_CPUID:
2820 svm_vmexit_do_cpuid(vmcb, regs->eax, regs);
2821 break;
2823 case VMEXIT_HLT:
2824 svm_vmexit_do_hlt(vmcb);
2825 break;
2827 case VMEXIT_INVLPG:
2828 svm_handle_invlpg(0, regs);
2829 break;
2831 case VMEXIT_INVLPGA:
2832 svm_handle_invlpg(1, regs);
2833 break;
2835 case VMEXIT_VMMCALL:
2836 svm_do_vmmcall(v, regs);
2837 break;
2839 case VMEXIT_CR0_READ:
2840 svm_cr_access(v, 0, TYPE_MOV_FROM_CR, regs);
2841 break;
2843 case VMEXIT_CR2_READ:
2844 svm_cr_access(v, 2, TYPE_MOV_FROM_CR, regs);
2845 break;
2847 case VMEXIT_CR3_READ:
2848 svm_cr_access(v, 3, TYPE_MOV_FROM_CR, regs);
2849 break;
2851 case VMEXIT_CR4_READ:
2852 svm_cr_access(v, 4, TYPE_MOV_FROM_CR, regs);
2853 break;
2855 case VMEXIT_CR8_READ:
2856 svm_cr_access(v, 8, TYPE_MOV_FROM_CR, regs);
2857 break;
2859 case VMEXIT_CR0_WRITE:
2860 svm_cr_access(v, 0, TYPE_MOV_TO_CR, regs);
2861 break;
2863 case VMEXIT_CR2_WRITE:
2864 svm_cr_access(v, 2, TYPE_MOV_TO_CR, regs);
2865 break;
2867 case VMEXIT_CR3_WRITE:
2868 svm_cr_access(v, 3, TYPE_MOV_TO_CR, regs);
2869 local_flush_tlb();
2870 break;
2872 case VMEXIT_CR4_WRITE:
2873 svm_cr_access(v, 4, TYPE_MOV_TO_CR, regs);
2874 break;
2876 case VMEXIT_CR8_WRITE:
2877 svm_cr_access(v, 8, TYPE_MOV_TO_CR, regs);
2878 break;
2880 case VMEXIT_DR0_WRITE ... VMEXIT_DR7_WRITE:
2881 svm_dr_access(v, regs);
2882 break;
2884 case VMEXIT_IOIO:
2885 svm_io_instruction(v);
2886 break;
2888 case VMEXIT_MSR:
2889 svm_do_msr_access(v, regs);
2890 break;
2892 case VMEXIT_SHUTDOWN:
2893 printk("Guest shutdown exit\n");
2894 domain_crash_synchronous();
2895 break;
2897 default:
2898 printk("unexpected VMEXIT: exit reason = 0x%x, exitinfo1 = %"PRIx64", "
2899 "exitinfo2 = %"PRIx64"\n", exit_reason,
2900 (u64)vmcb->exitinfo1, (u64)vmcb->exitinfo2);
2901 __hvm_bug(regs); /* should not happen */
2902 break;
2905 #ifdef SVM_EXTRA_DEBUG
2906 if (do_debug)
2908 printk("%s: Done switch on vmexit_code\n", __func__);
2909 svm_dump_regs(__func__, regs);
2912 if (do_debug)
2914 printk("vmexit_handler():- guest_table = 0x%08x, "
2915 "monitor_table = 0x%08x, shadow_table = 0x%08x\n",
2916 (int)v->arch.guest_table.pfn,
2917 (int)v->arch.monitor_table.pfn,
2918 (int)v->arch.shadow_table.pfn);
2919 printk("svm_vmexit_handler: Returning\n");
2921 #endif
2923 return;
2926 asmlinkage void svm_load_cr2(void)
2928 struct vcpu *v = current;
2930 local_irq_disable();
2931 asm volatile("mov %0,%%cr2": :"r" (v->arch.hvm_svm.cpu_cr2));
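/*
 * Presumably invoked on the VMRUN entry path: the guest's saved cr2 value is
 * restored to the hardware register with interrupts disabled so that no
 * intervening host fault can overwrite it before the guest resumes.
 */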
2934 asmlinkage void svm_asid(void)
2936 struct vcpu *v = current;
2937 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2939 /*
2940  * If we need to assign a new ASID, or if this VCPU has switched cores,
2941  * retire the ASID used on the old core and assign a fresh one on the current core.
2942  */
2943 if ( test_bit( ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags ) ||
2944 ( v->arch.hvm_svm.asid_core != v->arch.hvm_svm.launch_core )) {
2945 /* recycle asid */
2946 if ( !asidpool_assign_next(vmcb, 1,
2947 v->arch.hvm_svm.asid_core,
2948 v->arch.hvm_svm.launch_core) )
2950 /* If we get here, we have a major problem */
2951 domain_crash_synchronous();
2954 v->arch.hvm_svm.asid_core = v->arch.hvm_svm.launch_core;
2955 clear_bit( ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags );
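/*
 * Descriptive note: ASIDs tag TLB entries on each physical core, so when the
 * VCPU has moved to a different core (asid_core != launch_core) or a flush
 * was requested via ARCH_SVM_VMCB_ASSIGN_ASID, taking a fresh ASID from the
 * per-core pool gives the guest a clean TLB view without an explicit flush.
 */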
2959 /*
2960 * Local variables:
2961 * mode: C
2962 * c-set-style: "BSD"
2963 * c-basic-offset: 4
2964 * tab-width: 4
2965 * indent-tabs-mode: nil
2966 * End:
2967 */