ia64/xen-unstable

view xen/arch/x86/hvm/svm/svm.c @ 12226:45e34f00a78f

[HVM] Clean up VCPU initialisation in Xen. No longer
parse HVM e820 tables in Xen (add some extra HVM parameters as a
cleaner alternative). Lots of code removal.
Signed-off-by: Keir Fraser <keir@xensource.com>
author kfraser@localhost.localdomain
date Thu Nov 02 15:55:51 2006 +0000 (2006-11-02)
parents 7b5115221dfc
children 2368e779f89f
1 /*
2 * svm.c: handling SVM architecture-related VM exits
3 * Copyright (c) 2004, Intel Corporation.
4 * Copyright (c) 2005, AMD Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
17 * Place - Suite 330, Boston, MA 02111-1307 USA.
18 *
19 */
21 #include <xen/config.h>
22 #include <xen/init.h>
23 #include <xen/lib.h>
24 #include <xen/trace.h>
25 #include <xen/sched.h>
26 #include <xen/irq.h>
27 #include <xen/softirq.h>
28 #include <xen/hypercall.h>
29 #include <xen/domain_page.h>
30 #include <asm/current.h>
31 #include <asm/io.h>
32 #include <asm/shadow.h>
33 #include <asm/regs.h>
34 #include <asm/cpufeature.h>
35 #include <asm/processor.h>
36 #include <asm/types.h>
37 #include <asm/msr.h>
38 #include <asm/spinlock.h>
39 #include <asm/hvm/hvm.h>
40 #include <asm/hvm/support.h>
41 #include <asm/hvm/io.h>
42 #include <asm/hvm/svm/svm.h>
43 #include <asm/hvm/svm/vmcb.h>
44 #include <asm/hvm/svm/emulate.h>
45 #include <asm/hvm/svm/vmmcall.h>
46 #include <asm/hvm/svm/intr.h>
47 #include <asm/x86_emulate.h>
48 #include <public/sched.h>
50 #define SVM_EXTRA_DEBUG
52 #define set_segment_register(name, value) \
53 __asm__ __volatile__ ( "movw %%ax ,%%" STR(name) "" : : "a" (value) )
55 /* External functions. We should move these to some suitable header file(s) */
57 extern int inst_copy_from_guest(unsigned char *buf, unsigned long guest_eip,
58 int inst_len);
59 extern asmlinkage void do_IRQ(struct cpu_user_regs *);
60 extern void svm_dump_inst(unsigned long eip);
61 extern int svm_dbg_on;
62 void svm_dump_regs(const char *from, struct cpu_user_regs *regs);
64 static void svm_relinquish_guest_resources(struct domain *d);
65 static int svm_do_vmmcall_reset_to_realmode(struct vcpu *v,
66 struct cpu_user_regs *regs);
68 /* va of hardware host save area */
69 static void *hsa[NR_CPUS] __read_mostly;
71 /* vmcb used for extended host state */
72 static void *root_vmcb[NR_CPUS] __read_mostly;
74 /* physical address of above for host VMSAVE/VMLOAD */
75 u64 root_vmcb_pa[NR_CPUS] __read_mostly;
78 /* ASID API */
79 enum {
80 ASID_AVAILABLE = 0,
81 ASID_INUSE,
82 ASID_RETIRED
83 };
84 #define INITIAL_ASID 0
85 #define ASID_MAX 64
87 struct asid_pool {
88 spinlock_t asid_lock;
89 u32 asid[ASID_MAX];
90 };
92 static DEFINE_PER_CPU(struct asid_pool, asid_pool);
95 /*
96 * Initializes the pool of ASIDs used by the guests on each core.
97 */
98 void asidpool_init(int core)
99 {
100 int i;
102 spin_lock_init(&per_cpu(asid_pool,core).asid_lock);
104 /* Host ASID is always in use */
105 per_cpu(asid_pool,core).asid[INITIAL_ASID] = ASID_INUSE;
106 for ( i = 1; i < ASID_MAX; i++ )
107 per_cpu(asid_pool,core).asid[i] = ASID_AVAILABLE;
108 }
111 /* internal function to get the next available ASID */
112 static int asidpool_fetch_next(struct vmcb_struct *vmcb, int core)
113 {
114 int i;
115 for ( i = 1; i < ASID_MAX; i++ )
116 {
117 if ( per_cpu(asid_pool,core).asid[i] == ASID_AVAILABLE )
118 {
119 vmcb->guest_asid = i;
120 per_cpu(asid_pool,core).asid[i] = ASID_INUSE;
121 return i;
122 }
123 }
124 return -1;
125 }
128 /*
129 * This function assigns the next available ASID number to the
130 * passed VMCB. If none are available, the TLB flush flag is
131 * set and all retired ASIDs are made
132 * available again.
133 *
134 * Returns: 1 -- success;
135 * 0 -- failure -- no more ASID numbers
136 * available.
137 */
138 int asidpool_assign_next( struct vmcb_struct *vmcb, int retire_current,
139 int oldcore, int newcore )
140 {
141 int i;
142 int res = 1;
143 static unsigned long cnt=0;
145 spin_lock(&per_cpu(asid_pool,oldcore).asid_lock);
146 if( retire_current && vmcb->guest_asid ) {
147 per_cpu(asid_pool,oldcore).asid[vmcb->guest_asid & (ASID_MAX-1)] =
148 ASID_RETIRED;
149 }
150 spin_unlock(&per_cpu(asid_pool,oldcore).asid_lock);
151 spin_lock(&per_cpu(asid_pool,newcore).asid_lock);
152 if( asidpool_fetch_next( vmcb, newcore ) < 0 ) {
153 if (svm_dbg_on)
154 printk( "SVM: tlb(%ld)\n", cnt++ );
155 /* Flush the TLB and make all retired slots available again */
156 vmcb->tlb_control = 1;
157 for( i = 1; i < ASID_MAX; i++ ) {
158 if( per_cpu(asid_pool,newcore).asid[i] == ASID_RETIRED ) {
159 per_cpu(asid_pool,newcore).asid[i] = ASID_AVAILABLE;
160 }
161 }
162 /* Get the First slot available */
163 res = asidpool_fetch_next( vmcb, newcore ) > 0;
164 }
165 spin_unlock(&per_cpu(asid_pool,newcore).asid_lock);
166 return res;
167 }
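
A minimal sketch, purely illustrative, of how a caller might drive asidpool_assign_next() when a VCPU moves between cores. The caller name and its parameters are hypothetical; this is not the actual Xen call site.

/* Hypothetical caller, for illustration only. */
static void example_switch_core(struct vmcb_struct *vmcb, int oldcore, int newcore)
{
    /* Retire the ASID used on the old core and fetch a fresh one on the
     * new core; on exhaustion the pool recycles retired ASIDs and sets
     * vmcb->tlb_control itself. */
    if ( !asidpool_assign_next(vmcb, 1, oldcore, newcore) )
        domain_crash_synchronous();  /* no ASID even after recycling */
}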
169 void asidpool_retire( struct vmcb_struct *vmcb, int core )
170 {
171 spin_lock(&per_cpu(asid_pool,core).asid_lock);
172 if( vmcb->guest_asid ) {
173 per_cpu(asid_pool,core).asid[vmcb->guest_asid & (ASID_MAX-1)] =
174 ASID_RETIRED;
175 }
176 spin_unlock(&per_cpu(asid_pool,core).asid_lock);
177 }
179 static inline void svm_inject_exception(struct vcpu *v, int trap,
180 int ev, int error_code)
181 {
182 eventinj_t event;
183 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
185 event.bytes = 0;
186 event.fields.v = 1;
187 event.fields.type = EVENTTYPE_EXCEPTION;
188 event.fields.vector = trap;
189 event.fields.ev = ev;
190 event.fields.errorcode = error_code;
192 ASSERT(vmcb->eventinj.fields.v == 0);
194 vmcb->eventinj = event;
195 v->arch.hvm_svm.inject_event=1;
196 }
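
For reference, injecting a #GP with error code 0 through this helper fills the VMCB event-injection field roughly as follows (values shown for illustration):

/* svm_inject_exception(v, TRAP_gp_fault, 1, 0) yields:
 *   event.fields.vector    = TRAP_gp_fault  (vector 13)
 *   event.fields.type      = EVENTTYPE_EXCEPTION
 *   event.fields.ev        = 1              (an error code is pushed)
 *   event.fields.errorcode = 0
 *   event.fields.v         = 1              (injection valid)
 */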
198 static void stop_svm(void)
199 {
200 u32 eax, edx;
201 int cpu = smp_processor_id();
203 /* We turn off the EFER_SVME bit. */
204 rdmsr(MSR_EFER, eax, edx);
205 eax &= ~EFER_SVME;
206 wrmsr(MSR_EFER, eax, edx);
208 /* release the HSA */
209 free_host_save_area(hsa[cpu]);
210 hsa[cpu] = NULL;
211 wrmsr(MSR_K8_VM_HSAVE_PA, 0, 0 );
213 /* free up the root vmcb */
214 free_vmcb(root_vmcb[cpu]);
215 root_vmcb[cpu] = NULL;
216 root_vmcb_pa[cpu] = 0;
218 printk("AMD SVM Extension is disabled.\n");
219 }
222 static void svm_store_cpu_guest_regs(
223 struct vcpu *v, struct cpu_user_regs *regs, unsigned long *crs)
224 {
225 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
227 if ( regs != NULL )
228 {
229 regs->eip = vmcb->rip;
230 regs->esp = vmcb->rsp;
231 regs->eflags = vmcb->rflags;
232 regs->cs = vmcb->cs.sel;
233 regs->ds = vmcb->ds.sel;
234 regs->es = vmcb->es.sel;
235 regs->ss = vmcb->ss.sel;
236 regs->gs = vmcb->gs.sel;
237 regs->fs = vmcb->fs.sel;
238 }
240 if ( crs != NULL )
241 {
242 /* Returning the guest's regs */
243 crs[0] = v->arch.hvm_svm.cpu_shadow_cr0;
244 crs[2] = v->arch.hvm_svm.cpu_cr2;
245 crs[3] = v->arch.hvm_svm.cpu_cr3;
246 crs[4] = v->arch.hvm_svm.cpu_shadow_cr4;
247 }
248 }
250 static int svm_paging_enabled(struct vcpu *v)
251 {
252 unsigned long cr0;
254 cr0 = v->arch.hvm_svm.cpu_shadow_cr0;
256 return (cr0 & X86_CR0_PE) && (cr0 & X86_CR0_PG);
257 }
259 static int svm_pae_enabled(struct vcpu *v)
260 {
261 unsigned long cr4;
263 if(!svm_paging_enabled(v))
264 return 0;
266 cr4 = v->arch.hvm_svm.cpu_shadow_cr4;
268 return (cr4 & X86_CR4_PAE);
269 }
271 #define IS_CANO_ADDRESS(add) 1
273 static inline int long_mode_do_msr_read(struct cpu_user_regs *regs)
274 {
275 u64 msr_content = 0;
276 struct vcpu *vc = current;
277 struct vmcb_struct *vmcb = vc->arch.hvm_svm.vmcb;
279 switch (regs->ecx)
280 {
281 case MSR_EFER:
282 msr_content = vmcb->efer;
283 msr_content &= ~EFER_SVME;
284 break;
286 case MSR_FS_BASE:
287 msr_content = vmcb->fs.base;
288 break;
290 case MSR_GS_BASE:
291 msr_content = vmcb->gs.base;
292 break;
294 case MSR_SHADOW_GS_BASE:
295 msr_content = vmcb->kerngsbase;
296 break;
298 case MSR_STAR:
299 msr_content = vmcb->star;
300 break;
302 case MSR_LSTAR:
303 msr_content = vmcb->lstar;
304 break;
306 case MSR_CSTAR:
307 msr_content = vmcb->cstar;
308 break;
310 case MSR_SYSCALL_MASK:
311 msr_content = vmcb->sfmask;
312 break;
313 default:
314 return 0;
315 }
317 HVM_DBG_LOG(DBG_LEVEL_2, "mode_do_msr_read: msr_content: %"PRIx64"\n",
318 msr_content);
320 regs->eax = (u32)(msr_content >> 0);
321 regs->edx = (u32)(msr_content >> 32);
322 return 1;
323 }
325 static inline int long_mode_do_msr_write(struct cpu_user_regs *regs)
326 {
327 u64 msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
328 struct vcpu *vc = current;
329 struct vmcb_struct *vmcb = vc->arch.hvm_svm.vmcb;
331 HVM_DBG_LOG(DBG_LEVEL_1, "mode_do_msr_write msr %lx "
332 "msr_content %"PRIx64"\n",
333 (unsigned long)regs->ecx, msr_content);
335 switch (regs->ecx)
336 {
337 case MSR_EFER:
338 #ifdef __x86_64__
339 /* offending reserved bit will cause #GP */
340 if ( msr_content & ~(EFER_LME | EFER_LMA | EFER_NX | EFER_SCE) )
341 {
342 printk("Trying to set reserved bit in EFER: %"PRIx64"\n",
343 msr_content);
344 svm_inject_exception(vc, TRAP_gp_fault, 1, 0);
345 return 0;
346 }
348 /* LME: 0 -> 1 */
349 if ( msr_content & EFER_LME &&
350 !test_bit(SVM_CPU_STATE_LME_ENABLED, &vc->arch.hvm_svm.cpu_state))
351 {
352 if ( svm_paging_enabled(vc) ||
353 !test_bit(SVM_CPU_STATE_PAE_ENABLED,
354 &vc->arch.hvm_svm.cpu_state) )
355 {
356 printk("Trying to set LME bit when "
357 "in paging mode or PAE bit is not set\n");
358 svm_inject_exception(vc, TRAP_gp_fault, 1, 0);
359 return 0;
360 }
361 set_bit(SVM_CPU_STATE_LME_ENABLED, &vc->arch.hvm_svm.cpu_state);
362 }
364 /* We have already recorded that we want LME, so it will be set
365 * next time CR0 gets updated. So we clear that bit and continue.
366 */
367 if ((msr_content ^ vmcb->efer) & EFER_LME)
368 msr_content &= ~EFER_LME;
369 /* No update for LME/LMA since it has no effect */
370 #endif
371 vmcb->efer = msr_content | EFER_SVME;
372 break;
374 case MSR_FS_BASE:
375 case MSR_GS_BASE:
376 if (!(SVM_LONG_GUEST(vc)))
377 domain_crash_synchronous();
379 if (!IS_CANO_ADDRESS(msr_content))
380 {
381 HVM_DBG_LOG(DBG_LEVEL_1, "Not cano address of msr write\n");
382 svm_inject_exception(vc, TRAP_gp_fault, 1, 0);
383 }
385 if (regs->ecx == MSR_FS_BASE)
386 vmcb->fs.base = msr_content;
387 else
388 vmcb->gs.base = msr_content;
389 break;
391 case MSR_SHADOW_GS_BASE:
392 vmcb->kerngsbase = msr_content;
393 break;
395 case MSR_STAR:
396 vmcb->star = msr_content;
397 break;
399 case MSR_LSTAR:
400 vmcb->lstar = msr_content;
401 break;
403 case MSR_CSTAR:
404 vmcb->cstar = msr_content;
405 break;
407 case MSR_SYSCALL_MASK:
408 vmcb->sfmask = msr_content;
409 break;
411 default:
412 return 0;
413 }
414 return 1;
415 }
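
To make the EFER.LME handling above concrete, a typical 64-bit guest enable sequence passes through this handler and svm_set_cr0() roughly as sketched below (illustrative summary of the code in this file, not additional behaviour):

/* 1. guest sets CR4.PAE         -> SVM_CPU_STATE_PAE_ENABLED recorded
 * 2. guest writes EFER.LME = 1  -> SVM_CPU_STATE_LME_ENABLED recorded,
 *                                  the bit is held back from vmcb->efer
 * 3. guest sets CR0.PG          -> svm_set_cr0() sets EFER_LME|EFER_LMA
 *                                  in the VMCB and long mode is active
 */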
418 #define loaddebug(_v,_reg) \
419 __asm__ __volatile__ ("mov %0,%%db" #_reg : : "r" ((_v)->debugreg[_reg]))
420 #define savedebug(_v,_reg) \
421 __asm__ __volatile__ ("mov %%db" #_reg ",%0" : : "r" ((_v)->debugreg[_reg]))
424 static inline void svm_save_dr(struct vcpu *v)
425 {
426 if (v->arch.hvm_vcpu.flag_dr_dirty)
427 {
428 /* clear the DR dirty flag and re-enable intercepts for DR accesses */
429 v->arch.hvm_vcpu.flag_dr_dirty = 0;
430 v->arch.hvm_svm.vmcb->dr_intercepts = DR_INTERCEPT_ALL_WRITES;
432 savedebug(&v->arch.guest_context, 0);
433 savedebug(&v->arch.guest_context, 1);
434 savedebug(&v->arch.guest_context, 2);
435 savedebug(&v->arch.guest_context, 3);
436 }
437 }
440 static inline void __restore_debug_registers(struct vcpu *v)
441 {
442 loaddebug(&v->arch.guest_context, 0);
443 loaddebug(&v->arch.guest_context, 1);
444 loaddebug(&v->arch.guest_context, 2);
445 loaddebug(&v->arch.guest_context, 3);
446 }
449 static inline void svm_restore_dr(struct vcpu *v)
450 {
451 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
453 if (!vmcb)
454 return;
456 if (unlikely(vmcb->dr7 & 0xFF))
457 __restore_debug_registers(v);
458 }
461 static int svm_realmode(struct vcpu *v)
462 {
463 unsigned long cr0 = v->arch.hvm_svm.cpu_shadow_cr0;
464 unsigned long eflags = v->arch.hvm_svm.vmcb->rflags;
466 return (eflags & X86_EFLAGS_VM) || !(cr0 & X86_CR0_PE);
467 }
469 static int svm_guest_x86_mode(struct vcpu *v)
470 {
471 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
473 if ( vmcb->efer & EFER_LMA )
474 return (vmcb->cs.attributes.fields.l ?
475 X86EMUL_MODE_PROT64 : X86EMUL_MODE_PROT32);
477 if ( svm_realmode(v) )
478 return X86EMUL_MODE_REAL;
480 return (vmcb->cs.attributes.fields.db ?
481 X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16);
482 }
484 void svm_update_host_cr3(struct vcpu *v)
485 {
486 /* SVM doesn't have a HOST_CR3 equivalent to update. */
487 }
489 unsigned long svm_get_ctrl_reg(struct vcpu *v, unsigned int num)
490 {
491 switch ( num )
492 {
493 case 0:
494 return v->arch.hvm_svm.cpu_shadow_cr0;
495 case 2:
496 return v->arch.hvm_svm.cpu_cr2;
497 case 3:
498 return v->arch.hvm_svm.cpu_cr3;
499 case 4:
500 return v->arch.hvm_svm.cpu_shadow_cr4;
501 default:
502 BUG();
503 }
504 return 0; /* dummy */
505 }
508 /* Make sure that xen intercepts any FP accesses from current */
509 static void svm_stts(struct vcpu *v)
510 {
511 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
513 /*
514 * If the guest does not have TS enabled then we must cause and handle an
515 * exception on first use of the FPU. If the guest *does* have TS enabled
516 * then this is not necessary: no FPU activity can occur until the guest
517 * clears CR0.TS, and we will initialise the FPU when that happens.
518 */
519 if ( !(v->arch.hvm_svm.cpu_shadow_cr0 & X86_CR0_TS) )
520 {
521 v->arch.hvm_svm.vmcb->exception_intercepts |= EXCEPTION_BITMAP_NM;
522 vmcb->cr0 |= X86_CR0_TS;
523 }
524 }
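
The companion half of this lazy-FPU scheme is svm_do_no_device_fault() further down in this file; the round trip, sketched for clarity:

/* 1. svm_stts(): intercept #NM and force CR0.TS in the VMCB
 * 2. guest touches the FPU          -> VMEXIT_EXCEPTION_NM
 * 3. svm_do_no_device_fault(): setup_fpu(), stop intercepting #NM and
 *    clear the artificial TS bit (unless the guest itself set CR0.TS)
 */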
527 static void svm_set_tsc_offset(struct vcpu *v, u64 offset)
528 {
529 v->arch.hvm_svm.vmcb->tsc_offset = offset;
530 }
533 /* SVM-specific initialization code for VCPU application processors */
534 static void svm_init_ap_context(struct vcpu_guest_context *ctxt,
535 int vcpuid, int trampoline_vector)
536 {
537 int i;
538 struct vcpu *v, *bsp = current;
539 struct domain *d = bsp->domain;
540 cpu_user_regs_t *regs;
543 if ((v = d->vcpu[vcpuid]) == NULL)
544 {
545 printk("vcpuid %d is invalid! good-bye.\n", vcpuid);
546 domain_crash_synchronous();
547 }
548 regs = &v->arch.guest_context.user_regs;
550 memset(ctxt, 0, sizeof(*ctxt));
551 for (i = 0; i < 256; ++i)
552 {
553 ctxt->trap_ctxt[i].vector = i;
554 ctxt->trap_ctxt[i].cs = FLAT_KERNEL_CS;
555 }
558 /*
559 * We execute the trampoline code in real mode. The trampoline vector
560 * passed to us is page aligned and is the physical frame number for
561 * the code.
562 */
563 ctxt->user_regs.eip = 0x0;
564 ctxt->user_regs.cs = (trampoline_vector << 8);
565 }
567 static void svm_init_hypercall_page(struct domain *d, void *hypercall_page)
568 {
569 char *p;
570 int i;
572 memset(hypercall_page, 0, PAGE_SIZE);
574 for ( i = 0; i < (PAGE_SIZE / 32); i++ )
575 {
576 p = (char *)(hypercall_page + (i * 32));
577 *(u8 *)(p + 0) = 0xb8; /* mov imm32, %eax */
578 *(u32 *)(p + 1) = i;
579 *(u8 *)(p + 5) = 0x0f; /* vmmcall */
580 *(u8 *)(p + 6) = 0x01;
581 *(u8 *)(p + 7) = 0xd9;
582 *(u8 *)(p + 8) = 0xc3; /* ret */
583 }
585 /* Don't support HYPERVISOR_iret at the moment */
586 *(u16 *)(hypercall_page + (__HYPERVISOR_iret * 32)) = 0x0b0f; /* ud2 */
587 }
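
Each 32-byte slot therefore contains a three-instruction stub; for hypercall number 2 the generated bytes decode as follows (disassembly shown for illustration):

/*   b8 02 00 00 00    mov  $0x2,%eax     ; hypercall number
 *   0f 01 d9          vmmcall            ; trap to Xen
 *   c3                ret
 */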
590 int svm_dbg_on = 0;
592 static inline int svm_do_debugout(unsigned long exit_code)
593 {
594 int i;
596 static unsigned long counter = 0;
597 static unsigned long works[] =
598 {
599 VMEXIT_IOIO,
600 VMEXIT_HLT,
601 VMEXIT_CPUID,
602 VMEXIT_DR0_READ,
603 VMEXIT_DR1_READ,
604 VMEXIT_DR2_READ,
605 VMEXIT_DR3_READ,
606 VMEXIT_DR6_READ,
607 VMEXIT_DR7_READ,
608 VMEXIT_DR0_WRITE,
609 VMEXIT_DR1_WRITE,
610 VMEXIT_DR2_WRITE,
611 VMEXIT_DR3_WRITE,
612 VMEXIT_CR0_READ,
613 VMEXIT_CR0_WRITE,
614 VMEXIT_CR3_READ,
615 VMEXIT_CR4_READ,
616 VMEXIT_MSR,
617 VMEXIT_CR0_WRITE,
618 VMEXIT_CR3_WRITE,
619 VMEXIT_CR4_WRITE,
620 VMEXIT_EXCEPTION_PF,
621 VMEXIT_INTR,
622 VMEXIT_INVLPG,
623 VMEXIT_EXCEPTION_NM
624 };
627 #if 0
628 if (svm_dbg_on && exit_code != 0x7B)
629 return 1;
630 #endif
632 counter++;
634 #if 0
635 if ((exit_code == 0x4E
636 || exit_code == VMEXIT_CR0_READ
637 || exit_code == VMEXIT_CR0_WRITE)
638 && counter < 200000)
639 return 0;
641 if ((exit_code == 0x4E) && counter < 500000)
642 return 0;
643 #endif
645 for (i = 0; i < sizeof(works) / sizeof(works[0]); i++)
646 if (exit_code == works[i])
647 return 0;
649 return 1;
650 }
652 static void save_svm_cpu_user_regs(struct vcpu *v, struct cpu_user_regs *ctxt)
653 {
654 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
656 ASSERT(vmcb);
658 ctxt->eax = vmcb->rax;
659 ctxt->ss = vmcb->ss.sel;
660 ctxt->esp = vmcb->rsp;
661 ctxt->eflags = vmcb->rflags;
662 ctxt->cs = vmcb->cs.sel;
663 ctxt->eip = vmcb->rip;
665 ctxt->gs = vmcb->gs.sel;
666 ctxt->fs = vmcb->fs.sel;
667 ctxt->es = vmcb->es.sel;
668 ctxt->ds = vmcb->ds.sel;
669 }
671 static void svm_store_cpu_user_regs(struct cpu_user_regs *regs, struct vcpu *v)
672 {
673 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
675 regs->eip = vmcb->rip;
676 regs->esp = vmcb->rsp;
677 regs->eflags = vmcb->rflags;
678 regs->cs = vmcb->cs.sel;
679 regs->ds = vmcb->ds.sel;
680 regs->es = vmcb->es.sel;
681 regs->ss = vmcb->ss.sel;
682 }
684 /* XXX Use svm_load_cpu_guest_regs instead */
685 static void svm_load_cpu_user_regs(struct vcpu *v, struct cpu_user_regs *regs)
686 {
687 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
688 u32 *intercepts = &v->arch.hvm_svm.vmcb->exception_intercepts;
690 /* Write the guest register value into VMCB */
691 vmcb->rax = regs->eax;
692 vmcb->ss.sel = regs->ss;
693 vmcb->rsp = regs->esp;
694 vmcb->rflags = regs->eflags;
695 vmcb->cs.sel = regs->cs;
696 vmcb->rip = regs->eip;
697 if (regs->eflags & EF_TF)
698 *intercepts |= EXCEPTION_BITMAP_DB;
699 else
700 *intercepts &= ~EXCEPTION_BITMAP_DB;
701 }
703 static void svm_load_cpu_guest_regs(
704 struct vcpu *v, struct cpu_user_regs *regs)
705 {
706 svm_load_cpu_user_regs(v, regs);
707 }
709 int svm_long_mode_enabled(struct vcpu *v)
710 {
711 return SVM_LONG_GUEST(v);
712 }
716 static void arch_svm_do_launch(struct vcpu *v)
717 {
718 cpu_user_regs_t *regs = &current->arch.guest_context.user_regs;
719 int error;
721 #if 0
722 if (svm_dbg_on)
723 printk("Do launch\n");
724 #endif
725 error = construct_vmcb(&v->arch.hvm_svm, regs);
726 if ( error < 0 )
727 {
728 if (v->vcpu_id == 0) {
729 printk("Failed to construct a new VMCB for BSP.\n");
730 } else {
731 printk("Failed to construct a new VMCB for AP %d\n", v->vcpu_id);
732 }
733 domain_crash_synchronous();
734 }
736 svm_do_launch(v);
737 #if 0
738 if (svm_dbg_on)
739 svm_dump_host_regs(__func__);
740 #endif
741 if (v->vcpu_id != 0)
742 {
743 u16 cs_sel = regs->cs;
744 /*
745 * This is the launch of an AP; set state so that we begin executing
746 * the trampoline code in real-mode.
747 */
748 svm_do_vmmcall_reset_to_realmode(v, regs);
749 /* Adjust the state to execute the trampoline code.*/
750 v->arch.hvm_svm.vmcb->rip = 0;
751 v->arch.hvm_svm.vmcb->cs.sel= cs_sel;
752 v->arch.hvm_svm.vmcb->cs.base = (cs_sel << 4);
753 }
755 reset_stack_and_jump(svm_asm_do_launch);
756 }
758 static void svm_freeze_time(struct vcpu *v)
759 {
760 struct periodic_time *pt=&v->domain->arch.hvm_domain.pl_time.periodic_tm;
762 if ( pt->enabled && pt->first_injected && v->vcpu_id == pt->bind_vcpu
763 && !v->arch.hvm_vcpu.guest_time ) {
764 v->arch.hvm_vcpu.guest_time = hvm_get_guest_time(v);
765 stop_timer(&(pt->timer));
766 }
767 }
770 static void svm_ctxt_switch_from(struct vcpu *v)
771 {
772 svm_freeze_time(v);
773 svm_save_dr(v);
774 }
776 static void svm_ctxt_switch_to(struct vcpu *v)
777 {
778 #ifdef __x86_64__
779 /*
780 * This is required because VMRUN performs consistency checks
781 * and some of the DOM0 selectors point to
782 * invalid GDT locations, which causes AMD processors
783 * to shut down.
784 */
785 set_segment_register(ds, 0);
786 set_segment_register(es, 0);
787 set_segment_register(ss, 0);
788 #endif
789 svm_restore_dr(v);
790 }
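
For clarity, set_segment_register(ds, 0) (defined near the top of this file) expands to roughly the following, loading the NULL selector so that VMRUN's consistency checks cannot trip over a stale DOM0 selector:

/*   __asm__ __volatile__ ( "movw %%ax ,%%ds" : : "a" (0) );   */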
792 static int svm_vcpu_initialise(struct vcpu *v)
793 {
794 v->arch.schedule_tail = arch_svm_do_launch;
795 v->arch.ctxt_switch_from = svm_ctxt_switch_from;
796 v->arch.ctxt_switch_to = svm_ctxt_switch_to;
797 return 0;
798 }
800 int start_svm(void)
801 {
802 u32 eax, ecx, edx;
803 u32 phys_hsa_lo, phys_hsa_hi;
804 u64 phys_hsa;
805 int cpu = smp_processor_id();
807 /* Xen does not fill x86_capability words except 0. */
808 ecx = cpuid_ecx(0x80000001);
809 boot_cpu_data.x86_capability[5] = ecx;
811 if (!(test_bit(X86_FEATURE_SVME, &boot_cpu_data.x86_capability)))
812 return 0;
814 /* check whether SVM feature is disabled in BIOS */
815 rdmsr(MSR_K8_VM_CR, eax, edx);
816 if ( eax & K8_VMCR_SVME_DISABLE )
817 {
818 printk("AMD SVM Extension is disabled in BIOS.\n");
819 return 0;
820 }
822 if (!(hsa[cpu] = alloc_host_save_area()))
823 return 0;
825 rdmsr(MSR_EFER, eax, edx);
826 eax |= EFER_SVME;
827 wrmsr(MSR_EFER, eax, edx);
828 asidpool_init( cpu );
829 printk("AMD SVM Extension is enabled for cpu %d.\n", cpu );
831 /* Initialize the HSA for this core */
832 phys_hsa = (u64) virt_to_maddr(hsa[cpu]);
833 phys_hsa_lo = (u32) phys_hsa;
834 phys_hsa_hi = (u32) (phys_hsa >> 32);
835 wrmsr(MSR_K8_VM_HSAVE_PA, phys_hsa_lo, phys_hsa_hi);
837 if (!(root_vmcb[cpu] = alloc_vmcb()))
838 return 0;
839 root_vmcb_pa[cpu] = virt_to_maddr(root_vmcb[cpu]);
841 if (cpu == 0)
842 setup_vmcb_dump();
844 /* Setup HVM interfaces */
845 hvm_funcs.disable = stop_svm;
847 hvm_funcs.vcpu_initialise = svm_vcpu_initialise;
848 hvm_funcs.relinquish_guest_resources = svm_relinquish_guest_resources;
850 hvm_funcs.store_cpu_guest_regs = svm_store_cpu_guest_regs;
851 hvm_funcs.load_cpu_guest_regs = svm_load_cpu_guest_regs;
853 hvm_funcs.realmode = svm_realmode;
854 hvm_funcs.paging_enabled = svm_paging_enabled;
855 hvm_funcs.long_mode_enabled = svm_long_mode_enabled;
856 hvm_funcs.pae_enabled = svm_pae_enabled;
857 hvm_funcs.guest_x86_mode = svm_guest_x86_mode;
858 hvm_funcs.get_guest_ctrl_reg = svm_get_ctrl_reg;
860 hvm_funcs.update_host_cr3 = svm_update_host_cr3;
862 hvm_funcs.stts = svm_stts;
863 hvm_funcs.set_tsc_offset = svm_set_tsc_offset;
865 hvm_funcs.init_ap_context = svm_init_ap_context;
866 hvm_funcs.init_hypercall_page = svm_init_hypercall_page;
868 hvm_enabled = 1;
870 return 1;
871 }
874 static void svm_relinquish_guest_resources(struct domain *d)
875 {
876 struct vcpu *v;
878 for_each_vcpu ( d, v )
879 {
880 if ( !test_bit(_VCPUF_initialised, &v->vcpu_flags) )
881 continue;
883 destroy_vmcb(&v->arch.hvm_svm);
884 kill_timer(&v->arch.hvm_vcpu.hlt_timer);
885 if ( VLAPIC(v) != NULL )
886 {
887 kill_timer(&VLAPIC(v)->vlapic_timer);
888 unmap_domain_page_global(VLAPIC(v)->regs);
889 free_domheap_page(VLAPIC(v)->regs_page);
890 xfree(VLAPIC(v));
891 }
892 hvm_release_assist_channel(v);
893 }
895 kill_timer(&d->arch.hvm_domain.pl_time.periodic_tm.timer);
896 rtc_deinit(d);
897 pmtimer_deinit(d);
899 if ( d->arch.hvm_domain.shared_page_va )
900 unmap_domain_page_global(
901 (void *)d->arch.hvm_domain.shared_page_va);
903 if ( d->arch.hvm_domain.buffered_io_va )
904 unmap_domain_page_global((void *)d->arch.hvm_domain.buffered_io_va);
905 }
908 static void svm_migrate_timers(struct vcpu *v)
909 {
910 struct periodic_time *pt =
911 &(v->domain->arch.hvm_domain.pl_time.periodic_tm);
912 struct RTCState *vrtc = &v->domain->arch.hvm_domain.pl_time.vrtc;
913 struct PMTState *vpmt = &v->domain->arch.hvm_domain.pl_time.vpmt;
915 if ( pt->enabled )
916 {
917 migrate_timer(&pt->timer, v->processor);
918 migrate_timer(&v->arch.hvm_vcpu.hlt_timer, v->processor);
919 }
920 if ( VLAPIC(v) != NULL )
921 migrate_timer(&VLAPIC(v)->vlapic_timer, v->processor);
922 migrate_timer(&vrtc->second_timer, v->processor);
923 migrate_timer(&vrtc->second_timer2, v->processor);
924 migrate_timer(&vpmt->timer, v->processor);
925 }
928 void arch_svm_do_resume(struct vcpu *v)
929 {
930 /* pinning VCPU to a different core? */
931 if ( v->arch.hvm_svm.launch_core == smp_processor_id()) {
932 hvm_do_resume( v );
933 reset_stack_and_jump( svm_asm_do_resume );
934 }
935 else {
936 if (svm_dbg_on)
937 printk("VCPU core pinned: %d to %d\n",
938 v->arch.hvm_svm.launch_core, smp_processor_id() );
939 v->arch.hvm_svm.launch_core = smp_processor_id();
940 svm_migrate_timers( v );
941 hvm_do_resume( v );
942 reset_stack_and_jump( svm_asm_do_resume );
943 }
944 }
948 static int svm_do_page_fault(unsigned long va, struct cpu_user_regs *regs)
949 {
950 struct vcpu *v = current;
951 unsigned long eip;
952 int result;
953 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
955 ASSERT(vmcb);
957 //#if HVM_DEBUG
958 eip = vmcb->rip;
959 HVM_DBG_LOG(DBG_LEVEL_VMMU,
960 "svm_do_page_fault = 0x%lx, eip = %lx, error_code = %lx",
961 va, eip, (unsigned long)regs->error_code);
962 //#endif
964 result = shadow_fault(va, regs);
966 if( result ) {
967 /* Let's make sure that the Guest TLB is flushed */
968 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
969 }
971 return result;
972 }
975 static void svm_do_no_device_fault(struct vmcb_struct *vmcb)
976 {
977 struct vcpu *v = current;
979 setup_fpu(v);
980 vmcb->exception_intercepts &= ~EXCEPTION_BITMAP_NM;
982 if ( !(v->arch.hvm_svm.cpu_shadow_cr0 & X86_CR0_TS) )
983 vmcb->cr0 &= ~X86_CR0_TS;
984 }
987 static void svm_do_general_protection_fault(struct vcpu *v,
988 struct cpu_user_regs *regs)
989 {
990 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
991 unsigned long eip, error_code;
993 ASSERT(vmcb);
995 eip = vmcb->rip;
996 error_code = vmcb->exitinfo1;
998 if (vmcb->idtr.limit == 0) {
999 printk("Huh? We got a GP Fault with an invalid IDTR!\n");
1000 svm_dump_vmcb(__func__, vmcb);
1001 svm_dump_regs(__func__, regs);
1002 svm_dump_inst(vmcb->rip);
1003 __hvm_bug(regs);
1004 }
1006 HVM_DBG_LOG(DBG_LEVEL_1,
1007 "svm_general_protection_fault: eip = %lx, erro_code = %lx",
1008 eip, error_code);
1010 HVM_DBG_LOG(DBG_LEVEL_1,
1011 "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx",
1012 (unsigned long)regs->eax, (unsigned long)regs->ebx,
1013 (unsigned long)regs->ecx, (unsigned long)regs->edx,
1014 (unsigned long)regs->esi, (unsigned long)regs->edi);
1016 /* Reflect it back into the guest */
1017 svm_inject_exception(v, TRAP_gp_fault, 1, error_code);
1018 }
1020 /* Reserved bits ECX: [31:14], [12:4], [2:1]*/
1021 #define SVM_VCPU_CPUID_L1_ECX_RESERVED 0xffffdff6
1022 /* Reserved bits EDX: [31:29], [27], [22:20], [18], [10] */
1023 #define SVM_VCPU_CPUID_L1_EDX_RESERVED 0xe8740400
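
As a quick sanity check, the ECX mask is just the OR of the listed bit ranges:

/*   bits [31:14] = 0xFFFFC000
 *   bits [12:4]  = 0x00001FF0
 *   bits [2:1]   = 0x00000006
 *   OR together  = 0xFFFFDFF6  == SVM_VCPU_CPUID_L1_ECX_RESERVED
 */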
1025 static void svm_vmexit_do_cpuid(struct vmcb_struct *vmcb, unsigned long input,
1026 struct cpu_user_regs *regs)
1028 unsigned int eax, ebx, ecx, edx;
1029 unsigned long eip;
1030 struct vcpu *v = current;
1031 int inst_len;
1033 ASSERT(vmcb);
1035 eip = vmcb->rip;
1037 HVM_DBG_LOG(DBG_LEVEL_1,
1038 "do_cpuid: (eax) %lx, (ebx) %lx, (ecx) %lx, (edx) %lx,"
1039 " (esi) %lx, (edi) %lx",
1040 (unsigned long)regs->eax, (unsigned long)regs->ebx,
1041 (unsigned long)regs->ecx, (unsigned long)regs->edx,
1042 (unsigned long)regs->esi, (unsigned long)regs->edi);
1044 if ( !cpuid_hypervisor_leaves(input, &eax, &ebx, &ecx, &edx) )
1046 cpuid(input, &eax, &ebx, &ecx, &edx);
1047 if (input == 0x00000001 || input == 0x80000001 )
1049 if ( !hvm_apic_support(v->domain) ||
1050 !vlapic_global_enabled((VLAPIC(v))) )
1052 /* Since the apic is disabled, avoid any confusion
1053 about SMP cpus being available */
1054 clear_bit(X86_FEATURE_APIC, &edx);
1056 #if CONFIG_PAGING_LEVELS >= 3
1057 if ( !v->domain->arch.hvm_domain.params[HVM_PARAM_PAE_ENABLED] )
1058 #endif
1060 clear_bit(X86_FEATURE_PAE, &edx);
1061 if (input == 0x80000001 )
1062 clear_bit(X86_FEATURE_NX & 31, &edx);
1064 clear_bit(X86_FEATURE_PSE36, &edx);
1065 if (input == 0x00000001 )
1067 /* Clear out reserved bits. */
1068 ecx &= ~SVM_VCPU_CPUID_L1_ECX_RESERVED;
1069 edx &= ~SVM_VCPU_CPUID_L1_EDX_RESERVED;
1071 clear_bit(X86_FEATURE_MWAIT & 31, &ecx);
1073 /* Guest should only see one logical processor.
1074 * See details on page 23 of AMD CPUID Specification.
1075 */
1076 clear_bit(X86_FEATURE_HT, &edx); /* clear the hyperthread bit */
1077 ebx &= 0xFF00FFFF; /* clear the logical processor count when HTT=0 */
1078 ebx |= 0x00010000; /* set to 1 just for precaution */
1080 else
1082 /* Clear the Cmp_Legacy bit
1083 * This bit is supposed to be zero when HTT = 0.
1084 * See details on page 23 of AMD CPUID Specification.
1085 */
1086 clear_bit(X86_FEATURE_CMP_LEGACY & 31, &ecx);
1087 /* Make SVM feature invisible to the guest. */
1088 clear_bit(X86_FEATURE_SVME & 31, &ecx);
1089 #ifdef __i386__
1090 /* Mask feature for Intel ia32e or AMD long mode. */
1091 clear_bit(X86_FEATURE_LAHF_LM & 31, &ecx);
1093 clear_bit(X86_FEATURE_LM & 31, &edx);
1094 clear_bit(X86_FEATURE_SYSCALL & 31, &edx);
1095 #endif
1096 /* So far, we do not support 3DNow for the guest. */
1097 clear_bit(X86_FEATURE_3DNOW & 31, &edx);
1098 clear_bit(X86_FEATURE_3DNOWEXT & 31, &edx);
1101 else if ( ( input == 0x80000007 ) || ( input == 0x8000000A ) )
1103 /* Mask out features of power management and SVM extension. */
1104 eax = ebx = ecx = edx = 0;
1106 else if ( input == 0x80000008 )
1108 /* Make sure Number of CPU core is 1 when HTT=0 */
1109 ecx &= 0xFFFFFF00;
1113 regs->eax = (unsigned long)eax;
1114 regs->ebx = (unsigned long)ebx;
1115 regs->ecx = (unsigned long)ecx;
1116 regs->edx = (unsigned long)edx;
1118 HVM_DBG_LOG(DBG_LEVEL_1,
1119 "svm_vmexit_do_cpuid: eip: %lx, input: %lx, out:eax=%x, "
1120 "ebx=%x, ecx=%x, edx=%x",
1121 eip, input, eax, ebx, ecx, edx);
1123 inst_len = __get_instruction_length(vmcb, INSTR_CPUID, NULL);
1124 ASSERT(inst_len > 0);
1125 __update_guest_eip(vmcb, inst_len);
1129 static inline unsigned long *get_reg_p(unsigned int gpreg,
1130 struct cpu_user_regs *regs, struct vmcb_struct *vmcb)
1131 {
1132 unsigned long *reg_p = NULL;
1133 switch (gpreg)
1134 {
1135 case SVM_REG_EAX:
1136 reg_p = (unsigned long *)&regs->eax;
1137 break;
1138 case SVM_REG_EBX:
1139 reg_p = (unsigned long *)&regs->ebx;
1140 break;
1141 case SVM_REG_ECX:
1142 reg_p = (unsigned long *)&regs->ecx;
1143 break;
1144 case SVM_REG_EDX:
1145 reg_p = (unsigned long *)&regs->edx;
1146 break;
1147 case SVM_REG_EDI:
1148 reg_p = (unsigned long *)&regs->edi;
1149 break;
1150 case SVM_REG_ESI:
1151 reg_p = (unsigned long *)&regs->esi;
1152 break;
1153 case SVM_REG_EBP:
1154 reg_p = (unsigned long *)&regs->ebp;
1155 break;
1156 case SVM_REG_ESP:
1157 reg_p = (unsigned long *)&vmcb->rsp;
1158 break;
1159 #ifdef __x86_64__
1160 case SVM_REG_R8:
1161 reg_p = (unsigned long *)&regs->r8;
1162 break;
1163 case SVM_REG_R9:
1164 reg_p = (unsigned long *)&regs->r9;
1165 break;
1166 case SVM_REG_R10:
1167 reg_p = (unsigned long *)&regs->r10;
1168 break;
1169 case SVM_REG_R11:
1170 reg_p = (unsigned long *)&regs->r11;
1171 break;
1172 case SVM_REG_R12:
1173 reg_p = (unsigned long *)&regs->r12;
1174 break;
1175 case SVM_REG_R13:
1176 reg_p = (unsigned long *)&regs->r13;
1177 break;
1178 case SVM_REG_R14:
1179 reg_p = (unsigned long *)&regs->r14;
1180 break;
1181 case SVM_REG_R15:
1182 reg_p = (unsigned long *)&regs->r15;
1183 break;
1184 #endif
1185 default:
1186 BUG();
1187 }
1189 return reg_p;
1190 }
1193 static inline unsigned long get_reg(unsigned int gpreg,
1194 struct cpu_user_regs *regs, struct vmcb_struct *vmcb)
1195 {
1196 unsigned long *gp;
1197 gp = get_reg_p(gpreg, regs, vmcb);
1198 return *gp;
1199 }
1202 static inline void set_reg(unsigned int gpreg, unsigned long value,
1203 struct cpu_user_regs *regs, struct vmcb_struct *vmcb)
1204 {
1205 unsigned long *gp;
1206 gp = get_reg_p(gpreg, regs, vmcb);
1207 *gp = value;
1208 }
1211 static void svm_dr_access(struct vcpu *v, struct cpu_user_regs *regs)
1212 {
1213 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1215 v->arch.hvm_vcpu.flag_dr_dirty = 1;
1217 __restore_debug_registers(v);
1219 /* allow the guest full access to the debug registers */
1220 vmcb->dr_intercepts = 0;
1221 }
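
Taken together with svm_save_dr() and svm_restore_dr() above, the debug registers are virtualized lazily; the life cycle, sketched for clarity from the code in this file:

/* 1. guest accesses a %dr      -> DR intercept vmexit -> svm_dr_access():
 *    reload the guest's DRs, drop dr_intercepts, mark flag_dr_dirty
 * 2. VCPU is switched out      -> svm_save_dr(): save %db0-%db3 into
 *    guest_context and re-arm the DR write intercepts
 * 3. VCPU is switched back in  -> svm_restore_dr(): reload the DRs only
 *    if dr7 shows that breakpoints are actually enabled
 */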
1224 static void svm_get_prefix_info(
1225 struct vmcb_struct *vmcb,
1226 unsigned int dir, segment_selector_t **seg, unsigned int *asize)
1227 {
1228 unsigned char inst[MAX_INST_LEN];
1229 int i;
1231 memset(inst, 0, MAX_INST_LEN);
1232 if (inst_copy_from_guest(inst, svm_rip2pointer(vmcb), sizeof(inst))
1233 != MAX_INST_LEN)
1234 {
1235 printk("%s: get guest instruction failed\n", __func__);
1236 domain_crash_synchronous();
1237 }
1239 for (i = 0; i < MAX_INST_LEN; i++)
1240 {
1241 switch (inst[i])
1242 {
1243 case 0xf3: /* REPZ */
1244 case 0xf2: /* REPNZ */
1245 case 0xf0: /* LOCK */
1246 case 0x66: /* data32 */
1247 #ifdef __x86_64__
1248 /* REX prefixes */
1249 case 0x40:
1250 case 0x41:
1251 case 0x42:
1252 case 0x43:
1253 case 0x44:
1254 case 0x45:
1255 case 0x46:
1256 case 0x47:
1258 case 0x48:
1259 case 0x49:
1260 case 0x4a:
1261 case 0x4b:
1262 case 0x4c:
1263 case 0x4d:
1264 case 0x4e:
1265 case 0x4f:
1266 #endif
1267 continue;
1268 case 0x67: /* addr32 */
1269 *asize ^= 48; /* Switch 16/32 bits */
1270 continue;
1271 case 0x2e: /* CS */
1272 *seg = &vmcb->cs;
1273 continue;
1274 case 0x36: /* SS */
1275 *seg = &vmcb->ss;
1276 continue;
1277 case 0x26: /* ES */
1278 *seg = &vmcb->es;
1279 continue;
1280 case 0x64: /* FS */
1281 *seg = &vmcb->fs;
1282 continue;
1283 case 0x65: /* GS */
1284 *seg = &vmcb->gs;
1285 continue;
1286 case 0x3e: /* DS */
1287 *seg = &vmcb->ds;
1288 continue;
1289 default:
1290 break;
1291 }
1292 return;
1293 }
1294 }
1297 /* Get the address of INS/OUTS instruction */
1298 static inline int svm_get_io_address(
1299 struct vcpu *v,
1300 struct cpu_user_regs *regs, unsigned int dir,
1301 unsigned long *count, unsigned long *addr)
1303 unsigned long reg;
1304 unsigned int asize = 0;
1305 unsigned int isize;
1306 int long_mode;
1307 ioio_info_t info;
1308 segment_selector_t *seg = NULL;
1309 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1311 info.bytes = vmcb->exitinfo1;
1313 /* If we're in long mode, we shouldn't check the segment presence & limit */
1314 long_mode = vmcb->cs.attributes.fields.l && vmcb->efer & EFER_LMA;
1316 /* d field of cs.attributes is 1 for 32-bit, 0 for 16 or 64 bit.
1317 * l field combined with EFER_LMA -> longmode says whether it's 16 or 64 bit.
1318 */
1319 asize = (long_mode)?64:((vmcb->cs.attributes.fields.db)?32:16);
1322 /* The ins/outs instructions are single byte, so if we have got more
1323 * than one byte (+ maybe rep-prefix), we have some prefix so we need
1324 * to figure out what it is...
1325 */
1326 isize = vmcb->exitinfo2 - vmcb->rip;
1328 if (info.fields.rep)
1329 isize --;
1331 if (isize > 1)
1333 svm_get_prefix_info(vmcb, dir, &seg, &asize);
1336 ASSERT(dir == IOREQ_READ || dir == IOREQ_WRITE);
1338 if (dir == IOREQ_WRITE)
1340 reg = regs->esi;
1341 if (!seg) /* If no prefix, use DS. */
1342 seg = &vmcb->ds;
1344 else
1346 reg = regs->edi;
1347 seg = &vmcb->es; /* Note: This is ALWAYS ES. */
1350 /* If the segment isn't present, give GP fault! */
1351 if (!long_mode && !seg->attributes.fields.p)
1353 svm_inject_exception(v, TRAP_gp_fault, 1, seg->sel);
1354 return 0;
1357 if (asize == 16)
1359 *addr = (reg & 0xFFFF);
1360 *count = regs->ecx & 0xffff;
1362 else
1364 *addr = reg;
1365 *count = regs->ecx;
1368 if (!long_mode) {
1369 if (*addr > seg->limit)
1371 svm_inject_exception(v, TRAP_gp_fault, 1, seg->sel);
1372 return 0;
1374 else
1376 *addr += seg->base;
1381 return 1;
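
A worked example, purely illustrative: a 16-bit protected-mode guest executing `rep outsw` with CX=5, SI=0x100 and no segment-override prefix reaches this point with:

/*   asize  = 16                        (cs.attributes.fields.db == 0)
 *   seg    = &vmcb->ds                 (OUTS, no override prefix)
 *   *count = regs->ecx & 0xffff = 5
 *   *addr  = (0x100 & 0xFFFF) + vmcb->ds.base   (after the limit check)
 */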
1385 static void svm_io_instruction(struct vcpu *v)
1387 struct cpu_user_regs *regs;
1388 struct hvm_io_op *pio_opp;
1389 unsigned int port;
1390 unsigned int size, dir, df;
1391 ioio_info_t info;
1392 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1394 ASSERT(vmcb);
1395 pio_opp = &current->arch.hvm_vcpu.io_op;
1396 pio_opp->instr = INSTR_PIO;
1397 pio_opp->flags = 0;
1399 regs = &pio_opp->io_context;
1401 /* Copy current guest state into io instruction state structure. */
1402 memcpy(regs, guest_cpu_user_regs(), HVM_CONTEXT_STACK_BYTES);
1403 hvm_store_cpu_guest_regs(v, regs, NULL);
1405 info.bytes = vmcb->exitinfo1;
1407 port = info.fields.port; /* port used to be addr */
1408 dir = info.fields.type; /* direction */
1409 df = regs->eflags & X86_EFLAGS_DF ? 1 : 0;
1411 if (info.fields.sz32)
1412 size = 4;
1413 else if (info.fields.sz16)
1414 size = 2;
1415 else
1416 size = 1;
1418 HVM_DBG_LOG(DBG_LEVEL_IO,
1419 "svm_io_instruction: port 0x%x eip=%x:%"PRIx64", "
1420 "exit_qualification = %"PRIx64,
1421 port, vmcb->cs.sel, vmcb->rip, info.bytes);
1423 /* string instruction */
1424 if (info.fields.str)
1426 unsigned long addr, count;
1427 int sign = regs->eflags & X86_EFLAGS_DF ? -1 : 1;
1429 if (!svm_get_io_address(v, regs, dir, &count, &addr))
1431 /* We failed to get a valid address, so don't do the IO operation -
1432 * it would just get worse if we do! Hopefully the guest is handling
1433 * gp-faults...
1434 */
1435 return;
1438 /* "rep" prefix */
1439 if (info.fields.rep)
1441 pio_opp->flags |= REPZ;
1443 else
1445 count = 1;
1448 /*
1449 * Handle string pio instructions that cross pages or that
1450 * are unaligned. See the comments in hvm_platform.c/handle_mmio()
1451 */
1452 if ((addr & PAGE_MASK) != ((addr + size - 1) & PAGE_MASK))
1454 unsigned long value = 0;
1456 pio_opp->flags |= OVERLAP;
1457 pio_opp->addr = addr;
1459 if (dir == IOREQ_WRITE) /* OUTS */
1461 if (hvm_paging_enabled(current))
1462 (void)hvm_copy_from_guest_virt(&value, addr, size);
1463 else
1464 (void)hvm_copy_from_guest_phys(&value, addr, size);
1467 if (count == 1)
1468 regs->eip = vmcb->exitinfo2;
1470 send_pio_req(port, 1, size, value, dir, df, 0);
1472 else
1474 unsigned long last_addr = sign > 0 ? addr + count * size - 1
1475 : addr - (count - 1) * size;
1477 if ((addr & PAGE_MASK) != (last_addr & PAGE_MASK))
1479 if (sign > 0)
1480 count = (PAGE_SIZE - (addr & ~PAGE_MASK)) / size;
1481 else
1482 count = (addr & ~PAGE_MASK) / size + 1;
1484 else
1485 regs->eip = vmcb->exitinfo2;
1487 send_pio_req(port, count, size, addr, dir, df, 1);
1490 else
1492 /*
1493 * On SVM, the RIP of the instruction following the IN/OUT is saved in
1494 * ExitInfo2
1495 */
1496 regs->eip = vmcb->exitinfo2;
1498 if (port == 0xe9 && dir == IOREQ_WRITE && size == 1)
1499 hvm_print_line(v, regs->eax); /* guest debug output */
1501 send_pio_req(port, 1, size, regs->eax, dir, df, 0);
1505 static int svm_set_cr0(unsigned long value)
1507 struct vcpu *v = current;
1508 unsigned long mfn;
1509 int paging_enabled;
1510 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1511 unsigned long old_base_mfn;
1513 ASSERT(vmcb);
1515 /* We don't want to lose PG. ET is reserved and should always be 1. */
1516 paging_enabled = svm_paging_enabled(v);
1517 value |= X86_CR0_ET;
1518 vmcb->cr0 = value | X86_CR0_PG;
1519 v->arch.hvm_svm.cpu_shadow_cr0 = value;
1521 /* TS cleared? Then initialise FPU now. */
1522 if ( !(value & X86_CR0_TS) )
1524 setup_fpu(v);
1525 vmcb->exception_intercepts &= ~EXCEPTION_BITMAP_NM;
1528 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR0 value = %lx\n", value);
1530 if ((value & X86_CR0_PE) && (value & X86_CR0_PG) && !paging_enabled)
1532 /* The guest CR3 must be pointing to the guest physical. */
1533 mfn = get_mfn_from_gpfn(v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT);
1534 if ( !VALID_MFN(mfn) || !get_page(mfn_to_page(mfn), v->domain))
1536 printk("Invalid CR3 value = %lx\n", v->arch.hvm_svm.cpu_cr3);
1537 domain_crash_synchronous(); /* need to take a clean path */
1540 #if defined(__x86_64__)
1541 if (test_bit(SVM_CPU_STATE_LME_ENABLED, &v->arch.hvm_svm.cpu_state)
1542 && !test_bit(SVM_CPU_STATE_PAE_ENABLED,
1543 &v->arch.hvm_svm.cpu_state))
1545 HVM_DBG_LOG(DBG_LEVEL_1, "Enable paging before PAE enable\n");
1546 svm_inject_exception(v, TRAP_gp_fault, 1, 0);
1549 if (test_bit(SVM_CPU_STATE_LME_ENABLED, &v->arch.hvm_svm.cpu_state))
1551 /* At this point PAE must already be enabled. */
1552 HVM_DBG_LOG(DBG_LEVEL_1, "Enable the Long mode\n");
1553 set_bit(SVM_CPU_STATE_LMA_ENABLED,
1554 &v->arch.hvm_svm.cpu_state);
1555 vmcb->efer |= (EFER_LMA | EFER_LME);
1557 #endif /* __x86_64__ */
1559 /* Now arch.guest_table points to machine physical. */
1560 old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
1561 v->arch.guest_table = pagetable_from_pfn(mfn);
1562 if ( old_base_mfn )
1563 put_page(mfn_to_page(old_base_mfn));
1564 shadow_update_paging_modes(v);
1566 HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx",
1567 (unsigned long) (mfn << PAGE_SHIFT));
1569 vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3;
1570 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
1573 if ( !((value & X86_CR0_PE) && (value & X86_CR0_PG)) && paging_enabled )
1574 if ( v->arch.hvm_svm.cpu_cr3 ) {
1575 put_page(mfn_to_page(get_mfn_from_gpfn(
1576 v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT)));
1577 v->arch.guest_table = pagetable_null();
1580 /*
1581 * SVM implements paged real-mode and when we return to real-mode
1582 * we revert back to the physical mappings that the domain builder
1583 * created.
1584 */
1585 if ((value & X86_CR0_PE) == 0) {
1586 if (value & X86_CR0_PG) {
1587 svm_inject_exception(v, TRAP_gp_fault, 1, 0);
1588 return 0;
1590 shadow_update_paging_modes(v);
1591 vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3;
1592 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
1594 else if ( (value & (X86_CR0_PE | X86_CR0_PG)) == X86_CR0_PE )
1596 /* Switching to protected mode with paging disabled. */
1597 shadow_update_paging_modes(v);
1598 vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3;
1599 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
1602 return 1;
1605 /*
1606 * Read from control registers. CR0 and CR4 are read from the shadow.
1607 */
1608 static void mov_from_cr(int cr, int gp, struct cpu_user_regs *regs)
1609 {
1610 unsigned long value = 0;
1611 struct vcpu *v = current;
1612 struct vlapic *vlapic = VLAPIC(v);
1613 struct vmcb_struct *vmcb;
1615 vmcb = v->arch.hvm_svm.vmcb;
1616 ASSERT(vmcb);
1618 switch (cr)
1619 {
1620 case 0:
1621 value = v->arch.hvm_svm.cpu_shadow_cr0;
1622 if (svm_dbg_on)
1623 printk("CR0 read =%lx \n", value );
1624 break;
1625 case 2:
1626 value = vmcb->cr2;
1627 break;
1628 case 3:
1629 value = (unsigned long) v->arch.hvm_svm.cpu_cr3;
1630 if (svm_dbg_on)
1631 printk("CR3 read =%lx \n", value );
1632 break;
1633 case 4:
1634 value = (unsigned long) v->arch.hvm_svm.cpu_shadow_cr4;
1635 if (svm_dbg_on)
1636 printk("CR4 read=%lx\n", value);
1637 break;
1638 case 8:
1639 if ( vlapic == NULL )
1640 break;
1641 value = (unsigned long)vlapic_get_reg(vlapic, APIC_TASKPRI);
1642 value = (value & 0xF0) >> 4;
1643 break;
1645 default:
1646 __hvm_bug(regs);
1647 }
1649 set_reg(gp, value, regs, vmcb);
1651 HVM_DBG_LOG(DBG_LEVEL_VMMU, "mov_from_cr: CR%d, value = %lx,", cr, value);
1655 static inline int svm_pgbit_test(struct vcpu *v)
1656 {
1657 return v->arch.hvm_svm.cpu_shadow_cr0 & X86_CR0_PG;
1658 }
1661 /*
1662 * Write to control registers
1663 */
1664 static int mov_to_cr(int gpreg, int cr, struct cpu_user_regs *regs)
1666 unsigned long value;
1667 unsigned long old_cr;
1668 struct vcpu *v = current;
1669 struct vlapic *vlapic = VLAPIC(v);
1670 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1672 ASSERT(vmcb);
1674 value = get_reg(gpreg, regs, vmcb);
1676 HVM_DBG_LOG(DBG_LEVEL_1, "mov_to_cr: CR%d, value = %lx,", cr, value);
1677 HVM_DBG_LOG(DBG_LEVEL_1, "current = %lx,", (unsigned long) current);
1679 switch (cr)
1681 case 0:
1682 if (svm_dbg_on)
1683 printk("CR0 write =%lx \n", value );
1684 return svm_set_cr0(value);
1686 case 3:
1688 unsigned long old_base_mfn, mfn;
1689 if (svm_dbg_on)
1690 printk("CR3 write =%lx \n", value );
1691 /* If paging is not enabled yet, simply copy the value to CR3. */
1692 if (!svm_paging_enabled(v)) {
1693 v->arch.hvm_svm.cpu_cr3 = value;
1694 break;
1696 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
1698 /* We make a new one if the shadow does not exist. */
1699 if (value == v->arch.hvm_svm.cpu_cr3)
1701 /*
1702 * This is simple TLB flush, implying the guest has
1703 * removed some translation or changed page attributes.
1704 * We simply invalidate the shadow.
1705 */
1706 mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT);
1707 if (mfn != pagetable_get_pfn(v->arch.guest_table))
1708 __hvm_bug(regs);
1709 shadow_update_cr3(v);
1711 else
1713 /*
1714 * If different, make a shadow. Check if the PDBR is valid
1715 * first.
1716 */
1717 HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 value = %lx", value);
1718 mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT);
1719 if ( !VALID_MFN(mfn) || !get_page(mfn_to_page(mfn), v->domain))
1721 printk("Invalid CR3 value=%lx\n", value);
1722 domain_crash_synchronous(); /* need to take a clean path */
1725 old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
1726 v->arch.guest_table = pagetable_from_pfn(mfn);
1728 if (old_base_mfn)
1729 put_page(mfn_to_page(old_base_mfn));
1731 v->arch.hvm_svm.cpu_cr3 = value;
1732 update_cr3(v);
1733 vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3;
1734 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx", value);
1736 break;
1739 case 4: /* CR4 */
1741 if (svm_dbg_on)
1742 printk( "write cr4=%lx, cr0=%lx\n",
1743 value, v->arch.hvm_svm.cpu_shadow_cr0 );
1744 old_cr = v->arch.hvm_svm.cpu_shadow_cr4;
1745 if ( value & X86_CR4_PAE && !(old_cr & X86_CR4_PAE) )
1747 set_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
1748 if ( svm_pgbit_test(v) )
1750 /* The guest is a 32-bit PAE guest. */
1751 #if CONFIG_PAGING_LEVELS >= 3
1752 unsigned long mfn, old_base_mfn;
1753 mfn = get_mfn_from_gpfn(v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT);
1754 if ( !VALID_MFN(mfn) ||
1755 !get_page(mfn_to_page(mfn), v->domain) )
1757 printk("Invalid CR3 value = %lx", v->arch.hvm_svm.cpu_cr3);
1758 domain_crash_synchronous(); /* need to take a clean path */
1761 /*
1762 * Now arch.guest_table points to machine physical.
1763 */
1765 old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
1766 v->arch.guest_table = pagetable_from_pfn(mfn);
1767 if ( old_base_mfn )
1768 put_page(mfn_to_page(old_base_mfn));
1769 shadow_update_paging_modes(v);
1771 HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx",
1772 (unsigned long) (mfn << PAGE_SHIFT));
1774 vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3;
1776 HVM_DBG_LOG(DBG_LEVEL_VMMU,
1777 "Update CR3 value = %lx, mfn = %lx",
1778 v->arch.hvm_svm.cpu_cr3, mfn);
1779 #endif
1782 else if (value & X86_CR4_PAE) {
1783 set_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
1784 } else {
1785 if (test_bit(SVM_CPU_STATE_LMA_ENABLED,
1786 &v->arch.hvm_svm.cpu_state)) {
1787 svm_inject_exception(v, TRAP_gp_fault, 1, 0);
1789 clear_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
1792 v->arch.hvm_svm.cpu_shadow_cr4 = value;
1793 vmcb->cr4 = value | SVM_CR4_HOST_MASK;
1795 /*
1796 * Writing to CR4 to modify the PSE, PGE, or PAE flag invalidates
1797 * all TLB entries except global entries.
1798 */
1799 if ((old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE))
1801 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
1802 shadow_update_paging_modes(v);
1804 break;
1807 case 8:
1809 if ( vlapic == NULL )
1810 break;
1811 vlapic_set_reg(vlapic, APIC_TASKPRI, ((value & 0x0F) << 4));
1812 break;
1815 default:
1816 printk("invalid cr: %d\n", cr);
1817 __hvm_bug(regs);
1820 return 1;
1824 #define ARR_SIZE(x) (sizeof(x) / sizeof(x[0]))
1827 static int svm_cr_access(struct vcpu *v, unsigned int cr, unsigned int type,
1828 struct cpu_user_regs *regs)
1830 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1831 int inst_len = 0;
1832 int index;
1833 unsigned int gpreg;
1834 unsigned long value;
1835 u8 buffer[MAX_INST_LEN];
1836 u8 prefix = 0;
1837 int result = 1;
1838 enum instruction_index list_a[] = {INSTR_MOV2CR, INSTR_CLTS, INSTR_LMSW};
1839 enum instruction_index list_b[] = {INSTR_MOVCR2, INSTR_SMSW};
1840 enum instruction_index match;
1842 ASSERT(vmcb);
1844 inst_copy_from_guest(buffer, svm_rip2pointer(vmcb), sizeof(buffer));
1846 /* get index to first actual instruction byte - as we will need to know
1847 where the prefix lives later on */
1848 index = skip_prefix_bytes(buffer, sizeof(buffer));
1850 if ( type == TYPE_MOV_TO_CR )
1852 inst_len = __get_instruction_length_from_list(
1853 vmcb, list_a, ARR_SIZE(list_a), &buffer[index], &match);
1855 else /* type == TYPE_MOV_FROM_CR */
1857 inst_len = __get_instruction_length_from_list(
1858 vmcb, list_b, ARR_SIZE(list_b), &buffer[index], &match);
1861 ASSERT(inst_len > 0);
1863 inst_len += index;
1865 /* Check for REX prefix - it's ALWAYS the last byte of any prefix bytes */
1866 if (index > 0 && (buffer[index-1] & 0xF0) == 0x40)
1867 prefix = buffer[index-1];
1869 HVM_DBG_LOG(DBG_LEVEL_1, "eip = %lx", (unsigned long) vmcb->rip);
1871 switch (match)
1873 case INSTR_MOV2CR:
1874 gpreg = decode_src_reg(prefix, buffer[index+2]);
1875 result = mov_to_cr(gpreg, cr, regs);
1876 break;
1878 case INSTR_MOVCR2:
1879 gpreg = decode_src_reg(prefix, buffer[index+2]);
1880 mov_from_cr(cr, gpreg, regs);
1881 break;
1883 case INSTR_CLTS:
1884 /* TS being cleared means that it's time to restore fpu state. */
1885 setup_fpu(current);
1886 vmcb->exception_intercepts &= ~EXCEPTION_BITMAP_NM;
1887 vmcb->cr0 &= ~X86_CR0_TS; /* clear TS */
1888 v->arch.hvm_svm.cpu_shadow_cr0 &= ~X86_CR0_TS; /* clear TS */
1889 break;
1891 case INSTR_LMSW:
1892 if (svm_dbg_on)
1893 svm_dump_inst(svm_rip2pointer(vmcb));
1895 gpreg = decode_src_reg(prefix, buffer[index+2]);
1896 value = get_reg(gpreg, regs, vmcb) & 0xF;
1898 if (svm_dbg_on)
1899 printk("CR0-LMSW value=%lx, reg=%d, inst_len=%d\n", value, gpreg,
1900 inst_len);
1902 value = (v->arch.hvm_svm.cpu_shadow_cr0 & ~0xF) | value;
1904 if (svm_dbg_on)
1905 printk("CR0-LMSW CR0 - New value=%lx\n", value);
1907 result = svm_set_cr0(value);
1908 break;
1910 case INSTR_SMSW:
1911 if (svm_dbg_on)
1912 svm_dump_inst(svm_rip2pointer(vmcb));
1913 value = v->arch.hvm_svm.cpu_shadow_cr0;
1914 gpreg = decode_src_reg(prefix, buffer[index+2]);
1915 set_reg(gpreg, value, regs, vmcb);
1917 if (svm_dbg_on)
1918 printk("CR0-SMSW value=%lx, reg=%d, inst_len=%d\n", value, gpreg,
1919 inst_len);
1920 break;
1922 default:
1923 __hvm_bug(regs);
1924 break;
1927 ASSERT(inst_len);
1929 __update_guest_eip(vmcb, inst_len);
1931 return result;
1934 static inline void svm_do_msr_access(
1935 struct vcpu *v, struct cpu_user_regs *regs)
1937 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1938 int inst_len;
1939 u64 msr_content=0;
1940 u32 eax, edx;
1942 ASSERT(vmcb);
1944 HVM_DBG_LOG(DBG_LEVEL_1, "svm_do_msr_access: ecx=%lx, eax=%lx, edx=%lx, "
1945 "exitinfo = %lx", (unsigned long)regs->ecx,
1946 (unsigned long)regs->eax, (unsigned long)regs->edx,
1947 (unsigned long)vmcb->exitinfo1);
1949 /* is it a read? */
1950 if (vmcb->exitinfo1 == 0)
1952 inst_len = __get_instruction_length(vmcb, INSTR_RDMSR, NULL);
1954 regs->edx = 0;
1955 switch (regs->ecx) {
1956 case MSR_IA32_TIME_STAMP_COUNTER:
1957 msr_content = hvm_get_guest_time(v);
1958 break;
1959 case MSR_IA32_SYSENTER_CS:
1960 msr_content = vmcb->sysenter_cs;
1961 break;
1962 case MSR_IA32_SYSENTER_ESP:
1963 msr_content = vmcb->sysenter_esp;
1964 break;
1965 case MSR_IA32_SYSENTER_EIP:
1966 msr_content = vmcb->sysenter_eip;
1967 break;
1968 case MSR_IA32_APICBASE:
1969 msr_content = VLAPIC(v) ? VLAPIC(v)->apic_base_msr : 0;
1970 break;
1971 default:
1972 if (long_mode_do_msr_read(regs))
1973 goto done;
1975 if ( rdmsr_hypervisor_regs(regs->ecx, &eax, &edx) )
1977 regs->eax = eax;
1978 regs->edx = edx;
1979 goto done;
1982 rdmsr_safe(regs->ecx, regs->eax, regs->edx);
1983 break;
1985 regs->eax = msr_content & 0xFFFFFFFF;
1986 regs->edx = msr_content >> 32;
1988 else
1990 inst_len = __get_instruction_length(vmcb, INSTR_WRMSR, NULL);
1991 msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
1993 switch (regs->ecx)
1995 case MSR_IA32_TIME_STAMP_COUNTER:
1996 hvm_set_guest_time(v, msr_content);
1997 break;
1998 case MSR_IA32_SYSENTER_CS:
1999 vmcb->sysenter_cs = msr_content;
2000 break;
2001 case MSR_IA32_SYSENTER_ESP:
2002 vmcb->sysenter_esp = msr_content;
2003 break;
2004 case MSR_IA32_SYSENTER_EIP:
2005 vmcb->sysenter_eip = msr_content;
2006 break;
2007 case MSR_IA32_APICBASE:
2008 vlapic_msr_set(VLAPIC(v), msr_content);
2009 break;
2010 default:
2011 if ( !long_mode_do_msr_write(regs) )
2012 wrmsr_hypervisor_regs(regs->ecx, regs->eax, regs->edx);
2013 break;
2017 done:
2019 HVM_DBG_LOG(DBG_LEVEL_1, "svm_do_msr_access returns: "
2020 "ecx=%lx, eax=%lx, edx=%lx",
2021 (unsigned long)regs->ecx, (unsigned long)regs->eax,
2022 (unsigned long)regs->edx);
2024 __update_guest_eip(vmcb, inst_len);
2028 static inline void svm_vmexit_do_hlt(struct vmcb_struct *vmcb)
2029 {
2030 __update_guest_eip(vmcb, 1);
2032 /* Check for interrupt not handled or new interrupt. */
2033 if ( (vmcb->rflags & X86_EFLAGS_IF) &&
2034 (vmcb->vintr.fields.irq || cpu_has_pending_irq(current)) )
2035 return;
2037 hvm_hlt(vmcb->rflags);
2038 }
2041 static void svm_vmexit_do_invd(struct vmcb_struct *vmcb)
2042 {
2043 int inst_len;
2045 /* Invalidate the cache - we can't really do that safely - maybe we should
2046 * WBINVD, but I think it's just fine to completely ignore it - we should
2047 * have cache-snooping that solves it anyways. -- Mats P.
2048 */
2050 /* Tell the user that we did this - just in case someone runs some really
2051 * weird operating system and wants to know why it's not working...
2052 */
2053 printk("INVD instruction intercepted - ignored\n");
2055 inst_len = __get_instruction_length(vmcb, INSTR_INVD, NULL);
2056 __update_guest_eip(vmcb, inst_len);
2057 }
2062 #ifdef XEN_DEBUGGER
2063 static void svm_debug_save_cpu_user_regs(struct vmcb_struct *vmcb,
2064 struct cpu_user_regs *regs)
2066 regs->eip = vmcb->rip;
2067 regs->esp = vmcb->rsp;
2068 regs->eflags = vmcb->rflags;
2070 regs->xcs = vmcb->cs.sel;
2071 regs->xds = vmcb->ds.sel;
2072 regs->xes = vmcb->es.sel;
2073 regs->xfs = vmcb->fs.sel;
2074 regs->xgs = vmcb->gs.sel;
2075 regs->xss = vmcb->ss.sel;
2079 static void svm_debug_restore_cpu_user_regs(struct cpu_user_regs *regs)
2081 vmcb->ss.sel = regs->xss;
2082 vmcb->rsp = regs->esp;
2083 vmcb->rflags = regs->eflags;
2084 vmcb->cs.sel = regs->xcs;
2085 vmcb->rip = regs->eip;
2087 vmcb->gs.sel = regs->xgs;
2088 vmcb->fs.sel = regs->xfs;
2089 vmcb->es.sel = regs->xes;
2090 vmcb->ds.sel = regs->xds;
2092 #endif
2095 void svm_handle_invlpg(const short invlpga, struct cpu_user_regs *regs)
2097 struct vcpu *v = current;
2098 u8 opcode[MAX_INST_LEN], prefix, length = MAX_INST_LEN;
2099 unsigned long g_vaddr;
2100 int inst_len;
2101 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2103 ASSERT(vmcb);
2104 /*
2105 * Unknown how many bytes the invlpg instruction will take. Use the
2106 * maximum instruction length here
2107 */
2108 if (inst_copy_from_guest(opcode, svm_rip2pointer(vmcb), length) < length)
2110 printk("svm_handle_invlpg (): Error reading memory %d bytes\n",
2111 length);
2112 __hvm_bug(regs);
2115 if (invlpga)
2117 inst_len = __get_instruction_length(vmcb, INSTR_INVLPGA, opcode);
2118 ASSERT(inst_len > 0);
2119 __update_guest_eip(vmcb, inst_len);
2121 /*
2122 * The address is implicit on this instruction. At the moment, we don't
2123 * use ecx (ASID) to identify individual guests pages
2124 */
2125 g_vaddr = regs->eax;
2127 else
2129 /* What about multiple prefix codes? */
2130 prefix = (is_prefix(opcode[0])?opcode[0]:0);
2131 inst_len = __get_instruction_length(vmcb, INSTR_INVLPG, opcode);
2132 ASSERT(inst_len > 0);
2134 inst_len--;
2135 length -= inst_len;
2137 /*
2138 * Decode memory operand of the instruction including ModRM, SIB, and
2139 * displacement to get the effective address and length in bytes. Assume
2140 * the system in either 32- or 64-bit mode.
2141 */
2142 g_vaddr = get_effective_addr_modrm64(vmcb, regs, prefix,
2143 &opcode[inst_len], &length);
2145 inst_len += length;
2146 __update_guest_eip (vmcb, inst_len);
2149 /* Overkill, we may not need this */
2150 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
2151 shadow_invlpg(v, g_vaddr);
2155 /*
2156 * Reset to realmode causes execution to start at 0xF000:0xFFF0 in
2157 * 16-bit realmode. Basically, this mimics a processor reset.
2159 * returns 0 on success, non-zero otherwise
2160 */
2161 static int svm_do_vmmcall_reset_to_realmode(struct vcpu *v,
2162 struct cpu_user_regs *regs)
2163 {
2164 struct vmcb_struct *vmcb;
2166 ASSERT(v);
2167 ASSERT(regs);
2169 vmcb = v->arch.hvm_svm.vmcb;
2171 ASSERT(vmcb);
2173 /* clear the vmcb and user regs */
2174 memset(regs, 0, sizeof(struct cpu_user_regs));
2176 /* VMCB Control */
2177 vmcb->tsc_offset = 0;
2179 /* VMCB State */
2180 vmcb->cr0 = X86_CR0_ET | X86_CR0_PG;
2181 v->arch.hvm_svm.cpu_shadow_cr0 = X86_CR0_ET;
2183 vmcb->cr2 = 0;
2184 vmcb->efer = EFER_SVME;
2186 vmcb->cr4 = SVM_CR4_HOST_MASK;
2187 v->arch.hvm_svm.cpu_shadow_cr4 = 0;
2188 clear_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
2190 /* This will jump to ROMBIOS */
2191 vmcb->rip = 0xFFF0;
2193 /* Set up the segment registers and their hidden (cached descriptor) state. */
2194 vmcb->cs.sel = 0xF000;
2195 vmcb->cs.attributes.bytes = 0x089b;
2196 vmcb->cs.limit = 0xffff;
2197 vmcb->cs.base = 0x000F0000;
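/* With CS.base 0x000F0000 and RIP 0xFFF0 the first fetch is from linear
 * address 0x000FFFF0, i.e. the ROMBIOS entry point just below 1MB. */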
2199 vmcb->ss.sel = 0x00;
2200 vmcb->ss.attributes.bytes = 0x0893;
2201 vmcb->ss.limit = 0xffff;
2202 vmcb->ss.base = 0x00;
2204 vmcb->ds.sel = 0x00;
2205 vmcb->ds.attributes.bytes = 0x0893;
2206 vmcb->ds.limit = 0xffff;
2207 vmcb->ds.base = 0x00;
2209 vmcb->es.sel = 0x00;
2210 vmcb->es.attributes.bytes = 0x0893;
2211 vmcb->es.limit = 0xffff;
2212 vmcb->es.base = 0x00;
2214 vmcb->fs.sel = 0x00;
2215 vmcb->fs.attributes.bytes = 0x0893;
2216 vmcb->fs.limit = 0xffff;
2217 vmcb->fs.base = 0x00;
2219 vmcb->gs.sel = 0x00;
2220 vmcb->gs.attributes.bytes = 0x0893;
2221 vmcb->gs.limit = 0xffff;
2222 vmcb->gs.base = 0x00;
2224 vmcb->ldtr.sel = 0x00;
2225 vmcb->ldtr.attributes.bytes = 0x0000;
2226 vmcb->ldtr.limit = 0x0;
2227 vmcb->ldtr.base = 0x00;
2229 vmcb->gdtr.sel = 0x00;
2230 vmcb->gdtr.attributes.bytes = 0x0000;
2231 vmcb->gdtr.limit = 0x0;
2232 vmcb->gdtr.base = 0x00;
2234 vmcb->tr.sel = 0;
2235 vmcb->tr.attributes.bytes = 0;
2236 vmcb->tr.limit = 0x0;
2237 vmcb->tr.base = 0;
2239 vmcb->idtr.sel = 0x00;
2240 vmcb->idtr.attributes.bytes = 0x0000;
2241 vmcb->idtr.limit = 0x3ff;
2242 vmcb->idtr.base = 0x00;
2244 vmcb->rax = 0;
2245 vmcb->rsp = 0;
2247 return 0;
2251 /*
2252 * svm_do_vmmcall - SVM VMMCALL handler
2254 * returns 0 on success, non-zero otherwise
2255 */
2256 static int svm_do_vmmcall(struct vcpu *v, struct cpu_user_regs *regs)
2258 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2259 int inst_len;
2261 ASSERT(vmcb);
2262 ASSERT(regs);
2264 inst_len = __get_instruction_length(vmcb, INSTR_VMCALL, NULL);
2265 ASSERT(inst_len > 0);
2267 if ( regs->eax & 0x80000000 )
2269 /* VMMCALL sanity check */
2270 if ( vmcb->cpl > get_vmmcall_cpl(regs->edi) )
2272 printk("VMMCALL CPL check failed\n");
2273 return -1;
2276 /* handle the request */
2277 switch ( regs->eax )
2279 case VMMCALL_RESET_TO_REALMODE:
2280 if ( svm_do_vmmcall_reset_to_realmode(v, regs) )
2282 printk("svm_do_vmmcall_reset_to_realmode() failed\n");
2283 return -1;
2285 /* since we just reset the VMCB, return without adjusting
2286 * the eip */
2287 return 0;
2289 case VMMCALL_DEBUG:
2290 printk("DEBUG features not implemented yet\n");
2291 break;
2292 default:
2293 break;
2296 hvm_print_line(v, regs->eax); /* provides the current domain */
2298 else
2300 hvm_do_hypercall(regs);
2303 __update_guest_eip(vmcb, inst_len);
2304 return 0;
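#if 0
/*
 * Illustrative guest-side sketch (not part of this file): a guest issues a
 * VMMCALL by placing a command code in EAX and executing the VMMCALL
 * instruction, encoded as 0f 01 d9.  The helper name is hypothetical; the
 * command would be one of the VMMCALL_* codes from
 * <asm/hvm/svm/vmmcall.h> that the dispatch above switches on.
 */
static inline void guest_vmmcall(unsigned int cmd)
{
    __asm__ __volatile__ ( ".byte 0x0f,0x01,0xd9" /* VMMCALL */
                           : "+a" (cmd) : : "memory" );
}
#endif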
2308 void svm_dump_inst(unsigned long eip)
2310 u8 opcode[256];
2311 unsigned long ptr;
2312 int len;
2313 int i;
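/* Dump the 256-byte block containing eip: round down to a 256-byte
 * boundary and copy sizeof(opcode) bytes from the guest. */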
2315 ptr = eip & ~0xff;
2316 len = 0;
2318 if (hvm_copy_from_guest_virt(opcode, ptr, sizeof(opcode)) == 0)
2319 len = sizeof(opcode);
2321 printk("Code bytes around(len=%d) %lx:", len, eip);
2322 for (i = 0; i < len; i++)
2324 if ((i & 0x0f) == 0)
2325 printk("\n%08lx:", ptr+i);
2327 printk("%02x ", opcode[i]);
2330 printk("\n");
2334 void svm_dump_regs(const char *from, struct cpu_user_regs *regs)
2336 struct vcpu *v = current;
2337 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2338 unsigned long pt = v->arch.hvm_vcpu.hw_cr3;
2340 printk("%s: guest registers from %s:\n", __func__, from);
2341 #if defined (__x86_64__)
2342 printk("rax: %016lx rbx: %016lx rcx: %016lx\n",
2343 regs->rax, regs->rbx, regs->rcx);
2344 printk("rdx: %016lx rsi: %016lx rdi: %016lx\n",
2345 regs->rdx, regs->rsi, regs->rdi);
2346 printk("rbp: %016lx rsp: %016lx r8: %016lx\n",
2347 regs->rbp, regs->rsp, regs->r8);
2348 printk("r9: %016lx r10: %016lx r11: %016lx\n",
2349 regs->r9, regs->r10, regs->r11);
2350 printk("r12: %016lx r13: %016lx r14: %016lx\n",
2351 regs->r12, regs->r13, regs->r14);
2352 printk("r15: %016lx cr0: %016lx cr3: %016lx\n",
2353 regs->r15, v->arch.hvm_svm.cpu_shadow_cr0, vmcb->cr3);
2354 #else
2355 printk("eax: %08x, ebx: %08x, ecx: %08x, edx: %08x\n",
2356 regs->eax, regs->ebx, regs->ecx, regs->edx);
2357 printk("edi: %08x, esi: %08x, ebp: %08x, esp: %08x\n",
2358 regs->edi, regs->esi, regs->ebp, regs->esp);
2359 printk("%s: guest cr0: %lx\n", __func__,
2360 v->arch.hvm_svm.cpu_shadow_cr0);
2361 printk("guest CR3 = %llx\n", vmcb->cr3);
2362 #endif
2363 printk("%s: pt = %lx\n", __func__, pt);
2367 void svm_dump_host_regs(const char *from)
2369 struct vcpu *v = current;
2370 unsigned long pt = pagetable_get_paddr(v->arch.monitor_table);
2371 unsigned long cr3, cr0;
2372 printk("Host registers at %s\n", from);
2374 __asm__ __volatile__ ("\tmov %%cr0,%0\n"
2375 "\tmov %%cr3,%1\n"
2376 : "=r" (cr0), "=r"(cr3));
2377 printk("%s: pt = %lx, cr3 = %lx, cr0 = %lx\n", __func__, pt, cr3, cr0);
2380 #ifdef SVM_EXTRA_DEBUG
2381 static char *exit_reasons[] = {
2382 [VMEXIT_CR0_READ] = "CR0_READ",
2383 [VMEXIT_CR1_READ] = "CR1_READ",
2384 [VMEXIT_CR2_READ] = "CR2_READ",
2385 [VMEXIT_CR3_READ] = "CR3_READ",
2386 [VMEXIT_CR4_READ] = "CR4_READ",
2387 [VMEXIT_CR5_READ] = "CR5_READ",
2388 [VMEXIT_CR6_READ] = "CR6_READ",
2389 [VMEXIT_CR7_READ] = "CR7_READ",
2390 [VMEXIT_CR8_READ] = "CR8_READ",
2391 [VMEXIT_CR9_READ] = "CR9_READ",
2392 [VMEXIT_CR10_READ] = "CR10_READ",
2393 [VMEXIT_CR11_READ] = "CR11_READ",
2394 [VMEXIT_CR12_READ] = "CR12_READ",
2395 [VMEXIT_CR13_READ] = "CR13_READ",
2396 [VMEXIT_CR14_READ] = "CR14_READ",
2397 [VMEXIT_CR15_READ] = "CR15_READ",
2398 [VMEXIT_CR0_WRITE] = "CR0_WRITE",
2399 [VMEXIT_CR1_WRITE] = "CR1_WRITE",
2400 [VMEXIT_CR2_WRITE] = "CR2_WRITE",
2401 [VMEXIT_CR3_WRITE] = "CR3_WRITE",
2402 [VMEXIT_CR4_WRITE] = "CR4_WRITE",
2403 [VMEXIT_CR5_WRITE] = "CR5_WRITE",
2404 [VMEXIT_CR6_WRITE] = "CR6_WRITE",
2405 [VMEXIT_CR7_WRITE] = "CR7_WRITE",
2406 [VMEXIT_CR8_WRITE] = "CR8_WRITE",
2407 [VMEXIT_CR9_WRITE] = "CR9_WRITE",
2408 [VMEXIT_CR10_WRITE] = "CR10_WRITE",
2409 [VMEXIT_CR11_WRITE] = "CR11_WRITE",
2410 [VMEXIT_CR12_WRITE] = "CR12_WRITE",
2411 [VMEXIT_CR13_WRITE] = "CR13_WRITE",
2412 [VMEXIT_CR14_WRITE] = "CR14_WRITE",
2413 [VMEXIT_CR15_WRITE] = "CR15_WRITE",
2414 [VMEXIT_DR0_READ] = "DR0_READ",
2415 [VMEXIT_DR1_READ] = "DR1_READ",
2416 [VMEXIT_DR2_READ] = "DR2_READ",
2417 [VMEXIT_DR3_READ] = "DR3_READ",
2418 [VMEXIT_DR4_READ] = "DR4_READ",
2419 [VMEXIT_DR5_READ] = "DR5_READ",
2420 [VMEXIT_DR6_READ] = "DR6_READ",
2421 [VMEXIT_DR7_READ] = "DR7_READ",
2422 [VMEXIT_DR8_READ] = "DR8_READ",
2423 [VMEXIT_DR9_READ] = "DR9_READ",
2424 [VMEXIT_DR10_READ] = "DR10_READ",
2425 [VMEXIT_DR11_READ] = "DR11_READ",
2426 [VMEXIT_DR12_READ] = "DR12_READ",
2427 [VMEXIT_DR13_READ] = "DR13_READ",
2428 [VMEXIT_DR14_READ] = "DR14_READ",
2429 [VMEXIT_DR15_READ] = "DR15_READ",
2430 [VMEXIT_DR0_WRITE] = "DR0_WRITE",
2431 [VMEXIT_DR1_WRITE] = "DR1_WRITE",
2432 [VMEXIT_DR2_WRITE] = "DR2_WRITE",
2433 [VMEXIT_DR3_WRITE] = "DR3_WRITE",
2434 [VMEXIT_DR4_WRITE] = "DR4_WRITE",
2435 [VMEXIT_DR5_WRITE] = "DR5_WRITE",
2436 [VMEXIT_DR6_WRITE] = "DR6_WRITE",
2437 [VMEXIT_DR7_WRITE] = "DR7_WRITE",
2438 [VMEXIT_DR8_WRITE] = "DR8_WRITE",
2439 [VMEXIT_DR9_WRITE] = "DR9_WRITE",
2440 [VMEXIT_DR10_WRITE] = "DR10_WRITE",
2441 [VMEXIT_DR11_WRITE] = "DR11_WRITE",
2442 [VMEXIT_DR12_WRITE] = "DR12_WRITE",
2443 [VMEXIT_DR13_WRITE] = "DR13_WRITE",
2444 [VMEXIT_DR14_WRITE] = "DR14_WRITE",
2445 [VMEXIT_DR15_WRITE] = "DR15_WRITE",
2446 [VMEXIT_EXCEPTION_DE] = "EXCEPTION_DE",
2447 [VMEXIT_EXCEPTION_DB] = "EXCEPTION_DB",
2448 [VMEXIT_EXCEPTION_NMI] = "EXCEPTION_NMI",
2449 [VMEXIT_EXCEPTION_BP] = "EXCEPTION_BP",
2450 [VMEXIT_EXCEPTION_OF] = "EXCEPTION_OF",
2451 [VMEXIT_EXCEPTION_BR] = "EXCEPTION_BR",
2452 [VMEXIT_EXCEPTION_UD] = "EXCEPTION_UD",
2453 [VMEXIT_EXCEPTION_NM] = "EXCEPTION_NM",
2454 [VMEXIT_EXCEPTION_DF] = "EXCEPTION_DF",
2455 [VMEXIT_EXCEPTION_09] = "EXCEPTION_09",
2456 [VMEXIT_EXCEPTION_TS] = "EXCEPTION_TS",
2457 [VMEXIT_EXCEPTION_NP] = "EXCEPTION_NP",
2458 [VMEXIT_EXCEPTION_SS] = "EXCEPTION_SS",
2459 [VMEXIT_EXCEPTION_GP] = "EXCEPTION_GP",
2460 [VMEXIT_EXCEPTION_PF] = "EXCEPTION_PF",
2461 [VMEXIT_EXCEPTION_15] = "EXCEPTION_15",
2462 [VMEXIT_EXCEPTION_MF] = "EXCEPTION_MF",
2463 [VMEXIT_EXCEPTION_AC] = "EXCEPTION_AC",
2464 [VMEXIT_EXCEPTION_MC] = "EXCEPTION_MC",
2465 [VMEXIT_EXCEPTION_XF] = "EXCEPTION_XF",
2466 [VMEXIT_INTR] = "INTR",
2467 [VMEXIT_NMI] = "NMI",
2468 [VMEXIT_SMI] = "SMI",
2469 [VMEXIT_INIT] = "INIT",
2470 [VMEXIT_VINTR] = "VINTR",
2471 [VMEXIT_CR0_SEL_WRITE] = "CR0_SEL_WRITE",
2472 [VMEXIT_IDTR_READ] = "IDTR_READ",
2473 [VMEXIT_GDTR_READ] = "GDTR_READ",
2474 [VMEXIT_LDTR_READ] = "LDTR_READ",
2475 [VMEXIT_TR_READ] = "TR_READ",
2476 [VMEXIT_IDTR_WRITE] = "IDTR_WRITE",
2477 [VMEXIT_GDTR_WRITE] = "GDTR_WRITE",
2478 [VMEXIT_LDTR_WRITE] = "LDTR_WRITE",
2479 [VMEXIT_TR_WRITE] = "TR_WRITE",
2480 [VMEXIT_RDTSC] = "RDTSC",
2481 [VMEXIT_RDPMC] = "RDPMC",
2482 [VMEXIT_PUSHF] = "PUSHF",
2483 [VMEXIT_POPF] = "POPF",
2484 [VMEXIT_CPUID] = "CPUID",
2485 [VMEXIT_RSM] = "RSM",
2486 [VMEXIT_IRET] = "IRET",
2487 [VMEXIT_SWINT] = "SWINT",
2488 [VMEXIT_INVD] = "INVD",
2489 [VMEXIT_PAUSE] = "PAUSE",
2490 [VMEXIT_HLT] = "HLT",
2491 [VMEXIT_INVLPG] = "INVLPG",
2492 [VMEXIT_INVLPGA] = "INVLPGA",
2493 [VMEXIT_IOIO] = "IOIO",
2494 [VMEXIT_MSR] = "MSR",
2495 [VMEXIT_TASK_SWITCH] = "TASK_SWITCH",
2496 [VMEXIT_FERR_FREEZE] = "FERR_FREEZE",
2497 [VMEXIT_SHUTDOWN] = "SHUTDOWN",
2498 [VMEXIT_VMRUN] = "VMRUN",
2499 [VMEXIT_VMMCALL] = "VMMCALL",
2500 [VMEXIT_VMLOAD] = "VMLOAD",
2501 [VMEXIT_VMSAVE] = "VMSAVE",
2502 [VMEXIT_STGI] = "STGI",
2503 [VMEXIT_CLGI] = "CLGI",
2504 [VMEXIT_SKINIT] = "SKINIT",
2505 [VMEXIT_RDTSCP] = "RDTSCP",
2506 [VMEXIT_ICEBP] = "ICEBP",
2507 [VMEXIT_NPF] = "NPF"
2508 };
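#if 0
/*
 * Illustrative sketch (hypothetical helper, not used below): the table
 * above is sparse, so uninitialised entries are NULL and an out-of-range
 * exit code would index past the end.  A bounds-checked lookup would look
 * something like this.
 */
static const char *exit_reason_name(unsigned int exit_reason)
{
    if ( (exit_reason >= sizeof(exit_reasons) / sizeof(exit_reasons[0])) ||
         (exit_reasons[exit_reason] == NULL) )
        return "UNKNOWN";
    return exit_reasons[exit_reason];
}
#endif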
2509 #endif /* SVM_EXTRA_DEBUG */
2511 #ifdef SVM_WALK_GUEST_PAGES
2512 void walk_shadow_and_guest_pt(unsigned long gva)
2514 l2_pgentry_t gpde;
2515 l2_pgentry_t spde;
2516 l1_pgentry_t gpte;
2517 l1_pgentry_t spte;
2518 struct vcpu *v = current;
2519 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2520 paddr_t gpa;
2522 gpa = shadow_gva_to_gpa(current, gva);
2523 printk("gva = %lx, gpa=%"PRIpaddr", gCR3=%x\n", gva, gpa, (u32)vmcb->cr3);
2524 if( !svm_paging_enabled(v) || mmio_space(gpa) )
2525 return;
2527 /* let's dump the guest and shadow page info */
2529 __guest_get_l2e(v, gva, &gpde);
2530 printk( "G-PDE = %x, flags=%x\n", gpde.l2, l2e_get_flags(gpde) );
2531 __shadow_get_l2e( v, gva, &spde );
2532 printk( "S-PDE = %x, flags=%x\n", spde.l2, l2e_get_flags(spde) );
2534 if ( unlikely(!(l2e_get_flags(gpde) & _PAGE_PRESENT)) )
2535 return;
2537 spte = l1e_empty();
2539 /* This is actually overkill - we only need to ensure the hl2 is in-sync. */
2540 shadow_sync_va(v, gva);
2542 gpte.l1 = 0;
2543 __copy_from_user(&gpte, &linear_pg_table[ l1_linear_offset(gva) ],
2544 sizeof(gpte) );
2545 printk( "G-PTE = %x, flags=%x\n", gpte.l1, l1e_get_flags(gpte) );
2547 BUG(); // need to think about this, and convert usage of
2548 // phys_to_machine_mapping to use pagetable format...
2549 __copy_from_user( &spte, &phys_to_machine_mapping[ l1e_get_pfn( gpte ) ],
2550 sizeof(spte) );
2552 printk( "S-PTE = %x, flags=%x\n", spte.l1, l1e_get_flags(spte));
2554 #endif /* SVM_WALK_GUEST_PAGES */
2557 asmlinkage void svm_vmexit_handler(struct cpu_user_regs *regs)
2559 unsigned int exit_reason;
2560 unsigned long eip;
2561 struct vcpu *v = current;
2562 int error;
2563 int do_debug = 0;
2564 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2566 ASSERT(vmcb);
2568 exit_reason = vmcb->exitcode;
2569 save_svm_cpu_user_regs(v, regs);
2571 vmcb->tlb_control = 1;
2572 v->arch.hvm_svm.inject_event = 0;
2574 if (exit_reason == VMEXIT_INVALID)
2576 svm_dump_vmcb(__func__, vmcb);
2577 domain_crash_synchronous();
2580 #ifdef SVM_EXTRA_DEBUG
2582 #if defined(__i386__)
2583 #define rip eip
2584 #endif
2586 static unsigned long intercepts_counter = 0;
2588 if (svm_dbg_on && exit_reason == VMEXIT_EXCEPTION_PF)
2590 if (svm_paging_enabled(v) &&
2591 !mmio_space(shadow_gva_to_gpa(current, vmcb->exitinfo2)))
2593 printk("I%08ld,ExC=%s(%d),IP=%x:%"PRIx64","
2594 "I1=%"PRIx64",I2=%"PRIx64",INT=%"PRIx64", "
2595 "gpa=%"PRIx64"\n", intercepts_counter,
2596 exit_reasons[exit_reason], exit_reason, regs->cs,
2597 (u64)regs->rip,
2598 (u64)vmcb->exitinfo1,
2599 (u64)vmcb->exitinfo2,
2600 (u64)vmcb->exitintinfo.bytes,
2601 (u64)shadow_gva_to_gpa(current, vmcb->exitinfo2));
2603 else
2605 printk("I%08ld,ExC=%s(%d),IP=%x:%"PRIx64","
2606 "I1=%"PRIx64",I2=%"PRIx64",INT=%"PRIx64"\n",
2607 intercepts_counter,
2608 exit_reasons[exit_reason], exit_reason, regs->cs,
2609 (u64)regs->rip,
2610 (u64)vmcb->exitinfo1,
2611 (u64)vmcb->exitinfo2,
2612 (u64)vmcb->exitintinfo.bytes );
2615 else if ( svm_dbg_on
2616 && exit_reason != VMEXIT_IOIO
2617 && exit_reason != VMEXIT_INTR)
2620 if (exit_reasons[exit_reason])
2622 printk("I%08ld,ExC=%s(%d),IP=%x:%"PRIx64","
2623 "I1=%"PRIx64",I2=%"PRIx64",INT=%"PRIx64"\n",
2624 intercepts_counter,
2625 exit_reasons[exit_reason], exit_reason, regs->cs,
2626 (u64)regs->rip,
2627 (u64)vmcb->exitinfo1,
2628 (u64)vmcb->exitinfo2,
2629 (u64)vmcb->exitintinfo.bytes);
2631 else
2633 printk("I%08ld,ExC=%d(0x%x),IP=%x:%"PRIx64","
2634 "I1=%"PRIx64",I2=%"PRIx64",INT=%"PRIx64"\n",
2635 intercepts_counter, exit_reason, exit_reason, regs->cs,
2636 (u64)regs->rip,
2637 (u64)vmcb->exitinfo1,
2638 (u64)vmcb->exitinfo2,
2639 (u64)vmcb->exitintinfo.bytes);
2643 #ifdef SVM_WALK_GUEST_PAGES
2644 if( exit_reason == VMEXIT_EXCEPTION_PF
2645 && ( ( vmcb->exitinfo2 == vmcb->rip )
2646 || vmcb->exitintinfo.bytes) )
2648 if ( svm_paging_enabled(v) &&
2649 !mmio_space(gva_to_gpa(vmcb->exitinfo2)) )
2650 walk_shadow_and_guest_pt(vmcb->exitinfo2);
2652 #endif
2654 intercepts_counter++;
2656 #if 0
2657 if (svm_dbg_on)
2658 do_debug = svm_do_debugout(exit_reason);
2659 #endif
2661 if (do_debug)
2663 printk("%s:+ guest_table = 0x%08x, monitor_table = 0x%08x, "
2664 "hw_cr3 = 0x%16lx\n",
2665 __func__,
2666 (int) v->arch.guest_table.pfn,
2667 (int) v->arch.monitor_table.pfn,
2668 (long unsigned int) v->arch.hvm_vcpu.hw_cr3);
2670 svm_dump_vmcb(__func__, vmcb);
2671 svm_dump_regs(__func__, regs);
2672 svm_dump_inst(svm_rip2pointer(vmcb));
2675 #if defined(__i386__)
2676 #undef rip
2677 #endif
2680 #endif /* SVM_EXTRA_DEBUG */
2683 perfc_incra(svmexits, exit_reason);
2684 eip = vmcb->rip;
2686 #ifdef SVM_EXTRA_DEBUG
2687 if (do_debug)
2689 printk("eip = %lx, exit_reason = %d (0x%x)\n",
2690 eip, exit_reason, exit_reason);
2692 #endif /* SVM_EXTRA_DEBUG */
2694 TRACE_3D(TRC_VMX_VMEXIT, v->domain->domain_id, eip, exit_reason);
2696 switch (exit_reason)
2698 case VMEXIT_EXCEPTION_DB:
2700 #ifdef XEN_DEBUGGER
2701 svm_debug_save_cpu_user_regs(regs);
2702 pdb_handle_exception(1, regs, 1);
2703 svm_debug_restore_cpu_user_regs(regs);
2704 #else
2705 svm_store_cpu_user_regs(regs, v);
2706 domain_pause_for_debugger();
2707 #endif
2709 break;
2711 case VMEXIT_NMI:
2712 break;
2714 case VMEXIT_SMI:
2715 /*
2716 * For asynchronous SMIs, we just need to allow global interrupts
2717 * so that the SMI is taken properly in the context of the host. The
2718 * standard code does an STGI after the VMEXIT, which should accomplish
2719 * this task. Continue as normal and restart the guest.
2720 */
2721 break;
2723 case VMEXIT_INIT:
2724 /*
2725 * Nothing to do, in fact we should never get to this point.
2726 */
2727 break;
2729 case VMEXIT_EXCEPTION_BP:
2730 #ifdef XEN_DEBUGGER
2731 svm_debug_save_cpu_user_regs(regs);
2732 pdb_handle_exception(3, regs, 1);
2733 svm_debug_restore_cpu_user_regs(regs);
2734 #else
2735 if ( test_bit(_DOMF_debugging, &v->domain->domain_flags) )
2736 domain_pause_for_debugger();
2737 else
2738 svm_inject_exception(v, TRAP_int3, 0, 0);
2739 #endif
2740 break;
2742 case VMEXIT_EXCEPTION_NM:
2743 svm_do_no_device_fault(vmcb);
2744 break;
2746 case VMEXIT_EXCEPTION_GP:
2747 /* This should probably not be trapped in the future */
2748 regs->error_code = vmcb->exitinfo1;
2749 svm_do_general_protection_fault(v, regs);
2750 break;
2752 case VMEXIT_EXCEPTION_PF:
2754 unsigned long va;
2755 va = vmcb->exitinfo2;
2756 regs->error_code = vmcb->exitinfo1;
2757 HVM_DBG_LOG(DBG_LEVEL_VMMU,
2758 "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx",
2759 (unsigned long)regs->eax, (unsigned long)regs->ebx,
2760 (unsigned long)regs->ecx, (unsigned long)regs->edx,
2761 (unsigned long)regs->esi, (unsigned long)regs->edi);
2763 if (!(error = svm_do_page_fault(va, regs)))
2765 /* Inject #PF using Interruption-Information Fields */
2766 svm_inject_exception(v, TRAP_page_fault, 1, regs->error_code);
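/* Record the faulting address in our CR2 shadow and in the VMCB, so the
 * guest's #PF handler sees it in %cr2 after the injected fault. */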
2768 v->arch.hvm_svm.cpu_cr2 = va;
2769 vmcb->cr2 = va;
2770 TRACE_3D(TRC_VMX_INTR, v->domain->domain_id,
2771 VMEXIT_EXCEPTION_PF, va);
2773 break;
2776 case VMEXIT_EXCEPTION_DF:
2777 /* Debug info to hopefully help debug WHY the guest double-faulted. */
2778 svm_dump_vmcb(__func__, vmcb);
2779 svm_dump_regs(__func__, regs);
2780 svm_dump_inst(svm_rip2pointer(vmcb));
2781 svm_inject_exception(v, TRAP_double_fault, 1, 0);
2782 break;
2784 case VMEXIT_VINTR:
2785 vmcb->vintr.fields.irq = 0;
2786 vmcb->general1_intercepts &= ~GENERAL1_INTERCEPT_VINTR;
2787 break;
2789 case VMEXIT_INTR:
2790 break;
2792 case VMEXIT_INVD:
2793 svm_vmexit_do_invd(vmcb);
2794 break;
2796 case VMEXIT_GDTR_WRITE:
2797 printk("WRITE to GDTR\n");
2798 break;
2800 case VMEXIT_TASK_SWITCH:
2801 __hvm_bug(regs);
2802 break;
2804 case VMEXIT_CPUID:
2805 svm_vmexit_do_cpuid(vmcb, regs->eax, regs);
2806 break;
2808 case VMEXIT_HLT:
2809 svm_vmexit_do_hlt(vmcb);
2810 break;
2812 case VMEXIT_INVLPG:
2813 svm_handle_invlpg(0, regs);
2814 break;
2816 case VMEXIT_INVLPGA:
2817 svm_handle_invlpg(1, regs);
2818 break;
2820 case VMEXIT_VMMCALL:
2821 svm_do_vmmcall(v, regs);
2822 break;
2824 case VMEXIT_CR0_READ:
2825 svm_cr_access(v, 0, TYPE_MOV_FROM_CR, regs);
2826 break;
2828 case VMEXIT_CR2_READ:
2829 svm_cr_access(v, 2, TYPE_MOV_FROM_CR, regs);
2830 break;
2832 case VMEXIT_CR3_READ:
2833 svm_cr_access(v, 3, TYPE_MOV_FROM_CR, regs);
2834 break;
2836 case VMEXIT_CR4_READ:
2837 svm_cr_access(v, 4, TYPE_MOV_FROM_CR, regs);
2838 break;
2840 case VMEXIT_CR8_READ:
2841 svm_cr_access(v, 8, TYPE_MOV_FROM_CR, regs);
2842 break;
2844 case VMEXIT_CR0_WRITE:
2845 svm_cr_access(v, 0, TYPE_MOV_TO_CR, regs);
2846 break;
2848 case VMEXIT_CR2_WRITE:
2849 svm_cr_access(v, 2, TYPE_MOV_TO_CR, regs);
2850 break;
2852 case VMEXIT_CR3_WRITE:
2853 svm_cr_access(v, 3, TYPE_MOV_TO_CR, regs);
2854 local_flush_tlb();
2855 break;
2857 case VMEXIT_CR4_WRITE:
2858 svm_cr_access(v, 4, TYPE_MOV_TO_CR, regs);
2859 break;
2861 case VMEXIT_CR8_WRITE:
2862 svm_cr_access(v, 8, TYPE_MOV_TO_CR, regs);
2863 break;
2865 case VMEXIT_DR0_WRITE ... VMEXIT_DR7_WRITE:
2866 svm_dr_access(v, regs);
2867 break;
2869 case VMEXIT_IOIO:
2870 svm_io_instruction(v);
2871 break;
2873 case VMEXIT_MSR:
2874 svm_do_msr_access(v, regs);
2875 break;
2877 case VMEXIT_SHUTDOWN:
2878 printk("Guest shutdown exit\n");
2879 domain_crash_synchronous();
2880 break;
2882 default:
2883 printk("unexpected VMEXIT: exit reason = 0x%x, exitinfo1 = %"PRIx64", "
2884 "exitinfo2 = %"PRIx64"\n", exit_reason,
2885 (u64)vmcb->exitinfo1, (u64)vmcb->exitinfo2);
2886 __hvm_bug(regs); /* should not happen */
2887 break;
2890 #ifdef SVM_EXTRA_DEBUG
2891 if (do_debug)
2893 printk("%s: Done switch on vmexit_code\n", __func__);
2894 svm_dump_regs(__func__, regs);
2897 if (do_debug)
2899 printk("vmexit_handler():- guest_table = 0x%08x, "
2900 "monitor_table = 0x%08x, hw_cr3 = 0x%16x\n",
2901 (int)v->arch.guest_table.pfn,
2902 (int)v->arch.monitor_table.pfn,
2903 (int)v->arch.hvm_vcpu.hw_cr3);
2904 printk("svm_vmexit_handler: Returning\n");
2906 #endif
2908 return;
2911 asmlinkage void svm_load_cr2(void)
2913 struct vcpu *v = current;
2915 local_irq_disable();
2916 asm volatile("mov %0,%%cr2": :"r" (v->arch.hvm_svm.cpu_cr2));
2919 asmlinkage void svm_asid(void)
2921 struct vcpu *v = current;
2922 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2924 /*
2925 * If we need to assign a new ASID, or if we are switching cores, retire
2926 * the ASID for the old core and assign a new one for the current core
2927 * (a sketch of this test follows the function). */
2928 if ( test_bit( ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags ) ||
2929 ( v->arch.hvm_svm.asid_core != v->arch.hvm_svm.launch_core )) {
2930 /* recycle asid */
2931 if ( !asidpool_assign_next(vmcb, 1,
2932 v->arch.hvm_svm.asid_core,
2933 v->arch.hvm_svm.launch_core) )
2935 /* If we get here, we have a major problem */
2936 domain_crash_synchronous();
2939 v->arch.hvm_svm.asid_core = v->arch.hvm_svm.launch_core;
2940 clear_bit( ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags );
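#if 0
/*
 * Illustrative sketch only (hypothetical helper): the condition tested
 * above, factored out.  A new ASID is needed either when the VMCB has been
 * flagged for reassignment or when the VCPU has moved to a different core
 * since it was last launched.
 */
static inline int svm_vcpu_needs_new_asid(struct vcpu *v)
{
    return test_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags) ||
           (v->arch.hvm_svm.asid_core != v->arch.hvm_svm.launch_core);
}
#endif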
2944 /*
2945 * Local variables:
2946 * mode: C
2947 * c-set-style: "BSD"
2948 * c-basic-offset: 4
2949 * tab-width: 4
2950 * indent-tabs-mode: nil
2951 * End:
2952 */