
xen/arch/x86/hvm/svm/svm.c @ 11798:f14a67a35bec

[HVM][SVM] Use proper name for the K8 VM_CR MSR.
Signed-off-by: Keir Fraser <keir@xensource.com>
author kfraser@localhost.localdomain
date Thu Oct 12 17:53:51 2006 +0100 (2006-10-12)
1 /*
2 * svm.c: handling SVM architecture-related VM exits
3 * Copyright (c) 2004, Intel Corporation.
4 * Copyright (c) 2005, AMD Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
17 * Place - Suite 330, Boston, MA 02111-1307 USA.
18 *
19 */
21 #include <xen/config.h>
22 #include <xen/init.h>
23 #include <xen/lib.h>
24 #include <xen/trace.h>
25 #include <xen/sched.h>
26 #include <xen/irq.h>
27 #include <xen/softirq.h>
28 #include <xen/hypercall.h>
29 #include <xen/domain_page.h>
30 #include <asm/current.h>
31 #include <asm/io.h>
32 #include <asm/shadow.h>
33 #include <asm/regs.h>
34 #include <asm/cpufeature.h>
35 #include <asm/processor.h>
36 #include <asm/types.h>
37 #include <asm/msr.h>
38 #include <asm/spinlock.h>
39 #include <asm/hvm/hvm.h>
40 #include <asm/hvm/support.h>
41 #include <asm/hvm/io.h>
42 #include <asm/hvm/svm/svm.h>
43 #include <asm/hvm/svm/vmcb.h>
44 #include <asm/hvm/svm/emulate.h>
45 #include <asm/hvm/svm/vmmcall.h>
46 #include <asm/hvm/svm/intr.h>
47 #include <asm/x86_emulate.h>
48 #include <public/sched.h>
50 #define SVM_EXTRA_DEBUG
52 #define set_segment_register(name, value) \
53 __asm__ __volatile__ ( "movw %%ax ,%%" STR(name) "" : : "a" (value) )
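/*
 * For reference: the macro above has the compiler place "value" in %ax
 * (the "a" constraint) and then moves %ax into the named segment
 * register, e.g. set_segment_register(ds, 0) becomes "movw %ax, %ds".
 * It is used in svm_ctxt_switch_to() below to load NUL selectors.
 */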
55 /* External functions. We should move these to some suitable header file(s) */
57 extern void do_nmi(struct cpu_user_regs *, unsigned long);
58 extern int inst_copy_from_guest(unsigned char *buf, unsigned long guest_eip,
59 int inst_len);
60 extern uint32_t vlapic_update_ppr(struct vlapic *vlapic);
61 extern asmlinkage void do_IRQ(struct cpu_user_regs *);
62 extern void send_pio_req(struct cpu_user_regs *regs, unsigned long port,
63 unsigned long count, int size, long value, int dir, int pvalid);
64 extern void svm_dump_inst(unsigned long eip);
65 extern int svm_dbg_on;
66 void svm_dump_regs(const char *from, struct cpu_user_regs *regs);
68 static void svm_relinquish_guest_resources(struct domain *d);
69 static int svm_do_vmmcall_reset_to_realmode(struct vcpu *v,
70 struct cpu_user_regs *regs);
72 /* va of hardware host save area */
73 static void *hsa[NR_CPUS] __read_mostly;
75 /* vmcb used for extended host state */
76 static void *root_vmcb[NR_CPUS] __read_mostly;
78 /* physical address of above for host VMSAVE/VMLOAD */
79 u64 root_vmcb_pa[NR_CPUS] __read_mostly;
82 /* ASID API */
83 enum {
84 ASID_AVAILABLE = 0,
85 ASID_INUSE,
86 ASID_RETIRED
87 };
88 #define INITIAL_ASID 0
89 #define ASID_MAX 64
91 struct asid_pool {
92 spinlock_t asid_lock;
93 u32 asid[ASID_MAX];
94 };
96 static DEFINE_PER_CPU(struct asid_pool, asid_pool);
99 /*
100 * Initializes the pool of ASIDs used by the guests, per core.
101 */
102 void asidpool_init(int core)
103 {
104 int i;
106 spin_lock_init(&per_cpu(asid_pool,core).asid_lock);
108 /* Host ASID is always in use */
109 per_cpu(asid_pool,core).asid[INITIAL_ASID] = ASID_INUSE;
110 for ( i = 1; i < ASID_MAX; i++ )
111 per_cpu(asid_pool,core).asid[i] = ASID_AVAILABLE;
112 }
115 /* internal function to get the next available ASID */
116 static int asidpool_fetch_next(struct vmcb_struct *vmcb, int core)
117 {
118 int i;
119 for ( i = 1; i < ASID_MAX; i++ )
120 {
121 if ( per_cpu(asid_pool,core).asid[i] == ASID_AVAILABLE )
122 {
123 vmcb->guest_asid = i;
124 per_cpu(asid_pool,core).asid[i] = ASID_INUSE;
125 return i;
126 }
127 }
128 return -1;
129 }
132 /*
133 * This function assigns to the passed VMCB the next
134 * available ASID number. If none are available, the
135 * TLB flush flag is set, and all retired ASIDs
136 * are made available again.
137 *
138 * Returns: 1 -- success;
139 * 0 -- failure -- no more ASID numbers
140 * available.
141 */
142 int asidpool_assign_next( struct vmcb_struct *vmcb, int retire_current,
143 int oldcore, int newcore )
144 {
145 int i;
146 int res = 1;
147 static unsigned long cnt=0;
149 spin_lock(&per_cpu(asid_pool,oldcore).asid_lock);
150 if( retire_current && vmcb->guest_asid ) {
151 per_cpu(asid_pool,oldcore).asid[vmcb->guest_asid & (ASID_MAX-1)] =
152 ASID_RETIRED;
153 }
154 spin_unlock(&per_cpu(asid_pool,oldcore).asid_lock);
155 spin_lock(&per_cpu(asid_pool,newcore).asid_lock);
156 if( asidpool_fetch_next( vmcb, newcore ) < 0 ) {
157 if (svm_dbg_on)
158 printk( "SVM: tlb(%ld)\n", cnt++ );
159 /* Flush the TLB and make all retired slots available again */
160 vmcb->tlb_control = 1;
161 for( i = 1; i < ASID_MAX; i++ ) {
162 if( per_cpu(asid_pool,newcore).asid[i] == ASID_RETIRED ) {
163 per_cpu(asid_pool,newcore).asid[i] = ASID_AVAILABLE;
164 }
165 }
166 /* Get the First slot available */
167 res = asidpool_fetch_next( vmcb, newcore ) > 0;
168 }
169 spin_unlock(&per_cpu(asid_pool,newcore).asid_lock);
170 return res;
171 }
173 void asidpool_retire( struct vmcb_struct *vmcb, int core )
174 {
175 spin_lock(&per_cpu(asid_pool,core).asid_lock);
176 if( vmcb->guest_asid ) {
177 per_cpu(asid_pool,core).asid[vmcb->guest_asid & (ASID_MAX-1)] =
178 ASID_RETIRED;
179 }
180 spin_unlock(&per_cpu(asid_pool,core).asid_lock);
181 }
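/*
 * ASID lifecycle, as implemented by the routines above (summary):
 *
 *   ASID_AVAILABLE --asidpool_fetch_next()------------------> ASID_INUSE
 *   ASID_INUSE -----asidpool_retire()/retire_current--------> ASID_RETIRED
 *   ASID_RETIRED ---pool exhausted: tlb_control=1, recycle--> ASID_AVAILABLE
 *
 * ASID 0 (INITIAL_ASID) is reserved for the host and is never handed out.
 */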
183 static inline void svm_inject_exception(struct vcpu *v, int trap,
184 int ev, int error_code)
185 {
186 eventinj_t event;
187 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
189 event.bytes = 0;
190 event.fields.v = 1;
191 event.fields.type = EVENTTYPE_EXCEPTION;
192 event.fields.vector = trap;
193 event.fields.ev = ev;
194 event.fields.errorcode = error_code;
196 ASSERT(vmcb->eventinj.fields.v == 0);
198 vmcb->eventinj = event;
199 }
201 static void stop_svm(void)
202 {
203 u32 eax, edx;
204 int cpu = smp_processor_id();
206 /* We turn off the EFER_SVME bit. */
207 rdmsr(MSR_EFER, eax, edx);
208 eax &= ~EFER_SVME;
209 wrmsr(MSR_EFER, eax, edx);
211 /* release the HSA */
212 free_host_save_area(hsa[cpu]);
213 hsa[cpu] = NULL;
214 wrmsr(MSR_K8_VM_HSAVE_PA, 0, 0 );
216 /* free up the root vmcb */
217 free_vmcb(root_vmcb[cpu]);
218 root_vmcb[cpu] = NULL;
219 root_vmcb_pa[cpu] = 0;
221 printk("AMD SVM Extension is disabled.\n");
222 }
225 static void svm_store_cpu_guest_regs(
226 struct vcpu *v, struct cpu_user_regs *regs, unsigned long *crs)
227 {
228 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
230 if ( regs != NULL )
231 {
232 regs->eip = vmcb->rip;
233 regs->esp = vmcb->rsp;
234 regs->eflags = vmcb->rflags;
235 regs->cs = vmcb->cs.sel;
236 regs->ds = vmcb->ds.sel;
237 regs->es = vmcb->es.sel;
238 regs->ss = vmcb->ss.sel;
239 regs->gs = vmcb->gs.sel;
240 regs->fs = vmcb->fs.sel;
241 }
243 if ( crs != NULL )
244 {
245 /* Returning the guest's regs */
246 crs[0] = v->arch.hvm_svm.cpu_shadow_cr0;
247 crs[2] = v->arch.hvm_svm.cpu_cr2;
248 crs[3] = v->arch.hvm_svm.cpu_cr3;
249 crs[4] = v->arch.hvm_svm.cpu_shadow_cr4;
250 }
251 }
253 static int svm_paging_enabled(struct vcpu *v)
254 {
255 unsigned long cr0;
257 cr0 = v->arch.hvm_svm.cpu_shadow_cr0;
259 return (cr0 & X86_CR0_PE) && (cr0 & X86_CR0_PG);
260 }
262 static int svm_pae_enabled(struct vcpu *v)
263 {
264 unsigned long cr4;
266 if(!svm_paging_enabled(v))
267 return 0;
269 cr4 = v->arch.hvm_svm.cpu_shadow_cr4;
271 return (cr4 & X86_CR4_PAE);
272 }
274 #define IS_CANO_ADDRESS(add) 1
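/*
 * Note: the stub above accepts any address.  A real canonical-address
 * check for 48-bit virtual addresses would verify that bits 63:48 are a
 * sign-extension of bit 47; one possible (illustrative, untested) form:
 *
 *   #define IS_CANO_ADDRESS(add) \
 *       ((u64)((s64)(add) << 16 >> 16) == (u64)(add))
 */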
276 static inline int long_mode_do_msr_read(struct cpu_user_regs *regs)
277 {
278 u64 msr_content = 0;
279 struct vcpu *vc = current;
280 struct vmcb_struct *vmcb = vc->arch.hvm_svm.vmcb;
282 switch (regs->ecx)
283 {
284 case MSR_EFER:
285 msr_content = vmcb->efer;
286 msr_content &= ~EFER_SVME;
287 break;
289 case MSR_FS_BASE:
290 msr_content = vmcb->fs.base;
291 break;
293 case MSR_GS_BASE:
294 msr_content = vmcb->gs.base;
295 break;
297 case MSR_SHADOW_GS_BASE:
298 msr_content = vmcb->kerngsbase;
299 break;
301 case MSR_STAR:
302 msr_content = vmcb->star;
303 break;
305 case MSR_LSTAR:
306 msr_content = vmcb->lstar;
307 break;
309 case MSR_CSTAR:
310 msr_content = vmcb->cstar;
311 break;
313 case MSR_SYSCALL_MASK:
314 msr_content = vmcb->sfmask;
315 break;
316 default:
317 return 0;
318 }
320 HVM_DBG_LOG(DBG_LEVEL_2, "mode_do_msr_read: msr_content: %"PRIx64"\n",
321 msr_content);
323 regs->eax = (u32)(msr_content >> 0);
324 regs->edx = (u32)(msr_content >> 32);
325 return 1;
326 }
328 static inline int long_mode_do_msr_write(struct cpu_user_regs *regs)
329 {
330 u64 msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
331 struct vcpu *vc = current;
332 struct vmcb_struct *vmcb = vc->arch.hvm_svm.vmcb;
334 HVM_DBG_LOG(DBG_LEVEL_1, "mode_do_msr_write msr %lx "
335 "msr_content %"PRIx64"\n",
336 (unsigned long)regs->ecx, msr_content);
338 switch (regs->ecx)
339 {
340 case MSR_EFER:
341 #ifdef __x86_64__
342 /* offending reserved bit will cause #GP */
343 if ( msr_content & ~(EFER_LME | EFER_LMA | EFER_NX | EFER_SCE) )
344 {
345 printk("Trying to set reserved bit in EFER: %"PRIx64"\n",
346 msr_content);
347 svm_inject_exception(vc, TRAP_gp_fault, 1, 0);
348 return 0;
349 }
351 /* LME: 0 -> 1 */
352 if ( msr_content & EFER_LME &&
353 !test_bit(SVM_CPU_STATE_LME_ENABLED, &vc->arch.hvm_svm.cpu_state))
354 {
355 if ( svm_paging_enabled(vc) ||
356 !test_bit(SVM_CPU_STATE_PAE_ENABLED,
357 &vc->arch.hvm_svm.cpu_state) )
358 {
359 printk("Trying to set LME bit when "
360 "in paging mode or PAE bit is not set\n");
361 svm_inject_exception(vc, TRAP_gp_fault, 1, 0);
362 return 0;
363 }
364 set_bit(SVM_CPU_STATE_LME_ENABLED, &vc->arch.hvm_svm.cpu_state);
365 }
367 /* We have already recorded that we want LME, so it will be set
368 * next time CR0 gets updated. So we clear that bit and continue.
369 */
370 if ((msr_content ^ vmcb->efer) & EFER_LME)
371 msr_content &= ~EFER_LME;
372 /* No update for LME/LMA since it has no effect */
373 #endif
374 vmcb->efer = msr_content | EFER_SVME;
375 break;
377 case MSR_FS_BASE:
378 case MSR_GS_BASE:
379 if (!(SVM_LONG_GUEST(vc)))
380 domain_crash_synchronous();
382 if (!IS_CANO_ADDRESS(msr_content))
383 {
384 HVM_DBG_LOG(DBG_LEVEL_1, "Not cano address of msr write\n");
385 svm_inject_exception(vc, TRAP_gp_fault, 1, 0);
386 }
388 if (regs->ecx == MSR_FS_BASE)
389 vmcb->fs.base = msr_content;
390 else
391 vmcb->gs.base = msr_content;
392 break;
394 case MSR_SHADOW_GS_BASE:
395 vmcb->kerngsbase = msr_content;
396 break;
398 case MSR_STAR:
399 vmcb->star = msr_content;
400 break;
402 case MSR_LSTAR:
403 vmcb->lstar = msr_content;
404 break;
406 case MSR_CSTAR:
407 vmcb->cstar = msr_content;
408 break;
410 case MSR_SYSCALL_MASK:
411 vmcb->sfmask = msr_content;
412 break;
414 default:
415 return 0;
416 }
417 return 1;
418 }
421 #define loaddebug(_v,_reg) \
422 __asm__ __volatile__ ("mov %0,%%db" #_reg : : "r" ((_v)->debugreg[_reg]))
423 #define savedebug(_v,_reg) \
424 __asm__ __volatile__ ("mov %%db" #_reg ",%0" : : "r" ((_v)->debugreg[_reg]))
427 static inline void svm_save_dr(struct vcpu *v)
428 {
429 if (v->arch.hvm_vcpu.flag_dr_dirty)
430 {
431 /* clear the DR dirty flag and re-enable intercepts for DR accesses */
432 v->arch.hvm_vcpu.flag_dr_dirty = 0;
433 v->arch.hvm_svm.vmcb->dr_intercepts = DR_INTERCEPT_ALL_WRITES;
435 savedebug(&v->arch.guest_context, 0);
436 savedebug(&v->arch.guest_context, 1);
437 savedebug(&v->arch.guest_context, 2);
438 savedebug(&v->arch.guest_context, 3);
439 }
440 }
443 static inline void __restore_debug_registers(struct vcpu *v)
444 {
445 loaddebug(&v->arch.guest_context, 0);
446 loaddebug(&v->arch.guest_context, 1);
447 loaddebug(&v->arch.guest_context, 2);
448 loaddebug(&v->arch.guest_context, 3);
449 }
452 static inline void svm_restore_dr(struct vcpu *v)
453 {
454 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
456 if (!vmcb)
457 return;
459 if (unlikely(vmcb->dr7 & 0xFF))
460 __restore_debug_registers(v);
461 }
464 static int svm_realmode(struct vcpu *v)
465 {
466 unsigned long cr0 = v->arch.hvm_svm.cpu_shadow_cr0;
467 unsigned long eflags = v->arch.hvm_svm.vmcb->rflags;
469 return (eflags & X86_EFLAGS_VM) || !(cr0 & X86_CR0_PE);
470 }
472 static int svm_guest_x86_mode(struct vcpu *v)
473 {
474 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
476 if ( vmcb->efer & EFER_LMA )
477 return (vmcb->cs.attributes.fields.l ?
478 X86EMUL_MODE_PROT64 : X86EMUL_MODE_PROT32);
480 if ( svm_realmode(v) )
481 return X86EMUL_MODE_REAL;
483 return (vmcb->cs.attributes.fields.db ?
484 X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16);
485 }
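/*
 * Decode summary for svm_guest_x86_mode() above (informational):
 *
 *   EFER.LMA=1, CS.L=1             -> X86EMUL_MODE_PROT64
 *   EFER.LMA=1, CS.L=0             -> X86EMUL_MODE_PROT32
 *   real mode or VM86 (EFLAGS.VM)  -> X86EMUL_MODE_REAL
 *   otherwise, CS.D/B=1            -> X86EMUL_MODE_PROT32
 *   otherwise, CS.D/B=0            -> X86EMUL_MODE_PROT16
 */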
487 void svm_update_host_cr3(struct vcpu *v)
488 {
489 /* SVM doesn't have a HOST_CR3 equivalent to update. */
490 }
492 unsigned long svm_get_ctrl_reg(struct vcpu *v, unsigned int num)
493 {
494 switch ( num )
495 {
496 case 0:
497 return v->arch.hvm_svm.cpu_shadow_cr0;
498 case 2:
499 return v->arch.hvm_svm.cpu_cr2;
500 case 3:
501 return v->arch.hvm_svm.cpu_cr3;
502 case 4:
503 return v->arch.hvm_svm.cpu_shadow_cr4;
504 default:
505 BUG();
506 }
507 return 0; /* dummy */
508 }
511 /* Make sure that xen intercepts any FP accesses from current */
512 static void svm_stts(struct vcpu *v)
513 {
514 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
516 /*
517 * If the guest does not have TS enabled then we must cause and handle an
518 * exception on first use of the FPU. If the guest *does* have TS enabled
519 * then this is not necessary: no FPU activity can occur until the guest
520 * clears CR0.TS, and we will initialise the FPU when that happens.
521 */
522 if ( !(v->arch.hvm_svm.cpu_shadow_cr0 & X86_CR0_TS) )
523 {
524 v->arch.hvm_svm.vmcb->exception_intercepts |= EXCEPTION_BITMAP_NM;
525 vmcb->cr0 |= X86_CR0_TS;
526 }
527 }
530 static void svm_set_tsc_offset(struct vcpu *v, u64 offset)
531 {
532 v->arch.hvm_svm.vmcb->tsc_offset = offset;
533 }
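/*
 * The VMCB TSC offset is added by hardware to the host TSC whenever the
 * guest reads the TSC (e.g. RDTSC), so roughly:
 *
 *     guest_tsc = host_tsc + vmcb->tsc_offset
 */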
536 /* SVM-specific initialization code for VCPU application processors */
537 static void svm_init_ap_context(struct vcpu_guest_context *ctxt,
538 int vcpuid, int trampoline_vector)
539 {
540 int i;
541 struct vcpu *v, *bsp = current;
542 struct domain *d = bsp->domain;
543 cpu_user_regs_t *regs;
546 if ((v = d->vcpu[vcpuid]) == NULL)
547 {
548 printk("vcpuid %d is invalid! good-bye.\n", vcpuid);
549 domain_crash_synchronous();
550 }
551 regs = &v->arch.guest_context.user_regs;
553 memset(ctxt, 0, sizeof(*ctxt));
554 for (i = 0; i < 256; ++i)
555 {
556 ctxt->trap_ctxt[i].vector = i;
557 ctxt->trap_ctxt[i].cs = FLAT_KERNEL_CS;
558 }
561 /*
562 * We execute the trampoline code in real mode. The trampoline vector
563 * passed to us is page aligned and is the physical frame number for
564 * the code. We will execute this code in real mode.
565 */
566 ctxt->user_regs.eip = 0x0;
567 ctxt->user_regs.cs = (trampoline_vector << 8);
568 ctxt->flags = VGCF_HVM_GUEST;
569 }
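/*
 * Example of the CS arithmetic above (frame number is hypothetical): a
 * page-aligned trampoline at physical frame 0x9F (address 0x9F000)
 * yields cs = 0x9F << 8 = 0x9F00, and real-mode cs.base = cs << 4 =
 * 0x9F000 with eip = 0, so the AP starts executing at the trampoline page.
 */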
571 static void svm_init_hypercall_page(struct domain *d, void *hypercall_page)
572 {
573 char *p;
574 int i;
576 memset(hypercall_page, 0, PAGE_SIZE);
578 for ( i = 0; i < (PAGE_SIZE / 32); i++ )
579 {
580 p = (char *)(hypercall_page + (i * 32));
581 *(u8 *)(p + 0) = 0xb8; /* mov imm32, %eax */
582 *(u32 *)(p + 1) = i;
583 *(u8 *)(p + 5) = 0x0f; /* vmmcall */
584 *(u8 *)(p + 6) = 0x01;
585 *(u8 *)(p + 7) = 0xd9;
586 *(u8 *)(p + 8) = 0xc3; /* ret */
587 }
589 /* Don't support HYPERVISOR_iret at the moment */
590 *(u16 *)(hypercall_page + (__HYPERVISOR_iret * 32)) = 0x0b0f; /* ud2 */
591 }
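/*
 * For reference, each 32-byte stub written above disassembles to:
 *
 *     b8 <imm32>   mov  $<hypercall#>, %eax
 *     0f 01 d9     vmmcall
 *     c3           ret
 *
 * so a guest issues hypercall N by calling hypercall_page + N * 32; the
 * __HYPERVISOR_iret slot is deliberately replaced with ud2 above.
 */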
594 int svm_dbg_on = 0;
596 static inline int svm_do_debugout(unsigned long exit_code)
597 {
598 int i;
600 static unsigned long counter = 0;
601 static unsigned long works[] =
602 {
603 VMEXIT_IOIO,
604 VMEXIT_HLT,
605 VMEXIT_CPUID,
606 VMEXIT_DR0_READ,
607 VMEXIT_DR1_READ,
608 VMEXIT_DR2_READ,
609 VMEXIT_DR3_READ,
610 VMEXIT_DR6_READ,
611 VMEXIT_DR7_READ,
612 VMEXIT_DR0_WRITE,
613 VMEXIT_DR1_WRITE,
614 VMEXIT_DR2_WRITE,
615 VMEXIT_DR3_WRITE,
616 VMEXIT_CR0_READ,
617 VMEXIT_CR0_WRITE,
618 VMEXIT_CR3_READ,
619 VMEXIT_CR4_READ,
620 VMEXIT_MSR,
621 VMEXIT_CR0_WRITE,
622 VMEXIT_CR3_WRITE,
623 VMEXIT_CR4_WRITE,
624 VMEXIT_EXCEPTION_PF,
625 VMEXIT_INTR,
626 VMEXIT_INVLPG,
627 VMEXIT_EXCEPTION_NM
628 };
631 #if 0
632 if (svm_dbg_on && exit_code != 0x7B)
633 return 1;
634 #endif
636 counter++;
638 #if 0
639 if ((exit_code == 0x4E
640 || exit_code == VMEXIT_CR0_READ
641 || exit_code == VMEXIT_CR0_WRITE)
642 && counter < 200000)
643 return 0;
645 if ((exit_code == 0x4E) && counter < 500000)
646 return 0;
647 #endif
649 for (i = 0; i < sizeof(works) / sizeof(works[0]); i++)
650 if (exit_code == works[i])
651 return 0;
653 return 1;
654 }
656 static void save_svm_cpu_user_regs(struct vcpu *v, struct cpu_user_regs *ctxt)
657 {
658 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
660 ASSERT(vmcb);
662 ctxt->eax = vmcb->rax;
663 ctxt->ss = vmcb->ss.sel;
664 ctxt->esp = vmcb->rsp;
665 ctxt->eflags = vmcb->rflags;
666 ctxt->cs = vmcb->cs.sel;
667 ctxt->eip = vmcb->rip;
669 ctxt->gs = vmcb->gs.sel;
670 ctxt->fs = vmcb->fs.sel;
671 ctxt->es = vmcb->es.sel;
672 ctxt->ds = vmcb->ds.sel;
673 }
675 static void svm_store_cpu_user_regs(struct cpu_user_regs *regs, struct vcpu *v)
676 {
677 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
679 regs->eip = vmcb->rip;
680 regs->esp = vmcb->rsp;
681 regs->eflags = vmcb->rflags;
682 regs->cs = vmcb->cs.sel;
683 regs->ds = vmcb->ds.sel;
684 regs->es = vmcb->es.sel;
685 regs->ss = vmcb->ss.sel;
686 }
688 /* XXX Use svm_load_cpu_guest_regs instead */
689 static void svm_load_cpu_user_regs(struct vcpu *v, struct cpu_user_regs *regs)
690 {
691 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
692 u32 *intercepts = &v->arch.hvm_svm.vmcb->exception_intercepts;
694 /* Write the guest register value into VMCB */
695 vmcb->rax = regs->eax;
696 vmcb->ss.sel = regs->ss;
697 vmcb->rsp = regs->esp;
698 vmcb->rflags = regs->eflags;
699 vmcb->cs.sel = regs->cs;
700 vmcb->rip = regs->eip;
701 if (regs->eflags & EF_TF)
702 *intercepts |= EXCEPTION_BITMAP_DB;
703 else
704 *intercepts &= ~EXCEPTION_BITMAP_DB;
705 }
707 static void svm_load_cpu_guest_regs(
708 struct vcpu *v, struct cpu_user_regs *regs)
709 {
710 svm_load_cpu_user_regs(v, regs);
711 }
713 int svm_long_mode_enabled(struct vcpu *v)
714 {
715 return SVM_LONG_GUEST(v);
716 }
720 static void arch_svm_do_launch(struct vcpu *v)
721 {
722 cpu_user_regs_t *regs = &current->arch.guest_context.user_regs;
723 int error;
725 #if 0
726 if (svm_dbg_on)
727 printk("Do launch\n");
728 #endif
729 error = construct_vmcb(&v->arch.hvm_svm, regs);
730 if ( error < 0 )
731 {
732 if (v->vcpu_id == 0) {
733 printk("Failed to construct a new VMCB for BSP.\n");
734 } else {
735 printk("Failed to construct a new VMCB for AP %d\n", v->vcpu_id);
736 }
737 domain_crash_synchronous();
738 }
740 svm_do_launch(v);
741 #if 0
742 if (svm_dbg_on)
743 svm_dump_host_regs(__func__);
744 #endif
745 if (v->vcpu_id != 0)
746 {
747 u16 cs_sel = regs->cs;
748 /*
749 * This is the launch of an AP; set state so that we begin executing
750 * the trampoline code in real-mode.
751 */
752 svm_do_vmmcall_reset_to_realmode(v, regs);
753 /* Adjust the state to execute the trampoline code.*/
754 v->arch.hvm_svm.vmcb->rip = 0;
755 v->arch.hvm_svm.vmcb->cs.sel= cs_sel;
756 v->arch.hvm_svm.vmcb->cs.base = (cs_sel << 4);
757 }
759 reset_stack_and_jump(svm_asm_do_launch);
760 }
762 static void svm_freeze_time(struct vcpu *v)
763 {
764 struct periodic_time *pt=&v->domain->arch.hvm_domain.pl_time.periodic_tm;
766 if ( pt->enabled && pt->first_injected && !v->arch.hvm_vcpu.guest_time ) {
767 v->arch.hvm_vcpu.guest_time = hvm_get_guest_time(v);
768 stop_timer(&(pt->timer));
769 }
770 }
773 static void svm_ctxt_switch_from(struct vcpu *v)
774 {
775 svm_freeze_time(v);
776 svm_save_dr(v);
777 }
779 static void svm_ctxt_switch_to(struct vcpu *v)
780 {
781 #ifdef __x86_64__
782 /*
783 * This is required because VMRUN performs consistency checks,
784 * and some of the DOM0 selectors point to invalid
785 * GDT locations, which causes AMD processors
786 * to shut down.
787 */
788 set_segment_register(ds, 0);
789 set_segment_register(es, 0);
790 set_segment_register(ss, 0);
791 #endif
792 svm_restore_dr(v);
793 }
796 static void svm_final_setup_guest(struct vcpu *v)
797 {
798 struct domain *d = v->domain;
800 v->arch.schedule_tail = arch_svm_do_launch;
801 v->arch.ctxt_switch_from = svm_ctxt_switch_from;
802 v->arch.ctxt_switch_to = svm_ctxt_switch_to;
804 if ( v != d->vcpu[0] )
805 return;
807 if ( !shadow_mode_external(d) )
808 {
809 DPRINTK("Can't init HVM for dom %u vcpu %u: "
810 "not in shadow external mode\n", d->domain_id, v->vcpu_id);
811 domain_crash(d);
812 }
814 /*
815 * Required to do this once per domain
816 * TODO: add a separate function to do these.
817 */
818 memset(&d->shared_info->evtchn_mask[0], 0xff,
819 sizeof(d->shared_info->evtchn_mask));
820 }
823 static int svm_initialize_guest_resources(struct vcpu *v)
824 {
825 svm_final_setup_guest(v);
826 return 1;
827 }
830 int start_svm(void)
831 {
832 u32 eax, ecx, edx;
833 u32 phys_hsa_lo, phys_hsa_hi;
834 u64 phys_hsa;
835 int cpu = smp_processor_id();
837 /* Xen does not fill x86_capability words except 0. */
838 ecx = cpuid_ecx(0x80000001);
839 boot_cpu_data.x86_capability[5] = ecx;
841 if (!(test_bit(X86_FEATURE_SVME, &boot_cpu_data.x86_capability)))
842 return 0;
844 /* check whether SVM feature is disabled in BIOS */
845 rdmsr(MSR_K8_VM_CR, eax, edx);
846 if ( eax & K8_VMCR_SVME_DISABLE )
847 {
848 printk("AMD SVM Extension is disabled in BIOS.\n");
849 return 0;
850 }
852 if (!(hsa[cpu] = alloc_host_save_area()))
853 return 0;
855 rdmsr(MSR_EFER, eax, edx);
856 eax |= EFER_SVME;
857 wrmsr(MSR_EFER, eax, edx);
858 asidpool_init( cpu );
859 printk("AMD SVM Extension is enabled for cpu %d.\n", cpu );
861 /* Initialize the HSA for this core */
862 phys_hsa = (u64) virt_to_maddr(hsa[cpu]);
863 phys_hsa_lo = (u32) phys_hsa;
864 phys_hsa_hi = (u32) (phys_hsa >> 32);
865 wrmsr(MSR_K8_VM_HSAVE_PA, phys_hsa_lo, phys_hsa_hi);
867 if (!(root_vmcb[cpu] = alloc_vmcb()))
868 return 0;
869 root_vmcb_pa[cpu] = virt_to_maddr(root_vmcb[cpu]);
871 if (cpu == 0)
872 setup_vmcb_dump();
874 /* Setup HVM interfaces */
875 hvm_funcs.disable = stop_svm;
877 hvm_funcs.initialize_guest_resources = svm_initialize_guest_resources;
878 hvm_funcs.relinquish_guest_resources = svm_relinquish_guest_resources;
880 hvm_funcs.store_cpu_guest_regs = svm_store_cpu_guest_regs;
881 hvm_funcs.load_cpu_guest_regs = svm_load_cpu_guest_regs;
883 hvm_funcs.realmode = svm_realmode;
884 hvm_funcs.paging_enabled = svm_paging_enabled;
885 hvm_funcs.long_mode_enabled = svm_long_mode_enabled;
886 hvm_funcs.pae_enabled = svm_pae_enabled;
887 hvm_funcs.guest_x86_mode = svm_guest_x86_mode;
888 hvm_funcs.get_guest_ctrl_reg = svm_get_ctrl_reg;
890 hvm_funcs.update_host_cr3 = svm_update_host_cr3;
892 hvm_funcs.stts = svm_stts;
893 hvm_funcs.set_tsc_offset = svm_set_tsc_offset;
895 hvm_funcs.init_ap_context = svm_init_ap_context;
896 hvm_funcs.init_hypercall_page = svm_init_hypercall_page;
898 hvm_enabled = 1;
900 return 1;
901 }
904 static void svm_relinquish_guest_resources(struct domain *d)
905 {
906 struct vcpu *v;
908 for_each_vcpu ( d, v )
909 {
910 if ( !test_bit(_VCPUF_initialised, &v->vcpu_flags) )
911 continue;
913 destroy_vmcb(&v->arch.hvm_svm);
914 kill_timer(&v->arch.hvm_vcpu.hlt_timer);
915 if ( VLAPIC(v) != NULL )
916 {
917 kill_timer(&VLAPIC(v)->vlapic_timer);
918 unmap_domain_page_global(VLAPIC(v)->regs);
919 free_domheap_page(VLAPIC(v)->regs_page);
920 xfree(VLAPIC(v));
921 }
922 hvm_release_assist_channel(v);
923 }
925 kill_timer(&d->arch.hvm_domain.pl_time.periodic_tm.timer);
927 if ( d->arch.hvm_domain.shared_page_va )
928 unmap_domain_page_global(
929 (void *)d->arch.hvm_domain.shared_page_va);
931 if ( d->arch.hvm_domain.buffered_io_va )
932 unmap_domain_page_global((void *)d->arch.hvm_domain.buffered_io_va);
933 }
936 static void svm_migrate_timers(struct vcpu *v)
937 {
938 struct periodic_time *pt =
939 &(v->domain->arch.hvm_domain.pl_time.periodic_tm);
941 if ( pt->enabled )
942 {
943 migrate_timer(&pt->timer, v->processor);
944 migrate_timer(&v->arch.hvm_vcpu.hlt_timer, v->processor);
945 }
946 if ( VLAPIC(v) != NULL )
947 migrate_timer(&VLAPIC(v)->vlapic_timer, v->processor);
948 }
951 void arch_svm_do_resume(struct vcpu *v)
952 {
953 /* pinning VCPU to a different core? */
954 if ( v->arch.hvm_svm.launch_core == smp_processor_id()) {
955 hvm_do_resume( v );
956 reset_stack_and_jump( svm_asm_do_resume );
957 }
958 else {
959 if (svm_dbg_on)
960 printk("VCPU core pinned: %d to %d\n",
961 v->arch.hvm_svm.launch_core, smp_processor_id() );
962 v->arch.hvm_svm.launch_core = smp_processor_id();
963 svm_migrate_timers( v );
964 hvm_do_resume( v );
965 reset_stack_and_jump( svm_asm_do_resume );
966 }
967 }
971 static int svm_do_page_fault(unsigned long va, struct cpu_user_regs *regs)
972 {
973 struct vcpu *v = current;
974 unsigned long eip;
975 int result;
976 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
978 ASSERT(vmcb);
980 //#if HVM_DEBUG
981 eip = vmcb->rip;
982 HVM_DBG_LOG(DBG_LEVEL_VMMU,
983 "svm_do_page_fault = 0x%lx, eip = %lx, error_code = %lx",
984 va, eip, (unsigned long)regs->error_code);
985 //#endif
987 result = shadow_fault(va, regs);
989 if( result ) {
990 /* Let's make sure that the Guest TLB is flushed */
991 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
992 }
994 return result;
995 }
998 static void svm_do_no_device_fault(struct vmcb_struct *vmcb)
999 {
1000 struct vcpu *v = current;
1002 setup_fpu(v);
1003 vmcb->exception_intercepts &= ~EXCEPTION_BITMAP_NM;
1005 if ( !(v->arch.hvm_svm.cpu_shadow_cr0 & X86_CR0_TS) )
1006 vmcb->cr0 &= ~X86_CR0_TS;
1007 }
1010 static void svm_do_general_protection_fault(struct vcpu *v,
1011 struct cpu_user_regs *regs)
1012 {
1013 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1014 unsigned long eip, error_code;
1016 ASSERT(vmcb);
1018 eip = vmcb->rip;
1019 error_code = vmcb->exitinfo1;
1021 if (vmcb->idtr.limit == 0) {
1022 printf("Huh? We got a GP Fault with an invalid IDTR!\n");
1023 svm_dump_vmcb(__func__, vmcb);
1024 svm_dump_regs(__func__, regs);
1025 svm_dump_inst(vmcb->rip);
1026 __hvm_bug(regs);
1027 }
1029 HVM_DBG_LOG(DBG_LEVEL_1,
1030 "svm_general_protection_fault: eip = %lx, erro_code = %lx",
1031 eip, error_code);
1033 HVM_DBG_LOG(DBG_LEVEL_1,
1034 "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx",
1035 (unsigned long)regs->eax, (unsigned long)regs->ebx,
1036 (unsigned long)regs->ecx, (unsigned long)regs->edx,
1037 (unsigned long)regs->esi, (unsigned long)regs->edi);
1039 /* Reflect it back into the guest */
1040 svm_inject_exception(v, TRAP_gp_fault, 1, error_code);
1041 }
1043 /* Reserved bits ECX: [31:14], [12:4], [2:1]*/
1044 #define SVM_VCPU_CPUID_L1_ECX_RESERVED 0xffffdff6
1045 /* Reserved bits EDX: [31:29], [27], [22:20], [18], [10] */
1046 #define SVM_VCPU_CPUID_L1_EDX_RESERVED 0xe8740400
1048 static void svm_vmexit_do_cpuid(struct vmcb_struct *vmcb, unsigned long input,
1049 struct cpu_user_regs *regs)
1051 unsigned int eax, ebx, ecx, edx;
1052 unsigned long eip;
1053 struct vcpu *v = current;
1054 int inst_len;
1056 ASSERT(vmcb);
1058 eip = vmcb->rip;
1060 HVM_DBG_LOG(DBG_LEVEL_1,
1061 "do_cpuid: (eax) %lx, (ebx) %lx, (ecx) %lx, (edx) %lx,"
1062 " (esi) %lx, (edi) %lx",
1063 (unsigned long)regs->eax, (unsigned long)regs->ebx,
1064 (unsigned long)regs->ecx, (unsigned long)regs->edx,
1065 (unsigned long)regs->esi, (unsigned long)regs->edi);
1067 if ( !cpuid_hypervisor_leaves(input, &eax, &ebx, &ecx, &edx) )
1069 cpuid(input, &eax, &ebx, &ecx, &edx);
1070 if (input == 0x00000001 || input == 0x80000001 )
1072 if ( !hvm_apic_support(v->domain) ||
1073 !vlapic_global_enabled((VLAPIC(v))) )
1075 /* Since the apic is disabled, avoid any confusion
1076 about SMP cpus being available */
1077 clear_bit(X86_FEATURE_APIC, &edx);
1079 #if CONFIG_PAGING_LEVELS >= 3
1080 if ( !v->domain->arch.hvm_domain.params[HVM_PARAM_PAE_ENABLED] )
1081 #endif
1083 clear_bit(X86_FEATURE_PAE, &edx);
1084 if (input == 0x80000001 )
1085 clear_bit(X86_FEATURE_NX & 31, &edx);
1087 clear_bit(X86_FEATURE_PSE36, &edx);
1088 if (input == 0x00000001 )
1090 /* Clear out reserved bits. */
1091 ecx &= ~SVM_VCPU_CPUID_L1_ECX_RESERVED;
1092 edx &= ~SVM_VCPU_CPUID_L1_EDX_RESERVED;
1094 clear_bit(X86_FEATURE_MWAIT & 31, &ecx);
1096 /* Guest should only see one logical processor.
1097 * See details on page 23 of AMD CPUID Specification.
1098 */
1099 clear_bit(X86_FEATURE_HT, &edx); /* clear the hyperthread bit */
1100 ebx &= 0xFF00FFFF; /* clear the logical processor count when HTT=0 */
1101 ebx |= 0x00010000; /* set to 1 just for precaution */
1103 else
1105 /* Clear the Cmp_Legacy bit
1106 * This bit is supposed to be zero when HTT = 0.
1107 * See details on page 23 of AMD CPUID Specification.
1108 */
1109 clear_bit(X86_FEATURE_CMP_LEGACY & 31, &ecx);
1110 /* Make SVM feature invisible to the guest. */
1111 clear_bit(X86_FEATURE_SVME & 31, &ecx);
1112 #ifdef __i386__
1113 /* Mask feature for Intel ia32e or AMD long mode. */
1114 clear_bit(X86_FEATURE_LAHF_LM & 31, &ecx);
1116 clear_bit(X86_FEATURE_LM & 31, &edx);
1117 clear_bit(X86_FEATURE_SYSCALL & 31, &edx);
1118 #endif
1119 /* So far, we do not support 3DNow for the guest. */
1120 clear_bit(X86_FEATURE_3DNOW & 31, &edx);
1121 clear_bit(X86_FEATURE_3DNOWEXT & 31, &edx);
1124 else if ( ( input == 0x80000007 ) || ( input == 0x8000000A ) )
1126 /* Mask out features of power management and SVM extension. */
1127 eax = ebx = ecx = edx = 0;
1129 else if ( input == 0x80000008 )
1131 /* Make sure Number of CPU core is 1 when HTT=0 */
1132 ecx &= 0xFFFFFF00;
1136 regs->eax = (unsigned long)eax;
1137 regs->ebx = (unsigned long)ebx;
1138 regs->ecx = (unsigned long)ecx;
1139 regs->edx = (unsigned long)edx;
1141 HVM_DBG_LOG(DBG_LEVEL_1,
1142 "svm_vmexit_do_cpuid: eip: %lx, input: %lx, out:eax=%x, "
1143 "ebx=%x, ecx=%x, edx=%x",
1144 eip, input, eax, ebx, ecx, edx);
1146 inst_len = __get_instruction_length(vmcb, INSTR_CPUID, NULL);
1147 ASSERT(inst_len > 0);
1148 __update_guest_eip(vmcb, inst_len);
1152 static inline unsigned long *get_reg_p(unsigned int gpreg,
1153 struct cpu_user_regs *regs, struct vmcb_struct *vmcb)
1154 {
1155 unsigned long *reg_p = NULL;
1156 switch (gpreg)
1157 {
1158 case SVM_REG_EAX:
1159 reg_p = (unsigned long *)&regs->eax;
1160 break;
1161 case SVM_REG_EBX:
1162 reg_p = (unsigned long *)&regs->ebx;
1163 break;
1164 case SVM_REG_ECX:
1165 reg_p = (unsigned long *)&regs->ecx;
1166 break;
1167 case SVM_REG_EDX:
1168 reg_p = (unsigned long *)&regs->edx;
1169 break;
1170 case SVM_REG_EDI:
1171 reg_p = (unsigned long *)&regs->edi;
1172 break;
1173 case SVM_REG_ESI:
1174 reg_p = (unsigned long *)&regs->esi;
1175 break;
1176 case SVM_REG_EBP:
1177 reg_p = (unsigned long *)&regs->ebp;
1178 break;
1179 case SVM_REG_ESP:
1180 reg_p = (unsigned long *)&vmcb->rsp;
1181 break;
1182 #ifdef __x86_64__
1183 case SVM_REG_R8:
1184 reg_p = (unsigned long *)&regs->r8;
1185 break;
1186 case SVM_REG_R9:
1187 reg_p = (unsigned long *)&regs->r9;
1188 break;
1189 case SVM_REG_R10:
1190 reg_p = (unsigned long *)&regs->r10;
1191 break;
1192 case SVM_REG_R11:
1193 reg_p = (unsigned long *)&regs->r11;
1194 break;
1195 case SVM_REG_R12:
1196 reg_p = (unsigned long *)&regs->r12;
1197 break;
1198 case SVM_REG_R13:
1199 reg_p = (unsigned long *)&regs->r13;
1200 break;
1201 case SVM_REG_R14:
1202 reg_p = (unsigned long *)&regs->r14;
1203 break;
1204 case SVM_REG_R15:
1205 reg_p = (unsigned long *)&regs->r15;
1206 break;
1207 #endif
1208 default:
1209 BUG();
1210 }
1212 return reg_p;
1213 }
1216 static inline unsigned long get_reg(unsigned int gpreg,
1217 struct cpu_user_regs *regs, struct vmcb_struct *vmcb)
1218 {
1219 unsigned long *gp;
1220 gp = get_reg_p(gpreg, regs, vmcb);
1221 return *gp;
1222 }
1225 static inline void set_reg(unsigned int gpreg, unsigned long value,
1226 struct cpu_user_regs *regs, struct vmcb_struct *vmcb)
1227 {
1228 unsigned long *gp;
1229 gp = get_reg_p(gpreg, regs, vmcb);
1230 *gp = value;
1231 }
1234 static void svm_dr_access(struct vcpu *v, struct cpu_user_regs *regs)
1235 {
1236 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1238 v->arch.hvm_vcpu.flag_dr_dirty = 1;
1240 __restore_debug_registers(v);
1242 /* allow the guest full access to the debug registers */
1243 vmcb->dr_intercepts = 0;
1244 }
1247 static void svm_get_prefix_info(
1248 struct vmcb_struct *vmcb,
1249 unsigned int dir, segment_selector_t **seg, unsigned int *asize)
1251 unsigned char inst[MAX_INST_LEN];
1252 int i;
1254 memset(inst, 0, MAX_INST_LEN);
1255 if (inst_copy_from_guest(inst, svm_rip2pointer(vmcb), sizeof(inst))
1256 != MAX_INST_LEN)
1258 printk("%s: get guest instruction failed\n", __func__);
1259 domain_crash_synchronous();
1262 for (i = 0; i < MAX_INST_LEN; i++)
1264 switch (inst[i])
1266 case 0xf3: /* REPZ */
1267 case 0xf2: /* REPNZ */
1268 case 0xf0: /* LOCK */
1269 case 0x66: /* data32 */
1270 #ifdef __x86_64__
1271 /* REX prefixes */
1272 case 0x40:
1273 case 0x41:
1274 case 0x42:
1275 case 0x43:
1276 case 0x44:
1277 case 0x45:
1278 case 0x46:
1279 case 0x47:
1281 case 0x48:
1282 case 0x49:
1283 case 0x4a:
1284 case 0x4b:
1285 case 0x4c:
1286 case 0x4d:
1287 case 0x4e:
1288 case 0x4f:
1289 #endif
1290 continue;
1291 case 0x67: /* addr32 */
1292 *asize ^= 48; /* Switch 16/32 bits */
1293 continue;
1294 case 0x2e: /* CS */
1295 *seg = &vmcb->cs;
1296 continue;
1297 case 0x36: /* SS */
1298 *seg = &vmcb->ss;
1299 continue;
1300 case 0x26: /* ES */
1301 *seg = &vmcb->es;
1302 continue;
1303 case 0x64: /* FS */
1304 *seg = &vmcb->fs;
1305 continue;
1306 case 0x65: /* GS */
1307 *seg = &vmcb->gs;
1308 continue;
1309 case 0x3e: /* DS */
1310 *seg = &vmcb->ds;
1311 continue;
1312 default:
1313 break;
1315 return;
1320 /* Get the address of INS/OUTS instruction */
1321 static inline int svm_get_io_address(
1322 struct vcpu *v,
1323 struct cpu_user_regs *regs, unsigned int dir,
1324 unsigned long *count, unsigned long *addr)
1326 unsigned long reg;
1327 unsigned int asize = 0;
1328 unsigned int isize;
1329 int long_mode;
1330 ioio_info_t info;
1331 segment_selector_t *seg = NULL;
1332 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1334 info.bytes = vmcb->exitinfo1;
1336 /* If we're in long mode, we shouldn't check the segment presence & limit */
1337 long_mode = vmcb->cs.attributes.fields.l && vmcb->efer & EFER_LMA;
1339 /* d field of cs.attributes is 1 for 32-bit, 0 for 16 or 64 bit.
1340 * l field combined with EFER_LMA -> longmode says whether it's 16 or 64 bit.
1341 */
1342 asize = (long_mode)?64:((vmcb->cs.attributes.fields.db)?32:16);
1345 /* The ins/outs instructions are single byte, so if we have got more
1346 * than one byte (+ maybe rep-prefix), we have some prefix so we need
1347 * to figure out what it is...
1348 */
1349 isize = vmcb->exitinfo2 - vmcb->rip;
1351 if (info.fields.rep)
1352 isize --;
1354 if (isize > 1)
1356 svm_get_prefix_info(vmcb, dir, &seg, &asize);
1359 ASSERT(dir == IOREQ_READ || dir == IOREQ_WRITE);
1361 if (dir == IOREQ_WRITE)
1363 reg = regs->esi;
1364 if (!seg) /* If no prefix, use DS. */
1365 seg = &vmcb->ds;
1367 else
1369 reg = regs->edi;
1370 seg = &vmcb->es; /* Note: This is ALWAYS ES. */
1373 /* If the segment isn't present, give GP fault! */
1374 if (!long_mode && !seg->attributes.fields.p)
1376 svm_inject_exception(v, TRAP_gp_fault, 1, seg->sel);
1377 return 0;
1380 if (asize == 16)
1382 *addr = (reg & 0xFFFF);
1383 *count = regs->ecx & 0xffff;
1385 else
1387 *addr = reg;
1388 *count = regs->ecx;
1391 if (!long_mode) {
1392 if (*addr > seg->limit)
1394 svm_inject_exception(v, TRAP_gp_fault, 1, seg->sel);
1395 return 0;
1397 else
1399 *addr += seg->base;
1404 return 1;
1408 static void svm_io_instruction(struct vcpu *v)
1410 struct cpu_user_regs *regs;
1411 struct hvm_io_op *pio_opp;
1412 unsigned int port;
1413 unsigned int size, dir;
1414 ioio_info_t info;
1415 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1417 ASSERT(vmcb);
1418 pio_opp = &current->arch.hvm_vcpu.io_op;
1419 pio_opp->instr = INSTR_PIO;
1420 pio_opp->flags = 0;
1422 regs = &pio_opp->io_context;
1424 /* Copy current guest state into io instruction state structure. */
1425 memcpy(regs, guest_cpu_user_regs(), HVM_CONTEXT_STACK_BYTES);
1426 hvm_store_cpu_guest_regs(v, regs, NULL);
1428 info.bytes = vmcb->exitinfo1;
1430 port = info.fields.port; /* port used to be addr */
1431 dir = info.fields.type; /* direction */
1432 if (info.fields.sz32)
1433 size = 4;
1434 else if (info.fields.sz16)
1435 size = 2;
1436 else
1437 size = 1;
1439 HVM_DBG_LOG(DBG_LEVEL_IO,
1440 "svm_io_instruction: port 0x%x eip=%x:%"PRIx64", "
1441 "exit_qualification = %"PRIx64,
1442 port, vmcb->cs.sel, vmcb->rip, info.bytes);
1444 /* string instruction */
1445 if (info.fields.str)
1447 unsigned long addr, count;
1448 int sign = regs->eflags & EF_DF ? -1 : 1;
1450 if (!svm_get_io_address(v, regs, dir, &count, &addr))
1452 /* We failed to get a valid address, so don't do the IO operation -
1453 * it would just get worse if we do! Hopefully the guest is handling
1454 * gp-faults...
1455 */
1456 return;
1459 /* "rep" prefix */
1460 if (info.fields.rep)
1462 pio_opp->flags |= REPZ;
1464 else
1466 count = 1;
1469 /*
1470 * Handle string pio instructions that cross pages or that
1471 * are unaligned. See the comments in hvm_platform.c/handle_mmio()
1472 */
1473 if ((addr & PAGE_MASK) != ((addr + size - 1) & PAGE_MASK))
1475 unsigned long value = 0;
1477 pio_opp->flags |= OVERLAP;
1479 if (dir == IOREQ_WRITE)
1480 (void)hvm_copy_from_guest_virt(&value, addr, size);
1482 send_pio_req(regs, port, 1, size, value, dir, 0);
1484 else
1486 if ((addr & PAGE_MASK) != ((addr + count * size - 1) & PAGE_MASK))
1488 if (sign > 0)
1489 count = (PAGE_SIZE - (addr & ~PAGE_MASK)) / size;
1490 else
1491 count = (addr & ~PAGE_MASK) / size;
1493 else
1494 regs->eip = vmcb->exitinfo2;
1496 send_pio_req(regs, port, count, size, addr, dir, 1);
1499 else
1501 /*
1502 * On SVM, the RIP of the instruction following the IN/OUT is saved in
1503 * ExitInfo2
1504 */
1505 regs->eip = vmcb->exitinfo2;
1507 if (port == 0xe9 && dir == IOREQ_WRITE && size == 1)
1508 hvm_print_line(v, regs->eax); /* guest debug output */
1510 send_pio_req(regs, port, 1, size, regs->eax, dir, 0);
1514 static int svm_set_cr0(unsigned long value)
1516 struct vcpu *v = current;
1517 unsigned long mfn;
1518 int paging_enabled;
1519 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1520 unsigned long old_base_mfn;
1522 ASSERT(vmcb);
1524 /* We don't want to lose PG. ET is reserved and should always be 1. */
1525 paging_enabled = svm_paging_enabled(v);
1526 value |= X86_CR0_ET;
1527 vmcb->cr0 = value | X86_CR0_PG;
1528 v->arch.hvm_svm.cpu_shadow_cr0 = value;
1530 /* TS cleared? Then initialise FPU now. */
1531 if ( !(value & X86_CR0_TS) )
1533 setup_fpu(v);
1534 vmcb->exception_intercepts &= ~EXCEPTION_BITMAP_NM;
1537 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR0 value = %lx\n", value);
1539 if ((value & X86_CR0_PE) && (value & X86_CR0_PG) && !paging_enabled)
1541 /* The guest CR3 must be pointing to the guest physical. */
1542 if (!VALID_MFN(mfn =
1543 get_mfn_from_gpfn(v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT))
1544 || !get_page(mfn_to_page(mfn), v->domain))
1546 printk("Invalid CR3 value = %lx\n", v->arch.hvm_svm.cpu_cr3);
1547 domain_crash_synchronous(); /* need to take a clean path */
1550 #if defined(__x86_64__)
1551 if (test_bit(SVM_CPU_STATE_LME_ENABLED, &v->arch.hvm_svm.cpu_state)
1552 && !test_bit(SVM_CPU_STATE_PAE_ENABLED,
1553 &v->arch.hvm_svm.cpu_state))
1555 HVM_DBG_LOG(DBG_LEVEL_1, "Enable paging before PAE enable\n");
1556 svm_inject_exception(v, TRAP_gp_fault, 1, 0);
1559 if (test_bit(SVM_CPU_STATE_LME_ENABLED, &v->arch.hvm_svm.cpu_state))
1561 /* PAE should already be enabled at this point. */
1562 HVM_DBG_LOG(DBG_LEVEL_1, "Enable the Long mode\n");
1563 set_bit(SVM_CPU_STATE_LMA_ENABLED,
1564 &v->arch.hvm_svm.cpu_state);
1565 vmcb->efer |= (EFER_LMA | EFER_LME);
1567 #endif /* __x86_64__ */
1569 /* Now arch.guest_table points to machine physical. */
1570 old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
1571 v->arch.guest_table = pagetable_from_pfn(mfn);
1572 if ( old_base_mfn )
1573 put_page(mfn_to_page(old_base_mfn));
1574 shadow_update_paging_modes(v);
1576 HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx",
1577 (unsigned long) (mfn << PAGE_SHIFT));
1579 vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3;
1580 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
1583 if ( !((value & X86_CR0_PE) && (value & X86_CR0_PG)) && paging_enabled )
1584 if ( v->arch.hvm_svm.cpu_cr3 ) {
1585 put_page(mfn_to_page(get_mfn_from_gpfn(
1586 v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT)));
1587 v->arch.guest_table = pagetable_null();
1590 /*
1591 * SVM implements paged real-mode and when we return to real-mode
1592 * we revert back to the physical mappings that the domain builder
1593 * created.
1594 */
1595 if ((value & X86_CR0_PE) == 0) {
1596 if (value & X86_CR0_PG) {
1597 svm_inject_exception(v, TRAP_gp_fault, 1, 0);
1598 return 0;
1600 shadow_update_paging_modes(v);
1601 vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3;
1602 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
1604 else if ( (value & (X86_CR0_PE | X86_CR0_PG)) == X86_CR0_PE )
1606 /* we should take care of this kind of situation */
1607 shadow_update_paging_modes(v);
1608 vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3;
1609 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
1612 return 1;
1615 /*
1616 * Read from control registers. CR0 and CR4 are read from the shadow.
1617 */
1618 static void mov_from_cr(int cr, int gp, struct cpu_user_regs *regs)
1620 unsigned long value = 0;
1621 struct vcpu *v = current;
1622 struct vlapic *vlapic = VLAPIC(v);
1623 struct vmcb_struct *vmcb;
1625 vmcb = v->arch.hvm_svm.vmcb;
1626 ASSERT(vmcb);
1628 switch (cr)
1630 case 0:
1631 value = v->arch.hvm_svm.cpu_shadow_cr0;
1632 if (svm_dbg_on)
1633 printk("CR0 read =%lx \n", value );
1634 break;
1635 case 2:
1636 value = vmcb->cr2;
1637 break;
1638 case 3:
1639 value = (unsigned long) v->arch.hvm_svm.cpu_cr3;
1640 if (svm_dbg_on)
1641 printk("CR3 read =%lx \n", value );
1642 break;
1643 case 4:
1644 value = (unsigned long) v->arch.hvm_svm.cpu_shadow_cr4;
1645 if (svm_dbg_on)
1646 printk("CR4 read=%lx\n", value);
1647 break;
1648 case 8:
1649 if ( vlapic == NULL )
1650 break;
1651 value = (unsigned long)vlapic_get_reg(vlapic, APIC_TASKPRI);
1652 value = (value & 0xF0) >> 4;
1653 break;
1655 default:
1656 __hvm_bug(regs);
1659 set_reg(gp, value, regs, vmcb);
1661 HVM_DBG_LOG(DBG_LEVEL_VMMU, "mov_from_cr: CR%d, value = %lx,", cr, value);
1665 static inline int svm_pgbit_test(struct vcpu *v)
1666 {
1667 return v->arch.hvm_svm.cpu_shadow_cr0 & X86_CR0_PG;
1668 }
1671 /*
1672 * Write to control registers
1673 */
1674 static int mov_to_cr(int gpreg, int cr, struct cpu_user_regs *regs)
1676 unsigned long value;
1677 unsigned long old_cr;
1678 struct vcpu *v = current;
1679 struct vlapic *vlapic = VLAPIC(v);
1680 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1682 ASSERT(vmcb);
1684 value = get_reg(gpreg, regs, vmcb);
1686 HVM_DBG_LOG(DBG_LEVEL_1, "mov_to_cr: CR%d, value = %lx,", cr, value);
1687 HVM_DBG_LOG(DBG_LEVEL_1, "current = %lx,", (unsigned long) current);
1689 switch (cr)
1691 case 0:
1692 if (svm_dbg_on)
1693 printk("CR0 write =%lx \n", value );
1694 return svm_set_cr0(value);
1696 case 3:
1698 unsigned long old_base_mfn, mfn;
1699 if (svm_dbg_on)
1700 printk("CR3 write =%lx \n", value );
1701 /* If paging is not enabled yet, simply copy the value to CR3. */
1702 if (!svm_paging_enabled(v)) {
1703 v->arch.hvm_svm.cpu_cr3 = value;
1704 break;
1706 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
1708 /* We make a new one if the shadow does not exist. */
1709 if (value == v->arch.hvm_svm.cpu_cr3)
1711 /*
1712 * This is simple TLB flush, implying the guest has
1713 * removed some translation or changed page attributes.
1714 * We simply invalidate the shadow.
1715 */
1716 mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT);
1717 if (mfn != pagetable_get_pfn(v->arch.guest_table))
1718 __hvm_bug(regs);
1719 shadow_update_cr3(v);
1721 else
1723 /*
1724 * If different, make a shadow. Check if the PDBR is valid
1725 * first.
1726 */
1727 HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 value = %lx", value);
1728 if (((value >> PAGE_SHIFT) > v->domain->max_pages)
1729 || !VALID_MFN(mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT))
1730 || !get_page(mfn_to_page(mfn), v->domain))
1732 printk("Invalid CR3 value=%lx\n", value);
1733 domain_crash_synchronous(); /* need to take a clean path */
1736 old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
1737 v->arch.guest_table = pagetable_from_pfn(mfn);
1739 if (old_base_mfn)
1740 put_page(mfn_to_page(old_base_mfn));
1742 /*
1743 * arch.shadow_table should now hold the next CR3 for shadow
1744 */
1745 v->arch.hvm_svm.cpu_cr3 = value;
1746 update_cr3(v);
1747 vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3;
1748 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx", value);
1750 break;
1753 case 4: /* CR4 */
1755 if (svm_dbg_on)
1756 printk( "write cr4=%lx, cr0=%lx\n",
1757 value, v->arch.hvm_svm.cpu_shadow_cr0 );
1758 old_cr = v->arch.hvm_svm.cpu_shadow_cr4;
1759 if ( value & X86_CR4_PAE && !(old_cr & X86_CR4_PAE) )
1761 set_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
1762 if ( svm_pgbit_test(v) )
1764 /* The guest is a 32-bit PAE guest. */
1765 #if CONFIG_PAGING_LEVELS >= 3
1766 unsigned long mfn, old_base_mfn;
1768 if ( !VALID_MFN(mfn = get_mfn_from_gpfn(
1769 v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT)) ||
1770 !get_page(mfn_to_page(mfn), v->domain) )
1772 printk("Invalid CR3 value = %lx", v->arch.hvm_svm.cpu_cr3);
1773 domain_crash_synchronous(); /* need to take a clean path */
1776 /*
1777 * Now arch.guest_table points to machine physical.
1778 */
1780 old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
1781 v->arch.guest_table = pagetable_from_pfn(mfn);
1782 if ( old_base_mfn )
1783 put_page(mfn_to_page(old_base_mfn));
1784 shadow_update_paging_modes(v);
1786 HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx",
1787 (unsigned long) (mfn << PAGE_SHIFT));
1789 vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3;
1791 /*
1792 * arch->shadow_table should hold the next CR3 for shadow
1793 */
1795 HVM_DBG_LOG(DBG_LEVEL_VMMU,
1796 "Update CR3 value = %lx, mfn = %lx",
1797 v->arch.hvm_svm.cpu_cr3, mfn);
1798 #endif
1801 else if (value & X86_CR4_PAE) {
1802 set_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
1803 } else {
1804 if (test_bit(SVM_CPU_STATE_LMA_ENABLED,
1805 &v->arch.hvm_svm.cpu_state)) {
1806 svm_inject_exception(v, TRAP_gp_fault, 1, 0);
1808 clear_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
1811 v->arch.hvm_svm.cpu_shadow_cr4 = value;
1812 vmcb->cr4 = value | SVM_CR4_HOST_MASK;
1814 /*
1815 * Writing to CR4 to modify the PSE, PGE, or PAE flag invalidates
1816 * all TLB entries except global entries.
1817 */
1818 if ((old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE))
1820 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
1821 shadow_update_paging_modes(v);
1823 break;
1826 case 8:
1828 if ( vlapic == NULL )
1829 break;
1830 vlapic_set_reg(vlapic, APIC_TASKPRI, ((value & 0x0F) << 4));
1831 vlapic_update_ppr(vlapic);
1832 break;
1835 default:
1836 printk("invalid cr: %d\n", cr);
1837 __hvm_bug(regs);
1840 return 1;
1844 #define ARR_SIZE(x) (sizeof(x) / sizeof(x[0]))
1847 static int svm_cr_access(struct vcpu *v, unsigned int cr, unsigned int type,
1848 struct cpu_user_regs *regs)
1850 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1851 int inst_len = 0;
1852 int index;
1853 unsigned int gpreg;
1854 unsigned long value;
1855 u8 buffer[MAX_INST_LEN];
1856 u8 prefix = 0;
1857 int result = 1;
1858 enum instruction_index list_a[] = {INSTR_MOV2CR, INSTR_CLTS, INSTR_LMSW};
1859 enum instruction_index list_b[] = {INSTR_MOVCR2, INSTR_SMSW};
1860 enum instruction_index match;
1862 ASSERT(vmcb);
1864 inst_copy_from_guest(buffer, svm_rip2pointer(vmcb), sizeof(buffer));
1866 /* get index to first actual instruction byte - as we will need to know
1867 where the prefix lives later on */
1868 index = skip_prefix_bytes(buffer, sizeof(buffer));
1870 if ( type == TYPE_MOV_TO_CR )
1872 inst_len = __get_instruction_length_from_list(
1873 vmcb, list_a, ARR_SIZE(list_a), &buffer[index], &match);
1875 else /* type == TYPE_MOV_FROM_CR */
1877 inst_len = __get_instruction_length_from_list(
1878 vmcb, list_b, ARR_SIZE(list_b), &buffer[index], &match);
1881 ASSERT(inst_len > 0);
1883 inst_len += index;
1885 /* Check for REX prefix - it's ALWAYS the last byte of any prefix bytes */
1886 if (index > 0 && (buffer[index-1] & 0xF0) == 0x40)
1887 prefix = buffer[index-1];
1889 HVM_DBG_LOG(DBG_LEVEL_1, "eip = %lx", (unsigned long) vmcb->rip);
1891 switch (match)
1893 case INSTR_MOV2CR:
1894 gpreg = decode_src_reg(prefix, buffer[index+2]);
1895 result = mov_to_cr(gpreg, cr, regs);
1896 break;
1898 case INSTR_MOVCR2:
1899 gpreg = decode_src_reg(prefix, buffer[index+2]);
1900 mov_from_cr(cr, gpreg, regs);
1901 break;
1903 case INSTR_CLTS:
1904 /* TS being cleared means that it's time to restore fpu state. */
1905 setup_fpu(current);
1906 vmcb->exception_intercepts &= ~EXCEPTION_BITMAP_NM;
1907 vmcb->cr0 &= ~X86_CR0_TS; /* clear TS */
1908 v->arch.hvm_svm.cpu_shadow_cr0 &= ~X86_CR0_TS; /* clear TS */
1909 break;
1911 case INSTR_LMSW:
1912 if (svm_dbg_on)
1913 svm_dump_inst(svm_rip2pointer(vmcb));
1915 gpreg = decode_src_reg(prefix, buffer[index+2]);
1916 value = get_reg(gpreg, regs, vmcb) & 0xF;
1918 if (svm_dbg_on)
1919 printk("CR0-LMSW value=%lx, reg=%d, inst_len=%d\n", value, gpreg,
1920 inst_len);
1922 value = (v->arch.hvm_svm.cpu_shadow_cr0 & ~0xF) | value;
1924 if (svm_dbg_on)
1925 printk("CR0-LMSW CR0 - New value=%lx\n", value);
1927 result = svm_set_cr0(value);
1928 break;
1930 case INSTR_SMSW:
1931 if (svm_dbg_on)
1932 svm_dump_inst(svm_rip2pointer(vmcb));
1933 value = v->arch.hvm_svm.cpu_shadow_cr0;
1934 gpreg = decode_src_reg(prefix, buffer[index+2]);
1935 set_reg(gpreg, value, regs, vmcb);
1937 if (svm_dbg_on)
1938 printk("CR0-SMSW value=%lx, reg=%d, inst_len=%d\n", value, gpreg,
1939 inst_len);
1940 break;
1942 default:
1943 __hvm_bug(regs);
1944 break;
1947 ASSERT(inst_len);
1949 __update_guest_eip(vmcb, inst_len);
1951 return result;
1954 static inline void svm_do_msr_access(
1955 struct vcpu *v, struct cpu_user_regs *regs)
1957 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1958 int inst_len;
1959 u64 msr_content=0;
1960 u32 eax, edx;
1962 ASSERT(vmcb);
1964 HVM_DBG_LOG(DBG_LEVEL_1, "svm_do_msr_access: ecx=%lx, eax=%lx, edx=%lx, "
1965 "exitinfo = %lx", (unsigned long)regs->ecx,
1966 (unsigned long)regs->eax, (unsigned long)regs->edx,
1967 (unsigned long)vmcb->exitinfo1);
1969 /* is it a read? */
1970 if (vmcb->exitinfo1 == 0)
1972 inst_len = __get_instruction_length(vmcb, INSTR_RDMSR, NULL);
1974 regs->edx = 0;
1975 switch (regs->ecx) {
1976 case MSR_IA32_TIME_STAMP_COUNTER:
1977 msr_content = hvm_get_guest_time(v);
1978 break;
1979 case MSR_IA32_SYSENTER_CS:
1980 msr_content = vmcb->sysenter_cs;
1981 break;
1982 case MSR_IA32_SYSENTER_ESP:
1983 msr_content = vmcb->sysenter_esp;
1984 break;
1985 case MSR_IA32_SYSENTER_EIP:
1986 msr_content = vmcb->sysenter_eip;
1987 break;
1988 case MSR_IA32_APICBASE:
1989 msr_content = VLAPIC(v) ? VLAPIC(v)->apic_base_msr : 0;
1990 break;
1991 default:
1992 if (long_mode_do_msr_read(regs))
1993 goto done;
1995 if ( rdmsr_hypervisor_regs(regs->ecx, &eax, &edx) )
1997 regs->eax = eax;
1998 regs->edx = edx;
1999 goto done;
2002 rdmsr_safe(regs->ecx, regs->eax, regs->edx);
2003 break;
2005 regs->eax = msr_content & 0xFFFFFFFF;
2006 regs->edx = msr_content >> 32;
2008 else
2010 inst_len = __get_instruction_length(vmcb, INSTR_WRMSR, NULL);
2011 msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
2013 switch (regs->ecx)
2015 case MSR_IA32_TIME_STAMP_COUNTER:
2016 hvm_set_guest_time(v, msr_content);
2017 break;
2018 case MSR_IA32_SYSENTER_CS:
2019 vmcb->sysenter_cs = msr_content;
2020 break;
2021 case MSR_IA32_SYSENTER_ESP:
2022 vmcb->sysenter_esp = msr_content;
2023 break;
2024 case MSR_IA32_SYSENTER_EIP:
2025 vmcb->sysenter_eip = msr_content;
2026 break;
2027 case MSR_IA32_APICBASE:
2028 vlapic_msr_set(VLAPIC(v), msr_content);
2029 break;
2030 default:
2031 if ( !long_mode_do_msr_write(regs) )
2032 wrmsr_hypervisor_regs(regs->ecx, regs->eax, regs->edx);
2033 break;
2037 done:
2039 HVM_DBG_LOG(DBG_LEVEL_1, "svm_do_msr_access returns: "
2040 "ecx=%lx, eax=%lx, edx=%lx",
2041 (unsigned long)regs->ecx, (unsigned long)regs->eax,
2042 (unsigned long)regs->edx);
2044 __update_guest_eip(vmcb, inst_len);
2048 static inline void svm_vmexit_do_hlt(struct vmcb_struct *vmcb)
2049 {
2050 __update_guest_eip(vmcb, 1);
2052 /* Check for interrupt not handled or new interrupt. */
2053 if ( (vmcb->rflags & X86_EFLAGS_IF) &&
2054 (vmcb->vintr.fields.irq || cpu_has_pending_irq(current)) )
2055 return;
2057 hvm_hlt(vmcb->rflags);
2058 }
2061 static void svm_vmexit_do_invd(struct vmcb_struct *vmcb)
2062 {
2063 int inst_len;
2065 /* Invalidate the cache - we can't really do that safely - maybe we should
2066 * WBINVD, but I think it's just fine to completely ignore it - we should
2067 * have cache-snooping that solves it anyways. -- Mats P.
2068 */
2070 /* Tell the user that we did this - just in case someone runs some really
2071 * weird operating system and wants to know why it's not working...
2072 */
2073 printk("INVD instruction intercepted - ignored\n");
2075 inst_len = __get_instruction_length(vmcb, INSTR_INVD, NULL);
2076 __update_guest_eip(vmcb, inst_len);
2077 }
2082 #ifdef XEN_DEBUGGER
2083 static void svm_debug_save_cpu_user_regs(struct vmcb_struct *vmcb,
2084 struct cpu_user_regs *regs)
2086 regs->eip = vmcb->rip;
2087 regs->esp = vmcb->rsp;
2088 regs->eflags = vmcb->rflags;
2090 regs->xcs = vmcb->cs.sel;
2091 regs->xds = vmcb->ds.sel;
2092 regs->xes = vmcb->es.sel;
2093 regs->xfs = vmcb->fs.sel;
2094 regs->xgs = vmcb->gs.sel;
2095 regs->xss = vmcb->ss.sel;
2099 static void svm_debug_restore_cpu_user_regs(struct cpu_user_regs *regs)
2101 vmcb->ss.sel = regs->xss;
2102 vmcb->rsp = regs->esp;
2103 vmcb->rflags = regs->eflags;
2104 vmcb->cs.sel = regs->xcs;
2105 vmcb->rip = regs->eip;
2107 vmcb->gs.sel = regs->xgs;
2108 vmcb->fs.sel = regs->xfs;
2109 vmcb->es.sel = regs->xes;
2110 vmcb->ds.sel = regs->xds;
2112 #endif
2115 void svm_handle_invlpg(const short invlpga, struct cpu_user_regs *regs)
2117 struct vcpu *v = current;
2118 u8 opcode[MAX_INST_LEN], prefix, length = MAX_INST_LEN;
2119 unsigned long g_vaddr;
2120 int inst_len;
2121 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2123 ASSERT(vmcb);
2124 /*
2125 * Unknown how many bytes the invlpg instruction will take. Use the
2126 * maximum instruction length here
2127 */
2128 if (inst_copy_from_guest(opcode, svm_rip2pointer(vmcb), length) < length)
2130 printk("svm_handle_invlpg (): Error reading memory %d bytes\n",
2131 length);
2132 __hvm_bug(regs);
2135 if (invlpga)
2137 inst_len = __get_instruction_length(vmcb, INSTR_INVLPGA, opcode);
2138 ASSERT(inst_len > 0);
2139 __update_guest_eip(vmcb, inst_len);
2141 /*
2142 * The address is implicit on this instruction. At the moment, we don't
2143 * use ecx (ASID) to identify individual guest pages
2144 */
2145 g_vaddr = regs->eax;
2147 else
2149 /* What about multiple prefix codes? */
2150 prefix = (is_prefix(opcode[0])?opcode[0]:0);
2151 inst_len = __get_instruction_length(vmcb, INSTR_INVLPG, opcode);
2152 ASSERT(inst_len > 0);
2154 inst_len--;
2155 length -= inst_len;
2157 /*
2158 * Decode memory operand of the instruction including ModRM, SIB, and
2159 * displacement to get the effective address and length in bytes. Assume
2160 * the system in either 32- or 64-bit mode.
2161 */
2162 g_vaddr = get_effective_addr_modrm64(vmcb, regs, prefix,
2163 &opcode[inst_len], &length);
2165 inst_len += length;
2166 __update_guest_eip (vmcb, inst_len);
2169 /* Overkill; we may not need this. */
2170 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
2171 shadow_invlpg(v, g_vaddr);
2175 /*
2176 * Reset to realmode causes execution to start at 0xF000:0xFFF0 in
2177 * 16-bit realmode. Basically, this mimics a processor reset.
2179 * returns 0 on success, non-zero otherwise
2180 */
2181 static int svm_do_vmmcall_reset_to_realmode(struct vcpu *v,
2182 struct cpu_user_regs *regs)
2183 {
2184 struct vmcb_struct *vmcb;
2186 ASSERT(v);
2187 ASSERT(regs);
2189 vmcb = v->arch.hvm_svm.vmcb;
2191 ASSERT(vmcb);
2193 /* clear the vmcb and user regs */
2194 memset(regs, 0, sizeof(struct cpu_user_regs));
2196 /* VMCB Control */
2197 vmcb->tsc_offset = 0;
2199 /* VMCB State */
2200 vmcb->cr0 = X86_CR0_ET | X86_CR0_PG;
2201 v->arch.hvm_svm.cpu_shadow_cr0 = X86_CR0_ET;
2203 vmcb->cr2 = 0;
2204 vmcb->efer = EFER_SVME;
2206 vmcb->cr4 = SVM_CR4_HOST_MASK;
2207 v->arch.hvm_svm.cpu_shadow_cr4 = 0;
2208 clear_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
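/* cpu_shadow_cr0/cpu_shadow_cr4 hold the values the guest will read back;
 * vmcb->cr0/cr4 additionally carry the bits the host needs while shadow
 * paging is in use. */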
2210 /* This will jump to ROMBIOS */
2211 vmcb->rip = 0xFFF0;
2213 /* set up the segment registers and all their hidden state */
2214 vmcb->cs.sel = 0xF000;
2215 vmcb->cs.attributes.bytes = 0x089b;
2216 vmcb->cs.limit = 0xffff;
2217 vmcb->cs.base = 0x000F0000;
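/* Selector 0xF000 with base 0x000F0000 and rIP 0xFFF0 makes the first
 * instruction fetch come from physical address 0xFFFF0, the traditional
 * BIOS reset entry point provided by ROMBIOS. */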
2219 vmcb->ss.sel = 0x00;
2220 vmcb->ss.attributes.bytes = 0x0893;
2221 vmcb->ss.limit = 0xffff;
2222 vmcb->ss.base = 0x00;
2224 vmcb->ds.sel = 0x00;
2225 vmcb->ds.attributes.bytes = 0x0893;
2226 vmcb->ds.limit = 0xffff;
2227 vmcb->ds.base = 0x00;
2229 vmcb->es.sel = 0x00;
2230 vmcb->es.attributes.bytes = 0x0893;
2231 vmcb->es.limit = 0xffff;
2232 vmcb->es.base = 0x00;
2234 vmcb->fs.sel = 0x00;
2235 vmcb->fs.attributes.bytes = 0x0893;
2236 vmcb->fs.limit = 0xffff;
2237 vmcb->fs.base = 0x00;
2239 vmcb->gs.sel = 0x00;
2240 vmcb->gs.attributes.bytes = 0x0893;
2241 vmcb->gs.limit = 0xffff;
2242 vmcb->gs.base = 0x00;
2244 vmcb->ldtr.sel = 0x00;
2245 vmcb->ldtr.attributes.bytes = 0x0000;
2246 vmcb->ldtr.limit = 0x0;
2247 vmcb->ldtr.base = 0x00;
2249 vmcb->gdtr.sel = 0x00;
2250 vmcb->gdtr.attributes.bytes = 0x0000;
2251 vmcb->gdtr.limit = 0x0;
2252 vmcb->gdtr.base = 0x00;
2254 vmcb->tr.sel = 0;
2255 vmcb->tr.attributes.bytes = 0;
2256 vmcb->tr.limit = 0x0;
2257 vmcb->tr.base = 0;
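/* Real-mode IVT: 256 four-byte vectors at physical address 0, hence the
 * 0x3ff limit below. */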
2259 vmcb->idtr.sel = 0x00;
2260 vmcb->idtr.attributes.bytes = 0x0000;
2261 vmcb->idtr.limit = 0x3ff;
2262 vmcb->idtr.base = 0x00;
2264 vmcb->rax = 0;
2265 vmcb->rsp = 0;
2267 return 0;
2271 /*
2272 * svm_do_vmmcall - SVM VMMCALL handler
2274 * returns 0 on success, non-zero otherwise
2275 */
2276 static int svm_do_vmmcall(struct vcpu *v, struct cpu_user_regs *regs)
2278 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2279 int inst_len;
2281 ASSERT(vmcb);
2282 ASSERT(regs);
2284 inst_len = __get_instruction_length(vmcb, INSTR_VMCALL, NULL);
2285 ASSERT(inst_len > 0);
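/* Bit 31 of eax selects the SVM-internal VMMCALL services handled below
 * (reset to realmode, debug output); anything else is treated as an
 * ordinary hypercall via hvm_do_hypercall(). */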
2287 if ( regs->eax & 0x80000000 )
2289 /* VMMCALL sanity check */
2290 if ( vmcb->cpl > get_vmmcall_cpl(regs->edi) )
2292 printf("VMMCALL CPL check failed\n");
2293 return -1;
2296 /* handle the request */
2297 switch ( regs->eax )
2299 case VMMCALL_RESET_TO_REALMODE:
2300 if ( svm_do_vmmcall_reset_to_realmode(v, regs) )
2302 printf("svm_do_vmmcall_reset_to_realmode() failed\n");
2303 return -1;
2305 /* since we just reset the VMCB, return without adjusting
2306 * the eip */
2307 return 0;
2309 case VMMCALL_DEBUG:
2310 printf("DEBUG features not implemented yet\n");
2311 break;
2312 default:
2313 break;
2316 hvm_print_line(v, regs->eax); /* provides the current domain */
2318 else
2320 hvm_do_hypercall(regs);
2323 __update_guest_eip(vmcb, inst_len);
2324 return 0;
2328 void svm_dump_inst(unsigned long eip)
2330 u8 opcode[256];
2331 unsigned long ptr;
2332 int len;
2333 int i;
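/* Dump the 256-byte block (aligned down to a 256-byte boundary) that
 * contains eip, so the instruction of interest appears somewhere in the
 * hexdump below. */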
2335 ptr = eip & ~0xff;
2336 len = 0;
2338 if (hvm_copy_from_guest_virt(opcode, ptr, sizeof(opcode)) == 0)
2339 len = sizeof(opcode);
2341 printf("Code bytes around(len=%d) %lx:", len, eip);
2342 for (i = 0; i < len; i++)
2344 if ((i & 0x0f) == 0)
2345 printf("\n%08lx:", ptr+i);
2347 printf("%02x ", opcode[i]);
2350 printf("\n");
2354 void svm_dump_regs(const char *from, struct cpu_user_regs *regs)
2356 struct vcpu *v = current;
2357 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2358 unsigned long pt = pagetable_get_paddr(v->arch.shadow_table);
2360 printf("%s: guest registers from %s:\n", __func__, from);
2361 #if defined (__x86_64__)
2362 printk("rax: %016lx rbx: %016lx rcx: %016lx\n",
2363 regs->rax, regs->rbx, regs->rcx);
2364 printk("rdx: %016lx rsi: %016lx rdi: %016lx\n",
2365 regs->rdx, regs->rsi, regs->rdi);
2366 printk("rbp: %016lx rsp: %016lx r8: %016lx\n",
2367 regs->rbp, regs->rsp, regs->r8);
2368 printk("r9: %016lx r10: %016lx r11: %016lx\n",
2369 regs->r9, regs->r10, regs->r11);
2370 printk("r12: %016lx r13: %016lx r14: %016lx\n",
2371 regs->r12, regs->r13, regs->r14);
2372 printk("r15: %016lx cr0: %016lx cr3: %016lx\n",
2373 regs->r15, v->arch.hvm_svm.cpu_shadow_cr0, vmcb->cr3);
2374 #else
2375 printf("eax: %08x, ebx: %08x, ecx: %08x, edx: %08x\n",
2376 regs->eax, regs->ebx, regs->ecx, regs->edx);
2377 printf("edi: %08x, esi: %08x, ebp: %08x, esp: %08x\n",
2378 regs->edi, regs->esi, regs->ebp, regs->esp);
2379 printf("%s: guest cr0: %lx\n", __func__,
2380 v->arch.hvm_svm.cpu_shadow_cr0);
2381 printf("guest CR3 = %llx\n", vmcb->cr3);
2382 #endif
2383 printf("%s: pt = %lx\n", __func__, pt);
2387 void svm_dump_host_regs(const char *from)
2389 struct vcpu *v = current;
2390 unsigned long pt = pagetable_get_paddr(v->arch.monitor_table);
2391 unsigned long cr3, cr0;
2392 printf("Host registers at %s\n", from);
2394 __asm__ __volatile__ ("\tmov %%cr0,%0\n"
2395 "\tmov %%cr3,%1\n"
2396 : "=r" (cr0), "=r"(cr3));
2397 printf("%s: pt = %lx, cr3 = %lx, cr0 = %lx\n", __func__, pt, cr3, cr0);
2400 #ifdef SVM_EXTRA_DEBUG
2401 static char *exit_reasons[] = {
2402 [VMEXIT_CR0_READ] = "CR0_READ",
2403 [VMEXIT_CR1_READ] = "CR1_READ",
2404 [VMEXIT_CR2_READ] = "CR2_READ",
2405 [VMEXIT_CR3_READ] = "CR3_READ",
2406 [VMEXIT_CR4_READ] = "CR4_READ",
2407 [VMEXIT_CR5_READ] = "CR5_READ",
2408 [VMEXIT_CR6_READ] = "CR6_READ",
2409 [VMEXIT_CR7_READ] = "CR7_READ",
2410 [VMEXIT_CR8_READ] = "CR8_READ",
2411 [VMEXIT_CR9_READ] = "CR9_READ",
2412 [VMEXIT_CR10_READ] = "CR10_READ",
2413 [VMEXIT_CR11_READ] = "CR11_READ",
2414 [VMEXIT_CR12_READ] = "CR12_READ",
2415 [VMEXIT_CR13_READ] = "CR13_READ",
2416 [VMEXIT_CR14_READ] = "CR14_READ",
2417 [VMEXIT_CR15_READ] = "CR15_READ",
2418 [VMEXIT_CR0_WRITE] = "CR0_WRITE",
2419 [VMEXIT_CR1_WRITE] = "CR1_WRITE",
2420 [VMEXIT_CR2_WRITE] = "CR2_WRITE",
2421 [VMEXIT_CR3_WRITE] = "CR3_WRITE",
2422 [VMEXIT_CR4_WRITE] = "CR4_WRITE",
2423 [VMEXIT_CR5_WRITE] = "CR5_WRITE",
2424 [VMEXIT_CR6_WRITE] = "CR6_WRITE",
2425 [VMEXIT_CR7_WRITE] = "CR7_WRITE",
2426 [VMEXIT_CR8_WRITE] = "CR8_WRITE",
2427 [VMEXIT_CR9_WRITE] = "CR9_WRITE",
2428 [VMEXIT_CR10_WRITE] = "CR10_WRITE",
2429 [VMEXIT_CR11_WRITE] = "CR11_WRITE",
2430 [VMEXIT_CR12_WRITE] = "CR12_WRITE",
2431 [VMEXIT_CR13_WRITE] = "CR13_WRITE",
2432 [VMEXIT_CR14_WRITE] = "CR14_WRITE",
2433 [VMEXIT_CR15_WRITE] = "CR15_WRITE",
2434 [VMEXIT_DR0_READ] = "DR0_READ",
2435 [VMEXIT_DR1_READ] = "DR1_READ",
2436 [VMEXIT_DR2_READ] = "DR2_READ",
2437 [VMEXIT_DR3_READ] = "DR3_READ",
2438 [VMEXIT_DR4_READ] = "DR4_READ",
2439 [VMEXIT_DR5_READ] = "DR5_READ",
2440 [VMEXIT_DR6_READ] = "DR6_READ",
2441 [VMEXIT_DR7_READ] = "DR7_READ",
2442 [VMEXIT_DR8_READ] = "DR8_READ",
2443 [VMEXIT_DR9_READ] = "DR9_READ",
2444 [VMEXIT_DR10_READ] = "DR10_READ",
2445 [VMEXIT_DR11_READ] = "DR11_READ",
2446 [VMEXIT_DR12_READ] = "DR12_READ",
2447 [VMEXIT_DR13_READ] = "DR13_READ",
2448 [VMEXIT_DR14_READ] = "DR14_READ",
2449 [VMEXIT_DR15_READ] = "DR15_READ",
2450 [VMEXIT_DR0_WRITE] = "DR0_WRITE",
2451 [VMEXIT_DR1_WRITE] = "DR1_WRITE",
2452 [VMEXIT_DR2_WRITE] = "DR2_WRITE",
2453 [VMEXIT_DR3_WRITE] = "DR3_WRITE",
2454 [VMEXIT_DR4_WRITE] = "DR4_WRITE",
2455 [VMEXIT_DR5_WRITE] = "DR5_WRITE",
2456 [VMEXIT_DR6_WRITE] = "DR6_WRITE",
2457 [VMEXIT_DR7_WRITE] = "DR7_WRITE",
2458 [VMEXIT_DR8_WRITE] = "DR8_WRITE",
2459 [VMEXIT_DR9_WRITE] = "DR9_WRITE",
2460 [VMEXIT_DR10_WRITE] = "DR10_WRITE",
2461 [VMEXIT_DR11_WRITE] = "DR11_WRITE",
2462 [VMEXIT_DR12_WRITE] = "DR12_WRITE",
2463 [VMEXIT_DR13_WRITE] = "DR13_WRITE",
2464 [VMEXIT_DR14_WRITE] = "DR14_WRITE",
2465 [VMEXIT_DR15_WRITE] = "DR15_WRITE",
2466 [VMEXIT_EXCEPTION_DE] = "EXCEPTION_DE",
2467 [VMEXIT_EXCEPTION_DB] = "EXCEPTION_DB",
2468 [VMEXIT_EXCEPTION_NMI] = "EXCEPTION_NMI",
2469 [VMEXIT_EXCEPTION_BP] = "EXCEPTION_BP",
2470 [VMEXIT_EXCEPTION_OF] = "EXCEPTION_OF",
2471 [VMEXIT_EXCEPTION_BR] = "EXCEPTION_BR",
2472 [VMEXIT_EXCEPTION_UD] = "EXCEPTION_UD",
2473 [VMEXIT_EXCEPTION_NM] = "EXCEPTION_NM",
2474 [VMEXIT_EXCEPTION_DF] = "EXCEPTION_DF",
2475 [VMEXIT_EXCEPTION_09] = "EXCEPTION_09",
2476 [VMEXIT_EXCEPTION_TS] = "EXCEPTION_TS",
2477 [VMEXIT_EXCEPTION_NP] = "EXCEPTION_NP",
2478 [VMEXIT_EXCEPTION_SS] = "EXCEPTION_SS",
2479 [VMEXIT_EXCEPTION_GP] = "EXCEPTION_GP",
2480 [VMEXIT_EXCEPTION_PF] = "EXCEPTION_PF",
2481 [VMEXIT_EXCEPTION_15] = "EXCEPTION_15",
2482 [VMEXIT_EXCEPTION_MF] = "EXCEPTION_MF",
2483 [VMEXIT_EXCEPTION_AC] = "EXCEPTION_AC",
2484 [VMEXIT_EXCEPTION_MC] = "EXCEPTION_MC",
2485 [VMEXIT_EXCEPTION_XF] = "EXCEPTION_XF",
2486 [VMEXIT_INTR] = "INTR",
2487 [VMEXIT_NMI] = "NMI",
2488 [VMEXIT_SMI] = "SMI",
2489 [VMEXIT_INIT] = "INIT",
2490 [VMEXIT_VINTR] = "VINTR",
2491 [VMEXIT_CR0_SEL_WRITE] = "CR0_SEL_WRITE",
2492 [VMEXIT_IDTR_READ] = "IDTR_READ",
2493 [VMEXIT_GDTR_READ] = "GDTR_READ",
2494 [VMEXIT_LDTR_READ] = "LDTR_READ",
2495 [VMEXIT_TR_READ] = "TR_READ",
2496 [VMEXIT_IDTR_WRITE] = "IDTR_WRITE",
2497 [VMEXIT_GDTR_WRITE] = "GDTR_WRITE",
2498 [VMEXIT_LDTR_WRITE] = "LDTR_WRITE",
2499 [VMEXIT_TR_WRITE] = "TR_WRITE",
2500 [VMEXIT_RDTSC] = "RDTSC",
2501 [VMEXIT_RDPMC] = "RDPMC",
2502 [VMEXIT_PUSHF] = "PUSHF",
2503 [VMEXIT_POPF] = "POPF",
2504 [VMEXIT_CPUID] = "CPUID",
2505 [VMEXIT_RSM] = "RSM",
2506 [VMEXIT_IRET] = "IRET",
2507 [VMEXIT_SWINT] = "SWINT",
2508 [VMEXIT_INVD] = "INVD",
2509 [VMEXIT_PAUSE] = "PAUSE",
2510 [VMEXIT_HLT] = "HLT",
2511 [VMEXIT_INVLPG] = "INVLPG",
2512 [VMEXIT_INVLPGA] = "INVLPGA",
2513 [VMEXIT_IOIO] = "IOIO",
2514 [VMEXIT_MSR] = "MSR",
2515 [VMEXIT_TASK_SWITCH] = "TASK_SWITCH",
2516 [VMEXIT_FERR_FREEZE] = "FERR_FREEZE",
2517 [VMEXIT_SHUTDOWN] = "SHUTDOWN",
2518 [VMEXIT_VMRUN] = "VMRUN",
2519 [VMEXIT_VMMCALL] = "VMMCALL",
2520 [VMEXIT_VMLOAD] = "VMLOAD",
2521 [VMEXIT_VMSAVE] = "VMSAVE",
2522 [VMEXIT_STGI] = "STGI",
2523 [VMEXIT_CLGI] = "CLGI",
2524 [VMEXIT_SKINIT] = "SKINIT",
2525 [VMEXIT_RDTSCP] = "RDTSCP",
2526 [VMEXIT_ICEBP] = "ICEBP",
2527 [VMEXIT_NPF] = "NPF"
2528 };
2529 #endif /* SVM_EXTRA_DEBUG */
2531 #ifdef SVM_WALK_GUEST_PAGES
2532 void walk_shadow_and_guest_pt(unsigned long gva)
2534 l2_pgentry_t gpde;
2535 l2_pgentry_t spde;
2536 l1_pgentry_t gpte;
2537 l1_pgentry_t spte;
2538 struct vcpu *v = current;
2539 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2540 unsigned long gpa;
2542 gpa = shadow_gva_to_gpa(current, gva);
2543 printk( "gva = %lx, gpa=%lx, gCR3=%x\n", gva, gpa, (u32)vmcb->cr3 );
2544 if( !svm_paging_enabled(v) || mmio_space(gpa) )
2545 return;
2547 /* let's dump the guest and shadow page info */
2549 __guest_get_l2e(v, gva, &gpde);
2550 printk( "G-PDE = %x, flags=%x\n", gpde.l2, l2e_get_flags(gpde) );
2551 __shadow_get_l2e( v, gva, &spde );
2552 printk( "S-PDE = %x, flags=%x\n", spde.l2, l2e_get_flags(spde) );
2554 if ( unlikely(!(l2e_get_flags(gpde) & _PAGE_PRESENT)) )
2555 return;
2557 spte = l1e_empty();
2559 /* This is actually overkill - we only need to ensure the hl2 is in-sync.*/
2560 shadow_sync_va(v, gva);
2562 gpte.l1 = 0;
2563 __copy_from_user(&gpte, &linear_pg_table[ l1_linear_offset(gva) ],
2564 sizeof(gpte) );
2565 printk( "G-PTE = %x, flags=%x\n", gpte.l1, l1e_get_flags(gpte) );
2567 BUG(); // need to think about this, and convert usage of
2568 // phys_to_machine_mapping to use pagetable format...
2569 __copy_from_user( &spte, &phys_to_machine_mapping[ l1e_get_pfn( gpte ) ],
2570 sizeof(spte) );
2572 printk( "S-PTE = %x, flags=%x\n", spte.l1, l1e_get_flags(spte));
2574 #endif /* SVM_WALK_GUEST_PAGES */
2577 asmlinkage void svm_vmexit_handler(struct cpu_user_regs *regs)
2579 unsigned int exit_reason;
2580 unsigned long eip;
2581 struct vcpu *v = current;
2582 int error;
2583 int do_debug = 0;
2584 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2586 ASSERT(vmcb);
2588 exit_reason = vmcb->exitcode;
2589 save_svm_cpu_user_regs(v, regs);
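/* Request a full TLB flush on the next VMRUN. This is conservative:
 * the ASID machinery (svm_asid()) could in principle avoid some of
 * these flushes. */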
2591 vmcb->tlb_control = 1;
2594 if (exit_reason == VMEXIT_INVALID)
2596 svm_dump_vmcb(__func__, vmcb);
2597 domain_crash_synchronous();
2600 #ifdef SVM_EXTRA_DEBUG
2602 #if defined(__i386__)
2603 #define rip eip
2604 #endif
2606 static unsigned long intercepts_counter = 0;
2608 if (svm_dbg_on && exit_reason == VMEXIT_EXCEPTION_PF)
2610 if (svm_paging_enabled(v) &&
2611 !mmio_space(shadow_gva_to_gpa(current, vmcb->exitinfo2)))
2613 printk("I%08ld,ExC=%s(%d),IP=%x:%"PRIx64","
2614 "I1=%"PRIx64",I2=%"PRIx64",INT=%"PRIx64", "
2615 "gpa=%"PRIx64"\n", intercepts_counter,
2616 exit_reasons[exit_reason], exit_reason, regs->cs,
2617 (u64)regs->rip,
2618 (u64)vmcb->exitinfo1,
2619 (u64)vmcb->exitinfo2,
2620 (u64)vmcb->exitintinfo.bytes,
2621 (u64)shadow_gva_to_gpa(current, vmcb->exitinfo2));
2623 else
2625 printk("I%08ld,ExC=%s(%d),IP=%x:%"PRIx64","
2626 "I1=%"PRIx64",I2=%"PRIx64",INT=%"PRIx64"\n",
2627 intercepts_counter,
2628 exit_reasons[exit_reason], exit_reason, regs->cs,
2629 (u64)regs->rip,
2630 (u64)vmcb->exitinfo1,
2631 (u64)vmcb->exitinfo2,
2632 (u64)vmcb->exitintinfo.bytes );
2635 else if ( svm_dbg_on
2636 && exit_reason != VMEXIT_IOIO
2637 && exit_reason != VMEXIT_INTR)
2640 if (exit_reasons[exit_reason])
2642 printk("I%08ld,ExC=%s(%d),IP=%x:%"PRIx64","
2643 "I1=%"PRIx64",I2=%"PRIx64",INT=%"PRIx64"\n",
2644 intercepts_counter,
2645 exit_reasons[exit_reason], exit_reason, regs->cs,
2646 (u64)regs->rip,
2647 (u64)vmcb->exitinfo1,
2648 (u64)vmcb->exitinfo2,
2649 (u64)vmcb->exitintinfo.bytes);
2651 else
2653 printk("I%08ld,ExC=%d(0x%x),IP=%x:%"PRIx64","
2654 "I1=%"PRIx64",I2=%"PRIx64",INT=%"PRIx64"\n",
2655 intercepts_counter, exit_reason, exit_reason, regs->cs,
2656 (u64)regs->rip,
2657 (u64)vmcb->exitinfo1,
2658 (u64)vmcb->exitinfo2,
2659 (u64)vmcb->exitintinfo.bytes);
2663 #ifdef SVM_WALK_GUEST_PAGES
2664 if( exit_reason == VMEXIT_EXCEPTION_PF
2665 && ( ( vmcb->exitinfo2 == vmcb->rip )
2666 || vmcb->exitintinfo.bytes) )
2668 if ( svm_paging_enabled(v) &&
2669 !mmio_space(gva_to_gpa(vmcb->exitinfo2)) )
2670 walk_shadow_and_guest_pt(vmcb->exitinfo2);
2672 #endif
2674 intercepts_counter++;
2676 #if 0
2677 if (svm_dbg_on)
2678 do_debug = svm_do_debugout(exit_reason);
2679 #endif
2681 if (do_debug)
2683 printk("%s:+ guest_table = 0x%08x, monitor_table = 0x%08x, "
2684 "shadow_table = 0x%08x\n",
2685 __func__,
2686 (int) v->arch.guest_table.pfn,
2687 (int) v->arch.monitor_table.pfn,
2688 (int) v->arch.shadow_table.pfn);
2690 svm_dump_vmcb(__func__, vmcb);
2691 svm_dump_regs(__func__, regs);
2692 svm_dump_inst(svm_rip2pointer(vmcb));
2695 #if defined(__i386__)
2696 #undef rip
2697 #endif
2700 #endif /* SVM_EXTRA_DEBUG */
2703 perfc_incra(svmexits, exit_reason);
2704 eip = vmcb->rip;
2706 #ifdef SVM_EXTRA_DEBUG
2707 if (do_debug)
2709 printk("eip = %lx, exit_reason = %d (0x%x)\n",
2710 eip, exit_reason, exit_reason);
2712 #endif /* SVM_EXTRA_DEBUG */
2714 TRACE_3D(TRC_VMX_VMEXIT, v->domain->domain_id, eip, exit_reason);
2716 switch (exit_reason)
2718 case VMEXIT_EXCEPTION_DB:
2720 #ifdef XEN_DEBUGGER
2721 svm_debug_save_cpu_user_regs(regs);
2722 pdb_handle_exception(1, regs, 1);
2723 svm_debug_restore_cpu_user_regs(regs);
2724 #else
2725 svm_store_cpu_user_regs(regs, v);
2726 domain_pause_for_debugger();
2727 #endif
2729 break;
2731 case VMEXIT_NMI:
2732 do_nmi(regs, 0);
2733 break;
2735 case VMEXIT_SMI:
2736 /*
2737 * For asynchronous SMIs, we just need to allow global interrupts so
2738 * that the SMI is taken properly in the context of the host. The
2739 * standard code does a STGI after the VMEXIT, which should accomplish
2740 * this task. Continue as normal and restart the guest.
2741 */
2742 break;
2744 case VMEXIT_INIT:
2745 /*
2746 * Nothing to do; in fact, we should never get to this point.
2747 */
2748 break;
2750 case VMEXIT_EXCEPTION_BP:
2751 #ifdef XEN_DEBUGGER
2752 svm_debug_save_cpu_user_regs(regs);
2753 pdb_handle_exception(3, regs, 1);
2754 svm_debug_restore_cpu_user_regs(regs);
2755 #else
2756 if ( test_bit(_DOMF_debugging, &v->domain->domain_flags) )
2757 domain_pause_for_debugger();
2758 else
2759 svm_inject_exception(v, TRAP_int3, 0, 0);
2760 #endif
2761 break;
2763 case VMEXIT_EXCEPTION_NM:
2764 svm_do_no_device_fault(vmcb);
2765 break;
2767 case VMEXIT_EXCEPTION_GP:
2768 /* This should probably not be trapped in the future */
2769 regs->error_code = vmcb->exitinfo1;
2770 svm_do_general_protection_fault(v, regs);
2771 break;
2773 case VMEXIT_EXCEPTION_PF:
2775 unsigned long va;
2776 va = vmcb->exitinfo2;
2777 regs->error_code = vmcb->exitinfo1;
2778 HVM_DBG_LOG(DBG_LEVEL_VMMU,
2779 "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx",
2780 (unsigned long)regs->eax, (unsigned long)regs->ebx,
2781 (unsigned long)regs->ecx, (unsigned long)regs->edx,
2782 (unsigned long)regs->esi, (unsigned long)regs->edi);
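/* If svm_do_page_fault() reports that the shadow code could not handle
 * the fault (returns 0 here), reflect it to the guest: inject #PF with
 * the original error code and expose the faulting address via the
 * guest's CR2. */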
2784 if (!(error = svm_do_page_fault(va, regs)))
2786 /* Inject #PF into the guest via the VMCB event injection field */
2787 svm_inject_exception(v, TRAP_page_fault, 1, regs->error_code);
2789 v->arch.hvm_svm.cpu_cr2 = va;
2790 vmcb->cr2 = va;
2791 TRACE_3D(TRC_VMX_INT, v->domain->domain_id,
2792 VMEXIT_EXCEPTION_PF, va);
2794 break;
2797 case VMEXIT_EXCEPTION_DF:
2798 /* Dump debug info to help diagnose why the guest double-faulted. */
2799 svm_dump_vmcb(__func__, vmcb);
2800 svm_dump_regs(__func__, regs);
2801 svm_dump_inst(svm_rip2pointer(vmcb));
2802 svm_inject_exception(v, TRAP_double_fault, 1, 0);
2803 break;
2805 case VMEXIT_INTR:
2806 break;
2808 case VMEXIT_INVD:
2809 svm_vmexit_do_invd(vmcb);
2810 break;
2812 case VMEXIT_GDTR_WRITE:
2813 printk("WRITE to GDTR\n");
2814 break;
2816 case VMEXIT_TASK_SWITCH:
2817 __hvm_bug(regs);
2818 break;
2820 case VMEXIT_CPUID:
2821 svm_vmexit_do_cpuid(vmcb, regs->eax, regs);
2822 break;
2824 case VMEXIT_HLT:
2825 svm_vmexit_do_hlt(vmcb);
2826 break;
2828 case VMEXIT_INVLPG:
2829 svm_handle_invlpg(0, regs);
2830 break;
2832 case VMEXIT_INVLPGA:
2833 svm_handle_invlpg(1, regs);
2834 break;
2836 case VMEXIT_VMMCALL:
2837 svm_do_vmmcall(v, regs);
2838 break;
2840 case VMEXIT_CR0_READ:
2841 svm_cr_access(v, 0, TYPE_MOV_FROM_CR, regs);
2842 break;
2844 case VMEXIT_CR2_READ:
2845 svm_cr_access(v, 2, TYPE_MOV_FROM_CR, regs);
2846 break;
2848 case VMEXIT_CR3_READ:
2849 svm_cr_access(v, 3, TYPE_MOV_FROM_CR, regs);
2850 break;
2852 case VMEXIT_CR4_READ:
2853 svm_cr_access(v, 4, TYPE_MOV_FROM_CR, regs);
2854 break;
2856 case VMEXIT_CR8_READ:
2857 svm_cr_access(v, 8, TYPE_MOV_FROM_CR, regs);
2858 break;
2860 case VMEXIT_CR0_WRITE:
2861 svm_cr_access(v, 0, TYPE_MOV_TO_CR, regs);
2862 break;
2864 case VMEXIT_CR2_WRITE:
2865 svm_cr_access(v, 2, TYPE_MOV_TO_CR, regs);
2866 break;
2868 case VMEXIT_CR3_WRITE:
2869 svm_cr_access(v, 3, TYPE_MOV_TO_CR, regs);
2870 local_flush_tlb();
2871 break;
2873 case VMEXIT_CR4_WRITE:
2874 svm_cr_access(v, 4, TYPE_MOV_TO_CR, regs);
2875 break;
2877 case VMEXIT_CR8_WRITE:
2878 svm_cr_access(v, 8, TYPE_MOV_TO_CR, regs);
2879 break;
2881 case VMEXIT_DR0_WRITE ... VMEXIT_DR7_WRITE:
2882 svm_dr_access(v, regs);
2883 break;
2885 case VMEXIT_IOIO:
2886 svm_io_instruction(v);
2887 break;
2889 case VMEXIT_MSR:
2890 svm_do_msr_access(v, regs);
2891 break;
2893 case VMEXIT_SHUTDOWN:
2894 printk("Guest shutdown exit\n");
2895 domain_crash_synchronous();
2896 break;
2898 default:
2899 printk("unexpected VMEXIT: exit reason = 0x%x, exitinfo1 = %"PRIx64", "
2900 "exitinfo2 = %"PRIx64"\n", exit_reason,
2901 (u64)vmcb->exitinfo1, (u64)vmcb->exitinfo2);
2902 __hvm_bug(regs); /* should not happen */
2903 break;
2906 #ifdef SVM_EXTRA_DEBUG
2907 if (do_debug)
2909 printk("%s: Done switch on vmexit_code\n", __func__);
2910 svm_dump_regs(__func__, regs);
2913 if (do_debug)
2915 printk("vmexit_handler():- guest_table = 0x%08x, "
2916 "monitor_table = 0x%08x, shadow_table = 0x%08x\n",
2917 (int)v->arch.guest_table.pfn,
2918 (int)v->arch.monitor_table.pfn,
2919 (int)v->arch.shadow_table.pfn);
2920 printk("svm_vmexit_handler: Returning\n");
2922 #endif
2924 return;
2927 asmlinkage void svm_load_cr2(void)
2929 struct vcpu *v = current;
2931 local_irq_disable();
2932 asm volatile("mov %0,%%cr2": :"r" (v->arch.hvm_svm.cpu_cr2));
2935 asmlinkage void svm_asid(void)
2937 struct vcpu *v = current;
2938 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2940 /*
2941 * If we need to assign a new ASID, or we have switched cores, retire
2942 * the ASID used on the old core and assign a new ASID for the current core.
2943 */
2944 if ( test_bit( ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags ) ||
2945 ( v->arch.hvm_svm.asid_core != v->arch.hvm_svm.launch_core )) {
2946 /* recycle asid */
2947 if ( !asidpool_assign_next(vmcb, 1,
2948 v->arch.hvm_svm.asid_core,
2949 v->arch.hvm_svm.launch_core) )
2951 /* If we get here, we have a major problem */
2952 domain_crash_synchronous();
2955 v->arch.hvm_svm.asid_core = v->arch.hvm_svm.launch_core;
2956 clear_bit( ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags );
2960 /*
2961 * Local variables:
2962 * mode: C
2963 * c-set-style: "BSD"
2964 * c-basic-offset: 4
2965 * tab-width: 4
2966 * indent-tabs-mode: nil
2967 * End:
2968 */