ia64/xen-unstable

view xen/arch/x86/hvm/svm/svm.c @ 12599:93e657836d07

[XEN] Remove VALID_MFN(); replace uses with mfn_valid().
Signed-off-by: Keir Fraser <keir@xensource.com>
author kfraser@localhost.localdomain
date Mon Nov 27 17:48:24 2006 +0000 (2006-11-27)
parents 84c0f49de1b1
children 62b0b520ea53
line source
1 /*
2 * svm.c: handling SVM architecture-related VM exits
3 * Copyright (c) 2004, Intel Corporation.
4 * Copyright (c) 2005, AMD Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
17 * Place - Suite 330, Boston, MA 02111-1307 USA.
18 *
19 */
21 #include <xen/config.h>
22 #include <xen/init.h>
23 #include <xen/lib.h>
24 #include <xen/trace.h>
25 #include <xen/sched.h>
26 #include <xen/irq.h>
27 #include <xen/softirq.h>
28 #include <xen/hypercall.h>
29 #include <xen/domain_page.h>
30 #include <asm/current.h>
31 #include <asm/io.h>
32 #include <asm/shadow.h>
33 #include <asm/regs.h>
34 #include <asm/cpufeature.h>
35 #include <asm/processor.h>
36 #include <asm/types.h>
37 #include <asm/msr.h>
38 #include <asm/spinlock.h>
39 #include <asm/hvm/hvm.h>
40 #include <asm/hvm/support.h>
41 #include <asm/hvm/io.h>
42 #include <asm/hvm/svm/svm.h>
43 #include <asm/hvm/svm/vmcb.h>
44 #include <asm/hvm/svm/emulate.h>
45 #include <asm/hvm/svm/vmmcall.h>
46 #include <asm/hvm/svm/intr.h>
47 #include <asm/x86_emulate.h>
48 #include <public/sched.h>
50 #define SVM_EXTRA_DEBUG
52 #define set_segment_register(name, value) \
53 __asm__ __volatile__ ( "movw %%ax ,%%" STR(name) "" : : "a" (value) )
55 /* External functions. We should move these to some suitable header file(s) */
57 extern int inst_copy_from_guest(unsigned char *buf, unsigned long guest_eip,
58 int inst_len);
59 extern asmlinkage void do_IRQ(struct cpu_user_regs *);
60 extern void svm_dump_inst(unsigned long eip);
61 extern int svm_dbg_on;
62 void svm_dump_regs(const char *from, struct cpu_user_regs *regs);
64 static int svm_do_vmmcall_reset_to_realmode(struct vcpu *v,
65 struct cpu_user_regs *regs);
67 /* va of hardware host save area */
68 static void *hsa[NR_CPUS] __read_mostly;
70 /* vmcb used for extended host state */
71 static void *root_vmcb[NR_CPUS] __read_mostly;
73 /* physical address of above for host VMSAVE/VMLOAD */
74 u64 root_vmcb_pa[NR_CPUS] __read_mostly;
77 /* ASID API */
78 enum {
79 ASID_AVAILABLE = 0,
80 ASID_INUSE,
81 ASID_RETIRED
82 };
83 #define INITIAL_ASID 0
84 #define ASID_MAX 64
86 struct asid_pool {
87 spinlock_t asid_lock;
88 u32 asid[ASID_MAX];
89 };
91 static DEFINE_PER_CPU(struct asid_pool, asid_pool);
94 /*
95 * Initializes the POOL of ASID used by the guests per core.
96 */
97 void asidpool_init(int core)
98 {
99 int i;
101 spin_lock_init(&per_cpu(asid_pool,core).asid_lock);
103 /* Host ASID is always in use */
104 per_cpu(asid_pool,core).asid[INITIAL_ASID] = ASID_INUSE;
105 for ( i = 1; i < ASID_MAX; i++ )
106 per_cpu(asid_pool,core).asid[i] = ASID_AVAILABLE;
107 }
110 /* internal function to get the next available ASID */
111 static int asidpool_fetch_next(struct vmcb_struct *vmcb, int core)
112 {
113 int i;
114 for ( i = 1; i < ASID_MAX; i++ )
115 {
116 if ( per_cpu(asid_pool,core).asid[i] == ASID_AVAILABLE )
117 {
118 vmcb->guest_asid = i;
119 per_cpu(asid_pool,core).asid[i] = ASID_INUSE;
120 return i;
121 }
122 }
123 return -1;
124 }
127 /*
128 * This functions assigns on the passed VMCB, the next
129 * available ASID number. If none are available, the
130 * TLB flush flag is set, and all retireds ASID
131 * are made available.
132 *
133 * Returns: 1 -- sucess;
134 * 0 -- failure -- no more ASID numbers
135 * available.
136 */
137 int asidpool_assign_next( struct vmcb_struct *vmcb, int retire_current,
138 int oldcore, int newcore )
139 {
140 int i;
141 int res = 1;
142 static unsigned long cnt=0;
144 spin_lock(&per_cpu(asid_pool,oldcore).asid_lock);
145 if( retire_current && vmcb->guest_asid ) {
146 per_cpu(asid_pool,oldcore).asid[vmcb->guest_asid & (ASID_MAX-1)] =
147 ASID_RETIRED;
148 }
149 spin_unlock(&per_cpu(asid_pool,oldcore).asid_lock);
150 spin_lock(&per_cpu(asid_pool,newcore).asid_lock);
151 if( asidpool_fetch_next( vmcb, newcore ) < 0 ) {
152 if (svm_dbg_on)
153 printk( "SVM: tlb(%ld)\n", cnt++ );
154 /* FLUSH the TLB and all retired slots are made available */
155 vmcb->tlb_control = 1;
156 for( i = 1; i < ASID_MAX; i++ ) {
157 if( per_cpu(asid_pool,newcore).asid[i] == ASID_RETIRED ) {
158 per_cpu(asid_pool,newcore).asid[i] = ASID_AVAILABLE;
159 }
160 }
161 /* Get the First slot available */
162 res = asidpool_fetch_next( vmcb, newcore ) > 0;
163 }
164 spin_unlock(&per_cpu(asid_pool,newcore).asid_lock);
165 return res;
166 }
168 void asidpool_retire( struct vmcb_struct *vmcb, int core )
169 {
170 spin_lock(&per_cpu(asid_pool,core).asid_lock);
171 if( vmcb->guest_asid ) {
172 per_cpu(asid_pool,core).asid[vmcb->guest_asid & (ASID_MAX-1)] =
173 ASID_RETIRED;
174 }
175 spin_unlock(&per_cpu(asid_pool,core).asid_lock);
176 }
178 static inline void svm_inject_exception(struct vcpu *v, int trap,
179 int ev, int error_code)
180 {
181 eventinj_t event;
182 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
184 event.bytes = 0;
185 event.fields.v = 1;
186 event.fields.type = EVENTTYPE_EXCEPTION;
187 event.fields.vector = trap;
188 event.fields.ev = ev;
189 event.fields.errorcode = error_code;
191 ASSERT(vmcb->eventinj.fields.v == 0);
193 vmcb->eventinj = event;
194 v->arch.hvm_svm.inject_event=1;
195 }
197 static void stop_svm(void)
198 {
199 u32 eax, edx;
200 int cpu = smp_processor_id();
202 /* We turn off the EFER_SVME bit. */
203 rdmsr(MSR_EFER, eax, edx);
204 eax &= ~EFER_SVME;
205 wrmsr(MSR_EFER, eax, edx);
207 /* release the HSA */
208 free_host_save_area(hsa[cpu]);
209 hsa[cpu] = NULL;
210 wrmsr(MSR_K8_VM_HSAVE_PA, 0, 0 );
212 /* free up the root vmcb */
213 free_vmcb(root_vmcb[cpu]);
214 root_vmcb[cpu] = NULL;
215 root_vmcb_pa[cpu] = 0;
216 }
218 static void svm_store_cpu_guest_regs(
219 struct vcpu *v, struct cpu_user_regs *regs, unsigned long *crs)
220 {
221 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
223 if ( regs != NULL )
224 {
225 regs->eip = vmcb->rip;
226 regs->esp = vmcb->rsp;
227 regs->eflags = vmcb->rflags;
228 regs->cs = vmcb->cs.sel;
229 regs->ds = vmcb->ds.sel;
230 regs->es = vmcb->es.sel;
231 regs->ss = vmcb->ss.sel;
232 regs->gs = vmcb->gs.sel;
233 regs->fs = vmcb->fs.sel;
234 }
236 if ( crs != NULL )
237 {
238 /* Returning the guest's regs */
239 crs[0] = v->arch.hvm_svm.cpu_shadow_cr0;
240 crs[2] = v->arch.hvm_svm.cpu_cr2;
241 crs[3] = v->arch.hvm_svm.cpu_cr3;
242 crs[4] = v->arch.hvm_svm.cpu_shadow_cr4;
243 }
244 }
246 static int svm_paging_enabled(struct vcpu *v)
247 {
248 unsigned long cr0;
250 cr0 = v->arch.hvm_svm.cpu_shadow_cr0;
252 return (cr0 & X86_CR0_PE) && (cr0 & X86_CR0_PG);
253 }
255 static int svm_pae_enabled(struct vcpu *v)
256 {
257 unsigned long cr4;
259 if(!svm_paging_enabled(v))
260 return 0;
262 cr4 = v->arch.hvm_svm.cpu_shadow_cr4;
264 return (cr4 & X86_CR4_PAE);
265 }
267 static int svm_long_mode_enabled(struct vcpu *v)
268 {
269 return test_bit(SVM_CPU_STATE_LMA_ENABLED, &v->arch.hvm_svm.cpu_state);
270 }
272 #define IS_CANO_ADDRESS(add) 1
274 static inline int long_mode_do_msr_read(struct cpu_user_regs *regs)
275 {
276 u64 msr_content = 0;
277 struct vcpu *vc = current;
278 struct vmcb_struct *vmcb = vc->arch.hvm_svm.vmcb;
280 switch (regs->ecx)
281 {
282 case MSR_EFER:
283 msr_content = vmcb->efer;
284 msr_content &= ~EFER_SVME;
285 break;
287 case MSR_FS_BASE:
288 msr_content = vmcb->fs.base;
289 break;
291 case MSR_GS_BASE:
292 msr_content = vmcb->gs.base;
293 break;
295 case MSR_SHADOW_GS_BASE:
296 msr_content = vmcb->kerngsbase;
297 break;
299 case MSR_STAR:
300 msr_content = vmcb->star;
301 break;
303 case MSR_LSTAR:
304 msr_content = vmcb->lstar;
305 break;
307 case MSR_CSTAR:
308 msr_content = vmcb->cstar;
309 break;
311 case MSR_SYSCALL_MASK:
312 msr_content = vmcb->sfmask;
313 break;
314 default:
315 return 0;
316 }
318 HVM_DBG_LOG(DBG_LEVEL_2, "mode_do_msr_read: msr_content: %"PRIx64"\n",
319 msr_content);
321 regs->eax = (u32)(msr_content >> 0);
322 regs->edx = (u32)(msr_content >> 32);
323 return 1;
324 }
326 static inline int long_mode_do_msr_write(struct cpu_user_regs *regs)
327 {
328 u64 msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
329 struct vcpu *v = current;
330 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
332 HVM_DBG_LOG(DBG_LEVEL_1, "mode_do_msr_write msr %lx "
333 "msr_content %"PRIx64"\n",
334 (unsigned long)regs->ecx, msr_content);
336 switch ( regs->ecx )
337 {
338 case MSR_EFER:
339 #ifdef __x86_64__
340 /* offending reserved bit will cause #GP */
341 if ( msr_content & ~(EFER_LME | EFER_LMA | EFER_NX | EFER_SCE) )
342 {
343 printk("Trying to set reserved bit in EFER: %"PRIx64"\n",
344 msr_content);
345 svm_inject_exception(v, TRAP_gp_fault, 1, 0);
346 return 0;
347 }
349 /* LME: 0 -> 1 */
350 if ( msr_content & EFER_LME &&
351 !test_bit(SVM_CPU_STATE_LME_ENABLED, &v->arch.hvm_svm.cpu_state))
352 {
353 if ( svm_paging_enabled(v) ||
354 !test_bit(SVM_CPU_STATE_PAE_ENABLED,
355 &v->arch.hvm_svm.cpu_state) )
356 {
357 printk("Trying to set LME bit when "
358 "in paging mode or PAE bit is not set\n");
359 svm_inject_exception(v, TRAP_gp_fault, 1, 0);
360 return 0;
361 }
362 set_bit(SVM_CPU_STATE_LME_ENABLED, &v->arch.hvm_svm.cpu_state);
363 }
365 /* We have already recorded that we want LME, so it will be set
366 * next time CR0 gets updated. So we clear that bit and continue.
367 */
368 if ((msr_content ^ vmcb->efer) & EFER_LME)
369 msr_content &= ~EFER_LME;
370 /* No update for LME/LMA since it have no effect */
371 #endif
372 vmcb->efer = msr_content | EFER_SVME;
373 break;
375 case MSR_FS_BASE:
376 case MSR_GS_BASE:
377 if ( !svm_long_mode_enabled(v) )
378 goto exit_and_crash;
380 if (!IS_CANO_ADDRESS(msr_content))
381 {
382 HVM_DBG_LOG(DBG_LEVEL_1, "Not cano address of msr write\n");
383 svm_inject_exception(v, TRAP_gp_fault, 1, 0);
384 }
386 if (regs->ecx == MSR_FS_BASE)
387 vmcb->fs.base = msr_content;
388 else
389 vmcb->gs.base = msr_content;
390 break;
392 case MSR_SHADOW_GS_BASE:
393 vmcb->kerngsbase = msr_content;
394 break;
396 case MSR_STAR:
397 vmcb->star = msr_content;
398 break;
400 case MSR_LSTAR:
401 vmcb->lstar = msr_content;
402 break;
404 case MSR_CSTAR:
405 vmcb->cstar = msr_content;
406 break;
408 case MSR_SYSCALL_MASK:
409 vmcb->sfmask = msr_content;
410 break;
412 default:
413 return 0;
414 }
416 return 1;
418 exit_and_crash:
419 gdprintk(XENLOG_ERR, "Fatal error writing MSR %lx\n", (long)regs->ecx);
420 domain_crash(v->domain);
421 return 1; /* handled */
422 }
/* Load hardware debug register DB<_reg> from (_v)->debugreg[_reg]. */
#define loaddebug(_v,_reg) \
    __asm__ __volatile__ ("mov %0,%%db" #_reg : : "r" ((_v)->debugreg[_reg]))

/*
 * Store hardware debug register DB<_reg> into (_v)->debugreg[_reg].
 * The destination must be an *output* operand ("=r"): as an input operand
 * the register written by the mov is simply discarded and debugreg[] is
 * never updated.
 */
#define savedebug(_v,_reg) \
    __asm__ __volatile__ ("mov %%db" #_reg ",%0" : "=r" ((_v)->debugreg[_reg]))
430 static inline void svm_save_dr(struct vcpu *v)
431 {
432 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
434 if ( !v->arch.hvm_vcpu.flag_dr_dirty )
435 return;
437 /* Clear the DR dirty flag and re-enable intercepts for DR accesses. */
438 v->arch.hvm_vcpu.flag_dr_dirty = 0;
439 v->arch.hvm_svm.vmcb->dr_intercepts = DR_INTERCEPT_ALL_WRITES;
441 savedebug(&v->arch.guest_context, 0);
442 savedebug(&v->arch.guest_context, 1);
443 savedebug(&v->arch.guest_context, 2);
444 savedebug(&v->arch.guest_context, 3);
445 v->arch.guest_context.debugreg[6] = vmcb->dr6;
446 v->arch.guest_context.debugreg[7] = vmcb->dr7;
447 }
450 static inline void __restore_debug_registers(struct vcpu *v)
451 {
452 loaddebug(&v->arch.guest_context, 0);
453 loaddebug(&v->arch.guest_context, 1);
454 loaddebug(&v->arch.guest_context, 2);
455 loaddebug(&v->arch.guest_context, 3);
456 /* DR6 and DR7 are loaded from the VMCB. */
457 }
460 static inline void svm_restore_dr(struct vcpu *v)
461 {
462 if ( unlikely(v->arch.guest_context.debugreg[7] & 0xFF) )
463 __restore_debug_registers(v);
464 }
467 static int svm_realmode(struct vcpu *v)
468 {
469 unsigned long cr0 = v->arch.hvm_svm.cpu_shadow_cr0;
470 unsigned long eflags = v->arch.hvm_svm.vmcb->rflags;
472 return (eflags & X86_EFLAGS_VM) || !(cr0 & X86_CR0_PE);
473 }
475 static int svm_guest_x86_mode(struct vcpu *v)
476 {
477 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
479 if ( vmcb->efer & EFER_LMA )
480 return (vmcb->cs.attributes.fields.l ?
481 X86EMUL_MODE_PROT64 : X86EMUL_MODE_PROT32);
483 if ( svm_realmode(v) )
484 return X86EMUL_MODE_REAL;
486 return (vmcb->cs.attributes.fields.db ?
487 X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16);
488 }
/* Intentionally empty: SVM doesn't have a HOST_CR3 equivalent to update. */
void svm_update_host_cr3(struct vcpu *v)
{
}
495 unsigned long svm_get_ctrl_reg(struct vcpu *v, unsigned int num)
496 {
497 switch ( num )
498 {
499 case 0:
500 return v->arch.hvm_svm.cpu_shadow_cr0;
501 case 2:
502 return v->arch.hvm_svm.cpu_cr2;
503 case 3:
504 return v->arch.hvm_svm.cpu_cr3;
505 case 4:
506 return v->arch.hvm_svm.cpu_shadow_cr4;
507 default:
508 BUG();
509 }
510 return 0; /* dummy */
511 }
513 static unsigned long svm_get_segment_base(struct vcpu *v, enum segment seg)
514 {
515 switch ( seg )
516 {
517 case seg_cs: return v->arch.hvm_svm.vmcb->cs.base;
518 case seg_ds: return v->arch.hvm_svm.vmcb->ds.base;
519 case seg_es: return v->arch.hvm_svm.vmcb->es.base;
520 case seg_fs: return v->arch.hvm_svm.vmcb->fs.base;
521 case seg_gs: return v->arch.hvm_svm.vmcb->gs.base;
522 case seg_ss: return v->arch.hvm_svm.vmcb->ss.base;
523 case seg_tr: return v->arch.hvm_svm.vmcb->tr.base;
524 case seg_gdtr: return v->arch.hvm_svm.vmcb->gdtr.base;
525 case seg_idtr: return v->arch.hvm_svm.vmcb->idtr.base;
526 case seg_ldtr: return v->arch.hvm_svm.vmcb->ldtr.base;
527 }
528 BUG();
529 return 0;
530 }
532 /* Make sure that xen intercepts any FP accesses from current */
533 static void svm_stts(struct vcpu *v)
534 {
535 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
537 /*
538 * If the guest does not have TS enabled then we must cause and handle an
539 * exception on first use of the FPU. If the guest *does* have TS enabled
540 * then this is not necessary: no FPU activity can occur until the guest
541 * clears CR0.TS, and we will initialise the FPU when that happens.
542 */
543 if ( !(v->arch.hvm_svm.cpu_shadow_cr0 & X86_CR0_TS) )
544 {
545 v->arch.hvm_svm.vmcb->exception_intercepts |= EXCEPTION_BITMAP_NM;
546 vmcb->cr0 |= X86_CR0_TS;
547 }
548 }
551 static void svm_set_tsc_offset(struct vcpu *v, u64 offset)
552 {
553 v->arch.hvm_svm.vmcb->tsc_offset = offset;
554 }
557 static void svm_init_ap_context(
558 struct vcpu_guest_context *ctxt, int vcpuid, int trampoline_vector)
559 {
560 memset(ctxt, 0, sizeof(*ctxt));
562 /*
563 * We execute the trampoline code in real mode. The trampoline vector
564 * passed to us is page alligned and is the physicall frame number for
565 * the code. We will execute this code in real mode.
566 */
567 ctxt->user_regs.eip = 0x0;
568 ctxt->user_regs.cs = (trampoline_vector << 8);
569 }
571 static void svm_init_hypercall_page(struct domain *d, void *hypercall_page)
572 {
573 char *p;
574 int i;
576 memset(hypercall_page, 0, PAGE_SIZE);
578 for ( i = 0; i < (PAGE_SIZE / 32); i++ )
579 {
580 p = (char *)(hypercall_page + (i * 32));
581 *(u8 *)(p + 0) = 0xb8; /* mov imm32, %eax */
582 *(u32 *)(p + 1) = i;
583 *(u8 *)(p + 5) = 0x0f; /* vmmcall */
584 *(u8 *)(p + 6) = 0x01;
585 *(u8 *)(p + 7) = 0xd9;
586 *(u8 *)(p + 8) = 0xc3; /* ret */
587 }
589 /* Don't support HYPERVISOR_iret at the moment */
590 *(u16 *)(hypercall_page + (__HYPERVISOR_iret * 32)) = 0x0b0f; /* ud2 */
591 }
594 int svm_dbg_on = 0;
596 static inline int svm_do_debugout(unsigned long exit_code)
597 {
598 int i;
600 static unsigned long counter = 0;
601 static unsigned long works[] =
602 {
603 VMEXIT_IOIO,
604 VMEXIT_HLT,
605 VMEXIT_CPUID,
606 VMEXIT_DR0_READ,
607 VMEXIT_DR1_READ,
608 VMEXIT_DR2_READ,
609 VMEXIT_DR3_READ,
610 VMEXIT_DR6_READ,
611 VMEXIT_DR7_READ,
612 VMEXIT_DR0_WRITE,
613 VMEXIT_DR1_WRITE,
614 VMEXIT_DR2_WRITE,
615 VMEXIT_DR3_WRITE,
616 VMEXIT_CR0_READ,
617 VMEXIT_CR0_WRITE,
618 VMEXIT_CR3_READ,
619 VMEXIT_CR4_READ,
620 VMEXIT_MSR,
621 VMEXIT_CR0_WRITE,
622 VMEXIT_CR3_WRITE,
623 VMEXIT_CR4_WRITE,
624 VMEXIT_EXCEPTION_PF,
625 VMEXIT_INTR,
626 VMEXIT_INVLPG,
627 VMEXIT_EXCEPTION_NM
628 };
631 #if 0
632 if (svm_dbg_on && exit_code != 0x7B)
633 return 1;
634 #endif
636 counter++;
638 #if 0
639 if ((exit_code == 0x4E
640 || exit_code == VMEXIT_CR0_READ
641 || exit_code == VMEXIT_CR0_WRITE)
642 && counter < 200000)
643 return 0;
645 if ((exit_code == 0x4E) && counter < 500000)
646 return 0;
647 #endif
649 for (i = 0; i < sizeof(works) / sizeof(works[0]); i++)
650 if (exit_code == works[i])
651 return 0;
653 return 1;
654 }
656 static void save_svm_cpu_user_regs(struct vcpu *v, struct cpu_user_regs *ctxt)
657 {
658 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
660 ASSERT(vmcb);
662 ctxt->eax = vmcb->rax;
663 ctxt->ss = vmcb->ss.sel;
664 ctxt->esp = vmcb->rsp;
665 ctxt->eflags = vmcb->rflags;
666 ctxt->cs = vmcb->cs.sel;
667 ctxt->eip = vmcb->rip;
669 ctxt->gs = vmcb->gs.sel;
670 ctxt->fs = vmcb->fs.sel;
671 ctxt->es = vmcb->es.sel;
672 ctxt->ds = vmcb->ds.sel;
673 }
675 static void svm_store_cpu_user_regs(struct cpu_user_regs *regs, struct vcpu *v)
676 {
677 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
679 regs->eip = vmcb->rip;
680 regs->esp = vmcb->rsp;
681 regs->eflags = vmcb->rflags;
682 regs->cs = vmcb->cs.sel;
683 regs->ds = vmcb->ds.sel;
684 regs->es = vmcb->es.sel;
685 regs->ss = vmcb->ss.sel;
686 }
688 /* XXX Use svm_load_cpu_guest_regs instead */
689 static void svm_load_cpu_user_regs(struct vcpu *v, struct cpu_user_regs *regs)
690 {
691 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
692 u32 *intercepts = &v->arch.hvm_svm.vmcb->exception_intercepts;
694 /* Write the guest register value into VMCB */
695 vmcb->rax = regs->eax;
696 vmcb->ss.sel = regs->ss;
697 vmcb->rsp = regs->esp;
698 vmcb->rflags = regs->eflags | 2UL;
699 vmcb->cs.sel = regs->cs;
700 vmcb->rip = regs->eip;
701 if (regs->eflags & EF_TF)
702 *intercepts |= EXCEPTION_BITMAP_DB;
703 else
704 *intercepts &= ~EXCEPTION_BITMAP_DB;
705 }
/* Thin wrapper that forwards to svm_load_cpu_user_regs(). */
static void svm_load_cpu_guest_regs(
    struct vcpu *v, struct cpu_user_regs *regs)
{
    svm_load_cpu_user_regs(v, regs);
}
713 static void arch_svm_do_launch(struct vcpu *v)
714 {
715 svm_do_launch(v);
717 if ( v->vcpu_id != 0 )
718 {
719 cpu_user_regs_t *regs = &current->arch.guest_context.user_regs;
720 u16 cs_sel = regs->cs;
721 /*
722 * This is the launch of an AP; set state so that we begin executing
723 * the trampoline code in real-mode.
724 */
725 svm_do_vmmcall_reset_to_realmode(v, regs);
726 /* Adjust the state to execute the trampoline code.*/
727 v->arch.hvm_svm.vmcb->rip = 0;
728 v->arch.hvm_svm.vmcb->cs.sel= cs_sel;
729 v->arch.hvm_svm.vmcb->cs.base = (cs_sel << 4);
730 }
732 reset_stack_and_jump(svm_asm_do_launch);
733 }
/* Context-switch-out hook: freeze the vcpu's HVM time, then save its DRs. */
static void svm_ctxt_switch_from(struct vcpu *v)
{
    hvm_freeze_time(v);

    svm_save_dr(v);
}
741 static void svm_ctxt_switch_to(struct vcpu *v)
742 {
743 #ifdef __x86_64__
744 /*
745 * This is required, because VMRUN does consistency check
746 * and some of the DOM0 selectors are pointing to
747 * invalid GDT locations, and cause AMD processors
748 * to shutdown.
749 */
750 set_segment_register(ds, 0);
751 set_segment_register(es, 0);
752 set_segment_register(ss, 0);
753 #endif
754 svm_restore_dr(v);
755 }
757 static int svm_vcpu_initialise(struct vcpu *v)
758 {
759 int rc;
761 v->arch.schedule_tail = arch_svm_do_launch;
762 v->arch.ctxt_switch_from = svm_ctxt_switch_from;
763 v->arch.ctxt_switch_to = svm_ctxt_switch_to;
765 v->arch.hvm_svm.saved_irq_vector = -1;
767 if ( (rc = svm_create_vmcb(v)) != 0 )
768 {
769 dprintk(XENLOG_WARNING,
770 "Failed to create VMCB for vcpu %d: err=%d.\n",
771 v->vcpu_id, rc);
772 return rc;
773 }
775 return 0;
776 }
/* Tear down the vcpu's SVM state by destroying its VMCB. */
static void svm_vcpu_destroy(struct vcpu *v)
{
    svm_destroy_vmcb(v);
}
783 int start_svm(void)
784 {
785 u32 eax, ecx, edx;
786 u32 phys_hsa_lo, phys_hsa_hi;
787 u64 phys_hsa;
788 int cpu = smp_processor_id();
790 /* Xen does not fill x86_capability words except 0. */
791 ecx = cpuid_ecx(0x80000001);
792 boot_cpu_data.x86_capability[5] = ecx;
794 if (!(test_bit(X86_FEATURE_SVME, &boot_cpu_data.x86_capability)))
795 return 0;
797 /* check whether SVM feature is disabled in BIOS */
798 rdmsr(MSR_K8_VM_CR, eax, edx);
799 if ( eax & K8_VMCR_SVME_DISABLE )
800 {
801 printk("AMD SVM Extension is disabled in BIOS.\n");
802 return 0;
803 }
805 if (!(hsa[cpu] = alloc_host_save_area()))
806 return 0;
808 rdmsr(MSR_EFER, eax, edx);
809 eax |= EFER_SVME;
810 wrmsr(MSR_EFER, eax, edx);
811 asidpool_init( cpu );
812 printk("AMD SVM Extension is enabled for cpu %d.\n", cpu );
814 /* Initialize the HSA for this core */
815 phys_hsa = (u64) virt_to_maddr(hsa[cpu]);
816 phys_hsa_lo = (u32) phys_hsa;
817 phys_hsa_hi = (u32) (phys_hsa >> 32);
818 wrmsr(MSR_K8_VM_HSAVE_PA, phys_hsa_lo, phys_hsa_hi);
820 if (!(root_vmcb[cpu] = alloc_vmcb()))
821 return 0;
822 root_vmcb_pa[cpu] = virt_to_maddr(root_vmcb[cpu]);
824 if (cpu == 0)
825 setup_vmcb_dump();
827 /* Setup HVM interfaces */
828 hvm_funcs.disable = stop_svm;
830 hvm_funcs.vcpu_initialise = svm_vcpu_initialise;
831 hvm_funcs.vcpu_destroy = svm_vcpu_destroy;
833 hvm_funcs.store_cpu_guest_regs = svm_store_cpu_guest_regs;
834 hvm_funcs.load_cpu_guest_regs = svm_load_cpu_guest_regs;
836 hvm_funcs.realmode = svm_realmode;
837 hvm_funcs.paging_enabled = svm_paging_enabled;
838 hvm_funcs.long_mode_enabled = svm_long_mode_enabled;
839 hvm_funcs.pae_enabled = svm_pae_enabled;
840 hvm_funcs.guest_x86_mode = svm_guest_x86_mode;
841 hvm_funcs.get_guest_ctrl_reg = svm_get_ctrl_reg;
842 hvm_funcs.get_segment_base = svm_get_segment_base;
844 hvm_funcs.update_host_cr3 = svm_update_host_cr3;
846 hvm_funcs.stts = svm_stts;
847 hvm_funcs.set_tsc_offset = svm_set_tsc_offset;
849 hvm_funcs.init_ap_context = svm_init_ap_context;
850 hvm_funcs.init_hypercall_page = svm_init_hypercall_page;
852 hvm_enabled = 1;
854 return 1;
855 }
857 void arch_svm_do_resume(struct vcpu *v)
858 {
859 /* pinning VCPU to a different core? */
860 if ( v->arch.hvm_svm.launch_core == smp_processor_id()) {
861 hvm_do_resume( v );
862 reset_stack_and_jump( svm_asm_do_resume );
863 }
864 else {
865 if (svm_dbg_on)
866 printk("VCPU core pinned: %d to %d\n",
867 v->arch.hvm_svm.launch_core, smp_processor_id() );
868 v->arch.hvm_svm.launch_core = smp_processor_id();
869 hvm_migrate_timers( v );
870 hvm_do_resume( v );
871 reset_stack_and_jump( svm_asm_do_resume );
872 }
873 }
875 static int svm_do_page_fault(unsigned long va, struct cpu_user_regs *regs)
876 {
877 struct vcpu *v = current;
878 unsigned long eip;
879 int result;
880 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
882 ASSERT(vmcb);
884 //#if HVM_DEBUG
885 eip = vmcb->rip;
886 HVM_DBG_LOG(DBG_LEVEL_VMMU,
887 "svm_do_page_fault = 0x%lx, eip = %lx, error_code = %lx",
888 va, eip, (unsigned long)regs->error_code);
889 //#endif
891 result = shadow_fault(va, regs);
893 if( result ) {
894 /* Let's make sure that the Guest TLB is flushed */
895 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
896 }
898 return result;
899 }
902 static void svm_do_no_device_fault(struct vmcb_struct *vmcb)
903 {
904 struct vcpu *v = current;
906 setup_fpu(v);
907 vmcb->exception_intercepts &= ~EXCEPTION_BITMAP_NM;
909 if ( !(v->arch.hvm_svm.cpu_shadow_cr0 & X86_CR0_TS) )
910 vmcb->cr0 &= ~X86_CR0_TS;
911 }
914 static void svm_do_general_protection_fault(struct vcpu *v,
915 struct cpu_user_regs *regs)
916 {
917 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
918 unsigned long eip, error_code;
920 ASSERT(vmcb);
922 eip = vmcb->rip;
923 error_code = vmcb->exitinfo1;
925 if (vmcb->idtr.limit == 0) {
926 printk("Huh? We got a GP Fault with an invalid IDTR!\n");
927 svm_dump_vmcb(__func__, vmcb);
928 svm_dump_regs(__func__, regs);
929 svm_dump_inst(vmcb->rip);
930 domain_crash(v->domain);
931 return;
932 }
934 HVM_DBG_LOG(DBG_LEVEL_1,
935 "svm_general_protection_fault: eip = %lx, erro_code = %lx",
936 eip, error_code);
938 HVM_DBG_LOG(DBG_LEVEL_1,
939 "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx",
940 (unsigned long)regs->eax, (unsigned long)regs->ebx,
941 (unsigned long)regs->ecx, (unsigned long)regs->edx,
942 (unsigned long)regs->esi, (unsigned long)regs->edi);
944 /* Reflect it back into the guest */
945 svm_inject_exception(v, TRAP_gp_fault, 1, error_code);
946 }
948 /* Reserved bits ECX: [31:14], [12:4], [2:1]*/
949 #define SVM_VCPU_CPUID_L1_ECX_RESERVED 0xffffdff6
950 /* Reserved bits EDX: [31:29], [27], [22:20], [18], [10] */
951 #define SVM_VCPU_CPUID_L1_EDX_RESERVED 0xe8740400
953 static void svm_vmexit_do_cpuid(struct vmcb_struct *vmcb, unsigned long input,
954 struct cpu_user_regs *regs)
955 {
956 unsigned int eax, ebx, ecx, edx;
957 unsigned long eip;
958 struct vcpu *v = current;
959 int inst_len;
961 ASSERT(vmcb);
963 eip = vmcb->rip;
965 HVM_DBG_LOG(DBG_LEVEL_1,
966 "do_cpuid: (eax) %lx, (ebx) %lx, (ecx) %lx, (edx) %lx,"
967 " (esi) %lx, (edi) %lx",
968 (unsigned long)regs->eax, (unsigned long)regs->ebx,
969 (unsigned long)regs->ecx, (unsigned long)regs->edx,
970 (unsigned long)regs->esi, (unsigned long)regs->edi);
972 if ( !cpuid_hypervisor_leaves(input, &eax, &ebx, &ecx, &edx) )
973 {
974 cpuid(input, &eax, &ebx, &ecx, &edx);
975 if (input == 0x00000001 || input == 0x80000001 )
976 {
977 if ( vlapic_hw_disabled(vcpu_vlapic(v)) )
978 {
979 /* Since the apic is disabled, avoid any confusion
980 about SMP cpus being available */
981 clear_bit(X86_FEATURE_APIC, &edx);
982 }
983 #if CONFIG_PAGING_LEVELS >= 3
984 if ( !v->domain->arch.hvm_domain.params[HVM_PARAM_PAE_ENABLED] )
985 #endif
986 {
987 clear_bit(X86_FEATURE_PAE, &edx);
988 if (input == 0x80000001 )
989 clear_bit(X86_FEATURE_NX & 31, &edx);
990 }
991 clear_bit(X86_FEATURE_PSE36, &edx);
992 if (input == 0x00000001 )
993 {
994 /* Clear out reserved bits. */
995 ecx &= ~SVM_VCPU_CPUID_L1_ECX_RESERVED;
996 edx &= ~SVM_VCPU_CPUID_L1_EDX_RESERVED;
998 clear_bit(X86_FEATURE_MWAIT & 31, &ecx);
1000 /* Guest should only see one logical processor.
1001 * See details on page 23 of AMD CPUID Specification.
1002 */
1003 clear_bit(X86_FEATURE_HT, &edx); /* clear the hyperthread bit */
1004 ebx &= 0xFF00FFFF; /* clear the logical processor count when HTT=0 */
1005 ebx |= 0x00010000; /* set to 1 just for precaution */
1007 else
1009 /* Clear the Cmp_Legacy bit
1010 * This bit is supposed to be zero when HTT = 0.
1011 * See details on page 23 of AMD CPUID Specification.
1012 */
1013 clear_bit(X86_FEATURE_CMP_LEGACY & 31, &ecx);
1014 /* Make SVM feature invisible to the guest. */
1015 clear_bit(X86_FEATURE_SVME & 31, &ecx);
1016 #ifdef __i386__
1017 /* Mask feature for Intel ia32e or AMD long mode. */
1018 clear_bit(X86_FEATURE_LAHF_LM & 31, &ecx);
1020 clear_bit(X86_FEATURE_LM & 31, &edx);
1021 clear_bit(X86_FEATURE_SYSCALL & 31, &edx);
1022 #endif
1023 /* So far, we do not support 3DNow for the guest. */
1024 clear_bit(X86_FEATURE_3DNOW & 31, &edx);
1025 clear_bit(X86_FEATURE_3DNOWEXT & 31, &edx);
1028 else if ( ( input == 0x80000007 ) || ( input == 0x8000000A ) )
1030 /* Mask out features of power management and SVM extension. */
1031 eax = ebx = ecx = edx = 0;
1033 else if ( input == 0x80000008 )
1035 /* Make sure Number of CPU core is 1 when HTT=0 */
1036 ecx &= 0xFFFFFF00;
1040 regs->eax = (unsigned long)eax;
1041 regs->ebx = (unsigned long)ebx;
1042 regs->ecx = (unsigned long)ecx;
1043 regs->edx = (unsigned long)edx;
1045 HVM_DBG_LOG(DBG_LEVEL_1,
1046 "svm_vmexit_do_cpuid: eip: %lx, input: %lx, out:eax=%x, "
1047 "ebx=%x, ecx=%x, edx=%x",
1048 eip, input, eax, ebx, ecx, edx);
1050 inst_len = __get_instruction_length(vmcb, INSTR_CPUID, NULL);
1051 ASSERT(inst_len > 0);
1052 __update_guest_eip(vmcb, inst_len);
1056 static inline unsigned long *get_reg_p(unsigned int gpreg,
1057 struct cpu_user_regs *regs, struct vmcb_struct *vmcb)
1059 unsigned long *reg_p = NULL;
1060 switch (gpreg)
1062 case SVM_REG_EAX:
1063 reg_p = (unsigned long *)&regs->eax;
1064 break;
1065 case SVM_REG_EBX:
1066 reg_p = (unsigned long *)&regs->ebx;
1067 break;
1068 case SVM_REG_ECX:
1069 reg_p = (unsigned long *)&regs->ecx;
1070 break;
1071 case SVM_REG_EDX:
1072 reg_p = (unsigned long *)&regs->edx;
1073 break;
1074 case SVM_REG_EDI:
1075 reg_p = (unsigned long *)&regs->edi;
1076 break;
1077 case SVM_REG_ESI:
1078 reg_p = (unsigned long *)&regs->esi;
1079 break;
1080 case SVM_REG_EBP:
1081 reg_p = (unsigned long *)&regs->ebp;
1082 break;
1083 case SVM_REG_ESP:
1084 reg_p = (unsigned long *)&vmcb->rsp;
1085 break;
1086 #ifdef __x86_64__
1087 case SVM_REG_R8:
1088 reg_p = (unsigned long *)&regs->r8;
1089 break;
1090 case SVM_REG_R9:
1091 reg_p = (unsigned long *)&regs->r9;
1092 break;
1093 case SVM_REG_R10:
1094 reg_p = (unsigned long *)&regs->r10;
1095 break;
1096 case SVM_REG_R11:
1097 reg_p = (unsigned long *)&regs->r11;
1098 break;
1099 case SVM_REG_R12:
1100 reg_p = (unsigned long *)&regs->r12;
1101 break;
1102 case SVM_REG_R13:
1103 reg_p = (unsigned long *)&regs->r13;
1104 break;
1105 case SVM_REG_R14:
1106 reg_p = (unsigned long *)&regs->r14;
1107 break;
1108 case SVM_REG_R15:
1109 reg_p = (unsigned long *)&regs->r15;
1110 break;
1111 #endif
1112 default:
1113 BUG();
1116 return reg_p;
1120 static inline unsigned long get_reg(unsigned int gpreg,
1121 struct cpu_user_regs *regs, struct vmcb_struct *vmcb)
1123 unsigned long *gp;
1124 gp = get_reg_p(gpreg, regs, vmcb);
1125 return *gp;
1129 static inline void set_reg(unsigned int gpreg, unsigned long value,
1130 struct cpu_user_regs *regs, struct vmcb_struct *vmcb)
1132 unsigned long *gp;
1133 gp = get_reg_p(gpreg, regs, vmcb);
1134 *gp = value;
1138 static void svm_dr_access(struct vcpu *v, struct cpu_user_regs *regs)
1140 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1142 v->arch.hvm_vcpu.flag_dr_dirty = 1;
1144 __restore_debug_registers(v);
1146 /* allow the guest full access to the debug registers */
1147 vmcb->dr_intercepts = 0;
1151 static void svm_get_prefix_info(
1152 struct vmcb_struct *vmcb,
1153 unsigned int dir, segment_selector_t **seg, unsigned int *asize)
1155 unsigned char inst[MAX_INST_LEN];
1156 int i;
1158 memset(inst, 0, MAX_INST_LEN);
1159 if (inst_copy_from_guest(inst, svm_rip2pointer(vmcb), sizeof(inst))
1160 != MAX_INST_LEN)
1162 gdprintk(XENLOG_ERR, "get guest instruction failed\n");
1163 domain_crash(current->domain);
1164 return;
1167 for (i = 0; i < MAX_INST_LEN; i++)
1169 switch (inst[i])
1171 case 0xf3: /* REPZ */
1172 case 0xf2: /* REPNZ */
1173 case 0xf0: /* LOCK */
1174 case 0x66: /* data32 */
1175 #ifdef __x86_64__
1176 /* REX prefixes */
1177 case 0x40:
1178 case 0x41:
1179 case 0x42:
1180 case 0x43:
1181 case 0x44:
1182 case 0x45:
1183 case 0x46:
1184 case 0x47:
1186 case 0x48:
1187 case 0x49:
1188 case 0x4a:
1189 case 0x4b:
1190 case 0x4c:
1191 case 0x4d:
1192 case 0x4e:
1193 case 0x4f:
1194 #endif
1195 continue;
1196 case 0x67: /* addr32 */
1197 *asize ^= 48; /* Switch 16/32 bits */
1198 continue;
1199 case 0x2e: /* CS */
1200 *seg = &vmcb->cs;
1201 continue;
1202 case 0x36: /* SS */
1203 *seg = &vmcb->ss;
1204 continue;
1205 case 0x26: /* ES */
1206 *seg = &vmcb->es;
1207 continue;
1208 case 0x64: /* FS */
1209 *seg = &vmcb->fs;
1210 continue;
1211 case 0x65: /* GS */
1212 *seg = &vmcb->gs;
1213 continue;
1214 case 0x3e: /* DS */
1215 *seg = &vmcb->ds;
1216 continue;
1217 default:
1218 break;
1220 return;
1225 /* Get the address of INS/OUTS instruction */
/*
 * Computes the memory operand of a string I/O instruction.
 *
 * On success returns 1 with *addr holding the operand address (segment base
 * added when not in long mode) and *count holding the guest's ecx (masked
 * to 16 bits for a 16-bit address size).
 * Returns 0 after injecting #GP into the guest when, outside long mode, the
 * segment is not present or the offset is above the segment limit.
 */
1226 static inline int svm_get_io_address(
1227 struct vcpu *v,
1228 struct cpu_user_regs *regs, unsigned int dir,
1229 unsigned long *count, unsigned long *addr)
1231 unsigned long reg;
1232 unsigned int asize = 0;
1233 unsigned int isize;
1234 int long_mode;
1235 ioio_info_t info;
1236 segment_selector_t *seg = NULL;
1237 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1239 info.bytes = vmcb->exitinfo1;
1241 /* If we're in long mode, we shouldn't check the segment presence & limit */
1242 long_mode = vmcb->cs.attributes.fields.l && vmcb->efer & EFER_LMA;
1244 /* d field of cs.attributes is 1 for 32-bit, 0 for 16 or 64 bit.
1245 * l field combined with EFER_LMA -> longmode says whether it's 16 or 64 bit.
1246 */
1247 asize = (long_mode)?64:((vmcb->cs.attributes.fields.db)?32:16);
1250 /* The ins/outs instructions are single byte, so if we have got more
1251 * than one byte (+ maybe rep-prefix), we have some prefix so we need
1252 * to figure out what it is...
1253 */
/* exitinfo2 is used here as the address of the next instruction. */
1254 isize = vmcb->exitinfo2 - vmcb->rip;
1256 if (info.fields.rep)
1257 isize --;
1259 if (isize > 1)
1260 svm_get_prefix_info(vmcb, dir, &seg, &asize);
1262 ASSERT(dir == IOREQ_READ || dir == IOREQ_WRITE);
/* IOREQ_WRITE: source operand is esi, segment overridable (default DS). */
1264 if (dir == IOREQ_WRITE)
1266 reg = regs->esi;
1267 if (!seg) /* If no prefix, use DS. */
1268 seg = &vmcb->ds;
/* IOREQ_READ: destination operand is edi; any override found is ignored. */
1270 else
1272 reg = regs->edi;
1273 seg = &vmcb->es; /* Note: This is ALWAYS ES. */
1276 /* If the segment isn't present, give GP fault! */
1277 if (!long_mode && !seg->attributes.fields.p)
1279 svm_inject_exception(v, TRAP_gp_fault, 1, seg->sel);
1280 return 0;
1283 if (asize == 16)
1285 *addr = (reg & 0xFFFF);
1286 *count = regs->ecx & 0xffff;
1288 else
1290 *addr = reg;
1291 *count = regs->ecx;
/* Outside long mode: limit check on the offset, then add the segment base. */
1294 if (!long_mode) {
1295 if (*addr > seg->limit)
1297 svm_inject_exception(v, TRAP_gp_fault, 1, seg->sel);
1298 return 0;
1300 else
1302 *addr += seg->base;
1307 return 1;
/*
 * Handle an intercepted port I/O instruction for vcpu 'v'.
 *
 * Port, direction, operand size and the string/rep flags are decoded from
 * vmcb->exitinfo1.  The request is forwarded with send_pio_req(); the saved
 * guest eip is moved to vmcb->exitinfo2 only when the whole operation has
 * been submitted (string operations clipped at a page boundary leave eip
 * unchanged).
 */
1311 static void svm_io_instruction(struct vcpu *v)
1313 struct cpu_user_regs *regs;
1314 struct hvm_io_op *pio_opp;
1315 unsigned int port;
1316 unsigned int size, dir, df;
1317 ioio_info_t info;
1318 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1320 ASSERT(vmcb);
1321 pio_opp = &current->arch.hvm_vcpu.io_op;
1322 pio_opp->instr = INSTR_PIO;
1323 pio_opp->flags = 0;
1325 regs = &pio_opp->io_context;
1327 /* Copy current guest state into io instruction state structure. */
1328 memcpy(regs, guest_cpu_user_regs(), HVM_CONTEXT_STACK_BYTES);
1329 hvm_store_cpu_guest_regs(v, regs, NULL);
1331 info.bytes = vmcb->exitinfo1;
1333 port = info.fields.port; /* port used to be addr */
1334 dir = info.fields.type; /* direction */
1335 df = regs->eflags & X86_EFLAGS_DF ? 1 : 0;
/* Operand size in bytes: sz32 -> 4, sz16 -> 2, otherwise 1. */
1337 if (info.fields.sz32)
1338 size = 4;
1339 else if (info.fields.sz16)
1340 size = 2;
1341 else
1342 size = 1;
1344 HVM_DBG_LOG(DBG_LEVEL_IO,
1345 "svm_io_instruction: port 0x%x eip=%x:%"PRIx64", "
1346 "exit_qualification = %"PRIx64,
1347 port, vmcb->cs.sel, vmcb->rip, info.bytes);
1349 /* string instruction */
1350 if (info.fields.str)
1352 unsigned long addr, count;
/* Step direction through memory: -1 when EFLAGS.DF is set, +1 otherwise. */
1353 int sign = regs->eflags & X86_EFLAGS_DF ? -1 : 1;
1355 if (!svm_get_io_address(v, regs, dir, &count, &addr))
1357 /* We failed to get a valid address, so don't do the IO operation -
1358 * it would just get worse if we do! Hopefully the guest is handling
1359 * gp-faults...
1360 */
1361 return;
1364 /* "rep" prefix */
1365 if (info.fields.rep)
1367 pio_opp->flags |= REPZ;
1369 else
1371 count = 1;
1374 /*
1375 * Handle string pio instructions that cross pages or that
1376 * are unaligned. See the comments in hvm_platform.c/handle_mmio()
1377 */
/* Case 1: a single element straddles a page boundary. */
1378 if ((addr & PAGE_MASK) != ((addr + size - 1) & PAGE_MASK))
1380 unsigned long value = 0;
1382 pio_opp->flags |= OVERLAP;
1383 pio_opp->addr = addr;
/* For OUTS the data is fetched here; the copy result is deliberately ignored. */
1385 if (dir == IOREQ_WRITE) /* OUTS */
1387 if (hvm_paging_enabled(current))
1388 (void)hvm_copy_from_guest_virt(&value, addr, size);
1389 else
1390 (void)hvm_copy_from_guest_phys(&value, addr, size);
/* Only one element is sent; eip advances only if it was also the last. */
1393 if (count == 1)
1394 regs->eip = vmcb->exitinfo2;
1396 send_pio_req(port, 1, size, value, dir, df, 0);
/* Case 2: elements are page-contained; clip the run at the page boundary. */
1398 else
1400 unsigned long last_addr = sign > 0 ? addr + count * size - 1
1401 : addr - (count - 1) * size;
1403 if ((addr & PAGE_MASK) != (last_addr & PAGE_MASK))
1405 if (sign > 0)
1406 count = (PAGE_SIZE - (addr & ~PAGE_MASK)) / size;
1407 else
1408 count = (addr & ~PAGE_MASK) / size + 1;
1410 else
1411 regs->eip = vmcb->exitinfo2;
1413 send_pio_req(port, count, size, addr, dir, df, 1);
/* Non-string IN/OUT: the data is the guest's eax. */
1416 else
1418 /*
1419 * On SVM, the RIP of the instruction following the IN/OUT is saved in
1420 * ExitInfo2
1421 */
1422 regs->eip = vmcb->exitinfo2;
1424 if (port == 0xe9 && dir == IOREQ_WRITE && size == 1)
1425 hvm_print_line(v, regs->eax); /* guest debug output */
1427 send_pio_req(port, 1, size, regs->eax, dir, df, 0);
/*
 * Apply a guest write of 'value' to CR0 for the current vcpu.
 *
 * The shadow CR0 records the guest's value (with ET forced on); the VMCB
 * CR0 additionally always has PG and WP set.
 *
 * Returns 1 on success.  Returns 0 when paging is being enabled with a CR3
 * that does not map to a usable page (the domain is crashed), or when
 * 'value' has PG set with PE clear (#GP is injected).
 */
1431 static int svm_set_cr0(unsigned long value)
1433 struct vcpu *v = current;
1434 unsigned long mfn;
1435 int paging_enabled;
1436 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1437 unsigned long old_base_mfn;
1439 ASSERT(vmcb);
1441 /* We don't want to lose PG. ET is reserved and should always be 1. */
/* paging_enabled is sampled before the shadow CR0 is overwritten below. */
1442 paging_enabled = svm_paging_enabled(v);
1443 value |= X86_CR0_ET;
1444 vmcb->cr0 = value | X86_CR0_PG | X86_CR0_WP;
1445 v->arch.hvm_svm.cpu_shadow_cr0 = value;
1447 /* TS cleared? Then initialise FPU now. */
1448 if ( !(value & X86_CR0_TS) )
1450 setup_fpu(v);
1451 vmcb->exception_intercepts &= ~EXCEPTION_BITMAP_NM;
1454 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR0 value = %lx\n", value);
/* Transition: guest is turning paging on (PE and PG set, previously off). */
1456 if ((value & X86_CR0_PE) && (value & X86_CR0_PG) && !paging_enabled)
1458 /* The guest CR3 must be pointing to the guest physical. */
1459 mfn = get_mfn_from_gpfn(v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT);
1460 if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain))
1462 gdprintk(XENLOG_ERR, "Invalid CR3 value = %lx (mfn=%lx)\n",
1463 v->arch.hvm_svm.cpu_cr3, mfn);
1464 domain_crash(v->domain);
1465 return 0;
1468 #if defined(__x86_64__)
/* NOTE(review): #GP is injected here but execution continues below. */
1469 if (test_bit(SVM_CPU_STATE_LME_ENABLED, &v->arch.hvm_svm.cpu_state)
1470 && !test_bit(SVM_CPU_STATE_PAE_ENABLED,
1471 &v->arch.hvm_svm.cpu_state))
1473 HVM_DBG_LOG(DBG_LEVEL_1, "Enable paging before PAE enable\n");
1474 svm_inject_exception(v, TRAP_gp_fault, 1, 0);
1477 if (test_bit(SVM_CPU_STATE_LME_ENABLED, &v->arch.hvm_svm.cpu_state))
1479 /* PAE is expected to be enabled already at this point. */
1480 HVM_DBG_LOG(DBG_LEVEL_1, "Enable the Long mode\n");
1481 set_bit(SVM_CPU_STATE_LMA_ENABLED, &v->arch.hvm_svm.cpu_state);
1482 vmcb->efer |= EFER_LMA;
1484 #endif /* __x86_64__ */
1486 /* Now arch.guest_table points to machine physical. */
/* Install the new base (reference taken above), then drop the old one. */
1487 old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
1488 v->arch.guest_table = pagetable_from_pfn(mfn);
1489 if ( old_base_mfn )
1490 put_page(mfn_to_page(old_base_mfn));
1491 shadow_update_paging_modes(v);
1493 HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx",
1494 (unsigned long) (mfn << PAGE_SHIFT));
1496 vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3;
1497 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
/* Transition: paging was on and is being turned off; release the CR3 page. */
1500 if ( !((value & X86_CR0_PE) && (value & X86_CR0_PG)) && paging_enabled )
1501 if ( v->arch.hvm_svm.cpu_cr3 ) {
1502 put_page(mfn_to_page(get_mfn_from_gpfn(
1503 v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT)));
1504 v->arch.guest_table = pagetable_null();
1507 /*
1508 * SVM implements paged real-mode and when we return to real-mode
1509 * we revert back to the physical mappings that the domain builder
1510 * created.
1511 */
1512 if ((value & X86_CR0_PE) == 0) {
/* PG without PE is an invalid combination: fault the guest. */
1513 if (value & X86_CR0_PG) {
1514 svm_inject_exception(v, TRAP_gp_fault, 1, 0);
1515 return 0;
1517 shadow_update_paging_modes(v);
1518 vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3;
1519 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
/* Protected mode with paging off: also leave long mode if it was active. */
1521 else if ( (value & (X86_CR0_PE | X86_CR0_PG)) == X86_CR0_PE )
1523 if ( svm_long_mode_enabled(v) )
1525 vmcb->efer &= ~EFER_LMA;
1526 clear_bit(SVM_CPU_STATE_LMA_ENABLED, &v->arch.hvm_svm.cpu_state);
1528 /* we should take care of this kind of situation */
1529 shadow_update_paging_modes(v);
1530 vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3;
1531 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
1534 return 1;
1537 /*
1538 * Read from control registers. CR0 and CR4 are read from the shadow.
1539 */
1540 static void mov_from_cr(int cr, int gp, struct cpu_user_regs *regs)
1542 unsigned long value = 0;
1543 struct vcpu *v = current;
1544 struct vlapic *vlapic = vcpu_vlapic(v);
1545 struct vmcb_struct *vmcb;
1547 vmcb = v->arch.hvm_svm.vmcb;
1548 ASSERT(vmcb);
1550 switch ( cr )
1552 case 0:
1553 value = v->arch.hvm_svm.cpu_shadow_cr0;
1554 if (svm_dbg_on)
1555 printk("CR0 read =%lx \n", value );
1556 break;
1557 case 2:
1558 value = vmcb->cr2;
1559 break;
1560 case 3:
1561 value = (unsigned long) v->arch.hvm_svm.cpu_cr3;
1562 if (svm_dbg_on)
1563 printk("CR3 read =%lx \n", value );
1564 break;
1565 case 4:
1566 value = (unsigned long) v->arch.hvm_svm.cpu_shadow_cr4;
1567 if (svm_dbg_on)
1568 printk("CR4 read=%lx\n", value);
1569 break;
1570 case 8:
1571 value = (unsigned long)vlapic_get_reg(vlapic, APIC_TASKPRI);
1572 value = (value & 0xF0) >> 4;
1573 break;
1575 default:
1576 domain_crash(v->domain);
1577 return;
1580 set_reg(gp, value, regs, vmcb);
1582 HVM_DBG_LOG(DBG_LEVEL_VMMU, "mov_from_cr: CR%d, value = %lx,", cr, value);
1586 static inline int svm_pgbit_test(struct vcpu *v)
1588 return v->arch.hvm_svm.cpu_shadow_cr0 & X86_CR0_PG;
1592 /*
1593 * Write to control registers
1594 */
/*
 * Emulates a guest write of general register 'gpreg' to control register
 * 'cr' (0, 3, 4 or 8) for the current vcpu.
 *
 * Returns 1 on success.  Returns 0 when the domain has been crashed because
 * of an unusable CR3 value or an unsupported register number; for CR0 the
 * result of svm_set_cr0() is returned.
 */
1595 static int mov_to_cr(int gpreg, int cr, struct cpu_user_regs *regs)
1597 unsigned long value, old_cr, old_base_mfn, mfn;
1598 struct vcpu *v = current;
1599 struct vlapic *vlapic = vcpu_vlapic(v);
1600 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1602 value = get_reg(gpreg, regs, vmcb);
1604 HVM_DBG_LOG(DBG_LEVEL_1, "mov_to_cr: CR%d, value = %lx,", cr, value);
1605 HVM_DBG_LOG(DBG_LEVEL_1, "current = %lx,", (unsigned long) current);
1607 switch (cr)
1609 case 0:
1610 if (svm_dbg_on)
1611 printk("CR0 write =%lx \n", value );
1612 return svm_set_cr0(value);
1614 case 3:
1615 if (svm_dbg_on)
1616 printk("CR3 write =%lx \n", value );
1617 /* If paging is not enabled yet, simply copy the value to CR3. */
1618 if (!svm_paging_enabled(v)) {
1619 v->arch.hvm_svm.cpu_cr3 = value;
1620 break;
1622 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
1624 /* We make a new one if the shadow does not exist. */
1625 if (value == v->arch.hvm_svm.cpu_cr3)
1627 /*
1628 * This is simple TLB flush, implying the guest has
1629 * removed some translation or changed page attributes.
1630 * We simply invalidate the shadow.
1631 */
1632 mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT);
/* The frame must still be the one currently installed as guest_table. */
1633 if (mfn != pagetable_get_pfn(v->arch.guest_table))
1634 goto bad_cr3;
1635 shadow_update_cr3(v);
1637 else
1639 /*
1640 * If different, make a shadow. Check if the PDBR is valid
1641 * first.
1642 */
1643 HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 value = %lx", value);
1644 mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT);
1645 if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain))
1646 goto bad_cr3;
/* Swap in the new base (reference taken above) and release the old one. */
1648 old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
1649 v->arch.guest_table = pagetable_from_pfn(mfn);
1651 if (old_base_mfn)
1652 put_page(mfn_to_page(old_base_mfn));
1654 v->arch.hvm_svm.cpu_cr3 = value;
1655 update_cr3(v);
1656 vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3;
1657 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx", value);
1659 break;
1661 case 4: /* CR4 */
1662 if (svm_dbg_on)
1663 printk( "write cr4=%lx, cr0=%lx\n",
1664 value, v->arch.hvm_svm.cpu_shadow_cr0 );
1665 old_cr = v->arch.hvm_svm.cpu_shadow_cr4;
/* PAE is being switched on. */
1666 if ( value & X86_CR4_PAE && !(old_cr & X86_CR4_PAE) )
1668 set_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
1669 if ( svm_pgbit_test(v) )
1671 /* The guest is a 32-bit PAE guest. */
1672 #if CONFIG_PAGING_LEVELS >= 3
/* These locals shadow the function-level mfn and old_base_mfn. */
1673 unsigned long mfn, old_base_mfn;
1674 mfn = get_mfn_from_gpfn(v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT);
1675 if ( !mfn_valid(mfn) ||
1676 !get_page(mfn_to_page(mfn), v->domain) )
1677 goto bad_cr3;
1679 /*
1680 * Now arch.guest_table points to machine physical.
1681 */
1683 old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
1684 v->arch.guest_table = pagetable_from_pfn(mfn);
1685 if ( old_base_mfn )
1686 put_page(mfn_to_page(old_base_mfn));
1687 shadow_update_paging_modes(v);
1689 HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx",
1690 (unsigned long) (mfn << PAGE_SHIFT));
1692 vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3;
1694 HVM_DBG_LOG(DBG_LEVEL_VMMU,
1695 "Update CR3 value = %lx, mfn = %lx",
1696 v->arch.hvm_svm.cpu_cr3, mfn);
1697 #endif
1700 else if (value & X86_CR4_PAE) {
1701 set_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
1702 } else {
/* PAE clear in the new value: #GP if long mode is active.
 * NOTE(review): the PAE state bit is still cleared after the injection. */
1703 if (test_bit(SVM_CPU_STATE_LMA_ENABLED,
1704 &v->arch.hvm_svm.cpu_state)) {
1705 svm_inject_exception(v, TRAP_gp_fault, 1, 0);
1707 clear_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
/* The shadow keeps the guest's value; the VMCB adds SVM_CR4_HOST_MASK. */
1710 v->arch.hvm_svm.cpu_shadow_cr4 = value;
1711 vmcb->cr4 = value | SVM_CR4_HOST_MASK;
1713 /*
1714 * Writing to CR4 to modify the PSE, PGE, or PAE flag invalidates
1715 * all TLB entries except global entries.
1716 */
1717 if ((old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE))
1719 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
1720 shadow_update_paging_modes(v);
1722 break;
1724 case 8:
/* The low four bits of the value go into bits 7:4 of APIC_TASKPRI. */
1725 vlapic_set_reg(vlapic, APIC_TASKPRI, ((value & 0x0F) << 4));
1726 break;
1728 default:
1729 gdprintk(XENLOG_ERR, "invalid cr: %d\n", cr);
1730 domain_crash(v->domain);
1731 return 0;
1734 return 1;
/* Common failure exit for every rejected CR3 value. */
1736 bad_cr3:
1737 gdprintk(XENLOG_ERR, "Invalid CR3\n");
1738 domain_crash(v->domain);
1739 return 0;
/*
 * Number of elements in the array 'x'.  'x' must be a real array, not a
 * pointer.  The argument is parenthesized so that any array-valued
 * expression can be passed safely.
 */
#define ARR_SIZE(x) (sizeof(x) / sizeof((x)[0]))
1746 static int svm_cr_access(struct vcpu *v, unsigned int cr, unsigned int type,
1747 struct cpu_user_regs *regs)
1749 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1750 int inst_len = 0;
1751 int index;
1752 unsigned int gpreg;
1753 unsigned long value;
1754 u8 buffer[MAX_INST_LEN];
1755 u8 prefix = 0;
1756 int result = 1;
1757 enum instruction_index list_a[] = {INSTR_MOV2CR, INSTR_CLTS, INSTR_LMSW};
1758 enum instruction_index list_b[] = {INSTR_MOVCR2, INSTR_SMSW};
1759 enum instruction_index match;
1761 ASSERT(vmcb);
1763 inst_copy_from_guest(buffer, svm_rip2pointer(vmcb), sizeof(buffer));
1765 /* get index to first actual instruction byte - as we will need to know
1766 where the prefix lives later on */
1767 index = skip_prefix_bytes(buffer, sizeof(buffer));
1769 if ( type == TYPE_MOV_TO_CR )
1771 inst_len = __get_instruction_length_from_list(
1772 vmcb, list_a, ARR_SIZE(list_a), &buffer[index], &match);
1774 else /* type == TYPE_MOV_FROM_CR */
1776 inst_len = __get_instruction_length_from_list(
1777 vmcb, list_b, ARR_SIZE(list_b), &buffer[index], &match);
1780 ASSERT(inst_len > 0);
1782 inst_len += index;
1784 /* Check for REX prefix - it's ALWAYS the last byte of any prefix bytes */
1785 if (index > 0 && (buffer[index-1] & 0xF0) == 0x40)
1786 prefix = buffer[index-1];
1788 HVM_DBG_LOG(DBG_LEVEL_1, "eip = %lx", (unsigned long) vmcb->rip);
1790 switch (match)
1792 case INSTR_MOV2CR:
1793 gpreg = decode_src_reg(prefix, buffer[index+2]);
1794 result = mov_to_cr(gpreg, cr, regs);
1795 break;
1797 case INSTR_MOVCR2:
1798 gpreg = decode_src_reg(prefix, buffer[index+2]);
1799 mov_from_cr(cr, gpreg, regs);
1800 break;
1802 case INSTR_CLTS:
1803 /* TS being cleared means that it's time to restore fpu state. */
1804 setup_fpu(current);
1805 vmcb->exception_intercepts &= ~EXCEPTION_BITMAP_NM;
1806 vmcb->cr0 &= ~X86_CR0_TS; /* clear TS */
1807 v->arch.hvm_svm.cpu_shadow_cr0 &= ~X86_CR0_TS; /* clear TS */
1808 break;
1810 case INSTR_LMSW:
1811 if (svm_dbg_on)
1812 svm_dump_inst(svm_rip2pointer(vmcb));
1814 gpreg = decode_src_reg(prefix, buffer[index+2]);
1815 value = get_reg(gpreg, regs, vmcb) & 0xF;
1817 if (svm_dbg_on)
1818 printk("CR0-LMSW value=%lx, reg=%d, inst_len=%d\n", value, gpreg,
1819 inst_len);
1821 value = (v->arch.hvm_svm.cpu_shadow_cr0 & ~0xF) | value;
1823 if (svm_dbg_on)
1824 printk("CR0-LMSW CR0 - New value=%lx\n", value);
1826 result = svm_set_cr0(value);
1827 break;
1829 case INSTR_SMSW:
1830 if (svm_dbg_on)
1831 svm_dump_inst(svm_rip2pointer(vmcb));
1832 value = v->arch.hvm_svm.cpu_shadow_cr0;
1833 gpreg = decode_src_reg(prefix, buffer[index+2]);
1834 set_reg(gpreg, value, regs, vmcb);
1836 if (svm_dbg_on)
1837 printk("CR0-SMSW value=%lx, reg=%d, inst_len=%d\n", value, gpreg,
1838 inst_len);
1839 break;
1841 default:
1842 BUG();
1845 ASSERT(inst_len);
1847 __update_guest_eip(vmcb, inst_len);
1849 return result;
1852 static inline void svm_do_msr_access(
1853 struct vcpu *v, struct cpu_user_regs *regs)
1855 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1856 int inst_len;
1857 u64 msr_content=0;
1858 u32 eax, edx;
1860 ASSERT(vmcb);
1862 HVM_DBG_LOG(DBG_LEVEL_1, "svm_do_msr_access: ecx=%lx, eax=%lx, edx=%lx, "
1863 "exitinfo = %lx", (unsigned long)regs->ecx,
1864 (unsigned long)regs->eax, (unsigned long)regs->edx,
1865 (unsigned long)vmcb->exitinfo1);
1867 /* is it a read? */
1868 if (vmcb->exitinfo1 == 0)
1870 inst_len = __get_instruction_length(vmcb, INSTR_RDMSR, NULL);
1872 regs->edx = 0;
1873 switch (regs->ecx) {
1874 case MSR_IA32_TIME_STAMP_COUNTER:
1875 msr_content = hvm_get_guest_time(v);
1876 break;
1877 case MSR_IA32_SYSENTER_CS:
1878 msr_content = vmcb->sysenter_cs;
1879 break;
1880 case MSR_IA32_SYSENTER_ESP:
1881 msr_content = vmcb->sysenter_esp;
1882 break;
1883 case MSR_IA32_SYSENTER_EIP:
1884 msr_content = vmcb->sysenter_eip;
1885 break;
1886 case MSR_IA32_APICBASE:
1887 msr_content = vcpu_vlapic(v)->apic_base_msr;
1888 break;
1889 default:
1890 if (long_mode_do_msr_read(regs))
1891 goto done;
1893 if ( rdmsr_hypervisor_regs(regs->ecx, &eax, &edx) )
1895 regs->eax = eax;
1896 regs->edx = edx;
1897 goto done;
1900 rdmsr_safe(regs->ecx, regs->eax, regs->edx);
1901 break;
1903 regs->eax = msr_content & 0xFFFFFFFF;
1904 regs->edx = msr_content >> 32;
1906 else
1908 inst_len = __get_instruction_length(vmcb, INSTR_WRMSR, NULL);
1909 msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
1911 switch (regs->ecx)
1913 case MSR_IA32_TIME_STAMP_COUNTER:
1914 hvm_set_guest_time(v, msr_content);
1915 break;
1916 case MSR_IA32_SYSENTER_CS:
1917 vmcb->sysenter_cs = msr_content;
1918 break;
1919 case MSR_IA32_SYSENTER_ESP:
1920 vmcb->sysenter_esp = msr_content;
1921 break;
1922 case MSR_IA32_SYSENTER_EIP:
1923 vmcb->sysenter_eip = msr_content;
1924 break;
1925 case MSR_IA32_APICBASE:
1926 vlapic_msr_set(vcpu_vlapic(v), msr_content);
1927 break;
1928 default:
1929 if ( !long_mode_do_msr_write(regs) )
1930 wrmsr_hypervisor_regs(regs->ecx, regs->eax, regs->edx);
1931 break;
1935 done:
1937 HVM_DBG_LOG(DBG_LEVEL_1, "svm_do_msr_access returns: "
1938 "ecx=%lx, eax=%lx, edx=%lx",
1939 (unsigned long)regs->ecx, (unsigned long)regs->eax,
1940 (unsigned long)regs->edx);
1942 __update_guest_eip(vmcb, inst_len);
1946 static inline void svm_vmexit_do_hlt(struct vmcb_struct *vmcb)
1948 __update_guest_eip(vmcb, 1);
1950 /* Check for interrupt not handled or new interrupt. */
1951 if ( (vmcb->rflags & X86_EFLAGS_IF) &&
1952 (vmcb->vintr.fields.irq || cpu_has_pending_irq(current)) )
1953 return;
1955 hvm_hlt(vmcb->rflags);
1959 static void svm_vmexit_do_invd(struct vmcb_struct *vmcb)
1961 int inst_len;
1963 /* Invalidate the cache - we can't really do that safely - maybe we should
1964 * WBINVD, but I think it's just fine to completely ignore it - we should
1965 * have cache-snooping that solves it anyways. -- Mats P.
1966 */
1968 /* Tell the user that we did this - just in case someone runs some really
1969 * weird operating system and wants to know why it's not working...
1970 */
1971 printk("INVD instruction intercepted - ignored\n");
1973 inst_len = __get_instruction_length(vmcb, INSTR_INVD, NULL);
1974 __update_guest_eip(vmcb, inst_len);
1980 #ifdef XEN_DEBUGGER
1981 static void svm_debug_save_cpu_user_regs(struct vmcb_struct *vmcb,
1982 struct cpu_user_regs *regs)
1984 regs->eip = vmcb->rip;
1985 regs->esp = vmcb->rsp;
1986 regs->eflags = vmcb->rflags;
1988 regs->xcs = vmcb->cs.sel;
1989 regs->xds = vmcb->ds.sel;
1990 regs->xes = vmcb->es.sel;
1991 regs->xfs = vmcb->fs.sel;
1992 regs->xgs = vmcb->gs.sel;
1993 regs->xss = vmcb->ss.sel;
1997 static void svm_debug_restore_cpu_user_regs(struct cpu_user_regs *regs)
1999 vmcb->ss.sel = regs->xss;
2000 vmcb->rsp = regs->esp;
2001 vmcb->rflags = regs->eflags;
2002 vmcb->cs.sel = regs->xcs;
2003 vmcb->rip = regs->eip;
2005 vmcb->gs.sel = regs->xgs;
2006 vmcb->fs.sel = regs->xfs;
2007 vmcb->es.sel = regs->xes;
2008 vmcb->ds.sel = regs->xds;
2010 #endif
/*
 * Handle an intercepted INVLPG (invlpga == 0) or INVLPGA (invlpga != 0)
 * for the current vcpu.
 *
 * The guest RIP is advanced past the instruction and shadow_invlpg() is
 * called for the target virtual address.  The domain is crashed if the
 * instruction bytes cannot be read.
 */
2013 void svm_handle_invlpg(const short invlpga, struct cpu_user_regs *regs)
2015 struct vcpu *v = current;
2016 u8 opcode[MAX_INST_LEN], prefix, length = MAX_INST_LEN;
2017 unsigned long g_vaddr;
2018 int inst_len;
2019 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2021 /*
2022 * Unknown how many bytes the invlpg instruction will take. Use the
2023 * maximum instruction length here
2024 */
2025 if (inst_copy_from_guest(opcode, svm_rip2pointer(vmcb), length) < length)
2027 gdprintk(XENLOG_ERR, "Error reading memory %d bytes\n", length);
2028 domain_crash(v->domain);
2029 return;
2032 if (invlpga)
2034 inst_len = __get_instruction_length(vmcb, INSTR_INVLPGA, opcode);
2035 ASSERT(inst_len > 0);
2036 __update_guest_eip(vmcb, inst_len);
2038 /*
2039 * The address is implicit on this instruction. At the moment, we don't
2040 * use ecx (ASID) to identify individual guests pages
2041 */
2042 g_vaddr = regs->eax;
2044 else
2046 /* What about multiple prefix codes? */
/* Only the first byte is examined for a prefix. */
2047 prefix = (is_prefix(opcode[0])?opcode[0]:0);
2048 inst_len = __get_instruction_length(vmcb, INSTR_INVLPG, opcode);
2049 ASSERT(inst_len > 0);
/* Step back one byte before decoding the operand, and shrink 'length' by
 * the bytes already accounted for.  NOTE(review): presumably the returned
 * length includes the ModRM byte - confirm against
 * __get_instruction_length(). */
2051 inst_len--;
2052 length -= inst_len;
2054 /*
2055 * Decode memory operand of the instruction including ModRM, SIB, and
2056 * displacement to get effective address and length in bytes. Assume
2057 * the system in either 32- or 64-bit mode.
2058 */
2059 g_vaddr = get_effective_addr_modrm64(vmcb, regs, prefix, inst_len,
2060 &opcode[inst_len], &length);
/* 'length' was passed by pointer above and is added to the total here. */
2062 inst_len += length;
2063 __update_guest_eip (vmcb, inst_len);
2066 /* Overkill, we may not need this */
2067 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
2068 shadow_invlpg(v, g_vaddr);
2072 /*
2073 * Reset to realmode causes execution to start at 0xF000:0xFFF0 in
2074 * 16-bit realmode. Basically, this mimics a processor reset.
2076 * returns 0 on success, non-zero otherwise
2077 */
2078 static int svm_do_vmmcall_reset_to_realmode(struct vcpu *v,
2079 struct cpu_user_regs *regs)
2081 struct vmcb_struct *vmcb;
2083 ASSERT(v);
2084 ASSERT(regs);
2086 vmcb = v->arch.hvm_svm.vmcb;
2088 ASSERT(vmcb);
2090 /* clear the vmcb and user regs */
2091 memset(regs, 0, sizeof(struct cpu_user_regs));
2093 /* VMCB Control */
2094 vmcb->tsc_offset = 0;
2096 /* VMCB State */
2097 vmcb->cr0 = X86_CR0_ET | X86_CR0_PG | X86_CR0_WP;
2098 v->arch.hvm_svm.cpu_shadow_cr0 = X86_CR0_ET;
2100 vmcb->cr2 = 0;
2101 vmcb->efer = EFER_SVME;
2103 vmcb->cr4 = SVM_CR4_HOST_MASK;
2104 v->arch.hvm_svm.cpu_shadow_cr4 = 0;
2105 clear_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
2107 /* This will jump to ROMBIOS */
2108 vmcb->rip = 0xFFF0;
2110 /* setup the segment registers and all their hidden states */
2111 vmcb->cs.sel = 0xF000;
2112 vmcb->cs.attributes.bytes = 0x089b;
2113 vmcb->cs.limit = 0xffff;
2114 vmcb->cs.base = 0x000F0000;
2116 vmcb->ss.sel = 0x00;
2117 vmcb->ss.attributes.bytes = 0x0893;
2118 vmcb->ss.limit = 0xffff;
2119 vmcb->ss.base = 0x00;
2121 vmcb->ds.sel = 0x00;
2122 vmcb->ds.attributes.bytes = 0x0893;
2123 vmcb->ds.limit = 0xffff;
2124 vmcb->ds.base = 0x00;
2126 vmcb->es.sel = 0x00;
2127 vmcb->es.attributes.bytes = 0x0893;
2128 vmcb->es.limit = 0xffff;
2129 vmcb->es.base = 0x00;
2131 vmcb->fs.sel = 0x00;
2132 vmcb->fs.attributes.bytes = 0x0893;
2133 vmcb->fs.limit = 0xffff;
2134 vmcb->fs.base = 0x00;
2136 vmcb->gs.sel = 0x00;
2137 vmcb->gs.attributes.bytes = 0x0893;
2138 vmcb->gs.limit = 0xffff;
2139 vmcb->gs.base = 0x00;
2141 vmcb->ldtr.sel = 0x00;
2142 vmcb->ldtr.attributes.bytes = 0x0000;
2143 vmcb->ldtr.limit = 0x0;
2144 vmcb->ldtr.base = 0x00;
2146 vmcb->gdtr.sel = 0x00;
2147 vmcb->gdtr.attributes.bytes = 0x0000;
2148 vmcb->gdtr.limit = 0x0;
2149 vmcb->gdtr.base = 0x00;
2151 vmcb->tr.sel = 0;
2152 vmcb->tr.attributes.bytes = 0;
2153 vmcb->tr.limit = 0x0;
2154 vmcb->tr.base = 0;
2156 vmcb->idtr.sel = 0x00;
2157 vmcb->idtr.attributes.bytes = 0x0000;
2158 vmcb->idtr.limit = 0x3ff;
2159 vmcb->idtr.base = 0x00;
2161 vmcb->rax = 0;
2162 vmcb->rsp = 0;
2164 return 0;
2168 /*
2169 * svm_do_vmmcall - SVM VMMCALL handler
2171 * returns 0 on success, non-zero otherwise
2172 */
2173 static int svm_do_vmmcall(struct vcpu *v, struct cpu_user_regs *regs)
2175 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2176 int inst_len;
2178 ASSERT(vmcb);
2179 ASSERT(regs);
2181 inst_len = __get_instruction_length(vmcb, INSTR_VMCALL, NULL);
2182 ASSERT(inst_len > 0);
2184 if ( regs->eax & 0x80000000 )
2186 /* VMMCALL sanity check */
2187 if ( vmcb->cpl > get_vmmcall_cpl(regs->edi) )
2189 printk("VMMCALL CPL check failed\n");
2190 return -1;
2193 /* handle the request */
2194 switch ( regs->eax )
2196 case VMMCALL_RESET_TO_REALMODE:
2197 if ( svm_do_vmmcall_reset_to_realmode(v, regs) )
2199 printk("svm_do_vmmcall_reset_to_realmode() failed\n");
2200 return -1;
2202 /* since we just reset the VMCB, return without adjusting
2203 * the eip */
2204 return 0;
2206 case VMMCALL_DEBUG:
2207 printk("DEBUG features not implemented yet\n");
2208 break;
2209 default:
2210 break;
2213 hvm_print_line(v, regs->eax); /* provides the current domain */
2215 else
2217 hvm_do_hypercall(regs);
2220 __update_guest_eip(vmcb, inst_len);
2221 return 0;
2225 void svm_dump_inst(unsigned long eip)
2227 u8 opcode[256];
2228 unsigned long ptr;
2229 int len;
2230 int i;
2232 ptr = eip & ~0xff;
2233 len = 0;
2235 if (hvm_copy_from_guest_virt(opcode, ptr, sizeof(opcode)) == 0)
2236 len = sizeof(opcode);
2238 printk("Code bytes around(len=%d) %lx:", len, eip);
2239 for (i = 0; i < len; i++)
2241 if ((i & 0x0f) == 0)
2242 printk("\n%08lx:", ptr+i);
2244 printk("%02x ", opcode[i]);
2247 printk("\n");
/*
 * Print the guest general-purpose registers from 'regs', the shadow CR0,
 * the VMCB CR3 and the current vcpu's hw_cr3, tagged with the caller
 * supplied string 'from'.
 */
2251 void svm_dump_regs(const char *from, struct cpu_user_regs *regs)
2253 struct vcpu *v = current;
2254 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2255 unsigned long pt = v->arch.hvm_vcpu.hw_cr3;
2257 printk("%s: guest registers from %s:\n", __func__, from);
/* 64-bit build: dump all sixteen general-purpose registers. */
2258 #if defined (__x86_64__)
2259 printk("rax: %016lx rbx: %016lx rcx: %016lx\n",
2260 regs->rax, regs->rbx, regs->rcx);
2261 printk("rdx: %016lx rsi: %016lx rdi: %016lx\n",
2262 regs->rdx, regs->rsi, regs->rdi);
2263 printk("rbp: %016lx rsp: %016lx r8: %016lx\n",
2264 regs->rbp, regs->rsp, regs->r8);
2265 printk("r9: %016lx r10: %016lx r11: %016lx\n",
2266 regs->r9, regs->r10, regs->r11);
2267 printk("r12: %016lx r13: %016lx r14: %016lx\n",
2268 regs->r12, regs->r13, regs->r14);
2269 printk("r15: %016lx cr0: %016lx cr3: %016lx\n",
2270 regs->r15, v->arch.hvm_svm.cpu_shadow_cr0, vmcb->cr3);
/* 32-bit build: the eight legacy registers, then CR0 and CR3 separately. */
2271 #else
2272 printk("eax: %08x, ebx: %08x, ecx: %08x, edx: %08x\n",
2273 regs->eax, regs->ebx, regs->ecx, regs->edx);
2274 printk("edi: %08x, esi: %08x, ebp: %08x, esp: %08x\n",
2275 regs->edi, regs->esi, regs->ebp, regs->esp);
2276 printk("%s: guest cr0: %lx\n", __func__,
2277 v->arch.hvm_svm.cpu_shadow_cr0);
2278 printk("guest CR3 = %llx\n", vmcb->cr3);
2279 #endif
2280 printk("%s: pt = %lx\n", __func__, pt);
2284 void svm_dump_host_regs(const char *from)
2286 struct vcpu *v = current;
2287 unsigned long pt = pt = pagetable_get_paddr(v->arch.monitor_table);
2288 unsigned long cr3, cr0;
2289 printk("Host registers at %s\n", from);
2291 __asm__ __volatile__ ("\tmov %%cr0,%0\n"
2292 "\tmov %%cr3,%1\n"
2293 : "=r" (cr0), "=r"(cr3));
2294 printk("%s: pt = %lx, cr3 = %lx, cr0 = %lx\n", __func__, pt, cr3, cr0);
2297 #ifdef SVM_EXTRA_DEBUG
/*
 * Printable names for the VMEXIT_* exit codes, indexed by the exit code
 * itself through designated initializers.  Codes without an entry below are
 * left as NULL, so a lookup must check for that before printing.  Only
 * built when SVM_EXTRA_DEBUG is defined.
 */
2298 static char *exit_reasons[] = {
2299 [VMEXIT_CR0_READ] = "CR0_READ",
2300 [VMEXIT_CR1_READ] = "CR1_READ",
2301 [VMEXIT_CR2_READ] = "CR2_READ",
2302 [VMEXIT_CR3_READ] = "CR3_READ",
2303 [VMEXIT_CR4_READ] = "CR4_READ",
2304 [VMEXIT_CR5_READ] = "CR5_READ",
2305 [VMEXIT_CR6_READ] = "CR6_READ",
2306 [VMEXIT_CR7_READ] = "CR7_READ",
2307 [VMEXIT_CR8_READ] = "CR8_READ",
2308 [VMEXIT_CR9_READ] = "CR9_READ",
2309 [VMEXIT_CR10_READ] = "CR10_READ",
2310 [VMEXIT_CR11_READ] = "CR11_READ",
2311 [VMEXIT_CR12_READ] = "CR12_READ",
2312 [VMEXIT_CR13_READ] = "CR13_READ",
2313 [VMEXIT_CR14_READ] = "CR14_READ",
2314 [VMEXIT_CR15_READ] = "CR15_READ",
2315 [VMEXIT_CR0_WRITE] = "CR0_WRITE",
2316 [VMEXIT_CR1_WRITE] = "CR1_WRITE",
2317 [VMEXIT_CR2_WRITE] = "CR2_WRITE",
2318 [VMEXIT_CR3_WRITE] = "CR3_WRITE",
2319 [VMEXIT_CR4_WRITE] = "CR4_WRITE",
2320 [VMEXIT_CR5_WRITE] = "CR5_WRITE",
2321 [VMEXIT_CR6_WRITE] = "CR6_WRITE",
2322 [VMEXIT_CR7_WRITE] = "CR7_WRITE",
2323 [VMEXIT_CR8_WRITE] = "CR8_WRITE",
2324 [VMEXIT_CR9_WRITE] = "CR9_WRITE",
2325 [VMEXIT_CR10_WRITE] = "CR10_WRITE",
2326 [VMEXIT_CR11_WRITE] = "CR11_WRITE",
2327 [VMEXIT_CR12_WRITE] = "CR12_WRITE",
2328 [VMEXIT_CR13_WRITE] = "CR13_WRITE",
2329 [VMEXIT_CR14_WRITE] = "CR14_WRITE",
2330 [VMEXIT_CR15_WRITE] = "CR15_WRITE",
2331 [VMEXIT_DR0_READ] = "DR0_READ",
2332 [VMEXIT_DR1_READ] = "DR1_READ",
2333 [VMEXIT_DR2_READ] = "DR2_READ",
2334 [VMEXIT_DR3_READ] = "DR3_READ",
2335 [VMEXIT_DR4_READ] = "DR4_READ",
2336 [VMEXIT_DR5_READ] = "DR5_READ",
2337 [VMEXIT_DR6_READ] = "DR6_READ",
2338 [VMEXIT_DR7_READ] = "DR7_READ",
2339 [VMEXIT_DR8_READ] = "DR8_READ",
2340 [VMEXIT_DR9_READ] = "DR9_READ",
2341 [VMEXIT_DR10_READ] = "DR10_READ",
2342 [VMEXIT_DR11_READ] = "DR11_READ",
2343 [VMEXIT_DR12_READ] = "DR12_READ",
2344 [VMEXIT_DR13_READ] = "DR13_READ",
2345 [VMEXIT_DR14_READ] = "DR14_READ",
2346 [VMEXIT_DR15_READ] = "DR15_READ",
2347 [VMEXIT_DR0_WRITE] = "DR0_WRITE",
2348 [VMEXIT_DR1_WRITE] = "DR1_WRITE",
2349 [VMEXIT_DR2_WRITE] = "DR2_WRITE",
2350 [VMEXIT_DR3_WRITE] = "DR3_WRITE",
2351 [VMEXIT_DR4_WRITE] = "DR4_WRITE",
2352 [VMEXIT_DR5_WRITE] = "DR5_WRITE",
2353 [VMEXIT_DR6_WRITE] = "DR6_WRITE",
2354 [VMEXIT_DR7_WRITE] = "DR7_WRITE",
2355 [VMEXIT_DR8_WRITE] = "DR8_WRITE",
2356 [VMEXIT_DR9_WRITE] = "DR9_WRITE",
2357 [VMEXIT_DR10_WRITE] = "DR10_WRITE",
2358 [VMEXIT_DR11_WRITE] = "DR11_WRITE",
2359 [VMEXIT_DR12_WRITE] = "DR12_WRITE",
2360 [VMEXIT_DR13_WRITE] = "DR13_WRITE",
2361 [VMEXIT_DR14_WRITE] = "DR14_WRITE",
2362 [VMEXIT_DR15_WRITE] = "DR15_WRITE",
2363 [VMEXIT_EXCEPTION_DE] = "EXCEPTION_DE",
2364 [VMEXIT_EXCEPTION_DB] = "EXCEPTION_DB",
2365 [VMEXIT_EXCEPTION_NMI] = "EXCEPTION_NMI",
2366 [VMEXIT_EXCEPTION_BP] = "EXCEPTION_BP",
2367 [VMEXIT_EXCEPTION_OF] = "EXCEPTION_OF",
2368 [VMEXIT_EXCEPTION_BR] = "EXCEPTION_BR",
2369 [VMEXIT_EXCEPTION_UD] = "EXCEPTION_UD",
2370 [VMEXIT_EXCEPTION_NM] = "EXCEPTION_NM",
2371 [VMEXIT_EXCEPTION_DF] = "EXCEPTION_DF",
2372 [VMEXIT_EXCEPTION_09] = "EXCEPTION_09",
2373 [VMEXIT_EXCEPTION_TS] = "EXCEPTION_TS",
2374 [VMEXIT_EXCEPTION_NP] = "EXCEPTION_NP",
2375 [VMEXIT_EXCEPTION_SS] = "EXCEPTION_SS",
2376 [VMEXIT_EXCEPTION_GP] = "EXCEPTION_GP",
2377 [VMEXIT_EXCEPTION_PF] = "EXCEPTION_PF",
2378 [VMEXIT_EXCEPTION_15] = "EXCEPTION_15",
2379 [VMEXIT_EXCEPTION_MF] = "EXCEPTION_MF",
2380 [VMEXIT_EXCEPTION_AC] = "EXCEPTION_AC",
2381 [VMEXIT_EXCEPTION_MC] = "EXCEPTION_MC",
2382 [VMEXIT_EXCEPTION_XF] = "EXCEPTION_XF",
2383 [VMEXIT_INTR] = "INTR",
2384 [VMEXIT_NMI] = "NMI",
2385 [VMEXIT_SMI] = "SMI",
2386 [VMEXIT_INIT] = "INIT",
2387 [VMEXIT_VINTR] = "VINTR",
2388 [VMEXIT_CR0_SEL_WRITE] = "CR0_SEL_WRITE",
2389 [VMEXIT_IDTR_READ] = "IDTR_READ",
2390 [VMEXIT_GDTR_READ] = "GDTR_READ",
2391 [VMEXIT_LDTR_READ] = "LDTR_READ",
2392 [VMEXIT_TR_READ] = "TR_READ",
2393 [VMEXIT_IDTR_WRITE] = "IDTR_WRITE",
2394 [VMEXIT_GDTR_WRITE] = "GDTR_WRITE",
2395 [VMEXIT_LDTR_WRITE] = "LDTR_WRITE",
2396 [VMEXIT_TR_WRITE] = "TR_WRITE",
2397 [VMEXIT_RDTSC] = "RDTSC",
2398 [VMEXIT_RDPMC] = "RDPMC",
2399 [VMEXIT_PUSHF] = "PUSHF",
2400 [VMEXIT_POPF] = "POPF",
2401 [VMEXIT_CPUID] = "CPUID",
2402 [VMEXIT_RSM] = "RSM",
2403 [VMEXIT_IRET] = "IRET",
2404 [VMEXIT_SWINT] = "SWINT",
2405 [VMEXIT_INVD] = "INVD",
2406 [VMEXIT_PAUSE] = "PAUSE",
2407 [VMEXIT_HLT] = "HLT",
2408 [VMEXIT_INVLPG] = "INVLPG",
2409 [VMEXIT_INVLPGA] = "INVLPGA",
2410 [VMEXIT_IOIO] = "IOIO",
2411 [VMEXIT_MSR] = "MSR",
2412 [VMEXIT_TASK_SWITCH] = "TASK_SWITCH",
2413 [VMEXIT_FERR_FREEZE] = "FERR_FREEZE",
2414 [VMEXIT_SHUTDOWN] = "SHUTDOWN",
2415 [VMEXIT_VMRUN] = "VMRUN",
2416 [VMEXIT_VMMCALL] = "VMMCALL",
2417 [VMEXIT_VMLOAD] = "VMLOAD",
2418 [VMEXIT_VMSAVE] = "VMSAVE",
2419 [VMEXIT_STGI] = "STGI",
2420 [VMEXIT_CLGI] = "CLGI",
2421 [VMEXIT_SKINIT] = "SKINIT",
2422 [VMEXIT_RDTSCP] = "RDTSCP",
2423 [VMEXIT_ICEBP] = "ICEBP",
2424 [VMEXIT_NPF] = "NPF"
2425 };
2426 #endif /* SVM_EXTRA_DEBUG */
#ifdef SVM_WALK_GUEST_PAGES
/*
 * walk_shadow_and_guest_pt: debug aid that prints the guest and shadow
 * page-table entries covering one guest virtual address.
 *
 * @gva: guest virtual address to look up.
 *
 * Prints the gva, its translation from shadow_gva_to_gpa() and the low
 * 32 bits of the VMCB's cr3, then returns early if guest paging is off
 * or the translated address is MMIO.  Otherwise it prints the guest and
 * shadow L2 entries and, if the guest L2 entry is present, the guest L1
 * entry.
 *
 * NOTE: the function hits BUG() before the shadow L1 entry is fetched, so
 * the final copy and "S-PTE" print are never reached as written.
 */
void walk_shadow_and_guest_pt(unsigned long gva)
{
    struct vcpu *v = current;
    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
    paddr_t gpa = shadow_gva_to_gpa(v, gva);
    l2_pgentry_t guest_l2e;
    l2_pgentry_t shadow_l2e;
    l1_pgentry_t guest_l1e;
    l1_pgentry_t shadow_l1e;

    printk("gva = %lx, gpa=%"PRIpaddr", gCR3=%x\n", gva, gpa, (u32)vmcb->cr3);

    /* Nothing to walk without guest paging, or for an MMIO address. */
    if ( !svm_paging_enabled(v) )
        return;
    if ( mmio_space(gpa) )
        return;

    /* Dump the guest and shadow page-directory entries. */
    __guest_get_l2e(v, gva, &guest_l2e);
    printk("G-PDE = %x, flags=%x\n", guest_l2e.l2, l2e_get_flags(guest_l2e));
    __shadow_get_l2e(v, gva, &shadow_l2e);
    printk("S-PDE = %x, flags=%x\n", shadow_l2e.l2, l2e_get_flags(shadow_l2e));

    /* No L1 table to inspect if the guest PDE is not present. */
    if ( unlikely(!(l2e_get_flags(guest_l2e) & _PAGE_PRESENT)) )
        return;

    shadow_l1e = l1e_empty();

    /* This is actually overkill - we only need to ensure the hl2 is in-sync. */
    shadow_sync_va(v, gva);

    guest_l1e.l1 = 0;
    __copy_from_user(&guest_l1e,
                     &linear_pg_table[l1_linear_offset(gva)],
                     sizeof(guest_l1e));
    printk("G-PTE = %x, flags=%x\n", guest_l1e.l1, l1e_get_flags(guest_l1e));

    /*
     * Need to think about this, and convert usage of
     * phys_to_machine_mapping to use pagetable format...
     */
    BUG();
    __copy_from_user(&shadow_l1e,
                     &phys_to_machine_mapping[l1e_get_pfn(guest_l1e)],
                     sizeof(shadow_l1e));

    printk("S-PTE = %x, flags=%x\n", shadow_l1e.l1, l1e_get_flags(shadow_l1e));
}
#endif /* SVM_WALK_GUEST_PAGES */
2474 asmlinkage void svm_vmexit_handler(struct cpu_user_regs *regs)
2476 unsigned int exit_reason;
2477 unsigned long eip;
2478 struct vcpu *v = current;
2479 int error;
2480 int do_debug = 0;
2481 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2483 ASSERT(vmcb);
2485 exit_reason = vmcb->exitcode;
2486 save_svm_cpu_user_regs(v, regs);
2488 v->arch.hvm_svm.inject_event = 0;
2490 if (exit_reason == VMEXIT_INVALID)
2492 svm_dump_vmcb(__func__, vmcb);
2493 goto exit_and_crash;
2496 #ifdef SVM_EXTRA_DEBUG
2498 #if defined(__i386__)
2499 #define rip eip
2500 #endif
2502 static unsigned long intercepts_counter = 0;
2504 if (svm_dbg_on && exit_reason == VMEXIT_EXCEPTION_PF)
2506 if (svm_paging_enabled(v) &&
2507 !mmio_space(shadow_gva_to_gpa(current, vmcb->exitinfo2)))
2509 printk("I%08ld,ExC=%s(%d),IP=%x:%"PRIx64","
2510 "I1=%"PRIx64",I2=%"PRIx64",INT=%"PRIx64", "
2511 "gpa=%"PRIx64"\n", intercepts_counter,
2512 exit_reasons[exit_reason], exit_reason, regs->cs,
2513 (u64)regs->rip,
2514 (u64)vmcb->exitinfo1,
2515 (u64)vmcb->exitinfo2,
2516 (u64)vmcb->exitintinfo.bytes,
2517 (u64)shadow_gva_to_gpa(current, vmcb->exitinfo2));
2519 else
2521 printk("I%08ld,ExC=%s(%d),IP=%x:%"PRIx64","
2522 "I1=%"PRIx64",I2=%"PRIx64",INT=%"PRIx64"\n",
2523 intercepts_counter,
2524 exit_reasons[exit_reason], exit_reason, regs->cs,
2525 (u64)regs->rip,
2526 (u64)vmcb->exitinfo1,
2527 (u64)vmcb->exitinfo2,
2528 (u64)vmcb->exitintinfo.bytes );
2531 else if ( svm_dbg_on
2532 && exit_reason != VMEXIT_IOIO
2533 && exit_reason != VMEXIT_INTR)
2536 if (exit_reasons[exit_reason])
2538 printk("I%08ld,ExC=%s(%d),IP=%x:%"PRIx64","
2539 "I1=%"PRIx64",I2=%"PRIx64",INT=%"PRIx64"\n",
2540 intercepts_counter,
2541 exit_reasons[exit_reason], exit_reason, regs->cs,
2542 (u64)regs->rip,
2543 (u64)vmcb->exitinfo1,
2544 (u64)vmcb->exitinfo2,
2545 (u64)vmcb->exitintinfo.bytes);
2547 else
2549 printk("I%08ld,ExC=%d(0x%x),IP=%x:%"PRIx64","
2550 "I1=%"PRIx64",I2=%"PRIx64",INT=%"PRIx64"\n",
2551 intercepts_counter, exit_reason, exit_reason, regs->cs,
2552 (u64)regs->rip,
2553 (u64)vmcb->exitinfo1,
2554 (u64)vmcb->exitinfo2,
2555 (u64)vmcb->exitintinfo.bytes);
2559 #ifdef SVM_WALK_GUEST_PAGES
2560 if( exit_reason == VMEXIT_EXCEPTION_PF
2561 && ( ( vmcb->exitinfo2 == vmcb->rip )
2562 || vmcb->exitintinfo.bytes) )
2564 if ( svm_paging_enabled(v) &&
2565 !mmio_space(gva_to_gpa(vmcb->exitinfo2)) )
2566 walk_shadow_and_guest_pt(vmcb->exitinfo2);
2568 #endif
2570 intercepts_counter++;
2572 #if 0
2573 if (svm_dbg_on)
2574 do_debug = svm_do_debugout(exit_reason);
2575 #endif
2577 if (do_debug)
2579 printk("%s:+ guest_table = 0x%08x, monitor_table = 0x%08x, "
2580 "hw_cr3 = 0x%16lx\n",
2581 __func__,
2582 (int) v->arch.guest_table.pfn,
2583 (int) v->arch.monitor_table.pfn,
2584 (long unsigned int) v->arch.hvm_vcpu.hw_cr3);
2586 svm_dump_vmcb(__func__, vmcb);
2587 svm_dump_regs(__func__, regs);
2588 svm_dump_inst(svm_rip2pointer(vmcb));
2591 #if defined(__i386__)
2592 #undef rip
2593 #endif
2596 #endif /* SVM_EXTRA_DEBUG */
2599 perfc_incra(svmexits, exit_reason);
2600 eip = vmcb->rip;
2602 #ifdef SVM_EXTRA_DEBUG
2603 if (do_debug)
2605 printk("eip = %lx, exit_reason = %d (0x%x)\n",
2606 eip, exit_reason, exit_reason);
2608 #endif /* SVM_EXTRA_DEBUG */
2610 TRACE_3D(TRC_VMX_VMEXIT, v->domain->domain_id, eip, exit_reason);
2612 switch (exit_reason)
2614 case VMEXIT_EXCEPTION_DB:
2616 #ifdef XEN_DEBUGGER
2617 svm_debug_save_cpu_user_regs(regs);
2618 pdb_handle_exception(1, regs, 1);
2619 svm_debug_restore_cpu_user_regs(regs);
2620 #else
2621 svm_store_cpu_user_regs(regs, v);
2622 domain_pause_for_debugger();
2623 #endif
2625 break;
2627 case VMEXIT_NMI:
2628 break;
2630 case VMEXIT_SMI:
2631 /*
2632 * For asynchronous SMI's, we just need to allow global interrupts
2633 * so that the SMI is taken properly in the context of the host. The
2634 * standard code does a STGI after the VMEXIT which should accomplish
2635 * this task. Continue as normal and restart the guest.
2636 */
2637 break;
2639 case VMEXIT_INIT:
2640 /*
2641 * Nothing to do, in fact we should never get to this point.
2642 */
2643 break;
2645 case VMEXIT_EXCEPTION_BP:
2646 #ifdef XEN_DEBUGGER
2647 svm_debug_save_cpu_user_regs(regs);
2648 pdb_handle_exception(3, regs, 1);
2649 svm_debug_restore_cpu_user_regs(regs);
2650 #else
2651 if ( test_bit(_DOMF_debugging, &v->domain->domain_flags) )
2652 domain_pause_for_debugger();
2653 else
2654 svm_inject_exception(v, TRAP_int3, 0, 0);
2655 #endif
2656 break;
2658 case VMEXIT_EXCEPTION_NM:
2659 svm_do_no_device_fault(vmcb);
2660 break;
2662 case VMEXIT_EXCEPTION_GP:
2663 /* This should probably not be trapped in the future */
2664 regs->error_code = vmcb->exitinfo1;
2665 svm_do_general_protection_fault(v, regs);
2666 break;
2668 case VMEXIT_EXCEPTION_PF:
2670 unsigned long va;
2671 va = vmcb->exitinfo2;
2672 regs->error_code = vmcb->exitinfo1;
2673 HVM_DBG_LOG(DBG_LEVEL_VMMU,
2674 "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx",
2675 (unsigned long)regs->eax, (unsigned long)regs->ebx,
2676 (unsigned long)regs->ecx, (unsigned long)regs->edx,
2677 (unsigned long)regs->esi, (unsigned long)regs->edi);
2679 if (!(error = svm_do_page_fault(va, regs)))
2681 /* Inject #PG using Interruption-Information Fields */
2682 svm_inject_exception(v, TRAP_page_fault, 1, regs->error_code);
2684 v->arch.hvm_svm.cpu_cr2 = va;
2685 vmcb->cr2 = va;
2686 TRACE_3D(TRC_VMX_INTR, v->domain->domain_id,
2687 VMEXIT_EXCEPTION_PF, va);
2689 break;
2692 case VMEXIT_EXCEPTION_DF:
2693 /* Debug info to hopefully help debug WHY the guest double-faulted. */
2694 svm_dump_vmcb(__func__, vmcb);
2695 svm_dump_regs(__func__, regs);
2696 svm_dump_inst(svm_rip2pointer(vmcb));
2697 svm_inject_exception(v, TRAP_double_fault, 1, 0);
2698 break;
2700 case VMEXIT_VINTR:
2701 vmcb->vintr.fields.irq = 0;
2702 vmcb->general1_intercepts &= ~GENERAL1_INTERCEPT_VINTR;
2703 break;
2705 case VMEXIT_INTR:
2706 break;
2708 case VMEXIT_INVD:
2709 svm_vmexit_do_invd(vmcb);
2710 break;
2712 case VMEXIT_GDTR_WRITE:
2713 printk("WRITE to GDTR\n");
2714 break;
2716 case VMEXIT_TASK_SWITCH:
2717 goto exit_and_crash;
2719 case VMEXIT_CPUID:
2720 svm_vmexit_do_cpuid(vmcb, regs->eax, regs);
2721 break;
2723 case VMEXIT_HLT:
2724 svm_vmexit_do_hlt(vmcb);
2725 break;
2727 case VMEXIT_INVLPG:
2728 svm_handle_invlpg(0, regs);
2729 break;
2731 case VMEXIT_INVLPGA:
2732 svm_handle_invlpg(1, regs);
2733 break;
2735 case VMEXIT_VMMCALL:
2736 svm_do_vmmcall(v, regs);
2737 break;
2739 case VMEXIT_CR0_READ:
2740 svm_cr_access(v, 0, TYPE_MOV_FROM_CR, regs);
2741 break;
2743 case VMEXIT_CR2_READ:
2744 svm_cr_access(v, 2, TYPE_MOV_FROM_CR, regs);
2745 break;
2747 case VMEXIT_CR3_READ:
2748 svm_cr_access(v, 3, TYPE_MOV_FROM_CR, regs);
2749 break;
2751 case VMEXIT_CR4_READ:
2752 svm_cr_access(v, 4, TYPE_MOV_FROM_CR, regs);
2753 break;
2755 case VMEXIT_CR8_READ:
2756 svm_cr_access(v, 8, TYPE_MOV_FROM_CR, regs);
2757 break;
2759 case VMEXIT_CR0_WRITE:
2760 svm_cr_access(v, 0, TYPE_MOV_TO_CR, regs);
2761 break;
2763 case VMEXIT_CR2_WRITE:
2764 svm_cr_access(v, 2, TYPE_MOV_TO_CR, regs);
2765 break;
2767 case VMEXIT_CR3_WRITE:
2768 svm_cr_access(v, 3, TYPE_MOV_TO_CR, regs);
2769 local_flush_tlb();
2770 break;
2772 case VMEXIT_CR4_WRITE:
2773 svm_cr_access(v, 4, TYPE_MOV_TO_CR, regs);
2774 break;
2776 case VMEXIT_CR8_WRITE:
2777 svm_cr_access(v, 8, TYPE_MOV_TO_CR, regs);
2778 break;
2780 case VMEXIT_DR0_WRITE ... VMEXIT_DR7_WRITE:
2781 svm_dr_access(v, regs);
2782 break;
2784 case VMEXIT_IOIO:
2785 svm_io_instruction(v);
2786 break;
2788 case VMEXIT_MSR:
2789 svm_do_msr_access(v, regs);
2790 break;
2792 case VMEXIT_SHUTDOWN:
2793 gdprintk(XENLOG_ERR, "Guest shutdown exit\n");
2794 goto exit_and_crash;
2796 default:
2797 exit_and_crash:
2798 gdprintk(XENLOG_ERR, "unexpected VMEXIT: exit reason = 0x%x, "
2799 "exitinfo1 = %"PRIx64", exitinfo2 = %"PRIx64"\n",
2800 exit_reason,
2801 (u64)vmcb->exitinfo1, (u64)vmcb->exitinfo2);
2802 domain_crash(v->domain);
2803 break;
2806 #ifdef SVM_EXTRA_DEBUG
2807 if (do_debug)
2809 printk("%s: Done switch on vmexit_code\n", __func__);
2810 svm_dump_regs(__func__, regs);
2813 if (do_debug)
2815 printk("vmexit_handler():- guest_table = 0x%08x, "
2816 "monitor_table = 0x%08x, hw_cr3 = 0x%16x\n",
2817 (int)v->arch.guest_table.pfn,
2818 (int)v->arch.monitor_table.pfn,
2819 (int)v->arch.hvm_vcpu.hw_cr3);
2820 printk("svm_vmexit_handler: Returning\n");
2822 #endif
2825 asmlinkage void svm_load_cr2(void)
2827 struct vcpu *v = current;
2829 local_irq_disable();
2830 asm volatile("mov %0,%%cr2": :"r" (v->arch.hvm_svm.cpu_cr2));
2833 asmlinkage void svm_asid(void)
2835 struct vcpu *v = current;
2836 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2838 /*
2839 * if need to assign new asid, or if switching cores,
2840 * retire asid for the old core, and assign a new asid to the current core.
2841 */
2842 if ( test_bit( ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags ) ||
2843 ( v->arch.hvm_svm.asid_core != v->arch.hvm_svm.launch_core )) {
2844 /* recycle asid */
2845 if ( !asidpool_assign_next(vmcb, 1,
2846 v->arch.hvm_svm.asid_core,
2847 v->arch.hvm_svm.launch_core) )
2849 /* If we get here, we have a major problem */
2850 domain_crash_synchronous();
2853 v->arch.hvm_svm.asid_core = v->arch.hvm_svm.launch_core;
2854 clear_bit( ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags );
2858 /*
2859 * Local variables:
2860 * mode: C
2861 * c-set-style: "BSD"
2862 * c-basic-offset: 4
2863 * tab-width: 4
2864 * indent-tabs-mode: nil
2865 * End:
2866 */