direct-io.hg

view xen/arch/x86/hvm/svm/svm.c @ 11135:88e6bd5e2b54

Whitespace clean-ups.

Signed-off-by: Steven Hand <steven@xensource.com>

author    shand@kneesaa.uk.xensource.com
date      Wed Aug 16 11:36:13 2006 +0100
parents   d20e1835c24b
children  30fb6ea3a1f7
1 /*
2 * svm.c: handling SVM architecture-related VM exits
3 * Copyright (c) 2004, Intel Corporation.
4 * Copyright (c) 2005, AMD Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
17 * Place - Suite 330, Boston, MA 02111-1307 USA.
18 *
19 */
21 #include <xen/config.h>
22 #include <xen/init.h>
23 #include <xen/lib.h>
24 #include <xen/trace.h>
25 #include <xen/sched.h>
26 #include <xen/irq.h>
27 #include <xen/softirq.h>
28 #include <xen/hypercall.h>
29 #include <asm/current.h>
30 #include <asm/io.h>
31 #include <asm/shadow.h>
32 #include <asm/regs.h>
33 #include <asm/cpufeature.h>
34 #include <asm/processor.h>
35 #include <asm/types.h>
36 #include <asm/msr.h>
37 #include <asm/spinlock.h>
38 #include <asm/hvm/hvm.h>
39 #include <asm/hvm/support.h>
40 #include <asm/hvm/io.h>
41 #include <asm/hvm/svm/svm.h>
42 #include <asm/hvm/svm/vmcb.h>
43 #include <asm/hvm/svm/emulate.h>
44 #include <asm/hvm/svm/vmmcall.h>
45 #include <asm/hvm/svm/intr.h>
46 #include <asm/shadow.h>
47 #if CONFIG_PAGING_LEVELS >= 3
48 #include <asm/shadow_64.h>
49 #endif
50 #include <public/sched.h>
52 #define SVM_EXTRA_DEBUG
54 #define set_segment_register(name, value) \
55 __asm__ __volatile__ ( "movw %%ax ,%%" STR(name) "" : : "a" (value) )
57 /* External functions. We should move these to some suitable header file(s) */
59 extern void do_nmi(struct cpu_user_regs *, unsigned long);
60 extern int inst_copy_from_guest(unsigned char *buf, unsigned long guest_eip,
61 int inst_len);
62 extern asmlinkage void do_IRQ(struct cpu_user_regs *);
63 extern void send_pio_req(struct cpu_user_regs *regs, unsigned long port,
64 unsigned long count, int size, long value, int dir, int pvalid);
65 extern int svm_instrlen(struct cpu_user_regs *regs, int mode);
66 extern void svm_dump_inst(unsigned long eip);
67 extern int svm_dbg_on;
68 void svm_dump_regs(const char *from, struct cpu_user_regs *regs);
70 static void svm_relinquish_guest_resources(struct domain *d);
71 static int svm_do_vmmcall_reset_to_realmode(struct vcpu *v,
72 struct cpu_user_regs *regs);
74 /* va of hardware host save area */
75 static void *hsa[NR_CPUS] __read_mostly;
77 /* vmcb used for extended host state */
78 static void *root_vmcb[NR_CPUS] __read_mostly;
80 /* physical address of above for host VMSAVE/VMLOAD */
81 u64 root_vmcb_pa[NR_CPUS] __read_mostly;
84 /* ASID API */
85 enum {
86 ASID_AVAILABLE = 0,
87 ASID_INUSE,
88 ASID_RETIRED
89 };
90 #define INITIAL_ASID 0
91 #define ASID_MAX 64
93 struct asid_pool {
94 spinlock_t asid_lock;
95 u32 asid[ASID_MAX];
96 };
98 static DEFINE_PER_CPU(struct asid_pool, asid_pool);
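/*
 * ASIDs tag guest TLB entries so that VMRUN need not flush the TLB on
 * every world switch.  ASID 0 (INITIAL_ASID) is reserved for the host;
 * asidpool_assign_next() below hands each guest VMCB one of the remaining
 * slots, and when a core runs out it sets tlb_control to flush the TLB
 * and recycles every retired ASID in one go.
 */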
101 /*
102 * Initializes the per-core pool of ASIDs used by the guests.
103 */
104 void asidpool_init(int core)
105 {
106 int i;
108 spin_lock_init(&per_cpu(asid_pool,core).asid_lock);
110 /* Host ASID is always in use */
111 per_cpu(asid_pool,core).asid[INITIAL_ASID] = ASID_INUSE;
112 for ( i = 1; i < ASID_MAX; i++ )
113 per_cpu(asid_pool,core).asid[i] = ASID_AVAILABLE;
114 }
117 /* internal function to get the next available ASID */
118 static int asidpool_fetch_next(struct vmcb_struct *vmcb, int core)
119 {
120 int i;
121 for ( i = 1; i < ASID_MAX; i++ )
122 {
123 if ( per_cpu(asid_pool,core).asid[i] == ASID_AVAILABLE )
124 {
125 vmcb->guest_asid = i;
126 per_cpu(asid_pool,core).asid[i] = ASID_INUSE;
127 return i;
128 }
129 }
130 return -1;
131 }
134 /*
135 * This function assigns the passed VMCB the next
136 * available ASID number. If none are available, the
137 * TLB flush flag is set, and all retired ASIDs
138 * are made available.
139 *
140 * Returns: 1 -- success;
141 * 0 -- failure -- no more ASID numbers
142 * available.
143 */
144 int asidpool_assign_next( struct vmcb_struct *vmcb, int retire_current,
145 int oldcore, int newcore )
146 {
147 int i;
148 int res = 1;
149 static unsigned long cnt=0;
151 spin_lock(&per_cpu(asid_pool,oldcore).asid_lock);
152 if( retire_current && vmcb->guest_asid ) {
153 per_cpu(asid_pool,oldcore).asid[vmcb->guest_asid & (ASID_MAX-1)] =
154 ASID_RETIRED;
155 }
156 spin_unlock(&per_cpu(asid_pool,oldcore).asid_lock);
157 spin_lock(&per_cpu(asid_pool,newcore).asid_lock);
158 if( asidpool_fetch_next( vmcb, newcore ) < 0 ) {
159 if (svm_dbg_on)
160 printk( "SVM: tlb(%ld)\n", cnt++ );
161 /* Flush the TLB and make all retired slots available */
162 vmcb->tlb_control = 1;
163 for( i = 1; i < ASID_MAX; i++ ) {
164 if( per_cpu(asid_pool,newcore).asid[i] == ASID_RETIRED ) {
165 per_cpu(asid_pool,newcore).asid[i] = ASID_AVAILABLE;
166 }
167 }
168 /* Get the first available slot */
169 res = asidpool_fetch_next( vmcb, newcore ) > 0;
170 }
171 spin_unlock(&per_cpu(asid_pool,newcore).asid_lock);
172 return res;
173 }
175 void asidpool_retire( struct vmcb_struct *vmcb, int core )
176 {
177 spin_lock(&per_cpu(asid_pool,core).asid_lock);
178 if( vmcb->guest_asid ) {
179 per_cpu(asid_pool,core).asid[vmcb->guest_asid & (ASID_MAX-1)] =
180 ASID_RETIRED;
181 }
182 spin_unlock(&per_cpu(asid_pool,core).asid_lock);
183 }
185 static inline void svm_inject_exception(struct vcpu *v, int trap,
186 int ev, int error_code)
187 {
188 eventinj_t event;
189 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
191 event.bytes = 0;
192 event.fields.v = 1;
193 event.fields.type = EVENTTYPE_EXCEPTION;
194 event.fields.vector = trap;
195 event.fields.ev = ev;
196 event.fields.errorcode = error_code;
198 ASSERT(vmcb->eventinj.fields.v == 0);
200 vmcb->eventinj = event;
201 }
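/*
 * The eventinj field written above is consumed by the processor on the
 * next VMRUN, which delivers the exception through the guest IDT as if it
 * had been raised naturally; the ASSERT guards against clobbering an
 * injection still pending from a previous exit.
 */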
203 static void stop_svm(void)
204 {
205 u32 eax, edx;
206 int cpu = smp_processor_id();
208 /* We turn off the EFER_SVME bit. */
209 rdmsr(MSR_EFER, eax, edx);
210 eax &= ~EFER_SVME;
211 wrmsr(MSR_EFER, eax, edx);
213 /* release the HSA */
214 free_host_save_area(hsa[cpu]);
215 hsa[cpu] = NULL;
216 wrmsr(MSR_K8_VM_HSAVE_PA, 0, 0 );
218 /* free up the root vmcb */
219 free_vmcb(root_vmcb[cpu]);
220 root_vmcb[cpu] = NULL;
221 root_vmcb_pa[cpu] = 0;
223 printk("AMD SVM Extension is disabled.\n");
224 }
227 static void svm_store_cpu_guest_regs(
228 struct vcpu *v, struct cpu_user_regs *regs, unsigned long *crs)
229 {
230 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
232 if ( regs != NULL )
233 {
234 regs->eip = vmcb->rip;
235 regs->esp = vmcb->rsp;
236 regs->eflags = vmcb->rflags;
237 regs->cs = vmcb->cs.sel;
238 regs->ds = vmcb->ds.sel;
239 regs->es = vmcb->es.sel;
240 regs->ss = vmcb->ss.sel;
241 regs->gs = vmcb->gs.sel;
242 regs->fs = vmcb->fs.sel;
243 }
245 if ( crs != NULL )
246 {
247 /* Returning the guest's regs */
248 crs[0] = v->arch.hvm_svm.cpu_shadow_cr0;
249 crs[3] = v->arch.hvm_svm.cpu_cr3;
250 crs[4] = v->arch.hvm_svm.cpu_shadow_cr4;
251 }
252 }
254 static int svm_paging_enabled(struct vcpu *v)
255 {
256 unsigned long cr0;
258 cr0 = v->arch.hvm_svm.cpu_shadow_cr0;
260 return (cr0 & X86_CR0_PE) && (cr0 & X86_CR0_PG);
261 }
264 #define IS_CANO_ADDRESS(add) 1
266 static inline int long_mode_do_msr_read(struct cpu_user_regs *regs)
267 {
268 u64 msr_content = 0;
269 struct vcpu *vc = current;
270 struct vmcb_struct *vmcb = vc->arch.hvm_svm.vmcb;
272 switch (regs->ecx)
273 {
274 case MSR_EFER:
275 msr_content = vmcb->efer;
276 msr_content &= ~EFER_SVME;
277 break;
279 case MSR_FS_BASE:
280 msr_content = vmcb->fs.base;
281 break;
283 case MSR_GS_BASE:
284 msr_content = vmcb->gs.base;
285 break;
287 case MSR_SHADOW_GS_BASE:
288 msr_content = vmcb->kerngsbase;
289 break;
291 case MSR_STAR:
292 msr_content = vmcb->star;
293 break;
295 case MSR_LSTAR:
296 msr_content = vmcb->lstar;
297 break;
299 case MSR_CSTAR:
300 msr_content = vmcb->cstar;
301 break;
303 case MSR_SYSCALL_MASK:
304 msr_content = vmcb->sfmask;
305 break;
306 default:
307 return 0;
308 }
310 HVM_DBG_LOG(DBG_LEVEL_2, "mode_do_msr_read: msr_content: %"PRIx64"\n",
311 msr_content);
313 regs->eax = msr_content & 0xffffffff;
314 regs->edx = msr_content >> 32;
315 return 1;
316 }
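/*
 * Note that EFER reads mask out EFER_SVME so the guest cannot tell it is
 * running under SVM; the write path below forces the bit back on before
 * the value reaches the VMCB.
 */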
318 static inline int long_mode_do_msr_write(struct cpu_user_regs *regs)
319 {
320 u64 msr_content = regs->eax | ((u64)regs->edx << 32);
321 struct vcpu *vc = current;
322 struct vmcb_struct *vmcb = vc->arch.hvm_svm.vmcb;
324 HVM_DBG_LOG(DBG_LEVEL_1, "mode_do_msr_write msr %lx "
325 "msr_content %"PRIx64"\n",
326 (unsigned long)regs->ecx, msr_content);
328 switch (regs->ecx)
329 {
330 case MSR_EFER:
331 #ifdef __x86_64__
332 /* offending reserved bit will cause #GP */
333 if ( msr_content & ~(EFER_LME | EFER_LMA | EFER_NX | EFER_SCE) )
334 {
335 printk("trying to set reserved bit in EFER\n");
336 svm_inject_exception(vc, TRAP_gp_fault, 1, 0);
337 return 0;
338 }
340 /* LME: 0 -> 1 */
341 if ( msr_content & EFER_LME &&
342 !test_bit(SVM_CPU_STATE_LME_ENABLED, &vc->arch.hvm_svm.cpu_state))
343 {
344 if ( svm_paging_enabled(vc) ||
345 !test_bit(SVM_CPU_STATE_PAE_ENABLED,
346 &vc->arch.hvm_svm.cpu_state) )
347 {
348 printk("trying to set LME bit when "
349 "in paging mode or PAE bit is not set\n");
350 svm_inject_exception(vc, TRAP_gp_fault, 1, 0);
351 return 0;
352 }
353 set_bit(SVM_CPU_STATE_LME_ENABLED, &vc->arch.hvm_svm.cpu_state);
354 }
356 /* We have already recorded that we want LME, so it will be set
357 * next time CR0 gets updated. So we clear that bit and continue.
358 */
359 if ((msr_content ^ vmcb->efer) & EFER_LME)
360 msr_content &= ~EFER_LME;
361 /* No update for LME/LMA since it has no effect */
362 #endif
363 vmcb->efer = msr_content | EFER_SVME;
364 break;
366 case MSR_FS_BASE:
367 case MSR_GS_BASE:
368 if (!(SVM_LONG_GUEST(vc)))
369 domain_crash_synchronous();
371 if (!IS_CANO_ADDRESS(msr_content))
372 {
373 HVM_DBG_LOG(DBG_LEVEL_1, "Not a canonical address for MSR write\n");
374 svm_inject_exception(vc, TRAP_gp_fault, 1, 0);
375 }
377 if (regs->ecx == MSR_FS_BASE)
378 vmcb->fs.base = msr_content;
379 else
380 vmcb->gs.base = msr_content;
381 break;
383 case MSR_SHADOW_GS_BASE:
384 vmcb->kerngsbase = msr_content;
385 break;
387 case MSR_STAR:
388 vmcb->star = msr_content;
389 break;
391 case MSR_LSTAR:
392 vmcb->lstar = msr_content;
393 break;
395 case MSR_CSTAR:
396 vmcb->cstar = msr_content;
397 break;
399 case MSR_SYSCALL_MASK:
400 vmcb->sfmask = msr_content;
401 break;
403 default:
404 return 0;
405 }
406 return 1;
407 }
409 static int svm_realmode(struct vcpu *v)
410 {
411 unsigned long cr0 = v->arch.hvm_svm.cpu_shadow_cr0;
412 unsigned long eflags = v->arch.hvm_svm.vmcb->rflags;
414 return (eflags & X86_EFLAGS_VM) || !(cr0 & X86_CR0_PE);
415 }
417 static int svm_instruction_length(struct vcpu *v)
418 {
419 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
420 unsigned long cr0 = vmcb->cr0, eflags = vmcb->rflags, mode;
421 /* Check which operating mode the guest is running in. */
422 if( vmcb->efer & EFER_LMA )
423 mode = vmcb->cs.attributes.fields.l ? 8 : 4;
424 else
425 mode = (eflags & X86_EFLAGS_VM) || !(cr0 & X86_CR0_PE) ? 2 : 4;
426 return svm_instrlen(guest_cpu_user_regs(), mode);
427 }
429 static unsigned long svm_get_ctrl_reg(struct vcpu *v, unsigned int num)
430 {
431 switch ( num )
432 {
433 case 0:
434 return v->arch.hvm_svm.cpu_shadow_cr0;
435 case 2:
436 return v->arch.hvm_svm.cpu_cr2;
437 case 3:
438 return v->arch.hvm_svm.cpu_cr3;
439 default:
440 BUG();
441 }
442 return 0; /* dummy */
443 }
446 /* Make sure that xen intercepts any FP accesses from current */
447 static void svm_stts(struct vcpu *v)
448 {
449 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
451 /*
452 * If the guest does not have TS enabled then we must cause and handle an
453 * exception on first use of the FPU. If the guest *does* have TS enabled
454 * then this is not necessary: no FPU activity can occur until the guest
455 * clears CR0.TS, and we will initialise the FPU when that happens.
456 */
457 if ( !(v->arch.hvm_svm.cpu_shadow_cr0 & X86_CR0_TS) )
458 {
459 v->arch.hvm_svm.vmcb->exception_intercepts |= EXCEPTION_BITMAP_NM;
460 vmcb->cr0 |= X86_CR0_TS;
461 }
462 }
465 static void svm_set_tsc_offset(struct vcpu *v, u64 offset)
466 {
467 v->arch.hvm_svm.vmcb->tsc_offset = offset;
468 }
471 /* SVM-specific initialization code for VCPU application processors */
472 static void svm_init_ap_context(struct vcpu_guest_context *ctxt,
473 int vcpuid, int trampoline_vector)
474 {
475 int i;
476 struct vcpu *v, *bsp = current;
477 struct domain *d = bsp->domain;
478 cpu_user_regs_t *regs;
481 if ((v = d->vcpu[vcpuid]) == NULL)
482 {
483 printk("vcpuid %d is invalid! good-bye.\n", vcpuid);
484 domain_crash_synchronous();
485 }
486 regs = &v->arch.guest_context.user_regs;
488 memset(ctxt, 0, sizeof(*ctxt));
489 for (i = 0; i < 256; ++i)
490 {
491 ctxt->trap_ctxt[i].vector = i;
492 ctxt->trap_ctxt[i].cs = FLAT_KERNEL_CS;
493 }
496 /*
497 * We execute the trampoline code in real mode. The trampoline vector
498 * passed to us is page aligned and is the physical frame number for
499 * the code.
500 */
501 ctxt->user_regs.eip = 0x0;
502 ctxt->user_regs.cs = (trampoline_vector << 8);
503 ctxt->flags = VGCF_HVM_GUEST;
504 }
506 static void svm_init_hypercall_page(struct domain *d, void *hypercall_page)
507 {
508 char *p;
509 int i;
511 memset(hypercall_page, 0, PAGE_SIZE);
513 for ( i = 0; i < (PAGE_SIZE / 32); i++ )
514 {
515 p = (char *)(hypercall_page + (i * 32));
516 *(u8 *)(p + 0) = 0xb8; /* mov imm32, %eax */
517 *(u32 *)(p + 1) = i;
518 *(u8 *)(p + 5) = 0x0f; /* vmmcall */
519 *(u8 *)(p + 6) = 0x01;
520 *(u8 *)(p + 7) = 0xd9;
521 *(u8 *)(p + 8) = 0xc3; /* ret */
522 }
524 /* Don't support HYPERVISOR_iret at the moment */
525 *(u16 *)(hypercall_page + (__HYPERVISOR_iret * 32)) = 0x0b0f; /* ud2 */
526 }
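/*
 * Each 32-byte stub built above is therefore:
 *
 *     mov  $<hypercall-number>, %eax
 *     vmmcall                          ; 0f 01 d9
 *     ret
 *
 * so a guest can invoke hypercall N by calling hypercall_page + N*32.
 * The HYPERVISOR_iret slot is overwritten with ud2 as it is unsupported.
 */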
531 int svm_dbg_on = 0;
533 static inline int svm_do_debugout(unsigned long exit_code)
534 {
535 int i;
537 static unsigned long counter = 0;
538 static unsigned long works[] =
539 {
540 VMEXIT_IOIO,
541 VMEXIT_HLT,
542 VMEXIT_CPUID,
543 VMEXIT_DR0_READ,
544 VMEXIT_DR1_READ,
545 VMEXIT_DR2_READ,
546 VMEXIT_DR3_READ,
547 VMEXIT_DR6_READ,
548 VMEXIT_DR7_READ,
549 VMEXIT_DR0_WRITE,
550 VMEXIT_DR1_WRITE,
551 VMEXIT_DR2_WRITE,
552 VMEXIT_DR3_WRITE,
553 VMEXIT_CR0_READ,
554 VMEXIT_CR0_WRITE,
555 VMEXIT_CR3_READ,
556 VMEXIT_CR4_READ,
557 VMEXIT_MSR,
558 VMEXIT_CR0_WRITE,
559 VMEXIT_CR3_WRITE,
560 VMEXIT_CR4_WRITE,
561 VMEXIT_EXCEPTION_PF,
562 VMEXIT_INTR,
563 VMEXIT_INVLPG,
564 VMEXIT_EXCEPTION_NM
565 };
568 #if 0
569 if (svm_dbg_on && exit_code != 0x7B)
570 return 1;
571 #endif
573 counter++;
575 #if 0
576 if ((exit_code == 0x4E
577 || exit_code == VMEXIT_CR0_READ
578 || exit_code == VMEXIT_CR0_WRITE)
579 && counter < 200000)
580 return 0;
582 if ((exit_code == 0x4E) && counter < 500000)
583 return 0;
584 #endif
586 for (i = 0; i < sizeof(works) / sizeof(works[0]); i++)
587 if (exit_code == works[i])
588 return 0;
590 return 1;
591 }
593 static void save_svm_cpu_user_regs(struct vcpu *v, struct cpu_user_regs *ctxt)
594 {
595 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
597 ASSERT(vmcb);
599 ctxt->eax = vmcb->rax;
600 ctxt->ss = vmcb->ss.sel;
601 ctxt->esp = vmcb->rsp;
602 ctxt->eflags = vmcb->rflags;
603 ctxt->cs = vmcb->cs.sel;
604 ctxt->eip = vmcb->rip;
606 ctxt->gs = vmcb->gs.sel;
607 ctxt->fs = vmcb->fs.sel;
608 ctxt->es = vmcb->es.sel;
609 ctxt->ds = vmcb->ds.sel;
610 }
612 static void svm_store_cpu_user_regs(struct cpu_user_regs *regs, struct vcpu *v)
613 {
614 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
616 regs->eip = vmcb->rip;
617 regs->esp = vmcb->rsp;
618 regs->eflags = vmcb->rflags;
619 regs->cs = vmcb->cs.sel;
620 regs->ds = vmcb->ds.sel;
621 regs->es = vmcb->es.sel;
622 regs->ss = vmcb->ss.sel;
623 }
625 /* XXX Use svm_load_cpu_guest_regs instead */
626 static void svm_load_cpu_user_regs(struct vcpu *v, struct cpu_user_regs *regs)
627 {
628 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
629 u32 *intercepts = &v->arch.hvm_svm.vmcb->exception_intercepts;
631 /* Write the guest register value into VMCB */
632 vmcb->rax = regs->eax;
633 vmcb->ss.sel = regs->ss;
634 vmcb->rsp = regs->esp;
635 vmcb->rflags = regs->eflags;
636 vmcb->cs.sel = regs->cs;
637 vmcb->rip = regs->eip;
638 if (regs->eflags & EF_TF)
639 *intercepts |= EXCEPTION_BITMAP_DB;
640 else
641 *intercepts &= ~EXCEPTION_BITMAP_DB;
642 }
644 static void svm_load_cpu_guest_regs(
645 struct vcpu *v, struct cpu_user_regs *regs)
646 {
647 svm_load_cpu_user_regs(v, regs);
648 }
652 static void arch_svm_do_launch(struct vcpu *v)
653 {
654 cpu_user_regs_t *regs = &current->arch.guest_context.user_regs;
655 int error;
657 #if 0
658 if (svm_dbg_on)
659 printk("Do launch\n");
660 #endif
661 error = construct_vmcb(&v->arch.hvm_svm, regs);
662 if ( error < 0 )
663 {
664 if (v->vcpu_id == 0) {
665 printk("Failed to construct a new VMCB for BSP.\n");
666 } else {
667 printk("Failed to construct a new VMCB for AP %d\n", v->vcpu_id);
668 }
669 domain_crash_synchronous();
670 }
672 svm_do_launch(v);
673 #if 0
674 if (svm_dbg_on)
675 svm_dump_host_regs(__func__);
676 #endif
677 if (v->vcpu_id != 0)
678 {
679 u16 cs_sel = regs->cs;
680 /*
681 * This is the launch of an AP; set state so that we begin executing
682 * the trampoline code in real-mode.
683 */
684 svm_do_vmmcall_reset_to_realmode(v, regs);
685 /* Adjust the state to execute the trampoline code.*/
686 v->arch.hvm_svm.vmcb->rip = 0;
687 v->arch.hvm_svm.vmcb->cs.sel= cs_sel;
688 v->arch.hvm_svm.vmcb->cs.base = (cs_sel << 4);
689 }
691 reset_stack_and_jump(svm_asm_do_launch);
692 }
694 static void svm_freeze_time(struct vcpu *v)
695 {
696 struct periodic_time *pt=&v->domain->arch.hvm_domain.pl_time.periodic_tm;
698 if ( pt->enabled && pt->first_injected && !v->arch.hvm_vcpu.guest_time ) {
699 v->arch.hvm_vcpu.guest_time = hvm_get_guest_time(v);
700 stop_timer(&(pt->timer));
701 }
702 }
705 static void svm_ctxt_switch_from(struct vcpu *v)
706 {
707 svm_freeze_time(v);
708 }
710 static void svm_ctxt_switch_to(struct vcpu *v)
711 {
712 #ifdef __x86_64__
713 /*
714 * This is required because VMRUN performs consistency checks,
715 * and some of the DOM0 selectors point to
716 * invalid GDT locations, which cause AMD processors
717 * to shut down.
718 */
719 set_segment_register(ds, 0);
720 set_segment_register(es, 0);
721 set_segment_register(ss, 0);
722 #endif
723 }
726 static void svm_final_setup_guest(struct vcpu *v)
727 {
728 struct domain *d = v->domain;
729 struct vcpu *vc;
731 v->arch.schedule_tail = arch_svm_do_launch;
732 v->arch.ctxt_switch_from = svm_ctxt_switch_from;
733 v->arch.ctxt_switch_to = svm_ctxt_switch_to;
735 if ( v != d->vcpu[0] )
736 return;
738 /* Initialize monitor page table */
739 for_each_vcpu( d, vc )
740 vc->arch.monitor_table = pagetable_null();
742 /*
743 * This is required only once per domain.
744 * TODO: add a separate function to do these.
745 */
746 memset(&d->shared_info->evtchn_mask[0], 0xff,
747 sizeof(d->shared_info->evtchn_mask));
749 /*
750 * Put the domain in shadow mode even though we're going to be using
751 * the shared 1:1 page table initially. It shouldn't hurt
752 */
753 shadow_mode_enable(d, SHM_enable|SHM_refcounts|
754 SHM_translate|SHM_external|SHM_wr_pt_pte);
755 }
758 static int svm_initialize_guest_resources(struct vcpu *v)
759 {
760 svm_final_setup_guest(v);
761 return 1;
762 }
765 int start_svm(void)
766 {
767 u32 eax, ecx, edx;
768 u32 phys_hsa_lo, phys_hsa_hi;
769 u64 phys_hsa;
770 int cpu = smp_processor_id();
772 /* Xen does not fill x86_capability words except 0. */
773 ecx = cpuid_ecx(0x80000001);
774 boot_cpu_data.x86_capability[5] = ecx;
776 if (!(test_bit(X86_FEATURE_SVME, &boot_cpu_data.x86_capability)))
777 return 0;
779 if (!(hsa[cpu] = alloc_host_save_area()))
780 return 0;
782 rdmsr(MSR_EFER, eax, edx);
783 eax |= EFER_SVME;
784 wrmsr(MSR_EFER, eax, edx);
785 asidpool_init( cpu );
786 printk("AMD SVM Extension is enabled for cpu %d.\n", cpu );
788 /* Initialize the HSA for this core */
789 phys_hsa = (u64) virt_to_maddr(hsa[cpu]);
790 phys_hsa_lo = (u32) phys_hsa;
791 phys_hsa_hi = (u32) (phys_hsa >> 32);
792 wrmsr(MSR_K8_VM_HSAVE_PA, phys_hsa_lo, phys_hsa_hi);
794 if (!(root_vmcb[cpu] = alloc_vmcb()))
795 return 0;
796 root_vmcb_pa[cpu] = virt_to_maddr(root_vmcb[cpu]);
798 if (cpu == 0)
799 setup_vmcb_dump();
801 /* Setup HVM interfaces */
802 hvm_funcs.disable = stop_svm;
804 hvm_funcs.initialize_guest_resources = svm_initialize_guest_resources;
805 hvm_funcs.relinquish_guest_resources = svm_relinquish_guest_resources;
807 hvm_funcs.store_cpu_guest_regs = svm_store_cpu_guest_regs;
808 hvm_funcs.load_cpu_guest_regs = svm_load_cpu_guest_regs;
810 hvm_funcs.realmode = svm_realmode;
811 hvm_funcs.paging_enabled = svm_paging_enabled;
812 hvm_funcs.instruction_length = svm_instruction_length;
813 hvm_funcs.get_guest_ctrl_reg = svm_get_ctrl_reg;
815 hvm_funcs.stts = svm_stts;
816 hvm_funcs.set_tsc_offset = svm_set_tsc_offset;
818 hvm_funcs.init_ap_context = svm_init_ap_context;
819 hvm_funcs.init_hypercall_page = svm_init_hypercall_page;
821 hvm_enabled = 1;
823 return 1;
824 }
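/*
 * start_svm() runs once per CPU: it enables SVM by setting EFER.SVME,
 * points MSR_K8_VM_HSAVE_PA at a freshly allocated host save area,
 * reserves a per-CPU root VMCB for host VMSAVE/VMLOAD, and finally wires
 * up the hvm_funcs table to the SVM implementations in this file.
 */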
827 static void svm_relinquish_guest_resources(struct domain *d)
828 {
829 struct vcpu *v;
831 for_each_vcpu ( d, v )
832 {
833 if ( !test_bit(_VCPUF_initialised, &v->vcpu_flags) )
834 continue;
836 destroy_vmcb(&v->arch.hvm_svm);
837 free_monitor_pagetable(v);
838 kill_timer(&v->arch.hvm_svm.hlt_timer);
839 if ( hvm_apic_support(v->domain) && (VLAPIC(v) != NULL) )
840 {
841 kill_timer( &(VLAPIC(v)->vlapic_timer) );
842 xfree(VLAPIC(v));
843 }
844 }
846 kill_timer(&d->arch.hvm_domain.pl_time.periodic_tm.timer);
848 if ( d->arch.hvm_domain.shared_page_va )
849 unmap_domain_page_global(
850 (void *)d->arch.hvm_domain.shared_page_va);
852 if ( d->arch.hvm_domain.buffered_io_va )
853 unmap_domain_page_global((void *)d->arch.hvm_domain.buffered_io_va);
855 shadow_direct_map_clean(d);
856 }
859 static void svm_migrate_timers(struct vcpu *v)
860 {
861 struct periodic_time *pt =
862 &(v->domain->arch.hvm_domain.pl_time.periodic_tm);
864 if ( pt->enabled ) {
865 migrate_timer( &pt->timer, v->processor );
866 migrate_timer( &v->arch.hvm_svm.hlt_timer, v->processor );
867 }
868 if ( hvm_apic_support(v->domain) && VLAPIC( v ))
869 migrate_timer( &(VLAPIC(v)->vlapic_timer ), v->processor );
870 }
873 void arch_svm_do_resume(struct vcpu *v)
874 {
875 /* pinning VCPU to a different core? */
876 if ( v->arch.hvm_svm.launch_core == smp_processor_id()) {
877 hvm_do_resume( v );
878 reset_stack_and_jump( svm_asm_do_resume );
879 }
880 else {
881 if (svm_dbg_on)
882 printk("VCPU core pinned: %d to %d\n",
883 v->arch.hvm_svm.launch_core, smp_processor_id() );
884 v->arch.hvm_svm.launch_core = smp_processor_id();
885 svm_migrate_timers( v );
886 hvm_do_resume( v );
887 reset_stack_and_jump( svm_asm_do_resume );
888 }
889 }
893 static int svm_do_page_fault(unsigned long va, struct cpu_user_regs *regs)
894 {
895 struct vcpu *v = current;
896 unsigned long eip;
897 unsigned long gpa; /* FIXME: PAE */
898 int result;
899 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
901 ASSERT(vmcb);
903 //#if HVM_DEBUG
904 eip = vmcb->rip;
905 HVM_DBG_LOG(DBG_LEVEL_VMMU,
906 "svm_do_page_fault = 0x%lx, eip = %lx, error_code = %lx",
907 va, eip, (unsigned long)regs->error_code);
908 //#endif
910 if ( !svm_paging_enabled(v) )
911 {
912 if ( shadow_direct_map_fault(va, regs) )
913 return 1;
915 handle_mmio(va, va);
916 return 1;
917 }
920 gpa = gva_to_gpa(va);
922 /* Use 1:1 page table to identify MMIO address space */
923 if (mmio_space(gpa))
924 {
925 /* No support for APIC */
926 if (!hvm_apic_support(v->domain) && gpa >= 0xFEC00000)
927 {
928 int inst_len;
929 inst_len = svm_instruction_length(v);
930 if (inst_len == -1)
931 {
932 printf("%s: INST_LEN - Unable to decode properly\n", __func__);
933 domain_crash_synchronous();
934 }
936 __update_guest_eip(vmcb, inst_len);
938 return 1;
939 }
941 handle_mmio(va, gpa);
943 return 1;
944 }
946 result = shadow_fault(va, regs);
948 if( result ) {
949 /* Let's make sure that the Guest TLB is flushed */
950 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
951 }
953 return result;
954 }
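/*
 * Summary of the fault paths above: before the guest enables paging,
 * faults are satisfied from the domain's direct 1:1 map or forwarded to
 * handle_mmio; with guest paging on, MMIO addresses found via gva_to_gpa()
 * are emulated, and everything else goes through shadow_fault(), after
 * which a fresh ASID is requested so stale TLB entries cannot survive.
 */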
957 static void svm_do_no_device_fault(struct vmcb_struct *vmcb)
958 {
959 struct vcpu *v = current;
961 setup_fpu(v);
962 vmcb->exception_intercepts &= ~EXCEPTION_BITMAP_NM;
964 if ( !(v->arch.hvm_svm.cpu_shadow_cr0 & X86_CR0_TS) )
965 vmcb->cr0 &= ~X86_CR0_TS;
966 }
969 static void svm_do_general_protection_fault(struct vcpu *v,
970 struct cpu_user_regs *regs)
971 {
972 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
973 unsigned long eip, error_code;
975 ASSERT(vmcb);
977 eip = vmcb->rip;
978 error_code = vmcb->exitinfo1;
980 if (vmcb->idtr.limit == 0) {
981 printf("Huh? We got a GP Fault with an invalid IDTR!\n");
982 svm_dump_vmcb(__func__, vmcb);
983 svm_dump_regs(__func__, regs);
984 svm_dump_inst(vmcb->rip);
985 __hvm_bug(regs);
986 }
988 HVM_DBG_LOG(DBG_LEVEL_1,
989 "svm_general_protection_fault: eip = %lx, erro_code = %lx",
990 eip, error_code);
992 HVM_DBG_LOG(DBG_LEVEL_1,
993 "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx",
994 (unsigned long)regs->eax, (unsigned long)regs->ebx,
995 (unsigned long)regs->ecx, (unsigned long)regs->edx,
996 (unsigned long)regs->esi, (unsigned long)regs->edi);
998 /* Reflect it back into the guest */
999 svm_inject_exception(v, TRAP_gp_fault, 1, error_code);
1002 /* Reserved bits ECX: [31:14], [12:4], [2:1]*/
1003 #define SVM_VCPU_CPUID_L1_ECX_RESERVED 0xffffdff6
1004 /* Reserved bits EDX: [31:29], [27], [22:20], [18], [10] */
1005 #define SVM_VCPU_CPUID_L1_EDX_RESERVED 0xe8740400
1007 static void svm_vmexit_do_cpuid(struct vmcb_struct *vmcb, unsigned long input,
1008 struct cpu_user_regs *regs)
1010 unsigned int eax, ebx, ecx, edx;
1011 unsigned long eip;
1012 struct vcpu *v = current;
1013 int inst_len;
1015 ASSERT(vmcb);
1017 eip = vmcb->rip;
1019 HVM_DBG_LOG(DBG_LEVEL_1,
1020 "do_cpuid: (eax) %lx, (ebx) %lx, (ecx) %lx, (edx) %lx,"
1021 " (esi) %lx, (edi) %lx",
1022 (unsigned long)regs->eax, (unsigned long)regs->ebx,
1023 (unsigned long)regs->ecx, (unsigned long)regs->edx,
1024 (unsigned long)regs->esi, (unsigned long)regs->edi);
1026 cpuid(input, &eax, &ebx, &ecx, &edx);
1028 if (input == 0x00000001)
1030 if ( !hvm_apic_support(v->domain) ||
1031 !vlapic_global_enabled((VLAPIC(v))) )
1033 /* Since the apic is disabled, avoid any confusion
1034 about SMP cpus being available */
1035 clear_bit(X86_FEATURE_APIC, &edx);
1038 #if CONFIG_PAGING_LEVELS < 3
1039 clear_bit(X86_FEATURE_PAE, &edx);
1040 clear_bit(X86_FEATURE_PSE, &edx);
1041 clear_bit(X86_FEATURE_PSE36, &edx);
1042 #else
1043 if ( v->domain->arch.ops->guest_paging_levels == PAGING_L2 )
1045 if ( !v->domain->arch.hvm_domain.params[HVM_PARAM_PAE_ENABLED] )
1046 clear_bit(X86_FEATURE_PAE, &edx);
1047 clear_bit(X86_FEATURE_PSE, &edx);
1048 clear_bit(X86_FEATURE_PSE36, &edx);
1050 #endif
1051 /* Clear out reserved bits. */
1052 ecx &= ~SVM_VCPU_CPUID_L1_ECX_RESERVED;
1053 edx &= ~SVM_VCPU_CPUID_L1_EDX_RESERVED;
1055 clear_bit(X86_FEATURE_MWAIT & 31, &ecx);
1057 /* Guest should only see one logical processor.
1058 * See details on page 23 of AMD CPUID Specification.
1059 */
1060 clear_bit(X86_FEATURE_HT, &edx); /* clear the hyperthread bit */
1061 ebx &= 0xFF00FFFF; /* clear the logical processor count when HTT=0 */
1062 ebx |= 0x00010000; /* set to 1 just for precaution */
1064 /* Disable machine check architecture */
1065 clear_bit(X86_FEATURE_MCA, &edx);
1066 clear_bit(X86_FEATURE_MCE, &edx);
1068 else if ( (input > 0x00000005) && (input < 0x80000000) )
1070 if ( !cpuid_hypervisor_leaves(input, &eax, &ebx, &ecx, &edx) )
1071 eax = ebx = ecx = edx = 0;
1073 else if ( input == 0x80000001 )
1075 /* We duplicate some CPUID_00000001 code because many bits of
1076 CPUID_80000001_EDX overlap with CPUID_00000001_EDX. */
1078 if ( !hvm_apic_support(v->domain) ||
1079 !vlapic_global_enabled((VLAPIC(v))) )
1081 /* Since the apic is disabled, avoid any confusion
1082 about SMP cpus being available */
1083 clear_bit(X86_FEATURE_APIC, &edx);
1086 /* Clear the Cmp_Legacy bit
1087 * This bit is supposed to be zero when HTT = 0.
1088 * See details on page 23 of AMD CPUID Specification.
1089 */
1090 clear_bit(X86_FEATURE_CMP_LEGACY & 31, &ecx);
1092 #ifdef __i386__
1093 /* Mask feature for Intel ia32e or AMD long mode. */
1094 clear_bit(X86_FEATURE_LAHF_LM & 31, &ecx);
1096 clear_bit(X86_FEATURE_LM & 31, &edx);
1097 clear_bit(X86_FEATURE_SYSCALL & 31, &edx);
1098 #endif
1100 #if CONFIG_PAGING_LEVELS < 3
1101 clear_bit(X86_FEATURE_NX & 31, &edx);
1102 clear_bit(X86_FEATURE_PAE, &edx);
1103 clear_bit(X86_FEATURE_PSE, &edx);
1104 clear_bit(X86_FEATURE_PSE36, &edx);
1105 #else
1106 if ( v->domain->arch.ops->guest_paging_levels == PAGING_L2 )
1108 if ( !v->domain->arch.hvm_domain.params[HVM_PARAM_PAE_ENABLED] )
1110 clear_bit(X86_FEATURE_NX & 31, &edx);
1111 clear_bit(X86_FEATURE_PAE, &edx);
1113 clear_bit(X86_FEATURE_PSE, &edx);
1114 clear_bit(X86_FEATURE_PSE36, &edx);
1116 #endif
1118 /* Make SVM feature invisible to the guest. */
1119 clear_bit(X86_FEATURE_SVME & 31, &ecx);
1121 /* So far, we do not support 3DNow for the guest. */
1122 clear_bit(X86_FEATURE_3DNOW & 31, &edx);
1123 clear_bit(X86_FEATURE_3DNOWEXT & 31, &edx);
1125 else if ( ( input == 0x80000007 ) || ( input == 0x8000000A ) )
1127 /* Mask out features of power management and SVM extension. */
1128 eax = ebx = ecx = edx = 0;
1130 else if ( input == 0x80000008 )
1132 ecx &= 0xFFFFFF00; /* Make sure Number of CPU core is 1 when HTT=0 */
1135 regs->eax = (unsigned long)eax;
1136 regs->ebx = (unsigned long)ebx;
1137 regs->ecx = (unsigned long)ecx;
1138 regs->edx = (unsigned long)edx;
1140 HVM_DBG_LOG(DBG_LEVEL_1,
1141 "svm_vmexit_do_cpuid: eip: %lx, input: %lx, out:eax=%x, "
1142 "ebx=%x, ecx=%x, edx=%x",
1143 eip, input, eax, ebx, ecx, edx);
1145 inst_len = __get_instruction_length(vmcb, INSTR_CPUID, NULL);
1146 ASSERT(inst_len > 0);
1147 __update_guest_eip(vmcb, inst_len);
1151 static inline unsigned long *get_reg_p(unsigned int gpreg,
1152 struct cpu_user_regs *regs, struct vmcb_struct *vmcb)
1154 unsigned long *reg_p = NULL;
1155 switch (gpreg)
1157 case SVM_REG_EAX:
1158 reg_p = (unsigned long *)&regs->eax;
1159 break;
1160 case SVM_REG_EBX:
1161 reg_p = (unsigned long *)&regs->ebx;
1162 break;
1163 case SVM_REG_ECX:
1164 reg_p = (unsigned long *)&regs->ecx;
1165 break;
1166 case SVM_REG_EDX:
1167 reg_p = (unsigned long *)&regs->edx;
1168 break;
1169 case SVM_REG_EDI:
1170 reg_p = (unsigned long *)&regs->edi;
1171 break;
1172 case SVM_REG_ESI:
1173 reg_p = (unsigned long *)&regs->esi;
1174 break;
1175 case SVM_REG_EBP:
1176 reg_p = (unsigned long *)&regs->ebp;
1177 break;
1178 case SVM_REG_ESP:
1179 reg_p = (unsigned long *)&vmcb->rsp;
1180 break;
1181 #ifdef __x86_64__
1182 case SVM_REG_R8:
1183 reg_p = (unsigned long *)&regs->r8;
1184 break;
1185 case SVM_REG_R9:
1186 reg_p = (unsigned long *)&regs->r9;
1187 break;
1188 case SVM_REG_R10:
1189 reg_p = (unsigned long *)&regs->r10;
1190 break;
1191 case SVM_REG_R11:
1192 reg_p = (unsigned long *)&regs->r11;
1193 break;
1194 case SVM_REG_R12:
1195 reg_p = (unsigned long *)&regs->r12;
1196 break;
1197 case SVM_REG_R13:
1198 reg_p = (unsigned long *)&regs->r13;
1199 break;
1200 case SVM_REG_R14:
1201 reg_p = (unsigned long *)&regs->r14;
1202 break;
1203 case SVM_REG_R15:
1204 reg_p = (unsigned long *)&regs->r15;
1205 break;
1206 #endif
1207 default:
1208 BUG();
1211 return reg_p;
1215 static inline unsigned long get_reg(unsigned int gpreg,
1216 struct cpu_user_regs *regs, struct vmcb_struct *vmcb)
1218 unsigned long *gp;
1219 gp = get_reg_p(gpreg, regs, vmcb);
1220 return *gp;
1224 static inline void set_reg(unsigned int gpreg, unsigned long value,
1225 struct cpu_user_regs *regs, struct vmcb_struct *vmcb)
1227 unsigned long *gp;
1228 gp = get_reg_p(gpreg, regs, vmcb);
1229 *gp = value;
1233 static void svm_dr_access (struct vcpu *v, unsigned int reg, unsigned int type,
1234 struct cpu_user_regs *regs)
1236 unsigned long *reg_p = 0;
1237 unsigned int gpreg = 0;
1238 unsigned long eip;
1239 int inst_len;
1240 int index;
1241 struct vmcb_struct *vmcb;
1242 u8 buffer[MAX_INST_LEN];
1243 u8 prefix = 0;
1245 vmcb = v->arch.hvm_svm.vmcb;
1247 ASSERT(vmcb);
1249 eip = vmcb->rip;
1250 inst_copy_from_guest(buffer, svm_rip2pointer(vmcb), sizeof(buffer));
1251 index = skip_prefix_bytes(buffer, sizeof(buffer));
1253 ASSERT(buffer[index+0] == 0x0f && (buffer[index+1] & 0xFD) == 0x21);
1255 if (index > 0 && (buffer[index-1] & 0xF0) == 0x40)
1256 prefix = buffer[index-1];
1258 gpreg = decode_src_reg(prefix, buffer[index + 2]);
1259 ASSERT(reg == decode_dest_reg(prefix, buffer[index + 2]));
1261 HVM_DBG_LOG(DBG_LEVEL_1, "svm_dr_access : eip=%lx, reg=%d, gpreg = %x",
1262 eip, reg, gpreg);
1264 reg_p = get_reg_p(gpreg, regs, vmcb);
1266 switch (type)
1268 case TYPE_MOV_TO_DR:
1269 inst_len = __get_instruction_length(vmcb, INSTR_MOV2DR, buffer);
1270 v->arch.guest_context.debugreg[reg] = *reg_p;
1271 break;
1272 case TYPE_MOV_FROM_DR:
1273 inst_len = __get_instruction_length(vmcb, INSTR_MOVDR2, buffer);
1274 *reg_p = v->arch.guest_context.debugreg[reg];
1275 break;
1276 default:
1277 __hvm_bug(regs);
1278 break;
1280 ASSERT(inst_len > 0);
1281 __update_guest_eip(vmcb, inst_len);
1285 static void svm_get_prefix_info(
1286 struct vmcb_struct *vmcb,
1287 unsigned int dir, segment_selector_t **seg, unsigned int *asize)
1289 unsigned char inst[MAX_INST_LEN];
1290 int i;
1292 memset(inst, 0, MAX_INST_LEN);
1293 if (inst_copy_from_guest(inst, svm_rip2pointer(vmcb), sizeof(inst))
1294 != MAX_INST_LEN)
1296 printk("%s: get guest instruction failed\n", __func__);
1297 domain_crash_synchronous();
1300 for (i = 0; i < MAX_INST_LEN; i++)
1302 switch (inst[i])
1304 case 0xf3: /* REPZ */
1305 case 0xf2: /* REPNZ */
1306 case 0xf0: /* LOCK */
1307 case 0x66: /* data32 */
1308 #ifdef __x86_64__
1309 /* REX prefixes */
1310 case 0x40:
1311 case 0x41:
1312 case 0x42:
1313 case 0x43:
1314 case 0x44:
1315 case 0x45:
1316 case 0x46:
1317 case 0x47:
1319 case 0x48:
1320 case 0x49:
1321 case 0x4a:
1322 case 0x4b:
1323 case 0x4c:
1324 case 0x4d:
1325 case 0x4e:
1326 case 0x4f:
1327 #endif
1328 continue;
1329 case 0x67: /* addr32 */
1330 *asize ^= 48; /* Switch 16/32 bits */
1331 continue;
1332 case 0x2e: /* CS */
1333 *seg = &vmcb->cs;
1334 continue;
1335 case 0x36: /* SS */
1336 *seg = &vmcb->ss;
1337 continue;
1338 case 0x26: /* ES */
1339 *seg = &vmcb->es;
1340 continue;
1341 case 0x64: /* FS */
1342 *seg = &vmcb->fs;
1343 continue;
1344 case 0x65: /* GS */
1345 *seg = &vmcb->gs;
1346 continue;
1347 case 0x3e: /* DS */
1348 *seg = &vmcb->ds;
1349 continue;
1350 default:
1351 break;
1353 return;
1358 /* Get the address of INS/OUTS instruction */
1359 static inline int svm_get_io_address(
1360 struct vcpu *v,
1361 struct cpu_user_regs *regs, unsigned int dir,
1362 unsigned long *count, unsigned long *addr)
1364 unsigned long reg;
1365 unsigned int asize = 0;
1366 unsigned int isize;
1367 int long_mode;
1368 ioio_info_t info;
1369 segment_selector_t *seg = NULL;
1370 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1372 info.bytes = vmcb->exitinfo1;
1374 /* If we're in long mode, we shouldn't check the segment presence & limit */
1375 long_mode = vmcb->cs.attributes.fields.l && vmcb->efer & EFER_LMA;
1377 /* d field of cs.attributes is 1 for 32-bit, 0 for 16 or 64 bit.
1378 * l field combined with EFER_LMA -> longmode says whether it's 16 or 64 bit.
1379 */
1380 asize = (long_mode)?64:((vmcb->cs.attributes.fields.db)?32:16);
1383 /* The ins/outs instructions are single byte, so if we have got more
1384 * than one byte (+ maybe rep-prefix), we have some prefix so we need
1385 * to figure out what it is...
1386 */
1387 isize = vmcb->exitinfo2 - vmcb->rip;
1389 if (info.fields.rep)
1390 isize --;
1392 if (isize > 1)
1394 svm_get_prefix_info(vmcb, dir, &seg, &asize);
1397 ASSERT(dir == IOREQ_READ || dir == IOREQ_WRITE);
1399 if (dir == IOREQ_WRITE)
1401 reg = regs->esi;
1402 if (!seg) /* If no prefix, use DS. */
1403 seg = &vmcb->ds;
1405 else
1407 reg = regs->edi;
1408 seg = &vmcb->es; /* Note: This is ALWAYS ES. */
1411 /* If the segment isn't present, give GP fault! */
1412 if (!long_mode && !seg->attributes.fields.p)
1414 svm_inject_exception(v, TRAP_gp_fault, 1, seg->sel);
1415 return 0;
1418 if (asize == 16)
1420 *addr = (reg & 0xFFFF);
1421 *count = regs->ecx & 0xffff;
1423 else
1425 *addr = reg;
1426 *count = regs->ecx;
1429 if (!long_mode) {
1430 if (*addr > seg->limit)
1432 svm_inject_exception(v, TRAP_gp_fault, 1, seg->sel);
1433 return 0;
1435 else
1437 *addr += seg->base;
1442 return 1;
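/*
 * In summary: for port writes (OUTS) the source operand is DS:ESI unless
 * a segment override prefix was found, while for port reads (INS) the
 * destination is always ES:EDI.  The address is truncated to the size
 * derived from the CS attributes and any 0x67 prefix, and the segment
 * limit check is skipped in long mode.
 */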
1446 static void svm_io_instruction(struct vcpu *v)
1448 struct cpu_user_regs *regs;
1449 struct hvm_io_op *pio_opp;
1450 unsigned int port;
1451 unsigned int size, dir;
1452 ioio_info_t info;
1453 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1455 ASSERT(vmcb);
1456 pio_opp = &current->arch.hvm_vcpu.io_op;
1457 pio_opp->instr = INSTR_PIO;
1458 pio_opp->flags = 0;
1460 regs = &pio_opp->io_context;
1462 /* Copy current guest state into io instruction state structure. */
1463 memcpy(regs, guest_cpu_user_regs(), HVM_CONTEXT_STACK_BYTES);
1464 hvm_store_cpu_guest_regs(v, regs, NULL);
1466 info.bytes = vmcb->exitinfo1;
1468 port = info.fields.port; /* port used to be addr */
1469 dir = info.fields.type; /* direction */
1470 if (info.fields.sz32)
1471 size = 4;
1472 else if (info.fields.sz16)
1473 size = 2;
1474 else
1475 size = 1;
1477 HVM_DBG_LOG(DBG_LEVEL_IO,
1478 "svm_io_instruction: port 0x%x eip=%x:%"PRIx64", "
1479 "exit_qualification = %"PRIx64,
1480 port, vmcb->cs.sel, vmcb->rip, info.bytes);
1482 /* string instruction */
1483 if (info.fields.str)
1485 unsigned long addr, count;
1486 int sign = regs->eflags & EF_DF ? -1 : 1;
1488 if (!svm_get_io_address(v, regs, dir, &count, &addr))
1490 /* We failed to get a valid address, so don't do the IO operation -
1491 * it would just get worse if we did! Hopefully the guest is handling
1492 * GP faults...
1493 */
1494 return;
1497 /* "rep" prefix */
1498 if (info.fields.rep)
1500 pio_opp->flags |= REPZ;
1502 else
1504 count = 1;
1507 /*
1508 * Handle string pio instructions that cross pages or that
1509 * are unaligned. See the comments in hvm_platform.c/handle_mmio()
1510 */
1511 if ((addr & PAGE_MASK) != ((addr + size - 1) & PAGE_MASK))
1513 unsigned long value = 0;
1515 pio_opp->flags |= OVERLAP;
1517 if (dir == IOREQ_WRITE)
1518 hvm_copy(&value, addr, size, HVM_COPY_IN);
1520 send_pio_req(regs, port, 1, size, value, dir, 0);
1522 else
1524 if ((addr & PAGE_MASK) != ((addr + count * size - 1) & PAGE_MASK))
1526 if (sign > 0)
1527 count = (PAGE_SIZE - (addr & ~PAGE_MASK)) / size;
1528 else
1529 count = (addr & ~PAGE_MASK) / size;
1531 else
1532 regs->eip = vmcb->exitinfo2;
1534 send_pio_req(regs, port, count, size, addr, dir, 1);
1537 else
1539 /*
1540 * On SVM, the RIP of the instruction following the IN/OUT is saved in
1541 * ExitInfo2
1542 */
1543 regs->eip = vmcb->exitinfo2;
1545 if (port == 0xe9 && dir == IOREQ_WRITE && size == 1)
1546 hvm_print_line(v, regs->eax); /* guest debug output */
1548 send_pio_req(regs, port, 1, size, regs->eax, dir, 0);
1552 static int svm_set_cr0(unsigned long value)
1554 struct vcpu *v = current;
1555 unsigned long mfn;
1556 int paging_enabled;
1557 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1559 ASSERT(vmcb);
1561 /* We don't want to lose PG. ET is reserved and should always be 1. */
1562 paging_enabled = svm_paging_enabled(v);
1563 value |= X86_CR0_ET;
1564 vmcb->cr0 = value | X86_CR0_PG;
1565 v->arch.hvm_svm.cpu_shadow_cr0 = value;
1567 /* TS cleared? Then initialise FPU now. */
1568 if ( !(value & X86_CR0_TS) )
1570 setup_fpu(v);
1571 vmcb->exception_intercepts &= ~EXCEPTION_BITMAP_NM;
1574 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR0 value = %lx\n", value);
1576 if ((value & X86_CR0_PE) && (value & X86_CR0_PG) && !paging_enabled)
1578 /* The guest CR3 must be pointing to the guest physical. */
1579 if (!VALID_MFN(mfn =
1580 get_mfn_from_gpfn(v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT))
1581 || !get_page(mfn_to_page(mfn), v->domain))
1583 printk("Invalid CR3 value = %lx\n", v->arch.hvm_svm.cpu_cr3);
1584 domain_crash_synchronous(); /* need to take a clean path */
1587 #if defined(__x86_64__)
1588 if (test_bit(SVM_CPU_STATE_LME_ENABLED, &v->arch.hvm_svm.cpu_state)
1589 && !test_bit(SVM_CPU_STATE_PAE_ENABLED,
1590 &v->arch.hvm_svm.cpu_state))
1592 HVM_DBG_LOG(DBG_LEVEL_1, "Enable paging before PAE enable\n");
1593 svm_inject_exception(v, TRAP_gp_fault, 1, 0);
1596 if (test_bit(SVM_CPU_STATE_LME_ENABLED, &v->arch.hvm_svm.cpu_state))
1598 /* PAE should already be enabled here. */
1599 HVM_DBG_LOG(DBG_LEVEL_1, "Enable the Long mode\n");
1600 set_bit(SVM_CPU_STATE_LMA_ENABLED,
1601 &v->arch.hvm_svm.cpu_state);
1602 vmcb->efer |= (EFER_LMA | EFER_LME);
1603 if (!shadow_set_guest_paging_levels(v->domain, PAGING_L4) )
1605 printk("Unsupported guest paging levels\n");
1606 domain_crash_synchronous(); /* need to take a clean path */
1609 else
1610 #endif /* __x86_64__ */
1612 #if CONFIG_PAGING_LEVELS >= 3
1613 /* seems it's a 32-bit or 32-bit PAE guest */
1614 if ( test_bit(SVM_CPU_STATE_PAE_ENABLED,
1615 &v->arch.hvm_svm.cpu_state) )
1617 /* The guest enabled PAE first and then enabled PG, so it is
1618 * really a PAE guest. */
1619 if ( !shadow_set_guest_paging_levels(v->domain, PAGING_L3) )
1621 printk("Unsupported guest paging levels\n");
1622 domain_crash_synchronous();
1625 else
1627 if ( !shadow_set_guest_paging_levels(v->domain, PAGING_L2) )
1629 printk("Unsupported guest paging levels\n");
1630 domain_crash_synchronous(); /* need to take a clean path */
1633 #endif
1636 /* Now arch.guest_table points to machine physical. */
1637 v->arch.guest_table = pagetable_from_pfn(mfn);
1638 update_pagetables(v);
1640 HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx",
1641 (unsigned long) (mfn << PAGE_SHIFT));
1643 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
1644 vmcb->cr3 = pagetable_get_paddr(v->arch.shadow_table);
1646 /* arch->shadow_table should hold the next CR3 for shadow */
1647 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx\n",
1648 v->arch.hvm_svm.cpu_cr3, mfn);
1650 return 1;
1653 if ( !((value & X86_CR0_PE) && (value & X86_CR0_PG)) && paging_enabled )
1654 if ( v->arch.hvm_svm.cpu_cr3 ) {
1655 put_page(mfn_to_page(get_mfn_from_gpfn(
1656 v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT)));
1657 v->arch.guest_table = pagetable_null();
1660 /*
1661 * SVM implements paged real-mode and when we return to real-mode
1662 * we revert back to the physical mappings that the domain builder
1663 * created.
1664 */
1665 if ((value & X86_CR0_PE) == 0) {
1666 if (value & X86_CR0_PG) {
1667 svm_inject_exception(v, TRAP_gp_fault, 1, 0);
1668 return 0;
1671 clear_all_shadow_status( v->domain );
1672 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
1673 vmcb->cr3 = pagetable_get_paddr(v->domain->arch.phys_table);
1675 else if ( (value & (X86_CR0_PE | X86_CR0_PG)) == X86_CR0_PE )
1677 /* we should take care of this kind of situation */
1678 clear_all_shadow_status(v->domain);
1679 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
1680 vmcb->cr3 = pagetable_get_paddr(v->domain->arch.phys_table);
1683 return 1;
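/*
 * To recap svm_set_cr0(): the shadow CR0 keeps the value the guest thinks
 * it wrote, while the real VMCB CR0 always has PG set so the shadow page
 * tables stay in force.  Enabling paging switches arch.guest_table to the
 * guest's CR3 frame, and clearing PE/PG falls back to the domain's
 * phys_table (paged real mode).
 */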
1686 /*
1687 * Read from control registers. CR0 and CR4 are read from the shadow.
1688 */
1689 static void mov_from_cr(int cr, int gp, struct cpu_user_regs *regs)
1691 unsigned long value = 0;
1692 struct vcpu *v = current;
1693 struct vmcb_struct *vmcb;
1695 vmcb = v->arch.hvm_svm.vmcb;
1696 ASSERT(vmcb);
1698 switch (cr)
1700 case 0:
1701 value = v->arch.hvm_svm.cpu_shadow_cr0;
1702 if (svm_dbg_on)
1703 printk("CR0 read =%lx \n", value );
1704 break;
1705 case 2:
1706 value = vmcb->cr2;
1707 break;
1708 case 3:
1709 value = (unsigned long) v->arch.hvm_svm.cpu_cr3;
1710 if (svm_dbg_on)
1711 printk("CR3 read =%lx \n", value );
1712 break;
1713 case 4:
1714 value = (unsigned long) v->arch.hvm_svm.cpu_shadow_cr4;
1715 if (svm_dbg_on)
1716 printk( "CR4 read=%lx\n", value );
1717 break;
1718 case 8:
1719 #if 0
1720 value = vmcb->m_cr8;
1721 #else
1722 ASSERT(0);
1723 #endif
1724 break;
1726 default:
1727 __hvm_bug(regs);
1730 set_reg(gp, value, regs, vmcb);
1732 HVM_DBG_LOG(DBG_LEVEL_VMMU, "mov_from_cr: CR%d, value = %lx,", cr, value);
1736 static inline int svm_pgbit_test(struct vcpu *v)
1738 return v->arch.hvm_svm.cpu_shadow_cr0 & X86_CR0_PG;
1742 /*
1743 * Write to control registers
1744 */
1745 static int mov_to_cr(int gpreg, int cr, struct cpu_user_regs *regs)
1747 unsigned long value;
1748 unsigned long old_cr;
1749 struct vcpu *v = current;
1750 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1752 ASSERT(vmcb);
1754 value = get_reg(gpreg, regs, vmcb);
1756 HVM_DBG_LOG(DBG_LEVEL_1, "mov_to_cr: CR%d, value = %lx,", cr, value);
1757 HVM_DBG_LOG(DBG_LEVEL_1, "current = %lx,", (unsigned long) current);
1759 switch (cr)
1761 case 0:
1762 if (svm_dbg_on)
1763 printk("CR0 write =%lx \n", value );
1764 return svm_set_cr0(value);
1766 case 3:
1768 unsigned long old_base_mfn, mfn;
1769 if (svm_dbg_on)
1770 printk("CR3 write =%lx \n", value );
1771 /* If paging is not enabled yet, simply copy the value to CR3. */
1772 if (!svm_paging_enabled(v)) {
1773 v->arch.hvm_svm.cpu_cr3 = value;
1774 break;
1776 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
1778 /* We make a new one if the shadow does not exist. */
1779 if (value == v->arch.hvm_svm.cpu_cr3)
1781 /*
1782 * This is simple TLB flush, implying the guest has
1783 * removed some translation or changed page attributes.
1784 * We simply invalidate the shadow.
1785 */
1786 mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT);
1787 if (mfn != pagetable_get_pfn(v->arch.guest_table))
1788 __hvm_bug(regs);
1789 shadow_sync_all(v->domain);
1791 else
1793 /*
1794 * If different, make a shadow. Check if the PDBR is valid
1795 * first.
1796 */
1797 HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 value = %lx", value);
1798 if (((value >> PAGE_SHIFT) > v->domain->max_pages)
1799 || !VALID_MFN(mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT))
1800 || !get_page(mfn_to_page(mfn), v->domain))
1802 printk("Invalid CR3 value=%lx\n", value);
1803 domain_crash_synchronous(); /* need to take a clean path */
1806 old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
1807 v->arch.guest_table = pagetable_from_pfn(mfn);
1809 if (old_base_mfn)
1810 put_page(mfn_to_page(old_base_mfn));
1812 /*
1813 * arch.shadow_table should now hold the next CR3 for shadow
1814 */
1815 #if CONFIG_PAGING_LEVELS >= 3
1816 if ( v->domain->arch.ops->guest_paging_levels == PAGING_L3 )
1817 shadow_sync_all(v->domain);
1818 #endif
1819 v->arch.hvm_svm.cpu_cr3 = value;
1820 update_pagetables(v);
1821 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx", value);
1822 vmcb->cr3 = pagetable_get_paddr(v->arch.shadow_table);
1824 break;
1827 case 4: /* CR4 */
1829 if (svm_dbg_on)
1830 printk( "write cr4=%lx, cr0=%lx\n",
1831 value, v->arch.hvm_svm.cpu_shadow_cr0 );
1832 old_cr = v->arch.hvm_svm.cpu_shadow_cr4;
1833 if ( value & X86_CR4_PAE && !(old_cr & X86_CR4_PAE) )
1835 set_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
1836 if ( svm_pgbit_test(v) )
1838 /* The guest is a 32-bit PAE guest. */
1839 #if CONFIG_PAGING_LEVELS >= 3
1840 unsigned long mfn, old_base_mfn;
1842 if( !shadow_set_guest_paging_levels(v->domain, PAGING_L3) )
1844 printk("Unsupported guest paging levels\n");
1845 domain_crash_synchronous(); /* need to take a clean path */
1848 if ( !VALID_MFN(mfn = get_mfn_from_gpfn(
1849 v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT)) ||
1850 !get_page(mfn_to_page(mfn), v->domain) )
1852 printk("Invalid CR3 value = %lx", v->arch.hvm_svm.cpu_cr3);
1853 domain_crash_synchronous(); /* need to take a clean path */
1856 old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
1857 if ( old_base_mfn )
1858 put_page(mfn_to_page(old_base_mfn));
1860 /*
1861 * Now arch.guest_table points to machine physical.
1862 */
1864 v->arch.guest_table = pagetable_from_pfn(mfn);
1865 update_pagetables(v);
1867 HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx",
1868 (unsigned long) (mfn << PAGE_SHIFT));
1870 vmcb->cr3 = pagetable_get_paddr(v->arch.shadow_table);
1872 /*
1873 * arch->shadow_table should hold the next CR3 for shadow
1874 */
1876 HVM_DBG_LOG(DBG_LEVEL_VMMU,
1877 "Update CR3 value = %lx, mfn = %lx",
1878 v->arch.hvm_svm.cpu_cr3, mfn);
1879 #endif
1881 else
1883 /* The guest is a 64 bit or 32-bit PAE guest. */
1884 #if CONFIG_PAGING_LEVELS >= 3
1885 if ( (v->domain->arch.ops != NULL) &&
1886 v->domain->arch.ops->guest_paging_levels == PAGING_L2)
1888 /* Seems the guest first enables PAE without enabling PG,
1889 * it must enable PG after that, and it is a 32-bit PAE
1890 * guest */
1892 if ( !shadow_set_guest_paging_levels(v->domain, PAGING_L3))
1894 printk("Unsupported guest paging levels\n");
1895 domain_crash_synchronous();
1898 else
1900 if ( !shadow_set_guest_paging_levels(v->domain, PAGING_L4))
1902 printk("Unsupported guest paging levels\n");
1903 domain_crash_synchronous();
1906 #endif
1909 else if (value & X86_CR4_PAE) {
1910 set_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
1911 } else {
1912 if (test_bit(SVM_CPU_STATE_LMA_ENABLED,
1913 &v->arch.hvm_svm.cpu_state)) {
1914 svm_inject_exception(v, TRAP_gp_fault, 1, 0);
1916 clear_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
1919 v->arch.hvm_svm.cpu_shadow_cr4 = value;
1920 vmcb->cr4 = value | SVM_CR4_HOST_MASK;
1922 /*
1923 * Writing to CR4 to modify the PSE, PGE, or PAE flag invalidates
1924 * all TLB entries except global entries.
1925 */
1926 if ((old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE))
1928 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
1929 shadow_sync_all(v->domain);
1931 break;
1934 default:
1935 printk("invalid cr: %d\n", cr);
1936 __hvm_bug(regs);
1939 return 1;
1943 #define ARR_SIZE(x) (sizeof(x) / sizeof(x[0]))
1946 static int svm_cr_access(struct vcpu *v, unsigned int cr, unsigned int type,
1947 struct cpu_user_regs *regs)
1949 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1950 int inst_len = 0;
1951 int index;
1952 unsigned int gpreg;
1953 unsigned long value;
1954 u8 buffer[MAX_INST_LEN];
1955 u8 prefix = 0;
1956 int result = 1;
1957 enum instruction_index list_a[] = {INSTR_MOV2CR, INSTR_CLTS, INSTR_LMSW};
1958 enum instruction_index list_b[] = {INSTR_MOVCR2, INSTR_SMSW};
1959 enum instruction_index match;
1961 ASSERT(vmcb);
1963 inst_copy_from_guest(buffer, svm_rip2pointer(vmcb), sizeof(buffer));
1965 /* get index to first actual instruction byte - as we will need to know
1966 where the prefix lives later on */
1967 index = skip_prefix_bytes(buffer, sizeof(buffer));
1969 if (type == TYPE_MOV_TO_CR)
1971 inst_len = __get_instruction_length_from_list(vmcb, list_a,
1972 ARR_SIZE(list_a), &buffer[index], &match);
1974 else
1976 inst_len = __get_instruction_length_from_list(vmcb, list_b,
1977 ARR_SIZE(list_b), &buffer[index], &match);
1980 ASSERT(inst_len > 0);
1982 inst_len += index;
1984 /* Check for REX prefix - it's ALWAYS the last byte of any prefix bytes */
1985 if (index > 0 && (buffer[index-1] & 0xF0) == 0x40)
1986 prefix = buffer[index-1];
1988 HVM_DBG_LOG(DBG_LEVEL_1, "eip = %lx", (unsigned long) vmcb->rip);
1990 switch (match)
1992 case INSTR_MOV2CR:
1993 gpreg = decode_src_reg(prefix, buffer[index+2]);
1994 result = mov_to_cr(gpreg, cr, regs);
1995 break;
1997 case INSTR_MOVCR2:
1998 gpreg = decode_src_reg(prefix, buffer[index+2]);
1999 mov_from_cr(cr, gpreg, regs);
2000 break;
2002 case INSTR_CLTS:
2003 /* TS being cleared means that it's time to restore fpu state. */
2004 setup_fpu(current);
2005 vmcb->exception_intercepts &= ~EXCEPTION_BITMAP_NM;
2006 vmcb->cr0 &= ~X86_CR0_TS; /* clear TS */
2007 v->arch.hvm_svm.cpu_shadow_cr0 &= ~X86_CR0_TS; /* clear TS */
2008 break;
2010 case INSTR_LMSW:
2011 if (svm_dbg_on)
2012 svm_dump_inst(svm_rip2pointer(vmcb));
2014 gpreg = decode_src_reg(prefix, buffer[index+2]);
2015 value = get_reg(gpreg, regs, vmcb) & 0xF;
2017 if (svm_dbg_on)
2018 printk("CR0-LMSW value=%lx, reg=%d, inst_len=%d\n", value, gpreg,
2019 inst_len);
2021 value = (v->arch.hvm_svm.cpu_shadow_cr0 & ~0xF) | value;
2023 if (svm_dbg_on)
2024 printk("CR0-LMSW CR0 - New value=%lx\n", value);
2026 result = svm_set_cr0(value);
2027 break;
2029 case INSTR_SMSW:
2030 if (svm_dbg_on)
2031 svm_dump_inst(svm_rip2pointer(vmcb));
2032 value = v->arch.hvm_svm.cpu_shadow_cr0;
2033 gpreg = decode_src_reg(prefix, buffer[index+2]);
2034 set_reg(gpreg, value, regs, vmcb);
2036 if (svm_dbg_on)
2037 printk("CR0-SMSW value=%lx, reg=%d, inst_len=%d\n", value, gpreg,
2038 inst_len);
2039 break;
2041 default:
2042 __hvm_bug(regs);
2043 break;
2046 ASSERT(inst_len);
2048 __update_guest_eip(vmcb, inst_len);
2050 return result;
2053 static inline void svm_do_msr_access(
2054 struct vcpu *v, struct cpu_user_regs *regs)
2056 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2057 int inst_len;
2058 u64 msr_content=0;
2059 u32 eax, edx;
2061 ASSERT(vmcb);
2063 HVM_DBG_LOG(DBG_LEVEL_1, "svm_do_msr_access: ecx=%lx, eax=%lx, edx=%lx, "
2064 "exitinfo = %lx", (unsigned long)regs->ecx,
2065 (unsigned long)regs->eax, (unsigned long)regs->edx,
2066 (unsigned long)vmcb->exitinfo1);
2068 /* is it a read? */
2069 if (vmcb->exitinfo1 == 0)
2071 inst_len = __get_instruction_length(vmcb, INSTR_RDMSR, NULL);
2073 regs->edx = 0;
2074 switch (regs->ecx) {
2075 case MSR_IA32_TIME_STAMP_COUNTER:
2076 msr_content = hvm_get_guest_time(v);
2077 break;
2078 case MSR_IA32_SYSENTER_CS:
2079 msr_content = vmcb->sysenter_cs;
2080 break;
2081 case MSR_IA32_SYSENTER_ESP:
2082 msr_content = vmcb->sysenter_esp;
2083 break;
2084 case MSR_IA32_SYSENTER_EIP:
2085 msr_content = vmcb->sysenter_eip;
2086 break;
2087 case MSR_IA32_APICBASE:
2088 msr_content = VLAPIC(v) ? VLAPIC(v)->apic_base_msr : 0;
2089 break;
2090 default:
2091 if (long_mode_do_msr_read(regs))
2092 goto done;
2094 if ( rdmsr_hypervisor_regs(regs->ecx, &eax, &edx) )
2096 regs->eax = eax;
2097 regs->edx = edx;
2098 goto done;
2101 rdmsr_safe(regs->ecx, regs->eax, regs->edx);
2102 break;
2104 regs->eax = msr_content & 0xFFFFFFFF;
2105 regs->edx = msr_content >> 32;
2107 else
2109 inst_len = __get_instruction_length(vmcb, INSTR_WRMSR, NULL);
2110 msr_content = (regs->eax & 0xFFFFFFFF) | ((u64)regs->edx << 32);
2112 switch (regs->ecx)
2114 case MSR_IA32_TIME_STAMP_COUNTER:
2115 hvm_set_guest_time(v, msr_content);
2116 break;
2117 case MSR_IA32_SYSENTER_CS:
2118 vmcb->sysenter_cs = msr_content;
2119 break;
2120 case MSR_IA32_SYSENTER_ESP:
2121 vmcb->sysenter_esp = msr_content;
2122 break;
2123 case MSR_IA32_SYSENTER_EIP:
2124 vmcb->sysenter_eip = msr_content;
2125 break;
2126 case MSR_IA32_APICBASE:
2127 vlapic_msr_set(VLAPIC(v), msr_content);
2128 break;
2129 default:
2130 if ( !long_mode_do_msr_write(regs) )
2131 wrmsr_hypervisor_regs(regs->ecx, regs->eax, regs->edx);
2132 break;
2136 done:
2138 HVM_DBG_LOG(DBG_LEVEL_1, "svm_do_msr_access returns: "
2139 "ecx=%lx, eax=%lx, edx=%lx",
2140 (unsigned long)regs->ecx, (unsigned long)regs->eax,
2141 (unsigned long)regs->edx);
2143 __update_guest_eip(vmcb, inst_len);
2147 /*
2148 * Need to use this exit to reschedule
2149 */
2150 static inline void svm_vmexit_do_hlt(struct vmcb_struct *vmcb)
2152 struct vcpu *v = current;
2153 struct periodic_time *pt=&v->domain->arch.hvm_domain.pl_time.periodic_tm;
2154 s_time_t next_pit = -1, next_wakeup;
2156 __update_guest_eip(vmcb, 1);
2158 /* check for interrupt not handled or new interrupt */
2159 if ( vmcb->vintr.fields.irq || cpu_has_pending_irq(v) )
2160 return;
2162 if ( !v->vcpu_id )
2163 next_pit = get_scheduled(v, pt->irq, pt);
2164 next_wakeup = get_apictime_scheduled(v);
2165 if ( (next_pit != -1 && next_pit < next_wakeup) || next_wakeup == -1 )
2166 next_wakeup = next_pit;
2167 if ( next_wakeup != - 1 )
2168 set_timer(&current->arch.hvm_svm.hlt_timer, next_wakeup);
2169 do_sched_op_compat(SCHEDOP_block, 0);
2173 static void svm_vmexit_do_invd(struct vmcb_struct *vmcb)
2175 int inst_len;
2177 /* Invalidate the cache - we can't really do that safely - maybe we should
2178 * WBINVD, but I think it's just fine to completely ignore it - we should
2179 * have cache-snooping that solves it anyways. -- Mats P.
2180 */
2182 /* Tell the user that we did this - just in case someone runs some really
2183 * weird operating system and wants to know why it's not working...
2184 */
2185 printk("INVD instruction intercepted - ignored\n");
2187 inst_len = __get_instruction_length(vmcb, INSTR_INVD, NULL);
2188 __update_guest_eip(vmcb, inst_len);
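/*
 * If we ever wanted to honour the request conservatively, the comment above
 * suggests WBINVD; a minimal sketch of that alternative (not what this
 * handler actually does) would be:
 */
static inline void invd_as_wbinvd(void)
{
    __asm__ __volatile__ ( "wbinvd" : : : "memory" );
}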
2194 #ifdef XEN_DEBUGGER
2195 static void svm_debug_save_cpu_user_regs(struct vmcb_struct *vmcb,
2196 struct cpu_user_regs *regs)
2198 regs->eip = vmcb->rip;
2199 regs->esp = vmcb->rsp;
2200 regs->eflags = vmcb->rflags;
2202 regs->xcs = vmcb->cs.sel;
2203 regs->xds = vmcb->ds.sel;
2204 regs->xes = vmcb->es.sel;
2205 regs->xfs = vmcb->fs.sel;
2206 regs->xgs = vmcb->gs.sel;
2207 regs->xss = vmcb->ss.sel;
2211 static void svm_debug_restore_cpu_user_regs(struct cpu_user_regs *regs)
2213 vmcb->ss.sel = regs->xss;
2214 vmcb->rsp = regs->esp;
2215 vmcb->rflags = regs->eflags;
2216 vmcb->cs.sel = regs->xcs;
2217 vmcb->rip = regs->eip;
2219 vmcb->gs.sel = regs->xgs;
2220 vmcb->fs.sel = regs->xfs;
2221 vmcb->es.sel = regs->xes;
2222 vmcb->ds.sel = regs->xds;
2224 #endif
2227 void svm_handle_invlpg(const short invlpga, struct cpu_user_regs *regs)
2229 struct vcpu *v = current;
2230 u8 opcode[MAX_INST_LEN], prefix, length = MAX_INST_LEN;
2231 unsigned long g_vaddr;
2232 int inst_len;
2233 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2235 ASSERT(vmcb);
2236 /*
2237 * We don't know in advance how many bytes the invlpg instruction will
2238 * take, so use the maximum instruction length here.
2239 */
2240 if (inst_copy_from_guest(opcode, svm_rip2pointer(vmcb), length) < length)
2242 printk("svm_handle_invlpg(): Error reading memory %d bytes\n",
2243 length);
2244 __hvm_bug(regs);
2247 if (invlpga)
2249 inst_len = __get_instruction_length(vmcb, INSTR_INVLPGA, opcode);
2250 ASSERT(inst_len > 0);
2251 __update_guest_eip(vmcb, inst_len);
2253 /*
2254 * The address is implicit in this instruction. At the moment, we don't
2255 * use ecx (the ASID) to identify individual guest pages.
2256 */
2257 g_vaddr = regs->eax;
2259 else
2261 /* What about multiple prefix codes? */
2262 prefix = (is_prefix(opcode[0])?opcode[0]:0);
2263 inst_len = __get_instruction_length(vmcb, INSTR_INVLPG, opcode);
2264 ASSERT(inst_len > 0);
2266 inst_len--;
2267 length -= inst_len;
2269 /*
2270 * Decode memory operand of the instruction including ModRM, SIB, and
2271 * displacement to get the effective address and length in bytes. Assume
2272 * the system is in either 32- or 64-bit mode.
2273 */
2274 g_vaddr = get_effective_addr_modrm64(vmcb, regs, prefix,
2275 &opcode[inst_len], &length);
2277 inst_len += length;
2278 __update_guest_eip (vmcb, inst_len);
2281 /* Overkill; we may not need this. */
2282 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
2283 shadow_invlpg(v, g_vaddr);
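/*
 * A sketch of the kind of test is_prefix() performs on opcode[0] above,
 * assuming it recognises the standard x86 legacy and REX prefix bytes;
 * the real helper is defined in the SVM emulation code, not here.
 */
static inline int looks_like_prefix(u8 b)
{
    switch ( b )
    {
    case 0x66: case 0x67:                  /* operand-size / address-size */
    case 0x2e: case 0x36: case 0x3e:       /* CS, SS, DS segment overrides */
    case 0x26: case 0x64: case 0x65:       /* ES, FS, GS segment overrides */
    case 0xf0: case 0xf2: case 0xf3:       /* LOCK, REPNE, REP */
        return 1;
    default:
        return (b >= 0x40) && (b <= 0x4f); /* REX prefixes in 64-bit mode */
    }
}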
2287 /*
2288 * Reset to realmode causes execution to start at 0xF000:0xFFF0 in
2289 * 16-bit realmode. Basically, this mimics a processor reset.
2291 * returns 0 on success, non-zero otherwise
2292 */
2293 static int svm_do_vmmcall_reset_to_realmode(struct vcpu *v,
2294 struct cpu_user_regs *regs)
2296 struct vmcb_struct *vmcb;
2298 ASSERT(v);
2299 ASSERT(regs);
2301 vmcb = v->arch.hvm_svm.vmcb;
2303 ASSERT(vmcb);
2305 /* Clear the user regs; the VMCB state is re-initialised field by field below. */
2306 memset(regs, 0, sizeof(struct cpu_user_regs));
2308 /* VMCB Control */
2309 vmcb->tsc_offset = 0;
2311 /* VMCB State */
2312 vmcb->cr0 = X86_CR0_ET | X86_CR0_PG;
2313 v->arch.hvm_svm.cpu_shadow_cr0 = X86_CR0_ET;
2315 vmcb->cr2 = 0;
2316 vmcb->efer = EFER_SVME;
2318 vmcb->cr4 = SVM_CR4_HOST_MASK;
2319 v->arch.hvm_svm.cpu_shadow_cr4 = 0;
2320 clear_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
2322 /* This will jump to ROMBIOS */
2323 vmcb->rip = 0xFFF0;
2325 /* Set up the segment registers and all their hidden state. */
2326 vmcb->cs.sel = 0xF000;
2327 vmcb->cs.attributes.bytes = 0x089b;
2328 vmcb->cs.limit = 0xffff;
2329 vmcb->cs.base = 0x000F0000;
2331 vmcb->ss.sel = 0x00;
2332 vmcb->ss.attributes.bytes = 0x0893;
2333 vmcb->ss.limit = 0xffff;
2334 vmcb->ss.base = 0x00;
2336 vmcb->ds.sel = 0x00;
2337 vmcb->ds.attributes.bytes = 0x0893;
2338 vmcb->ds.limit = 0xffff;
2339 vmcb->ds.base = 0x00;
2341 vmcb->es.sel = 0x00;
2342 vmcb->es.attributes.bytes = 0x0893;
2343 vmcb->es.limit = 0xffff;
2344 vmcb->es.base = 0x00;
2346 vmcb->fs.sel = 0x00;
2347 vmcb->fs.attributes.bytes = 0x0893;
2348 vmcb->fs.limit = 0xffff;
2349 vmcb->fs.base = 0x00;
2351 vmcb->gs.sel = 0x00;
2352 vmcb->gs.attributes.bytes = 0x0893;
2353 vmcb->gs.limit = 0xffff;
2354 vmcb->gs.base = 0x00;
2356 vmcb->ldtr.sel = 0x00;
2357 vmcb->ldtr.attributes.bytes = 0x0000;
2358 vmcb->ldtr.limit = 0x0;
2359 vmcb->ldtr.base = 0x00;
2361 vmcb->gdtr.sel = 0x00;
2362 vmcb->gdtr.attributes.bytes = 0x0000;
2363 vmcb->gdtr.limit = 0x0;
2364 vmcb->gdtr.base = 0x00;
2366 vmcb->tr.sel = 0;
2367 vmcb->tr.attributes.bytes = 0;
2368 vmcb->tr.limit = 0x0;
2369 vmcb->tr.base = 0;
2371 vmcb->idtr.sel = 0x00;
2372 vmcb->idtr.attributes.bytes = 0x0000;
2373 vmcb->idtr.limit = 0x3ff;
2374 vmcb->idtr.base = 0x00;
2376 vmcb->rax = 0;
2377 vmcb->rsp = 0;
2379 return 0;
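/*
 * A minimal sketch decoding the VMCB segment attribute words used above
 * (0x089b for CS, 0x0893 for the data segments), assuming the architected
 * SVM layout: type[3:0], S, DPL[1:0], P, AVL, L, D/B, G. The low byte of
 * 0x089b is an accessed execute/read code segment, and of 0x0893 an
 * accessed read/write data segment, both present with DPL 0. The helper
 * below is illustrative only.
 */
static void dump_seg_attr(u16 attr)
{
    printk("type=%x S=%d DPL=%d P=%d AVL=%d L=%d D/B=%d G=%d\n",
           attr & 0xf, (attr >> 4) & 1, (attr >> 5) & 3, (attr >> 7) & 1,
           (attr >> 8) & 1, (attr >> 9) & 1, (attr >> 10) & 1,
           (attr >> 11) & 1);
}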
2383 /*
2384 * svm_do_vmmcall - SVM VMMCALL handler
2386 * returns 0 on success, non-zero otherwise
2387 */
2388 static int svm_do_vmmcall(struct vcpu *v, struct cpu_user_regs *regs)
2390 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2391 int inst_len;
2393 ASSERT(vmcb);
2394 ASSERT(regs);
2396 inst_len = __get_instruction_length(vmcb, INSTR_VMCALL, NULL);
2397 ASSERT(inst_len > 0);
2399 if ( regs->eax & 0x80000000 )
2401 /* VMMCALL sanity check */
2402 if ( vmcb->cpl > get_vmmcall_cpl(regs->edi) )
2404 printf("VMMCALL CPL check failed\n");
2405 return -1;
2408 /* handle the request */
2409 switch ( regs->eax )
2411 case VMMCALL_RESET_TO_REALMODE:
2412 if ( svm_do_vmmcall_reset_to_realmode(v, regs) )
2414 printf("svm_do_vmmcall_reset_to_realmode() failed\n");
2415 return -1;
2417 /* since we just reset the VMCB, return without adjusting
2418 * the eip */
2419 return 0;
2421 case VMMCALL_DEBUG:
2422 printf("DEBUG features not implemented yet\n");
2423 break;
2424 default:
2425 break;
2428 hvm_print_line(v, regs->eax); /* provides the current domain */
2430 else
2432 hvm_do_hypercall(regs);
2435 __update_guest_eip(vmcb, inst_len);
2436 return 0;
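/*
 * A sketch of how a guest might issue the VMMCALL handled above, assuming
 * the convention visible in this handler: EAX carries the request, with the
 * top bit set for the legacy VMMCALL services and clear for HVM hypercalls.
 * The opcode bytes stand in for the mnemonic for older assemblers.
 * Illustrative only; this helper is not part of the hypervisor.
 */
static inline void guest_vmmcall(unsigned long request)
{
    __asm__ __volatile__ ( ".byte 0x0f,0x01,0xd9" /* vmmcall */
                           : : "a" (request) : "memory" );
}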
2440 void svm_dump_inst(unsigned long eip)
2442 u8 opcode[256];
2443 unsigned long ptr;
2444 int len;
2445 int i;
2447 ptr = eip & ~0xff;
2448 len = 0;
2450 if (hvm_copy(opcode, ptr, sizeof(opcode), HVM_COPY_IN))
2451 len = sizeof(opcode);
2453 printf("Code bytes around (len=%d) %lx:", len, eip);
2454 for (i = 0; i < len; i++)
2456 if ((i & 0x0f) == 0)
2457 printf("\n%08lx:", ptr+i);
2459 printf("%02x ", opcode[i]);
2462 printf("\n");
2466 void svm_dump_regs(const char *from, struct cpu_user_regs *regs)
2468 struct vcpu *v = current;
2469 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2470 unsigned long pt = pagetable_get_paddr(v->arch.shadow_table);
2472 printf("%s: guest registers from %s:\n", __func__, from);
2473 #if defined (__x86_64__)
2474 printk("rax: %016lx rbx: %016lx rcx: %016lx\n",
2475 regs->rax, regs->rbx, regs->rcx);
2476 printk("rdx: %016lx rsi: %016lx rdi: %016lx\n",
2477 regs->rdx, regs->rsi, regs->rdi);
2478 printk("rbp: %016lx rsp: %016lx r8: %016lx\n",
2479 regs->rbp, regs->rsp, regs->r8);
2480 printk("r9: %016lx r10: %016lx r11: %016lx\n",
2481 regs->r9, regs->r10, regs->r11);
2482 printk("r12: %016lx r13: %016lx r14: %016lx\n",
2483 regs->r12, regs->r13, regs->r14);
2484 printk("r15: %016lx cr0: %016lx cr3: %016lx\n",
2485 regs->r15, v->arch.hvm_svm.cpu_shadow_cr0, vmcb->cr3);
2486 #else
2487 printf("eax: %08x, ebx: %08x, ecx: %08x, edx: %08x\n",
2488 regs->eax, regs->ebx, regs->ecx, regs->edx);
2489 printf("edi: %08x, esi: %08x, ebp: %08x, esp: %08x\n",
2490 regs->edi, regs->esi, regs->ebp, regs->esp);
2491 printf("%s: guest cr0: %lx\n", __func__,
2492 v->arch.hvm_svm.cpu_shadow_cr0);
2493 printf("guest CR3 = %llx\n", vmcb->cr3);
2494 #endif
2495 printf("%s: pt = %lx\n", __func__, pt);
2499 void svm_dump_host_regs(const char *from)
2501 struct vcpu *v = current;
2502 unsigned long pt = pagetable_get_paddr(v->arch.monitor_table);
2503 unsigned long cr3, cr0;
2504 printf("Host registers at %s\n", from);
2506 __asm__ __volatile__ ("\tmov %%cr0,%0\n"
2507 "\tmov %%cr3,%1\n"
2508 : "=r" (cr0), "=r"(cr3));
2509 printf("%s: pt = %lx, cr3 = %lx, cr0 = %lx\n", __func__, pt, cr3, cr0);
2512 #ifdef SVM_EXTRA_DEBUG
2513 static char *exit_reasons[] = {
2514 [VMEXIT_CR0_READ] = "CR0_READ",
2515 [VMEXIT_CR1_READ] = "CR1_READ",
2516 [VMEXIT_CR2_READ] = "CR2_READ",
2517 [VMEXIT_CR3_READ] = "CR3_READ",
2518 [VMEXIT_CR4_READ] = "CR4_READ",
2519 [VMEXIT_CR5_READ] = "CR5_READ",
2520 [VMEXIT_CR6_READ] = "CR6_READ",
2521 [VMEXIT_CR7_READ] = "CR7_READ",
2522 [VMEXIT_CR8_READ] = "CR8_READ",
2523 [VMEXIT_CR9_READ] = "CR9_READ",
2524 [VMEXIT_CR10_READ] = "CR10_READ",
2525 [VMEXIT_CR11_READ] = "CR11_READ",
2526 [VMEXIT_CR12_READ] = "CR12_READ",
2527 [VMEXIT_CR13_READ] = "CR13_READ",
2528 [VMEXIT_CR14_READ] = "CR14_READ",
2529 [VMEXIT_CR15_READ] = "CR15_READ",
2530 [VMEXIT_CR0_WRITE] = "CR0_WRITE",
2531 [VMEXIT_CR1_WRITE] = "CR1_WRITE",
2532 [VMEXIT_CR2_WRITE] = "CR2_WRITE",
2533 [VMEXIT_CR3_WRITE] = "CR3_WRITE",
2534 [VMEXIT_CR4_WRITE] = "CR4_WRITE",
2535 [VMEXIT_CR5_WRITE] = "CR5_WRITE",
2536 [VMEXIT_CR6_WRITE] = "CR6_WRITE",
2537 [VMEXIT_CR7_WRITE] = "CR7_WRITE",
2538 [VMEXIT_CR8_WRITE] = "CR8_WRITE",
2539 [VMEXIT_CR9_WRITE] = "CR9_WRITE",
2540 [VMEXIT_CR10_WRITE] = "CR10_WRITE",
2541 [VMEXIT_CR11_WRITE] = "CR11_WRITE",
2542 [VMEXIT_CR12_WRITE] = "CR12_WRITE",
2543 [VMEXIT_CR13_WRITE] = "CR13_WRITE",
2544 [VMEXIT_CR14_WRITE] = "CR14_WRITE",
2545 [VMEXIT_CR15_WRITE] = "CR15_WRITE",
2546 [VMEXIT_DR0_READ] = "DR0_READ",
2547 [VMEXIT_DR1_READ] = "DR1_READ",
2548 [VMEXIT_DR2_READ] = "DR2_READ",
2549 [VMEXIT_DR3_READ] = "DR3_READ",
2550 [VMEXIT_DR4_READ] = "DR4_READ",
2551 [VMEXIT_DR5_READ] = "DR5_READ",
2552 [VMEXIT_DR6_READ] = "DR6_READ",
2553 [VMEXIT_DR7_READ] = "DR7_READ",
2554 [VMEXIT_DR8_READ] = "DR8_READ",
2555 [VMEXIT_DR9_READ] = "DR9_READ",
2556 [VMEXIT_DR10_READ] = "DR10_READ",
2557 [VMEXIT_DR11_READ] = "DR11_READ",
2558 [VMEXIT_DR12_READ] = "DR12_READ",
2559 [VMEXIT_DR13_READ] = "DR13_READ",
2560 [VMEXIT_DR14_READ] = "DR14_READ",
2561 [VMEXIT_DR15_READ] = "DR15_READ",
2562 [VMEXIT_DR0_WRITE] = "DR0_WRITE",
2563 [VMEXIT_DR1_WRITE] = "DR1_WRITE",
2564 [VMEXIT_DR2_WRITE] = "DR2_WRITE",
2565 [VMEXIT_DR3_WRITE] = "DR3_WRITE",
2566 [VMEXIT_DR4_WRITE] = "DR4_WRITE",
2567 [VMEXIT_DR5_WRITE] = "DR5_WRITE",
2568 [VMEXIT_DR6_WRITE] = "DR6_WRITE",
2569 [VMEXIT_DR7_WRITE] = "DR7_WRITE",
2570 [VMEXIT_DR8_WRITE] = "DR8_WRITE",
2571 [VMEXIT_DR9_WRITE] = "DR9_WRITE",
2572 [VMEXIT_DR10_WRITE] = "DR10_WRITE",
2573 [VMEXIT_DR11_WRITE] = "DR11_WRITE",
2574 [VMEXIT_DR12_WRITE] = "DR12_WRITE",
2575 [VMEXIT_DR13_WRITE] = "DR13_WRITE",
2576 [VMEXIT_DR14_WRITE] = "DR14_WRITE",
2577 [VMEXIT_DR15_WRITE] = "DR15_WRITE",
2578 [VMEXIT_EXCEPTION_DE] = "EXCEPTION_DE",
2579 [VMEXIT_EXCEPTION_DB] = "EXCEPTION_DB",
2580 [VMEXIT_EXCEPTION_NMI] = "EXCEPTION_NMI",
2581 [VMEXIT_EXCEPTION_BP] = "EXCEPTION_BP",
2582 [VMEXIT_EXCEPTION_OF] = "EXCEPTION_OF",
2583 [VMEXIT_EXCEPTION_BR] = "EXCEPTION_BR",
2584 [VMEXIT_EXCEPTION_UD] = "EXCEPTION_UD",
2585 [VMEXIT_EXCEPTION_NM] = "EXCEPTION_NM",
2586 [VMEXIT_EXCEPTION_DF] = "EXCEPTION_DF",
2587 [VMEXIT_EXCEPTION_09] = "EXCEPTION_09",
2588 [VMEXIT_EXCEPTION_TS] = "EXCEPTION_TS",
2589 [VMEXIT_EXCEPTION_NP] = "EXCEPTION_NP",
2590 [VMEXIT_EXCEPTION_SS] = "EXCEPTION_SS",
2591 [VMEXIT_EXCEPTION_GP] = "EXCEPTION_GP",
2592 [VMEXIT_EXCEPTION_PF] = "EXCEPTION_PF",
2593 [VMEXIT_EXCEPTION_15] = "EXCEPTION_15",
2594 [VMEXIT_EXCEPTION_MF] = "EXCEPTION_MF",
2595 [VMEXIT_EXCEPTION_AC] = "EXCEPTION_AC",
2596 [VMEXIT_EXCEPTION_MC] = "EXCEPTION_MC",
2597 [VMEXIT_EXCEPTION_XF] = "EXCEPTION_XF",
2598 [VMEXIT_INTR] = "INTR",
2599 [VMEXIT_NMI] = "NMI",
2600 [VMEXIT_SMI] = "SMI",
2601 [VMEXIT_INIT] = "INIT",
2602 [VMEXIT_VINTR] = "VINTR",
2603 [VMEXIT_CR0_SEL_WRITE] = "CR0_SEL_WRITE",
2604 [VMEXIT_IDTR_READ] = "IDTR_READ",
2605 [VMEXIT_GDTR_READ] = "GDTR_READ",
2606 [VMEXIT_LDTR_READ] = "LDTR_READ",
2607 [VMEXIT_TR_READ] = "TR_READ",
2608 [VMEXIT_IDTR_WRITE] = "IDTR_WRITE",
2609 [VMEXIT_GDTR_WRITE] = "GDTR_WRITE",
2610 [VMEXIT_LDTR_WRITE] = "LDTR_WRITE",
2611 [VMEXIT_TR_WRITE] = "TR_WRITE",
2612 [VMEXIT_RDTSC] = "RDTSC",
2613 [VMEXIT_RDPMC] = "RDPMC",
2614 [VMEXIT_PUSHF] = "PUSHF",
2615 [VMEXIT_POPF] = "POPF",
2616 [VMEXIT_CPUID] = "CPUID",
2617 [VMEXIT_RSM] = "RSM",
2618 [VMEXIT_IRET] = "IRET",
2619 [VMEXIT_SWINT] = "SWINT",
2620 [VMEXIT_INVD] = "INVD",
2621 [VMEXIT_PAUSE] = "PAUSE",
2622 [VMEXIT_HLT] = "HLT",
2623 [VMEXIT_INVLPG] = "INVLPG",
2624 [VMEXIT_INVLPGA] = "INVLPGA",
2625 [VMEXIT_IOIO] = "IOIO",
2626 [VMEXIT_MSR] = "MSR",
2627 [VMEXIT_TASK_SWITCH] = "TASK_SWITCH",
2628 [VMEXIT_FERR_FREEZE] = "FERR_FREEZE",
2629 [VMEXIT_SHUTDOWN] = "SHUTDOWN",
2630 [VMEXIT_VMRUN] = "VMRUN",
2631 [VMEXIT_VMMCALL] = "VMMCALL",
2632 [VMEXIT_VMLOAD] = "VMLOAD",
2633 [VMEXIT_VMSAVE] = "VMSAVE",
2634 [VMEXIT_STGI] = "STGI",
2635 [VMEXIT_CLGI] = "CLGI",
2636 [VMEXIT_SKINIT] = "SKINIT",
2637 [VMEXIT_RDTSCP] = "RDTSCP",
2638 [VMEXIT_ICEBP] = "ICEBP",
2639 [VMEXIT_NPF] = "NPF"
2640 };
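/*
 * A bounds-checked lookup over the exit_reasons[] table above; the debug
 * printks in the handler below index the table directly. The helper name
 * is illustrative and not used elsewhere in this file.
 */
static inline const char *exit_reason_name(unsigned int exit_reason)
{
    if ( (exit_reason < sizeof(exit_reasons)/sizeof(exit_reasons[0])) &&
         exit_reasons[exit_reason] )
        return exit_reasons[exit_reason];
    return "UNKNOWN";
}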
2641 #endif /* SVM_EXTRA_DEBUG */
2643 #ifdef SVM_WALK_GUEST_PAGES
2644 void walk_shadow_and_guest_pt(unsigned long gva)
2646 l2_pgentry_t gpde;
2647 l2_pgentry_t spde;
2648 l1_pgentry_t gpte;
2649 l1_pgentry_t spte;
2650 struct vcpu *v = current;
2651 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2652 unsigned long gpa;
2654 gpa = gva_to_gpa( gva );
2655 printk( "gva = %lx, gpa=%lx, gCR3=%x\n", gva, gpa, (u32)vmcb->cr3 );
2656 if( !svm_paging_enabled(v) || mmio_space(gpa) )
2657 return;
2659 /* let's dump the guest and shadow page info */
2661 __guest_get_l2e(v, gva, &gpde);
2662 printk( "G-PDE = %x, flags=%x\n", gpde.l2, l2e_get_flags(gpde) );
2663 __shadow_get_l2e( v, gva, &spde );
2664 printk( "S-PDE = %x, flags=%x\n", spde.l2, l2e_get_flags(spde) );
2666 if ( unlikely(!(l2e_get_flags(gpde) & _PAGE_PRESENT)) )
2667 return;
2669 spte = l1e_empty();
2671 /* This is actually overkill - we only need to ensure the hl2 is in sync. */
2672 shadow_sync_va(v, gva);
2674 gpte.l1 = 0;
2675 __copy_from_user(&gpte, &linear_pg_table[ l1_linear_offset(gva) ],
2676 sizeof(gpte) );
2677 printk( "G-PTE = %x, flags=%x\n", gpte.l1, l1e_get_flags(gpte) );
2678 __copy_from_user( &spte, &phys_to_machine_mapping[ l1e_get_pfn( gpte ) ],
2679 sizeof(spte) );
2680 printk( "S-PTE = %x, flags=%x\n", spte.l1, l1e_get_flags(spte));
2682 #endif /* SVM_WALK_GUEST_PAGES */
2687 asmlinkage void svm_vmexit_handler(struct cpu_user_regs regs)
2689 unsigned int exit_reason;
2690 unsigned long eip;
2691 struct vcpu *v = current;
2692 int error;
2693 int do_debug = 0;
2694 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2696 ASSERT(vmcb);
2698 exit_reason = vmcb->exitcode;
2699 save_svm_cpu_user_regs(v, &regs);
2701 vmcb->tlb_control = 1;
2704 if (exit_reason == VMEXIT_INVALID)
2706 svm_dump_vmcb(__func__, vmcb);
2707 domain_crash_synchronous();
2710 #ifdef SVM_EXTRA_DEBUG
2712 #if defined(__i386__)
2713 #define rip eip
2714 #endif
2716 static unsigned long intercepts_counter = 0;
2718 if (svm_dbg_on && exit_reason == VMEXIT_EXCEPTION_PF)
2720 if (svm_paging_enabled(v) && !mmio_space(gva_to_gpa(vmcb->exitinfo2)))
2722 printk("I%08ld,ExC=%s(%d),IP=%x:%llx,I1=%llx,I2=%llx,INT=%llx, "
2723 "gpa=%llx\n", intercepts_counter,
2724 exit_reasons[exit_reason], exit_reason, regs.cs,
2725 (unsigned long long) regs.rip,
2726 (unsigned long long) vmcb->exitinfo1,
2727 (unsigned long long) vmcb->exitinfo2,
2728 (unsigned long long) vmcb->exitintinfo.bytes,
2729 (unsigned long long) gva_to_gpa( vmcb->exitinfo2 ) );
2731 else
2733 printk("I%08ld,ExC=%s(%d),IP=%x:%llx,I1=%llx,I2=%llx,INT=%llx\n",
2734 intercepts_counter,
2735 exit_reasons[exit_reason], exit_reason, regs.cs,
2736 (unsigned long long) regs.rip,
2737 (unsigned long long) vmcb->exitinfo1,
2738 (unsigned long long) vmcb->exitinfo2,
2739 (unsigned long long) vmcb->exitintinfo.bytes );
2742 else if ( svm_dbg_on
2743 && exit_reason != VMEXIT_IOIO
2744 && exit_reason != VMEXIT_INTR)
2747 if (exit_reasons[exit_reason])
2749 printk("I%08ld,ExC=%s(%d),IP=%x:%llx,I1=%llx,I2=%llx,INT=%llx\n",
2750 intercepts_counter,
2751 exit_reasons[exit_reason], exit_reason, regs.cs,
2752 (unsigned long long) regs.rip,
2753 (unsigned long long) vmcb->exitinfo1,
2754 (unsigned long long) vmcb->exitinfo2,
2755 (unsigned long long) vmcb->exitintinfo.bytes);
2757 else
2759 printk("I%08ld,ExC=%d(0x%x),IP=%x:%llx,I1=%llx,I2=%llx,INT=%llx\n",
2760 intercepts_counter, exit_reason, exit_reason, regs.cs,
2761 (unsigned long long) regs.rip,
2762 (unsigned long long) vmcb->exitinfo1,
2763 (unsigned long long) vmcb->exitinfo2,
2764 (unsigned long long) vmcb->exitintinfo.bytes);
2768 #ifdef SVM_WALK_GUEST_PAGES
2769 if( exit_reason == VMEXIT_EXCEPTION_PF
2770 && ( ( vmcb->exitinfo2 == vmcb->rip )
2771 || vmcb->exitintinfo.bytes) )
2773 if (svm_paging_enabled(v) && !mmio_space(gva_to_gpa(vmcb->exitinfo2)))
2774 walk_shadow_and_guest_pt( vmcb->exitinfo2 );
2776 #endif
2778 intercepts_counter++;
2780 #if 0
2781 if (svm_dbg_on)
2782 do_debug = svm_do_debugout(exit_reason);
2783 #endif
2785 if (do_debug)
2787 printk("%s:+ guest_table = 0x%08x, monitor_table = 0x%08x, "
2788 "shadow_table = 0x%08x\n",
2789 __func__,
2790 (int) v->arch.guest_table.pfn,
2791 (int) v->arch.monitor_table.pfn,
2792 (int) v->arch.shadow_table.pfn);
2794 svm_dump_vmcb(__func__, vmcb);
2795 svm_dump_regs(__func__, &regs);
2796 svm_dump_inst(svm_rip2pointer(vmcb));
2799 #if defined(__i386__)
2800 #undef rip
2801 #endif
2804 #endif /* SVM_EXTRA_DEBUG */
2807 perfc_incra(svmexits, exit_reason);
2808 eip = vmcb->rip;
2810 #ifdef SVM_EXTRA_DEBUG
2811 if (do_debug)
2813 printk("eip = %lx, exit_reason = %d (0x%x)\n",
2814 eip, exit_reason, exit_reason);
2816 #endif /* SVM_EXTRA_DEBUG */
2818 TRACE_3D(TRC_VMX_VMEXIT, v->domain->domain_id, eip, exit_reason);
2820 switch (exit_reason)
2822 case VMEXIT_EXCEPTION_DB:
2824 #ifdef XEN_DEBUGGER
2825 svm_debug_save_cpu_user_regs(&regs);
2826 pdb_handle_exception(1, &regs, 1);
2827 svm_debug_restore_cpu_user_regs(&regs);
2828 #else
2829 svm_store_cpu_user_regs(&regs, v);
2830 domain_pause_for_debugger();
2831 #endif
2833 break;
2835 case VMEXIT_NMI:
2836 do_nmi(&regs, 0);
2837 break;
2839 case VMEXIT_SMI:
2840 /*
2841 * For asynchronous SMIs, we just need to allow global interrupts
2842 * so that the SMI is taken properly in the context of the host. The
2843 * standard exit path does an STGI after the VMEXIT, which accomplishes
2844 * this. Continue as normal and restart the guest.
2845 */
2846 break;
2848 case VMEXIT_INIT:
2849 /*
2850 * Nothing to do; in fact, we should never get to this point.
2851 */
2852 break;
2854 case VMEXIT_EXCEPTION_BP:
2855 #ifdef XEN_DEBUGGER
2856 svm_debug_save_cpu_user_regs(&regs);
2857 pdb_handle_exception(3, &regs, 1);
2858 svm_debug_restore_cpu_user_regs(&regs);
2859 #else
2860 if ( test_bit(_DOMF_debugging, &v->domain->domain_flags) )
2861 domain_pause_for_debugger();
2862 else
2863 svm_inject_exception(v, TRAP_int3, 0, 0);
2864 #endif
2865 break;
2867 case VMEXIT_EXCEPTION_NM:
2868 svm_do_no_device_fault(vmcb);
2869 break;
2871 case VMEXIT_EXCEPTION_GP:
2872 /* This should probably not be trapped in the future */
2873 regs.error_code = vmcb->exitinfo1;
2874 svm_do_general_protection_fault(v, &regs);
2875 break;
2877 case VMEXIT_EXCEPTION_PF:
2879 unsigned long va;
2880 va = vmcb->exitinfo2;
2881 regs.error_code = vmcb->exitinfo1;
2882 HVM_DBG_LOG(DBG_LEVEL_VMMU,
2883 "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx",
2884 (unsigned long)regs.eax, (unsigned long)regs.ebx,
2885 (unsigned long)regs.ecx, (unsigned long)regs.edx,
2886 (unsigned long)regs.esi, (unsigned long)regs.edi);
2888 if (!(error = svm_do_page_fault(va, &regs)))
2890 /* Inject #PF using the interruption-information fields */
2891 svm_inject_exception(v, TRAP_page_fault, 1, regs.error_code);
2893 v->arch.hvm_svm.cpu_cr2 = va;
2894 vmcb->cr2 = va;
2895 TRACE_3D(TRC_VMX_INT, v->domain->domain_id,
2896 VMEXIT_EXCEPTION_PF, va);
2898 break;
2901 case VMEXIT_EXCEPTION_DF:
2902 /* Debug info to hopefully help debug WHY the guest double-faulted. */
2903 svm_dump_vmcb(__func__, vmcb);
2904 svm_dump_regs(__func__, &regs);
2905 svm_dump_inst(svm_rip2pointer(vmcb));
2906 svm_inject_exception(v, TRAP_double_fault, 1, 0);
2907 break;
2909 case VMEXIT_INTR:
2910 raise_softirq(SCHEDULE_SOFTIRQ);
2911 break;
2914 case VMEXIT_INVD:
2915 svm_vmexit_do_invd(vmcb);
2916 break;
2918 case VMEXIT_GDTR_WRITE:
2919 printk("WRITE to GDTR\n");
2920 break;
2922 case VMEXIT_TASK_SWITCH:
2923 __hvm_bug(&regs);
2924 break;
2926 case VMEXIT_CPUID:
2927 svm_vmexit_do_cpuid(vmcb, regs.eax, &regs);
2928 break;
2930 case VMEXIT_HLT:
2931 svm_vmexit_do_hlt(vmcb);
2932 break;
2934 case VMEXIT_INVLPG:
2935 svm_handle_invlpg(0, &regs);
2936 break;
2938 case VMEXIT_INVLPGA:
2939 svm_handle_invlpg(1, &regs);
2940 break;
2942 case VMEXIT_VMMCALL:
2943 svm_do_vmmcall(v, &regs);
2944 break;
2946 case VMEXIT_CR0_READ:
2947 svm_cr_access(v, 0, TYPE_MOV_FROM_CR, &regs);
2948 break;
2950 case VMEXIT_CR2_READ:
2951 svm_cr_access(v, 2, TYPE_MOV_FROM_CR, &regs);
2952 break;
2954 case VMEXIT_CR3_READ:
2955 svm_cr_access(v, 3, TYPE_MOV_FROM_CR, &regs);
2956 break;
2958 case VMEXIT_CR4_READ:
2959 svm_cr_access(v, 4, TYPE_MOV_FROM_CR, &regs);
2960 break;
2962 case VMEXIT_CR8_READ:
2963 svm_cr_access(v, 8, TYPE_MOV_FROM_CR, &regs);
2964 break;
2966 case VMEXIT_CR0_WRITE:
2967 svm_cr_access(v, 0, TYPE_MOV_TO_CR, &regs);
2968 break;
2970 case VMEXIT_CR2_WRITE:
2971 svm_cr_access(v, 2, TYPE_MOV_TO_CR, &regs);
2972 break;
2974 case VMEXIT_CR3_WRITE:
2975 svm_cr_access(v, 3, TYPE_MOV_TO_CR, &regs);
2976 local_flush_tlb();
2977 break;
2979 case VMEXIT_CR4_WRITE:
2980 svm_cr_access(v, 4, TYPE_MOV_TO_CR, &regs);
2981 break;
2983 case VMEXIT_CR8_WRITE:
2984 svm_cr_access(v, 8, TYPE_MOV_TO_CR, &regs);
2985 break;
2987 case VMEXIT_DR0_READ:
2988 svm_dr_access(v, 0, TYPE_MOV_FROM_DR, &regs);
2989 break;
2991 case VMEXIT_DR1_READ:
2992 svm_dr_access(v, 1, TYPE_MOV_FROM_DR, &regs);
2993 break;
2995 case VMEXIT_DR2_READ:
2996 svm_dr_access(v, 2, TYPE_MOV_FROM_DR, &regs);
2997 break;
2999 case VMEXIT_DR3_READ:
3000 svm_dr_access(v, 3, TYPE_MOV_FROM_DR, &regs);
3001 break;
3003 case VMEXIT_DR6_READ:
3004 svm_dr_access(v, 6, TYPE_MOV_FROM_DR, &regs);
3005 break;
3007 case VMEXIT_DR7_READ:
3008 svm_dr_access(v, 7, TYPE_MOV_FROM_DR, &regs);
3009 break;
3011 case VMEXIT_DR0_WRITE:
3012 svm_dr_access(v, 0, TYPE_MOV_TO_DR, &regs);
3013 break;
3015 case VMEXIT_DR1_WRITE:
3016 svm_dr_access(v, 1, TYPE_MOV_TO_DR, &regs);
3017 break;
3019 case VMEXIT_DR2_WRITE:
3020 svm_dr_access(v, 2, TYPE_MOV_TO_DR, &regs);
3021 break;
3023 case VMEXIT_DR3_WRITE:
3024 svm_dr_access(v, 3, TYPE_MOV_TO_DR, &regs);
3025 break;
3027 case VMEXIT_DR6_WRITE:
3028 svm_dr_access(v, 6, TYPE_MOV_TO_DR, &regs);
3029 break;
3031 case VMEXIT_DR7_WRITE:
3032 svm_dr_access(v, 7, TYPE_MOV_TO_DR, &regs);
3033 break;
3035 case VMEXIT_IOIO:
3036 svm_io_instruction(v);
3037 break;
3039 case VMEXIT_MSR:
3040 svm_do_msr_access(v, &regs);
3041 break;
3043 case VMEXIT_SHUTDOWN:
3044 printk("Guest shutdown exit\n");
3045 domain_crash_synchronous();
3046 break;
3048 default:
3049 printk("unexpected VMEXIT: exit reason = 0x%x, exitinfo1 = %llx, "
3050 "exitinfo2 = %llx\n", exit_reason,
3051 (unsigned long long)vmcb->exitinfo1,
3052 (unsigned long long)vmcb->exitinfo2);
3053 __hvm_bug(&regs); /* should not happen */
3054 break;
3057 #ifdef SVM_EXTRA_DEBUG
3058 if (do_debug)
3060 printk("%s: Done switch on vmexit_code\n", __func__);
3061 svm_dump_regs(__func__, &regs);
3064 if (do_debug)
3066 printk("vmexit_handler():- guest_table = 0x%08x, "
3067 "monitor_table = 0x%08x, shadow_table = 0x%08x\n",
3068 (int)v->arch.guest_table.pfn,
3069 (int)v->arch.monitor_table.pfn,
3070 (int)v->arch.shadow_table.pfn);
3071 printk("svm_vmexit_handler: Returning\n");
3073 #endif
3075 return;
3078 asmlinkage void svm_load_cr2(void)
3080 struct vcpu *v = current;
3082 local_irq_disable();
3083 asm volatile("mov %0,%%cr2": :"r" (v->arch.hvm_svm.cpu_cr2));
3086 asmlinkage void svm_asid(void)
3088 struct vcpu *v = current;
3089 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
3091 /*
3092 * If we need to assign a new ASID, or if we are switching cores, retire
3093 * the ASID used on the old core and assign a new ASID for the current core.
3094 */
3095 if ( test_bit( ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags ) ||
3096 ( v->arch.hvm_svm.asid_core != v->arch.hvm_svm.launch_core )) {
3097 /* recycle asid */
3098 if ( !asidpool_assign_next( vmcb, 1,
3099 v->arch.hvm_svm.asid_core, v->arch.hvm_svm.launch_core )) {
3100 /* If we get here, we have a major problem */
3101 domain_crash_synchronous();
3104 v->arch.hvm_svm.asid_core = v->arch.hvm_svm.launch_core;
3105 clear_bit( ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags );
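/*
 * A minimal sketch of the condition that triggers ASID reassignment above:
 * either the VMCB has been flagged as needing a fresh ASID, or the VCPU has
 * been moved to a different physical core since its last VMRUN. The helper
 * name is illustrative only; the real work is done by asidpool_assign_next().
 */
static inline int svm_needs_new_asid(struct vcpu *v)
{
    return test_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags) ||
           (v->arch.hvm_svm.asid_core != v->arch.hvm_svm.launch_core);
}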
3109 /*
3110 * Local variables:
3111 * mode: C
3112 * c-set-style: "BSD"
3113 * c-basic-offset: 4
3114 * tab-width: 4
3115 * indent-tabs-mode: nil
3116 * End:
3117 */