ia64/xen-unstable

view xen/arch/x86/hvm/svm/svm.c @ 10892:0d2ba35c0cf2

[XEN] Add hypercall support for HVM guests. This is
fairly useless at the moment, since all of the hypercalls
fail because copy_from_user doesn't work correctly in HVM
domains.

Signed-off-by: Steven Smith <ssmith@xensource.com>

Add a CPUID hypervisor platform interface at leaf
0x40000000. Allow hypercall transfer page to be filled
in via MSR 0x40000000.

Signed-off-by: Keir Fraser <keir@xensource.com>
author kfraser@localhost.localdomain
date Tue Aug 01 17:18:05 2006 +0100 (2006-08-01)
parents 7137825805c7
children 2e3b121662dc
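
Below is a minimal, hypothetical guest-side sketch of the interface described in the commit message: probe the CPUID hypervisor leaf at 0x40000000 and register a hypercall transfer page via MSR 0x40000000, then call through the page. Only the leaf/MSR numbers and the 32-bytes-per-hypercall page layout are taken from this changeset (see svm_init_hypercall_page() below); the MSR payload format (guest physical address of the page) and all helper names (hv_setup_hypercalls, HV_CPUID_LEAF, HV_HCALL_MSR) are assumptions, not part of svm.c.

    #include <stdint.h>

    #define HV_CPUID_LEAF  0x40000000   /* hypervisor platform interface leaf */
    #define HV_HCALL_MSR   0x40000000   /* MSR used to register the hypercall page */

    static inline void cpuid(uint32_t leaf, uint32_t *a, uint32_t *b,
                             uint32_t *c, uint32_t *d)
    {
        __asm__ __volatile__ ("cpuid"
                              : "=a" (*a), "=b" (*b), "=c" (*c), "=d" (*d)
                              : "0" (leaf));
    }

    static inline void wrmsr(uint32_t msr, uint64_t val)
    {
        __asm__ __volatile__ ("wrmsr" : :
                              "c" (msr), "a" ((uint32_t)val),
                              "d" ((uint32_t)(val >> 32)));
    }

    /* One page, filled in by the hypervisor with per-hypercall stubs. */
    static uint8_t hypercall_page[4096] __attribute__((aligned(4096)));

    /* page_gpa: guest physical address of hypercall_page (assumed payload). */
    void hv_setup_hypercalls(uint64_t page_gpa)
    {
        uint32_t eax, ebx, ecx, edx;

        cpuid(HV_CPUID_LEAF, &eax, &ebx, &ecx, &edx);  /* probe the interface */
        wrmsr(HV_HCALL_MSR, page_gpa);                 /* hypervisor writes the stubs */

        /* Hypercall nr lives at offset nr*32: mov $nr,%eax; vmmcall; ret. */
        long (*hcall0)(void) = (long (*)(void))&hypercall_page[0 * 32];
        (void)hcall0();                                /* issue hypercall 0 */
    }
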
line source
1 /*
2 * svm.c: handling SVM architecture-related VM exits
3 * Copyright (c) 2004, Intel Corporation.
4 * Copyright (c) 2005, AMD Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
17 * Place - Suite 330, Boston, MA 02111-1307 USA.
18 *
19 */
21 #include <xen/config.h>
22 #include <xen/init.h>
23 #include <xen/lib.h>
24 #include <xen/trace.h>
25 #include <xen/sched.h>
26 #include <xen/irq.h>
27 #include <xen/softirq.h>
28 #include <asm/current.h>
29 #include <asm/io.h>
30 #include <asm/shadow.h>
31 #include <asm/regs.h>
32 #include <asm/cpufeature.h>
33 #include <asm/processor.h>
34 #include <asm/types.h>
35 #include <asm/msr.h>
36 #include <asm/spinlock.h>
37 #include <asm/hvm/hvm.h>
38 #include <asm/hvm/support.h>
39 #include <asm/hvm/io.h>
40 #include <asm/hvm/svm/svm.h>
41 #include <asm/hvm/svm/vmcb.h>
42 #include <asm/hvm/svm/emulate.h>
43 #include <asm/hvm/svm/vmmcall.h>
44 #include <asm/hvm/svm/intr.h>
45 #include <asm/shadow.h>
46 #if CONFIG_PAGING_LEVELS >= 3
47 #include <asm/shadow_64.h>
48 #endif
49 #include <public/sched.h>
51 #define SVM_EXTRA_DEBUG
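/* Load 'value' into the x86 segment register 'name'. */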
53 #define set_segment_register(name, value) \
54 __asm__ __volatile__ ( "movw %%ax ,%%" STR(name) "" : : "a" (value) )
56 /*
57 * External functions, etc. We should move these to some suitable header file(s) */
59 extern void do_nmi(struct cpu_user_regs *, unsigned long);
60 extern int inst_copy_from_guest(unsigned char *buf, unsigned long guest_eip,
61 int inst_len);
62 extern asmlinkage void do_IRQ(struct cpu_user_regs *);
63 extern void send_pio_req(struct cpu_user_regs *regs, unsigned long port,
64 unsigned long count, int size, long value, int dir, int pvalid);
65 extern int svm_instrlen(struct cpu_user_regs *regs, int mode);
66 extern void svm_dump_inst(unsigned long eip);
67 extern int svm_dbg_on;
68 void svm_dump_regs(const char *from, struct cpu_user_regs *regs);
70 static void svm_relinquish_guest_resources(struct domain *d);
71 static int svm_do_vmmcall_reset_to_realmode(struct vcpu *v,
72 struct cpu_user_regs *regs);
76 extern void set_hsa_to_guest( struct arch_svm_struct *arch_svm );
78 /* Host save area and ASID global data */
79 struct svm_percore_globals svm_globals[NR_CPUS];
81 /*
82 * Initializes the pool of ASIDs used by the guests, per core.
83 */
84 void asidpool_init(int core)
85 {
86 int i;
88 spin_lock_init(&svm_globals[core].ASIDpool.asid_lock);
90 /* Host ASID is always in use */
91 svm_globals[core].ASIDpool.asid[INITIAL_ASID] = ASID_INUSE;
92 for ( i = 1; i < ASID_MAX; i++ )
93 svm_globals[core].ASIDpool.asid[i] = ASID_AVAILABLE;
94 }
97 /* internal function to get the next available ASID */
98 static int asidpool_fetch_next(struct vmcb_struct *vmcb, int core)
99 {
100 int i;
101 for ( i = 1; i < ASID_MAX; i++ )
102 {
103 if ( svm_globals[core].ASIDpool.asid[i] == ASID_AVAILABLE )
104 {
105 vmcb->guest_asid = i;
106 svm_globals[core].ASIDpool.asid[i] = ASID_INUSE;
107 return i;
108 }
109 }
110 return -1;
111 }
114 /*
115 * This function assigns to the passed VMCB the next
116 * available ASID number. If none are available, the
117 * TLB flush flag is set, and all retired ASIDs
118 * are made available.
119 *
120 * Returns: 1 -- success;
121 * 0 -- failure -- no more ASID numbers
122 * available.
123 */
124 int asidpool_assign_next( struct vmcb_struct *vmcb, int retire_current,
125 int oldcore, int newcore )
126 {
127 int i;
128 int res = 1;
129 static unsigned long cnt=0;
131 spin_lock(&svm_globals[oldcore].ASIDpool.asid_lock);
132 if( retire_current && vmcb->guest_asid ) {
133 svm_globals[oldcore].ASIDpool.asid[ vmcb->guest_asid & (ASID_MAX-1) ] = ASID_RETIRED;
134 }
135 spin_unlock(&svm_globals[oldcore].ASIDpool.asid_lock);
136 spin_lock(&svm_globals[newcore].ASIDpool.asid_lock);
137 if( asidpool_fetch_next( vmcb, newcore ) < 0 ) {
138 if (svm_dbg_on)
139 printk( "SVM: tlb(%ld)\n", cnt++ );
140 /* Flush the TLB and make all retired slots available */
141 vmcb->tlb_control = 1;
142 for( i = 1; i < ASID_MAX; i++ ) {
143 if( svm_globals[newcore].ASIDpool.asid[i] == ASID_RETIRED ) {
144 svm_globals[newcore].ASIDpool.asid[i] = ASID_AVAILABLE;
145 }
146 }
147 /* Get the first available slot */
148 res = asidpool_fetch_next( vmcb, newcore ) > 0;
149 }
150 spin_unlock(&svm_globals[newcore].ASIDpool.asid_lock);
151 return res;
152 }
154 void asidpool_retire( struct vmcb_struct *vmcb, int core )
155 {
156 spin_lock(&svm_globals[core].ASIDpool.asid_lock);
157 if( vmcb->guest_asid ) {
158 svm_globals[core].ASIDpool.asid[ vmcb->guest_asid & (ASID_MAX-1) ] = ASID_RETIRED;
159 }
160 spin_unlock(&svm_globals[core].ASIDpool.asid_lock);
161 }
163 static inline void svm_inject_exception(struct vcpu *v, int trap, int ev, int error_code)
164 {
165 eventinj_t event;
166 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
168 event.bytes = 0;
169 event.fields.v = 1;
170 event.fields.type = EVENTTYPE_EXCEPTION;
171 event.fields.vector = trap;
172 event.fields.ev = ev;
173 event.fields.errorcode = error_code;
175 ASSERT(vmcb->eventinj.fields.v == 0);
177 vmcb->eventinj = event;
178 }
180 void stop_svm(void)
181 {
182 u32 eax, edx;
183 int cpu = smp_processor_id();
185 /* We turn off the EFER_SVME bit. */
186 rdmsr(MSR_EFER, eax, edx);
187 eax &= ~EFER_SVME;
188 wrmsr(MSR_EFER, eax, edx);
190 /* release the HSA */
191 free_host_save_area( svm_globals[cpu].hsa );
192 free_host_save_area( svm_globals[cpu].scratch_hsa );
193 svm_globals[cpu].hsa = NULL;
194 svm_globals[cpu].hsa_pa = 0;
195 svm_globals[cpu].scratch_hsa = NULL;
196 svm_globals[cpu].scratch_hsa_pa = 0;
197 wrmsr(MSR_K8_VM_HSAVE_PA, 0, 0 );
199 printk("AMD SVM Extension is disabled.\n");
200 }
202 int svm_initialize_guest_resources(struct vcpu *v)
203 {
204 svm_final_setup_guest(v);
205 return 1;
206 }
208 static void svm_store_cpu_guest_regs(
209 struct vcpu *v, struct cpu_user_regs *regs, unsigned long *crs)
210 {
211 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
213 if ( regs != NULL )
214 {
215 regs->eip = vmcb->rip;
216 regs->esp = vmcb->rsp;
217 regs->eflags = vmcb->rflags;
218 regs->cs = vmcb->cs.sel;
219 regs->ds = vmcb->ds.sel;
220 regs->es = vmcb->es.sel;
221 regs->ss = vmcb->ss.sel;
222 regs->gs = vmcb->gs.sel;
223 regs->fs = vmcb->fs.sel;
224 }
226 if ( crs != NULL )
227 {
228 /* Returning the guest's regs */
229 crs[0] = v->arch.hvm_svm.cpu_shadow_cr0;
230 crs[3] = v->arch.hvm_svm.cpu_cr3;
231 crs[4] = v->arch.hvm_svm.cpu_shadow_cr4;
232 }
233 }
235 static void svm_load_cpu_guest_regs(
236 struct vcpu *v, struct cpu_user_regs *regs)
237 {
238 svm_load_cpu_user_regs(v, regs);
239 }
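/* XXX: Stub -- always treats the address as canonical; no real check is done. */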
241 #define IS_CANO_ADDRESS(add) 1
243 static inline int long_mode_do_msr_read(struct cpu_user_regs *regs)
244 {
245 u64 msr_content = 0;
246 struct vcpu *vc = current;
247 struct vmcb_struct *vmcb = vc->arch.hvm_svm.vmcb;
249 switch (regs->ecx)
250 {
251 case MSR_EFER:
252 msr_content = vmcb->efer;
253 msr_content &= ~EFER_SVME;
254 break;
256 case MSR_FS_BASE:
257 msr_content = vmcb->fs.base;
258 break;
260 case MSR_GS_BASE:
261 msr_content = vmcb->gs.base;
262 break;
264 case MSR_SHADOW_GS_BASE:
265 msr_content = vmcb->kerngsbase;
266 break;
268 case MSR_STAR:
269 msr_content = vmcb->star;
270 break;
272 case MSR_LSTAR:
273 msr_content = vmcb->lstar;
274 break;
276 case MSR_CSTAR:
277 msr_content = vmcb->cstar;
278 break;
280 case MSR_SYSCALL_MASK:
281 msr_content = vmcb->sfmask;
282 break;
284 default:
285 return 0;
286 }
288 HVM_DBG_LOG(DBG_LEVEL_2, "mode_do_msr_read: msr_content: %"PRIx64"\n",
289 msr_content);
291 regs->eax = msr_content & 0xffffffff;
292 regs->edx = msr_content >> 32;
293 return 1;
294 }
296 static inline int long_mode_do_msr_write(struct cpu_user_regs *regs)
297 {
298 u64 msr_content = regs->eax | ((u64)regs->edx << 32);
299 struct vcpu *vc = current;
300 struct vmcb_struct *vmcb = vc->arch.hvm_svm.vmcb;
302 HVM_DBG_LOG(DBG_LEVEL_1, "mode_do_msr_write msr %lx "
303 "msr_content %"PRIx64"\n",
304 (unsigned long)regs->ecx, msr_content);
306 switch (regs->ecx)
307 {
308 case MSR_EFER:
309 #ifdef __x86_64__
310 /* offending reserved bit will cause #GP */
311 if ( msr_content & ~(EFER_LME | EFER_LMA | EFER_NX | EFER_SCE) )
312 {
313 printk("trying to set reserved bit in EFER\n");
314 svm_inject_exception(vc, TRAP_gp_fault, 1, 0);
315 return 0;
316 }
318 /* LME: 0 -> 1 */
319 if ( msr_content & EFER_LME &&
320 !test_bit(SVM_CPU_STATE_LME_ENABLED, &vc->arch.hvm_svm.cpu_state) )
321 {
322 if ( svm_paging_enabled(vc) ||
323 !test_bit(SVM_CPU_STATE_PAE_ENABLED,
324 &vc->arch.hvm_svm.cpu_state) )
325 {
326 printk("trying to set LME bit when "
327 "in paging mode or PAE bit is not set\n");
328 svm_inject_exception(vc, TRAP_gp_fault, 1, 0);
329 return 0;
330 }
331 set_bit(SVM_CPU_STATE_LME_ENABLED, &vc->arch.hvm_svm.cpu_state);
332 }
334 /* We have already recorded that we want LME, so it will be set
335 * next time CR0 gets updated. So we clear that bit and continue.
336 */
337 if ((msr_content ^ vmcb->efer) & EFER_LME)
338 msr_content &= ~EFER_LME;
339 /* No update for LME/LMA since it has no effect */
340 #endif
341 vmcb->efer = msr_content | EFER_SVME;
342 break;
344 case MSR_FS_BASE:
345 case MSR_GS_BASE:
346 if (!(SVM_LONG_GUEST(vc)))
347 domain_crash_synchronous();
349 if (!IS_CANO_ADDRESS(msr_content))
350 {
351 HVM_DBG_LOG(DBG_LEVEL_1, "Non-canonical address in MSR write\n");
352 svm_inject_exception(vc, TRAP_gp_fault, 1, 0);
353 }
355 if (regs->ecx == MSR_FS_BASE)
356 vmcb->fs.base = msr_content;
357 else
358 vmcb->gs.base = msr_content;
359 break;
361 case MSR_SHADOW_GS_BASE:
362 vmcb->kerngsbase = msr_content;
363 break;
365 case MSR_STAR:
366 vmcb->star = msr_content;
367 break;
369 case MSR_LSTAR:
370 vmcb->lstar = msr_content;
371 break;
373 case MSR_CSTAR:
374 vmcb->cstar = msr_content;
375 break;
377 case MSR_SYSCALL_MASK:
378 vmcb->sfmask = msr_content;
379 break;
381 default:
382 return 0;
383 }
384 return 1;
385 }
387 int svm_realmode(struct vcpu *v)
388 {
389 unsigned long cr0 = v->arch.hvm_svm.cpu_shadow_cr0;
390 unsigned long eflags = v->arch.hvm_svm.vmcb->rflags;
392 return (eflags & X86_EFLAGS_VM) || !(cr0 & X86_CR0_PE);
393 }
395 int svm_instruction_length(struct vcpu *v)
396 {
397 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
398 unsigned long cr0 = vmcb->cr0, eflags = vmcb->rflags, mode;
399 /* check which operating mode the guest is running */
400 if( vmcb->efer & EFER_LMA )
401 mode = vmcb->cs.attributes.fields.l ? 8 : 4;
402 else
403 mode = (eflags & X86_EFLAGS_VM) || !(cr0 & X86_CR0_PE) ? 2 : 4;
404 return svm_instrlen(guest_cpu_user_regs(), mode);
405 }
407 unsigned long svm_get_ctrl_reg(struct vcpu *v, unsigned int num)
408 {
409 switch ( num )
410 {
411 case 0:
412 return v->arch.hvm_svm.cpu_shadow_cr0;
413 case 2:
414 return v->arch.hvm_svm.cpu_cr2;
415 case 3:
416 return v->arch.hvm_svm.cpu_cr3;
417 default:
418 BUG();
419 }
420 return 0; /* dummy */
421 }
424 /* SVM-specific initialization code for VCPU application processors */
425 void svm_init_ap_context(struct vcpu_guest_context *ctxt,
426 int vcpuid, int trampoline_vector)
427 {
428 int i;
429 struct vcpu *v, *bsp = current;
430 struct domain *d = bsp->domain;
431 cpu_user_regs_t *regs;
434 if ((v = d->vcpu[vcpuid]) == NULL)
435 {
436 printk("vcpuid %d is invalid! good-bye.\n", vcpuid);
437 domain_crash_synchronous();
438 }
439 regs = &v->arch.guest_context.user_regs;
441 memset(ctxt, 0, sizeof(*ctxt));
442 for (i = 0; i < 256; ++i)
443 {
444 ctxt->trap_ctxt[i].vector = i;
445 ctxt->trap_ctxt[i].cs = FLAT_KERNEL_CS;
446 }
449 /*
450 * We execute the trampoline code in real mode. The trampoline vector
451 * passed to us is page aligned and is the physical frame number of
452 * the code.
453 */
454 ctxt->user_regs.eip = 0x0;
455 ctxt->user_regs.cs = (trampoline_vector << 8);
456 ctxt->flags = VGCF_HVM_GUEST;
457 }
459 static void svm_init_hypercall_page(struct domain *d, void *hypercall_page)
460 {
461 char *p;
462 int i;
464 memset(hypercall_page, 0, PAGE_SIZE);
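/* Each hypercall stub occupies 32 bytes: mov $nr,%eax; vmmcall; ret. */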
466 for ( i = 0; i < (PAGE_SIZE / 32); i++ )
467 {
468 p = (char *)(hypercall_page + (i * 32));
469 *(u8 *)(p + 0) = 0xb8; /* mov imm32, %eax */
470 *(u32 *)(p + 1) = i;
471 *(u8 *)(p + 5) = 0x0f; /* vmmcall */
472 *(u8 *)(p + 6) = 0x01;
473 *(u8 *)(p + 7) = 0xd9;
474 *(u8 *)(p + 8) = 0xc3; /* ret */
475 }
477 /* Don't support HYPERVISOR_iret at the moment */
478 *(u16 *)(hypercall_page + (__HYPERVISOR_iret * 32)) = 0x0b0f; /* ud2 */
479 }
481 int start_svm(void)
482 {
483 u32 eax, ecx, edx;
484 u32 phys_hsa_lo, phys_hsa_hi;
485 u64 phys_hsa;
486 int cpu = smp_processor_id();
488 /* Xen does not fill x86_capability words except 0. */
489 ecx = cpuid_ecx(0x80000001);
490 boot_cpu_data.x86_capability[5] = ecx;
492 if (!(test_bit(X86_FEATURE_SVME, &boot_cpu_data.x86_capability)))
493 return 0;
494 svm_globals[cpu].hsa = alloc_host_save_area();
495 if (! svm_globals[cpu].hsa)
496 return 0;
498 rdmsr(MSR_EFER, eax, edx);
499 eax |= EFER_SVME;
500 wrmsr(MSR_EFER, eax, edx);
501 asidpool_init( cpu );
502 printk("AMD SVM Extension is enabled for cpu %d.\n", cpu );
504 /* Initialize the HSA for this core */
505 phys_hsa = (u64) virt_to_maddr( svm_globals[cpu].hsa );
506 phys_hsa_lo = (u32) phys_hsa;
507 phys_hsa_hi = (u32) (phys_hsa >> 32);
508 wrmsr(MSR_K8_VM_HSAVE_PA, phys_hsa_lo, phys_hsa_hi);
509 svm_globals[cpu].hsa_pa = phys_hsa;
511 svm_globals[cpu].scratch_hsa = alloc_host_save_area();
512 svm_globals[cpu].scratch_hsa_pa = (u64)virt_to_maddr( svm_globals[cpu].scratch_hsa );
514 /* Setup HVM interfaces */
515 hvm_funcs.disable = stop_svm;
517 hvm_funcs.initialize_guest_resources = svm_initialize_guest_resources;
518 hvm_funcs.relinquish_guest_resources = svm_relinquish_guest_resources;
520 hvm_funcs.store_cpu_guest_regs = svm_store_cpu_guest_regs;
521 hvm_funcs.load_cpu_guest_regs = svm_load_cpu_guest_regs;
523 hvm_funcs.realmode = svm_realmode;
524 hvm_funcs.paging_enabled = svm_paging_enabled;
525 hvm_funcs.instruction_length = svm_instruction_length;
526 hvm_funcs.get_guest_ctrl_reg = svm_get_ctrl_reg;
527 hvm_funcs.init_ap_context = svm_init_ap_context;
529 hvm_funcs.init_hypercall_page = svm_init_hypercall_page;
531 hvm_enabled = 1;
533 return 1;
534 }
536 int svm_dbg_on = 0;
538 static inline int svm_do_debugout(unsigned long exit_code)
539 {
540 int i;
542 static unsigned long counter = 0;
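/* VM exit codes handled routinely; svm_do_debugout() returns 0 (no output) for these. */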
543 static unsigned long works[] =
544 {
545 VMEXIT_IOIO,
546 VMEXIT_HLT,
547 VMEXIT_CPUID,
548 VMEXIT_DR0_READ,
549 VMEXIT_DR1_READ,
550 VMEXIT_DR2_READ,
551 VMEXIT_DR3_READ,
552 VMEXIT_DR6_READ,
553 VMEXIT_DR7_READ,
554 VMEXIT_DR0_WRITE,
555 VMEXIT_DR1_WRITE,
556 VMEXIT_DR2_WRITE,
557 VMEXIT_DR3_WRITE,
558 VMEXIT_CR0_READ,
559 VMEXIT_CR0_WRITE,
560 VMEXIT_CR3_READ,
561 VMEXIT_CR4_READ,
562 VMEXIT_MSR,
563 VMEXIT_CR0_WRITE,
564 VMEXIT_CR3_WRITE,
565 VMEXIT_CR4_WRITE,
566 VMEXIT_EXCEPTION_PF,
567 VMEXIT_INTR,
568 VMEXIT_INVLPG,
569 VMEXIT_EXCEPTION_NM
570 };
573 #if 0
574 if (svm_dbg_on && exit_code != 0x7B)
575 return 1;
576 #endif
578 counter++;
580 #if 0
581 if ((exit_code == 0x4E
582 || exit_code == VMEXIT_CR0_READ
583 || exit_code == VMEXIT_CR0_WRITE)
584 && counter < 200000)
585 return 0;
587 if ((exit_code == 0x4E) && counter < 500000)
588 return 0;
589 #endif
591 for (i = 0; i < sizeof(works) / sizeof(works[0]); i++)
592 if (exit_code == works[i])
593 return 0;
595 return 1;
596 }
598 void save_svm_cpu_user_regs(struct vcpu *v, struct cpu_user_regs *ctxt)
599 {
600 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
602 ASSERT(vmcb);
604 ctxt->eax = vmcb->rax;
605 ctxt->ss = vmcb->ss.sel;
606 ctxt->esp = vmcb->rsp;
607 ctxt->eflags = vmcb->rflags;
608 ctxt->cs = vmcb->cs.sel;
609 ctxt->eip = vmcb->rip;
611 ctxt->gs = vmcb->gs.sel;
612 ctxt->fs = vmcb->fs.sel;
613 ctxt->es = vmcb->es.sel;
614 ctxt->ds = vmcb->ds.sel;
615 }
617 void svm_store_cpu_user_regs(struct cpu_user_regs *regs, struct vcpu *v)
618 {
619 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
621 regs->eip = vmcb->rip;
622 regs->esp = vmcb->rsp;
623 regs->eflags = vmcb->rflags;
624 regs->cs = vmcb->cs.sel;
625 regs->ds = vmcb->ds.sel;
626 regs->es = vmcb->es.sel;
627 regs->ss = vmcb->ss.sel;
628 }
630 /* XXX Use svm_load_cpu_guest_regs instead */
631 void svm_load_cpu_user_regs(struct vcpu *v, struct cpu_user_regs *regs)
632 {
633 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
634 u32 *intercepts = &v->arch.hvm_svm.vmcb->exception_intercepts;
636 /* Write the guest register value into VMCB */
637 vmcb->rax = regs->eax;
638 vmcb->ss.sel = regs->ss;
639 vmcb->rsp = regs->esp;
640 vmcb->rflags = regs->eflags;
641 vmcb->cs.sel = regs->cs;
642 vmcb->rip = regs->eip;
643 if (regs->eflags & EF_TF)
644 *intercepts |= EXCEPTION_BITMAP_DB;
645 else
646 *intercepts &= ~EXCEPTION_BITMAP_DB;
647 }
649 int svm_paging_enabled(struct vcpu *v)
650 {
651 unsigned long cr0;
653 cr0 = v->arch.hvm_svm.cpu_shadow_cr0;
655 return (cr0 & X86_CR0_PE) && (cr0 & X86_CR0_PG);
656 }
659 /* Make sure that xen intercepts any FP accesses from current */
660 void svm_stts(struct vcpu *v)
661 {
662 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
664 /* FPU state already dirty? Then no need to setup_fpu() lazily. */
665 if ( test_bit(_VCPUF_fpu_dirtied, &v->vcpu_flags) )
666 return;
668 /*
669 * If the guest does not have TS enabled then we must cause and handle an
670 * exception on first use of the FPU. If the guest *does* have TS enabled
671 * then this is not necessary: no FPU activity can occur until the guest
672 * clears CR0.TS, and we will initialise the FPU when that happens.
673 */
674 if ( !(v->arch.hvm_svm.cpu_shadow_cr0 & X86_CR0_TS) )
675 {
676 v->arch.hvm_svm.vmcb->exception_intercepts |= EXCEPTION_BITMAP_NM;
677 vmcb->cr0 |= X86_CR0_TS;
678 }
679 }
681 static void arch_svm_do_launch(struct vcpu *v)
682 {
683 cpu_user_regs_t *regs = &current->arch.guest_context.user_regs;
684 int error;
686 #if 0
687 if (svm_dbg_on)
688 printk("Do launch\n");
689 #endif
690 error = construct_vmcb(&v->arch.hvm_svm, regs);
691 if ( error < 0 )
692 {
693 if (v->vcpu_id == 0) {
694 printk("Failed to construct a new VMCB for BSP.\n");
695 } else {
696 printk("Failed to construct a new VMCB for AP %d\n", v->vcpu_id);
697 }
698 domain_crash_synchronous();
699 }
701 svm_do_launch(v);
702 #if 0
703 if (svm_dbg_on)
704 svm_dump_host_regs(__func__);
705 #endif
706 if (v->vcpu_id != 0)
707 {
708 u16 cs_sel = regs->cs;
709 /*
710 * This is the launch of an AP; set state so that we begin executing
711 * the trampoline code in real-mode.
712 */
713 svm_do_vmmcall_reset_to_realmode(v, regs);
714 /* Adjust the state to execute the trampoline code.*/
715 v->arch.hvm_svm.vmcb->rip = 0;
716 v->arch.hvm_svm.vmcb->cs.sel= cs_sel;
717 v->arch.hvm_svm.vmcb->cs.base = (cs_sel << 4);
718 }
720 reset_stack_and_jump(svm_asm_do_launch);
721 }
723 static void svm_freeze_time(struct vcpu *v)
724 {
725 struct periodic_time *pt=&v->domain->arch.hvm_domain.pl_time.periodic_tm;
727 if ( pt->enabled && pt->first_injected && !v->arch.hvm_vcpu.guest_time ) {
728 v->arch.hvm_vcpu.guest_time = hvm_get_guest_time(v);
729 stop_timer(&(pt->timer));
730 }
731 }
733 static void svm_ctxt_switch_from(struct vcpu *v)
734 {
735 svm_freeze_time(v);
736 }
738 static void svm_ctxt_switch_to(struct vcpu *v)
739 {
740 #if __x86_64__
741 /*
742 * This is required because VMRUN performs consistency checks,
743 * and some of the DOM0 selectors point to
744 * invalid GDT locations, which causes AMD processors
745 * to shut down.
746 */
747 set_segment_register(ds, 0);
748 set_segment_register(es, 0);
749 set_segment_register(ss, 0);
750 #endif
751 }
753 void svm_final_setup_guest(struct vcpu *v)
754 {
755 struct domain *d = v->domain;
756 struct vcpu *vc;
758 v->arch.schedule_tail = arch_svm_do_launch;
759 v->arch.ctxt_switch_from = svm_ctxt_switch_from;
760 v->arch.ctxt_switch_to = svm_ctxt_switch_to;
762 if ( v != d->vcpu[0] )
763 return;
765 /* Initialize monitor page table */
766 for_each_vcpu( d, vc )
767 vc->arch.monitor_table = pagetable_null();
769 /*
770 * This is required only once per domain.
771 * TODO: add a separate function to do these.
772 */
773 memset(&d->shared_info->evtchn_mask[0], 0xff,
774 sizeof(d->shared_info->evtchn_mask));
776 /*
777 * Put the domain in shadow mode even though we're going to be using
778 * the shared 1:1 page table initially. It shouldn't hurt
779 */
780 shadow_mode_enable(d,
781 SHM_enable|SHM_refcounts|
782 SHM_translate|SHM_external|SHM_wr_pt_pte);
783 }
786 static void svm_relinquish_guest_resources(struct domain *d)
787 {
788 extern void destroy_vmcb(struct arch_svm_struct *); /* XXX */
789 struct vcpu *v;
791 for_each_vcpu ( d, v )
792 {
793 if ( !test_bit(_VCPUF_initialised, &v->vcpu_flags) )
794 continue;
796 destroy_vmcb(&v->arch.hvm_svm);
797 free_monitor_pagetable(v);
798 kill_timer(&v->arch.hvm_svm.hlt_timer);
799 if ( hvm_apic_support(v->domain) && (VLAPIC(v) != NULL) )
800 {
801 kill_timer( &(VLAPIC(v)->vlapic_timer) );
802 xfree(VLAPIC(v));
803 }
804 }
806 kill_timer(&d->arch.hvm_domain.pl_time.periodic_tm.timer);
808 if ( d->arch.hvm_domain.shared_page_va )
809 unmap_domain_page_global(
810 (void *)d->arch.hvm_domain.shared_page_va);
812 shadow_direct_map_clean(d);
813 }
816 void arch_svm_do_resume(struct vcpu *v)
817 {
818 /* pinning VCPU to a different core? */
819 if ( v->arch.hvm_svm.launch_core == smp_processor_id()) {
820 svm_do_resume( v );
821 reset_stack_and_jump( svm_asm_do_resume );
822 }
823 else {
824 if (svm_dbg_on)
825 printk("VCPU core pinned: %d to %d\n",
826 v->arch.hvm_svm.launch_core, smp_processor_id() );
827 v->arch.hvm_svm.launch_core = smp_processor_id();
828 svm_migrate_timers( v );
829 svm_do_resume( v );
830 reset_stack_and_jump( svm_asm_do_resume );
831 }
832 }
835 void svm_migrate_timers(struct vcpu *v)
836 {
837 struct periodic_time *pt = &(v->domain->arch.hvm_domain.pl_time.periodic_tm);
839 if ( pt->enabled ) {
840 migrate_timer( &pt->timer, v->processor );
841 migrate_timer( &v->arch.hvm_svm.hlt_timer, v->processor );
842 }
843 if ( hvm_apic_support(v->domain) && VLAPIC( v ))
844 migrate_timer( &(VLAPIC(v)->vlapic_timer ), v->processor );
845 }
848 static int svm_do_page_fault(unsigned long va, struct cpu_user_regs *regs)
849 {
850 struct vcpu *v = current;
851 unsigned long eip;
852 unsigned long gpa; /* FIXME: PAE */
853 int result;
854 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
856 ASSERT(vmcb);
858 //#if HVM_DEBUG
859 eip = vmcb->rip;
860 HVM_DBG_LOG(DBG_LEVEL_VMMU,
861 "svm_do_page_fault = 0x%lx, eip = %lx, error_code = %lx",
862 va, eip, (unsigned long)regs->error_code);
863 //#endif
865 if ( !svm_paging_enabled(v) )
866 {
867 if ( shadow_direct_map_fault(va, regs) )
868 return 1;
870 handle_mmio(va, va);
871 return 1;
872 }
875 gpa = gva_to_gpa(va);
877 /* Use 1:1 page table to identify MMIO address space */
878 if (mmio_space(gpa))
879 {
880 /* No support for APIC */
881 if (!hvm_apic_support(v->domain) && gpa >= 0xFEC00000)
882 {
883 int inst_len;
884 inst_len = svm_instruction_length(v);
885 if (inst_len == -1)
886 {
887 printf("%s: INST_LEN - Unable to decode properly.\n", __func__);
888 domain_crash_synchronous();
889 }
891 __update_guest_eip(vmcb, inst_len);
893 return 1;
894 }
896 handle_mmio(va, gpa);
898 return 1;
899 }
901 result = shadow_fault(va, regs);
903 if( result ) {
904 /* Let's make sure that the Guest TLB is flushed */
905 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
906 }
908 return result;
909 }
912 static void svm_do_no_device_fault(struct vmcb_struct *vmcb)
913 {
914 struct vcpu *v = current;
916 setup_fpu(v);
917 vmcb->exception_intercepts &= ~EXCEPTION_BITMAP_NM;
919 if ( !(v->arch.hvm_svm.cpu_shadow_cr0 & X86_CR0_TS) )
920 vmcb->cr0 &= ~X86_CR0_TS;
921 }
924 static void svm_do_general_protection_fault(struct vcpu *v,
925 struct cpu_user_regs *regs)
926 {
927 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
928 unsigned long eip, error_code;
930 ASSERT(vmcb);
932 eip = vmcb->rip;
933 error_code = vmcb->exitinfo1;
935 if (vmcb->idtr.limit == 0) {
936 printf("Huh? We got a GP Fault with an invalid IDTR!\n");
937 svm_dump_vmcb(__func__, vmcb);
938 svm_dump_regs(__func__, regs);
939 svm_dump_inst(vmcb->rip);
940 __hvm_bug(regs);
941 }
943 HVM_DBG_LOG(DBG_LEVEL_1,
944 "svm_general_protection_fault: eip = %lx, erro_code = %lx",
945 eip, error_code);
947 HVM_DBG_LOG(DBG_LEVEL_1,
948 "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx",
949 (unsigned long)regs->eax, (unsigned long)regs->ebx,
950 (unsigned long)regs->ecx, (unsigned long)regs->edx,
951 (unsigned long)regs->esi, (unsigned long)regs->edi);
953 /* Reflect it back into the guest */
954 svm_inject_exception(v, TRAP_gp_fault, 1, error_code);
955 }
957 /* Reserved bits ECX: [31:14], [12:4], [2:1]*/
958 #define SVM_VCPU_CPUID_L1_ECX_RESERVED 0xffffdff6
959 /* Reserved bits EDX: [31:29], [27], [22:20], [18], [10] */
960 #define SVM_VCPU_CPUID_L1_EDX_RESERVED 0xe8740400
962 static void svm_vmexit_do_cpuid(struct vmcb_struct *vmcb, unsigned long input,
963 struct cpu_user_regs *regs)
964 {
965 unsigned int eax, ebx, ecx, edx;
966 unsigned long eip;
967 struct vcpu *v = current;
968 int inst_len;
970 ASSERT(vmcb);
972 eip = vmcb->rip;
974 HVM_DBG_LOG(DBG_LEVEL_1,
975 "do_cpuid: (eax) %lx, (ebx) %lx, (ecx) %lx, (edx) %lx,"
976 " (esi) %lx, (edi) %lx",
977 (unsigned long)regs->eax, (unsigned long)regs->ebx,
978 (unsigned long)regs->ecx, (unsigned long)regs->edx,
979 (unsigned long)regs->esi, (unsigned long)regs->edi);
981 cpuid(input, &eax, &ebx, &ecx, &edx);
983 if (input == 0x00000001)
984 {
985 if ( !hvm_apic_support(v->domain) ||
986 !vlapic_global_enabled((VLAPIC(v))) )
987 {
988 /* Since the apic is disabled, avoid any confusion
989 about SMP cpus being available */
990 clear_bit(X86_FEATURE_APIC, &edx);
991 }
993 #if CONFIG_PAGING_LEVELS < 3
994 clear_bit(X86_FEATURE_PAE, &edx);
995 clear_bit(X86_FEATURE_PSE, &edx);
996 clear_bit(X86_FEATURE_PSE36, &edx);
997 #else
998 if ( v->domain->arch.ops->guest_paging_levels == PAGING_L2 )
999 {
1000 if ( !v->domain->arch.hvm_domain.pae_enabled )
1001 clear_bit(X86_FEATURE_PAE, &edx);
1002 clear_bit(X86_FEATURE_PSE, &edx);
1003 clear_bit(X86_FEATURE_PSE36, &edx);
1005 #endif
1006 /* Clear out reserved bits. */
1007 ecx &= ~SVM_VCPU_CPUID_L1_ECX_RESERVED;
1008 edx &= ~SVM_VCPU_CPUID_L1_EDX_RESERVED;
1010 clear_bit(X86_FEATURE_MWAIT & 31, &ecx);
1012 /* Guest should only see one logical processor.
1013 * See details on page 23 of AMD CPUID Specification.
1014 */
1015 clear_bit(X86_FEATURE_HT, &edx); /* clear the hyperthread bit */
1016 ebx &= 0xFF00FFFF; /* clear the logical processor count when HTT=0 */
1017 ebx |= 0x00010000; /* set to 1 just for precaution */
1019 /* Disable machine check architecture */
1020 clear_bit(X86_FEATURE_MCA, &edx);
1021 clear_bit(X86_FEATURE_MCE, &edx);
1023 else if ( (input > 0x00000005) && (input < 0x80000000) )
1025 if ( !cpuid_hypervisor_leaves(input, &eax, &ebx, &ecx, &edx) )
1026 eax = ebx = ecx = edx = 0;
1028 else if ( input == 0x80000001 )
1030 /* We duplicate some CPUID_00000001 code because many bits of
1031 CPUID_80000001_EDX overlap with CPUID_00000001_EDX. */
1033 if ( !hvm_apic_support(v->domain) ||
1034 !vlapic_global_enabled((VLAPIC(v))) )
1036 /* Since the apic is disabled, avoid any confusion
1037 about SMP cpus being available */
1038 clear_bit(X86_FEATURE_APIC, &edx);
1041 /* Clear the Cmp_Legacy bit
1042 * This bit is supposed to be zero when HTT = 0.
1043 * See details on page 23 of AMD CPUID Specification.
1044 */
1045 clear_bit(X86_FEATURE_CMP_LEGACY & 31, &ecx);
1047 #ifdef __i386__
1048 /* Mask feature for Intel ia32e or AMD long mode. */
1049 clear_bit(X86_FEATURE_LAHF_LM & 31, &ecx);
1051 clear_bit(X86_FEATURE_LM & 31, &edx);
1052 clear_bit(X86_FEATURE_SYSCALL & 31, &edx);
1053 #endif
1055 #if CONFIG_PAGING_LEVELS < 3
1056 clear_bit(X86_FEATURE_NX & 31, &edx);
1057 clear_bit(X86_FEATURE_PAE, &edx);
1058 clear_bit(X86_FEATURE_PSE, &edx);
1059 clear_bit(X86_FEATURE_PSE36, &edx);
1060 #else
1061 if ( v->domain->arch.ops->guest_paging_levels == PAGING_L2 )
1063 if ( !v->domain->arch.hvm_domain.pae_enabled )
1065 clear_bit(X86_FEATURE_NX & 31, &edx);
1066 clear_bit(X86_FEATURE_PAE, &edx);
1068 clear_bit(X86_FEATURE_PSE, &edx);
1069 clear_bit(X86_FEATURE_PSE36, &edx);
1071 #endif
1073 /* Make SVM feature invisible to the guest. */
1074 clear_bit(X86_FEATURE_SVME & 31, &ecx);
1076 /* So far, we do not support 3DNow for the guest. */
1077 clear_bit(X86_FEATURE_3DNOW & 31, &edx);
1078 clear_bit(X86_FEATURE_3DNOWEXT & 31, &edx);
1080 else if ( ( input == 0x80000007 ) || ( input == 0x8000000A ) )
1082 /* Mask out features of power management and SVM extension. */
1083 eax = ebx = ecx = edx = 0;
1085 else if ( input == 0x80000008 )
1087 ecx &= 0xFFFFFF00; /* Make sure Number of CPU core is 1 when HTT=0 */
1090 regs->eax = (unsigned long)eax;
1091 regs->ebx = (unsigned long)ebx;
1092 regs->ecx = (unsigned long)ecx;
1093 regs->edx = (unsigned long)edx;
1095 HVM_DBG_LOG(DBG_LEVEL_1,
1096 "svm_vmexit_do_cpuid: eip: %lx, input: %lx, out:eax=%x, "
1097 "ebx=%x, ecx=%x, edx=%x",
1098 eip, input, eax, ebx, ecx, edx);
1100 inst_len = __get_instruction_length(vmcb, INSTR_CPUID, NULL);
1101 ASSERT(inst_len > 0);
1102 __update_guest_eip(vmcb, inst_len);
1106 static inline unsigned long *get_reg_p(unsigned int gpreg,
1107 struct cpu_user_regs *regs, struct vmcb_struct *vmcb)
1109 unsigned long *reg_p = NULL;
1110 switch (gpreg)
1112 case SVM_REG_EAX:
1113 reg_p = (unsigned long *)&regs->eax;
1114 break;
1115 case SVM_REG_EBX:
1116 reg_p = (unsigned long *)&regs->ebx;
1117 break;
1118 case SVM_REG_ECX:
1119 reg_p = (unsigned long *)&regs->ecx;
1120 break;
1121 case SVM_REG_EDX:
1122 reg_p = (unsigned long *)&regs->edx;
1123 break;
1124 case SVM_REG_EDI:
1125 reg_p = (unsigned long *)&regs->edi;
1126 break;
1127 case SVM_REG_ESI:
1128 reg_p = (unsigned long *)&regs->esi;
1129 break;
1130 case SVM_REG_EBP:
1131 reg_p = (unsigned long *)&regs->ebp;
1132 break;
1133 case SVM_REG_ESP:
1134 reg_p = (unsigned long *)&vmcb->rsp;
1135 break;
1136 #if __x86_64__
1137 case SVM_REG_R8:
1138 reg_p = (unsigned long *)&regs->r8;
1139 break;
1140 case SVM_REG_R9:
1141 reg_p = (unsigned long *)&regs->r9;
1142 break;
1143 case SVM_REG_R10:
1144 reg_p = (unsigned long *)&regs->r10;
1145 break;
1146 case SVM_REG_R11:
1147 reg_p = (unsigned long *)&regs->r11;
1148 break;
1149 case SVM_REG_R12:
1150 reg_p = (unsigned long *)&regs->r12;
1151 break;
1152 case SVM_REG_R13:
1153 reg_p = (unsigned long *)&regs->r13;
1154 break;
1155 case SVM_REG_R14:
1156 reg_p = (unsigned long *)&regs->r14;
1157 break;
1158 case SVM_REG_R15:
1159 reg_p = (unsigned long *)&regs->r15;
1160 break;
1161 #endif
1162 default:
1163 BUG();
1166 return reg_p;
1170 static inline unsigned long get_reg(unsigned int gpreg,
1171 struct cpu_user_regs *regs, struct vmcb_struct *vmcb)
1173 unsigned long *gp;
1174 gp = get_reg_p(gpreg, regs, vmcb);
1175 return *gp;
1179 static inline void set_reg(unsigned int gpreg, unsigned long value,
1180 struct cpu_user_regs *regs, struct vmcb_struct *vmcb)
1182 unsigned long *gp;
1183 gp = get_reg_p(gpreg, regs, vmcb);
1184 *gp = value;
1188 static void svm_dr_access (struct vcpu *v, unsigned int reg, unsigned int type,
1189 struct cpu_user_regs *regs)
1191 unsigned long *reg_p = 0;
1192 unsigned int gpreg = 0;
1193 unsigned long eip;
1194 int inst_len;
1195 int index;
1196 struct vmcb_struct *vmcb;
1197 u8 buffer[MAX_INST_LEN];
1198 u8 prefix = 0;
1200 vmcb = v->arch.hvm_svm.vmcb;
1202 ASSERT(vmcb);
1204 eip = vmcb->rip;
1205 inst_copy_from_guest(buffer, svm_rip2pointer(vmcb), sizeof(buffer));
1206 index = skip_prefix_bytes(buffer, sizeof(buffer));
1208 ASSERT(buffer[index+0] == 0x0f && (buffer[index+1] & 0xFD) == 0x21);
1210 if (index > 0 && (buffer[index-1] & 0xF0) == 0x40)
1211 prefix = buffer[index-1];
1213 gpreg = decode_src_reg(prefix, buffer[index + 2]);
1214 ASSERT(reg == decode_dest_reg(prefix, buffer[index + 2]));
1216 HVM_DBG_LOG(DBG_LEVEL_1, "svm_dr_access : eip=%lx, reg=%d, gpreg = %x",
1217 eip, reg, gpreg);
1219 reg_p = get_reg_p(gpreg, regs, vmcb);
1221 switch (type)
1223 case TYPE_MOV_TO_DR:
1224 inst_len = __get_instruction_length(vmcb, INSTR_MOV2DR, buffer);
1225 v->arch.guest_context.debugreg[reg] = *reg_p;
1226 break;
1227 case TYPE_MOV_FROM_DR:
1228 inst_len = __get_instruction_length(vmcb, INSTR_MOVDR2, buffer);
1229 *reg_p = v->arch.guest_context.debugreg[reg];
1230 break;
1231 default:
1232 __hvm_bug(regs);
1233 break;
1235 ASSERT(inst_len > 0);
1236 __update_guest_eip(vmcb, inst_len);
1240 static void svm_get_prefix_info(
1241 struct vmcb_struct *vmcb,
1242 unsigned int dir, segment_selector_t **seg, unsigned int *asize)
1244 unsigned char inst[MAX_INST_LEN];
1245 int i;
1247 memset(inst, 0, MAX_INST_LEN);
1248 if (inst_copy_from_guest(inst, svm_rip2pointer(vmcb), sizeof(inst))
1249 != MAX_INST_LEN)
1251 printk("%s: get guest instruction failed\n", __func__);
1252 domain_crash_synchronous();
1255 for (i = 0; i < MAX_INST_LEN; i++)
1257 switch (inst[i])
1259 case 0xf3: /* REPZ */
1260 case 0xf2: /* REPNZ */
1261 case 0xf0: /* LOCK */
1262 case 0x66: /* data32 */
1263 #if __x86_64__
1264 /* REX prefixes */
1265 case 0x40:
1266 case 0x41:
1267 case 0x42:
1268 case 0x43:
1269 case 0x44:
1270 case 0x45:
1271 case 0x46:
1272 case 0x47:
1274 case 0x48:
1275 case 0x49:
1276 case 0x4a:
1277 case 0x4b:
1278 case 0x4c:
1279 case 0x4d:
1280 case 0x4e:
1281 case 0x4f:
1282 #endif
1283 continue;
1284 case 0x67: /* addr32 */
1285 *asize ^= 48; /* Switch 16/32 bits */
1286 continue;
1287 case 0x2e: /* CS */
1288 *seg = &vmcb->cs;
1289 continue;
1290 case 0x36: /* SS */
1291 *seg = &vmcb->ss;
1292 continue;
1293 case 0x26: /* ES */
1294 *seg = &vmcb->es;
1295 continue;
1296 case 0x64: /* FS */
1297 *seg = &vmcb->fs;
1298 continue;
1299 case 0x65: /* GS */
1300 *seg = &vmcb->gs;
1301 continue;
1302 case 0x3e: /* DS */
1303 *seg = &vmcb->ds;
1304 continue;
1305 default:
1306 break;
1308 return;
1313 /* Get the address of INS/OUTS instruction */
1314 static inline int svm_get_io_address(
1315 struct vcpu *v,
1316 struct cpu_user_regs *regs, unsigned int dir,
1317 unsigned long *count, unsigned long *addr)
1319 unsigned long reg;
1320 unsigned int asize = 0;
1321 unsigned int isize;
1322 int long_mode;
1323 ioio_info_t info;
1324 segment_selector_t *seg = NULL;
1325 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1327 info.bytes = vmcb->exitinfo1;
1329 /* If we're in long mode, we shouldn't check the segment presence and limit */
1330 long_mode = vmcb->cs.attributes.fields.l && vmcb->efer & EFER_LMA;
1332 /* d field of cs.attributes is 1 for 32-bit, 0 for 16 or 64 bit.
1333 * l field combined with EFER_LMA -> longmode says whether it's 16 or 64 bit.
1334 */
1335 asize = (long_mode)?64:((vmcb->cs.attributes.fields.db)?32:16);
1338 /* The INS/OUTS instructions are a single byte, so if we got more
1339 * than one byte (plus maybe a REP prefix), there is some prefix and we
1340 * need to figure out what it is...
1341 */
1342 isize = vmcb->exitinfo2 - vmcb->rip;
1344 if (info.fields.rep)
1345 isize --;
1347 if (isize > 1)
1349 svm_get_prefix_info(vmcb, dir, &seg, &asize);
1352 ASSERT(dir == IOREQ_READ || dir == IOREQ_WRITE);
1354 if (dir == IOREQ_WRITE)
1356 reg = regs->esi;
1357 if (!seg) /* If no prefix, used DS. */
1358 seg = &vmcb->ds;
1360 else
1362 reg = regs->edi;
1363 seg = &vmcb->es; /* Note: This is ALWAYS ES. */
1366 /* If the segment isn't present, give GP fault! */
1367 if (!long_mode && !seg->attributes.fields.p)
1369 svm_inject_exception(v, TRAP_gp_fault, 1, seg->sel);
1370 return 0;
1373 if (asize == 16)
1375 *addr = (reg & 0xFFFF);
1376 *count = regs->ecx & 0xffff;
1378 else
1380 *addr = reg;
1381 *count = regs->ecx;
1384 if (!long_mode) {
1385 if (*addr > seg->limit)
1387 svm_inject_exception(v, TRAP_gp_fault, 1, seg->sel);
1388 return 0;
1390 else
1392 *addr += seg->base;
1397 return 1;
1401 static void svm_io_instruction(struct vcpu *v)
1403 struct cpu_user_regs *regs;
1404 struct hvm_io_op *pio_opp;
1405 unsigned int port;
1406 unsigned int size, dir;
1407 ioio_info_t info;
1408 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1410 ASSERT(vmcb);
1411 pio_opp = &current->arch.hvm_vcpu.io_op;
1412 pio_opp->instr = INSTR_PIO;
1413 pio_opp->flags = 0;
1415 regs = &pio_opp->io_context;
1417 /* Copy current guest state into io instruction state structure. */
1418 memcpy(regs, guest_cpu_user_regs(), HVM_CONTEXT_STACK_BYTES);
1420 info.bytes = vmcb->exitinfo1;
1422 port = info.fields.port; /* port used to be addr */
1423 dir = info.fields.type; /* direction */
1424 if (info.fields.sz32)
1425 size = 4;
1426 else if (info.fields.sz16)
1427 size = 2;
1428 else
1429 size = 1;
1431 HVM_DBG_LOG(DBG_LEVEL_IO,
1432 "svm_io_instruction: port 0x%x eip=%x:%"PRIx64", "
1433 "exit_qualification = %"PRIx64,
1434 port, vmcb->cs.sel, vmcb->rip, info.bytes);
1436 /* string instruction */
1437 if (info.fields.str)
1439 unsigned long addr, count;
1440 int sign = regs->eflags & EF_DF ? -1 : 1;
1442 if (!svm_get_io_address(v, regs, dir, &count, &addr))
1444 /* We failed to get a valid address, so don't do the IO operation -
1445 * it would just get worse if we do! Hopefully the guest is handling
1446 * gp-faults...
1447 */
1448 return;
1451 /* "rep" prefix */
1452 if (info.fields.rep)
1454 pio_opp->flags |= REPZ;
1456 else
1458 count = 1;
1461 /*
1462 * Handle string pio instructions that cross pages or that
1463 * are unaligned. See the comments in hvm_platform.c/handle_mmio()
1464 */
1465 if ((addr & PAGE_MASK) != ((addr + size - 1) & PAGE_MASK))
1467 unsigned long value = 0;
1469 pio_opp->flags |= OVERLAP;
1471 if (dir == IOREQ_WRITE)
1472 hvm_copy(&value, addr, size, HVM_COPY_IN);
1474 send_pio_req(regs, port, 1, size, value, dir, 0);
1476 else
1478 if ((addr & PAGE_MASK) != ((addr + count * size - 1) & PAGE_MASK))
1480 if (sign > 0)
1481 count = (PAGE_SIZE - (addr & ~PAGE_MASK)) / size;
1482 else
1483 count = (addr & ~PAGE_MASK) / size;
1485 else
1486 vmcb->rip = vmcb->exitinfo2;
1488 send_pio_req(regs, port, count, size, addr, dir, 1);
1491 else
1493 /*
1494 * On SVM, the RIP of the instruction following the IN/OUT is saved in
1495 * ExitInfo2
1496 */
1497 vmcb->rip = vmcb->exitinfo2;
1499 if (port == 0xe9 && dir == IOREQ_WRITE && size == 1)
1500 hvm_print_line(v, regs->eax); /* guest debug output */
1502 send_pio_req(regs, port, 1, size, regs->eax, dir, 0);
1506 static int svm_set_cr0(unsigned long value)
1508 struct vcpu *v = current;
1509 unsigned long mfn;
1510 int paging_enabled;
1511 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1513 ASSERT(vmcb);
1515 /* We don't want to lose PG. ET is reserved and should always be 1. */
1516 paging_enabled = svm_paging_enabled(v);
1517 value |= X86_CR0_ET;
1518 vmcb->cr0 = value | X86_CR0_PG;
1519 v->arch.hvm_svm.cpu_shadow_cr0 = value;
1521 /* TS cleared? Then initialise FPU now. */
1522 if ( !(value & X86_CR0_TS) )
1524 setup_fpu(v);
1525 vmcb->exception_intercepts &= ~EXCEPTION_BITMAP_NM;
1528 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR0 value = %lx\n", value);
1530 if ((value & X86_CR0_PE) && (value & X86_CR0_PG) && !paging_enabled)
1532 /* The guest CR3 must point to guest physical memory. */
1533 if (!VALID_MFN(mfn =
1534 get_mfn_from_gpfn(v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT))
1535 || !get_page(mfn_to_page(mfn), v->domain))
1537 printk("Invalid CR3 value = %lx\n", v->arch.hvm_svm.cpu_cr3);
1538 domain_crash_synchronous(); /* need to take a clean path */
1541 #if defined(__x86_64__)
1542 if (test_bit(SVM_CPU_STATE_LME_ENABLED, &v->arch.hvm_svm.cpu_state)
1543 && !test_bit(SVM_CPU_STATE_PAE_ENABLED,
1544 &v->arch.hvm_svm.cpu_state))
1546 HVM_DBG_LOG(DBG_LEVEL_1, "Enable paging before PAE enable\n");
1547 svm_inject_exception(v, TRAP_gp_fault, 1, 0);
1550 if (test_bit(SVM_CPU_STATE_LME_ENABLED, &v->arch.hvm_svm.cpu_state))
1552 /* PAE should already be enabled at this point */
1553 HVM_DBG_LOG(DBG_LEVEL_1, "Enable the Long mode\n");
1554 set_bit(SVM_CPU_STATE_LMA_ENABLED,
1555 &v->arch.hvm_svm.cpu_state);
1556 vmcb->efer |= (EFER_LMA | EFER_LME);
1557 if (!shadow_set_guest_paging_levels(v->domain, PAGING_L4) )
1559 printk("Unsupported guest paging levels\n");
1560 domain_crash_synchronous(); /* need to take a clean path */
1563 else
1564 #endif /* __x86_64__ */
1566 #if CONFIG_PAGING_LEVELS >= 3
1567 /* seems it's a 32-bit or 32-bit PAE guest */
1568 if ( test_bit(SVM_CPU_STATE_PAE_ENABLED,
1569 &v->arch.hvm_svm.cpu_state) )
1571 /* The guest enables PAE first and then enables PG, so it is
1572 * really a PAE guest */
1573 if ( !shadow_set_guest_paging_levels(v->domain, PAGING_L3) )
1575 printk("Unsupported guest paging levels\n");
1576 domain_crash_synchronous();
1579 else
1581 if ( !shadow_set_guest_paging_levels(v->domain, PAGING_L2) )
1583 printk("Unsupported guest paging levels\n");
1584 domain_crash_synchronous(); /* need to take a clean path */
1587 #endif
1590 /* Now arch.guest_table points to machine physical. */
1591 v->arch.guest_table = pagetable_from_pfn(mfn);
1592 update_pagetables(v);
1594 HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx",
1595 (unsigned long) (mfn << PAGE_SHIFT));
1597 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
1598 vmcb->cr3 = pagetable_get_paddr(v->arch.shadow_table);
1600 /* arch->shadow_table should hold the next CR3 for shadow */
1601 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx\n",
1602 v->arch.hvm_svm.cpu_cr3, mfn);
1604 return 1;
1607 if ( !((value & X86_CR0_PE) && (value & X86_CR0_PG)) && paging_enabled )
1608 if ( v->arch.hvm_svm.cpu_cr3 ) {
1609 put_page(mfn_to_page(get_mfn_from_gpfn(
1610 v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT)));
1611 v->arch.guest_table = pagetable_null();
1614 /*
1615 * SVM implements paged real-mode and when we return to real-mode
1616 * we revert to the physical mappings that the domain builder
1617 * created.
1618 */
1619 if ((value & X86_CR0_PE) == 0) {
1620 if (value & X86_CR0_PG) {
1621 svm_inject_exception(v, TRAP_gp_fault, 1, 0);
1622 return 0;
1625 clear_all_shadow_status( v->domain );
1626 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
1627 vmcb->cr3 = pagetable_get_paddr(v->domain->arch.phys_table);
1629 else if ( (value & (X86_CR0_PE | X86_CR0_PG)) == X86_CR0_PE )
1631 /* we should take care of this kind of situation */
1632 clear_all_shadow_status(v->domain);
1633 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
1634 vmcb->cr3 = pagetable_get_paddr(v->domain->arch.phys_table);
1637 return 1;
1640 /*
1641 * Read from control registers. CR0 and CR4 are read from the shadow.
1642 */
1643 static void mov_from_cr(int cr, int gp, struct cpu_user_regs *regs)
1645 unsigned long value = 0;
1646 struct vcpu *v = current;
1647 struct vmcb_struct *vmcb;
1649 vmcb = v->arch.hvm_svm.vmcb;
1650 ASSERT(vmcb);
1652 switch (cr)
1654 case 0:
1655 value = v->arch.hvm_svm.cpu_shadow_cr0;
1656 if (svm_dbg_on)
1657 printk("CR0 read =%lx \n", value );
1658 break;
1659 case 2:
1660 value = vmcb->cr2;
1661 break;
1662 case 3:
1663 value = (unsigned long) v->arch.hvm_svm.cpu_cr3;
1664 if (svm_dbg_on)
1665 printk("CR3 read =%lx \n", value );
1666 break;
1667 case 4:
1668 value = (unsigned long) v->arch.hvm_svm.cpu_shadow_cr4;
1669 if (svm_dbg_on)
1670 printk( "CR4 read=%lx\n", value );
1671 break;
1672 case 8:
1673 #if 0
1674 value = vmcb->m_cr8;
1675 #else
1676 ASSERT(0);
1677 #endif
1678 break;
1680 default:
1681 __hvm_bug(regs);
1684 set_reg(gp, value, regs, vmcb);
1686 HVM_DBG_LOG(DBG_LEVEL_VMMU, "mov_from_cr: CR%d, value = %lx,", cr, value);
1690 static inline int svm_pgbit_test(struct vcpu *v)
1692 return v->arch.hvm_svm.cpu_shadow_cr0 & X86_CR0_PG;
1696 /*
1697 * Write to control registers
1698 */
1699 static int mov_to_cr(int gpreg, int cr, struct cpu_user_regs *regs)
1701 unsigned long value;
1702 unsigned long old_cr;
1703 struct vcpu *v = current;
1704 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1706 ASSERT(vmcb);
1708 value = get_reg(gpreg, regs, vmcb);
1710 HVM_DBG_LOG(DBG_LEVEL_1, "mov_to_cr: CR%d, value = %lx,", cr, value);
1711 HVM_DBG_LOG(DBG_LEVEL_1, "current = %lx,", (unsigned long) current);
1713 switch (cr)
1715 case 0:
1716 if (svm_dbg_on)
1717 printk("CR0 write =%lx \n", value );
1718 return svm_set_cr0(value);
1720 case 3:
1722 unsigned long old_base_mfn, mfn;
1723 if (svm_dbg_on)
1724 printk("CR3 write =%lx \n", value );
1725 /* If paging is not enabled yet, simply copy the value to CR3. */
1726 if (!svm_paging_enabled(v)) {
1727 v->arch.hvm_svm.cpu_cr3 = value;
1728 break;
1730 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
1732 /* We make a new one if the shadow does not exist. */
1733 if (value == v->arch.hvm_svm.cpu_cr3)
1735 /*
1736 * This is a simple TLB flush, implying the guest has
1737 * removed some translation or changed page attributes.
1738 * We simply invalidate the shadow.
1739 */
1740 mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT);
1741 if (mfn != pagetable_get_pfn(v->arch.guest_table))
1742 __hvm_bug(regs);
1743 shadow_sync_all(v->domain);
1745 else
1747 /*
1748 * If different, make a shadow. Check if the PDBR is valid
1749 * first.
1750 */
1751 HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 value = %lx", value);
1752 if (((value >> PAGE_SHIFT) > v->domain->max_pages)
1753 || !VALID_MFN(mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT))
1754 || !get_page(mfn_to_page(mfn), v->domain))
1756 printk("Invalid CR3 value=%lx\n", value);
1757 domain_crash_synchronous(); /* need to take a clean path */
1760 old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
1761 v->arch.guest_table = pagetable_from_pfn(mfn);
1763 if (old_base_mfn)
1764 put_page(mfn_to_page(old_base_mfn));
1766 /*
1767 * arch.shadow_table should now hold the next CR3 for shadow
1768 */
1769 #if CONFIG_PAGING_LEVELS >= 3
1770 if ( v->domain->arch.ops->guest_paging_levels == PAGING_L3 )
1771 shadow_sync_all(v->domain);
1772 #endif
1773 v->arch.hvm_svm.cpu_cr3 = value;
1774 update_pagetables(v);
1775 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx", value);
1776 vmcb->cr3 = pagetable_get_paddr(v->arch.shadow_table);
1778 break;
1781 case 4: /* CR4 */
1783 if (svm_dbg_on)
1784 printk( "write cr4=%lx, cr0=%lx\n",
1785 value, v->arch.hvm_svm.cpu_shadow_cr0 );
1786 old_cr = v->arch.hvm_svm.cpu_shadow_cr4;
1787 if ( value & X86_CR4_PAE && !(old_cr & X86_CR4_PAE) )
1789 set_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
1790 if ( svm_pgbit_test(v) )
1792 /* The guest is a 32-bit PAE guest. */
1793 #if CONFIG_PAGING_LEVELS >= 3
1794 unsigned long mfn, old_base_mfn;
1796 if( !shadow_set_guest_paging_levels(v->domain, PAGING_L3) )
1798 printk("Unsupported guest paging levels\n");
1799 domain_crash_synchronous(); /* need to take a clean path */
1802 if ( !VALID_MFN(mfn = get_mfn_from_gpfn(
1803 v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT)) ||
1804 !get_page(mfn_to_page(mfn), v->domain) )
1806 printk("Invalid CR3 value = %lx", v->arch.hvm_svm.cpu_cr3);
1807 domain_crash_synchronous(); /* need to take a clean path */
1810 old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
1811 if ( old_base_mfn )
1812 put_page(mfn_to_page(old_base_mfn));
1814 /*
1815 * Now arch.guest_table points to machine physical.
1816 */
1818 v->arch.guest_table = pagetable_from_pfn(mfn);
1819 update_pagetables(v);
1821 HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx",
1822 (unsigned long) (mfn << PAGE_SHIFT));
1824 vmcb->cr3 = pagetable_get_paddr(v->arch.shadow_table);
1826 /*
1827 * arch->shadow_table should hold the next CR3 for shadow
1828 */
1830 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx",
1831 v->arch.hvm_svm.cpu_cr3, mfn);
1832 #endif
1834 else
1836 /* The guest is a 64 bit or 32-bit PAE guest. */
1837 #if CONFIG_PAGING_LEVELS >= 3
1838 if ( (v->domain->arch.ops != NULL) &&
1839 v->domain->arch.ops->guest_paging_levels == PAGING_L2)
1841 /* Seems the guest first enables PAE without enabling PG;
1842 * it enables PG later, so it is a 32-bit PAE
1843 * guest */
1845 if ( !shadow_set_guest_paging_levels(v->domain, PAGING_L3) )
1847 printk("Unsupported guest paging levels\n");
1848 domain_crash_synchronous();
1851 else
1853 if ( !shadow_set_guest_paging_levels(v->domain,
1854 PAGING_L4) )
1856 printk("Unsupported guest paging levels\n");
1857 domain_crash_synchronous();
1860 #endif
1863 else if (value & X86_CR4_PAE) {
1864 set_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
1865 } else {
1866 if (test_bit(SVM_CPU_STATE_LMA_ENABLED,
1867 &v->arch.hvm_svm.cpu_state)) {
1868 svm_inject_exception(v, TRAP_gp_fault, 1, 0);
1870 clear_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
1873 v->arch.hvm_svm.cpu_shadow_cr4 = value;
1874 vmcb->cr4 = value | SVM_CR4_HOST_MASK;
1876 /*
1877 * Writing to CR4 to modify the PSE, PGE, or PAE flag invalidates
1878 * all TLB entries except global entries.
1879 */
1880 if ((old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE))
1882 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
1883 shadow_sync_all(v->domain);
1885 break;
1888 default:
1889 printk("invalid cr: %d\n", cr);
1890 __hvm_bug(regs);
1893 return 1;
1897 #define ARR_SIZE(x) (sizeof(x) / sizeof(x[0]))
1900 static int svm_cr_access(struct vcpu *v, unsigned int cr, unsigned int type,
1901 struct cpu_user_regs *regs)
1903 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1904 int inst_len = 0;
1905 int index;
1906 unsigned int gpreg;
1907 unsigned long value;
1908 u8 buffer[MAX_INST_LEN];
1909 u8 prefix = 0;
1910 int result = 1;
1911 enum instruction_index list_a[] = {INSTR_MOV2CR, INSTR_CLTS, INSTR_LMSW};
1912 enum instruction_index list_b[] = {INSTR_MOVCR2, INSTR_SMSW};
1913 enum instruction_index match;
1915 ASSERT(vmcb);
1917 inst_copy_from_guest(buffer, svm_rip2pointer(vmcb), sizeof(buffer));
1918 /* get index to first actual instruction byte - as we will need to know where the
1919 * prefix lives later on
1920 */
1921 index = skip_prefix_bytes(buffer, sizeof(buffer));
1923 if (type == TYPE_MOV_TO_CR)
1925 inst_len = __get_instruction_length_from_list(vmcb, list_a,
1926 ARR_SIZE(list_a), &buffer[index], &match);
1928 else
1930 inst_len = __get_instruction_length_from_list(vmcb, list_b,
1931 ARR_SIZE(list_b), &buffer[index], &match);
1934 ASSERT(inst_len > 0);
1936 inst_len += index;
1938 /* Check for REX prefix - it's ALWAYS the last byte of any prefix bytes */
1939 if (index > 0 && (buffer[index-1] & 0xF0) == 0x40)
1940 prefix = buffer[index-1];
1942 HVM_DBG_LOG(DBG_LEVEL_1, "eip = %lx", (unsigned long) vmcb->rip);
1944 switch (match)
1946 case INSTR_MOV2CR:
1947 gpreg = decode_src_reg(prefix, buffer[index+2]);
1948 result = mov_to_cr(gpreg, cr, regs);
1949 break;
1951 case INSTR_MOVCR2:
1952 gpreg = decode_src_reg(prefix, buffer[index+2]);
1953 mov_from_cr(cr, gpreg, regs);
1954 break;
1956 case INSTR_CLTS:
1957 /* TS being cleared means that it's time to restore fpu state. */
1958 setup_fpu(current);
1959 vmcb->exception_intercepts &= ~EXCEPTION_BITMAP_NM;
1960 vmcb->cr0 &= ~X86_CR0_TS; /* clear TS */
1961 v->arch.hvm_svm.cpu_shadow_cr0 &= ~X86_CR0_TS; /* clear TS */
1962 break;
1964 case INSTR_LMSW:
1965 if (svm_dbg_on)
1966 svm_dump_inst(svm_rip2pointer(vmcb));
1968 gpreg = decode_src_reg(prefix, buffer[index+2]);
1969 value = get_reg(gpreg, regs, vmcb) & 0xF;
1971 if (svm_dbg_on)
1972 printk("CR0-LMSW value=%lx, reg=%d, inst_len=%d\n", value, gpreg,
1973 inst_len);
1975 value = (v->arch.hvm_svm.cpu_shadow_cr0 & ~0xF) | value;
1977 if (svm_dbg_on)
1978 printk("CR0-LMSW CR0 - New value=%lx\n", value);
1980 result = svm_set_cr0(value);
1981 break;
1983 case INSTR_SMSW:
1984 if (svm_dbg_on)
1985 svm_dump_inst(svm_rip2pointer(vmcb));
1986 value = v->arch.hvm_svm.cpu_shadow_cr0;
1987 gpreg = decode_src_reg(prefix, buffer[index+2]);
1988 set_reg(gpreg, value, regs, vmcb);
1990 if (svm_dbg_on)
1991 printk("CR0-SMSW value=%lx, reg=%d, inst_len=%d\n", value, gpreg,
1992 inst_len);
1993 break;
1995 default:
1996 __hvm_bug(regs);
1997 break;
2000 ASSERT(inst_len);
2002 __update_guest_eip(vmcb, inst_len);
2004 return result;
2007 static inline void svm_do_msr_access(
2008 struct vcpu *v, struct cpu_user_regs *regs)
2010 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2011 int inst_len;
2012 u64 msr_content=0;
2013 u32 eax, edx;
2015 ASSERT(vmcb);
2017 HVM_DBG_LOG(DBG_LEVEL_1, "svm_do_msr_access: ecx=%lx, eax=%lx, edx=%lx, "
2018 "exitinfo = %lx", (unsigned long)regs->ecx,
2019 (unsigned long)regs->eax, (unsigned long)regs->edx,
2020 (unsigned long)vmcb->exitinfo1);
2022 /* is it a read? */
2023 if (vmcb->exitinfo1 == 0)
2025 inst_len = __get_instruction_length(vmcb, INSTR_RDMSR, NULL);
2027 regs->edx = 0;
2028 switch (regs->ecx) {
2029 case MSR_IA32_TIME_STAMP_COUNTER:
2030 msr_content = hvm_get_guest_time(v);
2031 break;
2032 case MSR_IA32_SYSENTER_CS:
2033 msr_content = vmcb->sysenter_cs;
2034 break;
2035 case MSR_IA32_SYSENTER_ESP:
2036 msr_content = vmcb->sysenter_esp;
2037 break;
2038 case MSR_IA32_SYSENTER_EIP:
2039 msr_content = vmcb->sysenter_eip;
2040 break;
2041 case MSR_IA32_APICBASE:
2042 msr_content = VLAPIC(v) ? VLAPIC(v)->apic_base_msr : 0;
2043 break;
2044 default:
2045 if (long_mode_do_msr_read(regs))
2046 goto done;
2048 if ( rdmsr_hypervisor_regs(regs->ecx, &eax, &edx) )
2050 regs->eax = eax;
2051 regs->edx = edx;
2052 goto done;
2055 rdmsr_safe(regs->ecx, regs->eax, regs->edx);
2056 break;
2058 regs->eax = msr_content & 0xFFFFFFFF;
2059 regs->edx = msr_content >> 32;
2061 else
2063 inst_len = __get_instruction_length(vmcb, INSTR_WRMSR, NULL);
2064 msr_content = (regs->eax & 0xFFFFFFFF) | ((u64)regs->edx << 32);
2066 switch (regs->ecx)
2068 case MSR_IA32_TIME_STAMP_COUNTER:
2069 svm_set_guest_time(v, msr_content);
2070 break;
2071 case MSR_IA32_SYSENTER_CS:
2072 vmcb->sysenter_cs = msr_content;
2073 break;
2074 case MSR_IA32_SYSENTER_ESP:
2075 vmcb->sysenter_esp = msr_content;
2076 break;
2077 case MSR_IA32_SYSENTER_EIP:
2078 vmcb->sysenter_eip = msr_content;
2079 break;
2080 case MSR_IA32_APICBASE:
2081 vlapic_msr_set(VLAPIC(v), msr_content);
2082 break;
2083 default:
2084 if ( !long_mode_do_msr_write(regs) )
2085 wrmsr_hypervisor_regs(regs->ecx, regs->eax, regs->edx);
2086 break;
2090 done:
2092 HVM_DBG_LOG(DBG_LEVEL_1, "svm_do_msr_access returns: "
2093 "ecx=%lx, eax=%lx, edx=%lx",
2094 (unsigned long)regs->ecx, (unsigned long)regs->eax,
2095 (unsigned long)regs->edx);
2097 __update_guest_eip(vmcb, inst_len);
2101 /*
2102 * Need to use this exit to reschedule
2103 */
2104 static inline void svm_vmexit_do_hlt(struct vmcb_struct *vmcb)
2106 struct vcpu *v = current;
2107 struct periodic_time *pt=&v->domain->arch.hvm_domain.pl_time.periodic_tm;
2108 s_time_t next_pit = -1, next_wakeup;
2110 __update_guest_eip(vmcb, 1);
2112 /* check for interrupt not handled or new interrupt */
2113 if ( vmcb->vintr.fields.irq || cpu_has_pending_irq(v) )
2114 return;
2116 if ( !v->vcpu_id )
2117 next_pit = get_scheduled(v, pt->irq, pt);
2118 next_wakeup = get_apictime_scheduled(v);
2119 if ( (next_pit != -1 && next_pit < next_wakeup) || next_wakeup == -1 )
2120 next_wakeup = next_pit;
2121 if ( next_wakeup != - 1 )
2122 set_timer(&current->arch.hvm_svm.hlt_timer, next_wakeup);
2123 hvm_safe_block();
2127 static void svm_vmexit_do_invd(struct vmcb_struct *vmcb)
2129 int inst_len;
2131 /* Invalidate the cache - we can't really do that safely - maybe we should
2132 * WBINVD, but I think it's just fine to completely ignore it - we should
2133 * have cache-snooping that solves it anyways. -- Mats P.
2134 */
2136 /* Tell the user that we did this - just in case someone runs some really weird
2137 * operating system and wants to know why it's not working as it should...
2138 */
2139 printk("INVD instruction intercepted - ignored\n");
2141 inst_len = __get_instruction_length(vmcb, INSTR_INVD, NULL);
2142 __update_guest_eip(vmcb, inst_len);
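/*
 * Added sketch (not part of the original file): if ignoring INVD ever turns
 * out to be unsafe, the intercept could conservatively write back and
 * invalidate the host caches instead, assuming the usual wbinvd() helper
 * from the processor headers is available.
 */
#if 0
static void svm_vmexit_do_invd_conservative(struct vmcb_struct *vmcb)
{
    int inst_len;

    wbinvd();   /* flush and invalidate host caches on the guest's behalf */
    inst_len = __get_instruction_length(vmcb, INSTR_INVD, NULL);
    __update_guest_eip(vmcb, inst_len);
}
#endif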
2148 #ifdef XEN_DEBUGGER
2149 static void svm_debug_save_cpu_user_regs(struct vmcb_struct *vmcb,
2150 struct cpu_user_regs *regs)
2152 regs->eip = vmcb->rip;
2153 regs->esp = vmcb->rsp;
2154 regs->eflags = vmcb->rflags;
2156 regs->xcs = vmcb->cs.sel;
2157 regs->xds = vmcb->ds.sel;
2158 regs->xes = vmcb->es.sel;
2159 regs->xfs = vmcb->fs.sel;
2160 regs->xgs = vmcb->gs.sel;
2161 regs->xss = vmcb->ss.sel;
2165 static void svm_debug_restore_cpu_user_regs(struct cpu_user_regs *regs)
2167 vmcb->ss.sel = regs->xss;
2168 vmcb->rsp = regs->esp;
2169 vmcb->rflags = regs->eflags;
2170 vmcb->cs.sel = regs->xcs;
2171 vmcb->rip = regs->eip;
2173 vmcb->gs.sel = regs->xgs;
2174 vmcb->fs.sel = regs->xfs;
2175 vmcb->es.sel = regs->xes;
2176 vmcb->ds.sel = regs->xds;
2178 #endif
2181 void svm_handle_invlpg(const short invlpga, struct cpu_user_regs *regs)
2183 struct vcpu *v = current;
2184 u8 opcode[MAX_INST_LEN], prefix, length = MAX_INST_LEN;
2185 unsigned long g_vaddr;
2186 int inst_len;
2187 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2189 ASSERT(vmcb);
2190 /*
2191 * It is unknown how many bytes the invlpg instruction will take, so use the
2192 * maximum instruction length here.
2193 */
2194 if (inst_copy_from_guest(opcode, svm_rip2pointer(vmcb), length) < length)
2196 printk("svm_handle_invlpg(): Error reading %d bytes of memory\n", length);
2197 __hvm_bug(regs);
2200 if (invlpga)
2202 inst_len = __get_instruction_length(vmcb, INSTR_INVLPGA, opcode);
2203 ASSERT(inst_len > 0);
2204 __update_guest_eip(vmcb, inst_len);
2206 /*
2207 * The address is implicit in this instruction. At the moment, we don't
2208 * use ecx (ASID) to identify individual guest pages
2209 */
2210 g_vaddr = regs->eax;
2212 else
2214 /* What about multiple prefix codes? */
2215 prefix = (is_prefix(opcode[0])?opcode[0]:0);
2216 inst_len = __get_instruction_length(vmcb, INSTR_INVLPG, opcode);
2217 ASSERT(inst_len > 0);
2219 inst_len--;
2220 length -= inst_len;
2222 /*
2223 * Decode memory operand of the instruction including ModRM, SIB, and
2224 * displacement to get the effective address and length in bytes. Assume
2225 * the system is in either 32- or 64-bit mode.
2226 */
2227 g_vaddr = get_effective_addr_modrm64(vmcb, regs, prefix,
2228 &opcode[inst_len], &length);
2230 inst_len += length;
2231 __update_guest_eip (vmcb, inst_len);
2234 /* Overkill; we may not need this. */
2235 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
2236 shadow_invlpg(v, g_vaddr);
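/*
 * Added sketch (not part of the original file), answering the "multiple
 * prefix codes?" question above: skip every prefix byte in front of the
 * opcode instead of only the first one.  is_prefix() is the same helper
 * used in svm_handle_invlpg().
 */
#if 0
static int skip_prefix_bytes(const u8 *opcode, int max_len)
{
    int i = 0;

    while ( i < max_len && is_prefix(opcode[i]) )
        i++;
    return i;   /* number of prefix bytes preceding the opcode */
}
#endif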
2240 /*
2241 * Reset to realmode causes execution to start at 0xF000:0xFFF0 in
2242 * 16-bit realmode. Basically, this mimics a processor reset.
2244 * returns 0 on success, non-zero otherwise
2245 */
2246 static int svm_do_vmmcall_reset_to_realmode(struct vcpu *v,
2247 struct cpu_user_regs *regs)
2249 struct vmcb_struct *vmcb;
2251 ASSERT(v);
2252 ASSERT(regs);
2254 vmcb = v->arch.hvm_svm.vmcb;
2256 ASSERT(vmcb);
2258 /* clear the vmcb and user regs */
2259 memset(regs, 0, sizeof(struct cpu_user_regs));
2261 /* VMCB Control */
2262 vmcb->tsc_offset = 0;
2264 /* VMCB State */
2265 vmcb->cr0 = X86_CR0_ET | X86_CR0_PG;
2266 v->arch.hvm_svm.cpu_shadow_cr0 = X86_CR0_ET;
2268 vmcb->cr2 = 0;
2269 vmcb->efer = EFER_SVME;
2271 vmcb->cr4 = SVM_CR4_HOST_MASK;
2272 v->arch.hvm_svm.cpu_shadow_cr4 = 0;
2273 clear_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
2275 /* This will jump to ROMBIOS */
2276 vmcb->rip = 0xFFF0;
2278 /* set up the segment registers and all their hidden state */
2279 vmcb->cs.sel = 0xF000;
2280 vmcb->cs.attributes.bytes = 0x089b;
2281 vmcb->cs.limit = 0xffff;
2282 vmcb->cs.base = 0x000F0000;
2284 vmcb->ss.sel = 0x00;
2285 vmcb->ss.attributes.bytes = 0x0893;
2286 vmcb->ss.limit = 0xffff;
2287 vmcb->ss.base = 0x00;
2289 vmcb->ds.sel = 0x00;
2290 vmcb->ds.attributes.bytes = 0x0893;
2291 vmcb->ds.limit = 0xffff;
2292 vmcb->ds.base = 0x00;
2294 vmcb->es.sel = 0x00;
2295 vmcb->es.attributes.bytes = 0x0893;
2296 vmcb->es.limit = 0xffff;
2297 vmcb->es.base = 0x00;
2299 vmcb->fs.sel = 0x00;
2300 vmcb->fs.attributes.bytes = 0x0893;
2301 vmcb->fs.limit = 0xffff;
2302 vmcb->fs.base = 0x00;
2304 vmcb->gs.sel = 0x00;
2305 vmcb->gs.attributes.bytes = 0x0893;
2306 vmcb->gs.limit = 0xffff;
2307 vmcb->gs.base = 0x00;
2309 vmcb->ldtr.sel = 0x00;
2310 vmcb->ldtr.attributes.bytes = 0x0000;
2311 vmcb->ldtr.limit = 0x0;
2312 vmcb->ldtr.base = 0x00;
2314 vmcb->gdtr.sel = 0x00;
2315 vmcb->gdtr.attributes.bytes = 0x0000;
2316 vmcb->gdtr.limit = 0x0;
2317 vmcb->gdtr.base = 0x00;
2319 vmcb->tr.sel = 0;
2320 vmcb->tr.attributes.bytes = 0;
2321 vmcb->tr.limit = 0x0;
2322 vmcb->tr.base = 0;
2324 vmcb->idtr.sel = 0x00;
2325 vmcb->idtr.attributes.bytes = 0x0000;
2326 vmcb->idtr.limit = 0x3ff;
2327 vmcb->idtr.base = 0x00;
2329 vmcb->rax = 0;
2330 vmcb->rsp = 0;
2332 return 0;
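/*
 * Added note (not part of the original file): in real mode a segment base is
 * simply the selector shifted left by four, so the CS setup above
 * (sel 0xF000, base 0x000F0000) together with rip 0xFFF0 makes the first
 * fetch happen at physical address 0xFFFF0, the conventional reset vector
 * inside ROMBIOS.
 */
#if 0
#define REAL_MODE_SEG_BASE(sel)  ((u32)(sel) << 4)
/* REAL_MODE_SEG_BASE(0xF000) + 0xFFF0 == 0x000FFFF0 */
#endif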
2336 /*
2337 * svm_do_vmmcall - SVM VMMCALL handler
2339 * returns 0 on success, non-zero otherwise
2340 */
2341 static int svm_do_vmmcall(struct vcpu *v, struct cpu_user_regs *regs)
2343 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2344 int inst_len;
2346 ASSERT(vmcb);
2347 ASSERT(regs);
2349 inst_len = __get_instruction_length(vmcb, INSTR_VMCALL, NULL);
2350 ASSERT(inst_len > 0);
2352 /* VMMCALL sanity check */
2353 if (vmcb->cpl > get_vmmcall_cpl(regs->edi))
2355 printf("VMMCALL CPL check failed\n");
2356 return -1;
2359 /* handle the request */
2360 switch (regs->edi)
2362 case VMMCALL_RESET_TO_REALMODE:
2363 if (svm_do_vmmcall_reset_to_realmode(v, regs))
2365 printf("svm_do_vmmcall_reset_to_realmode() failed\n");
2366 return -1;
2369 /* since we just reset the VMCB, return without adjusting the eip */
2370 return 0;
2371 case VMMCALL_DEBUG:
2372 printf("DEBUG features not implemented yet\n");
2373 break;
2374 default:
2375 break;
2378 hvm_print_line(v, regs->eax); /* provides the current domain */
2380 __update_guest_eip(vmcb, inst_len);
2381 return 0;
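/*
 * Added guest-side sketch (not part of this file): the handler above takes
 * the function code in %edi, so a guest reset request could look roughly
 * like this.  VMMCALL is encoded as 0x0f 0x01 0xd9; the VMMCALL_* constants
 * would have to be visible to the guest for this to build.
 */
#if 0
static inline void guest_vmmcall(unsigned long function)
{
    __asm__ __volatile__ ( ".byte 0x0f,0x01,0xd9"   /* vmmcall */
                           : : "D" (function) : "memory" );
}

/* e.g. guest_vmmcall(VMMCALL_RESET_TO_REALMODE); */
#endif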
2385 void svm_dump_inst(unsigned long eip)
2387 u8 opcode[256];
2388 unsigned long ptr;
2389 int len;
2390 int i;
2392 ptr = eip & ~0xff;
2393 len = 0;
2395 if (hvm_copy(opcode, ptr, sizeof(opcode), HVM_COPY_IN))
2396 len = sizeof(opcode);
2398 printf("Code bytes around(len=%d) %lx:", len, eip);
2399 for (i = 0; i < len; i++)
2401 if ((i & 0x0f) == 0)
2402 printf("\n%08lx:", ptr+i);
2404 printf("%02x ", opcode[i]);
2407 printf("\n");
2411 void svm_dump_regs(const char *from, struct cpu_user_regs *regs)
2413 struct vcpu *v = current;
2414 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2415 unsigned long pt = pagetable_get_paddr(v->arch.shadow_table);
2417 printf("%s: guest registers from %s:\n", __func__, from);
2418 #if defined (__x86_64__)
2419 printk("rax: %016lx rbx: %016lx rcx: %016lx\n",
2420 regs->rax, regs->rbx, regs->rcx);
2421 printk("rdx: %016lx rsi: %016lx rdi: %016lx\n",
2422 regs->rdx, regs->rsi, regs->rdi);
2423 printk("rbp: %016lx rsp: %016lx r8: %016lx\n",
2424 regs->rbp, regs->rsp, regs->r8);
2425 printk("r9: %016lx r10: %016lx r11: %016lx\n",
2426 regs->r9, regs->r10, regs->r11);
2427 printk("r12: %016lx r13: %016lx r14: %016lx\n",
2428 regs->r12, regs->r13, regs->r14);
2429 printk("r15: %016lx cr0: %016lx cr3: %016lx\n",
2430 regs->r15, v->arch.hvm_svm.cpu_shadow_cr0, vmcb->cr3);
2431 #else
2432 printf("eax: %08x, ebx: %08x, ecx: %08x, edx: %08x\n",
2433 regs->eax, regs->ebx, regs->ecx, regs->edx);
2434 printf("edi: %08x, esi: %08x, ebp: %08x, esp: %08x\n",
2435 regs->edi, regs->esi, regs->ebp, regs->esp);
2436 printf("%s: guest cr0: %lx\n", __func__,
2437 v->arch.hvm_svm.cpu_shadow_cr0);
2438 printf("guest CR3 = %llx\n", vmcb->cr3);
2439 #endif
2440 printf("%s: pt = %lx\n", __func__, pt);
2444 void svm_dump_host_regs(const char *from)
2446 struct vcpu *v = current;
2447 unsigned long pt = pagetable_get_paddr(v->arch.monitor_table);
2448 unsigned long cr3, cr0;
2449 printf("Host registers at %s\n", from);
2451 __asm__ __volatile__ ("\tmov %%cr0,%0\n"
2452 "\tmov %%cr3,%1\n"
2453 : "=r" (cr0), "=r"(cr3));
2454 printf("%s: pt = %lx, cr3 = %lx, cr0 = %lx\n", __func__, pt, cr3, cr0);
2457 #ifdef SVM_EXTRA_DEBUG
2458 static char *exit_reasons[] = {
2459 [VMEXIT_CR0_READ] = "CR0_READ",
2460 [VMEXIT_CR1_READ] = "CR1_READ",
2461 [VMEXIT_CR2_READ] = "CR2_READ",
2462 [VMEXIT_CR3_READ] = "CR3_READ",
2463 [VMEXIT_CR4_READ] = "CR4_READ",
2464 [VMEXIT_CR5_READ] = "CR5_READ",
2465 [VMEXIT_CR6_READ] = "CR6_READ",
2466 [VMEXIT_CR7_READ] = "CR7_READ",
2467 [VMEXIT_CR8_READ] = "CR8_READ",
2468 [VMEXIT_CR9_READ] = "CR9_READ",
2469 [VMEXIT_CR10_READ] = "CR10_READ",
2470 [VMEXIT_CR11_READ] = "CR11_READ",
2471 [VMEXIT_CR12_READ] = "CR12_READ",
2472 [VMEXIT_CR13_READ] = "CR13_READ",
2473 [VMEXIT_CR14_READ] = "CR14_READ",
2474 [VMEXIT_CR15_READ] = "CR15_READ",
2475 [VMEXIT_CR0_WRITE] = "CR0_WRITE",
2476 [VMEXIT_CR1_WRITE] = "CR1_WRITE",
2477 [VMEXIT_CR2_WRITE] = "CR2_WRITE",
2478 [VMEXIT_CR3_WRITE] = "CR3_WRITE",
2479 [VMEXIT_CR4_WRITE] = "CR4_WRITE",
2480 [VMEXIT_CR5_WRITE] = "CR5_WRITE",
2481 [VMEXIT_CR6_WRITE] = "CR6_WRITE",
2482 [VMEXIT_CR7_WRITE] = "CR7_WRITE",
2483 [VMEXIT_CR8_WRITE] = "CR8_WRITE",
2484 [VMEXIT_CR9_WRITE] = "CR9_WRITE",
2485 [VMEXIT_CR10_WRITE] = "CR10_WRITE",
2486 [VMEXIT_CR11_WRITE] = "CR11_WRITE",
2487 [VMEXIT_CR12_WRITE] = "CR12_WRITE",
2488 [VMEXIT_CR13_WRITE] = "CR13_WRITE",
2489 [VMEXIT_CR14_WRITE] = "CR14_WRITE",
2490 [VMEXIT_CR15_WRITE] = "CR15_WRITE",
2491 [VMEXIT_DR0_READ] = "DR0_READ",
2492 [VMEXIT_DR1_READ] = "DR1_READ",
2493 [VMEXIT_DR2_READ] = "DR2_READ",
2494 [VMEXIT_DR3_READ] = "DR3_READ",
2495 [VMEXIT_DR4_READ] = "DR4_READ",
2496 [VMEXIT_DR5_READ] = "DR5_READ",
2497 [VMEXIT_DR6_READ] = "DR6_READ",
2498 [VMEXIT_DR7_READ] = "DR7_READ",
2499 [VMEXIT_DR8_READ] = "DR8_READ",
2500 [VMEXIT_DR9_READ] = "DR9_READ",
2501 [VMEXIT_DR10_READ] = "DR10_READ",
2502 [VMEXIT_DR11_READ] = "DR11_READ",
2503 [VMEXIT_DR12_READ] = "DR12_READ",
2504 [VMEXIT_DR13_READ] = "DR13_READ",
2505 [VMEXIT_DR14_READ] = "DR14_READ",
2506 [VMEXIT_DR15_READ] = "DR15_READ",
2507 [VMEXIT_DR0_WRITE] = "DR0_WRITE",
2508 [VMEXIT_DR1_WRITE] = "DR1_WRITE",
2509 [VMEXIT_DR2_WRITE] = "DR2_WRITE",
2510 [VMEXIT_DR3_WRITE] = "DR3_WRITE",
2511 [VMEXIT_DR4_WRITE] = "DR4_WRITE",
2512 [VMEXIT_DR5_WRITE] = "DR5_WRITE",
2513 [VMEXIT_DR6_WRITE] = "DR6_WRITE",
2514 [VMEXIT_DR7_WRITE] = "DR7_WRITE",
2515 [VMEXIT_DR8_WRITE] = "DR8_WRITE",
2516 [VMEXIT_DR9_WRITE] = "DR9_WRITE",
2517 [VMEXIT_DR10_WRITE] = "DR10_WRITE",
2518 [VMEXIT_DR11_WRITE] = "DR11_WRITE",
2519 [VMEXIT_DR12_WRITE] = "DR12_WRITE",
2520 [VMEXIT_DR13_WRITE] = "DR13_WRITE",
2521 [VMEXIT_DR14_WRITE] = "DR14_WRITE",
2522 [VMEXIT_DR15_WRITE] = "DR15_WRITE",
2523 [VMEXIT_EXCEPTION_DE] = "EXCEPTION_DE",
2524 [VMEXIT_EXCEPTION_DB] = "EXCEPTION_DB",
2525 [VMEXIT_EXCEPTION_NMI] = "EXCEPTION_NMI",
2526 [VMEXIT_EXCEPTION_BP] = "EXCEPTION_BP",
2527 [VMEXIT_EXCEPTION_OF] = "EXCEPTION_OF",
2528 [VMEXIT_EXCEPTION_BR] = "EXCEPTION_BR",
2529 [VMEXIT_EXCEPTION_UD] = "EXCEPTION_UD",
2530 [VMEXIT_EXCEPTION_NM] = "EXCEPTION_NM",
2531 [VMEXIT_EXCEPTION_DF] = "EXCEPTION_DF",
2532 [VMEXIT_EXCEPTION_09] = "EXCEPTION_09",
2533 [VMEXIT_EXCEPTION_TS] = "EXCEPTION_TS",
2534 [VMEXIT_EXCEPTION_NP] = "EXCEPTION_NP",
2535 [VMEXIT_EXCEPTION_SS] = "EXCEPTION_SS",
2536 [VMEXIT_EXCEPTION_GP] = "EXCEPTION_GP",
2537 [VMEXIT_EXCEPTION_PF] = "EXCEPTION_PF",
2538 [VMEXIT_EXCEPTION_15] = "EXCEPTION_15",
2539 [VMEXIT_EXCEPTION_MF] = "EXCEPTION_MF",
2540 [VMEXIT_EXCEPTION_AC] = "EXCEPTION_AC",
2541 [VMEXIT_EXCEPTION_MC] = "EXCEPTION_MC",
2542 [VMEXIT_EXCEPTION_XF] = "EXCEPTION_XF",
2543 [VMEXIT_INTR] = "INTR",
2544 [VMEXIT_NMI] = "NMI",
2545 [VMEXIT_SMI] = "SMI",
2546 [VMEXIT_INIT] = "INIT",
2547 [VMEXIT_VINTR] = "VINTR",
2548 [VMEXIT_CR0_SEL_WRITE] = "CR0_SEL_WRITE",
2549 [VMEXIT_IDTR_READ] = "IDTR_READ",
2550 [VMEXIT_GDTR_READ] = "GDTR_READ",
2551 [VMEXIT_LDTR_READ] = "LDTR_READ",
2552 [VMEXIT_TR_READ] = "TR_READ",
2553 [VMEXIT_IDTR_WRITE] = "IDTR_WRITE",
2554 [VMEXIT_GDTR_WRITE] = "GDTR_WRITE",
2555 [VMEXIT_LDTR_WRITE] = "LDTR_WRITE",
2556 [VMEXIT_TR_WRITE] = "TR_WRITE",
2557 [VMEXIT_RDTSC] = "RDTSC",
2558 [VMEXIT_RDPMC] = "RDPMC",
2559 [VMEXIT_PUSHF] = "PUSHF",
2560 [VMEXIT_POPF] = "POPF",
2561 [VMEXIT_CPUID] = "CPUID",
2562 [VMEXIT_RSM] = "RSM",
2563 [VMEXIT_IRET] = "IRET",
2564 [VMEXIT_SWINT] = "SWINT",
2565 [VMEXIT_INVD] = "INVD",
2566 [VMEXIT_PAUSE] = "PAUSE",
2567 [VMEXIT_HLT] = "HLT",
2568 [VMEXIT_INVLPG] = "INVLPG",
2569 [VMEXIT_INVLPGA] = "INVLPGA",
2570 [VMEXIT_IOIO] = "IOIO",
2571 [VMEXIT_MSR] = "MSR",
2572 [VMEXIT_TASK_SWITCH] = "TASK_SWITCH",
2573 [VMEXIT_FERR_FREEZE] = "FERR_FREEZE",
2574 [VMEXIT_SHUTDOWN] = "SHUTDOWN",
2575 [VMEXIT_VMRUN] = "VMRUN",
2576 [VMEXIT_VMMCALL] = "VMMCALL",
2577 [VMEXIT_VMLOAD] = "VMLOAD",
2578 [VMEXIT_VMSAVE] = "VMSAVE",
2579 [VMEXIT_STGI] = "STGI",
2580 [VMEXIT_CLGI] = "CLGI",
2581 [VMEXIT_SKINIT] = "SKINIT",
2582 [VMEXIT_RDTSCP] = "RDTSCP",
2583 [VMEXIT_ICEBP] = "ICEBP",
2584 [VMEXIT_NPF] = "NPF"
2585 };
2586 #endif /* SVM_EXTRA_DEBUG */
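/*
 * Added sketch (not part of the original file): exit_reasons[] is a sparse,
 * designated-initializer table, so a defensive lookup needs both a bounds
 * check and a NULL check, as the debug printks below do.  ARRAY_SIZE() is
 * assumed to be the usual sizeof-based macro.
 */
#if 0
static const char *exit_reason_name(unsigned int exit_reason)
{
    if ( exit_reason < ARRAY_SIZE(exit_reasons) && exit_reasons[exit_reason] )
        return exit_reasons[exit_reason];
    return "UNKNOWN";
}
#endif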
2588 #ifdef SVM_WALK_GUEST_PAGES
2589 void walk_shadow_and_guest_pt(unsigned long gva)
2591 l2_pgentry_t gpde;
2592 l2_pgentry_t spde;
2593 l1_pgentry_t gpte;
2594 l1_pgentry_t spte;
2595 struct vcpu *v = current;
2596 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2597 unsigned long gpa;
2599 gpa = gva_to_gpa( gva );
2600 printk( "gva = %lx, gpa=%lx, gCR3=%x\n", gva, gpa, (u32)vmcb->cr3 );
2601 if( !svm_paging_enabled(v) || mmio_space(gpa) )
2602 return;
2604 /* let's dump the guest and shadow page info */
2606 __guest_get_l2e(v, gva, &gpde);
2607 printk( "G-PDE = %x, flags=%x\n", gpde.l2, l2e_get_flags(gpde) );
2608 __shadow_get_l2e( v, gva, &spde );
2609 printk( "S-PDE = %x, flags=%x\n", spde.l2, l2e_get_flags(spde) );
2611 if ( unlikely(!(l2e_get_flags(gpde) & _PAGE_PRESENT)) )
2612 return;
2614 spte = l1e_empty();
2616 /* This is actually overkill - we only need to make sure the hl2 is in-sync. */
2617 shadow_sync_va(v, gva);
2619 gpte.l1 = 0;
2620 __copy_from_user(&gpte, &linear_pg_table[ l1_linear_offset(gva) ], sizeof(gpte) );
2621 printk( "G-PTE = %x, flags=%x\n", gpte.l1, l1e_get_flags(gpte) );
2622 __copy_from_user( &spte, &phys_to_machine_mapping[ l1e_get_pfn( gpte ) ],
2623 sizeof(spte) );
2624 printk( "S-PTE = %x, flags=%x\n", spte.l1, l1e_get_flags(spte));
2626 #endif /* SVM_WALK_GUEST_PAGES */
2628 asmlinkage void svm_vmexit_handler(struct cpu_user_regs regs)
2630 unsigned int exit_reason;
2631 unsigned long eip;
2632 struct vcpu *v = current;
2633 int error;
2634 int do_debug = 0;
2635 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2637 ASSERT(vmcb);
2639 exit_reason = vmcb->exitcode;
2640 save_svm_cpu_user_regs(v, &regs);
2642 vmcb->tlb_control = 1;
2644 #ifdef SVM_EXTRA_DEBUG
2646 #if defined(__i386__)
2647 #define rip eip
2648 #endif
2650 static unsigned long intercepts_counter = 0;
2652 if (svm_dbg_on && exit_reason == VMEXIT_EXCEPTION_PF)
2654 if (svm_paging_enabled(v) && !mmio_space(gva_to_gpa(vmcb->exitinfo2)))
2656 printk("I%08ld,ExC=%s(%d),IP=%x:%llx,I1=%llx,I2=%llx,INT=%llx, gpa=%llx\n",
2657 intercepts_counter,
2658 exit_reasons[exit_reason], exit_reason, regs.cs,
2659 (unsigned long long) regs.rip,
2660 (unsigned long long) vmcb->exitinfo1,
2661 (unsigned long long) vmcb->exitinfo2,
2662 (unsigned long long) vmcb->exitintinfo.bytes,
2663 (unsigned long long) gva_to_gpa( vmcb->exitinfo2 ) );
2665 else
2667 printk("I%08ld,ExC=%s(%d),IP=%x:%llx,I1=%llx,I2=%llx,INT=%llx\n",
2668 intercepts_counter,
2669 exit_reasons[exit_reason], exit_reason, regs.cs,
2670 (unsigned long long) regs.rip,
2671 (unsigned long long) vmcb->exitinfo1,
2672 (unsigned long long) vmcb->exitinfo2,
2673 (unsigned long long) vmcb->exitintinfo.bytes );
2676 else if ( svm_dbg_on
2677 && exit_reason != VMEXIT_IOIO
2678 && exit_reason != VMEXIT_INTR)
2681 if (exit_reasons[exit_reason])
2683 printk("I%08ld,ExC=%s(%d),IP=%x:%llx,I1=%llx,I2=%llx,INT=%llx\n",
2684 intercepts_counter,
2685 exit_reasons[exit_reason], exit_reason, regs.cs,
2686 (unsigned long long) regs.rip,
2687 (unsigned long long) vmcb->exitinfo1,
2688 (unsigned long long) vmcb->exitinfo2,
2689 (unsigned long long) vmcb->exitintinfo.bytes);
2691 else
2693 printk("I%08ld,ExC=%d(0x%x),IP=%x:%llx,I1=%llx,I2=%llx,INT=%llx\n",
2694 intercepts_counter, exit_reason, exit_reason, regs.cs,
2695 (unsigned long long) regs.rip,
2696 (unsigned long long) vmcb->exitinfo1,
2697 (unsigned long long) vmcb->exitinfo2,
2698 (unsigned long long) vmcb->exitintinfo.bytes);
2702 #ifdef SVM_WALK_GUEST_PAGES
2703 if( exit_reason == VMEXIT_EXCEPTION_PF
2704 && ( ( vmcb->exitinfo2 == vmcb->rip )
2705 || vmcb->exitintinfo.bytes) )
2707 if (svm_paging_enabled(v) && !mmio_space(gva_to_gpa(vmcb->exitinfo2)))
2708 walk_shadow_and_guest_pt( vmcb->exitinfo2 );
2710 #endif
2712 intercepts_counter++;
2714 #if 0
2715 if (svm_dbg_on)
2716 do_debug = svm_do_debugout(exit_reason);
2717 #endif
2719 if (do_debug)
2721 printk("%s:+ guest_table = 0x%08x, monitor_table = 0x%08x, "
2722 "shadow_table = 0x%08x\n",
2723 __func__,
2724 (int) v->arch.guest_table.pfn,
2725 (int) v->arch.monitor_table.pfn,
2726 (int) v->arch.shadow_table.pfn);
2728 svm_dump_vmcb(__func__, vmcb);
2729 svm_dump_regs(__func__, &regs);
2730 svm_dump_inst(svm_rip2pointer(vmcb));
2733 #if defined(__i386__)
2734 #undef rip
2735 #endif
2738 #endif /* SVM_EXTRA_DEBUG */
2740 if (exit_reason == -1)
2742 svm_dump_vmcb(__func__, vmcb);
2743 printk("%s: exit_reason == -1 - Did someone clobber the VMCB?\n",
2744 __func__);
2745 domain_crash_synchronous();
2748 perfc_incra(svmexits, exit_reason);
2749 eip = vmcb->rip;
2751 #ifdef SVM_EXTRA_DEBUG
2752 if (do_debug)
2754 printk("eip = %lx, exit_reason = %d (0x%x)\n",
2755 eip, exit_reason, exit_reason);
2757 #endif /* SVM_EXTRA_DEBUG */
2759 TRACE_3D(TRC_VMX_VMEXIT, v->domain->domain_id, eip, exit_reason);
2761 switch (exit_reason)
2763 case VMEXIT_EXCEPTION_DB:
2765 #ifdef XEN_DEBUGGER
2766 svm_debug_save_cpu_user_regs(&regs);
2767 pdb_handle_exception(1, &regs, 1);
2768 svm_debug_restore_cpu_user_regs(&regs);
2769 #else
2770 svm_store_cpu_user_regs(&regs, v);
2771 domain_pause_for_debugger();
2772 #endif
2774 break;
2776 case VMEXIT_NMI:
2777 do_nmi(&regs, 0);
2778 break;
2780 case VMEXIT_SMI:
2781 /*
2782 * For asynchronous SMIs, we just need to allow global interrupts
2783 * so that the SMI is taken properly in the context of the host. The
2784 * standard code does a STGI after the VMEXIT which should accomplish
2785 * this task. Continue as normal and restart the guest.
2786 */
2787 break;
2789 case VMEXIT_INIT:
2790 /*
2791 * Nothing to do; in fact, we should never get to this point.
2792 */
2793 break;
2795 case VMEXIT_EXCEPTION_BP:
2796 #ifdef XEN_DEBUGGER
2797 svm_debug_save_cpu_user_regs(&regs);
2798 pdb_handle_exception(3, &regs, 1);
2799 svm_debug_restore_cpu_user_regs(&regs);
2800 #else
2801 if ( test_bit(_DOMF_debugging, &v->domain->domain_flags) )
2802 domain_pause_for_debugger();
2803 else
2804 svm_inject_exception(v, TRAP_int3, 0, 0);
2805 #endif
2806 break;
2808 case VMEXIT_EXCEPTION_NM:
2809 svm_do_no_device_fault(vmcb);
2810 break;
2812 case VMEXIT_EXCEPTION_GP:
2813 /* This should probably not be trapped in the future */
2814 regs.error_code = vmcb->exitinfo1;
2815 svm_do_general_protection_fault(v, &regs);
2816 break;
2818 case VMEXIT_EXCEPTION_PF:
2820 unsigned long va;
2821 va = vmcb->exitinfo2;
2822 regs.error_code = vmcb->exitinfo1;
2823 HVM_DBG_LOG(DBG_LEVEL_VMMU,
2824 "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx",
2825 (unsigned long)regs.eax, (unsigned long)regs.ebx,
2826 (unsigned long)regs.ecx, (unsigned long)regs.edx,
2827 (unsigned long)regs.esi, (unsigned long)regs.edi);
2829 if (!(error = svm_do_page_fault(va, &regs)))
2831 /* Inject #PF using Interruption-Information Fields */
2832 svm_inject_exception(v, TRAP_page_fault, 1, regs.error_code);
2834 v->arch.hvm_svm.cpu_cr2 = va;
2835 vmcb->cr2 = va;
2836 TRACE_3D(TRC_VMX_INT, v->domain->domain_id,
2837 VMEXIT_EXCEPTION_PF, va);
2839 break;
2842 case VMEXIT_EXCEPTION_DF:
2843 /* Debug info to help work out why the guest double-faulted. */
2844 svm_dump_vmcb(__func__, vmcb);
2845 svm_dump_regs(__func__, &regs);
2846 svm_dump_inst(svm_rip2pointer(vmcb));
2847 svm_inject_exception(v, TRAP_double_fault, 1, 0);
2848 break;
2850 case VMEXIT_INTR:
2851 raise_softirq(SCHEDULE_SOFTIRQ);
2852 break;
2855 case VMEXIT_INVD:
2856 svm_vmexit_do_invd(vmcb);
2857 break;
2859 case VMEXIT_GDTR_WRITE:
2860 printk("WRITE to GDTR\n");
2861 break;
2863 case VMEXIT_TASK_SWITCH:
2864 __hvm_bug(&regs);
2865 break;
2867 case VMEXIT_CPUID:
2868 svm_vmexit_do_cpuid(vmcb, regs.eax, &regs);
2869 break;
2871 case VMEXIT_HLT:
2872 svm_vmexit_do_hlt(vmcb);
2873 break;
2875 case VMEXIT_INVLPG:
2876 svm_handle_invlpg(0, &regs);
2877 break;
2879 case VMEXIT_INVLPGA:
2880 svm_handle_invlpg(1, &regs);
2881 break;
2883 case VMEXIT_VMMCALL:
2884 svm_do_vmmcall(v, &regs);
2885 break;
2887 case VMEXIT_CR0_READ:
2888 svm_cr_access(v, 0, TYPE_MOV_FROM_CR, &regs);
2889 break;
2891 case VMEXIT_CR2_READ:
2892 svm_cr_access(v, 2, TYPE_MOV_FROM_CR, &regs);
2893 break;
2895 case VMEXIT_CR3_READ:
2896 svm_cr_access(v, 3, TYPE_MOV_FROM_CR, &regs);
2897 break;
2899 case VMEXIT_CR4_READ:
2900 svm_cr_access(v, 4, TYPE_MOV_FROM_CR, &regs);
2901 break;
2903 case VMEXIT_CR8_READ:
2904 svm_cr_access(v, 8, TYPE_MOV_FROM_CR, &regs);
2905 break;
2907 case VMEXIT_CR0_WRITE:
2908 svm_cr_access(v, 0, TYPE_MOV_TO_CR, &regs);
2909 break;
2911 case VMEXIT_CR2_WRITE:
2912 svm_cr_access(v, 2, TYPE_MOV_TO_CR, &regs);
2913 break;
2915 case VMEXIT_CR3_WRITE:
2916 svm_cr_access(v, 3, TYPE_MOV_TO_CR, &regs);
2917 local_flush_tlb();
2918 break;
2920 case VMEXIT_CR4_WRITE:
2921 svm_cr_access(v, 4, TYPE_MOV_TO_CR, &regs);
2922 break;
2924 case VMEXIT_CR8_WRITE:
2925 svm_cr_access(v, 8, TYPE_MOV_TO_CR, &regs);
2926 break;
2928 case VMEXIT_DR0_READ:
2929 svm_dr_access(v, 0, TYPE_MOV_FROM_DR, &regs);
2930 break;
2932 case VMEXIT_DR1_READ:
2933 svm_dr_access(v, 1, TYPE_MOV_FROM_DR, &regs);
2934 break;
2936 case VMEXIT_DR2_READ:
2937 svm_dr_access(v, 2, TYPE_MOV_FROM_DR, &regs);
2938 break;
2940 case VMEXIT_DR3_READ:
2941 svm_dr_access(v, 3, TYPE_MOV_FROM_DR, &regs);
2942 break;
2944 case VMEXIT_DR6_READ:
2945 svm_dr_access(v, 6, TYPE_MOV_FROM_DR, &regs);
2946 break;
2948 case VMEXIT_DR7_READ:
2949 svm_dr_access(v, 7, TYPE_MOV_FROM_DR, &regs);
2950 break;
2952 case VMEXIT_DR0_WRITE:
2953 svm_dr_access(v, 0, TYPE_MOV_TO_DR, &regs);
2954 break;
2956 case VMEXIT_DR1_WRITE:
2957 svm_dr_access(v, 1, TYPE_MOV_TO_DR, &regs);
2958 break;
2960 case VMEXIT_DR2_WRITE:
2961 svm_dr_access(v, 2, TYPE_MOV_TO_DR, &regs);
2962 break;
2964 case VMEXIT_DR3_WRITE:
2965 svm_dr_access(v, 3, TYPE_MOV_TO_DR, &regs);
2966 break;
2968 case VMEXIT_DR6_WRITE:
2969 svm_dr_access(v, 6, TYPE_MOV_TO_DR, &regs);
2970 break;
2972 case VMEXIT_DR7_WRITE:
2973 svm_dr_access(v, 7, TYPE_MOV_TO_DR, &regs);
2974 break;
2976 case VMEXIT_IOIO:
2977 svm_io_instruction(v);
2978 break;
2980 case VMEXIT_MSR:
2981 svm_do_msr_access(v, &regs);
2982 break;
2984 case VMEXIT_SHUTDOWN:
2985 printk("Guest shutdown exit\n");
2986 domain_crash_synchronous();
2987 break;
2989 default:
2990 printk("unexpected VMEXIT: exit reason = 0x%x, exitinfo1 = %llx, "
2991 "exitinfo2 = %llx\n", exit_reason,
2992 (unsigned long long)vmcb->exitinfo1,
2993 (unsigned long long)vmcb->exitinfo2);
2994 __hvm_bug(&regs); /* should not happen */
2995 break;
2998 #ifdef SVM_EXTRA_DEBUG
2999 if (do_debug)
3001 printk("%s: Done switch on vmexit_code\n", __func__);
3002 svm_dump_regs(__func__, &regs);
3005 if (do_debug)
3007 printk("vmexit_handler():- guest_table = 0x%08x, "
3008 "monitor_table = 0x%08x, shadow_table = 0x%08x\n",
3009 (int)v->arch.guest_table.pfn,
3010 (int)v->arch.monitor_table.pfn,
3011 (int)v->arch.shadow_table.pfn);
3012 printk("svm_vmexit_handler: Returning\n");
3014 #endif
3016 return;
3019 asmlinkage void svm_load_cr2(void)
3021 struct vcpu *v = current;
3023 local_irq_disable();
3024 asm volatile("mov %0,%%cr2": :"r" (v->arch.hvm_svm.cpu_cr2));
3027 asmlinkage void svm_asid(void)
3029 struct vcpu *v = current;
3030 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
3032 /*
3033 * If we need to assign a new ASID, or if we are switching cores,
3034 * retire the ASID used on the old core and assign a new one for the current core.
3035 */
3036 if ( test_bit( ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags ) ||
3037 ( v->arch.hvm_svm.asid_core != v->arch.hvm_svm.launch_core )) {
3038 /* recycle asid */
3039 if ( !asidpool_assign_next( vmcb, 1,
3040 v->arch.hvm_svm.asid_core, v->arch.hvm_svm.launch_core )) {
3041 /* If we get here, we have a major problem */
3042 domain_crash_synchronous();
3045 v->arch.hvm_svm.asid_core = v->arch.hvm_svm.launch_core;
3046 clear_bit( ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags );
3049 /* make sure the HSA is set for the current core */
3050 set_hsa_to_guest( &v->arch.hvm_svm );
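/*
 * Added sketch (not part of the original file): the condition above in
 * isolation - a fresh ASID is needed either when the VMCB was flagged for
 * reassignment or when the vcpu has moved to a different core since it was
 * last run.
 */
#if 0
static inline int svm_needs_new_asid(struct vcpu *v)
{
    return test_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags) ||
           (v->arch.hvm_svm.asid_core != v->arch.hvm_svm.launch_core);
}
#endif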
3053 /*
3054 * Local variables:
3055 * mode: C
3056 * c-set-style: "BSD"
3057 * c-basic-offset: 4
3058 * tab-width: 4
3059 * indent-tabs-mode: nil
3060 * End:
3061 */