ia64/xen-unstable

xen/arch/x86/hvm/svm/svm.c @ 10661:8e55c5c11475

[XEN] Add CPUID hypervisor-info leaves at index 0x40000000.
Currently only a signature leaf is defined ("Xen\0").
Signed-off-by: Keir Fraser <keir@xensource.com>
author kfraser@localhost.localdomain
date Wed Jul 05 18:48:41 2006 +0100 (2006-07-05)
parents 784961057338
children 3fa8b914e2b5
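For context, a guest probes the new hypervisor-info range by issuing CPUID with EAX = 0x40000000; per the description above, only a signature leaf is defined at this changeset. The sketch below is illustrative only and is not part of the changeset: it dumps whatever the handler behind cpuid_hypervisor_leaves() returns, assuming nothing beyond the usual convention that EAX reports the highest supported leaf and EBX/ECX/EDX carry the signature bytes.

/* Hypothetical guest-side probe of CPUID leaf 0x40000000 (sketch only). */
#include <stdio.h>
#include <string.h>

static void cpuid(unsigned int leaf, unsigned int *a, unsigned int *b,
                  unsigned int *c, unsigned int *d)
{
    __asm__ __volatile__ ("cpuid"
                          : "=a" (*a), "=b" (*b), "=c" (*c), "=d" (*d)
                          : "0" (leaf));
}

int main(void)
{
    unsigned int eax, ebx, ecx, edx;
    char sig[13];

    cpuid(0x40000000, &eax, &ebx, &ecx, &edx);
    memcpy(sig + 0, &ebx, 4);   /* signature bytes, if any, in EBX/ECX/EDX */
    memcpy(sig + 4, &ecx, 4);
    memcpy(sig + 8, &edx, 4);
    sig[12] = '\0';

    printf("max hypervisor leaf: %#x, signature: \"%s\"\n", eax, sig);
    return 0;
}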
1 /*
2 * svm.c: handling SVM architecture-related VM exits
3 * Copyright (c) 2004, Intel Corporation.
4 * Copyright (c) 2005, AMD Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
17 * Place - Suite 330, Boston, MA 02111-1307 USA.
18 *
19 */
21 #include <xen/config.h>
22 #include <xen/init.h>
23 #include <xen/lib.h>
24 #include <xen/trace.h>
25 #include <xen/sched.h>
26 #include <xen/irq.h>
27 #include <xen/softirq.h>
28 #include <asm/current.h>
29 #include <asm/io.h>
30 #include <asm/shadow.h>
31 #include <asm/regs.h>
32 #include <asm/cpufeature.h>
33 #include <asm/processor.h>
34 #include <asm/types.h>
35 #include <asm/msr.h>
36 #include <asm/spinlock.h>
37 #include <asm/hvm/hvm.h>
38 #include <asm/hvm/support.h>
39 #include <asm/hvm/io.h>
40 #include <asm/hvm/svm/svm.h>
41 #include <asm/hvm/svm/vmcb.h>
42 #include <asm/hvm/svm/emulate.h>
43 #include <asm/hvm/svm/vmmcall.h>
44 #include <asm/hvm/svm/intr.h>
45 #include <asm/shadow.h>
46 #if CONFIG_PAGING_LEVELS >= 3
47 #include <asm/shadow_64.h>
48 #endif
49 #include <public/sched.h>
51 #define SVM_EXTRA_DEBUG
53 #define set_segment_register(name, value) \
54 __asm__ __volatile__ ( "movw %%ax ,%%" STR(name) "" : : "a" (value) )
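/* Usage note: set_segment_register(ds, 0) expands to "movw %ax, %ds",
 * with %eax loaded from the "a" constraint; i.e. it loads the given
 * selector value into the named segment register. */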
56 /*
57 * External functions, etc. We should move these to some suitable header file(s) */
59 extern void do_nmi(struct cpu_user_regs *, unsigned long);
60 extern int inst_copy_from_guest(unsigned char *buf, unsigned long guest_eip,
61 int inst_len);
62 extern asmlinkage void do_IRQ(struct cpu_user_regs *);
63 extern void send_pio_req(struct cpu_user_regs *regs, unsigned long port,
64 unsigned long count, int size, long value, int dir, int pvalid);
65 extern int svm_instrlen(struct cpu_user_regs *regs, int mode);
66 extern void svm_dump_inst(unsigned long eip);
67 extern int svm_dbg_on;
68 void svm_dump_regs(const char *from, struct cpu_user_regs *regs);
70 static void svm_relinquish_guest_resources(struct domain *d);
71 static int svm_do_vmmcall_reset_to_realmode(struct vcpu *v,
72 struct cpu_user_regs *regs);
76 extern void set_hsa_to_guest( struct arch_svm_struct *arch_svm );
78 /* Host save area and ASID global data */
79 struct svm_percore_globals svm_globals[NR_CPUS];
81 /*
82 * Initializes the pool of ASIDs used by the guests, per core.
83 */
84 void asidpool_init(int core)
85 {
86 int i;
88 spin_lock_init(&svm_globals[core].ASIDpool.asid_lock);
90 /* Host ASID is always in use */
91 svm_globals[core].ASIDpool.asid[INITIAL_ASID] = ASID_INUSE;
92 for ( i = 1; i < ASID_MAX; i++ )
93 svm_globals[core].ASIDpool.asid[i] = ASID_AVAILABLE;
94 }
97 /* internal function to get the next available ASID */
98 static int asidpool_fetch_next(struct vmcb_struct *vmcb, int core)
99 {
100 int i;
101 for ( i = 1; i < ASID_MAX; i++ )
102 {
103 if ( svm_globals[core].ASIDpool.asid[i] == ASID_AVAILABLE )
104 {
105 vmcb->guest_asid = i;
106 svm_globals[core].ASIDpool.asid[i] = ASID_INUSE;
107 return i;
108 }
109 }
110 return -1;
111 }
114 /*
115 * This function assigns to the passed VMCB the next
116 * available ASID number. If none are available, the
117 * TLB flush flag is set, and all retired ASIDs
118 * are made available.
119 *
120 * Returns: 1 -- success;
121 * 0 -- failure -- no more ASID numbers
122 * available.
123 */
124 int asidpool_assign_next( struct vmcb_struct *vmcb, int retire_current,
125 int oldcore, int newcore )
126 {
127 int i;
128 int res = 1;
129 static unsigned long cnt=0;
131 spin_lock(&svm_globals[oldcore].ASIDpool.asid_lock);
132 if( retire_current && vmcb->guest_asid ) {
133 svm_globals[oldcore].ASIDpool.asid[ vmcb->guest_asid & (ASID_MAX-1) ] = ASID_RETIRED;
134 }
135 spin_unlock(&svm_globals[oldcore].ASIDpool.asid_lock);
136 spin_lock(&svm_globals[newcore].ASIDpool.asid_lock);
137 if( asidpool_fetch_next( vmcb, newcore ) < 0 ) {
138 if (svm_dbg_on)
139 printk( "SVM: tlb(%ld)\n", cnt++ );
140 /* Flush the TLB and make all retired slots available */
141 vmcb->tlb_control = 1;
142 for( i = 1; i < ASID_MAX; i++ ) {
143 if( svm_globals[newcore].ASIDpool.asid[i] == ASID_RETIRED ) {
144 svm_globals[newcore].ASIDpool.asid[i] = ASID_AVAILABLE;
145 }
146 }
147 /* Grab the first available slot */
148 res = asidpool_fetch_next( vmcb, newcore ) > 0;
149 }
150 spin_unlock(&svm_globals[newcore].ASIDpool.asid_lock);
151 return res;
152 }
154 void asidpool_retire( struct vmcb_struct *vmcb, int core )
155 {
156 spin_lock(&svm_globals[core].ASIDpool.asid_lock);
157 if( vmcb->guest_asid ) {
158 svm_globals[core].ASIDpool.asid[ vmcb->guest_asid & (ASID_MAX-1) ] = ASID_RETIRED;
159 }
160 spin_unlock(&svm_globals[core].ASIDpool.asid_lock);
161 }
163 static inline void svm_inject_exception(struct vcpu *v, int trap, int ev, int error_code)
164 {
165 eventinj_t event;
166 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
168 event.bytes = 0;
169 event.fields.v = 1;
170 event.fields.type = EVENTTYPE_EXCEPTION;
171 event.fields.vector = trap;
172 event.fields.ev = ev;
173 event.fields.errorcode = error_code;
175 ASSERT(vmcb->eventinj.fields.v == 0);
177 vmcb->eventinj = event;
178 }
180 void stop_svm(void)
181 {
182 u32 eax, edx;
183 int cpu = smp_processor_id();
185 /* We turn off the EFER_SVME bit. */
186 rdmsr(MSR_EFER, eax, edx);
187 eax &= ~EFER_SVME;
188 wrmsr(MSR_EFER, eax, edx);
190 /* release the HSA */
191 free_host_save_area( svm_globals[cpu].hsa );
192 free_host_save_area( svm_globals[cpu].scratch_hsa );
193 svm_globals[cpu].hsa = NULL;
194 svm_globals[cpu].hsa_pa = 0;
195 svm_globals[cpu].scratch_hsa = NULL;
196 svm_globals[cpu].scratch_hsa_pa = 0;
197 wrmsr(MSR_K8_VM_HSAVE_PA, 0, 0 );
199 printk("AMD SVM Extension is disabled.\n");
200 }
202 int svm_initialize_guest_resources(struct vcpu *v)
203 {
204 svm_final_setup_guest(v);
205 return 1;
206 }
208 static void svm_store_cpu_guest_regs(
209 struct vcpu *v, struct cpu_user_regs *regs, unsigned long *crs)
210 {
211 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
213 if ( regs != NULL )
214 {
215 regs->eip = vmcb->rip;
216 regs->esp = vmcb->rsp;
217 regs->eflags = vmcb->rflags;
218 regs->cs = vmcb->cs.sel;
219 regs->ds = vmcb->ds.sel;
220 regs->es = vmcb->es.sel;
221 regs->ss = vmcb->ss.sel;
222 regs->gs = vmcb->gs.sel;
223 regs->fs = vmcb->fs.sel;
224 }
226 if ( crs != NULL )
227 {
228 /* Returning the guest's regs */
229 crs[0] = v->arch.hvm_svm.cpu_shadow_cr0;
230 crs[3] = v->arch.hvm_svm.cpu_cr3;
231 crs[4] = v->arch.hvm_svm.cpu_shadow_cr4;
232 }
233 }
235 static void svm_load_cpu_guest_regs(
236 struct vcpu *v, struct cpu_user_regs *regs)
237 {
238 svm_load_cpu_user_regs(v, regs);
239 }
241 #define IS_CANO_ADDRESS(add) 1
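/* Note: IS_CANO_ADDRESS is a stub that accepts every address. A real
 * canonicality check for 48-bit virtual addresses would look roughly like
 * the sketch below (illustrative only; is_canonical_address() is not an
 * existing helper in this file):
 *
 *   static inline int is_canonical_address(u64 addr)
 *   {
 *       s64 sext = (s64)addr >> 47;
 *       return (sext == 0) || (sext == -1);
 *   }
 */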
243 static inline int long_mode_do_msr_read(struct cpu_user_regs *regs)
244 {
245 u64 msr_content = 0;
246 struct vcpu *vc = current;
247 struct vmcb_struct *vmcb = vc->arch.hvm_svm.vmcb;
249 switch (regs->ecx)
250 {
251 case MSR_EFER:
252 msr_content = vmcb->efer;
253 msr_content &= ~EFER_SVME;
254 break;
256 case MSR_FS_BASE:
257 msr_content = vmcb->fs.base;
258 break;
260 case MSR_GS_BASE:
261 msr_content = vmcb->gs.base;
262 break;
264 case MSR_SHADOW_GS_BASE:
265 msr_content = vmcb->kerngsbase;
266 break;
268 case MSR_STAR:
269 msr_content = vmcb->star;
270 break;
272 case MSR_LSTAR:
273 msr_content = vmcb->lstar;
274 break;
276 case MSR_CSTAR:
277 msr_content = vmcb->cstar;
278 break;
280 case MSR_SYSCALL_MASK:
281 msr_content = vmcb->sfmask;
282 break;
284 default:
285 return 0;
286 }
288 HVM_DBG_LOG(DBG_LEVEL_2, "mode_do_msr_read: msr_content: %"PRIx64"\n",
289 msr_content);
291 regs->eax = msr_content & 0xffffffff;
292 regs->edx = msr_content >> 32;
293 return 1;
294 }
296 static inline int long_mode_do_msr_write(struct cpu_user_regs *regs)
297 {
298 u64 msr_content = regs->eax | ((u64)regs->edx << 32);
299 struct vcpu *vc = current;
300 struct vmcb_struct *vmcb = vc->arch.hvm_svm.vmcb;
302 HVM_DBG_LOG(DBG_LEVEL_1, "mode_do_msr_write msr %lx "
303 "msr_content %"PRIx64"\n",
304 (unsigned long)regs->ecx, msr_content);
306 switch (regs->ecx)
307 {
308 case MSR_EFER:
309 #ifdef __x86_64__
310 /* offending reserved bit will cause #GP */
311 if ( msr_content & ~(EFER_LME | EFER_LMA | EFER_NX | EFER_SCE) )
312 {
313 printk("trying to set reserved bit in EFER\n");
314 svm_inject_exception(vc, TRAP_gp_fault, 1, 0);
315 return 0;
316 }
318 /* LME: 0 -> 1 */
319 if ( msr_content & EFER_LME &&
320 !test_bit(SVM_CPU_STATE_LME_ENABLED, &vc->arch.hvm_svm.cpu_state) )
321 {
322 if ( svm_paging_enabled(vc) ||
323 !test_bit(SVM_CPU_STATE_PAE_ENABLED,
324 &vc->arch.hvm_svm.cpu_state) )
325 {
326 printk("trying to set LME bit when "
327 "in paging mode or PAE bit is not set\n");
328 svm_inject_exception(vc, TRAP_gp_fault, 1, 0);
329 return 0;
330 }
331 set_bit(SVM_CPU_STATE_LME_ENABLED, &vc->arch.hvm_svm.cpu_state);
332 }
334 /* We have already recorded that we want LME, so it will be set
335 * next time CR0 gets updated. So we clear that bit and continue.
336 */
337 if ((msr_content ^ vmcb->efer) & EFER_LME)
338 msr_content &= ~EFER_LME;
339 /* No update for LME/LMA since it has no effect */
340 #endif
341 vmcb->efer = msr_content | EFER_SVME;
342 break;
344 case MSR_FS_BASE:
345 case MSR_GS_BASE:
346 if (!(SVM_LONG_GUEST(vc)))
347 domain_crash_synchronous();
349 if (!IS_CANO_ADDRESS(msr_content))
350 {
351 HVM_DBG_LOG(DBG_LEVEL_1, "Not cano address of msr write\n");
352 svm_inject_exception(vc, TRAP_gp_fault, 1, 0);
353 }
355 if (regs->ecx == MSR_FS_BASE)
356 vmcb->fs.base = msr_content;
357 else
358 vmcb->gs.base = msr_content;
359 break;
361 case MSR_SHADOW_GS_BASE:
362 vmcb->kerngsbase = msr_content;
363 break;
365 case MSR_STAR:
366 vmcb->star = msr_content;
367 break;
369 case MSR_LSTAR:
370 vmcb->lstar = msr_content;
371 break;
373 case MSR_CSTAR:
374 vmcb->cstar = msr_content;
375 break;
377 case MSR_SYSCALL_MASK:
378 vmcb->sfmask = msr_content;
379 break;
381 default:
382 return 0;
383 }
384 return 1;
385 }
387 int svm_realmode(struct vcpu *v)
388 {
389 unsigned long cr0 = v->arch.hvm_svm.cpu_shadow_cr0;
390 unsigned long eflags = v->arch.hvm_svm.vmcb->rflags;
392 return (eflags & X86_EFLAGS_VM) || !(cr0 & X86_CR0_PE);
393 }
395 int svm_instruction_length(struct vcpu *v)
396 {
397 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
398 unsigned long cr0 = vmcb->cr0, eflags = vmcb->rflags, mode;
399 /* Check which operating mode the guest is running in. */
400 if( vmcb->efer & EFER_LMA )
401 mode = vmcb->cs.attributes.fields.l ? 8 : 4;
402 else
403 mode = (eflags & X86_EFLAGS_VM) || !(cr0 & X86_CR0_PE) ? 2 : 4;
404 return svm_instrlen(guest_cpu_user_regs(), mode);
405 }
407 unsigned long svm_get_ctrl_reg(struct vcpu *v, unsigned int num)
408 {
409 switch ( num )
410 {
411 case 0:
412 return v->arch.hvm_svm.cpu_shadow_cr0;
413 case 2:
414 return v->arch.hvm_svm.cpu_cr2;
415 case 3:
416 return v->arch.hvm_svm.cpu_cr3;
417 default:
418 BUG();
419 }
420 return 0; /* dummy */
421 }
424 /* SVM-specific initialization code for VCPU application processors */
425 void svm_init_ap_context(struct vcpu_guest_context *ctxt,
426 int vcpuid, int trampoline_vector)
427 {
428 int i;
429 struct vcpu *v, *bsp = current;
430 struct domain *d = bsp->domain;
431 cpu_user_regs_t *regs;
434 if ((v = d->vcpu[vcpuid]) == NULL)
435 {
436 printk("vcpuid %d is invalid! good-bye.\n", vcpuid);
437 domain_crash_synchronous();
438 }
439 regs = &v->arch.guest_context.user_regs;
441 memset(ctxt, 0, sizeof(*ctxt));
442 for (i = 0; i < 256; ++i)
443 {
444 ctxt->trap_ctxt[i].vector = i;
445 ctxt->trap_ctxt[i].cs = FLAT_KERNEL_CS;
446 }
449 /*
450 * We execute the trampoline code in real mode. The trampoline vector
451 * passed to us is page aligned and is the physical frame number of
452 * the code.
453 */
454 ctxt->user_regs.eip = 0x0;
455 ctxt->user_regs.cs = (trampoline_vector << 8);
456 ctxt->flags = VGCF_HVM_GUEST;
457 }
459 int start_svm(void)
460 {
461 u32 eax, ecx, edx;
462 u32 phys_hsa_lo, phys_hsa_hi;
463 u64 phys_hsa;
464 int cpu = smp_processor_id();
466 /* Xen does not fill x86_capability words except 0. */
467 ecx = cpuid_ecx(0x80000001);
468 boot_cpu_data.x86_capability[5] = ecx;
470 if (!(test_bit(X86_FEATURE_SVME, &boot_cpu_data.x86_capability)))
471 return 0;
472 svm_globals[cpu].hsa = alloc_host_save_area();
473 if (! svm_globals[cpu].hsa)
474 return 0;
476 rdmsr(MSR_EFER, eax, edx);
477 eax |= EFER_SVME;
478 wrmsr(MSR_EFER, eax, edx);
479 asidpool_init( cpu );
480 printk("AMD SVM Extension is enabled for cpu %d.\n", cpu );
482 /* Initialize the HSA for this core */
483 phys_hsa = (u64) virt_to_maddr( svm_globals[cpu].hsa );
484 phys_hsa_lo = (u32) phys_hsa;
485 phys_hsa_hi = (u32) (phys_hsa >> 32);
486 wrmsr(MSR_K8_VM_HSAVE_PA, phys_hsa_lo, phys_hsa_hi);
487 svm_globals[cpu].hsa_pa = phys_hsa;
489 svm_globals[cpu].scratch_hsa = alloc_host_save_area();
490 svm_globals[cpu].scratch_hsa_pa = (u64)virt_to_maddr( svm_globals[cpu].scratch_hsa );
492 /* Setup HVM interfaces */
493 hvm_funcs.disable = stop_svm;
495 hvm_funcs.initialize_guest_resources = svm_initialize_guest_resources;
496 hvm_funcs.relinquish_guest_resources = svm_relinquish_guest_resources;
498 hvm_funcs.store_cpu_guest_regs = svm_store_cpu_guest_regs;
499 hvm_funcs.load_cpu_guest_regs = svm_load_cpu_guest_regs;
501 hvm_funcs.realmode = svm_realmode;
502 hvm_funcs.paging_enabled = svm_paging_enabled;
503 hvm_funcs.instruction_length = svm_instruction_length;
504 hvm_funcs.get_guest_ctrl_reg = svm_get_ctrl_reg;
505 hvm_funcs.init_ap_context = svm_init_ap_context;
507 hvm_enabled = 1;
509 return 1;
510 }
512 int svm_dbg_on = 0;
514 static inline int svm_do_debugout(unsigned long exit_code)
515 {
516 int i;
518 static unsigned long counter = 0;
519 static unsigned long works[] =
520 {
521 VMEXIT_IOIO,
522 VMEXIT_HLT,
523 VMEXIT_CPUID,
524 VMEXIT_DR0_READ,
525 VMEXIT_DR1_READ,
526 VMEXIT_DR2_READ,
527 VMEXIT_DR3_READ,
528 VMEXIT_DR6_READ,
529 VMEXIT_DR7_READ,
530 VMEXIT_DR0_WRITE,
531 VMEXIT_DR1_WRITE,
532 VMEXIT_DR2_WRITE,
533 VMEXIT_DR3_WRITE,
534 VMEXIT_CR0_READ,
535 VMEXIT_CR0_WRITE,
536 VMEXIT_CR3_READ,
537 VMEXIT_CR4_READ,
538 VMEXIT_MSR,
539 VMEXIT_CR0_WRITE,
540 VMEXIT_CR3_WRITE,
541 VMEXIT_CR4_WRITE,
542 VMEXIT_EXCEPTION_PF,
543 VMEXIT_INTR,
544 VMEXIT_INVLPG,
545 VMEXIT_EXCEPTION_NM
546 };
549 #if 0
550 if (svm_dbg_on && exit_code != 0x7B)
551 return 1;
552 #endif
554 counter++;
556 #if 0
557 if ((exit_code == 0x4E
558 || exit_code == VMEXIT_CR0_READ
559 || exit_code == VMEXIT_CR0_WRITE)
560 && counter < 200000)
561 return 0;
563 if ((exit_code == 0x4E) && counter < 500000)
564 return 0;
565 #endif
567 for (i = 0; i < sizeof(works) / sizeof(works[0]); i++)
568 if (exit_code == works[i])
569 return 0;
571 return 1;
572 }
574 void save_svm_cpu_user_regs(struct vcpu *v, struct cpu_user_regs *ctxt)
575 {
576 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
578 ASSERT(vmcb);
580 ctxt->eax = vmcb->rax;
581 ctxt->ss = vmcb->ss.sel;
582 ctxt->esp = vmcb->rsp;
583 ctxt->eflags = vmcb->rflags;
584 ctxt->cs = vmcb->cs.sel;
585 ctxt->eip = vmcb->rip;
587 ctxt->gs = vmcb->gs.sel;
588 ctxt->fs = vmcb->fs.sel;
589 ctxt->es = vmcb->es.sel;
590 ctxt->ds = vmcb->ds.sel;
591 }
593 void svm_store_cpu_user_regs(struct cpu_user_regs *regs, struct vcpu *v)
594 {
595 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
597 regs->eip = vmcb->rip;
598 regs->esp = vmcb->rsp;
599 regs->eflags = vmcb->rflags;
600 regs->cs = vmcb->cs.sel;
601 regs->ds = vmcb->ds.sel;
602 regs->es = vmcb->es.sel;
603 regs->ss = vmcb->ss.sel;
604 }
606 /* XXX Use svm_load_cpu_guest_regs instead */
607 void svm_load_cpu_user_regs(struct vcpu *v, struct cpu_user_regs *regs)
608 {
609 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
610 u32 *intercepts = &v->arch.hvm_svm.vmcb->exception_intercepts;
612 /* Write the guest register value into VMCB */
613 vmcb->rax = regs->eax;
614 vmcb->ss.sel = regs->ss;
615 vmcb->rsp = regs->esp;
616 vmcb->rflags = regs->eflags;
617 vmcb->cs.sel = regs->cs;
618 vmcb->rip = regs->eip;
619 if (regs->eflags & EF_TF)
620 *intercepts |= EXCEPTION_BITMAP_DB;
621 else
622 *intercepts &= ~EXCEPTION_BITMAP_DB;
623 }
625 int svm_paging_enabled(struct vcpu *v)
626 {
627 unsigned long cr0;
629 cr0 = v->arch.hvm_svm.cpu_shadow_cr0;
631 return (cr0 & X86_CR0_PE) && (cr0 & X86_CR0_PG);
632 }
635 /* Make sure that xen intercepts any FP accesses from current */
636 void svm_stts(struct vcpu *v)
637 {
638 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
640 /* FPU state already dirty? Then no need to setup_fpu() lazily. */
641 if ( test_bit(_VCPUF_fpu_dirtied, &v->vcpu_flags) )
642 return;
644 /*
645 * If the guest does not have TS enabled then we must cause and handle an
646 * exception on first use of the FPU. If the guest *does* have TS enabled
647 * then this is not necessary: no FPU activity can occur until the guest
648 * clears CR0.TS, and we will initialise the FPU when that happens.
649 */
650 if ( !(v->arch.hvm_svm.cpu_shadow_cr0 & X86_CR0_TS) )
651 {
652 v->arch.hvm_svm.vmcb->exception_intercepts |= EXCEPTION_BITMAP_NM;
653 vmcb->cr0 |= X86_CR0_TS;
654 }
655 }
657 static void arch_svm_do_launch(struct vcpu *v)
658 {
659 cpu_user_regs_t *regs = &current->arch.guest_context.user_regs;
660 int error;
662 #if 0
663 if (svm_dbg_on)
664 printk("Do launch\n");
665 #endif
666 error = construct_vmcb(&v->arch.hvm_svm, regs);
667 if ( error < 0 )
668 {
669 if (v->vcpu_id == 0) {
670 printk("Failed to construct a new VMCB for BSP.\n");
671 } else {
672 printk("Failed to construct a new VMCB for AP %d\n", v->vcpu_id);
673 }
674 domain_crash_synchronous();
675 }
677 svm_do_launch(v);
678 #if 0
679 if (svm_dbg_on)
680 svm_dump_host_regs(__func__);
681 #endif
682 if (v->vcpu_id != 0)
683 {
684 u16 cs_sel = regs->cs;
685 /*
686 * This is the launch of an AP; set state so that we begin executing
687 * the trampoline code in real-mode.
688 */
689 svm_do_vmmcall_reset_to_realmode(v, regs);
690 /* Adjust the state to execute the trampoline code.*/
691 v->arch.hvm_svm.vmcb->rip = 0;
692 v->arch.hvm_svm.vmcb->cs.sel= cs_sel;
693 v->arch.hvm_svm.vmcb->cs.base = (cs_sel << 4);
694 }
696 reset_stack_and_jump(svm_asm_do_launch);
697 }
699 static void svm_freeze_time(struct vcpu *v)
700 {
701 struct periodic_time *pt=&v->domain->arch.hvm_domain.pl_time.periodic_tm;
703 if ( pt->enabled && pt->first_injected && !v->arch.hvm_vcpu.guest_time ) {
704 v->arch.hvm_vcpu.guest_time = hvm_get_guest_time(v);
705 stop_timer(&(pt->timer));
706 }
707 }
709 static void svm_ctxt_switch_from(struct vcpu *v)
710 {
711 svm_freeze_time(v);
712 }
714 static void svm_ctxt_switch_to(struct vcpu *v)
715 {
716 #ifdef __x86_64__
717 /*
718 * This is required because VMRUN performs consistency checks,
719 * and some of the DOM0 selectors point to invalid GDT
720 * locations, which would cause AMD processors
721 * to shut down.
722 */
723 set_segment_register(ds, 0);
724 set_segment_register(es, 0);
725 set_segment_register(ss, 0);
726 #endif
727 }
729 void svm_final_setup_guest(struct vcpu *v)
730 {
731 struct domain *d = v->domain;
732 struct vcpu *vc;
734 v->arch.schedule_tail = arch_svm_do_launch;
735 v->arch.ctxt_switch_from = svm_ctxt_switch_from;
736 v->arch.ctxt_switch_to = svm_ctxt_switch_to;
738 if ( v != d->vcpu[0] )
739 return;
741 /* Initialize monitor page table */
742 for_each_vcpu( d, vc )
743 vc->arch.monitor_table = pagetable_null();
745 /*
746 * Required to do this once per domain
747 * TODO: add a separate function to do these.
748 */
749 memset(&d->shared_info->evtchn_mask[0], 0xff,
750 sizeof(d->shared_info->evtchn_mask));
752 /*
753 * Put the domain in shadow mode even though we're going to be using
754 * the shared 1:1 page table initially. It shouldn't hurt
755 */
756 shadow_mode_enable(d,
757 SHM_enable|SHM_refcounts|
758 SHM_translate|SHM_external|SHM_wr_pt_pte);
759 }
762 static void svm_relinquish_guest_resources(struct domain *d)
763 {
764 extern void destroy_vmcb(struct arch_svm_struct *); /* XXX */
765 struct vcpu *v;
767 for_each_vcpu ( d, v )
768 {
769 if ( !test_bit(_VCPUF_initialised, &v->vcpu_flags) )
770 continue;
772 destroy_vmcb(&v->arch.hvm_svm);
773 free_monitor_pagetable(v);
774 kill_timer(&v->arch.hvm_svm.hlt_timer);
775 if ( hvm_apic_support(v->domain) && (VLAPIC(v) != NULL) )
776 {
777 kill_timer( &(VLAPIC(v)->vlapic_timer) );
778 xfree(VLAPIC(v));
779 }
780 }
782 kill_timer(&d->arch.hvm_domain.pl_time.periodic_tm.timer);
784 if ( d->arch.hvm_domain.shared_page_va )
785 unmap_domain_page_global(
786 (void *)d->arch.hvm_domain.shared_page_va);
788 shadow_direct_map_clean(d);
789 }
792 void arch_svm_do_resume(struct vcpu *v)
793 {
794 /* pinning VCPU to a different core? */
795 if ( v->arch.hvm_svm.launch_core == smp_processor_id()) {
796 svm_do_resume( v );
797 reset_stack_and_jump( svm_asm_do_resume );
798 }
799 else {
800 if (svm_dbg_on)
801 printk("VCPU core pinned: %d to %d\n",
802 v->arch.hvm_svm.launch_core, smp_processor_id() );
803 v->arch.hvm_svm.launch_core = smp_processor_id();
804 svm_migrate_timers( v );
805 svm_do_resume( v );
806 reset_stack_and_jump( svm_asm_do_resume );
807 }
808 }
811 void svm_migrate_timers(struct vcpu *v)
812 {
813 struct periodic_time *pt = &(v->domain->arch.hvm_domain.pl_time.periodic_tm);
815 if ( pt->enabled ) {
816 migrate_timer( &pt->timer, v->processor );
817 migrate_timer( &v->arch.hvm_svm.hlt_timer, v->processor );
818 }
819 if ( hvm_apic_support(v->domain) && VLAPIC( v ))
820 migrate_timer( &(VLAPIC(v)->vlapic_timer ), v->processor );
821 }
824 static int svm_do_page_fault(unsigned long va, struct cpu_user_regs *regs)
825 {
826 struct vcpu *v = current;
827 unsigned long eip;
828 unsigned long gpa; /* FIXME: PAE */
829 int result;
830 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
832 ASSERT(vmcb);
834 //#if HVM_DEBUG
835 eip = vmcb->rip;
836 HVM_DBG_LOG(DBG_LEVEL_VMMU,
837 "svm_do_page_fault = 0x%lx, eip = %lx, error_code = %lx",
838 va, eip, (unsigned long)regs->error_code);
839 //#endif
841 if ( !svm_paging_enabled(v) )
842 {
843 if ( shadow_direct_map_fault(va, regs) )
844 return 1;
846 handle_mmio(va, va);
847 return 1;
848 }
851 gpa = gva_to_gpa(va);
853 /* Use 1:1 page table to identify MMIO address space */
854 if (mmio_space(gpa))
855 {
856 /* No support for APIC */
857 if (!hvm_apic_support(v->domain) && gpa >= 0xFEC00000)
858 {
859 int inst_len;
860 inst_len = svm_instruction_length(v);
861 if (inst_len == -1)
862 {
863 printf("%s: INST_LEN - Unable to decode properly.\n", __func__);
864 domain_crash_synchronous();
865 }
867 __update_guest_eip(vmcb, inst_len);
869 return 1;
870 }
872 handle_mmio(va, gpa);
874 return 1;
875 }
877 result = shadow_fault(va, regs);
879 if( result ) {
880 /* Let's make sure that the Guest TLB is flushed */
881 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
882 }
884 return result;
885 }
888 static void svm_do_no_device_fault(struct vmcb_struct *vmcb)
889 {
890 struct vcpu *v = current;
892 setup_fpu(v);
893 vmcb->exception_intercepts &= ~EXCEPTION_BITMAP_NM;
895 if ( !(v->arch.hvm_svm.cpu_shadow_cr0 & X86_CR0_TS) )
896 vmcb->cr0 &= ~X86_CR0_TS;
897 }
900 static void svm_do_general_protection_fault(struct vcpu *v,
901 struct cpu_user_regs *regs)
902 {
903 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
904 unsigned long eip, error_code;
906 ASSERT(vmcb);
908 eip = vmcb->rip;
909 error_code = vmcb->exitinfo1;
911 if (vmcb->idtr.limit == 0) {
912 printf("Huh? We got a GP Fault with an invalid IDTR!\n");
913 svm_dump_vmcb(__func__, vmcb);
914 svm_dump_regs(__func__, regs);
915 svm_dump_inst(vmcb->rip);
916 __hvm_bug(regs);
917 }
919 HVM_DBG_LOG(DBG_LEVEL_1,
920 "svm_general_protection_fault: eip = %lx, erro_code = %lx",
921 eip, error_code);
923 HVM_DBG_LOG(DBG_LEVEL_1,
924 "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx",
925 (unsigned long)regs->eax, (unsigned long)regs->ebx,
926 (unsigned long)regs->ecx, (unsigned long)regs->edx,
927 (unsigned long)regs->esi, (unsigned long)regs->edi);
929 /* Reflect it back into the guest */
930 svm_inject_exception(v, TRAP_gp_fault, 1, error_code);
931 }
933 /* Reserved bits ECX: [31:14], [12:4], [2:1]*/
934 #define SVM_VCPU_CPUID_L1_ECX_RESERVED 0xffffdff6
935 /* Reserved bits EDX: [31:29], [27], [22:20], [18], [10] */
936 #define SVM_VCPU_CPUID_L1_EDX_RESERVED 0xe8740400
938 static void svm_vmexit_do_cpuid(struct vmcb_struct *vmcb, unsigned long input,
939 struct cpu_user_regs *regs)
940 {
941 unsigned int eax, ebx, ecx, edx;
942 unsigned long eip;
943 struct vcpu *v = current;
944 int inst_len;
946 ASSERT(vmcb);
948 eip = vmcb->rip;
950 HVM_DBG_LOG(DBG_LEVEL_1,
951 "do_cpuid: (eax) %lx, (ebx) %lx, (ecx) %lx, (edx) %lx,"
952 " (esi) %lx, (edi) %lx",
953 (unsigned long)regs->eax, (unsigned long)regs->ebx,
954 (unsigned long)regs->ecx, (unsigned long)regs->edx,
955 (unsigned long)regs->esi, (unsigned long)regs->edi);
957 cpuid(input, &eax, &ebx, &ecx, &edx);
959 if (input == 0x00000001)
960 {
961 if ( !hvm_apic_support(v->domain) ||
962 !vlapic_global_enabled((VLAPIC(v))) )
963 {
964 /* Since the apic is disabled, avoid any confusion
965 about SMP cpus being available */
966 clear_bit(X86_FEATURE_APIC, &edx);
967 }
969 #if CONFIG_PAGING_LEVELS < 3
970 clear_bit(X86_FEATURE_PAE, &edx);
971 clear_bit(X86_FEATURE_PSE, &edx);
972 clear_bit(X86_FEATURE_PSE36, &edx);
973 #else
974 if ( v->domain->arch.ops->guest_paging_levels == PAGING_L2 )
975 {
976 if ( !v->domain->arch.hvm_domain.pae_enabled )
977 clear_bit(X86_FEATURE_PAE, &edx);
978 clear_bit(X86_FEATURE_PSE, &edx);
979 clear_bit(X86_FEATURE_PSE36, &edx);
980 }
981 #endif
982 /* Clear out reserved bits. */
983 ecx &= ~SVM_VCPU_CPUID_L1_ECX_RESERVED;
984 edx &= ~SVM_VCPU_CPUID_L1_EDX_RESERVED;
986 clear_bit(X86_FEATURE_MWAIT & 31, &ecx);
988 /* Guest should only see one logical processor.
989 * See details on page 23 of AMD CPUID Specification.
990 */
991 clear_bit(X86_FEATURE_HT, &edx); /* clear the hyperthread bit */
992 ebx &= 0xFF00FFFF; /* clear the logical processor count when HTT=0 */
993 ebx |= 0x00010000; /* set to 1 just for precaution */
995 /* Disable machine check architecture */
996 clear_bit(X86_FEATURE_MCA, &edx);
997 clear_bit(X86_FEATURE_MCE, &edx);
998 }
999 else if ( (input > 0x00000005) && (input < 0x80000000) )
1001 if ( !cpuid_hypervisor_leaves(input, &eax, &ebx, &ecx, &edx) )
1002 eax = ebx = ecx = edx = 0;
1004 else if ( input == 0x80000001 )
1006 /* We duplicate some CPUID_00000001 code because many bits of
1007 CPUID_80000001_EDX overlap with CPUID_00000001_EDX. */
1009 if ( !hvm_apic_support(v->domain) ||
1010 !vlapic_global_enabled((VLAPIC(v))) )
1012 /* Since the apic is disabled, avoid any confusion
1013 about SMP cpus being available */
1014 clear_bit(X86_FEATURE_APIC, &edx);
1017 /* Clear the Cmp_Legacy bit
1018 * This bit is supposed to be zero when HTT = 0.
1019 * See details on page 23 of AMD CPUID Specification.
1020 */
1021 clear_bit(X86_FEATURE_CMP_LEGACY & 31, &ecx);
1023 #ifdef __i386__
1024 /* Mask feature for Intel ia32e or AMD long mode. */
1025 clear_bit(X86_FEATURE_LAHF_LM & 31, &ecx);
1027 clear_bit(X86_FEATURE_LM & 31, &edx);
1028 clear_bit(X86_FEATURE_SYSCALL & 31, &edx);
1029 #endif
1031 #if CONFIG_PAGING_LEVELS < 3
1032 clear_bit(X86_FEATURE_NX & 31, &edx);
1033 clear_bit(X86_FEATURE_PAE, &edx);
1034 clear_bit(X86_FEATURE_PSE, &edx);
1035 clear_bit(X86_FEATURE_PSE36, &edx);
1036 #else
1037 if ( v->domain->arch.ops->guest_paging_levels == PAGING_L2 )
1039 if ( !v->domain->arch.hvm_domain.pae_enabled )
1041 clear_bit(X86_FEATURE_NX & 31, &edx);
1042 clear_bit(X86_FEATURE_PAE, &edx);
1044 clear_bit(X86_FEATURE_PSE, &edx);
1045 clear_bit(X86_FEATURE_PSE36, &edx);
1047 #endif
1049 /* Make SVM feature invisible to the guest. */
1050 clear_bit(X86_FEATURE_SVME & 31, &ecx);
1052 /* So far, we do not support 3DNow for the guest. */
1053 clear_bit(X86_FEATURE_3DNOW & 31, &edx);
1054 clear_bit(X86_FEATURE_3DNOWEXT & 31, &edx);
1056 else if ( ( input == 0x80000007 ) || ( input == 0x8000000A ) )
1058 /* Mask out features of power management and SVM extension. */
1059 eax = ebx = ecx = edx = 0;
1061 else if ( input == 0x80000008 )
1063 ecx &= 0xFFFFFF00; /* Make sure Number of CPU core is 1 when HTT=0 */
1066 regs->eax = (unsigned long)eax;
1067 regs->ebx = (unsigned long)ebx;
1068 regs->ecx = (unsigned long)ecx;
1069 regs->edx = (unsigned long)edx;
1071 HVM_DBG_LOG(DBG_LEVEL_1,
1072 "svm_vmexit_do_cpuid: eip: %lx, input: %lx, out:eax=%x, "
1073 "ebx=%x, ecx=%x, edx=%x",
1074 eip, input, eax, ebx, ecx, edx);
1076 inst_len = __get_instruction_length(vmcb, INSTR_CPUID, NULL);
1077 ASSERT(inst_len > 0);
1078 __update_guest_eip(vmcb, inst_len);
1082 static inline unsigned long *get_reg_p(unsigned int gpreg,
1083 struct cpu_user_regs *regs, struct vmcb_struct *vmcb)
1085 unsigned long *reg_p = NULL;
1086 switch (gpreg)
1088 case SVM_REG_EAX:
1089 reg_p = (unsigned long *)&regs->eax;
1090 break;
1091 case SVM_REG_EBX:
1092 reg_p = (unsigned long *)&regs->ebx;
1093 break;
1094 case SVM_REG_ECX:
1095 reg_p = (unsigned long *)&regs->ecx;
1096 break;
1097 case SVM_REG_EDX:
1098 reg_p = (unsigned long *)&regs->edx;
1099 break;
1100 case SVM_REG_EDI:
1101 reg_p = (unsigned long *)&regs->edi;
1102 break;
1103 case SVM_REG_ESI:
1104 reg_p = (unsigned long *)&regs->esi;
1105 break;
1106 case SVM_REG_EBP:
1107 reg_p = (unsigned long *)&regs->ebp;
1108 break;
1109 case SVM_REG_ESP:
1110 reg_p = (unsigned long *)&vmcb->rsp;
1111 break;
1112 #ifdef __x86_64__
1113 case SVM_REG_R8:
1114 reg_p = (unsigned long *)&regs->r8;
1115 break;
1116 case SVM_REG_R9:
1117 reg_p = (unsigned long *)&regs->r9;
1118 break;
1119 case SVM_REG_R10:
1120 reg_p = (unsigned long *)&regs->r10;
1121 break;
1122 case SVM_REG_R11:
1123 reg_p = (unsigned long *)&regs->r11;
1124 break;
1125 case SVM_REG_R12:
1126 reg_p = (unsigned long *)&regs->r12;
1127 break;
1128 case SVM_REG_R13:
1129 reg_p = (unsigned long *)&regs->r13;
1130 break;
1131 case SVM_REG_R14:
1132 reg_p = (unsigned long *)&regs->r14;
1133 break;
1134 case SVM_REG_R15:
1135 reg_p = (unsigned long *)&regs->r15;
1136 break;
1137 #endif
1138 default:
1139 BUG();
1142 return reg_p;
1146 static inline unsigned long get_reg(unsigned int gpreg,
1147 struct cpu_user_regs *regs, struct vmcb_struct *vmcb)
1149 unsigned long *gp;
1150 gp = get_reg_p(gpreg, regs, vmcb);
1151 return *gp;
1155 static inline void set_reg(unsigned int gpreg, unsigned long value,
1156 struct cpu_user_regs *regs, struct vmcb_struct *vmcb)
1158 unsigned long *gp;
1159 gp = get_reg_p(gpreg, regs, vmcb);
1160 *gp = value;
1164 static void svm_dr_access (struct vcpu *v, unsigned int reg, unsigned int type,
1165 struct cpu_user_regs *regs)
1167 unsigned long *reg_p = 0;
1168 unsigned int gpreg = 0;
1169 unsigned long eip;
1170 int inst_len;
1171 int index;
1172 struct vmcb_struct *vmcb;
1173 u8 buffer[MAX_INST_LEN];
1174 u8 prefix = 0;
1176 vmcb = v->arch.hvm_svm.vmcb;
1178 ASSERT(vmcb);
1180 eip = vmcb->rip;
1181 inst_copy_from_guest(buffer, svm_rip2pointer(vmcb), sizeof(buffer));
1182 index = skip_prefix_bytes(buffer, sizeof(buffer));
1184 ASSERT(buffer[index+0] == 0x0f && (buffer[index+1] & 0xFD) == 0x21);
1186 if (index > 0 && (buffer[index-1] & 0xF0) == 0x40)
1187 prefix = buffer[index-1];
1189 gpreg = decode_src_reg(prefix, buffer[index + 2]);
1190 ASSERT(reg == decode_dest_reg(prefix, buffer[index + 2]));
1192 HVM_DBG_LOG(DBG_LEVEL_1, "svm_dr_access : eip=%lx, reg=%d, gpreg = %x",
1193 eip, reg, gpreg);
1195 reg_p = get_reg_p(gpreg, regs, vmcb);
1197 switch (type)
1199 case TYPE_MOV_TO_DR:
1200 inst_len = __get_instruction_length(vmcb, INSTR_MOV2DR, buffer);
1201 v->arch.guest_context.debugreg[reg] = *reg_p;
1202 break;
1203 case TYPE_MOV_FROM_DR:
1204 inst_len = __get_instruction_length(vmcb, INSTR_MOVDR2, buffer);
1205 *reg_p = v->arch.guest_context.debugreg[reg];
1206 break;
1207 default:
1208 __hvm_bug(regs);
1209 break;
1211 ASSERT(inst_len > 0);
1212 __update_guest_eip(vmcb, inst_len);
1216 static void svm_get_prefix_info(
1217 struct vmcb_struct *vmcb,
1218 unsigned int dir, segment_selector_t **seg, unsigned int *asize)
1220 unsigned char inst[MAX_INST_LEN];
1221 int i;
1223 memset(inst, 0, MAX_INST_LEN);
1224 if (inst_copy_from_guest(inst, svm_rip2pointer(vmcb), sizeof(inst))
1225 != MAX_INST_LEN)
1227 printk("%s: get guest instruction failed\n", __func__);
1228 domain_crash_synchronous();
1231 for (i = 0; i < MAX_INST_LEN; i++)
1233 switch (inst[i])
1235 case 0xf3: /* REPZ */
1236 case 0xf2: /* REPNZ */
1237 case 0xf0: /* LOCK */
1238 case 0x66: /* data32 */
1239 #ifdef __x86_64__
1240 /* REX prefixes */
1241 case 0x40:
1242 case 0x41:
1243 case 0x42:
1244 case 0x43:
1245 case 0x44:
1246 case 0x45:
1247 case 0x46:
1248 case 0x47:
1250 case 0x48:
1251 case 0x49:
1252 case 0x4a:
1253 case 0x4b:
1254 case 0x4c:
1255 case 0x4d:
1256 case 0x4e:
1257 case 0x4f:
1258 #endif
1259 continue;
1260 case 0x67: /* addr32 */
1261 *asize ^= 48; /* Switch 16/32 bits */
1262 continue;
1263 case 0x2e: /* CS */
1264 *seg = &vmcb->cs;
1265 continue;
1266 case 0x36: /* SS */
1267 *seg = &vmcb->ss;
1268 continue;
1269 case 0x26: /* ES */
1270 *seg = &vmcb->es;
1271 continue;
1272 case 0x64: /* FS */
1273 *seg = &vmcb->fs;
1274 continue;
1275 case 0x65: /* GS */
1276 *seg = &vmcb->gs;
1277 continue;
1278 case 0x3e: /* DS */
1279 *seg = &vmcb->ds;
1280 continue;
1281 default:
1282 break;
1284 return;
1289 /* Get the address of INS/OUTS instruction */
1290 static inline int svm_get_io_address(
1291 struct vcpu *v,
1292 struct cpu_user_regs *regs, unsigned int dir,
1293 unsigned long *count, unsigned long *addr)
1295 unsigned long reg;
1296 unsigned int asize = 0;
1297 unsigned int isize;
1298 int long_mode;
1299 ioio_info_t info;
1300 segment_selector_t *seg = NULL;
1301 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1303 info.bytes = vmcb->exitinfo1;
1305 /* If we're in long mode, we shouldn't check the segment presence and limit */
1306 long_mode = vmcb->cs.attributes.fields.l && vmcb->efer & EFER_LMA;
1308 /* d field of cs.attributes is 1 for 32-bit, 0 for 16 or 64 bit.
1309 * l field combined with EFER_LMA -> longmode says whether it's 16 or 64 bit.
1310 */
1311 asize = (long_mode)?64:((vmcb->cs.attributes.fields.db)?32:16);
1314 /* The INS/OUTS instructions are a single byte, so if we have more
1315 * than one byte (plus maybe a REP prefix), there is some prefix and we
1316 * need to figure out what it is...
1317 */
1318 isize = vmcb->exitinfo2 - vmcb->rip;
1320 if (info.fields.rep)
1321 isize --;
1323 if (isize > 1)
1325 svm_get_prefix_info(vmcb, dir, &seg, &asize);
1328 ASSERT(dir == IOREQ_READ || dir == IOREQ_WRITE);
1330 if (dir == IOREQ_WRITE)
1332 reg = regs->esi;
1333 if (!seg) /* If no prefix, use DS. */
1334 seg = &vmcb->ds;
1336 else
1338 reg = regs->edi;
1339 seg = &vmcb->es; /* Note: This is ALWAYS ES. */
1342 /* If the segment isn't present, give GP fault! */
1343 if (!long_mode && !seg->attributes.fields.p)
1345 svm_inject_exception(v, TRAP_gp_fault, 1, seg->sel);
1346 return 0;
1349 if (asize == 16)
1351 *addr = (reg & 0xFFFF);
1352 *count = regs->ecx & 0xffff;
1354 else
1356 *addr = reg;
1357 *count = regs->ecx;
1360 if (!long_mode) {
1361 if (*addr > seg->limit)
1363 svm_inject_exception(v, TRAP_gp_fault, 1, seg->sel);
1364 return 0;
1366 else
1368 *addr += seg->base;
1373 return 1;
1377 static void svm_io_instruction(struct vcpu *v, struct cpu_user_regs *regs)
1379 struct mmio_op *mmio_opp;
1380 unsigned int port;
1381 unsigned int size, dir;
1382 ioio_info_t info;
1383 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1385 ASSERT(vmcb);
1386 mmio_opp = &current->arch.hvm_vcpu.mmio_op;
1387 mmio_opp->instr = INSTR_PIO;
1388 mmio_opp->flags = 0;
1390 info.bytes = vmcb->exitinfo1;
1392 port = info.fields.port; /* port used to be addr */
1393 dir = info.fields.type; /* direction */
1394 if (info.fields.sz32)
1395 size = 4;
1396 else if (info.fields.sz16)
1397 size = 2;
1398 else
1399 size = 1;
1401 HVM_DBG_LOG(DBG_LEVEL_IO,
1402 "svm_io_instruction: port 0x%x eip=%x:%"PRIx64", "
1403 "exit_qualification = %"PRIx64,
1404 port, vmcb->cs.sel, vmcb->rip, info.bytes);
1406 /* string instruction */
1407 if (info.fields.str)
1409 unsigned long addr, count;
1410 int sign = regs->eflags & EF_DF ? -1 : 1;
1412 if (!svm_get_io_address(v, regs, dir, &count, &addr))
1414 /* We failed to get a valid address, so don't do the IO operation -
1415 * it would just get worse if we do! Hopefully the guest is handling
1416 * gp-faults...
1417 */
1418 return;
1421 /* "rep" prefix */
1422 if (info.fields.rep)
1424 mmio_opp->flags |= REPZ;
1426 else
1428 count = 1;
1431 /*
1432 * Handle string pio instructions that cross pages or that
1433 * are unaligned. See the comments in hvm_platform.c/handle_mmio()
1434 */
1435 if ((addr & PAGE_MASK) != ((addr + size - 1) & PAGE_MASK))
1437 unsigned long value = 0;
1439 mmio_opp->flags |= OVERLAP;
1441 if (dir == IOREQ_WRITE)
1442 hvm_copy(&value, addr, size, HVM_COPY_IN);
1444 send_pio_req(regs, port, 1, size, value, dir, 0);
1446 else
1448 if ((addr & PAGE_MASK) != ((addr + count * size - 1) & PAGE_MASK))
1450 if (sign > 0)
1451 count = (PAGE_SIZE - (addr & ~PAGE_MASK)) / size;
1452 else
1453 count = (addr & ~PAGE_MASK) / size;
1455 else
1456 vmcb->rip = vmcb->exitinfo2;
1458 send_pio_req(regs, port, count, size, addr, dir, 1);
1461 else
1463 /*
1464 * On SVM, the RIP of the instruction following the IN/OUT is saved in
1465 * ExitInfo2
1466 */
1467 vmcb->rip = vmcb->exitinfo2;
1469 if (port == 0xe9 && dir == IOREQ_WRITE && size == 1)
1470 hvm_print_line(v, regs->eax); /* guest debug output */
1472 send_pio_req(regs, port, 1, size, regs->eax, dir, 0);
1476 static int svm_set_cr0(unsigned long value)
1478 struct vcpu *v = current;
1479 unsigned long mfn;
1480 int paging_enabled;
1481 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1483 ASSERT(vmcb);
1485 /* We don't want to lose PG. ET is reserved and should always be 1. */
1486 paging_enabled = svm_paging_enabled(v);
1487 value |= X86_CR0_ET;
1488 vmcb->cr0 = value | X86_CR0_PG;
1489 v->arch.hvm_svm.cpu_shadow_cr0 = value;
1491 /* TS cleared? Then initialise FPU now. */
1492 if ( !(value & X86_CR0_TS) )
1494 setup_fpu(v);
1495 vmcb->exception_intercepts &= ~EXCEPTION_BITMAP_NM;
1498 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR0 value = %lx\n", value);
1500 if ((value & X86_CR0_PE) && (value & X86_CR0_PG) && !paging_enabled)
1502 /* The guest CR3 must point to a valid guest physical address. */
1503 if (!VALID_MFN(mfn =
1504 get_mfn_from_gpfn(v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT))
1505 || !get_page(mfn_to_page(mfn), v->domain))
1507 printk("Invalid CR3 value = %lx\n", v->arch.hvm_svm.cpu_cr3);
1508 domain_crash_synchronous(); /* need to take a clean path */
1511 #if defined(__x86_64__)
1512 if (test_bit(SVM_CPU_STATE_LME_ENABLED, &v->arch.hvm_svm.cpu_state)
1513 && !test_bit(SVM_CPU_STATE_PAE_ENABLED,
1514 &v->arch.hvm_svm.cpu_state))
1516 HVM_DBG_LOG(DBG_LEVEL_1, "Enable paging before PAE enable\n");
1517 svm_inject_exception(v, TRAP_gp_fault, 1, 0);
1520 if (test_bit(SVM_CPU_STATE_LME_ENABLED, &v->arch.hvm_svm.cpu_state))
1522 /* PAE should already be enabled here. */
1523 HVM_DBG_LOG(DBG_LEVEL_1, "Enable the Long mode\n");
1524 set_bit(SVM_CPU_STATE_LMA_ENABLED,
1525 &v->arch.hvm_svm.cpu_state);
1526 vmcb->efer |= (EFER_LMA | EFER_LME);
1527 if (!shadow_set_guest_paging_levels(v->domain, PAGING_L4) )
1529 printk("Unsupported guest paging levels\n");
1530 domain_crash_synchronous(); /* need to take a clean path */
1533 else
1534 #endif /* __x86_64__ */
1536 #if CONFIG_PAGING_LEVELS >= 3
1537 /* seems it's a 32-bit or 32-bit PAE guest */
1538 if ( test_bit(SVM_CPU_STATE_PAE_ENABLED,
1539 &v->arch.hvm_svm.cpu_state) )
1541 /* The guest enables PAE first and then enables PG, so it is
1542 * really a PAE guest */
1543 if ( !shadow_set_guest_paging_levels(v->domain, PAGING_L3) )
1545 printk("Unsupported guest paging levels\n");
1546 domain_crash_synchronous();
1549 else
1551 if ( !shadow_set_guest_paging_levels(v->domain, PAGING_L2) )
1553 printk("Unsupported guest paging levels\n");
1554 domain_crash_synchronous(); /* need to take a clean path */
1557 #endif
1560 /* Now arch.guest_table points to machine physical. */
1561 v->arch.guest_table = pagetable_from_pfn(mfn);
1562 update_pagetables(v);
1564 HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx",
1565 (unsigned long) (mfn << PAGE_SHIFT));
1567 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
1568 vmcb->cr3 = pagetable_get_paddr(v->arch.shadow_table);
1570 /* arch->shadow_table should hold the next CR3 for shadow */
1571 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx\n",
1572 v->arch.hvm_svm.cpu_cr3, mfn);
1574 return 1;
1577 if ( !((value & X86_CR0_PE) && (value & X86_CR0_PG)) && paging_enabled )
1578 if ( v->arch.hvm_svm.cpu_cr3 ) {
1579 put_page(mfn_to_page(get_mfn_from_gpfn(
1580 v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT)));
1581 v->arch.guest_table = pagetable_null();
1584 /*
1585 * SVM implements paged real-mode and when we return to real-mode
1586 * we revert back to the physical mappings that the domain builder
1587 * created.
1588 */
1589 if ((value & X86_CR0_PE) == 0) {
1590 if (value & X86_CR0_PG) {
1591 svm_inject_exception(v, TRAP_gp_fault, 1, 0);
1592 return 0;
1595 clear_all_shadow_status( v->domain );
1596 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
1597 vmcb->cr3 = pagetable_get_paddr(v->domain->arch.phys_table);
1599 else if ( (value & (X86_CR0_PE | X86_CR0_PG)) == X86_CR0_PE )
1601 /* we should take care of this kind of situation */
1602 clear_all_shadow_status(v->domain);
1603 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
1604 vmcb->cr3 = pagetable_get_paddr(v->domain->arch.phys_table);
1607 return 1;
1610 /*
1611 * Read from control registers. CR0 and CR4 are read from the shadow.
1612 */
1613 static void mov_from_cr(int cr, int gp, struct cpu_user_regs *regs)
1615 unsigned long value = 0;
1616 struct vcpu *v = current;
1617 struct vmcb_struct *vmcb;
1619 vmcb = v->arch.hvm_svm.vmcb;
1620 ASSERT(vmcb);
1622 switch (cr)
1624 case 0:
1625 value = v->arch.hvm_svm.cpu_shadow_cr0;
1626 if (svm_dbg_on)
1627 printk("CR0 read =%lx \n", value );
1628 break;
1629 case 2:
1630 value = vmcb->cr2;
1631 break;
1632 case 3:
1633 value = (unsigned long) v->arch.hvm_svm.cpu_cr3;
1634 if (svm_dbg_on)
1635 printk("CR3 read =%lx \n", value );
1636 break;
1637 case 4:
1638 value = (unsigned long) v->arch.hvm_svm.cpu_shadow_cr4;
1639 if (svm_dbg_on)
1640 printk( "CR4 read=%lx\n", value );
1641 break;
1642 case 8:
1643 #if 0
1644 value = vmcb->m_cr8;
1645 #else
1646 ASSERT(0);
1647 #endif
1648 break;
1650 default:
1651 __hvm_bug(regs);
1654 set_reg(gp, value, regs, vmcb);
1656 HVM_DBG_LOG(DBG_LEVEL_VMMU, "mov_from_cr: CR%d, value = %lx,", cr, value);
1660 static inline int svm_pgbit_test(struct vcpu *v)
1662 return v->arch.hvm_svm.cpu_shadow_cr0 & X86_CR0_PG;
1666 /*
1667 * Write to control registers
1668 */
1669 static int mov_to_cr(int gpreg, int cr, struct cpu_user_regs *regs)
1671 unsigned long value;
1672 unsigned long old_cr;
1673 struct vcpu *v = current;
1674 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1676 ASSERT(vmcb);
1678 value = get_reg(gpreg, regs, vmcb);
1680 HVM_DBG_LOG(DBG_LEVEL_1, "mov_to_cr: CR%d, value = %lx,", cr, value);
1681 HVM_DBG_LOG(DBG_LEVEL_1, "current = %lx,", (unsigned long) current);
1683 switch (cr)
1685 case 0:
1686 if (svm_dbg_on)
1687 printk("CR0 write =%lx \n", value );
1688 return svm_set_cr0(value);
1690 case 3:
1692 unsigned long old_base_mfn, mfn;
1693 if (svm_dbg_on)
1694 printk("CR3 write =%lx \n", value );
1695 /* If paging is not enabled yet, simply copy the value to CR3. */
1696 if (!svm_paging_enabled(v)) {
1697 v->arch.hvm_svm.cpu_cr3 = value;
1698 break;
1700 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
1702 /* We make a new one if the shadow does not exist. */
1703 if (value == v->arch.hvm_svm.cpu_cr3)
1705 /*
1706 * This is a simple TLB flush, implying the guest has
1707 * removed some translation or changed page attributes.
1708 * We simply invalidate the shadow.
1709 */
1710 mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT);
1711 if (mfn != pagetable_get_pfn(v->arch.guest_table))
1712 __hvm_bug(regs);
1713 shadow_sync_all(v->domain);
1715 else
1717 /*
1718 * If different, make a shadow. Check if the PDBR is valid
1719 * first.
1720 */
1721 HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 value = %lx", value);
1722 if (((value >> PAGE_SHIFT) > v->domain->max_pages)
1723 || !VALID_MFN(mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT))
1724 || !get_page(mfn_to_page(mfn), v->domain))
1726 printk("Invalid CR3 value=%lx\n", value);
1727 domain_crash_synchronous(); /* need to take a clean path */
1730 old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
1731 v->arch.guest_table = pagetable_from_pfn(mfn);
1733 if (old_base_mfn)
1734 put_page(mfn_to_page(old_base_mfn));
1736 /*
1737 * arch.shadow_table should now hold the next CR3 for shadow
1738 */
1739 #if CONFIG_PAGING_LEVELS >= 3
1740 if ( v->domain->arch.ops->guest_paging_levels == PAGING_L3 )
1741 shadow_sync_all(v->domain);
1742 #endif
1743 v->arch.hvm_svm.cpu_cr3 = value;
1744 update_pagetables(v);
1745 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx", value);
1746 vmcb->cr3 = pagetable_get_paddr(v->arch.shadow_table);
1748 break;
1751 case 4: /* CR4 */
1753 if (svm_dbg_on)
1754 printk( "write cr4=%lx, cr0=%lx\n",
1755 value, v->arch.hvm_svm.cpu_shadow_cr0 );
1756 old_cr = v->arch.hvm_svm.cpu_shadow_cr4;
1757 if ( value & X86_CR4_PAE && !(old_cr & X86_CR4_PAE) )
1759 set_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
1760 if ( svm_pgbit_test(v) )
1762 /* The guest is a 32-bit PAE guest. */
1763 #if CONFIG_PAGING_LEVELS >= 4
1764 unsigned long mfn, old_base_mfn;
1766 if( !shadow_set_guest_paging_levels(v->domain, PAGING_L3) )
1768 printk("Unsupported guest paging levels\n");
1769 domain_crash_synchronous(); /* need to take a clean path */
1772 if ( !VALID_MFN(mfn = get_mfn_from_gpfn(
1773 v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT)) ||
1774 !get_page(mfn_to_page(mfn), v->domain) )
1776 printk("Invalid CR3 value = %lx", v->arch.hvm_svm.cpu_cr3);
1777 domain_crash_synchronous(); /* need to take a clean path */
1780 old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
1781 if ( old_base_mfn )
1782 put_page(mfn_to_page(old_base_mfn));
1784 /*
1785 * Now arch.guest_table points to machine physical.
1786 */
1788 v->arch.guest_table = pagetable_from_pfn(mfn);
1789 update_pagetables(v);
1791 HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx",
1792 (unsigned long) (mfn << PAGE_SHIFT));
1794 vmcb->cr3 = pagetable_get_paddr(v->arch.shadow_table);
1796 /*
1797 * arch->shadow_table should hold the next CR3 for shadow
1798 */
1800 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx",
1801 v->arch.hvm_svm.cpu_cr3, mfn);
1802 #endif
1804 else
1806 /* The guest is a 64 bit or 32-bit PAE guest. */
1807 #if CONFIG_PAGING_LEVELS >= 4
1808 if ( (v->domain->arch.ops != NULL) &&
1809 v->domain->arch.ops->guest_paging_levels == PAGING_L2)
1811 /* It seems the guest first enables PAE without enabling PG;
1812 * it must enable PG after that, so it is a 32-bit PAE
1813 * guest */
1815 if ( !shadow_set_guest_paging_levels(v->domain, PAGING_L3) )
1817 printk("Unsupported guest paging levels\n");
1818 domain_crash_synchronous();
1821 else
1823 if ( !shadow_set_guest_paging_levels(v->domain,
1824 PAGING_L4) )
1826 printk("Unsupported guest paging levels\n");
1827 domain_crash_synchronous();
1830 #endif
1833 else if (value & X86_CR4_PAE) {
1834 set_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
1835 } else {
1836 if (test_bit(SVM_CPU_STATE_LMA_ENABLED,
1837 &v->arch.hvm_svm.cpu_state)) {
1838 svm_inject_exception(v, TRAP_gp_fault, 1, 0);
1840 clear_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
1843 v->arch.hvm_svm.cpu_shadow_cr4 = value;
1844 vmcb->cr4 = value | SVM_CR4_HOST_MASK;
1846 /*
1847 * Writing to CR4 to modify the PSE, PGE, or PAE flag invalidates
1848 * all TLB entries except global entries.
1849 */
1850 if ((old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE))
1852 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
1853 shadow_sync_all(v->domain);
1855 break;
1858 default:
1859 printk("invalid cr: %d\n", cr);
1860 __hvm_bug(regs);
1863 return 1;
1867 #define ARR_SIZE(x) (sizeof(x) / sizeof(x[0]))
1870 static int svm_cr_access(struct vcpu *v, unsigned int cr, unsigned int type,
1871 struct cpu_user_regs *regs)
1873 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1874 int inst_len = 0;
1875 int index;
1876 unsigned int gpreg;
1877 unsigned long value;
1878 u8 buffer[MAX_INST_LEN];
1879 u8 prefix = 0;
1880 int result = 1;
1881 enum instruction_index list_a[] = {INSTR_MOV2CR, INSTR_CLTS, INSTR_LMSW};
1882 enum instruction_index list_b[] = {INSTR_MOVCR2, INSTR_SMSW};
1883 enum instruction_index match;
1885 ASSERT(vmcb);
1887 inst_copy_from_guest(buffer, svm_rip2pointer(vmcb), sizeof(buffer));
1888 /* get index to first actual instruction byte - as we will need to know where the
1889 * prefix lives later on
1890 */
1891 index = skip_prefix_bytes(buffer, sizeof(buffer));
1893 if (type == TYPE_MOV_TO_CR)
1895 inst_len = __get_instruction_length_from_list(vmcb, list_a,
1896 ARR_SIZE(list_a), &buffer[index], &match);
1898 else
1900 inst_len = __get_instruction_length_from_list(vmcb, list_b,
1901 ARR_SIZE(list_b), &buffer[index], &match);
1904 ASSERT(inst_len > 0);
1906 inst_len += index;
1908 /* Check for REX prefix - it's ALWAYS the last byte of any prefix bytes */
1909 if (index > 0 && (buffer[index-1] & 0xF0) == 0x40)
1910 prefix = buffer[index-1];
1912 HVM_DBG_LOG(DBG_LEVEL_1, "eip = %lx", (unsigned long) vmcb->rip);
1914 switch (match)
1916 case INSTR_MOV2CR:
1917 gpreg = decode_src_reg(prefix, buffer[index+2]);
1918 result = mov_to_cr(gpreg, cr, regs);
1919 break;
1921 case INSTR_MOVCR2:
1922 gpreg = decode_src_reg(prefix, buffer[index+2]);
1923 mov_from_cr(cr, gpreg, regs);
1924 break;
1926 case INSTR_CLTS:
1927 /* TS being cleared means that it's time to restore fpu state. */
1928 setup_fpu(current);
1929 vmcb->exception_intercepts &= ~EXCEPTION_BITMAP_NM;
1930 vmcb->cr0 &= ~X86_CR0_TS; /* clear TS */
1931 v->arch.hvm_svm.cpu_shadow_cr0 &= ~X86_CR0_TS; /* clear TS */
1932 break;
1934 case INSTR_LMSW:
1935 if (svm_dbg_on)
1936 svm_dump_inst(svm_rip2pointer(vmcb));
1938 gpreg = decode_src_reg(prefix, buffer[index+2]);
1939 value = get_reg(gpreg, regs, vmcb) & 0xF;
1941 if (svm_dbg_on)
1942 printk("CR0-LMSW value=%lx, reg=%d, inst_len=%d\n", value, gpreg,
1943 inst_len);
1945 value = (v->arch.hvm_svm.cpu_shadow_cr0 & ~0xF) | value;
1947 if (svm_dbg_on)
1948 printk("CR0-LMSW CR0 - New value=%lx\n", value);
1950 result = svm_set_cr0(value);
1951 break;
1953 case INSTR_SMSW:
1954 if (svm_dbg_on)
1955 svm_dump_inst(svm_rip2pointer(vmcb));
1956 value = v->arch.hvm_svm.cpu_shadow_cr0;
1957 gpreg = decode_src_reg(prefix, buffer[index+2]);
1958 set_reg(gpreg, value, regs, vmcb);
1960 if (svm_dbg_on)
1961 printk("CR0-SMSW value=%lx, reg=%d, inst_len=%d\n", value, gpreg,
1962 inst_len);
1963 break;
1965 default:
1966 __hvm_bug(regs);
1967 break;
1970 ASSERT(inst_len);
1972 __update_guest_eip(vmcb, inst_len);
1974 return result;
1977 static inline void svm_do_msr_access(struct vcpu *v, struct cpu_user_regs *regs)
1979 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1980 int inst_len;
1981 u64 msr_content=0;
1983 ASSERT(vmcb);
1985 HVM_DBG_LOG(DBG_LEVEL_1, "svm_do_msr_access: ecx=%lx, eax=%lx, edx=%lx, "
1986 "exitinfo = %lx", (unsigned long)regs->ecx,
1987 (unsigned long)regs->eax, (unsigned long)regs->edx,
1988 (unsigned long)vmcb->exitinfo1);
1990 /* is it a read? */
1991 if (vmcb->exitinfo1 == 0)
1993 inst_len = __get_instruction_length(vmcb, INSTR_RDMSR, NULL);
1995 regs->edx = 0;
1996 switch (regs->ecx) {
1997 case MSR_IA32_TIME_STAMP_COUNTER:
1998 msr_content = hvm_get_guest_time(v);
1999 break;
2000 case MSR_IA32_SYSENTER_CS:
2001 msr_content = vmcb->sysenter_cs;
2002 break;
2003 case MSR_IA32_SYSENTER_ESP:
2004 msr_content = vmcb->sysenter_esp;
2005 break;
2006 case MSR_IA32_SYSENTER_EIP:
2007 msr_content = vmcb->sysenter_eip;
2008 break;
2009 case MSR_IA32_APICBASE:
2010 msr_content = VLAPIC(v) ? VLAPIC(v)->apic_base_msr : 0;
2011 break;
2012 default:
2013 if (long_mode_do_msr_read(regs))
2014 goto done;
2015 rdmsr_safe(regs->ecx, regs->eax, regs->edx);
2016 break;
2018 regs->eax = msr_content & 0xFFFFFFFF;
2019 regs->edx = msr_content >> 32;
2021 else
2023 inst_len = __get_instruction_length(vmcb, INSTR_WRMSR, NULL);
2024 msr_content = (regs->eax & 0xFFFFFFFF) | ((u64)regs->edx << 32);
2026 switch (regs->ecx)
2028 case MSR_IA32_TIME_STAMP_COUNTER:
2029 svm_set_guest_time(v, msr_content);
2030 break;
2031 case MSR_IA32_SYSENTER_CS:
2032 vmcb->sysenter_cs = msr_content;
2033 break;
2034 case MSR_IA32_SYSENTER_ESP:
2035 vmcb->sysenter_esp = msr_content;
2036 break;
2037 case MSR_IA32_SYSENTER_EIP:
2038 vmcb->sysenter_eip = msr_content;
2039 break;
2040 case MSR_IA32_APICBASE:
2041 vlapic_msr_set(VLAPIC(v), msr_content);
2042 break;
2043 default:
2044 long_mode_do_msr_write(regs);
2045 break;
2049 done:
2051 HVM_DBG_LOG(DBG_LEVEL_1, "svm_do_msr_access returns: "
2052 "ecx=%lx, eax=%lx, edx=%lx",
2053 (unsigned long)regs->ecx, (unsigned long)regs->eax,
2054 (unsigned long)regs->edx);
2056 __update_guest_eip(vmcb, inst_len);
2060 /*
2061 * Need to use this exit to reschedule
2062 */
2063 static inline void svm_vmexit_do_hlt(struct vmcb_struct *vmcb)
2065 struct vcpu *v = current;
2066 struct periodic_time *pt=&v->domain->arch.hvm_domain.pl_time.periodic_tm;
2067 s_time_t next_pit = -1, next_wakeup;
2069 __update_guest_eip(vmcb, 1);
2071 /* check for interrupt not handled or new interrupt */
2072 if ( vmcb->vintr.fields.irq || cpu_has_pending_irq(v) )
2073 return;
2075 if ( !v->vcpu_id )
2076 next_pit = get_scheduled(v, pt->irq, pt);
2077 next_wakeup = get_apictime_scheduled(v);
2078 if ( (next_pit != -1 && next_pit < next_wakeup) || next_wakeup == -1 )
2079 next_wakeup = next_pit;
2080 if ( next_wakeup != - 1 )
2081 set_timer(&current->arch.hvm_svm.hlt_timer, next_wakeup);
2082 hvm_safe_block();
2086 static void svm_vmexit_do_invd(struct vmcb_struct *vmcb)
2088 int inst_len;
2090 /* Invalidate the cache - we can't really do that safely - maybe we should
2091 * WBINVD, but I think it's just fine to completely ignore it - we should
2092 * have cache-snooping that solves it anyways. -- Mats P.
2093 */
2095 /* Tell the user that we did this - just in case someone runs some really weird
2096 * operating system and wants to know why it's not working as it should...
2097 */
2098 printk("INVD instruction intercepted - ignored\n");
2100 inst_len = __get_instruction_length(vmcb, INSTR_INVD, NULL);
2101 __update_guest_eip(vmcb, inst_len);
2107 #ifdef XEN_DEBUGGER
2108 static void svm_debug_save_cpu_user_regs(struct vmcb_struct *vmcb,
2109 struct cpu_user_regs *regs)
2111 regs->eip = vmcb->rip;
2112 regs->esp = vmcb->rsp;
2113 regs->eflags = vmcb->rflags;
2115 regs->xcs = vmcb->cs.sel;
2116 regs->xds = vmcb->ds.sel;
2117 regs->xes = vmcb->es.sel;
2118 regs->xfs = vmcb->fs.sel;
2119 regs->xgs = vmcb->gs.sel;
2120 regs->xss = vmcb->ss.sel;
2124 static void svm_debug_restore_cpu_user_regs(struct cpu_user_regs *regs)
2126 vmcb->ss.sel = regs->xss;
2127 vmcb->rsp = regs->esp;
2128 vmcb->rflags = regs->eflags;
2129 vmcb->cs.sel = regs->xcs;
2130 vmcb->rip = regs->eip;
2132 vmcb->gs.sel = regs->xgs;
2133 vmcb->fs.sel = regs->xfs;
2134 vmcb->es.sel = regs->xes;
2135 vmcb->ds.sel = regs->xds;
2137 #endif
2140 void svm_handle_invlpg(const short invlpga, struct cpu_user_regs *regs)
2142 struct vcpu *v = current;
2143 u8 opcode[MAX_INST_LEN], prefix, length = MAX_INST_LEN;
2144 unsigned long g_vaddr;
2145 int inst_len;
2146 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2148 ASSERT(vmcb);
2149 /*
2150 * Unknown how many bytes the invlpg instruction will take. Use the
2151 * maximum instruction length here
2152 */
2153 if (inst_copy_from_guest(opcode, svm_rip2pointer(vmcb), length) < length)
2155 printk("svm_handle_invlpg (): Error reading memory %d bytes\n", length);
2156 __hvm_bug(regs);
2159 if (invlpga)
2161 inst_len = __get_instruction_length(vmcb, INSTR_INVLPGA, opcode);
2162 ASSERT(inst_len > 0);
2163 __update_guest_eip(vmcb, inst_len);
2165 /*
2166 * The address is implicit in this instruction (it is taken from rAX).
2167 * At the moment we don't use ecx (ASID) to identify individual guest pages.
2168 */
2169 g_vaddr = regs->eax;
2171 else
2173 /* What about multiple prefix codes? */
2174 prefix = (is_prefix(opcode[0])?opcode[0]:0);
2175 inst_len = __get_instruction_length(vmcb, INSTR_INVLPG, opcode);
2176 ASSERT(inst_len > 0);
2178 inst_len--;
2179 length -= inst_len;
2181 /*
2182 * Decode the memory operand of the instruction, including ModRM, SIB and
2183 * displacement, to get the effective address and length in bytes. Assume
2184 * the system is in either 32- or 64-bit mode.
2185 */
2186 g_vaddr = get_effective_addr_modrm64(vmcb, regs, prefix,
2187 &opcode[inst_len], &length);
2189 inst_len += length;
2190 __update_guest_eip (vmcb, inst_len);
2193 /* Overkill; we may not need this. */
2194 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
2195 shadow_invlpg(v, g_vaddr);
2199 /*
2200 * Reset to real mode causes execution to start at 0xF000:0xFFF0 in
2201 * 16-bit real mode, mimicking a processor reset.
2203 * Returns 0 on success, non-zero otherwise.
2204 */
2205 static int svm_do_vmmcall_reset_to_realmode(struct vcpu *v,
2206 struct cpu_user_regs *regs)
2208 struct vmcb_struct *vmcb;
2210 ASSERT(v);
2211 ASSERT(regs);
2213 vmcb = v->arch.hvm_svm.vmcb;
2215 ASSERT(vmcb);
2217 /* Clear the user regs; the relevant VMCB fields are reset individually below. */
2218 memset(regs, 0, sizeof(struct cpu_user_regs));
2220 /* VMCB Control */
2221 vmcb->tsc_offset = 0;
2223 /* VMCB State */
2224 vmcb->cr0 = X86_CR0_ET | X86_CR0_PG;
2225 v->arch.hvm_svm.cpu_shadow_cr0 = X86_CR0_ET;
2227 vmcb->cr2 = 0;
2228 vmcb->efer = EFER_SVME;
2230 vmcb->cr4 = SVM_CR4_HOST_MASK;
2231 v->arch.hvm_svm.cpu_shadow_cr4 = 0;
2232 clear_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
2234 /* This will jump to ROMBIOS */
2235 vmcb->rip = 0xFFF0;
2237 /* setup the segment registers and all their hidden states */
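/*
 * These values approximate the architectural reset state: CS gets selector
 * 0xF000 with base 0xF0000 so that CS:IP points into the top of the ROMBIOS
 * image, while the other segments start at base 0 with a 64K limit.  In the
 * attribute encodings the low byte is the usual access byte (0x9b: present
 * DPL-0 code segment; 0x93: present DPL-0 writable data segment); the upper
 * bits carry the descriptor's remaining attribute flags in the VMCB's
 * packed attribute format.
 */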
2238 vmcb->cs.sel = 0xF000;
2239 vmcb->cs.attributes.bytes = 0x089b;
2240 vmcb->cs.limit = 0xffff;
2241 vmcb->cs.base = 0x000F0000;
2243 vmcb->ss.sel = 0x00;
2244 vmcb->ss.attributes.bytes = 0x0893;
2245 vmcb->ss.limit = 0xffff;
2246 vmcb->ss.base = 0x00;
2248 vmcb->ds.sel = 0x00;
2249 vmcb->ds.attributes.bytes = 0x0893;
2250 vmcb->ds.limit = 0xffff;
2251 vmcb->ds.base = 0x00;
2253 vmcb->es.sel = 0x00;
2254 vmcb->es.attributes.bytes = 0x0893;
2255 vmcb->es.limit = 0xffff;
2256 vmcb->es.base = 0x00;
2258 vmcb->fs.sel = 0x00;
2259 vmcb->fs.attributes.bytes = 0x0893;
2260 vmcb->fs.limit = 0xffff;
2261 vmcb->fs.base = 0x00;
2263 vmcb->gs.sel = 0x00;
2264 vmcb->gs.attributes.bytes = 0x0893;
2265 vmcb->gs.limit = 0xffff;
2266 vmcb->gs.base = 0x00;
2268 vmcb->ldtr.sel = 0x00;
2269 vmcb->ldtr.attributes.bytes = 0x0000;
2270 vmcb->ldtr.limit = 0x0;
2271 vmcb->ldtr.base = 0x00;
2273 vmcb->gdtr.sel = 0x00;
2274 vmcb->gdtr.attributes.bytes = 0x0000;
2275 vmcb->gdtr.limit = 0x0;
2276 vmcb->gdtr.base = 0x00;
2278 vmcb->tr.sel = 0;
2279 vmcb->tr.attributes.bytes = 0;
2280 vmcb->tr.limit = 0x0;
2281 vmcb->tr.base = 0;
2283 vmcb->idtr.sel = 0x00;
2284 vmcb->idtr.attributes.bytes = 0x0000;
2285 vmcb->idtr.limit = 0x3ff;
2286 vmcb->idtr.base = 0x00;
2288 vmcb->rax = 0;
2289 vmcb->rsp = 0;
2291 return 0;
2295 /*
2296 * svm_do_vmmcall - SVM VMMCALL handler
2298 * returns 0 on success, non-zero otherwise
2299 */
2300 static int svm_do_vmmcall(struct vcpu *v, struct cpu_user_regs *regs)
2302 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2303 int inst_len;
2305 ASSERT(vmcb);
2306 ASSERT(regs);
2308 inst_len = __get_instruction_length(vmcb, INSTR_VMCALL, NULL);
2309 ASSERT(inst_len > 0);
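/*
 * The VMMCALL convention used here passes the function code in EDI;
 * get_vmmcall_cpl() recovers the minimum CPL that is encoded alongside the
 * code (presumably defined in vmmcall.h).  A guest-side invocation would
 * therefore look roughly like this (hypothetical illustration only):
 *
 *     mov  $VMMCALL_RESET_TO_REALMODE, %edi
 *     vmmcall
 */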
2311 /* VMMCALL sanity check */
2312 if (vmcb->cpl > get_vmmcall_cpl(regs->edi))
2314 printf("VMMCALL CPL check failed\n");
2315 return -1;
2318 /* handle the request */
2319 switch (regs->edi)
2321 case VMMCALL_RESET_TO_REALMODE:
2322 if (svm_do_vmmcall_reset_to_realmode(v, regs))
2324 printf("svm_do_vmmcall_reset_to_realmode() failed\n");
2325 return -1;
2328 /* since we just reset the VMCB, return without adjusting the eip */
2329 return 0;
2330 case VMMCALL_DEBUG:
2331 printf("DEBUG features not implemented yet\n");
2332 break;
2333 default:
2334 break;
2337 hvm_print_line(v, regs->eax); /* provides the current domain */
2339 __update_guest_eip(vmcb, inst_len);
2340 return 0;
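/*
 * Debug helper: dump up to 256 bytes of guest code around the given eip.
 * The window starts at eip rounded down to a 256-byte boundary and is
 * printed 16 bytes per line.
 */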
2344 void svm_dump_inst(unsigned long eip)
2346 u8 opcode[256];
2347 unsigned long ptr;
2348 int len;
2349 int i;
2351 ptr = eip & ~0xff;
2352 len = 0;
2354 if (hvm_copy(opcode, ptr, sizeof(opcode), HVM_COPY_IN))
2355 len = sizeof(opcode);
2357 printf("Code bytes around(len=%d) %lx:", len, eip);
2358 for (i = 0; i < len; i++)
2360 if ((i & 0x0f) == 0)
2361 printf("\n%08lx:", ptr+i);
2363 printf("%02x ", opcode[i]);
2366 printf("\n");
2370 void svm_dump_regs(const char *from, struct cpu_user_regs *regs)
2372 struct vcpu *v = current;
2373 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2374 unsigned long pt = pagetable_get_paddr(v->arch.shadow_table);
2376 printf("%s: guest registers from %s:\n", __func__, from);
2377 #if defined (__x86_64__)
2378 printk("rax: %016lx rbx: %016lx rcx: %016lx\n",
2379 regs->rax, regs->rbx, regs->rcx);
2380 printk("rdx: %016lx rsi: %016lx rdi: %016lx\n",
2381 regs->rdx, regs->rsi, regs->rdi);
2382 printk("rbp: %016lx rsp: %016lx r8: %016lx\n",
2383 regs->rbp, regs->rsp, regs->r8);
2384 printk("r9: %016lx r10: %016lx r11: %016lx\n",
2385 regs->r9, regs->r10, regs->r11);
2386 printk("r12: %016lx r13: %016lx r14: %016lx\n",
2387 regs->r12, regs->r13, regs->r14);
2388 printk("r15: %016lx cr0: %016lx cr3: %016lx\n",
2389 regs->r15, v->arch.hvm_svm.cpu_shadow_cr0, vmcb->cr3);
2390 #else
2391 printf("eax: %08x, ebx: %08x, ecx: %08x, edx: %08x\n",
2392 regs->eax, regs->ebx, regs->ecx, regs->edx);
2393 printf("edi: %08x, esi: %08x, ebp: %08x, esp: %08x\n",
2394 regs->edi, regs->esi, regs->ebp, regs->esp);
2395 printf("%s: guest cr0: %lx\n", __func__,
2396 v->arch.hvm_svm.cpu_shadow_cr0);
2397 printf("guest CR3 = %llx\n", vmcb->cr3);
2398 #endif
2399 printf("%s: pt = %lx\n", __func__, pt);
2403 void svm_dump_host_regs(const char *from)
2405 struct vcpu *v = current;
2406 unsigned long pt = pagetable_get_paddr(v->arch.monitor_table);
2407 unsigned long cr3, cr0;
2408 printf("Host registers at %s\n", from);
2410 __asm__ __volatile__ ("\tmov %%cr0,%0\n"
2411 "\tmov %%cr3,%1\n"
2412 : "=r" (cr0), "=r"(cr3));
2413 printf("%s: pt = %lx, cr3 = %lx, cr0 = %lx\n", __func__, pt, cr3, cr0);
2416 #ifdef SVM_EXTRA_DEBUG
2417 static char *exit_reasons[] = {
2418 [VMEXIT_CR0_READ] = "CR0_READ",
2419 [VMEXIT_CR1_READ] = "CR1_READ",
2420 [VMEXIT_CR2_READ] = "CR2_READ",
2421 [VMEXIT_CR3_READ] = "CR3_READ",
2422 [VMEXIT_CR4_READ] = "CR4_READ",
2423 [VMEXIT_CR5_READ] = "CR5_READ",
2424 [VMEXIT_CR6_READ] = "CR6_READ",
2425 [VMEXIT_CR7_READ] = "CR7_READ",
2426 [VMEXIT_CR8_READ] = "CR8_READ",
2427 [VMEXIT_CR9_READ] = "CR9_READ",
2428 [VMEXIT_CR10_READ] = "CR10_READ",
2429 [VMEXIT_CR11_READ] = "CR11_READ",
2430 [VMEXIT_CR12_READ] = "CR12_READ",
2431 [VMEXIT_CR13_READ] = "CR13_READ",
2432 [VMEXIT_CR14_READ] = "CR14_READ",
2433 [VMEXIT_CR15_READ] = "CR15_READ",
2434 [VMEXIT_CR0_WRITE] = "CR0_WRITE",
2435 [VMEXIT_CR1_WRITE] = "CR1_WRITE",
2436 [VMEXIT_CR2_WRITE] = "CR2_WRITE",
2437 [VMEXIT_CR3_WRITE] = "CR3_WRITE",
2438 [VMEXIT_CR4_WRITE] = "CR4_WRITE",
2439 [VMEXIT_CR5_WRITE] = "CR5_WRITE",
2440 [VMEXIT_CR6_WRITE] = "CR6_WRITE",
2441 [VMEXIT_CR7_WRITE] = "CR7_WRITE",
2442 [VMEXIT_CR8_WRITE] = "CR8_WRITE",
2443 [VMEXIT_CR9_WRITE] = "CR9_WRITE",
2444 [VMEXIT_CR10_WRITE] = "CR10_WRITE",
2445 [VMEXIT_CR11_WRITE] = "CR11_WRITE",
2446 [VMEXIT_CR12_WRITE] = "CR12_WRITE",
2447 [VMEXIT_CR13_WRITE] = "CR13_WRITE",
2448 [VMEXIT_CR14_WRITE] = "CR14_WRITE",
2449 [VMEXIT_CR15_WRITE] = "CR15_WRITE",
2450 [VMEXIT_DR0_READ] = "DR0_READ",
2451 [VMEXIT_DR1_READ] = "DR1_READ",
2452 [VMEXIT_DR2_READ] = "DR2_READ",
2453 [VMEXIT_DR3_READ] = "DR3_READ",
2454 [VMEXIT_DR4_READ] = "DR4_READ",
2455 [VMEXIT_DR5_READ] = "DR5_READ",
2456 [VMEXIT_DR6_READ] = "DR6_READ",
2457 [VMEXIT_DR7_READ] = "DR7_READ",
2458 [VMEXIT_DR8_READ] = "DR8_READ",
2459 [VMEXIT_DR9_READ] = "DR9_READ",
2460 [VMEXIT_DR10_READ] = "DR10_READ",
2461 [VMEXIT_DR11_READ] = "DR11_READ",
2462 [VMEXIT_DR12_READ] = "DR12_READ",
2463 [VMEXIT_DR13_READ] = "DR13_READ",
2464 [VMEXIT_DR14_READ] = "DR14_READ",
2465 [VMEXIT_DR15_READ] = "DR15_READ",
2466 [VMEXIT_DR0_WRITE] = "DR0_WRITE",
2467 [VMEXIT_DR1_WRITE] = "DR1_WRITE",
2468 [VMEXIT_DR2_WRITE] = "DR2_WRITE",
2469 [VMEXIT_DR3_WRITE] = "DR3_WRITE",
2470 [VMEXIT_DR4_WRITE] = "DR4_WRITE",
2471 [VMEXIT_DR5_WRITE] = "DR5_WRITE",
2472 [VMEXIT_DR6_WRITE] = "DR6_WRITE",
2473 [VMEXIT_DR7_WRITE] = "DR7_WRITE",
2474 [VMEXIT_DR8_WRITE] = "DR8_WRITE",
2475 [VMEXIT_DR9_WRITE] = "DR9_WRITE",
2476 [VMEXIT_DR10_WRITE] = "DR10_WRITE",
2477 [VMEXIT_DR11_WRITE] = "DR11_WRITE",
2478 [VMEXIT_DR12_WRITE] = "DR12_WRITE",
2479 [VMEXIT_DR13_WRITE] = "DR13_WRITE",
2480 [VMEXIT_DR14_WRITE] = "DR14_WRITE",
2481 [VMEXIT_DR15_WRITE] = "DR15_WRITE",
2482 [VMEXIT_EXCEPTION_DE] = "EXCEPTION_DE",
2483 [VMEXIT_EXCEPTION_DB] = "EXCEPTION_DB",
2484 [VMEXIT_EXCEPTION_NMI] = "EXCEPTION_NMI",
2485 [VMEXIT_EXCEPTION_BP] = "EXCEPTION_BP",
2486 [VMEXIT_EXCEPTION_OF] = "EXCEPTION_OF",
2487 [VMEXIT_EXCEPTION_BR] = "EXCEPTION_BR",
2488 [VMEXIT_EXCEPTION_UD] = "EXCEPTION_UD",
2489 [VMEXIT_EXCEPTION_NM] = "EXCEPTION_NM",
2490 [VMEXIT_EXCEPTION_DF] = "EXCEPTION_DF",
2491 [VMEXIT_EXCEPTION_09] = "EXCEPTION_09",
2492 [VMEXIT_EXCEPTION_TS] = "EXCEPTION_TS",
2493 [VMEXIT_EXCEPTION_NP] = "EXCEPTION_NP",
2494 [VMEXIT_EXCEPTION_SS] = "EXCEPTION_SS",
2495 [VMEXIT_EXCEPTION_GP] = "EXCEPTION_GP",
2496 [VMEXIT_EXCEPTION_PF] = "EXCEPTION_PF",
2497 [VMEXIT_EXCEPTION_15] = "EXCEPTION_15",
2498 [VMEXIT_EXCEPTION_MF] = "EXCEPTION_MF",
2499 [VMEXIT_EXCEPTION_AC] = "EXCEPTION_AC",
2500 [VMEXIT_EXCEPTION_MC] = "EXCEPTION_MC",
2501 [VMEXIT_EXCEPTION_XF] = "EXCEPTION_XF",
2502 [VMEXIT_INTR] = "INTR",
2503 [VMEXIT_NMI] = "NMI",
2504 [VMEXIT_SMI] = "SMI",
2505 [VMEXIT_INIT] = "INIT",
2506 [VMEXIT_VINTR] = "VINTR",
2507 [VMEXIT_CR0_SEL_WRITE] = "CR0_SEL_WRITE",
2508 [VMEXIT_IDTR_READ] = "IDTR_READ",
2509 [VMEXIT_GDTR_READ] = "GDTR_READ",
2510 [VMEXIT_LDTR_READ] = "LDTR_READ",
2511 [VMEXIT_TR_READ] = "TR_READ",
2512 [VMEXIT_IDTR_WRITE] = "IDTR_WRITE",
2513 [VMEXIT_GDTR_WRITE] = "GDTR_WRITE",
2514 [VMEXIT_LDTR_WRITE] = "LDTR_WRITE",
2515 [VMEXIT_TR_WRITE] = "TR_WRITE",
2516 [VMEXIT_RDTSC] = "RDTSC",
2517 [VMEXIT_RDPMC] = "RDPMC",
2518 [VMEXIT_PUSHF] = "PUSHF",
2519 [VMEXIT_POPF] = "POPF",
2520 [VMEXIT_CPUID] = "CPUID",
2521 [VMEXIT_RSM] = "RSM",
2522 [VMEXIT_IRET] = "IRET",
2523 [VMEXIT_SWINT] = "SWINT",
2524 [VMEXIT_INVD] = "INVD",
2525 [VMEXIT_PAUSE] = "PAUSE",
2526 [VMEXIT_HLT] = "HLT",
2527 [VMEXIT_INVLPG] = "INVLPG",
2528 [VMEXIT_INVLPGA] = "INVLPGA",
2529 [VMEXIT_IOIO] = "IOIO",
2530 [VMEXIT_MSR] = "MSR",
2531 [VMEXIT_TASK_SWITCH] = "TASK_SWITCH",
2532 [VMEXIT_FERR_FREEZE] = "FERR_FREEZE",
2533 [VMEXIT_SHUTDOWN] = "SHUTDOWN",
2534 [VMEXIT_VMRUN] = "VMRUN",
2535 [VMEXIT_VMMCALL] = "VMMCALL",
2536 [VMEXIT_VMLOAD] = "VMLOAD",
2537 [VMEXIT_VMSAVE] = "VMSAVE",
2538 [VMEXIT_STGI] = "STGI",
2539 [VMEXIT_CLGI] = "CLGI",
2540 [VMEXIT_SKINIT] = "SKINIT",
2541 [VMEXIT_RDTSCP] = "RDTSCP",
2542 [VMEXIT_ICEBP] = "ICEBP",
2543 [VMEXIT_NPF] = "NPF"
2544 };
2545 #endif /* SVM_EXTRA_DEBUG */
2547 #ifdef SVM_WALK_GUEST_PAGES
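/*
 * Debug helper: for a guest virtual address, print the corresponding guest
 * and shadow L2 entries and the guest/shadow PTEs so the two sets of page
 * tables can be compared.  Only meaningful when guest paging is enabled and
 * the address does not fall in MMIO space.
 */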
2548 void walk_shadow_and_guest_pt(unsigned long gva)
2550 l2_pgentry_t gpde;
2551 l2_pgentry_t spde;
2552 l1_pgentry_t gpte;
2553 l1_pgentry_t spte;
2554 struct vcpu *v = current;
2555 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2556 unsigned long gpa;
2558 gpa = gva_to_gpa( gva );
2559 printk( "gva = %lx, gpa=%lx, gCR3=%x\n", gva, gpa, (u32)vmcb->cr3 );
2560 if( !svm_paging_enabled(v) || mmio_space(gpa) )
2561 return;
2563 /* let's dump the guest and shadow page info */
2565 __guest_get_l2e(v, gva, &gpde);
2566 printk( "G-PDE = %x, flags=%x\n", gpde.l2, l2e_get_flags(gpde) );
2567 __shadow_get_l2e( v, gva, &spde );
2568 printk( "S-PDE = %x, flags=%x\n", spde.l2, l2e_get_flags(spde) );
2570 if ( unlikely(!(l2e_get_flags(gpde) & _PAGE_PRESENT)) )
2571 return;
2573 spte = l1e_empty();
2575 /* This is actually overkill - we only need to make sure the hl2 is in-sync. */
2576 shadow_sync_va(v, gva);
2578 gpte.l1 = 0;
2579 __copy_from_user(&gpte, &linear_pg_table[ l1_linear_offset(gva) ], sizeof(gpte) );
2580 printk( "G-PTE = %x, flags=%x\n", gpte.l1, l1e_get_flags(gpte) );
2581 __copy_from_user( &spte, &phys_to_machine_mapping[ l1e_get_pfn( gpte ) ],
2582 sizeof(spte) );
2583 printk( "S-PTE = %x, flags=%x\n", spte.l1, l1e_get_flags(spte));
2585 #endif /* SVM_WALK_GUEST_PAGES */
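/*
 * Top-level VMEXIT handler, presumably entered from the low-level exit path
 * with the saved guest frame passed by value.  It snapshots the guest
 * register state from the VMCB and then dispatches on vmcb->exitcode.
 */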
2587 asmlinkage void svm_vmexit_handler(struct cpu_user_regs regs)
2589 unsigned int exit_reason;
2590 unsigned long eip;
2591 struct vcpu *v = current;
2592 int error;
2593 int do_debug = 0;
2594 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2596 ASSERT(vmcb);
2598 exit_reason = vmcb->exitcode;
2599 save_svm_cpu_user_regs(v, &regs);
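/* Conservatively request that the entire TLB be flushed on the next VMRUN
 * for this vcpu (tlb_control = 1 is the 'flush on VMRUN' encoding). */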
2601 vmcb->tlb_control = 1;
2603 #ifdef SVM_EXTRA_DEBUG
2605 #if defined(__i386__)
2606 #define rip eip
2607 #endif
2609 static unsigned long intercepts_counter = 0;
2611 if (svm_dbg_on && exit_reason == VMEXIT_EXCEPTION_PF)
2613 if (svm_paging_enabled(v) && !mmio_space(gva_to_gpa(vmcb->exitinfo2)))
2615 printk("I%08ld,ExC=%s(%d),IP=%x:%llx,I1=%llx,I2=%llx,INT=%llx, gpa=%llx\n",
2616 intercepts_counter,
2617 exit_reasons[exit_reason], exit_reason, regs.cs,
2618 (unsigned long long) regs.rip,
2619 (unsigned long long) vmcb->exitinfo1,
2620 (unsigned long long) vmcb->exitinfo2,
2621 (unsigned long long) vmcb->exitintinfo.bytes,
2622 (unsigned long long) gva_to_gpa( vmcb->exitinfo2 ) );
2624 else
2626 printk("I%08ld,ExC=%s(%d),IP=%x:%llx,I1=%llx,I2=%llx,INT=%llx\n",
2627 intercepts_counter,
2628 exit_reasons[exit_reason], exit_reason, regs.cs,
2629 (unsigned long long) regs.rip,
2630 (unsigned long long) vmcb->exitinfo1,
2631 (unsigned long long) vmcb->exitinfo2,
2632 (unsigned long long) vmcb->exitintinfo.bytes );
2635 else if ( svm_dbg_on
2636 && exit_reason != VMEXIT_IOIO
2637 && exit_reason != VMEXIT_INTR)
2640 if (exit_reasons[exit_reason])
2642 printk("I%08ld,ExC=%s(%d),IP=%x:%llx,I1=%llx,I2=%llx,INT=%llx\n",
2643 intercepts_counter,
2644 exit_reasons[exit_reason], exit_reason, regs.cs,
2645 (unsigned long long) regs.rip,
2646 (unsigned long long) vmcb->exitinfo1,
2647 (unsigned long long) vmcb->exitinfo2,
2648 (unsigned long long) vmcb->exitintinfo.bytes);
2650 else
2652 printk("I%08ld,ExC=%d(0x%x),IP=%x:%llx,I1=%llx,I2=%llx,INT=%llx\n",
2653 intercepts_counter, exit_reason, exit_reason, regs.cs,
2654 (unsigned long long) regs.rip,
2655 (unsigned long long) vmcb->exitinfo1,
2656 (unsigned long long) vmcb->exitinfo2,
2657 (unsigned long long) vmcb->exitintinfo.bytes);
2661 #ifdef SVM_WALK_GUEST_PAGES
2662 if( exit_reason == VMEXIT_EXCEPTION_PF
2663 && ( ( vmcb->exitinfo2 == vmcb->rip )
2664 || vmcb->exitintinfo.bytes) )
2666 if (svm_paging_enabled(v) && !mmio_space(gva_to_gpa(vmcb->exitinfo2)))
2667 walk_shadow_and_guest_pt( vmcb->exitinfo2 );
2669 #endif
2671 intercepts_counter++;
2673 #if 0
2674 if (svm_dbg_on)
2675 do_debug = svm_do_debugout(exit_reason);
2676 #endif
2678 if (do_debug)
2680 printk("%s:+ guest_table = 0x%08x, monitor_table = 0x%08x, "
2681 "shadow_table = 0x%08x\n",
2682 __func__,
2683 (int) v->arch.guest_table.pfn,
2684 (int) v->arch.monitor_table.pfn,
2685 (int) v->arch.shadow_table.pfn);
2687 svm_dump_vmcb(__func__, vmcb);
2688 svm_dump_regs(__func__, &regs);
2689 svm_dump_inst(svm_rip2pointer(vmcb));
2692 #if defined(__i386__)
2693 #undef rip
2694 #endif
2697 #endif /* SVM_EXTRA_DEBUG */
2699 if (exit_reason == -1)
2701 svm_dump_vmcb(__func__, vmcb);
2702 printk("%s: exit_reason == -1 - Did someone clobber the VMCB\n",
2703 __func__);
2704 domain_crash_synchronous();
2707 perfc_incra(vmexits, exit_reason);
2708 eip = vmcb->rip;
2710 #ifdef SVM_EXTRA_DEBUG
2711 if (do_debug)
2713 printk("eip = %lx, exit_reason = %d (0x%x)\n",
2714 eip, exit_reason, exit_reason);
2716 #endif /* SVM_EXTRA_DEBUG */
2718 TRACE_3D(TRC_VMX_VMEXIT, v->domain->domain_id, eip, exit_reason);
2720 switch (exit_reason)
2722 case VMEXIT_EXCEPTION_DB:
2724 #ifdef XEN_DEBUGGER
2725 svm_debug_save_cpu_user_regs(vmcb, &regs);
2726 pdb_handle_exception(1, &regs, 1);
2727 svm_debug_restore_cpu_user_regs(vmcb, &regs);
2728 #else
2729 svm_store_cpu_user_regs(&regs, v);
2730 domain_pause_for_debugger();
2731 #endif
2733 break;
2735 case VMEXIT_NMI:
2736 do_nmi(&regs, 0);
2737 break;
2739 case VMEXIT_SMI:
2740 /*
2741 * For asynchronous SMIs we just need to allow global interrupts, so
2742 * that the SMI is taken properly in the context of the host. The
2743 * standard exit path executes STGI after the VMEXIT, which accomplishes
2744 * this, so continue as normal and restart the guest.
2745 */
2746 break;
2748 case VMEXIT_INIT:
2749 /*
2750 * Nothing to do; in fact we should never get to this point.
2751 */
2752 break;
2754 case VMEXIT_EXCEPTION_BP:
2755 #ifdef XEN_DEBUGGER
2756 svm_debug_save_cpu_user_regs(vmcb, &regs);
2757 pdb_handle_exception(3, &regs, 1);
2758 svm_debug_restore_cpu_user_regs(vmcb, &regs);
2759 #else
2760 if ( test_bit(_DOMF_debugging, &v->domain->domain_flags) )
2761 domain_pause_for_debugger();
2762 else
2763 svm_inject_exception(v, TRAP_int3, 0, 0);
2764 #endif
2765 break;
2767 case VMEXIT_EXCEPTION_NM:
2768 svm_do_no_device_fault(vmcb);
2769 break;
2771 case VMEXIT_EXCEPTION_GP:
2772 /* This should probably not be trapped in the future */
2773 regs.error_code = vmcb->exitinfo1;
2774 svm_do_general_protection_fault(v, &regs);
2775 break;
2777 case VMEXIT_EXCEPTION_PF:
2779 unsigned long va;
2780 va = vmcb->exitinfo2;
2781 regs.error_code = vmcb->exitinfo1;
2782 HVM_DBG_LOG(DBG_LEVEL_VMMU,
2783 "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx",
2784 (unsigned long)regs.eax, (unsigned long)regs.ebx,
2785 (unsigned long)regs.ecx, (unsigned long)regs.edx,
2786 (unsigned long)regs.esi, (unsigned long)regs.edi);
2788 v->arch.hvm_vcpu.mmio_op.inst_decoder_regs = &regs;
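/*
 * Hand the fault to the shadow code first; only if svm_do_page_fault()
 * returns 0 (it could not handle the fault itself) do we reflect #PF into
 * the guest, loading the guest's cr2 with the faulting address.
 */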
2790 //printk("PF1\n");
2791 if (!(error = svm_do_page_fault(va, &regs)))
2793 /* Inject #PF into the guest via the VMCB event-injection field */
2794 svm_inject_exception(v, TRAP_page_fault, 1, regs.error_code);
2796 v->arch.hvm_svm.cpu_cr2 = va;
2797 vmcb->cr2 = va;
2798 TRACE_3D(TRC_VMX_INT, v->domain->domain_id,
2799 VMEXIT_EXCEPTION_PF, va);
2801 break;
2804 case VMEXIT_EXCEPTION_DF:
2805 /* Dump debug info to help work out why the guest double-faulted. */
2806 svm_dump_vmcb(__func__, vmcb);
2807 svm_dump_regs(__func__, &regs);
2808 svm_dump_inst(svm_rip2pointer(vmcb));
2809 svm_inject_exception(v, TRAP_double_fault, 1, 0);
2810 break;
2812 case VMEXIT_INTR:
2813 raise_softirq(SCHEDULE_SOFTIRQ);
2814 break;
2817 case VMEXIT_INVD:
2818 svm_vmexit_do_invd(vmcb);
2819 break;
2821 case VMEXIT_GDTR_WRITE:
2822 printk("WRITE to GDTR\n");
2823 break;
2825 case VMEXIT_TASK_SWITCH:
2826 __hvm_bug(&regs);
2827 break;
2829 case VMEXIT_CPUID:
2830 svm_vmexit_do_cpuid(vmcb, regs.eax, &regs);
2831 break;
2833 case VMEXIT_HLT:
2834 svm_vmexit_do_hlt(vmcb);
2835 break;
2837 case VMEXIT_INVLPG:
2838 svm_handle_invlpg(0, &regs);
2839 break;
2841 case VMEXIT_INVLPGA:
2842 svm_handle_invlpg(1, &regs);
2843 break;
2845 case VMEXIT_VMMCALL:
2846 svm_do_vmmcall(v, &regs);
2847 break;
2849 case VMEXIT_CR0_READ:
2850 svm_cr_access(v, 0, TYPE_MOV_FROM_CR, &regs);
2851 break;
2853 case VMEXIT_CR2_READ:
2854 svm_cr_access(v, 2, TYPE_MOV_FROM_CR, &regs);
2855 break;
2857 case VMEXIT_CR3_READ:
2858 svm_cr_access(v, 3, TYPE_MOV_FROM_CR, &regs);
2859 break;
2861 case VMEXIT_CR4_READ:
2862 svm_cr_access(v, 4, TYPE_MOV_FROM_CR, &regs);
2863 break;
2865 case VMEXIT_CR8_READ:
2866 svm_cr_access(v, 8, TYPE_MOV_FROM_CR, &regs);
2867 break;
2869 case VMEXIT_CR0_WRITE:
2870 svm_cr_access(v, 0, TYPE_MOV_TO_CR, &regs);
2871 break;
2873 case VMEXIT_CR2_WRITE:
2874 svm_cr_access(v, 2, TYPE_MOV_TO_CR, &regs);
2875 break;
2877 case VMEXIT_CR3_WRITE:
2878 svm_cr_access(v, 3, TYPE_MOV_TO_CR, &regs);
2879 local_flush_tlb();
2880 break;
2882 case VMEXIT_CR4_WRITE:
2883 svm_cr_access(v, 4, TYPE_MOV_TO_CR, &regs);
2884 break;
2886 case VMEXIT_CR8_WRITE:
2887 svm_cr_access(v, 8, TYPE_MOV_TO_CR, &regs);
2888 break;
2890 case VMEXIT_DR0_READ:
2891 svm_dr_access(v, 0, TYPE_MOV_FROM_DR, &regs);
2892 break;
2894 case VMEXIT_DR1_READ:
2895 svm_dr_access(v, 1, TYPE_MOV_FROM_DR, &regs);
2896 break;
2898 case VMEXIT_DR2_READ:
2899 svm_dr_access(v, 2, TYPE_MOV_FROM_DR, &regs);
2900 break;
2902 case VMEXIT_DR3_READ:
2903 svm_dr_access(v, 3, TYPE_MOV_FROM_DR, &regs);
2904 break;
2906 case VMEXIT_DR6_READ:
2907 svm_dr_access(v, 6, TYPE_MOV_FROM_DR, &regs);
2908 break;
2910 case VMEXIT_DR7_READ:
2911 svm_dr_access(v, 7, TYPE_MOV_FROM_DR, &regs);
2912 break;
2914 case VMEXIT_DR0_WRITE:
2915 svm_dr_access(v, 0, TYPE_MOV_TO_DR, &regs);
2916 break;
2918 case VMEXIT_DR1_WRITE:
2919 svm_dr_access(v, 1, TYPE_MOV_TO_DR, &regs);
2920 break;
2922 case VMEXIT_DR2_WRITE:
2923 svm_dr_access(v, 2, TYPE_MOV_TO_DR, &regs);
2924 break;
2926 case VMEXIT_DR3_WRITE:
2927 svm_dr_access(v, 3, TYPE_MOV_TO_DR, &regs);
2928 break;
2930 case VMEXIT_DR6_WRITE:
2931 svm_dr_access(v, 6, TYPE_MOV_TO_DR, &regs);
2932 break;
2934 case VMEXIT_DR7_WRITE:
2935 svm_dr_access(v, 7, TYPE_MOV_TO_DR, &regs);
2936 break;
2938 case VMEXIT_IOIO:
2939 svm_io_instruction(v, &regs);
2940 break;
2942 case VMEXIT_MSR:
2943 svm_do_msr_access(v, &regs);
2944 break;
2946 case VMEXIT_SHUTDOWN:
2947 printk("Guest shutdown exit\n");
2948 domain_crash_synchronous();
2949 break;
2951 default:
2952 printk("unexpected VMEXIT: exit reason = 0x%x, exitinfo1 = %llx, "
2953 "exitinfo2 = %llx\n", exit_reason,
2954 (unsigned long long)vmcb->exitinfo1,
2955 (unsigned long long)vmcb->exitinfo2);
2956 __hvm_bug(&regs); /* should not happen */
2957 break;
2960 #ifdef SVM_EXTRA_DEBUG
2961 if (do_debug)
2963 printk("%s: Done switch on vmexit_code\n", __func__);
2964 svm_dump_regs(__func__, &regs);
2967 if (do_debug)
2969 printk("vmexit_handler():- guest_table = 0x%08x, "
2970 "monitor_table = 0x%08x, shadow_table = 0x%08x\n",
2971 (int)v->arch.guest_table.pfn,
2972 (int)v->arch.monitor_table.pfn,
2973 (int)v->arch.shadow_table.pfn);
2974 printk("svm_vmexit_handler: Returning\n");
2976 #endif
2978 return;
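/*
 * Executed with interrupts disabled on the entry path (presumably just
 * before VMRUN): reload the hardware %cr2 with the guest's saved value so
 * the guest observes the cr2 it expects.
 */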
2981 asmlinkage void svm_load_cr2(void)
2983 struct vcpu *v = current;
2985 local_irq_disable();
2986 asm volatile("mov %0,%%cr2": :"r" (v->arch.hvm_svm.cpu_cr2));
2989 asmlinkage void svm_asid(void)
2991 struct vcpu *v = current;
2992 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2994 /*
2995 * If we need to assign a new ASID, or if we are switching cores,
2996 * retire the ASID used on the old core and assign a new one for the current core.
2997 */
2998 if ( test_bit( ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags ) ||
2999 ( v->arch.hvm_svm.asid_core != v->arch.hvm_svm.launch_core )) {
3000 /* recycle asid */
3001 if ( !asidpool_assign_next( vmcb, 1,
3002 v->arch.hvm_svm.asid_core, v->arch.hvm_svm.launch_core )) {
3003 /* If we get here, we have a major problem */
3004 domain_crash_synchronous();
3007 v->arch.hvm_svm.asid_core = v->arch.hvm_svm.launch_core;
3008 clear_bit( ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags );
3011 /* make sure the HSA is set for the current core */
3012 set_hsa_to_guest( &v->arch.hvm_svm );
3015 /*
3016 * Local variables:
3017 * mode: C
3018 * c-set-style: "BSD"
3019 * c-basic-offset: 4
3020 * tab-width: 4
3021 * indent-tabs-mode: nil
3022 * End:
3023 */