ia64/xen-unstable

view xen/arch/x86/hvm/svm/svm.c @ 10080:632ad28f2fd7

SVM patch to cleanup IOIO handling, do not use "real" mode but rather
the correct "bitness".

Signed-off-by: Tom Woller <thomas.woller@amd.com>
Signed-off-by: Mats Petersson <mats.petersson@amd.com>
author kaf24@firebug.cl.cam.ac.uk
date Wed May 17 23:50:23 2006 +0100 (2006-05-17)
parents 0839d57c8864
children 3d85f350a66a
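The cleanup described in the commit message replaces the old "real mode" assumption in the IN/OUTS emulation with an explicit address-size ("bitness") calculation taken from the guest's CS attributes and EFER.LMA, as done in svm_get_io_address() in the listing below. A rough standalone illustration of the idea (the helper name and the simplified attribute type are invented for this sketch and are not part of the changeset):

    /* Sketch only: simplified stand-ins for the real vmcb_struct fields. */
    struct example_cs_attr { unsigned int l:1, db:1; };

    /* Default address size for INS/OUTS, derived from CS rather than
     * assuming real mode: 64 in long mode, otherwise CS.D selects 32 vs 16. */
    static unsigned int io_default_asize(int efer_lma, struct example_cs_attr cs)
    {
        if (efer_lma && cs.l)
            return 64;              /* 64-bit (long) mode */
        return cs.db ? 32 : 16;     /* protected/real mode */
    }

An addr32 (0x67) prefix then switches between the 16-bit and 32-bit sizes, which svm_get_prefix_info() handles.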
line source
1 /*
2 * svm.c: handling SVM architecture-related VM exits
3 * Copyright (c) 2004, Intel Corporation.
4 * Copyright (c) 2005, AMD Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
17 * Place - Suite 330, Boston, MA 02111-1307 USA.
18 *
19 */
21 #include <xen/config.h>
22 #include <xen/init.h>
23 #include <xen/lib.h>
24 #include <xen/trace.h>
25 #include <xen/sched.h>
26 #include <xen/irq.h>
27 #include <xen/softirq.h>
28 #include <asm/current.h>
29 #include <asm/io.h>
30 #include <asm/shadow.h>
31 #include <asm/regs.h>
32 #include <asm/cpufeature.h>
33 #include <asm/processor.h>
34 #include <asm/types.h>
35 #include <asm/msr.h>
36 #include <asm/spinlock.h>
37 #include <asm/hvm/hvm.h>
38 #include <asm/hvm/support.h>
39 #include <asm/hvm/io.h>
40 #include <asm/hvm/svm/svm.h>
41 #include <asm/hvm/svm/vmcb.h>
42 #include <asm/hvm/svm/emulate.h>
43 #include <asm/hvm/svm/vmmcall.h>
44 #include <asm/hvm/svm/intr.h>
45 #include <asm/shadow.h>
46 #if CONFIG_PAGING_LEVELS >= 3
47 #include <asm/shadow_64.h>
48 #endif
49 #include <public/sched.h>
50 #include <public/hvm/ioreq.h>
52 #define SVM_EXTRA_DEBUG
54 #ifdef TRACE_BUFFER
55 static unsigned long trace_values[NR_CPUS][4];
56 #define TRACE_VMEXIT(index,value) trace_values[current->processor][index]=value
57 #else
58 #define TRACE_VMEXIT(index,value) ((void)0)
59 #endif
61 /* Useful define */
62 #define MAX_INST_SIZE 15
64 #define set_segment_register(name, value) \
65 __asm__ __volatile__ ( "movw %%ax ,%%" STR(name) "" : : "a" (value) )
67 /*
68 * External functions, etc. We should move these to some suitable header file(s) */
70 extern void do_nmi(struct cpu_user_regs *, unsigned long);
71 extern int inst_copy_from_guest(unsigned char *buf, unsigned long guest_eip,
72 int inst_len);
73 extern asmlinkage void do_IRQ(struct cpu_user_regs *);
74 extern void send_pio_req(struct cpu_user_regs *regs, unsigned long port,
75 unsigned long count, int size, long value, int dir, int pvalid);
76 extern int svm_instrlen(struct cpu_user_regs *regs, int mode);
77 extern void svm_dump_inst(unsigned long eip);
78 extern int svm_dbg_on;
79 void svm_manual_event_injection32(struct vcpu *v, struct cpu_user_regs *regs,
80 int vector, int has_code);
81 void svm_dump_regs(const char *from, struct cpu_user_regs *regs);
83 static void svm_relinquish_guest_resources(struct domain *d);
85 /* Host save area */
86 struct host_save_area *host_save_area[ NR_CPUS ] = {0};
87 static struct asid_pool ASIDpool[NR_CPUS];
89 /*
90 * Initializes the pool of ASIDs used by the guests, per core.
91 */
92 void asidpool_init( int core )
93 {
94 int i;
95 ASIDpool[core].asid_lock = SPIN_LOCK_UNLOCKED;
96 spin_lock(&ASIDpool[core].asid_lock);
97 /* Host ASID is always in use */
98 ASIDpool[core].asid[INITIAL_ASID] = ASID_INUSE;
99 for( i=1; i<ASID_MAX; i++ )
100 {
101 ASIDpool[core].asid[i] = ASID_AVAILABLE;
102 }
103 spin_unlock(&ASIDpool[core].asid_lock);
104 }
107 /* internal function to get the next available ASID */
108 static int asidpool_fetch_next( struct vmcb_struct *vmcb, int core )
109 {
110 int i;
111 for( i = 1; i < ASID_MAX; i++ )
112 {
113 if( ASIDpool[core].asid[i] == ASID_AVAILABLE )
114 {
115 vmcb->guest_asid = i;
116 ASIDpool[core].asid[i] = ASID_INUSE;
117 return i;
118 }
119 }
120 return -1;
121 }
124 /*
125 * This function assigns to the passed VMCB the next
126 * available ASID number. If none are available, the
127 * TLB flush flag is set, and all retired ASIDs
128 * are made available.
129 *
130 * Returns: 1 -- success;
131 * 0 -- failure -- no more ASID numbers
132 * available.
133 */
134 int asidpool_assign_next( struct vmcb_struct *vmcb, int retire_current,
135 int oldcore, int newcore )
136 {
137 int i;
138 int res = 1;
139 static unsigned long cnt=0;
141 spin_lock(&ASIDpool[oldcore].asid_lock);
142 if( retire_current && vmcb->guest_asid ) {
143 ASIDpool[oldcore].asid[ vmcb->guest_asid & (ASID_MAX-1) ] = ASID_RETIRED;
144 }
145 spin_unlock(&ASIDpool[oldcore].asid_lock);
146 spin_lock(&ASIDpool[newcore].asid_lock);
147 if( asidpool_fetch_next( vmcb, newcore ) < 0 ) {
148 if (svm_dbg_on)
149 printk( "SVM: tlb(%ld)\n", cnt++ );
150 /* Flush the TLB and make all retired slots available */
151 vmcb->tlb_control = 1;
152 for( i = 1; i < ASID_MAX; i++ ) {
153 if( ASIDpool[newcore].asid[i] == ASID_RETIRED ) {
154 ASIDpool[newcore].asid[i] = ASID_AVAILABLE;
155 }
156 }
157 /* Get the First slot available */
158 res = asidpool_fetch_next( vmcb, newcore ) > 0;
159 }
160 spin_unlock(&ASIDpool[newcore].asid_lock);
161 return res;
162 }
164 void asidpool_retire( struct vmcb_struct *vmcb, int core )
165 {
166 spin_lock(&ASIDpool[core].asid_lock);
167 if( vmcb->guest_asid ) {
168 ASIDpool[core].asid[ vmcb->guest_asid & (ASID_MAX-1) ] = ASID_RETIRED;
169 }
170 spin_unlock(&ASIDpool[core].asid_lock);
171 }
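/* Queue an exception, with optional error code, for injection into the
 * guest via the VMCB EVENTINJ field on the next VMRUN. */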
173 static inline void svm_inject_exception(struct vmcb_struct *vmcb,
174 int trap, int ev, int error_code)
175 {
176 eventinj_t event;
178 event.bytes = 0;
179 event.fields.v = 1;
180 event.fields.type = EVENTTYPE_EXCEPTION;
181 event.fields.vector = trap;
182 event.fields.ev = ev;
183 event.fields.errorcode = error_code;
185 ASSERT(vmcb->eventinj.fields.v == 0);
187 vmcb->eventinj = event;
188 }
190 void stop_svm(void)
191 {
192 u32 eax, edx;
193 int cpu = smp_processor_id();
195 /* We turn off the EFER_SVME bit. */
196 rdmsr(MSR_EFER, eax, edx);
197 eax &= ~EFER_SVME;
198 wrmsr(MSR_EFER, eax, edx);
200 /* release the HSA */
201 free_host_save_area( host_save_area[ cpu ] );
202 host_save_area[ cpu ] = NULL;
204 printk("AMD SVM Extension is disabled.\n");
205 }
207 int svm_initialize_guest_resources(struct vcpu *v)
208 {
209 svm_final_setup_guest(v);
210 return 1;
211 }
213 static void svm_store_cpu_guest_regs(
214 struct vcpu *v, struct cpu_user_regs *regs, unsigned long *crs)
215 {
216 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
218 if ( regs != NULL )
219 {
220 #if defined (__x86_64__)
221 regs->rip = vmcb->rip;
222 regs->rsp = vmcb->rsp;
223 regs->rflags = vmcb->rflags;
224 regs->cs = vmcb->cs.sel;
225 regs->ds = vmcb->ds.sel;
226 regs->es = vmcb->es.sel;
227 regs->ss = vmcb->ss.sel;
228 regs->gs = vmcb->gs.sel;
229 regs->fs = vmcb->fs.sel;
230 #elif defined (__i386__)
231 regs->eip = vmcb->rip;
232 regs->esp = vmcb->rsp;
233 regs->eflags = vmcb->rflags;
234 regs->cs = vmcb->cs.sel;
235 regs->ds = vmcb->ds.sel;
236 regs->es = vmcb->es.sel;
237 regs->ss = vmcb->ss.sel;
238 regs->gs = vmcb->gs.sel;
239 regs->fs = vmcb->fs.sel;
240 #endif
241 }
243 if ( crs != NULL )
244 {
245 crs[0] = vmcb->cr0;
246 crs[3] = vmcb->cr3;
247 crs[4] = vmcb->cr4;
248 }
249 }
251 static void svm_load_cpu_guest_regs(
252 struct vcpu *v, struct cpu_user_regs *regs)
253 {
254 svm_load_cpu_user_regs(v, regs);
255 }
257 #define IS_CANO_ADDRESS(add) 1
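/* Read the long-mode MSRs (EFER, FS/GS base, KernelGSbase, STAR, LSTAR,
 * CSTAR, SYSCALL mask) straight from the VMCB; returns 0 if the MSR is
 * not handled here. The write-side counterpart follows below. */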
259 static inline int long_mode_do_msr_read(struct cpu_user_regs *regs)
260 {
261 u64 msr_content = 0;
262 struct vcpu *vc = current;
263 // struct svm_msr_state *msr = &vc->arch.hvm_svm.msr_content;
264 struct vmcb_struct *vmcb = vc->arch.hvm_svm.vmcb;
266 switch (regs->ecx)
267 {
268 case MSR_EFER:
269 // msr_content = msr->msr_items[SVM_INDEX_MSR_EFER];
270 msr_content = vmcb->efer;
271 msr_content &= ~EFER_SVME;
272 break;
274 case MSR_FS_BASE:
275 msr_content = vmcb->fs.base;
276 break;
278 case MSR_GS_BASE:
279 msr_content = vmcb->gs.base;
280 break;
282 case MSR_SHADOW_GS_BASE:
283 msr_content = vmcb->kerngsbase;
284 break;
286 case MSR_STAR:
287 msr_content = vmcb->star;
288 break;
290 case MSR_LSTAR:
291 msr_content = vmcb->lstar;
292 break;
294 case MSR_CSTAR:
295 msr_content = vmcb->cstar;
296 break;
298 case MSR_SYSCALL_MASK:
299 msr_content = vmcb->sfmask;
300 break;
302 default:
303 return 0;
304 }
306 HVM_DBG_LOG(DBG_LEVEL_2, "mode_do_msr_read: msr_content: %"PRIx64"\n",
307 msr_content);
309 regs->eax = msr_content & 0xffffffff;
310 regs->edx = msr_content >> 32;
311 return 1;
312 }
314 static inline int long_mode_do_msr_write(struct cpu_user_regs *regs)
315 {
316 u64 msr_content = regs->eax | ((u64)regs->edx << 32);
317 struct vcpu *vc = current;
318 struct vmcb_struct *vmcb = vc->arch.hvm_svm.vmcb;
320 HVM_DBG_LOG(DBG_LEVEL_1, "mode_do_msr_write msr %lx "
321 "msr_content %"PRIx64"\n",
322 (unsigned long)regs->ecx, msr_content);
324 switch (regs->ecx)
325 {
326 case MSR_EFER:
327 #ifdef __x86_64__
328 /* offending reserved bit will cause #GP */
329 if ( msr_content & ~(EFER_LME | EFER_LMA | EFER_NX | EFER_SCE) )
330 {
331 printk("trying to set reserved bit in EFER\n");
332 svm_inject_exception(vmcb, TRAP_gp_fault, 1, 0);
333 return 0;
334 }
336 /* LME: 0 -> 1 */
337 if ( msr_content & EFER_LME &&
338 !test_bit(SVM_CPU_STATE_LME_ENABLED, &vc->arch.hvm_svm.cpu_state) )
339 {
340 if ( svm_paging_enabled(vc) ||
341 !test_bit(SVM_CPU_STATE_PAE_ENABLED,
342 &vc->arch.hvm_svm.cpu_state) )
343 {
344 printk("trying to set LME bit when "
345 "in paging mode or PAE bit is not set\n");
346 svm_inject_exception(vmcb, TRAP_gp_fault, 1, 0);
347 return 0;
348 }
349 set_bit(SVM_CPU_STATE_LME_ENABLED, &vc->arch.hvm_svm.cpu_state);
350 }
352 /* We have already recorded that we want LME, so it will be set
353 * next time CR0 gets updated. So we clear that bit and continue.
354 */
355 if ((msr_content ^ vmcb->efer) & EFER_LME)
356 msr_content &= ~EFER_LME;
357 /* No update for LME/LMA since it has no effect */
358 #endif
359 vmcb->efer = msr_content | EFER_SVME;
360 break;
362 case MSR_FS_BASE:
363 case MSR_GS_BASE:
364 if (!(SVM_LONG_GUEST(vc)))
365 domain_crash_synchronous();
367 if (!IS_CANO_ADDRESS(msr_content))
368 {
369 HVM_DBG_LOG(DBG_LEVEL_1, "Not canonical address of msr write\n");
370 svm_inject_exception(vmcb, TRAP_gp_fault, 1, 0);
371 }
373 if (regs->ecx == MSR_FS_BASE)
374 vmcb->fs.base = msr_content;
375 else
376 vmcb->gs.base = msr_content;
377 break;
379 case MSR_SHADOW_GS_BASE:
380 vmcb->kerngsbase = msr_content;
381 break;
383 case MSR_STAR:
384 vmcb->star = msr_content;
385 break;
387 case MSR_LSTAR:
388 vmcb->lstar = msr_content;
389 break;
391 case MSR_CSTAR:
392 vmcb->cstar = msr_content;
393 break;
395 case MSR_SYSCALL_MASK:
396 vmcb->sfmask = msr_content;
397 break;
399 default:
400 return 0;
401 }
402 return 1;
403 }
405 int svm_realmode(struct vcpu *v)
406 {
407 unsigned long cr0 = v->arch.hvm_svm.cpu_shadow_cr0;
408 unsigned long eflags = v->arch.hvm_svm.vmcb->rflags;
410 return (eflags & X86_EFLAGS_VM) || !(cr0 & X86_CR0_PE);
411 }
413 int svm_instruction_length(struct vcpu *v)
414 {
415 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
416 unsigned long cr0 = vmcb->cr0, eflags = vmcb->rflags, mode;
417 /* check which operating mode the guest is running */
418 if( vmcb->efer & EFER_LMA )
419 mode = vmcb->cs.attributes.fields.l ? 8 : 4;
420 else
421 mode = (eflags & X86_EFLAGS_VM) || !(cr0 & X86_CR0_PE) ? 2 : 4;
422 return svm_instrlen(guest_cpu_user_regs(), mode);
423 }
425 unsigned long svm_get_ctrl_reg(struct vcpu *v, unsigned int num)
426 {
427 switch ( num )
428 {
429 case 0:
430 return v->arch.hvm_svm.cpu_shadow_cr0;
431 case 2:
432 return v->arch.hvm_svm.cpu_cr2;
433 case 3:
434 return v->arch.hvm_svm.cpu_cr3;
435 default:
436 BUG();
437 }
438 return 0; /* dummy */
439 }
441 int start_svm(void)
442 {
443 u32 eax, ecx, edx;
444 u32 phys_hsa_lo, phys_hsa_hi;
445 u64 phys_hsa;
446 int cpu = smp_processor_id();
448 /* Xen does not fill x86_capability words except 0. */
449 ecx = cpuid_ecx(0x80000001);
450 boot_cpu_data.x86_capability[5] = ecx;
452 if (!(test_bit(X86_FEATURE_SVME, &boot_cpu_data.x86_capability)))
453 return 0;
455 rdmsr(MSR_EFER, eax, edx);
456 eax |= EFER_SVME;
457 wrmsr(MSR_EFER, eax, edx);
458 asidpool_init(smp_processor_id());
459 printk("AMD SVM Extension is enabled for cpu %d.\n", cpu );
461 /* Initialize the HSA for this core */
462 host_save_area[ cpu ] = alloc_host_save_area();
463 phys_hsa = (u64) virt_to_maddr( host_save_area[ cpu ] );
464 phys_hsa_lo = (u32) phys_hsa;
465 phys_hsa_hi = (u32) (phys_hsa >> 32);
466 wrmsr(MSR_K8_VM_HSAVE_PA, phys_hsa_lo, phys_hsa_hi);
468 /* Setup HVM interfaces */
469 hvm_funcs.disable = stop_svm;
471 hvm_funcs.initialize_guest_resources = svm_initialize_guest_resources;
472 hvm_funcs.relinquish_guest_resources = svm_relinquish_guest_resources;
474 hvm_funcs.store_cpu_guest_regs = svm_store_cpu_guest_regs;
475 hvm_funcs.load_cpu_guest_regs = svm_load_cpu_guest_regs;
477 hvm_funcs.realmode = svm_realmode;
478 hvm_funcs.paging_enabled = svm_paging_enabled;
479 hvm_funcs.instruction_length = svm_instruction_length;
480 hvm_funcs.get_guest_ctrl_reg = svm_get_ctrl_reg;
482 hvm_enabled = 1;
484 return 1;
485 }
487 int svm_dbg_on = 0;
489 static inline int svm_do_debugout(unsigned long exit_code)
490 {
491 int i;
493 static unsigned long counter = 0;
494 static unsigned long works[] =
495 {
496 VMEXIT_IOIO,
497 VMEXIT_HLT,
498 VMEXIT_CPUID,
499 VMEXIT_DR0_READ,
500 VMEXIT_DR1_READ,
501 VMEXIT_DR2_READ,
502 VMEXIT_DR3_READ,
503 VMEXIT_DR6_READ,
504 VMEXIT_DR7_READ,
505 VMEXIT_DR0_WRITE,
506 VMEXIT_DR1_WRITE,
507 VMEXIT_DR2_WRITE,
508 VMEXIT_DR3_WRITE,
509 VMEXIT_CR0_READ,
510 VMEXIT_CR0_WRITE,
511 VMEXIT_CR3_READ,
512 VMEXIT_CR4_READ,
513 VMEXIT_MSR,
514 VMEXIT_CR0_WRITE,
515 VMEXIT_CR3_WRITE,
516 VMEXIT_CR4_WRITE,
517 VMEXIT_EXCEPTION_PF,
518 VMEXIT_INTR,
519 VMEXIT_INVLPG,
520 VMEXIT_EXCEPTION_NM
521 };
524 #if 0
525 if (svm_dbg_on && exit_code != 0x7B)
526 return 1;
527 #endif
529 counter++;
531 #if 0
532 if ((exit_code == 0x4E
533 || exit_code == VMEXIT_CR0_READ
534 || exit_code == VMEXIT_CR0_WRITE)
535 && counter < 200000)
536 return 0;
538 if ((exit_code == 0x4E) && counter < 500000)
539 return 0;
540 #endif
542 for (i = 0; i < sizeof(works) / sizeof(works[0]); i++)
543 if (exit_code == works[i])
544 return 0;
546 return 1;
547 }
550 void save_svm_cpu_user_regs(struct vcpu *v, struct cpu_user_regs *ctxt)
551 {
552 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
554 ASSERT(vmcb);
556 ctxt->eax = vmcb->rax;
557 ctxt->ss = vmcb->ss.sel;
558 ctxt->esp = vmcb->rsp;
559 ctxt->eflags = vmcb->rflags;
560 ctxt->cs = vmcb->cs.sel;
561 ctxt->eip = vmcb->rip;
563 ctxt->gs = vmcb->gs.sel;
564 ctxt->fs = vmcb->fs.sel;
565 ctxt->es = vmcb->es.sel;
566 ctxt->ds = vmcb->ds.sel;
567 }
569 void svm_store_cpu_user_regs(struct cpu_user_regs *regs, struct vcpu *v)
570 {
571 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
573 regs->eip = vmcb->rip;
574 regs->esp = vmcb->rsp;
575 regs->eflags = vmcb->rflags;
576 regs->cs = vmcb->cs.sel;
577 regs->ds = vmcb->ds.sel;
578 regs->es = vmcb->es.sel;
579 regs->ss = vmcb->ss.sel;
580 regs->fs = vmcb->fs.sel;
581 regs->gs = vmcb->gs.sel;
582 }
584 /* XXX Use svm_load_cpu_guest_regs instead */
585 void svm_load_cpu_user_regs(struct vcpu *v, struct cpu_user_regs *regs)
586 {
587 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
588 u32 *intercepts = &v->arch.hvm_svm.vmcb->exception_intercepts;
590 /* Write the guest register value into VMCB */
591 vmcb->rax = regs->eax;
592 vmcb->ss.sel = regs->ss;
593 vmcb->rsp = regs->esp;
594 vmcb->rflags = regs->eflags;
595 vmcb->cs.sel = regs->cs;
596 vmcb->rip = regs->eip;
598 vmcb->ds.sel = regs->ds;
599 vmcb->es.sel = regs->es;
600 vmcb->fs.sel = regs->fs;
601 vmcb->gs.sel = regs->gs;
603 if (regs->eflags & EF_TF)
604 *intercepts |= EXCEPTION_BITMAP_DB;
605 else
606 *intercepts &= ~EXCEPTION_BITMAP_DB;
607 }
609 int svm_paging_enabled(struct vcpu *v)
610 {
611 unsigned long cr0;
613 cr0 = v->arch.hvm_svm.cpu_shadow_cr0;
615 return (cr0 & X86_CR0_PE) && (cr0 & X86_CR0_PG);
616 }
619 /* Make sure that xen intercepts any FP accesses from current */
620 void svm_stts(struct vcpu *v)
621 {
622 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
624 /* FPU state already dirty? Then no need to setup_fpu() lazily. */
625 if ( test_bit(_VCPUF_fpu_dirtied, &v->vcpu_flags) )
626 return;
628 /*
629 * If the guest does not have TS enabled then we must cause and handle an
630 * exception on first use of the FPU. If the guest *does* have TS enabled
631 * then this is not necessary: no FPU activity can occur until the guest
632 * clears CR0.TS, and we will initialise the FPU when that happens.
633 */
634 if ( !(v->arch.hvm_svm.cpu_shadow_cr0 & X86_CR0_TS) )
635 {
636 v->arch.hvm_svm.vmcb->exception_intercepts |= EXCEPTION_BITMAP_NM;
637 vmcb->cr0 |= X86_CR0_TS;
638 }
639 }
641 static void arch_svm_do_launch(struct vcpu *v)
642 {
643 cpu_user_regs_t *regs = &current->arch.guest_context.user_regs;
644 int error;
646 #if 0
647 if (svm_dbg_on)
648 printk("Do launch\n");
649 #endif
650 error = construct_vmcb(&v->arch.hvm_svm, regs);
651 if ( error < 0 )
652 {
653 if (v->vcpu_id == 0) {
654 printk("Failed to construct a new VMCB for BSP.\n");
655 } else {
656 printk("Failed to construct a new VMCB for AP %d\n", v->vcpu_id);
657 }
658 domain_crash_synchronous();
659 }
661 svm_do_launch(v);
662 #if 0
663 if (svm_dbg_on)
664 svm_dump_host_regs(__func__);
665 #endif
666 reset_stack_and_jump(svm_asm_do_launch);
667 }
669 static void svm_freeze_time(struct vcpu *v)
670 {
671 struct hvm_time_info *time_info = &v->domain->arch.hvm_domain.vpit.time_info;
673 if ( time_info->first_injected && !v->domain->arch.hvm_domain.guest_time ) {
674 v->domain->arch.hvm_domain.guest_time = svm_get_guest_time(v);
675 time_info->count_advance += (NOW() - time_info->count_point);
676 stop_timer(&(time_info->pit_timer));
677 }
678 }
680 static void svm_ctxt_switch_from(struct vcpu *v)
681 {
682 svm_freeze_time(v);
683 }
685 static void svm_ctxt_switch_to(struct vcpu *v)
686 {
687 #if __x86_64__
688 /*
689 * This is required because VMRUN performs consistency checks
690 * and some of the DOM0 selectors point to
691 * invalid GDT locations, causing AMD processors
692 * to shut down.
693 */
694 set_segment_register(ds, 0);
695 set_segment_register(es, 0);
696 set_segment_register(ss, 0);
697 #endif
698 }
700 void svm_final_setup_guest(struct vcpu *v)
701 {
702 v->arch.schedule_tail = arch_svm_do_launch;
703 v->arch.ctxt_switch_from = svm_ctxt_switch_from;
704 v->arch.ctxt_switch_to = svm_ctxt_switch_to;
706 if (v == v->domain->vcpu[0])
707 {
708 struct domain *d = v->domain;
709 struct vcpu *vc;
711 /* Initialize monitor page table */
712 for_each_vcpu(d, vc)
713 vc->arch.monitor_table = mk_pagetable(0);
715 /*
716 * Required to do this once per domain
717 * TODO: add a separate function to do these.
718 */
719 memset(&d->shared_info->evtchn_mask[0], 0xff,
720 sizeof(d->shared_info->evtchn_mask));
722 /*
723 * Put the domain in shadow mode even though we're going to be using
724 * the shared 1:1 page table initially. It shouldn't hurt
725 */
726 shadow_mode_enable(d,
727 SHM_enable|SHM_refcounts|
728 SHM_translate|SHM_external|SHM_wr_pt_pte);
729 }
730 }
733 static void svm_relinquish_guest_resources(struct domain *d)
734 {
735 extern void destroy_vmcb(struct arch_svm_struct *); /* XXX */
736 struct vcpu *v;
738 for_each_vcpu ( d, v )
739 {
740 if ( !test_bit(_VCPUF_initialised, &v->vcpu_flags) )
741 continue;
743 destroy_vmcb(&v->arch.hvm_svm);
744 free_monitor_pagetable(v);
745 kill_timer(&v->arch.hvm_svm.hlt_timer);
746 if ( hvm_apic_support(v->domain) && (VLAPIC(v) != NULL) )
747 {
748 kill_timer( &(VLAPIC(v)->vlapic_timer) );
749 xfree(VLAPIC(v));
750 }
751 }
753 kill_timer(&d->arch.hvm_domain.vpit.time_info.pit_timer);
755 if ( d->arch.hvm_domain.shared_page_va )
756 unmap_domain_page_global(
757 (void *)d->arch.hvm_domain.shared_page_va);
759 shadow_direct_map_clean(d);
760 }
763 void arch_svm_do_resume(struct vcpu *v)
764 {
765 /* pinning VCPU to a different core? */
766 if ( v->arch.hvm_svm.launch_core == smp_processor_id()) {
767 svm_do_resume( v );
768 reset_stack_and_jump( svm_asm_do_resume );
769 }
770 else {
771 printk("VCPU core pinned: %d to %d\n",
772 v->arch.hvm_svm.launch_core, smp_processor_id() );
773 v->arch.hvm_svm.launch_core = smp_processor_id();
774 svm_migrate_timers( v );
775 svm_do_resume( v );
776 reset_stack_and_jump( svm_asm_do_resume );
777 }
778 }
781 void svm_migrate_timers(struct vcpu *v)
782 {
783 struct hvm_time_info *time_info = &v->domain->arch.hvm_domain.vpit.time_info;
785 migrate_timer(&time_info->pit_timer, v->processor);
786 migrate_timer(&v->arch.hvm_svm.hlt_timer, v->processor);
787 if ( hvm_apic_support(v->domain) && VLAPIC( v ))
788 migrate_timer( &(VLAPIC(v)->vlapic_timer ), v->processor );
789 }
792 static int svm_do_page_fault(unsigned long va, struct cpu_user_regs *regs)
793 {
794 struct vcpu *v = current;
795 unsigned long eip;
796 unsigned long gpa; /* FIXME: PAE */
797 int result;
798 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
800 ASSERT(vmcb);
802 //#if HVM_DEBUG
803 eip = vmcb->rip;
804 HVM_DBG_LOG(DBG_LEVEL_VMMU,
805 "svm_do_page_fault = 0x%lx, eip = %lx, error_code = %lx",
806 va, eip, (unsigned long)regs->error_code);
807 //#endif
809 if ( !svm_paging_enabled(v) )
810 {
811 if ( shadow_direct_map_fault(va, regs) )
812 return 1;
814 handle_mmio(va, va);
815 TRACE_VMEXIT(2,2);
816 return 1;
817 }
820 gpa = gva_to_gpa(va);
822 /* Use 1:1 page table to identify MMIO address space */
823 if (mmio_space(gpa))
824 {
825 /* No support for APIC */
826 if (!hvm_apic_support(v->domain) && gpa >= 0xFEC00000)
827 {
828 int inst_len;
829 inst_len = svm_instruction_length(v);
830 if (inst_len == -1)
831 {
832 printf("%s: INST_LEN - Unable to decode properly.\n", __func__);
833 domain_crash_synchronous();
834 }
836 __update_guest_eip(vmcb, inst_len);
838 return 1;
839 }
841 TRACE_VMEXIT (2,2);
842 handle_mmio(va, gpa);
844 return 1;
845 }
847 result = shadow_fault(va, regs);
849 if( result ) {
850 /* Let's make sure that the Guest TLB is flushed */
851 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
852 }
854 TRACE_VMEXIT (2,result);
856 return result;
857 }
860 static void svm_do_no_device_fault(struct vmcb_struct *vmcb)
861 {
862 struct vcpu *v = current;
864 setup_fpu(v);
865 vmcb->exception_intercepts &= ~EXCEPTION_BITMAP_NM;
867 if ( !(v->arch.hvm_svm.cpu_shadow_cr0 & X86_CR0_TS) )
868 vmcb->cr0 &= ~X86_CR0_TS;
869 }
872 static void svm_do_general_protection_fault(struct vcpu *v,
873 struct cpu_user_regs *regs)
874 {
875 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
876 unsigned long eip, error_code;
878 ASSERT(vmcb);
880 eip = vmcb->rip;
881 error_code = vmcb->exitinfo1;
883 if (vmcb->idtr.limit == 0) {
884 printf("Huh? We got a GP Fault with an invalid IDTR!\n");
885 svm_dump_vmcb(__func__, vmcb);
886 svm_dump_regs(__func__, regs);
887 svm_dump_inst(vmcb->rip);
888 __hvm_bug(regs);
889 }
891 HVM_DBG_LOG(DBG_LEVEL_1,
892 "svm_general_protection_fault: eip = %lx, erro_code = %lx",
893 eip, error_code);
895 HVM_DBG_LOG(DBG_LEVEL_1,
896 "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx",
897 (unsigned long)regs->eax, (unsigned long)regs->ebx,
898 (unsigned long)regs->ecx, (unsigned long)regs->edx,
899 (unsigned long)regs->esi, (unsigned long)regs->edi);
902 /* Reflect it back into the guest */
903 svm_inject_exception(vmcb, TRAP_gp_fault, 1, error_code);
904 }
906 /* Reserved bits: [31:14], [12:1] */
907 #define SVM_VCPU_CPUID_L1_RESERVED 0xffffdffe
909 static void svm_vmexit_do_cpuid(struct vmcb_struct *vmcb, unsigned long input,
910 struct cpu_user_regs *regs)
911 {
912 unsigned int eax, ebx, ecx, edx;
913 unsigned long eip;
914 struct vcpu *v = current;
915 int inst_len;
917 ASSERT(vmcb);
919 eip = vmcb->rip;
921 HVM_DBG_LOG(DBG_LEVEL_1,
922 "do_cpuid: (eax) %lx, (ebx) %lx, (ecx) %lx, (edx) %lx,"
923 " (esi) %lx, (edi) %lx",
924 (unsigned long)regs->eax, (unsigned long)regs->ebx,
925 (unsigned long)regs->ecx, (unsigned long)regs->edx,
926 (unsigned long)regs->esi, (unsigned long)regs->edi);
928 cpuid(input, &eax, &ebx, &ecx, &edx);
930 if (input == 1)
931 {
932 if ( !hvm_apic_support(v->domain) ||
933 !vlapic_global_enabled((VLAPIC(v))) )
934 {
935 clear_bit(X86_FEATURE_APIC, &edx);
936 /* Since the apic is disabled, avoid any confusion about SMP cpus being available */
937 clear_bit(X86_FEATURE_HT, &edx); /* clear the hyperthread bit */
938 ebx &= 0xFF00FFFF; /* set the logical processor count to 1 */
939 ebx |= 0x00010000;
940 }
942 #if CONFIG_PAGING_LEVELS < 3
943 clear_bit(X86_FEATURE_NX, &edx);
944 clear_bit(X86_FEATURE_PAE, &edx);
945 clear_bit(X86_FEATURE_PSE, &edx);
946 clear_bit(X86_FEATURE_PSE36, &edx);
947 #else
948 if ( v->domain->arch.ops->guest_paging_levels == PAGING_L2 )
949 {
950 if ( !v->domain->arch.hvm_domain.pae_enabled )
951 {
952 clear_bit(X86_FEATURE_PAE, &edx);
953 clear_bit(X86_FEATURE_NX, &edx);
954 }
955 clear_bit(X86_FEATURE_PSE, &edx);
956 clear_bit(X86_FEATURE_PSE36, &edx);
957 }
958 #endif
959 /* Clear out reserved bits. */
960 ecx &= ~SVM_VCPU_CPUID_L1_RESERVED; /* mask off reserved bits */
961 clear_bit(X86_FEATURE_MWAIT & 31, &ecx);
962 }
963 #ifdef __i386__
964 else if ( input == 0x80000001 )
965 {
966 /* Mask feature for Intel ia32e or AMD long mode. */
967 clear_bit(X86_FEATURE_LM & 31, &edx);
968 }
969 #endif
971 regs->eax = (unsigned long)eax;
972 regs->ebx = (unsigned long)ebx;
973 regs->ecx = (unsigned long)ecx;
974 regs->edx = (unsigned long)edx;
976 HVM_DBG_LOG(DBG_LEVEL_1,
977 "svm_vmexit_do_cpuid: eip: %lx, input: %lx, out:eax=%x, "
978 "ebx=%x, ecx=%x, edx=%x",
979 eip, input, eax, ebx, ecx, edx);
981 inst_len = __get_instruction_length(vmcb, INSTR_CPUID, NULL);
982 ASSERT(inst_len > 0);
983 __update_guest_eip(vmcb, inst_len);
984 }
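/* Return a pointer to the saved guest GPR selected by an SVM decode
 * register index; ESP is taken from the VMCB rather than the register
 * frame. */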
987 static inline unsigned long *get_reg_p(unsigned int gpreg,
988 struct cpu_user_regs *regs, struct vmcb_struct *vmcb)
989 {
990 unsigned long *reg_p = NULL;
991 switch (gpreg)
992 {
993 case SVM_REG_EAX:
994 reg_p = (unsigned long *)&regs->eax;
995 break;
996 case SVM_REG_EBX:
997 reg_p = (unsigned long *)&regs->ebx;
998 break;
999 case SVM_REG_ECX:
1000 reg_p = (unsigned long *)&regs->ecx;
1001 break;
1002 case SVM_REG_EDX:
1003 reg_p = (unsigned long *)&regs->edx;
1004 break;
1005 case SVM_REG_EDI:
1006 reg_p = (unsigned long *)&regs->edi;
1007 break;
1008 case SVM_REG_ESI:
1009 reg_p = (unsigned long *)&regs->esi;
1010 break;
1011 case SVM_REG_EBP:
1012 reg_p = (unsigned long *)&regs->ebp;
1013 break;
1014 case SVM_REG_ESP:
1015 reg_p = (unsigned long *)&vmcb->rsp;
1016 break;
1017 #if __x86_64__
1018 case SVM_REG_R8:
1019 reg_p = (unsigned long *)&regs->r8;
1020 break;
1021 case SVM_REG_R9:
1022 reg_p = (unsigned long *)&regs->r9;
1023 break;
1024 case SVM_REG_R10:
1025 reg_p = (unsigned long *)&regs->r10;
1026 break;
1027 case SVM_REG_R11:
1028 reg_p = (unsigned long *)&regs->r11;
1029 break;
1030 case SVM_REG_R12:
1031 reg_p = (unsigned long *)&regs->r12;
1032 break;
1033 case SVM_REG_R13:
1034 reg_p = (unsigned long *)&regs->r13;
1035 break;
1036 case SVM_REG_R14:
1037 reg_p = (unsigned long *)&regs->r14;
1038 break;
1039 case SVM_REG_R15:
1040 reg_p = (unsigned long *)&regs->r15;
1041 break;
1042 #endif
1043 default:
1044 BUG();
1045 }
1047 return reg_p;
1048 }
1051 static inline unsigned long get_reg(unsigned int gpreg,
1052 struct cpu_user_regs *regs, struct vmcb_struct *vmcb)
1053 {
1054 unsigned long *gp;
1055 gp = get_reg_p(gpreg, regs, vmcb);
1056 return *gp;
1057 }
1060 static inline void set_reg(unsigned int gpreg, unsigned long value,
1061 struct cpu_user_regs *regs, struct vmcb_struct *vmcb)
1062 {
1063 unsigned long *gp;
1064 gp = get_reg_p(gpreg, regs, vmcb);
1065 *gp = value;
1066 }
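/* Emulate an intercepted MOV to/from DRn: decode the GPR operand from the
 * guest's instruction bytes and copy between it and the vcpu's saved
 * debug registers, then advance the guest RIP. */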
1069 static void svm_dr_access (struct vcpu *v, unsigned int reg, unsigned int type,
1070 struct cpu_user_regs *regs)
1072 unsigned long *reg_p = 0;
1073 unsigned int gpreg = 0;
1074 unsigned long eip;
1075 int inst_len;
1076 int index;
1077 struct vmcb_struct *vmcb;
1078 u8 buffer[MAX_INST_LEN];
1079 u8 prefix = 0;
1081 vmcb = v->arch.hvm_svm.vmcb;
1083 ASSERT(vmcb);
1085 eip = vmcb->rip;
1086 inst_copy_from_guest(buffer, svm_rip2pointer(vmcb), sizeof(buffer));
1087 index = skip_prefix_bytes(buffer, sizeof(buffer));
1089 ASSERT(buffer[index+0] == 0x0f && (buffer[index+1] & 0xFD) == 0x21);
1091 if (index > 0 && (buffer[index-1] & 0xF0) == 0x40)
1092 prefix = buffer[index-1];
1094 gpreg = decode_src_reg(prefix, buffer[index + 2]);
1095 ASSERT(reg == decode_dest_reg(prefix, buffer[index + 2]));
1097 HVM_DBG_LOG(DBG_LEVEL_1, "svm_dr_access : eip=%lx, reg=%d, gpreg = %x",
1098 eip, reg, gpreg);
1100 reg_p = get_reg_p(gpreg, regs, vmcb);
1102 switch (type)
1104 case TYPE_MOV_TO_DR:
1105 inst_len = __get_instruction_length(vmcb, INSTR_MOV2DR, buffer);
1106 v->arch.guest_context.debugreg[reg] = *reg_p;
1107 break;
1108 case TYPE_MOV_FROM_DR:
1109 inst_len = __get_instruction_length(vmcb, INSTR_MOVDR2, buffer);
1110 *reg_p = v->arch.guest_context.debugreg[reg];
1111 break;
1112 default:
1113 __hvm_bug(regs);
1114 break;
1116 ASSERT(inst_len > 0);
1117 __update_guest_eip(vmcb, inst_len);
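/* Scan the instruction bytes for prefixes: segment overrides set *seg,
 * an addr32 (0x67) prefix toggles *asize, and REP/LOCK/data32/REX
 * prefixes are skipped. */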
1121 static void svm_get_prefix_info(struct vmcb_struct *vmcb,
1122 unsigned int dir, segment_selector_t **seg, unsigned int *asize)
1124 unsigned char inst[MAX_INST_LEN];
1125 int i;
1127 memset(inst, 0, MAX_INST_LEN);
1128 if (inst_copy_from_guest(inst, svm_rip2pointer(vmcb), sizeof(inst))
1129 != MAX_INST_LEN)
1131 printk("%s: get guest instruction failed\n", __func__);
1132 domain_crash_synchronous();
1135 for (i = 0; i < MAX_INST_LEN; i++)
1137 switch (inst[i])
1139 case 0xf3: /* REPZ */
1140 case 0xf2: /* REPNZ */
1141 case 0xf0: /* LOCK */
1142 case 0x66: /* data32 */
1143 #if __x86_64__
1144 /* REX prefixes */
1145 case 0x40:
1146 case 0x41:
1147 case 0x42:
1148 case 0x43:
1149 case 0x44:
1150 case 0x45:
1151 case 0x46:
1152 case 0x47:
1154 case 0x48:
1155 case 0x49:
1156 case 0x4a:
1157 case 0x4b:
1158 case 0x4c:
1159 case 0x4d:
1160 case 0x4e:
1161 case 0x4f:
1162 #endif
1163 continue;
1164 case 0x67: /* addr32 */
1165 *asize ^= 48; /* Switch 16/32 bits */
1166 continue;
1167 case 0x2e: /* CS */
1168 *seg = &vmcb->cs;
1169 continue;
1170 case 0x36: /* SS */
1171 *seg = &vmcb->ss;
1172 continue;
1173 case 0x26: /* ES */
1174 *seg = &vmcb->es;
1175 continue;
1176 case 0x64: /* FS */
1177 *seg = &vmcb->fs;
1178 continue;
1179 case 0x65: /* GS */
1180 *seg = &vmcb->gs;
1181 continue;
1182 case 0x3e: /* DS */
1183 *seg = &vmcb->ds;
1184 continue;
1185 default:
1186 break;
1188 return;
1193 /* Get the address of INS/OUTS instruction */
1194 static inline int svm_get_io_address(struct vmcb_struct *vmcb,
1195 struct cpu_user_regs *regs, unsigned int dir,
1196 unsigned long *count, unsigned long *addr)
1198 unsigned long reg;
1199 unsigned int asize = 0;
1200 unsigned int isize;
1201 int long_mode;
1202 ioio_info_t info;
1203 segment_selector_t *seg = NULL;
1205 info.bytes = vmcb->exitinfo1;
1207 /* If we're in long mode, we shouldn't check the segment presence and limit */
1208 long_mode = vmcb->cs.attributes.fields.l && vmcb->efer & EFER_LMA;
1210 /* The D bit of cs.attributes is 1 for 32-bit, 0 for 16- or 64-bit.
1211 * The L bit combined with EFER_LMA (-> long mode) says whether it's 16- or 64-bit.
1212 */
1213 asize = (long_mode)?64:((vmcb->cs.attributes.fields.db)?32:16);
1216 /* The INS/OUTS instructions are a single byte, so if we got more
1217 * than one byte (plus a possible REP prefix), there is some other
1218 * prefix and we need to figure out what it is...
1219 */
1220 isize = vmcb->exitinfo2 - vmcb->rip;
1222 if (info.fields.rep)
1223 isize --;
1225 if (isize > 1)
1227 svm_get_prefix_info(vmcb, dir, &seg, &asize);
1230 ASSERT(dir == IOREQ_READ || dir == IOREQ_WRITE);
1232 if (dir == IOREQ_WRITE)
1234 reg = regs->esi;
1235 if (!seg) /* If no prefix, use DS. */
1236 seg = &vmcb->ds;
1238 else
1240 reg = regs->edi;
1241 seg = &vmcb->es; /* Note: This is ALWAYS ES. */
1244 /* If the segment isn't present, give GP fault! */
1245 if (!long_mode && !seg->attributes.fields.p)
1247 svm_inject_exception(vmcb, TRAP_gp_fault, 1, seg->sel);
1248 return 0;
1251 if (asize == 16)
1253 *addr = (reg & 0xFFFF);
1254 *count = regs->ecx & 0xffff;
1256 else
1258 *addr = reg;
1259 *count = regs->ecx;
1262 if (!long_mode) {
1263 if (*addr > seg->limit)
1265 svm_inject_exception(vmcb, TRAP_gp_fault, 1, seg->sel);
1266 return 0;
1268 else
1270 *addr += seg->base;
1275 return 1;
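/* VMEXIT handler for IN/OUT/INS/OUTS: decode port, size and direction
 * from EXITINFO1 and, for string ops, the source/destination address,
 * then forward the request to the device model via send_pio_req(). */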
1279 static void svm_io_instruction(struct vcpu *v, struct cpu_user_regs *regs)
1281 struct mmio_op *mmio_opp;
1282 unsigned int port;
1283 unsigned int size, dir;
1284 ioio_info_t info;
1285 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1287 ASSERT(vmcb);
1288 mmio_opp = &current->arch.hvm_vcpu.mmio_op;
1289 mmio_opp->instr = INSTR_PIO;
1290 mmio_opp->flags = 0;
1292 info.bytes = vmcb->exitinfo1;
1294 port = info.fields.port; /* port used to be addr */
1295 dir = info.fields.type; /* direction */
1296 if (info.fields.sz32)
1297 size = 4;
1298 else if (info.fields.sz16)
1299 size = 2;
1300 else
1301 size = 1;
1303 HVM_DBG_LOG(DBG_LEVEL_IO,
1304 "svm_io_instruction: port 0x%x eip=%lx:%lx, "
1305 "exit_qualification = %lx",
1306 port, vmcb->cs.sel, vmcb->rip, (unsigned long)info.bytes);
1307 /* string instruction */
1308 if (info.fields.str)
1310 unsigned long addr, count;
1311 int sign = regs->eflags & EF_DF ? -1 : 1;
1313 if (!svm_get_io_address(vmcb, regs, dir, &count, &addr))
1315 /* We failed to get a valid address, so don't do the IO operation -
1316 * it would just get worse if we did! Hopefully the guest is handling
1317 * GP faults...
1318 */
1319 return;
1322 /* "rep" prefix */
1323 if (info.fields.rep)
1325 mmio_opp->flags |= REPZ;
1327 else
1329 count = 1;
1332 /*
1333 * Handle string pio instructions that cross pages or that
1334 * are unaligned. See the comments in hvm_platform.c/handle_mmio()
1335 */
1336 if ((addr & PAGE_MASK) != ((addr + size - 1) & PAGE_MASK))
1338 unsigned long value = 0;
1340 mmio_opp->flags |= OVERLAP;
1342 if (dir == IOREQ_WRITE)
1343 hvm_copy(&value, addr, size, HVM_COPY_IN);
1345 send_pio_req(regs, port, 1, size, value, dir, 0);
1347 else
1349 if ((addr & PAGE_MASK) != ((addr + count * size - 1) & PAGE_MASK))
1351 if (sign > 0)
1352 count = (PAGE_SIZE - (addr & ~PAGE_MASK)) / size;
1353 else
1354 count = (addr & ~PAGE_MASK) / size;
1356 else
1357 vmcb->rip = vmcb->exitinfo2;
1359 send_pio_req(regs, port, count, size, addr, dir, 1);
1362 else
1364 /*
1365 * On SVM, the RIP of the instruction following the IN/OUT is saved in
1366 * ExitInfo2
1367 */
1368 vmcb->rip = vmcb->exitinfo2;
1370 if (port == 0xe9 && dir == IOREQ_WRITE && size == 1)
1371 hvm_print_line(v, regs->eax); /* guest debug output */
1373 send_pio_req(regs, port, 1, size, regs->eax, dir, 0);
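/* Update guest CR0, keeping the shadow copy in sync. Handles the guest
 * enabling paging (including the transition into long mode) as well as
 * dropping back to paged real mode. */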
1377 static int svm_set_cr0(unsigned long value)
1379 struct vcpu *v = current;
1380 unsigned long mfn;
1381 int paging_enabled;
1382 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1384 ASSERT(vmcb);
1386 /* We don't want to lose PG. ET is reserved and should always be 1. */
1387 paging_enabled = svm_paging_enabled(v);
1388 value |= X86_CR0_ET;
1389 vmcb->cr0 = value | X86_CR0_PG;
1390 v->arch.hvm_svm.cpu_shadow_cr0 = value;
1392 /* TS cleared? Then initialise FPU now. */
1393 if ( !(value & X86_CR0_TS) )
1395 setup_fpu(v);
1396 vmcb->exception_intercepts &= ~EXCEPTION_BITMAP_NM;
1399 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR0 value = %lx\n", value);
1401 if ((value & X86_CR0_PE) && (value & X86_CR0_PG) && !paging_enabled)
1403 /* The guest CR3 must be pointing to the guest physical. */
1404 if (!VALID_MFN(mfn =
1405 get_mfn_from_gpfn(v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT))
1406 || !get_page(mfn_to_page(mfn), v->domain))
1408 printk("Invalid CR3 value = %lx\n", v->arch.hvm_svm.cpu_cr3);
1409 domain_crash_synchronous(); /* need to take a clean path */
1412 #if defined(__x86_64__)
1413 if (test_bit(SVM_CPU_STATE_LME_ENABLED, &v->arch.hvm_svm.cpu_state)
1414 && !test_bit(SVM_CPU_STATE_PAE_ENABLED,
1415 &v->arch.hvm_svm.cpu_state))
1417 HVM_DBG_LOG(DBG_LEVEL_1, "Enable paging before PAE enable\n");
1418 svm_inject_exception(vmcb, TRAP_gp_fault, 1, 0);
1421 if (test_bit(SVM_CPU_STATE_LME_ENABLED, &v->arch.hvm_svm.cpu_state))
1423 /* PAE should already be enabled at this point */
1424 HVM_DBG_LOG(DBG_LEVEL_1, "Enable the Long mode\n");
1425 set_bit(SVM_CPU_STATE_LMA_ENABLED,
1426 &v->arch.hvm_svm.cpu_state);
1427 vmcb->efer |= (EFER_LMA | EFER_LME);
1428 if (!shadow_set_guest_paging_levels(v->domain, PAGING_L4) )
1430 printk("Unsupported guest paging levels\n");
1431 domain_crash_synchronous(); /* need to take a clean path */
1434 else
1435 #endif /* __x86_64__ */
1437 #if CONFIG_PAGING_LEVELS >= 3
1438 /* seems it's a 32-bit or 32-bit PAE guest */
1439 if ( test_bit(SVM_CPU_STATE_PAE_ENABLED,
1440 &v->arch.hvm_svm.cpu_state) )
1442 /* The guest enables PAE first and then enables PG, so it is
1443 * really a PAE guest */
1444 if ( !shadow_set_guest_paging_levels(v->domain, PAGING_L3) )
1446 printk("Unsupported guest paging levels\n");
1447 domain_crash_synchronous();
1450 else
1452 if ( !shadow_set_guest_paging_levels(v->domain, PAGING_L2) )
1454 printk("Unsupported guest paging levels\n");
1455 domain_crash_synchronous(); /* need to take a clean path */
1458 #endif
1461 /* Now arch.guest_table points to machine physical. */
1462 v->arch.guest_table = mk_pagetable((u64)mfn << PAGE_SHIFT);
1463 update_pagetables(v);
1465 HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx",
1466 (unsigned long) (mfn << PAGE_SHIFT));
1468 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
1469 vmcb->cr3 = pagetable_get_paddr(v->arch.shadow_table);
1471 /* arch->shadow_table should hold the next CR3 for shadow */
1472 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx\n",
1473 v->arch.hvm_svm.cpu_cr3, mfn);
1475 return 1;
1478 if ( !((value & X86_CR0_PE) && (value & X86_CR0_PG)) && paging_enabled )
1479 if ( v->arch.hvm_svm.cpu_cr3 ) {
1480 put_page(mfn_to_page(get_mfn_from_gpfn(
1481 v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT)));
1482 v->arch.guest_table = mk_pagetable(0);
1485 /*
1486 * SVM implements paged real-mode and when we return to real-mode
1487 * we revert back to the physical mappings that the domain builder
1488 * created.
1489 */
1490 if ((value & X86_CR0_PE) == 0) {
1491 if (value & X86_CR0_PG) {
1492 svm_inject_exception(vmcb, TRAP_gp_fault, 1, 0);
1493 return 0;
1496 clear_all_shadow_status( v->domain );
1497 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
1498 vmcb->cr3 = pagetable_get_paddr(v->domain->arch.phys_table);
1500 else if ( (value & (X86_CR0_PE | X86_CR0_PG)) == X86_CR0_PE )
1502 /* we should take care of this kind of situation */
1503 clear_all_shadow_status(v->domain);
1504 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
1505 vmcb->cr3 = pagetable_get_paddr(v->domain->arch.phys_table);
1508 return 1;
1511 /*
1512 * Read from control registers. CR0 and CR4 are read from the shadow.
1513 */
1514 static void mov_from_cr(int cr, int gp, struct cpu_user_regs *regs)
1516 unsigned long value = 0;
1517 struct vcpu *v = current;
1518 struct vmcb_struct *vmcb;
1520 vmcb = v->arch.hvm_svm.vmcb;
1521 ASSERT(vmcb);
1523 switch (cr)
1525 case 0:
1526 value = v->arch.hvm_svm.cpu_shadow_cr0;
1527 if (svm_dbg_on)
1528 printk("CR0 read =%lx \n", value );
1529 break;
1530 case 2:
1531 value = vmcb->cr2;
1532 break;
1533 case 3:
1534 value = (unsigned long) v->arch.hvm_svm.cpu_cr3;
1535 if (svm_dbg_on)
1536 printk("CR3 read =%lx \n", value );
1537 break;
1538 case 4:
1539 value = (unsigned long) v->arch.hvm_svm.cpu_shadow_cr4;
1540 if (svm_dbg_on)
1541 printk( "CR4 read=%lx\n", value );
1542 break;
1543 case 8:
1544 #if 0
1545 value = vmcb->m_cr8;
1546 #else
1547 ASSERT(0);
1548 #endif
1549 break;
1551 default:
1552 __hvm_bug(regs);
1555 set_reg(gp, value, regs, vmcb);
1557 HVM_DBG_LOG(DBG_LEVEL_VMMU, "mov_from_cr: CR%d, value = %lx,", cr, value);
1561 static inline int svm_pgbit_test(struct vcpu *v)
1563 return v->arch.hvm_svm.cpu_shadow_cr0 & X86_CR0_PG;
1567 /*
1568 * Write to control registers
1569 */
1570 static int mov_to_cr(int gpreg, int cr, struct cpu_user_regs *regs)
1572 unsigned long value;
1573 unsigned long old_cr;
1574 struct vcpu *v = current;
1575 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1577 ASSERT(vmcb);
1579 value = get_reg(gpreg, regs, vmcb);
1581 HVM_DBG_LOG(DBG_LEVEL_1, "mov_to_cr: CR%d, value = %lx,", cr, value);
1582 HVM_DBG_LOG(DBG_LEVEL_1, "current = %lx,", (unsigned long) current);
1584 switch (cr)
1586 case 0:
1587 if (svm_dbg_on)
1588 printk("CR0 write =%lx \n", value );
1589 return svm_set_cr0(value);
1591 case 3:
1593 unsigned long old_base_mfn, mfn;
1594 if (svm_dbg_on)
1595 printk("CR3 write =%lx \n", value );
1596 /* If paging is not enabled yet, simply copy the value to CR3. */
1597 if (!svm_paging_enabled(v)) {
1598 v->arch.hvm_svm.cpu_cr3 = value;
1599 break;
1601 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
1603 /* We make a new one if the shadow does not exist. */
1604 if (value == v->arch.hvm_svm.cpu_cr3)
1606 /*
1607 * This is simple TLB flush, implying the guest has
1608 * removed some translation or changed page attributes.
1609 * We simply invalidate the shadow.
1610 */
1611 mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT);
1612 if (mfn != pagetable_get_pfn(v->arch.guest_table))
1613 __hvm_bug(regs);
1614 shadow_sync_all(v->domain);
1616 else
1618 /*
1619 * If different, make a shadow. Check if the PDBR is valid
1620 * first.
1621 */
1622 HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 value = %lx", value);
1623 if (((value >> PAGE_SHIFT) > v->domain->max_pages)
1624 || !VALID_MFN(mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT))
1625 || !get_page(mfn_to_page(mfn), v->domain))
1627 printk("Invalid CR3 value=%lx\n", value);
1628 domain_crash_synchronous(); /* need to take a clean path */
1631 old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
1632 v->arch.guest_table = mk_pagetable((u64)mfn << PAGE_SHIFT);
1634 if (old_base_mfn)
1635 put_page(mfn_to_page(old_base_mfn));
1637 /*
1638 * arch.shadow_table should now hold the next CR3 for shadow
1639 */
1640 #if CONFIG_PAGING_LEVELS >= 3
1641 if ( v->domain->arch.ops->guest_paging_levels == PAGING_L3 )
1642 shadow_sync_all(v->domain);
1643 #endif
1644 v->arch.hvm_svm.cpu_cr3 = value;
1645 update_pagetables(v);
1646 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx", value);
1647 vmcb->cr3 = pagetable_get_paddr(v->arch.shadow_table);
1649 break;
1652 case 4: /* CR4 */
1654 if (svm_dbg_on)
1655 printk( "write cr4=%lx, cr0=%lx\n",
1656 value, v->arch.hvm_svm.cpu_shadow_cr0 );
1657 old_cr = v->arch.hvm_svm.cpu_shadow_cr4;
1658 if ( value & X86_CR4_PAE && !(old_cr & X86_CR4_PAE) )
1660 set_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
1661 if ( svm_pgbit_test(v) )
1663 /* The guest is a 32-bit PAE guest. */
1664 #if CONFIG_PAGING_LEVELS >= 4
1665 unsigned long mfn, old_base_mfn;
1667 if( !shadow_set_guest_paging_levels(v->domain, PAGING_L3) )
1669 printk("Unsupported guest paging levels\n");
1670 domain_crash_synchronous(); /* need to take a clean path */
1673 if ( !VALID_MFN(mfn = get_mfn_from_gpfn(
1674 v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT)) ||
1675 !get_page(mfn_to_page(mfn), v->domain) )
1677 printk("Invalid CR3 value = %lx", v->arch.hvm_svm.cpu_cr3);
1678 domain_crash_synchronous(); /* need to take a clean path */
1681 old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
1682 if ( old_base_mfn )
1683 put_page(mfn_to_page(old_base_mfn));
1685 /*
1686 * Now arch.guest_table points to machine physical.
1687 */
1689 v->arch.guest_table = mk_pagetable((u64)mfn << PAGE_SHIFT);
1690 update_pagetables(v);
1692 HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx",
1693 (unsigned long) (mfn << PAGE_SHIFT));
1695 vmcb->cr3 = pagetable_get_paddr(v->arch.shadow_table);
1697 /*
1698 * arch->shadow_table should hold the next CR3 for shadow
1699 */
1701 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx",
1702 v->arch.hvm_svm.cpu_cr3, mfn);
1703 #endif
1705 else
1707 /* The guest is a 64 bit or 32-bit PAE guest. */
1708 #if CONFIG_PAGING_LEVELS >= 4
1709 if ( (v->domain->arch.ops != NULL) &&
1710 v->domain->arch.ops->guest_paging_levels == PAGING_L2)
1712 /* Seems the guest first enables PAE without enabling PG,
1713 * it must enable PG after that, and it is a 32-bit PAE
1714 * guest */
1716 if ( !shadow_set_guest_paging_levels(v->domain, PAGING_L3) )
1718 printk("Unsupported guest paging levels\n");
1719 domain_crash_synchronous();
1722 else
1724 if ( !shadow_set_guest_paging_levels(v->domain,
1725 PAGING_L4) )
1727 printk("Unsupported guest paging levels\n");
1728 domain_crash_synchronous();
1731 #endif
1734 else if (value & X86_CR4_PAE) {
1735 set_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
1736 } else {
1737 if (test_bit(SVM_CPU_STATE_LMA_ENABLED,
1738 &v->arch.hvm_svm.cpu_state)) {
1739 svm_inject_exception(vmcb, TRAP_gp_fault, 1, 0);
1741 clear_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
1744 v->arch.hvm_svm.cpu_shadow_cr4 = value;
1745 vmcb->cr4 = value | SVM_CR4_HOST_MASK;
1747 /*
1748 * Writing to CR4 to modify the PSE, PGE, or PAE flag invalidates
1749 * all TLB entries except global entries.
1750 */
1751 if ((old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE))
1753 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
1754 shadow_sync_all(v->domain);
1756 break;
1759 default:
1760 printk("invalid cr: %d\n", cr);
1761 __hvm_bug(regs);
1764 return 1;
1768 #define ARR_SIZE(x) (sizeof(x) / sizeof(x[0]))
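/* Decode and emulate an intercepted CR access: MOV to/from CRn, CLTS,
 * LMSW or SMSW, then advance the guest RIP past the instruction. */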
1771 static int svm_cr_access(struct vcpu *v, unsigned int cr, unsigned int type,
1772 struct cpu_user_regs *regs)
1774 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1775 int inst_len = 0;
1776 int index;
1777 unsigned int gpreg;
1778 unsigned long value;
1779 u8 buffer[MAX_INST_LEN];
1780 u8 prefix = 0;
1781 int result = 1;
1782 enum instruction_index list_a[] = {INSTR_MOV2CR, INSTR_CLTS, INSTR_LMSW};
1783 enum instruction_index list_b[] = {INSTR_MOVCR2, INSTR_SMSW};
1784 enum instruction_index match;
1786 ASSERT(vmcb);
1788 inst_copy_from_guest(buffer, svm_rip2pointer(vmcb), sizeof(buffer));
1789 /* get index to first actual instruction byte - as we will need to know where the
1790 * prefix lives later on
1791 */
1792 index = skip_prefix_bytes(buffer, sizeof(buffer));
1794 if (type == TYPE_MOV_TO_CR)
1796 inst_len = __get_instruction_length_from_list(vmcb, list_a,
1797 ARR_SIZE(list_a), &buffer[index], &match);
1799 else
1801 inst_len = __get_instruction_length_from_list(vmcb, list_b,
1802 ARR_SIZE(list_b), &buffer[index], &match);
1805 ASSERT(inst_len > 0);
1807 inst_len += index;
1809 /* Check for REX prefix - it's ALWAYS the last byte of any prefix bytes */
1810 if (index > 0 && (buffer[index-1] & 0xF0) == 0x40)
1811 prefix = buffer[index-1];
1813 HVM_DBG_LOG(DBG_LEVEL_1, "eip = %lx", (unsigned long) vmcb->rip);
1815 switch (match)
1817 case INSTR_MOV2CR:
1818 gpreg = decode_src_reg(prefix, buffer[index+2]);
1819 result = mov_to_cr(gpreg, cr, regs);
1820 break;
1822 case INSTR_MOVCR2:
1823 gpreg = decode_src_reg(prefix, buffer[index+2]);
1824 mov_from_cr(cr, gpreg, regs);
1825 break;
1827 case INSTR_CLTS:
1828 /* TS being cleared means that it's time to restore fpu state. */
1829 setup_fpu(current);
1830 vmcb->exception_intercepts &= ~EXCEPTION_BITMAP_NM;
1831 vmcb->cr0 &= ~X86_CR0_TS; /* clear TS */
1832 v->arch.hvm_svm.cpu_shadow_cr0 &= ~X86_CR0_TS; /* clear TS */
1833 break;
1835 case INSTR_LMSW:
1836 if (svm_dbg_on)
1837 svm_dump_inst(svm_rip2pointer(vmcb));
1839 gpreg = decode_src_reg(prefix, buffer[index+2]);
1840 value = get_reg(gpreg, regs, vmcb) & 0xF;
1842 if (svm_dbg_on)
1843 printk("CR0-LMSW value=%lx, reg=%d, inst_len=%d\n", value, gpreg,
1844 inst_len);
1846 value = (v->arch.hvm_svm.cpu_shadow_cr0 & ~0xF) | value;
1848 if (svm_dbg_on)
1849 printk("CR0-LMSW CR0 - New value=%lx\n", value);
1851 result = svm_set_cr0(value);
1852 break;
1854 case INSTR_SMSW:
1855 svm_dump_inst(svm_rip2pointer(vmcb));
1856 value = v->arch.hvm_svm.cpu_shadow_cr0;
1857 gpreg = decode_src_reg(prefix, buffer[index+2]);
1858 set_reg(gpreg, value, regs, vmcb);
1860 if (svm_dbg_on)
1861 printk("CR0-SMSW value=%lx, reg=%d, inst_len=%d\n", value, gpreg,
1862 inst_len);
1863 break;
1865 default:
1866 __hvm_bug(regs);
1867 break;
1870 ASSERT(inst_len);
1872 __update_guest_eip(vmcb, inst_len);
1874 return result;
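/* Handle RDMSR/WRMSR intercepts; EXITINFO1 is 0 for a read and 1 for a
 * write. The TSC, SYSENTER and APIC-base MSRs are handled here, the
 * long-mode MSRs in long_mode_do_msr_read/write above. */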
1877 static inline void svm_do_msr_access(struct vcpu *v, struct cpu_user_regs *regs)
1879 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1880 int inst_len;
1881 u64 msr_content=0;
1883 ASSERT(vmcb);
1885 HVM_DBG_LOG(DBG_LEVEL_1, "svm_do_msr_access: ecx=%lx, eax=%lx, edx=%lx, "
1886 "exitinfo = %lx", (unsigned long)regs->ecx,
1887 (unsigned long)regs->eax, (unsigned long)regs->edx,
1888 (unsigned long)vmcb->exitinfo1);
1890 /* is it a read? */
1891 if (vmcb->exitinfo1 == 0)
1893 inst_len = __get_instruction_length(vmcb, INSTR_RDMSR, NULL);
1895 regs->edx = 0;
1896 switch (regs->ecx) {
1897 case MSR_IA32_TIME_STAMP_COUNTER:
1899 struct hvm_time_info *time_info;
1901 rdtscll(msr_content);
1902 time_info = &v->domain->arch.hvm_domain.vpit.time_info;
1903 msr_content += time_info->cache_tsc_offset;
1904 break;
1906 case MSR_IA32_SYSENTER_CS:
1907 msr_content = vmcb->sysenter_cs;
1908 break;
1909 case MSR_IA32_SYSENTER_ESP:
1910 msr_content = vmcb->sysenter_esp;
1911 break;
1912 case MSR_IA32_SYSENTER_EIP:
1913 msr_content = vmcb->sysenter_eip;
1914 break;
1915 case MSR_IA32_APICBASE:
1916 msr_content = VLAPIC(v) ? VLAPIC(v)->apic_base_msr : 0;
1917 break;
1918 default:
1919 if (long_mode_do_msr_read(regs))
1920 goto done;
1921 rdmsr_safe(regs->ecx, regs->eax, regs->edx);
1922 break;
1924 regs->eax = msr_content & 0xFFFFFFFF;
1925 regs->edx = msr_content >> 32;
1927 else
1929 inst_len = __get_instruction_length(vmcb, INSTR_WRMSR, NULL);
1930 msr_content = (regs->eax & 0xFFFFFFFF) | ((u64)regs->edx << 32);
1932 switch (regs->ecx)
1934 case MSR_IA32_TIME_STAMP_COUNTER:
1935 svm_set_guest_time(v, msr_content);
1936 break;
1937 case MSR_IA32_SYSENTER_CS:
1938 vmcb->sysenter_cs = msr_content;
1939 break;
1940 case MSR_IA32_SYSENTER_ESP:
1941 vmcb->sysenter_esp = msr_content;
1942 break;
1943 case MSR_IA32_SYSENTER_EIP:
1944 vmcb->sysenter_eip = msr_content;
1945 break;
1946 case MSR_IA32_APICBASE:
1947 vlapic_msr_set(VLAPIC(v), msr_content);
1948 break;
1949 default:
1950 long_mode_do_msr_write(regs);
1951 break;
1955 done:
1957 HVM_DBG_LOG(DBG_LEVEL_1, "svm_do_msr_access returns: "
1958 "ecx=%lx, eax=%lx, edx=%lx",
1959 (unsigned long)regs->ecx, (unsigned long)regs->eax,
1960 (unsigned long)regs->edx);
1962 __update_guest_eip(vmcb, inst_len);
1966 /*
1967 * Need to use this exit to reschedule
1968 */
1969 static inline void svm_vmexit_do_hlt(struct vmcb_struct *vmcb)
1971 struct vcpu *v = current;
1972 struct hvm_virpit *vpit = &v->domain->arch.hvm_domain.vpit;
1973 s_time_t next_pit = -1, next_wakeup;
1975 __update_guest_eip(vmcb, 1);
1977 /* check for interrupt not handled or new interrupt */
1978 if ( vmcb->vintr.fields.irq || cpu_has_pending_irq(v) )
1979 return;
1981 if ( !v->vcpu_id )
1982 next_pit = get_pit_scheduled(v, vpit);
1983 next_wakeup = get_apictime_scheduled(v);
1984 if ( (next_pit != -1 && next_pit < next_wakeup) || next_wakeup == -1 )
1985 next_wakeup = next_pit;
1986 if ( next_wakeup != - 1 )
1987 set_timer(&current->arch.hvm_svm.hlt_timer, next_wakeup);
1988 hvm_safe_block();
1992 static inline void svm_vmexit_do_mwait(void)
1993 {
1994 }
1997 #ifdef XEN_DEBUGGER
1998 static void svm_debug_save_cpu_user_regs(struct vmcb_struct *vmcb,
1999 struct cpu_user_regs *regs)
2001 regs->eip = vmcb->rip;
2002 regs->esp = vmcb->rsp;
2003 regs->eflags = vmcb->rflags;
2005 regs->xcs = vmcb->cs.sel;
2006 regs->xds = vmcb->ds.sel;
2007 regs->xes = vmcb->es.sel;
2008 regs->xfs = vmcb->fs.sel;
2009 regs->xgs = vmcb->gs.sel;
2010 regs->xss = vmcb->ss.sel;
2014 static void svm_debug_restore_cpu_user_regs(struct cpu_user_regs *regs)
2016 vmcb->ss.sel = regs->xss;
2017 vmcb->rsp = regs->esp;
2018 vmcb->rflags = regs->eflags;
2019 vmcb->cs.sel = regs->xcs;
2020 vmcb->rip = regs->eip;
2022 vmcb->gs.sel = regs->xgs;
2023 vmcb->fs.sel = regs->xfs;
2024 vmcb->es.sel = regs->xes;
2025 vmcb->ds.sel = regs->xds;
2027 #endif
2030 void svm_handle_invlpg(const short invlpga, struct cpu_user_regs *regs)
2032 struct vcpu *v = current;
2033 u8 opcode[MAX_INST_SIZE], prefix, length = MAX_INST_SIZE;
2034 unsigned long g_vaddr;
2035 int inst_len;
2036 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2038 ASSERT(vmcb);
2039 /*
2040 * Unknown how many bytes the invlpg instruction will take. Use the
2041 * maximum instruction length here
2042 */
2043 if (inst_copy_from_guest(opcode, svm_rip2pointer(vmcb), length) < length)
2045 printk("svm_handle_invlpg (): Error reading memory %d bytes\n", length);
2046 __hvm_bug(regs);
2049 if (invlpga)
2051 inst_len = __get_instruction_length(vmcb, INSTR_INVLPGA, opcode);
2052 ASSERT(inst_len > 0);
2053 __update_guest_eip(vmcb, inst_len);
2055 /*
2056 * The address is implicit for this instruction. At the moment, we don't
2057 * use ecx (ASID) to identify individual guest pages
2058 */
2059 g_vaddr = regs->eax;
2061 else
2063 /* What about multiple prefix codes? */
2064 prefix = (is_prefix(opcode[0])?opcode[0]:0);
2065 inst_len = __get_instruction_length(vmcb, INSTR_INVLPG, opcode);
2066 ASSERT(inst_len > 0);
2068 inst_len--;
2069 length -= inst_len;
2071 /*
2072 * Decode memory operand of the instruction including ModRM, SIB, and
2073 * displacement to get the effective address and length in bytes. Assume
2074 * the system is in either 32- or 64-bit mode.
2075 */
2076 g_vaddr = get_effective_addr_modrm64(vmcb, regs, prefix,
2077 &opcode[inst_len], &length);
2079 inst_len += length;
2080 __update_guest_eip (vmcb, inst_len);
2083 /* Overkill, we may not need this */
2084 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
2085 shadow_invlpg(v, g_vaddr);
2089 /*
2090 * Reset to realmode causes execution to start at 0xF000:0xFFF0 in
2091 * 16-bit realmode. Basically, this mimics a processor reset.
2093 * returns 0 on success, non-zero otherwise
2094 */
2095 static int svm_do_vmmcall_reset_to_realmode(struct vcpu *v,
2096 struct cpu_user_regs *regs)
2098 struct vmcb_struct *vmcb;
2100 ASSERT(v);
2101 ASSERT(regs);
2103 vmcb = v->arch.hvm_svm.vmcb;
2105 ASSERT(vmcb);
2107 /* clear the vmcb and user regs */
2108 memset(regs, 0, sizeof(struct cpu_user_regs));
2110 /* VMCB Control */
2111 vmcb->tsc_offset = 0;
2113 /* VMCB State */
2114 vmcb->cr0 = X86_CR0_ET | X86_CR0_PG;
2115 v->arch.hvm_svm.cpu_shadow_cr0 = X86_CR0_ET;
2117 vmcb->cr2 = 0;
2118 vmcb->efer = EFER_SVME;
2120 vmcb->cr4 = SVM_CR4_HOST_MASK;
2121 v->arch.hvm_svm.cpu_shadow_cr4 = 0;
2122 clear_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
2124 /* This will jump to ROMBIOS */
2125 vmcb->rip = 0xFFF0;
2127 /* setup the segment registers and all their hidden states */
2128 vmcb->cs.sel = 0xF000;
2129 vmcb->cs.attributes.bytes = 0x089b;
2130 vmcb->cs.limit = 0xffff;
2131 vmcb->cs.base = 0x000F0000;
2133 vmcb->ss.sel = 0x00;
2134 vmcb->ss.attributes.bytes = 0x0893;
2135 vmcb->ss.limit = 0xffff;
2136 vmcb->ss.base = 0x00;
2138 vmcb->ds.sel = 0x00;
2139 vmcb->ds.attributes.bytes = 0x0893;
2140 vmcb->ds.limit = 0xffff;
2141 vmcb->ds.base = 0x00;
2143 vmcb->es.sel = 0x00;
2144 vmcb->es.attributes.bytes = 0x0893;
2145 vmcb->es.limit = 0xffff;
2146 vmcb->es.base = 0x00;
2148 vmcb->fs.sel = 0x00;
2149 vmcb->fs.attributes.bytes = 0x0893;
2150 vmcb->fs.limit = 0xffff;
2151 vmcb->fs.base = 0x00;
2153 vmcb->gs.sel = 0x00;
2154 vmcb->gs.attributes.bytes = 0x0893;
2155 vmcb->gs.limit = 0xffff;
2156 vmcb->gs.base = 0x00;
2158 vmcb->ldtr.sel = 0x00;
2159 vmcb->ldtr.attributes.bytes = 0x0000;
2160 vmcb->ldtr.limit = 0x0;
2161 vmcb->ldtr.base = 0x00;
2163 vmcb->gdtr.sel = 0x00;
2164 vmcb->gdtr.attributes.bytes = 0x0000;
2165 vmcb->gdtr.limit = 0x0;
2166 vmcb->gdtr.base = 0x00;
2168 vmcb->tr.sel = 0;
2169 vmcb->tr.attributes.bytes = 0;
2170 vmcb->tr.limit = 0x0;
2171 vmcb->tr.base = 0;
2173 vmcb->idtr.sel = 0x00;
2174 vmcb->idtr.attributes.bytes = 0x0000;
2175 vmcb->idtr.limit = 0x3ff;
2176 vmcb->idtr.base = 0x00;
2178 vmcb->rax = 0;
2179 vmcb->rsp = 0;
2181 return 0;
2185 /*
2186 * svm_do_vmmcall - SVM VMMCALL handler
2188 * returns 0 on success, non-zero otherwise
2189 */
2190 static int svm_do_vmmcall(struct vcpu *v, struct cpu_user_regs *regs)
2192 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2193 int inst_len;
2195 ASSERT(vmcb);
2196 ASSERT(regs);
2198 inst_len = __get_instruction_length(vmcb, INSTR_VMCALL, NULL);
2199 ASSERT(inst_len > 0);
2201 /* VMMCALL sanity check */
2202 if (vmcb->cpl > get_vmmcall_cpl(regs->edi))
2204 printf("VMMCALL CPL check failed\n");
2205 return -1;
2208 /* handle the request */
2209 switch (regs->edi)
2211 case VMMCALL_RESET_TO_REALMODE:
2212 if (svm_do_vmmcall_reset_to_realmode(v, regs))
2214 printf("svm_do_vmmcall_reset_to_realmode() failed\n");
2215 return -1;
2218 /* since we just reset the VMCB, return without adjusting the eip */
2219 return 0;
2220 case VMMCALL_DEBUG:
2221 printf("DEBUG features not implemented yet\n");
2222 break;
2223 default:
2224 break;
2227 hvm_print_line(v, regs->eax); /* print the byte in eax on behalf of the current domain */
2229 __update_guest_eip(vmcb, inst_len);
2230 return 0;
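/*
 * Editor's sketch (hypothetical, not part of this file): how a guest could
 * issue a request that svm_do_vmmcall() above dispatches on.  The request
 * code travels in edi; for requests that do not reset the VMCB, the byte
 * passed to hvm_print_line() travels in eax.  VMMCALL is the three-byte
 * opcode 0f 01 d9.
 */
static inline void guest_vmmcall(unsigned long request, unsigned long arg)
{
    __asm__ __volatile__ ( ".byte 0x0f,0x01,0xd9"  /* VMMCALL */
                           : : "D" (request), "a" (arg) : "memory" );
}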
2234 void svm_dump_inst(unsigned long eip)
2236 u8 opcode[256];
2237 unsigned long ptr;
2238 int len;
2239 int i;
2241 ptr = eip & ~0xff;
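/* Editor's note: dump the 256-byte aligned window that contains eip,
 * so up to 0xff bytes before eip are shown as context. */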
2242 len = 0;
2244 if (hvm_copy(opcode, ptr, sizeof(opcode), HVM_COPY_IN))
2245 len = sizeof(opcode);
2247 printf("Code bytes around(len=%d) %lx:", len, eip);
2248 for (i = 0; i < len; i++)
2250 if ((i & 0x0f) == 0)
2251 printf("\n%08lx:", ptr+i);
2253 printf("%02x ", opcode[i]);
2256 printf("\n");
2260 void svm_dump_regs(const char *from, struct cpu_user_regs *regs)
2262 struct vcpu *v = current;
2263 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2264 unsigned long pt = pagetable_get_paddr(v->arch.shadow_table);
2266 printf("%s: guest registers from %s:\n", __func__, from);
2267 #if defined (__x86_64__)
2268 printk("rax: %016lx rbx: %016lx rcx: %016lx\n",
2269 regs->rax, regs->rbx, regs->rcx);
2270 printk("rdx: %016lx rsi: %016lx rdi: %016lx\n",
2271 regs->rdx, regs->rsi, regs->rdi);
2272 printk("rbp: %016lx rsp: %016lx r8: %016lx\n",
2273 regs->rbp, regs->rsp, regs->r8);
2274 printk("r9: %016lx r10: %016lx r11: %016lx\n",
2275 regs->r9, regs->r10, regs->r11);
2276 printk("r12: %016lx r13: %016lx r14: %016lx\n",
2277 regs->r12, regs->r13, regs->r14);
2278 printk("r15: %016lx cr0: %016lx cr3: %016lx\n",
2279 regs->r15, v->arch.hvm_svm.cpu_shadow_cr0, vmcb->cr3);
2280 #else
2281 printf("eax: %08x, ebx: %08x, ecx: %08x, edx: %08x\n",
2282 regs->eax, regs->ebx, regs->ecx, regs->edx);
2283 printf("edi: %08x, esi: %08x, ebp: %08x, esp: %08x\n",
2284 regs->edi, regs->esi, regs->ebp, regs->esp);
2285 printf("%s: guest cr0: %lx\n", __func__,
2286 v->arch.hvm_svm.cpu_shadow_cr0);
2287 printf("guest CR3 = %llx\n", vmcb->cr3);
2288 #endif
2289 printf("%s: pt = %lx\n", __func__, pt);
2293 void svm_dump_host_regs(const char *from)
2295 struct vcpu *v = current;
2296 unsigned long pt = pagetable_get_paddr(v->arch.monitor_table);
2297 unsigned long cr3, cr0;
2298 printf("Host registers at %s\n", from);
2300 __asm__ __volatile__ ("\tmov %%cr0,%0\n"
2301 "\tmov %%cr3,%1\n"
2302 : "=r" (cr0), "=r"(cr3));
2303 printf("%s: pt = %lx, cr3 = %lx, cr0 = %lx\n", __func__, pt, cr3, cr0);
2306 #ifdef SVM_EXTRA_DEBUG
2307 static char *exit_reasons[] = {
2308 [VMEXIT_CR0_READ] = "CR0_READ",
2309 [VMEXIT_CR1_READ] = "CR1_READ",
2310 [VMEXIT_CR2_READ] = "CR2_READ",
2311 [VMEXIT_CR3_READ] = "CR3_READ",
2312 [VMEXIT_CR4_READ] = "CR4_READ",
2313 [VMEXIT_CR5_READ] = "CR5_READ",
2314 [VMEXIT_CR6_READ] = "CR6_READ",
2315 [VMEXIT_CR7_READ] = "CR7_READ",
2316 [VMEXIT_CR8_READ] = "CR8_READ",
2317 [VMEXIT_CR9_READ] = "CR9_READ",
2318 [VMEXIT_CR10_READ] = "CR10_READ",
2319 [VMEXIT_CR11_READ] = "CR11_READ",
2320 [VMEXIT_CR12_READ] = "CR12_READ",
2321 [VMEXIT_CR13_READ] = "CR13_READ",
2322 [VMEXIT_CR14_READ] = "CR14_READ",
2323 [VMEXIT_CR15_READ] = "CR15_READ",
2324 [VMEXIT_CR0_WRITE] = "CR0_WRITE",
2325 [VMEXIT_CR1_WRITE] = "CR1_WRITE",
2326 [VMEXIT_CR2_WRITE] = "CR2_WRITE",
2327 [VMEXIT_CR3_WRITE] = "CR3_WRITE",
2328 [VMEXIT_CR4_WRITE] = "CR4_WRITE",
2329 [VMEXIT_CR5_WRITE] = "CR5_WRITE",
2330 [VMEXIT_CR6_WRITE] = "CR6_WRITE",
2331 [VMEXIT_CR7_WRITE] = "CR7_WRITE",
2332 [VMEXIT_CR8_WRITE] = "CR8_WRITE",
2333 [VMEXIT_CR9_WRITE] = "CR9_WRITE",
2334 [VMEXIT_CR10_WRITE] = "CR10_WRITE",
2335 [VMEXIT_CR11_WRITE] = "CR11_WRITE",
2336 [VMEXIT_CR12_WRITE] = "CR12_WRITE",
2337 [VMEXIT_CR13_WRITE] = "CR13_WRITE",
2338 [VMEXIT_CR14_WRITE] = "CR14_WRITE",
2339 [VMEXIT_CR15_WRITE] = "CR15_WRITE",
2340 [VMEXIT_DR0_READ] = "DR0_READ",
2341 [VMEXIT_DR1_READ] = "DR1_READ",
2342 [VMEXIT_DR2_READ] = "DR2_READ",
2343 [VMEXIT_DR3_READ] = "DR3_READ",
2344 [VMEXIT_DR4_READ] = "DR4_READ",
2345 [VMEXIT_DR5_READ] = "DR5_READ",
2346 [VMEXIT_DR6_READ] = "DR6_READ",
2347 [VMEXIT_DR7_READ] = "DR7_READ",
2348 [VMEXIT_DR8_READ] = "DR8_READ",
2349 [VMEXIT_DR9_READ] = "DR9_READ",
2350 [VMEXIT_DR10_READ] = "DR10_READ",
2351 [VMEXIT_DR11_READ] = "DR11_READ",
2352 [VMEXIT_DR12_READ] = "DR12_READ",
2353 [VMEXIT_DR13_READ] = "DR13_READ",
2354 [VMEXIT_DR14_READ] = "DR14_READ",
2355 [VMEXIT_DR15_READ] = "DR15_READ",
2356 [VMEXIT_DR0_WRITE] = "DR0_WRITE",
2357 [VMEXIT_DR1_WRITE] = "DR1_WRITE",
2358 [VMEXIT_DR2_WRITE] = "DR2_WRITE",
2359 [VMEXIT_DR3_WRITE] = "DR3_WRITE",
2360 [VMEXIT_DR4_WRITE] = "DR4_WRITE",
2361 [VMEXIT_DR5_WRITE] = "DR5_WRITE",
2362 [VMEXIT_DR6_WRITE] = "DR6_WRITE",
2363 [VMEXIT_DR7_WRITE] = "DR7_WRITE",
2364 [VMEXIT_DR8_WRITE] = "DR8_WRITE",
2365 [VMEXIT_DR9_WRITE] = "DR9_WRITE",
2366 [VMEXIT_DR10_WRITE] = "DR10_WRITE",
2367 [VMEXIT_DR11_WRITE] = "DR11_WRITE",
2368 [VMEXIT_DR12_WRITE] = "DR12_WRITE",
2369 [VMEXIT_DR13_WRITE] = "DR13_WRITE",
2370 [VMEXIT_DR14_WRITE] = "DR14_WRITE",
2371 [VMEXIT_DR15_WRITE] = "DR15_WRITE",
2372 [VMEXIT_EXCEPTION_DE] = "EXCEPTION_DE",
2373 [VMEXIT_EXCEPTION_DB] = "EXCEPTION_DB",
2374 [VMEXIT_EXCEPTION_NMI] = "EXCEPTION_NMI",
2375 [VMEXIT_EXCEPTION_BP] = "EXCEPTION_BP",
2376 [VMEXIT_EXCEPTION_OF] = "EXCEPTION_OF",
2377 [VMEXIT_EXCEPTION_BR] = "EXCEPTION_BR",
2378 [VMEXIT_EXCEPTION_UD] = "EXCEPTION_UD",
2379 [VMEXIT_EXCEPTION_NM] = "EXCEPTION_NM",
2380 [VMEXIT_EXCEPTION_DF] = "EXCEPTION_DF",
2381 [VMEXIT_EXCEPTION_09] = "EXCEPTION_09",
2382 [VMEXIT_EXCEPTION_TS] = "EXCEPTION_TS",
2383 [VMEXIT_EXCEPTION_NP] = "EXCEPTION_NP",
2384 [VMEXIT_EXCEPTION_SS] = "EXCEPTION_SS",
2385 [VMEXIT_EXCEPTION_GP] = "EXCEPTION_GP",
2386 [VMEXIT_EXCEPTION_PF] = "EXCEPTION_PF",
2387 [VMEXIT_EXCEPTION_15] = "EXCEPTION_15",
2388 [VMEXIT_EXCEPTION_MF] = "EXCEPTION_MF",
2389 [VMEXIT_EXCEPTION_AC] = "EXCEPTION_AC",
2390 [VMEXIT_EXCEPTION_MC] = "EXCEPTION_MC",
2391 [VMEXIT_EXCEPTION_XF] = "EXCEPTION_XF",
2392 [VMEXIT_INTR] = "INTR",
2393 [VMEXIT_NMI] = "NMI",
2394 [VMEXIT_SMI] = "SMI",
2395 [VMEXIT_INIT] = "INIT",
2396 [VMEXIT_VINTR] = "VINTR",
2397 [VMEXIT_CR0_SEL_WRITE] = "CR0_SEL_WRITE",
2398 [VMEXIT_IDTR_READ] = "IDTR_READ",
2399 [VMEXIT_GDTR_READ] = "GDTR_READ",
2400 [VMEXIT_LDTR_READ] = "LDTR_READ",
2401 [VMEXIT_TR_READ] = "TR_READ",
2402 [VMEXIT_IDTR_WRITE] = "IDTR_WRITE",
2403 [VMEXIT_GDTR_WRITE] = "GDTR_WRITE",
2404 [VMEXIT_LDTR_WRITE] = "LDTR_WRITE",
2405 [VMEXIT_TR_WRITE] = "TR_WRITE",
2406 [VMEXIT_RDTSC] = "RDTSC",
2407 [VMEXIT_RDPMC] = "RDPMC",
2408 [VMEXIT_PUSHF] = "PUSHF",
2409 [VMEXIT_POPF] = "POPF",
2410 [VMEXIT_CPUID] = "CPUID",
2411 [VMEXIT_RSM] = "RSM",
2412 [VMEXIT_IRET] = "IRET",
2413 [VMEXIT_SWINT] = "SWINT",
2414 [VMEXIT_INVD] = "INVD",
2415 [VMEXIT_PAUSE] = "PAUSE",
2416 [VMEXIT_HLT] = "HLT",
2417 [VMEXIT_INVLPG] = "INVLPG",
2418 [VMEXIT_INVLPGA] = "INVLPGA",
2419 [VMEXIT_IOIO] = "IOIO",
2420 [VMEXIT_MSR] = "MSR",
2421 [VMEXIT_TASK_SWITCH] = "TASK_SWITCH",
2422 [VMEXIT_FERR_FREEZE] = "FERR_FREEZE",
2423 [VMEXIT_SHUTDOWN] = "SHUTDOWN",
2424 [VMEXIT_VMRUN] = "VMRUN",
2425 [VMEXIT_VMMCALL] = "VMMCALL",
2426 [VMEXIT_VMLOAD] = "VMLOAD",
2427 [VMEXIT_VMSAVE] = "VMSAVE",
2428 [VMEXIT_STGI] = "STGI",
2429 [VMEXIT_CLGI] = "CLGI",
2430 [VMEXIT_SKINIT] = "SKINIT",
2431 [VMEXIT_RDTSCP] = "RDTSCP",
2432 [VMEXIT_ICEBP] = "ICEBP",
2433 [VMEXIT_NPF] = "NPF"
2434 };
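/*
 * Editor's sketch (not in the original source): a bounds-checked lookup for
 * the table above.  Exit codes without an initializer are NULL, and codes
 * beyond the last entry would index past the array, so both cases fall back
 * to a fixed string.
 */
static inline const char *exit_reason_name(unsigned int exit_reason)
{
    if ( exit_reason < sizeof(exit_reasons) / sizeof(exit_reasons[0]) &&
         exit_reasons[exit_reason] != NULL )
        return exit_reasons[exit_reason];
    return "UNKNOWN";
}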
2435 #endif /* SVM_EXTRA_DEBUG */
2437 #ifdef SVM_WALK_GUEST_PAGES
2438 void walk_shadow_and_guest_pt(unsigned long gva)
2440 l2_pgentry_t gpde;
2441 l2_pgentry_t spde;
2442 l1_pgentry_t gpte;
2443 l1_pgentry_t spte;
2444 struct vcpu *v = current;
2445 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2446 unsigned long gpa;
2448 gpa = gva_to_gpa( gva );
2449 printk( "gva = %lx, gpa=%lx, gCR3=%x\n", gva, gpa, (u32)vmcb->cr3 );
2450 if( !svm_paging_enabled(v) || mmio_space(gpa) )
2451 return;
2453 /* let's dump the guest and shadow page info */
2455 __guest_get_l2e(v, gva, &gpde);
2456 printk( "G-PDE = %x, flags=%x\n", gpde.l2, l2e_get_flags(gpde) );
2457 __shadow_get_l2e( v, gva, &spde );
2458 printk( "S-PDE = %x, flags=%x\n", spde.l2, l2e_get_flags(spde) );
2460 if ( unlikely(!(l2e_get_flags(gpde) & _PAGE_PRESENT)) )
2461 return;
2463 spte = l1e_empty();
2465 // This is actually overkill - we only need to make sure the hl2 is in-sync.
2466 shadow_sync_va(v, gva);
2468 gpte.l1 = 0;
2469 __copy_from_user(&gpte, &linear_pg_table[ l1_linear_offset(gva) ], sizeof(gpte) );
2470 printk( "G-PTE = %x, flags=%x\n", gpte.l1, l1e_get_flags(gpte) );
2471 __copy_from_user( &spte, &phys_to_machine_mapping[ l1e_get_pfn( gpte ) ],
2472 sizeof(spte) );
2473 printk( "S-PTE = %x, flags=%x\n", spte.l1, l1e_get_flags(spte));
2475 #endif /* SVM_WALK_GUEST_PAGES */
2477 asmlinkage void svm_vmexit_handler(struct cpu_user_regs regs)
2479 unsigned int exit_reason;
2480 unsigned long eip;
2481 struct vcpu *v = current;
2482 int error;
2483 int do_debug = 0;
2484 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2486 ASSERT(vmcb);
2488 exit_reason = vmcb->exitcode;
2489 save_svm_cpu_user_regs(v, &regs);
2490 v->arch.hvm_svm.injecting_event = 0;
2492 vmcb->tlb_control = 1;
2494 #ifdef SVM_EXTRA_DEBUG
2496 #if defined(__i386__)
2497 #define rip eip
2498 #endif
2500 static unsigned long intercepts_counter = 0;
2502 if (svm_dbg_on && exit_reason == VMEXIT_EXCEPTION_PF)
2504 if (svm_paging_enabled(v) && !mmio_space(gva_to_gpa(vmcb->exitinfo2)))
2506 printk("I%08ld,ExC=%s(%d),IP=%x:%llx,I1=%llx,I2=%llx,INT=%llx, gpa=%llx\n",
2507 intercepts_counter,
2508 exit_reasons[exit_reason], exit_reason, regs.cs,
2509 (unsigned long long) regs.rip,
2510 (unsigned long long) vmcb->exitinfo1,
2511 (unsigned long long) vmcb->exitinfo2,
2512 (unsigned long long) vmcb->exitintinfo.bytes,
2513 (unsigned long long) gva_to_gpa( vmcb->exitinfo2 ) );
2515 else
2517 printk("I%08ld,ExC=%s(%d),IP=%x:%llx,I1=%llx,I2=%llx,INT=%llx\n",
2518 intercepts_counter,
2519 exit_reasons[exit_reason], exit_reason, regs.cs,
2520 (unsigned long long) regs.rip,
2521 (unsigned long long) vmcb->exitinfo1,
2522 (unsigned long long) vmcb->exitinfo2,
2523 (unsigned long long) vmcb->exitintinfo.bytes );
2526 else if ( svm_dbg_on
2527 && exit_reason != VMEXIT_IOIO
2528 && exit_reason != VMEXIT_INTR)
2531 if (exit_reasons[exit_reason])
2533 printk("I%08ld,ExC=%s(%d),IP=%x:%llx,I1=%llx,I2=%llx,INT=%llx\n",
2534 intercepts_counter,
2535 exit_reasons[exit_reason], exit_reason, regs.cs,
2536 (unsigned long long) regs.rip,
2537 (unsigned long long) vmcb->exitinfo1,
2538 (unsigned long long) vmcb->exitinfo2,
2539 (unsigned long long) vmcb->exitintinfo.bytes);
2541 else
2543 printk("I%08ld,ExC=%d(0x%x),IP=%x:%llx,I1=%llx,I2=%llx,INT=%llx\n",
2544 intercepts_counter, exit_reason, exit_reason, regs.cs,
2545 (unsigned long long) regs.rip,
2546 (unsigned long long) vmcb->exitinfo1,
2547 (unsigned long long) vmcb->exitinfo2,
2548 (unsigned long long) vmcb->exitintinfo.bytes);
2552 #ifdef SVM_WALK_GUEST_PAGES
2553 if( exit_reason == VMEXIT_EXCEPTION_PF
2554 && ( ( vmcb->exitinfo2 == vmcb->rip )
2555 || vmcb->exitintinfo.bytes) )
2557 if (svm_paging_enabled(v) && !mmio_space(gva_to_gpa(vmcb->exitinfo2)))
2558 walk_shadow_and_guest_pt( vmcb->exitinfo2 );
2560 #endif
2562 intercepts_counter++;
2564 #if 0
2565 if (svm_dbg_on)
2566 do_debug = svm_do_debugout(exit_reason);
2567 #endif
2569 if (do_debug)
2571 printk("%s:+ guest_table = 0x%08x, monitor_table = 0x%08x, "
2572 "shadow_table = 0x%08x\n",
2573 __func__,
2574 (int) v->arch.guest_table.pfn,
2575 (int) v->arch.monitor_table.pfn,
2576 (int) v->arch.shadow_table.pfn);
2578 svm_dump_vmcb(__func__, vmcb);
2579 svm_dump_regs(__func__, &regs);
2580 svm_dump_inst(svm_rip2pointer(vmcb));
2583 #if defined(__i386__)
2584 #undef rip
2585 #endif
2588 #endif /* SVM_EXTRA_DEBUG */
2590 if (exit_reason == -1)
2592 printk("%s: exit_reason == -1 - Did someone clobber the VMCB\n",
2593 __func__);
2594 BUG();
2595 domain_crash_synchronous();
2598 perfc_incra(vmexits, exit_reason);
2599 eip = vmcb->rip;
2601 #ifdef SVM_EXTRA_DEBUG
2602 if (do_debug)
2604 printk("eip = %lx, exit_reason = %d (0x%x)\n",
2605 eip, exit_reason, exit_reason);
2607 #endif /* SVM_EXTRA_DEBUG */
2609 TRACE_3D(TRC_VMX_VMEXIT, v->domain->domain_id, eip, exit_reason);
2611 switch (exit_reason)
2613 case VMEXIT_EXCEPTION_DB:
2615 #ifdef XEN_DEBUGGER
2616 svm_debug_save_cpu_user_regs(&regs);
2617 pdb_handle_exception(1, &regs, 1);
2618 svm_debug_restore_cpu_user_regs(&regs);
2619 #else
2620 svm_store_cpu_user_regs(&regs, v);
2621 domain_pause_for_debugger();
2622 #endif
2624 break;
2626 case VMEXIT_NMI:
2627 do_nmi(&regs, 0);
2628 break;
2630 case VMEXIT_SMI:
2631 /*
2632 * For asynchronous SMIs, we just need to allow global interrupts
2633 * so that the SMI is taken properly in the context of the host. The
2634 * standard code does a STGI after the VMEXIT which should accomplish
2635 * this task. Continue as normal and restart the guest.
2636 */
2637 break;
2639 case VMEXIT_INIT:
2640 /*
2641 * Nothing to do; in fact, we should never get to this point.
2642 */
2643 break;
2645 case VMEXIT_EXCEPTION_BP:
2646 #ifdef XEN_DEBUGGER
2647 svm_debug_save_cpu_user_regs(&regs);
2648 pdb_handle_exception(3, &regs, 1);
2649 svm_debug_restore_cpu_user_regs(&regs);
2650 #else
2651 if ( test_bit(_DOMF_debugging, &v->domain->domain_flags) )
2652 domain_pause_for_debugger();
2653 else
2654 svm_inject_exception(vmcb, TRAP_int3, 0, 0);
2655 #endif
2656 break;
2658 case VMEXIT_EXCEPTION_NM:
2659 svm_do_no_device_fault(vmcb);
2660 break;
2662 case VMEXIT_EXCEPTION_GP:
2663 /* This should probably not be trapped in the future */
2664 regs.error_code = vmcb->exitinfo1;
2665 v->arch.hvm_svm.injecting_event = 1;
2666 svm_do_general_protection_fault(v, &regs);
2667 break;
2669 case VMEXIT_EXCEPTION_PF:
2671 unsigned long va;
2672 va = vmcb->exitinfo2;
2673 regs.error_code = vmcb->exitinfo1;
2674 HVM_DBG_LOG(DBG_LEVEL_VMMU,
2675 "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx",
2676 (unsigned long)regs.eax, (unsigned long)regs.ebx,
2677 (unsigned long)regs.ecx, (unsigned long)regs.edx,
2678 (unsigned long)regs.esi, (unsigned long)regs.edi);
2680 v->arch.hvm_vcpu.mmio_op.inst_decoder_regs = &regs;
2682 //printk("PF1\n");
2683 if (!(error = svm_do_page_fault(va, &regs)))
2685 v->arch.hvm_svm.injecting_event = 1;
2686 /* Inject #PF using Interruption-Information Fields */
2687 svm_inject_exception(vmcb, TRAP_page_fault, 1, regs.error_code);
2689 v->arch.hvm_svm.cpu_cr2 = va;
2690 vmcb->cr2 = va;
2691 TRACE_3D(TRC_VMX_INT, v->domain->domain_id,
2692 VMEXIT_EXCEPTION_PF, va);
2694 break;
2697 case VMEXIT_EXCEPTION_DF:
2698 printk("Guest double fault");
2699 BUG();
2700 break;
2702 case VMEXIT_INTR:
2703 raise_softirq(SCHEDULE_SOFTIRQ);
2704 break;
2706 case VMEXIT_GDTR_WRITE:
2707 printk("WRITE to GDTR\n");
2708 break;
2710 case VMEXIT_TASK_SWITCH:
2711 __hvm_bug(&regs);
2712 break;
2714 case VMEXIT_CPUID:
2715 svm_vmexit_do_cpuid(vmcb, regs.eax, &regs);
2716 break;
2718 case VMEXIT_HLT:
2719 svm_vmexit_do_hlt(vmcb);
2720 break;
2722 case VMEXIT_INVLPG:
2723 svm_handle_invlpg(0, &regs);
2724 break;
2726 case VMEXIT_INVLPGA:
2727 svm_handle_invlpg(1, &regs);
2728 break;
2730 case VMEXIT_VMMCALL:
2731 svm_do_vmmcall(v, &regs);
2732 break;
2734 case VMEXIT_CR0_READ:
2735 svm_cr_access(v, 0, TYPE_MOV_FROM_CR, &regs);
2736 break;
2738 case VMEXIT_CR2_READ:
2739 svm_cr_access(v, 2, TYPE_MOV_FROM_CR, &regs);
2740 break;
2742 case VMEXIT_CR3_READ:
2743 svm_cr_access(v, 3, TYPE_MOV_FROM_CR, &regs);
2744 break;
2746 case VMEXIT_CR4_READ:
2747 svm_cr_access(v, 4, TYPE_MOV_FROM_CR, &regs);
2748 break;
2750 case VMEXIT_CR8_READ:
2751 svm_cr_access(v, 8, TYPE_MOV_FROM_CR, &regs);
2752 break;
2754 case VMEXIT_CR0_WRITE:
2755 svm_cr_access(v, 0, TYPE_MOV_TO_CR, &regs);
2756 break;
2758 case VMEXIT_CR2_WRITE:
2759 svm_cr_access(v, 2, TYPE_MOV_TO_CR, &regs);
2760 break;
2762 case VMEXIT_CR3_WRITE:
2763 svm_cr_access(v, 3, TYPE_MOV_TO_CR, &regs);
2764 local_flush_tlb();
2765 break;
2767 case VMEXIT_CR4_WRITE:
2768 svm_cr_access(v, 4, TYPE_MOV_TO_CR, &regs);
2769 break;
2771 case VMEXIT_CR8_WRITE:
2772 svm_cr_access(v, 8, TYPE_MOV_TO_CR, &regs);
2773 break;
2775 case VMEXIT_DR0_READ:
2776 svm_dr_access(v, 0, TYPE_MOV_FROM_DR, &regs);
2777 break;
2779 case VMEXIT_DR1_READ:
2780 svm_dr_access(v, 1, TYPE_MOV_FROM_DR, &regs);
2781 break;
2783 case VMEXIT_DR2_READ:
2784 svm_dr_access(v, 2, TYPE_MOV_FROM_DR, &regs);
2785 break;
2787 case VMEXIT_DR3_READ:
2788 svm_dr_access(v, 3, TYPE_MOV_FROM_DR, &regs);
2789 break;
2791 case VMEXIT_DR6_READ:
2792 svm_dr_access(v, 6, TYPE_MOV_FROM_DR, &regs);
2793 break;
2795 case VMEXIT_DR7_READ:
2796 svm_dr_access(v, 7, TYPE_MOV_FROM_DR, &regs);
2797 break;
2799 case VMEXIT_DR0_WRITE:
2800 svm_dr_access(v, 0, TYPE_MOV_TO_DR, &regs);
2801 break;
2803 case VMEXIT_DR1_WRITE:
2804 svm_dr_access(v, 1, TYPE_MOV_TO_DR, &regs);
2805 break;
2807 case VMEXIT_DR2_WRITE:
2808 svm_dr_access(v, 2, TYPE_MOV_TO_DR, &regs);
2809 break;
2811 case VMEXIT_DR3_WRITE:
2812 svm_dr_access(v, 3, TYPE_MOV_TO_DR, &regs);
2813 break;
2815 case VMEXIT_DR6_WRITE:
2816 svm_dr_access(v, 6, TYPE_MOV_TO_DR, &regs);
2817 break;
2819 case VMEXIT_DR7_WRITE:
2820 svm_dr_access(v, 7, TYPE_MOV_TO_DR, &regs);
2821 break;
2823 case VMEXIT_IOIO:
2824 svm_io_instruction(v, &regs);
2825 break;
2827 case VMEXIT_MSR:
2828 svm_do_msr_access(v, &regs);
2829 break;
2831 case VMEXIT_SHUTDOWN:
2832 printk("Guest shutdown exit\n");
2833 domain_crash_synchronous();
2834 break;
2836 default:
2837 printk("unexpected VMEXIT: exit reason = 0x%x, exitinfo1 = %llx, "
2838 "exitinfo2 = %llx\n", exit_reason,
2839 (unsigned long long)vmcb->exitinfo1,
2840 (unsigned long long)vmcb->exitinfo2);
2841 __hvm_bug(&regs); /* should not happen */
2842 break;
2845 #ifdef SVM_EXTRA_DEBUG
2846 if (do_debug)
2848 printk("%s: Done switch on vmexit_code\n", __func__);
2849 svm_dump_regs(__func__, &regs);
2852 if (do_debug)
2854 printk("vmexit_handler():- guest_table = 0x%08x, "
2855 "monitor_table = 0x%08x, shadow_table = 0x%08x\n",
2856 (int)v->arch.guest_table.pfn,
2857 (int)v->arch.monitor_table.pfn,
2858 (int)v->arch.shadow_table.pfn);
2859 printk("svm_vmexit_handler: Returning\n");
2861 #endif
2863 return;
2866 asmlinkage void svm_load_cr2(void)
2868 struct vcpu *v = current;
2870 local_irq_disable();
2871 asm volatile("mov %0,%%cr2": :"r" (v->arch.hvm_svm.cpu_cr2));
2874 asmlinkage void svm_asid(void)
2876 struct vcpu *v = current;
2877 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2879 /*
2880 * If we need to assign a new ASID, or if we are switching cores,
2881 * retire the ASID for the old core and assign a new one for the current core.
2882 */
2883 if ( test_bit( ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags ) ||
2884 ( v->arch.hvm_svm.asid_core != v->arch.hvm_svm.launch_core )) {
2885 /* recycle asid */
2886 if ( !asidpool_assign_next( vmcb, 1,
2887 v->arch.hvm_svm.asid_core, v->arch.hvm_svm.launch_core )) {
2888 /* If we get here, we have a major problem */
2889 domain_crash_synchronous();
2892 v->arch.hvm_svm.asid_core = v->arch.hvm_svm.launch_core;
2893 clear_bit( ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags );
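/*
 * Editor's sketch (hypothetical helper, not in the original source): the
 * condition under which svm_asid() above retires the current ASID, i.e.
 * either an explicit request via ARCH_SVM_VMCB_ASSIGN_ASID or a migration
 * to a different core since the last launch.
 */
static inline int svm_asid_needs_refresh(struct vcpu *v)
{
    return test_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags) ||
           (v->arch.hvm_svm.asid_core != v->arch.hvm_svm.launch_core);
}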
2897 /*
2898 * Local variables:
2899 * mode: C
2900 * c-set-style: "BSD"
2901 * c-basic-offset: 4
2902 * tab-width: 4
2903 * indent-tabs-mode: nil
2904 * End:
2905 */