direct-io.hg

view xen/arch/x86/hvm/svm/svm.c @ 10494:b12cd185d579

[HVM][SVM] move printk pin msg under an "if svm_dbg_on" conditional to
alleviate excessive printing of messages when a vcpu is pinned to a new core.

Signed-off-by: Tom Woller <thomas.woller@amd.com>
author kaf24@firebug.cl.cam.ac.uk
date Tue Jun 27 11:17:14 2006 +0100 (2006-06-27)
parents 9f937ecc4f54
children 571507a595cd
line source
1 /*
2 * svm.c: handling SVM architecture-related VM exits
3 * Copyright (c) 2004, Intel Corporation.
4 * Copyright (c) 2005, AMD Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
17 * Place - Suite 330, Boston, MA 02111-1307 USA.
18 *
19 */
21 #include <xen/config.h>
22 #include <xen/init.h>
23 #include <xen/lib.h>
24 #include <xen/trace.h>
25 #include <xen/sched.h>
26 #include <xen/irq.h>
27 #include <xen/softirq.h>
28 #include <asm/current.h>
29 #include <asm/io.h>
30 #include <asm/shadow.h>
31 #include <asm/regs.h>
32 #include <asm/cpufeature.h>
33 #include <asm/processor.h>
34 #include <asm/types.h>
35 #include <asm/msr.h>
36 #include <asm/spinlock.h>
37 #include <asm/hvm/hvm.h>
38 #include <asm/hvm/support.h>
39 #include <asm/hvm/io.h>
40 #include <asm/hvm/svm/svm.h>
41 #include <asm/hvm/svm/vmcb.h>
42 #include <asm/hvm/svm/emulate.h>
43 #include <asm/hvm/svm/vmmcall.h>
44 #include <asm/hvm/svm/intr.h>
45 #include <asm/shadow.h>
46 #if CONFIG_PAGING_LEVELS >= 3
47 #include <asm/shadow_64.h>
48 #endif
49 #include <public/sched.h>
50 #include <public/hvm/ioreq.h>
52 #define SVM_EXTRA_DEBUG
54 #define set_segment_register(name, value) \
55 __asm__ __volatile__ ( "movw %%ax ,%%" STR(name) "" : : "a" (value) )
57 /*
58 * External functions, etc. We should move these to some suitable header file(s) */
60 extern void do_nmi(struct cpu_user_regs *, unsigned long);
61 extern int inst_copy_from_guest(unsigned char *buf, unsigned long guest_eip,
62 int inst_len);
63 extern asmlinkage void do_IRQ(struct cpu_user_regs *);
64 extern void send_pio_req(struct cpu_user_regs *regs, unsigned long port,
65 unsigned long count, int size, long value, int dir, int pvalid);
66 extern int svm_instrlen(struct cpu_user_regs *regs, int mode);
67 extern void svm_dump_inst(unsigned long eip);
68 extern int svm_dbg_on;
69 void svm_manual_event_injection32(struct vcpu *v, struct cpu_user_regs *regs,
70 int vector, int has_code);
71 void svm_dump_regs(const char *from, struct cpu_user_regs *regs);
73 static void svm_relinquish_guest_resources(struct domain *d);
74 static int svm_do_vmmcall_reset_to_realmode(struct vcpu *v,
75 struct cpu_user_regs *regs);
79 extern void set_hsa_to_guest( struct arch_svm_struct *arch_svm );
81 /* Host save area and ASID global data */
82 struct svm_percore_globals svm_globals[NR_CPUS];
84 /*
85 * Initializes the pool of ASIDs used by the guests, per core.
86 */
87 void asidpool_init(int core)
88 {
89 int i;
91 spin_lock_init(&svm_globals[core].ASIDpool.asid_lock);
93 /* Host ASID is always in use */
94 svm_globals[core].ASIDpool.asid[INITIAL_ASID] = ASID_INUSE;
95 for ( i = 1; i < ASID_MAX; i++ )
96 svm_globals[core].ASIDpool.asid[i] = ASID_AVAILABLE;
97 }
100 /* internal function to get the next available ASID */
101 static int asidpool_fetch_next(struct vmcb_struct *vmcb, int core)
102 {
103 int i;
104 for ( i = 1; i < ASID_MAX; i++ )
105 {
106 if ( svm_globals[core].ASIDpool.asid[i] == ASID_AVAILABLE )
107 {
108 vmcb->guest_asid = i;
109 svm_globals[core].ASIDpool.asid[i] = ASID_INUSE;
110 return i;
111 }
112 }
113 return -1;
114 }
117 /*
118 * This function assigns the next available ASID number to the
119 * passed VMCB. If none are available, the TLB flush flag is set
120 * and all retired ASIDs are made
121 * available again.
122 *
123 * Returns: 1 -- success;
124 * 0 -- failure -- no more ASID numbers
125 * available.
126 */
127 int asidpool_assign_next( struct vmcb_struct *vmcb, int retire_current,
128 int oldcore, int newcore )
129 {
130 int i;
131 int res = 1;
132 static unsigned long cnt=0;
134 spin_lock(&svm_globals[oldcore].ASIDpool.asid_lock);
135 if( retire_current && vmcb->guest_asid ) {
136 svm_globals[oldcore].ASIDpool.asid[ vmcb->guest_asid & (ASID_MAX-1) ] = ASID_RETIRED;
137 }
138 spin_unlock(&svm_globals[oldcore].ASIDpool.asid_lock);
139 spin_lock(&svm_globals[newcore].ASIDpool.asid_lock);
140 if( asidpool_fetch_next( vmcb, newcore ) < 0 ) {
141 if (svm_dbg_on)
142 printk( "SVM: tlb(%ld)\n", cnt++ );
143 /* FLUSH the TLB and all retired slots are made available */
144 vmcb->tlb_control = 1;
145 for( i = 1; i < ASID_MAX; i++ ) {
146 if( svm_globals[newcore].ASIDpool.asid[i] == ASID_RETIRED ) {
147 svm_globals[newcore].ASIDpool.asid[i] = ASID_AVAILABLE;
148 }
149 }
150 /* Get the First slot available */
151 res = asidpool_fetch_next( vmcb, newcore ) > 0;
152 }
153 spin_unlock(&svm_globals[newcore].ASIDpool.asid_lock);
154 return res;
155 }
157 void asidpool_retire( struct vmcb_struct *vmcb, int core )
158 {
159 spin_lock(&svm_globals[core].ASIDpool.asid_lock);
160 if( vmcb->guest_asid ) {
161 svm_globals[core].ASIDpool.asid[ vmcb->guest_asid & (ASID_MAX-1) ] = ASID_RETIRED;
162 }
163 spin_unlock(&svm_globals[core].ASIDpool.asid_lock);
164 }
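/* Build an exception event record and store it in the VMCB EVENTINJ field
 * so the processor injects it into the guest on the next VMRUN. */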
166 static inline void svm_inject_exception(struct vcpu *v, int trap, int ev, int error_code)
167 {
168 eventinj_t event;
169 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
171 event.bytes = 0;
172 event.fields.v = 1;
173 event.fields.type = EVENTTYPE_EXCEPTION;
174 event.fields.vector = trap;
175 event.fields.ev = ev;
176 event.fields.errorcode = error_code;
178 ASSERT(vmcb->eventinj.fields.v == 0);
180 vmcb->eventinj = event;
181 }
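/* Disable SVM on this core: clear EFER.SVME, release the host save
 * areas and zero the VM_HSAVE_PA MSR. */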
183 void stop_svm(void)
184 {
185 u32 eax, edx;
186 int cpu = smp_processor_id();
188 /* We turn off the EFER_SVME bit. */
189 rdmsr(MSR_EFER, eax, edx);
190 eax &= ~EFER_SVME;
191 wrmsr(MSR_EFER, eax, edx);
193 /* release the HSA */
194 free_host_save_area( svm_globals[cpu].hsa );
195 free_host_save_area( svm_globals[cpu].scratch_hsa );
196 svm_globals[cpu].hsa = NULL;
197 svm_globals[cpu].hsa_pa = 0;
198 svm_globals[cpu].scratch_hsa = NULL;
199 svm_globals[cpu].scratch_hsa_pa = 0;
200 wrmsr(MSR_K8_VM_HSAVE_PA, 0, 0 );
202 printk("AMD SVM Extension is disabled.\n");
203 }
205 int svm_initialize_guest_resources(struct vcpu *v)
206 {
207 svm_final_setup_guest(v);
208 return 1;
209 }
211 static void svm_store_cpu_guest_regs(
212 struct vcpu *v, struct cpu_user_regs *regs, unsigned long *crs)
213 {
214 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
216 if ( regs != NULL )
217 {
218 #if defined (__x86_64__)
219 regs->rip = vmcb->rip;
220 regs->rsp = vmcb->rsp;
221 regs->rflags = vmcb->rflags;
222 regs->cs = vmcb->cs.sel;
223 regs->ds = vmcb->ds.sel;
224 regs->es = vmcb->es.sel;
225 regs->ss = vmcb->ss.sel;
226 regs->gs = vmcb->gs.sel;
227 regs->fs = vmcb->fs.sel;
228 #elif defined (__i386__)
229 regs->eip = vmcb->rip;
230 regs->esp = vmcb->rsp;
231 regs->eflags = vmcb->rflags;
232 regs->cs = vmcb->cs.sel;
233 regs->ds = vmcb->ds.sel;
234 regs->es = vmcb->es.sel;
235 regs->ss = vmcb->ss.sel;
236 regs->gs = vmcb->gs.sel;
237 regs->fs = vmcb->fs.sel;
238 #endif
239 }
241 if ( crs != NULL )
242 {
243 crs[0] = vmcb->cr0;
244 crs[3] = vmcb->cr3;
245 crs[4] = vmcb->cr4;
246 }
247 }
249 static void svm_load_cpu_guest_regs(
250 struct vcpu *v, struct cpu_user_regs *regs)
251 {
252 svm_load_cpu_user_regs(v, regs);
253 }
255 #define IS_CANO_ADDRESS(add) 1
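/* Read one of the long-mode MSRs (EFER, FS/GS base, KernelGSbase,
 * STAR/LSTAR/CSTAR, SFMASK); the values are taken from the VMCB. */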
257 static inline int long_mode_do_msr_read(struct cpu_user_regs *regs)
258 {
259 u64 msr_content = 0;
260 struct vcpu *vc = current;
261 // struct svm_msr_state *msr = &vc->arch.hvm_svm.msr_content;
262 struct vmcb_struct *vmcb = vc->arch.hvm_svm.vmcb;
264 switch (regs->ecx)
265 {
266 case MSR_EFER:
267 // msr_content = msr->msr_items[SVM_INDEX_MSR_EFER];
268 msr_content = vmcb->efer;
269 msr_content &= ~EFER_SVME;
270 break;
272 case MSR_FS_BASE:
273 msr_content = vmcb->fs.base;
274 break;
276 case MSR_GS_BASE:
277 msr_content = vmcb->gs.base;
278 break;
280 case MSR_SHADOW_GS_BASE:
281 msr_content = vmcb->kerngsbase;
282 break;
284 case MSR_STAR:
285 msr_content = vmcb->star;
286 break;
288 case MSR_LSTAR:
289 msr_content = vmcb->lstar;
290 break;
292 case MSR_CSTAR:
293 msr_content = vmcb->cstar;
294 break;
296 case MSR_SYSCALL_MASK:
297 msr_content = vmcb->sfmask;
298 break;
300 default:
301 return 0;
302 }
304 HVM_DBG_LOG(DBG_LEVEL_2, "mode_do_msr_read: msr_content: %"PRIx64"\n",
305 msr_content);
307 regs->eax = msr_content & 0xffffffff;
308 regs->edx = msr_content >> 32;
309 return 1;
310 }
312 static inline int long_mode_do_msr_write(struct cpu_user_regs *regs)
313 {
314 u64 msr_content = regs->eax | ((u64)regs->edx << 32);
315 struct vcpu *vc = current;
316 struct vmcb_struct *vmcb = vc->arch.hvm_svm.vmcb;
318 HVM_DBG_LOG(DBG_LEVEL_1, "mode_do_msr_write msr %lx "
319 "msr_content %"PRIx64"\n",
320 (unsigned long)regs->ecx, msr_content);
322 switch (regs->ecx)
323 {
324 case MSR_EFER:
325 #ifdef __x86_64__
326 /* offending reserved bit will cause #GP */
327 if ( msr_content & ~(EFER_LME | EFER_LMA | EFER_NX | EFER_SCE) )
328 {
329 printk("trying to set reserved bit in EFER\n");
330 svm_inject_exception(vc, TRAP_gp_fault, 1, 0);
331 return 0;
332 }
334 /* LME: 0 -> 1 */
335 if ( msr_content & EFER_LME &&
336 !test_bit(SVM_CPU_STATE_LME_ENABLED, &vc->arch.hvm_svm.cpu_state) )
337 {
338 if ( svm_paging_enabled(vc) ||
339 !test_bit(SVM_CPU_STATE_PAE_ENABLED,
340 &vc->arch.hvm_svm.cpu_state) )
341 {
342 printk("trying to set LME bit when "
343 "in paging mode or PAE bit is not set\n");
344 svm_inject_exception(vc, TRAP_gp_fault, 1, 0);
345 return 0;
346 }
347 set_bit(SVM_CPU_STATE_LME_ENABLED, &vc->arch.hvm_svm.cpu_state);
348 }
350 /* We have already recorded that we want LME, so it will be set
351 * next time CR0 gets updated. So we clear that bit and continue.
352 */
353 if ((msr_content ^ vmcb->efer) & EFER_LME)
354 msr_content &= ~EFER_LME;
355 /* No update for LME/LMA since it has no effect */
356 #endif
357 vmcb->efer = msr_content | EFER_SVME;
358 break;
360 case MSR_FS_BASE:
361 case MSR_GS_BASE:
362 if (!(SVM_LONG_GUEST(vc)))
363 domain_crash_synchronous();
365 if (!IS_CANO_ADDRESS(msr_content))
366 {
367 HVM_DBG_LOG(DBG_LEVEL_1, "Not cano address of msr write\n");
368 svm_inject_exception(vc, TRAP_gp_fault, 1, 0);
369 }
371 if (regs->ecx == MSR_FS_BASE)
372 vmcb->fs.base = msr_content;
373 else
374 vmcb->gs.base = msr_content;
375 break;
377 case MSR_SHADOW_GS_BASE:
378 vmcb->kerngsbase = msr_content;
379 break;
381 case MSR_STAR:
382 vmcb->star = msr_content;
383 break;
385 case MSR_LSTAR:
386 vmcb->lstar = msr_content;
387 break;
389 case MSR_CSTAR:
390 vmcb->cstar = msr_content;
391 break;
393 case MSR_SYSCALL_MASK:
394 vmcb->sfmask = msr_content;
395 break;
397 default:
398 return 0;
399 }
400 return 1;
401 }
403 int svm_realmode(struct vcpu *v)
404 {
405 unsigned long cr0 = v->arch.hvm_svm.cpu_shadow_cr0;
406 unsigned long eflags = v->arch.hvm_svm.vmcb->rflags;
408 return (eflags & X86_EFLAGS_VM) || !(cr0 & X86_CR0_PE);
409 }
411 int svm_instruction_length(struct vcpu *v)
412 {
413 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
414 unsigned long cr0 = vmcb->cr0, eflags = vmcb->rflags, mode;
415 /* check which operating mode the guest is running in */
416 if( vmcb->efer & EFER_LMA )
417 mode = vmcb->cs.attributes.fields.l ? 8 : 4;
418 else
419 mode = (eflags & X86_EFLAGS_VM) || !(cr0 & X86_CR0_PE) ? 2 : 4;
420 return svm_instrlen(guest_cpu_user_regs(), mode);
421 }
423 unsigned long svm_get_ctrl_reg(struct vcpu *v, unsigned int num)
424 {
425 switch ( num )
426 {
427 case 0:
428 return v->arch.hvm_svm.cpu_shadow_cr0;
429 case 2:
430 return v->arch.hvm_svm.cpu_cr2;
431 case 3:
432 return v->arch.hvm_svm.cpu_cr3;
433 default:
434 BUG();
435 }
436 return 0; /* dummy */
437 }
440 /* SVM-specific initialization code for VCPU application processors */
441 void svm_init_ap_context(struct vcpu_guest_context *ctxt,
442 int vcpuid, int trampoline_vector)
443 {
444 int i;
445 struct vcpu *v, *bsp = current;
446 struct domain *d = bsp->domain;
447 cpu_user_regs_t *regs;
450 if ((v = d->vcpu[vcpuid]) == NULL)
451 {
452 printk("vcpuid %d is invalid! good-bye.\n", vcpuid);
453 domain_crash_synchronous();
454 }
455 regs = &v->arch.guest_context.user_regs;
457 memset(ctxt, 0, sizeof(*ctxt));
458 for (i = 0; i < 256; ++i)
459 {
460 ctxt->trap_ctxt[i].vector = i;
461 ctxt->trap_ctxt[i].cs = FLAT_KERNEL_CS;
462 }
465 /*
466 * We execute the trampoline code in real mode. The trampoline vector
467 * passed to us is page aligned and is the physical frame number for
468 * the code. We will execute this code in real mode.
469 */
470 ctxt->user_regs.eip = 0x0;
471 ctxt->user_regs.cs = (trampoline_vector << 8);
472 ctxt->flags = VGCF_HVM_GUEST;
473 }
475 int start_svm(void)
476 {
477 u32 eax, ecx, edx;
478 u32 phys_hsa_lo, phys_hsa_hi;
479 u64 phys_hsa;
480 int cpu = smp_processor_id();
482 /* Xen does not fill x86_capability words except 0. */
483 ecx = cpuid_ecx(0x80000001);
484 boot_cpu_data.x86_capability[5] = ecx;
486 if (!(test_bit(X86_FEATURE_SVME, &boot_cpu_data.x86_capability)))
487 return 0;
488 svm_globals[cpu].hsa = alloc_host_save_area();
489 if (! svm_globals[cpu].hsa)
490 return 0;
492 rdmsr(MSR_EFER, eax, edx);
493 eax |= EFER_SVME;
494 wrmsr(MSR_EFER, eax, edx);
495 asidpool_init( cpu );
496 printk("AMD SVM Extension is enabled for cpu %d.\n", cpu );
498 /* Initialize the HSA for this core */
499 phys_hsa = (u64) virt_to_maddr( svm_globals[cpu].hsa );
500 phys_hsa_lo = (u32) phys_hsa;
501 phys_hsa_hi = (u32) (phys_hsa >> 32);
502 wrmsr(MSR_K8_VM_HSAVE_PA, phys_hsa_lo, phys_hsa_hi);
503 svm_globals[cpu].hsa_pa = phys_hsa;
505 svm_globals[cpu].scratch_hsa = alloc_host_save_area();
506 svm_globals[cpu].scratch_hsa_pa = (u64)virt_to_maddr( svm_globals[cpu].scratch_hsa );
508 /* Setup HVM interfaces */
509 hvm_funcs.disable = stop_svm;
511 hvm_funcs.initialize_guest_resources = svm_initialize_guest_resources;
512 hvm_funcs.relinquish_guest_resources = svm_relinquish_guest_resources;
514 hvm_funcs.store_cpu_guest_regs = svm_store_cpu_guest_regs;
515 hvm_funcs.load_cpu_guest_regs = svm_load_cpu_guest_regs;
517 hvm_funcs.realmode = svm_realmode;
518 hvm_funcs.paging_enabled = svm_paging_enabled;
519 hvm_funcs.instruction_length = svm_instruction_length;
520 hvm_funcs.get_guest_ctrl_reg = svm_get_ctrl_reg;
521 hvm_funcs.init_ap_context = svm_init_ap_context;
523 hvm_enabled = 1;
525 return 1;
526 }
528 int svm_dbg_on = 0;
530 static inline int svm_do_debugout(unsigned long exit_code)
531 {
532 int i;
534 static unsigned long counter = 0;
535 static unsigned long works[] =
536 {
537 VMEXIT_IOIO,
538 VMEXIT_HLT,
539 VMEXIT_CPUID,
540 VMEXIT_DR0_READ,
541 VMEXIT_DR1_READ,
542 VMEXIT_DR2_READ,
543 VMEXIT_DR3_READ,
544 VMEXIT_DR6_READ,
545 VMEXIT_DR7_READ,
546 VMEXIT_DR0_WRITE,
547 VMEXIT_DR1_WRITE,
548 VMEXIT_DR2_WRITE,
549 VMEXIT_DR3_WRITE,
550 VMEXIT_CR0_READ,
551 VMEXIT_CR0_WRITE,
552 VMEXIT_CR3_READ,
553 VMEXIT_CR4_READ,
554 VMEXIT_MSR,
555 VMEXIT_CR0_WRITE,
556 VMEXIT_CR3_WRITE,
557 VMEXIT_CR4_WRITE,
558 VMEXIT_EXCEPTION_PF,
559 VMEXIT_INTR,
560 VMEXIT_INVLPG,
561 VMEXIT_EXCEPTION_NM
562 };
565 #if 0
566 if (svm_dbg_on && exit_code != 0x7B)
567 return 1;
568 #endif
570 counter++;
572 #if 0
573 if ((exit_code == 0x4E
574 || exit_code == VMEXIT_CR0_READ
575 || exit_code == VMEXIT_CR0_WRITE)
576 && counter < 200000)
577 return 0;
579 if ((exit_code == 0x4E) && counter < 500000)
580 return 0;
581 #endif
583 for (i = 0; i < sizeof(works) / sizeof(works[0]); i++)
584 if (exit_code == works[i])
585 return 0;
587 return 1;
588 }
590 void save_svm_cpu_user_regs(struct vcpu *v, struct cpu_user_regs *ctxt)
591 {
592 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
594 ASSERT(vmcb);
596 ctxt->eax = vmcb->rax;
597 ctxt->ss = vmcb->ss.sel;
598 ctxt->esp = vmcb->rsp;
599 ctxt->eflags = vmcb->rflags;
600 ctxt->cs = vmcb->cs.sel;
601 ctxt->eip = vmcb->rip;
603 ctxt->gs = vmcb->gs.sel;
604 ctxt->fs = vmcb->fs.sel;
605 ctxt->es = vmcb->es.sel;
606 ctxt->ds = vmcb->ds.sel;
607 }
609 void svm_store_cpu_user_regs(struct cpu_user_regs *regs, struct vcpu *v)
610 {
611 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
613 regs->eip = vmcb->rip;
614 regs->esp = vmcb->rsp;
615 regs->eflags = vmcb->rflags;
616 regs->cs = vmcb->cs.sel;
617 regs->ds = vmcb->ds.sel;
618 regs->es = vmcb->es.sel;
619 regs->ss = vmcb->ss.sel;
620 }
622 /* XXX Use svm_load_cpu_guest_regs instead */
623 void svm_load_cpu_user_regs(struct vcpu *v, struct cpu_user_regs *regs)
624 {
625 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
626 u32 *intercepts = &v->arch.hvm_svm.vmcb->exception_intercepts;
628 /* Write the guest register value into VMCB */
629 vmcb->rax = regs->eax;
630 vmcb->ss.sel = regs->ss;
631 vmcb->rsp = regs->esp;
632 vmcb->rflags = regs->eflags;
633 vmcb->cs.sel = regs->cs;
634 vmcb->rip = regs->eip;
635 if (regs->eflags & EF_TF)
636 *intercepts |= EXCEPTION_BITMAP_DB;
637 else
638 *intercepts &= ~EXCEPTION_BITMAP_DB;
639 }
641 int svm_paging_enabled(struct vcpu *v)
642 {
643 unsigned long cr0;
645 cr0 = v->arch.hvm_svm.cpu_shadow_cr0;
647 return (cr0 & X86_CR0_PE) && (cr0 & X86_CR0_PG);
648 }
651 /* Make sure that xen intercepts any FP accesses from current */
652 void svm_stts(struct vcpu *v)
653 {
654 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
656 /* FPU state already dirty? Then no need to setup_fpu() lazily. */
657 if ( test_bit(_VCPUF_fpu_dirtied, &v->vcpu_flags) )
658 return;
660 /*
661 * If the guest does not have TS enabled then we must cause and handle an
662 * exception on first use of the FPU. If the guest *does* have TS enabled
663 * then this is not necessary: no FPU activity can occur until the guest
664 * clears CR0.TS, and we will initialise the FPU when that happens.
665 */
666 if ( !(v->arch.hvm_svm.cpu_shadow_cr0 & X86_CR0_TS) )
667 {
668 v->arch.hvm_svm.vmcb->exception_intercepts |= EXCEPTION_BITMAP_NM;
669 vmcb->cr0 |= X86_CR0_TS;
670 }
671 }
673 static void arch_svm_do_launch(struct vcpu *v)
674 {
675 cpu_user_regs_t *regs = &current->arch.guest_context.user_regs;
676 int error;
678 #if 0
679 if (svm_dbg_on)
680 printk("Do launch\n");
681 #endif
682 error = construct_vmcb(&v->arch.hvm_svm, regs);
683 if ( error < 0 )
684 {
685 if (v->vcpu_id == 0) {
686 printk("Failed to construct a new VMCB for BSP.\n");
687 } else {
688 printk("Failed to construct a new VMCB for AP %d\n", v->vcpu_id);
689 }
690 domain_crash_synchronous();
691 }
693 svm_do_launch(v);
694 #if 0
695 if (svm_dbg_on)
696 svm_dump_host_regs(__func__);
697 #endif
698 if (v->vcpu_id != 0)
699 {
700 u16 cs_sel = regs->cs;
701 /*
702 * This is the launch of an AP; set state so that we begin executing
703 * the trampoline code in real-mode.
704 */
705 svm_do_vmmcall_reset_to_realmode(v, regs);
706 /* Adjust the state to execute the trampoline code.*/
707 v->arch.hvm_svm.vmcb->rip = 0;
708 v->arch.hvm_svm.vmcb->cs.sel= cs_sel;
709 v->arch.hvm_svm.vmcb->cs.base = (cs_sel << 4);
710 }
712 reset_stack_and_jump(svm_asm_do_launch);
713 }
715 static void svm_freeze_time(struct vcpu *v)
716 {
717 struct periodic_time *pt=&v->domain->arch.hvm_domain.pl_time.periodic_tm;
719 if ( pt->enabled && pt->first_injected && !v->arch.hvm_vcpu.guest_time ) {
720 v->arch.hvm_vcpu.guest_time = hvm_get_guest_time(v);
721 stop_timer(&(pt->timer));
722 }
723 }
725 static void svm_ctxt_switch_from(struct vcpu *v)
726 {
727 svm_freeze_time(v);
728 }
730 static void svm_ctxt_switch_to(struct vcpu *v)
731 {
732 #if __x86_64__
733 /*
734 * This is required, because VMRUN does consistency check
735 * and some of the DOM0 selectors are pointing to
736 * invalid GDT locations, and cause AMD processors
737 * to shutdown.
738 */
739 set_segment_register(ds, 0);
740 set_segment_register(es, 0);
741 set_segment_register(ss, 0);
742 #endif
743 }
745 void svm_final_setup_guest(struct vcpu *v)
746 {
747 struct domain *d = v->domain;
748 struct vcpu *vc;
750 v->arch.schedule_tail = arch_svm_do_launch;
751 v->arch.ctxt_switch_from = svm_ctxt_switch_from;
752 v->arch.ctxt_switch_to = svm_ctxt_switch_to;
754 if ( v != d->vcpu[0] )
755 return;
757 /* Initialize monitor page table */
758 for_each_vcpu( d, vc )
759 vc->arch.monitor_table = pagetable_null();
761 /*
762 * Required to do this once per domain
763 * TODO: add a separate function to do these.
764 */
765 memset(&d->shared_info->evtchn_mask[0], 0xff,
766 sizeof(d->shared_info->evtchn_mask));
768 /*
769 * Put the domain in shadow mode even though we're going to be using
770 * the shared 1:1 page table initially. It shouldn't hurt
771 */
772 shadow_mode_enable(d,
773 SHM_enable|SHM_refcounts|
774 SHM_translate|SHM_external|SHM_wr_pt_pte);
775 }
778 static void svm_relinquish_guest_resources(struct domain *d)
779 {
780 extern void destroy_vmcb(struct arch_svm_struct *); /* XXX */
781 struct vcpu *v;
783 for_each_vcpu ( d, v )
784 {
785 if ( !test_bit(_VCPUF_initialised, &v->vcpu_flags) )
786 continue;
788 destroy_vmcb(&v->arch.hvm_svm);
789 free_monitor_pagetable(v);
790 kill_timer(&v->arch.hvm_svm.hlt_timer);
791 if ( hvm_apic_support(v->domain) && (VLAPIC(v) != NULL) )
792 {
793 kill_timer( &(VLAPIC(v)->vlapic_timer) );
794 xfree(VLAPIC(v));
795 }
796 }
798 kill_timer(&d->arch.hvm_domain.pl_time.periodic_tm.timer);
800 if ( d->arch.hvm_domain.shared_page_va )
801 unmap_domain_page_global(
802 (void *)d->arch.hvm_domain.shared_page_va);
804 shadow_direct_map_clean(d);
805 }
808 void arch_svm_do_resume(struct vcpu *v)
809 {
810 /* pinning VCPU to a different core? */
811 if ( v->arch.hvm_svm.launch_core == smp_processor_id()) {
812 svm_do_resume( v );
813 reset_stack_and_jump( svm_asm_do_resume );
814 }
815 else {
816 if (svm_dbg_on)
817 printk("VCPU core pinned: %d to %d\n",
818 v->arch.hvm_svm.launch_core, smp_processor_id() );
819 v->arch.hvm_svm.launch_core = smp_processor_id();
820 svm_migrate_timers( v );
821 svm_do_resume( v );
822 reset_stack_and_jump( svm_asm_do_resume );
823 }
824 }
827 void svm_migrate_timers(struct vcpu *v)
828 {
829 struct periodic_time *pt = &(v->domain->arch.hvm_domain.pl_time.periodic_tm);
831 if ( pt->enabled ) {
832 migrate_timer( &pt->timer, v->processor );
833 migrate_timer( &v->arch.hvm_svm.hlt_timer, v->processor );
834 }
835 if ( hvm_apic_support(v->domain) && VLAPIC( v ))
836 migrate_timer( &(VLAPIC(v)->vlapic_timer ), v->processor );
837 }
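/* Shadow page-fault handler: MMIO accesses are recognised via the 1:1
 * map and forwarded to handle_mmio(); everything else goes through
 * shadow_fault(). */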
840 static int svm_do_page_fault(unsigned long va, struct cpu_user_regs *regs)
841 {
842 struct vcpu *v = current;
843 unsigned long eip;
844 unsigned long gpa; /* FIXME: PAE */
845 int result;
846 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
848 ASSERT(vmcb);
850 //#if HVM_DEBUG
851 eip = vmcb->rip;
852 HVM_DBG_LOG(DBG_LEVEL_VMMU,
853 "svm_do_page_fault = 0x%lx, eip = %lx, error_code = %lx",
854 va, eip, (unsigned long)regs->error_code);
855 //#endif
857 if ( !svm_paging_enabled(v) )
858 {
859 if ( shadow_direct_map_fault(va, regs) )
860 return 1;
862 handle_mmio(va, va);
863 return 1;
864 }
867 gpa = gva_to_gpa(va);
869 /* Use 1:1 page table to identify MMIO address space */
870 if (mmio_space(gpa))
871 {
872 /* No support for APIC */
873 if (!hvm_apic_support(v->domain) && gpa >= 0xFEC00000)
874 {
875 int inst_len;
876 inst_len = svm_instruction_length(v);
877 if (inst_len == -1)
878 {
879 printf("%s: INST_LEN - Unable to decode properly.\n", __func__);
880 domain_crash_synchronous();
881 }
883 __update_guest_eip(vmcb, inst_len);
885 return 1;
886 }
888 handle_mmio(va, gpa);
890 return 1;
891 }
893 result = shadow_fault(va, regs);
895 if( result ) {
896 /* Let's make sure that the Guest TLB is flushed */
897 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
898 }
900 return result;
901 }
904 static void svm_do_no_device_fault(struct vmcb_struct *vmcb)
905 {
906 struct vcpu *v = current;
908 setup_fpu(v);
909 vmcb->exception_intercepts &= ~EXCEPTION_BITMAP_NM;
911 if ( !(v->arch.hvm_svm.cpu_shadow_cr0 & X86_CR0_TS) )
912 vmcb->cr0 &= ~X86_CR0_TS;
913 }
916 static void svm_do_general_protection_fault(struct vcpu *v,
917 struct cpu_user_regs *regs)
918 {
919 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
920 unsigned long eip, error_code;
922 ASSERT(vmcb);
924 eip = vmcb->rip;
925 error_code = vmcb->exitinfo1;
927 if (vmcb->idtr.limit == 0) {
928 printf("Huh? We got a GP Fault with an invalid IDTR!\n");
929 svm_dump_vmcb(__func__, vmcb);
930 svm_dump_regs(__func__, regs);
931 svm_dump_inst(vmcb->rip);
932 __hvm_bug(regs);
933 }
935 HVM_DBG_LOG(DBG_LEVEL_1,
936 "svm_general_protection_fault: eip = %lx, erro_code = %lx",
937 eip, error_code);
939 HVM_DBG_LOG(DBG_LEVEL_1,
940 "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx",
941 (unsigned long)regs->eax, (unsigned long)regs->ebx,
942 (unsigned long)regs->ecx, (unsigned long)regs->edx,
943 (unsigned long)regs->esi, (unsigned long)regs->edi);
945 /* Reflect it back into the guest */
946 svm_inject_exception(v, TRAP_gp_fault, 1, error_code);
947 }
949 /* Reserved bits ECX: [31:14], [12:4], [2:1]*/
950 #define SVM_VCPU_CPUID_L1_ECX_RESERVED 0xffffdff6
951 /* Reserved bits EDX: [31:29], [27], [22:20], [18], [10] */
952 #define SVM_VCPU_CPUID_L1_EDX_RESERVED 0xe8740400
954 static void svm_vmexit_do_cpuid(struct vmcb_struct *vmcb, unsigned long input,
955 struct cpu_user_regs *regs)
956 {
957 unsigned int eax, ebx, ecx, edx;
958 unsigned long eip;
959 struct vcpu *v = current;
960 int inst_len;
962 ASSERT(vmcb);
964 eip = vmcb->rip;
966 HVM_DBG_LOG(DBG_LEVEL_1,
967 "do_cpuid: (eax) %lx, (ebx) %lx, (ecx) %lx, (edx) %lx,"
968 " (esi) %lx, (edi) %lx",
969 (unsigned long)regs->eax, (unsigned long)regs->ebx,
970 (unsigned long)regs->ecx, (unsigned long)regs->edx,
971 (unsigned long)regs->esi, (unsigned long)regs->edi);
973 cpuid(input, &eax, &ebx, &ecx, &edx);
975 if (input == 0x00000001)
976 {
977 if ( !hvm_apic_support(v->domain) ||
978 !vlapic_global_enabled((VLAPIC(v))) )
979 {
980 /* Since the apic is disabled, avoid any confusion
981 about SMP cpus being available */
982 clear_bit(X86_FEATURE_APIC, &edx);
983 }
985 #if CONFIG_PAGING_LEVELS < 3
986 clear_bit(X86_FEATURE_PAE, &edx);
987 clear_bit(X86_FEATURE_PSE, &edx);
988 clear_bit(X86_FEATURE_PSE36, &edx);
989 #else
990 if ( v->domain->arch.ops->guest_paging_levels == PAGING_L2 )
991 {
992 if ( !v->domain->arch.hvm_domain.pae_enabled )
993 {
994 clear_bit(X86_FEATURE_PAE, &edx);
995 }
996 clear_bit(X86_FEATURE_PSE, &edx);
997 clear_bit(X86_FEATURE_PSE36, &edx);
998 }
999 #endif
1000 /* Clear out reserved bits. */
1001 ecx &= ~SVM_VCPU_CPUID_L1_ECX_RESERVED;
1002 edx &= ~SVM_VCPU_CPUID_L1_EDX_RESERVED;
1004 clear_bit(X86_FEATURE_MWAIT & 31, &ecx);
1006 /* Guest should only see one logical processor.
1007 * See details on page 23 of AMD CPUID Specification.
1008 */
1009 clear_bit(X86_FEATURE_HT, &edx); /* clear the hyperthread bit */
1010 ebx &= 0xFF00FFFF; /* clear the logical processor count when HTT=0 */
1011 ebx |= 0x00010000; /* set to 1 just as a precaution */
1013 else if ( ( input > 0x00000005 ) && ( input < 0x80000000 ) )
1015 eax = ebx = ecx = edx = 0x0;
1017 else if ( input == 0x80000001 )
1019 /* We duplicate some CPUID_00000001 code because many bits of
1020 CPUID_80000001_EDX overlap with CPUID_00000001_EDX. */
1022 if ( !hvm_apic_support(v->domain) ||
1023 !vlapic_global_enabled((VLAPIC(v))) )
1025 /* Since the apic is disabled, avoid any confusion
1026 about SMP cpus being available */
1027 clear_bit(X86_FEATURE_APIC, &edx);
1030 /* Clear the Cmp_Legacy bit
1031 * This bit is supposed to be zero when HTT = 0.
1032 * See details on page 23 of AMD CPUID Specification.
1033 */
1034 clear_bit(X86_FEATURE_CMP_LEGACY & 31, &ecx);
1036 #ifdef __i386__
1037 /* Mask feature for Intel ia32e or AMD long mode. */
1038 clear_bit(X86_FEATURE_LAHF_LM & 31, &ecx);
1040 clear_bit(X86_FEATURE_LM & 31, &edx);
1041 clear_bit(X86_FEATURE_SYSCALL & 31, &edx);
1042 #endif
1044 #if CONFIG_PAGING_LEVELS < 3
1045 clear_bit(X86_FEATURE_NX & 31, &edx);
1046 clear_bit(X86_FEATURE_PAE, &edx);
1047 clear_bit(X86_FEATURE_PSE, &edx);
1048 clear_bit(X86_FEATURE_PSE36, &edx);
1049 #else
1050 if ( v->domain->arch.ops->guest_paging_levels == PAGING_L2 )
1052 if ( !v->domain->arch.hvm_domain.pae_enabled )
1054 clear_bit(X86_FEATURE_NX & 31, &edx);
1055 clear_bit(X86_FEATURE_PAE, &edx);
1057 clear_bit(X86_FEATURE_PSE, &edx);
1058 clear_bit(X86_FEATURE_PSE36, &edx);
1060 #endif
1062 /* Make SVM feature invisible to the guest. */
1063 clear_bit(X86_FEATURE_SVME & 31, &ecx);
1065 /* So far, we do not support 3DNow for the guest. */
1066 clear_bit(X86_FEATURE_3DNOW & 31, &edx);
1067 clear_bit(X86_FEATURE_3DNOWEXT & 31, &edx);
1069 else if ( ( input == 0x80000007 ) || ( input == 0x8000000A ) )
1071 /* Mask out features of power management and SVM extension. */
1072 eax = ebx = ecx = edx = 0;
1074 else if ( input == 0x80000008 )
1076 ecx &= 0xFFFFFF00; /* Make sure Number of CPU core is 1 when HTT=0 */
1079 regs->eax = (unsigned long)eax;
1080 regs->ebx = (unsigned long)ebx;
1081 regs->ecx = (unsigned long)ecx;
1082 regs->edx = (unsigned long)edx;
1084 HVM_DBG_LOG(DBG_LEVEL_1,
1085 "svm_vmexit_do_cpuid: eip: %lx, input: %lx, out:eax=%x, "
1086 "ebx=%x, ecx=%x, edx=%x",
1087 eip, input, eax, ebx, ecx, edx);
1089 inst_len = __get_instruction_length(vmcb, INSTR_CPUID, NULL);
1090 ASSERT(inst_len > 0);
1091 __update_guest_eip(vmcb, inst_len);
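/* Map an SVM general-purpose register index onto the corresponding
 * slot in the register frame (the stack pointer lives in the VMCB). */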
1095 static inline unsigned long *get_reg_p(unsigned int gpreg,
1096 struct cpu_user_regs *regs, struct vmcb_struct *vmcb)
1098 unsigned long *reg_p = NULL;
1099 switch (gpreg)
1101 case SVM_REG_EAX:
1102 reg_p = (unsigned long *)&regs->eax;
1103 break;
1104 case SVM_REG_EBX:
1105 reg_p = (unsigned long *)&regs->ebx;
1106 break;
1107 case SVM_REG_ECX:
1108 reg_p = (unsigned long *)&regs->ecx;
1109 break;
1110 case SVM_REG_EDX:
1111 reg_p = (unsigned long *)&regs->edx;
1112 break;
1113 case SVM_REG_EDI:
1114 reg_p = (unsigned long *)&regs->edi;
1115 break;
1116 case SVM_REG_ESI:
1117 reg_p = (unsigned long *)&regs->esi;
1118 break;
1119 case SVM_REG_EBP:
1120 reg_p = (unsigned long *)&regs->ebp;
1121 break;
1122 case SVM_REG_ESP:
1123 reg_p = (unsigned long *)&vmcb->rsp;
1124 break;
1125 #if __x86_64__
1126 case SVM_REG_R8:
1127 reg_p = (unsigned long *)&regs->r8;
1128 break;
1129 case SVM_REG_R9:
1130 reg_p = (unsigned long *)&regs->r9;
1131 break;
1132 case SVM_REG_R10:
1133 reg_p = (unsigned long *)&regs->r10;
1134 break;
1135 case SVM_REG_R11:
1136 reg_p = (unsigned long *)&regs->r11;
1137 break;
1138 case SVM_REG_R12:
1139 reg_p = (unsigned long *)&regs->r12;
1140 break;
1141 case SVM_REG_R13:
1142 reg_p = (unsigned long *)&regs->r13;
1143 break;
1144 case SVM_REG_R14:
1145 reg_p = (unsigned long *)&regs->r14;
1146 break;
1147 case SVM_REG_R15:
1148 reg_p = (unsigned long *)&regs->r15;
1149 break;
1150 #endif
1151 default:
1152 BUG();
1155 return reg_p;
1159 static inline unsigned long get_reg(unsigned int gpreg,
1160 struct cpu_user_regs *regs, struct vmcb_struct *vmcb)
1162 unsigned long *gp;
1163 gp = get_reg_p(gpreg, regs, vmcb);
1164 return *gp;
1168 static inline void set_reg(unsigned int gpreg, unsigned long value,
1169 struct cpu_user_regs *regs, struct vmcb_struct *vmcb)
1171 unsigned long *gp;
1172 gp = get_reg_p(gpreg, regs, vmcb);
1173 *gp = value;
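/* Emulate MOV to/from a debug register: decode the instruction at the
 * guest RIP to find the general-purpose register involved. */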
1177 static void svm_dr_access (struct vcpu *v, unsigned int reg, unsigned int type,
1178 struct cpu_user_regs *regs)
1180 unsigned long *reg_p = 0;
1181 unsigned int gpreg = 0;
1182 unsigned long eip;
1183 int inst_len;
1184 int index;
1185 struct vmcb_struct *vmcb;
1186 u8 buffer[MAX_INST_LEN];
1187 u8 prefix = 0;
1189 vmcb = v->arch.hvm_svm.vmcb;
1191 ASSERT(vmcb);
1193 eip = vmcb->rip;
1194 inst_copy_from_guest(buffer, svm_rip2pointer(vmcb), sizeof(buffer));
1195 index = skip_prefix_bytes(buffer, sizeof(buffer));
1197 ASSERT(buffer[index+0] == 0x0f && (buffer[index+1] & 0xFD) == 0x21);
1199 if (index > 0 && (buffer[index-1] & 0xF0) == 0x40)
1200 prefix = buffer[index-1];
1202 gpreg = decode_src_reg(prefix, buffer[index + 2]);
1203 ASSERT(reg == decode_dest_reg(prefix, buffer[index + 2]));
1205 HVM_DBG_LOG(DBG_LEVEL_1, "svm_dr_access : eip=%lx, reg=%d, gpreg = %x",
1206 eip, reg, gpreg);
1208 reg_p = get_reg_p(gpreg, regs, vmcb);
1210 switch (type)
1212 case TYPE_MOV_TO_DR:
1213 inst_len = __get_instruction_length(vmcb, INSTR_MOV2DR, buffer);
1214 v->arch.guest_context.debugreg[reg] = *reg_p;
1215 break;
1216 case TYPE_MOV_FROM_DR:
1217 inst_len = __get_instruction_length(vmcb, INSTR_MOVDR2, buffer);
1218 *reg_p = v->arch.guest_context.debugreg[reg];
1219 break;
1220 default:
1221 __hvm_bug(regs);
1222 break;
1224 ASSERT(inst_len > 0);
1225 __update_guest_eip(vmcb, inst_len);
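/* Scan the prefix bytes of the intercepted instruction to pick up
 * segment overrides and address-size changes for INS/OUTS emulation. */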
1229 static void svm_get_prefix_info(struct vmcb_struct *vmcb,
1230 unsigned int dir, segment_selector_t **seg, unsigned int *asize)
1232 unsigned char inst[MAX_INST_LEN];
1233 int i;
1235 memset(inst, 0, MAX_INST_LEN);
1236 if (inst_copy_from_guest(inst, svm_rip2pointer(vmcb), sizeof(inst))
1237 != MAX_INST_LEN)
1239 printk("%s: get guest instruction failed\n", __func__);
1240 domain_crash_synchronous();
1243 for (i = 0; i < MAX_INST_LEN; i++)
1245 switch (inst[i])
1247 case 0xf3: /* REPZ */
1248 case 0xf2: /* REPNZ */
1249 case 0xf0: /* LOCK */
1250 case 0x66: /* data32 */
1251 #if __x86_64__
1252 /* REX prefixes */
1253 case 0x40:
1254 case 0x41:
1255 case 0x42:
1256 case 0x43:
1257 case 0x44:
1258 case 0x45:
1259 case 0x46:
1260 case 0x47:
1262 case 0x48:
1263 case 0x49:
1264 case 0x4a:
1265 case 0x4b:
1266 case 0x4c:
1267 case 0x4d:
1268 case 0x4e:
1269 case 0x4f:
1270 #endif
1271 continue;
1272 case 0x67: /* addr32 */
1273 *asize ^= 48; /* Switch 16/32 bits */
1274 continue;
1275 case 0x2e: /* CS */
1276 *seg = &vmcb->cs;
1277 continue;
1278 case 0x36: /* SS */
1279 *seg = &vmcb->ss;
1280 continue;
1281 case 0x26: /* ES */
1282 *seg = &vmcb->es;
1283 continue;
1284 case 0x64: /* FS */
1285 *seg = &vmcb->fs;
1286 continue;
1287 case 0x65: /* GS */
1288 *seg = &vmcb->gs;
1289 continue;
1290 case 0x3e: /* DS */
1291 *seg = &vmcb->ds;
1292 continue;
1293 default:
1294 break;
1296 return;
1301 /* Get the address of INS/OUTS instruction */
1302 static inline int svm_get_io_address(struct vcpu *v,
1303 struct cpu_user_regs *regs, unsigned int dir,
1304 unsigned long *count, unsigned long *addr)
1306 unsigned long reg;
1307 unsigned int asize = 0;
1308 unsigned int isize;
1309 int long_mode;
1310 ioio_info_t info;
1311 segment_selector_t *seg = NULL;
1312 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1314 info.bytes = vmcb->exitinfo1;
1316 /* If we're in long mode, we shouldn't check the segment presence and limit */
1317 long_mode = vmcb->cs.attributes.fields.l && vmcb->efer & EFER_LMA;
1319 /* d field of cs.attributes is 1 for 32-bit, 0 for 16 or 64 bit.
1320 * l field combined with EFER_LMA -> longmode says whether it's 16 or 64 bit.
1321 */
1322 asize = (long_mode)?64:((vmcb->cs.attributes.fields.db)?32:16);
1325 /* The ins/outs instructions are single byte, so if we have got more
1326 * than one byte (+ maybe rep-prefix), we have some prefix so we need
1327 * to figure out what it is...
1328 */
1329 isize = vmcb->exitinfo2 - vmcb->rip;
1331 if (info.fields.rep)
1332 isize --;
1334 if (isize > 1)
1336 svm_get_prefix_info(vmcb, dir, &seg, &asize);
1339 ASSERT(dir == IOREQ_READ || dir == IOREQ_WRITE);
1341 if (dir == IOREQ_WRITE)
1343 reg = regs->esi;
1344 if (!seg) /* If no prefix, use DS. */
1345 seg = &vmcb->ds;
1347 else
1349 reg = regs->edi;
1350 seg = &vmcb->es; /* Note: This is ALWAYS ES. */
1353 /* If the segment isn't present, give GP fault! */
1354 if (!long_mode && !seg->attributes.fields.p)
1356 svm_inject_exception(v, TRAP_gp_fault, 1, seg->sel);
1357 return 0;
1360 if (asize == 16)
1362 *addr = (reg & 0xFFFF);
1363 *count = regs->ecx & 0xffff;
1365 else
1367 *addr = reg;
1368 *count = regs->ecx;
1371 if (!long_mode) {
1372 if (*addr > seg->limit)
1374 svm_inject_exception(v, TRAP_gp_fault, 1, seg->sel);
1375 return 0;
1377 else
1379 *addr += seg->base;
1384 return 1;
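/* Handle an IOIO intercept: decode port, size, direction and any
 * string/REP semantics from EXITINFO1 and forward the request to the
 * device model via send_pio_req(). */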
1388 static void svm_io_instruction(struct vcpu *v, struct cpu_user_regs *regs)
1390 struct mmio_op *mmio_opp;
1391 unsigned int port;
1392 unsigned int size, dir;
1393 ioio_info_t info;
1394 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1396 ASSERT(vmcb);
1397 mmio_opp = &current->arch.hvm_vcpu.mmio_op;
1398 mmio_opp->instr = INSTR_PIO;
1399 mmio_opp->flags = 0;
1401 info.bytes = vmcb->exitinfo1;
1403 port = info.fields.port; /* port used to be addr */
1404 dir = info.fields.type; /* direction */
1405 if (info.fields.sz32)
1406 size = 4;
1407 else if (info.fields.sz16)
1408 size = 2;
1409 else
1410 size = 1;
1412 HVM_DBG_LOG(DBG_LEVEL_IO,
1413 "svm_io_instruction: port 0x%x eip=%x:%"PRIx64", "
1414 "exit_qualification = %"PRIx64,
1415 port, vmcb->cs.sel, vmcb->rip, info.bytes);
1417 /* string instruction */
1418 if (info.fields.str)
1420 unsigned long addr, count;
1421 int sign = regs->eflags & EF_DF ? -1 : 1;
1423 if (!svm_get_io_address(v, regs, dir, &count, &addr))
1425 /* We failed to get a valid address, so don't do the IO operation -
1426 * it would just get worse if we do! Hopefully the guest is handling
1427 * GP faults...
1428 */
1429 return;
1432 /* "rep" prefix */
1433 if (info.fields.rep)
1435 mmio_opp->flags |= REPZ;
1437 else
1439 count = 1;
1442 /*
1443 * Handle string pio instructions that cross pages or that
1444 * are unaligned. See the comments in hvm_platform.c/handle_mmio()
1445 */
1446 if ((addr & PAGE_MASK) != ((addr + size - 1) & PAGE_MASK))
1448 unsigned long value = 0;
1450 mmio_opp->flags |= OVERLAP;
1452 if (dir == IOREQ_WRITE)
1453 hvm_copy(&value, addr, size, HVM_COPY_IN);
1455 send_pio_req(regs, port, 1, size, value, dir, 0);
1457 else
1459 if ((addr & PAGE_MASK) != ((addr + count * size - 1) & PAGE_MASK))
1461 if (sign > 0)
1462 count = (PAGE_SIZE - (addr & ~PAGE_MASK)) / size;
1463 else
1464 count = (addr & ~PAGE_MASK) / size;
1466 else
1467 vmcb->rip = vmcb->exitinfo2;
1469 send_pio_req(regs, port, count, size, addr, dir, 1);
1472 else
1474 /*
1475 * On SVM, the RIP of the instruction following the IN/OUT is saved in
1476 * ExitInfo2
1477 */
1478 vmcb->rip = vmcb->exitinfo2;
1480 if (port == 0xe9 && dir == IOREQ_WRITE && size == 1)
1481 hvm_print_line(v, regs->eax); /* guest debug output */
1483 send_pio_req(regs, port, 1, size, regs->eax, dir, 0);
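/* Handle a guest write to CR0: update the shadow CR0 and, when paging
 * is switched on or off, (re)build the shadow pagetables. */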
1487 static int svm_set_cr0(unsigned long value)
1489 struct vcpu *v = current;
1490 unsigned long mfn;
1491 int paging_enabled;
1492 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1494 ASSERT(vmcb);
1496 /* We don't want to lose PG. ET is reserved and should always be 1. */
1497 paging_enabled = svm_paging_enabled(v);
1498 value |= X86_CR0_ET;
1499 vmcb->cr0 = value | X86_CR0_PG;
1500 v->arch.hvm_svm.cpu_shadow_cr0 = value;
1502 /* TS cleared? Then initialise FPU now. */
1503 if ( !(value & X86_CR0_TS) )
1505 setup_fpu(v);
1506 vmcb->exception_intercepts &= ~EXCEPTION_BITMAP_NM;
1509 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR0 value = %lx\n", value);
1511 if ((value & X86_CR0_PE) && (value & X86_CR0_PG) && !paging_enabled)
1513 /* The guest CR3 must be pointing to the guest physical. */
1514 if (!VALID_MFN(mfn =
1515 get_mfn_from_gpfn(v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT))
1516 || !get_page(mfn_to_page(mfn), v->domain))
1518 printk("Invalid CR3 value = %lx\n", v->arch.hvm_svm.cpu_cr3);
1519 domain_crash_synchronous(); /* need to take a clean path */
1522 #if defined(__x86_64__)
1523 if (test_bit(SVM_CPU_STATE_LME_ENABLED, &v->arch.hvm_svm.cpu_state)
1524 && !test_bit(SVM_CPU_STATE_PAE_ENABLED,
1525 &v->arch.hvm_svm.cpu_state))
1527 HVM_DBG_LOG(DBG_LEVEL_1, "Enable paging before PAE enable\n");
1528 svm_inject_exception(v, TRAP_gp_fault, 1, 0);
1531 if (test_bit(SVM_CPU_STATE_LME_ENABLED, &v->arch.hvm_svm.cpu_state))
1533 /* PAE should be enabled here */
1534 HVM_DBG_LOG(DBG_LEVEL_1, "Enable the Long mode\n");
1535 set_bit(SVM_CPU_STATE_LMA_ENABLED,
1536 &v->arch.hvm_svm.cpu_state);
1537 vmcb->efer |= (EFER_LMA | EFER_LME);
1538 if (!shadow_set_guest_paging_levels(v->domain, PAGING_L4) )
1540 printk("Unsupported guest paging levels\n");
1541 domain_crash_synchronous(); /* need to take a clean path */
1544 else
1545 #endif /* __x86_64__ */
1547 #if CONFIG_PAGING_LEVELS >= 3
1548 /* It seems to be a 32-bit or 32-bit PAE guest */
1549 if ( test_bit(SVM_CPU_STATE_PAE_ENABLED,
1550 &v->arch.hvm_svm.cpu_state) )
1552 /* The guest enables PAE first and then enables PG, so it is
1553 * really a PAE guest */
1554 if ( !shadow_set_guest_paging_levels(v->domain, PAGING_L3) )
1556 printk("Unsupported guest paging levels\n");
1557 domain_crash_synchronous();
1560 else
1562 if ( !shadow_set_guest_paging_levels(v->domain, PAGING_L2) )
1564 printk("Unsupported guest paging levels\n");
1565 domain_crash_synchronous(); /* need to take a clean path */
1568 #endif
1571 /* Now arch.guest_table points to machine physical. */
1572 v->arch.guest_table = pagetable_from_pfn(mfn);
1573 update_pagetables(v);
1575 HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx",
1576 (unsigned long) (mfn << PAGE_SHIFT));
1578 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
1579 vmcb->cr3 = pagetable_get_paddr(v->arch.shadow_table);
1581 /* arch->shadow_table should hold the next CR3 for shadow */
1582 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx\n",
1583 v->arch.hvm_svm.cpu_cr3, mfn);
1585 return 1;
1588 if ( !((value & X86_CR0_PE) && (value & X86_CR0_PG)) && paging_enabled )
1589 if ( v->arch.hvm_svm.cpu_cr3 ) {
1590 put_page(mfn_to_page(get_mfn_from_gpfn(
1591 v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT)));
1592 v->arch.guest_table = pagetable_null();
1595 /*
1596 * SVM implements paged real-mode and when we return to real-mode
1597 * we revert back to the physical mappings that the domain builder
1598 * created.
1599 */
1600 if ((value & X86_CR0_PE) == 0) {
1601 if (value & X86_CR0_PG) {
1602 svm_inject_exception(v, TRAP_gp_fault, 1, 0);
1603 return 0;
1606 clear_all_shadow_status( v->domain );
1607 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
1608 vmcb->cr3 = pagetable_get_paddr(v->domain->arch.phys_table);
1610 else if ( (value & (X86_CR0_PE | X86_CR0_PG)) == X86_CR0_PE )
1612 /* we should take care of this kind of situation */
1613 clear_all_shadow_status(v->domain);
1614 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
1615 vmcb->cr3 = pagetable_get_paddr(v->domain->arch.phys_table);
1618 return 1;
1621 /*
1622 * Read from control registers. CR0 and CR4 are read from the shadow.
1623 */
1624 static void mov_from_cr(int cr, int gp, struct cpu_user_regs *regs)
1626 unsigned long value = 0;
1627 struct vcpu *v = current;
1628 struct vmcb_struct *vmcb;
1630 vmcb = v->arch.hvm_svm.vmcb;
1631 ASSERT(vmcb);
1633 switch (cr)
1635 case 0:
1636 value = v->arch.hvm_svm.cpu_shadow_cr0;
1637 if (svm_dbg_on)
1638 printk("CR0 read =%lx \n", value );
1639 break;
1640 case 2:
1641 value = vmcb->cr2;
1642 break;
1643 case 3:
1644 value = (unsigned long) v->arch.hvm_svm.cpu_cr3;
1645 if (svm_dbg_on)
1646 printk("CR3 read =%lx \n", value );
1647 break;
1648 case 4:
1649 value = (unsigned long) v->arch.hvm_svm.cpu_shadow_cr4;
1650 if (svm_dbg_on)
1651 printk( "CR4 read=%lx\n", value );
1652 break;
1653 case 8:
1654 #if 0
1655 value = vmcb->m_cr8;
1656 #else
1657 ASSERT(0);
1658 #endif
1659 break;
1661 default:
1662 __hvm_bug(regs);
1665 set_reg(gp, value, regs, vmcb);
1667 HVM_DBG_LOG(DBG_LEVEL_VMMU, "mov_from_cr: CR%d, value = %lx,", cr, value);
1671 static inline int svm_pgbit_test(struct vcpu *v)
1673 return v->arch.hvm_svm.cpu_shadow_cr0 & X86_CR0_PG;
1677 /*
1678 * Write to control registers
1679 */
1680 static int mov_to_cr(int gpreg, int cr, struct cpu_user_regs *regs)
1682 unsigned long value;
1683 unsigned long old_cr;
1684 struct vcpu *v = current;
1685 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1687 ASSERT(vmcb);
1689 value = get_reg(gpreg, regs, vmcb);
1691 HVM_DBG_LOG(DBG_LEVEL_1, "mov_to_cr: CR%d, value = %lx,", cr, value);
1692 HVM_DBG_LOG(DBG_LEVEL_1, "current = %lx,", (unsigned long) current);
1694 switch (cr)
1696 case 0:
1697 if (svm_dbg_on)
1698 printk("CR0 write =%lx \n", value );
1699 return svm_set_cr0(value);
1701 case 3:
1703 unsigned long old_base_mfn, mfn;
1704 if (svm_dbg_on)
1705 printk("CR3 write =%lx \n", value );
1706 /* If paging is not enabled yet, simply copy the value to CR3. */
1707 if (!svm_paging_enabled(v)) {
1708 v->arch.hvm_svm.cpu_cr3 = value;
1709 break;
1711 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
1713 /* We make a new one if the shadow does not exist. */
1714 if (value == v->arch.hvm_svm.cpu_cr3)
1716 /*
1717 * This is a simple TLB flush, implying the guest has
1718 * removed some translation or changed page attributes.
1719 * We simply invalidate the shadow.
1720 */
1721 mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT);
1722 if (mfn != pagetable_get_pfn(v->arch.guest_table))
1723 __hvm_bug(regs);
1724 shadow_sync_all(v->domain);
1726 else
1728 /*
1729 * If different, make a shadow. Check if the PDBR is valid
1730 * first.
1731 */
1732 HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 value = %lx", value);
1733 if (((value >> PAGE_SHIFT) > v->domain->max_pages)
1734 || !VALID_MFN(mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT))
1735 || !get_page(mfn_to_page(mfn), v->domain))
1737 printk("Invalid CR3 value=%lx\n", value);
1738 domain_crash_synchronous(); /* need to take a clean path */
1741 old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
1742 v->arch.guest_table = pagetable_from_pfn(mfn);
1744 if (old_base_mfn)
1745 put_page(mfn_to_page(old_base_mfn));
1747 /*
1748 * arch.shadow_table should now hold the next CR3 for shadow
1749 */
1750 #if CONFIG_PAGING_LEVELS >= 3
1751 if ( v->domain->arch.ops->guest_paging_levels == PAGING_L3 )
1752 shadow_sync_all(v->domain);
1753 #endif
1754 v->arch.hvm_svm.cpu_cr3 = value;
1755 update_pagetables(v);
1756 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx", value);
1757 vmcb->cr3 = pagetable_get_paddr(v->arch.shadow_table);
1759 break;
1762 case 4: /* CR4 */
1764 if (svm_dbg_on)
1765 printk( "write cr4=%lx, cr0=%lx\n",
1766 value, v->arch.hvm_svm.cpu_shadow_cr0 );
1767 old_cr = v->arch.hvm_svm.cpu_shadow_cr4;
1768 if ( value & X86_CR4_PAE && !(old_cr & X86_CR4_PAE) )
1770 set_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
1771 if ( svm_pgbit_test(v) )
1773 /* The guest is a 32-bit PAE guest. */
1774 #if CONFIG_PAGING_LEVELS >= 4
1775 unsigned long mfn, old_base_mfn;
1777 if( !shadow_set_guest_paging_levels(v->domain, PAGING_L3) )
1779 printk("Unsupported guest paging levels\n");
1780 domain_crash_synchronous(); /* need to take a clean path */
1783 if ( !VALID_MFN(mfn = get_mfn_from_gpfn(
1784 v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT)) ||
1785 !get_page(mfn_to_page(mfn), v->domain) )
1787 printk("Invalid CR3 value = %lx", v->arch.hvm_svm.cpu_cr3);
1788 domain_crash_synchronous(); /* need to take a clean path */
1791 old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
1792 if ( old_base_mfn )
1793 put_page(mfn_to_page(old_base_mfn));
1795 /*
1796 * Now arch.guest_table points to machine physical.
1797 */
1799 v->arch.guest_table = pagetable_from_pfn(mfn);
1800 update_pagetables(v);
1802 HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx",
1803 (unsigned long) (mfn << PAGE_SHIFT));
1805 vmcb->cr3 = pagetable_get_paddr(v->arch.shadow_table);
1807 /*
1808 * arch->shadow_table should hold the next CR3 for shadow
1809 */
1811 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx",
1812 v->arch.hvm_svm.cpu_cr3, mfn);
1813 #endif
1815 else
1817 /* The guest is a 64 bit or 32-bit PAE guest. */
1818 #if CONFIG_PAGING_LEVELS >= 4
1819 if ( (v->domain->arch.ops != NULL) &&
1820 v->domain->arch.ops->guest_paging_levels == PAGING_L2)
1822 /* The guest first enables PAE without enabling PG;
1823 * it must enable PG after that, so it is a 32-bit PAE
1824 * guest */
1826 if ( !shadow_set_guest_paging_levels(v->domain, PAGING_L3) )
1828 printk("Unsupported guest paging levels\n");
1829 domain_crash_synchronous();
1832 else
1834 if ( !shadow_set_guest_paging_levels(v->domain,
1835 PAGING_L4) )
1837 printk("Unsupported guest paging levels\n");
1838 domain_crash_synchronous();
1841 #endif
1844 else if (value & X86_CR4_PAE) {
1845 set_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
1846 } else {
1847 if (test_bit(SVM_CPU_STATE_LMA_ENABLED,
1848 &v->arch.hvm_svm.cpu_state)) {
1849 svm_inject_exception(v, TRAP_gp_fault, 1, 0);
1851 clear_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
1854 v->arch.hvm_svm.cpu_shadow_cr4 = value;
1855 vmcb->cr4 = value | SVM_CR4_HOST_MASK;
1857 /*
1858 * Writing to CR4 to modify the PSE, PGE, or PAE flag invalidates
1859 * all TLB entries except global entries.
1860 */
1861 if ((old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE))
1863 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
1864 shadow_sync_all(v->domain);
1866 break;
1869 default:
1870 printk("invalid cr: %d\n", cr);
1871 __hvm_bug(regs);
1874 return 1;
1878 #define ARR_SIZE(x) (sizeof(x) / sizeof(x[0]))
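/* Dispatch a CRx access intercept: decode the instruction (MOV to/from
 * CRx, CLTS, LMSW or SMSW) and emulate it. */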
1881 static int svm_cr_access(struct vcpu *v, unsigned int cr, unsigned int type,
1882 struct cpu_user_regs *regs)
1884 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1885 int inst_len = 0;
1886 int index;
1887 unsigned int gpreg;
1888 unsigned long value;
1889 u8 buffer[MAX_INST_LEN];
1890 u8 prefix = 0;
1891 int result = 1;
1892 enum instruction_index list_a[] = {INSTR_MOV2CR, INSTR_CLTS, INSTR_LMSW};
1893 enum instruction_index list_b[] = {INSTR_MOVCR2, INSTR_SMSW};
1894 enum instruction_index match;
1896 ASSERT(vmcb);
1898 inst_copy_from_guest(buffer, svm_rip2pointer(vmcb), sizeof(buffer));
1899 /* get index to first actual instruction byte - as we will need to know where the
1900 * prefix lives later on
1901 */
1902 index = skip_prefix_bytes(buffer, sizeof(buffer));
1904 if (type == TYPE_MOV_TO_CR)
1906 inst_len = __get_instruction_length_from_list(vmcb, list_a,
1907 ARR_SIZE(list_a), &buffer[index], &match);
1909 else
1911 inst_len = __get_instruction_length_from_list(vmcb, list_b,
1912 ARR_SIZE(list_b), &buffer[index], &match);
1915 ASSERT(inst_len > 0);
1917 inst_len += index;
1919 /* Check for REX prefix - it's ALWAYS the last byte of any prefix bytes */
1920 if (index > 0 && (buffer[index-1] & 0xF0) == 0x40)
1921 prefix = buffer[index-1];
1923 HVM_DBG_LOG(DBG_LEVEL_1, "eip = %lx", (unsigned long) vmcb->rip);
1925 switch (match)
1927 case INSTR_MOV2CR:
1928 gpreg = decode_src_reg(prefix, buffer[index+2]);
1929 result = mov_to_cr(gpreg, cr, regs);
1930 break;
1932 case INSTR_MOVCR2:
1933 gpreg = decode_src_reg(prefix, buffer[index+2]);
1934 mov_from_cr(cr, gpreg, regs);
1935 break;
1937 case INSTR_CLTS:
1938 /* TS being cleared means that it's time to restore fpu state. */
1939 setup_fpu(current);
1940 vmcb->exception_intercepts &= ~EXCEPTION_BITMAP_NM;
1941 vmcb->cr0 &= ~X86_CR0_TS; /* clear TS */
1942 v->arch.hvm_svm.cpu_shadow_cr0 &= ~X86_CR0_TS; /* clear TS */
1943 break;
1945 case INSTR_LMSW:
1946 if (svm_dbg_on)
1947 svm_dump_inst(svm_rip2pointer(vmcb));
1949 gpreg = decode_src_reg(prefix, buffer[index+2]);
1950 value = get_reg(gpreg, regs, vmcb) & 0xF;
1952 if (svm_dbg_on)
1953 printk("CR0-LMSW value=%lx, reg=%d, inst_len=%d\n", value, gpreg,
1954 inst_len);
1956 value = (v->arch.hvm_svm.cpu_shadow_cr0 & ~0xF) | value;
1958 if (svm_dbg_on)
1959 printk("CR0-LMSW CR0 - New value=%lx\n", value);
1961 result = svm_set_cr0(value);
1962 break;
1964 case INSTR_SMSW:
1965 if (svm_dbg_on)
1966 svm_dump_inst(svm_rip2pointer(vmcb));
1967 value = v->arch.hvm_svm.cpu_shadow_cr0;
1968 gpreg = decode_src_reg(prefix, buffer[index+2]);
1969 set_reg(gpreg, value, regs, vmcb);
1971 if (svm_dbg_on)
1972 printk("CR0-SMSW value=%lx, reg=%d, inst_len=%d\n", value, gpreg,
1973 inst_len);
1974 break;
1976 default:
1977 __hvm_bug(regs);
1978 break;
1981 ASSERT(inst_len);
1983 __update_guest_eip(vmcb, inst_len);
1985 return result;
1988 static inline void svm_do_msr_access(struct vcpu *v, struct cpu_user_regs *regs)
1990 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1991 int inst_len;
1992 u64 msr_content=0;
1994 ASSERT(vmcb);
1996 HVM_DBG_LOG(DBG_LEVEL_1, "svm_do_msr_access: ecx=%lx, eax=%lx, edx=%lx, "
1997 "exitinfo = %lx", (unsigned long)regs->ecx,
1998 (unsigned long)regs->eax, (unsigned long)regs->edx,
1999 (unsigned long)vmcb->exitinfo1);
2001 /* is it a read? */
2002 if (vmcb->exitinfo1 == 0)
2004 inst_len = __get_instruction_length(vmcb, INSTR_RDMSR, NULL);
2006 regs->edx = 0;
2007 switch (regs->ecx) {
2008 case MSR_IA32_TIME_STAMP_COUNTER:
2009 msr_content = hvm_get_guest_time(v);
2010 break;
2011 case MSR_IA32_SYSENTER_CS:
2012 msr_content = vmcb->sysenter_cs;
2013 break;
2014 case MSR_IA32_SYSENTER_ESP:
2015 msr_content = vmcb->sysenter_esp;
2016 break;
2017 case MSR_IA32_SYSENTER_EIP:
2018 msr_content = vmcb->sysenter_eip;
2019 break;
2020 case MSR_IA32_APICBASE:
2021 msr_content = VLAPIC(v) ? VLAPIC(v)->apic_base_msr : 0;
2022 break;
2023 default:
2024 if (long_mode_do_msr_read(regs))
2025 goto done;
2026 rdmsr_safe(regs->ecx, regs->eax, regs->edx);
2027 break;
2029 regs->eax = msr_content & 0xFFFFFFFF;
2030 regs->edx = msr_content >> 32;
2032 else
2034 inst_len = __get_instruction_length(vmcb, INSTR_WRMSR, NULL);
2035 msr_content = (regs->eax & 0xFFFFFFFF) | ((u64)regs->edx << 32);
2037 switch (regs->ecx)
2039 case MSR_IA32_TIME_STAMP_COUNTER:
2040 svm_set_guest_time(v, msr_content);
2041 break;
2042 case MSR_IA32_SYSENTER_CS:
2043 vmcb->sysenter_cs = msr_content;
2044 break;
2045 case MSR_IA32_SYSENTER_ESP:
2046 vmcb->sysenter_esp = msr_content;
2047 break;
2048 case MSR_IA32_SYSENTER_EIP:
2049 vmcb->sysenter_eip = msr_content;
2050 break;
2051 case MSR_IA32_APICBASE:
2052 vlapic_msr_set(VLAPIC(v), msr_content);
2053 break;
2054 default:
2055 long_mode_do_msr_write(regs);
2056 break;
2060 done:
2062 HVM_DBG_LOG(DBG_LEVEL_1, "svm_do_msr_access returns: "
2063 "ecx=%lx, eax=%lx, edx=%lx",
2064 (unsigned long)regs->ecx, (unsigned long)regs->eax,
2065 (unsigned long)regs->edx);
2067 __update_guest_eip(vmcb, inst_len);
2071 /*
2072 * Need to use this exit to reschedule
2073 */
2074 static inline void svm_vmexit_do_hlt(struct vmcb_struct *vmcb)
2076 struct vcpu *v = current;
2077 struct periodic_time *pt=&v->domain->arch.hvm_domain.pl_time.periodic_tm;
2078 s_time_t next_pit = -1, next_wakeup;
2080 __update_guest_eip(vmcb, 1);
2082 /* check for interrupt not handled or new interrupt */
2083 if ( vmcb->vintr.fields.irq || cpu_has_pending_irq(v) )
2084 return;
2086 if ( !v->vcpu_id )
2087 next_pit = get_scheduled(v, pt->irq, pt);
2088 next_wakeup = get_apictime_scheduled(v);
2089 if ( (next_pit != -1 && next_pit < next_wakeup) || next_wakeup == -1 )
2090 next_wakeup = next_pit;
2091 if ( next_wakeup != - 1 )
2092 set_timer(&current->arch.hvm_svm.hlt_timer, next_wakeup);
2093 hvm_safe_block();
2097 static void svm_vmexit_do_invd(struct vmcb_struct *vmcb)
2099 int inst_len;
2101 /* Invalidate the cache - we can't really do that safely - maybe we should
2102 * WBINVD, but I think it's just fine to completely ignore it - we should
2103 * have cache-snooping that solves it anyway. -- Mats P.
2104 */
2106 /* Tell the user that we did this - just in case someone runs some really weird
2107 * operating system and wants to know why it's not working as it should...
2108 */
2109 printk("INVD instruction intercepted - ignored\n");
2111 inst_len = __get_instruction_length(vmcb, INSTR_INVD, NULL);
2112 __update_guest_eip(vmcb, inst_len);
2118 #ifdef XEN_DEBUGGER
2119 static void svm_debug_save_cpu_user_regs(struct vmcb_struct *vmcb,
2120 struct cpu_user_regs *regs)
2122 regs->eip = vmcb->rip;
2123 regs->esp = vmcb->rsp;
2124 regs->eflags = vmcb->rflags;
2126 regs->xcs = vmcb->cs.sel;
2127 regs->xds = vmcb->ds.sel;
2128 regs->xes = vmcb->es.sel;
2129 regs->xfs = vmcb->fs.sel;
2130 regs->xgs = vmcb->gs.sel;
2131 regs->xss = vmcb->ss.sel;
2135 static void svm_debug_restore_cpu_user_regs(struct cpu_user_regs *regs)
2137 vmcb->ss.sel = regs->xss;
2138 vmcb->rsp = regs->esp;
2139 vmcb->rflags = regs->eflags;
2140 vmcb->cs.sel = regs->xcs;
2141 vmcb->rip = regs->eip;
2143 vmcb->gs.sel = regs->xgs;
2144 vmcb->fs.sel = regs->xfs;
2145 vmcb->es.sel = regs->xes;
2146 vmcb->ds.sel = regs->xds;
2148 #endif
2151 void svm_handle_invlpg(const short invlpga, struct cpu_user_regs *regs)
2153 struct vcpu *v = current;
2154 u8 opcode[MAX_INST_LEN], prefix, length = MAX_INST_LEN;
2155 unsigned long g_vaddr;
2156 int inst_len;
2157 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2159 ASSERT(vmcb);
2160 /*
2161 * Unknown how many bytes the invlpg instruction will take. Use the
2162 * maximum instruction length here
2163 */
2164 if (inst_copy_from_guest(opcode, svm_rip2pointer(vmcb), length) < length)
2166 printk("svm_handle_invlpg(): error reading %d bytes of guest memory\n", length);
2167 __hvm_bug(regs);
2170 if (invlpga)
2172 inst_len = __get_instruction_length(vmcb, INSTR_INVLPGA, opcode);
2173 ASSERT(inst_len > 0);
2174 __update_guest_eip(vmcb, inst_len);
2176 /*
2177 * The address is implicit in this instruction (it is taken from rAX below).
2178 * At the moment, we don't use ecx (the ASID) to invalidate individual guest pages.
2179 */
2180 g_vaddr = regs->eax;
2182 else
2184 /* What about multiple prefix codes? */
2185 prefix = (is_prefix(opcode[0])?opcode[0]:0);
2186 inst_len = __get_instruction_length(vmcb, INSTR_INVLPG, opcode);
2187 ASSERT(inst_len > 0);
2189 inst_len--;
2190 length -= inst_len;
2192 /*
2193 * Decode the memory operand of the instruction, including ModRM, SIB, and
2194 * displacement, to get the effective address and its length in bytes. Assume
2195 * the system is in either 32- or 64-bit mode.
2196 */
2197 g_vaddr = get_effective_addr_modrm64(vmcb, regs, prefix,
2198 &opcode[inst_len], &length);
2200 inst_len += length;
2201 __update_guest_eip (vmcb, inst_len);
2204 /* Overkill - we may not need this */
2205 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
2206 shadow_invlpg(v, g_vaddr);
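/*
 * Encoding note (informational): INVLPG is 0F 01 /7, so it carries a ModRM
 * byte (plus optional SIB and displacement) that the decode above must parse,
 * whereas INVLPGA is the fixed sequence 0F 01 DF and takes its linear address
 * implicitly in rAX - hence the two branches in svm_handle_invlpg().
 */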
2210 /*
2211 * Reset to realmode causes execution to start at 0xF000:0xFFF0 in
2212 * 16-bit realmode. Basically, this mimics a processor reset.
2214 * returns 0 on success, non-zero otherwise
2215 */
2216 static int svm_do_vmmcall_reset_to_realmode(struct vcpu *v,
2217 struct cpu_user_regs *regs)
2219 struct vmcb_struct *vmcb;
2221 ASSERT(v);
2222 ASSERT(regs);
2224 vmcb = v->arch.hvm_svm.vmcb;
2226 ASSERT(vmcb);
2228 /* clear the vmcb and user regs */
2229 memset(regs, 0, sizeof(struct cpu_user_regs));
2231 /* VMCB Control */
2232 vmcb->tsc_offset = 0;
2234 /* VMCB State */
2235 vmcb->cr0 = X86_CR0_ET | X86_CR0_PG;
2236 v->arch.hvm_svm.cpu_shadow_cr0 = X86_CR0_ET;
2238 vmcb->cr2 = 0;
2239 vmcb->efer = EFER_SVME;
2241 vmcb->cr4 = SVM_CR4_HOST_MASK;
2242 v->arch.hvm_svm.cpu_shadow_cr4 = 0;
2243 clear_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
2245 /* This will jump to ROMBIOS */
2246 vmcb->rip = 0xFFF0;
2248 /* setup the segment registers and all their hidden states */
2249 vmcb->cs.sel = 0xF000;
2250 vmcb->cs.attributes.bytes = 0x089b;
2251 vmcb->cs.limit = 0xffff;
2252 vmcb->cs.base = 0x000F0000;
2254 vmcb->ss.sel = 0x00;
2255 vmcb->ss.attributes.bytes = 0x0893;
2256 vmcb->ss.limit = 0xffff;
2257 vmcb->ss.base = 0x00;
2259 vmcb->ds.sel = 0x00;
2260 vmcb->ds.attributes.bytes = 0x0893;
2261 vmcb->ds.limit = 0xffff;
2262 vmcb->ds.base = 0x00;
2264 vmcb->es.sel = 0x00;
2265 vmcb->es.attributes.bytes = 0x0893;
2266 vmcb->es.limit = 0xffff;
2267 vmcb->es.base = 0x00;
2269 vmcb->fs.sel = 0x00;
2270 vmcb->fs.attributes.bytes = 0x0893;
2271 vmcb->fs.limit = 0xffff;
2272 vmcb->fs.base = 0x00;
2274 vmcb->gs.sel = 0x00;
2275 vmcb->gs.attributes.bytes = 0x0893;
2276 vmcb->gs.limit = 0xffff;
2277 vmcb->gs.base = 0x00;
2279 vmcb->ldtr.sel = 0x00;
2280 vmcb->ldtr.attributes.bytes = 0x0000;
2281 vmcb->ldtr.limit = 0x0;
2282 vmcb->ldtr.base = 0x00;
2284 vmcb->gdtr.sel = 0x00;
2285 vmcb->gdtr.attributes.bytes = 0x0000;
2286 vmcb->gdtr.limit = 0x0;
2287 vmcb->gdtr.base = 0x00;
2289 vmcb->tr.sel = 0;
2290 vmcb->tr.attributes.bytes = 0;
2291 vmcb->tr.limit = 0x0;
2292 vmcb->tr.base = 0;
2294 vmcb->idtr.sel = 0x00;
2295 vmcb->idtr.attributes.bytes = 0x0000;
2296 vmcb->idtr.limit = 0x3ff;
2297 vmcb->idtr.base = 0x00;
2299 vmcb->rax = 0;
2300 vmcb->rsp = 0;
2302 return 0;
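/*
 * Note on the attribute values used above (informational): the low byte
 * mirrors the classic descriptor access byte - 0x9b is a present, DPL-0,
 * execute/read, accessed code segment and 0x93 the matching read/write,
 * accessed data segment - which, together with cs.base = cs.sel << 4 and
 * 0xffff limits, gives the guest the real-mode view it expects at reset.
 */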
2306 /*
2307 * svm_do_vmmcall - SVM VMMCALL handler
2309 * returns 0 on success, non-zero otherwise
2310 */
2311 static int svm_do_vmmcall(struct vcpu *v, struct cpu_user_regs *regs)
2313 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2314 int inst_len;
2316 ASSERT(vmcb);
2317 ASSERT(regs);
2319 inst_len = __get_instruction_length(vmcb, INSTR_VMCALL, NULL);
2320 ASSERT(inst_len > 0);
2322 /* VMMCALL sanity check */
2323 if (vmcb->cpl > get_vmmcall_cpl(regs->edi))
2325 printf("VMMCALL CPL check failed\n");
2326 return -1;
2329 /* handle the request */
2330 switch (regs->edi)
2332 case VMMCALL_RESET_TO_REALMODE:
2333 if (svm_do_vmmcall_reset_to_realmode(v, regs))
2335 printf("svm_do_vmmcall_reset_to_realmode() failed\n");
2336 return -1;
2339 /* since we just reset the VMCB, return without adjusting the eip */
2340 return 0;
2341 case VMMCALL_DEBUG:
2342 printf("DEBUG features not implemented yet\n");
2343 break;
2344 default:
2345 break;
2348 hvm_print_line(v, regs->eax); /* provides the current domain */
2350 __update_guest_eip(vmcb, inst_len);
2351 return 0;
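/*
 * For reference (guest-side, illustrative only): a guest reaches this handler
 * by loading a function code into EDI and executing VMMCALL, roughly:
 *
 *     mov  $VMMCALL_RESET_TO_REALMODE, %edi
 *     vmmcall
 *
 * The CPL check above then compares the guest's CPL against the privilege
 * level required for the requested function (see vmmcall.h).
 */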
2355 void svm_dump_inst(unsigned long eip)
2357 u8 opcode[256];
2358 unsigned long ptr;
2359 int len;
2360 int i;
2362 ptr = eip & ~0xff;
2363 len = 0;
2365 if (hvm_copy(opcode, ptr, sizeof(opcode), HVM_COPY_IN))
2366 len = sizeof(opcode);
2368 printf("Code bytes around (len=%d) %lx:", len, eip);
2369 for (i = 0; i < len; i++)
2371 if ((i & 0x0f) == 0)
2372 printf("\n%08lx:", ptr+i);
2374 printf("%02x ", opcode[i]);
2377 printf("\n");
2381 void svm_dump_regs(const char *from, struct cpu_user_regs *regs)
2383 struct vcpu *v = current;
2384 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2385 unsigned long pt = pagetable_get_paddr(v->arch.shadow_table);
2387 printf("%s: guest registers from %s:\n", __func__, from);
2388 #if defined (__x86_64__)
2389 printk("rax: %016lx rbx: %016lx rcx: %016lx\n",
2390 regs->rax, regs->rbx, regs->rcx);
2391 printk("rdx: %016lx rsi: %016lx rdi: %016lx\n",
2392 regs->rdx, regs->rsi, regs->rdi);
2393 printk("rbp: %016lx rsp: %016lx r8: %016lx\n",
2394 regs->rbp, regs->rsp, regs->r8);
2395 printk("r9: %016lx r10: %016lx r11: %016lx\n",
2396 regs->r9, regs->r10, regs->r11);
2397 printk("r12: %016lx r13: %016lx r14: %016lx\n",
2398 regs->r12, regs->r13, regs->r14);
2399 printk("r15: %016lx cr0: %016lx cr3: %016lx\n",
2400 regs->r15, v->arch.hvm_svm.cpu_shadow_cr0, vmcb->cr3);
2401 #else
2402 printf("eax: %08x, ebx: %08x, ecx: %08x, edx: %08x\n",
2403 regs->eax, regs->ebx, regs->ecx, regs->edx);
2404 printf("edi: %08x, esi: %08x, ebp: %08x, esp: %08x\n",
2405 regs->edi, regs->esi, regs->ebp, regs->esp);
2406 printf("%s: guest cr0: %lx\n", __func__,
2407 v->arch.hvm_svm.cpu_shadow_cr0);
2408 printf("guest CR3 = %llx\n", vmcb->cr3);
2409 #endif
2410 printf("%s: pt = %lx\n", __func__, pt);
2414 void svm_dump_host_regs(const char *from)
2416 struct vcpu *v = current;
2417 unsigned long pt = pagetable_get_paddr(v->arch.monitor_table);
2418 unsigned long cr3, cr0;
2419 printf("Host registers at %s\n", from);
2421 __asm__ __volatile__ ("\tmov %%cr0,%0\n"
2422 "\tmov %%cr3,%1\n"
2423 : "=r" (cr0), "=r"(cr3));
2424 printf("%s: pt = %lx, cr3 = %lx, cr0 = %lx\n", __func__, pt, cr3, cr0);
2427 #ifdef SVM_EXTRA_DEBUG
2428 static char *exit_reasons[] = {
2429 [VMEXIT_CR0_READ] = "CR0_READ",
2430 [VMEXIT_CR1_READ] = "CR1_READ",
2431 [VMEXIT_CR2_READ] = "CR2_READ",
2432 [VMEXIT_CR3_READ] = "CR3_READ",
2433 [VMEXIT_CR4_READ] = "CR4_READ",
2434 [VMEXIT_CR5_READ] = "CR5_READ",
2435 [VMEXIT_CR6_READ] = "CR6_READ",
2436 [VMEXIT_CR7_READ] = "CR7_READ",
2437 [VMEXIT_CR8_READ] = "CR8_READ",
2438 [VMEXIT_CR9_READ] = "CR9_READ",
2439 [VMEXIT_CR10_READ] = "CR10_READ",
2440 [VMEXIT_CR11_READ] = "CR11_READ",
2441 [VMEXIT_CR12_READ] = "CR12_READ",
2442 [VMEXIT_CR13_READ] = "CR13_READ",
2443 [VMEXIT_CR14_READ] = "CR14_READ",
2444 [VMEXIT_CR15_READ] = "CR15_READ",
2445 [VMEXIT_CR0_WRITE] = "CR0_WRITE",
2446 [VMEXIT_CR1_WRITE] = "CR1_WRITE",
2447 [VMEXIT_CR2_WRITE] = "CR2_WRITE",
2448 [VMEXIT_CR3_WRITE] = "CR3_WRITE",
2449 [VMEXIT_CR4_WRITE] = "CR4_WRITE",
2450 [VMEXIT_CR5_WRITE] = "CR5_WRITE",
2451 [VMEXIT_CR6_WRITE] = "CR6_WRITE",
2452 [VMEXIT_CR7_WRITE] = "CR7_WRITE",
2453 [VMEXIT_CR8_WRITE] = "CR8_WRITE",
2454 [VMEXIT_CR9_WRITE] = "CR9_WRITE",
2455 [VMEXIT_CR10_WRITE] = "CR10_WRITE",
2456 [VMEXIT_CR11_WRITE] = "CR11_WRITE",
2457 [VMEXIT_CR12_WRITE] = "CR12_WRITE",
2458 [VMEXIT_CR13_WRITE] = "CR13_WRITE",
2459 [VMEXIT_CR14_WRITE] = "CR14_WRITE",
2460 [VMEXIT_CR15_WRITE] = "CR15_WRITE",
2461 [VMEXIT_DR0_READ] = "DR0_READ",
2462 [VMEXIT_DR1_READ] = "DR1_READ",
2463 [VMEXIT_DR2_READ] = "DR2_READ",
2464 [VMEXIT_DR3_READ] = "DR3_READ",
2465 [VMEXIT_DR4_READ] = "DR4_READ",
2466 [VMEXIT_DR5_READ] = "DR5_READ",
2467 [VMEXIT_DR6_READ] = "DR6_READ",
2468 [VMEXIT_DR7_READ] = "DR7_READ",
2469 [VMEXIT_DR8_READ] = "DR8_READ",
2470 [VMEXIT_DR9_READ] = "DR9_READ",
2471 [VMEXIT_DR10_READ] = "DR10_READ",
2472 [VMEXIT_DR11_READ] = "DR11_READ",
2473 [VMEXIT_DR12_READ] = "DR12_READ",
2474 [VMEXIT_DR13_READ] = "DR13_READ",
2475 [VMEXIT_DR14_READ] = "DR14_READ",
2476 [VMEXIT_DR15_READ] = "DR15_READ",
2477 [VMEXIT_DR0_WRITE] = "DR0_WRITE",
2478 [VMEXIT_DR1_WRITE] = "DR1_WRITE",
2479 [VMEXIT_DR2_WRITE] = "DR2_WRITE",
2480 [VMEXIT_DR3_WRITE] = "DR3_WRITE",
2481 [VMEXIT_DR4_WRITE] = "DR4_WRITE",
2482 [VMEXIT_DR5_WRITE] = "DR5_WRITE",
2483 [VMEXIT_DR6_WRITE] = "DR6_WRITE",
2484 [VMEXIT_DR7_WRITE] = "DR7_WRITE",
2485 [VMEXIT_DR8_WRITE] = "DR8_WRITE",
2486 [VMEXIT_DR9_WRITE] = "DR9_WRITE",
2487 [VMEXIT_DR10_WRITE] = "DR10_WRITE",
2488 [VMEXIT_DR11_WRITE] = "DR11_WRITE",
2489 [VMEXIT_DR12_WRITE] = "DR12_WRITE",
2490 [VMEXIT_DR13_WRITE] = "DR13_WRITE",
2491 [VMEXIT_DR14_WRITE] = "DR14_WRITE",
2492 [VMEXIT_DR15_WRITE] = "DR15_WRITE",
2493 [VMEXIT_EXCEPTION_DE] = "EXCEPTION_DE",
2494 [VMEXIT_EXCEPTION_DB] = "EXCEPTION_DB",
2495 [VMEXIT_EXCEPTION_NMI] = "EXCEPTION_NMI",
2496 [VMEXIT_EXCEPTION_BP] = "EXCEPTION_BP",
2497 [VMEXIT_EXCEPTION_OF] = "EXCEPTION_OF",
2498 [VMEXIT_EXCEPTION_BR] = "EXCEPTION_BR",
2499 [VMEXIT_EXCEPTION_UD] = "EXCEPTION_UD",
2500 [VMEXIT_EXCEPTION_NM] = "EXCEPTION_NM",
2501 [VMEXIT_EXCEPTION_DF] = "EXCEPTION_DF",
2502 [VMEXIT_EXCEPTION_09] = "EXCEPTION_09",
2503 [VMEXIT_EXCEPTION_TS] = "EXCEPTION_TS",
2504 [VMEXIT_EXCEPTION_NP] = "EXCEPTION_NP",
2505 [VMEXIT_EXCEPTION_SS] = "EXCEPTION_SS",
2506 [VMEXIT_EXCEPTION_GP] = "EXCEPTION_GP",
2507 [VMEXIT_EXCEPTION_PF] = "EXCEPTION_PF",
2508 [VMEXIT_EXCEPTION_15] = "EXCEPTION_15",
2509 [VMEXIT_EXCEPTION_MF] = "EXCEPTION_MF",
2510 [VMEXIT_EXCEPTION_AC] = "EXCEPTION_AC",
2511 [VMEXIT_EXCEPTION_MC] = "EXCEPTION_MC",
2512 [VMEXIT_EXCEPTION_XF] = "EXCEPTION_XF",
2513 [VMEXIT_INTR] = "INTR",
2514 [VMEXIT_NMI] = "NMI",
2515 [VMEXIT_SMI] = "SMI",
2516 [VMEXIT_INIT] = "INIT",
2517 [VMEXIT_VINTR] = "VINTR",
2518 [VMEXIT_CR0_SEL_WRITE] = "CR0_SEL_WRITE",
2519 [VMEXIT_IDTR_READ] = "IDTR_READ",
2520 [VMEXIT_GDTR_READ] = "GDTR_READ",
2521 [VMEXIT_LDTR_READ] = "LDTR_READ",
2522 [VMEXIT_TR_READ] = "TR_READ",
2523 [VMEXIT_IDTR_WRITE] = "IDTR_WRITE",
2524 [VMEXIT_GDTR_WRITE] = "GDTR_WRITE",
2525 [VMEXIT_LDTR_WRITE] = "LDTR_WRITE",
2526 [VMEXIT_TR_WRITE] = "TR_WRITE",
2527 [VMEXIT_RDTSC] = "RDTSC",
2528 [VMEXIT_RDPMC] = "RDPMC",
2529 [VMEXIT_PUSHF] = "PUSHF",
2530 [VMEXIT_POPF] = "POPF",
2531 [VMEXIT_CPUID] = "CPUID",
2532 [VMEXIT_RSM] = "RSM",
2533 [VMEXIT_IRET] = "IRET",
2534 [VMEXIT_SWINT] = "SWINT",
2535 [VMEXIT_INVD] = "INVD",
2536 [VMEXIT_PAUSE] = "PAUSE",
2537 [VMEXIT_HLT] = "HLT",
2538 [VMEXIT_INVLPG] = "INVLPG",
2539 [VMEXIT_INVLPGA] = "INVLPGA",
2540 [VMEXIT_IOIO] = "IOIO",
2541 [VMEXIT_MSR] = "MSR",
2542 [VMEXIT_TASK_SWITCH] = "TASK_SWITCH",
2543 [VMEXIT_FERR_FREEZE] = "FERR_FREEZE",
2544 [VMEXIT_SHUTDOWN] = "SHUTDOWN",
2545 [VMEXIT_VMRUN] = "VMRUN",
2546 [VMEXIT_VMMCALL] = "VMMCALL",
2547 [VMEXIT_VMLOAD] = "VMLOAD",
2548 [VMEXIT_VMSAVE] = "VMSAVE",
2549 [VMEXIT_STGI] = "STGI",
2550 [VMEXIT_CLGI] = "CLGI",
2551 [VMEXIT_SKINIT] = "SKINIT",
2552 [VMEXIT_RDTSCP] = "RDTSCP",
2553 [VMEXIT_ICEBP] = "ICEBP",
2554 [VMEXIT_NPF] = "NPF"
2555 };
2556 #endif /* SVM_EXTRA_DEBUG */
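/*
 * Informational: exit_reasons[] uses designated initializers indexed by the
 * raw VMEXIT code, so any intercept without an entry is a NULL string; the
 * debug path in svm_vmexit_handler() checks for this and falls back to
 * printing the numeric exit code instead.
 */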
2558 #ifdef SVM_WALK_GUEST_PAGES
2559 void walk_shadow_and_guest_pt(unsigned long gva)
2561 l2_pgentry_t gpde;
2562 l2_pgentry_t spde;
2563 l1_pgentry_t gpte;
2564 l1_pgentry_t spte;
2565 struct vcpu *v = current;
2566 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2567 unsigned long gpa;
2569 gpa = gva_to_gpa( gva );
2570 printk( "gva = %lx, gpa=%lx, gCR3=%x\n", gva, gpa, (u32)vmcb->cr3 );
2571 if( !svm_paging_enabled(v) || mmio_space(gpa) )
2572 return;
2574 /* let's dump the guest and shadow page info */
2576 __guest_get_l2e(v, gva, &gpde);
2577 printk( "G-PDE = %x, flags=%x\n", gpde.l2, l2e_get_flags(gpde) );
2578 __shadow_get_l2e( v, gva, &spde );
2579 printk( "S-PDE = %x, flags=%x\n", spde.l2, l2e_get_flags(spde) );
2581 if ( unlikely(!(l2e_get_flags(gpde) & _PAGE_PRESENT)) )
2582 return;
2584 spte = l1e_empty();
2586 // This is actually overkill - we only need to make sure the hl2 is in-sync.
2587 shadow_sync_va(v, gva);
2589 gpte.l1 = 0;
2590 __copy_from_user(&gpte, &linear_pg_table[ l1_linear_offset(gva) ], sizeof(gpte) );
2591 printk( "G-PTE = %x, flags=%x\n", gpte.l1, l1e_get_flags(gpte) );
2592 __copy_from_user( &spte, &phys_to_machine_mapping[ l1e_get_pfn( gpte ) ],
2593 sizeof(spte) );
2594 printk( "S-PTE = %x, flags=%x\n", spte.l1, l1e_get_flags(spte));
2596 #endif /* SVM_WALK_GUEST_PAGES */
2598 asmlinkage void svm_vmexit_handler(struct cpu_user_regs regs)
2600 unsigned int exit_reason;
2601 unsigned long eip;
2602 struct vcpu *v = current;
2603 int error;
2604 int do_debug = 0;
2605 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2607 ASSERT(vmcb);
2609 exit_reason = vmcb->exitcode;
2610 save_svm_cpu_user_regs(v, &regs);
2612 vmcb->tlb_control = 1;
2614 #ifdef SVM_EXTRA_DEBUG
2616 #if defined(__i386__)
2617 #define rip eip
2618 #endif
2620 static unsigned long intercepts_counter = 0;
2622 if (svm_dbg_on && exit_reason == VMEXIT_EXCEPTION_PF)
2624 if (svm_paging_enabled(v) && !mmio_space(gva_to_gpa(vmcb->exitinfo2)))
2626 printk("I%08ld,ExC=%s(%d),IP=%x:%llx,I1=%llx,I2=%llx,INT=%llx, gpa=%llx\n",
2627 intercepts_counter,
2628 exit_reasons[exit_reason], exit_reason, regs.cs,
2629 (unsigned long long) regs.rip,
2630 (unsigned long long) vmcb->exitinfo1,
2631 (unsigned long long) vmcb->exitinfo2,
2632 (unsigned long long) vmcb->exitintinfo.bytes,
2633 (unsigned long long) gva_to_gpa( vmcb->exitinfo2 ) );
2635 else
2637 printk("I%08ld,ExC=%s(%d),IP=%x:%llx,I1=%llx,I2=%llx,INT=%llx\n",
2638 intercepts_counter,
2639 exit_reasons[exit_reason], exit_reason, regs.cs,
2640 (unsigned long long) regs.rip,
2641 (unsigned long long) vmcb->exitinfo1,
2642 (unsigned long long) vmcb->exitinfo2,
2643 (unsigned long long) vmcb->exitintinfo.bytes );
2646 else if ( svm_dbg_on
2647 && exit_reason != VMEXIT_IOIO
2648 && exit_reason != VMEXIT_INTR)
2651 if (exit_reasons[exit_reason])
2653 printk("I%08ld,ExC=%s(%d),IP=%x:%llx,I1=%llx,I2=%llx,INT=%llx\n",
2654 intercepts_counter,
2655 exit_reasons[exit_reason], exit_reason, regs.cs,
2656 (unsigned long long) regs.rip,
2657 (unsigned long long) vmcb->exitinfo1,
2658 (unsigned long long) vmcb->exitinfo2,
2659 (unsigned long long) vmcb->exitintinfo.bytes);
2661 else
2663 printk("I%08ld,ExC=%d(0x%x),IP=%x:%llx,I1=%llx,I2=%llx,INT=%llx\n",
2664 intercepts_counter, exit_reason, exit_reason, regs.cs,
2665 (unsigned long long) regs.rip,
2666 (unsigned long long) vmcb->exitinfo1,
2667 (unsigned long long) vmcb->exitinfo2,
2668 (unsigned long long) vmcb->exitintinfo.bytes);
2672 #ifdef SVM_WALK_GUEST_PAGES
2673 if( exit_reason == VMEXIT_EXCEPTION_PF
2674 && ( ( vmcb->exitinfo2 == vmcb->rip )
2675 || vmcb->exitintinfo.bytes) )
2677 if (svm_paging_enabled(v) && !mmio_space(gva_to_gpa(vmcb->exitinfo2)))
2678 walk_shadow_and_guest_pt( vmcb->exitinfo2 );
2680 #endif
2682 intercepts_counter++;
2684 #if 0
2685 if (svm_dbg_on)
2686 do_debug = svm_do_debugout(exit_reason);
2687 #endif
2689 if (do_debug)
2691 printk("%s:+ guest_table = 0x%08x, monitor_table = 0x%08x, "
2692 "shadow_table = 0x%08x\n",
2693 __func__,
2694 (int) v->arch.guest_table.pfn,
2695 (int) v->arch.monitor_table.pfn,
2696 (int) v->arch.shadow_table.pfn);
2698 svm_dump_vmcb(__func__, vmcb);
2699 svm_dump_regs(__func__, &regs);
2700 svm_dump_inst(svm_rip2pointer(vmcb));
2703 #if defined(__i386__)
2704 #undef rip
2705 #endif
2708 #endif /* SVM_EXTRA_DEBUG */
2710 if (exit_reason == -1)
2712 printk("%s: exit_reason == -1 - did someone clobber the VMCB?\n",
2713 __func__);
2714 BUG();
2715 domain_crash_synchronous();
2718 perfc_incra(vmexits, exit_reason);
2719 eip = vmcb->rip;
2721 #ifdef SVM_EXTRA_DEBUG
2722 if (do_debug)
2724 printk("eip = %lx, exit_reason = %d (0x%x)\n",
2725 eip, exit_reason, exit_reason);
2727 #endif /* SVM_EXTRA_DEBUG */
2729 TRACE_3D(TRC_VMX_VMEXIT, v->domain->domain_id, eip, exit_reason);
2731 switch (exit_reason)
2733 case VMEXIT_EXCEPTION_DB:
2735 #ifdef XEN_DEBUGGER
2736 svm_debug_save_cpu_user_regs(&regs);
2737 pdb_handle_exception(1, &regs, 1);
2738 svm_debug_restore_cpu_user_regs(&regs);
2739 #else
2740 svm_store_cpu_user_regs(&regs, v);
2741 domain_pause_for_debugger();
2742 #endif
2744 break;
2746 case VMEXIT_NMI:
2747 do_nmi(&regs, 0);
2748 break;
2750 case VMEXIT_SMI:
2751 /*
2752 * For asynchronous SMIs, we just need to allow global interrupts so
2753 * that the SMI is taken properly in the context of the host. The
2754 * standard code does an STGI after the VMEXIT, which should accomplish
2755 * this task. Continue as normal and restart the guest.
2756 */
2757 break;
2759 case VMEXIT_INIT:
2760 /*
2761 * Nothing to do, in fact we should never get to this point.
2762 */
2763 break;
2765 case VMEXIT_EXCEPTION_BP:
2766 #ifdef XEN_DEBUGGER
2767 svm_debug_save_cpu_user_regs(&regs);
2768 pdb_handle_exception(3, &regs, 1);
2769 svm_debug_restore_cpu_user_regs(&regs);
2770 #else
2771 if ( test_bit(_DOMF_debugging, &v->domain->domain_flags) )
2772 domain_pause_for_debugger();
2773 else
2774 svm_inject_exception(v, TRAP_int3, 0, 0);
2775 #endif
2776 break;
2778 case VMEXIT_EXCEPTION_NM:
2779 svm_do_no_device_fault(vmcb);
2780 break;
2782 case VMEXIT_EXCEPTION_GP:
2783 /* This should probably not be trapped in the future */
2784 regs.error_code = vmcb->exitinfo1;
2785 svm_do_general_protection_fault(v, &regs);
2786 break;
2788 case VMEXIT_EXCEPTION_PF:
2790 unsigned long va;
2791 va = vmcb->exitinfo2;
2792 regs.error_code = vmcb->exitinfo1;
2793 HVM_DBG_LOG(DBG_LEVEL_VMMU,
2794 "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx",
2795 (unsigned long)regs.eax, (unsigned long)regs.ebx,
2796 (unsigned long)regs.ecx, (unsigned long)regs.edx,
2797 (unsigned long)regs.esi, (unsigned long)regs.edi);
2799 v->arch.hvm_vcpu.mmio_op.inst_decoder_regs = &regs;
2801 //printk("PF1\n");
2802 if (!(error = svm_do_page_fault(va, &regs)))
2804 /* Inject #PF using the interruption-information fields */
2805 svm_inject_exception(v, TRAP_page_fault, 1, regs.error_code);
2807 v->arch.hvm_svm.cpu_cr2 = va;
2808 vmcb->cr2 = va;
2809 TRACE_3D(TRC_VMX_INT, v->domain->domain_id,
2810 VMEXIT_EXCEPTION_PF, va);
2812 break;
2815 case VMEXIT_EXCEPTION_DF:
2816 printk("Guest double fault");
2817 BUG();
2818 break;
2820 case VMEXIT_INTR:
2821 raise_softirq(SCHEDULE_SOFTIRQ);
2822 break;
2825 case VMEXIT_INVD:
2826 svm_vmexit_do_invd(vmcb);
2827 break;
2829 case VMEXIT_GDTR_WRITE:
2830 printk("WRITE to GDTR\n");
2831 break;
2833 case VMEXIT_TASK_SWITCH:
2834 __hvm_bug(&regs);
2835 break;
2837 case VMEXIT_CPUID:
2838 svm_vmexit_do_cpuid(vmcb, regs.eax, &regs);
2839 break;
2841 case VMEXIT_HLT:
2842 svm_vmexit_do_hlt(vmcb);
2843 break;
2845 case VMEXIT_INVLPG:
2846 svm_handle_invlpg(0, &regs);
2847 break;
2849 case VMEXIT_INVLPGA:
2850 svm_handle_invlpg(1, &regs);
2851 break;
2853 case VMEXIT_VMMCALL:
2854 svm_do_vmmcall(v, &regs);
2855 break;
2857 case VMEXIT_CR0_READ:
2858 svm_cr_access(v, 0, TYPE_MOV_FROM_CR, &regs);
2859 break;
2861 case VMEXIT_CR2_READ:
2862 svm_cr_access(v, 2, TYPE_MOV_FROM_CR, &regs);
2863 break;
2865 case VMEXIT_CR3_READ:
2866 svm_cr_access(v, 3, TYPE_MOV_FROM_CR, &regs);
2867 break;
2869 case VMEXIT_CR4_READ:
2870 svm_cr_access(v, 4, TYPE_MOV_FROM_CR, &regs);
2871 break;
2873 case VMEXIT_CR8_READ:
2874 svm_cr_access(v, 8, TYPE_MOV_FROM_CR, &regs);
2875 break;
2877 case VMEXIT_CR0_WRITE:
2878 svm_cr_access(v, 0, TYPE_MOV_TO_CR, &regs);
2879 break;
2881 case VMEXIT_CR2_WRITE:
2882 svm_cr_access(v, 2, TYPE_MOV_TO_CR, &regs);
2883 break;
2885 case VMEXIT_CR3_WRITE:
2886 svm_cr_access(v, 3, TYPE_MOV_TO_CR, &regs);
2887 local_flush_tlb();
2888 break;
2890 case VMEXIT_CR4_WRITE:
2891 svm_cr_access(v, 4, TYPE_MOV_TO_CR, &regs);
2892 break;
2894 case VMEXIT_CR8_WRITE:
2895 svm_cr_access(v, 8, TYPE_MOV_TO_CR, &regs);
2896 break;
2898 case VMEXIT_DR0_READ:
2899 svm_dr_access(v, 0, TYPE_MOV_FROM_DR, &regs);
2900 break;
2902 case VMEXIT_DR1_READ:
2903 svm_dr_access(v, 1, TYPE_MOV_FROM_DR, &regs);
2904 break;
2906 case VMEXIT_DR2_READ:
2907 svm_dr_access(v, 2, TYPE_MOV_FROM_DR, &regs);
2908 break;
2910 case VMEXIT_DR3_READ:
2911 svm_dr_access(v, 3, TYPE_MOV_FROM_DR, &regs);
2912 break;
2914 case VMEXIT_DR6_READ:
2915 svm_dr_access(v, 6, TYPE_MOV_FROM_DR, &regs);
2916 break;
2918 case VMEXIT_DR7_READ:
2919 svm_dr_access(v, 7, TYPE_MOV_FROM_DR, &regs);
2920 break;
2922 case VMEXIT_DR0_WRITE:
2923 svm_dr_access(v, 0, TYPE_MOV_TO_DR, &regs);
2924 break;
2926 case VMEXIT_DR1_WRITE:
2927 svm_dr_access(v, 1, TYPE_MOV_TO_DR, &regs);
2928 break;
2930 case VMEXIT_DR2_WRITE:
2931 svm_dr_access(v, 2, TYPE_MOV_TO_DR, &regs);
2932 break;
2934 case VMEXIT_DR3_WRITE:
2935 svm_dr_access(v, 3, TYPE_MOV_TO_DR, &regs);
2936 break;
2938 case VMEXIT_DR6_WRITE:
2939 svm_dr_access(v, 6, TYPE_MOV_TO_DR, &regs);
2940 break;
2942 case VMEXIT_DR7_WRITE:
2943 svm_dr_access(v, 7, TYPE_MOV_TO_DR, &regs);
2944 break;
2946 case VMEXIT_IOIO:
2947 svm_io_instruction(v, &regs);
2948 break;
2950 case VMEXIT_MSR:
2951 svm_do_msr_access(v, &regs);
2952 break;
2954 case VMEXIT_SHUTDOWN:
2955 printk("Guest shutdown exit\n");
2956 domain_crash_synchronous();
2957 break;
2959 default:
2960 printk("unexpected VMEXIT: exit reason = 0x%x, exitinfo1 = %llx, "
2961 "exitinfo2 = %llx\n", exit_reason,
2962 (unsigned long long)vmcb->exitinfo1,
2963 (unsigned long long)vmcb->exitinfo2);
2964 __hvm_bug(&regs); /* should not happen */
2965 break;
2968 #ifdef SVM_EXTRA_DEBUG
2969 if (do_debug)
2971 printk("%s: Done switch on vmexit_code\n", __func__);
2972 svm_dump_regs(__func__, &regs);
2975 if (do_debug)
2977 printk("vmexit_handler():- guest_table = 0x%08x, "
2978 "monitor_table = 0x%08x, shadow_table = 0x%08x\n",
2979 (int)v->arch.guest_table.pfn,
2980 (int)v->arch.monitor_table.pfn,
2981 (int)v->arch.shadow_table.pfn);
2982 printk("svm_vmexit_handler: Returning\n");
2984 #endif
2986 return;
2989 asmlinkage void svm_load_cr2(void)
2991 struct vcpu *v = current;
2993 local_irq_disable();
2994 asm volatile("mov %0,%%cr2": :"r" (v->arch.hvm_svm.cpu_cr2));
2997 asmlinkage void svm_asid(void)
2999 struct vcpu *v = current;
3000 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
3002 /*
3003 * If we need to assign a new ASID, or if we are switching cores, retire
3004 * the ASID used on the old core and assign a new ASID on the current core.
3005 */
3006 if ( test_bit( ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags ) ||
3007 ( v->arch.hvm_svm.asid_core != v->arch.hvm_svm.launch_core )) {
3008 /* recycle asid */
3009 if ( !asidpool_assign_next( vmcb, 1,
3010 v->arch.hvm_svm.asid_core, v->arch.hvm_svm.launch_core )) {
3011 /* If we get here, we have a major problem */
3012 domain_crash_synchronous();
3015 v->arch.hvm_svm.asid_core = v->arch.hvm_svm.launch_core;
3016 clear_bit( ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags );
3019 /* make sure the HSA is set for the current core */
3020 set_hsa_to_guest( &v->arch.hvm_svm );
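/*
 * Both svm_load_cr2() and svm_asid() are asmlinkage because they are meant to
 * be called from the low-level VM entry stub (presumably just before VMRUN):
 * the former restores the guest's CR2, the latter guarantees a valid ASID and
 * host save area on the core this vcpu is about to run on.
 */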
3023 /*
3024 * Local variables:
3025 * mode: C
3026 * c-set-style: "BSD"
3027 * c-basic-offset: 4
3028 * tab-width: 4
3029 * indent-tabs-mode: nil
3030 * End:
3031 */