ia64/xen-unstable: xen/arch/x86/hvm/svm/svm.c @ 13891:0cbf1586a432


[HVM][SVM] Do not advertise FFXSR feature.
Signed-off-by: Keir Fraser <keir@xensource.com>
author Keir Fraser <keir@xensource.com>
date Thu Feb 08 18:01:42 2007 +0000 (2007-02-08)
parents 7a7509570af9
children 6daa91dc9247
1 /*
2 * svm.c: handling SVM architecture-related VM exits
3 * Copyright (c) 2004, Intel Corporation.
4 * Copyright (c) 2005, AMD Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
17 * Place - Suite 330, Boston, MA 02111-1307 USA.
18 *
19 */
21 #include <xen/config.h>
22 #include <xen/init.h>
23 #include <xen/lib.h>
24 #include <xen/trace.h>
25 #include <xen/sched.h>
26 #include <xen/irq.h>
27 #include <xen/softirq.h>
28 #include <xen/hypercall.h>
29 #include <xen/domain_page.h>
30 #include <asm/current.h>
31 #include <asm/io.h>
32 #include <asm/shadow.h>
33 #include <asm/regs.h>
34 #include <asm/cpufeature.h>
35 #include <asm/processor.h>
36 #include <asm/types.h>
37 #include <asm/msr.h>
38 #include <asm/spinlock.h>
39 #include <asm/hvm/hvm.h>
40 #include <asm/hvm/support.h>
41 #include <asm/hvm/io.h>
42 #include <asm/hvm/svm/svm.h>
43 #include <asm/hvm/svm/vmcb.h>
44 #include <asm/hvm/svm/emulate.h>
45 #include <asm/hvm/svm/vmmcall.h>
46 #include <asm/hvm/svm/intr.h>
47 #include <asm/x86_emulate.h>
48 #include <public/sched.h>
49 #include <asm/hvm/vpt.h>
51 #define SVM_EXTRA_DEBUG
53 #define set_segment_register(name, value) \
54 __asm__ __volatile__ ( "movw %%ax ,%%" STR(name) "" : : "a" (value) )
56 /* External functions. We should move these to some suitable header file(s) */
58 extern int inst_copy_from_guest(unsigned char *buf, unsigned long guest_eip,
59 int inst_len);
60 extern asmlinkage void do_IRQ(struct cpu_user_regs *);
61 extern void svm_dump_inst(unsigned long eip);
62 extern int svm_dbg_on;
63 void svm_dump_regs(const char *from, struct cpu_user_regs *regs);
65 static int svm_do_vmmcall_reset_to_realmode(struct vcpu *v,
66 struct cpu_user_regs *regs);
68 /* va of hardware host save area */
69 static void *hsa[NR_CPUS] __read_mostly;
71 /* vmcb used for extended host state */
72 static void *root_vmcb[NR_CPUS] __read_mostly;
74 /* physical address of above for host VMSAVE/VMLOAD */
75 u64 root_vmcb_pa[NR_CPUS] __read_mostly;
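/* Queue an exception for injection into the guest: fill in the VMCB
 * event-injection field so the event is delivered on the next VMRUN. */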
77 static inline void svm_inject_exception(struct vcpu *v, int trap,
78 int ev, int error_code)
79 {
80 eventinj_t event;
81 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
83 event.bytes = 0;
84 event.fields.v = 1;
85 event.fields.type = EVENTTYPE_EXCEPTION;
86 event.fields.vector = trap;
87 event.fields.ev = ev;
88 event.fields.errorcode = error_code;
90 ASSERT(vmcb->eventinj.fields.v == 0);
92 vmcb->eventinj = event;
93 }
95 static void stop_svm(void)
96 {
97 u32 eax, edx;
98 int cpu = smp_processor_id();
100 /* We turn off the EFER_SVME bit. */
101 rdmsr(MSR_EFER, eax, edx);
102 eax &= ~EFER_SVME;
103 wrmsr(MSR_EFER, eax, edx);
105 /* release the HSA */
106 free_host_save_area(hsa[cpu]);
107 hsa[cpu] = NULL;
108 wrmsr(MSR_K8_VM_HSAVE_PA, 0, 0 );
110 /* free up the root vmcb */
111 free_vmcb(root_vmcb[cpu]);
112 root_vmcb[cpu] = NULL;
113 root_vmcb_pa[cpu] = 0;
114 }
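/* Copy the guest's general registers and (shadowed) control registers
 * out of the VMCB into the caller-supplied buffers. */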
116 static void svm_store_cpu_guest_regs(
117 struct vcpu *v, struct cpu_user_regs *regs, unsigned long *crs)
118 {
119 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
121 if ( regs != NULL )
122 {
123 regs->eip = vmcb->rip;
124 regs->esp = vmcb->rsp;
125 regs->eflags = vmcb->rflags;
126 regs->cs = vmcb->cs.sel;
127 regs->ds = vmcb->ds.sel;
128 regs->es = vmcb->es.sel;
129 regs->ss = vmcb->ss.sel;
130 regs->gs = vmcb->gs.sel;
131 regs->fs = vmcb->fs.sel;
132 }
134 if ( crs != NULL )
135 {
136 /* Returning the guest's regs */
137 crs[0] = v->arch.hvm_svm.cpu_shadow_cr0;
138 crs[2] = v->arch.hvm_svm.cpu_cr2;
139 crs[3] = v->arch.hvm_svm.cpu_cr3;
140 crs[4] = v->arch.hvm_svm.cpu_shadow_cr4;
141 }
142 }
144 static int svm_paging_enabled(struct vcpu *v)
145 {
146 unsigned long cr0;
148 cr0 = v->arch.hvm_svm.cpu_shadow_cr0;
150 return (cr0 & X86_CR0_PE) && (cr0 & X86_CR0_PG);
151 }
153 static int svm_pae_enabled(struct vcpu *v)
154 {
155 unsigned long cr4;
157 if(!svm_paging_enabled(v))
158 return 0;
160 cr4 = v->arch.hvm_svm.cpu_shadow_cr4;
162 return (cr4 & X86_CR4_PAE);
163 }
165 static int svm_long_mode_enabled(struct vcpu *v)
166 {
167 return test_bit(SVM_CPU_STATE_LMA_ENABLED, &v->arch.hvm_svm.cpu_state);
168 }
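/* RDMSR handler for the MSRs that SVM keeps in the VMCB (EFER, segment
 * bases, SYSCALL MSRs); returns 0 if the MSR is not handled here. */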
170 static inline int long_mode_do_msr_read(struct cpu_user_regs *regs)
171 {
172 u64 msr_content = 0;
173 struct vcpu *v = current;
174 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
176 switch ((u32)regs->ecx)
177 {
178 case MSR_EFER:
179 msr_content = vmcb->efer;
180 msr_content &= ~EFER_SVME;
181 break;
183 #ifdef __x86_64__
184 case MSR_FS_BASE:
185 msr_content = vmcb->fs.base;
186 goto check_long_mode;
188 case MSR_GS_BASE:
189 msr_content = vmcb->gs.base;
190 goto check_long_mode;
192 case MSR_SHADOW_GS_BASE:
193 msr_content = vmcb->kerngsbase;
194 check_long_mode:
195 if ( !svm_long_mode_enabled(v) )
196 {
197 svm_inject_exception(v, TRAP_gp_fault, 1, 0);
198 return 0;
199 }
200 break;
201 #endif
203 case MSR_STAR:
204 msr_content = vmcb->star;
205 break;
207 case MSR_LSTAR:
208 msr_content = vmcb->lstar;
209 break;
211 case MSR_CSTAR:
212 msr_content = vmcb->cstar;
213 break;
215 case MSR_SYSCALL_MASK:
216 msr_content = vmcb->sfmask;
217 break;
218 default:
219 return 0;
220 }
222 HVM_DBG_LOG(DBG_LEVEL_2, "msr_content: %"PRIx64"\n",
223 msr_content);
225 regs->eax = (u32)(msr_content >> 0);
226 regs->edx = (u32)(msr_content >> 32);
227 return 1;
228 }
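/* WRMSR counterpart: validates EFER bits and canonical addresses,
 * injecting #GP on bad values; returns 0 if the MSR is not handled here. */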
230 static inline int long_mode_do_msr_write(struct cpu_user_regs *regs)
231 {
232 u64 msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
233 u32 ecx = regs->ecx;
234 struct vcpu *v = current;
235 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
237 HVM_DBG_LOG(DBG_LEVEL_1, "msr %x msr_content %"PRIx64"\n",
238 ecx, msr_content);
240 switch ( ecx )
241 {
242 case MSR_EFER:
243 /* offending reserved bit will cause #GP */
244 if ( msr_content & ~(EFER_LME | EFER_LMA | EFER_NX | EFER_SCE) )
245 {
246 gdprintk(XENLOG_WARNING, "Trying to set reserved bit in "
247 "EFER: %"PRIx64"\n", msr_content);
248 goto gp_fault;
249 }
251 #ifdef __x86_64__
252 /* LME: 0 -> 1 */
253 if ( msr_content & EFER_LME &&
254 !test_bit(SVM_CPU_STATE_LME_ENABLED, &v->arch.hvm_svm.cpu_state))
255 {
256 if ( svm_paging_enabled(v) ||
257 !test_bit(SVM_CPU_STATE_PAE_ENABLED,
258 &v->arch.hvm_svm.cpu_state) )
259 {
260 gdprintk(XENLOG_WARNING, "Trying to set LME bit when "
261 "in paging mode or PAE bit is not set\n");
262 goto gp_fault;
263 }
264 set_bit(SVM_CPU_STATE_LME_ENABLED, &v->arch.hvm_svm.cpu_state);
265 }
267 /* We have already recorded that we want LME, so it will be set
268 * next time CR0 gets updated. So we clear that bit and continue.
269 */
270 if ((msr_content ^ vmcb->efer) & EFER_LME)
271 msr_content &= ~EFER_LME;
272 /* No update for LME/LMA since it has no effect */
273 #endif
274 vmcb->efer = msr_content | EFER_SVME;
275 break;
277 #ifdef __x86_64__
278 case MSR_FS_BASE:
279 case MSR_GS_BASE:
280 case MSR_SHADOW_GS_BASE:
281 if ( !svm_long_mode_enabled(v) )
282 goto gp_fault;
284 if ( !is_canonical_address(msr_content) )
285 goto uncanonical_address;
287 if ( ecx == MSR_FS_BASE )
288 vmcb->fs.base = msr_content;
289 else if ( ecx == MSR_GS_BASE )
290 vmcb->gs.base = msr_content;
291 else
292 vmcb->kerngsbase = msr_content;
293 break;
294 #endif
296 case MSR_STAR:
297 vmcb->star = msr_content;
298 break;
300 case MSR_LSTAR:
301 case MSR_CSTAR:
302 if ( !is_canonical_address(msr_content) )
303 goto uncanonical_address;
305 if ( ecx == MSR_LSTAR )
306 vmcb->lstar = msr_content;
307 else
308 vmcb->cstar = msr_content;
309 break;
311 case MSR_SYSCALL_MASK:
312 vmcb->sfmask = msr_content;
313 break;
315 default:
316 return 0;
317 }
319 return 1;
321 uncanonical_address:
322 HVM_DBG_LOG(DBG_LEVEL_1, "Non-canonical address for MSR write %x\n", ecx);
323 gp_fault:
324 svm_inject_exception(v, TRAP_gp_fault, 1, 0);
325 return 0;
326 }
329 #define loaddebug(_v,_reg) \
330 __asm__ __volatile__ ("mov %0,%%db" #_reg : : "r" ((_v)->debugreg[_reg]))
331 #define savedebug(_v,_reg) \
332 __asm__ __volatile__ ("mov %%db" #_reg ",%0" : : "r" ((_v)->debugreg[_reg]))
334 static inline void svm_save_dr(struct vcpu *v)
335 {
336 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
338 if ( !v->arch.hvm_vcpu.flag_dr_dirty )
339 return;
341 /* Clear the DR dirty flag and re-enable intercepts for DR accesses. */
342 v->arch.hvm_vcpu.flag_dr_dirty = 0;
343 v->arch.hvm_svm.vmcb->dr_intercepts = DR_INTERCEPT_ALL_WRITES;
345 savedebug(&v->arch.guest_context, 0);
346 savedebug(&v->arch.guest_context, 1);
347 savedebug(&v->arch.guest_context, 2);
348 savedebug(&v->arch.guest_context, 3);
349 v->arch.guest_context.debugreg[6] = vmcb->dr6;
350 v->arch.guest_context.debugreg[7] = vmcb->dr7;
351 }
354 static inline void __restore_debug_registers(struct vcpu *v)
355 {
356 loaddebug(&v->arch.guest_context, 0);
357 loaddebug(&v->arch.guest_context, 1);
358 loaddebug(&v->arch.guest_context, 2);
359 loaddebug(&v->arch.guest_context, 3);
360 /* DR6 and DR7 are loaded from the VMCB. */
361 }
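/* Save guest RIP/RSP/RFLAGS, control registers, descriptor tables and
 * segment state from the VMCB into an hvm_hw_cpu record. */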
364 int svm_vmcs_save(struct vcpu *v, struct hvm_hw_cpu *c)
365 {
366 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
368 c->eip = vmcb->rip;
370 #ifdef HVM_DEBUG_SUSPEND
371 printk("%s: eip=0x%"PRIx64".\n",
372 __func__,
373 c->eip);
374 #endif
376 c->esp = vmcb->rsp;
377 c->eflags = vmcb->rflags;
379 c->cr0 = v->arch.hvm_svm.cpu_shadow_cr0;
380 c->cr3 = v->arch.hvm_svm.cpu_cr3;
381 c->cr4 = v->arch.hvm_svm.cpu_shadow_cr4;
383 #ifdef HVM_DEBUG_SUSPEND
384 printk("%s: cr3=0x%"PRIx64", cr0=0x%"PRIx64", cr4=0x%"PRIx64".\n",
385 __func__,
386 c->cr3,
387 c->cr0,
388 c->cr4);
389 #endif
391 c->idtr_limit = vmcb->idtr.limit;
392 c->idtr_base = vmcb->idtr.base;
394 c->gdtr_limit = vmcb->gdtr.limit;
395 c->gdtr_base = vmcb->gdtr.base;
397 c->cs_sel = vmcb->cs.sel;
398 c->cs_limit = vmcb->cs.limit;
399 c->cs_base = vmcb->cs.base;
400 c->cs_arbytes = vmcb->cs.attr.bytes;
402 c->ds_sel = vmcb->ds.sel;
403 c->ds_limit = vmcb->ds.limit;
404 c->ds_base = vmcb->ds.base;
405 c->ds_arbytes = vmcb->ds.attr.bytes;
407 c->es_sel = vmcb->es.sel;
408 c->es_limit = vmcb->es.limit;
409 c->es_base = vmcb->es.base;
410 c->es_arbytes = vmcb->es.attr.bytes;
412 c->ss_sel = vmcb->ss.sel;
413 c->ss_limit = vmcb->ss.limit;
414 c->ss_base = vmcb->ss.base;
415 c->ss_arbytes = vmcb->ss.attr.bytes;
417 c->fs_sel = vmcb->fs.sel;
418 c->fs_limit = vmcb->fs.limit;
419 c->fs_base = vmcb->fs.base;
420 c->fs_arbytes = vmcb->fs.attr.bytes;
422 c->gs_sel = vmcb->gs.sel;
423 c->gs_limit = vmcb->gs.limit;
424 c->gs_base = vmcb->gs.base;
425 c->gs_arbytes = vmcb->gs.attr.bytes;
427 c->tr_sel = vmcb->tr.sel;
428 c->tr_limit = vmcb->tr.limit;
429 c->tr_base = vmcb->tr.base;
430 c->tr_arbytes = vmcb->tr.attr.bytes;
432 c->ldtr_sel = vmcb->ldtr.sel;
433 c->ldtr_limit = vmcb->ldtr.limit;
434 c->ldtr_base = vmcb->ldtr.base;
435 c->ldtr_arbytes = vmcb->ldtr.attr.bytes;
437 c->sysenter_cs = vmcb->sysenter_cs;
438 c->sysenter_esp = vmcb->sysenter_esp;
439 c->sysenter_eip = vmcb->sysenter_eip;
441 return 1;
442 }
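/* Load an hvm_hw_cpu record back into the VMCB, revalidating the guest
 * CR3 and refreshing the shadow paging mode. */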
445 int svm_vmcb_restore(struct vcpu *v, struct hvm_hw_cpu *c)
446 {
447 unsigned long mfn, old_base_mfn;
448 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
450 vmcb->rip = c->eip;
451 vmcb->rsp = c->esp;
452 vmcb->rflags = c->eflags;
454 v->arch.hvm_svm.cpu_shadow_cr0 = c->cr0;
456 #ifdef HVM_DEBUG_SUSPEND
457 printk("%s: cr3=0x%"PRIx64", cr0=0x%"PRIx64", cr4=0x%"PRIx64".\n",
458 __func__,
459 c->cr3,
460 c->cr0,
461 c->cr4);
462 #endif
464 if (!svm_paging_enabled(v)) {
465 printk("%s: paging not enabled.\n", __func__);
466 goto skip_cr3;
467 }
469 if (c->cr3 == v->arch.hvm_svm.cpu_cr3) {
470 /*
471 * This is a simple TLB flush, implying the guest has
472 * removed some translation or changed page attributes.
473 * We simply invalidate the shadow.
474 */
475 mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
476 if (mfn != pagetable_get_pfn(v->arch.guest_table)) {
477 goto bad_cr3;
478 }
479 } else {
480 /*
481 * If different, make a shadow. Check if the PDBR is valid
482 * first.
483 */
484 HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 c->cr3 = %"PRIx64"", c->cr3);
485 /* current!=vcpu as not called by arch_svm_do_launch */
486 mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
487 if( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain)) {
488 goto bad_cr3;
489 }
490 old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
491 v->arch.guest_table = pagetable_from_pfn(mfn);
492 if (old_base_mfn)
493 put_page(mfn_to_page(old_base_mfn));
494 /*
495 * arch.shadow_table should now hold the next CR3 for shadow
496 */
497 v->arch.hvm_svm.cpu_cr3 = c->cr3;
498 }
500 skip_cr3:
501 #if defined(__x86_64__) && 0
502 if (vmx_long_mode_enabled(v)) {
503 unsigned long vm_entry_value;
504 vm_entry_value = __vmread(VM_ENTRY_CONTROLS);
505 vm_entry_value |= VM_ENTRY_IA32E_MODE;
506 __vmwrite(VM_ENTRY_CONTROLS, vm_entry_value);
507 }
508 #endif
510 vmcb->cr4 = c->cr4 | SVM_CR4_HOST_MASK;
511 v->arch.hvm_svm.cpu_shadow_cr4 = c->cr4;
513 vmcb->idtr.limit = c->idtr_limit;
514 vmcb->idtr.base = c->idtr_base;
516 vmcb->gdtr.limit = c->gdtr_limit;
517 vmcb->gdtr.base = c->gdtr_base;
519 vmcb->cs.sel = c->cs_sel;
520 vmcb->cs.limit = c->cs_limit;
521 vmcb->cs.base = c->cs_base;
522 vmcb->cs.attr.bytes = c->cs_arbytes;
524 vmcb->ds.sel = c->ds_sel;
525 vmcb->ds.limit = c->ds_limit;
526 vmcb->ds.base = c->ds_base;
527 vmcb->ds.attr.bytes = c->ds_arbytes;
529 vmcb->es.sel = c->es_sel;
530 vmcb->es.limit = c->es_limit;
531 vmcb->es.base = c->es_base;
532 vmcb->es.attr.bytes = c->es_arbytes;
534 vmcb->ss.sel = c->ss_sel;
535 vmcb->ss.limit = c->ss_limit;
536 vmcb->ss.base = c->ss_base;
537 vmcb->ss.attr.bytes = c->ss_arbytes;
539 vmcb->fs.sel = c->fs_sel;
540 vmcb->fs.limit = c->fs_limit;
541 vmcb->fs.base = c->fs_base;
542 vmcb->fs.attr.bytes = c->fs_arbytes;
544 vmcb->gs.sel = c->gs_sel;
545 vmcb->gs.limit = c->gs_limit;
546 vmcb->gs.base = c->gs_base;
547 vmcb->gs.attr.bytes = c->gs_arbytes;
549 vmcb->tr.sel = c->tr_sel;
550 vmcb->tr.limit = c->tr_limit;
551 vmcb->tr.base = c->tr_base;
552 vmcb->tr.attr.bytes = c->tr_arbytes;
554 vmcb->ldtr.sel = c->ldtr_sel;
555 vmcb->ldtr.limit = c->ldtr_limit;
556 vmcb->ldtr.base = c->ldtr_base;
557 vmcb->ldtr.attr.bytes = c->ldtr_arbytes;
559 vmcb->sysenter_cs = c->sysenter_cs;
560 vmcb->sysenter_esp = c->sysenter_esp;
561 vmcb->sysenter_eip = c->sysenter_eip;
563 shadow_update_paging_modes(v);
564 return 0;
566 bad_cr3:
567 gdprintk(XENLOG_ERR, "Invalid CR3 value=0x%"PRIx64"", c->cr3);
568 return -EINVAL;
569 }
572 void svm_save_cpu_state(struct vcpu *v, struct hvm_hw_cpu *data)
573 {
574 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
576 data->shadow_gs = vmcb->kerngsbase;
577 /* MSR_LSTAR, MSR_STAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_EFER */
578 data->msr_items[0] = vmcb->lstar;
579 data->msr_items[1] = vmcb->star;
580 data->msr_items[2] = vmcb->cstar;
581 data->msr_items[3] = vmcb->sfmask;
582 data->msr_items[4] = vmcb->efer;
584 data->tsc = hvm_get_guest_time(v);
586 // dump_msr_state(guest_state);
587 }
590 void svm_load_cpu_state(struct vcpu *v, struct hvm_hw_cpu *data)
591 {
592 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
594 vmcb->kerngsbase = data->shadow_gs;
595 /* MSR_LSTAR, MSR_STAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_EFER */
596 vmcb->lstar = data->msr_items[0];
597 vmcb->star = data->msr_items[1];
598 vmcb->cstar = data->msr_items[2];
599 vmcb->sfmask = data->msr_items[3];
600 vmcb->efer = data->msr_items[4];
602 hvm_set_guest_time(v, data->tsc);
604 // dump_msr_state(guest_state);
605 }
607 void svm_save_vmcb_ctxt(struct vcpu *v, struct hvm_hw_cpu *ctxt)
608 {
609 svm_save_cpu_state(v, ctxt);
610 svm_vmcs_save(v, ctxt);
611 }
613 int svm_load_vmcb_ctxt(struct vcpu *v, struct hvm_hw_cpu *ctxt)
614 {
615 svm_load_cpu_state(v, ctxt);
616 if (svm_vmcb_restore(v, ctxt)) {
617 printk("svm_vmcb restore failed!\n");
618 domain_crash(v->domain);
619 return -EINVAL;
620 }
622 return 0;
623 }
626 static inline void svm_restore_dr(struct vcpu *v)
627 {
628 if ( unlikely(v->arch.guest_context.debugreg[7] & 0xFF) )
629 __restore_debug_registers(v);
630 }
633 static int svm_realmode(struct vcpu *v)
634 {
635 unsigned long cr0 = v->arch.hvm_svm.cpu_shadow_cr0;
636 unsigned long eflags = v->arch.hvm_svm.vmcb->rflags;
638 return (eflags & X86_EFLAGS_VM) || !(cr0 & X86_CR0_PE);
639 }
641 static int svm_guest_x86_mode(struct vcpu *v)
642 {
643 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
645 if ( (vmcb->efer & EFER_LMA) && vmcb->cs.attr.fields.l )
646 return 8;
648 if ( svm_realmode(v) )
649 return 2;
651 return (vmcb->cs.attr.fields.db ? 4 : 2);
652 }
654 void svm_update_host_cr3(struct vcpu *v)
655 {
656 /* SVM doesn't have a HOST_CR3 equivalent to update. */
657 }
659 void svm_update_guest_cr3(struct vcpu *v)
660 {
661 v->arch.hvm_svm.vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3;
662 }
664 static void svm_update_vtpr(struct vcpu *v, unsigned long value)
665 {
666 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
668 vmcb->vintr.fields.tpr = value & 0x0f;
669 }
671 unsigned long svm_get_ctrl_reg(struct vcpu *v, unsigned int num)
672 {
673 switch ( num )
674 {
675 case 0:
676 return v->arch.hvm_svm.cpu_shadow_cr0;
677 case 2:
678 return v->arch.hvm_svm.cpu_cr2;
679 case 3:
680 return v->arch.hvm_svm.cpu_cr3;
681 case 4:
682 return v->arch.hvm_svm.cpu_shadow_cr4;
683 default:
684 BUG();
685 }
686 return 0; /* dummy */
687 }
689 static unsigned long svm_get_segment_base(struct vcpu *v, enum x86_segment seg)
690 {
691 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
692 int long_mode = 0;
694 #ifdef __x86_64__
695 long_mode = vmcb->cs.attr.fields.l && (vmcb->efer & EFER_LMA);
696 #endif
697 switch ( seg )
698 {
699 case x86_seg_cs: return long_mode ? 0 : vmcb->cs.base;
700 case x86_seg_ds: return long_mode ? 0 : vmcb->ds.base;
701 case x86_seg_es: return long_mode ? 0 : vmcb->es.base;
702 case x86_seg_fs: return vmcb->fs.base;
703 case x86_seg_gs: return vmcb->gs.base;
704 case x86_seg_ss: return long_mode ? 0 : vmcb->ss.base;
705 case x86_seg_tr: return vmcb->tr.base;
706 case x86_seg_gdtr: return vmcb->gdtr.base;
707 case x86_seg_idtr: return vmcb->idtr.base;
708 case x86_seg_ldtr: return vmcb->ldtr.base;
709 }
710 BUG();
711 return 0;
712 }
714 static void svm_get_segment_register(struct vcpu *v, enum x86_segment seg,
715 struct segment_register *reg)
716 {
717 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
718 switch ( seg )
719 {
720 case x86_seg_cs: memcpy(reg, &vmcb->cs, sizeof(*reg)); break;
721 case x86_seg_ds: memcpy(reg, &vmcb->ds, sizeof(*reg)); break;
722 case x86_seg_es: memcpy(reg, &vmcb->es, sizeof(*reg)); break;
723 case x86_seg_fs: memcpy(reg, &vmcb->fs, sizeof(*reg)); break;
724 case x86_seg_gs: memcpy(reg, &vmcb->gs, sizeof(*reg)); break;
725 case x86_seg_ss: memcpy(reg, &vmcb->ss, sizeof(*reg)); break;
726 case x86_seg_tr: memcpy(reg, &vmcb->tr, sizeof(*reg)); break;
727 case x86_seg_gdtr: memcpy(reg, &vmcb->gdtr, sizeof(*reg)); break;
728 case x86_seg_idtr: memcpy(reg, &vmcb->idtr, sizeof(*reg)); break;
729 case x86_seg_ldtr: memcpy(reg, &vmcb->ldtr, sizeof(*reg)); break;
730 default: BUG();
731 }
732 }
734 /* Make sure that xen intercepts any FP accesses from current */
735 static void svm_stts(struct vcpu *v)
736 {
737 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
739 /*
740 * If the guest does not have TS enabled then we must cause and handle an
741 * exception on first use of the FPU. If the guest *does* have TS enabled
742 * then this is not necessary: no FPU activity can occur until the guest
743 * clears CR0.TS, and we will initialise the FPU when that happens.
744 */
745 if ( !(v->arch.hvm_svm.cpu_shadow_cr0 & X86_CR0_TS) )
746 {
747 v->arch.hvm_svm.vmcb->exception_intercepts |= EXCEPTION_BITMAP_NM;
748 vmcb->cr0 |= X86_CR0_TS;
749 }
750 }
753 static void svm_set_tsc_offset(struct vcpu *v, u64 offset)
754 {
755 v->arch.hvm_svm.vmcb->tsc_offset = offset;
756 }
759 static void svm_init_ap_context(
760 struct vcpu_guest_context *ctxt, int vcpuid, int trampoline_vector)
761 {
762 memset(ctxt, 0, sizeof(*ctxt));
764 /*
765 * We execute the trampoline code in real mode. The trampoline vector
766 * passed to us is page aligned and is the physical frame number for
767 * the code.
768 */
769 ctxt->user_regs.eip = 0x0;
770 ctxt->user_regs.cs = (trampoline_vector << 8);
771 }
773 static void svm_init_hypercall_page(struct domain *d, void *hypercall_page)
774 {
775 char *p;
776 int i;
778 memset(hypercall_page, 0, PAGE_SIZE);
780 for ( i = 0; i < (PAGE_SIZE / 32); i++ )
781 {
782 p = (char *)(hypercall_page + (i * 32));
783 *(u8 *)(p + 0) = 0xb8; /* mov imm32, %eax */
784 *(u32 *)(p + 1) = i;
785 *(u8 *)(p + 5) = 0x0f; /* vmmcall */
786 *(u8 *)(p + 6) = 0x01;
787 *(u8 *)(p + 7) = 0xd9;
788 *(u8 *)(p + 8) = 0xc3; /* ret */
789 }
791 /* Don't support HYPERVISOR_iret at the moment */
792 *(u16 *)(hypercall_page + (__HYPERVISOR_iret * 32)) = 0x0b0f; /* ud2 */
793 }
796 int svm_dbg_on = 0;
798 static inline int svm_do_debugout(unsigned long exit_code)
799 {
800 int i;
802 static unsigned long counter = 0;
803 static unsigned long works[] =
804 {
805 VMEXIT_IOIO,
806 VMEXIT_HLT,
807 VMEXIT_CPUID,
808 VMEXIT_DR0_READ,
809 VMEXIT_DR1_READ,
810 VMEXIT_DR2_READ,
811 VMEXIT_DR3_READ,
812 VMEXIT_DR6_READ,
813 VMEXIT_DR7_READ,
814 VMEXIT_DR0_WRITE,
815 VMEXIT_DR1_WRITE,
816 VMEXIT_DR2_WRITE,
817 VMEXIT_DR3_WRITE,
818 VMEXIT_CR0_READ,
819 VMEXIT_CR0_WRITE,
820 VMEXIT_CR3_READ,
821 VMEXIT_CR4_READ,
822 VMEXIT_MSR,
823 VMEXIT_CR0_WRITE,
824 VMEXIT_CR3_WRITE,
825 VMEXIT_CR4_WRITE,
826 VMEXIT_EXCEPTION_PF,
827 VMEXIT_INTR,
828 VMEXIT_INVLPG,
829 VMEXIT_EXCEPTION_NM
830 };
833 #if 0
834 if (svm_dbg_on && exit_code != 0x7B)
835 return 1;
836 #endif
838 counter++;
840 #if 0
841 if ((exit_code == 0x4E
842 || exit_code == VMEXIT_CR0_READ
843 || exit_code == VMEXIT_CR0_WRITE)
844 && counter < 200000)
845 return 0;
847 if ((exit_code == 0x4E) && counter < 500000)
848 return 0;
849 #endif
851 for (i = 0; i < sizeof(works) / sizeof(works[0]); i++)
852 if (exit_code == works[i])
853 return 0;
855 return 1;
856 }
858 static void save_svm_cpu_user_regs(struct vcpu *v, struct cpu_user_regs *ctxt)
859 {
860 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
862 ASSERT(vmcb);
864 ctxt->eax = vmcb->rax;
865 ctxt->ss = vmcb->ss.sel;
866 ctxt->esp = vmcb->rsp;
867 ctxt->eflags = vmcb->rflags;
868 ctxt->cs = vmcb->cs.sel;
869 ctxt->eip = vmcb->rip;
871 ctxt->gs = vmcb->gs.sel;
872 ctxt->fs = vmcb->fs.sel;
873 ctxt->es = vmcb->es.sel;
874 ctxt->ds = vmcb->ds.sel;
875 }
877 static void svm_store_cpu_user_regs(struct cpu_user_regs *regs, struct vcpu *v)
878 {
879 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
881 regs->eip = vmcb->rip;
882 regs->esp = vmcb->rsp;
883 regs->eflags = vmcb->rflags;
884 regs->cs = vmcb->cs.sel;
885 regs->ds = vmcb->ds.sel;
886 regs->es = vmcb->es.sel;
887 regs->ss = vmcb->ss.sel;
888 }
890 /* XXX Use svm_load_cpu_guest_regs instead */
891 static void svm_load_cpu_user_regs(struct vcpu *v, struct cpu_user_regs *regs)
892 {
893 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
894 u32 *intercepts = &v->arch.hvm_svm.vmcb->exception_intercepts;
896 /* Write the guest register value into VMCB */
897 vmcb->rax = regs->eax;
898 vmcb->ss.sel = regs->ss;
899 vmcb->rsp = regs->esp;
900 vmcb->rflags = regs->eflags | 2UL;
901 vmcb->cs.sel = regs->cs;
902 vmcb->rip = regs->eip;
903 if (regs->eflags & EF_TF)
904 *intercepts |= EXCEPTION_BITMAP_DB;
905 else
906 *intercepts &= ~EXCEPTION_BITMAP_DB;
907 }
909 static void svm_load_cpu_guest_regs(
910 struct vcpu *v, struct cpu_user_regs *regs)
911 {
912 svm_load_cpu_user_regs(v, regs);
913 }
915 static void arch_svm_do_launch(struct vcpu *v)
916 {
917 svm_do_launch(v);
919 if ( v->vcpu_id != 0 )
920 {
921 cpu_user_regs_t *regs = &current->arch.guest_context.user_regs;
922 u16 cs_sel = regs->cs;
923 /*
924 * This is the launch of an AP; set state so that we begin executing
925 * the trampoline code in real-mode.
926 */
927 svm_do_vmmcall_reset_to_realmode(v, regs);
928 /* Adjust the state to execute the trampoline code.*/
929 v->arch.hvm_svm.vmcb->rip = 0;
930 v->arch.hvm_svm.vmcb->cs.sel= cs_sel;
931 v->arch.hvm_svm.vmcb->cs.base = (cs_sel << 4);
932 }
934 reset_stack_and_jump(svm_asm_do_launch);
935 }
937 static void svm_ctxt_switch_from(struct vcpu *v)
938 {
939 svm_save_dr(v);
940 }
942 static void svm_ctxt_switch_to(struct vcpu *v)
943 {
944 #ifdef __x86_64__
945 /*
946 * This is required because VMRUN performs consistency checks,
947 * and some of the DOM0 selectors point to invalid GDT
948 * locations, which causes AMD processors to shut down.
950 */
951 set_segment_register(ds, 0);
952 set_segment_register(es, 0);
953 set_segment_register(ss, 0);
954 #endif
955 svm_restore_dr(v);
956 }
958 static int svm_vcpu_initialise(struct vcpu *v)
959 {
960 int rc;
962 v->arch.schedule_tail = arch_svm_do_launch;
963 v->arch.ctxt_switch_from = svm_ctxt_switch_from;
964 v->arch.ctxt_switch_to = svm_ctxt_switch_to;
966 v->arch.hvm_svm.saved_irq_vector = -1;
968 if ( (rc = svm_create_vmcb(v)) != 0 )
969 {
970 dprintk(XENLOG_WARNING,
971 "Failed to create VMCB for vcpu %d: err=%d.\n",
972 v->vcpu_id, rc);
973 return rc;
974 }
976 return 0;
977 }
979 static void svm_vcpu_destroy(struct vcpu *v)
980 {
981 svm_destroy_vmcb(v);
982 }
984 static void svm_hvm_inject_exception(
985 unsigned int trapnr, int errcode, unsigned long cr2)
986 {
987 struct vcpu *v = current;
988 svm_inject_exception(v, trapnr, (errcode != -1), errcode);
989 if ( trapnr == TRAP_page_fault )
990 v->arch.hvm_svm.vmcb->cr2 = v->arch.hvm_svm.cpu_cr2 = cr2;
991 }
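/* Per-CPU SVM bring-up: enable EFER.SVME, program the host save area,
 * allocate the root VMCB and register the SVM hvm_funcs table. */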
993 int start_svm(void)
994 {
995 u32 eax, ecx, edx;
996 u32 phys_hsa_lo, phys_hsa_hi;
997 u64 phys_hsa;
998 int cpu = smp_processor_id();
1000 /* Xen does not fill x86_capability words except 0. */
1001 ecx = cpuid_ecx(0x80000001);
1002 boot_cpu_data.x86_capability[5] = ecx;
1004 if (!(test_bit(X86_FEATURE_SVME, &boot_cpu_data.x86_capability)))
1005 return 0;
1007 /* check whether SVM feature is disabled in BIOS */
1008 rdmsr(MSR_K8_VM_CR, eax, edx);
1009 if ( eax & K8_VMCR_SVME_DISABLE )
1010 {
1011 printk("AMD SVM Extension is disabled in BIOS.\n");
1012 return 0;
1013 }
1015 if (!(hsa[cpu] = alloc_host_save_area()))
1016 return 0;
1018 rdmsr(MSR_EFER, eax, edx);
1019 eax |= EFER_SVME;
1020 wrmsr(MSR_EFER, eax, edx);
1021 printk("AMD SVM Extension is enabled for cpu %d.\n", cpu );
1023 /* Initialize the HSA for this core */
1024 phys_hsa = (u64) virt_to_maddr(hsa[cpu]);
1025 phys_hsa_lo = (u32) phys_hsa;
1026 phys_hsa_hi = (u32) (phys_hsa >> 32);
1027 wrmsr(MSR_K8_VM_HSAVE_PA, phys_hsa_lo, phys_hsa_hi);
1029 if (!(root_vmcb[cpu] = alloc_vmcb()))
1030 return 0;
1031 root_vmcb_pa[cpu] = virt_to_maddr(root_vmcb[cpu]);
1033 if (cpu == 0)
1034 setup_vmcb_dump();
1036 /* Setup HVM interfaces */
1037 hvm_funcs.disable = stop_svm;
1039 hvm_funcs.vcpu_initialise = svm_vcpu_initialise;
1040 hvm_funcs.vcpu_destroy = svm_vcpu_destroy;
1042 hvm_funcs.store_cpu_guest_regs = svm_store_cpu_guest_regs;
1043 hvm_funcs.load_cpu_guest_regs = svm_load_cpu_guest_regs;
1045 hvm_funcs.save_cpu_ctxt = svm_save_vmcb_ctxt;
1046 hvm_funcs.load_cpu_ctxt = svm_load_vmcb_ctxt;
1048 hvm_funcs.paging_enabled = svm_paging_enabled;
1049 hvm_funcs.long_mode_enabled = svm_long_mode_enabled;
1050 hvm_funcs.pae_enabled = svm_pae_enabled;
1051 hvm_funcs.guest_x86_mode = svm_guest_x86_mode;
1052 hvm_funcs.get_guest_ctrl_reg = svm_get_ctrl_reg;
1053 hvm_funcs.get_segment_base = svm_get_segment_base;
1054 hvm_funcs.get_segment_register = svm_get_segment_register;
1056 hvm_funcs.update_host_cr3 = svm_update_host_cr3;
1057 hvm_funcs.update_guest_cr3 = svm_update_guest_cr3;
1059 hvm_funcs.update_vtpr = svm_update_vtpr;
1061 hvm_funcs.stts = svm_stts;
1062 hvm_funcs.set_tsc_offset = svm_set_tsc_offset;
1064 hvm_funcs.inject_exception = svm_hvm_inject_exception;
1066 hvm_funcs.init_ap_context = svm_init_ap_context;
1067 hvm_funcs.init_hypercall_page = svm_init_hypercall_page;
1069 hvm_enable();
1071 return 1;
1072 }
1074 void arch_svm_do_resume(struct vcpu *v)
1075 {
1076 /* pinning VCPU to a different core? */
1077 if ( v->arch.hvm_svm.launch_core == smp_processor_id()) {
1078 hvm_do_resume( v );
1079 reset_stack_and_jump( svm_asm_do_resume );
1080 }
1081 else {
1082 if (svm_dbg_on)
1083 printk("VCPU core pinned: %d to %d\n",
1084 v->arch.hvm_svm.launch_core, smp_processor_id() );
1085 v->arch.hvm_svm.launch_core = smp_processor_id();
1086 hvm_migrate_timers( v );
1087 hvm_do_resume( v );
1088 reset_stack_and_jump( svm_asm_do_resume );
1089 }
1090 }
1092 static int svm_do_page_fault(unsigned long va, struct cpu_user_regs *regs)
1093 {
1094 HVM_DBG_LOG(DBG_LEVEL_VMMU,
1095 "svm_do_page_fault = 0x%lx, eip = %lx, error_code = %lx",
1096 va, (unsigned long)current->arch.hvm_svm.vmcb->rip,
1097 (unsigned long)regs->error_code);
1098 return shadow_fault(va, regs);
1099 }
1102 static void svm_do_no_device_fault(struct vmcb_struct *vmcb)
1103 {
1104 struct vcpu *v = current;
1106 setup_fpu(v);
1107 vmcb->exception_intercepts &= ~EXCEPTION_BITMAP_NM;
1109 if ( !(v->arch.hvm_svm.cpu_shadow_cr0 & X86_CR0_TS) )
1110 vmcb->cr0 &= ~X86_CR0_TS;
1111 }
1114 static void svm_do_general_protection_fault(struct vcpu *v,
1115 struct cpu_user_regs *regs)
1116 {
1117 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1118 unsigned long eip, error_code;
1120 ASSERT(vmcb);
1122 eip = vmcb->rip;
1123 error_code = vmcb->exitinfo1;
1125 if (vmcb->idtr.limit == 0) {
1126 printk("Huh? We got a GP Fault with an invalid IDTR!\n");
1127 svm_dump_vmcb(__func__, vmcb);
1128 svm_dump_regs(__func__, regs);
1129 svm_dump_inst(svm_rip2pointer(vmcb));
1130 domain_crash(v->domain);
1131 return;
1132 }
1134 HVM_DBG_LOG(DBG_LEVEL_1,
1135 "svm_general_protection_fault: eip = %lx, error_code = %lx",
1136 eip, error_code);
1138 HVM_DBG_LOG(DBG_LEVEL_1,
1139 "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx",
1140 (unsigned long)regs->eax, (unsigned long)regs->ebx,
1141 (unsigned long)regs->ecx, (unsigned long)regs->edx,
1142 (unsigned long)regs->esi, (unsigned long)regs->edi);
1144 /* Reflect it back into the guest */
1145 svm_inject_exception(v, TRAP_gp_fault, 1, error_code);
1146 }
1148 /* Reserved bits ECX: [31:14], [12:4], [2:1]*/
1149 #define SVM_VCPU_CPUID_L1_ECX_RESERVED 0xffffdff6
1150 /* Reserved bits EDX: [31:29], [27], [22:20], [18], [10] */
1151 #define SVM_VCPU_CPUID_L1_EDX_RESERVED 0xe8740400
1153 static void svm_vmexit_do_cpuid(struct vmcb_struct *vmcb,
1154 struct cpu_user_regs *regs)
1155 {
1156 unsigned long input = regs->eax;
1157 unsigned int eax, ebx, ecx, edx;
1158 struct vcpu *v = current;
1159 int inst_len;
1161 ASSERT(vmcb);
1163 hvm_cpuid(input, &eax, &ebx, &ecx, &edx);
1165 if ( input == 0x00000001 )
1166 {
1167 /* Clear out reserved bits. */
1168 ecx &= ~SVM_VCPU_CPUID_L1_ECX_RESERVED;
1169 edx &= ~SVM_VCPU_CPUID_L1_EDX_RESERVED;
1171 /* Guest should only see one logical processor.
1172 * See details on page 23 of AMD CPUID Specification.
1173 */
1174 clear_bit(X86_FEATURE_HT & 31, &edx); /* clear the hyperthread bit */
1175 ebx &= 0xFF00FFFF; /* clear the logical processor count when HTT=0 */
1176 ebx |= 0x00010000; /* set to 1 just for precaution */
1177 }
1178 else if ( input == 0x80000001 )
1179 {
1180 if ( vlapic_hw_disabled(vcpu_vlapic(v)) )
1181 clear_bit(X86_FEATURE_APIC & 31, &edx);
1183 #if CONFIG_PAGING_LEVELS >= 3
1184 if ( !v->domain->arch.hvm_domain.params[HVM_PARAM_PAE_ENABLED] )
1185 #endif
1186 clear_bit(X86_FEATURE_PAE & 31, &edx);
1188 clear_bit(X86_FEATURE_PSE36 & 31, &edx);
1190 /* Clear the Cmp_Legacy bit
1191 * This bit is supposed to be zero when HTT = 0.
1192 * See details on page 23 of AMD CPUID Specification.
1193 */
1194 clear_bit(X86_FEATURE_CMP_LEGACY & 31, &ecx);
1196 /* Make SVM feature invisible to the guest. */
1197 clear_bit(X86_FEATURE_SVME & 31, &ecx);
1199 /* So far, we do not support 3DNow for the guest. */
1200 clear_bit(X86_FEATURE_3DNOW & 31, &edx);
1201 clear_bit(X86_FEATURE_3DNOWEXT & 31, &edx);
1202 /* no FFXSR instructions feature. */
1203 clear_bit(X86_FEATURE_FFXSR & 31, &edx);
1204 }
1205 else if ( input == 0x80000007 || input == 0x8000000A )
1206 {
1207 /* Mask out features of power management and SVM extension. */
1208 eax = ebx = ecx = edx = 0;
1209 }
1210 else if ( input == 0x80000008 )
1211 {
1212 /* Make sure Number of CPU core is 1 when HTT=0 */
1213 ecx &= 0xFFFFFF00;
1214 }
1216 regs->eax = (unsigned long)eax;
1217 regs->ebx = (unsigned long)ebx;
1218 regs->ecx = (unsigned long)ecx;
1219 regs->edx = (unsigned long)edx;
1221 inst_len = __get_instruction_length(vmcb, INSTR_CPUID, NULL);
1222 ASSERT(inst_len > 0);
1223 __update_guest_eip(vmcb, inst_len);
1224 }
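/* Map an SVM GPR decode index onto the location holding that register:
 * ESP lives in the VMCB, everything else in the saved register frame. */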
1226 static inline unsigned long *get_reg_p(unsigned int gpreg,
1227 struct cpu_user_regs *regs, struct vmcb_struct *vmcb)
1229 unsigned long *reg_p = NULL;
1230 switch (gpreg)
1232 case SVM_REG_EAX:
1233 reg_p = (unsigned long *)&regs->eax;
1234 break;
1235 case SVM_REG_EBX:
1236 reg_p = (unsigned long *)&regs->ebx;
1237 break;
1238 case SVM_REG_ECX:
1239 reg_p = (unsigned long *)&regs->ecx;
1240 break;
1241 case SVM_REG_EDX:
1242 reg_p = (unsigned long *)&regs->edx;
1243 break;
1244 case SVM_REG_EDI:
1245 reg_p = (unsigned long *)&regs->edi;
1246 break;
1247 case SVM_REG_ESI:
1248 reg_p = (unsigned long *)&regs->esi;
1249 break;
1250 case SVM_REG_EBP:
1251 reg_p = (unsigned long *)&regs->ebp;
1252 break;
1253 case SVM_REG_ESP:
1254 reg_p = (unsigned long *)&vmcb->rsp;
1255 break;
1256 #ifdef __x86_64__
1257 case SVM_REG_R8:
1258 reg_p = (unsigned long *)&regs->r8;
1259 break;
1260 case SVM_REG_R9:
1261 reg_p = (unsigned long *)&regs->r9;
1262 break;
1263 case SVM_REG_R10:
1264 reg_p = (unsigned long *)&regs->r10;
1265 break;
1266 case SVM_REG_R11:
1267 reg_p = (unsigned long *)&regs->r11;
1268 break;
1269 case SVM_REG_R12:
1270 reg_p = (unsigned long *)&regs->r12;
1271 break;
1272 case SVM_REG_R13:
1273 reg_p = (unsigned long *)&regs->r13;
1274 break;
1275 case SVM_REG_R14:
1276 reg_p = (unsigned long *)&regs->r14;
1277 break;
1278 case SVM_REG_R15:
1279 reg_p = (unsigned long *)&regs->r15;
1280 break;
1281 #endif
1282 default:
1283 BUG();
1286 return reg_p;
1290 static inline unsigned long get_reg(unsigned int gpreg,
1291 struct cpu_user_regs *regs, struct vmcb_struct *vmcb)
1293 unsigned long *gp;
1294 gp = get_reg_p(gpreg, regs, vmcb);
1295 return *gp;
1299 static inline void set_reg(unsigned int gpreg, unsigned long value,
1300 struct cpu_user_regs *regs, struct vmcb_struct *vmcb)
1302 unsigned long *gp;
1303 gp = get_reg_p(gpreg, regs, vmcb);
1304 *gp = value;
1308 static void svm_dr_access(struct vcpu *v, struct cpu_user_regs *regs)
1310 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1312 v->arch.hvm_vcpu.flag_dr_dirty = 1;
1314 __restore_debug_registers(v);
1316 /* allow the guest full access to the debug registers */
1317 vmcb->dr_intercepts = 0;
1321 static void svm_get_prefix_info(
1322 struct vmcb_struct *vmcb,
1323 unsigned int dir, svm_segment_register_t **seg, unsigned int *asize)
1325 unsigned char inst[MAX_INST_LEN];
1326 int i;
1328 memset(inst, 0, MAX_INST_LEN);
1329 if (inst_copy_from_guest(inst, svm_rip2pointer(vmcb), sizeof(inst))
1330 != MAX_INST_LEN)
1332 gdprintk(XENLOG_ERR, "get guest instruction failed\n");
1333 domain_crash(current->domain);
1334 return;
1337 for (i = 0; i < MAX_INST_LEN; i++)
1339 switch (inst[i])
1341 case 0xf3: /* REPZ */
1342 case 0xf2: /* REPNZ */
1343 case 0xf0: /* LOCK */
1344 case 0x66: /* data32 */
1345 #ifdef __x86_64__
1346 /* REX prefixes */
1347 case 0x40:
1348 case 0x41:
1349 case 0x42:
1350 case 0x43:
1351 case 0x44:
1352 case 0x45:
1353 case 0x46:
1354 case 0x47:
1356 case 0x48:
1357 case 0x49:
1358 case 0x4a:
1359 case 0x4b:
1360 case 0x4c:
1361 case 0x4d:
1362 case 0x4e:
1363 case 0x4f:
1364 #endif
1365 continue;
1366 case 0x67: /* addr32 */
1367 *asize ^= 48; /* Switch 16/32 bits */
1368 continue;
1369 case 0x2e: /* CS */
1370 *seg = &vmcb->cs;
1371 continue;
1372 case 0x36: /* SS */
1373 *seg = &vmcb->ss;
1374 continue;
1375 case 0x26: /* ES */
1376 *seg = &vmcb->es;
1377 continue;
1378 case 0x64: /* FS */
1379 *seg = &vmcb->fs;
1380 continue;
1381 case 0x65: /* GS */
1382 *seg = &vmcb->gs;
1383 continue;
1384 case 0x3e: /* DS */
1385 *seg = &vmcb->ds;
1386 continue;
1387 default:
1388 break;
1390 return;
1395 /* Get the address of INS/OUTS instruction */
1396 static inline int svm_get_io_address(
1397 struct vcpu *v, struct cpu_user_regs *regs,
1398 unsigned int size, ioio_info_t info,
1399 unsigned long *count, unsigned long *addr)
1401 unsigned long reg;
1402 unsigned int asize, isize;
1403 int long_mode = 0;
1404 svm_segment_register_t *seg = NULL;
1405 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1407 #ifdef __x86_64__
1408 /* If we're in long mode, we shouldn't check the segment presence & limit */
1409 long_mode = vmcb->cs.attr.fields.l && vmcb->efer & EFER_LMA;
1410 #endif
1412 /* d field of cs.attr is 1 for 32-bit, 0 for 16 or 64 bit.
1413 * l field combined with EFER_LMA says whether it's 16 or 64 bit.
1414 */
1415 asize = (long_mode)?64:((vmcb->cs.attr.fields.db)?32:16);
1418 /* The ins/outs instructions are single byte, so if we got more
1419 * than one byte (plus a possible rep prefix), there is some prefix
1420 * and we need to figure out what it is...
1421 */
1422 isize = vmcb->exitinfo2 - vmcb->rip;
1424 if (info.fields.rep)
1425 isize --;
1427 if (isize > 1)
1428 svm_get_prefix_info(vmcb, info.fields.type, &seg, &asize);
1430 if (info.fields.type == IOREQ_WRITE)
1432 reg = regs->esi;
1433 if (!seg) /* If no prefix, use DS. */
1434 seg = &vmcb->ds;
1435 if (!long_mode && (seg->attr.fields.type & 0xa) == 0x8) {
1436 svm_inject_exception(v, TRAP_gp_fault, 1, 0);
1437 return 0;
1440 else
1442 reg = regs->edi;
1443 seg = &vmcb->es; /* Note: This is ALWAYS ES. */
1444 if (!long_mode && (seg->attr.fields.type & 0xa) != 0x2) {
1445 svm_inject_exception(v, TRAP_gp_fault, 1, 0);
1446 return 0;
1450 /* If the segment isn't present, give GP fault! */
1451 if (!long_mode && !seg->attr.fields.p)
1453 svm_inject_exception(v, TRAP_gp_fault, 1, 0);
1454 return 0;
1457 if (asize == 16)
1459 *addr = (reg & 0xFFFF);
1460 *count = regs->ecx & 0xffff;
1462 else
1464 *addr = reg;
1465 *count = regs->ecx;
1467 if (!info.fields.rep)
1468 *count = 1;
1470 if (!long_mode)
1472 ASSERT(*addr == (u32)*addr);
1473 if ((u32)(*addr + size - 1) < (u32)*addr ||
1474 (seg->attr.fields.type & 0xc) != 0x4 ?
1475 *addr + size - 1 > seg->limit :
1476 *addr <= seg->limit)
1478 svm_inject_exception(v, TRAP_gp_fault, 1, 0);
1479 return 0;
1482 /* Check the limit for repeated instructions, as above we checked only
1483 the first instance. Truncate the count if a limit violation would
1484 occur. Note that the checking is not necessary for page granular
1485 segments as transfers crossing page boundaries will be broken up
1486 anyway. */
1487 if (!seg->attr.fields.g && *count > 1)
1489 if ((seg->attr.fields.type & 0xc) != 0x4)
1491 /* expand-up */
1492 if (!(regs->eflags & EF_DF))
1494 if (*addr + *count * size - 1 < *addr ||
1495 *addr + *count * size - 1 > seg->limit)
1496 *count = (seg->limit + 1UL - *addr) / size;
1498 else
1500 if (*count - 1 > *addr / size)
1501 *count = *addr / size + 1;
1504 else
1506 /* expand-down */
1507 if (!(regs->eflags & EF_DF))
1509 if (*count - 1 > -(s32)*addr / size)
1510 *count = -(s32)*addr / size + 1UL;
1512 else
1514 if (*addr < (*count - 1) * size ||
1515 *addr - (*count - 1) * size <= seg->limit)
1516 *count = (*addr - seg->limit - 1) / size + 1;
1519 ASSERT(*count);
1522 *addr += seg->base;
1524 #ifdef __x86_64__
1525 else
1527 if (seg == &vmcb->fs || seg == &vmcb->gs)
1528 *addr += seg->base;
1530 if (!is_canonical_address(*addr) ||
1531 !is_canonical_address(*addr + size - 1))
1533 svm_inject_exception(v, TRAP_gp_fault, 1, 0);
1534 return 0;
1536 if (*count > (1UL << 48) / size)
1537 *count = (1UL << 48) / size;
1538 if (!(regs->eflags & EF_DF))
1540 if (*addr + *count * size - 1 < *addr ||
1541 !is_canonical_address(*addr + *count * size - 1))
1542 *count = (*addr & ~((1UL << 48) - 1)) / size;
1544 else
1546 if ((*count - 1) * size > *addr ||
1547 !is_canonical_address(*addr + (*count - 1) * size))
1548 *count = (*addr & ~((1UL << 48) - 1)) / size + 1;
1550 ASSERT(*count);
1552 #endif
1554 return 1;
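/* Handle an IOIO intercept: decode port, size and direction from
 * exitinfo1, work out the memory operand for string I/O, and forward
 * the request to the device model via send_pio_req(). */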
1558 static void svm_io_instruction(struct vcpu *v)
1560 struct cpu_user_regs *regs;
1561 struct hvm_io_op *pio_opp;
1562 unsigned int port;
1563 unsigned int size, dir, df;
1564 ioio_info_t info;
1565 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1567 ASSERT(vmcb);
1568 pio_opp = &current->arch.hvm_vcpu.io_op;
1569 pio_opp->instr = INSTR_PIO;
1570 pio_opp->flags = 0;
1572 regs = &pio_opp->io_context;
1574 /* Copy current guest state into io instruction state structure. */
1575 memcpy(regs, guest_cpu_user_regs(), HVM_CONTEXT_STACK_BYTES);
1576 hvm_store_cpu_guest_regs(v, regs, NULL);
1578 info.bytes = vmcb->exitinfo1;
1580 port = info.fields.port; /* port used to be addr */
1581 dir = info.fields.type; /* direction */
1582 df = regs->eflags & X86_EFLAGS_DF ? 1 : 0;
1584 if (info.fields.sz32)
1585 size = 4;
1586 else if (info.fields.sz16)
1587 size = 2;
1588 else
1589 size = 1;
1591 HVM_DBG_LOG(DBG_LEVEL_IO,
1592 "svm_io_instruction: port 0x%x eip=%x:%"PRIx64", "
1593 "exit_qualification = %"PRIx64,
1594 port, vmcb->cs.sel, vmcb->rip, info.bytes);
1596 /* string instruction */
1597 if (info.fields.str)
1599 unsigned long addr, count;
1600 int sign = regs->eflags & X86_EFLAGS_DF ? -1 : 1;
1602 if (!svm_get_io_address(v, regs, size, info, &count, &addr))
1604 /* We failed to get a valid address, so don't do the IO operation -
1605 * it would just get worse if we do! Hopefully the guest is handing
1606 * gp-faults...
1607 */
1608 return;
1611 /* "rep" prefix */
1612 if (info.fields.rep)
1614 pio_opp->flags |= REPZ;
1617 /*
1618 * Handle string pio instructions that cross pages or that
1619 * are unaligned. See the comments in hvm_platform.c/handle_mmio()
1620 */
1621 if ((addr & PAGE_MASK) != ((addr + size - 1) & PAGE_MASK))
1623 unsigned long value = 0;
1625 pio_opp->flags |= OVERLAP;
1626 pio_opp->addr = addr;
1628 if (dir == IOREQ_WRITE) /* OUTS */
1630 if (hvm_paging_enabled(current))
1631 (void)hvm_copy_from_guest_virt(&value, addr, size);
1632 else
1633 (void)hvm_copy_from_guest_phys(&value, addr, size);
1636 if (count == 1)
1637 regs->eip = vmcb->exitinfo2;
1639 send_pio_req(port, 1, size, value, dir, df, 0);
1641 else
1643 unsigned long last_addr = sign > 0 ? addr + count * size - 1
1644 : addr - (count - 1) * size;
1646 if ((addr & PAGE_MASK) != (last_addr & PAGE_MASK))
1648 if (sign > 0)
1649 count = (PAGE_SIZE - (addr & ~PAGE_MASK)) / size;
1650 else
1651 count = (addr & ~PAGE_MASK) / size + 1;
1653 else
1654 regs->eip = vmcb->exitinfo2;
1656 send_pio_req(port, count, size, addr, dir, df, 1);
1659 else
1661 /*
1662 * On SVM, the RIP of the instruction following the IN/OUT is saved in
1663 * ExitInfo2
1664 */
1665 regs->eip = vmcb->exitinfo2;
1667 if (port == 0xe9 && dir == IOREQ_WRITE && size == 1)
1668 hvm_print_line(v, regs->eax); /* guest debug output */
1670 send_pio_req(port, 1, size, regs->eax, dir, df, 0);
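/* Emulate a guest CR0 write: maintain the shadow CR0, enter or leave
 * paging and long mode as required, and resync the shadow page tables. */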
1674 static int svm_set_cr0(unsigned long value)
1676 struct vcpu *v = current;
1677 unsigned long mfn;
1678 int paging_enabled;
1679 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1680 unsigned long old_base_mfn;
1682 ASSERT(vmcb);
1684 /* We don't want to lose PG. ET is reserved and should always be 1. */
1685 paging_enabled = svm_paging_enabled(v);
1686 value |= X86_CR0_ET;
1687 vmcb->cr0 = value | X86_CR0_PG | X86_CR0_WP;
1688 v->arch.hvm_svm.cpu_shadow_cr0 = value;
1690 /* TS cleared? Then initialise FPU now. */
1691 if ( !(value & X86_CR0_TS) )
1693 setup_fpu(v);
1694 vmcb->exception_intercepts &= ~EXCEPTION_BITMAP_NM;
1697 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR0 value = %lx\n", value);
1699 if ((value & X86_CR0_PE) && (value & X86_CR0_PG) && !paging_enabled)
1701 /* The guest CR3 must be pointing to the guest physical. */
1702 mfn = get_mfn_from_gpfn(v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT);
1703 if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain))
1705 gdprintk(XENLOG_ERR, "Invalid CR3 value = %lx (mfn=%lx)\n",
1706 v->arch.hvm_svm.cpu_cr3, mfn);
1707 domain_crash(v->domain);
1708 return 0;
1711 #if defined(__x86_64__)
1712 if (test_bit(SVM_CPU_STATE_LME_ENABLED, &v->arch.hvm_svm.cpu_state)
1713 && !test_bit(SVM_CPU_STATE_PAE_ENABLED,
1714 &v->arch.hvm_svm.cpu_state))
1716 HVM_DBG_LOG(DBG_LEVEL_1, "Enable paging before PAE enable\n");
1717 svm_inject_exception(v, TRAP_gp_fault, 1, 0);
1720 if (test_bit(SVM_CPU_STATE_LME_ENABLED, &v->arch.hvm_svm.cpu_state))
1722 HVM_DBG_LOG(DBG_LEVEL_1, "Enable the Long mode\n");
1723 set_bit(SVM_CPU_STATE_LMA_ENABLED, &v->arch.hvm_svm.cpu_state);
1724 vmcb->efer |= EFER_LMA | EFER_LME;
1726 #endif /* __x86_64__ */
1728 /* Now arch.guest_table points to machine physical. */
1729 old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
1730 v->arch.guest_table = pagetable_from_pfn(mfn);
1731 if ( old_base_mfn )
1732 put_page(mfn_to_page(old_base_mfn));
1733 shadow_update_paging_modes(v);
1735 HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx",
1736 (unsigned long) (mfn << PAGE_SHIFT));
1739 if ( !((value & X86_CR0_PE) && (value & X86_CR0_PG)) && paging_enabled )
1740 if ( v->arch.hvm_svm.cpu_cr3 ) {
1741 put_page(mfn_to_page(get_mfn_from_gpfn(
1742 v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT)));
1743 v->arch.guest_table = pagetable_null();
1746 /*
1747 * SVM implements paged real-mode and when we return to real-mode
1748 * we revert back to the physical mappings that the domain builder
1749 * created.
1750 */
1751 if ((value & X86_CR0_PE) == 0) {
1752 if (value & X86_CR0_PG) {
1753 svm_inject_exception(v, TRAP_gp_fault, 1, 0);
1754 return 0;
1756 shadow_update_paging_modes(v);
1758 else if ( (value & (X86_CR0_PE | X86_CR0_PG)) == X86_CR0_PE )
1760 if ( svm_long_mode_enabled(v) )
1762 vmcb->efer &= ~EFER_LMA;
1763 clear_bit(SVM_CPU_STATE_LMA_ENABLED, &v->arch.hvm_svm.cpu_state);
1765 /* we should take care of this kind of situation */
1766 shadow_update_paging_modes(v);
1769 return 1;
1772 /*
1773 * Read from control registers. CR0 and CR4 are read from the shadow.
1774 */
1775 static void mov_from_cr(int cr, int gp, struct cpu_user_regs *regs)
1777 unsigned long value = 0;
1778 struct vcpu *v = current;
1779 struct vlapic *vlapic = vcpu_vlapic(v);
1780 struct vmcb_struct *vmcb;
1782 vmcb = v->arch.hvm_svm.vmcb;
1783 ASSERT(vmcb);
1785 switch ( cr )
1787 case 0:
1788 value = v->arch.hvm_svm.cpu_shadow_cr0;
1789 if (svm_dbg_on)
1790 printk("CR0 read =%lx \n", value );
1791 break;
1792 case 2:
1793 value = vmcb->cr2;
1794 break;
1795 case 3:
1796 value = (unsigned long) v->arch.hvm_svm.cpu_cr3;
1797 if (svm_dbg_on)
1798 printk("CR3 read =%lx \n", value );
1799 break;
1800 case 4:
1801 value = (unsigned long) v->arch.hvm_svm.cpu_shadow_cr4;
1802 if (svm_dbg_on)
1803 printk("CR4 read=%lx\n", value);
1804 break;
1805 case 8:
1806 value = (unsigned long)vlapic_get_reg(vlapic, APIC_TASKPRI);
1807 value = (value & 0xF0) >> 4;
1808 break;
1810 default:
1811 domain_crash(v->domain);
1812 return;
1815 set_reg(gp, value, regs, vmcb);
1817 HVM_DBG_LOG(DBG_LEVEL_VMMU, "mov_from_cr: CR%d, value = %lx,", cr, value);
1821 static inline int svm_pgbit_test(struct vcpu *v)
1823 return v->arch.hvm_svm.cpu_shadow_cr0 & X86_CR0_PG;
1827 /*
1828 * Write to control registers
1829 */
1830 static int mov_to_cr(int gpreg, int cr, struct cpu_user_regs *regs)
1832 unsigned long value, old_cr, old_base_mfn, mfn;
1833 struct vcpu *v = current;
1834 struct vlapic *vlapic = vcpu_vlapic(v);
1835 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1837 value = get_reg(gpreg, regs, vmcb);
1839 HVM_DBG_LOG(DBG_LEVEL_1, "mov_to_cr: CR%d, value = %lx,", cr, value);
1840 HVM_DBG_LOG(DBG_LEVEL_1, "current = %lx,", (unsigned long) current);
1842 switch (cr)
1844 case 0:
1845 if (svm_dbg_on)
1846 printk("CR0 write =%lx \n", value );
1847 return svm_set_cr0(value);
1849 case 3:
1850 if (svm_dbg_on)
1851 printk("CR3 write =%lx \n", value );
1852 /* If paging is not enabled yet, simply copy the value to CR3. */
1853 if (!svm_paging_enabled(v)) {
1854 v->arch.hvm_svm.cpu_cr3 = value;
1855 break;
1858 /* We make a new one if the shadow does not exist. */
1859 if (value == v->arch.hvm_svm.cpu_cr3)
1861 /*
1862 * This is a simple TLB flush, implying the guest has
1863 * removed some translation or changed page attributes.
1864 * We simply invalidate the shadow.
1865 */
1866 mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT);
1867 if (mfn != pagetable_get_pfn(v->arch.guest_table))
1868 goto bad_cr3;
1869 shadow_update_cr3(v);
1871 else
1873 /*
1874 * If different, make a shadow. Check if the PDBR is valid
1875 * first.
1876 */
1877 HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 value = %lx", value);
1878 mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT);
1879 if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain))
1880 goto bad_cr3;
1882 old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
1883 v->arch.guest_table = pagetable_from_pfn(mfn);
1885 if (old_base_mfn)
1886 put_page(mfn_to_page(old_base_mfn));
1888 v->arch.hvm_svm.cpu_cr3 = value;
1889 update_cr3(v);
1890 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx", value);
1892 break;
1894 case 4: /* CR4 */
1895 if (svm_dbg_on)
1896 printk( "write cr4=%lx, cr0=%lx\n",
1897 value, v->arch.hvm_svm.cpu_shadow_cr0 );
1898 old_cr = v->arch.hvm_svm.cpu_shadow_cr4;
1899 if ( value & X86_CR4_PAE && !(old_cr & X86_CR4_PAE) )
1901 set_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
1902 if ( svm_pgbit_test(v) )
1904 /* The guest is a 32-bit PAE guest. */
1905 #if CONFIG_PAGING_LEVELS >= 3
1906 unsigned long mfn, old_base_mfn;
1907 mfn = get_mfn_from_gpfn(v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT);
1908 if ( !mfn_valid(mfn) ||
1909 !get_page(mfn_to_page(mfn), v->domain) )
1910 goto bad_cr3;
1912 /*
1913 * Now arch.guest_table points to machine physical.
1914 */
1916 old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
1917 v->arch.guest_table = pagetable_from_pfn(mfn);
1918 if ( old_base_mfn )
1919 put_page(mfn_to_page(old_base_mfn));
1920 shadow_update_paging_modes(v);
1922 HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx",
1923 (unsigned long) (mfn << PAGE_SHIFT));
1925 HVM_DBG_LOG(DBG_LEVEL_VMMU,
1926 "Update CR3 value = %lx, mfn = %lx",
1927 v->arch.hvm_svm.cpu_cr3, mfn);
1928 #endif
1931 else if (value & X86_CR4_PAE) {
1932 set_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
1933 } else {
1934 if (test_bit(SVM_CPU_STATE_LMA_ENABLED,
1935 &v->arch.hvm_svm.cpu_state)) {
1936 svm_inject_exception(v, TRAP_gp_fault, 1, 0);
1938 clear_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
1941 v->arch.hvm_svm.cpu_shadow_cr4 = value;
1942 vmcb->cr4 = value | SVM_CR4_HOST_MASK;
1944 /*
1945 * Writing to CR4 to modify the PSE, PGE, or PAE flag invalidates
1946 * all TLB entries except global entries.
1947 */
1948 if ((old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE))
1949 shadow_update_paging_modes(v);
1950 break;
1952 case 8:
1953 vlapic_set_reg(vlapic, APIC_TASKPRI, ((value & 0x0F) << 4));
1954 vmcb->vintr.fields.tpr = value & 0x0F;
1955 break;
1957 default:
1958 gdprintk(XENLOG_ERR, "invalid cr: %d\n", cr);
1959 domain_crash(v->domain);
1960 return 0;
1963 return 1;
1965 bad_cr3:
1966 gdprintk(XENLOG_ERR, "Invalid CR3\n");
1967 domain_crash(v->domain);
1968 return 0;
1972 #define ARR_SIZE(x) (sizeof(x) / sizeof(x[0]))
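/* Decode and emulate an intercepted control-register instruction
 * (MOV to/from CRn, CLTS, LMSW, SMSW). */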
1975 static int svm_cr_access(struct vcpu *v, unsigned int cr, unsigned int type,
1976 struct cpu_user_regs *regs)
1978 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1979 int inst_len = 0;
1980 int index;
1981 unsigned int gpreg;
1982 unsigned long value;
1983 u8 buffer[MAX_INST_LEN];
1984 u8 prefix = 0;
1985 int result = 1;
1986 enum instruction_index list_a[] = {INSTR_MOV2CR, INSTR_CLTS, INSTR_LMSW};
1987 enum instruction_index list_b[] = {INSTR_MOVCR2, INSTR_SMSW};
1988 enum instruction_index match;
1990 ASSERT(vmcb);
1992 inst_copy_from_guest(buffer, svm_rip2pointer(vmcb), sizeof(buffer));
1994 /* get index to first actual instruction byte - as we will need to know
1995 where the prefix lives later on */
1996 index = skip_prefix_bytes(buffer, sizeof(buffer));
1998 if ( type == TYPE_MOV_TO_CR )
2000 inst_len = __get_instruction_length_from_list(
2001 vmcb, list_a, ARR_SIZE(list_a), &buffer[index], &match);
2003 else /* type == TYPE_MOV_FROM_CR */
2005 inst_len = __get_instruction_length_from_list(
2006 vmcb, list_b, ARR_SIZE(list_b), &buffer[index], &match);
2009 ASSERT(inst_len > 0);
2011 inst_len += index;
2013 /* Check for REX prefix - it's ALWAYS the last byte of any prefix bytes */
2014 if (index > 0 && (buffer[index-1] & 0xF0) == 0x40)
2015 prefix = buffer[index-1];
2017 HVM_DBG_LOG(DBG_LEVEL_1, "eip = %lx", (unsigned long) vmcb->rip);
2019 switch (match)
2021 case INSTR_MOV2CR:
2022 gpreg = decode_src_reg(prefix, buffer[index+2]);
2023 result = mov_to_cr(gpreg, cr, regs);
2024 break;
2026 case INSTR_MOVCR2:
2027 gpreg = decode_src_reg(prefix, buffer[index+2]);
2028 mov_from_cr(cr, gpreg, regs);
2029 break;
2031 case INSTR_CLTS:
2032 /* TS being cleared means that it's time to restore fpu state. */
2033 setup_fpu(current);
2034 vmcb->exception_intercepts &= ~EXCEPTION_BITMAP_NM;
2035 vmcb->cr0 &= ~X86_CR0_TS; /* clear TS */
2036 v->arch.hvm_svm.cpu_shadow_cr0 &= ~X86_CR0_TS; /* clear TS */
2037 break;
2039 case INSTR_LMSW:
2040 if (svm_dbg_on)
2041 svm_dump_inst(svm_rip2pointer(vmcb));
2043 gpreg = decode_src_reg(prefix, buffer[index+2]);
2044 value = get_reg(gpreg, regs, vmcb) & 0xF;
2046 if (svm_dbg_on)
2047 printk("CR0-LMSW value=%lx, reg=%d, inst_len=%d\n", value, gpreg,
2048 inst_len);
2050 value = (v->arch.hvm_svm.cpu_shadow_cr0 & ~0xF) | value;
2052 if (svm_dbg_on)
2053 printk("CR0-LMSW CR0 - New value=%lx\n", value);
2055 result = svm_set_cr0(value);
2056 break;
2058 case INSTR_SMSW:
2059 if (svm_dbg_on)
2060 svm_dump_inst(svm_rip2pointer(vmcb));
2061 value = v->arch.hvm_svm.cpu_shadow_cr0;
2062 gpreg = decode_src_reg(prefix, buffer[index+2]);
2063 set_reg(gpreg, value, regs, vmcb);
2065 if (svm_dbg_on)
2066 printk("CR0-SMSW value=%lx, reg=%d, inst_len=%d\n", value, gpreg,
2067 inst_len);
2068 break;
2070 default:
2071 BUG();
2074 ASSERT(inst_len);
2076 __update_guest_eip(vmcb, inst_len);
2078 return result;
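/* Emulate intercepted RDMSR/WRMSR; exitinfo1 is zero for reads and
 * non-zero for writes. */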
2081 static inline void svm_do_msr_access(
2082 struct vcpu *v, struct cpu_user_regs *regs)
2084 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2085 int inst_len;
2086 u64 msr_content=0;
2087 u32 ecx = regs->ecx, eax, edx;
2089 ASSERT(vmcb);
2091 HVM_DBG_LOG(DBG_LEVEL_1, "ecx=%x, eax=%x, edx=%x, exitinfo = %lx",
2092 ecx, (u32)regs->eax, (u32)regs->edx,
2093 (unsigned long)vmcb->exitinfo1);
2095 /* is it a read? */
2096 if (vmcb->exitinfo1 == 0)
2098 switch (ecx) {
2099 case MSR_IA32_TIME_STAMP_COUNTER:
2100 msr_content = hvm_get_guest_time(v);
2101 break;
2102 case MSR_IA32_SYSENTER_CS:
2103 msr_content = vmcb->sysenter_cs;
2104 break;
2105 case MSR_IA32_SYSENTER_ESP:
2106 msr_content = vmcb->sysenter_esp;
2107 break;
2108 case MSR_IA32_SYSENTER_EIP:
2109 msr_content = vmcb->sysenter_eip;
2110 break;
2111 case MSR_IA32_APICBASE:
2112 msr_content = vcpu_vlapic(v)->hw.apic_base_msr;
2113 break;
2114 default:
2115 if (long_mode_do_msr_read(regs))
2116 goto done;
2118 if ( rdmsr_hypervisor_regs(ecx, &eax, &edx) ||
2119 rdmsr_safe(ecx, eax, edx) == 0 )
2121 regs->eax = eax;
2122 regs->edx = edx;
2123 goto done;
2125 svm_inject_exception(v, TRAP_gp_fault, 1, 0);
2126 return;
2128 regs->eax = msr_content & 0xFFFFFFFF;
2129 regs->edx = msr_content >> 32;
2131 done:
2132 HVM_DBG_LOG(DBG_LEVEL_1, "returns: ecx=%x, eax=%lx, edx=%lx",
2133 ecx, (unsigned long)regs->eax, (unsigned long)regs->edx);
2135 inst_len = __get_instruction_length(vmcb, INSTR_RDMSR, NULL);
2137 else
2139 msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
2141 switch (ecx)
2143 case MSR_IA32_TIME_STAMP_COUNTER:
2144 hvm_set_guest_time(v, msr_content);
2145 pt_reset(v);
2146 break;
2147 case MSR_IA32_SYSENTER_CS:
2148 vmcb->sysenter_cs = msr_content;
2149 break;
2150 case MSR_IA32_SYSENTER_ESP:
2151 vmcb->sysenter_esp = msr_content;
2152 break;
2153 case MSR_IA32_SYSENTER_EIP:
2154 vmcb->sysenter_eip = msr_content;
2155 break;
2156 case MSR_IA32_APICBASE:
2157 vlapic_msr_set(vcpu_vlapic(v), msr_content);
2158 break;
2159 default:
2160 if ( !long_mode_do_msr_write(regs) )
2161 wrmsr_hypervisor_regs(ecx, regs->eax, regs->edx);
2162 break;
2165 inst_len = __get_instruction_length(vmcb, INSTR_WRMSR, NULL);
2168 __update_guest_eip(vmcb, inst_len);
2172 static inline void svm_vmexit_do_hlt(struct vmcb_struct *vmcb)
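/* HLT is a single-byte instruction, so rip can be advanced without decoding. */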
2174 __update_guest_eip(vmcb, 1);
2176 /* If interrupts are enabled and one is already pending or newly arrived, don't block. */
2177 if ( (vmcb->rflags & X86_EFLAGS_IF) &&
2178 (vmcb->vintr.fields.irq || cpu_has_pending_irq(current)) )
2179 return;
2181 hvm_hlt(vmcb->rflags);
2185 static void svm_vmexit_do_invd(struct vmcb_struct *vmcb)
2187 int inst_len;
2189 /* INVD invalidates the cache without writeback, which we cannot safely
2190 * pass through. Ideally we would issue WBINVD on the guest's behalf, but
2191 * with cache snooping in place it should be fine to ignore it. -- Mats P.
2192 */
2194 /* Log that the instruction was ignored, in case a guest OS that relies on
2195 * INVD misbehaves and someone needs to know why.
2196 */
2197 printk("INVD instruction intercepted - ignored\n");
2199 inst_len = __get_instruction_length(vmcb, INSTR_INVD, NULL);
2200 __update_guest_eip(vmcb, inst_len);
2206 #ifdef XEN_DEBUGGER
2207 static void svm_debug_save_cpu_user_regs(struct vmcb_struct *vmcb,
2208 struct cpu_user_regs *regs)
2210 regs->eip = vmcb->rip;
2211 regs->esp = vmcb->rsp;
2212 regs->eflags = vmcb->rflags;
2214 regs->xcs = vmcb->cs.sel;
2215 regs->xds = vmcb->ds.sel;
2216 regs->xes = vmcb->es.sel;
2217 regs->xfs = vmcb->fs.sel;
2218 regs->xgs = vmcb->gs.sel;
2219 regs->xss = vmcb->ss.sel;
2223 static void svm_debug_restore_cpu_user_regs(struct vmcb_struct *vmcb, struct cpu_user_regs *regs)
2225 vmcb->ss.sel = regs->xss;
2226 vmcb->rsp = regs->esp;
2227 vmcb->rflags = regs->eflags;
2228 vmcb->cs.sel = regs->xcs;
2229 vmcb->rip = regs->eip;
2231 vmcb->gs.sel = regs->xgs;
2232 vmcb->fs.sel = regs->xfs;
2233 vmcb->es.sel = regs->xes;
2234 vmcb->ds.sel = regs->xds;
2236 #endif
2239 void svm_handle_invlpg(const short invlpga, struct cpu_user_regs *regs)
2241 struct vcpu *v = current;
2242 u8 opcode[MAX_INST_LEN], prefix, length = MAX_INST_LEN;
2243 unsigned long g_vaddr;
2244 int inst_len;
2245 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2247 /*
2248 * We do not know in advance how many bytes the invlpg instruction
2249 * occupies, so copy the maximum instruction length.
2250 */
2251 if (inst_copy_from_guest(opcode, svm_rip2pointer(vmcb), length) < length)
2253 gdprintk(XENLOG_ERR, "Error reading %d bytes of guest memory\n", length);
2254 domain_crash(v->domain);
2255 return;
2258 if (invlpga)
2260 inst_len = __get_instruction_length(vmcb, INSTR_INVLPGA, opcode);
2261 ASSERT(inst_len > 0);
2262 __update_guest_eip(vmcb, inst_len);
2264 /*
2265 * The address is implicit in this instruction (INVLPGA takes it in rAX).
2266 * At the moment we don't use ecx (the ASID) to distinguish individual guest pages.
2267 */
2268 g_vaddr = regs->eax;
2270 else
2272 /* What about multiple prefix codes? */
2273 prefix = (is_prefix(opcode[0])?opcode[0]:0);
2274 inst_len = __get_instruction_length(vmcb, INSTR_INVLPG, opcode);
2275 ASSERT(inst_len > 0);
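/*
 * Step back one byte so &opcode[inst_len] points at the last byte matched
 * above (presumably the ModRM byte). The decoder below consumes
 * ModRM/SIB/displacement from there and returns the operand size in
 * `length`, which is added back to recover the full instruction length.
 */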
2277 inst_len--;
2278 length -= inst_len;
2280 /*
2281 * Decode the memory operand of the instruction, including ModRM, SIB and
2282 * displacement, to get the effective address and its length in bytes.
2283 * Assume the system is in either 32- or 64-bit mode.
2284 */
2285 g_vaddr = get_effective_addr_modrm64(vmcb, regs, prefix, inst_len,
2286 &opcode[inst_len], &length);
2288 inst_len += length;
2289 __update_guest_eip (vmcb, inst_len);
2292 shadow_invlpg(v, g_vaddr);
2296 /*
2297 * Reset to realmode causes execution to start at 0xF000:0xFFF0 in
2298 * 16-bit realmode. Basically, this mimics a processor reset.
2300 * returns 0 on success, non-zero otherwise
2301 */
2302 static int svm_do_vmmcall_reset_to_realmode(struct vcpu *v,
2303 struct cpu_user_regs *regs)
2305 struct vmcb_struct *vmcb;
2307 ASSERT(v);
2308 ASSERT(regs);
2310 vmcb = v->arch.hvm_svm.vmcb;
2312 ASSERT(vmcb);
2314 /* Clear the user regs; the relevant VMCB state is re-initialised below. */
2315 memset(regs, 0, sizeof(struct cpu_user_regs));
2317 /* VMCB Control */
2318 vmcb->tsc_offset = 0;
2320 /* VMCB State */
2321 vmcb->cr0 = X86_CR0_ET | X86_CR0_PG | X86_CR0_WP;
2322 v->arch.hvm_svm.cpu_shadow_cr0 = X86_CR0_ET;
2324 vmcb->cr2 = 0;
2325 vmcb->efer = EFER_SVME;
2327 vmcb->cr4 = SVM_CR4_HOST_MASK;
2328 v->arch.hvm_svm.cpu_shadow_cr4 = 0;
2329 clear_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
2331 /* This will jump to ROMBIOS */
2332 vmcb->rip = 0xFFF0;
2334 /* setup the segment registers and all their hidden states */
2335 vmcb->cs.sel = 0xF000;
2336 vmcb->cs.attr.bytes = 0x089b;
2337 vmcb->cs.limit = 0xffff;
2338 vmcb->cs.base = 0x000F0000;
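/* With cs.sel 0xF000, cs.base 0xF0000 and rip 0xFFF0 (set above), execution
 * resumes at physical address 0xFFFF0, the ROMBIOS reset entry point. */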
2340 vmcb->ss.sel = 0x00;
2341 vmcb->ss.attr.bytes = 0x0893;
2342 vmcb->ss.limit = 0xffff;
2343 vmcb->ss.base = 0x00;
2345 vmcb->ds.sel = 0x00;
2346 vmcb->ds.attr.bytes = 0x0893;
2347 vmcb->ds.limit = 0xffff;
2348 vmcb->ds.base = 0x00;
2350 vmcb->es.sel = 0x00;
2351 vmcb->es.attr.bytes = 0x0893;
2352 vmcb->es.limit = 0xffff;
2353 vmcb->es.base = 0x00;
2355 vmcb->fs.sel = 0x00;
2356 vmcb->fs.attr.bytes = 0x0893;
2357 vmcb->fs.limit = 0xffff;
2358 vmcb->fs.base = 0x00;
2360 vmcb->gs.sel = 0x00;
2361 vmcb->gs.attr.bytes = 0x0893;
2362 vmcb->gs.limit = 0xffff;
2363 vmcb->gs.base = 0x00;
2365 vmcb->ldtr.sel = 0x00;
2366 vmcb->ldtr.attr.bytes = 0x0000;
2367 vmcb->ldtr.limit = 0x0;
2368 vmcb->ldtr.base = 0x00;
2370 vmcb->gdtr.sel = 0x00;
2371 vmcb->gdtr.attr.bytes = 0x0000;
2372 vmcb->gdtr.limit = 0x0;
2373 vmcb->gdtr.base = 0x00;
2375 vmcb->tr.sel = 0;
2376 vmcb->tr.attr.bytes = 0;
2377 vmcb->tr.limit = 0x0;
2378 vmcb->tr.base = 0;
2380 vmcb->idtr.sel = 0x00;
2381 vmcb->idtr.attr.bytes = 0x0000;
2382 vmcb->idtr.limit = 0x3ff;
2383 vmcb->idtr.base = 0x00;
2385 vmcb->rax = 0;
2386 vmcb->rsp = 0;
2388 return 0;
2392 /*
2393 * svm_do_vmmcall - SVM VMMCALL handler
2395 * returns 0 on success, non-zero otherwise
2396 */
2397 static int svm_do_vmmcall(struct vcpu *v, struct cpu_user_regs *regs)
2399 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2400 int inst_len;
2402 ASSERT(vmcb);
2403 ASSERT(regs);
2405 inst_len = __get_instruction_length(vmcb, INSTR_VMCALL, NULL);
2406 ASSERT(inst_len > 0);
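/* VMMCALLs with the high bit of eax set are SVM-specific commands handled
 * here; anything else is forwarded to the regular HVM hypercall path. */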
2408 if ( regs->eax & 0x80000000 )
2410 /* VMMCALL sanity check */
2411 if ( vmcb->cpl > get_vmmcall_cpl(regs->edi) )
2413 printk("VMMCALL CPL check failed\n");
2414 return -1;
2417 /* handle the request */
2418 switch ( regs->eax )
2420 case VMMCALL_RESET_TO_REALMODE:
2421 if ( svm_do_vmmcall_reset_to_realmode(v, regs) )
2423 printk("svm_do_vmmcall_reset_to_realmode() failed\n");
2424 return -1;
2426 /* since we just reset the VMCB, return without adjusting
2427 * the eip */
2428 return 0;
2430 case VMMCALL_DEBUG:
2431 printk("DEBUG features not implemented yet\n");
2432 break;
2433 default:
2434 break;
2437 hvm_print_line(v, regs->eax); /* provides the current domain */
2439 else
2441 hvm_do_hypercall(regs);
2444 __update_guest_eip(vmcb, inst_len);
2445 return 0;
2449 void svm_dump_inst(unsigned long eip)
2451 u8 opcode[256];
2452 unsigned long ptr;
2453 int len;
2454 int i;
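/* Dump the 256-byte naturally aligned block containing eip. */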
2456 ptr = eip & ~0xff;
2457 len = 0;
2459 if (hvm_copy_from_guest_virt(opcode, ptr, sizeof(opcode)) == 0)
2460 len = sizeof(opcode);
2462 printk("Code bytes around(len=%d) %lx:", len, eip);
2463 for (i = 0; i < len; i++)
2465 if ((i & 0x0f) == 0)
2466 printk("\n%08lx:", ptr+i);
2468 printk("%02x ", opcode[i]);
2471 printk("\n");
2475 void svm_dump_regs(const char *from, struct cpu_user_regs *regs)
2477 struct vcpu *v = current;
2478 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2479 unsigned long pt = v->arch.hvm_vcpu.hw_cr3;
2481 printk("%s: guest registers from %s:\n", __func__, from);
2482 #if defined (__x86_64__)
2483 printk("rax: %016lx rbx: %016lx rcx: %016lx\n",
2484 regs->rax, regs->rbx, regs->rcx);
2485 printk("rdx: %016lx rsi: %016lx rdi: %016lx\n",
2486 regs->rdx, regs->rsi, regs->rdi);
2487 printk("rbp: %016lx rsp: %016lx r8: %016lx\n",
2488 regs->rbp, regs->rsp, regs->r8);
2489 printk("r9: %016lx r10: %016lx r11: %016lx\n",
2490 regs->r9, regs->r10, regs->r11);
2491 printk("r12: %016lx r13: %016lx r14: %016lx\n",
2492 regs->r12, regs->r13, regs->r14);
2493 printk("r15: %016lx cr0: %016lx cr3: %016lx\n",
2494 regs->r15, v->arch.hvm_svm.cpu_shadow_cr0, vmcb->cr3);
2495 #else
2496 printk("eax: %08x, ebx: %08x, ecx: %08x, edx: %08x\n",
2497 regs->eax, regs->ebx, regs->ecx, regs->edx);
2498 printk("edi: %08x, esi: %08x, ebp: %08x, esp: %08x\n",
2499 regs->edi, regs->esi, regs->ebp, regs->esp);
2500 printk("%s: guest cr0: %lx\n", __func__,
2501 v->arch.hvm_svm.cpu_shadow_cr0);
2502 printk("guest CR3 = %llx\n", vmcb->cr3);
2503 #endif
2504 printk("%s: pt = %lx\n", __func__, pt);
2508 void svm_dump_host_regs(const char *from)
2510 struct vcpu *v = current;
2511 unsigned long pt = pagetable_get_paddr(v->arch.monitor_table);
2512 unsigned long cr3, cr0;
2513 printk("Host registers at %s\n", from);
2515 __asm__ __volatile__ ("\tmov %%cr0,%0\n"
2516 "\tmov %%cr3,%1\n"
2517 : "=r" (cr0), "=r"(cr3));
2518 printk("%s: pt = %lx, cr3 = %lx, cr0 = %lx\n", __func__, pt, cr3, cr0);
2521 #ifdef SVM_EXTRA_DEBUG
2522 static char *exit_reasons[] = {
2523 [VMEXIT_CR0_READ] = "CR0_READ",
2524 [VMEXIT_CR1_READ] = "CR1_READ",
2525 [VMEXIT_CR2_READ] = "CR2_READ",
2526 [VMEXIT_CR3_READ] = "CR3_READ",
2527 [VMEXIT_CR4_READ] = "CR4_READ",
2528 [VMEXIT_CR5_READ] = "CR5_READ",
2529 [VMEXIT_CR6_READ] = "CR6_READ",
2530 [VMEXIT_CR7_READ] = "CR7_READ",
2531 [VMEXIT_CR8_READ] = "CR8_READ",
2532 [VMEXIT_CR9_READ] = "CR9_READ",
2533 [VMEXIT_CR10_READ] = "CR10_READ",
2534 [VMEXIT_CR11_READ] = "CR11_READ",
2535 [VMEXIT_CR12_READ] = "CR12_READ",
2536 [VMEXIT_CR13_READ] = "CR13_READ",
2537 [VMEXIT_CR14_READ] = "CR14_READ",
2538 [VMEXIT_CR15_READ] = "CR15_READ",
2539 [VMEXIT_CR0_WRITE] = "CR0_WRITE",
2540 [VMEXIT_CR1_WRITE] = "CR1_WRITE",
2541 [VMEXIT_CR2_WRITE] = "CR2_WRITE",
2542 [VMEXIT_CR3_WRITE] = "CR3_WRITE",
2543 [VMEXIT_CR4_WRITE] = "CR4_WRITE",
2544 [VMEXIT_CR5_WRITE] = "CR5_WRITE",
2545 [VMEXIT_CR6_WRITE] = "CR6_WRITE",
2546 [VMEXIT_CR7_WRITE] = "CR7_WRITE",
2547 [VMEXIT_CR8_WRITE] = "CR8_WRITE",
2548 [VMEXIT_CR9_WRITE] = "CR9_WRITE",
2549 [VMEXIT_CR10_WRITE] = "CR10_WRITE",
2550 [VMEXIT_CR11_WRITE] = "CR11_WRITE",
2551 [VMEXIT_CR12_WRITE] = "CR12_WRITE",
2552 [VMEXIT_CR13_WRITE] = "CR13_WRITE",
2553 [VMEXIT_CR14_WRITE] = "CR14_WRITE",
2554 [VMEXIT_CR15_WRITE] = "CR15_WRITE",
2555 [VMEXIT_DR0_READ] = "DR0_READ",
2556 [VMEXIT_DR1_READ] = "DR1_READ",
2557 [VMEXIT_DR2_READ] = "DR2_READ",
2558 [VMEXIT_DR3_READ] = "DR3_READ",
2559 [VMEXIT_DR4_READ] = "DR4_READ",
2560 [VMEXIT_DR5_READ] = "DR5_READ",
2561 [VMEXIT_DR6_READ] = "DR6_READ",
2562 [VMEXIT_DR7_READ] = "DR7_READ",
2563 [VMEXIT_DR8_READ] = "DR8_READ",
2564 [VMEXIT_DR9_READ] = "DR9_READ",
2565 [VMEXIT_DR10_READ] = "DR10_READ",
2566 [VMEXIT_DR11_READ] = "DR11_READ",
2567 [VMEXIT_DR12_READ] = "DR12_READ",
2568 [VMEXIT_DR13_READ] = "DR13_READ",
2569 [VMEXIT_DR14_READ] = "DR14_READ",
2570 [VMEXIT_DR15_READ] = "DR15_READ",
2571 [VMEXIT_DR0_WRITE] = "DR0_WRITE",
2572 [VMEXIT_DR1_WRITE] = "DR1_WRITE",
2573 [VMEXIT_DR2_WRITE] = "DR2_WRITE",
2574 [VMEXIT_DR3_WRITE] = "DR3_WRITE",
2575 [VMEXIT_DR4_WRITE] = "DR4_WRITE",
2576 [VMEXIT_DR5_WRITE] = "DR5_WRITE",
2577 [VMEXIT_DR6_WRITE] = "DR6_WRITE",
2578 [VMEXIT_DR7_WRITE] = "DR7_WRITE",
2579 [VMEXIT_DR8_WRITE] = "DR8_WRITE",
2580 [VMEXIT_DR9_WRITE] = "DR9_WRITE",
2581 [VMEXIT_DR10_WRITE] = "DR10_WRITE",
2582 [VMEXIT_DR11_WRITE] = "DR11_WRITE",
2583 [VMEXIT_DR12_WRITE] = "DR12_WRITE",
2584 [VMEXIT_DR13_WRITE] = "DR13_WRITE",
2585 [VMEXIT_DR14_WRITE] = "DR14_WRITE",
2586 [VMEXIT_DR15_WRITE] = "DR15_WRITE",
2587 [VMEXIT_EXCEPTION_DE] = "EXCEPTION_DE",
2588 [VMEXIT_EXCEPTION_DB] = "EXCEPTION_DB",
2589 [VMEXIT_EXCEPTION_NMI] = "EXCEPTION_NMI",
2590 [VMEXIT_EXCEPTION_BP] = "EXCEPTION_BP",
2591 [VMEXIT_EXCEPTION_OF] = "EXCEPTION_OF",
2592 [VMEXIT_EXCEPTION_BR] = "EXCEPTION_BR",
2593 [VMEXIT_EXCEPTION_UD] = "EXCEPTION_UD",
2594 [VMEXIT_EXCEPTION_NM] = "EXCEPTION_NM",
2595 [VMEXIT_EXCEPTION_DF] = "EXCEPTION_DF",
2596 [VMEXIT_EXCEPTION_09] = "EXCEPTION_09",
2597 [VMEXIT_EXCEPTION_TS] = "EXCEPTION_TS",
2598 [VMEXIT_EXCEPTION_NP] = "EXCEPTION_NP",
2599 [VMEXIT_EXCEPTION_SS] = "EXCEPTION_SS",
2600 [VMEXIT_EXCEPTION_GP] = "EXCEPTION_GP",
2601 [VMEXIT_EXCEPTION_PF] = "EXCEPTION_PF",
2602 [VMEXIT_EXCEPTION_15] = "EXCEPTION_15",
2603 [VMEXIT_EXCEPTION_MF] = "EXCEPTION_MF",
2604 [VMEXIT_EXCEPTION_AC] = "EXCEPTION_AC",
2605 [VMEXIT_EXCEPTION_MC] = "EXCEPTION_MC",
2606 [VMEXIT_EXCEPTION_XF] = "EXCEPTION_XF",
2607 [VMEXIT_INTR] = "INTR",
2608 [VMEXIT_NMI] = "NMI",
2609 [VMEXIT_SMI] = "SMI",
2610 [VMEXIT_INIT] = "INIT",
2611 [VMEXIT_VINTR] = "VINTR",
2612 [VMEXIT_CR0_SEL_WRITE] = "CR0_SEL_WRITE",
2613 [VMEXIT_IDTR_READ] = "IDTR_READ",
2614 [VMEXIT_GDTR_READ] = "GDTR_READ",
2615 [VMEXIT_LDTR_READ] = "LDTR_READ",
2616 [VMEXIT_TR_READ] = "TR_READ",
2617 [VMEXIT_IDTR_WRITE] = "IDTR_WRITE",
2618 [VMEXIT_GDTR_WRITE] = "GDTR_WRITE",
2619 [VMEXIT_LDTR_WRITE] = "LDTR_WRITE",
2620 [VMEXIT_TR_WRITE] = "TR_WRITE",
2621 [VMEXIT_RDTSC] = "RDTSC",
2622 [VMEXIT_RDPMC] = "RDPMC",
2623 [VMEXIT_PUSHF] = "PUSHF",
2624 [VMEXIT_POPF] = "POPF",
2625 [VMEXIT_CPUID] = "CPUID",
2626 [VMEXIT_RSM] = "RSM",
2627 [VMEXIT_IRET] = "IRET",
2628 [VMEXIT_SWINT] = "SWINT",
2629 [VMEXIT_INVD] = "INVD",
2630 [VMEXIT_PAUSE] = "PAUSE",
2631 [VMEXIT_HLT] = "HLT",
2632 [VMEXIT_INVLPG] = "INVLPG",
2633 [VMEXIT_INVLPGA] = "INVLPGA",
2634 [VMEXIT_IOIO] = "IOIO",
2635 [VMEXIT_MSR] = "MSR",
2636 [VMEXIT_TASK_SWITCH] = "TASK_SWITCH",
2637 [VMEXIT_FERR_FREEZE] = "FERR_FREEZE",
2638 [VMEXIT_SHUTDOWN] = "SHUTDOWN",
2639 [VMEXIT_VMRUN] = "VMRUN",
2640 [VMEXIT_VMMCALL] = "VMMCALL",
2641 [VMEXIT_VMLOAD] = "VMLOAD",
2642 [VMEXIT_VMSAVE] = "VMSAVE",
2643 [VMEXIT_STGI] = "STGI",
2644 [VMEXIT_CLGI] = "CLGI",
2645 [VMEXIT_SKINIT] = "SKINIT",
2646 [VMEXIT_RDTSCP] = "RDTSCP",
2647 [VMEXIT_ICEBP] = "ICEBP",
2648 [VMEXIT_NPF] = "NPF"
2649 };
2650 #endif /* SVM_EXTRA_DEBUG */
2652 #ifdef SVM_WALK_GUEST_PAGES
2653 void walk_shadow_and_guest_pt(unsigned long gva)
2655 l2_pgentry_t gpde;
2656 l2_pgentry_t spde;
2657 l1_pgentry_t gpte;
2658 l1_pgentry_t spte;
2659 struct vcpu *v = current;
2660 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2661 paddr_t gpa;
2663 gpa = shadow_gva_to_gpa(current, gva);
2664 printk("gva = %lx, gpa=%"PRIpaddr", gCR3=%x\n", gva, gpa, (u32)vmcb->cr3);
2665 if( !svm_paging_enabled(v) || mmio_space(gpa) )
2666 return;
2668 /* let's dump the guest and shadow page info */
2670 __guest_get_l2e(v, gva, &gpde);
2671 printk( "G-PDE = %x, flags=%x\n", gpde.l2, l2e_get_flags(gpde) );
2672 __shadow_get_l2e( v, gva, &spde );
2673 printk( "S-PDE = %x, flags=%x\n", spde.l2, l2e_get_flags(spde) );
2675 if ( unlikely(!(l2e_get_flags(gpde) & _PAGE_PRESENT)) )
2676 return;
2678 spte = l1e_empty();
2680 /* This is actually overkill - we only need to ensure the hl2 is in-sync. */
2681 shadow_sync_va(v, gva);
2683 gpte.l1 = 0;
2684 __copy_from_user(&gpte, &linear_pg_table[ l1_linear_offset(gva) ],
2685 sizeof(gpte) );
2686 printk( "G-PTE = %x, flags=%x\n", gpte.l1, l1e_get_flags(gpte) );
2688 BUG(); // need to think about this, and convert usage of
2689 // phys_to_machine_mapping to use pagetable format...
2690 __copy_from_user( &spte, &phys_to_machine_mapping[ l1e_get_pfn( gpte ) ],
2691 sizeof(spte) );
2693 printk( "S-PTE = %x, flags=%x\n", spte.l1, l1e_get_flags(spte));
2695 #endif /* SVM_WALK_GUEST_PAGES */
2698 asmlinkage void svm_vmexit_handler(struct cpu_user_regs *regs)
2700 unsigned int exit_reason;
2701 unsigned long eip;
2702 struct vcpu *v = current;
2703 int error;
2704 int do_debug = 0;
2705 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2707 ASSERT(vmcb);
2709 exit_reason = vmcb->exitcode;
2710 save_svm_cpu_user_regs(v, regs);
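/* VMEXIT_INVALID means VMRUN rejected the VMCB state as inconsistent;
 * dump the VMCB and crash the domain. */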
2712 if (exit_reason == VMEXIT_INVALID)
2714 svm_dump_vmcb(__func__, vmcb);
2715 goto exit_and_crash;
2718 #ifdef SVM_EXTRA_DEBUG
2720 #if defined(__i386__)
2721 #define rip eip
2722 #endif
2724 static unsigned long intercepts_counter = 0;
2726 if (svm_dbg_on && exit_reason == VMEXIT_EXCEPTION_PF)
2728 if (svm_paging_enabled(v) &&
2729 !mmio_space(shadow_gva_to_gpa(current, vmcb->exitinfo2)))
2731 printk("I%08ld,ExC=%s(%d),IP=%x:%"PRIx64","
2732 "I1=%"PRIx64",I2=%"PRIx64",INT=%"PRIx64", "
2733 "gpa=%"PRIx64"\n", intercepts_counter,
2734 exit_reasons[exit_reason], exit_reason, regs->cs,
2735 (u64)regs->rip,
2736 (u64)vmcb->exitinfo1,
2737 (u64)vmcb->exitinfo2,
2738 (u64)vmcb->exitintinfo.bytes,
2739 (u64)shadow_gva_to_gpa(current, vmcb->exitinfo2));
2741 else
2743 printk("I%08ld,ExC=%s(%d),IP=%x:%"PRIx64","
2744 "I1=%"PRIx64",I2=%"PRIx64",INT=%"PRIx64"\n",
2745 intercepts_counter,
2746 exit_reasons[exit_reason], exit_reason, regs->cs,
2747 (u64)regs->rip,
2748 (u64)vmcb->exitinfo1,
2749 (u64)vmcb->exitinfo2,
2750 (u64)vmcb->exitintinfo.bytes );
2753 else if ( svm_dbg_on
2754 && exit_reason != VMEXIT_IOIO
2755 && exit_reason != VMEXIT_INTR)
2758 if (exit_reasons[exit_reason])
2760 printk("I%08ld,ExC=%s(%d),IP=%x:%"PRIx64","
2761 "I1=%"PRIx64",I2=%"PRIx64",INT=%"PRIx64"\n",
2762 intercepts_counter,
2763 exit_reasons[exit_reason], exit_reason, regs->cs,
2764 (u64)regs->rip,
2765 (u64)vmcb->exitinfo1,
2766 (u64)vmcb->exitinfo2,
2767 (u64)vmcb->exitintinfo.bytes);
2769 else
2771 printk("I%08ld,ExC=%d(0x%x),IP=%x:%"PRIx64","
2772 "I1=%"PRIx64",I2=%"PRIx64",INT=%"PRIx64"\n",
2773 intercepts_counter, exit_reason, exit_reason, regs->cs,
2774 (u64)regs->rip,
2775 (u64)vmcb->exitinfo1,
2776 (u64)vmcb->exitinfo2,
2777 (u64)vmcb->exitintinfo.bytes);
2781 #ifdef SVM_WALK_GUEST_PAGES
2782 if( exit_reason == VMEXIT_EXCEPTION_PF
2783 && ( ( vmcb->exitinfo2 == vmcb->rip )
2784 || vmcb->exitintinfo.bytes) )
2786 if ( svm_paging_enabled(v) &&
2787 !mmio_space(shadow_gva_to_gpa(current, vmcb->exitinfo2)) )
2788 walk_shadow_and_guest_pt(vmcb->exitinfo2);
2790 #endif
2792 intercepts_counter++;
2794 #if 0
2795 if (svm_dbg_on)
2796 do_debug = svm_do_debugout(exit_reason);
2797 #endif
2799 if (do_debug)
2801 printk("%s:+ guest_table = 0x%08x, monitor_table = 0x%08x, "
2802 "hw_cr3 = 0x%16lx\n",
2803 __func__,
2804 (int) v->arch.guest_table.pfn,
2805 (int) v->arch.monitor_table.pfn,
2806 (long unsigned int) v->arch.hvm_vcpu.hw_cr3);
2808 svm_dump_vmcb(__func__, vmcb);
2809 svm_dump_regs(__func__, regs);
2810 svm_dump_inst(svm_rip2pointer(vmcb));
2813 #if defined(__i386__)
2814 #undef rip
2815 #endif
2818 #endif /* SVM_EXTRA_DEBUG */
2821 perfc_incra(svmexits, exit_reason);
2822 eip = vmcb->rip;
2824 #ifdef SVM_EXTRA_DEBUG
2825 if (do_debug)
2827 printk("eip = %lx, exit_reason = %d (0x%x)\n",
2828 eip, exit_reason, exit_reason);
2830 #endif /* SVM_EXTRA_DEBUG */
2832 TRACE_3D(TRC_VMX_VMEXIT, v->domain->domain_id, eip, exit_reason);
2834 switch (exit_reason)
2836 case VMEXIT_EXCEPTION_DB:
2838 #ifdef XEN_DEBUGGER
2839 svm_debug_save_cpu_user_regs(vmcb, regs);
2840 pdb_handle_exception(1, regs, 1);
2841 svm_debug_restore_cpu_user_regs(vmcb, regs);
2842 #else
2843 svm_store_cpu_user_regs(regs, v);
2844 domain_pause_for_debugger();
2845 #endif
2847 break;
2849 case VMEXIT_INTR:
2850 case VMEXIT_NMI:
2851 case VMEXIT_SMI:
2852 /* Asynchronous events, handled when we STGI'd after the VMEXIT. */
2853 break;
2855 case VMEXIT_INIT:
2856 BUG(); /* unreachable */
2858 case VMEXIT_EXCEPTION_BP:
2859 #ifdef XEN_DEBUGGER
2860 svm_debug_save_cpu_user_regs(vmcb, regs);
2861 pdb_handle_exception(3, regs, 1);
2862 svm_debug_restore_cpu_user_regs(vmcb, regs);
2863 #else
2864 if ( test_bit(_DOMF_debugging, &v->domain->domain_flags) )
2865 domain_pause_for_debugger();
2866 else
2867 svm_inject_exception(v, TRAP_int3, 0, 0);
2868 #endif
2869 break;
2871 case VMEXIT_EXCEPTION_NM:
2872 svm_do_no_device_fault(vmcb);
2873 break;
2875 case VMEXIT_EXCEPTION_GP:
2876 /* This should probably not be trapped in the future */
2877 regs->error_code = vmcb->exitinfo1;
2878 svm_do_general_protection_fault(v, regs);
2879 break;
2881 case VMEXIT_EXCEPTION_PF:
2883 unsigned long va;
2884 va = vmcb->exitinfo2;
2885 regs->error_code = vmcb->exitinfo1;
2886 HVM_DBG_LOG(DBG_LEVEL_VMMU,
2887 "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx",
2888 (unsigned long)regs->eax, (unsigned long)regs->ebx,
2889 (unsigned long)regs->ecx, (unsigned long)regs->edx,
2890 (unsigned long)regs->esi, (unsigned long)regs->edi);
2892 if (!(error = svm_do_page_fault(va, regs)))
2894 /* Inject #PF using the VMCB event injection (EVENTINJ) field */
2895 svm_inject_exception(v, TRAP_page_fault, 1, regs->error_code);
2897 v->arch.hvm_svm.cpu_cr2 = va;
2898 vmcb->cr2 = va;
2899 TRACE_3D(TRC_VMX_INTR, v->domain->domain_id,
2900 VMEXIT_EXCEPTION_PF, va);
2902 break;
2905 case VMEXIT_EXCEPTION_DF:
2906 /* Debug info to help work out why the guest double-faulted. */
2907 svm_dump_vmcb(__func__, vmcb);
2908 svm_dump_regs(__func__, regs);
2909 svm_dump_inst(svm_rip2pointer(vmcb));
2910 svm_inject_exception(v, TRAP_double_fault, 1, 0);
2911 break;
2913 case VMEXIT_VINTR:
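/* The interrupt window is open: drop the pending V_IRQ request and the
 * VINTR intercept that was used to get this notification. */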
2914 vmcb->vintr.fields.irq = 0;
2915 vmcb->general1_intercepts &= ~GENERAL1_INTERCEPT_VINTR;
2916 break;
2918 case VMEXIT_INVD:
2919 svm_vmexit_do_invd(vmcb);
2920 break;
2922 case VMEXIT_GDTR_WRITE:
2923 printk("WRITE to GDTR\n");
2924 break;
2926 case VMEXIT_TASK_SWITCH:
2927 goto exit_and_crash;
2929 case VMEXIT_CPUID:
2930 svm_vmexit_do_cpuid(vmcb, regs);
2931 break;
2933 case VMEXIT_HLT:
2934 svm_vmexit_do_hlt(vmcb);
2935 break;
2937 case VMEXIT_INVLPG:
2938 svm_handle_invlpg(0, regs);
2939 break;
2941 case VMEXIT_INVLPGA:
2942 svm_handle_invlpg(1, regs);
2943 break;
2945 case VMEXIT_VMMCALL:
2946 svm_do_vmmcall(v, regs);
2947 break;
2949 case VMEXIT_CR0_READ:
2950 svm_cr_access(v, 0, TYPE_MOV_FROM_CR, regs);
2951 break;
2953 case VMEXIT_CR2_READ:
2954 svm_cr_access(v, 2, TYPE_MOV_FROM_CR, regs);
2955 break;
2957 case VMEXIT_CR3_READ:
2958 svm_cr_access(v, 3, TYPE_MOV_FROM_CR, regs);
2959 break;
2961 case VMEXIT_CR4_READ:
2962 svm_cr_access(v, 4, TYPE_MOV_FROM_CR, regs);
2963 break;
2965 case VMEXIT_CR8_READ:
2966 svm_cr_access(v, 8, TYPE_MOV_FROM_CR, regs);
2967 break;
2969 case VMEXIT_CR0_WRITE:
2970 svm_cr_access(v, 0, TYPE_MOV_TO_CR, regs);
2971 break;
2973 case VMEXIT_CR2_WRITE:
2974 svm_cr_access(v, 2, TYPE_MOV_TO_CR, regs);
2975 break;
2977 case VMEXIT_CR3_WRITE:
2978 svm_cr_access(v, 3, TYPE_MOV_TO_CR, regs);
2979 local_flush_tlb();
2980 break;
2982 case VMEXIT_CR4_WRITE:
2983 svm_cr_access(v, 4, TYPE_MOV_TO_CR, regs);
2984 break;
2986 case VMEXIT_CR8_WRITE:
2987 svm_cr_access(v, 8, TYPE_MOV_TO_CR, regs);
2988 break;
2990 case VMEXIT_DR0_WRITE ... VMEXIT_DR7_WRITE:
2991 svm_dr_access(v, regs);
2992 break;
2994 case VMEXIT_IOIO:
2995 svm_io_instruction(v);
2996 break;
2998 case VMEXIT_MSR:
2999 svm_do_msr_access(v, regs);
3000 break;
3002 case VMEXIT_SHUTDOWN:
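/* The guest shut down (e.g. triple fault); let the common HVM code decide
 * the domain's fate. */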
3003 hvm_triple_fault();
3004 break;
3006 default:
3007 exit_and_crash:
3008 gdprintk(XENLOG_ERR, "unexpected VMEXIT: exit reason = 0x%x, "
3009 "exitinfo1 = %"PRIx64", exitinfo2 = %"PRIx64"\n",
3010 exit_reason,
3011 (u64)vmcb->exitinfo1, (u64)vmcb->exitinfo2);
3012 domain_crash(v->domain);
3013 break;
3016 #ifdef SVM_EXTRA_DEBUG
3017 if (do_debug)
3019 printk("%s: Done switch on vmexit_code\n", __func__);
3020 svm_dump_regs(__func__, regs);
3023 if (do_debug)
3025 printk("vmexit_handler():- guest_table = 0x%08x, "
3026 "monitor_table = 0x%08x, hw_cr3 = 0x%16x\n",
3027 (int)v->arch.guest_table.pfn,
3028 (int)v->arch.monitor_table.pfn,
3029 (int)v->arch.hvm_vcpu.hw_cr3);
3030 printk("svm_vmexit_handler: Returning\n");
3032 #endif
3035 asmlinkage void svm_load_cr2(void)
3037 struct vcpu *v = current;
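/* Presumably called on the VMRUN path: reload the guest's saved %cr2 with
 * interrupts disabled so a host page fault cannot clobber it before entry. */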
3039 local_irq_disable();
3040 asm volatile("mov %0,%%cr2": :"r" (v->arch.hvm_svm.cpu_cr2));
3043 /*
3044 * Local variables:
3045 * mode: C
3046 * c-set-style: "BSD"
3047 * c-basic-offset: 4
3048 * tab-width: 4
3049 * indent-tabs-mode: nil
3050 * End:
3051 */