ia64/xen-unstable

view xen/arch/x86/hvm/svm/svm.c @ 9704:137b60a47d13

SVM patch to init rsp properly.

Signed-off-by: Tom Woller <thomas.woller@amd.com>
author kaf24@firebug.cl.cam.ac.uk
date Thu Apr 13 11:07:28 2006 +0100 (2006-04-13)
parents 9849bd4a86dd
children b77ebfaa72b2
1 /*
2 * svm.c: handling SVM architecture-related VM exits
3 * Copyright (c) 2004, Intel Corporation.
4 * Copyright (c) 2005, AMD Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
17 * Place - Suite 330, Boston, MA 02111-1307 USA.
18 *
19 */
21 #include <xen/config.h>
22 #include <xen/init.h>
23 #include <xen/lib.h>
24 #include <xen/trace.h>
25 #include <xen/sched.h>
26 #include <xen/irq.h>
27 #include <xen/softirq.h>
28 #include <asm/current.h>
29 #include <asm/io.h>
30 #include <asm/shadow.h>
31 #include <asm/regs.h>
32 #include <asm/cpufeature.h>
33 #include <asm/processor.h>
34 #include <asm/types.h>
35 #include <asm/msr.h>
36 #include <asm/spinlock.h>
37 #include <asm/hvm/hvm.h>
38 #include <asm/hvm/support.h>
39 #include <asm/hvm/io.h>
40 #include <asm/hvm/svm/svm.h>
41 #include <asm/hvm/svm/vmcb.h>
42 #include <asm/hvm/svm/emulate.h>
43 #include <asm/hvm/svm/vmmcall.h>
44 #include <asm/hvm/svm/intr.h>
45 #include <asm/shadow.h>
46 #if CONFIG_PAGING_LEVELS >= 3
47 #include <asm/shadow_64.h>
48 #endif
49 #include <public/sched.h>
50 #include <public/hvm/ioreq.h>
52 #define SVM_EXTRA_DEBUG
54 #ifdef TRACE_BUFFER
55 static unsigned long trace_values[NR_CPUS][4];
56 #define TRACE_VMEXIT(index,value) trace_values[current->processor][index]=value
57 #else
58 #define TRACE_VMEXIT(index,value) ((void)0)
59 #endif
61 /* Maximum length of an x86 instruction, in bytes */
62 #define MAX_INST_SIZE 15
64 /*
65 * External functions, etc. We should move these to some suitable header file(s) */
67 extern void do_nmi(struct cpu_user_regs *, unsigned long);
68 extern int inst_copy_from_guest(unsigned char *buf, unsigned long guest_eip,
69 int inst_len);
70 extern asmlinkage void do_IRQ(struct cpu_user_regs *);
71 extern void send_pio_req(struct cpu_user_regs *regs, unsigned long port,
72 unsigned long count, int size, long value, int dir, int pvalid);
73 extern int svm_instrlen(struct cpu_user_regs *regs, int mode);
74 extern void svm_dump_inst(unsigned long eip);
75 extern int svm_dbg_on;
76 void svm_manual_event_injection32(struct vcpu *v, struct cpu_user_regs *regs,
77 int vector, int has_code);
78 void svm_dump_regs(const char *from, struct cpu_user_regs *regs);
80 static void svm_relinquish_guest_resources(struct domain *d);
82 static struct asid_pool ASIDpool[NR_CPUS];
84 /*
85 * Initializes the per-core pool of ASIDs used by the guests.
86 */
87 void asidpool_init( int core )
88 {
89 int i;
90 ASIDpool[core].asid_lock = SPIN_LOCK_UNLOCKED;
91 spin_lock(&ASIDpool[core].asid_lock);
92 /* Host ASID is always in use */
93 ASIDpool[core].asid[INITIAL_ASID] = ASID_INUSE;
94 for( i=1; i<ASID_MAX; i++ )
95 {
96 ASIDpool[core].asid[i] = ASID_AVAILABLE;
97 }
98 spin_unlock(&ASIDpool[core].asid_lock);
99 }
102 /* internal function to get the next available ASID */
103 static int asidpool_fetch_next( struct vmcb_struct *vmcb, int core )
104 {
105 int i;
106 for( i = 1; i < ASID_MAX; i++ )
107 {
108 if( ASIDpool[core].asid[i] == ASID_AVAILABLE )
109 {
110 vmcb->guest_asid = i;
111 ASIDpool[core].asid[i] = ASID_INUSE;
112 return i;
113 }
114 }
115 return -1;
116 }
119 /*
120 * This function assigns to the passed VMCB the next
121 * available ASID number. If none are available, the
122 * TLB flush flag is set and all retired ASIDs
123 * are made available again.
124 *
125 * Returns: 1 -- success;
126 * 0 -- failure -- no more ASID numbers
127 * available.
128 */
129 int asidpool_assign_next( struct vmcb_struct *vmcb, int retire_current,
130 int oldcore, int newcore )
131 {
132 int i;
133 int res = 1;
134 static unsigned long cnt=0;
136 spin_lock(&ASIDpool[oldcore].asid_lock);
137 if( retire_current && vmcb->guest_asid ) {
138 ASIDpool[oldcore].asid[ vmcb->guest_asid & (ASID_MAX-1) ] = ASID_RETIRED;
139 }
140 spin_unlock(&ASIDpool[oldcore].asid_lock);
141 spin_lock(&ASIDpool[newcore].asid_lock);
142 if( asidpool_fetch_next( vmcb, newcore ) < 0 ) {
143 if (svm_dbg_on)
144 printk( "SVM: tlb(%ld)\n", cnt++ );
145 /* Flush the TLB and make all retired slots available again */
146 vmcb->tlb_control = 1;
147 for( i = 1; i < ASID_MAX; i++ ) {
148 if( ASIDpool[newcore].asid[i] == ASID_RETIRED ) {
149 ASIDpool[newcore].asid[i] = ASID_AVAILABLE;
150 }
151 }
152 /* Get the first available slot */
153 res = asidpool_fetch_next( vmcb, newcore ) > 0;
154 }
155 spin_unlock(&ASIDpool[newcore].asid_lock);
156 return res;
157 }
159 void asidpool_retire( struct vmcb_struct *vmcb, int core )
160 {
161 spin_lock(&ASIDpool[core].asid_lock);
162 if( vmcb->guest_asid ) {
163 ASIDpool[core].asid[ vmcb->guest_asid & (ASID_MAX-1) ] = ASID_RETIRED;
164 }
165 spin_unlock(&ASIDpool[core].asid_lock);
166 }
168 static inline void svm_inject_exception(struct vmcb_struct *vmcb,
169 int trap, int ev, int error_code)
170 {
171 eventinj_t event;
173 event.bytes = 0;
174 event.fields.v = 1;
175 event.fields.type = EVENTTYPE_EXCEPTION;
176 event.fields.vector = trap;
177 event.fields.ev = ev;
178 event.fields.errorcode = error_code;
180 ASSERT(vmcb->eventinj.fields.v == 0);
182 vmcb->eventinj = event;
183 }
185 void stop_svm(void)
186 {
187 u32 eax, edx;
189 /* We turn off the EFER_SVME bit. */
190 rdmsr(MSR_EFER, eax, edx);
191 eax &= ~EFER_SVME;
192 wrmsr(MSR_EFER, eax, edx);
194 printk("AMD SVM Extension is disabled.\n");
195 }
197 int svm_initialize_guest_resources(struct vcpu *v)
198 {
199 svm_final_setup_guest(v);
200 return 1;
201 }
203 static void svm_store_cpu_guest_regs(
204 struct vcpu *v, struct cpu_user_regs *regs, unsigned long *crs)
205 {
206 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
208 if ( regs != NULL )
209 {
210 #if defined (__x86_64__)
211 regs->rip = vmcb->rip;
212 regs->rsp = vmcb->rsp;
213 regs->rflags = vmcb->rflags;
214 regs->cs = vmcb->cs.sel;
215 regs->ds = vmcb->ds.sel;
216 regs->es = vmcb->es.sel;
217 regs->ss = vmcb->ss.sel;
218 regs->gs = vmcb->gs.sel;
219 regs->fs = vmcb->fs.sel;
220 #elif defined (__i386__)
221 regs->eip = vmcb->rip;
222 regs->esp = vmcb->rsp;
223 regs->eflags = vmcb->rflags;
224 regs->cs = vmcb->cs.sel;
225 regs->ds = vmcb->ds.sel;
226 regs->es = vmcb->es.sel;
227 regs->ss = vmcb->ss.sel;
228 regs->gs = vmcb->gs.sel;
229 regs->fs = vmcb->fs.sel;
230 #endif
231 }
233 if ( crs != NULL )
234 {
235 crs[0] = vmcb->cr0;
236 crs[3] = vmcb->cr3;
237 crs[4] = vmcb->cr4;
238 }
239 }
241 static void svm_load_cpu_guest_regs(
242 struct vcpu *v, struct cpu_user_regs *regs)
243 {
244 svm_load_cpu_user_regs(v, regs);
245 }
247 #define IS_CANO_ADDRESS(add) 1
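/*
 * Note: IS_CANO_ADDRESS() is stubbed to 1 above. A minimal sketch of a real
 * canonicality check, assuming 48 implemented virtual-address bits (this
 * helper is not part of the original file):
 */
#if 0   /* example only */
static inline int is_canonical_address(u64 addr)
{
    /* Bits 63:48 must all be copies of bit 47. */
    return ((s64)(addr << 16) >> 16) == (s64)addr;
}
#endif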
249 static inline int long_mode_do_msr_read(struct cpu_user_regs *regs)
250 {
251 u64 msr_content = 0;
252 struct vcpu *vc = current;
253 // struct svm_msr_state *msr = &vc->arch.hvm_svm.msr_content;
254 struct vmcb_struct *vmcb = vc->arch.hvm_svm.vmcb;
256 switch (regs->ecx)
257 {
258 case MSR_EFER:
259 // msr_content = msr->msr_items[SVM_INDEX_MSR_EFER];
260 msr_content = vmcb->efer;
261 msr_content &= ~EFER_SVME;
262 break;
264 case MSR_FS_BASE:
265 msr_content = vmcb->fs.base;
266 break;
268 case MSR_GS_BASE:
269 msr_content = vmcb->gs.base;
270 break;
272 case MSR_SHADOW_GS_BASE:
273 msr_content = vmcb->kerngsbase;
274 break;
276 case MSR_STAR:
277 msr_content = vmcb->star;
278 break;
280 case MSR_LSTAR:
281 msr_content = vmcb->lstar;
282 break;
284 case MSR_CSTAR:
285 msr_content = vmcb->cstar;
286 break;
288 case MSR_SYSCALL_MASK:
289 msr_content = vmcb->sfmask;
290 break;
292 default:
293 return 0;
294 }
296 HVM_DBG_LOG(DBG_LEVEL_2, "mode_do_msr_read: msr_content: %"PRIx64"\n",
297 msr_content);
299 regs->eax = msr_content & 0xffffffff;
300 regs->edx = msr_content >> 32;
301 return 1;
302 }
304 static inline int long_mode_do_msr_write(struct cpu_user_regs *regs)
305 {
306 u64 msr_content = regs->eax | ((u64)regs->edx << 32);
307 struct vcpu *vc = current;
308 struct vmcb_struct *vmcb = vc->arch.hvm_svm.vmcb;
310 HVM_DBG_LOG(DBG_LEVEL_1, "mode_do_msr_write msr %lx "
311 "msr_content %"PRIx64"\n",
312 (unsigned long)regs->ecx, msr_content);
314 switch (regs->ecx)
315 {
316 case MSR_EFER:
317 #ifdef __x86_64__
318 /* offending reserved bit will cause #GP */
319 if ( msr_content & ~(EFER_LME | EFER_LMA | EFER_NX | EFER_SCE) )
320 {
321 printk("trying to set reserved bit in EFER\n");
322 svm_inject_exception(vmcb, TRAP_gp_fault, 1, 0);
323 return 0;
324 }
326 /* LME: 0 -> 1 */
327 if ( msr_content & EFER_LME &&
328 !test_bit(SVM_CPU_STATE_LME_ENABLED, &vc->arch.hvm_svm.cpu_state) )
329 {
330 if ( svm_paging_enabled(vc) ||
331 !test_bit(SVM_CPU_STATE_PAE_ENABLED,
332 &vc->arch.hvm_svm.cpu_state) )
333 {
334 printk("trying to set LME bit when "
335 "in paging mode or PAE bit is not set\n");
336 svm_inject_exception(vmcb, TRAP_gp_fault, 1, 0);
337 return 0;
338 }
339 set_bit(SVM_CPU_STATE_LME_ENABLED, &vc->arch.hvm_svm.cpu_state);
340 }
342 /* We have already recorded that we want LME, so it will be set
343 * next time CR0 gets updated. So we clear that bit and continue.
344 */
345 if ((msr_content ^ vmcb->efer) & EFER_LME)
346 msr_content &= ~EFER_LME;
347 /* No update for LME/LMA since it has no effect */
348 #endif
349 vmcb->efer = msr_content | EFER_SVME;
350 break;
352 case MSR_FS_BASE:
353 case MSR_GS_BASE:
354 if (!(SVM_LONG_GUEST(vc)))
355 domain_crash_synchronous();
357 if (!IS_CANO_ADDRESS(msr_content))
358 {
359 HVM_DBG_LOG(DBG_LEVEL_1, "Non-canonical address in MSR write\n");
360 svm_inject_exception(vmcb, TRAP_gp_fault, 1, 0);
361 }
363 if (regs->ecx == MSR_FS_BASE)
364 vmcb->fs.base = msr_content;
365 else
366 vmcb->gs.base = msr_content;
367 break;
369 case MSR_SHADOW_GS_BASE:
370 vmcb->kerngsbase = msr_content;
371 break;
373 case MSR_STAR:
374 vmcb->star = msr_content;
375 break;
377 case MSR_LSTAR:
378 vmcb->lstar = msr_content;
379 break;
381 case MSR_CSTAR:
382 vmcb->cstar = msr_content;
383 break;
385 case MSR_SYSCALL_MASK:
386 vmcb->sfmask = msr_content;
387 break;
389 default:
390 return 0;
391 }
392 return 1;
393 }
395 int svm_realmode(struct vcpu *v)
396 {
397 unsigned long cr0 = v->arch.hvm_svm.cpu_shadow_cr0;
398 unsigned long eflags = v->arch.hvm_svm.vmcb->rflags;
400 return (eflags & X86_EFLAGS_VM) || !(cr0 & X86_CR0_PE);
401 }
403 int svm_instruction_length(struct vcpu *v)
404 {
405 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
406 unsigned long cr0 = vmcb->cr0, eflags = vmcb->rflags, mode;
407 /* Check which operating mode the guest is running in */
408 if( vmcb->efer & EFER_LMA )
409 mode = vmcb->cs.attributes.fields.l ? 8 : 4;
410 else
411 mode = (eflags & X86_EFLAGS_VM) || !(cr0 & X86_CR0_PE) ? 2 : 4;
412 return svm_instrlen(guest_cpu_user_regs(), mode);
413 }
415 unsigned long svm_get_ctrl_reg(struct vcpu *v, unsigned int num)
416 {
417 switch ( num )
418 {
419 case 0:
420 return v->arch.hvm_svm.cpu_shadow_cr0;
421 case 2:
422 return v->arch.hvm_svm.cpu_cr2;
423 case 3:
424 return v->arch.hvm_svm.cpu_cr3;
425 default:
426 BUG();
427 }
428 return 0; /* dummy */
429 }
431 int start_svm(void)
432 {
433 u32 eax, ecx, edx;
435 /* Xen does not fill x86_capability words except 0. */
436 ecx = cpuid_ecx(0x80000001);
437 boot_cpu_data.x86_capability[5] = ecx;
439 if (!(test_bit(X86_FEATURE_SVME, &boot_cpu_data.x86_capability)))
440 return 0;
442 rdmsr(MSR_EFER, eax, edx);
443 eax |= EFER_SVME;
444 wrmsr(MSR_EFER, eax, edx);
445 asidpool_init(smp_processor_id());
446 printk("AMD SVM Extension is enabled for cpu %d.\n", smp_processor_id());
448 /* Setup HVM interfaces */
449 hvm_funcs.disable = stop_svm;
451 hvm_funcs.initialize_guest_resources = svm_initialize_guest_resources;
452 hvm_funcs.relinquish_guest_resources = svm_relinquish_guest_resources;
454 hvm_funcs.store_cpu_guest_regs = svm_store_cpu_guest_regs;
455 hvm_funcs.load_cpu_guest_regs = svm_load_cpu_guest_regs;
457 hvm_funcs.realmode = svm_realmode;
458 hvm_funcs.paging_enabled = svm_paging_enabled;
459 hvm_funcs.instruction_length = svm_instruction_length;
460 hvm_funcs.get_guest_ctrl_reg = svm_get_ctrl_reg;
462 hvm_enabled = 1;
464 return 1;
465 }
467 int svm_dbg_on = 0;
469 static inline int svm_do_debugout(unsigned long exit_code)
470 {
471 int i;
473 static unsigned long counter = 0;
474 static unsigned long works[] =
475 {
476 VMEXIT_IOIO,
477 VMEXIT_HLT,
478 VMEXIT_CPUID,
479 VMEXIT_DR0_READ,
480 VMEXIT_DR1_READ,
481 VMEXIT_DR2_READ,
482 VMEXIT_DR3_READ,
483 VMEXIT_DR6_READ,
484 VMEXIT_DR7_READ,
485 VMEXIT_DR0_WRITE,
486 VMEXIT_DR1_WRITE,
487 VMEXIT_DR2_WRITE,
488 VMEXIT_DR3_WRITE,
489 VMEXIT_CR0_READ,
490 VMEXIT_CR0_WRITE,
491 VMEXIT_CR3_READ,
492 VMEXIT_CR4_READ,
493 VMEXIT_MSR,
494 VMEXIT_CR0_WRITE,
495 VMEXIT_CR3_WRITE,
496 VMEXIT_CR4_WRITE,
497 VMEXIT_EXCEPTION_PF,
498 VMEXIT_INTR,
499 VMEXIT_INVLPG,
500 VMEXIT_EXCEPTION_NM
501 };
504 #if 0
505 if (svm_dbg_on && exit_code != 0x7B)
506 return 1;
507 #endif
509 counter++;
511 #if 0
512 if ((exit_code == 0x4E
513 || exit_code == VMEXIT_CR0_READ
514 || exit_code == VMEXIT_CR0_WRITE)
515 && counter < 200000)
516 return 0;
518 if ((exit_code == 0x4E) && counter < 500000)
519 return 0;
520 #endif
522 for (i = 0; i < sizeof(works) / sizeof(works[0]); i++)
523 if (exit_code == works[i])
524 return 0;
526 return 1;
527 }
530 void save_svm_cpu_user_regs(struct vcpu *v, struct cpu_user_regs *ctxt)
531 {
532 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
534 ASSERT(vmcb);
536 ctxt->eax = vmcb->rax;
537 ctxt->ss = vmcb->ss.sel;
538 ctxt->esp = vmcb->rsp;
539 ctxt->eflags = vmcb->rflags;
540 ctxt->cs = vmcb->cs.sel;
541 ctxt->eip = vmcb->rip;
543 ctxt->gs = vmcb->gs.sel;
544 ctxt->fs = vmcb->fs.sel;
545 ctxt->es = vmcb->es.sel;
546 ctxt->ds = vmcb->ds.sel;
547 }
549 #if defined (__x86_64__)
550 void svm_store_cpu_user_regs(struct cpu_user_regs *regs, struct vcpu *v )
551 {
552 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
554 regs->rip = vmcb->rip;
555 regs->rsp = vmcb->rsp;
556 regs->rflags = vmcb->rflags;
557 regs->cs = vmcb->cs.sel;
558 regs->ds = vmcb->ds.sel;
559 regs->es = vmcb->es.sel;
560 regs->ss = vmcb->ss.sel;
561 }
562 #elif defined (__i386__)
563 void svm_store_cpu_user_regs(struct cpu_user_regs *regs, struct vcpu *v)
564 {
565 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
567 regs->eip = vmcb->rip;
568 regs->esp = vmcb->rsp;
569 regs->eflags = vmcb->rflags;
570 regs->cs = vmcb->cs.sel;
571 regs->ds = vmcb->ds.sel;
572 regs->es = vmcb->es.sel;
573 regs->ss = vmcb->ss.sel;
574 }
575 #endif
577 /* XXX Use svm_load_cpu_guest_regs instead */
578 #if defined (__i386__)
579 void svm_load_cpu_user_regs(struct vcpu *v, struct cpu_user_regs *regs)
580 {
581 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
582 u32 *intercepts = &v->arch.hvm_svm.vmcb->exception_intercepts;
584 /* Write the guest register value into VMCB */
585 vmcb->rax = regs->eax;
586 vmcb->ss.sel = regs->ss;
587 vmcb->rsp = regs->esp;
588 vmcb->rflags = regs->eflags;
589 vmcb->cs.sel = regs->cs;
590 vmcb->rip = regs->eip;
591 if (regs->eflags & EF_TF)
592 *intercepts |= EXCEPTION_BITMAP_DB;
593 else
594 *intercepts &= ~EXCEPTION_BITMAP_DB;
595 }
596 #else /* (__i386__) */
597 void svm_load_cpu_user_regs(struct vcpu *v, struct cpu_user_regs *regs)
598 {
599 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
600 u32 *intercepts = &v->arch.hvm_svm.vmcb->exception_intercepts;
602 /* Write the guest register value into VMCB */
603 vmcb->rax = regs->rax;
604 vmcb->ss.sel = regs->ss;
605 vmcb->rsp = regs->rsp;
606 vmcb->rflags = regs->rflags;
607 vmcb->cs.sel = regs->cs;
608 vmcb->rip = regs->rip;
609 if (regs->rflags & EF_TF)
610 *intercepts |= EXCEPTION_BITMAP_DB;
611 else
612 *intercepts &= ~EXCEPTION_BITMAP_DB;
613 }
614 #endif /* !(__i386__) */
616 int svm_paging_enabled(struct vcpu *v)
617 {
618 unsigned long cr0;
620 cr0 = v->arch.hvm_svm.cpu_shadow_cr0;
622 return (cr0 & X86_CR0_PE) && (cr0 & X86_CR0_PG);
623 }
626 /* Make sure that Xen intercepts any FP accesses from the current vcpu */
627 void svm_stts(struct vcpu *v)
628 {
629 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
631 /* FPU state already dirty? Then no need to setup_fpu() lazily. */
632 if ( test_bit(_VCPUF_fpu_dirtied, &v->vcpu_flags) )
633 return;
635 /*
636 * If the guest does not have TS enabled then we must cause and handle an
637 * exception on first use of the FPU. If the guest *does* have TS enabled
638 * then this is not necessary: no FPU activity can occur until the guest
639 * clears CR0.TS, and we will initialise the FPU when that happens.
640 */
641 if ( !(v->arch.hvm_svm.cpu_shadow_cr0 & X86_CR0_TS) )
642 {
643 v->arch.hvm_svm.vmcb->exception_intercepts |= EXCEPTION_BITMAP_NM;
644 vmcb->cr0 |= X86_CR0_TS;
645 }
646 }
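/*
 * The complementary path is svm_do_no_device_fault() below: once the guest
 * first touches the FPU and the #NM intercept fires, it calls setup_fpu(),
 * drops the #NM intercept and clears CR0.TS again (unless the guest itself
 * had set TS).
 */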
648 static void arch_svm_do_launch(struct vcpu *v)
649 {
650 cpu_user_regs_t *regs = &current->arch.guest_context.user_regs;
651 int error;
653 #if 0
654 if (svm_dbg_on)
655 printk("Do launch\n");
656 #endif
657 error = construct_vmcb(&v->arch.hvm_svm, regs);
658 if ( error < 0 )
659 {
660 if (v->vcpu_id == 0) {
661 printk("Failed to construct a new VMCB for BSP.\n");
662 } else {
663 printk("Failed to construct a new VMCB for AP %d\n", v->vcpu_id);
664 }
665 domain_crash_synchronous();
666 }
668 svm_do_launch(v);
669 #if 0
670 if (svm_dbg_on)
671 svm_dump_host_regs(__func__);
672 #endif
673 reset_stack_and_jump(svm_asm_do_launch);
674 }
676 static void svm_freeze_time(struct vcpu *v)
677 {
678 struct hvm_virpit *vpit = &v->domain->arch.hvm_domain.vpit;
680 if ( vpit->first_injected && !v->domain->arch.hvm_domain.guest_time ) {
681 v->domain->arch.hvm_domain.guest_time = svm_get_guest_time(v);
682 stop_timer(&(vpit->pit_timer));
683 }
684 }
686 static void svm_ctxt_switch_from(struct vcpu *v)
687 {
688 svm_freeze_time(v);
689 }
691 static void svm_ctxt_switch_to(struct vcpu *v)
692 {
693 }
695 void svm_final_setup_guest(struct vcpu *v)
696 {
697 v->arch.schedule_tail = arch_svm_do_launch;
698 v->arch.ctxt_switch_from = svm_ctxt_switch_from;
699 v->arch.ctxt_switch_to = svm_ctxt_switch_to;
701 if (v == v->domain->vcpu[0])
702 {
703 struct domain *d = v->domain;
704 struct vcpu *vc;
706 /* Initialize monitor page table */
707 for_each_vcpu(d, vc)
708 vc->arch.monitor_table = mk_pagetable(0);
710 /*
711 * This is required to be done once per domain.
712 * TODO: add a separate function to do these.
713 */
714 memset(&d->shared_info->evtchn_mask[0], 0xff,
715 sizeof(d->shared_info->evtchn_mask));
717 /*
718 * Put the domain in shadow mode even though we're going to be using
719 * the shared 1:1 page table initially. It shouldn't hurt
720 */
721 shadow_mode_enable(d,
722 SHM_enable|SHM_refcounts|
723 SHM_translate|SHM_external|SHM_wr_pt_pte);
724 }
725 }
728 static void svm_relinquish_guest_resources(struct domain *d)
729 {
730 extern void destroy_vmcb(struct arch_svm_struct *); /* XXX */
731 struct vcpu *v;
733 for_each_vcpu ( d, v )
734 {
735 if ( !test_bit(_VCPUF_initialised, &v->vcpu_flags) )
736 continue;
737 #if 0
738 /* Memory leak by not freeing this. XXXKAF: *Why* is this not per core?? */
739 free_host_save_area(v->arch.hvm_svm.host_save_area);
740 #endif
742 destroy_vmcb(&v->arch.hvm_svm);
743 free_monitor_pagetable(v);
744 kill_timer(&v->arch.hvm_svm.hlt_timer);
745 if ( hvm_apic_support(v->domain) && (VLAPIC(v) != NULL) )
746 {
747 kill_timer( &(VLAPIC(v)->vlapic_timer) );
748 xfree(VLAPIC(v));
749 }
750 }
752 kill_timer(&d->arch.hvm_domain.vpit.pit_timer);
754 if ( d->arch.hvm_domain.shared_page_va )
755 unmap_domain_page_global(
756 (void *)d->arch.hvm_domain.shared_page_va);
758 shadow_direct_map_clean(d);
759 }
762 void arch_svm_do_resume(struct vcpu *v)
763 {
764 /* pinning VCPU to a different core? */
765 if ( v->arch.hvm_svm.launch_core == smp_processor_id()) {
766 svm_do_resume( v );
767 reset_stack_and_jump( svm_asm_do_resume );
768 }
769 else {
770 printk("VCPU core pinned: %d to %d\n",
771 v->arch.hvm_svm.launch_core, smp_processor_id() );
772 v->arch.hvm_svm.launch_core = smp_processor_id();
773 svm_migrate_timers( v );
774 svm_do_resume( v );
775 reset_stack_and_jump( svm_asm_do_resume );
776 }
777 }
780 void svm_migrate_timers(struct vcpu *v)
781 {
782 struct hvm_virpit *vpit = &(v->domain->arch.hvm_domain.vpit);
784 migrate_timer( &vpit->pit_timer, v->processor );
785 migrate_timer( &v->arch.hvm_svm.hlt_timer, v->processor );
786 if ( hvm_apic_support(v->domain) && VLAPIC( v ))
787 migrate_timer( &(VLAPIC(v)->vlapic_timer ), v->processor );
788 }
791 static int svm_do_page_fault(unsigned long va, struct cpu_user_regs *regs)
792 {
793 struct vcpu *v = current;
794 unsigned long eip;
795 unsigned long gpa; /* FIXME: PAE */
796 int result;
797 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
799 ASSERT(vmcb);
801 //#if HVM_DEBUG
802 eip = vmcb->rip;
803 HVM_DBG_LOG(DBG_LEVEL_VMMU,
804 "svm_do_page_fault = 0x%lx, eip = %lx, error_code = %lx",
805 va, eip, (unsigned long)regs->error_code);
806 //#endif
808 if ( !svm_paging_enabled(v) )
809 {
810 if ( shadow_direct_map_fault(va, regs) )
811 return 1;
813 handle_mmio(va, va);
814 TRACE_VMEXIT(2,2);
815 return 1;
816 }
819 gpa = gva_to_gpa(va);
821 /* Use 1:1 page table to identify MMIO address space */
822 if (mmio_space(gpa))
823 {
824 /* No support for APIC */
825 if (!hvm_apic_support(v->domain) && gpa >= 0xFEC00000)
826 {
827 int inst_len;
828 inst_len = svm_instruction_length(v);
829 if (inst_len == -1)
830 {
831 printf("%s: INST_LEN - Unable to decode properly.\n", __func__);
832 domain_crash_synchronous();
833 }
835 __update_guest_eip(vmcb, inst_len);
837 return 1;
838 }
840 TRACE_VMEXIT (2,2);
841 handle_mmio(va, gpa);
843 return 1;
844 }
846 result = shadow_fault(va, regs);
848 if( result ) {
849 /* Let's make sure that the Guest TLB is flushed */
850 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
851 }
853 TRACE_VMEXIT (2,result);
855 return result;
856 }
859 static void svm_do_no_device_fault(struct vmcb_struct *vmcb)
860 {
861 struct vcpu *v = current;
863 setup_fpu(v);
864 vmcb->exception_intercepts &= ~EXCEPTION_BITMAP_NM;
866 if ( !(v->arch.hvm_svm.cpu_shadow_cr0 & X86_CR0_TS) )
867 vmcb->cr0 &= ~X86_CR0_TS;
868 }
871 static void svm_do_general_protection_fault(struct vcpu *v,
872 struct cpu_user_regs *regs)
873 {
874 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
875 unsigned long eip, error_code;
877 ASSERT(vmcb);
879 eip = vmcb->rip;
880 error_code = vmcb->exitinfo1;
882 if (vmcb->idtr.limit == 0) {
883 printf("Huh? We got a GP Fault with an invalid IDTR!\n");
884 svm_dump_vmcb(__func__, vmcb);
885 svm_dump_regs(__func__, regs);
886 svm_dump_inst(vmcb->rip);
887 __hvm_bug(regs);
888 }
890 HVM_DBG_LOG(DBG_LEVEL_1,
891 "svm_general_protection_fault: eip = %lx, erro_code = %lx",
892 eip, error_code);
894 HVM_DBG_LOG(DBG_LEVEL_1,
895 "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx",
896 (unsigned long)regs->eax, (unsigned long)regs->ebx,
897 (unsigned long)regs->ecx, (unsigned long)regs->edx,
898 (unsigned long)regs->esi, (unsigned long)regs->edi);
901 /* Reflect it back into the guest */
902 svm_inject_exception(vmcb, TRAP_gp_fault, 1, error_code);
903 }
905 /* Reserved bits: [31:14], [12:1] */
906 #define SVM_VCPU_CPUID_L1_RESERVED 0xffffdffe
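/* Derivation of the mask above: bits [31:14] = 0xffffc000 and
 * bits [12:1] = 0x00001ffe, which OR together to 0xffffdffe. */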
908 static void svm_vmexit_do_cpuid(struct vmcb_struct *vmcb, unsigned long input,
909 struct cpu_user_regs *regs)
910 {
911 unsigned int eax, ebx, ecx, edx;
912 unsigned long eip;
913 struct vcpu *v = current;
914 int inst_len;
916 ASSERT(vmcb);
918 eip = vmcb->rip;
920 HVM_DBG_LOG(DBG_LEVEL_1,
921 "do_cpuid: (eax) %lx, (ebx) %lx, (ecx) %lx, (edx) %lx,"
922 " (esi) %lx, (edi) %lx",
923 (unsigned long)regs->eax, (unsigned long)regs->ebx,
924 (unsigned long)regs->ecx, (unsigned long)regs->edx,
925 (unsigned long)regs->esi, (unsigned long)regs->edi);
927 cpuid(input, &eax, &ebx, &ecx, &edx);
929 if (input == 1)
930 {
931 if ( !hvm_apic_support(v->domain) ||
932 !vlapic_global_enabled((VLAPIC(v))) )
933 clear_bit(X86_FEATURE_APIC, &edx);
935 #if CONFIG_PAGING_LEVELS < 3
936 clear_bit(X86_FEATURE_NX, &edx);
937 clear_bit(X86_FEATURE_PAE, &edx);
938 clear_bit(X86_FEATURE_PSE, &edx);
939 clear_bit(X86_FEATURE_PSE36, &edx);
940 #else
941 if ( v->domain->arch.ops->guest_paging_levels == PAGING_L2 )
942 {
943 if ( !v->domain->arch.hvm_domain.pae_enabled )
944 {
945 clear_bit(X86_FEATURE_PAE, &edx);
946 clear_bit(X86_FEATURE_NX, &edx);
947 }
948 clear_bit(X86_FEATURE_PSE, &edx);
949 clear_bit(X86_FEATURE_PSE36, &edx);
950 }
951 #endif
952 /* Clear out reserved bits. */
953 ecx &= ~SVM_VCPU_CPUID_L1_RESERVED; /* mask off reserved bits */
954 clear_bit(X86_FEATURE_MWAIT & 31, &ecx);
955 }
956 #ifdef __i386__
957 else if ( input == 0x80000001 )
958 {
959 /* Mask feature for Intel ia32e or AMD long mode. */
960 clear_bit(X86_FEATURE_LM & 31, &edx);
961 }
962 #endif
964 regs->eax = (unsigned long)eax;
965 regs->ebx = (unsigned long)ebx;
966 regs->ecx = (unsigned long)ecx;
967 regs->edx = (unsigned long)edx;
969 HVM_DBG_LOG(DBG_LEVEL_1,
970 "svm_vmexit_do_cpuid: eip: %lx, input: %lx, out:eax=%x, "
971 "ebx=%x, ecx=%x, edx=%x",
972 eip, input, eax, ebx, ecx, edx);
974 inst_len = __get_instruction_length(vmcb, INSTR_CPUID, NULL);
975 ASSERT(inst_len > 0);
976 __update_guest_eip(vmcb, inst_len);
977 }
980 static inline unsigned long *get_reg_p(unsigned int gpreg,
981 struct cpu_user_regs *regs, struct vmcb_struct *vmcb)
982 {
983 unsigned long *reg_p = NULL;
984 switch (gpreg)
985 {
986 case SVM_REG_EAX:
987 reg_p = (unsigned long *)&regs->eax;
988 break;
989 case SVM_REG_EBX:
990 reg_p = (unsigned long *)&regs->ebx;
991 break;
992 case SVM_REG_ECX:
993 reg_p = (unsigned long *)&regs->ecx;
994 break;
995 case SVM_REG_EDX:
996 reg_p = (unsigned long *)&regs->edx;
997 break;
998 case SVM_REG_EDI:
999 reg_p = (unsigned long *)&regs->edi;
1000 break;
1001 case SVM_REG_ESI:
1002 reg_p = (unsigned long *)&regs->esi;
1003 break;
1004 case SVM_REG_EBP:
1005 reg_p = (unsigned long *)&regs->ebp;
1006 break;
1007 case SVM_REG_ESP:
1008 reg_p = (unsigned long *)&vmcb->rsp;
1009 break;
1010 #if __x86_64__
1011 case SVM_REG_R8:
1012 reg_p = (unsigned long *)&regs->r8;
1013 break;
1014 case SVM_REG_R9:
1015 reg_p = (unsigned long *)&regs->r9;
1016 break;
1017 case SVM_REG_R10:
1018 reg_p = (unsigned long *)&regs->r10;
1019 break;
1020 case SVM_REG_R11:
1021 reg_p = (unsigned long *)&regs->r11;
1022 break;
1023 case SVM_REG_R12:
1024 reg_p = (unsigned long *)&regs->r12;
1025 break;
1026 case SVM_REG_R13:
1027 reg_p = (unsigned long *)&regs->r13;
1028 break;
1029 case SVM_REG_R14:
1030 reg_p = (unsigned long *)&regs->r14;
1031 break;
1032 case SVM_REG_R15:
1033 reg_p = (unsigned long *)&regs->r15;
1034 break;
1035 #endif
1036 default:
1037 BUG();
1040 return reg_p;
1044 static inline unsigned long get_reg(unsigned int gpreg,
1045 struct cpu_user_regs *regs, struct vmcb_struct *vmcb)
1047 unsigned long *gp;
1048 gp = get_reg_p(gpreg, regs, vmcb);
1049 return *gp;
1053 static inline void set_reg(unsigned int gpreg, unsigned long value,
1054 struct cpu_user_regs *regs, struct vmcb_struct *vmcb)
1056 unsigned long *gp;
1057 gp = get_reg_p(gpreg, regs, vmcb);
1058 *gp = value;
1062 static void svm_dr_access (struct vcpu *v, unsigned int reg, unsigned int type,
1063 struct cpu_user_regs *regs)
1065 unsigned long *reg_p = 0;
1066 unsigned int gpreg = 0;
1067 unsigned long eip;
1068 int inst_len;
1069 int index;
1070 struct vmcb_struct *vmcb;
1071 u8 buffer[MAX_INST_LEN];
1072 u8 prefix = 0;
1074 vmcb = v->arch.hvm_svm.vmcb;
1076 ASSERT(vmcb);
1078 eip = vmcb->rip;
1079 inst_copy_from_guest(buffer, svm_rip2pointer(vmcb), sizeof(buffer));
1080 index = skip_prefix_bytes(buffer, sizeof(buffer));
1082 ASSERT(buffer[index+0] == 0x0f && (buffer[index+1] & 0xFD) == 0x21);
1084 if (index > 0 && (buffer[index-1] & 0xF0) == 0x40)
1085 prefix = buffer[index-1];
1087 gpreg = decode_src_reg(prefix, buffer[index + 2]);
1088 ASSERT(reg == decode_dest_reg(prefix, buffer[index + 2]));
1090 HVM_DBG_LOG(DBG_LEVEL_1, "svm_dr_access : eip=%lx, reg=%d, gpreg = %x",
1091 eip, reg, gpreg);
1093 reg_p = get_reg_p(gpreg, regs, vmcb);
1095 switch (type)
1097 case TYPE_MOV_TO_DR:
1098 inst_len = __get_instruction_length(vmcb, INSTR_MOV2DR, buffer);
1099 v->arch.guest_context.debugreg[reg] = *reg_p;
1100 break;
1101 case TYPE_MOV_FROM_DR:
1102 inst_len = __get_instruction_length(vmcb, INSTR_MOVDR2, buffer);
1103 *reg_p = v->arch.guest_context.debugreg[reg];
1104 break;
1105 default:
1106 __hvm_bug(regs);
1107 break;
1109 ASSERT(inst_len > 0);
1110 __update_guest_eip(vmcb, inst_len);
1114 static unsigned int check_for_null_selector(struct vmcb_struct *vmcb,
1115 unsigned int dir, unsigned long *base, unsigned int real)
1118 unsigned char inst[MAX_INST_LEN];
1119 segment_selector_t seg;
1120 int i;
1122 memset(inst, 0, MAX_INST_LEN);
1123 if (inst_copy_from_guest(inst, svm_rip2pointer(vmcb), sizeof(inst))
1124 != MAX_INST_LEN)
1126 printk("check_for_null_selector: get guest instruction failed\n");
1127 domain_crash_synchronous();
1130 for (i = 0; i < MAX_INST_LEN; i++)
1132 switch (inst[i])
1134 case 0xf3: /* REPZ */
1135 case 0xf2: /* REPNZ */
1136 case 0xf0: /* LOCK */
1137 case 0x66: /* data32 */
1138 case 0x67: /* addr32 */
1139 #if __x86_64__
1140 /* REX prefixes */
1141 case 0x40:
1142 case 0x41:
1143 case 0x42:
1144 case 0x43:
1145 case 0x44:
1146 case 0x45:
1147 case 0x46:
1148 case 0x47:
1150 case 0x48:
1151 case 0x49:
1152 case 0x4a:
1153 case 0x4b:
1154 case 0x4c:
1155 case 0x4d:
1156 case 0x4e:
1157 case 0x4f:
1158 #endif
1159 continue;
1160 case 0x2e: /* CS */
1161 seg = vmcb->cs;
1162 break;
1163 case 0x36: /* SS */
1164 seg = vmcb->ss;
1165 break;
1166 case 0x26: /* ES */
1167 seg = vmcb->es;
1168 break;
1169 case 0x64: /* FS */
1170 seg = vmcb->fs;
1171 break;
1172 case 0x65: /* GS */
1173 seg = vmcb->gs;
1174 break;
1175 case 0x3e: /* DS */
1176 /* FALLTHROUGH */
1177 seg = vmcb->ds;
1178 break;
1179 default:
1180 if (dir == IOREQ_READ) /* IN/INS instruction? */
1181 seg = vmcb->es;
1182 else
1183 seg = vmcb->ds;
1186 if (base)
1187 *base = seg.base;
1189 return seg.attributes.fields.p;
1192 ASSERT(0);
1193 return 0;
1197 /* Get the address of INS/OUTS instruction */
1198 static inline unsigned long svm_get_io_address(struct vmcb_struct *vmcb,
1199 struct cpu_user_regs *regs, unsigned int dir, unsigned int real)
1201 unsigned long addr = 0;
1202 unsigned long base = 0;
1204 check_for_null_selector(vmcb, dir, &base, real);
1206 if (dir == IOREQ_WRITE)
1208 if (real)
1209 addr = (regs->esi & 0xFFFF) + base;
1210 else
1211 addr = regs->esi + base;
1213 else
1215 if (real)
1216 addr = (regs->edi & 0xFFFF) + base;
1217 else
1218 addr = regs->edi + base;
1221 return addr;
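/*
 * Worked example (illustrative): an OUTS executed in real mode with
 * DS.base = 0x12340 and SI = 0x0010 yields addr = 0x12340 + 0x0010 = 0x12350;
 * in protected mode the full (e)si/(e)di value is added to the segment base
 * found by check_for_null_selector() above.
 */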
1225 static void svm_io_instruction(struct vcpu *v, struct cpu_user_regs *regs)
1227 struct mmio_op *mmio_opp;
1228 unsigned long eip, cs, eflags, cr0;
1229 unsigned long port;
1230 unsigned int real, size, dir;
1231 ioio_info_t info;
1233 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1235 ASSERT(vmcb);
1236 mmio_opp = &current->arch.hvm_vcpu.mmio_op;
1237 mmio_opp->instr = INSTR_PIO;
1238 mmio_opp->flags = 0;
1240 eip = vmcb->rip;
1241 cs = vmcb->cs.sel;
1242 eflags = vmcb->rflags;
1244 info.bytes = vmcb->exitinfo1;
1246 port = info.fields.port; /* port used to be addr */
1247 dir = info.fields.type; /* direction */
1248 if (info.fields.sz32)
1249 size = 4;
1250 else if (info.fields.sz16)
1251 size = 2;
1252 else
1253 size = 1;
1255 cr0 = vmcb->cr0;
1256 real = (eflags & X86_EFLAGS_VM) || !(cr0 & X86_CR0_PE);
1258 HVM_DBG_LOG(DBG_LEVEL_IO,
1259 "svm_io_instruction: port 0x%lx real %d, eip=%lx:%lx, "
1260 "exit_qualification = %lx",
1261 (unsigned long) port, real, cs, eip, (unsigned long)info.bytes);
1262 /* string instruction */
1263 if (info.fields.str)
1265 unsigned long addr, count = 1;
1266 int sign = regs->eflags & EF_DF ? -1 : 1;
1268 /* Need the original rip, here. */
1269 addr = svm_get_io_address(vmcb, regs, dir, real);
1271 /* "rep" prefix */
1272 if (info.fields.rep)
1274 mmio_opp->flags |= REPZ;
1275 count = real ? regs->ecx & 0xFFFF : regs->ecx;
1278 /*
1279 * Handle string pio instructions that cross pages or that
1280 * are unaligned. See the comments in hvm_platform.c/handle_mmio()
1281 */
1282 if ((addr & PAGE_MASK) != ((addr + size - 1) & PAGE_MASK))
1284 unsigned long value = 0;
1286 mmio_opp->flags |= OVERLAP;
1288 if (dir == IOREQ_WRITE)
1289 hvm_copy(&value, addr, size, HVM_COPY_IN);
1291 send_pio_req(regs, port, 1, size, value, dir, 0);
1293 else
1295 if ((addr & PAGE_MASK) != ((addr + count * size - 1) & PAGE_MASK))
1297 if (sign > 0)
1298 count = (PAGE_SIZE - (addr & ~PAGE_MASK)) / size;
1299 else
1300 count = (addr & ~PAGE_MASK) / size;
1302 else
1303 vmcb->rip = vmcb->exitinfo2;
1305 send_pio_req(regs, port, count, size, addr, dir, 1);
1308 else
1310 /*
1311 * On SVM, the RIP of the instruction following the IN/OUT is saved in
1312 * ExitInfo2.
1313 */
1314 vmcb->rip = vmcb->exitinfo2;
1316 if (port == 0xe9 && dir == IOREQ_WRITE && size == 1)
1317 hvm_print_line(v, regs->eax); /* guest debug output */
1319 send_pio_req(regs, port, 1, size, regs->eax, dir, 0);
1323 static int svm_set_cr0(unsigned long value)
1325 struct vcpu *v = current;
1326 unsigned long mfn;
1327 int paging_enabled;
1328 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1330 ASSERT(vmcb);
1332 /* We don't want to lose PG. ET is reserved and should always be 1. */
1333 paging_enabled = svm_paging_enabled(v);
1334 value |= X86_CR0_ET;
1335 vmcb->cr0 = value | X86_CR0_PG;
1336 v->arch.hvm_svm.cpu_shadow_cr0 = value;
1338 /* TS cleared? Then initialise FPU now. */
1339 if ( !(value & X86_CR0_TS) )
1341 setup_fpu(v);
1342 vmcb->exception_intercepts &= ~EXCEPTION_BITMAP_NM;
1345 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR0 value = %lx\n", value);
1347 if ((value & X86_CR0_PE) && (value & X86_CR0_PG) && !paging_enabled)
1349 /* The guest CR3 must be pointing to guest physical memory. */
1350 if (!VALID_MFN(mfn =
1351 get_mfn_from_gpfn(v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT))
1352 || !get_page(mfn_to_page(mfn), v->domain))
1354 printk("Invalid CR3 value = %lx\n", v->arch.hvm_svm.cpu_cr3);
1355 domain_crash_synchronous(); /* need to take a clean path */
1358 #if defined(__x86_64__)
1359 if (test_bit(SVM_CPU_STATE_LME_ENABLED, &v->arch.hvm_svm.cpu_state)
1360 && !test_bit(SVM_CPU_STATE_PAE_ENABLED,
1361 &v->arch.hvm_svm.cpu_state))
1363 HVM_DBG_LOG(DBG_LEVEL_1, "Enable paging before PAE enable\n");
1364 svm_inject_exception(vmcb, TRAP_gp_fault, 1, 0);
1367 if (test_bit(SVM_CPU_STATE_LME_ENABLED, &v->arch.hvm_svm.cpu_state))
1369 /* At this point PAE should already be enabled */
1370 HVM_DBG_LOG(DBG_LEVEL_1, "Enable the Long mode\n");
1371 set_bit(SVM_CPU_STATE_LMA_ENABLED,
1372 &v->arch.hvm_svm.cpu_state);
1373 vmcb->efer |= (EFER_LMA | EFER_LME);
1374 if (!shadow_set_guest_paging_levels(v->domain, PAGING_L4) )
1376 printk("Unsupported guest paging levels\n");
1377 domain_crash_synchronous(); /* need to take a clean path */
1380 else
1381 #endif /* __x86_64__ */
1383 #if CONFIG_PAGING_LEVELS >= 3
1384 /* seems it's a 32-bit or 32-bit PAE guest */
1385 if ( test_bit(SVM_CPU_STATE_PAE_ENABLED,
1386 &v->arch.hvm_svm.cpu_state) )
1388 /* The guest enabled PAE first and then enabled PG, so it
1389 * really is a PAE guest */
1390 if ( !shadow_set_guest_paging_levels(v->domain, PAGING_L3) )
1392 printk("Unsupported guest paging levels\n");
1393 domain_crash_synchronous();
1396 else
1398 if ( !shadow_set_guest_paging_levels(v->domain, PAGING_L2) )
1400 printk("Unsupported guest paging levels\n");
1401 domain_crash_synchronous(); /* need to take a clean path */
1404 #endif
1407 /* Now arch.guest_table points to machine physical. */
1408 v->arch.guest_table = mk_pagetable((u64)mfn << PAGE_SHIFT);
1409 update_pagetables(v);
1411 HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx",
1412 (unsigned long) (mfn << PAGE_SHIFT));
1414 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
1415 vmcb->cr3 = pagetable_get_paddr(v->arch.shadow_table);
1417 /* arch->shadow_table should hold the next CR3 for shadow */
1418 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx\n",
1419 v->arch.hvm_svm.cpu_cr3, mfn);
1421 return 1;
1424 if ( !((value & X86_CR0_PE) && (value & X86_CR0_PG)) && paging_enabled )
1425 if ( v->arch.hvm_svm.cpu_cr3 ) {
1426 put_page(mfn_to_page(get_mfn_from_gpfn(
1427 v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT)));
1428 v->arch.guest_table = mk_pagetable(0);
1431 /*
1432 * SVM implements paged real mode, and when we return to real mode
1433 * we revert to the physical mappings that the domain builder
1434 * created.
1435 */
1436 if ((value & X86_CR0_PE) == 0) {
1437 if (value & X86_CR0_PG) {
1438 svm_inject_exception(vmcb, TRAP_gp_fault, 1, 0);
1439 return 0;
1442 clear_all_shadow_status( v->domain );
1443 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
1444 vmcb->cr3 = pagetable_get_paddr(v->domain->arch.phys_table);
1446 else if ( (value & (X86_CR0_PE | X86_CR0_PG)) == X86_CR0_PE )
1448 /* Protected mode with paging disabled: also revert to the 1:1 physical mappings */
1449 clear_all_shadow_status(v->domain);
1450 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
1451 vmcb->cr3 = pagetable_get_paddr(v->domain->arch.phys_table);
1454 return 1;
1457 /*
1458 * Read from control registers. CR0 and CR4 are read from the shadow.
1459 */
1460 static void mov_from_cr(int cr, int gp, struct cpu_user_regs *regs)
1462 unsigned long value = 0;
1463 struct vcpu *v = current;
1464 struct vmcb_struct *vmcb;
1466 vmcb = v->arch.hvm_svm.vmcb;
1467 ASSERT(vmcb);
1469 switch (cr)
1471 case 0:
1472 value = v->arch.hvm_svm.cpu_shadow_cr0;
1473 if (svm_dbg_on)
1474 printk("CR0 read =%lx \n", value );
1475 break;
1476 case 2:
1477 value = vmcb->cr2;
1478 break;
1479 case 3:
1480 value = (unsigned long) v->arch.hvm_svm.cpu_cr3;
1481 if (svm_dbg_on)
1482 printk("CR3 read =%lx \n", value );
1483 break;
1484 case 4:
1485 value = (unsigned long) v->arch.hvm_svm.cpu_shadow_cr4;
1486 if (svm_dbg_on)
1487 printk( "CR4 read=%lx\n", value );
1488 break;
1489 case 8:
1490 #if 0
1491 value = vmcb->m_cr8;
1492 #else
1493 ASSERT(0);
1494 #endif
1495 break;
1497 default:
1498 __hvm_bug(regs);
1501 set_reg(gp, value, regs, vmcb);
1503 HVM_DBG_LOG(DBG_LEVEL_VMMU, "mov_from_cr: CR%d, value = %lx,", cr, value);
1507 static inline int svm_pgbit_test(struct vcpu *v)
1509 return v->arch.hvm_svm.cpu_shadow_cr0 & X86_CR0_PG;
1513 /*
1514 * Write to control registers
1515 */
1516 static int mov_to_cr(int gpreg, int cr, struct cpu_user_regs *regs)
1518 unsigned long value;
1519 unsigned long old_cr;
1520 struct vcpu *v = current;
1521 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1523 ASSERT(vmcb);
1525 value = get_reg(gpreg, regs, vmcb);
1527 HVM_DBG_LOG(DBG_LEVEL_1, "mov_to_cr: CR%d, value = %lx,", cr, value);
1528 HVM_DBG_LOG(DBG_LEVEL_1, "current = %lx,", (unsigned long) current);
1530 switch (cr)
1532 case 0:
1533 if (svm_dbg_on)
1534 printk("CR0 write =%lx \n", value );
1535 return svm_set_cr0(value);
1537 case 3:
1539 unsigned long old_base_mfn, mfn;
1540 if (svm_dbg_on)
1541 printk("CR3 write =%lx \n", value );
1542 /* If paging is not enabled yet, simply copy the value to CR3. */
1543 if (!svm_paging_enabled(v)) {
1544 v->arch.hvm_svm.cpu_cr3 = value;
1545 break;
1547 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
1549 /* We make a new one if the shadow does not exist. */
1550 if (value == v->arch.hvm_svm.cpu_cr3)
1552 /*
1553 * This is a simple TLB flush, implying the guest has
1554 * removed some translation or changed page attributes.
1555 * We simply invalidate the shadow.
1556 */
1557 mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT);
1558 if (mfn != pagetable_get_pfn(v->arch.guest_table))
1559 __hvm_bug(regs);
1560 shadow_sync_all(v->domain);
1562 else
1564 /*
1565 * If different, make a shadow. Check if the PDBR is valid
1566 * first.
1567 */
1568 HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 value = %lx", value);
1569 if (((value >> PAGE_SHIFT) > v->domain->max_pages)
1570 || !VALID_MFN(mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT))
1571 || !get_page(mfn_to_page(mfn), v->domain))
1573 printk("Invalid CR3 value=%lx\n", value);
1574 domain_crash_synchronous(); /* need to take a clean path */
1577 old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
1578 v->arch.guest_table = mk_pagetable((u64)mfn << PAGE_SHIFT);
1580 if (old_base_mfn)
1581 put_page(mfn_to_page(old_base_mfn));
1583 /*
1584 * arch.shadow_table should now hold the next CR3 for shadow
1585 */
1586 #if CONFIG_PAGING_LEVELS >= 3
1587 if ( v->domain->arch.ops->guest_paging_levels == PAGING_L3 )
1588 shadow_sync_all(v->domain);
1589 #endif
1590 v->arch.hvm_svm.cpu_cr3 = value;
1591 update_pagetables(v);
1592 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx", value);
1593 vmcb->cr3 = pagetable_get_paddr(v->arch.shadow_table);
1595 break;
1598 case 4: /* CR4 */
1600 if (svm_dbg_on)
1601 printk( "write cr4=%lx, cr0=%lx\n",
1602 value, v->arch.hvm_svm.cpu_shadow_cr0 );
1603 old_cr = v->arch.hvm_svm.cpu_shadow_cr4;
1604 if ( value & X86_CR4_PAE && !(old_cr & X86_CR4_PAE) )
1606 set_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
1607 if ( svm_pgbit_test(v) )
1609 /* The guest is a 32-bit PAE guest. */
1610 #if CONFIG_PAGING_LEVELS >= 4
1611 unsigned long mfn, old_base_mfn;
1613 if( !shadow_set_guest_paging_levels(v->domain, PAGING_L3) )
1615 printk("Unsupported guest paging levels\n");
1616 domain_crash_synchronous(); /* need to take a clean path */
1619 if ( !VALID_MFN(mfn = get_mfn_from_gpfn(
1620 v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT)) ||
1621 !get_page(mfn_to_page(mfn), v->domain) )
1623 printk("Invalid CR3 value = %lx", v->arch.hvm_svm.cpu_cr3);
1624 domain_crash_synchronous(); /* need to take a clean path */
1627 old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
1628 if ( old_base_mfn )
1629 put_page(mfn_to_page(old_base_mfn));
1631 /*
1632 * Now arch.guest_table points to machine physical.
1633 */
1635 v->arch.guest_table = mk_pagetable((u64)mfn << PAGE_SHIFT);
1636 update_pagetables(v);
1638 HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx",
1639 (unsigned long) (mfn << PAGE_SHIFT));
1641 vmcb->cr3 = pagetable_get_paddr(v->arch.shadow_table);
1643 /*
1644 * arch->shadow_table should hold the next CR3 for shadow
1645 */
1647 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx",
1648 v->arch.hvm_svm.cpu_cr3, mfn);
1649 #endif
1651 else
1653 /* The guest is a 64-bit or 32-bit PAE guest. */
1654 #if CONFIG_PAGING_LEVELS >= 4
1655 if ( (v->domain->arch.ops != NULL) &&
1656 v->domain->arch.ops->guest_paging_levels == PAGING_L2)
1658 /* The guest first enabled PAE without enabling PG; it will
1659 * enable PG later, so treat it as a 32-bit PAE
1660 * guest */
1662 if ( !shadow_set_guest_paging_levels(v->domain, PAGING_L3) )
1664 printk("Unsupported guest paging levels\n");
1665 domain_crash_synchronous();
1668 else
1670 if ( !shadow_set_guest_paging_levels(v->domain,
1671 PAGING_L4) )
1673 printk("Unsupported guest paging levels\n");
1674 domain_crash_synchronous();
1677 #endif
1680 else if (value & X86_CR4_PAE) {
1681 set_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
1682 } else {
1683 if (test_bit(SVM_CPU_STATE_LMA_ENABLED,
1684 &v->arch.hvm_svm.cpu_state)) {
1685 svm_inject_exception(vmcb, TRAP_gp_fault, 1, 0);
1687 clear_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
1690 v->arch.hvm_svm.cpu_shadow_cr4 = value;
1691 vmcb->cr4 = value | SVM_CR4_HOST_MASK;
1693 /*
1694 * Writing to CR4 to modify the PSE, PGE, or PAE flag invalidates
1695 * all TLB entries except global entries.
1696 */
1697 if ((old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE))
1699 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
1700 shadow_sync_all(v->domain);
1702 break;
1705 default:
1706 printk("invalid cr: %d\n", cr);
1707 __hvm_bug(regs);
1710 return 1;
1714 #define ARR_SIZE(x) (sizeof(x) / sizeof(x[0]))
1717 static int svm_cr_access(struct vcpu *v, unsigned int cr, unsigned int type,
1718 struct cpu_user_regs *regs)
1720 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1721 int inst_len = 0;
1722 int index;
1723 unsigned int gpreg;
1724 unsigned long value;
1725 u8 buffer[MAX_INST_LEN];
1726 u8 prefix = 0;
1727 int result = 1;
1728 enum instruction_index list_a[] = {INSTR_MOV2CR, INSTR_CLTS, INSTR_LMSW};
1729 enum instruction_index list_b[] = {INSTR_MOVCR2, INSTR_SMSW};
1730 enum instruction_index match;
1732 ASSERT(vmcb);
1734 inst_copy_from_guest(buffer, svm_rip2pointer(vmcb), sizeof(buffer));
1735 /* Get the index of the first actual instruction byte - we will need to know where the
1736 * prefix lives later on.
1737 */
1738 index = skip_prefix_bytes(buffer, sizeof(buffer));
1740 if (type == TYPE_MOV_TO_CR)
1742 inst_len = __get_instruction_length_from_list(vmcb, list_a,
1743 ARR_SIZE(list_a), &buffer[index], &match);
1745 else
1747 inst_len = __get_instruction_length_from_list(vmcb, list_b,
1748 ARR_SIZE(list_b), &buffer[index], &match);
1751 ASSERT(inst_len > 0);
1753 inst_len += index;
1755 /* Check for REX prefix - it's ALWAYS the last byte of any prefix bytes */
1756 if (index > 0 && (buffer[index-1] & 0xF0) == 0x40)
1757 prefix = buffer[index-1];
1759 HVM_DBG_LOG(DBG_LEVEL_1, "eip = %lx", (unsigned long) vmcb->rip);
1761 switch (match)
1763 case INSTR_MOV2CR:
1764 gpreg = decode_src_reg(prefix, buffer[index+2]);
1765 result = mov_to_cr(gpreg, cr, regs);
1766 break;
1768 case INSTR_MOVCR2:
1769 gpreg = decode_src_reg(prefix, buffer[index+2]);
1770 mov_from_cr(cr, gpreg, regs);
1771 break;
1773 case INSTR_CLTS:
1774 /* TS being cleared means that it's time to restore fpu state. */
1775 setup_fpu(current);
1776 vmcb->exception_intercepts &= ~EXCEPTION_BITMAP_NM;
1777 vmcb->cr0 &= ~X86_CR0_TS; /* clear TS */
1778 v->arch.hvm_svm.cpu_shadow_cr0 &= ~X86_CR0_TS; /* clear TS */
1779 break;
1781 case INSTR_LMSW:
1782 if (svm_dbg_on)
1783 svm_dump_inst(svm_rip2pointer(vmcb));
1785 gpreg = decode_src_reg(prefix, buffer[index+2]);
1786 value = get_reg(gpreg, regs, vmcb) & 0xF;
1788 if (svm_dbg_on)
1789 printk("CR0-LMSW value=%lx, reg=%d, inst_len=%d\n", value, gpreg,
1790 inst_len);
1792 value = (v->arch.hvm_svm.cpu_shadow_cr0 & ~0xF) | value;
1794 if (svm_dbg_on)
1795 printk("CR0-LMSW CR0 - New value=%lx\n", value);
1797 result = svm_set_cr0(value);
1798 break;
1800 case INSTR_SMSW:
1801 svm_dump_inst(svm_rip2pointer(vmcb));
1802 value = v->arch.hvm_svm.cpu_shadow_cr0;
1803 gpreg = decode_src_reg(prefix, buffer[index+2]);
1804 set_reg(gpreg, value, regs, vmcb);
1806 if (svm_dbg_on)
1807 printk("CR0-SMSW value=%lx, reg=%d, inst_len=%d\n", value, gpreg,
1808 inst_len);
1809 break;
1811 default:
1812 __hvm_bug(regs);
1813 break;
1816 ASSERT(inst_len);
1818 __update_guest_eip(vmcb, inst_len);
1820 return result;
1823 static inline void svm_do_msr_access(struct vcpu *v, struct cpu_user_regs *regs)
1825 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1826 int inst_len;
1827 u64 msr_content=0;
1829 ASSERT(vmcb);
1831 HVM_DBG_LOG(DBG_LEVEL_1, "svm_do_msr_access: ecx=%lx, eax=%lx, edx=%lx, "
1832 "exitinfo = %lx", (unsigned long)regs->ecx,
1833 (unsigned long)regs->eax, (unsigned long)regs->edx,
1834 (unsigned long)vmcb->exitinfo1);
1836 /* is it a read? */
1837 if (vmcb->exitinfo1 == 0)
1839 inst_len = __get_instruction_length(vmcb, INSTR_RDMSR, NULL);
1841 regs->edx = 0;
1842 switch (regs->ecx) {
1843 case MSR_IA32_TIME_STAMP_COUNTER:
1845 struct hvm_virpit *vpit;
1847 rdtscll(msr_content);
1848 vpit = &(v->domain->arch.hvm_domain.vpit);
1849 msr_content += vpit->cache_tsc_offset;
1850 break;
1852 case MSR_IA32_SYSENTER_CS:
1853 msr_content = vmcb->sysenter_cs;
1854 break;
1855 case MSR_IA32_SYSENTER_ESP:
1856 msr_content = vmcb->sysenter_esp;
1857 break;
1858 case MSR_IA32_SYSENTER_EIP:
1859 msr_content = vmcb->sysenter_eip;
1860 break;
1861 case MSR_IA32_APICBASE:
1862 msr_content = VLAPIC(v) ? VLAPIC(v)->apic_base_msr : 0;
1863 break;
1864 default:
1865 if (long_mode_do_msr_read(regs))
1866 goto done;
1867 rdmsr_safe(regs->ecx, regs->eax, regs->edx);
1868 break;
1870 regs->eax = msr_content & 0xFFFFFFFF;
1871 regs->edx = msr_content >> 32;
1873 else
1875 inst_len = __get_instruction_length(vmcb, INSTR_WRMSR, NULL);
1876 msr_content = (regs->eax & 0xFFFFFFFF) | ((u64)regs->edx << 32);
1878 switch (regs->ecx)
1880 case MSR_IA32_TIME_STAMP_COUNTER:
1881 svm_set_guest_time(v, msr_content);
1882 break;
1883 case MSR_IA32_SYSENTER_CS:
1884 vmcb->sysenter_cs = msr_content;
1885 break;
1886 case MSR_IA32_SYSENTER_ESP:
1887 vmcb->sysenter_esp = msr_content;
1888 break;
1889 case MSR_IA32_SYSENTER_EIP:
1890 vmcb->sysenter_eip = msr_content;
1891 break;
1892 case MSR_IA32_APICBASE:
1893 vlapic_msr_set(VLAPIC(v), msr_content);
1894 break;
1895 default:
1896 long_mode_do_msr_write(regs);
1897 break;
1901 done:
1903 HVM_DBG_LOG(DBG_LEVEL_1, "svm_do_msr_access returns: "
1904 "ecx=%lx, eax=%lx, edx=%lx",
1905 (unsigned long)regs->ecx, (unsigned long)regs->eax,
1906 (unsigned long)regs->edx);
1908 __update_guest_eip(vmcb, inst_len);
1912 /*
1913 * We need to use this exit to reschedule.
1914 */
1915 static inline void svm_vmexit_do_hlt(struct vmcb_struct *vmcb)
1917 struct vcpu *v = current;
1918 struct hvm_virpit *vpit = &v->domain->arch.hvm_domain.vpit;
1919 s_time_t next_pit = -1, next_wakeup;
1921 __update_guest_eip(vmcb, 1);
1923 /* Check for an unhandled interrupt or a newly pending one */
1924 if ( vmcb->vintr.fields.irq || cpu_has_pending_irq(v) )
1925 return;
1927 if ( !v->vcpu_id )
1928 next_pit = get_pit_scheduled(v, vpit);
1929 next_wakeup = get_apictime_scheduled(v);
1930 if ( (next_pit != -1 && next_pit < next_wakeup) || next_wakeup == -1 )
1931 next_wakeup = next_pit;
1932 if ( next_wakeup != - 1 )
1933 set_timer(&current->arch.hvm_svm.hlt_timer, next_wakeup);
1934 hvm_safe_block();
1938 static inline void svm_vmexit_do_mwait(void)
1943 #ifdef XEN_DEBUGGER
1944 static void svm_debug_save_cpu_user_regs(struct vmcb_struct *vmcb,
1945 struct cpu_user_regs *regs)
1947 regs->eip = vmcb->rip;
1948 regs->esp = vmcb->rsp;
1949 regs->eflags = vmcb->rflags;
1951 regs->xcs = vmcb->cs.sel;
1952 regs->xds = vmcb->ds.sel;
1953 regs->xes = vmcb->es.sel;
1954 regs->xfs = vmcb->fs.sel;
1955 regs->xgs = vmcb->gs.sel;
1956 regs->xss = vmcb->ss.sel;
1960 static void svm_debug_restore_cpu_user_regs(struct cpu_user_regs *regs)
1962 vmcb->ss.sel = regs->xss;
1963 vmcb->rsp = regs->esp;
1964 vmcb->rflags = regs->eflags;
1965 vmcb->cs.sel = regs->xcs;
1966 vmcb->rip = regs->eip;
1968 vmcb->gs.sel = regs->xgs;
1969 vmcb->fs.sel = regs->xfs;
1970 vmcb->es.sel = regs->xes;
1971 vmcb->ds.sel = regs->xds;
1973 #endif
1976 void svm_handle_invlpg(const short invlpga, struct cpu_user_regs *regs)
1978 struct vcpu *v = current;
1979 u8 opcode[MAX_INST_SIZE], prefix, length = MAX_INST_SIZE;
1980 unsigned long g_vaddr;
1981 int inst_len;
1982 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1984 ASSERT(vmcb);
1985 /*
1986 * We do not know how many bytes the invlpg instruction will take, so use the
1987 * maximum instruction length here.
1988 */
1989 if (inst_copy_from_guest(opcode, svm_rip2pointer(vmcb), length) < length)
1991 printk("svm_handle_invlpg (): Error reading memory %d bytes\n", length);
1992 __hvm_bug(regs);
1995 if (invlpga)
1997 inst_len = __get_instruction_length(vmcb, INSTR_INVLPGA, opcode);
1998 ASSERT(inst_len > 0);
1999 __update_guest_eip(vmcb, inst_len);
2001 /*
2002 * The address is implicit on this instruction. At the moment, we don't
2003 * use ecx (ASID) to identify individual guest pages.
2004 */
2005 g_vaddr = regs->eax;
2007 else
2009 /* What about multiple prefix codes? */
2010 prefix = (is_prefix(opcode[0])?opcode[0]:0);
2011 inst_len = __get_instruction_length(vmcb, INSTR_INVLPG, opcode);
2012 ASSERT(inst_len > 0);
2014 inst_len--;
2015 length -= inst_len;
2017 /*
2018 * Decode the memory operand of the instruction, including ModRM, SIB, and
2019 * displacement, to get the effective address and length in bytes. Assume
2020 * the system is in either 32- or 64-bit mode.
2021 */
2022 g_vaddr = get_effective_addr_modrm64(vmcb, regs, prefix,
2023 &opcode[inst_len], &length);
2025 inst_len += length;
2026 __update_guest_eip (vmcb, inst_len);
2029 /* Overkill, we may not need this */
2030 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
2031 shadow_invlpg(v, g_vaddr);
2035 /*
2036 * Reset to realmode causes execution to start at 0xF000:0xFFF0 in
2037 * 16-bit realmode. Basically, this mimics a processor reset.
2039 * returns 0 on success, non-zero otherwise
2040 */
2041 static int svm_do_vmmcall_reset_to_realmode(struct vcpu *v,
2042 struct cpu_user_regs *regs)
2044 struct vmcb_struct *vmcb;
2046 ASSERT(v);
2047 ASSERT(regs);
2049 vmcb = v->arch.hvm_svm.vmcb;
2051 ASSERT(vmcb);
2053 /* Clear the user regs; the VMCB state is re-initialised field by field below */
2054 memset(regs, 0, sizeof(struct cpu_user_regs));
2056 /* VMCB Control */
2057 vmcb->tsc_offset = 0;
2059 /* VMCB State */
2060 vmcb->cr0 = X86_CR0_ET | X86_CR0_PG;
2061 v->arch.hvm_svm.cpu_shadow_cr0 = X86_CR0_ET;
2063 vmcb->cr2 = 0;
2064 vmcb->efer = EFER_SVME;
2066 vmcb->cr4 = SVM_CR4_HOST_MASK;
2067 v->arch.hvm_svm.cpu_shadow_cr4 = 0;
2068 clear_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
2070 /* This will jump to ROMBIOS */
2071 vmcb->rip = 0xFFF0;
2073 /* setup the segment registers and all their hidden states */
2074 vmcb->cs.sel = 0xF000;
2075 vmcb->cs.attributes.bytes = 0x089b;
2076 vmcb->cs.limit = 0xffff;
2077 vmcb->cs.base = 0x000F0000;
2079 vmcb->ss.sel = 0x00;
2080 vmcb->ss.attributes.bytes = 0x0893;
2081 vmcb->ss.limit = 0xffff;
2082 vmcb->ss.base = 0x00;
2084 vmcb->ds.sel = 0x00;
2085 vmcb->ds.attributes.bytes = 0x0893;
2086 vmcb->ds.limit = 0xffff;
2087 vmcb->ds.base = 0x00;
2089 vmcb->es.sel = 0x00;
2090 vmcb->es.attributes.bytes = 0x0893;
2091 vmcb->es.limit = 0xffff;
2092 vmcb->es.base = 0x00;
2094 vmcb->fs.sel = 0x00;
2095 vmcb->fs.attributes.bytes = 0x0893;
2096 vmcb->fs.limit = 0xffff;
2097 vmcb->fs.base = 0x00;
2099 vmcb->gs.sel = 0x00;
2100 vmcb->gs.attributes.bytes = 0x0893;
2101 vmcb->gs.limit = 0xffff;
2102 vmcb->gs.base = 0x00;
2104 vmcb->ldtr.sel = 0x00;
2105 vmcb->ldtr.attributes.bytes = 0x0000;
2106 vmcb->ldtr.limit = 0x0;
2107 vmcb->ldtr.base = 0x00;
2109 vmcb->gdtr.sel = 0x00;
2110 vmcb->gdtr.attributes.bytes = 0x0000;
2111 vmcb->gdtr.limit = 0x0;
2112 vmcb->gdtr.base = 0x00;
2114 vmcb->tr.sel = 0;
2115 vmcb->tr.attributes.bytes = 0;
2116 vmcb->tr.limit = 0x0;
2117 vmcb->tr.base = 0;
2119 vmcb->idtr.sel = 0x00;
2120 vmcb->idtr.attributes.bytes = 0x0000;
2121 vmcb->idtr.limit = 0x3ff;
2122 vmcb->idtr.base = 0x00;
2124 vmcb->rax = 0;
2125 vmcb->rsp = 0;
2127 return 0;
2131 /*
2132 * svm_do_vmmcall - SVM VMMCALL handler
2134 * returns 0 on success, non-zero otherwise
2135 */
2136 static int svm_do_vmmcall(struct vcpu *v, struct cpu_user_regs *regs)
2138 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2139 int inst_len;
2141 ASSERT(vmcb);
2142 ASSERT(regs);
2144 inst_len = __get_instruction_length(vmcb, INSTR_VMCALL, NULL);
2145 ASSERT(inst_len > 0);
2147 /* VMMCALL sanity check */
2148 if (vmcb->cpl > get_vmmcall_cpl(regs->edi))
2150 printf("VMMCALL CPL check failed\n");
2151 return -1;
2154 /* handle the request */
2155 switch (regs->edi)
2157 case VMMCALL_RESET_TO_REALMODE:
2158 if (svm_do_vmmcall_reset_to_realmode(v, regs))
2160 printf("svm_do_vmmcall_reset_to_realmode() failed\n");
2161 return -1;
2164 /* since we just reset the VMCB, return without adjusting the eip */
2165 return 0;
2166 case VMMCALL_DEBUG:
2167 printf("DEBUG features not implemented yet\n");
2168 break;
2169 default:
2170 break;
2173 hvm_print_line(v, regs->eax); /* provides the current domain */
2175 __update_guest_eip(vmcb, inst_len);
2176 return 0;
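/*
 * Illustrative guest-side sketch (an assumption, not part of this file):
 * the handler above expects the VMMCALL function code in edi (checked
 * against get_vmmcall_cpl()) and, on the default path, a character for
 * hvm_print_line() in eax.
 */
#if 0   /* example only */
static inline void example_guest_vmmcall(unsigned long function,
                                         unsigned long arg)
{
    /* "vmmcall" may need to be emitted as .byte 0x0f,0x01,0xd9 on
     * older assemblers. */
    asm volatile ( "vmmcall"
                   : : "D" (function), "a" (arg)
                   : "memory" );
}
#endif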
2180 void svm_dump_inst(unsigned long eip)
2182 u8 opcode[256];
2183 unsigned long ptr;
2184 int len;
2185 int i;
2187 ptr = eip & ~0xff;
2188 len = 0;
2190 if (hvm_copy(opcode, ptr, sizeof(opcode), HVM_COPY_IN))
2191 len = sizeof(opcode);
2193 printf("Code bytes around(len=%d) %lx:", len, eip);
2194 for (i = 0; i < len; i++)
2196 if ((i & 0x0f) == 0)
2197 printf("\n%08lx:", ptr+i);
2199 printf("%02x ", opcode[i]);
2202 printf("\n");
2206 void svm_dump_regs(const char *from, struct cpu_user_regs *regs)
2208 struct vcpu *v = current;
2209 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2210 unsigned long pt = pagetable_get_paddr(v->arch.shadow_table);
2212 printf("%s: guest registers from %s:\n", __func__, from);
2213 #if defined (__x86_64__)
2214 printk("rax: %016lx rbx: %016lx rcx: %016lx\n",
2215 regs->rax, regs->rbx, regs->rcx);
2216 printk("rdx: %016lx rsi: %016lx rdi: %016lx\n",
2217 regs->rdx, regs->rsi, regs->rdi);
2218 printk("rbp: %016lx rsp: %016lx r8: %016lx\n",
2219 regs->rbp, regs->rsp, regs->r8);
2220 printk("r9: %016lx r10: %016lx r11: %016lx\n",
2221 regs->r9, regs->r10, regs->r11);
2222 printk("r12: %016lx r13: %016lx r14: %016lx\n",
2223 regs->r12, regs->r13, regs->r14);
2224 printk("r15: %016lx cr0: %016lx cr3: %016lx\n",
2225 regs->r15, v->arch.hvm_svm.cpu_shadow_cr0, vmcb->cr3);
2226 #else
2227 printf("eax: %08x, ebx: %08x, ecx: %08x, edx: %08x\n",
2228 regs->eax, regs->ebx, regs->ecx, regs->edx);
2229 printf("edi: %08x, esi: %08x, ebp: %08x, esp: %08x\n",
2230 regs->edi, regs->esi, regs->ebp, regs->esp);
2231 printf("%s: guest cr0: %lx\n", __func__,
2232 v->arch.hvm_svm.cpu_shadow_cr0);
2233 printf("guest CR3 = %llx\n", vmcb->cr3);
2234 #endif
2235 printf("%s: pt = %lx\n", __func__, pt);
2239 void svm_dump_host_regs(const char *from)
2241 struct vcpu *v = current;
2242 unsigned long pt = pagetable_get_paddr(v->arch.monitor_table);
2243 unsigned long cr3, cr0;
2244 printf("Host registers at %s\n", from);
2246 __asm__ __volatile__ ("\tmov %%cr0,%0\n"
2247 "\tmov %%cr3,%1\n"
2248 : "=r" (cr0), "=r"(cr3));
2249 printf("%s: pt = %lx, cr3 = %lx, cr0 = %lx\n", __func__, pt, cr3, cr0);
2252 #ifdef SVM_EXTRA_DEBUG
2253 static char *exit_reasons[] = {
2254 [VMEXIT_CR0_READ] = "CR0_READ",
2255 [VMEXIT_CR1_READ] = "CR1_READ",
2256 [VMEXIT_CR2_READ] = "CR2_READ",
2257 [VMEXIT_CR3_READ] = "CR3_READ",
2258 [VMEXIT_CR4_READ] = "CR4_READ",
2259 [VMEXIT_CR5_READ] = "CR5_READ",
2260 [VMEXIT_CR6_READ] = "CR6_READ",
2261 [VMEXIT_CR7_READ] = "CR7_READ",
2262 [VMEXIT_CR8_READ] = "CR8_READ",
2263 [VMEXIT_CR9_READ] = "CR9_READ",
2264 [VMEXIT_CR10_READ] = "CR10_READ",
2265 [VMEXIT_CR11_READ] = "CR11_READ",
2266 [VMEXIT_CR12_READ] = "CR12_READ",
2267 [VMEXIT_CR13_READ] = "CR13_READ",
2268 [VMEXIT_CR14_READ] = "CR14_READ",
2269 [VMEXIT_CR15_READ] = "CR15_READ",
2270 [VMEXIT_CR0_WRITE] = "CR0_WRITE",
2271 [VMEXIT_CR1_WRITE] = "CR1_WRITE",
2272 [VMEXIT_CR2_WRITE] = "CR2_WRITE",
2273 [VMEXIT_CR3_WRITE] = "CR3_WRITE",
2274 [VMEXIT_CR4_WRITE] = "CR4_WRITE",
2275 [VMEXIT_CR5_WRITE] = "CR5_WRITE",
2276 [VMEXIT_CR6_WRITE] = "CR6_WRITE",
2277 [VMEXIT_CR7_WRITE] = "CR7_WRITE",
2278 [VMEXIT_CR8_WRITE] = "CR8_WRITE",
2279 [VMEXIT_CR9_WRITE] = "CR9_WRITE",
2280 [VMEXIT_CR10_WRITE] = "CR10_WRITE",
2281 [VMEXIT_CR11_WRITE] = "CR11_WRITE",
2282 [VMEXIT_CR12_WRITE] = "CR12_WRITE",
2283 [VMEXIT_CR13_WRITE] = "CR13_WRITE",
2284 [VMEXIT_CR14_WRITE] = "CR14_WRITE",
2285 [VMEXIT_CR15_WRITE] = "CR15_WRITE",
2286 [VMEXIT_DR0_READ] = "DR0_READ",
2287 [VMEXIT_DR1_READ] = "DR1_READ",
2288 [VMEXIT_DR2_READ] = "DR2_READ",
2289 [VMEXIT_DR3_READ] = "DR3_READ",
2290 [VMEXIT_DR4_READ] = "DR4_READ",
2291 [VMEXIT_DR5_READ] = "DR5_READ",
2292 [VMEXIT_DR6_READ] = "DR6_READ",
2293 [VMEXIT_DR7_READ] = "DR7_READ",
2294 [VMEXIT_DR8_READ] = "DR8_READ",
2295 [VMEXIT_DR9_READ] = "DR9_READ",
2296 [VMEXIT_DR10_READ] = "DR10_READ",
2297 [VMEXIT_DR11_READ] = "DR11_READ",
2298 [VMEXIT_DR12_READ] = "DR12_READ",
2299 [VMEXIT_DR13_READ] = "DR13_READ",
2300 [VMEXIT_DR14_READ] = "DR14_READ",
2301 [VMEXIT_DR15_READ] = "DR15_READ",
2302 [VMEXIT_DR0_WRITE] = "DR0_WRITE",
2303 [VMEXIT_DR1_WRITE] = "DR1_WRITE",
2304 [VMEXIT_DR2_WRITE] = "DR2_WRITE",
2305 [VMEXIT_DR3_WRITE] = "DR3_WRITE",
2306 [VMEXIT_DR4_WRITE] = "DR4_WRITE",
2307 [VMEXIT_DR5_WRITE] = "DR5_WRITE",
2308 [VMEXIT_DR6_WRITE] = "DR6_WRITE",
2309 [VMEXIT_DR7_WRITE] = "DR7_WRITE",
2310 [VMEXIT_DR8_WRITE] = "DR8_WRITE",
2311 [VMEXIT_DR9_WRITE] = "DR9_WRITE",
2312 [VMEXIT_DR10_WRITE] = "DR10_WRITE",
2313 [VMEXIT_DR11_WRITE] = "DR11_WRITE",
2314 [VMEXIT_DR12_WRITE] = "DR12_WRITE",
2315 [VMEXIT_DR13_WRITE] = "DR13_WRITE",
2316 [VMEXIT_DR14_WRITE] = "DR14_WRITE",
2317 [VMEXIT_DR15_WRITE] = "DR15_WRITE",
2318 [VMEXIT_EXCEPTION_DE] = "EXCEPTION_DE",
2319 [VMEXIT_EXCEPTION_DB] = "EXCEPTION_DB",
2320 [VMEXIT_EXCEPTION_NMI] = "EXCEPTION_NMI",
2321 [VMEXIT_EXCEPTION_BP] = "EXCEPTION_BP",
2322 [VMEXIT_EXCEPTION_OF] = "EXCEPTION_OF",
2323 [VMEXIT_EXCEPTION_BR] = "EXCEPTION_BR",
2324 [VMEXIT_EXCEPTION_UD] = "EXCEPTION_UD",
2325 [VMEXIT_EXCEPTION_NM] = "EXCEPTION_NM",
2326 [VMEXIT_EXCEPTION_DF] = "EXCEPTION_DF",
2327 [VMEXIT_EXCEPTION_09] = "EXCEPTION_09",
2328 [VMEXIT_EXCEPTION_TS] = "EXCEPTION_TS",
2329 [VMEXIT_EXCEPTION_NP] = "EXCEPTION_NP",
2330 [VMEXIT_EXCEPTION_SS] = "EXCEPTION_SS",
2331 [VMEXIT_EXCEPTION_GP] = "EXCEPTION_GP",
2332 [VMEXIT_EXCEPTION_PF] = "EXCEPTION_PF",
2333 [VMEXIT_EXCEPTION_15] = "EXCEPTION_15",
2334 [VMEXIT_EXCEPTION_MF] = "EXCEPTION_MF",
2335 [VMEXIT_EXCEPTION_AC] = "EXCEPTION_AC",
2336 [VMEXIT_EXCEPTION_MC] = "EXCEPTION_MC",
2337 [VMEXIT_EXCEPTION_XF] = "EXCEPTION_XF",
2338 [VMEXIT_INTR] = "INTR",
2339 [VMEXIT_NMI] = "NMI",
2340 [VMEXIT_SMI] = "SMI",
2341 [VMEXIT_INIT] = "INIT",
2342 [VMEXIT_VINTR] = "VINTR",
2343 [VMEXIT_CR0_SEL_WRITE] = "CR0_SEL_WRITE",
2344 [VMEXIT_IDTR_READ] = "IDTR_READ",
2345 [VMEXIT_GDTR_READ] = "GDTR_READ",
2346 [VMEXIT_LDTR_READ] = "LDTR_READ",
2347 [VMEXIT_TR_READ] = "TR_READ",
2348 [VMEXIT_IDTR_WRITE] = "IDTR_WRITE",
2349 [VMEXIT_GDTR_WRITE] = "GDTR_WRITE",
2350 [VMEXIT_LDTR_WRITE] = "LDTR_WRITE",
2351 [VMEXIT_TR_WRITE] = "TR_WRITE",
2352 [VMEXIT_RDTSC] = "RDTSC",
2353 [VMEXIT_RDPMC] = "RDPMC",
2354 [VMEXIT_PUSHF] = "PUSHF",
2355 [VMEXIT_POPF] = "POPF",
2356 [VMEXIT_CPUID] = "CPUID",
2357 [VMEXIT_RSM] = "RSM",
2358 [VMEXIT_IRET] = "IRET",
2359 [VMEXIT_SWINT] = "SWINT",
2360 [VMEXIT_INVD] = "INVD",
2361 [VMEXIT_PAUSE] = "PAUSE",
2362 [VMEXIT_HLT] = "HLT",
2363 [VMEXIT_INVLPG] = "INVLPG",
2364 [VMEXIT_INVLPGA] = "INVLPGA",
2365 [VMEXIT_IOIO] = "IOIO",
2366 [VMEXIT_MSR] = "MSR",
2367 [VMEXIT_TASK_SWITCH] = "TASK_SWITCH",
2368 [VMEXIT_FERR_FREEZE] = "FERR_FREEZE",
2369 [VMEXIT_SHUTDOWN] = "SHUTDOWN",
2370 [VMEXIT_VMRUN] = "VMRUN",
2371 [VMEXIT_VMMCALL] = "VMMCALL",
2372 [VMEXIT_VMLOAD] = "VMLOAD",
2373 [VMEXIT_VMSAVE] = "VMSAVE",
2374 [VMEXIT_STGI] = "STGI",
2375 [VMEXIT_CLGI] = "CLGI",
2376 [VMEXIT_SKINIT] = "SKINIT",
2377 [VMEXIT_RDTSCP] = "RDTSCP",
2378 [VMEXIT_ICEBP] = "ICEBP",
2379 [VMEXIT_NPF] = "NPF"
2380 };
2381 #endif /* SVM_EXTRA_DEBUG */
2383 #ifdef SVM_WALK_GUEST_PAGES
2384 void walk_shadow_and_guest_pt(unsigned long gva)
2386 l2_pgentry_t gpde;
2387 l2_pgentry_t spde;
2388 l1_pgentry_t gpte;
2389 l1_pgentry_t spte;
2390 struct vcpu *v = current;
2391 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2392 unsigned long gpa;
2394 gpa = gva_to_gpa( gva );
2395 printk( "gva = %lx, gpa=%lx, gCR3=%x\n", gva, gpa, (u32)vmcb->cr3 );
2396 if( !svm_paging_enabled(v) || mmio_space(gpa) )
2397 return;
2399 /* let's dump the guest and shadow page info */
2401 __guest_get_l2e(v, gva, &gpde);
2402 printk( "G-PDE = %x, flags=%x\n", gpde.l2, l2e_get_flags(gpde) );
2403 __shadow_get_l2e( v, gva, &spde );
2404 printk( "S-PDE = %x, flags=%x\n", spde.l2, l2e_get_flags(spde) );
2406 if ( unlikely(!(l2e_get_flags(gpde) & _PAGE_PRESENT)) )
2407 return;
2409 spte = l1e_empty();
2411 /* This is actually overkill - we only need to make sure the hl2 is in-sync. */
2412 shadow_sync_va(v, gva);
2414 gpte.l1 = 0;
2415 __copy_from_user(&gpte, &linear_pg_table[ l1_linear_offset(gva) ], sizeof(gpte) );
2416 printk( "G-PTE = %x, flags=%x\n", gpte.l1, l1e_get_flags(gpte) );
2417 __copy_from_user( &spte, &phys_to_machine_mapping[ l1e_get_pfn( gpte ) ],
2418 sizeof(spte) );
2419 printk( "S-PTE = %x, flags=%x\n", spte.l1, l1e_get_flags(spte));
2421 #endif /* SVM_WALK_GUEST_PAGES */
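/*
 * Main VMEXIT dispatcher: entered from the exit stub with the saved guest
 * register frame, it reads the exitcode out of the VMCB and hands off to
 * the individual intercept handlers above.
 */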
2423 asmlinkage void svm_vmexit_handler(struct cpu_user_regs regs)
2425 unsigned int exit_reason;
2426 unsigned long eip;
2427 struct vcpu *v = current;
2428 int error;
2429 int do_debug = 0;
2430 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2432 ASSERT(vmcb);
2434 exit_reason = vmcb->exitcode;
2435 save_svm_cpu_user_regs(v, &regs);
2436 v->arch.hvm_svm.injecting_event = 0;
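/* Be conservative: request that the processor flush the entire TLB on the
 * next VMRUN. */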
2438 vmcb->tlb_control = 1;
2440 #ifdef SVM_EXTRA_DEBUG
2442 #if defined(__i386__)
2443 #define rip eip
2444 #endif
2446 static unsigned long intercepts_counter = 0;
2448 if (svm_dbg_on && exit_reason == VMEXIT_EXCEPTION_PF)
2450 if (svm_paging_enabled(v) && !mmio_space(gva_to_gpa(vmcb->exitinfo2)))
2452 printk("I%08ld,ExC=%s(%d),IP=%x:%llx,I1=%llx,I2=%llx,INT=%llx, gpa=%llx\n",
2453 intercepts_counter,
2454 exit_reasons[exit_reason], exit_reason, regs.cs,
2455 (unsigned long long) regs.rip,
2456 (unsigned long long) vmcb->exitinfo1,
2457 (unsigned long long) vmcb->exitinfo2,
2458 (unsigned long long) vmcb->exitintinfo.bytes,
2459 (unsigned long long) gva_to_gpa( vmcb->exitinfo2 ) );
2461 else
2463 printk("I%08ld,ExC=%s(%d),IP=%x:%llx,I1=%llx,I2=%llx,INT=%llx\n",
2464 intercepts_counter,
2465 exit_reasons[exit_reason], exit_reason, regs.cs,
2466 (unsigned long long) regs.rip,
2467 (unsigned long long) vmcb->exitinfo1,
2468 (unsigned long long) vmcb->exitinfo2,
2469 (unsigned long long) vmcb->exitintinfo.bytes );
2472 else if ( svm_dbg_on
2473 && exit_reason != VMEXIT_IOIO
2474 && exit_reason != VMEXIT_INTR)
2477 if (exit_reasons[exit_reason])
2479 printk("I%08ld,ExC=%s(%d),IP=%x:%llx,I1=%llx,I2=%llx,INT=%llx\n",
2480 intercepts_counter,
2481 exit_reasons[exit_reason], exit_reason, regs.cs,
2482 (unsigned long long) regs.rip,
2483 (unsigned long long) vmcb->exitinfo1,
2484 (unsigned long long) vmcb->exitinfo2,
2485 (unsigned long long) vmcb->exitintinfo.bytes);
2487 else
2489 printk("I%08ld,ExC=%d(0x%x),IP=%x:%llx,I1=%llx,I2=%llx,INT=%llx\n",
2490 intercepts_counter, exit_reason, exit_reason, regs.cs,
2491 (unsigned long long) regs.rip,
2492 (unsigned long long) vmcb->exitinfo1,
2493 (unsigned long long) vmcb->exitinfo2,
2494 (unsigned long long) vmcb->exitintinfo.bytes);
2498 #ifdef SVM_WALK_GUEST_PAGES
2499 if( exit_reason == VMEXIT_EXCEPTION_PF
2500 && ( ( vmcb->exitinfo2 == vmcb->rip )
2501 || vmcb->exitintinfo.bytes) )
2503 if (svm_paging_enabled(v) && !mmio_space(gva_to_gpa(vmcb->exitinfo2)))
2504 walk_shadow_and_guest_pt( vmcb->exitinfo2 );
2506 #endif
2508 intercepts_counter++;
2510 #if 0
2511 if (svm_dbg_on)
2512 do_debug = svm_do_debugout(exit_reason);
2513 #endif
2515 if (do_debug)
2517 printk("%s:+ guest_table = 0x%08x, monitor_table = 0x%08x, "
2518 "shadow_table = 0x%08x\n",
2519 __func__,
2520 (int) v->arch.guest_table.pfn,
2521 (int) v->arch.monitor_table.pfn,
2522 (int) v->arch.shadow_table.pfn);
2524 svm_dump_vmcb(__func__, vmcb);
2525 svm_dump_regs(__func__, &regs);
2526 svm_dump_inst(svm_rip2pointer(vmcb));
2529 #if defined(__i386__)
2530 #undef rip
2531 #endif
2534 #endif /* SVM_EXTRA_DEBUG */
2536 if (exit_reason == -1)
2538 printk("%s: exit_reason == -1 - Did someone clobber the VMCB?\n",
2539 __func__);
2540 BUG();
2541 domain_crash_synchronous();
2544 perfc_incra(vmexits, exit_reason);
2545 eip = vmcb->rip;
2547 #ifdef SVM_EXTRA_DEBUG
2548 if (do_debug)
2550 printk("eip = %lx, exit_reason = %d (0x%x)\n",
2551 eip, exit_reason, exit_reason);
2553 #endif /* SVM_EXTRA_DEBUG */
2555 TRACE_3D(TRC_VMX_VMEXIT, v->domain->domain_id, eip, exit_reason);
2557 switch (exit_reason)
2559 case VMEXIT_EXCEPTION_DB:
2561 #ifdef XEN_DEBUGGER
2562 svm_debug_save_cpu_user_regs(&regs);
2563 pdb_handle_exception(1, &regs, 1);
2564 svm_debug_restore_cpu_user_regs(&regs);
2565 #else
2566 svm_store_cpu_user_regs(&regs, v);
2567 domain_pause_for_debugger();
2568 #endif
2570 break;
2572 case VMEXIT_NMI:
2573 do_nmi(&regs, 0);
2574 break;
2576 case VMEXIT_SMI:
2577 /*
2578 * For asynchronous SMIs, we just need to allow global interrupts
2579 * so that the SMI is taken properly in the context of the host. The
2580 * standard code does a STGI after the VMEXIT, which should accomplish
2581 * this task. Continue as normal and restart the guest.
2582 */
2583 break;
2585 case VMEXIT_INIT:
2586 /*
2587 * Nothing to do; in fact we should never get to this point.
2588 */
2589 break;
2591 #ifdef XEN_DEBUGGER
2592 case VMEXIT_EXCEPTION_BP:
2593 svm_debug_save_cpu_user_regs(&regs);
2594 pdb_handle_exception(3, &regs, 1);
2595 svm_debug_restore_cpu_user_regs(&regs);
2596 break;
2597 #endif
2599 case VMEXIT_EXCEPTION_NM:
2600 svm_do_no_device_fault(vmcb);
2601 break;
2603 case VMEXIT_EXCEPTION_GP:
2604 /* This should probably not be trapped in the future */
2605 regs.error_code = vmcb->exitinfo1;
2606 v->arch.hvm_svm.injecting_event = 1;
2607 svm_do_general_protection_fault(v, &regs);
2608 break;
2610 case VMEXIT_EXCEPTION_PF:
2612 unsigned long va;
2613 va = vmcb->exitinfo2;
2614 regs.error_code = vmcb->exitinfo1;
2615 HVM_DBG_LOG(DBG_LEVEL_VMMU,
2616 "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx",
2617 (unsigned long)regs.eax, (unsigned long)regs.ebx,
2618 (unsigned long)regs.ecx, (unsigned long)regs.edx,
2619 (unsigned long)regs.esi, (unsigned long)regs.edi);
2621 v->arch.hvm_vcpu.mmio_op.inst_decoder_regs = &regs;
2623 //printk("PF1\n");
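/* svm_do_page_fault() gives the shadow code a chance to fix the fault;
 * a zero return means it could not, in which case the #PF is reflected
 * back into the guest below. */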
2624 if (!(error = svm_do_page_fault(va, &regs)))
2626 v->arch.hvm_svm.injecting_event = 1;
2627 /* Inject #PF using the VMCB event injection field */
2628 svm_inject_exception(vmcb, TRAP_page_fault, 1, regs.error_code);
2630 v->arch.hvm_svm.cpu_cr2 = va;
2631 vmcb->cr2 = va;
2632 TRACE_3D(TRC_VMX_INT, v->domain->domain_id,
2633 VMEXIT_EXCEPTION_PF, va);
2635 break;
2638 case VMEXIT_EXCEPTION_DF:
2639 printk("Guest double fault\n");
2640 BUG();
2641 break;
2643 case VMEXIT_INTR:
2644 raise_softirq(SCHEDULE_SOFTIRQ);
2645 break;
2647 case VMEXIT_GDTR_WRITE:
2648 printk("WRITE to GDTR\n");
2649 break;
2651 case VMEXIT_TASK_SWITCH:
2652 __hvm_bug(&regs);
2653 break;
2655 case VMEXIT_CPUID:
2656 svm_vmexit_do_cpuid(vmcb, regs.eax, &regs);
2657 break;
2659 case VMEXIT_HLT:
2660 svm_vmexit_do_hlt(vmcb);
2661 break;
2663 case VMEXIT_INVLPG:
2664 svm_handle_invlpg(0, &regs);
2665 break;
2667 case VMEXIT_INVLPGA:
2668 svm_handle_invlpg(1, &regs);
2669 break;
2671 case VMEXIT_VMMCALL:
2672 svm_do_vmmcall(v, &regs);
2673 break;
2675 case VMEXIT_CR0_READ:
2676 svm_cr_access(v, 0, TYPE_MOV_FROM_CR, &regs);
2677 break;
2679 case VMEXIT_CR2_READ:
2680 svm_cr_access(v, 2, TYPE_MOV_FROM_CR, &regs);
2681 break;
2683 case VMEXIT_CR3_READ:
2684 svm_cr_access(v, 3, TYPE_MOV_FROM_CR, &regs);
2685 break;
2687 case VMEXIT_CR4_READ:
2688 svm_cr_access(v, 4, TYPE_MOV_FROM_CR, &regs);
2689 break;
2691 case VMEXIT_CR8_READ:
2692 svm_cr_access(v, 8, TYPE_MOV_FROM_CR, &regs);
2693 break;
2695 case VMEXIT_CR0_WRITE:
2696 svm_cr_access(v, 0, TYPE_MOV_TO_CR, &regs);
2697 break;
2699 case VMEXIT_CR2_WRITE:
2700 svm_cr_access(v, 2, TYPE_MOV_TO_CR, &regs);
2701 break;
2703 case VMEXIT_CR3_WRITE:
2704 svm_cr_access(v, 3, TYPE_MOV_TO_CR, &regs);
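/* The guest loaded a new CR3, which switches shadow page tables; flush
 * the local TLB so no stale translations survive the switch. */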
2705 local_flush_tlb();
2706 break;
2708 case VMEXIT_CR4_WRITE:
2709 svm_cr_access(v, 4, TYPE_MOV_TO_CR, &regs);
2710 break;
2712 case VMEXIT_CR8_WRITE:
2713 svm_cr_access(v, 8, TYPE_MOV_TO_CR, &regs);
2714 break;
2716 case VMEXIT_DR0_READ:
2717 svm_dr_access(v, 0, TYPE_MOV_FROM_DR, &regs);
2718 break;
2720 case VMEXIT_DR1_READ:
2721 svm_dr_access(v, 1, TYPE_MOV_FROM_DR, &regs);
2722 break;
2724 case VMEXIT_DR2_READ:
2725 svm_dr_access(v, 2, TYPE_MOV_FROM_DR, &regs);
2726 break;
2728 case VMEXIT_DR3_READ:
2729 svm_dr_access(v, 3, TYPE_MOV_FROM_DR, &regs);
2730 break;
2732 case VMEXIT_DR6_READ:
2733 svm_dr_access(v, 6, TYPE_MOV_FROM_DR, &regs);
2734 break;
2736 case VMEXIT_DR7_READ:
2737 svm_dr_access(v, 7, TYPE_MOV_FROM_DR, &regs);
2738 break;
2740 case VMEXIT_DR0_WRITE:
2741 svm_dr_access(v, 0, TYPE_MOV_TO_DR, &regs);
2742 break;
2744 case VMEXIT_DR1_WRITE:
2745 svm_dr_access(v, 1, TYPE_MOV_TO_DR, &regs);
2746 break;
2748 case VMEXIT_DR2_WRITE:
2749 svm_dr_access(v, 2, TYPE_MOV_TO_DR, &regs);
2750 break;
2752 case VMEXIT_DR3_WRITE:
2753 svm_dr_access(v, 3, TYPE_MOV_TO_DR, &regs);
2754 break;
2756 case VMEXIT_DR6_WRITE:
2757 svm_dr_access(v, 6, TYPE_MOV_TO_DR, &regs);
2758 break;
2760 case VMEXIT_DR7_WRITE:
2761 svm_dr_access(v, 7, TYPE_MOV_TO_DR, &regs);
2762 break;
2764 case VMEXIT_IOIO:
2765 svm_io_instruction(v, &regs);
2766 break;
2768 case VMEXIT_MSR:
2769 svm_do_msr_access(v, &regs);
2770 break;
2772 case VMEXIT_SHUTDOWN:
2773 printk("Guest shutdown exit\n");
2774 domain_crash_synchronous();
2775 break;
2777 default:
2778 printk("unexpected VMEXIT: exit reason = 0x%x, exitinfo1 = %llx, "
2779 "exitinfo2 = %llx\n", exit_reason,
2780 (unsigned long long)vmcb->exitinfo1,
2781 (unsigned long long)vmcb->exitinfo2);
2782 __hvm_bug(&regs); /* should not happen */
2783 break;
2786 #ifdef SVM_EXTRA_DEBUG
2787 if (do_debug)
2789 printk("%s: Done switch on vmexit_code\n", __func__);
2790 svm_dump_regs(__func__, &regs);
2793 if (do_debug)
2795 printk("vmexit_handler():- guest_table = 0x%08x, "
2796 "monitor_table = 0x%08x, shadow_table = 0x%08x\n",
2797 (int)v->arch.guest_table.pfn,
2798 (int)v->arch.monitor_table.pfn,
2799 (int)v->arch.shadow_table.pfn);
2800 printk("svm_vmexit_handler: Returning\n");
2802 #endif
2804 return;
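/*
 * Entered from the assembly VMRUN path: disable interrupts and put the
 * guest's saved CR2 back into the hardware register before resuming the
 * guest.
 */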
2807 asmlinkage void svm_load_cr2(void)
2809 struct vcpu *v = current;
2811 local_irq_disable();
2812 #ifdef __i386__
2813 asm volatile("movl %0,%%cr2": :"r" (v->arch.hvm_svm.cpu_cr2));
2814 #else
2815 asm volatile("movq %0,%%cr2": :"r" (v->arch.hvm_svm.cpu_cr2));
2816 #endif
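/*
 * Entered from the assembly VMRUN path: make sure the vCPU runs with a
 * valid ASID on this core, calling asidpool_assign_next() to recycle the
 * ASID when it must be reassigned or the vCPU has moved to another core.
 */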
2819 asmlinkage void svm_asid(void)
2821 struct vcpu *v = current;
2822 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2824 /*
2825 * If we need to assign a new ASID, or if we are switching cores, retire
2826 * the ASID for the old core and assign a new one for the current core.
2827 */
2828 if ( test_bit( ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags ) ||
2829 ( v->arch.hvm_svm.asid_core != v->arch.hvm_svm.launch_core )) {
2830 /* recycle asid */
2831 if ( !asidpool_assign_next( vmcb, 1,
2832 v->arch.hvm_svm.asid_core, v->arch.hvm_svm.launch_core )) {
2833 /* If we get here, we have a major problem */
2834 domain_crash_synchronous();
2837 v->arch.hvm_svm.asid_core = v->arch.hvm_svm.launch_core;
2838 clear_bit( ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags );
2842 /*
2843 * Local variables:
2844 * mode: C
2845 * c-set-style: "BSD"
2846 * c-basic-offset: 4
2847 * tab-width: 4
2848 * indent-tabs-mode: nil
2849 * End:
2850 */