direct-io.hg

view xen/arch/x86/hvm/svm/svm.c @ 9698:9c313ff7a0ed

There are instances where we DO NOT want an hvm guest to run an
MP-enabled kernel. In such situations we should have a workaround to
guarantee that hvm guests will not detect MP.

For example, in the absence of ACPI and MPS, the installation code in some
Linux distributions keys off the presence of the cpuid edx/HTT bit (indicating
the presence of Hyper-Threading Technology) to determine whether another
logical processor is present and, if so, loads an MP-enabled kernel instead
of a uniprocessor kernel. SMBIOS is also consulted for the same purpose
and presents a potential problem as well. While both approaches for
selecting an MP kernel are debatable (since MPS or ACPI have long
been the standard for MP detection), they are something we have to live
with and work around, because making a change in the fully virtualized
guest is not an option.

To solve the problem we need to hide all secondary processors from the hvm
guest. Since hvm does not surface MPS tables, we only need to deal
with ACPI, the cpuid HTT bit, and possibly SMBIOS. (I have not yet had time
to look closely at the hvm BIOS to know whether SMBIOS is also going to be
a problem.)
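
For reference, the cpuid part of the workaround boils down to masking leaf 1
when the virtual APIC is disabled, as done in svm_vmexit_do_cpuid() below.
A minimal sketch (variable names simplified for illustration):

    unsigned int eax, ebx, ecx, edx;
    cpuid(1, &eax, &ebx, &ecx, &edx);
    edx &= ~(1u << 28);        /* clear EDX.HTT: no Hyper-Threading       */
    ebx = (ebx & 0xFF00FFFF)   /* EBX[23:16] = logical processor count... */
        | 0x00010000;          /* ...forced to 1                          */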

Also fixes a logic problem in the code path where apic=0 was not
being handled correctly (vmx path only).

Signed-off-by: Clyde Griffin <cgriffin@novell.com>
author kaf24@firebug.cl.cam.ac.uk
date Fri Apr 21 09:56:50 2006 +0100 (2006-04-21)
parents 5765497cf75e
children e1a47a269600
line source
1 /*
2 * svm.c: handling SVM architecture-related VM exits
3 * Copyright (c) 2004, Intel Corporation.
4 * Copyright (c) 2005, AMD Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
17 * Place - Suite 330, Boston, MA 02111-1307 USA.
18 *
19 */
21 #include <xen/config.h>
22 #include <xen/init.h>
23 #include <xen/lib.h>
24 #include <xen/trace.h>
25 #include <xen/sched.h>
26 #include <xen/irq.h>
27 #include <xen/softirq.h>
28 #include <asm/current.h>
29 #include <asm/io.h>
30 #include <asm/shadow.h>
31 #include <asm/regs.h>
32 #include <asm/cpufeature.h>
33 #include <asm/processor.h>
34 #include <asm/types.h>
35 #include <asm/msr.h>
36 #include <asm/spinlock.h>
37 #include <asm/hvm/hvm.h>
38 #include <asm/hvm/support.h>
39 #include <asm/hvm/io.h>
40 #include <asm/hvm/svm/svm.h>
41 #include <asm/hvm/svm/vmcb.h>
42 #include <asm/hvm/svm/emulate.h>
43 #include <asm/hvm/svm/vmmcall.h>
44 #include <asm/hvm/svm/intr.h>
45 #include <asm/shadow.h>
46 #if CONFIG_PAGING_LEVELS >= 3
47 #include <asm/shadow_64.h>
48 #endif
49 #include <public/sched.h>
50 #include <public/hvm/ioreq.h>
52 #define SVM_EXTRA_DEBUG
54 #ifdef TRACE_BUFFER
55 static unsigned long trace_values[NR_CPUS][4];
56 #define TRACE_VMEXIT(index,value) trace_values[current->processor][index]=value
57 #else
58 #define TRACE_VMEXIT(index,value) ((void)0)
59 #endif
61 /* Useful define */
62 #define MAX_INST_SIZE 15
64 /*
65 * External functions, etc. We should move these to some suitable header file(s) */
67 extern void do_nmi(struct cpu_user_regs *, unsigned long);
68 extern int inst_copy_from_guest(unsigned char *buf, unsigned long guest_eip,
69 int inst_len);
70 extern asmlinkage void do_IRQ(struct cpu_user_regs *);
71 extern void send_pio_req(struct cpu_user_regs *regs, unsigned long port,
72 unsigned long count, int size, long value, int dir, int pvalid);
73 extern int svm_instrlen(struct cpu_user_regs *regs, int mode);
74 extern void svm_dump_inst(unsigned long eip);
75 extern int svm_dbg_on;
76 void svm_manual_event_injection32(struct vcpu *v, struct cpu_user_regs *regs,
77 int vector, int has_code);
78 void svm_dump_regs(const char *from, struct cpu_user_regs *regs);
80 static void svm_relinquish_guest_resources(struct domain *d);
82 static struct asid_pool ASIDpool[NR_CPUS];
84 /*
85 * Initializes the pool of ASIDs used by the guests, per core.
86 */
87 void asidpool_init( int core )
88 {
89 int i;
90 ASIDpool[core].asid_lock = SPIN_LOCK_UNLOCKED;
91 spin_lock(&ASIDpool[core].asid_lock);
92 /* Host ASID is always in use */
93 ASIDpool[core].asid[INITIAL_ASID] = ASID_INUSE;
94 for( i=1; i<ASID_MAX; i++ )
95 {
96 ASIDpool[core].asid[i] = ASID_AVAILABLE;
97 }
98 spin_unlock(&ASIDpool[core].asid_lock);
99 }
102 /* internal function to get the next available ASID */
103 static int asidpool_fetch_next( struct vmcb_struct *vmcb, int core )
104 {
105 int i;
106 for( i = 1; i < ASID_MAX; i++ )
107 {
108 if( ASIDpool[core].asid[i] == ASID_AVAILABLE )
109 {
110 vmcb->guest_asid = i;
111 ASIDpool[core].asid[i] = ASID_INUSE;
112 return i;
113 }
114 }
115 return -1;
116 }
119 /*
120 * This function assigns to the passed VMCB the next
121 * available ASID number. If none are available, the
122 * TLB flush flag is set, and all retired ASIDs
123 * are made available.
124 *
125 * Returns: 1 -- success;
126 * 0 -- failure -- no more ASID numbers
127 * available.
128 */
129 int asidpool_assign_next( struct vmcb_struct *vmcb, int retire_current,
130 int oldcore, int newcore )
131 {
132 int i;
133 int res = 1;
134 static unsigned long cnt=0;
136 spin_lock(&ASIDpool[oldcore].asid_lock);
137 if( retire_current && vmcb->guest_asid ) {
138 ASIDpool[oldcore].asid[ vmcb->guest_asid & (ASID_MAX-1) ] = ASID_RETIRED;
139 }
140 spin_unlock(&ASIDpool[oldcore].asid_lock);
141 spin_lock(&ASIDpool[newcore].asid_lock);
142 if( asidpool_fetch_next( vmcb, newcore ) < 0 ) {
143 if (svm_dbg_on)
144 printk( "SVM: tlb(%ld)\n", cnt++ );
145 /* Flush the TLB and make all retired slots available again */
146 vmcb->tlb_control = 1;
147 for( i = 1; i < ASID_MAX; i++ ) {
148 if( ASIDpool[newcore].asid[i] == ASID_RETIRED ) {
149 ASIDpool[newcore].asid[i] = ASID_AVAILABLE;
150 }
151 }
152 /* Get the First slot available */
153 res = asidpool_fetch_next( vmcb, newcore ) > 0;
154 }
155 spin_unlock(&ASIDpool[newcore].asid_lock);
156 return res;
157 }
159 void asidpool_retire( struct vmcb_struct *vmcb, int core )
160 {
161 spin_lock(&ASIDpool[core].asid_lock);
162 if( vmcb->guest_asid ) {
163 ASIDpool[core].asid[ vmcb->guest_asid & (ASID_MAX-1) ] = ASID_RETIRED;
164 }
165 spin_unlock(&ASIDpool[core].asid_lock);
166 }
168 static inline void svm_inject_exception(struct vmcb_struct *vmcb,
169 int trap, int ev, int error_code)
170 {
171 eventinj_t event;
173 event.bytes = 0;
174 event.fields.v = 1;
175 event.fields.type = EVENTTYPE_EXCEPTION;
176 event.fields.vector = trap;
177 event.fields.ev = ev;
178 event.fields.errorcode = error_code;
180 ASSERT(vmcb->eventinj.fields.v == 0);
182 vmcb->eventinj = event;
183 }
185 void stop_svm(void)
186 {
187 u32 eax, edx;
189 /* We turn off the EFER_SVME bit. */
190 rdmsr(MSR_EFER, eax, edx);
191 eax &= ~EFER_SVME;
192 wrmsr(MSR_EFER, eax, edx);
194 printk("AMD SVM Extension is disabled.\n");
195 }
197 int svm_initialize_guest_resources(struct vcpu *v)
198 {
199 svm_final_setup_guest(v);
200 return 1;
201 }
203 static void svm_store_cpu_guest_regs(
204 struct vcpu *v, struct cpu_user_regs *regs, unsigned long *crs)
205 {
206 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
208 if ( regs != NULL )
209 {
210 #if defined (__x86_64__)
211 regs->rip = vmcb->rip;
212 regs->rsp = vmcb->rsp;
213 regs->rflags = vmcb->rflags;
214 regs->cs = vmcb->cs.sel;
215 regs->ds = vmcb->ds.sel;
216 regs->es = vmcb->es.sel;
217 regs->ss = vmcb->ss.sel;
218 regs->gs = vmcb->gs.sel;
219 regs->fs = vmcb->fs.sel;
220 #elif defined (__i386__)
221 regs->eip = vmcb->rip;
222 regs->esp = vmcb->rsp;
223 regs->eflags = vmcb->rflags;
224 regs->cs = vmcb->cs.sel;
225 regs->ds = vmcb->ds.sel;
226 regs->es = vmcb->es.sel;
227 regs->ss = vmcb->ss.sel;
228 regs->gs = vmcb->gs.sel;
229 regs->fs = vmcb->fs.sel;
230 #endif
231 }
233 if ( crs != NULL )
234 {
235 crs[0] = vmcb->cr0;
236 crs[3] = vmcb->cr3;
237 crs[4] = vmcb->cr4;
238 }
239 }
241 static void svm_load_cpu_guest_regs(
242 struct vcpu *v, struct cpu_user_regs *regs)
243 {
244 svm_load_cpu_user_regs(v, regs);
245 }
247 #define IS_CANO_ADDRESS(add) 1
249 static inline int long_mode_do_msr_read(struct cpu_user_regs *regs)
250 {
251 u64 msr_content = 0;
252 struct vcpu *vc = current;
253 // struct svm_msr_state *msr = &vc->arch.hvm_svm.msr_content;
254 struct vmcb_struct *vmcb = vc->arch.hvm_svm.vmcb;
256 switch (regs->ecx)
257 {
258 case MSR_EFER:
259 // msr_content = msr->msr_items[SVM_INDEX_MSR_EFER];
260 msr_content = vmcb->efer;
261 msr_content &= ~EFER_SVME;
262 break;
264 case MSR_FS_BASE:
265 msr_content = vmcb->fs.base;
266 break;
268 case MSR_GS_BASE:
269 msr_content = vmcb->gs.base;
270 break;
272 case MSR_SHADOW_GS_BASE:
273 msr_content = vmcb->kerngsbase;
274 break;
276 case MSR_STAR:
277 msr_content = vmcb->star;
278 break;
280 case MSR_LSTAR:
281 msr_content = vmcb->lstar;
282 break;
284 case MSR_CSTAR:
285 msr_content = vmcb->cstar;
286 break;
288 case MSR_SYSCALL_MASK:
289 msr_content = vmcb->sfmask;
290 break;
292 default:
293 return 0;
294 }
296 HVM_DBG_LOG(DBG_LEVEL_2, "mode_do_msr_read: msr_content: %"PRIx64"\n",
297 msr_content);
299 regs->eax = msr_content & 0xffffffff;
300 regs->edx = msr_content >> 32;
301 return 1;
302 }
304 static inline int long_mode_do_msr_write(struct cpu_user_regs *regs)
305 {
306 u64 msr_content = regs->eax | ((u64)regs->edx << 32);
307 struct vcpu *vc = current;
308 struct vmcb_struct *vmcb = vc->arch.hvm_svm.vmcb;
310 HVM_DBG_LOG(DBG_LEVEL_1, "mode_do_msr_write msr %lx "
311 "msr_content %"PRIx64"\n",
312 (unsigned long)regs->ecx, msr_content);
314 switch (regs->ecx)
315 {
316 case MSR_EFER:
317 #ifdef __x86_64__
318 /* offending reserved bit will cause #GP */
319 if ( msr_content & ~(EFER_LME | EFER_LMA | EFER_NX | EFER_SCE) )
320 {
321 printk("trying to set reserved bit in EFER\n");
322 svm_inject_exception(vmcb, TRAP_gp_fault, 1, 0);
323 return 0;
324 }
326 /* LME: 0 -> 1 */
327 if ( msr_content & EFER_LME &&
328 !test_bit(SVM_CPU_STATE_LME_ENABLED, &vc->arch.hvm_svm.cpu_state) )
329 {
330 if ( svm_paging_enabled(vc) ||
331 !test_bit(SVM_CPU_STATE_PAE_ENABLED,
332 &vc->arch.hvm_svm.cpu_state) )
333 {
334 printk("trying to set LME bit when "
335 "in paging mode or PAE bit is not set\n");
336 svm_inject_exception(vmcb, TRAP_gp_fault, 1, 0);
337 return 0;
338 }
339 set_bit(SVM_CPU_STATE_LME_ENABLED, &vc->arch.hvm_svm.cpu_state);
340 }
342 /* We have already recorded that we want LME, so it will be set
343 * next time CR0 gets updated. So we clear that bit and continue.
344 */
345 if ((msr_content ^ vmcb->efer) & EFER_LME)
346 msr_content &= ~EFER_LME;
347 /* No update for LME/LMA since they have no effect */
348 #endif
349 vmcb->efer = msr_content | EFER_SVME;
350 break;
352 case MSR_FS_BASE:
353 case MSR_GS_BASE:
354 if (!(SVM_LONG_GUEST(vc)))
355 domain_crash_synchronous();
357 if (!IS_CANO_ADDRESS(msr_content))
358 {
359 HVM_DBG_LOG(DBG_LEVEL_1, "Not cano address of msr write\n");
360 svm_inject_exception(vmcb, TRAP_gp_fault, 1, 0);
361 }
363 if (regs->ecx == MSR_FS_BASE)
364 vmcb->fs.base = msr_content;
365 else
366 vmcb->gs.base = msr_content;
367 break;
369 case MSR_SHADOW_GS_BASE:
370 vmcb->kerngsbase = msr_content;
371 break;
373 case MSR_STAR:
374 vmcb->star = msr_content;
375 break;
377 case MSR_LSTAR:
378 vmcb->lstar = msr_content;
379 break;
381 case MSR_CSTAR:
382 vmcb->cstar = msr_content;
383 break;
385 case MSR_SYSCALL_MASK:
386 vmcb->sfmask = msr_content;
387 break;
389 default:
390 return 0;
391 }
392 return 1;
393 }
395 int svm_realmode(struct vcpu *v)
396 {
397 unsigned long cr0 = v->arch.hvm_svm.cpu_shadow_cr0;
398 unsigned long eflags = v->arch.hvm_svm.vmcb->rflags;
400 return (eflags & X86_EFLAGS_VM) || !(cr0 & X86_CR0_PE);
401 }
403 int svm_instruction_length(struct vcpu *v)
404 {
405 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
406 unsigned long cr0 = vmcb->cr0, eflags = vmcb->rflags, mode;
407 /* check which operating mode the guest is running in */
408 if( vmcb->efer & EFER_LMA )
409 mode = vmcb->cs.attributes.fields.l ? 8 : 4;
410 else
411 mode = (eflags & X86_EFLAGS_VM) || !(cr0 & X86_CR0_PE) ? 2 : 4;
412 return svm_instrlen(guest_cpu_user_regs(), mode);
413 }
415 unsigned long svm_get_ctrl_reg(struct vcpu *v, unsigned int num)
416 {
417 switch ( num )
418 {
419 case 0:
420 return v->arch.hvm_svm.cpu_shadow_cr0;
421 case 2:
422 return v->arch.hvm_svm.cpu_cr2;
423 case 3:
424 return v->arch.hvm_svm.cpu_cr3;
425 default:
426 BUG();
427 }
428 return 0; /* dummy */
429 }
431 int start_svm(void)
432 {
433 u32 eax, ecx, edx;
435 /* Xen does not fill x86_capability words except 0. */
436 ecx = cpuid_ecx(0x80000001);
437 boot_cpu_data.x86_capability[5] = ecx;
439 if (!(test_bit(X86_FEATURE_SVME, &boot_cpu_data.x86_capability)))
440 return 0;
442 rdmsr(MSR_EFER, eax, edx);
443 eax |= EFER_SVME;
444 wrmsr(MSR_EFER, eax, edx);
445 asidpool_init(smp_processor_id());
446 printk("AMD SVM Extension is enabled for cpu %d.\n", smp_processor_id());
448 /* Setup HVM interfaces */
449 hvm_funcs.disable = stop_svm;
451 hvm_funcs.initialize_guest_resources = svm_initialize_guest_resources;
452 hvm_funcs.relinquish_guest_resources = svm_relinquish_guest_resources;
454 hvm_funcs.store_cpu_guest_regs = svm_store_cpu_guest_regs;
455 hvm_funcs.load_cpu_guest_regs = svm_load_cpu_guest_regs;
457 hvm_funcs.realmode = svm_realmode;
458 hvm_funcs.paging_enabled = svm_paging_enabled;
459 hvm_funcs.instruction_length = svm_instruction_length;
460 hvm_funcs.get_guest_ctrl_reg = svm_get_ctrl_reg;
462 hvm_enabled = 1;
464 return 1;
465 }
467 int svm_dbg_on = 0;
469 static inline int svm_do_debugout(unsigned long exit_code)
470 {
471 int i;
473 static unsigned long counter = 0;
474 static unsigned long works[] =
475 {
476 VMEXIT_IOIO,
477 VMEXIT_HLT,
478 VMEXIT_CPUID,
479 VMEXIT_DR0_READ,
480 VMEXIT_DR1_READ,
481 VMEXIT_DR2_READ,
482 VMEXIT_DR3_READ,
483 VMEXIT_DR6_READ,
484 VMEXIT_DR7_READ,
485 VMEXIT_DR0_WRITE,
486 VMEXIT_DR1_WRITE,
487 VMEXIT_DR2_WRITE,
488 VMEXIT_DR3_WRITE,
489 VMEXIT_CR0_READ,
490 VMEXIT_CR0_WRITE,
491 VMEXIT_CR3_READ,
492 VMEXIT_CR4_READ,
493 VMEXIT_MSR,
494 VMEXIT_CR0_WRITE,
495 VMEXIT_CR3_WRITE,
496 VMEXIT_CR4_WRITE,
497 VMEXIT_EXCEPTION_PF,
498 VMEXIT_INTR,
499 VMEXIT_INVLPG,
500 VMEXIT_EXCEPTION_NM
501 };
504 #if 0
505 if (svm_dbg_on && exit_code != 0x7B)
506 return 1;
507 #endif
509 counter++;
511 #if 0
512 if ((exit_code == 0x4E
513 || exit_code == VMEXIT_CR0_READ
514 || exit_code == VMEXIT_CR0_WRITE)
515 && counter < 200000)
516 return 0;
518 if ((exit_code == 0x4E) && counter < 500000)
519 return 0;
520 #endif
522 for (i = 0; i < sizeof(works) / sizeof(works[0]); i++)
523 if (exit_code == works[i])
524 return 0;
526 return 1;
527 }
530 void save_svm_cpu_user_regs(struct vcpu *v, struct cpu_user_regs *ctxt)
531 {
532 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
534 ASSERT(vmcb);
536 ctxt->eax = vmcb->rax;
537 ctxt->ss = vmcb->ss.sel;
538 ctxt->esp = vmcb->rsp;
539 ctxt->eflags = vmcb->rflags;
540 ctxt->cs = vmcb->cs.sel;
541 ctxt->eip = vmcb->rip;
543 ctxt->gs = vmcb->gs.sel;
544 ctxt->fs = vmcb->fs.sel;
545 ctxt->es = vmcb->es.sel;
546 ctxt->ds = vmcb->ds.sel;
547 }
549 #if defined (__x86_64__)
550 void svm_store_cpu_user_regs(struct cpu_user_regs *regs, struct vcpu *v )
551 {
552 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
554 regs->rip = vmcb->rip;
555 regs->rsp = vmcb->rsp;
556 regs->rflags = vmcb->rflags;
557 regs->cs = vmcb->cs.sel;
558 regs->ds = vmcb->ds.sel;
559 regs->es = vmcb->es.sel;
560 regs->ss = vmcb->ss.sel;
561 }
562 #elif defined (__i386__)
563 void svm_store_cpu_user_regs(struct cpu_user_regs *regs, struct vcpu *v)
564 {
565 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
567 regs->eip = vmcb->rip;
568 regs->esp = vmcb->rsp;
569 regs->eflags = vmcb->rflags;
570 regs->cs = vmcb->cs.sel;
571 regs->ds = vmcb->ds.sel;
572 regs->es = vmcb->es.sel;
573 regs->ss = vmcb->ss.sel;
574 }
575 #endif
577 /* XXX Use svm_load_cpu_guest_regs instead */
578 #if defined (__i386__)
579 void svm_load_cpu_user_regs(struct vcpu *v, struct cpu_user_regs *regs)
580 {
581 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
582 u32 *intercepts = &v->arch.hvm_svm.vmcb->exception_intercepts;
584 /* Write the guest register value into VMCB */
585 vmcb->rax = regs->eax;
586 vmcb->ss.sel = regs->ss;
587 vmcb->rsp = regs->esp;
588 vmcb->rflags = regs->eflags;
589 vmcb->cs.sel = regs->cs;
590 vmcb->rip = regs->eip;
591 if (regs->eflags & EF_TF)
592 *intercepts |= EXCEPTION_BITMAP_DB;
593 else
594 *intercepts &= ~EXCEPTION_BITMAP_DB;
595 }
596 #else /* (__i386__) */
597 void svm_load_cpu_user_regs(struct vcpu *v, struct cpu_user_regs *regs)
598 {
599 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
600 u32 *intercepts = &v->arch.hvm_svm.vmcb->exception_intercepts;
602 /* Write the guest register value into VMCB */
603 vmcb->rax = regs->rax;
604 vmcb->ss.sel = regs->ss;
605 vmcb->rsp = regs->rsp;
606 vmcb->rflags = regs->rflags;
607 vmcb->cs.sel = regs->cs;
608 vmcb->rip = regs->rip;
609 if (regs->rflags & EF_TF)
610 *intercepts |= EXCEPTION_BITMAP_DB;
611 else
612 *intercepts &= ~EXCEPTION_BITMAP_DB;
613 }
614 #endif /* !(__i386__) */
616 int svm_paging_enabled(struct vcpu *v)
617 {
618 unsigned long cr0;
620 cr0 = v->arch.hvm_svm.cpu_shadow_cr0;
622 return (cr0 & X86_CR0_PE) && (cr0 & X86_CR0_PG);
623 }
626 /* Make sure that xen intercepts any FP accesses from current */
627 void svm_stts(struct vcpu *v)
628 {
629 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
631 /* FPU state already dirty? Then no need to setup_fpu() lazily. */
632 if ( test_bit(_VCPUF_fpu_dirtied, &v->vcpu_flags) )
633 return;
635 /*
636 * If the guest does not have TS enabled then we must cause and handle an
637 * exception on first use of the FPU. If the guest *does* have TS enabled
638 * then this is not necessary: no FPU activity can occur until the guest
639 * clears CR0.TS, and we will initialise the FPU when that happens.
640 */
641 if ( !(v->arch.hvm_svm.cpu_shadow_cr0 & X86_CR0_TS) )
642 {
643 v->arch.hvm_svm.vmcb->exception_intercepts |= EXCEPTION_BITMAP_NM;
644 vmcb->cr0 |= X86_CR0_TS;
645 }
646 }
648 static void arch_svm_do_launch(struct vcpu *v)
649 {
650 cpu_user_regs_t *regs = &current->arch.guest_context.user_regs;
651 int error;
653 #if 0
654 if (svm_dbg_on)
655 printk("Do launch\n");
656 #endif
657 error = construct_vmcb(&v->arch.hvm_svm, regs);
658 if ( error < 0 )
659 {
660 if (v->vcpu_id == 0) {
661 printk("Failed to construct a new VMCB for BSP.\n");
662 } else {
663 printk("Failed to construct a new VMCB for AP %d\n", v->vcpu_id);
664 }
665 domain_crash_synchronous();
666 }
668 svm_do_launch(v);
669 #if 0
670 if (svm_dbg_on)
671 svm_dump_host_regs(__func__);
672 #endif
673 reset_stack_and_jump(svm_asm_do_launch);
674 }
676 static void svm_freeze_time(struct vcpu *v)
677 {
678 struct hvm_time_info *time_info = &v->domain->arch.hvm_domain.vpit.time_info;
680 if ( time_info->first_injected && !v->domain->arch.hvm_domain.guest_time ) {
681 v->domain->arch.hvm_domain.guest_time = svm_get_guest_time(v);
682 time_info->count_advance += (NOW() - time_info->count_point);
683 stop_timer(&(time_info->pit_timer));
684 }
685 }
687 static void svm_ctxt_switch_from(struct vcpu *v)
688 {
689 svm_freeze_time(v);
690 }
692 static void svm_ctxt_switch_to(struct vcpu *v)
693 {
694 }
696 void svm_final_setup_guest(struct vcpu *v)
697 {
698 v->arch.schedule_tail = arch_svm_do_launch;
699 v->arch.ctxt_switch_from = svm_ctxt_switch_from;
700 v->arch.ctxt_switch_to = svm_ctxt_switch_to;
702 if (v == v->domain->vcpu[0])
703 {
704 struct domain *d = v->domain;
705 struct vcpu *vc;
707 /* Initialize monitor page table */
708 for_each_vcpu(d, vc)
709 vc->arch.monitor_table = mk_pagetable(0);
711 /*
712 * Required to do this once per domain
713 * TODO: add a separate function to do these.
714 */
715 memset(&d->shared_info->evtchn_mask[0], 0xff,
716 sizeof(d->shared_info->evtchn_mask));
718 /*
719 * Put the domain in shadow mode even though we're going to be using
720 * the shared 1:1 page table initially. It shouldn't hurt.
721 */
722 shadow_mode_enable(d,
723 SHM_enable|SHM_refcounts|
724 SHM_translate|SHM_external|SHM_wr_pt_pte);
725 }
726 }
729 static void svm_relinquish_guest_resources(struct domain *d)
730 {
731 extern void destroy_vmcb(struct arch_svm_struct *); /* XXX */
732 struct vcpu *v;
734 for_each_vcpu ( d, v )
735 {
736 if ( !test_bit(_VCPUF_initialised, &v->vcpu_flags) )
737 continue;
738 #if 0
739 /* Memory leak by not freeing this. XXXKAF: *Why* is this not per core? */
740 free_host_save_area(v->arch.hvm_svm.host_save_area);
741 #endif
743 destroy_vmcb(&v->arch.hvm_svm);
744 free_monitor_pagetable(v);
745 kill_timer(&v->arch.hvm_svm.hlt_timer);
746 if ( hvm_apic_support(v->domain) && (VLAPIC(v) != NULL) )
747 {
748 kill_timer( &(VLAPIC(v)->vlapic_timer) );
749 xfree(VLAPIC(v));
750 }
751 }
753 kill_timer(&d->arch.hvm_domain.vpit.time_info.pit_timer);
755 if ( d->arch.hvm_domain.shared_page_va )
756 unmap_domain_page_global(
757 (void *)d->arch.hvm_domain.shared_page_va);
759 shadow_direct_map_clean(d);
760 }
763 void arch_svm_do_resume(struct vcpu *v)
764 {
765 /* pinning VCPU to a different core? */
766 if ( v->arch.hvm_svm.launch_core == smp_processor_id()) {
767 svm_do_resume( v );
768 reset_stack_and_jump( svm_asm_do_resume );
769 }
770 else {
771 printk("VCPU core pinned: %d to %d\n",
772 v->arch.hvm_svm.launch_core, smp_processor_id() );
773 v->arch.hvm_svm.launch_core = smp_processor_id();
774 svm_migrate_timers( v );
775 svm_do_resume( v );
776 reset_stack_and_jump( svm_asm_do_resume );
777 }
778 }
781 void svm_migrate_timers(struct vcpu *v)
782 {
783 struct hvm_time_info *time_info = &v->domain->arch.hvm_domain.vpit.time_info;
785 migrate_timer(&time_info->pit_timer, v->processor);
786 migrate_timer(&v->arch.hvm_svm.hlt_timer, v->processor);
787 if ( hvm_apic_support(v->domain) && VLAPIC( v ))
788 migrate_timer( &(VLAPIC(v)->vlapic_timer ), v->processor );
789 }
792 static int svm_do_page_fault(unsigned long va, struct cpu_user_regs *regs)
793 {
794 struct vcpu *v = current;
795 unsigned long eip;
796 unsigned long gpa; /* FIXME: PAE */
797 int result;
798 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
800 ASSERT(vmcb);
802 //#if HVM_DEBUG
803 eip = vmcb->rip;
804 HVM_DBG_LOG(DBG_LEVEL_VMMU,
805 "svm_do_page_fault = 0x%lx, eip = %lx, error_code = %lx",
806 va, eip, (unsigned long)regs->error_code);
807 //#endif
809 if ( !svm_paging_enabled(v) )
810 {
811 if ( shadow_direct_map_fault(va, regs) )
812 return 1;
814 handle_mmio(va, va);
815 TRACE_VMEXIT(2,2);
816 return 1;
817 }
820 gpa = gva_to_gpa(va);
822 /* Use 1:1 page table to identify MMIO address space */
823 if (mmio_space(gpa))
824 {
825 /* No support for APIC */
826 if (!hvm_apic_support(v->domain) && gpa >= 0xFEC00000)
827 {
828 int inst_len;
829 inst_len = svm_instruction_length(v);
830 if (inst_len == -1)
831 {
832 printf("%s: INST_LEN - Unable to decode properly.\n", __func__);
833 domain_crash_synchronous();
834 }
836 __update_guest_eip(vmcb, inst_len);
838 return 1;
839 }
841 TRACE_VMEXIT (2,2);
842 handle_mmio(va, gpa);
844 return 1;
845 }
847 result = shadow_fault(va, regs);
849 if( result ) {
850 /* Let's make sure that the Guest TLB is flushed */
851 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
852 }
854 TRACE_VMEXIT (2,result);
856 return result;
857 }
860 static void svm_do_no_device_fault(struct vmcb_struct *vmcb)
861 {
862 struct vcpu *v = current;
864 setup_fpu(v);
865 vmcb->exception_intercepts &= ~EXCEPTION_BITMAP_NM;
867 if ( !(v->arch.hvm_svm.cpu_shadow_cr0 & X86_CR0_TS) )
868 vmcb->cr0 &= ~X86_CR0_TS;
869 }
872 static void svm_do_general_protection_fault(struct vcpu *v,
873 struct cpu_user_regs *regs)
874 {
875 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
876 unsigned long eip, error_code;
878 ASSERT(vmcb);
880 eip = vmcb->rip;
881 error_code = vmcb->exitinfo1;
883 if (vmcb->idtr.limit == 0) {
884 printf("Huh? We got a GP Fault with an invalid IDTR!\n");
885 svm_dump_vmcb(__func__, vmcb);
886 svm_dump_regs(__func__, regs);
887 svm_dump_inst(vmcb->rip);
888 __hvm_bug(regs);
889 }
891 HVM_DBG_LOG(DBG_LEVEL_1,
892 "svm_general_protection_fault: eip = %lx, erro_code = %lx",
893 eip, error_code);
895 HVM_DBG_LOG(DBG_LEVEL_1,
896 "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx",
897 (unsigned long)regs->eax, (unsigned long)regs->ebx,
898 (unsigned long)regs->ecx, (unsigned long)regs->edx,
899 (unsigned long)regs->esi, (unsigned long)regs->edi);
902 /* Reflect it back into the guest */
903 svm_inject_exception(vmcb, TRAP_gp_fault, 1, error_code);
904 }
906 /* Reserved bits: [31:14], [12:1] */
907 #define SVM_VCPU_CPUID_L1_RESERVED 0xffffdffe
909 static void svm_vmexit_do_cpuid(struct vmcb_struct *vmcb, unsigned long input,
910 struct cpu_user_regs *regs)
911 {
912 unsigned int eax, ebx, ecx, edx;
913 unsigned long eip;
914 struct vcpu *v = current;
915 int inst_len;
917 ASSERT(vmcb);
919 eip = vmcb->rip;
921 HVM_DBG_LOG(DBG_LEVEL_1,
922 "do_cpuid: (eax) %lx, (ebx) %lx, (ecx) %lx, (edx) %lx,"
923 " (esi) %lx, (edi) %lx",
924 (unsigned long)regs->eax, (unsigned long)regs->ebx,
925 (unsigned long)regs->ecx, (unsigned long)regs->edx,
926 (unsigned long)regs->esi, (unsigned long)regs->edi);
928 cpuid(input, &eax, &ebx, &ecx, &edx);
930 if (input == 1)
931 {
932 if ( !hvm_apic_support(v->domain) ||
933 !vlapic_global_enabled((VLAPIC(v))) )
934 {
935 clear_bit(X86_FEATURE_APIC, &edx);
936 /* Since the apic is disabled, avoid any confusion about SMP cpus being available */
937 clear_bit(X86_FEATURE_HT, &edx); /* clear the hyperthread bit */
938 ebx &= 0xFF00FFFF; /* set the logical processor count to 1 */
939 ebx |= 0x00010000;
940 }
942 #if CONFIG_PAGING_LEVELS < 3
943 clear_bit(X86_FEATURE_NX, &edx);
944 clear_bit(X86_FEATURE_PAE, &edx);
945 clear_bit(X86_FEATURE_PSE, &edx);
946 clear_bit(X86_FEATURE_PSE36, &edx);
947 #else
948 if ( v->domain->arch.ops->guest_paging_levels == PAGING_L2 )
949 {
950 if ( !v->domain->arch.hvm_domain.pae_enabled )
951 {
952 clear_bit(X86_FEATURE_PAE, &edx);
953 clear_bit(X86_FEATURE_NX, &edx);
954 }
955 clear_bit(X86_FEATURE_PSE, &edx);
956 clear_bit(X86_FEATURE_PSE36, &edx);
957 }
958 #endif
959 /* Clear out reserved bits. */
960 ecx &= ~SVM_VCPU_CPUID_L1_RESERVED; /* mask off reserved bits */
961 clear_bit(X86_FEATURE_MWAIT & 31, &ecx);
962 }
963 #ifdef __i386__
964 else if ( input == 0x80000001 )
965 {
966 /* Mask feature for Intel ia32e or AMD long mode. */
967 clear_bit(X86_FEATURE_LM & 31, &edx);
968 }
969 #endif
971 regs->eax = (unsigned long)eax;
972 regs->ebx = (unsigned long)ebx;
973 regs->ecx = (unsigned long)ecx;
974 regs->edx = (unsigned long)edx;
976 HVM_DBG_LOG(DBG_LEVEL_1,
977 "svm_vmexit_do_cpuid: eip: %lx, input: %lx, out:eax=%x, "
978 "ebx=%x, ecx=%x, edx=%x",
979 eip, input, eax, ebx, ecx, edx);
981 inst_len = __get_instruction_length(vmcb, INSTR_CPUID, NULL);
982 ASSERT(inst_len > 0);
983 __update_guest_eip(vmcb, inst_len);
984 }
987 static inline unsigned long *get_reg_p(unsigned int gpreg,
988 struct cpu_user_regs *regs, struct vmcb_struct *vmcb)
989 {
990 unsigned long *reg_p = NULL;
991 switch (gpreg)
992 {
993 case SVM_REG_EAX:
994 reg_p = (unsigned long *)&regs->eax;
995 break;
996 case SVM_REG_EBX:
997 reg_p = (unsigned long *)&regs->ebx;
998 break;
999 case SVM_REG_ECX:
1000 reg_p = (unsigned long *)&regs->ecx;
1001 break;
1002 case SVM_REG_EDX:
1003 reg_p = (unsigned long *)&regs->edx;
1004 break;
1005 case SVM_REG_EDI:
1006 reg_p = (unsigned long *)&regs->edi;
1007 break;
1008 case SVM_REG_ESI:
1009 reg_p = (unsigned long *)&regs->esi;
1010 break;
1011 case SVM_REG_EBP:
1012 reg_p = (unsigned long *)&regs->ebp;
1013 break;
1014 case SVM_REG_ESP:
1015 reg_p = (unsigned long *)&vmcb->rsp;
1016 break;
1017 #if __x86_64__
1018 case SVM_REG_R8:
1019 reg_p = (unsigned long *)&regs->r8;
1020 break;
1021 case SVM_REG_R9:
1022 reg_p = (unsigned long *)&regs->r9;
1023 break;
1024 case SVM_REG_R10:
1025 reg_p = (unsigned long *)&regs->r10;
1026 break;
1027 case SVM_REG_R11:
1028 reg_p = (unsigned long *)&regs->r11;
1029 break;
1030 case SVM_REG_R12:
1031 reg_p = (unsigned long *)&regs->r12;
1032 break;
1033 case SVM_REG_R13:
1034 reg_p = (unsigned long *)&regs->r13;
1035 break;
1036 case SVM_REG_R14:
1037 reg_p = (unsigned long *)&regs->r14;
1038 break;
1039 case SVM_REG_R15:
1040 reg_p = (unsigned long *)&regs->r15;
1041 break;
1042 #endif
1043 default:
1044 BUG();
1047 return reg_p;
1051 static inline unsigned long get_reg(unsigned int gpreg,
1052 struct cpu_user_regs *regs, struct vmcb_struct *vmcb)
1054 unsigned long *gp;
1055 gp = get_reg_p(gpreg, regs, vmcb);
1056 return *gp;
1060 static inline void set_reg(unsigned int gpreg, unsigned long value,
1061 struct cpu_user_regs *regs, struct vmcb_struct *vmcb)
1063 unsigned long *gp;
1064 gp = get_reg_p(gpreg, regs, vmcb);
1065 *gp = value;
1069 static void svm_dr_access (struct vcpu *v, unsigned int reg, unsigned int type,
1070 struct cpu_user_regs *regs)
1072 unsigned long *reg_p = 0;
1073 unsigned int gpreg = 0;
1074 unsigned long eip;
1075 int inst_len;
1076 int index;
1077 struct vmcb_struct *vmcb;
1078 u8 buffer[MAX_INST_LEN];
1079 u8 prefix = 0;
1081 vmcb = v->arch.hvm_svm.vmcb;
1083 ASSERT(vmcb);
1085 eip = vmcb->rip;
1086 inst_copy_from_guest(buffer, svm_rip2pointer(vmcb), sizeof(buffer));
1087 index = skip_prefix_bytes(buffer, sizeof(buffer));
1089 ASSERT(buffer[index+0] == 0x0f && (buffer[index+1] & 0xFD) == 0x21);
1091 if (index > 0 && (buffer[index-1] & 0xF0) == 0x40)
1092 prefix = buffer[index-1];
1094 gpreg = decode_src_reg(prefix, buffer[index + 2]);
1095 ASSERT(reg == decode_dest_reg(prefix, buffer[index + 2]));
1097 HVM_DBG_LOG(DBG_LEVEL_1, "svm_dr_access : eip=%lx, reg=%d, gpreg = %x",
1098 eip, reg, gpreg);
1100 reg_p = get_reg_p(gpreg, regs, vmcb);
1102 switch (type)
1104 case TYPE_MOV_TO_DR:
1105 inst_len = __get_instruction_length(vmcb, INSTR_MOV2DR, buffer);
1106 v->arch.guest_context.debugreg[reg] = *reg_p;
1107 break;
1108 case TYPE_MOV_FROM_DR:
1109 inst_len = __get_instruction_length(vmcb, INSTR_MOVDR2, buffer);
1110 *reg_p = v->arch.guest_context.debugreg[reg];
1111 break;
1112 default:
1113 __hvm_bug(regs);
1114 break;
1116 ASSERT(inst_len > 0);
1117 __update_guest_eip(vmcb, inst_len);
1121 static unsigned int check_for_null_selector(struct vmcb_struct *vmcb,
1122 unsigned int dir, unsigned long *base, unsigned int real)
1125 unsigned char inst[MAX_INST_LEN];
1126 segment_selector_t seg;
1127 int i;
1129 memset(inst, 0, MAX_INST_LEN);
1130 if (inst_copy_from_guest(inst, svm_rip2pointer(vmcb), sizeof(inst))
1131 != MAX_INST_LEN)
1133 printk("check_for_null_selector: get guest instruction failed\n");
1134 domain_crash_synchronous();
1137 for (i = 0; i < MAX_INST_LEN; i++)
1139 switch (inst[i])
1141 case 0xf3: /* REPZ */
1142 case 0xf2: /* REPNZ */
1143 case 0xf0: /* LOCK */
1144 case 0x66: /* data32 */
1145 case 0x67: /* addr32 */
1146 #if __x86_64__
1147 /* REX prefixes */
1148 case 0x40:
1149 case 0x41:
1150 case 0x42:
1151 case 0x43:
1152 case 0x44:
1153 case 0x45:
1154 case 0x46:
1155 case 0x47:
1157 case 0x48:
1158 case 0x49:
1159 case 0x4a:
1160 case 0x4b:
1161 case 0x4c:
1162 case 0x4d:
1163 case 0x4e:
1164 case 0x4f:
1165 #endif
1166 continue;
1167 case 0x2e: /* CS */
1168 seg = vmcb->cs;
1169 break;
1170 case 0x36: /* SS */
1171 seg = vmcb->ss;
1172 break;
1173 case 0x26: /* ES */
1174 seg = vmcb->es;
1175 break;
1176 case 0x64: /* FS */
1177 seg = vmcb->fs;
1178 break;
1179 case 0x65: /* GS */
1180 seg = vmcb->gs;
1181 break;
1182 case 0x3e: /* DS */
1183 /* FALLTHROUGH */
1184 seg = vmcb->ds;
1185 break;
1186 default:
1187 if (dir == IOREQ_READ) /* IN/INS instruction? */
1188 seg = vmcb->es;
1189 else
1190 seg = vmcb->ds;
1193 if (base)
1194 *base = seg.base;
1196 return seg.attributes.fields.p;
1199 ASSERT(0);
1200 return 0;
1204 /* Get the address of INS/OUTS instruction */
1205 static inline unsigned long svm_get_io_address(struct vmcb_struct *vmcb,
1206 struct cpu_user_regs *regs, unsigned int dir, unsigned int real)
1208 unsigned long addr = 0;
1209 unsigned long base = 0;
1211 check_for_null_selector(vmcb, dir, &base, real);
1213 if (dir == IOREQ_WRITE)
1215 if (real)
1216 addr = (regs->esi & 0xFFFF) + base;
1217 else
1218 addr = regs->esi + base;
1220 else
1222 if (real)
1223 addr = (regs->edi & 0xFFFF) + base;
1224 else
1225 addr = regs->edi + base;
1228 return addr;
1232 static void svm_io_instruction(struct vcpu *v, struct cpu_user_regs *regs)
1234 struct mmio_op *mmio_opp;
1235 unsigned long eip, cs, eflags, cr0;
1236 unsigned long port;
1237 unsigned int real, size, dir;
1238 ioio_info_t info;
1240 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1242 ASSERT(vmcb);
1243 mmio_opp = &current->arch.hvm_vcpu.mmio_op;
1244 mmio_opp->instr = INSTR_PIO;
1245 mmio_opp->flags = 0;
1247 eip = vmcb->rip;
1248 cs = vmcb->cs.sel;
1249 eflags = vmcb->rflags;
1251 info.bytes = vmcb->exitinfo1;
1253 port = info.fields.port; /* port used to be addr */
1254 dir = info.fields.type; /* direction */
1255 if (info.fields.sz32)
1256 size = 4;
1257 else if (info.fields.sz16)
1258 size = 2;
1259 else
1260 size = 1;
1262 cr0 = vmcb->cr0;
1263 real = (eflags & X86_EFLAGS_VM) || !(cr0 & X86_CR0_PE);
1265 HVM_DBG_LOG(DBG_LEVEL_IO,
1266 "svm_io_instruction: port 0x%lx real %d, eip=%lx:%lx, "
1267 "exit_qualification = %lx",
1268 (unsigned long) port, real, cs, eip, (unsigned long)info.bytes);
1269 /* string instruction */
1270 if (info.fields.str)
1272 unsigned long addr, count = 1;
1273 int sign = regs->eflags & EF_DF ? -1 : 1;
1275 /* Need the original rip, here. */
1276 addr = svm_get_io_address(vmcb, regs, dir, real);
1278 /* "rep" prefix */
1279 if (info.fields.rep)
1281 mmio_opp->flags |= REPZ;
1282 count = real ? regs->ecx & 0xFFFF : regs->ecx;
1285 /*
1286 * Handle string pio instructions that cross pages or that
1287 * are unaligned. See the comments in hvm_platform.c/handle_mmio()
1288 */
1289 if ((addr & PAGE_MASK) != ((addr + size - 1) & PAGE_MASK))
1291 unsigned long value = 0;
1293 mmio_opp->flags |= OVERLAP;
1295 if (dir == IOREQ_WRITE)
1296 hvm_copy(&value, addr, size, HVM_COPY_IN);
1298 send_pio_req(regs, port, 1, size, value, dir, 0);
1300 else
1302 if ((addr & PAGE_MASK) != ((addr + count * size - 1) & PAGE_MASK))
1304 if (sign > 0)
1305 count = (PAGE_SIZE - (addr & ~PAGE_MASK)) / size;
1306 else
1307 count = (addr & ~PAGE_MASK) / size;
1309 else
1310 vmcb->rip = vmcb->exitinfo2;
1312 send_pio_req(regs, port, count, size, addr, dir, 1);
1315 else
1317 /*
1318 * On SVM, the RIP of the instruction following the IN/OUT is saved in
1319 * ExitInfo2
1320 */
1321 vmcb->rip = vmcb->exitinfo2;
1323 if (port == 0xe9 && dir == IOREQ_WRITE && size == 1)
1324 hvm_print_line(v, regs->eax); /* guest debug output */
1326 send_pio_req(regs, port, 1, size, regs->eax, dir, 0);
1330 static int svm_set_cr0(unsigned long value)
1332 struct vcpu *v = current;
1333 unsigned long mfn;
1334 int paging_enabled;
1335 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1337 ASSERT(vmcb);
1339 /* We don't want to lose PG. ET is reserved and should always be 1 */
1340 paging_enabled = svm_paging_enabled(v);
1341 value |= X86_CR0_ET;
1342 vmcb->cr0 = value | X86_CR0_PG;
1343 v->arch.hvm_svm.cpu_shadow_cr0 = value;
1345 /* TS cleared? Then initialise FPU now. */
1346 if ( !(value & X86_CR0_TS) )
1348 setup_fpu(v);
1349 vmcb->exception_intercepts &= ~EXCEPTION_BITMAP_NM;
1352 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR0 value = %lx\n", value);
1354 if ((value & X86_CR0_PE) && (value & X86_CR0_PG) && !paging_enabled)
1356 /* The guest CR3 must be pointing to the guest physical. */
1357 if (!VALID_MFN(mfn =
1358 get_mfn_from_gpfn(v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT))
1359 || !get_page(mfn_to_page(mfn), v->domain))
1361 printk("Invalid CR3 value = %lx\n", v->arch.hvm_svm.cpu_cr3);
1362 domain_crash_synchronous(); /* need to take a clean path */
1365 #if defined(__x86_64__)
1366 if (test_bit(SVM_CPU_STATE_LME_ENABLED, &v->arch.hvm_svm.cpu_state)
1367 && !test_bit(SVM_CPU_STATE_PAE_ENABLED,
1368 &v->arch.hvm_svm.cpu_state))
1370 HVM_DBG_LOG(DBG_LEVEL_1, "Enable paging before PAE enable\n");
1371 svm_inject_exception(vmcb, TRAP_gp_fault, 1, 0);
1374 if (test_bit(SVM_CPU_STATE_LME_ENABLED, &v->arch.hvm_svm.cpu_state))
1376 /* At this point PAE should already be enabled */
1377 HVM_DBG_LOG(DBG_LEVEL_1, "Enable the Long mode\n");
1378 set_bit(SVM_CPU_STATE_LMA_ENABLED,
1379 &v->arch.hvm_svm.cpu_state);
1380 vmcb->efer |= (EFER_LMA | EFER_LME);
1381 if (!shadow_set_guest_paging_levels(v->domain, PAGING_L4) )
1383 printk("Unsupported guest paging levels\n");
1384 domain_crash_synchronous(); /* need to take a clean path */
1387 else
1388 #endif /* __x86_64__ */
1390 #if CONFIG_PAGING_LEVELS >= 3
1391 /* seems it's a 32-bit or 32-bit PAE guest */
1392 if ( test_bit(SVM_CPU_STATE_PAE_ENABLED,
1393 &v->arch.hvm_svm.cpu_state) )
1395 /* The guest enables PAE first and then it enables PG, it is
1396 * really a PAE guest */
1397 if ( !shadow_set_guest_paging_levels(v->domain, PAGING_L3) )
1399 printk("Unsupported guest paging levels\n");
1400 domain_crash_synchronous();
1403 else
1405 if ( !shadow_set_guest_paging_levels(v->domain, PAGING_L2) )
1407 printk("Unsupported guest paging levels\n");
1408 domain_crash_synchronous(); /* need to take a clean path */
1411 #endif
1414 /* Now arch.guest_table points to machine physical. */
1415 v->arch.guest_table = mk_pagetable((u64)mfn << PAGE_SHIFT);
1416 update_pagetables(v);
1418 HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx",
1419 (unsigned long) (mfn << PAGE_SHIFT));
1421 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
1422 vmcb->cr3 = pagetable_get_paddr(v->arch.shadow_table);
1424 /* arch->shadow_table should hold the next CR3 for shadow */
1425 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx\n",
1426 v->arch.hvm_svm.cpu_cr3, mfn);
1428 return 1;
1431 if ( !((value & X86_CR0_PE) && (value & X86_CR0_PG)) && paging_enabled )
1432 if ( v->arch.hvm_svm.cpu_cr3 ) {
1433 put_page(mfn_to_page(get_mfn_from_gpfn(
1434 v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT)));
1435 v->arch.guest_table = mk_pagetable(0);
1438 /*
1439 * SVM implements paged real-mode and when we return to real-mode
1440 * we revert back to the physical mappings that the domain builder
1441 * created.
1442 */
1443 if ((value & X86_CR0_PE) == 0) {
1444 if (value & X86_CR0_PG) {
1445 svm_inject_exception(vmcb, TRAP_gp_fault, 1, 0);
1446 return 0;
1449 clear_all_shadow_status( v->domain );
1450 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
1451 vmcb->cr3 = pagetable_get_paddr(v->domain->arch.phys_table);
1453 else if ( (value & (X86_CR0_PE | X86_CR0_PG)) == X86_CR0_PE )
1455 /* we should take care of this kind of situation */
1456 clear_all_shadow_status(v->domain);
1457 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
1458 vmcb->cr3 = pagetable_get_paddr(v->domain->arch.phys_table);
1461 return 1;
1464 /*
1465 * Read from control registers. CR0 and CR4 are read from the shadow.
1466 */
1467 static void mov_from_cr(int cr, int gp, struct cpu_user_regs *regs)
1469 unsigned long value = 0;
1470 struct vcpu *v = current;
1471 struct vmcb_struct *vmcb;
1473 vmcb = v->arch.hvm_svm.vmcb;
1474 ASSERT(vmcb);
1476 switch (cr)
1478 case 0:
1479 value = v->arch.hvm_svm.cpu_shadow_cr0;
1480 if (svm_dbg_on)
1481 printk("CR0 read =%lx \n", value );
1482 break;
1483 case 2:
1484 value = vmcb->cr2;
1485 break;
1486 case 3:
1487 value = (unsigned long) v->arch.hvm_svm.cpu_cr3;
1488 if (svm_dbg_on)
1489 printk("CR3 read =%lx \n", value );
1490 break;
1491 case 4:
1492 value = (unsigned long) v->arch.hvm_svm.cpu_shadow_cr4;
1493 if (svm_dbg_on)
1494 printk( "CR4 read=%lx\n", value );
1495 break;
1496 case 8:
1497 #if 0
1498 value = vmcb->m_cr8;
1499 #else
1500 ASSERT(0);
1501 #endif
1502 break;
1504 default:
1505 __hvm_bug(regs);
1508 set_reg(gp, value, regs, vmcb);
1510 HVM_DBG_LOG(DBG_LEVEL_VMMU, "mov_from_cr: CR%d, value = %lx,", cr, value);
1514 static inline int svm_pgbit_test(struct vcpu *v)
1516 return v->arch.hvm_svm.cpu_shadow_cr0 & X86_CR0_PG;
1520 /*
1521 * Write to control registers
1522 */
1523 static int mov_to_cr(int gpreg, int cr, struct cpu_user_regs *regs)
1525 unsigned long value;
1526 unsigned long old_cr;
1527 struct vcpu *v = current;
1528 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1530 ASSERT(vmcb);
1532 value = get_reg(gpreg, regs, vmcb);
1534 HVM_DBG_LOG(DBG_LEVEL_1, "mov_to_cr: CR%d, value = %lx,", cr, value);
1535 HVM_DBG_LOG(DBG_LEVEL_1, "current = %lx,", (unsigned long) current);
1537 switch (cr)
1539 case 0:
1540 if (svm_dbg_on)
1541 printk("CR0 write =%lx \n", value );
1542 return svm_set_cr0(value);
1544 case 3:
1546 unsigned long old_base_mfn, mfn;
1547 if (svm_dbg_on)
1548 printk("CR3 write =%lx \n", value );
1549 /* If paging is not enabled yet, simply copy the value to CR3. */
1550 if (!svm_paging_enabled(v)) {
1551 v->arch.hvm_svm.cpu_cr3 = value;
1552 break;
1554 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
1556 /* We make a new one if the shadow does not exist. */
1557 if (value == v->arch.hvm_svm.cpu_cr3)
1559 /*
1560 * This is a simple TLB flush, implying the guest has
1561 * removed some translation or changed page attributes.
1562 * We simply invalidate the shadow.
1563 */
1564 mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT);
1565 if (mfn != pagetable_get_pfn(v->arch.guest_table))
1566 __hvm_bug(regs);
1567 shadow_sync_all(v->domain);
1569 else
1571 /*
1572 * If different, make a shadow. Check if the PDBR is valid
1573 * first.
1574 */
1575 HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 value = %lx", value);
1576 if (((value >> PAGE_SHIFT) > v->domain->max_pages)
1577 || !VALID_MFN(mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT))
1578 || !get_page(mfn_to_page(mfn), v->domain))
1580 printk("Invalid CR3 value=%lx\n", value);
1581 domain_crash_synchronous(); /* need to take a clean path */
1584 old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
1585 v->arch.guest_table = mk_pagetable((u64)mfn << PAGE_SHIFT);
1587 if (old_base_mfn)
1588 put_page(mfn_to_page(old_base_mfn));
1590 /*
1591 * arch.shadow_table should now hold the next CR3 for shadow
1592 */
1593 #if CONFIG_PAGING_LEVELS >= 3
1594 if ( v->domain->arch.ops->guest_paging_levels == PAGING_L3 )
1595 shadow_sync_all(v->domain);
1596 #endif
1597 v->arch.hvm_svm.cpu_cr3 = value;
1598 update_pagetables(v);
1599 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx", value);
1600 vmcb->cr3 = pagetable_get_paddr(v->arch.shadow_table);
1602 break;
1605 case 4: /* CR4 */
1607 if (svm_dbg_on)
1608 printk( "write cr4=%lx, cr0=%lx\n",
1609 value, v->arch.hvm_svm.cpu_shadow_cr0 );
1610 old_cr = v->arch.hvm_svm.cpu_shadow_cr4;
1611 if ( value & X86_CR4_PAE && !(old_cr & X86_CR4_PAE) )
1613 set_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
1614 if ( svm_pgbit_test(v) )
1616 /* The guest is a 32-bit PAE guest. */
1617 #if CONFIG_PAGING_LEVELS >= 4
1618 unsigned long mfn, old_base_mfn;
1620 if( !shadow_set_guest_paging_levels(v->domain, PAGING_L3) )
1622 printk("Unsupported guest paging levels\n");
1623 domain_crash_synchronous(); /* need to take a clean path */
1626 if ( !VALID_MFN(mfn = get_mfn_from_gpfn(
1627 v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT)) ||
1628 !get_page(mfn_to_page(mfn), v->domain) )
1630 printk("Invalid CR3 value = %lx", v->arch.hvm_svm.cpu_cr3);
1631 domain_crash_synchronous(); /* need to take a clean path */
1634 old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
1635 if ( old_base_mfn )
1636 put_page(mfn_to_page(old_base_mfn));
1638 /*
1639 * Now arch.guest_table points to machine physical.
1640 */
1642 v->arch.guest_table = mk_pagetable((u64)mfn << PAGE_SHIFT);
1643 update_pagetables(v);
1645 HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx",
1646 (unsigned long) (mfn << PAGE_SHIFT));
1648 vmcb->cr3 = pagetable_get_paddr(v->arch.shadow_table);
1650 /*
1651 * arch->shadow_table should hold the next CR3 for shadow
1652 */
1654 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx",
1655 v->arch.hvm_svm.cpu_cr3, mfn);
1656 #endif
1658 else
1660 /* The guest is a 64-bit or 32-bit PAE guest. */
1661 #if CONFIG_PAGING_LEVELS >= 4
1662 if ( (v->domain->arch.ops != NULL) &&
1663 v->domain->arch.ops->guest_paging_levels == PAGING_L2)
1665 /* The guest enabled PAE without enabling PG; it will
1666 * enable PG after that, so it is a 32-bit PAE
1667 * guest */
1669 if ( !shadow_set_guest_paging_levels(v->domain, PAGING_L3) )
1671 printk("Unsupported guest paging levels\n");
1672 domain_crash_synchronous();
1675 else
1677 if ( !shadow_set_guest_paging_levels(v->domain,
1678 PAGING_L4) )
1680 printk("Unsupported guest paging levels\n");
1681 domain_crash_synchronous();
1684 #endif
1687 else if (value & X86_CR4_PAE) {
1688 set_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
1689 } else {
1690 if (test_bit(SVM_CPU_STATE_LMA_ENABLED,
1691 &v->arch.hvm_svm.cpu_state)) {
1692 svm_inject_exception(vmcb, TRAP_gp_fault, 1, 0);
1694 clear_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
1697 v->arch.hvm_svm.cpu_shadow_cr4 = value;
1698 vmcb->cr4 = value | SVM_CR4_HOST_MASK;
1700 /*
1701 * Writing to CR4 to modify the PSE, PGE, or PAE flag invalidates
1702 * all TLB entries except global entries.
1703 */
1704 if ((old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE))
1706 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
1707 shadow_sync_all(v->domain);
1709 break;
1712 default:
1713 printk("invalid cr: %d\n", cr);
1714 __hvm_bug(regs);
1717 return 1;
1721 #define ARR_SIZE(x) (sizeof(x) / sizeof(x[0]))
1724 static int svm_cr_access(struct vcpu *v, unsigned int cr, unsigned int type,
1725 struct cpu_user_regs *regs)
1727 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1728 int inst_len = 0;
1729 int index;
1730 unsigned int gpreg;
1731 unsigned long value;
1732 u8 buffer[MAX_INST_LEN];
1733 u8 prefix = 0;
1734 int result = 1;
1735 enum instruction_index list_a[] = {INSTR_MOV2CR, INSTR_CLTS, INSTR_LMSW};
1736 enum instruction_index list_b[] = {INSTR_MOVCR2, INSTR_SMSW};
1737 enum instruction_index match;
1739 ASSERT(vmcb);
1741 inst_copy_from_guest(buffer, svm_rip2pointer(vmcb), sizeof(buffer));
1742 /* get index to first actual instruction byte - as we will need to know where the
1743 * prefix lives later on
1744 */
1745 index = skip_prefix_bytes(buffer, sizeof(buffer));
1747 if (type == TYPE_MOV_TO_CR)
1749 inst_len = __get_instruction_length_from_list(vmcb, list_a,
1750 ARR_SIZE(list_a), &buffer[index], &match);
1752 else
1754 inst_len = __get_instruction_length_from_list(vmcb, list_b,
1755 ARR_SIZE(list_b), &buffer[index], &match);
1758 ASSERT(inst_len > 0);
1760 inst_len += index;
1762 /* Check for REX prefix - it's ALWAYS the last byte of any prefix bytes */
1763 if (index > 0 && (buffer[index-1] & 0xF0) == 0x40)
1764 prefix = buffer[index-1];
1766 HVM_DBG_LOG(DBG_LEVEL_1, "eip = %lx", (unsigned long) vmcb->rip);
1768 switch (match)
1770 case INSTR_MOV2CR:
1771 gpreg = decode_src_reg(prefix, buffer[index+2]);
1772 result = mov_to_cr(gpreg, cr, regs);
1773 break;
1775 case INSTR_MOVCR2:
1776 gpreg = decode_src_reg(prefix, buffer[index+2]);
1777 mov_from_cr(cr, gpreg, regs);
1778 break;
1780 case INSTR_CLTS:
1781 /* TS being cleared means that it's time to restore fpu state. */
1782 setup_fpu(current);
1783 vmcb->exception_intercepts &= ~EXCEPTION_BITMAP_NM;
1784 vmcb->cr0 &= ~X86_CR0_TS; /* clear TS */
1785 v->arch.hvm_svm.cpu_shadow_cr0 &= ~X86_CR0_TS; /* clear TS */
1786 break;
1788 case INSTR_LMSW:
1789 if (svm_dbg_on)
1790 svm_dump_inst(svm_rip2pointer(vmcb));
1792 gpreg = decode_src_reg(prefix, buffer[index+2]);
1793 value = get_reg(gpreg, regs, vmcb) & 0xF;
1795 if (svm_dbg_on)
1796 printk("CR0-LMSW value=%lx, reg=%d, inst_len=%d\n", value, gpreg,
1797 inst_len);
1799 value = (v->arch.hvm_svm.cpu_shadow_cr0 & ~0xF) | value;
1801 if (svm_dbg_on)
1802 printk("CR0-LMSW CR0 - New value=%lx\n", value);
1804 result = svm_set_cr0(value);
1805 break;
1807 case INSTR_SMSW:
1808 svm_dump_inst(svm_rip2pointer(vmcb));
1809 value = v->arch.hvm_svm.cpu_shadow_cr0;
1810 gpreg = decode_src_reg(prefix, buffer[index+2]);
1811 set_reg(gpreg, value, regs, vmcb);
1813 if (svm_dbg_on)
1814 printk("CR0-SMSW value=%lx, reg=%d, inst_len=%d\n", value, gpreg,
1815 inst_len);
1816 break;
1818 default:
1819 __hvm_bug(regs);
1820 break;
1823 ASSERT(inst_len);
1825 __update_guest_eip(vmcb, inst_len);
1827 return result;
1830 static inline void svm_do_msr_access(struct vcpu *v, struct cpu_user_regs *regs)
1832 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1833 int inst_len;
1834 u64 msr_content=0;
1836 ASSERT(vmcb);
1838 HVM_DBG_LOG(DBG_LEVEL_1, "svm_do_msr_access: ecx=%lx, eax=%lx, edx=%lx, "
1839 "exitinfo = %lx", (unsigned long)regs->ecx,
1840 (unsigned long)regs->eax, (unsigned long)regs->edx,
1841 (unsigned long)vmcb->exitinfo1);
1843 /* is it a read? */
1844 if (vmcb->exitinfo1 == 0)
1846 inst_len = __get_instruction_length(vmcb, INSTR_RDMSR, NULL);
1848 regs->edx = 0;
1849 switch (regs->ecx) {
1850 case MSR_IA32_TIME_STAMP_COUNTER:
1852 struct hvm_time_info *time_info;
1854 rdtscll(msr_content);
1855 time_info = &v->domain->arch.hvm_domain.vpit.time_info;
1856 msr_content += time_info->cache_tsc_offset;
1857 break;
1859 case MSR_IA32_SYSENTER_CS:
1860 msr_content = vmcb->sysenter_cs;
1861 break;
1862 case MSR_IA32_SYSENTER_ESP:
1863 msr_content = vmcb->sysenter_esp;
1864 break;
1865 case MSR_IA32_SYSENTER_EIP:
1866 msr_content = vmcb->sysenter_eip;
1867 break;
1868 case MSR_IA32_APICBASE:
1869 msr_content = VLAPIC(v) ? VLAPIC(v)->apic_base_msr : 0;
1870 break;
1871 default:
1872 if (long_mode_do_msr_read(regs))
1873 goto done;
1874 rdmsr_safe(regs->ecx, regs->eax, regs->edx);
1875 break;
1877 regs->eax = msr_content & 0xFFFFFFFF;
1878 regs->edx = msr_content >> 32;
1880 else
1882 inst_len = __get_instruction_length(vmcb, INSTR_WRMSR, NULL);
1883 msr_content = (regs->eax & 0xFFFFFFFF) | ((u64)regs->edx << 32);
1885 switch (regs->ecx)
1887 case MSR_IA32_TIME_STAMP_COUNTER:
1888 svm_set_guest_time(v, msr_content);
1889 break;
1890 case MSR_IA32_SYSENTER_CS:
1891 vmcb->sysenter_cs = msr_content;
1892 break;
1893 case MSR_IA32_SYSENTER_ESP:
1894 vmcb->sysenter_esp = msr_content;
1895 break;
1896 case MSR_IA32_SYSENTER_EIP:
1897 vmcb->sysenter_eip = msr_content;
1898 break;
1899 case MSR_IA32_APICBASE:
1900 vlapic_msr_set(VLAPIC(v), msr_content);
1901 break;
1902 default:
1903 long_mode_do_msr_write(regs);
1904 break;
1908 done:
1910 HVM_DBG_LOG(DBG_LEVEL_1, "svm_do_msr_access returns: "
1911 "ecx=%lx, eax=%lx, edx=%lx",
1912 (unsigned long)regs->ecx, (unsigned long)regs->eax,
1913 (unsigned long)regs->edx);
1915 __update_guest_eip(vmcb, inst_len);
1919 /*
1920 * Need to use this exit to reschedule
1921 */
1922 static inline void svm_vmexit_do_hlt(struct vmcb_struct *vmcb)
1924 struct vcpu *v = current;
1925 struct hvm_virpit *vpit = &v->domain->arch.hvm_domain.vpit;
1926 s_time_t next_pit = -1, next_wakeup;
1928 __update_guest_eip(vmcb, 1);
1930 /* check for interrupt not handled or new interrupt */
1931 if ( vmcb->vintr.fields.irq || cpu_has_pending_irq(v) )
1932 return;
1934 if ( !v->vcpu_id )
1935 next_pit = get_pit_scheduled(v, vpit);
1936 next_wakeup = get_apictime_scheduled(v);
1937 if ( (next_pit != -1 && next_pit < next_wakeup) || next_wakeup == -1 )
1938 next_wakeup = next_pit;
1939 if ( next_wakeup != - 1 )
1940 set_timer(&current->arch.hvm_svm.hlt_timer, next_wakeup);
1941 hvm_safe_block();
1945 static inline void svm_vmexit_do_mwait(void)
1950 #ifdef XEN_DEBUGGER
1951 static void svm_debug_save_cpu_user_regs(struct vmcb_struct *vmcb,
1952 struct cpu_user_regs *regs)
1954 regs->eip = vmcb->rip;
1955 regs->esp = vmcb->rsp;
1956 regs->eflags = vmcb->rflags;
1958 regs->xcs = vmcb->cs.sel;
1959 regs->xds = vmcb->ds.sel;
1960 regs->xes = vmcb->es.sel;
1961 regs->xfs = vmcb->fs.sel;
1962 regs->xgs = vmcb->gs.sel;
1963 regs->xss = vmcb->ss.sel;
1967 static void svm_debug_restore_cpu_user_regs(struct cpu_user_regs *regs)
1969 vmcb->ss.sel = regs->xss;
1970 vmcb->rsp = regs->esp;
1971 vmcb->rflags = regs->eflags;
1972 vmcb->cs.sel = regs->xcs;
1973 vmcb->rip = regs->eip;
1975 vmcb->gs.sel = regs->xgs;
1976 vmcb->fs.sel = regs->xfs;
1977 vmcb->es.sel = regs->xes;
1978 vmcb->ds.sel = regs->xds;
1980 #endif
1983 void svm_handle_invlpg(const short invlpga, struct cpu_user_regs *regs)
1985 struct vcpu *v = current;
1986 u8 opcode[MAX_INST_SIZE], prefix, length = MAX_INST_SIZE;
1987 unsigned long g_vaddr;
1988 int inst_len;
1989 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1991 ASSERT(vmcb);
1992 /*
1993 * Unknown how many bytes the invlpg instruction will take. Use the
1994 * maximum instruction length here
1995 */
1996 if (inst_copy_from_guest(opcode, svm_rip2pointer(vmcb), length) < length)
1998 printk("svm_handle_invlpg (): Error reading memory %d bytes\n", length);
1999 __hvm_bug(regs);
2002 if (invlpga)
2004 inst_len = __get_instruction_length(vmcb, INSTR_INVLPGA, opcode);
2005 ASSERT(inst_len > 0);
2006 __update_guest_eip(vmcb, inst_len);
2008 /*
2009 * The address is implicit on this instruction. At the moment, we don't
2010 * use ecx (ASID) to identify individual guest pages
2011 */
2012 g_vaddr = regs->eax;
2014 else
2016 /* What about multiple prefix codes? */
2017 prefix = (is_prefix(opcode[0])?opcode[0]:0);
2018 inst_len = __get_instruction_length(vmcb, INSTR_INVLPG, opcode);
2019 ASSERT(inst_len > 0);
2021 inst_len--;
2022 length -= inst_len;
2024 /*
2025 * Decode memory operand of the instruction including ModRM, SIB, and
2026 * displacement to get the effective address and length in bytes. Assume
2027 * the system is in either 32- or 64-bit mode.
2028 */
2029 g_vaddr = get_effective_addr_modrm64(vmcb, regs, prefix,
2030 &opcode[inst_len], &length);
2032 inst_len += length;
2033 __update_guest_eip (vmcb, inst_len);
2036 /* Overkill, we may not need this */
2037 set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
2038 shadow_invlpg(v, g_vaddr);
2042 /*
2043 * Reset to realmode causes execution to start at 0xF000:0xFFF0 in
2044 * 16-bit realmode. Basically, this mimics a processor reset.
2046 * returns 0 on success, non-zero otherwise
2047 */
2048 static int svm_do_vmmcall_reset_to_realmode(struct vcpu *v,
2049 struct cpu_user_regs *regs)
2051 struct vmcb_struct *vmcb;
2053 ASSERT(v);
2054 ASSERT(regs);
2056 vmcb = v->arch.hvm_svm.vmcb;
2058 ASSERT(vmcb);
2060 /* clear the vmcb and user regs */
2061 memset(regs, 0, sizeof(struct cpu_user_regs));
2063 /* VMCB Control */
2064 vmcb->tsc_offset = 0;
2066 /* VMCB State */
2067 vmcb->cr0 = X86_CR0_ET | X86_CR0_PG;
2068 v->arch.hvm_svm.cpu_shadow_cr0 = X86_CR0_ET;
2070 vmcb->cr2 = 0;
2071 vmcb->efer = EFER_SVME;
2073 vmcb->cr4 = SVM_CR4_HOST_MASK;
2074 v->arch.hvm_svm.cpu_shadow_cr4 = 0;
2075 clear_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
2077 /* This will jump to ROMBIOS */
2078 vmcb->rip = 0xFFF0;
2080 /* setup the segment registers and all their hidden states */
2081 vmcb->cs.sel = 0xF000;
2082 vmcb->cs.attributes.bytes = 0x089b;
2083 vmcb->cs.limit = 0xffff;
2084 vmcb->cs.base = 0x000F0000;
2086 vmcb->ss.sel = 0x00;
2087 vmcb->ss.attributes.bytes = 0x0893;
2088 vmcb->ss.limit = 0xffff;
2089 vmcb->ss.base = 0x00;
2091 vmcb->ds.sel = 0x00;
2092 vmcb->ds.attributes.bytes = 0x0893;
2093 vmcb->ds.limit = 0xffff;
2094 vmcb->ds.base = 0x00;
2096 vmcb->es.sel = 0x00;
2097 vmcb->es.attributes.bytes = 0x0893;
2098 vmcb->es.limit = 0xffff;
2099 vmcb->es.base = 0x00;
2101 vmcb->fs.sel = 0x00;
2102 vmcb->fs.attributes.bytes = 0x0893;
2103 vmcb->fs.limit = 0xffff;
2104 vmcb->fs.base = 0x00;
2106 vmcb->gs.sel = 0x00;
2107 vmcb->gs.attributes.bytes = 0x0893;
2108 vmcb->gs.limit = 0xffff;
2109 vmcb->gs.base = 0x00;
2111 vmcb->ldtr.sel = 0x00;
2112 vmcb->ldtr.attributes.bytes = 0x0000;
2113 vmcb->ldtr.limit = 0x0;
2114 vmcb->ldtr.base = 0x00;
2116 vmcb->gdtr.sel = 0x00;
2117 vmcb->gdtr.attributes.bytes = 0x0000;
2118 vmcb->gdtr.limit = 0x0;
2119 vmcb->gdtr.base = 0x00;
2121 vmcb->tr.sel = 0;
2122 vmcb->tr.attributes.bytes = 0;
2123 vmcb->tr.limit = 0x0;
2124 vmcb->tr.base = 0;
2126 vmcb->idtr.sel = 0x00;
2127 vmcb->idtr.attributes.bytes = 0x0000;
2128 vmcb->idtr.limit = 0x3ff;
2129 vmcb->idtr.base = 0x00;
2131 vmcb->rax = 0;
2132 vmcb->rsp = 0;
2134 return 0;
2138 /*
2139 * svm_do_vmmcall - SVM VMMCALL handler
2141 * returns 0 on success, non-zero otherwise
2142 */
2143 static int svm_do_vmmcall(struct vcpu *v, struct cpu_user_regs *regs)
2145 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2146 int inst_len;
2148 ASSERT(vmcb);
2149 ASSERT(regs);
2151 inst_len = __get_instruction_length(vmcb, INSTR_VMCALL, NULL);
2152 ASSERT(inst_len > 0);
2154 /* VMMCALL sanity check */
2155 if (vmcb->cpl > get_vmmcall_cpl(regs->edi))
2157 printf("VMMCALL CPL check failed\n");
2158 return -1;
2161 /* handle the request */
2162 switch (regs->edi)
2164 case VMMCALL_RESET_TO_REALMODE:
2165 if (svm_do_vmmcall_reset_to_realmode(v, regs))
2167 printf("svm_do_vmmcall_reset_to_realmode() failed\n");
2168 return -1;
2171 /* since we just reset the VMCB, return without adjusting the eip */
2172 return 0;
2173 case VMMCALL_DEBUG:
2174 printf("DEBUG features not implemented yet\n");
2175 break;
2176 default:
2177 break;
2180 hvm_print_line(v, regs->eax); /* provides the current domain */
2182 __update_guest_eip(vmcb, inst_len);
2183 return 0;
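/*
 * Illustrative guest-side sketch (not part of this file): the handler
 * above expects the command code in EDI and passes EAX to
 * hvm_print_line(), so a cooperating guest could issue a request
 * roughly as follows.  The command constants live in
 * <asm/hvm/svm/vmmcall.h>; "vmmcall" is spelled as raw bytes
 * (0x0f 0x01 0xd9) for assemblers that do not know the mnemonic.
 */
#if 0
static inline void guest_vmmcall(unsigned long cmd, unsigned long arg)
{
    __asm__ __volatile__ ( ".byte 0x0f,0x01,0xd9"   /* vmmcall */
                           : : "D" (cmd), "a" (arg) : "memory" );
}
#endif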
2187 void svm_dump_inst(unsigned long eip)
2189 u8 opcode[256];
2190 unsigned long ptr;
2191 int len;
2192 int i;
2194 ptr = eip & ~0xff;
2195 len = 0;
2197 if (hvm_copy(opcode, ptr, sizeof(opcode), HVM_COPY_IN))
2198 len = sizeof(opcode);
2200 printf("Code bytes around (len=%d) %lx:", len, eip);
2201 for (i = 0; i < len; i++)
2203 if ((i & 0x0f) == 0)
2204 printf("\n%08lx:", ptr+i);
2206 printf("%02x ", opcode[i]);
2209 printf("\n");
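/*
 * Note: the dump starts at eip & ~0xff, i.e. at the enclosing 256-byte
 * aligned window, so the instruction at eip sits somewhere inside the
 * block rather than at its start.
 */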
2213 void svm_dump_regs(const char *from, struct cpu_user_regs *regs)
2215 struct vcpu *v = current;
2216 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2217 unsigned long pt = pagetable_get_paddr(v->arch.shadow_table);
2219 printf("%s: guest registers from %s:\n", __func__, from);
2220 #if defined (__x86_64__)
2221 printk("rax: %016lx rbx: %016lx rcx: %016lx\n",
2222 regs->rax, regs->rbx, regs->rcx);
2223 printk("rdx: %016lx rsi: %016lx rdi: %016lx\n",
2224 regs->rdx, regs->rsi, regs->rdi);
2225 printk("rbp: %016lx rsp: %016lx r8: %016lx\n",
2226 regs->rbp, regs->rsp, regs->r8);
2227 printk("r9: %016lx r10: %016lx r11: %016lx\n",
2228 regs->r9, regs->r10, regs->r11);
2229 printk("r12: %016lx r13: %016lx r14: %016lx\n",
2230 regs->r12, regs->r13, regs->r14);
2231 printk("r15: %016lx cr0: %016lx cr3: %016lx\n",
2232 regs->r15, v->arch.hvm_svm.cpu_shadow_cr0, vmcb->cr3);
2233 #else
2234 printf("eax: %08x, ebx: %08x, ecx: %08x, edx: %08x\n",
2235 regs->eax, regs->ebx, regs->ecx, regs->edx);
2236 printf("edi: %08x, esi: %08x, ebp: %08x, esp: %08x\n",
2237 regs->edi, regs->esi, regs->ebp, regs->esp);
2238 printf("%s: guest cr0: %lx\n", __func__,
2239 v->arch.hvm_svm.cpu_shadow_cr0);
2240 printf("guest CR3 = %llx\n", vmcb->cr3);
2241 #endif
2242 printf("%s: pt = %lx\n", __func__, pt);
2246 void svm_dump_host_regs(const char *from)
2248 struct vcpu *v = current;
2249 unsigned long pt = pagetable_get_paddr(v->arch.monitor_table);
2250 unsigned long cr3, cr0;
2251 printf("Host registers at %s\n", from);
2253 __asm__ __volatile__ ("\tmov %%cr0,%0\n"
2254 "\tmov %%cr3,%1\n"
2255 : "=r" (cr0), "=r"(cr3));
2256 printf("%s: pt = %lx, cr3 = %lx, cr0 = %lx\n", __func__, pt, cr3, cr0);
2259 #ifdef SVM_EXTRA_DEBUG
2260 static char *exit_reasons[] = {
2261 [VMEXIT_CR0_READ] = "CR0_READ",
2262 [VMEXIT_CR1_READ] = "CR1_READ",
2263 [VMEXIT_CR2_READ] = "CR2_READ",
2264 [VMEXIT_CR3_READ] = "CR3_READ",
2265 [VMEXIT_CR4_READ] = "CR4_READ",
2266 [VMEXIT_CR5_READ] = "CR5_READ",
2267 [VMEXIT_CR6_READ] = "CR6_READ",
2268 [VMEXIT_CR7_READ] = "CR7_READ",
2269 [VMEXIT_CR8_READ] = "CR8_READ",
2270 [VMEXIT_CR9_READ] = "CR9_READ",
2271 [VMEXIT_CR10_READ] = "CR10_READ",
2272 [VMEXIT_CR11_READ] = "CR11_READ",
2273 [VMEXIT_CR12_READ] = "CR12_READ",
2274 [VMEXIT_CR13_READ] = "CR13_READ",
2275 [VMEXIT_CR14_READ] = "CR14_READ",
2276 [VMEXIT_CR15_READ] = "CR15_READ",
2277 [VMEXIT_CR0_WRITE] = "CR0_WRITE",
2278 [VMEXIT_CR1_WRITE] = "CR1_WRITE",
2279 [VMEXIT_CR2_WRITE] = "CR2_WRITE",
2280 [VMEXIT_CR3_WRITE] = "CR3_WRITE",
2281 [VMEXIT_CR4_WRITE] = "CR4_WRITE",
2282 [VMEXIT_CR5_WRITE] = "CR5_WRITE",
2283 [VMEXIT_CR6_WRITE] = "CR6_WRITE",
2284 [VMEXIT_CR7_WRITE] = "CR7_WRITE",
2285 [VMEXIT_CR8_WRITE] = "CR8_WRITE",
2286 [VMEXIT_CR9_WRITE] = "CR9_WRITE",
2287 [VMEXIT_CR10_WRITE] = "CR10_WRITE",
2288 [VMEXIT_CR11_WRITE] = "CR11_WRITE",
2289 [VMEXIT_CR12_WRITE] = "CR12_WRITE",
2290 [VMEXIT_CR13_WRITE] = "CR13_WRITE",
2291 [VMEXIT_CR14_WRITE] = "CR14_WRITE",
2292 [VMEXIT_CR15_WRITE] = "CR15_WRITE",
2293 [VMEXIT_DR0_READ] = "DR0_READ",
2294 [VMEXIT_DR1_READ] = "DR1_READ",
2295 [VMEXIT_DR2_READ] = "DR2_READ",
2296 [VMEXIT_DR3_READ] = "DR3_READ",
2297 [VMEXIT_DR4_READ] = "DR4_READ",
2298 [VMEXIT_DR5_READ] = "DR5_READ",
2299 [VMEXIT_DR6_READ] = "DR6_READ",
2300 [VMEXIT_DR7_READ] = "DR7_READ",
2301 [VMEXIT_DR8_READ] = "DR8_READ",
2302 [VMEXIT_DR9_READ] = "DR9_READ",
2303 [VMEXIT_DR10_READ] = "DR10_READ",
2304 [VMEXIT_DR11_READ] = "DR11_READ",
2305 [VMEXIT_DR12_READ] = "DR12_READ",
2306 [VMEXIT_DR13_READ] = "DR13_READ",
2307 [VMEXIT_DR14_READ] = "DR14_READ",
2308 [VMEXIT_DR15_READ] = "DR15_READ",
2309 [VMEXIT_DR0_WRITE] = "DR0_WRITE",
2310 [VMEXIT_DR1_WRITE] = "DR1_WRITE",
2311 [VMEXIT_DR2_WRITE] = "DR2_WRITE",
2312 [VMEXIT_DR3_WRITE] = "DR3_WRITE",
2313 [VMEXIT_DR4_WRITE] = "DR4_WRITE",
2314 [VMEXIT_DR5_WRITE] = "DR5_WRITE",
2315 [VMEXIT_DR6_WRITE] = "DR6_WRITE",
2316 [VMEXIT_DR7_WRITE] = "DR7_WRITE",
2317 [VMEXIT_DR8_WRITE] = "DR8_WRITE",
2318 [VMEXIT_DR9_WRITE] = "DR9_WRITE",
2319 [VMEXIT_DR10_WRITE] = "DR10_WRITE",
2320 [VMEXIT_DR11_WRITE] = "DR11_WRITE",
2321 [VMEXIT_DR12_WRITE] = "DR12_WRITE",
2322 [VMEXIT_DR13_WRITE] = "DR13_WRITE",
2323 [VMEXIT_DR14_WRITE] = "DR14_WRITE",
2324 [VMEXIT_DR15_WRITE] = "DR15_WRITE",
2325 [VMEXIT_EXCEPTION_DE] = "EXCEPTION_DE",
2326 [VMEXIT_EXCEPTION_DB] = "EXCEPTION_DB",
2327 [VMEXIT_EXCEPTION_NMI] = "EXCEPTION_NMI",
2328 [VMEXIT_EXCEPTION_BP] = "EXCEPTION_BP",
2329 [VMEXIT_EXCEPTION_OF] = "EXCEPTION_OF",
2330 [VMEXIT_EXCEPTION_BR] = "EXCEPTION_BR",
2331 [VMEXIT_EXCEPTION_UD] = "EXCEPTION_UD",
2332 [VMEXIT_EXCEPTION_NM] = "EXCEPTION_NM",
2333 [VMEXIT_EXCEPTION_DF] = "EXCEPTION_DF",
2334 [VMEXIT_EXCEPTION_09] = "EXCEPTION_09",
2335 [VMEXIT_EXCEPTION_TS] = "EXCEPTION_TS",
2336 [VMEXIT_EXCEPTION_NP] = "EXCEPTION_NP",
2337 [VMEXIT_EXCEPTION_SS] = "EXCEPTION_SS",
2338 [VMEXIT_EXCEPTION_GP] = "EXCEPTION_GP",
2339 [VMEXIT_EXCEPTION_PF] = "EXCEPTION_PF",
2340 [VMEXIT_EXCEPTION_15] = "EXCEPTION_15",
2341 [VMEXIT_EXCEPTION_MF] = "EXCEPTION_MF",
2342 [VMEXIT_EXCEPTION_AC] = "EXCEPTION_AC",
2343 [VMEXIT_EXCEPTION_MC] = "EXCEPTION_MC",
2344 [VMEXIT_EXCEPTION_XF] = "EXCEPTION_XF",
2345 [VMEXIT_INTR] = "INTR",
2346 [VMEXIT_NMI] = "NMI",
2347 [VMEXIT_SMI] = "SMI",
2348 [VMEXIT_INIT] = "INIT",
2349 [VMEXIT_VINTR] = "VINTR",
2350 [VMEXIT_CR0_SEL_WRITE] = "CR0_SEL_WRITE",
2351 [VMEXIT_IDTR_READ] = "IDTR_READ",
2352 [VMEXIT_GDTR_READ] = "GDTR_READ",
2353 [VMEXIT_LDTR_READ] = "LDTR_READ",
2354 [VMEXIT_TR_READ] = "TR_READ",
2355 [VMEXIT_IDTR_WRITE] = "IDTR_WRITE",
2356 [VMEXIT_GDTR_WRITE] = "GDTR_WRITE",
2357 [VMEXIT_LDTR_WRITE] = "LDTR_WRITE",
2358 [VMEXIT_TR_WRITE] = "TR_WRITE",
2359 [VMEXIT_RDTSC] = "RDTSC",
2360 [VMEXIT_RDPMC] = "RDPMC",
2361 [VMEXIT_PUSHF] = "PUSHF",
2362 [VMEXIT_POPF] = "POPF",
2363 [VMEXIT_CPUID] = "CPUID",
2364 [VMEXIT_RSM] = "RSM",
2365 [VMEXIT_IRET] = "IRET",
2366 [VMEXIT_SWINT] = "SWINT",
2367 [VMEXIT_INVD] = "INVD",
2368 [VMEXIT_PAUSE] = "PAUSE",
2369 [VMEXIT_HLT] = "HLT",
2370 [VMEXIT_INVLPG] = "INVLPG",
2371 [VMEXIT_INVLPGA] = "INVLPGA",
2372 [VMEXIT_IOIO] = "IOIO",
2373 [VMEXIT_MSR] = "MSR",
2374 [VMEXIT_TASK_SWITCH] = "TASK_SWITCH",
2375 [VMEXIT_FERR_FREEZE] = "FERR_FREEZE",
2376 [VMEXIT_SHUTDOWN] = "SHUTDOWN",
2377 [VMEXIT_VMRUN] = "VMRUN",
2378 [VMEXIT_VMMCALL] = "VMMCALL",
2379 [VMEXIT_VMLOAD] = "VMLOAD",
2380 [VMEXIT_VMSAVE] = "VMSAVE",
2381 [VMEXIT_STGI] = "STGI",
2382 [VMEXIT_CLGI] = "CLGI",
2383 [VMEXIT_SKINIT] = "SKINIT",
2384 [VMEXIT_RDTSCP] = "RDTSCP",
2385 [VMEXIT_ICEBP] = "ICEBP",
2386 [VMEXIT_NPF] = "NPF"
2387 };
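/*
 * Exit codes without an entry above are left NULL by the designated
 * initializers; the debug path in svm_vmexit_handler() checks for this
 * and falls back to printing the raw number.
 */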
2388 #endif /* SVM_EXTRA_DEBUG */
2390 #ifdef SVM_WALK_GUEST_PAGES
2391 void walk_shadow_and_guest_pt(unsigned long gva)
2393 l2_pgentry_t gpde;
2394 l2_pgentry_t spde;
2395 l1_pgentry_t gpte;
2396 l1_pgentry_t spte;
2397 struct vcpu *v = current;
2398 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2399 unsigned long gpa;
2401 gpa = gva_to_gpa( gva );
2402 printk( "gva = %lx, gpa=%lx, gCR3=%x\n", gva, gpa, (u32)vmcb->cr3 );
2403 if( !svm_paging_enabled(v) || mmio_space(gpa) )
2404 return;
2406 /* let's dump the guest and shadow page info */
2408 __guest_get_l2e(v, gva, &gpde);
2409 printk( "G-PDE = %x, flags=%x\n", gpde.l2, l2e_get_flags(gpde) );
2410 __shadow_get_l2e( v, gva, &spde );
2411 printk( "S-PDE = %x, flags=%x\n", spde.l2, l2e_get_flags(spde) );
2413 if ( unlikely(!(l2e_get_flags(gpde) & _PAGE_PRESENT)) )
2414 return;
2416 spte = l1e_empty();
2418 // This is actually overkill - we only need to make sure the hl2 is in-sync.
2419 shadow_sync_va(v, gva);
2421 gpte.l1 = 0;
2422 __copy_from_user(&gpte, &linear_pg_table[ l1_linear_offset(gva) ], sizeof(gpte) );
2423 printk( "G-PTE = %x, flags=%x\n", gpte.l1, l1e_get_flags(gpte) );
2424 __copy_from_user( &spte, &phys_to_machine_mapping[ l1e_get_pfn( gpte ) ],
2425 sizeof(spte) );
2426 printk( "S-PTE = %x, flags=%x\n", spte.l1, l1e_get_flags(spte));
2428 #endif /* SVM_WALK_GUEST_PAGES */
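/*
 * The walk above uses l2/l1 entry types directly and so appears to
 * assume a two-level (non-PAE) guest paging layout; for PAE or long
 * mode guests the printed entries should be taken with a grain of salt.
 */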
2430 asmlinkage void svm_vmexit_handler(struct cpu_user_regs regs)
2432 unsigned int exit_reason;
2433 unsigned long eip;
2434 struct vcpu *v = current;
2435 int error;
2436 int do_debug = 0;
2437 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2439 ASSERT(vmcb);
2441 exit_reason = vmcb->exitcode;
2442 save_svm_cpu_user_regs(v, &regs);
2443 v->arch.hvm_svm.injecting_event = 0;
2445 vmcb->tlb_control = 1;
2447 #ifdef SVM_EXTRA_DEBUG
2449 #if defined(__i386__)
2450 #define rip eip
2451 #endif
2453 static unsigned long intercepts_counter = 0;
2455 if (svm_dbg_on && exit_reason == VMEXIT_EXCEPTION_PF)
2457 if (svm_paging_enabled(v) && !mmio_space(gva_to_gpa(vmcb->exitinfo2)))
2459 printk("I%08ld,ExC=%s(%d),IP=%x:%llx,I1=%llx,I2=%llx,INT=%llx, gpa=%llx\n",
2460 intercepts_counter,
2461 exit_reasons[exit_reason], exit_reason, regs.cs,
2462 (unsigned long long) regs.rip,
2463 (unsigned long long) vmcb->exitinfo1,
2464 (unsigned long long) vmcb->exitinfo2,
2465 (unsigned long long) vmcb->exitintinfo.bytes,
2466 (unsigned long long) gva_to_gpa( vmcb->exitinfo2 ) );
2468 else
2470 printk("I%08ld,ExC=%s(%d),IP=%x:%llx,I1=%llx,I2=%llx,INT=%llx\n",
2471 intercepts_counter,
2472 exit_reasons[exit_reason], exit_reason, regs.cs,
2473 (unsigned long long) regs.rip,
2474 (unsigned long long) vmcb->exitinfo1,
2475 (unsigned long long) vmcb->exitinfo2,
2476 (unsigned long long) vmcb->exitintinfo.bytes );
2479 else if ( svm_dbg_on
2480 && exit_reason != VMEXIT_IOIO
2481 && exit_reason != VMEXIT_INTR)
2484 if (exit_reasons[exit_reason])
2486 printk("I%08ld,ExC=%s(%d),IP=%x:%llx,I1=%llx,I2=%llx,INT=%llx\n",
2487 intercepts_counter,
2488 exit_reasons[exit_reason], exit_reason, regs.cs,
2489 (unsigned long long) regs.rip,
2490 (unsigned long long) vmcb->exitinfo1,
2491 (unsigned long long) vmcb->exitinfo2,
2492 (unsigned long long) vmcb->exitintinfo.bytes);
2494 else
2496 printk("I%08ld,ExC=%d(0x%x),IP=%x:%llx,I1=%llx,I2=%llx,INT=%llx\n",
2497 intercepts_counter, exit_reason, exit_reason, regs.cs,
2498 (unsigned long long) regs.rip,
2499 (unsigned long long) vmcb->exitinfo1,
2500 (unsigned long long) vmcb->exitinfo2,
2501 (unsigned long long) vmcb->exitintinfo.bytes);
2505 #ifdef SVM_WALK_GUEST_PAGES
2506 if( exit_reason == VMEXIT_EXCEPTION_PF
2507 && ( ( vmcb->exitinfo2 == vmcb->rip )
2508 || vmcb->exitintinfo.bytes) )
2510 if (svm_paging_enabled(v) && !mmio_space(gva_to_gpa(vmcb->exitinfo2)))
2511 walk_shadow_and_guest_pt( vmcb->exitinfo2 );
2513 #endif
2515 intercepts_counter++;
2517 #if 0
2518 if (svm_dbg_on)
2519 do_debug = svm_do_debugout(exit_reason);
2520 #endif
2522 if (do_debug)
2524 printk("%s:+ guest_table = 0x%08x, monitor_table = 0x%08x, "
2525 "shadow_table = 0x%08x\n",
2526 __func__,
2527 (int) v->arch.guest_table.pfn,
2528 (int) v->arch.monitor_table.pfn,
2529 (int) v->arch.shadow_table.pfn);
2531 svm_dump_vmcb(__func__, vmcb);
2532 svm_dump_regs(__func__, &regs);
2533 svm_dump_inst(svm_rip2pointer(vmcb));
2536 #if defined(__i386__)
2537 #undef rip
2538 #endif
2541 #endif /* SVM_EXTRA_DEBUG */
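/*
 * An exitcode of all ones is VMEXIT_INVALID: VMRUN refused to enter the
 * guest because the VMCB contained illegal state, so there is nothing
 * meaningful to decode below.
 */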
2543 if (exit_reason == -1)
2545 printk("%s: exit_reason == -1 - did someone clobber the VMCB?\n",
2546 __func__);
2547 BUG();
2548 domain_crash_synchronous();
2551 perfc_incra(vmexits, exit_reason);
2552 eip = vmcb->rip;
2554 #ifdef SVM_EXTRA_DEBUG
2555 if (do_debug)
2557 printk("eip = %lx, exit_reason = %d (0x%x)\n",
2558 eip, exit_reason, exit_reason);
2560 #endif /* SVM_EXTRA_DEBUG */
2562 TRACE_3D(TRC_VMX_VMEXIT, v->domain->domain_id, eip, exit_reason);
2564 switch (exit_reason)
2566 case VMEXIT_EXCEPTION_DB:
2568 #ifdef XEN_DEBUGGER
2569 svm_debug_save_cpu_user_regs(&regs);
2570 pdb_handle_exception(1, &regs, 1);
2571 svm_debug_restore_cpu_user_regs(&regs);
2572 #else
2573 svm_store_cpu_user_regs(&regs, v);
2574 domain_pause_for_debugger();
2575 #endif
2577 break;
2579 case VMEXIT_NMI:
2580 do_nmi(&regs, 0);
2581 break;
2583 case VMEXIT_SMI:
2584 /*
2585 * For asynchronous SMIs, we just need to allow global interrupts
2586 * so that the SMI is taken properly in the context of the host. The
2587 * standard code does a STGI after the VMEXIT which should accomplish
2588 * this task. Continue as normal and restart the guest.
2589 */
2590 break;
2592 case VMEXIT_INIT:
2593 /*
2594 * Nothing to do; in fact we should never get to this point.
2595 */
2596 break;
2598 case VMEXIT_EXCEPTION_BP:
2599 #ifdef XEN_DEBUGGER
2600 svm_debug_save_cpu_user_regs(&regs);
2601 pdb_handle_exception(3, &regs, 1);
2602 svm_debug_restore_cpu_user_regs(&regs);
2603 #else
2604 if ( test_bit(_DOMF_debugging, &v->domain->domain_flags) )
2605 domain_pause_for_debugger();
2606 else
2607 svm_inject_exception(vmcb, TRAP_int3, 0, 0);
2608 #endif
2609 break;
2611 case VMEXIT_EXCEPTION_NM:
2612 svm_do_no_device_fault(vmcb);
2613 break;
2615 case VMEXIT_EXCEPTION_GP:
2616 /* This should probably not be trapped in the future */
2617 regs.error_code = vmcb->exitinfo1;
2618 v->arch.hvm_svm.injecting_event = 1;
2619 svm_do_general_protection_fault(v, &regs);
2620 break;
2622 case VMEXIT_EXCEPTION_PF:
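/*
 * exitinfo2 holds the faulting linear address and exitinfo1 the
 * page-fault error code.  If the shadow code cannot resolve the
 * fault, it is reflected back into the guest with both the virtual
 * CR2 and the VMCB copy set to the faulting address.
 */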
2624 unsigned long va;
2625 va = vmcb->exitinfo2;
2626 regs.error_code = vmcb->exitinfo1;
2627 HVM_DBG_LOG(DBG_LEVEL_VMMU,
2628 "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx",
2629 (unsigned long)regs.eax, (unsigned long)regs.ebx,
2630 (unsigned long)regs.ecx, (unsigned long)regs.edx,
2631 (unsigned long)regs.esi, (unsigned long)regs.edi);
2633 v->arch.hvm_vcpu.mmio_op.inst_decoder_regs = &regs;
2635 //printk("PF1\n");
2636 if (!(error = svm_do_page_fault(va, &regs)))
2638 v->arch.hvm_svm.injecting_event = 1;
2639 /* Inject #PF using Interruption-Information Fields */
2640 svm_inject_exception(vmcb, TRAP_page_fault, 1, regs.error_code);
2642 v->arch.hvm_svm.cpu_cr2 = va;
2643 vmcb->cr2 = va;
2644 TRACE_3D(TRC_VMX_INT, v->domain->domain_id,
2645 VMEXIT_EXCEPTION_PF, va);
2647 break;
2650 case VMEXIT_EXCEPTION_DF:
2651 printk("Guest double fault");
2652 BUG();
2653 break;
2655 case VMEXIT_INTR:
2656 raise_softirq(SCHEDULE_SOFTIRQ);
2657 break;
2659 case VMEXIT_GDTR_WRITE:
2660 printk("WRITE to GDTR\n");
2661 break;
2663 case VMEXIT_TASK_SWITCH:
2664 __hvm_bug(&regs);
2665 break;
2667 case VMEXIT_CPUID:
2668 svm_vmexit_do_cpuid(vmcb, regs.eax, &regs);
2669 break;
2671 case VMEXIT_HLT:
2672 svm_vmexit_do_hlt(vmcb);
2673 break;
2675 case VMEXIT_INVLPG:
2676 svm_handle_invlpg(0, &regs);
2677 break;
2679 case VMEXIT_INVLPGA:
2680 svm_handle_invlpg(1, &regs);
2681 break;
2683 case VMEXIT_VMMCALL:
2684 svm_do_vmmcall(v, &regs);
2685 break;
2687 case VMEXIT_CR0_READ:
2688 svm_cr_access(v, 0, TYPE_MOV_FROM_CR, &regs);
2689 break;
2691 case VMEXIT_CR2_READ:
2692 svm_cr_access(v, 2, TYPE_MOV_FROM_CR, &regs);
2693 break;
2695 case VMEXIT_CR3_READ:
2696 svm_cr_access(v, 3, TYPE_MOV_FROM_CR, &regs);
2697 break;
2699 case VMEXIT_CR4_READ:
2700 svm_cr_access(v, 4, TYPE_MOV_FROM_CR, &regs);
2701 break;
2703 case VMEXIT_CR8_READ:
2704 svm_cr_access(v, 8, TYPE_MOV_FROM_CR, &regs);
2705 break;
2707 case VMEXIT_CR0_WRITE:
2708 svm_cr_access(v, 0, TYPE_MOV_TO_CR, &regs);
2709 break;
2711 case VMEXIT_CR2_WRITE:
2712 svm_cr_access(v, 2, TYPE_MOV_TO_CR, &regs);
2713 break;
2715 case VMEXIT_CR3_WRITE:
2716 svm_cr_access(v, 3, TYPE_MOV_TO_CR, &regs);
2717 local_flush_tlb();
2718 break;
2720 case VMEXIT_CR4_WRITE:
2721 svm_cr_access(v, 4, TYPE_MOV_TO_CR, &regs);
2722 break;
2724 case VMEXIT_CR8_WRITE:
2725 svm_cr_access(v, 8, TYPE_MOV_TO_CR, &regs);
2726 break;
2728 case VMEXIT_DR0_READ:
2729 svm_dr_access(v, 0, TYPE_MOV_FROM_DR, &regs);
2730 break;
2732 case VMEXIT_DR1_READ:
2733 svm_dr_access(v, 1, TYPE_MOV_FROM_DR, &regs);
2734 break;
2736 case VMEXIT_DR2_READ:
2737 svm_dr_access(v, 2, TYPE_MOV_FROM_DR, &regs);
2738 break;
2740 case VMEXIT_DR3_READ:
2741 svm_dr_access(v, 3, TYPE_MOV_FROM_DR, &regs);
2742 break;
2744 case VMEXIT_DR6_READ:
2745 svm_dr_access(v, 6, TYPE_MOV_FROM_DR, &regs);
2746 break;
2748 case VMEXIT_DR7_READ:
2749 svm_dr_access(v, 7, TYPE_MOV_FROM_DR, &regs);
2750 break;
2752 case VMEXIT_DR0_WRITE:
2753 svm_dr_access(v, 0, TYPE_MOV_TO_DR, &regs);
2754 break;
2756 case VMEXIT_DR1_WRITE:
2757 svm_dr_access(v, 1, TYPE_MOV_TO_DR, &regs);
2758 break;
2760 case VMEXIT_DR2_WRITE:
2761 svm_dr_access(v, 2, TYPE_MOV_TO_DR, &regs);
2762 break;
2764 case VMEXIT_DR3_WRITE:
2765 svm_dr_access(v, 3, TYPE_MOV_TO_DR, &regs);
2766 break;
2768 case VMEXIT_DR6_WRITE:
2769 svm_dr_access(v, 6, TYPE_MOV_TO_DR, &regs);
2770 break;
2772 case VMEXIT_DR7_WRITE:
2773 svm_dr_access(v, 7, TYPE_MOV_TO_DR, &regs);
2774 break;
2776 case VMEXIT_IOIO:
2777 svm_io_instruction(v, &regs);
2778 break;
2780 case VMEXIT_MSR:
2781 svm_do_msr_access(v, &regs);
2782 break;
2784 case VMEXIT_SHUTDOWN:
2785 printk("Guest shutdown exit\n");
2786 domain_crash_synchronous();
2787 break;
2789 default:
2790 printk("unexpected VMEXIT: exit reason = 0x%x, exitinfo1 = %llx, "
2791 "exitinfo2 = %llx\n", exit_reason,
2792 (unsigned long long)vmcb->exitinfo1,
2793 (unsigned long long)vmcb->exitinfo2);
2794 __hvm_bug(&regs); /* should not happen */
2795 break;
2798 #ifdef SVM_EXTRA_DEBUG
2799 if (do_debug)
2801 printk("%s: Done switch on vmexit_code\n", __func__);
2802 svm_dump_regs(__func__, &regs);
2805 if (do_debug)
2807 printk("vmexit_handler():- guest_table = 0x%08x, "
2808 "monitor_table = 0x%08x, shadow_table = 0x%08x\n",
2809 (int)v->arch.guest_table.pfn,
2810 (int)v->arch.monitor_table.pfn,
2811 (int)v->arch.shadow_table.pfn);
2812 printk("svm_vmexit_handler: Returning\n");
2814 #endif
2816 return;
2819 asmlinkage void svm_load_cr2(void)
2821 struct vcpu *v = current;
2823 local_irq_disable();
2824 #ifdef __i386__
2825 asm volatile("movl %0,%%cr2": :"r" (v->arch.hvm_svm.cpu_cr2));
2826 #else
2827 asm volatile("movq %0,%%cr2": :"r" (v->arch.hvm_svm.cpu_cr2));
2828 #endif
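/*
 * Called from the low-level VM entry stubs (hence asmlinkage and no C
 * callers in this file): with interrupts off it puts the guest's last
 * faulting address, stashed in cpu_cr2 by the #PF handler above, back
 * into %cr2 before VMRUN.
 */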
2831 asmlinkage void svm_asid(void)
2833 struct vcpu *v = current;
2834 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2836 /*
2837 * If we need to assign a new ASID, or if we are switching cores,
2838 * retire the ASID for the old core and assign a new ASID on the current core.
2839 */
2840 if ( test_bit( ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags ) ||
2841 ( v->arch.hvm_svm.asid_core != v->arch.hvm_svm.launch_core )) {
2842 /* recycle asid */
2843 if ( !asidpool_assign_next( vmcb, 1,
2844 v->arch.hvm_svm.asid_core, v->arch.hvm_svm.launch_core )) {
2845 /* If we get here, we have a major problem */
2846 domain_crash_synchronous();
2849 v->arch.hvm_svm.asid_core = v->arch.hvm_svm.launch_core;
2850 clear_bit( ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags );
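/*
 * Together with vmcb->tlb_control being set on every exit and the
 * ASSIGN_ASID flag set by the INVLPG/INVLPGA handler, this keeps guest
 * TLB state consistent: taking a new ASID from the pool makes any
 * translations tagged with the old one unreachable.
 */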
2854 /*
2855 * Local variables:
2856 * mode: C
2857 * c-set-style: "BSD"
2858 * c-basic-offset: 4
2859 * tab-width: 4
2860 * indent-tabs-mode: nil
2861 * End:
2862 */