direct-io.hg

view xen/arch/x86/hvm/vmx/vmx.c @ 8736:8aeb417387ca

Fix some more pfn/mfn/gmfn/gpfn inconsistencies. Fix some direct
uses of max_page variable to use the mfn_valid() predicate.

Signed-off-by: Keir Fraser <keir@xensource.com>
author kaf24@firebug.cl.cam.ac.uk
date Thu Feb 02 12:18:28 2006 +0100 (2006-02-02)
parents 0c94043f5c5b
children 0e7bdd973e17
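
The changeset message above describes replacing direct uses of the max_page variable with the mfn_valid() predicate. As an illustration only (this snippet is not an excerpt from the diff; the surrounding check is hypothetical), the shape of that cleanup is roughly:

    if ( mfn >= max_page )      /* before: open-coded bound check */
        return 0;

    if ( !mfn_valid(mfn) )      /* after: use the predicate instead */
        return 0;
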
1 /*
2 * vmx.c: handling VMX architecture-related VM exits
3 * Copyright (c) 2004, Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
16 * Place - Suite 330, Boston, MA 02111-1307 USA.
17 *
18 */
20 #include <xen/config.h>
21 #include <xen/init.h>
22 #include <xen/lib.h>
23 #include <xen/trace.h>
24 #include <xen/sched.h>
25 #include <xen/irq.h>
26 #include <xen/softirq.h>
27 #include <xen/domain_page.h>
28 #include <xen/hypercall.h>
29 #include <asm/current.h>
30 #include <asm/io.h>
31 #include <asm/shadow.h>
32 #include <asm/regs.h>
33 #include <asm/cpufeature.h>
34 #include <asm/processor.h>
35 #include <asm/types.h>
36 #include <asm/msr.h>
37 #include <asm/spinlock.h>
38 #include <asm/hvm/hvm.h>
39 #include <asm/hvm/support.h>
40 #include <asm/hvm/vmx/vmx.h>
41 #include <asm/hvm/vmx/vmcs.h>
42 #include <asm/shadow.h>
43 #if CONFIG_PAGING_LEVELS >= 3
44 #include <asm/shadow_64.h>
45 #endif
46 #include <public/sched.h>
47 #include <public/hvm/ioreq.h>
48 #include <asm/hvm/vpic.h>
49 #include <asm/hvm/vlapic.h>
52 #ifdef CONFIG_VMX
54 static unsigned long trace_values[NR_CPUS][5]; /* TRACE_VMEXIT() stores into indices 0-4 */
55 #define TRACE_VMEXIT(index,value) trace_values[smp_processor_id()][index]=value
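56 /* The recorded values are emitted by vmx_trace_vmentry() via TRACE_5D at the next VM entry. */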
57 void vmx_final_setup_guest(struct vcpu *v)
58 {
59 v->arch.schedule_tail = arch_vmx_do_launch;
61 if ( v->vcpu_id == 0 )
62 {
63 struct domain *d = v->domain;
64 struct vcpu *vc;
66 /* Initialize monitor page table */
67 for_each_vcpu(d, vc)
68 vc->arch.monitor_table = mk_pagetable(0);
70 /*
71 * Required to do this once per domain
72 * XXX todo: add a separate function to do these.
73 */
74 memset(&d->shared_info->evtchn_mask[0], 0xff,
75 sizeof(d->shared_info->evtchn_mask));
77 /* Put the domain in shadow mode even though we're going to be using
78 * the shared 1:1 page table initially. It shouldn't hurt */
79 shadow_mode_enable(d,
80 SHM_enable|SHM_refcounts|
81 SHM_translate|SHM_external|SHM_wr_pt_pte);
82 }
83 }
85 void vmx_relinquish_resources(struct vcpu *v)
86 {
87 struct hvm_virpit *vpit;
89 if (v->vcpu_id == 0) {
90 /* unmap IO shared page */
91 struct domain *d = v->domain;
92 if ( d->arch.hvm_domain.shared_page_va )
93 unmap_domain_page_global(
94 (void *)d->arch.hvm_domain.shared_page_va);
95 }
97 destroy_vmcs(&v->arch.hvm_vmx);
98 free_monitor_pagetable(v);
99 vpit = &v->domain->arch.hvm_domain.vpit;
100 kill_timer(&vpit->pit_timer);
101 kill_timer(&v->arch.hvm_vmx.hlt_timer);
102 if ( hvm_apic_support(v->domain) && (VLAPIC(v) != NULL) )
103 {
104 kill_timer(&VLAPIC(v)->vlapic_timer);
105 xfree(VLAPIC(v));
106 }
107 }
109 #ifdef __x86_64__
110 static struct vmx_msr_state percpu_msr[NR_CPUS];
112 static u32 msr_data_index[VMX_MSR_COUNT] =
113 {
114 MSR_LSTAR, MSR_STAR, MSR_CSTAR,
115 MSR_SYSCALL_MASK, MSR_EFER,
116 };
118 void vmx_save_segments(struct vcpu *v)
119 {
120 rdmsrl(MSR_SHADOW_GS_BASE, v->arch.hvm_vmx.msr_content.shadow_gs);
121 }
123 /*
124 * To avoid MSR save/restore at every VM exit/entry time, we restore
125 * the x86_64 specific MSRs at domain switch time. Since those MSRs
126 * are not modified once set for generic domains, we don't save them,
127 * but simply reset them to the values set at percpu_traps_init().
128 */
129 void vmx_load_msrs(struct vcpu *n)
130 {
131 struct vmx_msr_state *host_state = &percpu_msr[smp_processor_id()];
132 int i;
134 if ( !hvm_switch_on )
135 return;
137 while ( host_state->flags )
138 {
139 i = find_first_set_bit(host_state->flags);
140 wrmsrl(msr_data_index[i], host_state->msr_items[i]);
141 clear_bit(i, &host_state->flags);
142 }
143 }
145 static void vmx_save_init_msrs(void)
146 {
147 struct vmx_msr_state *host_state = &percpu_msr[smp_processor_id()];
148 int i;
150 for ( i = 0; i < VMX_MSR_COUNT; i++ )
151 rdmsrl(msr_data_index[i], host_state->msr_items[i]);
152 }
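153 /* Helpers for long_mode_do_msr_read/write below: CASE_READ_MSR returns the per-VCPU cached value of MSR_<name>; CASE_WRITE_MSR updates the cache, marks the slot dirty in the guest and host flag masks, and writes the hardware MSR. */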
154 #define CASE_READ_MSR(address) \
155 case MSR_ ## address: \
156 msr_content = msr->msr_items[VMX_INDEX_MSR_ ## address]; \
157 break
159 #define CASE_WRITE_MSR(address) \
160 case MSR_ ## address: \
161 { \
162 msr->msr_items[VMX_INDEX_MSR_ ## address] = msr_content; \
163 if (!test_bit(VMX_INDEX_MSR_ ## address, &msr->flags)) { \
164 set_bit(VMX_INDEX_MSR_ ## address, &msr->flags); \
165 } \
166 wrmsrl(MSR_ ## address, msr_content); \
167 set_bit(VMX_INDEX_MSR_ ## address, &host_state->flags); \
168 } \
169 break
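170 /* XXX stub: no canonical-address check is performed; every address is accepted. */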
171 #define IS_CANO_ADDRESS(add) 1
172 static inline int long_mode_do_msr_read(struct cpu_user_regs *regs)
173 {
174 u64 msr_content = 0;
175 struct vcpu *vc = current;
176 struct vmx_msr_state * msr = &vc->arch.hvm_vmx.msr_content;
177 switch(regs->ecx){
178 case MSR_EFER:
179 msr_content = msr->msr_items[VMX_INDEX_MSR_EFER];
180 HVM_DBG_LOG(DBG_LEVEL_2, "EFER msr_content %llx\n", (unsigned long long)msr_content);
181 if (test_bit(VMX_CPU_STATE_LME_ENABLED,
182 &vc->arch.hvm_vmx.cpu_state))
183 msr_content |= 1 << _EFER_LME;
185 if (VMX_LONG_GUEST(vc))
186 msr_content |= 1 << _EFER_LMA;
187 break;
188 case MSR_FS_BASE:
189 if (!(VMX_LONG_GUEST(vc)))
190 /* XXX should it be GP fault */
191 domain_crash_synchronous();
192 __vmread(GUEST_FS_BASE, &msr_content);
193 break;
194 case MSR_GS_BASE:
195 if (!(VMX_LONG_GUEST(vc)))
196 domain_crash_synchronous();
197 __vmread(GUEST_GS_BASE, &msr_content);
198 break;
199 case MSR_SHADOW_GS_BASE:
200 msr_content = msr->shadow_gs;
201 break;
203 CASE_READ_MSR(STAR);
204 CASE_READ_MSR(LSTAR);
205 CASE_READ_MSR(CSTAR);
206 CASE_READ_MSR(SYSCALL_MASK);
207 default:
208 return 0;
209 }
210 HVM_DBG_LOG(DBG_LEVEL_2, "mode_do_msr_read: msr_content: %lx\n", msr_content);
211 regs->eax = msr_content & 0xffffffff;
212 regs->edx = msr_content >> 32;
213 return 1;
214 }
216 static inline int long_mode_do_msr_write(struct cpu_user_regs *regs)
217 {
218 u64 msr_content = regs->eax | ((u64)regs->edx << 32);
219 struct vcpu *vc = current;
220 struct vmx_msr_state * msr = &vc->arch.hvm_vmx.msr_content;
221 struct vmx_msr_state * host_state =
222 &percpu_msr[smp_processor_id()];
224 HVM_DBG_LOG(DBG_LEVEL_1, " mode_do_msr_write msr %lx msr_content %lx\n",
225 regs->ecx, msr_content);
227 switch (regs->ecx){
228 case MSR_EFER:
229 if ((msr_content & EFER_LME) ^
230 test_bit(VMX_CPU_STATE_LME_ENABLED,
231 &vc->arch.hvm_vmx.cpu_state)){
232 if (test_bit(VMX_CPU_STATE_PG_ENABLED,
233 &vc->arch.hvm_vmx.cpu_state) ||
234 !test_bit(VMX_CPU_STATE_PAE_ENABLED,
235 &vc->arch.hvm_vmx.cpu_state)){
236 vmx_inject_exception(vc, TRAP_gp_fault, 0);
237 }
238 }
239 if (msr_content & EFER_LME)
240 set_bit(VMX_CPU_STATE_LME_ENABLED,
241 &vc->arch.hvm_vmx.cpu_state);
242 /* No update for LME/LMA since they have no effect */
243 msr->msr_items[VMX_INDEX_MSR_EFER] =
244 msr_content;
245 if (msr_content & ~(EFER_LME | EFER_LMA)){
246 msr->msr_items[VMX_INDEX_MSR_EFER] = msr_content;
247 if (!test_bit(VMX_INDEX_MSR_EFER, &msr->flags)){
248 rdmsrl(MSR_EFER,
249 host_state->msr_items[VMX_INDEX_MSR_EFER]);
250 set_bit(VMX_INDEX_MSR_EFER, &host_state->flags);
251 set_bit(VMX_INDEX_MSR_EFER, &msr->flags);
252 wrmsrl(MSR_EFER, msr_content);
253 }
254 }
255 break;
257 case MSR_FS_BASE:
258 case MSR_GS_BASE:
259 if (!(VMX_LONG_GUEST(vc)))
260 domain_crash_synchronous();
261 if (!IS_CANO_ADDRESS(msr_content)){
262 HVM_DBG_LOG(DBG_LEVEL_1, "Not cano address of msr write\n");
263 vmx_inject_exception(vc, TRAP_gp_fault, 0);
264 }
265 if (regs->ecx == MSR_FS_BASE)
266 __vmwrite(GUEST_FS_BASE, msr_content);
267 else
268 __vmwrite(GUEST_GS_BASE, msr_content);
269 break;
271 case MSR_SHADOW_GS_BASE:
272 if (!(VMX_LONG_GUEST(vc)))
273 domain_crash_synchronous();
274 vc->arch.hvm_vmx.msr_content.shadow_gs = msr_content;
275 wrmsrl(MSR_SHADOW_GS_BASE, msr_content);
276 break;
278 CASE_WRITE_MSR(STAR);
279 CASE_WRITE_MSR(LSTAR);
280 CASE_WRITE_MSR(CSTAR);
281 CASE_WRITE_MSR(SYSCALL_MASK);
282 default:
283 return 0;
284 }
285 return 1;
286 }
288 void
289 vmx_restore_msrs(struct vcpu *v)
290 {
291 int i = 0;
292 struct vmx_msr_state *guest_state;
293 struct vmx_msr_state *host_state;
294 unsigned long guest_flags;
296 guest_state = &v->arch.hvm_vmx.msr_content;
297 host_state = &percpu_msr[smp_processor_id()];
299 wrmsrl(MSR_SHADOW_GS_BASE, guest_state->shadow_gs);
300 guest_flags = guest_state->flags;
301 if (!guest_flags)
302 return;
304 while (guest_flags){
305 i = find_first_set_bit(guest_flags);
307 HVM_DBG_LOG(DBG_LEVEL_2,
308 "restore guest's index %d msr %lx with %lx\n",
309 i, (unsigned long) msr_data_index[i], (unsigned long) guest_state->msr_items[i]);
310 set_bit(i, &host_state->flags);
311 wrmsrl(msr_data_index[i], guest_state->msr_items[i]);
312 clear_bit(i, &guest_flags);
313 }
314 }
315 #else /* __i386__ */
316 #define vmx_save_init_msrs() ((void)0)
318 static inline int long_mode_do_msr_read(struct cpu_user_regs *regs){
319 return 0;
320 }
321 static inline int long_mode_do_msr_write(struct cpu_user_regs *regs){
322 return 0;
323 }
324 #endif
326 void stop_vmx(void)
327 {
328 if (read_cr4() & X86_CR4_VMXE)
329 __vmxoff();
330 }
332 int vmx_initialize_guest_resources(struct vcpu *v)
333 {
334 vmx_final_setup_guest(v);
335 return 1;
336 }
338 int vmx_relinquish_guest_resources(struct vcpu *v)
339 {
340 vmx_relinquish_resources(v);
341 return 1;
342 }
344 void vmx_store_cpu_guest_regs(struct vcpu *v, struct cpu_user_regs *regs)
345 {
346 #if defined (__x86_64__)
347 __vmread(GUEST_RFLAGS, &regs->rflags);
348 __vmread(GUEST_SS_SELECTOR, &regs->ss);
349 __vmread(GUEST_CS_SELECTOR, &regs->cs);
350 __vmread(GUEST_DS_SELECTOR, &regs->ds);
351 __vmread(GUEST_ES_SELECTOR, &regs->es);
352 __vmread(GUEST_GS_SELECTOR, &regs->gs);
353 __vmread(GUEST_FS_SELECTOR, &regs->fs);
354 __vmread(GUEST_RIP, &regs->rip);
355 __vmread(GUEST_RSP, &regs->rsp);
356 #elif defined (__i386__)
357 __vmread(GUEST_RFLAGS, &regs->eflags);
358 __vmread(GUEST_SS_SELECTOR, &regs->ss);
359 __vmread(GUEST_CS_SELECTOR, &regs->cs);
360 __vmread(GUEST_DS_SELECTOR, &regs->ds);
361 __vmread(GUEST_ES_SELECTOR, &regs->es);
362 __vmread(GUEST_GS_SELECTOR, &regs->gs);
363 __vmread(GUEST_FS_SELECTOR, &regs->fs);
364 __vmread(GUEST_RIP, &regs->eip);
365 __vmread(GUEST_RSP, &regs->esp);
366 #else
367 #error Unsupported architecture
368 #endif
369 }
371 void vmx_load_cpu_guest_regs(struct vcpu *v, struct cpu_user_regs *regs)
372 {
373 #if defined (__x86_64__)
374 __vmwrite(GUEST_SS_SELECTOR, regs->ss);
375 __vmwrite(GUEST_RSP, regs->rsp);
377 __vmwrite(GUEST_RFLAGS, regs->rflags);
378 if (regs->rflags & EF_TF)
379 __vm_set_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_DB);
380 else
381 __vm_clear_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_DB);
383 __vmwrite(GUEST_CS_SELECTOR, regs->cs);
384 __vmwrite(GUEST_RIP, regs->rip);
385 #elif defined (__i386__)
386 __vmwrite(GUEST_SS_SELECTOR, regs->ss);
387 __vmwrite(GUEST_RSP, regs->esp);
389 __vmwrite(GUEST_RFLAGS, regs->eflags);
390 if (regs->eflags & EF_TF)
391 __vm_set_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_DB);
392 else
393 __vm_clear_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_DB);
395 __vmwrite(GUEST_CS_SELECTOR, regs->cs);
396 __vmwrite(GUEST_RIP, regs->eip);
397 #else
398 #error Unsupported architecture
399 #endif
400 }
402 void vmx_store_cpu_guest_ctrl_regs(struct vcpu *v, unsigned long crs[8])
403 {
404 __vmread(CR0_READ_SHADOW, &crs[0]);
405 __vmread(GUEST_CR3, &crs[3]);
406 __vmread(CR4_READ_SHADOW, &crs[4]);
407 }
409 void vmx_modify_guest_state(struct vcpu *v)
410 {
411 modify_vmcs(&v->arch.hvm_vmx, &v->arch.guest_context.user_regs);
412 }
414 int vmx_realmode(struct vcpu *v)
415 {
416 unsigned long rflags;
418 __vmread(GUEST_RFLAGS, &rflags);
419 return rflags & X86_EFLAGS_VM;
420 }
422 int vmx_instruction_length(struct vcpu *v)
423 {
424 unsigned long inst_len;
426 if (__vmread(VM_EXIT_INSTRUCTION_LEN, &inst_len))
427 return 0;
428 return inst_len;
429 }
431 extern long evtchn_send(int lport);
432 void do_nmi(struct cpu_user_regs *);
434 static int check_vmx_controls(u32 ctrls, u32 msr)
435 {
436 u32 vmx_msr_low, vmx_msr_high;
438 rdmsr(msr, vmx_msr_low, vmx_msr_high);
439 if (ctrls < vmx_msr_low || ctrls > vmx_msr_high) {
440 printk("Insufficient VMX capability 0x%x, "
441 "msr=0x%x,low=0x%8x,high=0x%x\n",
442 ctrls, msr, vmx_msr_low, vmx_msr_high);
443 return 0;
444 }
445 return 1;
446 }
448 int start_vmx(void)
449 {
450 struct vmcs_struct *vmcs;
451 u32 ecx;
452 u32 eax, edx;
453 u64 phys_vmcs; /* debugging */
455 /*
456 * Xen does not fill x86_capability words except 0.
457 */
458 ecx = cpuid_ecx(1);
459 boot_cpu_data.x86_capability[4] = ecx;
461 if (!(test_bit(X86_FEATURE_VMXE, &boot_cpu_data.x86_capability)))
462 return 0;
464 rdmsr(IA32_FEATURE_CONTROL_MSR, eax, edx);
466 if (eax & IA32_FEATURE_CONTROL_MSR_LOCK) {
467 if ((eax & IA32_FEATURE_CONTROL_MSR_ENABLE_VMXON) == 0x0) {
468 printk("VMX disabled by Feature Control MSR.\n");
469 return 0;
470 }
471 }
472 else {
473 wrmsr(IA32_FEATURE_CONTROL_MSR,
474 IA32_FEATURE_CONTROL_MSR_LOCK |
475 IA32_FEATURE_CONTROL_MSR_ENABLE_VMXON, 0);
476 }
478 if (!check_vmx_controls(MONITOR_PIN_BASED_EXEC_CONTROLS,
479 MSR_IA32_VMX_PINBASED_CTLS_MSR))
480 return 0;
481 if (!check_vmx_controls(MONITOR_CPU_BASED_EXEC_CONTROLS,
482 MSR_IA32_VMX_PROCBASED_CTLS_MSR))
483 return 0;
484 if (!check_vmx_controls(MONITOR_VM_EXIT_CONTROLS,
485 MSR_IA32_VMX_EXIT_CTLS_MSR))
486 return 0;
487 if (!check_vmx_controls(MONITOR_VM_ENTRY_CONTROLS,
488 MSR_IA32_VMX_ENTRY_CTLS_MSR))
489 return 0;
491 set_in_cr4(X86_CR4_VMXE); /* Enable VMXE */
493 if (!(vmcs = alloc_vmcs())) {
494 printk("Failed to allocate VMCS\n");
495 return 0;
496 }
498 phys_vmcs = (u64) virt_to_maddr(vmcs);
500 if (!(__vmxon(phys_vmcs))) {
501 printk("VMXON is done\n");
502 }
504 vmx_save_init_msrs();
506 /* Setup HVM interfaces */
507 hvm_funcs.disable = stop_vmx;
509 hvm_funcs.initialize_guest_resources = vmx_initialize_guest_resources;
510 hvm_funcs.relinquish_guest_resources = vmx_relinquish_guest_resources;
512 hvm_funcs.store_cpu_guest_regs = vmx_store_cpu_guest_regs;
513 hvm_funcs.load_cpu_guest_regs = vmx_load_cpu_guest_regs;
515 #ifdef __x86_64__
516 hvm_funcs.save_segments = vmx_save_segments;
517 hvm_funcs.load_msrs = vmx_load_msrs;
518 hvm_funcs.restore_msrs = vmx_restore_msrs;
519 #endif
521 hvm_funcs.store_cpu_guest_ctrl_regs = vmx_store_cpu_guest_ctrl_regs;
522 hvm_funcs.modify_guest_state = vmx_modify_guest_state;
524 hvm_funcs.realmode = vmx_realmode;
525 hvm_funcs.paging_enabled = vmx_paging_enabled;
526 hvm_funcs.instruction_length = vmx_instruction_length;
528 hvm_enabled = 1;
530 return 1;
531 }
533 /*
534 * Not all cases receive a valid value in the VM-exit instruction length field.
535 */
536 #define __get_instruction_length(len) \
537 __vmread(VM_EXIT_INSTRUCTION_LEN, &(len)); \
538 if ((len) < 1 || (len) > 15) \
539 __hvm_bug(&regs);
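540 /* NB: the macro above expands to two statements, so it can only be used where a compound statement is valid (never as the body of an unbraced if/else). */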
541 static void inline __update_guest_eip(unsigned long inst_len)
542 {
543 unsigned long current_eip;
545 __vmread(GUEST_RIP, &current_eip);
546 __vmwrite(GUEST_RIP, current_eip + inst_len);
547 }
550 static int vmx_do_page_fault(unsigned long va, struct cpu_user_regs *regs)
551 {
552 unsigned long gpa; /* FIXME: PAE */
553 int result;
555 #if 0 /* keep for debugging */
556 {
557 unsigned long eip;
559 __vmread(GUEST_RIP, &eip);
560 HVM_DBG_LOG(DBG_LEVEL_VMMU,
561 "vmx_do_page_fault = 0x%lx, eip = %lx, error_code = %lx",
562 va, eip, (unsigned long)regs->error_code);
563 }
564 #endif
566 if (!vmx_paging_enabled(current)){
567 handle_mmio(va, va);
568 TRACE_VMEXIT (2,2);
569 return 1;
570 }
571 gpa = gva_to_gpa(va);
573 /* Use 1:1 page table to identify MMIO address space */
574 if ( mmio_space(gpa) ){
575 struct vcpu *v = current;
576 /* No support for APIC */
577 if (!hvm_apic_support(v->domain) && gpa >= 0xFEC00000) {
578 u32 inst_len;
579 __vmread(VM_EXIT_INSTRUCTION_LEN, &(inst_len));
580 __update_guest_eip(inst_len);
581 return 1;
582 }
583 TRACE_VMEXIT (2,2);
584 handle_mmio(va, gpa);
585 return 1;
586 }
588 result = shadow_fault(va, regs);
589 TRACE_VMEXIT (2,result);
590 #if 0
591 if ( !result )
592 {
593 __vmread(GUEST_RIP, &eip);
594 printk("vmx pgfault to guest va=%lx eip=%lx\n", va, eip);
595 }
596 #endif
598 return result;
599 }
601 static void vmx_do_no_device_fault(void)
602 {
603 unsigned long cr0;
604 struct vcpu *v = current;
606 clts();
607 setup_fpu(current);
608 __vmread_vcpu(v, CR0_READ_SHADOW, &cr0);
609 if (!(cr0 & X86_CR0_TS)) {
610 __vmread_vcpu(v, GUEST_CR0, &cr0);
611 cr0 &= ~X86_CR0_TS;
612 __vmwrite(GUEST_CR0, cr0);
613 }
614 __vm_clear_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_NM);
615 }
617 /* Reserved bits: [31:15], [12:11], [9], [6], [2:1] */
618 #define VMX_VCPU_CPUID_L1_RESERVED 0xffff9a46
620 static void vmx_vmexit_do_cpuid(unsigned long input, struct cpu_user_regs *regs)
621 {
622 unsigned int eax, ebx, ecx, edx;
623 unsigned long eip;
624 struct vcpu *v = current;
626 __vmread(GUEST_RIP, &eip);
628 HVM_DBG_LOG(DBG_LEVEL_1,
629 "do_cpuid: (eax) %lx, (ebx) %lx, (ecx) %lx, (edx) %lx,"
630 " (esi) %lx, (edi) %lx",
631 (unsigned long)regs->eax, (unsigned long)regs->ebx,
632 (unsigned long)regs->ecx, (unsigned long)regs->edx,
633 (unsigned long)regs->esi, (unsigned long)regs->edi);
635 cpuid(input, &eax, &ebx, &ecx, &edx);
637 if (input == 1)
638 {
639 if ( hvm_apic_support(v->domain) &&
640 !vlapic_global_enabled((VLAPIC(v))) )
641 clear_bit(X86_FEATURE_APIC, &edx);
643 #ifdef __x86_64__
644 if ( v->domain->arch.ops->guest_paging_levels == PAGING_L2 )
645 #endif
646 {
647 clear_bit(X86_FEATURE_PSE, &edx);
648 clear_bit(X86_FEATURE_PAE, &edx);
649 clear_bit(X86_FEATURE_PSE36, &edx);
650 }
652 /* Unsupportable for virtualised CPUs. */
653 ecx &= ~VMX_VCPU_CPUID_L1_RESERVED; /* mask off reserved bits */
654 clear_bit(X86_FEATURE_VMXE & 31, &ecx);
655 clear_bit(X86_FEATURE_MWAIT & 31, &ecx);
656 }
657 #ifdef __i386__
658 else if ( input == 0x80000001 )
659 {
660 /* Mask feature for Intel ia32e or AMD long mode. */
661 clear_bit(X86_FEATURE_LM & 31, &edx);
662 }
663 #endif
665 regs->eax = (unsigned long) eax;
666 regs->ebx = (unsigned long) ebx;
667 regs->ecx = (unsigned long) ecx;
668 regs->edx = (unsigned long) edx;
670 HVM_DBG_LOG(DBG_LEVEL_1,
671 "vmx_vmexit_do_cpuid: eip: %lx, input: %lx, out:eax=%x, ebx=%x, ecx=%x, edx=%x",
672 eip, input, eax, ebx, ecx, edx);
674 }
676 #define CASE_GET_REG_P(REG, reg) \
677 case REG_ ## REG: reg_p = (unsigned long *)&(regs->reg); break
679 static void vmx_dr_access (unsigned long exit_qualification, struct cpu_user_regs *regs)
680 {
681 unsigned int reg;
682 unsigned long *reg_p = 0;
683 struct vcpu *v = current;
684 unsigned long eip;
686 __vmread(GUEST_RIP, &eip);
688 reg = exit_qualification & DEBUG_REG_ACCESS_NUM;
690 HVM_DBG_LOG(DBG_LEVEL_1,
691 "vmx_dr_access : eip=%lx, reg=%d, exit_qualification = %lx",
692 eip, reg, exit_qualification);
694 switch(exit_qualification & DEBUG_REG_ACCESS_REG) {
695 CASE_GET_REG_P(EAX, eax);
696 CASE_GET_REG_P(ECX, ecx);
697 CASE_GET_REG_P(EDX, edx);
698 CASE_GET_REG_P(EBX, ebx);
699 CASE_GET_REG_P(EBP, ebp);
700 CASE_GET_REG_P(ESI, esi);
701 CASE_GET_REG_P(EDI, edi);
702 case REG_ESP:
703 break;
704 default:
705 __hvm_bug(regs);
706 }
708 switch (exit_qualification & DEBUG_REG_ACCESS_TYPE) {
709 case TYPE_MOV_TO_DR:
710 /* don't need to check the range */
711 if (reg != REG_ESP)
712 v->arch.guest_context.debugreg[reg] = *reg_p;
713 else {
714 unsigned long value;
715 __vmread(GUEST_RSP, &value);
716 v->arch.guest_context.debugreg[reg] = value;
717 }
718 break;
719 case TYPE_MOV_FROM_DR:
720 if (reg != REG_ESP)
721 *reg_p = v->arch.guest_context.debugreg[reg];
722 else {
723 __vmwrite(GUEST_RSP, v->arch.guest_context.debugreg[reg]);
724 }
725 break;
726 }
727 }
729 /*
730 * Invalidate the TLB for va. Invalidate the shadow page corresponding
731 * the address va.
732 */
733 static void vmx_vmexit_do_invlpg(unsigned long va)
734 {
735 unsigned long eip;
736 struct vcpu *v = current;
738 __vmread(GUEST_RIP, &eip);
740 HVM_DBG_LOG(DBG_LEVEL_VMMU, "vmx_vmexit_do_invlpg: eip=%lx, va=%lx",
741 eip, va);
743 /*
744 * We do the safest things first, then try to update the shadow
745 * copying from the guest
746 */
747 shadow_invlpg(v, va);
748 }
750 static int check_for_null_selector(unsigned long eip)
751 {
752 unsigned char inst[MAX_INST_LEN];
753 unsigned long sel;
754 int i, inst_len;
755 int inst_copy_from_guest(unsigned char *, unsigned long, int);
757 __vmread(VM_EXIT_INSTRUCTION_LEN, &inst_len);
758 memset(inst, 0, MAX_INST_LEN);
759 if (inst_copy_from_guest(inst, eip, inst_len) != inst_len) {
760 printf("check_for_null_selector: get guest instruction failed\n");
761 domain_crash_synchronous();
762 }
764 for (i = 0; i < inst_len; i++) {
765 switch (inst[i]) {
766 case 0xf3: /* REPZ */
767 case 0xf2: /* REPNZ */
768 case 0xf0: /* LOCK */
769 case 0x66: /* data32 */
770 case 0x67: /* addr32 */
771 continue;
772 case 0x2e: /* CS */
773 __vmread(GUEST_CS_SELECTOR, &sel);
774 break;
775 case 0x36: /* SS */
776 __vmread(GUEST_SS_SELECTOR, &sel);
777 break;
778 case 0x26: /* ES */
779 __vmread(GUEST_ES_SELECTOR, &sel);
780 break;
781 case 0x64: /* FS */
782 __vmread(GUEST_FS_SELECTOR, &sel);
783 break;
784 case 0x65: /* GS */
785 __vmread(GUEST_GS_SELECTOR, &sel);
786 break;
787 case 0x3e: /* DS */
788 /* FALLTHROUGH */
789 default:
790 /* DS is the default */
791 __vmread(GUEST_DS_SELECTOR, &sel);
792 }
793 return sel == 0 ? 1 : 0;
794 }
796 return 0;
797 }
799 extern void send_pio_req(struct cpu_user_regs *regs, unsigned long port,
800 unsigned long count, int size, long value,
801 int dir, int pvalid);
803 static void vmx_io_instruction(struct cpu_user_regs *regs,
804 unsigned long exit_qualification, unsigned long inst_len)
805 {
806 struct mmio_op *mmio_opp;
807 unsigned long eip, cs, eflags;
808 unsigned long port, size, dir;
809 int vm86;
811 mmio_opp = &current->arch.hvm_vcpu.mmio_op;
812 mmio_opp->instr = INSTR_PIO;
813 mmio_opp->flags = 0;
815 __vmread(GUEST_RIP, &eip);
816 __vmread(GUEST_CS_SELECTOR, &cs);
817 __vmread(GUEST_RFLAGS, &eflags);
818 vm86 = eflags & X86_EFLAGS_VM ? 1 : 0;
820 HVM_DBG_LOG(DBG_LEVEL_1,
821 "vmx_io_instruction: vm86 %d, eip=%lx:%lx, "
822 "exit_qualification = %lx",
823 vm86, cs, eip, exit_qualification);
825 if (test_bit(6, &exit_qualification))
826 port = (exit_qualification >> 16) & 0xFFFF;
827 else
828 port = regs->edx & 0xffff;
829 TRACE_VMEXIT(2, port);
830 size = (exit_qualification & 7) + 1;
831 dir = test_bit(3, &exit_qualification); /* direction */
833 if (test_bit(4, &exit_qualification)) { /* string instruction */
834 unsigned long addr, count = 1;
835 int sign = regs->eflags & EF_DF ? -1 : 1;
837 __vmread(GUEST_LINEAR_ADDRESS, &addr);
839 /*
840 * In protected mode, guest linear address is invalid if the
841 * selector is null.
842 */
843 if (!vm86 && check_for_null_selector(eip))
844 addr = dir == IOREQ_WRITE ? regs->esi : regs->edi;
846 if (test_bit(5, &exit_qualification)) { /* "rep" prefix */
847 mmio_opp->flags |= REPZ;
848 count = vm86 ? regs->ecx & 0xFFFF : regs->ecx;
849 }
851 /*
852 * Handle string pio instructions that cross pages or that
853 * are unaligned. See the comments in hvm_domain.c/handle_mmio()
854 */
855 if ((addr & PAGE_MASK) != ((addr + size - 1) & PAGE_MASK)) {
856 unsigned long value = 0;
858 mmio_opp->flags |= OVERLAP;
859 if (dir == IOREQ_WRITE)
860 hvm_copy(&value, addr, size, HVM_COPY_IN);
861 send_pio_req(regs, port, 1, size, value, dir, 0);
862 } else {
863 if ((addr & PAGE_MASK) != ((addr + count * size - 1) & PAGE_MASK)) {
864 if (sign > 0)
865 count = (PAGE_SIZE - (addr & ~PAGE_MASK)) / size;
866 else
867 count = (addr & ~PAGE_MASK) / size;
868 } else
869 __update_guest_eip(inst_len);
871 send_pio_req(regs, port, count, size, addr, dir, 1);
872 }
873 } else {
874 if (port == 0xe9 && dir == IOREQ_WRITE && size == 1)
875 hvm_print_line(current, regs->eax); /* guest debug output */
877 __update_guest_eip(inst_len);
878 send_pio_req(regs, port, 1, size, regs->eax, dir, 0);
879 }
880 }
882 int
883 vmx_world_save(struct vcpu *v, struct vmx_assist_context *c)
884 {
885 unsigned long inst_len;
886 int error = 0;
888 error |= __vmread(VM_EXIT_INSTRUCTION_LEN, &inst_len);
889 error |= __vmread(GUEST_RIP, &c->eip);
890 c->eip += inst_len; /* skip transition instruction */
891 error |= __vmread(GUEST_RSP, &c->esp);
892 error |= __vmread(GUEST_RFLAGS, &c->eflags);
894 error |= __vmread(CR0_READ_SHADOW, &c->cr0);
895 c->cr3 = v->arch.hvm_vmx.cpu_cr3;
896 error |= __vmread(CR4_READ_SHADOW, &c->cr4);
898 error |= __vmread(GUEST_IDTR_LIMIT, &c->idtr_limit);
899 error |= __vmread(GUEST_IDTR_BASE, &c->idtr_base);
901 error |= __vmread(GUEST_GDTR_LIMIT, &c->gdtr_limit);
902 error |= __vmread(GUEST_GDTR_BASE, &c->gdtr_base);
904 error |= __vmread(GUEST_CS_SELECTOR, &c->cs_sel);
905 error |= __vmread(GUEST_CS_LIMIT, &c->cs_limit);
906 error |= __vmread(GUEST_CS_BASE, &c->cs_base);
907 error |= __vmread(GUEST_CS_AR_BYTES, &c->cs_arbytes.bytes);
909 error |= __vmread(GUEST_DS_SELECTOR, &c->ds_sel);
910 error |= __vmread(GUEST_DS_LIMIT, &c->ds_limit);
911 error |= __vmread(GUEST_DS_BASE, &c->ds_base);
912 error |= __vmread(GUEST_DS_AR_BYTES, &c->ds_arbytes.bytes);
914 error |= __vmread(GUEST_ES_SELECTOR, &c->es_sel);
915 error |= __vmread(GUEST_ES_LIMIT, &c->es_limit);
916 error |= __vmread(GUEST_ES_BASE, &c->es_base);
917 error |= __vmread(GUEST_ES_AR_BYTES, &c->es_arbytes.bytes);
919 error |= __vmread(GUEST_SS_SELECTOR, &c->ss_sel);
920 error |= __vmread(GUEST_SS_LIMIT, &c->ss_limit);
921 error |= __vmread(GUEST_SS_BASE, &c->ss_base);
922 error |= __vmread(GUEST_SS_AR_BYTES, &c->ss_arbytes.bytes);
924 error |= __vmread(GUEST_FS_SELECTOR, &c->fs_sel);
925 error |= __vmread(GUEST_FS_LIMIT, &c->fs_limit);
926 error |= __vmread(GUEST_FS_BASE, &c->fs_base);
927 error |= __vmread(GUEST_FS_AR_BYTES, &c->fs_arbytes.bytes);
929 error |= __vmread(GUEST_GS_SELECTOR, &c->gs_sel);
930 error |= __vmread(GUEST_GS_LIMIT, &c->gs_limit);
931 error |= __vmread(GUEST_GS_BASE, &c->gs_base);
932 error |= __vmread(GUEST_GS_AR_BYTES, &c->gs_arbytes.bytes);
934 error |= __vmread(GUEST_TR_SELECTOR, &c->tr_sel);
935 error |= __vmread(GUEST_TR_LIMIT, &c->tr_limit);
936 error |= __vmread(GUEST_TR_BASE, &c->tr_base);
937 error |= __vmread(GUEST_TR_AR_BYTES, &c->tr_arbytes.bytes);
939 error |= __vmread(GUEST_LDTR_SELECTOR, &c->ldtr_sel);
940 error |= __vmread(GUEST_LDTR_LIMIT, &c->ldtr_limit);
941 error |= __vmread(GUEST_LDTR_BASE, &c->ldtr_base);
942 error |= __vmread(GUEST_LDTR_AR_BYTES, &c->ldtr_arbytes.bytes);
944 return !error;
945 }
947 int
948 vmx_world_restore(struct vcpu *v, struct vmx_assist_context *c)
949 {
950 unsigned long mfn, old_cr4, old_base_mfn;
951 int error = 0;
953 error |= __vmwrite(GUEST_RIP, c->eip);
954 error |= __vmwrite(GUEST_RSP, c->esp);
955 error |= __vmwrite(GUEST_RFLAGS, c->eflags);
957 error |= __vmwrite(CR0_READ_SHADOW, c->cr0);
959 if (!vmx_paging_enabled(v)) {
960 HVM_DBG_LOG(DBG_LEVEL_VMMU, "switching to vmxassist. use phys table");
961 __vmwrite(GUEST_CR3, pagetable_get_paddr(v->domain->arch.phys_table));
962 goto skip_cr3;
963 }
965 if (c->cr3 == v->arch.hvm_vmx.cpu_cr3) {
966 /*
967 * This is a simple TLB flush, implying the guest has
968 * removed some translation or changed page attributes.
969 * We simply invalidate the shadow.
970 */
971 mfn = get_mfn_from_gpfn(c->cr3 >> PAGE_SHIFT);
972 if (mfn != pagetable_get_pfn(v->arch.guest_table)) {
973 printk("Invalid CR3 value=%x", c->cr3);
974 domain_crash_synchronous();
975 return 0;
976 }
977 shadow_sync_all(v->domain);
978 } else {
979 /*
980 * If different, make a shadow. Check if the PDBR is valid
981 * first.
982 */
983 HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 c->cr3 = %x", c->cr3);
984 if ((c->cr3 >> PAGE_SHIFT) > v->domain->max_pages) {
985 printk("Invalid CR3 value=%x", c->cr3);
986 domain_crash_synchronous();
987 return 0;
988 }
989 mfn = get_mfn_from_gpfn(c->cr3 >> PAGE_SHIFT);
990 if(!get_page(mfn_to_page(mfn), v->domain))
991 return 0;
992 old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
993 v->arch.guest_table = mk_pagetable(mfn << PAGE_SHIFT);
994 if (old_base_mfn)
995 put_page(mfn_to_page(old_base_mfn));
996 update_pagetables(v);
997 /*
998 * arch.shadow_table should now hold the next CR3 for shadow
999 */
1000 v->arch.hvm_vmx.cpu_cr3 = c->cr3;
1001 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %x", c->cr3);
1002 __vmwrite(GUEST_CR3, pagetable_get_paddr(v->arch.shadow_table));
1005 skip_cr3:
1007 error |= __vmread(CR4_READ_SHADOW, &old_cr4);
1008 error |= __vmwrite(GUEST_CR4, (c->cr4 | VMX_CR4_HOST_MASK));
1009 error |= __vmwrite(CR4_READ_SHADOW, c->cr4);
1011 error |= __vmwrite(GUEST_IDTR_LIMIT, c->idtr_limit);
1012 error |= __vmwrite(GUEST_IDTR_BASE, c->idtr_base);
1014 error |= __vmwrite(GUEST_GDTR_LIMIT, c->gdtr_limit);
1015 error |= __vmwrite(GUEST_GDTR_BASE, c->gdtr_base);
1017 error |= __vmwrite(GUEST_CS_SELECTOR, c->cs_sel);
1018 error |= __vmwrite(GUEST_CS_LIMIT, c->cs_limit);
1019 error |= __vmwrite(GUEST_CS_BASE, c->cs_base);
1020 error |= __vmwrite(GUEST_CS_AR_BYTES, c->cs_arbytes.bytes);
1022 error |= __vmwrite(GUEST_DS_SELECTOR, c->ds_sel);
1023 error |= __vmwrite(GUEST_DS_LIMIT, c->ds_limit);
1024 error |= __vmwrite(GUEST_DS_BASE, c->ds_base);
1025 error |= __vmwrite(GUEST_DS_AR_BYTES, c->ds_arbytes.bytes);
1027 error |= __vmwrite(GUEST_ES_SELECTOR, c->es_sel);
1028 error |= __vmwrite(GUEST_ES_LIMIT, c->es_limit);
1029 error |= __vmwrite(GUEST_ES_BASE, c->es_base);
1030 error |= __vmwrite(GUEST_ES_AR_BYTES, c->es_arbytes.bytes);
1032 error |= __vmwrite(GUEST_SS_SELECTOR, c->ss_sel);
1033 error |= __vmwrite(GUEST_SS_LIMIT, c->ss_limit);
1034 error |= __vmwrite(GUEST_SS_BASE, c->ss_base);
1035 error |= __vmwrite(GUEST_SS_AR_BYTES, c->ss_arbytes.bytes);
1037 error |= __vmwrite(GUEST_FS_SELECTOR, c->fs_sel);
1038 error |= __vmwrite(GUEST_FS_LIMIT, c->fs_limit);
1039 error |= __vmwrite(GUEST_FS_BASE, c->fs_base);
1040 error |= __vmwrite(GUEST_FS_AR_BYTES, c->fs_arbytes.bytes);
1042 error |= __vmwrite(GUEST_GS_SELECTOR, c->gs_sel);
1043 error |= __vmwrite(GUEST_GS_LIMIT, c->gs_limit);
1044 error |= __vmwrite(GUEST_GS_BASE, c->gs_base);
1045 error |= __vmwrite(GUEST_GS_AR_BYTES, c->gs_arbytes.bytes);
1047 error |= __vmwrite(GUEST_TR_SELECTOR, c->tr_sel);
1048 error |= __vmwrite(GUEST_TR_LIMIT, c->tr_limit);
1049 error |= __vmwrite(GUEST_TR_BASE, c->tr_base);
1050 error |= __vmwrite(GUEST_TR_AR_BYTES, c->tr_arbytes.bytes);
1052 error |= __vmwrite(GUEST_LDTR_SELECTOR, c->ldtr_sel);
1053 error |= __vmwrite(GUEST_LDTR_LIMIT, c->ldtr_limit);
1054 error |= __vmwrite(GUEST_LDTR_BASE, c->ldtr_base);
1055 error |= __vmwrite(GUEST_LDTR_AR_BYTES, c->ldtr_arbytes.bytes);
1057 return !error;
1060 enum { VMX_ASSIST_INVOKE = 0, VMX_ASSIST_RESTORE };
1062 int
1063 vmx_assist(struct vcpu *v, int mode)
1065 struct vmx_assist_context c;
1066 u32 magic;
1067 u32 cp;
1069 /* make sure vmxassist exists (this is not an error) */
1070 if (!hvm_copy(&magic, VMXASSIST_MAGIC_OFFSET, sizeof(magic), HVM_COPY_IN))
1071 return 0;
1072 if (magic != VMXASSIST_MAGIC)
1073 return 0;
1075 switch (mode) {
1076 /*
1077 * Transfer control to vmxassist.
1078 * Store the current context in VMXASSIST_OLD_CONTEXT and load
1079 * the new VMXASSIST_NEW_CONTEXT context. This context was created
1080 * by vmxassist and will transfer control to it.
1081 */
1082 case VMX_ASSIST_INVOKE:
1083 /* save the old context */
1084 if (!hvm_copy(&cp, VMXASSIST_OLD_CONTEXT, sizeof(cp), HVM_COPY_IN))
1085 goto error;
1086 if (cp != 0) {
1087 if (!vmx_world_save(v, &c))
1088 goto error;
1089 if (!hvm_copy(&c, cp, sizeof(c), HVM_COPY_OUT))
1090 goto error;
1093 /* restore the new context, this should activate vmxassist */
1094 if (!hvm_copy(&cp, VMXASSIST_NEW_CONTEXT, sizeof(cp), HVM_COPY_IN))
1095 goto error;
1096 if (cp != 0) {
1097 if (!hvm_copy(&c, cp, sizeof(c), HVM_COPY_IN))
1098 goto error;
1099 if (!vmx_world_restore(v, &c))
1100 goto error;
1101 return 1;
1103 break;
1105 /*
1106 * Restore the VMXASSIST_OLD_CONTEXT that was saved by VMX_ASSIST_INVOKE
1107 * above.
1108 */
1109 case VMX_ASSIST_RESTORE:
1110 /* restore the old context */
1111 if (!hvm_copy(&cp, VMXASSIST_OLD_CONTEXT, sizeof(cp), HVM_COPY_IN))
1112 goto error;
1113 if (cp != 0) {
1114 if (!hvm_copy(&c, cp, sizeof(c), HVM_COPY_IN))
1115 goto error;
1116 if (!vmx_world_restore(v, &c))
1117 goto error;
1118 return 1;
1120 break;
1123 error:
1124 printf("Failed to transfer to vmxassist\n");
1125 domain_crash_synchronous();
1126 return 0;
1129 static int vmx_set_cr0(unsigned long value)
1131 struct vcpu *v = current;
1132 unsigned long mfn;
1133 unsigned long eip;
1134 int paging_enabled;
1135 unsigned long vm_entry_value;
1136 unsigned long old_cr0;
1138 /*
1139 * CR0: We don't want to lose PE and PG.
1140 */
1141 __vmread_vcpu(v, CR0_READ_SHADOW, &old_cr0);
1142 paging_enabled = (old_cr0 & X86_CR0_PE) && (old_cr0 & X86_CR0_PG);
1143 /* If the OS doesn't use CLTS to clear the TS bit... */
1144 if((old_cr0 & X86_CR0_TS) && !(value & X86_CR0_TS))
1146 clts();
1147 setup_fpu(v);
1151 __vmwrite(GUEST_CR0, value | X86_CR0_PE | X86_CR0_PG | X86_CR0_NE);
1152 __vmwrite(CR0_READ_SHADOW, value);
1154 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR0 value = %lx\n", value);
1156 if ((value & X86_CR0_PE) && (value & X86_CR0_PG) && !paging_enabled) {
1157 /*
1158 * The guest CR3 must be pointing to guest physical memory.
1159 */
1160 if ( !VALID_MFN(mfn = get_mfn_from_gpfn(
1161 v->arch.hvm_vmx.cpu_cr3 >> PAGE_SHIFT)) ||
1162 !get_page(mfn_to_page(mfn), v->domain) )
1164 printk("Invalid CR3 value = %lx", v->arch.hvm_vmx.cpu_cr3);
1165 domain_crash_synchronous(); /* need to take a clean path */
1168 #if defined(__x86_64__)
1169 if (test_bit(VMX_CPU_STATE_LME_ENABLED,
1170 &v->arch.hvm_vmx.cpu_state) &&
1171 !test_bit(VMX_CPU_STATE_PAE_ENABLED,
1172 &v->arch.hvm_vmx.cpu_state)){
1173 HVM_DBG_LOG(DBG_LEVEL_1, "Enable paging before PAE enable\n");
1174 vmx_inject_exception(v, TRAP_gp_fault, 0);
1176 if (test_bit(VMX_CPU_STATE_LME_ENABLED,
1177 &v->arch.hvm_vmx.cpu_state)){
1178 /* PAE must already be enabled at this point */
1179 HVM_DBG_LOG(DBG_LEVEL_1, "Enable the Long mode\n");
1180 set_bit(VMX_CPU_STATE_LMA_ENABLED,
1181 &v->arch.hvm_vmx.cpu_state);
1182 __vmread(VM_ENTRY_CONTROLS, &vm_entry_value);
1183 vm_entry_value |= VM_ENTRY_CONTROLS_IA32E_MODE;
1184 __vmwrite(VM_ENTRY_CONTROLS, vm_entry_value);
1186 #if CONFIG_PAGING_LEVELS >= 4
1187 if(!shadow_set_guest_paging_levels(v->domain, 4)) {
1188 printk("Unsupported guest paging levels\n");
1189 domain_crash_synchronous(); /* need to take a clean path */
1191 #endif
1193 else
1195 #if CONFIG_PAGING_LEVELS >= 4
1196 if(!shadow_set_guest_paging_levels(v->domain, 2)) {
1197 printk("Unsupported guest paging levels\n");
1198 domain_crash_synchronous(); /* need to take a clean path */
1200 #endif
1204 unsigned long crn;
1205 /* update CR4's PAE if needed */
1206 __vmread(GUEST_CR4, &crn);
1207 if ( (!(crn & X86_CR4_PAE)) &&
1208 test_bit(VMX_CPU_STATE_PAE_ENABLED,
1209 &v->arch.hvm_vmx.cpu_state) )
1211 HVM_DBG_LOG(DBG_LEVEL_1, "enable PAE on cr4\n");
1212 __vmwrite(GUEST_CR4, crn | X86_CR4_PAE);
1215 #endif
1216 /*
1217 * Now arch.guest_table points to the machine-physical base of the guest page table.
1218 */
1219 v->arch.guest_table = mk_pagetable(mfn << PAGE_SHIFT);
1220 update_pagetables(v);
1222 HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx",
1223 (unsigned long) (mfn << PAGE_SHIFT));
1225 __vmwrite(GUEST_CR3, pagetable_get_paddr(v->arch.shadow_table));
1226 /*
1227 * arch.shadow_table should hold the next CR3 for shadow
1228 */
1229 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx",
1230 v->arch.hvm_vmx.cpu_cr3, mfn);
1233 if(!((value & X86_CR0_PE) && (value & X86_CR0_PG)) && paging_enabled)
1234 if(v->arch.hvm_vmx.cpu_cr3) {
1235 put_page(mfn_to_page(get_mfn_from_gpfn(
1236 v->arch.hvm_vmx.cpu_cr3 >> PAGE_SHIFT)));
1237 v->arch.guest_table = mk_pagetable(0);
1240 /*
1241 * VMX does not implement real-mode virtualization. We emulate
1242 * real-mode by performing a world switch to VMXAssist whenever
1243 * a partition disables the CR0.PE bit.
1244 */
1245 if ((value & X86_CR0_PE) == 0) {
1246 if ( value & X86_CR0_PG ) {
1247 /* inject GP here */
1248 vmx_inject_exception(v, TRAP_gp_fault, 0);
1249 return 0;
1250 } else {
1251 /*
1252 * Disable paging here.
1253 * Same as PE == 1 && PG == 0
1254 */
1255 if (test_bit(VMX_CPU_STATE_LMA_ENABLED,
1256 &v->arch.hvm_vmx.cpu_state)){
1257 clear_bit(VMX_CPU_STATE_LMA_ENABLED,
1258 &v->arch.hvm_vmx.cpu_state);
1259 __vmread(VM_ENTRY_CONTROLS, &vm_entry_value);
1260 vm_entry_value &= ~VM_ENTRY_CONTROLS_IA32E_MODE;
1261 __vmwrite(VM_ENTRY_CONTROLS, vm_entry_value);
1265 clear_all_shadow_status(v->domain);
1266 if (vmx_assist(v, VMX_ASSIST_INVOKE)) {
1267 set_bit(VMX_CPU_STATE_ASSIST_ENABLED, &v->arch.hvm_vmx.cpu_state);
1268 __vmread(GUEST_RIP, &eip);
1269 HVM_DBG_LOG(DBG_LEVEL_1,
1270 "Transfering control to vmxassist %%eip 0x%lx\n", eip);
1271 return 0; /* do not update eip! */
1273 } else if (test_bit(VMX_CPU_STATE_ASSIST_ENABLED,
1274 &v->arch.hvm_vmx.cpu_state)) {
1275 __vmread(GUEST_RIP, &eip);
1276 HVM_DBG_LOG(DBG_LEVEL_1,
1277 "Enabling CR0.PE at %%eip 0x%lx\n", eip);
1278 if (vmx_assist(v, VMX_ASSIST_RESTORE)) {
1279 clear_bit(VMX_CPU_STATE_ASSIST_ENABLED,
1280 &v->arch.hvm_vmx.cpu_state);
1281 __vmread(GUEST_RIP, &eip);
1282 HVM_DBG_LOG(DBG_LEVEL_1,
1283 "Restoring to %%eip 0x%lx\n", eip);
1284 return 0; /* do not update eip! */
1288 return 1;
1291 #define CASE_GET_REG(REG, reg) \
1292 case REG_ ## REG: value = regs->reg; break
1294 #define CASE_EXTEND_SET_REG \
1295 CASE_EXTEND_REG(S)
1296 #define CASE_EXTEND_GET_REG \
1297 CASE_EXTEND_REG(G)
1299 #ifdef __i386__
1300 #define CASE_EXTEND_REG(T)
1301 #else
1302 #define CASE_EXTEND_REG(T) \
1303 CASE_ ## T ## ET_REG(R8, r8); \
1304 CASE_ ## T ## ET_REG(R9, r9); \
1305 CASE_ ## T ## ET_REG(R10, r10); \
1306 CASE_ ## T ## ET_REG(R11, r11); \
1307 CASE_ ## T ## ET_REG(R12, r12); \
1308 CASE_ ## T ## ET_REG(R13, r13); \
1309 CASE_ ## T ## ET_REG(R14, r14); \
1310 CASE_ ## T ## ET_REG(R15, r15);
1311 #endif
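1312 /* The CASE_EXTEND_* macros above token-paste S/G with ET_REG so that, on x86_64, the R8-R15 registers get CASE_GET_REG/CASE_SET_REG cases too; on i386 they expand to nothing. */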
1314 /*
1315 * Write to control registers
1316 */
1317 static int mov_to_cr(int gp, int cr, struct cpu_user_regs *regs)
1319 unsigned long value;
1320 unsigned long old_cr;
1321 struct vcpu *v = current;
1323 switch (gp) {
1324 CASE_GET_REG(EAX, eax);
1325 CASE_GET_REG(ECX, ecx);
1326 CASE_GET_REG(EDX, edx);
1327 CASE_GET_REG(EBX, ebx);
1328 CASE_GET_REG(EBP, ebp);
1329 CASE_GET_REG(ESI, esi);
1330 CASE_GET_REG(EDI, edi);
1331 CASE_EXTEND_GET_REG
1332 case REG_ESP:
1333 __vmread(GUEST_RSP, &value);
1334 break;
1335 default:
1336 printk("invalid gp: %d\n", gp);
1337 __hvm_bug(regs);
1340 HVM_DBG_LOG(DBG_LEVEL_1, "mov_to_cr: CR%d, value = %lx,", cr, value);
1341 HVM_DBG_LOG(DBG_LEVEL_1, "current = %lx,", (unsigned long) current);
1343 switch(cr) {
1344 case 0:
1346 return vmx_set_cr0(value);
1348 case 3:
1350 unsigned long old_base_mfn, mfn;
1352 /*
1353 * If paging is not enabled yet, simply copy the value to CR3.
1354 */
1355 if (!vmx_paging_enabled(v)) {
1356 v->arch.hvm_vmx.cpu_cr3 = value;
1357 break;
1360 /*
1361 * We make a new one if the shadow does not exist.
1362 */
1363 if (value == v->arch.hvm_vmx.cpu_cr3) {
1364 /*
1365 * This is a simple TLB flush, implying the guest has
1366 * removed some translation or changed page attributes.
1367 * We simply invalidate the shadow.
1368 */
1369 mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT);
1370 if (mfn != pagetable_get_pfn(v->arch.guest_table))
1371 __hvm_bug(regs);
1372 shadow_sync_all(v->domain);
1373 } else {
1374 /*
1375 * If different, make a shadow. Check if the PDBR is valid
1376 * first.
1377 */
1378 HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 value = %lx", value);
1379 if ( ((value >> PAGE_SHIFT) > v->domain->max_pages ) ||
1380 !VALID_MFN(mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT)) ||
1381 !get_page(mfn_to_page(mfn), v->domain) )
1383 printk("Invalid CR3 value=%lx", value);
1384 domain_crash_synchronous(); /* need to take a clean path */
1386 old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
1387 v->arch.guest_table = mk_pagetable(mfn << PAGE_SHIFT);
1388 if (old_base_mfn)
1389 put_page(mfn_to_page(old_base_mfn));
1390 update_pagetables(v);
1391 /*
1392 * arch.shadow_table should now hold the next CR3 for shadow
1393 */
1394 v->arch.hvm_vmx.cpu_cr3 = value;
1395 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx",
1396 value);
1397 __vmwrite(GUEST_CR3, pagetable_get_paddr(v->arch.shadow_table));
1399 break;
1401 case 4: /* CR4 */
1403 if (value & X86_CR4_PAE){
1404 set_bit(VMX_CPU_STATE_PAE_ENABLED, &v->arch.hvm_vmx.cpu_state);
1405 } else {
1406 if (test_bit(VMX_CPU_STATE_LMA_ENABLED,
1407 &v->arch.hvm_vmx.cpu_state)){
1408 vmx_inject_exception(v, TRAP_gp_fault, 0);
1410 clear_bit(VMX_CPU_STATE_PAE_ENABLED, &v->arch.hvm_vmx.cpu_state);
1413 __vmread(CR4_READ_SHADOW, &old_cr);
1415 __vmwrite(GUEST_CR4, value| VMX_CR4_HOST_MASK);
1416 __vmwrite(CR4_READ_SHADOW, value);
1418 /*
1419 * Writing to CR4 to modify the PSE, PGE, or PAE flag invalidates
1420 * all TLB entries except global entries.
1421 */
1422 if ((old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE)) {
1423 shadow_sync_all(v->domain);
1425 break;
1427 default:
1428 printk("invalid cr: %d\n", gp);
1429 __hvm_bug(regs);
1432 return 1;
1435 #define CASE_SET_REG(REG, reg) \
1436 case REG_ ## REG: \
1437 regs->reg = value; \
1438 break
1440 /*
1441 * Read from control registers. CR0 and CR4 are read from the shadow.
1442 */
1443 static void mov_from_cr(int cr, int gp, struct cpu_user_regs *regs)
1445 unsigned long value;
1446 struct vcpu *v = current;
1448 if (cr != 3)
1449 __hvm_bug(regs);
1451 value = (unsigned long) v->arch.hvm_vmx.cpu_cr3;
1453 switch (gp) {
1454 CASE_SET_REG(EAX, eax);
1455 CASE_SET_REG(ECX, ecx);
1456 CASE_SET_REG(EDX, edx);
1457 CASE_SET_REG(EBX, ebx);
1458 CASE_SET_REG(EBP, ebp);
1459 CASE_SET_REG(ESI, esi);
1460 CASE_SET_REG(EDI, edi);
1461 CASE_EXTEND_SET_REG
1462 case REG_ESP:
1463 __vmwrite(GUEST_RSP, value);
1464 regs->esp = value;
1465 break;
1466 default:
1467 printk("invalid gp: %d\n", gp);
1468 __hvm_bug(regs);
1471 HVM_DBG_LOG(DBG_LEVEL_VMMU, "mov_from_cr: CR%d, value = %lx,", cr, value);
1474 static int vmx_cr_access(unsigned long exit_qualification, struct cpu_user_regs *regs)
1476 unsigned int gp, cr;
1477 unsigned long value;
1478 struct vcpu *v = current;
1480 switch (exit_qualification & CONTROL_REG_ACCESS_TYPE) {
1481 case TYPE_MOV_TO_CR:
1482 gp = exit_qualification & CONTROL_REG_ACCESS_REG;
1483 cr = exit_qualification & CONTROL_REG_ACCESS_NUM;
1484 TRACE_VMEXIT(1,TYPE_MOV_TO_CR);
1485 TRACE_VMEXIT(2,cr);
1486 TRACE_VMEXIT(3,gp);
1487 return mov_to_cr(gp, cr, regs);
1488 case TYPE_MOV_FROM_CR:
1489 gp = exit_qualification & CONTROL_REG_ACCESS_REG;
1490 cr = exit_qualification & CONTROL_REG_ACCESS_NUM;
1491 TRACE_VMEXIT(1,TYPE_MOV_FROM_CR);
1492 TRACE_VMEXIT(2,cr);
1493 TRACE_VMEXIT(3,gp);
1494 mov_from_cr(cr, gp, regs);
1495 break;
1496 case TYPE_CLTS:
1497 TRACE_VMEXIT(1,TYPE_CLTS);
1498 clts();
1499 setup_fpu(current);
1501 __vmread_vcpu(v, GUEST_CR0, &value);
1502 value &= ~X86_CR0_TS; /* clear TS */
1503 __vmwrite(GUEST_CR0, value);
1505 __vmread_vcpu(v, CR0_READ_SHADOW, &value);
1506 value &= ~X86_CR0_TS; /* clear TS */
1507 __vmwrite(CR0_READ_SHADOW, value);
1508 break;
1509 case TYPE_LMSW:
1510 TRACE_VMEXIT(1,TYPE_LMSW);
1511 __vmread_vcpu(v, CR0_READ_SHADOW, &value);
1512 value = (value & ~0xF) |
1513 (((exit_qualification & LMSW_SOURCE_DATA) >> 16) & 0xF);
1514 return vmx_set_cr0(value);
1515 break;
1516 default:
1517 __hvm_bug(regs);
1518 break;
1520 return 1;
1523 static inline void vmx_do_msr_read(struct cpu_user_regs *regs)
1525 u64 msr_content = 0;
1526 struct vcpu *v = current;
1528 HVM_DBG_LOG(DBG_LEVEL_1, "vmx_do_msr_read: ecx=%lx, eax=%lx, edx=%lx",
1529 (unsigned long)regs->ecx, (unsigned long)regs->eax,
1530 (unsigned long)regs->edx);
1531 switch (regs->ecx) {
1532 case MSR_IA32_TIME_STAMP_COUNTER:
1534 struct hvm_virpit *vpit;
1536 rdtscll(msr_content);
1537 vpit = &(v->domain->arch.hvm_domain.vpit);
1538 msr_content += vpit->shift;
1539 break;
1541 case MSR_IA32_SYSENTER_CS:
1542 __vmread(GUEST_SYSENTER_CS, (u32 *)&msr_content);
1543 break;
1544 case MSR_IA32_SYSENTER_ESP:
1545 __vmread(GUEST_SYSENTER_ESP, &msr_content);
1546 break;
1547 case MSR_IA32_SYSENTER_EIP:
1548 __vmread(GUEST_SYSENTER_EIP, &msr_content);
1549 break;
1550 case MSR_IA32_APICBASE:
1551 msr_content = VLAPIC(v) ? VLAPIC(v)->apic_base_msr : 0;
1552 break;
1553 default:
1554 if(long_mode_do_msr_read(regs))
1555 return;
1556 rdmsr_user(regs->ecx, regs->eax, regs->edx);
1557 break;
1560 regs->eax = msr_content & 0xFFFFFFFF;
1561 regs->edx = msr_content >> 32;
1563 HVM_DBG_LOG(DBG_LEVEL_1, "vmx_do_msr_read returns: "
1564 "ecx=%lx, eax=%lx, edx=%lx",
1565 (unsigned long)regs->ecx, (unsigned long)regs->eax,
1566 (unsigned long)regs->edx);
1569 static inline void vmx_do_msr_write(struct cpu_user_regs *regs)
1571 u64 msr_content;
1572 struct vcpu *v = current;
1574 HVM_DBG_LOG(DBG_LEVEL_1, "vmx_do_msr_write: ecx=%lx, eax=%lx, edx=%lx",
1575 (unsigned long)regs->ecx, (unsigned long)regs->eax,
1576 (unsigned long)regs->edx);
1578 msr_content = (regs->eax & 0xFFFFFFFF) | ((u64)regs->edx << 32);
1580 switch (regs->ecx) {
1581 case MSR_IA32_TIME_STAMP_COUNTER:
1583 struct hvm_virpit *vpit;
1584 u64 host_tsc, drift;
1586 rdtscll(host_tsc);
1587 vpit = &(v->domain->arch.hvm_domain.vpit);
1588 drift = v->arch.hvm_vmx.tsc_offset - vpit->shift;
1589 vpit->shift = msr_content - host_tsc;
1590 v->arch.hvm_vmx.tsc_offset = vpit->shift + drift;
1591 __vmwrite(TSC_OFFSET, vpit->shift);
1593 #if defined (__i386__)
1594 __vmwrite(TSC_OFFSET_HIGH, ((vpit->shift)>>32));
1595 #endif
1596 break;
1598 case MSR_IA32_SYSENTER_CS:
1599 __vmwrite(GUEST_SYSENTER_CS, msr_content);
1600 break;
1601 case MSR_IA32_SYSENTER_ESP:
1602 __vmwrite(GUEST_SYSENTER_ESP, msr_content);
1603 break;
1604 case MSR_IA32_SYSENTER_EIP:
1605 __vmwrite(GUEST_SYSENTER_EIP, msr_content);
1606 break;
1607 case MSR_IA32_APICBASE:
1608 vlapic_msr_set(VLAPIC(v), msr_content);
1609 break;
1610 default:
1611 long_mode_do_msr_write(regs);
1612 break;
1615 HVM_DBG_LOG(DBG_LEVEL_1, "vmx_do_msr_write returns: "
1616 "ecx=%lx, eax=%lx, edx=%lx",
1617 (unsigned long)regs->ecx, (unsigned long)regs->eax,
1618 (unsigned long)regs->edx);
1621 /*
1622 * We use the HLT exit to block the VCPU and let the scheduler run
1623 */
1624 void vmx_vmexit_do_hlt(void)
1626 struct vcpu *v=current;
1627 struct hvm_virpit *vpit = &(v->domain->arch.hvm_domain.vpit);
1628 s_time_t next_pit=-1,next_wakeup;
1630 if ( !v->vcpu_id ) {
1631 next_pit = get_pit_scheduled(v,vpit);
1633 next_wakeup = get_apictime_scheduled(v);
1634 if ( (next_pit != -1 && next_pit < next_wakeup) || next_wakeup == -1 ) {
1635 next_wakeup = next_pit;
1637 if ( next_wakeup != - 1 )
1638 set_timer(&current->arch.hvm_vmx.hlt_timer, next_wakeup);
1639 do_sched_op(SCHEDOP_block, 0);
1642 static inline void vmx_vmexit_do_extint(struct cpu_user_regs *regs)
1644 unsigned int vector;
1645 int error;
1647 asmlinkage void do_IRQ(struct cpu_user_regs *);
1648 void smp_apic_timer_interrupt(struct cpu_user_regs *);
1649 void timer_interrupt(int, void *, struct cpu_user_regs *);
1650 void smp_event_check_interrupt(void);
1651 void smp_invalidate_interrupt(void);
1652 void smp_call_function_interrupt(void);
1653 void smp_spurious_interrupt(struct cpu_user_regs *regs);
1654 void smp_error_interrupt(struct cpu_user_regs *regs);
1656 if ((error = __vmread(VM_EXIT_INTR_INFO, &vector))
1657 && !(vector & INTR_INFO_VALID_MASK))
1658 __hvm_bug(regs);
1660 vector &= 0xff;
1661 local_irq_disable();
1663 switch(vector) {
1664 case LOCAL_TIMER_VECTOR:
1665 smp_apic_timer_interrupt(regs);
1666 break;
1667 case EVENT_CHECK_VECTOR:
1668 smp_event_check_interrupt();
1669 break;
1670 case INVALIDATE_TLB_VECTOR:
1671 smp_invalidate_interrupt();
1672 break;
1673 case CALL_FUNCTION_VECTOR:
1674 smp_call_function_interrupt();
1675 break;
1676 case SPURIOUS_APIC_VECTOR:
1677 smp_spurious_interrupt(regs);
1678 break;
1679 case ERROR_APIC_VECTOR:
1680 smp_error_interrupt(regs);
1681 break;
1682 default:
1683 regs->entry_vector = vector;
1684 do_IRQ(regs);
1685 break;
1689 #if defined (__x86_64__)
1690 void store_cpu_user_regs(struct cpu_user_regs *regs)
1692 __vmread(GUEST_SS_SELECTOR, &regs->ss);
1693 __vmread(GUEST_RSP, &regs->rsp);
1694 __vmread(GUEST_RFLAGS, &regs->rflags);
1695 __vmread(GUEST_CS_SELECTOR, &regs->cs);
1696 __vmread(GUEST_DS_SELECTOR, &regs->ds);
1697 __vmread(GUEST_ES_SELECTOR, &regs->es);
1698 __vmread(GUEST_RIP, &regs->rip);
1700 #elif defined (__i386__)
1701 void store_cpu_user_regs(struct cpu_user_regs *regs)
1703 __vmread(GUEST_SS_SELECTOR, &regs->ss);
1704 __vmread(GUEST_RSP, &regs->esp);
1705 __vmread(GUEST_RFLAGS, &regs->eflags);
1706 __vmread(GUEST_CS_SELECTOR, &regs->cs);
1707 __vmread(GUEST_DS_SELECTOR, &regs->ds);
1708 __vmread(GUEST_ES_SELECTOR, &regs->es);
1709 __vmread(GUEST_RIP, &regs->eip);
1711 #endif
1713 #ifdef XEN_DEBUGGER
1714 void save_cpu_user_regs(struct cpu_user_regs *regs)
1716 __vmread(GUEST_SS_SELECTOR, &regs->xss);
1717 __vmread(GUEST_RSP, &regs->esp);
1718 __vmread(GUEST_RFLAGS, &regs->eflags);
1719 __vmread(GUEST_CS_SELECTOR, &regs->xcs);
1720 __vmread(GUEST_RIP, &regs->eip);
1722 __vmread(GUEST_GS_SELECTOR, &regs->xgs);
1723 __vmread(GUEST_FS_SELECTOR, &regs->xfs);
1724 __vmread(GUEST_ES_SELECTOR, &regs->xes);
1725 __vmread(GUEST_DS_SELECTOR, &regs->xds);
1728 void restore_cpu_user_regs(struct cpu_user_regs *regs)
1730 __vmwrite(GUEST_SS_SELECTOR, regs->xss);
1731 __vmwrite(GUEST_RSP, regs->esp);
1732 __vmwrite(GUEST_RFLAGS, regs->eflags);
1733 __vmwrite(GUEST_CS_SELECTOR, regs->xcs);
1734 __vmwrite(GUEST_RIP, regs->eip);
1736 __vmwrite(GUEST_GS_SELECTOR, regs->xgs);
1737 __vmwrite(GUEST_FS_SELECTOR, regs->xfs);
1738 __vmwrite(GUEST_ES_SELECTOR, regs->xes);
1739 __vmwrite(GUEST_DS_SELECTOR, regs->xds);
1741 #endif
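1742 /* Main VM-exit dispatcher; note that the guest register frame is passed by value. */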
1743 asmlinkage void vmx_vmexit_handler(struct cpu_user_regs regs)
1745 unsigned int exit_reason, idtv_info_field;
1746 unsigned long exit_qualification, eip, inst_len = 0;
1747 struct vcpu *v = current;
1748 int error;
1750 if ((error = __vmread(VM_EXIT_REASON, &exit_reason)))
1751 __hvm_bug(&regs);
1753 perfc_incra(vmexits, exit_reason);
1755 __vmread(IDT_VECTORING_INFO_FIELD, &idtv_info_field);
1756 if (idtv_info_field & INTR_INFO_VALID_MASK) {
1757 __vmwrite(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field);
1759 __vmread(VM_EXIT_INSTRUCTION_LEN, &inst_len);
1760 if (inst_len >= 1 && inst_len <= 15)
1761 __vmwrite(VM_ENTRY_INSTRUCTION_LEN, inst_len);
1763 if (idtv_info_field & 0x800) { /* valid error code */
1764 unsigned long error_code;
1765 __vmread(IDT_VECTORING_ERROR_CODE, &error_code);
1766 __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
1769 HVM_DBG_LOG(DBG_LEVEL_1, "idtv_info_field=%x", idtv_info_field);
1772 /* don't bother with H/W interrupts */
1773 if (exit_reason != EXIT_REASON_EXTERNAL_INTERRUPT &&
1774 exit_reason != EXIT_REASON_VMCALL &&
1775 exit_reason != EXIT_REASON_IO_INSTRUCTION)
1776 HVM_DBG_LOG(DBG_LEVEL_0, "exit reason = %x", exit_reason);
1778 if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) {
1779 printk("Failed vm entry\n");
1780 domain_crash_synchronous();
1781 return;
1785 __vmread(GUEST_RIP, &eip);
1786 TRACE_3D(TRC_VMX_VMEXIT, v->domain->domain_id, eip, exit_reason);
1787 TRACE_VMEXIT(0,exit_reason);
1790 switch (exit_reason) {
1791 case EXIT_REASON_EXCEPTION_NMI:
1793 /*
1794 * We don't set the software-interrupt exiting (INT n).
1795 * (1) We can get an exception (e.g. #PG) in the guest, or
1796 * (2) NMI
1797 */
1798 int error;
1799 unsigned int vector;
1800 unsigned long va;
1802 if ((error = __vmread(VM_EXIT_INTR_INFO, &vector))
1803 || !(vector & INTR_INFO_VALID_MASK))
1804 __hvm_bug(&regs);
1805 vector &= 0xff;
1807 TRACE_VMEXIT(1,vector);
1808 perfc_incra(cause_vector, vector);
1810 TRACE_3D(TRC_VMX_VECTOR, v->domain->domain_id, eip, vector);
1811 switch (vector) {
1812 #ifdef XEN_DEBUGGER
1813 case TRAP_debug:
1815 save_cpu_user_regs(&regs);
1816 pdb_handle_exception(1, &regs, 1);
1817 restore_cpu_user_regs(&regs);
1818 break;
1820 case TRAP_int3:
1822 save_cpu_user_regs(&regs);
1823 pdb_handle_exception(3, &regs, 1);
1824 restore_cpu_user_regs(&regs);
1825 break;
1827 #else
1828 case TRAP_debug:
1830 void store_cpu_user_regs(struct cpu_user_regs *regs);
1832 store_cpu_user_regs(&regs);
1833 __vm_clear_bit(GUEST_PENDING_DBG_EXCEPTIONS, PENDING_DEBUG_EXC_BS);
1835 domain_pause_for_debugger();
1836 do_sched_op(SCHEDOP_yield, 0);
1838 break;
1840 #endif
1841 case TRAP_no_device:
1843 vmx_do_no_device_fault();
1844 break;
1846 case TRAP_page_fault:
1848 __vmread(EXIT_QUALIFICATION, &va);
1849 __vmread(VM_EXIT_INTR_ERROR_CODE, &regs.error_code);
1851 TRACE_VMEXIT(3,regs.error_code);
1852 TRACE_VMEXIT(4,va);
1854 HVM_DBG_LOG(DBG_LEVEL_VMMU,
1855 "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx",
1856 (unsigned long)regs.eax, (unsigned long)regs.ebx,
1857 (unsigned long)regs.ecx, (unsigned long)regs.edx,
1858 (unsigned long)regs.esi, (unsigned long)regs.edi);
1859 v->arch.hvm_vcpu.mmio_op.inst_decoder_regs = &regs;
1861 if (!(error = vmx_do_page_fault(va, &regs))) {
1862 /*
1863 * Inject #PG using Interruption-Information Fields
1864 */
1865 vmx_inject_exception(v, TRAP_page_fault, regs.error_code);
1866 v->arch.hvm_vmx.cpu_cr2 = va;
1867 TRACE_3D(TRC_VMX_INT, v->domain->domain_id, TRAP_page_fault, va);
1869 break;
1871 case TRAP_nmi:
1872 do_nmi(&regs);
1873 break;
1874 default:
1875 vmx_reflect_exception(v);
1876 break;
1878 break;
1880 case EXIT_REASON_EXTERNAL_INTERRUPT:
1881 vmx_vmexit_do_extint(&regs);
1882 break;
1883 case EXIT_REASON_PENDING_INTERRUPT:
1884 __vmwrite(CPU_BASED_VM_EXEC_CONTROL,
1885 MONITOR_CPU_BASED_EXEC_CONTROLS);
1886 break;
1887 case EXIT_REASON_TASK_SWITCH:
1888 __hvm_bug(&regs);
1889 break;
1890 case EXIT_REASON_CPUID:
1891 __get_instruction_length(inst_len);
1892 vmx_vmexit_do_cpuid(regs.eax, &regs);
1893 __update_guest_eip(inst_len);
1894 break;
1895 case EXIT_REASON_HLT:
1896 __get_instruction_length(inst_len);
1897 __update_guest_eip(inst_len);
1898 vmx_vmexit_do_hlt();
1899 break;
1900 case EXIT_REASON_INVLPG:
1902 unsigned long va;
1904 __vmread(EXIT_QUALIFICATION, &va);
1905 vmx_vmexit_do_invlpg(va);
1906 __get_instruction_length(inst_len);
1907 __update_guest_eip(inst_len);
1908 break;
1910 case EXIT_REASON_VMCALL:
1911 __get_instruction_length(inst_len);
1912 __vmread(GUEST_RIP, &eip);
1913 __vmread(EXIT_QUALIFICATION, &exit_qualification);
1915 hvm_print_line(v, regs.eax); /* provides the current domain */
1916 __update_guest_eip(inst_len);
1917 break;
1918 case EXIT_REASON_CR_ACCESS:
1920 __vmread(GUEST_RIP, &eip);
1921 __get_instruction_length(inst_len);
1922 __vmread(EXIT_QUALIFICATION, &exit_qualification);
1924 HVM_DBG_LOG(DBG_LEVEL_1, "eip = %lx, inst_len =%lx, exit_qualification = %lx",
1925 eip, inst_len, exit_qualification);
1926 if (vmx_cr_access(exit_qualification, &regs))
1927 __update_guest_eip(inst_len);
1928 TRACE_VMEXIT(3,regs.error_code);
1929 TRACE_VMEXIT(4,exit_qualification);
1930 break;
1932 case EXIT_REASON_DR_ACCESS:
1933 __vmread(EXIT_QUALIFICATION, &exit_qualification);
1934 vmx_dr_access(exit_qualification, &regs);
1935 __get_instruction_length(inst_len);
1936 __update_guest_eip(inst_len);
1937 break;
1938 case EXIT_REASON_IO_INSTRUCTION:
1939 __vmread(EXIT_QUALIFICATION, &exit_qualification);
1940 __get_instruction_length(inst_len);
1941 vmx_io_instruction(&regs, exit_qualification, inst_len);
1942 TRACE_VMEXIT(4,exit_qualification);
1943 break;
1944 case EXIT_REASON_MSR_READ:
1945 __get_instruction_length(inst_len);
1946 vmx_do_msr_read(&regs);
1947 __update_guest_eip(inst_len);
1948 break;
1949 case EXIT_REASON_MSR_WRITE:
1950 __vmread(GUEST_RIP, &eip);
1951 vmx_do_msr_write(&regs);
1952 __get_instruction_length(inst_len);
1953 __update_guest_eip(inst_len);
1954 break;
1955 case EXIT_REASON_MWAIT_INSTRUCTION:
1956 __hvm_bug(&regs);
1957 break;
1958 default:
1959 __hvm_bug(&regs); /* should not happen */
1963 asmlinkage void vmx_load_cr2(void)
1965 struct vcpu *v = current;
1967 local_irq_disable();
1968 #ifdef __i386__
1969 asm volatile("movl %0,%%cr2": :"r" (v->arch.hvm_vmx.cpu_cr2));
1970 #else
1971 asm volatile("movq %0,%%cr2": :"r" (v->arch.hvm_vmx.cpu_cr2));
1972 #endif
1975 asmlinkage void vmx_trace_vmentry (void)
1977 TRACE_5D(TRC_VMENTRY,
1978 trace_values[smp_processor_id()][0],
1979 trace_values[smp_processor_id()][1],
1980 trace_values[smp_processor_id()][2],
1981 trace_values[smp_processor_id()][3],
1982 trace_values[smp_processor_id()][4]);
1983 TRACE_VMEXIT(0,9);
1984 TRACE_VMEXIT(1,9);
1985 TRACE_VMEXIT(2,9);
1986 TRACE_VMEXIT(3,9);
1987 TRACE_VMEXIT(4,9);
1988 return;
1991 asmlinkage void vmx_trace_vmexit (void)
1993 TRACE_3D(TRC_VMEXIT,0,0,0);
1994 return;
1996 #endif /* CONFIG_VMX */
1998 /*
1999 * Local variables:
2000 * mode: C
2001 * c-set-style: "BSD"
2002 * c-basic-offset: 4
2003 * tab-width: 4
2004 * indent-tabs-mode: nil
2005 * End:
2006 */