ia64/xen-unstable: xen/arch/x86/hvm/vmx/vmcs.c @ 16374:ab516ca6e984

vmx: Set HOST_FS and HOST_GS to unusable on vmexit.
Signed-off-by: Keir Fraser <keir@xensource.com>

author   Keir Fraser <keir@xensource.com>
date     Tue Nov 13 18:30:47 2007 +0000
parents  8d8d179b9b05
children a35586bd493b

/*
 * vmcs.c: VMCS management
 * Copyright (c) 2004, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 */

#include <xen/config.h>
#include <xen/init.h>
#include <xen/mm.h>
#include <xen/lib.h>
#include <xen/errno.h>
#include <xen/domain_page.h>
#include <asm/current.h>
#include <asm/cpufeature.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/hvm/hvm.h>
#include <asm/hvm/io.h>
#include <asm/hvm/support.h>
#include <asm/hvm/vmx/vmx.h>
#include <asm/hvm/vmx/vmcs.h>
#include <asm/flushtlb.h>
#include <xen/event.h>
#include <xen/kernel.h>
#include <xen/keyhandler.h>
#include <asm/shadow.h>
#include <asm/tboot.h>

/* Dynamic (run-time adjusted) execution control flags. */
u32 vmx_pin_based_exec_control __read_mostly;
u32 vmx_cpu_based_exec_control __read_mostly;
u32 vmx_secondary_exec_control __read_mostly;
u32 vmx_vmexit_control __read_mostly;
u32 vmx_vmentry_control __read_mostly;
bool_t cpu_has_vmx_ins_outs_instr_info __read_mostly;

static DEFINE_PER_CPU(struct vmcs_struct *, host_vmcs);
static DEFINE_PER_CPU(struct vmcs_struct *, current_vmcs);
static DEFINE_PER_CPU(struct list_head, active_vmcs_list);

static u32 vmcs_revision_id __read_mostly;
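
/*
 * Combine required (ctl_min) and optional (ctl_opt) control bits with the
 * capabilities reported by the given VMX capability MSR: the MSR's low
 * word gives bits that must be set, the high word gives bits that may be
 * set. BUG()s if a required bit is unsupported by the hardware.
 */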
static u32 adjust_vmx_controls(u32 ctl_min, u32 ctl_opt, u32 msr)
{
    u32 vmx_msr_low, vmx_msr_high, ctl = ctl_min | ctl_opt;

    rdmsr(msr, vmx_msr_low, vmx_msr_high);

    ctl &= vmx_msr_high; /* bit == 0 in high word ==> must be zero */
    ctl |= vmx_msr_low;  /* bit == 1 in low word  ==> must be one  */

    /* Ensure minimum (required) set of control bits are supported. */
    BUG_ON(ctl_min & ~ctl);

    return ctl;
}
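
/*
 * Probe the VMX capability MSRs and choose the execution, exit and entry
 * control settings that Xen will use. The first CPU through here records
 * the results in the global vmx_*_control variables; every subsequent CPU
 * merely checks that it reports identical capabilities.
 */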
static void vmx_init_vmcs_config(void)
{
    u32 vmx_msr_low, vmx_msr_high, min, opt;
    u32 _vmx_pin_based_exec_control;
    u32 _vmx_cpu_based_exec_control;
    u32 _vmx_secondary_exec_control = 0;
    u32 _vmx_vmexit_control;
    u32 _vmx_vmentry_control;

    min = (PIN_BASED_EXT_INTR_MASK |
           PIN_BASED_NMI_EXITING);
    opt = PIN_BASED_VIRTUAL_NMIS;
    _vmx_pin_based_exec_control = adjust_vmx_controls(
        min, opt, MSR_IA32_VMX_PINBASED_CTLS);

    min = (CPU_BASED_HLT_EXITING |
           CPU_BASED_INVLPG_EXITING |
           CPU_BASED_MONITOR_EXITING |
           CPU_BASED_MWAIT_EXITING |
           CPU_BASED_MOV_DR_EXITING |
           CPU_BASED_ACTIVATE_IO_BITMAP |
           CPU_BASED_USE_TSC_OFFSETING);
    opt = CPU_BASED_ACTIVATE_MSR_BITMAP;
    opt |= CPU_BASED_TPR_SHADOW;
    opt |= CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
    _vmx_cpu_based_exec_control = adjust_vmx_controls(
        min, opt, MSR_IA32_VMX_PROCBASED_CTLS);
#ifdef __x86_64__
    if ( !(_vmx_cpu_based_exec_control & CPU_BASED_TPR_SHADOW) )
    {
        min |= CPU_BASED_CR8_LOAD_EXITING | CPU_BASED_CR8_STORE_EXITING;
        _vmx_cpu_based_exec_control = adjust_vmx_controls(
            min, opt, MSR_IA32_VMX_PROCBASED_CTLS);
    }
#endif

    if ( _vmx_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS )
    {
        min = 0;
        opt = (SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
               SECONDARY_EXEC_WBINVD_EXITING);
        _vmx_secondary_exec_control = adjust_vmx_controls(
            min, opt, MSR_IA32_VMX_PROCBASED_CTLS2);
    }

#if defined(__i386__)
    /* If we can't virtualise APIC accesses, the TPR shadow is pointless. */
    if ( !(_vmx_secondary_exec_control &
           SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) )
        _vmx_cpu_based_exec_control &= ~CPU_BASED_TPR_SHADOW;
#endif

    min = VM_EXIT_ACK_INTR_ON_EXIT;
    opt = 0;
#ifdef __x86_64__
    min |= VM_EXIT_IA32E_MODE;
#endif
    _vmx_vmexit_control = adjust_vmx_controls(
        min, opt, MSR_IA32_VMX_EXIT_CTLS);

    min = opt = 0;
    _vmx_vmentry_control = adjust_vmx_controls(
        min, opt, MSR_IA32_VMX_ENTRY_CTLS);

    rdmsr(MSR_IA32_VMX_BASIC, vmx_msr_low, vmx_msr_high);

    if ( !vmx_pin_based_exec_control )
    {
        /* First time through. */
        vmcs_revision_id = vmx_msr_low;
        vmx_pin_based_exec_control = _vmx_pin_based_exec_control;
        vmx_cpu_based_exec_control = _vmx_cpu_based_exec_control;
        vmx_secondary_exec_control = _vmx_secondary_exec_control;
        vmx_vmexit_control = _vmx_vmexit_control;
        vmx_vmentry_control = _vmx_vmentry_control;
        cpu_has_vmx_ins_outs_instr_info = !!(vmx_msr_high & (1U<<22));
    }
    else
    {
        /* Globals are already initialised: re-check them. */
        BUG_ON(vmcs_revision_id != vmx_msr_low);
        BUG_ON(vmx_pin_based_exec_control != _vmx_pin_based_exec_control);
        BUG_ON(vmx_cpu_based_exec_control != _vmx_cpu_based_exec_control);
        BUG_ON(vmx_secondary_exec_control != _vmx_secondary_exec_control);
        BUG_ON(vmx_vmexit_control != _vmx_vmexit_control);
        BUG_ON(vmx_vmentry_control != _vmx_vmentry_control);
        BUG_ON(cpu_has_vmx_ins_outs_instr_info != !!(vmx_msr_high & (1U<<22)));
    }

    /* IA-32 SDM Vol 3B: VMCS size is never greater than 4kB. */
    BUG_ON((vmx_msr_high & 0x1fff) > PAGE_SIZE);

#ifdef __x86_64__
    /* IA-32 SDM Vol 3B: 64-bit CPUs always have VMX_BASIC_MSR[48]==0. */
    BUG_ON(vmx_msr_high & (1u<<16));
#endif

    /* Require Write-Back (WB) memory type for VMCS accesses. */
    BUG_ON(((vmx_msr_high >> 18) & 15) != 6);
}
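
/* Allocate a zeroed VMCS region, stamped with the hardware's revision id. */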
static struct vmcs_struct *vmx_alloc_vmcs(void)
{
    struct vmcs_struct *vmcs;

    if ( (vmcs = alloc_xenheap_page()) == NULL )
    {
        gdprintk(XENLOG_WARNING, "Failed to allocate VMCS.\n");
        return NULL;
    }

    clear_page(vmcs);
    vmcs->vmcs_revision_id = vmcs_revision_id;

    return vmcs;
}

static void vmx_free_vmcs(struct vmcs_struct *vmcs)
{
    free_xenheap_page(vmcs);
}
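
/*
 * VMCLEAR the vcpu's VMCS on its active CPU and remove it from that CPU's
 * active list. Runs with interrupts disabled, either locally or via IPI
 * from vmx_clear_vmcs().
 */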
static void __vmx_clear_vmcs(void *info)
{
    struct vcpu *v = info;
    struct arch_vmx_struct *arch_vmx = &v->arch.hvm_vmx;

    /* Otherwise we can nest (vmx_cpu_down() vs. vmx_clear_vmcs()). */
    ASSERT(!local_irq_is_enabled());

    if ( arch_vmx->active_cpu == smp_processor_id() )
    {
        __vmpclear(virt_to_maddr(arch_vmx->vmcs));

        arch_vmx->active_cpu = -1;
        arch_vmx->launched = 0;

        list_del(&arch_vmx->active_list);

        if ( arch_vmx->vmcs == this_cpu(current_vmcs) )
            this_cpu(current_vmcs) = NULL;
    }
}

static void vmx_clear_vmcs(struct vcpu *v)
{
    int cpu = v->arch.hvm_vmx.active_cpu;

    if ( cpu != -1 )
        on_selected_cpus(cpumask_of_cpu(cpu), __vmx_clear_vmcs, v, 1, 1);
}
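
/* Make the vcpu's VMCS current on this CPU (VMPTRLD) and mark it active here. */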
static void vmx_load_vmcs(struct vcpu *v)
{
    unsigned long flags;

    local_irq_save(flags);

    if ( v->arch.hvm_vmx.active_cpu == -1 )
    {
        list_add(&v->arch.hvm_vmx.active_list, &this_cpu(active_vmcs_list));
        v->arch.hvm_vmx.active_cpu = smp_processor_id();
    }

    ASSERT(v->arch.hvm_vmx.active_cpu == smp_processor_id());

    __vmptrld(virt_to_maddr(v->arch.hvm_vmx.vmcs));
    this_cpu(current_vmcs) = v->arch.hvm_vmx.vmcs;

    local_irq_restore(flags);
}
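
/*
 * Enter VMX operation on this CPU: verify the CR0 fixed bits and the
 * (BIOS-controlled) feature control MSR, allocate the per-CPU VMXON region
 * if necessary and execute VMXON. Returns 1 on success, 0 on failure.
 */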
int vmx_cpu_up(void)
{
    u32 eax, edx;
    int cpu = smp_processor_id();
    u64 cr0, vmx_cr0_fixed0, vmx_cr0_fixed1;

    BUG_ON(!(read_cr4() & X86_CR4_VMXE));

    /*
     * Ensure the current processor operating mode meets
     * the required CR0 fixed bits in VMX operation.
     */
    cr0 = read_cr0();
    rdmsrl(MSR_IA32_VMX_CR0_FIXED0, vmx_cr0_fixed0);
    rdmsrl(MSR_IA32_VMX_CR0_FIXED1, vmx_cr0_fixed1);
    if ( (~cr0 & vmx_cr0_fixed0) || (cr0 & ~vmx_cr0_fixed1) )
    {
        printk("CPU%d: some settings of host CR0 are "
               "not allowed in VMX operation.\n", cpu);
        return 0;
    }

    rdmsr(IA32_FEATURE_CONTROL_MSR, eax, edx);

    if ( eax & IA32_FEATURE_CONTROL_MSR_LOCK )
    {
        if ( !(eax & (IA32_FEATURE_CONTROL_MSR_ENABLE_VMXON_OUTSIDE_SMX |
                      IA32_FEATURE_CONTROL_MSR_ENABLE_VMXON_INSIDE_SMX)) )
        {
            printk("CPU%d: VMX disabled by BIOS.\n", cpu);
            return 0;
        }
    }
    else
    {
        eax = (IA32_FEATURE_CONTROL_MSR_LOCK |
               IA32_FEATURE_CONTROL_MSR_ENABLE_VMXON_OUTSIDE_SMX |
               IA32_FEATURE_CONTROL_MSR_ENABLE_VMXON_INSIDE_SMX);
        wrmsr(IA32_FEATURE_CONTROL_MSR, eax, 0);
    }

    if ( !tboot_in_measured_env() &&
         !(eax & IA32_FEATURE_CONTROL_MSR_ENABLE_VMXON_OUTSIDE_SMX) )
    {
        printk("VMX only allowed in SMX but SMX not active.\n");
        return 0;
    }

    vmx_init_vmcs_config();

    INIT_LIST_HEAD(&this_cpu(active_vmcs_list));

    if ( this_cpu(host_vmcs) == NULL )
    {
        this_cpu(host_vmcs) = vmx_alloc_vmcs();
        if ( this_cpu(host_vmcs) == NULL )
        {
            printk("CPU%d: Could not allocate host VMCS\n", cpu);
            return 0;
        }
    }

    if ( __vmxon(virt_to_maddr(this_cpu(host_vmcs))) )
    {
        printk("CPU%d: VMXON failed\n", cpu);
        return 0;
    }

    return 1;
}

void vmx_cpu_down(void)
{
    struct list_head *active_vmcs_list = &this_cpu(active_vmcs_list);
    unsigned long flags;

    local_irq_save(flags);

    while ( !list_empty(active_vmcs_list) )
        __vmx_clear_vmcs(list_entry(active_vmcs_list->next,
                                    struct vcpu, arch.hvm_vmx.active_list));

    BUG_ON(!(read_cr4() & X86_CR4_VMXE));
    __vmxoff();

    local_irq_restore(flags);
}
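
/*
 * vmx_vmcs_enter/exit give the caller temporary access to the VMCS of a
 * vcpu other than current: the foreign vcpu is paused, its vmcs_lock is
 * taken and its VMCS is loaded on this CPU. Nested entries for the same
 * vcpu are counted in the per-CPU foreign_vmcs structure.
 */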
struct foreign_vmcs {
    struct vcpu *v;
    unsigned int count;
};
static DEFINE_PER_CPU(struct foreign_vmcs, foreign_vmcs);

void vmx_vmcs_enter(struct vcpu *v)
{
    struct foreign_vmcs *fv;

    /*
     * NB. We must *always* run an HVM VCPU on its own VMCS, except for
     * vmx_vmcs_enter/exit critical regions.
     */
    if ( likely(v == current) )
        return;

    fv = &this_cpu(foreign_vmcs);

    if ( fv->v == v )
    {
        BUG_ON(fv->count == 0);
    }
    else
    {
        BUG_ON(fv->v != NULL);
        BUG_ON(fv->count != 0);

        vcpu_pause(v);
        spin_lock(&v->arch.hvm_vmx.vmcs_lock);

        vmx_clear_vmcs(v);
        vmx_load_vmcs(v);

        fv->v = v;
    }

    fv->count++;
}

void vmx_vmcs_exit(struct vcpu *v)
{
    struct foreign_vmcs *fv;

    if ( likely(v == current) )
        return;

    fv = &this_cpu(foreign_vmcs);
    BUG_ON(fv->v != v);
    BUG_ON(fv->count == 0);

    if ( --fv->count == 0 )
    {
        /* Don't confuse vmx_do_resume (for @v or @current!) */
        vmx_clear_vmcs(v);
        if ( is_hvm_vcpu(current) )
            vmx_load_vmcs(current);

        spin_unlock(&v->arch.hvm_vmx.vmcs_lock);
        vcpu_unpause(v);

        fv->v = NULL;
    }
}

struct xgt_desc {
    unsigned short size;
    unsigned long address __attribute__((packed));
};
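
/*
 * Load the host-state fields that depend on the physical CPU this vcpu is
 * currently running on: IDTR, GDTR, TR and the hypervisor stack pointer.
 */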
static void vmx_set_host_env(struct vcpu *v)
{
    unsigned int tr, cpu;
    struct xgt_desc desc;

    cpu = smp_processor_id();

    __asm__ __volatile__ ( "sidt (%0) \n" : : "a" (&desc) : "memory" );
    __vmwrite(HOST_IDTR_BASE, desc.address);

    __asm__ __volatile__ ( "sgdt (%0) \n" : : "a" (&desc) : "memory" );
    __vmwrite(HOST_GDTR_BASE, desc.address);

    __asm__ __volatile__ ( "str (%0) \n" : : "a" (&tr) : "memory" );
    __vmwrite(HOST_TR_SELECTOR, tr);
    __vmwrite(HOST_TR_BASE, (unsigned long)&init_tss[cpu]);

    /*
     * Skip end of cpu_user_regs when entering the hypervisor because the
     * CPU does not save context onto the stack. SS,RSP,CS,RIP,RFLAGS,etc
     * all get saved into the VMCS instead.
     */
    __vmwrite(HOST_RSP,
              (unsigned long)&get_cpu_info()->guest_cpu_user_regs.error_code);
}

void vmx_disable_intercept_for_msr(struct vcpu *v, u32 msr)
{
    char *msr_bitmap = v->arch.hvm_vmx.msr_bitmap;

    /* VMX MSR bitmap supported? */
    if ( msr_bitmap == NULL )
        return;

    /*
     * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
     * have the write-low and read-high bitmap offsets the wrong way round.
     * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
     */
    if ( msr <= 0x1fff )
    {
        __clear_bit(msr, msr_bitmap + 0x000); /* read-low */
        __clear_bit(msr, msr_bitmap + 0x800); /* write-low */
    }
    else if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) )
    {
        msr &= 0x1fff;
        __clear_bit(msr, msr_bitmap + 0x400); /* read-high */
        __clear_bit(msr, msr_bitmap + 0xc00); /* write-high */
    }
}

#define GUEST_SEGMENT_LIMIT 0xffffffff
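
/*
 * Set up the control, host-state and initial guest-state fields of a
 * newly allocated VMCS.
 */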
static int construct_vmcs(struct vcpu *v)
{
    union vmcs_arbytes arbytes;

    vmx_vmcs_enter(v);

    /* VMCS controls. */
    __vmwrite(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_control);
    __vmwrite(VM_EXIT_CONTROLS, vmx_vmexit_control);
    __vmwrite(VM_ENTRY_CONTROLS, vmx_vmentry_control);
    __vmwrite(CPU_BASED_VM_EXEC_CONTROL, vmx_cpu_based_exec_control);
    v->arch.hvm_vmx.exec_control = vmx_cpu_based_exec_control;
    if ( vmx_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS )
        __vmwrite(SECONDARY_VM_EXEC_CONTROL, vmx_secondary_exec_control);

    /* MSR access bitmap. */
    if ( cpu_has_vmx_msr_bitmap )
    {
        char *msr_bitmap = alloc_xenheap_page();

        if ( msr_bitmap == NULL )
            return -ENOMEM;

        memset(msr_bitmap, ~0, PAGE_SIZE);
        v->arch.hvm_vmx.msr_bitmap = msr_bitmap;
        __vmwrite(MSR_BITMAP, virt_to_maddr(msr_bitmap));

        vmx_disable_intercept_for_msr(v, MSR_FS_BASE);
        vmx_disable_intercept_for_msr(v, MSR_GS_BASE);
        vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_CS);
        vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_ESP);
        vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_EIP);
    }

    /* I/O access bitmap. */
    __vmwrite(IO_BITMAP_A, virt_to_maddr(hvm_io_bitmap));
    __vmwrite(IO_BITMAP_B, virt_to_maddr(hvm_io_bitmap + PAGE_SIZE));

    /* Host data selectors. */
    __vmwrite(HOST_SS_SELECTOR, __HYPERVISOR_DS);
    __vmwrite(HOST_DS_SELECTOR, __HYPERVISOR_DS);
    __vmwrite(HOST_ES_SELECTOR, __HYPERVISOR_DS);
    __vmwrite(HOST_FS_SELECTOR, 0);
    __vmwrite(HOST_GS_SELECTOR, 0);
    __vmwrite(HOST_FS_BASE, 0);
    __vmwrite(HOST_GS_BASE, 0);

    /* Host control registers. */
    __vmwrite(HOST_CR0, read_cr0() | X86_CR0_TS);
    __vmwrite(HOST_CR4, mmu_cr4_features);

    /* Host CS:RIP. */
    __vmwrite(HOST_CS_SELECTOR, __HYPERVISOR_CS);
    __vmwrite(HOST_RIP, (unsigned long)vmx_asm_vmexit_handler);

    /* MSR intercepts. */
    __vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0);
    __vmwrite(VM_EXIT_MSR_STORE_COUNT, 0);
    __vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0);

    __vmwrite(VM_ENTRY_INTR_INFO, 0);

    __vmwrite(CR0_GUEST_HOST_MASK, ~0UL);
    __vmwrite(CR4_GUEST_HOST_MASK, ~0UL);

    __vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0);
    __vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, 0);

    __vmwrite(CR3_TARGET_COUNT, 0);

    __vmwrite(GUEST_ACTIVITY_STATE, 0);

    /* Guest segment bases. */
    __vmwrite(GUEST_ES_BASE, 0);
    __vmwrite(GUEST_SS_BASE, 0);
    __vmwrite(GUEST_DS_BASE, 0);
    __vmwrite(GUEST_FS_BASE, 0);
    __vmwrite(GUEST_GS_BASE, 0);
    __vmwrite(GUEST_CS_BASE, 0);

    /* Guest segment limits. */
    __vmwrite(GUEST_ES_LIMIT, GUEST_SEGMENT_LIMIT);
    __vmwrite(GUEST_SS_LIMIT, GUEST_SEGMENT_LIMIT);
    __vmwrite(GUEST_DS_LIMIT, GUEST_SEGMENT_LIMIT);
    __vmwrite(GUEST_FS_LIMIT, GUEST_SEGMENT_LIMIT);
    __vmwrite(GUEST_GS_LIMIT, GUEST_SEGMENT_LIMIT);
    __vmwrite(GUEST_CS_LIMIT, GUEST_SEGMENT_LIMIT);

    /* Guest segment AR bytes. */
    arbytes.bytes = 0;
    arbytes.fields.seg_type = 0x3; /* type = 3 */
    arbytes.fields.s = 1; /* code or data, i.e. not system */
    arbytes.fields.dpl = 0; /* DPL = 0 */
    arbytes.fields.p = 1; /* segment present */
    arbytes.fields.default_ops_size = 1; /* 32-bit */
    arbytes.fields.g = 1;
    arbytes.fields.null_bit = 0; /* not null */
    __vmwrite(GUEST_ES_AR_BYTES, arbytes.bytes);
    __vmwrite(GUEST_SS_AR_BYTES, arbytes.bytes);
    __vmwrite(GUEST_DS_AR_BYTES, arbytes.bytes);
    __vmwrite(GUEST_FS_AR_BYTES, arbytes.bytes);
    __vmwrite(GUEST_GS_AR_BYTES, arbytes.bytes);
    arbytes.fields.seg_type = 0xb; /* type = 0xb */
    __vmwrite(GUEST_CS_AR_BYTES, arbytes.bytes);

    /* Guest GDT. */
    __vmwrite(GUEST_GDTR_BASE, 0);
    __vmwrite(GUEST_GDTR_LIMIT, 0);

    /* Guest IDT. */
    __vmwrite(GUEST_IDTR_BASE, 0);
    __vmwrite(GUEST_IDTR_LIMIT, 0);

    /* Guest LDT and TSS. */
    arbytes.fields.s = 0; /* not code or data segment */
    arbytes.fields.seg_type = 0x2; /* LDT */
    arbytes.fields.default_ops_size = 0; /* 16-bit */
    arbytes.fields.g = 0;
    __vmwrite(GUEST_LDTR_AR_BYTES, arbytes.bytes);
    arbytes.fields.seg_type = 0xb; /* 32-bit TSS (busy) */
    __vmwrite(GUEST_TR_AR_BYTES, arbytes.bytes);

    __vmwrite(GUEST_INTERRUPTIBILITY_INFO, 0);
    __vmwrite(GUEST_DR7, 0);
    __vmwrite(VMCS_LINK_POINTER, ~0UL);
#if defined(__i386__)
    __vmwrite(VMCS_LINK_POINTER_HIGH, ~0UL);
#endif

    __vmwrite(EXCEPTION_BITMAP, HVM_TRAP_MASK | (1U << TRAP_page_fault));

    v->arch.hvm_vcpu.guest_cr[0] = X86_CR0_PE | X86_CR0_ET;
    hvm_update_guest_cr(v, 0);

    v->arch.hvm_vcpu.guest_cr[4] = 0;
    hvm_update_guest_cr(v, 4);

    if ( cpu_has_vmx_tpr_shadow )
    {
        __vmwrite(VIRTUAL_APIC_PAGE_ADDR,
                  page_to_maddr(vcpu_vlapic(v)->regs_page));
        __vmwrite(TPR_THRESHOLD, 0);
    }

    __vmwrite(GUEST_LDTR_SELECTOR, 0);
    __vmwrite(GUEST_LDTR_BASE, 0);
    __vmwrite(GUEST_LDTR_LIMIT, 0);

    __vmwrite(GUEST_TR_BASE, 0);
    __vmwrite(GUEST_TR_LIMIT, 0xff);

    vmx_vmcs_exit(v);

    paging_update_paging_modes(v); /* will update HOST & GUEST_CR3 as reqd */

    vmx_vlapic_msr_changed(v);

    return 0;
}
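
/*
 * The functions below manage the MSR save/load areas processed
 * automatically by the CPU on vmexit and vmentry. The guest area is used
 * for both VM_EXIT_MSR_STORE and VM_ENTRY_MSR_LOAD; the separate host
 * area backs VM_EXIT_MSR_LOAD.
 */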
int vmx_read_guest_msr(struct vcpu *v, u32 msr, u64 *val)
{
    unsigned int i, msr_count = v->arch.hvm_vmx.msr_count;
    const struct vmx_msr_entry *msr_area = v->arch.hvm_vmx.msr_area;

    for ( i = 0; i < msr_count; i++ )
    {
        if ( msr_area[i].index == msr )
        {
            *val = msr_area[i].data;
            return 0;
        }
    }

    return -ESRCH;
}

int vmx_write_guest_msr(struct vcpu *v, u32 msr, u64 val)
{
    unsigned int i, msr_count = v->arch.hvm_vmx.msr_count;
    struct vmx_msr_entry *msr_area = v->arch.hvm_vmx.msr_area;

    for ( i = 0; i < msr_count; i++ )
    {
        if ( msr_area[i].index == msr )
        {
            msr_area[i].data = val;
            return 0;
        }
    }

    return -ESRCH;
}

int vmx_add_guest_msr(struct vcpu *v, u32 msr)
{
    unsigned int i, msr_count = v->arch.hvm_vmx.msr_count;
    struct vmx_msr_entry *msr_area = v->arch.hvm_vmx.msr_area;

    for ( i = 0; i < msr_count; i++ )
        if ( msr_area[i].index == msr )
            return 0;

    if ( msr_count == (PAGE_SIZE / sizeof(struct vmx_msr_entry)) )
        return -ENOSPC;

    if ( msr_area == NULL )
    {
        if ( (msr_area = alloc_xenheap_page()) == NULL )
            return -ENOMEM;
        v->arch.hvm_vmx.msr_area = msr_area;
        __vmwrite(VM_EXIT_MSR_STORE_ADDR, virt_to_maddr(msr_area));
        __vmwrite(VM_ENTRY_MSR_LOAD_ADDR, virt_to_maddr(msr_area));
    }

    msr_area[msr_count].index = msr;
    msr_area[msr_count].mbz = 0;
    msr_area[msr_count].data = 0;
    v->arch.hvm_vmx.msr_count = ++msr_count;
    __vmwrite(VM_EXIT_MSR_STORE_COUNT, msr_count);
    __vmwrite(VM_ENTRY_MSR_LOAD_COUNT, msr_count);

    return 0;
}

int vmx_add_host_load_msr(struct vcpu *v, u32 msr)
{
    unsigned int i, msr_count = v->arch.hvm_vmx.host_msr_count;
    struct vmx_msr_entry *msr_area = v->arch.hvm_vmx.host_msr_area;

    for ( i = 0; i < msr_count; i++ )
        if ( msr_area[i].index == msr )
            return 0;

    if ( msr_count == (PAGE_SIZE / sizeof(struct vmx_msr_entry)) )
        return -ENOSPC;

    if ( msr_area == NULL )
    {
        if ( (msr_area = alloc_xenheap_page()) == NULL )
            return -ENOMEM;
        v->arch.hvm_vmx.host_msr_area = msr_area;
        __vmwrite(VM_EXIT_MSR_LOAD_ADDR, virt_to_maddr(msr_area));
    }

    msr_area[msr_count].index = msr;
    msr_area[msr_count].mbz = 0;
    rdmsrl(msr, msr_area[msr_count].data);
    v->arch.hvm_vmx.host_msr_count = ++msr_count;
    __vmwrite(VM_EXIT_MSR_LOAD_COUNT, msr_count);

    return 0;
}
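
/* Allocate a vcpu's VMCS if it does not yet exist, then construct its contents. */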
int vmx_create_vmcs(struct vcpu *v)
{
    struct arch_vmx_struct *arch_vmx = &v->arch.hvm_vmx;
    int rc;

    if ( arch_vmx->vmcs == NULL )
    {
        if ( (arch_vmx->vmcs = vmx_alloc_vmcs()) == NULL )
            return -ENOMEM;

        INIT_LIST_HEAD(&arch_vmx->active_list);
        __vmpclear(virt_to_maddr(arch_vmx->vmcs));
        arch_vmx->active_cpu = -1;
        arch_vmx->launched = 0;
    }

    if ( (rc = construct_vmcs(v)) != 0 )
    {
        vmx_free_vmcs(arch_vmx->vmcs);
        arch_vmx->vmcs = NULL;
        return rc;
    }

    return 0;
}

void vmx_destroy_vmcs(struct vcpu *v)
{
    struct arch_vmx_struct *arch_vmx = &v->arch.hvm_vmx;

    if ( arch_vmx->vmcs == NULL )
        return;

    vmx_clear_vmcs(v);

    vmx_free_vmcs(arch_vmx->vmcs);
    arch_vmx->vmcs = NULL;
}

void vm_launch_fail(unsigned long eflags)
{
    unsigned long error = __vmread(VM_INSTRUCTION_ERROR);
    printk("<vm_launch_fail> error code %lx\n", error);
    domain_crash_synchronous();
}

void vm_resume_fail(unsigned long eflags)
{
    unsigned long error = __vmread(VM_INSTRUCTION_ERROR);
    printk("<vm_resume_fail> error code %lx\n", error);
    domain_crash_synchronous();
}

static void flush_cache(void *info)
{
    wbinvd();
}
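
/*
 * Re-establish the vcpu's VMCS before entering the guest: if the vcpu has
 * moved to a different physical CPU its VMCS is cleared on the old CPU and
 * reloaded here, and the per-CPU host-state fields are refreshed. The
 * exception bitmap is also updated when a debugger attaches or detaches.
 */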
void vmx_do_resume(struct vcpu *v)
{
    bool_t debug_state;

    if ( v->arch.hvm_vmx.active_cpu == smp_processor_id() )
    {
        if ( v->arch.hvm_vmx.vmcs != this_cpu(current_vmcs) )
            vmx_load_vmcs(v);
    }
    else
    {
        /*
         * For a pass-through domain, the guest PCI-E device driver may use
         * non-snooped I/O and explicitly WBINVD or CLFLUSH a RAM region.
         * If the vcpu migrates before the WBINVD or CLFLUSH, flush the old
         * CPU's cache to maintain data consistency.
         */
        if ( !list_empty(&(domain_hvm_iommu(v->domain)->pdev_list)) )
        {
            int cpu = v->arch.hvm_vmx.active_cpu;
            if ( cpu != -1 )
                on_selected_cpus(cpumask_of_cpu(cpu), flush_cache, NULL, 1, 1);
        }

        vmx_clear_vmcs(v);
        vmx_load_vmcs(v);
        hvm_migrate_timers(v);
        vmx_set_host_env(v);
    }

    debug_state = v->domain->debugger_attached;
    if ( unlikely(v->arch.hvm_vcpu.debug_state_latch != debug_state) )
    {
        unsigned long intercepts = __vmread(EXCEPTION_BITMAP);
        unsigned long mask = (1U << TRAP_debug) | (1U << TRAP_int3);
        v->arch.hvm_vcpu.debug_state_latch = debug_state;
        if ( debug_state )
            intercepts |= mask;
        else
            intercepts &= ~mask;
        __vmwrite(EXCEPTION_BITMAP, intercepts);
    }

    hvm_do_resume(v);
    reset_stack_and_jump(vmx_asm_do_vmentry);
}

static void vmx_dump_sel(char *name, enum x86_segment seg)
{
    struct segment_register sreg;
    hvm_get_segment_register(current, seg, &sreg);
    printk("%s: sel=0x%04x, attr=0x%05x, limit=0x%08x, base=0x%016llx\n",
           name, sreg.sel, sreg.attr.bytes, sreg.limit,
           (unsigned long long)sreg.base);
}
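
/* Read a VMCS field for dumping; returns 0 if the VMREAD fails. */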
static unsigned long vmr(unsigned long field)
{
    int rc;
    unsigned long val;
    val = __vmread_safe(field, &rc);
    return rc ? 0 : val;
}

void vmcs_dump_vcpu(void)
{
    unsigned long long x;

    printk("*** Guest State ***\n");
    printk("CR0: actual=0x%016llx, shadow=0x%016llx, gh_mask=%016llx\n",
           (unsigned long long)vmr(GUEST_CR0),
           (unsigned long long)vmr(CR0_READ_SHADOW),
           (unsigned long long)vmr(CR0_GUEST_HOST_MASK));
    printk("CR4: actual=0x%016llx, shadow=0x%016llx, gh_mask=%016llx\n",
           (unsigned long long)vmr(GUEST_CR4),
           (unsigned long long)vmr(CR4_READ_SHADOW),
           (unsigned long long)vmr(CR4_GUEST_HOST_MASK));
    printk("CR3: actual=0x%016llx, target_count=%d\n",
           (unsigned long long)vmr(GUEST_CR3),
           (int)vmr(CR3_TARGET_COUNT));
    printk(" target0=%016llx, target1=%016llx\n",
           (unsigned long long)vmr(CR3_TARGET_VALUE0),
           (unsigned long long)vmr(CR3_TARGET_VALUE1));
    printk(" target2=%016llx, target3=%016llx\n",
           (unsigned long long)vmr(CR3_TARGET_VALUE2),
           (unsigned long long)vmr(CR3_TARGET_VALUE3));
    printk("RSP = 0x%016llx RIP = 0x%016llx\n",
           (unsigned long long)vmr(GUEST_RSP),
           (unsigned long long)vmr(GUEST_RIP));
    printk("RFLAGS=0x%016llx DR7 = 0x%016llx\n",
           (unsigned long long)vmr(GUEST_RFLAGS),
           (unsigned long long)vmr(GUEST_DR7));
    printk("Sysenter RSP=%016llx CS:RIP=%04x:%016llx\n",
           (unsigned long long)vmr(GUEST_SYSENTER_ESP),
           (int)vmr(GUEST_SYSENTER_CS),
           (unsigned long long)vmr(GUEST_SYSENTER_EIP));
    vmx_dump_sel("CS", x86_seg_cs);
    vmx_dump_sel("DS", x86_seg_ds);
    vmx_dump_sel("SS", x86_seg_ss);
    vmx_dump_sel("ES", x86_seg_es);
    vmx_dump_sel("FS", x86_seg_fs);
    vmx_dump_sel("GS", x86_seg_gs);
    vmx_dump_sel("GDTR", x86_seg_gdtr);
    vmx_dump_sel("LDTR", x86_seg_ldtr);
    vmx_dump_sel("IDTR", x86_seg_idtr);
    vmx_dump_sel("TR", x86_seg_tr);
    x = (unsigned long long)vmr(TSC_OFFSET_HIGH) << 32;
    x |= (uint32_t)vmr(TSC_OFFSET);
    printk("TSC Offset = %016llx\n", x);
    x = (unsigned long long)vmr(GUEST_IA32_DEBUGCTL_HIGH) << 32;
    x |= (uint32_t)vmr(GUEST_IA32_DEBUGCTL);
    printk("DebugCtl=%016llx DebugExceptions=%016llx\n", x,
           (unsigned long long)vmr(GUEST_PENDING_DBG_EXCEPTIONS));
    printk("Interruptibility=%04x ActivityState=%04x\n",
           (int)vmr(GUEST_INTERRUPTIBILITY_INFO),
           (int)vmr(GUEST_ACTIVITY_STATE));

    printk("*** Host State ***\n");
    printk("RSP = 0x%016llx RIP = 0x%016llx\n",
           (unsigned long long)vmr(HOST_RSP),
           (unsigned long long)vmr(HOST_RIP));
    printk("CS=%04x DS=%04x ES=%04x FS=%04x GS=%04x SS=%04x TR=%04x\n",
           (uint16_t)vmr(HOST_CS_SELECTOR),
           (uint16_t)vmr(HOST_DS_SELECTOR),
           (uint16_t)vmr(HOST_ES_SELECTOR),
           (uint16_t)vmr(HOST_FS_SELECTOR),
           (uint16_t)vmr(HOST_GS_SELECTOR),
           (uint16_t)vmr(HOST_SS_SELECTOR),
           (uint16_t)vmr(HOST_TR_SELECTOR));
    printk("FSBase=%016llx GSBase=%016llx TRBase=%016llx\n",
           (unsigned long long)vmr(HOST_FS_BASE),
           (unsigned long long)vmr(HOST_GS_BASE),
           (unsigned long long)vmr(HOST_TR_BASE));
    printk("GDTBase=%016llx IDTBase=%016llx\n",
           (unsigned long long)vmr(HOST_GDTR_BASE),
           (unsigned long long)vmr(HOST_IDTR_BASE));
    printk("CR0=%016llx CR3=%016llx CR4=%016llx\n",
           (unsigned long long)vmr(HOST_CR0),
           (unsigned long long)vmr(HOST_CR3),
           (unsigned long long)vmr(HOST_CR4));
    printk("Sysenter RSP=%016llx CS:RIP=%04x:%016llx\n",
           (unsigned long long)vmr(HOST_IA32_SYSENTER_ESP),
           (int)vmr(HOST_IA32_SYSENTER_CS),
           (unsigned long long)vmr(HOST_IA32_SYSENTER_EIP));

    printk("*** Control State ***\n");
    printk("PinBased=%08x CPUBased=%08x SecondaryExec=%08x\n",
           (uint32_t)vmr(PIN_BASED_VM_EXEC_CONTROL),
           (uint32_t)vmr(CPU_BASED_VM_EXEC_CONTROL),
           (uint32_t)vmr(SECONDARY_VM_EXEC_CONTROL));
    printk("EntryControls=%08x ExitControls=%08x\n",
           (uint32_t)vmr(VM_ENTRY_CONTROLS),
           (uint32_t)vmr(VM_EXIT_CONTROLS));
    printk("ExceptionBitmap=%08x\n",
           (uint32_t)vmr(EXCEPTION_BITMAP));
    printk("VMEntry: intr_info=%08x errcode=%08x ilen=%08x\n",
           (uint32_t)vmr(VM_ENTRY_INTR_INFO),
           (uint32_t)vmr(VM_ENTRY_EXCEPTION_ERROR_CODE),
           (uint32_t)vmr(VM_ENTRY_INSTRUCTION_LEN));
    printk("VMExit: intr_info=%08x errcode=%08x ilen=%08x\n",
           (uint32_t)vmr(VM_EXIT_INTR_INFO),
           (uint32_t)vmr(VM_EXIT_INTR_ERROR_CODE),
           (uint32_t)vmr(VM_EXIT_INSTRUCTION_LEN));
    printk(" reason=%08x qualification=%08x\n",
           (uint32_t)vmr(VM_EXIT_REASON),
           (uint32_t)vmr(EXIT_QUALIFICATION));
    printk("IDTVectoring: info=%08x errcode=%08x\n",
           (uint32_t)vmr(IDT_VECTORING_INFO),
           (uint32_t)vmr(IDT_VECTORING_ERROR_CODE));
    printk("TPR Threshold = 0x%02x\n",
           (uint32_t)vmr(TPR_THRESHOLD));
}

static void vmcs_dump(unsigned char ch)
{
    struct domain *d;
    struct vcpu *v;

    printk("*********** VMCS Areas **************\n");

    rcu_read_lock(&domlist_read_lock);

    for_each_domain ( d )
    {
        if ( !is_hvm_domain(d) )
            continue;
        printk("\n>>> Domain %d <<<\n", d->domain_id);
        for_each_vcpu ( d, v )
        {
            printk("\tVCPU %d\n", v->vcpu_id);
            vmx_vmcs_enter(v);
            vmcs_dump_vcpu();
            vmx_vmcs_exit(v);
        }
    }

    rcu_read_unlock(&domlist_read_lock);

    printk("**************************************\n");
}

void setup_vmcs_dump(void)
{
    register_keyhandler('v', vmcs_dump, "dump Intel's VMCS");
}

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */