
annotate xen/arch/x86/hvm/vmx/vmcs.c @ 19648:f0e2df69a8eb

x86 hvm: Allow cross-vendor migration

Intercept #UD and emulate SYSCALL/SYSENTER/SYSEXIT as necessary.

Signed-off-by: Christoph Egger <Christoph.Egger@amd.com>
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Tue May 26 15:01:36 2009 +0100 (2009-05-26)
parents 7d552e56d105
children 822ea2bf0c54
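
Only the interception setup lands in this file (TRAP_invalid_op is added to the exception bitmap in construct_vmcs() below); the emulation itself lives elsewhere in the hypervisor. As a rough, hypothetical sketch of the dispatch idea behind "intercept #UD and emulate" -- the opcode encodings for SYSCALL/SYSENTER/SYSEXIT are real, but every function below is an illustrative stand-in, not Xen's API:

/* Hypothetical sketch only: decode the two opcode bytes at the faulting RIP
 * and dispatch to an emulation routine; reflect #UD for anything else.
 * All helpers are stubs for illustration, not Xen's real interfaces. */
#include <stdint.h>
#include <stdio.h>

static void emulate_syscall(void)  { puts("emulate SYSCALL");  }
static void emulate_sysenter(void) { puts("emulate SYSENTER"); }
static void emulate_sysexit(void)  { puts("emulate SYSEXIT");  }
static void reflect_ud(void)       { puts("inject #UD back into the guest"); }

static void handle_ud_intercept(const uint8_t insn[2])
{
    if ( insn[0] != 0x0f )
    {
        reflect_ud();                      /* not an opcode we emulate */
        return;
    }
    switch ( insn[1] )
    {
    case 0x05: emulate_syscall();  break;  /* SYSCALL:  0F 05 */
    case 0x34: emulate_sysenter(); break;  /* SYSENTER: 0F 34 */
    case 0x35: emulate_sysexit();  break;  /* SYSEXIT:  0F 35 */
    default:   reflect_ud();       break;
    }
}

int main(void)
{
    const uint8_t syscall_bytes[2] = { 0x0f, 0x05 };
    handle_ud_intercept(syscall_bytes);    /* prints "emulate SYSCALL" */
    return 0;
}
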
rev   line source
kaf24@8708 1 /*
kaf24@8708 2 * vmcs.c: VMCS management
kaf24@8708 3 * Copyright (c) 2004, Intel Corporation.
kaf24@8708 4 *
kaf24@8708 5 * This program is free software; you can redistribute it and/or modify it
kaf24@8708 6 * under the terms and conditions of the GNU General Public License,
kaf24@8708 7 * version 2, as published by the Free Software Foundation.
kaf24@8708 8 *
kaf24@8708 9 * This program is distributed in the hope it will be useful, but WITHOUT
kaf24@8708 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
kaf24@8708 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
kaf24@8708 12 * more details.
kaf24@8708 13 *
kaf24@8708 14 * You should have received a copy of the GNU General Public License along with
kaf24@8708 15 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
kaf24@8708 16 * Place - Suite 330, Boston, MA 02111-1307 USA.
kaf24@8708 17 */
kaf24@8708 18
kaf24@8708 19 #include <xen/config.h>
kaf24@8708 20 #include <xen/init.h>
kaf24@8708 21 #include <xen/mm.h>
kaf24@8708 22 #include <xen/lib.h>
kaf24@8708 23 #include <xen/errno.h>
kaf24@8708 24 #include <xen/domain_page.h>
kaf24@8708 25 #include <asm/current.h>
kaf24@8708 26 #include <asm/cpufeature.h>
kaf24@8708 27 #include <asm/processor.h>
kaf24@8708 28 #include <asm/msr.h>
kaf24@8708 29 #include <asm/hvm/hvm.h>
kaf24@8708 30 #include <asm/hvm/io.h>
kaf24@8708 31 #include <asm/hvm/support.h>
kaf24@8708 32 #include <asm/hvm/vmx/vmx.h>
kaf24@8708 33 #include <asm/hvm/vmx/vmcs.h>
kaf24@8708 34 #include <asm/flushtlb.h>
kaf24@8708 35 #include <xen/event.h>
kaf24@8708 36 #include <xen/kernel.h>
kaf24@10079 37 #include <xen/keyhandler.h>
kaf24@11310 38 #include <asm/shadow.h>
keir@16274 39 #include <asm/tboot.h>
kaf24@8708 40
keir@17416 41 static int opt_vpid_enabled = 1;
keir@17416 42 boolean_param("vpid", opt_vpid_enabled);
keir@17416 43
kfraser@11625 44 /* Dynamic (run-time adjusted) execution control flags. */
keir@14946 45 u32 vmx_pin_based_exec_control __read_mostly;
keir@14946 46 u32 vmx_cpu_based_exec_control __read_mostly;
kfraser@15240 47 u32 vmx_secondary_exec_control __read_mostly;
keir@14946 48 u32 vmx_vmexit_control __read_mostly;
keir@14946 49 u32 vmx_vmentry_control __read_mostly;
keir@15449 50 bool_t cpu_has_vmx_ins_outs_instr_info __read_mostly;
kfraser@11625 51
kfraser@15583 52 static DEFINE_PER_CPU(struct vmcs_struct *, host_vmcs);
kfraser@15486 53 static DEFINE_PER_CPU(struct vmcs_struct *, current_vmcs);
kfraser@15583 54 static DEFINE_PER_CPU(struct list_head, active_vmcs_list);
kfraser@15486 55
keir@14946 56 static u32 vmcs_revision_id __read_mostly;
kaf24@8708 57
keir@18987 58 static void __init vmx_display_features(void)
keir@18987 59 {
keir@18987 60 int printed = 0;
keir@18987 61
keir@18987 62 printk("VMX: Supported advanced features:\n");
keir@18987 63
keir@18987 64 #define P(p,s) if ( p ) { printk(" - %s\n", s); printed = 1; }
keir@18987 65 P(cpu_has_vmx_virtualize_apic_accesses, "APIC MMIO access virtualisation");
keir@18987 66 P(cpu_has_vmx_tpr_shadow, "APIC TPR shadow");
keir@18987 67 P(cpu_has_vmx_ept, "Extended Page Tables (EPT)");
keir@18987 68 P(cpu_has_vmx_vpid, "Virtual-Processor Identifiers (VPID)");
keir@18987 69 P(cpu_has_vmx_vnmi, "Virtual NMI");
keir@18987 70 P(cpu_has_vmx_msr_bitmap, "MSR direct-access bitmap");
keir@18987 71 #undef P
keir@18987 72
keir@18987 73 if ( !printed )
keir@18987 74 printk(" - none\n");
keir@18987 75 }
keir@18987 76
keir@14943 77 static u32 adjust_vmx_controls(u32 ctl_min, u32 ctl_opt, u32 msr)
kfraser@11625 78 {
keir@14943 79 u32 vmx_msr_low, vmx_msr_high, ctl = ctl_min | ctl_opt;
kfraser@11625 80
kfraser@11625 81 rdmsr(msr, vmx_msr_low, vmx_msr_high);
kfraser@11625 82
kfraser@14638 83 ctl &= vmx_msr_high; /* bit == 0 in high word ==> must be zero */
kfraser@14638 84 ctl |= vmx_msr_low; /* bit == 1 in low word ==> must be one */
kfraser@11625 85
kfraser@14638 86 /* Ensure the minimum (required) set of control bits is supported. */
kfraser@14638 87 BUG_ON(ctl_min & ~ctl);
kfraser@11625 88
kfraser@14638 89 return ctl;
kfraser@11625 90 }
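/*
 * Worked example with hypothetical MSR values: ctl_min = 0x0400 and
 * ctl_opt = 0x0200, so ctl starts as 0x0600.  If the capability MSR
 * reads low = 0x0016 (bits that must be 1) and high = 0x0416 (bits
 * allowed to be 1), then ctl = (0x0600 & 0x0416) | 0x0016 = 0x0416.
 * The BUG_ON then verifies every ctl_min bit survived (here 0x0400 did).
 */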
kfraser@11625 91
kfraser@15589 92 static void vmx_init_vmcs_config(void)
kfraser@10648 93 {
keir@17645 94 u32 vmx_basic_msr_low, vmx_basic_msr_high, min, opt;
kfraser@11625 95 u32 _vmx_pin_based_exec_control;
kfraser@11625 96 u32 _vmx_cpu_based_exec_control;
kfraser@15240 97 u32 _vmx_secondary_exec_control = 0;
kfraser@11625 98 u32 _vmx_vmexit_control;
kfraser@11625 99 u32 _vmx_vmentry_control;
kfraser@10648 100
keir@17645 101 rdmsr(MSR_IA32_VMX_BASIC, vmx_basic_msr_low, vmx_basic_msr_high);
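    /*
     * Fields of MSR_IA32_VMX_BASIC consumed below: bits 30:0 hold the VMCS
     * revision id; in the high word, bits 12:0 give the VMCS region size,
     * bit 16 is set if physical addresses are limited to 32 bits, bits
     * 21:18 give the VMCS memory type (6 == write-back), bit 22 reports
     * INS/OUTS instruction info, and bit 23 indicates the "TRUE" capability
     * MSRs (e.g. MSR_IA32_VMX_TRUE_PROCBASED_CTLS) are available.
     */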
keir@17645 102
keir@15353 103 min = (PIN_BASED_EXT_INTR_MASK |
keir@15353 104 PIN_BASED_NMI_EXITING);
kfraser@15502 105 opt = PIN_BASED_VIRTUAL_NMIS;
kfraser@14638 106 _vmx_pin_based_exec_control = adjust_vmx_controls(
kfraser@15241 107 min, opt, MSR_IA32_VMX_PINBASED_CTLS);
kfraser@14638 108
keir@14943 109 min = (CPU_BASED_HLT_EXITING |
kfraser@15511 110 CPU_BASED_INVLPG_EXITING |
keir@17406 111 CPU_BASED_CR3_LOAD_EXITING |
keir@17406 112 CPU_BASED_CR3_STORE_EXITING |
keir@16365 113 CPU_BASED_MONITOR_EXITING |
keir@14943 114 CPU_BASED_MWAIT_EXITING |
keir@14943 115 CPU_BASED_MOV_DR_EXITING |
keir@14943 116 CPU_BASED_ACTIVATE_IO_BITMAP |
keir@18026 117 CPU_BASED_USE_TSC_OFFSETING |
keir@18026 118 (opt_softtsc ? CPU_BASED_RDTSC_EXITING : 0));
keir@17406 119 opt = (CPU_BASED_ACTIVATE_MSR_BITMAP |
keir@17406 120 CPU_BASED_TPR_SHADOW |
keir@18931 121 CPU_BASED_MONITOR_TRAP_FLAG |
keir@17406 122 CPU_BASED_ACTIVATE_SECONDARY_CONTROLS);
kfraser@14638 123 _vmx_cpu_based_exec_control = adjust_vmx_controls(
kfraser@15241 124 min, opt, MSR_IA32_VMX_PROCBASED_CTLS);
keir@14943 125 #ifdef __x86_64__
keir@14943 126 if ( !(_vmx_cpu_based_exec_control & CPU_BASED_TPR_SHADOW) )
keir@14943 127 {
keir@14943 128 min |= CPU_BASED_CR8_LOAD_EXITING | CPU_BASED_CR8_STORE_EXITING;
keir@14943 129 _vmx_cpu_based_exec_control = adjust_vmx_controls(
kfraser@15241 130 min, opt, MSR_IA32_VMX_PROCBASED_CTLS);
keir@14943 131 }
keir@14943 132 #endif
kfraser@14638 133
kfraser@15241 134 if ( _vmx_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS )
kfraser@15240 135 {
kfraser@15240 136 min = 0;
keir@16367 137 opt = (SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
keir@17406 138 SECONDARY_EXEC_WBINVD_EXITING |
keir@17406 139 SECONDARY_EXEC_ENABLE_EPT);
keir@17416 140 if ( opt_vpid_enabled )
keir@17416 141 opt |= SECONDARY_EXEC_ENABLE_VPID;
kfraser@15240 142 _vmx_secondary_exec_control = adjust_vmx_controls(
kfraser@15240 143 min, opt, MSR_IA32_VMX_PROCBASED_CTLS2);
kfraser@15240 144 }
kfraser@15240 145
keir@17406 146 if ( _vmx_secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT )
keir@17406 147 {
keir@17645 148 /*
keir@17645 149 * To use EPT we expect to be able to clear certain intercepts.
keir@17645 150 * We check VMX_BASIC_MSR[55] to correctly handle default1 controls.
keir@17645 151 */
keir@17645 152 uint32_t must_be_one, must_be_zero, msr = MSR_IA32_VMX_PROCBASED_CTLS;
keir@17645 153 if ( vmx_basic_msr_high & (1u << 23) )
keir@17645 154 msr = MSR_IA32_VMX_TRUE_PROCBASED_CTLS;
keir@17645 155 rdmsr(msr, must_be_one, must_be_zero);
keir@17406 156 if ( must_be_one & (CPU_BASED_INVLPG_EXITING |
keir@17406 157 CPU_BASED_CR3_LOAD_EXITING |
keir@17406 158 CPU_BASED_CR3_STORE_EXITING) )
keir@17406 159 _vmx_secondary_exec_control &= ~SECONDARY_EXEC_ENABLE_EPT;
keir@17406 160 }
keir@17406 161
kfraser@15241 162 #if defined(__i386__)
kfraser@15241 163 /* If we can't virtualise APIC accesses, the TPR shadow is pointless. */
kfraser@15241 164 if ( !(_vmx_secondary_exec_control &
kfraser@15241 165 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) )
kfraser@15241 166 _vmx_cpu_based_exec_control &= ~CPU_BASED_TPR_SHADOW;
kfraser@15241 167 #endif
kfraser@15241 168
keir@14943 169 min = VM_EXIT_ACK_INTR_ON_EXIT;
keir@19073 170 opt = VM_EXIT_SAVE_GUEST_PAT | VM_EXIT_LOAD_HOST_PAT;
kfraser@14638 171 #ifdef __x86_64__
keir@14943 172 min |= VM_EXIT_IA32E_MODE;
kfraser@14638 173 #endif
kfraser@14638 174 _vmx_vmexit_control = adjust_vmx_controls(
kfraser@15241 175 min, opt, MSR_IA32_VMX_EXIT_CTLS);
kfraser@14638 176
keir@19073 177 min = 0;
keir@19073 178 opt = VM_ENTRY_LOAD_GUEST_PAT;
kfraser@14638 179 _vmx_vmentry_control = adjust_vmx_controls(
kfraser@15241 180 min, opt, MSR_IA32_VMX_ENTRY_CTLS);
kfraser@10648 181
kfraser@15589 182 if ( !vmx_pin_based_exec_control )
kfraser@11625 183 {
kfraser@15589 184 /* First time through. */
keir@17645 185 vmcs_revision_id = vmx_basic_msr_low;
kfraser@11625 186 vmx_pin_based_exec_control = _vmx_pin_based_exec_control;
kfraser@11625 187 vmx_cpu_based_exec_control = _vmx_cpu_based_exec_control;
kfraser@15241 188 vmx_secondary_exec_control = _vmx_secondary_exec_control;
kfraser@11625 189 vmx_vmexit_control = _vmx_vmexit_control;
kfraser@11625 190 vmx_vmentry_control = _vmx_vmentry_control;
keir@17645 191 cpu_has_vmx_ins_outs_instr_info = !!(vmx_basic_msr_high & (1U<<22));
keir@18987 192 vmx_display_features();
kfraser@11625 193 }
kfraser@11625 194 else
kfraser@11625 195 {
kfraser@15589 196 /* Globals are already initialised: re-check them. */
keir@17645 197 BUG_ON(vmcs_revision_id != vmx_basic_msr_low);
kfraser@11625 198 BUG_ON(vmx_pin_based_exec_control != _vmx_pin_based_exec_control);
kfraser@11625 199 BUG_ON(vmx_cpu_based_exec_control != _vmx_cpu_based_exec_control);
kfraser@15241 200 BUG_ON(vmx_secondary_exec_control != _vmx_secondary_exec_control);
kfraser@11625 201 BUG_ON(vmx_vmexit_control != _vmx_vmexit_control);
kfraser@11625 202 BUG_ON(vmx_vmentry_control != _vmx_vmentry_control);
keir@17645 203 BUG_ON(cpu_has_vmx_ins_outs_instr_info !=
keir@17645 204 !!(vmx_basic_msr_high & (1U<<22)));
kfraser@11625 205 }
kfraser@10648 206
kfraser@11625 207 /* IA-32 SDM Vol 3B: VMCS size is never greater than 4kB. */
keir@17645 208 BUG_ON((vmx_basic_msr_high & 0x1fff) > PAGE_SIZE);
kfraser@15572 209
kfraser@15572 210 #ifdef __x86_64__
kfraser@15572 211 /* IA-32 SDM Vol 3B: 64-bit CPUs always have VMX_BASIC_MSR[48]==0. */
keir@17645 212 BUG_ON(vmx_basic_msr_high & (1u<<16));
kfraser@15572 213 #endif
kfraser@15572 214
kfraser@15572 215 /* Require Write-Back (WB) memory type for VMCS accesses. */
keir@17645 216 BUG_ON(((vmx_basic_msr_high >> 18) & 15) != 6);
kfraser@10648 217 }
kfraser@10648 218
kfraser@10648 219 static struct vmcs_struct *vmx_alloc_vmcs(void)
kaf24@8708 220 {
kaf24@8708 221 struct vmcs_struct *vmcs;
kaf24@8708 222
kfraser@11625 223 if ( (vmcs = alloc_xenheap_page()) == NULL )
kfraser@10648 224 {
kaf24@12038 225 gdprintk(XENLOG_WARNING, "Failed to allocate VMCS.\n");
kfraser@10648 226 return NULL;
kfraser@10648 227 }
kaf24@8708 228
kfraser@15405 229 clear_page(vmcs);
kfraser@10648 230 vmcs->vmcs_revision_id = vmcs_revision_id;
kfraser@10648 231
kaf24@8708 232 return vmcs;
kaf24@8708 233 }
kaf24@8708 234
kfraser@10648 235 static void vmx_free_vmcs(struct vmcs_struct *vmcs)
kaf24@8708 236 {
kfraser@11625 237 free_xenheap_page(vmcs);
kaf24@8708 238 }
kaf24@8708 239
kaf24@10356 240 static void __vmx_clear_vmcs(void *info)
kaf24@8708 241 {
kaf24@10356 242 struct vcpu *v = info;
kfraser@15583 243 struct arch_vmx_struct *arch_vmx = &v->arch.hvm_vmx;
kfraser@10648 244
kfraser@15589 245 /* Otherwise we can nest (vmx_cpu_down() vs. vmx_clear_vmcs()). */
kfraser@15583 246 ASSERT(!local_irq_is_enabled());
kfraser@15486 247
kfraser@15583 248 if ( arch_vmx->active_cpu == smp_processor_id() )
kfraser@15583 249 {
kfraser@15583 250 __vmpclear(virt_to_maddr(arch_vmx->vmcs));
kfraser@15583 251
kfraser@15583 252 arch_vmx->active_cpu = -1;
kfraser@15583 253 arch_vmx->launched = 0;
kfraser@15583 254
kfraser@15583 255 list_del(&arch_vmx->active_list);
kfraser@15583 256
kfraser@15583 257 if ( arch_vmx->vmcs == this_cpu(current_vmcs) )
kfraser@15583 258 this_cpu(current_vmcs) = NULL;
kfraser@15583 259 }
kaf24@8708 260 }
kaf24@8708 261
kaf24@10356 262 static void vmx_clear_vmcs(struct vcpu *v)
kaf24@8806 263 {
kaf24@10510 264 int cpu = v->arch.hvm_vmx.active_cpu;
kaf24@8806 265
kfraser@15583 266 if ( cpu != -1 )
kfraser@15583 267 on_selected_cpus(cpumask_of_cpu(cpu), __vmx_clear_vmcs, v, 1, 1);
kaf24@8806 268 }
kaf24@8806 269
kaf24@10356 270 static void vmx_load_vmcs(struct vcpu *v)
kaf24@8806 271 {
kfraser@15583 272 unsigned long flags;
kfraser@15583 273
kfraser@15583 274 local_irq_save(flags);
kfraser@15583 275
kfraser@15583 276 if ( v->arch.hvm_vmx.active_cpu == -1 )
kfraser@15583 277 {
kfraser@15583 278 list_add(&v->arch.hvm_vmx.active_list, &this_cpu(active_vmcs_list));
kfraser@15583 279 v->arch.hvm_vmx.active_cpu = smp_processor_id();
kfraser@15583 280 }
kfraser@15583 281
kfraser@15583 282 ASSERT(v->arch.hvm_vmx.active_cpu == smp_processor_id());
kfraser@15583 283
kaf24@10356 284 __vmptrld(virt_to_maddr(v->arch.hvm_vmx.vmcs));
kfraser@15486 285 this_cpu(current_vmcs) = v->arch.hvm_vmx.vmcs;
kfraser@15583 286
kfraser@15583 287 local_irq_restore(flags);
kfraser@15583 288 }
kfraser@15583 289
kfraser@15589 290 int vmx_cpu_up(void)
kfraser@15589 291 {
kfraser@15589 292 u32 eax, edx;
keir@17775 293 int bios_locked, cpu = smp_processor_id();
kfraser@15668 294 u64 cr0, vmx_cr0_fixed0, vmx_cr0_fixed1;
kfraser@15589 295
kfraser@15589 296 BUG_ON(!(read_cr4() & X86_CR4_VMXE));
kfraser@15589 297
kfraser@15668 298 /*
kfraser@15668 299 * Ensure the current processor operating mode meets
kfraser@15668 300 * the required CR0 fixed bits in VMX operation.
kfraser@15668 301 */
kfraser@15668 302 cr0 = read_cr0();
kfraser@15668 303 rdmsrl(MSR_IA32_VMX_CR0_FIXED0, vmx_cr0_fixed0);
kfraser@15668 304 rdmsrl(MSR_IA32_VMX_CR0_FIXED1, vmx_cr0_fixed1);
kfraser@15668 305 if ( (~cr0 & vmx_cr0_fixed0) || (cr0 & ~vmx_cr0_fixed1) )
kfraser@15668 306 {
kfraser@15668 307 printk("CPU%d: some settings of host CR0 are "
kfraser@15668 308 "not allowed in VMX operation.\n", cpu);
kfraser@15668 309 return 0;
kfraser@15668 310 }
kfraser@15668 311
kfraser@15589 312 rdmsr(IA32_FEATURE_CONTROL_MSR, eax, edx);
kfraser@15589 313
keir@17775 314 bios_locked = !!(eax & IA32_FEATURE_CONTROL_MSR_LOCK);
keir@17775 315 if ( bios_locked )
kfraser@15589 316 {
kfraser@15810 317 if ( !(eax & (IA32_FEATURE_CONTROL_MSR_ENABLE_VMXON_OUTSIDE_SMX |
kfraser@15810 318 IA32_FEATURE_CONTROL_MSR_ENABLE_VMXON_INSIDE_SMX)) )
kfraser@15589 319 {
kfraser@15810 320 printk("CPU%d: VMX disabled by BIOS.\n", cpu);
kfraser@15589 321 return 0;
kfraser@15589 322 }
kfraser@15589 323 }
kfraser@15589 324 else
kfraser@15589 325 {
keir@16643 326 eax = IA32_FEATURE_CONTROL_MSR_LOCK;
keir@16643 327 eax |= IA32_FEATURE_CONTROL_MSR_ENABLE_VMXON_OUTSIDE_SMX;
keir@16643 328 if ( test_bit(X86_FEATURE_SMXE, &boot_cpu_data.x86_capability) )
keir@16643 329 eax |= IA32_FEATURE_CONTROL_MSR_ENABLE_VMXON_INSIDE_SMX;
kfraser@15810 330 wrmsr(IA32_FEATURE_CONTROL_MSR, eax, 0);
kfraser@15589 331 }
kfraser@15589 332
kfraser@15589 333 vmx_init_vmcs_config();
kfraser@15589 334
kfraser@15589 335 INIT_LIST_HEAD(&this_cpu(active_vmcs_list));
kfraser@15589 336
kfraser@15589 337 if ( this_cpu(host_vmcs) == NULL )
kfraser@15589 338 {
kfraser@15589 339 this_cpu(host_vmcs) = vmx_alloc_vmcs();
kfraser@15589 340 if ( this_cpu(host_vmcs) == NULL )
kfraser@15589 341 {
kfraser@15589 342 printk("CPU%d: Could not allocate host VMCS\n", cpu);
kfraser@15589 343 return 0;
kfraser@15589 344 }
kfraser@15589 345 }
kfraser@15589 346
keir@17775 347 switch ( __vmxon(virt_to_maddr(this_cpu(host_vmcs))) )
kfraser@15589 348 {
keir@17775 349 case -2: /* #UD or #GP */
keir@17775 350 if ( bios_locked &&
keir@17775 351 test_bit(X86_FEATURE_SMXE, &boot_cpu_data.x86_capability) &&
keir@17775 352 (!(eax & IA32_FEATURE_CONTROL_MSR_ENABLE_VMXON_OUTSIDE_SMX) ||
keir@17775 353 !(eax & IA32_FEATURE_CONTROL_MSR_ENABLE_VMXON_INSIDE_SMX)) )
keir@17775 354 {
keir@17775 355 printk("CPU%d: VMXON failed: perhaps because of TXT settings "
keir@17775 356 "in your BIOS configuration?\n", cpu);
keir@17775 357 printk(" --> Disable TXT in your BIOS unless using a secure "
keir@17775 358 "bootloader.\n");
keir@17775 359 return 0;
keir@17775 360 }
keir@17775 361 /* fall through */
keir@17775 362 case -1: /* CF==1 or ZF==1 */
keir@17775 363 printk("CPU%d: unexpected VMXON failure\n", cpu);
kfraser@15589 364 return 0;
keir@17775 365 case 0: /* success */
keir@17775 366 break;
keir@17775 367 default:
keir@17775 368 BUG();
kfraser@15589 369 }
kfraser@15589 370
keir@17406 371 ept_sync_all();
keir@17406 372
keir@17416 373 vpid_sync_all();
keir@17416 374
kfraser@15589 375 return 1;
kfraser@15589 376 }
kfraser@15589 377
kfraser@15589 378 void vmx_cpu_down(void)
kfraser@15583 379 {
kfraser@15583 380 struct list_head *active_vmcs_list = &this_cpu(active_vmcs_list);
kfraser@15583 381 unsigned long flags;
kfraser@15583 382
kfraser@15583 383 local_irq_save(flags);
kfraser@15583 384
kfraser@15583 385 while ( !list_empty(active_vmcs_list) )
kfraser@15583 386 __vmx_clear_vmcs(list_entry(active_vmcs_list->next,
kfraser@15583 387 struct vcpu, arch.hvm_vmx.active_list));
kfraser@15583 388
kfraser@15589 389 BUG_ON(!(read_cr4() & X86_CR4_VMXE));
kfraser@15589 390 __vmxoff();
kfraser@15583 391
kfraser@15583 392 local_irq_restore(flags);
kfraser@15583 393 }
kfraser@15583 394
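/*
 * Per-CPU bookkeeping for vmx_vmcs_enter/exit(): which foreign vCPU's VMCS
 * is currently loaded on this pCPU (if any), and how many nested
 * vmx_vmcs_enter() calls are outstanding for it.
 */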
kfraser@15735 395 struct foreign_vmcs {
kfraser@15735 396 struct vcpu *v;
kfraser@15735 397 unsigned int count;
kfraser@15735 398 };
kfraser@15735 399 static DEFINE_PER_CPU(struct foreign_vmcs, foreign_vmcs);
kfraser@15735 400
kaf24@10356 401 void vmx_vmcs_enter(struct vcpu *v)
kaf24@8708 402 {
kfraser@15735 403 struct foreign_vmcs *fv;
kfraser@15735 404
kaf24@10356 405 /*
kaf24@10356 406 * NB. We must *always* run an HVM VCPU on its own VMCS, except for
kfraser@12265 407 * vmx_vmcs_enter/exit critical regions.
kaf24@10356 408 */
kfraser@15735 409 if ( likely(v == current) )
kaf24@10356 410 return;
kaf24@10356 411
kfraser@15735 412 fv = &this_cpu(foreign_vmcs);
kaf24@10356 413
kfraser@15735 414 if ( fv->v == v )
kfraser@15735 415 {
kfraser@15735 416 BUG_ON(fv->count == 0);
kfraser@15735 417 }
kfraser@15735 418 else
kfraser@15735 419 {
kfraser@15735 420 BUG_ON(fv->v != NULL);
kfraser@15735 421 BUG_ON(fv->count != 0);
kfraser@15735 422
kfraser@15735 423 vcpu_pause(v);
kfraser@15735 424 spin_lock(&v->arch.hvm_vmx.vmcs_lock);
kfraser@15735 425
kfraser@15735 426 vmx_clear_vmcs(v);
kfraser@15735 427 vmx_load_vmcs(v);
kfraser@15735 428
kfraser@15735 429 fv->v = v;
kfraser@15735 430 }
kfraser@15735 431
kfraser@15735 432 fv->count++;
kaf24@8708 433 }
kaf24@10356 434
kaf24@10356 435 void vmx_vmcs_exit(struct vcpu *v)
kaf24@10356 436 {
kfraser@15735 437 struct foreign_vmcs *fv;
kfraser@15735 438
kfraser@15735 439 if ( likely(v == current) )
kaf24@10356 440 return;
kaf24@10356 441
kfraser@15735 442 fv = &this_cpu(foreign_vmcs);
kfraser@15735 443 BUG_ON(fv->v != v);
kfraser@15735 444 BUG_ON(fv->count == 0);
kaf24@10356 445
kfraser@15735 446 if ( --fv->count == 0 )
kfraser@15735 447 {
kfraser@15735 448 /* Don't confuse vmx_do_resume (for @v or @current!) */
kfraser@15735 449 vmx_clear_vmcs(v);
kfraser@15735 450 if ( is_hvm_vcpu(current) )
kfraser@15735 451 vmx_load_vmcs(current);
kfraser@15735 452
kfraser@15735 453 spin_unlock(&v->arch.hvm_vmx.vmcs_lock);
kfraser@15735 454 vcpu_unpause(v);
kfraser@15735 455
kfraser@15735 456 fv->v = NULL;
kfraser@15735 457 }
kaf24@10356 458 }
kaf24@8708 459
kfraser@15571 460 struct xgt_desc {
kfraser@15571 461 unsigned short size;
kfraser@15571 462 unsigned long address __attribute__((packed));
kaf24@8708 463 };
kaf24@8708 464
kaf24@8708 465 static void vmx_set_host_env(struct vcpu *v)
kaf24@8708 466 {
keir@16375 467 unsigned int cpu = smp_processor_id();
kaf24@8708 468
keir@18790 469 __vmwrite(HOST_GDTR_BASE,
keir@18790 470 (unsigned long)(this_cpu(gdt_table) - FIRST_RESERVED_GDT_ENTRY));
keir@16375 471 __vmwrite(HOST_IDTR_BASE, (unsigned long)idt_tables[cpu]);
kaf24@8708 472
keir@18523 473 __vmwrite(HOST_TR_SELECTOR, TSS_ENTRY << 3);
keir@16375 474 __vmwrite(HOST_TR_BASE, (unsigned long)&init_tss[cpu]);
kfraser@15571 475
keir@16375 476 __vmwrite(HOST_SYSENTER_ESP, get_stack_bottom());
kfraser@13375 477
kfraser@13375 478 /*
kfraser@13375 479 * Skip end of cpu_user_regs when entering the hypervisor because the
kfraser@13375 480 * CPU does not save context onto the stack. SS, RSP, CS, RIP, RFLAGS, etc.
kfraser@13375 481 * all get saved into the VMCS instead.
kfraser@13375 482 */
kfraser@13375 483 __vmwrite(HOST_RSP,
kfraser@13375 484 (unsigned long)&get_cpu_info()->guest_cpu_user_regs.error_code);
kaf24@8708 485 }
kaf24@8708 486
keir@16148 487 void vmx_disable_intercept_for_msr(struct vcpu *v, u32 msr)
keir@16148 488 {
keir@17211 489 unsigned long *msr_bitmap = v->arch.hvm_vmx.msr_bitmap;
keir@16148 490
keir@16148 491 /* VMX MSR bitmap supported? */
keir@16148 492 if ( msr_bitmap == NULL )
keir@16148 493 return;
keir@16148 494
keir@16148 495 /*
keir@16148 496 * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
keir@16148 497 * have the write-low and read-high bitmap offsets the wrong way round.
keir@16148 498 * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
keir@16148 499 */
keir@16148 500 if ( msr <= 0x1fff )
keir@16148 501 {
keir@17211 502 __clear_bit(msr, msr_bitmap + 0x000/BYTES_PER_LONG); /* read-low */
keir@17211 503 __clear_bit(msr, msr_bitmap + 0x800/BYTES_PER_LONG); /* write-low */
keir@16148 504 }
keir@16148 505 else if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) )
keir@16148 506 {
keir@16148 507 msr &= 0x1fff;
keir@17211 508 __clear_bit(msr, msr_bitmap + 0x400/BYTES_PER_LONG); /* read-high */
keir@17211 509 __clear_bit(msr, msr_bitmap + 0xc00/BYTES_PER_LONG); /* write-high */
keir@16148 510 }
keir@16148 511 }
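/*
 * Example of the layout described above: MSR_IA32_SYSENTER_CS (0x174)
 * clears bit 0x174 in the read-low (offset 0x000) and write-low (offset
 * 0x800) halves, while MSR_LSTAR (0xc0000082) clears bit 0x082 in the
 * read-high (0x400) and write-high (0xc00) halves.
 */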
keir@16148 512
keir@16148 513 static int construct_vmcs(struct vcpu *v)
kaf24@8708 514 {
keir@17406 515 struct domain *d = v->domain;
keir@16375 516 uint16_t sysenter_cs;
keir@16375 517 unsigned long sysenter_eip;
kaf24@8708 518
kfraser@12262 519 vmx_vmcs_enter(v);
kfraser@12262 520
kfraser@12260 521 /* VMCS controls. */
kfraser@12320 522 __vmwrite(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_control);
keir@17406 523
kfraser@15509 524 v->arch.hvm_vmx.exec_control = vmx_cpu_based_exec_control;
keir@17406 525 v->arch.hvm_vmx.secondary_exec_control = vmx_secondary_exec_control;
keir@17406 526
keir@17406 527 if ( paging_mode_hap(d) )
keir@17406 528 {
keir@17406 529 v->arch.hvm_vmx.exec_control &= ~(CPU_BASED_INVLPG_EXITING |
keir@17406 530 CPU_BASED_CR3_LOAD_EXITING |
keir@17406 531 CPU_BASED_CR3_STORE_EXITING);
keir@17406 532 }
keir@17406 533 else
keir@17406 534 {
keir@17406 535 v->arch.hvm_vmx.secondary_exec_control &= ~SECONDARY_EXEC_ENABLE_EPT;
keir@19073 536 vmx_vmexit_control &= ~(VM_EXIT_SAVE_GUEST_PAT |
keir@19073 537 VM_EXIT_LOAD_HOST_PAT);
keir@19073 538 vmx_vmentry_control &= ~VM_ENTRY_LOAD_GUEST_PAT;
keir@17406 539 }
keir@17406 540
keir@18931 541 /* Do not enable the Monitor Trap Flag unless single-step debugging has been started. */
keir@18931 542 v->arch.hvm_vmx.exec_control &= ~CPU_BASED_MONITOR_TRAP_FLAG;
keir@18931 543
keir@17406 544 __vmwrite(CPU_BASED_VM_EXEC_CONTROL, v->arch.hvm_vmx.exec_control);
keir@19073 545 __vmwrite(VM_EXIT_CONTROLS, vmx_vmexit_control);
keir@19073 546 __vmwrite(VM_ENTRY_CONTROLS, vmx_vmentry_control);
keir@19073 547
keir@17406 548 if ( cpu_has_vmx_secondary_exec_control )
keir@17406 549 __vmwrite(SECONDARY_VM_EXEC_CONTROL,
keir@17406 550 v->arch.hvm_vmx.secondary_exec_control);
kfraser@12260 551
keir@16148 552 /* MSR access bitmap. */
keir@14677 553 if ( cpu_has_vmx_msr_bitmap )
keir@16148 554 {
keir@17211 555 unsigned long *msr_bitmap = alloc_xenheap_page();
keir@16148 556
keir@16148 557 if ( msr_bitmap == NULL )
keir@16148 558 return -ENOMEM;
keir@16148 559
keir@16148 560 memset(msr_bitmap, ~0, PAGE_SIZE);
keir@16148 561 v->arch.hvm_vmx.msr_bitmap = msr_bitmap;
keir@16148 562 __vmwrite(MSR_BITMAP, virt_to_maddr(msr_bitmap));
keir@16148 563
keir@16148 564 vmx_disable_intercept_for_msr(v, MSR_FS_BASE);
keir@16148 565 vmx_disable_intercept_for_msr(v, MSR_GS_BASE);
keir@16148 566 vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_CS);
keir@16148 567 vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_ESP);
keir@16148 568 vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_EIP);
keir@19073 569 if ( cpu_has_vmx_pat && paging_mode_hap(d) )
keir@19073 570 vmx_disable_intercept_for_msr(v, MSR_IA32_CR_PAT);
keir@16148 571 }
kfraser@14664 572
kfraser@13627 573 /* I/O access bitmap. */
keir@17294 574 __vmwrite(IO_BITMAP_A, virt_to_maddr((char *)hvm_io_bitmap + 0));
keir@17294 575 __vmwrite(IO_BITMAP_B, virt_to_maddr((char *)hvm_io_bitmap + PAGE_SIZE));
kfraser@13627 576
kfraser@12260 577 /* Host data selectors. */
kfraser@12320 578 __vmwrite(HOST_SS_SELECTOR, __HYPERVISOR_DS);
kfraser@12320 579 __vmwrite(HOST_DS_SELECTOR, __HYPERVISOR_DS);
kfraser@12320 580 __vmwrite(HOST_ES_SELECTOR, __HYPERVISOR_DS);
keir@16374 581 __vmwrite(HOST_FS_SELECTOR, 0);
keir@16374 582 __vmwrite(HOST_GS_SELECTOR, 0);
kfraser@12320 583 __vmwrite(HOST_FS_BASE, 0);
kfraser@12320 584 __vmwrite(HOST_GS_BASE, 0);
kfraser@12260 585
kfraser@12260 586 /* Host control registers. */
keir@16972 587 v->arch.hvm_vmx.host_cr0 = read_cr0() | X86_CR0_TS;
keir@16972 588 __vmwrite(HOST_CR0, v->arch.hvm_vmx.host_cr0);
keir@16267 589 __vmwrite(HOST_CR4, mmu_cr4_features);
kfraser@12260 590
kfraser@12260 591 /* Host CS:RIP. */
kfraser@12320 592 __vmwrite(HOST_CS_SELECTOR, __HYPERVISOR_CS);
kfraser@12320 593 __vmwrite(HOST_RIP, (unsigned long)vmx_asm_vmexit_handler);
kfraser@12260 594
keir@16375 595 /* Host SYSENTER CS:RIP. */
keir@16375 596 rdmsrl(MSR_IA32_SYSENTER_CS, sysenter_cs);
keir@16375 597 __vmwrite(HOST_SYSENTER_CS, sysenter_cs);
keir@16375 598 rdmsrl(MSR_IA32_SYSENTER_EIP, sysenter_eip);
keir@16375 599 __vmwrite(HOST_SYSENTER_EIP, sysenter_eip);
keir@16375 600
kfraser@12260 601 /* MSR intercepts. */
keir@16148 602 __vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0);
kfraser@12320 603 __vmwrite(VM_EXIT_MSR_STORE_COUNT, 0);
kfraser@12320 604 __vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0);
kfraser@11625 605
kfraser@15678 606 __vmwrite(VM_ENTRY_INTR_INFO, 0);
kaf24@8708 607
kfraser@12320 608 __vmwrite(CR0_GUEST_HOST_MASK, ~0UL);
kfraser@12320 609 __vmwrite(CR4_GUEST_HOST_MASK, ~0UL);
kaf24@8708 610
kfraser@12320 611 __vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0);
kfraser@12320 612 __vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, 0);
kaf24@8708 613
kfraser@12320 614 __vmwrite(CR3_TARGET_COUNT, 0);
kfraser@12320 615
kfraser@12320 616 __vmwrite(GUEST_ACTIVITY_STATE, 0);
kfraser@11625 617
kfraser@12260 618 /* Guest segment bases. */
kfraser@12320 619 __vmwrite(GUEST_ES_BASE, 0);
kfraser@12320 620 __vmwrite(GUEST_SS_BASE, 0);
kfraser@12320 621 __vmwrite(GUEST_DS_BASE, 0);
kfraser@12320 622 __vmwrite(GUEST_FS_BASE, 0);
kfraser@12320 623 __vmwrite(GUEST_GS_BASE, 0);
kfraser@12320 624 __vmwrite(GUEST_CS_BASE, 0);
kaf24@8708 625
kfraser@12260 626 /* Guest segment limits. */
keir@16429 627 __vmwrite(GUEST_ES_LIMIT, ~0u);
keir@16429 628 __vmwrite(GUEST_SS_LIMIT, ~0u);
keir@16429 629 __vmwrite(GUEST_DS_LIMIT, ~0u);
keir@16429 630 __vmwrite(GUEST_FS_LIMIT, ~0u);
keir@16429 631 __vmwrite(GUEST_GS_LIMIT, ~0u);
keir@16429 632 __vmwrite(GUEST_CS_LIMIT, ~0u);
kaf24@8708 633
kfraser@12260 634 /* Guest segment AR bytes. */
keir@16429 635 __vmwrite(GUEST_ES_AR_BYTES, 0xc093); /* read/write, accessed */
keir@16429 636 __vmwrite(GUEST_SS_AR_BYTES, 0xc093);
keir@16429 637 __vmwrite(GUEST_DS_AR_BYTES, 0xc093);
keir@16429 638 __vmwrite(GUEST_FS_AR_BYTES, 0xc093);
keir@16429 639 __vmwrite(GUEST_GS_AR_BYTES, 0xc093);
keir@16429 640 __vmwrite(GUEST_CS_AR_BYTES, 0xc09b); /* exec/read, accessed */
keir@16429 641
keir@16429 642 /* Guest IDT. */
keir@16429 643 __vmwrite(GUEST_IDTR_BASE, 0);
keir@16429 644 __vmwrite(GUEST_IDTR_LIMIT, 0);
kaf24@8708 645
kfraser@12260 646 /* Guest GDT. */
kfraser@12320 647 __vmwrite(GUEST_GDTR_BASE, 0);
kfraser@12320 648 __vmwrite(GUEST_GDTR_LIMIT, 0);
kaf24@8708 649
keir@16429 650 /* Guest LDT. */
keir@16429 651 __vmwrite(GUEST_LDTR_AR_BYTES, 0x0082); /* LDT */
keir@16429 652 __vmwrite(GUEST_LDTR_SELECTOR, 0);
keir@16429 653 __vmwrite(GUEST_LDTR_BASE, 0);
keir@16429 654 __vmwrite(GUEST_LDTR_LIMIT, 0);
kaf24@8708 655
keir@16429 656 /* Guest TSS. */
keir@16429 657 __vmwrite(GUEST_TR_AR_BYTES, 0x008b); /* 32-bit TSS (busy) */
keir@16429 658 __vmwrite(GUEST_TR_BASE, 0);
keir@16429 659 __vmwrite(GUEST_TR_LIMIT, 0xff);
kaf24@8708 660
kfraser@12320 661 __vmwrite(GUEST_INTERRUPTIBILITY_INFO, 0);
kfraser@12320 662 __vmwrite(GUEST_DR7, 0);
kfraser@12320 663 __vmwrite(VMCS_LINK_POINTER, ~0UL);
kfraser@10725 664 #if defined(__i386__)
kfraser@12320 665 __vmwrite(VMCS_LINK_POINTER_HIGH, ~0UL);
kfraser@10725 666 #endif
kaf24@8708 667
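    /*
     * Intercepting #UD (TRAP_invalid_op) below is what lets Xen emulate
     * SYSCALL/SYSENTER/SYSEXIT for cross-vendor migration, per this
     * changeset's description.
     */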
keir@17406 668 __vmwrite(EXCEPTION_BITMAP,
keir@17406 669 HVM_TRAP_MASK
keir@17406 670 | (paging_mode_hap(d) ? 0 : (1U << TRAP_page_fault))
keir@19648 671 | (1U << TRAP_no_device)
keir@19648 672 | (1U << TRAP_invalid_op));
kaf24@8708 673
kfraser@15735 674 v->arch.hvm_vcpu.guest_cr[0] = X86_CR0_PE | X86_CR0_ET;
kfraser@15735 675 hvm_update_guest_cr(v, 0);
kfraser@12263 676
kfraser@15735 677 v->arch.hvm_vcpu.guest_cr[4] = 0;
kfraser@15735 678 hvm_update_guest_cr(v, 4);
kfraser@12263 679
keir@14943 680 if ( cpu_has_vmx_tpr_shadow )
keir@14943 681 {
kfraser@15572 682 __vmwrite(VIRTUAL_APIC_PAGE_ADDR,
kfraser@15572 683 page_to_maddr(vcpu_vlapic(v)->regs_page));
kfraser@15240 684 __vmwrite(TPR_THRESHOLD, 0);
kfraser@15240 685 }
kfraser@15240 686
keir@17406 687 if ( paging_mode_hap(d) )
keir@17406 688 {
keir@17406 689 __vmwrite(EPT_POINTER, d->arch.hvm_domain.vmx.ept_control.eptp);
keir@17618 690 #ifdef __i386__
keir@17406 691 __vmwrite(EPT_POINTER_HIGH,
keir@17406 692 d->arch.hvm_domain.vmx.ept_control.eptp >> 32);
keir@17406 693 #endif
keir@17406 694 }
keir@17406 695
keir@17416 696 if ( cpu_has_vmx_vpid )
keir@17416 697 {
keir@17416 698 v->arch.hvm_vmx.vpid =
keir@17416 699 v->domain->arch.hvm_domain.vmx.vpid_base + v->vcpu_id;
keir@17416 700 __vmwrite(VIRTUAL_PROCESSOR_ID, v->arch.hvm_vmx.vpid);
keir@17416 701 }
keir@17416 702
keir@19073 703 if ( cpu_has_vmx_pat && paging_mode_hap(d) )
keir@19073 704 {
keir@19073 705 u64 host_pat, guest_pat;
keir@19073 706
keir@19073 707 rdmsrl(MSR_IA32_CR_PAT, host_pat);
keir@19073 708 guest_pat = 0x7040600070406ULL;
keir@19073 709
keir@19073 710 __vmwrite(HOST_PAT, host_pat);
keir@19073 711 __vmwrite(GUEST_PAT, guest_pat);
keir@19073 712 #ifdef __i386__
keir@19073 713 __vmwrite(HOST_PAT_HIGH, host_pat >> 32);
keir@19073 714 __vmwrite(GUEST_PAT_HIGH, guest_pat >> 32);
keir@19073 715 #endif
keir@19073 716 }
keir@19073 717
steven@13059 718 vmx_vmcs_exit(v);
kfraser@12263 719
Tim@13909 720 paging_update_paging_modes(v); /* will update HOST & GUEST_CR3 as reqd */
kfraser@15246 721
kfraser@15246 722 vmx_vlapic_msr_changed(v);
keir@16148 723
keir@16148 724 return 0;
keir@16148 725 }
keir@16148 726
keir@17893 727 int vmx_read_guest_msr(u32 msr, u64 *val)
keir@16148 728 {
keir@17893 729 struct vcpu *curr = current;
keir@17893 730 unsigned int i, msr_count = curr->arch.hvm_vmx.msr_count;
keir@17893 731 const struct vmx_msr_entry *msr_area = curr->arch.hvm_vmx.msr_area;
keir@16148 732
keir@16148 733 for ( i = 0; i < msr_count; i++ )
keir@16148 734 {
keir@16148 735 if ( msr_area[i].index == msr )
keir@16148 736 {
keir@16148 737 *val = msr_area[i].data;
keir@16148 738 return 0;
keir@16148 739 }
keir@16148 740 }
keir@16148 741
keir@16148 742 return -ESRCH;
keir@16148 743 }
keir@16148 744
keir@17893 745 int vmx_write_guest_msr(u32 msr, u64 val)
keir@16148 746 {
keir@17893 747 struct vcpu *curr = current;
keir@17893 748 unsigned int i, msr_count = curr->arch.hvm_vmx.msr_count;
keir@17893 749 struct vmx_msr_entry *msr_area = curr->arch.hvm_vmx.msr_area;
keir@16148 750
keir@16148 751 for ( i = 0; i < msr_count; i++ )
keir@16148 752 {
keir@16148 753 if ( msr_area[i].index == msr )
keir@16148 754 {
keir@16148 755 msr_area[i].data = val;
keir@16148 756 return 0;
keir@16148 757 }
keir@16148 758 }
keir@16148 759
keir@16148 760 return -ESRCH;
keir@16148 761 }
keir@16148 762
keir@17893 763 int vmx_add_guest_msr(u32 msr)
keir@16148 764 {
keir@17893 765 struct vcpu *curr = current;
keir@17893 766 unsigned int i, msr_count = curr->arch.hvm_vmx.msr_count;
keir@17893 767 struct vmx_msr_entry *msr_area = curr->arch.hvm_vmx.msr_area;
keir@17893 768
keir@17893 769 if ( msr_area == NULL )
keir@17893 770 {
keir@17893 771 if ( (msr_area = alloc_xenheap_page()) == NULL )
keir@17893 772 return -ENOMEM;
keir@17893 773 curr->arch.hvm_vmx.msr_area = msr_area;
keir@17893 774 __vmwrite(VM_EXIT_MSR_STORE_ADDR, virt_to_maddr(msr_area));
keir@17893 775 __vmwrite(VM_ENTRY_MSR_LOAD_ADDR, virt_to_maddr(msr_area));
keir@17893 776 }
keir@16148 777
keir@16148 778 for ( i = 0; i < msr_count; i++ )
keir@16148 779 if ( msr_area[i].index == msr )
keir@16148 780 return 0;
keir@16148 781
keir@16148 782 if ( msr_count == (PAGE_SIZE / sizeof(struct vmx_msr_entry)) )
keir@16148 783 return -ENOSPC;
keir@16148 784
keir@16148 785 msr_area[msr_count].index = msr;
keir@16148 786 msr_area[msr_count].mbz = 0;
keir@16148 787 msr_area[msr_count].data = 0;
keir@17893 788 curr->arch.hvm_vmx.msr_count = ++msr_count;
keir@16148 789 __vmwrite(VM_EXIT_MSR_STORE_COUNT, msr_count);
keir@16148 790 __vmwrite(VM_ENTRY_MSR_LOAD_COUNT, msr_count);
keir@16148 791
keir@16148 792 return 0;
keir@16148 793 }
keir@16148 794
keir@17893 795 int vmx_add_host_load_msr(u32 msr)
keir@16148 796 {
keir@17893 797 struct vcpu *curr = current;
keir@17893 798 unsigned int i, msr_count = curr->arch.hvm_vmx.host_msr_count;
keir@17893 799 struct vmx_msr_entry *msr_area = curr->arch.hvm_vmx.host_msr_area;
keir@17893 800
keir@17893 801 if ( msr_area == NULL )
keir@17893 802 {
keir@17893 803 if ( (msr_area = alloc_xenheap_page()) == NULL )
keir@17893 804 return -ENOMEM;
keir@17893 805 curr->arch.hvm_vmx.host_msr_area = msr_area;
keir@17893 806 __vmwrite(VM_EXIT_MSR_LOAD_ADDR, virt_to_maddr(msr_area));
keir@17893 807 }
keir@16148 808
keir@16148 809 for ( i = 0; i < msr_count; i++ )
keir@16148 810 if ( msr_area[i].index == msr )
keir@16148 811 return 0;
keir@16148 812
keir@16148 813 if ( msr_count == (PAGE_SIZE / sizeof(struct vmx_msr_entry)) )
keir@16148 814 return -ENOSPC;
keir@16148 815
keir@16148 816 msr_area[msr_count].index = msr;
keir@16148 817 msr_area[msr_count].mbz = 0;
keir@16148 818 rdmsrl(msr, msr_area[msr_count].data);
keir@17893 819 curr->arch.hvm_vmx.host_msr_count = ++msr_count;
keir@16148 820 __vmwrite(VM_EXIT_MSR_LOAD_COUNT, msr_count);
keir@16148 821
keir@16148 822 return 0;
kfraser@10648 823 }
kaf24@8708 824
kfraser@10648 825 int vmx_create_vmcs(struct vcpu *v)
kfraser@10648 826 {
kfraser@15583 827 struct arch_vmx_struct *arch_vmx = &v->arch.hvm_vmx;
keir@16148 828 int rc;
kfraser@15583 829
keir@17893 830 if ( (arch_vmx->vmcs = vmx_alloc_vmcs()) == NULL )
keir@17893 831 return -ENOMEM;
kfraser@13858 832
keir@17893 833 INIT_LIST_HEAD(&arch_vmx->active_list);
keir@17893 834 __vmpclear(virt_to_maddr(arch_vmx->vmcs));
keir@17893 835 arch_vmx->active_cpu = -1;
keir@17893 836 arch_vmx->launched = 0;
kfraser@12320 837
keir@16148 838 if ( (rc = construct_vmcs(v)) != 0 )
keir@16148 839 {
keir@16148 840 vmx_free_vmcs(arch_vmx->vmcs);
keir@16148 841 return rc;
keir@16148 842 }
kfraser@12262 843
kfraser@10648 844 return 0;
kaf24@8708 845 }
kaf24@8708 846
kaf24@10356 847 void vmx_destroy_vmcs(struct vcpu *v)
kaf24@8708 848 {
kaf24@10356 849 struct arch_vmx_struct *arch_vmx = &v->arch.hvm_vmx;
kaf24@10356 850
kaf24@10356 851 vmx_clear_vmcs(v);
kaf24@10356 852
kfraser@10648 853 vmx_free_vmcs(arch_vmx->vmcs);
keir@17893 854
keir@17893 855 free_xenheap_page(v->arch.hvm_vmx.host_msr_area);
keir@17893 856 free_xenheap_page(v->arch.hvm_vmx.msr_area);
keir@17893 857 free_xenheap_page(v->arch.hvm_vmx.msr_bitmap);
kaf24@8708 858 }
kaf24@8708 859
keir@17394 860 void vm_launch_fail(void)
kaf24@8708 861 {
kfraser@12320 862 unsigned long error = __vmread(VM_INSTRUCTION_ERROR);
kaf24@8708 863 printk("<vm_launch_fail> error code %lx\n", error);
kfraser@12409 864 domain_crash_synchronous();
kaf24@8708 865 }
kaf24@8708 866
keir@17394 867 void vm_resume_fail(void)
kaf24@8708 868 {
kfraser@12320 869 unsigned long error = __vmread(VM_INSTRUCTION_ERROR);
kaf24@8708 870 printk("<vm_resume_fail> error code %lx\n", error);
kfraser@12409 871 domain_crash_synchronous();
kaf24@8708 872 }
kaf24@8708 873
keir@16384 874 static void wbinvd_ipi(void *info)
keir@16186 875 {
keir@16186 876 wbinvd();
keir@16186 877 }
keir@16186 878
keir@14635 879 void vmx_do_resume(struct vcpu *v)
kaf24@8708 880 {
keir@14635 881 bool_t debug_state;
keir@14635 882
kaf24@10356 883 if ( v->arch.hvm_vmx.active_cpu == smp_processor_id() )
kaf24@8708 884 {
kfraser@15486 885 if ( v->arch.hvm_vmx.vmcs != this_cpu(current_vmcs) )
kfraser@15486 886 vmx_load_vmcs(v);
kaf24@8708 887 }
kaf24@8708 888 else
kaf24@8708 889 {
keir@16384 890 /*
keir@16384 891 * For a pass-through domain, the guest's PCI-E device driver may use
keir@16384 892 * "Non-Snoop" I/O and explicitly WBINVD or CLFLUSH a RAM region.
keir@16384 893 * Since migration may occur before the WBINVD or CLFLUSH, we need to
keir@16384 894 * maintain data consistency either by:
keir@16384 895 * 1: flushing the cache (WBINVD) when the guest is scheduled out, if
keir@16384 896 * there is no WBINVD exit, or
keir@16384 897 * 2: executing WBINVD on all dirty pCPUs when the guest's WBINVD exits.
keir@16186 898 */
keir@17974 899 if ( has_arch_pdevs(v->domain) && !cpu_has_wbinvd_exiting )
keir@16186 900 {
keir@16186 901 int cpu = v->arch.hvm_vmx.active_cpu;
keir@16186 902 if ( cpu != -1 )
keir@16384 903 on_selected_cpus(cpumask_of_cpu(cpu), wbinvd_ipi, NULL, 1, 1);
keir@16186 904 }
keir@16186 905
kaf24@10356 906 vmx_clear_vmcs(v);
kaf24@10356 907 vmx_load_vmcs(v);
Tim@12494 908 hvm_migrate_timers(v);
kaf24@8806 909 vmx_set_host_env(v);
keir@17416 910 vpid_sync_vcpu_all(v);
kaf24@8708 911 }
kaf24@10356 912
keir@14635 913 debug_state = v->domain->debugger_attached;
keir@14635 914 if ( unlikely(v->arch.hvm_vcpu.debug_state_latch != debug_state) )
keir@14635 915 {
keir@14635 916 v->arch.hvm_vcpu.debug_state_latch = debug_state;
keir@19599 917 vmx_update_debug_state(v);
keir@14635 918 }
keir@14635 919
shand@11153 920 hvm_do_resume(v);
kaf24@10356 921 reset_stack_and_jump(vmx_asm_do_vmentry);
kaf24@8708 922 }
kaf24@8708 923
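/* Read a VMCS field, returning 0 if the VMREAD fails (e.g. field not present). */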
kfraser@15935 924 static unsigned long vmr(unsigned long field)
kfraser@15935 925 {
kfraser@15935 926 int rc;
kfraser@15935 927 unsigned long val;
kfraser@15935 928 val = __vmread_safe(field, &rc);
kfraser@15935 929 return rc ? 0 : val;
kfraser@15935 930 }
kfraser@15935 931
keir@18901 932 static void vmx_dump_sel(char *name, uint32_t selector)
keir@18901 933 {
keir@18901 934 uint32_t sel, attr, limit;
keir@18901 935 uint64_t base;
keir@18901 936 sel = vmr(selector);
keir@18901 937 attr = vmr(selector + (GUEST_ES_AR_BYTES - GUEST_ES_SELECTOR));
keir@18901 938 limit = vmr(selector + (GUEST_ES_LIMIT - GUEST_ES_SELECTOR));
keir@18901 939 base = vmr(selector + (GUEST_ES_BASE - GUEST_ES_SELECTOR));
keir@18901 940 printk("%s: sel=0x%04x, attr=0x%05x, limit=0x%08x, base=0x%016"PRIx64"\n",
keir@18901 941 name, sel, attr, limit, base);
keir@18901 942 }
keir@18901 943
keir@18901 944 static void vmx_dump_sel2(char *name, uint32_t lim)
keir@18901 945 {
keir@18901 946 uint32_t limit;
keir@18901 947 uint64_t base;
keir@18901 948 limit = vmr(lim);
keir@18901 949 base = vmr(lim + (GUEST_GDTR_BASE - GUEST_GDTR_LIMIT));
keir@18901 950 printk("%s: limit=0x%08x, base=0x%016"PRIx64"\n",
keir@18901 951 name, limit, base);
keir@18901 952 }
keir@18901 953
keir@16499 954 void vmcs_dump_vcpu(struct vcpu *v)
kaf24@10079 955 {
keir@16499 956 struct cpu_user_regs *regs = &v->arch.guest_context.user_regs;
kfraser@15935 957 unsigned long long x;
kfraser@15935 958
keir@16499 959 if ( v == current )
keir@16499 960 regs = guest_cpu_user_regs();
keir@16499 961
keir@16499 962 vmx_vmcs_enter(v);
keir@16499 963
kfraser@15935 964 printk("*** Guest State ***\n");
kfraser@15935 965 printk("CR0: actual=0x%016llx, shadow=0x%016llx, gh_mask=%016llx\n",
kfraser@15935 966 (unsigned long long)vmr(GUEST_CR0),
kfraser@15935 967 (unsigned long long)vmr(CR0_READ_SHADOW),
kfraser@15935 968 (unsigned long long)vmr(CR0_GUEST_HOST_MASK));
kfraser@15935 969 printk("CR4: actual=0x%016llx, shadow=0x%016llx, gh_mask=%016llx\n",
kfraser@15935 970 (unsigned long long)vmr(GUEST_CR4),
kfraser@15935 971 (unsigned long long)vmr(CR4_READ_SHADOW),
kfraser@15935 972 (unsigned long long)vmr(CR4_GUEST_HOST_MASK));
kfraser@15935 973 printk("CR3: actual=0x%016llx, target_count=%d\n",
kfraser@15935 974 (unsigned long long)vmr(GUEST_CR3),
kfraser@15935 975 (int)vmr(CR3_TARGET_COUNT));
kfraser@15935 976 printk(" target0=%016llx, target1=%016llx\n",
kfraser@15935 977 (unsigned long long)vmr(CR3_TARGET_VALUE0),
kfraser@15935 978 (unsigned long long)vmr(CR3_TARGET_VALUE1));
kfraser@15935 979 printk(" target2=%016llx, target3=%016llx\n",
kfraser@15935 980 (unsigned long long)vmr(CR3_TARGET_VALUE2),
kfraser@15935 981 (unsigned long long)vmr(CR3_TARGET_VALUE3));
keir@16499 982 printk("RSP = 0x%016llx (0x%016llx) RIP = 0x%016llx (0x%016llx)\n",
kfraser@15935 983 (unsigned long long)vmr(GUEST_RSP),
keir@16499 984 (unsigned long long)regs->esp,
keir@16499 985 (unsigned long long)vmr(GUEST_RIP),
keir@16499 986 (unsigned long long)regs->eip);
keir@16499 987 printk("RFLAGS=0x%016llx (0x%016llx) DR7 = 0x%016llx\n",
keir@15953 988 (unsigned long long)vmr(GUEST_RFLAGS),
keir@16499 989 (unsigned long long)regs->eflags,
keir@15953 990 (unsigned long long)vmr(GUEST_DR7));
kfraser@15935 991 printk("Sysenter RSP=%016llx CS:RIP=%04x:%016llx\n",
kfraser@15935 992 (unsigned long long)vmr(GUEST_SYSENTER_ESP),
kfraser@15935 993 (int)vmr(GUEST_SYSENTER_CS),
kfraser@15935 994 (unsigned long long)vmr(GUEST_SYSENTER_EIP));
keir@18901 995 vmx_dump_sel("CS", GUEST_CS_SELECTOR);
keir@18901 996 vmx_dump_sel("DS", GUEST_DS_SELECTOR);
keir@18901 997 vmx_dump_sel("SS", GUEST_SS_SELECTOR);
keir@18901 998 vmx_dump_sel("ES", GUEST_ES_SELECTOR);
keir@18901 999 vmx_dump_sel("FS", GUEST_FS_SELECTOR);
keir@18901 1000 vmx_dump_sel("GS", GUEST_GS_SELECTOR);
keir@18901 1001 vmx_dump_sel2("GDTR", GUEST_GDTR_LIMIT);
keir@18901 1002 vmx_dump_sel("LDTR", GUEST_LDTR_SELECTOR);
keir@18901 1003 vmx_dump_sel2("IDTR", GUEST_IDTR_LIMIT);
keir@18901 1004 vmx_dump_sel("TR", GUEST_TR_SELECTOR);
keir@19073 1005 printk("Guest PAT = 0x%08x%08x\n",
keir@19073 1006 (uint32_t)vmr(GUEST_PAT_HIGH), (uint32_t)vmr(GUEST_PAT));
kfraser@15935 1007 x = (unsigned long long)vmr(TSC_OFFSET_HIGH) << 32;
kfraser@15935 1008 x |= (uint32_t)vmr(TSC_OFFSET);
kfraser@15935 1009 printk("TSC Offset = %016llx\n", x);
keir@17244 1010 x = (unsigned long long)vmr(GUEST_IA32_DEBUGCTL_HIGH) << 32;
kfraser@15935 1011 x |= (uint32_t)vmr(GUEST_IA32_DEBUGCTL);
kfraser@15935 1012 printk("DebugCtl=%016llx DebugExceptions=%016llx\n", x,
kfraser@15935 1013 (unsigned long long)vmr(GUEST_PENDING_DBG_EXCEPTIONS));
kfraser@15935 1014 printk("Interruptibility=%04x ActivityState=%04x\n",
kfraser@15935 1015 (int)vmr(GUEST_INTERRUPTIBILITY_INFO),
kfraser@15935 1016 (int)vmr(GUEST_ACTIVITY_STATE));
kfraser@15935 1017
kfraser@15935 1018 printk("*** Host State ***\n");
kfraser@15935 1019 printk("RSP = 0x%016llx RIP = 0x%016llx\n",
kfraser@15935 1020 (unsigned long long)vmr(HOST_RSP),
kfraser@15935 1021 (unsigned long long)vmr(HOST_RIP));
kfraser@15935 1022 printk("CS=%04x DS=%04x ES=%04x FS=%04x GS=%04x SS=%04x TR=%04x\n",
kfraser@15935 1023 (uint16_t)vmr(HOST_CS_SELECTOR),
kfraser@15935 1024 (uint16_t)vmr(HOST_DS_SELECTOR),
kfraser@15935 1025 (uint16_t)vmr(HOST_ES_SELECTOR),
kfraser@15935 1026 (uint16_t)vmr(HOST_FS_SELECTOR),
kfraser@15935 1027 (uint16_t)vmr(HOST_GS_SELECTOR),
kfraser@15935 1028 (uint16_t)vmr(HOST_SS_SELECTOR),
kfraser@15935 1029 (uint16_t)vmr(HOST_TR_SELECTOR));
kfraser@15935 1030 printk("FSBase=%016llx GSBase=%016llx TRBase=%016llx\n",
kfraser@15935 1031 (unsigned long long)vmr(HOST_FS_BASE),
kfraser@15935 1032 (unsigned long long)vmr(HOST_GS_BASE),
kfraser@15935 1033 (unsigned long long)vmr(HOST_TR_BASE));
kfraser@15935 1034 printk("GDTBase=%016llx IDTBase=%016llx\n",
kfraser@15935 1035 (unsigned long long)vmr(HOST_GDTR_BASE),
kfraser@15935 1036 (unsigned long long)vmr(HOST_IDTR_BASE));
kfraser@15935 1037 printk("CR0=%016llx CR3=%016llx CR4=%016llx\n",
kfraser@15935 1038 (unsigned long long)vmr(HOST_CR0),
kfraser@15935 1039 (unsigned long long)vmr(HOST_CR3),
kfraser@15935 1040 (unsigned long long)vmr(HOST_CR4));
kfraser@15935 1041 printk("Sysenter RSP=%016llx CS:RIP=%04x:%016llx\n",
keir@16375 1042 (unsigned long long)vmr(HOST_SYSENTER_ESP),
keir@16375 1043 (int)vmr(HOST_SYSENTER_CS),
keir@16375 1044 (unsigned long long)vmr(HOST_SYSENTER_EIP));
keir@19073 1045 printk("Host PAT = 0x%08x%08x\n",
keir@19073 1046 (uint32_t)vmr(HOST_PAT_HIGH), (uint32_t)vmr(HOST_PAT));
kfraser@15935 1047
kfraser@15935 1048 printk("*** Control State ***\n");
kfraser@15935 1049 printk("PinBased=%08x CPUBased=%08x SecondaryExec=%08x\n",
kfraser@15935 1050 (uint32_t)vmr(PIN_BASED_VM_EXEC_CONTROL),
kfraser@15935 1051 (uint32_t)vmr(CPU_BASED_VM_EXEC_CONTROL),
kfraser@15935 1052 (uint32_t)vmr(SECONDARY_VM_EXEC_CONTROL));
kfraser@15935 1053 printk("EntryControls=%08x ExitControls=%08x\n",
kfraser@15935 1054 (uint32_t)vmr(VM_ENTRY_CONTROLS),
kfraser@15935 1055 (uint32_t)vmr(VM_EXIT_CONTROLS));
kfraser@15935 1056 printk("ExceptionBitmap=%08x\n",
kfraser@15935 1057 (uint32_t)vmr(EXCEPTION_BITMAP));
kfraser@15935 1058 printk("VMEntry: intr_info=%08x errcode=%08x ilen=%08x\n",
kfraser@15935 1059 (uint32_t)vmr(VM_ENTRY_INTR_INFO),
kfraser@15935 1060 (uint32_t)vmr(VM_ENTRY_EXCEPTION_ERROR_CODE),
kfraser@15935 1061 (uint32_t)vmr(VM_ENTRY_INSTRUCTION_LEN));
kfraser@15935 1062 printk("VMExit: intr_info=%08x errcode=%08x ilen=%08x\n",
kfraser@15935 1063 (uint32_t)vmr(VM_EXIT_INTR_INFO),
kfraser@15935 1064 (uint32_t)vmr(VM_EXIT_INTR_ERROR_CODE),
kfraser@15935 1065 (uint32_t)vmr(VM_ENTRY_INSTRUCTION_LEN));
kfraser@15935 1066 printk(" reason=%08x qualification=%08x\n",
kfraser@15935 1067 (uint32_t)vmr(VM_EXIT_REASON),
kfraser@15935 1068 (uint32_t)vmr(EXIT_QUALIFICATION));
kfraser@15935 1069 printk("IDTVectoring: info=%08x errcode=%08x\n",
kfraser@15935 1070 (uint32_t)vmr(IDT_VECTORING_INFO),
kfraser@15935 1071 (uint32_t)vmr(IDT_VECTORING_ERROR_CODE));
kfraser@15935 1072 printk("TPR Threshold = 0x%02x\n",
kfraser@15935 1073 (uint32_t)vmr(TPR_THRESHOLD));
keir@17406 1074 printk("EPT pointer = 0x%08x%08x\n",
keir@17406 1075 (uint32_t)vmr(EPT_POINTER_HIGH), (uint32_t)vmr(EPT_POINTER));
keir@17416 1076 printk("Virtual processor ID = 0x%04x\n",
keir@17416 1077 (uint32_t)vmr(VIRTUAL_PROCESSOR_ID));
keir@16499 1078
keir@16499 1079 vmx_vmcs_exit(v);
kaf24@10079 1080 }
kaf24@10079 1081
kaf24@10079 1082 static void vmcs_dump(unsigned char ch)
kaf24@10079 1083 {
kaf24@10079 1084 struct domain *d;
kaf24@10079 1085 struct vcpu *v;
kaf24@10079 1086
kaf24@10079 1087 printk("*********** VMCS Areas **************\n");
kfraser@14058 1088
kfraser@14058 1089 rcu_read_lock(&domlist_read_lock);
kfraser@14058 1090
kfraser@12210 1091 for_each_domain ( d )
kfraser@12210 1092 {
kfraser@12210 1093 if ( !is_hvm_domain(d) )
kfraser@12210 1094 continue;
kaf24@10079 1095 printk("\n>>> Domain %d <<<\n", d->domain_id);
kfraser@12210 1096 for_each_vcpu ( d, v )
kfraser@12210 1097 {
kaf24@10079 1098 printk("\tVCPU %d\n", v->vcpu_id);
keir@16499 1099 vmcs_dump_vcpu(v);
kaf24@10079 1100 }
kaf24@10079 1101 }
kaf24@10079 1102
kfraser@14058 1103 rcu_read_unlock(&domlist_read_lock);
kfraser@14058 1104
kaf24@10079 1105 printk("**************************************\n");
kaf24@10079 1106 }
kaf24@10079 1107
shand@11153 1108 void setup_vmcs_dump(void)
kaf24@10079 1109 {
kaf24@10079 1110 register_keyhandler('v', vmcs_dump, "dump Intel's VMCS");
kaf24@10079 1111 }
kaf24@10079 1112
kaf24@10079 1113
kaf24@8708 1114 /*
kaf24@8708 1115 * Local variables:
kaf24@8708 1116 * mode: C
kaf24@8708 1117 * c-set-style: "BSD"
kaf24@8708 1118 * c-basic-offset: 4
kaf24@8708 1119 * tab-width: 4
kaf24@8708 1120 * indent-tabs-mode: nil
kaf24@8708 1121 * End:
kaf24@8708 1122 */