view xen/arch/x86/vmx_vmcs.c @ 5836:07884fe7c5ce

Save per-CPU IDT information into the VMCS host area.

Without this, we may save the IDT info from the wrong CPU into the VMCS.
This makes domU work in the presence of VMX domains on SMP/HT-enabled
machines.
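
In essence (judging from vmx_do_launch in the listing below), the fix is to
re-read the IDT register with sidt on the CPU that is about to launch the
VMCS, rather than reusing values captured on whichever CPU constructed it.
A minimal sketch of the idea, using the types and VMCS fields from this
file (the helper name itself is hypothetical):

    /* Hypothetical helper: record the current CPU's IDT in the VMCS host
     * area. It must run on the CPU that will execute VMLAUNCH/VMRESUME,
     * since each CPU has its own IDT. */
    static void vmx_set_host_idt(void)
    {
        struct Xgt_desc_struct desc;

        /* sidt stores the IDT limit and base of the CPU executing it. */
        __asm__ __volatile__ ("sidt (%0)" :: "a"(&desc) : "memory");
        __vmwrite(HOST_IDTR_BASE, desc.address);
    }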

Signed-off-by: Chengyuan Li <chengyuan.li@intel.com>
Signed-off-by: Xiaofeng Ling <xiaofeng.ling@intel.com>
Signed-off-by: Arun Sharma <arun.sharma@intel.com>
author kaf24@firebug.cl.cam.ac.uk
date Thu Jul 21 13:59:49 2005 +0000 (2005-07-21)
parents a83ac0806d6b
children 6e11af443eb1
/*
 * vmx_vmcs.c: VMCS management
 * Copyright (c) 2004, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 */

#include <xen/config.h>
#include <xen/init.h>
#include <xen/mm.h>
#include <xen/lib.h>
#include <xen/errno.h>
#include <xen/domain_page.h>
#include <asm/current.h>
#include <asm/cpufeature.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/vmx.h>
#include <asm/flushtlb.h>
#include <xen/event.h>
#include <xen/kernel.h>
#include <public/io/ioreq.h>
#if CONFIG_PAGING_LEVELS >= 4
#include <asm/shadow_64.h>
#endif

#ifdef CONFIG_VMX

struct vmcs_struct *alloc_vmcs(void)
{
    struct vmcs_struct *vmcs;
    u32 vmx_msr_low, vmx_msr_high;

    rdmsr(MSR_IA32_VMX_BASIC_MSR, vmx_msr_low, vmx_msr_high);
    vmcs_size = vmx_msr_high & 0x1fff;
    vmcs = alloc_xenheap_pages(get_order(vmcs_size));
    memset((char *)vmcs, 0, vmcs_size); /* don't remove this */

    vmcs->vmcs_revision_id = vmx_msr_low;
    return vmcs;
}

void free_vmcs(struct vmcs_struct *vmcs)
{
    int order;

    order = get_order(vmcs_size);
    free_xenheap_pages(vmcs, order);
}

static inline int construct_vmcs_controls(void)
{
    int error = 0;

    error |= __vmwrite(PIN_BASED_VM_EXEC_CONTROL,
                       MONITOR_PIN_BASED_EXEC_CONTROLS);

    error |= __vmwrite(CPU_BASED_VM_EXEC_CONTROL,
                       MONITOR_CPU_BASED_EXEC_CONTROLS);

    error |= __vmwrite(VM_EXIT_CONTROLS, MONITOR_VM_EXIT_CONTROLS);

    error |= __vmwrite(VM_ENTRY_CONTROLS, MONITOR_VM_ENTRY_CONTROLS);

    return error;
}

#define GUEST_SEGMENT_LIMIT     0xffffffff
#define HOST_SEGMENT_LIMIT      0xffffffff

struct host_execution_env {
    /* selectors */
    unsigned short ldtr_selector;
    unsigned short tr_selector;
    unsigned short ds_selector;
    unsigned short cs_selector;
    /* limits */
    unsigned short gdtr_limit;
    unsigned short ldtr_limit;
    unsigned short idtr_limit;
    unsigned short tr_limit;
    /* base */
    unsigned long gdtr_base;
    unsigned long ldtr_base;
    unsigned long idtr_base;
    unsigned long tr_base;
    unsigned long ds_base;
    unsigned long cs_base;
#ifdef __x86_64__
    unsigned long fs_base;
    unsigned long gs_base;
#endif

    /* control registers */
    unsigned long cr3;
    unsigned long cr0;
    unsigned long cr4;
    unsigned long dr7;
};

#define round_pgdown(_p) ((_p)&PAGE_MASK) /* copied from domain.c */

int vmx_setup_platform(struct vcpu *d, struct cpu_user_regs *regs)
{
    int i;
    unsigned int n;
    unsigned long *p, mpfn, offset, addr;
    struct e820entry *e820p;
    unsigned long gpfn = 0;

    local_flush_tlb_pge();
    regs->ebx = 0;   /* Linux expects ebx to be 0 for boot proc */

    n = regs->ecx;
    if (n > 32) {
        VMX_DBG_LOG(DBG_LEVEL_1, "Too many e820 entries: %d", n);
        return -1;
    }

    addr = regs->edi;
    offset = (addr & ~PAGE_MASK);
    addr = round_pgdown(addr);

    mpfn = phys_to_machine_mapping(addr >> PAGE_SHIFT);
    p = map_domain_page(mpfn);

    e820p = (struct e820entry *) ((unsigned long) p + offset);

#ifndef NDEBUG
    print_e820_memory_map(e820p, n);
#endif

    for ( i = 0; i < n; i++ )
    {
        if ( e820p[i].type == E820_SHARED_PAGE )
        {
            gpfn = (e820p[i].addr >> PAGE_SHIFT);
            break;
        }
    }

    if ( gpfn == 0 )
    {
        unmap_domain_page(p);
        return -1;
    }

    unmap_domain_page(p);

    /* Initialise shared page */
    mpfn = phys_to_machine_mapping(gpfn);
    p = map_domain_page(mpfn);
    d->domain->arch.vmx_platform.shared_page_va = (unsigned long)p;

    VMX_DBG_LOG(DBG_LEVEL_1, "eport: %x\n", iopacket_port(d->domain));

    clear_bit(iopacket_port(d->domain),
              &d->domain->shared_info->evtchn_mask[0]);

    return 0;
}

void vmx_do_launch(struct vcpu *v)
{
    /* Update CR3, GDT, LDT, TR */
    unsigned int tr, cpu, error = 0;
    struct host_execution_env host_env;
    struct Xgt_desc_struct desc;
    unsigned long pfn = 0;
    struct pfn_info *page;
    struct cpu_user_regs *regs = guest_cpu_user_regs();

    vmx_stts();

    cpu = smp_processor_id();

    page = (struct pfn_info *) alloc_domheap_page(NULL);
    pfn = (unsigned long) (page - frame_table);

    vmx_setup_platform(v, regs);

    /* Capture the IDT of the CPU we are launching on, so the VMCS host
     * area matches this CPU rather than the one the VMCS was built on. */
    __asm__ __volatile__ ("sidt (%0) \n" :: "a"(&desc) : "memory");
    host_env.idtr_limit = desc.size;
    host_env.idtr_base = desc.address;
    error |= __vmwrite(HOST_IDTR_BASE, host_env.idtr_base);

    __asm__ __volatile__ ("sgdt (%0) \n" :: "a"(&desc) : "memory");
    host_env.gdtr_limit = desc.size;
    host_env.gdtr_base = desc.address;
    error |= __vmwrite(HOST_GDTR_BASE, host_env.gdtr_base);

    error |= __vmwrite(GUEST_LDTR_SELECTOR, 0);
    error |= __vmwrite(GUEST_LDTR_BASE, 0);
    error |= __vmwrite(GUEST_LDTR_LIMIT, 0);

    __asm__ __volatile__ ("str (%0) \n" :: "a"(&tr) : "memory");
    host_env.tr_selector = tr;
    host_env.tr_limit = sizeof(struct tss_struct);
    host_env.tr_base = (unsigned long) &init_tss[cpu];

    error |= __vmwrite(HOST_TR_SELECTOR, host_env.tr_selector);
    error |= __vmwrite(HOST_TR_BASE, host_env.tr_base);
    error |= __vmwrite(GUEST_TR_BASE, 0);
    error |= __vmwrite(GUEST_TR_LIMIT, 0xff);

    __vmwrite(GUEST_CR3, pagetable_get_paddr(v->arch.guest_table));
    __vmwrite(HOST_CR3, pagetable_get_paddr(v->arch.monitor_table));
    __vmwrite(HOST_RSP, (unsigned long)get_stack_bottom());

    v->arch.schedule_tail = arch_vmx_do_resume;
}

/*
 * Initially set the same environment as the host.
 */
static inline int
construct_init_vmcs_guest(struct cpu_user_regs *regs,
                          struct vcpu_guest_context *ctxt,
                          struct host_execution_env *host_env)
{
    int error = 0;
    union vmcs_arbytes arbytes;
    unsigned long dr7;
    unsigned long eflags, shadow_cr;

    /* MSR */
    error |= __vmwrite(VM_EXIT_MSR_LOAD_ADDR, 0);
    error |= __vmwrite(VM_EXIT_MSR_STORE_ADDR, 0);

    error |= __vmwrite(VM_EXIT_MSR_STORE_COUNT, 0);
    error |= __vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0);
    error |= __vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0);
    /* interrupt */
    error |= __vmwrite(VM_ENTRY_INTR_INFO_FIELD, 0);
    /* mask */
    error |= __vmwrite(CR0_GUEST_HOST_MASK, -1UL);
    error |= __vmwrite(CR4_GUEST_HOST_MASK, -1UL);

    error |= __vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0);
    error |= __vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, 0);

    /* TSC */
    error |= __vmwrite(TSC_OFFSET, 0);
    error |= __vmwrite(CR3_TARGET_COUNT, 0);

    /* Guest selectors */
    error |= __vmwrite(GUEST_CS_SELECTOR, regs->cs);
    error |= __vmwrite(GUEST_ES_SELECTOR, regs->es);
    error |= __vmwrite(GUEST_SS_SELECTOR, regs->ss);
    error |= __vmwrite(GUEST_DS_SELECTOR, regs->ds);
    error |= __vmwrite(GUEST_FS_SELECTOR, regs->fs);
    error |= __vmwrite(GUEST_GS_SELECTOR, regs->gs);

    /* Guest segment limits */
    error |= __vmwrite(GUEST_CS_LIMIT, GUEST_SEGMENT_LIMIT);
    error |= __vmwrite(GUEST_ES_LIMIT, GUEST_SEGMENT_LIMIT);
    error |= __vmwrite(GUEST_SS_LIMIT, GUEST_SEGMENT_LIMIT);
    error |= __vmwrite(GUEST_DS_LIMIT, GUEST_SEGMENT_LIMIT);
    error |= __vmwrite(GUEST_FS_LIMIT, GUEST_SEGMENT_LIMIT);
    error |= __vmwrite(GUEST_GS_LIMIT, GUEST_SEGMENT_LIMIT);

    error |= __vmwrite(GUEST_IDTR_LIMIT, host_env->idtr_limit);

    /* AR bytes */
    arbytes.bytes = 0;
    arbytes.fields.seg_type = 0x3;          /* type = 3 */
    arbytes.fields.s = 1;                   /* code or data, i.e. not system */
    arbytes.fields.dpl = 0;                 /* DPL = 0 */
    arbytes.fields.p = 1;                   /* segment present */
    arbytes.fields.default_ops_size = 1;    /* 32-bit */
    arbytes.fields.g = 1;
    arbytes.fields.null_bit = 0;            /* not null */

    error |= __vmwrite(GUEST_ES_AR_BYTES, arbytes.bytes);
    error |= __vmwrite(GUEST_SS_AR_BYTES, arbytes.bytes);
    error |= __vmwrite(GUEST_DS_AR_BYTES, arbytes.bytes);
    error |= __vmwrite(GUEST_FS_AR_BYTES, arbytes.bytes);
    error |= __vmwrite(GUEST_GS_AR_BYTES, arbytes.bytes);

    arbytes.fields.seg_type = 0xb;          /* type = 0xb */
    error |= __vmwrite(GUEST_CS_AR_BYTES, arbytes.bytes);

    /* Guest GDTR base/limit are passed in via edx/eax */
    error |= __vmwrite(GUEST_GDTR_BASE, regs->edx);
    regs->edx = 0;
    error |= __vmwrite(GUEST_GDTR_LIMIT, regs->eax);
    regs->eax = 0;

    arbytes.fields.s = 0;                   /* not code or data segment */
    arbytes.fields.seg_type = 0x2;          /* LDT */
    arbytes.fields.default_ops_size = 0;    /* 16-bit */
    arbytes.fields.g = 0;
    error |= __vmwrite(GUEST_LDTR_AR_BYTES, arbytes.bytes);

    arbytes.fields.seg_type = 0xb;          /* 32-bit TSS (busy) */
    error |= __vmwrite(GUEST_TR_AR_BYTES, arbytes.bytes);

    error |= __vmwrite(GUEST_CR0, host_env->cr0); /* same CR0 */

    /* Initially PG is not set in the CR0 read shadow */
    shadow_cr = host_env->cr0;
    shadow_cr &= ~X86_CR0_PG;
    error |= __vmwrite(CR0_READ_SHADOW, shadow_cr);
    /* CR3 is set in vmx_final_setup_guest */
#ifdef __x86_64__
    error |= __vmwrite(GUEST_CR4, host_env->cr4 & ~X86_CR4_PSE);
#else
    error |= __vmwrite(GUEST_CR4, host_env->cr4);
#endif
    shadow_cr = host_env->cr4;

#ifdef __x86_64__
    shadow_cr &= ~(X86_CR4_PGE | X86_CR4_VMXE | X86_CR4_PAE);
#else
    shadow_cr &= ~(X86_CR4_PGE | X86_CR4_VMXE);
#endif
    error |= __vmwrite(CR4_READ_SHADOW, shadow_cr);

    error |= __vmwrite(GUEST_ES_BASE, host_env->ds_base);
    error |= __vmwrite(GUEST_CS_BASE, host_env->cs_base);
    error |= __vmwrite(GUEST_SS_BASE, host_env->ds_base);
    error |= __vmwrite(GUEST_DS_BASE, host_env->ds_base);
    error |= __vmwrite(GUEST_FS_BASE, host_env->ds_base);
    error |= __vmwrite(GUEST_GS_BASE, host_env->ds_base);
    error |= __vmwrite(GUEST_IDTR_BASE, host_env->idtr_base);

    error |= __vmwrite(GUEST_RSP, regs->esp);
    error |= __vmwrite(GUEST_RIP, regs->eip);

    eflags = regs->eflags & ~VMCS_EFLAGS_RESERVED_0; /* clear 0s */
    eflags |= VMCS_EFLAGS_RESERVED_1;                /* set 1s */

    error |= __vmwrite(GUEST_RFLAGS, eflags);

    error |= __vmwrite(GUEST_INTERRUPTIBILITY_INFO, 0);
    __asm__ __volatile__ ("mov %%dr7, %0\n" : "=r" (dr7));
    error |= __vmwrite(GUEST_DR7, dr7);
    error |= __vmwrite(VMCS_LINK_POINTER, 0xffffffff);
    error |= __vmwrite(VMCS_LINK_POINTER_HIGH, 0xffffffff);

    return error;
}

static inline int construct_vmcs_host(struct host_execution_env *host_env)
{
    int error = 0;
    unsigned long crn;

    /* Host selectors */
    host_env->ds_selector = __HYPERVISOR_DS;
    error |= __vmwrite(HOST_ES_SELECTOR, host_env->ds_selector);
    error |= __vmwrite(HOST_SS_SELECTOR, host_env->ds_selector);
    error |= __vmwrite(HOST_DS_SELECTOR, host_env->ds_selector);
#if defined (__i386__)
    error |= __vmwrite(HOST_FS_SELECTOR, host_env->ds_selector);
    error |= __vmwrite(HOST_GS_SELECTOR, host_env->ds_selector);
    error |= __vmwrite(HOST_FS_BASE, host_env->ds_base);
    error |= __vmwrite(HOST_GS_BASE, host_env->ds_base);

#else
    rdmsrl(MSR_FS_BASE, host_env->fs_base);
    rdmsrl(MSR_GS_BASE, host_env->gs_base);
    error |= __vmwrite(HOST_FS_BASE, host_env->fs_base);
    error |= __vmwrite(HOST_GS_BASE, host_env->gs_base);

#endif
    host_env->cs_selector = __HYPERVISOR_CS;
    error |= __vmwrite(HOST_CS_SELECTOR, host_env->cs_selector);

    host_env->ds_base = 0;
    host_env->cs_base = 0;

    __asm__ __volatile__ ("mov %%cr0,%0" : "=r" (crn) : );
    host_env->cr0 = crn;
    error |= __vmwrite(HOST_CR0, crn); /* same CR0 */

    /* CR3 is set in vmx_final_setup_hostos */
    __asm__ __volatile__ ("mov %%cr4,%0" : "=r" (crn) : );
    host_env->cr4 = crn;
    error |= __vmwrite(HOST_CR4, crn);

    error |= __vmwrite(HOST_RIP, (unsigned long) vmx_asm_vmexit_handler);
#ifdef __x86_64__
    /* TBD: support cr8 for 64-bit guest */
    __vmwrite(VIRTUAL_APIC_PAGE_ADDR, 0);
    __vmwrite(TPR_THRESHOLD, 0);
    __vmwrite(SECONDARY_VM_EXEC_CONTROL, 0);
#endif

    return error;
}

/*
 * Need to extend to support full virtualization.
 * The variable use_host_env indicates whether the new VMCS should use
 * the same setup as the host (xenolinux).
 */

int construct_vmcs(struct arch_vmx_struct *arch_vmx,
                   struct cpu_user_regs *regs,
                   struct vcpu_guest_context *ctxt,
                   int use_host_env)
{
    int error;
    u64 vmcs_phys_ptr;

    struct host_execution_env host_env;

    if (use_host_env != VMCS_USE_HOST_ENV)
        return -EINVAL;

    memset(&host_env, 0, sizeof(struct host_execution_env));

    vmcs_phys_ptr = (u64) virt_to_phys(arch_vmx->vmcs);

    if ((error = __vmpclear(vmcs_phys_ptr))) {
        printk("construct_vmcs: VMCLEAR failed\n");
        return -EINVAL;
    }
    if ((error = load_vmcs(arch_vmx, vmcs_phys_ptr))) {
        printk("construct_vmcs: load_vmcs failed: VMCS = %lx\n",
               (unsigned long) vmcs_phys_ptr);
        return -EINVAL;
    }
    if ((error = construct_vmcs_controls())) {
        printk("construct_vmcs: construct_vmcs_controls failed\n");
        return -EINVAL;
    }
    /* host selectors */
    if ((error = construct_vmcs_host(&host_env))) {
        printk("construct_vmcs: construct_vmcs_host failed\n");
        return -EINVAL;
    }
    /* guest selectors */
    if ((error = construct_init_vmcs_guest(regs, ctxt, &host_env))) {
        printk("construct_vmcs: construct_vmcs_guest failed\n");
        return -EINVAL;
    }

    if ((error = __vmwrite(EXCEPTION_BITMAP,
                           MONITOR_DEFAULT_EXCEPTION_BITMAP))) {
        printk("construct_vmcs: setting exception bitmap failed\n");
        return -EINVAL;
    }

    return 0;
}

int load_vmcs(struct arch_vmx_struct *arch_vmx, u64 phys_ptr)
{
    int error;

    if ((error = __vmptrld(phys_ptr))) {
        clear_bit(ARCH_VMX_VMCS_LOADED, &arch_vmx->flags);
        return error;
    }
    set_bit(ARCH_VMX_VMCS_LOADED, &arch_vmx->flags);
    return 0;
}

int store_vmcs(struct arch_vmx_struct *arch_vmx, u64 phys_ptr)
{
    /* take the current VMCS */
    __vmptrst(phys_ptr);
    clear_bit(ARCH_VMX_VMCS_LOADED, &arch_vmx->flags);
    return 0;
}

void vm_launch_fail(unsigned long eflags)
{
    __vmx_bug(guest_cpu_user_regs());
}

void vm_resume_fail(unsigned long eflags)
{
    __vmx_bug(guest_cpu_user_regs());
}

#endif /* CONFIG_VMX */

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */