ia64/xen-unstable

xen/arch/x86/vmx_vmcs.c @ 6114:d1034eae9708

Improved error reporting on vmlaunch/vmresume failure.

Signed-off-by: Yunhong Jiang <yunhong.jiang@intel.com>
Signed-off-by: Arun Sharma <arun.sharma@intel.com>

author:   kaf24@firebug.cl.cam.ac.uk
date:     Thu Aug 11 21:01:10 2005 +0000 (2005-08-11)
parents:  f294acb25858
children: bf98996dded2
/*
 * vmx_vmcs.c: VMCS management
 * Copyright (c) 2004, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 */
#include <xen/config.h>
#include <xen/init.h>
#include <xen/mm.h>
#include <xen/lib.h>
#include <xen/errno.h>
#include <xen/domain_page.h>
#include <asm/current.h>
#include <asm/cpufeature.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/vmx.h>
#include <asm/flushtlb.h>
#include <xen/event.h>
#include <xen/kernel.h>
#include <public/io/ioreq.h>
#if CONFIG_PAGING_LEVELS >= 4
#include <asm/shadow_64.h>
#endif

#ifdef CONFIG_VMX
struct vmcs_struct *alloc_vmcs(void)
{
    struct vmcs_struct *vmcs;
    u32 vmx_msr_low, vmx_msr_high;

    /* Bits 44:32 of IA32_VMX_BASIC give the VMCS region size. */
    rdmsr(MSR_IA32_VMX_BASIC_MSR, vmx_msr_low, vmx_msr_high);
    vmcs_size = vmx_msr_high & 0x1fff;
    vmcs = alloc_xenheap_pages(get_order(vmcs_size));
    memset((char *)vmcs, 0, vmcs_size); /* don't remove this: the region must start out zeroed */

    vmcs->vmcs_revision_id = vmx_msr_low;
    return vmcs;
}
void free_vmcs(struct vmcs_struct *vmcs)
{
    int order;

    order = get_order(vmcs_size);
    free_xenheap_pages(vmcs, order);
}
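
/*
 * Illustrative sketch (not part of the original file): how per-vcpu
 * setup code might pair alloc_vmcs()/free_vmcs().  The error handling
 * shown is an assumption for illustration; only the arch_vmx->vmcs
 * field name is taken from this file.
 *
 *     struct vmcs_struct *vmcs = alloc_vmcs();
 *     if (vmcs == NULL)
 *         return -ENOMEM;
 *     arch_vmx->vmcs = vmcs;
 *     ...
 *     free_vmcs(arch_vmx->vmcs);       (on teardown)
 */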
static inline int construct_vmcs_controls(struct arch_vmx_struct *arch_vmx)
{
    int error = 0;
    void *io_bitmap_a;
    void *io_bitmap_b;

    error |= __vmwrite(PIN_BASED_VM_EXEC_CONTROL,
                       MONITOR_PIN_BASED_EXEC_CONTROLS);

    error |= __vmwrite(CPU_BASED_VM_EXEC_CONTROL,
                       MONITOR_CPU_BASED_EXEC_CONTROLS);

    error |= __vmwrite(VM_EXIT_CONTROLS, MONITOR_VM_EXIT_CONTROLS);

    error |= __vmwrite(VM_ENTRY_CONTROLS, MONITOR_VM_ENTRY_CONTROLS);

    /* need to use 0x1000 instead of PAGE_SIZE */
    io_bitmap_a = (void *) alloc_xenheap_pages(get_order(0x1000));
    io_bitmap_b = (void *) alloc_xenheap_pages(get_order(0x1000));
    memset(io_bitmap_a, 0xff, 0x1000);
    /* don't trap guest accesses to the debug port */
    clear_bit(PC_DEBUG_PORT, io_bitmap_a);
    memset(io_bitmap_b, 0xff, 0x1000);

    error |= __vmwrite(IO_BITMAP_A, (u64) virt_to_phys(io_bitmap_a));
    error |= __vmwrite(IO_BITMAP_B, (u64) virt_to_phys(io_bitmap_b));

    arch_vmx->io_bitmap_a = io_bitmap_a;
    arch_vmx->io_bitmap_b = io_bitmap_b;

    return error;
}
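
/*
 * Illustrative note (not in the original file): a set bit in an I/O
 * bitmap forces a VM exit when the guest touches that port, so the
 * memset(0xff) above traps every port and clear_bit() selectively
 * opens one up.  Passing another port straight through to the guest
 * would look the same (hypothetical example; bitmap A covers ports
 * 0x0000-0x7fff, bitmap B the rest):
 *
 *     clear_bit(0x60, io_bitmap_a);    keyboard controller data port
 */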
#define GUEST_SEGMENT_LIMIT 0xffffffff
#define HOST_SEGMENT_LIMIT 0xffffffff

struct host_execution_env {
    /* selectors */
    unsigned short ldtr_selector;
    unsigned short tr_selector;
    unsigned short ds_selector;
    unsigned short cs_selector;
    /* limits */
    unsigned short gdtr_limit;
    unsigned short ldtr_limit;
    unsigned short idtr_limit;
    unsigned short tr_limit;
    /* base */
    unsigned long gdtr_base;
    unsigned long ldtr_base;
    unsigned long idtr_base;
    unsigned long tr_base;
    unsigned long ds_base;
    unsigned long cs_base;
#ifdef __x86_64__
    unsigned long fs_base;
    unsigned long gs_base;
#endif

    /* control registers */
    unsigned long cr3;
    unsigned long cr0;
    unsigned long cr4;
    unsigned long dr7;
};
#define round_pgdown(_p) ((_p)&PAGE_MASK) /* copied from domain.c */
int vmx_setup_platform(struct vcpu *d, struct cpu_user_regs *regs)
{
    int i;
    unsigned int n;
    unsigned long *p, mpfn, offset, addr;
    struct e820entry *e820p;
    unsigned long gpfn = 0;

    local_flush_tlb_pge();
    regs->ebx = 0;   /* Linux expects ebx to be 0 for boot proc */

    n = regs->ecx;
    if (n > 32) {
        VMX_DBG_LOG(DBG_LEVEL_1, "Too many e820 entries: %d", n);
        return -1;
    }

    addr = regs->edi;
    offset = (addr & ~PAGE_MASK);
    addr = round_pgdown(addr);

    mpfn = phys_to_machine_mapping(addr >> PAGE_SHIFT);
    p = map_domain_page(mpfn);

    e820p = (struct e820entry *) ((unsigned long) p + offset);

#ifndef NDEBUG
    print_e820_memory_map(e820p, n);
#endif

    for ( i = 0; i < n; i++ )
    {
        if ( e820p[i].type == E820_SHARED_PAGE )
        {
            gpfn = (e820p[i].addr >> PAGE_SHIFT);
            break;
        }
    }

    if ( gpfn == 0 )
    {
        unmap_domain_page(p);
        return -1;
    }

    unmap_domain_page(p);

    /* Initialise shared page */
    mpfn = phys_to_machine_mapping(gpfn);
    p = map_domain_page(mpfn);
    d->domain->arch.vmx_platform.shared_page_va = (unsigned long)p;

    VMX_DBG_LOG(DBG_LEVEL_1, "eport: %x\n", iopacket_port(d->domain));

    clear_bit(iopacket_port(d->domain),
              &d->domain->shared_info->evtchn_mask[0]);

    return 0;
}
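
/*
 * Illustrative sketch (an assumption about the domain builder, not
 * taken from this file): the scan above expects the builder to plant
 * one e820 entry of type E820_SHARED_PAGE describing the ioreq shared
 * page.  Such an entry might look like this, where shared_page_gpfn is
 * a hypothetical name:
 *
 *     struct e820entry ent = {
 *         .addr = shared_page_gpfn << PAGE_SHIFT,
 *         .size = PAGE_SIZE,
 *         .type = E820_SHARED_PAGE,
 *     };
 */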
void vmx_do_launch(struct vcpu *v)
{
    /* Update CR3, GDT, LDT, TR */
    unsigned int tr, cpu, error = 0;
    struct host_execution_env host_env;
    struct Xgt_desc_struct desc;
    unsigned long pfn = 0;
    struct pfn_info *page;
    struct cpu_user_regs *regs = guest_cpu_user_regs();

    vmx_stts();

    cpu = smp_processor_id();

    page = (struct pfn_info *) alloc_domheap_page(NULL);
    pfn = (unsigned long) (page - frame_table);

    vmx_setup_platform(v, regs);

    __asm__ __volatile__ ("sidt (%0) \n" :: "a"(&desc) : "memory");
    host_env.idtr_limit = desc.size;
    host_env.idtr_base = desc.address;
    error |= __vmwrite(HOST_IDTR_BASE, host_env.idtr_base);

    __asm__ __volatile__ ("sgdt (%0) \n" :: "a"(&desc) : "memory");
    host_env.gdtr_limit = desc.size;
    host_env.gdtr_base = desc.address;
    error |= __vmwrite(HOST_GDTR_BASE, host_env.gdtr_base);

    error |= __vmwrite(GUEST_LDTR_SELECTOR, 0);
    error |= __vmwrite(GUEST_LDTR_BASE, 0);
    error |= __vmwrite(GUEST_LDTR_LIMIT, 0);

    __asm__ __volatile__ ("str (%0) \n" :: "a"(&tr) : "memory");
    host_env.tr_selector = tr;
    host_env.tr_limit = sizeof(struct tss_struct);
    host_env.tr_base = (unsigned long) &init_tss[cpu];

    error |= __vmwrite(HOST_TR_SELECTOR, host_env.tr_selector);
    error |= __vmwrite(HOST_TR_BASE, host_env.tr_base);
    error |= __vmwrite(GUEST_TR_BASE, 0);
    error |= __vmwrite(GUEST_TR_LIMIT, 0xff);

    __vmwrite(GUEST_CR3, pagetable_get_paddr(v->arch.guest_table));
    __vmwrite(HOST_CR3, pagetable_get_paddr(v->arch.monitor_table));
    __vmwrite(HOST_RSP, (unsigned long)get_stack_bottom());

    v->arch.schedule_tail = arch_vmx_do_resume;
}
/*
 * Initially set up the same environment as the host.
 */
static inline int
construct_init_vmcs_guest(struct cpu_user_regs *regs,
                          struct vcpu_guest_context *ctxt,
                          struct host_execution_env *host_env)
{
    int error = 0;
    union vmcs_arbytes arbytes;
    unsigned long dr7;
    unsigned long eflags, shadow_cr;

    /* MSR */
    error |= __vmwrite(VM_EXIT_MSR_LOAD_ADDR, 0);
    error |= __vmwrite(VM_EXIT_MSR_STORE_ADDR, 0);

    error |= __vmwrite(VM_EXIT_MSR_STORE_COUNT, 0);
    error |= __vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0);
    error |= __vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0);
    /* interrupt */
    error |= __vmwrite(VM_ENTRY_INTR_INFO_FIELD, 0);
    /* mask */
    error |= __vmwrite(CR0_GUEST_HOST_MASK, -1UL);
    error |= __vmwrite(CR4_GUEST_HOST_MASK, -1UL);

    error |= __vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0);
    error |= __vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, 0);

    /* TSC */
    error |= __vmwrite(TSC_OFFSET, 0);
    error |= __vmwrite(CR3_TARGET_COUNT, 0);

    /* Guest selectors */
    error |= __vmwrite(GUEST_CS_SELECTOR, regs->cs);
    error |= __vmwrite(GUEST_ES_SELECTOR, regs->es);
    error |= __vmwrite(GUEST_SS_SELECTOR, regs->ss);
    error |= __vmwrite(GUEST_DS_SELECTOR, regs->ds);
    error |= __vmwrite(GUEST_FS_SELECTOR, regs->fs);
    error |= __vmwrite(GUEST_GS_SELECTOR, regs->gs);

    /* Guest segment limits */
    error |= __vmwrite(GUEST_CS_LIMIT, GUEST_SEGMENT_LIMIT);
    error |= __vmwrite(GUEST_ES_LIMIT, GUEST_SEGMENT_LIMIT);
    error |= __vmwrite(GUEST_SS_LIMIT, GUEST_SEGMENT_LIMIT);
    error |= __vmwrite(GUEST_DS_LIMIT, GUEST_SEGMENT_LIMIT);
    error |= __vmwrite(GUEST_FS_LIMIT, GUEST_SEGMENT_LIMIT);
    error |= __vmwrite(GUEST_GS_LIMIT, GUEST_SEGMENT_LIMIT);

    error |= __vmwrite(GUEST_IDTR_LIMIT, host_env->idtr_limit);

    /* AR bytes */
    arbytes.bytes = 0;
    arbytes.fields.seg_type = 0x3;          /* type = 3: read/write data, accessed */
    arbytes.fields.s = 1;                   /* code or data, i.e. not system */
    arbytes.fields.dpl = 0;                 /* DPL = 0 */
    arbytes.fields.p = 1;                   /* segment present */
    arbytes.fields.default_ops_size = 1;    /* 32-bit */
    arbytes.fields.g = 1;
    arbytes.fields.null_bit = 0;            /* not null */

    error |= __vmwrite(GUEST_ES_AR_BYTES, arbytes.bytes);
    error |= __vmwrite(GUEST_SS_AR_BYTES, arbytes.bytes);
    error |= __vmwrite(GUEST_DS_AR_BYTES, arbytes.bytes);
    error |= __vmwrite(GUEST_FS_AR_BYTES, arbytes.bytes);
    error |= __vmwrite(GUEST_GS_AR_BYTES, arbytes.bytes);

    arbytes.fields.seg_type = 0xb;          /* type = 0xb: execute/read code, accessed */
    error |= __vmwrite(GUEST_CS_AR_BYTES, arbytes.bytes);
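
    /*
     * Added note: assuming vmcs_arbytes mirrors the hardware access
     * rights layout (type in bits 3:0, S bit 4, DPL bits 6:5, P bit 7,
     * D/B bit 14, G bit 15), the field values above pack to 0xc093 for
     * the flat data segments and 0xc09b for CS.
     */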
    error |= __vmwrite(GUEST_GDTR_BASE, regs->edx);
    regs->edx = 0;
    error |= __vmwrite(GUEST_GDTR_LIMIT, regs->eax);
    regs->eax = 0;

    arbytes.fields.s = 0;                   /* not a code or data segment */
    arbytes.fields.seg_type = 0x2;          /* LDT */
    arbytes.fields.default_ops_size = 0;    /* 16-bit */
    arbytes.fields.g = 0;
    error |= __vmwrite(GUEST_LDTR_AR_BYTES, arbytes.bytes);

    arbytes.fields.seg_type = 0xb;          /* 32-bit TSS (busy) */
    error |= __vmwrite(GUEST_TR_AR_BYTES, arbytes.bytes);

    error |= __vmwrite(GUEST_CR0, host_env->cr0); /* same CR0 */

    /* Initially PG is cleared in the read shadow */
    shadow_cr = host_env->cr0;
    shadow_cr &= ~X86_CR0_PG;
    error |= __vmwrite(CR0_READ_SHADOW, shadow_cr);
    /* CR3 is set in vmx_final_setup_guest */
#ifdef __x86_64__
    error |= __vmwrite(GUEST_CR4, host_env->cr4 & ~X86_CR4_PSE);
#else
    error |= __vmwrite(GUEST_CR4, host_env->cr4);
#endif
    shadow_cr = host_env->cr4;

#ifdef __x86_64__
    shadow_cr &= ~(X86_CR4_PGE | X86_CR4_VMXE | X86_CR4_PAE);
#else
    shadow_cr &= ~(X86_CR4_PGE | X86_CR4_VMXE);
#endif
    error |= __vmwrite(CR4_READ_SHADOW, shadow_cr);
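
    /*
     * Added note: because CR0_GUEST_HOST_MASK/CR4_GUEST_HOST_MASK were
     * set to all ones earlier, every guest read of CR0/CR4 returns
     * these read shadows and every guest write causes a VM exit, so
     * the guest believes paging is off and VMXE/PGE are clear even
     * though the real control registers keep them set.
     */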
    error |= __vmwrite(GUEST_ES_BASE, host_env->ds_base);
    error |= __vmwrite(GUEST_CS_BASE, host_env->cs_base);
    error |= __vmwrite(GUEST_SS_BASE, host_env->ds_base);
    error |= __vmwrite(GUEST_DS_BASE, host_env->ds_base);
    error |= __vmwrite(GUEST_FS_BASE, host_env->ds_base);
    error |= __vmwrite(GUEST_GS_BASE, host_env->ds_base);
    error |= __vmwrite(GUEST_IDTR_BASE, host_env->idtr_base);

    error |= __vmwrite(GUEST_RSP, regs->esp);
    error |= __vmwrite(GUEST_RIP, regs->eip);

    eflags = regs->eflags & ~VMCS_EFLAGS_RESERVED_0; /* clear 0s */
    eflags |= VMCS_EFLAGS_RESERVED_1; /* set 1s */

    error |= __vmwrite(GUEST_RFLAGS, eflags);

    error |= __vmwrite(GUEST_INTERRUPTIBILITY_INFO, 0);
    __asm__ __volatile__ ("mov %%dr7, %0\n" : "=r" (dr7));
    error |= __vmwrite(GUEST_DR7, dr7);
    error |= __vmwrite(VMCS_LINK_POINTER, 0xffffffff);
    error |= __vmwrite(VMCS_LINK_POINTER_HIGH, 0xffffffff);

    return error;
}
static inline int construct_vmcs_host(struct host_execution_env *host_env)
{
    int error = 0;
    unsigned long crn;

    /* Host selectors */
    host_env->ds_selector = __HYPERVISOR_DS;
    error |= __vmwrite(HOST_ES_SELECTOR, host_env->ds_selector);
    error |= __vmwrite(HOST_SS_SELECTOR, host_env->ds_selector);
    error |= __vmwrite(HOST_DS_SELECTOR, host_env->ds_selector);
#if defined (__i386__)
    error |= __vmwrite(HOST_FS_SELECTOR, host_env->ds_selector);
    error |= __vmwrite(HOST_GS_SELECTOR, host_env->ds_selector);
    error |= __vmwrite(HOST_FS_BASE, host_env->ds_base);
    error |= __vmwrite(HOST_GS_BASE, host_env->ds_base);

#else
    rdmsrl(MSR_FS_BASE, host_env->fs_base);
    rdmsrl(MSR_GS_BASE, host_env->gs_base);
    error |= __vmwrite(HOST_FS_BASE, host_env->fs_base);
    error |= __vmwrite(HOST_GS_BASE, host_env->gs_base);

#endif
    host_env->cs_selector = __HYPERVISOR_CS;
    error |= __vmwrite(HOST_CS_SELECTOR, host_env->cs_selector);

    host_env->ds_base = 0;
    host_env->cs_base = 0;

    __asm__ __volatile__ ("mov %%cr0,%0" : "=r" (crn) : );
    host_env->cr0 = crn;
    error |= __vmwrite(HOST_CR0, crn); /* same CR0 */

    /* CR3 is set in vmx_final_setup_hostos */
    __asm__ __volatile__ ("mov %%cr4,%0" : "=r" (crn) : );
    host_env->cr4 = crn;
    error |= __vmwrite(HOST_CR4, crn);

    error |= __vmwrite(HOST_RIP, (unsigned long) vmx_asm_vmexit_handler);
#ifdef __x86_64__
    /* TBD: support cr8 for 64-bit guest */
    __vmwrite(VIRTUAL_APIC_PAGE_ADDR, 0);
    __vmwrite(TPR_THRESHOLD, 0);
    __vmwrite(SECONDARY_VM_EXEC_CONTROL, 0);
#endif

    return error;
}
/*
 * Need to extend to support full virtualization.
 * The variable use_host_env indicates whether the new VMCS should use
 * the same setup as the host (xenolinux).
 */
int construct_vmcs(struct arch_vmx_struct *arch_vmx,
                   struct cpu_user_regs *regs,
                   struct vcpu_guest_context *ctxt,
                   int use_host_env)
{
    int error;
    u64 vmcs_phys_ptr;

    struct host_execution_env host_env;

    if (use_host_env != VMCS_USE_HOST_ENV)
        return -EINVAL;

    memset(&host_env, 0, sizeof(struct host_execution_env));

    vmcs_phys_ptr = (u64) virt_to_phys(arch_vmx->vmcs);

    if ((error = __vmpclear(vmcs_phys_ptr))) {
        printk("construct_vmcs: VMCLEAR failed\n");
        return -EINVAL;
    }
    if ((error = load_vmcs(arch_vmx, vmcs_phys_ptr))) {
        printk("construct_vmcs: load_vmcs failed: VMCS = %lx\n",
               (unsigned long) vmcs_phys_ptr);
        return -EINVAL;
    }
    if ((error = construct_vmcs_controls(arch_vmx))) {
        printk("construct_vmcs: construct_vmcs_controls failed\n");
        return -EINVAL;
    }
    /* host selectors */
    if ((error = construct_vmcs_host(&host_env))) {
        printk("construct_vmcs: construct_vmcs_host failed\n");
        return -EINVAL;
    }
    /* guest selectors */
    if ((error = construct_init_vmcs_guest(regs, ctxt, &host_env))) {
        printk("construct_vmcs: construct_init_vmcs_guest failed\n");
        return -EINVAL;
    }

    if ((error = __vmwrite(EXCEPTION_BITMAP,
                           MONITOR_DEFAULT_EXCEPTION_BITMAP))) {
        printk("construct_vmcs: setting exception bitmap failed\n");
        return -EINVAL;
    }

    if (regs->eflags & EF_TF)
        __vm_set_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_DB);
    else
        __vm_clear_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_DB);

    return 0;
}
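
/*
 * Illustrative sketch (not part of this file): the expected call shape
 * from vcpu setup code, given the VMCS_USE_HOST_ENV check above.  The
 * caller context and the field path are assumptions for illustration.
 *
 *     if (construct_vmcs(&v->arch.arch_vmx, regs, ctxt,
 *                        VMCS_USE_HOST_ENV) < 0)
 *         ... fail the vcpu setup ...
 */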
/*
 * Modify guest eflags and exception bitmap for gdb.
 */
int modify_vmcs(struct arch_vmx_struct *arch_vmx,
                struct cpu_user_regs *regs)
{
    int error;
    u64 vmcs_phys_ptr, old, old_phys_ptr;
    vmcs_phys_ptr = (u64) virt_to_phys(arch_vmx->vmcs);

    old_phys_ptr = virt_to_phys(&old);
    __vmptrst(old_phys_ptr);
    if ((error = load_vmcs(arch_vmx, vmcs_phys_ptr))) {
        printk("modify_vmcs: load_vmcs failed: VMCS = %lx\n",
               (unsigned long) vmcs_phys_ptr);
        return -EINVAL;
    }
    load_cpu_user_regs(regs);

    __vmptrld(old_phys_ptr);

    return 0;
}
int load_vmcs(struct arch_vmx_struct *arch_vmx, u64 phys_ptr)
{
    int error;

    if ((error = __vmptrld(phys_ptr))) {
        clear_bit(ARCH_VMX_VMCS_LOADED, &arch_vmx->flags);
        return error;
    }
    set_bit(ARCH_VMX_VMCS_LOADED, &arch_vmx->flags);
    return 0;
}

int store_vmcs(struct arch_vmx_struct *arch_vmx, u64 phys_ptr)
{
    /* store the current VMCS pointer */
    __vmptrst(phys_ptr);
    clear_bit(ARCH_VMX_VMCS_LOADED, &arch_vmx->flags);
    return 0;
}
void vm_launch_fail(unsigned long eflags)
{
    unsigned long error;
    __vmread(VM_INSTRUCTION_ERROR, &error);
    printk("<vm_launch_fail> error code %lx\n", error);
    __vmx_bug(guest_cpu_user_regs());
}

void vm_resume_fail(unsigned long eflags)
{
    unsigned long error;
    __vmread(VM_INSTRUCTION_ERROR, &error);
    printk("<vm_resume_fail> error code %lx\n", error);
    __vmx_bug(guest_cpu_user_regs());
}
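
/*
 * Illustrative sketch (an assumption; the real glue lives in the
 * assembly entry path): VMLAUNCH/VMRESUME signal failure through
 * EFLAGS.  CF set means VMfailInvalid (no current VMCS); ZF set means
 * VMfailValid, with a code left in the VM-instruction-error field.
 * That is how the handlers above get called and why they can recover
 * a meaningful error code:
 *
 *     vmlaunch
 *     jc   launch_fail        VMfailInvalid
 *     jz   launch_fail        VMfailValid; see VM_INSTRUCTION_ERROR
 */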
#endif /* CONFIG_VMX */

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */