direct-io.hg: view xen/arch/x86/vmx_vmcs.c @ 5669:ff5d7ccd8d69

author      cl349@firebug.cl.cam.ac.uk
date        Tue Jul 05 08:47:55 2005 +0000 (2005-07-05)
description No changes from me.
parents     8bd2e8933277
children    f261f14b9781 a83ac0806d6b
/*
 * vmx_vmcs.c: VMCS management
 * Copyright (c) 2004, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 */
#include <xen/config.h>
#include <xen/init.h>
#include <xen/mm.h>
#include <xen/lib.h>
#include <xen/errno.h>
#include <xen/domain_page.h>
#include <asm/current.h>
#include <asm/cpufeature.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/vmx.h>
#include <xen/event.h>
#include <xen/kernel.h>
#include <public/io/ioreq.h>

#ifdef CONFIG_VMX
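
/*
 * alloc_vmcs() sizes the VMCS region from the IA32_VMX_BASIC MSR: bits
 * 44:32 of the MSR (hence the 0x1fff mask on the high dword) give the
 * region size in bytes, and bits 30:0 give the VMCS revision identifier,
 * which must be written into the first dword of the region before the
 * VMCS can be used. Note that vmcs_size is a global defined elsewhere in
 * the tree, not in this file.
 */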
struct vmcs_struct *alloc_vmcs(void)
{
    struct vmcs_struct *vmcs;
    u32 vmx_msr_low, vmx_msr_high;

    rdmsr(MSR_IA32_VMX_BASIC_MSR, vmx_msr_low, vmx_msr_high);
    vmcs_size = vmx_msr_high & 0x1fff;
    vmcs = alloc_xenheap_pages(get_order(vmcs_size));
    memset((char *)vmcs, 0, vmcs_size); /* don't remove this */

    vmcs->vmcs_revision_id = vmx_msr_low;
    return vmcs;
}
void free_vmcs(struct vmcs_struct *vmcs)
{
    int order;

    /* Free the same order of pages that alloc_vmcs() allocated. */
    order = get_order(vmcs_size);
    free_xenheap_pages(vmcs, order);
}
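
/*
 * The VMCS execution-control fields written below fall into three
 * classes: pin-based controls (asynchronous events such as external
 * interrupts and NMIs), processor-based controls (which instructions
 * and events cause VM exits), and the exit/entry controls (how state is
 * saved and loaded across VM transitions). The MONITOR_* constants
 * carry Xen's chosen settings for each class.
 */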
static inline int construct_vmcs_controls(void)
{
    int error = 0;

    error |= __vmwrite(PIN_BASED_VM_EXEC_CONTROL,
                       MONITOR_PIN_BASED_EXEC_CONTROLS);

    error |= __vmwrite(CPU_BASED_VM_EXEC_CONTROL,
                       MONITOR_CPU_BASED_EXEC_CONTROLS);
#if defined (__x86_64__)
    error |= __vmwrite(VM_EXIT_CONTROLS,
                       MONITOR_VM_EXIT_CONTROLS | VM_EXIT_CONTROLS_IA_32E_MODE);
#else
    error |= __vmwrite(VM_EXIT_CONTROLS, MONITOR_VM_EXIT_CONTROLS);
#endif
    error |= __vmwrite(VM_ENTRY_CONTROLS, MONITOR_VM_ENTRY_CONTROLS);

    return error;
}
#define GUEST_SEGMENT_LIMIT 0xffffffff
#define HOST_SEGMENT_LIMIT  0xffffffff

struct host_execution_env {
    /* selectors */
    unsigned short ldtr_selector;
    unsigned short tr_selector;
    unsigned short ds_selector;
    unsigned short cs_selector;
    /* limits */
    unsigned short gdtr_limit;
    unsigned short ldtr_limit;
    unsigned short idtr_limit;
    unsigned short tr_limit;
    /* base */
    unsigned long gdtr_base;
    unsigned long ldtr_base;
    unsigned long idtr_base;
    unsigned long tr_base;
    unsigned long ds_base;
    unsigned long cs_base;
#ifdef __x86_64__
    unsigned long fs_base;
    unsigned long gs_base;
#endif

    /* control registers */
    unsigned long cr3;
    unsigned long cr0;
    unsigned long cr4;
    unsigned long dr7;
};
#define round_pgdown(_p) ((_p) & PAGE_MASK) /* copied from domain.c */
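
/*
 * vmx_setup_platform() runs when the boot vcpu is launched. The domain
 * builder passes an e820 map through the guest registers (entry count
 * in ECX, guest-physical address of the table in EDI); the loop below
 * scans it for the E820_SHARED_PAGE entry, which locates the page used
 * to exchange ioreqs with the device model. That page is mapped into
 * Xen and the corresponding event channel is unmasked.
 */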
int vmx_setup_platform(struct vcpu *d, struct cpu_user_regs *regs)
{
    int i;
    unsigned int n;
    unsigned long *p, mpfn, offset, addr;
    struct e820entry *e820p;
    unsigned long gpfn = 0;

    regs->ebx = 0;   /* Linux expects ebx to be 0 for boot proc */

    n = regs->ecx;
    if (n > 32) {
        VMX_DBG_LOG(DBG_LEVEL_1, "Too many e820 entries: %u", n);
        return -1;
    }

    addr = regs->edi;
    offset = (addr & ~PAGE_MASK);
    addr = round_pgdown(addr);

    mpfn = phys_to_machine_mapping(addr >> PAGE_SHIFT);
    p = map_domain_page(mpfn);

    e820p = (struct e820entry *) ((unsigned long) p + offset);

#ifndef NDEBUG
    print_e820_memory_map(e820p, n);
#endif

    for ( i = 0; i < n; i++ )
    {
        if ( e820p[i].type == E820_SHARED_PAGE )
        {
            gpfn = (e820p[i].addr >> PAGE_SHIFT);
            break;
        }
    }

    if ( gpfn == 0 )
    {
        unmap_domain_page(p);
        return -1;
    }

    unmap_domain_page(p);

    /* Initialise the shared page. */
    mpfn = phys_to_machine_mapping(gpfn);
    p = map_domain_page(mpfn);
    d->domain->arch.vmx_platform.shared_page_va = (unsigned long)p;

    VMX_DBG_LOG(DBG_LEVEL_1, "eport: %x\n", iopacket_port(d->domain));

    clear_bit(iopacket_port(d->domain),
              &d->domain->shared_info->evtchn_mask[0]);

    return 0;
}
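
/*
 * vmx_do_launch() finalizes the per-cpu pieces of the VMCS just before
 * the first VMLAUNCH: it snapshots the host GDTR and TR so that VM
 * exits restore the right descriptor tables and TSS, zeroes the guest
 * LDTR/TR state, and points GUEST_CR3/HOST_CR3 at the guest and monitor
 * page tables respectively. HOST_RSP is set so the vmexit handler runs
 * on this vcpu's Xen stack.
 */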
void vmx_do_launch(struct vcpu *v)
{
    /* Update CR3, GDT, LDT, TR */
    unsigned int tr, cpu, error = 0;
    struct host_execution_env host_env;
    struct Xgt_desc_struct desc;
    unsigned long pfn = 0;
    struct pfn_info *page;
    struct cpu_user_regs *regs = guest_cpu_user_regs();

    vmx_stts();

    cpu = smp_processor_id();

    page = (struct pfn_info *) alloc_domheap_page(NULL);
    pfn = (unsigned long) (page - frame_table);

    vmx_setup_platform(v, regs);

    __asm__ __volatile__ ("sgdt (%0) \n" :: "a"(&desc) : "memory");
    host_env.gdtr_limit = desc.size;
    host_env.gdtr_base = desc.address;

    error |= __vmwrite(HOST_GDTR_BASE, host_env.gdtr_base);

    error |= __vmwrite(GUEST_LDTR_SELECTOR, 0);
    error |= __vmwrite(GUEST_LDTR_BASE, 0);
    error |= __vmwrite(GUEST_LDTR_LIMIT, 0);

    __asm__ __volatile__ ("str (%0) \n" :: "a"(&tr) : "memory");
    host_env.tr_selector = tr;
    host_env.tr_limit = sizeof(struct tss_struct);
    host_env.tr_base = (unsigned long) &init_tss[cpu];

    error |= __vmwrite(HOST_TR_SELECTOR, host_env.tr_selector);
    error |= __vmwrite(HOST_TR_BASE, host_env.tr_base);
    error |= __vmwrite(GUEST_TR_BASE, 0);
    error |= __vmwrite(GUEST_TR_LIMIT, 0xff);

    __vmwrite(GUEST_CR3, pagetable_get_paddr(v->arch.guest_table));
    __vmwrite(HOST_CR3, pagetable_get_paddr(v->arch.monitor_table));
    __vmwrite(HOST_RSP, (unsigned long)get_stack_bottom());

    v->arch.schedule_tail = arch_vmx_do_resume;
}
/*
 * Initially set up the same environment as the host.
 */
static inline int
construct_init_vmcs_guest(struct cpu_user_regs *regs,
                          struct vcpu_guest_context *ctxt,
                          struct host_execution_env *host_env)
{
    int error = 0;
    union vmcs_arbytes arbytes;
    unsigned long dr7;
    unsigned long eflags, shadow_cr;

    /* MSR */
    error |= __vmwrite(VM_EXIT_MSR_LOAD_ADDR, 0);
    error |= __vmwrite(VM_EXIT_MSR_STORE_ADDR, 0);

    error |= __vmwrite(VM_EXIT_MSR_STORE_COUNT, 0);
    error |= __vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0);
    error |= __vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0);
    /* interrupt */
    error |= __vmwrite(VM_ENTRY_INTR_INFO_FIELD, 0);
    /* mask */
    error |= __vmwrite(CR0_GUEST_HOST_MASK, -1UL);
    error |= __vmwrite(CR4_GUEST_HOST_MASK, -1UL);
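
    /*
     * A 1 bit in the CR0/CR4 guest/host masks just written marks the
     * bit as host-owned: guest reads of it return the corresponding bit
     * of the CRn read shadow, and guest writes that would change it
     * cause a VM exit. -1UL therefore gives Xen full control of both
     * registers; the read shadows themselves are filled in below.
     */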

    error |= __vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0);
    error |= __vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, 0);

    /* TSC */
    error |= __vmwrite(TSC_OFFSET, 0);
    error |= __vmwrite(CR3_TARGET_COUNT, 0);

    /* Guest Selectors */
    error |= __vmwrite(GUEST_CS_SELECTOR, regs->cs);
    error |= __vmwrite(GUEST_ES_SELECTOR, regs->es);
    error |= __vmwrite(GUEST_SS_SELECTOR, regs->ss);
    error |= __vmwrite(GUEST_DS_SELECTOR, regs->ds);
    error |= __vmwrite(GUEST_FS_SELECTOR, regs->fs);
    error |= __vmwrite(GUEST_GS_SELECTOR, regs->gs);

    /* Guest segment limits */
    error |= __vmwrite(GUEST_CS_LIMIT, GUEST_SEGMENT_LIMIT);
    error |= __vmwrite(GUEST_ES_LIMIT, GUEST_SEGMENT_LIMIT);
    error |= __vmwrite(GUEST_SS_LIMIT, GUEST_SEGMENT_LIMIT);
    error |= __vmwrite(GUEST_DS_LIMIT, GUEST_SEGMENT_LIMIT);
    error |= __vmwrite(GUEST_FS_LIMIT, GUEST_SEGMENT_LIMIT);
    error |= __vmwrite(GUEST_GS_LIMIT, GUEST_SEGMENT_LIMIT);

    error |= __vmwrite(GUEST_IDTR_LIMIT, host_env->idtr_limit);

    /* AR bytes */
    arbytes.bytes = 0;
    arbytes.fields.seg_type = 0x3;        /* type = 3: read/write data, accessed */
    arbytes.fields.s = 1;                 /* code or data, i.e. not system */
    arbytes.fields.dpl = 0;               /* DPL = 0 */
    arbytes.fields.p = 1;                 /* segment present */
    arbytes.fields.default_ops_size = 1;  /* 32-bit */
    arbytes.fields.g = 1;
    arbytes.fields.null_bit = 0;          /* not null */

    error |= __vmwrite(GUEST_ES_AR_BYTES, arbytes.bytes);
    error |= __vmwrite(GUEST_SS_AR_BYTES, arbytes.bytes);
    error |= __vmwrite(GUEST_DS_AR_BYTES, arbytes.bytes);
    error |= __vmwrite(GUEST_FS_AR_BYTES, arbytes.bytes);
    error |= __vmwrite(GUEST_GS_AR_BYTES, arbytes.bytes);

    arbytes.fields.seg_type = 0xb;        /* type = 0xb: execute/read code, accessed */
    error |= __vmwrite(GUEST_CS_AR_BYTES, arbytes.bytes);
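
    /*
     * The VMX access-rights ("AR bytes") fields mirror bits 8-23 of a
     * segment descriptor: type, S, DPL, P, D/B and G, plus an extra
     * "unusable" bit (null_bit above). Writing the same arbytes value
     * for all the data segments gives the guest a flat 32-bit ring-0
     * data view matching the flat limits written above.
     */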

    /* Initial GDTR: base arrives in EDX, limit in EAX; clear both after use. */
    error |= __vmwrite(GUEST_GDTR_BASE, regs->edx);
    regs->edx = 0;
    error |= __vmwrite(GUEST_GDTR_LIMIT, regs->eax);
    regs->eax = 0;

    arbytes.fields.s = 0;                 /* not code or data segment */
    arbytes.fields.seg_type = 0x2;        /* LDT */
    arbytes.fields.default_ops_size = 0;  /* 16-bit */
    arbytes.fields.g = 0;
    error |= __vmwrite(GUEST_LDTR_AR_BYTES, arbytes.bytes);

    arbytes.fields.seg_type = 0xb;        /* 32-bit TSS (busy) */
    error |= __vmwrite(GUEST_TR_AR_BYTES, arbytes.bytes);

    error |= __vmwrite(GUEST_CR0, host_env->cr0); /* same CR0 */

    /* Initially PG is not set: clear it in the CR0 read shadow. */
    shadow_cr = host_env->cr0;
    shadow_cr &= ~X86_CR0_PG;
    error |= __vmwrite(CR0_READ_SHADOW, shadow_cr);
    /* CR3 is set in vmx_final_setup_guest */
#ifdef __x86_64__
    error |= __vmwrite(GUEST_CR4, host_env->cr4 & ~X86_CR4_PAE);
    printk("construct_init_vmcs_guest: guest CR4 is %lx\n", host_env->cr4);
#else
    error |= __vmwrite(GUEST_CR4, host_env->cr4);
#endif
    shadow_cr = host_env->cr4;

#ifdef __x86_64__
    shadow_cr &= ~(X86_CR4_PGE | X86_CR4_VMXE | X86_CR4_PAE);
#else
    shadow_cr &= ~(X86_CR4_PGE | X86_CR4_VMXE);
#endif
    error |= __vmwrite(CR4_READ_SHADOW, shadow_cr);

    error |= __vmwrite(GUEST_ES_BASE, host_env->ds_base);
    error |= __vmwrite(GUEST_CS_BASE, host_env->cs_base);
    error |= __vmwrite(GUEST_SS_BASE, host_env->ds_base);
    error |= __vmwrite(GUEST_DS_BASE, host_env->ds_base);
    error |= __vmwrite(GUEST_FS_BASE, host_env->ds_base);
    error |= __vmwrite(GUEST_GS_BASE, host_env->ds_base);
    error |= __vmwrite(GUEST_IDTR_BASE, host_env->idtr_base);

    error |= __vmwrite(GUEST_RSP, regs->esp);
    error |= __vmwrite(GUEST_RIP, regs->eip);

    eflags = regs->eflags & ~VMCS_EFLAGS_RESERVED_0; /* clear must-be-zero bits */
    eflags |= VMCS_EFLAGS_RESERVED_1;                /* set must-be-one bits */

    error |= __vmwrite(GUEST_RFLAGS, eflags);

    error |= __vmwrite(GUEST_INTERRUPTIBILITY_INFO, 0);
    __asm__ __volatile__ ("mov %%dr7, %0\n" : "=r" (dr7));
    error |= __vmwrite(GUEST_DR7, dr7);
    error |= __vmwrite(VMCS_LINK_POINTER, 0xffffffff);
    error |= __vmwrite(VMCS_LINK_POINTER_HIGH, 0xffffffff);
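
    /*
     * The architecture requires the VMCS link pointer to hold all ones
     * when it is not in use; the two 32-bit halves written above encode
     * that ~0ULL value.
     */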

    return error;
}
static inline int construct_vmcs_host(struct host_execution_env *host_env)
{
    int error = 0;
    unsigned long crn;
    struct Xgt_desc_struct desc;

    /* Host Selectors */
    host_env->ds_selector = __HYPERVISOR_DS;
    error |= __vmwrite(HOST_ES_SELECTOR, host_env->ds_selector);
    error |= __vmwrite(HOST_SS_SELECTOR, host_env->ds_selector);
    error |= __vmwrite(HOST_DS_SELECTOR, host_env->ds_selector);
#if defined (__i386__)
    error |= __vmwrite(HOST_FS_SELECTOR, host_env->ds_selector);
    error |= __vmwrite(HOST_GS_SELECTOR, host_env->ds_selector);
    error |= __vmwrite(HOST_FS_BASE, host_env->ds_base);
    error |= __vmwrite(HOST_GS_BASE, host_env->ds_base);

#else
    rdmsrl(MSR_FS_BASE, host_env->fs_base);
    rdmsrl(MSR_GS_BASE, host_env->gs_base);
    error |= __vmwrite(HOST_FS_BASE, host_env->fs_base);
    error |= __vmwrite(HOST_GS_BASE, host_env->gs_base);

#endif
    host_env->cs_selector = __HYPERVISOR_CS;
    error |= __vmwrite(HOST_CS_SELECTOR, host_env->cs_selector);

    host_env->ds_base = 0;
    host_env->cs_base = 0;

    /* Host IDTR */
    __asm__ __volatile__ ("sidt (%0) \n" :: "a"(&desc) : "memory");
    host_env->idtr_limit = desc.size;
    host_env->idtr_base = desc.address;
    error |= __vmwrite(HOST_IDTR_BASE, host_env->idtr_base);

    __asm__ __volatile__ ("mov %%cr0,%0" : "=r" (crn) : );

    host_env->cr0 = crn;
    error |= __vmwrite(HOST_CR0, crn); /* same CR0 */

    /* CR3 is set in vmx_final_setup_hostos */
    __asm__ __volatile__ ("mov %%cr4,%0" : "=r" (crn) : );
    host_env->cr4 = crn;
    error |= __vmwrite(HOST_CR4, crn);
    error |= __vmwrite(HOST_RIP, (unsigned long) vmx_asm_vmexit_handler);
#ifdef __x86_64__
    /* TBD: support cr8 for 64-bit guest */
    __vmwrite(VIRTUAL_APIC_PAGE_ADDR, 0);
    __vmwrite(TPR_THRESHOLD, 0);
    __vmwrite(SECONDARY_VM_EXEC_CONTROL, 0);
#endif

    return error;
}
/*
 * Needs to be extended to support full virtualization.
 * The variable use_host_env indicates whether the new VMCS should use
 * the same setup as the host (xenolinux).
 */
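
/*
 * Ordering note: a VMCS region must be VMCLEARed (to initialize it and
 * flush any cached copy) before it is made current with VMPTRLD, and
 * only the current VMCS can be the target of VMWRITE. construct_vmcs()
 * follows exactly that sequence before filling in the control, host and
 * guest fields.
 */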
int construct_vmcs(struct arch_vmx_struct *arch_vmx,
                   struct cpu_user_regs *regs,
                   struct vcpu_guest_context *ctxt,
                   int use_host_env)
{
    int error;
    u64 vmcs_phys_ptr;

    struct host_execution_env host_env;

    if (use_host_env != VMCS_USE_HOST_ENV)
        return -EINVAL;

    memset(&host_env, 0, sizeof(struct host_execution_env));

    vmcs_phys_ptr = (u64) virt_to_phys(arch_vmx->vmcs);

    if ((error = __vmpclear(vmcs_phys_ptr))) {
        printk("construct_vmcs: VMCLEAR failed\n");
        return -EINVAL;
    }
    if ((error = load_vmcs(arch_vmx, vmcs_phys_ptr))) {
        printk("construct_vmcs: load_vmcs failed: VMCS = %lx\n",
               (unsigned long) vmcs_phys_ptr);
        return -EINVAL;
    }
    if ((error = construct_vmcs_controls())) {
        printk("construct_vmcs: construct_vmcs_controls failed\n");
        return -EINVAL;
    }
    /* host selectors */
    if ((error = construct_vmcs_host(&host_env))) {
        printk("construct_vmcs: construct_vmcs_host failed\n");
        return -EINVAL;
    }
    /* guest selectors */
    if ((error = construct_init_vmcs_guest(regs, ctxt, &host_env))) {
        printk("construct_vmcs: construct_init_vmcs_guest failed\n");
        return -EINVAL;
    }

    if ((error = __vmwrite(EXCEPTION_BITMAP,
                           MONITOR_DEFAULT_EXCEPTION_BITMAP))) {
        printk("construct_vmcs: setting exception bitmap failed\n");
        return -EINVAL;
    }

    return 0;
}
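
/*
 * Typical construction sequence, as a sketch (the caller shown here is
 * illustrative; names other than alloc_vmcs/construct_vmcs/free_vmcs
 * are not from this file):
 *
 *     arch_vmx->vmcs = alloc_vmcs();
 *     if (construct_vmcs(arch_vmx, regs, ctxt, VMCS_USE_HOST_ENV) < 0) {
 *         free_vmcs(arch_vmx->vmcs);
 *         // ...propagate the error...
 *     }
 */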
int load_vmcs(struct arch_vmx_struct *arch_vmx, u64 phys_ptr)
{
    int error;

    if ((error = __vmptrld(phys_ptr))) {
        clear_bit(ARCH_VMX_VMCS_LOADED, &arch_vmx->flags);
        return error;
    }
    set_bit(ARCH_VMX_VMCS_LOADED, &arch_vmx->flags);
    return 0;
}
int store_vmcs(struct arch_vmx_struct *arch_vmx, u64 phys_ptr)
{
    /* store the current VMCS pointer */
    __vmptrst(phys_ptr);
    clear_bit(ARCH_VMX_VMCS_LOADED, &arch_vmx->flags);
    return 0;
}
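
/*
 * These handlers are invoked when VMLAUNCH or VMRESUME itself fails
 * rather than producing a normal VM exit. On such a failure the
 * instruction falls through with RFLAGS.CF set (VMfailInvalid) or
 * RFLAGS.ZF set (VMfailValid), which is why eflags is passed in; both
 * paths currently just dump state via __vmx_bug().
 */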
void vm_launch_fail(unsigned long eflags)
{
    __vmx_bug(guest_cpu_user_regs());
}

void vm_resume_fail(unsigned long eflags)
{
    __vmx_bug(guest_cpu_user_regs());
}
#endif /* CONFIG_VMX */

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */