
view xen/arch/x86/vmx_vmcs.c @ 3858:5b63436f25fe

bitkeeper revision 1.1205.1.2 (421527deX3t0INFwjrOweq0E7Le7pw)

Rename fields in arch_exec_domain to be more uniform.
Promote vmx_shadow_invlpg() to shadow_invlpg().
author maf46@burn.cl.cam.ac.uk
date Thu Feb 17 23:25:18 2005 +0000 (2005-02-17)
parents 0fe3bb5ed3aa
children ad1d06d64313
/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
/*
 * vmx_vmcs.c: VMCS management
 * Copyright (c) 2004, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 */

#include <xen/config.h>
#include <xen/init.h>
#include <xen/mm.h>
#include <xen/lib.h>
#include <xen/errno.h>

#include <asm/cpufeature.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/vmx.h>
#include <xen/event.h>
#include <xen/kernel.h>
#include <public/io/ioreq.h>
#include <asm/domain_page.h>

#ifdef CONFIG_VMX

struct vmcs_struct *alloc_vmcs(void)
{
    struct vmcs_struct *vmcs;
    unsigned int cpu_sig = cpuid_eax(0x00000001);

    vmcs = (struct vmcs_struct *) alloc_xenheap_pages(get_order(vmcs_size));
    memset((char *) vmcs, 0, vmcs_size); /* don't remove this */
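
    /*
     * The revision identifier must match the value the processor reports
     * in the IA32_VMX_BASIC MSR; this signature check appears to key off
     * early CPU steppings that report a different revision.
     */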
    vmcs->vmcs_revision_id = (cpu_sig > 0xf41) ? 3 : 1;
    return vmcs;
}

void free_vmcs(struct vmcs_struct *vmcs)
{
    int order;

    /* Free with the same order used by alloc_vmcs(). */
    order = get_order(vmcs_size);
    free_xenheap_pages((unsigned long) vmcs, order);
}
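
/*
 * Illustrative pairing (a sketch, not part of this changeset):
 *
 *     struct vmcs_struct *vmcs = alloc_vmcs();
 *     ...use the VMCS...
 *     free_vmcs(vmcs);
 *
 * vmcs_size is assumed to have been initialised from the IA32_VMX_BASIC
 * MSR during VMX feature detection.
 */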

static inline int construct_vmcs_controls(void)
{
    int error = 0;

    error |= __vmwrite(PIN_BASED_VM_EXEC_CONTROL,
                       MONITOR_PIN_BASED_EXEC_CONTROLS);

    error |= __vmwrite(CPU_BASED_VM_EXEC_CONTROL,
                       MONITOR_CPU_BASED_EXEC_CONTROLS);

    error |= __vmwrite(VM_EXIT_CONTROLS, MONITOR_VM_EXIT_CONTROLS);
    error |= __vmwrite(VM_ENTRY_CONTROLS, MONITOR_VM_ENTRY_CONTROLS);

    return error;
}
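
/*
 * The MONITOR_* control words (from asm/vmx.h) select which guest events
 * force a VM exit and how entries/exits behave; their bit layout follows
 * the VT-x pin-based/processor-based/exit/entry control formats.
 */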

#define GUEST_SEGMENT_LIMIT     0xffffffff
#define HOST_SEGMENT_LIMIT      0xffffffff

struct host_execution_env {
    /* selectors */
    unsigned short ldtr_selector;
    unsigned short tr_selector;
    unsigned short ds_selector;
    unsigned short cs_selector;
    /* limits */
    unsigned short gdtr_limit;
    unsigned short ldtr_limit;
    unsigned short idtr_limit;
    unsigned short tr_limit;
    /* base */
    unsigned long gdtr_base;
    unsigned long ldtr_base;
    unsigned long idtr_base;
    unsigned long tr_base;
    unsigned long ds_base;
    unsigned long cs_base;
    /* control registers */
    unsigned long cr3;
    unsigned long cr0;
    unsigned long cr4;
    unsigned long dr7;
};

#define round_pgdown(_p) ((_p)&PAGE_MASK) /* copied from domain.c */
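
/*
 * Find the guest pseudo-physical page that the builder marked
 * E820_SHARED_PAGE in the guest's e820 map, zero it, and record the
 * hypervisor virtual address at which it is mapped. This page carries
 * the ioreq structures shared with the device model (public/io/ioreq.h).
 */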
int vmx_setup_platform(struct exec_domain *d, execution_context_t *context)
{
    int i;
    unsigned int n;
    unsigned long *p, mpfn, offset, addr;
    struct e820entry *e820p;
    unsigned long gpfn = 0;

    context->ebx = 0;   /* Linux expects ebx to be 0 for boot proc */

    n = context->ecx;
    if (n > 32) {
        VMX_DBG_LOG(DBG_LEVEL_1, "Too many e820 entries: %d", n);
        return -1;
    }

    addr = context->edi;
    offset = (addr & ~PAGE_MASK);
    addr = round_pgdown(addr);
    mpfn = phys_to_machine_mapping(addr >> PAGE_SHIFT);
    p = map_domain_mem(mpfn << PAGE_SHIFT);

    e820p = (struct e820entry *) ((unsigned long) p + offset);
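
    /* Scan the e820 map for the shared-page entry planted by the builder. */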
    for (i = 0; i < n; i++) {
        if (e820p[i].type == E820_SHARED_PAGE) {
            gpfn = (e820p[i].addr >> PAGE_SHIFT);
            break;
        }
    }

    if (gpfn == 0) {
        printk("No shared page?\n");
        unmap_domain_mem(p);
        return -1;
    }
    unmap_domain_mem(p);

    mpfn = phys_to_machine_mapping(gpfn);
    p = map_domain_mem(mpfn << PAGE_SHIFT);
    ASSERT(p != NULL);

    /* Initialise shared page */
    memset(p, 0, PAGE_SIZE);

    d->arch.arch_vmx.vmx_platform.shared_page_va = (unsigned long) p;

    return 0;
}

void vmx_do_launch(struct exec_domain *ed)
{
    /* Update CR3, GDT, LDT, TR */
    unsigned int tr, cpu, error = 0;
    struct host_execution_env host_env;
    struct Xgt_desc_struct desc;
    struct list_head *list_ent;
    l2_pgentry_t *mpl2e, *hl2_vtable;
    unsigned long i, pfn = 0;
    struct pfn_info *page;
    execution_context_t *ec = get_execution_context();
    struct domain *d = ed->domain;

    cpu = smp_processor_id();
    d->arch.min_pfn = d->arch.max_pfn = 0;

    spin_lock(&d->page_alloc_lock);
    list_ent = d->page_list.next;

    mpl2e = (l2_pgentry_t *)map_domain_mem(pagetable_val(ed->arch.monitor_table));
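
    /*
     * Record the range of machine frames owned by this domain. (Note:
     * min_pfn starts at 0, so min() can never raise it; presumably it
     * was meant to start from the first pfn encountered.)
     */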
    for ( i = 0; list_ent != &d->page_list; i++ )
    {
        pfn = list_entry(list_ent, struct pfn_info, list) - frame_table;
        d->arch.min_pfn = min(d->arch.min_pfn, pfn);
        d->arch.max_pfn = max(d->arch.max_pfn, pfn);
        list_ent = frame_table[pfn].list.next;
    }

    spin_unlock(&d->page_alloc_lock);

    page = (struct pfn_info *) alloc_domheap_page(NULL);
    pfn = (unsigned long) (page - frame_table);

    /*
     * make linear_pt_table work for guest ptes
     */
    mpl2e[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
        mk_l2_pgentry((pfn << PAGE_SHIFT)| __PAGE_HYPERVISOR);

    hl2_vtable = map_domain_mem(pfn << PAGE_SHIFT);
    memset(hl2_vtable, 0, PAGE_SIZE); /* clean it up */
    ed->arch.hl2_vtable = hl2_vtable;

    unmap_domain_mem(mpl2e);

    vmx_setup_platform(ed, ec);

    __asm__ __volatile__ ("sgdt (%%eax) \n" :: "a"(&desc) : "memory");
    host_env.gdtr_limit = desc.size;
    host_env.gdtr_base = desc.address;

    error |= __vmwrite(HOST_GDTR_BASE, host_env.gdtr_base);

    error |= __vmwrite(GUEST_LDTR_SELECTOR, 0);
    error |= __vmwrite(GUEST_LDTR_BASE, 0);
    error |= __vmwrite(GUEST_LDTR_LIMIT, 0);

    __asm__ __volatile__ ("str (%%eax) \n" :: "a"(&tr) : "memory");
    host_env.tr_selector = tr;
    host_env.tr_limit = sizeof(struct tss_struct);
    host_env.tr_base = (unsigned long) &init_tss[cpu];

    error |= __vmwrite(HOST_TR_SELECTOR, host_env.tr_selector);
    error |= __vmwrite(HOST_TR_BASE, host_env.tr_base);
    error |= __vmwrite(GUEST_TR_BASE, 0);
    error |= __vmwrite(GUEST_TR_LIMIT, 0xff);

    __vmwrite(GUEST_CR3, pagetable_val(ed->arch.guest_table));
    __vmwrite(HOST_CR3, pagetable_val(ed->arch.monitor_table));
    __vmwrite(HOST_ESP, (unsigned long)get_stack_bottom());
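
    /*
     * Entry into the guest is deferred: arch_vmx_do_resume (defined
     * outside this file) is assumed to reload this VMCS and perform the
     * actual VMLAUNCH/VMRESUME when the domain is next scheduled.
     */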
    ed->arch.schedule_tail = arch_vmx_do_resume;
}

/*
 * Initially set up the same environment as the host.
 */
static inline int
construct_init_vmcs_guest(execution_context_t *context,
                          full_execution_context_t *full_context,
                          struct host_execution_env *host_env)
{
    int error = 0;
    union vmcs_arbytes arbytes;
    unsigned long dr7;
    unsigned long eflags, shadow_cr;

    /* MSR */
    error |= __vmwrite(VM_EXIT_MSR_LOAD_ADDR, 0);
    error |= __vmwrite(VM_EXIT_MSR_STORE_ADDR, 0);

    error |= __vmwrite(VM_EXIT_MSR_STORE_COUNT, 0);
    error |= __vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0);
    error |= __vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0);
    /* interrupt */
    error |= __vmwrite(VM_ENTRY_INTR_INFO_FIELD, 0);
    /* mask */
    error |= __vmwrite(CR0_GUEST_HOST_MASK, 0xffffffff);
    error |= __vmwrite(CR4_GUEST_HOST_MASK, 0xffffffff);

    error |= __vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0);
    error |= __vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, 0);

    /* TSC */
    error |= __vmwrite(TSC_OFFSET, 0);
    error |= __vmwrite(CR3_TARGET_COUNT, 0);

    /* Guest selectors */
    error |= __vmwrite(GUEST_CS_SELECTOR, context->cs);
    error |= __vmwrite(GUEST_ES_SELECTOR, context->es);
    error |= __vmwrite(GUEST_SS_SELECTOR, context->ss);
    error |= __vmwrite(GUEST_DS_SELECTOR, context->ds);
    error |= __vmwrite(GUEST_FS_SELECTOR, context->fs);
    error |= __vmwrite(GUEST_GS_SELECTOR, context->gs);

    /* Guest segment limits */
    error |= __vmwrite(GUEST_CS_LIMIT, GUEST_SEGMENT_LIMIT);
    error |= __vmwrite(GUEST_ES_LIMIT, GUEST_SEGMENT_LIMIT);
    error |= __vmwrite(GUEST_SS_LIMIT, GUEST_SEGMENT_LIMIT);
    error |= __vmwrite(GUEST_DS_LIMIT, GUEST_SEGMENT_LIMIT);
    error |= __vmwrite(GUEST_FS_LIMIT, GUEST_SEGMENT_LIMIT);
    error |= __vmwrite(GUEST_GS_LIMIT, GUEST_SEGMENT_LIMIT);

    error |= __vmwrite(GUEST_IDTR_LIMIT, host_env->idtr_limit);

    /* AR bytes */
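    /*
     * vmcs_arbytes mirrors the VT-x segment access-rights layout:
     * type, S, DPL, P, AVL, L, D/B, G, plus the "segment unusable" bit
     * (null_bit here).
     */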
    arbytes.bytes = 0;
    arbytes.fields.seg_type = 0x3;          /* type = 3 */
    arbytes.fields.s = 1;                   /* code or data, i.e. not system */
    arbytes.fields.dpl = 0;                 /* DPL = 0 */
    arbytes.fields.p = 1;                   /* segment present */
    arbytes.fields.default_ops_size = 1;    /* 32-bit */
    arbytes.fields.g = 1;
    arbytes.fields.null_bit = 0;            /* not null */

    error |= __vmwrite(GUEST_ES_AR_BYTES, arbytes.bytes);
    error |= __vmwrite(GUEST_SS_AR_BYTES, arbytes.bytes);
    error |= __vmwrite(GUEST_DS_AR_BYTES, arbytes.bytes);
    error |= __vmwrite(GUEST_FS_AR_BYTES, arbytes.bytes);
    error |= __vmwrite(GUEST_GS_AR_BYTES, arbytes.bytes);

    arbytes.fields.seg_type = 0xb;          /* type = 0xb */
    error |= __vmwrite(GUEST_CS_AR_BYTES, arbytes.bytes);
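
    /*
     * The domain builder is assumed to pass the guest GDT base in edx and
     * its limit in eax; both registers are cleared afterwards so the guest
     * does not start with stale values.
     */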
    error |= __vmwrite(GUEST_GDTR_BASE, context->edx);
    context->edx = 0;
    error |= __vmwrite(GUEST_GDTR_LIMIT, context->eax);
    context->eax = 0;

    arbytes.fields.s = 0;                   /* not code or data segment */
    arbytes.fields.seg_type = 0x2;          /* LDT */
    arbytes.fields.default_ops_size = 0;    /* 16-bit */
    arbytes.fields.g = 0;
    error |= __vmwrite(GUEST_LDTR_AR_BYTES, arbytes.bytes);

    arbytes.fields.seg_type = 0xb;          /* 32-bit TSS (busy) */
    error |= __vmwrite(GUEST_TR_AR_BYTES, arbytes.bytes);

    error |= __vmwrite(GUEST_CR0, host_env->cr0); /* same CR0 */

    /* Initially PG and PE are not set */
    shadow_cr = host_env->cr0;
    shadow_cr &= ~(X86_CR0_PE | X86_CR0_PG);
    error |= __vmwrite(CR0_READ_SHADOW, shadow_cr);
    /* CR3 is set in vmx_final_setup_guest */
    error |= __vmwrite(GUEST_CR4, host_env->cr4);
    shadow_cr = host_env->cr4;
    shadow_cr &= ~(X86_CR4_PGE | X86_CR4_VMXE);
    error |= __vmwrite(CR4_READ_SHADOW, shadow_cr);

    error |= __vmwrite(GUEST_ES_BASE, host_env->ds_base);
    error |= __vmwrite(GUEST_CS_BASE, host_env->cs_base);
    error |= __vmwrite(GUEST_SS_BASE, host_env->ds_base);
    error |= __vmwrite(GUEST_DS_BASE, host_env->ds_base);
    error |= __vmwrite(GUEST_FS_BASE, host_env->ds_base);
    error |= __vmwrite(GUEST_GS_BASE, host_env->ds_base);
    error |= __vmwrite(GUEST_IDTR_BASE, host_env->idtr_base);

    error |= __vmwrite(GUEST_ESP, context->esp);
    error |= __vmwrite(GUEST_EIP, context->eip);

    eflags = context->eflags & ~VMCS_EFLAGS_RESERVED_0; /* clear 0s */
    eflags |= VMCS_EFLAGS_RESERVED_1;                   /* set 1s */

    error |= __vmwrite(GUEST_EFLAGS, eflags);

    error |= __vmwrite(GUEST_INTERRUPTIBILITY_INFO, 0);
    __asm__ __volatile__ ("mov %%dr7, %0\n" : "=r" (dr7));
    error |= __vmwrite(GUEST_DR7, dr7);
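    /*
     * GUEST_VMCS0/1 appear to be the two halves of the 64-bit VMCS link
     * pointer; all-ones means "no linked (shadow) VMCS".
     */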
    error |= __vmwrite(GUEST_VMCS0, 0xffffffff);
    error |= __vmwrite(GUEST_VMCS1, 0xffffffff);

    return error;
}

static inline int construct_vmcs_host(struct host_execution_env *host_env)
{
    int error = 0;
    unsigned long crn;
    struct Xgt_desc_struct desc;

    /* Host selectors */
    host_env->ds_selector = __HYPERVISOR_DS;
    error |= __vmwrite(HOST_ES_SELECTOR, host_env->ds_selector);
    error |= __vmwrite(HOST_SS_SELECTOR, host_env->ds_selector);
    error |= __vmwrite(HOST_DS_SELECTOR, host_env->ds_selector);
    error |= __vmwrite(HOST_FS_SELECTOR, host_env->ds_selector);
    error |= __vmwrite(HOST_GS_SELECTOR, host_env->ds_selector);

    host_env->cs_selector = __HYPERVISOR_CS;
    error |= __vmwrite(HOST_CS_SELECTOR, host_env->cs_selector);

    host_env->ds_base = 0;
    host_env->cs_base = 0;
    error |= __vmwrite(HOST_FS_BASE, host_env->ds_base);
    error |= __vmwrite(HOST_GS_BASE, host_env->ds_base);

    /* IDT */
    __asm__ __volatile__ ("sidt (%%eax) \n" :: "a"(&desc) : "memory");
    host_env->idtr_limit = desc.size;
    host_env->idtr_base = desc.address;
    error |= __vmwrite(HOST_IDTR_BASE, host_env->idtr_base);

    __asm__ __volatile__ ("movl %%cr0,%0" : "=r" (crn) : );
    host_env->cr0 = crn;
    error |= __vmwrite(HOST_CR0, crn); /* same CR0 */

    /* CR3 is set in vmx_final_setup_hostos */
    __asm__ __volatile__ ("movl %%cr4,%0" : "=r" (crn) : );
    host_env->cr4 = crn;
    error |= __vmwrite(HOST_CR4, crn);
    error |= __vmwrite(HOST_EIP, (unsigned long) vmx_asm_vmexit_handler);

    return error;
}
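
/*
 * Note: the remaining host-state fields (HOST_GDTR_BASE, HOST_TR_*,
 * HOST_CR3, HOST_ESP) are filled in by vmx_do_launch() above, once the
 * monitor table and per-cpu stack are known.
 */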

/*
 * This needs extending to support full virtualization.
 * The use_host_env parameter indicates whether the new VMCS should use
 * the same setup as the host (xenolinux).
 */

int construct_vmcs(struct arch_vmx_struct *arch_vmx,
                   execution_context_t *context,
                   full_execution_context_t *full_context,
                   int use_host_env)
{
    int error;
    u64 vmcs_phys_ptr;

    struct host_execution_env host_env;

    if (use_host_env != VMCS_USE_HOST_ENV)
        return -EINVAL;

    memset(&host_env, 0, sizeof(struct host_execution_env));

    vmcs_phys_ptr = (u64) virt_to_phys(arch_vmx->vmcs);

    if ((error = __vmpclear(vmcs_phys_ptr))) {
        printk("construct_vmcs: VMCLEAR failed\n");
        return -EINVAL;
    }
    if ((error = load_vmcs(arch_vmx, vmcs_phys_ptr))) {
        printk("construct_vmcs: load_vmcs failed: VMCS = %lx\n",
               (unsigned long) vmcs_phys_ptr);
        return -EINVAL;
    }
    if ((error = construct_vmcs_controls())) {
        printk("construct_vmcs: construct_vmcs_controls failed\n");
        return -EINVAL;
    }
    /* host selectors */
    if ((error = construct_vmcs_host(&host_env))) {
        printk("construct_vmcs: construct_vmcs_host failed\n");
        return -EINVAL;
    }
    /* guest selectors */
    if ((error = construct_init_vmcs_guest(context, full_context, &host_env))) {
        printk("construct_vmcs: construct_init_vmcs_guest failed\n");
        return -EINVAL;
    }

    if ((error = __vmwrite(EXCEPTION_BITMAP,
                           MONITOR_DEFAULT_EXCEPTION_BITMAP))) {
        printk("construct_vmcs: setting exception bitmap failed\n");
        return -EINVAL;
    }

    return 0;
}
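
/*
 * Illustrative bring-up sequence (a sketch; the actual call sites live
 * outside this file, e.g. vmx_final_setup_guest()):
 *
 *     arch_vmx->vmcs = alloc_vmcs();
 *     construct_vmcs(arch_vmx, context, full_context, VMCS_USE_HOST_ENV);
 *     ...
 *     vmx_do_launch(ed);      (via ed->arch.schedule_tail)
 */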

int load_vmcs(struct arch_vmx_struct *arch_vmx, u64 phys_ptr)
{
    int error;

    if ((error = __vmptrld(phys_ptr))) {
        clear_bit(ARCH_VMX_VMCS_LOADED, &arch_vmx->flags);
        return error;
    }
    set_bit(ARCH_VMX_VMCS_LOADED, &arch_vmx->flags);
    return 0;
}
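
/*
 * load_vmcs()/store_vmcs() wrap VMPTRLD/VMPTRST: they make the VMCS at
 * phys_ptr current on this CPU (or read back the current pointer), with
 * ARCH_VMX_VMCS_LOADED tracking whether this domain's VMCS is the one
 * currently loaded.
 */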

int store_vmcs(struct arch_vmx_struct *arch_vmx, u64 phys_ptr)
{
    /* take the current VMCS */
    __vmptrst(phys_ptr);
    clear_bit(ARCH_VMX_VMCS_LOADED, &arch_vmx->flags);
    return 0;
}

void vm_launch_fail(unsigned long eflags)
{
    BUG();
}

void vm_resume_fail(unsigned long eflags)
{
    BUG();
}

#endif /* CONFIG_VMX */