ia64/xen-unstable

view xen/arch/x86/vmx_vmcs.c @ 3708:9e80fc0dcac5

bitkeeper revision 1.1159.212.121 (42081031Gcfd1G5fgexBl7vd4XfmLQ)

Use the 1:1 pagetables built for guest physical mode emulation to
implement phys_to_machine_mapping as well.

Signed-off-by: Arun Sharma <arun.sharma@intel.com>
Signed-off-by: ian@xensource.com
author iap10@labyrinth.cl.cam.ac.uk
date Tue Feb 08 01:04:49 2005 +0000 (2005-02-08)
parents ef5e5cd10778
children ea98f0bb6510
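
The core pattern this change enables is sketched below: translate a
guest-physical address through phys_to_machine_mapping() (now backed by
the same 1:1 pagetables used for guest physical mode emulation), then map
the machine frame into the hypervisor. The helper name
gpa_to_hypervisor_va is hypothetical; the real lookups appear inline in
vmx_setup_platform() in the source that follows, and a caller must
unmap_domain_mem() the mapping when finished.

    /* Hypothetical helper: guest-physical address -> hypervisor virtual
     * address, via the pagetable-backed phys_to_machine_mapping(). */
    static void *gpa_to_hypervisor_va(unsigned long gpa)
    {
        unsigned long mpfn = phys_to_machine_mapping(gpa >> PAGE_SHIFT);
        char *va = map_domain_mem(mpfn << PAGE_SHIFT);
        return va + (gpa & ~PAGE_MASK);
    }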

/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
/*
 * vmx_vmcs.c: VMCS management
 * Copyright (c) 2004, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 */
#include <xen/config.h>
#include <xen/init.h>
#include <xen/mm.h>
#include <xen/lib.h>
#include <xen/errno.h>

#include <asm/cpufeature.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/vmx.h>
#include <xen/event.h>
#include <xen/kernel.h>
#include <public/io/ioreq.h>
#include <asm/domain_page.h>

struct vmcs_struct *alloc_vmcs(void)
{
    struct vmcs_struct *vmcs;
    unsigned int cpu_sig = cpuid_eax(0x00000001);

    vmcs = (struct vmcs_struct *) alloc_xenheap_pages(get_order(vmcs_size));
    memset((char *) vmcs, 0, vmcs_size); /* the VMCS region must start zeroed */
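
    /* Pick the VMCS revision identifier by CPUID signature: above 0xf41
     * (family 0xF, model 4, stepping 1) assume revision 3, else revision 1.
     * (Normally this would be read from the IA32_VMX_BASIC MSR.) */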
    vmcs->vmcs_revision_id = (cpu_sig > 0xf41) ? 3 : 1;
    return vmcs;
}

void free_vmcs(struct vmcs_struct *vmcs)
{
    int order;

    order = get_order(vmcs_size); /* must match the order used by alloc_vmcs() */
    free_xenheap_pages((unsigned long) vmcs, order);
}
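
/*
 * Program the pin-based, CPU-based, VM-exit and VM-entry execution
 * controls with the monitor's fixed settings. Each __vmwrite() returns
 * nonzero on failure, so the results are OR-ed into a single error flag.
 */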
static inline int construct_vmcs_controls(void)
{
    int error = 0;

    error |= __vmwrite(PIN_BASED_VM_EXEC_CONTROL,
                       MONITOR_PIN_BASED_EXEC_CONTROLS);

    error |= __vmwrite(CPU_BASED_VM_EXEC_CONTROL,
                       MONITOR_CPU_BASED_EXEC_CONTROLS);

    error |= __vmwrite(VM_EXIT_CONTROLS, MONITOR_VM_EXIT_CONTROLS);
    error |= __vmwrite(VM_ENTRY_CONTROLS, MONITOR_VM_ENTRY_CONTROLS);

    return error;
}

#define GUEST_SEGMENT_LIMIT 0xffffffff
#define HOST_SEGMENT_LIMIT 0xffffffff
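
/* Snapshot of the host (Xen) state that is loaded into the host-state
   area of the VMCS. */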
struct host_execution_env {
    /* selectors */
    unsigned short ldtr_selector;
    unsigned short tr_selector;
    unsigned short ds_selector;
    unsigned short cs_selector;
    /* limits */
    unsigned short gdtr_limit;
    unsigned short ldtr_limit;
    unsigned short idtr_limit;
    unsigned short tr_limit;
    /* base */
    unsigned long gdtr_base;
    unsigned long ldtr_base;
    unsigned long idtr_base;
    unsigned long tr_base;
    unsigned long ds_base;
    unsigned long cs_base;
    /* control registers */
    unsigned long cr3;
    unsigned long cr0;
    unsigned long cr4;
    unsigned long dr7;
};

#define round_pgdown(_p) ((_p)&PAGE_MASK) /* copied from domain.c */
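
/*
 * Locate the shared I/O request page: walk the guest-supplied e820 map
 * (entry count in context->ecx, map address in context->edi), find the
 * E820_SHARED_PAGE entry, and map that machine frame into Xen so the
 * device model and the hypervisor can exchange ioreqs through it.
 */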
int vmx_setup_platform(struct exec_domain *d, execution_context_t *context)
{
    int i;
    unsigned int n;
    unsigned long *p, mpfn, offset, addr;
    struct e820entry *e820p;
    unsigned long gpfn = 0;

    context->ebx = 0;   /* Linux expects ebx to be 0 for boot proc */

    n = context->ecx;
    if (n > 32) {
        VMX_DBG_LOG(DBG_LEVEL_1, "Too many e820 entries: %d\n", n);
        return -1;
    }

    addr = context->edi;
    offset = (addr & ~PAGE_MASK);
    addr = round_pgdown(addr);
    mpfn = phys_to_machine_mapping(addr >> PAGE_SHIFT);
    p = map_domain_mem(mpfn << PAGE_SHIFT);

    e820p = (struct e820entry *) ((unsigned long) p + offset);

    for (i = 0; i < n; i++) {
        if (e820p[i].type == E820_SHARED_PAGE) {
            gpfn = (e820p[i].addr >> PAGE_SHIFT);
            break;
        }
    }

    if (gpfn == 0) {
        printk("No shared page?\n");
        unmap_domain_mem(p);
        return -1;
    }
    unmap_domain_mem(p);

    mpfn = phys_to_machine_mapping(gpfn);
    p = map_domain_mem(mpfn << PAGE_SHIFT);
    ASSERT(p != NULL);
    d->arch.arch_vmx.vmx_platform.shared_page_va = (unsigned long) p;

    return 0;
}
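
/*
 * Final per-VCPU setup before the first VM entry: record the machine pfn
 * range the domain owns, install the linear pagetable slot and the
 * guest_pl2e_cache page in the monitor table, locate the shared page, and
 * load host GDT/TR and guest/host CR3 state into the VMCS.
 */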
void vmx_do_launch(struct exec_domain *ed)
{
    /* Update CR3, GDT, LDT, TR */
    unsigned int tr = 0, cpu, error = 0;   /* str stores only 16 bits into tr */
    struct host_execution_env host_env;
    struct Xgt_desc_struct desc;
    struct list_head *list_ent;
    l2_pgentry_t *mpl2e, *guest_pl2e_cache;
    unsigned long i, pfn = 0;
    struct pfn_info *page;
    execution_context_t *ec = get_execution_context();
    struct domain *d = ed->domain;

    cpu = smp_processor_id();
    d->arch.min_pfn = ~0UL;  /* start at "infinity" so the first page sets it */
    d->arch.max_pfn = 0;

    spin_lock(&d->page_alloc_lock);
    list_ent = d->page_list.next;

    mpl2e = (l2_pgentry_t *)map_domain_mem(pagetable_val(ed->arch.monitor_table));
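
    /* Walk the domain's page list to record the min/max machine pfns it owns. */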
    for ( i = 0; list_ent != &d->page_list; i++ )
    {
        pfn = list_entry(list_ent, struct pfn_info, list) - frame_table;
        d->arch.min_pfn = min(d->arch.min_pfn, pfn);
        d->arch.max_pfn = max(d->arch.max_pfn, pfn);
        list_ent = frame_table[pfn].list.next;
    }

    spin_unlock(&d->page_alloc_lock);

    page = (struct pfn_info *) alloc_domheap_page(NULL);
    pfn = (unsigned long) (page - frame_table);

    /*
     * make linear_pt_table work for guest ptes
     */
    mpl2e[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
        mk_l2_pgentry((pfn << PAGE_SHIFT)| __PAGE_HYPERVISOR);

    guest_pl2e_cache = map_domain_mem(pfn << PAGE_SHIFT);
    memset(guest_pl2e_cache, 0, PAGE_SIZE); /* clean it up */
    ed->arch.guest_pl2e_cache = guest_pl2e_cache;

    unmap_domain_mem(mpl2e);

    vmx_setup_platform(ed, ec);

    __asm__ __volatile__ ("sgdt (%%eax) \n" :: "a"(&desc) : "memory");
    host_env.gdtr_limit = desc.size;
    host_env.gdtr_base = desc.address;

    error |= __vmwrite(HOST_GDTR_BASE, host_env.gdtr_base);

    error |= __vmwrite(GUEST_LDTR_SELECTOR, 0);
    error |= __vmwrite(GUEST_LDTR_BASE, 0);
    error |= __vmwrite(GUEST_LDTR_LIMIT, 0);

    __asm__ __volatile__ ("str (%%eax) \n" :: "a"(&tr) : "memory");
    host_env.tr_selector = tr;
    host_env.tr_limit = sizeof(struct tss_struct);
    host_env.tr_base = (unsigned long) &init_tss[cpu];

    error |= __vmwrite(HOST_TR_SELECTOR, host_env.tr_selector);
    error |= __vmwrite(HOST_TR_BASE, host_env.tr_base);
    error |= __vmwrite(GUEST_TR_BASE, 0);
    error |= __vmwrite(GUEST_TR_LIMIT, 0xff);

    ed->arch.shadow_table = ed->arch.pagetable;
    __vmwrite(GUEST_CR3, pagetable_val(ed->arch.pagetable));
    __vmwrite(HOST_CR3, pagetable_val(ed->arch.monitor_table));
    __vmwrite(HOST_ESP, (unsigned long) get_stack_top());

    ed->arch.schedule_tail = arch_vmx_do_resume;
}

/*
 * Initially set up the same environment as the host.
 */
static inline int
construct_init_vmcs_guest(execution_context_t *context,
                          full_execution_context_t *full_context,
                          struct host_execution_env *host_env)
{
    int error = 0;
    union vmcs_arbytes arbytes;
    unsigned long dr7;
    unsigned long eflags, shadow_cr;

    /* MSR */
    error |= __vmwrite(VM_EXIT_MSR_LOAD_ADDR, 0);
    error |= __vmwrite(VM_EXIT_MSR_STORE_ADDR, 0);

    error |= __vmwrite(VM_EXIT_MSR_STORE_COUNT, 0);
    error |= __vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0);
    error |= __vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0);
    /* interrupt */
    error |= __vmwrite(VM_ENTRY_INTR_INFO_FIELD, 0);
    /* CR0/CR4 guest/host masks: trap all guest writes to CR0 and CR4 */
    error |= __vmwrite(CR0_GUEST_HOST_MASK, 0xffffffff);
    error |= __vmwrite(CR4_GUEST_HOST_MASK, 0xffffffff);

    error |= __vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0);
    error |= __vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, 0);

    /* TSC */
    error |= __vmwrite(TSC_OFFSET, 0);
    error |= __vmwrite(CR3_TARGET_COUNT, 0);

    /* Guest selectors */
    error |= __vmwrite(GUEST_CS_SELECTOR, context->cs);
    error |= __vmwrite(GUEST_ES_SELECTOR, context->es);
    error |= __vmwrite(GUEST_SS_SELECTOR, context->ss);
    error |= __vmwrite(GUEST_DS_SELECTOR, context->ds);
    error |= __vmwrite(GUEST_FS_SELECTOR, context->fs);
    error |= __vmwrite(GUEST_GS_SELECTOR, context->gs);

    /* Guest segment limits */
    error |= __vmwrite(GUEST_CS_LIMIT, GUEST_SEGMENT_LIMIT);
    error |= __vmwrite(GUEST_ES_LIMIT, GUEST_SEGMENT_LIMIT);
    error |= __vmwrite(GUEST_SS_LIMIT, GUEST_SEGMENT_LIMIT);
    error |= __vmwrite(GUEST_DS_LIMIT, GUEST_SEGMENT_LIMIT);
    error |= __vmwrite(GUEST_FS_LIMIT, GUEST_SEGMENT_LIMIT);
    error |= __vmwrite(GUEST_GS_LIMIT, GUEST_SEGMENT_LIMIT);

    error |= __vmwrite(GUEST_IDTR_LIMIT, host_env->idtr_limit);

    /* AR bytes */
    arbytes.bytes = 0;
    arbytes.fields.seg_type = 0x3;          /* type 3: read/write data, accessed */
    arbytes.fields.s = 1;                   /* code or data, i.e. not system */
    arbytes.fields.dpl = 0;                 /* DPL = 0 */
    arbytes.fields.p = 1;                   /* segment present */
    arbytes.fields.default_ops_size = 1;    /* 32-bit */
    arbytes.fields.g = 1;
    arbytes.fields.null_bit = 0;            /* not null */

    error |= __vmwrite(GUEST_ES_AR_BYTES, arbytes.bytes);
    error |= __vmwrite(GUEST_SS_AR_BYTES, arbytes.bytes);
    error |= __vmwrite(GUEST_DS_AR_BYTES, arbytes.bytes);
    error |= __vmwrite(GUEST_FS_AR_BYTES, arbytes.bytes);
    error |= __vmwrite(GUEST_GS_AR_BYTES, arbytes.bytes);

    arbytes.fields.seg_type = 0xb;          /* type 0xb: execute/read code, accessed */
    error |= __vmwrite(GUEST_CS_AR_BYTES, arbytes.bytes);

    error |= __vmwrite(GUEST_GDTR_BASE, context->edx);
    context->edx = 0;
    error |= __vmwrite(GUEST_GDTR_LIMIT, context->eax);
    context->eax = 0;

    arbytes.fields.s = 0;                   /* not a code or data segment */
    arbytes.fields.seg_type = 0x2;          /* LDT */
    arbytes.fields.default_ops_size = 0;    /* 16-bit */
    arbytes.fields.g = 0;
    error |= __vmwrite(GUEST_LDTR_AR_BYTES, arbytes.bytes);

    arbytes.fields.seg_type = 0xb;          /* 32-bit TSS (busy) */
    error |= __vmwrite(GUEST_TR_AR_BYTES, arbytes.bytes);

    error |= __vmwrite(GUEST_CR0, host_env->cr0); /* same CR0 */

    /* Initially PG and PE are not set */
    shadow_cr = host_env->cr0;
    shadow_cr &= ~(X86_CR0_PE | X86_CR0_PG);
    error |= __vmwrite(CR0_READ_SHADOW, shadow_cr);
    /* CR3 is set in vmx_final_setup_guestos */
    error |= __vmwrite(GUEST_CR4, host_env->cr4);
    shadow_cr = host_env->cr4;
    shadow_cr &= ~(X86_CR4_PGE | X86_CR4_VMXE);
    error |= __vmwrite(CR4_READ_SHADOW, shadow_cr);

    error |= __vmwrite(GUEST_ES_BASE, host_env->ds_base);
    error |= __vmwrite(GUEST_CS_BASE, host_env->cs_base);
    error |= __vmwrite(GUEST_SS_BASE, host_env->ds_base);
    error |= __vmwrite(GUEST_DS_BASE, host_env->ds_base);
    error |= __vmwrite(GUEST_FS_BASE, host_env->ds_base);
    error |= __vmwrite(GUEST_GS_BASE, host_env->ds_base);
    error |= __vmwrite(GUEST_IDTR_BASE, host_env->idtr_base);

    error |= __vmwrite(GUEST_ESP, context->esp);
    error |= __vmwrite(GUEST_EIP, context->eip);

    eflags = context->eflags & ~VMCS_EFLAGS_RESERVED_0; /* clear 0s */
    eflags |= VMCS_EFLAGS_RESERVED_1;                   /* set 1s */

    error |= __vmwrite(GUEST_EFLAGS, eflags);

    error |= __vmwrite(GUEST_INTERRUPTIBILITY_INFO, 0);
    __asm__ __volatile__ ("mov %%dr7, %0\n" : "=r" (dr7));
    error |= __vmwrite(GUEST_DR7, dr7);
    error |= __vmwrite(GUEST_VMCS0, 0xffffffff);
    error |= __vmwrite(GUEST_VMCS1, 0xffffffff);

    return error;
}
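
/*
 * Fill the host-state area of the VMCS from the running Xen context:
 * segment selectors and bases, the IDTR, CR0/CR4, and the VM-exit
 * handler entry point.
 */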
static inline int construct_vmcs_host(struct host_execution_env *host_env)
{
    int error = 0;
    unsigned long crn;
    struct Xgt_desc_struct desc;

    /* Host selectors */
    host_env->ds_selector = __HYPERVISOR_DS;
    error |= __vmwrite(HOST_ES_SELECTOR, host_env->ds_selector);
    error |= __vmwrite(HOST_SS_SELECTOR, host_env->ds_selector);
    error |= __vmwrite(HOST_DS_SELECTOR, host_env->ds_selector);
    error |= __vmwrite(HOST_FS_SELECTOR, host_env->ds_selector);
    error |= __vmwrite(HOST_GS_SELECTOR, host_env->ds_selector);

    host_env->cs_selector = __HYPERVISOR_CS;
    error |= __vmwrite(HOST_CS_SELECTOR, host_env->cs_selector);

    host_env->ds_base = 0;
    host_env->cs_base = 0;
    error |= __vmwrite(HOST_FS_BASE, host_env->ds_base);
    error |= __vmwrite(HOST_GS_BASE, host_env->ds_base);

    /* Host IDTR */
    __asm__ __volatile__ ("sidt (%%eax) \n" :: "a"(&desc) : "memory");
    host_env->idtr_limit = desc.size;
    host_env->idtr_base = desc.address;
    error |= __vmwrite(HOST_IDTR_BASE, host_env->idtr_base);

    __asm__ __volatile__ ("movl %%cr0,%0" : "=r" (crn) : );
    host_env->cr0 = crn;
    error |= __vmwrite(HOST_CR0, crn); /* same CR0 */

    /* CR3 is set in vmx_final_setup_hostos */
    __asm__ __volatile__ ("movl %%cr4,%0" : "=r" (crn) : );
    host_env->cr4 = crn;
    error |= __vmwrite(HOST_CR4, crn);
    error |= __vmwrite(HOST_EIP, (unsigned long) vmx_asm_vmexit_handler);

    return error;
}

/*
 * Needs extending to support full virtualization.
 * The argument use_host_env indicates whether the new VMCS should use
 * the same setup as the host (xenolinux).
 */

int construct_vmcs(struct arch_vmx_struct *arch_vmx,
                   execution_context_t *context,
                   full_execution_context_t *full_context,
                   int use_host_env)
{
    int error;
    u64 vmcs_phys_ptr;

    struct host_execution_env host_env;

    if (use_host_env != VMCS_USE_HOST_ENV)
        return -EINVAL;

    memset(&host_env, 0, sizeof(struct host_execution_env));

    vmcs_phys_ptr = (u64) virt_to_phys(arch_vmx->vmcs);

    if ((error = __vmpclear(vmcs_phys_ptr))) {
        printk("construct_vmcs: VMCLEAR failed\n");
        return -EINVAL;
    }
    if ((error = load_vmcs(arch_vmx, vmcs_phys_ptr))) {
        printk("construct_vmcs: load_vmcs failed: VMCS = %lx\n",
               (unsigned long) vmcs_phys_ptr);
        return -EINVAL;
    }
    if ((error = construct_vmcs_controls())) {
        printk("construct_vmcs: construct_vmcs_controls failed\n");
        return -EINVAL;
    }
    /* host selectors */
    if ((error = construct_vmcs_host(&host_env))) {
        printk("construct_vmcs: construct_vmcs_host failed\n");
        return -EINVAL;
    }
    /* guest selectors */
    if ((error = construct_init_vmcs_guest(context, full_context, &host_env))) {
        printk("construct_vmcs: construct_init_vmcs_guest failed\n");
        return -EINVAL;
    }

    if ((error = __vmwrite(EXCEPTION_BITMAP,
                           MONITOR_DEFAULT_EXCEPTION_BITMAP))) {
        printk("construct_vmcs: setting exception bitmap failed\n");
        return -EINVAL;
    }

    return 0;
}
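
/*
 * Sketch (assumed usage, not taken from this file): how the helpers above
 * are typically combined to bring up a fresh VMCS. The function name
 * example_vmcs_bringup is hypothetical.
 */
static int example_vmcs_bringup(struct arch_vmx_struct *arch_vmx,
                                execution_context_t *context,
                                full_execution_context_t *full_context)
{
    arch_vmx->vmcs = alloc_vmcs();
    if (arch_vmx->vmcs == NULL)
        return -ENOMEM;
    /* VMCLEARs and VMPTRLDs the region, then fills the control, host and
       guest areas. */
    return construct_vmcs(arch_vmx, context, full_context, VMCS_USE_HOST_ENV);
}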

int load_vmcs(struct arch_vmx_struct *arch_vmx, u64 phys_ptr)
{
    int error;

    if ((error = __vmptrld(phys_ptr))) {
        clear_bit(ARCH_VMX_VMCS_LOADED, &arch_vmx->flags);
        return error;
    }
    set_bit(ARCH_VMX_VMCS_LOADED, &arch_vmx->flags);
    return 0;
}

int store_vmcs(struct arch_vmx_struct *arch_vmx, u64 phys_ptr)
{
    /* store the current-VMCS pointer and mark the VMCS as no longer loaded */
    __vmptrst(phys_ptr);
    clear_bit(ARCH_VMX_VMCS_LOADED, &arch_vmx->flags);
    return 0;
}
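
/*
 * Sketch (assumed usage): pairing store_vmcs() and load_vmcs() around a
 * deschedule. store_vmcs() saves the current-VMCS pointer and clears the
 * loaded flag; load_vmcs() VMPTRLDs it back and sets the flag again.
 *
 *     store_vmcs(arch_vmx, vmcs_phys_ptr);
 *     ... run something else ...
 *     load_vmcs(arch_vmx, vmcs_phys_ptr);
 */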

void vm_launch_fail(unsigned long eflags)
{
    BUG();
}

void vm_resume_fail(unsigned long eflags)
{
    BUG();
}