ia64/xen-unstable

xen/arch/ia64/vmx/vmx_init.c @ 17208:82fa2e6cb592

[IA64] vmx_init_env must be called on every processor

Signed-off-by: Anthony Xu <anthony.xu@intel.com>
author   Alex Williamson <alex.williamson@hp.com>
date     Fri Mar 14 15:02:12 2008 -0600
parents  b45f3a99a4bb
children e768be7bf561
/* -*-  Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
/*
 * vmx_init.c: initialization work for vt specific domain
 * Copyright (c) 2005, Intel Corporation.
 *	Kun Tian (Kevin Tian) <kevin.tian@intel.com>
 *	Xuefei Xu (Anthony Xu) <anthony.xu@intel.com>
 *	Fred Yang <fred.yang@intel.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 */

/*
 * 05/08/16 Kun Tian (Kevin Tian) <kevin.tian@intel.com>:
 * Disable double mapping
 *
 * 05/03/23 Kun Tian (Kevin Tian) <kevin.tian@intel.com>:
 * Simplified design in first step:
 *	- One virtual environment
 *	- Domain is bound to one LP
 * Later to support guest SMP:
 *	- Need interface to handle VP scheduled to different LP
 */
#include <xen/config.h>
#include <xen/types.h>
#include <xen/sched.h>
#include <asm/pal.h>
#include <asm/page.h>
#include <asm/processor.h>
#include <asm/vmx_vcpu.h>
#include <xen/lib.h>
#include <asm/vmmu.h>
#include <public/xen.h>
#include <public/hvm/ioreq.h>
#include <public/event_channel.h>
#include <asm/vmx_phy_mode.h>
#include <asm/vmx.h>
#include <xen/mm.h>
#include <asm/viosapic.h>
#include <xen/event.h>
#include <asm/vlsapic.h>
#include <asm/vhpt.h>
#include <asm/vmx_pal_vsa.h>
#include "entry.h"

/* Global flag to identify whether Intel vmx feature is on */
u32 vmx_enabled = 0;
static u64 buffer_size;
static u64 vp_env_info;
static u64 vm_buffer = 0;	/* Buffer required to bring up VMX feature */
u64 __vsa_base = 0;	/* Run-time service base of VMX */

/* Check whether the vt feature is enabled or not. */
void
identify_vmx_feature(void)
{
    pal_status_t ret;
    u64 avail = 1, status = 1, control = 1;

    vmx_enabled = 0;
    /* Check VT-i feature */
    ret = ia64_pal_proc_get_features(&avail, &status, &control);
    if (ret != PAL_STATUS_SUCCESS) {
        printk("Get proc features failed.\n");
        goto no_vti;
    }

    /* FIXME: do we need to check the status field, to see whether
     * PSR.vm is actually enabled? If yes, another call to
     * ia64_pal_proc_set_features may be required then.
     */
    printk("avail:0x%lx, status:0x%lx, control:0x%lx, vm?0x%lx\n",
           avail, status, control, avail & PAL_PROC_VM_BIT);
    if (!(avail & PAL_PROC_VM_BIT)) {
        printk("No VT feature supported.\n");
        goto no_vti;
    }

    ret = ia64_pal_vp_env_info(&buffer_size, &vp_env_info);
    if (ret != PAL_STATUS_SUCCESS) {
        printk("Get vp environment info failed.\n");
        goto no_vti;
    }

    /* Does Xen have the ability to decode itself? */
    if (!(vp_env_info & VP_OPCODE))
        printk("WARNING: no opcode provided from hardware(%lx)!!!\n",
               vp_env_info);
    printk("vm buffer size: %ld\n", buffer_size);

    vmx_enabled = 1;
no_vti:
    return;
}

/*
 * ** This function must be called on every processor **
 *
 * Init the virtual environment on the current LP.
 * __vsa_base indicates whether this is the first LP to be initialized
 * for the current domain.
 */
void*
vmx_init_env(void *start, unsigned long end_in_pa)
{
    u64 status, tmp_base;

    if (!vm_buffer) {
        /* VM buffer must be 4K aligned and
         * must be pinned by both itr and dtr. */
#define VM_BUFFER_ALIGN		(4 * 1024)
#define VM_BUFFER_ALIGN_UP(x)	(((x) + (VM_BUFFER_ALIGN - 1)) &	\
                                 ~(VM_BUFFER_ALIGN - 1))
        unsigned long s_vm_buffer =
            VM_BUFFER_ALIGN_UP((unsigned long)start);
        unsigned long e_vm_buffer = s_vm_buffer + buffer_size;
        if (__pa(e_vm_buffer) < end_in_pa) {
            init_xenheap_pages(__pa(start), __pa(s_vm_buffer));
            start = (void*)e_vm_buffer;
            vm_buffer = virt_to_xenva(s_vm_buffer);
            printk("vm_buffer: 0x%lx\n", vm_buffer);
        } else {
            printk("Can't allocate vm_buffer "
                   "start 0x%p end_in_pa 0x%lx "
                   "buffer_size 0x%lx\n",
                   start, end_in_pa, buffer_size);
            vmx_enabled = 0;
            return start;
        }
    }
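
    /* The first processor to reach this point (no __vsa_base yet) passes
     * VP_INIT_ENV_INITALIZE so the environment is set up from scratch;
     * later processors pass VP_INIT_ENV and are expected to get back the
     * same VSA base (see the ASSERT below). */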
    status = ia64_pal_vp_init_env(__vsa_base ? VP_INIT_ENV : VP_INIT_ENV_INITALIZE,
                                  __pa(vm_buffer),
                                  vm_buffer,
                                  &tmp_base);

    if (status != PAL_STATUS_SUCCESS) {
        printk("ia64_pal_vp_init_env failed.\n");
        vmx_enabled = 0;
        return start;
    }

    if (!__vsa_base)
        __vsa_base = tmp_base;
    else
        ASSERT(tmp_base == __vsa_base);

    return start;
}
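
/* Layout of CPUID register 3 (processor version information); used by
 * alloc_vpd() below to clamp the number of CPUID registers reported to
 * the guest. */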
typedef union {
    u64 value;
    struct {
        u64 number : 8;
        u64 revision : 8;
        u64 model : 8;
        u64 family : 8;
        u64 archrev : 8;
        u64 rv : 24;
    };
} cpuid3_t;

/* Allocate vpd from domheap */
static vpd_t *alloc_vpd(void)
{
    int i;
    cpuid3_t cpuid3;
    struct page_info *page;
    vpd_t *vpd;
    mapped_regs_t *mregs;

    page = alloc_domheap_pages(NULL, get_order(VPD_SIZE), 0);
    if (page == NULL) {
        printk("VPD allocation failed.\n");
        return NULL;
    }
    vpd = page_to_virt(page);

    printk(XENLOG_DEBUG "vpd base: 0x%p, vpd size:%ld\n",
           vpd, sizeof(vpd_t));
    memset(vpd, 0, VPD_SIZE);
    mregs = &vpd->vpd_low;

    /* CPUID init */
    for (i = 0; i < 5; i++)
        mregs->vcpuid[i] = ia64_get_cpuid(i);

    /* Limit the CPUID number to 5 */
    cpuid3.value = mregs->vcpuid[3];
    cpuid3.number = 4;	/* 5 - 1 */
    mregs->vcpuid[3] = cpuid3.value;

    mregs->vac.a_from_int_cr = 1;
    mregs->vac.a_to_int_cr = 1;
    mregs->vac.a_from_psr = 1;
    mregs->vac.a_from_cpuid = 1;
    mregs->vac.a_cover = 1;
    mregs->vac.a_bsw = 1;
    mregs->vac.a_int = 1;
    mregs->vdc.d_vmsw = 1;

    return vpd;
}

/* Free vpd to domheap */
static void
free_vpd(struct vcpu *v)
{
    if ( v->arch.privregs )
        free_domheap_pages(virt_to_page(v->arch.privregs),
                           get_order(VPD_SIZE));
}

// This is used for PAL_VP_CREATE and PAL_VPS_SET_PENDING_INTERRUPT
// so that we don't have to pin the vpd down with itr[].
void
__vmx_vpd_pin(struct vcpu* v)
{
    unsigned long privregs = (unsigned long)v->arch.privregs;
    u64 psr;

    privregs &= ~(IA64_GRANULE_SIZE - 1);

    // check overlapping with current stack
    if (privregs ==
        ((unsigned long)current & ~(IA64_GRANULE_SIZE - 1)))
        return;

    if (!VMX_DOMAIN(current)) {
        // check overlapping with vhpt
        if (privregs ==
            (vcpu_vhpt_maddr(current) & ~(IA64_GRANULE_SHIFT - 1)))
            return;
    } else {
        // check overlapping with vhpt
        if (privregs ==
            ((unsigned long)current->arch.vhpt.hash &
             ~(IA64_GRANULE_SHIFT - 1)))
            return;

        // check overlapping with privregs
        if (privregs ==
            ((unsigned long)current->arch.privregs &
             ~(IA64_GRANULE_SHIFT - 1)))
            return;
    }
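
    /* With interruption collection disabled, purge any overlapping data
     * translation and pin the VPD with a granule-sized DTR entry so PAL
     * can access it without taking a TLB fault. */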
    psr = ia64_clear_ic();
    ia64_ptr(0x2 /*D*/, privregs, IA64_GRANULE_SIZE);
    ia64_srlz_d();
    ia64_itr(0x2 /*D*/, IA64_TR_MAPPED_REGS, privregs,
             pte_val(pfn_pte(__pa(privregs) >> PAGE_SHIFT, PAGE_KERNEL)),
             IA64_GRANULE_SHIFT);
    ia64_set_psr(psr);
    ia64_srlz_d();
}
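
// Undo __vmx_vpd_pin(): rewriting region register 7 re-establishes the
// normal rr7 mappings for the current vcpu in place of the temporary
// VPD translation.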
void
__vmx_vpd_unpin(struct vcpu* v)
{
    if (!VMX_DOMAIN(current)) {
        int rc;
        rc = !set_one_rr(VRN7 << VRN_SHIFT, VCPU(current, rrs[VRN7]));
        BUG_ON(rc);
    } else {
        IA64FAULT fault;
        fault = vmx_vcpu_set_rr(current, VRN7 << VRN_SHIFT,
                                VMX(current, vrr[VRN7]));
        BUG_ON(fault != IA64_NO_FAULT);
    }
}

/*
 * Create a VP on an initialized VMX environment.
 */
static void
vmx_create_vp(struct vcpu *v)
{
    u64 ret;
    vpd_t *vpd = (vpd_t *)v->arch.privregs;
    u64 ivt_base;
    extern char vmx_ia64_ivt;
    /* ia64_ivt is a function pointer, so we need this translation */
    ivt_base = (u64) &vmx_ia64_ivt;
    printk(XENLOG_DEBUG "ivt_base: 0x%lx\n", ivt_base);

    vmx_vpd_pin(v);
    ret = ia64_pal_vp_create((u64 *)vpd, (u64 *)ivt_base, 0);
    vmx_vpd_unpin(v);

    if (ret != PAL_STATUS_SUCCESS) {
        panic_domain(vcpu_regs(v), "ia64_pal_vp_create failed.\n");
    }
}

/* Other non-context related tasks can be done in context switch */
void
vmx_save_state(struct vcpu *v)
{
    u64 status;

    BUG_ON(v != current);

    ia64_call_vsa(PAL_VPS_SAVE, v->arch.privregs, 0, 0, 0, 0, 0, 0);

    /* Need to save the KRs on domain switch, though the HV itself doesn't
     * use them.
     */
    v->arch.arch_vmx.vkr[0] = ia64_get_kr(0);
    v->arch.arch_vmx.vkr[1] = ia64_get_kr(1);
    v->arch.arch_vmx.vkr[2] = ia64_get_kr(2);
    v->arch.arch_vmx.vkr[3] = ia64_get_kr(3);
    v->arch.arch_vmx.vkr[4] = ia64_get_kr(4);
    v->arch.arch_vmx.vkr[5] = ia64_get_kr(5);
    v->arch.arch_vmx.vkr[6] = ia64_get_kr(6);
    v->arch.arch_vmx.vkr[7] = ia64_get_kr(7);
}

/* Even if the guest is in physical mode, we still need such double mapping */
void
vmx_load_state(struct vcpu *v)
{
    BUG_ON(v != current);

    vmx_load_all_rr(v);

    /* vmx_load_all_rr() pins down v->arch.privregs with both dtr/itr */
    ia64_call_vsa(PAL_VPS_RESTORE, v->arch.privregs, 0, 0, 0, 0, 0, 0);

    ia64_set_kr(0, v->arch.arch_vmx.vkr[0]);
    ia64_set_kr(1, v->arch.arch_vmx.vkr[1]);
    ia64_set_kr(2, v->arch.arch_vmx.vkr[2]);
    ia64_set_kr(3, v->arch.arch_vmx.vkr[3]);
    ia64_set_kr(4, v->arch.arch_vmx.vkr[4]);
    ia64_set_kr(5, v->arch.arch_vmx.vkr[5]);
    ia64_set_kr(6, v->arch.arch_vmx.vkr[6]);
    ia64_set_kr(7, v->arch.arch_vmx.vkr[7]);
    /* Guest vTLB is not required to be switched explicitly, since
     * it is anchored in the vcpu */

    migrate_timer(&v->arch.arch_vmx.vtm.vtm_timer, v->processor);
}
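
/* Per-vcpu assist setup: allocate an unbound Xen event channel for the
 * device model and, if the shared ioreq page is already mapped, publish
 * the port in the vcpu's io data so ioemu can bind to it. */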
static int
vmx_vcpu_initialise(struct vcpu *v)
{
    struct vmx_ioreq_page *iorp = &v->domain->arch.hvm_domain.ioreq;

    int rc = alloc_unbound_xen_event_channel(v, 0);
    if (rc < 0)
        return rc;
    v->arch.arch_vmx.xen_port = rc;

    spin_lock(&iorp->lock);
    if (v->domain->arch.vmx_platform.ioreq.va != 0) {
        vcpu_iodata_t *p = get_vio(v);
        p->vp_eport = v->arch.arch_vmx.xen_port;
    }
    spin_unlock(&iorp->lock);

    gdprintk(XENLOG_INFO, "Allocated port %ld for hvm %d vcpu %d.\n",
             v->arch.arch_vmx.xen_port, v->domain->domain_id, v->vcpu_id);

    return 0;
}

static int vmx_create_event_channels(struct vcpu *v)
{
    struct vcpu *o;

    if (v->vcpu_id == 0) {
        /* Ugly: create event channels for every vcpu when vcpu 0
           starts, so that they're available for ioemu to bind to. */
        for_each_vcpu(v->domain, o) {
            int rc = vmx_vcpu_initialise(o);
            if (rc < 0) //XXX error recovery
                return rc;
        }
    }

    return 0;
}

/*
 * The event channels have already been destroyed in domain_kill(),
 * so we needn't do anything here.
 */
static void vmx_release_assist_channel(struct vcpu *v)
{
    return;
}

/* The following three functions are based on hvm_xxx_ioreq_page()
 * in xen/arch/x86/hvm/hvm.c */
static void vmx_init_ioreq_page(
    struct domain *d, struct vmx_ioreq_page *iorp)
{
    memset(iorp, 0, sizeof(*iorp));
    spin_lock_init(&iorp->lock);
    domain_pause(d);
}

static void vmx_destroy_ioreq_page(
    struct domain *d, struct vmx_ioreq_page *iorp)
{
    spin_lock(&iorp->lock);

    ASSERT(d->is_dying);

    if (iorp->va != NULL) {
        put_page(iorp->page);
        iorp->page = NULL;
        iorp->va = NULL;
    }

    spin_unlock(&iorp->lock);
}
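
/* Register the guest pfn nominated by the toolstack as the shared ioreq
 * page: take a reference on the underlying page, record its address, and
 * unpause the domain that vmx_init_ioreq_page() paused. The reference is
 * dropped again in vmx_destroy_ioreq_page(). */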
int vmx_set_ioreq_page(
    struct domain *d, struct vmx_ioreq_page *iorp, unsigned long gpfn)
{
    struct page_info *page;
    unsigned long mfn;
    pte_t pte;

    pte = *lookup_noalloc_domain_pte(d, gpfn << PAGE_SHIFT);
    if (!pte_present(pte) || !pte_mem(pte))
        return -EINVAL;
    mfn = (pte_val(pte) & _PFN_MASK) >> PAGE_SHIFT;
    ASSERT(mfn_valid(mfn));

    page = mfn_to_page(mfn);
    if (get_page(page, d) == 0)
        return -EINVAL;

    spin_lock(&iorp->lock);

    if ((iorp->va != NULL) || d->is_dying) {
        spin_unlock(&iorp->lock);
        put_page(page);
        return -EINVAL;
    }

    iorp->va = mfn_to_virt(mfn);
    iorp->page = page;

    spin_unlock(&iorp->lock);

    domain_unpause(d);

    return 0;
}

/*
 * Initialize the VMX environment for the guest. Only the 1st vp/vcpu
 * is registered here.
 */
int
vmx_final_setup_guest(struct vcpu *v)
{
    vpd_t *vpd;
    int rc;
    struct switch_stack *sw;

    vpd = alloc_vpd();
    ASSERT(vpd);
    if (!vpd)
        return -ENOMEM;

    v->arch.privregs = (mapped_regs_t *)vpd;
    vpd->vpd_low.virt_env_vaddr = vm_buffer;

    /* Per-domain vTLB and vhpt implementation. For now a vmx domain sticks
     * to this solution. Maybe it can be deferred until we know the created
     * domain is a vmx one. */
    rc = init_domain_tlb(v);
    if (rc)
        return rc;

    if (!v->domain->arch.is_sioemu) {
        rc = vmx_create_event_channels(v);
        if (rc)
            return rc;
    }

    /* v->arch.schedule_tail = arch_vmx_do_launch; */
    vmx_create_vp(v);

    /* Physical mode emulation initialization, including
     * emulation ID allocation and related memory requests
     */
    physical_mode_init(v);

    vlsapic_reset(v);
    vtm_init(v);

    /* Set up the guest's indicator for a VTi domain */
    set_bit(ARCH_VMX_DOMAIN, &v->arch.arch_vmx.flags);

    /* Initialize pNonSys=1 for the first context switch */
    sw = (struct switch_stack *)vcpu_regs(v) - 1;
    sw->pr = (1UL << PRED_NON_SYSCALL);

    return 0;
}
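
/* Domain teardown: release each vcpu's assist channel, the vACPI state
 * and the three shared ioreq pages. */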
void
vmx_relinquish_guest_resources(struct domain *d)
{
    struct vcpu *v;

    if (d->arch.is_sioemu)
        return;

    for_each_vcpu(d, v)
        vmx_release_assist_channel(v);

    vacpi_relinquish_resources(d);

    vmx_destroy_ioreq_page(d, &d->arch.vmx_platform.ioreq);
    vmx_destroy_ioreq_page(d, &d->arch.vmx_platform.buf_ioreq);
    vmx_destroy_ioreq_page(d, &d->arch.vmx_platform.buf_pioreq);
}

void
vmx_relinquish_vcpu_resources(struct vcpu *v)
{
    vtime_t *vtm = &(v->arch.arch_vmx.vtm);

    kill_timer(&vtm->vtm_timer);

    free_domain_tlb(v);
    free_vpd(v);
}
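
/* Guest-physical I/O regions that receive special page types in the P2M;
 * vmx_build_io_physmap_table() below walks this table. */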
typedef struct io_range {
    unsigned long start;
    unsigned long size;
    unsigned long type;
} io_range_t;

static const io_range_t io_ranges[] = {
    {VGA_IO_START, VGA_IO_SIZE, GPFN_FRAME_BUFFER},
    {MMIO_START, MMIO_SIZE, GPFN_LOW_MMIO},
    {LEGACY_IO_START, LEGACY_IO_SIZE, GPFN_LEGACY_IO},
    {IO_SAPIC_START, IO_SAPIC_SIZE, GPFN_IOSAPIC},
    {PIB_START, PIB_SIZE, GPFN_PIB},
};

// The P2M table is built in libxc/ia64/xc_ia64_hvm_build.c @ setup_guest()
// so only mark IO memory space here
static void vmx_build_io_physmap_table(struct domain *d)
{
    unsigned long i, j;

    /* Mark I/O ranges */
    for (i = 0; i < (sizeof(io_ranges) / sizeof(io_range_t)); i++) {
        for (j = io_ranges[i].start;
             j < io_ranges[i].start + io_ranges[i].size; j += PAGE_SIZE)
            (void)__assign_domain_page(d, j, io_ranges[i].type,
                                       ASSIGN_writable);
    }
}
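
/* Domain-wide platform setup for a VTi guest: mark the I/O physmap ranges,
 * prepare the shared ioreq pages, allocate the SAL data area and initialize
 * the viosapic and vACPI models. */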
int vmx_setup_platform(struct domain *d)
{
    ASSERT(d != dom0); /* only for non-privileged vti domain */

    if (!d->arch.is_sioemu) {
        vmx_build_io_physmap_table(d);

        vmx_init_ioreq_page(d, &d->arch.vmx_platform.ioreq);
        vmx_init_ioreq_page(d, &d->arch.vmx_platform.buf_ioreq);
        vmx_init_ioreq_page(d, &d->arch.vmx_platform.buf_pioreq);
    }
    /* TEMP */
    d->arch.vmx_platform.pib_base = 0xfee00000UL;

    d->arch.sal_data = xmalloc(struct xen_sal_data);
    if (d->arch.sal_data == NULL)
        return -ENOMEM;

    /* Only open one port for I/O and interrupt emulation */
    memset(&d->shared_info->evtchn_mask[0], 0xff,
           sizeof(d->shared_info->evtchn_mask));

    /* Initialize the iosapic model within the hypervisor */
    viosapic_init(d);

    if (!d->arch.is_sioemu)
        vacpi_init(d);

    if (d->arch.is_sioemu) {
        int i;
        for (i = 1; i < MAX_VIRT_CPUS; i++)
            d->shared_info->vcpu_info[i].evtchn_upcall_mask = 1;
    }

    return 0;
}
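
/* Resume path for a VTi vcpu: reload VMX state and, unless the domain runs
 * in sioemu mode, drain any outstanding ioreq from the device model before
 * returning to the guest (cf. hvm_do_resume() on x86). */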
void vmx_do_resume(struct vcpu *v)
{
    ioreq_t *p;

    vmx_load_state(v);

    if (v->domain->arch.is_sioemu)
        return;

    /* stolen from hvm_do_resume() in arch/x86/hvm/hvm.c */
    /* NB. Optimised for common case (p->state == STATE_IOREQ_NONE). */
    p = &get_vio(v)->vp_ioreq;
    while (p->state != STATE_IOREQ_NONE) {
        switch (p->state) {
        case STATE_IORESP_READY: /* IORESP_READY -> NONE */
            vmx_io_assist(v);
            break;
        case STATE_IOREQ_READY:
        case STATE_IOREQ_INPROCESS:
            /* IOREQ_{READY,INPROCESS} -> IORESP_READY */
            wait_on_xen_event_channel(v->arch.arch_vmx.xen_port,
                                      (p->state != STATE_IOREQ_READY) &&
                                      (p->state != STATE_IOREQ_INPROCESS));
            break;
        default:
            gdprintk(XENLOG_ERR,
                     "Weird HVM iorequest state %d.\n", p->state);
            domain_crash_synchronous();
        }
    }
}