ia64/xen-unstable

xen/arch/ia64/vmx/vmx_init.c @ 19848:5839491bbf20

[IA64] replace MAX_VCPUS with d->max_vcpus where necessary.

Don't use MAX_VCPUS; use domain::max_vcpus instead.
Changeset 2f9e1348aa98 introduced max_vcpus to allow more vcpus
per guest. This patch is the ia64 counterpart.

Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
author Isaku Yamahata <yamahata@valinux.co.jp>
date Mon Jun 29 11:26:05 2009 +0900 (2009-06-29)
parents c8962b24fb50
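
For reference, below is a minimal sketch of the pattern this changeset applies across the ia64 code (a hypothetical illustration, not taken from the actual diff): per-vcpu iteration that was bounded by the compile-time MAX_VCPUS constant becomes bounded by the per-domain d->max_vcpus field introduced by changeset 2f9e1348aa98. The helper name is made up for illustration only.

/* Hypothetical example of the MAX_VCPUS -> d->max_vcpus conversion. */
static void example_walk_vcpus(struct domain *d)
{
    int i;

    /* before: for (i = 0; i < MAX_VCPUS; i++) */
    for (i = 0; i < d->max_vcpus; i++) {
        struct vcpu *v = d->vcpu[i];

        if (v == NULL)
            continue;
        /* ... per-vcpu work ... */
    }
}
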
source
/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
/*
 * vmx_init.c: initialization work for vt specific domain
 * Copyright (c) 2005, Intel Corporation.
 *      Kun Tian (Kevin Tian) <kevin.tian@intel.com>
 *      Xuefei Xu (Anthony Xu) <anthony.xu@intel.com>
 *      Fred Yang <fred.yang@intel.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 */

/*
 * 05/08/16 Kun Tian (Kevin Tian) <kevin.tian@intel.com>:
 * Disable double mapping
 *
 * 05/03/23 Kun Tian (Kevin Tian) <kevin.tian@intel.com>:
 * Simplified design in first step:
 *      - One virtual environment
 *      - Domain is bound to one LP
 * Later to support guest SMP:
 *      - Need interface to handle VP scheduled to different LP
 */

#include <xen/config.h>
#include <xen/types.h>
#include <xen/sched.h>
#include <asm/pal.h>
#include <asm/page.h>
#include <asm/processor.h>
#include <asm/vmx_vcpu.h>
#include <xen/lib.h>
#include <asm/vmmu.h>
#include <public/xen.h>
#include <public/hvm/ioreq.h>
#include <public/event_channel.h>
#include <public/arch-ia64/hvm/memmap.h>
#include <asm/vmx_phy_mode.h>
#include <asm/processor.h>
#include <asm/vmx.h>
#include <xen/mm.h>
#include <asm/viosapic.h>
#include <xen/event.h>
#include <asm/vlsapic.h>
#include <asm/vhpt.h>
#include <asm/vmx_pal_vsa.h>
#include <asm/patch.h>

/* Global flag to identify whether Intel vmx feature is on */
u32 vmx_enabled = 0;
static u64 buffer_size;
static u64 vp_env_info;
static u64 vm_buffer = 0;       /* Buffer required to bring up VMX feature */
u64 __vsa_base = 0;             /* Run-time service base of VMX */

/* Check whether vt feature is enabled or not. */

void vmx_vps_patch(void)
{
    u64 addr;

    addr = (u64)&vmx_vps_sync_read;
    ia64_patch_imm64(addr, __vsa_base+PAL_VPS_SYNC_READ);
    ia64_fc((void *)addr);
    addr = (u64)&vmx_vps_sync_write;
    ia64_patch_imm64(addr, __vsa_base+PAL_VPS_SYNC_WRITE);
    ia64_fc((void *)addr);
    addr = (u64)&vmx_vps_resume_normal;
    ia64_patch_imm64(addr, __vsa_base+PAL_VPS_RESUME_NORMAL);
    ia64_fc((void *)addr);
    addr = (u64)&vmx_vps_resume_handler;
    ia64_patch_imm64(addr, __vsa_base+PAL_VPS_RESUME_HANDLER);
    ia64_fc((void *)addr);
    ia64_sync_i();
    ia64_srlz_i();
}

void
identify_vmx_feature(void)
{
    pal_status_t ret;
    u64 avail = 1, status = 1, control = 1;

    vmx_enabled = 0;
    /* Check VT-i feature */
    ret = ia64_pal_proc_get_features(&avail, &status, &control);
    if (ret != PAL_STATUS_SUCCESS) {
        printk("Get proc features failed.\n");
        goto no_vti;
    }

    /* FIXME: do we need to check the status field, to see whether
     * PSR.vm is actually enabled? If yes, another call to
     * ia64_pal_proc_set_features may be required then.
     */
    printk("avail:0x%lx, status:0x%lx,control:0x%lx, vm?0x%lx\n",
           avail, status, control, avail & PAL_PROC_VM_BIT);
    if (!(avail & PAL_PROC_VM_BIT)) {
        printk("No VT feature supported.\n");
        goto no_vti;
    }

    ret = ia64_pal_vp_env_info(&buffer_size, &vp_env_info);
    if (ret != PAL_STATUS_SUCCESS) {
        printk("Get vp environment info failed.\n");
        goto no_vti;
    }

    printk("vm buffer size: %ld\n", buffer_size);

    vmx_enabled = 1;
no_vti:
    return;
}

/*
 * ** This function must be called on every processor **
 *
 * Init the virtual environment on the current LP.
 * __vsa_base indicates whether this is the first LP to be initialized
 * for the current domain.
 */
void*
vmx_init_env(void *start, unsigned long end_in_pa)
{
    u64 status, tmp_base;

    if (!vm_buffer) {
        /* The VM buffer must be 4K aligned and
         * must be pinned by both itr and dtr. */
#define VM_BUFFER_ALIGN         (4 * 1024)
#define VM_BUFFER_ALIGN_UP(x)   (((x) + (VM_BUFFER_ALIGN - 1)) & \
                                 ~(VM_BUFFER_ALIGN - 1))
        unsigned long s_vm_buffer =
            VM_BUFFER_ALIGN_UP((unsigned long)start);
        unsigned long e_vm_buffer = s_vm_buffer + buffer_size;
        if (__pa(e_vm_buffer) < end_in_pa) {
            init_boot_pages(__pa(start), __pa(s_vm_buffer));
            start = (void*)e_vm_buffer;
            vm_buffer = virt_to_xenva(s_vm_buffer);
            printk("vm_buffer: 0x%lx\n", vm_buffer);
        } else {
            printk("Can't allocate vm_buffer "
                   "start 0x%p end_in_pa 0x%lx "
                   "buffer_size 0x%lx\n",
                   start, end_in_pa, buffer_size);
            vmx_enabled = 0;
            return start;
        }
    }

    status = ia64_pal_vp_init_env(__vsa_base ? VP_INIT_ENV : VP_INIT_ENV_INITALIZE,
                                  __pa(vm_buffer),
                                  vm_buffer,
                                  &tmp_base);

    if (status != PAL_STATUS_SUCCESS) {
        printk("ia64_pal_vp_init_env failed.\n");
        vmx_enabled = 0;
        return start;
    }

    if (!__vsa_base) {
        __vsa_base = tmp_base;
        vmx_vps_patch();
    }
    else
        ASSERT(tmp_base == __vsa_base);

    return start;
}

typedef union {
    u64 value;
    struct {
        u64 number : 8;
        u64 revision : 8;
        u64 model : 8;
        u64 family : 8;
        u64 archrev : 8;
        u64 rv : 24;
    };
} cpuid3_t;

/* Allocate vpd from domheap */
static vpd_t *alloc_vpd(void)
{
    int i;
    cpuid3_t cpuid3;
    struct page_info *page;
    vpd_t *vpd;
    mapped_regs_t *mregs;

    page = alloc_domheap_pages(NULL, get_order(VPD_SIZE), 0);
    if (page == NULL) {
        printk("VPD allocation failed.\n");
        return NULL;
    }
    vpd = page_to_virt(page);

    printk(XENLOG_DEBUG "vpd base: 0x%p, vpd size:%ld\n",
           vpd, sizeof(vpd_t));
    memset(vpd, 0, VPD_SIZE);
    mregs = &vpd->vpd_low;

    /* CPUID init */
    for (i = 0; i < 5; i++)
        mregs->vcpuid[i] = ia64_get_cpuid(i);

    /* Limit the CPUID number to 5 */
    cpuid3.value = mregs->vcpuid[3];
    cpuid3.number = 4; /* 5 - 1 */
    mregs->vcpuid[3] = cpuid3.value;

    mregs->vac.a_from_int_cr = 1;
    mregs->vac.a_to_int_cr = 1;
    mregs->vac.a_from_psr = 1;
    mregs->vac.a_from_cpuid = 1;
    mregs->vac.a_cover = 1;
    mregs->vac.a_bsw = 1;
    mregs->vac.a_int = 1;
    mregs->vdc.d_vmsw = 1;

    return vpd;
}

/* Free vpd to domheap */
static void
free_vpd(struct vcpu *v)
{
    if ( v->arch.privregs )
        free_domheap_pages(virt_to_page(v->arch.privregs),
                           get_order(VPD_SIZE));
}

// This is used for PAL_VP_CREATE and PAL_VPS_SET_PENDING_INTERRUPT
// so that we don't have to pin the vpd down with itr[].
void
__vmx_vpd_pin(struct vcpu* v)
{
    unsigned long privregs = (unsigned long)v->arch.privregs;
    u64 psr;

    privregs &= ~(IA64_GRANULE_SIZE - 1);

    // check overlapping with current stack
    if (privregs ==
        ((unsigned long)current & ~(IA64_GRANULE_SIZE - 1)))
        return;

    if (!VMX_DOMAIN(current)) {
        // check overlapping with vhpt
        if (privregs ==
            (vcpu_vhpt_maddr(current) & ~(IA64_GRANULE_SHIFT - 1)))
            return;
    } else {
        // check overlapping with vhpt
        if (privregs ==
            ((unsigned long)current->arch.vhpt.hash &
             ~(IA64_GRANULE_SHIFT - 1)))
            return;

        // check overlapping with privregs
        if (privregs ==
            ((unsigned long)current->arch.privregs &
             ~(IA64_GRANULE_SHIFT - 1)))
            return;
    }

    psr = ia64_clear_ic();
    ia64_ptr(0x2 /*D*/, privregs, IA64_GRANULE_SIZE);
    ia64_srlz_d();
    ia64_itr(0x2 /*D*/, IA64_TR_MAPPED_REGS, privregs,
             pte_val(pfn_pte(__pa(privregs) >> PAGE_SHIFT, PAGE_KERNEL)),
             IA64_GRANULE_SHIFT);
    ia64_set_psr(psr);
    ia64_srlz_d();
}

void
__vmx_vpd_unpin(struct vcpu* v)
{
    if (!VMX_DOMAIN(current)) {
        int rc;
        rc = !set_one_rr(VRN7 << VRN_SHIFT, VCPU(current, rrs[VRN7]));
        BUG_ON(rc);
    } else {
        IA64FAULT fault;
        fault = vmx_vcpu_set_rr(current, VRN7 << VRN_SHIFT,
                                VMX(current, vrr[VRN7]));
        BUG_ON(fault != IA64_NO_FAULT);
    }
}

/*
 * Create a VP on an initialized VMX environment.
 */
static void
vmx_create_vp(struct vcpu *v)
{
    u64 ret;
    vpd_t *vpd = (vpd_t *)v->arch.privregs;
    u64 ivt_base;
    extern char vmx_ia64_ivt;
    /* ia64_ivt is a function pointer, so this translation is needed */
    ivt_base = (u64) &vmx_ia64_ivt;
    printk(XENLOG_DEBUG "ivt_base: 0x%lx\n", ivt_base);

    vmx_vpd_pin(v);
    ret = ia64_pal_vp_create((u64 *)vpd, (u64 *)ivt_base, 0);
    vmx_vpd_unpin(v);

    if (ret != PAL_STATUS_SUCCESS) {
        panic_domain(vcpu_regs(v), "ia64_pal_vp_create failed.\n");
    }
}

/* Other non-context related tasks can be done in context switch */
void
vmx_save_state(struct vcpu *v)
{
    BUG_ON(v != current);

    ia64_call_vsa(PAL_VPS_SAVE, (u64)v->arch.privregs, 1, 0, 0, 0, 0, 0);

    /* Need to save the KRs on domain switch, though the HV itself doesn't
     * use them.
     */
    v->arch.arch_vmx.vkr[0] = ia64_get_kr(0);
    v->arch.arch_vmx.vkr[1] = ia64_get_kr(1);
    v->arch.arch_vmx.vkr[2] = ia64_get_kr(2);
    v->arch.arch_vmx.vkr[3] = ia64_get_kr(3);
    v->arch.arch_vmx.vkr[4] = ia64_get_kr(4);
    v->arch.arch_vmx.vkr[5] = ia64_get_kr(5);
    v->arch.arch_vmx.vkr[6] = ia64_get_kr(6);
    v->arch.arch_vmx.vkr[7] = ia64_get_kr(7);
}

/* Even when the guest is in physical mode, we still need such a double mapping */
void
vmx_load_state(struct vcpu *v)
{
    BUG_ON(v != current);

    vmx_load_all_rr(v);

    /* vmx_load_all_rr() pins down v->arch.privregs with both dtr/itr */
    ia64_call_vsa(PAL_VPS_RESTORE, (u64)v->arch.privregs, 1, 0, 0, 0, 0, 0);

    ia64_set_kr(0, v->arch.arch_vmx.vkr[0]);
    ia64_set_kr(1, v->arch.arch_vmx.vkr[1]);
    ia64_set_kr(2, v->arch.arch_vmx.vkr[2]);
    ia64_set_kr(3, v->arch.arch_vmx.vkr[3]);
    ia64_set_kr(4, v->arch.arch_vmx.vkr[4]);
    ia64_set_kr(5, v->arch.arch_vmx.vkr[5]);
    ia64_set_kr(6, v->arch.arch_vmx.vkr[6]);
    ia64_set_kr(7, v->arch.arch_vmx.vkr[7]);
    /* The guest vTLB does not need to be switched explicitly, since
     * it is anchored in the vcpu */

    migrate_timer(&v->arch.arch_vmx.vtm.vtm_timer, v->processor);
}

static int
vmx_vcpu_initialise(struct vcpu *v)
{
    struct vmx_ioreq_page *iorp = &v->domain->arch.hvm_domain.ioreq;

    int rc = alloc_unbound_xen_event_channel(v, 0);
    if (rc < 0)
        return rc;
    v->arch.arch_vmx.xen_port = rc;

    spin_lock(&iorp->lock);
    if (v->domain->arch.vmx_platform.ioreq.va != 0) {
        vcpu_iodata_t *p = get_vio(v);
        p->vp_eport = v->arch.arch_vmx.xen_port;
    }
    spin_unlock(&iorp->lock);

    gdprintk(XENLOG_INFO, "Allocated port %ld for hvm %d vcpu %d.\n",
             v->arch.arch_vmx.xen_port, v->domain->domain_id, v->vcpu_id);

    return 0;
}

static int vmx_create_event_channels(struct vcpu *v)
{
    struct vcpu *o;

    if (v->vcpu_id == 0) {
        /* Ugly: create event channels for every vcpu when vcpu 0
           starts, so that they're available for ioemu to bind to. */
        for_each_vcpu(v->domain, o) {
            int rc = vmx_vcpu_initialise(o);
            if (rc < 0) //XXX error recovery
                return rc;
        }
    }

    return 0;
}

/*
 * The event channels have already been destroyed in domain_kill(),
 * so we needn't do anything here.
 */
static void vmx_release_assist_channel(struct vcpu *v)
{
    return;
}

/* The following three functions are based on hvm_xxx_ioreq_page()
 * in xen/arch/x86/hvm/hvm.c */
static void vmx_init_ioreq_page(
    struct domain *d, struct vmx_ioreq_page *iorp)
{
    memset(iorp, 0, sizeof(*iorp));
    spin_lock_init(&iorp->lock);
    domain_pause(d);
}

static void vmx_destroy_ioreq_page(
    struct domain *d, struct vmx_ioreq_page *iorp)
{
    spin_lock(&iorp->lock);

    ASSERT(d->is_dying);

    if (iorp->va != NULL) {
        put_page(iorp->page);
        iorp->page = NULL;
        iorp->va = NULL;
    }

    spin_unlock(&iorp->lock);
}

int vmx_set_ioreq_page(
    struct domain *d, struct vmx_ioreq_page *iorp, unsigned long gpfn)
{
    struct page_info *page;
    unsigned long mfn;
    pte_t pte;

    pte = *lookup_noalloc_domain_pte(d, gpfn << PAGE_SHIFT);
    if (!pte_present(pte) || !pte_mem(pte))
        return -EINVAL;
    mfn = pte_pfn(pte);
    ASSERT(mfn_valid(mfn));

    page = mfn_to_page(mfn);
    if (get_page(page, d) == 0)
        return -EINVAL;

    spin_lock(&iorp->lock);

    if ((iorp->va != NULL) || d->is_dying) {
        spin_unlock(&iorp->lock);
        put_page(page);
        return -EINVAL;
    }

    iorp->va = mfn_to_virt(mfn);
    iorp->page = page;

    spin_unlock(&iorp->lock);

    domain_unpause(d);

    return 0;
}

/*
 * Initialize the VMX environment for the guest. Only the 1st vp/vcpu
 * is registered here.
 */
int
vmx_final_setup_guest(struct vcpu *v)
{
    vpd_t *vpd;
    int rc;

    vpd = alloc_vpd();
    ASSERT(vpd);
    if (!vpd)
        return -ENOMEM;

    v->arch.privregs = (mapped_regs_t *)vpd;
    vpd->vpd_low.virt_env_vaddr = vm_buffer;

    /* Per-domain vTLB and vhpt implementation. For now a vmx domain will
     * stick to this solution. Maybe it can be deferred until we know the
     * created one is a vmx domain. */
    rc = init_domain_tlb(v);
    if (rc)
        return rc;

    if (!v->domain->arch.is_sioemu) {
        rc = vmx_create_event_channels(v);
        if (rc)
            return rc;
    }

    /* v->arch.schedule_tail = arch_vmx_do_launch; */
    vmx_create_vp(v);

    /* Physical mode emulation initialization, including
     * emulation ID allocation and related memory request
     */
    physical_mode_init(v);

    vlsapic_reset(v);
    vtm_init(v);

    /* Set up the guest's indicator for a VTi domain */
    set_bit(ARCH_VMX_DOMAIN, &v->arch.arch_vmx.flags);

    return 0;
}

void
vmx_relinquish_guest_resources(struct domain *d)
{
    struct vcpu *v;

    if (d->arch.is_sioemu)
        return;

    for_each_vcpu(d, v)
        vmx_release_assist_channel(v);

    vacpi_relinquish_resources(d);

    vmx_destroy_ioreq_page(d, &d->arch.vmx_platform.ioreq);
    vmx_destroy_ioreq_page(d, &d->arch.vmx_platform.buf_ioreq);
    vmx_destroy_ioreq_page(d, &d->arch.vmx_platform.buf_pioreq);
}

void
vmx_relinquish_vcpu_resources(struct vcpu *v)
{
    vtime_t *vtm = &(v->arch.arch_vmx.vtm);

    kill_timer(&vtm->vtm_timer);

    if (v->arch.arch_vmx.sioemu_info_mva)
        put_page(virt_to_page((unsigned long)
                              v->arch.arch_vmx.sioemu_info_mva));

    free_domain_tlb(v);
    free_vpd(v);
}

typedef struct io_range {
    unsigned long start;
    unsigned long size;
    unsigned long type;
} io_range_t;

static const io_range_t io_ranges[] = {
    {VGA_IO_START, VGA_IO_SIZE, GPFN_FRAME_BUFFER << PAGE_SHIFT},
    {MMIO_START, MMIO_SIZE, GPFN_LOW_MMIO << PAGE_SHIFT},
    {LEGACY_IO_START, LEGACY_IO_SIZE, GPFN_LEGACY_IO << PAGE_SHIFT},
    {IO_SAPIC_START, IO_SAPIC_SIZE, GPFN_IOSAPIC << PAGE_SHIFT},
    {PIB_START, PIB_SIZE, GPFN_PIB << PAGE_SHIFT},
};

// The P2M table is built in libxc/ia64/xc_ia64_hvm_build.c @ setup_guest()
// so only mark IO memory space here
static void vmx_build_io_physmap_table(struct domain *d)
{
    unsigned long i, j;

    /* Mark I/O ranges */
    for (i = 0; i < (sizeof(io_ranges) / sizeof(io_range_t)); i++) {
        for (j = io_ranges[i].start;
             j < io_ranges[i].start + io_ranges[i].size; j += PAGE_SIZE)
            (void)__assign_domain_page(d, j, io_ranges[i].type,
                                       ASSIGN_writable | ASSIGN_io);
    }
}

int vmx_setup_platform(struct domain *d)
{
    ASSERT(d != dom0); /* only for non-privileged vti domain */

    if (!d->arch.is_sioemu) {
        vmx_build_io_physmap_table(d);

        vmx_init_ioreq_page(d, &d->arch.vmx_platform.ioreq);
        vmx_init_ioreq_page(d, &d->arch.vmx_platform.buf_ioreq);
        vmx_init_ioreq_page(d, &d->arch.vmx_platform.buf_pioreq);
    }
    /* TEMP */
    d->arch.vmx_platform.pib_base = 0xfee00000UL;

    d->arch.sal_data = xmalloc(struct xen_sal_data);
    if (d->arch.sal_data == NULL)
        return -ENOMEM;

    /* Only open one port for I/O and interrupt emulation */
    memset(&d->shared_info->evtchn_mask[0], 0xff,
           sizeof(d->shared_info->evtchn_mask));

    /* Initialize iosapic model within hypervisor */
    viosapic_init(d);

    if (!d->arch.is_sioemu)
        vacpi_init(d);

    if (d->arch.is_sioemu) {
        int i;
        for (i = 1; i < XEN_LEGACY_MAX_VCPUS; i++)
            d->shared_info->vcpu_info[i].evtchn_upcall_mask = 1;
    }

    return 0;
}

void vmx_do_resume(struct vcpu *v)
{
    ioreq_t *p;

    vmx_load_state(v);

    if (v->domain->arch.is_sioemu)
        return;

    /* stolen from hvm_do_resume() in arch/x86/hvm/hvm.c */
    /* NB. Optimised for common case (p->state == STATE_IOREQ_NONE). */
    p = &get_vio(v)->vp_ioreq;
    while (p->state != STATE_IOREQ_NONE) {
        switch (p->state) {
        case STATE_IORESP_READY: /* IORESP_READY -> NONE */
            vmx_io_assist(v);
            break;
        case STATE_IOREQ_READY:
        case STATE_IOREQ_INPROCESS:
            /* IOREQ_{READY,INPROCESS} -> IORESP_READY */
            wait_on_xen_event_channel(v->arch.arch_vmx.xen_port,
                                      (p->state != STATE_IOREQ_READY) &&
                                      (p->state != STATE_IOREQ_INPROCESS));
            break;
        default:
            gdprintk(XENLOG_ERR,
                     "Weird HVM iorequest state %d.\n", p->state);
            domain_crash_synchronous();
        }
    }
}