ia64/xen-unstable

xen/arch/ia64/xen/domain.c @ 10516:7c9692adcca7

[IA64] sync console before halt/reboot

Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
author: awilliam@xenbuild.aw
date: Mon Jun 26 14:34:18 2006 -0600 (2006-06-26)
parents: fdf25330e4a6
children: 85958f34f183
/*
 * Copyright (C) 1995 Linus Torvalds
 *
 * Pentium III FXSR, SSE support
 * Gareth Hughes <gareth@valinux.com>, May 2000
 *
 * Copyright (C) 2005 Intel Co
 * Kun Tian (Kevin Tian) <kevin.tian@intel.com>
 *
 * 05/04/29 Kun Tian (Kevin Tian) <kevin.tian@intel.com> Add VTI domain support
 *
 * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp>
 *                    VA Linux Systems Japan K.K.
 *                    dom0 vp model support
 */

#include <xen/config.h>
#include <xen/init.h>
#include <xen/lib.h>
#include <xen/errno.h>
#include <xen/sched.h>
#include <xen/smp.h>
#include <xen/delay.h>
#include <xen/softirq.h>
#include <xen/mm.h>
#include <xen/iocap.h>
#include <asm/ptrace.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/desc.h>
#include <asm/hw_irq.h>
#include <asm/setup.h>
//#include <asm/mpspec.h>
#include <xen/irq.h>
#include <xen/event.h>
//#include <xen/shadow.h>
#include <xen/console.h>
#include <xen/compile.h>

#include <xen/elf.h>
//#include <asm/page.h>
#include <asm/pgalloc.h>

#include <asm/offsets.h>  /* for IA64_THREAD_INFO_SIZE */

#include <asm/vcpu.h>   /* for function declarations */
#include <public/arch-ia64.h>
#include <xen/domain.h>
#include <asm/vmx.h>
#include <asm/vmx_vcpu.h>
#include <asm/vmx_vpd.h>
#include <asm/vmx_phy_mode.h>
#include <asm/pal.h>
#include <asm/vhpt.h>
#include <public/hvm/ioreq.h>
#include <asm/tlbflush.h>
#include <asm/regionreg.h>
#include <asm/dom_fw.h>

#ifndef CONFIG_XEN_IA64_DOM0_VP
#define CONFIG_DOMAIN0_CONTIGUOUS
#endif
unsigned long dom0_start = -1L;
unsigned long dom0_size = 512*1024*1024;
unsigned long dom0_align = 64*1024*1024;

/* dom0_max_vcpus: maximum number of VCPUs to create for dom0.  */
static unsigned int dom0_max_vcpus = 1;
integer_param("dom0_max_vcpus", dom0_max_vcpus);

extern unsigned long running_on_sim;

extern char dom0_command_line[];

/* FIXME: where should these declarations live? */
extern void serial_input_init(void);
static void init_switch_stack(struct vcpu *v);
extern void vmx_do_launch(struct vcpu *);
void build_physmap_table(struct domain *d);

/* this belongs in include/asm, but there doesn't seem to be a suitable place */
unsigned long context_switch_count = 0;

extern struct vcpu *ia64_switch_to (struct vcpu *next_task);

/* Address of vpsr.i (in fact evtchn_upcall_mask) of the current vcpu.
   This is a Xen virtual address.  */
DEFINE_PER_CPU(uint8_t *, current_psr_i_addr);
DEFINE_PER_CPU(int *, current_psr_ic_addr);

#include <xen/sched-if.h>

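/* Finish a context switch on the incoming vcpu: mark the previous vcpu as
 * saved, then either relaunch a VMX guest or reload the IVT, PTA/VHPT and
 * region registers for a paravirtualized guest and recompute the per-CPU
 * psr.i/psr.ic shortcut pointers. */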
void schedule_tail(struct vcpu *prev)
{
    extern char ia64_ivt;
    context_saved(prev);

    if (VMX_DOMAIN(current)) {
        vmx_do_launch(current);
    } else {
        ia64_set_iva(&ia64_ivt);
        ia64_set_pta(VHPT_ADDR | (1 << 8) | (VHPT_SIZE_LOG2 << 2) |
                     VHPT_ENABLED);
        load_region_regs(current);
        vcpu_load_kernel_regs(current);
        __ia64_per_cpu_var(current_psr_i_addr) = &current->domain->
            shared_info->vcpu_info[current->vcpu_id].evtchn_upcall_mask;
        __ia64_per_cpu_var(current_psr_ic_addr) = (int *)
            (current->domain->arch.shared_info_va + XSI_PSR_IC_OFS);
    }
}

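/* Switch from vcpu 'prev' to vcpu 'next': save/restore the FPU high
 * partition and any VMX state, call ia64_switch_to(), then reprogram the
 * VHPT, region registers and timer for the incoming vcpu (or simply
 * disable the VHPT walker when switching to the idle domain). */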
void context_switch(struct vcpu *prev, struct vcpu *next)
{
    uint64_t spsr;
    uint64_t pta;

    local_irq_save(spsr);
    context_switch_count++;

    __ia64_save_fpu(prev->arch._thread.fph);
    __ia64_load_fpu(next->arch._thread.fph);
    if (VMX_DOMAIN(prev))
        vmx_save_state(prev);
    if (VMX_DOMAIN(next))
        vmx_load_state(next);
    /*ia64_psr(ia64_task_regs(next))->dfh = !ia64_is_local_fpu_owner(next);*/
    prev = ia64_switch_to(next);

    /* Note: ia64_switch_to does not return here at vcpu initialization.  */

    //cpu_set(smp_processor_id(), current->domain->domain_dirty_cpumask);

    // leave this debug for now: it acts as a heartbeat when more than
    // one domain is active
    {
        static long cnt[16] = { 50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50};
        static int i = 100;
        int id = ((struct vcpu *)current)->domain->domain_id & 0xf;
        if (!cnt[id]--) { cnt[id] = 500000; printk("%x",id); }
        if (!i--) { i = 1000000; printk("+"); }
    }

    if (VMX_DOMAIN(current)) {
        vmx_load_all_rr(current);
    } else {
        struct domain *nd;
        extern char ia64_ivt;

        ia64_set_iva(&ia64_ivt);

        nd = current->domain;
        if (!is_idle_domain(nd)) {
            ia64_set_pta(VHPT_ADDR | (1 << 8) | (VHPT_SIZE_LOG2 << 2) |
                         VHPT_ENABLED);
            load_region_regs(current);
            vcpu_load_kernel_regs(current);
            vcpu_set_next_timer(current);
            if (vcpu_timer_expired(current))
                vcpu_pend_timer(current);
            __ia64_per_cpu_var(current_psr_i_addr) = &nd->shared_info->
                vcpu_info[current->vcpu_id].evtchn_upcall_mask;
            __ia64_per_cpu_var(current_psr_ic_addr) =
                (int *)(nd->arch.shared_info_va + XSI_PSR_IC_OFS);
        } else {
            /* When switching to the idle domain we only need to disable
             * the VHPT walker: all accesses made within the idle context
             * are then handled by TR mappings and the identity mapping.
             */
            pta = ia64_get_pta();
            ia64_set_pta(pta & ~VHPT_ENABLED);
            __ia64_per_cpu_var(current_psr_i_addr) = NULL;
            __ia64_per_cpu_var(current_psr_ic_addr) = NULL;
        }
    }
    local_irq_restore(spsr);
    context_saved(prev);
}

void continue_running(struct vcpu *same)
{
    /* nothing to do */
}

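/* Idle-vcpu loop: halt the CPU until a softirq is pending, then schedule.
 * startup_cpu_idle_loop() is the entry point for each idle vcpu. */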
static void default_idle(void)
{
    int cpu = smp_processor_id();
    local_irq_disable();
    if ( !softirq_pending(cpu))
        safe_halt();
    local_irq_enable();
}

static void continue_cpu_idle_loop(void)
{
    int cpu = smp_processor_id();
    for ( ; ; )
    {
#ifdef IA64
//        __IRQ_STAT(cpu, idle_timestamp) = jiffies
#else
        irq_stat[cpu].idle_timestamp = jiffies;
#endif
        while ( !softirq_pending(cpu) )
            default_idle();
        add_preempt_count(SOFTIRQ_OFFSET);
        raise_softirq(SCHEDULE_SOFTIRQ);
        do_softirq();
        sub_preempt_count(SOFTIRQ_OFFSET);
    }
}

void startup_cpu_idle_loop(void)
{
    /* Just some sanity to ensure that the scheduler is set up okay. */
    ASSERT(current->domain->domain_id == IDLE_DOMAIN_ID);
    raise_softirq(SCHEDULE_SOFTIRQ);

    continue_cpu_idle_loop();
}

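/* Allocate a vcpu structure.  The vcpu is placed at the base of its own
 * kernel stack area; non-idle vcpus additionally get a privregs page and
 * inherit the domain's metaphysical/RID settings. */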
struct vcpu *alloc_vcpu_struct(struct domain *d, unsigned int vcpu_id)
{
    struct vcpu *v;
    struct thread_info *ti;

    /* Keep idle vcpu0 statically allocated at compile time, because some
     * code inherited from Linux still requires it during the early boot
     * phase. */
    if (is_idle_domain(d) && !vcpu_id)
        v = idle_vcpu[0];
    else {
        if ((v = alloc_xenheap_pages(KERNEL_STACK_SIZE_ORDER)) == NULL)
            return NULL;
        memset(v, 0, sizeof(*v));

        ti = alloc_thread_info(v);
        /* Clear thread_info to clear some important fields, like
         * preempt_count. */
        memset(ti, 0, sizeof(struct thread_info));
        init_switch_stack(v);
    }

    if (!is_idle_domain(d)) {
        v->arch.privregs =
            alloc_xenheap_pages(get_order(sizeof(mapped_regs_t)));
        BUG_ON(v->arch.privregs == NULL);
        memset(v->arch.privregs, 0, PAGE_SIZE);

        if (!vcpu_id)
            memset(&d->shared_info->evtchn_mask[0], 0xff,
                   sizeof(d->shared_info->evtchn_mask));

        v->arch.metaphysical_rr0 = d->arch.metaphysical_rr0;
        v->arch.metaphysical_rr4 = d->arch.metaphysical_rr4;
        v->arch.metaphysical_saved_rr0 = d->arch.metaphysical_rr0;
        v->arch.metaphysical_saved_rr4 = d->arch.metaphysical_rr4;

        /* Is this correct?  It depends on how the domain uses RIDs.

           A domain may share RIDs among its processors (e.g. when it has
           a global VHPT).  In that case we should also share RIDs among
           vcpus, and the RID range should be the same.

           However, a domain may instead use per-cpu RID allocation.  In
           that case we don't want to share RIDs among vcpus, although we
           may do so if two vcpus run on the same physical cpu...  */
        v->arch.starting_rid = d->arch.starting_rid;
        v->arch.ending_rid = d->arch.ending_rid;
        v->arch.breakimm = d->arch.breakimm;
    }

    return v;
}

void free_vcpu_struct(struct vcpu *v)
{
    if (VMX_DOMAIN(v))
        vmx_relinquish_vcpu_resources(v);
    else {
        if (v->arch.privregs != NULL)
            free_xenheap_pages(v->arch.privregs,
                               get_order(sizeof(mapped_regs_t)));
    }

    free_xenheap_pages(v, KERNEL_STACK_SIZE_ORDER);
}

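/* Build the initial switch_stack/pt_regs frame on the vcpu's kernel stack
 * so that the first ia64_switch_to() to this vcpu resumes execution in
 * ia64_ret_from_clone. */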
static void init_switch_stack(struct vcpu *v)
{
    struct pt_regs *regs = vcpu_regs (v);
    struct switch_stack *sw = (struct switch_stack *) regs - 1;
    extern void ia64_ret_from_clone;

    memset(sw, 0, sizeof(struct switch_stack) + sizeof(struct pt_regs));
    sw->ar_bspstore = (unsigned long)v + IA64_RBS_OFFSET;
    sw->b0 = (unsigned long) &ia64_ret_from_clone;
    sw->ar_fpsr = FPSR_DEFAULT;
    v->arch._thread.ksp = (unsigned long) sw - 16;
    // stay on kernel stack because may get interrupts!
    // ia64_ret_from_clone switches to user stack
    v->arch._thread.on_ustack = 0;
    memset(v->arch._thread.fph, 0, sizeof(struct ia64_fpreg) * 96);
}

int arch_domain_create(struct domain *d)
{
    // the following will eventually need to be negotiated dynamically
    d->arch.shared_info_va = DEFAULT_SHAREDINFO_ADDR;
    d->arch.breakimm = 0x1000;

    if (is_idle_domain(d))
        return 0;

    if ((d->shared_info = (void *)alloc_xenheap_page()) == NULL)
        goto fail_nomem;
    memset(d->shared_info, 0, PAGE_SIZE);

    d->max_pages = (128UL*1024*1024)/PAGE_SIZE; // 128MB default // FIXME
    /* We may also need an emulation RID for region 4, though it is
     * unlikely that a guest will issue uncacheable accesses in
     * metaphysical mode.  Keeping that information here seems saner
     * anyway. */
    if (!allocate_rid_range(d,0))
        goto fail_nomem;
    d->arch.sys_pgnr = 0;

    memset(&d->arch.mm, 0, sizeof(d->arch.mm));

    d->arch.physmap_built = 0;
    if ((d->arch.mm.pgd = pgd_alloc(&d->arch.mm)) == NULL)
        goto fail_nomem;

    printf ("arch_domain_create: domain=%p\n", d);
    return 0;

fail_nomem:
    if (d->arch.mm.pgd != NULL)
        pgd_free(d->arch.mm.pgd);
    if (d->shared_info != NULL)
        free_xenheap_page(d->shared_info);
    return -ENOMEM;
}

void arch_domain_destroy(struct domain *d)
{
    BUG_ON(d->arch.mm.pgd != NULL);
    if (d->shared_info != NULL)
        free_xenheap_page(d->shared_info);

    domain_flush_destroy (d);

    deallocate_rid_range(d);
}

void arch_getdomaininfo_ctxt(struct vcpu *v, struct vcpu_guest_context *c)
{
    c->user_regs = *vcpu_regs (v);
    c->shared = v->domain->shared_info->arch;
}

int arch_set_info_guest(struct vcpu *v, struct vcpu_guest_context *c)
{
    struct pt_regs *regs = vcpu_regs (v);
    struct domain *d = v->domain;
    unsigned long cmdline_addr;

    if ( test_bit(_VCPUF_initialised, &v->vcpu_flags) )
        return 0;
    if (c->flags & VGCF_VMX_GUEST) {
        if (!vmx_enabled) {
            printk("No VMX hardware feature for vmx domain.\n");
            return -EINVAL;
        }

        if (v == d->vcpu[0])
            vmx_setup_platform(d, c);

        vmx_final_setup_guest(v);
    } else if (!d->arch.physmap_built)
        build_physmap_table(d);

    *regs = c->user_regs;
    cmdline_addr = 0;
    if (v == d->vcpu[0]) {
        /* Only for first vcpu.  */
        d->arch.sys_pgnr = c->sys_pgnr;
        d->arch.initrd_start = c->initrd.start;
        d->arch.initrd_len = c->initrd.size;
        d->arch.cmdline = c->cmdline;
        d->shared_info->arch = c->shared;

        if (!VMX_DOMAIN(v)) {
            const char *cmdline = d->arch.cmdline;
            int len;

            if (*cmdline == 0) {
#define DEFAULT_CMDLINE "nomca nosmp xencons=tty0 console=tty0 root=/dev/hda1"
                cmdline = DEFAULT_CMDLINE;
                len = sizeof (DEFAULT_CMDLINE);
                printf("domU command line defaulted to "
                       DEFAULT_CMDLINE "\n");
            }
            else
                len = IA64_COMMAND_LINE_SIZE;
            cmdline_addr = dom_fw_setup (d, cmdline, len);
        }

        /* Cache synchronization seems to be done by the Linux kernel
           during mmap/unmap operations.  However, be conservative.  */
        domain_cache_flush (d, 1);
    }
    vcpu_init_regs (v);
    regs->r28 = cmdline_addr;

    if ( c->privregs && copy_from_user(v->arch.privregs,
                                       c->privregs, sizeof(mapped_regs_t))) {
        printk("Bad ctxt address in arch_set_info_guest: %p\n",
               c->privregs);
        return -EFAULT;
    }

    /* Don't redo final setup.  */
    set_bit(_VCPUF_initialised, &v->vcpu_flags);
    return 0;
}

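/* Drop the remaining references on every page in 'list' so that the pages
 * can be returned to the heap while the domain is being torn down. */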
static void relinquish_memory(struct domain *d, struct list_head *list)
{
    struct list_head *ent;
    struct page_info *page;
#ifndef __ia64__
    unsigned long x, y;
#endif

    /* Use a recursive lock, as we may enter 'free_domheap_page'. */
    spin_lock_recursive(&d->page_alloc_lock);
    ent = list->next;
    while ( ent != list )
    {
        page = list_entry(ent, struct page_info, list);
        /* Grab a reference to the page so it won't disappear from under us. */
        if ( unlikely(!get_page(page, d)) )
        {
            /* Couldn't get a reference -- someone is freeing this page. */
            ent = ent->next;
            continue;
        }

        if ( test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info) )
            put_page_and_type(page);

        if ( test_and_clear_bit(_PGC_allocated, &page->count_info) )
            put_page(page);

#ifndef __ia64__
        /*
         * Forcibly invalidate base page tables at this point to break circular
         * 'linear page table' references. This is okay because MMU structures
         * are not shared across domains and this domain is now dead. Thus base
         * tables are not in use so a non-zero count means circular reference.
         */
        y = page->u.inuse.type_info;
        for ( ; ; )
        {
            x = y;
            if ( likely((x & (PGT_type_mask|PGT_validated)) !=
                        (PGT_base_page_table|PGT_validated)) )
                break;

            y = cmpxchg(&page->u.inuse.type_info, x, x & ~PGT_validated);
            if ( likely(y == x) )
            {
                free_page_type(page, PGT_base_page_table);
                break;
            }
        }
#endif

        /* Follow the list chain and /then/ potentially free the page. */
        ent = ent->next;
#ifdef CONFIG_XEN_IA64_DOM0_VP
        BUG_ON(get_gpfn_from_mfn(page_to_mfn(page)) != INVALID_M2P_ENTRY);
#endif
        put_page(page);
    }

    spin_unlock_recursive(&d->page_alloc_lock);
}

void domain_relinquish_resources(struct domain *d)
{
    /* Relinquish every page of memory. */

    // release pages by traversing d->arch.mm.
    relinquish_mm(d);

    relinquish_memory(d, &d->xenpage_list);
    relinquish_memory(d, &d->page_list);
}

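/* Populate the guest physical address space by assigning consecutive
 * guest page frames (starting at 0) to the pages already on the domain's
 * page list. */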
void build_physmap_table(struct domain *d)
{
    struct list_head *list_ent = d->page_list.next;
    unsigned long mfn, i = 0;

    ASSERT(!d->arch.physmap_built);
    while (list_ent != &d->page_list) {
        mfn = page_to_mfn(list_entry(
            list_ent, struct page_info, list));
        assign_domain_page(d, i << PAGE_SHIFT, mfn << PAGE_SHIFT);

        i++;
        list_ent = mfn_to_page(mfn)->list.next;
    }
    d->arch.physmap_built = 1;
}

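/* Set the calling domain's shared_info virtual address to the
 * guest-supplied 'va', update each vcpu's interrupt-mask pointer
 * accordingly, and remap region 7 so the new mapping takes effect. */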
unsigned long
domain_set_shared_info_va (unsigned long va)
{
    struct vcpu *v = current;
    struct domain *d = v->domain;
    struct vcpu *v1;

    /* Check virtual address:
       must belong to region 7,
       must be 64Kb aligned,
       must not be within Xen virtual space.  */
    if ((va >> 61) != 7
        || (va & 0xffffUL) != 0
        || (va >= HYPERVISOR_VIRT_START && va < HYPERVISOR_VIRT_END))
        panic_domain (NULL, "%s: bad va (0x%016lx)\n", __func__, va);

    /* Note: this doesn't work well if other cpus are already running.
       However this is part of the spec :-)  */
    printf ("Domain set shared_info_va to 0x%016lx\n", va);
    d->arch.shared_info_va = va;

    for_each_vcpu (d, v1) {
        VCPU(v1, interrupt_mask_addr) =
            (unsigned char *)va + INT_ENABLE_OFFSET(v1);
    }

    __ia64_per_cpu_var(current_psr_ic_addr) = (int *)(va + XSI_PSR_IC_OFS);

    /* Remap the shared pages.  */
    set_one_rr (7UL << 61, PSCB(v,rrs[7]));

    return 0;
}


// remove following line if not privifying in memory
//#define HAVE_PRIVIFY_MEMORY
#ifndef HAVE_PRIVIFY_MEMORY
#define privify_memory(x,y) do {} while(0)
#endif

// see arch/x86/xxx/domain_build.c
int elf_sanity_check(Elf_Ehdr *ehdr)
{
    if (!(IS_ELF(*ehdr)))
    {
        printk("DOM0 image is not a Xen-compatible Elf image.\n");
        return 0;
    }
    return 1;
}

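/* Copy the guest kernel's ELF PT_LOAD segments into the domain's memory.
 * For a contiguous dom0 the segments are copied directly into the
 * pre-allocated chunk; otherwise pages are allocated and filled one at a
 * time with assign_new_domain_page(). */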
static void loaddomainelfimage(struct domain *d, unsigned long image_start)
{
    char *elfbase = (char *) image_start;
    Elf_Ehdr ehdr;
    Elf_Phdr phdr;
    int h, filesz, memsz;
    unsigned long elfaddr, dom_mpaddr, dom_imva;
    struct page_info *p;

    memcpy(&ehdr, (void *) image_start, sizeof(Elf_Ehdr));
    for ( h = 0; h < ehdr.e_phnum; h++ ) {
        memcpy(&phdr,
               elfbase + ehdr.e_phoff + (h*ehdr.e_phentsize),
               sizeof(Elf_Phdr));
        if ((phdr.p_type != PT_LOAD))
            continue;

        filesz = phdr.p_filesz;
        memsz = phdr.p_memsz;
        elfaddr = (unsigned long) elfbase + phdr.p_offset;
        dom_mpaddr = phdr.p_paddr;

        //printf("p_offset: %x, size=%x\n",elfaddr,filesz);
#ifdef CONFIG_DOMAIN0_CONTIGUOUS
        if (d == dom0) {
            if (dom_mpaddr+memsz>dom0_size)
                panic("Dom0 doesn't fit in memory space!\n");
            dom_imva = __va_ul(dom_mpaddr + dom0_start);
            memcpy((void *)dom_imva, (void *)elfaddr, filesz);
            if (memsz > filesz)
                memset((void *)dom_imva+filesz, 0,
                       memsz-filesz);
            //FIXME: This test for code seems to find a lot more than objdump -x does
            if (phdr.p_flags & PF_X) {
                privify_memory(dom_imva,filesz);
                flush_icache_range (dom_imva, dom_imva+filesz);
            }
        }
        else
#endif
        while (memsz > 0) {
            p = assign_new_domain_page(d,dom_mpaddr);
            BUG_ON (unlikely(p == NULL));
            dom_imva = __va_ul(page_to_maddr(p));
            if (filesz > 0) {
                if (filesz >= PAGE_SIZE)
                    memcpy((void *) dom_imva,
                           (void *) elfaddr,
                           PAGE_SIZE);
                else {
                    // copy partial page
                    memcpy((void *) dom_imva,
                           (void *) elfaddr, filesz);
                    // zero the rest of page
                    memset((void *) dom_imva+filesz, 0,
                           PAGE_SIZE-filesz);
                }
                //FIXME: This test for code seems to find a lot more than objdump -x does
                if (phdr.p_flags & PF_X) {
                    privify_memory(dom_imva,PAGE_SIZE);
                    flush_icache_range(dom_imva,
                                       dom_imva+PAGE_SIZE);
                }
            }
            else if (memsz > 0) {
                /* always zero out entire page */
                memset((void *) dom_imva, 0, PAGE_SIZE);
            }
            memsz -= PAGE_SIZE;
            filesz -= PAGE_SIZE;
            elfaddr += PAGE_SIZE;
            dom_mpaddr += PAGE_SIZE;
        }
    }
}

void alloc_dom0(void)
{
    /* Check dom0 size.  */
    if (dom0_size < 4 * 1024 * 1024) {
        panic("dom0_mem is too small, boot aborted"
              " (try e.g. dom0_mem=256M or dom0_mem=65536K)\n");
    }

    /* Check dom0 align.  */
    if ((dom0_align - 1) & dom0_align) { /* not a power of two */
        panic("dom0_align (%lx) must be a power of two, boot aborted"
              " (try e.g. dom0_align=256M or dom0_align=65536K)\n",
              dom0_align);
    }
    if (dom0_align < PAGE_SIZE) {
        panic("dom0_align must be >= %ld, boot aborted"
              " (try e.g. dom0_align=256M or dom0_align=65536K)\n",
              PAGE_SIZE);
    }
    if (dom0_size % dom0_align) {
        dom0_size = (dom0_size / dom0_align + 1) * dom0_align;
        printf("dom0_size rounded up to %ld, due to dom0_align=%lx\n",
               dom0_size, dom0_align);
    }

    if (running_on_sim) {
        dom0_size = 128*1024*1024; //FIXME: Should be configurable
    }
#ifdef CONFIG_DOMAIN0_CONTIGUOUS
    printf("alloc_dom0: starting (initializing %lu MB...)\n",
           dom0_size/(1024*1024));

    /* FIXME: The first chunk (say 256M) should always be assigned to
     * Dom0, since Dom0's physical addresses equal machine addresses for
     * DMA purposes.  Some old Linux versions, like 2.4, assume physical
     * memory exists in the second 64M of the address space.
     */
    dom0_start = alloc_boot_pages(dom0_size >> PAGE_SHIFT,
                                  dom0_align >> PAGE_SHIFT);
    dom0_start <<= PAGE_SHIFT;
    if (!dom0_start) {
        panic("alloc_dom0: can't allocate contiguous memory size=%lu\n",
              dom0_size);
    }
    printf("alloc_dom0: dom0_start=0x%lx\n", dom0_start);
#else
    // no need to allocate pages for now
    // pages are allocated by map_new_domain_page() via loaddomainelfimage()
    dom0_start = 0;
#endif
}


/*
 * Domain 0 has direct access to all devices.  The main purpose of this
 * stub, however, is to allow alloc_dom_mem to handle requests with
 * order > 0: dom0 needs that capability in order to allocate memory for
 * other domains.
 */
static void physdev_init_dom0(struct domain *d)
{
    if (iomem_permit_access(d, 0UL, ~0UL))
        BUG();
    if (irqs_permit_access(d, 0, NR_IRQS-1))
        BUG();
}

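/* Build domain 0: load its kernel and initrd images, set up the start_info
 * page and firmware tables, allocate its vcpus and grant it access to all
 * I/O memory and IRQs. */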
int construct_dom0(struct domain *d,
                   unsigned long image_start, unsigned long image_len,
                   unsigned long initrd_start, unsigned long initrd_len,
                   char *cmdline)
{
    int i, rc;
    unsigned long alloc_start, alloc_end;
    start_info_t *si;
    struct vcpu *v = d->vcpu[0];
    unsigned long max_pages;

    struct domain_setup_info dsi;
    unsigned long p_start;
    unsigned long pkern_start;
    unsigned long pkern_entry;
    unsigned long pkern_end;
    unsigned long pinitrd_start = 0;
    unsigned long pstart_info;
    unsigned long cmdline_addr;
    struct page_info *start_info_page;

#ifdef VALIDATE_VT
    unsigned int vmx_dom0 = 0;
    unsigned long mfn;
    struct page_info *page = NULL;
#endif

    //printf("construct_dom0: starting\n");

    /* Sanity! */
    BUG_ON(d != dom0);
    BUG_ON(d->vcpu[0] == NULL);
    BUG_ON(test_bit(_VCPUF_initialised, &v->vcpu_flags));

    memset(&dsi, 0, sizeof(struct domain_setup_info));

    printk("*** LOADING DOMAIN 0 ***\n");

    alloc_start = dom0_start;
    alloc_end = dom0_start + dom0_size;
    max_pages = dom0_size / PAGE_SIZE;
    d->max_pages = max_pages;
#ifndef CONFIG_XEN_IA64_DOM0_VP
    d->tot_pages = d->max_pages;
#else
    d->tot_pages = 0;
#endif
    dsi.image_addr = (unsigned long)image_start;
    dsi.image_len = image_len;
    rc = parseelfimage(&dsi);
    if ( rc != 0 )
        return rc;

#ifdef VALIDATE_VT
    /* Temp workaround */
    if (running_on_sim)
        dsi.xen_section_string = (char *)1;

    /* Check whether dom0 is a vti domain.  */
    if ((!vmx_enabled) && !dsi.xen_section_string) {
        printk("Lack of hardware support for unmodified vmx dom0\n");
        panic("");
    }

    if (vmx_enabled && !dsi.xen_section_string) {
        printk("Dom0 is vmx domain!\n");
        vmx_dom0 = 1;
    }
#endif

    p_start = dsi.v_start;
    pkern_start = dsi.v_kernstart;
    pkern_end = dsi.v_kernend;
    pkern_entry = dsi.v_kernentry;

    //printk("p_start=%lx, pkern_start=%lx, pkern_end=%lx, pkern_entry=%lx\n",p_start,pkern_start,pkern_end,pkern_entry);

    if ( (p_start & (PAGE_SIZE-1)) != 0 )
    {
        printk("Initial guest OS must load to a page boundary.\n");
        return -EINVAL;
    }

    pstart_info = PAGE_ALIGN(pkern_end);
    if (initrd_start && initrd_len) {
        unsigned long offset;

        pinitrd_start = (dom0_start + dom0_size) -
                        (PAGE_ALIGN(initrd_len) + 4*1024*1024);
        if (pinitrd_start <= pstart_info)
            panic("%s: not enough memory assigned to dom0", __func__);

        for (offset = 0; offset < initrd_len; offset += PAGE_SIZE) {
            struct page_info *p;
            p = assign_new_domain_page(d, pinitrd_start + offset);
            if (p == NULL)
                panic("%s: can't allocate page for initrd image", __func__);
            if (initrd_len < offset + PAGE_SIZE)
                memcpy(page_to_virt(p), (void*)(initrd_start + offset),
                       initrd_len - offset);
            else
                copy_page(page_to_virt(p), (void*)(initrd_start + offset));
        }
    }

    printk("METAPHYSICAL MEMORY ARRANGEMENT:\n"
           " Kernel image: %lx->%lx\n"
           " Entry address: %lx\n"
           " Init. ramdisk: %lx len %lx\n"
           " Start info.: %lx->%lx\n",
           pkern_start, pkern_end, pkern_entry, pinitrd_start, initrd_len,
           pstart_info, pstart_info + PAGE_SIZE);

    if ( (pkern_end - pkern_start) > (max_pages * PAGE_SIZE) )
    {
        printk("Initial guest OS requires too much space\n"
               "(%luMB is greater than %luMB limit)\n",
               (pkern_end-pkern_start)>>20,
               (max_pages <<PAGE_SHIFT)>>20);
        return -ENOMEM;
    }

    // if the high 3 bits of pkern_start are non-zero, error

    // if pkern_end is after the end of metaphysical memory, error
    //  (we should be able to deal with this... later)

    /* Mask all upcalls... */
    for ( i = 1; i < MAX_VIRT_CPUS; i++ )
        d->shared_info->vcpu_info[i].evtchn_upcall_mask = 1;

    if (dom0_max_vcpus == 0)
        dom0_max_vcpus = MAX_VIRT_CPUS;
    if (dom0_max_vcpus > num_online_cpus())
        dom0_max_vcpus = num_online_cpus();
    if (dom0_max_vcpus > MAX_VIRT_CPUS)
        dom0_max_vcpus = MAX_VIRT_CPUS;

    printf ("Dom0 max_vcpus=%d\n", dom0_max_vcpus);
    for ( i = 1; i < dom0_max_vcpus; i++ )
        if (alloc_vcpu(d, i, i) == NULL)
            printf ("Cannot allocate dom0 vcpu %d\n", i);

#if defined(VALIDATE_VT) && !defined(CONFIG_XEN_IA64_DOM0_VP)
    /* Construct a frame-allocation list for the initial domain, since these
     * pages are allocated by the boot allocator and their pfns are not set
     * properly. */
    for ( mfn = (alloc_start>>PAGE_SHIFT);
          mfn < (alloc_end>>PAGE_SHIFT);
          mfn++ )
    {
        page = mfn_to_page(mfn);
        page_set_owner(page, d);
        page->u.inuse.type_info = 0;
        page->count_info = PGC_allocated | 1;
        list_add_tail(&page->list, &d->page_list);

        /* Construct 1:1 mapping */
        set_gpfn_from_mfn(mfn, mfn);
    }
#endif

    /* Copy the OS image. */
    loaddomainelfimage(d, image_start);

    /* Copy the initial ramdisk. */
    //if ( initrd_len != 0 )
    //    memcpy((void *)vinitrd_start, initrd_start, initrd_len);

    d->shared_info->arch.flags = SIF_INITDOMAIN|SIF_PRIVILEGED;

    /* Set up the start info area. */
    d->shared_info->arch.start_info_pfn = pstart_info >> PAGE_SHIFT;
    start_info_page = assign_new_domain_page(d, pstart_info);
    if (start_info_page == NULL)
        panic("can't allocate start info page");
    si = page_to_virt(start_info_page);
    memset(si, 0, PAGE_SIZE);
    sprintf(si->magic, "xen-%i.%i-ia64", XEN_VERSION, XEN_SUBVERSION);
    si->nr_pages = max_pages;

    console_endboot();

    printk("Dom0: 0x%lx\n", (u64)dom0);

#ifdef VALIDATE_VT
    /* VMX-specific construction for Dom0, if hardware supports VMX
     * and Dom0 is an unmodified image.
     */
    if (vmx_dom0)
        vmx_final_setup_guest(v);
#endif

    set_bit(_VCPUF_initialised, &v->vcpu_flags);

    cmdline_addr = dom_fw_setup(d, dom0_command_line, COMMAND_LINE_SIZE);

    vcpu_init_regs (v);

#ifdef CONFIG_DOMAIN0_CONTIGUOUS
    pkern_entry += dom0_start;
#endif
    vcpu_regs (v)->cr_iip = pkern_entry;
    vcpu_regs (v)->r28 = cmdline_addr;

    physdev_init_dom0(d);

    // FIXME: Hack for keyboard input
    //serial_input_init();

    return 0;
}

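/* Per the changeset description ("sync console before halt/reboot"),
 * machine_restart() and machine_halt() switch the console to synchronous
 * mode before invoking the EFI reset call, so pending output is not lost. */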
void machine_restart(char * __unused)
{
    console_start_sync();
    if (running_on_sim)
        printf ("machine_restart called. spinning...\n");
    else
        (*efi.reset_system)(EFI_RESET_WARM,0,0,NULL);
    while(1);
}

void machine_halt(void)
{
    console_start_sync();
    if (running_on_sim)
        printf ("machine_halt called. spinning...\n");
    else
        (*efi.reset_system)(EFI_RESET_SHUTDOWN,0,0,NULL);
    while(1);
}

void sync_vcpu_execstate(struct vcpu *v)
{
//    __ia64_save_fpu(v->arch._thread.fph);
//    if (VMX_DOMAIN(v))
//        vmx_save_state(v);
    // FIXME SMP: Anything else needed here for SMP?
}

static void parse_dom0_mem(char *s)
{
    dom0_size = parse_size_and_unit(s);
}
custom_param("dom0_mem", parse_dom0_mem);

static void parse_dom0_align(char *s)
{
    dom0_align = parse_size_and_unit(s);
}
custom_param("dom0_align", parse_dom0_align);