direct-io.hg

view xen/arch/ia64/xen/domain.c @ 10799:d7a511069a32

[IA64] remove duplicated console_endboot().

There were two calls: one in construct_dom0() and another in start_kernel().
As on x86, keep the one in start_kernel().

Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
author awilliam@xenbuild.aw
date Mon Jul 10 14:12:47 2006 -0600 (2006-07-10)
parents 8ad37880564d
children 306d7857928c

/*
 *  Copyright (C) 1995  Linus Torvalds
 *
 *  Pentium III FXSR, SSE support
 *  Gareth Hughes <gareth@valinux.com>, May 2000
 *
 *  Copyright (C) 2005 Intel Co
 *      Kun Tian (Kevin Tian) <kevin.tian@intel.com>
 *
 * 05/04/29 Kun Tian (Kevin Tian) <kevin.tian@intel.com> Add VTI domain support
 *
 * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp>
 *                    VA Linux Systems Japan K.K.
 *                    dom0 vp model support
 */
#include <xen/config.h>
#include <xen/init.h>
#include <xen/lib.h>
#include <xen/errno.h>
#include <xen/sched.h>
#include <xen/smp.h>
#include <xen/delay.h>
#include <xen/softirq.h>
#include <xen/mm.h>
#include <xen/iocap.h>
#include <asm/asm-xsi-offsets.h>
#include <asm/ptrace.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/desc.h>
#include <asm/hw_irq.h>
#include <asm/setup.h>
//#include <asm/mpspec.h>
#include <xen/irq.h>
#include <xen/event.h>
//#include <xen/shadow.h>
#include <xen/console.h>
#include <xen/compile.h>

#include <xen/elf.h>
//#include <asm/page.h>
#include <asm/pgalloc.h>

#include <asm/offsets.h>  /* for IA64_THREAD_INFO_SIZE */

#include <asm/vcpu.h>   /* for function declarations */
#include <public/arch-ia64.h>
#include <xen/domain.h>
#include <asm/vmx.h>
#include <asm/vmx_vcpu.h>
#include <asm/vmx_vpd.h>
#include <asm/vmx_phy_mode.h>
#include <asm/pal.h>
#include <asm/vhpt.h>
#include <public/hvm/ioreq.h>
#include <public/arch-ia64.h>
#include <asm/tlbflush.h>
#include <asm/regionreg.h>
#include <asm/dom_fw.h>
#include <asm/privop_stat.h>

#ifndef CONFIG_XEN_IA64_DOM0_VP
#define CONFIG_DOMAIN0_CONTIGUOUS
#endif
unsigned long dom0_start = -1L;
unsigned long dom0_size = 512*1024*1024;
unsigned long dom0_align = 64*1024*1024;

/* dom0_max_vcpus: maximum number of VCPUs to create for dom0.  */
static unsigned int dom0_max_vcpus = 1;
integer_param("dom0_max_vcpus", dom0_max_vcpus);

extern unsigned long running_on_sim;

extern char dom0_command_line[];

/* FIXME: where should these declarations live? */
extern void serial_input_init(void);
static void init_switch_stack(struct vcpu *v);
extern void vmx_do_launch(struct vcpu *);

/* this belongs in include/asm, but there doesn't seem to be a suitable place */
extern struct vcpu *ia64_switch_to (struct vcpu *next_task);

/* Address of vpsr.i (in fact evtchn_upcall_mask) of current vcpu.
   This is a Xen virtual address.  */
DEFINE_PER_CPU(uint8_t *, current_psr_i_addr);
DEFINE_PER_CPU(int *, current_psr_ic_addr);

#include <xen/sched-if.h>
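
/*
 * Summary of the checks below: a flush is needed when this physical CPU
 * last ran a different vcpu of the same domain, or when this vcpu last ran
 * on a different physical CPU.  A VT-i domain keeps a per-vcpu vTLB, so
 * only the per-CPU VHPT of a paravirtualized domain is flushed here; the
 * machine TLB is flushed in either case.
 */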
static void flush_vtlb_for_context_switch(struct vcpu* vcpu)
{
    int cpu = smp_processor_id();
    int last_vcpu_id = vcpu->domain->arch.last_vcpu[cpu].vcpu_id;
    int last_processor = vcpu->arch.last_processor;

    if (is_idle_domain(vcpu->domain))
        return;

    vcpu->domain->arch.last_vcpu[cpu].vcpu_id = vcpu->vcpu_id;
    vcpu->arch.last_processor = cpu;

    if ((last_vcpu_id != vcpu->vcpu_id &&
         last_vcpu_id != INVALID_VCPU_ID) ||
        (last_vcpu_id == vcpu->vcpu_id &&
         last_processor != cpu &&
         last_processor != INVALID_PROCESSOR)) {

        // if the vTLB implementation is changed,
        // the following must be updated as well.
        if (VMX_DOMAIN(vcpu)) {
            // currently the vTLB for a VT-i domain is per vcpu,
            // so no flushing is needed.
        } else {
            vhpt_flush();
        }
        local_flush_tlb_all();
    }
}
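
/*
 * schedule_tail() runs on the first switch into a newly created vcpu
 * (ia64_switch_to() does not return to context_switch() in that case).
 * A VT-i vcpu is handed off to vmx_do_launch(); a paravirtualized vcpu
 * gets Xen's IVT installed, the VHPT walker enabled via the PTA, and its
 * region registers and per-CPU psr.i/psr.ic shadow addresses reloaded.
 */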
void schedule_tail(struct vcpu *prev)
{
    extern char ia64_ivt;
    context_saved(prev);

    if (VMX_DOMAIN(current)) {
        vmx_do_launch(current);
    } else {
        ia64_set_iva(&ia64_ivt);
        ia64_set_pta(VHPT_ADDR | (1 << 8) | (VHPT_SIZE_LOG2 << 2) |
                     VHPT_ENABLED);
        load_region_regs(current);
        vcpu_load_kernel_regs(current);
        __ia64_per_cpu_var(current_psr_i_addr) = &current->domain->
            shared_info->vcpu_info[current->vcpu_id].evtchn_upcall_mask;
        __ia64_per_cpu_var(current_psr_ic_addr) = (int *)
            (current->domain->arch.shared_info_va + XSI_PSR_IC_OFS);
    }
    flush_vtlb_for_context_switch(current);
}
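
/*
 * context_switch(): save/restore the FPU high partition and any VMX state,
 * switch stacks via ia64_switch_to(), then rebuild mapping state for the
 * incoming vcpu (region registers, VHPT, timer, psr shadow addresses) or,
 * when switching to the idle domain, simply disable the VHPT walker.
 */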
void context_switch(struct vcpu *prev, struct vcpu *next)
{
    uint64_t spsr;
    uint64_t pta;

    local_irq_save(spsr);
    context_switch_count++;

    __ia64_save_fpu(prev->arch._thread.fph);
    __ia64_load_fpu(next->arch._thread.fph);
    if (VMX_DOMAIN(prev))
        vmx_save_state(prev);
    if (VMX_DOMAIN(next))
        vmx_load_state(next);
    /*ia64_psr(ia64_task_regs(next))->dfh = !ia64_is_local_fpu_owner(next);*/
    prev = ia64_switch_to(next);

    /* Note: ia64_switch_to does not return here at vcpu initialization.  */

    //cpu_set(smp_processor_id(), current->domain->domain_dirty_cpumask);

    // leave this debug for now: it acts as a heartbeat when more than
    // one domain is active
    {
        static long cnt[16] = { 50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50};
        static int i = 100;
        int id = ((struct vcpu *)current)->domain->domain_id & 0xf;
        if (!cnt[id]--) { cnt[id] = 500000; printk("%x",id); }
        if (!i--) { i = 1000000; printk("+"); }
    }

    if (VMX_DOMAIN(current)){
        vmx_load_all_rr(current);
    } else {
        struct domain *nd;
        extern char ia64_ivt;

        ia64_set_iva(&ia64_ivt);

        nd = current->domain;
        if (!is_idle_domain(nd)) {
            ia64_set_pta(VHPT_ADDR | (1 << 8) | (VHPT_SIZE_LOG2 << 2) |
                         VHPT_ENABLED);
            load_region_regs(current);
            vcpu_load_kernel_regs(current);
            vcpu_set_next_timer(current);
            if (vcpu_timer_expired(current))
                vcpu_pend_timer(current);
            __ia64_per_cpu_var(current_psr_i_addr) = &nd->shared_info->
                vcpu_info[current->vcpu_id].evtchn_upcall_mask;
            __ia64_per_cpu_var(current_psr_ic_addr) =
                (int *)(nd->arch.shared_info_va + XSI_PSR_IC_OFS);
        } else {
            /* When switching to the idle domain, we only need to disable
             * the vhpt walker.  All accesses happening within the idle
             * context will then be handled by TR mapping and identity
             * mapping.
             */
            pta = ia64_get_pta();
            ia64_set_pta(pta & ~VHPT_ENABLED);
            __ia64_per_cpu_var(current_psr_i_addr) = NULL;
            __ia64_per_cpu_var(current_psr_ic_addr) = NULL;
        }
    }
    flush_vtlb_for_context_switch(current);
    local_irq_restore(spsr);
    context_saved(prev);
}

void continue_running(struct vcpu *same)
{
    /* nothing to do */
}

static void default_idle(void)
{
    local_irq_disable();
    if ( !softirq_pending(smp_processor_id()) )
        safe_halt();
    local_irq_enable();
}

static void continue_cpu_idle_loop(void)
{
    for ( ; ; )
    {
#ifdef IA64
//        __IRQ_STAT(cpu, idle_timestamp) = jiffies
#else
        irq_stat[cpu].idle_timestamp = jiffies;
#endif
        while ( !softirq_pending(smp_processor_id()) )
            default_idle();
        raise_softirq(SCHEDULE_SOFTIRQ);
        do_softirq();
    }
}

void startup_cpu_idle_loop(void)
{
    /* Just some sanity to ensure that the scheduler is set up okay. */
    ASSERT(current->domain->domain_id == IDLE_DOMAIN_ID);
    raise_softirq(SCHEDULE_SOFTIRQ);

    continue_cpu_idle_loop();
}

void hlt_timer_fn(void *data)
{
    struct vcpu *v = data;
    if (vcpu_timer_expired(v))
        vcpu_pend_timer(v);
    vcpu_unblock(v);
}
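
/*
 * The vcpu structure, its thread_info and the kernel stack share a single
 * xenheap allocation of KERNEL_STACK_SIZE_ORDER pages, much like the Linux
 * task/stack layout; only the idle vcpu0 stays statically allocated.  The
 * privregs page is created and shared with the guest for non-VT-i vcpus.
 */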
struct vcpu *alloc_vcpu_struct(struct domain *d, unsigned int vcpu_id)
{
    struct vcpu *v;
    struct thread_info *ti;

    /* Keep idle vcpu0 statically allocated at compile time, because some
     * code derived from Linux still requires it in the early boot phase.
     */
    if (is_idle_domain(d) && !vcpu_id)
        v = idle_vcpu[0];
    else {
        if ((v = alloc_xenheap_pages(KERNEL_STACK_SIZE_ORDER)) == NULL)
            return NULL;
        memset(v, 0, sizeof(*v));

        ti = alloc_thread_info(v);
        /* Clear thread_info to clear some important fields, like
         * preempt_count
         */
        memset(ti, 0, sizeof(struct thread_info));
        init_switch_stack(v);
    }

    if (!is_idle_domain(d)) {
        if (!d->arch.is_vti) {
            /* Create privregs page only if not VTi. */
            v->arch.privregs =
                alloc_xenheap_pages(get_order(sizeof(mapped_regs_t)));
            BUG_ON(v->arch.privregs == NULL);
            memset(v->arch.privregs, 0, PAGE_SIZE);
            share_xen_page_with_guest(virt_to_page(v->arch.privregs),
                                      d, XENSHARE_writable);
        }

        v->arch.metaphysical_rr0 = d->arch.metaphysical_rr0;
        v->arch.metaphysical_rr4 = d->arch.metaphysical_rr4;
        v->arch.metaphysical_saved_rr0 = d->arch.metaphysical_rr0;
        v->arch.metaphysical_saved_rr4 = d->arch.metaphysical_rr4;

        /* Is this correct?
           It depends on how the domain uses rids.

           A domain may share rids among its processors (e.g. when it has
           a global VHPT).  In that case we should also share rids among
           vcpus and the rid ranges should be the same.

           However, a domain may use per-cpu rid allocation.  In that case
           we don't want to share rids among vcpus, though we may do so if
           two vcpus are on the same cpu...  */

        v->arch.starting_rid = d->arch.starting_rid;
        v->arch.ending_rid = d->arch.ending_rid;
        v->arch.breakimm = d->arch.breakimm;
        v->arch.last_processor = INVALID_PROCESSOR;
    }
    if (!VMX_DOMAIN(v))
        init_timer(&v->arch.hlt_timer, hlt_timer_fn, v, v->processor);

    return v;
}

void free_vcpu_struct(struct vcpu *v)
{
    if (VMX_DOMAIN(v))
        vmx_relinquish_vcpu_resources(v);
    else {
        if (v->arch.privregs != NULL)
            free_xenheap_pages(v->arch.privregs,
                               get_order_from_shift(XMAPPEDREGS_SHIFT));
        kill_timer(&v->arch.hlt_timer);
    }

    free_xenheap_pages(v, KERNEL_STACK_SIZE_ORDER);
}

static void init_switch_stack(struct vcpu *v)
{
    struct pt_regs *regs = vcpu_regs (v);
    struct switch_stack *sw = (struct switch_stack *) regs - 1;
    extern void ia64_ret_from_clone;

    memset(sw, 0, sizeof(struct switch_stack) + sizeof(struct pt_regs));
    sw->ar_bspstore = (unsigned long)v + IA64_RBS_OFFSET;
    sw->b0 = (unsigned long) &ia64_ret_from_clone;
    sw->ar_fpsr = FPSR_DEFAULT;
    v->arch._thread.ksp = (unsigned long) sw - 16;
    // stay on the kernel stack because we may get interrupts!
    // ia64_ret_from_clone switches to the user stack
    v->arch._thread.on_ustack = 0;
    memset(v->arch._thread.fph, 0, sizeof(struct ia64_fpreg) * 96);
}

int arch_domain_create(struct domain *d)
{
    int i;

    // the following will eventually need to be negotiated dynamically
    d->arch.shared_info_va = DEFAULT_SHAREDINFO_ADDR;
    d->arch.breakimm = 0x1000;
    for (i = 0; i < NR_CPUS; i++) {
        d->arch.last_vcpu[i].vcpu_id = INVALID_VCPU_ID;
    }

    if (is_idle_domain(d))
        return 0;

    d->shared_info = alloc_xenheap_pages(get_order_from_shift(XSI_SHIFT));
    if (d->shared_info == NULL)
        goto fail_nomem;
    memset(d->shared_info, 0, XSI_SIZE);
    for (i = 0; i < XSI_SIZE; i += PAGE_SIZE)
        share_xen_page_with_guest(virt_to_page((char *)d->shared_info + i),
                                  d, XENSHARE_writable);

    d->max_pages = (128UL*1024*1024)/PAGE_SIZE; // 128MB default // FIXME
    /* We may also need an emulation rid for region4, though it's unlikely
     * that a guest will issue uncacheable accesses in metaphysical mode.
     * But keeping such info here may be saner.
     */
    if (!allocate_rid_range(d,0))
        goto fail_nomem;

    memset(&d->arch.mm, 0, sizeof(d->arch.mm));

    if ((d->arch.mm.pgd = pgd_alloc(&d->arch.mm)) == NULL)
        goto fail_nomem;

    printf ("arch_domain_create: domain=%p\n", d);
    return 0;

fail_nomem:
    if (d->arch.mm.pgd != NULL)
        pgd_free(d->arch.mm.pgd);
    if (d->shared_info != NULL)
        free_xenheap_pages(d->shared_info, get_order_from_shift(XSI_SHIFT));
    return -ENOMEM;
}

void arch_domain_destroy(struct domain *d)
{
    BUG_ON(d->arch.mm.pgd != NULL);
    if (d->shared_info != NULL)
        free_xenheap_pages(d->shared_info, get_order_from_shift(XSI_SHIFT));

    domain_flush_destroy (d);

    deallocate_rid_range(d);
}

void arch_getdomaininfo_ctxt(struct vcpu *v, struct vcpu_guest_context *c)
{
    c->user_regs = *vcpu_regs (v);
    c->privregs_pfn = virt_to_maddr(v->arch.privregs) >> PAGE_SHIFT;
}

int arch_set_info_guest(struct vcpu *v, struct vcpu_guest_context *c)
{
    struct pt_regs *regs = vcpu_regs (v);
    struct domain *d = v->domain;

    *regs = c->user_regs;

    if (!d->arch.is_vti) {
        /* domain runs at PL2/3 */
        regs->cr_ipsr |= 2UL << IA64_PSR_CPL0_BIT;
        regs->ar_rsc |= (2 << 2); /* force PL2/3 */
    }

    if ( test_bit(_VCPUF_initialised, &v->vcpu_flags) )
        return 0;
    if (d->arch.is_vti)
        vmx_final_setup_guest(v);

    /* This overrides some registers. */
    vcpu_init_regs(v);

    /* Don't redo final setup */
    set_bit(_VCPUF_initialised, &v->vcpu_flags);
    return 0;
}

static void relinquish_memory(struct domain *d, struct list_head *list)
{
    struct list_head *ent;
    struct page_info *page;
#ifndef __ia64__
    unsigned long     x, y;
#endif

    /* Use a recursive lock, as we may enter 'free_domheap_page'. */
    spin_lock_recursive(&d->page_alloc_lock);
    ent = list->next;
    while ( ent != list )
    {
        page = list_entry(ent, struct page_info, list);
        /* Grab a reference to the page so it won't disappear from under us. */
        if ( unlikely(!get_page(page, d)) )
        {
            /* Couldn't get a reference -- someone is freeing this page. */
            ent = ent->next;
            continue;
        }

        if ( test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info) )
            put_page_and_type(page);

        if ( test_and_clear_bit(_PGC_allocated, &page->count_info) )
            put_page(page);

#ifndef __ia64__
        /*
         * Forcibly invalidate base page tables at this point to break circular
         * 'linear page table' references. This is okay because MMU structures
         * are not shared across domains and this domain is now dead. Thus base
         * tables are not in use so a non-zero count means circular reference.
         */
        y = page->u.inuse.type_info;
        for ( ; ; )
        {
            x = y;
            if ( likely((x & (PGT_type_mask|PGT_validated)) !=
                        (PGT_base_page_table|PGT_validated)) )
                break;

            y = cmpxchg(&page->u.inuse.type_info, x, x & ~PGT_validated);
            if ( likely(y == x) )
            {
                free_page_type(page, PGT_base_page_table);
                break;
            }
        }
#endif

        /* Follow the list chain and /then/ potentially free the page. */
        ent = ent->next;
#ifdef CONFIG_XEN_IA64_DOM0_VP
        BUG_ON(get_gpfn_from_mfn(page_to_mfn(page)) != INVALID_M2P_ENTRY);
#endif
        put_page(page);
    }

    spin_unlock_recursive(&d->page_alloc_lock);
}

void domain_relinquish_resources(struct domain *d)
{
    /* Relinquish every page of memory. */

    // release pages by traversing d->arch.mm.
    relinquish_mm(d);

    relinquish_memory(d, &d->xenpage_list);
    relinquish_memory(d, &d->page_list);

    if (d->arch.is_vti && d->arch.sal_data)
        xfree(d->arch.sal_data);
}

void build_physmap_table(struct domain *d)
{
    struct list_head *list_ent = d->page_list.next;
    unsigned long mfn, i = 0;

    while(list_ent != &d->page_list) {
        mfn = page_to_mfn(list_entry(
            list_ent, struct page_info, list));
        assign_domain_page(d, i << PAGE_SHIFT, mfn << PAGE_SHIFT);

        i++;
        list_ent = mfn_to_page(mfn)->list.next;
    }
}
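
/*
 * domain_set_shared_info_va() lets a guest relocate its shared_info
 * mapping: after validating the address it updates each vcpu's
 * interrupt_mask_addr, the per-CPU psr.ic shadow address, and remaps
 * region 7 so the new virtual address becomes visible.
 */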
unsigned long
domain_set_shared_info_va (unsigned long va)
{
    struct vcpu *v = current;
    struct domain *d = v->domain;
    struct vcpu *v1;

    /* Check virtual address:
       must belong to region 7,
       must be 64Kb aligned,
       must not be within Xen virtual space.  */
    if ((va >> 61) != 7
        || (va & 0xffffUL) != 0
        || (va >= HYPERVISOR_VIRT_START && va < HYPERVISOR_VIRT_END))
        panic_domain (NULL, "%s: bad va (0x%016lx)\n", __func__, va);

    /* Note: this doesn't work well if other cpus are already running.
       However this is part of the spec :-)  */
    printf ("Domain set shared_info_va to 0x%016lx\n", va);
    d->arch.shared_info_va = va;

    for_each_vcpu (d, v1) {
        VCPU(v1, interrupt_mask_addr) =
            (unsigned char *)va + INT_ENABLE_OFFSET(v1);
    }

    __ia64_per_cpu_var(current_psr_ic_addr) = (int *)(va + XSI_PSR_IC_OFS);

    /* Remap the shared pages.  */
    set_one_rr (7UL << 61, PSCB(v,rrs[7]));

    return 0;
}

// remove following line if not privifying in memory
//#define HAVE_PRIVIFY_MEMORY
#ifndef HAVE_PRIVIFY_MEMORY
#define privify_memory(x,y) do {} while(0)
#endif

// see arch/x86/xxx/domain_build.c
int elf_sanity_check(Elf_Ehdr *ehdr)
{
    if (!(IS_ELF(*ehdr)))
    {
        printk("DOM0 image is not a Xen-compatible Elf image.\n");
        return 0;
    }
    return 1;
}
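
/*
 * loaddomainelfimage() walks the PT_LOAD program headers of the dom0
 * image.  With CONFIG_DOMAIN0_CONTIGUOUS the kernel is copied straight
 * into dom0's pre-reserved contiguous region; otherwise pages are
 * assigned one at a time and filled (or zeroed for BSS) per PAGE_SIZE.
 */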
static void loaddomainelfimage(struct domain *d, unsigned long image_start)
{
    char *elfbase = (char *) image_start;
    Elf_Ehdr ehdr;
    Elf_Phdr phdr;
    int h, filesz, memsz;
    unsigned long elfaddr, dom_mpaddr, dom_imva;
    struct page_info *p;

    memcpy(&ehdr, (void *) image_start, sizeof(Elf_Ehdr));
    for ( h = 0; h < ehdr.e_phnum; h++ ) {
        memcpy(&phdr,
               elfbase + ehdr.e_phoff + (h*ehdr.e_phentsize),
               sizeof(Elf_Phdr));
        if ((phdr.p_type != PT_LOAD))
            continue;

        filesz = phdr.p_filesz;
        memsz = phdr.p_memsz;
        elfaddr = (unsigned long) elfbase + phdr.p_offset;
        dom_mpaddr = phdr.p_paddr;

        //printf("p_offset: %x, size=%x\n",elfaddr,filesz);
#ifdef CONFIG_DOMAIN0_CONTIGUOUS
        if (d == dom0) {
            if (dom_mpaddr+memsz>dom0_size)
                panic("Dom0 doesn't fit in memory space!\n");
            dom_imva = __va_ul(dom_mpaddr + dom0_start);
            memcpy((void *)dom_imva, (void *)elfaddr, filesz);
            if (memsz > filesz)
                memset((void *)dom_imva+filesz, 0,
                       memsz-filesz);
            //FIXME: This test for code seems to find a lot more than objdump -x does
            if (phdr.p_flags & PF_X) {
                privify_memory(dom_imva,filesz);
                flush_icache_range (dom_imva, dom_imva+filesz);
            }
        }
        else
#endif
        while (memsz > 0) {
            p = assign_new_domain_page(d,dom_mpaddr);
            BUG_ON (unlikely(p == NULL));
            dom_imva = __va_ul(page_to_maddr(p));
            if (filesz > 0) {
                if (filesz >= PAGE_SIZE)
                    memcpy((void *) dom_imva,
                           (void *) elfaddr,
                           PAGE_SIZE);
                else {
                    // copy partial page
                    memcpy((void *) dom_imva,
                           (void *) elfaddr, filesz);
                    // zero the rest of page
                    memset((void *) dom_imva+filesz, 0,
                           PAGE_SIZE-filesz);
                }
                //FIXME: This test for code seems to find a lot more than objdump -x does
                if (phdr.p_flags & PF_X) {
                    privify_memory(dom_imva,PAGE_SIZE);
                    flush_icache_range(dom_imva,
                                       dom_imva+PAGE_SIZE);
                }
            }
            else if (memsz > 0) {
                /* always zero out entire page */
                memset((void *) dom_imva, 0, PAGE_SIZE);
            }
            memsz -= PAGE_SIZE;
            filesz -= PAGE_SIZE;
            elfaddr += PAGE_SIZE;
            dom_mpaddr += PAGE_SIZE;
        }
    }
}
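
/*
 * alloc_dom0() validates dom0_size/dom0_align and, with
 * CONFIG_DOMAIN0_CONTIGUOUS, grabs one contiguous chunk from the boot
 * allocator.  A size that is not a multiple of the alignment is rounded
 * up: for example (hypothetical values), dom0_mem=200M with
 * dom0_align=64M is rounded up to 256M.
 */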
void alloc_dom0(void)
{
    /* Check dom0 size.  */
    if (dom0_size < 4 * 1024 * 1024) {
        panic("dom0_mem is too small, boot aborted"
              " (try e.g. dom0_mem=256M or dom0_mem=65536K)\n");
    }

    /* Check dom0 align.  */
    if ((dom0_align - 1) & dom0_align) { /* not a power of two */
        panic("dom0_align (%lx) must be power of two, boot aborted"
              " (try e.g. dom0_align=256M or dom0_align=65536K)\n",
              dom0_align);
    }
    if (dom0_align < PAGE_SIZE) {
        panic("dom0_align must be >= %ld, boot aborted"
              " (try e.g. dom0_align=256M or dom0_align=65536K)\n",
              PAGE_SIZE);
    }
    if (dom0_size % dom0_align) {
        dom0_size = (dom0_size / dom0_align + 1) * dom0_align;
        printf("dom0_size rounded up to %ld, due to dom0_align=%lx\n",
               dom0_size, dom0_align);
    }

    if (running_on_sim) {
        dom0_size = 128*1024*1024; //FIXME: Should be configurable
    }
#ifdef CONFIG_DOMAIN0_CONTIGUOUS
    printf("alloc_dom0: starting (initializing %lu MB...)\n", dom0_size/(1024*1024));

    /* FIXME: The first chunk (say 256M) should always be assigned to
     * Dom0, since Dom0's physical == machine address for DMA purposes.
     * Some old Linux versions, like 2.4, assume physical memory exists
     * in the 2nd 64M of the address space.
     */
    dom0_start = alloc_boot_pages(dom0_size >> PAGE_SHIFT, dom0_align >> PAGE_SHIFT);
    dom0_start <<= PAGE_SHIFT;
    if (!dom0_start) {
        panic("alloc_dom0: can't allocate contiguous memory size=%lu\n",
              dom0_size);
    }
    printf("alloc_dom0: dom0_start=0x%lx\n", dom0_start);
#else
    // no need to allocate pages for now
    // pages are allocated by map_new_domain_page() via loaddomainelfimage()
    dom0_start = 0;
#endif

}

/*
 * Domain 0 has direct access to all devices.  The main point of this stub,
 * however, is to allow alloc_dom_mem requests with order > 0 to be handled:
 * Dom0 requires that bit set to allocate memory for other domains.
 */
static void physdev_init_dom0(struct domain *d)
{
    if (iomem_permit_access(d, 0UL, ~0UL))
        BUG();
    if (irqs_permit_access(d, 0, NR_IRQS-1))
        BUG();
}
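
/*
 * construct_dom0() metaphysical layout (as set up below): the kernel is
 * loaded at its ELF physical addresses, the start_info page sits on the
 * first page boundary past the kernel with the ia64_boot_param block
 * immediately after the start_info structure in the same page, and the
 * initrd is placed near the top of dom0 memory (dom0_start + dom0_size
 * minus the page-aligned initrd size and a 4MB cushion).
 */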
int construct_dom0(struct domain *d,
                   unsigned long image_start, unsigned long image_len,
                   unsigned long initrd_start, unsigned long initrd_len,
                   char *cmdline)
{
    int i, rc;
    unsigned long alloc_start, alloc_end;
    start_info_t *si;
    struct vcpu *v = d->vcpu[0];
    unsigned long max_pages;

    struct domain_setup_info dsi;
    unsigned long p_start;
    unsigned long pkern_start;
    unsigned long pkern_entry;
    unsigned long pkern_end;
    unsigned long pinitrd_start = 0;
    unsigned long pstart_info;
    struct page_info *start_info_page;
    unsigned long bp_mpa;
    struct ia64_boot_param *bp;

#ifdef VALIDATE_VT
    unsigned int vmx_dom0 = 0;
    unsigned long mfn;
    struct page_info *page = NULL;
#endif

    //printf("construct_dom0: starting\n");

    /* Sanity! */
    BUG_ON(d != dom0);
    BUG_ON(d->vcpu[0] == NULL);
    BUG_ON(test_bit(_VCPUF_initialised, &v->vcpu_flags));

    memset(&dsi, 0, sizeof(struct domain_setup_info));

    printk("*** LOADING DOMAIN 0 ***\n");

    alloc_start = dom0_start;
    alloc_end = dom0_start + dom0_size;
    max_pages = dom0_size / PAGE_SIZE;
    d->max_pages = max_pages;
#ifndef CONFIG_XEN_IA64_DOM0_VP
    d->tot_pages = d->max_pages;
#else
    d->tot_pages = 0;
#endif
    dsi.image_addr = (unsigned long)image_start;
    dsi.image_len = image_len;
    rc = parseelfimage(&dsi);
    if ( rc != 0 )
        return rc;

#ifdef VALIDATE_VT
    /* Temp workaround */
    if (running_on_sim)
        dsi.xen_section_string = (char *)1;

    /* Check whether dom0 is vti domain */
    if ((!vmx_enabled) && !dsi.xen_section_string) {
        printk("Lack of hardware support for unmodified vmx dom0\n");
        panic("");
    }

    if (vmx_enabled && !dsi.xen_section_string) {
        printk("Dom0 is vmx domain!\n");
        vmx_dom0 = 1;
    }
#endif

    p_start = dsi.v_start;
    pkern_start = dsi.v_kernstart;
    pkern_end = dsi.v_kernend;
    pkern_entry = dsi.v_kernentry;

    //printk("p_start=%lx, pkern_start=%lx, pkern_end=%lx, pkern_entry=%lx\n",p_start,pkern_start,pkern_end,pkern_entry);

    if ( (p_start & (PAGE_SIZE-1)) != 0 )
    {
        printk("Initial guest OS must load to a page boundary.\n");
        return -EINVAL;
    }

    pstart_info = PAGE_ALIGN(pkern_end);
    if (initrd_start && initrd_len) {
        unsigned long offset;

        pinitrd_start = (dom0_start + dom0_size) -
                        (PAGE_ALIGN(initrd_len) + 4*1024*1024);
        if (pinitrd_start <= pstart_info)
            panic("%s: not enough memory assigned to dom0", __func__);

        for (offset = 0; offset < initrd_len; offset += PAGE_SIZE) {
            struct page_info *p;
            p = assign_new_domain_page(d, pinitrd_start + offset);
            if (p == NULL)
                panic("%s: can't allocate page for initrd image", __func__);
            if (initrd_len < offset + PAGE_SIZE)
                memcpy(page_to_virt(p), (void*)(initrd_start + offset),
                       initrd_len - offset);
            else
                copy_page(page_to_virt(p), (void*)(initrd_start + offset));
        }
    }

    printk("METAPHYSICAL MEMORY ARRANGEMENT:\n"
           " Kernel image:  %lx->%lx\n"
           " Entry address: %lx\n"
           " Init. ramdisk: %lx len %lx\n"
           " Start info.:   %lx->%lx\n",
           pkern_start, pkern_end, pkern_entry, pinitrd_start, initrd_len,
           pstart_info, pstart_info + PAGE_SIZE);

    if ( (pkern_end - pkern_start) > (max_pages * PAGE_SIZE) )
    {
        printk("Initial guest OS requires too much space\n"
               "(%luMB is greater than %luMB limit)\n",
               (pkern_end-pkern_start)>>20,
               (max_pages <<PAGE_SHIFT)>>20);
        return -ENOMEM;
    }

    // if high 3 bits of pkern start are non-zero, error

    // if pkern end is after end of metaphysical memory, error
    //  (we should be able to deal with this... later)

    /* Mask all upcalls... */
    for ( i = 1; i < MAX_VIRT_CPUS; i++ )
        d->shared_info->vcpu_info[i].evtchn_upcall_mask = 1;

    if (dom0_max_vcpus == 0)
        dom0_max_vcpus = MAX_VIRT_CPUS;
    if (dom0_max_vcpus > num_online_cpus())
        dom0_max_vcpus = num_online_cpus();
    if (dom0_max_vcpus > MAX_VIRT_CPUS)
        dom0_max_vcpus = MAX_VIRT_CPUS;

    printf ("Dom0 max_vcpus=%d\n", dom0_max_vcpus);
    for ( i = 1; i < dom0_max_vcpus; i++ )
        if (alloc_vcpu(d, i, i) == NULL)
            printf ("Cannot allocate dom0 vcpu %d\n", i);

#if defined(VALIDATE_VT) && !defined(CONFIG_XEN_IA64_DOM0_VP)
    /* Construct a frame-allocation list for the initial domain, since these
     * pages are allocated by boot allocator and pfns are not set properly
     */
    for ( mfn = (alloc_start>>PAGE_SHIFT);
          mfn < (alloc_end>>PAGE_SHIFT);
          mfn++ )
    {
        page = mfn_to_page(mfn);
        page_set_owner(page, d);
        page->u.inuse.type_info = 0;
        page->count_info = PGC_allocated | 1;
        list_add_tail(&page->list, &d->page_list);

        /* Construct 1:1 mapping */
        set_gpfn_from_mfn(mfn, mfn);
    }
#endif

    /* Copy the OS image. */
    loaddomainelfimage(d, image_start);

    /* Copy the initial ramdisk. */
    //if ( initrd_len != 0 )
    //    memcpy((void *)vinitrd_start, initrd_start, initrd_len);

    /* Set up start info area. */
    d->shared_info->arch.start_info_pfn = pstart_info >> PAGE_SHIFT;
    start_info_page = assign_new_domain_page(d, pstart_info);
    if (start_info_page == NULL)
        panic("can't allocate start info page");
    si = page_to_virt(start_info_page);
    memset(si, 0, PAGE_SIZE);
    sprintf(si->magic, "xen-%i.%i-ia64", XEN_VERSION, XEN_SUBVERSION);
    si->nr_pages = max_pages;
    si->flags = SIF_INITDOMAIN|SIF_PRIVILEGED;

    printk("Dom0: 0x%lx\n", (u64)dom0);

#ifdef VALIDATE_VT
    /* VMX specific construction for Dom0, if hardware supports VMX
     * and Dom0 is an unmodified image
     */
    if (vmx_dom0)
        vmx_final_setup_guest(v);
#endif

    set_bit(_VCPUF_initialised, &v->vcpu_flags);

    /* Build firmware.
       Note: the Linux kernel reserves the memory used by start_info, so
       there is no need to remove it from the MDT.  */
    bp_mpa = pstart_info + sizeof(struct start_info);
    dom_fw_setup(d, bp_mpa, max_pages * PAGE_SIZE);

    /* Fill boot param.  */
    strncpy((char *)si->cmd_line, dom0_command_line, sizeof(si->cmd_line));
    si->cmd_line[sizeof(si->cmd_line)-1] = 0;

    bp = (struct ia64_boot_param *)(si + 1);
    bp->command_line = pstart_info + offsetof (start_info_t, cmd_line);

    /* We assume the console has reached the last line! */
    bp->console_info.num_cols = ia64_boot_param->console_info.num_cols;
    bp->console_info.num_rows = ia64_boot_param->console_info.num_rows;
    bp->console_info.orig_x = 0;
    bp->console_info.orig_y = bp->console_info.num_rows == 0 ?
                              0 : bp->console_info.num_rows - 1;

    bp->initrd_start = (dom0_start + dom0_size) -
                       (PAGE_ALIGN(ia64_boot_param->initrd_size) + 4*1024*1024);
    bp->initrd_size = ia64_boot_param->initrd_size;

    vcpu_init_regs (v);

    vcpu_regs(v)->r28 = bp_mpa;

#ifdef CONFIG_DOMAIN0_CONTIGUOUS
    pkern_entry += dom0_start;
#endif
    vcpu_regs (v)->cr_iip = pkern_entry;

    physdev_init_dom0(d);

    // FIXME: Hack for keyboard input
    //serial_input_init();

    return 0;
}

void machine_restart(char * __unused)
{
    console_start_sync();
    if (running_on_sim)
        printf ("machine_restart called. spinning...\n");
    else
        (*efi.reset_system)(EFI_RESET_WARM, 0, 0, NULL);
    while(1);
}

void machine_halt(void)
{
    console_start_sync();
    if (running_on_sim)
        printf ("machine_halt called. spinning...\n");
    else
        (*efi.reset_system)(EFI_RESET_SHUTDOWN, 0, 0, NULL);
    while(1);
}

void sync_vcpu_execstate(struct vcpu *v)
{
//    __ia64_save_fpu(v->arch._thread.fph);
//    if (VMX_DOMAIN(v))
//        vmx_save_state(v);
    // FIXME SMP: Anything else needed here for SMP?
}

static void parse_dom0_mem(char *s)
{
    dom0_size = parse_size_and_unit(s);
}
custom_param("dom0_mem", parse_dom0_mem);

static void parse_dom0_align(char *s)
{
    dom0_align = parse_size_and_unit(s);
}
custom_param("dom0_align", parse_dom0_align);