direct-io.hg

view xen/arch/ia64/xen/domain.c @ 10470:411a3c01bb40

[XEN] Xen always relinquishes VGA console to domain0 when domain0
starts to boot (previous behaviour looked for console=tty0 on
dom0's command line). To prevent this, 'console=vga[keep]' must
be specified.
Signed-off-by: Keir Fraser <keir@xensource.com>
author kaf24@firebug.cl.cam.ac.uk
date Tue Jun 20 18:51:46 2006 +0100 (2006-06-20)
parents 0d1dab1d9b67
children fdf25330e4a6
line source
1 /*
2 * Copyright (C) 1995 Linus Torvalds
3 *
4 * Pentium III FXSR, SSE support
5 * Gareth Hughes <gareth@valinux.com>, May 2000
6 *
7 * Copyright (C) 2005 Intel Co
8 * Kun Tian (Kevin Tian) <kevin.tian@intel.com>
9 *
10 * 05/04/29 Kun Tian (Kevin Tian) <kevin.tian@intel.com> Add VTI domain support
11 *
12 * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp>
13 * VA Linux Systems Japan K.K.
14 * dom0 vp model support
15 */
17 #include <xen/config.h>
18 #include <xen/init.h>
19 #include <xen/lib.h>
20 #include <xen/errno.h>
21 #include <xen/sched.h>
22 #include <xen/smp.h>
23 #include <xen/delay.h>
24 #include <xen/softirq.h>
25 #include <xen/mm.h>
26 #include <xen/iocap.h>
27 #include <asm/ptrace.h>
28 #include <asm/system.h>
29 #include <asm/io.h>
30 #include <asm/processor.h>
31 #include <asm/desc.h>
32 #include <asm/hw_irq.h>
33 #include <asm/setup.h>
34 //#include <asm/mpspec.h>
35 #include <xen/irq.h>
36 #include <xen/event.h>
37 //#include <xen/shadow.h>
38 #include <xen/console.h>
39 #include <xen/compile.h>
41 #include <xen/elf.h>
42 //#include <asm/page.h>
43 #include <asm/pgalloc.h>
45 #include <asm/offsets.h> /* for IA64_THREAD_INFO_SIZE */
47 #include <asm/vcpu.h> /* for function declarations */
48 #include <public/arch-ia64.h>
49 #include <xen/domain.h>
50 #include <asm/vmx.h>
51 #include <asm/vmx_vcpu.h>
52 #include <asm/vmx_vpd.h>
53 #include <asm/vmx_phy_mode.h>
54 #include <asm/pal.h>
55 #include <asm/vhpt.h>
56 #include <public/hvm/ioreq.h>
57 #include <public/arch-ia64.h>
58 #include <asm/tlbflush.h>
59 #include <asm/regionreg.h>
60 #include <asm/dom_fw.h>
62 #ifndef CONFIG_XEN_IA64_DOM0_VP
63 #define CONFIG_DOMAIN0_CONTIGUOUS
64 #endif
65 unsigned long dom0_start = -1L;
66 unsigned long dom0_size = 512*1024*1024;
67 unsigned long dom0_align = 64*1024*1024;
69 /* dom0_max_vcpus: maximum number of VCPUs to create for dom0. */
70 static unsigned int dom0_max_vcpus = 1;
71 integer_param("dom0_max_vcpus", dom0_max_vcpus);
73 extern unsigned long running_on_sim;
75 extern char dom0_command_line[];
77 /* FIXME: where should these declarations live? */
78 extern void serial_input_init(void);
79 static void init_switch_stack(struct vcpu *v);
80 extern void vmx_do_launch(struct vcpu *);
81 void build_physmap_table(struct domain *d);
83 /* this belongs in include/asm, but there doesn't seem to be a suitable place */
84 unsigned long context_switch_count = 0;
86 extern struct vcpu *ia64_switch_to (struct vcpu *next_task);
88 /* Address of vpsr.i (in fact evtchn_upcall_mask) of current vcpu.
89 This is a Xen virtual address. */
90 DEFINE_PER_CPU(uint8_t *, current_psr_i_addr);
92 #include <xen/sched-if.h>
94 void schedule_tail(struct vcpu *prev)
95 {
96 extern char ia64_ivt;
97 context_saved(prev);
99 if (VMX_DOMAIN(current)) {
100 vmx_do_launch(current);
101 } else {
102 ia64_set_iva(&ia64_ivt);
103 ia64_set_pta(VHPT_ADDR | (1 << 8) | (VHPT_SIZE_LOG2 << 2) |
104 VHPT_ENABLED);
105 load_region_regs(current);
106 vcpu_load_kernel_regs(current);
107 __ia64_per_cpu_var(current_psr_i_addr) = &current->domain->
108 shared_info->vcpu_info[current->vcpu_id].evtchn_upcall_mask;
109 }
110 }
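/*
 * A note on the ia64_set_pta() value used above (and again in
 * context_switch() below): the IA-64 PTA register encodes the VHPT
 * walker enable (ve) in bit 0, the VHPT size (log2 bytes) in bits 7:2,
 * the long-format flag (vf) in bit 8 and the VHPT base in the upper bits.
 * So VHPT_ADDR | (1 << 8) | (VHPT_SIZE_LOG2 << 2) | VHPT_ENABLED installs
 * a long-format VHPT of 2^VHPT_SIZE_LOG2 bytes at VHPT_ADDR with the
 * walker enabled (assuming VHPT_ENABLED is simply the ve bit).
 */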
112 void context_switch(struct vcpu *prev, struct vcpu *next)
113 {
114 uint64_t spsr;
115 uint64_t pta;
117 local_irq_save(spsr);
118 context_switch_count++;
120 __ia64_save_fpu(prev->arch._thread.fph);
121 __ia64_load_fpu(next->arch._thread.fph);
122 if (VMX_DOMAIN(prev))
123 vmx_save_state(prev);
124 if (VMX_DOMAIN(next))
125 vmx_load_state(next);
126 /*ia64_psr(ia64_task_regs(next))->dfh = !ia64_is_local_fpu_owner(next);*/
127 prev = ia64_switch_to(next);
129 /* Note: ia64_switch_to does not return here at vcpu initialization. */
131 //cpu_set(smp_processor_id(), current->domain->domain_dirty_cpumask);
133 // leave this debug for now: it acts as a heartbeat when more than
134 // one domain is active
135 {
136 static long cnt[16] = { 50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50};
137 static int i = 100;
138 int id = ((struct vcpu *)current)->domain->domain_id & 0xf;
139 if (!cnt[id]--) { cnt[id] = 500000; printk("%x",id); }
140 if (!i--) { i = 1000000; printk("+"); }
141 }
143 if (VMX_DOMAIN(current)){
144 vmx_load_all_rr(current);
145 } else {
146 struct domain *nd;
147 extern char ia64_ivt;
149 ia64_set_iva(&ia64_ivt);
151 nd = current->domain;
152 if (!is_idle_domain(nd)) {
153 ia64_set_pta(VHPT_ADDR | (1 << 8) | (VHPT_SIZE_LOG2 << 2) |
154 VHPT_ENABLED);
155 load_region_regs(current);
156 vcpu_load_kernel_regs(current);
157 vcpu_set_next_timer(current);
158 if (vcpu_timer_expired(current))
159 vcpu_pend_timer(current);
160 __ia64_per_cpu_var(current_psr_i_addr) = &nd->shared_info->
161 vcpu_info[current->vcpu_id].evtchn_upcall_mask;
162 } else {
163 /* When switching to the idle domain, we only need to disable the
164 * VHPT walker. All accesses made within the idle context are then
165 * handled by the TR mappings and the identity mapping.
166 */
167 pta = ia64_get_pta();
168 ia64_set_pta(pta & ~VHPT_ENABLED);
169 __ia64_per_cpu_var(current_psr_i_addr) = NULL;
170 }
171 }
172 local_irq_restore(spsr);
173 context_saved(prev);
174 }
176 void continue_running(struct vcpu *same)
177 {
178 /* nothing to do */
179 }
181 static void default_idle(void)
182 {
183 int cpu = smp_processor_id();
184 local_irq_disable();
185 if ( !softirq_pending(cpu))
186 safe_halt();
187 local_irq_enable();
188 }
190 static void continue_cpu_idle_loop(void)
191 {
192 int cpu = smp_processor_id();
193 for ( ; ; )
194 {
195 #ifdef IA64
196 // __IRQ_STAT(cpu, idle_timestamp) = jiffies
197 #else
198 irq_stat[cpu].idle_timestamp = jiffies;
199 #endif
200 while ( !softirq_pending(cpu) )
201 default_idle();
202 add_preempt_count(SOFTIRQ_OFFSET);
203 raise_softirq(SCHEDULE_SOFTIRQ);
204 do_softirq();
205 sub_preempt_count(SOFTIRQ_OFFSET);
206 }
207 }
209 void startup_cpu_idle_loop(void)
210 {
211 /* Just some sanity to ensure that the scheduler is set up okay. */
212 ASSERT(current->domain->domain_id == IDLE_DOMAIN_ID);
213 raise_softirq(SCHEDULE_SOFTIRQ);
215 continue_cpu_idle_loop();
216 }
218 struct vcpu *alloc_vcpu_struct(struct domain *d, unsigned int vcpu_id)
219 {
220 struct vcpu *v;
221 struct thread_info *ti;
223 /* Keep idle vcpu0 statically allocated at compile time, because some
224 * code inherited from Linux still requires it in the early boot phase.
225 */
226 if (is_idle_domain(d) && !vcpu_id)
227 v = idle_vcpu[0];
228 else {
229 if ((v = alloc_xenheap_pages(KERNEL_STACK_SIZE_ORDER)) == NULL)
230 return NULL;
231 memset(v, 0, sizeof(*v));
233 ti = alloc_thread_info(v);
234 /* Clear thread_info to clear some important fields, like
235 * preempt_count
236 */
237 memset(ti, 0, sizeof(struct thread_info));
238 init_switch_stack(v);
239 }
241 if (!is_idle_domain(d)) {
242 v->arch.privregs =
243 alloc_xenheap_pages(get_order(sizeof(mapped_regs_t)));
244 BUG_ON(v->arch.privregs == NULL);
245 memset(v->arch.privregs, 0, PAGE_SIZE);
247 if (!vcpu_id)
248 memset(&d->shared_info->evtchn_mask[0], 0xff,
249 sizeof(d->shared_info->evtchn_mask));
251 v->arch.metaphysical_rr0 = d->arch.metaphysical_rr0;
252 v->arch.metaphysical_rr4 = d->arch.metaphysical_rr4;
253 v->arch.metaphysical_saved_rr0 = d->arch.metaphysical_rr0;
254 v->arch.metaphysical_saved_rr4 = d->arch.metaphysical_rr4;
256 /* Is this correct?
257 It depends on how the domain uses rids.
259 A domain may share rids among its processors (e.g. when it has a
260 global VHPT). In that case we should also share rids among vcpus,
261 and the rid range should be the same.
263 However, a domain may instead allocate rids per cpu. In that case
264 we don't want to share rids among vcpus, though we may still do so
265 if two vcpus are on the same cpu... */
267 v->arch.starting_rid = d->arch.starting_rid;
268 v->arch.ending_rid = d->arch.ending_rid;
269 v->arch.breakimm = d->arch.breakimm;
270 }
272 return v;
273 }
275 void free_vcpu_struct(struct vcpu *v)
276 {
277 if (VMX_DOMAIN(v))
278 vmx_relinquish_vcpu_resources(v);
279 else {
280 if (v->arch.privregs != NULL)
281 free_xenheap_pages(v->arch.privregs, get_order(sizeof(mapped_regs_t)));
282 }
284 free_xenheap_pages(v, KERNEL_STACK_SIZE_ORDER);
285 }
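/*
 * Rough sketch of the initial stack layout set up below: pt_regs sits at
 * the top of the vcpu's kernel stack with a switch_stack frame just under
 * it; b0 in that frame points at ia64_ret_from_clone, so the first
 * ia64_switch_to() into this vcpu resumes there, and ksp points 16 bytes
 * below the switch_stack (the scratch area ia64_switch_to() expects).
 */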
287 static void init_switch_stack(struct vcpu *v)
288 {
289 struct pt_regs *regs = vcpu_regs (v);
290 struct switch_stack *sw = (struct switch_stack *) regs - 1;
291 extern void ia64_ret_from_clone;
293 memset(sw, 0, sizeof(struct switch_stack) + sizeof(struct pt_regs));
294 sw->ar_bspstore = (unsigned long)v + IA64_RBS_OFFSET;
295 sw->b0 = (unsigned long) &ia64_ret_from_clone;
296 sw->ar_fpsr = FPSR_DEFAULT;
297 v->arch._thread.ksp = (unsigned long) sw - 16;
298 // stay on the kernel stack because we may get interrupts!
299 // ia64_ret_from_clone switches to the user stack
300 v->arch._thread.on_ustack = 0;
301 memset(v->arch._thread.fph,0,sizeof(struct ia64_fpreg)*96);
302 }
304 int arch_domain_create(struct domain *d)
305 {
306 // the following will eventually need to be negotiated dynamically
307 d->arch.shared_info_va = SHAREDINFO_ADDR;
308 d->arch.breakimm = 0x1000;
309 seqlock_init(&d->arch.vtlb_lock);
311 if (is_idle_domain(d))
312 return 0;
314 if ((d->shared_info = (void *)alloc_xenheap_page()) == NULL)
315 goto fail_nomem;
316 memset(d->shared_info, 0, PAGE_SIZE);
318 d->max_pages = (128UL*1024*1024)/PAGE_SIZE; // 128MB default // FIXME
319 /* We may also need an emulation rid for region 4, though a guest is
320 * unlikely to issue uncacheable accesses in metaphysical mode. But
321 * keeping such info here may be the saner choice.
322 */
323 if (!allocate_rid_range(d,0))
324 goto fail_nomem;
325 d->arch.sys_pgnr = 0;
327 memset(&d->arch.mm, 0, sizeof(d->arch.mm));
329 d->arch.physmap_built = 0;
330 if ((d->arch.mm.pgd = pgd_alloc(&d->arch.mm)) == NULL)
331 goto fail_nomem;
333 printf ("arch_domain_create: domain=%p\n", d);
334 return 0;
336 fail_nomem:
337 if (d->arch.mm.pgd != NULL)
338 pgd_free(d->arch.mm.pgd);
339 if (d->shared_info != NULL)
340 free_xenheap_page(d->shared_info);
341 return -ENOMEM;
342 }
344 void arch_domain_destroy(struct domain *d)
345 {
346 BUG_ON(d->arch.mm.pgd != NULL);
347 if (d->shared_info != NULL)
348 free_xenheap_page(d->shared_info);
350 domain_flush_destroy (d);
352 deallocate_rid_range(d);
353 }
355 void arch_getdomaininfo_ctxt(struct vcpu *v, struct vcpu_guest_context *c)
356 {
357 c->regs = *vcpu_regs (v);
358 c->shared = v->domain->shared_info->arch;
359 }
361 int arch_set_info_guest(struct vcpu *v, struct vcpu_guest_context *c)
362 {
363 struct pt_regs *regs = vcpu_regs (v);
364 struct domain *d = v->domain;
365 unsigned long cmdline_addr;
367 if ( test_bit(_VCPUF_initialised, &v->vcpu_flags) )
368 return 0;
369 if (c->flags & VGCF_VMX_GUEST) {
370 if (!vmx_enabled) {
371 printk("No VMX hardware feature for vmx domain.\n");
372 return -EINVAL;
373 }
375 if (v == d->vcpu[0])
376 vmx_setup_platform(d, c);
378 vmx_final_setup_guest(v);
379 } else if (!d->arch.physmap_built)
380 build_physmap_table(d);
382 *regs = c->regs;
383 cmdline_addr = 0;
384 if (v == d->vcpu[0]) {
385 /* Only for first vcpu. */
386 d->arch.sys_pgnr = c->sys_pgnr;
387 d->arch.initrd_start = c->initrd.start;
388 d->arch.initrd_len = c->initrd.size;
389 d->arch.cmdline = c->cmdline;
390 d->shared_info->arch = c->shared;
392 if (!VMX_DOMAIN(v)) {
393 const char *cmdline = d->arch.cmdline;
394 int len;
396 if (*cmdline == 0) {
397 #define DEFAULT_CMDLINE "nomca nosmp xencons=tty0 console=tty0 root=/dev/hda1"
398 cmdline = DEFAULT_CMDLINE;
399 len = sizeof (DEFAULT_CMDLINE);
400 printf("domU command line defaulted to"
401 DEFAULT_CMDLINE "\n");
402 }
403 else
404 len = IA64_COMMAND_LINE_SIZE;
405 cmdline_addr = dom_fw_setup (d, cmdline, len);
406 }
408 /* Cache synchronization seems to be done by the Linux kernel during
409 mmap/munmap operations. However, be conservative. */
410 domain_cache_flush (d, 1);
411 }
412 vcpu_init_regs (v);
413 regs->r28 = cmdline_addr;
415 if ( c->privregs && copy_from_user(v->arch.privregs,
416 c->privregs, sizeof(mapped_regs_t))) {
417 printk("Bad ctxt address in arch_set_info_guest: %p\n",
418 c->privregs);
419 return -EFAULT;
420 }
422 /* Don't redo final setup */
423 set_bit(_VCPUF_initialised, &v->vcpu_flags);
424 return 0;
425 }
427 static void relinquish_memory(struct domain *d, struct list_head *list)
428 {
429 struct list_head *ent;
430 struct page_info *page;
431 #ifndef __ia64__
432 unsigned long x, y;
433 #endif
435 /* Use a recursive lock, as we may enter 'free_domheap_page'. */
436 spin_lock_recursive(&d->page_alloc_lock);
437 ent = list->next;
438 while ( ent != list )
439 {
440 page = list_entry(ent, struct page_info, list);
441 /* Grab a reference to the page so it won't disappear from under us. */
442 if ( unlikely(!get_page(page, d)) )
443 {
444 /* Couldn't get a reference -- someone is freeing this page. */
445 ent = ent->next;
446 continue;
447 }
449 if ( test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info) )
450 put_page_and_type(page);
452 if ( test_and_clear_bit(_PGC_allocated, &page->count_info) )
453 put_page(page);
455 #ifndef __ia64__
456 /*
457 * Forcibly invalidate base page tables at this point to break circular
458 * 'linear page table' references. This is okay because MMU structures
459 * are not shared across domains and this domain is now dead. Thus base
460 * tables are not in use so a non-zero count means circular reference.
461 */
462 y = page->u.inuse.type_info;
463 for ( ; ; )
464 {
465 x = y;
466 if ( likely((x & (PGT_type_mask|PGT_validated)) !=
467 (PGT_base_page_table|PGT_validated)) )
468 break;
470 y = cmpxchg(&page->u.inuse.type_info, x, x & ~PGT_validated);
471 if ( likely(y == x) )
472 {
473 free_page_type(page, PGT_base_page_table);
474 break;
475 }
476 }
477 #endif
479 /* Follow the list chain and /then/ potentially free the page. */
480 ent = ent->next;
481 #ifdef CONFIG_XEN_IA64_DOM0_VP
482 BUG_ON(get_gpfn_from_mfn(page_to_mfn(page)) != INVALID_M2P_ENTRY);
483 #endif
484 put_page(page);
485 }
487 spin_unlock_recursive(&d->page_alloc_lock);
488 }
490 void domain_relinquish_resources(struct domain *d)
491 {
492 /* Relinquish every page of memory. */
494 // release pages by traversing d->arch.mm.
495 relinquish_mm(d);
497 relinquish_memory(d, &d->xenpage_list);
498 relinquish_memory(d, &d->page_list);
499 }
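/*
 * Summary of build_physmap_table() below: walk d->page_list in allocation
 * order and map guest pseudo-physical page i to the i-th page on the list,
 * i.e. build a physmap over whatever pages the domain already owns.
 */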
501 void build_physmap_table(struct domain *d)
502 {
503 struct list_head *list_ent = d->page_list.next;
504 unsigned long mfn, i = 0;
506 ASSERT(!d->arch.physmap_built);
507 while(list_ent != &d->page_list) {
508 mfn = page_to_mfn(list_entry(
509 list_ent, struct page_info, list));
510 assign_domain_page(d, i << PAGE_SHIFT, mfn << PAGE_SHIFT);
512 i++;
513 list_ent = mfn_to_page(mfn)->list.next;
514 }
515 d->arch.physmap_built = 1;
516 }
518 // remove following line if not privifying in memory
519 //#define HAVE_PRIVIFY_MEMORY
520 #ifndef HAVE_PRIVIFY_MEMORY
521 #define privify_memory(x,y) do {} while(0)
522 #endif
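/*
 * "Privifying" presumably refers to the Xen/ia64 trick of rewriting
 * privilege-sensitive instructions in the loaded image so that they trap
 * into the hypervisor; with HAVE_PRIVIFY_MEMORY left undefined,
 * privify_memory() is a no-op and the image is loaded unmodified.
 */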
524 // see arch/x86/xxx/domain_build.c
525 int elf_sanity_check(Elf_Ehdr *ehdr)
526 {
527 if (!(IS_ELF(*ehdr)))
528 {
529 printk("DOM0 image is not a Xen-compatible Elf image.\n");
530 return 0;
531 }
532 return 1;
533 }
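/*
 * loaddomainelfimage() below copies each PT_LOAD program segment into the
 * domain page by page: up to p_filesz bytes come from the image, the
 * remainder up to p_memsz is zeroed (BSS), and the icache is flushed for
 * executable (PF_X) segments.
 */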
535 static void loaddomainelfimage(struct domain *d, unsigned long image_start)
536 {
537 char *elfbase = (char *) image_start;
538 Elf_Ehdr ehdr;
539 Elf_Phdr phdr;
540 int h, filesz, memsz;
541 unsigned long elfaddr, dom_mpaddr, dom_imva;
542 struct page_info *p;
544 memcpy(&ehdr, (void *) image_start, sizeof(Elf_Ehdr));
545 for ( h = 0; h < ehdr.e_phnum; h++ ) {
546 memcpy(&phdr,
547 elfbase + ehdr.e_phoff + (h*ehdr.e_phentsize),
548 sizeof(Elf_Phdr));
549 if ((phdr.p_type != PT_LOAD))
550 continue;
552 filesz = phdr.p_filesz;
553 memsz = phdr.p_memsz;
554 elfaddr = (unsigned long) elfbase + phdr.p_offset;
555 dom_mpaddr = phdr.p_paddr;
557 //printf("p_offset: %x, size=%x\n",elfaddr,filesz);
558 #ifdef CONFIG_DOMAIN0_CONTIGUOUS
559 if (d == dom0) {
560 if (dom_mpaddr+memsz>dom0_size)
561 panic("Dom0 doesn't fit in memory space!\n");
562 dom_imva = __va_ul(dom_mpaddr + dom0_start);
563 memcpy((void *)dom_imva, (void *)elfaddr, filesz);
564 if (memsz > filesz)
565 memset((void *)dom_imva+filesz, 0,
566 memsz-filesz);
567 //FIXME: This test for code seems to find a lot more than objdump -x does
568 if (phdr.p_flags & PF_X) {
569 privify_memory(dom_imva,filesz);
570 flush_icache_range (dom_imva, dom_imva+filesz);
571 }
572 }
573 else
574 #endif
575 while (memsz > 0) {
576 p = assign_new_domain_page(d,dom_mpaddr);
577 BUG_ON (unlikely(p == NULL));
578 dom_imva = __va_ul(page_to_maddr(p));
579 if (filesz > 0) {
580 if (filesz >= PAGE_SIZE)
581 memcpy((void *) dom_imva,
582 (void *) elfaddr,
583 PAGE_SIZE);
584 else {
585 // copy partial page
586 memcpy((void *) dom_imva,
587 (void *) elfaddr, filesz);
588 // zero the rest of page
589 memset((void *) dom_imva+filesz, 0,
590 PAGE_SIZE-filesz);
591 }
592 //FIXME: This test for code seems to find a lot more than objdump -x does
593 if (phdr.p_flags & PF_X) {
594 privify_memory(dom_imva,PAGE_SIZE);
595 flush_icache_range(dom_imva,
596 dom_imva+PAGE_SIZE);
597 }
598 }
599 else if (memsz > 0) {
600 /* always zero out entire page */
601 memset((void *) dom_imva, 0, PAGE_SIZE);
602 }
603 memsz -= PAGE_SIZE;
604 filesz -= PAGE_SIZE;
605 elfaddr += PAGE_SIZE;
606 dom_mpaddr += PAGE_SIZE;
607 }
608 }
609 }
611 void alloc_dom0(void)
612 {
613 /* Check dom0 size. */
614 if (dom0_size < 4 * 1024 * 1024) {
615 panic("dom0_mem is too small, boot aborted"
616 " (try e.g. dom0_mem=256M or dom0_mem=65536K)\n");
617 }
619 /* Check dom0 align. */
620 if ((dom0_align - 1) & dom0_align) { /* not a power of two */
621 panic("dom0_align (%lx) must be power of two, boot aborted"
622 " (try e.g. dom0_align=256M or dom0_align=65536K)\n",
623 dom0_align);
624 }
625 if (dom0_align < PAGE_SIZE) {
626 panic("dom0_align must be >= %ld, boot aborted"
627 " (try e.g. dom0_align=256M or dom0_align=65536K)\n",
628 PAGE_SIZE);
629 }
630 if (dom0_size % dom0_align) {
631 dom0_size = (dom0_size / dom0_align + 1) * dom0_align;
632 printf("dom0_size rounded up to %ld, due to dom0_align=%lx\n",
633 dom0_size,dom0_align);
634 }
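/* Worked example: dom0_mem=200M with dom0_align=64M is rounded up to 256M here. */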
636 if (running_on_sim) {
637 dom0_size = 128*1024*1024; //FIXME: Should be configurable
638 }
639 #ifdef CONFIG_DOMAIN0_CONTIGUOUS
640 printf("alloc_dom0: starting (initializing %lu MB...)\n",dom0_size/(1024*1024));
642 /* FIXME: The first chunk (say 256M) should always be assigned to
643 * Dom0, since Dom0's physical addresses equal machine addresses for
644 * DMA purposes. Some older Linux versions, such as 2.4, assume
645 * physical memory exists in the second 64M of the address space.
646 */
647 dom0_start = alloc_boot_pages(dom0_size >> PAGE_SHIFT, dom0_align >> PAGE_SHIFT);
648 dom0_start <<= PAGE_SHIFT;
649 if (!dom0_start) {
650 panic("alloc_dom0: can't allocate contiguous memory size=%lu\n",
651 dom0_size);
652 }
653 printf("alloc_dom0: dom0_start=0x%lx\n", dom0_start);
654 #else
655 // no need to allocate pages for now
656 // pages are allocated by map_new_domain_page() via loaddomainelfimage()
657 dom0_start = 0;
658 #endif
660 }
663 /*
664 * Domain 0 has direct access to absolutely all devices. However, the
665 * major point of this stub is to allow alloc_dom_mem to handle
666 * order > 0 requests; Dom0 requires that capability to allocate
667 * memory for other domains.
668 */
669 static void physdev_init_dom0(struct domain *d)
670 {
671 if (iomem_permit_access(d, 0UL, ~0UL))
672 BUG();
673 if (irqs_permit_access(d, 0, NR_IRQS-1))
674 BUG();
675 }
677 int construct_dom0(struct domain *d,
678 unsigned long image_start, unsigned long image_len,
679 unsigned long initrd_start, unsigned long initrd_len,
680 char *cmdline)
681 {
682 int i, rc;
683 unsigned long alloc_start, alloc_end;
684 start_info_t *si;
685 struct vcpu *v = d->vcpu[0];
686 unsigned long max_pages;
688 struct domain_setup_info dsi;
689 unsigned long p_start;
690 unsigned long pkern_start;
691 unsigned long pkern_entry;
692 unsigned long pkern_end;
693 unsigned long pinitrd_start = 0;
694 unsigned long pstart_info;
695 unsigned long cmdline_addr;
696 struct page_info *start_info_page;
698 #ifdef VALIDATE_VT
699 unsigned int vmx_dom0 = 0;
700 unsigned long mfn;
701 struct page_info *page = NULL;
702 #endif
704 //printf("construct_dom0: starting\n");
706 /* Sanity! */
707 BUG_ON(d != dom0);
708 BUG_ON(d->vcpu[0] == NULL);
709 BUG_ON(test_bit(_VCPUF_initialised, &v->vcpu_flags));
711 memset(&dsi, 0, sizeof(struct domain_setup_info));
713 printk("*** LOADING DOMAIN 0 ***\n");
715 alloc_start = dom0_start;
716 alloc_end = dom0_start + dom0_size;
717 max_pages = dom0_size / PAGE_SIZE;
718 d->max_pages = max_pages;
719 #ifndef CONFIG_XEN_IA64_DOM0_VP
720 d->tot_pages = d->max_pages;
721 #else
722 d->tot_pages = 0;
723 #endif
724 dsi.image_addr = (unsigned long)image_start;
725 dsi.image_len = image_len;
726 rc = parseelfimage(&dsi);
727 if ( rc != 0 )
728 return rc;
730 #ifdef VALIDATE_VT
731 /* Temp workaround */
732 if (running_on_sim)
733 dsi.xen_section_string = (char *)1;
735 /* Check whether dom0 is vti domain */
736 if ((!vmx_enabled) && !dsi.xen_section_string) {
737 printk("Lack of hardware support for unmodified vmx dom0\n");
738 panic("");
739 }
741 if (vmx_enabled && !dsi.xen_section_string) {
742 printk("Dom0 is vmx domain!\n");
743 vmx_dom0 = 1;
744 }
745 #endif
747 p_start = dsi.v_start;
748 pkern_start = dsi.v_kernstart;
749 pkern_end = dsi.v_kernend;
750 pkern_entry = dsi.v_kernentry;
752 //printk("p_start=%lx, pkern_start=%lx, pkern_end=%lx, pkern_entry=%lx\n",p_start,pkern_start,pkern_end,pkern_entry);
754 if ( (p_start & (PAGE_SIZE-1)) != 0 )
755 {
756 printk("Initial guest OS must load to a page boundary.\n");
757 return -EINVAL;
758 }
760 pstart_info = PAGE_ALIGN(pkern_end);
761 if(initrd_start && initrd_len){
762 unsigned long offset;
764 pinitrd_start= (dom0_start + dom0_size) -
765 (PAGE_ALIGN(initrd_len) + 4*1024*1024);
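/* i.e. place the initrd near the top of dom0's memory, leaving a 4MB gap above it. */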
766 if (pinitrd_start <= pstart_info)
767 panic("%s: not enough memory assigned to dom0", __func__);
769 for (offset = 0; offset < initrd_len; offset += PAGE_SIZE) {
770 struct page_info *p;
771 p = assign_new_domain_page(d, pinitrd_start + offset);
772 if (p == NULL)
773 panic("%s: can't allocate page for initrd image", __func__);
774 if (initrd_len < offset + PAGE_SIZE)
775 memcpy(page_to_virt(p), (void*)(initrd_start + offset),
776 initrd_len - offset);
777 else
778 copy_page(page_to_virt(p), (void*)(initrd_start + offset));
779 }
780 }
782 printk("METAPHYSICAL MEMORY ARRANGEMENT:\n"
783 " Kernel image: %lx->%lx\n"
784 " Entry address: %lx\n"
785 " Init. ramdisk: %lx len %lx\n"
786 " Start info.: %lx->%lx\n",
787 pkern_start, pkern_end, pkern_entry, pinitrd_start, initrd_len,
788 pstart_info, pstart_info + PAGE_SIZE);
790 if ( (pkern_end - pkern_start) > (max_pages * PAGE_SIZE) )
791 {
792 printk("Initial guest OS requires too much space\n"
793 "(%luMB is greater than %luMB limit)\n",
794 (pkern_end-pkern_start)>>20,
795 (max_pages <<PAGE_SHIFT)>>20);
796 return -ENOMEM;
797 }
799 // if high 3 bits of pkern start are non-zero, error
801 // if pkern end is after end of metaphysical memory, error
802 // (we should be able to deal with this... later)
804 /* Mask all upcalls... */
805 for ( i = 1; i < MAX_VIRT_CPUS; i++ )
806 d->shared_info->vcpu_info[i].evtchn_upcall_mask = 1;
808 if (dom0_max_vcpus == 0)
809 dom0_max_vcpus = MAX_VIRT_CPUS;
810 if (dom0_max_vcpus > num_online_cpus())
811 dom0_max_vcpus = num_online_cpus();
812 if (dom0_max_vcpus > MAX_VIRT_CPUS)
813 dom0_max_vcpus = MAX_VIRT_CPUS;
815 printf ("Dom0 max_vcpus=%d\n", dom0_max_vcpus);
816 for ( i = 1; i < dom0_max_vcpus; i++ )
817 if (alloc_vcpu(d, i, i) == NULL)
818 printf ("Cannot allocate dom0 vcpu %d\n", i);
820 #if defined(VALIDATE_VT) && !defined(CONFIG_XEN_IA64_DOM0_VP)
821 /* Construct a frame-allocation list for the initial domain, since these
822 * pages are allocated by the boot allocator and their pfns are not set up properly.
823 */
824 for ( mfn = (alloc_start>>PAGE_SHIFT);
825 mfn < (alloc_end>>PAGE_SHIFT);
826 mfn++ )
827 {
828 page = mfn_to_page(mfn);
829 page_set_owner(page, d);
830 page->u.inuse.type_info = 0;
831 page->count_info = PGC_allocated | 1;
832 list_add_tail(&page->list, &d->page_list);
834 /* Construct 1:1 mapping */
835 set_gpfn_from_mfn(mfn, mfn);
836 }
837 #endif
839 /* Copy the OS image. */
840 loaddomainelfimage(d,image_start);
842 /* Copy the initial ramdisk. */
843 //if ( initrd_len != 0 )
844 // memcpy((void *)vinitrd_start, initrd_start, initrd_len);
846 d->shared_info->arch.flags = SIF_INITDOMAIN|SIF_PRIVILEGED;
848 /* Set up start info area. */
849 d->shared_info->arch.start_info_pfn = pstart_info >> PAGE_SHIFT;
850 start_info_page = assign_new_domain_page(d, pstart_info);
851 if (start_info_page == NULL)
852 panic("can't allocate start info page");
853 si = page_to_virt(start_info_page);
854 memset(si, 0, PAGE_SIZE);
855 sprintf(si->magic, "xen-%i.%i-ia64", XEN_VERSION, XEN_SUBVERSION);
856 si->nr_pages = max_pages;
858 console_endboot();
860 printk("Dom0: 0x%lx\n", (u64)dom0);
862 #ifdef VALIDATE_VT
863 /* VMX specific construction for Dom0, if hardware supports VMX
864 * and Dom0 is unmodified image
865 */
866 if (vmx_dom0)
867 vmx_final_setup_guest(v);
868 #endif
870 set_bit(_VCPUF_initialised, &v->vcpu_flags);
872 cmdline_addr = dom_fw_setup(d, dom0_command_line, COMMAND_LINE_SIZE);
874 vcpu_init_regs (v);
876 #ifdef CONFIG_DOMAIN0_CONTIGUOUS
877 pkern_entry += dom0_start;
878 #endif
879 vcpu_regs (v)->cr_iip = pkern_entry;
880 vcpu_regs (v)->r28 = cmdline_addr;
882 physdev_init_dom0(d);
884 // FIXME: Hack for keyboard input
885 //serial_input_init();
887 return 0;
888 }
890 void machine_restart(char * __unused)
891 {
892 if (running_on_sim)
893 printf ("machine_restart called. spinning...\n");
894 else
895 (*efi.reset_system)(EFI_RESET_WARM,0,0,NULL);
896 while(1);
897 }
899 void machine_halt(void)
900 {
901 if (running_on_sim)
902 printf ("machine_halt called. spinning...\n");
903 else
904 (*efi.reset_system)(EFI_RESET_SHUTDOWN,0,0,NULL);
905 while(1);
906 }
908 void sync_vcpu_execstate(struct vcpu *v)
909 {
910 // __ia64_save_fpu(v->arch._thread.fph);
911 // if (VMX_DOMAIN(v))
912 // vmx_save_state(v);
913 // FIXME SMP: Anything else needed here for SMP?
914 }
916 static void parse_dom0_mem(char *s)
917 {
918 dom0_size = parse_size_and_unit(s);
919 }
920 custom_param("dom0_mem", parse_dom0_mem);
923 static void parse_dom0_align(char *s)
924 {
925 dom0_align = parse_size_and_unit(s);
926 }
927 custom_param("dom0_align", parse_dom0_align);
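/*
 * These knobs come from Xen's boot command line, e.g. (illustrative values
 * only): dom0_mem=256M dom0_align=64M dom0_max_vcpus=2.  dom0_mem and
 * dom0_align go through parse_size_and_unit(), so the K/M suffixes shown in
 * the alloc_dom0() panic messages above are accepted.
 */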