ia64/xen-unstable

view xen/arch/ia64/xen/domain.c @ 9758:ae0d41bd3bba

[IA64] domain0 builder change

Make the domain0 builder work with the dom0 VP (virtual physical) model.

Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
Signed-off-by: Alex Williamson <alex.williamson@hp.com>
author awilliam@ldap.hp.com
date Tue Apr 25 13:48:02 2006 -0600 (2006-04-25)
parents 14a34d811e81
children 7a9a00c51588
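A sketch of what the dom0 VP build path amounts to (an informal illustration distilled from construct_dom0() in the listing below; the names are the ones used there): with CONFIG_XEN_IA64_DOM0_VP enabled, CONFIG_DOMAIN0_CONTIGUOUS is left undefined, so dom0 memory is no longer carved out as one contiguous chunk at boot; the builder instead assigns and maps dom0 pages one at a time, roughly:

    for (i = 0; i < max_pages; i++)
        assign_new_domain0_page(d, i << PAGE_SHIFT);
    d->arch.physmap_built = 1;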
line source
1 /*
2 * Copyright (C) 1995 Linus Torvalds
3 *
4 * Pentium III FXSR, SSE support
5 * Gareth Hughes <gareth@valinux.com>, May 2000
6 *
7 * Copyright (C) 2005 Intel Co
8 * Kun Tian (Kevin Tian) <kevin.tian@intel.com>
9 *
10 * 05/04/29 Kun Tian (Kevin Tian) <kevin.tian@intel.com> Add VTI domain support
11 */
13 #include <xen/config.h>
14 #include <xen/init.h>
15 #include <xen/lib.h>
16 #include <xen/errno.h>
17 #include <xen/sched.h>
18 #include <xen/smp.h>
19 #include <xen/delay.h>
20 #include <xen/softirq.h>
21 #include <xen/mm.h>
22 #include <xen/iocap.h>
23 #include <asm/ptrace.h>
24 #include <asm/system.h>
25 #include <asm/io.h>
26 #include <asm/processor.h>
27 #include <asm/desc.h>
28 #include <asm/hw_irq.h>
29 #include <asm/setup.h>
30 //#include <asm/mpspec.h>
31 #include <xen/irq.h>
32 #include <xen/event.h>
33 //#include <xen/shadow.h>
34 #include <xen/console.h>
35 #include <xen/compile.h>
37 #include <xen/elf.h>
38 //#include <asm/page.h>
39 #include <asm/pgalloc.h>
41 #include <asm/offsets.h> /* for IA64_THREAD_INFO_SIZE */
43 #include <asm/vcpu.h> /* for function declarations */
44 #include <public/arch-ia64.h>
45 #include <asm/vmx.h>
46 #include <asm/vmx_vcpu.h>
47 #include <asm/vmx_vpd.h>
48 #include <asm/vmx_phy_mode.h>
49 #include <asm/pal.h>
50 #include <asm/vhpt.h>
51 #include <public/hvm/ioreq.h>
52 #include <public/arch-ia64.h>
53 #include <asm/tlbflush.h>
54 #include <asm/regionreg.h>
55 #include <asm/dom_fw.h>
57 #ifndef CONFIG_XEN_IA64_DOM0_VP
58 #define CONFIG_DOMAIN0_CONTIGUOUS
59 #endif
60 unsigned long dom0_start = -1L;
61 unsigned long dom0_size = 512*1024*1024;
62 unsigned long dom0_align = 64*1024*1024;
64 /* dom0_max_vcpus: maximum number of VCPUs to create for dom0. */
65 static unsigned int dom0_max_vcpus = 1;
66 integer_param("dom0_max_vcpus", dom0_max_vcpus);
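/*
 * Illustration (hypothetical values): dom0_max_vcpus above, and the
 * dom0_mem / dom0_align options parsed at the end of this file, are Xen
 * boot-command-line parameters, so a boot line might carry something like:
 *
 *     dom0_mem=256M dom0_align=64M dom0_max_vcpus=2
 *
 * (dom0_mem=256M is the same syntax suggested by the error message in
 * parse_dom0_mem() below).
 */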
68 // initialized by arch/ia64/setup.c:find_initrd()
69 unsigned long initrd_start = 0, initrd_end = 0;
70 extern unsigned long running_on_sim;
72 #define IS_XEN_ADDRESS(d,a) ((a >= d->xen_vastart) && (a <= d->xen_vaend))
74 /* FIXME: where should these declarations live? */
75 extern void domain_pend_keyboard_interrupt(int);
76 extern long platform_is_hp_ski(void);
77 extern void sync_split_caches(void);
78 extern void serial_input_init(void);
80 static void init_switch_stack(struct vcpu *v);
81 void build_physmap_table(struct domain *d);
83 /* this belongs in include/asm, but there doesn't seem to be a suitable place */
84 void arch_domain_destroy(struct domain *d)
85 {
86 struct page_info *page;
87 struct list_head *ent, *prev;
89 if (d->arch.mm->pgd != NULL)
90 {
91 list_for_each ( ent, &d->arch.mm->pt_list )
92 {
93 page = list_entry(ent, struct page_info, list);
94 prev = ent->prev;
95 list_del(ent);
96 free_xenheap_page(page_to_virt(page));
97 ent = prev;
98 }
99 pgd_free(d->arch.mm->pgd);
100 }
101 if (d->arch.mm != NULL)
102 xfree(d->arch.mm);
103 if (d->shared_info != NULL)
104 free_xenheap_page(d->shared_info);
106 deallocate_rid_range(d);
108 /* Is it really a good idea to do this here? */
109 flush_tlb_all();
111 /* Is it really a good idea to do this here? */
112 vhpt_flush_all();
113 }
115 static void default_idle(void)
116 {
117 int cpu = smp_processor_id();
118 local_irq_disable();
119 if ( !softirq_pending(cpu))
120 safe_halt();
121 local_irq_enable();
122 }
124 static void continue_cpu_idle_loop(void)
125 {
126 int cpu = smp_processor_id();
127 for ( ; ; )
128 {
129 #ifdef IA64
130 // __IRQ_STAT(cpu, idle_timestamp) = jiffies
131 #else
132 irq_stat[cpu].idle_timestamp = jiffies;
133 #endif
134 while ( !softirq_pending(cpu) )
135 default_idle();
136 add_preempt_count(SOFTIRQ_OFFSET);
137 raise_softirq(SCHEDULE_SOFTIRQ);
138 do_softirq();
139 sub_preempt_count(SOFTIRQ_OFFSET);
140 }
141 }
143 void startup_cpu_idle_loop(void)
144 {
145 /* Just some sanity to ensure that the scheduler is set up okay. */
146 ASSERT(current->domain == IDLE_DOMAIN_ID);
147 raise_softirq(SCHEDULE_SOFTIRQ);
148 #if 0
149 //do we have to ensure the idle task has a shared page so that, for example,
150 //region registers can be loaded from it. Apparently not...
151 idle0_task.shared_info = (void *)alloc_xenheap_page();
152 memset(idle0_task.shared_info, 0, PAGE_SIZE);
153 /* pin mapping */
154 // FIXME: Does this belong here? Or do only at domain switch time?
155 {
156 /* WARNING: following must be inlined to avoid nested fault */
157 unsigned long psr = ia64_clear_ic();
158 ia64_itr(0x2, IA64_TR_SHARED_INFO, SHAREDINFO_ADDR,
159 pte_val(pfn_pte(ia64_tpa(idle0_task.shared_info) >> PAGE_SHIFT, PAGE_KERNEL)),
160 PAGE_SHIFT);
161 ia64_set_psr(psr);
162 ia64_srlz_i();
163 }
164 #endif
166 continue_cpu_idle_loop();
167 }
169 struct vcpu *alloc_vcpu_struct(struct domain *d, unsigned int vcpu_id)
170 {
171 struct vcpu *v;
172 struct thread_info *ti;
174 /* Keep idle vcpu0 statically allocated at compile time, because
175 * some Linux-derived code still requires it in the early boot phase.
176 */
177 if (is_idle_domain(d) && !vcpu_id)
178 v = idle_vcpu[0];
179 else {
180 if ((v = alloc_xenheap_pages(KERNEL_STACK_SIZE_ORDER)) == NULL)
181 return NULL;
182 memset(v, 0, sizeof(*v));
184 ti = alloc_thread_info(v);
185 /* Clear thread_info to clear some important fields, like
186 * preempt_count
187 */
188 memset(ti, 0, sizeof(struct thread_info));
189 init_switch_stack(v);
190 }
192 if (!is_idle_domain(d)) {
193 v->arch.privregs =
194 alloc_xenheap_pages(get_order(sizeof(mapped_regs_t)));
195 BUG_ON(v->arch.privregs == NULL);
196 memset(v->arch.privregs, 0, PAGE_SIZE);
198 if (!vcpu_id)
199 memset(&d->shared_info->evtchn_mask[0], 0xff,
200 sizeof(d->shared_info->evtchn_mask));
202 v->vcpu_info = &(d->shared_info->vcpu_info[0]);
203 v->arch.metaphysical_rr0 = d->arch.metaphysical_rr0;
204 v->arch.metaphysical_rr4 = d->arch.metaphysical_rr4;
205 v->arch.metaphysical_saved_rr0 = d->arch.metaphysical_rr0;
206 v->arch.metaphysical_saved_rr4 = d->arch.metaphysical_rr4;
208 /* Is this correct?
209 It depends on how the domain uses RIDs.
211 A domain may share RIDs among its processors (e.g. when it has a
212 global VHPT). In that case we should also share RIDs among vcpus,
213 and the RID range should be the same.
215 However, a domain may instead allocate RIDs per cpu. In that case
216 we don't want to share RIDs among vcpus, though we may still do so
217 if two vcpus run on the same cpu... */
219 v->arch.starting_rid = d->arch.starting_rid;
220 v->arch.ending_rid = d->arch.ending_rid;
221 v->arch.breakimm = d->arch.breakimm;
222 }
224 return v;
225 }
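/*
 * Worked illustration of the RID remark above (hypothetical numbers): if
 * allocate_rid_range() left the domain with starting_rid = 0x40000 and
 * ending_rid = 0x60000, the assignments above give every vcpu of that
 * domain the same [0x40000, 0x60000) range; i.e. with the current code
 * RIDs are shared domain-wide rather than allocated per vcpu.
 */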
227 void free_vcpu_struct(struct vcpu *v)
228 {
229 if (VMX_DOMAIN(v))
230 vmx_relinquish_vcpu_resources(v);
231 else {
232 if (v->arch.privregs != NULL)
233 free_xenheap_pages(v->arch.privregs, get_order(sizeof(mapped_regs_t)));
234 }
236 free_xenheap_pages(v, KERNEL_STACK_SIZE_ORDER);
237 }
239 static void init_switch_stack(struct vcpu *v)
240 {
241 struct pt_regs *regs = vcpu_regs (v);
242 struct switch_stack *sw = (struct switch_stack *) regs - 1;
243 extern void ia64_ret_from_clone;
245 memset(sw, 0, sizeof(struct switch_stack) + sizeof(struct pt_regs));
246 sw->ar_bspstore = (unsigned long)v + IA64_RBS_OFFSET;
247 sw->b0 = (unsigned long) &ia64_ret_from_clone;
248 sw->ar_fpsr = FPSR_DEFAULT;
249 v->arch._thread.ksp = (unsigned long) sw - 16;
250 // Stay on the kernel stack because we may get interrupts!
251 // ia64_ret_from_clone (which b0 gets in new_thread) switches
252 // to user stack
253 v->arch._thread.on_ustack = 0;
254 memset(v->arch._thread.fph,0,sizeof(struct ia64_fpreg)*96);
255 }
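/*
 * Rough sketch of the layout set up above (inferred from this function as
 * an illustration, not an authoritative diagram):
 *
 *   high addresses:  struct pt_regs       (vcpu_regs(v))
 *                    struct switch_stack  (sw = (struct switch_stack *)regs - 1)
 *                    16-byte scratch      (v->arch._thread.ksp = (unsigned long)sw - 16)
 *                    ... memory stack grows downward ...
 *                    ... RSE backing store grows upward from (unsigned long)v + IA64_RBS_OFFSET ...
 *   low addresses:   struct vcpu / thread_info at the base of the allocation
 */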
257 int arch_domain_create(struct domain *d)
258 {
259 // the following will eventually need to be negotiated dynamically
260 d->xen_vastart = XEN_START_ADDR;
261 d->xen_vaend = XEN_END_ADDR;
262 d->shared_info_va = SHAREDINFO_ADDR;
264 if (is_idle_domain(d))
265 return 0;
267 if ((d->shared_info = (void *)alloc_xenheap_page()) == NULL)
268 goto fail_nomem;
269 memset(d->shared_info, 0, PAGE_SIZE);
271 d->max_pages = (128UL*1024*1024)/PAGE_SIZE; // 128MB default // FIXME
272 /* We may also need an emulation RID for region 4, though it's unlikely
273 * that a guest will issue uncacheable accesses in metaphysical mode.
274 * Still, keeping such info here may be saner.
275 */
276 if (!allocate_rid_range(d,0))
277 goto fail_nomem;
278 d->arch.breakimm = 0x1000;
279 d->arch.sys_pgnr = 0;
281 if ((d->arch.mm = xmalloc(struct mm_struct)) == NULL)
282 goto fail_nomem;
283 memset(d->arch.mm, 0, sizeof(*d->arch.mm));
284 INIT_LIST_HEAD(&d->arch.mm->pt_list);
286 d->arch.physmap_built = 0;
287 if ((d->arch.mm->pgd = pgd_alloc(d->arch.mm)) == NULL)
288 goto fail_nomem;
290 printf ("arch_domain_create: domain=%p\n", d);
291 return 0;
293 fail_nomem:
294 if (d->arch.mm->pgd != NULL)
295 pgd_free(d->arch.mm->pgd);
296 if (d->arch.mm != NULL)
297 xfree(d->arch.mm);
298 if (d->shared_info != NULL)
299 free_xenheap_page(d->shared_info);
300 return -ENOMEM;
301 }
303 void arch_getdomaininfo_ctxt(struct vcpu *v, struct vcpu_guest_context *c)
304 {
305 struct pt_regs *regs = vcpu_regs (v);
307 printf("arch_getdomaininfo_ctxt\n");
308 c->regs = *regs;
309 c->vcpu.evtchn_vector = v->vcpu_info->arch.evtchn_vector;
311 c->shared = v->domain->shared_info->arch;
312 }
314 int arch_set_info_guest(struct vcpu *v, struct vcpu_guest_context *c)
315 {
316 struct pt_regs *regs = vcpu_regs (v);
317 struct domain *d = v->domain;
319 printf("arch_set_info_guest\n");
320 if ( test_bit(_VCPUF_initialised, &v->vcpu_flags) )
321 return 0;
322 if (c->flags & VGCF_VMX_GUEST) {
323 if (!vmx_enabled) {
324 printk("No VMX hardware feature for vmx domain.\n");
325 return -EINVAL;
326 }
328 if (v == d->vcpu[0])
329 vmx_setup_platform(d, c);
331 vmx_final_setup_guest(v);
332 } else if (!d->arch.physmap_built)
333 build_physmap_table(d);
335 *regs = c->regs;
336 if (v == d->vcpu[0]) {
337 /* Only for first vcpu. */
338 d->arch.sys_pgnr = c->sys_pgnr;
339 d->arch.initrd_start = c->initrd.start;
340 d->arch.initrd_len = c->initrd.size;
341 d->arch.cmdline = c->cmdline;
342 d->shared_info->arch = c->shared;
344 /* Cache synchronization seems to be done by the linux kernel
345 during mmap/unmap operation. However be conservative. */
346 domain_cache_flush (d, 1);
347 }
348 new_thread(v, regs->cr_iip, 0, 0);
350 v->vcpu_info->arch.evtchn_vector = c->vcpu.evtchn_vector;
351 if ( c->vcpu.privregs && copy_from_user(v->arch.privregs,
352 c->vcpu.privregs, sizeof(mapped_regs_t))) {
353 printk("Bad ctxt address in arch_set_info_guest: %p\n",
354 c->vcpu.privregs);
355 return -EFAULT;
356 }
358 v->arch.domain_itm_last = -1L;
360 /* Don't redo final setup */
361 set_bit(_VCPUF_initialised, &v->vcpu_flags);
362 return 0;
363 }
365 static void relinquish_memory(struct domain *d, struct list_head *list)
366 {
367 struct list_head *ent;
368 struct page_info *page;
369 #ifndef __ia64__
370 unsigned long x, y;
371 #endif
373 /* Use a recursive lock, as we may enter 'free_domheap_page'. */
374 spin_lock_recursive(&d->page_alloc_lock);
375 ent = list->next;
376 while ( ent != list )
377 {
378 page = list_entry(ent, struct page_info, list);
379 /* Grab a reference to the page so it won't disappear from under us. */
380 if ( unlikely(!get_page(page, d)) )
381 {
382 /* Couldn't get a reference -- someone is freeing this page. */
383 ent = ent->next;
384 continue;
385 }
387 if ( test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info) )
388 put_page_and_type(page);
390 if ( test_and_clear_bit(_PGC_allocated, &page->count_info) )
391 put_page(page);
393 #ifndef __ia64__
394 /*
395 * Forcibly invalidate base page tables at this point to break circular
396 * 'linear page table' references. This is okay because MMU structures
397 * are not shared across domains and this domain is now dead. Thus base
398 * tables are not in use so a non-zero count means circular reference.
399 */
400 y = page->u.inuse.type_info;
401 for ( ; ; )
402 {
403 x = y;
404 if ( likely((x & (PGT_type_mask|PGT_validated)) !=
405 (PGT_base_page_table|PGT_validated)) )
406 break;
408 y = cmpxchg(&page->u.inuse.type_info, x, x & ~PGT_validated);
409 if ( likely(y == x) )
410 {
411 free_page_type(page, PGT_base_page_table);
412 break;
413 }
414 }
415 #endif
417 /* Follow the list chain and /then/ potentially free the page. */
418 ent = ent->next;
419 put_page(page);
420 }
422 spin_unlock_recursive(&d->page_alloc_lock);
423 }
425 void domain_relinquish_resources(struct domain *d)
426 {
427 /* Relinquish every page of memory. */
429 /* xenheap_list is not used in ia64. */
430 BUG_ON(!list_empty(&d->xenpage_list));
432 relinquish_memory(d, &d->page_list);
433 }
435 // heavily leveraged from linux/arch/ia64/kernel/process.c:copy_thread()
436 // and linux/arch/ia64/kernel/process.c:kernel_thread()
437 void new_thread(struct vcpu *v,
438 unsigned long start_pc,
439 unsigned long start_stack,
440 unsigned long start_info)
441 {
442 struct domain *d = v->domain;
443 struct pt_regs *regs;
444 extern char dom0_command_line[];
446 #ifdef CONFIG_DOMAIN0_CONTIGUOUS
447 if (d == dom0 && v->vcpu_id == 0) start_pc += dom0_start;
448 #endif
450 regs = vcpu_regs (v);
451 if (VMX_DOMAIN(v)) {
452 /* dt/rt/it:1;i/ic:1, si:1, vm/bn:1, ac:1 */
453 regs->cr_ipsr = 0x501008826008; /* Need to be expanded as macro */
454 } else {
455 regs->cr_ipsr = ia64_getreg(_IA64_REG_PSR)
456 | IA64_PSR_BITS_TO_SET | IA64_PSR_BN;
457 regs->cr_ipsr &= ~(IA64_PSR_BITS_TO_CLEAR
458 | IA64_PSR_RI | IA64_PSR_IS);
459 regs->cr_ipsr |= 2UL << IA64_PSR_CPL0_BIT; // domain runs at PL2
460 }
461 regs->cr_iip = start_pc;
462 regs->cr_ifs = 1UL << 63; /* or clear? */
463 regs->ar_fpsr = FPSR_DEFAULT;
465 if (VMX_DOMAIN(v)) {
466 vmx_init_all_rr(v);
467 if (d == dom0)
468 regs->r28 = dom_fw_setup(d,dom0_command_line,
469 COMMAND_LINE_SIZE);
470 /* Virtual processor context setup */
471 VCPU(v, vpsr) = IA64_PSR_BN;
472 VCPU(v, dcr) = 0;
473 } else {
474 init_all_rr(v);
475 if (v->vcpu_id == 0) {
476 /* Build the firmware. */
477 if (d == dom0)
478 regs->r28 = dom_fw_setup(d,dom0_command_line,
479 COMMAND_LINE_SIZE);
480 else {
481 const char *cmdline = d->arch.cmdline;
482 int len;
484 if (*cmdline == 0) {
485 #define DEFAULT_CMDLINE "nomca nosmp xencons=tty0 console=tty0 root=/dev/hda1"
486 cmdline = DEFAULT_CMDLINE;
487 len = sizeof (DEFAULT_CMDLINE);
488 printf("domU command line defaulted to"
489 DEFAULT_CMDLINE "\n");
490 }
491 else
492 len = IA64_COMMAND_LINE_SIZE;
494 regs->r28 = dom_fw_setup (d, cmdline, len);
495 }
496 d->shared_info->arch.flags = (d == dom0) ?
497 (SIF_INITDOMAIN|SIF_PRIVILEGED) : 0;
498 }
499 regs->ar_rsc |= (2 << 2); /* force PL2/3 */
500 VCPU(v, banknum) = 1;
501 VCPU(v, metaphysical_mode) = 1;
502 VCPU(v, interrupt_mask_addr) =
503 (uint64_t)SHAREDINFO_ADDR + INT_ENABLE_OFFSET(v);
504 VCPU(v, itv) = (1 << 16); /* timer vector masked */
505 }
506 }
508 static pte_t*
509 lookup_alloc_domain_pte(struct domain* d, unsigned long mpaddr)
510 {
511 struct page_info *pt;
512 struct mm_struct *mm = d->arch.mm;
513 pgd_t *pgd;
514 pud_t *pud;
515 pmd_t *pmd;
517 BUG_ON(mm->pgd == NULL);
518 pgd = pgd_offset(mm, mpaddr);
519 if (pgd_none(*pgd)) {
520 pgd_populate(mm, pgd, pud_alloc_one(mm,mpaddr));
521 pt = maddr_to_page(pgd_val(*pgd));
522 list_add_tail(&pt->list, &d->arch.mm->pt_list);
523 }
525 pud = pud_offset(pgd, mpaddr);
526 if (pud_none(*pud)) {
527 pud_populate(mm, pud, pmd_alloc_one(mm,mpaddr));
528 pt = maddr_to_page(pud_val(*pud));
529 list_add_tail(&pt->list, &d->arch.mm->pt_list);
530 }
532 pmd = pmd_offset(pud, mpaddr);
533 if (pmd_none(*pmd)) {
534 pmd_populate_kernel(mm, pmd, pte_alloc_one_kernel(mm, mpaddr));
535 pt = maddr_to_page(pmd_val(*pmd));
536 list_add_tail(&pt->list, &d->arch.mm->pt_list);
537 }
539 return pte_offset_map(pmd, mpaddr);
540 }
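/*
 * Typical calling pattern for lookup_alloc_domain_pte(), shown only as an
 * illustration (it mirrors assign_domain_page() further below):
 *
 *	pte_t *pte = lookup_alloc_domain_pte(d, mpaddr);
 *	if (pte_none(*pte))
 *		set_pte(pte, pfn_pte(physaddr >> PAGE_SHIFT,
 *		        __pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX)));
 *
 * i.e. the helper walks the pgd/pud/pmd levels (allocating them as needed)
 * and hands back a pte slot; the caller decides what to install there.
 */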
542 //XXX Should xxx_none() be used instead of !xxx_present()?
543 static pte_t*
544 lookup_noalloc_domain_pte(struct domain* d, unsigned long mpaddr)
545 {
546 struct mm_struct *mm = d->arch.mm;
547 pgd_t *pgd;
548 pud_t *pud;
549 pmd_t *pmd;
551 BUG_ON(mm->pgd == NULL);
552 pgd = pgd_offset(mm, mpaddr);
553 if (!pgd_present(*pgd))
554 goto not_present;
556 pud = pud_offset(pgd, mpaddr);
557 if (!pud_present(*pud))
558 goto not_present;
560 pmd = pmd_offset(pud, mpaddr);
561 if (!pmd_present(*pmd))
562 goto not_present;
564 return pte_offset_map(pmd, mpaddr);
566 not_present:
567 return NULL;
568 }
570 #ifdef CONFIG_XEN_IA64_DOM0_VP
571 static pte_t*
572 lookup_noalloc_domain_pte_none(struct domain* d, unsigned long mpaddr)
573 {
574 struct mm_struct *mm = d->arch.mm;
575 pgd_t *pgd;
576 pud_t *pud;
577 pmd_t *pmd;
579 BUG_ON(mm->pgd == NULL);
580 pgd = pgd_offset(mm, mpaddr);
581 if (pgd_none(*pgd))
582 goto not_present;
584 pud = pud_offset(pgd, mpaddr);
585 if (pud_none(*pud))
586 goto not_present;
588 pmd = pmd_offset(pud, mpaddr);
589 if (pmd_none(*pmd))
590 goto not_present;
592 return pte_offset_map(pmd, mpaddr);
594 not_present:
595 return NULL;
596 }
597 #endif
599 /* Allocate a new page for domain and map it to the specified metaphysical
600 address. */
601 struct page_info *
602 __assign_new_domain_page(struct domain *d, unsigned long mpaddr, pte_t* pte)
603 {
604 struct page_info *p = NULL;
605 unsigned long maddr;
607 BUG_ON(!pte_none(*pte));
609 #ifdef CONFIG_DOMAIN0_CONTIGUOUS
610 if (d == dom0) {
611 #if 0
612 if (mpaddr < dom0_start || mpaddr >= dom0_start + dom0_size) {
613 /* FIXME: is it true ?
614 dom0 memory is not contiguous! */
615 panic("assign_new_domain_page: bad domain0 "
616 "mpaddr=%lx, start=%lx, end=%lx!\n",
617 mpaddr, dom0_start, dom0_start+dom0_size);
618 }
619 #endif
620 p = mfn_to_page((mpaddr >> PAGE_SHIFT));
621 return p;
622 }
623 else
624 #endif
625 {
626 p = alloc_domheap_page(d);
627 // zero out pages for security reasons
628 if (p)
629 clear_page(page_to_virt(p));
630 }
631 if (unlikely(!p)) {
632 printf("assign_new_domain_page: Can't alloc!!!! Aaaargh!\n");
633 return(p);
634 }
635 maddr = page_to_maddr (p);
636 if (unlikely(maddr > __get_cpu_var(vhpt_paddr)
637 && maddr < __get_cpu_var(vhpt_pend))) {
638 /* FIXME: how can this happen ?
639 vhpt is allocated by alloc_domheap_page. */
640 printf("assign_new_domain_page: reassigned vhpt page %lx!!\n",
641 maddr);
642 }
644 set_pte(pte, pfn_pte(maddr >> PAGE_SHIFT,
645 __pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX)));
647 //XXX CONFIG_XEN_IA64_DOM0_VP
648 // TODO racy
649 if ((mpaddr & GPFN_IO_MASK) == GPFN_MEM)
650 set_gpfn_from_mfn(page_to_mfn(p), mpaddr >> PAGE_SHIFT);
651 return p;
652 }
654 struct page_info *
655 assign_new_domain_page(struct domain *d, unsigned long mpaddr)
656 {
657 #ifdef CONFIG_DOMAIN0_CONTIGUOUS
658 pte_t dummy_pte = __pte(0);
659 return __assign_new_domain_page(d, mpaddr, &dummy_pte);
660 #else
661 struct page_info *p = NULL;
662 pte_t *pte;
664 pte = lookup_alloc_domain_pte(d, mpaddr);
665 if (pte_none(*pte)) {
666 p = __assign_new_domain_page(d, mpaddr, pte);
667 } else {
668 DPRINTK("%s: d 0x%p mpaddr %lx already mapped!\n",
669 __func__, d, mpaddr);
670 }
672 return p;
673 #endif
674 }
676 void
677 assign_new_domain0_page(struct domain *d, unsigned long mpaddr)
678 {
679 #ifndef CONFIG_DOMAIN0_CONTIGUOUS
680 pte_t *pte;
682 BUG_ON(d != dom0);
683 pte = lookup_alloc_domain_pte(d, mpaddr);
684 if (pte_none(*pte)) {
685 struct page_info *p = __assign_new_domain_page(d, mpaddr, pte);
686 if (p == NULL) {
687 panic("%s: can't allocate page for dom0", __func__);
688 }
689 }
690 #endif
691 }
693 /* map a physical address to the specified metaphysical addr */
694 void assign_domain_page(struct domain *d, unsigned long mpaddr, unsigned long physaddr)
695 {
696 pte_t *pte;
698 pte = lookup_alloc_domain_pte(d, mpaddr);
699 if (pte_none(*pte)) {
700 set_pte(pte, pfn_pte(physaddr >> PAGE_SHIFT,
701 __pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX)));
703 //XXX CONFIG_XEN_IA64_DOM0_VP
704 // TODO racy
705 if ((mpaddr & GPFN_IO_MASK) == GPFN_MEM)
706 set_gpfn_from_mfn(physaddr >> PAGE_SHIFT, mpaddr >> PAGE_SHIFT);
707 }
708 else printk("assign_domain_page: mpaddr %lx already mapped!\n",mpaddr);
709 }
711 #ifdef CONFIG_XEN_IA64_DOM0_VP
712 static void
713 assign_domain_same_page(struct domain *d,
714 unsigned long mpaddr, unsigned long size)
715 {
716 //XXX optimization
717 unsigned long end = mpaddr + size;
718 for (; mpaddr < end; mpaddr += PAGE_SIZE) {
719 assign_domain_page(d, mpaddr, mpaddr);
720 }
721 }
723 unsigned long
724 assign_domain_mmio_page(struct domain *d,
725 unsigned long mpaddr, unsigned long size)
726 {
727 if (size == 0) {
728 DPRINTK("%s: domain %p mpaddr 0x%lx size = 0x%lx\n",
729 __func__, d, mpaddr, size);
730 }
731 assign_domain_same_page(d, mpaddr, size);
732 return mpaddr;
733 }
735 unsigned long
736 assign_domain_mach_page(struct domain *d,
737 unsigned long mpaddr, unsigned long size)
738 {
739 assign_domain_same_page(d, mpaddr, size);
740 return mpaddr;
741 }
743 //XXX sledgehammer.
744 // Should flush a finer range instead.
745 void
746 domain_page_flush(struct domain* d, unsigned long mpaddr,
747 unsigned long old_mfn, unsigned long new_mfn)
748 {
749 struct vcpu* v;
750 //XXX SMP
751 for_each_vcpu(d, v) {
752 vcpu_purge_tr_entry(&v->arch.dtlb);
753 vcpu_purge_tr_entry(&v->arch.itlb);
754 }
756 // flush vhpt
757 vhpt_flush();
758 // flush tlb
759 flush_tlb_all();
760 }
762 static void
763 zap_domain_page_one(struct domain *d, unsigned long mpaddr)
764 {
765 struct mm_struct *mm = d->arch.mm;
766 pte_t *pte;
767 pte_t old_pte;
768 unsigned long mfn;
769 struct page_info *page;
771 pte = lookup_noalloc_domain_pte_none(d, mpaddr);
772 if (pte == NULL)
773 return;
774 if (pte_none(*pte))
775 return;
777 // update pte
778 old_pte = ptep_get_and_clear(mm, mpaddr, pte);
779 mfn = pte_pfn(old_pte);
780 page = mfn_to_page(mfn);
782 if (page_get_owner(page) == d) {
783 BUG_ON(get_gpfn_from_mfn(mfn) != (mpaddr >> PAGE_SHIFT));
784 set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY);
785 }
787 domain_page_flush(d, mpaddr, mfn, INVALID_MFN);
789 put_page(page);
790 }
791 #endif
793 void build_physmap_table(struct domain *d)
794 {
795 struct list_head *list_ent = d->page_list.next;
796 unsigned long mfn, i = 0;
798 ASSERT(!d->arch.physmap_built);
799 while(list_ent != &d->page_list) {
800 mfn = page_to_mfn(list_entry(
801 list_ent, struct page_info, list));
802 assign_domain_page(d, i << PAGE_SHIFT, mfn << PAGE_SHIFT);
804 i++;
805 list_ent = mfn_to_page(mfn)->list.next;
806 }
807 d->arch.physmap_built = 1;
808 }
810 void mpafoo(unsigned long mpaddr)
811 {
812 extern unsigned long privop_trace;
813 if (mpaddr == 0x3800)
814 privop_trace = 1;
815 }
817 #ifdef CONFIG_XEN_IA64_DOM0_VP
818 unsigned long
819 ____lookup_domain_mpa(struct domain *d, unsigned long mpaddr)
820 {
821 pte_t *pte;
823 pte = lookup_noalloc_domain_pte(d, mpaddr);
824 if (pte == NULL)
825 goto not_present;
827 if (pte_present(*pte))
828 return (pte->pte & _PFN_MASK);
829 else if (VMX_DOMAIN(d->vcpu[0]))
830 return GPFN_INV_MASK;
832 not_present:
833 return INVALID_MFN;
834 }
836 unsigned long
837 __lookup_domain_mpa(struct domain *d, unsigned long mpaddr)
838 {
839 unsigned long machine = ____lookup_domain_mpa(d, mpaddr);
840 if (machine != INVALID_MFN)
841 return machine;
843 printk("%s: d 0x%p id %d current 0x%p id %d\n",
844 __func__, d, d->domain_id, current, current->vcpu_id);
845 printk("%s: bad mpa 0x%lx (max_pages 0x%lx)\n",
846 __func__, mpaddr, (unsigned long)d->max_pages << PAGE_SHIFT);
847 return INVALID_MFN;
848 }
849 #endif
851 unsigned long lookup_domain_mpa(struct domain *d, unsigned long mpaddr)
852 {
853 pte_t *pte;
855 #ifdef CONFIG_DOMAIN0_CONTIGUOUS
856 if (d == dom0) {
857 pte_t pteval;
858 if (mpaddr < dom0_start || mpaddr >= dom0_start + dom0_size) {
859 //printk("lookup_domain_mpa: bad dom0 mpaddr 0x%lx!\n",mpaddr);
860 //printk("lookup_domain_mpa: start=0x%lx,end=0x%lx!\n",dom0_start,dom0_start+dom0_size);
861 mpafoo(mpaddr);
862 }
863 pteval = pfn_pte(mpaddr >> PAGE_SHIFT,
864 __pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX));
865 pte = &pteval;
866 return *(unsigned long *)pte;
867 }
868 #endif
869 pte = lookup_noalloc_domain_pte(d, mpaddr);
870 if (pte != NULL) {
871 if (pte_present(*pte)) {
872 //printk("lookup_domain_page: found mapping for %lx, pte=%lx\n",mpaddr,pte_val(*pte));
873 return *(unsigned long *)pte;
874 } else if (VMX_DOMAIN(d->vcpu[0]))
875 return GPFN_INV_MASK;
876 }
878 printk("%s: d 0x%p id %d current 0x%p id %d\n",
879 __func__, d, d->domain_id, current, current->vcpu_id);
880 if ((mpaddr >> PAGE_SHIFT) < d->max_pages)
881 printk("%s: non-allocated mpa 0x%lx (< 0x%lx)\n", __func__,
882 mpaddr, (unsigned long)d->max_pages << PAGE_SHIFT);
883 else
884 printk("%s: bad mpa 0x%lx (=> 0x%lx)\n", __func__,
885 mpaddr, (unsigned long)d->max_pages << PAGE_SHIFT);
886 mpafoo(mpaddr);
887 return 0;
888 }
890 /* Flush cache of domain d. */
891 void domain_cache_flush (struct domain *d, int sync_only)
892 {
893 struct mm_struct *mm = d->arch.mm;
894 pgd_t *pgd = mm->pgd;
895 unsigned long maddr;
896 int i,j,k, l;
897 int nbr_page = 0;
898 void (*flush_func)(unsigned long start, unsigned long end);
899 extern void flush_dcache_range (unsigned long, unsigned long);
901 if (sync_only)
902 flush_func = &flush_icache_range;
903 else
904 flush_func = &flush_dcache_range;
906 #ifdef CONFIG_DOMAIN0_CONTIGUOUS
907 if (d == dom0) {
908 /* This is not fully correct (because of holes), but it should
909 be enough for now. */
910 (*flush_func)(__va_ul (dom0_start),
911 __va_ul (dom0_start + dom0_size));
912 return;
913 }
914 #endif
915 for (i = 0; i < PTRS_PER_PGD; pgd++, i++) {
916 pud_t *pud;
917 if (!pgd_present(*pgd))
918 continue;
919 pud = pud_offset(pgd, 0);
920 for (j = 0; j < PTRS_PER_PUD; pud++, j++) {
921 pmd_t *pmd;
922 if (!pud_present(*pud))
923 continue;
924 pmd = pmd_offset(pud, 0);
925 for (k = 0; k < PTRS_PER_PMD; pmd++, k++) {
926 pte_t *pte;
927 if (!pmd_present(*pmd))
928 continue;
929 pte = pte_offset_map(pmd, 0);
930 for (l = 0; l < PTRS_PER_PTE; pte++, l++) {
931 if (!pte_present(*pte))
932 continue;
933 /* Convert PTE to maddr. */
934 maddr = __va_ul (pte_val(*pte)
935 & _PAGE_PPN_MASK);
936 (*flush_func)(maddr, maddr+ PAGE_SIZE);
937 nbr_page++;
938 }
939 }
940 }
941 }
942 //printf ("domain_cache_flush: %d %d pages\n", d->domain_id, nbr_page);
943 }
945 // FIXME: ONLY USE FOR DOMAIN PAGE_SIZE == PAGE_SIZE
946 #if 1
947 unsigned long domain_mpa_to_imva(struct domain *d, unsigned long mpaddr)
948 {
949 unsigned long pte = lookup_domain_mpa(d,mpaddr);
950 unsigned long imva;
952 pte &= _PAGE_PPN_MASK;
953 imva = (unsigned long) __va(pte);
954 imva |= mpaddr & ~PAGE_MASK;
955 return(imva);
956 }
957 #else
958 unsigned long domain_mpa_to_imva(struct domain *d, unsigned long mpaddr)
959 {
960 unsigned long imva = __gpa_to_mpa(d, mpaddr);
962 return __va(imva);
963 }
964 #endif
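/*
 * Illustration of the arithmetic above (hypothetical values): if
 * lookup_domain_mpa() returns a pte whose _PAGE_PPN_MASK bits encode
 * machine address 0x0800000, then for mpaddr = 0x1234 the result is
 * __va(0x0800000) | 0x234; the page-offset bits of the metaphysical
 * address are carried over into the Xen-virtual view of the machine page.
 */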
966 // remove following line if not privifying in memory
967 //#define HAVE_PRIVIFY_MEMORY
968 #ifndef HAVE_PRIVIFY_MEMORY
969 #define privify_memory(x,y) do {} while(0)
970 #endif
972 // see arch/x86/xxx/domain_build.c
973 int elf_sanity_check(Elf_Ehdr *ehdr)
974 {
975 if (!(IS_ELF(*ehdr)))
976 {
977 printk("DOM0 image is not a Xen-compatible Elf image.\n");
978 return 0;
979 }
980 return 1;
981 }
983 static void copy_memory(void *dst, void *src, int size)
984 {
985 int remain;
987 if (IS_XEN_ADDRESS(dom0,(unsigned long) src)) {
988 memcpy(dst,src,size);
989 }
990 else {
991 printf("About to call __copy_from_user(%p,%p,%d)\n",
992 dst,src,size);
993 while ((remain = __copy_from_user(dst,src,size)) != 0) {
994 printf("incomplete user copy, %d remain of %d\n",
995 remain,size);
996 dst += size - remain; src += size - remain;
997 size -= remain;
998 }
999 }
1000 }
1002 static void loaddomainelfimage(struct domain *d, unsigned long image_start)
1003 {
1004 char *elfbase = (char *) image_start;
1005 //Elf_Ehdr *ehdr = (Elf_Ehdr *)image_start;
1006 Elf_Ehdr ehdr;
1007 Elf_Phdr phdr;
1008 int h, filesz, memsz;
1009 unsigned long elfaddr, dom_mpaddr, dom_imva;
1010 struct page_info *p;
1012 copy_memory(&ehdr, (void *) image_start, sizeof(Elf_Ehdr));
1013 for ( h = 0; h < ehdr.e_phnum; h++ ) {
1014 copy_memory(&phdr,
1015 elfbase + ehdr.e_phoff + (h*ehdr.e_phentsize),
1016 sizeof(Elf_Phdr));
1017 if ((phdr.p_type != PT_LOAD))
1018 continue;
1020 filesz = phdr.p_filesz;
1021 memsz = phdr.p_memsz;
1022 elfaddr = (unsigned long) elfbase + phdr.p_offset;
1023 dom_mpaddr = phdr.p_paddr;
1025 //printf("p_offset: %x, size=%x\n",elfaddr,filesz);
1026 #ifdef CONFIG_DOMAIN0_CONTIGUOUS
1027 if (d == dom0) {
1028 if (dom_mpaddr+memsz>dom0_size)
1029 panic("Dom0 doesn't fit in memory space!\n");
1030 dom_imva = __va_ul(dom_mpaddr + dom0_start);
1031 copy_memory((void *)dom_imva, (void *)elfaddr, filesz);
1032 if (memsz > filesz)
1033 memset((void *)dom_imva+filesz, 0,
1034 memsz-filesz);
1035 //FIXME: This test for code seems to find a lot more than objdump -x does
1036 if (phdr.p_flags & PF_X) {
1037 privify_memory(dom_imva,filesz);
1038 flush_icache_range (dom_imva, dom_imva+filesz);
1039 }
1040 }
1041 else
1042 #endif
1043 while (memsz > 0) {
1044 p = assign_new_domain_page(d,dom_mpaddr);
1045 BUG_ON (unlikely(p == NULL));
1046 dom_imva = __va_ul(page_to_maddr(p));
1047 if (filesz > 0) {
1048 if (filesz >= PAGE_SIZE)
1049 copy_memory((void *) dom_imva,
1050 (void *) elfaddr,
1051 PAGE_SIZE);
1052 else {
1053 // copy partial page
1054 copy_memory((void *) dom_imva,
1055 (void *) elfaddr, filesz);
1056 // zero the rest of page
1057 memset((void *) dom_imva+filesz, 0,
1058 PAGE_SIZE-filesz);
1059 }
1060 //FIXME: This test for code seems to find a lot more than objdump -x does
1061 if (phdr.p_flags & PF_X) {
1062 privify_memory(dom_imva,PAGE_SIZE);
1063 flush_icache_range(dom_imva,
1064 dom_imva+PAGE_SIZE);
1065 }
1066 }
1067 else if (memsz > 0) {
1068 /* always zero out entire page */
1069 memset((void *) dom_imva, 0, PAGE_SIZE);
1070 }
1071 memsz -= PAGE_SIZE;
1072 filesz -= PAGE_SIZE;
1073 elfaddr += PAGE_SIZE;
1074 dom_mpaddr += PAGE_SIZE;
1075 }
1076 }
1077 }
1079 void alloc_dom0(void)
1080 {
1081 if (platform_is_hp_ski()) {
1082 dom0_size = 128*1024*1024; //FIXME: Should be configurable
1083 }
1084 #ifdef CONFIG_DOMAIN0_CONTIGUOUS
1085 printf("alloc_dom0: starting (initializing %lu MB...)\n",dom0_size/(1024*1024));
1087 /* FIXME: The first chunk (say 256M) should always be assigned to
1088 * Dom0, since Dom0's physical address == machine address, for DMA purposes.
1089 * Some old Linux versions, like 2.4, assume physical memory exists
1090 * in the 2nd 64M of the address space.
1091 */
1092 dom0_start = alloc_boot_pages(dom0_size >> PAGE_SHIFT, dom0_align >> PAGE_SHIFT);
1093 dom0_start <<= PAGE_SHIFT;
1094 if (!dom0_start) {
1095 printf("alloc_dom0: can't allocate contiguous memory size=%lu\n",
1096 dom0_size);
1097 while(1);
1098 }
1099 printf("alloc_dom0: dom0_start=0x%lx\n", dom0_start);
1100 #else
1101 // no need to allocate pages for now
1102 // pages are allocated by map_new_domain_page() via loaddomainelfimage()
1103 dom0_start = 0;
1104 #endif
1105 }
1109 /*
1110 * Domain 0 has unrestricted, direct access to all devices. However,
1111 * the main point of this stub is to allow alloc_dom_mem to be
1112 * handled for order > 0 requests. Dom0 requires that bit set to
1113 * allocate memory for other domains.
1114 */
1115 static void physdev_init_dom0(struct domain *d)
1116 {
1117 if (iomem_permit_access(d, 0UL, ~0UL))
1118 BUG();
1119 if (irqs_permit_access(d, 0, NR_PIRQS-1))
1120 BUG();
1121 }
1123 static unsigned int vmx_dom0 = 0;
1124 int construct_dom0(struct domain *d,
1125 unsigned long image_start, unsigned long image_len,
1126 unsigned long initrd_start, unsigned long initrd_len,
1127 char *cmdline)
1128 {
1129 int i, rc;
1130 unsigned long alloc_start, alloc_end;
1131 start_info_t *si;
1132 struct vcpu *v = d->vcpu[0];
1133 unsigned long max_pages;
1135 struct domain_setup_info dsi;
1136 unsigned long p_start;
1137 unsigned long pkern_start;
1138 unsigned long pkern_entry;
1139 unsigned long pkern_end;
1140 unsigned long pinitrd_start = 0;
1141 unsigned long pstart_info;
1142 struct page_info *start_info_page;
1144 #ifdef VALIDATE_VT
1145 unsigned long mfn;
1146 struct page_info *page = NULL;
1147 #endif
1149 //printf("construct_dom0: starting\n");
1151 /* Sanity! */
1152 BUG_ON(d != dom0);
1153 BUG_ON(d->vcpu[0] == NULL);
1154 BUG_ON(test_bit(_VCPUF_initialised, &v->vcpu_flags));
1156 memset(&dsi, 0, sizeof(struct domain_setup_info));
1158 printk("*** LOADING DOMAIN 0 ***\n");
1160 alloc_start = dom0_start;
1161 alloc_end = dom0_start + dom0_size;
1162 max_pages = dom0_size / PAGE_SIZE;
1163 d->max_pages = max_pages;
1164 #ifndef CONFIG_XEN_IA64_DOM0_VP
1165 d->tot_pages = d->max_pages;
1166 #else
1167 d->tot_pages = 0;
1168 #endif
1169 dsi.image_addr = (unsigned long)image_start;
1170 dsi.image_len = image_len;
1171 rc = parseelfimage(&dsi);
1172 if ( rc != 0 )
1173 return rc;
1175 #ifdef VALIDATE_VT
1176 /* Temp workaround */
1177 if (running_on_sim)
1178 dsi.xen_section_string = (char *)1;
1180 /* Check whether dom0 is vti domain */
1181 if ((!vmx_enabled) && !dsi.xen_section_string) {
1182 printk("Lack of hardware support for unmodified vmx dom0\n");
1183 panic("");
1184 }
1186 if (vmx_enabled && !dsi.xen_section_string) {
1187 printk("Dom0 is vmx domain!\n");
1188 vmx_dom0 = 1;
1189 }
1190 #endif
1192 p_start = dsi.v_start;
1193 pkern_start = dsi.v_kernstart;
1194 pkern_end = dsi.v_kernend;
1195 pkern_entry = dsi.v_kernentry;
1197 //printk("p_start=%lx, pkern_start=%lx, pkern_end=%lx, pkern_entry=%lx\n",p_start,pkern_start,pkern_end,pkern_entry);
1199 if ( (p_start & (PAGE_SIZE-1)) != 0 )
1200 {
1201 printk("Initial guest OS must load to a page boundary.\n");
1202 return -EINVAL;
1203 }
1205 pstart_info = PAGE_ALIGN(pkern_end);
1206 if(initrd_start && initrd_len){
1207 unsigned long offset;
1209 pinitrd_start= (dom0_start + dom0_size) -
1210 (PAGE_ALIGN(initrd_len) + 4*1024*1024);
1211 if (pinitrd_start <= pstart_info)
1212 panic("%s:enough memory is not assigned to dom0", __func__);
1214 for (offset = 0; offset < initrd_len; offset += PAGE_SIZE) {
1215 struct page_info *p;
1216 p = assign_new_domain_page(d, pinitrd_start + offset);
1217 if (p == NULL)
1218 panic("%s: can't allocate page for initrd image", __func__);
1219 if (initrd_len < offset + PAGE_SIZE)
1220 memcpy(page_to_virt(p), (void*)(initrd_start + offset),
1221 initrd_len - offset);
1222 else
1223 copy_page(page_to_virt(p), (void*)(initrd_start + offset));
1224 }
1225 }
1227 printk("METAPHYSICAL MEMORY ARRANGEMENT:\n"
1228 " Kernel image: %lx->%lx\n"
1229 " Entry address: %lx\n"
1230 " Init. ramdisk: %lx len %lx\n"
1231 " Start info.: %lx->%lx\n",
1232 pkern_start, pkern_end, pkern_entry, pinitrd_start, initrd_len,
1233 pstart_info, pstart_info + PAGE_SIZE);
1235 if ( (pkern_end - pkern_start) > (max_pages * PAGE_SIZE) )
1236 {
1237 printk("Initial guest OS requires too much space\n"
1238 "(%luMB is greater than %luMB limit)\n",
1239 (pkern_end-pkern_start)>>20,
1240 (max_pages <<PAGE_SHIFT)>>20);
1241 return -ENOMEM;
1242 }
1244 // if high 3 bits of pkern start are non-zero, error
1246 // if pkern end is after end of metaphysical memory, error
1247 // (we should be able to deal with this... later)
1249 /* Mask all upcalls... */
1250 for ( i = 1; i < MAX_VIRT_CPUS; i++ )
1251 d->shared_info->vcpu_info[i].evtchn_upcall_mask = 1;
1253 if (dom0_max_vcpus == 0)
1254 dom0_max_vcpus = MAX_VIRT_CPUS;
1255 if (dom0_max_vcpus > num_online_cpus())
1256 dom0_max_vcpus = num_online_cpus();
1257 if (dom0_max_vcpus > MAX_VIRT_CPUS)
1258 dom0_max_vcpus = MAX_VIRT_CPUS;
1260 printf ("Dom0 max_vcpus=%d\n", dom0_max_vcpus);
1261 for ( i = 1; i < dom0_max_vcpus; i++ )
1262 if (alloc_vcpu(d, i, i) == NULL)
1263 printf ("Cannot allocate dom0 vcpu %d\n", i);
1265 #if defined(VALIDATE_VT) && !defined(CONFIG_XEN_IA64_DOM0_VP)
1266 /* Construct a frame-allocation list for the initial domain, since these
1267 * pages are allocated by boot allocator and pfns are not set properly
1268 */
1269 for ( mfn = (alloc_start>>PAGE_SHIFT);
1270 mfn < (alloc_end>>PAGE_SHIFT);
1271 mfn++ )
1272 {
1273 page = mfn_to_page(mfn);
1274 page_set_owner(page, d);
1275 page->u.inuse.type_info = 0;
1276 page->count_info = PGC_allocated | 1;
1277 list_add_tail(&page->list, &d->page_list);
1279 /* Construct 1:1 mapping */
1280 set_gpfn_from_mfn(mfn, mfn);
1281 }
1282 #endif
1284 /* Copy the OS image. */
1285 loaddomainelfimage(d,image_start);
1287 /* Copy the initial ramdisk. */
1288 //if ( initrd_len != 0 )
1289 // memcpy((void *)vinitrd_start, initrd_start, initrd_len);
1292 /* Set up start info area. */
1293 d->shared_info->arch.start_info_pfn = pstart_info >> PAGE_SHIFT;
1294 start_info_page = assign_new_domain_page(d, pstart_info);
1295 if (start_info_page == NULL)
1296 panic("can't allocate start info page");
1297 si = page_to_virt(start_info_page);
1298 memset(si, 0, PAGE_SIZE);
1299 sprintf(si->magic, "xen-%i.%i-ia64", XEN_VERSION, XEN_SUBVERSION);
1300 si->nr_pages = max_pages;
1302 /* Give up the VGA console if DOM0 is configured to grab it. */
1303 if (cmdline != NULL)
1304 console_endboot(strstr(cmdline, "tty0") != NULL);
1306 /* VMX specific construction for Dom0, if hardware supports VMX
1307 * and Dom0 is unmodified image
1308 */
1309 printk("Dom0: 0x%lx, domain: 0x%lx\n", (u64)dom0, (u64)d);
1310 if (vmx_dom0)
1311 vmx_final_setup_guest(v);
1313 set_bit(_VCPUF_initialised, &v->vcpu_flags);
1315 new_thread(v, pkern_entry, 0, 0);
1316 physdev_init_dom0(d);
1318 // dom0 doesn't need build_physmap_table()
1319 // see arch_set_info_guest()
1320 // instead we allocate pages manually.
1321 for (i = 0; i < max_pages; i++) {
1322 assign_new_domain0_page(d, i << PAGE_SHIFT);
1323 }
1324 d->arch.physmap_built = 1;
1326 // FIXME: Hack for keyboard input
1327 //serial_input_init();
1329 return 0;
1330 }
1332 void machine_restart(char * __unused)
1333 {
1334 if (platform_is_hp_ski()) dummy();
1335 printf("machine_restart called: spinning....\n");
1336 while(1);
1337 }
1339 void machine_halt(void)
1340 {
1341 if (platform_is_hp_ski()) dummy();
1342 printf("machine_halt called: spinning....\n");
1343 while(1);
1344 }
1346 void dummy_called(char *function)
1347 {
1348 if (platform_is_hp_ski()) asm("break 0;;");
1349 printf("dummy called in %s: spinning....\n", function);
1350 while(1);
1351 }
1354 #if 0
1355 void switch_to(struct vcpu *prev, struct vcpu *next)
1356 {
1357 struct vcpu *last;
1359 __switch_to(prev,next,last);
1360 //set_current(next);
1361 }
1362 #endif
1364 void domain_pend_keyboard_interrupt(int irq)
1365 {
1366 vcpu_pend_interrupt(dom0->vcpu[0],irq);
1367 }
1369 void sync_vcpu_execstate(struct vcpu *v)
1370 {
1371 ia64_save_fpu(v->arch._thread.fph);
1372 if (VMX_DOMAIN(v))
1373 vmx_save_state(v);
1374 else {
1375 if (IA64_HAS_EXTRA_STATE(v))
1376 ia64_save_extra(v);
1377 }
1378 // FIXME SMP: Anything else needed here for SMP?
1379 }
1381 // FIXME: It would be nice to print out a nice error message for bad
1382 // values of these boot-time parameters, but it seems we are too early
1383 // in the boot and attempts to print freeze the system?
1384 #define abort(x...) do {} while(0)
1385 #define warn(x...) do {} while(0)
1387 static void parse_dom0_mem(char *s)
1388 {
1389 unsigned long bytes = parse_size_and_unit(s);
1391 if (bytes < 4 * 1024 * 1024) {
1392 abort("parse_dom0_mem: too small, boot aborted"
1393 " (try e.g. dom0_mem=256M or dom0_mem=65536K)\n");
1395 if (bytes % dom0_align) {
1396 dom0_size = ((bytes / dom0_align) + 1) * dom0_align;
1397 warn("parse_dom0_mem: dom0_size rounded up from"
1398 " %lx to %lx bytes, due to dom0_align=%lx\n",
1399 bytes,dom0_size,dom0_align);
1400 }
1401 else dom0_size = bytes;
1402 }
1403 custom_param("dom0_mem", parse_dom0_mem);
1406 static void parse_dom0_align(char *s)
1407 {
1408 unsigned long bytes = parse_size_and_unit(s);
1410 if (bytes & (bytes - 1)) { /* not a power of two */
1411 abort("parse_dom0_align: dom0_align must be power of two, "
1412 "boot aborted"
1413 " (try e.g. dom0_align=256M or dom0_align=65536K)\n");
1415 else if (bytes < PAGE_SIZE) {
1416 abort("parse_dom0_align: dom0_align must be >= %ld, "
1417 "boot aborted"
1418 " (try e.g. dom0_align=256M or dom0_align=65536K)\n",
1419 PAGE_SIZE);
1420 }
1421 else dom0_align = bytes;
1422 if (dom0_size % dom0_align) {
1423 dom0_size = (dom0_size / dom0_align + 1) * dom0_align;
1424 warn("parse_dom0_align: dom0_size rounded up from"
1425 " %ld to %ld bytes, due to dom0_align=%lx\n",
1426 bytes,dom0_size,dom0_align);
1427 }
1428 }
1429 custom_param("dom0_align", parse_dom0_align);
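/*
 * Worked example of the rounding intended above (hypothetical request):
 * with the default dom0_align of 64M, dom0_mem=200M is not a multiple of
 * the alignment, so the size is rounded up to
 *     ((200M / 64M) + 1) * 64M = (3 + 1) * 64M = 256M
 * while a request that is already a multiple of dom0_align (e.g. 256M)
 * is used as-is.
 */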