ia64/xen-unstable

view xen/arch/ia64/xen/domain.c @ 9770:ced37bea0647

[IA64] FPH enabling + cleanup

Move contents of switch_to macro from xensystem.h to context_switch function.
Initialize FPU on all processors. FPH is always enabled in Xen.
Speed up context-switch (a little bit!) by not enabling/disabling FPH.
Cleanup (unused functions/variables/fields, debug printfs, ...)
vmx_ia64_switch_to removed (was unused).

Signed-off-by: Tristan Gingold <tristan.gingold@bull.net>
author awilliam@xenbuild.aw
date Tue Apr 25 22:35:41 2006 -0600 (2006-04-25)
parents 7a9a00c51588
children fcfc614d3713
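
Rough illustration only (not the literal patch hunk): with FPH kept enabled in Xen (PSR.dfh left clear), the switch path no longer needs to toggle FPH access and can simply save and restore the high floating-point partition (f32-f127) using the helpers that appear later in this file. The prev/next vcpu pointers below are placeholders for whatever the real call site uses:

    /* illustrative sketch only; prev/next are placeholder vcpu pointers */
    if (VMX_DOMAIN(prev))
        vmx_save_state(prev);                    /* VMX state, as in sync_vcpu_execstate() */
    __ia64_save_fpu(prev->arch._thread.fph);     /* save f32-f127 of the outgoing vcpu */
    __ia64_load_fpu(next->arch._thread.fph);     /* restore f32-f127 of the incoming vcpu */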
line source
1 /*
2 * Copyright (C) 1995 Linus Torvalds
3 *
4 * Pentium III FXSR, SSE support
5 * Gareth Hughes <gareth@valinux.com>, May 2000
6 *
7 * Copyright (C) 2005 Intel Co
8 * Kun Tian (Kevin Tian) <kevin.tian@intel.com>
9 *
10 * 05/04/29 Kun Tian (Kevin Tian) <kevin.tian@intel.com> Add VTI domain support
11 */
13 #include <xen/config.h>
14 #include <xen/init.h>
15 #include <xen/lib.h>
16 #include <xen/errno.h>
17 #include <xen/sched.h>
18 #include <xen/smp.h>
19 #include <xen/delay.h>
20 #include <xen/softirq.h>
21 #include <xen/mm.h>
22 #include <xen/iocap.h>
23 #include <asm/ptrace.h>
24 #include <asm/system.h>
25 #include <asm/io.h>
26 #include <asm/processor.h>
27 #include <asm/desc.h>
28 #include <asm/hw_irq.h>
29 #include <asm/setup.h>
30 //#include <asm/mpspec.h>
31 #include <xen/irq.h>
32 #include <xen/event.h>
33 //#include <xen/shadow.h>
34 #include <xen/console.h>
35 #include <xen/compile.h>
37 #include <xen/elf.h>
38 //#include <asm/page.h>
39 #include <asm/pgalloc.h>
41 #include <asm/offsets.h> /* for IA64_THREAD_INFO_SIZE */
43 #include <asm/vcpu.h> /* for function declarations */
44 #include <public/arch-ia64.h>
45 #include <asm/vmx.h>
46 #include <asm/vmx_vcpu.h>
47 #include <asm/vmx_vpd.h>
48 #include <asm/vmx_phy_mode.h>
49 #include <asm/pal.h>
50 #include <asm/vhpt.h>
51 #include <public/hvm/ioreq.h>
52 #include <public/arch-ia64.h>
53 #include <asm/tlbflush.h>
54 #include <asm/regionreg.h>
55 #include <asm/dom_fw.h>
57 #ifndef CONFIG_XEN_IA64_DOM0_VP
58 #define CONFIG_DOMAIN0_CONTIGUOUS
59 #endif
60 unsigned long dom0_start = -1L;
61 unsigned long dom0_size = 512*1024*1024;
62 unsigned long dom0_align = 64*1024*1024;
64 /* dom0_max_vcpus: maximum number of VCPUs to create for dom0. */
65 static unsigned int dom0_max_vcpus = 1;
66 integer_param("dom0_max_vcpus", dom0_max_vcpus);
68 // initialized by arch/ia64/setup.c:find_initrd()
69 unsigned long initrd_start = 0, initrd_end = 0;
70 extern unsigned long running_on_sim;
72 #define IS_XEN_ADDRESS(d,a) ((a >= d->xen_vastart) && (a <= d->xen_vaend))
74 /* FIXME: where should these declarations go? */
75 extern long platform_is_hp_ski(void);
76 extern void serial_input_init(void);
77 static void init_switch_stack(struct vcpu *v);
78 void build_physmap_table(struct domain *d);
80 /* this belongs in include/asm, but there doesn't seem to be a suitable place */
81 void arch_domain_destroy(struct domain *d)
82 {
83 struct page_info *page;
84 struct list_head *ent, *prev;
86 if (d->arch.mm->pgd != NULL)
87 {
88 list_for_each ( ent, &d->arch.mm->pt_list )
89 {
90 page = list_entry(ent, struct page_info, list);
91 prev = ent->prev;
92 list_del(ent);
93 free_xenheap_page(page_to_virt(page));
94 ent = prev;
95 }
96 pgd_free(d->arch.mm->pgd);
97 }
98 if (d->arch.mm != NULL)
99 xfree(d->arch.mm);
100 if (d->shared_info != NULL)
101 free_xenheap_page(d->shared_info);
103 deallocate_rid_range(d);
105 /* Is this really needed here? */
106 flush_tlb_all();
108 /* Is this really needed here? */
109 vhpt_flush_all();
110 }
112 static void default_idle(void)
113 {
114 int cpu = smp_processor_id();
115 local_irq_disable();
116 if ( !softirq_pending(cpu))
117 safe_halt();
118 local_irq_enable();
119 }
121 static void continue_cpu_idle_loop(void)
122 {
123 int cpu = smp_processor_id();
124 for ( ; ; )
125 {
126 #ifdef IA64
127 // __IRQ_STAT(cpu, idle_timestamp) = jiffies
128 #else
129 irq_stat[cpu].idle_timestamp = jiffies;
130 #endif
131 while ( !softirq_pending(cpu) )
132 default_idle();
133 add_preempt_count(SOFTIRQ_OFFSET);
134 raise_softirq(SCHEDULE_SOFTIRQ);
135 do_softirq();
136 sub_preempt_count(SOFTIRQ_OFFSET);
137 }
138 }
140 void startup_cpu_idle_loop(void)
141 {
142 /* Just some sanity to ensure that the scheduler is set up okay. */
143 ASSERT(current->domain == IDLE_DOMAIN_ID);
144 raise_softirq(SCHEDULE_SOFTIRQ);
146 continue_cpu_idle_loop();
147 }
149 struct vcpu *alloc_vcpu_struct(struct domain *d, unsigned int vcpu_id)
150 {
151 struct vcpu *v;
152 struct thread_info *ti;
154 /* Keep idle vcpu0 statically allocated at compile time, because
155 * some code inherited from Linux still requires it in the early boot phase.
156 */
157 if (is_idle_domain(d) && !vcpu_id)
158 v = idle_vcpu[0];
159 else {
160 if ((v = alloc_xenheap_pages(KERNEL_STACK_SIZE_ORDER)) == NULL)
161 return NULL;
162 memset(v, 0, sizeof(*v));
164 ti = alloc_thread_info(v);
165 /* Clear thread_info to clear some important fields, like
166 * preempt_count
167 */
168 memset(ti, 0, sizeof(struct thread_info));
169 init_switch_stack(v);
170 }
172 if (!is_idle_domain(d)) {
173 v->arch.privregs =
174 alloc_xenheap_pages(get_order(sizeof(mapped_regs_t)));
175 BUG_ON(v->arch.privregs == NULL);
176 memset(v->arch.privregs, 0, PAGE_SIZE);
178 if (!vcpu_id)
179 memset(&d->shared_info->evtchn_mask[0], 0xff,
180 sizeof(d->shared_info->evtchn_mask));
182 v->vcpu_info = &(d->shared_info->vcpu_info[0]);
183 v->arch.metaphysical_rr0 = d->arch.metaphysical_rr0;
184 v->arch.metaphysical_rr4 = d->arch.metaphysical_rr4;
185 v->arch.metaphysical_saved_rr0 = d->arch.metaphysical_rr0;
186 v->arch.metaphysical_saved_rr4 = d->arch.metaphysical_rr4;
188 /* Is this correct?
189 It depends on how the domain uses RIDs.
191 A domain may share RIDs among its processors (e.g. when using a
192 global VHPT). In that case RIDs should also be shared among vcpus
193 and the RID range should be the same.
195 However, a domain may instead use per-cpu RID allocation. In that
196 case we don't want to share RIDs among vcpus, though we may still
197 do so when two vcpus run on the same cpu... */
199 v->arch.starting_rid = d->arch.starting_rid;
200 v->arch.ending_rid = d->arch.ending_rid;
201 v->arch.breakimm = d->arch.breakimm;
202 }
204 return v;
205 }
207 void free_vcpu_struct(struct vcpu *v)
208 {
209 if (VMX_DOMAIN(v))
210 vmx_relinquish_vcpu_resources(v);
211 else {
212 if (v->arch.privregs != NULL)
213 free_xenheap_pages(v->arch.privregs, get_order(sizeof(mapped_regs_t)));
214 }
216 free_xenheap_pages(v, KERNEL_STACK_SIZE_ORDER);
217 }
219 static void init_switch_stack(struct vcpu *v)
220 {
221 struct pt_regs *regs = vcpu_regs (v);
222 struct switch_stack *sw = (struct switch_stack *) regs - 1;
223 extern void ia64_ret_from_clone;
225 memset(sw, 0, sizeof(struct switch_stack) + sizeof(struct pt_regs));
226 sw->ar_bspstore = (unsigned long)v + IA64_RBS_OFFSET;
227 sw->b0 = (unsigned long) &ia64_ret_from_clone;
228 sw->ar_fpsr = FPSR_DEFAULT;
229 v->arch._thread.ksp = (unsigned long) sw - 16;
230 // stay on kernel stack because may get interrupts!
231 // ia64_ret_from_clone (which b0 gets in new_thread) switches
232 // to user stack
233 v->arch._thread.on_ustack = 0;
234 memset(v->arch._thread.fph,0,sizeof(struct ia64_fpreg)*96);
235 }
237 int arch_domain_create(struct domain *d)
238 {
239 // the following will eventually need to be negotiated dynamically
240 d->xen_vastart = XEN_START_ADDR;
241 d->xen_vaend = XEN_END_ADDR;
242 d->shared_info_va = SHAREDINFO_ADDR;
244 if (is_idle_domain(d))
245 return 0;
247 if ((d->shared_info = (void *)alloc_xenheap_page()) == NULL)
248 goto fail_nomem;
249 memset(d->shared_info, 0, PAGE_SIZE);
251 d->max_pages = (128UL*1024*1024)/PAGE_SIZE; // 128MB default // FIXME
252 /* We may also need an emulation RID for region4, though it's unlikely
253 * that a guest will issue uncacheable accesses in metaphysical mode.
254 * Still, keeping such info here may be saner.
255 */
256 if (!allocate_rid_range(d,0))
257 goto fail_nomem;
258 d->arch.breakimm = 0x1000;
259 d->arch.sys_pgnr = 0;
261 if ((d->arch.mm = xmalloc(struct mm_struct)) == NULL)
262 goto fail_nomem;
263 memset(d->arch.mm, 0, sizeof(*d->arch.mm));
264 INIT_LIST_HEAD(&d->arch.mm->pt_list);
266 d->arch.physmap_built = 0;
267 if ((d->arch.mm->pgd = pgd_alloc(d->arch.mm)) == NULL)
268 goto fail_nomem;
270 printf ("arch_domain_create: domain=%p\n", d);
271 return 0;
273 fail_nomem:
274 if (d->arch.mm->pgd != NULL)
275 pgd_free(d->arch.mm->pgd);
276 if (d->arch.mm != NULL)
277 xfree(d->arch.mm);
278 if (d->shared_info != NULL)
279 free_xenheap_page(d->shared_info);
280 return -ENOMEM;
281 }
283 void arch_getdomaininfo_ctxt(struct vcpu *v, struct vcpu_guest_context *c)
284 {
285 struct pt_regs *regs = vcpu_regs (v);
287 c->regs = *regs;
288 c->vcpu.evtchn_vector = v->vcpu_info->arch.evtchn_vector;
290 c->shared = v->domain->shared_info->arch;
291 }
293 int arch_set_info_guest(struct vcpu *v, struct vcpu_guest_context *c)
294 {
295 struct pt_regs *regs = vcpu_regs (v);
296 struct domain *d = v->domain;
298 if ( test_bit(_VCPUF_initialised, &v->vcpu_flags) )
299 return 0;
300 if (c->flags & VGCF_VMX_GUEST) {
301 if (!vmx_enabled) {
302 printk("No VMX hardware feature for vmx domain.\n");
303 return -EINVAL;
304 }
306 if (v == d->vcpu[0])
307 vmx_setup_platform(d, c);
309 vmx_final_setup_guest(v);
310 } else if (!d->arch.physmap_built)
311 build_physmap_table(d);
313 *regs = c->regs;
314 if (v == d->vcpu[0]) {
315 /* Only for first vcpu. */
316 d->arch.sys_pgnr = c->sys_pgnr;
317 d->arch.initrd_start = c->initrd.start;
318 d->arch.initrd_len = c->initrd.size;
319 d->arch.cmdline = c->cmdline;
320 d->shared_info->arch = c->shared;
322 /* Cache synchronization seems to be done by the linux kernel
323 during mmap/unmap operation. However be conservative. */
324 domain_cache_flush (d, 1);
325 }
326 new_thread(v, regs->cr_iip, 0, 0);
328 v->vcpu_info->arch.evtchn_vector = c->vcpu.evtchn_vector;
329 if ( c->vcpu.privregs && copy_from_user(v->arch.privregs,
330 c->vcpu.privregs, sizeof(mapped_regs_t))) {
331 printk("Bad ctxt address in arch_set_info_guest: %p\n",
332 c->vcpu.privregs);
333 return -EFAULT;
334 }
336 v->arch.domain_itm_last = -1L;
338 /* Don't redo final setup */
339 set_bit(_VCPUF_initialised, &v->vcpu_flags);
340 return 0;
341 }
343 static void relinquish_memory(struct domain *d, struct list_head *list)
344 {
345 struct list_head *ent;
346 struct page_info *page;
347 #ifndef __ia64__
348 unsigned long x, y;
349 #endif
351 /* Use a recursive lock, as we may enter 'free_domheap_page'. */
352 spin_lock_recursive(&d->page_alloc_lock);
353 ent = list->next;
354 while ( ent != list )
355 {
356 page = list_entry(ent, struct page_info, list);
357 /* Grab a reference to the page so it won't disappear from under us. */
358 if ( unlikely(!get_page(page, d)) )
359 {
360 /* Couldn't get a reference -- someone is freeing this page. */
361 ent = ent->next;
362 continue;
363 }
365 if ( test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info) )
366 put_page_and_type(page);
368 if ( test_and_clear_bit(_PGC_allocated, &page->count_info) )
369 put_page(page);
371 #ifndef __ia64__
372 /*
373 * Forcibly invalidate base page tables at this point to break circular
374 * 'linear page table' references. This is okay because MMU structures
375 * are not shared across domains and this domain is now dead. Thus base
376 * tables are not in use so a non-zero count means circular reference.
377 */
378 y = page->u.inuse.type_info;
379 for ( ; ; )
380 {
381 x = y;
382 if ( likely((x & (PGT_type_mask|PGT_validated)) !=
383 (PGT_base_page_table|PGT_validated)) )
384 break;
386 y = cmpxchg(&page->u.inuse.type_info, x, x & ~PGT_validated);
387 if ( likely(y == x) )
388 {
389 free_page_type(page, PGT_base_page_table);
390 break;
391 }
392 }
393 #endif
395 /* Follow the list chain and /then/ potentially free the page. */
396 ent = ent->next;
397 put_page(page);
398 }
400 spin_unlock_recursive(&d->page_alloc_lock);
401 }
403 void domain_relinquish_resources(struct domain *d)
404 {
405 /* Relinquish every page of memory. */
407 /* xenheap_list is not used in ia64. */
408 BUG_ON(!list_empty(&d->xenpage_list));
410 relinquish_memory(d, &d->page_list);
411 }
413 // heavily leveraged from linux/arch/ia64/kernel/process.c:copy_thread()
414 // and linux/arch/ia64/kernel/process.c:kernel_thread()
415 void new_thread(struct vcpu *v,
416 unsigned long start_pc,
417 unsigned long start_stack,
418 unsigned long start_info)
419 {
420 struct domain *d = v->domain;
421 struct pt_regs *regs;
422 extern char dom0_command_line[];
424 #ifdef CONFIG_DOMAIN0_CONTIGUOUS
425 if (d == dom0 && v->vcpu_id == 0) start_pc += dom0_start;
426 #endif
428 regs = vcpu_regs (v);
429 if (VMX_DOMAIN(v)) {
430 /* dt/rt/it:1;i/ic:1, si:1, vm/bn:1, ac:1 */
431 regs->cr_ipsr = 0x501008826008; /* Need to be expanded as macro */
432 } else {
433 regs->cr_ipsr = ia64_getreg(_IA64_REG_PSR)
434 | IA64_PSR_BITS_TO_SET | IA64_PSR_BN;
435 regs->cr_ipsr &= ~(IA64_PSR_BITS_TO_CLEAR
436 | IA64_PSR_RI | IA64_PSR_IS);
437 regs->cr_ipsr |= 2UL << IA64_PSR_CPL0_BIT; // domain runs at PL2
438 }
439 regs->cr_iip = start_pc;
440 regs->cr_ifs = 1UL << 63; /* or clear? */
441 regs->ar_fpsr = FPSR_DEFAULT;
443 if (VMX_DOMAIN(v)) {
444 vmx_init_all_rr(v);
445 if (d == dom0)
446 regs->r28 = dom_fw_setup(d,dom0_command_line,
447 COMMAND_LINE_SIZE);
448 /* Virtual processor context setup */
449 VCPU(v, vpsr) = IA64_PSR_BN;
450 VCPU(v, dcr) = 0;
451 } else {
452 init_all_rr(v);
453 if (v->vcpu_id == 0) {
454 /* Build the firmware. */
455 if (d == dom0)
456 regs->r28 = dom_fw_setup(d,dom0_command_line,
457 COMMAND_LINE_SIZE);
458 else {
459 const char *cmdline = d->arch.cmdline;
460 int len;
462 if (*cmdline == 0) {
463 #define DEFAULT_CMDLINE "nomca nosmp xencons=tty0 console=tty0 root=/dev/hda1"
464 cmdline = DEFAULT_CMDLINE;
465 len = sizeof (DEFAULT_CMDLINE);
466 printf("domU command line defaulted to"
467 DEFAULT_CMDLINE "\n");
468 }
469 else
470 len = IA64_COMMAND_LINE_SIZE;
472 regs->r28 = dom_fw_setup (d, cmdline, len);
473 }
474 d->shared_info->arch.flags = (d == dom0) ?
475 (SIF_INITDOMAIN|SIF_PRIVILEGED) : 0;
476 }
477 regs->ar_rsc |= (2 << 2); /* force PL2/3 */
478 VCPU(v, banknum) = 1;
479 VCPU(v, metaphysical_mode) = 1;
480 VCPU(v, interrupt_mask_addr) =
481 (uint64_t)SHAREDINFO_ADDR + INT_ENABLE_OFFSET(v);
482 VCPU(v, itv) = (1 << 16); /* timer vector masked */
483 }
484 }
486 static pte_t*
487 lookup_alloc_domain_pte(struct domain* d, unsigned long mpaddr)
488 {
489 struct page_info *pt;
490 struct mm_struct *mm = d->arch.mm;
491 pgd_t *pgd;
492 pud_t *pud;
493 pmd_t *pmd;
495 BUG_ON(mm->pgd == NULL);
496 pgd = pgd_offset(mm, mpaddr);
497 if (pgd_none(*pgd)) {
498 pgd_populate(mm, pgd, pud_alloc_one(mm,mpaddr));
499 pt = maddr_to_page(pgd_val(*pgd));
500 list_add_tail(&pt->list, &d->arch.mm->pt_list);
501 }
503 pud = pud_offset(pgd, mpaddr);
504 if (pud_none(*pud)) {
505 pud_populate(mm, pud, pmd_alloc_one(mm,mpaddr));
506 pt = maddr_to_page(pud_val(*pud));
507 list_add_tail(&pt->list, &d->arch.mm->pt_list);
508 }
510 pmd = pmd_offset(pud, mpaddr);
511 if (pmd_none(*pmd)) {
512 pmd_populate_kernel(mm, pmd, pte_alloc_one_kernel(mm, mpaddr));
513 pt = maddr_to_page(pmd_val(*pmd));
514 list_add_tail(&pt->list, &d->arch.mm->pt_list);
515 }
517 return pte_offset_map(pmd, mpaddr);
518 }
520 //XXX should xxx_none() be used instead of !xxx_present()?
521 static pte_t*
522 lookup_noalloc_domain_pte(struct domain* d, unsigned long mpaddr)
523 {
524 struct mm_struct *mm = d->arch.mm;
525 pgd_t *pgd;
526 pud_t *pud;
527 pmd_t *pmd;
529 BUG_ON(mm->pgd == NULL);
530 pgd = pgd_offset(mm, mpaddr);
531 if (!pgd_present(*pgd))
532 goto not_present;
534 pud = pud_offset(pgd, mpaddr);
535 if (!pud_present(*pud))
536 goto not_present;
538 pmd = pmd_offset(pud, mpaddr);
539 if (!pmd_present(*pmd))
540 goto not_present;
542 return pte_offset_map(pmd, mpaddr);
544 not_present:
545 return NULL;
546 }
548 #ifdef CONFIG_XEN_IA64_DOM0_VP
549 static pte_t*
550 lookup_noalloc_domain_pte_none(struct domain* d, unsigned long mpaddr)
551 {
552 struct mm_struct *mm = d->arch.mm;
553 pgd_t *pgd;
554 pud_t *pud;
555 pmd_t *pmd;
557 BUG_ON(mm->pgd == NULL);
558 pgd = pgd_offset(mm, mpaddr);
559 if (pgd_none(*pgd))
560 goto not_present;
562 pud = pud_offset(pgd, mpaddr);
563 if (pud_none(*pud))
564 goto not_present;
566 pmd = pmd_offset(pud, mpaddr);
567 if (pmd_none(*pmd))
568 goto not_present;
570 return pte_offset_map(pmd, mpaddr);
572 not_present:
573 return NULL;
574 }
575 #endif
577 /* Allocate a new page for domain and map it to the specified metaphysical
578 address. */
579 struct page_info *
580 __assign_new_domain_page(struct domain *d, unsigned long mpaddr, pte_t* pte)
581 {
582 struct page_info *p = NULL;
583 unsigned long maddr;
585 BUG_ON(!pte_none(*pte));
587 #ifdef CONFIG_DOMAIN0_CONTIGUOUS
588 if (d == dom0) {
589 #if 0
590 if (mpaddr < dom0_start || mpaddr >= dom0_start + dom0_size) {
591 /* FIXME: is it true ?
592 dom0 memory is not contiguous! */
593 panic("assign_new_domain_page: bad domain0 "
594 "mpaddr=%lx, start=%lx, end=%lx!\n",
595 mpaddr, dom0_start, dom0_start+dom0_size);
596 }
597 #endif
598 p = mfn_to_page((mpaddr >> PAGE_SHIFT));
599 return p;
600 }
601 else
602 #endif
603 {
604 p = alloc_domheap_page(d);
605 // zero out pages for security reasons
606 if (p)
607 clear_page(page_to_virt(p));
608 }
609 if (unlikely(!p)) {
610 printf("assign_new_domain_page: Can't alloc!!!! Aaaargh!\n");
611 return(p);
612 }
613 maddr = page_to_maddr (p);
614 if (unlikely(maddr > __get_cpu_var(vhpt_paddr)
615 && maddr < __get_cpu_var(vhpt_pend))) {
616 /* FIXME: how can this happen ?
617 vhpt is allocated by alloc_domheap_page. */
618 printf("assign_new_domain_page: reassigned vhpt page %lx!!\n",
619 maddr);
620 }
622 set_pte(pte, pfn_pte(maddr >> PAGE_SHIFT,
623 __pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX)));
625 //XXX CONFIG_XEN_IA64_DOM0_VP
626 // TODO racy
627 if ((mpaddr & GPFN_IO_MASK) == GPFN_MEM)
628 set_gpfn_from_mfn(page_to_mfn(p), mpaddr >> PAGE_SHIFT);
629 return p;
630 }
632 struct page_info *
633 assign_new_domain_page(struct domain *d, unsigned long mpaddr)
634 {
635 #ifdef CONFIG_DOMAIN0_CONTIGUOUS
636 pte_t dummy_pte = __pte(0);
637 return __assign_new_domain_page(d, mpaddr, &dummy_pte);
638 #else
639 struct page_info *p = NULL;
640 pte_t *pte;
642 pte = lookup_alloc_domain_pte(d, mpaddr);
643 if (pte_none(*pte)) {
644 p = __assign_new_domain_page(d, mpaddr, pte);
645 } else {
646 DPRINTK("%s: d 0x%p mpaddr %lx already mapped!\n",
647 __func__, d, mpaddr);
648 }
650 return p;
651 #endif
652 }
654 void
655 assign_new_domain0_page(struct domain *d, unsigned long mpaddr)
656 {
657 #ifndef CONFIG_DOMAIN0_CONTIGUOUS
658 pte_t *pte;
660 BUG_ON(d != dom0);
661 pte = lookup_alloc_domain_pte(d, mpaddr);
662 if (pte_none(*pte)) {
663 struct page_info *p = __assign_new_domain_page(d, mpaddr, pte);
664 if (p == NULL) {
665 panic("%s: can't allocate page for dom0", __func__);
666 }
667 }
668 #endif
669 }
671 /* map a physical address to the specified metaphysical addr */
672 void assign_domain_page(struct domain *d, unsigned long mpaddr, unsigned long physaddr)
673 {
674 pte_t *pte;
676 pte = lookup_alloc_domain_pte(d, mpaddr);
677 if (pte_none(*pte)) {
678 set_pte(pte, pfn_pte(physaddr >> PAGE_SHIFT,
679 __pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX)));
681 //XXX CONFIG_XEN_IA64_DOM0_VP
682 // TODO racy
683 if ((mpaddr & GPFN_IO_MASK) == GPFN_MEM)
684 set_gpfn_from_mfn(physaddr >> PAGE_SHIFT, mpaddr >> PAGE_SHIFT);
685 }
686 else printk("assign_domain_page: mpaddr %lx already mapped!\n",mpaddr);
687 }
689 #ifdef CONFIG_XEN_IA64_DOM0_VP
690 static void
691 assign_domain_same_page(struct domain *d,
692 unsigned long mpaddr, unsigned long size)
693 {
694 //XXX optimization
695 unsigned long end = mpaddr + size;
696 for (; mpaddr < end; mpaddr += PAGE_SIZE) {
697 assign_domain_page(d, mpaddr, mpaddr);
698 }
699 }
701 unsigned long
702 assign_domain_mmio_page(struct domain *d,
703 unsigned long mpaddr, unsigned long size)
704 {
705 if (size == 0) {
706 DPRINTK("%s: domain %p mpaddr 0x%lx size = 0x%lx\n",
707 __func__, d, mpaddr, size);
708 }
709 assign_domain_same_page(d, mpaddr, size);
710 return mpaddr;
711 }
713 unsigned long
714 assign_domain_mach_page(struct domain *d,
715 unsigned long mpaddr, unsigned long size)
716 {
717 assign_domain_same_page(d, mpaddr, size);
718 return mpaddr;
719 }
721 //XXX sledgehammer approach:
722 // should flush a finer range instead.
723 void
724 domain_page_flush(struct domain* d, unsigned long mpaddr,
725 unsigned long old_mfn, unsigned long new_mfn)
726 {
727 struct vcpu* v;
728 //XXX SMP
729 for_each_vcpu(d, v) {
730 vcpu_purge_tr_entry(&v->arch.dtlb);
731 vcpu_purge_tr_entry(&v->arch.itlb);
732 }
734 // flush vhpt
735 vhpt_flush();
736 // flush tlb
737 flush_tlb_all();
738 }
740 static void
741 zap_domain_page_one(struct domain *d, unsigned long mpaddr)
742 {
743 struct mm_struct *mm = d->arch.mm;
744 pte_t *pte;
745 pte_t old_pte;
746 unsigned long mfn;
747 struct page_info *page;
749 pte = lookup_noalloc_domain_pte_none(d, mpaddr);
750 if (pte == NULL)
751 return;
752 if (pte_none(*pte))
753 return;
755 // update pte
756 old_pte = ptep_get_and_clear(mm, mpaddr, pte);
757 mfn = pte_pfn(old_pte);
758 page = mfn_to_page(mfn);
760 if (page_get_owner(page) == d) {
761 BUG_ON(get_gpfn_from_mfn(mfn) != (mpaddr >> PAGE_SHIFT));
762 set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY);
763 }
765 domain_page_flush(d, mpaddr, mfn, INVALID_MFN);
767 put_page(page);
768 }
769 #endif
771 void build_physmap_table(struct domain *d)
772 {
773 struct list_head *list_ent = d->page_list.next;
774 unsigned long mfn, i = 0;
776 ASSERT(!d->arch.physmap_built);
777 while(list_ent != &d->page_list) {
778 mfn = page_to_mfn(list_entry(
779 list_ent, struct page_info, list));
780 assign_domain_page(d, i << PAGE_SHIFT, mfn << PAGE_SHIFT);
782 i++;
783 list_ent = mfn_to_page(mfn)->list.next;
784 }
785 d->arch.physmap_built = 1;
786 }
788 void mpafoo(unsigned long mpaddr)
789 {
790 extern unsigned long privop_trace;
791 if (mpaddr == 0x3800)
792 privop_trace = 1;
793 }
795 #ifdef CONFIG_XEN_IA64_DOM0_VP
796 unsigned long
797 ____lookup_domain_mpa(struct domain *d, unsigned long mpaddr)
798 {
799 pte_t *pte;
801 pte = lookup_noalloc_domain_pte(d, mpaddr);
802 if (pte == NULL)
803 goto not_present;
805 if (pte_present(*pte))
806 return (pte->pte & _PFN_MASK);
807 else if (VMX_DOMAIN(d->vcpu[0]))
808 return GPFN_INV_MASK;
810 not_present:
811 return INVALID_MFN;
812 }
814 unsigned long
815 __lookup_domain_mpa(struct domain *d, unsigned long mpaddr)
816 {
817 unsigned long machine = ____lookup_domain_mpa(d, mpaddr);
818 if (machine != INVALID_MFN)
819 return machine;
821 printk("%s: d 0x%p id %d current 0x%p id %d\n",
822 __func__, d, d->domain_id, current, current->vcpu_id);
823 printk("%s: bad mpa 0x%lx (max_pages 0x%lx)\n",
824 __func__, mpaddr, (unsigned long)d->max_pages << PAGE_SHIFT);
825 return INVALID_MFN;
826 }
827 #endif
829 unsigned long lookup_domain_mpa(struct domain *d, unsigned long mpaddr)
830 {
831 pte_t *pte;
833 #ifdef CONFIG_DOMAIN0_CONTIGUOUS
834 if (d == dom0) {
835 pte_t pteval;
836 if (mpaddr < dom0_start || mpaddr >= dom0_start + dom0_size) {
837 //printk("lookup_domain_mpa: bad dom0 mpaddr 0x%lx!\n",mpaddr);
838 //printk("lookup_domain_mpa: start=0x%lx,end=0x%lx!\n",dom0_start,dom0_start+dom0_size);
839 mpafoo(mpaddr);
840 }
841 pteval = pfn_pte(mpaddr >> PAGE_SHIFT,
842 __pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX));
843 pte = &pteval;
844 return *(unsigned long *)pte;
845 }
846 #endif
847 pte = lookup_noalloc_domain_pte(d, mpaddr);
848 if (pte != NULL) {
849 if (pte_present(*pte)) {
850 //printk("lookup_domain_page: found mapping for %lx, pte=%lx\n",mpaddr,pte_val(*pte));
851 return *(unsigned long *)pte;
852 } else if (VMX_DOMAIN(d->vcpu[0]))
853 return GPFN_INV_MASK;
854 }
856 printk("%s: d 0x%p id %d current 0x%p id %d\n",
857 __func__, d, d->domain_id, current, current->vcpu_id);
858 if ((mpaddr >> PAGE_SHIFT) < d->max_pages)
859 printk("%s: non-allocated mpa 0x%lx (< 0x%lx)\n", __func__,
860 mpaddr, (unsigned long)d->max_pages << PAGE_SHIFT);
861 else
862 printk("%s: bad mpa 0x%lx (=> 0x%lx)\n", __func__,
863 mpaddr, (unsigned long)d->max_pages << PAGE_SHIFT);
864 mpafoo(mpaddr);
865 return 0;
866 }
868 #ifdef CONFIG_XEN_IA64_DOM0_VP
869 //XXX SMP
870 unsigned long
871 dom0vp_populate_physmap(struct domain *d, unsigned long gpfn,
872 unsigned int extent_order, unsigned int address_bits)
873 {
874 unsigned long ret = 0;
875 int flags = 0;
876 unsigned long mpaddr = gpfn << PAGE_SHIFT;
877 unsigned long extent_size = 1UL << extent_order;
878 unsigned long offset;
879 struct page_info* page;
880 unsigned long physaddr;
882 if (extent_order > 0 && !multipage_allocation_permitted(d)) {
883 ret = -EINVAL;
884 goto out;
885 }
887 if (gpfn + (1 << extent_order) < gpfn) {
888 ret = -EINVAL;
889 goto out;
890 }
891 if (gpfn > d->max_pages || gpfn + (1 << extent_order) > d->max_pages) {
892 ret = -EINVAL;
893 goto out;
894 }
895 if ((extent_size << PAGE_SHIFT) < extent_size) {
896 ret = -EINVAL;
897 goto out;
898 }
900 //XXX check address_bits and set flags = ALLOC_DOM_DMA if needed
902 // check that the range is not populated yet.
903 //XXX loop optimization
904 for (offset = 0; offset < extent_size << PAGE_SHIFT; offset += PAGE_SIZE) {
905 if (____lookup_domain_mpa(d, mpaddr + offset) != INVALID_MFN) {
906 ret = -EBUSY;
907 goto out;
908 }
909 }
911 page = alloc_domheap_pages(d, extent_order, flags);
912 if (page == NULL) {
913 ret = -ENOMEM;
914 DPRINTK("Could not allocate order=%d extent: id=%d flags=%x\n",
915 extent_order, d->domain_id, flags);
916 goto out;
917 }
919 //XXX loop optimization
920 physaddr = page_to_maddr(page);
921 for (offset = 0; offset < extent_size << PAGE_SHIFT; offset += PAGE_SIZE) {
922 assign_domain_page(d, mpaddr + offset, physaddr + offset);
923 }
925 out:
926 return ret;
927 }
929 //XXX SMP
930 unsigned long
931 dom0vp_zap_physmap(struct domain *d, unsigned long gpfn,
932 unsigned int extent_order)
933 {
934 unsigned long ret = 0;
935 if (extent_order != 0) {
936 //XXX
937 ret = -ENOSYS;
938 goto out;
939 }
941 zap_domain_page_one(d, gpfn << PAGE_SHIFT);
943 out:
944 return ret;
945 }
947 static void
948 assign_domain_page_replace(struct domain *d, unsigned long mpaddr,
949 unsigned long mfn, unsigned int flags)
950 {
951 struct mm_struct *mm = d->arch.mm;
952 pte_t* pte;
953 pte_t old_pte;
955 pte = lookup_alloc_domain_pte(d, mpaddr);
957 // update pte
958 old_pte = ptep_get_and_clear(mm, mpaddr, pte);
959 set_pte(pte, pfn_pte(mfn,
960 __pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX)));
961 if (!pte_none(old_pte)) {
962 unsigned long old_mfn;
963 struct page_info* old_page;
965 // XXX should the previous underlying page be removed,
966 // or should an error be returned because this is the domain's fault?
967 old_mfn = pte_pfn(old_pte);//XXX
968 old_page = mfn_to_page(old_mfn);
970 if (page_get_owner(old_page) == d) {
971 BUG_ON(get_gpfn_from_mfn(old_mfn) != (mpaddr >> PAGE_SHIFT));
972 set_gpfn_from_mfn(old_mfn, INVALID_M2P_ENTRY);
973 }
975 domain_page_flush(d, mpaddr, old_mfn, mfn);
977 put_page(old_page);
978 } else {
979 BUG_ON(page_get_owner(mfn_to_page(mfn)) == d &&
980 get_gpfn_from_mfn(mfn) != INVALID_M2P_ENTRY);
981 }
982 }
984 unsigned long
985 dom0vp_add_physmap(struct domain* d, unsigned long gpfn, unsigned long mfn,
986 unsigned int flags, domid_t domid)
987 {
988 int error = 0;
990 struct domain* rd;
991 rd = find_domain_by_id(domid);
992 if (unlikely(rd == NULL)) {
993 error = -EINVAL;
994 goto out0;
995 }
996 if (unlikely(rd == d)) {
997 error = -EINVAL;
998 goto out1;
999 }
1000 if (unlikely(get_page(mfn_to_page(mfn), rd) == 0)) {
1001 error = -EINVAL;
1002 goto out1;
1003 }
1005 assign_domain_page_replace(d, gpfn << PAGE_SHIFT, mfn, 0/* flags:XXX */);
1006 out1:
1007 put_domain(rd);
1008 out0:
1009 return error;
1010 }
1011 #endif
1013 /* Flush cache of domain d. */
1014 void domain_cache_flush (struct domain *d, int sync_only)
1015 {
1016 struct mm_struct *mm = d->arch.mm;
1017 pgd_t *pgd = mm->pgd;
1018 unsigned long maddr;
1019 int i,j,k, l;
1020 int nbr_page = 0;
1021 void (*flush_func)(unsigned long start, unsigned long end);
1022 extern void flush_dcache_range (unsigned long, unsigned long);
1024 if (sync_only)
1025 flush_func = &flush_icache_range;
1026 else
1027 flush_func = &flush_dcache_range;
1029 #ifdef CONFIG_DOMAIN0_CONTIGUOUS
1030 if (d == dom0) {
1031 /* This is not fully correct (because of hole), but it should
1032 be enough for now. */
1033 (*flush_func)(__va_ul (dom0_start),
1034 __va_ul (dom0_start + dom0_size));
1035 return;
1036 }
1037 #endif
1038 for (i = 0; i < PTRS_PER_PGD; pgd++, i++) {
1039 pud_t *pud;
1040 if (!pgd_present(*pgd))
1041 continue;
1042 pud = pud_offset(pgd, 0);
1043 for (j = 0; j < PTRS_PER_PUD; pud++, j++) {
1044 pmd_t *pmd;
1045 if (!pud_present(*pud))
1046 continue;
1047 pmd = pmd_offset(pud, 0);
1048 for (k = 0; k < PTRS_PER_PMD; pmd++, k++) {
1049 pte_t *pte;
1050 if (!pmd_present(*pmd))
1051 continue;
1052 pte = pte_offset_map(pmd, 0);
1053 for (l = 0; l < PTRS_PER_PTE; pte++, l++) {
1054 if (!pte_present(*pte))
1055 continue;
1056 /* Convert PTE to maddr. */
1057 maddr = __va_ul (pte_val(*pte)
1058 & _PAGE_PPN_MASK);
1059 (*flush_func)(maddr, maddr+ PAGE_SIZE);
1060 nbr_page++;
1061 }
1062 }
1063 }
1064 }
1065 //printf ("domain_cache_flush: %d %d pages\n", d->domain_id, nbr_page);
1066 }
1068 // FIXME: ONLY USE FOR DOMAIN PAGE_SIZE == PAGE_SIZE
1069 #if 1
1070 unsigned long domain_mpa_to_imva(struct domain *d, unsigned long mpaddr)
1071 {
1072 unsigned long pte = lookup_domain_mpa(d,mpaddr);
1073 unsigned long imva;
1075 pte &= _PAGE_PPN_MASK;
1076 imva = (unsigned long) __va(pte);
1077 imva |= mpaddr & ~PAGE_MASK;
1078 return(imva);
1079 }
1080 #else
1081 unsigned long domain_mpa_to_imva(struct domain *d, unsigned long mpaddr)
1082 {
1083 unsigned long imva = __gpa_to_mpa(d, mpaddr);
1085 return __va(imva);
1086 }
1087 #endif
1089 // remove following line if not privifying in memory
1090 //#define HAVE_PRIVIFY_MEMORY
1091 #ifndef HAVE_PRIVIFY_MEMORY
1092 #define privify_memory(x,y) do {} while(0)
1093 #endif
1095 // see arch/x86/xxx/domain_build.c
1096 int elf_sanity_check(Elf_Ehdr *ehdr)
1097 {
1098 if (!(IS_ELF(*ehdr)))
1099 {
1100 printk("DOM0 image is not a Xen-compatible Elf image.\n");
1101 return 0;
1102 }
1103 return 1;
1104 }
1106 static void copy_memory(void *dst, void *src, int size)
1107 {
1108 int remain;
1110 if (IS_XEN_ADDRESS(dom0,(unsigned long) src)) {
1111 memcpy(dst,src,size);
1112 }
1113 else {
1114 printf("About to call __copy_from_user(%p,%p,%d)\n",
1115 dst,src,size);
1116 while ((remain = __copy_from_user(dst,src,size)) != 0) {
1117 printf("incomplete user copy, %d remain of %d\n",
1118 remain,size);
1119 dst += size - remain; src += size - remain;
1120 size -= remain;
1121 }
1122 }
1123 }
1125 static void loaddomainelfimage(struct domain *d, unsigned long image_start)
1126 {
1127 char *elfbase = (char *) image_start;
1128 //Elf_Ehdr *ehdr = (Elf_Ehdr *)image_start;
1129 Elf_Ehdr ehdr;
1130 Elf_Phdr phdr;
1131 int h, filesz, memsz;
1132 unsigned long elfaddr, dom_mpaddr, dom_imva;
1133 struct page_info *p;
1135 copy_memory(&ehdr, (void *) image_start, sizeof(Elf_Ehdr));
1136 for ( h = 0; h < ehdr.e_phnum; h++ ) {
1137 copy_memory(&phdr,
1138 elfbase + ehdr.e_phoff + (h*ehdr.e_phentsize),
1139 sizeof(Elf_Phdr));
1140 if ((phdr.p_type != PT_LOAD))
1141 continue;
1143 filesz = phdr.p_filesz;
1144 memsz = phdr.p_memsz;
1145 elfaddr = (unsigned long) elfbase + phdr.p_offset;
1146 dom_mpaddr = phdr.p_paddr;
1148 //printf("p_offset: %x, size=%x\n",elfaddr,filesz);
1149 #ifdef CONFIG_DOMAIN0_CONTIGUOUS
1150 if (d == dom0) {
1151 if (dom_mpaddr+memsz>dom0_size)
1152 panic("Dom0 doesn't fit in memory space!\n");
1153 dom_imva = __va_ul(dom_mpaddr + dom0_start);
1154 copy_memory((void *)dom_imva, (void *)elfaddr, filesz);
1155 if (memsz > filesz)
1156 memset((void *)dom_imva+filesz, 0,
1157 memsz-filesz);
1158 //FIXME: This test for code seems to find a lot more than objdump -x does
1159 if (phdr.p_flags & PF_X) {
1160 privify_memory(dom_imva,filesz);
1161 flush_icache_range (dom_imva, dom_imva+filesz);
1162 }
1163 }
1164 else
1165 #endif
1166 while (memsz > 0) {
1167 p = assign_new_domain_page(d,dom_mpaddr);
1168 BUG_ON (unlikely(p == NULL));
1169 dom_imva = __va_ul(page_to_maddr(p));
1170 if (filesz > 0) {
1171 if (filesz >= PAGE_SIZE)
1172 copy_memory((void *) dom_imva,
1173 (void *) elfaddr,
1174 PAGE_SIZE);
1175 else {
1176 // copy partial page
1177 copy_memory((void *) dom_imva,
1178 (void *) elfaddr, filesz);
1179 // zero the rest of page
1180 memset((void *) dom_imva+filesz, 0,
1181 PAGE_SIZE-filesz);
1182 }
1183 //FIXME: This test for code seems to find a lot more than objdump -x does
1184 if (phdr.p_flags & PF_X) {
1185 privify_memory(dom_imva,PAGE_SIZE);
1186 flush_icache_range(dom_imva,
1187 dom_imva+PAGE_SIZE);
1188 }
1189 }
1190 else if (memsz > 0) {
1191 /* always zero out entire page */
1192 memset((void *) dom_imva, 0, PAGE_SIZE);
1193 }
1194 memsz -= PAGE_SIZE;
1195 filesz -= PAGE_SIZE;
1196 elfaddr += PAGE_SIZE;
1197 dom_mpaddr += PAGE_SIZE;
1198 }
1199 }
1202 void alloc_dom0(void)
1203 {
1204 if (platform_is_hp_ski()) {
1205 dom0_size = 128*1024*1024; //FIXME: Should be configurable
1206 }
1207 #ifdef CONFIG_DOMAIN0_CONTIGUOUS
1208 printf("alloc_dom0: starting (initializing %lu MB...)\n",dom0_size/(1024*1024));
1210 /* FIXME: The first chunk (say 256M) should always be assigned to
1211 * Dom0, since Dom0's physical == machine address for DMA purpose.
1212 * Some old version linux, like 2.4, assumes physical memory existing
1213 * in 2nd 64M space.
1214 */
1215 dom0_start = alloc_boot_pages(dom0_size >> PAGE_SHIFT, dom0_align >> PAGE_SHIFT);
1216 dom0_start <<= PAGE_SHIFT;
1217 if (!dom0_start) {
1218 panic("alloc_dom0: can't allocate contiguous memory size=%lu\n",
1219 dom0_size);
1220 }
1221 printf("alloc_dom0: dom0_start=0x%lx\n", dom0_start);
1222 #else
1223 // no need to allocate pages for now
1224 // pages are allocated by map_new_domain_page() via loaddomainelfimage()
1225 dom0_start = 0;
1226 #endif
1227 }
1231 /*
1232 * Domain 0 has unrestricted, direct access to all devices. The main
1233 * point of this stub, however, is to allow alloc_dom_mem to handle
1234 * order > 0 requests: Dom0 needs that capability bit set in order to
1235 * allocate memory for other domains.
1236 */
1237 static void physdev_init_dom0(struct domain *d)
1238 {
1239 if (iomem_permit_access(d, 0UL, ~0UL))
1240 BUG();
1241 if (irqs_permit_access(d, 0, NR_PIRQS-1))
1242 BUG();
1243 }
1245 static unsigned int vmx_dom0 = 0;
1246 int construct_dom0(struct domain *d,
1247 unsigned long image_start, unsigned long image_len,
1248 unsigned long initrd_start, unsigned long initrd_len,
1249 char *cmdline)
1250 {
1251 int i, rc;
1252 unsigned long alloc_start, alloc_end;
1253 start_info_t *si;
1254 struct vcpu *v = d->vcpu[0];
1255 unsigned long max_pages;
1257 struct domain_setup_info dsi;
1258 unsigned long p_start;
1259 unsigned long pkern_start;
1260 unsigned long pkern_entry;
1261 unsigned long pkern_end;
1262 unsigned long pinitrd_start = 0;
1263 unsigned long pstart_info;
1264 struct page_info *start_info_page;
1266 #ifdef VALIDATE_VT
1267 unsigned long mfn;
1268 struct page_info *page = NULL;
1269 #endif
1271 //printf("construct_dom0: starting\n");
1273 /* Sanity! */
1274 BUG_ON(d != dom0);
1275 BUG_ON(d->vcpu[0] == NULL);
1276 BUG_ON(test_bit(_VCPUF_initialised, &v->vcpu_flags));
1278 memset(&dsi, 0, sizeof(struct domain_setup_info));
1280 printk("*** LOADING DOMAIN 0 ***\n");
1282 alloc_start = dom0_start;
1283 alloc_end = dom0_start + dom0_size;
1284 max_pages = dom0_size / PAGE_SIZE;
1285 d->max_pages = max_pages;
1286 #ifndef CONFIG_XEN_IA64_DOM0_VP
1287 d->tot_pages = d->max_pages;
1288 #else
1289 d->tot_pages = 0;
1290 #endif
1291 dsi.image_addr = (unsigned long)image_start;
1292 dsi.image_len = image_len;
1293 rc = parseelfimage(&dsi);
1294 if ( rc != 0 )
1295 return rc;
1297 #ifdef VALIDATE_VT
1298 /* Temp workaround */
1299 if (running_on_sim)
1300 dsi.xen_section_string = (char *)1;
1302 /* Check whether dom0 is vti domain */
1303 if ((!vmx_enabled) && !dsi.xen_section_string) {
1304 printk("Lack of hardware support for unmodified vmx dom0\n");
1305 panic("");
1308 if (vmx_enabled && !dsi.xen_section_string) {
1309 printk("Dom0 is vmx domain!\n");
1310 vmx_dom0 = 1;
1311 }
1312 #endif
1314 p_start = dsi.v_start;
1315 pkern_start = dsi.v_kernstart;
1316 pkern_end = dsi.v_kernend;
1317 pkern_entry = dsi.v_kernentry;
1319 //printk("p_start=%lx, pkern_start=%lx, pkern_end=%lx, pkern_entry=%lx\n",p_start,pkern_start,pkern_end,pkern_entry);
1321 if ( (p_start & (PAGE_SIZE-1)) != 0 )
1322 {
1323 printk("Initial guest OS must load to a page boundary.\n");
1324 return -EINVAL;
1325 }
1327 pstart_info = PAGE_ALIGN(pkern_end);
1328 if(initrd_start && initrd_len){
1329 unsigned long offset;
1331 pinitrd_start= (dom0_start + dom0_size) -
1332 (PAGE_ALIGN(initrd_len) + 4*1024*1024);
1333 if (pinitrd_start <= pstart_info)
1334 panic("%s:enough memory is not assigned to dom0", __func__);
1336 for (offset = 0; offset < initrd_len; offset += PAGE_SIZE) {
1337 struct page_info *p;
1338 p = assign_new_domain_page(d, pinitrd_start + offset);
1339 if (p == NULL)
1340 panic("%s: can't allocate page for initrd image", __func__);
1341 if (initrd_len < offset + PAGE_SIZE)
1342 memcpy(page_to_virt(p), (void*)(initrd_start + offset),
1343 initrd_len - offset);
1344 else
1345 copy_page(page_to_virt(p), (void*)(initrd_start + offset));
1346 }
1347 }
1349 printk("METAPHYSICAL MEMORY ARRANGEMENT:\n"
1350 " Kernel image: %lx->%lx\n"
1351 " Entry address: %lx\n"
1352 " Init. ramdisk: %lx len %lx\n"
1353 " Start info.: %lx->%lx\n",
1354 pkern_start, pkern_end, pkern_entry, pinitrd_start, initrd_len,
1355 pstart_info, pstart_info + PAGE_SIZE);
1357 if ( (pkern_end - pkern_start) > (max_pages * PAGE_SIZE) )
1358 {
1359 printk("Initial guest OS requires too much space\n"
1360 "(%luMB is greater than %luMB limit)\n",
1361 (pkern_end-pkern_start)>>20,
1362 (max_pages <<PAGE_SHIFT)>>20);
1363 return -ENOMEM;
1364 }
1366 // if high 3 bits of pkern start are non-zero, error
1368 // if pkern end is after end of metaphysical memory, error
1369 // (we should be able to deal with this... later)
1371 /* Mask all upcalls... */
1372 for ( i = 1; i < MAX_VIRT_CPUS; i++ )
1373 d->shared_info->vcpu_info[i].evtchn_upcall_mask = 1;
1375 if (dom0_max_vcpus == 0)
1376 dom0_max_vcpus = MAX_VIRT_CPUS;
1377 if (dom0_max_vcpus > num_online_cpus())
1378 dom0_max_vcpus = num_online_cpus();
1379 if (dom0_max_vcpus > MAX_VIRT_CPUS)
1380 dom0_max_vcpus = MAX_VIRT_CPUS;
1382 printf ("Dom0 max_vcpus=%d\n", dom0_max_vcpus);
1383 for ( i = 1; i < dom0_max_vcpus; i++ )
1384 if (alloc_vcpu(d, i, i) == NULL)
1385 printf ("Cannot allocate dom0 vcpu %d\n", i);
1387 #if defined(VALIDATE_VT) && !defined(CONFIG_XEN_IA64_DOM0_VP)
1388 /* Construct a frame-allocation list for the initial domain, since these
1389 * pages are allocated by boot allocator and pfns are not set properly
1390 */
1391 for ( mfn = (alloc_start>>PAGE_SHIFT);
1392 mfn < (alloc_end>>PAGE_SHIFT);
1393 mfn++ )
1394 {
1395 page = mfn_to_page(mfn);
1396 page_set_owner(page, d);
1397 page->u.inuse.type_info = 0;
1398 page->count_info = PGC_allocated | 1;
1399 list_add_tail(&page->list, &d->page_list);
1401 /* Construct 1:1 mapping */
1402 set_gpfn_from_mfn(mfn, mfn);
1403 }
1404 #endif
1406 /* Copy the OS image. */
1407 loaddomainelfimage(d,image_start);
1409 /* Copy the initial ramdisk. */
1410 //if ( initrd_len != 0 )
1411 // memcpy((void *)vinitrd_start, initrd_start, initrd_len);
1414 /* Set up start info area. */
1415 d->shared_info->arch.start_info_pfn = pstart_info >> PAGE_SHIFT;
1416 start_info_page = assign_new_domain_page(d, pstart_info);
1417 if (start_info_page == NULL)
1418 panic("can't allocate start info page");
1419 si = page_to_virt(start_info_page);
1420 memset(si, 0, PAGE_SIZE);
1421 sprintf(si->magic, "xen-%i.%i-ia64", XEN_VERSION, XEN_SUBVERSION);
1422 si->nr_pages = max_pages;
1424 /* Give up the VGA console if DOM0 is configured to grab it. */
1425 if (cmdline != NULL)
1426 console_endboot(strstr(cmdline, "tty0") != NULL);
1428 /* VMX specific construction for Dom0, if hardware supports VMX
1429 * and Dom0 is unmodified image
1430 */
1431 printk("Dom0: 0x%lx, domain: 0x%lx\n", (u64)dom0, (u64)d);
1432 if (vmx_dom0)
1433 vmx_final_setup_guest(v);
1435 set_bit(_VCPUF_initialised, &v->vcpu_flags);
1437 new_thread(v, pkern_entry, 0, 0);
1438 physdev_init_dom0(d);
1440 // dom0 doesn't need build_physmap_table()
1441 // see arch_set_info_guest()
1442 // instead we allocate pages manually.
1443 for (i = 0; i < max_pages; i++) {
1444 assign_new_domain0_page(d, i << PAGE_SHIFT);
1445 }
1446 d->arch.physmap_built = 1;
1448 // FIXME: Hack for keyboard input
1449 //serial_input_init();
1451 return 0;
1452 }
1454 void machine_restart(char * __unused)
1455 {
1456 if (platform_is_hp_ski()) dummy();
1457 printf("machine_restart called: spinning....\n");
1458 while(1);
1459 }
1461 void machine_halt(void)
1462 {
1463 if (platform_is_hp_ski()) dummy();
1464 printf("machine_halt called: spinning....\n");
1465 while(1);
1466 }
1468 void dummy_called(char *function)
1469 {
1470 if (platform_is_hp_ski()) asm("break 0;;");
1471 printf("dummy called in %s: spinning....\n", function);
1472 while(1);
1473 }
1475 void domain_pend_keyboard_interrupt(int irq)
1476 {
1477 vcpu_pend_interrupt(dom0->vcpu[0],irq);
1478 }
1480 void sync_vcpu_execstate(struct vcpu *v)
1481 {
1482 __ia64_save_fpu(v->arch._thread.fph);
1483 if (VMX_DOMAIN(v))
1484 vmx_save_state(v);
1485 // FIXME SMP: Anything else needed here for SMP?
1486 }
1488 // FIXME: It would be nice to print out a nice error message for bad
1489 // values of these boot-time parameters, but it seems we are too early
1490 // in the boot and attempts to print freeze the system?
1491 #define abort(x...) do {} while(0)
1492 #define warn(x...) do {} while(0)
1494 static void parse_dom0_mem(char *s)
1495 {
1496 unsigned long bytes = parse_size_and_unit(s);
1498 if (dom0_size < 4 * 1024 * 1024) {
1499 abort("parse_dom0_mem: too small, boot aborted"
1500 " (try e.g. dom0_mem=256M or dom0_mem=65536K)\n");
1501 }
1502 if (dom0_size % dom0_align) {
1503 dom0_size = ((dom0_size / dom0_align) + 1) * dom0_align;
1504 warn("parse_dom0_mem: dom0_size rounded up from"
1505 " %lx to %lx bytes, due to dom0_align=%lx\n",
1506 bytes,dom0_size,dom0_align);
1507 }
1508 else dom0_size = bytes;
1509 }
1510 custom_param("dom0_mem", parse_dom0_mem);
1513 static void parse_dom0_align(char *s)
1514 {
1515 unsigned long bytes = parse_size_and_unit(s);
1517 if ((bytes - 1) ^ bytes) { /* not a power of two */
1518 abort("parse_dom0_align: dom0_align must be power of two, "
1519 "boot aborted"
1520 " (try e.g. dom0_align=256M or dom0_align=65536K)\n");
1521 }
1522 else if (bytes < PAGE_SIZE) {
1523 abort("parse_dom0_align: dom0_align must be >= %ld, "
1524 "boot aborted"
1525 " (try e.g. dom0_align=256M or dom0_align=65536K)\n",
1526 PAGE_SIZE);
1527 }
1528 else dom0_align = bytes;
1529 if (dom0_size % dom0_align) {
1530 dom0_size = (dom0_size / dom0_align + 1) * dom0_align;
1531 warn("parse_dom0_align: dom0_size rounded up from"
1532 " %ld to %ld bytes, due to dom0_align=%lx\n",
1533 bytes,dom0_size,dom0_align);
1534 }
1535 }
1536 custom_param("dom0_align", parse_dom0_align);
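
For reference, the knobs registered above (dom0_mem, dom0_align, plus the dom0_max_vcpus parameter near the top of the file) are given on the Xen boot command line; the values below are illustrative only:

    dom0_mem=512M dom0_align=64M dom0_max_vcpus=1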