ia64/xen-unstable

xen/arch/ia64/xen/domain.c @ 6863:b52a48644770

PAL cache flush (added during VTI merge) breaks on simulator
author djm@kirby.fc.hp.com
date Fri Sep 16 16:53:30 2005 -0600 (2005-09-16)
parents 52d2d5208575
children 7f9acc83ffcd
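
The change this description refers to appears to be the running_on_sim guard in construct_dom0 (lines 953-959 of the listing below), which skips the PAL cache flush added during the VTI merge when Xen runs on the HP Ski simulator; running_on_sim is declared extern earlier in the file:

    /* Sync d/i cache conservatively */
    if (!running_on_sim) {
        ret = ia64_pal_cache_flush(4, 0, &progress, NULL);
        if (ret != PAL_STATUS_SUCCESS)
            panic("PAL CACHE FLUSH failed for dom0.\n");
        printk("Sync i/d cache for dom0 image SUCC\n");
    }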
line source
1 /*
2 * Copyright (C) 1995 Linus Torvalds
3 *
4 * Pentium III FXSR, SSE support
5 * Gareth Hughes <gareth@valinux.com>, May 2000
6 *
7 * Copyright (C) 2005 Intel Co
8 * Kun Tian (Kevin Tian) <kevin.tian@intel.com>
9 *
10 * 05/04/29 Kun Tian (Kevin Tian) <kevin.tian@intel.com> Add CONFIG_VTI domain support
11 */
13 #include <xen/config.h>
14 #include <xen/lib.h>
15 #include <xen/errno.h>
16 #include <xen/sched.h>
17 #include <xen/smp.h>
18 #include <xen/delay.h>
19 #include <xen/softirq.h>
20 #include <xen/mm.h>
21 #include <asm/ptrace.h>
22 #include <asm/system.h>
23 #include <asm/io.h>
24 #include <asm/processor.h>
25 #include <asm/desc.h>
26 //#include <asm/mpspec.h>
27 #include <xen/irq.h>
28 #include <xen/event.h>
29 //#include <xen/shadow.h>
30 #include <xen/console.h>
32 #include <xen/elf.h>
33 //#include <asm/page.h>
34 #include <asm/pgalloc.h>
35 #include <asm/dma.h> /* for MAX_DMA_ADDRESS */
37 #include <asm/asm-offsets.h> /* for IA64_THREAD_INFO_SIZE */
39 #include <asm/vcpu.h> /* for function declarations */
40 #include <public/arch-ia64.h>
41 #include <asm/vmx.h>
42 #include <asm/vmx_vcpu.h>
43 #include <asm/vmx_vpd.h>
44 #include <asm/pal.h>
45 #include <public/io/ioreq.h>
47 #define CONFIG_DOMAIN0_CONTIGUOUS
48 unsigned long dom0_start = -1L;
49 unsigned long dom0_size = 512*1024*1024; //FIXME: Should be configurable
50 //FIXME: alignment should be 256MB, lest Linux use a 256MB page size
51 unsigned long dom0_align = 256*1024*1024;
52 #ifdef DOMU_BUILD_STAGING
53 unsigned long domU_staging_size = 32*1024*1024; //FIXME: Should be configurable
54 unsigned long domU_staging_start;
55 unsigned long domU_staging_align = 64*1024;
56 unsigned long *domU_staging_area;
57 #endif
59 // initialized by arch/ia64/setup.c:find_initrd()
60 unsigned long initrd_start = 0, initrd_end = 0;
62 #define IS_XEN_ADDRESS(d,a) ((a >= d->xen_vastart) && (a <= d->xen_vaend))
64 //extern int loadelfimage(char *);
65 extern int readelfimage_base_and_size(char *, unsigned long,
66 unsigned long *, unsigned long *, unsigned long *);
68 unsigned long map_domain_page0(struct domain *);
69 extern unsigned long dom_fw_setup(struct domain *, char *, int);
71 /* this belongs in include/asm, but there doesn't seem to be a suitable place */
72 void free_perdomain_pt(struct domain *d)
73 {
74 printf("free_perdomain_pt: not implemented\n");
75 //free_page((unsigned long)d->mm.perdomain_pt);
76 }
78 int hlt_counter;
80 void disable_hlt(void)
81 {
82 hlt_counter++;
83 }
85 void enable_hlt(void)
86 {
87 hlt_counter--;
88 }
90 static void default_idle(void)
91 {
92 if ( hlt_counter == 0 )
93 {
94 local_irq_disable();
95 if ( !softirq_pending(smp_processor_id()) )
96 safe_halt();
97 //else
98 local_irq_enable();
99 }
100 }
102 void continue_cpu_idle_loop(void)
103 {
104 int cpu = smp_processor_id();
105 for ( ; ; )
106 {
107 #ifdef IA64
108 // __IRQ_STAT(cpu, idle_timestamp) = jiffies
109 #else
110 irq_stat[cpu].idle_timestamp = jiffies;
111 #endif
112 while ( !softirq_pending(cpu) )
113 default_idle();
114 raise_softirq(SCHEDULE_SOFTIRQ);
115 do_softirq();
116 }
117 }
119 void startup_cpu_idle_loop(void)
120 {
121 /* Just some sanity to ensure that the scheduler is set up okay. */
122 ASSERT(current->domain == IDLE_DOMAIN_ID);
123 raise_softirq(SCHEDULE_SOFTIRQ);
124 do_softirq();
126 /*
127 * Declares CPU setup done to the boot processor.
128 * Therefore memory barrier to ensure state is visible.
129 */
130 smp_mb();
131 #if 0
132 //do we have to ensure the idle task has a shared page so that, for example,
133 //region registers can be loaded from it. Apparently not...
134 idle0_task.shared_info = (void *)alloc_xenheap_page();
135 memset(idle0_task.shared_info, 0, PAGE_SIZE);
136 /* pin mapping */
137 // FIXME: Does this belong here? Or do only at domain switch time?
138 {
139 /* WARNING: following must be inlined to avoid nested fault */
140 unsigned long psr = ia64_clear_ic();
141 ia64_itr(0x2, IA64_TR_SHARED_INFO, SHAREDINFO_ADDR,
142 pte_val(pfn_pte(ia64_tpa(idle0_task.shared_info) >> PAGE_SHIFT, PAGE_KERNEL)),
143 PAGE_SHIFT);
144 ia64_set_psr(psr);
145 ia64_srlz_i();
146 }
147 #endif
149 continue_cpu_idle_loop();
150 }
152 struct vcpu *arch_alloc_vcpu_struct(void)
153 {
154 /* Per-vp stack is used here. So we need keep vcpu
155 * same page as per-vp stack */
156 return alloc_xenheap_pages(KERNEL_STACK_SIZE_ORDER);
157 }
159 void arch_free_vcpu_struct(struct vcpu *v)
160 {
161 free_xenheap_pages(v, KERNEL_STACK_SIZE_ORDER);
162 }
164 static void init_switch_stack(struct vcpu *v)
165 {
166 struct pt_regs *regs = (struct pt_regs *) ((unsigned long) v + IA64_STK_OFFSET) - 1;
167 struct switch_stack *sw = (struct switch_stack *) regs - 1;
168 extern void ia64_ret_from_clone;
170 memset(sw, 0, sizeof(struct switch_stack) + sizeof(struct pt_regs));
171 sw->ar_bspstore = (unsigned long)v + IA64_RBS_OFFSET;
172 sw->b0 = (unsigned long) &ia64_ret_from_clone;
173 sw->ar_fpsr = FPSR_DEFAULT;
174 v->arch._thread.ksp = (unsigned long) sw - 16;
175 // stay on kernel stack because may get interrupts!
176 // ia64_ret_from_clone (which b0 gets in new_thread) switches
177 // to user stack
178 v->arch._thread.on_ustack = 0;
179 memset(v->arch._thread.fph,0,sizeof(struct ia64_fpreg)*96);
180 }
182 void arch_do_createdomain(struct vcpu *v)
183 {
184 struct domain *d = v->domain;
185 struct thread_info *ti = alloc_thread_info(v);
187 /* Clear thread_info to clear some important fields, like preempt_count */
188 memset(ti, 0, sizeof(struct thread_info));
189 init_switch_stack(v);
191 d->shared_info = (void *)alloc_xenheap_page();
192 if (!d->shared_info) {
193 printk("ERROR/HALTING: CAN'T ALLOC PAGE\n");
194 while (1);
195 }
196 memset(d->shared_info, 0, PAGE_SIZE);
197 #if 0
198 d->vcpu[0].arch.privregs =
199 alloc_xenheap_pages(get_order(sizeof(mapped_regs_t)));
200 printf("arch_vcpu_info=%p\n", d->vcpu[0].arch.privregs);
201 memset(d->vcpu.arch.privregs, 0, PAGE_SIZE);
202 #endif
203 v->vcpu_info = &(d->shared_info->vcpu_data[0]);
205 d->max_pages = (128UL*1024*1024)/PAGE_SIZE; // 128MB default // FIXME
207 #ifdef CONFIG_VTI
208 /* Per-domain vTLB and vhpt implementation. Now vmx domain will stick
209 * to this solution. Maybe it can be deferred until we know created
210 * one as vmx domain */
211 v->arch.vtlb = init_domain_tlb(v);
212 #endif
214 /* We may also need emulation rid for region4, though it's unlikely
215 * to see guest issue uncacheable access in metaphysical mode. But
216 * keep such info here may be more sane.
217 */
218 if (((d->arch.metaphysical_rr0 = allocate_metaphysical_rr()) == -1UL)
219 || ((d->arch.metaphysical_rr4 = allocate_metaphysical_rr()) == -1UL))
220 BUG();
221 // VCPU(v, metaphysical_mode) = 1;
222 v->arch.metaphysical_rr0 = d->arch.metaphysical_rr0;
223 v->arch.metaphysical_rr4 = d->arch.metaphysical_rr4;
224 v->arch.metaphysical_saved_rr0 = d->arch.metaphysical_rr0;
225 v->arch.metaphysical_saved_rr4 = d->arch.metaphysical_rr4;
226 #define DOMAIN_RID_BITS_DEFAULT 18
227 if (!allocate_rid_range(d,DOMAIN_RID_BITS_DEFAULT)) // FIXME
228 BUG();
229 v->arch.starting_rid = d->arch.starting_rid;
230 v->arch.ending_rid = d->arch.ending_rid;
231 // the following will eventually need to be negotiated dynamically
232 d->xen_vastart = XEN_START_ADDR;
233 d->xen_vaend = XEN_END_ADDR;
234 d->shared_info_va = SHAREDINFO_ADDR;
235 d->arch.breakimm = 0x1000;
236 v->arch.breakimm = d->arch.breakimm;
238 d->arch.sys_pgnr = 0;
239 d->arch.mm = xmalloc(struct mm_struct);
240 if (unlikely(!d->arch.mm)) {
241 printk("Can't allocate mm_struct for domain %d\n",d->domain_id);
242 return -ENOMEM;
243 }
244 memset(d->arch.mm, 0, sizeof(*d->arch.mm));
245 d->arch.mm->pgd = pgd_alloc(d->arch.mm);
246 if (unlikely(!d->arch.mm->pgd)) {
247 printk("Can't allocate pgd for domain %d\n",d->domain_id);
248 return -ENOMEM;
249 }
250 }
252 void arch_getdomaininfo_ctxt(struct vcpu *v, struct vcpu_guest_context *c)
253 {
254 struct pt_regs *regs = (struct pt_regs *) ((unsigned long) v + IA64_STK_OFFSET) - 1;
256 printf("arch_getdomaininfo_ctxt\n");
257 c->regs = *regs;
258 c->vcpu.evtchn_vector = v->vcpu_info->arch.evtchn_vector;
259 #if 0
260 if (c->vcpu.privregs && copy_to_user(c->vcpu.privregs,
261 v->vcpu_info->arch.privregs, sizeof(mapped_regs_t))) {
262 printk("Bad ctxt address: 0x%lx\n", c->vcpu.privregs);
263 return -EFAULT;
264 }
265 #endif
267 c->shared = v->domain->shared_info->arch;
268 }
270 int arch_set_info_guest(struct vcpu *v, struct vcpu_guest_context *c)
271 {
272 struct pt_regs *regs = (struct pt_regs *) ((unsigned long) v + IA64_STK_OFFSET) - 1;
273 struct domain *d = v->domain;
274 int i, rc, ret;
275 unsigned long progress = 0;
277 printf("arch_set_info_guest\n");
278 if ( test_bit(_VCPUF_initialised, &v->vcpu_flags) )
279 return 0;
281 if (c->flags & VGCF_VMX_GUEST) {
282 if (!vmx_enabled) {
283 printk("No VMX hardware feature for vmx domain.\n");
284 return -EINVAL;
285 }
287 vmx_setup_platform(v, c);
288 }
289 else{
290 v->arch.privregs =
291 alloc_xenheap_pages(get_order(sizeof(mapped_regs_t)));
292 printf("arch_vcpu_info=%p\n", v->arch.privregs);
293 memset(v->arch.privregs, 0, PAGE_SIZE);
294 }
295 *regs = c->regs;
296 new_thread(v, regs->cr_iip, 0, 0);
298 v->vcpu_info->arch.evtchn_vector = c->vcpu.evtchn_vector;
299 if ( c->vcpu.privregs && copy_from_user(v->arch.privregs,
300 c->vcpu.privregs, sizeof(mapped_regs_t))) {
301 printk("Bad ctxt address in arch_set_info_guest: 0x%lx\n", c->vcpu.privregs);
302 return -EFAULT;
303 }
305 v->arch.domain_itm_last = -1L;
306 d->arch.sys_pgnr = c->sys_pgnr;
307 d->shared_info->arch = c->shared;
309 /* Don't redo final setup */
310 set_bit(_VCPUF_initialised, &v->vcpu_flags);
311 return 0;
312 }
314 void arch_do_boot_vcpu(struct vcpu *v)
315 {
316 struct domain *d = v->domain;
317 printf("arch_do_boot_vcpu: not implemented\n");
319 d->vcpu[v->vcpu_id]->arch.privregs =
320 alloc_xenheap_pages(get_order(sizeof(mapped_regs_t)));
321 printf("arch_vcpu_info=%p\n", d->vcpu[v->vcpu_id]->arch.privregs);
322 memset(d->vcpu[v->vcpu_id]->arch.privregs, 0, PAGE_SIZE);
323 return;
324 }
326 void domain_relinquish_resources(struct domain *d)
327 {
328 /* FIXME */
329 printf("domain_relinquish_resources: not implemented\n");
330 }
332 // heavily leveraged from linux/arch/ia64/kernel/process.c:copy_thread()
333 // and linux/arch/ia64/kernel/process.c:kernel_thread()
334 void new_thread(struct vcpu *v,
335 unsigned long start_pc,
336 unsigned long start_stack,
337 unsigned long start_info)
338 {
339 struct domain *d = v->domain;
340 struct pt_regs *regs;
341 struct ia64_boot_param *bp;
342 extern char saved_command_line[];
345 #ifdef CONFIG_DOMAIN0_CONTIGUOUS
346 if (d == dom0) start_pc += dom0_start;
347 #endif
349 regs = (struct pt_regs *) ((unsigned long) v + IA64_STK_OFFSET) - 1;
350 if (VMX_DOMAIN(v)) {
351 /* dt/rt/it:1;i/ic:1, si:1, vm/bn:1, ac:1 */
352 regs->cr_ipsr = 0x501008826008; /* Need to be expanded as macro */
353 } else {
354 regs->cr_ipsr = ia64_getreg(_IA64_REG_PSR)
355 | IA64_PSR_BITS_TO_SET | IA64_PSR_BN
356 & ~(IA64_PSR_BITS_TO_CLEAR | IA64_PSR_RI | IA64_PSR_IS);
357 regs->cr_ipsr |= 2UL << IA64_PSR_CPL0_BIT; // domain runs at PL2
358 }
359 regs->cr_iip = start_pc;
360 regs->cr_ifs = 1UL << 63; /* or clear? */
361 regs->ar_fpsr = FPSR_DEFAULT;
363 if (VMX_DOMAIN(v)) {
364 #ifdef CONFIG_VTI
365 vmx_init_all_rr(v);
366 if (d == dom0)
367 VCPU(v,vgr[12]) = dom_fw_setup(d,saved_command_line,256L);
368 /* Virtual processor context setup */
369 VCPU(v, vpsr) = IA64_PSR_BN;
370 VCPU(v, dcr) = 0;
371 #endif
372 } else {
373 init_all_rr(v);
374 if (d == dom0)
375 regs->r28 = dom_fw_setup(d,saved_command_line,256L);
376 else {
377 regs->ar_rsc |= (2 << 2); /* force PL2/3 */
378 regs->r28 = dom_fw_setup(d,"nomca nosmp xencons=tty0 console=tty0 root=/dev/hda1",256L); //FIXME
379 }
380 VCPU(v, banknum) = 1;
381 VCPU(v, metaphysical_mode) = 1;
382 d->shared_info->arch.flags = (d == dom0) ? (SIF_INITDOMAIN|SIF_PRIVILEGED|SIF_BLK_BE_DOMAIN|SIF_NET_BE_DOMAIN|SIF_USB_BE_DOMAIN) : 0;
383 }
384 }
386 static struct page * map_new_domain0_page(unsigned long mpaddr)
387 {
388 if (mpaddr < dom0_start || mpaddr >= dom0_start + dom0_size) {
389 printk("map_new_domain0_page: bad domain0 mpaddr %p!\n",mpaddr);
390 printk("map_new_domain0_page: start=%p,end=%p!\n",dom0_start,dom0_start+dom0_size);
391 while(1);
392 }
393 return pfn_to_page((mpaddr >> PAGE_SHIFT));
394 }
396 /* allocate new page for domain and map it to the specified metaphysical addr */
397 struct page * map_new_domain_page(struct domain *d, unsigned long mpaddr)
398 {
399 struct mm_struct *mm = d->arch.mm;
400 struct page *p = (struct page *)0;
401 pgd_t *pgd;
402 pud_t *pud;
403 pmd_t *pmd;
404 pte_t *pte;
405 extern unsigned long vhpt_paddr, vhpt_pend;
407 if (!mm->pgd) {
408 printk("map_new_domain_page: domain pgd must exist!\n");
409 return(p);
410 }
411 pgd = pgd_offset(mm,mpaddr);
412 if (pgd_none(*pgd))
413 pgd_populate(mm, pgd, pud_alloc_one(mm,mpaddr));
415 pud = pud_offset(pgd, mpaddr);
416 if (pud_none(*pud))
417 pud_populate(mm, pud, pmd_alloc_one(mm,mpaddr));
419 pmd = pmd_offset(pud, mpaddr);
420 if (pmd_none(*pmd))
421 pmd_populate_kernel(mm, pmd, pte_alloc_one_kernel(mm,mpaddr));
422 // pmd_populate(mm, pmd, pte_alloc_one(mm,mpaddr));
424 pte = pte_offset_map(pmd, mpaddr);
425 if (pte_none(*pte)) {
426 #ifdef CONFIG_DOMAIN0_CONTIGUOUS
427 if (d == dom0) p = map_new_domain0_page(mpaddr);
428 else
429 #endif
430 {
431 p = alloc_domheap_page(d);
432 // zero out pages for security reasons
433 memset(__va(page_to_phys(p)),0,PAGE_SIZE);
434 }
435 if (unlikely(!p)) {
436 printf("map_new_domain_page: Can't alloc!!!! Aaaargh!\n");
437 return(p);
438 }
439 if (unlikely(page_to_phys(p) > vhpt_paddr && page_to_phys(p) < vhpt_pend)) {
440 printf("map_new_domain_page: reassigned vhpt page %p!!\n",page_to_phys(p));
441 }
442 set_pte(pte, pfn_pte(page_to_phys(p) >> PAGE_SHIFT,
443 __pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX)));
444 }
445 else printk("map_new_domain_page: mpaddr %lx already mapped!\n",mpaddr);
446 return p;
447 }
449 /* map a physical address to the specified metaphysical addr */
450 void map_domain_page(struct domain *d, unsigned long mpaddr, unsigned long physaddr)
451 {
452 struct mm_struct *mm = d->arch.mm;
453 pgd_t *pgd;
454 pud_t *pud;
455 pmd_t *pmd;
456 pte_t *pte;
458 if (!mm->pgd) {
459 printk("map_domain_page: domain pgd must exist!\n");
460 return;
461 }
462 pgd = pgd_offset(mm,mpaddr);
463 if (pgd_none(*pgd))
464 pgd_populate(mm, pgd, pud_alloc_one(mm,mpaddr));
466 pud = pud_offset(pgd, mpaddr);
467 if (pud_none(*pud))
468 pud_populate(mm, pud, pmd_alloc_one(mm,mpaddr));
470 pmd = pmd_offset(pud, mpaddr);
471 if (pmd_none(*pmd))
472 pmd_populate_kernel(mm, pmd, pte_alloc_one_kernel(mm,mpaddr));
473 // pmd_populate(mm, pmd, pte_alloc_one(mm,mpaddr));
475 pte = pte_offset_map(pmd, mpaddr);
476 if (pte_none(*pte)) {
477 set_pte(pte, pfn_pte(physaddr >> PAGE_SHIFT,
478 __pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX)));
479 }
480 else printk("map_domain_page: mpaddr %lx already mapped!\n",mpaddr);
481 }
483 /* map a physical address with specified I/O flag */
484 void map_domain_io_page(struct domain *d, unsigned long mpaddr, unsigned long flags)
485 {
486 struct mm_struct *mm = d->arch.mm;
487 pgd_t *pgd;
488 pud_t *pud;
489 pmd_t *pmd;
490 pte_t *pte;
491 pte_t io_pte;
493 if (!mm->pgd) {
494 printk("map_domain_page: domain pgd must exist!\n");
495 return;
496 }
497 ASSERT(flags & GPFN_IO_MASK);
499 pgd = pgd_offset(mm,mpaddr);
500 if (pgd_none(*pgd))
501 pgd_populate(mm, pgd, pud_alloc_one(mm,mpaddr));
503 pud = pud_offset(pgd, mpaddr);
504 if (pud_none(*pud))
505 pud_populate(mm, pud, pmd_alloc_one(mm,mpaddr));
507 pmd = pmd_offset(pud, mpaddr);
508 if (pmd_none(*pmd))
509 pmd_populate_kernel(mm, pmd, pte_alloc_one_kernel(mm,mpaddr));
510 // pmd_populate(mm, pmd, pte_alloc_one(mm,mpaddr));
512 pte = pte_offset_map(pmd, mpaddr);
513 if (pte_none(*pte)) {
514 pte_val(io_pte) = flags;
515 set_pte(pte, io_pte);
516 }
517 else printk("map_domain_page: mpaddr %lx already mapped!\n",mpaddr);
518 }
520 void mpafoo(unsigned long mpaddr)
521 {
522 extern unsigned long privop_trace;
523 if (mpaddr == 0x3800)
524 privop_trace = 1;
525 }
527 unsigned long lookup_domain_mpa(struct domain *d, unsigned long mpaddr)
528 {
529 struct mm_struct *mm = d->arch.mm;
530 pgd_t *pgd = pgd_offset(mm, mpaddr);
531 pud_t *pud;
532 pmd_t *pmd;
533 pte_t *pte;
535 #ifdef CONFIG_DOMAIN0_CONTIGUOUS
536 if (d == dom0) {
537 if (mpaddr < dom0_start || mpaddr >= dom0_start + dom0_size) {
538 //printk("lookup_domain_mpa: bad dom0 mpaddr %p!\n",mpaddr);
539 //printk("lookup_domain_mpa: start=%p,end=%p!\n",dom0_start,dom0_start+dom0_size);
540 mpafoo(mpaddr);
541 }
542 pte_t pteval = pfn_pte(mpaddr >> PAGE_SHIFT,
543 __pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX));
544 pte = &pteval;
545 return *(unsigned long *)pte;
546 }
547 #endif
548 tryagain:
549 if (pgd_present(*pgd)) {
550 pud = pud_offset(pgd,mpaddr);
551 if (pud_present(*pud)) {
552 pmd = pmd_offset(pud,mpaddr);
553 if (pmd_present(*pmd)) {
554 pte = pte_offset_map(pmd,mpaddr);
555 if (pte_present(*pte)) {
556 //printk("lookup_domain_page: found mapping for %lx, pte=%lx\n",mpaddr,pte_val(*pte));
557 return *(unsigned long *)pte;
558 }
559 }
560 }
561 }
562 /* if lookup fails and mpaddr is "legal", "create" the page */
563 if ((mpaddr >> PAGE_SHIFT) < d->max_pages) {
564 if (map_new_domain_page(d,mpaddr)) goto tryagain;
565 }
566 printk("lookup_domain_mpa: bad mpa %p (> %p\n",
567 mpaddr,d->max_pages<<PAGE_SHIFT);
568 mpafoo(mpaddr);
569 return 0;
570 }
572 // FIXME: ONLY USE FOR DOMAIN PAGE_SIZE == PAGE_SIZE
573 #ifndef CONFIG_VTI
574 unsigned long domain_mpa_to_imva(struct domain *d, unsigned long mpaddr)
575 {
576 unsigned long pte = lookup_domain_mpa(d,mpaddr);
577 unsigned long imva;
579 pte &= _PAGE_PPN_MASK;
580 imva = __va(pte);
581 imva |= mpaddr & ~PAGE_MASK;
582 return(imva);
583 }
584 #else // CONFIG_VTI
585 unsigned long domain_mpa_to_imva(struct domain *d, unsigned long mpaddr)
586 {
587 unsigned long imva = __gpa_to_mpa(d, mpaddr);
589 return __va(imva);
590 }
591 #endif // CONFIG_VTI
593 // remove following line if not privifying in memory
594 //#define HAVE_PRIVIFY_MEMORY
595 #ifndef HAVE_PRIVIFY_MEMORY
596 #define privify_memory(x,y) do {} while(0)
597 #endif
599 // see arch/x86/xxx/domain_build.c
600 int elf_sanity_check(Elf_Ehdr *ehdr)
601 {
602 return (IS_ELF(*ehdr));
603 }
605 static void copy_memory(void *dst, void *src, int size)
606 {
607 int remain;
609 if (IS_XEN_ADDRESS(dom0,src)) {
610 memcpy(dst,src,size);
611 }
612 else {
613 printf("About to call __copy_from_user(%p,%p,%d)\n",
614 dst,src,size);
615 while (remain = __copy_from_user(dst,src,size)) {
616 printf("incomplete user copy, %d remain of %d\n",
617 remain,size);
618 dst += size - remain; src += size - remain;
619 size -= remain;
620 }
621 }
622 }
624 void loaddomainelfimage(struct domain *d, unsigned long image_start)
625 {
626 char *elfbase = image_start;
627 //Elf_Ehdr *ehdr = (Elf_Ehdr *)image_start;
628 Elf_Ehdr ehdr;
629 Elf_Phdr phdr;
630 int h, filesz, memsz, paddr;
631 unsigned long elfaddr, dom_mpaddr, dom_imva;
632 struct page *p;
633 unsigned long pteval;
635 copy_memory(&ehdr,image_start,sizeof(Elf_Ehdr));
636 for ( h = 0; h < ehdr.e_phnum; h++ ) {
637 copy_memory(&phdr,elfbase + ehdr.e_phoff + (h*ehdr.e_phentsize),
638 sizeof(Elf_Phdr));
639 //if ( !is_loadable_phdr(phdr) )
640 if ((phdr.p_type != PT_LOAD)) {
641 continue;
642 }
643 filesz = phdr.p_filesz; memsz = phdr.p_memsz;
644 elfaddr = elfbase + phdr.p_offset;
645 dom_mpaddr = phdr.p_paddr;
646 //printf("p_offset: %x, size=%x\n",elfaddr,filesz);
647 #ifdef CONFIG_DOMAIN0_CONTIGUOUS
648 if (d == dom0) {
649 if (dom_mpaddr+memsz>dom0_size || dom_mpaddr+filesz>dom0_size) {
650 printf("Domain0 doesn't fit in allocated space!\n");
651 while(1);
652 }
653 dom_imva = __va(dom_mpaddr + dom0_start);
654 copy_memory(dom_imva,elfaddr,filesz);
655 if (memsz > filesz) memset(dom_imva+filesz,0,memsz-filesz);
656 //FIXME: This test for code seems to find a lot more than objdump -x does
657 if (phdr.p_flags & PF_X) privify_memory(dom_imva,filesz);
658 }
659 else
660 #endif
661 while (memsz > 0) {
662 #ifdef DOMU_AUTO_RESTART
663 pteval = lookup_domain_mpa(d,dom_mpaddr);
664 if (pteval) dom_imva = __va(pteval & _PFN_MASK);
665 else { printf("loaddomainelfimage: BAD!\n"); while(1); }
666 #else
667 p = map_new_domain_page(d,dom_mpaddr);
668 if (unlikely(!p)) BUG();
669 dom_imva = __va(page_to_phys(p));
670 #endif
671 if (filesz > 0) {
672 if (filesz >= PAGE_SIZE)
673 copy_memory(dom_imva,elfaddr,PAGE_SIZE);
674 else { // copy partial page, zero the rest of page
675 copy_memory(dom_imva,elfaddr,filesz);
676 memset(dom_imva+filesz,0,PAGE_SIZE-filesz);
677 }
678 //FIXME: This test for code seems to find a lot more than objdump -x does
679 if (phdr.p_flags & PF_X)
680 privify_memory(dom_imva,PAGE_SIZE);
681 }
682 else if (memsz > 0) // always zero out entire page
683 memset(dom_imva,0,PAGE_SIZE);
684 memsz -= PAGE_SIZE; filesz -= PAGE_SIZE;
685 elfaddr += PAGE_SIZE; dom_mpaddr += PAGE_SIZE;
686 }
687 }
688 }
690 int
691 parsedomainelfimage(char *elfbase, unsigned long elfsize, unsigned long *entry)
692 {
693 Elf_Ehdr ehdr;
695 copy_memory(&ehdr,elfbase,sizeof(Elf_Ehdr));
697 if ( !elf_sanity_check(&ehdr) ) {
698 printk("ELF sanity check failed.\n");
699 return -EINVAL;
700 }
702 if ( (ehdr.e_phoff + (ehdr.e_phnum * ehdr.e_phentsize)) > elfsize )
703 {
704 printk("ELF program headers extend beyond end of image.\n");
705 return -EINVAL;
706 }
708 if ( (ehdr.e_shoff + (ehdr.e_shnum * ehdr.e_shentsize)) > elfsize )
709 {
710 printk("ELF section headers extend beyond end of image.\n");
711 return -EINVAL;
712 }
714 #if 0
715 /* Find the section-header strings table. */
716 if ( ehdr.e_shstrndx == SHN_UNDEF )
717 {
718 printk("ELF image has no section-header strings table (shstrtab).\n");
719 return -EINVAL;
720 }
721 #endif
723 *entry = ehdr.e_entry;
724 printf("parsedomainelfimage: entry point = %p\n",*entry);
726 return 0;
727 }
730 void alloc_dom0(void)
731 {
732 #ifdef CONFIG_DOMAIN0_CONTIGUOUS
733 if (platform_is_hp_ski()) {
734 dom0_size = 128*1024*1024; //FIXME: Should be configurable
735 }
736 printf("alloc_dom0: starting (initializing %d MB...)\n",dom0_size/(1024*1024));
738 /* FIXME: The first trunk (say 256M) should always be assigned to
739 * Dom0, since Dom0's physical == machine address for DMA purpose.
740 * Some old version linux, like 2.4, assumes physical memory existing
741 * in 2nd 64M space.
742 */
743 dom0_start = alloc_boot_pages(
744 dom0_size >> PAGE_SHIFT, dom0_align >> PAGE_SHIFT);
745 dom0_start <<= PAGE_SHIFT;
746 if (!dom0_start) {
747 printf("construct_dom0: can't allocate contiguous memory size=%p\n",
748 dom0_size);
749 while(1);
750 }
751 printf("alloc_dom0: dom0_start=%p\n",dom0_start);
752 #else
753 dom0_start = 0;
754 #endif
756 }
758 #ifdef DOMU_BUILD_STAGING
759 void alloc_domU_staging(void)
760 {
761 domU_staging_size = 32*1024*1024; //FIXME: Should be configurable
762 printf("alloc_domU_staging: starting (initializing %d MB...)\n",domU_staging_size/(1024*1024));
763 domU_staging_start = alloc_boot_pages(
764 domU_staging_size >> PAGE_SHIFT, domU_staging_align >> PAGE_SHIFT);
765 domU_staging_start <<= PAGE_SHIFT;
766 if (!domU_staging_size) {
767 printf("alloc_domU_staging: can't allocate, spinning...\n");
768 while(1);
769 }
770 else domU_staging_area = (unsigned long *)__va(domU_staging_start);
771 printf("alloc_domU_staging: domU_staging_area=%p\n",domU_staging_area);
773 }
775 unsigned long
776 domU_staging_read_8(unsigned long at)
777 {
778 // no way to return errors so just do it
779 return domU_staging_area[at>>3];
781 }
783 unsigned long
784 domU_staging_write_32(unsigned long at, unsigned long a, unsigned long b,
785 unsigned long c, unsigned long d)
786 {
787 if (at + 32 > domU_staging_size) return -1;
788 if (at & 0x1f) return -1;
789 at >>= 3;
790 domU_staging_area[at++] = a;
791 domU_staging_area[at++] = b;
792 domU_staging_area[at++] = c;
793 domU_staging_area[at] = d;
794 return 0;
796 }
797 #endif
799 /*
800 * Domain 0 has direct access to all devices absolutely. However
801 * the major point of this stub here, is to allow alloc_dom_mem
802 * handled with order > 0 request. Dom0 requires that bit set to
803 * allocate memory for other domains.
804 */
805 void physdev_init_dom0(struct domain *d)
806 {
807 set_bit(_DOMF_physdev_access, &d->domain_flags);
808 }
810 extern unsigned long running_on_sim;
811 unsigned int vmx_dom0 = 0;
812 int construct_dom0(struct domain *d,
813 unsigned long image_start, unsigned long image_len,
814 unsigned long initrd_start, unsigned long initrd_len,
815 char *cmdline)
816 {
817 char *dst;
818 int i, rc;
819 unsigned long pfn, mfn;
820 unsigned long nr_pt_pages;
821 unsigned long count;
822 unsigned long alloc_start, alloc_end;
823 struct pfn_info *page = NULL;
824 start_info_t *si;
825 struct vcpu *v = d->vcpu[0];
827 struct domain_setup_info dsi;
828 unsigned long p_start;
829 unsigned long pkern_start;
830 unsigned long pkern_entry;
831 unsigned long pkern_end;
832 unsigned long ret, progress = 0;
834 //printf("construct_dom0: starting\n");
835 /* Sanity! */
836 #ifndef CLONE_DOMAIN0
837 if ( d != dom0 )
838 BUG();
839 if ( test_bit(_DOMF_constructed, &d->domain_flags) )
840 BUG();
841 #endif
843 memset(&dsi, 0, sizeof(struct domain_setup_info));
845 printk("*** LOADING DOMAIN 0 ***\n");
847 alloc_start = dom0_start;
848 alloc_end = dom0_start + dom0_size;
849 d->tot_pages = d->max_pages = dom0_size/PAGE_SIZE;
850 image_start = __va(ia64_boot_param->initrd_start);
851 image_len = ia64_boot_param->initrd_size;
852 //printk("image_start=%lx, image_len=%lx\n",image_start,image_len);
853 //printk("First word of image: %lx\n",*(unsigned long *)image_start);
855 //printf("construct_dom0: about to call parseelfimage\n");
856 dsi.image_addr = (unsigned long)image_start;
857 dsi.image_len = image_len;
858 rc = parseelfimage(&dsi);
859 if ( rc != 0 )
860 return rc;
862 #ifdef CONFIG_VTI
863 /* Temp workaround */
864 if (running_on_sim)
865 dsi.xen_section_string = (char *)1;
867 /* Check whether dom0 is vti domain */
868 if ((!vmx_enabled) && !dsi.xen_section_string) {
869 printk("Lack of hardware support for unmodified vmx dom0\n");
870 panic("");
871 }
873 if (vmx_enabled && !dsi.xen_section_string) {
874 printk("Dom0 is vmx domain!\n");
875 vmx_dom0 = 1;
876 }
877 #endif
879 p_start = dsi.v_start;
880 pkern_start = dsi.v_kernstart;
881 pkern_end = dsi.v_kernend;
882 pkern_entry = dsi.v_kernentry;
884 //printk("p_start=%lx, pkern_start=%lx, pkern_end=%lx, pkern_entry=%lx\n",p_start,pkern_start,pkern_end,pkern_entry);
886 if ( (p_start & (PAGE_SIZE-1)) != 0 )
887 {
888 printk("Initial guest OS must load to a page boundary.\n");
889 return -EINVAL;
890 }
892 printk("METAPHYSICAL MEMORY ARRANGEMENT:\n"
893 " Kernel image: %lx->%lx\n"
894 " Entry address: %lx\n"
895 " Init. ramdisk: (NOT IMPLEMENTED YET)\n",
896 pkern_start, pkern_end, pkern_entry);
898 if ( (pkern_end - pkern_start) > (d->max_pages * PAGE_SIZE) )
899 {
900 printk("Initial guest OS requires too much space\n"
901 "(%luMB is greater than %luMB limit)\n",
902 (pkern_end-pkern_start)>>20, (d->max_pages<<PAGE_SHIFT)>>20);
903 return -ENOMEM;
904 }
906 // if high 3 bits of pkern start are non-zero, error
908 // if pkern end is after end of metaphysical memory, error
909 // (we should be able to deal with this... later)
912 //
914 #if 0
915 strcpy(d->name,"Domain0");
916 #endif
918 /* Mask all upcalls... */
919 for ( i = 0; i < MAX_VIRT_CPUS; i++ )
920 d->shared_info->vcpu_data[i].evtchn_upcall_mask = 1;
922 #ifdef CONFIG_VTI
923 /* Construct a frame-allocation list for the initial domain, since these
924 * pages are allocated by boot allocator and pfns are not set properly
925 */
926 for ( mfn = (alloc_start>>PAGE_SHIFT);
927 mfn < (alloc_end>>PAGE_SHIFT);
928 mfn++ )
929 {
930 page = &frame_table[mfn];
931 page_set_owner(page, d);
932 page->u.inuse.type_info = 0;
933 page->count_info = PGC_allocated | 1;
934 list_add_tail(&page->list, &d->page_list);
936 /* Construct 1:1 mapping */
937 machine_to_phys_mapping[mfn] = mfn;
938 }
940 /* Dom0's pfn is equal to mfn, so there's no need to allocate pmt
941 * for dom0
942 */
943 d->arch.pmt = NULL;
944 #endif
946 /* Copy the OS image. */
947 loaddomainelfimage(d,image_start);
949 /* Copy the initial ramdisk. */
950 //if ( initrd_len != 0 )
951 // memcpy((void *)vinitrd_start, initrd_start, initrd_len);
953 /* Sync d/i cache conservatively */
954 if (!running_on_sim) {
955 ret = ia64_pal_cache_flush(4, 0, &progress, NULL);
956 if (ret != PAL_STATUS_SUCCESS)
957 panic("PAL CACHE FLUSH failed for dom0.\n");
958 printk("Sync i/d cache for dom0 image SUCC\n");
959 }
961 /* Set up start info area. */
962 si = (start_info_t *)alloc_xenheap_page();
963 memset(si, 0, PAGE_SIZE);
964 d->shared_info->arch.start_info_pfn = __pa(si) >> PAGE_SHIFT;
966 #if 0
967 si->nr_pages = d->tot_pages;
968 si->shared_info = virt_to_phys(d->shared_info);
969 si->flags = SIF_PRIVILEGED | SIF_INITDOMAIN;
970 //si->pt_base = vpt_start;
971 //si->nr_pt_frames = nr_pt_pages;
972 //si->mfn_list = vphysmap_start;
974 if ( initrd_len != 0 )
975 {
976 //si->mod_start = vinitrd_start;
977 si->mod_len = initrd_len;
978 printk("Initrd len 0x%lx, start at 0x%08lx\n",
979 si->mod_len, si->mod_start);
980 }
982 dst = si->cmd_line;
983 if ( cmdline != NULL )
984 {
985 for ( i = 0; i < 255; i++ )
986 {
987 if ( cmdline[i] == '\0' )
988 break;
989 *dst++ = cmdline[i];
990 }
991 }
992 *dst = '\0';
994 zap_low_mappings(); /* Do the same for the idle page tables. */
995 #endif
997 /* Give up the VGA console if DOM0 is configured to grab it. */
998 if (cmdline != NULL)
999 console_endboot(strstr(cmdline, "tty0") != NULL);
1001 /* VMX specific construction for Dom0, if hardware supports VMX
1002 * and Dom0 is unmodified image
1003 */
1004 printk("Dom0: 0x%lx, domain: 0x%lx\n", (u64)dom0, (u64)d);
1005 if (vmx_dom0)
1006 vmx_final_setup_domain(dom0);
1007 else{
1008 d->vcpu[0]->arch.privregs =
1009 alloc_xenheap_pages(get_order(sizeof(mapped_regs_t)));
1010 printf("arch_vcpu_info=%p\n", d->vcpu[0]->arch.privregs);
1011 memset(d->vcpu[0]->arch.privregs, 0, PAGE_SIZE);
1012 }
1014 set_bit(_DOMF_constructed, &d->domain_flags);
1016 new_thread(v, pkern_entry, 0, 0);
1017 physdev_init_dom0(d);
1019 // FIXME: Hack for keyboard input
1020 #ifdef CLONE_DOMAIN0
1021 if (d == dom0)
1022 #endif
1023 serial_input_init();
1024 if (d == dom0) {
1025 VCPU(v, delivery_mask[0]) = -1L;
1026 VCPU(v, delivery_mask[1]) = -1L;
1027 VCPU(v, delivery_mask[2]) = -1L;
1028 VCPU(v, delivery_mask[3]) = -1L;
1029 }
1030 else __set_bit(0x30, VCPU(v, delivery_mask));
1032 return 0;
1033 }
1035 // FIXME: When dom0 can construct domains, this goes away (or is rewritten)
1036 int construct_domU(struct domain *d,
1037 unsigned long image_start, unsigned long image_len,
1038 unsigned long initrd_start, unsigned long initrd_len,
1039 char *cmdline)
1040 {
1041 int i, rc;
1042 struct vcpu *v = d->vcpu[0];
1043 unsigned long pkern_entry;
1045 #ifndef DOMU_AUTO_RESTART
1046 if ( test_bit(_DOMF_constructed, &d->domain_flags) ) BUG();
1047 #endif
1049 printk("*** LOADING DOMAIN %d ***\n",d->domain_id);
1051 d->max_pages = dom0_size/PAGE_SIZE; // FIXME: use dom0 size
1052 // FIXME: use domain0 command line
1053 rc = parsedomainelfimage(image_start, image_len, &pkern_entry);
1054 printk("parsedomainelfimage returns %d\n",rc);
1055 if ( rc != 0 ) return rc;
1057 /* Mask all upcalls... */
1058 for ( i = 0; i < MAX_VIRT_CPUS; i++ )
1059 d->shared_info->vcpu_data[i].evtchn_upcall_mask = 1;
1061 /* Copy the OS image. */
1062 printk("calling loaddomainelfimage(%p,%p)\n",d,image_start);
1063 loaddomainelfimage(d,image_start);
1064 printk("loaddomainelfimage returns\n");
1066 set_bit(_DOMF_constructed, &d->domain_flags);
1068 printk("calling new_thread, entry=%p\n",pkern_entry);
1069 #ifdef DOMU_AUTO_RESTART
1070 v->domain->arch.image_start = image_start;
1071 v->domain->arch.image_len = image_len;
1072 v->domain->arch.entry = pkern_entry;
1073 #endif
1074 new_thread(v, pkern_entry, 0, 0);
1075 printk("new_thread returns\n");
1076 __set_bit(0x30, VCPU(v, delivery_mask));
1078 return 0;
1079 }
1081 #ifdef DOMU_AUTO_RESTART
1082 void reconstruct_domU(struct vcpu *v)
1083 {
1084 /* re-copy the OS image to reset data values to original */
1085 printk("reconstruct_domU: restarting domain %d...\n",
1086 v->domain->domain_id);
1087 loaddomainelfimage(v->domain,v->domain->arch.image_start);
1088 new_thread(v, v->domain->arch.entry, 0, 0);
1089 }
1090 #endif
1092 // FIXME: When dom0 can construct domains, this goes away (or is rewritten)
1093 int launch_domainU(unsigned long size)
1094 {
1095 #ifdef CLONE_DOMAIN0
1096 static int next = CLONE_DOMAIN0+1;
1097 #else
1098 static int next = 1;
1099 #endif
1101 struct domain *d = do_createdomain(next,0);
1102 if (!d) {
1103 printf("launch_domainU: couldn't create\n");
1104 return 1;
1105 }
1106 else next++;
1107 if (construct_domU(d, (unsigned long)domU_staging_area, size,0,0,0)) {
1108 printf("launch_domainU: couldn't construct(id=%d,%lx,%lx)\n",
1109 d->domain_id,domU_staging_area,size);
1110 return 2;
1111 }
1112 domain_unpause_by_systemcontroller(d);
1113 }
1115 void machine_restart(char * __unused)
1116 {
1117 if (platform_is_hp_ski()) dummy();
1118 printf("machine_restart called: spinning....\n");
1119 while(1);
1120 }
1122 void machine_halt(void)
1123 {
1124 if (platform_is_hp_ski()) dummy();
1125 printf("machine_halt called: spinning....\n");
1126 while(1);
1127 }
1129 void dummy_called(char *function)
1130 {
1131 if (platform_is_hp_ski()) asm("break 0;;");
1132 printf("dummy called in %s: spinning....\n", function);
1133 while(1);
1134 }
1137 #if 0
1138 void switch_to(struct vcpu *prev, struct vcpu *next)
1139 {
1140 struct vcpu *last;
1142 __switch_to(prev,next,last);
1143 //set_current(next);
1144 }
1145 #endif
1147 void domain_pend_keyboard_interrupt(int irq)
1148 {
1149 vcpu_pend_interrupt(dom0->vcpu[0],irq);
1150 }
1152 void vcpu_migrate_cpu(struct vcpu *v, int newcpu)
1153 {
1154 if ( v->processor == newcpu )
1155 return;
1157 set_bit(_VCPUF_cpu_migrated, &v->vcpu_flags);
1158 v->processor = newcpu;
1159 }
1161 void sync_vcpu_execstate(struct vcpu *v)
1162 {
1163 ia64_save_fpu(v->arch._thread.fph);
1164 #ifdef CONFIG_VTI
1165 if (VMX_DOMAIN(v))
1166 vmx_save_state(v);
1167 #else
1168 if (0) do {} while(0);
1169 #endif
1170 else {
1171 if (IA64_HAS_EXTRA_STATE(v))
1172 ia64_save_extra(v);
1173 }
1174 // FIXME SMP: Anything else needed here for SMP?