xen/arch/ia64/domain.c @ 6538:84ee014ebd41 (ia64/xen-unstable)

Merge xen-vtx-unstable.hg

author   adsharma@los-vmm.sc.intel.com
date     Wed Aug 17 12:34:38 2005 -0800 (2005-08-17)
parents  23979fb12c49 57b3fdca5dae
children 99914b54f7bf
/*
 * Copyright (C) 1995 Linus Torvalds
 *
 * Pentium III FXSR, SSE support
 * Gareth Hughes <gareth@valinux.com>, May 2000
 *
 * Copyright (C) 2005 Intel Co
 * Kun Tian (Kevin Tian) <kevin.tian@intel.com>
 *
 * 05/04/29 Kun Tian (Kevin Tian) <kevin.tian@intel.com> Add CONFIG_VTI domain support
 */

#include <xen/config.h>
#include <xen/lib.h>
#include <xen/errno.h>
#include <xen/sched.h>
#include <xen/smp.h>
#include <xen/delay.h>
#include <xen/softirq.h>
#include <xen/mm.h>
#include <asm/ptrace.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/desc.h>
//#include <asm/mpspec.h>
#include <xen/irq.h>
#include <xen/event.h>
//#include <xen/shadow.h>
#include <xen/console.h>

#include <xen/elf.h>
//#include <asm/page.h>
#include <asm/pgalloc.h>
#include <asm/dma.h>    /* for MAX_DMA_ADDRESS */

#include <asm/asm-offsets.h>    /* for IA64_THREAD_INFO_SIZE */

#include <asm/vcpu.h>   /* for function declarations */
#include <public/arch-ia64.h>
#ifdef CONFIG_VTI
#include <asm/vmx.h>
#include <asm/vmx_vcpu.h>
#include <asm/vmx_vpd.h>
#include <asm/pal.h>
#include <public/io/ioreq.h>
#endif // CONFIG_VTI
#define CONFIG_DOMAIN0_CONTIGUOUS
unsigned long dom0_start = -1L;
#ifdef CONFIG_VTI
unsigned long dom0_size = 512*1024*1024; //FIXME: Should be configurable
//FIXME: alignment should be 256MB, lest Linux use a 256MB page size
unsigned long dom0_align = 256*1024*1024;
#else // CONFIG_VTI
unsigned long dom0_size = 512*1024*1024; //FIXME: Should be configurable
//FIXME: alignment should be 256MB, lest Linux use a 256MB page size
unsigned long dom0_align = 64*1024*1024;
#endif // CONFIG_VTI
#ifdef DOMU_BUILD_STAGING
unsigned long domU_staging_size = 32*1024*1024; //FIXME: Should be configurable
unsigned long domU_staging_start;
unsigned long domU_staging_align = 64*1024;
unsigned long *domU_staging_area;
#endif

// initialized by arch/ia64/setup.c:find_initrd()
unsigned long initrd_start = 0, initrd_end = 0;

#define IS_XEN_ADDRESS(d,a) (((a) >= (d)->xen_vastart) && ((a) <= (d)->xen_vaend))
//extern int loadelfimage(char *);
extern int readelfimage_base_and_size(char *, unsigned long,
                unsigned long *, unsigned long *, unsigned long *);

unsigned long map_domain_page0(struct domain *);
extern unsigned long dom_fw_setup(struct domain *, char *, int);

/* this belongs in include/asm, but there doesn't seem to be a suitable place */
void free_perdomain_pt(struct domain *d)
{
    printf("free_perdomain_pt: not implemented\n");
    //free_page((unsigned long)d->mm.perdomain_pt);
}
int hlt_counter;

void disable_hlt(void)
{
    hlt_counter++;
}

void enable_hlt(void)
{
    hlt_counter--;
}

static void default_idle(void)
{
    if ( hlt_counter == 0 )
    {
        local_irq_disable();
        if ( !softirq_pending(smp_processor_id()) )
            safe_halt();
        //else
        local_irq_enable();
    }
}
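/*
 * The disable-check-halt sequence above is the usual idle-race idiom:
 * checking softirq_pending() with interrupts enabled would let a softirq
 * be raised between the check and the halt, leaving the CPU asleep with
 * work pending. A minimal sketch of the pattern (illustration only, not
 * compiled):
 */
#if 0
    local_irq_disable();                      /* close the race window */
    if (!softirq_pending(smp_processor_id())) /* re-check with irqs off */
        safe_halt();                          /* re-enables irqs and halts */
    else
        local_irq_enable();                   /* work arrived; don't sleep */
#endif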
void continue_cpu_idle_loop(void)
{
    int cpu = smp_processor_id();
    for ( ; ; )
    {
#ifdef IA64
//      __IRQ_STAT(cpu, idle_timestamp) = jiffies
#else
        irq_stat[cpu].idle_timestamp = jiffies;
#endif
        while ( !softirq_pending(cpu) )
            default_idle();
        raise_softirq(SCHEDULE_SOFTIRQ);
        do_softirq();
    }
}

void startup_cpu_idle_loop(void)
{
    /* Just some sanity to ensure that the scheduler is set up okay. */
    ASSERT(current->domain == IDLE_DOMAIN_ID);
    raise_softirq(SCHEDULE_SOFTIRQ);
    do_softirq();

    /*
     * Declares CPU setup done to the boot processor.
     * Therefore memory barrier to ensure state is visible.
     */
    smp_mb();
#if 0
    //do we have to ensure the idle task has a shared page so that, for example,
    //region registers can be loaded from it. Apparently not...
    idle0_task.shared_info = (void *)alloc_xenheap_page();
    memset(idle0_task.shared_info, 0, PAGE_SIZE);
    /* pin mapping */
    // FIXME: Does this belong here? Or do only at domain switch time?
    {
        /* WARNING: following must be inlined to avoid nested fault */
        unsigned long psr = ia64_clear_ic();
        ia64_itr(0x2, IA64_TR_SHARED_INFO, SHAREDINFO_ADDR,
                 pte_val(pfn_pte(ia64_tpa(idle0_task.shared_info) >> PAGE_SHIFT, PAGE_KERNEL)),
                 PAGE_SHIFT);
        ia64_set_psr(psr);
        ia64_srlz_i();
    }
#endif

    continue_cpu_idle_loop();
}
struct vcpu *arch_alloc_vcpu_struct(void)
{
    /* The per-vcpu stack is used here, so the vcpu structure must be kept
     * on the same page(s) as that stack. */
    return alloc_xenheap_pages(KERNEL_STACK_SIZE_ORDER);
}
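/*
 * Layout note: an order-KERNEL_STACK_SIZE_ORDER xenheap allocation is
 * 2^order contiguous pages. The vcpu structure sits at the base of that
 * block, and the kernel memory/register stacks are carved from the same
 * block at fixed offsets (IA64_RBS_OFFSET and IA64_STK_OFFSET, used in
 * init_switch_stack() below), which is why the vcpu and its stack must
 * come from a single allocation.
 */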
void arch_free_vcpu_struct(struct vcpu *v)
{
    free_xenheap_pages(v, KERNEL_STACK_SIZE_ORDER);
}

static void init_switch_stack(struct vcpu *v)
{
    struct pt_regs *regs = (struct pt_regs *) ((unsigned long) v + IA64_STK_OFFSET) - 1;
    struct switch_stack *sw = (struct switch_stack *) regs - 1;
    extern void ia64_ret_from_clone;

    memset(sw, 0, sizeof(struct switch_stack) + sizeof(struct pt_regs));
    sw->ar_bspstore = (unsigned long)v + IA64_RBS_OFFSET;
    sw->b0 = (unsigned long) &ia64_ret_from_clone;
    sw->ar_fpsr = FPSR_DEFAULT;
    v->arch._thread.ksp = (unsigned long) sw - 16;
    // stay on kernel stack because may get interrupts!
    // ia64_ret_from_clone (which b0 gets in new_thread) switches
    // to user stack
    v->arch._thread.on_ustack = 0;
    memset(v->arch._thread.fph, 0, sizeof(struct ia64_fpreg)*96);
}
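/*
 * Resulting layout of the combined vcpu/stack block set up above
 * (illustration; the exact offsets come from asm-offsets):
 *
 *   v + IA64_STK_OFFSET  +--------------------------+  (top of block)
 *                        |  struct pt_regs          |  <- regs
 *                        +--------------------------+
 *                        |  struct switch_stack     |  <- sw, ksp = sw - 16
 *                        +--------------------------+
 *                        |  memory stack (grows dn) |
 *                        |  RBS (grows up)          |
 *   v + IA64_RBS_OFFSET  +--------------------------+  <- ar.bspstore
 *                        |  struct vcpu / thread    |
 *   v                    +--------------------------+  (base of block)
 */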
#ifdef CONFIG_VTI
void arch_do_createdomain(struct vcpu *v)
{
    struct domain *d = v->domain;
    struct thread_info *ti = alloc_thread_info(v);

    /* Clear thread_info to clear some important fields, like preempt_count */
    memset(ti, 0, sizeof(struct thread_info));
    init_switch_stack(v);

    /* The shared info area is required to be allocated at domain
     * creation, since the control panel will write some I/O info
     * between front end and back end to that area. However for
     * a vmx domain, our design is to let the domain itself allocate
     * the shared info area, to keep machine pages contiguous. So this
     * page will be released later, when domainN issues the request
     * after it is up.
     */
    d->shared_info = (void *)alloc_xenheap_page();
    /* For now assume all vcpu info and event indicators can be
     * held in one shared page. Definitely later we need to
     * consider more about it.
     */
    memset(d->shared_info, 0, PAGE_SIZE);
    d->shared_info->vcpu_data[v->vcpu_id].arch.privregs =
        alloc_xenheap_pages(get_order(sizeof(mapped_regs_t)));
    printf("arch_vcpu_info=%p\n", d->shared_info->vcpu_data[v->vcpu_id].arch.privregs);
    memset(d->shared_info->vcpu_data[v->vcpu_id].arch.privregs, 0, PAGE_SIZE);
    v->vcpu_info = &d->shared_info->vcpu_data[v->vcpu_id];

    /* Mask all events; a specific port will be unmasked
     * when the customer subscribes to it.
     */
    if (v == d->vcpu[0]) {
        memset(&d->shared_info->evtchn_mask[0], 0xff,
               sizeof(d->shared_info->evtchn_mask));
    }

    /* Allocate per-domain vTLB and vhpt */
    v->arch.vtlb = init_domain_tlb(v);

    /* The physical->machine page table will be allocated at final
     * setup time, since we do not yet know the maximum pfn at this
     * stage.
     */

    /* FIXME: This is the identity-mapped address range for the xenheap.
     * Do we need it at all?
     */
    d->xen_vastart = XEN_START_ADDR;
    d->xen_vaend = XEN_END_ADDR;
    d->arch.breakimm = 0x1000;
}
#else // CONFIG_VTI
void arch_do_createdomain(struct vcpu *v)
{
    struct domain *d = v->domain;
    struct thread_info *ti = alloc_thread_info(v);

    /* Clear thread_info to clear some important fields, like preempt_count */
    memset(ti, 0, sizeof(struct thread_info));
    init_switch_stack(v);

    d->shared_info = (void *)alloc_xenheap_page();
    if (!d->shared_info) {
        printk("ERROR/HALTING: CAN'T ALLOC PAGE\n");
        while (1);
    }
    memset(d->shared_info, 0, PAGE_SIZE);
    d->shared_info->vcpu_data[0].arch.privregs =
        alloc_xenheap_pages(get_order(sizeof(mapped_regs_t)));
    printf("arch_vcpu_info=%p\n", d->shared_info->vcpu_data[0].arch.privregs);
    memset(d->shared_info->vcpu_data[0].arch.privregs, 0, PAGE_SIZE);
    v->vcpu_info = &(d->shared_info->vcpu_data[0]);

    d->max_pages = (128UL*1024*1024)/PAGE_SIZE; // 128MB default // FIXME
    if ((d->arch.metaphysical_rr0 = allocate_metaphysical_rr0()) == -1UL)
        BUG();
    VCPU(v, metaphysical_mode) = 1;
    v->arch.metaphysical_rr0 = d->arch.metaphysical_rr0;
    v->arch.metaphysical_saved_rr0 = d->arch.metaphysical_rr0;
#define DOMAIN_RID_BITS_DEFAULT 18
    if (!allocate_rid_range(d, DOMAIN_RID_BITS_DEFAULT)) // FIXME
        BUG();
    v->arch.starting_rid = d->arch.starting_rid;
    v->arch.ending_rid = d->arch.ending_rid;
    // the following will eventually need to be negotiated dynamically
    d->xen_vastart = XEN_START_ADDR;
    d->xen_vaend = XEN_END_ADDR;
    d->shared_info_va = SHAREDINFO_ADDR;
    d->arch.breakimm = 0x1000;
    v->arch.breakimm = d->arch.breakimm;

    d->arch.mm = xmalloc(struct mm_struct);
    if (unlikely(!d->arch.mm)) {
        // FIXME: no way to signal failure from this void function
        printk("Can't allocate mm_struct for domain %d\n", d->domain_id);
        return;
    }
    memset(d->arch.mm, 0, sizeof(*d->arch.mm));
    d->arch.mm->pgd = pgd_alloc(d->arch.mm);
    if (unlikely(!d->arch.mm->pgd)) {
        // FIXME: no way to signal failure from this void function
        printk("Can't allocate pgd for domain %d\n", d->domain_id);
        return;
    }
}
#endif // CONFIG_VTI
void arch_getdomaininfo_ctxt(struct vcpu *v, struct vcpu_guest_context *c)
{
    struct pt_regs *regs = (struct pt_regs *) ((unsigned long) v + IA64_STK_OFFSET) - 1;

    printf("arch_getdomaininfo_ctxt\n");
    c->regs = *regs;
    c->vcpu.evtchn_vector = v->vcpu_info->arch.evtchn_vector;
#if 0
    if (c->vcpu.privregs && copy_to_user(c->vcpu.privregs,
            v->vcpu_info->arch.privregs, sizeof(mapped_regs_t))) {
        printk("Bad ctxt address: 0x%lx\n", c->vcpu.privregs);
        return -EFAULT;
    }
#endif

    c->shared = v->domain->shared_info->arch;
}

#ifndef CONFIG_VTI
int arch_set_info_guest(struct vcpu *v, struct vcpu_guest_context *c)
{
    struct pt_regs *regs = (struct pt_regs *) ((unsigned long) v + IA64_STK_OFFSET) - 1;

    printf("arch_set_info_guest\n");
    *regs = c->regs;
    regs->cr_ipsr = IA64_PSR_IT|IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_IC|IA64_PSR_I|IA64_PSR_DFH|IA64_PSR_BN|IA64_PSR_SP|IA64_PSR_DI;
    regs->cr_ipsr |= 2UL << IA64_PSR_CPL0_BIT;
    regs->ar_rsc |= (2 << 2); /* force PL2/3 */

    v->vcpu_info->arch.evtchn_vector = c->vcpu.evtchn_vector;
    if ( c->vcpu.privregs && copy_from_user(v->vcpu_info->arch.privregs,
            c->vcpu.privregs, sizeof(mapped_regs_t))) {
        printk("Bad ctxt address in arch_set_info_guest: 0x%lx\n", c->vcpu.privregs);
        return -EFAULT;
    }

    init_all_rr(v);

    // this should be in userspace
    regs->r28 = dom_fw_setup(v->domain, "nomca nosmp xencons=tty0 console=tty0 root=/dev/hda1", 256L); //FIXME
    v->arch.domain_itm_last = -1L;
    VCPU(v, banknum) = 1;
    VCPU(v, metaphysical_mode) = 1;

    v->domain->shared_info->arch = c->shared;
    return 0;
}
#else // CONFIG_VTI
int arch_set_info_guest(
    struct vcpu *v, struct vcpu_guest_context *c)
{
    struct domain *d = v->domain;
    int i, rc, ret;
    unsigned long progress = 0;
    shared_iopage_t *sp;

    if ( test_bit(_VCPUF_initialised, &v->vcpu_flags) )
        return 0;

    /* Lazy FP not implemented yet */
    clear_bit(_VCPUF_fpu_initialised, &v->vcpu_flags);
    if ( c->flags & VGCF_FPU_VALID )
        set_bit(_VCPUF_fpu_initialised, &v->vcpu_flags);

    /* Sync d/i cache conservatively, after domain N is loaded */
    ret = ia64_pal_cache_flush(3, 0, &progress, NULL);
    if (ret != PAL_STATUS_SUCCESS)
        panic("PAL CACHE FLUSH failed for dom[%d].\n",
              v->domain->domain_id);
    DPRINTK("Sync i/d cache for dom%d image SUCC\n",
            v->domain->domain_id);

    /* Physical mode emulation initialization, including
     * emulation ID allocation and related memory request
     */
    physical_mode_init(v);

    /* FIXME: only a physically contiguous PMT table is supported so far */
    d->arch.pmt = __va(c->pt_base);
    d->arch.max_pfn = c->pt_max_pfn;
    d->arch.vmx_platform.shared_page_va = __va(c->share_io_pg);
    sp = get_sp(d);
    memset((char *)sp, 0, PAGE_SIZE);
    /* FIXME: temp due to old CP */
    sp->sp_global.eport = 2;
#ifdef V_IOSAPIC_READY
    sp->vcpu_number = 1;
#endif
    /* TEMP */
    d->arch.vmx_platform.pib_base = 0xfee00000UL;

    if (c->flags & VGCF_VMX_GUEST) {
        if (!vmx_enabled)
            panic("No VMX hardware feature for vmx domain.\n");

        vmx_final_setup_domain(d);

        /* One more step to enable interrupt assist */
        set_bit(ARCH_VMX_INTR_ASSIST, &v->arch.arch_vmx.flags);
    }

    vlsapic_reset(v);
    vtm_init(v);

    /* Only open one port for I/O and interrupt emulation */
    if (v == d->vcpu[0]) {
        memset(&d->shared_info->evtchn_mask[0], 0xff,
               sizeof(d->shared_info->evtchn_mask));
        clear_bit(iopacket_port(d), &d->shared_info->evtchn_mask[0]);
    }
    /* Set up the domain context. IA-64 is actually a bit different from
     * x86 here, in that almost all system resources are better managed
     * by the HV directly. The CP only needs to provide the start IP of
     * the guest, which ideally is the load address of the guest firmware.
     */
    new_thread(v, c->guest_iip, 0, 0);

    d->xen_vastart = XEN_START_ADDR;
    d->xen_vaend = XEN_END_ADDR;
    d->arch.breakimm = 0x1000 + d->domain_id;
    v->arch._thread.on_ustack = 0;

    /* Don't redo final setup */
    set_bit(_VCPUF_initialised, &v->vcpu_flags);

    return 0;
}
#endif // CONFIG_VTI
void arch_do_boot_vcpu(struct vcpu *v)
{
    struct domain *d = v->domain;
    printf("arch_do_boot_vcpu: not implemented\n");

    d->shared_info->vcpu_data[v->vcpu_id].arch.privregs =
        alloc_xenheap_pages(get_order(sizeof(mapped_regs_t)));
    printf("arch_vcpu_info=%p\n", d->shared_info->vcpu_data[v->vcpu_id].arch.privregs);
    memset(d->shared_info->vcpu_data[v->vcpu_id].arch.privregs, 0, PAGE_SIZE);
    return;
}

void domain_relinquish_resources(struct domain *d)
{
    /* FIXME */
    printf("domain_relinquish_resources: not implemented\n");
}
#ifdef CONFIG_VTI
void new_thread(struct vcpu *v,
                unsigned long start_pc,
                unsigned long start_stack,
                unsigned long start_info)
{
    struct domain *d = v->domain;
    struct pt_regs *regs;
    struct ia64_boot_param *bp;
    extern char saved_command_line[];
    //char *dom0_cmdline = "BOOT_IMAGE=scsi0:\EFI\redhat\xenlinux nomca root=/dev/sdb1 ro";

#ifdef CONFIG_DOMAIN0_CONTIGUOUS
    if (d == dom0) start_pc += dom0_start;
#endif

    regs = (struct pt_regs *) ((unsigned long) v + IA64_STK_OFFSET) - 1;
    if (VMX_DOMAIN(v)) {
        /* dt/rt/it:1;i/ic:1, si:1, vm/bn:1, ac:1 */
        regs->cr_ipsr = 0x501008826008; /* Need to be expanded as macro */
    } else {
        regs->cr_ipsr = (ia64_getreg(_IA64_REG_PSR)
                         | IA64_PSR_BITS_TO_SET | IA64_PSR_BN)
                        & ~(IA64_PSR_BITS_TO_CLEAR | IA64_PSR_RI | IA64_PSR_IS);
        regs->cr_ipsr |= 2UL << IA64_PSR_CPL0_BIT; // domain runs at PL2
    }
    regs->cr_iip = start_pc;
    regs->cr_ifs = 0; /* why? - matthewc */
    regs->ar_fpsr = FPSR_DEFAULT;
    if (VMX_DOMAIN(v)) {
        vmx_init_all_rr(v);
    } else
        init_all_rr(v);

    if (VMX_DOMAIN(v)) {
        if (d == dom0) {
            VMX_VPD(v, vgr[12]) = dom_fw_setup(d, saved_command_line, 256L);
            printk("new_thread, done with dom_fw_setup\n");
        }
        /* Virtual processor context setup */
        VMX_VPD(v, vpsr) = IA64_PSR_BN;
        VPD_CR(v, dcr) = 0;
    } else {
        regs->r28 = dom_fw_setup(d, saved_command_line, 256L);
        VCPU(v, banknum) = 1;
        VCPU(v, metaphysical_mode) = 1;
        d->shared_info->arch.flags = (d == dom0) ? (SIF_INITDOMAIN|SIF_PRIVILEGED|SIF_BLK_BE_DOMAIN|SIF_NET_BE_DOMAIN|SIF_USB_BE_DOMAIN) : 0;
    }
}
#else // CONFIG_VTI

// heavily leveraged from linux/arch/ia64/kernel/process.c:copy_thread()
// and linux/arch/ia64/kernel/process.c:kernel_thread()
void new_thread(struct vcpu *v,
                unsigned long start_pc,
                unsigned long start_stack,
                unsigned long start_info)
{
    struct domain *d = v->domain;
    struct pt_regs *regs;
    struct ia64_boot_param *bp;
    extern char saved_command_line[];

#ifdef CONFIG_DOMAIN0_CONTIGUOUS
    if (d == dom0) start_pc += dom0_start;
#endif

    regs = (struct pt_regs *) ((unsigned long) v + IA64_STK_OFFSET) - 1;
    regs->cr_ipsr = (ia64_getreg(_IA64_REG_PSR)
                     | IA64_PSR_BITS_TO_SET | IA64_PSR_BN)
                    & ~(IA64_PSR_BITS_TO_CLEAR | IA64_PSR_RI | IA64_PSR_IS);
    regs->cr_ipsr |= 2UL << IA64_PSR_CPL0_BIT; // domain runs at PL2
    regs->cr_iip = start_pc;
    regs->cr_ifs = 1UL << 63;
    regs->ar_fpsr = FPSR_DEFAULT;
    init_all_rr(v);
    regs->r28 = dom_fw_setup(d, saved_command_line, 256L); //FIXME
    VCPU(v, banknum) = 1;
    VCPU(v, metaphysical_mode) = 1;
    d->shared_info->arch.flags = (d == dom0) ? (SIF_INITDOMAIN|SIF_PRIVILEGED|SIF_BLK_BE_DOMAIN|SIF_NET_BE_DOMAIN|SIF_USB_BE_DOMAIN) : 0;
}
#endif // CONFIG_VTI
static struct page * map_new_domain0_page(unsigned long mpaddr)
{
    if (mpaddr < dom0_start || mpaddr >= dom0_start + dom0_size) {
        printk("map_new_domain0_page: bad domain0 mpaddr %p!\n", mpaddr);
        printk("map_new_domain0_page: start=%p, end=%p!\n", dom0_start, dom0_start+dom0_size);
        while(1);
    }
    return pfn_to_page((mpaddr >> PAGE_SHIFT));
}

/* allocate new page for domain and map it to the specified metaphysical addr */
struct page * map_new_domain_page(struct domain *d, unsigned long mpaddr)
{
    struct mm_struct *mm = d->arch.mm;
    struct page *p = (struct page *)0;
    pgd_t *pgd;
    pud_t *pud;
    pmd_t *pmd;
    pte_t *pte;
    extern unsigned long vhpt_paddr, vhpt_pend;

    if (!mm->pgd) {
        printk("map_new_domain_page: domain pgd must exist!\n");
        return(p);
    }
    pgd = pgd_offset(mm, mpaddr);
    if (pgd_none(*pgd))
        pgd_populate(mm, pgd, pud_alloc_one(mm, mpaddr));

    pud = pud_offset(pgd, mpaddr);
    if (pud_none(*pud))
        pud_populate(mm, pud, pmd_alloc_one(mm, mpaddr));

    pmd = pmd_offset(pud, mpaddr);
    if (pmd_none(*pmd))
        pmd_populate_kernel(mm, pmd, pte_alloc_one_kernel(mm, mpaddr));
        //pmd_populate(mm, pmd, pte_alloc_one(mm, mpaddr));

    pte = pte_offset_map(pmd, mpaddr);
    if (pte_none(*pte)) {
#ifdef CONFIG_DOMAIN0_CONTIGUOUS
        if (d == dom0) p = map_new_domain0_page(mpaddr);
        else
#endif
        {
            p = alloc_domheap_page(d);
            // zero out pages for security reasons (only if allocation succeeded)
            if (p) memset(__va(page_to_phys(p)), 0, PAGE_SIZE);
        }
        if (unlikely(!p)) {
            printf("map_new_domain_page: Can't alloc!!!! Aaaargh!\n");
            return(p);
        }
        if (unlikely(page_to_phys(p) > vhpt_paddr && page_to_phys(p) < vhpt_pend)) {
            printf("map_new_domain_page: reassigned vhpt page %p!!\n", page_to_phys(p));
        }
        set_pte(pte, pfn_pte(page_to_phys(p) >> PAGE_SHIFT,
                             __pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX)));
    }
    else printk("map_new_domain_page: mpaddr %lx already mapped!\n", mpaddr);
    return p;
}
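/*
 * The pgd/pud/pmd/pte descent above (repeated in map_domain_page() below)
 * is the standard four-level walk, populating missing intermediate levels
 * on the way down. The same logic, condensed into a hypothetical helper
 * (illustration only, not compiled):
 */
#if 0
static pte_t *walk_alloc_pte(struct mm_struct *mm, unsigned long addr)
{
    pgd_t *pgd = pgd_offset(mm, addr);
    pud_t *pud;
    pmd_t *pmd;

    if (pgd_none(*pgd))
        pgd_populate(mm, pgd, pud_alloc_one(mm, addr));
    pud = pud_offset(pgd, addr);
    if (pud_none(*pud))
        pud_populate(mm, pud, pmd_alloc_one(mm, addr));
    pmd = pmd_offset(pud, addr);
    if (pmd_none(*pmd))
        pmd_populate_kernel(mm, pmd, pte_alloc_one_kernel(mm, addr));
    return pte_offset_map(pmd, addr);    /* leaf entry for addr */
}
#endif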
/* map a physical address to the specified metaphysical addr */
void map_domain_page(struct domain *d, unsigned long mpaddr, unsigned long physaddr)
{
    struct mm_struct *mm = d->arch.mm;
    pgd_t *pgd;
    pud_t *pud;
    pmd_t *pmd;
    pte_t *pte;

    if (!mm->pgd) {
        printk("map_domain_page: domain pgd must exist!\n");
        return;
    }
    pgd = pgd_offset(mm, mpaddr);
    if (pgd_none(*pgd))
        pgd_populate(mm, pgd, pud_alloc_one(mm, mpaddr));

    pud = pud_offset(pgd, mpaddr);
    if (pud_none(*pud))
        pud_populate(mm, pud, pmd_alloc_one(mm, mpaddr));

    pmd = pmd_offset(pud, mpaddr);
    if (pmd_none(*pmd))
        pmd_populate_kernel(mm, pmd, pte_alloc_one_kernel(mm, mpaddr));
        //pmd_populate(mm, pmd, pte_alloc_one(mm, mpaddr));

    pte = pte_offset_map(pmd, mpaddr);
    if (pte_none(*pte)) {
        set_pte(pte, pfn_pte(physaddr >> PAGE_SHIFT,
                             __pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX)));
    }
    else printk("map_domain_page: mpaddr %lx already mapped!\n", mpaddr);
}
void mpafoo(unsigned long mpaddr)
{
    extern unsigned long privop_trace;
    if (mpaddr == 0x3800)
        privop_trace = 1;
}

unsigned long lookup_domain_mpa(struct domain *d, unsigned long mpaddr)
{
    struct mm_struct *mm = d->arch.mm;
    pgd_t *pgd = pgd_offset(mm, mpaddr);
    pud_t *pud;
    pmd_t *pmd;
    pte_t *pte;

#ifdef CONFIG_DOMAIN0_CONTIGUOUS
    if (d == dom0) {
        if (mpaddr < dom0_start || mpaddr >= dom0_start + dom0_size) {
            //printk("lookup_domain_mpa: bad dom0 mpaddr %p!\n",mpaddr);
            //printk("lookup_domain_mpa: start=%p,end=%p!\n",dom0_start,dom0_start+dom0_size);
            mpafoo(mpaddr);
        }
        pte_t pteval = pfn_pte(mpaddr >> PAGE_SHIFT,
                               __pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX));
        pte = &pteval;
        return *(unsigned long *)pte;
    }
#endif
tryagain:
    if (pgd_present(*pgd)) {
        pud = pud_offset(pgd, mpaddr);
        if (pud_present(*pud)) {
            pmd = pmd_offset(pud, mpaddr);
            if (pmd_present(*pmd)) {
                pte = pte_offset_map(pmd, mpaddr);
                if (pte_present(*pte)) {
                    //printk("lookup_domain_page: found mapping for %lx, pte=%lx\n",mpaddr,pte_val(*pte));
                    return *(unsigned long *)pte;
                }
            }
        }
    }
    /* if lookup fails and mpaddr is "legal", "create" the page */
    if ((mpaddr >> PAGE_SHIFT) < d->max_pages) {
        if (map_new_domain_page(d, mpaddr)) goto tryagain;
    }
    printk("lookup_domain_mpa: bad mpa %p (> %p)\n",
           mpaddr, d->max_pages << PAGE_SHIFT);
    mpafoo(mpaddr);
    return 0;
}
// FIXME: ONLY USE FOR DOMAIN PAGE_SIZE == PAGE_SIZE
#ifndef CONFIG_VTI
unsigned long domain_mpa_to_imva(struct domain *d, unsigned long mpaddr)
{
    unsigned long pte = lookup_domain_mpa(d, mpaddr);
    unsigned long imva;

    pte &= _PAGE_PPN_MASK;
    imva = __va(pte);
    imva |= mpaddr & ~PAGE_MASK;
    return(imva);
}
#else // CONFIG_VTI
unsigned long domain_mpa_to_imva(struct domain *d, unsigned long mpaddr)
{
    unsigned long imva = __gpa_to_mpa(d, mpaddr);

    return __va(imva);
}
#endif // CONFIG_VTI
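/*
 * Usage sketch for the translation path (hypothetical caller): look up a
 * guest metaphysical address, mask the pte down to its PPN, and re-apply
 * the in-page offset to get a Xen-mapped virtual address that can be
 * dereferenced directly (illustration only, not compiled):
 */
#if 0
    char *p = (char *) domain_mpa_to_imva(d, mpaddr);
    *p = 0;    /* write one byte of guest memory at mpaddr */
#endif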
// remove following line if not privifying in memory
//#define HAVE_PRIVIFY_MEMORY
#ifndef HAVE_PRIVIFY_MEMORY
#define privify_memory(x,y) do {} while(0)
#endif

// see arch/x86/xxx/domain_build.c
int elf_sanity_check(Elf_Ehdr *ehdr)
{
    return (IS_ELF(*ehdr));
}

static void copy_memory(void *dst, void *src, int size)
{
    int remain;

    if (IS_XEN_ADDRESS(dom0, src)) {
        memcpy(dst, src, size);
    }
    else {
        printf("About to call __copy_from_user(%p,%p,%d)\n",
               dst, src, size);
        while ((remain = __copy_from_user(dst, src, size)) != 0) {
            printf("incomplete user copy, %d remain of %d\n",
                   remain, size);
            dst += size - remain; src += size - remain;
            size = remain;
        }
    }
}
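/*
 * Note on the retry loop above: __copy_from_user() returns the number of
 * bytes it could NOT copy, so each pass advances dst/src past the bytes
 * that did transfer and retries only the remainder; the loop exits when a
 * chunk copies completely, and will spin on a persistently faulting
 * source address.
 */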
void loaddomainelfimage(struct domain *d, unsigned long image_start)
{
    char *elfbase = (char *) image_start;
    //Elf_Ehdr *ehdr = (Elf_Ehdr *)image_start;
    Elf_Ehdr ehdr;
    Elf_Phdr phdr;
    int h, filesz, memsz, paddr;
    unsigned long elfaddr, dom_mpaddr, dom_imva;
    struct page *p;
    unsigned long pteval;

    copy_memory(&ehdr, (void *) image_start, sizeof(Elf_Ehdr));
    for ( h = 0; h < ehdr.e_phnum; h++ ) {
        copy_memory(&phdr, elfbase + ehdr.e_phoff + (h*ehdr.e_phentsize),
                    sizeof(Elf_Phdr));
        //if ( !is_loadable_phdr(phdr) )
        if (phdr.p_type != PT_LOAD) {
            continue;
        }
        filesz = phdr.p_filesz; memsz = phdr.p_memsz;
        elfaddr = (unsigned long) elfbase + phdr.p_offset;
        dom_mpaddr = phdr.p_paddr;
        //printf("p_offset: %x, size=%x\n",elfaddr,filesz);
#ifdef CONFIG_DOMAIN0_CONTIGUOUS
        if (d == dom0) {
            if (dom_mpaddr+memsz > dom0_size || dom_mpaddr+filesz > dom0_size) {
                printf("Domain0 doesn't fit in allocated space!\n");
                while(1);
            }
            dom_imva = __va(dom_mpaddr + dom0_start);
            copy_memory(dom_imva, elfaddr, filesz);
            if (memsz > filesz) memset(dom_imva+filesz, 0, memsz-filesz);
            //FIXME: This test for code seems to find a lot more than objdump -x does
            if (phdr.p_flags & PF_X) privify_memory(dom_imva, filesz);
        }
        else
#endif
        while (memsz > 0) {
#ifdef DOMU_AUTO_RESTART
            pteval = lookup_domain_mpa(d, dom_mpaddr);
            if (pteval) dom_imva = __va(pteval & _PFN_MASK);
            else { printf("loaddomainelfimage: BAD!\n"); while(1); }
#else
            p = map_new_domain_page(d, dom_mpaddr);
            if (unlikely(!p)) BUG();
            dom_imva = __va(page_to_phys(p));
#endif
            if (filesz > 0) {
                if (filesz >= PAGE_SIZE)
                    copy_memory(dom_imva, elfaddr, PAGE_SIZE);
                else { // copy partial page, zero the rest of page
                    copy_memory(dom_imva, elfaddr, filesz);
                    memset(dom_imva+filesz, 0, PAGE_SIZE-filesz);
                }
                //FIXME: This test for code seems to find a lot more than objdump -x does
                if (phdr.p_flags & PF_X)
                    privify_memory(dom_imva, PAGE_SIZE);
            }
            else if (memsz > 0) // always zero out entire page
                memset(dom_imva, 0, PAGE_SIZE);
            memsz -= PAGE_SIZE; filesz -= PAGE_SIZE;
            elfaddr += PAGE_SIZE; dom_mpaddr += PAGE_SIZE;
        }
    }
}
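/*
 * Per the ELF spec, p_filesz <= p_memsz for PT_LOAD segments; the bytes
 * between the two are the segment's BSS and must read as zero, which the
 * memset() calls above implement page by page. In the page loop, once the
 * file data is exhausted, filesz drops to zero or below and the
 * "filesz > 0" test turns the remaining pages into pure zero-fill.
 */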
int
parsedomainelfimage(char *elfbase, unsigned long elfsize, unsigned long *entry)
{
    Elf_Ehdr ehdr;

    copy_memory(&ehdr, elfbase, sizeof(Elf_Ehdr));

    if ( !elf_sanity_check(&ehdr) ) {
        printk("ELF sanity check failed.\n");
        return -EINVAL;
    }

    if ( (ehdr.e_phoff + (ehdr.e_phnum * ehdr.e_phentsize)) > elfsize )
    {
        printk("ELF program headers extend beyond end of image.\n");
        return -EINVAL;
    }

    if ( (ehdr.e_shoff + (ehdr.e_shnum * ehdr.e_shentsize)) > elfsize )
    {
        printk("ELF section headers extend beyond end of image.\n");
        return -EINVAL;
    }

#if 0
    /* Find the section-header strings table. */
    if ( ehdr.e_shstrndx == SHN_UNDEF )
    {
        printk("ELF image has no section-header strings table (shstrtab).\n");
        return -EINVAL;
    }
#endif

    *entry = ehdr.e_entry;
    printf("parsedomainelfimage: entry point = %p\n", *entry);

    return 0;
}
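/*
 * The two range checks above assume the e_phnum * e_phentsize and
 * e_shnum * e_shentsize products do not wrap. A hardened variant
 * (hypothetical, not applied here) would bound each factor first, e.g.:
 */
#if 0
    if (ehdr.e_phoff > elfsize ||
        ehdr.e_phentsize == 0 ||
        ehdr.e_phnum > (elfsize - ehdr.e_phoff) / ehdr.e_phentsize)
        return -EINVAL;
#endif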
void alloc_dom0(void)
{
#ifdef CONFIG_DOMAIN0_CONTIGUOUS
    if (platform_is_hp_ski()) {
        dom0_size = 128*1024*1024; //FIXME: Should be configurable
    }
    printf("alloc_dom0: starting (initializing %lu MB...)\n", dom0_size/(1024*1024));

    /* FIXME: The first chunk (say 256M) should always be assigned to
     * Dom0, since Dom0's physical == machine address for DMA purposes.
     * Some old versions of Linux, like 2.4, assume physical memory exists
     * in the 2nd 64M space.
     */
    dom0_start = alloc_boot_pages(
        dom0_size >> PAGE_SHIFT, dom0_align >> PAGE_SHIFT);
    dom0_start <<= PAGE_SHIFT;
    if (!dom0_start) {
        printf("alloc_dom0: can't allocate contiguous memory size=%lu\n",
               dom0_size);
        while(1);
    }
    printf("alloc_dom0: dom0_start=%p\n", dom0_start);
#else
    dom0_start = 0;
#endif
}
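/*
 * Allocation arithmetic: alloc_boot_pages() works in page frame numbers,
 * so the size and alignment above are converted with >> PAGE_SHIFT and
 * the returned pfn is converted back with << PAGE_SHIFT. Assuming the
 * usual 16KB ia64 page size, the 512MB/64MB defaults request 32768
 * frames aligned to a 4096-frame boundary.
 */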
#ifdef DOMU_BUILD_STAGING
void alloc_domU_staging(void)
{
    domU_staging_size = 32*1024*1024; //FIXME: Should be configurable
    printf("alloc_domU_staging: starting (initializing %lu MB...)\n", domU_staging_size/(1024*1024));
    domU_staging_start = alloc_boot_pages(
        domU_staging_size >> PAGE_SHIFT, domU_staging_align >> PAGE_SHIFT);
    domU_staging_start <<= PAGE_SHIFT;
    if (!domU_staging_start) {
        printf("alloc_domU_staging: can't allocate, spinning...\n");
        while(1);
    }
    else domU_staging_area = (unsigned long *)__va(domU_staging_start);
    printf("alloc_domU_staging: domU_staging_area=%p\n", domU_staging_area);
}

unsigned long
domU_staging_read_8(unsigned long at)
{
    // no way to return errors so just do it
    return domU_staging_area[at >> 3];
}

unsigned long
domU_staging_write_32(unsigned long at, unsigned long a, unsigned long b,
                      unsigned long c, unsigned long d)
{
    if (at + 32 > domU_staging_size) return -1;
    if (at & 0x1f) return -1;
    at >>= 3;
    domU_staging_area[at++] = a;
    domU_staging_area[at++] = b;
    domU_staging_area[at++] = c;
    domU_staging_area[at] = d;
    return 0;
}
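/*
 * Usage sketch for the staging accessors (hypothetical caller): a builder
 * streams a domU image into the staging area as aligned 32-byte stores,
 * which launch_domainU() later consumes. Offsets are byte offsets;
 * domU_staging_write_32() rejects unaligned or out-of-range stores
 * (illustration only, not compiled):
 */
#if 0
    if (domU_staging_write_32(off, w0, w1, w2, w3))
        printf("staging write failed at offset %lx\n", off);
#endif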
#endif

#ifdef CONFIG_VTI
/* Depending on whether the domain is a vmx domain or not, a different
 * context may be set up here.
 */
void
post_arch_do_create_domain(struct vcpu *v, int vmx_domain)
{
    struct domain *d = v->domain;

    if (!vmx_domain) {
        d->shared_info = (void*)alloc_xenheap_page();
        if (!d->shared_info)
            panic("Allocate share info for non-vmx domain failed.\n");
        d->shared_info_va = 0xfffd000000000000;

        printk("Build shared info for non-vmx domain\n");
        build_shared_info(d);
        /* Setup start info area */
    }
}

/* For a VMX domain, this is invoked when the kernel inside the domain
 * actively requests it.
 */
void build_shared_info(struct domain *d)
{
    int i;

    /* Set up shared-info area. */
    update_dom_time(d);

    /* Mask all upcalls... */
    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
        d->shared_info->vcpu_data[i].evtchn_upcall_mask = 1;

    /* ... */
}

/*
 * Domain 0 has direct access to all devices. The major point of this
 * stub, however, is to allow alloc_dom_mem to handle requests with
 * order > 0: Dom0 requires that bit set in order to allocate memory
 * for other domains.
 */
void physdev_init_dom0(struct domain *d)
{
    set_bit(_DOMF_physdev_access, &d->domain_flags);
}
extern unsigned long running_on_sim;
unsigned int vmx_dom0 = 0;
int construct_dom0(struct domain *d,
                   unsigned long image_start, unsigned long image_len,
                   unsigned long initrd_start, unsigned long initrd_len,
                   char *cmdline)
{
    char *dst;
    int i, rc;
    unsigned long pfn, mfn;
    unsigned long nr_pt_pages;
    unsigned long count;
    unsigned long alloc_start, alloc_end;
    struct pfn_info *page = NULL;
    start_info_t *si;
    struct vcpu *v = d->vcpu[0];
    struct domain_setup_info dsi;
    unsigned long p_start;
    unsigned long pkern_start;
    unsigned long pkern_entry;
    unsigned long pkern_end;
    unsigned long ret;
    unsigned long progress = 0;

    //printf("construct_dom0: starting\n");
    /* Sanity! */
#ifndef CLONE_DOMAIN0
    if ( d != dom0 )
        BUG();
    if ( test_bit(_DOMF_constructed, &d->domain_flags) )
        BUG();
#endif

    printk("##Dom0: 0x%lx, domain: 0x%lx\n", (u64)dom0, (u64)d);
    memset(&dsi, 0, sizeof(struct domain_setup_info));

    printk("*** LOADING DOMAIN 0 ***\n");

    alloc_start = dom0_start;
    alloc_end = dom0_start + dom0_size;
    d->tot_pages = d->max_pages = (alloc_end - alloc_start)/PAGE_SIZE;
    image_start = __va(ia64_boot_param->initrd_start);
    image_len = ia64_boot_param->initrd_size;

    dsi.image_addr = (unsigned long)image_start;
    dsi.image_len = image_len;
    rc = parseelfimage(&dsi);
    if ( rc != 0 )
        return rc;

    /* Temp workaround */
    if (running_on_sim)
        dsi.xen_section_string = (char *)1;

    if ((!vmx_enabled) && !dsi.xen_section_string) {
        printk("Lack of hardware support for unmodified vmx dom0\n");
        panic("");
    }

    if (vmx_enabled && !dsi.xen_section_string) {
        printk("Dom0 is vmx domain!\n");
        vmx_dom0 = 1;
    }

    p_start = dsi.v_start;
    pkern_start = dsi.v_kernstart;
    pkern_end = dsi.v_kernend;
    pkern_entry = dsi.v_kernentry;

    printk("p_start=%lx, pkern_start=%lx, pkern_end=%lx, pkern_entry=%lx\n",
           p_start, pkern_start, pkern_end, pkern_entry);

    if ( (p_start & (PAGE_SIZE-1)) != 0 )
    {
        printk("Initial guest OS must load to a page boundary.\n");
        return -EINVAL;
    }

    printk("METAPHYSICAL MEMORY ARRANGEMENT:\n"
           " Kernel image: %lx->%lx\n"
           " Entry address: %lx\n"
           " Init. ramdisk: (NOT IMPLEMENTED YET)\n",
           pkern_start, pkern_end, pkern_entry);

    if ( (pkern_end - pkern_start) > (d->max_pages * PAGE_SIZE) )
    {
        printk("Initial guest OS requires too much space\n"
               "(%luMB is greater than %luMB limit)\n",
               (pkern_end-pkern_start)>>20, (d->max_pages<<PAGE_SHIFT)>>20);
        return -ENOMEM;
    }

    // Other sanity checks about the Dom0 image

    /* Construct a frame-allocation list for the initial domain, since these
     * pages are allocated by the boot allocator and pfns are not set properly
     */
    for ( mfn = (alloc_start>>PAGE_SHIFT);
          mfn < (alloc_end>>PAGE_SHIFT);
          mfn++ )
    {
        page = &frame_table[mfn];
        page_set_owner(page, d);
        page->u.inuse.type_info = 0;
        page->count_info = PGC_allocated | 1;
        list_add_tail(&page->list, &d->page_list);

        /* Construct 1:1 mapping */
        machine_to_phys_mapping[mfn] = mfn;
    }

    post_arch_do_create_domain(v, vmx_dom0);

    /* Load Dom0 image to its own memory */
    loaddomainelfimage(d, image_start);

    /* Copy the initial ramdisk. */

    /* Sync d/i cache conservatively */
    ret = ia64_pal_cache_flush(4, 0, &progress, NULL);
    if (ret != PAL_STATUS_SUCCESS)
        panic("PAL CACHE FLUSH failed for dom0.\n");
    printk("Sync i/d cache for dom0 image SUCC\n");

    /* Physical mode emulation initialization, including
     * emulation ID allocation and related memory request
     */
    physical_mode_init(v);
    /* Dom0's pfn is equal to mfn, so there's no need to allocate pmt
     * for dom0
     */
    d->arch.pmt = NULL;

    /* Give up the VGA console if DOM0 is configured to grab it. */
    if (cmdline != NULL)
        console_endboot(strstr(cmdline, "tty0") != NULL);

    /* VMX specific construction for Dom0, if hardware supports VMX
     * and Dom0 is an unmodified image
     */
    printk("Dom0: 0x%lx, domain: 0x%lx\n", (u64)dom0, (u64)d);
    if (vmx_dom0)
        vmx_final_setup_domain(dom0);

    /* vpd is ready now */
    vlsapic_reset(v);
    vtm_init(v);

    set_bit(_DOMF_constructed, &d->domain_flags);
    new_thread(v, pkern_entry, 0, 0);

    physdev_init_dom0(d);

    // FIXME: Hack for keyboard input
#ifdef CLONE_DOMAIN0
    if (d == dom0)
#endif
        serial_input_init();
    if (d == dom0) {
        VCPU(v, delivery_mask[0]) = -1L;
        VCPU(v, delivery_mask[1]) = -1L;
        VCPU(v, delivery_mask[2]) = -1L;
        VCPU(v, delivery_mask[3]) = -1L;
    }
    else __set_bit(0x30, VCPU(v, delivery_mask));

    return 0;
}
#else //CONFIG_VTI

int construct_dom0(struct domain *d,
                   unsigned long image_start, unsigned long image_len,
                   unsigned long initrd_start, unsigned long initrd_len,
                   char *cmdline)
{
    char *dst;
    int i, rc;
    unsigned long pfn, mfn;
    unsigned long nr_pt_pages;
    unsigned long count;
    //l2_pgentry_t *l2tab, *l2start;
    //l1_pgentry_t *l1tab = NULL, *l1start = NULL;
    struct pfn_info *page = NULL;
    start_info_t *si;
    struct vcpu *v = d->vcpu[0];

    struct domain_setup_info dsi;
    unsigned long p_start;
    unsigned long pkern_start;
    unsigned long pkern_entry;
    unsigned long pkern_end;

    //printf("construct_dom0: starting\n");
    /* Sanity! */
#ifndef CLONE_DOMAIN0
    if ( d != dom0 )
        BUG();
    if ( test_bit(_DOMF_constructed, &d->domain_flags) )
        BUG();
#endif

    memset(&dsi, 0, sizeof(struct domain_setup_info));

    printk("*** LOADING DOMAIN 0 ***\n");

    d->max_pages = dom0_size/PAGE_SIZE;
    image_start = __va(ia64_boot_param->initrd_start);
    image_len = ia64_boot_param->initrd_size;
    //printk("image_start=%lx, image_len=%lx\n",image_start,image_len);
    //printk("First word of image: %lx\n",*(unsigned long *)image_start);

    //printf("construct_dom0: about to call parseelfimage\n");
    dsi.image_addr = (unsigned long)image_start;
    dsi.image_len = image_len;
    rc = parseelfimage(&dsi);
    if ( rc != 0 )
        return rc;

    p_start = dsi.v_start;
    pkern_start = dsi.v_kernstart;
    pkern_end = dsi.v_kernend;
    pkern_entry = dsi.v_kernentry;

    //printk("p_start=%lx, pkern_start=%lx, pkern_end=%lx, pkern_entry=%lx\n",p_start,pkern_start,pkern_end,pkern_entry);

    if ( (p_start & (PAGE_SIZE-1)) != 0 )
    {
        printk("Initial guest OS must load to a page boundary.\n");
        return -EINVAL;
    }

    printk("METAPHYSICAL MEMORY ARRANGEMENT:\n"
           " Kernel image: %lx->%lx\n"
           " Entry address: %lx\n"
           " Init. ramdisk: (NOT IMPLEMENTED YET)\n",
           pkern_start, pkern_end, pkern_entry);

    if ( (pkern_end - pkern_start) > (d->max_pages * PAGE_SIZE) )
    {
        printk("Initial guest OS requires too much space\n"
               "(%luMB is greater than %luMB limit)\n",
               (pkern_end-pkern_start)>>20, (d->max_pages<<PAGE_SHIFT)>>20);
        return -ENOMEM;
    }

    // if high 3 bits of pkern start are non-zero, error
    // if pkern end is after end of metaphysical memory, error
    //  (we should be able to deal with this... later)

#if 0
    strcpy(d->name, "Domain0");
#endif

    /* Mask all upcalls... */
    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
        d->shared_info->vcpu_data[i].evtchn_upcall_mask = 1;

    /* Copy the OS image. */
    //(void)loadelfimage(image_start);
    loaddomainelfimage(d, image_start);

    /* Copy the initial ramdisk. */
    //if ( initrd_len != 0 )
    //    memcpy((void *)vinitrd_start, initrd_start, initrd_len);

#if 0
    /* Set up start info area. */
    //si = (start_info_t *)vstartinfo_start;
    memset(si, 0, PAGE_SIZE);
    si->nr_pages = d->tot_pages;
    si->shared_info = virt_to_phys(d->shared_info);
    si->flags = SIF_PRIVILEGED | SIF_INITDOMAIN;
    //si->pt_base = vpt_start;
    //si->nr_pt_frames = nr_pt_pages;
    //si->mfn_list = vphysmap_start;

    if ( initrd_len != 0 )
    {
        //si->mod_start = vinitrd_start;
        si->mod_len = initrd_len;
        printk("Initrd len 0x%lx, start at 0x%08lx\n",
               si->mod_len, si->mod_start);
    }

    dst = si->cmd_line;
    if ( cmdline != NULL )
    {
        for ( i = 0; i < 255; i++ )
        {
            if ( cmdline[i] == '\0' )
                break;
            *dst++ = cmdline[i];
        }
    }
    *dst = '\0';

    zap_low_mappings(); /* Do the same for the idle page tables. */
#endif

    /* Give up the VGA console if DOM0 is configured to grab it. */
#ifdef IA64
    if (cmdline != NULL)
#endif
        console_endboot(strstr(cmdline, "tty0") != NULL);

    set_bit(_DOMF_constructed, &d->domain_flags);

    new_thread(v, pkern_entry, 0, 0);
    // FIXME: Hack for keyboard input
#ifdef CLONE_DOMAIN0
    if (d == dom0)
#endif
        serial_input_init();
    if (d == dom0) {
        VCPU(v, delivery_mask[0]) = -1L;
        VCPU(v, delivery_mask[1]) = -1L;
        VCPU(v, delivery_mask[2]) = -1L;
        VCPU(v, delivery_mask[3]) = -1L;
    }
    else __set_bit(0x30, VCPU(v, delivery_mask));

    return 0;
}
#endif // CONFIG_VTI
// FIXME: When dom0 can construct domains, this goes away (or is rewritten)
int construct_domU(struct domain *d,
                   unsigned long image_start, unsigned long image_len,
                   unsigned long initrd_start, unsigned long initrd_len,
                   char *cmdline)
{
    int i, rc;
    struct vcpu *v = d->vcpu[0];
    unsigned long pkern_entry;

#ifndef DOMU_AUTO_RESTART
    if ( test_bit(_DOMF_constructed, &d->domain_flags) ) BUG();
#endif

    printk("*** LOADING DOMAIN %d ***\n", d->domain_id);

    d->max_pages = dom0_size/PAGE_SIZE; // FIXME: use dom0 size
    // FIXME: use domain0 command line
    rc = parsedomainelfimage((char *) image_start, image_len, &pkern_entry);
    printk("parsedomainelfimage returns %d\n", rc);
    if ( rc != 0 ) return rc;

    /* Mask all upcalls... */
    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
        d->shared_info->vcpu_data[i].evtchn_upcall_mask = 1;

    /* Copy the OS image. */
    printk("calling loaddomainelfimage(%p,%p)\n", d, image_start);
    loaddomainelfimage(d, image_start);
    printk("loaddomainelfimage returns\n");

    set_bit(_DOMF_constructed, &d->domain_flags);

    printk("calling new_thread, entry=%p\n", pkern_entry);
#ifdef DOMU_AUTO_RESTART
    v->domain->arch.image_start = image_start;
    v->domain->arch.image_len = image_len;
    v->domain->arch.entry = pkern_entry;
#endif
    new_thread(v, pkern_entry, 0, 0);
    printk("new_thread returns\n");
    __set_bit(0x30, VCPU(v, delivery_mask));

    return 0;
}
#ifdef DOMU_AUTO_RESTART
void reconstruct_domU(struct vcpu *v)
{
    /* re-copy the OS image to reset data values to original */
    printk("reconstruct_domU: restarting domain %d...\n",
           v->domain->domain_id);
    loaddomainelfimage(v->domain, v->domain->arch.image_start);
    new_thread(v, v->domain->arch.entry, 0, 0);
}
#endif
// FIXME: When dom0 can construct domains, this goes away (or is rewritten)
int launch_domainU(unsigned long size)
{
#ifdef CLONE_DOMAIN0
    static int next = CLONE_DOMAIN0+1;
#else
    static int next = 1;
#endif

    struct domain *d = do_createdomain(next, 0);
    if (!d) {
        printf("launch_domainU: couldn't create\n");
        return 1;
    }
    else next++;
    if (construct_domU(d, (unsigned long)domU_staging_area, size, 0, 0, 0)) {
        printf("launch_domainU: couldn't construct(id=%d,%lx,%lx)\n",
               d->domain_id, domU_staging_area, size);
        return 2;
    }
    domain_unpause_by_systemcontroller(d);
    return 0;
}
void machine_restart(char * __unused)
{
    if (platform_is_hp_ski()) dummy();
    printf("machine_restart called: spinning....\n");
    while(1);
}

void machine_halt(void)
{
    if (platform_is_hp_ski()) dummy();
    printf("machine_halt called: spinning....\n");
    while(1);
}

void dummy_called(char *function)
{
    if (platform_is_hp_ski()) asm("break 0;;");
    printf("dummy called in %s: spinning....\n", function);
    while(1);
}
#if 0
void switch_to(struct vcpu *prev, struct vcpu *next)
{
    struct vcpu *last;

    __switch_to(prev, next, last);
    //set_current(next);
}
#endif

void domain_pend_keyboard_interrupt(int irq)
{
    vcpu_pend_interrupt(dom0->vcpu[0], irq);
}

void vcpu_migrate_cpu(struct vcpu *v, int newcpu)
{
    if ( v->processor == newcpu )
        return;

    set_bit(_VCPUF_cpu_migrated, &v->vcpu_flags);
    v->processor = newcpu;
}