ia64/xen-unstable

view xen-2.4.16/common/domain.c @ 119:134b05519a54

bitkeeper revision 1.22.1.6 (3e3fe07f2qF_tQ4Ixhocx0G38HxwOg)

domain.c:
We now properly deallocate domain memory during teardown.
author kaf24@labyrinth.cl.cam.ac.uk
date Tue Feb 04 15:47:11 2003 +0000 (2003-02-04)
parents 73643659824d
children 658b3aeca0e5
#include <xeno/config.h>
#include <xeno/init.h>
#include <xeno/lib.h>
#include <xeno/errno.h>
#include <xeno/sched.h>
#include <xeno/mm.h>
#include <xeno/skbuff.h>
#include <xeno/interrupt.h>
#include <xeno/delay.h>
#include <xeno/event.h>
#include <xeno/dom0_ops.h>
#include <asm/io.h>
#include <asm/domain_page.h>
#include <asm/msr.h>
#include <xeno/multiboot.h>

#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED)
#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED|_PAGE_DIRTY)

extern int do_process_page_updates_bh(page_update_request_t *, int);

extern int nr_mods;
extern module_t *mod;
extern unsigned char *cmdline;

rwlock_t tasklist_lock __cacheline_aligned = RW_LOCK_UNLOCKED;

schedule_data_t schedule_data[NR_CPUS];
int wake_up(struct task_struct *p)
{
    unsigned long flags;
    int ret = 0;

    spin_lock_irqsave(&schedule_data[p->processor].lock, flags);

    if ( __task_on_runqueue(p) ) goto out;

    p->state = TASK_RUNNING;
    __add_to_runqueue(p);
    ret = 1;

 out:
    spin_unlock_irqrestore(&schedule_data[p->processor].lock, flags);
    return ret;
}
struct task_struct *do_newdomain(void)
{
    int retval;
    struct task_struct *p = NULL;
    unsigned long flags;

    retval = -ENOMEM;
    p = alloc_task_struct();
    if (!p) goto newdomain_out;
    memset(p, 0, sizeof(*p));
    p->shared_info = (void *)get_free_page(GFP_KERNEL);
    memset(p->shared_info, 0, PAGE_SIZE);

    SET_GDT_ENTRIES(p, DEFAULT_GDT_ENTRIES);
    SET_GDT_ADDRESS(p, DEFAULT_GDT_ADDRESS);

    p->addr_limit    = USER_DS;
    p->state         = TASK_UNINTERRUPTIBLE;
    p->active_mm     = &p->mm;
    p->num_net_vifs  = 0;

    /*
     * KAF: Passing the new domain struct into this function is gross!
     * Therefore, for now we just allocate the single blk_ring
     * before the multiple net_rings :-)
     */
    p->blk_ring_base = (blk_ring_t *)(p->shared_info + 1);
    p->net_ring_base = (net_ring_t *)(p->blk_ring_base + 1);
    p->pg_head = p->tot_pages = 0;
    write_lock_irqsave(&tasklist_lock, flags);
    SET_LINKS(p);
    write_unlock_irqrestore(&tasklist_lock, flags);

 newdomain_out:
    return(p);
}
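/*
 * [Editorial sketch, not in the original source.] The pointer arithmetic
 * above carves the single shared_info page up as follows:
 *
 *     +------------------+ <- p->shared_info  (start of the page)
 *     | shared_info_t    |
 *     +------------------+ <- p->blk_ring_base == (blk_ring_t *)(p->shared_info + 1)
 *     | blk_ring_t       |
 *     +------------------+ <- p->net_ring_base == (net_ring_t *)(p->blk_ring_base + 1)
 *     | net_ring_t(s)    |
 *     +------------------+
 *
 * All three structures must therefore fit within PAGE_SIZE.
 */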
void reschedule(struct task_struct *p)
{
    int cpu = p->processor;
    struct task_struct *curr;
    unsigned long flags;

    if ( p->has_cpu ) return;

    spin_lock_irqsave(&schedule_data[cpu].lock, flags);
    curr = schedule_data[cpu].curr;
    if ( is_idle_task(curr) )
    {
        set_bit(_HYP_EVENT_NEED_RESCHED, &curr->hyp_events);
        spin_unlock_irqrestore(&schedule_data[cpu].lock, flags);
#ifdef CONFIG_SMP
        if ( cpu != smp_processor_id() ) smp_send_event_check_cpu(cpu);
#endif
    }
    else
    {
        spin_unlock_irqrestore(&schedule_data[cpu].lock, flags);
    }
}
static void process_timeout(unsigned long __data)
{
    struct task_struct * p = (struct task_struct *) __data;
    wake_up(p);
}
long schedule_timeout(long timeout)
{
    struct timer_list timer;
    unsigned long expire;

    switch (timeout)
    {
    case MAX_SCHEDULE_TIMEOUT:
        /*
         * These two special cases are useful to be comfortable in the caller.
         * Nothing more. We could take MAX_SCHEDULE_TIMEOUT from one of the
         * negative values, but I'd like to return a valid offset (>=0) to
         * allow the caller to do everything it wants with the retval.
         */
        schedule();
        goto out;
    default:
        /*
         * Another bit of paranoia. Note that the retval will be 0 since no
         * piece of kernel is supposed to check for a negative retval of
         * schedule_timeout() (since it should never happen anyway). You just
         * have the printk() that will tell you if something has gone wrong
         * and where.
         */
        if (timeout < 0)
        {
            printk(KERN_ERR "schedule_timeout: wrong timeout "
                   "value %lx from %p\n", timeout,
                   __builtin_return_address(0));
            current->state = TASK_RUNNING;
            goto out;
        }
    }

    expire = timeout + jiffies;

    init_timer(&timer);
    timer.expires  = expire;
    timer.data     = (unsigned long) current;
    timer.function = process_timeout;

    add_timer(&timer);
    schedule();
    del_timer_sync(&timer);

    timeout = expire - jiffies;

 out:
    return timeout < 0 ? 0 : timeout;
}
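/*
 * [Editorial usage sketch, not in the original source.] A typical caller
 * sets its state before calling schedule_timeout() and treats the return
 * value as the number of jiffies left (0 means the full timeout elapsed).
 * The wrapper below is purely illustrative.
 */
#if 0
static long sleep_for_example(long ticks)
{
    current->state = TASK_INTERRUPTIBLE;   /* wake_up() or the timer resumes us */
    return schedule_timeout(ticks);        /* 0 => timed out; >0 => woken early */
}
#endif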
long do_yield(void)
{
    current->state = TASK_INTERRUPTIBLE;
    schedule();
    return 0;
}
/* Get a pointer to the specified domain. Consider replacing this
 * with a hash lookup later.
 *
 * Also, kill_other_domain should call this instead of scanning on its own.
 */
struct task_struct *find_domain_by_id(unsigned int dom)
{
    struct task_struct *p = &idle0_task;

    read_lock_irq(&tasklist_lock);
    do {
        if ( (p->domain == dom) ) {
            read_unlock_irq(&tasklist_lock);
            return (p);
        }
    } while ( (p = p->next_task) != &idle0_task );
    read_unlock_irq(&tasklist_lock);

    return 0;
}
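/*
 * [Editorial sketch, not in the original source.] One shape the hash lookup
 * suggested above could take. The table, the hypothetical 'next_hash' link
 * and the insertion discipline are all assumptions - only the linear scan
 * above actually exists in this file.
 */
#if 0
#define DOMAIN_HASH_SIZE 64     /* hypothetical: power of two for cheap masking */
static struct task_struct *domain_hash[DOMAIN_HASH_SIZE];

static struct task_struct *find_domain_by_id_hashed(unsigned int dom)
{
    struct task_struct *p;
    read_lock_irq(&tasklist_lock);
    for ( p = domain_hash[dom & (DOMAIN_HASH_SIZE-1)]; p != NULL; p = p->next_hash )
        if ( p->domain == dom ) break;
    read_unlock_irq(&tasklist_lock);
    return p;
}
#endif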
void kill_domain_with_errmsg(const char *err)
{
    printk("DOM%d FATAL ERROR: %s\n",
           current->domain, err);
    kill_domain();
}
/* Kill the currently executing domain. */
void kill_domain(void)
{
    if ( current->domain == 0 )
    {
        extern void machine_restart(char *);
        printk("Domain 0 killed: rebooting machine!\n");
        machine_restart(0);
    }

    printk("Killing domain %d\n", current->domain);
    current->state = TASK_DYING;
    schedule();
    BUG(); /* never get here */
}
long kill_other_domain(unsigned int dom)
{
    struct task_struct *p = &idle0_task;
    unsigned long cpu_mask = 0;
    long ret = -ESRCH;

    read_lock_irq(&tasklist_lock);
    do {
        if ( p->domain == dom )
        {
            cpu_mask = mark_guest_event(p, _EVENT_DIE);
            ret = 0;
            break;
        }
    }
    while ( (p = p->next_task) != &idle0_task );
    read_unlock_irq(&tasklist_lock);

    hyp_event_notify(cpu_mask);

    return ret;
}
/* Release resources belonging to task @p. */
void release_task(struct task_struct *p)
{
    ASSERT(!__task_on_runqueue(p));
    ASSERT(p->state == TASK_DYING);
    ASSERT(!p->has_cpu);
    write_lock_irq(&tasklist_lock);
    REMOVE_LINKS(p);
    write_unlock_irq(&tasklist_lock);

    /*
     * Safe! Only queue skbuffs with tasklist_lock held.
     * Only access shared_info with tasklist_lock held.
     * And free_task_struct() only releases if refcnt == 0.
     */
    while ( p->num_net_vifs )
    {
        destroy_net_vif(p);
    }
    if ( p->mm.perdomain_pt ) free_page((unsigned long)p->mm.perdomain_pt);
    free_page((unsigned long)p->shared_info);
    if ( p->tot_pages != 0 )
    {
        /* Splice the domain's pages into the free list. */
        struct list_head *first = &frame_table[p->pg_head].list;
        struct list_head *last  = first->prev;
        free_list.next->prev = last;
        last->next = free_list.next;
        free_list.next = first;
        first->prev = &free_list;
        free_pfns += p->tot_pages;
    }
    free_task_struct(p);
}
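/*
 * [Editorial note, not in the original source.] The block above is the fix
 * described in this changeset ("properly deallocate domain memory during
 * teardown"): the domain's whole page chain is spliced back onto the global
 * free_list in O(1) and free_pfns is credited. The same pointer surgery,
 * written out standalone for clarity (assuming Linux-style circular
 * struct list_head chains):
 */
#if 0
static void splice_chain_onto_free_list(struct list_head *first,
                                        struct list_head *free)
{
    struct list_head *last = first->prev;  /* tail of the circular chain */

    free->next->prev = last;               /* old head of the free list now follows the tail */
    last->next       = free->next;
    free->next       = first;              /* spliced chain becomes the new head */
    first->prev      = free;
}
#endif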
asmlinkage void schedule(void)
{
    struct task_struct *prev, *next;
    struct list_head *tmp;
    int this_cpu;

 need_resched_back:
    prev = current;
    this_cpu = prev->processor;

    spin_lock_irq(&schedule_data[this_cpu].lock);

    ASSERT(!in_interrupt());
    ASSERT(__task_on_runqueue(prev));

    if ( !prev->counter )
    {
        prev->counter = 2;
        __move_last_runqueue(prev);
    }

    switch ( prev->state )
    {
    case TASK_INTERRUPTIBLE:
        if ( signal_pending(prev) )
        {
            prev->state = TASK_RUNNING;
            break;
        }
    default:
        __del_from_runqueue(prev);
    case TASK_RUNNING:;
    }
    clear_bit(_HYP_EVENT_NEED_RESCHED, &prev->hyp_events);

    /* Round-robin, skipping idle where possible. */
    next = NULL;
    list_for_each(tmp, &schedule_data[smp_processor_id()].runqueue) {
        next = list_entry(tmp, struct task_struct, run_list);
        if ( next->domain != IDLE_DOMAIN_ID ) break;
    }

    prev->has_cpu = 0;
    next->has_cpu = 1;

    schedule_data[this_cpu].prev = prev;
    schedule_data[this_cpu].curr = next;

    spin_unlock_irq(&schedule_data[this_cpu].lock);

    if ( unlikely(prev == next) )
    {
        /* We won't go through the normal tail, so do this by hand. */
        prev->policy &= ~SCHED_YIELD;
        goto same_process;
    }

    prepare_to_switch();
    switch_to(prev, next);
    prev = schedule_data[this_cpu].prev;

    prev->policy &= ~SCHED_YIELD;
    if ( prev->state == TASK_DYING ) release_task(prev);

 same_process:
    if ( test_bit(_HYP_EVENT_NEED_RESCHED, &current->hyp_events) )
        goto need_resched_back;
    return;
}
unsigned int alloc_new_dom_mem(struct task_struct *p, unsigned int kbytes)
{
    struct list_head *temp;
    struct pfn_info *pf, *pf_head;
    unsigned int alloc_pfns;
    unsigned int req_pages;

    /* How many pages do we need to alloc? */
    req_pages = kbytes >> (PAGE_SHIFT - 10);

    /* Is there enough mem to serve the request? */
    if ( req_pages > free_pfns )
        return -1;

    /* Allocate pages and build a thread through frame_table. */
    temp = free_list.next;

    /* Allocate the first page. */
    pf = pf_head = list_entry(temp, struct pfn_info, list);
    pf->flags |= p->domain;
    temp = temp->next;
    list_del(&pf->list);
    INIT_LIST_HEAD(&pf->list);
    p->pg_head = pf - frame_table;
    pf->type_count = pf->tot_count = 0;
    free_pfns--;

    /* Allocate the rest. */
    for ( alloc_pfns = req_pages - 1; alloc_pfns; alloc_pfns-- )
    {
        pf = list_entry(temp, struct pfn_info, list);
        pf->flags |= p->domain;
        temp = temp->next;
        list_del(&pf->list);

        list_add_tail(&pf->list, &pf_head->list);
        pf->type_count = pf->tot_count = 0;

        free_pfns--;
    }

    p->tot_pages = req_pages;

    return 0;
}
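/*
 * [Editorial note, not in the original source.] On the conversion above:
 * with 4kB pages (PAGE_SHIFT == 12), req_pages = kbytes >> 2, so e.g. a
 * 16384kB (16MB) request reserves 4096 page frames from free_pfns.
 */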
/* final_setup_guestos is used for final setup and launching of domains other
 * than domain 0, i.e. the domains that are being built by the userspace dom0
 * domain builder.
 *
 * Initial load map:
 *    start_address:
 *        OS image
 *          ....
 *    stack_start:
 *    start_info:
 *        <one page>
 *    page tables:
 *        <enough pages>
 *    end_address:
 *    shared_info:
 *        <one page>
 */
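/*
 * [Editorial example, not in the original source; values hypothetical.]
 * With virt_load_addr == 0xC0000000 and tot_pages == 0x1000 (16MB of 4kB
 * pages), final_setup_guestos() below reports
 *     pt_base = 0xC0000000 + (0xFFF << PAGE_SHIFT) = 0xC0FFF000,
 * i.e. the page tables occupy the top of the allocation and the shared_info
 * page is mapped just beyond end_address, as in the load map above.
 */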
int final_setup_guestos(struct task_struct * p, dom_meminfo_t * meminfo)
{
    struct list_head *list_ent;
    l2_pgentry_t * l2tab;
    l1_pgentry_t * l1tab;
    start_info_t * virt_startinfo_addr;
    unsigned long virt_stack_addr;
    unsigned long long time;
    unsigned long phys_l2tab;
    page_update_request_t * pgt_updates;
    unsigned long curr_update_phys;
    unsigned long count;
    net_ring_t *net_ring;
    net_vif_t *net_vif;
    char *dst;    // temporary
    int i;        // temporary

    /* First of all, set up the domain pagetables. */
    pgt_updates = (page_update_request_t *)
        map_domain_mem(meminfo->pgt_update_arr);
    curr_update_phys = meminfo->pgt_update_arr;
    for ( count = 0; count < meminfo->num_pgt_updates; count++ )
    {
        do_process_page_updates_bh(pgt_updates, 1);
        pgt_updates++;
        if ( !((unsigned long)pgt_updates & (PAGE_SIZE-1)) )
        {
            unmap_domain_mem(pgt_updates-1);
            list_ent = frame_table[curr_update_phys >> PAGE_SHIFT].list.next;
            curr_update_phys = list_entry(list_ent, struct pfn_info, list) -
                frame_table;
            curr_update_phys <<= PAGE_SHIFT;
            pgt_updates = map_domain_mem(curr_update_phys);
        }
    }
    unmap_domain_mem((void *)((unsigned long)(pgt_updates-1) & PAGE_MASK));

    /* Entries 0xe0000000 onwards in the page table must contain the
     * hypervisor mem mappings - set them up.
     */
    phys_l2tab = meminfo->l2_pgt_addr;
    l2tab = map_domain_mem(phys_l2tab);
    memcpy(l2tab + DOMAIN_ENTRIES_PER_L2_PAGETABLE,
           ((l2_pgentry_t *)idle_pg_table[p->processor]) +
           DOMAIN_ENTRIES_PER_L2_PAGETABLE,
           (ENTRIES_PER_L2_PAGETABLE - DOMAIN_ENTRIES_PER_L2_PAGETABLE)
           * sizeof(l2_pgentry_t));
    l2tab[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] =
        mk_l2_pgentry(__pa(p->mm.perdomain_pt) | PAGE_HYPERVISOR);
    p->mm.pagetable = mk_pagetable(phys_l2tab);
    unmap_domain_mem(l2tab);

    /* Map in the shared info structure. */
    phys_l2tab = pagetable_val(p->mm.pagetable);
    l2tab = map_domain_mem(phys_l2tab);
    l2tab += l2_table_offset(meminfo->virt_shinfo_addr);
    l1tab = map_domain_mem(l2_pgentry_to_phys(*l2tab));
    l1tab += l1_table_offset(meminfo->virt_shinfo_addr);
    *l1tab = mk_l1_pgentry(__pa(p->shared_info) | L1_PROT);
    unmap_domain_mem((void *)((unsigned long)l2tab & PAGE_MASK));
    unmap_domain_mem((void *)((unsigned long)l1tab & PAGE_MASK));

    /* Set up the shared info structure. */
    rdtscll(time);
    p->shared_info->wall_time    = time;
    p->shared_info->domain_time  = time;
    p->shared_info->ticks_per_ms = ticks_per_usec * 1000;

    /* We pass the start_info struct to the guest OS as a function parameter
     * on the stack. */
    virt_startinfo_addr = (start_info_t *)meminfo->virt_startinfo_addr;
    virt_stack_addr = (unsigned long)virt_startinfo_addr;

    /* We need to populate the start_info struct within the context of the
     * new domain, so temporarily install its pagetables.
     */
    __cli();
    __asm__ __volatile__ (
        "mov %%eax,%%cr3" : : "a" (pagetable_val(p->mm.pagetable)));

    memset(virt_startinfo_addr, 0, sizeof(*virt_startinfo_addr));
    virt_startinfo_addr->nr_pages = p->tot_pages;
    virt_startinfo_addr->shared_info = (shared_info_t *)meminfo->virt_shinfo_addr;
    virt_startinfo_addr->pt_base = meminfo->virt_load_addr +
        ((p->tot_pages - 1) << PAGE_SHIFT);

    /* Add virtual network interfaces and point to them in startinfo. */
    while (meminfo->num_vifs-- > 0) {
        net_vif = create_net_vif(p->domain);
        net_ring = net_vif->net_ring;
        if (!net_ring) panic("no network ring!\n");
    }

    /* XXX SMH: horrible hack to convert hypervisor VAs in SHIP to guest VAs.
     * It keeps the page offset of the hypervisor pointer and substitutes the
     * guest's virtual address of the shared-info page. */
#define SH2G(_x) (meminfo->virt_shinfo_addr | (((unsigned long)(_x)) & 0xFFF))

    virt_startinfo_addr->net_rings = (net_ring_t *)SH2G(p->net_ring_base);
    virt_startinfo_addr->num_net_rings = p->num_net_vifs;

    /* Add the block io interface. */
    virt_startinfo_addr->blk_ring = (blk_ring_t *)SH2G(p->blk_ring_base);

    dst = virt_startinfo_addr->cmd_line;
    if ( mod[0].string )
    {
        char *modline = (char *)__va(mod[0].string);
        for ( i = 0; i < 255; i++ )
        {
            if ( modline[i] == '\0' ) break;
            *dst++ = modline[i];
        }
    }
    *dst = '\0';

    if ( opt_nfsroot )
    {
        unsigned char boot[150];
        unsigned char ipbase[20], nfsserv[20], gateway[20], netmask[20];
        unsigned char nfsroot[70];
        snprintf(nfsroot, 70, opt_nfsroot, p->domain);
        snprintf(boot, sizeof(boot),   /* was 200: larger than the 150-byte buffer */
                 " root=/dev/nfs ip=%s:%s:%s:%s::eth0:off nfsroot=%s",
                 quad_to_str(opt_ipbase + p->domain, ipbase),
                 quad_to_str(opt_nfsserv, nfsserv),
                 quad_to_str(opt_gateway, gateway),
                 quad_to_str(opt_netmask, netmask),
                 nfsroot);
        strcpy(dst, boot);
    }

    /* Reinstate the caller's page tables. */
    __asm__ __volatile__ (
        "mov %%eax,%%cr3" : : "a" (pagetable_val(current->mm.pagetable)));
    __sti();

    new_thread(p,
               (unsigned long)meminfo->virt_load_addr,
               (unsigned long)virt_stack_addr,
               (unsigned long)virt_startinfo_addr);

    return 0;
}
static unsigned long alloc_page_from_domain(unsigned long * cur_addr,
                                            unsigned long * index)
{
    struct list_head *ent = frame_table[*cur_addr >> PAGE_SHIFT].list.prev;
    *cur_addr = list_entry(ent, struct pfn_info, list) - frame_table;
    *cur_addr <<= PAGE_SHIFT;
    (*index)--;
    return *cur_addr;
}
/* setup_guestos is used solely for building domain 0. Other domains are built
 * by the userspace dom0 domain builder, with final setup done by
 * final_setup_guestos.
 */
int setup_guestos(struct task_struct *p, dom0_newdomain_t *params)
{
    struct list_head *list_ent;
    char *src, *dst;
    int i, dom = p->domain;
    unsigned long phys_l1tab, phys_l2tab;
    unsigned long cur_address, alloc_address;
    unsigned long virt_load_address, virt_stack_address, virt_shinfo_address;
    start_info_t *virt_startinfo_address;
    unsigned long long time;
    unsigned long count;
    unsigned long alloc_index;
    l2_pgentry_t *l2tab, *l2start;
    l1_pgentry_t *l1tab = NULL, *l1start = NULL;
    struct pfn_info *page = NULL;
    net_ring_t *net_ring;
    net_vif_t *net_vif;

    /* Sanity! */
    if ( p->domain != 0 ) BUG();

    if ( strncmp(__va(mod[0].mod_start), "XenoGues", 8) )
    {
        printk("DOM%d: Invalid guest OS image\n", dom);
        return -1;
    }

    virt_load_address = *(unsigned long *)__va(mod[0].mod_start + 8);
    if ( (virt_load_address & (PAGE_SIZE-1)) )
    {
        printk("DOM%d: Guest OS load address not page-aligned (%08lx)\n",
               dom, virt_load_address);
        return -1;
    }

    if ( alloc_new_dom_mem(p, params->memory_kb) ) return -ENOMEM;
    alloc_address = p->pg_head << PAGE_SHIFT;
    alloc_index = p->tot_pages;

    if ( (mod[nr_mods-1].mod_end - mod[0].mod_start) >
         (params->memory_kb << 9) )
    {
        printk("DOM%d: Guest OS image is too large\n"
               "       (%luMB is greater than %uMB limit for a\n"
               "        %uMB address space)\n",
               dom, (mod[nr_mods-1].mod_end-mod[0].mod_start)>>20,
               (params->memory_kb)>>11,
               (params->memory_kb)>>10);
        /* XXX should free domain memory here XXX */
        return -1;
    }

    printk("DOM%d: Guest OS virtual load address is %08lx\n", dom,
           virt_load_address);
    /*
     * WARNING: The new domain must have its 'processor' field
     * filled in by now !!
     */
    phys_l2tab = alloc_page_from_domain(&alloc_address, &alloc_index);
    l2start = l2tab = map_domain_mem(phys_l2tab);
    memcpy(l2tab, idle_pg_table[p->processor], PAGE_SIZE);
    l2tab[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] =
        mk_l2_pgentry(__pa(p->mm.perdomain_pt) | __PAGE_HYPERVISOR);
    memset(l2tab, 0, DOMAIN_ENTRIES_PER_L2_PAGETABLE*sizeof(l2_pgentry_t));
    p->mm.pagetable = mk_pagetable(phys_l2tab);

    /*
     * NB. The upper limit on this loop does one extra page. This is to make
     * sure a pte exists when we want to map the shared_info struct.
     */
    l2tab += l2_table_offset(virt_load_address);
    cur_address = p->pg_head << PAGE_SHIFT;
    for ( count = 0; count < p->tot_pages + 1; count++ )
    {
        if ( !((unsigned long)l1tab & (PAGE_SIZE-1)) )
        {
            if ( l1tab != NULL ) unmap_domain_mem(l1start);
            phys_l1tab = alloc_page_from_domain(&alloc_address, &alloc_index);
            *l2tab++ = mk_l2_pgentry(phys_l1tab|L2_PROT);
            l1start = l1tab = map_domain_mem(phys_l1tab);
            clear_page(l1tab);
            l1tab += l1_table_offset(
                virt_load_address + (count << PAGE_SHIFT));
        }
        *l1tab++ = mk_l1_pgentry(cur_address|L1_PROT);

        if ( count < p->tot_pages )
        {
            page = frame_table + (cur_address >> PAGE_SHIFT);
            page->flags = dom | PGT_writeable_page;
            page->type_count = page->tot_count = 1;
            /* Set up the MPT entry. */
            machine_to_phys_mapping[cur_address >> PAGE_SHIFT] = count;
        }

        list_ent = frame_table[cur_address >> PAGE_SHIFT].list.next;
        cur_address = list_entry(list_ent, struct pfn_info, list) -
            frame_table;
        cur_address <<= PAGE_SHIFT;
    }
    unmap_domain_mem(l1start);

    /* Pages that are part of page tables must be read only. First advance
     * cur_address past the alloc_index ordinary pages, so that it points at
     * the first page-table frame in the domain's page chain. */
    cur_address = p->pg_head << PAGE_SHIFT;
    for ( count = 0; count < alloc_index; count++ )
    {
        list_ent = frame_table[cur_address >> PAGE_SHIFT].list.next;
        cur_address = list_entry(list_ent, struct pfn_info, list) -
            frame_table;
        cur_address <<= PAGE_SHIFT;
    }

    l2tab = l2start + l2_table_offset(virt_load_address +
                                      (alloc_index << PAGE_SHIFT));
    l1start = l1tab = map_domain_mem(l2_pgentry_to_phys(*l2tab));
    l1tab += l1_table_offset(virt_load_address + (alloc_index << PAGE_SHIFT));
    l2tab++;
    for ( count = alloc_index; count < p->tot_pages; count++ )
    {
        /* Clear _PAGE_RW in the current pte. (Split from the original
         * '*l1tab++ = mk_l1_pgentry(l1_pgentry_val(*l1tab) & ~_PAGE_RW);',
         * which read and wrote *l1tab in one unsequenced expression.) */
        *l1tab = mk_l1_pgentry(l1_pgentry_val(*l1tab) & ~_PAGE_RW);
        l1tab++;
        if ( !((unsigned long)l1tab & (PAGE_SIZE - 1)) )
        {
            unmap_domain_mem(l1start);
            l1start = l1tab = map_domain_mem(l2_pgentry_to_phys(*l2tab));
            l2tab++;
        }
        page = frame_table + (cur_address >> PAGE_SHIFT);
        page->flags = dom | PGT_l1_page_table;
        page->tot_count++;

        list_ent = frame_table[cur_address >> PAGE_SHIFT].list.next;
        cur_address = list_entry(list_ent, struct pfn_info, list) -
            frame_table;
        cur_address <<= PAGE_SHIFT;
    }
    page->flags = dom | PGT_l2_page_table;
    unmap_domain_mem(l1start);
    /* Map in the shared info structure. */
    virt_shinfo_address = virt_load_address + (p->tot_pages << PAGE_SHIFT);
    l2tab = l2start + l2_table_offset(virt_shinfo_address);
    l1start = l1tab = map_domain_mem(l2_pgentry_to_phys(*l2tab));
    l1tab += l1_table_offset(virt_shinfo_address);
    *l1tab = mk_l1_pgentry(__pa(p->shared_info)|L1_PROT);
    unmap_domain_mem(l1start);

    /* Set up the shared info area. */
    rdtscll(time);
    p->shared_info->wall_time    = time;
    p->shared_info->domain_time  = time;
    p->shared_info->ticks_per_ms = ticks_per_usec * 1000;

    virt_startinfo_address = (start_info_t *)
        (virt_load_address + ((alloc_index - 1) << PAGE_SHIFT));
    virt_stack_address = (unsigned long)virt_startinfo_address;

    unmap_domain_mem(l2start);

    /* Install the new page tables. */
    __cli();
    __asm__ __volatile__ (
        "mov %%eax,%%cr3" : : "a" (pagetable_val(p->mm.pagetable)));

    /* Copy the guest OS image. */
    src = (char *)__va(mod[0].mod_start + 12);
    dst = (char *)virt_load_address;
    while ( src < (char *)__va(mod[nr_mods-1].mod_end) ) *dst++ = *src++;

    /* Set up the start info area. */
    memset(virt_startinfo_address, 0, sizeof(*virt_startinfo_address));
    virt_startinfo_address->nr_pages = p->tot_pages;
    virt_startinfo_address->shared_info =
        (shared_info_t *)virt_shinfo_address;
    virt_startinfo_address->pt_base = virt_load_address +
        ((p->tot_pages - 1) << PAGE_SHIFT);

    /* Add virtual network interfaces and point to them in startinfo. */
    while (params->num_vifs-- > 0) {
        net_vif = create_net_vif(dom);
        net_ring = net_vif->net_ring;
        if (!net_ring) panic("no network ring!\n");
    }

    /* XXX SMH: horrible hack to convert hypervisor VAs in SHIP to guest VAs. */
#define SHIP2GUEST(_x) (virt_shinfo_address | (((unsigned long)(_x)) & 0xFFF))

    virt_startinfo_address->net_rings =
        (net_ring_t *)SHIP2GUEST(p->net_ring_base);
    virt_startinfo_address->num_net_rings = p->num_net_vifs;

    /* Add the block io interface. */
    virt_startinfo_address->blk_ring =
        (blk_ring_t *)SHIP2GUEST(p->blk_ring_base);

    /* Tell the guest OS about any modules we were given. */
    if ( nr_mods > 1 )
    {
        virt_startinfo_address->mod_start =
            (mod[1].mod_start-mod[0].mod_start-12) + virt_load_address;
        virt_startinfo_address->mod_len =
            mod[nr_mods-1].mod_end - mod[1].mod_start;
    }

    dst = virt_startinfo_address->cmd_line;
    if ( mod[0].string )
    {
        char *modline = (char *)__va(mod[0].string);
        for ( i = 0; i < 255; i++ )
        {
            if ( modline[i] == '\0' ) break;
            *dst++ = modline[i];
        }
    }
    *dst = '\0';

    if ( opt_nfsroot )
    {
        unsigned char boot[150];
        unsigned char ipbase[20], nfsserv[20], gateway[20], netmask[20];
        unsigned char nfsroot[70];
        snprintf(nfsroot, 70, opt_nfsroot, dom);
        snprintf(boot, sizeof(boot),   /* was 200: larger than the 150-byte buffer */
                 " root=/dev/nfs ip=%s:%s:%s:%s::eth0:off nfsroot=%s",
                 quad_to_str(opt_ipbase + dom, ipbase),
                 quad_to_str(opt_nfsserv, nfsserv),
                 quad_to_str(opt_gateway, gateway),
                 quad_to_str(opt_netmask, netmask),
                 nfsroot);
        strcpy(dst, boot);
    }

    /* Reinstate the caller's page tables. */
    __asm__ __volatile__ (
        "mov %%eax,%%cr3" : : "a" (pagetable_val(current->mm.pagetable)));
    __sti();

    new_thread(p,
               (unsigned long)virt_load_address,
               (unsigned long)virt_stack_address,
               (unsigned long)virt_startinfo_address);

    return 0;
}
void __init domain_init(void)
{
    int i;
    for ( i = 0; i < NR_CPUS; i++ )
    {
        INIT_LIST_HEAD(&schedule_data[i].runqueue);
        spin_lock_init(&schedule_data[i].lock);
        schedule_data[i].prev = &idle0_task;
        schedule_data[i].curr = &idle0_task;
    }
}