ia64/xen-unstable

tools/libxc/xc_plan9_build.c @ 4895:24dfd18ea63e

bitkeeper revision 1.1159.258.120 (42848bfe8kMyWWcBA64rq7h7l7AyoA)

Shadow code bug fix (found by Ian) that was breaking refcounts and subsequently causing migration problems.

author      mafetter@fleming.research
date        Fri May 13 11:14:06 2005 +0000
parents     346f2b5e1711
children    0914061e11ca 0dc3b8b8c298
/******************************************************************************
 * xc_plan9_build.c
 * derived from xc_linux_build.c
 */

#include "xc_private.h"

#include <zlib.h>

#define DEBUG 1
#ifdef DEBUG
/* wrapped in do/while so DPRINTF() behaves as a single statement,
 * even after an unbraced if/else */
#define DPRINTF(x) do { printf x; fflush(stdout); } while (0)
#else
#define DPRINTF(x) do { } while (0)
#endif

#include "plan9a.out.h"

/* top of stack: the initial stack occupies page 1 and grows down
 * from 0x2000, i.e. from the bottom of page STACKPAGE */
#define STACKPAGE 2
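
/* Illustrative arithmetic: the guest stack pointer is set below (in
 * xc_plan9_build) to 0x80000000 + (STACKPAGE << PAGE_SHIFT), which with
 * 4 KB pages is 0x80002000 -- the top of page 1. */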
struct Exec header, origheader;

typedef struct page {
    char data[PAGE_SIZE];
} PAGE;

int
memcpy_toguest(int xc_handle, u32 dom, void *v, int size,
               unsigned long *page_array, unsigned int to_page)
{
    int ret;
    unsigned char *cp = v;
    unsigned int whichpage;
    unsigned char *vaddr;

//  DPRINTF(("memcpy_to_guest: to_page 0x%x, count %d\n", to_page, size));
    for (ret = 0, whichpage = to_page; size > 0;
         whichpage++, size -= PAGE_SIZE, cp += PAGE_SIZE) {

//      DPRINTF (("map_pfn_writeable(%p, 0x%lx)\n", pm_handle,
//                page_array[whichpage]));
        vaddr = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                     PROT_READ | PROT_WRITE,
                                     page_array[whichpage]);
//      DPRINTF (("vaddr is %p\n", vaddr));
        if (vaddr == NULL) {
            ret = -1;
            ERROR("Couldn't map guest memory");
            goto out;
        }
//      DPRINTF (("copy %p to %p\n", cp, vaddr));
        /* the final page may be partial; don't read past the buffer */
        memcpy(vaddr, cp, size < PAGE_SIZE ? size : PAGE_SIZE);
        munmap(vaddr, PAGE_SIZE);
    }
  out:
    return ret;
}
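
/* Usage sketch (illustrative values only): copy a buffer into guest
 * pages starting at index 0x100 of a page_array previously filled by
 * get_pfn_list():
 *
 *     if (memcpy_toguest(xc_handle, dom, buf, bufsize,
 *                        page_array, 0x100) < 0)
 *         ERROR("copy to guest failed");
 */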
/* this is a function which can go away. It dumps a hunk of
 * guest pages to a file (/tmp/dumpit); handy for debugging
 * your image builder.
 * Xen guys, nuke this if you wish.
 */
void
dumpit(int xc_handle, u32 dom,
       int start_page, int tot, unsigned long *page_array)
{
    int i, ofd;
    unsigned char *vaddr;

    /* create/truncate the dump file rather than requiring it to exist */
    ofd = open("/tmp/dumpit", O_WRONLY | O_CREAT | O_TRUNC, 0644);
    if (ofd < 0) {
        perror("open /tmp/dumpit");
        return;
    }
    for (i = start_page; i < tot; i++) {
        vaddr = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                     PROT_READ | PROT_WRITE,
                                     page_array[i]);
        if (!vaddr) {
            fprintf(stderr, "Page %d\n", i);
            perror("xc_map_foreign_range");
            read(0, &i, 1);     /* pause for a keypress */
            close(ofd);
            return;
        }
        write(ofd, vaddr, PAGE_SIZE);
        munmap(vaddr, PAGE_SIZE);
    }
    close(ofd);
}

int
blah(char *b)
{
    int err = errno;    /* save errno before stdio calls can clobber it */
    fprintf(stderr, "Error in xc_plan9_build!\n");
    perror(b);
    return err;
}
/* swap bytes. For plan 9 headers */
void
swabby(unsigned long *s, char *name)
{
    unsigned long it;
    it = ((*s & 0xff000000) >> 24) | ((*s & 0xff0000) >> 8) |
        ((*s & 0xff00) << 8) | ((*s & 0xff) << 24);
    DPRINTF(("Item %s is 0x%lx\n", name, it));
    *s = it;
}
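
/* Example: a big-endian on-disk value 0x12345678 becomes 0x78563412 in
 * host (little-endian) order, so fields like ehdr.magic can be compared
 * directly against I_MAGIC below. */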
void
plan9header(Exec * header)
{
    /* header is big-endian */
    swabby(&header->magic, "magic");
    swabby(&header->text, "text");
    swabby(&header->data, "data");
    swabby(&header->bss, "bss");
    swabby(&header->syms, "syms");
    swabby(&header->entry, "entry");
    swabby(&header->spsz, "spsz");
    swabby(&header->pcsz, "pcsz");
}
static int
loadp9image(gzFile kernel_gfd, int xc_handle, u32 dom,
            unsigned long *page_array,
            unsigned long tot_pages, unsigned long *virt_load_addr,
            unsigned long *ksize, unsigned long *symtab_addr,
            unsigned long *symtab_len,
            unsigned long *first_data_page, unsigned long *pdb_page,
            const char *cmdline);

#define P9ROUND (P9SIZE / 8)

#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)

static long
get_tot_pages(int xc_handle, u32 domid)
{
    dom0_op_t op;
    op.cmd = DOM0_GETDOMAININFO;
    op.u.getdomaininfo.domain = (domid_t) domid;
    op.u.getdomaininfo.ctxt = NULL;
    return (do_dom0_op(xc_handle, &op) < 0) ?
        -1 : op.u.getdomaininfo.tot_pages;
}
static int
get_pfn_list(int xc_handle,
             u32 domid, unsigned long *pfn_buf, unsigned long max_pfns)
{
    dom0_op_t op;
    int ret;
    op.cmd = DOM0_GETMEMLIST;
    op.u.getmemlist.domain = (domid_t) domid;
    op.u.getmemlist.max_pfns = max_pfns;
    op.u.getmemlist.buffer = pfn_buf;

    if (mlock(pfn_buf, max_pfns * sizeof (unsigned long)) != 0)
        return -1;

    ret = do_dom0_op(xc_handle, &op);

    (void) munlock(pfn_buf, max_pfns * sizeof (unsigned long));

#if 0
#ifdef DEBUG
    DPRINTF(("Ret for get_pfn_list is %d\n", ret));
    if (ret >= 0) {
        int i, j;
        for (i = 0; i < op.u.getmemlist.num_pfns; i += 16) {
            fprintf(stderr, "0x%x: ", i);
            for (j = 0; j < 16; j++)
                fprintf(stderr, "0x%lx ", pfn_buf[i + j]);
            fprintf(stderr, "\n");
        }
    }
#endif
#endif
    return (ret < 0) ? -1 : op.u.getmemlist.num_pfns;
}
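
/* Usage sketch: callers allocate tot_pages slots and treat anything
 * short of a full count as failure (this mirrors the call in
 * setup_guestos() below):
 *
 *     pfns = malloc(tot_pages * sizeof (unsigned long));
 *     if (get_pfn_list(xc_handle, dom, pfns, tot_pages) != tot_pages)
 *         ... bail out ...
 */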
static int
setup_guestos(int xc_handle,
              u32 dom,
              gzFile kernel_gfd,
              unsigned long tot_pages,
              unsigned long *virt_startinfo_addr,
              unsigned long *virt_load_addr,
              full_execution_context_t * ctxt,
              const char *cmdline,
              unsigned long shared_info_frame,
              unsigned int control_evtchn,
              int flags)
{
    l1_pgentry_t *vl1e = NULL;
    l2_pgentry_t *vl2tab = NULL, *vl2e = NULL;
    unsigned long *cpage_array = NULL;
    unsigned long *pte_array = NULL;
    unsigned long l2tab;
    unsigned long l1tab;
    unsigned long count;
    unsigned long symtab_addr = 0, symtab_len = 0;
    start_info_t *start_info;
    shared_info_t *shared_info;
    unsigned long ksize;
    mmu_t *mmu = NULL;
    int i;
    unsigned long first_page_after_kernel,
        first_data_page,
        page_array_page;
    unsigned long cpu0pdb, cpu0pte, cpu0ptelast;
    unsigned long /*last_pfn, */ tot_pte_pages;

    DPRINTF(("tot pages is %ld\n", tot_pages));
    if ((cpage_array = malloc(tot_pages * sizeof (unsigned long))) == NULL) {
        PERROR("Could not allocate cpage array");
        goto error_out;
    }

    if (get_pfn_list(xc_handle, dom, cpage_array, tot_pages) != tot_pages) {
        PERROR("Could not get the page frame list");
        goto error_out;
    }

    for (i = 0; i < 64; i++)
        DPRINTF(("Page %d is 0x%lx\n", i, cpage_array[i]));

    tot_pte_pages = tot_pages >> 10;
    DPRINTF(("Page range is 0 to 0x%lx, which requires 0x%lx pte pages\n",
             tot_pages, tot_pte_pages));
    if (loadp9image(kernel_gfd, xc_handle, dom, cpage_array, tot_pages,
                    virt_load_addr, &ksize, &symtab_addr, &symtab_len,
                    &first_data_page, &first_page_after_kernel, cmdline))
        goto error_out;
    DPRINTF(("First data page is 0x%lx\n", first_data_page));
    DPRINTF(("First page after kernel is 0x%lx\n",
             first_page_after_kernel));

    /*
       NEED TO INCREMENT first page after kernel by:
       + 1 (pdb)
       + tot_pte_pages (pte)
       + tot_pte_pages (page_array)
     */
    /* SO, have to copy the first kernel pages' pfns right into the
     * page_array, then do identity maps for the rest.
     */
    DPRINTF(("mapped kernel pages\n"));

    /* now loop over all ptes and store into the page_array, so as
     * to get the identity map.
     */
    if ((pte_array =
         malloc(tot_pte_pages * 1024 * sizeof (unsigned long))) == NULL) {
        PERROR("Could not allocate pte array");
        goto error_out;
    }

    /* plan 9 on startup expects a "l2" (xen parlance) at 0x2000;
     * this "l2" should have one PTE pointer for a va of 0x80000000,
     * and an l1 (PTEs to you) at 0x3000 (physical).
     * The PTEs should map the first 4M of memory.
     */
    /* get a physical address for the L2. This means take the PFN and
     * shift left.
     */
    /* this terminology is plan 9 terminology.
     * pdb is essentially the Xen L2. 'Page Directory Block'?
     * I need to ask JMK.
     * cpupte is the pte array.
     * Plan 9 counts on these being set up for cpu0.
     * SO: cpu0pdb (Xen L2)
     * and cpupte (Xen L1)
     */
    /* cpu0pdb is right after kernel */
    cpu0pdb = first_page_after_kernel;
    /* cpu0pte comes right after cpu0pdb */
    cpu0pte = cpu0pdb + 1;
    /* number of the last cpu0pte page */
    cpu0ptelast = cpu0pte + tot_pte_pages - 1;
    /* first page of the page array (mfn) */
    page_array_page = cpu0ptelast + 1;
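
    /* Worked example (illustrative numbers only): for a 64 MB domain,
     * tot_pages = 0x4000 and tot_pte_pages = 0x10. If the kernel ends
     * at guest page 0x200, then cpu0pdb = 0x200, the pte pages are
     * 0x201..0x210 (cpu0ptelast = 0x210), and the page array begins at
     * page 0x211. */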
    DPRINTF(("cpu0pdb 0x%lx, cpu0pte 0x%lx cpu0ptelast 0x%lx\n", cpu0pdb,
             cpu0pte, cpu0ptelast));
    l2tab = cpage_array[cpu0pdb] << PAGE_SHIFT;
    DPRINTF(("l2tab 0x%lx\n", l2tab));
    ctxt->pt_base = l2tab;

    /* get a physical address for the L1. This means take the PFN and
     * shift left.
     */
    l1tab = cpage_array[cpu0pte] << PAGE_SHIFT;
    DPRINTF(("l1tab 0x%lx\n", l1tab));
    if ((mmu = init_mmu_updates(xc_handle, dom)) == NULL)
        goto error_out;
    DPRINTF(("now map in l2tab\n"));

    /* Initialise the page tables. */
    /* mmap in the l2tab */
    if ((vl2tab = xc_map_foreign_range(xc_handle, dom,
                                       PAGE_SIZE, PROT_READ | PROT_WRITE,
                                       l2tab >> PAGE_SHIFT)) == NULL)
        goto error_out;
    DPRINTF(("vl2tab 0x%p\n", vl2tab));
    /* now we have the cpu0pdb for the kernel, starting at 0x2000,
     * so we can plug in the physical pointer to the 0x3000 pte
     */
    /* zero it */
    memset(vl2tab, 0, PAGE_SIZE);
    /* get a pointer in the l2tab for the virt_load_addr */
    DPRINTF(("&vl2tab[l2_table_offset(*virt_load_addr)] is 0x%p[0x%lx]\n",
             &vl2tab[l2_table_offset(*virt_load_addr)],
             l2_table_offset(*virt_load_addr)));

    vl2e = &vl2tab[l2_table_offset(*virt_load_addr)];

    /* OK, for all the available PTE, set the PTE pointer up */
    DPRINTF(("For i = %ld to %ld ...\n", cpu0pte, cpu0ptelast));
    for (i = cpu0pte; i <= cpu0ptelast; i++) {
        DPRINTF(("Index %d Set %p to 0x%lx\n", i, vl2e,
                 (cpage_array[i] << PAGE_SHIFT) | L2_PROT));
        *vl2e++ = (cpage_array[i] << PAGE_SHIFT) | L2_PROT;
    }
    /* unmap it ... */
    munmap(vl2tab, PAGE_SIZE);

    /* for the pages from virt_load_pointer to the end of this
     * set of PTEs, map in the PFN for that VA
     */
    for (vl1e = (l1_pgentry_t *) pte_array, count = 0;
         count < tot_pte_pages * 1024; count++, vl1e++) {

        *vl1e = cpage_array[count];
        if (!cpage_array[count])
            continue;
        /* set in the PFN for this entry */
        *vl1e = (cpage_array[count] << PAGE_SHIFT) | L1_PROT;
        /*
           DPRINTF (("vl1e # %d 0x%lx gets 0x%lx\n",
           count, vl1e, *vl1e));
         */
        if ((count >= cpu0pdb) && (count <= cpu0ptelast)) {
            //DPRINTF((" Fix up page %d as it is in pte ville: ", count));
            *vl1e &= ~_PAGE_RW;
            DPRINTF(("0x%lx\n", *vl1e));
        }
        if ((count >= (0x100000 >> 12))
            && (count < (first_data_page >> 12))) {
            //DPRINTF((" Fix up page %d as it is in text ", count));
            *vl1e &= ~_PAGE_RW;
            //DPRINTF (("0x%lx\n", *vl1e));
        }
    }
    /* special thing. Pre-map the shared info page */
    vl1e = (l1_pgentry_t *) &pte_array[2];
    *vl1e = (shared_info_frame << PAGE_SHIFT) | L1_PROT;
    DPRINTF(("vl1e %p, has value 0x%lx\n", vl1e, *(unsigned long *) vl1e));
    /* another special thing. VA 80005000 has to point to 80006000 */
    /* this is a Plan 9 thing -- the 'mach' pointer */
    /* 80005000 is the mach pointer per-cpu, and the actual
     * mach pointers are 80006000, 80007000 etc.
     */
    vl1e = (l1_pgentry_t *) &pte_array[5];
    *vl1e = (cpage_array[6] << PAGE_SHIFT) | L1_PROT;

    /* OK, it's all set up, copy it in */
    memcpy_toguest(xc_handle, dom, pte_array,
                   tot_pte_pages * 1024 * sizeof (unsigned long),
                   cpage_array, cpu0pte);
    /* We really need to have the vl1tab unmapped or the add_mmu_update
     * below will fail bigtime.
     */
    /* Xen guys: remember my errors on domain exit? Something I'm doing
     * wrong in here? We never did find out ...
     */
    /* get rid of the entries we can not use ... */
    memcpy_toguest(xc_handle, dom, cpage_array,
                   tot_pte_pages * 1024 * sizeof (unsigned long),
                   cpage_array, page_array_page);
    /* last chance to dump all of memory */
    // dumpit(xc_handle, dom, 0 /*0x100000>>12*/, tot_pages, cpage_array) ;
    /*
     * Pin down l2tab addr as page dir page - causes hypervisor to provide
     * correct protection for the page
     */
    if (add_mmu_update(xc_handle, mmu,
                       l2tab | MMU_EXTENDED_COMMAND, MMUEXT_PIN_L2_TABLE))
        goto error_out;
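
    /* The loop below populates the machine-to-physical table: each
     * machine frame cpage_array[count] is recorded as guest-physical
     * page 'count', so the guest sees a contiguous pseudo-physical
     * address space. */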
    for (count = 0; count < tot_pages; count++) {
        /*
           DPRINTF (("add_mmu_update(0x%x, 0x%x, 0x%x, %d)\n", xc_handle, mmu,
           (cpage_array[count] << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE,
           count));
         */
        if (add_mmu_update(xc_handle, mmu,
                           (cpage_array[count] << PAGE_SHIFT) |
                           MMU_MACHPHYS_UPDATE, count))
            goto error_out;
        //DPRINTF(("Do the next one\n"));
    }

    //dumpit(pm_handle, 3, 4, page_array);
    /* put the virt_startinfo_addr at KZERO */
    /* just hard-code for now */
    *virt_startinfo_addr = 0x80000000;

    DPRINTF(("virt_startinfo_addr = 0x%lx\n", *virt_startinfo_addr));
    start_info = xc_map_foreign_range(xc_handle, dom,
                                      PAGE_SIZE, PROT_READ | PROT_WRITE,
                                      cpage_array[0]);
    if (start_info == NULL) {
        ERROR("Couldn't map start_info page");
        goto error_out;
    }
    DPRINTF(("startinfo = 0x%p\n", start_info));
    DPRINTF(("shared_info_frame is %lx\n", shared_info_frame));
    memset(start_info, 0, sizeof (*start_info));
    start_info->pt_base = 0x80000000 | (cpu0pdb << PAGE_SHIFT);
    start_info->mfn_list = 0x80000000 | (page_array_page << PAGE_SHIFT);
    DPRINTF(("mfn_list 0x%lx\n", start_info->mfn_list));
    start_info->mod_start = 0;
    start_info->mod_len = 0;
    start_info->nr_pages = tot_pte_pages * 1024;
    start_info->nr_pt_frames = tot_pte_pages + 1;
    start_info->shared_info = shared_info_frame;
    start_info->flags = 0;
    DPRINTF((" control event channel is %d\n", control_evtchn));
    start_info->domain_controller_evtchn = control_evtchn;
    strncpy(start_info->cmd_line, cmdline, MAX_CMDLINE);
    start_info->cmd_line[MAX_CMDLINE - 1] = '\0';
    munmap(start_info, PAGE_SIZE);

    DPRINTF(("done setting up start_info\n"));
    DPRINTF(("shared_info_frame = 0x%lx\n", shared_info_frame));
    /* shared_info page starts its life empty. */

    shared_info = xc_map_foreign_range(xc_handle, dom,
                                       PAGE_SIZE, PROT_READ | PROT_WRITE,
                                       shared_info_frame);
    if (shared_info == NULL) {
        ERROR("Couldn't map shared_info page");
        goto error_out;
    }
    memset(shared_info, 0, PAGE_SIZE);
    /* Mask all upcalls... */
    DPRINTF(("mask all upcalls\n"));
    for (i = 0; i < MAX_VIRT_CPUS; i++)
        shared_info->vcpu_data[i].evtchn_upcall_mask = 1;
    munmap(shared_info, PAGE_SIZE);

    /* Send the page update requests down to the hypervisor. */
    DPRINTF(("send page update reqs down.\n"));
    if (finish_mmu_updates(xc_handle, mmu))
        goto error_out;

    //DPRINTF (("call dumpit.\n"));
    //dumpit(pm_handle, 0x100000>>12, tot_pages, page_array) ;
    //dumpit (pm_handle, 2, 0x100, page_array);
    free(mmu);

    /* we don't bother freeing anything else at this point --
     * we're exiting and it is pointless
     */
    return 0;

  error_out:
    /* oh well, we still free some things -- I oughtta nuke this */
    if (mmu != NULL)
        free(mmu);
    return -1;
}
int
xc_plan9_build(int xc_handle,
               u32 domid,
               const char *image_name,
               const char *cmdline,
               unsigned int control_evtchn, unsigned long flags)
{
    dom0_op_t launch_op, op;
    unsigned long load_addr;
    long tot_pages;
    int kernel_fd = -1;
    gzFile kernel_gfd = NULL;
    int rc, i;
    full_execution_context_t st_ctxt, *ctxt = &st_ctxt;
    unsigned long virt_startinfo_addr;

    if ((tot_pages = get_tot_pages(xc_handle, domid)) < 0) {
        PERROR("Could not find total pages for domain");
        return 1;
    }
    DPRINTF(("get_tot_pages returns %ld pages\n", tot_pages));

    kernel_fd = open(image_name, O_RDONLY);
    if (kernel_fd < 0) {
        PERROR("Could not open kernel image");
        return 1;
    }

    if ((kernel_gfd = gzdopen(kernel_fd, "rb")) == NULL) {
        PERROR("Could not allocate decompression state for kernel image");
        close(kernel_fd);
        return 1;
    }

    if (mlock(&st_ctxt, sizeof (st_ctxt))) {
        PERROR("Unable to mlock ctxt");
        goto error_out;     /* close the kernel fds on the way out */
    }
    op.cmd = DOM0_GETDOMAININFO;
    op.u.getdomaininfo.domain = (domid_t) domid;
    op.u.getdomaininfo.ctxt = ctxt;
    if ((do_dom0_op(xc_handle, &op) < 0) ||
        ((u32) op.u.getdomaininfo.domain != domid)) {
        PERROR("Could not get info on domain");
        goto error_out;
    }

    if (!(op.u.getdomaininfo.flags & DOMFLAGS_PAUSED)
        || (op.u.getdomaininfo.ctxt->pt_base != 0)) {
        ERROR("Domain is already constructed");
        goto error_out;
    }

    if (setup_guestos(xc_handle, domid, kernel_gfd, tot_pages,
                      &virt_startinfo_addr,
                      &load_addr, &st_ctxt, cmdline,
                      op.u.getdomaininfo.shared_info_frame,
                      control_evtchn, flags) < 0) {
        ERROR("Error constructing guest OS");
        goto error_out;
    }

    /* leave the leak in here for now
       if ( kernel_fd >= 0 )
           close(kernel_fd);
       if ( kernel_gfd )
           gzclose(kernel_gfd);
     */
    ctxt->flags = 0;

    /*
     * Initial register values:
     *  DS,ES,FS,GS = FLAT_GUESTOS_DS
     *       CS:EIP = FLAT_GUESTOS_CS:start_pc
     *       SS:ESP = FLAT_GUESTOS_DS:start_stack
     *          ESI = start_info
     *  [EAX,EBX,ECX,EDX,EDI,EBP are zero]
     *       EFLAGS = IF | 2 (bit 1 is reserved and should always be 1)
     */
    ctxt->cpu_ctxt.ds = FLAT_GUESTOS_DS;
    ctxt->cpu_ctxt.es = FLAT_GUESTOS_DS;
    ctxt->cpu_ctxt.fs = FLAT_GUESTOS_DS;
    ctxt->cpu_ctxt.gs = FLAT_GUESTOS_DS;
    ctxt->cpu_ctxt.ss = FLAT_GUESTOS_DS;
    ctxt->cpu_ctxt.cs = FLAT_GUESTOS_CS;
    /* hard-coded entry point for now; this deliberately overrides the
     * load_addr returned by setup_guestos() */
    ctxt->cpu_ctxt.eip = 0x80100020;
    /* put stack at top of second page */
    ctxt->cpu_ctxt.esp = 0x80000000 + (STACKPAGE << PAGE_SHIFT);

    /* why is this set? */
    ctxt->cpu_ctxt.esi = ctxt->cpu_ctxt.esp;
    /* IF (bit 9) plus the always-set reserved bit 1 */
    ctxt->cpu_ctxt.eflags = (1 << 9) | (1 << 1);

    /* FPU is set up to default initial state. */
    memset(ctxt->fpu_ctxt, 0, sizeof (ctxt->fpu_ctxt));

    /* Virtual IDT is empty at start-of-day. */
    for (i = 0; i < 256; i++) {
        ctxt->trap_ctxt[i].vector = i;
        ctxt->trap_ctxt[i].cs = FLAT_GUESTOS_CS;
    }
    ctxt->fast_trap_idx = 0;

    /* No LDT. */
    ctxt->ldt_ents = 0;

    /* Use the default Xen-provided GDT. */
    ctxt->gdt_ents = 0;

    /* Ring 1 stack is the initial stack. */
    /* put stack at top of second page */
    ctxt->guestos_ss = FLAT_GUESTOS_DS;
    ctxt->guestos_esp = ctxt->cpu_ctxt.esp;

    /* No debugging. */
    memset(ctxt->debugreg, 0, sizeof (ctxt->debugreg));

    /* No callback handlers. */
    ctxt->event_callback_cs = FLAT_GUESTOS_CS;
    ctxt->event_callback_eip = 0;
    ctxt->failsafe_callback_cs = FLAT_GUESTOS_CS;
    ctxt->failsafe_callback_eip = 0;

    memset(&launch_op, 0, sizeof (launch_op));

    launch_op.u.builddomain.domain = (domid_t) domid;
    // launch_op.u.builddomain.num_vifs = 1;
    launch_op.u.builddomain.ctxt = ctxt;
    launch_op.cmd = DOM0_BUILDDOMAIN;
    rc = do_dom0_op(xc_handle, &launch_op);

    fprintf(stderr, "RC is %d\n", rc);
    return rc;

  error_out:
    if (kernel_gfd)
        gzclose(kernel_gfd);    /* also closes the underlying kernel_fd */
    else if (kernel_fd >= 0)
        close(kernel_fd);

    return -1;
}
/*
 * Plan 9 memory layout (initial)
 * ----------------
 * | info from xen| @0
 * ----------------<--- boot args (start at 0x1200 + 64)
 * | stack        |
 * ----------------<--- page 2
 * | empty        |
 * ----------------<--- page 5 MACHADDR (always points to machp[cpuno])
 * | aliased      |
 * ----------------<--- page 6 CPU0MACH
 * | CPU0MACH     |
 * ----------------
 * | empty        |
 * ---------------- *virt_load_addr = ehdr.entry (0x80100000)
 * | kernel       |
 * |              |
 * ----------------<--- page aligned boundary
 * | data         |
 * |              |
 * ----------------
 * | bss          |
 * ----------------<--- end of kernel (page aligned): cpu0pdb
 * | PMD cpu0pdb  |
 * ----------------<--- +1 page
 * | PTE cpu0pte  |
 * ----------------<--- +1 + tot_pte_pages
 * | page_array   |
 * ----------------<--- +1 + 2*tot_pte_pages
 * | empty to TOM |
 * ----------------
 */
static int
loadp9image(gzFile kernel_gfd, int xc_handle, u32 dom,
            unsigned long *page_array,
            unsigned long tot_pages, unsigned long *virt_load_addr,
            unsigned long *ksize, unsigned long *symtab_addr,
            unsigned long *symtab_len,
            unsigned long *first_data_page, unsigned long *pdb_page,
            const char *cmdline)
{
    unsigned long datapage;
    Exec ehdr;

    char *p;
    unsigned long maxva;
    int curpos, ret;
    PAGE *image = NULL;
    unsigned long image_tot_pages = 0;
    unsigned long textround;
    static PAGE args;

    ret = -1;

    p = NULL;
    maxva = 0;

    if (gzread(kernel_gfd, &ehdr, sizeof (Exec)) != sizeof (Exec)) {
        PERROR("Error reading kernel image P9 header.");
        goto out;
    }

    plan9header(&ehdr);
    curpos = sizeof (Exec);

    if (ehdr.magic != I_MAGIC) {
        PERROR("Image does not have a P9 header.");
        goto out;
    }

    textround = ((ehdr.text + 0x20 + 4095) >> 12) << 12;
    *first_data_page = 0x100000 + textround;
    DPRINTF(("ehdr.text is 0x%lx, textround is 0x%lx\n",
             ehdr.text, textround));

    image_tot_pages =
        (textround + ehdr.data + ehdr.bss + PAGE_SIZE - 1) >> PAGE_SHIFT;
    DPRINTF(("tot pages is %ld\n", image_tot_pages));
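
    /* Worked example (illustrative values): for ehdr.text = 0x52e44,
     * textround = ((0x52e44 + 0x20 + 4095) >> 12) << 12 = 0x53000,
     * so the data segment begins at physical 0x100000 + 0x53000. */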
    *virt_load_addr = 0x80100000;

    if ((*virt_load_addr & (PAGE_SIZE - 1)) != 0) {
        ERROR("We can only deal with page-aligned load addresses");
        goto out;
    }

    if ((*virt_load_addr + (image_tot_pages << PAGE_SHIFT)) >
        HYPERVISOR_VIRT_START) {
        ERROR("Cannot map all domain memory without hitting Xen space");
        goto out;
    }

    /* just malloc an image that is image_tot_pages in size. Then read in
     * the image -- text, data -- to page-rounded alignments,
     * then copy into xen.
     * this gets BSS zeroed for free
     */
    DPRINTF(("Allocate %ld bytes\n", image_tot_pages * sizeof (*image)));
    image = calloc(image_tot_pages, sizeof (*image));
    if (!image) {
        blah("alloc data");
        goto out;       /* ret is still -1 */
    }
    /* text starts at 0x20, after the header, just like Unix long ago */
    if (gzread(kernel_gfd, &image[0].data[sizeof (Exec)], ehdr.text) <
        ehdr.text) {
        blah("read text");
        goto out;
    }
    DPRINTF(("READ TEXT %ld bytes\n", ehdr.text));
    /* round up so data lands on the page after the text ends; the old
     * "/ PAGE_SIZE + 1" form skipped a page when text ended exactly on
     * a page boundary */
    datapage = (ehdr.text + sizeof (Exec) + PAGE_SIZE - 1) / PAGE_SIZE;
    if (gzread(kernel_gfd, image[datapage].data, ehdr.data) < ehdr.data) {
        blah("read data");
        goto out;
    }
    DPRINTF(("READ DATA %ld bytes\n", ehdr.data));

    /* nice contig stuff */
    /* oops need to start at 0x100000 */

    ret = memcpy_toguest(xc_handle, dom,
                         image, image_tot_pages * 4096, page_array, 0x100);
    DPRINTF(("done copying kernel to guest memory\n"));

    /* now do the bootargs */
    /* in plan 9, the x=y bootargs start at 0x1200 + 64 in real memory */
    /* we'll copy to page 1, so we offset into the page struct at
     * 0x200 + 64
     */
    memset(&args, 0, sizeof (args));
    {
        /* clamp so a long cmdline cannot run off the end of the args page */
        size_t n = strlen(cmdline);
        size_t max = sizeof (args.data) - (0x200 + 64) - 1;
        memcpy(&args.data[0x200 + 64], cmdline, n > max ? max : n);
    }
    printf("Copied :%s: to page for args\n", cmdline);
    ret = memcpy_toguest(xc_handle, dom, &args, sizeof (args), page_array, 1);
    //dumpit(xc_handle, dom, 0 /*0x100000>>12*/, 4, page_array) ;
  out:
    if (image)
        free(image);
    *pdb_page = image_tot_pages + (0x100000 >> PAGE_SHIFT);
    return ret;
}
757 }