direct-io.hg

view tools/libxc/xc_plan9_build.c @ 3435:0fd048d86eed

bitkeeper revision 1.1159.220.3 (41e670c37jmaTxUns3KlvsbVRCg-UA)

The getdomaininfo hypercall now listens to the exec_domain parameter
that was already passed to it, and performs some basic sanity checking.

Added exec_domain (aka vcpu) parameters to xc_domain_getfullinfo()
and xc_domain_get_cpu_usage().
author mafetter@fleming.research
date Thu Jan 13 12:59:47 2005 +0000 (2005-01-13)
parents 2419f5c72fe5
children 6096356005ba
line source
1 /******************************************************************************
2 * xc_plan9_build.c
3 * derived from xc_linux_build.c
4 */
6 #include "xc_private.h"
8 #include <zlib.h>
#define DEBUG 1
#ifdef DEBUG
/* Debug printf that flushes immediately so output survives a crash.
 * Wrapped in do { } while (0) so that DPRINTF((...)); behaves as a
 * single statement — the old `printf x; fflush(stdout);` expansion
 * executed fflush unconditionally in an unbraced `if` and broke
 * if/else chains outright. */
#define DPRINTF(x) do { printf x; fflush(stdout); } while (0)
#else
#define DPRINTF(x) do { } while (0)
#endif
17 #include "plan9a.out.h"
/* really TOS which means stack starts at 0x2000, and uses page 1*/
#define STACKPAGE 2

/* Plan 9 a.out header storage.
 * NOTE(review): neither `header` nor `origheader` is referenced anywhere
 * in this file (loadp9image uses a local `ehdr`) — possibly vestigial. */
struct Exec header, origheader;

/* One guest page worth of raw bytes; the kernel image is assembled in
 * units of these before being copied into the domain. */
typedef struct page {
    char data[PAGE_SIZE];
} PAGE;
28 int
29 memcpy_toguest(int xc_handle, u32 dom, void *v, int size,
30 unsigned long *page_array, unsigned int to_page)
31 {
32 int ret;
33 unsigned char *cp = v;
34 unsigned int whichpage;
35 unsigned char *vaddr;
37 // DPRINTF(("memcpy_to_guest: to_page 0x%x, count %d\n", to_page, size));
38 for (ret = 0, whichpage = to_page; size > 0;
39 whichpage++, size -= PAGE_SIZE, cp += PAGE_SIZE) {
41 // DPRINTF (("map_pfn_writeable(%p, 0x%lx)\n", pm_handle,
42 // page_array[whichpage]));
43 vaddr = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
44 PROT_READ | PROT_WRITE,
45 page_array[whichpage]);
46 // DPRINTF (("vaddr is %p\n", vaddr));
47 if (vaddr == NULL) {
48 ret = -1;
49 ERROR("Couldn't map guest memory");
50 goto out;
51 }
52 // DPRINTF (("copy %p to %p, count 0x%x\n", cp, vaddr, 4096));
53 memcpy(vaddr, cp, 4096);
54 munmap(vaddr, PAGE_SIZE);
55 // DPRINTF (("Did %ud'th pages\n", whichpage));
56 }
57 out:
58 return ret;
59 }
61 /* this is a function which can go away. It dumps a hunk of
62 * guest pages to a file (/tmp/dumpit); handy for debugging
63 * your image builder.
64 * Xen guys, nuke this if you wish.
65 */
66 void
67 dumpit(int xc_handle, u32 dom,
68 int start_page, int tot, unsigned long *page_array)
69 {
70 int i, ofd;
71 unsigned char *vaddr;
73 ofd = open("/tmp/dumpit", O_RDWR);
74 for (i = start_page; i < tot; i++) {
75 vaddr = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
76 PROT_READ | PROT_WRITE,
77 page_array[i]);
78 if (!vaddr) {
79 fprintf(stderr, "Page %d\n", i);
80 perror("shit");
81 read(0, &i, 1);
82 return;
83 }
84 write(ofd, vaddr, 4096);
85 munmap(vaddr, PAGE_SIZE);
86 }
87 }
/*
 * Print a standard error banner plus perror(b), and return the errno
 * that was current on entry.  errno is captured first: fprintf/perror
 * are themselves allowed to change errno, so the old code could return
 * (and perror could print) the wrong error.
 */
int
blah(char *b)
{
    int saved_errno = errno;

    fprintf(stderr, "Error in xc_plan9_build!\n");
    errno = saved_errno;        /* make perror report the original error */
    perror(b);
    return saved_errno;
}
/* Byte-swap the 32-bit big-endian value at *s into host order and log
 * it under `name`.  Used on the Plan 9 a.out header fields. */
void
swabby(unsigned long *s, char *name)
{
    unsigned long src = *s;
    unsigned long swapped = 0;
    int byte;

    /* Peel off the four low-order bytes of src and push them onto
     * swapped in reverse order. */
    for (byte = 0; byte < 4; byte++) {
        swapped = (swapped << 8) | (src & 0xff);
        src >>= 8;
    }
    DPRINTF(("Item %s is 0x%lx\n", name, swapped));
    *s = swapped;
}
107 void
108 plan9header(Exec * header)
109 {
110 /* header is big-endian */
111 swabby(&header->magic, "magic");
112 swabby(&header->text, "text");
113 swabby(&header->data, "data");
114 swabby(&header->bss, "bss");
115 swabby(&header->syms, "syms");
116 swabby(&header->entry, "entry");
117 swabby(&header->spsz, "spsz");
118 swabby(&header->pcsz, "pcsz");
120 }
/* Forward declaration; defined at the bottom of this file. */
static int
loadp9image(gzFile kernel_gfd, int xc_handle, u32 dom,
            unsigned long *page_array,
            unsigned long tot_pages, unsigned long *virt_load_addr,
            unsigned long *ksize, unsigned long *symtab_addr,
            unsigned long *symtab_len,
            unsigned long *first_data_page, unsigned long *pdb_page);

/* NOTE(review): P9ROUND is not used anywhere in this file; P9SIZE is
 * presumably defined in plan9a.out.h — confirm before removing. */
#define P9ROUND (P9SIZE / 8)

/* Permission bits for L1 (PTE) and L2 (page-directory) entries. */
#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
/*
 * Build the initial memory image for a Plan 9 guest: load the kernel
 * via loadp9image(), construct the Xen L2/L1 page tables that Plan 9
 * expects for cpu0, feed the machine->physical mappings to the
 * hypervisor, and populate the start_info and shared_info pages.
 *
 * On success, *virt_startinfo_addr and *virt_load_addr are set and
 * ctxt->pt_base points at the new L2.  Returns 0 on success, -1 on
 * failure.  cpage_array/pte_array are deliberately leaked on success
 * (see the comment near the end) — the caller is about to exit.
 */
static int
setup_guestos(int xc_handle,
              u32 dom,
              gzFile kernel_gfd,
              unsigned long tot_pages,
              unsigned long *virt_startinfo_addr,
              unsigned long *virt_load_addr,
              full_execution_context_t * ctxt,
              const char *cmdline,
              unsigned long shared_info_frame,
              unsigned int control_evtchn,
              int flags)
{
    l1_pgentry_t *vl1e = NULL;
    l2_pgentry_t *vl2tab = NULL, *vl2e = NULL;
    unsigned long *cpage_array = NULL;
    unsigned long *pte_array = NULL;
    unsigned long l2tab;
    unsigned long l1tab;
    unsigned long count;
    /* NOTE(review): symtab_addr/symtab_len and ksize are filled in by
     * loadp9image() but never consumed here. */
    unsigned long symtab_addr = 0, symtab_len = 0;
    start_info_t *start_info;
    shared_info_t *shared_info;
    unsigned long ksize;
    mmu_t *mmu = NULL;
    int i;
    unsigned long first_page_after_kernel,
        first_data_page,
        page_array_page;
    unsigned long cpu0pdb, cpu0pte, cpu0ptelast;
    unsigned long /*last_pfn, */ tot_pte_pages;

    DPRINTF(("tot pages is %ld\n", tot_pages));
    /* cpage_array[i] is the machine frame backing guest pfn i. */
    if ((cpage_array = malloc(tot_pages * sizeof (unsigned long))) == NULL) {
        PERROR("Could not allocate cpage array");
        goto error_out;
    }

    if (xc_get_pfn_list(xc_handle, dom, cpage_array, tot_pages) != tot_pages) {
        PERROR("Could not get the page frame list");
        goto error_out;
    }

    for (i = 0; i < 64; i++)
        DPRINTF(("First %d page is 0x%lx\n", i, cpage_array[i]));

    /* One PTE page maps 1024 pages (4MB).  NOTE(review): assumes
     * tot_pages is a multiple of 1024 — confirm. */
    tot_pte_pages = tot_pages >> 10;
    DPRINTF(("Page range is 0 to 0x%lx, which requires 0x%lx pte pages\n",
             tot_pte_pages, tot_pte_pages));

    if (loadp9image(kernel_gfd, xc_handle, dom, cpage_array, tot_pages,
                    virt_load_addr, &ksize, &symtab_addr, &symtab_len,
                    &first_data_page, &first_page_after_kernel))
        goto error_out;
    DPRINTF(("First data page is 0x%lx\n", first_data_page));
    DPRINTF(("First page after kernel is 0x%lx\n",
             first_page_after_kernel));

    /*
       NEED TO INCREMENT first page after kernel by:
       + 1 (pdb)
       + tot_pte_pages (pte)
       + tot_pte_pages (page_array)
     */
    /* SO, have to copy the first kernel pages pfns right into the
     * page_array, then do identity maps for the rest.
     */
    DPRINTF(("mapped kernel pages\n"));

    /* now loop over all ptes and store into the page_array, so as
     * to get the identity map.
     */
    if ((pte_array =
         malloc(tot_pte_pages * 1024 * sizeof (unsigned long))) == NULL) {
        PERROR("Could not allocate pte array");
        goto error_out;
    }

    /* plan 9 on startup expects a "l2" (xen parlance) at 0x2000,
     * this "l2" should have one PTE pointer for a va of 0x80000000.
     * and an l1 (PTEs to you) at 0x3000. (physical).
     * the PTEs should map the first 4M of memory.
     */
    /* get a physical address for the L2. This means take the PFN and
     * shift left.
     */
    /* this terminology is plan 9 terminology.
     * pdb is essentially the Xen L2. 'Page Directory Block'?
     * I need to ask JMK.
     * cpupte is the pte array.
     * Plan 9 counts on these being set up for cpu0.
     * SO: cpu0pdb (Xen L2)
     * and cpupte (Xen L1)
     */
    /* cpu0pdb is right after kernel */
    cpu0pdb = first_page_after_kernel;
    /* cpu0pte comes right after cpu0pdb */
    cpu0pte = cpu0pdb + 1;
    /* number of the past cpu0pte page */
    cpu0ptelast = cpu0pte + tot_pte_pages - 1;
    /* first page of the page array (mfn) */
    page_array_page = cpu0ptelast + 1;

    DPRINTF(("cpu0pdb 0x%lx, cpu0pte 0x%lx cpu0ptelast 0x%lx\n", cpu0pdb,
             cpu0pte, cpu0ptelast));
    l2tab = cpage_array[cpu0pdb] << PAGE_SHIFT;
    DPRINTF(("l2tab 0x%lx\n", l2tab));
    ctxt->pt_base = l2tab;

    /* get a physical address for the L1. This means take the PFN and
     * shift left.
     */
    l1tab = cpage_array[cpu0pte] << PAGE_SHIFT;
    DPRINTF(("l1tab 0x%lx\n", l1tab));
    if ((mmu = init_mmu_updates(xc_handle, dom)) == NULL)
        goto error_out;
    DPRINTF(("now map in l2tab\n"));

    /* Initialise the page tables. */
    /* mmap in the l2tab */
    if ((vl2tab = xc_map_foreign_range(xc_handle, dom,
                                       PAGE_SIZE, PROT_READ | PROT_WRITE,
                                       l2tab >> PAGE_SHIFT)) == NULL)
        goto error_out;
    DPRINTF(("vl2tab 0x%p\n", vl2tab));
    /* now we have the cpu0pdb for the kernel, starting at 0x2000,
     * so we can plug in the physical pointer to the 0x3000 pte
     */
    /* zero it */
    memset(vl2tab, 0, PAGE_SIZE);
    /* get a pointer in the l2tab for the virt_load_addr */
    DPRINTF(("&vl2tab[l2_table_offset(*virt_load_addr)] is 0x%p[0x%lx]\n",
             &vl2tab[l2_table_offset(*virt_load_addr)],
             l2_table_offset(*virt_load_addr)));

    vl2e = &vl2tab[l2_table_offset(*virt_load_addr)];

    /* OK, for all the available PTE, set the PTE pointer up */
    DPRINTF(("For i = %ld to %ld ...\n", cpu0pte, cpu0ptelast));
    for (i = cpu0pte; i <= cpu0ptelast; i++) {
        DPRINTF(("Index %d Set %p to 0x%lx\n", i, vl2e,
                 (cpage_array[i] << PAGE_SHIFT) | L2_PROT));
        *vl2e++ = (cpage_array[i] << PAGE_SHIFT) | L2_PROT;
    }

    /* unmap it ... */
    munmap(vl2tab, PAGE_SIZE);

    /* for the pages from virt_load_pointer to the end of this
     * set of PTEs, map in the PFN for that VA
     */
    for (vl1e = (l1_pgentry_t *) pte_array, count = 0;
         count < tot_pte_pages * 1024; count++, vl1e++) {

        *vl1e = cpage_array[count];
        if (!cpage_array[count])
            continue;
        /* set in the PFN for this entry */
        *vl1e = (cpage_array[count] << PAGE_SHIFT) | L1_PROT;
        /*
           DPRINTF (("vl1e # %d 0x%lx gets 0x%lx\n",
           count, vl1e, *vl1e));
         */
        /* Page-table pages themselves must be mapped read-only. */
        if ((count >= cpu0pdb) && (count <= cpu0ptelast)) {
            //DPRINTF((" Fix up page %d as it is in pte ville: ", count));
            *vl1e &= ~_PAGE_RW;
            DPRINTF(("0x%lx\n", *vl1e));
        }
        /* Kernel text (from 1MB up to the first data page) is
         * write-protected as well. */
        if ((count >= (0x100000 >> 12))
            && (count < (first_data_page >> 12))) {
            //DPRINTF((" Fix up page %d as it is in text ", count));
            *vl1e &= ~_PAGE_RW;
            //DPRINTF (("0x%lx\n", *vl1e));
        }
    }
    /* special thing. Pre-map the shared info page */
    vl1e = &pte_array[2];
    *vl1e = (shared_info_frame << PAGE_SHIFT) | L1_PROT;
    DPRINTF(("v1l1 %p, has value 0x%lx\n", vl1e, *(unsigned long *) vl1e));
    /* another special thing. VA 80005000 has to point to 80006000 */
    /* this is a Plan 9 thing -- the 'mach' pointer */
    /* 80005000 is the mach pointer per-cpu, and the actual
     * mach pointers are 80006000, 80007000 etc.
     */
    vl1e = &pte_array[5];
    *vl1e = (cpage_array[6] << PAGE_SHIFT) | L1_PROT;

    /* OK, it's all set up, copy it in */
    memcpy_toguest(xc_handle, dom, pte_array,
                   (tot_pte_pages * 1024 * sizeof (unsigned long) /**/),
                   cpage_array, cpu0pte);

    /* We really need to have the vl1tab unmapped or the add_mmu_update
     * below will fail bigtime.
     */
    /* Xen guys: remember my errors on domain exit? Something I'm doing
     * wrong in here? We never did find out ...
     */
    /* get rid of the entries we can not use ... */
    memcpy_toguest(xc_handle, dom, cpage_array,
                   (tot_pte_pages * 1024 * sizeof (unsigned long) /**/),
                   cpage_array, page_array_page);
    /* last chance to dump all of memory */
    // dumpit(xc_handle, dom, 0 /*0x100000>>12*/, tot_pages, cpage_array) ;
    /*
     * Pin down l2tab addr as page dir page - causes hypervisor to provide
     * correct protection for the page
     */
    if (add_mmu_update(xc_handle, mmu,
                       l2tab | MMU_EXTENDED_COMMAND, MMUEXT_PIN_L2_TABLE))
        goto error_out;

    /* Feed the hypervisor the machine->physical mapping for every page. */
    for (count = 0; count < tot_pages; count++) {
        /*
           DPRINTF (("add_mmu_update(0x%x, 0x%x, 0x%x, %d)\n", xc_handle, mmu,
           (cpage_array[count]
           << PAGE_SHIFT) |
           MMU_MACHPHYS_UPDATE,
           count));
         */
        if (add_mmu_update(xc_handle, mmu,
                           (cpage_array[count] << PAGE_SHIFT) |
                           MMU_MACHPHYS_UPDATE, count))
            goto error_out;
        //DPRINTF(("Do the next one\n"));
    }
    /*
     */

    //dumpit(pm_handle, 3, 4, page_array);
    /* put the virt_startinfo_addr at KZERO */
    /* just hard-code for now */
    *virt_startinfo_addr = 0x80000000;

    DPRINTF(("virt_startinfo_addr = 0x%lx\n", *virt_startinfo_addr));
    /* NOTE(review): this mapping is not checked for NULL before the
     * memset below — a failure here would crash the builder. */
    start_info = xc_map_foreign_range(xc_handle, dom,
                                      PAGE_SIZE, PROT_READ | PROT_WRITE,
                                      cpage_array[0]);
    DPRINTF(("startinfo = 0x%p\n", start_info));
    DPRINTF(("shared_info_frame is %lx\n", shared_info_frame));
    memset(start_info, 0, sizeof (*start_info));
    start_info->pt_base = 0x80000000 | cpu0pdb << PAGE_SHIFT;
    start_info->mfn_list = 0x80000000 | (page_array_page) << PAGE_SHIFT;
    DPRINTF(("mfn_list 0x%lx\n", start_info->mfn_list));
    start_info->mod_start = 0;
    start_info->mod_len = 0;
    start_info->nr_pages = tot_pte_pages * 1024;
    start_info->nr_pt_frames = tot_pte_pages + 1;
    start_info->shared_info = shared_info_frame;
    start_info->flags = 0;
    DPRINTF((" control event channel is %d\n", control_evtchn));
    start_info->domain_controller_evtchn = control_evtchn;
    strncpy(start_info->cmd_line, cmdline, MAX_CMDLINE);
    start_info->cmd_line[MAX_CMDLINE - 1] = '\0';
    munmap(start_info, PAGE_SIZE);

    DPRINTF(("done setting up start_info\n"));
    DPRINTF(("shared_info_frame = 0x%lx\n", shared_info_frame));
    /* shared_info page starts its life empty. */

    /* NOTE(review): also unchecked for NULL, as above. */
    shared_info = xc_map_foreign_range(xc_handle, dom,
                                       PAGE_SIZE, PROT_READ | PROT_WRITE,
                                       shared_info_frame);
    memset(shared_info, 0, PAGE_SIZE);
    /* Mask all upcalls... */
    DPRINTF(("mask all upcalls\n"));
    for (i = 0; i < MAX_VIRT_CPUS; i++)
        shared_info->vcpu_data[i].evtchn_upcall_mask = 1;
    munmap(shared_info, PAGE_SIZE);

    /* Send the page update requests down to the hypervisor. */
    DPRINTF(("send page update reqs down.\n"));
    if (finish_mmu_updates(xc_handle, mmu))
        goto error_out;

    //DPRINTF (("call dumpit.\n"));
    //dumpit(pm_handle, 0x100000>>12, tot_pages, page_array) ;
    //dumpit (pm_handle, 2, 0x100, page_array);
    free(mmu);

    /* we don't bother freeing anything at this point --
     * we're exiting and it is pointless
     */
    return 0;

  error_out:
    /* oh well we still free some things -- I oughtta nuke this */
    /* NOTE(review): cpage_array and pte_array are leaked here too. */
    if (mmu != NULL)
        free(mmu);
    ;
    return -1;
}
428 int
429 xc_plan9_build(int xc_handle,
430 u32 domid,
431 const char *image_name,
432 const char *cmdline,
433 unsigned int control_evtchn, unsigned long flags)
434 {
435 dom0_op_t launch_op, op;
436 unsigned long load_addr;
437 long tot_pages;
438 int kernel_fd = -1;
439 gzFile kernel_gfd = NULL;
440 int rc, i;
441 full_execution_context_t st_ctxt, *ctxt = &st_ctxt;
442 unsigned long virt_startinfo_addr;
444 if ((tot_pages = xc_get_tot_pages(xc_handle, domid)) < 0) {
445 PERROR("Could not find total pages for domain");
446 return 1;
447 }
448 DPRINTF(("xc_get_tot_pages returns %ld pages\n", tot_pages));
450 kernel_fd = open(image_name, O_RDONLY);
451 if (kernel_fd < 0) {
452 PERROR("Could not open kernel image");
453 return 1;
454 }
456 if ((kernel_gfd = gzdopen(kernel_fd, "rb")) == NULL) {
457 PERROR("Could not allocate decompression state for state file");
458 close(kernel_fd);
459 return 1;
460 }
462 DPRINTF(("xc_get_tot_pages returns %ld pages\n", tot_pages));
463 if (mlock(&st_ctxt, sizeof (st_ctxt))) {
464 PERROR("Unable to mlock ctxt");
465 return 1;
466 }
468 op.cmd = DOM0_GETDOMAININFO;
469 op.u.getdomaininfo.domain = (domid_t) domid;
470 op.u.getdomaininfo.exec_domain = 0;
471 op.u.getdomaininfo.ctxt = ctxt;
472 if ((do_dom0_op(xc_handle, &op) < 0) ||
473 ((u32) op.u.getdomaininfo.domain != domid)) {
474 PERROR("Could not get info on domain");
475 goto error_out;
476 }
477 DPRINTF(("xc_get_tot_pages returns %ld pages\n", tot_pages));
479 if (!(op.u.getdomaininfo.flags & DOMFLAGS_PAUSED)
480 || (op.u.getdomaininfo.ctxt->pt_base != 0)) {
481 ERROR("Domain is already constructed");
482 goto error_out;
483 }
485 DPRINTF(("xc_get_tot_pages returns %ld pages\n", tot_pages));
486 if (setup_guestos(xc_handle, domid, kernel_gfd, tot_pages,
487 &virt_startinfo_addr,
488 &load_addr, &st_ctxt, cmdline,
489 op.u.getdomaininfo.shared_info_frame,
490 control_evtchn, flags) < 0) {
491 ERROR("Error constructing guest OS");
492 goto error_out;
493 }
495 /* leave the leak in here for now
496 if ( kernel_fd >= 0 )
497 close(kernel_fd);
498 if( kernel_gfd )
499 gzclose(kernel_gfd);
500 */
501 ctxt->flags = 0;
503 /*
504 * Initial register values:
505 * DS,ES,FS,GS = FLAT_GUESTOS_DS
506 * CS:EIP = FLAT_GUESTOS_CS:start_pc
507 * SS:ESP = FLAT_GUESTOS_DS:start_stack
508 * ESI = start_info
509 * [EAX,EBX,ECX,EDX,EDI,EBP are zero]
510 * EFLAGS = IF | 2 (bit 1 is reserved and should always be 1)
511 */
512 ctxt->cpu_ctxt.ds = FLAT_GUESTOS_DS;
513 ctxt->cpu_ctxt.es = FLAT_GUESTOS_DS;
514 ctxt->cpu_ctxt.fs = FLAT_GUESTOS_DS;
515 ctxt->cpu_ctxt.gs = FLAT_GUESTOS_DS;
516 ctxt->cpu_ctxt.ss = FLAT_GUESTOS_DS;
517 ctxt->cpu_ctxt.cs = FLAT_GUESTOS_CS;
518 ctxt->cpu_ctxt.eip = load_addr;
519 ctxt->cpu_ctxt.eip = 0x80100020;
520 /* put stack at top of second page */
521 ctxt->cpu_ctxt.esp = 0x80000000 + (STACKPAGE << PAGE_SHIFT);
523 /* why is this set? */
524 ctxt->cpu_ctxt.esi = ctxt->cpu_ctxt.esp;
525 ctxt->cpu_ctxt.eflags = (1 << 9) | (1 << 2);
527 /* FPU is set up to default initial state. */
528 memset(ctxt->fpu_ctxt, 0, sizeof (ctxt->fpu_ctxt));
530 /* Virtual IDT is empty at start-of-day. */
531 for (i = 0; i < 256; i++) {
532 ctxt->trap_ctxt[i].vector = i;
533 ctxt->trap_ctxt[i].cs = FLAT_GUESTOS_CS;
534 }
535 ctxt->fast_trap_idx = 0;
537 /* No LDT. */
538 ctxt->ldt_ents = 0;
540 /* Use the default Xen-provided GDT. */
541 ctxt->gdt_ents = 0;
543 /* Ring 1 stack is the initial stack. */
544 /* put stack at top of second page */
545 ctxt->guestos_ss = FLAT_GUESTOS_DS;
546 ctxt->guestos_esp = ctxt->cpu_ctxt.esp;
548 /* No debugging. */
549 memset(ctxt->debugreg, 0, sizeof (ctxt->debugreg));
551 /* No callback handlers. */
552 ctxt->event_callback_cs = FLAT_GUESTOS_CS;
553 ctxt->event_callback_eip = 0;
554 ctxt->failsafe_callback_cs = FLAT_GUESTOS_CS;
555 ctxt->failsafe_callback_eip = 0;
557 memset(&launch_op, 0, sizeof (launch_op));
559 launch_op.u.builddomain.domain = (domid_t) domid;
560 // launch_op.u.builddomain.num_vifs = 1;
561 launch_op.u.builddomain.ctxt = ctxt;
562 launch_op.cmd = DOM0_BUILDDOMAIN;
563 rc = do_dom0_op(xc_handle, &launch_op);
565 fprintf(stderr, "RC is %d\n", rc);
566 return rc;
568 error_out:
569 if (kernel_fd >= 0)
570 close(kernel_fd);
571 if (kernel_gfd)
572 gzclose(kernel_gfd);
574 return -1;
575 }
577 /*
578 * Plan 9 memory layout (initial)
579 * ----------------
580 * | info from xen| @0
581 * ----------------
582 * | stack |
583 * ----------------<--- page 2
584 * | empty |
585 * ---------------<---- page 5 MACHADDR (always points to machp[cpuno]
586 * | aliased |
587 * ---------------<----- page 6 CPU0MACH
588 * | CPU0MACH |
589 * ----------------
590 * | empty |
591 * ---------------- *virt_load_addr = ehdr.e_entry (0x80100000)
592 * | kernel |
593 * | |
594 * ---------------- <----- page aligned boundary.
595 * | data |
596 * | |
597 * ----------------
598 * | bss |
599 * ----------------<--- end of kernel (page aligned)
600 * | PMD cpu0pdb |
601 * ----------------<--- page +1
602 * | PTE cpu0pte |
603 * ----------------<--- page (tot_pte_pages)/1024
604 * | page_array |
605 * ---------------- <--- page (tot_pte_pages)/1024
606 * | empty to TOM |
607 * ----------------
608 */
610 static int
611 loadp9image(gzFile kernel_gfd, int xc_handle, u32 dom,
612 unsigned long *page_array,
613 unsigned long tot_pages, unsigned long *virt_load_addr,
614 unsigned long *ksize, unsigned long *symtab_addr,
615 unsigned long *symtab_len,
616 unsigned long *first_data_page, unsigned long *pdb_page)
617 {
618 unsigned long datapage;
619 Exec ehdr;
621 char *p;
622 unsigned long maxva;
623 int curpos, ret;
624 PAGE *image = 0;
625 unsigned long image_tot_pages = 0;
626 unsigned long textround;
628 ret = -1;
630 p = NULL;
631 maxva = 0;
633 if (gzread(kernel_gfd, &ehdr, sizeof (Exec)) != sizeof (Exec)) {
634 PERROR("Error reading kernel image P9 header.");
635 goto out;
636 }
638 plan9header(&ehdr);
639 curpos = sizeof (Exec);
641 if (ehdr.magic != I_MAGIC) {
642 PERROR("Image does not have an P9 header.");
643 goto out;
644 }
646 textround = ((ehdr.text + 0x20 + 4095) >> 12) << 12;
647 *first_data_page = 0x100000 + textround;
648 DPRINTF(("ehrd.text is 0x%lx, textround is 0x%lx\n",
649 ehdr.text, textround));
651 image_tot_pages =
652 (textround + ehdr.data + ehdr.bss + PAGE_SIZE - 1) >> PAGE_SHIFT;
653 DPRINTF(("tot pages is %ld\n", image_tot_pages));
655 *virt_load_addr = 0x80100000;
657 if ((*virt_load_addr & (PAGE_SIZE - 1)) != 0) {
658 ERROR("We can only deal with page-aligned load addresses");
659 goto out;
660 }
662 if ((*virt_load_addr + (image_tot_pages << PAGE_SHIFT)) >
663 HYPERVISOR_VIRT_START) {
664 ERROR("Cannot map all domain memory without hitting Xen space");
665 goto out;
666 }
668 /* just malloc an image that is image_tot_pages in size. Then read in
669 * the image -- text, data, -- to page-rounded alignments.
670 * then copy into xen .
671 * this gets BSS zeroed for free
672 */
673 DPRINTF(("Allocate %ld bytes\n", image_tot_pages * sizeof (*image)));
674 image = calloc(image_tot_pages, sizeof (*image));
675 if (!image)
676 return blah("alloc data");
677 /* text starts at 0x20, after the header, just like Unix long ago */
678 if (gzread(kernel_gfd, &image[0].data[sizeof (Exec)], ehdr.text) <
679 ehdr.text)
680 return blah("read text");
681 DPRINTF(("READ TEXT %ld bytes\n", ehdr.text));
682 datapage = ((ehdr.text + sizeof (Exec)) / PAGE_SIZE) + 1;
683 if (gzread(kernel_gfd, image[datapage].data, ehdr.data) < ehdr.data)
684 return blah("read data");
685 DPRINTF(("READ DATA %ld bytes\n", ehdr.data));
687 /* nice contig stuff */
688 /* oops need to start at 0x100000 */
690 ret = memcpy_toguest(xc_handle, dom,
691 image, image_tot_pages * 4096, page_array, 0x100);
692 DPRINTF(("done copying kernel to guest memory\n"));
694 out:
695 if (image)
696 free(image);
697 *pdb_page = image_tot_pages + (0x100000 >> PAGE_SHIFT);
698 return ret;
699 }