ia64/xen-unstable

view tools/libxc/xc_linux_build.c @ 4895:24dfd18ea63e

bitkeeper revision 1.1159.258.120 (42848bfe8kMyWWcBA64rq7h7l7AyoA)

Shadow code bug fix (found by Ian) that was breaking refcounts, and subsequently
causing migration problems.
author mafetter@fleming.research
date Fri May 13 11:14:06 2005 +0000 (2005-05-13)
parents d787d8fcc4d3
children fe5933507ca5
line source
1 /******************************************************************************
2 * xc_linux_build.c
3 */
#include "xc_private.h"

/* ELFSIZE must be defined before xc_elf.h to select the 32-bit ELF types. */
#define ELFSIZE 32
#include "xc_elf.h"

#include <stdint.h>
#include <stdlib.h>
#include <zlib.h>
/* PTE flags used for the bootstrap mappings built in setup_guestos(). */
#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)

/* Round an address up (resp. down) to the nearest page boundary. */
#define round_pgup(_p) (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
#define round_pgdown(_p) ((_p)&PAGE_MASK)
/*
 * Layout and feature information extracted from the guest kernel image by
 * parseelfimage() and (optionally) extended by loadelfsymtab().
 */
struct domain_setup_info
{
    unsigned long v_start;     /* start of guest virtual address range */
    unsigned long v_end;       /* end of loaded image (incl. symtab, if any) */
    unsigned long v_kernstart; /* lowest vaddr of any loadable ELF segment */
    unsigned long v_kernend;   /* highest vaddr+memsz of a loadable segment */
    unsigned long v_kernentry; /* ELF entry point */

    unsigned int use_writable_pagetables; /* "PT_MODE_WRITABLE" in __xen_guest */
    unsigned int load_bsd_symtab;         /* "BSD_SYMTAB" in __xen_guest */

    unsigned long symtab_addr; /* guest vaddr the symbol table was copied to */
    unsigned long symtab_len;  /* byte length of the copied symbol table */
};
/* Forward declarations: ELF parsing/loading helpers defined later in file. */
static int
parseelfimage(
    char *elfbase, unsigned long elfsize, struct domain_setup_info *dsi);
static int
loadelfimage(
    char *elfbase, int xch, u32 dom, unsigned long *parray,
    unsigned long vstart);
static int
loadelfsymtab(
    char *elfbase, int xch, u32 dom, unsigned long *parray,
    struct domain_setup_info *dsi);
44 static long get_tot_pages(int xc_handle, u32 domid)
45 {
46 dom0_op_t op;
47 op.cmd = DOM0_GETDOMAININFO;
48 op.u.getdomaininfo.domain = (domid_t)domid;
49 op.u.getdomaininfo.ctxt = NULL;
50 return (do_dom0_op(xc_handle, &op) < 0) ?
51 -1 : op.u.getdomaininfo.tot_pages;
52 }
54 static int get_pfn_list(int xc_handle,
55 u32 domid,
56 unsigned long *pfn_buf,
57 unsigned long max_pfns)
58 {
59 dom0_op_t op;
60 int ret;
61 op.cmd = DOM0_GETMEMLIST;
62 op.u.getmemlist.domain = (domid_t)domid;
63 op.u.getmemlist.max_pfns = max_pfns;
64 op.u.getmemlist.buffer = pfn_buf;
66 if ( mlock(pfn_buf, max_pfns * sizeof(unsigned long)) != 0 )
67 return -1;
69 ret = do_dom0_op(xc_handle, &op);
71 (void)munlock(pfn_buf, max_pfns * sizeof(unsigned long));
73 return (ret < 0) ? -1 : op.u.getmemlist.num_pfns;
74 }
76 static int copy_to_domain_page(int xc_handle,
77 u32 domid,
78 unsigned long dst_pfn,
79 void *src_page)
80 {
81 void *vaddr = xc_map_foreign_range(
82 xc_handle, domid, PAGE_SIZE, PROT_WRITE, dst_pfn);
83 if ( vaddr == NULL )
84 return -1;
85 memcpy(vaddr, src_page, PAGE_SIZE);
86 munmap(vaddr, PAGE_SIZE);
87 return 0;
88 }
/*
 * Construct the initial memory image of a new Linux guest: load the kernel
 * (and optional initrd), build the bootstrap page tables, populate the
 * phys->machine map, the start_info page and the shared_info page, then pin
 * the L2 page directory.
 *
 * On success, *pvsi receives the guest vaddr of the start_info page and
 * *pvke the kernel entry point; ctxt->pt_base is set to the page directory.
 * Returns 0 on success, -1 on error (partially built state is left behind;
 * the caller is expected to destroy the domain on failure).
 */
static int setup_guestos(int xc_handle,
                         u32 dom,
                         char *image, unsigned long image_size,
                         gzFile initrd_gfd, unsigned long initrd_len,
                         unsigned long nr_pages,
                         unsigned long *pvsi, unsigned long *pvke,
                         full_execution_context_t *ctxt,
                         const char *cmdline,
                         unsigned long shared_info_frame,
                         unsigned int control_evtchn,
                         unsigned long flags)
{
    l1_pgentry_t *vl1tab=NULL, *vl1e=NULL;
    l2_pgentry_t *vl2tab=NULL, *vl2e=NULL;
    unsigned long *page_array = NULL;
    unsigned long l2tab;
    unsigned long l1tab;
    unsigned long count, i;
    start_info_t *start_info;
    shared_info_t *shared_info;
    mmu_t *mmu = NULL;
    int rc;

    unsigned long nr_pt_pages;
    unsigned long ppt_alloc;
    unsigned long *physmap, *physmap_e, physmap_pfn;

    /* Bootstrap virtual-address-space layout, computed below. */
    struct domain_setup_info dsi;
    unsigned long vinitrd_start;
    unsigned long vinitrd_end;
    unsigned long vphysmap_start;
    unsigned long vphysmap_end;
    unsigned long vstartinfo_start;
    unsigned long vstartinfo_end;
    unsigned long vstack_start;
    unsigned long vstack_end;
    unsigned long vpt_start;
    unsigned long vpt_end;
    unsigned long v_end;

    memset(&dsi, 0, sizeof(struct domain_setup_info));

    rc = parseelfimage(image, image_size, &dsi);
    if ( rc != 0 )
        goto error_out;

    if (dsi.use_writable_pagetables)
        xc_domain_setvmassist(xc_handle, dom, VMASST_CMD_enable,
                              VMASST_TYPE_writable_pagetables);

    /* First pass (parray == NULL) only computes dsi.v_end / symtab layout. */
    if (dsi.load_bsd_symtab)
        loadelfsymtab(image, xc_handle, dom, NULL, &dsi);

    if ( (dsi.v_start & (PAGE_SIZE-1)) != 0 )
    {
        PERROR("Guest OS must load to a page boundary.\n");
        goto error_out;
    }

    /*
     * Why do we need this? The number of page-table frames depends on the
     * size of the bootstrap address space. But the size of the address space
     * depends on the number of page-table frames (since each one is mapped
     * read-only). We have a pair of simultaneous equations in two unknowns,
     * which we solve by exhaustive search.
     */
    vinitrd_start = round_pgup(dsi.v_end);
    vinitrd_end = vinitrd_start + initrd_len;
    vphysmap_start = round_pgup(vinitrd_end);
    vphysmap_end = vphysmap_start + (nr_pages * sizeof(unsigned long));
    vpt_start = round_pgup(vphysmap_end);
    for ( nr_pt_pages = 2; ; nr_pt_pages++ )
    {
        vpt_end = vpt_start + (nr_pt_pages * PAGE_SIZE);
        vstartinfo_start = vpt_end;
        vstartinfo_end = vstartinfo_start + PAGE_SIZE;
        vstack_start = vstartinfo_end;
        vstack_end = vstack_start + PAGE_SIZE;
        /* Round the total space up to a 4MB boundary... */
        v_end = (vstack_end + (1<<22)-1) & ~((1<<22)-1);
        if ( (v_end - vstack_end) < (512 << 10) )
            v_end += 1 << 22; /* Add extra 4MB to get >= 512kB padding. */
        /* Stop once nr_pt_pages covers one L1 page per 4MB plus the L2. */
        if ( (((v_end - dsi.v_start + ((1<<L2_PAGETABLE_SHIFT)-1)) >>
               L2_PAGETABLE_SHIFT) + 1) <= nr_pt_pages )
            break;
    }

    printf("VIRTUAL MEMORY ARRANGEMENT:\n"
           " Loaded kernel: %08lx->%08lx\n"
           " Init. ramdisk: %08lx->%08lx\n"
           " Phys-Mach map: %08lx->%08lx\n"
           " Page tables: %08lx->%08lx\n"
           " Start info: %08lx->%08lx\n"
           " Boot stack: %08lx->%08lx\n"
           " TOTAL: %08lx->%08lx\n",
           dsi.v_kernstart, dsi.v_kernend,
           vinitrd_start, vinitrd_end,
           vphysmap_start, vphysmap_end,
           vpt_start, vpt_end,
           vstartinfo_start, vstartinfo_end,
           vstack_start, vstack_end,
           dsi.v_start, v_end);
    printf(" ENTRY ADDRESS: %08lx\n", dsi.v_kernentry);

    if ( (v_end - dsi.v_start) > (nr_pages * PAGE_SIZE) )
    {
        printf("Initial guest OS requires too much space\n"
               "(%luMB is greater than %luMB limit)\n",
               (v_end-dsi.v_start)>>20, (nr_pages<<PAGE_SHIFT)>>20);
        goto error_out;
    }

    if ( (page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL )
    {
        PERROR("Could not allocate memory");
        goto error_out;
    }

    if ( get_pfn_list(xc_handle, dom, page_array, nr_pages) != nr_pages )
    {
        PERROR("Could not get the page frame list");
        goto error_out;
    }

    /* NOTE(review): loadelfimage's return value is ignored here. */
    loadelfimage(image, xc_handle, dom, page_array, dsi.v_start);

    /* Second pass (parray != NULL) actually copies the symtab into place. */
    if (dsi.load_bsd_symtab)
        loadelfsymtab(image, xc_handle, dom, page_array, &dsi);

    /* Load the initial ramdisk image. */
    if ( initrd_len != 0 )
    {
        for ( i = (vinitrd_start - dsi.v_start);
              i < (vinitrd_end - dsi.v_start); i += PAGE_SIZE )
        {
            char page[PAGE_SIZE];
            if ( gzread(initrd_gfd, page, PAGE_SIZE) == -1 )
            {
                PERROR("Error reading initrd image, could not");
                goto error_out;
            }
            copy_to_domain_page(xc_handle, dom,
                                page_array[i>>PAGE_SHIFT], page);
        }
    }

    if ( (mmu = init_mmu_updates(xc_handle, dom)) == NULL )
        goto error_out;

    /* First allocate page for page dir. */
    ppt_alloc = (vpt_start - dsi.v_start) >> PAGE_SHIFT;
    l2tab = page_array[ppt_alloc++] << PAGE_SHIFT;
    ctxt->pt_base = l2tab;

    /* Initialise the page tables. */
    if ( (vl2tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                        PROT_READ|PROT_WRITE,
                                        l2tab >> PAGE_SHIFT)) == NULL )
        goto error_out;
    memset(vl2tab, 0, PAGE_SIZE);
    vl2e = &vl2tab[l2_table_offset(dsi.v_start)];
    for ( count = 0; count < ((v_end-dsi.v_start)>>PAGE_SHIFT); count++ )
    {
        /* Crossing into a new L1 page (vl1e wrapped to a page boundary)? */
        if ( ((unsigned long)vl1e & (PAGE_SIZE-1)) == 0 )
        {
            l1tab = page_array[ppt_alloc++] << PAGE_SHIFT;
            if ( vl1tab != NULL )
                munmap(vl1tab, PAGE_SIZE);
            if ( (vl1tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                                PROT_READ|PROT_WRITE,
                                                l1tab >> PAGE_SHIFT)) == NULL )
            {
                munmap(vl2tab, PAGE_SIZE);
                goto error_out;
            }
            memset(vl1tab, 0, PAGE_SIZE);
            vl1e = &vl1tab[l1_table_offset(dsi.v_start + (count<<PAGE_SHIFT))];
            *vl2e++ = l1tab | L2_PROT;
        }

        *vl1e = (page_array[count] << PAGE_SHIFT) | L1_PROT;
        /* Map the page-table frames themselves read-only. */
        if ( (count >= ((vpt_start-dsi.v_start)>>PAGE_SHIFT)) &&
             (count < ((vpt_end -dsi.v_start)>>PAGE_SHIFT)) )
            *vl1e &= ~_PAGE_RW;
        vl1e++;
    }
    munmap(vl1tab, PAGE_SIZE);
    munmap(vl2tab, PAGE_SIZE);

    /* Write the phys->machine and machine->phys table entries. */
    physmap_pfn = (vphysmap_start - dsi.v_start) >> PAGE_SHIFT;
    /* NOTE(review): this map (and later ones) is not checked for NULL. */
    physmap = physmap_e = xc_map_foreign_range(
        xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
        page_array[physmap_pfn++]);
    for ( count = 0; count < nr_pages; count++ )
    {
        if ( add_mmu_update(xc_handle, mmu,
                            (page_array[count] << PAGE_SHIFT) |
                            MMU_MACHPHYS_UPDATE, count) )
        {
            munmap(physmap, PAGE_SIZE);
            goto error_out;
        }
        *physmap_e++ = page_array[count];
        /* Filled this physmap page; move on to the next one. */
        if ( ((unsigned long)physmap_e & (PAGE_SIZE-1)) == 0 )
        {
            munmap(physmap, PAGE_SIZE);
            physmap = physmap_e = xc_map_foreign_range(
                xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
                page_array[physmap_pfn++]);
        }
    }
    munmap(physmap, PAGE_SIZE);

    /*
     * Pin down l2tab addr as page dir page - causes hypervisor to provide
     * correct protection for the page
     */
    if ( add_mmu_update(xc_handle, mmu,
                        l2tab | MMU_EXTENDED_COMMAND, MMUEXT_PIN_L2_TABLE) )
        goto error_out;

    /* Fill in the start_info page the guest reads at boot. */
    start_info = xc_map_foreign_range(
        xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
        page_array[(vstartinfo_start-dsi.v_start)>>PAGE_SHIFT]);
    memset(start_info, 0, sizeof(*start_info));
    start_info->nr_pages = nr_pages;
    start_info->shared_info = shared_info_frame << PAGE_SHIFT;
    start_info->flags = flags;
    start_info->pt_base = vpt_start;
    start_info->nr_pt_frames = nr_pt_pages;
    start_info->mfn_list = vphysmap_start;
    start_info->domain_controller_evtchn = control_evtchn;
    if ( initrd_len != 0 )
    {
        start_info->mod_start = vinitrd_start;
        start_info->mod_len = initrd_len;
    }
    strncpy(start_info->cmd_line, cmdline, MAX_CMDLINE);
    start_info->cmd_line[MAX_CMDLINE-1] = '\0'; /* strncpy may not terminate */
    munmap(start_info, PAGE_SIZE);

    /* shared_info page starts its life empty. */
    shared_info = xc_map_foreign_range(
        xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, shared_info_frame);
    memset(shared_info, 0, sizeof(shared_info_t));
    /* Mask all upcalls... */
    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
        shared_info->vcpu_data[i].evtchn_upcall_mask = 1;
    munmap(shared_info, PAGE_SIZE);

    /* Send the page update requests down to the hypervisor. */
    if ( finish_mmu_updates(xc_handle, mmu) )
        goto error_out;

    free(mmu);
    free(page_array);

    *pvsi = vstartinfo_start;
    *pvke = dsi.v_kernentry;

    return 0;

 error_out:
    if ( mmu != NULL )
        free(mmu);
    if ( page_array != NULL )
        free(page_array);
    return -1;
}
/*
 * Return the apparent size of the file behind @fd: for a gzip-compressed
 * file, the uncompressed length taken from the gzip trailer (last 4 bytes);
 * otherwise the plain on-disk size.  The file offset is rewound to the
 * start before returning.
 *
 * Fixes over the original: the return values of read() were ignored, so a
 * short or failed read left `sig'/`_sz' uninitialized (undefined behavior).
 * Both reads are now checked; on failure we fall back to the raw size.
 */
static unsigned long get_filesz(int fd)
{
    uint16_t sig = 0;
    uint32_t _sz = 0;
    unsigned long sz;

    lseek(fd, 0, SEEK_SET);
    if ( read(fd, &sig, sizeof(sig)) != sizeof(sig) )
        sig = 0; /* too short to be gzipped; use the raw size */
    sz = lseek(fd, 0, SEEK_END);
    if ( sig == 0x8b1f ) /* GZIP signature? */
    {
        /*
         * The gzip trailer stores the uncompressed length (mod 2^32),
         * little-endian.  NOTE(review): like the original, this assumes a
         * little-endian host.
         */
        if ( (lseek(fd, -4, SEEK_END) != (off_t)-1) &&
             (read(fd, &_sz, 4) == 4) )
            sz = _sz;
    }
    lseek(fd, 0, SEEK_SET);

    return sz;
}
380 static char *read_kernel_image(const char *filename, unsigned long *size)
381 {
382 int kernel_fd = -1;
383 gzFile kernel_gfd = NULL;
384 char *image = NULL;
385 unsigned int bytes;
387 if ( (kernel_fd = open(filename, O_RDONLY)) < 0 )
388 {
389 PERROR("Could not open kernel image");
390 goto out;
391 }
393 *size = get_filesz(kernel_fd);
395 if ( (kernel_gfd = gzdopen(kernel_fd, "rb")) == NULL )
396 {
397 PERROR("Could not allocate decompression state for state file");
398 goto out;
399 }
401 if ( (image = malloc(*size)) == NULL )
402 {
403 PERROR("Could not allocate memory for kernel image");
404 goto out;
405 }
407 if ( (bytes = gzread(kernel_gfd, image, *size)) != *size )
408 {
409 PERROR("Error reading kernel image, could not"
410 " read the whole image (%d != %ld).", bytes, *size);
411 free(image);
412 image = NULL;
413 }
415 out:
416 if ( kernel_gfd != NULL )
417 gzclose(kernel_gfd);
418 else if ( kernel_fd >= 0 )
419 close(kernel_fd);
420 return image;
421 }
423 int xc_linux_build(int xc_handle,
424 u32 domid,
425 const char *image_name,
426 const char *ramdisk_name,
427 const char *cmdline,
428 unsigned int control_evtchn,
429 unsigned long flags)
430 {
431 dom0_op_t launch_op, op;
432 int initrd_fd = -1;
433 gzFile initrd_gfd = NULL;
434 int rc, i;
435 full_execution_context_t st_ctxt, *ctxt = &st_ctxt;
436 unsigned long nr_pages;
437 char *image = NULL;
438 unsigned long image_size, initrd_size=0;
439 unsigned long vstartinfo_start, vkern_entry;
441 if ( (nr_pages = get_tot_pages(xc_handle, domid)) < 0 )
442 {
443 PERROR("Could not find total pages for domain");
444 goto error_out;
445 }
447 if ( (image = read_kernel_image(image_name, &image_size)) == NULL )
448 goto error_out;
450 if ( (ramdisk_name != NULL) && (strlen(ramdisk_name) != 0) )
451 {
452 if ( (initrd_fd = open(ramdisk_name, O_RDONLY)) < 0 )
453 {
454 PERROR("Could not open the initial ramdisk image");
455 goto error_out;
456 }
458 initrd_size = get_filesz(initrd_fd);
460 if ( (initrd_gfd = gzdopen(initrd_fd, "rb")) == NULL )
461 {
462 PERROR("Could not allocate decompression state for initrd");
463 goto error_out;
464 }
465 }
467 if ( mlock(&st_ctxt, sizeof(st_ctxt) ) )
468 {
469 PERROR("Unable to mlock ctxt");
470 return 1;
471 }
473 op.cmd = DOM0_GETDOMAININFO;
474 op.u.getdomaininfo.domain = (domid_t)domid;
475 op.u.getdomaininfo.ctxt = ctxt;
476 if ( (do_dom0_op(xc_handle, &op) < 0) ||
477 ((u16)op.u.getdomaininfo.domain != domid) )
478 {
479 PERROR("Could not get info on domain");
480 goto error_out;
481 }
482 if ( !(op.u.getdomaininfo.flags & DOMFLAGS_PAUSED) ||
483 (ctxt->pt_base != 0) )
484 {
485 ERROR("Domain is already constructed");
486 goto error_out;
487 }
489 if ( setup_guestos(xc_handle, domid, image, image_size,
490 initrd_gfd, initrd_size, nr_pages,
491 &vstartinfo_start, &vkern_entry,
492 ctxt, cmdline,
493 op.u.getdomaininfo.shared_info_frame,
494 control_evtchn, flags) < 0 )
495 {
496 ERROR("Error constructing guest OS");
497 goto error_out;
498 }
500 if ( initrd_fd >= 0 )
501 close(initrd_fd);
502 if ( initrd_gfd )
503 gzclose(initrd_gfd);
504 if ( image != NULL )
505 free(image);
507 ctxt->flags = 0;
509 /*
510 * Initial register values:
511 * DS,ES,FS,GS = FLAT_GUESTOS_DS
512 * CS:EIP = FLAT_GUESTOS_CS:start_pc
513 * SS:ESP = FLAT_GUESTOS_DS:start_stack
514 * ESI = start_info
515 * [EAX,EBX,ECX,EDX,EDI,EBP are zero]
516 * EFLAGS = IF | 2 (bit 1 is reserved and should always be 1)
517 */
518 ctxt->cpu_ctxt.ds = FLAT_GUESTOS_DS;
519 ctxt->cpu_ctxt.es = FLAT_GUESTOS_DS;
520 ctxt->cpu_ctxt.fs = FLAT_GUESTOS_DS;
521 ctxt->cpu_ctxt.gs = FLAT_GUESTOS_DS;
522 ctxt->cpu_ctxt.ss = FLAT_GUESTOS_DS;
523 ctxt->cpu_ctxt.cs = FLAT_GUESTOS_CS;
524 ctxt->cpu_ctxt.eip = vkern_entry;
525 ctxt->cpu_ctxt.esp = vstartinfo_start + 2*PAGE_SIZE;
526 ctxt->cpu_ctxt.esi = vstartinfo_start;
527 ctxt->cpu_ctxt.eflags = (1<<9) | (1<<2);
529 /* FPU is set up to default initial state. */
530 memset(ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
532 /* Virtual IDT is empty at start-of-day. */
533 for ( i = 0; i < 256; i++ )
534 {
535 ctxt->trap_ctxt[i].vector = i;
536 ctxt->trap_ctxt[i].cs = FLAT_GUESTOS_CS;
537 }
538 ctxt->fast_trap_idx = 0;
540 /* No LDT. */
541 ctxt->ldt_ents = 0;
543 /* Use the default Xen-provided GDT. */
544 ctxt->gdt_ents = 0;
546 /* Ring 1 stack is the initial stack. */
547 ctxt->guestos_ss = FLAT_GUESTOS_DS;
548 ctxt->guestos_esp = vstartinfo_start + 2*PAGE_SIZE;
550 /* No debugging. */
551 memset(ctxt->debugreg, 0, sizeof(ctxt->debugreg));
553 /* No callback handlers. */
554 ctxt->event_callback_cs = FLAT_GUESTOS_CS;
555 ctxt->event_callback_eip = 0;
556 ctxt->failsafe_callback_cs = FLAT_GUESTOS_CS;
557 ctxt->failsafe_callback_eip = 0;
559 memset( &launch_op, 0, sizeof(launch_op) );
561 launch_op.u.builddomain.domain = (domid_t)domid;
562 launch_op.u.builddomain.ctxt = ctxt;
564 launch_op.cmd = DOM0_BUILDDOMAIN;
565 rc = do_dom0_op(xc_handle, &launch_op);
567 return rc;
569 error_out:
570 if ( initrd_gfd != NULL )
571 gzclose(initrd_gfd);
572 else if ( initrd_fd >= 0 )
573 close(initrd_fd);
574 if ( image != NULL )
575 free(image);
577 return -1;
578 }
580 static inline int is_loadable_phdr(Elf_Phdr *phdr)
581 {
582 return ((phdr->p_type == PT_LOAD) &&
583 ((phdr->p_flags & (PF_W|PF_X)) != 0));
584 }
/*
 * Validate the ELF image at @elfbase (length @elfsize) and fill in *dsi:
 * kernel start/end/entry, the virtual base address, and feature flags taken
 * from the special '__xen_guest' section.  Returns 0 on success, -EINVAL on
 * any validation failure.
 *
 * NOTE(review): the header-table bounds checks below can wrap on 32-bit
 * hosts for adversarial e_phoff/e_shoff values, and the strstr() calls
 * assume the '__xen_guest' section contents are NUL-terminated within the
 * image — both inherited from the original; confirm before hardening.
 */
static int parseelfimage(char *elfbase,
                         unsigned long elfsize,
                         struct domain_setup_info *dsi)
{
    Elf_Ehdr *ehdr = (Elf_Ehdr *)elfbase;
    Elf_Phdr *phdr;
    Elf_Shdr *shdr;
    unsigned long kernstart = ~0UL, kernend=0UL;
    char *shstrtab, *guestinfo=NULL, *p;
    int h;

    if ( !IS_ELF(*ehdr) )
    {
        ERROR("Kernel image does not have an ELF header.");
        return -EINVAL;
    }

    if ( (ehdr->e_phoff + (ehdr->e_phnum * ehdr->e_phentsize)) > elfsize )
    {
        ERROR("ELF program headers extend beyond end of image.");
        return -EINVAL;
    }

    if ( (ehdr->e_shoff + (ehdr->e_shnum * ehdr->e_shentsize)) > elfsize )
    {
        ERROR("ELF section headers extend beyond end of image.");
        return -EINVAL;
    }

    /* Find the section-header strings table. */
    if ( ehdr->e_shstrndx == SHN_UNDEF )
    {
        ERROR("ELF image has no section-header strings table (shstrtab).");
        return -EINVAL;
    }
    shdr = (Elf_Shdr *)(elfbase + ehdr->e_shoff +
                        (ehdr->e_shstrndx*ehdr->e_shentsize));
    shstrtab = elfbase + shdr->sh_offset;

    /* Find the special '__xen_guest' section and check its contents. */
    for ( h = 0; h < ehdr->e_shnum; h++ )
    {
        shdr = (Elf_Shdr *)(elfbase + ehdr->e_shoff + (h*ehdr->e_shentsize));
        if ( strcmp(&shstrtab[shdr->sh_name], "__xen_guest") != 0 )
            continue;

        guestinfo = elfbase + shdr->sh_offset;

        if ( (strstr(guestinfo, "LOADER=generic") == NULL) &&
             (strstr(guestinfo, "GUEST_OS=linux") == NULL) )
        {
            ERROR("Will only load images built for the generic loader "
                  "or Linux images");
            ERROR("Actually saw: '%s'", guestinfo);
            return -EINVAL;
        }

        if ( (strstr(guestinfo, "XEN_VER=2.0") == NULL) )
        {
            ERROR("Will only load images built for Xen v2.0");
            ERROR("Actually saw: '%s'", guestinfo);
            return -EINVAL;
        }

        break;
    }
    if ( guestinfo == NULL )
    {
        ERROR("Not a Xen-ELF image: '__xen_guest' section not found.");
        return -EINVAL;
    }

    /* Compute the hull [kernstart, kernend) of all loadable segments. */
    for ( h = 0; h < ehdr->e_phnum; h++ )
    {
        phdr = (Elf_Phdr *)(elfbase + ehdr->e_phoff + (h*ehdr->e_phentsize));
        if ( !is_loadable_phdr(phdr) )
            continue;
        if ( phdr->p_vaddr < kernstart )
            kernstart = phdr->p_vaddr;
        if ( (phdr->p_vaddr + phdr->p_memsz) > kernend )
            kernend = phdr->p_vaddr + phdr->p_memsz;
    }

    /* Entry point must lie inside the loaded range. */
    if ( (kernstart > kernend) ||
         (ehdr->e_entry < kernstart) ||
         (ehdr->e_entry > kernend) )
    {
        ERROR("Malformed ELF image.");
        return -EINVAL;
    }

    /* VIRT_BASE= in __xen_guest overrides the segment-derived base. */
    dsi->v_start = kernstart;
    if ( (p = strstr(guestinfo, "VIRT_BASE=")) != NULL )
        dsi->v_start = strtoul(p+10, &p, 0);

    if ( (p = strstr(guestinfo, "PT_MODE_WRITABLE")) != NULL )
        dsi->use_writable_pagetables = 1;

    if ( (p = strstr(guestinfo, "BSD_SYMTAB")) != NULL )
        dsi->load_bsd_symtab = 1;

    dsi->v_kernstart = kernstart;
    dsi->v_kernend = kernend;
    dsi->v_kernentry = ehdr->e_entry;

    /* May be pushed further out by loadelfsymtab(). */
    dsi->v_end = dsi->v_kernend;

    return 0;
}
696 static int
697 loadelfimage(
698 char *elfbase, int xch, u32 dom, unsigned long *parray,
699 unsigned long vstart)
700 {
701 Elf_Ehdr *ehdr = (Elf_Ehdr *)elfbase;
702 Elf_Phdr *phdr;
703 int h;
705 char *va;
706 unsigned long pa, done, chunksz;
708 for ( h = 0; h < ehdr->e_phnum; h++ )
709 {
710 phdr = (Elf_Phdr *)(elfbase + ehdr->e_phoff + (h*ehdr->e_phentsize));
711 if ( !is_loadable_phdr(phdr) )
712 continue;
714 for ( done = 0; done < phdr->p_filesz; done += chunksz )
715 {
716 pa = (phdr->p_vaddr + done) - vstart;
717 va = xc_map_foreign_range(
718 xch, dom, PAGE_SIZE, PROT_WRITE, parray[pa>>PAGE_SHIFT]);
719 chunksz = phdr->p_filesz - done;
720 if ( chunksz > (PAGE_SIZE - (pa & (PAGE_SIZE-1))) )
721 chunksz = PAGE_SIZE - (pa & (PAGE_SIZE-1));
722 memcpy(va + (pa & (PAGE_SIZE-1)),
723 elfbase + phdr->p_offset + done, chunksz);
724 munmap(va, PAGE_SIZE);
725 }
727 for ( ; done < phdr->p_memsz; done += chunksz )
728 {
729 pa = (phdr->p_vaddr + done) - vstart;
730 va = xc_map_foreign_range(
731 xch, dom, PAGE_SIZE, PROT_WRITE, parray[pa>>PAGE_SHIFT]);
732 chunksz = phdr->p_memsz - done;
733 if ( chunksz > (PAGE_SIZE - (pa & (PAGE_SIZE-1))) )
734 chunksz = PAGE_SIZE - (pa & (PAGE_SIZE-1));
735 memset(va + (pa & (PAGE_SIZE-1)), 0, chunksz);
736 munmap(va, PAGE_SIZE);
737 }
738 }
740 return 0;
741 }
743 static void
744 map_memcpy(
745 unsigned long dst, char *src, unsigned long size,
746 int xch, u32 dom, unsigned long *parray, unsigned long vstart)
747 {
748 char *va;
749 unsigned long chunksz, done, pa;
751 for ( done = 0; done < size; done += chunksz )
752 {
753 pa = dst + done - vstart;
754 va = xc_map_foreign_range(
755 xch, dom, PAGE_SIZE, PROT_WRITE, parray[pa>>PAGE_SHIFT]);
756 chunksz = size - done;
757 if ( chunksz > (PAGE_SIZE - (pa & (PAGE_SIZE-1))) )
758 chunksz = PAGE_SIZE - (pa & (PAGE_SIZE-1));
759 memcpy(va + (pa & (PAGE_SIZE-1)), src + done, chunksz);
760 munmap(va, PAGE_SIZE);
761 }
762 }
/* Alignment (in bytes) for pieces of the copied symbol table. */
#define ELFROUND (ELFSIZE / 8)

/*
 * Copy the ELF symbol/string tables into the guest just past the kernel,
 * in the layout the guest's ksyms code expects:
 *   [int total_len][crafted Elf_Ehdr][section header table][tables...]
 *
 * Called twice: first with parray == NULL to compute dsi->symtab_addr,
 * dsi->symtab_len and push dsi->v_end out, then with the real page array
 * to perform the copies.  Always returns 0 (allocation failure is treated
 * as "no symtab").
 */
static int
loadelfsymtab(
    char *elfbase, int xch, u32 dom, unsigned long *parray,
    struct domain_setup_info *dsi)
{
    Elf_Ehdr *ehdr = (Elf_Ehdr *)elfbase, *sym_ehdr;
    Elf_Shdr *shdr;
    unsigned long maxva, symva;
    char *p;
    int h, i;

    /* Scratch buffer: total-length int + crafted Ehdr + copied Shdr table. */
    p = malloc(sizeof(int) + sizeof(Elf_Ehdr) +
               ehdr->e_shnum * sizeof(Elf_Shdr));
    if (p == NULL)
        return 0;

    /* Place the blob immediately after the kernel, ELFROUND-aligned. */
    maxva = (dsi->v_kernend + ELFROUND - 1) & ~(ELFROUND - 1);
    symva = maxva;
    maxva += sizeof(int);
    dsi->symtab_addr = maxva;
    dsi->symtab_len = 0;
    maxva += sizeof(Elf_Ehdr) + ehdr->e_shnum * sizeof(Elf_Shdr);
    maxva = (maxva + ELFROUND - 1) & ~(ELFROUND - 1);

    shdr = (Elf_Shdr *)(p + sizeof(int) + sizeof(Elf_Ehdr));
    memcpy(shdr, elfbase + ehdr->e_shoff, ehdr->e_shnum * sizeof(Elf_Shdr));

    for ( h = 0; h < ehdr->e_shnum; h++ )
    {
        if ( shdr[h].sh_type == SHT_STRTAB )
        {
            /* Look for a strtab @i linked to symtab @h. */
            for ( i = 0; i < ehdr->e_shnum; i++ )
                if ( (shdr[i].sh_type == SHT_SYMTAB) &&
                     (shdr[i].sh_link == h) )
                    break;
            /* Skip symtab @h if we found no corresponding strtab @i. */
            if ( i == ehdr->e_shnum )
            {
                shdr[h].sh_offset = 0;
                continue;
            }
        }

        if ( (shdr[h].sh_type == SHT_STRTAB) ||
             (shdr[h].sh_type == SHT_SYMTAB) )
        {
            /* Second pass only: copy the table into guest memory. */
            if ( parray != NULL )
                map_memcpy(maxva, elfbase + shdr[h].sh_offset, shdr[h].sh_size,
                           xch, dom, parray, dsi->v_start);

            /* Mangled to be based on ELF header location. */
            shdr[h].sh_offset = maxva - dsi->symtab_addr;

            dsi->symtab_len += shdr[h].sh_size;
            maxva += shdr[h].sh_size;
            maxva = (maxva + ELFROUND - 1) & ~(ELFROUND - 1);
        }

        shdr[h].sh_name = 0; /* Name is NULL. */
    }

    if ( dsi->symtab_len == 0 )
    {
        dsi->symtab_addr = 0;
        goto out;
    }

    if ( parray != NULL )
    {
        /* Total byte length of everything that follows the length word. */
        *(int *)p = maxva - dsi->symtab_addr;
        /* Craft a minimal Ehdr: no program headers, Shdr table right after. */
        sym_ehdr = (Elf_Ehdr *)(p + sizeof(int));
        memcpy(sym_ehdr, ehdr, sizeof(Elf_Ehdr));
        sym_ehdr->e_phoff = 0;
        sym_ehdr->e_shoff = sizeof(Elf_Ehdr);
        sym_ehdr->e_phentsize = 0;
        sym_ehdr->e_phnum = 0;
        sym_ehdr->e_shstrndx = SHN_UNDEF;

        /* Copy total length, crafted ELF header and section header table */
        map_memcpy(symva, p, sizeof(int) + sizeof(Elf_Ehdr) +
                   ehdr->e_shnum * sizeof(Elf_Shdr), xch, dom, parray,
                   dsi->v_start);
    }

    dsi->symtab_len = maxva - dsi->symtab_addr;
    dsi->v_end = round_pgup(maxva);

 out:
    if ( p != NULL )
        free(p);

    return 0;
}