ia64/xen-unstable

tools/libxc/xc_hvm_build.c @ 11114:5a454f6e320f

Make room in e820map for SMBIOS tables.
Signed-off-by: Andrew D. Ball <aball@us.ibm.com>
author kfraser@localhost.localdomain
date Mon Aug 14 17:29:11 2006 +0100
parents 21240dc9f2e8
children 0f917d63e960

/******************************************************************************
 * xc_hvm_build.c
 */

#define ELFSIZE 32
#include <stddef.h>
#include <inttypes.h>
#include "xg_private.h"
#include "xc_private.h"
#include "xc_elf.h"
#include <stdlib.h>
#include <unistd.h>
#include <zlib.h>
#include <xen/hvm/hvm_info_table.h>
#include <xen/hvm/ioreq.h>
#include <xen/hvm/params.h>
#include <xen/hvm/e820.h>
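
/*
 * hvmloader is loaded and entered at the 1MB boundary: parseelfimage()
 * below forces dsi.v_kernentry to this address, and setup_guest() points
 * VCPU0's initial eip at it.
 */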
#define HVM_LOADER_ENTR_ADDR 0x00100000

static int
parseelfimage(
    char *elfbase, unsigned long elfsize, struct domain_setup_info *dsi);
static int
loadelfimage(
    char *elfbase, int xch, uint32_t dom, unsigned long *parray,
    struct domain_setup_info *dsi);

static void xc_set_hvm_param(int handle,
                             domid_t dom, int param, unsigned long value)
{
    DECLARE_HYPERCALL;
    xen_hvm_param_t arg;
    int rc;

    hypercall.op     = __HYPERVISOR_hvm_op;
    hypercall.arg[0] = HVMOP_set_param;
    hypercall.arg[1] = (unsigned long)&arg;
    arg.domid = dom;
    arg.index = param;
    arg.value = value;
    if ( mlock(&arg, sizeof(arg)) != 0 )
    {
        PERROR("Could not lock memory for set parameter");
        return;
    }
    rc = do_xen_hypercall(handle, &hypercall);
    safe_munlock(&arg, sizeof(arg));
    if ( rc < 0 )
        PERROR("set HVM parameter failed (%d)", rc);
}
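
/*
 * Example from the callers below -- each guest configuration knob is
 * published to the hypervisor as a separate parameter:
 *
 *     xc_set_hvm_param(xc_handle, dom, HVM_PARAM_PAE_ENABLED, pae);
 *
 * The argument block is mlock()ed because Xen reads it through the
 * virtual address passed in hypercall.arg[1]; it must not be paged out
 * while the hypercall is in flight.
 */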

static void build_e820map(void *e820_page, unsigned long long mem_size)
{
    struct e820entry *e820entry =
        (struct e820entry *)(((unsigned char *)e820_page) + E820_MAP_OFFSET);
    unsigned char nr_map = 0;

    /* XXX: Doesn't work for > 4GB yet. */
    e820entry[nr_map].addr = 0x0;
    e820entry[nr_map].size = 0x9F000;
    e820entry[nr_map].type = E820_RAM;
    nr_map++;

    e820entry[nr_map].addr = 0x9F000;
    e820entry[nr_map].size = 0x1000;
    e820entry[nr_map].type = E820_RESERVED;
    nr_map++;

    e820entry[nr_map].addr = 0xA0000;
    e820entry[nr_map].size = 0x20000;
    e820entry[nr_map].type = E820_IO;
    nr_map++;

    e820entry[nr_map].addr = 0xF0000;
    e820entry[nr_map].size = 0x10000;
    e820entry[nr_map].type = E820_RESERVED;
    nr_map++;

#define STATIC_PAGES 3
    /* 3 static pages:
     * - ioreq buffer.
     * - xenstore.
     * - shared_page.
     */

    /* Most of the RAM goes here. */
    e820entry[nr_map].addr = 0x100000;
    e820entry[nr_map].size = mem_size - 0x100000 - STATIC_PAGES * PAGE_SIZE;
    e820entry[nr_map].type = E820_RAM;
    nr_map++;

    /* Statically allocated special pages. */

    /* For buffered IO requests. */
    e820entry[nr_map].addr = mem_size - 3 * PAGE_SIZE;
    e820entry[nr_map].size = PAGE_SIZE;
    e820entry[nr_map].type = E820_BUFFERED_IO;
    nr_map++;

    /* For xenstore. */
    e820entry[nr_map].addr = mem_size - 2 * PAGE_SIZE;
    e820entry[nr_map].size = PAGE_SIZE;
    e820entry[nr_map].type = E820_XENSTORE;
    nr_map++;

    /* Shared ioreq_t page. */
    e820entry[nr_map].addr = mem_size - PAGE_SIZE;
    e820entry[nr_map].size = PAGE_SIZE;
    e820entry[nr_map].type = E820_SHARED_PAGE;
    nr_map++;

    /* NVS pages just above mem_size; this is where room is made for the
     * SMBIOS tables. */
    e820entry[nr_map].addr = mem_size;
    e820entry[nr_map].size = 0x3 * PAGE_SIZE;
    e820entry[nr_map].type = E820_NVS;
    nr_map++;

    /* ACPI table pages above the NVS region. */
    e820entry[nr_map].addr = mem_size + 0x3 * PAGE_SIZE;
    e820entry[nr_map].size = 0xA * PAGE_SIZE;
    e820entry[nr_map].type = E820_ACPI;
    nr_map++;

    /* I/O hole from the IO-APIC base up to 4GB. */
    e820entry[nr_map].addr = 0xFEC00000;
    e820entry[nr_map].size = 0x1400000;
    e820entry[nr_map].type = E820_IO;
    nr_map++;

    *(((unsigned char *)e820_page) + E820_MAP_NR_OFFSET) = nr_map;
}
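
/*
 * Worked example (illustrative): for a 128MB guest, mem_size = 0x8000000
 * and the map above comes out as:
 *
 *     0x00000000-0x0009F000  RAM          (low memory)
 *     0x0009F000-0x000A0000  RESERVED
 *     0x000A0000-0x000C0000  IO           (VGA hole)
 *     0x000F0000-0x00100000  RESERVED     (BIOS)
 *     0x00100000-0x07FFD000  RAM          (main memory)
 *     0x07FFD000-0x07FFE000  BUFFERED_IO
 *     0x07FFE000-0x07FFF000  XENSTORE
 *     0x07FFF000-0x08000000  SHARED_PAGE
 *     0x08000000-0x08003000  NVS          (SMBIOS room)
 *     0x08003000-0x0800D000  ACPI
 *     0xFEC00000-0x100000000 IO           (APIC region up to 4GB)
 */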

static void set_hvm_info_checksum(struct hvm_info_table *t)
{
    uint8_t *ptr = (uint8_t *)t, sum = 0;
    unsigned int i;

    t->checksum = 0;

    for ( i = 0; i < t->length; i++ )
        sum += *ptr++;

    t->checksum = -sum;
}
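
/*
 * The checksum follows the usual ACPI-style rule: after
 * t->checksum = -sum, summing all t->length bytes of the table gives
 * sum + (-sum) == 0 (mod 256), which is what a consumer can verify.
 */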

/*
 * Use E820 reserved memory at 0x9F800 to pass HVM info to hvmloader;
 * hvmloader will use this info to set up the BIOS accordingly.
 */
static int set_hvm_info(int xc_handle, uint32_t dom,
                        xen_pfn_t *pfn_list, unsigned int vcpus,
                        unsigned int acpi, unsigned int apic)
{
    char *va_map;
    struct hvm_info_table *va_hvm;

    va_map = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                  PROT_READ | PROT_WRITE,
                                  pfn_list[HVM_INFO_PFN]);

    if ( va_map == NULL )
        return -1;

    va_hvm = (struct hvm_info_table *)(va_map + HVM_INFO_OFFSET);
    memset(va_hvm, 0, sizeof(*va_hvm));

    strncpy(va_hvm->signature, "HVM INFO", 8);
    va_hvm->length = sizeof(struct hvm_info_table);
    va_hvm->acpi_enabled = acpi;
    va_hvm->nr_vcpus = vcpus;

    set_hvm_info_checksum(va_hvm);

    munmap(va_map, PAGE_SIZE);

    xc_set_hvm_param(xc_handle, dom, HVM_PARAM_APIC_ENABLED, apic);

    return 0;
}
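
/*
 * Where 0x9F800 comes from (assumption: the constants in
 * xen/hvm/hvm_info_table.h define HVM_INFO_PFN as 0x9F and
 * HVM_INFO_OFFSET as 0x800): the table lands at
 * (HVM_INFO_PFN << PAGE_SHIFT) + HVM_INFO_OFFSET = 0x9F800, i.e. inside
 * the E820_RESERVED entry at 0x9F000 that build_e820map() emits.
 */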

static int setup_guest(int xc_handle,
                       uint32_t dom, int memsize,
                       char *image, unsigned long image_size,
                       unsigned long nr_pages,
                       vcpu_guest_context_t *ctxt,
                       unsigned long shared_info_frame,
                       unsigned int vcpus,
                       unsigned int pae,
                       unsigned int acpi,
                       unsigned int apic,
                       unsigned int store_evtchn,
                       unsigned long *store_mfn)
{
    xen_pfn_t *page_array = NULL;
    unsigned long count, i;
    unsigned long long ptr;
    xc_mmu_t *mmu = NULL;

    shared_info_t *shared_info;
    void *e820_page;

    struct domain_setup_info dsi;
    uint64_t v_end;

    unsigned long shared_page_frame = 0;
    shared_iopage_t *sp;

    unsigned long ioreq_buffer_frame = 0;
    void *ioreq_buffer_page;

    memset(&dsi, 0, sizeof(struct domain_setup_info));

    if ( (parseelfimage(image, image_size, &dsi)) != 0 )
        goto error_out;

    if ( (dsi.v_kernstart & (PAGE_SIZE - 1)) != 0 )
    {
        PERROR("Guest OS must load to a page boundary.\n");
        goto error_out;
    }

    /* memsize is in megabytes. */
    v_end = (unsigned long long)memsize << 20;

    IPRINTF("VIRTUAL MEMORY ARRANGEMENT:\n"
            "  Loaded HVM loader: %016"PRIx64"->%016"PRIx64"\n"
            "  TOTAL:             %016"PRIx64"->%016"PRIx64"\n",
            dsi.v_kernstart, dsi.v_kernend,
            dsi.v_start, v_end);
    IPRINTF("  ENTRY ADDRESS:     %016"PRIx64"\n", dsi.v_kernentry);

    if ( (v_end - dsi.v_start) > ((unsigned long long)nr_pages << PAGE_SHIFT) )
    {
        PERROR("Initial guest OS requires too much space: "
               "(%lluMB is greater than %lluMB limit)\n",
               (unsigned long long)(v_end - dsi.v_start) >> 20,
               ((unsigned long long)nr_pages << PAGE_SHIFT) >> 20);
        goto error_out;
    }

    if ( (page_array = malloc(nr_pages * sizeof(xen_pfn_t))) == NULL )
    {
        PERROR("Could not allocate memory.\n");
        goto error_out;
    }

    if ( xc_get_pfn_list(xc_handle, dom, page_array, nr_pages) != nr_pages )
    {
        PERROR("Could not get the page frame list.\n");
        goto error_out;
    }

    loadelfimage(image, xc_handle, dom, page_array, &dsi);

    if ( (mmu = xc_init_mmu_updates(xc_handle, dom)) == NULL )
        goto error_out;

    /* Write the machine->phys table entries. */
    for ( count = 0; count < nr_pages; count++ )
    {
        ptr = (unsigned long long)page_array[count] << PAGE_SHIFT;
        if ( xc_add_mmu_update(xc_handle, mmu,
                               ptr | MMU_MACHPHYS_UPDATE, count) )
            goto error_out;
    }

    if ( set_hvm_info(xc_handle, dom, page_array, vcpus, acpi, apic) )
    {
        ERROR("Couldn't set hvm info for HVM guest.\n");
        goto error_out;
    }

    xc_set_hvm_param(xc_handle, dom, HVM_PARAM_PAE_ENABLED, pae);

    if ( (e820_page = xc_map_foreign_range(
             xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE,
             page_array[E820_MAP_PAGE >> PAGE_SHIFT])) == 0 )
        goto error_out;
    memset(e820_page, 0, PAGE_SIZE);
    build_e820map(e820_page, v_end);
    munmap(e820_page, PAGE_SIZE);

    /* shared_info page starts its life empty. */
    if ( (shared_info = xc_map_foreign_range(
             xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE,
             shared_info_frame)) == 0 )
        goto error_out;
    memset(shared_info, 0, PAGE_SIZE);
    /* Mask all upcalls... */
    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
        shared_info->vcpu_info[i].evtchn_upcall_mask = 1;
    munmap(shared_info, PAGE_SIZE);

    /* Paranoia: the shared ioreq page should start out zeroed. */
    shared_page_frame = page_array[(v_end >> PAGE_SHIFT) - 1];
    if ( (sp = (shared_iopage_t *)xc_map_foreign_range(
             xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE,
             shared_page_frame)) == 0 )
        goto error_out;
    memset(sp, 0, PAGE_SIZE);
    munmap(sp, PAGE_SIZE);

    /* Clean the buffered IO requests page. */
    ioreq_buffer_frame = page_array[(v_end >> PAGE_SHIFT) - 3];
    ioreq_buffer_page = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                             PROT_READ | PROT_WRITE,
                                             ioreq_buffer_frame);
    if ( ioreq_buffer_page == NULL )
        goto error_out;

    memset(ioreq_buffer_page, 0, PAGE_SIZE);

    munmap(ioreq_buffer_page, PAGE_SIZE);

    xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN,
                     (v_end >> PAGE_SHIFT) - 2);
    xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_EVTCHN, store_evtchn);

    *store_mfn = page_array[(v_end >> PAGE_SHIFT) - 2];
    if ( xc_clear_domain_page(xc_handle, dom, *store_mfn) )
        goto error_out;

    /* Send the page update requests down to the hypervisor. */
    if ( xc_finish_mmu_updates(xc_handle, mmu) )
        goto error_out;

    free(mmu);
    free(page_array);

    /* Initial register values. */
    ctxt->user_regs.eip = dsi.v_kernentry;

    return 0;

 error_out:
    free(mmu);
    free(page_array);
    return -1;
}
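
/*
 * Summary of the special pages initialised above, matching the e820
 * entries from build_e820map():
 *
 *     (v_end >> PAGE_SHIFT) - 1 : shared ioreq page (E820_SHARED_PAGE)
 *     (v_end >> PAGE_SHIFT) - 2 : xenstore page (HVM_PARAM_STORE_PFN)
 *     (v_end >> PAGE_SHIFT) - 3 : buffered IO page (E820_BUFFERED_IO)
 *
 * All three are zeroed before the guest first runs.
 */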

static int xc_hvm_build_internal(int xc_handle,
                                 uint32_t domid,
                                 int memsize,
                                 char *image,
                                 unsigned long image_size,
                                 unsigned int vcpus,
                                 unsigned int pae,
                                 unsigned int acpi,
                                 unsigned int apic,
                                 unsigned int store_evtchn,
                                 unsigned long *store_mfn)
{
    dom0_op_t launch_op, op;
    int rc, i;
    vcpu_guest_context_t st_ctxt, *ctxt = &st_ctxt;
    long nr_pages; /* signed: xc_get_tot_pages() returns -1 on failure */
    xen_capabilities_info_t xen_caps;

    if ( (image == NULL) || (image_size == 0) )
    {
        ERROR("Image required");
        goto error_out;
    }

    if ( (rc = xc_version(xc_handle, XENVER_capabilities, &xen_caps)) != 0 )
    {
        PERROR("Failed to get xen version info");
        goto error_out;
    }

    if ( !strstr(xen_caps, "hvm") )
    {
        PERROR("CPU doesn't support HVM extensions or "
               "the extensions are not enabled");
        goto error_out;
    }

    /* An unsigned nr_pages would make this error check a no-op. */
    if ( (nr_pages = xc_get_tot_pages(xc_handle, domid)) < 0 )
    {
        PERROR("Could not find total pages for domain");
        goto error_out;
    }

    if ( mlock(&st_ctxt, sizeof(st_ctxt)) )
    {
        PERROR("%s: ctxt mlock failed", __func__);
        return 1;
    }

    op.cmd = DOM0_GETDOMAININFO;
    op.u.getdomaininfo.domain = (domid_t)domid;
    if ( (xc_dom0_op(xc_handle, &op) < 0) ||
         ((uint16_t)op.u.getdomaininfo.domain != domid) )
    {
        PERROR("Could not get info on domain");
        goto error_out;
    }

    memset(ctxt, 0, sizeof(*ctxt));

    ctxt->flags = VGCF_HVM_GUEST;
    if ( setup_guest(xc_handle, domid, memsize, image, image_size, nr_pages,
                     ctxt, op.u.getdomaininfo.shared_info_frame,
                     vcpus, pae, acpi, apic, store_evtchn, store_mfn) < 0 )
    {
        ERROR("Error constructing guest OS");
        goto error_out;
    }

    /* FPU is set up to default initial state. */
    memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));

    /* Virtual IDT is empty at start-of-day. */
    for ( i = 0; i < 256; i++ )
    {
        ctxt->trap_ctxt[i].vector = i;
        ctxt->trap_ctxt[i].cs = FLAT_KERNEL_CS;
    }

    /* No LDT. */
    ctxt->ldt_ents = 0;

    /* Use the default Xen-provided GDT. */
    ctxt->gdt_ents = 0;

    /* No debugging. */
    memset(ctxt->debugreg, 0, sizeof(ctxt->debugreg));

    /* No callback handlers. */
#if defined(__i386__)
    ctxt->event_callback_cs     = FLAT_KERNEL_CS;
    ctxt->event_callback_eip    = 0;
    ctxt->failsafe_callback_cs  = FLAT_KERNEL_CS;
    ctxt->failsafe_callback_eip = 0;
#elif defined(__x86_64__)
    ctxt->event_callback_eip    = 0;
    ctxt->failsafe_callback_eip = 0;
    ctxt->syscall_callback_eip  = 0;
#endif

    memset(&launch_op, 0, sizeof(launch_op));

    launch_op.u.setvcpucontext.domain = (domid_t)domid;
    launch_op.u.setvcpucontext.vcpu   = 0;
    set_xen_guest_handle(launch_op.u.setvcpucontext.ctxt, ctxt);

    launch_op.cmd = DOM0_SETVCPUCONTEXT;
    rc = xc_dom0_op(xc_handle, &launch_op);

    return rc;

 error_out:
    return -1;
}

static inline int is_loadable_phdr(Elf32_Phdr *phdr)
{
    return ((phdr->p_type == PT_LOAD) &&
            ((phdr->p_flags & (PF_W|PF_X)) != 0));
}

static int parseelfimage(char *elfbase,
                         unsigned long elfsize,
                         struct domain_setup_info *dsi)
{
    Elf32_Ehdr *ehdr = (Elf32_Ehdr *)elfbase;
    Elf32_Phdr *phdr;
    Elf32_Shdr *shdr;
    unsigned long kernstart = ~0UL, kernend = 0UL;
    char *shstrtab;
    int h;

    if ( !IS_ELF(*ehdr) )
    {
        ERROR("Kernel image does not have an ELF header.");
        return -EINVAL;
    }

    if ( (ehdr->e_phoff + (ehdr->e_phnum * ehdr->e_phentsize)) > elfsize )
    {
        ERROR("ELF program headers extend beyond end of image.");
        return -EINVAL;
    }

    if ( (ehdr->e_shoff + (ehdr->e_shnum * ehdr->e_shentsize)) > elfsize )
    {
        ERROR("ELF section headers extend beyond end of image.");
        return -EINVAL;
    }

    /* Find the section-header strings table. */
    if ( ehdr->e_shstrndx == SHN_UNDEF )
    {
        ERROR("ELF image has no section-header strings table (shstrtab).");
        return -EINVAL;
    }
    shdr = (Elf32_Shdr *)(elfbase + ehdr->e_shoff +
                          (ehdr->e_shstrndx * ehdr->e_shentsize));
    shstrtab = elfbase + shdr->sh_offset;

    for ( h = 0; h < ehdr->e_phnum; h++ )
    {
        phdr = (Elf32_Phdr *)(elfbase + ehdr->e_phoff + (h*ehdr->e_phentsize));
        if ( !is_loadable_phdr(phdr) )
            continue;
        if ( phdr->p_paddr < kernstart )
            kernstart = phdr->p_paddr;
        if ( (phdr->p_paddr + phdr->p_memsz) > kernend )
            kernend = phdr->p_paddr + phdr->p_memsz;
    }

    if ( (kernstart > kernend) ||
         (ehdr->e_entry < kernstart) ||
         (ehdr->e_entry > kernend) )
    {
        ERROR("Malformed ELF image.");
        return -EINVAL;
    }

    dsi->v_start = 0x00000000;

    dsi->v_kernstart = kernstart;
    dsi->v_kernend   = kernend;
    dsi->v_kernentry = HVM_LOADER_ENTR_ADDR;

    dsi->v_end = dsi->v_kernend;

    return 0;
}

static int
loadelfimage(
    char *elfbase, int xch, uint32_t dom, unsigned long *parray,
    struct domain_setup_info *dsi)
{
    Elf32_Ehdr *ehdr = (Elf32_Ehdr *)elfbase;
    Elf32_Phdr *phdr;
    int h;

    char *va;
    unsigned long pa, done, chunksz;

    for ( h = 0; h < ehdr->e_phnum; h++ )
    {
        phdr = (Elf32_Phdr *)(elfbase + ehdr->e_phoff + (h*ehdr->e_phentsize));
        if ( !is_loadable_phdr(phdr) )
            continue;

        for ( done = 0; done < phdr->p_filesz; done += chunksz )
        {
            pa = (phdr->p_paddr + done) - dsi->v_start;
            if ( (va = xc_map_foreign_range(
                      xch, dom, PAGE_SIZE, PROT_WRITE,
                      parray[pa >> PAGE_SHIFT])) == 0 )
                return -1;
            chunksz = phdr->p_filesz - done;
            if ( chunksz > (PAGE_SIZE - (pa & (PAGE_SIZE-1))) )
                chunksz = PAGE_SIZE - (pa & (PAGE_SIZE-1));
            memcpy(va + (pa & (PAGE_SIZE-1)),
                   elfbase + phdr->p_offset + done, chunksz);
            munmap(va, PAGE_SIZE);
        }

        for ( ; done < phdr->p_memsz; done += chunksz )
        {
            pa = (phdr->p_paddr + done) - dsi->v_start;
            if ( (va = xc_map_foreign_range(
                      xch, dom, PAGE_SIZE, PROT_WRITE,
                      parray[pa >> PAGE_SHIFT])) == 0 )
                return -1;
            chunksz = phdr->p_memsz - done;
            if ( chunksz > (PAGE_SIZE - (pa & (PAGE_SIZE-1))) )
                chunksz = PAGE_SIZE - (pa & (PAGE_SIZE-1));
            memset(va + (pa & (PAGE_SIZE-1)), 0, chunksz);
            munmap(va, PAGE_SIZE);
        }
    }

    return 0;
}
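
/*
 * The two inner loops above apply the standard ELF segment rule: the
 * first phdr->p_filesz bytes of a PT_LOAD segment come from the file
 * image, and the remainder up to phdr->p_memsz (the BSS) is zero-filled.
 * Since guest memory is only reachable one foreign page mapping at a
 * time, both passes clamp chunksz at each page boundary.
 */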

/* xc_hvm_build
 *
 * Create a domain for a virtualized Linux, using files/filenames.
 */

int xc_hvm_build(int xc_handle,
                 uint32_t domid,
                 int memsize,
                 const char *image_name,
                 unsigned int vcpus,
                 unsigned int pae,
                 unsigned int acpi,
                 unsigned int apic,
                 unsigned int store_evtchn,
                 unsigned long *store_mfn)
{
    char *image;
    int sts;
    unsigned long image_size;

    if ( (image_name == NULL) ||
         ((image = xc_read_image(image_name, &image_size)) == NULL) )
        return -1;

    sts = xc_hvm_build_internal(xc_handle, domid, memsize,
                                image, image_size,
                                vcpus, pae, acpi, apic,
                                store_evtchn, store_mfn);

    free(image);

    return sts;
}
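
/*
 * Illustrative call (path and values hypothetical): build a 128MB,
 * single-VCPU guest with PAE, ACPI and APIC enabled:
 *
 *     unsigned long store_mfn;
 *     int rc = xc_hvm_build(xc_handle, domid, 128,
 *                           "/usr/lib/xen/boot/hvmloader",
 *                           1, 1, 1, 1, store_evtchn, &store_mfn);
 *
 * On success store_mfn holds the machine frame of the xenstore page.
 */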

/* xc_hvm_build_mem
 *
 * Create a domain for a virtualized Linux, using buffers.
 */

int xc_hvm_build_mem(int xc_handle,
                     uint32_t domid,
                     int memsize,
                     const char *image_buffer,
                     unsigned long image_size,
                     unsigned int vcpus,
                     unsigned int pae,
                     unsigned int acpi,
                     unsigned int apic,
                     unsigned int store_evtchn,
                     unsigned long *store_mfn)
{
    int sts;
    unsigned long img_len;
    char *img;

    /* Validate that there is a kernel buffer. */

    if ( (image_buffer == NULL) || (image_size == 0) )
    {
        ERROR("kernel image buffer not present");
        return -1;
    }

    img = xc_inflate_buffer(image_buffer, image_size, &img_len);
    if ( img == NULL )
    {
        ERROR("unable to inflate kernel image buffer");
        return -1;
    }

    sts = xc_hvm_build_internal(xc_handle, domid, memsize,
                                img, img_len,
                                vcpus, pae, acpi, apic,
                                store_evtchn, store_mfn);

    /* xc_inflate_buffer may return the original buffer pointer (for
     * already-inflated buffers), so exercise some care in freeing. */

    if ( (img != NULL) && (img != image_buffer) )
        free(img);

    return sts;
}

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */