direct-io.hg

view tools/libxc/xc_hvm_build.c @ 10276:b3d901ba705d

Represent PFNs with their own type, rather than 'unsigned long'.
('long' changes size and alignment between 32- and 64-bit ABIs.)
Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com>
Author: kaf24@firebug.cl.cam.ac.uk
Date:   Tue Jun 06 09:48:17 2006 +0100 (2006-06-06)

/******************************************************************************
 * xc_hvm_build.c
 */

#define ELFSIZE 32
#include <stddef.h>
#include "xg_private.h"
#include "xc_elf.h"
#include <stdlib.h>
#include <unistd.h>
#include <zlib.h>
#include <xen/hvm/hvm_info_table.h>
#include <xen/hvm/ioreq.h>

#define HVM_LOADER_ENTR_ADDR 0x00100000

#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
#ifdef __x86_64__
#define L3_PROT (_PAGE_PRESENT)
#endif

#define E820MAX 128

#define E820_RAM          1
#define E820_RESERVED     2
#define E820_ACPI         3
#define E820_NVS          4
#define E820_IO          16
#define E820_SHARED_PAGE 17
#define E820_XENSTORE    18

#define E820_MAP_PAGE      0x00090000
#define E820_MAP_NR_OFFSET 0x000001E8
#define E820_MAP_OFFSET    0x000002D0

struct e820entry {
    uint64_t addr;
    uint64_t size;
    uint32_t type;
} __attribute__((packed));

#define round_pgup(_p)   (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
#define round_pgdown(_p) ((_p)&PAGE_MASK)
static int
parseelfimage(
    char *elfbase, unsigned long elfsize, struct domain_setup_info *dsi);
static int
loadelfimage(
    char *elfbase, int xch, uint32_t dom, xen_pfn_t *parray,
    struct domain_setup_info *dsi);
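
/*
 * Write the guest's E820 memory map into the E820_MAP_PAGE at
 * E820_MAP_OFFSET. Returns the number of entries, which is also stored
 * at E820_MAP_NR_OFFSET in the same page. mem_size is the guest RAM
 * size in bytes.
 */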
static unsigned char build_e820map(void *e820_page, unsigned long long mem_size)
{
    struct e820entry *e820entry =
        (struct e820entry *)(((unsigned char *)e820_page) + E820_MAP_OFFSET);
    unsigned char nr_map = 0;

    /* XXX: Doesn't work for > 4GB yet */
    e820entry[nr_map].addr = 0x0;
    e820entry[nr_map].size = 0x9F800;
    e820entry[nr_map].type = E820_RAM;
    nr_map++;

    e820entry[nr_map].addr = 0x9F800;
    e820entry[nr_map].size = 0x800;
    e820entry[nr_map].type = E820_RESERVED;
    nr_map++;

    e820entry[nr_map].addr = 0xA0000;
    e820entry[nr_map].size = 0x20000;
    e820entry[nr_map].type = E820_IO;
    nr_map++;

    e820entry[nr_map].addr = 0xF0000;
    e820entry[nr_map].size = 0x10000;
    e820entry[nr_map].type = E820_RESERVED;
    nr_map++;

#define STATIC_PAGES 2 /* for ioreq_t and store_mfn */
    /* Most of the ram goes here */
    e820entry[nr_map].addr = 0x100000;
    e820entry[nr_map].size = mem_size - 0x100000 - STATIC_PAGES * PAGE_SIZE;
    e820entry[nr_map].type = E820_RAM;
    nr_map++;

    /* Statically allocated special pages */

    /* For xenstore */
    e820entry[nr_map].addr = mem_size - 2 * PAGE_SIZE;
    e820entry[nr_map].size = PAGE_SIZE;
    e820entry[nr_map].type = E820_XENSTORE;
    nr_map++;

    /* Shared ioreq_t page */
    e820entry[nr_map].addr = mem_size - PAGE_SIZE;
    e820entry[nr_map].size = PAGE_SIZE;
    e820entry[nr_map].type = E820_SHARED_PAGE;
    nr_map++;

    e820entry[nr_map].addr = mem_size;
    e820entry[nr_map].size = 0x3 * PAGE_SIZE;
    e820entry[nr_map].type = E820_NVS;
    nr_map++;

    e820entry[nr_map].addr = mem_size + 0x3 * PAGE_SIZE;
    e820entry[nr_map].size = 0xA * PAGE_SIZE;
    e820entry[nr_map].type = E820_ACPI;
    nr_map++;

    e820entry[nr_map].addr = 0xFEC00000;
    e820entry[nr_map].size = 0x1400000;
    e820entry[nr_map].type = E820_IO;
    nr_map++;

    return (*(((unsigned char *)e820_page) + E820_MAP_NR_OFFSET) = nr_map);
}
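
/*
 * Recompute t->checksum so that the bytes of the hvm_info_table sum to
 * zero modulo 256, allowing consumers of the table to verify it.
 */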
static void set_hvm_info_checksum(struct hvm_info_table *t)
{
    uint8_t *ptr = (uint8_t *)t, sum = 0;
    unsigned int i;

    t->checksum = 0;

    for (i = 0; i < t->length; i++)
        sum += *ptr++;

    t->checksum = -sum;
}
/*
 * Use the E820 reserved region at 0x9F800 to pass HVM info to hvmloader;
 * hvmloader uses this info to set up the BIOS accordingly.
 */
static int set_hvm_info(int xc_handle, uint32_t dom,
                        xen_pfn_t *pfn_list, unsigned int vcpus,
                        unsigned int pae, unsigned int acpi, unsigned int apic)
{
    char *va_map;
    struct hvm_info_table *va_hvm;

    va_map = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                  PROT_READ | PROT_WRITE,
                                  pfn_list[HVM_INFO_PFN]);
    if ( va_map == NULL )
        return -1;

    va_hvm = (struct hvm_info_table *)(va_map + HVM_INFO_OFFSET);
    memset(va_hvm, 0, sizeof(*va_hvm));

    strncpy(va_hvm->signature, "HVM INFO", 8);
    va_hvm->length = sizeof(struct hvm_info_table);
    va_hvm->acpi_enabled = acpi;
    va_hvm->apic_enabled = apic;
    va_hvm->pae_enabled = pae;
    va_hvm->nr_vcpus = vcpus;

    set_hvm_info_checksum(va_hvm);

    munmap(va_map, PAGE_SIZE);

    return 0;
}
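
/*
 * Construct the guest: load the hvmloader image, populate the
 * machine->phys table, write the hvm_info table and the E820 map, clear
 * the shared_info page, and allocate one unbound event-channel port per
 * vcpu in the shared ioreq page. The last two pages of guest RAM are
 * reserved for the xenstore page and the shared ioreq page.
 */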
static int setup_guest(int xc_handle,
                       uint32_t dom, int memsize,
                       char *image, unsigned long image_size,
                       unsigned long nr_pages,
                       vcpu_guest_context_t *ctxt,
                       unsigned long shared_info_frame,
                       unsigned int vcpus,
                       unsigned int pae,
                       unsigned int acpi,
                       unsigned int apic,
                       unsigned int store_evtchn,
                       unsigned long *store_mfn)
{
    xen_pfn_t *page_array = NULL;
    unsigned long count, i;
    unsigned long long ptr;
    xc_mmu_t *mmu = NULL;

    shared_info_t *shared_info;
    void *e820_page;
    unsigned char e820_map_nr;

    struct domain_setup_info dsi;
    unsigned long long v_end;

    unsigned long shared_page_frame = 0;
    shared_iopage_t *sp;

    memset(&dsi, 0, sizeof(struct domain_setup_info));

    if ( (parseelfimage(image, image_size, &dsi)) != 0 )
        goto error_out;

    if ( (dsi.v_kernstart & (PAGE_SIZE - 1)) != 0 )
    {
        PERROR("Guest OS must load to a page boundary.\n");
        goto error_out;
    }

    /* memsize is in megabytes */
    v_end = (unsigned long long)memsize << 20;

    printf("VIRTUAL MEMORY ARRANGEMENT:\n"
           "  Loaded HVM loader:    %08lx->%08lx\n"
           "  TOTAL:                %08lx->%016llx\n",
           dsi.v_kernstart, dsi.v_kernend,
           dsi.v_start, v_end);
    printf("  ENTRY ADDRESS:        %08lx\n", dsi.v_kernentry);

    if ( (v_end - dsi.v_start) > ((unsigned long long)nr_pages << PAGE_SHIFT) )
    {
        PERROR("Initial guest OS requires too much space: "
               "(%lluMB is greater than %lluMB limit)\n",
               (unsigned long long)(v_end - dsi.v_start) >> 20,
               ((unsigned long long)nr_pages << PAGE_SHIFT) >> 20);
        goto error_out;
    }

    if ( (page_array = malloc(nr_pages * sizeof(xen_pfn_t))) == NULL )
    {
        PERROR("Could not allocate memory.\n");
        goto error_out;
    }

    if ( xc_get_pfn_list(xc_handle, dom, page_array, nr_pages) != nr_pages )
    {
        PERROR("Could not get the page frame list.\n");
        goto error_out;
    }

    loadelfimage(image, xc_handle, dom, page_array, &dsi);

    if ( (mmu = xc_init_mmu_updates(xc_handle, dom)) == NULL )
        goto error_out;

    /* Write the machine->phys table entries. */
    for ( count = 0; count < nr_pages; count++ )
    {
        ptr = (unsigned long long)page_array[count] << PAGE_SHIFT;
        if ( xc_add_mmu_update(xc_handle, mmu,
                               ptr | MMU_MACHPHYS_UPDATE, count) )
            goto error_out;
    }

    if ( set_hvm_info(xc_handle, dom, page_array, vcpus, pae, acpi, apic) )
    {
        ERROR("Couldn't set hvm info for HVM guest.\n");
        goto error_out;
    }

    if ( (e820_page = xc_map_foreign_range(
              xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE,
              page_array[E820_MAP_PAGE >> PAGE_SHIFT])) == 0 )
        goto error_out;
    memset(e820_page, 0, PAGE_SIZE);
    e820_map_nr = build_e820map(e820_page, v_end);
    munmap(e820_page, PAGE_SIZE);

    /* shared_info page starts its life empty. */
    if ( (shared_info = xc_map_foreign_range(
              xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE,
              shared_info_frame)) == 0 )
        goto error_out;
    memset(shared_info, 0, sizeof(shared_info_t));
    /* Mask all upcalls... */
    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
        shared_info->vcpu_info[i].evtchn_upcall_mask = 1;
    munmap(shared_info, PAGE_SIZE);

    /* Populate the event channel ports in the shared ioreq page. */
    shared_page_frame = page_array[(v_end >> PAGE_SHIFT) - 1];
    if ( (sp = (shared_iopage_t *) xc_map_foreign_range(
              xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE,
              shared_page_frame)) == 0 )
        goto error_out;
    memset(sp, 0, PAGE_SIZE);

    /* FIXME: how about if we overflow the page here? */
    for ( i = 0; i < vcpus; i++ ) {
        int vp_eport; /* signed: xc_evtchn_alloc_unbound() is negative on error */

        vp_eport = xc_evtchn_alloc_unbound(xc_handle, dom, 0);
        if ( vp_eport < 0 ) {
            PERROR("Couldn't get unbound port for HVM guest.\n");
            goto error_out;
        }
        sp->vcpu_iodata[i].vp_eport = vp_eport;
    }

    munmap(sp, PAGE_SIZE);

    *store_mfn = page_array[(v_end >> PAGE_SHIFT) - 2];
    if ( xc_clear_domain_page(xc_handle, dom, *store_mfn) )
        goto error_out;

    /* Send the page update requests down to the hypervisor. */
    if ( xc_finish_mmu_updates(xc_handle, mmu) )
        goto error_out;

    free(mmu);
    free(page_array);

    /*
     * Initial register values:
     */
    ctxt->user_regs.ds = 0;
    ctxt->user_regs.es = 0;
    ctxt->user_regs.fs = 0;
    ctxt->user_regs.gs = 0;
    ctxt->user_regs.ss = 0;
    ctxt->user_regs.cs = 0;
    ctxt->user_regs.eip = dsi.v_kernentry;
    ctxt->user_regs.edx = 0;
    ctxt->user_regs.eax = 0;
    ctxt->user_regs.esp = 0;
    ctxt->user_regs.ebx = 0; /* startup_32 expects this to be 0 to signal boot cpu */
    ctxt->user_regs.ecx = 0;
    ctxt->user_regs.esi = 0;
    ctxt->user_regs.edi = 0;
    ctxt->user_regs.ebp = 0;

    ctxt->user_regs.eflags = 0;

    return 0;

 error_out:
    free(mmu);
    free(page_array);
    return -1;
}
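
/*
 * Common build path for the two entry points below: check that the
 * hypervisor advertises HVM support, look up the domain, construct the
 * guest via setup_guest(), and load the initial context into VCPU0.
 */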
static int xc_hvm_build_internal(int xc_handle,
                                 uint32_t domid,
                                 int memsize,
                                 char *image,
                                 unsigned long image_size,
                                 unsigned int vcpus,
                                 unsigned int pae,
                                 unsigned int acpi,
                                 unsigned int apic,
                                 unsigned int store_evtchn,
                                 unsigned long *store_mfn)
{
    dom0_op_t launch_op, op;
    int rc, i;
    vcpu_guest_context_t st_ctxt, *ctxt = &st_ctxt;
    long nr_pages; /* signed so that xc_get_tot_pages() errors are visible */
    xen_capabilities_info_t xen_caps;

    if ( (image == NULL) || (image_size == 0) )
    {
        ERROR("Image required");
        goto error_out;
    }

    if ( (rc = xc_version(xc_handle, XENVER_capabilities, &xen_caps)) != 0 )
    {
        PERROR("Failed to get xen version info");
        goto error_out;
    }

    if ( !strstr(xen_caps, "hvm") )
    {
        PERROR("CPU doesn't support HVM extensions or "
               "the extensions are not enabled");
        goto error_out;
    }

    if ( (nr_pages = xc_get_tot_pages(xc_handle, domid)) < 0 )
    {
        PERROR("Could not find total pages for domain");
        goto error_out;
    }

    if ( mlock(&st_ctxt, sizeof(st_ctxt) ) )
    {
        PERROR("%s: ctxt mlock failed", __func__);
        return 1;
    }

    op.cmd = DOM0_GETDOMAININFO;
    op.u.getdomaininfo.domain = (domid_t)domid;
    if ( (xc_dom0_op(xc_handle, &op) < 0) ||
         ((uint16_t)op.u.getdomaininfo.domain != domid) )
    {
        PERROR("Could not get info on domain");
        goto error_out;
    }

    memset(ctxt, 0, sizeof(*ctxt));

    ctxt->flags = VGCF_HVM_GUEST;
    if ( setup_guest(xc_handle, domid, memsize, image, image_size, nr_pages,
                     ctxt, op.u.getdomaininfo.shared_info_frame,
                     vcpus, pae, acpi, apic, store_evtchn, store_mfn) < 0)
    {
        ERROR("Error constructing guest OS");
        goto error_out;
    }

    /* FPU is set up to default initial state. */
    memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));

    /* Virtual IDT is empty at start-of-day. */
    for ( i = 0; i < 256; i++ )
    {
        ctxt->trap_ctxt[i].vector = i;
        ctxt->trap_ctxt[i].cs = FLAT_KERNEL_CS;
    }

    /* No LDT. */
    ctxt->ldt_ents = 0;

    /* Use the default Xen-provided GDT. */
    ctxt->gdt_ents = 0;

    /* No debugging. */
    memset(ctxt->debugreg, 0, sizeof(ctxt->debugreg));

    /* No callback handlers. */
#if defined(__i386__)
    ctxt->event_callback_cs = FLAT_KERNEL_CS;
    ctxt->event_callback_eip = 0;
    ctxt->failsafe_callback_cs = FLAT_KERNEL_CS;
    ctxt->failsafe_callback_eip = 0;
#elif defined(__x86_64__)
    ctxt->event_callback_eip = 0;
    ctxt->failsafe_callback_eip = 0;
    ctxt->syscall_callback_eip = 0;
#endif

    memset( &launch_op, 0, sizeof(launch_op) );

    launch_op.u.setvcpucontext.domain = (domid_t)domid;
    launch_op.u.setvcpucontext.vcpu = 0;
    set_xen_guest_handle(launch_op.u.setvcpucontext.ctxt, ctxt);

    launch_op.cmd = DOM0_SETVCPUCONTEXT;
    rc = xc_dom0_op(xc_handle, &launch_op);

    return rc;

 error_out:
    return -1;
}
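
/* A loadable program header is PT_LOAD and writable and/or executable. */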
static inline int is_loadable_phdr(Elf32_Phdr *phdr)
{
    return ((phdr->p_type == PT_LOAD) &&
            ((phdr->p_flags & (PF_W|PF_X)) != 0));
}
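
/*
 * Sanity-check the ELF image and record the physical extent of its
 * loadable segments in *dsi. The entry point reported to the caller is
 * the fixed HVM_LOADER_ENTR_ADDR, not the image's e_entry.
 */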
static int parseelfimage(char *elfbase,
                         unsigned long elfsize,
                         struct domain_setup_info *dsi)
{
    Elf32_Ehdr *ehdr = (Elf32_Ehdr *)elfbase;
    Elf32_Phdr *phdr;
    Elf32_Shdr *shdr;
    unsigned long kernstart = ~0UL, kernend = 0UL;
    char *shstrtab;
    int h;

    if ( !IS_ELF(*ehdr) )
    {
        ERROR("Kernel image does not have an ELF header.");
        return -EINVAL;
    }

    if ( (ehdr->e_phoff + (ehdr->e_phnum * ehdr->e_phentsize)) > elfsize )
    {
        ERROR("ELF program headers extend beyond end of image.");
        return -EINVAL;
    }

    if ( (ehdr->e_shoff + (ehdr->e_shnum * ehdr->e_shentsize)) > elfsize )
    {
        ERROR("ELF section headers extend beyond end of image.");
        return -EINVAL;
    }

    /* Find the section-header strings table. */
    if ( ehdr->e_shstrndx == SHN_UNDEF )
    {
        ERROR("ELF image has no section-header strings table (shstrtab).");
        return -EINVAL;
    }
    shdr = (Elf32_Shdr *)(elfbase + ehdr->e_shoff +
                          (ehdr->e_shstrndx*ehdr->e_shentsize));
    shstrtab = elfbase + shdr->sh_offset;

    for ( h = 0; h < ehdr->e_phnum; h++ )
    {
        phdr = (Elf32_Phdr *)(elfbase + ehdr->e_phoff + (h*ehdr->e_phentsize));
        if ( !is_loadable_phdr(phdr) )
            continue;
        if ( phdr->p_paddr < kernstart )
            kernstart = phdr->p_paddr;
        if ( (phdr->p_paddr + phdr->p_memsz) > kernend )
            kernend = phdr->p_paddr + phdr->p_memsz;
    }

    if ( (kernstart > kernend) ||
         (ehdr->e_entry < kernstart) ||
         (ehdr->e_entry > kernend) )
    {
        ERROR("Malformed ELF image.");
        return -EINVAL;
    }

    dsi->v_start = 0x00000000;

    dsi->v_kernstart = kernstart;
    dsi->v_kernend = kernend;
    dsi->v_kernentry = HVM_LOADER_ENTR_ADDR;

    dsi->v_end = dsi->v_kernend;

    return 0;
}
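
/*
 * Copy each loadable segment into the guest one page at a time (guest
 * frames come from parray), then zero the remainder of p_memsz beyond
 * p_filesz (the BSS tail).
 */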
static int
loadelfimage(
    char *elfbase, int xch, uint32_t dom, xen_pfn_t *parray,
    struct domain_setup_info *dsi)
{
    Elf32_Ehdr *ehdr = (Elf32_Ehdr *)elfbase;
    Elf32_Phdr *phdr;
    int h;

    char *va;
    unsigned long pa, done, chunksz;

    for ( h = 0; h < ehdr->e_phnum; h++ )
    {
        phdr = (Elf32_Phdr *)(elfbase + ehdr->e_phoff + (h*ehdr->e_phentsize));
        if ( !is_loadable_phdr(phdr) )
            continue;

        for ( done = 0; done < phdr->p_filesz; done += chunksz )
        {
            pa = (phdr->p_paddr + done) - dsi->v_start;
            if ((va = xc_map_foreign_range(
                     xch, dom, PAGE_SIZE, PROT_WRITE,
                     parray[pa >> PAGE_SHIFT])) == 0)
                return -1;
            chunksz = phdr->p_filesz - done;
            if ( chunksz > (PAGE_SIZE - (pa & (PAGE_SIZE-1))) )
                chunksz = PAGE_SIZE - (pa & (PAGE_SIZE-1));
            memcpy(va + (pa & (PAGE_SIZE-1)),
                   elfbase + phdr->p_offset + done, chunksz);
            munmap(va, PAGE_SIZE);
        }

        for ( ; done < phdr->p_memsz; done += chunksz )
        {
            pa = (phdr->p_paddr + done) - dsi->v_start;
            if ((va = xc_map_foreign_range(
                     xch, dom, PAGE_SIZE, PROT_WRITE,
                     parray[pa >> PAGE_SHIFT])) == 0)
                return -1;
            chunksz = phdr->p_memsz - done;
            if ( chunksz > (PAGE_SIZE - (pa & (PAGE_SIZE-1))) )
                chunksz = PAGE_SIZE - (pa & (PAGE_SIZE-1));
            memset(va + (pa & (PAGE_SIZE-1)), 0, chunksz);
            munmap(va, PAGE_SIZE);
        }
    }

    return 0;
}

/* xc_hvm_build
 *
 * Create a domain for an HVM guest, loading the image from the named file.
 *
 */

int xc_hvm_build(int xc_handle,
                 uint32_t domid,
                 int memsize,
                 const char *image_name,
                 unsigned int vcpus,
                 unsigned int pae,
                 unsigned int acpi,
                 unsigned int apic,
                 unsigned int store_evtchn,
                 unsigned long *store_mfn)
{
    char *image;
    int sts;
    unsigned long image_size;

    if ( (image_name == NULL) ||
         ((image = xc_read_image(image_name, &image_size)) == NULL) )
        return -1;

    sts = xc_hvm_build_internal(xc_handle, domid, memsize,
                                image, image_size,
                                vcpus, pae, acpi, apic,
                                store_evtchn, store_mfn);

    free(image);

    return sts;
}
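
/*
 * Typical use (a minimal sketch; the hvmloader path, memsize, domid and
 * store_evtchn values here are illustrative assumptions, and error
 * handling is omitted):
 *
 *     unsigned long store_mfn;
 *     int xc = xc_interface_open();
 *     int rc = xc_hvm_build(xc, domid, 128,
 *                           "/usr/lib/xen/boot/hvmloader",
 *                           1, 1, 1, 1, store_evtchn, &store_mfn);
 *     xc_interface_close(xc);
 */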
/* xc_hvm_build_mem
 *
 * Create a domain for an HVM guest, loading the image from an in-memory
 * buffer.
 *
 */

int xc_hvm_build_mem(int xc_handle,
                     uint32_t domid,
                     int memsize,
                     const char *image_buffer,
                     unsigned long image_size,
                     unsigned int vcpus,
                     unsigned int pae,
                     unsigned int acpi,
                     unsigned int apic,
                     unsigned int store_evtchn,
                     unsigned long *store_mfn)
{
    int sts;
    unsigned long img_len;
    char *img;

    /* Validate that there is a kernel buffer */

    if ( (image_buffer == NULL) || (image_size == 0) )
    {
        ERROR("kernel image buffer not present");
        return -1;
    }

    img = xc_inflate_buffer(image_buffer, image_size, &img_len);
    if (img == NULL)
    {
        ERROR("unable to inflate ram disk buffer");
        return -1;
    }

    sts = xc_hvm_build_internal(xc_handle, domid, memsize,
                                img, img_len,
                                vcpus, pae, acpi, apic,
                                store_evtchn, store_mfn);

    /* xc_inflate_buffer may return the original buffer pointer (for
       already-inflated buffers), so exercise some care in freeing. */

    if ( (img != NULL) && (img != image_buffer) )
        free(img);

    return sts;
}

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */