ia64/xen-unstable

view tools/libxc/xc_hvm_build.c @ 12226:45e34f00a78f

[HVM] Clean up VCPU initialisation in Xen. No longer
parse HVM e820 tables in Xen (add some extra HVM parameters as a
cleaner alternative). Lots of code removal.
Signed-off-by: Keir Fraser <keir@xensource.com>
author kfraser@localhost.localdomain
date Thu Nov 02 15:55:51 2006 +0000 (2006-11-02)
parents cf3d69ba5633
children cfb1136ee8f7
line source
1 /******************************************************************************
2 * xc_hvm_build.c
3 */
5 #define ELFSIZE 32
6 #include <stddef.h>
7 #include <inttypes.h>
8 #include "xg_private.h"
9 #include "xc_private.h"
10 #include "xc_elf.h"
11 #include <stdlib.h>
12 #include <unistd.h>
13 #include <zlib.h>
14 #include <xen/hvm/hvm_info_table.h>
15 #include <xen/hvm/params.h>
16 #include <xen/hvm/e820.h>
18 #define HVM_LOADER_ENTR_ADDR 0x00100000
19 static int
20 parseelfimage(
21 char *elfbase, unsigned long elfsize, struct domain_setup_info *dsi);
22 static int
23 loadelfimage(
24 char *elfbase, int xch, uint32_t dom, unsigned long *parray,
25 struct domain_setup_info *dsi);
27 static void xc_set_hvm_param(int handle,
28 domid_t dom, int param, unsigned long value)
29 {
30 DECLARE_HYPERCALL;
31 xen_hvm_param_t arg;
32 int rc;
34 hypercall.op = __HYPERVISOR_hvm_op;
35 hypercall.arg[0] = HVMOP_set_param;
36 hypercall.arg[1] = (unsigned long)&arg;
37 arg.domid = dom;
38 arg.index = param;
39 arg.value = value;
40 if ( lock_pages(&arg, sizeof(arg)) != 0 )
41 {
42 PERROR("Could not lock memory for set parameter");
43 return;
44 }
45 rc = do_xen_hypercall(handle, &hypercall);
46 unlock_pages(&arg, sizeof(arg));
47 if (rc < 0)
48 PERROR("set HVM parameter failed (%d)", rc);
49 }
51 static void build_e820map(void *e820_page, unsigned long long mem_size)
52 {
53 struct e820entry *e820entry =
54 (struct e820entry *)(((unsigned char *)e820_page) + E820_MAP_OFFSET);
55 unsigned long long extra_mem_size = 0;
56 unsigned char nr_map = 0;
58 /*
59 * Physical address space from HVM_BELOW_4G_RAM_END to 4G is reserved
60 * for PCI devices MMIO. So if HVM has more than HVM_BELOW_4G_RAM_END
61 * RAM, memory beyond HVM_BELOW_4G_RAM_END will go to 4G above.
62 */
63 if ( mem_size > HVM_BELOW_4G_RAM_END )
64 {
65 extra_mem_size = mem_size - HVM_BELOW_4G_RAM_END;
66 mem_size = HVM_BELOW_4G_RAM_END;
67 }
69 e820entry[nr_map].addr = 0x0;
70 e820entry[nr_map].size = 0x9F000;
71 e820entry[nr_map].type = E820_RAM;
72 nr_map++;
74 e820entry[nr_map].addr = 0x9F000;
75 e820entry[nr_map].size = 0x1000;
76 e820entry[nr_map].type = E820_RESERVED;
77 nr_map++;
79 e820entry[nr_map].addr = 0xEA000;
80 e820entry[nr_map].size = 0x01000;
81 e820entry[nr_map].type = E820_ACPI;
82 nr_map++;
84 e820entry[nr_map].addr = 0xF0000;
85 e820entry[nr_map].size = 0x10000;
86 e820entry[nr_map].type = E820_RESERVED;
87 nr_map++;
89 /* Low RAM goes here. Remove 3 pages for ioreq, bufioreq, and xenstore. */
90 e820entry[nr_map].addr = 0x100000;
91 e820entry[nr_map].size = mem_size - 0x100000 - PAGE_SIZE * 3;
92 e820entry[nr_map].type = E820_RAM;
93 nr_map++;
95 if ( extra_mem_size )
96 {
97 e820entry[nr_map].addr = (1ULL << 32);
98 e820entry[nr_map].size = extra_mem_size;
99 e820entry[nr_map].type = E820_RAM;
100 nr_map++;
101 }
103 *(((unsigned char *)e820_page) + E820_MAP_NR_OFFSET) = nr_map;
104 }
106 static void set_hvm_info_checksum(struct hvm_info_table *t)
107 {
108 uint8_t *ptr = (uint8_t *)t, sum = 0;
109 unsigned int i;
111 t->checksum = 0;
113 for (i = 0; i < t->length; i++)
114 sum += *ptr++;
116 t->checksum = -sum;
117 }
119 /*
120 * Use E820 reserved memory 0x9F800 to pass HVM info to hvmloader
121 * hvmloader will use this info to set BIOS accordingly
122 */
123 static int set_hvm_info(int xc_handle, uint32_t dom,
124 xen_pfn_t *pfn_list, unsigned int vcpus,
125 unsigned int acpi)
126 {
127 char *va_map;
128 struct hvm_info_table *va_hvm;
130 va_map = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
131 PROT_READ | PROT_WRITE,
132 pfn_list[HVM_INFO_PFN]);
134 if ( va_map == NULL )
135 return -1;
137 va_hvm = (struct hvm_info_table *)(va_map + HVM_INFO_OFFSET);
138 memset(va_hvm, 0, sizeof(*va_hvm));
140 strncpy(va_hvm->signature, "HVM INFO", 8);
141 va_hvm->length = sizeof(struct hvm_info_table);
142 va_hvm->acpi_enabled = acpi;
143 va_hvm->nr_vcpus = vcpus;
145 set_hvm_info_checksum(va_hvm);
147 munmap(va_map, PAGE_SIZE);
149 return 0;
150 }
152 static int setup_guest(int xc_handle,
153 uint32_t dom, int memsize,
154 char *image, unsigned long image_size,
155 vcpu_guest_context_t *ctxt,
156 unsigned long shared_info_frame,
157 unsigned int vcpus,
158 unsigned int pae,
159 unsigned int acpi,
160 unsigned int apic,
161 unsigned int store_evtchn,
162 unsigned long *store_mfn)
163 {
164 xen_pfn_t *page_array = NULL;
165 unsigned long i, nr_pages = (unsigned long)memsize << (20 - PAGE_SHIFT);
166 unsigned long shared_page_nr;
167 shared_info_t *shared_info;
168 void *e820_page;
169 struct domain_setup_info dsi;
170 uint64_t v_end;
171 int rc;
173 memset(&dsi, 0, sizeof(struct domain_setup_info));
175 if ( (parseelfimage(image, image_size, &dsi)) != 0 )
176 goto error_out;
178 if ( (dsi.v_kernstart & (PAGE_SIZE - 1)) != 0 )
179 {
180 PERROR("Guest OS must load to a page boundary.\n");
181 goto error_out;
182 }
184 v_end = (unsigned long long)memsize << 20;
186 IPRINTF("VIRTUAL MEMORY ARRANGEMENT:\n"
187 " Loaded HVM loader: %016"PRIx64"->%016"PRIx64"\n"
188 " TOTAL: %016"PRIx64"->%016"PRIx64"\n",
189 dsi.v_kernstart, dsi.v_kernend,
190 dsi.v_start, v_end);
191 IPRINTF(" ENTRY ADDRESS: %016"PRIx64"\n", dsi.v_kernentry);
193 if ( (v_end - dsi.v_start) > ((unsigned long long)nr_pages << PAGE_SHIFT) )
194 {
195 PERROR("Initial guest OS requires too much space: "
196 "(%lluMB is greater than %lluMB limit)\n",
197 (unsigned long long)(v_end - dsi.v_start) >> 20,
198 ((unsigned long long)nr_pages << PAGE_SHIFT) >> 20);
199 goto error_out;
200 }
202 if ( (page_array = malloc(nr_pages * sizeof(xen_pfn_t))) == NULL )
203 {
204 PERROR("Could not allocate memory.\n");
205 goto error_out;
206 }
208 for ( i = 0; i < nr_pages; i++ )
209 page_array[i] = i;
210 for ( i = HVM_BELOW_4G_RAM_END >> PAGE_SHIFT; i < nr_pages; i++ )
211 page_array[i] += HVM_BELOW_4G_MMIO_LENGTH >> PAGE_SHIFT;
213 /* Allocate memory for HVM guest, skipping VGA hole 0xA0000-0xC0000. */
214 rc = xc_domain_memory_populate_physmap(
215 xc_handle, dom, (nr_pages > 0xa0) ? 0xa0 : nr_pages,
216 0, 0, &page_array[0x00]);
217 if ( (rc == 0) && (nr_pages > 0xc0) )
218 rc = xc_domain_memory_populate_physmap(
219 xc_handle, dom, nr_pages - 0xc0, 0, 0, &page_array[0xc0]);
220 if ( rc != 0 )
221 {
222 PERROR("Could not allocate memory for HVM guest.\n");
223 goto error_out;
224 }
226 if ( (nr_pages > 0xa0) &&
227 xc_domain_memory_decrease_reservation(
228 xc_handle, dom, (nr_pages < 0xc0) ? (nr_pages - 0xa0) : 0x20,
229 0, &page_array[0xa0]) )
230 {
231 PERROR("Could not free VGA hole.\n");
232 goto error_out;
233 }
235 if ( xc_domain_translate_gpfn_list(xc_handle, dom, nr_pages,
236 page_array, page_array) )
237 {
238 PERROR("Could not translate addresses of HVM guest.\n");
239 goto error_out;
240 }
242 loadelfimage(image, xc_handle, dom, page_array, &dsi);
244 if ( set_hvm_info(xc_handle, dom, page_array, vcpus, acpi) )
245 {
246 ERROR("Couldn't set hvm info for HVM guest.\n");
247 goto error_out;
248 }
250 xc_set_hvm_param(xc_handle, dom, HVM_PARAM_PAE_ENABLED, pae);
251 xc_set_hvm_param(xc_handle, dom, HVM_PARAM_APIC_ENABLED, apic);
253 if ( (e820_page = xc_map_foreign_range(
254 xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE,
255 page_array[E820_MAP_PAGE >> PAGE_SHIFT])) == NULL )
256 goto error_out;
257 memset(e820_page, 0, PAGE_SIZE);
258 build_e820map(e820_page, v_end);
259 munmap(e820_page, PAGE_SIZE);
261 /* shared_info page starts its life empty. */
262 if ( (shared_info = xc_map_foreign_range(
263 xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE,
264 shared_info_frame)) == NULL )
265 goto error_out;
266 memset(shared_info, 0, PAGE_SIZE);
267 /* Mask all upcalls... */
268 for ( i = 0; i < MAX_VIRT_CPUS; i++ )
269 shared_info->vcpu_info[i].evtchn_upcall_mask = 1;
270 memset(&shared_info->evtchn_mask[0], 0xff,
271 sizeof(shared_info->evtchn_mask));
272 munmap(shared_info, PAGE_SIZE);
274 if ( v_end > HVM_BELOW_4G_RAM_END )
275 shared_page_nr = (HVM_BELOW_4G_RAM_END >> PAGE_SHIFT) - 1;
276 else
277 shared_page_nr = (v_end >> PAGE_SHIFT) - 1;
279 /* Paranoia: clean pages. */
280 if ( xc_clear_domain_page(xc_handle, dom, page_array[shared_page_nr]) ||
281 xc_clear_domain_page(xc_handle, dom, page_array[shared_page_nr-1]) ||
282 xc_clear_domain_page(xc_handle, dom, page_array[shared_page_nr-2]) )
283 goto error_out;
285 *store_mfn = page_array[shared_page_nr - 1];
286 xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN, shared_page_nr-1);
287 xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_EVTCHN, store_evtchn);
288 xc_set_hvm_param(xc_handle, dom, HVM_PARAM_BUFIOREQ_PFN, shared_page_nr-2);
289 xc_set_hvm_param(xc_handle, dom, HVM_PARAM_IOREQ_PFN, shared_page_nr);
291 free(page_array);
293 ctxt->user_regs.eip = dsi.v_kernentry;
295 return 0;
297 error_out:
298 free(page_array);
299 return -1;
300 }
302 static int xc_hvm_build_internal(int xc_handle,
303 uint32_t domid,
304 int memsize,
305 char *image,
306 unsigned long image_size,
307 unsigned int vcpus,
308 unsigned int pae,
309 unsigned int acpi,
310 unsigned int apic,
311 unsigned int store_evtchn,
312 unsigned long *store_mfn)
313 {
314 struct xen_domctl launch_domctl, domctl;
315 int rc, i;
316 vcpu_guest_context_t st_ctxt, *ctxt = &st_ctxt;
318 if ( (image == NULL) || (image_size == 0) )
319 {
320 ERROR("Image required");
321 goto error_out;
322 }
324 if ( lock_pages(&st_ctxt, sizeof(st_ctxt) ) )
325 {
326 PERROR("%s: ctxt mlock failed", __func__);
327 return 1;
328 }
330 domctl.cmd = XEN_DOMCTL_getdomaininfo;
331 domctl.domain = (domid_t)domid;
332 if ( (xc_domctl(xc_handle, &domctl) < 0) ||
333 ((uint16_t)domctl.domain != domid) )
334 {
335 PERROR("Could not get info on domain");
336 goto error_out;
337 }
339 memset(ctxt, 0, sizeof(*ctxt));
340 ctxt->flags = VGCF_HVM_GUEST;
342 if ( setup_guest(xc_handle, domid, memsize, image, image_size,
343 ctxt, domctl.u.getdomaininfo.shared_info_frame,
344 vcpus, pae, acpi, apic, store_evtchn, store_mfn) < 0)
345 {
346 ERROR("Error constructing guest OS");
347 goto error_out;
348 }
350 /* FPU is set up to default initial state. */
351 memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
353 /* Virtual IDT is empty at start-of-day. */
354 for ( i = 0; i < 256; i++ )
355 {
356 ctxt->trap_ctxt[i].vector = i;
357 ctxt->trap_ctxt[i].cs = FLAT_KERNEL_CS;
358 }
360 /* No LDT. */
361 ctxt->ldt_ents = 0;
363 /* Use the default Xen-provided GDT. */
364 ctxt->gdt_ents = 0;
366 /* No debugging. */
367 memset(ctxt->debugreg, 0, sizeof(ctxt->debugreg));
369 /* No callback handlers. */
370 #if defined(__i386__)
371 ctxt->event_callback_cs = FLAT_KERNEL_CS;
372 ctxt->event_callback_eip = 0;
373 ctxt->failsafe_callback_cs = FLAT_KERNEL_CS;
374 ctxt->failsafe_callback_eip = 0;
375 #elif defined(__x86_64__)
376 ctxt->event_callback_eip = 0;
377 ctxt->failsafe_callback_eip = 0;
378 ctxt->syscall_callback_eip = 0;
379 #endif
381 memset(&launch_domctl, 0, sizeof(launch_domctl));
383 launch_domctl.domain = (domid_t)domid;
384 launch_domctl.u.vcpucontext.vcpu = 0;
385 set_xen_guest_handle(launch_domctl.u.vcpucontext.ctxt, ctxt);
387 launch_domctl.cmd = XEN_DOMCTL_setvcpucontext;
388 rc = xc_domctl(xc_handle, &launch_domctl);
390 return rc;
392 error_out:
393 return -1;
394 }
/*
 * A program header is loadable for our purposes if it is PT_LOAD and is
 * writable or executable (pure read-only segments are skipped).
 */
static inline int is_loadable_phdr(Elf32_Phdr *phdr)
{
    if ( phdr->p_type != PT_LOAD )
        return 0;
    return (phdr->p_flags & (PF_W | PF_X)) != 0;
}
402 static int parseelfimage(char *elfbase,
403 unsigned long elfsize,
404 struct domain_setup_info *dsi)
405 {
406 Elf32_Ehdr *ehdr = (Elf32_Ehdr *)elfbase;
407 Elf32_Phdr *phdr;
408 Elf32_Shdr *shdr;
409 unsigned long kernstart = ~0UL, kernend=0UL;
410 char *shstrtab;
411 int h;
413 if ( !IS_ELF(*ehdr) )
414 {
415 ERROR("Kernel image does not have an ELF header.");
416 return -EINVAL;
417 }
419 if ( (ehdr->e_phoff + (ehdr->e_phnum * ehdr->e_phentsize)) > elfsize )
420 {
421 ERROR("ELF program headers extend beyond end of image.");
422 return -EINVAL;
423 }
425 if ( (ehdr->e_shoff + (ehdr->e_shnum * ehdr->e_shentsize)) > elfsize )
426 {
427 ERROR("ELF section headers extend beyond end of image.");
428 return -EINVAL;
429 }
431 /* Find the section-header strings table. */
432 if ( ehdr->e_shstrndx == SHN_UNDEF )
433 {
434 ERROR("ELF image has no section-header strings table (shstrtab).");
435 return -EINVAL;
436 }
437 shdr = (Elf32_Shdr *)(elfbase + ehdr->e_shoff +
438 (ehdr->e_shstrndx*ehdr->e_shentsize));
439 shstrtab = elfbase + shdr->sh_offset;
441 for ( h = 0; h < ehdr->e_phnum; h++ )
442 {
443 phdr = (Elf32_Phdr *)(elfbase + ehdr->e_phoff + (h*ehdr->e_phentsize));
444 if ( !is_loadable_phdr(phdr) )
445 continue;
446 if ( phdr->p_paddr < kernstart )
447 kernstart = phdr->p_paddr;
448 if ( (phdr->p_paddr + phdr->p_memsz) > kernend )
449 kernend = phdr->p_paddr + phdr->p_memsz;
450 }
452 if ( (kernstart > kernend) ||
453 (ehdr->e_entry < kernstart) ||
454 (ehdr->e_entry > kernend) )
455 {
456 ERROR("Malformed ELF image.");
457 return -EINVAL;
458 }
460 dsi->v_start = 0x00000000;
462 dsi->v_kernstart = kernstart;
463 dsi->v_kernend = kernend;
464 dsi->v_kernentry = HVM_LOADER_ENTR_ADDR;
466 dsi->v_end = dsi->v_kernend;
468 return 0;
469 }
/*
 * Copy the loadable segments of the ELF image at @elfbase into domain
 * @dom's memory, one guest page at a time, using @parray to translate a
 * guest frame number into a mappable frame for xc_map_foreign_range().
 * File-backed bytes (p_filesz) are copied; the remaining BSS portion of
 * each segment (up to p_memsz) is zeroed.
 *
 * Returns 0 on success, -1 if any guest page cannot be mapped (pages
 * written before the failure are left as-is).
 */
static int
loadelfimage(
    char *elfbase, int xch, uint32_t dom, unsigned long *parray,
    struct domain_setup_info *dsi)
{
    Elf32_Ehdr *ehdr = (Elf32_Ehdr *)elfbase;
    Elf32_Phdr *phdr;
    int h;

    char *va;
    unsigned long pa, done, chunksz;

    for ( h = 0; h < ehdr->e_phnum; h++ )
    {
        phdr = (Elf32_Phdr *)(elfbase + ehdr->e_phoff + (h*ehdr->e_phentsize));
        if ( !is_loadable_phdr(phdr) )
            continue;

        /*
         * Phase 1: copy the file-backed part of the segment.  Each chunk
         * is clamped so it never crosses a page boundary, since only one
         * guest page is mapped at a time.
         */
        for ( done = 0; done < phdr->p_filesz; done += chunksz )
        {
            /* Guest-physical target of this chunk (v_start is 0 for HVM). */
            pa = (phdr->p_paddr + done) - dsi->v_start;
            if ((va = xc_map_foreign_range(
                xch, dom, PAGE_SIZE, PROT_WRITE,
                parray[pa >> PAGE_SHIFT])) == 0)
                return -1;
            chunksz = phdr->p_filesz - done;
            if ( chunksz > (PAGE_SIZE - (pa & (PAGE_SIZE-1))) )
                chunksz = PAGE_SIZE - (pa & (PAGE_SIZE-1));
            memcpy(va + (pa & (PAGE_SIZE-1)),
                   elfbase + phdr->p_offset + done, chunksz);
            munmap(va, PAGE_SIZE);
        }

        /*
         * Phase 2: zero the BSS tail (p_filesz..p_memsz).  'done'
         * intentionally carries over from the copy loop above.
         */
        for ( ; done < phdr->p_memsz; done += chunksz )
        {
            pa = (phdr->p_paddr + done) - dsi->v_start;
            if ((va = xc_map_foreign_range(
                xch, dom, PAGE_SIZE, PROT_WRITE,
                parray[pa >> PAGE_SHIFT])) == 0)
                return -1;
            chunksz = phdr->p_memsz - done;
            if ( chunksz > (PAGE_SIZE - (pa & (PAGE_SIZE-1))) )
                chunksz = PAGE_SIZE - (pa & (PAGE_SIZE-1));
            memset(va + (pa & (PAGE_SIZE-1)), 0, chunksz);
            munmap(va, PAGE_SIZE);
        }
    }

    return 0;
}
/* xc_hvm_build
 *
 * Create a domain for a virtualized Linux, using files/filenames.
 * Reads (and, if necessary, decompresses) the image file, then hands
 * off to xc_hvm_build_internal().  Returns its status, or -1 if the
 * image cannot be read.
 */
int xc_hvm_build(int xc_handle,
                 uint32_t domid,
                 int memsize,
                 const char *image_name,
                 unsigned int vcpus,
                 unsigned int pae,
                 unsigned int acpi,
                 unsigned int apic,
                 unsigned int store_evtchn,
                 unsigned long *store_mfn)
{
    unsigned long image_size;
    char *image;
    int rc;

    if ( image_name == NULL )
        return -1;

    image = xc_read_image(image_name, &image_size);
    if ( image == NULL )
        return -1;

    rc = xc_hvm_build_internal(xc_handle, domid, memsize,
                               image, image_size,
                               vcpus, pae, acpi, apic,
                               store_evtchn, store_mfn);

    free(image);

    return rc;
}
/* xc_hvm_build_mem
 *
 * Create a domain for a virtualized Linux, using buffers.
 * The image buffer may be compressed; it is inflated (or passed through
 * unchanged) before being handed to xc_hvm_build_internal().
 *
 * FIX vs. original: dropped the redundant img != NULL guard before
 * free() (img is proven non-NULL by the earlier check) and fixed the
 * duplicated word in the ownership comment.
 */
int xc_hvm_build_mem(int xc_handle,
                     uint32_t domid,
                     int memsize,
                     const char *image_buffer,
                     unsigned long image_size,
                     unsigned int vcpus,
                     unsigned int pae,
                     unsigned int acpi,
                     unsigned int apic,
                     unsigned int store_evtchn,
                     unsigned long *store_mfn)
{
    int sts;
    unsigned long img_len;
    char *img;

    /* Validate that there is a kernel buffer */
    if ( (image_buffer == NULL) || (image_size == 0) )
    {
        ERROR("kernel image buffer not present");
        return -1;
    }

    img = xc_inflate_buffer(image_buffer, image_size, &img_len);
    if (img == NULL)
    {
        ERROR("unable to inflate ram disk buffer");
        return -1;
    }

    sts = xc_hvm_build_internal(xc_handle, domid, memsize,
                                img, img_len,
                                vcpus, pae, acpi, apic,
                                store_evtchn, store_mfn);

    /* xc_inflate_buffer may return the original buffer pointer (for
       already inflated buffers), so exercise some care in freeing */
    if ( img != image_buffer )
        free(img);

    return sts;
}
608 /*
609 * Local variables:
610 * mode: C
611 * c-set-style: "BSD"
612 * c-basic-offset: 4
613 * tab-width: 4
614 * indent-tabs-mode: nil
615 * End:
616 */