tools/libxc/xc_hvm_build.c @ 12308:3280d6211213 (direct-io.hg)

[HVM] Reserve 2 pages for ACPI tables in e820. This is required when
appending SSDTs to the core ACPI data.

Signed-off-by: Keir Fraser <keir@xensource.com>

author      kaf24@firebug.cl.cam.ac.uk
date        Wed Nov 08 19:48:46 2006 +0000 (2006-11-08)
parents     5a41ea282c86
children    ddc4bc24f07c
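
The reservation appears below in build_e820map() as the E820_ACPI entry at
0xEA000 with size 0x02000, i.e. two 4KB pages.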
/******************************************************************************
 * xc_hvm_build.c
 */

#define ELFSIZE 32
#include <stddef.h>
#include <inttypes.h>
#include "xg_private.h"
#include "xc_private.h"
#include "xc_elf.h"
#include <stdlib.h>
#include <unistd.h>
#include <zlib.h>
#include <xen/hvm/hvm_info_table.h>
#include <xen/hvm/params.h>
#include <xen/hvm/e820.h>

#define HVM_LOADER_ENTR_ADDR 0x00100000

static int
parseelfimage(
    char *elfbase, unsigned long elfsize, struct domain_setup_info *dsi);
static int
loadelfimage(
    char *elfbase, int xch, uint32_t dom, unsigned long *parray,
    struct domain_setup_info *dsi);

static void xc_set_hvm_param(int handle,
                             domid_t dom, int param, unsigned long value)
{
    DECLARE_HYPERCALL;
    xen_hvm_param_t arg;
    int rc;

    hypercall.op = __HYPERVISOR_hvm_op;
    hypercall.arg[0] = HVMOP_set_param;
    hypercall.arg[1] = (unsigned long)&arg;

    arg.domid = dom;
    arg.index = param;
    arg.value = value;

    /*
     * Xen reads the argument struct through the virtual address passed in
     * hypercall.arg[1], so the page backing it must stay resident for the
     * duration of the hypercall.
     */
    if ( lock_pages(&arg, sizeof(arg)) != 0 )
    {
        PERROR("Could not lock memory for set parameter");
        return;
    }

    rc = do_xen_hypercall(handle, &hypercall);

    unlock_pages(&arg, sizeof(arg));

    if ( rc < 0 )
        PERROR("set HVM parameter failed (%d)", rc);
}

static void build_e820map(void *e820_page, unsigned long long mem_size)
{
    struct e820entry *e820entry =
        (struct e820entry *)(((unsigned char *)e820_page) + E820_MAP_OFFSET);
    unsigned long long extra_mem_size = 0;
    unsigned char nr_map = 0;

    /*
     * The physical address space from HVM_BELOW_4G_RAM_END to 4GB is
     * reserved for PCI device MMIO, so if the guest has more than
     * HVM_BELOW_4G_RAM_END of RAM, the excess is relocated above 4GB.
     */
    if ( mem_size > HVM_BELOW_4G_RAM_END )
    {
        extra_mem_size = mem_size - HVM_BELOW_4G_RAM_END;
        mem_size = HVM_BELOW_4G_RAM_END;
    }
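
    /*
     * Worked example, assuming HVM_BELOW_4G_RAM_END is 0xF0000000 (a
     * 256MB MMIO hole): a 4096MB guest keeps mem_size = 0xF0000000 below
     * the hole and relocates extra_mem_size = 0x10000000 (256MB) to the
     * 4GB boundary, emitted as the final E820_RAM entry below.
     */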

    e820entry[nr_map].addr = 0x0;
    e820entry[nr_map].size = 0x9F000;
    e820entry[nr_map].type = E820_RAM;
    nr_map++;

    e820entry[nr_map].addr = 0x9F000;
    e820entry[nr_map].size = 0x1000;
    e820entry[nr_map].type = E820_RESERVED;
    nr_map++;

    e820entry[nr_map].addr = 0xEA000;
    e820entry[nr_map].size = 0x02000;
    e820entry[nr_map].type = E820_ACPI;
    nr_map++;

    e820entry[nr_map].addr = 0xF0000;
    e820entry[nr_map].size = 0x10000;
    e820entry[nr_map].type = E820_RESERVED;
    nr_map++;

    /* Low RAM goes here. Remove 3 pages for ioreq, bufioreq, and xenstore. */
    e820entry[nr_map].addr = 0x100000;
    e820entry[nr_map].size = mem_size - 0x100000 - PAGE_SIZE * 3;
    e820entry[nr_map].type = E820_RAM;
    nr_map++;
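
    /*
     * The three pages trimmed from the top of low RAM become the ioreq,
     * xenstore, and buffered-ioreq special pages; see the
     * HVM_PARAM_*_PFN assignments in setup_guest() below.
     */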

    if ( extra_mem_size )
    {
        e820entry[nr_map].addr = (1ULL << 32);
        e820entry[nr_map].size = extra_mem_size;
        e820entry[nr_map].type = E820_RAM;
        nr_map++;
    }

    *(((unsigned char *)e820_page) + E820_MAP_NR_OFFSET) = nr_map;
}
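
/*
 * For illustration, the entries above give a 512MB guest a map like:
 *
 *   0x000000000-0x00009F000  RAM
 *   0x00009F000-0x0000A0000  RESERVED
 *   0x0000EA000-0x0000EC000  ACPI      (the two pages this changeset reserves)
 *   0x0000F0000-0x000100000  RESERVED
 *   0x000100000-0x01FFFD000  RAM       (three pages trimmed from the top)
 */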

static void set_hvm_info_checksum(struct hvm_info_table *t)
{
    uint8_t *ptr = (uint8_t *)t, sum = 0;
    unsigned int i;

    t->checksum = 0;

    for ( i = 0; i < t->length; i++ )
        sum += *ptr++;

    t->checksum = -sum;
}
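
/*
 * This yields the ACPI-style invariant that all bytes of the table,
 * checksum included, sum to zero modulo 256. A consumer can verify a
 * table with, e.g.:
 *
 *     uint8_t sum = 0;
 *     unsigned int i;
 *     for ( i = 0; i < t->length; i++ )
 *         sum += ((uint8_t *)t)[i];
 *     // table is valid iff sum == 0
 */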

/*
 * Use the E820-reserved memory at 0x9F800 to pass HVM info to hvmloader;
 * hvmloader uses this information to set up the BIOS accordingly.
 */
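
/*
 * The constants used below come from xen/hvm/hvm_info_table.h: 0x9F800
 * decomposes as page HVM_INFO_PFN (0x9F) plus byte offset HVM_INFO_OFFSET
 * (0x800) at this revision.
 */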
static int set_hvm_info(int xc_handle, uint32_t dom,
                        xen_pfn_t *pfn_list, unsigned int vcpus,
                        unsigned int acpi)
{
    char *va_map;
    struct hvm_info_table *va_hvm;

    va_map = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                  PROT_READ | PROT_WRITE,
                                  pfn_list[HVM_INFO_PFN]);
    if ( va_map == NULL )
        return -1;

    va_hvm = (struct hvm_info_table *)(va_map + HVM_INFO_OFFSET);
    memset(va_hvm, 0, sizeof(*va_hvm));

    strncpy(va_hvm->signature, "HVM INFO", 8);
    va_hvm->length = sizeof(struct hvm_info_table);
    va_hvm->acpi_enabled = acpi;
    va_hvm->nr_vcpus = vcpus;

    set_hvm_info_checksum(va_hvm);

    munmap(va_map, PAGE_SIZE);

    return 0;
}

static int setup_guest(int xc_handle,
                       uint32_t dom, int memsize,
                       char *image, unsigned long image_size,
                       vcpu_guest_context_t *ctxt,
                       unsigned long shared_info_frame,
                       unsigned int vcpus,
                       unsigned int pae,
                       unsigned int acpi,
                       unsigned int store_evtchn,
                       unsigned long *store_mfn)
{
    xen_pfn_t *page_array = NULL;
    unsigned long i, nr_pages = (unsigned long)memsize << (20 - PAGE_SHIFT);
    unsigned long shared_page_nr;
    shared_info_t *shared_info;
    void *e820_page;
    struct domain_setup_info dsi;
    uint64_t v_end;
    int rc;

    memset(&dsi, 0, sizeof(struct domain_setup_info));

    if ( (parseelfimage(image, image_size, &dsi)) != 0 )
        goto error_out;

    if ( (dsi.v_kernstart & (PAGE_SIZE - 1)) != 0 )
    {
        PERROR("Guest OS must load to a page boundary.\n");
        goto error_out;
    }

    v_end = (unsigned long long)memsize << 20;

    IPRINTF("VIRTUAL MEMORY ARRANGEMENT:\n"
            "  Loaded HVM loader: %016"PRIx64"->%016"PRIx64"\n"
            "  TOTAL:             %016"PRIx64"->%016"PRIx64"\n",
            dsi.v_kernstart, dsi.v_kernend,
            dsi.v_start, v_end);
    IPRINTF("  ENTRY ADDRESS:     %016"PRIx64"\n", dsi.v_kernentry);

    if ( (v_end - dsi.v_start) > ((unsigned long long)nr_pages << PAGE_SHIFT) )
    {
        PERROR("Initial guest OS requires too much space: "
               "(%lluMB is greater than %lluMB limit)\n",
               (unsigned long long)(v_end - dsi.v_start) >> 20,
               ((unsigned long long)nr_pages << PAGE_SHIFT) >> 20);
        goto error_out;
    }

    if ( (page_array = malloc(nr_pages * sizeof(xen_pfn_t))) == NULL )
    {
        PERROR("Could not allocate memory.\n");
        goto error_out;
    }

    for ( i = 0; i < nr_pages; i++ )
        page_array[i] = i;
    for ( i = HVM_BELOW_4G_RAM_END >> PAGE_SHIFT; i < nr_pages; i++ )
        page_array[i] += HVM_BELOW_4G_MMIO_LENGTH >> PAGE_SHIFT;

    /* Allocate memory for HVM guest, skipping VGA hole 0xA0000-0xC0000. */
    rc = xc_domain_memory_populate_physmap(
        xc_handle, dom, (nr_pages > 0xa0) ? 0xa0 : nr_pages,
        0, 0, &page_array[0x00]);
    if ( (rc == 0) && (nr_pages > 0xc0) )
        rc = xc_domain_memory_populate_physmap(
            xc_handle, dom, nr_pages - 0xc0, 0, 0, &page_array[0xc0]);
    if ( rc != 0 )
    {
        PERROR("Could not allocate memory for HVM guest.\n");
        goto error_out;
    }
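
    /*
     * The two populate calls above deliberately leave guest pfns
     * 0xA0-0xBF unpopulated, i.e. the VGA hole at 0xA0000-0xC0000;
     * pages below 0xA0000 and from 0xC0000 upwards are backed by RAM.
     */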

    if ( xc_domain_translate_gpfn_list(xc_handle, dom, nr_pages,
                                       page_array, page_array) )
    {
        PERROR("Could not translate addresses of HVM guest.\n");
        goto error_out;
    }

    loadelfimage(image, xc_handle, dom, page_array, &dsi);

    if ( set_hvm_info(xc_handle, dom, page_array, vcpus, acpi) )
    {
        ERROR("Couldn't set hvm info for HVM guest.\n");
        goto error_out;
    }

    xc_set_hvm_param(xc_handle, dom, HVM_PARAM_PAE_ENABLED, pae);

    if ( (e820_page = xc_map_foreign_range(
              xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE,
              page_array[E820_MAP_PAGE >> PAGE_SHIFT])) == NULL )
        goto error_out;
    memset(e820_page, 0, PAGE_SIZE);
    build_e820map(e820_page, v_end);
    munmap(e820_page, PAGE_SIZE);

    /* The shared_info page starts its life empty. */
    if ( (shared_info = xc_map_foreign_range(
              xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE,
              shared_info_frame)) == NULL )
        goto error_out;
    memset(shared_info, 0, PAGE_SIZE);
    /* Mask all upcalls... */
    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
        shared_info->vcpu_info[i].evtchn_upcall_mask = 1;
    memset(&shared_info->evtchn_mask[0], 0xff,
           sizeof(shared_info->evtchn_mask));
    munmap(shared_info, PAGE_SIZE);

    /* The special pages live in the last RAM page below the 4GB MMIO hole. */
    if ( v_end > HVM_BELOW_4G_RAM_END )
        shared_page_nr = (HVM_BELOW_4G_RAM_END >> PAGE_SHIFT) - 1;
    else
        shared_page_nr = (v_end >> PAGE_SHIFT) - 1;

    /* Paranoia: clean pages. */
    if ( xc_clear_domain_page(xc_handle, dom, page_array[shared_page_nr]) ||
         xc_clear_domain_page(xc_handle, dom, page_array[shared_page_nr-1]) ||
         xc_clear_domain_page(xc_handle, dom, page_array[shared_page_nr-2]) )
        goto error_out;

    *store_mfn = page_array[shared_page_nr - 1];
    xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN, shared_page_nr-1);
    xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_EVTCHN, store_evtchn);
    xc_set_hvm_param(xc_handle, dom, HVM_PARAM_BUFIOREQ_PFN, shared_page_nr-2);
    xc_set_hvm_param(xc_handle, dom, HVM_PARAM_IOREQ_PFN, shared_page_nr);

    free(page_array);

    ctxt->user_regs.eip = dsi.v_kernentry;

    return 0;

 error_out:
    free(page_array);
    return -1;
}

static int xc_hvm_build_internal(int xc_handle,
                                 uint32_t domid,
                                 int memsize,
                                 char *image,
                                 unsigned long image_size,
                                 unsigned int vcpus,
                                 unsigned int pae,
                                 unsigned int acpi,
                                 unsigned int store_evtchn,
                                 unsigned long *store_mfn)
{
    struct xen_domctl launch_domctl, domctl;
    vcpu_guest_context_t ctxt;
    int rc;

    if ( (image == NULL) || (image_size == 0) )
    {
        ERROR("Image required");
        goto error_out;
    }

    domctl.cmd = XEN_DOMCTL_getdomaininfo;
    domctl.domain = (domid_t)domid;
    if ( (xc_domctl(xc_handle, &domctl) < 0) ||
         ((uint16_t)domctl.domain != domid) )
    {
        PERROR("Could not get info on domain");
        goto error_out;
    }

    memset(&ctxt, 0, sizeof(ctxt));

    if ( setup_guest(xc_handle, domid, memsize, image, image_size,
                     &ctxt, domctl.u.getdomaininfo.shared_info_frame,
                     vcpus, pae, acpi, store_evtchn, store_mfn) < 0 )
    {
        ERROR("Error constructing guest OS");
        goto error_out;
    }

    if ( lock_pages(&ctxt, sizeof(ctxt)) )
    {
        PERROR("%s: ctxt mlock failed", __func__);
        goto error_out;
    }

    memset(&launch_domctl, 0, sizeof(launch_domctl));
    launch_domctl.domain = (domid_t)domid;
    launch_domctl.u.vcpucontext.vcpu = 0;
    set_xen_guest_handle(launch_domctl.u.vcpucontext.ctxt, &ctxt);
    launch_domctl.cmd = XEN_DOMCTL_setvcpucontext;
    rc = xc_domctl(xc_handle, &launch_domctl);

    unlock_pages(&ctxt, sizeof(ctxt));

    return rc;

 error_out:
    return -1;
}

static inline int is_loadable_phdr(Elf32_Phdr *phdr)
{
    return ((phdr->p_type == PT_LOAD) &&
            ((phdr->p_flags & (PF_W|PF_X)) != 0));
}

static int parseelfimage(char *elfbase,
                         unsigned long elfsize,
                         struct domain_setup_info *dsi)
{
    Elf32_Ehdr *ehdr = (Elf32_Ehdr *)elfbase;
    Elf32_Phdr *phdr;
    Elf32_Shdr *shdr;
    unsigned long kernstart = ~0UL, kernend = 0UL;
    char *shstrtab;
    int h;

    if ( !IS_ELF(*ehdr) )
    {
        ERROR("Kernel image does not have an ELF header.");
        return -EINVAL;
    }

    if ( (ehdr->e_phoff + (ehdr->e_phnum * ehdr->e_phentsize)) > elfsize )
    {
        ERROR("ELF program headers extend beyond end of image.");
        return -EINVAL;
    }

    if ( (ehdr->e_shoff + (ehdr->e_shnum * ehdr->e_shentsize)) > elfsize )
    {
        ERROR("ELF section headers extend beyond end of image.");
        return -EINVAL;
    }

    /* Find the section-header strings table. */
    if ( ehdr->e_shstrndx == SHN_UNDEF )
    {
        ERROR("ELF image has no section-header strings table (shstrtab).");
        return -EINVAL;
    }
    shdr = (Elf32_Shdr *)(elfbase + ehdr->e_shoff +
                          (ehdr->e_shstrndx*ehdr->e_shentsize));
    shstrtab = elfbase + shdr->sh_offset;

    /* The kernel image spans the union of all loadable program headers. */
    for ( h = 0; h < ehdr->e_phnum; h++ )
    {
        phdr = (Elf32_Phdr *)(elfbase + ehdr->e_phoff + (h*ehdr->e_phentsize));
        if ( !is_loadable_phdr(phdr) )
            continue;
        if ( phdr->p_paddr < kernstart )
            kernstart = phdr->p_paddr;
        if ( (phdr->p_paddr + phdr->p_memsz) > kernend )
            kernend = phdr->p_paddr + phdr->p_memsz;
    }

    if ( (kernstart > kernend) ||
         (ehdr->e_entry < kernstart) ||
         (ehdr->e_entry > kernend) )
    {
        ERROR("Malformed ELF image.");
        return -EINVAL;
    }

    dsi->v_start = 0x00000000;

    dsi->v_kernstart = kernstart;
    dsi->v_kernend = kernend;
    dsi->v_kernentry = HVM_LOADER_ENTR_ADDR;

    dsi->v_end = dsi->v_kernend;

    return 0;
}

static int
loadelfimage(
    char *elfbase, int xch, uint32_t dom, unsigned long *parray,
    struct domain_setup_info *dsi)
{
    Elf32_Ehdr *ehdr = (Elf32_Ehdr *)elfbase;
    Elf32_Phdr *phdr;
    int h;

    char *va;
    unsigned long pa, done, chunksz;

    for ( h = 0; h < ehdr->e_phnum; h++ )
    {
        phdr = (Elf32_Phdr *)(elfbase + ehdr->e_phoff + (h*ehdr->e_phentsize));
        if ( !is_loadable_phdr(phdr) )
            continue;

        /* Copy the file-backed part of the segment, one page at a time. */
        for ( done = 0; done < phdr->p_filesz; done += chunksz )
        {
            pa = (phdr->p_paddr + done) - dsi->v_start;
            if ( (va = xc_map_foreign_range(
                      xch, dom, PAGE_SIZE, PROT_WRITE,
                      parray[pa >> PAGE_SHIFT])) == NULL )
                return -1;
            chunksz = phdr->p_filesz - done;
            if ( chunksz > (PAGE_SIZE - (pa & (PAGE_SIZE-1))) )
                chunksz = PAGE_SIZE - (pa & (PAGE_SIZE-1));
            memcpy(va + (pa & (PAGE_SIZE-1)),
                   elfbase + phdr->p_offset + done, chunksz);
            munmap(va, PAGE_SIZE);
        }

        /* Zero the remainder of the segment (p_memsz beyond p_filesz). */
        for ( ; done < phdr->p_memsz; done += chunksz )
        {
            pa = (phdr->p_paddr + done) - dsi->v_start;
            if ( (va = xc_map_foreign_range(
                      xch, dom, PAGE_SIZE, PROT_WRITE,
                      parray[pa >> PAGE_SHIFT])) == NULL )
                return -1;
            chunksz = phdr->p_memsz - done;
            if ( chunksz > (PAGE_SIZE - (pa & (PAGE_SIZE-1))) )
                chunksz = PAGE_SIZE - (pa & (PAGE_SIZE-1));
            memset(va + (pa & (PAGE_SIZE-1)), 0, chunksz);
            munmap(va, PAGE_SIZE);
        }
    }

    return 0;
}

/* xc_hvm_build
 *
 * Create a domain for a virtualized Linux, using files/filenames.
 */
int xc_hvm_build(int xc_handle,
                 uint32_t domid,
                 int memsize,
                 const char *image_name,
                 unsigned int vcpus,
                 unsigned int pae,
                 unsigned int acpi,
                 unsigned int store_evtchn,
                 unsigned long *store_mfn)
{
    char *image;
    int sts;
    unsigned long image_size;

    if ( (image_name == NULL) ||
         ((image = xc_read_image(image_name, &image_size)) == NULL) )
        return -1;

    sts = xc_hvm_build_internal(xc_handle, domid, memsize,
                                image, image_size,
                                vcpus, pae, acpi,
                                store_evtchn, store_mfn);

    free(image);

    return sts;
}
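
/*
 * Minimal usage sketch (hypothetical values, error handling elided).
 * Assumes an interface handle from xc_interface_open() and a domain
 * already created by the caller; memsize is in MB, pae/acpi are boolean
 * flags:
 *
 *     int xc = xc_interface_open();
 *     unsigned long store_mfn;
 *     int rc = xc_hvm_build(xc, domid, 256, "/usr/lib/xen/boot/hvmloader",
 *                           1, 1, 1, store_evtchn, &store_mfn);
 */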

/* xc_hvm_build_mem
 *
 * Create a domain for a virtualized Linux, using in-memory buffers.
 */
int xc_hvm_build_mem(int xc_handle,
                     uint32_t domid,
                     int memsize,
                     const char *image_buffer,
                     unsigned long image_size,
                     unsigned int vcpus,
                     unsigned int pae,
                     unsigned int acpi,
                     unsigned int store_evtchn,
                     unsigned long *store_mfn)
{
    int sts;
    unsigned long img_len;
    char *img;

    /* Validate that there is a kernel buffer. */
    if ( (image_buffer == NULL) || (image_size == 0) )
    {
        ERROR("kernel image buffer not present");
        return -1;
    }

    img = xc_inflate_buffer(image_buffer, image_size, &img_len);
    if ( img == NULL )
    {
        ERROR("unable to inflate kernel image buffer");
        return -1;
    }

    sts = xc_hvm_build_internal(xc_handle, domid, memsize,
                                img, img_len,
                                vcpus, pae, acpi,
                                store_evtchn, store_mfn);

    /* xc_inflate_buffer may return the original buffer pointer (for
       already-inflated buffers), so exercise some care in freeing. */
    if ( (img != NULL) && (img != image_buffer) )
        free(img);

    return sts;
}

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */