ia64/xen-unstable

tools/libxc/xc_hvm_build.c @ 8974:0349fb4de335

Clean up some vmx code.

Signed-off-by: Xin Li <xin.b.li@intel.com>
author   kaf24@firebug.cl.cam.ac.uk
date     Thu Feb 23 11:34:11 2006 +0100 (2006-02-23)
parents  b5bb9920bf48
children 899532500ada
/******************************************************************************
 * xc_hvm_build.c
 */

#include <stddef.h>
#include "xg_private.h"
#define ELFSIZE 32
#include "xc_elf.h"
#include <stdlib.h>
#include <unistd.h>
#include <stdio.h>    /* printf(), fprintf() */
#include <string.h>   /* memset(), memcpy(), strncpy() */
#include <sys/mman.h> /* mlock(), munmap(), PROT_* */
#include <zlib.h>
#include <xen/hvm/hvm_info_table.h>
#include <xen/hvm/ioreq.h>

#define HVM_LOADER_ENTR_ADDR 0x00100000

#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
#ifdef __x86_64__
#define L3_PROT (_PAGE_PRESENT)
#endif

#define E820MAX 128

#define E820_RAM          1
#define E820_RESERVED     2
#define E820_ACPI         3
#define E820_NVS          4
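/* Xen-specific E820 extensions; these are not standard BIOS E820 types. */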
#define E820_IO          16
#define E820_SHARED_PAGE 17
#define E820_XENSTORE    18
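
/*
 * Where the E820 map lives in guest-physical memory: the entry count is
 * written at E820_MAP_NR_OFFSET and the entries themselves at
 * E820_MAP_OFFSET, both within the page at E820_MAP_PAGE (see
 * build_e820map() below).
 */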
#define E820_MAP_PAGE      0x00090000
#define E820_MAP_NR_OFFSET 0x000001E8
#define E820_MAP_OFFSET    0x000002D0

struct e820entry {
    uint64_t addr;
    uint64_t size;
    uint32_t type;
} __attribute__((packed));

#define round_pgup(_p)   (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
#define round_pgdown(_p) ((_p)&PAGE_MASK)

static int
parseelfimage(
    char *elfbase, unsigned long elfsize, struct domain_setup_info *dsi);
static int
loadelfimage(
    char *elfbase, int xch, uint32_t dom, unsigned long *parray,
    struct domain_setup_info *dsi);
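
/*
 * Build the guest's E820 memory map at the expected offset within
 * e820_page.  The layout, as constructed below: conventional RAM up to
 * 0x9F800, the EBDA and BIOS regions reserved, an I/O hole at 0xA0000,
 * main RAM from 1MB up to mem_size less the two statically allocated
 * special pages (shared ioreq page and xenstore page), then NVS and
 * ACPI regions above mem_size and the IO-APIC range at 0xFEC00000.
 * Returns the number of entries written.
 */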
static unsigned char build_e820map(void *e820_page, unsigned long mem_size)
{
    struct e820entry *e820entry =
        (struct e820entry *)(((unsigned char *)e820_page) + E820_MAP_OFFSET);
    unsigned char nr_map = 0;

    /* XXX: Doesn't work for > 4GB yet */
    e820entry[nr_map].addr = 0x0;
    e820entry[nr_map].size = 0x9F800;
    e820entry[nr_map].type = E820_RAM;
    nr_map++;

    e820entry[nr_map].addr = 0x9F800;
    e820entry[nr_map].size = 0x800;
    e820entry[nr_map].type = E820_RESERVED;
    nr_map++;

    e820entry[nr_map].addr = 0xA0000;
    e820entry[nr_map].size = 0x20000;
    e820entry[nr_map].type = E820_IO;
    nr_map++;

    e820entry[nr_map].addr = 0xF0000;
    e820entry[nr_map].size = 0x10000;
    e820entry[nr_map].type = E820_RESERVED;
    nr_map++;

#define STATIC_PAGES 2 /* for ioreq_t and store_mfn */
    /* Most of the ram goes here */
    e820entry[nr_map].addr = 0x100000;
    e820entry[nr_map].size = mem_size - 0x100000 - STATIC_PAGES*PAGE_SIZE;
    e820entry[nr_map].type = E820_RAM;
    nr_map++;

    /* Statically allocated special pages */

    /* Shared ioreq_t page */
    e820entry[nr_map].addr = mem_size - PAGE_SIZE;
    e820entry[nr_map].size = PAGE_SIZE;
    e820entry[nr_map].type = E820_SHARED_PAGE;
    nr_map++;

    /* For xenstore */
    e820entry[nr_map].addr = mem_size - 2*PAGE_SIZE;
    e820entry[nr_map].size = PAGE_SIZE;
    e820entry[nr_map].type = E820_XENSTORE;
    nr_map++;

    e820entry[nr_map].addr = mem_size;
    e820entry[nr_map].size = 0x3 * PAGE_SIZE;
    e820entry[nr_map].type = E820_NVS;
    nr_map++;

    e820entry[nr_map].addr = mem_size + 0x3 * PAGE_SIZE;
    e820entry[nr_map].size = 0xA * PAGE_SIZE;
    e820entry[nr_map].type = E820_ACPI;
    nr_map++;

    e820entry[nr_map].addr = 0xFEC00000;
    e820entry[nr_map].size = 0x1400000;
    e820entry[nr_map].type = E820_IO;
    nr_map++;

    return (*(((unsigned char *)e820_page) + E820_MAP_NR_OFFSET) = nr_map);
}
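
/*
 * Set t->checksum so that the byte-wise sum of the entire table,
 * checksum field included, is zero.
 */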
static void
set_hvm_info_checksum(struct hvm_info_table *t)
{
    uint8_t *ptr = (uint8_t *)t, sum = 0;
    unsigned int i;

    t->checksum = 0;

    for (i = 0; i < t->length; i++)
        sum += *ptr++;

    t->checksum = -sum;
}

/*
 * Use the E820 reserved region at 0x9F800 to pass HVM info to hvmloader.
 * hvmloader will use this info to set up the BIOS accordingly.
 */
static int set_hvm_info(int xc_handle, uint32_t dom,
                        unsigned long *pfn_list, unsigned int vcpus,
                        unsigned int pae, unsigned int acpi, unsigned int apic)
{
    char *va_map;
    struct hvm_info_table *va_hvm;

    va_map = xc_map_foreign_range(
        xc_handle,
        dom,
        PAGE_SIZE,
        PROT_READ|PROT_WRITE,
        pfn_list[HVM_INFO_PFN]);

    if ( va_map == NULL )
        return -1;

    va_hvm = (struct hvm_info_table *)(va_map + HVM_INFO_OFFSET);
    memset(va_hvm, 0, sizeof(*va_hvm));
    strncpy(va_hvm->signature, "HVM INFO", 8);
    va_hvm->length = sizeof(struct hvm_info_table);
    va_hvm->acpi_enabled = acpi;
    va_hvm->apic_enabled = apic;
    va_hvm->pae_enabled = pae;
    va_hvm->nr_vcpus = vcpus;

    set_hvm_info_checksum(va_hvm);

    munmap(va_map, PAGE_SIZE);

    return 0;
}
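
/*
 * Populate a new HVM domain: load the firmware image into guest memory,
 * fill in the machine-to-physical table, write the HVM info table and
 * E820 map, initialise the shared_info, ioreq and xenstore pages, and
 * set up the initial register state.
 */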
static int setup_guest(int xc_handle,
                       uint32_t dom, int memsize,
                       char *image, unsigned long image_size,
                       unsigned long nr_pages,
                       vcpu_guest_context_t *ctxt,
                       unsigned long shared_info_frame,
                       unsigned int vcpus,
                       unsigned int pae,
                       unsigned int acpi,
                       unsigned int apic,
                       unsigned int store_evtchn,
                       unsigned long *store_mfn)
{
    unsigned long *page_array = NULL;

    unsigned long count, i;
    shared_info_t *shared_info;
    void *e820_page;
    unsigned char e820_map_nr;
    xc_mmu_t *mmu = NULL;
    int rc;

    struct domain_setup_info dsi;
    unsigned long v_end;

    unsigned long shared_page_frame = 0;
    shared_iopage_t *sp;

    memset(&dsi, 0, sizeof(struct domain_setup_info));

    if ( (rc = parseelfimage(image, image_size, &dsi)) != 0 )
        goto error_out;

    if ( (dsi.v_start & (PAGE_SIZE-1)) != 0 )
    {
        PERROR("Guest OS must load to a page boundary.\n");
        goto error_out;
    }

    /* memsize is in megabytes */
    v_end = (unsigned long)memsize << 20;

    printf("VIRTUAL MEMORY ARRANGEMENT:\n"
           " Loaded HVM loader: %08lx->%08lx\n"
           " TOTAL:             %08lx->%08lx\n",
           dsi.v_kernstart, dsi.v_kernend,
           dsi.v_start, v_end);
    printf(" ENTRY ADDRESS:     %08lx\n", dsi.v_kernentry);

    if ( (v_end - dsi.v_start) > (nr_pages * PAGE_SIZE) )
    {
        ERROR("Initial guest OS requires too much space\n"
              "(%luMB is greater than %luMB limit)\n",
              (v_end-dsi.v_start)>>20, (nr_pages<<PAGE_SHIFT)>>20);
        goto error_out;
    }

    if ( (page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL )
    {
        PERROR("Could not allocate memory");
        goto error_out;
    }

    if ( xc_get_pfn_list(xc_handle, dom, page_array, nr_pages) != nr_pages )
    {
        PERROR("Could not get the page frame list");
        goto error_out;
    }

    if ( loadelfimage(image, xc_handle, dom, page_array, &dsi) != 0 )
        goto error_out;

    if ( (mmu = xc_init_mmu_updates(xc_handle, dom)) == NULL )
        goto error_out;

    /* Write the machine->phys table entries. */
    for ( count = 0; count < nr_pages; count++ )
    {
        if ( xc_add_mmu_update(xc_handle, mmu,
                               (page_array[count] << PAGE_SHIFT) |
                               MMU_MACHPHYS_UPDATE, count) )
            goto error_out;
    }

    if ( set_hvm_info(xc_handle, dom, page_array, vcpus, pae, acpi, apic) ) {
        fprintf(stderr, "Couldn't set hvm info for HVM guest.\n");
        goto error_out;
    }

    if ( (e820_page = xc_map_foreign_range(
              xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
              page_array[E820_MAP_PAGE >> PAGE_SHIFT])) == 0 )
        goto error_out;
    memset(e820_page, 0, PAGE_SIZE);
    e820_map_nr = build_e820map(e820_page, v_end);
    munmap(e820_page, PAGE_SIZE);

    /* shared_info page starts its life empty. */
    if ( (shared_info = xc_map_foreign_range(
              xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
              shared_info_frame)) == 0 )
        goto error_out;
    memset(shared_info, 0, sizeof(shared_info_t));
    /* Mask all upcalls... */
    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
        shared_info->vcpu_info[i].evtchn_upcall_mask = 1;
    munmap(shared_info, PAGE_SIZE);

    /* Populate the event channel port in the shared page */
    shared_page_frame = page_array[(v_end >> PAGE_SHIFT) - 1];
    if ( (sp = (shared_iopage_t *) xc_map_foreign_range(
              xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
              shared_page_frame)) == 0 )
        goto error_out;
    memset(sp, 0, PAGE_SIZE);

    /* FIXME: how about if we overflow the page here? */
    for ( i = 0; i < vcpus; i++ ) {
        int vp_eport;

        vp_eport = xc_evtchn_alloc_unbound(xc_handle, dom, 0);
        if ( vp_eport < 0 ) {
            fprintf(stderr, "Couldn't get unbound port from HVM guest.\n");
            goto error_out;
        }
        sp->vcpu_iodata[i].vp_eport = vp_eport;
    }

    munmap(sp, PAGE_SIZE);

    *store_mfn = page_array[(v_end >> PAGE_SHIFT) - 2];
    if ( xc_clear_domain_page(xc_handle, dom, *store_mfn) )
        goto error_out;

    /* Send the page update requests down to the hypervisor. */
    if ( xc_finish_mmu_updates(xc_handle, mmu) )
        goto error_out;

    free(mmu);
    free(page_array);

    /*
     * Initial register values:
     */
    ctxt->user_regs.ds = 0;
    ctxt->user_regs.es = 0;
    ctxt->user_regs.fs = 0;
    ctxt->user_regs.gs = 0;
    ctxt->user_regs.ss = 0;
    ctxt->user_regs.cs = 0;
    ctxt->user_regs.eip = dsi.v_kernentry;
    ctxt->user_regs.edx = 0;
    ctxt->user_regs.eax = 0;
    ctxt->user_regs.esp = 0;
    ctxt->user_regs.ebx = 0; /* startup_32 expects this to be 0 to signal boot cpu */
    ctxt->user_regs.ecx = 0;
    ctxt->user_regs.esi = 0;
    ctxt->user_regs.edi = 0;
    ctxt->user_regs.ebp = 0;

    ctxt->user_regs.eflags = 0;

    return 0;

 error_out:
    free(mmu);
    free(page_array);
    return -1;
}
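
/*
 * Build an HVM guest.  Verifies that the hypervisor advertises the
 * "hvm" capability, reads the firmware image from disk, delegates the
 * heavy lifting to setup_guest(), and finally sets the initial VCPU
 * context via DOM0_SETVCPUCONTEXT.
 *
 * A minimal usage sketch (the handle, domain id, loader path and event
 * channel below are illustrative assumptions, not values taken from
 * this file):
 *
 *   int xc_handle = xc_interface_open();
 *   unsigned long store_mfn;
 *   int rc = xc_hvm_build(xc_handle, domid, memsize_mb,
 *                         "/usr/lib/xen/boot/hvmloader",
 *                         vcpus, pae, acpi, apic,
 *                         store_evtchn, &store_mfn);
 */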
int xc_hvm_build(int xc_handle,
                 uint32_t domid,
                 int memsize,
                 const char *image_name,
                 unsigned int vcpus,
                 unsigned int pae,
                 unsigned int acpi,
                 unsigned int apic,
                 unsigned int store_evtchn,
                 unsigned long *store_mfn)
{
    dom0_op_t launch_op, op;
    int rc, i;
    vcpu_guest_context_t st_ctxt, *ctxt = &st_ctxt;
    long nr_pages; /* signed: the error check below relies on a negative return */
    char *image = NULL;
    unsigned long image_size;
    xen_capabilities_info_t xen_caps;

    if ( (rc = xc_version(xc_handle, XENVER_capabilities, &xen_caps)) != 0 )
    {
        PERROR("Failed to get xen version info");
        goto error_out;
    }

    if ( !strstr(xen_caps, "hvm") )
    {
        PERROR("CPU doesn't support HVM extensions or "
               "the extensions are not enabled");
        goto error_out;
    }

    if ( (nr_pages = xc_get_tot_pages(xc_handle, domid)) < 0 )
    {
        PERROR("Could not find total pages for domain");
        goto error_out;
    }

    if ( (image = xc_read_kernel_image(image_name, &image_size)) == NULL )
        goto error_out;

    if ( mlock(&st_ctxt, sizeof(st_ctxt)) )
    {
        PERROR("%s: ctxt mlock failed", __func__);
        goto error_out;
    }

    op.cmd = DOM0_GETDOMAININFO;
    op.u.getdomaininfo.domain = (domid_t)domid;
    if ( (xc_dom0_op(xc_handle, &op) < 0) ||
         ((uint16_t)op.u.getdomaininfo.domain != domid) )
    {
        PERROR("Could not get info on domain");
        goto error_out;
    }

    memset(ctxt, 0, sizeof(*ctxt));

    ctxt->flags = VGCF_HVM_GUEST;
    if ( setup_guest(xc_handle, domid, memsize, image, image_size, nr_pages,
                     ctxt, op.u.getdomaininfo.shared_info_frame,
                     vcpus, pae, acpi, apic, store_evtchn, store_mfn) < 0 )
    {
        ERROR("Error constructing guest OS");
        goto error_out;
    }

    free(image);

    /* FPU is set up to default initial state. */
    memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));

    /* Virtual IDT is empty at start-of-day. */
    for ( i = 0; i < 256; i++ )
    {
        ctxt->trap_ctxt[i].vector = i;
        ctxt->trap_ctxt[i].cs     = FLAT_KERNEL_CS;
    }

    /* No LDT. */
    ctxt->ldt_ents = 0;

    /* Use the default Xen-provided GDT. */
    ctxt->gdt_ents = 0;

    /* No debugging. */
    memset(ctxt->debugreg, 0, sizeof(ctxt->debugreg));

    /* No callback handlers. */
#if defined(__i386__)
    ctxt->event_callback_cs     = FLAT_KERNEL_CS;
    ctxt->event_callback_eip    = 0;
    ctxt->failsafe_callback_cs  = FLAT_KERNEL_CS;
    ctxt->failsafe_callback_eip = 0;
#elif defined(__x86_64__)
    ctxt->event_callback_eip    = 0;
    ctxt->failsafe_callback_eip = 0;
    ctxt->syscall_callback_eip  = 0;
#endif

    memset(&launch_op, 0, sizeof(launch_op));

    launch_op.u.setvcpucontext.domain = (domid_t)domid;
    launch_op.u.setvcpucontext.vcpu   = 0;
    launch_op.u.setvcpucontext.ctxt   = ctxt;

    launch_op.cmd = DOM0_SETVCPUCONTEXT;
    rc = xc_dom0_op(xc_handle, &launch_op);

    return rc;

 error_out:
    free(image);
    return -1;
}

static inline int is_loadable_phdr(Elf32_Phdr *phdr)
{
    return ((phdr->p_type == PT_LOAD) &&
            ((phdr->p_flags & (PF_W|PF_X)) != 0));
}
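
/*
 * Sanity-check the 32-bit ELF image and record the kernel's physical
 * start, end and entry point in *dsi.  Nothing is copied into the
 * guest here; that is loadelfimage()'s job.
 */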
static int parseelfimage(char *elfbase,
                         unsigned long elfsize,
                         struct domain_setup_info *dsi)
{
    Elf32_Ehdr *ehdr = (Elf32_Ehdr *)elfbase;
    Elf32_Phdr *phdr;
    Elf32_Shdr *shdr;
    unsigned long kernstart = ~0UL, kernend = 0UL;
    char *shstrtab;
    int h;

    if ( !IS_ELF(*ehdr) )
    {
        ERROR("Kernel image does not have an ELF header.");
        return -EINVAL;
    }

    if ( (ehdr->e_phoff + (ehdr->e_phnum * ehdr->e_phentsize)) > elfsize )
    {
        ERROR("ELF program headers extend beyond end of image.");
        return -EINVAL;
    }

    if ( (ehdr->e_shoff + (ehdr->e_shnum * ehdr->e_shentsize)) > elfsize )
    {
        ERROR("ELF section headers extend beyond end of image.");
        return -EINVAL;
    }

    /* Find the section-header strings table. */
    if ( ehdr->e_shstrndx == SHN_UNDEF )
    {
        ERROR("ELF image has no section-header strings table (shstrtab).");
        return -EINVAL;
    }
    shdr = (Elf32_Shdr *)(elfbase + ehdr->e_shoff +
                          (ehdr->e_shstrndx*ehdr->e_shentsize));
    shstrtab = elfbase + shdr->sh_offset;

    for ( h = 0; h < ehdr->e_phnum; h++ )
    {
        phdr = (Elf32_Phdr *)(elfbase + ehdr->e_phoff + (h*ehdr->e_phentsize));
        if ( !is_loadable_phdr(phdr) )
            continue;
        if ( phdr->p_paddr < kernstart )
            kernstart = phdr->p_paddr;
        if ( (phdr->p_paddr + phdr->p_memsz) > kernend )
            kernend = phdr->p_paddr + phdr->p_memsz;
    }

    if ( (kernstart > kernend) ||
         (ehdr->e_entry < kernstart) ||
         (ehdr->e_entry > kernend) )
    {
        ERROR("Malformed ELF image.");
        return -EINVAL;
    }

    dsi->v_start = 0x00000000;

    dsi->v_kernstart = kernstart;
    dsi->v_kernend   = kernend;
    dsi->v_kernentry = HVM_LOADER_ENTR_ADDR;

    dsi->v_end = dsi->v_kernend;

    return 0;
}
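
/*
 * Copy every loadable segment into the guest one page at a time through
 * transient foreign mappings: the first inner loop copies the p_filesz
 * bytes present in the image, the second zero-fills the remainder up to
 * p_memsz (the BSS).
 */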
static int
loadelfimage(
    char *elfbase, int xch, uint32_t dom, unsigned long *parray,
    struct domain_setup_info *dsi)
{
    Elf32_Ehdr *ehdr = (Elf32_Ehdr *)elfbase;
    Elf32_Phdr *phdr;
    int h;

    char *va;
    unsigned long pa, done, chunksz;

    for ( h = 0; h < ehdr->e_phnum; h++ )
    {
        phdr = (Elf32_Phdr *)(elfbase + ehdr->e_phoff + (h*ehdr->e_phentsize));
        if ( !is_loadable_phdr(phdr) )
            continue;

        for ( done = 0; done < phdr->p_filesz; done += chunksz )
        {
            pa = (phdr->p_paddr + done) - dsi->v_start;
            if ( (va = xc_map_foreign_range(
                      xch, dom, PAGE_SIZE, PROT_WRITE,
                      parray[pa >> PAGE_SHIFT])) == 0 )
                return -1;
            chunksz = phdr->p_filesz - done;
            if ( chunksz > (PAGE_SIZE - (pa & (PAGE_SIZE-1))) )
                chunksz = PAGE_SIZE - (pa & (PAGE_SIZE-1));
            memcpy(va + (pa & (PAGE_SIZE-1)),
                   elfbase + phdr->p_offset + done, chunksz);
            munmap(va, PAGE_SIZE);
        }

        for ( ; done < phdr->p_memsz; done += chunksz )
        {
            pa = (phdr->p_paddr + done) - dsi->v_start;
            if ( (va = xc_map_foreign_range(
                      xch, dom, PAGE_SIZE, PROT_WRITE,
                      parray[pa >> PAGE_SHIFT])) == 0 )
                return -1;
            chunksz = phdr->p_memsz - done;
            if ( chunksz > (PAGE_SIZE - (pa & (PAGE_SIZE-1))) )
                chunksz = PAGE_SIZE - (pa & (PAGE_SIZE-1));
            memset(va + (pa & (PAGE_SIZE-1)), 0, chunksz);
            munmap(va, PAGE_SIZE);
        }
    }

    return 0;
}

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */