ia64/xen-unstable

view tools/libxc/xc_vmx_build.c @ 4718:c36d6a98229a

bitkeeper revision 1.1389.1.15 (4273436eypuqO7C1_MKQjhBV2kylZw)

[PATCH] vmx-mach-to-phys.patch

VMX domains need to set up their machine-to-physical maps.

Signed-off-by: Arun Sharma <arun.sharma@intel.com>
author arun.sharma@intel.com[kaf24]
date Sat Apr 30 08:35:58 2005 +0000 (2005-04-30)
parents cd690b71434a
children 79b7835ac75d a68686aaebc3 9f5d923b1fb7 d23d2657744d
line source
1 /******************************************************************************
2 * xc_vmx_build.c
3 */
5 #include "xc_private.h"
6 #define ELFSIZE 32
7 #include "xc_elf.h"
8 #include <stdlib.h>
9 #include <zlib.h>
10 #include "linux_boot_params.h"
12 #define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
13 #define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
15 #define round_pgup(_p) (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
16 #define round_pgdown(_p) ((_p)&PAGE_MASK)
18 #define LINUX_BOOT_PARAMS_ADDR 0x00090000
19 #define LINUX_KERNEL_ENTR_ADDR 0x00100000
20 #define LINUX_PAGE_OFFSET 0xC0000000
22 struct domain_setup_info
23 {
24 unsigned long v_start;
25 unsigned long v_end;
26 unsigned long v_kernstart;
27 unsigned long v_kernend;
28 unsigned long v_kernentry;
29 };
31 static int
32 parseelfimage(
33 char *elfbase, unsigned long elfsize, struct domain_setup_info *dsi);
34 static int
35 loadelfimage(
36 char *elfbase, int xch, u32 dom, unsigned long *parray,
37 unsigned long vstart);
39 static void build_e820map(struct mem_map *mem_mapp, unsigned long mem_size)
40 {
41 int nr_map = 0;
43 /* XXX: Doesn't work for > 4GB yet */
44 mem_mapp->map[0].addr = 0x0;
45 mem_mapp->map[0].size = 0x9F800;
46 mem_mapp->map[0].type = E820_RAM;
47 mem_mapp->map[0].caching_attr = MEMMAP_WB;
48 nr_map++;
50 mem_mapp->map[1].addr = 0x9F800;
51 mem_mapp->map[1].size = 0x800;
52 mem_mapp->map[1].type = E820_RESERVED;
53 mem_mapp->map[1].caching_attr = MEMMAP_UC;
54 nr_map++;
56 mem_mapp->map[2].addr = 0xA0000;
57 mem_mapp->map[2].size = 0x20000;
58 mem_mapp->map[2].type = E820_IO;
59 mem_mapp->map[2].caching_attr = MEMMAP_UC;
60 nr_map++;
62 mem_mapp->map[3].addr = 0xF0000;
63 mem_mapp->map[3].size = 0x10000;
64 mem_mapp->map[3].type = E820_RESERVED;
65 mem_mapp->map[3].caching_attr = MEMMAP_UC;
66 nr_map++;
68 mem_mapp->map[4].addr = 0x100000;
69 mem_mapp->map[4].size = mem_size - 0x100000 - PAGE_SIZE;
70 mem_mapp->map[4].type = E820_RAM;
71 mem_mapp->map[4].caching_attr = MEMMAP_WB;
72 nr_map++;
74 mem_mapp->map[5].addr = mem_size - PAGE_SIZE;
75 mem_mapp->map[5].size = PAGE_SIZE;
76 mem_mapp->map[5].type = E820_SHARED;
77 mem_mapp->map[5].caching_attr = MEMMAP_WB;
78 nr_map++;
80 mem_mapp->map[6].addr = mem_size;
81 mem_mapp->map[6].size = 0x3 * PAGE_SIZE;
82 mem_mapp->map[6].type = E820_NVS;
83 mem_mapp->map[6].caching_attr = MEMMAP_UC;
84 nr_map++;
86 mem_mapp->map[7].addr = mem_size + 0x3 * PAGE_SIZE;
87 mem_mapp->map[7].size = 0xA * PAGE_SIZE;
88 mem_mapp->map[7].type = E820_ACPI;
89 mem_mapp->map[7].caching_attr = MEMMAP_WB;
90 nr_map++;
92 mem_mapp->map[8].addr = 0xFEC00000;
93 mem_mapp->map[8].size = 0x1400000;
94 mem_mapp->map[8].type = E820_IO;
95 mem_mapp->map[8].caching_attr = MEMMAP_UC;
96 nr_map++;
98 mem_mapp->nr_map = nr_map;
99 }
101 static int zap_mmio_range(int xc_handle, u32 dom,
102 l2_pgentry_t *vl2tab,
103 unsigned long mmio_range_start,
104 unsigned long mmio_range_size)
105 {
106 unsigned long mmio_addr;
107 unsigned long mmio_range_end = mmio_range_start + mmio_range_size;
108 unsigned long vl2e;
109 l1_pgentry_t *vl1tab;
111 mmio_addr = mmio_range_start & PAGE_MASK;
112 for (; mmio_addr < mmio_range_end; mmio_addr += PAGE_SIZE) {
113 vl2e = vl2tab[l2_table_offset(mmio_addr)];
114 vl1tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
115 PROT_READ|PROT_WRITE, vl2e >> PAGE_SHIFT);
116 if (vl1tab == 0) {
117 PERROR("Failed zap MMIO range");
118 return -1;
119 }
120 vl1tab[l1_table_offset(mmio_addr)] = 0;
121 munmap(vl1tab, PAGE_SIZE);
122 }
123 return 0;
124 }
126 static int zap_mmio_ranges(int xc_handle, u32 dom,
127 unsigned long l2tab,
128 struct mem_map *mem_mapp)
129 {
130 int i;
131 l2_pgentry_t *vl2tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
132 PROT_READ|PROT_WRITE,
133 l2tab >> PAGE_SHIFT);
134 if (vl2tab == 0)
135 return -1;
136 for (i = 0; i < mem_mapp->nr_map; i++) {
137 if ((mem_mapp->map[i].type == E820_IO)
138 && (mem_mapp->map[i].caching_attr == MEMMAP_UC))
139 if (zap_mmio_range(xc_handle, dom, vl2tab,
140 mem_mapp->map[i].addr, mem_mapp->map[i].size) == -1)
141 return -1;
142 }
143 munmap(vl2tab, PAGE_SIZE);
144 return 0;
145 }
/*
 * Build the initial memory image of a VMX guest.
 *
 * Loads the ELF kernel and (optionally) a gzip-compressed initrd into
 * the domain, constructs the bootstrap 2-level page tables, records the
 * machine-to-physical mapping with the hypervisor, fills in the Linux
 * boot-params page ("zero page"), a minimal boot GDT and the e820 map,
 * and sets the guest's initial register state in @ctxt.
 *
 * @memsize is in megabytes; @nr_pages is the number of machine pages
 * already allocated to @dom.  Returns 0 on success, -1 on failure
 * (partially-built domain state is left for the caller to destroy).
 */
static int setup_guest(int xc_handle,
                       u32 dom, int memsize,
                       char *image, unsigned long image_size,
                       gzFile initrd_gfd, unsigned long initrd_len,
                       unsigned long nr_pages,
                       vcpu_guest_context_t *ctxt,
                       const char *cmdline,
                       unsigned long shared_info_frame,
                       unsigned int control_evtchn,
                       unsigned long flags,
                       struct mem_map * mem_mapp)
{
    l1_pgentry_t *vl1tab=NULL, *vl1e=NULL;  /* current L1 table and entry */
    l2_pgentry_t *vl2tab=NULL, *vl2e=NULL;  /* page directory and entry */
    unsigned long *page_array = NULL;       /* pfn -> machine frame table */
    unsigned long l2tab;                    /* machine addr of page dir */
    unsigned long l1tab;                    /* machine addr of current L1 */
    unsigned long count, i;
    shared_info_t *shared_info;
    struct linux_boot_params * boot_paramsp;
    __u16 * boot_gdtp;
    mmu_t *mmu = NULL;
    int rc;

    unsigned long nr_pt_pages;
    unsigned long ppt_alloc;                /* next pfn to use for a PT page */

    struct domain_setup_info dsi;
    unsigned long vinitrd_start;
    unsigned long vinitrd_end;
    unsigned long vboot_params_start;
    unsigned long vboot_params_end;
    unsigned long vboot_gdt_start;
    unsigned long vboot_gdt_end;
    unsigned long vpt_start;
    unsigned long vpt_end;
    unsigned long v_end;

    memset(&dsi, 0, sizeof(struct domain_setup_info));

    if ( (rc = parseelfimage(image, image_size, &dsi)) != 0 )
        goto error_out;

    if ( (dsi.v_start & (PAGE_SIZE-1)) != 0 )
    {
        PERROR("Guest OS must load to a page boundary.\n");
        goto error_out;
    }

    /*
     * Why do we need this? The number of page-table frames depends on the
     * size of the bootstrap address space. But the size of the address space
     * depends on the number of page-table frames (since each one is mapped
     * read-only). We have a pair of simultaneous equations in two unknowns,
     * which we solve by exhaustive search.
     */
    /* One page of boot params at the conventional 0x90000 address,
     * followed immediately by one page for the boot GDT. */
    vboot_params_start = LINUX_BOOT_PARAMS_ADDR;
    vboot_params_end = vboot_params_start + PAGE_SIZE;
    vboot_gdt_start = vboot_params_end;
    vboot_gdt_end = vboot_gdt_start + PAGE_SIZE;

    /* memsize is in megabytes */
    v_end = memsize << 20;
    vinitrd_end = v_end - PAGE_SIZE; /* leaving the top 4k untouched for IO requests page use */
    /* Place the initrd as high as possible, rounded down to a page. */
    vinitrd_start = vinitrd_end - initrd_len;
    vinitrd_start = vinitrd_start & (~(PAGE_SIZE - 1));

    if(initrd_len == 0)
        vinitrd_start = vinitrd_end = 0;

    /* One L2 page plus one L1 page per 4MB of guest space
     * (memsize rounded up to a 4MB multiple). */
    nr_pt_pages = 1 + ((memsize + 3) >> 2);
    vpt_start = v_end;                      /* PT pages sit just above RAM */
    vpt_end = vpt_start + (nr_pt_pages * PAGE_SIZE);

    printf("VIRTUAL MEMORY ARRANGEMENT:\n"
           " Boot_params: %08lx->%08lx\n"
           " boot_gdt: %08lx->%08lx\n"
           " Loaded kernel: %08lx->%08lx\n"
           " Init. ramdisk: %08lx->%08lx\n"
           " Page tables: %08lx->%08lx\n"
           " TOTAL: %08lx->%08lx\n",
           vboot_params_start, vboot_params_end,
           vboot_gdt_start, vboot_gdt_end,
           dsi.v_kernstart, dsi.v_kernend,
           vinitrd_start, vinitrd_end,
           vpt_start, vpt_end,
           dsi.v_start, v_end);
    printf(" ENTRY ADDRESS: %08lx\n", dsi.v_kernentry);
    printf(" INITRD LENGTH: %08lx\n", initrd_len);

    if ( (v_end - dsi.v_start) > (nr_pages * PAGE_SIZE) )
    {
        printf("Initial guest OS requires too much space\n"
               "(%luMB is greater than %luMB limit)\n",
               (v_end-dsi.v_start)>>20, (nr_pages<<PAGE_SHIFT)>>20);
        goto error_out;
    }

    if ( (page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL )
    {
        PERROR("Could not allocate memory");
        goto error_out;
    }

    /* page_array[pfn] = machine frame number for that guest pfn. */
    if ( xc_get_pfn_list(xc_handle, dom, page_array, nr_pages) != nr_pages )
    {
        PERROR("Could not get the page frame list");
        goto error_out;
    }

    /* NOTE(review): return value ignored -- a segment copy failure here
     * is not detected. */
    loadelfimage(image, xc_handle, dom, page_array, dsi.v_start);

    /* Load the initial ramdisk image, one page per gzread(). */
    if ( initrd_len != 0 )
    {
        for ( i = (vinitrd_start - dsi.v_start);
              i < (vinitrd_end - dsi.v_start); i += PAGE_SIZE )
        {
            char page[PAGE_SIZE];
            /* NOTE(review): only -1 is treated as failure; a short read
             * (truncated initrd) would go unnoticed. */
            if ( gzread(initrd_gfd, page, PAGE_SIZE) == -1 )
            {
                PERROR("Error reading initrd image, could not");
                goto error_out;
            }
            xc_copy_to_domain_page(xc_handle, dom,
                                   page_array[i>>PAGE_SHIFT], page);
        }
    }

    if ( (mmu = init_mmu_updates(xc_handle, dom)) == NULL )
        goto error_out;

    /* First allocate page for page dir.  Page-table pages are taken from
     * the pfns at vpt_start, just above guest RAM. */
    ppt_alloc = (vpt_start - dsi.v_start) >> PAGE_SHIFT;
    l2tab = page_array[ppt_alloc++] << PAGE_SHIFT;
    ctxt->pt_base = l2tab;

    /* Initialise the page tables: identity map of the guest's
     * pseudo-physical space, dsi.v_start .. v_end. */
    if ( (vl2tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                        PROT_READ|PROT_WRITE,
                                        l2tab >> PAGE_SHIFT)) == NULL )
        goto error_out;
    memset(vl2tab, 0, PAGE_SIZE);
    vl2e = &vl2tab[l2_table_offset(dsi.v_start)];
    for ( count = 0; count < ((v_end-dsi.v_start)>>PAGE_SHIFT); count++ )
    {
        /* vl1e starts NULL, so the first iteration allocates the first
         * L1 table; thereafter a new L1 is allocated each time vl1e
         * crosses a page boundary. */
        if ( ((unsigned long)vl1e & (PAGE_SIZE-1)) == 0 )
        {
            l1tab = page_array[ppt_alloc++] << PAGE_SHIFT;
            if ( vl1tab != NULL )
                munmap(vl1tab, PAGE_SIZE);
            if ( (vl1tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                                PROT_READ|PROT_WRITE,
                                                l1tab >> PAGE_SHIFT)) == NULL )
            {
                munmap(vl2tab, PAGE_SIZE);
                goto error_out;
            }
            memset(vl1tab, 0, PAGE_SIZE);
            vl1e = &vl1tab[l1_table_offset(dsi.v_start + (count<<PAGE_SHIFT))];
            *vl2e++ = l1tab | L2_PROT;
        }

        *vl1e = (page_array[count] << PAGE_SHIFT) | L1_PROT;
        vl1e++;
    }
    munmap(vl1tab, PAGE_SIZE);
    munmap(vl2tab, PAGE_SIZE);

    /* Write the machine->phys table entries (mfn -> pfn, one per page). */
    for ( count = 0; count < nr_pages; count++ )
    {
        if ( add_mmu_update(xc_handle, mmu,
                            (page_array[count] << PAGE_SHIFT) |
                            MMU_MACHPHYS_UPDATE, count) )
            goto error_out;
    }

    /* Fill in the Linux boot-params page ("zero page"). */
    if ((boot_paramsp = xc_map_foreign_range(
        xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
        page_array[(vboot_params_start-dsi.v_start)>>PAGE_SHIFT])) == 0)
        goto error_out;
    memset(boot_paramsp, 0, sizeof(*boot_paramsp));

    strncpy((char *)boot_paramsp->cmd_line, cmdline, 0x800);
    boot_paramsp->cmd_line[0x800-1] = '\0';  /* force NUL termination */
    boot_paramsp->cmd_line_ptr = ((unsigned long) vboot_params_start) + offsetof(struct linux_boot_params, cmd_line);

    boot_paramsp->setup_sects = 0;
    boot_paramsp->mount_root_rdonly = 1;
    boot_paramsp->swapdev = 0x0;
    boot_paramsp->ramdisk_flags = 0x0;
    boot_paramsp->root_dev = 0x0; /* We must tell kernel root dev by kernel command line. */

    /* we don't have a ps/2 mouse now.
     * 0xAA means a aux mouse is there.
     * See detect_auxiliary_port() in pc_keyb.c.
     */
    boot_paramsp->aux_device_info = 0x0;

    /* "HdrS" magic marks a modern boot protocol header. */
    boot_paramsp->header_magic[0] = 0x48; /* "H" */
    boot_paramsp->header_magic[1] = 0x64; /* "d" */
    boot_paramsp->header_magic[2] = 0x72; /* "r" */
    boot_paramsp->header_magic[3] = 0x53; /* "S" */

    boot_paramsp->protocol_version = 0x0203; /* 2.03 */
    boot_paramsp->loader_type = 0x71; /* GRUB */
    boot_paramsp->loader_flags = 0x1; /* loaded high */
    boot_paramsp->code32_start = LINUX_KERNEL_ENTR_ADDR; /* 1MB */
    boot_paramsp->initrd_start = vinitrd_start;
    boot_paramsp->initrd_size = initrd_len;

    /* Memory above 1MB in KB, minus 4KB -- presumably excluding the
     * shared IO page; verify against the kernel's use of alt_mem_k. */
    i = ((memsize - 1) << 10) - 4;
    boot_paramsp->alt_mem_k = i; /* alt_mem_k */
    boot_paramsp->screen.overlap.ext_mem_k = i & 0xFFFF; /* ext_mem_k */

    /*
     * Stuff SCREAN_INFO: fake an 80x25 colour VGA text console.
     */
    boot_paramsp->screen.info.orig_x = 0;
    boot_paramsp->screen.info.orig_y = 0;
    boot_paramsp->screen.info.orig_video_page = 8;
    boot_paramsp->screen.info.orig_video_mode = 3;
    boot_paramsp->screen.info.orig_video_cols = 80;
    boot_paramsp->screen.info.orig_video_ega_bx = 0;
    boot_paramsp->screen.info.orig_video_lines = 25;
    boot_paramsp->screen.info.orig_video_isVGA = 1;
    boot_paramsp->screen.info.orig_video_points = 0x0010;

    /* seems we may NOT stuff boot_paramsp->apm_bios_info */
    /* seems we may NOT stuff boot_paramsp->drive_info */
    /* seems we may NOT stuff boot_paramsp->sys_desc_table */
    *((unsigned short *) &boot_paramsp->drive_info.dummy[0]) = 800;
    boot_paramsp->drive_info.dummy[2] = 4;
    boot_paramsp->drive_info.dummy[14] = 32;

    /* memsize is in megabytes */
    build_e820map(mem_mapp, memsize << 20);
    if (zap_mmio_ranges(xc_handle, dom, l2tab, mem_mapp) == -1)
        goto error_out;
    /* Copy the e820 map into the boot params for the guest kernel. */
    boot_paramsp->e820_map_nr = mem_mapp->nr_map;
    for (i=0; i<mem_mapp->nr_map; i++) {
        boot_paramsp->e820_map[i].addr = mem_mapp->map[i].addr;
        boot_paramsp->e820_map[i].size = mem_mapp->map[i].size;
        boot_paramsp->e820_map[i].type = mem_mapp->map[i].type;
    }
    munmap(boot_paramsp, PAGE_SIZE);

    /* Boot GDT: flat 4GB code (entry 12, selector 0x60) and data
     * (entry 13, selector 0x68) descriptors, four u16 words each. */
    if ((boot_gdtp = xc_map_foreign_range(
        xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
        page_array[(vboot_gdt_start-dsi.v_start)>>PAGE_SHIFT])) == 0)
        goto error_out;
    memset(boot_gdtp, 0, PAGE_SIZE);
    boot_gdtp[12*4 + 0] = boot_gdtp[13*4 + 0] = 0xffff; /* limit */
    boot_gdtp[12*4 + 1] = boot_gdtp[13*4 + 1] = 0x0000; /* base */
    boot_gdtp[12*4 + 2] = 0x9a00; boot_gdtp[13*4 + 2] = 0x9200; /* perms */
    boot_gdtp[12*4 + 3] = boot_gdtp[13*4 + 3] = 0x00cf; /* granu + top of limit */
    munmap(boot_gdtp, PAGE_SIZE);

    /* shared_info page starts its life empty. */
    if ((shared_info = xc_map_foreign_range(
        xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
        shared_info_frame)) == 0)
        goto error_out;
    memset(shared_info, 0, sizeof(shared_info_t));
    /* Mask all upcalls... */
    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
        shared_info->vcpu_data[i].evtchn_upcall_mask = 1;
    munmap(shared_info, PAGE_SIZE);

    /*
     * Pin down l2tab addr as page dir page - causes hypervisor to provide
     * correct protection for the page
     */
    if ( pin_table(xc_handle, MMUEXT_PIN_L2_TABLE, l2tab>>PAGE_SHIFT, dom) )
        goto error_out;

    /* Send the page update requests down to the hypervisor. */
    if ( finish_mmu_updates(xc_handle, mmu) )
        goto error_out;

    free(mmu);
    free(page_array);

    /*
     * Initial register values: selectors match the boot GDT above
     * (cs=0x60 code, ds/ss=0x68 data); entry point from the ELF image.
     */
    ctxt->user_regs.ds = 0x68;
    ctxt->user_regs.es = 0x0;
    ctxt->user_regs.fs = 0x0;
    ctxt->user_regs.gs = 0x0;
    ctxt->user_regs.ss = 0x68;
    ctxt->user_regs.cs = 0x60;
    ctxt->user_regs.eip = dsi.v_kernentry;
    ctxt->user_regs.edx = vboot_gdt_start;
    ctxt->user_regs.eax = 0x800;
    ctxt->user_regs.esp = vboot_gdt_end;
    ctxt->user_regs.ebx = 0; /* startup_32 expects this to be 0 to signal boot cpu */
    ctxt->user_regs.ecx = mem_mapp->nr_map;
    ctxt->user_regs.esi = vboot_params_start;
    ctxt->user_regs.edi = vboot_params_start + 0x2d0;

    ctxt->user_regs.eflags = (1<<2);  /* only the reserved always-set bit */

    return 0;

 error_out:
    if ( mmu != NULL )
        free(mmu);
    if ( page_array != NULL )
        free(page_array);
    return -1;
}
#define VMX_FEATURE_FLAG 0x20   /* CPUID leaf 1: ECX bit 5 (VMX) */

/*
 * Probe CPUID leaf 1 for hardware virtualisation support.
 * EBX is saved/restored by hand because the compiler may be using it
 * (PIC register) and it is clobbered by CPUID.
 *
 * Returns 0 if the CPU advertises VMX, -1 otherwise.
 */
int vmx_identify(void)
{
    /* Fix: initialise the outputs -- on a build where neither x86
     * branch is compiled, ecx was read uninitialised (UB); now such a
     * build deterministically reports "no VMX". */
    int eax = 0, ecx = 0;

#ifdef __i386__
    __asm__ __volatile__ ("pushl %%ebx; cpuid; popl %%ebx"
                          : "=a" (eax), "=c" (ecx)
                          : "0" (1)
                          : "dx");
#elif defined __x86_64__
    __asm__ __volatile__ ("pushq %%rbx; cpuid; popq %%rbx"
                          : "=a" (eax), "=c" (ecx)
                          : "0" (1)
                          : "dx");
#endif

    (void)eax;  /* output operand required by the asm; value unused */

    if (!(ecx & VMX_FEATURE_FLAG)) {
        return -1;
    }
    return 0;
}
487 int xc_vmx_build(int xc_handle,
488 u32 domid,
489 int memsize,
490 const char *image_name,
491 struct mem_map *mem_mapp,
492 const char *ramdisk_name,
493 const char *cmdline,
494 unsigned int control_evtchn,
495 unsigned long flags)
496 {
497 dom0_op_t launch_op, op;
498 int initrd_fd = -1;
499 gzFile initrd_gfd = NULL;
500 int rc, i;
501 vcpu_guest_context_t st_ctxt, *ctxt = &st_ctxt;
502 unsigned long nr_pages;
503 char *image = NULL;
504 unsigned long image_size, initrd_size=0;
506 if ( vmx_identify() < 0 )
507 {
508 PERROR("CPU doesn't support VMX Extensions");
509 goto error_out;
510 }
512 if ( (nr_pages = xc_get_tot_pages(xc_handle, domid)) < 0 )
513 {
514 PERROR("Could not find total pages for domain");
515 goto error_out;
516 }
518 if ( (image = xc_read_kernel_image(image_name, &image_size)) == NULL )
519 goto error_out;
521 if ( (ramdisk_name != NULL) && (strlen(ramdisk_name) != 0) )
522 {
523 if ( (initrd_fd = open(ramdisk_name, O_RDONLY)) < 0 )
524 {
525 PERROR("Could not open the initial ramdisk image");
526 goto error_out;
527 }
529 initrd_size = xc_get_filesz(initrd_fd);
531 if ( (initrd_gfd = gzdopen(initrd_fd, "rb")) == NULL )
532 {
533 PERROR("Could not allocate decompression state for initrd");
534 goto error_out;
535 }
536 }
538 if ( mlock(&st_ctxt, sizeof(st_ctxt) ) )
539 {
540 PERROR("Unable to mlock ctxt");
541 return 1;
542 }
544 op.cmd = DOM0_GETDOMAININFO;
545 op.u.getdomaininfo.domain = (domid_t)domid;
546 op.u.getdomaininfo.exec_domain = 0;
547 op.u.getdomaininfo.ctxt = ctxt;
548 if ( (do_dom0_op(xc_handle, &op) < 0) ||
549 ((u16)op.u.getdomaininfo.domain != domid) )
550 {
551 PERROR("Could not get info on domain");
552 goto error_out;
553 }
554 if ( !(op.u.getdomaininfo.flags & DOMFLAGS_PAUSED) ||
555 (ctxt->pt_base != 0) )
556 {
557 ERROR("Domain is already constructed");
558 goto error_out;
559 }
561 if ( setup_guest(xc_handle, domid, memsize, image, image_size,
562 initrd_gfd, initrd_size, nr_pages,
563 ctxt, cmdline,
564 op.u.getdomaininfo.shared_info_frame,
565 control_evtchn, flags, mem_mapp) < 0 )
566 {
567 ERROR("Error constructing guest OS");
568 goto error_out;
569 }
571 if ( initrd_fd >= 0 )
572 close(initrd_fd);
573 if ( initrd_gfd )
574 gzclose(initrd_gfd);
575 if ( image != NULL )
576 free(image);
578 ctxt->flags = VGCF_VMX_GUEST;
579 /* FPU is set up to default initial state. */
580 memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
582 /* Virtual IDT is empty at start-of-day. */
583 for ( i = 0; i < 256; i++ )
584 {
585 ctxt->trap_ctxt[i].vector = i;
586 ctxt->trap_ctxt[i].cs = FLAT_KERNEL_CS;
587 }
589 #if defined(__i386__)
590 ctxt->fast_trap_idx = 0;
591 #endif
593 /* No LDT. */
594 ctxt->ldt_ents = 0;
596 /* Use the default Xen-provided GDT. */
597 ctxt->gdt_ents = 0;
599 /* Ring 1 stack is the initial stack. */
600 /*
601 ctxt->kernel_ss = FLAT_KERNEL_DS;
602 ctxt->kernel_sp = vstartinfo_start;
603 */
604 /* No debugging. */
605 memset(ctxt->debugreg, 0, sizeof(ctxt->debugreg));
607 /* No callback handlers. */
608 #if defined(__i386__)
609 ctxt->event_callback_cs = FLAT_KERNEL_CS;
610 ctxt->event_callback_eip = 0;
611 ctxt->failsafe_callback_cs = FLAT_KERNEL_CS;
612 ctxt->failsafe_callback_eip = 0;
613 #elif defined(__x86_64__)
614 ctxt->event_callback_eip = 0;
615 ctxt->failsafe_callback_eip = 0;
616 ctxt->syscall_callback_eip = 0;
617 #endif
619 memset( &launch_op, 0, sizeof(launch_op) );
621 launch_op.u.setdomaininfo.domain = (domid_t)domid;
622 launch_op.u.setdomaininfo.exec_domain = 0;
623 launch_op.u.setdomaininfo.ctxt = ctxt;
625 launch_op.cmd = DOM0_SETDOMAININFO;
626 rc = do_dom0_op(xc_handle, &launch_op);
627 return rc;
629 error_out:
630 if ( initrd_gfd != NULL )
631 gzclose(initrd_gfd);
632 else if ( initrd_fd >= 0 )
633 close(initrd_fd);
634 if ( image != NULL )
635 free(image);
637 return -1;
638 }
640 static inline int is_loadable_phdr(Elf_Phdr *phdr)
641 {
642 return ((phdr->p_type == PT_LOAD) &&
643 ((phdr->p_flags & (PF_W|PF_X)) != 0));
644 }
646 static int parseelfimage(char *elfbase,
647 unsigned long elfsize,
648 struct domain_setup_info *dsi)
649 {
650 Elf_Ehdr *ehdr = (Elf_Ehdr *)elfbase;
651 Elf_Phdr *phdr;
652 Elf_Shdr *shdr;
653 unsigned long kernstart = ~0UL, kernend=0UL;
654 char *shstrtab;
655 int h;
657 if ( !IS_ELF(*ehdr) )
658 {
659 ERROR("Kernel image does not have an ELF header.");
660 return -EINVAL;
661 }
663 if ( (ehdr->e_phoff + (ehdr->e_phnum * ehdr->e_phentsize)) > elfsize )
664 {
665 ERROR("ELF program headers extend beyond end of image.");
666 return -EINVAL;
667 }
669 if ( (ehdr->e_shoff + (ehdr->e_shnum * ehdr->e_shentsize)) > elfsize )
670 {
671 ERROR("ELF section headers extend beyond end of image.");
672 return -EINVAL;
673 }
675 /* Find the section-header strings table. */
676 if ( ehdr->e_shstrndx == SHN_UNDEF )
677 {
678 ERROR("ELF image has no section-header strings table (shstrtab).");
679 return -EINVAL;
680 }
681 shdr = (Elf_Shdr *)(elfbase + ehdr->e_shoff +
682 (ehdr->e_shstrndx*ehdr->e_shentsize));
683 shstrtab = elfbase + shdr->sh_offset;
685 for ( h = 0; h < ehdr->e_phnum; h++ )
686 {
687 phdr = (Elf_Phdr *)(elfbase + ehdr->e_phoff + (h*ehdr->e_phentsize));
688 if ( !is_loadable_phdr(phdr) )
689 continue;
690 if ( phdr->p_paddr < kernstart )
691 kernstart = phdr->p_paddr;
692 if ( (phdr->p_paddr + phdr->p_memsz) > kernend )
693 kernend = phdr->p_paddr + phdr->p_memsz;
694 }
696 if ( (kernstart > kernend) ||
697 (ehdr->e_entry < kernstart) ||
698 (ehdr->e_entry > kernend) )
699 {
700 ERROR("Malformed ELF image.");
701 return -EINVAL;
702 }
704 dsi->v_start = 0x00000000;
706 dsi->v_kernstart = kernstart - LINUX_PAGE_OFFSET;
707 dsi->v_kernend = kernend - LINUX_PAGE_OFFSET;
708 dsi->v_kernentry = LINUX_KERNEL_ENTR_ADDR;
710 dsi->v_end = dsi->v_kernend;
712 return 0;
713 }
/*
 * Copy the loadable ELF segments into the domain's memory.
 *
 * For each loadable program header: the first p_filesz bytes are copied
 * from the image, page by page, into the machine frames given by
 * parray[] (indexed by guest pfn); the remaining p_memsz - p_filesz
 * bytes (BSS) are zeroed.  Segment addresses are translated to
 * guest-physical by subtracting vstart and LINUX_PAGE_OFFSET.
 *
 * Returns 0 on success, -1 if a destination frame cannot be mapped.
 */
static int
loadelfimage(
    char *elfbase, int xch, u32 dom, unsigned long *parray,
    unsigned long vstart)
{
    Elf_Ehdr *ehdr = (Elf_Ehdr *)elfbase;
    Elf_Phdr *phdr;
    int h;

    char *va;                       /* local mapping of the target frame */
    unsigned long pa, done, chunksz;

    for ( h = 0; h < ehdr->e_phnum; h++ )
    {
        phdr = (Elf_Phdr *)(elfbase + ehdr->e_phoff + (h*ehdr->e_phentsize));
        if ( !is_loadable_phdr(phdr) )
            continue;

        /* Copy phase: file-backed bytes, clipped to one page per pass
         * (chunksz never crosses a page boundary). */
        for ( done = 0; done < phdr->p_filesz; done += chunksz )
        {
            pa = (phdr->p_paddr + done) - vstart - LINUX_PAGE_OFFSET;
            if ((va = xc_map_foreign_range(
                xch, dom, PAGE_SIZE, PROT_WRITE,
                parray[pa>>PAGE_SHIFT])) == 0)
                return -1;
            chunksz = phdr->p_filesz - done;
            if ( chunksz > (PAGE_SIZE - (pa & (PAGE_SIZE-1))) )
                chunksz = PAGE_SIZE - (pa & (PAGE_SIZE-1));
            memcpy(va + (pa & (PAGE_SIZE-1)),
                   elfbase + phdr->p_offset + done, chunksz);
            munmap(va, PAGE_SIZE);
        }

        /* Zero phase: BSS tail, continuing from where the copy stopped. */
        for ( ; done < phdr->p_memsz; done += chunksz )
        {
            pa = (phdr->p_paddr + done) - vstart - LINUX_PAGE_OFFSET;
            if ((va = xc_map_foreign_range(
                xch, dom, PAGE_SIZE, PROT_WRITE,
                parray[pa>>PAGE_SHIFT])) == 0)
                return -1;
            chunksz = phdr->p_memsz - done;
            if ( chunksz > (PAGE_SIZE - (pa & (PAGE_SIZE-1))) )
                chunksz = PAGE_SIZE - (pa & (PAGE_SIZE-1));
            memset(va + (pa & (PAGE_SIZE-1)), 0, chunksz);
            munmap(va, PAGE_SIZE);
        }
    }

    return 0;
}