ia64/xen-unstable

view tools/libxc/xc_vmx_build.c @ 6766:219d96d545fc

author cl349@firebug.cl.cam.ac.uk
date Mon Sep 12 20:00:41 2005 +0000 (2005-09-12)
parents dd668f7527cb 888094e5ac07
children 4d899a738d59 8ca0f98ba8e2
/******************************************************************************
 * xc_vmx_build.c
 */

#include <stddef.h>
#include "xg_private.h"
#define ELFSIZE 32
#include "xc_elf.h"
#include <stdlib.h>
#include <unistd.h>
#include <zlib.h>
#include <xen/io/ioreq.h>
#include "linux_boot_params.h"

#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
#ifdef __x86_64__
#define L3_PROT (_PAGE_PRESENT)
#endif

#define round_pgup(_p)   (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
#define round_pgdown(_p) ((_p)&PAGE_MASK)

#define LINUX_BOOT_PARAMS_ADDR 0x00090000
#define LINUX_KERNEL_ENTR_ADDR 0x00100000
#define LINUX_PAGE_OFFSET      0xC0000000

static int
parseelfimage(
    char *elfbase, unsigned long elfsize, struct domain_setup_info *dsi);
static int
loadelfimage(
    char *elfbase, int xch, u32 dom, unsigned long *parray,
    struct domain_setup_info *dsi);
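
/*
 * Construct the pseudo-physical e820 memory map for the VMX guest:
 * conventional memory below 640K, the VGA and BIOS holes, the bulk of RAM
 * above 1MB, the statically allocated ioreq/xenstore pages at the top of
 * RAM, and the APIC/PCI MMIO window at 0xFEC00000.
 */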
static void build_e820map(struct mem_map *mem_mapp, unsigned long mem_size)
{
    int nr_map = 0;

    /* XXX: Doesn't work for > 4GB yet */
    mem_mapp->map[nr_map].addr = 0x0;
    mem_mapp->map[nr_map].size = 0x9F800;
    mem_mapp->map[nr_map].type = E820_RAM;
    mem_mapp->map[nr_map].caching_attr = MEMMAP_WB;
    nr_map++;

    mem_mapp->map[nr_map].addr = 0x9F800;
    mem_mapp->map[nr_map].size = 0x800;
    mem_mapp->map[nr_map].type = E820_RESERVED;
    mem_mapp->map[nr_map].caching_attr = MEMMAP_UC;
    nr_map++;

    mem_mapp->map[nr_map].addr = 0xA0000;
    mem_mapp->map[nr_map].size = 0x20000;
    mem_mapp->map[nr_map].type = E820_IO;
    mem_mapp->map[nr_map].caching_attr = MEMMAP_UC;
    nr_map++;

    mem_mapp->map[nr_map].addr = 0xF0000;
    mem_mapp->map[nr_map].size = 0x10000;
    mem_mapp->map[nr_map].type = E820_RESERVED;
    mem_mapp->map[nr_map].caching_attr = MEMMAP_UC;
    nr_map++;

#define STATIC_PAGES 2 /* for ioreq_t and store_mfn */
    /* Most of the RAM goes here */
    mem_mapp->map[nr_map].addr = 0x100000;
    mem_mapp->map[nr_map].size = mem_size - 0x100000 - STATIC_PAGES*PAGE_SIZE;
    mem_mapp->map[nr_map].type = E820_RAM;
    mem_mapp->map[nr_map].caching_attr = MEMMAP_WB;
    nr_map++;

    /* Statically allocated special pages */

    /* Shared ioreq_t page */
    mem_mapp->map[nr_map].addr = mem_size - PAGE_SIZE;
    mem_mapp->map[nr_map].size = PAGE_SIZE;
    mem_mapp->map[nr_map].type = E820_SHARED;
    mem_mapp->map[nr_map].caching_attr = MEMMAP_WB;
    nr_map++;

    /* For xenstore */
    mem_mapp->map[nr_map].addr = mem_size - 2*PAGE_SIZE;
    mem_mapp->map[nr_map].size = PAGE_SIZE;
    mem_mapp->map[nr_map].type = E820_XENSTORE;
    mem_mapp->map[nr_map].caching_attr = MEMMAP_WB;
    nr_map++;

    mem_mapp->map[nr_map].addr = mem_size;
    mem_mapp->map[nr_map].size = 0x3 * PAGE_SIZE;
    mem_mapp->map[nr_map].type = E820_NVS;
    mem_mapp->map[nr_map].caching_attr = MEMMAP_UC;
    nr_map++;

    mem_mapp->map[nr_map].addr = mem_size + 0x3 * PAGE_SIZE;
    mem_mapp->map[nr_map].size = 0xA * PAGE_SIZE;
    mem_mapp->map[nr_map].type = E820_ACPI;
    mem_mapp->map[nr_map].caching_attr = MEMMAP_WB;
    nr_map++;

    mem_mapp->map[nr_map].addr = 0xFEC00000;
    mem_mapp->map[nr_map].size = 0x1400000;
    mem_mapp->map[nr_map].type = E820_IO;
    mem_mapp->map[nr_map].caching_attr = MEMMAP_UC;
    nr_map++;

    mem_mapp->nr_map = nr_map;
}
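
/*
 * Remove any mappings that cover the guest's uncacheable MMIO ranges, so
 * that guest accesses to those addresses fault and can be forwarded to the
 * device model rather than hitting RAM. The i386 variant walks a 2-level
 * page table; the x86_64/PAE variant below walks an extra L3 (PDPT) level.
 */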
#ifdef __i386__
static int zap_mmio_range(int xc_handle, u32 dom,
                          l2_pgentry_32_t *vl2tab,
                          unsigned long mmio_range_start,
                          unsigned long mmio_range_size)
{
    unsigned long mmio_addr;
    unsigned long mmio_range_end = mmio_range_start + mmio_range_size;
    unsigned long vl2e;
    l1_pgentry_32_t *vl1tab;

    mmio_addr = mmio_range_start & PAGE_MASK;
    for (; mmio_addr < mmio_range_end; mmio_addr += PAGE_SIZE) {
        vl2e = vl2tab[l2_table_offset(mmio_addr)];
        if (vl2e == 0)
            continue;
        vl1tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                      PROT_READ|PROT_WRITE, vl2e >> PAGE_SHIFT);
        if (vl1tab == 0) {
            PERROR("Failed to zap MMIO range");
            return -1;
        }
        vl1tab[l1_table_offset(mmio_addr)] = 0;
        munmap(vl1tab, PAGE_SIZE);
    }
    return 0;
}

static int zap_mmio_ranges(int xc_handle, u32 dom,
                           unsigned long l2tab,
                           struct mem_map *mem_mapp)
{
    int i;
    l2_pgentry_32_t *vl2tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                                   PROT_READ|PROT_WRITE,
                                                   l2tab >> PAGE_SHIFT);
    if (vl2tab == 0)
        return -1;
    for (i = 0; i < mem_mapp->nr_map; i++) {
        if ((mem_mapp->map[i].type == E820_IO)
            && (mem_mapp->map[i].caching_attr == MEMMAP_UC))
            if (zap_mmio_range(xc_handle, dom, vl2tab,
                               mem_mapp->map[i].addr,
                               mem_mapp->map[i].size) == -1)
                return -1;
    }
    munmap(vl2tab, PAGE_SIZE);
    return 0;
}
#else
static int zap_mmio_range(int xc_handle, u32 dom,
                          l3_pgentry_t *vl3tab,
                          unsigned long mmio_range_start,
                          unsigned long mmio_range_size)
{
    unsigned long mmio_addr;
    unsigned long mmio_range_end = mmio_range_start + mmio_range_size;
    unsigned long vl2e = 0;
    unsigned long vl3e;
    l1_pgentry_t *vl1tab;
    l2_pgentry_t *vl2tab;

    mmio_addr = mmio_range_start & PAGE_MASK;
    for (; mmio_addr < mmio_range_end; mmio_addr += PAGE_SIZE) {
        vl3e = vl3tab[l3_table_offset(mmio_addr)];
        if (vl3e == 0)
            continue;
        vl2tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                      PROT_READ|PROT_WRITE, vl3e >> PAGE_SHIFT);
        if (vl2tab == 0) {
            PERROR("Failed to zap MMIO range");
            return -1;
        }
        vl2e = vl2tab[l2_table_offset(mmio_addr)];
        if (vl2e == 0) {
            munmap(vl2tab, PAGE_SIZE);
            continue;
        }
        vl1tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                      PROT_READ|PROT_WRITE, vl2e >> PAGE_SHIFT);
        if (vl1tab == 0) {
            PERROR("Failed to zap MMIO range");
            munmap(vl2tab, PAGE_SIZE);
            return -1;
        }
        vl1tab[l1_table_offset(mmio_addr)] = 0;
        munmap(vl2tab, PAGE_SIZE);
        munmap(vl1tab, PAGE_SIZE);
    }
    return 0;
}

static int zap_mmio_ranges(int xc_handle, u32 dom,
                           unsigned long l3tab,
                           struct mem_map *mem_mapp)
{
    int i;
    l3_pgentry_t *vl3tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                                PROT_READ|PROT_WRITE,
                                                l3tab >> PAGE_SHIFT);
    if (vl3tab == 0)
        return -1;
    for (i = 0; i < mem_mapp->nr_map; i++) {
        if ((mem_mapp->map[i].type == E820_IO)
            && (mem_mapp->map[i].caching_attr == MEMMAP_UC))
            if (zap_mmio_range(xc_handle, dom, vl3tab,
                               mem_mapp->map[i].addr,
                               mem_mapp->map[i].size) == -1)
                return -1;
    }
    munmap(vl3tab, PAGE_SIZE);
    return 0;
}
#endif
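
/*
 * Build the initial VMX guest image: lay out the bootstrap address space
 * (Linux boot params and GDT below 1MB, kernel at 1MB, optional initrd
 * near the top of RAM, page tables above it), load the kernel and initrd,
 * construct the guest page tables and machine->phys table, and fill in
 * the boot-params page, boot GDT, shared_info and shared I/O pages.
 */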
static int setup_guest(int xc_handle,
                       u32 dom, int memsize,
                       char *image, unsigned long image_size,
                       gzFile initrd_gfd, unsigned long initrd_len,
                       unsigned long nr_pages,
                       vcpu_guest_context_t *ctxt,
                       const char *cmdline,
                       unsigned long shared_info_frame,
                       unsigned int control_evtchn,
                       unsigned long flags,
                       unsigned int vcpus,
                       unsigned int store_evtchn,
                       unsigned long *store_mfn,
                       struct mem_map *mem_mapp)
{
    l1_pgentry_t *vl1tab=NULL, *vl1e=NULL;
    l2_pgentry_t *vl2tab=NULL, *vl2e=NULL;
    unsigned long *page_array = NULL;
#ifdef __x86_64__
    l3_pgentry_t *vl3tab=NULL, *vl3e=NULL;
    unsigned long l3tab;
#endif
    unsigned long l2tab;
    unsigned long l1tab;
    unsigned long count, i;
    shared_info_t *shared_info;
    struct linux_boot_params *boot_paramsp;
    __u16 *boot_gdtp;
    xc_mmu_t *mmu = NULL;
    int rc;

    unsigned long nr_pt_pages;
    unsigned long ppt_alloc;

    struct domain_setup_info dsi;
    unsigned long vinitrd_start;
    unsigned long vinitrd_end;
    unsigned long vboot_params_start;
    unsigned long vboot_params_end;
    unsigned long vboot_gdt_start;
    unsigned long vboot_gdt_end;
    unsigned long vpt_start;
    unsigned long vpt_end;
    unsigned long v_end;

    unsigned long shared_page_frame = 0;
    shared_iopage_t *sp;

    memset(&dsi, 0, sizeof(struct domain_setup_info));

    if ( (rc = parseelfimage(image, image_size, &dsi)) != 0 )
        goto error_out;

    if ( (dsi.v_start & (PAGE_SIZE-1)) != 0 )
    {
        PERROR("Guest OS must load to a page boundary.\n");
        goto error_out;
    }

    /*
     * Lay out the bootstrap address space. The boot-params page and the
     * boot GDT occupy one page each, starting at LINUX_BOOT_PARAMS_ADDR.
     */
    vboot_params_start = LINUX_BOOT_PARAMS_ADDR;
    vboot_params_end = vboot_params_start + PAGE_SIZE;
    vboot_gdt_start = vboot_params_end;
    vboot_gdt_end = vboot_gdt_start + PAGE_SIZE;

    /* memsize is in megabytes */
    v_end = memsize << 20;
    vinitrd_end = v_end - PAGE_SIZE; /* leave the top 4KB untouched for the I/O request page */
    vinitrd_start = vinitrd_end - initrd_len;
    vinitrd_start = vinitrd_start & (~(PAGE_SIZE - 1));

    if ( initrd_len == 0 )
        vinitrd_start = vinitrd_end = 0;
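
    /*
     * Reserve room for the bootstrap page tables: on i386, one L2 page
     * directory plus one L1 page per 4MB mapped; under PAE/x86_64, one L1
     * page per 2MB plus five frames of slack for the PDPT and L2 pages.
     */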
#ifdef __i386__
    nr_pt_pages = 1 + ((memsize + 3) >> 2);
#else
    nr_pt_pages = 5 + ((memsize + 1) >> 1);
#endif
    vpt_start = v_end;
    vpt_end = vpt_start + (nr_pt_pages * PAGE_SIZE);

    printf("VIRTUAL MEMORY ARRANGEMENT:\n"
           " Boot_params:   %08lx->%08lx\n"
           " boot_gdt:      %08lx->%08lx\n"
           " Loaded kernel: %08lx->%08lx\n"
           " Init. ramdisk: %08lx->%08lx\n"
           " Page tables:   %08lx->%08lx\n"
           " TOTAL:         %08lx->%08lx\n",
           vboot_params_start, vboot_params_end,
           vboot_gdt_start, vboot_gdt_end,
           dsi.v_kernstart, dsi.v_kernend,
           vinitrd_start, vinitrd_end,
           vpt_start, vpt_end,
           dsi.v_start, v_end);
    printf(" ENTRY ADDRESS: %08lx\n", dsi.v_kernentry);
    printf(" INITRD LENGTH: %08lx\n", initrd_len);

    if ( (v_end - dsi.v_start) > (nr_pages * PAGE_SIZE) )
    {
        printf("Initial guest OS requires too much space\n"
               "(%luMB is greater than %luMB limit)\n",
               (v_end-dsi.v_start)>>20, (nr_pages<<PAGE_SHIFT)>>20);
        goto error_out;
    }

    if ( (page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL )
    {
        PERROR("Could not allocate memory");
        goto error_out;
    }

    if ( xc_get_pfn_list(xc_handle, dom, page_array, nr_pages) != nr_pages )
    {
        PERROR("Could not get the page frame list");
        goto error_out;
    }

    loadelfimage(image, xc_handle, dom, page_array, &dsi);

    /* Load the initial ramdisk image. */
    if ( initrd_len != 0 )
    {
        for ( i = (vinitrd_start - dsi.v_start);
              i < (vinitrd_end - dsi.v_start); i += PAGE_SIZE )
        {
            char page[PAGE_SIZE];
            if ( gzread(initrd_gfd, page, PAGE_SIZE) == -1 )
            {
                PERROR("Error reading initrd image");
                goto error_out;
            }
            xc_copy_to_domain_page(xc_handle, dom,
                                   page_array[i>>PAGE_SHIFT], page);
        }
    }

    if ( (mmu = xc_init_mmu_updates(xc_handle, dom)) == NULL )
        goto error_out;
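
    /*
     * Build the bootstrap page tables: virtual addresses dsi.v_start..v_end
     * are mapped linearly onto the machine frames in page_array, with the
     * page-table frames themselves taken from the region reserved at
     * vpt_start.
     */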
#ifdef __i386__
    /* First allocate page for page dir. */
    ppt_alloc = (vpt_start - dsi.v_start) >> PAGE_SHIFT;
    l2tab = page_array[ppt_alloc++] << PAGE_SHIFT;
    ctxt->ctrlreg[3] = l2tab;

    /* Initialise the page tables. */
    if ( (vl2tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                        PROT_READ|PROT_WRITE,
                                        l2tab >> PAGE_SHIFT)) == NULL )
        goto error_out;
    memset(vl2tab, 0, PAGE_SIZE);
    vl2e = &vl2tab[l2_table_offset(dsi.v_start)];
    for ( count = 0; count < ((v_end-dsi.v_start)>>PAGE_SHIFT); count++ )
    {
        if ( ((unsigned long)vl1e & (PAGE_SIZE-1)) == 0 )
        {
            l1tab = page_array[ppt_alloc++] << PAGE_SHIFT;
            if ( vl1tab != NULL )
                munmap(vl1tab, PAGE_SIZE);
            if ( (vl1tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                                PROT_READ|PROT_WRITE,
                                                l1tab >> PAGE_SHIFT)) == NULL )
            {
                munmap(vl2tab, PAGE_SIZE);
                goto error_out;
            }
            memset(vl1tab, 0, PAGE_SIZE);
            vl1e = &vl1tab[l1_table_offset(dsi.v_start + (count<<PAGE_SHIFT))];
            *vl2e++ = l1tab | L2_PROT;
        }

        *vl1e = (page_array[count] << PAGE_SHIFT) | L1_PROT;
        vl1e++;
    }
    munmap(vl1tab, PAGE_SIZE);
    munmap(vl2tab, PAGE_SIZE);
#else
    /* First allocate the PDPT. */
    ppt_alloc = (vpt_start - dsi.v_start) >> PAGE_SHIFT;
    /* Here l3tab means the PDPT; only 4 entries are used. */
    l3tab = page_array[ppt_alloc++] << PAGE_SHIFT;
    ctxt->ctrlreg[3] = l3tab;

    /* Initialise the page tables. */
    if ( (vl3tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                        PROT_READ|PROT_WRITE,
                                        l3tab >> PAGE_SHIFT)) == NULL )
        goto error_out;
    memset(vl3tab, 0, PAGE_SIZE);

    vl3e = &vl3tab[l3_table_offset(dsi.v_start)];

    for ( count = 0; count < ((v_end-dsi.v_start)>>PAGE_SHIFT); count++ )
    {
        if ( !(count % (1 << (L3_PAGETABLE_SHIFT - L1_PAGETABLE_SHIFT))) )
        {
            l2tab = page_array[ppt_alloc++] << PAGE_SHIFT;

            if ( vl2tab != NULL )
                munmap(vl2tab, PAGE_SIZE);

            if ( (vl2tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                                PROT_READ|PROT_WRITE,
                                                l2tab >> PAGE_SHIFT)) == NULL )
                goto error_out;

            memset(vl2tab, 0, PAGE_SIZE);
            *vl3e++ = l2tab | L3_PROT;
            vl2e = &vl2tab[l2_table_offset(dsi.v_start + (count << PAGE_SHIFT))];
        }
        if ( ((unsigned long)vl1e & (PAGE_SIZE-1)) == 0 )
        {
            l1tab = page_array[ppt_alloc++] << PAGE_SHIFT;
            if ( vl1tab != NULL )
                munmap(vl1tab, PAGE_SIZE);
            if ( (vl1tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                                PROT_READ|PROT_WRITE,
                                                l1tab >> PAGE_SHIFT)) == NULL )
            {
                munmap(vl2tab, PAGE_SIZE);
                goto error_out;
            }
            memset(vl1tab, 0, PAGE_SIZE);
            vl1e = &vl1tab[l1_table_offset(dsi.v_start + (count<<PAGE_SHIFT))];
            *vl2e++ = l1tab | L2_PROT;
        }

        *vl1e = (page_array[count] << PAGE_SHIFT) | L1_PROT;
        vl1e++;
    }

    munmap(vl1tab, PAGE_SIZE);
    munmap(vl2tab, PAGE_SIZE);
    munmap(vl3tab, PAGE_SIZE);
#endif
    /* Write the machine->phys table entries. */
    for ( count = 0; count < nr_pages; count++ )
    {
        if ( xc_add_mmu_update(xc_handle, mmu,
                               (page_array[count] << PAGE_SHIFT) |
                               MMU_MACHPHYS_UPDATE, count) )
            goto error_out;
    }
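
    /*
     * Map and fill in the Linux boot-params page ("zero page") at
     * LINUX_BOOT_PARAMS_ADDR, as a boot loader would: command line,
     * initrd location, memory sizes, screen info and the e820 map.
     */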
    if ((boot_paramsp = xc_map_foreign_range(
             xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
             page_array[(vboot_params_start-dsi.v_start)>>PAGE_SHIFT])) == 0)
        goto error_out;

    memset(boot_paramsp, 0, sizeof(*boot_paramsp));

    strncpy((char *)boot_paramsp->cmd_line, cmdline, 0x800);
    boot_paramsp->cmd_line[0x800-1] = '\0';
    boot_paramsp->cmd_line_ptr = ((unsigned long)vboot_params_start) +
        offsetof(struct linux_boot_params, cmd_line);

    boot_paramsp->setup_sects = 0;
    boot_paramsp->mount_root_rdonly = 1;
    boot_paramsp->swapdev = 0x0;
    boot_paramsp->ramdisk_flags = 0x0;
    boot_paramsp->root_dev = 0x0; /* We must tell the kernel the root dev via the command line. */

    /*
     * We don't have a PS/2 mouse now.
     * 0xAA means an aux mouse is there.
     * See detect_auxiliary_port() in pc_keyb.c.
     */
    boot_paramsp->aux_device_info = 0x0;

    boot_paramsp->header_magic[0] = 0x48; /* "H" */
    boot_paramsp->header_magic[1] = 0x64; /* "d" */
    boot_paramsp->header_magic[2] = 0x72; /* "r" */
    boot_paramsp->header_magic[3] = 0x53; /* "S" */

    boot_paramsp->protocol_version = 0x0203; /* 2.03 */
    boot_paramsp->loader_type = 0x71;        /* GRUB */
    boot_paramsp->loader_flags = 0x1;        /* loaded high */
    boot_paramsp->code32_start = LINUX_KERNEL_ENTR_ADDR; /* 1MB */
    boot_paramsp->initrd_start = vinitrd_start;
    boot_paramsp->initrd_size = initrd_len;

    i = ((memsize - 1) << 10) - 4;
    boot_paramsp->alt_mem_k = i;                         /* alt_mem_k */
    boot_paramsp->screen.overlap.ext_mem_k = i & 0xFFFF; /* ext_mem_k */

    /*
     * Stuff SCREEN_INFO
     */
    boot_paramsp->screen.info.orig_x = 0;
    boot_paramsp->screen.info.orig_y = 0;
    boot_paramsp->screen.info.orig_video_page = 8;
    boot_paramsp->screen.info.orig_video_mode = 3;
    boot_paramsp->screen.info.orig_video_cols = 80;
    boot_paramsp->screen.info.orig_video_ega_bx = 0;
    boot_paramsp->screen.info.orig_video_lines = 25;
    boot_paramsp->screen.info.orig_video_isVGA = 1;
    boot_paramsp->screen.info.orig_video_points = 0x0010;

    /* It seems we may NOT stuff boot_paramsp->apm_bios_info */
    /* It seems we may NOT stuff boot_paramsp->drive_info */
    /* It seems we may NOT stuff boot_paramsp->sys_desc_table */
    *((unsigned short *)&boot_paramsp->drive_info.dummy[0]) = 800;
    boot_paramsp->drive_info.dummy[2] = 4;
    boot_paramsp->drive_info.dummy[14] = 32;

    /* memsize is in megabytes */
    /* If you need to create a special e820map, comment this line out
       and use mem-map.sxp. */
    build_e820map(mem_mapp, memsize << 20);
    *store_mfn = page_array[(v_end-2) >> PAGE_SHIFT];
#if defined (__i386__)
    if (zap_mmio_ranges(xc_handle, dom, l2tab, mem_mapp) == -1)
#else
    if (zap_mmio_ranges(xc_handle, dom, l3tab, mem_mapp) == -1)
#endif
        goto error_out;
    boot_paramsp->e820_map_nr = mem_mapp->nr_map;
    for (i=0; i<mem_mapp->nr_map; i++) {
        boot_paramsp->e820_map[i].addr = mem_mapp->map[i].addr;
        boot_paramsp->e820_map[i].size = mem_mapp->map[i].size;
        boot_paramsp->e820_map[i].type = mem_mapp->map[i].type;
        if (mem_mapp->map[i].type == E820_SHARED)
            shared_page_frame = (mem_mapp->map[i].addr >> PAGE_SHIFT);
    }
    munmap(boot_paramsp, PAGE_SIZE);
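
    /*
     * Build a minimal boot GDT. Entries 12 and 13 (selectors 0x60 and
     * 0x68) are flat 4GB ring-0 code and data segments; the initial
     * register state below runs with cs=0x60 and ds/ss=0x68.
     */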
    if ((boot_gdtp = xc_map_foreign_range(
             xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
             page_array[(vboot_gdt_start-dsi.v_start)>>PAGE_SHIFT])) == 0)
        goto error_out;
    memset(boot_gdtp, 0, PAGE_SIZE);
    boot_gdtp[12*4 + 0] = boot_gdtp[13*4 + 0] = 0xffff; /* limit */
    boot_gdtp[12*4 + 1] = boot_gdtp[13*4 + 1] = 0x0000; /* base */
    boot_gdtp[12*4 + 2] = 0x9a00; boot_gdtp[13*4 + 2] = 0x9200; /* perms */
    boot_gdtp[12*4 + 3] = boot_gdtp[13*4 + 3] = 0x00cf; /* granularity + top of limit */
    munmap(boot_gdtp, PAGE_SIZE);

    /* shared_info page starts its life empty. */
    if ((shared_info = xc_map_foreign_range(
             xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
             shared_info_frame)) == 0)
        goto error_out;
    memset(shared_info, 0, sizeof(shared_info_t));
    /* Mask all upcalls... */
    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
        shared_info->vcpu_data[i].evtchn_upcall_mask = 1;
    munmap(shared_info, PAGE_SIZE);

    /* Populate the event channel port in the shared I/O page. */
    if ((sp = (shared_iopage_t *)xc_map_foreign_range(
             xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
             page_array[shared_page_frame])) == 0)
        goto error_out;
    memset(sp, 0, PAGE_SIZE);
    sp->sp_global.eport = control_evtchn;
    munmap(sp, PAGE_SIZE);

    /* Send the page update requests down to the hypervisor. */
    if ( xc_finish_mmu_updates(xc_handle, mmu) )
        goto error_out;

    free(mmu);
    free(page_array);
    /*
     * Initial register values: enter the kernel at its 32-bit entry point
     * with the flat segments from the boot GDT above, %esi pointing at
     * the boot-params page, and %ebx zero to mark the boot CPU.
     */
    ctxt->user_regs.ds = 0x68;
    ctxt->user_regs.es = 0x0;
    ctxt->user_regs.fs = 0x0;
    ctxt->user_regs.gs = 0x0;
    ctxt->user_regs.ss = 0x68;
    ctxt->user_regs.cs = 0x60;
    ctxt->user_regs.eip = dsi.v_kernentry;
    ctxt->user_regs.edx = vboot_gdt_start;
    ctxt->user_regs.eax = 0x800;
    ctxt->user_regs.esp = vboot_gdt_end;
    ctxt->user_regs.ebx = 0; /* startup_32 expects this to be 0 to signal boot cpu */
    ctxt->user_regs.ecx = mem_mapp->nr_map;
    ctxt->user_regs.esi = vboot_params_start;
    ctxt->user_regs.edi = vboot_params_start + 0x2d0;

    ctxt->user_regs.eflags = 0;

    return 0;

 error_out:
    free(mmu);
    free(page_array);
    return -1;
}
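
/*
 * Check for VMX support: CPUID leaf 1 reports the VMX feature in ECX
 * bit 5 (VMX_FEATURE_FLAG == 0x20). EBX is saved and restored around
 * CPUID because the compiler may be using it as the PIC register.
 */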
#define VMX_FEATURE_FLAG 0x20

static int vmx_identify(void)
{
    int eax, ecx;

#ifdef __i386__
    __asm__ __volatile__ ("pushl %%ebx; cpuid; popl %%ebx"
                          : "=a" (eax), "=c" (ecx)
                          : "0" (1)
                          : "dx");
#elif defined __x86_64__
    __asm__ __volatile__ ("pushq %%rbx; cpuid; popq %%rbx"
                          : "=a" (eax), "=c" (ecx)
                          : "0" (1)
                          : "dx");
#endif

    if (!(ecx & VMX_FEATURE_FLAG)) {
        return -1;
    }
    return 0;
}
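
/*
 * Top-level entry point: check VMX support, read the kernel image and
 * optional compressed ramdisk, verify that the target domain is paused
 * and not yet constructed, build the guest via setup_guest(), then load
 * the new VCPU context with DOM0_SETDOMAININFO.
 */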
int xc_vmx_build(int xc_handle,
                 u32 domid,
                 int memsize,
                 const char *image_name,
                 struct mem_map *mem_mapp,
                 const char *ramdisk_name,
                 const char *cmdline,
                 unsigned int control_evtchn,
                 unsigned long flags,
                 unsigned int vcpus,
                 unsigned int store_evtchn,
                 unsigned long *store_mfn)
{
    dom0_op_t launch_op, op;
    int initrd_fd = -1;
    gzFile initrd_gfd = NULL;
    int rc, i;
    vcpu_guest_context_t st_ctxt, *ctxt = &st_ctxt;
    unsigned long nr_pages;
    char *image = NULL;
    unsigned long image_size, initrd_size=0;

    if ( vmx_identify() < 0 )
    {
        PERROR("CPU doesn't support VMX Extensions");
        goto error_out;
    }

    /* xc_get_tot_pages() returns a signed count; compare as signed. */
    if ( (long)(nr_pages = xc_get_tot_pages(xc_handle, domid)) < 0 )
    {
        PERROR("Could not find total pages for domain");
        goto error_out;
    }

    if ( (image = xc_read_kernel_image(image_name, &image_size)) == NULL )
        goto error_out;

    if ( (ramdisk_name != NULL) && (strlen(ramdisk_name) != 0) )
    {
        if ( (initrd_fd = open(ramdisk_name, O_RDONLY)) < 0 )
        {
            PERROR("Could not open the initial ramdisk image");
            goto error_out;
        }

        initrd_size = xc_get_filesz(initrd_fd);

        if ( (initrd_gfd = gzdopen(initrd_fd, "rb")) == NULL )
        {
            PERROR("Could not allocate decompression state for initrd");
            goto error_out;
        }
    }

    if ( mlock(&st_ctxt, sizeof(st_ctxt)) )
    {
        PERROR("xc_vmx_build: ctxt mlock failed");
        return 1;
    }

    op.cmd = DOM0_GETDOMAININFO;
    op.u.getdomaininfo.domain = (domid_t)domid;
    if ( (xc_dom0_op(xc_handle, &op) < 0) ||
         ((u16)op.u.getdomaininfo.domain != domid) )
    {
        PERROR("Could not get info on domain");
        goto error_out;
    }

    if ( xc_domain_get_vcpu_context(xc_handle, domid, 0, ctxt) )
    {
        PERROR("Could not get vcpu context");
        goto error_out;
    }

    if ( !(op.u.getdomaininfo.flags & DOMFLAGS_PAUSED) ||
         (ctxt->ctrlreg[3] != 0) )
    {
        ERROR("Domain is already constructed");
        goto error_out;
    }

    if ( setup_guest(xc_handle, domid, memsize, image, image_size,
                     initrd_gfd, initrd_size, nr_pages,
                     ctxt, cmdline,
                     op.u.getdomaininfo.shared_info_frame,
                     control_evtchn, flags, vcpus, store_evtchn, store_mfn,
                     mem_mapp) < 0 )
    {
        ERROR("Error constructing guest OS");
        goto error_out;
    }

    /* gzclose() also closes the underlying fd, so don't close it twice. */
    if ( initrd_gfd )
        gzclose(initrd_gfd);
    else if ( initrd_fd >= 0 )
        close(initrd_fd);
    free(image);

    ctxt->flags = VGCF_VMX_GUEST;
    /* FPU is set up to default initial state. */
    memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));

    /* Virtual IDT is empty at start-of-day. */
    for ( i = 0; i < 256; i++ )
    {
        ctxt->trap_ctxt[i].vector = i;
        ctxt->trap_ctxt[i].cs = FLAT_KERNEL_CS;
    }

    /* No LDT. */
    ctxt->ldt_ents = 0;

    /* Use the default Xen-provided GDT. */
    ctxt->gdt_ents = 0;

    /* Ring 1 stack is the initial stack. */
    /*
    ctxt->kernel_ss = FLAT_KERNEL_DS;
    ctxt->kernel_sp = vstartinfo_start;
    */
    /* No debugging. */
    memset(ctxt->debugreg, 0, sizeof(ctxt->debugreg));

    /* No callback handlers. */
#if defined(__i386__)
    ctxt->event_callback_cs = FLAT_KERNEL_CS;
    ctxt->event_callback_eip = 0;
    ctxt->failsafe_callback_cs = FLAT_KERNEL_CS;
    ctxt->failsafe_callback_eip = 0;
#elif defined(__x86_64__)
    ctxt->event_callback_eip = 0;
    ctxt->failsafe_callback_eip = 0;
    ctxt->syscall_callback_eip = 0;
#endif

    memset(&launch_op, 0, sizeof(launch_op));

    launch_op.u.setdomaininfo.domain = (domid_t)domid;
    launch_op.u.setdomaininfo.vcpu = 0;
    launch_op.u.setdomaininfo.ctxt = ctxt;

    launch_op.cmd = DOM0_SETDOMAININFO;
    rc = xc_dom0_op(xc_handle, &launch_op);

    return rc;

 error_out:
    if ( initrd_gfd != NULL )
        gzclose(initrd_gfd);
    else if ( initrd_fd >= 0 )
        close(initrd_fd);
    free(image);

    return -1;
}
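
/*
 * ELF helpers: parseelfimage() validates the image and computes the
 * kernel's extent from the loadable program headers (translating the
 * addresses they carry down by LINUX_PAGE_OFFSET); loadelfimage() copies
 * each loadable segment into the domain and zero-fills the BSS tail.
 */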
static inline int is_loadable_phdr(Elf32_Phdr *phdr)
{
    return ((phdr->p_type == PT_LOAD) &&
            ((phdr->p_flags & (PF_W|PF_X)) != 0));
}

static int parseelfimage(char *elfbase,
                         unsigned long elfsize,
                         struct domain_setup_info *dsi)
{
    Elf32_Ehdr *ehdr = (Elf32_Ehdr *)elfbase;
    Elf32_Phdr *phdr;
    Elf32_Shdr *shdr;
    unsigned long kernstart = ~0UL, kernend = 0UL;
    char *shstrtab;
    int h;

    if ( !IS_ELF(*ehdr) )
    {
        ERROR("Kernel image does not have an ELF header.");
        return -EINVAL;
    }

    if ( (ehdr->e_phoff + (ehdr->e_phnum * ehdr->e_phentsize)) > elfsize )
    {
        ERROR("ELF program headers extend beyond end of image.");
        return -EINVAL;
    }

    if ( (ehdr->e_shoff + (ehdr->e_shnum * ehdr->e_shentsize)) > elfsize )
    {
        ERROR("ELF section headers extend beyond end of image.");
        return -EINVAL;
    }

    /* Find the section-header strings table. */
    if ( ehdr->e_shstrndx == SHN_UNDEF )
    {
        ERROR("ELF image has no section-header strings table (shstrtab).");
        return -EINVAL;
    }
    shdr = (Elf32_Shdr *)(elfbase + ehdr->e_shoff +
                          (ehdr->e_shstrndx*ehdr->e_shentsize));
    shstrtab = elfbase + shdr->sh_offset;

    for ( h = 0; h < ehdr->e_phnum; h++ )
    {
        phdr = (Elf32_Phdr *)(elfbase + ehdr->e_phoff + (h*ehdr->e_phentsize));
        if ( !is_loadable_phdr(phdr) )
            continue;
        if ( phdr->p_paddr < kernstart )
            kernstart = phdr->p_paddr;
        if ( (phdr->p_paddr + phdr->p_memsz) > kernend )
            kernend = phdr->p_paddr + phdr->p_memsz;
    }

    if ( (kernstart > kernend) ||
         (ehdr->e_entry < kernstart) ||
         (ehdr->e_entry > kernend) )
    {
        ERROR("Malformed ELF image.");
        return -EINVAL;
    }

    dsi->v_start = 0x00000000;

    dsi->v_kernstart = kernstart - LINUX_PAGE_OFFSET;
    dsi->v_kernend   = kernend - LINUX_PAGE_OFFSET;
    dsi->v_kernentry = LINUX_KERNEL_ENTR_ADDR;

    dsi->v_end       = dsi->v_kernend;

    return 0;
}
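
/*
 * Copy each loadable segment into the domain page by page: every
 * destination page is mapped from the domain's frame list, the
 * file-backed portion (p_filesz) is memcpy'd in, and the remainder up to
 * p_memsz is zero-filled.
 */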
static int
loadelfimage(
    char *elfbase, int xch, u32 dom, unsigned long *parray,
    struct domain_setup_info *dsi)
{
    Elf32_Ehdr *ehdr = (Elf32_Ehdr *)elfbase;
    Elf32_Phdr *phdr;
    int h;

    char *va;
    unsigned long pa, done, chunksz;

    for ( h = 0; h < ehdr->e_phnum; h++ )
    {
        phdr = (Elf32_Phdr *)(elfbase + ehdr->e_phoff + (h*ehdr->e_phentsize));
        if ( !is_loadable_phdr(phdr) )
            continue;

        for ( done = 0; done < phdr->p_filesz; done += chunksz )
        {
            pa = (phdr->p_paddr + done) - dsi->v_start - LINUX_PAGE_OFFSET;
            if ((va = xc_map_foreign_range(
                     xch, dom, PAGE_SIZE, PROT_WRITE,
                     parray[pa>>PAGE_SHIFT])) == 0)
                return -1;
            chunksz = phdr->p_filesz - done;
            if ( chunksz > (PAGE_SIZE - (pa & (PAGE_SIZE-1))) )
                chunksz = PAGE_SIZE - (pa & (PAGE_SIZE-1));
            memcpy(va + (pa & (PAGE_SIZE-1)),
                   elfbase + phdr->p_offset + done, chunksz);
            munmap(va, PAGE_SIZE);
        }

        for ( ; done < phdr->p_memsz; done += chunksz )
        {
            pa = (phdr->p_paddr + done) - dsi->v_start - LINUX_PAGE_OFFSET;
            if ((va = xc_map_foreign_range(
                     xch, dom, PAGE_SIZE, PROT_WRITE,
                     parray[pa>>PAGE_SHIFT])) == 0)
                return -1;
            chunksz = phdr->p_memsz - done;
            if ( chunksz > (PAGE_SIZE - (pa & (PAGE_SIZE-1))) )
                chunksz = PAGE_SIZE - (pa & (PAGE_SIZE-1));
            memset(va + (pa & (PAGE_SIZE-1)), 0, chunksz);
            munmap(va, PAGE_SIZE);
        }
    }

    return 0;
}