ia64/xen-unstable
view tools/libxc/xc_vmx_build.c @ 6946:e703abaf6e3d

Add behaviour to the remove methods to remove the transaction's path itself. This allows us to write Remove(path) to remove the specified path rather than having to slice the path ourselves.

author emellor@ewan
date Sun Sep 18 14:42:13 2005 +0100
/******************************************************************************
 * xc_vmx_build.c
 */
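
/*
 * Builds a VMX (Intel VT-x) guest domain: loads a Linux kernel image and
 * optional initrd into the domain, constructs the bootstrap page tables
 * and a BIOS-style E820 map, and fills in the Linux boot-parameter page
 * before the domain is started.
 */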

#include <stddef.h>
#include "xg_private.h"
#define ELFSIZE 32
#include "xc_elf.h"
#include <stdlib.h>
#include <unistd.h>
#include <zlib.h>
#include <xen/io/ioreq.h>
#include "linux_boot_params.h"

#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
#ifdef __x86_64__
#define L3_PROT (_PAGE_PRESENT)
#endif

#define round_pgup(_p)   (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
#define round_pgdown(_p) ((_p)&PAGE_MASK)

#define LINUX_BOOT_PARAMS_ADDR 0x00090000
#define LINUX_KERNEL_ENTR_ADDR 0x00100000
#define LINUX_PAGE_OFFSET      0xC0000000

static int
parseelfimage(
    char *elfbase, unsigned long elfsize, struct domain_setup_info *dsi);
static int
loadelfimage(
    char *elfbase, int xch, u32 dom, unsigned long *parray,
    struct domain_setup_info *dsi);
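
/*
 * Build a BIOS-style E820 map of the guest's pseudo-physical memory:
 * conventional RAM below 640KB, the VGA and BIOS holes, the bulk of RAM
 * from 1MB up, and the statically allocated ioreq and xenstore pages at
 * the very top of memory.
 */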
static void build_e820map(struct mem_map *mem_mapp, unsigned long mem_size)
{
    int nr_map = 0;

    /* XXX: Doesn't work for > 4GB yet */
    mem_mapp->map[nr_map].addr = 0x0;
    mem_mapp->map[nr_map].size = 0x9F800;
    mem_mapp->map[nr_map].type = E820_RAM;
    mem_mapp->map[nr_map].caching_attr = MEMMAP_WB;
    nr_map++;

    mem_mapp->map[nr_map].addr = 0x9F800;
    mem_mapp->map[nr_map].size = 0x800;
    mem_mapp->map[nr_map].type = E820_RESERVED;
    mem_mapp->map[nr_map].caching_attr = MEMMAP_UC;
    nr_map++;

    mem_mapp->map[nr_map].addr = 0xA0000;
    mem_mapp->map[nr_map].size = 0x20000;
    mem_mapp->map[nr_map].type = E820_IO;
    mem_mapp->map[nr_map].caching_attr = MEMMAP_UC;
    nr_map++;

    mem_mapp->map[nr_map].addr = 0xF0000;
    mem_mapp->map[nr_map].size = 0x10000;
    mem_mapp->map[nr_map].type = E820_RESERVED;
    mem_mapp->map[nr_map].caching_attr = MEMMAP_UC;
    nr_map++;

#define STATIC_PAGES 2 /* for ioreq_t and store_mfn */
    /* Most of the ram goes here */
    mem_mapp->map[nr_map].addr = 0x100000;
    mem_mapp->map[nr_map].size = mem_size - 0x100000 - STATIC_PAGES*PAGE_SIZE;
    mem_mapp->map[nr_map].type = E820_RAM;
    mem_mapp->map[nr_map].caching_attr = MEMMAP_WB;
    nr_map++;

    /* Statically allocated special pages */

    /* Shared ioreq_t page */
    mem_mapp->map[nr_map].addr = mem_size - PAGE_SIZE;
    mem_mapp->map[nr_map].size = PAGE_SIZE;
    mem_mapp->map[nr_map].type = E820_SHARED;
    mem_mapp->map[nr_map].caching_attr = MEMMAP_WB;
    nr_map++;

    /* For xenstore */
    mem_mapp->map[nr_map].addr = mem_size - 2*PAGE_SIZE;
    mem_mapp->map[nr_map].size = PAGE_SIZE;
    mem_mapp->map[nr_map].type = E820_XENSTORE;
    mem_mapp->map[nr_map].caching_attr = MEMMAP_WB;
    nr_map++;

    mem_mapp->map[nr_map].addr = mem_size;
    mem_mapp->map[nr_map].size = 0x3 * PAGE_SIZE;
    mem_mapp->map[nr_map].type = E820_NVS;
    mem_mapp->map[nr_map].caching_attr = MEMMAP_UC;
    nr_map++;

    mem_mapp->map[nr_map].addr = mem_size + 0x3 * PAGE_SIZE;
    mem_mapp->map[nr_map].size = 0xA * PAGE_SIZE;
    mem_mapp->map[nr_map].type = E820_ACPI;
    mem_mapp->map[nr_map].caching_attr = MEMMAP_WB;
    nr_map++;

    mem_mapp->map[nr_map].addr = 0xFEC00000;
    mem_mapp->map[nr_map].size = 0x1400000;
    mem_mapp->map[nr_map].type = E820_IO;
    mem_mapp->map[nr_map].caching_attr = MEMMAP_UC;
    nr_map++;

    mem_mapp->nr_map = nr_map;
}
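
/*
 * Clear the guest L1 page-table entries covering an uncacheable MMIO
 * range, so that guest accesses to those addresses fault and can be
 * handled as emulated I/O rather than hitting real memory.  Two
 * variants follow: a 2-level walk for i386 and a 3-level (PAE-style)
 * walk for x86_64.
 */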
#ifdef __i386__
static int zap_mmio_range(int xc_handle, u32 dom,
                          l2_pgentry_32_t *vl2tab,
                          unsigned long mmio_range_start,
                          unsigned long mmio_range_size)
{
    unsigned long mmio_addr;
    unsigned long mmio_range_end = mmio_range_start + mmio_range_size;
    unsigned long vl2e;
    l1_pgentry_32_t *vl1tab;

    mmio_addr = mmio_range_start & PAGE_MASK;
    for (; mmio_addr < mmio_range_end; mmio_addr += PAGE_SIZE) {
        vl2e = vl2tab[l2_table_offset(mmio_addr)];
        if (vl2e == 0)
            continue;
        vl1tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                      PROT_READ|PROT_WRITE,
                                      vl2e >> PAGE_SHIFT);
        if (vl1tab == NULL) {
            PERROR("Failed to zap MMIO range");
            return -1;
        }
        vl1tab[l1_table_offset(mmio_addr)] = 0;
        munmap(vl1tab, PAGE_SIZE);
    }
    return 0;
}

static int zap_mmio_ranges(int xc_handle, u32 dom,
                           unsigned long l2tab,
                           struct mem_map *mem_mapp)
{
    int i;
    l2_pgentry_32_t *vl2tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                                   PROT_READ|PROT_WRITE,
                                                   l2tab >> PAGE_SHIFT);
    if (vl2tab == NULL)
        return -1;
    for (i = 0; i < mem_mapp->nr_map; i++) {
        if ((mem_mapp->map[i].type == E820_IO)
            && (mem_mapp->map[i].caching_attr == MEMMAP_UC))
            if (zap_mmio_range(xc_handle, dom, vl2tab,
                               mem_mapp->map[i].addr,
                               mem_mapp->map[i].size) == -1) {
                munmap(vl2tab, PAGE_SIZE); /* don't leak the mapping */
                return -1;
            }
    }
    munmap(vl2tab, PAGE_SIZE);
    return 0;
}
#else
static int zap_mmio_range(int xc_handle, u32 dom,
                          l3_pgentry_t *vl3tab,
                          unsigned long mmio_range_start,
                          unsigned long mmio_range_size)
{
    unsigned long mmio_addr;
    unsigned long mmio_range_end = mmio_range_start + mmio_range_size;
    unsigned long vl2e = 0;
    unsigned long vl3e;
    l1_pgentry_t *vl1tab;
    l2_pgentry_t *vl2tab;

    mmio_addr = mmio_range_start & PAGE_MASK;
    for ( ; mmio_addr < mmio_range_end; mmio_addr += PAGE_SIZE )
    {
        vl3e = vl3tab[l3_table_offset(mmio_addr)];
        if ( vl3e == 0 )
            continue;

        vl2tab = xc_map_foreign_range(
            xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, vl3e>>PAGE_SHIFT);
        if ( vl2tab == NULL )
        {
            PERROR("Failed to zap MMIO range");
            return -1;
        }

        vl2e = vl2tab[l2_table_offset(mmio_addr)];
        if ( vl2e == 0 )
        {
            munmap(vl2tab, PAGE_SIZE);
            continue;
        }

        vl1tab = xc_map_foreign_range(
            xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, vl2e>>PAGE_SHIFT);
        if ( vl1tab == NULL )
        {
            PERROR("Failed to zap MMIO range");
            munmap(vl2tab, PAGE_SIZE);
            return -1;
        }

        vl1tab[l1_table_offset(mmio_addr)] = 0;
        munmap(vl2tab, PAGE_SIZE);
        munmap(vl1tab, PAGE_SIZE);
    }
    return 0;
}

static int zap_mmio_ranges(int xc_handle, u32 dom,
                           unsigned long l3tab,
                           struct mem_map *mem_mapp)
{
    int i;
    l3_pgentry_t *vl3tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                                PROT_READ|PROT_WRITE,
                                                l3tab >> PAGE_SHIFT);
    if (vl3tab == NULL)
        return -1;
    for (i = 0; i < mem_mapp->nr_map; i++) {
        if ((mem_mapp->map[i].type == E820_IO)
            && (mem_mapp->map[i].caching_attr == MEMMAP_UC))
            if (zap_mmio_range(xc_handle, dom, vl3tab,
                               mem_mapp->map[i].addr,
                               mem_mapp->map[i].size) == -1) {
                munmap(vl3tab, PAGE_SIZE); /* don't leak the mapping */
                return -1;
            }
    }
    munmap(vl3tab, PAGE_SIZE);
    return 0;
}
#endif
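
/*
 * setup_guest() does the bulk of the work: parse and load the ELF kernel
 * and optional initrd, lay out the guest's virtual memory, build the
 * bootstrap page tables, populate the machine-to-physical table, fill in
 * the Linux boot_params page, boot GDT and shared pages, and finally set
 * the initial register state in *ctxt.
 */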
static int setup_guest(int xc_handle,
                       u32 dom, int memsize,
                       char *image, unsigned long image_size,
                       gzFile initrd_gfd, unsigned long initrd_len,
                       unsigned long nr_pages,
                       vcpu_guest_context_t *ctxt,
                       const char *cmdline,
                       unsigned long shared_info_frame,
                       unsigned int control_evtchn,
                       unsigned long flags,
                       unsigned int vcpus,
                       unsigned int store_evtchn,
                       unsigned long *store_mfn,
                       struct mem_map *mem_mapp)
{
    l1_pgentry_t *vl1tab=NULL, *vl1e=NULL;
    l2_pgentry_t *vl2tab=NULL, *vl2e=NULL;
    unsigned long *page_array = NULL;
#ifdef __x86_64__
    l3_pgentry_t *vl3tab=NULL, *vl3e=NULL;
    unsigned long l3tab;
#endif
    unsigned long l2tab;
    unsigned long l1tab;
    unsigned long count, i;
    shared_info_t *shared_info;
    struct linux_boot_params *boot_paramsp;
    __u16 *boot_gdtp;
    xc_mmu_t *mmu = NULL;
    int rc;

    unsigned long nr_pt_pages;
    unsigned long ppt_alloc;

    struct domain_setup_info dsi;
    unsigned long vinitrd_start;
    unsigned long vinitrd_end;
    unsigned long vboot_params_start;
    unsigned long vboot_params_end;
    unsigned long vboot_gdt_start;
    unsigned long vboot_gdt_end;
    unsigned long vpt_start;
    unsigned long vpt_end;
    unsigned long v_end;

    unsigned long shared_page_frame = 0;
    shared_iopage_t *sp;

    memset(&dsi, 0, sizeof(struct domain_setup_info));

    if ( (rc = parseelfimage(image, image_size, &dsi)) != 0 )
        goto error_out;

    if ( (dsi.v_start & (PAGE_SIZE-1)) != 0 )
    {
        PERROR("Guest OS must load to a page boundary.\n");
        goto error_out;
    }

    /*
     * Why do we need this? The number of page-table frames depends on the
     * size of the bootstrap address space. But the size of the address space
     * depends on the number of page-table frames (since each one is mapped
     * read-only). We have a pair of simultaneous equations in two unknowns,
     * which we sidestep here with the conservative fixed estimate of
     * nr_pt_pages below.
     */
    vboot_params_start = LINUX_BOOT_PARAMS_ADDR;
    vboot_params_end   = vboot_params_start + PAGE_SIZE;
    vboot_gdt_start    = vboot_params_end;
    vboot_gdt_end      = vboot_gdt_start + PAGE_SIZE;

    /* memsize is in megabytes */
    v_end = memsize << 20;
    /* Leave the top 4KB untouched for the IO-request page. */
    vinitrd_end   = v_end - PAGE_SIZE;
    vinitrd_start = vinitrd_end - initrd_len;
    vinitrd_start = vinitrd_start & (~(PAGE_SIZE - 1));

    if ( initrd_len == 0 )
        vinitrd_start = vinitrd_end = 0;
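
    /*
     * Rough upper bound on the page-table pages needed to map [0, v_end):
     * each L1 page maps 4MB on i386 (2MB with the PAE-style tables used
     * on x86_64), plus a few pages for the upper-level tables.
     */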
#ifdef __i386__
    nr_pt_pages = 1 + ((memsize + 3) >> 2);
#else
    nr_pt_pages = 5 + ((memsize + 1) >> 1);
#endif
    vpt_start = v_end;
    vpt_end   = vpt_start + (nr_pt_pages * PAGE_SIZE);

    printf("VIRTUAL MEMORY ARRANGEMENT:\n"
           " Boot_params:   %08lx->%08lx\n"
           " boot_gdt:      %08lx->%08lx\n"
           " Loaded kernel: %08lx->%08lx\n"
           " Init. ramdisk: %08lx->%08lx\n"
           " Page tables:   %08lx->%08lx\n"
           " TOTAL:         %08lx->%08lx\n",
           vboot_params_start, vboot_params_end,
           vboot_gdt_start, vboot_gdt_end,
           dsi.v_kernstart, dsi.v_kernend,
           vinitrd_start, vinitrd_end,
           vpt_start, vpt_end,
           dsi.v_start, v_end);
    printf(" ENTRY ADDRESS: %08lx\n", dsi.v_kernentry);
    printf(" INITRD LENGTH: %08lx\n", initrd_len);

    if ( (v_end - dsi.v_start) > (nr_pages * PAGE_SIZE) )
    {
        printf("Initial guest OS requires too much space\n"
               "(%luMB is greater than %luMB limit)\n",
               (v_end-dsi.v_start)>>20, (nr_pages<<PAGE_SHIFT)>>20);
        goto error_out;
    }

    if ( (page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL )
    {
        PERROR("Could not allocate memory");
        goto error_out;
    }

    if ( xc_get_pfn_list(xc_handle, dom, page_array, nr_pages) != nr_pages )
    {
        PERROR("Could not get the page frame list");
        goto error_out;
    }
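
    /* page_array[i] now holds the machine frame that backs guest
     * pseudo-physical page i. */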
    loadelfimage(image, xc_handle, dom, page_array, &dsi);

    /* Load the initial ramdisk image. */
    if ( initrd_len != 0 )
    {
        for ( i = (vinitrd_start - dsi.v_start);
              i < (vinitrd_end - dsi.v_start); i += PAGE_SIZE )
        {
            char page[PAGE_SIZE];
            if ( gzread(initrd_gfd, page, PAGE_SIZE) == -1 )
            {
                PERROR("Error reading initrd image");
                goto error_out;
            }
            xc_copy_to_domain_page(xc_handle, dom,
                                   page_array[i>>PAGE_SHIFT], page);
        }
    }

    if ( (mmu = xc_init_mmu_updates(xc_handle, dom)) == NULL )
        goto error_out;

    /* First allocate page for page dir or pdpt */
    ppt_alloc = (vpt_start - dsi.v_start) >> PAGE_SHIFT;
    if ( page_array[ppt_alloc] > 0xfffff )
    {
        unsigned long nmfn;
        nmfn = xc_make_page_below_4G( xc_handle, dom, page_array[ppt_alloc] );
        if ( nmfn == 0 )
        {
            fprintf(stderr, "Couldn't get a page below 4GB :-(\n");
            goto error_out;
        }
        page_array[ppt_alloc] = nmfn;
    }
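
    /*
     * Build the bootstrap page tables, mapping the guest's pseudo-physical
     * memory 1:1 from dsi.v_start up to v_end.  The i386 build uses a
     * two-level table; the x86_64 build below uses a PAE-style PDPT with
     * four L3 entries.
     */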
#ifdef __i386__
    l2tab = page_array[ppt_alloc++] << PAGE_SHIFT;
    ctxt->ctrlreg[3] = l2tab;

    /* Initialise the page tables. */
    if ( (vl2tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                        PROT_READ|PROT_WRITE,
                                        l2tab >> PAGE_SHIFT)) == NULL )
        goto error_out;
    memset(vl2tab, 0, PAGE_SIZE);
    vl2e = &vl2tab[l2_table_offset(dsi.v_start)];
    for ( count = 0; count < ((v_end-dsi.v_start)>>PAGE_SHIFT); count++ )
    {
        if ( ((unsigned long)vl1e & (PAGE_SIZE-1)) == 0 )
        {
            l1tab = page_array[ppt_alloc++] << PAGE_SHIFT;
            if ( vl1tab != NULL )
                munmap(vl1tab, PAGE_SIZE);
            if ( (vl1tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                                PROT_READ|PROT_WRITE,
                                                l1tab >> PAGE_SHIFT)) == NULL )
            {
                munmap(vl2tab, PAGE_SIZE);
                goto error_out;
            }
            memset(vl1tab, 0, PAGE_SIZE);
            vl1e = &vl1tab[l1_table_offset(dsi.v_start + (count<<PAGE_SHIFT))];
            *vl2e++ = l1tab | L2_PROT;
        }

        *vl1e = (page_array[count] << PAGE_SHIFT) | L1_PROT;
        vl1e++;
    }
    munmap(vl1tab, PAGE_SIZE);
    munmap(vl2tab, PAGE_SIZE);
#else
    /* Here l3tab means the PDPT; only 4 entries are used. */
    l3tab = page_array[ppt_alloc++] << PAGE_SHIFT;
    ctxt->ctrlreg[3] = l3tab;

    /* Initialise the page tables. */
    if ( (vl3tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                        PROT_READ|PROT_WRITE,
                                        l3tab >> PAGE_SHIFT)) == NULL )
        goto error_out;
    memset(vl3tab, 0, PAGE_SIZE);

    vl3e = &vl3tab[l3_table_offset(dsi.v_start)];

    for ( count = 0; count < ((v_end-dsi.v_start)>>PAGE_SHIFT); count++ )
    {
        if ( !(count % (1 << (L3_PAGETABLE_SHIFT - L1_PAGETABLE_SHIFT))) )
        {
            l2tab = page_array[ppt_alloc++] << PAGE_SHIFT;

            if ( vl2tab != NULL )
                munmap(vl2tab, PAGE_SIZE);

            if ( (vl2tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                                PROT_READ|PROT_WRITE,
                                                l2tab >> PAGE_SHIFT)) == NULL )
                goto error_out;

            memset(vl2tab, 0, PAGE_SIZE);
            *vl3e++ = l2tab | L3_PROT;
            vl2e = &vl2tab[l2_table_offset(dsi.v_start + (count << PAGE_SHIFT))];
        }
        if ( ((unsigned long)vl1e & (PAGE_SIZE-1)) == 0 )
        {
            l1tab = page_array[ppt_alloc++] << PAGE_SHIFT;
            if ( vl1tab != NULL )
                munmap(vl1tab, PAGE_SIZE);
            if ( (vl1tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                                PROT_READ|PROT_WRITE,
                                                l1tab >> PAGE_SHIFT)) == NULL )
            {
                munmap(vl2tab, PAGE_SIZE);
                goto error_out;
            }
            memset(vl1tab, 0, PAGE_SIZE);
            vl1e = &vl1tab[l1_table_offset(dsi.v_start + (count<<PAGE_SHIFT))];
            *vl2e++ = l1tab | L2_PROT;
        }

        *vl1e = (page_array[count] << PAGE_SHIFT) | L1_PROT;
        vl1e++;
    }

    munmap(vl1tab, PAGE_SIZE);
    munmap(vl2tab, PAGE_SIZE);
    munmap(vl3tab, PAGE_SIZE);
#endif
    /* Write the machine->phys table entries. */
    for ( count = 0; count < nr_pages; count++ )
    {
        if ( xc_add_mmu_update(xc_handle, mmu,
                               (page_array[count] << PAGE_SHIFT) |
                               MMU_MACHPHYS_UPDATE, count) )
            goto error_out;
    }
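
    /*
     * Map the guest page that will hold the Linux boot parameters (the
     * "zero page") and fill in the fields the kernel's 32-bit entry path
     * reads: command line, initrd location, memory sizes, screen info and
     * the E820 map.
     */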
    if ( (boot_paramsp = xc_map_foreign_range(
              xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
              page_array[(vboot_params_start-dsi.v_start)>>PAGE_SHIFT])) == 0 )
        goto error_out;

    memset(boot_paramsp, 0, sizeof(*boot_paramsp));

    strncpy((char *)boot_paramsp->cmd_line, cmdline, 0x800);
    boot_paramsp->cmd_line[0x800-1] = '\0';
    boot_paramsp->cmd_line_ptr = ((unsigned long)vboot_params_start) +
        offsetof(struct linux_boot_params, cmd_line);

    boot_paramsp->setup_sects = 0;
    boot_paramsp->mount_root_rdonly = 1;
    boot_paramsp->swapdev = 0x0;
    boot_paramsp->ramdisk_flags = 0x0;
    boot_paramsp->root_dev = 0x0; /* The root device must be given on the kernel command line. */

    /*
     * We don't have a PS/2 mouse now: 0xAA would mean an aux mouse is
     * present.  See detect_auxiliary_port() in pc_keyb.c.
     */
    boot_paramsp->aux_device_info = 0x0;

    boot_paramsp->header_magic[0] = 0x48; /* "H" */
    boot_paramsp->header_magic[1] = 0x64; /* "d" */
    boot_paramsp->header_magic[2] = 0x72; /* "r" */
    boot_paramsp->header_magic[3] = 0x53; /* "S" */

    boot_paramsp->protocol_version = 0x0203; /* 2.03 */
    boot_paramsp->loader_type = 0x71; /* GRUB */
    boot_paramsp->loader_flags = 0x1; /* loaded high */
    boot_paramsp->code32_start = LINUX_KERNEL_ENTR_ADDR; /* 1MB */
    boot_paramsp->initrd_start = vinitrd_start;
    boot_paramsp->initrd_size = initrd_len;

    i = ((memsize - 1) << 10) - 4;
    boot_paramsp->alt_mem_k = i; /* alt_mem_k */
    boot_paramsp->screen.overlap.ext_mem_k = i & 0xFFFF; /* ext_mem_k */

    /*
     * Stuff SCREEN_INFO
     */
    boot_paramsp->screen.info.orig_x = 0;
    boot_paramsp->screen.info.orig_y = 0;
    boot_paramsp->screen.info.orig_video_page = 8;
    boot_paramsp->screen.info.orig_video_mode = 3;
    boot_paramsp->screen.info.orig_video_cols = 80;
    boot_paramsp->screen.info.orig_video_ega_bx = 0;
    boot_paramsp->screen.info.orig_video_lines = 25;
    boot_paramsp->screen.info.orig_video_isVGA = 1;
    boot_paramsp->screen.info.orig_video_points = 0x0010;

    /* It seems we may NOT stuff boot_paramsp->apm_bios_info */
    /* It seems we may NOT stuff boot_paramsp->drive_info */
    /* It seems we may NOT stuff boot_paramsp->sys_desc_table */
    *((unsigned short *)&boot_paramsp->drive_info.dummy[0]) = 800;
    boot_paramsp->drive_info.dummy[2] = 4;
    boot_paramsp->drive_info.dummy[14] = 32;

    /* memsize is in megabytes */
    /* If you need to create a special e820map, comment this line
       out and use mem-map.sxp instead. */
    build_e820map(mem_mapp, memsize << 20);
    /* The xenstore page is the second page from the top of memory. */
    *store_mfn = page_array[(v_end >> PAGE_SHIFT) - 2];
#if defined (__i386__)
    if ( zap_mmio_ranges(xc_handle, dom, l2tab, mem_mapp) == -1 )
#else
    if ( zap_mmio_ranges(xc_handle, dom, l3tab, mem_mapp) == -1 )
#endif
        goto error_out;
    boot_paramsp->e820_map_nr = mem_mapp->nr_map;
    for ( i = 0; i < mem_mapp->nr_map; i++ )
    {
        boot_paramsp->e820_map[i].addr = mem_mapp->map[i].addr;
        boot_paramsp->e820_map[i].size = mem_mapp->map[i].size;
        boot_paramsp->e820_map[i].type = mem_mapp->map[i].type;
        if ( mem_mapp->map[i].type == E820_SHARED )
            shared_page_frame = (mem_mapp->map[i].addr >> PAGE_SHIFT);
    }
    munmap(boot_paramsp, PAGE_SIZE);
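
    /*
     * Provide a minimal boot GDT.  Entries 12 and 13 are flat 4GB code and
     * data segments (selectors 0x60 and 0x68), matching the segment
     * registers set in ctxt->user_regs below.
     */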
    if ( (boot_gdtp = xc_map_foreign_range(
              xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
              page_array[(vboot_gdt_start-dsi.v_start)>>PAGE_SHIFT])) == 0 )
        goto error_out;
    memset(boot_gdtp, 0, PAGE_SIZE);
    boot_gdtp[12*4 + 0] = boot_gdtp[13*4 + 0] = 0xffff; /* limit */
    boot_gdtp[12*4 + 1] = boot_gdtp[13*4 + 1] = 0x0000; /* base */
    boot_gdtp[12*4 + 2] = 0x9a00; boot_gdtp[13*4 + 2] = 0x9200; /* perms */
    boot_gdtp[12*4 + 3] = boot_gdtp[13*4 + 3] = 0x00cf; /* granu + top of limit */
    munmap(boot_gdtp, PAGE_SIZE);

    /* shared_info page starts its life empty. */
    if ( (shared_info = xc_map_foreign_range(
              xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
              shared_info_frame)) == 0 )
        goto error_out;
    memset(shared_info, 0, sizeof(shared_info_t));
    /* Mask all upcalls... */
    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
        shared_info->vcpu_data[i].evtchn_upcall_mask = 1;
    munmap(shared_info, PAGE_SIZE);

    /* Populate the event channel port in the shared page */
    if ( (sp = (shared_iopage_t *)xc_map_foreign_range(
              xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
              page_array[shared_page_frame])) == 0 )
        goto error_out;
    memset(sp, 0, PAGE_SIZE);
    sp->sp_global.eport = control_evtchn;
    munmap(sp, PAGE_SIZE);

    /* Send the page update requests down to the hypervisor. */
    if ( xc_finish_mmu_updates(xc_handle, mmu) )
        goto error_out;

    free(mmu);
    free(page_array);

    /*
     * Initial register values:
     */
    ctxt->user_regs.ds = 0x68;
    ctxt->user_regs.es = 0x0;
    ctxt->user_regs.fs = 0x0;
    ctxt->user_regs.gs = 0x0;
    ctxt->user_regs.ss = 0x68;
    ctxt->user_regs.cs = 0x60;
    ctxt->user_regs.eip = dsi.v_kernentry;
    ctxt->user_regs.edx = vboot_gdt_start;
    ctxt->user_regs.eax = 0x800;
    ctxt->user_regs.esp = vboot_gdt_end;
    ctxt->user_regs.ebx = 0; /* startup_32 expects this to be 0 to signal boot cpu */
    ctxt->user_regs.ecx = mem_mapp->nr_map;
    ctxt->user_regs.esi = vboot_params_start;
    ctxt->user_regs.edi = vboot_params_start + 0x2d0;

    ctxt->user_regs.eflags = 0;

    return 0;

 error_out:
    free(mmu);
    free(page_array);
    return -1;
}
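
/*
 * CPUID leaf 1 reports VMX support in ECX bit 5; vmx_identify() returns
 * 0 if and only if the host CPU supports the VMX extensions.
 */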
#define VMX_FEATURE_FLAG 0x20

static int vmx_identify(void)
{
    int eax, ecx;

#ifdef __i386__
    __asm__ __volatile__ ("pushl %%ebx; cpuid; popl %%ebx"
                          : "=a" (eax), "=c" (ecx)
                          : "0" (1)
                          : "dx");
#elif defined __x86_64__
    __asm__ __volatile__ ("pushq %%rbx; cpuid; popq %%rbx"
                          : "=a" (eax), "=c" (ecx)
                          : "0" (1)
                          : "dx");
#endif

    if ( !(ecx & VMX_FEATURE_FLAG) )
        return -1;

    return 0;
}
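
/*
 * Top-level entry point: verify VMX support and that the target domain
 * is still paused and unconstructed, run setup_guest(), then register
 * the resulting vcpu context with the hypervisor via DOM0_SETDOMAININFO.
 */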
int xc_vmx_build(int xc_handle,
                 u32 domid,
                 int memsize,
                 const char *image_name,
                 struct mem_map *mem_mapp,
                 const char *ramdisk_name,
                 const char *cmdline,
                 unsigned int control_evtchn,
                 unsigned long flags,
                 unsigned int vcpus,
                 unsigned int store_evtchn,
                 unsigned long *store_mfn)
{
    dom0_op_t launch_op, op;
    int initrd_fd = -1;
    gzFile initrd_gfd = NULL;
    int rc, i;
    vcpu_guest_context_t st_ctxt, *ctxt = &st_ctxt;
    unsigned long nr_pages;
    char *image = NULL;
    unsigned long image_size, initrd_size=0;

    if ( vmx_identify() < 0 )
    {
        PERROR("CPU doesn't support VMX extensions");
        goto error_out;
    }

    /* nr_pages is unsigned, so cast before testing for an error return. */
    if ( (long)(nr_pages = xc_get_tot_pages(xc_handle, domid)) < 0 )
    {
        PERROR("Could not find total pages for domain");
        goto error_out;
    }

    if ( (image = xc_read_kernel_image(image_name, &image_size)) == NULL )
        goto error_out;

    if ( (ramdisk_name != NULL) && (strlen(ramdisk_name) != 0) )
    {
        if ( (initrd_fd = open(ramdisk_name, O_RDONLY)) < 0 )
        {
            PERROR("Could not open the initial ramdisk image");
            goto error_out;
        }

        initrd_size = xc_get_filesz(initrd_fd);

        if ( (initrd_gfd = gzdopen(initrd_fd, "rb")) == NULL )
        {
            PERROR("Could not allocate decompression state for initrd");
            goto error_out;
        }
    }

    if ( mlock(&st_ctxt, sizeof(st_ctxt)) )
    {
        PERROR("xc_vmx_build: ctxt mlock failed");
        return 1;
    }

    op.cmd = DOM0_GETDOMAININFO;
    op.u.getdomaininfo.domain = (domid_t)domid;
    if ( (xc_dom0_op(xc_handle, &op) < 0) ||
         ((u16)op.u.getdomaininfo.domain != domid) )
    {
        PERROR("Could not get info on domain");
        goto error_out;
    }

    if ( xc_domain_get_vcpu_context(xc_handle, domid, 0, ctxt) )
    {
        PERROR("Could not get vcpu context");
        goto error_out;
    }

    if ( !(op.u.getdomaininfo.flags & DOMFLAGS_PAUSED) ||
         (ctxt->ctrlreg[3] != 0) )
    {
        ERROR("Domain is already constructed");
        goto error_out;
    }

    if ( setup_guest(xc_handle, domid, memsize, image, image_size,
                     initrd_gfd, initrd_size, nr_pages,
                     ctxt, cmdline,
                     op.u.getdomaininfo.shared_info_frame,
                     control_evtchn, flags, vcpus, store_evtchn, store_mfn,
                     mem_mapp) < 0 )
    {
        ERROR("Error constructing guest OS");
        goto error_out;
    }

    if ( initrd_fd >= 0 )
        close(initrd_fd);
    if ( initrd_gfd )
        gzclose(initrd_gfd);
    free(image);

    ctxt->flags = VGCF_VMX_GUEST;
    /* FPU is set up to default initial state. */
    memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));

    /* Virtual IDT is empty at start-of-day. */
    for ( i = 0; i < 256; i++ )
    {
        ctxt->trap_ctxt[i].vector = i;
        ctxt->trap_ctxt[i].cs     = FLAT_KERNEL_CS;
    }

    /* No LDT. */
    ctxt->ldt_ents = 0;

    /* Use the default Xen-provided GDT. */
    ctxt->gdt_ents = 0;

    /* Ring 1 stack is the initial stack. */
    /*
    ctxt->kernel_ss = FLAT_KERNEL_DS;
    ctxt->kernel_sp = vstartinfo_start;
    */
    /* No debugging. */
    memset(ctxt->debugreg, 0, sizeof(ctxt->debugreg));

    /* No callback handlers. */
#if defined(__i386__)
    ctxt->event_callback_cs     = FLAT_KERNEL_CS;
    ctxt->event_callback_eip    = 0;
    ctxt->failsafe_callback_cs  = FLAT_KERNEL_CS;
    ctxt->failsafe_callback_eip = 0;
#elif defined(__x86_64__)
    ctxt->event_callback_eip    = 0;
    ctxt->failsafe_callback_eip = 0;
    ctxt->syscall_callback_eip  = 0;
#endif

    memset(&launch_op, 0, sizeof(launch_op));

    launch_op.u.setdomaininfo.domain = (domid_t)domid;
    launch_op.u.setdomaininfo.vcpu   = 0;
    launch_op.u.setdomaininfo.ctxt   = ctxt;

    launch_op.cmd = DOM0_SETDOMAININFO;
    rc = xc_dom0_op(xc_handle, &launch_op);

    return rc;

 error_out:
    if ( initrd_gfd != NULL )
        gzclose(initrd_gfd);
    else if ( initrd_fd >= 0 )
        close(initrd_fd);
    free(image);

    return -1;
}
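
/*
 * Minimal 32-bit ELF loader: parseelfimage() validates the headers and
 * computes the kernel's physical extent, and loadelfimage() copies each
 * loadable segment into the domain page by page, zeroing the remainder
 * of each segment (the gap between p_filesz and p_memsz).
 */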
static inline int is_loadable_phdr(Elf32_Phdr *phdr)
{
    return ((phdr->p_type == PT_LOAD) &&
            ((phdr->p_flags & (PF_W|PF_X)) != 0));
}

static int parseelfimage(char *elfbase,
                         unsigned long elfsize,
                         struct domain_setup_info *dsi)
{
    Elf32_Ehdr *ehdr = (Elf32_Ehdr *)elfbase;
    Elf32_Phdr *phdr;
    Elf32_Shdr *shdr;
    unsigned long kernstart = ~0UL, kernend = 0UL;
    char *shstrtab;
    int h;

    if ( !IS_ELF(*ehdr) )
    {
        ERROR("Kernel image does not have an ELF header.");
        return -EINVAL;
    }

    if ( (ehdr->e_phoff + (ehdr->e_phnum * ehdr->e_phentsize)) > elfsize )
    {
        ERROR("ELF program headers extend beyond end of image.");
        return -EINVAL;
    }

    if ( (ehdr->e_shoff + (ehdr->e_shnum * ehdr->e_shentsize)) > elfsize )
    {
        ERROR("ELF section headers extend beyond end of image.");
        return -EINVAL;
    }

    /* Find the section-header strings table. */
    if ( ehdr->e_shstrndx == SHN_UNDEF )
    {
        ERROR("ELF image has no section-header strings table (shstrtab).");
        return -EINVAL;
    }
    shdr = (Elf32_Shdr *)(elfbase + ehdr->e_shoff +
                          (ehdr->e_shstrndx*ehdr->e_shentsize));
    shstrtab = elfbase + shdr->sh_offset;

    for ( h = 0; h < ehdr->e_phnum; h++ )
    {
        phdr = (Elf32_Phdr *)(elfbase + ehdr->e_phoff + (h*ehdr->e_phentsize));
        if ( !is_loadable_phdr(phdr) )
            continue;
        if ( phdr->p_paddr < kernstart )
            kernstart = phdr->p_paddr;
        if ( (phdr->p_paddr + phdr->p_memsz) > kernend )
            kernend = phdr->p_paddr + phdr->p_memsz;
    }

    if ( (kernstart > kernend) ||
         (ehdr->e_entry < kernstart) ||
         (ehdr->e_entry > kernend) )
    {
        ERROR("Malformed ELF image.");
        return -EINVAL;
    }

    dsi->v_start = 0x00000000;

    dsi->v_kernstart = kernstart - LINUX_PAGE_OFFSET;
    dsi->v_kernend   = kernend   - LINUX_PAGE_OFFSET;
    dsi->v_kernentry = LINUX_KERNEL_ENTR_ADDR;

    dsi->v_end = dsi->v_kernend;

    return 0;
}

static int
loadelfimage(
    char *elfbase, int xch, u32 dom, unsigned long *parray,
    struct domain_setup_info *dsi)
{
    Elf32_Ehdr *ehdr = (Elf32_Ehdr *)elfbase;
    Elf32_Phdr *phdr;
    int h;

    char *va;
    unsigned long pa, done, chunksz;

    for ( h = 0; h < ehdr->e_phnum; h++ )
    {
        phdr = (Elf32_Phdr *)(elfbase + ehdr->e_phoff + (h*ehdr->e_phentsize));
        if ( !is_loadable_phdr(phdr) )
            continue;

        for ( done = 0; done < phdr->p_filesz; done += chunksz )
        {
            pa = (phdr->p_paddr + done) - dsi->v_start - LINUX_PAGE_OFFSET;
            if ( (va = xc_map_foreign_range(
                      xch, dom, PAGE_SIZE, PROT_WRITE,
                      parray[pa>>PAGE_SHIFT])) == 0 )
                return -1;
            chunksz = phdr->p_filesz - done;
            if ( chunksz > (PAGE_SIZE - (pa & (PAGE_SIZE-1))) )
                chunksz = PAGE_SIZE - (pa & (PAGE_SIZE-1));
            memcpy(va + (pa & (PAGE_SIZE-1)),
                   elfbase + phdr->p_offset + done, chunksz);
            munmap(va, PAGE_SIZE);
        }

        for ( ; done < phdr->p_memsz; done += chunksz )
        {
            pa = (phdr->p_paddr + done) - dsi->v_start - LINUX_PAGE_OFFSET;
            if ( (va = xc_map_foreign_range(
                      xch, dom, PAGE_SIZE, PROT_WRITE,
                      parray[pa>>PAGE_SHIFT])) == 0 )
                return -1;
            chunksz = phdr->p_memsz - done;
            if ( chunksz > (PAGE_SIZE - (pa & (PAGE_SIZE-1))) )
                chunksz = PAGE_SIZE - (pa & (PAGE_SIZE-1));
            memset(va + (pa & (PAGE_SIZE-1)), 0, chunksz);
            munmap(va, PAGE_SIZE);
        }
    }

    return 0;
}