tools/libxc/xc_vmx_build.c @ 6427:3428d58a85e1 (ia64/xen-unstable)
author: cl349@firebug.cl.cam.ac.uk
date:   Thu Aug 25 14:41:52 2005 +0000 (2005-08-25)
summary: merge?
/******************************************************************************
 * xc_vmx_build.c
 */

#include <stddef.h>
#include "xg_private.h"
#define ELFSIZE 32
#include "xc_elf.h"
#include <stdlib.h>
#include <zlib.h>
#include <xen/io/ioreq.h>
#include "linux_boot_params.h"

#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
#ifdef __x86_64__
#define L3_PROT (_PAGE_PRESENT)
#endif

#define round_pgup(_p)   (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
#define round_pgdown(_p) ((_p)&PAGE_MASK)

#define LINUX_BOOT_PARAMS_ADDR 0x00090000
#define LINUX_KERNEL_ENTR_ADDR 0x00100000
#define LINUX_PAGE_OFFSET      0xC0000000

static int
parseelfimage(
    char *elfbase, unsigned long elfsize, struct domain_setup_info *dsi);
static int
loadelfimage(
    char *elfbase, int xch, u32 dom, unsigned long *parray,
    struct domain_setup_info *dsi);
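
/*
 * Build a fixed pseudo-physical e820 map for the VMX guest: conventional
 * RAM below 640kB, the legacy VGA/BIOS holes, the bulk of RAM above 1MB
 * (minus two pages reserved for the shared ioreq and xenstore pages at the
 * top of memory), and the LAPIC/IOAPIC MMIO range.  mem_size is the guest
 * memory size in bytes.
 */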
static void build_e820map(struct mem_map *mem_mapp, unsigned long mem_size)
{
    int nr_map = 0;

    /* XXX: Doesn't work for > 4GB yet */
    mem_mapp->map[nr_map].addr = 0x0;
    mem_mapp->map[nr_map].size = 0x9F800;
    mem_mapp->map[nr_map].type = E820_RAM;
    mem_mapp->map[nr_map].caching_attr = MEMMAP_WB;
    nr_map++;

    mem_mapp->map[nr_map].addr = 0x9F800;
    mem_mapp->map[nr_map].size = 0x800;
    mem_mapp->map[nr_map].type = E820_RESERVED;
    mem_mapp->map[nr_map].caching_attr = MEMMAP_UC;
    nr_map++;

    mem_mapp->map[nr_map].addr = 0xA0000;
    mem_mapp->map[nr_map].size = 0x20000;
    mem_mapp->map[nr_map].type = E820_IO;
    mem_mapp->map[nr_map].caching_attr = MEMMAP_UC;
    nr_map++;

    mem_mapp->map[nr_map].addr = 0xF0000;
    mem_mapp->map[nr_map].size = 0x10000;
    mem_mapp->map[nr_map].type = E820_RESERVED;
    mem_mapp->map[nr_map].caching_attr = MEMMAP_UC;
    nr_map++;

#define STATIC_PAGES 2 /* for ioreq_t and store_mfn */
    /* Most of the RAM goes here */
    mem_mapp->map[nr_map].addr = 0x100000;
    mem_mapp->map[nr_map].size = mem_size - 0x100000 - STATIC_PAGES*PAGE_SIZE;
    mem_mapp->map[nr_map].type = E820_RAM;
    mem_mapp->map[nr_map].caching_attr = MEMMAP_WB;
    nr_map++;

    /* Statically allocated special pages */

    /* Shared ioreq_t page */
    mem_mapp->map[nr_map].addr = mem_size - PAGE_SIZE;
    mem_mapp->map[nr_map].size = PAGE_SIZE;
    mem_mapp->map[nr_map].type = E820_SHARED;
    mem_mapp->map[nr_map].caching_attr = MEMMAP_WB;
    nr_map++;

    /* For xenstore */
    mem_mapp->map[nr_map].addr = mem_size - 2*PAGE_SIZE;
    mem_mapp->map[nr_map].size = PAGE_SIZE;
    mem_mapp->map[nr_map].type = E820_XENSTORE;
    mem_mapp->map[nr_map].caching_attr = MEMMAP_WB;
    nr_map++;

    mem_mapp->map[nr_map].addr = mem_size;
    mem_mapp->map[nr_map].size = 0x3 * PAGE_SIZE;
    mem_mapp->map[nr_map].type = E820_NVS;
    mem_mapp->map[nr_map].caching_attr = MEMMAP_UC;
    nr_map++;

    mem_mapp->map[nr_map].addr = mem_size + 0x3 * PAGE_SIZE;
    mem_mapp->map[nr_map].size = 0xA * PAGE_SIZE;
    mem_mapp->map[nr_map].type = E820_ACPI;
    mem_mapp->map[nr_map].caching_attr = MEMMAP_WB;
    nr_map++;

    mem_mapp->map[nr_map].addr = 0xFEC00000;
    mem_mapp->map[nr_map].size = 0x1400000;
    mem_mapp->map[nr_map].type = E820_IO;
    mem_mapp->map[nr_map].caching_attr = MEMMAP_UC;
    nr_map++;

    mem_mapp->nr_map = nr_map;
}
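
/*
 * Clear any bootstrap page-table entries covering the guest's MMIO holes
 * (uncacheable E820_IO ranges), so that guest accesses to those ranges
 * fault and can be handled by the device model rather than hitting stale
 * RAM mappings.
 */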
#ifdef __i386__
static int zap_mmio_range(int xc_handle, u32 dom,
                          l2_pgentry_32_t *vl2tab,
                          unsigned long mmio_range_start,
                          unsigned long mmio_range_size)
{
    unsigned long mmio_addr;
    unsigned long mmio_range_end = mmio_range_start + mmio_range_size;
    unsigned long vl2e;
    l1_pgentry_32_t *vl1tab;

    mmio_addr = mmio_range_start & PAGE_MASK;
    for (; mmio_addr < mmio_range_end; mmio_addr += PAGE_SIZE) {
        vl2e = vl2tab[l2_table_offset(mmio_addr)];
        if (vl2e == 0)
            continue;
        vl1tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                        PROT_READ|PROT_WRITE, vl2e >> PAGE_SHIFT);
        if (vl1tab == NULL) {
            PERROR("Failed to zap MMIO range");
            return -1;
        }
        vl1tab[l1_table_offset(mmio_addr)] = 0;
        munmap(vl1tab, PAGE_SIZE);
    }
    return 0;
}

static int zap_mmio_ranges(int xc_handle, u32 dom,
                           unsigned long l2tab,
                           struct mem_map *mem_mapp)
{
    int i;
    l2_pgentry_32_t *vl2tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                                   PROT_READ|PROT_WRITE,
                                                   l2tab >> PAGE_SHIFT);
    if (vl2tab == NULL)
        return -1;
    for (i = 0; i < mem_mapp->nr_map; i++) {
        if ((mem_mapp->map[i].type == E820_IO)
            && (mem_mapp->map[i].caching_attr == MEMMAP_UC))
            if (zap_mmio_range(xc_handle, dom, vl2tab,
                               mem_mapp->map[i].addr,
                               mem_mapp->map[i].size) == -1) {
                munmap(vl2tab, PAGE_SIZE);
                return -1;
            }
    }
    munmap(vl2tab, PAGE_SIZE);
    return 0;
}
#else
static int zap_mmio_range(int xc_handle, u32 dom,
                          l3_pgentry_t *vl3tab,
                          unsigned long mmio_range_start,
                          unsigned long mmio_range_size)
{
    unsigned long mmio_addr;
    unsigned long mmio_range_end = mmio_range_start + mmio_range_size;
    unsigned long vl2e = 0;
    unsigned long vl3e;
    l1_pgentry_t *vl1tab;
    l2_pgentry_t *vl2tab;

    mmio_addr = mmio_range_start & PAGE_MASK;
    for (; mmio_addr < mmio_range_end; mmio_addr += PAGE_SIZE) {
        vl3e = vl3tab[l3_table_offset(mmio_addr)];
        if (vl3e == 0)
            continue;
        vl2tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                        PROT_READ|PROT_WRITE, vl3e >> PAGE_SHIFT);
        if (vl2tab == NULL) {
            PERROR("Failed to zap MMIO range");
            return -1;
        }
        vl2e = vl2tab[l2_table_offset(mmio_addr)];
        if (vl2e == 0) {
            /* No L1 table here; unmap the L2 before moving on. */
            munmap(vl2tab, PAGE_SIZE);
            continue;
        }
        vl1tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                        PROT_READ|PROT_WRITE, vl2e >> PAGE_SHIFT);
        if (vl1tab == NULL) {
            PERROR("Failed to zap MMIO range");
            munmap(vl2tab, PAGE_SIZE);
            return -1;
        }
        vl1tab[l1_table_offset(mmio_addr)] = 0;
        munmap(vl2tab, PAGE_SIZE);
        munmap(vl1tab, PAGE_SIZE);
    }
    return 0;
}

static int zap_mmio_ranges(int xc_handle, u32 dom,
                           unsigned long l3tab,
                           struct mem_map *mem_mapp)
{
    int i;
    l3_pgentry_t *vl3tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                                PROT_READ|PROT_WRITE,
                                                l3tab >> PAGE_SHIFT);
    if (vl3tab == NULL)
        return -1;
    for (i = 0; i < mem_mapp->nr_map; i++) {
        if ((mem_mapp->map[i].type == E820_IO)
            && (mem_mapp->map[i].caching_attr == MEMMAP_UC))
            if (zap_mmio_range(xc_handle, dom, vl3tab,
                               mem_mapp->map[i].addr,
                               mem_mapp->map[i].size) == -1) {
                munmap(vl3tab, PAGE_SIZE);
                return -1;
            }
    }
    munmap(vl3tab, PAGE_SIZE);
    return 0;
}

#endif
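
/*
 * Construct the initial memory image for a VMX guest: load the kernel and
 * initrd, build the bootstrap page tables, fill in the Linux boot_params
 * page and boot GDT, initialise the shared-info, ioreq and xenstore pages,
 * and set up the initial register state in *ctxt.
 */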
static int setup_guest(int xc_handle,
                       u32 dom, int memsize,
                       char *image, unsigned long image_size,
                       gzFile initrd_gfd, unsigned long initrd_len,
                       unsigned long nr_pages,
                       vcpu_guest_context_t *ctxt,
                       const char *cmdline,
                       unsigned long shared_info_frame,
                       unsigned int control_evtchn,
                       unsigned long flags,
                       unsigned int vcpus,
                       unsigned int store_evtchn,
                       unsigned long *store_mfn,
                       struct mem_map *mem_mapp)
{
    l1_pgentry_t *vl1tab=NULL, *vl1e=NULL;
    l2_pgentry_t *vl2tab=NULL, *vl2e=NULL;
    unsigned long *page_array = NULL;
#ifdef __x86_64__
    l3_pgentry_t *vl3tab=NULL, *vl3e=NULL;
    unsigned long l3tab;
#endif
    unsigned long l2tab;
    unsigned long l1tab;
    unsigned long count, i;
    shared_info_t *shared_info;
    struct linux_boot_params *boot_paramsp;
    __u16 *boot_gdtp;
    xc_mmu_t *mmu = NULL;
    int rc;

    unsigned long nr_pt_pages;
    unsigned long ppt_alloc;

    struct domain_setup_info dsi;
    unsigned long vinitrd_start;
    unsigned long vinitrd_end;
    unsigned long vboot_params_start;
    unsigned long vboot_params_end;
    unsigned long vboot_gdt_start;
    unsigned long vboot_gdt_end;
    unsigned long vpt_start;
    unsigned long vpt_end;
    unsigned long v_end;

    unsigned long shared_page_frame = 0;
    shared_iopage_t *sp;

    memset(&dsi, 0, sizeof(struct domain_setup_info));

    if ( (rc = parseelfimage(image, image_size, &dsi)) != 0 )
        goto error_out;

    if ( (dsi.v_start & (PAGE_SIZE-1)) != 0 )
    {
        PERROR("Guest OS must load to a page boundary.\n");
        goto error_out;
    }

    /*
     * Why do we need this? The number of page-table frames depends on the
     * size of the bootstrap address space. But the size of the address space
     * depends on the number of page-table frames (since each one is mapped
     * read-only). We have a pair of simultaneous equations in two unknowns,
     * which we solve by exhaustive search.
     */
    vboot_params_start = LINUX_BOOT_PARAMS_ADDR;
    vboot_params_end   = vboot_params_start + PAGE_SIZE;
    vboot_gdt_start    = vboot_params_end;
    vboot_gdt_end      = vboot_gdt_start + PAGE_SIZE;

    /* memsize is in megabytes */
    v_end = memsize << 20;
    /* Leave the top 4kB untouched, for use as the I/O request page. */
    vinitrd_end   = v_end - PAGE_SIZE;
    vinitrd_start = vinitrd_end - initrd_len;
    vinitrd_start = vinitrd_start & (~(PAGE_SIZE - 1));

    if ( initrd_len == 0 )
        vinitrd_start = vinitrd_end = 0;

#ifdef __i386__
    nr_pt_pages = 1 + ((memsize + 3) >> 2);
#else
    nr_pt_pages = 5 + ((memsize + 1) >> 1);
#endif
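
    /*
     * The estimates above allow for one L2 page plus one L1 page per 4MB
     * of guest memory on i386, and for the PDPT, four L2 pages and one L1
     * page per 2MB on the x86_64 (PAE-style, three-level) build.
     */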
    vpt_start = v_end;
    vpt_end   = vpt_start + (nr_pt_pages * PAGE_SIZE);

    printf("VIRTUAL MEMORY ARRANGEMENT:\n"
           " Boot_params:   %08lx->%08lx\n"
           " boot_gdt:      %08lx->%08lx\n"
           " Loaded kernel: %08lx->%08lx\n"
           " Init. ramdisk: %08lx->%08lx\n"
           " Page tables:   %08lx->%08lx\n"
           " TOTAL:         %08lx->%08lx\n",
           vboot_params_start, vboot_params_end,
           vboot_gdt_start, vboot_gdt_end,
           dsi.v_kernstart, dsi.v_kernend,
           vinitrd_start, vinitrd_end,
           vpt_start, vpt_end,
           dsi.v_start, v_end);
    printf(" ENTRY ADDRESS: %08lx\n", dsi.v_kernentry);
    printf(" INITRD LENGTH: %08lx\n", initrd_len);

    if ( (v_end - dsi.v_start) > (nr_pages * PAGE_SIZE) )
    {
        printf("Initial guest OS requires too much space\n"
               "(%luMB is greater than %luMB limit)\n",
               (v_end-dsi.v_start)>>20, (nr_pages<<PAGE_SHIFT)>>20);
        goto error_out;
    }

    if ( (page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL )
    {
        PERROR("Could not allocate memory");
        goto error_out;
    }

    if ( xc_get_pfn_list(xc_handle, dom, page_array, nr_pages) != nr_pages )
    {
        PERROR("Could not get the page frame list");
        goto error_out;
    }

    loadelfimage(image, xc_handle, dom, page_array, &dsi);

    /* Load the initial ramdisk image. */
    if ( initrd_len != 0 )
    {
        for ( i = (vinitrd_start - dsi.v_start);
              i < (vinitrd_end - dsi.v_start); i += PAGE_SIZE )
        {
            char page[PAGE_SIZE];
            if ( gzread(initrd_gfd, page, PAGE_SIZE) == -1 )
            {
                PERROR("Error reading initrd image");
                goto error_out;
            }
            xc_copy_to_domain_page(xc_handle, dom,
                                   page_array[i>>PAGE_SHIFT], page);
        }
    }

    if ( (mmu = xc_init_mmu_updates(xc_handle, dom)) == NULL )
        goto error_out;
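
    /*
     * Build the bootstrap page tables out of the guest frames reserved
     * just past v_end (the nr_pt_pages computed above), mapping the whole
     * bootstrap address space [dsi.v_start, v_end) one 4kB page at a time.
     */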
#ifdef __i386__
    /* First allocate page for page dir. */
    ppt_alloc = (vpt_start - dsi.v_start) >> PAGE_SHIFT;
    l2tab = page_array[ppt_alloc++] << PAGE_SHIFT;
    ctxt->ctrlreg[3] = l2tab;

    /* Initialise the page tables. */
    if ( (vl2tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                        PROT_READ|PROT_WRITE,
                                        l2tab >> PAGE_SHIFT)) == NULL )
        goto error_out;
    memset(vl2tab, 0, PAGE_SIZE);
    vl2e = &vl2tab[l2_table_offset(dsi.v_start)];
    for ( count = 0; count < ((v_end-dsi.v_start)>>PAGE_SHIFT); count++ )
    {
        if ( ((unsigned long)vl1e & (PAGE_SIZE-1)) == 0 )
        {
            l1tab = page_array[ppt_alloc++] << PAGE_SHIFT;
            if ( vl1tab != NULL )
                munmap(vl1tab, PAGE_SIZE);
            if ( (vl1tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                                PROT_READ|PROT_WRITE,
                                                l1tab >> PAGE_SHIFT)) == NULL )
            {
                munmap(vl2tab, PAGE_SIZE);
                goto error_out;
            }
            memset(vl1tab, 0, PAGE_SIZE);
            vl1e = &vl1tab[l1_table_offset(dsi.v_start + (count<<PAGE_SHIFT))];
            *vl2e++ = l1tab | L2_PROT;
        }

        *vl1e = (page_array[count] << PAGE_SHIFT) | L1_PROT;
        vl1e++;
    }
    munmap(vl1tab, PAGE_SIZE);
    munmap(vl2tab, PAGE_SIZE);
#else
    /* First allocate the PDPT. */
    ppt_alloc = (vpt_start - dsi.v_start) >> PAGE_SHIFT;
    /* Here l3tab means the PDPT; only 4 entries are used. */
    l3tab = page_array[ppt_alloc++] << PAGE_SHIFT;
    ctxt->ctrlreg[3] = l3tab;

    /* Initialise the page tables. */
    if ( (vl3tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                        PROT_READ|PROT_WRITE,
                                        l3tab >> PAGE_SHIFT)) == NULL )
        goto error_out;
    memset(vl3tab, 0, PAGE_SIZE);

    vl3e = &vl3tab[l3_table_offset(dsi.v_start)];

    for ( count = 0; count < ((v_end-dsi.v_start)>>PAGE_SHIFT); count++ )
    {
        if ( !(count % (1 << (L3_PAGETABLE_SHIFT - L1_PAGETABLE_SHIFT))) )
        {
            l2tab = page_array[ppt_alloc++] << PAGE_SHIFT;

            if ( vl2tab != NULL )
                munmap(vl2tab, PAGE_SIZE);

            if ( (vl2tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                                PROT_READ|PROT_WRITE,
                                                l2tab >> PAGE_SHIFT)) == NULL )
                goto error_out;

            memset(vl2tab, 0, PAGE_SIZE);
            *vl3e++ = l2tab | L3_PROT;
            vl2e = &vl2tab[l2_table_offset(dsi.v_start + (count << PAGE_SHIFT))];
        }
        if ( ((unsigned long)vl1e & (PAGE_SIZE-1)) == 0 )
        {
            l1tab = page_array[ppt_alloc++] << PAGE_SHIFT;
            if ( vl1tab != NULL )
                munmap(vl1tab, PAGE_SIZE);
            if ( (vl1tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                                PROT_READ|PROT_WRITE,
                                                l1tab >> PAGE_SHIFT)) == NULL )
            {
                munmap(vl2tab, PAGE_SIZE);
                goto error_out;
            }
            memset(vl1tab, 0, PAGE_SIZE);
            vl1e = &vl1tab[l1_table_offset(dsi.v_start + (count<<PAGE_SHIFT))];
            *vl2e++ = l1tab | L2_PROT;
        }

        *vl1e = (page_array[count] << PAGE_SHIFT) | L1_PROT;
        vl1e++;
    }

    munmap(vl1tab, PAGE_SIZE);
    munmap(vl2tab, PAGE_SIZE);
    munmap(vl3tab, PAGE_SIZE);
#endif
    /* Write the machine->phys table entries. */
    for ( count = 0; count < nr_pages; count++ )
    {
        if ( xc_add_mmu_update(xc_handle, mmu,
                               (page_array[count] << PAGE_SHIFT) |
                               MMU_MACHPHYS_UPDATE, count) )
            goto error_out;
    }
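
    /*
     * Each MMU_MACHPHYS_UPDATE queued above records that machine frame
     * page_array[count] corresponds to guest pseudo-physical frame count;
     * the batch is flushed to the hypervisor by xc_finish_mmu_updates()
     * below.
     */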
    if ( (boot_paramsp = xc_map_foreign_range(
              xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
              page_array[(vboot_params_start-dsi.v_start)>>PAGE_SHIFT])) == 0 )
        goto error_out;

    memset(boot_paramsp, 0, sizeof(*boot_paramsp));

    strncpy((char *)boot_paramsp->cmd_line, cmdline, 0x800);
    boot_paramsp->cmd_line[0x800-1] = '\0';
    boot_paramsp->cmd_line_ptr = ((unsigned long)vboot_params_start) +
        offsetof(struct linux_boot_params, cmd_line);

    boot_paramsp->setup_sects = 0;
    boot_paramsp->mount_root_rdonly = 1;
    boot_paramsp->swapdev = 0x0;
    boot_paramsp->ramdisk_flags = 0x0;
    boot_paramsp->root_dev = 0x0; /* Root device must be given on the kernel command line. */

    /*
     * We don't have a PS/2 mouse now: 0xAA would mean an aux mouse is
     * present.  See detect_auxiliary_port() in pc_keyb.c.
     */
    boot_paramsp->aux_device_info = 0x0;

    boot_paramsp->header_magic[0] = 0x48; /* "H" */
    boot_paramsp->header_magic[1] = 0x64; /* "d" */
    boot_paramsp->header_magic[2] = 0x72; /* "r" */
    boot_paramsp->header_magic[3] = 0x53; /* "S" */

    boot_paramsp->protocol_version = 0x0203; /* 2.03 */
    boot_paramsp->loader_type = 0x71; /* GRUB */
    boot_paramsp->loader_flags = 0x1; /* loaded high */
    boot_paramsp->code32_start = LINUX_KERNEL_ENTR_ADDR; /* 1MB */
    boot_paramsp->initrd_start = vinitrd_start;
    boot_paramsp->initrd_size = initrd_len;

    i = ((memsize - 1) << 10) - 4;
    boot_paramsp->alt_mem_k = i; /* alt_mem_k */
    boot_paramsp->screen.overlap.ext_mem_k = i & 0xFFFF; /* ext_mem_k */

    /*
     * Stuff SCREEN_INFO
     */
    boot_paramsp->screen.info.orig_x = 0;
    boot_paramsp->screen.info.orig_y = 0;
    boot_paramsp->screen.info.orig_video_page = 8;
    boot_paramsp->screen.info.orig_video_mode = 3;
    boot_paramsp->screen.info.orig_video_cols = 80;
    boot_paramsp->screen.info.orig_video_ega_bx = 0;
    boot_paramsp->screen.info.orig_video_lines = 25;
    boot_paramsp->screen.info.orig_video_isVGA = 1;
    boot_paramsp->screen.info.orig_video_points = 0x0010;

    /* It seems we may NOT stuff boot_paramsp->apm_bios_info,
     * boot_paramsp->drive_info or boot_paramsp->sys_desc_table. */
    *((unsigned short *)&boot_paramsp->drive_info.dummy[0]) = 800;
    boot_paramsp->drive_info.dummy[2] = 4;
    boot_paramsp->drive_info.dummy[14] = 32;

    /* memsize is in megabytes */
    /* If you need to create a special e820map, comment this line
       and use mem-map.sxp instead. */
    build_e820map(mem_mapp, memsize << 20);
    /* NB: (v_end-2) >> PAGE_SHIFT is the topmost frame, i.e. the shared
     * ioreq page in the e820 map above; the E820_XENSTORE frame is the
     * one below it. */
    *store_mfn = page_array[(v_end-2) >> PAGE_SHIFT];
#if defined (__i386__)
    if ( zap_mmio_ranges(xc_handle, dom, l2tab, mem_mapp) == -1 )
#else
    if ( zap_mmio_ranges(xc_handle, dom, l3tab, mem_mapp) == -1 )
#endif
        goto error_out;
    boot_paramsp->e820_map_nr = mem_mapp->nr_map;
    for ( i = 0; i < mem_mapp->nr_map; i++ ) {
        boot_paramsp->e820_map[i].addr = mem_mapp->map[i].addr;
        boot_paramsp->e820_map[i].size = mem_mapp->map[i].size;
        boot_paramsp->e820_map[i].type = mem_mapp->map[i].type;
        if ( mem_mapp->map[i].type == E820_SHARED )
            shared_page_frame = (mem_mapp->map[i].addr >> PAGE_SHIFT);
    }
    munmap(boot_paramsp, PAGE_SIZE);
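
    /*
     * Boot GDT: descriptors 12 and 13 (selectors 0x60 and 0x68) are flat
     * 4GB code and data segments, matching the cs/ds/ss values loaded into
     * the initial register state below.
     */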
    if ( (boot_gdtp = xc_map_foreign_range(
              xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
              page_array[(vboot_gdt_start-dsi.v_start)>>PAGE_SHIFT])) == 0 )
        goto error_out;
    memset(boot_gdtp, 0, PAGE_SIZE);
    boot_gdtp[12*4 + 0] = boot_gdtp[13*4 + 0] = 0xffff; /* limit */
    boot_gdtp[12*4 + 1] = boot_gdtp[13*4 + 1] = 0x0000; /* base */
    boot_gdtp[12*4 + 2] = 0x9a00; boot_gdtp[13*4 + 2] = 0x9200; /* perms */
    boot_gdtp[12*4 + 3] = boot_gdtp[13*4 + 3] = 0x00cf; /* granu + top of limit */
    munmap(boot_gdtp, PAGE_SIZE);
    /* The shared_info page starts its life empty. */
    if ( (shared_info = xc_map_foreign_range(
              xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
              shared_info_frame)) == 0 )
        goto error_out;
    memset(shared_info, 0, sizeof(shared_info_t));
    /* Mask all upcalls... */
    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
        shared_info->vcpu_data[i].evtchn_upcall_mask = 1;
    munmap(shared_info, PAGE_SIZE);

    /* Populate the event channel port in the shared I/O request page. */
    if ( (sp = (shared_iopage_t *)xc_map_foreign_range(
              xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
              page_array[shared_page_frame])) == 0 )
        goto error_out;
    memset(sp, 0, PAGE_SIZE);
    sp->sp_global.eport = control_evtchn;
    munmap(sp, PAGE_SIZE);

    /*
     * Pin down the l2tab address as a page-dir page: this causes the
     * hypervisor to provide the correct protection for the page.
     */
#ifdef __i386__
    if ( pin_table(xc_handle, MMUEXT_PIN_L2_TABLE, l2tab>>PAGE_SHIFT, dom) )
        goto error_out;
#endif

    /* Send the page update requests down to the hypervisor. */
    if ( xc_finish_mmu_updates(xc_handle, mmu) )
        goto error_out;

    free(mmu);
    free(page_array);

    /*
     * Initial register values:
     */
    ctxt->user_regs.ds = 0x68;
    ctxt->user_regs.es = 0x0;
    ctxt->user_regs.fs = 0x0;
    ctxt->user_regs.gs = 0x0;
    ctxt->user_regs.ss = 0x68;
    ctxt->user_regs.cs = 0x60;
    ctxt->user_regs.eip = dsi.v_kernentry;
    ctxt->user_regs.edx = vboot_gdt_start;
    ctxt->user_regs.eax = 0x800;
    ctxt->user_regs.esp = vboot_gdt_end;
    ctxt->user_regs.ebx = 0; /* startup_32 expects this to be 0 to signal boot cpu */
    ctxt->user_regs.ecx = mem_mapp->nr_map;
    ctxt->user_regs.esi = vboot_params_start;
    ctxt->user_regs.edi = vboot_params_start + 0x2d0; /* e820 map offset within boot_params */

    ctxt->user_regs.eflags = 0;

    return 0;

 error_out:
    free(mmu);
    free(page_array);
    return -1;
}
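
/*
 * Check for VMX support: CPUID leaf 1 reports the VMX feature in ECX
 * bit 5 (mask 0x20).  EBX/RBX is saved and restored by hand around CPUID
 * because the compiler may be using it (e.g. as the PIC base register).
 */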
#define VMX_FEATURE_FLAG 0x20

static int vmx_identify(void)
{
    int eax, ecx;

#ifdef __i386__
    __asm__ __volatile__ ("pushl %%ebx; cpuid; popl %%ebx"
                          : "=a" (eax), "=c" (ecx)
                          : "0" (1)
                          : "dx");
#elif defined __x86_64__
    __asm__ __volatile__ ("pushq %%rbx; cpuid; popq %%rbx"
                          : "=a" (eax), "=c" (ecx)
                          : "0" (1)
                          : "dx");
#endif

    if ( !(ecx & VMX_FEATURE_FLAG) )
        return -1;

    return 0;
}
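
/*
 * Top-level VMX domain builder: verify VMX support, read the kernel image
 * (and optional gzip-compressed ramdisk), check that the target domain is
 * paused and not yet constructed, build the guest memory image via
 * setup_guest(), and finally load the initial vcpu context into the
 * hypervisor.
 */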
int xc_vmx_build(int xc_handle,
                 u32 domid,
                 int memsize,
                 const char *image_name,
                 struct mem_map *mem_mapp,
                 const char *ramdisk_name,
                 const char *cmdline,
                 unsigned int control_evtchn,
                 unsigned long flags,
                 unsigned int vcpus,
                 unsigned int store_evtchn,
                 unsigned long *store_mfn)
{
    dom0_op_t launch_op, op;
    int initrd_fd = -1;
    gzFile initrd_gfd = NULL;
    int rc, i;
    vcpu_guest_context_t st_ctxt, *ctxt = &st_ctxt;
    unsigned long nr_pages;
    char *image = NULL;
    unsigned long image_size, initrd_size=0;

    if ( vmx_identify() < 0 )
    {
        PERROR("CPU doesn't support VMX extensions");
        goto error_out;
    }

    /* Cast to signed: nr_pages is unsigned, so a bare `< 0` test would
     * never fire on the error return. */
    if ( (long)(nr_pages = xc_get_tot_pages(xc_handle, domid)) < 0 )
    {
        PERROR("Could not find total pages for domain");
        goto error_out;
    }

    if ( (image = xc_read_kernel_image(image_name, &image_size)) == NULL )
        goto error_out;

    if ( (ramdisk_name != NULL) && (strlen(ramdisk_name) != 0) )
    {
        if ( (initrd_fd = open(ramdisk_name, O_RDONLY)) < 0 )
        {
            PERROR("Could not open the initial ramdisk image");
            goto error_out;
        }

        initrd_size = xc_get_filesz(initrd_fd);

        if ( (initrd_gfd = gzdopen(initrd_fd, "rb")) == NULL )
        {
            PERROR("Could not allocate decompression state for initrd");
            goto error_out;
        }
    }

    if ( mlock(&st_ctxt, sizeof(st_ctxt)) )
    {
        PERROR("xc_vmx_build: ctxt mlock failed");
        return 1;
    }

    op.cmd = DOM0_GETDOMAININFO;
    op.u.getdomaininfo.domain = (domid_t)domid;
    if ( (xc_dom0_op(xc_handle, &op) < 0) ||
         ((u16)op.u.getdomaininfo.domain != domid) )
    {
        PERROR("Could not get info on domain");
        goto error_out;
    }

    if ( xc_domain_get_vcpu_context(xc_handle, domid, 0, ctxt) )
    {
        PERROR("Could not get vcpu context");
        goto error_out;
    }

    if ( !(op.u.getdomaininfo.flags & DOMFLAGS_PAUSED) ||
         (ctxt->ctrlreg[3] != 0) )
    {
        ERROR("Domain is already constructed");
        goto error_out;
    }

    if ( setup_guest(xc_handle, domid, memsize, image, image_size,
                     initrd_gfd, initrd_size, nr_pages,
                     ctxt, cmdline,
                     op.u.getdomaininfo.shared_info_frame,
                     control_evtchn, flags, vcpus, store_evtchn, store_mfn,
                     mem_mapp) < 0 )
    {
        ERROR("Error constructing guest OS");
        goto error_out;
    }

    if ( initrd_fd >= 0 )
        close(initrd_fd);
    if ( initrd_gfd )
        gzclose(initrd_gfd);
    free(image);

    ctxt->flags = VGCF_VMX_GUEST;
    /* FPU is set up to default initial state. */
    memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));

    /* Virtual IDT is empty at start-of-day. */
    for ( i = 0; i < 256; i++ )
    {
        ctxt->trap_ctxt[i].vector = i;
        ctxt->trap_ctxt[i].cs = FLAT_KERNEL_CS;
    }

    /* No LDT. */
    ctxt->ldt_ents = 0;

    /* Use the default Xen-provided GDT. */
    ctxt->gdt_ents = 0;

    /* Ring 1 stack is the initial stack. */
    /*
    ctxt->kernel_ss = FLAT_KERNEL_DS;
    ctxt->kernel_sp = vstartinfo_start;
    */
    /* No debugging. */
    memset(ctxt->debugreg, 0, sizeof(ctxt->debugreg));

    /* No callback handlers. */
#if defined(__i386__)
    ctxt->event_callback_cs     = FLAT_KERNEL_CS;
    ctxt->event_callback_eip    = 0;
    ctxt->failsafe_callback_cs  = FLAT_KERNEL_CS;
    ctxt->failsafe_callback_eip = 0;
#elif defined(__x86_64__)
    ctxt->event_callback_eip    = 0;
    ctxt->failsafe_callback_eip = 0;
    ctxt->syscall_callback_eip  = 0;
#endif

    memset(&launch_op, 0, sizeof(launch_op));

    launch_op.u.setdomaininfo.domain = (domid_t)domid;
    launch_op.u.setdomaininfo.vcpu   = 0;
    launch_op.u.setdomaininfo.ctxt   = ctxt;

    launch_op.cmd = DOM0_SETDOMAININFO;
    rc = xc_dom0_op(xc_handle, &launch_op);

    return rc;

 error_out:
    if ( initrd_gfd != NULL )
        gzclose(initrd_gfd);
    else if ( initrd_fd >= 0 )
        close(initrd_fd);
    free(image);

    return -1;
}
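
/*
 * ELF helpers.  The VMX loader parses the kernel as a 32-bit ELF image
 * (ELFSIZE is forced to 32 above) and treats the loadable segments'
 * p_paddr values as PAGE_OFFSET-based addresses, rebasing them by
 * subtracting LINUX_PAGE_OFFSET (0xC0000000) to obtain guest
 * pseudo-physical addresses.
 */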
static inline int is_loadable_phdr(Elf32_Phdr *phdr)
{
    return ((phdr->p_type == PT_LOAD) &&
            ((phdr->p_flags & (PF_W|PF_X)) != 0));
}

static int parseelfimage(char *elfbase,
                         unsigned long elfsize,
                         struct domain_setup_info *dsi)
{
    Elf32_Ehdr *ehdr = (Elf32_Ehdr *)elfbase;
    Elf32_Phdr *phdr;
    Elf32_Shdr *shdr;
    unsigned long kernstart = ~0UL, kernend = 0UL;
    char *shstrtab;
    int h;

    if ( !IS_ELF(*ehdr) )
    {
        ERROR("Kernel image does not have an ELF header.");
        return -EINVAL;
    }

    if ( (ehdr->e_phoff + (ehdr->e_phnum * ehdr->e_phentsize)) > elfsize )
    {
        ERROR("ELF program headers extend beyond end of image.");
        return -EINVAL;
    }

    if ( (ehdr->e_shoff + (ehdr->e_shnum * ehdr->e_shentsize)) > elfsize )
    {
        ERROR("ELF section headers extend beyond end of image.");
        return -EINVAL;
    }

    /* Find the section-header strings table. */
    if ( ehdr->e_shstrndx == SHN_UNDEF )
    {
        ERROR("ELF image has no section-header strings table (shstrtab).");
        return -EINVAL;
    }
    shdr = (Elf32_Shdr *)(elfbase + ehdr->e_shoff +
                          (ehdr->e_shstrndx*ehdr->e_shentsize));
    shstrtab = elfbase + shdr->sh_offset;

    for ( h = 0; h < ehdr->e_phnum; h++ )
    {
        phdr = (Elf32_Phdr *)(elfbase + ehdr->e_phoff + (h*ehdr->e_phentsize));
        if ( !is_loadable_phdr(phdr) )
            continue;
        if ( phdr->p_paddr < kernstart )
            kernstart = phdr->p_paddr;
        if ( (phdr->p_paddr + phdr->p_memsz) > kernend )
            kernend = phdr->p_paddr + phdr->p_memsz;
    }

    if ( (kernstart > kernend) ||
         (ehdr->e_entry < kernstart) ||
         (ehdr->e_entry > kernend) )
    {
        ERROR("Malformed ELF image.");
        return -EINVAL;
    }

    dsi->v_start = 0x00000000;

    dsi->v_kernstart = kernstart - LINUX_PAGE_OFFSET;
    dsi->v_kernend   = kernend - LINUX_PAGE_OFFSET;
    dsi->v_kernentry = LINUX_KERNEL_ENTR_ADDR;

    dsi->v_end = dsi->v_kernend;

    return 0;
}
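
/*
 * Copy each loadable segment into the guest one page at a time through a
 * transient foreign mapping: the first p_filesz bytes are copied from the
 * image, and the remainder up to p_memsz (the BSS) is zeroed.
 */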
static int
loadelfimage(
    char *elfbase, int xch, u32 dom, unsigned long *parray,
    struct domain_setup_info *dsi)
{
    Elf32_Ehdr *ehdr = (Elf32_Ehdr *)elfbase;
    Elf32_Phdr *phdr;
    int h;

    char *va;
    unsigned long pa, done, chunksz;

    for ( h = 0; h < ehdr->e_phnum; h++ )
    {
        phdr = (Elf32_Phdr *)(elfbase + ehdr->e_phoff + (h*ehdr->e_phentsize));
        if ( !is_loadable_phdr(phdr) )
            continue;

        for ( done = 0; done < phdr->p_filesz; done += chunksz )
        {
            pa = (phdr->p_paddr + done) - dsi->v_start - LINUX_PAGE_OFFSET;
            if ( (va = xc_map_foreign_range(
                      xch, dom, PAGE_SIZE, PROT_WRITE,
                      parray[pa>>PAGE_SHIFT])) == 0 )
                return -1;
            chunksz = phdr->p_filesz - done;
            if ( chunksz > (PAGE_SIZE - (pa & (PAGE_SIZE-1))) )
                chunksz = PAGE_SIZE - (pa & (PAGE_SIZE-1));
            memcpy(va + (pa & (PAGE_SIZE-1)),
                   elfbase + phdr->p_offset + done, chunksz);
            munmap(va, PAGE_SIZE);
        }

        for ( ; done < phdr->p_memsz; done += chunksz )
        {
            pa = (phdr->p_paddr + done) - dsi->v_start - LINUX_PAGE_OFFSET;
            if ( (va = xc_map_foreign_range(
                      xch, dom, PAGE_SIZE, PROT_WRITE,
                      parray[pa>>PAGE_SHIFT])) == 0 )
                return -1;
            chunksz = phdr->p_memsz - done;
            if ( chunksz > (PAGE_SIZE - (pa & (PAGE_SIZE-1))) )
                chunksz = PAGE_SIZE - (pa & (PAGE_SIZE-1));
            memset(va + (pa & (PAGE_SIZE-1)), 0, chunksz);
            munmap(va, PAGE_SIZE);
        }
    }

    return 0;
}
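
/*
 * Usage sketch (hypothetical caller, not part of this file).  After
 * creating a paused domain and allocating its event channels, a
 * toolstack would invoke the builder roughly as follows; the names
 * xc_fd, domid, memsize_mb, control_evtchn and store_evtchn are
 * illustrative assumptions, with flags=0 and a single vcpu:
 *
 *     struct mem_map mmap;
 *     unsigned long store_mfn;
 *
 *     if ( xc_vmx_build(xc_fd, domid, memsize_mb, "/boot/vmlinux",
 *                       &mmap, "/boot/initrd.gz", "root=/dev/hda1",
 *                       control_evtchn, 0, 1,
 *                       store_evtchn, &store_mfn) != 0 )
 *         ERROR("VMX domain build failed");
 */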