ia64/xen-unstable

view tools/libxc/xc_vmx_build.c @ 7238:971e7c7411b3

Raise an exception if an error appears on the pipes to our children, and make
sure that the child's pipes are closed even under that exception. Move the
handling of POLLHUP to the end of the loop, so that we guarantee to read any
remaining data from the child if POLLHUP and POLLIN appear at the same time.

Signed-off-by: Ewan Mellor <ewan@xensource.com>
author:   emellor@ewan
date:     Thu Oct 06 10:13:11 2005 +0100 (2005-10-06)
parents:  ef9591d03fdd
children: 93e27f7ca8a8 61b3b357d827
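
The changeset description above concerns poll-based handling of child pipes (in the toolstack's process-management code) rather than anything in this file. As a rough, hypothetical C illustration of the ordering it describes — drain POLLIN before acting on POLLHUP, and report pipe errors to the caller — a sketch might look like the following; drain_child_fd() and the buffer size are illustrative only, not code from this changeset:

#include <poll.h>
#include <unistd.h>

/* Drain one child's pipe: returns 0 on clean EOF, -1 on a pipe error. */
static int drain_child_fd(int fd)
{
    struct pollfd pfd = { .fd = fd, .events = POLLIN };
    char buf[4096];

    for ( ;; )
    {
        if ( poll(&pfd, 1, -1) < 0 )
            return -1;

        /* Read any pending data first, even if POLLHUP is also set. */
        if ( pfd.revents & POLLIN )
        {
            ssize_t n = read(fd, buf, sizeof(buf));
            if ( n < 0 )
                return -1;
            if ( n == 0 )
                return 0;      /* EOF: writer closed after the last data */
            continue;          /* got data; poll again for more */
        }

        /* An error on the pipe is reported to the caller. */
        if ( pfd.revents & (POLLERR | POLLNVAL) )
            return -1;

        /* Handle POLLHUP only after any remaining data has been read. */
        if ( pfd.revents & POLLHUP )
            return 0;
    }
}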
/******************************************************************************
 * xc_vmx_build.c
 */

#include <stddef.h>
#include "xg_private.h"
#define ELFSIZE 32
#include "xc_elf.h"
#include <stdlib.h>
#include <unistd.h>
#include <zlib.h>
#include <xen/io/ioreq.h>

#define VMX_LOADER_ENTR_ADDR 0x00100000

#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
#ifdef __x86_64__
#define L3_PROT (_PAGE_PRESENT)
#endif

#define E820MAX 128

#define E820_RAM          1
#define E820_RESERVED     2
#define E820_ACPI         3
#define E820_NVS          4
#define E820_IO          16
#define E820_SHARED_PAGE 17
#define E820_XENSTORE    18

#define E820_MAP_PAGE      0x00090000
#define E820_MAP_NR_OFFSET 0x000001E8
#define E820_MAP_OFFSET    0x000002D0

struct e820entry {
    u64 addr;
    u64 size;
    u32 type;
} __attribute__((packed));

#define round_pgup(_p)   (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
#define round_pgdown(_p) ((_p)&PAGE_MASK)

static int
parseelfimage(
    char *elfbase, unsigned long elfsize, struct domain_setup_info *dsi);
static int
loadelfimage(
    char *elfbase, int xch, u32 dom, unsigned long *parray,
    struct domain_setup_info *dsi);
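
/*
 * Construct the guest's E820 memory map in the supplied scratch page: the
 * entry count is written at E820_MAP_NR_OFFSET and the entries themselves
 * at E820_MAP_OFFSET.  Returns the number of entries written.
 */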
static unsigned char build_e820map(void *e820_page, unsigned long mem_size)
{
    struct e820entry *e820entry =
        (struct e820entry *)(((unsigned char *)e820_page) + E820_MAP_OFFSET);
    unsigned char nr_map = 0;

    /* XXX: Doesn't work for > 4GB yet */
    e820entry[nr_map].addr = 0x0;
    e820entry[nr_map].size = 0x9F800;
    e820entry[nr_map].type = E820_RAM;
    nr_map++;

    e820entry[nr_map].addr = 0x9F800;
    e820entry[nr_map].size = 0x800;
    e820entry[nr_map].type = E820_RESERVED;
    nr_map++;

    e820entry[nr_map].addr = 0xA0000;
    e820entry[nr_map].size = 0x20000;
    e820entry[nr_map].type = E820_IO;
    nr_map++;

    e820entry[nr_map].addr = 0xF0000;
    e820entry[nr_map].size = 0x10000;
    e820entry[nr_map].type = E820_RESERVED;
    nr_map++;

#define STATIC_PAGES 2 /* for ioreq_t and store_mfn */
    /* Most of the ram goes here */
    e820entry[nr_map].addr = 0x100000;
    e820entry[nr_map].size = mem_size - 0x100000 - STATIC_PAGES*PAGE_SIZE;
    e820entry[nr_map].type = E820_RAM;
    nr_map++;

    /* Statically allocated special pages */

    /* Shared ioreq_t page */
    e820entry[nr_map].addr = mem_size - PAGE_SIZE;
    e820entry[nr_map].size = PAGE_SIZE;
    e820entry[nr_map].type = E820_SHARED_PAGE;
    nr_map++;

    /* For xenstore */
    e820entry[nr_map].addr = mem_size - 2*PAGE_SIZE;
    e820entry[nr_map].size = PAGE_SIZE;
    e820entry[nr_map].type = E820_XENSTORE;
    nr_map++;

    e820entry[nr_map].addr = mem_size;
    e820entry[nr_map].size = 0x3 * PAGE_SIZE;
    e820entry[nr_map].type = E820_NVS;
    nr_map++;

    e820entry[nr_map].addr = mem_size + 0x3 * PAGE_SIZE;
    e820entry[nr_map].size = 0xA * PAGE_SIZE;
    e820entry[nr_map].type = E820_ACPI;
    nr_map++;

    e820entry[nr_map].addr = 0xFEC00000;
    e820entry[nr_map].size = 0x1400000;
    e820entry[nr_map].type = E820_IO;
    nr_map++;

    return (*(((unsigned char *)e820_page) + E820_MAP_NR_OFFSET) = nr_map);
}

/*
 * Use the E820-reserved region at 0x9F800 to pass the number of VCPUs to
 * vmxloader, which uses it to configure the ACPI MADT.
 */
#define VCPU_MAGIC 0x76637075 /* "vcpu" */
static int
set_nr_vcpus(int xc_handle, u32 dom, unsigned long *pfn_list,
             struct domain_setup_info *dsi, unsigned long vcpus)
{
    char *va_map;
    unsigned long *va_vcpus;

    va_map = xc_map_foreign_range(
        xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
        pfn_list[(0x9F000 - dsi->v_start) >> PAGE_SHIFT]);
    if ( va_map == NULL )
        return -1;

    va_vcpus = (unsigned long *)(va_map + 0x800);
    *va_vcpus++ = VCPU_MAGIC;
    *va_vcpus++ = vcpus;

    munmap(va_map, PAGE_SIZE);

    return 0;
}
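
/*
 * Walk the guest's page tables and clear the L1 entries covering each
 * E820_IO range, leaving those addresses unmapped in the initial tables.
 */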
#ifdef __i386__
static int zap_mmio_range(int xc_handle, u32 dom,
                          l2_pgentry_32_t *vl2tab,
                          unsigned long mmio_range_start,
                          unsigned long mmio_range_size)
{
    unsigned long mmio_addr;
    unsigned long mmio_range_end = mmio_range_start + mmio_range_size;
    unsigned long vl2e;
    l1_pgentry_32_t *vl1tab;

    mmio_addr = mmio_range_start & PAGE_MASK;
    for (; mmio_addr < mmio_range_end; mmio_addr += PAGE_SIZE) {
        vl2e = vl2tab[l2_table_offset(mmio_addr)];
        if (vl2e == 0)
            continue;
        vl1tab = xc_map_foreign_range(
            xc_handle, dom, PAGE_SIZE,
            PROT_READ|PROT_WRITE, vl2e >> PAGE_SHIFT);
        if ( vl1tab == 0 )
        {
            PERROR("Failed zap MMIO range");
            return -1;
        }
        vl1tab[l1_table_offset(mmio_addr)] = 0;
        munmap(vl1tab, PAGE_SIZE);
    }
    return 0;
}

static int zap_mmio_ranges(int xc_handle, u32 dom, unsigned long l2tab,
                           unsigned char e820_map_nr, unsigned char *e820map)
{
    unsigned int i;
    struct e820entry *e820entry = (struct e820entry *)e820map;

    l2_pgentry_32_t *vl2tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                                   PROT_READ|PROT_WRITE,
                                                   l2tab >> PAGE_SHIFT);
    if ( vl2tab == 0 )
        return -1;

    for ( i = 0; i < e820_map_nr; i++ )
    {
        if ( (e820entry[i].type == E820_IO) &&
             (zap_mmio_range(xc_handle, dom, vl2tab,
                             e820entry[i].addr, e820entry[i].size) == -1))
            return -1;
    }

    munmap(vl2tab, PAGE_SIZE);
    return 0;
}
#else
static int zap_mmio_range(int xc_handle, u32 dom,
                          l3_pgentry_t *vl3tab,
                          unsigned long mmio_range_start,
                          unsigned long mmio_range_size)
{
    unsigned long mmio_addr;
    unsigned long mmio_range_end = mmio_range_start + mmio_range_size;
    unsigned long vl2e = 0;
    unsigned long vl3e;
    l1_pgentry_t *vl1tab;
    l2_pgentry_t *vl2tab;

    mmio_addr = mmio_range_start & PAGE_MASK;
    for ( ; mmio_addr < mmio_range_end; mmio_addr += PAGE_SIZE )
    {
        vl3e = vl3tab[l3_table_offset(mmio_addr)];
        if ( vl3e == 0 )
            continue;

        vl2tab = xc_map_foreign_range(
            xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, vl3e>>PAGE_SHIFT);
        if ( vl2tab == NULL )
        {
            PERROR("Failed zap MMIO range");
            return -1;
        }

        vl2e = vl2tab[l2_table_offset(mmio_addr)];
        if ( vl2e == 0 )
        {
            munmap(vl2tab, PAGE_SIZE);
            continue;
        }

        vl1tab = xc_map_foreign_range(
            xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, vl2e>>PAGE_SHIFT);
        if ( vl1tab == NULL )
        {
            PERROR("Failed zap MMIO range");
            munmap(vl2tab, PAGE_SIZE);
            return -1;
        }

        vl1tab[l1_table_offset(mmio_addr)] = 0;
        munmap(vl2tab, PAGE_SIZE);
        munmap(vl1tab, PAGE_SIZE);
    }
    return 0;
}

static int zap_mmio_ranges(int xc_handle, u32 dom, unsigned long l3tab,
                           unsigned char e820_map_nr, unsigned char *e820map)
{
    unsigned int i;
    struct e820entry *e820entry = (struct e820entry *)e820map;

    l3_pgentry_t *vl3tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                                PROT_READ|PROT_WRITE,
                                                l3tab >> PAGE_SHIFT);
    if (vl3tab == 0)
        return -1;
    for ( i = 0; i < e820_map_nr; i++ ) {
        if ( (e820entry[i].type == E820_IO) &&
             (zap_mmio_range(xc_handle, dom, vl3tab,
                             e820entry[i].addr, e820entry[i].size) == -1) )
            return -1;
    }
    munmap(vl3tab, PAGE_SIZE);
    return 0;
}

#endif
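
/*
 * Build the initial state of a VMX guest: load the vmxloader ELF image,
 * construct the guest's initial page tables, populate the machine-to-phys
 * table, write the E820 map, clear the page-table entries covering the
 * E820_IO ranges, initialise the shared_info and shared ioreq pages, and
 * set the initial register state so the guest starts at the loader entry.
 */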
static int setup_guest(int xc_handle,
                       u32 dom, int memsize,
                       char *image, unsigned long image_size,
                       unsigned long nr_pages,
                       vcpu_guest_context_t *ctxt,
                       unsigned long shared_info_frame,
                       unsigned int control_evtchn,
                       unsigned long flags,
                       unsigned int vcpus,
                       unsigned int store_evtchn,
                       unsigned long *store_mfn)
{
    l1_pgentry_t *vl1tab=NULL, *vl1e=NULL;
    l2_pgentry_t *vl2tab=NULL, *vl2e=NULL;
    unsigned long *page_array = NULL;
#ifdef __x86_64__
    l3_pgentry_t *vl3tab=NULL, *vl3e=NULL;
    unsigned long l3tab;
#endif
    unsigned long l2tab;
    unsigned long l1tab;
    unsigned long count, i;
    shared_info_t *shared_info;
    void *e820_page;
    unsigned char e820_map_nr;
    xc_mmu_t *mmu = NULL;
    int rc;

    unsigned long nr_pt_pages;
    unsigned long ppt_alloc;

    struct domain_setup_info dsi;
    unsigned long vpt_start;
    unsigned long vpt_end;
    unsigned long v_end;

    unsigned long shared_page_frame = 0;
    shared_iopage_t *sp;

    memset(&dsi, 0, sizeof(struct domain_setup_info));

    if ( (rc = parseelfimage(image, image_size, &dsi)) != 0 )
        goto error_out;

    if ( (dsi.v_start & (PAGE_SIZE-1)) != 0 )
    {
        PERROR("Guest OS must load to a page boundary.\n");
        goto error_out;
    }

    /* memsize is in megabytes */
    v_end = memsize << 20;
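
    /*
     * Page tables needed to map [v_start, v_end): on i386 one page directory
     * plus one L1 table per 4MB; on the 64-bit build (which uses PAE-format
     * tables here) one PDPT, four L2 tables, and one L1 table per 2MB.
     */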
#ifdef __i386__
    nr_pt_pages = 1 + ((memsize + 3) >> 2);
#else
    nr_pt_pages = 5 + ((memsize + 1) >> 1);
#endif
    vpt_start = v_end;
    vpt_end = vpt_start + (nr_pt_pages * PAGE_SIZE);

    printf("VIRTUAL MEMORY ARRANGEMENT:\n"
           " Loaded VMX loader: %08lx->%08lx\n"
           " Page tables: %08lx->%08lx\n"
           " TOTAL: %08lx->%08lx\n",
           dsi.v_kernstart, dsi.v_kernend,
           vpt_start, vpt_end,
           dsi.v_start, v_end);
    printf(" ENTRY ADDRESS: %08lx\n", dsi.v_kernentry);

    if ( (v_end - dsi.v_start) > (nr_pages * PAGE_SIZE) )
    {
        ERROR("Initial guest OS requires too much space\n"
              "(%luMB is greater than %luMB limit)\n",
              (v_end-dsi.v_start)>>20, (nr_pages<<PAGE_SHIFT)>>20);
        goto error_out;
    }

    if ( (page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL )
    {
        PERROR("Could not allocate memory");
        goto error_out;
    }

    if ( xc_get_pfn_list(xc_handle, dom, page_array, nr_pages) != nr_pages )
    {
        PERROR("Could not get the page frame list");
        goto error_out;
    }

    loadelfimage(image, xc_handle, dom, page_array, &dsi);

    if ( (mmu = xc_init_mmu_updates(xc_handle, dom)) == NULL )
        goto error_out;

    /* First allocate page for page dir or pdpt */
    ppt_alloc = (vpt_start - dsi.v_start) >> PAGE_SHIFT;
    if ( page_array[ppt_alloc] > 0xfffff )
    {
        unsigned long nmfn;
        nmfn = xc_make_page_below_4G( xc_handle, dom, page_array[ppt_alloc] );
        if ( nmfn == 0 )
        {
            fprintf(stderr, "Couldn't get a page below 4GB :-(\n");
            goto error_out;
        }
        page_array[ppt_alloc] = nmfn;
    }
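
    /*
     * Hand-build the guest's initial page tables: page-table frames are
     * taken from the pseudo-physical range starting at vpt_start, and every
     * page of [v_start, v_end) is mapped read/write (L1_PROT).
     */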
#ifdef __i386__
    l2tab = page_array[ppt_alloc++] << PAGE_SHIFT;
    ctxt->ctrlreg[3] = l2tab;

    if ( (vl2tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                        PROT_READ|PROT_WRITE,
                                        l2tab >> PAGE_SHIFT)) == NULL )
        goto error_out;
    memset(vl2tab, 0, PAGE_SIZE);
    vl2e = &vl2tab[l2_table_offset(dsi.v_start)];
    for ( count = 0; count < ((v_end-dsi.v_start)>>PAGE_SHIFT); count++ )
    {
        if ( ((unsigned long)vl1e & (PAGE_SIZE-1)) == 0 )
        {
            l1tab = page_array[ppt_alloc++] << PAGE_SHIFT;
            if ( vl1tab != NULL )
                munmap(vl1tab, PAGE_SIZE);
            if ( (vl1tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                                PROT_READ|PROT_WRITE,
                                                l1tab >> PAGE_SHIFT)) == NULL )
            {
                munmap(vl2tab, PAGE_SIZE);
                goto error_out;
            }
            memset(vl1tab, 0, PAGE_SIZE);
            vl1e = &vl1tab[l1_table_offset(dsi.v_start + (count<<PAGE_SHIFT))];
            *vl2e++ = l1tab | L2_PROT;
        }

        *vl1e = (page_array[count] << PAGE_SHIFT) | L1_PROT;
        vl1e++;
    }
    munmap(vl1tab, PAGE_SIZE);
    munmap(vl2tab, PAGE_SIZE);
#else
    l3tab = page_array[ppt_alloc++] << PAGE_SHIFT;
    ctxt->ctrlreg[3] = l3tab;

    if ( (vl3tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                        PROT_READ|PROT_WRITE,
                                        l3tab >> PAGE_SHIFT)) == NULL )
        goto error_out;
    memset(vl3tab, 0, PAGE_SIZE);

    /* Fill in every PDPT entry. */
    for ( i = 0; i < L3_PAGETABLE_ENTRIES_PAE; i++ )
    {
        l2tab = page_array[ppt_alloc++] << PAGE_SHIFT;
        if ( (vl2tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                            PROT_READ|PROT_WRITE,
                                            l2tab >> PAGE_SHIFT)) == NULL )
            goto error_out;
        memset(vl2tab, 0, PAGE_SIZE);
        munmap(vl2tab, PAGE_SIZE);
        vl3tab[i] = l2tab | L3_PROT;
    }

    vl3e = &vl3tab[l3_table_offset(dsi.v_start)];

    for ( count = 0; count < ((v_end-dsi.v_start)>>PAGE_SHIFT); count++ )
    {
        if (!(count & (1 << (L3_PAGETABLE_SHIFT - L1_PAGETABLE_SHIFT)))){
            l2tab = vl3tab[count >> (L3_PAGETABLE_SHIFT - L1_PAGETABLE_SHIFT)]
                & PAGE_MASK;

            if (vl2tab != NULL)
                munmap(vl2tab, PAGE_SIZE);

            if ( (vl2tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                                PROT_READ|PROT_WRITE,
                                                l2tab >> PAGE_SHIFT)) == NULL )
                goto error_out;

            vl2e = &vl2tab[l2_table_offset(dsi.v_start + (count << PAGE_SHIFT))];
        }
        if ( ((unsigned long)vl1e & (PAGE_SIZE-1)) == 0 )
        {
            l1tab = page_array[ppt_alloc++] << PAGE_SHIFT;
            if ( vl1tab != NULL )
                munmap(vl1tab, PAGE_SIZE);
            if ( (vl1tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                                PROT_READ|PROT_WRITE,
                                                l1tab >> PAGE_SHIFT)) == NULL )
            {
                munmap(vl2tab, PAGE_SIZE);
                goto error_out;
            }
            memset(vl1tab, 0, PAGE_SIZE);
            vl1e = &vl1tab[l1_table_offset(dsi.v_start + (count<<PAGE_SHIFT))];
            *vl2e++ = l1tab | L2_PROT;
        }

        *vl1e = (page_array[count] << PAGE_SHIFT) | L1_PROT;
        vl1e++;
    }

    munmap(vl1tab, PAGE_SIZE);
    munmap(vl2tab, PAGE_SIZE);
    munmap(vl3tab, PAGE_SIZE);
#endif
    /* Write the machine->phys table entries. */
    for ( count = 0; count < nr_pages; count++ )
    {
        if ( xc_add_mmu_update(xc_handle, mmu,
                               (page_array[count] << PAGE_SHIFT) |
                               MMU_MACHPHYS_UPDATE, count) )
            goto error_out;
    }

    set_nr_vcpus(xc_handle, dom, page_array, &dsi, vcpus);

    *store_mfn = page_array[(v_end-2) >> PAGE_SHIFT];
    shared_page_frame = (v_end - PAGE_SIZE) >> PAGE_SHIFT;

    if ((e820_page = xc_map_foreign_range(
             xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
             page_array[E820_MAP_PAGE >> PAGE_SHIFT])) == 0)
        goto error_out;
    memset(e820_page, 0, PAGE_SIZE);
    e820_map_nr = build_e820map(e820_page, v_end);
#if defined (__i386__)
    if (zap_mmio_ranges(xc_handle, dom, l2tab, e820_map_nr,
                        ((unsigned char *)e820_page) + E820_MAP_OFFSET) == -1)
#else
    if (zap_mmio_ranges(xc_handle, dom, l3tab, e820_map_nr,
                        ((unsigned char *)e820_page) + E820_MAP_OFFSET) == -1)
#endif
        goto error_out;
    munmap(e820_page, PAGE_SIZE);

    /* shared_info page starts its life empty. */
    if ((shared_info = xc_map_foreign_range(
             xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
             shared_info_frame)) == 0)
        goto error_out;
    memset(shared_info, 0, sizeof(shared_info_t));
    /* Mask all upcalls... */
    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
        shared_info->vcpu_data[i].evtchn_upcall_mask = 1;

    shared_info->n_vcpu = vcpus;
    printf(" VCPUS: %d\n", shared_info->n_vcpu);

    munmap(shared_info, PAGE_SIZE);

    /* Populate the event channel port in the shared page */
    if ((sp = (shared_iopage_t *) xc_map_foreign_range(
             xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
             page_array[shared_page_frame])) == 0)
        goto error_out;
    memset(sp, 0, PAGE_SIZE);
    sp->sp_global.eport = control_evtchn;
    munmap(sp, PAGE_SIZE);

    /* Send the page update requests down to the hypervisor. */
    if ( xc_finish_mmu_updates(xc_handle, mmu) )
        goto error_out;

    free(mmu);
    free(page_array);

    /*
     * Initial register values:
     */
    ctxt->user_regs.ds = 0;
    ctxt->user_regs.es = 0;
    ctxt->user_regs.fs = 0;
    ctxt->user_regs.gs = 0;
    ctxt->user_regs.ss = 0;
    ctxt->user_regs.cs = 0;
    ctxt->user_regs.eip = dsi.v_kernentry;
    ctxt->user_regs.edx = 0;
    ctxt->user_regs.eax = 0;
    ctxt->user_regs.esp = 0;
    ctxt->user_regs.ebx = 0; /* startup_32 expects this to be 0 to signal boot cpu */
    ctxt->user_regs.ecx = 0;
    ctxt->user_regs.esi = 0;
    ctxt->user_regs.edi = 0;
    ctxt->user_regs.ebp = 0;

    ctxt->user_regs.eflags = 0;

    return 0;

 error_out:
    free(mmu);
    free(page_array);
    return -1;
}
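
/*
 * CPUID leaf 1 reports VMX support in ECX bit 5 (mask 0x20).  EBX/RBX is
 * saved and restored by hand because the asm does not list it as clobbered.
 */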
#define VMX_FEATURE_FLAG 0x20

static int vmx_identify(void)
{
    int eax, ecx;

#ifdef __i386__
    __asm__ __volatile__ ("pushl %%ebx; cpuid; popl %%ebx"
                          : "=a" (eax), "=c" (ecx)
                          : "0" (1)
                          : "dx");
#elif defined __x86_64__
    __asm__ __volatile__ ("pushq %%rbx; cpuid; popq %%rbx"
                          : "=a" (eax), "=c" (ecx)
                          : "0" (1)
                          : "dx");
#endif

    if (!(ecx & VMX_FEATURE_FLAG)) {
        return -1;
    }
    return 0;
}
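
/*
 * Check that the CPU supports VMX, read the loader image, verify that the
 * target domain is paused and not yet constructed, build the guest with
 * setup_guest(), and install the resulting VMX vcpu context via
 * DOM0_SETDOMAININFO.
 */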
int xc_vmx_build(int xc_handle,
                 u32 domid,
                 int memsize,
                 const char *image_name,
                 unsigned int control_evtchn,
                 unsigned long flags,
                 unsigned int vcpus,
                 unsigned int store_evtchn,
                 unsigned long *store_mfn)
{
    dom0_op_t launch_op, op;
    int rc, i;
    vcpu_guest_context_t st_ctxt, *ctxt = &st_ctxt;
    unsigned long nr_pages;
    char *image = NULL;
    unsigned long image_size;

    if ( vmx_identify() < 0 )
    {
        PERROR("CPU doesn't support VMX Extensions");
        goto error_out;
    }

    if ( (nr_pages = xc_get_tot_pages(xc_handle, domid)) < 0 )
    {
        PERROR("Could not find total pages for domain");
        goto error_out;
    }

    if ( (image = xc_read_kernel_image(image_name, &image_size)) == NULL )
        goto error_out;

    if ( mlock(&st_ctxt, sizeof(st_ctxt) ) )
    {
        PERROR("xc_vmx_build: ctxt mlock failed");
        return 1;
    }

    op.cmd = DOM0_GETDOMAININFO;
    op.u.getdomaininfo.domain = (domid_t)domid;
    if ( (xc_dom0_op(xc_handle, &op) < 0) ||
         ((u16)op.u.getdomaininfo.domain != domid) )
    {
        PERROR("Could not get info on domain");
        goto error_out;
    }

    if ( xc_domain_get_vcpu_context(xc_handle, domid, 0, ctxt) )
    {
        PERROR("Could not get vcpu context");
        goto error_out;
    }

    if ( !(op.u.getdomaininfo.flags & DOMFLAGS_PAUSED) ||
         (ctxt->ctrlreg[3] != 0) )
    {
        ERROR("Domain is already constructed");
        goto error_out;
    }

    if ( setup_guest(xc_handle, domid, memsize, image, image_size, nr_pages,
                     ctxt, op.u.getdomaininfo.shared_info_frame, control_evtchn,
                     flags, vcpus, store_evtchn, store_mfn) < 0)
    {
        ERROR("Error constructing guest OS");
        goto error_out;
    }

    free(image);

    ctxt->flags = VGCF_VMX_GUEST;
    /* FPU is set up to default initial state. */
    memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));

    /* Virtual IDT is empty at start-of-day. */
    for ( i = 0; i < 256; i++ )
    {
        ctxt->trap_ctxt[i].vector = i;
        ctxt->trap_ctxt[i].cs = FLAT_KERNEL_CS;
    }

    /* No LDT. */
    ctxt->ldt_ents = 0;

    /* Use the default Xen-provided GDT. */
    ctxt->gdt_ents = 0;

    /* No debugging. */
    memset(ctxt->debugreg, 0, sizeof(ctxt->debugreg));

    /* No callback handlers. */
#if defined(__i386__)
    ctxt->event_callback_cs = FLAT_KERNEL_CS;
    ctxt->event_callback_eip = 0;
    ctxt->failsafe_callback_cs = FLAT_KERNEL_CS;
    ctxt->failsafe_callback_eip = 0;
#elif defined(__x86_64__)
    ctxt->event_callback_eip = 0;
    ctxt->failsafe_callback_eip = 0;
    ctxt->syscall_callback_eip = 0;
#endif

    memset( &launch_op, 0, sizeof(launch_op) );

    launch_op.u.setdomaininfo.domain = (domid_t)domid;
    launch_op.u.setdomaininfo.vcpu = 0;
    launch_op.u.setdomaininfo.ctxt = ctxt;

    launch_op.cmd = DOM0_SETDOMAININFO;
    rc = xc_dom0_op(xc_handle, &launch_op);

    return rc;

 error_out:
    free(image);

    return -1;
}

static inline int is_loadable_phdr(Elf32_Phdr *phdr)
{
    return ((phdr->p_type == PT_LOAD) &&
            ((phdr->p_flags & (PF_W|PF_X)) != 0));
}
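
/*
 * Validate the ELF32 headers, record the physical extent of the loadable
 * segments in *dsi, and force the entry point to VMX_LOADER_ENTR_ADDR.
 */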
static int parseelfimage(char *elfbase,
                         unsigned long elfsize,
                         struct domain_setup_info *dsi)
{
    Elf32_Ehdr *ehdr = (Elf32_Ehdr *)elfbase;
    Elf32_Phdr *phdr;
    Elf32_Shdr *shdr;
    unsigned long kernstart = ~0UL, kernend=0UL;
    char *shstrtab;
    int h;

    if ( !IS_ELF(*ehdr) )
    {
        ERROR("Kernel image does not have an ELF header.");
        return -EINVAL;
    }

    if ( (ehdr->e_phoff + (ehdr->e_phnum * ehdr->e_phentsize)) > elfsize )
    {
        ERROR("ELF program headers extend beyond end of image.");
        return -EINVAL;
    }

    if ( (ehdr->e_shoff + (ehdr->e_shnum * ehdr->e_shentsize)) > elfsize )
    {
        ERROR("ELF section headers extend beyond end of image.");
        return -EINVAL;
    }

    /* Find the section-header strings table. */
    if ( ehdr->e_shstrndx == SHN_UNDEF )
    {
        ERROR("ELF image has no section-header strings table (shstrtab).");
        return -EINVAL;
    }
    shdr = (Elf32_Shdr *)(elfbase + ehdr->e_shoff +
                          (ehdr->e_shstrndx*ehdr->e_shentsize));
    shstrtab = elfbase + shdr->sh_offset;

    for ( h = 0; h < ehdr->e_phnum; h++ )
    {
        phdr = (Elf32_Phdr *)(elfbase + ehdr->e_phoff + (h*ehdr->e_phentsize));
        if ( !is_loadable_phdr(phdr) )
            continue;
        if ( phdr->p_paddr < kernstart )
            kernstart = phdr->p_paddr;
        if ( (phdr->p_paddr + phdr->p_memsz) > kernend )
            kernend = phdr->p_paddr + phdr->p_memsz;
    }

    if ( (kernstart > kernend) ||
         (ehdr->e_entry < kernstart) ||
         (ehdr->e_entry > kernend) )
    {
        ERROR("Malformed ELF image.");
        return -EINVAL;
    }

    dsi->v_start = 0x00000000;

    dsi->v_kernstart = kernstart;
    dsi->v_kernend = kernend;
    dsi->v_kernentry = VMX_LOADER_ENTR_ADDR;

    dsi->v_end = dsi->v_kernend;

    return 0;
}
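
/*
 * Copy each loadable segment into the guest one page at a time through
 * xc_map_foreign_range(), then zero-fill the remainder of the segment
 * (p_memsz beyond p_filesz).
 */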
static int
loadelfimage(
    char *elfbase, int xch, u32 dom, unsigned long *parray,
    struct domain_setup_info *dsi)
{
    Elf32_Ehdr *ehdr = (Elf32_Ehdr *)elfbase;
    Elf32_Phdr *phdr;
    int h;

    char *va;
    unsigned long pa, done, chunksz;

    for ( h = 0; h < ehdr->e_phnum; h++ )
    {
        phdr = (Elf32_Phdr *)(elfbase + ehdr->e_phoff + (h*ehdr->e_phentsize));
        if ( !is_loadable_phdr(phdr) )
            continue;

        for ( done = 0; done < phdr->p_filesz; done += chunksz )
        {
            pa = (phdr->p_paddr + done) - dsi->v_start;
            if ((va = xc_map_foreign_range(
                     xch, dom, PAGE_SIZE, PROT_WRITE,
                     parray[pa >> PAGE_SHIFT])) == 0)
                return -1;
            chunksz = phdr->p_filesz - done;
            if ( chunksz > (PAGE_SIZE - (pa & (PAGE_SIZE-1))) )
                chunksz = PAGE_SIZE - (pa & (PAGE_SIZE-1));
            memcpy(va + (pa & (PAGE_SIZE-1)),
                   elfbase + phdr->p_offset + done, chunksz);
            munmap(va, PAGE_SIZE);
        }

        for ( ; done < phdr->p_memsz; done += chunksz )
        {
            pa = (phdr->p_paddr + done) - dsi->v_start;
            if ((va = xc_map_foreign_range(
                     xch, dom, PAGE_SIZE, PROT_WRITE,
                     parray[pa >> PAGE_SHIFT])) == 0)
                return -1;
            chunksz = phdr->p_memsz - done;
            if ( chunksz > (PAGE_SIZE - (pa & (PAGE_SIZE-1))) )
                chunksz = PAGE_SIZE - (pa & (PAGE_SIZE-1));
            memset(va + (pa & (PAGE_SIZE-1)), 0, chunksz);
            munmap(va, PAGE_SIZE);
        }
    }

    return 0;
}

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */