direct-io.hg: view of tools/libxc/xc_vmx_build.c @ 7786:20bd6f55b813

Clean up xenstore/console shared page initialisation, which is now handled
solely by the domain builder.

Signed-off-by: Keir Fraser <keir@xensource.com>

author    kaf24@firebug.cl.cam.ac.uk
date      Mon Nov 14 11:36:42 2005 +0100
parents   8ee7df2c18d1
children  36ab34f1c31e

/******************************************************************************
 * xc_vmx_build.c
 */

#include <stddef.h>
#include "xg_private.h"
#define ELFSIZE 32
#include "xc_elf.h"
#include <stdlib.h>
#include <unistd.h>
#include <zlib.h>
#include <xen/io/ioreq.h>

#define VMX_LOADER_ENTR_ADDR  0x00100000

#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
#ifdef __x86_64__
#define L3_PROT (_PAGE_PRESENT)
#endif

#define E820MAX 128

#define E820_RAM          1
#define E820_RESERVED     2
#define E820_ACPI         3
#define E820_NVS          4
#define E820_IO          16
#define E820_SHARED_PAGE 17
#define E820_XENSTORE    18

#define E820_MAP_PAGE       0x00090000
#define E820_MAP_NR_OFFSET  0x000001E8
#define E820_MAP_OFFSET     0x000002D0

#define VCPU_NR_PAGE        0x0009F000
#define VCPU_NR_OFFSET      0x00000800

struct e820entry {
    uint64_t addr;
    uint64_t size;
    uint32_t type;
} __attribute__((packed));

#define round_pgup(_p)    (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
#define round_pgdown(_p)  ((_p)&PAGE_MASK)

static int
parseelfimage(
    char *elfbase, unsigned long elfsize, struct domain_setup_info *dsi);
static int
loadelfimage(
    char *elfbase, int xch, uint32_t dom, unsigned long *parray,
    struct domain_setup_info *dsi);
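
/*
 * Construct the guest's E820 memory map inside the magic E820 page and
 * record the entry count at E820_MAP_NR_OFFSET.  Returns the number of
 * entries written.
 */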
static unsigned char build_e820map(void *e820_page, unsigned long mem_size)
{
    struct e820entry *e820entry =
        (struct e820entry *)(((unsigned char *)e820_page) + E820_MAP_OFFSET);
    unsigned char nr_map = 0;

    /* XXX: Doesn't work for > 4GB yet */
    e820entry[nr_map].addr = 0x0;
    e820entry[nr_map].size = 0x9F800;
    e820entry[nr_map].type = E820_RAM;
    nr_map++;

    e820entry[nr_map].addr = 0x9F800;
    e820entry[nr_map].size = 0x800;
    e820entry[nr_map].type = E820_RESERVED;
    nr_map++;

    e820entry[nr_map].addr = 0xA0000;
    e820entry[nr_map].size = 0x20000;
    e820entry[nr_map].type = E820_IO;
    nr_map++;

    e820entry[nr_map].addr = 0xF0000;
    e820entry[nr_map].size = 0x10000;
    e820entry[nr_map].type = E820_RESERVED;
    nr_map++;

#define STATIC_PAGES 2  /* for ioreq_t and store_mfn */
    /* Most of the ram goes here */
    e820entry[nr_map].addr = 0x100000;
    e820entry[nr_map].size = mem_size - 0x100000 - STATIC_PAGES*PAGE_SIZE;
    e820entry[nr_map].type = E820_RAM;
    nr_map++;

    /* Statically allocated special pages */

    /* Shared ioreq_t page */
    e820entry[nr_map].addr = mem_size - PAGE_SIZE;
    e820entry[nr_map].size = PAGE_SIZE;
    e820entry[nr_map].type = E820_SHARED_PAGE;
    nr_map++;

    /* For xenstore */
    e820entry[nr_map].addr = mem_size - 2*PAGE_SIZE;
    e820entry[nr_map].size = PAGE_SIZE;
    e820entry[nr_map].type = E820_XENSTORE;
    nr_map++;

    e820entry[nr_map].addr = mem_size;
    e820entry[nr_map].size = 0x3 * PAGE_SIZE;
    e820entry[nr_map].type = E820_NVS;
    nr_map++;

    e820entry[nr_map].addr = mem_size + 0x3 * PAGE_SIZE;
    e820entry[nr_map].size = 0xA * PAGE_SIZE;
    e820entry[nr_map].type = E820_ACPI;
    nr_map++;

    e820entry[nr_map].addr = 0xFEC00000;
    e820entry[nr_map].size = 0x1400000;
    e820entry[nr_map].type = E820_IO;
    nr_map++;

    return (*(((unsigned char *)e820_page) + E820_MAP_NR_OFFSET) = nr_map);
}

/*
 * Use the E820-reserved memory at 0x9F800 to pass the number of VCPUs to
 * vmxloader; vmxloader uses it to configure the ACPI MADT table.
 */
#define VCPU_MAGIC 0x76637075  /* "vcpu" */
static int set_vcpu_nr(int xc_handle, uint32_t dom,
                       unsigned long *pfn_list, unsigned int vcpus)
{
    char *va_map;
    unsigned int *va_vcpus;

    va_map = xc_map_foreign_range(xc_handle, dom,
                                  PAGE_SIZE, PROT_READ|PROT_WRITE,
                                  pfn_list[VCPU_NR_PAGE >> PAGE_SHIFT]);
    if ( va_map == NULL )
        return -1;

    va_vcpus = (unsigned int *)(va_map + VCPU_NR_OFFSET);
    va_vcpus[0] = VCPU_MAGIC;
    va_vcpus[1] = vcpus;

    munmap(va_map, PAGE_SIZE);

    return 0;
}
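
/*
 * Clear the guest page-table entries that cover E820_IO (MMIO) ranges so
 * that accesses to those addresses fault rather than hitting guest RAM.
 * The i386 build walks a 2-level table; the x86_64 build walks the 3-level
 * PAE-style tables set up by setup_guest().
 */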
#ifdef __i386__
static int zap_mmio_range(int xc_handle, uint32_t dom,
                          l2_pgentry_32_t *vl2tab,
                          unsigned long mmio_range_start,
                          unsigned long mmio_range_size)
{
    unsigned long mmio_addr;
    unsigned long mmio_range_end = mmio_range_start + mmio_range_size;
    unsigned long vl2e;
    l1_pgentry_32_t *vl1tab;

    mmio_addr = mmio_range_start & PAGE_MASK;
    for ( ; mmio_addr < mmio_range_end; mmio_addr += PAGE_SIZE )
    {
        vl2e = vl2tab[l2_table_offset(mmio_addr)];
        if ( vl2e == 0 )
            continue;
        vl1tab = xc_map_foreign_range(
            xc_handle, dom, PAGE_SIZE,
            PROT_READ|PROT_WRITE, vl2e >> PAGE_SHIFT);
        if ( vl1tab == 0 )
        {
            PERROR("Failed to zap MMIO range");
            return -1;
        }
        vl1tab[l1_table_offset(mmio_addr)] = 0;
        munmap(vl1tab, PAGE_SIZE);
    }
    return 0;
}

static int zap_mmio_ranges(int xc_handle, uint32_t dom, unsigned long l2tab,
                           unsigned char e820_map_nr, unsigned char *e820map)
{
    unsigned int i;
    struct e820entry *e820entry = (struct e820entry *)e820map;

    l2_pgentry_32_t *vl2tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                                   PROT_READ|PROT_WRITE,
                                                   l2tab >> PAGE_SHIFT);
    if ( vl2tab == 0 )
        return -1;

    for ( i = 0; i < e820_map_nr; i++ )
    {
        if ( (e820entry[i].type == E820_IO) &&
             (zap_mmio_range(xc_handle, dom, vl2tab,
                             e820entry[i].addr, e820entry[i].size) == -1) )
            return -1;
    }

    munmap(vl2tab, PAGE_SIZE);
    return 0;
}
#else
static int zap_mmio_range(int xc_handle, uint32_t dom,
                          l3_pgentry_t *vl3tab,
                          unsigned long mmio_range_start,
                          unsigned long mmio_range_size)
{
    unsigned long mmio_addr;
    unsigned long mmio_range_end = mmio_range_start + mmio_range_size;
    unsigned long vl2e = 0;
    unsigned long vl3e;
    l1_pgentry_t *vl1tab;
    l2_pgentry_t *vl2tab;

    mmio_addr = mmio_range_start & PAGE_MASK;
    for ( ; mmio_addr < mmio_range_end; mmio_addr += PAGE_SIZE )
    {
        vl3e = vl3tab[l3_table_offset(mmio_addr)];
        if ( vl3e == 0 )
            continue;

        vl2tab = xc_map_foreign_range(
            xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, vl3e>>PAGE_SHIFT);
        if ( vl2tab == NULL )
        {
            PERROR("Failed to zap MMIO range");
            return -1;
        }

        vl2e = vl2tab[l2_table_offset(mmio_addr)];
        if ( vl2e == 0 )
        {
            munmap(vl2tab, PAGE_SIZE);
            continue;
        }

        vl1tab = xc_map_foreign_range(
            xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, vl2e>>PAGE_SHIFT);
        if ( vl1tab == NULL )
        {
            PERROR("Failed to zap MMIO range");
            munmap(vl2tab, PAGE_SIZE);
            return -1;
        }

        vl1tab[l1_table_offset(mmio_addr)] = 0;
        munmap(vl2tab, PAGE_SIZE);
        munmap(vl1tab, PAGE_SIZE);
    }
    return 0;
}

static int zap_mmio_ranges(int xc_handle, uint32_t dom, unsigned long l3tab,
                           unsigned char e820_map_nr, unsigned char *e820map)
{
    unsigned int i;
    struct e820entry *e820entry = (struct e820entry *)e820map;

    l3_pgentry_t *vl3tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                                PROT_READ|PROT_WRITE,
                                                l3tab >> PAGE_SHIFT);
    if ( vl3tab == 0 )
        return -1;

    for ( i = 0; i < e820_map_nr; i++ )
    {
        if ( (e820entry[i].type == E820_IO) &&
             (zap_mmio_range(xc_handle, dom, vl3tab,
                             e820entry[i].addr, e820entry[i].size) == -1) )
            return -1;
    }

    munmap(vl3tab, PAGE_SIZE);
    return 0;
}

#endif
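
/*
 * Populate a new VMX domain: load the ELF loader image, build the guest's
 * initial page tables, install machine->physical table entries, write the
 * E820 map and VCPU count, unmap MMIO ranges, initialise the shared_info,
 * ioreq and xenstore pages, and fill in the boot-time register state in
 * *ctxt.
 */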
static int setup_guest(int xc_handle,
                       uint32_t dom, int memsize,
                       char *image, unsigned long image_size,
                       unsigned long nr_pages,
                       vcpu_guest_context_t *ctxt,
                       unsigned long shared_info_frame,
                       unsigned int control_evtchn,
                       unsigned int lapic,
                       unsigned int vcpus,
                       unsigned int store_evtchn,
                       unsigned long *store_mfn)
{
    l1_pgentry_t *vl1tab=NULL, *vl1e=NULL;
    l2_pgentry_t *vl2tab=NULL, *vl2e=NULL;
    unsigned long *page_array = NULL;
#ifdef __x86_64__
    l3_pgentry_t *vl3tab=NULL, *vl3e=NULL;
    unsigned long l3tab;
#endif
    unsigned long l2tab;
    unsigned long l1tab;
    unsigned long count, i;
    shared_info_t *shared_info;
    void *e820_page;
    unsigned char e820_map_nr;
    xc_mmu_t *mmu = NULL;
    int rc;

    unsigned long nr_pt_pages;
    unsigned long ppt_alloc;

    struct domain_setup_info dsi;
    unsigned long vpt_start;
    unsigned long vpt_end;
    unsigned long v_end;

    unsigned long shared_page_frame = 0;
    shared_iopage_t *sp;

    memset(&dsi, 0, sizeof(struct domain_setup_info));

    if ( (rc = parseelfimage(image, image_size, &dsi)) != 0 )
        goto error_out;

    if ( (dsi.v_start & (PAGE_SIZE-1)) != 0 )
    {
        PERROR("Guest OS must load to a page boundary.\n");
        goto error_out;
    }

    /* memsize is in megabytes */
    v_end = memsize << 20;

#ifdef __i386__
    nr_pt_pages = 1 + ((memsize + 3) >> 2);
#else
    nr_pt_pages = 5 + ((memsize + 1) >> 1);
#endif
    vpt_start = v_end;
    vpt_end = vpt_start + (nr_pt_pages * PAGE_SIZE);

    printf("VIRTUAL MEMORY ARRANGEMENT:\n"
           " Loaded VMX loader: %08lx->%08lx\n"
           " Page tables:       %08lx->%08lx\n"
           " TOTAL:             %08lx->%08lx\n",
           dsi.v_kernstart, dsi.v_kernend,
           vpt_start, vpt_end,
           dsi.v_start, v_end);
    printf(" ENTRY ADDRESS:     %08lx\n", dsi.v_kernentry);

    if ( (v_end - dsi.v_start) > (nr_pages * PAGE_SIZE) )
    {
        ERROR("Initial guest OS requires too much space\n"
              "(%luMB is greater than %luMB limit)\n",
              (v_end-dsi.v_start)>>20, (nr_pages<<PAGE_SHIFT)>>20);
        goto error_out;
    }

    if ( (page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL )
    {
        PERROR("Could not allocate memory");
        goto error_out;
    }

    if ( xc_get_pfn_list(xc_handle, dom, page_array, nr_pages) != nr_pages )
    {
        PERROR("Could not get the page frame list");
        goto error_out;
    }

    loadelfimage(image, xc_handle, dom, page_array, &dsi);

    if ( (mmu = xc_init_mmu_updates(xc_handle, dom)) == NULL )
        goto error_out;

    /* First allocate page for page dir or pdpt */
    ppt_alloc = vpt_start >> PAGE_SHIFT;
    if ( page_array[ppt_alloc] > 0xfffff )
    {
        unsigned long nmfn;
        nmfn = xc_make_page_below_4G(xc_handle, dom, page_array[ppt_alloc]);
        if ( nmfn == 0 )
        {
            fprintf(stderr, "Couldn't get a page below 4GB :-(\n");
            goto error_out;
        }
        page_array[ppt_alloc] = nmfn;
    }

#ifdef __i386__
    l2tab = page_array[ppt_alloc++] << PAGE_SHIFT;
    ctxt->ctrlreg[3] = l2tab;

    if ( (vl2tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                        PROT_READ|PROT_WRITE,
                                        l2tab >> PAGE_SHIFT)) == NULL )
        goto error_out;
    memset(vl2tab, 0, PAGE_SIZE);
    vl2e = &vl2tab[l2_table_offset(0)];
    for ( count = 0; count < (v_end >> PAGE_SHIFT); count++ )
    {
        if ( ((unsigned long)vl1e & (PAGE_SIZE-1)) == 0 )
        {
            l1tab = page_array[ppt_alloc++] << PAGE_SHIFT;
            if ( vl1tab != NULL )
                munmap(vl1tab, PAGE_SIZE);
            if ( (vl1tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                                PROT_READ|PROT_WRITE,
                                                l1tab >> PAGE_SHIFT)) == NULL )
            {
                munmap(vl2tab, PAGE_SIZE);
                goto error_out;
            }
            memset(vl1tab, 0, PAGE_SIZE);
            vl1e = &vl1tab[l1_table_offset(count << PAGE_SHIFT)];
            *vl2e++ = l1tab | L2_PROT;
        }

        *vl1e = (page_array[count] << PAGE_SHIFT) | L1_PROT;
        vl1e++;
    }
    munmap(vl1tab, PAGE_SIZE);
    munmap(vl2tab, PAGE_SIZE);
#else
    l3tab = page_array[ppt_alloc++] << PAGE_SHIFT;
    ctxt->ctrlreg[3] = l3tab;

    if ( (vl3tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                        PROT_READ|PROT_WRITE,
                                        l3tab >> PAGE_SHIFT)) == NULL )
        goto error_out;
    memset(vl3tab, 0, PAGE_SIZE);

    /* Fill in every PDPT entry. */
    for ( i = 0; i < L3_PAGETABLE_ENTRIES_PAE; i++ )
    {
        l2tab = page_array[ppt_alloc++] << PAGE_SHIFT;
        if ( (vl2tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                            PROT_READ|PROT_WRITE,
                                            l2tab >> PAGE_SHIFT)) == NULL )
            goto error_out;
        memset(vl2tab, 0, PAGE_SIZE);
        munmap(vl2tab, PAGE_SIZE);
        vl3tab[i] = l2tab | L3_PROT;
    }

    vl3e = &vl3tab[l3_table_offset(0)];
    for ( count = 0; count < (v_end >> PAGE_SHIFT); count++ )
    {
        /* At each 1GB boundary, switch to the L2 table named by the PDPT. */
        if ( !(count & ((1 << (L3_PAGETABLE_SHIFT - L1_PAGETABLE_SHIFT)) - 1)) )
        {
            l2tab = vl3tab[count >> (L3_PAGETABLE_SHIFT - L1_PAGETABLE_SHIFT)]
                & PAGE_MASK;

            if ( vl2tab != NULL )
                munmap(vl2tab, PAGE_SIZE);

            if ( (vl2tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                                PROT_READ|PROT_WRITE,
                                                l2tab >> PAGE_SHIFT)) == NULL )
                goto error_out;

            vl2e = &vl2tab[l2_table_offset(count << PAGE_SHIFT)];
        }
        if ( ((unsigned long)vl1e & (PAGE_SIZE-1)) == 0 )
        {
            l1tab = page_array[ppt_alloc++] << PAGE_SHIFT;
            if ( vl1tab != NULL )
                munmap(vl1tab, PAGE_SIZE);
            if ( (vl1tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                                PROT_READ|PROT_WRITE,
                                                l1tab >> PAGE_SHIFT)) == NULL )
            {
                munmap(vl2tab, PAGE_SIZE);
                goto error_out;
            }
            memset(vl1tab, 0, PAGE_SIZE);
            vl1e = &vl1tab[l1_table_offset(count << PAGE_SHIFT)];
            *vl2e++ = l1tab | L2_PROT;
        }

        *vl1e = (page_array[count] << PAGE_SHIFT) | L1_PROT;
        vl1e++;
    }

    munmap(vl1tab, PAGE_SIZE);
    munmap(vl2tab, PAGE_SIZE);
    munmap(vl3tab, PAGE_SIZE);
#endif

    /* Write the machine->phys table entries. */
    for ( count = 0; count < nr_pages; count++ )
    {
        if ( xc_add_mmu_update(xc_handle, mmu,
                               (page_array[count] << PAGE_SHIFT) |
                               MMU_MACHPHYS_UPDATE, count) )
            goto error_out;
    }

    if ( set_vcpu_nr(xc_handle, dom, page_array, vcpus) )
    {
        fprintf(stderr, "Couldn't set vcpu number for VMX guest.\n");
        goto error_out;
    }

    *store_mfn = page_array[(v_end-2) >> PAGE_SHIFT];
    if ( xc_clear_domain_page(xc_handle, dom, *store_mfn) )
        goto error_out;

    shared_page_frame = (v_end - PAGE_SIZE) >> PAGE_SHIFT;

    if ( (e820_page = xc_map_foreign_range(
              xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
              page_array[E820_MAP_PAGE >> PAGE_SHIFT])) == 0 )
        goto error_out;
    memset(e820_page, 0, PAGE_SIZE);
    e820_map_nr = build_e820map(e820_page, v_end);
#if defined (__i386__)
    if ( zap_mmio_ranges(xc_handle, dom, l2tab, e820_map_nr,
                         ((unsigned char *)e820_page) + E820_MAP_OFFSET) == -1 )
#else
    if ( zap_mmio_ranges(xc_handle, dom, l3tab, e820_map_nr,
                         ((unsigned char *)e820_page) + E820_MAP_OFFSET) == -1 )
#endif
        goto error_out;
    munmap(e820_page, PAGE_SIZE);

    /* shared_info page starts its life empty. */
    if ( (shared_info = xc_map_foreign_range(
              xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
              shared_info_frame)) == 0 )
        goto error_out;
    memset(shared_info, 0, sizeof(shared_info_t));
    /* Mask all upcalls... */
    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
        shared_info->vcpu_data[i].evtchn_upcall_mask = 1;
    munmap(shared_info, PAGE_SIZE);

    /* Populate the event channel port in the shared page */
    if ( (sp = (shared_iopage_t *)xc_map_foreign_range(
              xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
              page_array[shared_page_frame])) == 0 )
        goto error_out;
    memset(sp, 0, PAGE_SIZE);
    sp->sp_global.eport = control_evtchn;
    munmap(sp, PAGE_SIZE);

    /* Send the page update requests down to the hypervisor. */
    if ( xc_finish_mmu_updates(xc_handle, mmu) )
        goto error_out;

    free(mmu);
    free(page_array);

    /*
     * Initial register values:
     */
    ctxt->user_regs.ds  = 0;
    ctxt->user_regs.es  = 0;
    ctxt->user_regs.fs  = 0;
    ctxt->user_regs.gs  = 0;
    ctxt->user_regs.ss  = 0;
    ctxt->user_regs.cs  = 0;
    ctxt->user_regs.eip = dsi.v_kernentry;
    ctxt->user_regs.edx = 0;
    ctxt->user_regs.eax = 0;
    ctxt->user_regs.esp = 0;
    ctxt->user_regs.ebx = 0; /* startup_32 expects this to be 0 to signal boot cpu */
    ctxt->user_regs.ecx = lapic;
    ctxt->user_regs.esi = 0;
    ctxt->user_regs.edi = 0;
    ctxt->user_regs.ebp = 0;

    ctxt->user_regs.eflags = 0;

    return 0;

 error_out:
    free(mmu);
    free(page_array);
    return -1;
}
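
/*
 * CPUID leaf 1, ECX bit 5 (0x20) is the VMX feature flag: it tells us
 * whether the host CPU supports the VT-x extensions needed to run a VMX
 * guest.
 */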
#define VMX_FEATURE_FLAG 0x20

static int vmx_identify(void)
{
    int eax, ecx;

    __asm__ __volatile__ (
#if defined(__i386__)
        "push %%ebx; cpuid; pop %%ebx"
#elif defined(__x86_64__)
        "push %%rbx; cpuid; pop %%rbx"
#endif
        : "=a" (eax), "=c" (ecx)
        : "0" (1)
        : "dx");

    if ( !(ecx & VMX_FEATURE_FLAG) )
        return -1;

    return 0;
}
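
/*
 * Build a VMX (fully virtualised) guest: check VT-x support, read the loader
 * image, fetch and sanity-check the domain info (the domain must be paused
 * and not yet constructed), populate it via setup_guest(), and finally
 * install the initial VCPU context with DOM0_SETDOMAININFO.
 *
 * Hypothetical caller sketch (the handle, event-channel ports and image path
 * are illustrative, not taken from this file):
 *
 *     unsigned long store_mfn;
 *     if ( xc_vmx_build(xc_handle, domid, mem_mb, "/path/to/vmxloader",
 *                       control_evtchn, lapic, vcpus, store_evtchn,
 *                       &store_mfn) != 0 )
 *         fprintf(stderr, "xc_vmx_build failed\n");
 */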
int xc_vmx_build(int xc_handle,
                 uint32_t domid,
                 int memsize,
                 const char *image_name,
                 unsigned int control_evtchn,
                 unsigned int lapic,
                 unsigned int vcpus,
                 unsigned int store_evtchn,
                 unsigned long *store_mfn)
{
    dom0_op_t launch_op, op;
    int rc, i;
    vcpu_guest_context_t st_ctxt, *ctxt = &st_ctxt;
    unsigned long nr_pages;
    char *image = NULL;
    unsigned long image_size;

    if ( vmx_identify() < 0 )
    {
        PERROR("CPU doesn't support VMX Extensions");
        goto error_out;
    }

    if ( (nr_pages = xc_get_tot_pages(xc_handle, domid)) < 0 )
    {
        PERROR("Could not find total pages for domain");
        goto error_out;
    }

    if ( (image = xc_read_kernel_image(image_name, &image_size)) == NULL )
        goto error_out;

    if ( mlock(&st_ctxt, sizeof(st_ctxt)) )
    {
        PERROR("%s: ctxt mlock failed", __func__);
        return 1;
    }

    op.cmd = DOM0_GETDOMAININFO;
    op.u.getdomaininfo.domain = (domid_t)domid;
    if ( (xc_dom0_op(xc_handle, &op) < 0) ||
         ((uint16_t)op.u.getdomaininfo.domain != domid) )
    {
        PERROR("Could not get info on domain");
        goto error_out;
    }

    if ( xc_domain_get_vcpu_context(xc_handle, domid, 0, ctxt) )
    {
        PERROR("Could not get vcpu context");
        goto error_out;
    }

    if ( !(op.u.getdomaininfo.flags & DOMFLAGS_PAUSED) ||
         (ctxt->ctrlreg[3] != 0) )
    {
        ERROR("Domain is already constructed");
        goto error_out;
    }

    if ( setup_guest(xc_handle, domid, memsize, image, image_size, nr_pages,
                     ctxt, op.u.getdomaininfo.shared_info_frame, control_evtchn,
                     lapic, vcpus, store_evtchn, store_mfn) < 0 )
    {
        ERROR("Error constructing guest OS");
        goto error_out;
    }

    free(image);

    ctxt->flags = VGCF_VMX_GUEST;

    /* FPU is set up to default initial state. */
    memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));

    /* Virtual IDT is empty at start-of-day. */
    for ( i = 0; i < 256; i++ )
    {
        ctxt->trap_ctxt[i].vector = i;
        ctxt->trap_ctxt[i].cs     = FLAT_KERNEL_CS;
    }

    /* No LDT. */
    ctxt->ldt_ents = 0;

    /* Use the default Xen-provided GDT. */
    ctxt->gdt_ents = 0;

    /* No debugging. */
    memset(ctxt->debugreg, 0, sizeof(ctxt->debugreg));

    /* No callback handlers. */
#if defined(__i386__)
    ctxt->event_callback_cs     = FLAT_KERNEL_CS;
    ctxt->event_callback_eip    = 0;
    ctxt->failsafe_callback_cs  = FLAT_KERNEL_CS;
    ctxt->failsafe_callback_eip = 0;
#elif defined(__x86_64__)
    ctxt->event_callback_eip    = 0;
    ctxt->failsafe_callback_eip = 0;
    ctxt->syscall_callback_eip  = 0;
#endif

    memset(&launch_op, 0, sizeof(launch_op));

    launch_op.u.setdomaininfo.domain = (domid_t)domid;
    launch_op.u.setdomaininfo.vcpu   = 0;
    launch_op.u.setdomaininfo.ctxt   = ctxt;

    launch_op.cmd = DOM0_SETDOMAININFO;
    rc = xc_dom0_op(xc_handle, &launch_op);

    return rc;

 error_out:
    free(image);
    return -1;
}
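
/*
 * Minimal ELF32 handling for the VMX loader image: parseelfimage() validates
 * the headers and records the load range and the fixed loader entry point in
 * the domain_setup_info; loadelfimage() copies each loadable segment into
 * the guest's pseudo-physical memory page by page and zeroes any BSS tail.
 */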
static inline int is_loadable_phdr(Elf32_Phdr *phdr)
{
    return ((phdr->p_type == PT_LOAD) &&
            ((phdr->p_flags & (PF_W|PF_X)) != 0));
}

static int parseelfimage(char *elfbase,
                         unsigned long elfsize,
                         struct domain_setup_info *dsi)
{
    Elf32_Ehdr *ehdr = (Elf32_Ehdr *)elfbase;
    Elf32_Phdr *phdr;
    Elf32_Shdr *shdr;
    unsigned long kernstart = ~0UL, kernend = 0UL;
    char *shstrtab;
    int h;

    if ( !IS_ELF(*ehdr) )
    {
        ERROR("Kernel image does not have an ELF header.");
        return -EINVAL;
    }

    if ( (ehdr->e_phoff + (ehdr->e_phnum * ehdr->e_phentsize)) > elfsize )
    {
        ERROR("ELF program headers extend beyond end of image.");
        return -EINVAL;
    }

    if ( (ehdr->e_shoff + (ehdr->e_shnum * ehdr->e_shentsize)) > elfsize )
    {
        ERROR("ELF section headers extend beyond end of image.");
        return -EINVAL;
    }

    /* Find the section-header strings table. */
    if ( ehdr->e_shstrndx == SHN_UNDEF )
    {
        ERROR("ELF image has no section-header strings table (shstrtab).");
        return -EINVAL;
    }
    shdr = (Elf32_Shdr *)(elfbase + ehdr->e_shoff +
                          (ehdr->e_shstrndx*ehdr->e_shentsize));
    shstrtab = elfbase + shdr->sh_offset;

    for ( h = 0; h < ehdr->e_phnum; h++ )
    {
        phdr = (Elf32_Phdr *)(elfbase + ehdr->e_phoff + (h*ehdr->e_phentsize));
        if ( !is_loadable_phdr(phdr) )
            continue;
        if ( phdr->p_paddr < kernstart )
            kernstart = phdr->p_paddr;
        if ( (phdr->p_paddr + phdr->p_memsz) > kernend )
            kernend = phdr->p_paddr + phdr->p_memsz;
    }

    if ( (kernstart > kernend) ||
         (ehdr->e_entry < kernstart) ||
         (ehdr->e_entry > kernend) )
    {
        ERROR("Malformed ELF image.");
        return -EINVAL;
    }

    dsi->v_start = 0x00000000;

    dsi->v_kernstart = kernstart;
    dsi->v_kernend   = kernend;
    dsi->v_kernentry = VMX_LOADER_ENTR_ADDR;

    dsi->v_end = dsi->v_kernend;

    return 0;
}

static int
loadelfimage(
    char *elfbase, int xch, uint32_t dom, unsigned long *parray,
    struct domain_setup_info *dsi)
{
    Elf32_Ehdr *ehdr = (Elf32_Ehdr *)elfbase;
    Elf32_Phdr *phdr;
    int h;

    char *va;
    unsigned long pa, done, chunksz;

    for ( h = 0; h < ehdr->e_phnum; h++ )
    {
        phdr = (Elf32_Phdr *)(elfbase + ehdr->e_phoff + (h*ehdr->e_phentsize));
        if ( !is_loadable_phdr(phdr) )
            continue;

        for ( done = 0; done < phdr->p_filesz; done += chunksz )
        {
            pa = (phdr->p_paddr + done) - dsi->v_start;
            if ( (va = xc_map_foreign_range(
                      xch, dom, PAGE_SIZE, PROT_WRITE,
                      parray[pa >> PAGE_SHIFT])) == 0 )
                return -1;
            chunksz = phdr->p_filesz - done;
            if ( chunksz > (PAGE_SIZE - (pa & (PAGE_SIZE-1))) )
                chunksz = PAGE_SIZE - (pa & (PAGE_SIZE-1));
            memcpy(va + (pa & (PAGE_SIZE-1)),
                   elfbase + phdr->p_offset + done, chunksz);
            munmap(va, PAGE_SIZE);
        }

        for ( ; done < phdr->p_memsz; done += chunksz )
        {
            pa = (phdr->p_paddr + done) - dsi->v_start;
            if ( (va = xc_map_foreign_range(
                      xch, dom, PAGE_SIZE, PROT_WRITE,
                      parray[pa >> PAGE_SHIFT])) == 0 )
                return -1;
            chunksz = phdr->p_memsz - done;
            if ( chunksz > (PAGE_SIZE - (pa & (PAGE_SIZE-1))) )
                chunksz = PAGE_SIZE - (pa & (PAGE_SIZE-1));
            memset(va + (pa & (PAGE_SIZE-1)), 0, chunksz);
            munmap(va, PAGE_SIZE);
        }
    }

    return 0;
}

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */