direct-io.hg

view tools/libxc/xc_vmx_build.c @ 8500:dd5649730b32

Fix a couple of bogus dom0_op names:
setdomaininfo -> setvcpucontext
pincpudomain -> setvcpuaffinity

Signed-off-by: Keir Fraser <keir@xensource.com>
author kaf24@firebug.cl.cam.ac.uk
date Fri Jan 06 12:53:19 2006 +0100 (2006-01-06)
parents 9fc306e40a7c
children 334dc7e6a23f
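
The first of these renames is visible at the bottom of this file, where the
constructed vcpu context is pushed down to Xen to launch the guest (excerpted
from xc_vmx_build() below):

    launch_op.u.setvcpucontext.domain = (domid_t)domid;
    launch_op.u.setvcpucontext.vcpu   = 0;
    launch_op.u.setvcpucontext.ctxt   = ctxt;

    launch_op.cmd = DOM0_SETVCPUCONTEXT;
    rc = xc_dom0_op(xc_handle, &launch_op);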
/******************************************************************************
 * xc_vmx_build.c
 */

#include <stddef.h>
#include "xg_private.h"
#define ELFSIZE 32
#include "xc_elf.h"
#include <stdlib.h>
#include <unistd.h>
#include <zlib.h>
#include <xen/hvm/hvm_info_table.h>
#include <xen/hvm/ioreq.h>

#define VMX_LOADER_ENTR_ADDR 0x00100000

#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
#ifdef __x86_64__
#define L3_PROT (_PAGE_PRESENT)
#endif

#define E820MAX 128

#define E820_RAM          1
#define E820_RESERVED     2
#define E820_ACPI         3
#define E820_NVS          4
#define E820_IO          16
#define E820_SHARED_PAGE 17
#define E820_XENSTORE    18

#define E820_MAP_PAGE      0x00090000
#define E820_MAP_NR_OFFSET 0x000001E8
#define E820_MAP_OFFSET    0x000002D0

struct e820entry {
    uint64_t addr;
    uint64_t size;
    uint32_t type;
} __attribute__((packed));

#define round_pgup(_p)   (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
#define round_pgdown(_p) ((_p)&PAGE_MASK)

static int
parseelfimage(
    char *elfbase, unsigned long elfsize, struct domain_setup_info *dsi);
static int
loadelfimage(
    char *elfbase, int xch, uint32_t dom, unsigned long *parray,
    struct domain_setup_info *dsi);
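
/*
 * Guest-physical layout written below (see also E820_MAP_* above):
 *   0x00000-0x9F800   RAM
 *   0x9F800-0xA0000   reserved
 *   0xA0000-0xC0000   I/O (video)
 *   0xF0000-0x100000  reserved (BIOS)
 *   0x100000 upwards  RAM, ending two pages short of mem_size; those two
 *                     pages hold the xenstore page and the shared ioreq_t
 *                     page, in that order
 *   above mem_size    three NVS pages, then ten ACPI pages
 *   0xFEC00000        a 20MB I/O hole for the APIC region
 */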
static unsigned char build_e820map(void *e820_page, unsigned long mem_size)
{
    struct e820entry *e820entry =
        (struct e820entry *)(((unsigned char *)e820_page) + E820_MAP_OFFSET);
    unsigned char nr_map = 0;

    /* XXX: Doesn't work for > 4GB yet */
    e820entry[nr_map].addr = 0x0;
    e820entry[nr_map].size = 0x9F800;
    e820entry[nr_map].type = E820_RAM;
    nr_map++;

    e820entry[nr_map].addr = 0x9F800;
    e820entry[nr_map].size = 0x800;
    e820entry[nr_map].type = E820_RESERVED;
    nr_map++;

    e820entry[nr_map].addr = 0xA0000;
    e820entry[nr_map].size = 0x20000;
    e820entry[nr_map].type = E820_IO;
    nr_map++;

    e820entry[nr_map].addr = 0xF0000;
    e820entry[nr_map].size = 0x10000;
    e820entry[nr_map].type = E820_RESERVED;
    nr_map++;

#define STATIC_PAGES 2 /* for ioreq_t and store_mfn */
    /* Most of the ram goes here */
    e820entry[nr_map].addr = 0x100000;
    e820entry[nr_map].size = mem_size - 0x100000 - STATIC_PAGES*PAGE_SIZE;
    e820entry[nr_map].type = E820_RAM;
    nr_map++;

    /* Statically allocated special pages */

    /* Shared ioreq_t page */
    e820entry[nr_map].addr = mem_size - PAGE_SIZE;
    e820entry[nr_map].size = PAGE_SIZE;
    e820entry[nr_map].type = E820_SHARED_PAGE;
    nr_map++;

    /* For xenstore */
    e820entry[nr_map].addr = mem_size - 2*PAGE_SIZE;
    e820entry[nr_map].size = PAGE_SIZE;
    e820entry[nr_map].type = E820_XENSTORE;
    nr_map++;

    e820entry[nr_map].addr = mem_size;
    e820entry[nr_map].size = 0x3 * PAGE_SIZE;
    e820entry[nr_map].type = E820_NVS;
    nr_map++;

    e820entry[nr_map].addr = mem_size + 0x3 * PAGE_SIZE;
    e820entry[nr_map].size = 0xA * PAGE_SIZE;
    e820entry[nr_map].type = E820_ACPI;
    nr_map++;

    e820entry[nr_map].addr = 0xFEC00000;
    e820entry[nr_map].size = 0x1400000;
    e820entry[nr_map].type = E820_IO;
    nr_map++;

    return (*(((unsigned char *)e820_page) + E820_MAP_NR_OFFSET) = nr_map);
}
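
/*
 * The checksum byte is chosen so that all bytes of the table sum to
 * zero (mod 256), as the BIOS-table convention requires.
 */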
static void
set_hvm_info_checksum(struct hvm_info_table *t)
{
    uint8_t *ptr = (uint8_t *)t, sum = 0;
    unsigned int i;

    t->checksum = 0;

    for (i = 0; i < t->length; i++)
        sum += *ptr++;

    t->checksum = -sum;
}

/*
 * Use the E820 reserved area at 0x9F800 to pass HVM info to the vmxloader;
 * the vmxloader uses this info to set up the BIOS accordingly.
 */
static int set_hvm_info(int xc_handle, uint32_t dom,
                        unsigned long *pfn_list, unsigned int vcpus,
                        unsigned int acpi, unsigned int apic)
{
    char *va_map;
    struct hvm_info_table *va_hvm;

    va_map = xc_map_foreign_range(
        xc_handle,
        dom,
        PAGE_SIZE,
        PROT_READ|PROT_WRITE,
        pfn_list[HVM_INFO_PFN]);

    if ( va_map == NULL )
        return -1;

    va_hvm = (struct hvm_info_table *)(va_map + HVM_INFO_OFFSET);
    memset(va_hvm, 0, sizeof(*va_hvm));
    strncpy(va_hvm->signature, "HVM INFO", 8);
    va_hvm->length       = sizeof(struct hvm_info_table);
    va_hvm->acpi_enabled = acpi;
    va_hvm->apic_enabled = apic;
    va_hvm->nr_vcpus     = vcpus;

    set_hvm_info_checksum(va_hvm);

    munmap(va_map, PAGE_SIZE);

    return 0;
}
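
/*
 * Clear the page-table entries covering the e820 I/O ranges, so guest
 * accesses to those addresses fault and can be handled as emulated MMIO
 * instead of hitting RAM mappings.
 */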
#ifdef __i386__
static int zap_mmio_range(int xc_handle, uint32_t dom,
                          l2_pgentry_32_t *vl2tab,
                          unsigned long mmio_range_start,
                          unsigned long mmio_range_size)
{
    unsigned long mmio_addr;
    unsigned long mmio_range_end = mmio_range_start + mmio_range_size;
    unsigned long vl2e;
    l1_pgentry_32_t *vl1tab;

    mmio_addr = mmio_range_start & PAGE_MASK;
    for (; mmio_addr < mmio_range_end; mmio_addr += PAGE_SIZE) {
        vl2e = vl2tab[l2_table_offset(mmio_addr)];
        if (vl2e == 0)
            continue;
        vl1tab = xc_map_foreign_range(
            xc_handle, dom, PAGE_SIZE,
            PROT_READ|PROT_WRITE, vl2e >> PAGE_SHIFT);
        if ( vl1tab == NULL )
        {
            PERROR("Failed to zap MMIO range");
            return -1;
        }
        vl1tab[l1_table_offset(mmio_addr)] = 0;
        munmap(vl1tab, PAGE_SIZE);
    }
    return 0;
}

static int zap_mmio_ranges(int xc_handle, uint32_t dom, unsigned long l2tab,
                           unsigned char e820_map_nr, unsigned char *e820map)
{
    unsigned int i;
    struct e820entry *e820entry = (struct e820entry *)e820map;

    l2_pgentry_32_t *vl2tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                                   PROT_READ|PROT_WRITE,
                                                   l2tab >> PAGE_SHIFT);
    if ( vl2tab == NULL )
        return -1;

    for ( i = 0; i < e820_map_nr; i++ )
    {
        if ( (e820entry[i].type == E820_IO) &&
             (zap_mmio_range(xc_handle, dom, vl2tab,
                             e820entry[i].addr, e820entry[i].size) == -1) )
        {
            /* Don't leak the mapping on the error path. */
            munmap(vl2tab, PAGE_SIZE);
            return -1;
        }
    }

    munmap(vl2tab, PAGE_SIZE);
    return 0;
}
#else
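/*
 * The x86_64 variant of the same walk: follow the L3 (PDPT) entry to an
 * L2 table, then the L2 entry to the L1 table whose entry is cleared.
 */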
static int zap_mmio_range(int xc_handle, uint32_t dom,
                          l3_pgentry_t *vl3tab,
                          unsigned long mmio_range_start,
                          unsigned long mmio_range_size)
{
    unsigned long mmio_addr;
    unsigned long mmio_range_end = mmio_range_start + mmio_range_size;
    unsigned long vl2e = 0;
    unsigned long vl3e;
    l1_pgentry_t *vl1tab;
    l2_pgentry_t *vl2tab;

    mmio_addr = mmio_range_start & PAGE_MASK;
    for ( ; mmio_addr < mmio_range_end; mmio_addr += PAGE_SIZE )
    {
        vl3e = vl3tab[l3_table_offset(mmio_addr)];
        if ( vl3e == 0 )
            continue;

        vl2tab = xc_map_foreign_range(
            xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, vl3e>>PAGE_SHIFT);
        if ( vl2tab == NULL )
        {
            PERROR("Failed to zap MMIO range");
            return -1;
        }

        vl2e = vl2tab[l2_table_offset(mmio_addr)];
        if ( vl2e == 0 )
        {
            munmap(vl2tab, PAGE_SIZE);
            continue;
        }

        vl1tab = xc_map_foreign_range(
            xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, vl2e>>PAGE_SHIFT);
        if ( vl1tab == NULL )
        {
            PERROR("Failed to zap MMIO range");
            munmap(vl2tab, PAGE_SIZE);
            return -1;
        }

        vl1tab[l1_table_offset(mmio_addr)] = 0;
        munmap(vl2tab, PAGE_SIZE);
        munmap(vl1tab, PAGE_SIZE);
    }
    return 0;
}

static int zap_mmio_ranges(int xc_handle, uint32_t dom, unsigned long l3tab,
                           unsigned char e820_map_nr, unsigned char *e820map)
{
    unsigned int i;
    struct e820entry *e820entry = (struct e820entry *)e820map;

    l3_pgentry_t *vl3tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                                PROT_READ|PROT_WRITE,
                                                l3tab >> PAGE_SHIFT);
    if ( vl3tab == NULL )
        return -1;
    for ( i = 0; i < e820_map_nr; i++ ) {
        if ( (e820entry[i].type == E820_IO) &&
             (zap_mmio_range(xc_handle, dom, vl3tab,
                             e820entry[i].addr, e820entry[i].size) == -1) )
        {
            /* Don't leak the mapping on the error path. */
            munmap(vl3tab, PAGE_SIZE);
            return -1;
        }
    }
    munmap(vl3tab, PAGE_SIZE);
    return 0;
}

#endif
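
/*
 * setup_guest() does the bulk of the construction: load the VMX loader
 * ELF image, build the initial page tables, write the machine->phys
 * table, install the HVM info table and e820 map, punch out the MMIO
 * mappings, and initialise the shared-info, ioreq and xenstore pages.
 */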
static int setup_guest(int xc_handle,
                       uint32_t dom, int memsize,
                       char *image, unsigned long image_size,
                       unsigned long nr_pages,
                       vcpu_guest_context_t *ctxt,
                       unsigned long shared_info_frame,
                       unsigned int control_evtchn,
                       unsigned int vcpus,
                       unsigned int acpi,
                       unsigned int apic,
                       unsigned int store_evtchn,
                       unsigned long *store_mfn)
{
    l1_pgentry_t *vl1tab=NULL, *vl1e=NULL;
    l2_pgentry_t *vl2tab=NULL, *vl2e=NULL;
    unsigned long *page_array = NULL;
#ifdef __x86_64__
    l3_pgentry_t *vl3tab=NULL;
    unsigned long l3tab;
#endif
    unsigned long l2tab = 0;
    unsigned long l1tab = 0;
    unsigned long count, i;
    shared_info_t *shared_info;
    void *e820_page;
    unsigned char e820_map_nr;
    xc_mmu_t *mmu = NULL;
    int rc;

    unsigned long nr_pt_pages;
    unsigned long ppt_alloc;

    struct domain_setup_info dsi;
    unsigned long vpt_start;
    unsigned long vpt_end;
    unsigned long v_end;

    unsigned long shared_page_frame = 0;
    shared_iopage_t *sp;

    memset(&dsi, 0, sizeof(struct domain_setup_info));

    if ( (rc = parseelfimage(image, image_size, &dsi)) != 0 )
        goto error_out;

    if ( (dsi.v_start & (PAGE_SIZE-1)) != 0 )
    {
        PERROR("Guest OS must load to a page boundary.\n");
        goto error_out;
    }

    /* memsize is in megabytes */
    v_end = (unsigned long)memsize << 20;

#ifdef __i386__
    /* One L2 page, plus one L1 page per 4MB of guest memory. */
    nr_pt_pages = 1 + ((memsize + 3) >> 2);
#else
    /* One L3 (PDPT) page and four L2 pages, plus one L1 page per 2MB. */
    nr_pt_pages = 5 + ((memsize + 1) >> 1);
#endif
    vpt_start = v_end;
    vpt_end   = vpt_start + (nr_pt_pages * PAGE_SIZE);

    printf("VIRTUAL MEMORY ARRANGEMENT:\n"
           " Loaded VMX loader: %08lx->%08lx\n"
           " Page tables:       %08lx->%08lx\n"
           " TOTAL:             %08lx->%08lx\n",
           dsi.v_kernstart, dsi.v_kernend,
           vpt_start, vpt_end,
           dsi.v_start, v_end);
    printf(" ENTRY ADDRESS:     %08lx\n", dsi.v_kernentry);

    if ( (v_end - dsi.v_start) > (nr_pages * PAGE_SIZE) )
    {
        ERROR("Initial guest OS requires too much space\n"
              "(%luMB is greater than %luMB limit)\n",
              (v_end-dsi.v_start)>>20, (nr_pages<<PAGE_SHIFT)>>20);
        goto error_out;
    }

    if ( (page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL )
    {
        PERROR("Could not allocate memory");
        goto error_out;
    }

    if ( xc_get_pfn_list(xc_handle, dom, page_array, nr_pages) != nr_pages )
    {
        PERROR("Could not get the page frame list");
        goto error_out;
    }

    if ( loadelfimage(image, xc_handle, dom, page_array, &dsi) != 0 )
        goto error_out;

    if ( (mmu = xc_init_mmu_updates(xc_handle, dom)) == NULL )
        goto error_out;

    /* First allocate page for page dir or pdpt */
    ppt_alloc = vpt_start >> PAGE_SHIFT;
    if ( page_array[ppt_alloc] > 0xfffff )
    {
        unsigned long nmfn;
        nmfn = xc_make_page_below_4G( xc_handle, dom, page_array[ppt_alloc] );
        if ( nmfn == 0 )
        {
            fprintf(stderr, "Couldn't get a page below 4GB :-(\n");
            goto error_out;
        }
        page_array[ppt_alloc] = nmfn;
    }
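
    /*
     * Build the initial page tables in the pages reserved at vpt_start:
     * a two-level table on i386, or a PAE-style three-level table on
     * x86_64. Guest virtual addresses are mapped 1:1 onto guest
     * pseudo-physical memory.
     */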
#ifdef __i386__
    l2tab = page_array[ppt_alloc++] << PAGE_SHIFT;
    ctxt->ctrlreg[3] = l2tab;

    if ( (vl2tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                        PROT_READ|PROT_WRITE,
                                        l2tab >> PAGE_SHIFT)) == NULL )
        goto error_out;
    memset(vl2tab, 0, PAGE_SIZE);
    vl2e = &vl2tab[l2_table_offset(0)];
    for ( count = 0; count < (v_end >> PAGE_SHIFT); count++ )
    {
        if ( ((unsigned long)vl1e & (PAGE_SIZE-1)) == 0 )
        {
            l1tab = page_array[ppt_alloc++] << PAGE_SHIFT;
            if ( vl1tab != NULL )
                munmap(vl1tab, PAGE_SIZE);
            if ( (vl1tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                                PROT_READ|PROT_WRITE,
                                                l1tab >> PAGE_SHIFT)) == NULL )
            {
                munmap(vl2tab, PAGE_SIZE);
                goto error_out;
            }
            memset(vl1tab, 0, PAGE_SIZE);
            vl1e = &vl1tab[l1_table_offset(count << PAGE_SHIFT)];
            *vl2e++ = l1tab | L2_PROT;
        }

        *vl1e = (page_array[count] << PAGE_SHIFT) | L1_PROT;
        vl1e++;
    }
    munmap(vl1tab, PAGE_SIZE);
    munmap(vl2tab, PAGE_SIZE);
#else
    l3tab = page_array[ppt_alloc++] << PAGE_SHIFT;
    ctxt->ctrlreg[3] = l3tab;

    if ( (vl3tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                        PROT_READ|PROT_WRITE,
                                        l3tab >> PAGE_SHIFT)) == NULL )
        goto error_out;
    memset(vl3tab, 0, PAGE_SIZE);

    /* Fill in every PDPT entry. */
    for ( i = 0; i < L3_PAGETABLE_ENTRIES_PAE; i++ )
    {
        l2tab = page_array[ppt_alloc++] << PAGE_SHIFT;
        if ( (vl2tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                            PROT_READ|PROT_WRITE,
                                            l2tab >> PAGE_SHIFT)) == NULL )
            goto error_out;
        memset(vl2tab, 0, PAGE_SIZE);
        munmap(vl2tab, PAGE_SIZE);
        vl2tab = NULL;
        vl3tab[i] = l2tab | L3_PROT;
    }

    for ( count = 0; count < (v_end >> PAGE_SHIFT); count++ )
    {
        if ( !(count & ((1 << (L3_PAGETABLE_SHIFT - L1_PAGETABLE_SHIFT)) - 1)) )
        {
            l2tab = vl3tab[count >> (L3_PAGETABLE_SHIFT - L1_PAGETABLE_SHIFT)]
                    & PAGE_MASK;

            if ( vl2tab != NULL )
                munmap(vl2tab, PAGE_SIZE);

            if ( (vl2tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                                PROT_READ|PROT_WRITE,
                                                l2tab >> PAGE_SHIFT)) == NULL )
                goto error_out;

            vl2e = &vl2tab[l2_table_offset(count << PAGE_SHIFT)];
        }
        if ( ((unsigned long)vl1e & (PAGE_SIZE-1)) == 0 )
        {
            l1tab = page_array[ppt_alloc++] << PAGE_SHIFT;
            if ( vl1tab != NULL )
                munmap(vl1tab, PAGE_SIZE);
            if ( (vl1tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                                PROT_READ|PROT_WRITE,
                                                l1tab >> PAGE_SHIFT)) == NULL )
            {
                munmap(vl2tab, PAGE_SIZE);
                goto error_out;
            }
            memset(vl1tab, 0, PAGE_SIZE);
            vl1e = &vl1tab[l1_table_offset(count << PAGE_SHIFT)];
            *vl2e++ = l1tab | L2_PROT;
        }

        *vl1e = (page_array[count] << PAGE_SHIFT) | L1_PROT;
        vl1e++;
    }

    munmap(vl1tab, PAGE_SIZE);
    munmap(vl2tab, PAGE_SIZE);
    munmap(vl3tab, PAGE_SIZE);
#endif
    /* Write the machine->phys table entries. */
    for ( count = 0; count < nr_pages; count++ )
    {
        if ( xc_add_mmu_update(xc_handle, mmu,
                               (page_array[count] << PAGE_SHIFT) |
                               MMU_MACHPHYS_UPDATE, count) )
            goto error_out;
    }

    if ( set_hvm_info(xc_handle, dom, page_array, vcpus, acpi, apic) ) {
        fprintf(stderr, "Couldn't set hvm info for VMX guest.\n");
        goto error_out;
    }

    if ( (e820_page = xc_map_foreign_range(
              xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
              page_array[E820_MAP_PAGE >> PAGE_SHIFT])) == NULL )
        goto error_out;
    memset(e820_page, 0, PAGE_SIZE);
    e820_map_nr = build_e820map(e820_page, v_end);
#if defined (__i386__)
    if ( zap_mmio_ranges(xc_handle, dom, l2tab, e820_map_nr,
                         ((unsigned char *)e820_page) + E820_MAP_OFFSET) == -1 )
#else
    if ( zap_mmio_ranges(xc_handle, dom, l3tab, e820_map_nr,
                         ((unsigned char *)e820_page) + E820_MAP_OFFSET) == -1 )
#endif
        goto error_out;
    munmap(e820_page, PAGE_SIZE);

    /* shared_info page starts its life empty. */
    if ( (shared_info = xc_map_foreign_range(
              xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
              shared_info_frame)) == NULL )
        goto error_out;
    memset(shared_info, 0, sizeof(shared_info_t));
    /* Mask all upcalls... */
    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
        shared_info->vcpu_info[i].evtchn_upcall_mask = 1;
    munmap(shared_info, PAGE_SIZE);

    /* Populate the event channel port in the shared page */
    shared_page_frame = page_array[(v_end >> PAGE_SHIFT) - 1];
    if ( (sp = (shared_iopage_t *) xc_map_foreign_range(
              xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
              shared_page_frame)) == NULL )
        goto error_out;
    memset(sp, 0, PAGE_SIZE);
    sp->sp_global.eport = control_evtchn;
    munmap(sp, PAGE_SIZE);

    /* The xenstore page is the second-to-last page of guest RAM. */
    *store_mfn = page_array[(v_end >> PAGE_SHIFT) - 2];
    if ( xc_clear_domain_page(xc_handle, dom, *store_mfn) )
        goto error_out;

    /* Send the page update requests down to the hypervisor. */
    if ( xc_finish_mmu_updates(xc_handle, mmu) )
        goto error_out;

    free(mmu);
    free(page_array);

    /*
     * Initial register values:
     */
    ctxt->user_regs.ds = 0;
    ctxt->user_regs.es = 0;
    ctxt->user_regs.fs = 0;
    ctxt->user_regs.gs = 0;
    ctxt->user_regs.ss = 0;
    ctxt->user_regs.cs = 0;
    ctxt->user_regs.eip = dsi.v_kernentry;
    ctxt->user_regs.edx = 0;
    ctxt->user_regs.eax = 0;
    ctxt->user_regs.esp = 0;
    ctxt->user_regs.ebx = 0; /* startup_32 expects this to be 0 to signal boot cpu */
    ctxt->user_regs.ecx = 0;
    ctxt->user_regs.esi = 0;
    ctxt->user_regs.edi = 0;
    ctxt->user_regs.ebp = 0;

    ctxt->user_regs.eflags = 0;

    return 0;

 error_out:
    free(mmu);
    free(page_array);
    return -1;
}
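
/*
 * Entry point for building a VMX domain. A minimal, illustrative
 * calling sketch; all names are hypothetical, and the domain must
 * already be created and paused with its memory reservation in place:
 *
 *     unsigned long store_mfn;
 *     int rc = xc_vmx_build(xc_handle, domid, memsize_mb, vmxloader_path,
 *                           control_evtchn, nr_vcpus, acpi, apic,
 *                           store_evtchn, &store_mfn);
 */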
int xc_vmx_build(int xc_handle,
                 uint32_t domid,
                 int memsize,
                 const char *image_name,
                 unsigned int control_evtchn,
                 unsigned int vcpus,
                 unsigned int acpi,
                 unsigned int apic,
                 unsigned int store_evtchn,
                 unsigned long *store_mfn)
{
    dom0_op_t launch_op, op;
    int rc, i;
    vcpu_guest_context_t st_ctxt, *ctxt = &st_ctxt;
    long nr_pages;
    char *image = NULL;
    unsigned long image_size;
    xen_capabilities_info_t xen_caps;

    if ( (rc = xc_version(xc_handle, XENVER_capabilities, &xen_caps)) != 0 )
    {
        PERROR("Failed to get xen version info");
        goto error_out;
    }

    if ( !strstr(xen_caps, "hvm") )
    {
        PERROR("CPU doesn't support VMX extensions, or "
               "VMX extensions are not enabled");
        goto error_out;
    }

    /* xc_get_tot_pages() returns a signed value; keep nr_pages signed so
     * that the error check below actually fires. */
    if ( (nr_pages = xc_get_tot_pages(xc_handle, domid)) < 0 )
    {
        PERROR("Could not find total pages for domain");
        goto error_out;
    }

    if ( (image = xc_read_kernel_image(image_name, &image_size)) == NULL )
        goto error_out;

    if ( mlock(&st_ctxt, sizeof(st_ctxt)) )
    {
        PERROR("%s: ctxt mlock failed", __func__);
        goto error_out;
    }

    op.cmd = DOM0_GETDOMAININFO;
    op.u.getdomaininfo.domain = (domid_t)domid;
    if ( (xc_dom0_op(xc_handle, &op) < 0) ||
         ((uint16_t)op.u.getdomaininfo.domain != domid) )
    {
        PERROR("Could not get info on domain");
        goto error_out;
    }

    if ( xc_vcpu_getcontext(xc_handle, domid, 0, ctxt) )
    {
        PERROR("Could not get vcpu context");
        goto error_out;
    }

    if ( !(op.u.getdomaininfo.flags & DOMFLAGS_PAUSED) ||
         (ctxt->ctrlreg[3] != 0) )
    {
        ERROR("Domain is already constructed");
        goto error_out;
    }

    if ( setup_guest(xc_handle, domid, memsize, image, image_size, nr_pages,
                     ctxt, op.u.getdomaininfo.shared_info_frame,
                     control_evtchn, vcpus, acpi, apic,
                     store_evtchn, store_mfn) < 0 )
    {
        ERROR("Error constructing guest OS");
        goto error_out;
    }

    free(image);

    ctxt->flags = VGCF_VMX_GUEST;
    /* FPU is set up to default initial state. */
    memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));

    /* Virtual IDT is empty at start-of-day. */
    for ( i = 0; i < 256; i++ )
    {
        ctxt->trap_ctxt[i].vector = i;
        ctxt->trap_ctxt[i].cs     = FLAT_KERNEL_CS;
    }

    /* No LDT. */
    ctxt->ldt_ents = 0;

    /* Use the default Xen-provided GDT. */
    ctxt->gdt_ents = 0;

    /* No debugging. */
    memset(ctxt->debugreg, 0, sizeof(ctxt->debugreg));

    /* No callback handlers. */
#if defined(__i386__)
    ctxt->event_callback_cs     = FLAT_KERNEL_CS;
    ctxt->event_callback_eip    = 0;
    ctxt->failsafe_callback_cs  = FLAT_KERNEL_CS;
    ctxt->failsafe_callback_eip = 0;
#elif defined(__x86_64__)
    ctxt->event_callback_eip    = 0;
    ctxt->failsafe_callback_eip = 0;
    ctxt->syscall_callback_eip  = 0;
#endif

    memset(&launch_op, 0, sizeof(launch_op));

    launch_op.u.setvcpucontext.domain = (domid_t)domid;
    launch_op.u.setvcpucontext.vcpu   = 0;
    launch_op.u.setvcpucontext.ctxt   = ctxt;

    launch_op.cmd = DOM0_SETVCPUCONTEXT;
    rc = xc_dom0_op(xc_handle, &launch_op);

    return rc;

 error_out:
    free(image);
    return -1;
}
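
/*
 * ELF helpers. The VMX loader is a 32-bit ELF image (ELFSIZE is forced
 * to 32 above), loaded at its physical load addresses and entered at
 * VMX_LOADER_ENTR_ADDR.
 */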
static inline int is_loadable_phdr(Elf32_Phdr *phdr)
{
    return ((phdr->p_type == PT_LOAD) &&
            ((phdr->p_flags & (PF_W|PF_X)) != 0));
}

static int parseelfimage(char *elfbase,
                         unsigned long elfsize,
                         struct domain_setup_info *dsi)
{
    Elf32_Ehdr *ehdr = (Elf32_Ehdr *)elfbase;
    Elf32_Phdr *phdr;
    Elf32_Shdr *shdr;
    unsigned long kernstart = ~0UL, kernend = 0UL;
    char *shstrtab;
    int h;

    if ( !IS_ELF(*ehdr) )
    {
        ERROR("Kernel image does not have an ELF header.");
        return -EINVAL;
    }

    if ( (ehdr->e_phoff + (ehdr->e_phnum * ehdr->e_phentsize)) > elfsize )
    {
        ERROR("ELF program headers extend beyond end of image.");
        return -EINVAL;
    }

    if ( (ehdr->e_shoff + (ehdr->e_shnum * ehdr->e_shentsize)) > elfsize )
    {
        ERROR("ELF section headers extend beyond end of image.");
        return -EINVAL;
    }

    /* Find the section-header strings table. */
    if ( ehdr->e_shstrndx == SHN_UNDEF )
    {
        ERROR("ELF image has no section-header strings table (shstrtab).");
        return -EINVAL;
    }
    shdr = (Elf32_Shdr *)(elfbase + ehdr->e_shoff +
                          (ehdr->e_shstrndx*ehdr->e_shentsize));
    shstrtab = elfbase + shdr->sh_offset;

    /* Find the bounds of the loadable segments. */
    for ( h = 0; h < ehdr->e_phnum; h++ )
    {
        phdr = (Elf32_Phdr *)(elfbase + ehdr->e_phoff + (h*ehdr->e_phentsize));
        if ( !is_loadable_phdr(phdr) )
            continue;
        if ( phdr->p_paddr < kernstart )
            kernstart = phdr->p_paddr;
        if ( (phdr->p_paddr + phdr->p_memsz) > kernend )
            kernend = phdr->p_paddr + phdr->p_memsz;
    }

    if ( (kernstart > kernend) ||
         (ehdr->e_entry < kernstart) ||
         (ehdr->e_entry > kernend) )
    {
        ERROR("Malformed ELF image.");
        return -EINVAL;
    }

    dsi->v_start = 0x00000000;

    dsi->v_kernstart = kernstart;
    dsi->v_kernend   = kernend;
    dsi->v_kernentry = VMX_LOADER_ENTR_ADDR;

    dsi->v_end = dsi->v_kernend;

    return 0;
}
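
/*
 * Copy each loadable segment into the domain page by page: the first
 * p_filesz bytes come from the image file, and the rest of p_memsz
 * (e.g. .bss) is zero-filled.
 */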
static int
loadelfimage(
    char *elfbase, int xch, uint32_t dom, unsigned long *parray,
    struct domain_setup_info *dsi)
{
    Elf32_Ehdr *ehdr = (Elf32_Ehdr *)elfbase;
    Elf32_Phdr *phdr;
    int h;

    char *va;
    unsigned long pa, done, chunksz;

    for ( h = 0; h < ehdr->e_phnum; h++ )
    {
        phdr = (Elf32_Phdr *)(elfbase + ehdr->e_phoff + (h*ehdr->e_phentsize));
        if ( !is_loadable_phdr(phdr) )
            continue;

        for ( done = 0; done < phdr->p_filesz; done += chunksz )
        {
            pa = (phdr->p_paddr + done) - dsi->v_start;
            if ( (va = xc_map_foreign_range(
                      xch, dom, PAGE_SIZE, PROT_WRITE,
                      parray[pa >> PAGE_SHIFT])) == NULL )
                return -1;
            chunksz = phdr->p_filesz - done;
            if ( chunksz > (PAGE_SIZE - (pa & (PAGE_SIZE-1))) )
                chunksz = PAGE_SIZE - (pa & (PAGE_SIZE-1));
            memcpy(va + (pa & (PAGE_SIZE-1)),
                   elfbase + phdr->p_offset + done, chunksz);
            munmap(va, PAGE_SIZE);
        }

        for ( ; done < phdr->p_memsz; done += chunksz )
        {
            pa = (phdr->p_paddr + done) - dsi->v_start;
            if ( (va = xc_map_foreign_range(
                      xch, dom, PAGE_SIZE, PROT_WRITE,
                      parray[pa >> PAGE_SHIFT])) == NULL )
                return -1;
            chunksz = phdr->p_memsz - done;
            if ( chunksz > (PAGE_SIZE - (pa & (PAGE_SIZE-1))) )
                chunksz = PAGE_SIZE - (pa & (PAGE_SIZE-1));
            memset(va + (pa & (PAGE_SIZE-1)), 0, chunksz);
            munmap(va, PAGE_SIZE);
        }
    }

    return 0;
}

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */