ia64/xen-unstable

view tools/libxc/xc_linux_build.c @ 7238:971e7c7411b3

Raise an exception if an error appears on the pipes to our children, and make
sure that the child's pipes are closed even under that exception. Move the
handling of POLLHUP to the end of the loop, so that we guarantee to read any
remaining data from the child if POLLHUP and POLLIN appear at the same time.

Signed-off-by: Ewan Mellor <ewan@xensource.com>
author emellor@ewan
date Thu Oct 06 10:13:11 2005 +0100 (2005-10-06)
parents 06d84bf87159
children 0fd84f168103
line source
1 /******************************************************************************
2 * xc_linux_build.c
3 */
5 #include "xg_private.h"
6 #include <xenctrl.h>
8 #if defined(__i386__)
9 #define ELFSIZE 32
10 #endif
12 #if defined(__x86_64__) || defined(__ia64__)
13 #define ELFSIZE 64
14 #endif
16 #include "xc_elf.h"
17 #include "xc_aout9.h"
18 #include <stdlib.h>
19 #include <unistd.h>
20 #include <zlib.h>
/*
 * Flag bits OR-ed into the page-table entries written by the i386 page-table
 * builders in this file (L3_PROT is used by the PAE builder only).
 */
22 #if defined(__i386__)
23 #define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
24 #define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
25 #define L3_PROT (_PAGE_PRESENT)
26 #endif
/*
 * Flag bits OR-ed into the entries of each of the four page-table levels
 * written by the x86_64 page-table builder in this file.
 */
28 #if defined(__x86_64__)
29 #define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
30 #define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
31 #define L3_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
32 #define L4_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
33 #endif
/*
 * already_built(ctxt): non-zero when the vcpu context already has a page-table
 * base in ctrlreg[3]; on ia64 the test is disabled and always yields 0.
 * get_tot_pages: the page-count query used by xc_linux_build(), which differs
 * between ia64 and the other architectures.
 */
35 #ifdef __ia64__
36 #define already_built(ctxt) (0)
37 #define get_tot_pages xc_get_max_pages
38 #else
39 #define already_built(ctxt) ((ctxt)->ctrlreg[3] != 0)
40 #define get_tot_pages xc_get_tot_pages
41 #endif
/* Round an address up / down to a multiple of PAGE_SIZE. */
43 #define round_pgup(_p) (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
44 #define round_pgdown(_p) ((_p)&PAGE_MASK)
/*
 * On ia64 the a.out9 prober is replaced by the constant 1, which
 * probeimageformat() treats as "image not recognised by this prober".
 */
46 #ifdef __ia64__
47 #define probe_aout9(image,image_size,load_funcs) 1
48 #endif
/*
 * Work out which loader understands the kernel image.
 *
 * The probers are tried in a fixed order: ELF, raw binary, then a.out9.
 * A prober signals acceptance by returning zero, at which point probing
 * stops.  load_funcs is handed to each prober unchanged (presumably the
 * accepting prober fills it in -- the probers are defined elsewhere).
 *
 * Returns 0 when some prober accepted the image, -EINVAL otherwise.
 */
static int probeimageformat(char *image,
                            unsigned long image_size,
                            struct load_funcs *load_funcs)
{
    if ( probe_elf(image, image_size, load_funcs) == 0 )
        return 0;

    if ( probe_bin(image, image_size, load_funcs) == 0 )
        return 0;

    if ( probe_aout9(image, image_size, load_funcs) == 0 )
        return 0;

    /* Every prober rejected the image. */
    ERROR( "Unrecognized image format" );
    return -EINVAL;
}
/*
 * alloc_pt(ltab, vltab): claim the next guest frame for use as a page table.
 *
 *   ltab   receives the frame number taken from page_array[ppt_alloc],
 *          shifted left by PAGE_SHIFT; ppt_alloc is then advanced.
 *   vltab  local mapping of the table.  Any previous mapping held in vltab
 *          is released first; the new frame is then mapped read/write and
 *          zero-filled.
 *
 * The macro deliberately relies on names in the calling function: the
 * variables xc_handle, dom, page_array and ppt_alloc, and the label
 * error_out, which is jumped to when the mapping fails (vltab is NULL at
 * that point, so the caller's cleanup will not unmap it twice).
 *
 * Both parameters are parenthesised in the expansion so that any lvalue
 * expression can be passed safely.
 */
#define alloc_pt(ltab, vltab)                                             \
do {                                                                      \
    (ltab) = (u64)page_array[ppt_alloc++] << PAGE_SHIFT;                  \
    if ( (vltab) != NULL )                                                \
        munmap((vltab), PAGE_SIZE);                                       \
    if ( ((vltab) = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,       \
                                         PROT_READ|PROT_WRITE,            \
                                         (ltab) >> PAGE_SHIFT)) == NULL ) \
        goto error_out;                                                   \
    memset((vltab), 0, PAGE_SIZE);                                        \
} while ( 0 )
77 #if defined(__i386__)
79 static int setup_pg_tables(int xc_handle, u32 dom,
80 vcpu_guest_context_t *ctxt,
81 unsigned long dsi_v_start,
82 unsigned long v_end,
83 unsigned long *page_array,
84 unsigned long vpt_start,
85 unsigned long vpt_end)
86 {
87 l1_pgentry_t *vl1tab=NULL, *vl1e=NULL;
88 l2_pgentry_t *vl2tab=NULL, *vl2e=NULL;
89 unsigned long l1tab = 0;
90 unsigned long l2tab = 0;
91 unsigned long ppt_alloc;
92 unsigned long count;
94 ppt_alloc = (vpt_start - dsi_v_start) >> PAGE_SHIFT;
95 alloc_pt(l2tab, vl2tab);
96 vl2e = &vl2tab[l2_table_offset(dsi_v_start)];
97 ctxt->ctrlreg[3] = l2tab;
99 for ( count = 0; count < ((v_end - dsi_v_start) >> PAGE_SHIFT); count++ )
100 {
101 if ( ((unsigned long)vl1e & (PAGE_SIZE-1)) == 0 )
102 {
103 alloc_pt(l1tab, vl1tab);
104 vl1e = &vl1tab[l1_table_offset(dsi_v_start + (count<<PAGE_SHIFT))];
105 *vl2e++ = l1tab | L2_PROT;
106 }
108 *vl1e = (page_array[count] << PAGE_SHIFT) | L1_PROT;
109 if ( (count >= ((vpt_start-dsi_v_start)>>PAGE_SHIFT)) &&
110 (count < ((vpt_end -dsi_v_start)>>PAGE_SHIFT)) )
111 *vl1e &= ~_PAGE_RW;
112 vl1e++;
113 }
114 munmap(vl1tab, PAGE_SIZE);
115 munmap(vl2tab, PAGE_SIZE);
116 return 0;
118 error_out:
119 if (vl1tab)
120 munmap(vl1tab, PAGE_SIZE);
121 if (vl2tab)
122 munmap(vl2tab, PAGE_SIZE);
123 return -1;
124 }
126 static int setup_pg_tables_pae(int xc_handle, u32 dom,
127 vcpu_guest_context_t *ctxt,
128 unsigned long dsi_v_start,
129 unsigned long v_end,
130 unsigned long *page_array,
131 unsigned long vpt_start,
132 unsigned long vpt_end)
133 {
134 l1_pgentry_64_t *vl1tab = NULL, *vl1e = NULL;
135 l2_pgentry_64_t *vl2tab = NULL, *vl2e = NULL;
136 l3_pgentry_64_t *vl3tab = NULL, *vl3e = NULL;
137 u64 l1tab, l2tab, l3tab;
138 unsigned long ppt_alloc, count, nmfn;
140 /* First allocate page for page dir. */
141 ppt_alloc = (vpt_start - dsi_v_start) >> PAGE_SHIFT;
143 if ( page_array[ppt_alloc] > 0xfffff )
144 {
145 nmfn = xc_make_page_below_4G(xc_handle, dom, page_array[ppt_alloc]);
146 if ( nmfn == 0 )
147 {
148 fprintf(stderr, "Couldn't get a page below 4GB :-(\n");
149 goto error_out;
150 }
151 page_array[ppt_alloc] = nmfn;
152 }
154 alloc_pt(l3tab, vl3tab);
155 vl3e = &vl3tab[l3_table_offset_pae(dsi_v_start)];
156 ctxt->ctrlreg[3] = l3tab;
158 for ( count = 0; count < ((v_end - dsi_v_start) >> PAGE_SHIFT); count++)
159 {
160 if ( !((unsigned long)vl1e & (PAGE_SIZE-1)) )
161 {
162 if ( !((unsigned long)vl2e & (PAGE_SIZE-1)) )
163 {
164 alloc_pt(l2tab, vl2tab);
165 vl2e = &vl2tab[l2_table_offset_pae(
166 dsi_v_start + (count << PAGE_SHIFT))];
167 *vl3e++ = l2tab | L3_PROT;
168 }
170 alloc_pt(l1tab, vl1tab);
171 vl1e = &vl1tab[l1_table_offset_pae(
172 dsi_v_start + (count << PAGE_SHIFT))];
173 *vl2e++ = l1tab | L2_PROT;
174 }
176 *vl1e = ((u64)page_array[count] << PAGE_SHIFT) | L1_PROT;
177 if ( (count >= ((vpt_start-dsi_v_start)>>PAGE_SHIFT)) &&
178 (count < ((vpt_end -dsi_v_start)>>PAGE_SHIFT)) )
179 *vl1e &= ~_PAGE_RW;
180 vl1e++;
181 }
183 munmap(vl1tab, PAGE_SIZE);
184 munmap(vl2tab, PAGE_SIZE);
185 munmap(vl3tab, PAGE_SIZE);
186 return 0;
188 error_out:
189 if (vl1tab)
190 munmap(vl1tab, PAGE_SIZE);
191 if (vl2tab)
192 munmap(vl2tab, PAGE_SIZE);
193 if (vl3tab)
194 munmap(vl3tab, PAGE_SIZE);
195 return -1;
196 }
198 #endif
200 #if defined(__x86_64__)
202 static int setup_pg_tables_64(int xc_handle, u32 dom,
203 vcpu_guest_context_t *ctxt,
204 unsigned long dsi_v_start,
205 unsigned long v_end,
206 unsigned long *page_array,
207 unsigned long vpt_start,
208 unsigned long vpt_end)
209 {
210 l1_pgentry_t *vl1tab=NULL, *vl1e=NULL;
211 l2_pgentry_t *vl2tab=NULL, *vl2e=NULL;
212 l3_pgentry_t *vl3tab=NULL, *vl3e=NULL;
213 l4_pgentry_t *vl4tab=NULL, *vl4e=NULL;
214 unsigned long l2tab = 0;
215 unsigned long l1tab = 0;
216 unsigned long l3tab = 0;
217 unsigned long l4tab = 0;
218 unsigned long ppt_alloc;
219 unsigned long count;
221 /* First allocate page for page dir. */
222 ppt_alloc = (vpt_start - dsi_v_start) >> PAGE_SHIFT;
223 alloc_pt(l4tab, vl4tab);
224 vl4e = &vl4tab[l4_table_offset(dsi_v_start)];
225 ctxt->ctrlreg[3] = l4tab;
227 for ( count = 0; count < ((v_end-dsi_v_start)>>PAGE_SHIFT); count++)
228 {
229 if ( !((unsigned long)vl1e & (PAGE_SIZE-1)) )
230 {
231 alloc_pt(l1tab, vl1tab);
233 if ( !((unsigned long)vl2e & (PAGE_SIZE-1)) )
234 {
235 alloc_pt(l2tab, vl2tab);
236 if ( !((unsigned long)vl3e & (PAGE_SIZE-1)) )
237 {
238 alloc_pt(l3tab, vl3tab);
239 vl3e = &vl3tab[l3_table_offset(dsi_v_start + (count<<PAGE_SHIFT))];
240 *vl4e = l3tab | L4_PROT;
241 vl4e++;
242 }
243 vl2e = &vl2tab[l2_table_offset(dsi_v_start + (count<<PAGE_SHIFT))];
244 *vl3e = l2tab | L3_PROT;
245 vl3e++;
246 }
247 vl1e = &vl1tab[l1_table_offset(dsi_v_start + (count<<PAGE_SHIFT))];
248 *vl2e = l1tab | L2_PROT;
249 vl2e++;
250 }
252 *vl1e = (page_array[count] << PAGE_SHIFT) | L1_PROT;
253 if ( (count >= ((vpt_start-dsi_v_start)>>PAGE_SHIFT)) &&
254 (count < ((vpt_end -dsi_v_start)>>PAGE_SHIFT)) )
255 {
256 *vl1e &= ~_PAGE_RW;
257 }
258 vl1e++;
259 }
261 munmap(vl1tab, PAGE_SIZE);
262 munmap(vl2tab, PAGE_SIZE);
263 munmap(vl3tab, PAGE_SIZE);
264 munmap(vl4tab, PAGE_SIZE);
265 return 0;
267 error_out:
268 if (vl1tab)
269 munmap(vl1tab, PAGE_SIZE);
270 if (vl2tab)
271 munmap(vl2tab, PAGE_SIZE);
272 if (vl3tab)
273 munmap(vl3tab, PAGE_SIZE);
274 if (vl4tab)
275 munmap(vl4tab, PAGE_SIZE);
276 return -1;
277 }
278 #endif
280 #ifdef __ia64__
281 #include <asm/fpu.h> /* for FPSR_DEFAULT */
282 static int setup_guest(int xc_handle,
283 u32 dom,
284 char *image, unsigned long image_size,
285 gzFile initrd_gfd, unsigned long initrd_len,
286 unsigned long nr_pages,
287 unsigned long *pvsi, unsigned long *pvke,
288 unsigned long *pvss, vcpu_guest_context_t *ctxt,
289 const char *cmdline,
290 unsigned long shared_info_frame,
291 unsigned long flags,
292 unsigned int vcpus,
293 unsigned int store_evtchn, unsigned long *store_mfn,
294 unsigned int console_evtchn, unsigned long *console_mfn)
295 {
296 unsigned long *page_array = NULL;
297 struct load_funcs load_funcs;
298 struct domain_setup_info dsi;
299 unsigned long start_page, pgnr;
300 start_info_t *start_info;
301 int rc;
303 rc = probeimageformat(image, image_size, &load_funcs);
304 if ( rc != 0 )
305 goto error_out;
307 memset(&dsi, 0, sizeof(struct domain_setup_info));
309 rc = (load_funcs.parseimage)(image, image_size, &dsi);
310 if ( rc != 0 )
311 goto error_out;
313 dsi.v_start = round_pgdown(dsi.v_start);
314 dsi.v_end = round_pgup(dsi.v_end);
316 start_page = dsi.v_start >> PAGE_SHIFT;
317 pgnr = (dsi.v_end - dsi.v_start) >> PAGE_SHIFT;
318 if ( (page_array = malloc(pgnr * sizeof(unsigned long))) == NULL )
319 {
320 PERROR("Could not allocate memory");
321 goto error_out;
322 }
324 if ( xc_ia64_get_pfn_list(xc_handle, dom, page_array, start_page, pgnr) != pgnr )
325 {
326 PERROR("Could not get the page frame list");
327 goto error_out;
328 }
330 (load_funcs.loadimage)(image, image_size, xc_handle, dom, page_array,
331 &dsi);
333 *pvke = dsi.v_kernentry;
335 /* Now need to retrieve machine pfn for system pages:
336 * start_info/store/console
337 */
338 pgnr = 3;
339 if ( xc_ia64_get_pfn_list(xc_handle, dom, page_array,
340 nr_pages - 3, pgnr) != pgnr )
341 {
342 PERROR("Could not get page frame for xenstore");
343 goto error_out;
344 }
346 *store_mfn = page_array[1];
347 *console_mfn = page_array[2];
348 printf("store_mfn: 0x%lx, console_mfn: 0x%lx\n",
349 (u64)store_mfn, (u64)console_mfn);
351 start_info = xc_map_foreign_range(
352 xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, page_array[0]);
353 memset(start_info, 0, sizeof(*start_info));
354 start_info->flags = flags;
355 start_info->store_mfn = nr_pages - 2;
356 start_info->store_evtchn = store_evtchn;
357 start_info->console_mfn = nr_pages - 1;
358 start_info->console_evtchn = console_evtchn;
359 munmap(start_info, PAGE_SIZE);
361 free(page_array);
362 return 0;
364 error_out:
365 free(page_array);
366 return -1;
367 }
368 #else /* x86 */
369 static int setup_guest(int xc_handle,
370 u32 dom,
371 char *image, unsigned long image_size,
372 gzFile initrd_gfd, unsigned long initrd_len,
373 unsigned long nr_pages,
374 unsigned long *pvsi, unsigned long *pvke,
375 unsigned long *pvss, vcpu_guest_context_t *ctxt,
376 const char *cmdline,
377 unsigned long shared_info_frame,
378 unsigned long flags,
379 unsigned int vcpus,
380 unsigned int store_evtchn, unsigned long *store_mfn,
381 unsigned int console_evtchn, unsigned long *console_mfn)
382 {
383 unsigned long *page_array = NULL;
384 unsigned long count, i;
385 start_info_t *start_info;
386 shared_info_t *shared_info;
387 xc_mmu_t *mmu = NULL;
388 int rc;
390 unsigned long nr_pt_pages;
391 unsigned long physmap_pfn;
392 unsigned long *physmap, *physmap_e;
394 struct load_funcs load_funcs;
395 struct domain_setup_info dsi;
396 unsigned long vinitrd_start;
397 unsigned long vinitrd_end;
398 unsigned long vphysmap_start;
399 unsigned long vphysmap_end;
400 unsigned long vstartinfo_start;
401 unsigned long vstartinfo_end;
402 unsigned long vstoreinfo_start;
403 unsigned long vstoreinfo_end;
404 unsigned long vconsole_start;
405 unsigned long vconsole_end;
406 unsigned long vstack_start;
407 unsigned long vstack_end;
408 unsigned long vpt_start;
409 unsigned long vpt_end;
410 unsigned long v_end;
412 rc = probeimageformat(image, image_size, &load_funcs);
413 if ( rc != 0 )
414 goto error_out;
416 memset(&dsi, 0, sizeof(struct domain_setup_info));
418 rc = (load_funcs.parseimage)(image, image_size, &dsi);
419 if ( rc != 0 )
420 goto error_out;
422 if ( (dsi.v_start & (PAGE_SIZE-1)) != 0 )
423 {
424 PERROR("Guest OS must load to a page boundary.\n");
425 goto error_out;
426 }
428 /*
429 * Why do we need this? The number of page-table frames depends on the
430 * size of the bootstrap address space. But the size of the address space
431 * depends on the number of page-table frames (since each one is mapped
432 * read-only). We have a pair of simultaneous equations in two unknowns,
433 * which we solve by exhaustive search.
434 */
435 vinitrd_start = round_pgup(dsi.v_end);
436 vinitrd_end = vinitrd_start + initrd_len;
437 vphysmap_start = round_pgup(vinitrd_end);
438 vphysmap_end = vphysmap_start + (nr_pages * sizeof(unsigned long));
439 vstartinfo_start = round_pgup(vphysmap_end);
440 vstartinfo_end = vstartinfo_start + PAGE_SIZE;
441 vstoreinfo_start = vstartinfo_end;
442 vstoreinfo_end = vstoreinfo_start + PAGE_SIZE;
443 vconsole_start = vstoreinfo_end;
444 vconsole_end = vconsole_start + PAGE_SIZE;
445 vpt_start = vconsole_end;
447 for ( nr_pt_pages = 2; ; nr_pt_pages++ )
448 {
449 vpt_end = vpt_start + (nr_pt_pages * PAGE_SIZE);
450 vstack_start = vpt_end;
451 vstack_end = vstack_start + PAGE_SIZE;
452 v_end = (vstack_end + (1UL<<22)-1) & ~((1UL<<22)-1);
453 if ( (v_end - vstack_end) < (512UL << 10) )
454 v_end += 1UL << 22; /* Add extra 4MB to get >= 512kB padding. */
455 #if defined(__i386__)
456 if (dsi.pae_kernel) {
457 /* FIXME: assumes one L2 pgtable @ 0xc0000000 */
458 if ( (((v_end - dsi.v_start + ((1<<L2_PAGETABLE_SHIFT_PAE)-1)) >>
459 L2_PAGETABLE_SHIFT_PAE) + 2) <= nr_pt_pages )
460 break;
461 } else {
462 if ( (((v_end - dsi.v_start + ((1<<L2_PAGETABLE_SHIFT)-1)) >>
463 L2_PAGETABLE_SHIFT) + 1) <= nr_pt_pages )
464 break;
465 }
466 #endif
467 #if defined(__x86_64__)
468 #define NR(_l,_h,_s) \
469 (((((_h) + ((1UL<<(_s))-1)) & ~((1UL<<(_s))-1)) - \
470 ((_l) & ~((1UL<<(_s))-1))) >> (_s))
471 if ( (1 + /* # L4 */
472 NR(dsi.v_start, v_end, L4_PAGETABLE_SHIFT) + /* # L3 */
473 NR(dsi.v_start, v_end, L3_PAGETABLE_SHIFT) + /* # L2 */
474 NR(dsi.v_start, v_end, L2_PAGETABLE_SHIFT)) /* # L1 */
475 <= nr_pt_pages )
476 break;
477 #endif
478 }
480 #define _p(a) ((void *) (a))
482 printf("VIRTUAL MEMORY ARRANGEMENT:\n"
483 " Loaded kernel: %p->%p\n"
484 " Init. ramdisk: %p->%p\n"
485 " Phys-Mach map: %p->%p\n"
486 " Start info: %p->%p\n"
487 " Store page: %p->%p\n"
488 " Console page: %p->%p\n"
489 " Page tables: %p->%p\n"
490 " Boot stack: %p->%p\n"
491 " TOTAL: %p->%p\n",
492 _p(dsi.v_kernstart), _p(dsi.v_kernend),
493 _p(vinitrd_start), _p(vinitrd_end),
494 _p(vphysmap_start), _p(vphysmap_end),
495 _p(vstartinfo_start), _p(vstartinfo_end),
496 _p(vstoreinfo_start), _p(vstoreinfo_end),
497 _p(vconsole_start), _p(vconsole_end),
498 _p(vpt_start), _p(vpt_end),
499 _p(vstack_start), _p(vstack_end),
500 _p(dsi.v_start), _p(v_end));
501 printf(" ENTRY ADDRESS: %p\n", _p(dsi.v_kernentry));
503 if ( (v_end - dsi.v_start) > (nr_pages * PAGE_SIZE) )
504 {
505 printf("Initial guest OS requires too much space\n"
506 "(%luMB is greater than %luMB limit)\n",
507 (v_end-dsi.v_start)>>20, (nr_pages<<PAGE_SHIFT)>>20);
508 goto error_out;
509 }
511 if ( (page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL )
512 {
513 PERROR("Could not allocate memory");
514 goto error_out;
515 }
517 if ( xc_get_pfn_list(xc_handle, dom, page_array, nr_pages) != nr_pages )
518 {
519 PERROR("Could not get the page frame list");
520 goto error_out;
521 }
523 (load_funcs.loadimage)(image, image_size, xc_handle, dom, page_array,
524 &dsi);
526 /* Load the initial ramdisk image. */
527 if ( initrd_len != 0 )
528 {
529 for ( i = (vinitrd_start - dsi.v_start);
530 i < (vinitrd_end - dsi.v_start); i += PAGE_SIZE )
531 {
532 char page[PAGE_SIZE];
533 if ( gzread(initrd_gfd, page, PAGE_SIZE) == -1 )
534 {
535 PERROR("Error reading initrd image, could not");
536 goto error_out;
537 }
538 xc_copy_to_domain_page(xc_handle, dom,
539 page_array[i>>PAGE_SHIFT], page);
540 }
541 }
543 if ( (mmu = xc_init_mmu_updates(xc_handle, dom)) == NULL )
544 goto error_out;
546 /* setup page tables */
547 #if defined(__i386__)
548 if (dsi.pae_kernel)
549 rc = setup_pg_tables_pae(xc_handle, dom, ctxt,
550 dsi.v_start, v_end,
551 page_array, vpt_start, vpt_end);
552 else {
553 rc = setup_pg_tables(xc_handle, dom, ctxt,
554 dsi.v_start, v_end,
555 page_array, vpt_start, vpt_end);
556 }
557 #endif
558 #if defined(__x86_64__)
559 rc = setup_pg_tables_64(xc_handle, dom, ctxt,
560 dsi.v_start, v_end,
561 page_array, vpt_start, vpt_end);
562 #endif
563 if (0 != rc)
564 goto error_out;
566 /* Write the phys->machine and machine->phys table entries. */
567 physmap_pfn = (vphysmap_start - dsi.v_start) >> PAGE_SHIFT;
568 physmap = physmap_e = xc_map_foreign_range(
569 xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
570 page_array[physmap_pfn++]);
572 for ( count = 0; count < nr_pages; count++ )
573 {
574 if ( xc_add_mmu_update(
575 xc_handle, mmu,
576 ((u64)page_array[count] << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE,
577 count) )
578 {
579 fprintf(stderr,"m2p update failure p=%lx m=%lx\n",
580 count, page_array[count]);
581 munmap(physmap, PAGE_SIZE);
582 goto error_out;
583 }
584 *physmap_e++ = page_array[count];
585 if ( ((unsigned long)physmap_e & (PAGE_SIZE-1)) == 0 )
586 {
587 munmap(physmap, PAGE_SIZE);
588 physmap = physmap_e = xc_map_foreign_range(
589 xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
590 page_array[physmap_pfn++]);
591 }
592 }
593 munmap(physmap, PAGE_SIZE);
595 #if defined(__i386__)
596 /*
597 * Pin down l2tab addr as page dir page - causes hypervisor to provide
598 * correct protection for the page
599 */
600 if (dsi.pae_kernel) {
601 if ( pin_table(xc_handle, MMUEXT_PIN_L3_TABLE,
602 ctxt->ctrlreg[3] >> PAGE_SHIFT, dom) )
603 goto error_out;
604 } else {
605 if ( pin_table(xc_handle, MMUEXT_PIN_L2_TABLE,
606 ctxt->ctrlreg[3] >> PAGE_SHIFT, dom) )
607 goto error_out;
608 }
609 #endif
611 #if defined(__x86_64__)
612 /*
613 * Pin down l4tab addr as page dir page - causes hypervisor to provide
614 * correct protection for the page
615 */
616 if ( pin_table(xc_handle, MMUEXT_PIN_L4_TABLE,
617 ctxt->ctrlreg[3] >> PAGE_SHIFT, dom) )
618 goto error_out;
619 #endif
621 *store_mfn = page_array[(vstoreinfo_start-dsi.v_start) >> PAGE_SHIFT];
622 *console_mfn = page_array[(vconsole_start-dsi.v_start) >> PAGE_SHIFT];
625 start_info = xc_map_foreign_range(
626 xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
627 page_array[(vstartinfo_start-dsi.v_start)>>PAGE_SHIFT]);
628 memset(start_info, 0, sizeof(*start_info));
629 start_info->nr_pages = nr_pages;
630 start_info->shared_info = shared_info_frame << PAGE_SHIFT;
631 start_info->flags = flags;
632 start_info->pt_base = vpt_start;
633 start_info->nr_pt_frames = nr_pt_pages;
634 start_info->mfn_list = vphysmap_start;
635 start_info->store_mfn = *store_mfn;
636 start_info->store_evtchn = store_evtchn;
637 start_info->console_mfn = *console_mfn;
638 start_info->console_evtchn = console_evtchn;
639 if ( initrd_len != 0 )
640 {
641 start_info->mod_start = vinitrd_start;
642 start_info->mod_len = initrd_len;
643 }
644 strncpy((char *)start_info->cmd_line, cmdline, MAX_GUEST_CMDLINE);
645 start_info->cmd_line[MAX_GUEST_CMDLINE-1] = '\0';
646 munmap(start_info, PAGE_SIZE);
648 /* shared_info page starts its life empty. */
649 shared_info = xc_map_foreign_range(
650 xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, shared_info_frame);
651 memset(shared_info, 0, sizeof(shared_info_t));
652 /* Mask all upcalls... */
653 for ( i = 0; i < MAX_VIRT_CPUS; i++ )
654 shared_info->vcpu_data[i].evtchn_upcall_mask = 1;
656 shared_info->n_vcpu = vcpus;
657 printf(" VCPUS: %d\n", shared_info->n_vcpu);
659 munmap(shared_info, PAGE_SIZE);
661 /* Send the page update requests down to the hypervisor. */
662 if ( xc_finish_mmu_updates(xc_handle, mmu) )
663 goto error_out;
665 free(mmu);
666 free(page_array);
668 *pvsi = vstartinfo_start;
669 *pvss = vstack_start;
670 *pvke = dsi.v_kernentry;
672 return 0;
674 error_out:
675 free(mmu);
676 free(page_array);
677 return -1;
678 }
679 #endif
681 int xc_linux_build(int xc_handle,
682 u32 domid,
683 const char *image_name,
684 const char *ramdisk_name,
685 const char *cmdline,
686 unsigned long flags,
687 unsigned int vcpus,
688 unsigned int store_evtchn,
689 unsigned long *store_mfn,
690 unsigned int console_evtchn,
691 unsigned long *console_mfn)
692 {
693 dom0_op_t launch_op, op;
694 int initrd_fd = -1;
695 gzFile initrd_gfd = NULL;
696 int rc, i;
697 vcpu_guest_context_t st_ctxt, *ctxt = &st_ctxt;
698 unsigned long nr_pages;
699 char *image = NULL;
700 unsigned long image_size, initrd_size=0;
701 unsigned long vstartinfo_start, vkern_entry, vstack_start;
703 if ( (nr_pages = get_tot_pages(xc_handle, domid)) < 0 )
704 {
705 PERROR("Could not find total pages for domain");
706 goto error_out;
707 }
709 if ( (image = xc_read_kernel_image(image_name, &image_size)) == NULL )
710 goto error_out;
712 if ( (ramdisk_name != NULL) && (strlen(ramdisk_name) != 0) )
713 {
714 if ( (initrd_fd = open(ramdisk_name, O_RDONLY)) < 0 )
715 {
716 PERROR("Could not open the initial ramdisk image");
717 goto error_out;
718 }
720 initrd_size = xc_get_filesz(initrd_fd);
722 if ( (initrd_gfd = gzdopen(initrd_fd, "rb")) == NULL )
723 {
724 PERROR("Could not allocate decompression state for initrd");
725 goto error_out;
726 }
727 }
729 if ( mlock(&st_ctxt, sizeof(st_ctxt) ) )
730 {
731 PERROR("xc_linux_build: ctxt mlock failed");
732 return 1;
733 }
735 op.cmd = DOM0_GETDOMAININFO;
736 op.u.getdomaininfo.domain = (domid_t)domid;
737 if ( (xc_dom0_op(xc_handle, &op) < 0) ||
738 ((u16)op.u.getdomaininfo.domain != domid) )
739 {
740 PERROR("Could not get info on domain");
741 goto error_out;
742 }
744 if ( xc_domain_get_vcpu_context(xc_handle, domid, 0, ctxt) )
745 {
746 PERROR("Could not get vcpu context");
747 goto error_out;
748 }
750 if ( !(op.u.getdomaininfo.flags & DOMFLAGS_PAUSED) || already_built(ctxt) )
751 {
752 ERROR("Domain is already constructed");
753 goto error_out;
754 }
756 if ( setup_guest(xc_handle, domid, image, image_size,
757 initrd_gfd, initrd_size, nr_pages,
758 &vstartinfo_start, &vkern_entry,
759 &vstack_start, ctxt, cmdline,
760 op.u.getdomaininfo.shared_info_frame,
761 flags, vcpus,
762 store_evtchn, store_mfn,
763 console_evtchn, console_mfn) < 0 )
764 {
765 ERROR("Error constructing guest OS");
766 goto error_out;
767 }
769 if ( initrd_fd >= 0 )
770 close(initrd_fd);
771 if ( initrd_gfd )
772 gzclose(initrd_gfd);
773 free(image);
775 #ifdef __ia64__
776 /* based on new_thread in xen/arch/ia64/domain.c */
777 ctxt->flags = 0;
778 ctxt->shared.flags = flags;
779 ctxt->shared.start_info_pfn = nr_pages - 3; /* metaphysical */
780 ctxt->regs.cr_ipsr = 0; /* all necessary bits filled by hypervisor */
781 ctxt->regs.cr_iip = vkern_entry;
782 ctxt->regs.cr_ifs = 1UL << 63;
783 ctxt->regs.ar_fpsr = FPSR_DEFAULT;
784 /* currently done by hypervisor, should move here */
785 /* ctxt->regs.r28 = dom_fw_setup(); */
786 ctxt->vcpu.privregs = 0;
787 ctxt->sys_pgnr = nr_pages - 3;
788 i = 0; /* silence unused variable warning */
789 #else /* x86 */
790 /*
791 * Initial register values:
792 * DS,ES,FS,GS = FLAT_KERNEL_DS
793 * CS:EIP = FLAT_KERNEL_CS:start_pc
794 * SS:ESP = FLAT_KERNEL_DS:start_stack
795 * ESI = start_info
796 * [EAX,EBX,ECX,EDX,EDI,EBP are zero]
797 * EFLAGS = IF | 2 (bit 1 is reserved and should always be 1)
798 */
799 ctxt->user_regs.ds = FLAT_KERNEL_DS;
800 ctxt->user_regs.es = FLAT_KERNEL_DS;
801 ctxt->user_regs.fs = FLAT_KERNEL_DS;
802 ctxt->user_regs.gs = FLAT_KERNEL_DS;
803 ctxt->user_regs.ss = FLAT_KERNEL_SS;
804 ctxt->user_regs.cs = FLAT_KERNEL_CS;
805 ctxt->user_regs.eip = vkern_entry;
806 ctxt->user_regs.esp = vstack_start + PAGE_SIZE;
807 ctxt->user_regs.esi = vstartinfo_start;
808 ctxt->user_regs.eflags = 1 << 9; /* Interrupt Enable */
810 /* FPU is set up to default initial state. */
811 memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
813 /* Virtual IDT is empty at start-of-day. */
814 for ( i = 0; i < 256; i++ )
815 {
816 ctxt->trap_ctxt[i].vector = i;
817 ctxt->trap_ctxt[i].cs = FLAT_KERNEL_CS;
818 }
820 /* No LDT. */
821 ctxt->ldt_ents = 0;
823 /* Use the default Xen-provided GDT. */
824 ctxt->gdt_ents = 0;
826 /* Ring 1 stack is the initial stack. */
827 ctxt->kernel_ss = FLAT_KERNEL_SS;
828 ctxt->kernel_sp = vstack_start + PAGE_SIZE;
830 /* No debugging. */
831 memset(ctxt->debugreg, 0, sizeof(ctxt->debugreg));
833 /* No callback handlers. */
834 #if defined(__i386__)
835 ctxt->event_callback_cs = FLAT_KERNEL_CS;
836 ctxt->event_callback_eip = 0;
837 ctxt->failsafe_callback_cs = FLAT_KERNEL_CS;
838 ctxt->failsafe_callback_eip = 0;
839 #elif defined(__x86_64__)
840 ctxt->event_callback_eip = 0;
841 ctxt->failsafe_callback_eip = 0;
842 ctxt->syscall_callback_eip = 0;
843 #endif
844 #endif /* x86 */
846 memset( &launch_op, 0, sizeof(launch_op) );
848 launch_op.u.setdomaininfo.domain = (domid_t)domid;
849 launch_op.u.setdomaininfo.vcpu = 0;
850 launch_op.u.setdomaininfo.ctxt = ctxt;
852 launch_op.cmd = DOM0_SETDOMAININFO;
853 rc = xc_dom0_op(xc_handle, &launch_op);
855 return rc;
857 error_out:
858 if ( initrd_gfd != NULL )
859 gzclose(initrd_gfd);
860 else if ( initrd_fd >= 0 )
861 close(initrd_fd);
862 free(image);
864 return -1;
865 }
867 /*
868 * Local variables:
869 * mode: C
870 * c-set-style: "BSD"
871 * c-basic-offset: 4
872 * tab-width: 4
873 * indent-tabs-mode: nil
874 * End:
875 */