direct-io.hg

view tools/libxc/xc_linux_build.c @ 7786:20bd6f55b813

Clean up xenstore/console shared page initialisation, which is
now handled solely by the domain builder.

Signed-off-by: Keir Fraser <keir@xensource.com>
author kaf24@firebug.cl.cam.ac.uk
date Mon Nov 14 11:36:42 2005 +0100 (2005-11-14)
parents 8ee7df2c18d1
children fb50fba1895c
line source
1 /******************************************************************************
2 * xc_linux_build.c
3 */
5 #include "xg_private.h"
6 #include <xenctrl.h>
8 #if defined(__i386__)
9 #define ELFSIZE 32
10 #endif
12 #if defined(__x86_64__) || defined(__ia64__)
13 #define ELFSIZE 64
14 #endif
16 #include "xc_elf.h"
17 #include "xc_aout9.h"
18 #include <stdlib.h>
19 #include <unistd.h>
20 #include <zlib.h>
22 #if defined(__i386__)
23 #define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
24 #define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
25 #define L3_PROT (_PAGE_PRESENT)
26 #endif
28 #if defined(__x86_64__)
29 #define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
30 #define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
31 #define L3_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
32 #define L4_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
33 #endif
/*
 * Architecture-dependent helpers.
 *
 * already_built(ctxt): non-zero when the vcpu context shows the domain has
 *   been constructed before.  On ia64 this is hard-wired to 0; elsewhere a
 *   non-zero ctrlreg[3] is taken as the sign of a previous build.
 * get_tot_pages: name of the libxc call used to size the domain
 *   (xc_get_max_pages on ia64, xc_get_tot_pages otherwise).
 */
35 #ifdef __ia64__
36 #define already_built(ctxt) (0)
37 #define get_tot_pages xc_get_max_pages
38 #else
39 #define already_built(ctxt) ((ctxt)->ctrlreg[3] != 0)
40 #define get_tot_pages xc_get_tot_pages
41 #endif
/*
 * Round an address up / down using PAGE_SIZE and PAGE_MASK.
 * NOTE(review): assumes PAGE_MASK == ~(PAGE_SIZE-1) -- confirm in the header
 * that defines it.
 */
43 #define round_pgup(_p) (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
44 #define round_pgdown(_p) ((_p)&PAGE_MASK)
/*
 * On ia64 the a.out9 prober is replaced by the constant 1.  probeimageformat()
 * treats a non-zero prober result as "not recognised", so a.out9 images are
 * never accepted there.
 */
46 #ifdef __ia64__
47 #define probe_aout9(image,image_size,load_funcs) 1
48 #endif
/*
 * Work out which loader understands the kernel image.
 *
 * The probers are tried in a fixed order (ELF, raw binary, a.out9) and the
 * first one that returns zero wins; later probers are not called.  The
 * load_funcs pointer is handed to each prober (presumably so the winner can
 * install its parse/load callbacks -- the probers are defined elsewhere).
 *
 * Returns 0 when a prober accepted the image, -EINVAL when all rejected it.
 */
static int probeimageformat(char *image,
                            unsigned long image_size,
                            struct load_funcs *load_funcs)
{
    if ( probe_elf(image, image_size, load_funcs) == 0 )
        return 0;

    if ( probe_bin(image, image_size, load_funcs) == 0 )
        return 0;

    if ( probe_aout9(image, image_size, load_funcs) == 0 )
        return 0;

    ERROR( "Unrecognized image format" );
    return -EINVAL;
}
/*
 * alloc_pt(ltab, vltab): claim the next frame for use as a page table.
 *
 * Takes frame page_array[ppt_alloc] (post-incrementing ppt_alloc), stores
 * that frame number shifted left by PAGE_SHIFT in 'ltab', unmaps whatever
 * 'vltab' previously pointed at (if non-NULL), maps the new frame
 * read/write into 'vltab' via xc_map_foreign_range() and zero-fills it.
 *
 * This macro is not self-contained: the expanding function must have
 * 'xc_handle', 'dom', 'page_array' and 'ppt_alloc' in scope and must provide
 * an 'error_out' label, which is jumped to when the mapping fails.  Both
 * arguments are evaluated more than once and must be plain lvalues.
 */
65 #define alloc_pt(ltab, vltab) \
66 do { \
67 ltab = (uint64_t)page_array[ppt_alloc++] << PAGE_SHIFT; \
68 if ( vltab != NULL ) \
69 munmap(vltab, PAGE_SIZE); \
70 if ( (vltab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, \
71 PROT_READ|PROT_WRITE, \
72 ltab >> PAGE_SHIFT)) == NULL ) \
73 goto error_out; \
74 memset(vltab, 0, PAGE_SIZE); \
75 } while ( 0 )
77 #if defined(__i386__)
79 static int setup_pg_tables(int xc_handle, uint32_t dom,
80 vcpu_guest_context_t *ctxt,
81 unsigned long dsi_v_start,
82 unsigned long v_end,
83 unsigned long *page_array,
84 unsigned long vpt_start,
85 unsigned long vpt_end)
86 {
87 l1_pgentry_t *vl1tab=NULL, *vl1e=NULL;
88 l2_pgentry_t *vl2tab=NULL, *vl2e=NULL;
89 unsigned long l1tab = 0;
90 unsigned long l2tab = 0;
91 unsigned long ppt_alloc;
92 unsigned long count;
94 ppt_alloc = (vpt_start - dsi_v_start) >> PAGE_SHIFT;
95 alloc_pt(l2tab, vl2tab);
96 vl2e = &vl2tab[l2_table_offset(dsi_v_start)];
97 ctxt->ctrlreg[3] = l2tab;
99 for ( count = 0; count < ((v_end - dsi_v_start) >> PAGE_SHIFT); count++ )
100 {
101 if ( ((unsigned long)vl1e & (PAGE_SIZE-1)) == 0 )
102 {
103 alloc_pt(l1tab, vl1tab);
104 vl1e = &vl1tab[l1_table_offset(dsi_v_start + (count<<PAGE_SHIFT))];
105 *vl2e++ = l1tab | L2_PROT;
106 }
108 *vl1e = (page_array[count] << PAGE_SHIFT) | L1_PROT;
109 if ( (count >= ((vpt_start-dsi_v_start)>>PAGE_SHIFT)) &&
110 (count < ((vpt_end -dsi_v_start)>>PAGE_SHIFT)) )
111 *vl1e &= ~_PAGE_RW;
112 vl1e++;
113 }
114 munmap(vl1tab, PAGE_SIZE);
115 munmap(vl2tab, PAGE_SIZE);
116 return 0;
118 error_out:
119 if (vl1tab)
120 munmap(vl1tab, PAGE_SIZE);
121 if (vl2tab)
122 munmap(vl2tab, PAGE_SIZE);
123 return -1;
124 }
126 static int setup_pg_tables_pae(int xc_handle, uint32_t dom,
127 vcpu_guest_context_t *ctxt,
128 unsigned long dsi_v_start,
129 unsigned long v_end,
130 unsigned long *page_array,
131 unsigned long vpt_start,
132 unsigned long vpt_end)
133 {
134 l1_pgentry_64_t *vl1tab = NULL, *vl1e = NULL;
135 l2_pgentry_64_t *vl2tab = NULL, *vl2e = NULL;
136 l3_pgentry_64_t *vl3tab = NULL, *vl3e = NULL;
137 uint64_t l1tab, l2tab, l3tab;
138 unsigned long ppt_alloc, count, nmfn;
140 /* First allocate page for page dir. */
141 ppt_alloc = (vpt_start - dsi_v_start) >> PAGE_SHIFT;
143 if ( page_array[ppt_alloc] > 0xfffff )
144 {
145 nmfn = xc_make_page_below_4G(xc_handle, dom, page_array[ppt_alloc]);
146 if ( nmfn == 0 )
147 {
148 fprintf(stderr, "Couldn't get a page below 4GB :-(\n");
149 goto error_out;
150 }
151 page_array[ppt_alloc] = nmfn;
152 }
154 alloc_pt(l3tab, vl3tab);
155 vl3e = &vl3tab[l3_table_offset_pae(dsi_v_start)];
156 ctxt->ctrlreg[3] = l3tab;
158 for ( count = 0; count < ((v_end - dsi_v_start) >> PAGE_SHIFT); count++)
159 {
160 if ( !((unsigned long)vl1e & (PAGE_SIZE-1)) )
161 {
162 if ( !((unsigned long)vl2e & (PAGE_SIZE-1)) )
163 {
164 alloc_pt(l2tab, vl2tab);
165 vl2e = &vl2tab[l2_table_offset_pae(
166 dsi_v_start + (count << PAGE_SHIFT))];
167 *vl3e++ = l2tab | L3_PROT;
168 }
170 alloc_pt(l1tab, vl1tab);
171 vl1e = &vl1tab[l1_table_offset_pae(
172 dsi_v_start + (count << PAGE_SHIFT))];
173 *vl2e++ = l1tab | L2_PROT;
174 }
176 *vl1e = ((uint64_t)page_array[count] << PAGE_SHIFT) | L1_PROT;
177 if ( (count >= ((vpt_start-dsi_v_start)>>PAGE_SHIFT)) &&
178 (count < ((vpt_end -dsi_v_start)>>PAGE_SHIFT)) )
179 *vl1e &= ~_PAGE_RW;
180 vl1e++;
181 }
183 munmap(vl1tab, PAGE_SIZE);
184 munmap(vl2tab, PAGE_SIZE);
185 munmap(vl3tab, PAGE_SIZE);
186 return 0;
188 error_out:
189 if (vl1tab)
190 munmap(vl1tab, PAGE_SIZE);
191 if (vl2tab)
192 munmap(vl2tab, PAGE_SIZE);
193 if (vl3tab)
194 munmap(vl3tab, PAGE_SIZE);
195 return -1;
196 }
198 #endif
200 #if defined(__x86_64__)
202 static int setup_pg_tables_64(int xc_handle, uint32_t dom,
203 vcpu_guest_context_t *ctxt,
204 unsigned long dsi_v_start,
205 unsigned long v_end,
206 unsigned long *page_array,
207 unsigned long vpt_start,
208 unsigned long vpt_end)
209 {
210 l1_pgentry_t *vl1tab=NULL, *vl1e=NULL;
211 l2_pgentry_t *vl2tab=NULL, *vl2e=NULL;
212 l3_pgentry_t *vl3tab=NULL, *vl3e=NULL;
213 l4_pgentry_t *vl4tab=NULL, *vl4e=NULL;
214 unsigned long l2tab = 0;
215 unsigned long l1tab = 0;
216 unsigned long l3tab = 0;
217 unsigned long l4tab = 0;
218 unsigned long ppt_alloc;
219 unsigned long count;
221 /* First allocate page for page dir. */
222 ppt_alloc = (vpt_start - dsi_v_start) >> PAGE_SHIFT;
223 alloc_pt(l4tab, vl4tab);
224 vl4e = &vl4tab[l4_table_offset(dsi_v_start)];
225 ctxt->ctrlreg[3] = l4tab;
227 for ( count = 0; count < ((v_end-dsi_v_start)>>PAGE_SHIFT); count++)
228 {
229 if ( !((unsigned long)vl1e & (PAGE_SIZE-1)) )
230 {
231 alloc_pt(l1tab, vl1tab);
233 if ( !((unsigned long)vl2e & (PAGE_SIZE-1)) )
234 {
235 alloc_pt(l2tab, vl2tab);
236 if ( !((unsigned long)vl3e & (PAGE_SIZE-1)) )
237 {
238 alloc_pt(l3tab, vl3tab);
239 vl3e = &vl3tab[l3_table_offset(dsi_v_start + (count<<PAGE_SHIFT))];
240 *vl4e = l3tab | L4_PROT;
241 vl4e++;
242 }
243 vl2e = &vl2tab[l2_table_offset(dsi_v_start + (count<<PAGE_SHIFT))];
244 *vl3e = l2tab | L3_PROT;
245 vl3e++;
246 }
247 vl1e = &vl1tab[l1_table_offset(dsi_v_start + (count<<PAGE_SHIFT))];
248 *vl2e = l1tab | L2_PROT;
249 vl2e++;
250 }
252 *vl1e = (page_array[count] << PAGE_SHIFT) | L1_PROT;
253 if ( (count >= ((vpt_start-dsi_v_start)>>PAGE_SHIFT)) &&
254 (count < ((vpt_end -dsi_v_start)>>PAGE_SHIFT)) )
255 {
256 *vl1e &= ~_PAGE_RW;
257 }
258 vl1e++;
259 }
261 munmap(vl1tab, PAGE_SIZE);
262 munmap(vl2tab, PAGE_SIZE);
263 munmap(vl3tab, PAGE_SIZE);
264 munmap(vl4tab, PAGE_SIZE);
265 return 0;
267 error_out:
268 if (vl1tab)
269 munmap(vl1tab, PAGE_SIZE);
270 if (vl2tab)
271 munmap(vl2tab, PAGE_SIZE);
272 if (vl3tab)
273 munmap(vl3tab, PAGE_SIZE);
274 if (vl4tab)
275 munmap(vl4tab, PAGE_SIZE);
276 return -1;
277 }
278 #endif
280 #ifdef __ia64__
281 #include <asm/fpu.h> /* for FPSR_DEFAULT */
282 static int setup_guest(int xc_handle,
283 uint32_t dom,
284 char *image, unsigned long image_size,
285 gzFile initrd_gfd, unsigned long initrd_len,
286 unsigned long nr_pages,
287 unsigned long *pvsi, unsigned long *pvke,
288 unsigned long *pvss, vcpu_guest_context_t *ctxt,
289 const char *cmdline,
290 unsigned long shared_info_frame,
291 unsigned long flags,
292 unsigned int store_evtchn, unsigned long *store_mfn,
293 unsigned int console_evtchn, unsigned long *console_mfn)
294 {
295 unsigned long *page_array = NULL;
296 struct load_funcs load_funcs;
297 struct domain_setup_info dsi;
298 unsigned long start_page, pgnr;
299 start_info_t *start_info;
300 int rc;
302 rc = probeimageformat(image, image_size, &load_funcs);
303 if ( rc != 0 )
304 goto error_out;
306 memset(&dsi, 0, sizeof(struct domain_setup_info));
308 rc = (load_funcs.parseimage)(image, image_size, &dsi);
309 if ( rc != 0 )
310 goto error_out;
312 dsi.v_start = round_pgdown(dsi.v_start);
313 dsi.v_end = round_pgup(dsi.v_end);
315 start_page = dsi.v_start >> PAGE_SHIFT;
316 pgnr = (dsi.v_end - dsi.v_start) >> PAGE_SHIFT;
317 if ( (page_array = malloc(pgnr * sizeof(unsigned long))) == NULL )
318 {
319 PERROR("Could not allocate memory");
320 goto error_out;
321 }
323 if ( xc_ia64_get_pfn_list(xc_handle, dom, page_array, start_page, pgnr) != pgnr )
324 {
325 PERROR("Could not get the page frame list");
326 goto error_out;
327 }
329 (load_funcs.loadimage)(image, image_size, xc_handle, dom, page_array,
330 &dsi);
332 *pvke = dsi.v_kernentry;
334 /* Now need to retrieve machine pfn for system pages:
335 * start_info/store/console
336 */
337 pgnr = 3;
338 if ( xc_ia64_get_pfn_list(xc_handle, dom, page_array,
339 nr_pages - 3, pgnr) != pgnr )
340 {
341 PERROR("Could not get page frame for xenstore");
342 goto error_out;
343 }
345 *store_mfn = page_array[1];
346 *console_mfn = page_array[2];
347 printf("store_mfn: 0x%lx, console_mfn: 0x%lx\n",
348 (uint64_t)store_mfn, (uint64_t)console_mfn);
350 start_info = xc_map_foreign_range(
351 xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, page_array[0]);
352 memset(start_info, 0, sizeof(*start_info));
353 rc = xc_version(xc_handle, XENVER_version, NULL);
354 sprintf(start_info->magic, "xen-%i.%i", rc >> 16, rc & (0xFFFF));
355 start_info->flags = flags;
356 start_info->store_mfn = nr_pages - 2;
357 start_info->store_evtchn = store_evtchn;
358 start_info->console_mfn = nr_pages - 1;
359 start_info->console_evtchn = console_evtchn;
360 munmap(start_info, PAGE_SIZE);
362 free(page_array);
363 return 0;
365 error_out:
366 free(page_array);
367 return -1;
368 }
369 #else /* x86 */
370 static int setup_guest(int xc_handle,
371 uint32_t dom,
372 char *image, unsigned long image_size,
373 gzFile initrd_gfd, unsigned long initrd_len,
374 unsigned long nr_pages,
375 unsigned long *pvsi, unsigned long *pvke,
376 unsigned long *pvss, vcpu_guest_context_t *ctxt,
377 const char *cmdline,
378 unsigned long shared_info_frame,
379 unsigned long flags,
380 unsigned int store_evtchn, unsigned long *store_mfn,
381 unsigned int console_evtchn, unsigned long *console_mfn)
382 {
383 unsigned long *page_array = NULL;
384 unsigned long count, i;
385 start_info_t *start_info;
386 shared_info_t *shared_info;
387 xc_mmu_t *mmu = NULL;
388 int rc;
390 unsigned long nr_pt_pages;
391 unsigned long physmap_pfn;
392 unsigned long *physmap, *physmap_e;
394 struct load_funcs load_funcs;
395 struct domain_setup_info dsi;
396 unsigned long vinitrd_start;
397 unsigned long vinitrd_end;
398 unsigned long vphysmap_start;
399 unsigned long vphysmap_end;
400 unsigned long vstartinfo_start;
401 unsigned long vstartinfo_end;
402 unsigned long vstoreinfo_start;
403 unsigned long vstoreinfo_end;
404 unsigned long vconsole_start;
405 unsigned long vconsole_end;
406 unsigned long vstack_start;
407 unsigned long vstack_end;
408 unsigned long vpt_start;
409 unsigned long vpt_end;
410 unsigned long v_end;
412 rc = probeimageformat(image, image_size, &load_funcs);
413 if ( rc != 0 )
414 goto error_out;
416 memset(&dsi, 0, sizeof(struct domain_setup_info));
418 rc = (load_funcs.parseimage)(image, image_size, &dsi);
419 if ( rc != 0 )
420 goto error_out;
422 if ( (dsi.v_start & (PAGE_SIZE-1)) != 0 )
423 {
424 PERROR("Guest OS must load to a page boundary.\n");
425 goto error_out;
426 }
428 /*
429 * Why do we need this? The number of page-table frames depends on the
430 * size of the bootstrap address space. But the size of the address space
431 * depends on the number of page-table frames (since each one is mapped
432 * read-only). We have a pair of simultaneous equations in two unknowns,
433 * which we solve by exhaustive search.
434 */
435 vinitrd_start = round_pgup(dsi.v_end);
436 vinitrd_end = vinitrd_start + initrd_len;
437 vphysmap_start = round_pgup(vinitrd_end);
438 vphysmap_end = vphysmap_start + (nr_pages * sizeof(unsigned long));
439 vstartinfo_start = round_pgup(vphysmap_end);
440 vstartinfo_end = vstartinfo_start + PAGE_SIZE;
441 vstoreinfo_start = vstartinfo_end;
442 vstoreinfo_end = vstoreinfo_start + PAGE_SIZE;
443 vconsole_start = vstoreinfo_end;
444 vconsole_end = vconsole_start + PAGE_SIZE;
445 vpt_start = vconsole_end;
447 for ( nr_pt_pages = 2; ; nr_pt_pages++ )
448 {
449 vpt_end = vpt_start + (nr_pt_pages * PAGE_SIZE);
450 vstack_start = vpt_end;
451 vstack_end = vstack_start + PAGE_SIZE;
452 v_end = (vstack_end + (1UL<<22)-1) & ~((1UL<<22)-1);
453 if ( (v_end - vstack_end) < (512UL << 10) )
454 v_end += 1UL << 22; /* Add extra 4MB to get >= 512kB padding. */
455 #if defined(__i386__)
456 if (dsi.pae_kernel) {
457 /* FIXME: assumes one L2 pgtable @ 0xc0000000 */
458 if ( (((v_end - dsi.v_start + ((1<<L2_PAGETABLE_SHIFT_PAE)-1)) >>
459 L2_PAGETABLE_SHIFT_PAE) + 2) <= nr_pt_pages )
460 break;
461 } else {
462 if ( (((v_end - dsi.v_start + ((1<<L2_PAGETABLE_SHIFT)-1)) >>
463 L2_PAGETABLE_SHIFT) + 1) <= nr_pt_pages )
464 break;
465 }
466 #endif
467 #if defined(__x86_64__)
468 #define NR(_l,_h,_s) \
469 (((((_h) + ((1UL<<(_s))-1)) & ~((1UL<<(_s))-1)) - \
470 ((_l) & ~((1UL<<(_s))-1))) >> (_s))
471 if ( (1 + /* # L4 */
472 NR(dsi.v_start, v_end, L4_PAGETABLE_SHIFT) + /* # L3 */
473 NR(dsi.v_start, v_end, L3_PAGETABLE_SHIFT) + /* # L2 */
474 NR(dsi.v_start, v_end, L2_PAGETABLE_SHIFT)) /* # L1 */
475 <= nr_pt_pages )
476 break;
477 #endif
478 }
480 #define _p(a) ((void *) (a))
482 printf("VIRTUAL MEMORY ARRANGEMENT:\n"
483 " Loaded kernel: %p->%p\n"
484 " Init. ramdisk: %p->%p\n"
485 " Phys-Mach map: %p->%p\n"
486 " Start info: %p->%p\n"
487 " Store page: %p->%p\n"
488 " Console page: %p->%p\n"
489 " Page tables: %p->%p\n"
490 " Boot stack: %p->%p\n"
491 " TOTAL: %p->%p\n",
492 _p(dsi.v_kernstart), _p(dsi.v_kernend),
493 _p(vinitrd_start), _p(vinitrd_end),
494 _p(vphysmap_start), _p(vphysmap_end),
495 _p(vstartinfo_start), _p(vstartinfo_end),
496 _p(vstoreinfo_start), _p(vstoreinfo_end),
497 _p(vconsole_start), _p(vconsole_end),
498 _p(vpt_start), _p(vpt_end),
499 _p(vstack_start), _p(vstack_end),
500 _p(dsi.v_start), _p(v_end));
501 printf(" ENTRY ADDRESS: %p\n", _p(dsi.v_kernentry));
503 if ( ((v_end - dsi.v_start)>>PAGE_SHIFT) > nr_pages )
504 {
505 PERROR("Initial guest OS requires too much space\n"
506 "(%luMB is greater than %luMB limit)\n",
507 (v_end-dsi.v_start)>>20, nr_pages>>(20-PAGE_SHIFT));
508 goto error_out;
509 }
511 if ( (page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL )
512 {
513 PERROR("Could not allocate memory");
514 goto error_out;
515 }
517 if ( xc_get_pfn_list(xc_handle, dom, page_array, nr_pages) != nr_pages )
518 {
519 PERROR("Could not get the page frame list");
520 goto error_out;
521 }
523 (load_funcs.loadimage)(image, image_size, xc_handle, dom, page_array,
524 &dsi);
526 /* Load the initial ramdisk image. */
527 if ( initrd_len != 0 )
528 {
529 for ( i = (vinitrd_start - dsi.v_start);
530 i < (vinitrd_end - dsi.v_start); i += PAGE_SIZE )
531 {
532 char page[PAGE_SIZE];
533 if ( gzread(initrd_gfd, page, PAGE_SIZE) == -1 )
534 {
535 PERROR("Error reading initrd image, could not");
536 goto error_out;
537 }
538 xc_copy_to_domain_page(xc_handle, dom,
539 page_array[i>>PAGE_SHIFT], page);
540 }
541 }
543 if ( (mmu = xc_init_mmu_updates(xc_handle, dom)) == NULL )
544 goto error_out;
546 /* setup page tables */
547 #if defined(__i386__)
548 if (dsi.pae_kernel)
549 rc = setup_pg_tables_pae(xc_handle, dom, ctxt,
550 dsi.v_start, v_end,
551 page_array, vpt_start, vpt_end);
552 else
553 rc = setup_pg_tables(xc_handle, dom, ctxt,
554 dsi.v_start, v_end,
555 page_array, vpt_start, vpt_end);
556 #endif
557 #if defined(__x86_64__)
558 rc = setup_pg_tables_64(xc_handle, dom, ctxt,
559 dsi.v_start, v_end,
560 page_array, vpt_start, vpt_end);
561 #endif
562 if (0 != rc)
563 goto error_out;
565 /* Write the phys->machine and machine->phys table entries. */
566 physmap_pfn = (vphysmap_start - dsi.v_start) >> PAGE_SHIFT;
567 physmap = physmap_e = xc_map_foreign_range(
568 xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
569 page_array[physmap_pfn++]);
571 for ( count = 0; count < nr_pages; count++ )
572 {
573 if ( xc_add_mmu_update(
574 xc_handle, mmu,
575 ((uint64_t)page_array[count] << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE,
576 count) )
577 {
578 fprintf(stderr,"m2p update failure p=%lx m=%lx\n",
579 count, page_array[count]);
580 munmap(physmap, PAGE_SIZE);
581 goto error_out;
582 }
583 *physmap_e++ = page_array[count];
584 if ( ((unsigned long)physmap_e & (PAGE_SIZE-1)) == 0 )
585 {
586 munmap(physmap, PAGE_SIZE);
587 physmap = physmap_e = xc_map_foreign_range(
588 xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
589 page_array[physmap_pfn++]);
590 }
591 }
592 munmap(physmap, PAGE_SIZE);
594 #if defined(__i386__)
595 /*
596 * Pin down l2tab addr as page dir page - causes hypervisor to provide
597 * correct protection for the page
598 */
599 if (dsi.pae_kernel) {
600 if ( pin_table(xc_handle, MMUEXT_PIN_L3_TABLE,
601 ctxt->ctrlreg[3] >> PAGE_SHIFT, dom) )
602 goto error_out;
603 } else {
604 if ( pin_table(xc_handle, MMUEXT_PIN_L2_TABLE,
605 ctxt->ctrlreg[3] >> PAGE_SHIFT, dom) )
606 goto error_out;
607 }
608 #endif
610 #if defined(__x86_64__)
611 /*
612 * Pin down l4tab addr as page dir page - causes hypervisor to provide
613 * correct protection for the page
614 */
615 if ( pin_table(xc_handle, MMUEXT_PIN_L4_TABLE,
616 ctxt->ctrlreg[3] >> PAGE_SHIFT, dom) )
617 goto error_out;
618 #endif
620 *store_mfn = page_array[(vstoreinfo_start-dsi.v_start) >> PAGE_SHIFT];
621 *console_mfn = page_array[(vconsole_start-dsi.v_start) >> PAGE_SHIFT];
622 if ( xc_clear_domain_page(xc_handle, dom, *store_mfn) ||
623 xc_clear_domain_page(xc_handle, dom, *console_mfn) )
624 goto error_out;
626 start_info = xc_map_foreign_range(
627 xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
628 page_array[(vstartinfo_start-dsi.v_start)>>PAGE_SHIFT]);
629 memset(start_info, 0, sizeof(*start_info));
630 rc = xc_version(xc_handle, XENVER_version, NULL);
631 sprintf(start_info->magic, "xen-%i.%i", rc >> 16, rc & (0xFFFF));
632 start_info->nr_pages = nr_pages;
633 start_info->shared_info = shared_info_frame << PAGE_SHIFT;
634 start_info->flags = flags;
635 start_info->pt_base = vpt_start;
636 start_info->nr_pt_frames = nr_pt_pages;
637 start_info->mfn_list = vphysmap_start;
638 start_info->store_mfn = *store_mfn;
639 start_info->store_evtchn = store_evtchn;
640 start_info->console_mfn = *console_mfn;
641 start_info->console_evtchn = console_evtchn;
642 if ( initrd_len != 0 )
643 {
644 start_info->mod_start = vinitrd_start;
645 start_info->mod_len = initrd_len;
646 }
647 strncpy((char *)start_info->cmd_line, cmdline, MAX_GUEST_CMDLINE);
648 start_info->cmd_line[MAX_GUEST_CMDLINE-1] = '\0';
649 munmap(start_info, PAGE_SIZE);
651 /* shared_info page starts its life empty. */
652 shared_info = xc_map_foreign_range(
653 xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, shared_info_frame);
654 memset(shared_info, 0, sizeof(shared_info_t));
655 /* Mask all upcalls... */
656 for ( i = 0; i < MAX_VIRT_CPUS; i++ )
657 shared_info->vcpu_data[i].evtchn_upcall_mask = 1;
659 munmap(shared_info, PAGE_SIZE);
661 /* Send the page update requests down to the hypervisor. */
662 if ( xc_finish_mmu_updates(xc_handle, mmu) )
663 goto error_out;
665 free(mmu);
666 free(page_array);
668 *pvsi = vstartinfo_start;
669 *pvss = vstack_start;
670 *pvke = dsi.v_kernentry;
672 return 0;
674 error_out:
675 free(mmu);
676 free(page_array);
677 return -1;
678 }
679 #endif
681 int xc_linux_build(int xc_handle,
682 uint32_t domid,
683 const char *image_name,
684 const char *ramdisk_name,
685 const char *cmdline,
686 unsigned long flags,
687 unsigned int store_evtchn,
688 unsigned long *store_mfn,
689 unsigned int console_evtchn,
690 unsigned long *console_mfn)
691 {
692 dom0_op_t launch_op, op;
693 int initrd_fd = -1;
694 gzFile initrd_gfd = NULL;
695 int rc, i;
696 vcpu_guest_context_t st_ctxt, *ctxt = &st_ctxt;
697 unsigned long nr_pages;
698 char *image = NULL;
699 unsigned long image_size, initrd_size=0;
700 unsigned long vstartinfo_start, vkern_entry, vstack_start;
702 if ( (nr_pages = get_tot_pages(xc_handle, domid)) < 0 )
703 {
704 PERROR("Could not find total pages for domain");
705 goto error_out;
706 }
708 if ( (image = xc_read_kernel_image(image_name, &image_size)) == NULL )
709 goto error_out;
711 if ( (ramdisk_name != NULL) && (strlen(ramdisk_name) != 0) )
712 {
713 if ( (initrd_fd = open(ramdisk_name, O_RDONLY)) < 0 )
714 {
715 PERROR("Could not open the initial ramdisk image");
716 goto error_out;
717 }
719 initrd_size = xc_get_filesz(initrd_fd);
721 if ( (initrd_gfd = gzdopen(initrd_fd, "rb")) == NULL )
722 {
723 PERROR("Could not allocate decompression state for initrd");
724 goto error_out;
725 }
726 }
728 if ( mlock(&st_ctxt, sizeof(st_ctxt) ) )
729 {
730 PERROR("%s: ctxt mlock failed", __func__);
731 return 1;
732 }
734 op.cmd = DOM0_GETDOMAININFO;
735 op.u.getdomaininfo.domain = (domid_t)domid;
736 if ( (xc_dom0_op(xc_handle, &op) < 0) ||
737 ((uint16_t)op.u.getdomaininfo.domain != domid) )
738 {
739 PERROR("Could not get info on domain");
740 goto error_out;
741 }
743 if ( xc_domain_get_vcpu_context(xc_handle, domid, 0, ctxt) )
744 {
745 PERROR("Could not get vcpu context");
746 goto error_out;
747 }
749 if ( !(op.u.getdomaininfo.flags & DOMFLAGS_PAUSED) || already_built(ctxt) )
750 {
751 ERROR("Domain is already constructed");
752 goto error_out;
753 }
755 if ( setup_guest(xc_handle, domid, image, image_size,
756 initrd_gfd, initrd_size, nr_pages,
757 &vstartinfo_start, &vkern_entry,
758 &vstack_start, ctxt, cmdline,
759 op.u.getdomaininfo.shared_info_frame,
760 flags, store_evtchn, store_mfn,
761 console_evtchn, console_mfn) < 0 )
762 {
763 ERROR("Error constructing guest OS");
764 goto error_out;
765 }
767 if ( initrd_fd >= 0 )
768 close(initrd_fd);
769 if ( initrd_gfd )
770 gzclose(initrd_gfd);
771 free(image);
773 #ifdef __ia64__
774 /* based on new_thread in xen/arch/ia64/domain.c */
775 ctxt->flags = 0;
776 ctxt->shared.flags = flags;
777 ctxt->shared.start_info_pfn = nr_pages - 3; /* metaphysical */
778 ctxt->regs.cr_ipsr = 0; /* all necessary bits filled by hypervisor */
779 ctxt->regs.cr_iip = vkern_entry;
780 ctxt->regs.cr_ifs = 1UL << 63;
781 ctxt->regs.ar_fpsr = FPSR_DEFAULT;
782 /* currently done by hypervisor, should move here */
783 /* ctxt->regs.r28 = dom_fw_setup(); */
784 ctxt->vcpu.privregs = 0;
785 ctxt->sys_pgnr = 3;
786 i = 0; /* silence unused variable warning */
787 #else /* x86 */
788 /*
789 * Initial register values:
790 * DS,ES,FS,GS = FLAT_KERNEL_DS
791 * CS:EIP = FLAT_KERNEL_CS:start_pc
792 * SS:ESP = FLAT_KERNEL_DS:start_stack
793 * ESI = start_info
794 * [EAX,EBX,ECX,EDX,EDI,EBP are zero]
795 * EFLAGS = IF | 2 (bit 1 is reserved and should always be 1)
796 */
797 ctxt->user_regs.ds = FLAT_KERNEL_DS;
798 ctxt->user_regs.es = FLAT_KERNEL_DS;
799 ctxt->user_regs.fs = FLAT_KERNEL_DS;
800 ctxt->user_regs.gs = FLAT_KERNEL_DS;
801 ctxt->user_regs.ss = FLAT_KERNEL_SS;
802 ctxt->user_regs.cs = FLAT_KERNEL_CS;
803 ctxt->user_regs.eip = vkern_entry;
804 ctxt->user_regs.esp = vstack_start + PAGE_SIZE;
805 ctxt->user_regs.esi = vstartinfo_start;
806 ctxt->user_regs.eflags = 1 << 9; /* Interrupt Enable */
808 /* FPU is set up to default initial state. */
809 memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
811 /* Virtual IDT is empty at start-of-day. */
812 for ( i = 0; i < 256; i++ )
813 {
814 ctxt->trap_ctxt[i].vector = i;
815 ctxt->trap_ctxt[i].cs = FLAT_KERNEL_CS;
816 }
818 /* No LDT. */
819 ctxt->ldt_ents = 0;
821 /* Use the default Xen-provided GDT. */
822 ctxt->gdt_ents = 0;
824 /* Ring 1 stack is the initial stack. */
825 ctxt->kernel_ss = FLAT_KERNEL_SS;
826 ctxt->kernel_sp = vstack_start + PAGE_SIZE;
828 /* No debugging. */
829 memset(ctxt->debugreg, 0, sizeof(ctxt->debugreg));
831 /* No callback handlers. */
832 #if defined(__i386__)
833 ctxt->event_callback_cs = FLAT_KERNEL_CS;
834 ctxt->event_callback_eip = 0;
835 ctxt->failsafe_callback_cs = FLAT_KERNEL_CS;
836 ctxt->failsafe_callback_eip = 0;
837 #elif defined(__x86_64__)
838 ctxt->event_callback_eip = 0;
839 ctxt->failsafe_callback_eip = 0;
840 ctxt->syscall_callback_eip = 0;
841 #endif
842 #endif /* x86 */
844 memset( &launch_op, 0, sizeof(launch_op) );
846 launch_op.u.setdomaininfo.domain = (domid_t)domid;
847 launch_op.u.setdomaininfo.vcpu = 0;
848 launch_op.u.setdomaininfo.ctxt = ctxt;
850 launch_op.cmd = DOM0_SETDOMAININFO;
851 rc = xc_dom0_op(xc_handle, &launch_op);
853 return rc;
855 error_out:
856 if ( initrd_gfd != NULL )
857 gzclose(initrd_gfd);
858 else if ( initrd_fd >= 0 )
859 close(initrd_fd);
860 free(image);
861 return -1;
862 }
864 /*
865 * Local variables:
866 * mode: C
867 * c-set-style: "BSD"
868 * c-basic-offset: 4
869 * tab-width: 4
870 * indent-tabs-mode: nil
871 * End:
872 */