ia64/xen-unstable: tools/libxc/xc_linux_build.c @ 6708:aa0990ef260f

    merge
    author   iap10@freefall.cl.cam.ac.uk
    date     Thu Sep 08 17:42:49 2005 +0000 (2005-09-08)
    parents  3bde4219c681 12ff9c954ace
    children c7999d49ecf7
/******************************************************************************
 * xc_linux_build.c
 */

#include "xg_private.h"
#include <xenctrl.h>

#if defined(__i386__)
#define ELFSIZE 32
#endif

#if defined(__x86_64__) || defined(__ia64__)
#define ELFSIZE 64
#endif

#include "xc_elf.h"
#include "xc_aout9.h"
#include <stdlib.h>
#include <unistd.h>
#include <zlib.h>

#if defined(__i386__)
#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
#define L3_PROT (_PAGE_PRESENT)
#endif

#if defined(__x86_64__)
#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
#define L3_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
#define L4_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
#endif

#define round_pgup(_p)    (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
#define round_pgdown(_p)  ((_p)&PAGE_MASK)
#ifdef __ia64__
/* ia64 has no a.out9 loader: always report "not recognized" (non-zero). */
#define probe_aout9(image,image_size,load_funcs) 1
#endif
/*
 * Each probe_* routine returns 0 when it recognizes the image and fills in
 * *load_funcs; only if every probe fails is the image format unrecognized.
 */
static int probeimageformat(char *image,
                            unsigned long image_size,
                            struct load_funcs *load_funcs)
{
    if ( probe_elf(image, image_size, load_funcs) &&
         probe_bin(image, image_size, load_funcs) &&
         probe_aout9(image, image_size, load_funcs) )
    {
        ERROR("Unrecognized image format");
        return -EINVAL;
    }

    return 0;
}
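
/*
 * Illustrative sketch of the calling convention (assumes only what the code
 * above shows: the probe_* hooks fill *load_funcs on success and return 0):
 *
 *     struct load_funcs lf;
 *     struct domain_setup_info dsi;
 *
 *     if ( probeimageformat(image, image_size, &lf) != 0 )
 *         return -1;                                // unrecognized image
 *     memset(&dsi, 0, sizeof(dsi));
 *     (lf.parseimage)(image, image_size, &dsi);     // fills dsi.v_start etc.
 */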
#define alloc_pt(ltab, vltab) \
        ltab = (unsigned long long)(page_array[ppt_alloc++]) << PAGE_SHIFT; \
        if (vltab != NULL) { \
            munmap(vltab, PAGE_SIZE); \
        } \
        if ((vltab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, \
                                          PROT_READ|PROT_WRITE, \
                                          ltab >> PAGE_SHIFT)) == NULL) { \
            goto error_out; \
        } \
        memset(vltab, 0, PAGE_SIZE);
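
/*
 * alloc_pt() takes the next free frame from page_array (advancing the
 * caller's ppt_alloc cursor), unmaps the previously mapped table, maps the
 * new frame into our address space and zeroes it. It expands to several
 * statements and jumps to a local error_out label, so it may only be used
 * as a full statement inside functions that define page_array, ppt_alloc,
 * xc_handle, dom and an error_out label.
 */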
#if defined(__i386__)

static int setup_pg_tables(int xc_handle, u32 dom,
                           vcpu_guest_context_t *ctxt,
                           unsigned long dsi_v_start,
                           unsigned long v_end,
                           unsigned long *page_array,
                           unsigned long vpt_start,
                           unsigned long vpt_end)
{
    l1_pgentry_t *vl1tab=NULL, *vl1e=NULL;
    l2_pgentry_t *vl2tab=NULL, *vl2e=NULL;
    unsigned long l1tab = 0;
    unsigned long l2tab = 0;
    unsigned long ppt_alloc;
    unsigned long count;

    ppt_alloc = (vpt_start - dsi_v_start) >> PAGE_SHIFT;
    alloc_pt(l2tab, vl2tab);
    vl2e = &vl2tab[l2_table_offset(dsi_v_start)];
    ctxt->ctrlreg[3] = l2tab;

    for ( count = 0; count < ((v_end - dsi_v_start) >> PAGE_SHIFT); count++ )
    {
        if ( ((unsigned long)vl1e & (PAGE_SIZE-1)) == 0 )
        {
            alloc_pt(l1tab, vl1tab);
            vl1e = &vl1tab[l1_table_offset(dsi_v_start + (count<<PAGE_SHIFT))];
            *vl2e++ = l1tab | L2_PROT;
        }

        *vl1e = (page_array[count] << PAGE_SHIFT) | L1_PROT;
        if ( (count >= ((vpt_start-dsi_v_start)>>PAGE_SHIFT)) &&
             (count <  ((vpt_end  -dsi_v_start)>>PAGE_SHIFT)) )
            *vl1e &= ~_PAGE_RW;
        vl1e++;
    }
    munmap(vl1tab, PAGE_SIZE);
    munmap(vl2tab, PAGE_SIZE);
    return 0;

 error_out:
    if (vl1tab)
        munmap(vl1tab, PAGE_SIZE);
    if (vl2tab)
        munmap(vl2tab, PAGE_SIZE);
    return -1;
}
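
/*
 * Worked example (illustrative): with 4kB pages, one L1 table holds 1024
 * entries and so maps 4MB. A 16MB bootstrap region starting at dsi_v_start
 * therefore consumes four L1 frames plus the single L2 frame. The frames
 * backing the tables themselves (vpt_start..vpt_end) are mapped read-only
 * above, as the hypervisor requires for validated page tables.
 */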
static int setup_pg_tables_pae(int xc_handle, u32 dom,
                               vcpu_guest_context_t *ctxt,
                               unsigned long dsi_v_start,
                               unsigned long v_end,
                               unsigned long *page_array,
                               unsigned long vpt_start,
                               unsigned long vpt_end)
{
    l1_pgentry_64_t *vl1tab=NULL, *vl1e=NULL;
    l2_pgentry_64_t *vl2tab=NULL, *vl2e=NULL;
    l3_pgentry_64_t *vl3tab=NULL, *vl3e=NULL;
    unsigned long long l1tab = 0;
    unsigned long long l2tab = 0;
    unsigned long long l3tab = 0;
    unsigned long ppt_alloc;
    unsigned long count;

    /* First allocate page for page dir. */
    ppt_alloc = (vpt_start - dsi_v_start) >> PAGE_SHIFT;

    if ( page_array[ppt_alloc] > 0xfffff )
    {
        unsigned long nmfn;
        nmfn = xc_make_page_below_4G(xc_handle, dom, page_array[ppt_alloc]);
        if ( nmfn == 0 )
        {
            fprintf(stderr, "Couldn't get a page below 4GB :-(\n");
            goto error_out;
        }
        page_array[ppt_alloc] = nmfn;
    }

    alloc_pt(l3tab, vl3tab);
    vl3e = &vl3tab[l3_table_offset_pae(dsi_v_start)];
    ctxt->ctrlreg[3] = l3tab;

    if ( l3tab > 0xfffff000ULL )
    {
        fprintf(stderr, "L3TAB = %llx above 4GB!\n", l3tab);
        goto error_out;
    }

    for ( count = 0; count < ((v_end - dsi_v_start) >> PAGE_SHIFT); count++ )
    {
        if ( !((unsigned long)vl1e & (PAGE_SIZE-1)) )
        {
            alloc_pt(l1tab, vl1tab);

            if ( !((unsigned long)vl2e & (PAGE_SIZE-1)) )
            {
                alloc_pt(l2tab, vl2tab);
                vl2e = &vl2tab[l2_table_offset_pae(dsi_v_start +
                                                   (count << PAGE_SHIFT))];
                *vl3e = l2tab | L3_PROT;
                vl3e++;
            }
            vl1e = &vl1tab[l1_table_offset_pae(dsi_v_start +
                                               (count << PAGE_SHIFT))];
            *vl2e = l1tab | L2_PROT;
            vl2e++;
        }

        *vl1e = (page_array[count] << PAGE_SHIFT) | L1_PROT;
        if ( (count >= ((vpt_start-dsi_v_start)>>PAGE_SHIFT)) &&
             (count <  ((vpt_end  -dsi_v_start)>>PAGE_SHIFT)) )
        {
            *vl1e &= ~_PAGE_RW;
        }
        vl1e++;
    }

    munmap(vl1tab, PAGE_SIZE);
    munmap(vl2tab, PAGE_SIZE);
    munmap(vl3tab, PAGE_SIZE);
    return 0;

 error_out:
    if (vl1tab)
        munmap(vl1tab, PAGE_SIZE);
    if (vl2tab)
        munmap(vl2tab, PAGE_SIZE);
    if (vl3tab)
        munmap(vl3tab, PAGE_SIZE);
    return -1;
}
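
/*
 * Note (explanatory): PAE loads %cr3 with a 32-bit physical address, so the
 * L3 page-directory-pointer table must live below 4GB; that is why the frame
 * is exchanged via xc_make_page_below_4G() and the resulting l3tab address is
 * re-checked above. The 0xfffff test compares machine frame numbers: with
 * 4kB frames, MFN 0xfffff is the last frame under the 4GB boundary.
 */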
#endif

#if defined(__x86_64__)
static int setup_pg_tables_64(int xc_handle, u32 dom,
                              vcpu_guest_context_t *ctxt,
                              unsigned long dsi_v_start,
                              unsigned long v_end,
                              unsigned long *page_array,
                              unsigned long vpt_start,
                              unsigned long vpt_end)
{
    l1_pgentry_t *vl1tab=NULL, *vl1e=NULL;
    l2_pgentry_t *vl2tab=NULL, *vl2e=NULL;
    l3_pgentry_t *vl3tab=NULL, *vl3e=NULL;
    l4_pgentry_t *vl4tab=NULL, *vl4e=NULL;
    unsigned long l2tab = 0;
    unsigned long l1tab = 0;
    unsigned long l3tab = 0;
    unsigned long l4tab = 0;
    unsigned long ppt_alloc;
    unsigned long count;

    /* First allocate page for page dir. */
    ppt_alloc = (vpt_start - dsi_v_start) >> PAGE_SHIFT;
    alloc_pt(l4tab, vl4tab);
    vl4e = &vl4tab[l4_table_offset(dsi_v_start)];
    ctxt->ctrlreg[3] = l4tab;

    for ( count = 0; count < ((v_end - dsi_v_start) >> PAGE_SHIFT); count++ )
    {
        if ( !((unsigned long)vl1e & (PAGE_SIZE-1)) )
        {
            alloc_pt(l1tab, vl1tab);

            if ( !((unsigned long)vl2e & (PAGE_SIZE-1)) )
            {
                alloc_pt(l2tab, vl2tab);
                if ( !((unsigned long)vl3e & (PAGE_SIZE-1)) )
                {
                    alloc_pt(l3tab, vl3tab);
                    vl3e = &vl3tab[l3_table_offset(dsi_v_start +
                                                   (count << PAGE_SHIFT))];
                    *vl4e = l3tab | L4_PROT;
                    vl4e++;
                }
                vl2e = &vl2tab[l2_table_offset(dsi_v_start +
                                               (count << PAGE_SHIFT))];
                *vl3e = l2tab | L3_PROT;
                vl3e++;
            }
            vl1e = &vl1tab[l1_table_offset(dsi_v_start +
                                           (count << PAGE_SHIFT))];
            *vl2e = l1tab | L2_PROT;
            vl2e++;
        }

        *vl1e = (page_array[count] << PAGE_SHIFT) | L1_PROT;
        if ( (count >= ((vpt_start-dsi_v_start)>>PAGE_SHIFT)) &&
             (count <  ((vpt_end  -dsi_v_start)>>PAGE_SHIFT)) )
        {
            *vl1e &= ~_PAGE_RW;
        }
        vl1e++;
    }

    munmap(vl1tab, PAGE_SIZE);
    munmap(vl2tab, PAGE_SIZE);
    munmap(vl3tab, PAGE_SIZE);
    munmap(vl4tab, PAGE_SIZE);
    return 0;

 error_out:
    if (vl1tab)
        munmap(vl1tab, PAGE_SIZE);
    if (vl2tab)
        munmap(vl2tab, PAGE_SIZE);
    if (vl3tab)
        munmap(vl3tab, PAGE_SIZE);
    if (vl4tab)
        munmap(vl4tab, PAGE_SIZE);
    return -1;
}
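
/*
 * Note (explanatory): the 4-level walk above mirrors the PAE case one level
 * deeper. Each table holds 512 8-byte entries, so one L1 frame maps 2MB, one
 * L2 maps 1GB and one L3 maps 512GB of virtual space; a new lower-level
 * table is hooked in whenever the corresponding entry pointer crosses a
 * page boundary.
 */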
#endif
/*
 * qsort() comparator for machine frame numbers. Comparing by subtraction
 * can overflow and truncate for large values, so compare explicitly.
 */
static int compare(const void *a, const void *b)
{
    unsigned long x = *(const unsigned long *)a;
    unsigned long y = *(const unsigned long *)b;
    return (x > y) - (x < y);
}
#ifdef __ia64__
#include <asm/fpu.h> /* for FPSR_DEFAULT */
static int setup_guest(int xc_handle,
                       u32 dom,
                       char *image, unsigned long image_size,
                       gzFile initrd_gfd, unsigned long initrd_len,
                       unsigned long nr_pages,
                       unsigned long *pvsi, unsigned long *pvke,
                       unsigned long *pvss, vcpu_guest_context_t *ctxt,
                       const char *cmdline,
                       unsigned long shared_info_frame,
                       unsigned long flags,
                       unsigned int vcpus,
                       unsigned int store_evtchn, unsigned long *store_mfn,
                       /* unused on ia64, but keeps the signature in step
                        * with the single call site below: */
                       unsigned int console_evtchn, unsigned long *console_mfn)
{
    unsigned long *page_array = NULL;
    struct load_funcs load_funcs;
    struct domain_setup_info dsi;
    unsigned long start_page;
    int rc;

    rc = probeimageformat(image, image_size, &load_funcs);
    if ( rc != 0 )
        goto error_out;

    memset(&dsi, 0, sizeof(struct domain_setup_info));

    rc = (load_funcs.parseimage)(image, image_size, &dsi);
    if ( rc != 0 )
        goto error_out;

    dsi.v_start = round_pgdown(dsi.v_start);
    dsi.v_end   = round_pgup(dsi.v_end);

    start_page = dsi.v_start >> PAGE_SHIFT;
    nr_pages = (dsi.v_end - dsi.v_start) >> PAGE_SHIFT;
    if ( (page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL )
    {
        PERROR("Could not allocate memory");
        goto error_out;
    }

    if ( xc_ia64_get_pfn_list(xc_handle, dom, page_array,
                              start_page, nr_pages) != nr_pages )
    {
        PERROR("Could not get the page frame list");
        goto error_out;
    }

    (load_funcs.loadimage)(image, image_size, xc_handle, dom, page_array,
                           &dsi);

    *pvke = dsi.v_kernentry;
    return 0;

 error_out:
    free(page_array);
    return -1;
}
#else /* x86 */
static int setup_guest(int xc_handle,
                       u32 dom,
                       char *image, unsigned long image_size,
                       gzFile initrd_gfd, unsigned long initrd_len,
                       unsigned long nr_pages,
                       unsigned long *pvsi, unsigned long *pvke,
                       unsigned long *pvss, vcpu_guest_context_t *ctxt,
                       const char *cmdline,
                       unsigned long shared_info_frame,
                       unsigned long flags,
                       unsigned int vcpus,
                       unsigned int store_evtchn, unsigned long *store_mfn,
                       unsigned int console_evtchn, unsigned long *console_mfn)
{
    unsigned long *page_array = NULL;
    unsigned long count, i;
    start_info_t *start_info;
    shared_info_t *shared_info;
    xc_mmu_t *mmu = NULL;
    int rc;

    unsigned long nr_pt_pages;
    unsigned long physmap_pfn;
    unsigned long *physmap, *physmap_e;

    struct load_funcs load_funcs;
    struct domain_setup_info dsi;
    unsigned long vinitrd_start;
    unsigned long vinitrd_end;
    unsigned long vphysmap_start;
    unsigned long vphysmap_end;
    unsigned long vstartinfo_start;
    unsigned long vstartinfo_end;
    unsigned long vstoreinfo_start;
    unsigned long vstoreinfo_end;
    unsigned long vconsole_start;
    unsigned long vconsole_end;
    unsigned long vstack_start;
    unsigned long vstack_end;
    unsigned long vpt_start;
    unsigned long vpt_end;
    unsigned long v_end;

    rc = probeimageformat(image, image_size, &load_funcs);
    if ( rc != 0 )
        goto error_out;

    memset(&dsi, 0, sizeof(struct domain_setup_info));

    rc = (load_funcs.parseimage)(image, image_size, &dsi);
    if ( rc != 0 )
        goto error_out;

    if ( (dsi.v_start & (PAGE_SIZE-1)) != 0 )
    {
        PERROR("Guest OS must load to a page boundary.\n");
        goto error_out;
    }
    /*
     * Why do we need this? The number of page-table frames depends on the
     * size of the bootstrap address space. But the size of the address space
     * depends on the number of page-table frames (since each one is mapped
     * read-only). We have a pair of simultaneous equations in two unknowns,
     * which we solve by exhaustive search.
     */
    vinitrd_start    = round_pgup(dsi.v_end);
    vinitrd_end      = vinitrd_start + initrd_len;
    vphysmap_start   = round_pgup(vinitrd_end);
    vphysmap_end     = vphysmap_start + (nr_pages * sizeof(unsigned long));
    vstartinfo_start = round_pgup(vphysmap_end);
    vstartinfo_end   = vstartinfo_start + PAGE_SIZE;
    vstoreinfo_start = vstartinfo_end;
    vstoreinfo_end   = vstoreinfo_start + PAGE_SIZE;
    vconsole_start   = vstoreinfo_end;
    vconsole_end     = vconsole_start + PAGE_SIZE;
    vpt_start        = vconsole_end;

    for ( nr_pt_pages = 2; ; nr_pt_pages++ )
    {
        vpt_end      = vpt_start + (nr_pt_pages * PAGE_SIZE);
        vstack_start = vpt_end;
        vstack_end   = vstack_start + PAGE_SIZE;
        v_end        = (vstack_end + (1UL<<22)-1) & ~((1UL<<22)-1);
        if ( (v_end - vstack_end) < (512UL << 10) )
            v_end += 1UL << 22; /* Add extra 4MB to get >= 512kB padding. */
#if defined(__i386__)
        if ( dsi.pae_kernel )
        {
            /* FIXME: assumes one L2 pgtable @ 0xc0000000 */
            if ( (((v_end - dsi.v_start + ((1<<L2_PAGETABLE_SHIFT_PAE)-1)) >>
                   L2_PAGETABLE_SHIFT_PAE) + 2) <= nr_pt_pages )
                break;
        }
        else
        {
            if ( (((v_end - dsi.v_start + ((1<<L2_PAGETABLE_SHIFT)-1)) >>
                   L2_PAGETABLE_SHIFT) + 1) <= nr_pt_pages )
                break;
        }
#endif
#if defined(__x86_64__)
#define NR(_l,_h,_s) \
    (((((_h) + ((1UL<<(_s))-1)) & ~((1UL<<(_s))-1)) - \
      ((_l) & ~((1UL<<(_s))-1))) >> (_s))
        if ( (1 + /* # L4 */
              NR(dsi.v_start, v_end, L4_PAGETABLE_SHIFT) + /* # L3 */
              NR(dsi.v_start, v_end, L3_PAGETABLE_SHIFT) + /* # L2 */
              NR(dsi.v_start, v_end, L2_PAGETABLE_SHIFT))  /* # L1 */
             <= nr_pt_pages )
            break;
#endif
    }
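
    /*
     * Worked example (illustrative, i386 non-PAE): if v_end - dsi.v_start
     * settles at 16MB, the bootstrap tables need ceil(16MB / 4MB) = 4 L1
     * frames (one per 1 << L2_PAGETABLE_SHIFT of address space) plus one L2
     * frame, so the search terminates at nr_pt_pages = 5. Growing
     * nr_pt_pages also grows vpt_end and hence v_end, which is why the
     * fixed point is found by iteration rather than computed directly.
     */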
#define _p(a) ((void *) (a))

    printf("VIRTUAL MEMORY ARRANGEMENT:\n"
           " Loaded kernel: %p->%p\n"
           " Init. ramdisk: %p->%p\n"
           " Phys-Mach map: %p->%p\n"
           " Start info:    %p->%p\n"
           " Store page:    %p->%p\n"
           " Console page:  %p->%p\n"
           " Page tables:   %p->%p\n"
           " Boot stack:    %p->%p\n"
           " TOTAL:         %p->%p\n",
           _p(dsi.v_kernstart), _p(dsi.v_kernend),
           _p(vinitrd_start), _p(vinitrd_end),
           _p(vphysmap_start), _p(vphysmap_end),
           _p(vstartinfo_start), _p(vstartinfo_end),
           _p(vstoreinfo_start), _p(vstoreinfo_end),
           _p(vconsole_start), _p(vconsole_end),
           _p(vpt_start), _p(vpt_end),
           _p(vstack_start), _p(vstack_end),
           _p(dsi.v_start), _p(v_end));
    printf(" ENTRY ADDRESS: %p\n", _p(dsi.v_kernentry));
    if ( (v_end - dsi.v_start) > (nr_pages * PAGE_SIZE) )
    {
        printf("Initial guest OS requires too much space\n"
               "(%luMB is greater than %luMB limit)\n",
               (v_end-dsi.v_start)>>20, (nr_pages<<PAGE_SHIFT)>>20);
        goto error_out;
    }

    if ( (page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL )
    {
        PERROR("Could not allocate memory");
        goto error_out;
    }

    if ( xc_get_pfn_list(xc_handle, dom, page_array, nr_pages) != nr_pages )
    {
        PERROR("Could not get the page frame list");
        goto error_out;
    }

    qsort(page_array, nr_pages, sizeof(*page_array), compare);
    (load_funcs.loadimage)(image, image_size, xc_handle, dom, page_array,
                           &dsi);

    /* Load the initial ramdisk image. */
    if ( initrd_len != 0 )
    {
        for ( i = (vinitrd_start - dsi.v_start);
              i < (vinitrd_end - dsi.v_start); i += PAGE_SIZE )
        {
            char page[PAGE_SIZE];
            /* Zero-fill so a short final read can't copy stack garbage. */
            memset(page, 0, sizeof(page));
            if ( gzread(initrd_gfd, page, PAGE_SIZE) == -1 )
            {
                PERROR("Error reading initrd image");
                goto error_out;
            }
            xc_copy_to_domain_page(xc_handle, dom,
                                   page_array[i>>PAGE_SHIFT], page);
        }
    }
    if ( (mmu = xc_init_mmu_updates(xc_handle, dom)) == NULL )
        goto error_out;

    /* Set up the page tables. */
#if defined(__i386__)
    if ( dsi.pae_kernel )
        rc = setup_pg_tables_pae(xc_handle, dom, ctxt,
                                 dsi.v_start, v_end,
                                 page_array, vpt_start, vpt_end);
    else
        rc = setup_pg_tables(xc_handle, dom, ctxt,
                             dsi.v_start, v_end,
                             page_array, vpt_start, vpt_end);
#endif
#if defined(__x86_64__)
    rc = setup_pg_tables_64(xc_handle, dom, ctxt,
                            dsi.v_start, v_end,
                            page_array, vpt_start, vpt_end);
#endif
    if ( rc != 0 )
        goto error_out;

    /* Write the phys->machine and machine->phys table entries. */
    physmap_pfn = (vphysmap_start - dsi.v_start) >> PAGE_SHIFT;
    physmap = physmap_e = xc_map_foreign_range(
        xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
        page_array[physmap_pfn++]);

    for ( count = 0; count < nr_pages; count++ )
    {
        if ( xc_add_mmu_update(
                 xc_handle, mmu,
                 ((unsigned long long)page_array[count] << PAGE_SHIFT) |
                 MMU_MACHPHYS_UPDATE, count) )
        {
            fprintf(stderr, "m2p update failure p=%lx m=%lx\n",
                    count, page_array[count]);
            munmap(physmap, PAGE_SIZE);
            goto error_out;
        }
        *physmap_e++ = page_array[count];
        if ( ((unsigned long)physmap_e & (PAGE_SIZE-1)) == 0 )
        {
            munmap(physmap, PAGE_SIZE);
            physmap = physmap_e = xc_map_foreign_range(
                xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
                page_array[physmap_pfn++]);
        }
    }
    munmap(physmap, PAGE_SIZE);
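
    /*
     * Note (explanatory): the loop above maintains both directions of the
     * translation. The MMU_MACHPHYS_UPDATE requests populate the
     * hypervisor's global machine->phys (M2P) table, mapping machine frame
     * page_array[count] back to pseudo-physical frame `count`, while the
     * *physmap_e++ stores build the guest's own phys->machine (P2M) array
     * at vphysmap_start, one unsigned long per pseudo-physical page.
     */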
#if defined(__i386__)
    /*
     * Pin down l2tab addr as page dir page - causes hypervisor to provide
     * correct protection for the page.
     */
    if ( dsi.pae_kernel )
    {
        if ( pin_table(xc_handle, MMUEXT_PIN_L3_TABLE,
                       ctxt->ctrlreg[3] >> PAGE_SHIFT, dom) )
            goto error_out;
    }
    else
    {
        if ( pin_table(xc_handle, MMUEXT_PIN_L2_TABLE,
                       ctxt->ctrlreg[3] >> PAGE_SHIFT, dom) )
            goto error_out;
    }
#endif

#if defined(__x86_64__)
    /*
     * Pin down l4tab addr as page dir page - causes hypervisor to provide
     * correct protection for the page.
     */
    if ( pin_table(xc_handle, MMUEXT_PIN_L4_TABLE,
                   ctxt->ctrlreg[3] >> PAGE_SHIFT, dom) )
        goto error_out;
#endif
    *store_mfn   = page_array[(vstoreinfo_start - dsi.v_start) >> PAGE_SHIFT];
    *console_mfn = page_array[(vconsole_start   - dsi.v_start) >> PAGE_SHIFT];

    start_info = xc_map_foreign_range(
        xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
        page_array[(vstartinfo_start - dsi.v_start) >> PAGE_SHIFT]);
    memset(start_info, 0, sizeof(*start_info));
    start_info->nr_pages       = nr_pages;
    start_info->shared_info    = shared_info_frame << PAGE_SHIFT;
    start_info->flags          = flags;
    start_info->pt_base        = vpt_start;
    start_info->nr_pt_frames   = nr_pt_pages;
    start_info->mfn_list       = vphysmap_start;
    start_info->store_mfn      = *store_mfn;
    start_info->store_evtchn   = store_evtchn;
    start_info->console_mfn    = *console_mfn;
    start_info->console_evtchn = console_evtchn;
    if ( initrd_len != 0 )
    {
        start_info->mod_start = vinitrd_start;
        start_info->mod_len   = initrd_len;
    }
    strncpy((char *)start_info->cmd_line, cmdline, MAX_GUEST_CMDLINE);
    start_info->cmd_line[MAX_GUEST_CMDLINE-1] = '\0';
    munmap(start_info, PAGE_SIZE);
    /* shared_info page starts its life empty. */
    shared_info = xc_map_foreign_range(
        xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, shared_info_frame);
    memset(shared_info, 0, sizeof(shared_info_t));
    /* Mask all upcalls... */
    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
        shared_info->vcpu_data[i].evtchn_upcall_mask = 1;

    shared_info->n_vcpu = vcpus;
    printf(" VCPUS:         %d\n", shared_info->n_vcpu);

    munmap(shared_info, PAGE_SIZE);

    /* Send the page update requests down to the hypervisor. */
    if ( xc_finish_mmu_updates(xc_handle, mmu) )
        goto error_out;

    free(mmu);
    free(page_array);

    *pvsi = vstartinfo_start;
    *pvss = vstack_start;
    *pvke = dsi.v_kernentry;

    return 0;

 error_out:
    free(mmu);
    free(page_array);
    return -1;
}
#endif
int xc_linux_build(int xc_handle,
                   u32 domid,
                   const char *image_name,
                   const char *ramdisk_name,
                   const char *cmdline,
                   unsigned long flags,
                   unsigned int vcpus,
                   unsigned int store_evtchn,
                   unsigned long *store_mfn,
                   unsigned int console_evtchn,
                   unsigned long *console_mfn)
{
    dom0_op_t launch_op, op;
    int initrd_fd = -1;
    gzFile initrd_gfd = NULL;
    int rc, i;
    vcpu_guest_context_t st_ctxt, *ctxt = &st_ctxt;
    long nr_pages; /* signed: xc_get_tot_pages() returns <0 on error */
    char *image = NULL;
    unsigned long image_size, initrd_size = 0;
    unsigned long vstartinfo_start, vkern_entry, vstack_start;

    if ( (nr_pages = xc_get_tot_pages(xc_handle, domid)) < 0 )
    {
        PERROR("Could not find total pages for domain");
        goto error_out;
    }
    if ( (image = xc_read_kernel_image(image_name, &image_size)) == NULL )
        goto error_out;

    if ( (ramdisk_name != NULL) && (strlen(ramdisk_name) != 0) )
    {
        if ( (initrd_fd = open(ramdisk_name, O_RDONLY)) < 0 )
        {
            PERROR("Could not open the initial ramdisk image");
            goto error_out;
        }

        initrd_size = xc_get_filesz(initrd_fd);

        if ( (initrd_gfd = gzdopen(initrd_fd, "rb")) == NULL )
        {
            PERROR("Could not allocate decompression state for initrd");
            goto error_out;
        }
    }

    if ( mlock(&st_ctxt, sizeof(st_ctxt)) )
    {
        PERROR("xc_linux_build: ctxt mlock failed");
        goto error_out; /* release the image and ramdisk handles too */
    }
    op.cmd = DOM0_GETDOMAININFO;
    op.u.getdomaininfo.domain = (domid_t)domid;
    if ( (xc_dom0_op(xc_handle, &op) < 0) ||
         ((u16)op.u.getdomaininfo.domain != domid) )
    {
        PERROR("Could not get info on domain");
        goto error_out;
    }

    if ( xc_domain_get_vcpu_context(xc_handle, domid, 0, ctxt) )
    {
        PERROR("Could not get vcpu context");
        goto error_out;
    }

    if ( !(op.u.getdomaininfo.flags & DOMFLAGS_PAUSED) ||
#ifdef __ia64__
         0 )
#else
         (ctxt->ctrlreg[3] != 0) )
#endif
    {
        ERROR("Domain is already constructed");
        goto error_out;
    }
    if ( setup_guest(xc_handle, domid, image, image_size,
                     initrd_gfd, initrd_size, nr_pages,
                     &vstartinfo_start, &vkern_entry,
                     &vstack_start, ctxt, cmdline,
                     op.u.getdomaininfo.shared_info_frame,
                     flags, vcpus,
                     store_evtchn, store_mfn,
                     console_evtchn, console_mfn) < 0 )
    {
        ERROR("Error constructing guest OS");
        goto error_out;
    }

    if ( initrd_gfd != NULL )
        gzclose(initrd_gfd); /* gzclose() also closes the underlying fd */
    else if ( initrd_fd >= 0 )
        close(initrd_fd);
    free(image);
#ifdef __ia64__
    /* Based on new_thread in xen/arch/ia64/domain.c */
    ctxt->regs.cr_ipsr = 0; /* all necessary bits filled by hypervisor */
    ctxt->regs.cr_iip  = vkern_entry;
    ctxt->regs.cr_ifs  = 1UL << 63;
    ctxt->regs.ar_fpsr = FPSR_DEFAULT;
    /* ctxt->regs.r28 = dom_fw_setup(); currently done by hypervisor, should move here */
    ctxt->vcpu.privregs = 0;
    ctxt->shared.flags  = flags;
    i = 0; /* silence unused variable warning */
#else /* x86 */
    /*
     * Initial register values:
     *  DS,ES,FS,GS = FLAT_KERNEL_DS
     *       CS:EIP = FLAT_KERNEL_CS:start_pc
     *       SS:ESP = FLAT_KERNEL_DS:start_stack
     *          ESI = start_info
     *  [EAX,EBX,ECX,EDX,EDI,EBP are zero]
     *       EFLAGS = IF | 2 (bit 1 is reserved and should always be 1)
     */
    ctxt->user_regs.ds = FLAT_KERNEL_DS;
    ctxt->user_regs.es = FLAT_KERNEL_DS;
    ctxt->user_regs.fs = FLAT_KERNEL_DS;
    ctxt->user_regs.gs = FLAT_KERNEL_DS;
    ctxt->user_regs.ss = FLAT_KERNEL_SS;
    ctxt->user_regs.cs = FLAT_KERNEL_CS;
    ctxt->user_regs.eip = vkern_entry;
    ctxt->user_regs.esp = vstack_start + PAGE_SIZE;
    ctxt->user_regs.esi = vstartinfo_start;
    ctxt->user_regs.eflags = 1 << 9; /* Interrupt Enable */

    /* FPU is set up to default initial state. */
    memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));

    /* Virtual IDT is empty at start-of-day. */
    for ( i = 0; i < 256; i++ )
    {
        ctxt->trap_ctxt[i].vector = i;
        ctxt->trap_ctxt[i].cs     = FLAT_KERNEL_CS;
    }

    /* No LDT. */
    ctxt->ldt_ents = 0;

    /* Use the default Xen-provided GDT. */
    ctxt->gdt_ents = 0;

    /* Ring 1 stack is the initial stack. */
    ctxt->kernel_ss = FLAT_KERNEL_SS;
    ctxt->kernel_sp = vstack_start + PAGE_SIZE;

    /* No debugging. */
    memset(ctxt->debugreg, 0, sizeof(ctxt->debugreg));

    /* No callback handlers. */
#if defined(__i386__)
    ctxt->event_callback_cs     = FLAT_KERNEL_CS;
    ctxt->event_callback_eip    = 0;
    ctxt->failsafe_callback_cs  = FLAT_KERNEL_CS;
    ctxt->failsafe_callback_eip = 0;
#elif defined(__x86_64__)
    ctxt->event_callback_eip    = 0;
    ctxt->failsafe_callback_eip = 0;
    ctxt->syscall_callback_eip  = 0;
#endif
#endif /* x86 */
    memset(&launch_op, 0, sizeof(launch_op));

    launch_op.u.setdomaininfo.domain = (domid_t)domid;
    launch_op.u.setdomaininfo.vcpu   = 0;
    launch_op.u.setdomaininfo.ctxt   = ctxt;

    launch_op.cmd = DOM0_SETDOMAININFO;
    rc = xc_dom0_op(xc_handle, &launch_op);

    return rc;

 error_out:
    if ( initrd_gfd != NULL )
        gzclose(initrd_gfd);
    else if ( initrd_fd >= 0 )
        close(initrd_fd);
    free(image);

    return -1;
}
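
/*
 * Sketch of typical usage (illustrative; the domain itself and the store/
 * console event channels are assumed to have been created beforehand by the
 * toolstack, with the domain still paused):
 *
 *     unsigned long store_mfn, console_mfn;
 *     int rc = xc_linux_build(xc_handle, domid,
 *                             "/path/to/vmlinuz",   // kernel image
 *                             "/path/to/initrd.gz", // ramdisk, may be NULL
 *                             "root=/dev/sda1 ro",  // guest command line
 *                             0, 1,                 // flags, vcpus
 *                             store_evtchn, &store_mfn,
 *                             console_evtchn, &console_mfn);
 *
 * On success the boot-time VCPU context has been registered via
 * DOM0_SETDOMAININFO and the domain can be unpaused by the caller.
 */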