ia64/xen-unstable

view tools/libxc/xc_linux_build.c @ 6700:12ff9c954ace

Give each domain some memory below 4GB. This solves the "PGDs must be below 4GB" requirement for the initial page tables. I'm not sure we'll stick with this approach, but it is good enough for the time being.

PAE should be a *lot* more robust on systems that actually have more than 4GB thanks to all the various patches that went in today. I find it astounding that it ever appeared to work at all!

Signed-off-by: ian@xensource.com
author iap10@freefall.cl.cam.ac.uk
date Thu Sep 08 01:07:15 2005 +0000 (2005-09-08)
parents f0d728001aaa
children 5db85ba1c4e0 3bde4219c681 aa0990ef260f
line source
/******************************************************************************
 * xc_linux_build.c
 */

#include "xg_private.h"
#include <xenctrl.h>

#if defined(__i386__)
#define ELFSIZE 32
#endif

#if defined(__x86_64__) || defined(__ia64__)
#define ELFSIZE 64
#endif

#include "xc_elf.h"
#include "xc_aout9.h"
#include <stdlib.h>
#include <unistd.h>
#include <zlib.h>

#if defined(__i386__)
#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
#define L3_PROT (_PAGE_PRESENT)
#endif

#if defined(__x86_64__)
#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
#define L3_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
#define L4_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
#endif

#define round_pgup(_p)    (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
#define round_pgdown(_p)  ((_p)&PAGE_MASK)

#ifdef __ia64__
#define probe_aout9(image,image_size,load_funcs) 1
#endif

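/*
 * Each probe_* routine returns 0 if it recognizes the image, so the chain
 * below only reports "Unrecognized image format" when every probe has
 * failed (all returned non-zero).
 */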
static int probeimageformat(char *image,
                            unsigned long image_size,
                            struct load_funcs *load_funcs)
{
    if ( probe_elf(image, image_size, load_funcs) &&
         probe_bin(image, image_size, load_funcs) &&
         probe_aout9(image, image_size, load_funcs) )
    {
        ERROR("Unrecognized image format");
        return -EINVAL;
    }

    return 0;
}

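/*
 * alloc_pt() takes the next free frame from page_array (advancing
 * ppt_alloc), unmaps the previously mapped table page (if any), maps the
 * new frame into our address space, and zeroes it.  It relies on
 * xc_handle, dom, page_array, ppt_alloc and an error_out label being in
 * scope at the point of use.
 */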
#define alloc_pt(ltab, vltab) \
        ltab = (unsigned long long)(page_array[ppt_alloc++]) << PAGE_SHIFT; \
        if (vltab != NULL) { \
            munmap(vltab, PAGE_SIZE); \
        } \
        if ((vltab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, \
                                          PROT_READ|PROT_WRITE, \
                                          ltab >> PAGE_SHIFT)) == NULL) { \
            goto error_out; \
        } \
        memset(vltab, 0, PAGE_SIZE);

#if defined(__i386__)

static int setup_pg_tables(int xc_handle, u32 dom,
                           vcpu_guest_context_t *ctxt,
                           unsigned long dsi_v_start,
                           unsigned long v_end,
                           unsigned long *page_array,
                           unsigned long vpt_start,
                           unsigned long vpt_end)
{
    l1_pgentry_t *vl1tab=NULL, *vl1e=NULL;
    l2_pgentry_t *vl2tab=NULL, *vl2e=NULL;
    unsigned long l1tab = 0;
    unsigned long l2tab = 0;
    unsigned long ppt_alloc;
    unsigned long count;

    ppt_alloc = (vpt_start - dsi_v_start) >> PAGE_SHIFT;
    alloc_pt(l2tab, vl2tab);
    vl2e = &vl2tab[l2_table_offset(dsi_v_start)];
    ctxt->ctrlreg[3] = l2tab;

    for ( count = 0; count < ((v_end - dsi_v_start) >> PAGE_SHIFT); count++ )
    {
        if ( ((unsigned long)vl1e & (PAGE_SIZE-1)) == 0 )
        {
            alloc_pt(l1tab, vl1tab);
            vl1e = &vl1tab[l1_table_offset(dsi_v_start + (count << PAGE_SHIFT))];
            *vl2e++ = l1tab | L2_PROT;
        }

        *vl1e = (page_array[count] << PAGE_SHIFT) | L1_PROT;
        if ( (count >= ((vpt_start - dsi_v_start) >> PAGE_SHIFT)) &&
             (count <  ((vpt_end   - dsi_v_start) >> PAGE_SHIFT)) )
            *vl1e &= ~_PAGE_RW;
        vl1e++;
    }
    munmap(vl1tab, PAGE_SIZE);
    munmap(vl2tab, PAGE_SIZE);
    return 0;

 error_out:
    if ( vl1tab )
        munmap(vl1tab, PAGE_SIZE);
    if ( vl2tab )
        munmap(vl2tab, PAGE_SIZE);
    return -1;
}

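/*
 * PAE variant: same frame-allocation scheme with an extra L3 level.  The
 * L3 page directory itself must sit below 4GB, since on 32-bit processors
 * CR3 can only hold a 32-bit physical address; the explicit check against
 * 0xfffff000 below enforces this (see the changeset description above).
 */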
static int setup_pg_tables_pae(int xc_handle, u32 dom,
                               vcpu_guest_context_t *ctxt,
                               unsigned long dsi_v_start,
                               unsigned long v_end,
                               unsigned long *page_array,
                               unsigned long vpt_start,
                               unsigned long vpt_end)
{
    l1_pgentry_64_t *vl1tab=NULL, *vl1e=NULL;
    l2_pgentry_64_t *vl2tab=NULL, *vl2e=NULL;
    l3_pgentry_64_t *vl3tab=NULL, *vl3e=NULL;
    unsigned long long l1tab = 0;
    unsigned long long l2tab = 0;
    unsigned long long l3tab = 0;
    unsigned long ppt_alloc;
    unsigned long count;

    /* First allocate page for page dir. */
    ppt_alloc = (vpt_start - dsi_v_start) >> PAGE_SHIFT;
    alloc_pt(l3tab, vl3tab);
    vl3e = &vl3tab[l3_table_offset_pae(dsi_v_start)];
    ctxt->ctrlreg[3] = l3tab;

    if ( l3tab > 0xfffff000 )
    {
        fprintf(stderr, "L3TAB = %llx above 4GB!\n", l3tab);
        goto error_out;
    }

    for ( count = 0; count < ((v_end - dsi_v_start) >> PAGE_SHIFT); count++ )
    {
        if ( !((unsigned long)vl1e & (PAGE_SIZE-1)) )
        {
            alloc_pt(l1tab, vl1tab);

            if ( !((unsigned long)vl2e & (PAGE_SIZE-1)) )
            {
                alloc_pt(l2tab, vl2tab);
                vl2e = &vl2tab[l2_table_offset_pae(dsi_v_start + (count << PAGE_SHIFT))];
                *vl3e = l2tab | L3_PROT;
                vl3e++;
            }
            vl1e = &vl1tab[l1_table_offset_pae(dsi_v_start + (count << PAGE_SHIFT))];
            *vl2e = l1tab | L2_PROT;
            vl2e++;
        }

        *vl1e = (page_array[count] << PAGE_SHIFT) | L1_PROT;
        if ( (count >= ((vpt_start - dsi_v_start) >> PAGE_SHIFT)) &&
             (count <  ((vpt_end   - dsi_v_start) >> PAGE_SHIFT)) )
        {
            *vl1e &= ~_PAGE_RW;
        }
        vl1e++;
    }

    munmap(vl1tab, PAGE_SIZE);
    munmap(vl2tab, PAGE_SIZE);
    munmap(vl3tab, PAGE_SIZE);
    return 0;

 error_out:
    if ( vl1tab )
        munmap(vl1tab, PAGE_SIZE);
    if ( vl2tab )
        munmap(vl2tab, PAGE_SIZE);
    if ( vl3tab )
        munmap(vl3tab, PAGE_SIZE);
    return -1;
}

#endif

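/*
 * x86_64: the same scheme again, extended to the full four-level tree
 * (L4 -> L3 -> L2 -> L1).
 */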
#if defined(__x86_64__)

static int setup_pg_tables_64(int xc_handle, u32 dom,
                              vcpu_guest_context_t *ctxt,
                              unsigned long dsi_v_start,
                              unsigned long v_end,
                              unsigned long *page_array,
                              unsigned long vpt_start,
                              unsigned long vpt_end)
{
    l1_pgentry_t *vl1tab=NULL, *vl1e=NULL;
    l2_pgentry_t *vl2tab=NULL, *vl2e=NULL;
    l3_pgentry_t *vl3tab=NULL, *vl3e=NULL;
    l4_pgentry_t *vl4tab=NULL, *vl4e=NULL;
    unsigned long l2tab = 0;
    unsigned long l1tab = 0;
    unsigned long l3tab = 0;
    unsigned long l4tab = 0;
    unsigned long ppt_alloc;
    unsigned long count;

    /* First allocate page for page dir. */
    ppt_alloc = (vpt_start - dsi_v_start) >> PAGE_SHIFT;
    alloc_pt(l4tab, vl4tab);
    vl4e = &vl4tab[l4_table_offset(dsi_v_start)];
    ctxt->ctrlreg[3] = l4tab;

    for ( count = 0; count < ((v_end - dsi_v_start) >> PAGE_SHIFT); count++ )
    {
        if ( !((unsigned long)vl1e & (PAGE_SIZE-1)) )
        {
            alloc_pt(l1tab, vl1tab);

            if ( !((unsigned long)vl2e & (PAGE_SIZE-1)) )
            {
                alloc_pt(l2tab, vl2tab);
                if ( !((unsigned long)vl3e & (PAGE_SIZE-1)) )
                {
                    alloc_pt(l3tab, vl3tab);
                    vl3e = &vl3tab[l3_table_offset(dsi_v_start + (count << PAGE_SHIFT))];
                    *vl4e = l3tab | L4_PROT;
                    vl4e++;
                }
                vl2e = &vl2tab[l2_table_offset(dsi_v_start + (count << PAGE_SHIFT))];
                *vl3e = l2tab | L3_PROT;
                vl3e++;
            }
            vl1e = &vl1tab[l1_table_offset(dsi_v_start + (count << PAGE_SHIFT))];
            *vl2e = l1tab | L2_PROT;
            vl2e++;
        }

        *vl1e = (page_array[count] << PAGE_SHIFT) | L1_PROT;
        if ( (count >= ((vpt_start - dsi_v_start) >> PAGE_SHIFT)) &&
             (count <  ((vpt_end   - dsi_v_start) >> PAGE_SHIFT)) )
        {
            *vl1e &= ~_PAGE_RW;
        }
        vl1e++;
    }

    munmap(vl1tab, PAGE_SIZE);
    munmap(vl2tab, PAGE_SIZE);
    munmap(vl3tab, PAGE_SIZE);
    munmap(vl4tab, PAGE_SIZE);
    return 0;

 error_out:
    if ( vl1tab )
        munmap(vl1tab, PAGE_SIZE);
    if ( vl2tab )
        munmap(vl2tab, PAGE_SIZE);
    if ( vl3tab )
        munmap(vl3tab, PAGE_SIZE);
    if ( vl4tab )
        munmap(vl4tab, PAGE_SIZE);
    return -1;
}
#endif

/* qsort comparator: ascending order; avoids the overflow/truncation that
 * a plain subtraction would suffer on full-range unsigned longs. */
static int compare(const void *a, const void *b)
{
    unsigned long va = *(const unsigned long *)a;
    unsigned long vb = *(const unsigned long *)b;

    return (va > vb) - (va < vb);
}

#ifdef __ia64__
#include <asm/fpu.h> /* for FPSR_DEFAULT */
/* console_evtchn/console_mfn are accepted for interface parity with the
 * x86 version (the caller passes them unconditionally) but are unused. */
static int setup_guest(int xc_handle,
                       u32 dom,
                       char *image, unsigned long image_size,
                       gzFile initrd_gfd, unsigned long initrd_len,
                       unsigned long nr_pages,
                       unsigned long *pvsi, unsigned long *pvke,
                       unsigned long *pvss, vcpu_guest_context_t *ctxt,
                       const char *cmdline,
                       unsigned long shared_info_frame,
                       unsigned long flags,
                       unsigned int vcpus,
                       unsigned int store_evtchn, unsigned long *store_mfn,
                       unsigned int console_evtchn, unsigned long *console_mfn)
{
    unsigned long *page_array = NULL;
    struct load_funcs load_funcs;
    struct domain_setup_info dsi;
    unsigned long start_page;
    int rc;

    rc = probeimageformat(image, image_size, &load_funcs);
    if ( rc != 0 )
        goto error_out;

    memset(&dsi, 0, sizeof(struct domain_setup_info));

    rc = (load_funcs.parseimage)(image, image_size, &dsi);
    if ( rc != 0 )
        goto error_out;

    dsi.v_start = round_pgdown(dsi.v_start);
    dsi.v_end   = round_pgup(dsi.v_end);

    start_page = dsi.v_start >> PAGE_SHIFT;
    nr_pages   = (dsi.v_end - dsi.v_start) >> PAGE_SHIFT;
    if ( (page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL )
    {
        PERROR("Could not allocate memory");
        goto error_out;
    }

    if ( xc_ia64_get_pfn_list(xc_handle, dom, page_array,
                              start_page, nr_pages) != nr_pages )
    {
        PERROR("Could not get the page frame list");
        goto error_out;
    }

    (load_funcs.loadimage)(image, image_size, xc_handle, dom, page_array,
                           &dsi);

    *pvke = dsi.v_kernentry;
    return 0;

 error_out:
    free(page_array);
    return -1;
}
#else /* x86 */
static int setup_guest(int xc_handle,
                       u32 dom,
                       char *image, unsigned long image_size,
                       gzFile initrd_gfd, unsigned long initrd_len,
                       unsigned long nr_pages,
                       unsigned long *pvsi, unsigned long *pvke,
                       unsigned long *pvss, vcpu_guest_context_t *ctxt,
                       const char *cmdline,
                       unsigned long shared_info_frame,
                       unsigned long flags,
                       unsigned int vcpus,
                       unsigned int store_evtchn, unsigned long *store_mfn,
                       unsigned int console_evtchn, unsigned long *console_mfn)
{
    unsigned long *page_array = NULL;
    unsigned long count, i;
    start_info_t *start_info;
    shared_info_t *shared_info;
    xc_mmu_t *mmu = NULL;
    int rc;

    unsigned long nr_pt_pages;
    unsigned long physmap_pfn;
    unsigned long *physmap, *physmap_e;

    struct load_funcs load_funcs;
    struct domain_setup_info dsi;
    unsigned long vinitrd_start;
    unsigned long vinitrd_end;
    unsigned long vphysmap_start;
    unsigned long vphysmap_end;
    unsigned long vstartinfo_start;
    unsigned long vstartinfo_end;
    unsigned long vstoreinfo_start;
    unsigned long vstoreinfo_end;
    unsigned long vconsole_start;
    unsigned long vconsole_end;
    unsigned long vstack_start;
    unsigned long vstack_end;
    unsigned long vpt_start;
    unsigned long vpt_end;
    unsigned long v_end;

    rc = probeimageformat(image, image_size, &load_funcs);
    if ( rc != 0 )
        goto error_out;

    memset(&dsi, 0, sizeof(struct domain_setup_info));

    rc = (load_funcs.parseimage)(image, image_size, &dsi);
    if ( rc != 0 )
        goto error_out;

    if ( (dsi.v_start & (PAGE_SIZE-1)) != 0 )
    {
        PERROR("Guest OS must load to a page boundary.\n");
        goto error_out;
    }

    /*
     * Why do we need this? The number of page-table frames depends on the
     * size of the bootstrap address space. But the size of the address space
     * depends on the number of page-table frames (since each one is mapped
     * read-only). We have a pair of simultaneous equations in two unknowns,
     * which we solve by exhaustive search.
     */
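    /*
     * Concretely, each L1 page-table page maps 4MB of virtual address
     * space with non-PAE i386 paging (2MB with PAE or on x86_64), so the
     * loop below simply grows nr_pt_pages until the mapped region is
     * large enough to contain the page tables themselves, the boot stack,
     * and the padding.
     */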
    vinitrd_start    = round_pgup(dsi.v_end);
    vinitrd_end      = vinitrd_start + initrd_len;
    vphysmap_start   = round_pgup(vinitrd_end);
    vphysmap_end     = vphysmap_start + (nr_pages * sizeof(unsigned long));
    vstartinfo_start = round_pgup(vphysmap_end);
    vstartinfo_end   = vstartinfo_start + PAGE_SIZE;
    vstoreinfo_start = vstartinfo_end;
    vstoreinfo_end   = vstoreinfo_start + PAGE_SIZE;
    vconsole_start   = vstoreinfo_end;
    vconsole_end     = vconsole_start + PAGE_SIZE;
    vpt_start        = vconsole_end;

    for ( nr_pt_pages = 2; ; nr_pt_pages++ )
    {
        vpt_end      = vpt_start + (nr_pt_pages * PAGE_SIZE);
        vstack_start = vpt_end;
        vstack_end   = vstack_start + PAGE_SIZE;
        v_end        = (vstack_end + (1UL<<22)-1) & ~((1UL<<22)-1);
        if ( (v_end - vstack_end) < (512UL << 10) )
            v_end += 1UL << 22; /* Add extra 4MB to get >= 512kB padding. */
#if defined(__i386__)
        if ( dsi.pae_kernel )
        {
            /* FIXME: assumes one L2 pgtable @ 0xc0000000 */
            if ( (((v_end - dsi.v_start + ((1 << L2_PAGETABLE_SHIFT_PAE)-1)) >>
                   L2_PAGETABLE_SHIFT_PAE) + 2) <= nr_pt_pages )
                break;
        }
        else
        {
            if ( (((v_end - dsi.v_start + ((1 << L2_PAGETABLE_SHIFT)-1)) >>
                   L2_PAGETABLE_SHIFT) + 1) <= nr_pt_pages )
                break;
        }
#endif
#if defined(__x86_64__)
#define NR(_l,_h,_s) \
    (((((_h) + ((1UL<<(_s))-1)) & ~((1UL<<(_s))-1)) - \
       ((_l) & ~((1UL<<(_s))-1))) >> (_s))
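        /* NR(l,h,s): number of (1UL<<s)-sized, (1UL<<s)-aligned slots
         * spanned by the virtual range [l,h); each spanned slot at one
         * level requires a table page at the next level down. */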
        if ( (1 + /* # L4 */
              NR(dsi.v_start, v_end, L4_PAGETABLE_SHIFT) + /* # L3 */
              NR(dsi.v_start, v_end, L3_PAGETABLE_SHIFT) + /* # L2 */
              NR(dsi.v_start, v_end, L2_PAGETABLE_SHIFT))  /* # L1 */
             <= nr_pt_pages )
            break;
#endif
    }

#define _p(a) ((void *) (a))

    printf("VIRTUAL MEMORY ARRANGEMENT:\n"
           " Loaded kernel: %p->%p\n"
           " Init. ramdisk: %p->%p\n"
           " Phys-Mach map: %p->%p\n"
           " Start info:    %p->%p\n"
           " Store page:    %p->%p\n"
           " Console page:  %p->%p\n"
           " Page tables:   %p->%p\n"
           " Boot stack:    %p->%p\n"
           " TOTAL:         %p->%p\n",
           _p(dsi.v_kernstart), _p(dsi.v_kernend),
           _p(vinitrd_start), _p(vinitrd_end),
           _p(vphysmap_start), _p(vphysmap_end),
           _p(vstartinfo_start), _p(vstartinfo_end),
           _p(vstoreinfo_start), _p(vstoreinfo_end),
           _p(vconsole_start), _p(vconsole_end),
           _p(vpt_start), _p(vpt_end),
           _p(vstack_start), _p(vstack_end),
           _p(dsi.v_start), _p(v_end));
    printf(" ENTRY ADDRESS: %p\n", _p(dsi.v_kernentry));

    if ( (v_end - dsi.v_start) > (nr_pages * PAGE_SIZE) )
    {
        printf("Initial guest OS requires too much space\n"
               "(%luMB is greater than %luMB limit)\n",
               (v_end - dsi.v_start) >> 20, (nr_pages << PAGE_SHIFT) >> 20);
        goto error_out;
    }

    if ( (page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL )
    {
        PERROR("Could not allocate memory");
        goto error_out;
    }

    if ( xc_get_pfn_list(xc_handle, dom, page_array, nr_pages) != nr_pages )
    {
        PERROR("Could not get the page frame list");
        goto error_out;
    }

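    /* Sort the machine frame list into ascending order.  Combined with
     * the hypervisor now giving each domain some memory below 4GB (see
     * the changeset description), this biases the early entries --
     * including the frames taken for the initial page tables -- toward
     * low machine addresses. */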
    qsort(page_array, nr_pages, sizeof(*page_array), compare);

    (load_funcs.loadimage)(image, image_size, xc_handle, dom, page_array,
                           &dsi);

    /* Load the initial ramdisk image. */
    if ( initrd_len != 0 )
    {
        for ( i = (vinitrd_start - dsi.v_start);
              i < (vinitrd_end - dsi.v_start); i += PAGE_SIZE )
        {
            char page[PAGE_SIZE];
            if ( gzread(initrd_gfd, page, PAGE_SIZE) == -1 )
            {
                PERROR("Error reading the initial ramdisk image");
                goto error_out;
            }
            xc_copy_to_domain_page(xc_handle, dom,
                                   page_array[i >> PAGE_SHIFT], page);
        }
    }

    if ( (mmu = xc_init_mmu_updates(xc_handle, dom)) == NULL )
        goto error_out;

    /* setup page tables */
#if defined(__i386__)
    if ( dsi.pae_kernel )
        rc = setup_pg_tables_pae(xc_handle, dom, ctxt,
                                 dsi.v_start, v_end,
                                 page_array, vpt_start, vpt_end);
    else
        rc = setup_pg_tables(xc_handle, dom, ctxt,
                             dsi.v_start, v_end,
                             page_array, vpt_start, vpt_end);
#endif
#if defined(__x86_64__)
    rc = setup_pg_tables_64(xc_handle, dom, ctxt,
                            dsi.v_start, v_end,
                            page_array, vpt_start, vpt_end);
#endif
    if ( rc != 0 )
        goto error_out;

    /* Write the phys->machine and machine->phys table entries. */
    physmap_pfn = (vphysmap_start - dsi.v_start) >> PAGE_SHIFT;
    physmap = physmap_e = xc_map_foreign_range(
        xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
        page_array[physmap_pfn++]);

    for ( count = 0; count < nr_pages; count++ )
    {
        if ( xc_add_mmu_update(
                 xc_handle, mmu,
                 ((unsigned long long)page_array[count] << PAGE_SHIFT) |
                 MMU_MACHPHYS_UPDATE, count) )
        {
            fprintf(stderr, "m2p update failure p=%lx m=%lx\n",
                    count, page_array[count]);
            munmap(physmap, PAGE_SIZE);
            goto error_out;
        }
        *physmap_e++ = page_array[count];
        if ( ((unsigned long)physmap_e & (PAGE_SIZE-1)) == 0 )
        {
            munmap(physmap, PAGE_SIZE);
            physmap = physmap_e = xc_map_foreign_range(
                xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
                page_array[physmap_pfn++]);
        }
    }
    munmap(physmap, PAGE_SIZE);

#if defined(__i386__)
    /*
     * Pin down l2tab addr as page dir page - causes hypervisor to provide
     * correct protection for the page
     */
    if ( dsi.pae_kernel )
    {
        if ( pin_table(xc_handle, MMUEXT_PIN_L3_TABLE,
                       ctxt->ctrlreg[3] >> PAGE_SHIFT, dom) )
            goto error_out;
    }
    else
    {
        if ( pin_table(xc_handle, MMUEXT_PIN_L2_TABLE,
                       ctxt->ctrlreg[3] >> PAGE_SHIFT, dom) )
            goto error_out;
    }
#endif

#if defined(__x86_64__)
    /*
     * Pin down l4tab addr as page dir page - causes hypervisor to provide
     * correct protection for the page
     */
    if ( pin_table(xc_handle, MMUEXT_PIN_L4_TABLE,
                   ctxt->ctrlreg[3] >> PAGE_SHIFT, dom) )
        goto error_out;
#endif

    *store_mfn   = page_array[(vstoreinfo_start - dsi.v_start) >> PAGE_SHIFT];
    *console_mfn = page_array[(vconsole_start   - dsi.v_start) >> PAGE_SHIFT];

    start_info = xc_map_foreign_range(
        xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
        page_array[(vstartinfo_start - dsi.v_start) >> PAGE_SHIFT]);
    memset(start_info, 0, sizeof(*start_info));
    start_info->nr_pages     = nr_pages;
    start_info->shared_info  = shared_info_frame << PAGE_SHIFT;
    start_info->flags        = flags;
    start_info->pt_base      = vpt_start;
    start_info->nr_pt_frames = nr_pt_pages;
    start_info->mfn_list     = vphysmap_start;
    start_info->store_mfn    = *store_mfn;
    start_info->store_evtchn = store_evtchn;
    start_info->console_mfn    = *console_mfn;
    start_info->console_evtchn = console_evtchn;
    if ( initrd_len != 0 )
    {
        start_info->mod_start = vinitrd_start;
        start_info->mod_len   = initrd_len;
    }
    strncpy((char *)start_info->cmd_line, cmdline, MAX_GUEST_CMDLINE);
    start_info->cmd_line[MAX_GUEST_CMDLINE-1] = '\0';
    munmap(start_info, PAGE_SIZE);

    /* shared_info page starts its life empty. */
    shared_info = xc_map_foreign_range(
        xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, shared_info_frame);
    memset(shared_info, 0, sizeof(shared_info_t));
    /* Mask all upcalls... */
    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
        shared_info->vcpu_data[i].evtchn_upcall_mask = 1;

    shared_info->n_vcpu = vcpus;
    printf(" VCPUS:         %d\n", shared_info->n_vcpu);

    munmap(shared_info, PAGE_SIZE);

    /* Send the page update requests down to the hypervisor. */
    if ( xc_finish_mmu_updates(xc_handle, mmu) )
        goto error_out;

    free(mmu);
    free(page_array);

    *pvsi = vstartinfo_start;
    *pvss = vstack_start;
    *pvke = dsi.v_kernentry;

    return 0;

 error_out:
    free(mmu);
    free(page_array);
    return -1;
}
#endif

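/*
 * Build a Linux guest: read the kernel image (and optional compressed
 * ramdisk), verify the domain is paused and not yet constructed, populate
 * its memory via setup_guest(), then hand the initial vcpu context to the
 * hypervisor with DOM0_SETDOMAININFO.
 */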
int xc_linux_build(int xc_handle,
                   u32 domid,
                   const char *image_name,
                   const char *ramdisk_name,
                   const char *cmdline,
                   unsigned long flags,
                   unsigned int vcpus,
                   unsigned int store_evtchn,
                   unsigned long *store_mfn,
                   unsigned int console_evtchn,
                   unsigned long *console_mfn)
{
    dom0_op_t launch_op, op;
    int initrd_fd = -1;
    gzFile initrd_gfd = NULL;
    int rc, i;
    vcpu_guest_context_t st_ctxt, *ctxt = &st_ctxt;
    long nr_pages; /* signed, so the error return from xc_get_tot_pages is detectable */
    char *image = NULL;
    unsigned long image_size, initrd_size = 0;
    unsigned long vstartinfo_start, vkern_entry, vstack_start;

    if ( (nr_pages = xc_get_tot_pages(xc_handle, domid)) < 0 )
    {
        PERROR("Could not find total pages for domain");
        goto error_out;
    }

    if ( (image = xc_read_kernel_image(image_name, &image_size)) == NULL )
        goto error_out;

    if ( (ramdisk_name != NULL) && (strlen(ramdisk_name) != 0) )
    {
        if ( (initrd_fd = open(ramdisk_name, O_RDONLY)) < 0 )
        {
            PERROR("Could not open the initial ramdisk image");
            goto error_out;
        }

        initrd_size = xc_get_filesz(initrd_fd);

        if ( (initrd_gfd = gzdopen(initrd_fd, "rb")) == NULL )
        {
            PERROR("Could not allocate decompression state for initrd");
            goto error_out;
        }
    }

    if ( mlock(&st_ctxt, sizeof(st_ctxt)) )
    {
        PERROR("xc_linux_build: ctxt mlock failed");
        goto error_out;
    }

    op.cmd = DOM0_GETDOMAININFO;
    op.u.getdomaininfo.domain = (domid_t)domid;
    if ( (xc_dom0_op(xc_handle, &op) < 0) ||
         ((u16)op.u.getdomaininfo.domain != domid) )
    {
        PERROR("Could not get info on domain");
        goto error_out;
    }

    if ( xc_domain_get_vcpu_context(xc_handle, domid, 0, ctxt) )
    {
        PERROR("Could not get vcpu context");
        goto error_out;
    }

    if ( !(op.u.getdomaininfo.flags & DOMFLAGS_PAUSED) ||
#ifdef __ia64__
         0 )
#else
         (ctxt->ctrlreg[3] != 0) )
#endif
    {
        ERROR("Domain is already constructed");
        goto error_out;
    }

    if ( setup_guest(xc_handle, domid, image, image_size,
                     initrd_gfd, initrd_size, nr_pages,
                     &vstartinfo_start, &vkern_entry,
                     &vstack_start, ctxt, cmdline,
                     op.u.getdomaininfo.shared_info_frame,
                     flags, vcpus,
                     store_evtchn, store_mfn,
                     console_evtchn, console_mfn) < 0 )
    {
        ERROR("Error constructing guest OS");
        goto error_out;
    }

    if ( initrd_gfd != NULL )
        gzclose(initrd_gfd); /* also closes the underlying initrd_fd */
    else if ( initrd_fd >= 0 )
        close(initrd_fd);
    free(image);

#ifdef __ia64__
    /* based on new_thread in xen/arch/ia64/domain.c */
    ctxt->regs.cr_ipsr = 0; /* all necessary bits filled by hypervisor */
    ctxt->regs.cr_iip  = vkern_entry;
    ctxt->regs.cr_ifs  = 1UL << 63;
    ctxt->regs.ar_fpsr = FPSR_DEFAULT;
    /* ctxt->regs.r28 = dom_fw_setup(); currently done by hypervisor, should move here */
    ctxt->vcpu.privregs = 0;
    ctxt->shared.flags  = flags;
    i = 0; /* silence unused variable warning */
#else /* x86 */
    /*
     * Initial register values:
     *  DS,ES,FS,GS = FLAT_KERNEL_DS
     *       CS:EIP = FLAT_KERNEL_CS:start_pc
     *       SS:ESP = FLAT_KERNEL_DS:start_stack
     *          ESI = start_info
     *  [EAX,EBX,ECX,EDX,EDI,EBP are zero]
     *       EFLAGS = IF | 2 (bit 1 is reserved and should always be 1)
     */
    ctxt->user_regs.ds = FLAT_KERNEL_DS;
    ctxt->user_regs.es = FLAT_KERNEL_DS;
    ctxt->user_regs.fs = FLAT_KERNEL_DS;
    ctxt->user_regs.gs = FLAT_KERNEL_DS;
    ctxt->user_regs.ss = FLAT_KERNEL_SS;
    ctxt->user_regs.cs = FLAT_KERNEL_CS;
    ctxt->user_regs.eip = vkern_entry;
    ctxt->user_regs.esp = vstack_start + PAGE_SIZE;
    ctxt->user_regs.esi = vstartinfo_start;
    ctxt->user_regs.eflags = 1 << 9; /* Interrupt Enable */

    /* FPU is set up to default initial state. */
    memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));

    /* Virtual IDT is empty at start-of-day. */
    for ( i = 0; i < 256; i++ )
    {
        ctxt->trap_ctxt[i].vector = i;
        ctxt->trap_ctxt[i].cs     = FLAT_KERNEL_CS;
    }

    /* No LDT. */
    ctxt->ldt_ents = 0;

    /* Use the default Xen-provided GDT. */
    ctxt->gdt_ents = 0;

    /* Ring 1 stack is the initial stack. */
    ctxt->kernel_ss = FLAT_KERNEL_SS;
    ctxt->kernel_sp = vstack_start + PAGE_SIZE;

    /* No debugging. */
    memset(ctxt->debugreg, 0, sizeof(ctxt->debugreg));

    /* No callback handlers. */
#if defined(__i386__)
    ctxt->event_callback_cs     = FLAT_KERNEL_CS;
    ctxt->event_callback_eip    = 0;
    ctxt->failsafe_callback_cs  = FLAT_KERNEL_CS;
    ctxt->failsafe_callback_eip = 0;
#elif defined(__x86_64__)
    ctxt->event_callback_eip    = 0;
    ctxt->failsafe_callback_eip = 0;
    ctxt->syscall_callback_eip  = 0;
#endif
#endif /* x86 */

    memset(&launch_op, 0, sizeof(launch_op));

    launch_op.u.setdomaininfo.domain = (domid_t)domid;
    launch_op.u.setdomaininfo.vcpu   = 0;
    launch_op.u.setdomaininfo.ctxt   = ctxt;

    launch_op.cmd = DOM0_SETDOMAININFO;
    rc = xc_dom0_op(xc_handle, &launch_op);

    return rc;

 error_out:
    if ( initrd_gfd != NULL )
        gzclose(initrd_gfd);
    else if ( initrd_fd >= 0 )
        close(initrd_fd);
    free(image);

    return -1;
}