ia64/xen-unstable

view tools/libxc/xc_linux_build.c @ 6698:f0d728001aaa

More tools PAE fixes to avoid losing high-order pte bits.
Signed-off-by: ian@xensource.com
author iap10@freefall.cl.cam.ac.uk
date Wed Sep 07 23:11:44 2005 +0000 (2005-09-07)
parents 7d0fb56b4a91
children 12ff9c954ace
line source
/******************************************************************************
 * xc_linux_build.c
 */

#include "xg_private.h"
#include <xenctrl.h>

#if defined(__i386__)
#define ELFSIZE 32
#endif

#if defined(__x86_64__) || defined(__ia64__)
#define ELFSIZE 64
#endif


#include "xc_elf.h"
#include "xc_aout9.h"
#include <stdlib.h>
#include <unistd.h>
#include <zlib.h>

#if defined(__i386__)
#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
#define L3_PROT (_PAGE_PRESENT)
#endif

#if defined(__x86_64__)
#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
#define L3_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
#define L4_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
#endif


#define round_pgup(_p)    (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
#define round_pgdown(_p)  ((_p)&PAGE_MASK)

#ifdef __ia64__
#define probe_aout9(image,image_size,load_funcs) 1
#endif
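
/*
 * probeimageformat() below treats a zero return from a probe_*() routine as
 * "format recognized", so this ia64 stub makes probe_aout9() always report
 * failure: ia64 has no a.out9 loader and only the ELF and bin probes apply.
 */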

static int probeimageformat(char *image,
                            unsigned long image_size,
                            struct load_funcs *load_funcs)
{
    if ( probe_elf(image, image_size, load_funcs) &&
         probe_bin(image, image_size, load_funcs) &&
         probe_aout9(image, image_size, load_funcs) )
    {
        ERROR( "Unrecognized image format" );
        return -EINVAL;
    }

    return 0;
}

#define alloc_pt(ltab, vltab)                                                \
        ltab = (unsigned long long)(page_array[ppt_alloc++]) << PAGE_SHIFT; \
        if (vltab != NULL) {                                                 \
            munmap(vltab, PAGE_SIZE);                                        \
        }                                                                    \
        if ((vltab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,        \
                                          PROT_READ|PROT_WRITE,             \
                                          ltab >> PAGE_SHIFT)) == NULL) {   \
            goto error_out;                                                  \
        }                                                                    \
        memset(vltab, 0, PAGE_SIZE);
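
/*
 * alloc_pt() hands out the next unused guest frame from page_array as a
 * page-table page: it records the frame's machine address in 'ltab', remaps
 * the local window 'vltab' onto the new frame, and zeroes it.  The cast to
 * unsigned long long is the point of this changeset: for a PAE guest a
 * frame above 4GB would otherwise have its high-order pte bits truncated
 * to a 32-bit 'unsigned long'.
 */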

#if defined(__i386__)

static int setup_pg_tables(int xc_handle, u32 dom,
                           vcpu_guest_context_t *ctxt,
                           unsigned long dsi_v_start,
                           unsigned long v_end,
                           unsigned long *page_array,
                           unsigned long vpt_start,
                           unsigned long vpt_end)
{
    l1_pgentry_t *vl1tab=NULL, *vl1e=NULL;
    l2_pgentry_t *vl2tab=NULL, *vl2e=NULL;
    unsigned long l1tab = 0;
    unsigned long l2tab = 0;
    unsigned long ppt_alloc;
    unsigned long count;

    ppt_alloc = (vpt_start - dsi_v_start) >> PAGE_SHIFT;
    alloc_pt(l2tab, vl2tab);
    vl2e = &vl2tab[l2_table_offset(dsi_v_start)];
    ctxt->ctrlreg[3] = l2tab;

    for ( count = 0; count < ((v_end-dsi_v_start)>>PAGE_SHIFT); count++ )
    {
        if ( ((unsigned long)vl1e & (PAGE_SIZE-1)) == 0 )
        {
            alloc_pt(l1tab, vl1tab);
            vl1e = &vl1tab[l1_table_offset(dsi_v_start + (count<<PAGE_SHIFT))];
            *vl2e++ = l1tab | L2_PROT;
        }

        *vl1e = (page_array[count] << PAGE_SHIFT) | L1_PROT;
        if ( (count >= ((vpt_start-dsi_v_start)>>PAGE_SHIFT)) &&
             (count <  ((vpt_end  -dsi_v_start)>>PAGE_SHIFT)) )
            *vl1e &= ~_PAGE_RW;
        vl1e++;
    }
    munmap(vl1tab, PAGE_SIZE);
    munmap(vl2tab, PAGE_SIZE);
    return 0;

 error_out:
    if (vl1tab)
        munmap(vl1tab, PAGE_SIZE);
    if (vl2tab)
        munmap(vl2tab, PAGE_SIZE);
    return -1;
}
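
/*
 * Note that the vpt_start..vpt_end range covers the page-table pages
 * themselves; their mappings have _PAGE_RW cleared because Xen only allows
 * a guest to map its active page tables read-only -- all updates must be
 * validated by the hypervisor.
 */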

static int setup_pg_tables_pae(int xc_handle, u32 dom,
                               vcpu_guest_context_t *ctxt,
                               unsigned long dsi_v_start,
                               unsigned long v_end,
                               unsigned long *page_array,
                               unsigned long vpt_start,
                               unsigned long vpt_end)
{
    l1_pgentry_64_t *vl1tab=NULL, *vl1e=NULL;
    l2_pgentry_64_t *vl2tab=NULL, *vl2e=NULL;
    l3_pgentry_64_t *vl3tab=NULL, *vl3e=NULL;
    unsigned long long l1tab = 0;
    unsigned long long l2tab = 0;
    unsigned long long l3tab = 0;
    unsigned long ppt_alloc;
    unsigned long count;

    /* First allocate page for page dir. */
    ppt_alloc = (vpt_start - dsi_v_start) >> PAGE_SHIFT;
    alloc_pt(l3tab, vl3tab);
    vl3e = &vl3tab[l3_table_offset_pae(dsi_v_start)];
    ctxt->ctrlreg[3] = l3tab;

    if ( l3tab > 0xfffff000 )
    {
        fprintf(stderr, "L3TAB = %llx above 4GB!\n", l3tab);
        goto error_out;
    }

    for ( count = 0; count < ((v_end-dsi_v_start)>>PAGE_SHIFT); count++)
    {
        if ( !((unsigned long)vl1e & (PAGE_SIZE-1)) )
        {
            alloc_pt(l1tab, vl1tab);

            if ( !((unsigned long)vl2e & (PAGE_SIZE-1)) )
            {
                alloc_pt(l2tab, vl2tab);
                vl2e = &vl2tab[l2_table_offset_pae(dsi_v_start + (count<<PAGE_SHIFT))];
                *vl3e = l2tab | L3_PROT;
                vl3e++;
            }
            vl1e = &vl1tab[l1_table_offset_pae(dsi_v_start + (count<<PAGE_SHIFT))];
            *vl2e = l1tab | L2_PROT;
            vl2e++;
        }

        *vl1e = (page_array[count] << PAGE_SHIFT) | L1_PROT;
        if ( (count >= ((vpt_start-dsi_v_start)>>PAGE_SHIFT)) &&
             (count <  ((vpt_end  -dsi_v_start)>>PAGE_SHIFT)) )
        {
            *vl1e &= ~_PAGE_RW;
        }
        vl1e++;
    }

    munmap(vl1tab, PAGE_SIZE);
    munmap(vl2tab, PAGE_SIZE);
    munmap(vl3tab, PAGE_SIZE);
    return 0;

 error_out:
    if (vl1tab)
        munmap(vl1tab, PAGE_SIZE);
    if (vl2tab)
        munmap(vl2tab, PAGE_SIZE);
    if (vl3tab)
        munmap(vl3tab, PAGE_SIZE);
    return -1;
}
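
/*
 * The PAE variant carries l1tab/l2tab/l3tab as unsigned long long so that
 * machine addresses of page-table frames above 4GB survive intact.  The
 * explicit check on l3tab reflects a PAE constraint: %cr3 holds a 32-bit
 * address, so the third-level table itself must sit below 4GB.
 */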

#endif

#if defined(__x86_64__)

static int setup_pg_tables_64(int xc_handle, u32 dom,
                              vcpu_guest_context_t *ctxt,
                              unsigned long dsi_v_start,
                              unsigned long v_end,
                              unsigned long *page_array,
                              unsigned long vpt_start,
                              unsigned long vpt_end)
{
    l1_pgentry_t *vl1tab=NULL, *vl1e=NULL;
    l2_pgentry_t *vl2tab=NULL, *vl2e=NULL;
    l3_pgentry_t *vl3tab=NULL, *vl3e=NULL;
    l4_pgentry_t *vl4tab=NULL, *vl4e=NULL;
    unsigned long l2tab = 0;
    unsigned long l1tab = 0;
    unsigned long l3tab = 0;
    unsigned long l4tab = 0;
    unsigned long ppt_alloc;
    unsigned long count;

    /* First allocate page for page dir. */
    ppt_alloc = (vpt_start - dsi_v_start) >> PAGE_SHIFT;
    alloc_pt(l4tab, vl4tab);
    vl4e = &vl4tab[l4_table_offset(dsi_v_start)];
    ctxt->ctrlreg[3] = l4tab;

    for ( count = 0; count < ((v_end-dsi_v_start)>>PAGE_SHIFT); count++)
    {
        if ( !((unsigned long)vl1e & (PAGE_SIZE-1)) )
        {
            alloc_pt(l1tab, vl1tab);

            if ( !((unsigned long)vl2e & (PAGE_SIZE-1)) )
            {
                alloc_pt(l2tab, vl2tab);
                if ( !((unsigned long)vl3e & (PAGE_SIZE-1)) )
                {
                    alloc_pt(l3tab, vl3tab);
                    vl3e = &vl3tab[l3_table_offset(dsi_v_start + (count<<PAGE_SHIFT))];
                    *vl4e = l3tab | L4_PROT;
                    vl4e++;
                }
                vl2e = &vl2tab[l2_table_offset(dsi_v_start + (count<<PAGE_SHIFT))];
                *vl3e = l2tab | L3_PROT;
                vl3e++;
            }
            vl1e = &vl1tab[l1_table_offset(dsi_v_start + (count<<PAGE_SHIFT))];
            *vl2e = l1tab | L2_PROT;
            vl2e++;
        }

        *vl1e = (page_array[count] << PAGE_SHIFT) | L1_PROT;
        if ( (count >= ((vpt_start-dsi_v_start)>>PAGE_SHIFT)) &&
             (count <  ((vpt_end  -dsi_v_start)>>PAGE_SHIFT)) )
        {
            *vl1e &= ~_PAGE_RW;
        }
        vl1e++;
    }

    munmap(vl1tab, PAGE_SIZE);
    munmap(vl2tab, PAGE_SIZE);
    munmap(vl3tab, PAGE_SIZE);
    munmap(vl4tab, PAGE_SIZE);
    return 0;

 error_out:
    if (vl1tab)
        munmap(vl1tab, PAGE_SIZE);
    if (vl2tab)
        munmap(vl2tab, PAGE_SIZE);
    if (vl3tab)
        munmap(vl3tab, PAGE_SIZE);
    if (vl4tab)
        munmap(vl4tab, PAGE_SIZE);
    return -1;
}
#endif

#ifdef __ia64__
#include <asm/fpu.h> /* for FPSR_DEFAULT */
static int setup_guest(int xc_handle,
                       u32 dom,
                       char *image, unsigned long image_size,
                       gzFile initrd_gfd, unsigned long initrd_len,
                       unsigned long nr_pages,
                       unsigned long *pvsi, unsigned long *pvke,
                       unsigned long *pvss, vcpu_guest_context_t *ctxt,
                       const char *cmdline,
                       unsigned long shared_info_frame,
                       unsigned long flags,
                       unsigned int vcpus,
                       unsigned int store_evtchn, unsigned long *store_mfn)
{
    unsigned long *page_array = NULL;
    struct load_funcs load_funcs;
    struct domain_setup_info dsi;
    unsigned long start_page;
    int rc;

    rc = probeimageformat(image, image_size, &load_funcs);
    if ( rc != 0 )
        goto error_out;

    memset(&dsi, 0, sizeof(struct domain_setup_info));

    rc = (load_funcs.parseimage)(image, image_size, &dsi);
    if ( rc != 0 )
        goto error_out;

    dsi.v_start = round_pgdown(dsi.v_start);
    dsi.v_end   = round_pgup(dsi.v_end);

    start_page = dsi.v_start >> PAGE_SHIFT;
    nr_pages = (dsi.v_end - dsi.v_start) >> PAGE_SHIFT;
    if ( (page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL )
    {
        PERROR("Could not allocate memory");
        goto error_out;
    }

    if ( xc_ia64_get_pfn_list(xc_handle, dom, page_array, start_page, nr_pages) != nr_pages )
    {
        PERROR("Could not get the page frame list");
        goto error_out;
    }

    (load_funcs.loadimage)(image, image_size, xc_handle, dom, page_array,
                           &dsi);

    *pvke = dsi.v_kernentry;
    return 0;

 error_out:
    free(page_array);
    return -1;
}
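/*
 * Note the much smaller scope of the ia64 path above: it only loads the
 * kernel image and reports the entry point.  Start-of-day setup that the
 * x86 path performs below is handled by the hypervisor at this point in
 * the tree (see the dom_fw_setup() remark in xc_linux_build() further
 * down).
 */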
#else /* x86 */
static int setup_guest(int xc_handle,
                       u32 dom,
                       char *image, unsigned long image_size,
                       gzFile initrd_gfd, unsigned long initrd_len,
                       unsigned long nr_pages,
                       unsigned long *pvsi, unsigned long *pvke,
                       unsigned long *pvss, vcpu_guest_context_t *ctxt,
                       const char *cmdline,
                       unsigned long shared_info_frame,
                       unsigned long flags,
                       unsigned int vcpus,
                       unsigned int store_evtchn, unsigned long *store_mfn,
                       unsigned int console_evtchn, unsigned long *console_mfn)
{
    unsigned long *page_array = NULL;
    unsigned long count, i;
    start_info_t *start_info;
    shared_info_t *shared_info;
    xc_mmu_t *mmu = NULL;
    int rc;

    unsigned long nr_pt_pages;
    unsigned long physmap_pfn;
    unsigned long *physmap, *physmap_e;

    struct load_funcs load_funcs;
    struct domain_setup_info dsi;
    unsigned long vinitrd_start;
    unsigned long vinitrd_end;
    unsigned long vphysmap_start;
    unsigned long vphysmap_end;
    unsigned long vstartinfo_start;
    unsigned long vstartinfo_end;
    unsigned long vstoreinfo_start;
    unsigned long vstoreinfo_end;
    unsigned long vconsole_start;
    unsigned long vconsole_end;
    unsigned long vstack_start;
    unsigned long vstack_end;
    unsigned long vpt_start;
    unsigned long vpt_end;
    unsigned long v_end;

    rc = probeimageformat(image, image_size, &load_funcs);
    if ( rc != 0 )
        goto error_out;

    memset(&dsi, 0, sizeof(struct domain_setup_info));

    rc = (load_funcs.parseimage)(image, image_size, &dsi);
    if ( rc != 0 )
        goto error_out;

    if ( (dsi.v_start & (PAGE_SIZE-1)) != 0 )
    {
        PERROR("Guest OS must load to a page boundary.\n");
        goto error_out;
    }

    /*
     * Why do we need this? The number of page-table frames depends on the
     * size of the bootstrap address space. But the size of the address space
     * depends on the number of page-table frames (since each one is mapped
     * read-only). We have a pair of simultaneous equations in two unknowns,
     * which we solve by exhaustive search.
     */
    vinitrd_start    = round_pgup(dsi.v_end);
    vinitrd_end      = vinitrd_start + initrd_len;
    vphysmap_start   = round_pgup(vinitrd_end);
    vphysmap_end     = vphysmap_start + (nr_pages * sizeof(unsigned long));
    vstartinfo_start = round_pgup(vphysmap_end);
    vstartinfo_end   = vstartinfo_start + PAGE_SIZE;
    vstoreinfo_start = vstartinfo_end;
    vstoreinfo_end   = vstoreinfo_start + PAGE_SIZE;
    vconsole_start   = vstoreinfo_end;
    vconsole_end     = vconsole_start + PAGE_SIZE;
    vpt_start        = vconsole_end;

    for ( nr_pt_pages = 2; ; nr_pt_pages++ )
    {
        vpt_end      = vpt_start + (nr_pt_pages * PAGE_SIZE);
        vstack_start = vpt_end;
        vstack_end   = vstack_start + PAGE_SIZE;
        v_end        = (vstack_end + (1UL<<22)-1) & ~((1UL<<22)-1);
        if ( (v_end - vstack_end) < (512UL << 10) )
            v_end += 1UL << 22; /* Add extra 4MB to get >= 512kB padding. */
#if defined(__i386__)
        if (dsi.pae_kernel) {
            /* FIXME: assumes one L2 pgtable @ 0xc0000000 */
            if ( (((v_end - dsi.v_start + ((1<<L2_PAGETABLE_SHIFT_PAE)-1)) >>
                   L2_PAGETABLE_SHIFT_PAE) + 2) <= nr_pt_pages )
                break;
        } else {
            if ( (((v_end - dsi.v_start + ((1<<L2_PAGETABLE_SHIFT)-1)) >>
                   L2_PAGETABLE_SHIFT) + 1) <= nr_pt_pages )
                break;
        }
#endif
#if defined(__x86_64__)
#define NR(_l,_h,_s) \
    (((((_h) + ((1UL<<(_s))-1)) & ~((1UL<<(_s))-1)) - \
       ((_l) & ~((1UL<<(_s))-1))) >> (_s))
        if ( (1 + /* # L4 */
              NR(dsi.v_start, v_end, L4_PAGETABLE_SHIFT) + /* # L3 */
              NR(dsi.v_start, v_end, L3_PAGETABLE_SHIFT) + /* # L2 */
              NR(dsi.v_start, v_end, L2_PAGETABLE_SHIFT))  /* # L1 */
             <= nr_pt_pages )
            break;
#endif
    }
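
/*
 * Illustrative arithmetic (numbers not from the source): on i386 without
 * PAE, L2_PAGETABLE_SHIFT is 22, so one L1 table maps 4MB.  For a 64MB
 * bootstrap region the exit condition needs ceil(64MB/4MB) = 16 L1 pages
 * plus 1 L2 page, so the search stops at nr_pt_pages == 17 -- unless the
 * extra pages push v_end across another 4MB boundary, in which case the
 * loop simply retries with a larger count.
 */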

#define _p(a) ((void *) (a))

    printf("VIRTUAL MEMORY ARRANGEMENT:\n"
           " Loaded kernel: %p->%p\n"
           " Init. ramdisk: %p->%p\n"
           " Phys-Mach map: %p->%p\n"
           " Start info:    %p->%p\n"
           " Store page:    %p->%p\n"
           " Console page:  %p->%p\n"
           " Page tables:   %p->%p\n"
           " Boot stack:    %p->%p\n"
           " TOTAL:         %p->%p\n",
           _p(dsi.v_kernstart), _p(dsi.v_kernend),
           _p(vinitrd_start), _p(vinitrd_end),
           _p(vphysmap_start), _p(vphysmap_end),
           _p(vstartinfo_start), _p(vstartinfo_end),
           _p(vstoreinfo_start), _p(vstoreinfo_end),
           _p(vconsole_start), _p(vconsole_end),
           _p(vpt_start), _p(vpt_end),
           _p(vstack_start), _p(vstack_end),
           _p(dsi.v_start), _p(v_end));
    printf(" ENTRY ADDRESS: %p\n", _p(dsi.v_kernentry));

    if ( (v_end - dsi.v_start) > (nr_pages * PAGE_SIZE) )
    {
        printf("Initial guest OS requires too much space\n"
               "(%luMB is greater than %luMB limit)\n",
               (v_end-dsi.v_start)>>20, (nr_pages<<PAGE_SHIFT)>>20);
        goto error_out;
    }

    if ( (page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL )
    {
        PERROR("Could not allocate memory");
        goto error_out;
    }

    if ( xc_get_pfn_list(xc_handle, dom, page_array, nr_pages) != nr_pages )
    {
        PERROR("Could not get the page frame list");
        goto error_out;
    }

    (load_funcs.loadimage)(image, image_size, xc_handle, dom, page_array,
                           &dsi);

    /* Load the initial ramdisk image. */
    if ( initrd_len != 0 )
    {
        for ( i = (vinitrd_start - dsi.v_start);
              i < (vinitrd_end - dsi.v_start); i += PAGE_SIZE )
        {
            char page[PAGE_SIZE];
            if ( gzread(initrd_gfd, page, PAGE_SIZE) == -1 )
            {
                PERROR("Error reading initrd image, could not");
                goto error_out;
            }
            xc_copy_to_domain_page(xc_handle, dom,
                                   page_array[i>>PAGE_SHIFT], page);
        }
    }

    if ( (mmu = xc_init_mmu_updates(xc_handle, dom)) == NULL )
        goto error_out;

    /* setup page tables */
#if defined(__i386__)
    if (dsi.pae_kernel)
        rc = setup_pg_tables_pae(xc_handle, dom, ctxt,
                                 dsi.v_start, v_end,
                                 page_array, vpt_start, vpt_end);
    else {
        rc = setup_pg_tables(xc_handle, dom, ctxt,
                             dsi.v_start, v_end,
                             page_array, vpt_start, vpt_end);
    }
#endif
#if defined(__x86_64__)
    rc = setup_pg_tables_64(xc_handle, dom, ctxt,
                            dsi.v_start, v_end,
                            page_array, vpt_start, vpt_end);
#endif
    if (0 != rc)
        goto error_out;

    /* Write the phys->machine and machine->phys table entries. */
    physmap_pfn = (vphysmap_start - dsi.v_start) >> PAGE_SHIFT;
    physmap = physmap_e = xc_map_foreign_range(
        xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
        page_array[physmap_pfn++]);

    for ( count = 0; count < nr_pages; count++ )
    {
        if ( xc_add_mmu_update(xc_handle, mmu,
                               ((unsigned long long)page_array[count] << PAGE_SHIFT) |
                               MMU_MACHPHYS_UPDATE, count) )
        {
            fprintf(stderr, "m2p update failure p=%lx m=%lx\n",
                    count, page_array[count]);
            munmap(physmap, PAGE_SIZE);
            goto error_out;
        }
        *physmap_e++ = page_array[count];
        if ( ((unsigned long)physmap_e & (PAGE_SIZE-1)) == 0 )
        {
            munmap(physmap, PAGE_SIZE);
            physmap = physmap_e = xc_map_foreign_range(
                xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
                page_array[physmap_pfn++]);
        }
    }
    munmap(physmap, PAGE_SIZE);
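
    /*
     * Two translations are built here: machine-to-phys entries go to the
     * hypervisor-maintained M2P table via queued MMU_MACHPHYS_UPDATE
     * requests, while the guest's own phys-to-machine array is written
     * directly into the vphysmap pages, remapped one page at a time as
     * physmap_e crosses each page boundary.
     */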

#if defined(__i386__)
    /*
     * Pin down l2tab addr as page dir page - causes hypervisor to provide
     * correct protection for the page
     */
    if (dsi.pae_kernel) {
        if ( pin_table(xc_handle, MMUEXT_PIN_L3_TABLE,
                       ctxt->ctrlreg[3] >> PAGE_SHIFT, dom) )
            goto error_out;
    } else {
        if ( pin_table(xc_handle, MMUEXT_PIN_L2_TABLE,
                       ctxt->ctrlreg[3] >> PAGE_SHIFT, dom) )
            goto error_out;
    }
#endif

#if defined(__x86_64__)
    /*
     * Pin down l4tab addr as page dir page - causes hypervisor to provide
     * correct protection for the page
     */
    if ( pin_table(xc_handle, MMUEXT_PIN_L4_TABLE,
                   ctxt->ctrlreg[3] >> PAGE_SHIFT, dom) )
        goto error_out;
#endif

    *store_mfn = page_array[(vstoreinfo_start-dsi.v_start) >> PAGE_SHIFT];
    *console_mfn = page_array[(vconsole_start-dsi.v_start) >> PAGE_SHIFT];


    start_info = xc_map_foreign_range(
        xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
        page_array[(vstartinfo_start-dsi.v_start)>>PAGE_SHIFT]);
    memset(start_info, 0, sizeof(*start_info));
    start_info->nr_pages     = nr_pages;
    start_info->shared_info  = shared_info_frame << PAGE_SHIFT;
    start_info->flags        = flags;
    start_info->pt_base      = vpt_start;
    start_info->nr_pt_frames = nr_pt_pages;
    start_info->mfn_list     = vphysmap_start;
    start_info->store_mfn    = *store_mfn;
    start_info->store_evtchn = store_evtchn;
    start_info->console_mfn    = *console_mfn;
    start_info->console_evtchn = console_evtchn;
    if ( initrd_len != 0 )
    {
        start_info->mod_start = vinitrd_start;
        start_info->mod_len   = initrd_len;
    }
    strncpy((char *)start_info->cmd_line, cmdline, MAX_GUEST_CMDLINE);
    start_info->cmd_line[MAX_GUEST_CMDLINE-1] = '\0';
    munmap(start_info, PAGE_SIZE);

    /* shared_info page starts its life empty. */
    shared_info = xc_map_foreign_range(
        xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, shared_info_frame);
    memset(shared_info, 0, sizeof(shared_info_t));
    /* Mask all upcalls... */
    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
        shared_info->vcpu_data[i].evtchn_upcall_mask = 1;

    shared_info->n_vcpu = vcpus;
    printf(" VCPUS:         %d\n", shared_info->n_vcpu);

    munmap(shared_info, PAGE_SIZE);

    /* Send the page update requests down to the hypervisor. */
    if ( xc_finish_mmu_updates(xc_handle, mmu) )
        goto error_out;

    free(mmu);
    free(page_array);

    *pvsi = vstartinfo_start;
    *pvss = vstack_start;
    *pvke = dsi.v_kernentry;

    return 0;

 error_out:
    free(mmu);
    free(page_array);
    return -1;
}
#endif

int xc_linux_build(int xc_handle,
                   u32 domid,
                   const char *image_name,
                   const char *ramdisk_name,
                   const char *cmdline,
                   unsigned long flags,
                   unsigned int vcpus,
                   unsigned int store_evtchn,
                   unsigned long *store_mfn,
                   unsigned int console_evtchn,
                   unsigned long *console_mfn)
{
    dom0_op_t launch_op, op;
    int initrd_fd = -1;
    gzFile initrd_gfd = NULL;
    int rc, i;
    vcpu_guest_context_t st_ctxt, *ctxt = &st_ctxt;
    unsigned long nr_pages;
    char         *image = NULL;
    unsigned long image_size, initrd_size=0;
    unsigned long vstartinfo_start, vkern_entry, vstack_start;

    if ( (nr_pages = xc_get_tot_pages(xc_handle, domid)) < 0 )
    {
        PERROR("Could not find total pages for domain");
        goto error_out;
    }

    if ( (image = xc_read_kernel_image(image_name, &image_size)) == NULL )
        goto error_out;

    if ( (ramdisk_name != NULL) && (strlen(ramdisk_name) != 0) )
    {
        if ( (initrd_fd = open(ramdisk_name, O_RDONLY)) < 0 )
        {
            PERROR("Could not open the initial ramdisk image");
            goto error_out;
        }

        initrd_size = xc_get_filesz(initrd_fd);

        if ( (initrd_gfd = gzdopen(initrd_fd, "rb")) == NULL )
        {
            PERROR("Could not allocate decompression state for initrd");
            goto error_out;
        }
    }

    if ( mlock(&st_ctxt, sizeof(st_ctxt) ) )
    {
        PERROR("xc_linux_build: ctxt mlock failed");
        return 1;
    }

    op.cmd = DOM0_GETDOMAININFO;
    op.u.getdomaininfo.domain = (domid_t)domid;
    if ( (xc_dom0_op(xc_handle, &op) < 0) ||
         ((u16)op.u.getdomaininfo.domain != domid) )
    {
        PERROR("Could not get info on domain");
        goto error_out;
    }

    if ( xc_domain_get_vcpu_context(xc_handle, domid, 0, ctxt) )
    {
        PERROR("Could not get vcpu context");
        goto error_out;
    }

    if ( !(op.u.getdomaininfo.flags & DOMFLAGS_PAUSED) ||
#ifdef __ia64__
         0 )
#else
         (ctxt->ctrlreg[3] != 0) )
#endif
    {
        ERROR("Domain is already constructed");
        goto error_out;
    }

    if ( setup_guest(xc_handle, domid, image, image_size,
                     initrd_gfd, initrd_size, nr_pages,
                     &vstartinfo_start, &vkern_entry,
                     &vstack_start, ctxt, cmdline,
                     op.u.getdomaininfo.shared_info_frame,
                     flags, vcpus,
                     store_evtchn, store_mfn,
                     console_evtchn, console_mfn) < 0 )
    {
        ERROR("Error constructing guest OS");
        goto error_out;
    }

    if ( initrd_fd >= 0 )
        close(initrd_fd);
    if ( initrd_gfd )
        gzclose(initrd_gfd);
    free(image);

#ifdef __ia64__
    /* based on new_thread in xen/arch/ia64/domain.c */
    ctxt->regs.cr_ipsr = 0; /* all necessary bits filled by hypervisor */
    ctxt->regs.cr_iip = vkern_entry;
    ctxt->regs.cr_ifs = 1UL << 63;
    ctxt->regs.ar_fpsr = FPSR_DEFAULT;
    /* ctxt->regs.r28 = dom_fw_setup(); currently done by hypervisor, should move here */
    ctxt->vcpu.privregs = 0;
    ctxt->shared.flags = flags;
    i = 0; /* silence unused variable warning */
#else /* x86 */
    /*
     * Initial register values:
     *  DS,ES,FS,GS = FLAT_KERNEL_DS
     *       CS:EIP = FLAT_KERNEL_CS:start_pc
     *       SS:ESP = FLAT_KERNEL_DS:start_stack
     *          ESI = start_info
     *  [EAX,EBX,ECX,EDX,EDI,EBP are zero]
     *       EFLAGS = IF | 2 (bit 1 is reserved and should always be 1)
     */
    ctxt->user_regs.ds = FLAT_KERNEL_DS;
    ctxt->user_regs.es = FLAT_KERNEL_DS;
    ctxt->user_regs.fs = FLAT_KERNEL_DS;
    ctxt->user_regs.gs = FLAT_KERNEL_DS;
    ctxt->user_regs.ss = FLAT_KERNEL_SS;
    ctxt->user_regs.cs = FLAT_KERNEL_CS;
    ctxt->user_regs.eip = vkern_entry;
    ctxt->user_regs.esp = vstack_start + PAGE_SIZE;
    ctxt->user_regs.esi = vstartinfo_start;
    ctxt->user_regs.eflags = 1 << 9; /* Interrupt Enable */

    /* FPU is set up to default initial state. */
    memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));

    /* Virtual IDT is empty at start-of-day. */
    for ( i = 0; i < 256; i++ )
    {
        ctxt->trap_ctxt[i].vector = i;
        ctxt->trap_ctxt[i].cs     = FLAT_KERNEL_CS;
    }

    /* No LDT. */
    ctxt->ldt_ents = 0;

    /* Use the default Xen-provided GDT. */
    ctxt->gdt_ents = 0;

    /* Ring 1 stack is the initial stack. */
    ctxt->kernel_ss = FLAT_KERNEL_SS;
    ctxt->kernel_sp = vstack_start + PAGE_SIZE;

    /* No debugging. */
    memset(ctxt->debugreg, 0, sizeof(ctxt->debugreg));

    /* No callback handlers. */
#if defined(__i386__)
    ctxt->event_callback_cs     = FLAT_KERNEL_CS;
    ctxt->event_callback_eip    = 0;
    ctxt->failsafe_callback_cs  = FLAT_KERNEL_CS;
    ctxt->failsafe_callback_eip = 0;
#elif defined(__x86_64__)
    ctxt->event_callback_eip    = 0;
    ctxt->failsafe_callback_eip = 0;
    ctxt->syscall_callback_eip  = 0;
#endif
#endif /* x86 */

    memset( &launch_op, 0, sizeof(launch_op) );

    launch_op.u.setdomaininfo.domain = (domid_t)domid;
    launch_op.u.setdomaininfo.vcpu   = 0;
    launch_op.u.setdomaininfo.ctxt   = ctxt;

    launch_op.cmd = DOM0_SETDOMAININFO;
    rc = xc_dom0_op(xc_handle, &launch_op);

    return rc;

 error_out:
    if ( initrd_gfd != NULL )
        gzclose(initrd_gfd);
    else if ( initrd_fd >= 0 )
        close(initrd_fd);
    free(image);

    return -1;
}
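
A minimal caller sketch (not part of this file): it assumes an already
created, paused domain and pre-allocated store/console event channels;
the helper name, file paths, and command line below are illustrative only.

    /* Hypothetical driver for xc_linux_build(); error handling elided. */
    int build_domain(int xc_handle, u32 domid,
                     unsigned int store_evtchn, unsigned int console_evtchn)
    {
        unsigned long store_mfn, console_mfn;
        int rc = xc_linux_build(xc_handle, domid,
                                "/path/to/vmlinuz",   /* kernel image */
                                "/path/to/initrd.gz", /* optional ramdisk */
                                "root=/dev/sda1 ro",  /* guest cmdline */
                                0,                    /* flags */
                                1,                    /* vcpus */
                                store_evtchn, &store_mfn,
                                console_evtchn, &console_mfn);
        if ( rc != 0 )
            return rc;
        /* The returned store_mfn/console_mfn are then handed to xenstored
           and the console daemon by the rest of the toolstack. */
        return 0;
    }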