ia64/xen-unstable

view tools/libxc/xc_linux_build.c @ 6711:4cdf880c9463

Bug fix on x86_64 increase_reservation
Signed-off-by: ian@xensource.com
author iap10@firebug.cl.cam.ac.uk
date Thu Sep 08 20:39:58 2005 +0000 (2005-09-08)
parents c7999d49ecf7
children 2704a88c3295 cdfa7dd00c44
line source
1 /******************************************************************************
2 * xc_linux_build.c
3 */
5 #include "xg_private.h"
6 #include <xenctrl.h>
8 #if defined(__i386__)
9 #define ELFSIZE 32
10 #endif
12 #if defined(__x86_64__) || defined(__ia64__)
13 #define ELFSIZE 64
14 #endif
17 #include "xc_elf.h"
18 #include "xc_aout9.h"
19 #include <stdlib.h>
20 #include <unistd.h>
21 #include <zlib.h>
23 #if defined(__i386__)
24 #define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
25 #define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
26 #define L3_PROT (_PAGE_PRESENT)
27 #endif
29 #if defined(__x86_64__)
30 #define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
31 #define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
32 #define L3_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
33 #define L4_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
34 #endif
37 #define round_pgup(_p) (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
38 #define round_pgdown(_p) ((_p)&PAGE_MASK)
40 #ifdef __ia64__
41 #define probe_aout9(image,image_size,load_funcs) 1
42 #endif
44 static int probeimageformat(char *image,
45 unsigned long image_size,
46 struct load_funcs *load_funcs)
47 {
48 if ( probe_elf(image, image_size, load_funcs) &&
49 probe_bin(image, image_size, load_funcs) &&
50 probe_aout9(image, image_size, load_funcs) )
51 {
52 ERROR( "Unrecognized image format" );
53 return -EINVAL;
54 }
56 return 0;
57 }
/*
 * Allocate the next page-table frame for the guest being built:
 *  - 'ltab' receives the machine (physical) address of the new frame;
 *  - 'vltab' is remapped to a local writable mapping of that frame (any
 *    mapping previously held in 'vltab' is released first);
 *  - the new frame is zero-filled.
 *
 * Frames are consumed from page_array[] at index 'ppt_alloc', which is
 * advanced.  Relies on 'xc_handle', 'dom', 'page_array' and 'ppt_alloc'
 * being in the caller's scope, and jumps to the caller's 'error_out'
 * label if the mapping fails — every user must provide that label.
 *
 * Wrapped in do { } while (0) so the macro behaves as a single statement
 * (the original expansion was a bare statement list, which breaks inside
 * an unbraced if/else).
 */
#define alloc_pt(ltab, vltab)                                               \
    do {                                                                    \
        ltab = (unsigned long long)(page_array[ppt_alloc++]) << PAGE_SHIFT; \
        if ( (vltab) != NULL )                                              \
            munmap(vltab, PAGE_SIZE);                                       \
        if ( ((vltab) = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,     \
                                             PROT_READ|PROT_WRITE,          \
                                             ltab >> PAGE_SHIFT)) == NULL ) \
            goto error_out;                                                 \
        memset(vltab, 0, PAGE_SIZE);                                        \
    } while ( 0 )
71 #if defined(__i386__)
73 static int setup_pg_tables(int xc_handle, u32 dom,
74 vcpu_guest_context_t *ctxt,
75 unsigned long dsi_v_start,
76 unsigned long v_end,
77 unsigned long *page_array,
78 unsigned long vpt_start,
79 unsigned long vpt_end)
80 {
81 l1_pgentry_t *vl1tab=NULL, *vl1e=NULL;
82 l2_pgentry_t *vl2tab=NULL, *vl2e=NULL;
83 unsigned long l1tab = 0;
84 unsigned long l2tab = 0;
85 unsigned long ppt_alloc;
86 unsigned long count;
88 ppt_alloc = (vpt_start - dsi_v_start) >> PAGE_SHIFT;
89 alloc_pt(l2tab, vl2tab);
90 vl2e = &vl2tab[l2_table_offset(dsi_v_start)];
91 ctxt->ctrlreg[3] = l2tab;
93 for ( count = 0; count < ((v_end-dsi_v_start)>>PAGE_SHIFT); count++ )
94 {
95 if ( ((unsigned long)vl1e & (PAGE_SIZE-1)) == 0 )
96 {
97 alloc_pt(l1tab, vl1tab);
98 vl1e = &vl1tab[l1_table_offset(dsi_v_start + (count<<PAGE_SHIFT))];
99 *vl2e++ = l1tab | L2_PROT;
100 }
102 *vl1e = (page_array[count] << PAGE_SHIFT) | L1_PROT;
103 if ( (count >= ((vpt_start-dsi_v_start)>>PAGE_SHIFT)) &&
104 (count < ((vpt_end -dsi_v_start)>>PAGE_SHIFT)) )
105 *vl1e &= ~_PAGE_RW;
106 vl1e++;
107 }
108 munmap(vl1tab, PAGE_SIZE);
109 munmap(vl2tab, PAGE_SIZE);
110 return 0;
112 error_out:
113 if (vl1tab)
114 munmap(vl1tab, PAGE_SIZE);
115 if (vl2tab)
116 munmap(vl2tab, PAGE_SIZE);
117 return -1;
118 }
120 static int setup_pg_tables_pae(int xc_handle, u32 dom,
121 vcpu_guest_context_t *ctxt,
122 unsigned long dsi_v_start,
123 unsigned long v_end,
124 unsigned long *page_array,
125 unsigned long vpt_start,
126 unsigned long vpt_end)
127 {
128 l1_pgentry_64_t *vl1tab=NULL, *vl1e=NULL;
129 l2_pgentry_64_t *vl2tab=NULL, *vl2e=NULL;
130 l3_pgentry_64_t *vl3tab=NULL, *vl3e=NULL;
131 unsigned long long l1tab = 0;
132 unsigned long long l2tab = 0;
133 unsigned long long l3tab = 0;
134 unsigned long ppt_alloc;
135 unsigned long count;
137 /* First allocate page for page dir. */
138 ppt_alloc = (vpt_start - dsi_v_start) >> PAGE_SHIFT;
140 if ( page_array[ppt_alloc] > 0xfffff )
141 {
142 unsigned long nmfn;
143 nmfn = xc_make_page_below_4G( xc_handle, dom, page_array[ppt_alloc] );
144 if ( nmfn == 0 )
145 {
146 fprintf(stderr, "Couldn't get a page below 4GB :-(\n");
147 goto error_out;
148 }
149 page_array[ppt_alloc] = nmfn;
150 }
152 alloc_pt(l3tab, vl3tab);
153 vl3e = &vl3tab[l3_table_offset_pae(dsi_v_start)];
154 ctxt->ctrlreg[3] = l3tab;
156 if(l3tab>0xfffff000ULL)
157 {
158 fprintf(stderr,"L3TAB = %llx above 4GB!\n",l3tab);
159 goto error_out;
160 }
162 for ( count = 0; count < ((v_end-dsi_v_start)>>PAGE_SHIFT); count++)
163 {
164 if ( !((unsigned long)vl1e & (PAGE_SIZE-1)) )
165 {
166 alloc_pt(l1tab, vl1tab);
168 if ( !((unsigned long)vl2e & (PAGE_SIZE-1)) )
169 {
170 alloc_pt(l2tab, vl2tab);
171 vl2e = &vl2tab[l2_table_offset_pae(dsi_v_start + (count<<PAGE_SHIFT))];
172 *vl3e = l2tab | L3_PROT;
173 vl3e++;
174 }
175 vl1e = &vl1tab[l1_table_offset_pae(dsi_v_start + (count<<PAGE_SHIFT))];
176 *vl2e = l1tab | L2_PROT;
177 vl2e++;
178 }
180 *vl1e = (page_array[count] << PAGE_SHIFT) | L1_PROT;
181 if ( (count >= ((vpt_start-dsi_v_start)>>PAGE_SHIFT)) &&
182 (count < ((vpt_end -dsi_v_start)>>PAGE_SHIFT)) )
183 {
184 *vl1e &= ~_PAGE_RW;
185 }
186 vl1e++;
187 }
189 munmap(vl1tab, PAGE_SIZE);
190 munmap(vl2tab, PAGE_SIZE);
191 munmap(vl3tab, PAGE_SIZE);
192 return 0;
194 error_out:
195 if (vl1tab)
196 munmap(vl1tab, PAGE_SIZE);
197 if (vl2tab)
198 munmap(vl2tab, PAGE_SIZE);
199 if (vl3tab)
200 munmap(vl3tab, PAGE_SIZE);
201 return -1;
202 }
204 #endif
206 #if defined(__x86_64__)
208 static int setup_pg_tables_64(int xc_handle, u32 dom,
209 vcpu_guest_context_t *ctxt,
210 unsigned long dsi_v_start,
211 unsigned long v_end,
212 unsigned long *page_array,
213 unsigned long vpt_start,
214 unsigned long vpt_end)
215 {
216 l1_pgentry_t *vl1tab=NULL, *vl1e=NULL;
217 l2_pgentry_t *vl2tab=NULL, *vl2e=NULL;
218 l3_pgentry_t *vl3tab=NULL, *vl3e=NULL;
219 l4_pgentry_t *vl4tab=NULL, *vl4e=NULL;
220 unsigned long l2tab = 0;
221 unsigned long l1tab = 0;
222 unsigned long l3tab = 0;
223 unsigned long l4tab = 0;
224 unsigned long ppt_alloc;
225 unsigned long count;
227 /* First allocate page for page dir. */
228 ppt_alloc = (vpt_start - dsi_v_start) >> PAGE_SHIFT;
229 alloc_pt(l4tab, vl4tab);
230 vl4e = &vl4tab[l4_table_offset(dsi_v_start)];
231 ctxt->ctrlreg[3] = l4tab;
233 for ( count = 0; count < ((v_end-dsi_v_start)>>PAGE_SHIFT); count++)
234 {
235 if ( !((unsigned long)vl1e & (PAGE_SIZE-1)) )
236 {
237 alloc_pt(l1tab, vl1tab);
239 if ( !((unsigned long)vl2e & (PAGE_SIZE-1)) )
240 {
241 alloc_pt(l2tab, vl2tab);
242 if ( !((unsigned long)vl3e & (PAGE_SIZE-1)) )
243 {
244 alloc_pt(l3tab, vl3tab);
245 vl3e = &vl3tab[l3_table_offset(dsi_v_start + (count<<PAGE_SHIFT))];
246 *vl4e = l3tab | L4_PROT;
247 vl4e++;
248 }
249 vl2e = &vl2tab[l2_table_offset(dsi_v_start + (count<<PAGE_SHIFT))];
250 *vl3e = l2tab | L3_PROT;
251 vl3e++;
252 }
253 vl1e = &vl1tab[l1_table_offset(dsi_v_start + (count<<PAGE_SHIFT))];
254 *vl2e = l1tab | L2_PROT;
255 vl2e++;
256 }
258 *vl1e = (page_array[count] << PAGE_SHIFT) | L1_PROT;
259 if ( (count >= ((vpt_start-dsi_v_start)>>PAGE_SHIFT)) &&
260 (count < ((vpt_end -dsi_v_start)>>PAGE_SHIFT)) )
261 {
262 *vl1e &= ~_PAGE_RW;
263 }
264 vl1e++;
265 }
267 munmap(vl1tab, PAGE_SIZE);
268 munmap(vl2tab, PAGE_SIZE);
269 munmap(vl3tab, PAGE_SIZE);
270 munmap(vl4tab, PAGE_SIZE);
271 return 0;
273 error_out:
274 if (vl1tab)
275 munmap(vl1tab, PAGE_SIZE);
276 if (vl2tab)
277 munmap(vl2tab, PAGE_SIZE);
278 if (vl3tab)
279 munmap(vl3tab, PAGE_SIZE);
280 if (vl4tab)
281 munmap(vl4tab, PAGE_SIZE);
282 return -1;
283 }
284 #endif
286 #ifdef __ia64__
287 #include <asm/fpu.h> /* for FPSR_DEFAULT */
288 static int setup_guest(int xc_handle,
289 u32 dom,
290 char *image, unsigned long image_size,
291 gzFile initrd_gfd, unsigned long initrd_len,
292 unsigned long nr_pages,
293 unsigned long *pvsi, unsigned long *pvke,
294 unsigned long *pvss, vcpu_guest_context_t *ctxt,
295 const char *cmdline,
296 unsigned long shared_info_frame,
297 unsigned long flags,
298 unsigned int vcpus,
299 unsigned int store_evtchn, unsigned long *store_mfn)
300 {
301 unsigned long *page_array = NULL;
302 struct load_funcs load_funcs;
303 struct domain_setup_info dsi;
304 unsigned long start_page;
305 int rc;
307 rc = probeimageformat(image, image_size, &load_funcs);
308 if ( rc != 0 )
309 goto error_out;
311 memset(&dsi, 0, sizeof(struct domain_setup_info));
313 rc = (load_funcs.parseimage)(image, image_size, &dsi);
314 if ( rc != 0 )
315 goto error_out;
317 dsi.v_start = round_pgdown(dsi.v_start);
318 dsi.v_end = round_pgup(dsi.v_end);
320 start_page = dsi.v_start >> PAGE_SHIFT;
321 nr_pages = (dsi.v_end - dsi.v_start) >> PAGE_SHIFT;
322 if ( (page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL )
323 {
324 PERROR("Could not allocate memory");
325 goto error_out;
326 }
328 if ( xc_ia64_get_pfn_list(xc_handle, dom, page_array, start_page, nr_pages) != nr_pages )
329 {
330 PERROR("Could not get the page frame list");
331 goto error_out;
332 }
334 (load_funcs.loadimage)(image, image_size, xc_handle, dom, page_array,
335 &dsi);
337 *pvke = dsi.v_kernentry;
338 return 0;
340 error_out:
341 free(page_array);
342 return -1;
343 }
344 #else /* x86 */
345 static int setup_guest(int xc_handle,
346 u32 dom,
347 char *image, unsigned long image_size,
348 gzFile initrd_gfd, unsigned long initrd_len,
349 unsigned long nr_pages,
350 unsigned long *pvsi, unsigned long *pvke,
351 unsigned long *pvss, vcpu_guest_context_t *ctxt,
352 const char *cmdline,
353 unsigned long shared_info_frame,
354 unsigned long flags,
355 unsigned int vcpus,
356 unsigned int store_evtchn, unsigned long *store_mfn,
357 unsigned int console_evtchn, unsigned long *console_mfn)
358 {
359 unsigned long *page_array = NULL;
360 unsigned long count, i;
361 start_info_t *start_info;
362 shared_info_t *shared_info;
363 xc_mmu_t *mmu = NULL;
364 int rc;
366 unsigned long nr_pt_pages;
367 unsigned long physmap_pfn;
368 unsigned long *physmap, *physmap_e;
370 struct load_funcs load_funcs;
371 struct domain_setup_info dsi;
372 unsigned long vinitrd_start;
373 unsigned long vinitrd_end;
374 unsigned long vphysmap_start;
375 unsigned long vphysmap_end;
376 unsigned long vstartinfo_start;
377 unsigned long vstartinfo_end;
378 unsigned long vstoreinfo_start;
379 unsigned long vstoreinfo_end;
380 unsigned long vconsole_start;
381 unsigned long vconsole_end;
382 unsigned long vstack_start;
383 unsigned long vstack_end;
384 unsigned long vpt_start;
385 unsigned long vpt_end;
386 unsigned long v_end;
388 rc = probeimageformat(image, image_size, &load_funcs);
389 if ( rc != 0 )
390 goto error_out;
392 memset(&dsi, 0, sizeof(struct domain_setup_info));
394 rc = (load_funcs.parseimage)(image, image_size, &dsi);
395 if ( rc != 0 )
396 goto error_out;
398 if ( (dsi.v_start & (PAGE_SIZE-1)) != 0 )
399 {
400 PERROR("Guest OS must load to a page boundary.\n");
401 goto error_out;
402 }
404 /*
405 * Why do we need this? The number of page-table frames depends on the
406 * size of the bootstrap address space. But the size of the address space
407 * depends on the number of page-table frames (since each one is mapped
408 * read-only). We have a pair of simultaneous equations in two unknowns,
409 * which we solve by exhaustive search.
410 */
411 vinitrd_start = round_pgup(dsi.v_end);
412 vinitrd_end = vinitrd_start + initrd_len;
413 vphysmap_start = round_pgup(vinitrd_end);
414 vphysmap_end = vphysmap_start + (nr_pages * sizeof(unsigned long));
415 vstartinfo_start = round_pgup(vphysmap_end);
416 vstartinfo_end = vstartinfo_start + PAGE_SIZE;
417 vstoreinfo_start = vstartinfo_end;
418 vstoreinfo_end = vstoreinfo_start + PAGE_SIZE;
419 vconsole_start = vstoreinfo_end;
420 vconsole_end = vconsole_start + PAGE_SIZE;
421 vpt_start = vconsole_end;
423 for ( nr_pt_pages = 2; ; nr_pt_pages++ )
424 {
425 vpt_end = vpt_start + (nr_pt_pages * PAGE_SIZE);
426 vstack_start = vpt_end;
427 vstack_end = vstack_start + PAGE_SIZE;
428 v_end = (vstack_end + (1UL<<22)-1) & ~((1UL<<22)-1);
429 if ( (v_end - vstack_end) < (512UL << 10) )
430 v_end += 1UL << 22; /* Add extra 4MB to get >= 512kB padding. */
431 #if defined(__i386__)
432 if (dsi.pae_kernel) {
433 /* FIXME: assumes one L2 pgtable @ 0xc0000000 */
434 if ( (((v_end - dsi.v_start + ((1<<L2_PAGETABLE_SHIFT_PAE)-1)) >>
435 L2_PAGETABLE_SHIFT_PAE) + 2) <= nr_pt_pages )
436 break;
437 } else {
438 if ( (((v_end - dsi.v_start + ((1<<L2_PAGETABLE_SHIFT)-1)) >>
439 L2_PAGETABLE_SHIFT) + 1) <= nr_pt_pages )
440 break;
441 }
442 #endif
443 #if defined(__x86_64__)
444 #define NR(_l,_h,_s) \
445 (((((_h) + ((1UL<<(_s))-1)) & ~((1UL<<(_s))-1)) - \
446 ((_l) & ~((1UL<<(_s))-1))) >> (_s))
447 if ( (1 + /* # L4 */
448 NR(dsi.v_start, v_end, L4_PAGETABLE_SHIFT) + /* # L3 */
449 NR(dsi.v_start, v_end, L3_PAGETABLE_SHIFT) + /* # L2 */
450 NR(dsi.v_start, v_end, L2_PAGETABLE_SHIFT)) /* # L1 */
451 <= nr_pt_pages )
452 break;
453 #endif
454 }
456 #define _p(a) ((void *) (a))
458 printf("VIRTUAL MEMORY ARRANGEMENT:\n"
459 " Loaded kernel: %p->%p\n"
460 " Init. ramdisk: %p->%p\n"
461 " Phys-Mach map: %p->%p\n"
462 " Start info: %p->%p\n"
463 " Store page: %p->%p\n"
464 " Console page: %p->%p\n"
465 " Page tables: %p->%p\n"
466 " Boot stack: %p->%p\n"
467 " TOTAL: %p->%p\n",
468 _p(dsi.v_kernstart), _p(dsi.v_kernend),
469 _p(vinitrd_start), _p(vinitrd_end),
470 _p(vphysmap_start), _p(vphysmap_end),
471 _p(vstartinfo_start), _p(vstartinfo_end),
472 _p(vstoreinfo_start), _p(vstoreinfo_end),
473 _p(vconsole_start), _p(vconsole_end),
474 _p(vpt_start), _p(vpt_end),
475 _p(vstack_start), _p(vstack_end),
476 _p(dsi.v_start), _p(v_end));
477 printf(" ENTRY ADDRESS: %p\n", _p(dsi.v_kernentry));
479 if ( (v_end - dsi.v_start) > (nr_pages * PAGE_SIZE) )
480 {
481 printf("Initial guest OS requires too much space\n"
482 "(%luMB is greater than %luMB limit)\n",
483 (v_end-dsi.v_start)>>20, (nr_pages<<PAGE_SHIFT)>>20);
484 goto error_out;
485 }
487 if ( (page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL )
488 {
489 PERROR("Could not allocate memory");
490 goto error_out;
491 }
493 if ( xc_get_pfn_list(xc_handle, dom, page_array, nr_pages) != nr_pages )
494 {
495 PERROR("Could not get the page frame list");
496 goto error_out;
497 }
499 (load_funcs.loadimage)(image, image_size, xc_handle, dom, page_array,
500 &dsi);
502 /* Load the initial ramdisk image. */
503 if ( initrd_len != 0 )
504 {
505 for ( i = (vinitrd_start - dsi.v_start);
506 i < (vinitrd_end - dsi.v_start); i += PAGE_SIZE )
507 {
508 char page[PAGE_SIZE];
509 if ( gzread(initrd_gfd, page, PAGE_SIZE) == -1 )
510 {
511 PERROR("Error reading initrd image, could not");
512 goto error_out;
513 }
514 xc_copy_to_domain_page(xc_handle, dom,
515 page_array[i>>PAGE_SHIFT], page);
516 }
517 }
519 if ( (mmu = xc_init_mmu_updates(xc_handle, dom)) == NULL )
520 goto error_out;
522 /* setup page tables */
523 #if defined(__i386__)
524 if (dsi.pae_kernel)
525 rc = setup_pg_tables_pae(xc_handle, dom, ctxt,
526 dsi.v_start, v_end,
527 page_array, vpt_start, vpt_end);
528 else {
529 rc = setup_pg_tables(xc_handle, dom, ctxt,
530 dsi.v_start, v_end,
531 page_array, vpt_start, vpt_end);
532 }
533 #endif
534 #if defined(__x86_64__)
535 rc = setup_pg_tables_64(xc_handle, dom, ctxt,
536 dsi.v_start, v_end,
537 page_array, vpt_start, vpt_end);
538 #endif
539 if (0 != rc)
540 goto error_out;
542 /* Write the phys->machine and machine->phys table entries. */
543 physmap_pfn = (vphysmap_start - dsi.v_start) >> PAGE_SHIFT;
544 physmap = physmap_e = xc_map_foreign_range(
545 xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
546 page_array[physmap_pfn++]);
548 for ( count = 0; count < nr_pages; count++ )
549 {
550 if ( xc_add_mmu_update(xc_handle, mmu,
551 ((unsigned long long)page_array[count] << PAGE_SHIFT) |
552 MMU_MACHPHYS_UPDATE, count) )
553 {
554 fprintf(stderr,"m2p update failure p=%lx m=%lx\n",count,page_array[count] );
555 munmap(physmap, PAGE_SIZE);
556 goto error_out;
557 }
558 *physmap_e++ = page_array[count];
559 if ( ((unsigned long)physmap_e & (PAGE_SIZE-1)) == 0 )
560 {
561 munmap(physmap, PAGE_SIZE);
562 physmap = physmap_e = xc_map_foreign_range(
563 xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
564 page_array[physmap_pfn++]);
565 }
566 }
567 munmap(physmap, PAGE_SIZE);
569 #if defined(__i386__)
570 /*
571 * Pin down l2tab addr as page dir page - causes hypervisor to provide
572 * correct protection for the page
573 */
574 if (dsi.pae_kernel) {
575 if ( pin_table(xc_handle, MMUEXT_PIN_L3_TABLE,
576 ctxt->ctrlreg[3] >> PAGE_SHIFT, dom) )
577 goto error_out;
578 } else {
579 if ( pin_table(xc_handle, MMUEXT_PIN_L2_TABLE,
580 ctxt->ctrlreg[3] >> PAGE_SHIFT, dom) )
581 goto error_out;
582 }
583 #endif
585 #if defined(__x86_64__)
586 /*
587 * Pin down l4tab addr as page dir page - causes hypervisor to provide
588 * correct protection for the page
589 */
590 if ( pin_table(xc_handle, MMUEXT_PIN_L4_TABLE,
591 ctxt->ctrlreg[3] >> PAGE_SHIFT, dom) )
592 goto error_out;
593 #endif
595 *store_mfn = page_array[(vstoreinfo_start-dsi.v_start) >> PAGE_SHIFT];
596 *console_mfn = page_array[(vconsole_start-dsi.v_start) >> PAGE_SHIFT];
599 start_info = xc_map_foreign_range(
600 xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
601 page_array[(vstartinfo_start-dsi.v_start)>>PAGE_SHIFT]);
602 memset(start_info, 0, sizeof(*start_info));
603 start_info->nr_pages = nr_pages;
604 start_info->shared_info = shared_info_frame << PAGE_SHIFT;
605 start_info->flags = flags;
606 start_info->pt_base = vpt_start;
607 start_info->nr_pt_frames = nr_pt_pages;
608 start_info->mfn_list = vphysmap_start;
609 start_info->store_mfn = *store_mfn;
610 start_info->store_evtchn = store_evtchn;
611 start_info->console_mfn = *console_mfn;
612 start_info->console_evtchn = console_evtchn;
613 if ( initrd_len != 0 )
614 {
615 start_info->mod_start = vinitrd_start;
616 start_info->mod_len = initrd_len;
617 }
618 strncpy((char *)start_info->cmd_line, cmdline, MAX_GUEST_CMDLINE);
619 start_info->cmd_line[MAX_GUEST_CMDLINE-1] = '\0';
620 munmap(start_info, PAGE_SIZE);
622 /* shared_info page starts its life empty. */
623 shared_info = xc_map_foreign_range(
624 xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, shared_info_frame);
625 memset(shared_info, 0, sizeof(shared_info_t));
626 /* Mask all upcalls... */
627 for ( i = 0; i < MAX_VIRT_CPUS; i++ )
628 shared_info->vcpu_data[i].evtchn_upcall_mask = 1;
630 shared_info->n_vcpu = vcpus;
631 printf(" VCPUS: %d\n", shared_info->n_vcpu);
633 munmap(shared_info, PAGE_SIZE);
635 /* Send the page update requests down to the hypervisor. */
636 if ( xc_finish_mmu_updates(xc_handle, mmu) )
637 goto error_out;
639 free(mmu);
640 free(page_array);
642 *pvsi = vstartinfo_start;
643 *pvss = vstack_start;
644 *pvke = dsi.v_kernentry;
646 return 0;
648 error_out:
649 free(mmu);
650 free(page_array);
651 return -1;
652 }
653 #endif
/*
 * Build a Linux paravirtual guest in the (already created, paused) domain
 * 'domid': read the kernel image (and optional gzipped ramdisk), call the
 * per-architecture setup_guest(), and install the resulting initial vcpu
 * context via DOM0_SETDOMAININFO.
 *
 * Returns 0 on success, nonzero/negative on failure.
 *
 * Fixes over the original:
 *  - xc_get_tot_pages() is captured in a signed temporary; the old code
 *    assigned it straight to the unsigned 'nr_pages', so the "< 0"
 *    failure check could never be true.
 *  - mlock() failure now routes through error_out (it previously did
 *    'return 1', leaking 'image', 'initrd_fd' and 'initrd_gfd').
 */
int xc_linux_build(int xc_handle,
                   u32 domid,
                   const char *image_name,
                   const char *ramdisk_name,
                   const char *cmdline,
                   unsigned long flags,
                   unsigned int vcpus,
                   unsigned int store_evtchn,
                   unsigned long *store_mfn,
                   unsigned int console_evtchn,
                   unsigned long *console_mfn)
{
    dom0_op_t launch_op, op;
    int initrd_fd = -1;
    gzFile initrd_gfd = NULL;
    int rc, i;
    vcpu_guest_context_t st_ctxt, *ctxt = &st_ctxt;
    long tot_pages;
    unsigned long nr_pages;
    char *image = NULL;
    unsigned long image_size, initrd_size = 0;
    unsigned long vstartinfo_start, vkern_entry, vstack_start;

    /* Signed temporary: the failure value of xc_get_tot_pages() would be
     * invisible if compared against an unsigned long. */
    if ( (tot_pages = xc_get_tot_pages(xc_handle, domid)) < 0 )
    {
        PERROR("Could not find total pages for domain");
        goto error_out;
    }
    nr_pages = (unsigned long)tot_pages;

    if ( (image = xc_read_kernel_image(image_name, &image_size)) == NULL )
        goto error_out;

    /* Optional ramdisk: open it and wrap the fd for gzip decompression. */
    if ( (ramdisk_name != NULL) && (strlen(ramdisk_name) != 0) )
    {
        if ( (initrd_fd = open(ramdisk_name, O_RDONLY)) < 0 )
        {
            PERROR("Could not open the initial ramdisk image");
            goto error_out;
        }

        initrd_size = xc_get_filesz(initrd_fd);

        if ( (initrd_gfd = gzdopen(initrd_fd, "rb")) == NULL )
        {
            PERROR("Could not allocate decompression state for initrd");
            goto error_out;
        }
    }

    /* The context is handed to the hypervisor; it must not be paged out. */
    if ( mlock(&st_ctxt, sizeof(st_ctxt) ) )
    {
        PERROR("xc_linux_build: ctxt mlock failed");
        goto error_out;   /* was 'return 1': leaked image/initrd resources */
    }

    op.cmd = DOM0_GETDOMAININFO;
    op.u.getdomaininfo.domain = (domid_t)domid;
    if ( (xc_dom0_op(xc_handle, &op) < 0) ||
         ((u16)op.u.getdomaininfo.domain != domid) )
    {
        PERROR("Could not get info on domain");
        goto error_out;
    }

    if ( xc_domain_get_vcpu_context(xc_handle, domid, 0, ctxt) )
    {
        PERROR("Could not get vcpu context");
        goto error_out;
    }

    /* The domain must be freshly created: paused and (on x86) with no
     * page tables installed yet. */
    if ( !(op.u.getdomaininfo.flags & DOMFLAGS_PAUSED) ||
#ifdef __ia64__
         0 )
#else
         (ctxt->ctrlreg[3] != 0) )
#endif
    {
        ERROR("Domain is already constructed");
        goto error_out;
    }

    if ( setup_guest(xc_handle, domid, image, image_size,
                     initrd_gfd, initrd_size, nr_pages,
                     &vstartinfo_start, &vkern_entry,
                     &vstack_start, ctxt, cmdline,
                     op.u.getdomaininfo.shared_info_frame,
                     flags, vcpus,
                     store_evtchn, store_mfn,
                     console_evtchn, console_mfn) < 0 )
    {
        ERROR("Error constructing guest OS");
        goto error_out;
    }

    if ( initrd_fd >= 0 )
        close(initrd_fd);
    if ( initrd_gfd )
        gzclose(initrd_gfd);
    free(image);

#ifdef __ia64__
    /* based on new_thread in xen/arch/ia64/domain.c */
    ctxt->regs.cr_ipsr = 0; /* all necessary bits filled by hypervisor */
    ctxt->regs.cr_iip = vkern_entry;
    ctxt->regs.cr_ifs = 1UL << 63;
    ctxt->regs.ar_fpsr = FPSR_DEFAULT;
    /* ctxt->regs.r28 = dom_fw_setup(); currently done by hypervisor, should move here */
    ctxt->vcpu.privregs = 0;
    ctxt->shared.flags = flags;
    i = 0; /* silence unused variable warning */
#else /* x86 */
    /*
     * Initial register values:
     * DS,ES,FS,GS = FLAT_KERNEL_DS
     * CS:EIP = FLAT_KERNEL_CS:start_pc
     * SS:ESP = FLAT_KERNEL_DS:start_stack
     * ESI = start_info
     * [EAX,EBX,ECX,EDX,EDI,EBP are zero]
     * EFLAGS = IF | 2 (bit 1 is reserved and should always be 1)
     */
    ctxt->user_regs.ds = FLAT_KERNEL_DS;
    ctxt->user_regs.es = FLAT_KERNEL_DS;
    ctxt->user_regs.fs = FLAT_KERNEL_DS;
    ctxt->user_regs.gs = FLAT_KERNEL_DS;
    ctxt->user_regs.ss = FLAT_KERNEL_SS;
    ctxt->user_regs.cs = FLAT_KERNEL_CS;
    ctxt->user_regs.eip = vkern_entry;
    ctxt->user_regs.esp = vstack_start + PAGE_SIZE;
    ctxt->user_regs.esi = vstartinfo_start;
    ctxt->user_regs.eflags = 1 << 9; /* Interrupt Enable */

    /* FPU is set up to default initial state. */
    memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));

    /* Virtual IDT is empty at start-of-day. */
    for ( i = 0; i < 256; i++ )
    {
        ctxt->trap_ctxt[i].vector = i;
        ctxt->trap_ctxt[i].cs = FLAT_KERNEL_CS;
    }

    /* No LDT. */
    ctxt->ldt_ents = 0;

    /* Use the default Xen-provided GDT. */
    ctxt->gdt_ents = 0;

    /* Ring 1 stack is the initial stack. */
    ctxt->kernel_ss = FLAT_KERNEL_SS;
    ctxt->kernel_sp = vstack_start + PAGE_SIZE;

    /* No debugging. */
    memset(ctxt->debugreg, 0, sizeof(ctxt->debugreg));

    /* No callback handlers. */
#if defined(__i386__)
    ctxt->event_callback_cs = FLAT_KERNEL_CS;
    ctxt->event_callback_eip = 0;
    ctxt->failsafe_callback_cs = FLAT_KERNEL_CS;
    ctxt->failsafe_callback_eip = 0;
#elif defined(__x86_64__)
    ctxt->event_callback_eip = 0;
    ctxt->failsafe_callback_eip = 0;
    ctxt->syscall_callback_eip = 0;
#endif
#endif /* x86 */

    memset( &launch_op, 0, sizeof(launch_op) );

    launch_op.u.setdomaininfo.domain = (domid_t)domid;
    launch_op.u.setdomaininfo.vcpu = 0;
    launch_op.u.setdomaininfo.ctxt = ctxt;

    launch_op.cmd = DOM0_SETDOMAININFO;
    rc = xc_dom0_op(xc_handle, &launch_op);

    return rc;

 error_out:
    if ( initrd_gfd != NULL )
        gzclose(initrd_gfd);   /* also closes the underlying initrd_fd */
    else if ( initrd_fd >= 0 )
        close(initrd_fd);
    free(image);

    return -1;
}