ia64/xen-unstable

tools/libxc/xc_linux_build.c @ 6946:e703abaf6e3d

Add behaviour to the remove methods to remove the transaction's path itself. This allows us to write Remove(path) to remove the specified path rather than having to slice the path ourselves.

author   emellor@ewan
date     Sun Sep 18 14:42:13 2005 +0100
parents  3233e7ecfa9f
children 619e3d6f01b3 3133e64d0462

/******************************************************************************
 * xc_linux_build.c
 */

#include "xg_private.h"
#include <xenctrl.h>

#if defined(__i386__)
#define ELFSIZE 32
#endif

#if defined(__x86_64__) || defined(__ia64__)
#define ELFSIZE 64
#endif

#include "xc_elf.h"
#include "xc_aout9.h"
#include <stdlib.h>
#include <unistd.h>
#include <zlib.h>

#if defined(__i386__)
#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
#define L3_PROT (_PAGE_PRESENT)
#endif

#if defined(__x86_64__)
#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
#define L3_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
#define L4_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
#endif

#define round_pgup(_p)   (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
#define round_pgdown(_p) ((_p)&PAGE_MASK)

#ifdef __ia64__
#define probe_aout9(image,image_size,load_funcs) 1
#endif

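/*
 * The probe_* loaders return 0 when they recognise the image.  There is no
 * a.out loader on ia64, so the stub above makes probe_aout9() always report
 * "not recognised".
 */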
static int probeimageformat(char *image,
                            unsigned long image_size,
                            struct load_funcs *load_funcs)
{
    if ( probe_elf(image, image_size, load_funcs) &&
         probe_bin(image, image_size, load_funcs) &&
         probe_aout9(image, image_size, load_funcs) )
    {
        ERROR("Unrecognized image format");
        return -EINVAL;
    }

    return 0;
}

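/*
 * alloc_pt(ltab, vltab): take the next free frame from page_array (the
 * caller's ppt_alloc cursor tracks which frames are already used), set
 * 'ltab' to its machine address, and leave it mapped, zeroed, at 'vltab'
 * (any previous mapping at 'vltab' is dropped first).  Expects xc_handle,
 * dom, page_array, ppt_alloc and an error_out label in the caller's scope.
 */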
#define alloc_pt(ltab, vltab) \
        ltab = (unsigned long long)(page_array[ppt_alloc++]) << PAGE_SHIFT; \
        if (vltab != NULL) { \
            munmap(vltab, PAGE_SIZE); \
        } \
        if ((vltab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, \
                                          PROT_READ|PROT_WRITE, \
                                          ltab >> PAGE_SHIFT)) == NULL) { \
            goto error_out; \
        } \
        memset(vltab, 0, PAGE_SIZE);

#if defined(__i386__)

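/*
 * Build the two-level (non-PAE) bootstrap page tables.  Page-table frames
 * are taken from page_array starting at the pseudo-phys offset of vpt_start;
 * any mapping that falls inside [vpt_start, vpt_end) is made read-only,
 * since Xen requires that active page-table pages never be writable by the
 * guest.
 */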
static int setup_pg_tables(int xc_handle, u32 dom,
                           vcpu_guest_context_t *ctxt,
                           unsigned long dsi_v_start,
                           unsigned long v_end,
                           unsigned long *page_array,
                           unsigned long vpt_start,
                           unsigned long vpt_end)
{
    l1_pgentry_t *vl1tab=NULL, *vl1e=NULL;
    l2_pgentry_t *vl2tab=NULL, *vl2e=NULL;
    unsigned long l1tab = 0;
    unsigned long l2tab = 0;
    unsigned long ppt_alloc;
    unsigned long count;

    ppt_alloc = (vpt_start - dsi_v_start) >> PAGE_SHIFT;
    alloc_pt(l2tab, vl2tab);
    vl2e = &vl2tab[l2_table_offset(dsi_v_start)];
    ctxt->ctrlreg[3] = l2tab;

    for ( count = 0; count < ((v_end - dsi_v_start) >> PAGE_SHIFT); count++ )
    {
        if ( ((unsigned long)vl1e & (PAGE_SIZE-1)) == 0 )
        {
            alloc_pt(l1tab, vl1tab);
            vl1e = &vl1tab[l1_table_offset(dsi_v_start + (count << PAGE_SHIFT))];
            *vl2e++ = l1tab | L2_PROT;
        }

        *vl1e = (page_array[count] << PAGE_SHIFT) | L1_PROT;
        if ( (count >= ((vpt_start - dsi_v_start) >> PAGE_SHIFT)) &&
             (count <  ((vpt_end   - dsi_v_start) >> PAGE_SHIFT)) )
            *vl1e &= ~_PAGE_RW;
        vl1e++;
    }
    munmap(vl1tab, PAGE_SIZE);
    munmap(vl2tab, PAGE_SIZE);
    return 0;

 error_out:
    if (vl1tab)
        munmap(vl1tab, PAGE_SIZE);
    if (vl2tab)
        munmap(vl2tab, PAGE_SIZE);
    return -1;
}

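/*
 * PAE variant: three-level tables with 64-bit entries.  The L3 page
 * directory must itself sit below 4GB, hence the relocation through
 * xc_make_page_below_4G() and the explicit check on l3tab below.
 */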
static int setup_pg_tables_pae(int xc_handle, u32 dom,
                               vcpu_guest_context_t *ctxt,
                               unsigned long dsi_v_start,
                               unsigned long v_end,
                               unsigned long *page_array,
                               unsigned long vpt_start,
                               unsigned long vpt_end)
{
    l1_pgentry_64_t *vl1tab=NULL, *vl1e=NULL;
    l2_pgentry_64_t *vl2tab=NULL, *vl2e=NULL;
    l3_pgentry_64_t *vl3tab=NULL, *vl3e=NULL;
    unsigned long long l1tab = 0;
    unsigned long long l2tab = 0;
    unsigned long long l3tab = 0;
    unsigned long ppt_alloc;
    unsigned long count;

    /* First allocate page for page dir. */
    ppt_alloc = (vpt_start - dsi_v_start) >> PAGE_SHIFT;

    if ( page_array[ppt_alloc] > 0xfffff )
    {
        unsigned long nmfn;
        nmfn = xc_make_page_below_4G(xc_handle, dom, page_array[ppt_alloc]);
        if ( nmfn == 0 )
        {
            fprintf(stderr, "Couldn't get a page below 4GB :-(\n");
            goto error_out;
        }
        page_array[ppt_alloc] = nmfn;
    }

    alloc_pt(l3tab, vl3tab);
    vl3e = &vl3tab[l3_table_offset_pae(dsi_v_start)];
    ctxt->ctrlreg[3] = l3tab;

    if ( l3tab > 0xfffff000ULL )
    {
        fprintf(stderr, "L3TAB = %llx above 4GB!\n", l3tab);
        goto error_out;
    }

    for ( count = 0; count < ((v_end - dsi_v_start) >> PAGE_SHIFT); count++ )
    {
        if ( !((unsigned long)vl1e & (PAGE_SIZE-1)) )
        {
            alloc_pt(l1tab, vl1tab);

            if ( !((unsigned long)vl2e & (PAGE_SIZE-1)) )
            {
                alloc_pt(l2tab, vl2tab);
                vl2e = &vl2tab[l2_table_offset_pae(dsi_v_start + (count << PAGE_SHIFT))];
                *vl3e = l2tab | L3_PROT;
                vl3e++;
            }
            vl1e = &vl1tab[l1_table_offset_pae(dsi_v_start + (count << PAGE_SHIFT))];
            *vl2e = l1tab | L2_PROT;
            vl2e++;
        }

        *vl1e = (page_array[count] << PAGE_SHIFT) | L1_PROT;
        if ( (count >= ((vpt_start - dsi_v_start) >> PAGE_SHIFT)) &&
             (count <  ((vpt_end   - dsi_v_start) >> PAGE_SHIFT)) )
        {
            *vl1e &= ~_PAGE_RW;
        }
        vl1e++;
    }

    munmap(vl1tab, PAGE_SIZE);
    munmap(vl2tab, PAGE_SIZE);
    munmap(vl3tab, PAGE_SIZE);
    return 0;

 error_out:
    if (vl1tab)
        munmap(vl1tab, PAGE_SIZE);
    if (vl2tab)
        munmap(vl2tab, PAGE_SIZE);
    if (vl3tab)
        munmap(vl3tab, PAGE_SIZE);
    return -1;
}

#endif

#if defined(__x86_64__)

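/*
 * x86_64 variant: the full four-level hierarchy (L4 -> L3 -> L2 -> L1),
 * otherwise the same scheme as the 32-bit builders above.
 */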
static int setup_pg_tables_64(int xc_handle, u32 dom,
                              vcpu_guest_context_t *ctxt,
                              unsigned long dsi_v_start,
                              unsigned long v_end,
                              unsigned long *page_array,
                              unsigned long vpt_start,
                              unsigned long vpt_end)
{
    l1_pgentry_t *vl1tab=NULL, *vl1e=NULL;
    l2_pgentry_t *vl2tab=NULL, *vl2e=NULL;
    l3_pgentry_t *vl3tab=NULL, *vl3e=NULL;
    l4_pgentry_t *vl4tab=NULL, *vl4e=NULL;
    unsigned long l2tab = 0;
    unsigned long l1tab = 0;
    unsigned long l3tab = 0;
    unsigned long l4tab = 0;
    unsigned long ppt_alloc;
    unsigned long count;

    /* First allocate page for page dir. */
    ppt_alloc = (vpt_start - dsi_v_start) >> PAGE_SHIFT;
    alloc_pt(l4tab, vl4tab);
    vl4e = &vl4tab[l4_table_offset(dsi_v_start)];
    ctxt->ctrlreg[3] = l4tab;

    for ( count = 0; count < ((v_end - dsi_v_start) >> PAGE_SHIFT); count++ )
    {
        if ( !((unsigned long)vl1e & (PAGE_SIZE-1)) )
        {
            alloc_pt(l1tab, vl1tab);

            if ( !((unsigned long)vl2e & (PAGE_SIZE-1)) )
            {
                alloc_pt(l2tab, vl2tab);
                if ( !((unsigned long)vl3e & (PAGE_SIZE-1)) )
                {
                    alloc_pt(l3tab, vl3tab);
                    vl3e = &vl3tab[l3_table_offset(dsi_v_start + (count << PAGE_SHIFT))];
                    *vl4e = l3tab | L4_PROT;
                    vl4e++;
                }
                vl2e = &vl2tab[l2_table_offset(dsi_v_start + (count << PAGE_SHIFT))];
                *vl3e = l2tab | L3_PROT;
                vl3e++;
            }
            vl1e = &vl1tab[l1_table_offset(dsi_v_start + (count << PAGE_SHIFT))];
            *vl2e = l1tab | L2_PROT;
            vl2e++;
        }

        *vl1e = (page_array[count] << PAGE_SHIFT) | L1_PROT;
        if ( (count >= ((vpt_start - dsi_v_start) >> PAGE_SHIFT)) &&
             (count <  ((vpt_end   - dsi_v_start) >> PAGE_SHIFT)) )
        {
            *vl1e &= ~_PAGE_RW;
        }
        vl1e++;
    }

    munmap(vl1tab, PAGE_SIZE);
    munmap(vl2tab, PAGE_SIZE);
    munmap(vl3tab, PAGE_SIZE);
    munmap(vl4tab, PAGE_SIZE);
    return 0;

 error_out:
    if (vl1tab)
        munmap(vl1tab, PAGE_SIZE);
    if (vl2tab)
        munmap(vl2tab, PAGE_SIZE);
    if (vl3tab)
        munmap(vl3tab, PAGE_SIZE);
    if (vl4tab)
        munmap(vl4tab, PAGE_SIZE);
    return -1;
}
#endif

#ifdef __ia64__
#include <asm/fpu.h> /* for FPSR_DEFAULT */

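/*
 * ia64 guest setup is much simpler: no bootstrap page tables are built
 * here.  The last three frames of the domain (nr_pages - 3 onwards) hold
 * start_info, the xenstore page and the console page, in that order.
 */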
static int setup_guest(int xc_handle,
                       u32 dom,
                       char *image, unsigned long image_size,
                       gzFile initrd_gfd, unsigned long initrd_len,
                       unsigned long nr_pages,
                       unsigned long *pvsi, unsigned long *pvke,
                       unsigned long *pvss, vcpu_guest_context_t *ctxt,
                       const char *cmdline,
                       unsigned long shared_info_frame,
                       unsigned long flags,
                       unsigned int vcpus,
                       unsigned int store_evtchn, unsigned long *store_mfn,
                       unsigned int console_evtchn, unsigned long *console_mfn)
{
    unsigned long *page_array = NULL;
    struct load_funcs load_funcs;
    struct domain_setup_info dsi;
    unsigned long start_page, pgnr;
    start_info_t *start_info;
    int rc;

    rc = probeimageformat(image, image_size, &load_funcs);
    if ( rc != 0 )
        goto error_out;

    memset(&dsi, 0, sizeof(struct domain_setup_info));

    rc = (load_funcs.parseimage)(image, image_size, &dsi);
    if ( rc != 0 )
        goto error_out;

    dsi.v_start = round_pgdown(dsi.v_start);
    dsi.v_end   = round_pgup(dsi.v_end);

    start_page = dsi.v_start >> PAGE_SHIFT;
    pgnr = (dsi.v_end - dsi.v_start) >> PAGE_SHIFT;
    if ( (page_array = malloc(pgnr * sizeof(unsigned long))) == NULL )
    {
        PERROR("Could not allocate memory");
        goto error_out;
    }

    if ( xc_ia64_get_pfn_list(xc_handle, dom, page_array, start_page, pgnr) != pgnr )
    {
        PERROR("Could not get the page frame list");
        goto error_out;
    }

    (load_funcs.loadimage)(image, image_size, xc_handle, dom, page_array,
                           &dsi);

    *pvke = dsi.v_kernentry;

    /* Now need to retrieve machine pfn for system pages:
     * start_info/store/console
     */
    pgnr = 3;
    if ( xc_ia64_get_pfn_list(xc_handle, dom, page_array, nr_pages - 3, pgnr) != pgnr )
    {
        PERROR("Could not get page frame for xenstore");
        goto error_out;
    }

    *store_mfn = page_array[1];
    *console_mfn = page_array[2];
    printf("store_mfn: 0x%lx, console_mfn: 0x%lx\n",
           *store_mfn, *console_mfn);

    start_info = xc_map_foreign_range(
        xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, page_array[0]);
    memset(start_info, 0, sizeof(*start_info));
    start_info->flags = flags;
    start_info->store_mfn = nr_pages - 2;
    start_info->store_evtchn = store_evtchn;
    start_info->console_mfn = nr_pages - 1;
    start_info->console_evtchn = console_evtchn;
    munmap(start_info, PAGE_SIZE);

    free(page_array);
    return 0;

 error_out:
    free(page_array);
    return -1;
}
#else /* x86 */

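/*
 * x86 guest setup: lay out the bootstrap virtual address space (kernel,
 * initrd, phys-to-machine map, start_info, store and console pages, page
 * tables, boot stack), build the page tables, then fill in start_info and
 * the (initially empty) shared_info page.
 */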
static int setup_guest(int xc_handle,
                       u32 dom,
                       char *image, unsigned long image_size,
                       gzFile initrd_gfd, unsigned long initrd_len,
                       unsigned long nr_pages,
                       unsigned long *pvsi, unsigned long *pvke,
                       unsigned long *pvss, vcpu_guest_context_t *ctxt,
                       const char *cmdline,
                       unsigned long shared_info_frame,
                       unsigned long flags,
                       unsigned int vcpus,
                       unsigned int store_evtchn, unsigned long *store_mfn,
                       unsigned int console_evtchn, unsigned long *console_mfn)
{
    unsigned long *page_array = NULL;
    unsigned long count, i;
    start_info_t *start_info;
    shared_info_t *shared_info;
    xc_mmu_t *mmu = NULL;
    int rc;

    unsigned long nr_pt_pages;
    unsigned long physmap_pfn;
    unsigned long *physmap, *physmap_e;

    struct load_funcs load_funcs;
    struct domain_setup_info dsi;
    unsigned long vinitrd_start;
    unsigned long vinitrd_end;
    unsigned long vphysmap_start;
    unsigned long vphysmap_end;
    unsigned long vstartinfo_start;
    unsigned long vstartinfo_end;
    unsigned long vstoreinfo_start;
    unsigned long vstoreinfo_end;
    unsigned long vconsole_start;
    unsigned long vconsole_end;
    unsigned long vstack_start;
    unsigned long vstack_end;
    unsigned long vpt_start;
    unsigned long vpt_end;
    unsigned long v_end;

    rc = probeimageformat(image, image_size, &load_funcs);
    if ( rc != 0 )
        goto error_out;

    memset(&dsi, 0, sizeof(struct domain_setup_info));

    rc = (load_funcs.parseimage)(image, image_size, &dsi);
    if ( rc != 0 )
        goto error_out;

    if ( (dsi.v_start & (PAGE_SIZE-1)) != 0 )
    {
        PERROR("Guest OS must load to a page boundary.\n");
        goto error_out;
    }

    /*
     * Why do we need this? The number of page-table frames depends on the
     * size of the bootstrap address space. But the size of the address space
     * depends on the number of page-table frames (since each one is mapped
     * read-only). We have a pair of simultaneous equations in two unknowns,
     * which we solve by exhaustive search.
     */
    vinitrd_start    = round_pgup(dsi.v_end);
    vinitrd_end      = vinitrd_start + initrd_len;
    vphysmap_start   = round_pgup(vinitrd_end);
    vphysmap_end     = vphysmap_start + (nr_pages * sizeof(unsigned long));
    vstartinfo_start = round_pgup(vphysmap_end);
    vstartinfo_end   = vstartinfo_start + PAGE_SIZE;
    vstoreinfo_start = vstartinfo_end;
    vstoreinfo_end   = vstoreinfo_start + PAGE_SIZE;
    vconsole_start   = vstoreinfo_end;
    vconsole_end     = vconsole_start + PAGE_SIZE;
    vpt_start        = vconsole_end;

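    /*
     * Each pass below assumes a value of nr_pt_pages, extends the space to
     * cover it (rounding v_end up to a 4MB boundary and keeping at least
     * 512kB of slack above the boot stack), and then checks whether that
     * many page-table frames really do suffice to map [v_start, v_end).
     */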
    for ( nr_pt_pages = 2; ; nr_pt_pages++ )
    {
        vpt_end      = vpt_start + (nr_pt_pages * PAGE_SIZE);
        vstack_start = vpt_end;
        vstack_end   = vstack_start + PAGE_SIZE;
        v_end        = (vstack_end + (1UL<<22)-1) & ~((1UL<<22)-1);
        if ( (v_end - vstack_end) < (512UL << 10) )
            v_end += 1UL << 22; /* Add extra 4MB to get >= 512kB padding. */
#if defined(__i386__)
        if (dsi.pae_kernel) {
            /* FIXME: assumes one L2 pgtable @ 0xc0000000 */
            if ( (((v_end - dsi.v_start + ((1<<L2_PAGETABLE_SHIFT_PAE)-1)) >>
                   L2_PAGETABLE_SHIFT_PAE) + 2) <= nr_pt_pages )
                break;
        } else {
            if ( (((v_end - dsi.v_start + ((1<<L2_PAGETABLE_SHIFT)-1)) >>
                   L2_PAGETABLE_SHIFT) + 1) <= nr_pt_pages )
                break;
        }
#endif
#if defined(__x86_64__)
#define NR(_l,_h,_s) \
    (((((_h) + ((1UL<<(_s))-1)) & ~((1UL<<(_s))-1)) - \
    ((_l) & ~((1UL<<(_s))-1))) >> (_s))
        if ( (1 + /* # L4 */
              NR(dsi.v_start, v_end, L4_PAGETABLE_SHIFT) + /* # L3 */
              NR(dsi.v_start, v_end, L3_PAGETABLE_SHIFT) + /* # L2 */
              NR(dsi.v_start, v_end, L2_PAGETABLE_SHIFT))  /* # L1 */
             <= nr_pt_pages )
            break;
#endif
    }

#define _p(a) ((void *) (a))

    printf("VIRTUAL MEMORY ARRANGEMENT:\n"
           " Loaded kernel: %p->%p\n"
           " Init. ramdisk: %p->%p\n"
           " Phys-Mach map: %p->%p\n"
           " Start info:    %p->%p\n"
           " Store page:    %p->%p\n"
           " Console page:  %p->%p\n"
           " Page tables:   %p->%p\n"
           " Boot stack:    %p->%p\n"
           " TOTAL:         %p->%p\n",
           _p(dsi.v_kernstart), _p(dsi.v_kernend),
           _p(vinitrd_start), _p(vinitrd_end),
           _p(vphysmap_start), _p(vphysmap_end),
           _p(vstartinfo_start), _p(vstartinfo_end),
           _p(vstoreinfo_start), _p(vstoreinfo_end),
           _p(vconsole_start), _p(vconsole_end),
           _p(vpt_start), _p(vpt_end),
           _p(vstack_start), _p(vstack_end),
           _p(dsi.v_start), _p(v_end));
    printf(" ENTRY ADDRESS: %p\n", _p(dsi.v_kernentry));

    if ( (v_end - dsi.v_start) > (nr_pages * PAGE_SIZE) )
    {
        printf("Initial guest OS requires too much space\n"
               "(%luMB is greater than %luMB limit)\n",
               (v_end-dsi.v_start)>>20, (nr_pages<<PAGE_SHIFT)>>20);
        goto error_out;
    }

    if ( (page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL )
    {
        PERROR("Could not allocate memory");
        goto error_out;
    }

    if ( xc_get_pfn_list(xc_handle, dom, page_array, nr_pages) != nr_pages )
    {
        PERROR("Could not get the page frame list");
        goto error_out;
    }

    (load_funcs.loadimage)(image, image_size, xc_handle, dom, page_array,
                           &dsi);

    /* Load the initial ramdisk image. */
    if ( initrd_len != 0 )
    {
        for ( i = (vinitrd_start - dsi.v_start);
              i < (vinitrd_end - dsi.v_start); i += PAGE_SIZE )
        {
            char page[PAGE_SIZE];
            if ( gzread(initrd_gfd, page, PAGE_SIZE) == -1 )
            {
                PERROR("Error reading initrd image");
                goto error_out;
            }
            xc_copy_to_domain_page(xc_handle, dom,
                                   page_array[i>>PAGE_SHIFT], page);
        }
    }

    if ( (mmu = xc_init_mmu_updates(xc_handle, dom)) == NULL )
        goto error_out;

    /* setup page tables */
#if defined(__i386__)
    if (dsi.pae_kernel)
        rc = setup_pg_tables_pae(xc_handle, dom, ctxt,
                                 dsi.v_start, v_end,
                                 page_array, vpt_start, vpt_end);
    else
        rc = setup_pg_tables(xc_handle, dom, ctxt,
                             dsi.v_start, v_end,
                             page_array, vpt_start, vpt_end);
#endif
#if defined(__x86_64__)
    rc = setup_pg_tables_64(xc_handle, dom, ctxt,
                            dsi.v_start, v_end,
                            page_array, vpt_start, vpt_end);
#endif
    if ( rc != 0 )
        goto error_out;

    /* Write the phys->machine and machine->phys table entries. */
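    /*
     * The machine->phys side goes to Xen via MMU_MACHPHYS_UPDATE requests;
     * the phys->machine table is written directly into the guest's vphysmap
     * area, remapping one page of it at a time.
     */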
    physmap_pfn = (vphysmap_start - dsi.v_start) >> PAGE_SHIFT;
    physmap = physmap_e = xc_map_foreign_range(
        xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
        page_array[physmap_pfn++]);

    for ( count = 0; count < nr_pages; count++ )
    {
        if ( xc_add_mmu_update(xc_handle, mmu,
                               ((unsigned long long)page_array[count] << PAGE_SHIFT) |
                               MMU_MACHPHYS_UPDATE, count) )
        {
            fprintf(stderr, "m2p update failure p=%lx m=%lx\n",
                    count, page_array[count]);
            munmap(physmap, PAGE_SIZE);
            goto error_out;
        }
        *physmap_e++ = page_array[count];
        if ( ((unsigned long)physmap_e & (PAGE_SIZE-1)) == 0 )
        {
            munmap(physmap, PAGE_SIZE);
            physmap = physmap_e = xc_map_foreign_range(
                xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
                page_array[physmap_pfn++]);
        }
    }
    munmap(physmap, PAGE_SIZE);

#if defined(__i386__)
    /*
     * Pin down l2tab addr as page dir page - causes hypervisor to provide
     * correct protection for the page
     */
    if (dsi.pae_kernel) {
        if ( pin_table(xc_handle, MMUEXT_PIN_L3_TABLE,
                       ctxt->ctrlreg[3] >> PAGE_SHIFT, dom) )
            goto error_out;
    } else {
        if ( pin_table(xc_handle, MMUEXT_PIN_L2_TABLE,
                       ctxt->ctrlreg[3] >> PAGE_SHIFT, dom) )
            goto error_out;
    }
#endif

#if defined(__x86_64__)
    /*
     * Pin down l4tab addr as page dir page - causes hypervisor to provide
     * correct protection for the page
     */
    if ( pin_table(xc_handle, MMUEXT_PIN_L4_TABLE,
                   ctxt->ctrlreg[3] >> PAGE_SHIFT, dom) )
        goto error_out;
#endif

    *store_mfn = page_array[(vstoreinfo_start-dsi.v_start) >> PAGE_SHIFT];
    *console_mfn = page_array[(vconsole_start-dsi.v_start) >> PAGE_SHIFT];

    start_info = xc_map_foreign_range(
        xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
        page_array[(vstartinfo_start-dsi.v_start)>>PAGE_SHIFT]);
    memset(start_info, 0, sizeof(*start_info));
    start_info->nr_pages = nr_pages;
    start_info->shared_info = shared_info_frame << PAGE_SHIFT;
    start_info->flags = flags;
    start_info->pt_base = vpt_start;
    start_info->nr_pt_frames = nr_pt_pages;
    start_info->mfn_list = vphysmap_start;
    start_info->store_mfn = *store_mfn;
    start_info->store_evtchn = store_evtchn;
    start_info->console_mfn = *console_mfn;
    start_info->console_evtchn = console_evtchn;
    if ( initrd_len != 0 )
    {
        start_info->mod_start = vinitrd_start;
        start_info->mod_len   = initrd_len;
    }
    strncpy((char *)start_info->cmd_line, cmdline, MAX_GUEST_CMDLINE);
    start_info->cmd_line[MAX_GUEST_CMDLINE-1] = '\0';
    munmap(start_info, PAGE_SIZE);

    /* shared_info page starts its life empty. */
    shared_info = xc_map_foreign_range(
        xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, shared_info_frame);
    memset(shared_info, 0, sizeof(shared_info_t));
    /* Mask all upcalls... */
    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
        shared_info->vcpu_data[i].evtchn_upcall_mask = 1;

    shared_info->n_vcpu = vcpus;
    printf(" VCPUS:         %d\n", shared_info->n_vcpu);

    munmap(shared_info, PAGE_SIZE);

    /* Send the page update requests down to the hypervisor. */
    if ( xc_finish_mmu_updates(xc_handle, mmu) )
        goto error_out;

    free(mmu);
    free(page_array);

    *pvsi = vstartinfo_start;
    *pvss = vstack_start;
    *pvke = dsi.v_kernentry;

    return 0;

 error_out:
    free(mmu);
    free(page_array);
    return -1;
}
#endif

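/*
 * Build a Linux guest: read the kernel image (and optionally a gzip-
 * compressed ramdisk), check that the target domain is paused and not yet
 * built, populate its memory via setup_guest(), then hand the initial vcpu
 * context to Xen with DOM0_SETDOMAININFO.
 */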
int xc_linux_build(int xc_handle,
                   u32 domid,
                   const char *image_name,
                   const char *ramdisk_name,
                   const char *cmdline,
                   unsigned long flags,
                   unsigned int vcpus,
                   unsigned int store_evtchn,
                   unsigned long *store_mfn,
                   unsigned int console_evtchn,
                   unsigned long *console_mfn)
{
    dom0_op_t launch_op, op;
    int initrd_fd = -1;
    gzFile initrd_gfd = NULL;
    int rc, i;
    vcpu_guest_context_t st_ctxt, *ctxt = &st_ctxt;
    unsigned long nr_pages;
    char *image = NULL;
    unsigned long image_size, initrd_size = 0;
    unsigned long vstartinfo_start, vkern_entry, vstack_start;

#ifdef __ia64__
    /* Current xen/ia64 allocates domU pages on demand */
    if ( (long)(nr_pages = xc_get_max_pages(xc_handle, domid)) < 0 )
#else
    if ( (long)(nr_pages = xc_get_tot_pages(xc_handle, domid)) < 0 )
#endif
    {
        PERROR("Could not find total pages for domain");
        goto error_out;
    }

    if ( (image = xc_read_kernel_image(image_name, &image_size)) == NULL )
        goto error_out;

    if ( (ramdisk_name != NULL) && (strlen(ramdisk_name) != 0) )
    {
        if ( (initrd_fd = open(ramdisk_name, O_RDONLY)) < 0 )
        {
            PERROR("Could not open the initial ramdisk image");
            goto error_out;
        }

        initrd_size = xc_get_filesz(initrd_fd);

        if ( (initrd_gfd = gzdopen(initrd_fd, "rb")) == NULL )
        {
            PERROR("Could not allocate decompression state for initrd");
            goto error_out;
        }
    }

    if ( mlock(&st_ctxt, sizeof(st_ctxt)) )
    {
        PERROR("xc_linux_build: ctxt mlock failed");
        goto error_out;
    }

    op.cmd = DOM0_GETDOMAININFO;
    op.u.getdomaininfo.domain = (domid_t)domid;
    if ( (xc_dom0_op(xc_handle, &op) < 0) ||
         ((u16)op.u.getdomaininfo.domain != domid) )
    {
        PERROR("Could not get info on domain");
        goto error_out;
    }

    if ( xc_domain_get_vcpu_context(xc_handle, domid, 0, ctxt) )
    {
        PERROR("Could not get vcpu context");
        goto error_out;
    }

    if ( !(op.u.getdomaininfo.flags & DOMFLAGS_PAUSED) ||
#ifdef __ia64__
         0 )
#else
         (ctxt->ctrlreg[3] != 0) )
#endif
    {
        ERROR("Domain is already constructed");
        goto error_out;
    }

    if ( setup_guest(xc_handle, domid, image, image_size,
                     initrd_gfd, initrd_size, nr_pages,
                     &vstartinfo_start, &vkern_entry,
                     &vstack_start, ctxt, cmdline,
                     op.u.getdomaininfo.shared_info_frame,
                     flags, vcpus,
                     store_evtchn, store_mfn,
                     console_evtchn, console_mfn) < 0 )
    {
        ERROR("Error constructing guest OS");
        goto error_out;
    }

    if ( initrd_gfd != NULL )
        gzclose(initrd_gfd); /* also closes initrd_fd, which gzdopen took over */
    else if ( initrd_fd >= 0 )
        close(initrd_fd);
    free(image);

#ifdef __ia64__
    /* Based on new_thread in xen/arch/ia64/domain.c */
    ctxt->flags = 0;
    ctxt->shared.flags = flags;
    ctxt->shared.start_info_pfn = nr_pages - 3; /* metaphysical */
    ctxt->regs.cr_ipsr = 0; /* all necessary bits filled by hypervisor */
    ctxt->regs.cr_iip = vkern_entry;
    ctxt->regs.cr_ifs = 1UL << 63;
    ctxt->regs.ar_fpsr = FPSR_DEFAULT;
    /* ctxt->regs.r28 = dom_fw_setup(); currently done by hypervisor, should move here */
    ctxt->vcpu.privregs = 0;
    ctxt->sys_pgnr = nr_pages - 3;
    i = 0; /* silence unused variable warning */
#else /* x86 */
    /*
     * Initial register values:
     *  DS,ES,FS,GS = FLAT_KERNEL_DS
     *       CS:EIP = FLAT_KERNEL_CS:start_pc
     *       SS:ESP = FLAT_KERNEL_DS:start_stack
     *          ESI = start_info
     *  [EAX,EBX,ECX,EDX,EDI,EBP are zero]
     *       EFLAGS = IF | 2 (bit 1 is reserved and should always be 1)
     */
    ctxt->user_regs.ds = FLAT_KERNEL_DS;
    ctxt->user_regs.es = FLAT_KERNEL_DS;
    ctxt->user_regs.fs = FLAT_KERNEL_DS;
    ctxt->user_regs.gs = FLAT_KERNEL_DS;
    ctxt->user_regs.ss = FLAT_KERNEL_SS;
    ctxt->user_regs.cs = FLAT_KERNEL_CS;
    ctxt->user_regs.eip = vkern_entry;
    ctxt->user_regs.esp = vstack_start + PAGE_SIZE;
    ctxt->user_regs.esi = vstartinfo_start;
    ctxt->user_regs.eflags = 1 << 9; /* Interrupt Enable */

    /* FPU is set up to default initial state. */
    memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));

    /* Virtual IDT is empty at start-of-day. */
    for ( i = 0; i < 256; i++ )
    {
        ctxt->trap_ctxt[i].vector = i;
        ctxt->trap_ctxt[i].cs     = FLAT_KERNEL_CS;
    }

    /* No LDT. */
    ctxt->ldt_ents = 0;

    /* Use the default Xen-provided GDT. */
    ctxt->gdt_ents = 0;

    /* Ring 1 stack is the initial stack. */
    ctxt->kernel_ss = FLAT_KERNEL_SS;
    ctxt->kernel_sp = vstack_start + PAGE_SIZE;

    /* No debugging. */
    memset(ctxt->debugreg, 0, sizeof(ctxt->debugreg));

    /* No callback handlers. */
#if defined(__i386__)
    ctxt->event_callback_cs     = FLAT_KERNEL_CS;
    ctxt->event_callback_eip    = 0;
    ctxt->failsafe_callback_cs  = FLAT_KERNEL_CS;
    ctxt->failsafe_callback_eip = 0;
#elif defined(__x86_64__)
    ctxt->event_callback_eip    = 0;
    ctxt->failsafe_callback_eip = 0;
    ctxt->syscall_callback_eip  = 0;
#endif
#endif /* x86 */

    memset(&launch_op, 0, sizeof(launch_op));

    launch_op.u.setdomaininfo.domain = (domid_t)domid;
    launch_op.u.setdomaininfo.vcpu   = 0;
    launch_op.u.setdomaininfo.ctxt   = ctxt;

    launch_op.cmd = DOM0_SETDOMAININFO;
    rc = xc_dom0_op(xc_handle, &launch_op);

    return rc;

 error_out:
    if ( initrd_gfd != NULL )
        gzclose(initrd_gfd);
    else if ( initrd_fd >= 0 )
        close(initrd_fd);
    free(image);

    return -1;
}