ia64/xen-unstable

view tools/libxc/xc_linux_build.c @ 8964:8946b6dcd49e

Fix x86_64 Xen build.

event_callback_cs and failsafe_callback_cs are x86_32 only.

Signed-off-by: Ian Campbell <Ian.Campbell@XenSource.com>
author    Ian.Campbell@xensource.com
date      Wed Feb 22 17:26:39 2006 +0000 (2006-02-22)
parents   8aeb417387ca
children  b470657718fe
/******************************************************************************
 * xc_linux_build.c
 */

#include "xg_private.h"
#include "xc_private.h"
#include <xenctrl.h>

#if defined(__i386__)
#define ELFSIZE 32
#endif

#if defined(__x86_64__) || defined(__ia64__)
#define ELFSIZE 64
#endif

#include "xc_elf.h"
#include "xc_aout9.h"
#include <stdlib.h>
#include <unistd.h>
#include <zlib.h>

#if defined(__i386__)
#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
#define L3_PROT (_PAGE_PRESENT)
#endif

#if defined(__x86_64__)
#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
#define L3_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
#define L4_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
#endif

#ifdef __ia64__
#define get_tot_pages xc_get_max_pages
#else
#define get_tot_pages xc_get_tot_pages
#endif

#define round_pgup(_p)   (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
#define round_pgdown(_p) ((_p)&PAGE_MASK)

#ifdef __ia64__
#define probe_aout9(image,image_size,load_funcs) 1
#endif
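
/*
 * Each probe_*() loader returns zero when it recognizes the image and
 * fills in load_funcs; a non-zero return means "not mine".  The image is
 * therefore unrecognized only if every probe fails.  (On ia64 the a.out9
 * probe is stubbed out to 1, i.e. "never mine", above.)
 */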
static int probeimageformat(char *image,
                            unsigned long image_size,
                            struct load_funcs *load_funcs)
{
    if ( probe_elf(image, image_size, load_funcs) &&
         probe_bin(image, image_size, load_funcs) &&
         probe_aout9(image, image_size, load_funcs) )
    {
        ERROR( "Unrecognized image format" );
        return -EINVAL;
    }

    return 0;
}
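
/*
 * Take the next unused frame from page_array (indexed by ppt_alloc, which
 * the callers below pre-position at the first page-table frame), record
 * its machine address in 'ltab' and its pseudo-physical address in
 * 'pltab', then map it into our address space as 'vltab' and zero it.
 * Relies on xc_handle, dom, page_array and ppt_alloc being in scope, and
 * jumps to the caller's error_out label on failure.
 */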
#define alloc_pt(ltab, vltab, pltab)                                    \
do {                                                                    \
    pltab = ppt_alloc++;                                                \
    ltab = (uint64_t)page_array[pltab] << PAGE_SHIFT;                   \
    pltab <<= PAGE_SHIFT;                                               \
    if ( vltab != NULL )                                                \
        munmap(vltab, PAGE_SIZE);                                       \
    if ( (vltab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,       \
                                       PROT_READ|PROT_WRITE,            \
                                       ltab >> PAGE_SHIFT)) == NULL )   \
        goto error_out;                                                 \
    memset(vltab, 0x0, PAGE_SIZE);                                      \
} while ( 0 )

#if defined(__i386__)
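
/*
 * Build the guest's bootstrap page tables for a 2-level (non-PAE) i386
 * guest: one L2 page directory plus as many L1 tables as are needed to
 * map [dsi_v_start, v_end).  In shadow-translate mode the tables hold
 * guest pseudo-physical frame numbers; otherwise they hold real machine
 * frames, and the frames backing the page tables themselves are mapped
 * read-only as Xen requires.
 */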
static int setup_pg_tables(int xc_handle, uint32_t dom,
                           vcpu_guest_context_t *ctxt,
                           unsigned long dsi_v_start,
                           unsigned long v_end,
                           unsigned long *page_array,
                           unsigned long vpt_start,
                           unsigned long vpt_end,
                           unsigned shadow_mode_enabled)
{
    l1_pgentry_t *vl1tab=NULL, *vl1e=NULL;
    l2_pgentry_t *vl2tab=NULL, *vl2e=NULL;
    unsigned long l1tab = 0, pl1tab;
    unsigned long l2tab = 0, pl2tab;
    unsigned long ppt_alloc;
    unsigned long count;

    ppt_alloc = (vpt_start - dsi_v_start) >> PAGE_SHIFT;
    alloc_pt(l2tab, vl2tab, pl2tab);
    vl2e = &vl2tab[l2_table_offset(dsi_v_start)];
    if ( shadow_mode_enabled )
        ctxt->ctrlreg[3] = pl2tab;
    else
        ctxt->ctrlreg[3] = l2tab;

    for ( count = 0; count < ((v_end - dsi_v_start) >> PAGE_SHIFT); count++ )
    {
        if ( ((unsigned long)vl1e & (PAGE_SIZE-1)) == 0 )
        {
            alloc_pt(l1tab, vl1tab, pl1tab);
            vl1e = &vl1tab[l1_table_offset(dsi_v_start + (count<<PAGE_SHIFT))];
            if ( shadow_mode_enabled )
                *vl2e = pl1tab | L2_PROT;
            else
                *vl2e = l1tab | L2_PROT;
            vl2e++;
        }

        if ( shadow_mode_enabled )
        {
            *vl1e = (count << PAGE_SHIFT) | L1_PROT;
        }
        else
        {
            *vl1e = (page_array[count] << PAGE_SHIFT) | L1_PROT;
            if ( (count >= ((vpt_start-dsi_v_start)>>PAGE_SHIFT)) &&
                 (count <  ((vpt_end  -dsi_v_start)>>PAGE_SHIFT)) )
                *vl1e &= ~_PAGE_RW;
        }
        vl1e++;
    }
    munmap(vl1tab, PAGE_SIZE);
    munmap(vl2tab, PAGE_SIZE);
    return 0;

 error_out:
    if ( vl1tab )
        munmap(vl1tab, PAGE_SIZE);
    if ( vl2tab )
        munmap(vl2tab, PAGE_SIZE);
    return -1;
}
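
/*
 * PAE variant: an L3 page-directory-pointer table, L2 directories and L1
 * tables.  In PAE mode CR3 can only hold a 32-bit-addressable base, so
 * the L3 frame is relocated with xc_make_page_below_4G() if it happens
 * to lie at or above 4GB (frame number > 0xfffff).  Entries are 64 bits
 * wide, hence the uint64_t widening of page_array frame numbers below.
 */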
static int setup_pg_tables_pae(int xc_handle, uint32_t dom,
                               vcpu_guest_context_t *ctxt,
                               unsigned long dsi_v_start,
                               unsigned long v_end,
                               unsigned long *page_array,
                               unsigned long vpt_start,
                               unsigned long vpt_end,
                               unsigned shadow_mode_enabled)
{
    l1_pgentry_64_t *vl1tab = NULL, *vl1e = NULL;
    l2_pgentry_64_t *vl2tab = NULL, *vl2e = NULL;
    l3_pgentry_64_t *vl3tab = NULL, *vl3e = NULL;
    uint64_t l1tab, l2tab, l3tab, pl1tab, pl2tab, pl3tab;
    unsigned long ppt_alloc, count, nmfn;

    /* First allocate page for page dir. */
    ppt_alloc = (vpt_start - dsi_v_start) >> PAGE_SHIFT;

    if ( page_array[ppt_alloc] > 0xfffff )
    {
        nmfn = xc_make_page_below_4G(xc_handle, dom, page_array[ppt_alloc]);
        if ( nmfn == 0 )
        {
            fprintf(stderr, "Couldn't get a page below 4GB :-(\n");
            goto error_out;
        }
        page_array[ppt_alloc] = nmfn;
    }

    alloc_pt(l3tab, vl3tab, pl3tab);
    vl3e = &vl3tab[l3_table_offset_pae(dsi_v_start)];
    if ( shadow_mode_enabled )
        ctxt->ctrlreg[3] = pl3tab;
    else
        ctxt->ctrlreg[3] = l3tab;

    for ( count = 0; count < ((v_end - dsi_v_start) >> PAGE_SHIFT); count++)
    {
        if ( !((unsigned long)vl1e & (PAGE_SIZE-1)) )
        {
            if ( !((unsigned long)vl2e & (PAGE_SIZE-1)) )
            {
                alloc_pt(l2tab, vl2tab, pl2tab);
                vl2e = &vl2tab[l2_table_offset_pae(
                    dsi_v_start + (count << PAGE_SHIFT))];
                if ( shadow_mode_enabled )
                    *vl3e = pl2tab | L3_PROT;
                else
                    *vl3e = l2tab | L3_PROT;
                vl3e++;
            }

            alloc_pt(l1tab, vl1tab, pl1tab);
            vl1e = &vl1tab[l1_table_offset_pae(
                dsi_v_start + (count << PAGE_SHIFT))];
            if ( shadow_mode_enabled )
                *vl2e = pl1tab | L2_PROT;
            else
                *vl2e = l1tab | L2_PROT;
            vl2e++;
        }

        if ( shadow_mode_enabled )
        {
            *vl1e = (count << PAGE_SHIFT) | L1_PROT;
        }
        else
        {
            *vl1e = ((uint64_t)page_array[count] << PAGE_SHIFT) | L1_PROT;
            if ( (count >= ((vpt_start-dsi_v_start)>>PAGE_SHIFT)) &&
                 (count <  ((vpt_end  -dsi_v_start)>>PAGE_SHIFT)) )
                *vl1e &= ~_PAGE_RW;
        }
        vl1e++;
    }

    munmap(vl1tab, PAGE_SIZE);
    munmap(vl2tab, PAGE_SIZE);
    munmap(vl3tab, PAGE_SIZE);
    return 0;

 error_out:
    if ( vl1tab )
        munmap(vl1tab, PAGE_SIZE);
    if ( vl2tab )
        munmap(vl2tab, PAGE_SIZE);
    if ( vl3tab )
        munmap(vl3tab, PAGE_SIZE);
    return -1;
}
#endif

#if defined(__x86_64__)
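
/*
 * x86_64 variant: a full 4-level hierarchy (L4 root, L3, L2, L1).  The
 * structure mirrors the i386 versions above; a new lower-level table is
 * allocated lazily whenever the current entry pointer crosses a page
 * boundary.
 */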
static int setup_pg_tables_64(int xc_handle, uint32_t dom,
                              vcpu_guest_context_t *ctxt,
                              unsigned long dsi_v_start,
                              unsigned long v_end,
                              unsigned long *page_array,
                              unsigned long vpt_start,
                              unsigned long vpt_end,
                              int shadow_mode_enabled)
{
    l1_pgentry_t *vl1tab=NULL, *vl1e=NULL;
    l2_pgentry_t *vl2tab=NULL, *vl2e=NULL;
    l3_pgentry_t *vl3tab=NULL, *vl3e=NULL;
    l4_pgentry_t *vl4tab=NULL, *vl4e=NULL;
    unsigned long l2tab = 0, pl2tab;
    unsigned long l1tab = 0, pl1tab;
    unsigned long l3tab = 0, pl3tab;
    unsigned long l4tab = 0, pl4tab;
    unsigned long ppt_alloc;
    unsigned long count;

    /* First allocate page for page dir. */
    ppt_alloc = (vpt_start - dsi_v_start) >> PAGE_SHIFT;
    alloc_pt(l4tab, vl4tab, pl4tab);
    vl4e = &vl4tab[l4_table_offset(dsi_v_start)];
    if ( shadow_mode_enabled )
        ctxt->ctrlreg[3] = pl4tab;
    else
        ctxt->ctrlreg[3] = l4tab;

    for ( count = 0; count < ((v_end-dsi_v_start)>>PAGE_SHIFT); count++)
    {
        if ( !((unsigned long)vl1e & (PAGE_SIZE-1)) )
        {
            alloc_pt(l1tab, vl1tab, pl1tab);

            if ( !((unsigned long)vl2e & (PAGE_SIZE-1)) )
            {
                alloc_pt(l2tab, vl2tab, pl2tab);
                if ( !((unsigned long)vl3e & (PAGE_SIZE-1)) )
                {
                    alloc_pt(l3tab, vl3tab, pl3tab);
                    vl3e = &vl3tab[l3_table_offset(dsi_v_start +
                                                   (count<<PAGE_SHIFT))];
                    if ( shadow_mode_enabled )
                        *vl4e = pl3tab | L4_PROT;
                    else
                        *vl4e = l3tab | L4_PROT;
                    vl4e++;
                }
                vl2e = &vl2tab[l2_table_offset(dsi_v_start +
                                               (count<<PAGE_SHIFT))];
                if ( shadow_mode_enabled )
                    *vl3e = pl2tab | L3_PROT;
                else
                    *vl3e = l2tab | L3_PROT;
                vl3e++;
            }
            vl1e = &vl1tab[l1_table_offset(dsi_v_start + (count<<PAGE_SHIFT))];
            if ( shadow_mode_enabled )
                *vl2e = pl1tab | L2_PROT;
            else
                *vl2e = l1tab | L2_PROT;
            vl2e++;
        }

        if ( shadow_mode_enabled )
        {
            *vl1e = (count << PAGE_SHIFT) | L1_PROT;
        }
        else
        {
            *vl1e = (page_array[count] << PAGE_SHIFT) | L1_PROT;
            if ( (count >= ((vpt_start-dsi_v_start)>>PAGE_SHIFT)) &&
                 (count <  ((vpt_end  -dsi_v_start)>>PAGE_SHIFT)) )
            {
                *vl1e &= ~_PAGE_RW;
            }
        }
        vl1e++;
    }

    munmap(vl1tab, PAGE_SIZE);
    munmap(vl2tab, PAGE_SIZE);
    munmap(vl3tab, PAGE_SIZE);
    munmap(vl4tab, PAGE_SIZE);
    return 0;

 error_out:
    if ( vl1tab )
        munmap(vl1tab, PAGE_SIZE);
    if ( vl2tab )
        munmap(vl2tab, PAGE_SIZE);
    if ( vl3tab )
        munmap(vl3tab, PAGE_SIZE);
    if ( vl4tab )
        munmap(vl4tab, PAGE_SIZE);
    return -1;
}
#endif
#ifdef __ia64__
extern unsigned long xc_ia64_fpsr_default(void);
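
/*
 * ia64 setup_guest(): unlike x86, no bootstrap page tables are built
 * here.  The layout is simply kernel + initrd rounded to page
 * boundaries, and the start-info, store and console frames are the last
 * three pages of the domain ("metaphysical" addressing), retrieved via
 * xc_ia64_get_pfn_list().
 */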
static int setup_guest(int xc_handle,
                       uint32_t dom,
                       char *image, unsigned long image_size,
                       gzFile initrd_gfd, unsigned long initrd_len,
                       unsigned long nr_pages,
                       unsigned long *pvsi, unsigned long *pvke,
                       unsigned long *pvss, vcpu_guest_context_t *ctxt,
                       const char *cmdline,
                       unsigned long shared_info_frame,
                       unsigned long flags,
                       unsigned int store_evtchn, unsigned long *store_mfn,
                       unsigned int console_evtchn, unsigned long *console_mfn)
{
    unsigned long *page_array = NULL;
    struct load_funcs load_funcs;
    struct domain_setup_info dsi;
    unsigned long vinitrd_start;
    unsigned long vinitrd_end;
    unsigned long v_end;
    unsigned long start_page, pgnr;
    start_info_t *start_info;
    int rc;
    unsigned long i;

    rc = probeimageformat(image, image_size, &load_funcs);
    if ( rc != 0 )
        goto error_out;

    memset(&dsi, 0, sizeof(struct domain_setup_info));

    rc = (load_funcs.parseimage)(image, image_size, &dsi);
    if ( rc != 0 )
        goto error_out;

    dsi.v_start   = round_pgdown(dsi.v_start);
    vinitrd_start = round_pgup(dsi.v_end);
    vinitrd_end   = vinitrd_start + initrd_len;
    v_end         = round_pgup(vinitrd_end);

    start_page = dsi.v_start >> PAGE_SHIFT;
    pgnr = (v_end - dsi.v_start) >> PAGE_SHIFT;
    if ( (page_array = malloc(pgnr * sizeof(unsigned long))) == NULL )
    {
        PERROR("Could not allocate memory");
        goto error_out;
    }

    if ( xc_ia64_get_pfn_list(xc_handle, dom, page_array,
                              start_page, pgnr) != pgnr )
    {
        PERROR("Could not get the page frame list");
        goto error_out;
    }

#define _p(a) ((void *) (a))

    printf("VIRTUAL MEMORY ARRANGEMENT:\n"
           " Loaded kernel: %p->%p\n"
           " Init. ramdisk: %p->%p\n"
           " TOTAL:         %p->%p\n",
           _p(dsi.v_kernstart), _p(dsi.v_kernend),
           _p(vinitrd_start), _p(vinitrd_end),
           _p(dsi.v_start), _p(v_end));
    printf(" ENTRY ADDRESS: %p\n", _p(dsi.v_kernentry));

    (load_funcs.loadimage)(image, image_size, xc_handle, dom, page_array,
                           &dsi);

    /* Load the initial ramdisk image. */
    if ( initrd_len != 0 )
    {
        for ( i = (vinitrd_start - dsi.v_start);
              i < (vinitrd_end - dsi.v_start); i += PAGE_SIZE )
        {
            char page[PAGE_SIZE];
            if ( gzread(initrd_gfd, page, PAGE_SIZE) == -1 )
            {
                PERROR("Error reading initrd image");
                goto error_out;
            }
            xc_copy_to_domain_page(xc_handle, dom,
                                   page_array[i>>PAGE_SHIFT], page);
        }
    }

    *pvke = dsi.v_kernentry;

    /* Now need to retrieve machine pfn for system pages:
     * start_info/store/console
     */
    pgnr = 3;
    if ( xc_ia64_get_pfn_list(xc_handle, dom, page_array,
                              nr_pages - 3, pgnr) != pgnr )
    {
        PERROR("Could not get page frame for xenstore");
        goto error_out;
    }

    *store_mfn = page_array[1];
    *console_mfn = page_array[2];
    printf("store_mfn: 0x%lx, console_mfn: 0x%lx\n",
           *store_mfn, *console_mfn);

    start_info = xc_map_foreign_range(
        xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, page_array[0]);
    memset(start_info, 0, sizeof(*start_info));
    rc = xc_version(xc_handle, XENVER_version, NULL);
    sprintf(start_info->magic, "xen-%i.%i-ia64", rc >> 16, rc & (0xFFFF));
    start_info->flags          = flags;
    start_info->store_mfn      = nr_pages - 2;
    start_info->store_evtchn   = store_evtchn;
    start_info->console_mfn    = nr_pages - 1;
    start_info->console_evtchn = console_evtchn;
    start_info->nr_pages       = nr_pages; // FIXME?: nr_pages - 2 ????
    if ( initrd_len != 0 )
    {
        ctxt->initrd.start = vinitrd_start;
        ctxt->initrd.size  = initrd_len;
    }
    else
    {
        ctxt->initrd.start = 0;
        ctxt->initrd.size  = 0;
    }
    if ( cmdline != NULL )
    {
        strncpy((char *)ctxt->cmdline, cmdline, IA64_COMMAND_LINE_SIZE);
        ctxt->cmdline[IA64_COMMAND_LINE_SIZE-1] = '\0';
    }
    munmap(start_info, PAGE_SIZE);

    free(page_array);
    return 0;

 error_out:
    free(page_array);
    return -1;
}
#else /* x86 */
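
/*
 * x86 setup_guest(): lays out the bootstrap virtual address space
 * (kernel, initrd, phys-to-machine map, start info, store and console
 * pages, page tables, boot stack), loads the images, builds the page
 * tables via the helpers above, seeds the M2P/P2M mappings, and fills in
 * the start_info and shared_info pages.
 */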
static int setup_guest(int xc_handle,
                       uint32_t dom,
                       char *image, unsigned long image_size,
                       gzFile initrd_gfd, unsigned long initrd_len,
                       unsigned long nr_pages,
                       unsigned long *pvsi, unsigned long *pvke,
                       unsigned long *pvss, vcpu_guest_context_t *ctxt,
                       const char *cmdline,
                       unsigned long shared_info_frame,
                       unsigned long flags,
                       unsigned int store_evtchn, unsigned long *store_mfn,
                       unsigned int console_evtchn, unsigned long *console_mfn)
{
    unsigned long *page_array = NULL;
    unsigned long count, i, hypercall_pfn;
    start_info_t *start_info;
    shared_info_t *shared_info;
    xc_mmu_t *mmu = NULL;
    char *p;
    DECLARE_DOM0_OP;
    int rc;

    unsigned long nr_pt_pages;
    unsigned long physmap_pfn;
    unsigned long *physmap, *physmap_e;

    struct load_funcs load_funcs;
    struct domain_setup_info dsi;
    unsigned long vinitrd_start;
    unsigned long vinitrd_end;
    unsigned long vphysmap_start;
    unsigned long vphysmap_end;
    unsigned long vstartinfo_start;
    unsigned long vstartinfo_end;
    unsigned long vstoreinfo_start;
    unsigned long vstoreinfo_end;
    unsigned long vconsole_start;
    unsigned long vconsole_end;
    unsigned long vstack_start;
    unsigned long vstack_end;
    unsigned long vpt_start;
    unsigned long vpt_end;
    unsigned long v_end;
    unsigned shadow_mode_enabled;
    unsigned long guest_store_mfn, guest_console_mfn, guest_shared_info_mfn;

    rc = probeimageformat(image, image_size, &load_funcs);
    if ( rc != 0 )
        goto error_out;

    memset(&dsi, 0, sizeof(struct domain_setup_info));

    rc = (load_funcs.parseimage)(image, image_size, &dsi);
    if ( rc != 0 )
        goto error_out;

    if ( (dsi.v_start & (PAGE_SIZE-1)) != 0 )
    {
        PERROR("Guest OS must load to a page boundary.\n");
        goto error_out;
    }

    shadow_mode_enabled = !!strstr(dsi.xen_guest_string,
                                   "SHADOW=translate");

    /*
     * Why do we need this? The number of page-table frames depends on the
     * size of the bootstrap address space. But the size of the address space
     * depends on the number of page-table frames (since each one is mapped
     * read-only). We have a pair of simultaneous equations in two unknowns,
     * which we solve by exhaustive search.
     */
    vinitrd_start    = round_pgup(dsi.v_end);
    vinitrd_end      = vinitrd_start + initrd_len;
    vphysmap_start   = round_pgup(vinitrd_end);
    vphysmap_end     = vphysmap_start + (nr_pages * sizeof(unsigned long));
    vstartinfo_start = round_pgup(vphysmap_end);
    vstartinfo_end   = vstartinfo_start + PAGE_SIZE;
    vstoreinfo_start = vstartinfo_end;
    vstoreinfo_end   = vstoreinfo_start + PAGE_SIZE;
    vconsole_start   = vstoreinfo_end;
    vconsole_end     = vconsole_start + PAGE_SIZE;
    vpt_start        = vconsole_end;
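
    /*
     * Guess nr_pt_pages, lay out the tail of the address space, and check
     * whether that many frames can in fact map everything up to v_end; the
     * first nr_pt_pages for which the candidate layout fits is the answer.
     */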
    for ( nr_pt_pages = 2; ; nr_pt_pages++ )
    {
        vpt_end      = vpt_start + (nr_pt_pages * PAGE_SIZE);
        vstack_start = vpt_end;
        vstack_end   = vstack_start + PAGE_SIZE;
        v_end        = (vstack_end + (1UL<<22)-1) & ~((1UL<<22)-1);
        if ( (v_end - vstack_end) < (512UL << 10) )
            v_end += 1UL << 22; /* Add extra 4MB to get >= 512kB padding. */
#if defined(__i386__)
        if ( dsi.pae_kernel )
        {
            /* FIXME: assumes one L2 pgtable @ 0xc0000000 */
            if ( (((v_end - dsi.v_start + ((1<<L2_PAGETABLE_SHIFT_PAE)-1)) >>
                   L2_PAGETABLE_SHIFT_PAE) + 2) <= nr_pt_pages )
                break;
        }
        else
        {
            if ( (((v_end - dsi.v_start + ((1<<L2_PAGETABLE_SHIFT)-1)) >>
                   L2_PAGETABLE_SHIFT) + 1) <= nr_pt_pages )
                break;
        }
#endif
#if defined(__x86_64__)
#define NR(_l,_h,_s)                                                    \
    (((((_h) + ((1UL<<(_s))-1)) & ~((1UL<<(_s))-1)) -                   \
      ((_l) & ~((1UL<<(_s))-1))) >> (_s))
        if ( (1 +                                              /* # L4 */
              NR(dsi.v_start, v_end, L4_PAGETABLE_SHIFT) +     /* # L3 */
              NR(dsi.v_start, v_end, L3_PAGETABLE_SHIFT) +     /* # L2 */
              NR(dsi.v_start, v_end, L2_PAGETABLE_SHIFT))      /* # L1 */
             <= nr_pt_pages )
            break;
#endif
    }
#define _p(a) ((void *) (a))

    printf("VIRTUAL MEMORY ARRANGEMENT:\n"
           " Loaded kernel: %p->%p\n"
           " Init. ramdisk: %p->%p\n"
           " Phys-Mach map: %p->%p\n"
           " Start info:    %p->%p\n"
           " Store page:    %p->%p\n"
           " Console page:  %p->%p\n"
           " Page tables:   %p->%p\n"
           " Boot stack:    %p->%p\n"
           " TOTAL:         %p->%p\n",
           _p(dsi.v_kernstart), _p(dsi.v_kernend),
           _p(vinitrd_start), _p(vinitrd_end),
           _p(vphysmap_start), _p(vphysmap_end),
           _p(vstartinfo_start), _p(vstartinfo_end),
           _p(vstoreinfo_start), _p(vstoreinfo_end),
           _p(vconsole_start), _p(vconsole_end),
           _p(vpt_start), _p(vpt_end),
           _p(vstack_start), _p(vstack_end),
           _p(dsi.v_start), _p(v_end));
    printf(" ENTRY ADDRESS: %p\n", _p(dsi.v_kernentry));

    if ( ((v_end - dsi.v_start)>>PAGE_SHIFT) > nr_pages )
    {
        PERROR("Initial guest OS requires too much space\n"
               "(%luMB is greater than %luMB limit)\n",
               (v_end-dsi.v_start)>>20, nr_pages>>(20-PAGE_SHIFT));
        goto error_out;
    }

    if ( (page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL )
    {
        PERROR("Could not allocate memory");
        goto error_out;
    }

    if ( xc_get_pfn_list(xc_handle, dom, page_array, nr_pages) != nr_pages )
    {
        PERROR("Could not get the page frame list");
        goto error_out;
    }

    (load_funcs.loadimage)(image, image_size, xc_handle, dom, page_array,
                           &dsi);

    /* Load the initial ramdisk image. */
    if ( initrd_len != 0 )
    {
        for ( i = (vinitrd_start - dsi.v_start);
              i < (vinitrd_end - dsi.v_start); i += PAGE_SIZE )
        {
            char page[PAGE_SIZE];
            if ( gzread(initrd_gfd, page, PAGE_SIZE) == -1 )
            {
                PERROR("Error reading initrd image");
                goto error_out;
            }
            xc_copy_to_domain_page(xc_handle, dom,
                                   page_array[i>>PAGE_SHIFT], page);
        }
    }

    /* setup page tables */
#if defined(__i386__)
    if ( dsi.pae_kernel )
        rc = setup_pg_tables_pae(xc_handle, dom, ctxt,
                                 dsi.v_start, v_end,
                                 page_array, vpt_start, vpt_end,
                                 shadow_mode_enabled);
    else
        rc = setup_pg_tables(xc_handle, dom, ctxt,
                             dsi.v_start, v_end,
                             page_array, vpt_start, vpt_end,
                             shadow_mode_enabled);
#endif
#if defined(__x86_64__)
    rc = setup_pg_tables_64(xc_handle, dom, ctxt,
                            dsi.v_start, v_end,
                            page_array, vpt_start, vpt_end,
                            shadow_mode_enabled);
#endif
    if ( rc != 0 )
        goto error_out;

#if defined(__i386__)
    /*
     * Pin down l2tab addr as page dir page - causes hypervisor to provide
     * correct protection for the page
     */
    if ( !shadow_mode_enabled )
    {
        if ( dsi.pae_kernel )
        {
            if ( pin_table(xc_handle, MMUEXT_PIN_L3_TABLE,
                           ctxt->ctrlreg[3] >> PAGE_SHIFT, dom) )
                goto error_out;
        }
        else
        {
            if ( pin_table(xc_handle, MMUEXT_PIN_L2_TABLE,
                           ctxt->ctrlreg[3] >> PAGE_SHIFT, dom) )
                goto error_out;
        }
    }
#endif

#if defined(__x86_64__)
    /*
     * Pin down l4tab addr as page dir page - causes hypervisor to provide
     * correct protection for the page
     */
    if ( pin_table(xc_handle, MMUEXT_PIN_L4_TABLE,
                   ctxt->ctrlreg[3] >> PAGE_SHIFT, dom) )
        goto error_out;
#endif
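
    /*
     * M2P updates are batched: xc_add_mmu_update() queues update requests
     * (flushing automatically when the batch fills) and
     * xc_finish_mmu_updates() pushes anything still queued down to the
     * hypervisor.
     */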
    if ( (mmu = xc_init_mmu_updates(xc_handle, dom)) == NULL )
        goto error_out;

    /* Write the phys->machine and machine->phys table entries. */
    physmap_pfn = (vphysmap_start - dsi.v_start) >> PAGE_SHIFT;
    physmap = physmap_e = xc_map_foreign_range(
        xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
        page_array[physmap_pfn++]);

    for ( count = 0; count < nr_pages; count++ )
    {
        if ( xc_add_mmu_update(
                 xc_handle, mmu,
                 ((uint64_t)page_array[count] << PAGE_SHIFT) |
                 MMU_MACHPHYS_UPDATE,
                 count) )
        {
            fprintf(stderr, "m2p update failure p=%lx m=%lx\n",
                    count, page_array[count]);
            munmap(physmap, PAGE_SIZE);
            goto error_out;
        }
        *physmap_e++ = page_array[count];
        if ( ((unsigned long)physmap_e & (PAGE_SIZE-1)) == 0 )
        {
            munmap(physmap, PAGE_SIZE);
            physmap = physmap_e = xc_map_foreign_range(
                xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
                page_array[physmap_pfn++]);
        }
    }
    munmap(physmap, PAGE_SIZE);

    /* Send the page update requests down to the hypervisor. */
    if ( xc_finish_mmu_updates(xc_handle, mmu) )
        goto error_out;
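
    /*
     * In shadow-translate mode the guest runs on pseudo-physical frame
     * numbers and the hypervisor maintains the real translation, so the
     * shared info frame is looked up at the start of the PFN hole rather
     * than handed over as a machine frame.
     */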
    if ( shadow_mode_enabled )
    {
        struct xen_reserved_phys_area xrpa;

        /* Enable shadow translate mode */
        if ( xc_shadow_control(xc_handle, dom,
                               DOM0_SHADOW_CONTROL_OP_ENABLE_TRANSLATE,
                               NULL, 0, NULL) < 0 )
        {
            PERROR("Could not enable translation mode");
            goto error_out;
        }

        /* Find the shared info frame. It's guaranteed to be at the
           start of the PFN hole. */
        xrpa.domid = dom;
        xrpa.idx   = 0;
        rc = xc_memory_op(xc_handle, XENMEM_reserved_phys_area, &xrpa);
        if ( rc != 0 )
        {
            PERROR("Cannot find shared info pfn");
            goto error_out;
        }
        guest_shared_info_mfn = xrpa.first_gpfn;
    }
    else
    {
        guest_shared_info_mfn = shared_info_frame;
    }

    *store_mfn = page_array[(vstoreinfo_start-dsi.v_start) >> PAGE_SHIFT];
    *console_mfn = page_array[(vconsole_start-dsi.v_start) >> PAGE_SHIFT];
    if ( xc_clear_domain_page(xc_handle, dom, *store_mfn) ||
         xc_clear_domain_page(xc_handle, dom, *console_mfn) )
        goto error_out;
    if ( shadow_mode_enabled )
    {
        guest_store_mfn   = (vstoreinfo_start-dsi.v_start) >> PAGE_SHIFT;
        guest_console_mfn = (vconsole_start-dsi.v_start) >> PAGE_SHIFT;
    }
    else
    {
        guest_store_mfn   = *store_mfn;
        guest_console_mfn = *console_mfn;
    }

    start_info = xc_map_foreign_range(
        xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
        page_array[(vstartinfo_start-dsi.v_start)>>PAGE_SHIFT]);
    /* shared_info, start_info */
    memset(start_info, 0, sizeof(*start_info));
    rc = xc_version(xc_handle, XENVER_version, NULL);
    sprintf(start_info->magic, "xen-%i.%i-x86_%d%s",
            rc >> 16, rc & (0xFFFF), (unsigned int)sizeof(long)*8,
            dsi.pae_kernel ? "p" : "");
    start_info->nr_pages       = nr_pages;
    start_info->shared_info    = guest_shared_info_mfn << PAGE_SHIFT;
    start_info->flags          = flags;
    start_info->pt_base        = vpt_start;
    start_info->nr_pt_frames   = nr_pt_pages;
    start_info->mfn_list       = vphysmap_start;
    start_info->store_mfn      = guest_store_mfn;
    start_info->store_evtchn   = store_evtchn;
    start_info->console_mfn    = guest_console_mfn;
    start_info->console_evtchn = console_evtchn;
    if ( initrd_len != 0 )
    {
        start_info->mod_start = vinitrd_start;
        start_info->mod_len   = initrd_len;
    }
    if ( cmdline != NULL )
    {
        strncpy((char *)start_info->cmd_line, cmdline, MAX_GUEST_CMDLINE);
        start_info->cmd_line[MAX_GUEST_CMDLINE-1] = '\0';
    }
    munmap(start_info, PAGE_SIZE);

    /* shared_info page starts its life empty. */
    shared_info = xc_map_foreign_range(
        xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, shared_info_frame);
    memset(shared_info, 0, sizeof(shared_info_t));
    /* Mask all upcalls... */
    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
        shared_info->vcpu_info[i].evtchn_upcall_mask = 1;

    munmap(shared_info, PAGE_SIZE);

    /* Send the page update requests down to the hypervisor. */
    if ( xc_finish_mmu_updates(xc_handle, mmu) )
        goto error_out;
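
    /*
     * If the kernel advertises a hypercall transfer page via a
     * "HYPERCALL_PAGE=<pfn in hex>" entry in its Xen guest string, ask
     * Xen to install the hypercall stubs into that frame.
     */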
    p = strstr(dsi.xen_guest_string, "HYPERCALL_PAGE=");
    if ( p != NULL )
    {
        p += strlen("HYPERCALL_PAGE=");
        hypercall_pfn = strtoul(p, NULL, 16);
        if ( hypercall_pfn >= nr_pages )
            goto error_out;
        op.u.hypercall_init.domain = (domid_t)dom;
        op.u.hypercall_init.mfn    = page_array[hypercall_pfn];
        op.cmd = DOM0_HYPERCALL_INIT;
        if ( xc_dom0_op(xc_handle, &op) )
            goto error_out;
    }

    free(mmu);
    free(page_array);

    *pvsi = vstartinfo_start;
    *pvss = vstack_start;
    *pvke = dsi.v_kernentry;

    return 0;

 error_out:
    free(mmu);
    free(page_array);
    return -1;
}
#endif
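
/*
 * Build a paravirtualised Linux guest from a kernel image and an optional
 * (gzipped) ramdisk, returning the store/console MFNs for the caller to
 * hand to xenstored and the console daemon.  A minimal caller sketch
 * (assumes the domain, the event channels and a privileged interface
 * handle already exist; the kernel path and command line below are
 * purely illustrative):
 *
 *     unsigned long store_mfn, console_mfn;
 *     if ( xc_linux_build(xc_handle, domid, "/boot/vmlinuz-xen", NULL,
 *                         "root=/dev/sda1", 0,
 *                         store_evtchn, &store_mfn,
 *                         console_evtchn, &console_mfn) != 0 )
 *         ...destroy the half-built domain...
 */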
int xc_linux_build(int xc_handle,
                   uint32_t domid,
                   const char *image_name,
                   const char *ramdisk_name,
                   const char *cmdline,
                   unsigned long flags,
                   unsigned int store_evtchn,
                   unsigned long *store_mfn,
                   unsigned int console_evtchn,
                   unsigned long *console_mfn)
{
    dom0_op_t launch_op;
    DECLARE_DOM0_OP;
    int initrd_fd = -1;
    gzFile initrd_gfd = NULL;
    int rc, i;
    vcpu_guest_context_t st_ctxt, *ctxt = &st_ctxt;
    unsigned long nr_pages;
    char *image = NULL;
    unsigned long image_size, initrd_size=0;
    unsigned long vstartinfo_start, vkern_entry, vstack_start;

    if ( (long)(nr_pages = get_tot_pages(xc_handle, domid)) < 0 )
    {
        PERROR("Could not find total pages for domain");
        goto error_out;
    }

    if ( (image = xc_read_kernel_image(image_name, &image_size)) == NULL )
        goto error_out;

    if ( (ramdisk_name != NULL) && (strlen(ramdisk_name) != 0) )
    {
        if ( (initrd_fd = open(ramdisk_name, O_RDONLY)) < 0 )
        {
            PERROR("Could not open the initial ramdisk image");
            goto error_out;
        }

        initrd_size = xc_get_filesz(initrd_fd);

        if ( (initrd_gfd = gzdopen(initrd_fd, "rb")) == NULL )
        {
            PERROR("Could not allocate decompression state for initrd");
            goto error_out;
        }
    }

#ifdef VALGRIND
    memset(&st_ctxt, 0, sizeof(st_ctxt));
#endif

    if ( mlock(&st_ctxt, sizeof(st_ctxt)) )
    {
        PERROR("%s: ctxt mlock failed", __func__);
        goto error_out;
    }

    op.cmd = DOM0_GETDOMAININFO;
    op.u.getdomaininfo.domain = (domid_t)domid;
    if ( (xc_dom0_op(xc_handle, &op) < 0) ||
         ((uint16_t)op.u.getdomaininfo.domain != domid) )
    {
        PERROR("Could not get info on domain");
        goto error_out;
    }

    memset(ctxt, 0, sizeof(*ctxt));

    if ( setup_guest(xc_handle, domid, image, image_size,
                     initrd_gfd, initrd_size, nr_pages,
                     &vstartinfo_start, &vkern_entry,
                     &vstack_start, ctxt, cmdline,
                     op.u.getdomaininfo.shared_info_frame,
                     flags, store_evtchn, store_mfn,
                     console_evtchn, console_mfn) < 0 )
    {
        ERROR("Error constructing guest OS");
        goto error_out;
    }

    if ( initrd_fd >= 0 )
        close(initrd_fd);
    if ( initrd_gfd )
        gzclose(initrd_gfd);
    free(image);

#ifdef __ia64__
    /* based on new_thread in xen/arch/ia64/domain.c */
    ctxt->flags = 0;
    ctxt->shared.flags = flags;
    ctxt->shared.start_info_pfn = nr_pages - 3; /* metaphysical */
    ctxt->regs.cr_ipsr = 0; /* all necessary bits filled by hypervisor */
    ctxt->regs.cr_iip = vkern_entry;
    ctxt->regs.cr_ifs = 1UL << 63;
    ctxt->regs.ar_fpsr = xc_ia64_fpsr_default();
    /* currently done by hypervisor, should move here */
    /* ctxt->regs.r28 = dom_fw_setup(); */
    ctxt->vcpu.privregs = 0;
    ctxt->sys_pgnr = 3;
    i = 0; /* silence unused variable warning */
#else /* x86 */
    /*
     * Initial register values:
     *  DS,ES,FS,GS = FLAT_KERNEL_DS
     *       CS:EIP = FLAT_KERNEL_CS:start_pc
     *       SS:ESP = FLAT_KERNEL_DS:start_stack
     *          ESI = start_info
     *  [EAX,EBX,ECX,EDX,EDI,EBP are zero]
     *       EFLAGS = IF | 2 (bit 1 is reserved and should always be 1)
     */
    ctxt->user_regs.ds = FLAT_KERNEL_DS;
    ctxt->user_regs.es = FLAT_KERNEL_DS;
    ctxt->user_regs.fs = FLAT_KERNEL_DS;
    ctxt->user_regs.gs = FLAT_KERNEL_DS;
    ctxt->user_regs.ss = FLAT_KERNEL_SS;
    ctxt->user_regs.cs = FLAT_KERNEL_CS;
    ctxt->user_regs.eip = vkern_entry;
    ctxt->user_regs.esp = vstack_start + PAGE_SIZE;
    ctxt->user_regs.esi = vstartinfo_start;
    ctxt->user_regs.eflags = 1 << 9; /* Interrupt Enable */

    ctxt->flags = VGCF_IN_KERNEL;

    /* FPU is set up to default initial state. */
    memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));

    /* Virtual IDT is empty at start-of-day. */
    for ( i = 0; i < 256; i++ )
    {
        ctxt->trap_ctxt[i].vector = i;
        ctxt->trap_ctxt[i].cs     = FLAT_KERNEL_CS;
    }

    /* No LDT. */
    ctxt->ldt_ents = 0;

    /* Use the default Xen-provided GDT. */
    ctxt->gdt_ents = 0;

    /* Ring 1 stack is the initial stack. */
    ctxt->kernel_ss = FLAT_KERNEL_SS;
    ctxt->kernel_sp = vstack_start + PAGE_SIZE;

    /* No debugging. */
    memset(ctxt->debugreg, 0, sizeof(ctxt->debugreg));

    /* No callback handlers. */
#if defined(__i386__)
    ctxt->event_callback_cs     = FLAT_KERNEL_CS;
    ctxt->event_callback_eip    = 0;
    ctxt->failsafe_callback_cs  = FLAT_KERNEL_CS;
    ctxt->failsafe_callback_eip = 0;
#elif defined(__x86_64__)
    ctxt->event_callback_eip    = 0;
    ctxt->failsafe_callback_eip = 0;
    ctxt->syscall_callback_eip  = 0;
#endif
#endif /* x86 */

    memset(&launch_op, 0, sizeof(launch_op));

    launch_op.u.setvcpucontext.domain = (domid_t)domid;
    launch_op.u.setvcpucontext.vcpu   = 0;
    launch_op.u.setvcpucontext.ctxt   = ctxt;

    launch_op.cmd = DOM0_SETVCPUCONTEXT;
    rc = xc_dom0_op(xc_handle, &launch_op);

    return rc;

 error_out:
    if ( initrd_gfd != NULL )
        gzclose(initrd_gfd);
    else if ( initrd_fd >= 0 )
        close(initrd_fd);
    free(image);
    return -1;
}

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */