direct-io.hg

view tools/libxc/xc_linux_build.c @ 8500:dd5649730b32

Fix a couple of bogus dom0_op names:
setdomaininfo -> setvcpucontext
pincpudomain -> setvcpuaffinity

Signed-off-by: Keir Fraser <keir@xensource.com>
author kaf24@firebug.cl.cam.ac.uk
date Fri Jan 06 12:53:19 2006 +0100 (2006-01-06)
parents f89906acd9f6
children c503c3887971 0a69deed5e1f
line source
1 /******************************************************************************
2 * xc_linux_build.c
3 */
5 #include "xg_private.h"
6 #include <xenctrl.h>
8 #if defined(__i386__)
9 #define ELFSIZE 32
10 #endif
12 #if defined(__x86_64__) || defined(__ia64__)
13 #define ELFSIZE 64
14 #endif
16 #include "xc_elf.h"
17 #include "xc_aout9.h"
18 #include <stdlib.h>
19 #include <unistd.h>
20 #include <zlib.h>
22 #if defined(__i386__)
23 #define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
24 #define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
25 #define L3_PROT (_PAGE_PRESENT)
26 #endif
28 #if defined(__x86_64__)
29 #define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
30 #define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
31 #define L3_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
32 #define L4_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
33 #endif
35 #ifdef __ia64__
36 #define already_built(ctxt) (0)
37 #define get_tot_pages xc_get_max_pages
38 #else
39 #define already_built(ctxt) ((ctxt)->ctrlreg[3] != 0)
40 #define get_tot_pages xc_get_tot_pages
41 #endif
43 #define round_pgup(_p) (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
44 #define round_pgdown(_p) ((_p)&PAGE_MASK)
46 #ifdef __ia64__
47 #define probe_aout9(image,image_size,load_funcs) 1
48 #endif
50 static int probeimageformat(char *image,
51 unsigned long image_size,
52 struct load_funcs *load_funcs)
53 {
54 if ( probe_elf(image, image_size, load_funcs) &&
55 probe_bin(image, image_size, load_funcs) &&
56 probe_aout9(image, image_size, load_funcs) )
57 {
58 ERROR( "Unrecognized image format" );
59 return -EINVAL;
60 }
62 return 0;
63 }
/*
 * alloc_pt(ltab, vltab): allocate the next page-table frame.
 *
 * Takes the next frame from page_array at the caller's allocation cursor
 * 'ppt_alloc' (post-incremented), stores its machine byte address in
 * 'ltab', remaps the local virtual window 'vltab' onto that frame
 * (unmapping any previous mapping first) and zeroes it.
 *
 * NOTE: expands unhygienically — relies on xc_handle, dom, page_array and
 * ppt_alloc being in scope, and on mapping failure jumps to the caller's
 * 'error_out' label.
 */
65 #define alloc_pt(ltab, vltab) \
66 do { \
67 ltab = (uint64_t)page_array[ppt_alloc++] << PAGE_SHIFT; \
68 if ( vltab != NULL ) \
69 munmap(vltab, PAGE_SIZE); \
70 if ( (vltab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, \
71 PROT_READ|PROT_WRITE, \
72 ltab >> PAGE_SHIFT)) == NULL ) \
73 goto error_out; \
74 memset(vltab, 0, PAGE_SIZE); \
75 } while ( 0 )
77 #if defined(__i386__)
79 static int setup_pg_tables(int xc_handle, uint32_t dom,
80 vcpu_guest_context_t *ctxt,
81 unsigned long dsi_v_start,
82 unsigned long v_end,
83 unsigned long *page_array,
84 unsigned long vpt_start,
85 unsigned long vpt_end)
86 {
87 l1_pgentry_t *vl1tab=NULL, *vl1e=NULL;
88 l2_pgentry_t *vl2tab=NULL, *vl2e=NULL;
89 unsigned long l1tab = 0;
90 unsigned long l2tab = 0;
91 unsigned long ppt_alloc;
92 unsigned long count;
94 ppt_alloc = (vpt_start - dsi_v_start) >> PAGE_SHIFT;
95 alloc_pt(l2tab, vl2tab);
96 vl2e = &vl2tab[l2_table_offset(dsi_v_start)];
97 ctxt->ctrlreg[3] = l2tab;
99 for ( count = 0; count < ((v_end - dsi_v_start) >> PAGE_SHIFT); count++ )
100 {
101 if ( ((unsigned long)vl1e & (PAGE_SIZE-1)) == 0 )
102 {
103 alloc_pt(l1tab, vl1tab);
104 vl1e = &vl1tab[l1_table_offset(dsi_v_start + (count<<PAGE_SHIFT))];
105 *vl2e++ = l1tab | L2_PROT;
106 }
108 *vl1e = (page_array[count] << PAGE_SHIFT) | L1_PROT;
109 if ( (count >= ((vpt_start-dsi_v_start)>>PAGE_SHIFT)) &&
110 (count < ((vpt_end -dsi_v_start)>>PAGE_SHIFT)) )
111 *vl1e &= ~_PAGE_RW;
112 vl1e++;
113 }
114 munmap(vl1tab, PAGE_SIZE);
115 munmap(vl2tab, PAGE_SIZE);
116 return 0;
118 error_out:
119 if (vl1tab)
120 munmap(vl1tab, PAGE_SIZE);
121 if (vl2tab)
122 munmap(vl2tab, PAGE_SIZE);
123 return -1;
124 }
/*
 * setup_pg_tables_pae: build 3-level PAE (i386) bootstrap page tables
 * mapping [dsi_v_start, v_end), analogous to setup_pg_tables().
 *
 * The L3 (page-directory-pointer) frame must reside below 4GB because
 * PAE cr3 is 32 bits wide; if the candidate frame is above pfn 0xfffff
 * it is exchanged via xc_make_page_below_4G().  Frames backing
 * [vpt_start, vpt_end) are mapped read-only.  ctxt->ctrlreg[3] is set
 * to the L3 table's machine address.  Returns 0 on success, -1 on error.
 */
126 static int setup_pg_tables_pae(int xc_handle, uint32_t dom,
127 vcpu_guest_context_t *ctxt,
128 unsigned long dsi_v_start,
129 unsigned long v_end,
130 unsigned long *page_array,
131 unsigned long vpt_start,
132 unsigned long vpt_end)
133 {
134 l1_pgentry_64_t *vl1tab = NULL, *vl1e = NULL;
135 l2_pgentry_64_t *vl2tab = NULL, *vl2e = NULL;
136 l3_pgentry_64_t *vl3tab = NULL, *vl3e = NULL;
137 uint64_t l1tab, l2tab, l3tab;
138 unsigned long ppt_alloc, count, nmfn;
140 /* First allocate page for page dir. */
141 ppt_alloc = (vpt_start - dsi_v_start) >> PAGE_SHIFT;
/* PAE cr3 can only address a page-directory-pointer table below 4GB. */
143 if ( page_array[ppt_alloc] > 0xfffff )
144 {
145 nmfn = xc_make_page_below_4G(xc_handle, dom, page_array[ppt_alloc]);
146 if ( nmfn == 0 )
147 {
148 fprintf(stderr, "Couldn't get a page below 4GB :-(\n");
149 goto error_out;
150 }
151 page_array[ppt_alloc] = nmfn;
152 }
154 alloc_pt(l3tab, vl3tab);
155 vl3e = &vl3tab[l3_table_offset_pae(dsi_v_start)];
156 ctxt->ctrlreg[3] = l3tab;
/* Walk every page; allocate new L1/L2 frames as their windows wrap. */
158 for ( count = 0; count < ((v_end - dsi_v_start) >> PAGE_SHIFT); count++)
159 {
160 if ( !((unsigned long)vl1e & (PAGE_SIZE-1)) )
161 {
162 if ( !((unsigned long)vl2e & (PAGE_SIZE-1)) )
163 {
164 alloc_pt(l2tab, vl2tab);
165 vl2e = &vl2tab[l2_table_offset_pae(
166 dsi_v_start + (count << PAGE_SHIFT))];
167 *vl3e++ = l2tab | L3_PROT;
168 }
170 alloc_pt(l1tab, vl1tab);
171 vl1e = &vl1tab[l1_table_offset_pae(
172 dsi_v_start + (count << PAGE_SHIFT))];
173 *vl2e++ = l1tab | L2_PROT;
174 }
176 *vl1e = ((uint64_t)page_array[count] << PAGE_SHIFT) | L1_PROT;
/* Page-table frames themselves are mapped read-only. */
177 if ( (count >= ((vpt_start-dsi_v_start)>>PAGE_SHIFT)) &&
178 (count < ((vpt_end -dsi_v_start)>>PAGE_SHIFT)) )
179 *vl1e &= ~_PAGE_RW;
180 vl1e++;
181 }
183 munmap(vl1tab, PAGE_SIZE);
184 munmap(vl2tab, PAGE_SIZE);
185 munmap(vl3tab, PAGE_SIZE);
186 return 0;
188 error_out:
189 if (vl1tab)
190 munmap(vl1tab, PAGE_SIZE);
191 if (vl2tab)
192 munmap(vl2tab, PAGE_SIZE);
193 if (vl3tab)
194 munmap(vl3tab, PAGE_SIZE);
195 return -1;
196 }
198 #endif
200 #if defined(__x86_64__)
/*
 * setup_pg_tables_64: build 4-level (x86_64) bootstrap page tables
 * mapping [dsi_v_start, v_end), analogous to setup_pg_tables().
 *
 * New L1/L2/L3 frames are allocated whenever the corresponding entry
 * pointer wraps onto a page boundary; frames backing [vpt_start, vpt_end)
 * are mapped read-only.  ctxt->ctrlreg[3] is set to the L4 table's
 * machine address.  Returns 0 on success, -1 on error.
 */
202 static int setup_pg_tables_64(int xc_handle, uint32_t dom,
203 vcpu_guest_context_t *ctxt,
204 unsigned long dsi_v_start,
205 unsigned long v_end,
206 unsigned long *page_array,
207 unsigned long vpt_start,
208 unsigned long vpt_end)
209 {
210 l1_pgentry_t *vl1tab=NULL, *vl1e=NULL;
211 l2_pgentry_t *vl2tab=NULL, *vl2e=NULL;
212 l3_pgentry_t *vl3tab=NULL, *vl3e=NULL;
213 l4_pgentry_t *vl4tab=NULL, *vl4e=NULL;
214 unsigned long l2tab = 0;
215 unsigned long l1tab = 0;
216 unsigned long l3tab = 0;
217 unsigned long l4tab = 0;
218 unsigned long ppt_alloc;
219 unsigned long count;
221 /* First allocate page for page dir. */
222 ppt_alloc = (vpt_start - dsi_v_start) >> PAGE_SHIFT;
223 alloc_pt(l4tab, vl4tab);
224 vl4e = &vl4tab[l4_table_offset(dsi_v_start)];
225 ctxt->ctrlreg[3] = l4tab;
/* Walk every page; refill L3/L2/L1 frames as each window wraps. */
227 for ( count = 0; count < ((v_end-dsi_v_start)>>PAGE_SHIFT); count++)
228 {
229 if ( !((unsigned long)vl1e & (PAGE_SIZE-1)) )
230 {
231 alloc_pt(l1tab, vl1tab);
233 if ( !((unsigned long)vl2e & (PAGE_SIZE-1)) )
234 {
235 alloc_pt(l2tab, vl2tab);
236 if ( !((unsigned long)vl3e & (PAGE_SIZE-1)) )
237 {
238 alloc_pt(l3tab, vl3tab);
239 vl3e = &vl3tab[l3_table_offset(dsi_v_start + (count<<PAGE_SHIFT))];
240 *vl4e = l3tab | L4_PROT;
241 vl4e++;
242 }
243 vl2e = &vl2tab[l2_table_offset(dsi_v_start + (count<<PAGE_SHIFT))];
244 *vl3e = l2tab | L3_PROT;
245 vl3e++;
246 }
247 vl1e = &vl1tab[l1_table_offset(dsi_v_start + (count<<PAGE_SHIFT))];
248 *vl2e = l1tab | L2_PROT;
249 vl2e++;
250 }
252 *vl1e = (page_array[count] << PAGE_SHIFT) | L1_PROT;
/* Page-table frames themselves are mapped read-only. */
253 if ( (count >= ((vpt_start-dsi_v_start)>>PAGE_SHIFT)) &&
254 (count < ((vpt_end -dsi_v_start)>>PAGE_SHIFT)) )
255 {
256 *vl1e &= ~_PAGE_RW;
257 }
258 vl1e++;
259 }
261 munmap(vl1tab, PAGE_SIZE);
262 munmap(vl2tab, PAGE_SIZE);
263 munmap(vl3tab, PAGE_SIZE);
264 munmap(vl4tab, PAGE_SIZE);
265 return 0;
267 error_out:
268 if (vl1tab)
269 munmap(vl1tab, PAGE_SIZE);
270 if (vl2tab)
271 munmap(vl2tab, PAGE_SIZE);
272 if (vl3tab)
273 munmap(vl3tab, PAGE_SIZE);
274 if (vl4tab)
275 munmap(vl4tab, PAGE_SIZE);
276 return -1;
277 }
278 #endif
280 #ifdef __ia64__
281 extern unsigned long xc_ia64_fpsr_default(void);
/*
 * setup_guest (ia64 variant): lay out and populate the new domain's memory.
 *
 * Probes/parses the kernel image, sizes the virtual layout (kernel then
 * initrd, page-rounded), copies the kernel and initrd into the domain,
 * and fills in the start_info page.  The last three domain pages are the
 * start_info, store and console pages; *store_mfn/*console_mfn are set to
 * the machine frames of the latter two.  *pvke gets the kernel entry point.
 * NOTE(review): *pvsi and *pvss are never written on ia64 — the caller
 * only consumes them on x86; confirm before relying on them here.
 *
 * Returns 0 on success, -1 on failure (page_array freed either way).
 *
 * FIX: the store_mfn/console_mfn diagnostic printed the POINTER values
 * ((uint64_t)store_mfn) instead of the MFNs themselves; it now
 * dereferences the pointers.
 */
283 static int setup_guest(int xc_handle,
284 uint32_t dom,
285 char *image, unsigned long image_size,
286 gzFile initrd_gfd, unsigned long initrd_len,
287 unsigned long nr_pages,
288 unsigned long *pvsi, unsigned long *pvke,
289 unsigned long *pvss, vcpu_guest_context_t *ctxt,
290 const char *cmdline,
291 unsigned long shared_info_frame,
292 unsigned long flags,
293 unsigned int store_evtchn, unsigned long *store_mfn,
294 unsigned int console_evtchn, unsigned long *console_mfn)
295 {
296 unsigned long *page_array = NULL;
297 struct load_funcs load_funcs;
298 struct domain_setup_info dsi;
299 unsigned long vinitrd_start;
300 unsigned long vinitrd_end;
301 unsigned long v_end;
302 unsigned long start_page, pgnr;
303 start_info_t *start_info;
304 int rc;
305 unsigned long i;
307 rc = probeimageformat(image, image_size, &load_funcs);
308 if ( rc != 0 )
309 goto error_out;
311 memset(&dsi, 0, sizeof(struct domain_setup_info));
313 rc = (load_funcs.parseimage)(image, image_size, &dsi);
314 if ( rc != 0 )
315 goto error_out;
/* Page-align the layout: kernel, then initrd, rounded up to a page. */
317 dsi.v_start = round_pgdown(dsi.v_start);
318 vinitrd_start = round_pgup(dsi.v_end);
319 vinitrd_end = vinitrd_start + initrd_len;
320 v_end = round_pgup(vinitrd_end);
322 start_page = dsi.v_start >> PAGE_SHIFT;
323 pgnr = (v_end - dsi.v_start) >> PAGE_SHIFT;
324 if ( (page_array = malloc(pgnr * sizeof(unsigned long))) == NULL )
325 {
326 PERROR("Could not allocate memory");
327 goto error_out;
328 }
330 if ( xc_ia64_get_pfn_list(xc_handle, dom, page_array, start_page, pgnr) != pgnr )
331 {
332 PERROR("Could not get the page frame list");
333 goto error_out;
334 }
336 #define _p(a) ((void *) (a))
338 printf("VIRTUAL MEMORY ARRANGEMENT:\n"
339 " Loaded kernel: %p->%p\n"
340 " Init. ramdisk: %p->%p\n"
341 " TOTAL: %p->%p\n",
342 _p(dsi.v_kernstart), _p(dsi.v_kernend),
343 _p(vinitrd_start), _p(vinitrd_end),
344 _p(dsi.v_start), _p(v_end));
345 printf(" ENTRY ADDRESS: %p\n", _p(dsi.v_kernentry));
347 (load_funcs.loadimage)(image, image_size, xc_handle, dom, page_array,
348 &dsi);
350 /* Load the initial ramdisk image. */
351 if ( initrd_len != 0 )
352 {
353 for ( i = (vinitrd_start - dsi.v_start);
354 i < (vinitrd_end - dsi.v_start); i += PAGE_SIZE )
355 {
356 char page[PAGE_SIZE];
/* NOTE(review): gzread may also return a short count on a truncated
 * stream; only -1 is treated as an error here — confirm intended. */
357 if ( gzread(initrd_gfd, page, PAGE_SIZE) == -1 )
358 {
359 PERROR("Error reading initrd image, could not");
360 goto error_out;
361 }
362 xc_copy_to_domain_page(xc_handle, dom,
363 page_array[i>>PAGE_SHIFT], page);
364 }
365 }
368 *pvke = dsi.v_kernentry;
370 /* Now need to retrieve machine pfn for system pages:
371 * start_info/store/console
372 */
373 pgnr = 3;
374 if ( xc_ia64_get_pfn_list(xc_handle, dom, page_array,
375 nr_pages - 3, pgnr) != pgnr )
376 {
377 PERROR("Could not get page frame for xenstore");
378 goto error_out;
379 }
381 *store_mfn = page_array[1];
382 *console_mfn = page_array[2];
/* Print the MFN values themselves, not the addresses of the out-params. */
383 printf("store_mfn: 0x%lx, console_mfn: 0x%lx\n",
384 *store_mfn, *console_mfn);
386 start_info = xc_map_foreign_range(
387 xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, page_array[0]);
388 memset(start_info, 0, sizeof(*start_info));
389 rc = xc_version(xc_handle, XENVER_version, NULL);
390 sprintf(start_info->magic, "xen-%i.%i-ia64", rc >> 16, rc & (0xFFFF));
391 start_info->flags = flags;
/* Store/console pfns are "metaphysical": the last two domain pages. */
392 start_info->store_mfn = nr_pages - 2;
393 start_info->store_evtchn = store_evtchn;
394 start_info->console_mfn = nr_pages - 1;
395 start_info->console_evtchn = console_evtchn;
396 start_info->nr_pages = nr_pages; // FIXME?: nr_pages - 2 ????
397 if ( initrd_len != 0 )
398 {
399 ctxt->initrd.start = vinitrd_start;
400 ctxt->initrd.size = initrd_len;
401 } else {
402 ctxt->initrd.start = 0;
403 ctxt->initrd.size = 0;
404 }
405 strncpy((char *)ctxt->cmdline, cmdline, IA64_COMMAND_LINE_SIZE);
406 ctxt->cmdline[IA64_COMMAND_LINE_SIZE-1] = '\0';
407 munmap(start_info, PAGE_SIZE);
409 free(page_array);
410 return 0;
412 error_out:
413 free(page_array);
414 return -1;
415 }
416 #else /* x86 */
/*
 * setup_guest (x86 variant): lay out and populate the new domain's memory.
 *
 * Builds the bootstrap virtual layout (kernel, initrd, phys-to-machine map,
 * start-info page, xenstore page, console page, page tables, boot stack),
 * solving for the number of page-table frames by exhaustive search, then:
 * loads the kernel and initrd, constructs the page tables (2-level, PAE or
 * 4-level depending on build/guest), writes the P2M/M2P mappings, pins the
 * top-level table, and fills in the start_info page.  Out-params:
 * *store_mfn, *console_mfn, *pvsi (start_info vaddr), *pvss (stack vaddr),
 * *pvke (kernel entry).  Returns 0 on success, -1 on failure; mmu and
 * page_array are freed on both paths.
 */
417 static int setup_guest(int xc_handle,
418 uint32_t dom,
419 char *image, unsigned long image_size,
420 gzFile initrd_gfd, unsigned long initrd_len,
421 unsigned long nr_pages,
422 unsigned long *pvsi, unsigned long *pvke,
423 unsigned long *pvss, vcpu_guest_context_t *ctxt,
424 const char *cmdline,
425 unsigned long shared_info_frame,
426 unsigned long flags,
427 unsigned int store_evtchn, unsigned long *store_mfn,
428 unsigned int console_evtchn, unsigned long *console_mfn)
429 {
430 unsigned long *page_array = NULL;
431 unsigned long count, i;
432 start_info_t *start_info;
433 shared_info_t *shared_info;
434 xc_mmu_t *mmu = NULL;
435 int rc;
437 unsigned long nr_pt_pages;
438 unsigned long physmap_pfn;
439 unsigned long *physmap, *physmap_e;
441 struct load_funcs load_funcs;
442 struct domain_setup_info dsi;
443 unsigned long vinitrd_start;
444 unsigned long vinitrd_end;
445 unsigned long vphysmap_start;
446 unsigned long vphysmap_end;
447 unsigned long vstartinfo_start;
448 unsigned long vstartinfo_end;
449 unsigned long vstoreinfo_start;
450 unsigned long vstoreinfo_end;
451 unsigned long vconsole_start;
452 unsigned long vconsole_end;
453 unsigned long vstack_start;
454 unsigned long vstack_end;
455 unsigned long vpt_start;
456 unsigned long vpt_end;
457 unsigned long v_end;
459 rc = probeimageformat(image, image_size, &load_funcs);
460 if ( rc != 0 )
461 goto error_out;
463 memset(&dsi, 0, sizeof(struct domain_setup_info));
465 rc = (load_funcs.parseimage)(image, image_size, &dsi);
466 if ( rc != 0 )
467 goto error_out;
469 if ( (dsi.v_start & (PAGE_SIZE-1)) != 0 )
470 {
471 PERROR("Guest OS must load to a page boundary.\n");
472 goto error_out;
473 }
475 /*
476 * Why do we need this? The number of page-table frames depends on the
477 * size of the bootstrap address space. But the size of the address space
478 * depends on the number of page-table frames (since each one is mapped
479 * read-only). We have a pair of simultaneous equations in two unknowns,
480 * which we solve by exhaustive search.
481 */
482 vinitrd_start = round_pgup(dsi.v_end);
483 vinitrd_end = vinitrd_start + initrd_len;
484 vphysmap_start = round_pgup(vinitrd_end);
485 vphysmap_end = vphysmap_start + (nr_pages * sizeof(unsigned long));
486 vstartinfo_start = round_pgup(vphysmap_end);
487 vstartinfo_end = vstartinfo_start + PAGE_SIZE;
488 vstoreinfo_start = vstartinfo_end;
489 vstoreinfo_end = vstoreinfo_start + PAGE_SIZE;
490 vconsole_start = vstoreinfo_end;
491 vconsole_end = vconsole_start + PAGE_SIZE;
492 vpt_start = vconsole_end;
/* Grow nr_pt_pages until the tables can map the whole resulting space. */
494 for ( nr_pt_pages = 2; ; nr_pt_pages++ )
495 {
496 vpt_end = vpt_start + (nr_pt_pages * PAGE_SIZE);
497 vstack_start = vpt_end;
498 vstack_end = vstack_start + PAGE_SIZE;
499 v_end = (vstack_end + (1UL<<22)-1) & ~((1UL<<22)-1);
500 if ( (v_end - vstack_end) < (512UL << 10) )
501 v_end += 1UL << 22; /* Add extra 4MB to get >= 512kB padding. */
502 #if defined(__i386__)
503 if (dsi.pae_kernel) {
504 /* FIXME: assumes one L2 pgtable @ 0xc0000000 */
505 if ( (((v_end - dsi.v_start + ((1<<L2_PAGETABLE_SHIFT_PAE)-1)) >>
506 L2_PAGETABLE_SHIFT_PAE) + 2) <= nr_pt_pages )
507 break;
508 } else {
509 if ( (((v_end - dsi.v_start + ((1<<L2_PAGETABLE_SHIFT)-1)) >>
510 L2_PAGETABLE_SHIFT) + 1) <= nr_pt_pages )
511 break;
512 }
513 #endif
514 #if defined(__x86_64__)
515 #define NR(_l,_h,_s) \
516 (((((_h) + ((1UL<<(_s))-1)) & ~((1UL<<(_s))-1)) - \
517 ((_l) & ~((1UL<<(_s))-1))) >> (_s))
518 if ( (1 + /* # L4 */
519 NR(dsi.v_start, v_end, L4_PAGETABLE_SHIFT) + /* # L3 */
520 NR(dsi.v_start, v_end, L3_PAGETABLE_SHIFT) + /* # L2 */
521 NR(dsi.v_start, v_end, L2_PAGETABLE_SHIFT)) /* # L1 */
522 <= nr_pt_pages )
523 break;
524 #endif
525 }
527 #define _p(a) ((void *) (a))
529 printf("VIRTUAL MEMORY ARRANGEMENT:\n"
530 " Loaded kernel: %p->%p\n"
531 " Init. ramdisk: %p->%p\n"
532 " Phys-Mach map: %p->%p\n"
533 " Start info: %p->%p\n"
534 " Store page: %p->%p\n"
535 " Console page: %p->%p\n"
536 " Page tables: %p->%p\n"
537 " Boot stack: %p->%p\n"
538 " TOTAL: %p->%p\n",
539 _p(dsi.v_kernstart), _p(dsi.v_kernend),
540 _p(vinitrd_start), _p(vinitrd_end),
541 _p(vphysmap_start), _p(vphysmap_end),
542 _p(vstartinfo_start), _p(vstartinfo_end),
543 _p(vstoreinfo_start), _p(vstoreinfo_end),
544 _p(vconsole_start), _p(vconsole_end),
545 _p(vpt_start), _p(vpt_end),
546 _p(vstack_start), _p(vstack_end),
547 _p(dsi.v_start), _p(v_end));
548 printf(" ENTRY ADDRESS: %p\n", _p(dsi.v_kernentry));
550 if ( ((v_end - dsi.v_start)>>PAGE_SHIFT) > nr_pages )
551 {
552 PERROR("Initial guest OS requires too much space\n"
553 "(%luMB is greater than %luMB limit)\n",
554 (v_end-dsi.v_start)>>20, nr_pages>>(20-PAGE_SHIFT));
555 goto error_out;
556 }
558 if ( (page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL )
559 {
560 PERROR("Could not allocate memory");
561 goto error_out;
562 }
564 if ( xc_get_pfn_list(xc_handle, dom, page_array, nr_pages) != nr_pages )
565 {
566 PERROR("Could not get the page frame list");
567 goto error_out;
568 }
570 (load_funcs.loadimage)(image, image_size, xc_handle, dom, page_array,
571 &dsi);
573 /* Load the initial ramdisk image. */
574 if ( initrd_len != 0 )
575 {
576 for ( i = (vinitrd_start - dsi.v_start);
577 i < (vinitrd_end - dsi.v_start); i += PAGE_SIZE )
578 {
579 char page[PAGE_SIZE];
/* NOTE(review): only -1 is treated as error; a short read passes. */
580 if ( gzread(initrd_gfd, page, PAGE_SIZE) == -1 )
581 {
582 PERROR("Error reading initrd image, could not");
583 goto error_out;
584 }
585 xc_copy_to_domain_page(xc_handle, dom,
586 page_array[i>>PAGE_SHIFT], page);
587 }
588 }
590 if ( (mmu = xc_init_mmu_updates(xc_handle, dom)) == NULL )
591 goto error_out;
593 /* setup page tables */
594 #if defined(__i386__)
595 if (dsi.pae_kernel)
596 rc = setup_pg_tables_pae(xc_handle, dom, ctxt,
597 dsi.v_start, v_end,
598 page_array, vpt_start, vpt_end);
599 else
600 rc = setup_pg_tables(xc_handle, dom, ctxt,
601 dsi.v_start, v_end,
602 page_array, vpt_start, vpt_end);
603 #endif
604 #if defined(__x86_64__)
605 rc = setup_pg_tables_64(xc_handle, dom, ctxt,
606 dsi.v_start, v_end,
607 page_array, vpt_start, vpt_end);
608 #endif
609 if (0 != rc)
610 goto error_out;
612 /* Write the phys->machine and machine->phys table entries. */
613 physmap_pfn = (vphysmap_start - dsi.v_start) >> PAGE_SHIFT;
614 physmap = physmap_e = xc_map_foreign_range(
615 xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
616 page_array[physmap_pfn++]);
/* Queue an M2P update per page and fill the in-guest P2M array,
 * remapping the local window one physmap page at a time. */
618 for ( count = 0; count < nr_pages; count++ )
619 {
620 if ( xc_add_mmu_update(
621 xc_handle, mmu,
622 ((uint64_t)page_array[count] << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE,
623 count) )
624 {
625 fprintf(stderr,"m2p update failure p=%lx m=%lx\n",
626 count, page_array[count]);
627 munmap(physmap, PAGE_SIZE);
628 goto error_out;
629 }
630 *physmap_e++ = page_array[count];
631 if ( ((unsigned long)physmap_e & (PAGE_SIZE-1)) == 0 )
632 {
633 munmap(physmap, PAGE_SIZE);
634 physmap = physmap_e = xc_map_foreign_range(
635 xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
636 page_array[physmap_pfn++]);
637 }
638 }
639 munmap(physmap, PAGE_SIZE);
641 #if defined(__i386__)
642 /*
643 * Pin down l2tab addr as page dir page - causes hypervisor to provide
644 * correct protection for the page
645 */
646 if (dsi.pae_kernel) {
647 if ( pin_table(xc_handle, MMUEXT_PIN_L3_TABLE,
648 ctxt->ctrlreg[3] >> PAGE_SHIFT, dom) )
649 goto error_out;
650 } else {
651 if ( pin_table(xc_handle, MMUEXT_PIN_L2_TABLE,
652 ctxt->ctrlreg[3] >> PAGE_SHIFT, dom) )
653 goto error_out;
654 }
655 #endif
657 #if defined(__x86_64__)
658 /*
659 * Pin down l4tab addr as page dir page - causes hypervisor to provide
660 * correct protection for the page
661 */
662 if ( pin_table(xc_handle, MMUEXT_PIN_L4_TABLE,
663 ctxt->ctrlreg[3] >> PAGE_SHIFT, dom) )
664 goto error_out;
665 #endif
667 *store_mfn = page_array[(vstoreinfo_start-dsi.v_start) >> PAGE_SHIFT];
668 *console_mfn = page_array[(vconsole_start-dsi.v_start) >> PAGE_SHIFT];
669 if ( xc_clear_domain_page(xc_handle, dom, *store_mfn) ||
670 xc_clear_domain_page(xc_handle, dom, *console_mfn) )
671 goto error_out;
/* Fill in the start_info page the guest kernel reads at boot. */
673 start_info = xc_map_foreign_range(
674 xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
675 page_array[(vstartinfo_start-dsi.v_start)>>PAGE_SHIFT]);
676 memset(start_info, 0, sizeof(*start_info));
677 rc = xc_version(xc_handle, XENVER_version, NULL);
678 sprintf(start_info->magic, "xen-%i.%i-x86_%d%s",
679 rc >> 16, rc & (0xFFFF), (unsigned int)sizeof(long)*8,
680 dsi.pae_kernel ? "p" : "");
681 start_info->nr_pages = nr_pages;
682 start_info->shared_info = shared_info_frame << PAGE_SHIFT;
683 start_info->flags = flags;
684 start_info->pt_base = vpt_start;
685 start_info->nr_pt_frames = nr_pt_pages;
686 start_info->mfn_list = vphysmap_start;
687 start_info->store_mfn = *store_mfn;
688 start_info->store_evtchn = store_evtchn;
689 start_info->console_mfn = *console_mfn;
690 start_info->console_evtchn = console_evtchn;
691 if ( initrd_len != 0 )
692 {
693 start_info->mod_start = vinitrd_start;
694 start_info->mod_len = initrd_len;
695 }
696 strncpy((char *)start_info->cmd_line, cmdline, MAX_GUEST_CMDLINE);
697 start_info->cmd_line[MAX_GUEST_CMDLINE-1] = '\0';
698 munmap(start_info, PAGE_SIZE);
700 /* shared_info page starts its life empty. */
701 shared_info = xc_map_foreign_range(
702 xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, shared_info_frame);
703 memset(shared_info, 0, sizeof(shared_info_t));
704 /* Mask all upcalls... */
705 for ( i = 0; i < MAX_VIRT_CPUS; i++ )
706 shared_info->vcpu_info[i].evtchn_upcall_mask = 1;
708 munmap(shared_info, PAGE_SIZE);
710 /* Send the page update requests down to the hypervisor. */
711 if ( xc_finish_mmu_updates(xc_handle, mmu) )
712 goto error_out;
714 free(mmu);
715 free(page_array);
717 *pvsi = vstartinfo_start;
718 *pvss = vstack_start;
719 *pvke = dsi.v_kernentry;
721 return 0;
723 error_out:
724 free(mmu);
725 free(page_array);
726 return -1;
727 }
728 #endif
/*
 * xc_linux_build: construct a paused domain from a kernel image and
 * optional ramdisk, then load its initial VCPU context via
 * DOM0_SETVCPUCONTEXT.  Returns 0 on success, non-zero on failure.
 *
 * FIXES:
 *  - nr_pages was declared 'unsigned long', so the '< 0' test could never
 *    detect a failure return from get_tot_pages(); it is now signed.
 *  - on mlock() failure the function returned without freeing 'image' or
 *    closing the initrd descriptors; it now takes the common error path.
 */
730 int xc_linux_build(int xc_handle,
731 uint32_t domid,
732 const char *image_name,
733 const char *ramdisk_name,
734 const char *cmdline,
735 unsigned long flags,
736 unsigned int store_evtchn,
737 unsigned long *store_mfn,
738 unsigned int console_evtchn,
739 unsigned long *console_mfn)
740 {
741 dom0_op_t launch_op;
742 DECLARE_DOM0_OP;
743 int initrd_fd = -1;
744 gzFile initrd_gfd = NULL;
745 int rc, i;
746 vcpu_guest_context_t st_ctxt, *ctxt = &st_ctxt;
/* Signed so a negative error return from get_tot_pages() is detectable. */
747 long nr_pages;
748 char *image = NULL;
749 unsigned long image_size, initrd_size=0;
750 unsigned long vstartinfo_start, vkern_entry, vstack_start;
752 if ( (nr_pages = get_tot_pages(xc_handle, domid)) < 0 )
753 {
754 PERROR("Could not find total pages for domain");
755 goto error_out;
756 }
758 if ( (image = xc_read_kernel_image(image_name, &image_size)) == NULL )
759 goto error_out;
761 if ( (ramdisk_name != NULL) && (strlen(ramdisk_name) != 0) )
762 {
763 if ( (initrd_fd = open(ramdisk_name, O_RDONLY)) < 0 )
764 {
765 PERROR("Could not open the initial ramdisk image");
766 goto error_out;
767 }
769 initrd_size = xc_get_filesz(initrd_fd);
/* gzdopen takes ownership of initrd_fd; close whichever is live on error. */
771 if ( (initrd_gfd = gzdopen(initrd_fd, "rb")) == NULL )
772 {
773 PERROR("Could not allocate decompression state for initrd");
774 goto error_out;
775 }
776 }
778 #ifdef VALGRIND
779 memset(&st_ctxt, 0, sizeof(st_ctxt));
780 #endif
/* The context is passed to the hypervisor; it must not be paged out. */
782 if ( mlock(&st_ctxt, sizeof(st_ctxt) ) )
783 {
784 PERROR("%s: ctxt mlock failed", __func__);
785 goto error_out;
786 }
788 op.cmd = DOM0_GETDOMAININFO;
789 op.u.getdomaininfo.domain = (domid_t)domid;
790 if ( (xc_dom0_op(xc_handle, &op) < 0) ||
791 ((uint16_t)op.u.getdomaininfo.domain != domid) )
792 {
793 PERROR("Could not get info on domain");
794 goto error_out;
795 }
797 if ( xc_vcpu_getcontext(xc_handle, domid, 0, ctxt) )
798 {
799 PERROR("Could not get vcpu context");
800 goto error_out;
801 }
/* Refuse to build into a running or already-built domain. */
803 if ( !(op.u.getdomaininfo.flags & DOMFLAGS_PAUSED) || already_built(ctxt) )
804 {
805 ERROR("Domain is already constructed");
806 goto error_out;
807 }
809 if ( setup_guest(xc_handle, domid, image, image_size,
810 initrd_gfd, initrd_size, nr_pages,
811 &vstartinfo_start, &vkern_entry,
812 &vstack_start, ctxt, cmdline,
813 op.u.getdomaininfo.shared_info_frame,
814 flags, store_evtchn, store_mfn,
815 console_evtchn, console_mfn) < 0 )
816 {
817 ERROR("Error constructing guest OS");
818 goto error_out;
819 }
821 if ( initrd_fd >= 0 )
822 close(initrd_fd);
823 if ( initrd_gfd )
824 gzclose(initrd_gfd);
825 free(image);
827 #ifdef __ia64__
828 /* based on new_thread in xen/arch/ia64/domain.c */
829 ctxt->flags = 0;
830 ctxt->shared.flags = flags;
831 ctxt->shared.start_info_pfn = nr_pages - 3; /* metaphysical */
832 ctxt->regs.cr_ipsr = 0; /* all necessary bits filled by hypervisor */
833 ctxt->regs.cr_iip = vkern_entry;
834 ctxt->regs.cr_ifs = 1UL << 63;
835 ctxt->regs.ar_fpsr = xc_ia64_fpsr_default();
836 /* currently done by hypervisor, should move here */
837 /* ctxt->regs.r28 = dom_fw_setup(); */
838 ctxt->vcpu.privregs = 0;
839 ctxt->sys_pgnr = 3;
840 i = 0; /* silence unused variable warning */
841 #else /* x86 */
842 /*
843 * Initial register values:
844 * DS,ES,FS,GS = FLAT_KERNEL_DS
845 * CS:EIP = FLAT_KERNEL_CS:start_pc
846 * SS:ESP = FLAT_KERNEL_DS:start_stack
847 * ESI = start_info
848 * [EAX,EBX,ECX,EDX,EDI,EBP are zero]
849 * EFLAGS = IF | 2 (bit 1 is reserved and should always be 1)
850 */
851 ctxt->user_regs.ds = FLAT_KERNEL_DS;
852 ctxt->user_regs.es = FLAT_KERNEL_DS;
853 ctxt->user_regs.fs = FLAT_KERNEL_DS;
854 ctxt->user_regs.gs = FLAT_KERNEL_DS;
855 ctxt->user_regs.ss = FLAT_KERNEL_SS;
856 ctxt->user_regs.cs = FLAT_KERNEL_CS;
857 ctxt->user_regs.eip = vkern_entry;
858 ctxt->user_regs.esp = vstack_start + PAGE_SIZE;
859 ctxt->user_regs.esi = vstartinfo_start;
860 ctxt->user_regs.eflags = 1 << 9; /* Interrupt Enable */
862 /* FPU is set up to default initial state. */
863 memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
865 /* Virtual IDT is empty at start-of-day. */
866 for ( i = 0; i < 256; i++ )
867 {
868 ctxt->trap_ctxt[i].vector = i;
869 ctxt->trap_ctxt[i].cs = FLAT_KERNEL_CS;
870 }
872 /* No LDT. */
873 ctxt->ldt_ents = 0;
875 /* Use the default Xen-provided GDT. */
876 ctxt->gdt_ents = 0;
878 /* Ring 1 stack is the initial stack. */
879 ctxt->kernel_ss = FLAT_KERNEL_SS;
880 ctxt->kernel_sp = vstack_start + PAGE_SIZE;
882 /* No debugging. */
883 memset(ctxt->debugreg, 0, sizeof(ctxt->debugreg));
885 /* No callback handlers. */
886 #if defined(__i386__)
887 ctxt->event_callback_cs = FLAT_KERNEL_CS;
888 ctxt->event_callback_eip = 0;
889 ctxt->failsafe_callback_cs = FLAT_KERNEL_CS;
890 ctxt->failsafe_callback_eip = 0;
891 #elif defined(__x86_64__)
892 ctxt->event_callback_eip = 0;
893 ctxt->failsafe_callback_eip = 0;
894 ctxt->syscall_callback_eip = 0;
895 #endif
896 #endif /* x86 */
898 memset( &launch_op, 0, sizeof(launch_op) );
900 launch_op.u.setvcpucontext.domain = (domid_t)domid;
901 launch_op.u.setvcpucontext.vcpu = 0;
902 launch_op.u.setvcpucontext.ctxt = ctxt;
904 launch_op.cmd = DOM0_SETVCPUCONTEXT;
905 rc = xc_dom0_op(xc_handle, &launch_op);
907 return rc;
909 error_out:
910 if ( initrd_gfd != NULL )
911 gzclose(initrd_gfd);
912 else if ( initrd_fd >= 0 )
913 close(initrd_fd);
914 free(image);
915 return -1;
916 }
918 /*
919 * Local variables:
920 * mode: C
921 * c-set-style: "BSD"
922 * c-basic-offset: 4
923 * tab-width: 4
924 * indent-tabs-mode: nil
925 * End:
926 */