direct-io.hg

view tools/libxc/xc_linux_restore.c @ 3435:0fd048d86eed

bitkeeper revision 1.1159.220.3 (41e670c37jmaTxUns3KlvsbVRCg-UA)

The getdomaininfo hypercall now listens to the exec_domain parameter
that was already passed to it, and performs some basic sanity checking.

Added exec_domain (aka vcpu) parameters to xc_domain_getfullinfo()
and xc_domain_get_cpu_usage().
author mafetter@fleming.research
date Thu Jan 13 12:59:47 2005 +0000 (2005-01-13)
parents 2419f5c72fe5
children 6096356005ba
line source
1 /******************************************************************************
2 * xc_linux_restore.c
3 *
4 * Restore the state of a Linux session.
5 *
6 * Copyright (c) 2003, K A Fraser.
7 */
9 #include "xc_private.h"
10 #include <xen/linux/suspend.h>
/* Upper bound on the number of page frames accepted in one batch. */
#define MAX_BATCH_SIZE 1024

/* Set to non-zero to compile in the DPRINTF() debug tracing. */
#define DEBUG 0

/* DPRINTF(): printf-style trace that expands to a no-op unless DEBUG is set. */
#if !DEBUG
#define DPRINTF(fmt, args...) ((void)0)
#else
#define DPRINTF(fmt, args...) printf ( fmt , ## args )
#endif
22 /** Read the vmconfig string from the state input.
23 * It is stored as a 4-byte count 'n' followed by n bytes.
24 * The config data is stored in a new string in 'ioctxt->vmconfig',
25 * and is null-terminated. The count is stored in 'ioctxt->vmconfig_n'.
26 *
27 * @param ioctxt i/o context
28 * @return 0 on success, non-zero on error.
29 */
30 static int read_vmconfig(XcIOContext *ioctxt)
31 {
32 int err = -1;
34 if ( xcio_read(ioctxt, &ioctxt->vmconfig_n, sizeof(ioctxt->vmconfig_n)) )
35 goto exit;
37 ioctxt->vmconfig = malloc(ioctxt->vmconfig_n + 1);
38 if ( ioctxt->vmconfig == NULL )
39 goto exit;
41 if ( xcio_read(ioctxt, ioctxt->vmconfig, ioctxt->vmconfig_n) )
42 goto exit;
44 ioctxt->vmconfig[ioctxt->vmconfig_n] = '\0';
45 err = 0;
47 exit:
48 if ( err )
49 {
50 if ( ioctxt->vmconfig != NULL )
51 free(ioctxt->vmconfig);
52 ioctxt->vmconfig = NULL;
53 ioctxt->vmconfig_n = 0;
54 }
55 return err;
56 }
58 int xc_linux_restore(int xc_handle, XcIOContext *ioctxt)
59 {
60 dom0_op_t op;
61 int rc = 1, i, n, k;
62 unsigned long mfn, pfn, xpfn;
63 unsigned int prev_pc, this_pc;
64 u32 dom = 0;
65 int verify = 0;
67 /* Number of page frames in use by this Linux session. */
68 unsigned long nr_pfns;
70 /* The new domain's shared-info frame number. */
71 unsigned long shared_info_frame;
72 unsigned char shared_info_page[PAGE_SIZE]; /* saved contents from file */
73 shared_info_t *shared_info = (shared_info_t *)shared_info_page;
75 /* A copy of the CPU context of the guest. */
76 full_execution_context_t ctxt;
78 /* First 16 bytes of the state file must contain 'LinuxGuestRecord'. */
79 char signature[16];
81 /* A table containg the type of each PFN (/not/ MFN!). */
82 unsigned long *pfn_type = NULL;
84 /* A table of MFNs to map in the current region */
85 unsigned long *region_mfn = NULL;
87 /* A temporary mapping, and a copy, of one frame of guest memory. */
88 unsigned long *ppage = NULL;
90 /* A copy of the pfn-to-mfn table frame list. */
91 unsigned long pfn_to_mfn_frame_list[1024];
93 /* A table mapping each PFN to its new MFN. */
94 unsigned long *pfn_to_mfn_table = NULL;
96 /* used by mapper for updating the domain's copy of the table */
97 unsigned long *live_pfn_to_mfn_table = NULL;
99 /* A temporary mapping of the guest's suspend record. */
100 suspend_record_t *p_srec;
102 char *region_base;
104 mmu_t *mmu = NULL;
106 /* used by debug verify code */
107 unsigned long buf[PAGE_SIZE/sizeof(unsigned long)];
109 xcio_info(ioctxt, "xc_linux_restore start\n");
111 if ( mlock(&ctxt, sizeof(ctxt) ) )
112 {
113 /* needed for when we do the build dom0 op,
114 but might as well do early */
115 PERROR("Unable to mlock ctxt");
116 return 1;
117 }
119 /* Start reading the saved-domain record. */
120 if ( xcio_read(ioctxt, signature, 16) ||
121 (memcmp(signature, "LinuxGuestRecord", 16) != 0) )
122 {
123 xcio_error(ioctxt, "Unrecognised state format -- no signature found");
124 goto out;
125 }
127 if ( xcio_read(ioctxt, &nr_pfns, sizeof(unsigned long)) ||
128 xcio_read(ioctxt, pfn_to_mfn_frame_list, PAGE_SIZE) )
129 {
130 xcio_error(ioctxt, "Error reading header");
131 goto out;
132 }
134 if ( read_vmconfig(ioctxt) )
135 {
136 xcio_error(ioctxt, "Error writing vmconfig");
137 goto out;
138 }
140 if ( nr_pfns > 1024*1024 )
141 {
142 xcio_error(ioctxt, "Invalid state file -- pfn count out of range");
143 goto out;
144 }
146 /* We want zeroed memory so use calloc rather than malloc. */
147 pfn_to_mfn_table = calloc(1, 4 * nr_pfns);
148 pfn_type = calloc(1, 4 * nr_pfns);
149 region_mfn = calloc(1, 4 * MAX_BATCH_SIZE);
151 if ( (pfn_to_mfn_table == NULL) ||
152 (pfn_type == NULL) ||
153 (region_mfn == NULL) )
154 {
155 errno = ENOMEM;
156 goto out;
157 }
159 if ( mlock(region_mfn, 4 * MAX_BATCH_SIZE ) )
160 {
161 xcio_error(ioctxt, "Could not mlock region_mfn");
162 goto out;
163 }
165 /* Create domain on CPU -1 so that it may auto load-balance in future. */
166 if ( xc_domain_create(xc_handle, nr_pfns * (PAGE_SIZE / 1024),
167 -1, 1, &dom) )
168 {
169 xcio_error(ioctxt, "Could not create domain. pfns=%d, %dKB",
170 nr_pfns,nr_pfns * (PAGE_SIZE / 1024));
171 goto out;
172 }
174 ioctxt->domain = dom;
175 xcio_info(ioctxt, "Created domain %ld\n",dom);
177 /* Get the domain's shared-info frame. */
178 op.cmd = DOM0_GETDOMAININFO;
179 op.u.getdomaininfo.domain = (domid_t)dom;
180 op.u.getdomaininfo.exec_domain = 0;
181 op.u.getdomaininfo.ctxt = NULL;
182 if ( do_dom0_op(xc_handle, &op) < 0 )
183 {
184 xcio_error(ioctxt, "Could not get information on new domain");
185 goto out;
186 }
187 shared_info_frame = op.u.getdomaininfo.shared_info_frame;
189 if(ioctxt->flags & XCFLAGS_CONFIGURE)
190 {
191 if(xcio_configure_domain(ioctxt))
192 {
193 xcio_error(ioctxt, "Configuring domain failed");
194 goto out;
195 }
196 }
198 /* Build the pfn-to-mfn table. We choose MFN ordering returned by Xen. */
199 if ( xc_get_pfn_list(xc_handle, dom, pfn_to_mfn_table, nr_pfns) != nr_pfns )
200 {
201 xcio_error(ioctxt, "Did not read correct number of frame "
202 "numbers for new dom");
203 goto out;
204 }
206 if ( (mmu = init_mmu_updates(xc_handle, dom)) == NULL )
207 {
208 xcio_error(ioctxt, "Could not initialise for MMU updates");
209 goto out;
210 }
212 xcio_info(ioctxt, "Reloading memory pages: 0%%");
214 /*
215 * Now simply read each saved frame into its new machine frame.
216 * We uncanonicalise page tables as we go.
217 */
218 prev_pc = 0;
220 n = 0;
221 while ( 1 )
222 {
223 int j;
224 unsigned long region_pfn_type[MAX_BATCH_SIZE];
226 this_pc = (n * 100) / nr_pfns;
227 if ( (this_pc - prev_pc) >= 5 )
228 {
229 xcio_info(ioctxt, "\b\b\b\b%3d%%", this_pc);
230 prev_pc = this_pc;
231 }
233 if ( xcio_read(ioctxt, &j, sizeof(int)) )
234 {
235 xcio_error(ioctxt, "Error when reading from state file");
236 goto out;
237 }
239 DPRINTF("batch %d\n",j);
241 if ( j == -1 )
242 {
243 verify = 1;
244 printf("Entering page verify mode\n");
245 continue;
246 }
248 if ( j == 0 )
249 break; /* our work here is done */
251 if ( j > MAX_BATCH_SIZE )
252 {
253 xcio_error(ioctxt, "Max batch size exceeded. Giving up.");
254 goto out;
255 }
257 if ( xcio_read(ioctxt, region_pfn_type, j*sizeof(unsigned long)) ) {
258 xcio_error(ioctxt, "Error when reading from state file");
259 goto out;
260 }
262 for ( i = 0; i < j; i++ )
263 {
264 if ( (region_pfn_type[i] & LTAB_MASK) == XTAB)
265 {
266 region_mfn[i] = 0; /* we know map will fail, but don't care */
267 }
268 else
269 {
270 pfn = region_pfn_type[i] & ~LTAB_MASK;
271 region_mfn[i] = pfn_to_mfn_table[pfn];
272 }
273 }
275 if ( (region_base = xc_map_foreign_batch( xc_handle, dom,
276 PROT_WRITE,
277 region_mfn,
278 j )) == 0 )
279 {
280 xcio_error(ioctxt, "map batch failed");
281 goto out;
282 }
284 for ( i = 0; i < j; i++ )
285 {
286 unsigned long *ppage;
288 pfn = region_pfn_type[i] & ~LTAB_MASK;
290 if ( (region_pfn_type[i] & LTAB_MASK) == XTAB) continue;
292 if (pfn>nr_pfns)
293 {
294 xcio_error(ioctxt, "pfn out of range");
295 goto out;
296 }
298 region_pfn_type[i] &= LTAB_MASK;
300 pfn_type[pfn] = region_pfn_type[i];
302 mfn = pfn_to_mfn_table[pfn];
304 if ( verify )
305 ppage = (unsigned long*) buf; /* debug case */
306 else
307 ppage = (unsigned long*) (region_base + i*PAGE_SIZE);
309 if ( xcio_read(ioctxt, ppage, PAGE_SIZE) )
310 {
311 xcio_error(ioctxt, "Error when reading from state file");
312 goto out;
313 }
315 switch( region_pfn_type[i] & LTABTYPE_MASK )
316 {
317 case 0:
318 break;
320 case L1TAB:
321 {
322 for ( k = 0; k < 1024; k++ )
323 {
324 if ( ppage[k] & _PAGE_PRESENT )
325 {
326 xpfn = ppage[k] >> PAGE_SHIFT;
327 if ( xpfn >= nr_pfns )
328 {
329 xcio_error(ioctxt, "Frame number in type %lu page "
330 "table is out of range. i=%d k=%d "
331 "pfn=0x%lx nr_pfns=%lu",
332 region_pfn_type[i]>>28, i,
333 k, xpfn, nr_pfns);
334 goto out;
335 }
337 ppage[k] &= (PAGE_SIZE - 1) &
338 ~(_PAGE_GLOBAL | _PAGE_PAT);
339 ppage[k] |= pfn_to_mfn_table[xpfn] << PAGE_SHIFT;
340 }
341 }
342 }
343 break;
345 case L2TAB:
346 {
347 for ( k = 0;
348 k < (HYPERVISOR_VIRT_START>>L2_PAGETABLE_SHIFT);
349 k++ )
350 {
351 if ( ppage[k] & _PAGE_PRESENT )
352 {
353 xpfn = ppage[k] >> PAGE_SHIFT;
355 if ( xpfn >= nr_pfns )
356 {
357 xcio_error(ioctxt, "Frame number in type %lu page"
358 " table is out of range. i=%d k=%d "
359 "pfn=%lu nr_pfns=%lu",
360 region_pfn_type[i]>>28, i, k,
361 xpfn, nr_pfns);
362 goto out;
363 }
365 ppage[k] &= (PAGE_SIZE - 1) &
366 ~(_PAGE_GLOBAL | _PAGE_PSE);
367 ppage[k] |= pfn_to_mfn_table[xpfn] << PAGE_SHIFT;
368 }
369 }
370 }
371 break;
373 default:
374 xcio_error(ioctxt, "Bogus page type %lx page table is "
375 "out of range. i=%d nr_pfns=%lu",
376 region_pfn_type[i], i, nr_pfns);
377 goto out;
379 } /* end of page type switch statement */
381 if ( verify )
382 {
383 int res = memcmp(buf, (region_base + i*PAGE_SIZE), PAGE_SIZE );
384 if ( res )
385 {
386 int v;
387 printf("************** pfn=%lx type=%lx gotcs=%08lx "
388 "actualcs=%08lx\n", pfn, pfn_type[pfn],
389 csum_page(region_base + i*PAGE_SIZE),
390 csum_page(buf));
391 for ( v = 0; v < 4; v++ )
392 {
393 unsigned long *p = (unsigned long *)
394 (region_base + i*PAGE_SIZE);
395 if ( buf[v] != p[v] )
396 printf(" %d: %08lx %08lx\n",
397 v, buf[v], p[v] );
398 }
399 }
400 }
402 if ( add_mmu_update(xc_handle, mmu,
403 (mfn<<PAGE_SHIFT) | MMU_MACHPHYS_UPDATE, pfn) )
404 {
405 printf("machpys mfn=%ld pfn=%ld\n",mfn,pfn);
406 goto out;
407 }
409 } /* end of 'batch' for loop */
411 munmap( region_base, j*PAGE_SIZE );
412 n+=j; /* crude stats */
413 }
415 xcio_info(ioctxt, "Received all pages\n");
417 /*
418 * Pin page tables. Do this after writing to them as otherwise Xen
419 * will barf when doing the type-checking.
420 */
421 for ( i = 0; i < nr_pfns; i++ )
422 {
423 if ( pfn_type[i] == (L1TAB|LPINTAB) )
424 {
425 if ( add_mmu_update(xc_handle, mmu,
426 (pfn_to_mfn_table[i]<<PAGE_SHIFT) |
427 MMU_EXTENDED_COMMAND,
428 MMUEXT_PIN_L1_TABLE) ) {
429 printf("ERR pin L1 pfn=%lx mfn=%lx\n",
430 (unsigned long)i, pfn_to_mfn_table[i]);
431 goto out;
432 }
433 }
434 }
436 /* must pin all L1's before L2's (need consistent va back ptr) */
437 for ( i = 0; i < nr_pfns; i++ )
438 {
439 if ( pfn_type[i] == (L2TAB|LPINTAB) )
440 {
441 if ( add_mmu_update(xc_handle, mmu,
442 (pfn_to_mfn_table[i]<<PAGE_SHIFT) |
443 MMU_EXTENDED_COMMAND,
444 MMUEXT_PIN_L2_TABLE) )
445 {
446 printf("ERR pin L2 pfn=%lx mfn=%lx\n",
447 (unsigned long)i, pfn_to_mfn_table[i]);
448 goto out;
449 }
450 }
451 }
453 if ( finish_mmu_updates(xc_handle, mmu) ) goto out;
455 xcio_info(ioctxt, "\b\b\b\b100%%\n");
456 xcio_info(ioctxt, "Memory reloaded.\n");
458 /* Get the list of PFNs that are not in the psuedo-phys map */
459 {
460 unsigned int count, *pfntab;
461 int rc;
463 if ( xcio_read(ioctxt, &count, sizeof(count)) )
464 {
465 xcio_error(ioctxt, "Error when reading from state file");
466 goto out;
467 }
469 pfntab = malloc( sizeof(unsigned int) * count );
470 if ( pfntab == NULL )
471 {
472 xcio_error(ioctxt, "Out of memory");
473 goto out;
474 }
476 if ( xcio_read(ioctxt, pfntab, sizeof(unsigned int)*count) )
477 {
478 xcio_error(ioctxt, "Error when reading pfntab from state file");
479 goto out;
480 }
482 for ( i = 0; i < count; i++ )
483 {
484 unsigned long pfn = pfntab[i];
485 pfntab[i]=pfn_to_mfn_table[pfn];
486 pfn_to_mfn_table[pfn] = 0x80000001; // not in pmap
487 }
489 if ( count > 0 )
490 {
491 if ( (rc = do_dom_mem_op( xc_handle,
492 MEMOP_decrease_reservation,
493 pfntab, count, 0, dom )) <0 )
494 {
495 xcio_error(ioctxt, "Could not decrease reservation : %d",rc);
496 goto out;
497 }
498 else
499 {
500 printf("Decreased reservation by %d pages\n", count);
501 }
502 }
503 }
505 if ( xcio_read(ioctxt, &ctxt, sizeof(ctxt)) ||
506 xcio_read(ioctxt, shared_info_page, PAGE_SIZE) )
507 {
508 xcio_error(ioctxt, "Error when reading from state file");
509 goto out;
510 }
512 /* Uncanonicalise the suspend-record frame number and poke resume rec. */
513 pfn = ctxt.cpu_ctxt.esi;
514 if ( (pfn >= nr_pfns) || (pfn_type[pfn] != NOTAB) )
515 {
516 xcio_error(ioctxt, "Suspend record frame number is bad");
517 goto out;
518 }
519 ctxt.cpu_ctxt.esi = mfn = pfn_to_mfn_table[pfn];
520 p_srec = xc_map_foreign_range(
521 xc_handle, dom, PAGE_SIZE, PROT_WRITE, mfn);
522 p_srec->resume_info.nr_pages = nr_pfns;
523 p_srec->resume_info.shared_info = shared_info_frame << PAGE_SHIFT;
524 p_srec->resume_info.flags = 0;
525 munmap(p_srec, PAGE_SIZE);
527 /* Uncanonicalise each GDT frame number. */
528 if ( ctxt.gdt_ents > 8192 )
529 {
530 xcio_error(ioctxt, "GDT entry count out of range");
531 goto out;
532 }
534 for ( i = 0; i < ctxt.gdt_ents; i += 512 )
535 {
536 pfn = ctxt.gdt_frames[i];
537 if ( (pfn >= nr_pfns) || (pfn_type[pfn] != NOTAB) )
538 {
539 xcio_error(ioctxt, "GDT frame number is bad");
540 goto out;
541 }
542 ctxt.gdt_frames[i] = pfn_to_mfn_table[pfn];
543 }
545 /* Uncanonicalise the page table base pointer. */
546 pfn = ctxt.pt_base >> PAGE_SHIFT;
547 if ( (pfn >= nr_pfns) || ((pfn_type[pfn]&LTABTYPE_MASK) != L2TAB) )
548 {
549 printf("PT base is bad. pfn=%lu nr=%lu type=%08lx %08lx\n",
550 pfn, nr_pfns, pfn_type[pfn], (unsigned long)L2TAB);
551 xcio_error(ioctxt, "PT base is bad.");
552 goto out;
553 }
554 ctxt.pt_base = pfn_to_mfn_table[pfn] << PAGE_SHIFT;
556 /* clear any pending events and the selector */
557 memset(&(shared_info->evtchn_pending[0]), 0,
558 sizeof (shared_info->evtchn_pending));
559 for ( i = 0; i < MAX_VIRT_CPUS; i++ )
560 shared_info->vcpu_data[i].evtchn_pending_sel = 0;
562 /* Copy saved contents of shared-info page. No checking needed. */
563 ppage = xc_map_foreign_range(
564 xc_handle, dom, PAGE_SIZE, PROT_WRITE, shared_info_frame);
565 memcpy(ppage, shared_info, sizeof(shared_info_t));
566 munmap(ppage, PAGE_SIZE);
568 /* Uncanonicalise the pfn-to-mfn table frame-number list. */
569 for ( i = 0; i < (nr_pfns+1023)/1024; i++ )
570 {
571 unsigned long pfn, mfn;
573 pfn = pfn_to_mfn_frame_list[i];
574 if ( (pfn >= nr_pfns) || (pfn_type[pfn] != NOTAB) )
575 {
576 xcio_error(ioctxt, "PFN-to-MFN frame number is bad");
577 goto out;
578 }
579 mfn = pfn_to_mfn_table[pfn];
580 pfn_to_mfn_frame_list[i] = mfn;
581 }
583 if ( (live_pfn_to_mfn_table =
584 xc_map_foreign_batch(xc_handle, dom,
585 PROT_WRITE,
586 pfn_to_mfn_frame_list,
587 (nr_pfns+1023)/1024 )) == 0 )
588 {
589 xcio_error(ioctxt, "Couldn't map pfn_to_mfn table");
590 goto out;
591 }
593 memcpy(live_pfn_to_mfn_table, pfn_to_mfn_table,
594 nr_pfns*sizeof(unsigned long) );
596 munmap(live_pfn_to_mfn_table, ((nr_pfns+1023)/1024)*PAGE_SIZE);
598 /*
599 * Safety checking of saved context:
600 * 1. cpu_ctxt is fine, as Xen checks that on context switch.
601 * 2. fpu_ctxt is fine, as it can't hurt Xen.
602 * 3. trap_ctxt needs the code selectors checked.
603 * 4. fast_trap_idx is checked by Xen.
604 * 5. ldt base must be page-aligned, no more than 8192 ents, ...
605 * 6. gdt already done, and further checking is done by Xen.
606 * 7. check that guestos_ss is safe.
607 * 8. pt_base is already done.
608 * 9. debugregs are checked by Xen.
609 * 10. callback code selectors need checking.
610 */
611 for ( i = 0; i < 256; i++ )
612 {
613 ctxt.trap_ctxt[i].vector = i;
614 if ( (ctxt.trap_ctxt[i].cs & 3) == 0 )
615 ctxt.trap_ctxt[i].cs = FLAT_GUESTOS_CS;
616 }
617 if ( (ctxt.guestos_ss & 3) == 0 )
618 ctxt.guestos_ss = FLAT_GUESTOS_DS;
619 if ( (ctxt.event_callback_cs & 3) == 0 )
620 ctxt.event_callback_cs = FLAT_GUESTOS_CS;
621 if ( (ctxt.failsafe_callback_cs & 3) == 0 )
622 ctxt.failsafe_callback_cs = FLAT_GUESTOS_CS;
623 if ( ((ctxt.ldt_base & (PAGE_SIZE - 1)) != 0) ||
624 (ctxt.ldt_ents > 8192) ||
625 (ctxt.ldt_base > HYPERVISOR_VIRT_START) ||
626 ((ctxt.ldt_base + ctxt.ldt_ents*8) > HYPERVISOR_VIRT_START) )
627 {
628 xcio_error(ioctxt, "Bad LDT base or size");
629 goto out;
630 }
632 xcio_info(ioctxt, "Domain ready to be built.\n");
634 op.cmd = DOM0_BUILDDOMAIN;
635 op.u.builddomain.domain = (domid_t)dom;
636 op.u.builddomain.ctxt = &ctxt;
637 rc = do_dom0_op(xc_handle, &op);
639 if ( rc != 0 )
640 {
641 xcio_error(ioctxt, "Couldn't build the domain");
642 goto out;
643 }
645 if ( ioctxt->flags & XCFLAGS_CONFIGURE )
646 {
647 xcio_info(ioctxt, "Domain ready to be unpaused\n");
648 op.cmd = DOM0_UNPAUSEDOMAIN;
649 op.u.unpausedomain.domain = (domid_t)dom;
650 rc = do_dom0_op(xc_handle, &op);
651 }
653 if ( rc == 0 )
654 {
655 /* Success: print the domain id. */
656 xcio_info(ioctxt, "DOM=%lu\n", dom);
657 return 0;
658 }
661 out:
662 if ( (rc != 0) && (dom != 0) )
663 xc_domain_destroy(xc_handle, dom);
664 if ( mmu != NULL )
665 free(mmu);
666 if ( pfn_to_mfn_table != NULL )
667 free(pfn_to_mfn_table);
668 if ( pfn_type != NULL )
669 free(pfn_type);
671 if ( rc == 0 )
672 ioctxt->domain = dom;
674 DPRINTF("Restore exit with rc=%d\n",rc);
675 return rc;
676 }