ia64/xen-unstable

view tools/libxc/xc_linux_restore.c @ 1921:24ecc060e9d7

bitkeeper revision 1.1108.21.1 (41062740xHG36OEbpVAmVX5N9WCaNw)

make vmlinuz really stripped
author cl349@freefall.cl.cam.ac.uk
date Tue Jul 27 09:58:24 2004 +0000 (2004-07-27)
parents 7ee821f4caea
children e991e54df4a7 0b883cd8b325 0a4b76b6b5a0
line source
1 /******************************************************************************
2 * xc_linux_restore.c
3 *
4 * Restore the state of a Linux session.
5 *
6 * Copyright (c) 2003, K A Fraser.
7 */
9 #include "xc_private.h"
10 #include <asm-xen/suspend.h>
/* Upper bound on the number of page frames accepted in a single batch. */
#define MAX_BATCH_SIZE 1024

/* Set to 1 to compile in the DPRINTF() debug output. */
#define DEBUG 0

/* DPRINTF(): printf() when DEBUG is enabled, otherwise a no-op expression. */
#if !DEBUG
#define DPRINTF(fmt, args...) ((void)0)
#else
#define DPRINTF(fmt, args...) printf ( fmt , ## args )
#endif
22 static int get_pfn_list(int xc_handle,
23 u32 domain_id,
24 unsigned long *pfn_buf,
25 unsigned long max_pfns)
26 {
27 dom0_op_t op;
28 int ret;
29 op.cmd = DOM0_GETMEMLIST;
30 op.u.getmemlist.domain = (domid_t)domain_id;
31 op.u.getmemlist.max_pfns = max_pfns;
32 op.u.getmemlist.buffer = pfn_buf;
34 if ( mlock(pfn_buf, max_pfns * sizeof(unsigned long)) != 0 )
35 {
36 PERROR("Could not lock pfn list buffer");
37 return -1;
38 }
40 ret = do_dom0_op(xc_handle, &op);
42 (void)munlock(pfn_buf, max_pfns * sizeof(unsigned long));
44 return (ret < 0) ? -1 : op.u.getmemlist.num_pfns;
45 }
47 /** Read the vmconfig string from the state input.
48 * It is stored as a 4-byte count 'n' followed by n bytes.
49 * The config data is stored in a new string in 'ioctxt->vmconfig',
50 * and is null-terminated. The count is stored in 'ioctxt->vmconfig_n'.
51 *
52 * @param ioctxt i/o context
53 * @return 0 on success, non-zero on error.
54 */
55 static int read_vmconfig(XcIOContext *ioctxt)
56 {
57 int err = -1;
59 if ( xcio_read(ioctxt, &ioctxt->vmconfig_n, sizeof(ioctxt->vmconfig_n)) )
60 goto exit;
62 ioctxt->vmconfig = malloc(ioctxt->vmconfig_n + 1);
63 if ( ioctxt->vmconfig == NULL )
64 goto exit;
66 if ( xcio_read(ioctxt, ioctxt->vmconfig, ioctxt->vmconfig_n) )
67 goto exit;
69 ioctxt->vmconfig[ioctxt->vmconfig_n] = '\0';
70 err = 0;
72 exit:
73 if ( err )
74 {
75 if ( ioctxt->vmconfig != NULL )
76 free(ioctxt->vmconfig);
77 ioctxt->vmconfig = NULL;
78 ioctxt->vmconfig_n = 0;
79 }
80 return err;
81 }
83 int xc_linux_restore(int xc_handle, XcIOContext *ioctxt)
84 {
85 dom0_op_t op;
86 int rc = 1, i, n, k;
87 unsigned long mfn, pfn, xpfn;
88 unsigned int prev_pc, this_pc;
89 u32 dom = ioctxt->domain;
90 int verify = 0;
92 /* Number of page frames in use by this Linux session. */
93 unsigned long nr_pfns;
95 /* The new domain's shared-info frame number. */
96 unsigned long shared_info_frame;
97 unsigned char shared_info[PAGE_SIZE]; /* saved contents from file */
99 /* A copy of the CPU context of the guest. */
100 full_execution_context_t ctxt;
102 /* First 16 bytes of the state file must contain 'LinuxGuestRecord'. */
103 char signature[16];
105 /* A copy of the domain's name. */
106 char name[MAX_DOMAIN_NAME];
108 /* A table containg the type of each PFN (/not/ MFN!). */
109 unsigned long *pfn_type = NULL;
111 /* A table of MFNs to map in the current region */
112 unsigned long *region_mfn = NULL;
114 /* A temporary mapping, and a copy, of one frame of guest memory. */
115 unsigned long *ppage;
117 /* A copy of the pfn-to-mfn table frame list. */
118 unsigned long pfn_to_mfn_frame_list[1024];
120 /* A table mapping each PFN to its new MFN. */
121 unsigned long *pfn_to_mfn_table = NULL;
123 /* used by mapper for updating the domain's copy of the table */
124 unsigned long *live_pfn_to_mfn_table = NULL;
126 /* A temporary mapping of the guest's suspend record. */
127 suspend_record_t *p_srec;
129 char *region_base;
131 mmu_t *mmu = NULL;
133 void *pm_handle = NULL;
135 /* used by debug verify code */
136 unsigned long buf[PAGE_SIZE/sizeof(unsigned long)];
138 if ( mlock(&ctxt, sizeof(ctxt) ) )
139 {
140 /* needed for when we do the build dom0 op,
141 but might as well do early */
142 PERROR("Unable to mlock ctxt");
143 return 1;
144 }
146 /* Start writing out the saved-domain record. */
147 if ( xcio_read(ioctxt, signature, 16) ||
148 (memcmp(signature, "LinuxGuestRecord", 16) != 0) )
149 {
150 xcio_error(ioctxt, "Unrecognised state format -- no signature found");
151 goto out;
152 }
154 if ( xcio_read(ioctxt, name, sizeof(name)) ||
155 xcio_read(ioctxt, &nr_pfns, sizeof(unsigned long)) ||
156 xcio_read(ioctxt, pfn_to_mfn_frame_list, PAGE_SIZE) )
157 {
158 xcio_error(ioctxt, "Error reading header");
159 goto out;
160 }
162 if ( read_vmconfig(ioctxt) )
163 {
164 xcio_error(ioctxt, "Error writing vmconfig");
165 goto out;
166 }
168 for ( i = 0; i < MAX_DOMAIN_NAME; i++ )
169 {
170 if ( name[i] == '\0' ) break;
171 if ( name[i] & 0x80 )
172 {
173 xcio_error(ioctxt, "Random characters in domain name");
174 goto out;
175 }
176 }
177 name[MAX_DOMAIN_NAME-1] = '\0';
179 if ( nr_pfns > 1024*1024 )
180 {
181 xcio_error(ioctxt, "Invalid state file -- pfn count out of range");
182 goto out;
183 }
185 /* We want zeroed memory so use calloc rather than malloc. */
186 pfn_to_mfn_table = calloc(1, 4 * nr_pfns);
187 pfn_type = calloc(1, 4 * nr_pfns);
188 region_mfn = calloc(1, 4 * MAX_BATCH_SIZE);
190 if ( (pfn_to_mfn_table == NULL) ||
191 (pfn_type == NULL) ||
192 (region_mfn == NULL) )
193 {
194 errno = ENOMEM;
195 goto out;
196 }
198 if ( mlock(region_mfn, 4 * MAX_BATCH_SIZE ) )
199 {
200 xcio_error(ioctxt, "Could not mlock region_mfn");
201 goto out;
202 }
204 /* Set the domain's name to that from the restore file */
205 if ( xc_domain_setname( xc_handle, dom, name ) )
206 {
207 xcio_error(ioctxt, "Could not set domain name");
208 goto out;
209 }
211 /* Set the domain's initial memory allocation
212 to that from the restore file */
214 if ( xc_domain_setinitialmem(xc_handle, dom,
215 nr_pfns * (PAGE_SIZE / 1024)) )
216 {
217 xcio_error(ioctxt, "Could not set domain initial memory");
218 goto out;
219 }
221 /* Get the domain's shared-info frame. */
222 op.cmd = DOM0_GETDOMAININFO;
223 op.u.getdomaininfo.domain = (domid_t)dom;
224 op.u.getdomaininfo.ctxt = NULL;
225 if ( do_dom0_op(xc_handle, &op) < 0 )
226 {
227 xcio_error(ioctxt, "Could not get information on new domain");
228 goto out;
229 }
230 shared_info_frame = op.u.getdomaininfo.shared_info_frame;
232 if ( (pm_handle = init_pfn_mapper((domid_t)dom)) == NULL )
233 goto out;
235 /* Build the pfn-to-mfn table. We choose MFN ordering returned by Xen. */
236 if ( get_pfn_list(xc_handle, dom, pfn_to_mfn_table, nr_pfns) != nr_pfns )
237 {
238 xcio_error(ioctxt, "Did not read correct number of frame "
239 "numbers for new dom");
240 goto out;
241 }
243 if ( (mmu = init_mmu_updates(xc_handle, dom)) == NULL )
244 {
245 xcio_error(ioctxt, "Could not initialise for MMU updates");
246 goto out;
247 }
249 xcio_info(ioctxt, "Reloading memory pages: 0%%");
251 /*
252 * Now simply read each saved frame into its new machine frame.
253 * We uncanonicalise page tables as we go.
254 */
255 prev_pc = 0;
257 n = 0;
258 while ( 1 )
259 {
260 int j;
261 unsigned long region_pfn_type[MAX_BATCH_SIZE];
263 this_pc = (n * 100) / nr_pfns;
264 if ( (this_pc - prev_pc) >= 5 )
265 {
266 xcio_info(ioctxt, "\b\b\b\b%3d%%", this_pc);
267 prev_pc = this_pc;
268 }
270 if ( xcio_read(ioctxt, &j, sizeof(int)) )
271 {
272 xcio_error(ioctxt, "Error when reading from state file");
273 goto out;
274 }
276 DPRINTF("batch %d\n",j);
278 if ( j == -1 )
279 {
280 verify = 1;
281 printf("Entering page verify mode\n");
282 continue;
283 }
285 if ( j == 0 )
286 break; /* our work here is done */
288 if ( j > MAX_BATCH_SIZE )
289 {
290 xcio_error(ioctxt, "Max batch size exceeded. Giving up.");
291 goto out;
292 }
294 if ( xcio_read(ioctxt, region_pfn_type, j*sizeof(unsigned long)) ) {
295 xcio_error(ioctxt, "Error when reading from state file");
296 goto out;
297 }
299 for ( i = 0; i < j; i++ )
300 {
301 if ( (region_pfn_type[i] & LTAB_MASK) == XTAB)
302 {
303 region_mfn[i] = 0; /* we know map will fail, but don't care */
304 }
305 else
306 {
307 pfn = region_pfn_type[i] & ~LTAB_MASK;
308 region_mfn[i] = pfn_to_mfn_table[pfn];
309 }
310 }
312 if ( (region_base = mfn_mapper_map_batch( xc_handle, dom,
313 PROT_WRITE,
314 region_mfn,
315 j )) == 0 )
316 {
317 xcio_error(ioctxt, "map batch failed");
318 goto out;
319 }
321 for ( i = 0; i < j; i++ )
322 {
323 unsigned long *ppage;
325 pfn = region_pfn_type[i] & ~LTAB_MASK;
327 if ( (region_pfn_type[i] & LTAB_MASK) == XTAB) continue;
329 if (pfn>nr_pfns)
330 {
331 xcio_error(ioctxt, "pfn out of range");
332 goto out;
333 }
335 region_pfn_type[i] &= LTAB_MASK;
337 pfn_type[pfn] = region_pfn_type[i];
339 mfn = pfn_to_mfn_table[pfn];
341 if ( verify )
342 ppage = (unsigned long*) buf; /* debug case */
343 else
344 ppage = (unsigned long*) (region_base + i*PAGE_SIZE);
346 if ( xcio_read(ioctxt, ppage, PAGE_SIZE) )
347 {
348 xcio_error(ioctxt, "Error when reading from state file");
349 goto out;
350 }
352 switch( region_pfn_type[i] )
353 {
354 case 0:
355 break;
357 case L1TAB:
358 {
359 for ( k = 0; k < 1024; k++ )
360 {
361 if ( ppage[k] & _PAGE_PRESENT )
362 {
363 xpfn = ppage[k] >> PAGE_SHIFT;
364 if ( xpfn >= nr_pfns )
365 {
366 xcio_error(ioctxt, "Frame number in type %lu page "
367 "table is out of range. i=%d k=%d "
368 "pfn=0x%lx nr_pfns=%lu",
369 region_pfn_type[i]>>28, i,
370 k, xpfn, nr_pfns);
371 goto out;
372 }
374 ppage[k] &= (PAGE_SIZE - 1) &
375 ~(_PAGE_GLOBAL | _PAGE_PAT);
376 ppage[k] |= pfn_to_mfn_table[xpfn] << PAGE_SHIFT;
377 }
378 }
379 }
380 break;
382 case L2TAB:
383 {
384 for ( k = 0;
385 k < (HYPERVISOR_VIRT_START>>L2_PAGETABLE_SHIFT);
386 k++ )
387 {
388 if ( ppage[k] & _PAGE_PRESENT )
389 {
390 xpfn = ppage[k] >> PAGE_SHIFT;
392 if ( xpfn >= nr_pfns )
393 {
394 xcio_error(ioctxt, "Frame number in type %lu page"
395 " table is out of range. i=%d k=%d "
396 "pfn=%lu nr_pfns=%lu",
397 region_pfn_type[i]>>28, i, k,
398 xpfn, nr_pfns);
399 goto out;
400 }
402 ppage[k] &= (PAGE_SIZE - 1) &
403 ~(_PAGE_GLOBAL | _PAGE_PSE);
404 ppage[k] |= pfn_to_mfn_table[xpfn] << PAGE_SHIFT;
405 }
406 }
407 }
408 break;
410 default:
411 xcio_error(ioctxt, "Bogus page type %lx page table is "
412 "out of range. i=%d nr_pfns=%lu",
413 region_pfn_type[i], i, nr_pfns);
414 goto out;
416 } /* end of page type switch statement */
418 if ( verify )
419 {
420 int res = memcmp(buf, (region_base + i*PAGE_SIZE), PAGE_SIZE );
421 if ( res )
422 {
423 int v;
424 printf("************** pfn=%lx type=%lx gotcs=%08lx "
425 "actualcs=%08lx\n", pfn, pfn_type[pfn],
426 csum_page(region_base + i*PAGE_SIZE),
427 csum_page(buf));
428 for ( v = 0; v < 4; v++ )
429 {
430 unsigned long *p = (unsigned long *)
431 (region_base + i*PAGE_SIZE);
432 if ( buf[v] != p[v] )
433 printf(" %d: %08lx %08lx\n",
434 v, buf[v], p[v] );
435 }
436 }
437 }
439 if ( add_mmu_update(xc_handle, mmu,
440 (mfn<<PAGE_SHIFT) | MMU_MACHPHYS_UPDATE, pfn) )
441 {
442 printf("machpys mfn=%ld pfn=%ld\n",mfn,pfn);
443 goto out;
444 }
446 } /* end of 'batch' for loop */
448 munmap( region_base, j*PAGE_SIZE );
449 n+=j; /* crude stats */
450 }
452 DPRINTF("Received all pages\n");
454 /*
455 * Pin page tables. Do this after writing to them as otherwise Xen
456 * will barf when doing the type-checking.
457 */
458 for ( i = 0; i < nr_pfns; i++ )
459 {
460 if ( pfn_type[i] == L1TAB )
461 {
462 if ( add_mmu_update(xc_handle, mmu,
463 (pfn_to_mfn_table[i]<<PAGE_SHIFT) |
464 MMU_EXTENDED_COMMAND,
465 MMUEXT_PIN_L1_TABLE) ) {
466 printf("ERR pin L1 pfn=%lx mfn=%lx\n",
467 (unsigned long)i, pfn_to_mfn_table[i]);
468 goto out;
469 }
470 }
471 else if ( pfn_type[i] == L2TAB )
472 {
473 if ( add_mmu_update(xc_handle, mmu,
474 (pfn_to_mfn_table[i]<<PAGE_SHIFT) |
475 MMU_EXTENDED_COMMAND,
476 MMUEXT_PIN_L2_TABLE) )
477 {
478 printf("ERR pin L2 pfn=%lx mfn=%lx\n",
479 (unsigned long)i, pfn_to_mfn_table[i]);
480 goto out;
481 }
482 }
483 }
485 if ( finish_mmu_updates(xc_handle, mmu) ) goto out;
487 xcio_info(ioctxt, "\b\b\b\b100%%\nMemory reloaded.\n");
490 if ( xcio_read(ioctxt, &ctxt, sizeof(ctxt)) ||
491 xcio_read(ioctxt, shared_info, PAGE_SIZE) )
492 {
493 xcio_error(ioctxt, "Error when reading from state file");
494 goto out;
495 }
497 /* Uncanonicalise the suspend-record frame number and poke resume rec. */
498 pfn = ctxt.cpu_ctxt.esi;
499 if ( (pfn >= nr_pfns) || (pfn_type[pfn] != NOTAB) )
500 {
501 xcio_error(ioctxt, "Suspend record frame number is bad");
502 goto out;
503 }
504 ctxt.cpu_ctxt.esi = mfn = pfn_to_mfn_table[pfn];
505 p_srec = map_pfn_writeable(pm_handle, mfn);
506 p_srec->resume_info.nr_pages = nr_pfns;
507 p_srec->resume_info.shared_info = shared_info_frame << PAGE_SHIFT;
508 p_srec->resume_info.flags = 0;
509 unmap_pfn(pm_handle, p_srec);
511 /* Uncanonicalise each GDT frame number. */
512 if ( ctxt.gdt_ents > 8192 )
513 {
514 xcio_error(ioctxt, "GDT entry count out of range");
515 goto out;
516 }
517 for ( i = 0; i < ctxt.gdt_ents; i += 512 )
518 {
519 pfn = ctxt.gdt_frames[i];
520 if ( (pfn >= nr_pfns) || (pfn_type[pfn] != NOTAB) )
521 {
522 xcio_error(ioctxt, "GDT frame number is bad");
523 goto out;
524 }
525 ctxt.gdt_frames[i] = pfn_to_mfn_table[pfn];
526 }
528 /* Uncanonicalise the page table base pointer. */
529 pfn = ctxt.pt_base >> PAGE_SHIFT;
530 if ( (pfn >= nr_pfns) || (pfn_type[pfn] != L2TAB) )
531 {
532 printf("PT base is bad. pfn=%lu nr=%lu type=%08lx %08lx\n",
533 pfn, nr_pfns, pfn_type[pfn], (unsigned long)L2TAB);
534 xcio_error(ioctxt, "PT base is bad.");
535 goto out;
536 }
537 ctxt.pt_base = pfn_to_mfn_table[pfn] << PAGE_SHIFT;
540 /* clear any pending events and the selector */
541 memset( &(((shared_info_t *)shared_info)->evtchn_pending[0]),
542 0, sizeof (((shared_info_t *)shared_info)->evtchn_pending)+
543 sizeof(((shared_info_t *)shared_info)->evtchn_pending_sel) );
545 /* Copy saved contents of shared-info page. No checking needed. */
546 ppage = map_pfn_writeable(pm_handle, shared_info_frame);
547 memcpy(ppage, shared_info, sizeof(shared_info_t));
548 unmap_pfn(pm_handle, ppage);
551 /* Uncanonicalise the pfn-to-mfn table frame-number list. */
552 for ( i = 0; i < (nr_pfns+1023)/1024; i++ )
553 {
554 unsigned long pfn, mfn;
556 pfn = pfn_to_mfn_frame_list[i];
557 if ( (pfn >= nr_pfns) || (pfn_type[pfn] != NOTAB) )
558 {
559 xcio_error(ioctxt, "PFN-to-MFN frame number is bad");
560 goto out;
561 }
562 mfn = pfn_to_mfn_table[pfn];
563 pfn_to_mfn_frame_list[i] = mfn;
564 }
566 if ( (live_pfn_to_mfn_table =
567 mfn_mapper_map_batch(xc_handle, dom,
568 PROT_WRITE,
569 pfn_to_mfn_frame_list,
570 (nr_pfns+1023)/1024 )) == 0 )
571 {
572 xcio_error(ioctxt, "Couldn't map pfn_to_mfn table");
573 goto out;
574 }
576 memcpy(live_pfn_to_mfn_table, pfn_to_mfn_table,
577 nr_pfns*sizeof(unsigned long) );
579 munmap(live_pfn_to_mfn_table, ((nr_pfns+1023)/1024)*PAGE_SIZE);
581 /*
582 * Safety checking of saved context:
583 * 1. cpu_ctxt is fine, as Xen checks that on context switch.
584 * 2. fpu_ctxt is fine, as it can't hurt Xen.
585 * 3. trap_ctxt needs the code selectors checked.
586 * 4. fast_trap_idx is checked by Xen.
587 * 5. ldt base must be page-aligned, no more than 8192 ents, ...
588 * 6. gdt already done, and further checking is done by Xen.
589 * 7. check that guestos_ss is safe.
590 * 8. pt_base is already done.
591 * 9. debugregs are checked by Xen.
592 * 10. callback code selectors need checking.
593 */
594 for ( i = 0; i < 256; i++ )
595 {
596 ctxt.trap_ctxt[i].vector = i;
597 if ( (ctxt.trap_ctxt[i].cs & 3) == 0 )
598 ctxt.trap_ctxt[i].cs = FLAT_GUESTOS_CS;
599 }
600 if ( (ctxt.guestos_ss & 3) == 0 )
601 ctxt.guestos_ss = FLAT_GUESTOS_DS;
602 if ( (ctxt.event_callback_cs & 3) == 0 )
603 ctxt.event_callback_cs = FLAT_GUESTOS_CS;
604 if ( (ctxt.failsafe_callback_cs & 3) == 0 )
605 ctxt.failsafe_callback_cs = FLAT_GUESTOS_CS;
606 if ( ((ctxt.ldt_base & (PAGE_SIZE - 1)) != 0) ||
607 (ctxt.ldt_ents > 8192) ||
608 (ctxt.ldt_base > HYPERVISOR_VIRT_START) ||
609 ((ctxt.ldt_base + ctxt.ldt_ents*8) > HYPERVISOR_VIRT_START) )
610 {
611 xcio_error(ioctxt, "Bad LDT base or size");
612 goto out;
613 }
615 op.cmd = DOM0_BUILDDOMAIN;
616 op.u.builddomain.domain = (domid_t)dom;
617 op.u.builddomain.ctxt = &ctxt;
618 rc = do_dom0_op(xc_handle, &op);
620 /* don't start the domain as we have console etc to set up */
622 if ( rc == 0 )
623 {
624 /* Success: print the domain id. */
625 xcio_info(ioctxt, "DOM=%lu\n", dom);
626 return 0;
627 }
630 out:
631 if ( (rc != 0) && (dom != 0) )
632 xc_domain_destroy(xc_handle, dom);
633 if ( mmu != NULL )
634 free(mmu);
635 if ( pm_handle != NULL )
636 (void)close_pfn_mapper(pm_handle);
637 if ( pfn_to_mfn_table != NULL )
638 free(pfn_to_mfn_table);
639 if ( pfn_type != NULL )
640 free(pfn_type);
642 if ( rc == 0 )
643 ioctxt->domain = dom;
645 DPRINTF("Restore exit with rc=%d\n",rc);
646 return rc;
647 }