ia64/xen-unstable

view tools/libxc/xc_linux_save.c @ 1921:24ecc060e9d7

bitkeeper revision 1.1108.21.1 (41062740xHG36OEbpVAmVX5N9WCaNw)

make vmlinuz really stripped
author cl349@freefall.cl.cam.ac.uk
date Tue Jul 27 09:58:24 2004 +0000 (2004-07-27)
parents 236a9f2698a3
children a989641f2755 bd1640d9d7d4 994a7468bb63 0a4b76b6b5a0
line source
1 /******************************************************************************
2 * xc_linux_save.c
3 *
4 * Save the state of a running Linux session.
5 *
6 * Copyright (c) 2003, K A Fraser.
7 */
9 #include <sys/time.h>
10 #include "xc_private.h"
11 #include <asm-xen/suspend.h>
/* Number of pages handled per batch: 1024 pages (4MB) at a time. */
#define BATCH_SIZE 1024

/* Compile-time tracing switches; set to non-zero to enable the output. */
#define DEBUG  0
#define DDEBUG 0

/* DPRINTF: printf-style trace, compiled to a no-op unless DEBUG is set. */
#if DEBUG
#define DPRINTF(...) printf(__VA_ARGS__)
#else
#define DPRINTF(...) ((void)0)
#endif

/* DDPRINTF: as DPRINTF, but controlled by the separate DDEBUG switch. */
#if DDEBUG
#define DDPRINTF(...) printf(__VA_ARGS__)
#else
#define DDPRINTF(...) ((void)0)
#endif
/*
 * Returns TRUE if the given machine frame number has a unique mapping
 * in the guest's pseudophysical map.
 * 0x80000000-3 mark the shared_info, and blk/net rings
 *
 * The macro expects 'live_mfn_to_pfn_table', 'live_pfn_to_mfn_table' and
 * 'nr_pfns' to be in scope at the point of use, and it evaluates its
 * argument several times, so the argument must be free of side effects.
 *
 * An MFN is accepted when it is below 1024*1024 and either
 *  - its m2p entry is a valid PFN (< nr_pfns) whose p2m entry points back
 *    at the MFN, or holds the marker 0x80000004; or
 *  - its m2p entry is one of the special markers 0x80000000-0x80000003.
 *
 * The p2m table is only ever indexed with a PFN that has been checked
 * against nr_pfns first, so a stale or foreign m2p entry can never cause
 * a read beyond the end of live_pfn_to_mfn_table.
 */
#define MFN_IS_IN_PSEUDOPHYS_MAP(_mfn)                                       \
    (((_mfn) < (1024*1024)) &&                                               \
     (((live_mfn_to_pfn_table[(_mfn)] < nr_pfns) &&                          \
       ((live_pfn_to_mfn_table[live_mfn_to_pfn_table[(_mfn)]] == (_mfn)) ||  \
        (live_pfn_to_mfn_table[live_mfn_to_pfn_table[(_mfn)]] ==             \
         0x80000004))) ||                                                    \
      ((live_mfn_to_pfn_table[(_mfn)] >= 0x80000000) &&                      \
       (live_mfn_to_pfn_table[(_mfn)] <= 0x80000003))))
/*
 * Canonicalise the machine frame number stored at *_pmfn in place.
 *
 * If the MFN passes MFN_IS_IN_PSEUDOPHYS_MAP() it is replaced by its
 * entry in live_mfn_to_pfn_table and the expression evaluates to 1;
 * otherwise *_pmfn is left untouched and the expression evaluates to 0.
 * (Uses a GNU statement expression, like the rest of this file's macros.)
 */
#define translate_mfn_to_pfn(_pmfn)                                    \
({                                                                     \
    unsigned long _xlat_mfn = *(_pmfn);                                \
    int _xlat_ok = MFN_IS_IN_PSEUDOPHYS_MAP(_xlat_mfn) ? 1 : 0;        \
    if ( _xlat_ok )                                                    \
        *(_pmfn) = live_mfn_to_pfn_table[_xlat_mfn];                   \
    _xlat_ok;                                                          \
})
/*
 * Return the value (0 or 1) of bit 'nr' in the bitmap at 'addr'.
 * The bitmap is treated as an array of unsigned long words, with bit 0
 * being the least significant bit of the first word.
 */
static inline int test_bit ( int nr, volatile void * addr)
{
    const unsigned long bits_per_word = sizeof(unsigned long) * 8;
    unsigned long *words = (unsigned long *)addr;
    unsigned long word = words[nr / bits_per_word];

    return (word >> (nr % bits_per_word)) & 1;
}
/*
 * Clear bit 'nr' in the bitmap at 'addr' (an array of unsigned long
 * words, bit 0 being the least significant bit of the first word).
 *
 * The mask is built from 1UL so the shift is performed in unsigned long:
 * an int-typed '1 << k' is undefined for k >= 31 and, where unsigned long
 * is wider than int, would wipe out neighbouring bits of the word.
 * The update is a plain read-modify-write; it is not atomic.
 */
static inline void clear_bit ( int nr, volatile void * addr)
{
    const unsigned long bits_per_word = sizeof(unsigned long) * 8;
    unsigned long *words = (unsigned long *)addr;

    words[nr / bits_per_word] &= ~(1UL << (nr % bits_per_word));
}
/*
 * Set bit 'nr' in the bitmap at 'addr' (an array of unsigned long
 * words, bit 0 being the least significant bit of the first word).
 *
 * The mask is built from 1UL so the shift is performed in unsigned long:
 * an int-typed '1 << k' is undefined for k >= 31 and, where unsigned long
 * is wider than int, would set neighbouring bits through sign extension.
 * The update is a plain read-modify-write; it is not atomic.
 */
static inline void set_bit ( int nr, volatile void * addr)
{
    const unsigned long bits_per_word = sizeof(unsigned long) * 8;
    unsigned long *words = (unsigned long *)addr;

    words[nr / bits_per_word] |= (1UL << (nr % bits_per_word));
}
/*
 * Returns the hamming weight (i.e. the number of bits set) of the low
 * 32 bits of w.
 *
 * Works by summing adjacent bit fields in parallel: first pairs of
 * single bits, then pairs of 2-bit sums, 4-bit sums, and so on, doubling
 * the field width (1, 2, 4, 8, 16) at each step.
 */
static inline unsigned int hweight32(unsigned int w)
{
    static const unsigned int field_mask[5] = {
        0x55555555, 0x33333333, 0x0F0F0F0F, 0x00FF00FF, 0x0000FFFF
    };
    unsigned int step;

    for ( step = 0; step < 5; step++ )
        w = (w & field_mask[step]) + ((w >> (1u << step)) & field_mask[step]);

    return w;
}
/*
 * Count the bits set in the first nr bits of the bitmap at 'addr'.
 *
 * Only whole unsigned long words are examined: nr is rounded DOWN to a
 * multiple of the word size, so bits in a trailing partial word are not
 * counted.  A negative nr yields 0.
 *
 * Each word is fed to hweight32() in two halves so that every bit is
 * counted even where unsigned long is wider than 32 bits.  The upper half
 * is extracted with two 16-bit shifts, which is well defined (and yields
 * 0) when unsigned long is exactly 32 bits wide.
 */
static inline int count_bits ( int nr, volatile void *addr)
{
    unsigned long *p = (unsigned long *)addr;
    int nr_words = nr / (int)(sizeof(unsigned long) * 8);
    int i, count = 0;

    /* We know that the array is padded to unsigned long. */
    for ( i = 0; i < nr_words; i++ )
    {
        unsigned long w = p[i];

        count += hweight32((unsigned int)(w & 0xFFFFFFFFUL));
        count += hweight32((unsigned int)((w >> 16) >> 16));
    }

    return count;
}
/*
 * Map index i (0 <= i < nr) to another index in [0, nr), such that the
 * mapping is a permutation of [0, nr).
 *
 * @param i         index to permute; must be below nr
 * @param nr        number of valid indices
 * @param order_nr  number of bits needed to represent nr-1
 * @return the permuted index
 */
static inline int permute( int i, int nr, int order_nr )
{
    /* Need a simple permutation function so that we scan pages in a
       pseudo random order, enabling us to get a better estimate of
       the domain's page dirtying rate as we go (there are often
       contiguous ranges of pfns that have similar behaviour, and we
       want to mix them up). */

    /* e.g. nr->order 15->4 16->4 17->5 */
    /* 512MB domain, 128k pages, order 17 */

    /*
       The index is rotated left by 10 bits within an order_nr-bit field.
       For order 17:

         QPONMLKJIHGFEDCBA   ->   GFEDCBAQPONMLKJIH

       i.e. the low (order_nr-10) bits move to the top and the top 10
       bits move to the bottom.
    */

    /* With 10 bits or fewer there is nothing to rotate around: the shift
       count (order_nr-10) would be zero or negative, and a negative shift
       is undefined.  The identity is a valid permutation, so use it. */
    if ( order_nr <= 10 )
        return i;

    /* The rotation is a bijection on [0, 2^order_nr); re-applying it until
       the result lands below nr keeps it a bijection on [0, nr). */
    do { i = ((i>>(order_nr-10)) | ( i<<10 ) ) & ((1<<order_nr)-1); }
    while ( i >= nr ); /* this won't ever loop if nr is a power of 2 */

    return i;
}
/*
 * Convert a timeval to a count of microseconds.
 * The seconds field is widened to long long before scaling so that the
 * multiplication cannot overflow where time_t is only 32 bits wide.
 */
static long long tv_to_us( struct timeval *new )
{
    return ((long long)new->tv_sec * 1000000LL) + new->tv_usec;
}
/*
 * Return the current wall-clock time, as reported by gettimeofday(), in
 * microseconds.  The timeval is zero-initialised so that the result is 0
 * rather than indeterminate should gettimeofday() fail.
 */
static long long llgettimeofday(void)
{
    struct timeval now = { 0, 0 };

    gettimeofday(&now, NULL);
    return tv_to_us(&now);
}
/*
 * Return (new - old) in microseconds.
 * The seconds are widened to long long before scaling so that large
 * differences (e.g. against a zeroed 'old') cannot overflow where time_t
 * is only 32 bits wide.
 */
static long long tv_delta( struct timeval *new, struct timeval *old )
{
    long long secs  = (long long)new->tv_sec  - (long long)old->tv_sec;
    long long usecs = (long long)new->tv_usec - (long long)old->tv_usec;

    return (secs * 1000000LL) + usecs;
}
141 static int print_stats( int xc_handle, u32 domid,
142 int pages_sent, xc_shadow_control_stats_t *stats,
143 int print )
144 {
145 static struct timeval wall_last;
146 static long long d0_cpu_last;
147 static long long d1_cpu_last;
149 struct timeval wall_now;
150 long long wall_delta;
151 long long d0_cpu_now, d0_cpu_delta;
152 long long d1_cpu_now, d1_cpu_delta;
154 gettimeofday(&wall_now, NULL);
156 d0_cpu_now = xc_domain_get_cpu_usage( xc_handle, 0 )/1000;
157 d1_cpu_now = xc_domain_get_cpu_usage( xc_handle, domid )/1000;
159 if ( (d0_cpu_now == -1) || (d1_cpu_now == -1) )
160 printf("ARRHHH!!\n");
162 wall_delta = tv_delta(&wall_now,&wall_last)/1000;
164 if ( wall_delta == 0 ) wall_delta = 1;
166 d0_cpu_delta = (d0_cpu_now - d0_cpu_last)/1000;
167 d1_cpu_delta = (d1_cpu_now - d1_cpu_last)/1000;
169 if ( print )
170 printf("delta %lldms, dom0 %d%%, target %d%%, sent %dMb/s, "
171 "dirtied %dMb/s\n",
172 wall_delta,
173 (int)((d0_cpu_delta*100)/wall_delta),
174 (int)((d1_cpu_delta*100)/wall_delta),
175 (int)((pages_sent*PAGE_SIZE*8)/(wall_delta*1000)),
176 (int)((stats->dirty_count*PAGE_SIZE*8)/(wall_delta*1000)));
178 d0_cpu_last = d0_cpu_now;
179 d1_cpu_last = d1_cpu_now;
180 wall_last = wall_now;
182 return 0;
183 }
185 /** Write the vmconfig string.
186 * It is stored as a 4-byte count 'n' followed by n bytes.
187 *
188 * @param ioctxt i/o context
189 * @return 0 on success, non-zero on error.
190 */
191 static int write_vmconfig(XcIOContext *ioctxt){
192 int err = -1;
193 if(xcio_write(ioctxt, &ioctxt->vmconfig_n, sizeof(ioctxt->vmconfig_n))) goto exit;
194 if(xcio_write(ioctxt, ioctxt->vmconfig, ioctxt->vmconfig_n)) goto exit;
195 err = 0;
196 exit:
197 return err;
198 }
200 static int analysis_phase( int xc_handle, u32 domid,
201 int nr_pfns, unsigned long *arr )
202 {
203 long long start, now;
204 xc_shadow_control_stats_t stats;
206 start = llgettimeofday();
208 while ( 0 )
209 {
210 int i;
212 xc_shadow_control( xc_handle, domid,
213 DOM0_SHADOW_CONTROL_OP_CLEAN2,
214 arr, nr_pfns, NULL);
215 printf("#Flush\n");
216 for ( i = 0; i < 100; i++ )
217 {
218 usleep(10000);
219 now = llgettimeofday();
220 xc_shadow_control( xc_handle, domid,
221 DOM0_SHADOW_CONTROL_OP_PEEK,
222 NULL, 0, &stats);
224 printf("now= %lld faults= %ld dirty= %ld dirty_net= %ld "
225 "dirty_block= %ld\n",
226 ((now-start)+500)/1000,
227 stats.fault_count, stats.dirty_count,
228 stats.dirty_net_count, stats.dirty_block_count);
229 }
230 }
232 return -1;
233 }
235 int xc_linux_save(int xc_handle, XcIOContext *ioctxt)
236 {
237 dom0_op_t op;
238 int rc = 1, i, j, k, last_iter, iter = 0;
239 unsigned long mfn;
240 u32 domid = ioctxt->domain;
241 int live = (ioctxt->flags & XCFLAGS_LIVE);
242 int debug = (ioctxt->flags & XCFLAGS_DEBUG);
243 int sent_last_iter, skip_this_iter;
245 /* Important tuning parameters */
246 int max_iters = 29; /* limit us to 30 times round loop */
247 int max_factor = 3; /* never send more than 3x nr_pfns */
249 /* The new domain's shared-info frame number. */
250 unsigned long shared_info_frame;
252 /* A copy of the CPU context of the guest. */
253 full_execution_context_t ctxt;
255 /* A copy of the domain's name. */
256 char name[MAX_DOMAIN_NAME];
258 /* A table containg the type of each PFN (/not/ MFN!). */
259 unsigned long *pfn_type = NULL;
260 unsigned long *pfn_batch = NULL;
262 /* A temporary mapping, and a copy, of one frame of guest memory. */
263 unsigned long page[1024];
265 /* A copy of the pfn-to-mfn table frame list. */
266 unsigned long *live_pfn_to_mfn_frame_list;
267 unsigned long pfn_to_mfn_frame_list[1024];
269 /* Live mapping of the table mapping each PFN to its current MFN. */
270 unsigned long *live_pfn_to_mfn_table = NULL;
271 /* Live mapping of system MFN to PFN table. */
272 unsigned long *live_mfn_to_pfn_table = NULL;
274 /* Live mapping of shared info structure */
275 unsigned long *live_shinfo;
277 /* base of the region in which domain memory is mapped */
278 unsigned char *region_base = NULL;
280 /* A temporary mapping, and a copy, of the guest's suspend record. */
281 suspend_record_t *p_srec;
283 /* number of pages we're dealing with */
284 unsigned long nr_pfns;
286 /* power of 2 order of nr_pfns */
287 int order_nr;
289 /* bitmap of pages:
290 - that should be sent this iteration (unless later marked as skip);
291 - to skip this iteration because already dirty;
292 - to fixup by sending at the end if not already resent; */
293 unsigned long *to_send, *to_skip, *to_fix;
295 xc_shadow_control_stats_t stats;
297 int needed_to_fix = 0;
298 int total_sent = 0;
300 if (mlock(&ctxt, sizeof(ctxt))) {
301 xcio_perror(ioctxt, "Unable to mlock ctxt");
302 return 1;
303 }
305 /* Ensure that the domain exists, and that it is stopped. */
306 if ( xc_domain_pause(xc_handle, domid) ){
307 xcio_perror(ioctxt, "Could not pause domain");
308 goto out;
309 }
311 if ( xc_domain_getfullinfo( xc_handle, domid, &op, &ctxt) )
312 {
313 xcio_error(ioctxt, "Could not get full domain info");
314 goto out;
315 }
316 memcpy(name, op.u.getdomaininfo.name, sizeof(name));
317 shared_info_frame = op.u.getdomaininfo.shared_info_frame;
319 /* A cheesy test to see whether the domain contains valid state. */
320 if ( ctxt.pt_base == 0 ){
321 xcio_error(ioctxt, "Domain is not in a valid Linux guest OS state");
322 goto out;
323 }
325 /* Map the suspend-record MFN to pin it. The page must be owned by
326 domid for this to succeed. */
327 p_srec = mfn_mapper_map_single(xc_handle, domid,
328 sizeof(*p_srec), PROT_READ,
329 ctxt.cpu_ctxt.esi);
330 if (!p_srec){
331 xcio_error(ioctxt, "Couldn't map state record");
332 goto out;
333 }
335 nr_pfns = p_srec->nr_pfns;
337 /* cheesy sanity check */
338 if ( nr_pfns > 1024*1024 ){
339 xcio_error(ioctxt, "Invalid state record -- pfn count out of range: %lu", nr_pfns);
340 goto out;
341 }
343 /* the pfn_to_mfn_frame_list fits in a single page */
344 live_pfn_to_mfn_frame_list =
345 mfn_mapper_map_single(xc_handle, domid,
346 PAGE_SIZE, PROT_READ,
347 p_srec->pfn_to_mfn_frame_list );
349 if (!live_pfn_to_mfn_frame_list){
350 xcio_error(ioctxt, "Couldn't map pfn_to_mfn_frame_list");
351 goto out;
352 }
354 /* Track the mfn_to_pfn table down from the domains PT */
355 {
356 unsigned long *pgd;
357 unsigned long mfn_to_pfn_table_start_mfn;
359 pgd = mfn_mapper_map_single(xc_handle, domid,
360 PAGE_SIZE, PROT_READ,
361 ctxt.pt_base>>PAGE_SHIFT);
363 mfn_to_pfn_table_start_mfn =
364 pgd[HYPERVISOR_VIRT_START>>L2_PAGETABLE_SHIFT]>>PAGE_SHIFT;
366 live_mfn_to_pfn_table =
367 mfn_mapper_map_single(xc_handle, ~0UL,
368 PAGE_SIZE*1024, PROT_READ,
369 mfn_to_pfn_table_start_mfn );
370 }
372 /* Map all the frames of the pfn->mfn table. For migrate to succeed,
373 the guest must not change which frames are used for this purpose.
374 (its not clear why it would want to change them, and we'll be OK
375 from a safety POV anyhow. */
377 live_pfn_to_mfn_table = mfn_mapper_map_batch(xc_handle, domid,
378 PROT_READ,
379 live_pfn_to_mfn_frame_list,
380 (nr_pfns+1023)/1024 );
381 if( !live_pfn_to_mfn_table ){
382 xcio_perror(ioctxt, "Couldn't map pfn_to_mfn table");
383 goto out;
384 }
387 /* Canonicalise the pfn-to-mfn table frame-number list. */
388 memcpy( pfn_to_mfn_frame_list, live_pfn_to_mfn_frame_list, PAGE_SIZE );
389 for ( i = 0; i < nr_pfns; i += 1024 ){
390 if ( !translate_mfn_to_pfn(&pfn_to_mfn_frame_list[i/1024]) ){
391 xcio_error(ioctxt, "Frame # in pfn-to-mfn frame list is not in pseudophys");
392 goto out;
393 }
394 }
396 /* At this point, we can start the domain again if we're doing a
397 live suspend */
399 if( live ){
400 if ( xc_shadow_control( xc_handle, domid,
401 DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY,
402 NULL, 0, NULL ) < 0 ) {
403 xcio_error(ioctxt, "Couldn't enable shadow mode");
404 goto out;
405 }
407 if ( xc_domain_unpause(xc_handle, domid) < 0 ){
408 xcio_error(ioctxt, "Couldn't unpause domain");
409 goto out;
410 }
412 last_iter = 0;
413 sent_last_iter = 1<<20; /* 4GB of pages */
414 } else{
415 last_iter = 1;
416 }
418 /* calculate the power of 2 order of nr_pfns, e.g.
419 15->4 16->4 17->5 */
420 for( i=nr_pfns-1, order_nr=0; i ; i>>=1, order_nr++ );
422 /* Setup to_send bitmap */
423 {
424 int sz = (nr_pfns/8) + 8; /* includes slop at end of array */
426 to_send = malloc( sz );
427 to_fix = calloc( 1, sz );
428 to_skip = malloc( sz );
430 if (!to_send || !to_fix || !to_skip){
431 xcio_error(ioctxt, "Couldn't allocate to_send array");
432 goto out;
433 }
435 memset( to_send, 0xff, sz );
437 if ( mlock( to_send, sz ) ){
438 xcio_perror(ioctxt, "Unable to mlock to_send");
439 return 1;
440 }
442 /* (to fix is local only) */
444 if ( mlock( to_skip, sz ) ){
445 xcio_perror(ioctxt, "Unable to mlock to_skip");
446 return 1;
447 }
449 }
451 analysis_phase( xc_handle, domid, nr_pfns, to_skip );
453 /* We want zeroed memory so use calloc rather than malloc. */
454 pfn_type = calloc(BATCH_SIZE, sizeof(unsigned long));
455 pfn_batch = calloc(BATCH_SIZE, sizeof(unsigned long));
457 if ( (pfn_type == NULL) || (pfn_batch == NULL) ){
458 errno = ENOMEM;
459 goto out;
460 }
462 if ( mlock( pfn_type, BATCH_SIZE * sizeof(unsigned long) ) ){
463 xcio_error(ioctxt, "Unable to mlock");
464 goto out;
465 }
468 /*
469 * Quick belt and braces sanity check.
470 */
471 #if DEBUG
472 for ( i = 0; i < nr_pfns; i++ ){
473 mfn = live_pfn_to_mfn_table[i];
475 if( (live_mfn_to_pfn_table[mfn] != i) && (mfn != 0x80000004) )
476 printf("i=0x%x mfn=%x live_mfn_to_pfn_table=%x\n",
477 i,mfn,live_mfn_to_pfn_table[mfn]);
478 }
479 #endif
481 /* Map the shared info frame */
482 live_shinfo = mfn_mapper_map_single(xc_handle, domid,
483 PAGE_SIZE, PROT_READ,
484 shared_info_frame);
486 if (!live_shinfo){
487 xcio_error(ioctxt, "Couldn't map live_shinfo");
488 goto out;
489 }
491 /* Start writing out the saved-domain record. */
493 if ( xcio_write(ioctxt, "LinuxGuestRecord", 16) ||
494 xcio_write(ioctxt, name, sizeof(name)) ||
495 xcio_write(ioctxt, &nr_pfns, sizeof(unsigned long)) ||
496 xcio_write(ioctxt, pfn_to_mfn_frame_list, PAGE_SIZE) ){
497 xcio_error(ioctxt, "Error writing header");
498 goto out;
499 }
500 if(write_vmconfig(ioctxt)){
501 xcio_error(ioctxt, "Error writing vmconfig");
502 goto out;
503 }
505 print_stats( xc_handle, domid, 0, &stats, 0 );
507 /* Now write out each data page, canonicalising page tables as we go... */
509 while(1){
510 unsigned int prev_pc, sent_this_iter, N, batch;
512 iter++;
513 sent_this_iter = 0;
514 skip_this_iter = 0;
515 prev_pc = 0;
516 N=0;
518 xcio_info(ioctxt, "Saving memory pages: iter %d 0%%", iter);
520 while( N < nr_pfns ){
521 unsigned int this_pc = (N * 100) / nr_pfns;
523 if ( (this_pc - prev_pc) >= 5 ){
524 xcio_info(ioctxt, "\b\b\b\b%3d%%", this_pc);
525 prev_pc = this_pc;
526 }
528 /* slightly wasteful to peek the whole array evey time,
529 but this is fast enough for the moment. */
531 if ( !last_iter &&
532 xc_shadow_control(xc_handle, domid,
533 DOM0_SHADOW_CONTROL_OP_PEEK,
534 to_skip, nr_pfns, NULL) != nr_pfns ) {
535 xcio_error(ioctxt, "Error peeking shadow bitmap");
536 goto out;
537 }
540 /* load pfn_type[] with the mfn of all the pages we're doing in
541 this batch. */
543 for ( batch = 0; batch < BATCH_SIZE && N < nr_pfns ; N++ )
544 {
545 int n = permute(N, nr_pfns, order_nr );
547 if ( 0 && debug ) {
548 fprintf(stderr,"%d pfn= %08lx mfn= %08lx %d "
549 " [mfn]= %08lx\n",
550 iter, (unsigned long)n, live_pfn_to_mfn_table[n],
551 test_bit(n,to_send),
552 live_mfn_to_pfn_table[live_pfn_to_mfn_table[n]&
553 0xFFFFF]);
554 }
556 if ( !last_iter &&
557 test_bit(n, to_send) &&
558 test_bit(n, to_skip) ) {
559 skip_this_iter++; /* stats keeping */
560 }
562 if ( !((test_bit(n, to_send) && !test_bit(n, to_skip)) ||
563 (test_bit(n, to_send) && last_iter) ||
564 (test_bit(n, to_fix) && last_iter)) ) {
565 continue;
566 }
568 /* we get here if:
569 1. page is marked to_send & hasn't already been re-dirtied
570 2. (ignore to_skip in last iteration)
571 3. add in pages that still need fixup (net bufs)
572 */
574 pfn_batch[batch] = n;
575 pfn_type[batch] = live_pfn_to_mfn_table[n];
577 if( pfn_type[batch] == 0x80000004 ){
578 /* not currently in pusedo-physical map -- set bit
579 in to_fix that we must send this page in last_iter
580 unless its sent sooner anyhow */
582 set_bit( n, to_fix );
583 if( iter>1 )
584 DDPRINTF("netbuf race: iter %d, pfn %lx. mfn %lx\n",
585 iter,n,pfn_type[batch]);
586 continue;
587 }
589 if ( last_iter &&
590 test_bit(n, to_fix) &&
591 !test_bit(n, to_send) )
592 {
593 needed_to_fix++;
594 DPRINTF("Fix! iter %d, pfn %lx. mfn %lx\n",
595 iter,n,pfn_type[batch]);
596 }
598 clear_bit(n, to_fix);
600 batch++;
601 }
603 DDPRINTF("batch %d:%d (n=%d)\n", iter, batch, n);
605 if ( batch == 0 )
606 goto skip; /* vanishingly unlikely... */
608 if ( (region_base = mfn_mapper_map_batch(xc_handle, domid,
609 PROT_READ,
610 pfn_type,
611 batch)) == 0 ){
612 xcio_perror(ioctxt, "map batch failed");
613 goto out;
614 }
616 if ( get_pfn_type_batch(xc_handle, domid, batch, pfn_type) ){
617 xcio_error(ioctxt, "get_pfn_type_batch failed");
618 goto out;
619 }
621 for ( j = 0; j < batch; j++ ){
622 if ( (pfn_type[j] & LTAB_MASK) == XTAB ){
623 DDPRINTF("type fail: page %i mfn %08lx\n",j,pfn_type[j]);
624 continue;
625 }
627 if ( 0 && debug )
628 fprintf(stderr, "%d pfn= %08lx mfn= %08lx [mfn]= %08lx"
629 " sum= %08lx\n",
630 iter,
631 (pfn_type[j] & LTAB_MASK) | pfn_batch[j],
632 pfn_type[j],
633 live_mfn_to_pfn_table[pfn_type[j]&(~LTAB_MASK)],
634 csum_page(region_base + (PAGE_SIZE*j)));
636 /* canonicalise mfn->pfn */
637 pfn_type[j] = (pfn_type[j] & LTAB_MASK) | pfn_batch[j];
638 }
640 if ( xcio_write(ioctxt, &batch, sizeof(int) ) ){
641 xcio_error(ioctxt, "Error when writing to state file (2)");
642 goto out;
643 }
645 if ( xcio_write(ioctxt, pfn_type, sizeof(unsigned long)*j ) ){
646 xcio_error(ioctxt, "Error when writing to state file (3)");
647 goto out;
648 }
650 /* entering this loop, pfn_type is now in pfns (Not mfns) */
651 for( j = 0; j < batch; j++ ){
652 /* write out pages in batch */
653 if( (pfn_type[j] & LTAB_MASK) == XTAB){
654 DDPRINTF("SKIP BOGUS page %i mfn %08lx\n",j,pfn_type[j]);
655 continue;
656 }
658 if ( ((pfn_type[j] & LTAB_MASK) == L1TAB) ||
659 ((pfn_type[j] & LTAB_MASK) == L2TAB) ){
660 memcpy(page, region_base + (PAGE_SIZE*j), PAGE_SIZE);
662 for ( k = 0;
663 k < (((pfn_type[j] & LTAB_MASK) == L2TAB) ?
664 (HYPERVISOR_VIRT_START >> L2_PAGETABLE_SHIFT) :
665 1024);
666 k++ ){
667 unsigned long pfn;
669 if ( !(page[k] & _PAGE_PRESENT) )
670 continue;
672 mfn = page[k] >> PAGE_SHIFT;
673 pfn = live_mfn_to_pfn_table[mfn];
675 if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) )
676 {
677 /* I don't think this should ever happen */
678 printf("FNI %d : [%08lx,%d] pte=%08lx, "
679 "mfn=%08lx, pfn=%08lx [mfn]=%08lx\n",
680 j, pfn_type[j], k,
681 page[k], mfn, live_mfn_to_pfn_table[mfn],
682 (live_mfn_to_pfn_table[mfn]<nr_pfns)?
683 live_pfn_to_mfn_table[
684 live_mfn_to_pfn_table[mfn]] :
685 0xdeadbeef);
687 pfn = 0; /* be suspicious */
688 }
690 page[k] &= PAGE_SIZE - 1;
691 page[k] |= pfn << PAGE_SHIFT;
693 #if 0
694 printf("L%d i=%d pfn=%d mfn=%d k=%d pte=%08lx "
695 "xpfn=%d\n",
696 pfn_type[j]>>28,
697 j,i,mfn,k,page[k],page[k]>>PAGE_SHIFT);
698 #endif
700 } /* end of page table rewrite for loop */
702 if ( xcio_write(ioctxt, page, PAGE_SIZE) ){
703 xcio_error(ioctxt, "Error when writing to state file (4)");
704 goto out;
705 }
707 } /* end of it's a PT page */ else { /* normal page */
709 if ( xcio_write(ioctxt, region_base + (PAGE_SIZE*j),
710 PAGE_SIZE) ){
711 xcio_error(ioctxt, "Error when writing to state file (5)");
712 goto out;
713 }
714 }
715 } /* end of the write out for this batch */
717 sent_this_iter += batch;
719 } /* end of this while loop for this iteration */
721 munmap(region_base, batch*PAGE_SIZE);
723 skip:
725 total_sent += sent_this_iter;
727 xcio_info(ioctxt, "\r %d: sent %d, skipped %d, ",
728 iter, sent_this_iter, skip_this_iter );
730 if ( last_iter ) {
731 print_stats( xc_handle, domid, sent_this_iter, &stats, 1);
733 xcio_info(ioctxt, "Total pages sent= %d (%.2fx)\n",
734 total_sent, ((float)total_sent)/nr_pfns );
735 xcio_info(ioctxt, "(of which %d were fixups)\n", needed_to_fix );
736 }
738 if (last_iter && debug){
739 int minusone = -1;
740 memset( to_send, 0xff, (nr_pfns+8)/8 );
741 debug = 0;
742 printf("Entering debug resend-all mode\n");
744 /* send "-1" to put receiver into debug mode */
745 if ( xcio_write(ioctxt, &minusone, sizeof(int)) )
746 {
747 xcio_error(ioctxt, "Error when writing to state file (6)");
748 goto out;
749 }
751 continue;
752 }
754 if ( last_iter ) break;
756 if ( live )
757 {
758 if (
759 /* ( sent_this_iter > (sent_last_iter * 0.95) ) || */
760 (iter >= max_iters) ||
761 (sent_this_iter+skip_this_iter < 50) ||
762 (total_sent > nr_pfns*max_factor) )
763 {
764 DPRINTF("Start last iteration\n");
765 last_iter = 1;
767 xc_domain_pause( xc_handle, domid );
768 }
770 if ( xc_shadow_control( xc_handle, domid,
771 DOM0_SHADOW_CONTROL_OP_CLEAN2,
772 to_send, nr_pfns, &stats ) != nr_pfns )
773 {
774 xcio_error(ioctxt, "Error flushing shadow PT");
775 goto out;
776 }
778 sent_last_iter = sent_this_iter;
780 print_stats( xc_handle, domid, sent_this_iter, &stats, 1);
782 }
785 } /* end of while 1 */
787 DPRINTF("All memory is saved\n");
789 /* Success! */
790 rc = 0;
792 /* Zero terminate */
793 if ( xcio_write(ioctxt, &rc, sizeof(int)) )
794 {
795 xcio_error(ioctxt, "Error when writing to state file (6)");
796 goto out;
797 }
799 /* Get the final execution context */
800 if ( xc_domain_getfullinfo( xc_handle, domid, &op, &ctxt) )
801 {
802 xcio_perror(ioctxt, "Could not get full domain info");
803 goto out;
804 }
806 /* Canonicalise the suspend-record frame number. */
807 if ( !translate_mfn_to_pfn(&ctxt.cpu_ctxt.esi) ){
808 xcio_error(ioctxt, "State record is not in range of pseudophys map");
809 goto out;
810 }
812 /* Canonicalise each GDT frame number. */
813 for ( i = 0; i < ctxt.gdt_ents; i += 512 ) {
814 if ( !translate_mfn_to_pfn(&ctxt.gdt_frames[i]) ) {
815 xcio_error(ioctxt, "GDT frame is not in range of pseudophys map");
816 goto out;
817 }
818 }
820 /* Canonicalise the page table base pointer. */
821 if ( !MFN_IS_IN_PSEUDOPHYS_MAP(ctxt.pt_base >> PAGE_SHIFT) ) {
822 xcio_error(ioctxt, "PT base is not in range of pseudophys map");
823 goto out;
824 }
825 ctxt.pt_base = live_mfn_to_pfn_table[ctxt.pt_base >> PAGE_SHIFT] <<
826 PAGE_SHIFT;
828 if ( xcio_write(ioctxt, &ctxt, sizeof(ctxt)) ||
829 xcio_write(ioctxt, live_shinfo, PAGE_SIZE) ) {
830 xcio_error(ioctxt, "Error when writing to state file (1)");
831 goto out;
832 }
833 munmap(live_shinfo, PAGE_SIZE);
835 out:
836 if ( pfn_type != NULL ) free(pfn_type);
837 DPRINTF("Save exit rc=%d\n",rc);
838 return !!rc;
840 }