ia64/xen-unstable

view tools/xc/lib/xc_linux_save.c @ 1602:4d2fe50e4fc8

bitkeeper revision 1.1023.1.2 (40e03fb3Kn9LqtnC7_hTzWLj-CbQDg)

Merge.
author mjw@wray-m-3.hpl.hp.com
date Mon Jun 28 15:56:35 2004 +0000 (2004-06-28)
parents c5d25124f417
children 75dec051cc70
line source
1 /******************************************************************************
2 * xc_linux_save.c
3 *
4 * Save the state of a running Linux session.
5 *
6 * Copyright (c) 2003, K A Fraser.
7 */
9 #include <sys/time.h>
10 #include "xc_private.h"
11 #include <asm-xen/suspend.h>
/* Pages are processed in batches of this many: 1024 pages (4MB) at a time. */
#define BATCH_SIZE 1024

/* Compile-time tracing switches: DEBUG gates DPRINTF, DDEBUG gates DDPRINTF. */
#define DEBUG 0
#define DDEBUG 0

/* printf-style trace output; expands to a no-op expression when DEBUG is 0. */
#if !DEBUG
#define DPRINTF(...) ((void)0)
#else
#define DPRINTF(...) printf(__VA_ARGS__)
#endif

/* Second trace channel, controlled independently by DDEBUG. */
#if !DDEBUG
#define DDPRINTF(...) ((void)0)
#else
#define DDPRINTF(...) printf(__VA_ARGS__)
#endif
/*
 * Returns TRUE if the given machine frame number has a unique mapping
 * in the guest's pseudophysical map.
 *
 * An MFN qualifies when it is below 1024*1024 and either:
 *  - its mfn->pfn entry is a valid pfn (< nr_pfns) whose pfn->mfn entry
 *    points back at the MFN, or holds the marker 0x80000004 (used
 *    elsewhere in this file for a pfn that is not currently in the
 *    pseudo-physical map); or
 *  - its mfn->pfn entry is one of 0x80000000-3, which mark the
 *    shared_info, and blk/net rings.
 *
 * The pfn->mfn table is only ever indexed after the pfn has been checked
 * against nr_pfns, so an out-of-range mfn->pfn entry cannot cause an
 * out-of-bounds read.
 *
 * Relies on live_mfn_to_pfn_table, live_pfn_to_mfn_table and nr_pfns being
 * in scope at the point of use.  The argument is evaluated several times,
 * so it must be free of side effects.
 */
#define MFN_IS_IN_PSEUDOPHYS_MAP(_mfn)                                       \
    (((_mfn) < (1024*1024)) &&                                               \
     (((live_mfn_to_pfn_table[(_mfn)] < nr_pfns) &&                          \
       ((live_pfn_to_mfn_table[live_mfn_to_pfn_table[(_mfn)]] == (_mfn)) ||  \
        (live_pfn_to_mfn_table[live_mfn_to_pfn_table[(_mfn)]] ==             \
         0x80000004))) ||                                                    \
      ((live_mfn_to_pfn_table[(_mfn)] >= 0x80000000) &&                      \
       (live_mfn_to_pfn_table[(_mfn)] <= 0x80000003))))
/*
 * Replace the machine frame number stored at *_pmfn with the matching
 * pseudophysical frame number from live_mfn_to_pfn_table.
 * Evaluates to 1 if the MFN was converted, or to 0 (leaving *_pmfn
 * untouched) if the MFN is not in the pseudophysical map.
 */
#define translate_mfn_to_pfn(_pmfn)                                 \
({                                                                  \
    unsigned long _xl_mfn = *(_pmfn);                               \
    int _xl_ok = MFN_IS_IN_PSEUDOPHYS_MAP(_xl_mfn) ? 1 : 0;         \
    if ( _xl_ok )                                                   \
        *(_pmfn) = live_mfn_to_pfn_table[_xl_mfn];                  \
    _xl_ok;                                                         \
})
/*
 * Return the value (0 or 1) of bit number nr in the bitmap at addr.
 * The bitmap is treated as an array of unsigned long words, with bit 0
 * being the least significant bit of the first word.
 */
static inline int test_bit ( int nr, volatile void * addr)
{
    unsigned long *words = (unsigned long *)addr;
    unsigned long word = words[nr / (sizeof(unsigned long) * 8)];

    return (word >> (nr % (sizeof(unsigned long) * 8))) & 1;
}
/*
 * Clear bit number nr in the bitmap at addr.
 * The bitmap is treated as an array of unsigned long words, with bit 0
 * being the least significant bit of the first word.
 */
static inline void clear_bit ( int nr, volatile void * addr)
{
    /* The mask must be built as unsigned long: an int-typed "1 << n" is
       wrong (and undefined) for bit positions at or above the width of
       int, e.g. the upper half of a 64-bit word. */
    ((unsigned long*)addr)[nr/(sizeof(unsigned long)*8)] &=
        ~(1UL << (nr % (sizeof(unsigned long)*8) ) );
}
/*
 * Set bit number nr in the bitmap at addr.
 * The bitmap is treated as an array of unsigned long words, with bit 0
 * being the least significant bit of the first word.
 */
static inline void set_bit ( int nr, volatile void * addr)
{
    /* The mask must be built as unsigned long: an int-typed "1 << n" is
       wrong (and undefined) for bit positions at or above the width of
       int, e.g. the upper half of a 64-bit word. */
    ((unsigned long*)addr)[nr/(sizeof(unsigned long)*8)] |=
        (1UL << (nr % (sizeof(unsigned long)*8) ) );
}
/*
 * Returns the hamming weight (i.e. the number of bits set) of the low
 * 32 bits of w.  Adjacent bit fields are summed pairwise, doubling the
 * field width at each of the five steps (1, 2, 4, 8, 16 bits).
 */
static inline unsigned int hweight32(unsigned int w)
{
    static const unsigned int field_mask[5] = {
        0x55555555, 0x33333333, 0x0F0F0F0F, 0x00FF00FF, 0x0000FFFF
    };
    unsigned int acc = w;
    int step;

    for ( step = 0; step < 5; step++ )
    {
        unsigned int width = 1u << step;
        acc = (acc & field_mask[step]) + ((acc >> width) & field_mask[step]);
    }

    return acc;
}
/*
 * Count the bits that are set among the first nr bits of the bitmap at
 * addr.  The bitmap is an array of unsigned long words, bit 0 being the
 * least significant bit of the first word.
 *
 * Works for any width of unsigned long (the words are not truncated to
 * 32 bits) and includes the bits of a trailing partial word.
 * Returns 0 when nr <= 0.
 */
static inline int count_bits ( int nr, volatile void *addr)
{
    const int word_bits = (int)(sizeof(unsigned long) * 8);
    unsigned long *p = (unsigned long *)addr;
    unsigned long w;
    int i, full_words, tail_bits, count = 0;

    if ( nr <= 0 )
        return 0;

    full_words = nr / word_bits;
    tail_bits  = nr % word_bits;

    for ( i = 0; i < full_words; i++ )
        for ( w = p[i]; w != 0; w &= w - 1 ) /* clears lowest set bit */
            count++;

    /* We know that the array is padded to unsigned long, so the word
       holding the remaining bits can be read whole and then masked. */
    if ( tail_bits != 0 )
        for ( w = p[full_words] & ((1UL << tail_bits) - 1);
              w != 0; w &= w - 1 )
            count++;

    return count;
}
/*
 * Map index i (0 <= i < nr) to another index in [0, nr), one-to-one.
 *
 * Need a simple permutation function so that we scan pages in a
 * pseudo random order, enabling us to get a better estimate of
 * the domain's page dirtying rate as we go (there are often
 * contiguous ranges of pfns that have similar behaviour, and we
 * want to mix them up).
 *
 * order_nr is the power-of-2 order of nr, e.g. nr->order
 * 15->4 16->4 17->5.  (512MB domain, 128k pages, order 17.)
 *
 * The index is rotated left by 10 bits within an order_nr-bit field:
 *
 *    QPONMLKJIHGFEDCBA
 *           QPONMLKJIH
 *    GFEDCBA
 *
 * and the rotation is repeated until the result is below nr.
 *
 * For order_nr < 10 a fixed rotation by 10 would need a negative shift
 * count (undefined behaviour), so the rotation amount is reduced modulo
 * order_nr.  Results for order_nr >= 10 are unchanged.  order_nr is
 * expected to be below the bit width of unsigned int.
 */
static inline int permute( int i, int nr, int order_nr )
{
    unsigned int v = (unsigned int)i;
    unsigned int mask, rot;

    if ( order_nr <= 0 )
        return i;   /* at most one index: nothing to permute */

    mask = (1u << order_nr) - 1;
    rot  = 10u % (unsigned int)order_nr;

    do { v = ((v >> ((unsigned int)order_nr - rot)) | (v << rot)) & mask; }
    while ( v >= (unsigned int)nr ); /* this won't ever loop if nr is a power of 2 */

    return (int)v;
}
/*
 * Convert a struct timeval to a single count of microseconds.
 * The multiplication is carried out in long long so that it cannot
 * overflow where tv_sec is a 32-bit type.
 */
static long long tv_to_us( struct timeval *new )
{
    return ((long long)new->tv_sec * 1000000LL) + new->tv_usec;
}
/*
 * Return the current time of day, as reported by gettimeofday(),
 * converted to microseconds by tv_to_us().
 */
static long long llgettimeofday()
{
    struct timeval tod;

    gettimeofday(&tod, NULL);

    return tv_to_us(&tod);
}
/*
 * Return the time elapsed from *old to *new, in microseconds.
 * The seconds difference is widened to long long before scaling so that
 * intervals longer than about 35 minutes do not overflow where tv_sec
 * is a 32-bit type.
 */
static long long tv_delta( struct timeval *new, struct timeval *old )
{
    return (((long long)new->tv_sec - (long long)old->tv_sec) * 1000000LL) +
        ((long long)new->tv_usec - (long long)old->tv_usec);
}
141 static int print_stats( int xc_handle, u32 domid,
142 int pages_sent, xc_shadow_control_stats_t *stats,
143 int print )
144 {
145 static struct timeval wall_last;
146 static long long d0_cpu_last;
147 static long long d1_cpu_last;
149 struct timeval wall_now;
150 long long wall_delta;
151 long long d0_cpu_now, d0_cpu_delta;
152 long long d1_cpu_now, d1_cpu_delta;
154 gettimeofday(&wall_now, NULL);
156 d0_cpu_now = xc_domain_get_cpu_usage( xc_handle, 0 )/1000;
157 d1_cpu_now = xc_domain_get_cpu_usage( xc_handle, domid )/1000;
159 if ( (d0_cpu_now == -1) || (d1_cpu_now == -1) )
160 printf("ARRHHH!!\n");
162 wall_delta = tv_delta(&wall_now,&wall_last)/1000;
164 if ( wall_delta == 0 ) wall_delta = 1;
166 d0_cpu_delta = (d0_cpu_now - d0_cpu_last)/1000;
167 d1_cpu_delta = (d1_cpu_now - d1_cpu_last)/1000;
169 if ( print )
170 printf("delta %lldms, dom0 %d%%, target %d%%, sent %dMb/s, "
171 "dirtied %dMb/s\n",
172 wall_delta,
173 (int)((d0_cpu_delta*100)/wall_delta),
174 (int)((d1_cpu_delta*100)/wall_delta),
175 (int)((pages_sent*PAGE_SIZE*8)/(wall_delta*1000)),
176 (int)((stats->dirty_count*PAGE_SIZE*8)/(wall_delta*1000)));
178 d0_cpu_last = d0_cpu_now;
179 d1_cpu_last = d1_cpu_now;
180 wall_last = wall_now;
182 return 0;
183 }
185 /** Write the vmconfig string.
186 * It is stored as a 4-byte count 'n' followed by n bytes.
187 *
188 * @param ioctxt i/o context
189 * @return 0 on success, non-zero on error.
190 */
191 static int write_vmconfig(XcIOContext *ioctxt){
192 int err = -1;
193 if(xcio_write(ioctxt, &ioctxt->vmconfig_n, sizeof(ioctxt->vmconfig_n))) goto exit;
194 if(xcio_write(ioctxt, ioctxt->vmconfig, ioctxt->vmconfig_n)) goto exit;
195 err = 0;
196 exit:
197 return err;
198 }
200 static int analysis_phase( int xc_handle, u32 domid,
201 int nr_pfns, unsigned long *arr )
202 {
203 long long start, now;
204 xc_shadow_control_stats_t stats;
206 start = llgettimeofday();
208 while ( 0 )
209 {
210 int i;
212 xc_shadow_control( xc_handle, domid,
213 DOM0_SHADOW_CONTROL_OP_CLEAN2,
214 arr, nr_pfns, NULL);
215 printf("#Flush\n");
216 for ( i = 0; i < 100; i++ )
217 {
218 usleep(10000);
219 now = llgettimeofday();
220 xc_shadow_control( xc_handle, domid,
221 DOM0_SHADOW_CONTROL_OP_PEEK,
222 NULL, 0, &stats);
224 printf("now= %lld faults= %ld dirty= %ld dirty_net= %ld "
225 "dirty_block= %ld\n",
226 ((now-start)+500)/1000,
227 stats.fault_count, stats.dirty_count,
228 stats.dirty_net_count, stats.dirty_block_count);
229 }
230 }
232 return -1;
233 }
235 int xc_linux_save(int xc_handle, XcIOContext *ioctxt)
236 {
237 dom0_op_t op;
238 int rc = 1, i, j, k, last_iter, iter = 0;
239 unsigned long mfn;
240 u32 domid = ioctxt->domain;
241 int live = (ioctxt->flags & XCFLAGS_LIVE);
242 int debug = (ioctxt->flags & XCFLAGS_DEBUG);
243 int sent_last_iter, skip_this_iter;
245 /* Important tuning parameters */
246 int max_iters = 29; /* limit us to 30 times round loop */
247 int max_factor = 3; /* never send more than 3x nr_pfns */
249 /* The new domain's shared-info frame number. */
250 unsigned long shared_info_frame;
252 /* A copy of the CPU context of the guest. */
253 full_execution_context_t ctxt;
255 /* A copy of the domain's name. */
256 char name[MAX_DOMAIN_NAME];
258 /* A table containg the type of each PFN (/not/ MFN!). */
259 unsigned long *pfn_type = NULL;
260 unsigned long *pfn_batch = NULL;
262 /* A temporary mapping, and a copy, of one frame of guest memory. */
263 unsigned long page[1024];
265 /* A copy of the pfn-to-mfn table frame list. */
266 unsigned long *live_pfn_to_mfn_frame_list;
267 unsigned long pfn_to_mfn_frame_list[1024];
269 /* Live mapping of the table mapping each PFN to its current MFN. */
270 unsigned long *live_pfn_to_mfn_table = NULL;
271 /* Live mapping of system MFN to PFN table. */
272 unsigned long *live_mfn_to_pfn_table = NULL;
274 /* Live mapping of shared info structure */
275 unsigned long *live_shinfo;
277 /* base of the region in which domain memory is mapped */
278 unsigned char *region_base = NULL;
280 /* A temporary mapping, and a copy, of the guest's suspend record. */
281 suspend_record_t *p_srec;
283 /* number of pages we're dealing with */
284 unsigned long nr_pfns;
286 /* power of 2 order of nr_pfns */
287 int order_nr;
289 /* bitmap of pages:
290 - that should be sent this iteration (unless later marked as skip);
291 - to skip this iteration because already dirty;
292 - to fixup by sending at the end if not already resent; */
293 unsigned long *to_send, *to_skip, *to_fix;
295 xc_shadow_control_stats_t stats;
297 int needed_to_fix = 0;
298 int total_sent = 0;
300 if (mlock(&ctxt, sizeof(ctxt))) {
301 xcio_perror(ioctxt, "Unable to mlock ctxt");
302 return 1;
303 }
305 /* Ensure that the domain exists, and that it is stopped. */
306 if ( xc_domain_pause(xc_handle, domid) ){
307 xcio_perror(ioctxt, "Could not pause domain");
308 goto out;
309 }
311 if ( xc_domain_getfullinfo( xc_handle, domid, &op, &ctxt) )
312 {
313 PERROR("Could not get full domain info");
314 goto out;
315 }
316 memcpy(name, op.u.getdomaininfo.name, sizeof(name));
317 shared_info_frame = op.u.getdomaininfo.shared_info_frame;
319 /* A cheesy test to see whether the domain contains valid state. */
320 if ( ctxt.pt_base == 0 ){
321 xcio_error(ioctxt, "Domain is not in a valid Linux guest OS state");
322 goto out;
323 }
325 /* Map the suspend-record MFN to pin it. The page must be owned by
326 domid for this to succeed. */
327 p_srec = mfn_mapper_map_single(xc_handle, domid,
328 sizeof(*p_srec), PROT_READ,
329 ctxt.cpu_ctxt.esi);
330 if (!p_srec){
331 xcio_error(ioctxt, "Couldn't map state record");
332 goto out;
333 }
335 nr_pfns = p_srec->nr_pfns;
337 /* cheesy sanity check */
338 if ( nr_pfns > 1024*1024 ){
339 xcio_error(ioctxt, "Invalid state record -- pfn count out of range");
340 goto out;
341 }
343 /* the pfn_to_mfn_frame_list fits in a single page */
344 live_pfn_to_mfn_frame_list =
345 mfn_mapper_map_single(xc_handle, domid,
346 PAGE_SIZE, PROT_READ,
347 p_srec->pfn_to_mfn_frame_list );
349 if (!live_pfn_to_mfn_frame_list){
350 xcio_error(ioctxt, "Couldn't map pfn_to_mfn_frame_list");
351 goto out;
352 }
354 /* Track the mfn_to_pfn table down from the domains PT */
355 {
356 unsigned long *pgd;
357 unsigned long mfn_to_pfn_table_start_mfn;
359 pgd = mfn_mapper_map_single(xc_handle, domid,
360 PAGE_SIZE, PROT_READ,
361 ctxt.pt_base>>PAGE_SHIFT);
363 mfn_to_pfn_table_start_mfn =
364 pgd[HYPERVISOR_VIRT_START>>L2_PAGETABLE_SHIFT]>>PAGE_SHIFT;
366 live_mfn_to_pfn_table =
367 mfn_mapper_map_single(xc_handle, ~0UL,
368 PAGE_SIZE*1024, PROT_READ,
369 mfn_to_pfn_table_start_mfn );
370 }
372 /* Map all the frames of the pfn->mfn table. For migrate to succeed,
373 the guest must not change which frames are used for this purpose.
374 (its not clear why it would want to change them, and we'll be OK
375 from a safety POV anyhow. */
377 live_pfn_to_mfn_table = mfn_mapper_map_batch(xc_handle, domid,
378 PROT_READ,
379 live_pfn_to_mfn_frame_list,
380 (nr_pfns+1023)/1024 );
381 if( !live_pfn_to_mfn_table ){
382 xcio_perror(ioctxt, "Couldn't map pfn_to_mfn table");
383 goto out;
384 }
387 /* Canonicalise the pfn-to-mfn table frame-number list. */
388 memcpy( pfn_to_mfn_frame_list, live_pfn_to_mfn_frame_list, PAGE_SIZE );
389 for ( i = 0; i < nr_pfns; i += 1024 ){
390 if ( !translate_mfn_to_pfn(&pfn_to_mfn_frame_list[i/1024]) ){
391 xcio_error(ioctxt, "Frame # in pfn-to-mfn frame list is not in pseudophys");
392 goto out;
393 }
394 }
396 /* At this point, we can start the domain again if we're doing a
397 live suspend */
399 if( live ){
400 if ( xc_shadow_control( xc_handle, domid,
401 DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY,
402 NULL, 0, NULL ) < 0 )
403 xcio_error(ioctxt, "Couldn't enable shadow mode");
404 goto out;
405 }
407 if ( xc_domain_unpause(xc_handle, domid) < 0 ){
408 xcio_error(ioctxt, "Couldn't unpause domain");
409 goto out;
410 }
412 last_iter = 0;
413 sent_last_iter = 1<<20; /* 4GB of pages */
414 } else{
415 last_iter = 1;
416 }
418 /* calculate the power of 2 order of nr_pfns, e.g.
419 15->4 16->4 17->5 */
420 for( i=nr_pfns-1, order_nr=0; i ; i>>=1, order_nr++ );
422 /* Setup to_send bitmap */
423 {
424 int sz = (nr_pfns/8) + 8; /* includes slop at end of array */
426 to_send = malloc( sz );
427 to_fix = calloc( 1, sz );
428 to_skip = malloc( sz );
430 if (!to_send || !to_fix || !to_skip){
431 xcio_error(ioctxt, "Couldn't allocate to_send array");
432 goto out;
433 }
435 memset( to_send, 0xff, sz );
437 if ( mlock( to_send, sz ) ){
438 xcio_perror(ioctxt, "Unable to mlock to_send");
439 return 1;
440 }
442 /* (to fix is local only) */
444 if ( mlock( to_skip, sz ) ){
445 xcio_perror(ioctxt, "Unable to mlock to_skip");
446 return 1;
447 }
449 }
451 analysis_phase( xc_handle, domid, nr_pfns, to_skip );
453 /* We want zeroed memory so use calloc rather than malloc. */
454 pfn_type = calloc(BATCH_SIZE, sizeof(unsigned long));
455 pfn_batch = calloc(BATCH_SIZE, sizeof(unsigned long));
457 if ( (pfn_type == NULL) || (pfn_batch == NULL) ){
458 errno = ENOMEM;
459 goto out;
460 }
462 if ( mlock( pfn_type, BATCH_SIZE * sizeof(unsigned long) ) ){
463 xcio_error(ioctxt, "Unable to mlock");
464 goto out;
465 }
468 /*
469 * Quick belt and braces sanity check.
470 */
471 #if DEBUG
472 for ( i = 0; i < nr_pfns; i++ ){
473 mfn = live_pfn_to_mfn_table[i];
475 if( (live_mfn_to_pfn_table[mfn] != i) && (mfn != 0x80000004) )
476 printf("i=0x%x mfn=%x live_mfn_to_pfn_table=%x\n",
477 i,mfn,live_mfn_to_pfn_table[mfn]);
478 }
479 #endif
481 /* Map the shared info frame */
482 live_shinfo = mfn_mapper_map_single(xc_handle, domid,
483 PAGE_SIZE, PROT_READ,
484 shared_info_frame);
486 if (!live_shinfo){
487 xcio_error(ioctxt, "Couldn't map live_shinfo");
488 goto out;
489 }
491 /* Start writing out the saved-domain record. */
493 if ( xcio_write(ioctxt, "LinuxGuestRecord", 16) ||
494 xcio_write(ioctxt, name, sizeof(name)) ||
495 xcio_write(ioctxt, &nr_pfns, sizeof(unsigned long)) ||
496 xcio_write(ioctxt, pfn_to_mfn_frame_list, PAGE_SIZE) ){
497 xcio_error(ioctxt, "Error writing header");
498 goto out;
499 }
500 if(write_vmconfig(ioctxt)){
501 xcio_error(ioctxt, "Error writing vmconfig");
502 goto out;
503 }
505 print_stats( xc_handle, domid, 0, &stats, 0 );
507 /* Now write out each data page, canonicalising page tables as we go... */
509 while(1){
510 unsigned int prev_pc, sent_this_iter, N, batch;
512 iter++;
513 sent_this_iter = 0;
514 skip_this_iter = 0;
515 prev_pc = 0;
516 N=0;
518 xcio_info(ioctxt, "Saving memory pages: iter %d 0%%", iter);
520 while( N < nr_pfns ){
521 unsigned int this_pc = (N * 100) / nr_pfns;
523 if ( (this_pc - prev_pc) >= 5 ){
524 xcio_info(ioctxt, "\b\b\b\b%3d%%", this_pc);
525 prev_pc = this_pc;
526 }
528 /* slightly wasteful to peek the whole array evey time,
529 but this is fast enough for the moment. */
531 if ( !last_iter &&
532 xc_shadow_control(xc_handle, domid,
533 DOM0_SHADOW_CONTROL_OP_PEEK,
534 to_skip, nr_pfns, NULL) != nr_pfns ) {
535 xcio_error(ioctxt, "Error peeking shadow bitmap");
536 goto out;
537 }
540 /* load pfn_type[] with the mfn of all the pages we're doing in
541 this batch. */
543 for ( batch = 0; batch < BATCH_SIZE && N < nr_pfns ; N++ )
544 {
545 int n = permute(N, nr_pfns, order_nr );
547 if ( 0 && debug )
548 fprintf(stderr,"%d pfn= %08lx mfn= %08lx %d "
549 " [mfn]= %08lx\n",
550 iter, n, live_pfn_to_mfn_table[n],
551 test_bit(n,to_send),
552 live_mfn_to_pfn_table[live_pfn_to_mfn_table[n]&
553 0xFFFFF]);
555 if ( !last_iter &&
556 test_bit(n, to_send) &&
557 test_bit(n, to_skip) )
558 skip_this_iter++; /* stats keeping */
559 }
561 if ( !((test_bit(n, to_send) && !test_bit(n, to_skip)) ||
562 (test_bit(n, to_send) && last_iter) ||
563 (test_bit(n, to_fix) && last_iter)) )
564 continue;
565 }
567 /* we get here if:
568 1. page is marked to_send & hasn't already been re-dirtied
569 2. (ignore to_skip in last iteration)
570 3. add in pages that still need fixup (net bufs)
571 */
573 pfn_batch[batch] = n;
574 pfn_type[batch] = live_pfn_to_mfn_table[n];
576 if( pfn_type[batch] == 0x80000004 ){
577 /* not currently in pusedo-physical map -- set bit
578 in to_fix that we must send this page in last_iter
579 unless its sent sooner anyhow */
581 set_bit( n, to_fix );
582 if( iter>1 )
583 DDPRINTF("netbuf race: iter %d, pfn %lx. mfn %lx\n",
584 iter,n,pfn_type[batch]);
585 continue;
586 }
588 if ( last_iter &&
589 test_bit(n, to_fix) &&
590 !test_bit(n, to_send) )
591 {
592 needed_to_fix++;
593 DPRINTF("Fix! iter %d, pfn %lx. mfn %lx\n",
594 iter,n,pfn_type[batch]);
595 }
597 clear_bit(n, to_fix);
599 batch++;
600 }
602 DDPRINTF("batch %d:%d (n=%d)\n", iter, batch, n);
604 if ( batch == 0 )
605 goto skip; /* vanishingly unlikely... */
607 if ( (region_base = mfn_mapper_map_batch(xc_handle, domid,
608 PROT_READ,
609 pfn_type,
610 batch)) == 0 ){
611 xcio_perror(ioctxt, "map batch failed");
612 goto out;
613 }
615 if ( get_pfn_type_batch(xc_handle, domid, batch, pfn_type) ){
616 xcio_error(ioctxt, "get_pfn_type_batch failed");
617 goto out;
618 }
620 for ( j = 0; j < batch; j++ ){
621 if ( (pfn_type[j] & LTAB_MASK) == XTAB ){
622 DDPRINTF("type fail: page %i mfn %08lx\n",j,pfn_type[j]);
623 continue;
624 }
626 if ( 0 && debug )
627 fprintf(stderr, "%d pfn= %08lx mfn= %08lx [mfn]= %08lx"
628 " sum= %08lx\n",
629 iter,
630 (pfn_type[j] & LTAB_MASK) | pfn_batch[j],
631 pfn_type[j],
632 live_mfn_to_pfn_table[pfn_type[j]&(~LTAB_MASK)],
633 csum_page(region_base + (PAGE_SIZE*j)));
635 /* canonicalise mfn->pfn */
636 pfn_type[j] = (pfn_type[j] & LTAB_MASK) | pfn_batch[j];
637 }
639 if ( xcio_write(ioctxt, &batch, sizeof(int) ) ){
640 xcio_error(ioctxt, "Error when writing to state file (2)");
641 goto out;
642 }
644 if ( xcio_write(ioctxt, pfn_type, sizeof(unsigned long)*j ) ){
645 xcio_error(ioctxt, "Error when writing to state file (3)");
646 goto out;
647 }
649 /* entering this loop, pfn_type is now in pfns (Not mfns) */
650 for( j = 0; j < batch; j++ ){
651 /* write out pages in batch */
652 if( (pfn_type[j] & LTAB_MASK) == XTAB){
653 DDPRINTF("SKIP BOGUS page %i mfn %08lx\n",j,pfn_type[j]);
654 continue;
655 }
657 if ( ((pfn_type[j] & LTAB_MASK) == L1TAB) ||
658 ((pfn_type[j] & LTAB_MASK) == L2TAB) ){
659 memcpy(page, region_base + (PAGE_SIZE*j), PAGE_SIZE);
661 for ( k = 0;
662 k < (((pfn_type[j] & LTAB_MASK) == L2TAB) ?
663 (HYPERVISOR_VIRT_START >> L2_PAGETABLE_SHIFT) :
664 1024);
665 k++ ){
666 unsigned long pfn;
668 if ( !(page[k] & _PAGE_PRESENT) )
669 continue;
671 mfn = page[k] >> PAGE_SHIFT;
672 pfn = live_mfn_to_pfn_table[mfn];
674 if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) )
675 {
676 /* I don't think this should ever happen */
677 printf("FNI %d : [%08lx,%d] pte=%08lx, "
678 "mfn=%08lx, pfn=%08lx [mfn]=%08lx\n",
679 j, pfn_type[j], k,
680 page[k], mfn, live_mfn_to_pfn_table[mfn],
681 (live_mfn_to_pfn_table[mfn]<nr_pfns)?
682 live_pfn_to_mfn_table[
683 live_mfn_to_pfn_table[mfn]] :
684 0xdeadbeef);
686 pfn = 0; /* be suspicious */
687 }
689 page[k] &= PAGE_SIZE - 1;
690 page[k] |= pfn << PAGE_SHIFT;
692 #if 0
693 printf("L%d i=%d pfn=%d mfn=%d k=%d pte=%08lx "
694 "xpfn=%d\n",
695 pfn_type[j]>>28,
696 j,i,mfn,k,page[k],page[k]>>PAGE_SHIFT);
697 #endif
699 } /* end of page table rewrite for loop */
701 if ( xcio_write(ioctxt, page, PAGE_SIZE) ){
702 xcio_error(ioctxt, "Error when writing to state file (4)");
703 goto out;
704 }
706 } /* end of it's a PT page */ else { /* normal page */
708 if ( xcio_write(ioctxt, region_base + (PAGE_SIZE*j),
709 PAGE_SIZE) ){
710 xcio_error(ioctxt, "Error when writing to state file (5)");
711 goto out;
712 }
713 }
714 } /* end of the write out for this batch */
716 sent_this_iter += batch;
718 } /* end of this while loop for this iteration */
720 munmap(region_base, batch*PAGE_SIZE);
722 skip:
724 total_sent += sent_this_iter;
726 xcio_info(ioctxt, "\r %d: sent %d, skipped %d, ",
727 iter, sent_this_iter, skip_this_iter );
729 if ( last_iter ) {
730 print_stats( xc_handle, domid, sent_this_iter, &stats, 1);
732 xcio_info(ioctxt, "Total pages sent= %d (%.2fx)\n",
733 total_sent, ((float)total_sent)/nr_pfns );
734 xcio_info(ioctxt, "(of which %d were fixups)\n", needed_to_fix );
735 }
737 if (last_iter && debug){
738 int minusone = -1;
739 memset( to_send, 0xff, (nr_pfns+8)/8 );
740 debug = 0;
741 printf("Entering debug resend-all mode\n");
743 /* send "-1" to put receiver into debug mode */
744 if ( xcio_write(ioctxt, &minusone, sizeof(int)) )
745 {
746 xcio_error(ioctxt, "Error when writing to state file (6)");
747 goto out;
748 }
750 continue;
751 }
753 if ( last_iter ) break;
755 if ( live )
756 {
757 if (
758 /* ( sent_this_iter > (sent_last_iter * 0.95) ) || */
759 (iter >= max_iters) ||
760 (sent_this_iter+skip_this_iter < 50) ||
761 (total_sent > nr_pfns*max_factor) )
762 {
763 DPRINTF("Start last iteration\n");
764 last_iter = 1;
766 xc_domain_pause( xc_handle, domid );
767 }
769 if ( xc_shadow_control( xc_handle, domid,
770 DOM0_SHADOW_CONTROL_OP_CLEAN2,
771 to_send, nr_pfns, &stats ) != nr_pfns )
772 {
773 xcio_error(ioctxt, "Error flushing shadow PT");
774 goto out;
775 }
777 sent_last_iter = sent_this_iter;
779 print_stats( xc_handle, domid, sent_this_iter, &stats, 1);
781 }
784 } /* end of while 1 */
786 DPRINTF("All memory is saved\n");
788 /* Success! */
789 rc = 0;
791 /* Zero terminate */
792 if ( xcio_write(ioctxt, &rc, sizeof(int)) )
793 {
794 xcio_error(ioctxt, "Error when writing to state file (6)");
795 goto out;
796 }
798 /* Get the final execution context */
799 if ( xc_domain_getfullinfo( xc_handle, domid, &op, &ctxt) )
800 {
801 xcio_perror(ioctxt, "Could not get full domain info");
802 goto out;
803 }
805 /* Canonicalise the suspend-record frame number. */
806 if ( !translate_mfn_to_pfn(&ctxt.cpu_ctxt.esi) ){
807 xcio_error(ioctxt, "State record is not in range of pseudophys map");
808 goto out;
809 }
811 /* Canonicalise each GDT frame number. */
812 for ( i = 0; i < ctxt.gdt_ents; i += 512 ) {
813 if ( !translate_mfn_to_pfn(&ctxt.gdt_frames[i]) ) {
814 xcio_error(ioctxt, "GDT frame is not in range of pseudophys map");
815 goto out;
816 }
817 }
819 /* Canonicalise the page table base pointer. */
820 if ( !MFN_IS_IN_PSEUDOPHYS_MAP(ctxt.pt_base >> PAGE_SHIFT) ) {
821 xcio_error(ioctxt, "PT base is not in range of pseudophys map");
822 goto out;
823 }
824 ctxt.pt_base = live_mfn_to_pfn_table[ctxt.pt_base >> PAGE_SHIFT] <<
825 PAGE_SHIFT;
827 if ( xcio_write(ioctxt, &ctxt, sizeof(ctxt)) ||
828 xcio_write(ioctxt, live_shinfo, PAGE_SIZE) ) {
829 xcio_error(ioctxt, "Error when writing to state file (1)");
830 goto out;
831 }
832 munmap(live_shinfo, PAGE_SIZE);
834 out:
835 if ( pfn_type != NULL ) free(pfn_type);
836 DPRINTF("Save exit rc=%d\n",rc);
837 return !!rc;
839 }