direct-io.hg

view tools/libxc/xc_linux_save.c @ 2693:2584528df9e1

bitkeeper revision 1.1159.123.2 (4177d169N58TtQXn_XJO4xNBKbMQUw)

Merge freefall.cl.cam.ac.uk:/auto/groups/xeno/BK/xeno.bk
into freefall.cl.cam.ac.uk:/local/scratch/kaf24/xeno
author kaf24@freefall.cl.cam.ac.uk
date Thu Oct 21 15:10:33 2004 +0000 (2004-10-21)
parents 0174982516f6 d8e27145f1eb
children 8aa9d487a8dd
line source
1 /******************************************************************************
2 * xc_linux_save.c
3 *
4 * Save the state of a running Linux session.
5 *
6 * Copyright (c) 2003, K A Fraser.
7 */
9 #include <sys/time.h>
10 #include "xc_private.h"
11 #include <asm-xen/suspend.h>
/* Pages are processed in batches of this many: 1024 pages (4MB) at a time. */
#define BATCH_SIZE 1024

/* Compile-time tracing switches: non-zero enables the matching macro below. */
#define DEBUG  0
#define DDEBUG 0

/* DPRINTF: printf-style trace output; expands to a no-op unless DEBUG. */
#if !DEBUG
#define DPRINTF(...) ((void)0)
#else
#define DPRINTF(...) printf ( __VA_ARGS__ )
#endif

/* DDPRINTF: as DPRINTF, but controlled by DDEBUG. */
#if !DDEBUG
#define DDPRINTF(...) ((void)0)
#else
#define DDPRINTF(...) printf ( __VA_ARGS__ )
#endif
/*
 * Returns TRUE if the given machine frame number has a unique mapping
 * in the guest's pseudophysical map.
 *
 * Expands to an expression that uses 'live_mfn_to_pfn_table',
 * 'live_pfn_to_mfn_table' and 'nr_pfns' from the scope of the caller.
 * The argument is evaluated more than once, so it must not have side
 * effects.
 */
#define MFN_IS_IN_PSEUDOPHYS_MAP(_mfn)                                     \
    (((_mfn) < (1024*1024)) &&                                             \
     ((live_mfn_to_pfn_table[(_mfn)] < nr_pfns) &&                         \
      (live_pfn_to_mfn_table[live_mfn_to_pfn_table[(_mfn)]] == (_mfn))))


/*
 * Returns TRUE if MFN is successfully converted to a PFN.
 *
 * '_pmfn' points at the frame number; on success it is overwritten in
 * place with the PFN, on failure it is left untouched. The temporaries
 * carry a '_xlat_' prefix so that they cannot capture a caller variable
 * (a plain 'mfn' temporary made translate_mfn_to_pfn(&mfn) read an
 * uninitialised value).
 */
#define translate_mfn_to_pfn(_pmfn)                                        \
({                                                                         \
    unsigned long _xlat_mfn = *(_pmfn);                                    \
    int _xlat_res = 1;                                                     \
    if ( !MFN_IS_IN_PSEUDOPHYS_MAP(_xlat_mfn) )                            \
        _xlat_res = 0;                                                     \
    else                                                                   \
        *(_pmfn) = live_mfn_to_pfn_table[_xlat_mfn];                       \
    _xlat_res;                                                             \
})

/* A pfn-to-mfn table entry is treated as mapped when bit 31 is clear. */
#define is_mapped(pfn) (!((pfn) & 0x80000000UL))
/*
 * Return the value (0 or 1) of bit 'nr' in the bitmap at 'addr', which is
 * accessed as an array of unsigned long words.
 */
static inline int test_bit ( int nr, volatile void * addr)
{
    unsigned long *words = (unsigned long *)addr;
    unsigned long word = words[nr / (sizeof(unsigned long) * 8)];

    word >>= nr % (sizeof(unsigned long) * 8);
    return word & 1;
}
/*
 * Clear bit 'nr' in the bitmap at 'addr', which is accessed as an array of
 * unsigned long words. Not atomic.
 *
 * The mask is built from 1UL so that it has the full width of a word:
 * an 'int' mask is wrong for bit 31 and above when long is 64 bits wide
 * (and clearing bit 31 would also wipe bits 32..63).
 */
static inline void clear_bit ( int nr, volatile void * addr)
{
    ((unsigned long*)addr)[nr/(sizeof(unsigned long)*8)] &=
        ~(1UL << (nr % (sizeof(unsigned long)*8) ) );
}
/*
 * Set bit 'nr' in the bitmap at 'addr', which is accessed as an array of
 * unsigned long words. Not atomic.
 *
 * The mask is built from 1UL so that it has the full width of a word:
 * an 'int' mask is wrong for bit 31 and above when long is 64 bits wide
 * (setting bit 31 would sign-extend and set bits 31..63).
 */
static inline void set_bit ( int nr, volatile void * addr)
{
    ((unsigned long*)addr)[nr/(sizeof(unsigned long)*8)] |=
        (1UL << (nr % (sizeof(unsigned long)*8) ) );
}
/*
 * Returns the hamming weight (i.e. the number of bits set) of a 32-bit word.
 *
 * Adjacent bit fields are summed pairwise, doubling the field width at each
 * step (1, 2, 4, 8, then 16 bits), until one field holds the total.
 */
static inline unsigned int hweight32(unsigned int w)
{
    static const unsigned int field_mask[5] = {
        0x55555555, 0x33333333, 0x0F0F0F0F, 0x00FF00FF, 0x0000FFFF
    };
    unsigned int step;

    for ( step = 0; step < 5; step++ )
        w = (w & field_mask[step]) + ((w >> (1u << step)) & field_mask[step]);

    return w;
}
/*
 * Count the bits set in the first nr/(bits per unsigned long) whole words
 * of the bitmap at 'addr'. A trailing partial word is not counted.
 *
 * Each word is fed to hweight32() in 32-bit halves: passing an unsigned
 * long directly silently dropped the upper half where long is 64 bits wide.
 */
static inline int count_bits ( int nr, volatile void *addr)
{
    int i, count = 0;
    unsigned long *p = (unsigned long *)addr;
    /* We know that the array is padded to unsigned long. */
    for(i=0;i<nr/(sizeof(unsigned long)*8);i++,p++)
    {
        unsigned long word = *p;

        count += hweight32( (unsigned int)(word & 0xFFFFFFFFUL) );
        /* Two 16-bit shifts: a single shift by 32 is undefined behaviour
           when unsigned long is itself only 32 bits wide. */
        count += hweight32( (unsigned int)(((word >> 16) >> 16) &
                                           0xFFFFFFFFUL) );
    }
    return count;
}
/*
 * Map index 'i' onto another index in [0, nr) by rotating its low
 * 'order_nr' bits left by 10 places, repeating until the result is
 * below 'nr'.
 *
 * 'order_nr' is the power-of-2 order of 'nr'. The caller is expected to
 * pass 0 <= i < nr.
 *
 * When order_nr <= 10 the index is returned unchanged. The rotation would
 * otherwise need a negative shift count (undefined behaviour), and at
 * order_nr == 10 it is the identity anyway. The identity is still a valid
 * permutation of [0, nr).
 */
static inline int permute( int i, int nr, int order_nr )
{
    /* Need a simple permutation function so that we scan pages in a
       pseudo random order, enabling us to get a better estimate of
       the domain's page dirtying rate as we go (there are often
       contiguous ranges of pfns that have similar behaviour, and we
       want to mix them up). */

    /* e.g. nr->order 15->4 16->4 17->5 */
    /* 512MB domain, 128k pages, order 17 */

    /*
      QPONMLKJIHGFEDCBA
             QPONMLKJIH
      GFEDCBA
     */

    /*
      QPONMLKJIHGFEDCBA
                  EDCBA
             QPONM
      LKJIHGF
      */

    if ( order_nr <= 10 )
        return i;

    do { i = ((i>>(order_nr-10)) | ( i<<10 ) ) & ((1<<order_nr)-1); }
    while ( i >= nr ); /* this won't ever loop if nr is a power of 2 */

    return i;
}
/*
 * Convert a struct timeval to a count of microseconds.
 *
 * The seconds field is widened to long long before scaling so that the
 * multiplication cannot overflow where time_t/long is 32 bits wide.
 */
static long long tv_to_us( struct timeval *new )
{
    return ((long long)new->tv_sec * 1000000) + new->tv_usec;
}
/* Return the current gettimeofday() time as a count of microseconds. */
static long long llgettimeofday()
{
    struct timeval tv_now;
    long long us_now;

    gettimeofday(&tv_now, NULL);
    us_now = tv_to_us(&tv_now);

    return us_now;
}
/*
 * Return (new - old) in microseconds; negative if 'new' is earlier.
 *
 * The seconds difference is widened to long long before scaling so that
 * the multiplication cannot overflow where time_t/long is 32 bits wide.
 */
static long long tv_delta( struct timeval *new, struct timeval *old )
{
    return ((long long)(new->tv_sec - old->tv_sec) * 1000000) +
        (new->tv_usec - old->tv_usec);
}
141 static int print_stats( int xc_handle, u32 domid,
142 int pages_sent, xc_shadow_control_stats_t *stats,
143 int print )
144 {
145 static struct timeval wall_last;
146 static long long d0_cpu_last;
147 static long long d1_cpu_last;
149 struct timeval wall_now;
150 long long wall_delta;
151 long long d0_cpu_now, d0_cpu_delta;
152 long long d1_cpu_now, d1_cpu_delta;
154 gettimeofday(&wall_now, NULL);
156 d0_cpu_now = xc_domain_get_cpu_usage( xc_handle, 0 )/1000;
157 d1_cpu_now = xc_domain_get_cpu_usage( xc_handle, domid )/1000;
159 if ( (d0_cpu_now == -1) || (d1_cpu_now == -1) )
160 printf("ARRHHH!!\n");
162 wall_delta = tv_delta(&wall_now,&wall_last)/1000;
164 if ( wall_delta == 0 ) wall_delta = 1;
166 d0_cpu_delta = (d0_cpu_now - d0_cpu_last)/1000;
167 d1_cpu_delta = (d1_cpu_now - d1_cpu_last)/1000;
169 if ( print )
170 printf("delta %lldms, dom0 %d%%, target %d%%, sent %dMb/s, "
171 "dirtied %dMb/s\n",
172 wall_delta,
173 (int)((d0_cpu_delta*100)/wall_delta),
174 (int)((d1_cpu_delta*100)/wall_delta),
175 (int)((pages_sent*PAGE_SIZE*8)/(wall_delta*1000)),
176 (int)((stats->dirty_count*PAGE_SIZE*8)/(wall_delta*1000)));
178 d0_cpu_last = d0_cpu_now;
179 d1_cpu_last = d1_cpu_now;
180 wall_last = wall_now;
182 return 0;
183 }
185 /** Write the vmconfig string.
186 * It is stored as a 4-byte count 'n' followed by n bytes.
187 *
188 * @param ioctxt i/o context
189 * @return 0 on success, non-zero on error.
190 */
191 static int write_vmconfig(XcIOContext *ioctxt){
192 int err = -1;
193 if(xcio_write(ioctxt, &ioctxt->vmconfig_n, sizeof(ioctxt->vmconfig_n))) goto exit;
194 if(xcio_write(ioctxt, ioctxt->vmconfig, ioctxt->vmconfig_n)) goto exit;
195 err = 0;
196 exit:
197 return err;
198 }
200 static int analysis_phase( int xc_handle, u32 domid,
201 int nr_pfns, unsigned long *arr )
202 {
203 long long start, now;
204 xc_shadow_control_stats_t stats;
206 start = llgettimeofday();
208 while ( 0 )
209 {
210 int i;
212 xc_shadow_control( xc_handle, domid,
213 DOM0_SHADOW_CONTROL_OP_CLEAN,
214 arr, nr_pfns, NULL);
215 printf("#Flush\n");
216 for ( i = 0; i < 100; i++ )
217 {
218 usleep(10000);
219 now = llgettimeofday();
220 xc_shadow_control( xc_handle, domid,
221 DOM0_SHADOW_CONTROL_OP_PEEK,
222 NULL, 0, &stats);
224 printf("now= %lld faults= %ld dirty= %ld dirty_net= %ld "
225 "dirty_block= %ld\n",
226 ((now-start)+500)/1000,
227 stats.fault_count, stats.dirty_count,
228 stats.dirty_net_count, stats.dirty_block_count);
229 }
230 }
232 return -1;
233 }
236 int suspend_and_state( int xc_handle, XcIOContext *ioctxt,
237 dom0_op_t *op,
238 full_execution_context_t *ctxt )
239 {
240 int i=0;
242 xcio_suspend_domain(ioctxt);
244 retry:
246 if ( xc_domain_getfullinfo( xc_handle, ioctxt->domain, op, ctxt) )
247 {
248 xcio_error(ioctxt, "Could not get full domain info");
249 return -1;
250 }
252 if ( (op->u.getdomaininfo.flags &
253 ( DOMFLAGS_SHUTDOWN | (SHUTDOWN_suspend<<DOMFLAGS_SHUTDOWNSHIFT) ))
254 == ( DOMFLAGS_SHUTDOWN | (SHUTDOWN_suspend<<DOMFLAGS_SHUTDOWNSHIFT) ))
255 {
256 return 0; // success
257 }
259 if ( op->u.getdomaininfo.flags & DOMFLAGS_PAUSED )
260 {
261 // try unpausing domain, wait, and retest
262 xc_domain_unpause( xc_handle, ioctxt->domain );
264 xcio_error(ioctxt, "Domain was paused. Wait and re-test. (%lx)",
265 op->u.getdomaininfo.flags);
266 usleep(10000); // 10ms
268 goto retry;
269 }
272 if( ++i < 100 )
273 {
274 xcio_error(ioctxt, "Retry suspend domain (%lx)",
275 op->u.getdomaininfo.flags);
276 usleep(10000); // 10ms
277 goto retry;
278 }
280 xcio_error(ioctxt, "Unable to suspend domain. (%lx)",
281 op->u.getdomaininfo.flags);
283 return -1;
284 }
286 int xc_linux_save(int xc_handle, XcIOContext *ioctxt)
287 {
288 dom0_op_t op;
289 int rc = 1, i, j, k, last_iter, iter = 0;
290 unsigned long mfn;
291 u32 domid = ioctxt->domain;
292 int live = (ioctxt->flags & XCFLAGS_LIVE);
293 int debug = (ioctxt->flags & XCFLAGS_DEBUG);
294 int sent_last_iter, skip_this_iter;
296 /* Important tuning parameters */
297 int max_iters = 29; /* limit us to 30 times round loop */
298 int max_factor = 3; /* never send more than 3x nr_pfns */
300 /* The new domain's shared-info frame number. */
301 unsigned long shared_info_frame;
303 /* A copy of the CPU context of the guest. */
304 full_execution_context_t ctxt;
306 /* A table containg the type of each PFN (/not/ MFN!). */
307 unsigned long *pfn_type = NULL;
308 unsigned long *pfn_batch = NULL;
310 /* A temporary mapping, and a copy, of one frame of guest memory. */
311 unsigned long page[1024];
313 /* A copy of the pfn-to-mfn table frame list. */
314 unsigned long *live_pfn_to_mfn_frame_list = NULL;
315 unsigned long pfn_to_mfn_frame_list[1024];
317 /* Live mapping of the table mapping each PFN to its current MFN. */
318 unsigned long *live_pfn_to_mfn_table = NULL;
319 /* Live mapping of system MFN to PFN table. */
320 unsigned long *live_mfn_to_pfn_table = NULL;
321 unsigned long mfn_to_pfn_table_start_mfn;
323 /* Live mapping of shared info structure */
324 shared_info_t *live_shinfo = NULL;
326 /* base of the region in which domain memory is mapped */
327 unsigned char *region_base = NULL;
329 /* A temporary mapping, and a copy, of the guest's suspend record. */
330 suspend_record_t *p_srec = NULL;
332 /* number of pages we're dealing with */
333 unsigned long nr_pfns;
335 /* power of 2 order of nr_pfns */
336 int order_nr;
338 /* bitmap of pages:
339 - that should be sent this iteration (unless later marked as skip);
340 - to skip this iteration because already dirty;
341 - to fixup by sending at the end if not already resent; */
342 unsigned long *to_send, *to_skip, *to_fix;
344 xc_shadow_control_stats_t stats;
346 int needed_to_fix = 0;
347 int total_sent = 0;
349 if (mlock(&ctxt, sizeof(ctxt))) {
350 xcio_perror(ioctxt, "Unable to mlock ctxt");
351 return 1;
352 }
354 if ( xc_domain_getfullinfo( xc_handle, domid, &op, &ctxt) )
355 {
356 xcio_error(ioctxt, "Could not get full domain info");
357 goto out;
358 }
359 shared_info_frame = op.u.getdomaininfo.shared_info_frame;
361 /* A cheesy test to see whether the domain contains valid state. */
362 if ( ctxt.pt_base == 0 ){
363 xcio_error(ioctxt, "Domain is not in a valid Linux guest OS state");
364 goto out;
365 }
367 nr_pfns = op.u.getdomaininfo.max_pages;
369 /* cheesy sanity check */
370 if ( nr_pfns > 1024*1024 ){
371 xcio_error(ioctxt, "Invalid state record -- pfn count out of range: %lu", nr_pfns);
372 goto out;
373 }
376 /* Map the shared info frame */
377 live_shinfo = xc_map_foreign_range(xc_handle, domid,
378 PAGE_SIZE, PROT_READ,
379 shared_info_frame);
381 if (!live_shinfo){
382 xcio_error(ioctxt, "Couldn't map live_shinfo");
383 goto out;
384 }
386 /* the pfn_to_mfn_frame_list fits in a single page */
387 live_pfn_to_mfn_frame_list =
388 xc_map_foreign_range(xc_handle, domid,
389 PAGE_SIZE, PROT_READ,
390 live_shinfo->arch.pfn_to_mfn_frame_list );
392 if (!live_pfn_to_mfn_frame_list){
393 xcio_error(ioctxt, "Couldn't map pfn_to_mfn_frame_list");
394 goto out;
395 }
398 /* Map all the frames of the pfn->mfn table. For migrate to succeed,
399 the guest must not change which frames are used for this purpose.
400 (its not clear why it would want to change them, and we'll be OK
401 from a safety POV anyhow. */
403 live_pfn_to_mfn_table = xc_map_foreign_batch(xc_handle, domid,
404 PROT_READ,
405 live_pfn_to_mfn_frame_list,
406 (nr_pfns+1023)/1024 );
407 if( !live_pfn_to_mfn_table ){
408 xcio_perror(ioctxt, "Couldn't map pfn_to_mfn table");
409 goto out;
410 }
412 /* Setup the mfn_to_pfn table mapping */
413 mfn_to_pfn_table_start_mfn = xc_get_m2p_start_mfn( xc_handle );
415 live_mfn_to_pfn_table =
416 xc_map_foreign_range(xc_handle, DOMID_XEN,
417 PAGE_SIZE*1024, PROT_READ,
418 mfn_to_pfn_table_start_mfn );
420 /* Canonicalise the pfn-to-mfn table frame-number list. */
421 memcpy( pfn_to_mfn_frame_list, live_pfn_to_mfn_frame_list, PAGE_SIZE );
423 for ( i = 0; i < nr_pfns; i += 1024 ){
424 if ( !translate_mfn_to_pfn(&pfn_to_mfn_frame_list[i/1024]) ){
425 xcio_error(ioctxt, "Frame # in pfn-to-mfn frame list is not in pseudophys");
426 goto out;
427 }
428 }
431 /* Domain is still running at this point */
433 if( live )
434 {
435 if ( xc_shadow_control( xc_handle, domid,
436 DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY,
437 NULL, 0, NULL ) < 0 ) {
438 xcio_error(ioctxt, "Couldn't enable shadow mode");
439 goto out;
440 }
442 last_iter = 0;
443 sent_last_iter = 1<<20; /* 4GB of pages */
444 } else{
445 /* This is a non-live suspend. Issue the call back to get the
446 domain suspended */
448 last_iter = 1;
450 if ( suspend_and_state( xc_handle, ioctxt, &op, &ctxt) )
451 {
452 xcio_error(ioctxt, "Domain appears not to have suspended: %lx",
453 op.u.getdomaininfo.flags);
454 goto out;
455 }
457 }
459 /* calculate the power of 2 order of nr_pfns, e.g.
460 15->4 16->4 17->5 */
461 for( i=nr_pfns-1, order_nr=0; i ; i>>=1, order_nr++ );
463 /* Setup to_send bitmap */
464 {
465 /* size these for a maximal 4GB domain, to make interaction
466 with balloon driver easier. It's only user space memory,
467 ater all... (3x 128KB) */
469 int sz = ( 1<<20 ) / 8;
471 to_send = malloc( sz );
472 to_fix = calloc( 1, sz );
473 to_skip = malloc( sz );
475 if (!to_send || !to_fix || !to_skip){
476 xcio_error(ioctxt, "Couldn't allocate to_send array");
477 goto out;
478 }
480 memset( to_send, 0xff, sz );
482 if ( mlock( to_send, sz ) ){
483 xcio_perror(ioctxt, "Unable to mlock to_send");
484 return 1;
485 }
487 /* (to fix is local only) */
489 if ( mlock( to_skip, sz ) ){
490 xcio_perror(ioctxt, "Unable to mlock to_skip");
491 return 1;
492 }
494 }
496 analysis_phase( xc_handle, domid, nr_pfns, to_skip );
498 /* We want zeroed memory so use calloc rather than malloc. */
499 pfn_type = calloc(BATCH_SIZE, sizeof(unsigned long));
500 pfn_batch = calloc(BATCH_SIZE, sizeof(unsigned long));
502 if ( (pfn_type == NULL) || (pfn_batch == NULL) ){
503 errno = ENOMEM;
504 goto out;
505 }
507 if ( mlock( pfn_type, BATCH_SIZE * sizeof(unsigned long) ) ){
508 xcio_error(ioctxt, "Unable to mlock");
509 goto out;
510 }
513 /*
514 * Quick belt and braces sanity check.
515 */
516 #if DEBUG
517 {
518 int err=0;
519 for ( i = 0; i < nr_pfns; i++ )
520 {
521 mfn = live_pfn_to_mfn_table[i];
523 if( (live_mfn_to_pfn_table[mfn] != i) && (mfn != 0xffffffffUL) )
524 {
525 printf("i=0x%x mfn=%lx live_mfn_to_pfn_table=%lx\n",
526 i,mfn,live_mfn_to_pfn_table[mfn]);
527 err++;
528 }
529 }
530 printf("Had %d unexplained entries in p2m table\n",err);
531 }
532 #endif
535 /* Start writing out the saved-domain record. */
537 if ( xcio_write(ioctxt, "LinuxGuestRecord", 16) ||
538 xcio_write(ioctxt, &nr_pfns, sizeof(unsigned long)) ||
539 xcio_write(ioctxt, pfn_to_mfn_frame_list, PAGE_SIZE) ){
540 xcio_error(ioctxt, "Error writing header");
541 goto out;
542 }
543 if(write_vmconfig(ioctxt)){
544 xcio_error(ioctxt, "Error writing vmconfig");
545 goto out;
546 }
548 print_stats( xc_handle, domid, 0, &stats, 0 );
550 /* Now write out each data page, canonicalising page tables as we go... */
552 while(1){
553 unsigned int prev_pc, sent_this_iter, N, batch;
555 iter++;
556 sent_this_iter = 0;
557 skip_this_iter = 0;
558 prev_pc = 0;
559 N=0;
561 xcio_info(ioctxt, "Saving memory pages: iter %d 0%%", iter);
563 while( N < nr_pfns ){
564 unsigned int this_pc = (N * 100) / nr_pfns;
566 if ( (this_pc - prev_pc) >= 5 ){
567 xcio_info(ioctxt, "\b\b\b\b%3d%%", this_pc);
568 prev_pc = this_pc;
569 }
571 /* slightly wasteful to peek the whole array evey time,
572 but this is fast enough for the moment. */
574 if ( !last_iter &&
575 xc_shadow_control(xc_handle, domid,
576 DOM0_SHADOW_CONTROL_OP_PEEK,
577 to_skip, nr_pfns, NULL) != nr_pfns )
578 {
579 xcio_error(ioctxt, "Error peeking shadow bitmap");
580 goto out;
581 }
584 /* load pfn_type[] with the mfn of all the pages we're doing in
585 this batch. */
587 for ( batch = 0; batch < BATCH_SIZE && N < nr_pfns ; N++ )
588 {
589 int n = permute(N, nr_pfns, order_nr );
591 if ( 0 && debug ) {
592 fprintf(stderr,"%d pfn= %08lx mfn= %08lx %d "
593 " [mfn]= %08lx\n",
594 iter, (unsigned long)n, live_pfn_to_mfn_table[n],
595 test_bit(n,to_send),
596 live_mfn_to_pfn_table[live_pfn_to_mfn_table[n]&
597 0xFFFFF]);
598 }
600 if ( !last_iter &&
601 test_bit(n, to_send) &&
602 test_bit(n, to_skip) ) {
603 skip_this_iter++; /* stats keeping */
604 }
606 if ( !((test_bit(n, to_send) && !test_bit(n, to_skip)) ||
607 (test_bit(n, to_send) && last_iter) ||
608 (test_bit(n, to_fix) && last_iter)) ) {
609 continue;
610 }
612 /* we get here if:
613 1. page is marked to_send & hasn't already been re-dirtied
614 2. (ignore to_skip in last iteration)
615 3. add in pages that still need fixup (net bufs)
616 */
618 pfn_batch[batch] = n;
619 pfn_type[batch] = live_pfn_to_mfn_table[n];
621 if( ! is_mapped(pfn_type[batch]) )
622 {
623 /* not currently in pusedo-physical map -- set bit
624 in to_fix that we must send this page in last_iter
625 unless its sent sooner anyhow */
627 set_bit( n, to_fix );
628 if( iter>1 )
629 DDPRINTF("netbuf race: iter %d, pfn %x. mfn %lx\n",
630 iter,n,pfn_type[batch]);
631 continue;
632 }
634 if ( last_iter &&
635 test_bit(n, to_fix) &&
636 !test_bit(n, to_send) )
637 {
638 needed_to_fix++;
639 DPRINTF("Fix! iter %d, pfn %x. mfn %lx\n",
640 iter,n,pfn_type[batch]);
641 }
643 clear_bit(n, to_fix);
645 batch++;
646 }
648 // DDPRINTF("batch %d:%d (n=%d)\n", iter, batch, n);
650 if ( batch == 0 )
651 goto skip; /* vanishingly unlikely... */
653 if ( (region_base = xc_map_foreign_batch(xc_handle, domid,
654 PROT_READ,
655 pfn_type,
656 batch)) == 0 ){
657 xcio_perror(ioctxt, "map batch failed");
658 goto out;
659 }
661 if ( get_pfn_type_batch(xc_handle, domid, batch, pfn_type) ){
662 xcio_error(ioctxt, "get_pfn_type_batch failed");
663 goto out;
664 }
666 for ( j = 0; j < batch; j++ ){
667 if ( (pfn_type[j] & LTAB_MASK) == XTAB ){
668 DDPRINTF("type fail: page %i mfn %08lx\n",j,pfn_type[j]);
669 continue;
670 }
672 if ( 0 && debug )
673 fprintf(stderr, "%d pfn= %08lx mfn= %08lx [mfn]= %08lx"
674 " sum= %08lx\n",
675 iter,
676 (pfn_type[j] & LTAB_MASK) | pfn_batch[j],
677 pfn_type[j],
678 live_mfn_to_pfn_table[pfn_type[j]&(~LTAB_MASK)],
679 csum_page(region_base + (PAGE_SIZE*j)));
681 /* canonicalise mfn->pfn */
682 pfn_type[j] = (pfn_type[j] & LTAB_MASK) | pfn_batch[j];
683 }
685 if ( xcio_write(ioctxt, &batch, sizeof(int) ) ){
686 xcio_error(ioctxt, "Error when writing to state file (2)");
687 goto out;
688 }
690 if ( xcio_write(ioctxt, pfn_type, sizeof(unsigned long)*j ) ){
691 xcio_error(ioctxt, "Error when writing to state file (3)");
692 goto out;
693 }
695 /* entering this loop, pfn_type is now in pfns (Not mfns) */
696 for( j = 0; j < batch; j++ ){
697 /* write out pages in batch */
698 if( (pfn_type[j] & LTAB_MASK) == XTAB){
699 DDPRINTF("SKIP BOGUS page %i mfn %08lx\n",j,pfn_type[j]);
700 continue;
701 }
703 if ( ((pfn_type[j] & LTABTYPE_MASK) == L1TAB) ||
704 ((pfn_type[j] & LTABTYPE_MASK) == L2TAB) ){
705 memcpy(page, region_base + (PAGE_SIZE*j), PAGE_SIZE);
707 for ( k = 0;
708 k < (((pfn_type[j] & LTABTYPE_MASK) == L2TAB) ?
709 (HYPERVISOR_VIRT_START >> L2_PAGETABLE_SHIFT) :
710 1024);
711 k++ ){
712 unsigned long pfn;
714 if ( !(page[k] & _PAGE_PRESENT) )
715 continue;
717 mfn = page[k] >> PAGE_SHIFT;
718 pfn = live_mfn_to_pfn_table[mfn];
720 if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) )
721 {
722 /* I don't think this should ever happen */
723 printf("FNI %d : [%08lx,%d] pte=%08lx, "
724 "mfn=%08lx, pfn=%08lx [mfn]=%08lx\n",
725 j, pfn_type[j], k,
726 page[k], mfn, live_mfn_to_pfn_table[mfn],
727 (live_mfn_to_pfn_table[mfn]<nr_pfns)?
728 live_pfn_to_mfn_table[
729 live_mfn_to_pfn_table[mfn]] :
730 0xdeadbeef);
732 pfn = 0; /* be suspicious */
733 }
735 page[k] &= PAGE_SIZE - 1;
736 page[k] |= pfn << PAGE_SHIFT;
738 #if 0
739 printf("L%d i=%d pfn=%d mfn=%d k=%d pte=%08lx "
740 "xpfn=%d\n",
741 pfn_type[j]>>28,
742 j,i,mfn,k,page[k],page[k]>>PAGE_SHIFT);
743 #endif
745 } /* end of page table rewrite for loop */
747 if ( xcio_write(ioctxt, page, PAGE_SIZE) ){
748 xcio_error(ioctxt, "Error when writing to state file (4)");
749 goto out;
750 }
752 } /* end of it's a PT page */ else { /* normal page */
754 if ( xcio_write(ioctxt, region_base + (PAGE_SIZE*j),
755 PAGE_SIZE) ){
756 xcio_error(ioctxt, "Error when writing to state file (5)");
757 goto out;
758 }
759 }
760 } /* end of the write out for this batch */
762 sent_this_iter += batch;
764 } /* end of this while loop for this iteration */
766 munmap(region_base, batch*PAGE_SIZE);
768 skip:
770 total_sent += sent_this_iter;
772 xcio_info(ioctxt, "\r %d: sent %d, skipped %d, ",
773 iter, sent_this_iter, skip_this_iter );
775 if ( last_iter ) {
776 print_stats( xc_handle, domid, sent_this_iter, &stats, 1);
778 xcio_info(ioctxt, "Total pages sent= %d (%.2fx)\n",
779 total_sent, ((float)total_sent)/nr_pfns );
780 xcio_info(ioctxt, "(of which %d were fixups)\n", needed_to_fix );
781 }
783 if (last_iter && debug){
784 int minusone = -1;
785 memset( to_send, 0xff, (nr_pfns+8)/8 );
786 debug = 0;
787 printf("Entering debug resend-all mode\n");
789 /* send "-1" to put receiver into debug mode */
790 if ( xcio_write(ioctxt, &minusone, sizeof(int)) )
791 {
792 xcio_error(ioctxt, "Error when writing to state file (6)");
793 goto out;
794 }
796 continue;
797 }
799 if ( last_iter ) break;
801 if ( live )
802 {
803 if (
804 /* ( sent_this_iter > (sent_last_iter * 0.95) ) || */
805 (iter >= max_iters) ||
806 (sent_this_iter+skip_this_iter < 50) ||
807 (total_sent > nr_pfns*max_factor) )
808 {
809 DPRINTF("Start last iteration\n");
810 last_iter = 1;
812 if ( suspend_and_state( xc_handle, ioctxt, &op, &ctxt) )
813 {
814 xcio_error(ioctxt, "Domain appears not to have suspended: %lx",
815 op.u.getdomaininfo.flags);
816 goto out;
817 }
819 printf("SUSPEND flags %08lx shinfo %08lx eip %08lx esi %08lx\n",
820 op.u.getdomaininfo.flags, op.u.getdomaininfo.shared_info_frame,
821 ctxt.cpu_ctxt.eip, ctxt.cpu_ctxt.esi );
824 }
826 if ( xc_shadow_control( xc_handle, domid,
827 DOM0_SHADOW_CONTROL_OP_CLEAN,
828 to_send, nr_pfns, &stats ) != nr_pfns )
829 {
830 xcio_error(ioctxt, "Error flushing shadow PT");
831 goto out;
832 }
834 sent_last_iter = sent_this_iter;
836 print_stats( xc_handle, domid, sent_this_iter, &stats, 1);
838 }
841 } /* end of while 1 */
843 DPRINTF("All memory is saved\n");
845 /* Success! */
846 rc = 0;
848 /* Zero terminate */
849 if ( xcio_write(ioctxt, &rc, sizeof(int)) )
850 {
851 xcio_error(ioctxt, "Error when writing to state file (6)");
852 goto out;
853 }
855 /* Send through a list of all the PFNs that were not in map at the close */
856 {
857 unsigned int i,j;
858 unsigned int pfntab[1024];
860 for ( i = 0, j = 0; i < nr_pfns; i++ )
861 {
862 if ( ! is_mapped(live_pfn_to_mfn_table[i]) )
863 j++;
864 }
866 if ( xcio_write(ioctxt, &j, sizeof(unsigned int)) )
867 {
868 xcio_error(ioctxt, "Error when writing to state file (6a)");
869 goto out;
870 }
872 for ( i = 0, j = 0; i < nr_pfns; )
873 {
874 if ( ! is_mapped(live_pfn_to_mfn_table[i]) )
875 {
876 pfntab[j++] = i;
877 }
878 i++;
879 if ( j == 1024 || i == nr_pfns )
880 {
881 if ( xcio_write(ioctxt, &pfntab, sizeof(unsigned long)*j) )
882 {
883 xcio_error(ioctxt, "Error when writing to state file (6b)");
884 goto out;
885 }
886 j = 0;
887 }
888 }
889 }
891 /* Map the suspend-record MFN to pin it. The page must be owned by
892 domid for this to succeed. */
893 p_srec = xc_map_foreign_range(xc_handle, domid,
894 sizeof(*p_srec), PROT_READ,
895 ctxt.cpu_ctxt.esi);
896 if (!p_srec){
897 xcio_error(ioctxt, "Couldn't map suspend record");
898 goto out;
899 }
901 if (nr_pfns != p_srec->nr_pfns )
902 {
903 xcio_error(ioctxt, "Suspend record nr_pfns unexpected (%ld != %ld)",
904 p_srec->nr_pfns, nr_pfns);
905 goto out;
906 }
908 /* Canonicalise the suspend-record frame number. */
909 if ( !translate_mfn_to_pfn(&ctxt.cpu_ctxt.esi) ){
910 xcio_error(ioctxt, "Suspend record is not in range of pseudophys map");
911 goto out;
912 }
914 /* Canonicalise each GDT frame number. */
915 for ( i = 0; i < ctxt.gdt_ents; i += 512 ) {
916 if ( !translate_mfn_to_pfn(&ctxt.gdt_frames[i]) ) {
917 xcio_error(ioctxt, "GDT frame is not in range of pseudophys map");
918 goto out;
919 }
920 }
922 /* Canonicalise the page table base pointer. */
923 if ( !MFN_IS_IN_PSEUDOPHYS_MAP(ctxt.pt_base >> PAGE_SHIFT) ) {
924 xcio_error(ioctxt, "PT base is not in range of pseudophys map");
925 goto out;
926 }
927 ctxt.pt_base = live_mfn_to_pfn_table[ctxt.pt_base >> PAGE_SHIFT] <<
928 PAGE_SHIFT;
930 if ( xcio_write(ioctxt, &ctxt, sizeof(ctxt)) ||
931 xcio_write(ioctxt, live_shinfo, PAGE_SIZE) ) {
932 xcio_error(ioctxt, "Error when writing to state file (1)");
933 goto out;
934 }
936 out:
938 if ( live_shinfo ) munmap(live_shinfo, PAGE_SIZE);
939 if ( p_srec ) munmap(p_srec, sizeof(*p_srec));
940 if ( live_pfn_to_mfn_frame_list ) munmap(live_pfn_to_mfn_frame_list, PAGE_SIZE);
941 if ( live_pfn_to_mfn_table ) munmap(live_pfn_to_mfn_table, nr_pfns*4 );
942 if ( live_mfn_to_pfn_table ) munmap(live_mfn_to_pfn_table, PAGE_SIZE*1024 );
944 if ( pfn_type != NULL ) free(pfn_type);
945 DPRINTF("Save exit rc=%d\n",rc);
946 return !!rc;
948 }