ia64/xen-unstable

view tools/libxc/xc_linux_save.c @ 2422:2274a0386cc9

bitkeeper revision 1.1159.69.5 (4138e882jA1YaR_OfTfNHe_uT4PDIg)

trivial
author iap10@labyrinth.cl.cam.ac.uk
date Fri Sep 03 21:56:18 2004 +0000 (2004-09-03)
parents 7c3a3f3cf69b
children 6ceaf7d959a7
line source
1 /******************************************************************************
2 * xc_linux_save.c
3 *
4 * Save the state of a running Linux session.
5 *
6 * Copyright (c) 2003, K A Fraser.
7 */
9 #include <sys/time.h>
10 #include "xc_private.h"
11 #include <asm-xen/suspend.h>
/* Pages are processed in batches of this many: 1024 pages (4MB) at a time. */
#define BATCH_SIZE 1024

/* Compile-time switches for the two levels of debug output below. */
#define DEBUG  0
#define DDEBUG 0

/* DPRINTF: printf-style debug output, compiled to a no-op unless DEBUG. */
#if DEBUG
#define DPRINTF(_f, ...) printf(_f, ## __VA_ARGS__)
#else
#define DPRINTF(_f, ...) ((void)0)
#endif

/* DDPRINTF: more verbose debug output, compiled to a no-op unless DDEBUG. */
#if DDEBUG
#define DDPRINTF(_f, ...) printf(_f, ## __VA_ARGS__)
#else
#define DDPRINTF(_f, ...) ((void)0)
#endif
/*
 * TRUE if the given machine frame number has a unique mapping in the
 * guest's pseudophysical map, i.e. all of:
 *   - the MFN is below 1024*1024;
 *   - the machine-to-phys table maps it to a PFN below nr_pfns;
 *   - the phys-to-machine table maps that PFN back to the same MFN.
 *
 * Relies on live_mfn_to_pfn_table, live_pfn_to_mfn_table and nr_pfns being
 * in scope at the point of use.  The argument is evaluated more than once.
 */
#define MFN_IS_IN_PSEUDOPHYS_MAP(_mfn)                                    \
    ( ((_mfn) < (1 << 20))                                             && \
      (live_mfn_to_pfn_table[_mfn] < nr_pfns)                          && \
      (live_pfn_to_mfn_table[live_mfn_to_pfn_table[_mfn]] == (_mfn)) )
/*
 * Convert the MFN stored at *_pmfn to a PFN, in place.
 *
 * Evaluates to 1 if the MFN passes MFN_IS_IN_PSEUDOPHYS_MAP() and *_pmfn was
 * overwritten with live_mfn_to_pfn_table[mfn]; evaluates to 0 (leaving
 * *_pmfn untouched) otherwise.
 *
 * The pointer argument is evaluated exactly once, and the temporaries use
 * names that cannot collide with an 'mfn' variable appearing in the
 * argument expression.
 */
#define translate_mfn_to_pfn(_pmfn)                                 \
({                                                                  \
    __typeof__(_pmfn) _xl_ptr = (_pmfn);                            \
    unsigned long _xl_mfn = *_xl_ptr;                               \
    int _xl_res = 1;                                                \
    if ( !MFN_IS_IN_PSEUDOPHYS_MAP(_xl_mfn) )                       \
        _xl_res = 0;                                                \
    else                                                            \
        *_xl_ptr = live_mfn_to_pfn_table[_xl_mfn];                  \
    _xl_res;                                                        \
})
/* TRUE when bit 31 (0x80000000) of the table entry is clear. */
#define is_mapped(pfn) (((pfn) & 0x80000000UL) == 0)
/*
 * Return 1 if bit number 'nr' is set in the bitmap at 'addr', else 0.
 * The bitmap is treated as an array of unsigned long words.
 */
static inline int test_bit(int nr, volatile void *addr)
{
    unsigned long *words = (unsigned long *)addr;
    unsigned long  word  = words[nr / (sizeof(unsigned long) * 8)];

    word >>= nr % (sizeof(unsigned long) * 8);

    return word & 1;
}
/*
 * Clear bit number 'nr' in the bitmap at 'addr'.
 * The bitmap is treated as an array of unsigned long words.
 *
 * The mask is built from 1UL so that it is as wide as a bitmap word: an
 * 'int' mask is undefined for bit positions >= 31 and, where unsigned long
 * is 64 bits wide, clearing bit 31 would also wipe bits 32..63.
 */
static inline void clear_bit(int nr, volatile void *addr)
{
    ((unsigned long *)addr)[nr / (sizeof(unsigned long) * 8)] &=
        ~(1UL << (nr % (sizeof(unsigned long) * 8)));
}
/*
 * Set bit number 'nr' in the bitmap at 'addr'.
 * The bitmap is treated as an array of unsigned long words.
 *
 * The mask is built from 1UL so that it is as wide as a bitmap word: an
 * 'int' mask is undefined for bit positions >= 31 and, where unsigned long
 * is 64 bits wide, setting bit 31 would also set bits 32..63 through sign
 * extension.
 */
static inline void set_bit(int nr, volatile void *addr)
{
    ((unsigned long *)addr)[nr / (sizeof(unsigned long) * 8)] |=
        (1UL << (nr % (sizeof(unsigned long) * 8)));
}
/*
 * Returns the hamming weight (i.e. the number of bits set) of a 32-bit word.
 *
 * Works by summing adjacent bit fields in parallel, doubling the field
 * width (1, 2, 4, 8, 16 bits) at each of the five steps.
 */
static inline unsigned int hweight32(unsigned int w)
{
    static const unsigned int field_mask[5] = {
        0x55555555, 0x33333333, 0x0F0F0F0F, 0x00FF00FF, 0x0000FFFF
    };
    unsigned int step, width;

    for ( step = 0, width = 1; step < 5; step++, width <<= 1 )
        w = (w & field_mask[step]) + ((w >> width) & field_mask[step]);

    return w;
}
/*
 * Count the bits set in the first nr / (bits per unsigned long) whole words
 * of the bitmap at 'addr'.  Bits in a trailing partial word are not counted.
 *
 * Each word is counted as two 32-bit halves so that no set bits are lost
 * where unsigned long is wider than 32 bits.  The upper half is extracted
 * with two 16-bit shifts so the expression stays well defined (and yields
 * zero) where unsigned long is exactly 32 bits wide.
 */
static inline int count_bits(int nr, volatile void *addr)
{
    int i, count = 0;
    unsigned long *p = (unsigned long *)addr;

    /* We know that the array is padded to unsigned long. */
    for ( i = 0; i < nr / (sizeof(unsigned long) * 8); i++, p++ )
    {
        unsigned long w = *p;

        count += hweight32((unsigned int)(w & 0xFFFFFFFFUL));
        count += hweight32((unsigned int)((w >> 16) >> 16));
    }

    return count;
}
/*
 * Map index i (0 <= i < nr) to another index in [0, nr) such that the whole
 * range is visited exactly once, in a scrambled order.
 *
 * Need a simple permutation function so that we scan pages in a pseudo
 * random order, enabling us to get a better estimate of the domain's page
 * dirtying rate as we go (there are often contiguous ranges of pfns that
 * have similar behaviour, and we want to mix them up).
 *
 * order_nr is the power-of-2 order of nr, e.g. nr->order 15->4 16->4 17->5
 * (a 512MB domain has 128k pages, order 17).
 *
 * The index is rotated left by 10 bits within an order_nr-bit field:
 *
 *      QPONMLKJIHGFEDCBA    becomes    GFEDCBAQPONMLKJIH
 *
 * and the rotation is repeated until the result is below nr.
 *
 * When order_nr is below 10 a 10-bit rotation is not expressible (the shift
 * count would be negative), so the rotation distance is reduced modulo
 * order_nr; with order_nr <= 0 the only valid index is 0.
 */
static inline int permute(int i, int nr, int order_nr)
{
    int rot, mask;

    if ( order_nr <= 0 )
        return 0;

    rot  = (order_nr >= 10) ? 10 : (10 % order_nr);
    mask = (1 << order_nr) - 1;

    do { i = ((i >> (order_nr - rot)) | (i << rot)) & mask; }
    while ( i >= nr ); /* this won't ever loop if nr is a power of 2 */

    return i;
}
/*
 * Convert a timeval to a count of microseconds.
 *
 * The seconds field is widened to long long before scaling so the
 * multiplication cannot overflow where tv_sec is a 32-bit type.
 */
static long long tv_to_us( struct timeval *new )
{
    return ((long long)new->tv_sec * 1000000) + new->tv_usec;
}
/* Read the time with gettimeofday() and return it converted by tv_to_us(). */
static long long llgettimeofday()
{
    struct timeval tv_now;

    gettimeofday(&tv_now, NULL);

    return tv_to_us(&tv_now);
}
/*
 * Return (new - old) in microseconds.
 *
 * The difference in seconds is widened to long long before scaling so the
 * multiplication cannot overflow where tv_sec is a 32-bit type (e.g. when
 * 'old' is still a zero-initialised timeval).
 */
static long long tv_delta( struct timeval *new, struct timeval *old )
{
    return ((long long)(new->tv_sec - old->tv_sec) * 1000000) +
        (new->tv_usec - old->tv_usec);
}
141 static int print_stats( int xc_handle, u32 domid,
142 int pages_sent, xc_shadow_control_stats_t *stats,
143 int print )
144 {
145 static struct timeval wall_last;
146 static long long d0_cpu_last;
147 static long long d1_cpu_last;
149 struct timeval wall_now;
150 long long wall_delta;
151 long long d0_cpu_now, d0_cpu_delta;
152 long long d1_cpu_now, d1_cpu_delta;
154 gettimeofday(&wall_now, NULL);
156 d0_cpu_now = xc_domain_get_cpu_usage( xc_handle, 0 )/1000;
157 d1_cpu_now = xc_domain_get_cpu_usage( xc_handle, domid )/1000;
159 if ( (d0_cpu_now == -1) || (d1_cpu_now == -1) )
160 printf("ARRHHH!!\n");
162 wall_delta = tv_delta(&wall_now,&wall_last)/1000;
164 if ( wall_delta == 0 ) wall_delta = 1;
166 d0_cpu_delta = (d0_cpu_now - d0_cpu_last)/1000;
167 d1_cpu_delta = (d1_cpu_now - d1_cpu_last)/1000;
169 if ( print )
170 printf("delta %lldms, dom0 %d%%, target %d%%, sent %dMb/s, "
171 "dirtied %dMb/s\n",
172 wall_delta,
173 (int)((d0_cpu_delta*100)/wall_delta),
174 (int)((d1_cpu_delta*100)/wall_delta),
175 (int)((pages_sent*PAGE_SIZE*8)/(wall_delta*1000)),
176 (int)((stats->dirty_count*PAGE_SIZE*8)/(wall_delta*1000)));
178 d0_cpu_last = d0_cpu_now;
179 d1_cpu_last = d1_cpu_now;
180 wall_last = wall_now;
182 return 0;
183 }
185 /** Write the vmconfig string.
186 * It is stored as a 4-byte count 'n' followed by n bytes.
187 *
188 * @param ioctxt i/o context
189 * @return 0 on success, non-zero on error.
190 */
191 static int write_vmconfig(XcIOContext *ioctxt){
192 int err = -1;
193 if(xcio_write(ioctxt, &ioctxt->vmconfig_n, sizeof(ioctxt->vmconfig_n))) goto exit;
194 if(xcio_write(ioctxt, ioctxt->vmconfig, ioctxt->vmconfig_n)) goto exit;
195 err = 0;
196 exit:
197 return err;
198 }
200 static int analysis_phase( int xc_handle, u32 domid,
201 int nr_pfns, unsigned long *arr )
202 {
203 long long start, now;
204 xc_shadow_control_stats_t stats;
206 start = llgettimeofday();
208 while ( 0 )
209 {
210 int i;
212 xc_shadow_control( xc_handle, domid,
213 DOM0_SHADOW_CONTROL_OP_CLEAN2,
214 arr, nr_pfns, NULL);
215 printf("#Flush\n");
216 for ( i = 0; i < 100; i++ )
217 {
218 usleep(10000);
219 now = llgettimeofday();
220 xc_shadow_control( xc_handle, domid,
221 DOM0_SHADOW_CONTROL_OP_PEEK,
222 NULL, 0, &stats);
224 printf("now= %lld faults= %ld dirty= %ld dirty_net= %ld "
225 "dirty_block= %ld\n",
226 ((now-start)+500)/1000,
227 stats.fault_count, stats.dirty_count,
228 stats.dirty_net_count, stats.dirty_block_count);
229 }
230 }
232 return -1;
233 }
236 int suspend_and_state( int xc_handle, XcIOContext *ioctxt,
237 dom0_op_t *op,
238 full_execution_context_t *ctxt )
239 {
240 int i=0;
242 xcio_suspend_domain(ioctxt);
244 retry:
246 if ( xc_domain_getfullinfo( xc_handle, ioctxt->domain, op, ctxt) )
247 {
248 xcio_error(ioctxt, "Could not get full domain info");
249 return -1;
250 }
252 if ( (op->u.getdomaininfo.flags &
253 ( DOMFLAGS_SHUTDOWN | (SHUTDOWN_suspend<<DOMFLAGS_SHUTDOWNSHIFT) ))
254 == ( DOMFLAGS_SHUTDOWN | (SHUTDOWN_suspend<<DOMFLAGS_SHUTDOWNSHIFT) ))
255 {
256 return 0; // success
257 }
259 if ( op->u.getdomaininfo.flags & DOMFLAGS_PAUSED )
260 {
261 // try unpausing domain, wait, and retest
262 xc_domain_unpause( xc_handle, ioctxt->domain );
264 xcio_error(ioctxt, "Domain was paused. Wait and re-test. (%lx)",
265 op->u.getdomaininfo.flags);
266 usleep(10000); // 10ms
268 goto retry;
269 }
272 if( ++i < 3 )
273 {
274 usleep(10000); // 10ms
275 goto retry;
276 }
278 xcio_error(ioctxt, "Unable to suspend domain. (%lx)",
279 op->u.getdomaininfo.flags);
281 return -1;
282 }
284 int xc_linux_save(int xc_handle, XcIOContext *ioctxt)
285 {
286 dom0_op_t op;
287 int rc = 1, i, j, k, last_iter, iter = 0;
288 unsigned long mfn;
289 u32 domid = ioctxt->domain;
290 int live = (ioctxt->flags & XCFLAGS_LIVE);
291 int debug = (ioctxt->flags & XCFLAGS_DEBUG);
292 int sent_last_iter, skip_this_iter;
294 /* Important tuning parameters */
295 int max_iters = 29; /* limit us to 30 times round loop */
296 int max_factor = 3; /* never send more than 3x nr_pfns */
298 /* The new domain's shared-info frame number. */
299 unsigned long shared_info_frame;
301 /* A copy of the CPU context of the guest. */
302 full_execution_context_t ctxt;
304 /* A copy of the domain's name. */
305 char name[MAX_DOMAIN_NAME];
307 /* A table containg the type of each PFN (/not/ MFN!). */
308 unsigned long *pfn_type = NULL;
309 unsigned long *pfn_batch = NULL;
311 /* A temporary mapping, and a copy, of one frame of guest memory. */
312 unsigned long page[1024];
314 /* A copy of the pfn-to-mfn table frame list. */
315 unsigned long *live_pfn_to_mfn_frame_list = NULL;
316 unsigned long pfn_to_mfn_frame_list[1024];
318 /* Live mapping of the table mapping each PFN to its current MFN. */
319 unsigned long *live_pfn_to_mfn_table = NULL;
320 /* Live mapping of system MFN to PFN table. */
321 unsigned long *live_mfn_to_pfn_table = NULL;
322 unsigned long mfn_to_pfn_table_start_mfn;
324 /* Live mapping of shared info structure */
325 shared_info_t *live_shinfo = NULL;
327 /* base of the region in which domain memory is mapped */
328 unsigned char *region_base = NULL;
330 /* A temporary mapping, and a copy, of the guest's suspend record. */
331 suspend_record_t *p_srec = NULL;
333 /* number of pages we're dealing with */
334 unsigned long nr_pfns;
336 /* power of 2 order of nr_pfns */
337 int order_nr;
339 /* bitmap of pages:
340 - that should be sent this iteration (unless later marked as skip);
341 - to skip this iteration because already dirty;
342 - to fixup by sending at the end if not already resent; */
343 unsigned long *to_send, *to_skip, *to_fix;
345 xc_shadow_control_stats_t stats;
347 int needed_to_fix = 0;
348 int total_sent = 0;
350 if (mlock(&ctxt, sizeof(ctxt))) {
351 xcio_perror(ioctxt, "Unable to mlock ctxt");
352 return 1;
353 }
355 if ( xc_domain_getfullinfo( xc_handle, domid, &op, &ctxt) )
356 {
357 xcio_error(ioctxt, "Could not get full domain info");
358 goto out;
359 }
360 memcpy(name, op.u.getdomaininfo.name, sizeof(name));
361 shared_info_frame = op.u.getdomaininfo.shared_info_frame;
363 /* A cheesy test to see whether the domain contains valid state. */
364 if ( ctxt.pt_base == 0 ){
365 xcio_error(ioctxt, "Domain is not in a valid Linux guest OS state");
366 goto out;
367 }
369 nr_pfns = op.u.getdomaininfo.max_pages;
371 /* cheesy sanity check */
372 if ( nr_pfns > 1024*1024 ){
373 xcio_error(ioctxt, "Invalid state record -- pfn count out of range: %lu", nr_pfns);
374 goto out;
375 }
378 /* Map the shared info frame */
379 live_shinfo = xc_map_foreign_range(xc_handle, domid,
380 PAGE_SIZE, PROT_READ,
381 shared_info_frame);
383 if (!live_shinfo){
384 xcio_error(ioctxt, "Couldn't map live_shinfo");
385 goto out;
386 }
388 /* the pfn_to_mfn_frame_list fits in a single page */
389 live_pfn_to_mfn_frame_list =
390 xc_map_foreign_range(xc_handle, domid,
391 PAGE_SIZE, PROT_READ,
392 live_shinfo->arch.pfn_to_mfn_frame_list );
394 if (!live_pfn_to_mfn_frame_list){
395 xcio_error(ioctxt, "Couldn't map pfn_to_mfn_frame_list");
396 goto out;
397 }
400 /* Map all the frames of the pfn->mfn table. For migrate to succeed,
401 the guest must not change which frames are used for this purpose.
402 (its not clear why it would want to change them, and we'll be OK
403 from a safety POV anyhow. */
405 live_pfn_to_mfn_table = xc_map_foreign_batch(xc_handle, domid,
406 PROT_READ,
407 live_pfn_to_mfn_frame_list,
408 (nr_pfns+1023)/1024 );
409 if( !live_pfn_to_mfn_table ){
410 xcio_perror(ioctxt, "Couldn't map pfn_to_mfn table");
411 goto out;
412 }
414 /* Setup the mfn_to_pfn table mapping */
415 mfn_to_pfn_table_start_mfn = xc_get_m2p_start_mfn( xc_handle );
417 live_mfn_to_pfn_table =
418 xc_map_foreign_range(xc_handle, DOMID_XEN,
419 PAGE_SIZE*1024, PROT_READ,
420 mfn_to_pfn_table_start_mfn );
422 /* Canonicalise the pfn-to-mfn table frame-number list. */
423 memcpy( pfn_to_mfn_frame_list, live_pfn_to_mfn_frame_list, PAGE_SIZE );
425 for ( i = 0; i < nr_pfns; i += 1024 ){
426 if ( !translate_mfn_to_pfn(&pfn_to_mfn_frame_list[i/1024]) ){
427 xcio_error(ioctxt, "Frame # in pfn-to-mfn frame list is not in pseudophys");
428 goto out;
429 }
430 }
433 /* Domain is still running at this point */
435 if( live )
436 {
437 if ( xc_shadow_control( xc_handle, domid,
438 DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY,
439 NULL, 0, NULL ) < 0 ) {
440 xcio_error(ioctxt, "Couldn't enable shadow mode");
441 goto out;
442 }
444 last_iter = 0;
445 sent_last_iter = 1<<20; /* 4GB of pages */
446 } else{
447 /* This is a non-live suspend. Issue the call back to get the
448 domain suspended */
450 last_iter = 1;
452 if ( suspend_and_state( xc_handle, ioctxt, &op, &ctxt) )
453 {
454 xcio_error(ioctxt, "Domain appears not to have suspended: %lx",
455 op.u.getdomaininfo.flags);
456 goto out;
457 }
459 }
461 /* calculate the power of 2 order of nr_pfns, e.g.
462 15->4 16->4 17->5 */
463 for( i=nr_pfns-1, order_nr=0; i ; i>>=1, order_nr++ );
465 /* Setup to_send bitmap */
466 {
467 /* size these for a maximal 4GB domain, to make interaction
468 with balloon driver easier. It's only user space memory,
469 ater all... (3x 128KB) */
471 int sz = ( 1<<20 ) / 8;
473 to_send = malloc( sz );
474 to_fix = calloc( 1, sz );
475 to_skip = malloc( sz );
477 if (!to_send || !to_fix || !to_skip){
478 xcio_error(ioctxt, "Couldn't allocate to_send array");
479 goto out;
480 }
482 memset( to_send, 0xff, sz );
484 if ( mlock( to_send, sz ) ){
485 xcio_perror(ioctxt, "Unable to mlock to_send");
486 return 1;
487 }
489 /* (to fix is local only) */
491 if ( mlock( to_skip, sz ) ){
492 xcio_perror(ioctxt, "Unable to mlock to_skip");
493 return 1;
494 }
496 }
498 analysis_phase( xc_handle, domid, nr_pfns, to_skip );
500 /* We want zeroed memory so use calloc rather than malloc. */
501 pfn_type = calloc(BATCH_SIZE, sizeof(unsigned long));
502 pfn_batch = calloc(BATCH_SIZE, sizeof(unsigned long));
504 if ( (pfn_type == NULL) || (pfn_batch == NULL) ){
505 errno = ENOMEM;
506 goto out;
507 }
509 if ( mlock( pfn_type, BATCH_SIZE * sizeof(unsigned long) ) ){
510 xcio_error(ioctxt, "Unable to mlock");
511 goto out;
512 }
515 /*
516 * Quick belt and braces sanity check.
517 */
518 #if DEBUG
519 {
520 int err=0;
521 for ( i = 0; i < nr_pfns; i++ )
522 {
523 mfn = live_pfn_to_mfn_table[i];
525 if( (live_mfn_to_pfn_table[mfn] != i) && (mfn != 0xffffffffUL) )
526 {
527 printf("i=0x%x mfn=%lx live_mfn_to_pfn_table=%lx\n",
528 i,mfn,live_mfn_to_pfn_table[mfn]);
529 err++;
530 }
531 }
532 printf("Had %d unexplained entries in p2m table\n",err);
533 }
534 #endif
537 /* Start writing out the saved-domain record. */
539 if ( xcio_write(ioctxt, "LinuxGuestRecord", 16) ||
540 xcio_write(ioctxt, name, sizeof(name)) ||
541 xcio_write(ioctxt, &nr_pfns, sizeof(unsigned long)) ||
542 xcio_write(ioctxt, pfn_to_mfn_frame_list, PAGE_SIZE) ){
543 xcio_error(ioctxt, "Error writing header");
544 goto out;
545 }
546 if(write_vmconfig(ioctxt)){
547 xcio_error(ioctxt, "Error writing vmconfig");
548 goto out;
549 }
551 print_stats( xc_handle, domid, 0, &stats, 0 );
553 /* Now write out each data page, canonicalising page tables as we go... */
555 while(1){
556 unsigned int prev_pc, sent_this_iter, N, batch;
558 iter++;
559 sent_this_iter = 0;
560 skip_this_iter = 0;
561 prev_pc = 0;
562 N=0;
564 xcio_info(ioctxt, "Saving memory pages: iter %d 0%%", iter);
566 while( N < nr_pfns ){
567 unsigned int this_pc = (N * 100) / nr_pfns;
569 if ( (this_pc - prev_pc) >= 5 ){
570 xcio_info(ioctxt, "\b\b\b\b%3d%%", this_pc);
571 prev_pc = this_pc;
572 }
574 /* slightly wasteful to peek the whole array evey time,
575 but this is fast enough for the moment. */
577 if ( !last_iter &&
578 xc_shadow_control(xc_handle, domid,
579 DOM0_SHADOW_CONTROL_OP_PEEK,
580 to_skip, nr_pfns, NULL) != nr_pfns )
581 {
582 xcio_error(ioctxt, "Error peeking shadow bitmap");
583 goto out;
584 }
587 /* load pfn_type[] with the mfn of all the pages we're doing in
588 this batch. */
590 for ( batch = 0; batch < BATCH_SIZE && N < nr_pfns ; N++ )
591 {
592 int n = permute(N, nr_pfns, order_nr );
594 if ( 0 && debug ) {
595 fprintf(stderr,"%d pfn= %08lx mfn= %08lx %d "
596 " [mfn]= %08lx\n",
597 iter, (unsigned long)n, live_pfn_to_mfn_table[n],
598 test_bit(n,to_send),
599 live_mfn_to_pfn_table[live_pfn_to_mfn_table[n]&
600 0xFFFFF]);
601 }
603 if ( !last_iter &&
604 test_bit(n, to_send) &&
605 test_bit(n, to_skip) ) {
606 skip_this_iter++; /* stats keeping */
607 }
609 if ( !((test_bit(n, to_send) && !test_bit(n, to_skip)) ||
610 (test_bit(n, to_send) && last_iter) ||
611 (test_bit(n, to_fix) && last_iter)) ) {
612 continue;
613 }
615 /* we get here if:
616 1. page is marked to_send & hasn't already been re-dirtied
617 2. (ignore to_skip in last iteration)
618 3. add in pages that still need fixup (net bufs)
619 */
621 pfn_batch[batch] = n;
622 pfn_type[batch] = live_pfn_to_mfn_table[n];
624 if( ! is_mapped(pfn_type[batch]) )
625 {
626 /* not currently in pusedo-physical map -- set bit
627 in to_fix that we must send this page in last_iter
628 unless its sent sooner anyhow */
630 set_bit( n, to_fix );
631 if( iter>1 )
632 DDPRINTF("netbuf race: iter %d, pfn %x. mfn %lx\n",
633 iter,n,pfn_type[batch]);
634 continue;
635 }
637 if ( last_iter &&
638 test_bit(n, to_fix) &&
639 !test_bit(n, to_send) )
640 {
641 needed_to_fix++;
642 DPRINTF("Fix! iter %d, pfn %x. mfn %lx\n",
643 iter,n,pfn_type[batch]);
644 }
646 clear_bit(n, to_fix);
648 batch++;
649 }
651 // DDPRINTF("batch %d:%d (n=%d)\n", iter, batch, n);
653 if ( batch == 0 )
654 goto skip; /* vanishingly unlikely... */
656 if ( (region_base = xc_map_foreign_batch(xc_handle, domid,
657 PROT_READ,
658 pfn_type,
659 batch)) == 0 ){
660 xcio_perror(ioctxt, "map batch failed");
661 goto out;
662 }
664 if ( get_pfn_type_batch(xc_handle, domid, batch, pfn_type) ){
665 xcio_error(ioctxt, "get_pfn_type_batch failed");
666 goto out;
667 }
669 for ( j = 0; j < batch; j++ ){
670 if ( (pfn_type[j] & LTAB_MASK) == XTAB ){
671 DDPRINTF("type fail: page %i mfn %08lx\n",j,pfn_type[j]);
672 continue;
673 }
675 if ( 0 && debug )
676 fprintf(stderr, "%d pfn= %08lx mfn= %08lx [mfn]= %08lx"
677 " sum= %08lx\n",
678 iter,
679 (pfn_type[j] & LTAB_MASK) | pfn_batch[j],
680 pfn_type[j],
681 live_mfn_to_pfn_table[pfn_type[j]&(~LTAB_MASK)],
682 csum_page(region_base + (PAGE_SIZE*j)));
684 /* canonicalise mfn->pfn */
685 pfn_type[j] = (pfn_type[j] & LTAB_MASK) | pfn_batch[j];
686 }
688 if ( xcio_write(ioctxt, &batch, sizeof(int) ) ){
689 xcio_error(ioctxt, "Error when writing to state file (2)");
690 goto out;
691 }
693 if ( xcio_write(ioctxt, pfn_type, sizeof(unsigned long)*j ) ){
694 xcio_error(ioctxt, "Error when writing to state file (3)");
695 goto out;
696 }
698 /* entering this loop, pfn_type is now in pfns (Not mfns) */
699 for( j = 0; j < batch; j++ ){
700 /* write out pages in batch */
701 if( (pfn_type[j] & LTAB_MASK) == XTAB){
702 DDPRINTF("SKIP BOGUS page %i mfn %08lx\n",j,pfn_type[j]);
703 continue;
704 }
706 if ( ((pfn_type[j] & LTABTYPE_MASK) == L1TAB) ||
707 ((pfn_type[j] & LTABTYPE_MASK) == L2TAB) ){
708 memcpy(page, region_base + (PAGE_SIZE*j), PAGE_SIZE);
710 for ( k = 0;
711 k < (((pfn_type[j] & LTABTYPE_MASK) == L2TAB) ?
712 (HYPERVISOR_VIRT_START >> L2_PAGETABLE_SHIFT) :
713 1024);
714 k++ ){
715 unsigned long pfn;
717 if ( !(page[k] & _PAGE_PRESENT) )
718 continue;
720 mfn = page[k] >> PAGE_SHIFT;
721 pfn = live_mfn_to_pfn_table[mfn];
723 if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) )
724 {
725 /* I don't think this should ever happen */
726 printf("FNI %d : [%08lx,%d] pte=%08lx, "
727 "mfn=%08lx, pfn=%08lx [mfn]=%08lx\n",
728 j, pfn_type[j], k,
729 page[k], mfn, live_mfn_to_pfn_table[mfn],
730 (live_mfn_to_pfn_table[mfn]<nr_pfns)?
731 live_pfn_to_mfn_table[
732 live_mfn_to_pfn_table[mfn]] :
733 0xdeadbeef);
735 pfn = 0; /* be suspicious */
736 }
738 page[k] &= PAGE_SIZE - 1;
739 page[k] |= pfn << PAGE_SHIFT;
741 #if 0
742 printf("L%d i=%d pfn=%d mfn=%d k=%d pte=%08lx "
743 "xpfn=%d\n",
744 pfn_type[j]>>28,
745 j,i,mfn,k,page[k],page[k]>>PAGE_SHIFT);
746 #endif
748 } /* end of page table rewrite for loop */
750 if ( xcio_write(ioctxt, page, PAGE_SIZE) ){
751 xcio_error(ioctxt, "Error when writing to state file (4)");
752 goto out;
753 }
755 } /* end of it's a PT page */ else { /* normal page */
757 if ( xcio_write(ioctxt, region_base + (PAGE_SIZE*j),
758 PAGE_SIZE) ){
759 xcio_error(ioctxt, "Error when writing to state file (5)");
760 goto out;
761 }
762 }
763 } /* end of the write out for this batch */
765 sent_this_iter += batch;
767 } /* end of this while loop for this iteration */
769 munmap(region_base, batch*PAGE_SIZE);
771 skip:
773 total_sent += sent_this_iter;
775 xcio_info(ioctxt, "\r %d: sent %d, skipped %d, ",
776 iter, sent_this_iter, skip_this_iter );
778 if ( last_iter ) {
779 print_stats( xc_handle, domid, sent_this_iter, &stats, 1);
781 xcio_info(ioctxt, "Total pages sent= %d (%.2fx)\n",
782 total_sent, ((float)total_sent)/nr_pfns );
783 xcio_info(ioctxt, "(of which %d were fixups)\n", needed_to_fix );
784 }
786 if (last_iter && debug){
787 int minusone = -1;
788 memset( to_send, 0xff, (nr_pfns+8)/8 );
789 debug = 0;
790 printf("Entering debug resend-all mode\n");
792 /* send "-1" to put receiver into debug mode */
793 if ( xcio_write(ioctxt, &minusone, sizeof(int)) )
794 {
795 xcio_error(ioctxt, "Error when writing to state file (6)");
796 goto out;
797 }
799 continue;
800 }
802 if ( last_iter ) break;
804 if ( live )
805 {
806 if (
807 /* ( sent_this_iter > (sent_last_iter * 0.95) ) || */
808 (iter >= max_iters) ||
809 (sent_this_iter+skip_this_iter < 50) ||
810 (total_sent > nr_pfns*max_factor) )
811 {
812 DPRINTF("Start last iteration\n");
813 last_iter = 1;
815 if ( suspend_and_state( xc_handle, ioctxt, &op, &ctxt) )
816 {
817 xcio_error(ioctxt, "Domain appears not to have suspended: %lx",
818 op.u.getdomaininfo.flags);
819 goto out;
820 }
822 printf("SUSPEND flags %08lx shinfo %08lx eip %08lx esi %08lx\n",
823 op.u.getdomaininfo.flags, op.u.getdomaininfo.shared_info_frame,
824 ctxt.cpu_ctxt.eip, ctxt.cpu_ctxt.esi );
827 }
829 if ( xc_shadow_control( xc_handle, domid,
830 DOM0_SHADOW_CONTROL_OP_CLEAN2,
831 to_send, nr_pfns, &stats ) != nr_pfns )
832 {
833 xcio_error(ioctxt, "Error flushing shadow PT");
834 goto out;
835 }
837 sent_last_iter = sent_this_iter;
839 print_stats( xc_handle, domid, sent_this_iter, &stats, 1);
841 }
844 } /* end of while 1 */
846 DPRINTF("All memory is saved\n");
848 /* Success! */
849 rc = 0;
851 /* Zero terminate */
852 if ( xcio_write(ioctxt, &rc, sizeof(int)) )
853 {
854 xcio_error(ioctxt, "Error when writing to state file (6)");
855 goto out;
856 }
858 /* Send through a list of all the PFNs that were not in map at the close */
859 {
860 unsigned int i,j;
861 unsigned int pfntab[1024];
863 for ( i = 0, j = 0; i < nr_pfns; i++ )
864 {
865 if ( ! is_mapped(live_pfn_to_mfn_table[i]) )
866 j++;
867 }
869 if ( xcio_write(ioctxt, &j, sizeof(unsigned int)) )
870 {
871 xcio_error(ioctxt, "Error when writing to state file (6a)");
872 goto out;
873 }
875 for ( i = 0, j = 0; i < nr_pfns; )
876 {
877 if ( ! is_mapped(live_pfn_to_mfn_table[i]) )
878 {
879 pfntab[j++] = i;
880 }
881 i++;
882 if ( j == 1024 || i == nr_pfns )
883 {
884 if ( xcio_write(ioctxt, &pfntab, sizeof(unsigned long)*j) )
885 {
886 xcio_error(ioctxt, "Error when writing to state file (6b)");
887 goto out;
888 }
889 j = 0;
890 }
891 }
892 }
894 /* Map the suspend-record MFN to pin it. The page must be owned by
895 domid for this to succeed. */
896 p_srec = xc_map_foreign_range(xc_handle, domid,
897 sizeof(*p_srec), PROT_READ,
898 ctxt.cpu_ctxt.esi);
899 if (!p_srec){
900 xcio_error(ioctxt, "Couldn't map suspend record");
901 goto out;
902 }
904 if (nr_pfns != p_srec->nr_pfns )
905 {
906 xcio_error(ioctxt, "Suspend record nr_pfns unexpected (%ld != %ld)",
907 p_srec->nr_pfns, nr_pfns);
908 goto out;
909 }
911 /* Canonicalise the suspend-record frame number. */
912 if ( !translate_mfn_to_pfn(&ctxt.cpu_ctxt.esi) ){
913 xcio_error(ioctxt, "Suspend record is not in range of pseudophys map");
914 goto out;
915 }
917 /* Canonicalise each GDT frame number. */
918 for ( i = 0; i < ctxt.gdt_ents; i += 512 ) {
919 if ( !translate_mfn_to_pfn(&ctxt.gdt_frames[i]) ) {
920 xcio_error(ioctxt, "GDT frame is not in range of pseudophys map");
921 goto out;
922 }
923 }
925 /* Canonicalise the page table base pointer. */
926 if ( !MFN_IS_IN_PSEUDOPHYS_MAP(ctxt.pt_base >> PAGE_SHIFT) ) {
927 xcio_error(ioctxt, "PT base is not in range of pseudophys map");
928 goto out;
929 }
930 ctxt.pt_base = live_mfn_to_pfn_table[ctxt.pt_base >> PAGE_SHIFT] <<
931 PAGE_SHIFT;
933 if ( xcio_write(ioctxt, &ctxt, sizeof(ctxt)) ||
934 xcio_write(ioctxt, live_shinfo, PAGE_SIZE) ) {
935 xcio_error(ioctxt, "Error when writing to state file (1)");
936 goto out;
937 }
939 out:
941 if ( live_shinfo ) munmap(live_shinfo, PAGE_SIZE);
942 if ( p_srec ) munmap(p_srec, sizeof(*p_srec));
943 if ( live_pfn_to_mfn_frame_list ) munmap(live_pfn_to_mfn_frame_list, PAGE_SIZE);
944 if ( live_pfn_to_mfn_table ) munmap(live_pfn_to_mfn_table, nr_pfns*4 );
945 if ( live_mfn_to_pfn_table ) munmap(live_mfn_to_pfn_table, PAGE_SIZE*1024 );
947 if ( pfn_type != NULL ) free(pfn_type);
948 DPRINTF("Save exit rc=%d\n",rc);
949 return !!rc;
951 }