ia64/xen-unstable

view tools/libxc/xc_domain_save.c @ 14989:1cfe47358f9f

save/restore: If ENABLE_LOGDIRTY fails, it may be because log-dirty
mode is already active. To find out, attempt to disable and then
re-enable the mode.

Signed-off-by: Brendan Cully <brendan@cs.ubc.ca>
author kfraser@localhost.localdomain
date Tue May 01 10:16:26 2007 +0100 (2007-05-01)
parents 55d0a5c70986
children 7eeddd787d2f
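In short, the fallback added by this changeset to the live-suspend path of
xc_domain_save() looks like this (excerpted from the listing below):

    if ( xc_shadow_control(xc_handle, dom,
                           XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY,
                           NULL, 0, NULL, 0, NULL) < 0 )
    {
        /* Log-dirty may already be active: disable it, then re-enable. */
        if ( !(xc_shadow_control(xc_handle, dom, XEN_DOMCTL_SHADOW_OP_OFF,
                                 NULL, 0, NULL, 0, NULL) >= 0 &&
               xc_shadow_control(xc_handle, dom,
                                 XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY,
                                 NULL, 0, NULL, 0, NULL) >= 0) )
        {
            ERROR("Couldn't enable shadow mode");
            goto out;
        }
    }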
line source
1 /******************************************************************************
2 * xc_linux_save.c
3 *
4 * Save the state of a running Linux session.
5 *
6 * Copyright (c) 2003, K A Fraser.
7 */
9 #include <inttypes.h>
10 #include <time.h>
11 #include <stdlib.h>
12 #include <unistd.h>
13 #include <sys/time.h>
15 #include "xc_private.h"
16 #include "xc_dom.h"
17 #include "xg_private.h"
18 #include "xg_save_restore.h"
20 #include <xen/hvm/params.h>
21 #include <xen/hvm/e820.h>
23 /*
24 ** Default values for important tuning parameters. Can override by passing
25 ** non-zero replacement values to xc_domain_save().
26 **
27 ** XXX SMH: should consider if want to be able to override MAX_MBIT_RATE too.
28 **
29 */
30 #define DEF_MAX_ITERS 29 /* limit us to 30 times round loop */
31 #define DEF_MAX_FACTOR 3 /* never send more than 3x p2m_size */
33 /* max mfn of the whole machine */
34 static unsigned long max_mfn;
36 /* virtual starting address of the hypervisor */
37 static unsigned long hvirt_start;
39 /* #levels of page tables used by the current guest */
40 static unsigned int pt_levels;
42 /* HVM: shared-memory bitmaps for getting log-dirty bits from qemu-dm */
43 static unsigned long *qemu_bitmaps[2];
44 static int qemu_active;
45 static int qemu_non_active;
47 /* number of pfns this guest has (i.e. number of entries in the P2M) */
48 static unsigned long p2m_size;
50 /* Live mapping of the table mapping each PFN to its current MFN. */
51 static xen_pfn_t *live_p2m = NULL;
53 /* Live mapping of system MFN to PFN table. */
54 static xen_pfn_t *live_m2p = NULL;
55 static unsigned long m2p_mfn0;
57 /* grep fodder: machine_to_phys */
59 #define mfn_to_pfn(_mfn) live_m2p[(_mfn)]
61 /*
62 * Returns TRUE if the given machine frame number has a unique mapping
63 * in the guest's pseudophysical map.
64 */
65 #define MFN_IS_IN_PSEUDOPHYS_MAP(_mfn) \
66 (((_mfn) < (max_mfn)) && \
67 ((mfn_to_pfn(_mfn) < (p2m_size)) && \
68 (live_p2m[mfn_to_pfn(_mfn)] == (_mfn))))
70 /* Returns TRUE if MFN is successfully converted to a PFN. */
71 #define translate_mfn_to_pfn(_pmfn) \
72 ({ \
73 unsigned long mfn = *(_pmfn); \
74 int _res = 1; \
75 if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) ) \
76 _res = 0; \
77 else \
78 *(_pmfn) = mfn_to_pfn(mfn); \
79 _res; \
80 })
82 /*
83 ** During (live) save/migrate, we maintain a number of bitmaps to track
84 ** which pages we have to send, to fixup, and to skip.
85 */
87 #define BITS_PER_LONG (sizeof(unsigned long) * 8)
88 #define BITS_TO_LONGS(bits) (((bits)+BITS_PER_LONG-1)/BITS_PER_LONG)
89 #define BITMAP_SIZE (BITS_TO_LONGS(p2m_size) * sizeof(unsigned long))
91 #define BITMAP_ENTRY(_nr,_bmap) \
92 ((volatile unsigned long *)(_bmap))[(_nr)/BITS_PER_LONG]
94 #define BITMAP_SHIFT(_nr) ((_nr) % BITS_PER_LONG)
96 static inline int test_bit (int nr, volatile void * addr)
97 {
98 return (BITMAP_ENTRY(nr, addr) >> BITMAP_SHIFT(nr)) & 1;
99 }
101 static inline void clear_bit (int nr, volatile void * addr)
102 {
103 BITMAP_ENTRY(nr, addr) &= ~(1UL << BITMAP_SHIFT(nr));
104 }
106 static inline void set_bit ( int nr, volatile void * addr)
107 {
108 BITMAP_ENTRY(nr, addr) |= (1UL << BITMAP_SHIFT(nr));
109 }
111 /* Returns the Hamming weight (i.e. the number of bits set) in an N-bit word */
112 static inline unsigned int hweight32(unsigned int w)
113 {
114 unsigned int res = (w & 0x55555555) + ((w >> 1) & 0x55555555);
115 res = (res & 0x33333333) + ((res >> 2) & 0x33333333);
116 res = (res & 0x0F0F0F0F) + ((res >> 4) & 0x0F0F0F0F);
117 res = (res & 0x00FF00FF) + ((res >> 8) & 0x00FF00FF);
118 return (res & 0x0000FFFF) + ((res >> 16) & 0x0000FFFF);
119 }
121 static inline int count_bits ( int nr, volatile void *addr)
122 {
123 int i, count = 0;
124 volatile unsigned long *p = (volatile unsigned long *)addr;
125 /* We know that the array is padded to unsigned long. */
126 for ( i = 0; i < (nr / (sizeof(unsigned long)*8)); i++, p++ )
127 count += hweight32(*p);
128 return count;
129 }
131 static inline int permute( int i, int nr, int order_nr )
132 {
133 /* Need a simple permutation function so that we scan pages in a
134 pseudo-random order, enabling us to get a better estimate of
135 the domain's page dirtying rate as we go (there are often
136 contiguous ranges of pfns that have similar behaviour, and we
137 want to mix them up). */
139 /* e.g. nr->order 15->4 16->4 17->5 */
140 /* 512MB domain, 128k pages, order 17 */
142 /*
143 QPONMLKJIHGFEDCBA
144 QPONMLKJIH
145 GFEDCBA
146 */
148 /*
149 QPONMLKJIHGFEDCBA
150 EDCBA
151 QPONM
152 LKJIHGF
153 */
155 do { i = ((i>>(order_nr-10)) | ( i<<10 ) ) & ((1<<order_nr)-1); }
156 while ( i >= nr ); /* this won't ever loop if nr is a power of 2 */
158 return i;
159 }
161 static uint64_t tv_to_us(struct timeval *new)
162 {
163 return (new->tv_sec * 1000000) + new->tv_usec;
164 }
166 static uint64_t llgettimeofday(void)
167 {
168 struct timeval now;
169 gettimeofday(&now, NULL);
170 return tv_to_us(&now);
171 }
173 static uint64_t tv_delta(struct timeval *new, struct timeval *old)
174 {
175 return (((new->tv_sec - old->tv_sec)*1000000) +
176 (new->tv_usec - old->tv_usec));
177 }
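/* Write to the output fd, tracking how much has gone out and asking the
   kernel to drop the file's page cache once MAX_PAGECACHE_USAGE pages have
   been written, so a large save image does not accumulate in the cache. */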
179 static int noncached_write(int fd, int live, void *buffer, int len)
180 {
181 static int write_count = 0;
183 int rc = write(fd,buffer,len);
185 write_count += len;
186 if ( write_count >= (MAX_PAGECACHE_USAGE * PAGE_SIZE) )
187 {
188 /* Time to discard cache - don't care if this fails */
189 discard_file_cache(fd, 0 /* no flush */);
190 write_count = 0;
191 }
193 return rc;
194 }
196 #ifdef ADAPTIVE_SAVE
198 /*
199 ** We control the rate at which we transmit (or save) to minimize impact
200 ** on running domains (including the target if we're doing live migrate).
201 */
203 #define MAX_MBIT_RATE 500 /* maximum transmit rate for migrate */
204 #define START_MBIT_RATE 100 /* initial transmit rate for migrate */
206 /* Scaling factor to convert between a rate (in Mb/s) and time (in usecs) */
207 #define RATE_TO_BTU 781250
209 /* Amount in bytes we allow ourselves to send in a burst */
210 #define BURST_BUDGET (100*1024)
212 /* We keep track of the current and previous transmission rate */
213 static int mbit_rate, ombit_rate = 0;
215 /* Have we reached the maximum transmission rate? */
216 #define RATE_IS_MAX() (mbit_rate == MAX_MBIT_RATE)
218 static inline void initialize_mbit_rate()
219 {
220 mbit_rate = START_MBIT_RATE;
221 }
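/* Rate-limited write. A byte budget is topped up by BURST_BUDGET once per
   burst_time_us slot (derived from the current mbit_rate); when the budget
   runs dry we sleep until enough slots have passed, then write as normal. */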
223 static int ratewrite(int io_fd, int live, void *buf, int n)
224 {
225 static int budget = 0;
226 static int burst_time_us = -1;
227 static struct timeval last_put = { 0 };
228 struct timeval now;
229 struct timespec delay;
230 long long delta;
232 if ( START_MBIT_RATE == 0 )
233 return noncached_write(io_fd, live, buf, n);
235 budget -= n;
236 if ( budget < 0 )
237 {
238 if ( mbit_rate != ombit_rate )
239 {
240 burst_time_us = RATE_TO_BTU / mbit_rate;
241 ombit_rate = mbit_rate;
242 DPRINTF("rate limit: %d mbit/s burst budget %d slot time %d\n",
243 mbit_rate, BURST_BUDGET, burst_time_us);
244 }
245 if ( last_put.tv_sec == 0 )
246 {
247 budget += BURST_BUDGET;
248 gettimeofday(&last_put, NULL);
249 }
250 else
251 {
252 while ( budget < 0 )
253 {
254 gettimeofday(&now, NULL);
255 delta = tv_delta(&now, &last_put);
256 while ( delta > burst_time_us )
257 {
258 budget += BURST_BUDGET;
259 last_put.tv_usec += burst_time_us;
260 if ( last_put.tv_usec > 1000000 )
261 {
262 last_put.tv_usec -= 1000000;
263 last_put.tv_sec++;
264 }
265 delta -= burst_time_us;
266 }
267 if ( budget > 0 )
268 break;
269 delay.tv_sec = 0;
270 delay.tv_nsec = 1000 * (burst_time_us - delta);
271 while ( delay.tv_nsec > 0 )
272 if ( nanosleep(&delay, &delay) == 0 )
273 break;
274 }
275 }
276 }
277 return noncached_write(io_fd, live, buf, n);
278 }
280 #else /* ! ADAPTIVE SAVE */
282 #define RATE_IS_MAX() (0)
283 #define ratewrite(_io_fd, _live, _buf, _n) noncached_write((_io_fd), (_live), (_buf), (_n))
284 #define initialize_mbit_rate()
286 #endif
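/* Returns non-zero only if the entire buffer was written in a single call. */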
288 static inline ssize_t write_exact(int fd, void *buf, size_t count)
289 {
290 return (write(fd, buf, count) == count);
291 }
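/* Report progress since the previous call: wall-clock delta, dom0 and
   target CPU usage, and the send/dirty bandwidth derived from 'stats'.
   Under ADAPTIVE_SAVE this also raises mbit_rate to track the dirty rate. */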
293 static int print_stats(int xc_handle, uint32_t domid, int pages_sent,
294 xc_shadow_op_stats_t *stats, int print)
295 {
296 static struct timeval wall_last;
297 static long long d0_cpu_last;
298 static long long d1_cpu_last;
300 struct timeval wall_now;
301 long long wall_delta;
302 long long d0_cpu_now, d0_cpu_delta;
303 long long d1_cpu_now, d1_cpu_delta;
305 gettimeofday(&wall_now, NULL);
307 d0_cpu_now = xc_domain_get_cpu_usage(xc_handle, 0, /* FIXME */ 0)/1000;
308 d1_cpu_now = xc_domain_get_cpu_usage(xc_handle, domid, /* FIXME */ 0)/1000;
310 if ( (d0_cpu_now == -1) || (d1_cpu_now == -1) )
311 DPRINTF("ARRHHH!!\n");
313 wall_delta = tv_delta(&wall_now,&wall_last)/1000;
314 if ( wall_delta == 0 )
315 wall_delta = 1;
317 d0_cpu_delta = (d0_cpu_now - d0_cpu_last)/1000;
318 d1_cpu_delta = (d1_cpu_now - d1_cpu_last)/1000;
320 if ( print )
321 DPRINTF("delta %lldms, dom0 %d%%, target %d%%, sent %dMb/s, "
322 "dirtied %dMb/s %" PRId32 " pages\n",
323 wall_delta,
324 (int)((d0_cpu_delta*100)/wall_delta),
325 (int)((d1_cpu_delta*100)/wall_delta),
326 (int)((pages_sent*PAGE_SIZE)/(wall_delta*(1000/8))),
327 (int)((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8))),
328 stats->dirty_count);
330 #ifdef ADAPTIVE_SAVE
331 if ( ((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8))) > mbit_rate )
332 {
333 mbit_rate = (int)((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8)))
334 + 50;
335 if ( mbit_rate > MAX_MBIT_RATE )
336 mbit_rate = MAX_MBIT_RATE;
337 }
338 #endif
340 d0_cpu_last = d0_cpu_now;
341 d1_cpu_last = d1_cpu_now;
342 wall_last = wall_now;
344 return 0;
345 }
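/* Debugging aid: repeatedly clean the log-dirty bitmap and then sample the
   fault/dirty counters every 50ms. The normal save path passes runs == 0,
   which makes this a no-op. */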
348 static int analysis_phase(int xc_handle, uint32_t domid, int p2m_size,
349 unsigned long *arr, int runs)
350 {
351 long long start, now;
352 xc_shadow_op_stats_t stats;
353 int j;
355 start = llgettimeofday();
357 for ( j = 0; j < runs; j++ )
358 {
359 int i;
361 xc_shadow_control(xc_handle, domid, XEN_DOMCTL_SHADOW_OP_CLEAN,
362 arr, p2m_size, NULL, 0, NULL);
363 DPRINTF("#Flush\n");
364 for ( i = 0; i < 40; i++ )
365 {
366 usleep(50000);
367 now = llgettimeofday();
368 xc_shadow_control(xc_handle, domid, XEN_DOMCTL_SHADOW_OP_PEEK,
369 NULL, 0, NULL, 0, &stats);
370 DPRINTF("now= %lld faults= %"PRId32" dirty= %"PRId32"\n",
371 ((now-start)+500)/1000,
372 stats.fault_count, stats.dirty_count);
373 }
374 }
376 return -1;
377 }
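/* Ask the domain to suspend via the supplied callback, then poll its state
   until it reports SHUTDOWN_suspend. Dying, crashed or halted domains (or
   running out of retries) are reported as errors. */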
380 static int suspend_and_state(int (*suspend)(int), int xc_handle, int io_fd,
381 int dom, xc_dominfo_t *info)
382 {
383 int i = 0;
385 if ( !(*suspend)(dom) )
386 {
387 ERROR("Suspend request failed");
388 return -1;
389 }
391 retry:
393 if ( xc_domain_getinfo(xc_handle, dom, 1, info) != 1 )
394 {
395 ERROR("Could not get domain info");
396 return -1;
397 }
399 if ( info->dying )
400 {
401 ERROR("domain is dying");
402 return -1;
403 }
405 if ( info->crashed )
406 {
407 ERROR("domain has crashed");
408 return -1;
409 }
411 if ( info->shutdown )
412 {
413 switch ( info->shutdown_reason )
414 {
415 case SHUTDOWN_poweroff:
416 case SHUTDOWN_reboot:
417 ERROR("domain has shut down");
418 return -1;
419 case SHUTDOWN_suspend:
420 return 0;
421 case SHUTDOWN_crash:
422 ERROR("domain has crashed");
423 return -1;
424 }
425 }
427 if ( info->paused )
428 {
429 /* Try unpausing domain, wait, and retest. */
430 xc_domain_unpause( xc_handle, dom );
431 ERROR("Domain was paused. Wait and re-test.");
432 usleep(10000); /* 10ms */
433 goto retry;
434 }
436 if ( ++i < 100 )
437 {
438 ERROR("Retry suspend domain");
439 usleep(10000); /* 10ms */
440 goto retry;
441 }
443 ERROR("Unable to suspend domain.");
445 return -1;
446 }
448 /*
449 ** Map the top-level page of MFNs from the guest. The guest might not have
450 ** finished resuming from a previous restore operation, so we wait a while for
451 ** it to update the MFN to a reasonable value.
452 */
453 static void *map_frame_list_list(int xc_handle, uint32_t dom,
454 shared_info_t *shinfo)
455 {
456 int count = 100;
457 void *p;
459 while ( count-- && (shinfo->arch.pfn_to_mfn_frame_list_list == 0) )
460 usleep(10000);
462 if ( shinfo->arch.pfn_to_mfn_frame_list_list == 0 )
463 {
464 ERROR("Timed out waiting for frame list updated.");
465 return NULL;
466 }
468 p = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, PROT_READ,
469 shinfo->arch.pfn_to_mfn_frame_list_list);
470 if ( p == NULL )
471 ERROR("Couldn't map p2m_frame_list_list (errno %d)", errno);
473 return p;
474 }
476 /*
477 ** During transfer (or in the state file), all page-table pages must be
478 ** converted into a 'canonical' form where references to actual mfns
479 ** are replaced with references to the corresponding pfns.
480 **
481 ** This function performs the appropriate conversion, taking into account
482 ** which entries do not require canonicalization (in particular, those
483 ** entries which map the virtual address reserved for the hypervisor).
484 */
485 static int canonicalize_pagetable(unsigned long type, unsigned long pfn,
486 const void *spage, void *dpage)
487 {
489 int i, pte_last, xen_start, xen_end, race = 0;
490 uint64_t pte;
492 /*
493 ** We need to determine which entries in this page table hold
494 ** reserved hypervisor mappings. This depends on the current
495 ** page table type as well as the number of paging levels.
496 */
497 xen_start = xen_end = pte_last = PAGE_SIZE / ((pt_levels == 2) ? 4 : 8);
499 if ( (pt_levels == 2) && (type == XEN_DOMCTL_PFINFO_L2TAB) )
500 xen_start = (hvirt_start >> L2_PAGETABLE_SHIFT);
502 if ( (pt_levels == 3) && (type == XEN_DOMCTL_PFINFO_L3TAB) )
503 xen_start = L3_PAGETABLE_ENTRIES_PAE;
505 /*
506 ** In PAE, only the L2 mapping the top 1GB contains Xen mappings.
507 ** We can spot this by looking for the guest linear mapping which
508 ** Xen always ensures is present in that L2. Guests must ensure
509 ** that this check will fail for other L2s.
510 */
511 if ( (pt_levels == 3) && (type == XEN_DOMCTL_PFINFO_L2TAB) )
512 {
513 int hstart;
514 uint64_t he;
516 hstart = (hvirt_start >> L2_PAGETABLE_SHIFT_PAE) & 0x1ff;
517 he = ((const uint64_t *) spage)[hstart];
519 if ( ((he >> PAGE_SHIFT) & MFN_MASK_X86) == m2p_mfn0 )
520 {
521 /* hvirt starts with xen stuff... */
522 xen_start = hstart;
523 }
524 else if ( hvirt_start != 0xf5800000 )
525 {
526 /* old L2s from before hole was shrunk... */
527 hstart = (0xf5800000 >> L2_PAGETABLE_SHIFT_PAE) & 0x1ff;
528 he = ((const uint64_t *) spage)[hstart];
529 if ( ((he >> PAGE_SHIFT) & MFN_MASK_X86) == m2p_mfn0 )
530 xen_start = hstart;
531 }
532 }
534 if ( (pt_levels == 4) && (type == XEN_DOMCTL_PFINFO_L4TAB) )
535 {
536 /*
537 ** XXX SMH: should compute these from hvirt_start (which we have)
538 ** and hvirt_end (which we don't)
539 */
540 xen_start = 256;
541 xen_end = 272;
542 }
544 /* Now iterate through the page table, canonicalizing each PTE */
545 for (i = 0; i < pte_last; i++ )
546 {
547 unsigned long pfn, mfn;
549 if ( pt_levels == 2 )
550 pte = ((const uint32_t*)spage)[i];
551 else
552 pte = ((const uint64_t*)spage)[i];
554 if ( (i >= xen_start) && (i < xen_end) )
555 pte = 0;
557 if ( pte & _PAGE_PRESENT )
558 {
559 mfn = (pte >> PAGE_SHIFT) & MFN_MASK_X86;
560 if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) )
561 {
562 /* This will happen if the type info is stale, which
563 is quite feasible under live migration */
564 pfn = 0; /* zap it - we'll retransmit this page later */
565 race = 1; /* inform the caller of race; fatal if !live */
566 }
567 else
568 pfn = mfn_to_pfn(mfn);
570 pte &= ~MADDR_MASK_X86;
571 pte |= (uint64_t)pfn << PAGE_SHIFT;
573 /*
574 * PAE guest L3Es can contain these flags when running on
575 * a 64bit hypervisor. We zap these here to avoid any
576 * surprise at restore time...
577 */
578 if ( (pt_levels == 3) &&
579 (type == XEN_DOMCTL_PFINFO_L3TAB) &&
580 (pte & (_PAGE_USER|_PAGE_RW|_PAGE_ACCESSED)) )
581 pte &= ~(_PAGE_USER|_PAGE_RW|_PAGE_ACCESSED);
582 }
584 if ( pt_levels == 2 )
585 ((uint32_t*)dpage)[i] = pte;
586 else
587 ((uint64_t*)dpage)[i] = pte;
588 }
590 return race;
591 }
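/* Map the hypervisor's machine-to-physical table into our address space,
   one M2P_CHUNK_SIZE chunk at a time, remembering the MFN of the first
   chunk (used later to recognise Xen mappings in PAE L2 tables). */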
593 static xen_pfn_t *xc_map_m2p(int xc_handle,
594 unsigned long max_mfn,
595 int prot)
596 {
597 struct xen_machphys_mfn_list xmml;
598 privcmd_mmap_entry_t *entries;
599 unsigned long m2p_chunks, m2p_size;
600 xen_pfn_t *m2p;
601 xen_pfn_t *extent_start;
602 int i, rc;
604 m2p_size = M2P_SIZE(max_mfn);
605 m2p_chunks = M2P_CHUNKS(max_mfn);
607 xmml.max_extents = m2p_chunks;
608 if ( !(extent_start = malloc(m2p_chunks * sizeof(xen_pfn_t))) )
609 {
610 ERROR("failed to allocate space for m2p mfns");
611 return NULL;
612 }
613 set_xen_guest_handle(xmml.extent_start, extent_start);
615 if ( xc_memory_op(xc_handle, XENMEM_machphys_mfn_list, &xmml) ||
616 (xmml.nr_extents != m2p_chunks) )
617 {
618 ERROR("xc_get_m2p_mfns");
619 return NULL;
620 }
622 if ( (m2p = mmap(NULL, m2p_size, prot,
623 MAP_SHARED, xc_handle, 0)) == MAP_FAILED )
624 {
625 ERROR("failed to mmap m2p");
626 return NULL;
627 }
629 if ( !(entries = malloc(m2p_chunks * sizeof(privcmd_mmap_entry_t))) )
630 {
631 ERROR("failed to allocate space for mmap entries");
632 return NULL;
633 }
635 for ( i = 0; i < m2p_chunks; i++ )
636 {
637 entries[i].va = (unsigned long)(((void *)m2p) + (i * M2P_CHUNK_SIZE));
638 entries[i].mfn = extent_start[i];
639 entries[i].npages = M2P_CHUNK_SIZE >> PAGE_SHIFT;
640 }
642 if ( (rc = xc_map_foreign_ranges(xc_handle, DOMID_XEN,
643 entries, m2p_chunks)) < 0 )
644 {
645 ERROR("xc_mmap_foreign_ranges failed (rc = %d)", rc);
646 return NULL;
647 }
649 m2p_mfn0 = entries[0].mfn;
651 free(extent_start);
652 free(entries);
654 return m2p;
655 }
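/* Map the guest's P2M table (via the frame-list-list in the shared info
   page) and write the canonicalised list of P2M frames to the image,
   preceded by an extended-info chunk for PAE guests that understand
   extended CR3. Returns the live P2M mapping on success. */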
658 static xen_pfn_t *map_and_save_p2m_table(int xc_handle,
659 int io_fd,
660 uint32_t dom,
661 unsigned long p2m_size,
662 shared_info_t *live_shinfo)
663 {
664 vcpu_guest_context_t ctxt;
666 /* Double and single indirect references to the live P2M table */
667 xen_pfn_t *live_p2m_frame_list_list = NULL;
668 xen_pfn_t *live_p2m_frame_list = NULL;
670 /* A copy of the pfn-to-mfn table frame list. */
671 xen_pfn_t *p2m_frame_list = NULL;
673 /* The mapping of the live p2m table itself */
674 xen_pfn_t *p2m = NULL;
676 int i, success = 0;
678 live_p2m_frame_list_list = map_frame_list_list(xc_handle, dom,
679 live_shinfo);
680 if ( !live_p2m_frame_list_list )
681 goto out;
683 live_p2m_frame_list =
684 xc_map_foreign_batch(xc_handle, dom, PROT_READ,
685 live_p2m_frame_list_list,
686 P2M_FLL_ENTRIES);
687 if ( !live_p2m_frame_list )
688 {
689 ERROR("Couldn't map p2m_frame_list");
690 goto out;
691 }
694 /* Map all the frames of the pfn->mfn table. For migrate to succeed,
695 the guest must not change which frames are used for this purpose.
696 (it's not clear why it would want to change them, and we'll be OK
697 from a safety POV anyhow.) */
699 p2m = xc_map_foreign_batch(xc_handle, dom, PROT_READ,
700 live_p2m_frame_list,
701 P2M_FL_ENTRIES);
702 if ( !p2m )
703 {
704 ERROR("Couldn't map p2m table");
705 goto out;
706 }
707 live_p2m = p2m; /* So that translation macros will work */
709 /* Get a local copy of the live_P2M_frame_list */
710 if ( !(p2m_frame_list = malloc(P2M_FL_SIZE)) )
711 {
712 ERROR("Couldn't allocate p2m_frame_list array");
713 goto out;
714 }
715 memcpy(p2m_frame_list, live_p2m_frame_list, P2M_FL_SIZE);
717 /* Canonicalise the pfn-to-mfn table frame-number list. */
718 for ( i = 0; i < p2m_size; i += fpp )
719 {
720 if ( !translate_mfn_to_pfn(&p2m_frame_list[i/fpp]) )
721 {
722 ERROR("Frame# in pfn-to-mfn frame list is not in pseudophys");
723 ERROR("entry %d: p2m_frame_list[%ld] is 0x%"PRIx64, i, i/fpp,
724 (uint64_t)p2m_frame_list[i/fpp]);
725 goto out;
726 }
727 }
729 if ( xc_vcpu_getcontext(xc_handle, dom, 0, &ctxt) )
730 {
731 ERROR("Could not get vcpu context");
732 goto out;
733 }
735 /*
736 * Write an extended-info structure to inform the restore code that
737 * a PAE guest understands extended CR3 (PDPTs above 4GB). Turns off
738 * slow paths in the restore code.
739 */
740 if ( (pt_levels == 3) &&
741 (ctxt.vm_assist & (1UL << VMASST_TYPE_pae_extended_cr3)) )
742 {
743 unsigned long signature = ~0UL;
744 uint32_t tot_sz = sizeof(struct vcpu_guest_context) + 8;
745 uint32_t chunk_sz = sizeof(struct vcpu_guest_context);
746 char chunk_sig[] = "vcpu";
747 if ( !write_exact(io_fd, &signature, sizeof(signature)) ||
748 !write_exact(io_fd, &tot_sz, sizeof(tot_sz)) ||
749 !write_exact(io_fd, &chunk_sig, 4) ||
750 !write_exact(io_fd, &chunk_sz, sizeof(chunk_sz)) ||
751 !write_exact(io_fd, &ctxt, sizeof(ctxt)) )
752 {
753 ERROR("write: extended info");
754 goto out;
755 }
756 }
758 if ( !write_exact(io_fd, p2m_frame_list, P2M_FL_SIZE) )
759 {
760 ERROR("write: p2m_frame_list");
761 goto out;
762 }
764 success = 1;
766 out:
768 if ( !success && p2m )
769 munmap(p2m, ROUNDUP(p2m_size * sizeof(xen_pfn_t), PAGE_SHIFT));
771 if ( live_p2m_frame_list_list )
772 munmap(live_p2m_frame_list_list, PAGE_SIZE);
774 if ( live_p2m_frame_list )
775 munmap(live_p2m_frame_list, P2M_FLL_ENTRIES * PAGE_SIZE);
777 if ( p2m_frame_list )
778 free(p2m_frame_list);
780 return success ? p2m : NULL;
781 }
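/* Top-level save entry point. For a live save, enable log-dirty mode and
   iterate, resending pages that are redirtied, until the dirty set is small
   or the iteration/bandwidth limits are hit; then suspend the domain, send
   the final pass, and finish with the tail records (VCPU map and contexts,
   shared info, and HVM state or the unmapped-PFN table). */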
785 int xc_domain_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
786 uint32_t max_factor, uint32_t flags, int (*suspend)(int),
787 int hvm, void *(*init_qemu_maps)(int, unsigned),
788 void (*qemu_flip_buffer)(int, int))
789 {
790 xc_dominfo_t info;
792 int rc = 1, i, j, last_iter, iter = 0;
793 int live = (flags & XCFLAGS_LIVE);
794 int debug = (flags & XCFLAGS_DEBUG);
795 int race = 0, sent_last_iter, skip_this_iter;
797 /* The new domain's shared-info frame number. */
798 unsigned long shared_info_frame;
800 /* A copy of the CPU context of the guest. */
801 vcpu_guest_context_t ctxt;
803 /* A table containing the type of each PFN (/not/ MFN!). */
804 unsigned long *pfn_type = NULL;
805 unsigned long *pfn_batch = NULL;
807 /* A copy of one frame of guest memory. */
808 char page[PAGE_SIZE];
810 /* Live mapping of shared info structure */
811 shared_info_t *live_shinfo = NULL;
813 /* base of the region in which domain memory is mapped */
814 unsigned char *region_base = NULL;
816 /* power of 2 order of p2m_size */
817 int order_nr;
819 /* bitmap of pages:
820 - that should be sent this iteration (unless later marked as skip);
821 - to skip this iteration because already dirty;
822 - to fixup by sending at the end if not already resent; */
823 unsigned long *to_send = NULL, *to_skip = NULL, *to_fix = NULL;
825 xc_shadow_op_stats_t stats;
827 unsigned long needed_to_fix = 0;
828 unsigned long total_sent = 0;
830 uint64_t vcpumap = 1ULL;
832 /* HVM: a buffer for holding HVM context */
833 uint32_t hvm_buf_size = 0;
834 uint8_t *hvm_buf = NULL;
836 /* HVM: magic frames for ioreqs and xenstore comms. */
837 uint64_t magic_pfns[3]; /* ioreq_pfn, bufioreq_pfn, store_pfn */
839 /* If no explicit control parameters given, use defaults */
840 max_iters = max_iters ? : DEF_MAX_ITERS;
841 max_factor = max_factor ? : DEF_MAX_FACTOR;
843 initialize_mbit_rate();
845 if ( !get_platform_info(xc_handle, dom,
846 &max_mfn, &hvirt_start, &pt_levels) )
847 {
848 ERROR("Unable to get platform info.");
849 return 1;
850 }
852 if ( xc_domain_getinfo(xc_handle, dom, 1, &info) != 1 )
853 {
854 ERROR("Could not get domain info");
855 return 1;
856 }
858 shared_info_frame = info.shared_info_frame;
860 /* Map the shared info frame */
861 if ( !hvm )
862 {
863 live_shinfo = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
864 PROT_READ, shared_info_frame);
865 if ( !live_shinfo )
866 {
867 ERROR("Couldn't map live_shinfo");
868 goto out;
869 }
870 }
872 /* Get the size of the P2M table */
873 p2m_size = xc_memory_op(xc_handle, XENMEM_maximum_gpfn, &dom) + 1;
875 /* Domain is still running at this point */
876 if ( live )
877 {
878 /* Live suspend. Enable log-dirty mode. */
879 if ( xc_shadow_control(xc_handle, dom,
880 XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY,
881 NULL, 0, NULL, 0, NULL) < 0 )
882 {
883 /* log-dirty already enabled? There's no test op,
884 so attempt to disable then reenable it */
885 if ( !(xc_shadow_control(xc_handle, dom, XEN_DOMCTL_SHADOW_OP_OFF,
886 NULL, 0, NULL, 0, NULL) >= 0 &&
887 xc_shadow_control(xc_handle, dom,
888 XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY,
889 NULL, 0, NULL, 0, NULL) >= 0) )
890 {
891 ERROR("Couldn't enable shadow mode");
892 goto out;
893 }
894 }
896 if ( hvm )
897 {
898 /* Get qemu-dm logging dirty pages too */
899 void *seg = init_qemu_maps(dom, BITMAP_SIZE);
900 qemu_bitmaps[0] = seg;
901 qemu_bitmaps[1] = seg + BITMAP_SIZE;
902 qemu_active = 0;
903 qemu_non_active = 1;
904 }
905 }
906 else
907 {
908 /* This is a non-live suspend. Suspend the domain. */
909 if ( suspend_and_state(suspend, xc_handle, io_fd, dom, &info) )
910 {
911 ERROR("Domain appears not to have suspended");
912 goto out;
913 }
914 }
916 last_iter = !live;
918 /* pretend we sent all the pages last iteration */
919 sent_last_iter = p2m_size;
921 /* calculate the power of 2 order of p2m_size, e.g.
922 15->4 16->4 17->5 */
923 for ( i = p2m_size-1, order_nr = 0; i ; i >>= 1, order_nr++ )
924 continue;
926 /* Setup to_send / to_fix and to_skip bitmaps */
927 to_send = malloc(BITMAP_SIZE);
928 to_fix = calloc(1, BITMAP_SIZE);
929 to_skip = malloc(BITMAP_SIZE);
931 if ( !to_send || !to_fix || !to_skip )
932 {
933 ERROR("Couldn't allocate to_send array");
934 goto out;
935 }
937 memset(to_send, 0xff, BITMAP_SIZE);
939 if ( lock_pages(to_send, BITMAP_SIZE) )
940 {
941 ERROR("Unable to lock to_send");
942 return 1;
943 }
945 /* (to fix is local only) */
946 if ( lock_pages(to_skip, BITMAP_SIZE) )
947 {
948 ERROR("Unable to lock to_skip");
949 return 1;
950 }
952 if ( hvm )
953 {
954 /* Need another buffer for HVM context */
955 hvm_buf_size = xc_domain_hvm_getcontext(xc_handle, dom, 0, 0);
956 if ( hvm_buf_size == -1 )
957 {
958 ERROR("Couldn't get HVM context size from Xen");
959 goto out;
960 }
961 hvm_buf = malloc(hvm_buf_size);
962 if ( !hvm_buf )
963 {
964 ERROR("Couldn't allocate memory");
965 goto out;
966 }
967 }
969 analysis_phase(xc_handle, dom, p2m_size, to_skip, 0);
971 /* We want zeroed memory so use calloc rather than malloc. */
972 pfn_type = calloc(MAX_BATCH_SIZE, sizeof(*pfn_type));
973 pfn_batch = calloc(MAX_BATCH_SIZE, sizeof(*pfn_batch));
974 if ( (pfn_type == NULL) || (pfn_batch == NULL) )
975 {
976 ERROR("failed to alloc memory for pfn_type and/or pfn_batch arrays");
977 errno = ENOMEM;
978 goto out;
979 }
981 if ( lock_pages(pfn_type, MAX_BATCH_SIZE * sizeof(*pfn_type)) )
982 {
983 ERROR("Unable to lock");
984 goto out;
985 }
987 /* Setup the mfn_to_pfn table mapping */
988 if ( !(live_m2p = xc_map_m2p(xc_handle, max_mfn, PROT_READ)) )
989 {
990 ERROR("Failed to map live M2P table");
991 goto out;
992 }
994 /* Start writing out the saved-domain record. */
995 if ( !write_exact(io_fd, &p2m_size, sizeof(unsigned long)) )
996 {
997 ERROR("write: p2m_size");
998 goto out;
999 }
1001 if ( !hvm )
1003 int err = 0;
1004 unsigned long mfn;
1006 /* Map the P2M table, and write the list of P2M frames */
1007 live_p2m = map_and_save_p2m_table(xc_handle, io_fd, dom,
1008 p2m_size, live_shinfo);
1009 if ( live_p2m == NULL )
1011 ERROR("Failed to map/save the p2m frame list");
1012 goto out;
1015 /*
1016 * Quick belt and braces sanity check.
1017 */
1019 for ( i = 0; i < p2m_size; i++ )
1021 mfn = live_p2m[i];
1022 if( (mfn != INVALID_P2M_ENTRY) && (mfn_to_pfn(mfn) != i) )
1024 DPRINTF("i=0x%x mfn=%lx live_m2p=%lx\n", i,
1025 mfn, mfn_to_pfn(mfn));
1026 err++;
1029 DPRINTF("Had %d unexplained entries in p2m table\n", err);
1032 print_stats(xc_handle, dom, 0, &stats, 0);
1034 /* Now write out each data page, canonicalising page tables as we go... */
1035 for ( ; ; )
1037 unsigned int prev_pc, sent_this_iter, N, batch;
1039 iter++;
1040 sent_this_iter = 0;
1041 skip_this_iter = 0;
1042 prev_pc = 0;
1043 N = 0;
1045 DPRINTF("Saving memory pages: iter %d 0%%", iter);
1047 while ( N < p2m_size )
1049 unsigned int this_pc = (N * 100) / p2m_size;
1050 int rc;
1052 if ( (this_pc - prev_pc) >= 5 )
1054 DPRINTF("\b\b\b\b%3d%%", this_pc);
1055 prev_pc = this_pc;
1058 if ( !last_iter )
1060 /* Slightly wasteful to peek the whole array every time,
1061 but this is fast enough for the moment. */
1062 rc = xc_shadow_control(
1063 xc_handle, dom, XEN_DOMCTL_SHADOW_OP_PEEK, to_skip,
1064 p2m_size, NULL, 0, NULL);
1065 if ( rc != p2m_size )
1067 ERROR("Error peeking shadow bitmap");
1068 goto out;
1072 /* load pfn_type[] with the mfn of all the pages we're doing in
1073 this batch. */
1074 for ( batch = 0;
1075 (batch < MAX_BATCH_SIZE) && (N < p2m_size);
1076 N++ )
1078 int n = permute(N, p2m_size, order_nr);
1080 if ( debug )
1081 DPRINTF("%d pfn= %08lx mfn= %08lx %d [mfn]= %08lx\n",
1082 iter, (unsigned long)n, hvm ? 0 : live_p2m[n],
1083 test_bit(n, to_send),
1084 hvm ? 0 : mfn_to_pfn(live_p2m[n]&0xFFFFF));
1086 if ( !last_iter &&
1087 test_bit(n, to_send) &&
1088 test_bit(n, to_skip) )
1089 skip_this_iter++; /* stats keeping */
1091 if ( !((test_bit(n, to_send) && !test_bit(n, to_skip)) ||
1092 (test_bit(n, to_send) && last_iter) ||
1093 (test_bit(n, to_fix) && last_iter)) )
1094 continue;
1096 /* Skip PFNs that aren't really there */
1097 if ( hvm && ((n >= 0xa0 && n < 0xc0) /* VGA hole */
1098 || (n >= (HVM_BELOW_4G_MMIO_START >> PAGE_SHIFT)
1099 && n < (1ULL<<32) >> PAGE_SHIFT)) /* MMIO */ )
1100 continue;
1102 /*
1103 ** we get here if:
1104 ** 1. page is marked to_send & hasn't already been re-dirtied
1105 ** 2. (ignore to_skip in last iteration)
1106 ** 3. add in pages that still need fixup (net bufs)
1107 */
1109 pfn_batch[batch] = n;
1111 /* Hypercall interfaces operate in PFNs for HVM guests
1112 * and MFNs for PV guests */
1113 if ( hvm )
1114 pfn_type[batch] = n;
1115 else
1116 pfn_type[batch] = live_p2m[n];
1118 if ( !is_mapped(pfn_type[batch]) )
1120 /*
1121 ** not currently in pseudo-physical map -- set bit
1122 ** in to_fix since we must send this page in last_iter
1123 ** unless it's sent sooner anyhow, or it never enters
1124 ** pseudo-physical map (e.g. for ballooned down doms)
1125 */
1126 set_bit(n, to_fix);
1127 continue;
1130 if ( last_iter &&
1131 test_bit(n, to_fix) &&
1132 !test_bit(n, to_send) )
1134 needed_to_fix++;
1135 DPRINTF("Fix! iter %d, pfn %x. mfn %lx\n",
1136 iter, n, pfn_type[batch]);
1139 clear_bit(n, to_fix);
1141 batch++;
1144 if ( batch == 0 )
1145 goto skip; /* vanishingly unlikely... */
1147 region_base = xc_map_foreign_batch(
1148 xc_handle, dom, PROT_READ, pfn_type, batch);
1149 if ( region_base == NULL )
1151 ERROR("map batch failed");
1152 goto out;
1155 if ( !hvm )
1157 /* Get page types */
1158 for ( j = 0; j < batch; j++ )
1159 ((uint32_t *)pfn_type)[j] = pfn_type[j];
1160 if ( xc_get_pfn_type_batch(xc_handle, dom, batch,
1161 (uint32_t *)pfn_type) )
1163 ERROR("get_pfn_type_batch failed");
1164 goto out;
1166 for ( j = batch-1; j >= 0; j-- )
1167 pfn_type[j] = ((uint32_t *)pfn_type)[j];
1169 for ( j = 0; j < batch; j++ )
1172 if ( (pfn_type[j] & XEN_DOMCTL_PFINFO_LTAB_MASK) ==
1173 XEN_DOMCTL_PFINFO_XTAB )
1175 DPRINTF("type fail: page %i mfn %08lx\n",
1176 j, pfn_type[j]);
1177 continue;
1180 if ( debug )
1181 DPRINTF("%d pfn= %08lx mfn= %08lx [mfn]= %08lx"
1182 " sum= %08lx\n",
1183 iter,
1184 (pfn_type[j] & XEN_DOMCTL_PFINFO_LTAB_MASK) |
1185 pfn_batch[j],
1186 pfn_type[j],
1187 mfn_to_pfn(pfn_type[j] &
1188 ~XEN_DOMCTL_PFINFO_LTAB_MASK),
1189 csum_page(region_base + (PAGE_SIZE*j)));
1191 /* canonicalise mfn->pfn */
1192 pfn_type[j] = (pfn_type[j] & XEN_DOMCTL_PFINFO_LTAB_MASK) |
1193 pfn_batch[j];
1197 if ( !write_exact(io_fd, &batch, sizeof(unsigned int)) )
1199 ERROR("Error when writing to state file (2) (errno %d)",
1200 errno);
1201 goto out;
1204 if ( !write_exact(io_fd, pfn_type, sizeof(unsigned long)*batch) )
1206 ERROR("Error when writing to state file (3) (errno %d)",
1207 errno);
1208 goto out;
1211 /* entering this loop, pfn_type is now in pfns (Not mfns) */
1212 for ( j = 0; j < batch; j++ )
1214 unsigned long pfn, pagetype;
1215 void *spage = (char *)region_base + (PAGE_SIZE*j);
1217 pfn = pfn_type[j] & ~XEN_DOMCTL_PFINFO_LTAB_MASK;
1218 pagetype = pfn_type[j] & XEN_DOMCTL_PFINFO_LTAB_MASK;
1220 /* write out pages in batch */
1221 if ( pagetype == XEN_DOMCTL_PFINFO_XTAB )
1222 continue;
1224 pagetype &= XEN_DOMCTL_PFINFO_LTABTYPE_MASK;
1226 if ( (pagetype >= XEN_DOMCTL_PFINFO_L1TAB) &&
1227 (pagetype <= XEN_DOMCTL_PFINFO_L4TAB) )
1229 /* We have a pagetable page: need to rewrite it. */
1230 race =
1231 canonicalize_pagetable(pagetype, pfn, spage, page);
1233 if ( race && !live )
1235 ERROR("Fatal PT race (pfn %lx, type %08lx)", pfn,
1236 pagetype);
1237 goto out;
1240 if ( ratewrite(io_fd, live, page, PAGE_SIZE) != PAGE_SIZE )
1242 ERROR("Error when writing to state file (4)"
1243 " (errno %d)", errno);
1244 goto out;
1247 else
1249 /* We have a normal page: just write it directly. */
1250 if ( ratewrite(io_fd, live, spage, PAGE_SIZE) !=
1251 PAGE_SIZE )
1253 ERROR("Error when writing to state file (5)"
1254 " (errno %d)", errno);
1255 goto out;
1258 } /* end of the write out for this batch */
1260 sent_this_iter += batch;
1262 munmap(region_base, batch*PAGE_SIZE);
1264 } /* end of this while loop for this iteration */
1266 skip:
1268 total_sent += sent_this_iter;
1270 DPRINTF("\r %d: sent %d, skipped %d, ",
1271 iter, sent_this_iter, skip_this_iter );
1273 if ( last_iter )
1275 print_stats( xc_handle, dom, sent_this_iter, &stats, 1);
1277 DPRINTF("Total pages sent= %ld (%.2fx)\n",
1278 total_sent, ((float)total_sent)/p2m_size );
1279 DPRINTF("(of which %ld were fixups)\n", needed_to_fix );
1282 if ( last_iter && debug )
1284 int minusone = -1;
1285 memset(to_send, 0xff, BITMAP_SIZE);
1286 debug = 0;
1287 DPRINTF("Entering debug resend-all mode\n");
1289 /* send "-1" to put receiver into debug mode */
1290 if ( !write_exact(io_fd, &minusone, sizeof(int)) )
1292 ERROR("Error when writing to state file (6) (errno %d)",
1293 errno);
1294 goto out;
1297 continue;
1300 if ( last_iter )
1301 break;
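/* Decide whether the next pass should be the last: progress has stalled at
   the maximum transmit rate, we have reached max_iters, only a handful of
   pages remain, or we have already sent max_factor times the guest's
   memory. If so, suspend the domain before the final pass. */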
1303 if ( live )
1305 if ( ((sent_this_iter > sent_last_iter) && RATE_IS_MAX()) ||
1306 (iter >= max_iters) ||
1307 (sent_this_iter+skip_this_iter < 50) ||
1308 (total_sent > p2m_size*max_factor) )
1310 DPRINTF("Start last iteration\n");
1311 last_iter = 1;
1313 if ( suspend_and_state(suspend, xc_handle, io_fd, dom, &info) )
1315 ERROR("Domain appears not to have suspended");
1316 goto out;
1319 DPRINTF("SUSPEND shinfo %08lx\n", info.shared_info_frame);
1322 if ( xc_shadow_control(xc_handle, dom,
1323 XEN_DOMCTL_SHADOW_OP_CLEAN, to_send,
1324 p2m_size, NULL, 0, &stats) != p2m_size )
1326 ERROR("Error flushing shadow PT");
1327 goto out;
1330 if ( hvm )
1332 /* Pull in the dirty bits from qemu-dm too */
1333 if ( !last_iter )
1335 qemu_active = qemu_non_active;
1336 qemu_non_active = qemu_active ? 0 : 1;
1337 qemu_flip_buffer(dom, qemu_active);
1338 for ( j = 0; j < BITMAP_SIZE / sizeof(unsigned long); j++ )
1340 to_send[j] |= qemu_bitmaps[qemu_non_active][j];
1341 qemu_bitmaps[qemu_non_active][j] = 0;
1344 else
1346 for ( j = 0; j < BITMAP_SIZE / sizeof(unsigned long); j++ )
1347 to_send[j] |= qemu_bitmaps[qemu_active][j];
1351 sent_last_iter = sent_this_iter;
1353 print_stats(xc_handle, dom, sent_this_iter, &stats, 1);
1356 } /* end of infinite for loop */
1358 DPRINTF("All memory is saved\n");
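/* Send a "-2" marker chunk carrying max_vcpu_id and a bitmap of which
   VCPUs are currently online. */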
1361 struct {
1362 int minustwo;
1363 int max_vcpu_id;
1364 uint64_t vcpumap;
1365 } chunk = { -2, info.max_vcpu_id };
1367 if ( info.max_vcpu_id >= 64 )
1369 ERROR("Too many VCPUS in guest!");
1370 goto out;
1373 for ( i = 1; i <= info.max_vcpu_id; i++ )
1375 xc_vcpuinfo_t vinfo;
1376 if ( (xc_vcpu_getinfo(xc_handle, dom, i, &vinfo) == 0) &&
1377 vinfo.online )
1378 vcpumap |= 1ULL << i;
1381 chunk.vcpumap = vcpumap;
1382 if ( !write_exact(io_fd, &chunk, sizeof(chunk)) )
1384 ERROR("Error when writing to state file (errno %d)", errno);
1385 goto out;
1389 /* Zero terminate */
1390 i = 0;
1391 if ( !write_exact(io_fd, &i, sizeof(int)) )
1393 ERROR("Error when writing to state file (6') (errno %d)", errno);
1394 goto out;
1397 if ( hvm )
1399 uint32_t rec_size;
1401 /* Save magic-page locations. */
1402 memset(magic_pfns, 0, sizeof(magic_pfns));
1403 xc_get_hvm_param(xc_handle, dom, HVM_PARAM_IOREQ_PFN,
1404 (unsigned long *)&magic_pfns[0]);
1405 xc_get_hvm_param(xc_handle, dom, HVM_PARAM_BUFIOREQ_PFN,
1406 (unsigned long *)&magic_pfns[1]);
1407 xc_get_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN,
1408 (unsigned long *)&magic_pfns[2]);
1409 if ( !write_exact(io_fd, magic_pfns, sizeof(magic_pfns)) )
1411 ERROR("Error when writing to state file (7)");
1412 goto out;
1415 /* Get HVM context from Xen and save it too */
1416 if ( (rec_size = xc_domain_hvm_getcontext(xc_handle, dom, hvm_buf,
1417 hvm_buf_size)) == -1 )
1419 ERROR("HVM:Could not get hvm buffer");
1420 goto out;
1423 if ( !write_exact(io_fd, &rec_size, sizeof(uint32_t)) )
1425 ERROR("error write hvm buffer size");
1426 goto out;
1429 if ( !write_exact(io_fd, hvm_buf, rec_size) )
1431 ERROR("write HVM info failed!\n");
1432 goto out;
1435 /* HVM guests are done now */
1436 rc = 0;
1437 goto out;
1440 /* PV guests only from now on */
1442 /* Send through a list of all the PFNs that were not in map at the close */
1444 unsigned int i,j;
1445 unsigned long pfntab[1024];
1447 for ( i = 0, j = 0; i < p2m_size; i++ )
1449 if ( !is_mapped(live_p2m[i]) )
1450 j++;
1453 if ( !write_exact(io_fd, &j, sizeof(unsigned int)) )
1455 ERROR("Error when writing to state file (6a) (errno %d)", errno);
1456 goto out;
1459 for ( i = 0, j = 0; i < p2m_size; )
1461 if ( !is_mapped(live_p2m[i]) )
1462 pfntab[j++] = i;
1464 i++;
1465 if ( (j == 1024) || (i == p2m_size) )
1467 if ( !write_exact(io_fd, &pfntab, sizeof(unsigned long)*j) )
1469 ERROR("Error when writing to state file (6b) (errno %d)",
1470 errno);
1471 goto out;
1473 j = 0;
1478 if ( xc_vcpu_getcontext(xc_handle, dom, 0, &ctxt) )
1480 ERROR("Could not get vcpu context");
1481 goto out;
1484 /* Canonicalise the suspend-record frame number. */
1485 if ( !translate_mfn_to_pfn(&ctxt.user_regs.edx) )
1487 ERROR("Suspend record is not in range of pseudophys map");
1488 goto out;
1491 for ( i = 0; i <= info.max_vcpu_id; i++ )
1493 if ( !(vcpumap & (1ULL << i)) )
1494 continue;
1496 if ( (i != 0) && xc_vcpu_getcontext(xc_handle, dom, i, &ctxt) )
1498 ERROR("No context for VCPU%d", i);
1499 goto out;
1502 /* Canonicalise each GDT frame number. */
1503 for ( j = 0; (512*j) < ctxt.gdt_ents; j++ )
1505 if ( !translate_mfn_to_pfn(&ctxt.gdt_frames[j]) )
1507 ERROR("GDT frame is not in range of pseudophys map");
1508 goto out;
1512 /* Canonicalise the page table base pointer. */
1513 if ( !MFN_IS_IN_PSEUDOPHYS_MAP(xen_cr3_to_pfn(ctxt.ctrlreg[3])) )
1515 ERROR("PT base is not in range of pseudophys map");
1516 goto out;
1518 ctxt.ctrlreg[3] =
1519 xen_pfn_to_cr3(mfn_to_pfn(xen_cr3_to_pfn(ctxt.ctrlreg[3])));
1521 /* Guest pagetable (x86/64) stored in otherwise-unused CR1. */
1522 if ( (pt_levels == 4) && ctxt.ctrlreg[1] )
1524 if ( !MFN_IS_IN_PSEUDOPHYS_MAP(xen_cr3_to_pfn(ctxt.ctrlreg[1])) )
1526 ERROR("PT base is not in range of pseudophys map");
1527 goto out;
1529 /* Least-significant bit means 'valid PFN'. */
1530 ctxt.ctrlreg[1] = 1 |
1531 xen_pfn_to_cr3(mfn_to_pfn(xen_cr3_to_pfn(ctxt.ctrlreg[1])));
1534 if ( !write_exact(io_fd, &ctxt, sizeof(ctxt)) )
1536 ERROR("Error when writing to state file (1) (errno %d)", errno);
1537 goto out;
1541 /*
1542 * Reset the MFN to be a known-invalid value. See map_frame_list_list().
1543 */
1544 memcpy(page, live_shinfo, PAGE_SIZE);
1545 ((shared_info_t *)page)->arch.pfn_to_mfn_frame_list_list = 0;
1546 if ( !write_exact(io_fd, page, PAGE_SIZE) )
1548 ERROR("Error when writing to state file (1) (errno %d)", errno);
1549 goto out;
1552 /* Success! */
1553 rc = 0;
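/* Common exit path: switch log-dirty mode back off for live saves, flush
   and drop the output file's page cache, and release all mappings and
   buffers. */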
1555 out:
1557 if ( live )
1559 if ( xc_shadow_control(xc_handle, dom,
1560 XEN_DOMCTL_SHADOW_OP_OFF,
1561 NULL, 0, NULL, 0, NULL) < 0 )
1562 DPRINTF("Warning - couldn't disable shadow mode");
1565 /* Flush last write and discard cache for file. */
1566 discard_file_cache(io_fd, 1 /* flush */);
1568 if ( live_shinfo )
1569 munmap(live_shinfo, PAGE_SIZE);
1571 if ( live_p2m )
1572 munmap(live_p2m, ROUNDUP(p2m_size * sizeof(xen_pfn_t), PAGE_SHIFT));
1574 if ( live_m2p )
1575 munmap(live_m2p, M2P_SIZE(max_mfn));
1577 free(pfn_type);
1578 free(pfn_batch);
1579 free(to_send);
1580 free(to_fix);
1581 free(to_skip);
1583 DPRINTF("Save exit rc=%d\n",rc);
1585 return !!rc;
1588 /*
1589 * Local variables:
1590 * mode: C
1591 * c-set-style: "BSD"
1592 * c-basic-offset: 4
1593 * tab-width: 4
1594 * indent-tabs-mode: nil
1595 * End:
1596 */