ia64/xen-unstable

view tools/libxc/ia64/xc_ia64_linux_save.c @ 10786:86e5d8458c08

[IA64] live migration

Shadow mode and live migration.

Virtualize Dirty bit.

Signed-off-by: Tristan Gingold <tristan.gingold@bull.net>
author awilliam@xenbuild.aw
date Wed Jul 26 09:36:36 2006 -0600 (2006-07-26)
parents 306d7857928c
children ac41222866e9 86d26e6ec89b
line source
1 /******************************************************************************
2 * xc_ia64_linux_save.c
3 *
4 * Save the state of a running Linux session.
5 *
6 * Copyright (c) 2003, K A Fraser.
7 * Rewritten for ia64 by Tristan Gingold <tristan.gingold@bull.net>
8 */
#include <errno.h>
#include <inttypes.h>
#include <time.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/time.h>

#include "xg_private.h"
18 /*
19 ** Default values for important tuning parameters. Can override by passing
20 ** non-zero replacement values to xc_linux_save().
21 **
22 ** XXX SMH: should consider if want to be able to override MAX_MBIT_RATE too.
23 **
24 */
/* Defaults used by xc_linux_save() when the caller passes zero. */
enum {
    DEF_MAX_ITERS  = 4 - 1, /* limit us to 4 times round loop */
    DEF_MAX_FACTOR = 3      /* never send more than 3x nr_pfns */
};
28 /*
29 ** During (live) save/migrate, we maintain a number of bitmaps to track
30 ** which pages we have to send, and to skip.
31 */
/* Number of bits in one bitmap word (bitmaps are arrays of unsigned long). */
#define BITS_PER_LONG (sizeof(unsigned long) * 8)

/* The bitmap word (usable as an lvalue) that holds bit _nr of _bmap. */
#define BITMAP_ENTRY(_nr,_bmap) \
    (((unsigned long *)(_bmap))[(_nr) / BITS_PER_LONG])

/* Position of bit _nr inside its bitmap word. */
#define BITMAP_SHIFT(_nr) ((_nr) % BITS_PER_LONG)

/*
 * Bit helpers for the page bitmaps.
 *
 * The bit index is an unsigned long: callers index these bitmaps with
 * unsigned long page numbers, and an int parameter would silently truncate
 * them.
 */

/* Return 1 if bit nr of the bitmap at addr is set, 0 otherwise. */
static inline int test_bit(unsigned long nr, volatile void *addr)
{
    return (BITMAP_ENTRY(nr, addr) >> BITMAP_SHIFT(nr)) & 1;
}

/* Clear bit nr of the bitmap at addr. */
static inline void clear_bit(unsigned long nr, volatile void *addr)
{
    BITMAP_ENTRY(nr, addr) &= ~(1UL << BITMAP_SHIFT(nr));
}

/* Set bit nr of the bitmap at addr. */
static inline void set_bit(unsigned long nr, volatile void *addr)
{
    BITMAP_ENTRY(nr, addr) |= (1UL << BITMAP_SHIFT(nr));
}
/*
 * Number of page frames of the guest currently being saved; computed by
 * xc_linux_save() from the domain's max_memkb.
 */
static unsigned long max_pfn;
58 static int xc_ia64_shadow_control(int xc_handle,
59 uint32_t domid,
60 unsigned int sop,
61 unsigned long *dirty_bitmap,
62 unsigned long pages,
63 xc_shadow_control_stats_t *stats)
64 {
65 if (dirty_bitmap != NULL && pages > 0) {
66 int i;
67 unsigned char *bmap = (unsigned char *)dirty_bitmap;
68 unsigned long bmap_bytes =
69 ((pages + BITS_PER_LONG - 1) & ~(BITS_PER_LONG - 1)) / 8;
70 unsigned int bmap_pages = (bmap_bytes + PAGE_SIZE - 1) / PAGE_SIZE;
72 /* Touch the page so that it is in the TC.
73 FIXME: use a more reliable method. */
74 for (i = 0 ; i < bmap_pages ; i++)
75 bmap[i * PAGE_SIZE] = 0;
76 /* Because bmap is not page aligned (allocated by malloc), be sure the
77 last page is touched. */
78 bmap[bmap_bytes - 1] = 0;
79 }
81 return xc_shadow_control(xc_handle, domid, sop,
82 dirty_bitmap, pages, stats);
83 }
/*
 * Write exactly count bytes from buf to fd.
 *
 * write() may legitimately transfer fewer bytes than requested (pipes,
 * sockets) or be interrupted by a signal before transferring anything, so
 * keep writing until everything has gone out instead of treating a short
 * write as a failure.
 *
 * Returns 1 when all count bytes were written, 0 on error.
 */
static inline ssize_t
write_exact(int fd, void *buf, size_t count)
{
    const unsigned char *cursor = buf;
    size_t remaining = count;

    while (remaining > 0) {
        ssize_t written = write(fd, cursor, remaining);

        if (written < 0) {
            if (errno == EINTR)
                continue;       /* interrupted before any data: retry */
            return 0;
        }
        if (written == 0)
            return 0;           /* no progress possible: avoid spinning */

        cursor += written;
        remaining -= (size_t)written;
    }

    return 1;
}
93 static int
94 suspend_and_state(int (*suspend)(int), int xc_handle, int io_fd,
95 int dom, xc_dominfo_t *info)
96 {
97 int i = 0;
99 if (!(*suspend)(dom)) {
100 ERR("Suspend request failed");
101 return -1;
102 }
104 retry:
106 if (xc_domain_getinfo(xc_handle, dom, 1, info) != 1) {
107 ERR("Could not get domain info");
108 return -1;
109 }
111 if (info->shutdown && info->shutdown_reason == SHUTDOWN_suspend)
112 return 0; // success
114 if (info->paused) {
115 // try unpausing domain, wait, and retest
116 xc_domain_unpause(xc_handle, dom);
118 ERR("Domain was paused. Wait and re-test.");
119 usleep(10000); // 10ms
121 goto retry;
122 }
125 if(++i < 100) {
126 ERR("Retry suspend domain.");
127 usleep(10000); // 10ms
128 goto retry;
129 }
131 ERR("Unable to suspend domain.");
133 return -1;
134 }
136 int
137 xc_linux_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
138 uint32_t max_factor, uint32_t flags, int (*suspend)(int))
139 {
140 DECLARE_DOM0_OP;
141 xc_dominfo_t info;
143 int rc = 1;
145 //int live = (flags & XCFLAGS_LIVE);
146 int debug = (flags & XCFLAGS_DEBUG);
147 int live = (flags & XCFLAGS_LIVE);
149 /* The new domain's shared-info frame number. */
150 unsigned long shared_info_frame;
152 /* A copy of the CPU context of the guest. */
153 vcpu_guest_context_t ctxt;
155 unsigned long *page_array = NULL;
157 /* Live mapping of shared info structure */
158 shared_info_t *live_shinfo = NULL;
160 /* Iteration number. */
161 int iter;
163 /* Number of pages sent in the last iteration (live only). */
164 unsigned int sent_last_iter;
166 /* Number of pages sent (live only). */
167 unsigned int total_sent;
169 /* Size of the shadow bitmap (live only). */
170 unsigned int bitmap_size = 0;
172 /* True if last iteration. */
173 int last_iter;
175 /* Bitmap of pages to be sent. */
176 unsigned long *to_send = NULL;
177 /* Bitmap of pages not to be sent (because dirtied). */
178 unsigned long *to_skip = NULL;
180 char *mem;
182 if (debug)
183 fprintf (stderr, "xc_linux_save (ia64): started dom=%d\n", dom);
185 /* If no explicit control parameters given, use defaults */
186 if (!max_iters)
187 max_iters = DEF_MAX_ITERS;
188 if (!max_factor)
189 max_factor = DEF_MAX_FACTOR;
191 //initialize_mbit_rate();
193 if (xc_domain_getinfo(xc_handle, dom, 1, &info) != 1) {
194 ERR("Could not get domain info");
195 return 1;
196 }
198 shared_info_frame = info.shared_info_frame;
200 #if 0
201 /* cheesy sanity check */
202 if ((info.max_memkb >> (PAGE_SHIFT - 10)) > max_mfn) {
203 ERR("Invalid state record -- pfn count out of range: %lu",
204 (info.max_memkb >> (PAGE_SHIFT - 10)));
205 goto out;
206 }
207 #endif
209 /* Map the shared info frame */
210 live_shinfo = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
211 PROT_READ, shared_info_frame);
212 if (!live_shinfo) {
213 ERR("Couldn't map live_shinfo");
214 goto out;
215 }
217 max_pfn = info.max_memkb >> (PAGE_SHIFT - 10);
219 page_array = malloc(max_pfn * sizeof(unsigned long));
220 if (page_array == NULL) {
221 ERR("Could not allocate memory");
222 goto out;
223 }
225 /* This is expected by xm restore. */
226 if (!write_exact(io_fd, &max_pfn, sizeof(unsigned long))) {
227 ERR("write: max_pfn");
228 goto out;
229 }
231 /* xc_linux_restore starts to read here. */
232 /* Write a version number. This can avoid searching for a stupid bug
233 if the format change.
234 The version is hard-coded, don't forget to change the restore code
235 too! */
236 {
237 unsigned long version = 1;
239 if (!write_exact(io_fd, &version, sizeof(unsigned long))) {
240 ERR("write: version");
241 goto out;
242 }
243 }
245 op.cmd = DOM0_DOMAIN_SETUP;
246 op.u.domain_setup.domain = (domid_t)dom;
247 op.u.domain_setup.flags = XEN_DOMAINSETUP_query;
248 if (xc_dom0_op(xc_handle, &op) < 0) {
249 ERR("Could not get domain setup");
250 goto out;
251 }
252 op.u.domain_setup.domain = 0;
253 if (!write_exact(io_fd, &op.u.domain_setup, sizeof(op.u.domain_setup))) {
254 ERR("write: domain setup");
255 goto out;
256 }
258 /* Domain is still running at this point */
259 if (live) {
261 if (xc_ia64_shadow_control(xc_handle, dom,
262 DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY,
263 NULL, 0, NULL ) < 0) {
264 ERR("Couldn't enable shadow mode");
265 goto out;
266 }
268 last_iter = 0;
270 bitmap_size = ((max_pfn + BITS_PER_LONG-1) & ~(BITS_PER_LONG-1)) / 8;
271 to_send = malloc(bitmap_size);
272 to_skip = malloc(bitmap_size);
274 if (!to_send || !to_skip) {
275 ERR("Couldn't allocate bitmap array");
276 goto out;
277 }
279 /* Initially all the pages must be sent. */
280 memset(to_send, 0xff, bitmap_size);
282 if (mlock(to_send, bitmap_size)) {
283 ERR("Unable to mlock to_send");
284 goto out;
285 }
286 if (mlock(to_skip, bitmap_size)) {
287 ERR("Unable to mlock to_skip");
288 goto out;
289 }
291 } else {
293 /* This is a non-live suspend. Issue the call back to get the
294 domain suspended */
296 last_iter = 1;
298 if (suspend_and_state(suspend, xc_handle, io_fd, dom, &info)) {
299 ERR("Domain appears not to have suspended");
300 goto out;
301 }
303 }
305 sent_last_iter = max_pfn;
306 total_sent = 0;
308 for (iter = 1; ; iter++) {
309 unsigned int sent_this_iter, skip_this_iter;
310 unsigned long N;
312 sent_this_iter = 0;
313 skip_this_iter = 0;
315 /* Get the pfn list, as it may change. */
316 if (xc_ia64_get_pfn_list(xc_handle, dom, page_array,
317 0, max_pfn) != max_pfn) {
318 ERR("Could not get the page frame list");
319 goto out;
320 }
322 /* Dirtied pages won't be saved.
323 slightly wasteful to peek the whole array evey time,
324 but this is fast enough for the moment. */
325 if (!last_iter) {
326 if (xc_ia64_shadow_control(xc_handle, dom,
327 DOM0_SHADOW_CONTROL_OP_PEEK,
328 to_skip, max_pfn, NULL) != max_pfn) {
329 ERR("Error peeking shadow bitmap");
330 goto out;
331 }
332 }
334 /* Start writing out the saved-domain record. */
335 for (N = 0; N < max_pfn; N++) {
336 if (page_array[N] == INVALID_MFN)
337 continue;
338 if (!last_iter) {
339 if (test_bit(N, to_skip) && test_bit(N, to_send))
340 skip_this_iter++;
341 if (test_bit(N, to_skip) || !test_bit(N, to_send))
342 continue;
343 }
345 if (debug)
346 fprintf(stderr, "xc_linux_save: page %lx (%lu/%lu)\n",
347 page_array[N], N, max_pfn);
349 mem = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
350 PROT_READ|PROT_WRITE, page_array[N]);
351 if (mem == NULL) {
352 /* The page may have move.
353 It will be remarked dirty.
354 FIXME: to be tracked. */
355 fprintf(stderr, "cannot map page %lx: %s\n",
356 page_array[N], strerror (errno));
357 continue;
358 }
360 if (!write_exact(io_fd, &N, sizeof(N))) {
361 ERR("write: max_pfn");
362 goto out;
363 }
365 if (write(io_fd, mem, PAGE_SIZE) != PAGE_SIZE) {
366 ERR("Error when writing to state file (5)");
367 goto out;
368 }
369 munmap(mem, PAGE_SIZE);
370 sent_this_iter++;
371 total_sent++;
372 }
374 if (last_iter)
375 break;
377 DPRINTF(" %d: sent %d, skipped %d\n",
378 iter, sent_this_iter, skip_this_iter );
380 if (live) {
381 if ( /* ((sent_this_iter > sent_last_iter) && RATE_IS_MAX()) || */
382 (iter >= max_iters) || (sent_this_iter+skip_this_iter < 50) ||
383 (total_sent > max_pfn*max_factor)) {
384 DPRINTF("Start last iteration\n");
385 last_iter = 1;
387 if (suspend_and_state(suspend, xc_handle, io_fd, dom, &info)) {
388 ERR("Domain appears not to have suspended");
389 goto out;
390 }
391 }
393 /* Pages to be sent are pages which were dirty. */
394 if (xc_ia64_shadow_control(xc_handle, dom,
395 DOM0_SHADOW_CONTROL_OP_CLEAN,
396 to_send, max_pfn, NULL ) != max_pfn) {
397 ERR("Error flushing shadow PT");
398 goto out;
399 }
401 sent_last_iter = sent_this_iter;
403 //print_stats(xc_handle, dom, sent_this_iter, &stats, 1);
404 }
406 }
408 fprintf (stderr, "All memory is saved\n");
410 /* terminate */
411 {
412 unsigned long pfn = INVALID_MFN;
413 if (!write_exact(io_fd, &pfn, sizeof(pfn))) {
414 ERR("Error when writing to state file (6)");
415 goto out;
416 }
417 }
419 /* Send through a list of all the PFNs that were not in map at the close */
420 {
421 unsigned int i,j;
422 unsigned long pfntab[1024];
424 for (i = 0, j = 0; i < max_pfn; i++) {
425 if (page_array[i] == INVALID_MFN)
426 j++;
427 }
429 if (!write_exact(io_fd, &j, sizeof(unsigned int))) {
430 ERR("Error when writing to state file (6a)");
431 goto out;
432 }
434 for (i = 0, j = 0; i < max_pfn; ) {
436 if (page_array[i] == INVALID_MFN)
437 pfntab[j++] = i;
439 i++;
440 if (j == 1024 || i == max_pfn) {
441 if (!write_exact(io_fd, &pfntab, sizeof(unsigned long)*j)) {
442 ERR("Error when writing to state file (6b)");
443 goto out;
444 }
445 j = 0;
446 }
447 }
449 }
451 if (xc_vcpu_getcontext(xc_handle, dom, 0, &ctxt)) {
452 ERR("Could not get vcpu context");
453 goto out;
454 }
456 if (!write_exact(io_fd, &ctxt, sizeof(ctxt))) {
457 ERR("Error when writing to state file (1)");
458 goto out;
459 }
461 mem = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
462 PROT_READ|PROT_WRITE, ctxt.privregs_pfn);
463 if (mem == NULL) {
464 ERR("cannot map privreg page");
465 goto out;
466 }
467 if (write(io_fd, mem, PAGE_SIZE) != PAGE_SIZE) {
468 ERR("Error when writing privreg to state file (5)");
469 goto out;
470 }
471 munmap(mem, PAGE_SIZE);
473 if (!write_exact(io_fd, live_shinfo, PAGE_SIZE)) {
474 ERR("Error when writing to state file (1)");
475 goto out;
476 }
478 /* Success! */
479 rc = 0;
481 out:
483 if (live) {
484 if (xc_ia64_shadow_control(xc_handle, dom, DOM0_SHADOW_CONTROL_OP_OFF,
485 NULL, 0, NULL ) < 0) {
486 DPRINTF("Warning - couldn't disable shadow mode");
487 }
488 }
490 free(page_array);
491 free(to_send);
492 free(to_skip);
493 if (live_shinfo)
494 munmap(live_shinfo, PAGE_SIZE);
496 fprintf(stderr,"Save exit rc=%d\n",rc);
498 return !!rc;
499 }
501 /*
502 * Local variables:
503 * mode: C
504 * c-set-style: "BSD"
505 * c-basic-offset: 4
506 * tab-width: 4
507 * indent-tabs-mode: nil
508 * End:
509 */