From abc3a3a1c06d59703992bb6502ef9b5e5f38c386 Mon Sep 17 00:00:00 2001 From: Andrew Cooper Date: Sun, 14 Jun 2015 20:49:19 +0100 Subject: [PATCH] tools/libx{c,l}: Introduce restore_callbacks.checkpoint() And call it when a checkpoint record is found in the libxc stream. Some parts of this patch have been based on patches from the COLO series. Signed-off-by: Wen Congyang Signed-off-by: Yang Hongyang Signed-off-by: Andrew Cooper Acked-by: Ian Campbell CC: Ian Jackson CC: Wei Liu --- v3: Named constants for the API v2: Borrow sufficient fragments from several COLO patches to get BROKEN_CHANNEL and checkpoint failover to function. --- tools/libxc/include/xenguest.h | 7 ++++ tools/libxc/xc_sr_common.h | 7 ++-- tools/libxc/xc_sr_restore.c | 53 +++++++++++++++++++++--------- tools/libxl/libxl_save_msgs_gen.pl | 2 +- 4 files changed, 51 insertions(+), 18 deletions(-) diff --git a/tools/libxc/include/xenguest.h b/tools/libxc/include/xenguest.h index 7581263b82..e95af54dac 100644 --- a/tools/libxc/include/xenguest.h +++ b/tools/libxc/include/xenguest.h @@ -102,6 +102,13 @@ struct restore_callbacks { int (*toolstack_restore)(uint32_t domid, const uint8_t *buf, uint32_t size, void* data); + /* A checkpoint record has been found in the stream. + * returns: */ +#define XGR_CHECKPOINT_ERROR 0 /* Terminate processing */ +#define XGR_CHECKPOINT_SUCCESS 1 /* Continue reading more data from the stream */ +#define XGR_CHECKPOINT_FAILOVER 2 /* Failover and resume VM */ + int (*checkpoint)(void* data); + /* to be provided as the last argument to each callback function */ void* data; }; diff --git a/tools/libxc/xc_sr_common.h b/tools/libxc/xc_sr_common.h index 08c66db966..1f4d4e408d 100644 --- a/tools/libxc/xc_sr_common.h +++ b/tools/libxc/xc_sr_common.h @@ -130,10 +130,13 @@ struct xc_sr_restore_ops * Process an individual record from the stream. The caller shall take * care of processing common records (e.g. END, PAGE_DATA). 
* - * @return 0 for success, -1 for failure, or the sentinel value - * RECORD_NOT_PROCESSED. + * @return 0 for success, -1 for failure, or the following sentinels: + * - RECORD_NOT_PROCESSED + * - BROKEN_CHANNEL: under Remus/COLO, this means the master may be dead, and + * a failover is needed. */ #define RECORD_NOT_PROCESSED 1 +#define BROKEN_CHANNEL 2 int (*process_record)(struct xc_sr_context *ctx, struct xc_sr_record *rec); /** diff --git a/tools/libxc/xc_sr_restore.c b/tools/libxc/xc_sr_restore.c index 9e27dba312..18ba411888 100644 --- a/tools/libxc/xc_sr_restore.c +++ b/tools/libxc/xc_sr_restore.c @@ -1,5 +1,7 @@ #include +#include <assert.h> + #include "xc_sr_common.h" /* @@ -472,7 +474,7 @@ static int process_record(struct xc_sr_context *ctx, struct xc_sr_record *rec); static int handle_checkpoint(struct xc_sr_context *ctx) { xc_interface *xch = ctx->xch; - int rc = 0; + int rc = 0, ret; unsigned i; if ( !ctx->restore.checkpointed ) @@ -482,6 +484,21 @@ static int handle_checkpoint(struct xc_sr_context *ctx) goto err; } + ret = ctx->restore.callbacks->checkpoint(ctx->restore.callbacks->data); + switch ( ret ) + { + case XGR_CHECKPOINT_SUCCESS: + break; + + case XGR_CHECKPOINT_FAILOVER: + rc = BROKEN_CHANNEL; + goto err; + + default: /* Other fatal error */ + rc = -1; + goto err; + } + if ( ctx->restore.buffer_all_records ) { IPRINTF("All records buffered"); @@ -560,19 +577,6 @@ static int process_record(struct xc_sr_context *ctx, struct xc_sr_record *rec) free(rec->data); rec->data = NULL; - if ( rc == RECORD_NOT_PROCESSED ) - { - if ( rec->type & REC_TYPE_OPTIONAL ) - DPRINTF("Ignoring optional record %#x (%s)", - rec->type, rec_type_to_str(rec->type)); - else - { - ERROR("Mandatory record %#x (%s) not handled", - rec->type, rec_type_to_str(rec->type)); - rc = -1; - } - } - return rc; } @@ -678,7 +682,22 @@ static int restore(struct xc_sr_context *ctx) else { rc = process_record(ctx, &rec); - if ( rc ) + if ( rc == RECORD_NOT_PROCESSED ) + { + if ( rec.type & 
REC_TYPE_OPTIONAL ) + DPRINTF("Ignoring optional record %#x (%s)", + rec.type, rec_type_to_str(rec.type)); + else + { + ERROR("Mandatory record %#x (%s) not handled", + rec.type, rec_type_to_str(rec.type)); + rc = -1; + goto err; + } + } + else if ( rc == BROKEN_CHANNEL ) + goto remus_failover; + else if ( rc ) goto err; } @@ -735,6 +754,10 @@ int xc_domain_restore2(xc_interface *xch, int io_fd, uint32_t dom, ctx.restore.checkpointed = checkpointed_stream; ctx.restore.callbacks = callbacks; + /* Sanity checks for callbacks. */ + if ( checkpointed_stream ) + assert(callbacks->checkpoint); + IPRINTF("In experimental %s", __func__); DPRINTF("fd %d, dom %u, hvm %u, pae %u, superpages %d" ", checkpointed_stream %d", io_fd, dom, hvm, pae, diff --git a/tools/libxl/libxl_save_msgs_gen.pl b/tools/libxl/libxl_save_msgs_gen.pl index 6b4b65e94b..825d5ccc36 100755 --- a/tools/libxl/libxl_save_msgs_gen.pl +++ b/tools/libxl/libxl_save_msgs_gen.pl @@ -25,7 +25,7 @@ our @msgs = ( 'unsigned long', 'total'] ], [ 3, 'scxA', "suspend", [] ], [ 4, 'scxA', "postcopy", [] ], - [ 5, 'scxA', "checkpoint", [] ], + [ 5, 'srcxA', "checkpoint", [] ], [ 6, 'scxA', "switch_qemu_logdirty", [qw(int domid unsigned enable)] ], # toolstack_save done entirely `by hand' -- 2.39.5