aboutsummaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
authorShriram Rajagopalan <rshriram@cs.ubc.ca>2012-05-18 11:00:44 +0100
committerShriram Rajagopalan <rshriram@cs.ubc.ca>2012-05-18 11:00:44 +0100
commitd473b016fb7ab10d46fcfb37f990037ac0368a35 (patch)
treeb866f568950dfc367a1e2121eab72a78c6304505 /tools
parentbea0ff6e774d171bf6acb451f75dc194f4002782 (diff)
downloadxen-d473b016fb7ab10d46fcfb37f990037ac0368a35.tar.gz
xen-d473b016fb7ab10d46fcfb37f990037ac0368a35.tar.bz2
xen-d473b016fb7ab10d46fcfb37f990037ac0368a35.zip
libxl: Remus - suspend/postflush/commit callbacks
* Add libxl callback functions for Remus checkpoint suspend, postflush (aka resume) and checkpoint commit callbacks. * suspend callback is a stub that just bounces off libxl__domain_suspend_common_callback - which suspends the domain and saves the device model state to a file. * resume callback currently just resumes the domain (and the device model). * commit callback just writes out the saved device model state to the network and sleeps for the checkpoint interval. * Introduce a new public API, libxl_domain_remus_start (currently a stub) that sets up the network and disk buffer and initiates continuous checkpointing. * Future patches will augment these callbacks/functions with more functionalities like issuing network buffer plug/unplug commands, disk checkpoint commands, etc. Signed-off-by: Shriram Rajagopalan <rshriram@cs.ubc.ca> Acked-by: Ian Campbell <ian.campbell@citrix.com> Committed-by: Ian Campbell <ian.campbell@citrix.com>
Diffstat (limited to 'tools')
-rw-r--r--tools/libxc/xenguest.h23
-rw-r--r--tools/libxl/libxl.c39
-rw-r--r--tools/libxl/libxl.h2
-rw-r--r--tools/libxl/libxl_dom.c56
-rw-r--r--tools/libxl/libxl_internal.h3
-rw-r--r--tools/libxl/libxl_types.idl6
6 files changed, 123 insertions, 6 deletions
diff --git a/tools/libxc/xenguest.h b/tools/libxc/xenguest.h
index 6435f659af..91d53f7426 100644
--- a/tools/libxc/xenguest.h
+++ b/tools/libxc/xenguest.h
@@ -33,10 +33,29 @@
/* callbacks provided by xc_domain_save */
struct save_callbacks {
+ /* Called after expiration of checkpoint interval,
+ * to suspend the guest.
+ */
int (*suspend)(void* data);
- /* callback to rendezvous with external checkpoint functions */
+
+ /* Called after the guest's dirty pages have been
+ * copied into an output buffer.
+ * Callback function resumes the guest & the device model,
+ * returns to xc_domain_save.
+ * xc_domain_save then flushes the output buffer, while the
+ * guest continues to run.
+ */
int (*postcopy)(void* data);
- /* returns:
+
+ /* Called after the memory checkpoint has been flushed
+ * out into the network. Typical actions performed in this
+ * callback include:
+ * (a) send the saved device model state (for HVM guests),
+ * (b) wait for checkpoint ack
+ * (c) release the network output buffer pertaining to the acked checkpoint.
+ * (d) sleep for the checkpoint interval.
+ *
+ * returns:
* 0: terminate checkpointing gracefully
* 1: take another checkpoint */
int (*checkpoint)(void* data);
diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c
index cd11d2b2e8..f24d021c35 100644
--- a/tools/libxl/libxl.c
+++ b/tools/libxl/libxl.c
@@ -619,6 +619,41 @@ libxl_vminfo * libxl_list_vm(libxl_ctx *ctx, int *nb_vm)
return ptr;
}
+/* Start Remus checkpointing of domid over send_fd.  Returns ERROR_INVAL if
+ * info is NULL, otherwise the result of libxl__domain_suspend_common().
+ * TODO: Explicit Checkpoint acknowledgements via recv_fd (unused so far). */
+int libxl_domain_remus_start(libxl_ctx *ctx, libxl_domain_remus_info *info,
+                             uint32_t domid, int send_fd, int recv_fd)
+{
+    GC_INIT(ctx);
+    libxl_domain_type type = libxl__domain_type(gc, domid);
+    int rc = 0;
+
+    if (info == NULL) {
+        LIBXL__LOG(ctx, LIBXL__LOG_ERROR,
+                   "No remus_info structure supplied for domain %u", domid);
+        rc = ERROR_INVAL;
+        goto remus_fail;
+    }
+
+    /* TBD: Remus setup - i.e. attach qdisc, enable disk buffering, etc */
+
+    /* Point of no return */
+    rc = libxl__domain_suspend_common(gc, domid, send_fd, type, /* live */ 1,
+                                      /* debug */ 0, info);
+
+    /*
+     * With Remus, if we reach this point, it means either
+     * backup died or some network error occurred preventing us
+     * from sending checkpoints.
+     */
+
+    /* TBD: Remus cleanup - i.e. detach qdisc, release other resources. */
+ remus_fail:
+    GC_FREE;
+    return rc;
+}
+
int libxl_domain_suspend(libxl_ctx *ctx, libxl_domain_suspend_info *info,
uint32_t domid, int fd)
{
@@ -628,7 +663,9 @@ int libxl_domain_suspend(libxl_ctx *ctx, libxl_domain_suspend_info *info,
int debug = info != NULL && info->flags & XL_SUSPEND_DEBUG;
int rc = 0;
- rc = libxl__domain_suspend_common(gc, domid, fd, type, live, debug);
+ rc = libxl__domain_suspend_common(gc, domid, fd, type, live, debug,
+ /* No Remus */ NULL);
+
if (!rc && type == LIBXL_DOMAIN_TYPE_HVM)
rc = libxl__domain_save_device_model(gc, domid, fd);
GC_FREE;
diff --git a/tools/libxl/libxl.h b/tools/libxl/libxl.h
index 10bd053226..316d290579 100644
--- a/tools/libxl/libxl.h
+++ b/tools/libxl/libxl.h
@@ -525,6 +525,8 @@ int libxl_domain_create_restore(libxl_ctx *ctx, libxl_domain_config *d_config,
void libxl_domain_config_init(libxl_domain_config *d_config);
void libxl_domain_config_dispose(libxl_domain_config *d_config);
+int libxl_domain_remus_start(libxl_ctx *ctx, libxl_domain_remus_info *info,
+ uint32_t domid, int send_fd, int recv_fd);
int libxl_domain_suspend(libxl_ctx *ctx, libxl_domain_suspend_info *info,
uint32_t domid, int fd);
diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
index 9d2e8b4fd4..e42730f846 100644
--- a/tools/libxl/libxl_dom.c
+++ b/tools/libxl/libxl_dom.c
@@ -566,6 +566,8 @@ struct suspendinfo {
int hvm;
unsigned int flags;
int guest_responded;
+ int save_fd; /* Migration stream fd (for Remus) */
+ int interval; /* checkpoint interval (for Remus) */
};
static int libxl__domain_suspend_common_switch_qemu_logdirty(int domid, unsigned int enable, void *data)
@@ -848,9 +850,43 @@ static int libxl__toolstack_save(uint32_t domid, uint8_t **buf,
return 0;
}
+static int libxl__remus_domain_suspend_callback(void *data)
+{
+ /* Remus suspend callback; for now it only forwards data to the common suspend callback and returns its result. TODO: Issue disk and network checkpoint reqs. */
+ return libxl__domain_suspend_common_callback(data);
+}
+
+/* Remus postcopy callback: returns 0 if libxl_domain_resume() fails, else 1. */
+static int libxl__remus_domain_resume_callback(void *data)
+{
+    struct suspendinfo *susp = data;
+    int failed;
+
+    /* Resumes the domain and the device model */
+    failed = libxl_domain_resume(libxl__gc_owner(susp->gc), susp->domid,
+                                 /* Fast Suspend */ 1);
+    /* TODO: Deal with disk. Start a new network output buffer */
+    return failed ? 0 : 1;
+}
+
+/* Remus checkpoint callback: returns 0 to stop checkpointing, 1 to go on. */
+static int libxl__remus_domain_checkpoint_callback(void *data)
+{
+    struct suspendinfo *susp = data;
+    /* This would go into tailbuf. */
+    if (susp->hvm && libxl__domain_save_device_model(susp->gc, susp->domid,
+                                                     susp->save_fd) != 0)
+        return 0;
+    /* TODO: Wait for disk and memory ack, release network buffer */
+    /* NOTE(review): POSIX lets usleep() reject >= 1,000,000 us - confirm. */
+    usleep(susp->interval * 1000); /* interval * 1000 us, i.e. interval ms */
+    return 1;
+}
+
int libxl__domain_suspend_common(libxl__gc *gc, uint32_t domid, int fd,
libxl_domain_type type,
- int live, int debug)
+ int live, int debug,
+ const libxl_domain_remus_info *r_info)
{
libxl_ctx *ctx = libxl__gc_owner(gc);
int flags;
@@ -881,10 +917,20 @@ int libxl__domain_suspend_common(libxl__gc *gc, uint32_t domid, int fd,
return ERROR_INVAL;
}
+ memset(&si, 0, sizeof(si));
flags = (live) ? XCFLAGS_LIVE : 0
| (debug) ? XCFLAGS_DEBUG : 0
| (hvm) ? XCFLAGS_HVM : 0;
+ if (r_info != NULL) {
+ si.interval = r_info->interval;
+ if (r_info->compression)
+ flags |= XCFLAGS_CHECKPOINT_COMPRESS;
+ si.save_fd = fd;
+ }
+ else
+ si.save_fd = -1;
+
si.domid = domid;
si.flags = flags;
si.hvm = hvm;
@@ -908,7 +954,13 @@ int libxl__domain_suspend_common(libxl__gc *gc, uint32_t domid, int fd,
}
memset(&callbacks, 0, sizeof(callbacks));
- callbacks.suspend = libxl__domain_suspend_common_callback;
+ if (r_info != NULL) {
+ callbacks.suspend = libxl__remus_domain_suspend_callback;
+ callbacks.postcopy = libxl__remus_domain_resume_callback;
+ callbacks.checkpoint = libxl__remus_domain_checkpoint_callback;
+ } else
+ callbacks.suspend = libxl__domain_suspend_common_callback;
+
callbacks.switch_qemu_logdirty = libxl__domain_suspend_common_switch_qemu_logdirty;
callbacks.toolstack_save = libxl__toolstack_save;
callbacks.data = &si;
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index e76723d124..49d01a857a 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -757,7 +757,8 @@ _hidden int libxl__domain_restore_common(libxl__gc *gc, uint32_t domid,
int fd);
_hidden int libxl__domain_suspend_common(libxl__gc *gc, uint32_t domid, int fd,
libxl_domain_type type,
- int live, int debug);
+ int live, int debug,
+ const libxl_domain_remus_info *r_info);
_hidden const char *libxl__device_model_savefile(libxl__gc *gc, uint32_t domid);
_hidden int libxl__domain_suspend_device_model(libxl__gc *gc, uint32_t domid);
_hidden int libxl__domain_resume_device_model(libxl__gc *gc, uint32_t domid);
diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl
index 551e367107..a21bd85fa8 100644
--- a/tools/libxl/libxl_types.idl
+++ b/tools/libxl/libxl_types.idl
@@ -454,6 +454,12 @@ libxl_sched_sedf_domain = Struct("sched_sedf_domain", [
("weight", integer),
])
+libxl_domain_remus_info = Struct("domain_remus_info",[ # Remus options handed to libxl_domain_remus_start
+ ("interval", integer), # checkpoint interval; the checkpoint callback sleeps interval * 1000 microseconds
+ ("blackhole", bool), # not read by any code in this patch -- TODO confirm intended meaning
+ ("compression", bool), # when set, XCFLAGS_CHECKPOINT_COMPRESS is added to the save flags
+ ])
+
libxl_event_type = Enumeration("event_type", [
(1, "DOMAIN_SHUTDOWN"),
(2, "DOMAIN_DEATH"),