diff options
author | Shriram Rajagopalan <rshriram@cs.ubc.ca> | 2011-04-08 16:49:04 +0100 |
---|---|---|
committer | Shriram Rajagopalan <rshriram@cs.ubc.ca> | 2011-04-08 16:49:04 +0100 |
commit | d732509e34bc6278ec2d050808f8de15e5872c4d (patch) | |
tree | 5d3c92ca7311783255ecb4a67b585978534aab62 /tools/python | |
parent | be1806baf415c623403e4599fa197646b97a5f3d (diff) | |
download | xen-d732509e34bc6278ec2d050808f8de15e5872c4d.tar.gz xen-d732509e34bc6278ec2d050808f8de15e5872c4d.tar.bz2 xen-d732509e34bc6278ec2d050808f8de15e5872c4d.zip |
remus: proper cleanup on checkpoint failure.
While running remus, when an error occurs during checkpointing
(e.g., a timeout on the primary, a failure to checkpoint the network
buffer or the disk, or even a communication failure), the domU is
sometimes left in a suspended state on the primary. Instead of blindly
closing the checkpoint file handle, attempt to resume the domain before
closing it.
Signed-off-by: Shriram Rajagopalan <rshriram@cs.ubc.ca>
Committed-by: Ian Jackson <ian.jackson@eu.citrix.com>
Diffstat (limited to 'tools/python')
-rw-r--r-- | tools/python/xen/lowlevel/checkpoint/checkpoint.c | 3 | ||||
-rw-r--r-- | tools/python/xen/remus/save.py | 6 |
2 files changed, 8 insertions, 1 deletions
```diff
diff --git a/tools/python/xen/lowlevel/checkpoint/checkpoint.c b/tools/python/xen/lowlevel/checkpoint/checkpoint.c
index 7545d7deb6..1581b64095 100644
--- a/tools/python/xen/lowlevel/checkpoint/checkpoint.c
+++ b/tools/python/xen/lowlevel/checkpoint/checkpoint.c
@@ -80,6 +80,9 @@ static PyObject* pycheckpoint_close(PyObject* obj, PyObject* args)
 {
   CheckpointObject* self = (CheckpointObject*)obj;
 
+  if (checkpoint_resume(&self->cps) < 0)
+    fprintf(stderr, "%s\n", checkpoint_error(&self->cps));
+
   checkpoint_close(&self->cps);
 
   Py_XDECREF(self->suspend_cb);
diff --git a/tools/python/xen/remus/save.py b/tools/python/xen/remus/save.py
index 71517da8c1..9858aec571 100644
--- a/tools/python/xen/remus/save.py
+++ b/tools/python/xen/remus/save.py
@@ -158,9 +158,13 @@ class Saver(object):
             self.checkpointer.open(self.vm.domid)
             self.checkpointer.start(self.fd, self.suspendcb, self.resumecb,
                                     self.checkpointcb, self.interval)
-            self.checkpointer.close()
         except xen.lowlevel.checkpoint.error, e:
             raise CheckpointError(e)
+        finally:
+            try: #errors in checkpoint close are not critical atm.
+                self.checkpointer.close()
+            except:
+                pass
 
     def _resume(self):
         """low-overhead version of XendDomainInfo.resumeDomain"""
```