author    Shriram Rajagopalan <rshriram@cs.ubc.ca>  2011-12-01 15:36:15 +0000
committer Shriram Rajagopalan <rshriram@cs.ubc.ca>  2011-12-01 15:36:15 +0000
commit    f6b3d39f5d316079add221d893e28009354b3f07 (patch)
tree      765693b5557fcde1fe2ae9221b4ea4b97ad4aba0 /tools/libxc/xc_domain_save.c
parent    de869779a0b7c411a69b787ec01b485492b40f32 (diff)
tools/libxc: Remus Checkpoint Compression
Instead of sending dirty pages of guest memory as-is, use a simple compression algorithm that sends an RLE-encoded XOR of each page against its last-sent copy. A small LRU cache is used to hold recently dirtied pages. Pagetable pages are sent as-is, as they are canonicalized on the sender side and uncanonicalized at the receiver.

[ Fixed up a conflict in xg_save_restore.h. I had to increase the ID values used from -11 and -12 to -12 and -13 because -11 had been taken by ..._HVM_VIRIDIAN in the meantime. -iwj ]

Signed-off-by: Shriram Rajagopalan <rshriram@cs.ubc.ca>
Acked-by: Brendan Cully <brendan@cs.ubc.ca>
Acked-by: Ian Jackson <ian.jackson@eu.citrix.com>
Committed-by: Ian Jackson <ian.jackson@eu.citrix.com>
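The page-delta scheme described above — XOR each dirty page against its last-sent copy from the LRU cache, then run-length encode so that unchanged (zero) runs cost almost nothing — could look roughly like the sketch below. This is a hypothetical illustration: the patch's actual encoder lives in tools/libxc/xc_compression.c, which this diff does not show, and xor_rle_encode() is a made-up name.

    #include <stdint.h>
    #include <string.h>

    #define PAGE_WORDS (4096 / sizeof(uint32_t))

    /* Sketch only: delta-encode 'page' against 'cached', the copy of this
     * pfn last sent. Words equal to the cached copy XOR to zero and are
     * elided; each run of changed words is emitted as a
     * (skip, count, words...) record. Returns bytes written to 'out';
     * the caller should give 'out' ample headroom (cf. the
     * XC_PAGE_SIZE * 2 reservation in write_compressed() below) and fall
     * back to sending the raw page when the delta is not a win. */
    static size_t xor_rle_encode(const uint32_t *page, uint32_t *cached,
                                 uint8_t *out)
    {
        size_t i = 0, pos = 0;

        while (i < PAGE_WORDS) {
            uint32_t skip = 0, count = 0;

            while (i < PAGE_WORDS && page[i] == cached[i])  /* unchanged run */
                i++, skip++;
            size_t start = i;
            while (i < PAGE_WORDS && page[i] != cached[i])  /* changed run */
                i++, count++;
            if (count == 0)
                break;  /* a trailing unchanged run needs no record */

            memcpy(out + pos, &skip, sizeof(skip));    pos += sizeof(skip);
            memcpy(out + pos, &count, sizeof(count));  pos += sizeof(count);
            memcpy(out + pos, &page[start], count * sizeof(uint32_t));
            pos += count * sizeof(uint32_t);
        }
        /* Refresh the cache so the next checkpoint diffs against this copy. */
        memcpy(cached, page, PAGE_WORDS * sizeof(uint32_t));
        return pos;
    }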
Diffstat (limited to 'tools/libxc/xc_domain_save.c')
-rw-r--r--  tools/libxc/xc_domain_save.c | 218 ++++++++++++++++++++++++++++---
 1 file changed, 210 insertions(+), 8 deletions(-)
diff --git a/tools/libxc/xc_domain_save.c b/tools/libxc/xc_domain_save.c
index 88edd37fe0..a6bb89475f 100644
--- a/tools/libxc/xc_domain_save.c
+++ b/tools/libxc/xc_domain_save.c
@@ -218,6 +218,56 @@ static inline int write_uncached(xc_interface *xch,
return noncached_write(xch, ob, fd, buf, len);
}
+static int write_compressed(xc_interface *xch, comp_ctx *compress_ctx,
+ int dobuf, struct outbuf* ob, int fd)
+{
+ int rc = 0;
+ int header = sizeof(int) + sizeof(unsigned long);
+ int marker = XC_SAVE_ID_COMPRESSED_DATA;
+ unsigned long compbuf_len = 0;
+
+ do
+ {
+ /* check for available space (at least 8k) */
+ if ((ob->pos + header + XC_PAGE_SIZE * 2) > ob->size)
+ {
+ if (outbuf_flush(xch, ob, fd) < 0)
+ {
+ ERROR("Error when flushing outbuf intermediate");
+ return -1;
+ }
+ }
+
+ rc = xc_compression_compress_pages(xch, compress_ctx,
+ ob->buf + ob->pos + header,
+ ob->size - ob->pos - header,
+ &compbuf_len);
+ /* No pages left to compress in this round; we are done. */
+ if (!rc)
+ return 0;
+
+ if (outbuf_hardwrite(xch, ob, fd, &marker, sizeof(marker)) < 0)
+ {
+ PERROR("Error when writing marker (errno %d)", errno);
+ return -1;
+ }
+
+ if (outbuf_hardwrite(xch, ob, fd, &compbuf_len, sizeof(compbuf_len)) < 0)
+ {
+ PERROR("Error when writing compbuf_len (errno %d)", errno);
+ return -1;
+ }
+
+ ob->pos += (size_t) compbuf_len;
+ if (!dobuf && outbuf_flush(xch, ob, fd) < 0)
+ {
+ ERROR("Error when writing compressed chunk");
+ return -1;
+ }
+ } while (rc != 0);
+
+ return 0;
+}
+
struct time_stats {
struct timeval wall;
long long d0_cpu, d1_cpu;
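Each chunk emitted by write_compressed() above is self-describing on the wire: the XC_SAVE_ID_COMPRESSED_DATA marker, then compbuf_len, then that many bytes of compressed page data. A receiver can peel chunks off as in the following sketch. This is an assumption-laden illustration, not the patch's restore code: the real consumer is the restore loop in xc_domain_restore.c, read_exact() is the usual libxc helper, and decompress_chunk() is a hypothetical stand-in for the xc_compression.c decode routine.

    /* Sketch: consume XC_SAVE_ID_COMPRESSED_DATA chunks until some other
     * save-record marker shows up, and return that marker to the caller. */
    static int read_compressed_chunks(int io_fd, char *compbuf,
                                      unsigned long compbuf_size)
    {
        int marker;
        unsigned long len;

        for (;;) {
            if (read_exact(io_fd, &marker, sizeof(marker)))
                return -1;
            if (marker != XC_SAVE_ID_COMPRESSED_DATA)
                return marker;  /* some other XC_SAVE_ID_* element */
            if (read_exact(io_fd, &len, sizeof(len)) || len > compbuf_size)
                return -1;
            if (read_exact(io_fd, compbuf, len))
                return -1;
            decompress_chunk(compbuf, len);  /* undo the per-page XOR+RLE */
        }
    }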
@@ -815,11 +865,35 @@ int xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom, uint32_t max_iter
unsigned long mfn;
- struct outbuf ob;
+ /* Without checkpoint compression, the dirty pages, pfn arrays
+ * and tailbuf (vcpu ctx, shared info page, etc.) are written
+ * directly to outbuf. All of this is done while the domain is
+ * suspended.
+ *
+ * When checkpoint compression is enabled, the dirty pages are
+ * buffered, compressed "after" the domain is resumed and then
+ * written to outbuf. Since tailbuf data are collected while a
+ * domain is suspended, they cannot be directly written to the
+ * outbuf as there is no dirty page data preceding the tailbuf.
+ *
+ * So, two output buffers are maintained. Tailbuf data goes into
+ * ob_tailbuf. The dirty pages are compressed after resuming the
+ * domain and written to ob_pagebuf. ob_tailbuf is then appended
+ * to ob_pagebuf and finally flushed out.
+ */
+ struct outbuf ob_pagebuf, ob_tailbuf, *ob = NULL;
struct save_ctx _ctx;
struct save_ctx *ctx = &_ctx;
struct domain_info_context *dinfo = &ctx->dinfo;
+ /* Compression context */
+ comp_ctx *compress_ctx = NULL;
+ /* Even if XCFLAGS_CHECKPOINT_COMPRESS is set, we enable compression only
+ * after sending XC_SAVE_ID_ENABLE_COMPRESSION and the tailbuf for
+ * the first time.
+ */
+ int compressing = 0;
+
int completed = 0;
if ( hvm && !callbacks->switch_qemu_logdirty )
@@ -829,7 +903,7 @@ int xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom, uint32_t max_iter
return 1;
}
- outbuf_init(xch, &ob, OUTBUF_SIZE);
+ outbuf_init(xch, &ob_pagebuf, OUTBUF_SIZE);
memset(ctx, 0, sizeof(*ctx));
@@ -917,6 +991,16 @@ int xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom, uint32_t max_iter
}
}
+ if ( flags & XCFLAGS_CHECKPOINT_COMPRESS )
+ {
+ if (!(compress_ctx = xc_compression_create_context(xch, dinfo->p2m_size)))
+ {
+ ERROR("Failed to create compression context");
+ goto out;
+ }
+ outbuf_init(xch, &ob_tailbuf, OUTBUF_SIZE/4);
+ }
+
last_iter = !live;
/* pretend we sent all the pages last iteration */
@@ -1025,9 +1109,11 @@ int xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom, uint32_t max_iter
}
copypages:
-#define wrexact(fd, buf, len) write_buffer(xch, last_iter, &ob, (fd), (buf), (len))
-#define wruncached(fd, live, buf, len) write_uncached(xch, last_iter, &ob, (fd), (buf), (len))
+#define wrexact(fd, buf, len) write_buffer(xch, last_iter, ob, (fd), (buf), (len))
+#define wruncached(fd, live, buf, len) write_uncached(xch, last_iter, ob, (fd), (buf), (len))
+#define wrcompressed(fd) write_compressed(xch, compress_ctx, last_iter, ob, (fd))
+ ob = &ob_pagebuf; /* Holds pfn_types, pages/compressed pages */
/* Now write out each data page, canonicalising page tables as we go... */
for ( ; ; )
{
@@ -1270,7 +1356,7 @@ int xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom, uint32_t max_iter
{
/* If the page is not a normal data page, write out any
run of pages we may have previously accumulated */
- if ( run )
+ if ( !compressing && run )
{
if ( wruncached(io_fd, live,
(char*)region_base+(PAGE_SIZE*(j-run)),
@@ -1305,7 +1391,41 @@ int xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom, uint32_t max_iter
goto out;
}
- if ( wruncached(io_fd, live, page, PAGE_SIZE) != PAGE_SIZE )
+ if (compressing)
+ {
+ int c_err;
+ /* Mark pagetable page to be sent uncompressed */
+ c_err = xc_compression_add_page(xch, compress_ctx, page,
+ pfn, 1 /* raw page */);
+ if (c_err == -2) /* OOB PFN */
+ {
+ ERROR("Could not add pagetable page "
+ "(pfn:%" PRIpfn "to page buffer\n", pfn);
+ goto out;
+ }
+
+ if (c_err == -1)
+ {
+ /*
+ * We are out of buffer space to hold dirty
+ * pages. Compress and flush the current buffer
+ * to make space. This is a corner case, that
+ * slows down checkpointing as the compression
+ * happens while the domain is suspended. This
+ * happens seldom; if you find it occurring
+ * frequently, increase the PAGE_BUFFER_SIZE
+ * in xc_compression.c.
+ */
+ if (wrcompressed(io_fd) < 0)
+ {
+ ERROR("Error when writing compressed"
+ " data (4b)\n");
+ goto out;
+ }
+ }
+ }
+ else if ( wruncached(io_fd, live, page,
+ PAGE_SIZE) != PAGE_SIZE )
{
PERROR("Error when writing to state file (4b)"
" (errno %d)", errno);
@@ -1315,7 +1435,34 @@ int xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom, uint32_t max_iter
else
{
/* We have a normal page: accumulate it for writing. */
- run++;
+ if (compressing)
+ {
+ int c_err;
+ /* For checkpoint compression, accumulate the page in the
+ * page buffer, to be compressed later.
+ */
+ c_err = xc_compression_add_page(xch, compress_ctx, spage,
+ pfn, 0 /* not raw page */);
+
+ if (c_err == -2) /* OOB PFN */
+ {
+ ERROR("Could not add page "
+ "(pfn:%" PRIpfn "to page buffer\n", pfn);
+ goto out;
+ }
+
+ if (c_err == -1)
+ {
+ if (wrcompressed(io_fd) < 0)
+ {
+ ERROR("Error when writing compressed"
+ " data (4c)\n");
+ goto out;
+ }
+ }
+ }
+ else
+ run++;
}
} /* end of the write out for this batch */
@@ -1423,6 +1570,15 @@ int xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom, uint32_t max_iter
DPRINTF("All memory is saved\n");
+ /* After last_iter, buffer the rest of pagebuf & tailbuf data into a
+ * separate output buffer and flush it after the compressed page chunks.
+ */
+ if (compressing)
+ {
+ ob = &ob_tailbuf;
+ ob->pos = 0;
+ }
+
{
struct {
int id;
@@ -1534,6 +1690,25 @@ int xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom, uint32_t max_iter
}
}
+ /* Enable compression logic on both sides by sending this
+ * one time marker.
+ * NOTE: We could have simplified this procedure by sending
+ * the enable/disable compression flag before the beginning of
+ * the main for loop. But this would break compatibility of the
+ * live migration code with older versions of Xen. So we have
+ * to enable it after the last_iter, when the XC_SAVE_ID_*
+ * elements are sent.
+ */
+ if (!compressing && (flags & XCFLAGS_CHECKPOINT_COMPRESS))
+ {
+ i = XC_SAVE_ID_ENABLE_COMPRESSION;
+ if ( wrexact(io_fd, &i, sizeof(int)) )
+ {
+ PERROR("Error when writing enable_compression marker");
+ goto out;
+ }
+ }
+
/* Zero terminate */
i = 0;
if ( wrexact(io_fd, &i, sizeof(int)) )
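On the receive side, this one-time element just flips the restore loop into compressed mode for every subsequent checkpoint, along the lines of the fragment below. This is a sketch of one case in the restore loop's marker switch; the actual handling is in xc_domain_restore.c, and 'compressing' is assumed to be that loop's local flag.

    case XC_SAVE_ID_ENABLE_COMPRESSION:
        /* From the next checkpoint on, page data arrives as
         * XC_SAVE_ID_COMPRESSED_DATA chunks rather than raw pages. */
        compressing = 1;
        continue;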
@@ -1778,14 +1953,38 @@ int xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom, uint32_t max_iter
if ( !rc && callbacks->postcopy )
callbacks->postcopy(callbacks->data);
+ /* Guest has been resumed. Now we can compress data
+ * at our own pace.
+ */
+ if (!rc && compressing)
+ {
+ ob = &ob_pagebuf;
+ if (wrcompressed(io_fd) < 0)
+ {
+ ERROR("Error when writing compressed data, after postcopy\n");
+ rc = 1;
+ goto out;
+ }
+ /* Append the tailbuf data to the main outbuf */
+ if ( wrexact(io_fd, ob_tailbuf.buf, ob_tailbuf.pos) )
+ {
+ rc = 1;
+ PERROR("Error when copying tailbuf into outbuf");
+ goto out;
+ }
+ }
+
/* Flush last write and discard cache for file. */
- if ( outbuf_flush(xch, &ob, io_fd) < 0 ) {
+ if ( outbuf_flush(xch, ob, io_fd) < 0 ) {
PERROR("Error when flushing output buffer");
rc = 1;
}
discard_file_cache(xch, io_fd, 1 /* flush */);
+ /* Now that the first uncompressed pass and its tailbuf have been
+ * sent, enable compression (if requested) for subsequent rounds. */
+ compressing = (flags & XCFLAGS_CHECKPOINT_COMPRESS);
+
/* checkpoint_cb can spend arbitrarily long in between rounds */
if (!rc && callbacks->checkpoint &&
callbacks->checkpoint(callbacks->data) > 0)
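Taken together, each checkpoint round with compression enabled proceeds roughly as follows (a schematic reading of the code above, not an authoritative trace):

    /*
     *   suspend domain
     *     - wrexact(): batch headers and pfn arrays accumulate in ob_pagebuf
     *     - dirty pages are queued in compress_ctx (pagetables marked raw)
     *     - XC_SAVE_ID_* elements, the zero terminator and the tailbuf are
     *       buffered in ob_tailbuf
     *   resume domain (callbacks->postcopy)
     *     - wrcompressed(): compress the queued pages and append the
     *       XC_SAVE_ID_COMPRESSED_DATA chunks to ob_pagebuf
     *     - append ob_tailbuf, then flush everything to io_fd
     *   wait in callbacks->checkpoint, then start the next round
     */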
@@ -1827,6 +2026,9 @@ int xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom, uint32_t max_iter
DPRINTF("Warning - couldn't disable qemu log-dirty mode");
}
+ if (compress_ctx)
+ xc_compression_free_context(xch, compress_ctx);
+
if ( live_shinfo )
munmap(live_shinfo, PAGE_SIZE);