diff options
author | Ian Campbell <ian.campbell@citrix.com> | 2010-09-03 18:38:11 +0100 |
---|---|---|
committer | Ian Campbell <ian.campbell@citrix.com> | 2010-09-03 18:38:11 +0100 |
commit | 59e3c6a2c6cae1d50ec39ef334c8ab346da1c3b9 (patch) | |
tree | 6b54b13e9d06ce21d8f683f2c80f41f266d3d5cc | |
parent | 47e13da0a1b0c6e22f95eb79585cb1ab1dddec75 (diff) | |
download | xen-59e3c6a2c6cae1d50ec39ef334c8ab346da1c3b9.tar.gz xen-59e3c6a2c6cae1d50ec39ef334c8ab346da1c3b9.tar.bz2 xen-59e3c6a2c6cae1d50ec39ef334c8ab346da1c3b9.zip |
libxc: document save/restore protocol
Reverse engineered from the code, likely contains inaccuracies but I
think provides a base to work from.
Add symbolic names for the minus-flags.
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: Ian Jackson <ian.jackson@eu.citrix.com>
-rw-r--r-- | tools/libxc/xc_domain_restore.c | 16 | ||||
-rw-r--r-- | tools/libxc/xc_domain_save.c | 20 | ||||
-rw-r--r-- | tools/libxc/xg_save_restore.h | 113 |
3 files changed, 130 insertions, 19 deletions
diff --git a/tools/libxc/xc_domain_restore.c b/tools/libxc/xc_domain_restore.c index 6541021de1..0d326d4a27 100644 --- a/tools/libxc/xc_domain_restore.c +++ b/tools/libxc/xc_domain_restore.c @@ -683,11 +683,11 @@ static int pagebuf_get_one(xc_interface *xch, struct restore_ctx *ctx, if (!count) { // DPRINTF("Last batch read\n"); return 0; - } else if (count == -1) { + } else if (count == XC_SAVE_ID_ENABLE_VERIFY_MODE) { DPRINTF("Entering page verify mode\n"); buf->verify = 1; return pagebuf_get_one(xch, ctx, buf, fd, dom); - } else if (count == -2) { + } else if (count == XC_SAVE_ID_VCPU_INFO) { buf->new_ctxt_format = 1; if ( RDEXACT(fd, &buf->max_vcpu_id, sizeof(buf->max_vcpu_id)) || buf->max_vcpu_id >= 64 || RDEXACT(fd, &buf->vcpumap, @@ -697,7 +697,7 @@ static int pagebuf_get_one(xc_interface *xch, struct restore_ctx *ctx, } // DPRINTF("Max VCPU ID: %d, vcpumap: %llx\n", buf->max_vcpu_id, buf->vcpumap); return pagebuf_get_one(xch, ctx, buf, fd, dom); - } else if (count == -3) { + } else if (count == XC_SAVE_ID_HVM_IDENT_PT) { /* Skip padding 4 bytes then read the EPT identity PT location. */ if ( RDEXACT(fd, &buf->identpt, sizeof(uint32_t)) || RDEXACT(fd, &buf->identpt, sizeof(uint64_t)) ) @@ -707,7 +707,7 @@ static int pagebuf_get_one(xc_interface *xch, struct restore_ctx *ctx, } // DPRINTF("EPT identity map address: %llx\n", buf->identpt); return pagebuf_get_one(xch, ctx, buf, fd, dom); - } else if ( count == -4 ) { + } else if ( count == XC_SAVE_ID_HVM_VM86_TSS ) { /* Skip padding 4 bytes then read the vm86 TSS location. 
*/ if ( RDEXACT(fd, &buf->vm86_tss, sizeof(uint32_t)) || RDEXACT(fd, &buf->vm86_tss, sizeof(uint64_t)) ) @@ -717,7 +717,7 @@ static int pagebuf_get_one(xc_interface *xch, struct restore_ctx *ctx, } // DPRINTF("VM86 TSS location: %llx\n", buf->vm86_tss); return pagebuf_get_one(xch, ctx, buf, fd, dom); - } else if ( count == -5 ) { + } else if ( count == XC_SAVE_ID_TMEM ) { DPRINTF("xc_domain_restore start tmem\n"); if ( xc_tmem_restore(xch, dom, fd) ) { PERROR("error reading/restoring tmem"); @@ -725,13 +725,13 @@ static int pagebuf_get_one(xc_interface *xch, struct restore_ctx *ctx, } return pagebuf_get_one(xch, ctx, buf, fd, dom); } - else if ( count == -6 ) { + else if ( count == XC_SAVE_ID_TMEM_EXTRA ) { if ( xc_tmem_restore_extra(xch, dom, fd) ) { PERROR("error reading/restoring tmem extra"); return -1; } return pagebuf_get_one(xch, ctx, buf, fd, dom); - } else if ( count == -7 ) { + } else if ( count == XC_SAVE_ID_TSC_INFO ) { uint32_t tsc_mode, khz, incarn; uint64_t nsec; if ( RDEXACT(fd, &tsc_mode, sizeof(uint32_t)) || @@ -743,7 +743,7 @@ static int pagebuf_get_one(xc_interface *xch, struct restore_ctx *ctx, return -1; } return pagebuf_get_one(xch, ctx, buf, fd, dom); - } else if (count == -8 ) { + } else if (count == XC_SAVE_ID_HVM_CONSOLE_PFN ) { /* Skip padding 4 bytes then read the console pfn location. 
*/ if ( RDEXACT(fd, &buf->console_pfn, sizeof(uint32_t)) || RDEXACT(fd, &buf->console_pfn, sizeof(uint64_t)) ) diff --git a/tools/libxc/xc_domain_save.c b/tools/libxc/xc_domain_save.c index 4af189b448..b9273bb3ae 100644 --- a/tools/libxc/xc_domain_save.c +++ b/tools/libxc/xc_domain_save.c @@ -861,7 +861,7 @@ static xen_pfn_t *map_and_save_p2m_table(xc_interface *xch, /* must be done AFTER suspend_and_state() */ static int save_tsc_info(xc_interface *xch, uint32_t dom, int io_fd) { - int marker = -7; + int marker = XC_SAVE_ID_TSC_INFO; uint32_t tsc_mode, khz, incarn; uint64_t nsec; @@ -1142,7 +1142,7 @@ int xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom, uint32_t max_iter print_stats(xch, dom, 0, &stats, 0); - tmem_saved = xc_tmem_save(xch, dom, io_fd, live, -5); + tmem_saved = xc_tmem_save(xch, dom, io_fd, live, XC_SAVE_ID_TMEM); if ( tmem_saved == -1 ) { PERROR("Error when writing to state file (tmem)"); @@ -1474,13 +1474,13 @@ int xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom, uint32_t max_iter if ( last_iter && debug ) { - int minusone = -1; + int id = XC_SAVE_ID_ENABLE_VERIFY_MODE; memset(to_send, 0xff, BITMAP_SIZE); debug = 0; DPRINTF("Entering debug resend-all mode\n"); /* send "-1" to put receiver into debug mode */ - if ( wrexact(io_fd, &minusone, sizeof(int)) ) + if ( wrexact(io_fd, &id, sizeof(int)) ) { PERROR("Error when writing to state file (6)"); goto out; @@ -1511,7 +1511,7 @@ int xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom, uint32_t max_iter DPRINTF("SUSPEND shinfo %08lx\n", info.shared_info_frame); if ( (tmem_saved > 0) && - (xc_tmem_save_extra(xch,dom,io_fd,-6) == -1) ) + (xc_tmem_save_extra(xch,dom,io_fd,XC_SAVE_ID_TMEM_EXTRA) == -1) ) { PERROR("Error when writing to state file (tmem)"); goto out; @@ -1545,10 +1545,10 @@ int xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom, uint32_t max_iter { struct { - int minustwo; + int id; int max_vcpu_id; uint64_t vcpumap; - } chunk = { -2, info.max_vcpu_id }; + 
} chunk = { XC_SAVE_ID_VCPU_INFO, info.max_vcpu_id }; if ( info.max_vcpu_id >= 64 ) { @@ -1580,7 +1580,7 @@ int xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom, uint32_t max_iter uint64_t data; } chunk = { 0, }; - chunk.id = -3; + chunk.id = XC_SAVE_ID_HVM_IDENT_PT; xc_get_hvm_param(xch, dom, HVM_PARAM_IDENT_PT, (unsigned long *)&chunk.data); @@ -1591,7 +1591,7 @@ int xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom, uint32_t max_iter goto out; } - chunk.id = -4; + chunk.id = XC_SAVE_ID_HVM_VM86_TSS; xc_get_hvm_param(xch, dom, HVM_PARAM_VM86_TSS, (unsigned long *)&chunk.data); @@ -1602,7 +1602,7 @@ int xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom, uint32_t max_iter goto out; } - chunk.id = -8; + chunk.id = XC_SAVE_ID_HVM_CONSOLE_PFN; xc_get_hvm_param(xch, dom, HVM_PARAM_CONSOLE_PFN, (unsigned long *)&chunk.data); diff --git a/tools/libxc/xg_save_restore.h b/tools/libxc/xg_save_restore.h index 9170f9535b..a16e6f3b7c 100644 --- a/tools/libxc/xg_save_restore.h +++ b/tools/libxc/xg_save_restore.h @@ -1,5 +1,5 @@ /* - * Defintions and utilities for save / restore. + * Definitions and utilities for save / restore. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -22,6 +22,117 @@ #include <xen/foreign/x86_64.h> /* + * SAVE/RESTORE/MIGRATE PROTOCOL + * ============================= + * + * The general form of a stream of chunks is a header followed by a + * body consisting of a variable number of chunks (terminated by a + * chunk with type 0) followed by a trailer. + * + * For a rolling/checkpoint (e.g. remus) migration then the body and + * trailer phases can be repeated until an external event + * (e.g. failure) causes the process to terminate and commit to the + * most recent complete checkpoint. 
+ * + * HEADER + * ------ + * + * unsigned long : p2m_size + * + * extended-info (PV-only, optional): + * + * If first unsigned long == ~0UL then extended info is present, + * otherwise unsigned long is part of p2m. Note that p2m_size above + * does not include the length of the extended info. + * + * extended-info: + * + * unsigned long : signature == ~0UL + * uint32_t : number of bytes remaining in extended-info + * + * 1 or more extended-info blocks of form: + * char[4] : block identifier + * uint32_t : block data size + * bytes : block data + * + * defined extended-info blocks: + * "vcpu" : VCPU context info containing vcpu_guest_context_t. + * The precise variant of the context structure + * (e.g. 32 vs 64 bit) is distinguished by + * the block size. + * "extv" : Presence indicates use of extended VCPU context in + * tail, data size is 0. + * + * p2m (PV-only): + * + * consists of p2m_size bytes comprising an array of xen_pfn_t sized entries. + * + * BODY PHASE + * ---------- + * + * A series of chunks with a common header: + * int : chunk type + * + * If the chunk type is +ve then chunk contains guest memory data, and the + * type contains the number of pages in the batch: + * + * unsigned long[] : PFN array, length == number of pages in batch + * Each entry consists of XEN_DOMCTL_PFINFO_* + * in bits 31-28 and the PFN number in bits 27-0. + * page data : PAGE_SIZE bytes for each page marked present in PFN + * array + * + * If the chunk type is -ve then chunk consists of one of a number of + * metadata types. See definitions of XC_SAVE_ID_* below. + * + * If chunk type is 0 then body phase is complete. + * + * TAIL PHASE + * ---------- + * + * Content differs for PV and HVM guests. 
+ * + * HVM TAIL: + * + * "Magic" pages: + * uint64_t : I/O req PFN + * uint64_t : Buffered I/O req PFN + * uint64_t : Store PFN + * Xen HVM Context: + * uint32_t : Length of context in bytes + * bytes : Context data + * Qemu context: + * char[21] : Signature: + * "QemuDeviceModelRecord" : Read Qemu save data until EOF + * "RemusDeviceModelState" : uint32_t length field followed by that many + * bytes of Qemu save data + * + * PV TAIL: + * + * Unmapped PFN list : list of all the PFNs that were not in map at the close + * unsigned int : Number of unmapped pages + * unsigned long[] : PFNs of unmapped pages + * + * VCPU context data : A series of VCPU records, one per present VCPU + * Maximum and present map supplied in XC_SAVE_ID_VCPU_INFO + * bytes : VCPU context structure. Size is determined by size + * provided in extended-info header + * bytes[128] : Extended VCPU context (present IFF "extv" block + * present in extended-info header) + * + * Shared Info Page : 4096 bytes of shared info page + */ + +#define XC_SAVE_ID_ENABLE_VERIFY_MODE -1 /* Switch to validation phase. */ +#define XC_SAVE_ID_VCPU_INFO -2 /* Additional VCPU info */ +#define XC_SAVE_ID_HVM_IDENT_PT -3 /* (HVM-only) */ +#define XC_SAVE_ID_HVM_VM86_TSS -4 /* (HVM-only) */ +#define XC_SAVE_ID_TMEM -5 +#define XC_SAVE_ID_TMEM_EXTRA -6 +#define XC_SAVE_ID_TSC_INFO -7 +#define XC_SAVE_ID_HVM_CONSOLE_PFN -8 /* (HVM-only) */ + +/* ** We process save/restore/migrate in batches of pages; the below ** determines how many pages we (at maximum) deal with in each batch. */ |