aboutsummaryrefslogtreecommitdiffstats
path: root/tools/libxc/xc_linux_restore.c
diff options
context:
space:
mode:
Diffstat (limited to 'tools/libxc/xc_linux_restore.c')
-rw-r--r--tools/libxc/xc_linux_restore.c600
1 files changed, 600 insertions, 0 deletions
diff --git a/tools/libxc/xc_linux_restore.c b/tools/libxc/xc_linux_restore.c
new file mode 100644
index 0000000000..badba75162
--- /dev/null
+++ b/tools/libxc/xc_linux_restore.c
@@ -0,0 +1,600 @@
+/******************************************************************************
+ * xc_linux_restore.c
+ *
+ * Restore the state of a Linux session.
+ *
+ * Copyright (c) 2003, K A Fraser.
+ */
+
+#include "xc_private.h"
+#include <asm-xen/suspend.h>
+
+#define MAX_BATCH_SIZE 1024 /* upper bound on the page count of one batch; also sizes region_mfn and region_pfn_type */
+
+#define DEBUG 0 /* set non-zero to make DPRINTF emit output */
+
+#if DEBUG
+#define DPRINTF(_f, _a...) printf ( _f , ## _a ) /* debug build: forward format and arguments to printf */
+#else
+#define DPRINTF(_f, _a...) ((void)0) /* non-debug build: expands to a no-op expression */
+#endif
+
+
+/**
+ * Ask Xen (DOM0_GETMEMLIST) for the list of frames owned by a domain.
+ *
+ * The caller's buffer is locked in memory for the duration of the dom0
+ * operation and unlocked again afterwards.
+ *
+ * @param xc_handle handle passed to do_dom0_op
+ * @param domain_id domain whose frame list is requested
+ * @param pfn_buf   buffer receiving the frame numbers
+ * @param max_pfns  capacity of pfn_buf, in entries
+ * @return the count reported by the operation, or -1 if the buffer could
+ *         not be locked or the operation failed.
+ */
+static int get_pfn_list(int xc_handle,
+                        u32 domain_id,
+                        unsigned long *pfn_buf,
+                        unsigned long max_pfns)
+{
+    const size_t buf_bytes = max_pfns * sizeof(unsigned long);
+    dom0_op_t op;
+    int status;
+
+    if ( mlock(pfn_buf, buf_bytes) != 0 )
+    {
+        PERROR("Could not lock pfn list buffer");
+        return -1;
+    }
+
+    op.cmd = DOM0_GETMEMLIST;
+    op.u.getmemlist.domain   = (domid_t)domain_id;
+    op.u.getmemlist.max_pfns = max_pfns;
+    op.u.getmemlist.buffer   = pfn_buf;
+
+    status = do_dom0_op(xc_handle, &op);
+
+    /* Unlock regardless of the outcome; a failure here is not reported. */
+    (void)munlock(pfn_buf, buf_bytes);
+
+    if ( status < 0 )
+        return -1;
+
+    return op.u.getmemlist.num_pfns;
+}
+
+/** Load the vmconfig string from the state input.
+ *
+ * On the wire it is a length field (sized as 'ioctxt->vmconfig_n')
+ * followed by that many bytes. The bytes are placed in a freshly
+ * malloc'd, null-terminated buffer stored in 'ioctxt->vmconfig', and
+ * the length is left in 'ioctxt->vmconfig_n'.
+ *
+ * On failure 'ioctxt->vmconfig' is freed and reset to NULL and
+ * 'ioctxt->vmconfig_n' is reset to 0.
+ *
+ * @param ioctxt i/o context
+ * @return 0 on success, non-zero on error.
+ */
+static int read_vmconfig(XcIOContext *ioctxt){
+    int failed = 1;
+
+    if ( xcio_read(ioctxt, &ioctxt->vmconfig_n,
+                   sizeof(ioctxt->vmconfig_n)) == 0 )
+    {
+        /* One extra byte for the terminator. */
+        ioctxt->vmconfig = malloc(ioctxt->vmconfig_n + 1);
+
+        if ( (ioctxt->vmconfig != NULL) &&
+             (xcio_read(ioctxt, ioctxt->vmconfig, ioctxt->vmconfig_n) == 0) )
+        {
+            ioctxt->vmconfig[ioctxt->vmconfig_n] = '\0';
+            failed = 0;
+        }
+    }
+
+    if ( !failed )
+        return 0;
+
+    /* free(NULL) is a no-op, so no guard is required. */
+    free(ioctxt->vmconfig);
+    ioctxt->vmconfig = NULL;
+    ioctxt->vmconfig_n = 0;
+    return -1;
+}
+
+/**
+ * Restore a saved Linux guest into the domain named by 'ioctxt->domain'.
+ *
+ * Reads, in order, from the state input: the 16-byte signature, the domain
+ * name, the page count, the pfn-to-mfn frame list, the vmconfig string, a
+ * sequence of page batches (terminated by a zero batch count), the CPU
+ * context and the shared-info page. Each page is written into the machine
+ * frame Xen allocated for it, page-table entries and context fields holding
+ * pseudo-physical frame numbers are rewritten to machine frame numbers, and
+ * the domain is then built with DOM0_BUILDDOMAIN. The domain is not started.
+ *
+ * A batch count of -1 switches to "verify" mode: subsequent pages are read
+ * into a scratch buffer and compared with the mapped frame instead of being
+ * written to it.
+ *
+ * All temporary resources (mappings, tables, mmu state, pfn mapper, memory
+ * locks) are released on every exit path past the initial mlock. On failure
+ * the domain is destroyed unless its id is 0.
+ *
+ * @param xc_handle handle passed through to the dom0 and MMU helper calls
+ * @param ioctxt    i/o context supplying the state input and the target
+ *                  domain id; receives the vmconfig string
+ * @return 0 on success, non-zero on error.
+ */
+int xc_linux_restore(int xc_handle, XcIOContext *ioctxt)
+{
+    dom0_op_t op;
+    int rc = 1, i, n, k;
+    unsigned long mfn, pfn, xpfn;
+    unsigned int prev_pc, this_pc;
+    u32 dom = ioctxt->domain;
+    int verify = 0;
+
+    /* Number of page frames in use by this Linux session. */
+    unsigned long nr_pfns;
+
+    /* Number of frames needed to hold the pfn-to-mfn table. */
+    unsigned long nr_p2m_frames;
+
+    /* The new domain's shared-info frame number. */
+    unsigned long shared_info_frame;
+    unsigned char shared_info[PAGE_SIZE]; /* saved contents from file */
+
+    /* A copy of the CPU context of the guest. */
+    full_execution_context_t ctxt;
+
+    /* First 16 bytes of the state file must contain 'LinuxGuestRecord'. */
+    char signature[16];
+
+    /* A copy of the domain's name. */
+    char name[MAX_DOMAIN_NAME];
+
+    /* A table containing the type of each PFN (/not/ MFN!). */
+    unsigned long *pfn_type = NULL;
+
+    /* A table of MFNs to map in the current region */
+    unsigned long *region_mfn = NULL;
+
+    /* A temporary mapping, and a copy, of one frame of guest memory. */
+    unsigned long *ppage;
+
+    /* A copy of the pfn-to-mfn table frame list. */
+    unsigned long pfn_to_mfn_frame_list[1024];
+
+    /* A table mapping each PFN to its new MFN. */
+    unsigned long *pfn_to_mfn_table = NULL;
+
+    /* used by mapper for updating the domain's copy of the table */
+    unsigned long *live_pfn_to_mfn_table = NULL;
+
+    /* A temporary mapping of the guest's suspend record. */
+    suspend_record_t *p_srec;
+
+    /* Current batch mapping; non-NULL only while a batch is mapped, so the
+     * exit path knows whether it still has to be unmapped. */
+    char *region_base = NULL;
+    size_t region_len = 0;
+
+    mmu_t *mmu = NULL;
+
+    int pm_handle = -1;
+
+    /* used by debug verify code */
+    unsigned long buf[PAGE_SIZE/sizeof(unsigned long)];
+
+    if ( mlock(&ctxt, sizeof(ctxt) ) ) {
+        /* needed for when we do the build dom0 op,
+           but might as well do early */
+        PERROR("Unable to mlock ctxt");
+        return 1;
+    }
+
+    /* Start reading the saved-domain record. */
+    if ( xcio_read(ioctxt, signature, 16) ||
+         (memcmp(signature, "LinuxGuestRecord", 16) != 0) ) {
+        xcio_error(ioctxt, "Unrecognised state format -- no signature found");
+        goto out;
+    }
+
+    if ( xcio_read(ioctxt, name, sizeof(name)) ||
+         xcio_read(ioctxt, &nr_pfns, sizeof(unsigned long)) ||
+         xcio_read(ioctxt, pfn_to_mfn_frame_list, PAGE_SIZE) ) {
+        xcio_error(ioctxt, "Error reading header");
+        goto out;
+    }
+
+    if ( read_vmconfig(ioctxt) ) {
+        xcio_error(ioctxt, "Error reading vmconfig");
+        goto out;
+    }
+
+    for ( i = 0; i < MAX_DOMAIN_NAME; i++ ) {
+        if ( name[i] == '\0' ) break;
+        if ( name[i] & 0x80 )
+        {
+            xcio_error(ioctxt, "Random characters in domain name");
+            goto out;
+        }
+    }
+    name[MAX_DOMAIN_NAME-1] = '\0';
+
+    /* Zero is rejected too: it would divide by zero in the progress
+     * calculation below and there would be nothing to restore. */
+    if ( (nr_pfns == 0) || (nr_pfns > 1024*1024) ) {
+        xcio_error(ioctxt, "Invalid state file -- pfn count out of range");
+        goto out;
+    }
+    nr_p2m_frames = (nr_pfns + 1023) / 1024;
+
+    /* We want zeroed memory so use calloc rather than malloc. */
+    pfn_to_mfn_table = calloc(nr_pfns, sizeof(unsigned long));
+    pfn_type         = calloc(nr_pfns, sizeof(unsigned long));
+    region_mfn       = calloc(MAX_BATCH_SIZE, sizeof(unsigned long));
+
+    if ( (pfn_to_mfn_table == NULL) || (pfn_type == NULL) ||
+         (region_mfn == NULL) ) {
+        errno = ENOMEM;
+        goto out;
+    }
+
+    if ( mlock(region_mfn, MAX_BATCH_SIZE * sizeof(unsigned long)) ) {
+        xcio_error(ioctxt, "Could not mlock region_mfn");
+        goto out;
+    }
+
+    /* Set the domain's name to that from the restore file */
+    if ( xc_domain_setname( xc_handle, dom, name ) ) {
+        xcio_error(ioctxt, "Could not set domain name");
+        goto out;
+    }
+
+    /* Set the domain's initial memory allocation
+       to that from the restore file */
+    if ( xc_domain_setinitialmem(xc_handle, dom,
+                                 nr_pfns * (PAGE_SIZE / 1024)) )
+    {
+        xcio_error(ioctxt, "Could not set domain initial memory");
+        goto out;
+    }
+
+    /* Get the domain's shared-info frame. */
+    op.cmd = DOM0_GETDOMAININFO;
+    op.u.getdomaininfo.domain = (domid_t)dom;
+    op.u.getdomaininfo.ctxt = NULL;
+    if ( do_dom0_op(xc_handle, &op) < 0 ) {
+        xcio_error(ioctxt, "Could not get information on new domain");
+        goto out;
+    }
+    shared_info_frame = op.u.getdomaininfo.shared_info_frame;
+
+    if ( (pm_handle = init_pfn_mapper((domid_t)dom)) < 0 )
+        goto out;
+
+    /* Build the pfn-to-mfn table. We choose MFN ordering returned by Xen. */
+    if ( get_pfn_list(xc_handle, dom, pfn_to_mfn_table, nr_pfns) != nr_pfns ) {
+        xcio_error(ioctxt, "Did not read correct number of frame numbers for new dom");
+        goto out;
+    }
+
+    if ( (mmu = init_mmu_updates(xc_handle, dom)) == NULL ) {
+        xcio_error(ioctxt, "Could not initialise for MMU updates");
+        goto out;
+    }
+
+    xcio_info(ioctxt, "Reloading memory pages:   0%%");
+
+    /*
+     * Now simply read each saved frame into its new machine frame.
+     * We uncanonicalise page tables as we go.
+     */
+    prev_pc = 0;
+
+    n = 0;
+    while ( 1 ) {
+        int j;
+        unsigned long region_pfn_type[MAX_BATCH_SIZE];
+
+        this_pc = (n * 100) / nr_pfns;
+        if ( (this_pc - prev_pc) >= 5 ) {
+            xcio_info(ioctxt, "\b\b\b\b%3d%%", this_pc);
+            prev_pc = this_pc;
+        }
+
+        if ( xcio_read(ioctxt, &j, sizeof(int)) ) {
+            xcio_error(ioctxt, "Error when reading from state file");
+            goto out;
+        }
+
+        DPRINTF("batch %d\n",j);
+
+        if ( j == -1 ) {
+            verify = 1;
+            printf("Entering page verify mode\n");
+            continue;
+        }
+
+        if ( j == 0 ) break;  /* our work here is done */
+
+        /* Any other negative count would become a huge size_t in the read
+         * below and overrun region_pfn_type. */
+        if ( j < 0 ) {
+            xcio_error(ioctxt, "Invalid batch size. Giving up.");
+            goto out;
+        }
+
+        if ( j > MAX_BATCH_SIZE ) {
+            xcio_error(ioctxt, "Max batch size exceeded. Giving up.");
+            goto out;
+        }
+
+        if ( xcio_read(ioctxt, region_pfn_type, j*sizeof(unsigned long)) ) {
+            xcio_error(ioctxt, "Error when reading from state file");
+            goto out;
+        }
+
+        /* Translate the batch to MFNs, validating every PFN before it is
+         * used as a table index. */
+        for ( i = 0; i < j; i++ ) {
+            if ( (region_pfn_type[i] & LTAB_MASK) == XTAB ) {
+                region_mfn[i] = 0; /* we know map will fail, but don't care */
+                continue;
+            }
+
+            pfn = region_pfn_type[i] & ~LTAB_MASK;
+            if ( pfn >= nr_pfns ) {
+                xcio_error(ioctxt, "pfn out of range");
+                goto out;
+            }
+            region_mfn[i] = pfn_to_mfn_table[pfn];
+        }
+
+        region_len  = (size_t)j * PAGE_SIZE;
+        region_base = mfn_mapper_map_batch( xc_handle, dom,
+                                            PROT_WRITE,
+                                            region_mfn,
+                                            j );
+        if ( region_base == NULL ) {
+            xcio_error(ioctxt, "map batch failed");
+            goto out;
+        }
+
+        for ( i = 0; i < j; i++ ) {
+            if ( (region_pfn_type[i] & LTAB_MASK) == XTAB ) continue;
+
+            /* Range already checked in the translation pass above. */
+            pfn = region_pfn_type[i] & ~LTAB_MASK;
+
+            region_pfn_type[i] &= LTAB_MASK;
+
+            pfn_type[pfn] = region_pfn_type[i];
+
+            mfn = pfn_to_mfn_table[pfn];
+
+            if ( verify ) {
+                ppage = (unsigned long*) buf;  /* debug case */
+            } else {
+                ppage = (unsigned long*) (region_base + i*PAGE_SIZE);
+            }
+
+            if ( xcio_read(ioctxt, ppage, PAGE_SIZE) ) {
+                xcio_error(ioctxt, "Error when reading from state file");
+                goto out;
+            }
+
+            switch ( region_pfn_type[i] ) {
+            case 0:
+                break;
+
+            case L1TAB:
+            {
+                for ( k = 0; k < 1024; k++ ) {
+                    if ( ppage[k] & _PAGE_PRESENT ) {
+                        xpfn = ppage[k] >> PAGE_SHIFT;
+
+                        if ( xpfn >= nr_pfns ) {
+                            xcio_error(ioctxt, "Frame number in type %lu page table is "
+                                       "out of range. i=%d k=%d pfn=0x%lx "
+                                       "nr_pfns=%lu", region_pfn_type[i]>>28, i,
+                                       k, xpfn, nr_pfns);
+                            goto out;
+                        }
+
+                        /* Keep only the flag bits, then install the MFN. */
+                        ppage[k] &= (PAGE_SIZE - 1) &
+                            ~(_PAGE_GLOBAL | _PAGE_PAT);
+                        ppage[k] |= pfn_to_mfn_table[xpfn] << PAGE_SHIFT;
+                    }
+                }
+            }
+            break;
+
+            case L2TAB:
+            {
+                /* Only entries below HYPERVISOR_VIRT_START are rewritten. */
+                for ( k = 0;
+                      k < (HYPERVISOR_VIRT_START>>L2_PAGETABLE_SHIFT);
+                      k++ ) {
+                    if ( ppage[k] & _PAGE_PRESENT ) {
+                        xpfn = ppage[k] >> PAGE_SHIFT;
+
+                        if ( xpfn >= nr_pfns ) {
+                            xcio_error(ioctxt, "Frame number in type %lu page table is "
+                                       "out of range. i=%d k=%d pfn=%lu nr_pfns=%lu",
+                                       region_pfn_type[i]>>28, i, k, xpfn, nr_pfns);
+
+                            goto out;
+                        }
+
+                        ppage[k] &= (PAGE_SIZE - 1) &
+                            ~(_PAGE_GLOBAL | _PAGE_PSE);
+                        ppage[k] |= pfn_to_mfn_table[xpfn] << PAGE_SHIFT;
+                    }
+                }
+            }
+            break;
+
+            default:
+                xcio_error(ioctxt, "Bogus page type %lx page table is out of range."
+                           " i=%d nr_pfns=%lu", region_pfn_type[i], i, nr_pfns);
+                goto out;
+
+            } /* end of page type switch statement */
+
+            if ( verify ) {
+                int res = memcmp(buf, (region_base + i*PAGE_SIZE), PAGE_SIZE );
+                if ( res ) {
+                    int v;
+                    printf("************** pfn=%lx type=%lx gotcs=%08lx "
+                           "actualcs=%08lx\n", pfn, pfn_type[pfn],
+                           csum_page(region_base + i*PAGE_SIZE),
+                           csum_page(buf));
+                    for ( v = 0; v < 4; v++ ) {
+                        unsigned long *p = (unsigned long *)
+                            (region_base + i*PAGE_SIZE);
+                        if ( buf[v] != p[v] )
+                            printf("    %d: %08lx %08lx\n",
+                                   v, buf[v], p[v] );
+                    }
+                }
+            }
+
+            if ( add_mmu_update(xc_handle, mmu,
+                                (mfn<<PAGE_SHIFT) | MMU_MACHPHYS_UPDATE, pfn) ) {
+                printf("machpys mfn=%ld pfn=%ld\n",mfn,pfn);
+                goto out;
+            }
+
+        } /* end of 'batch' for loop */
+
+        munmap( region_base, region_len );
+        region_base = NULL;
+        n += j; /* crude stats */
+    }
+
+    printf("Received all pages\n");
+
+    DPRINTF("Received all pages\n");
+
+    /*
+     * Pin page tables. Do this after writing to them as otherwise Xen
+     * will barf when doing the type-checking.
+     */
+    for ( i = 0; i < nr_pfns; i++ ) {
+        if ( pfn_type[i] == L1TAB ) {
+            if ( add_mmu_update(xc_handle, mmu,
+                                (pfn_to_mfn_table[i]<<PAGE_SHIFT) |
+                                MMU_EXTENDED_COMMAND,
+                                MMUEXT_PIN_L1_TABLE) ) {
+                printf("ERR pin L1 pfn=%lx mfn=%lx\n",
+                       (unsigned long)i, pfn_to_mfn_table[i]);
+                goto out;
+            }
+        } else if ( pfn_type[i] == L2TAB ) {
+            if ( add_mmu_update(xc_handle, mmu,
+                                (pfn_to_mfn_table[i]<<PAGE_SHIFT) |
+                                MMU_EXTENDED_COMMAND,
+                                MMUEXT_PIN_L2_TABLE) ) {
+                printf("ERR pin L2 pfn=%lx mfn=%lx\n",
+                       (unsigned long)i, pfn_to_mfn_table[i]);
+                goto out;
+            }
+        }
+    }
+
+    if ( finish_mmu_updates(xc_handle, mmu) ) goto out;
+
+    xcio_info(ioctxt, "\b\b\b\b100%%\nMemory reloaded.\n");
+
+
+    if ( xcio_read(ioctxt, &ctxt, sizeof(ctxt)) ||
+         xcio_read(ioctxt, shared_info, PAGE_SIZE) ) {
+        xcio_error(ioctxt, "Error when reading from state file");
+        goto out;
+    }
+
+    /* Uncanonicalise the suspend-record frame number and poke resume rec. */
+    pfn = ctxt.cpu_ctxt.esi;
+    if ( (pfn >= nr_pfns) || (pfn_type[pfn] != NOTAB) )
+    {
+        xcio_error(ioctxt, "Suspend record frame number is bad");
+        goto out;
+    }
+    ctxt.cpu_ctxt.esi = mfn = pfn_to_mfn_table[pfn];
+    p_srec = map_pfn_writeable(pm_handle, mfn);
+    if ( p_srec == NULL ) {
+        xcio_error(ioctxt, "Couldn't map suspend record");
+        goto out;
+    }
+    p_srec->resume_info.nr_pages    = nr_pfns;
+    p_srec->resume_info.shared_info = shared_info_frame << PAGE_SHIFT;
+    p_srec->resume_info.flags       = 0;
+    unmap_pfn(pm_handle, p_srec);
+
+    /* Uncanonicalise each GDT frame number. */
+    if ( ctxt.gdt_ents > 8192 ) {
+        xcio_error(ioctxt, "GDT entry count out of range");
+        goto out;
+    }
+    /*
+     * NOTE(review): 'i' advances by 512 entries per step yet is used
+     * directly as the gdt_frames[] index. That looks like it should be a
+     * per-frame index (i / 512). Left unchanged because the declaration of
+     * gdt_frames is not visible here and the save side must agree --
+     * confirm against both before changing.
+     */
+    for ( i = 0; i < ctxt.gdt_ents; i += 512 ) {
+        pfn = ctxt.gdt_frames[i];
+        if ( (pfn >= nr_pfns) || (pfn_type[pfn] != NOTAB) ) {
+            xcio_error(ioctxt, "GDT frame number is bad");
+            goto out;
+        }
+        ctxt.gdt_frames[i] = pfn_to_mfn_table[pfn];
+    }
+
+    /* Uncanonicalise the page table base pointer. */
+    pfn = ctxt.pt_base >> PAGE_SHIFT;
+    if ( pfn >= nr_pfns ) {
+        /* Don't consult pfn_type[] for the diagnostic: pfn is past its end. */
+        printf("PT base is bad. pfn=%lu nr=%lu\n", pfn, nr_pfns);
+        xcio_error(ioctxt, "PT base is bad.");
+        goto out;
+    }
+    if ( pfn_type[pfn] != L2TAB ) {
+        printf("PT base is bad. pfn=%lu nr=%lu type=%08lx %08lx\n",
+               pfn, nr_pfns, pfn_type[pfn], (unsigned long)L2TAB);
+        xcio_error(ioctxt, "PT base is bad.");
+        goto out;
+    }
+    ctxt.pt_base = pfn_to_mfn_table[pfn] << PAGE_SHIFT;
+
+
+    /* clear any pending events and the selector
+     * (a single memset spans both; presumably evtchn_pending_sel directly
+     * follows evtchn_pending in shared_info_t -- TODO confirm) */
+    memset( &(((shared_info_t *)shared_info)->evtchn_pending[0]),
+            0, sizeof (((shared_info_t *)shared_info)->evtchn_pending)+
+            sizeof(((shared_info_t *)shared_info)->evtchn_pending_sel) );
+
+    /* Copy saved contents of shared-info page. No checking needed. */
+    ppage = map_pfn_writeable(pm_handle, shared_info_frame);
+    if ( ppage == NULL ) {
+        xcio_error(ioctxt, "Couldn't map shared info frame");
+        goto out;
+    }
+    memcpy(ppage, shared_info, sizeof(shared_info_t));
+    unmap_pfn(pm_handle, ppage);
+
+
+    /* Uncanonicalise the pfn-to-mfn table frame-number list. */
+    for ( i = 0; i < nr_p2m_frames; i++ ) {
+        pfn = pfn_to_mfn_frame_list[i];
+        if ( (pfn >= nr_pfns) || (pfn_type[pfn] != NOTAB) ) {
+            xcio_error(ioctxt, "PFN-to-MFN frame number is bad");
+            goto out;
+        }
+        pfn_to_mfn_frame_list[i] = pfn_to_mfn_table[pfn];
+    }
+
+    if ( (live_pfn_to_mfn_table =
+          mfn_mapper_map_batch(xc_handle, dom,
+                               PROT_WRITE,
+                               pfn_to_mfn_frame_list,
+                               nr_p2m_frames )) == NULL ) {
+        xcio_error(ioctxt, "Couldn't map pfn_to_mfn table");
+        goto out;
+    }
+
+    memcpy( live_pfn_to_mfn_table, pfn_to_mfn_table,
+            nr_pfns*sizeof(unsigned long) );
+
+    munmap( live_pfn_to_mfn_table, nr_p2m_frames*PAGE_SIZE );
+
+    /*
+     * Safety checking of saved context:
+     *  1. cpu_ctxt is fine, as Xen checks that on context switch.
+     *  2. fpu_ctxt is fine, as it can't hurt Xen.
+     *  3. trap_ctxt needs the code selectors checked.
+     *  4. fast_trap_idx is checked by Xen.
+     *  5. ldt base must be page-aligned, no more than 8192 ents, ...
+     *  6. gdt already done, and further checking is done by Xen.
+     *  7. check that guestos_ss is safe.
+     *  8. pt_base is already done.
+     *  9. debugregs are checked by Xen.
+     *  10. callback code selectors need checking.
+     */
+    for ( i = 0; i < 256; i++ ) {
+        ctxt.trap_ctxt[i].vector = i;
+        /* Selectors whose low two bits are zero are replaced by the flat
+         * guest OS selector. */
+        if ( (ctxt.trap_ctxt[i].cs & 3) == 0 )
+            ctxt.trap_ctxt[i].cs = FLAT_GUESTOS_CS;
+    }
+    if ( (ctxt.guestos_ss & 3) == 0 ) {
+        ctxt.guestos_ss = FLAT_GUESTOS_DS;
+    }
+    if ( (ctxt.event_callback_cs & 3) == 0 ) {
+        ctxt.event_callback_cs = FLAT_GUESTOS_CS;
+    }
+    if ( (ctxt.failsafe_callback_cs & 3) == 0 ) {
+        ctxt.failsafe_callback_cs = FLAT_GUESTOS_CS;
+    }
+    if ( ((ctxt.ldt_base & (PAGE_SIZE - 1)) != 0) ||
+         (ctxt.ldt_ents > 8192) ||
+         (ctxt.ldt_base > HYPERVISOR_VIRT_START) ||
+         ((ctxt.ldt_base + ctxt.ldt_ents*8) > HYPERVISOR_VIRT_START) )
+    {
+        xcio_error(ioctxt, "Bad LDT base or size");
+        goto out;
+    }
+
+    op.cmd = DOM0_BUILDDOMAIN;
+    op.u.builddomain.domain = (domid_t)dom;
+    op.u.builddomain.ctxt = &ctxt;
+    rc = do_dom0_op(xc_handle, &op);
+
+    /* don't start the domain as we have console etc to set up */
+
+    if ( rc == 0 ) {
+        /* Success: print the domain id, then fall through to the shared
+         * cleanup so nothing is leaked. */
+        xcio_info(ioctxt, "DOM=%lu\n", (unsigned long)dom);
+    }
+
+ out:
+    /* A batch may still be mapped if we bailed out mid-batch. */
+    if ( region_base != NULL ) {
+        munmap(region_base, region_len);
+    }
+    if ( (rc != 0) && (dom != 0) ) {
+        xc_domain_destroy(xc_handle, dom);
+    }
+    free(mmu);
+    if ( pm_handle >= 0 ) {
+        (void)close_pfn_mapper(pm_handle);
+    }
+    if ( region_mfn != NULL ) {
+        (void)munlock(region_mfn, MAX_BATCH_SIZE * sizeof(unsigned long));
+        free(region_mfn);
+    }
+    free(pfn_to_mfn_table);
+    free(pfn_type);
+    (void)munlock(&ctxt, sizeof(ctxt));
+
+    if ( rc == 0 ) {
+        ioctxt->domain = dom;
+    }
+    DPRINTF("Restore exit with rc=%d\n",rc);
+    return rc;
+}