diff options
Diffstat (limited to 'tools/libxc/xc_linux_save.c')
-rw-r--r-- | tools/libxc/xc_linux_save.c | 840 |
1 files changed, 840 insertions, 0 deletions
diff --git a/tools/libxc/xc_linux_save.c b/tools/libxc/xc_linux_save.c new file mode 100644 index 0000000000..c74e209bdc --- /dev/null +++ b/tools/libxc/xc_linux_save.c @@ -0,0 +1,840 @@ +/****************************************************************************** + * xc_linux_save.c + * + * Save the state of a running Linux session. + * + * Copyright (c) 2003, K A Fraser. + */ + +#include <sys/time.h> +#include "xc_private.h" +#include <asm-xen/suspend.h> + +#define BATCH_SIZE 1024 /* 1024 pages (4MB) at a time */ + +#define DEBUG 0 +#define DDEBUG 0 + +#if DEBUG +#define DPRINTF(_f, _a...) printf ( _f , ## _a ) +#else +#define DPRINTF(_f, _a...) ((void)0) +#endif + +#if DDEBUG +#define DDPRINTF(_f, _a...) printf ( _f , ## _a ) +#else +#define DDPRINTF(_f, _a...) ((void)0) +#endif + +/* + * Returns TRUE if the given machine frame number has a unique mapping + * in the guest's pseudophysical map. + * 0x80000000-3 mark the shared_info, and blk/net rings + */ +#define MFN_IS_IN_PSEUDOPHYS_MAP(_mfn) \ + (((_mfn) < (1024*1024)) && \ + (((live_mfn_to_pfn_table[_mfn] < nr_pfns) && \ + (live_pfn_to_mfn_table[live_mfn_to_pfn_table[_mfn]] == (_mfn))) || \ + ((live_mfn_to_pfn_table[_mfn] >= 0x80000000) && \ + (live_mfn_to_pfn_table[_mfn] <= 0x80000003)) || \ + (live_pfn_to_mfn_table[live_mfn_to_pfn_table[_mfn]] == 0x80000004))) + +/* Returns TRUE if MFN is successfully converted to a PFN. */ +#define translate_mfn_to_pfn(_pmfn) \ +({ \ + unsigned long mfn = *(_pmfn); \ + int _res = 1; \ + if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) ) \ + _res = 0; \ + else \ + *(_pmfn) = live_mfn_to_pfn_table[mfn]; \ + _res; \ +}) + +static inline int test_bit ( int nr, volatile void * addr) +{ + return (((unsigned long*)addr)[nr/(sizeof(unsigned long)*8)] >> + (nr % (sizeof(unsigned long)*8))) & 1; +} + +static inline void clear_bit ( int nr, volatile void * addr) +{ + ((unsigned long*)addr)[nr/(sizeof(unsigned long)*8)] &= + ~(1 << (nr % (sizeof(unsigned long)*8) ) ); +} + +static inline void set_bit ( int nr, volatile void * addr) +{ + ((unsigned long*)addr)[nr/(sizeof(unsigned long)*8)] |= + (1 << (nr % (sizeof(unsigned long)*8) ) ); +} + +/* Returns the hamming weight (i.e. the number of bits set) in a N-bit word */ +static inline unsigned int hweight32(unsigned int w) +{ + unsigned int res = (w & 0x55555555) + ((w >> 1) & 0x55555555); + res = (res & 0x33333333) + ((res >> 2) & 0x33333333); + res = (res & 0x0F0F0F0F) + ((res >> 4) & 0x0F0F0F0F); + res = (res & 0x00FF00FF) + ((res >> 8) & 0x00FF00FF); + return (res & 0x0000FFFF) + ((res >> 16) & 0x0000FFFF); +} + +static inline int count_bits ( int nr, volatile void *addr) +{ + int i, count = 0; + unsigned long *p = (unsigned long *)addr; + /* We know that the array is padded to unsigned long. */ + for(i=0;i<nr/(sizeof(unsigned long)*8);i++,p++) + count += hweight32( *p ); + return count; +} + +static inline int permute( int i, int nr, int order_nr ) +{ + /* Need a simple permutation function so that we scan pages in a + pseudo random order, enabling us to get a better estimate of + the domain's page dirtying rate as we go (there are often + contiguous ranges of pfns that have similar behaviour, and we + want to mix them up. */ + + /* e.g. nr->oder 15->4 16->4 17->5 */ + /* 512MB domain, 128k pages, order 17 */ + + /* + QPONMLKJIHGFEDCBA + QPONMLKJIH + GFEDCBA + */ + + /* + QPONMLKJIHGFEDCBA + EDCBA + QPONM + LKJIHGF + */ + + do { i = ((i>>(order_nr-10)) | ( i<<10 ) ) & ((1<<order_nr)-1); } + while ( i >= nr ); /* this won't ever loop if nr is a power of 2 */ + + return i; +} + +static long long tv_to_us( struct timeval *new ) +{ + return (new->tv_sec * 1000000) + new->tv_usec; +} + +static long long llgettimeofday() +{ + struct timeval now; + gettimeofday(&now, NULL); + return tv_to_us(&now); +} + +static long long tv_delta( struct timeval *new, struct timeval *old ) +{ + return ((new->tv_sec - old->tv_sec)*1000000 ) + + (new->tv_usec - old->tv_usec); +} + +static int print_stats( int xc_handle, u32 domid, + int pages_sent, xc_shadow_control_stats_t *stats, + int print ) +{ + static struct timeval wall_last; + static long long d0_cpu_last; + static long long d1_cpu_last; + + struct timeval wall_now; + long long wall_delta; + long long d0_cpu_now, d0_cpu_delta; + long long d1_cpu_now, d1_cpu_delta; + + gettimeofday(&wall_now, NULL); + + d0_cpu_now = xc_domain_get_cpu_usage( xc_handle, 0 )/1000; + d1_cpu_now = xc_domain_get_cpu_usage( xc_handle, domid )/1000; + + if ( (d0_cpu_now == -1) || (d1_cpu_now == -1) ) + printf("ARRHHH!!\n"); + + wall_delta = tv_delta(&wall_now,&wall_last)/1000; + + if ( wall_delta == 0 ) wall_delta = 1; + + d0_cpu_delta = (d0_cpu_now - d0_cpu_last)/1000; + d1_cpu_delta = (d1_cpu_now - d1_cpu_last)/1000; + + if ( print ) + printf("delta %lldms, dom0 %d%%, target %d%%, sent %dMb/s, " + "dirtied %dMb/s\n", + wall_delta, + (int)((d0_cpu_delta*100)/wall_delta), + (int)((d1_cpu_delta*100)/wall_delta), + (int)((pages_sent*PAGE_SIZE*8)/(wall_delta*1000)), + (int)((stats->dirty_count*PAGE_SIZE*8)/(wall_delta*1000))); + + d0_cpu_last = d0_cpu_now; + d1_cpu_last = d1_cpu_now; + wall_last = wall_now; + + return 0; +} + +/** Write the vmconfig string. + * It is stored as a 4-byte count 'n' followed by n bytes. + * + * @param ioctxt i/o context + * @return 0 on success, non-zero on error. + */ +static int write_vmconfig(XcIOContext *ioctxt){ + int err = -1; + if(xcio_write(ioctxt, &ioctxt->vmconfig_n, sizeof(ioctxt->vmconfig_n))) goto exit; + if(xcio_write(ioctxt, ioctxt->vmconfig, ioctxt->vmconfig_n)) goto exit; + err = 0; + exit: + return err; +} + +static int analysis_phase( int xc_handle, u32 domid, + int nr_pfns, unsigned long *arr ) +{ + long long start, now; + xc_shadow_control_stats_t stats; + + start = llgettimeofday(); + + while ( 0 ) + { + int i; + + xc_shadow_control( xc_handle, domid, + DOM0_SHADOW_CONTROL_OP_CLEAN2, + arr, nr_pfns, NULL); + printf("#Flush\n"); + for ( i = 0; i < 100; i++ ) + { + usleep(10000); + now = llgettimeofday(); + xc_shadow_control( xc_handle, domid, + DOM0_SHADOW_CONTROL_OP_PEEK, + NULL, 0, &stats); + + printf("now= %lld faults= %ld dirty= %ld dirty_net= %ld " + "dirty_block= %ld\n", + ((now-start)+500)/1000, + stats.fault_count, stats.dirty_count, + stats.dirty_net_count, stats.dirty_block_count); + } + } + + return -1; +} + +int xc_linux_save(int xc_handle, XcIOContext *ioctxt) +{ + dom0_op_t op; + int rc = 1, i, j, k, last_iter, iter = 0; + unsigned long mfn; + u32 domid = ioctxt->domain; + int live = (ioctxt->flags & XCFLAGS_LIVE); + int debug = (ioctxt->flags & XCFLAGS_DEBUG); + int sent_last_iter, skip_this_iter; + + /* Important tuning parameters */ + int max_iters = 29; /* limit us to 30 times round loop */ + int max_factor = 3; /* never send more than 3x nr_pfns */ + + /* The new domain's shared-info frame number. */ + unsigned long shared_info_frame; + + /* A copy of the CPU context of the guest. */ + full_execution_context_t ctxt; + + /* A copy of the domain's name. */ + char name[MAX_DOMAIN_NAME]; + + /* A table containg the type of each PFN (/not/ MFN!). */ + unsigned long *pfn_type = NULL; + unsigned long *pfn_batch = NULL; + + /* A temporary mapping, and a copy, of one frame of guest memory. */ + unsigned long page[1024]; + + /* A copy of the pfn-to-mfn table frame list. */ + unsigned long *live_pfn_to_mfn_frame_list; + unsigned long pfn_to_mfn_frame_list[1024]; + + /* Live mapping of the table mapping each PFN to its current MFN. */ + unsigned long *live_pfn_to_mfn_table = NULL; + /* Live mapping of system MFN to PFN table. */ + unsigned long *live_mfn_to_pfn_table = NULL; + + /* Live mapping of shared info structure */ + unsigned long *live_shinfo; + + /* base of the region in which domain memory is mapped */ + unsigned char *region_base = NULL; + + /* A temporary mapping, and a copy, of the guest's suspend record. */ + suspend_record_t *p_srec; + + /* number of pages we're dealing with */ + unsigned long nr_pfns; + + /* power of 2 order of nr_pfns */ + int order_nr; + + /* bitmap of pages: + - that should be sent this iteration (unless later marked as skip); + - to skip this iteration because already dirty; + - to fixup by sending at the end if not already resent; */ + unsigned long *to_send, *to_skip, *to_fix; + + xc_shadow_control_stats_t stats; + + int needed_to_fix = 0; + int total_sent = 0; + + if (mlock(&ctxt, sizeof(ctxt))) { + xcio_perror(ioctxt, "Unable to mlock ctxt"); + return 1; + } + + /* Ensure that the domain exists, and that it is stopped. */ + if ( xc_domain_pause(xc_handle, domid) ){ + xcio_perror(ioctxt, "Could not pause domain"); + goto out; + } + + if ( xc_domain_getfullinfo( xc_handle, domid, &op, &ctxt) ) + { + xcio_error(ioctxt, "Could not get full domain info"); + goto out; + } + memcpy(name, op.u.getdomaininfo.name, sizeof(name)); + shared_info_frame = op.u.getdomaininfo.shared_info_frame; + + /* A cheesy test to see whether the domain contains valid state. */ + if ( ctxt.pt_base == 0 ){ + xcio_error(ioctxt, "Domain is not in a valid Linux guest OS state"); + goto out; + } + + /* Map the suspend-record MFN to pin it. The page must be owned by + domid for this to succeed. */ + p_srec = mfn_mapper_map_single(xc_handle, domid, + sizeof(*p_srec), PROT_READ, + ctxt.cpu_ctxt.esi); + if (!p_srec){ + xcio_error(ioctxt, "Couldn't map state record"); + goto out; + } + + nr_pfns = p_srec->nr_pfns; + + /* cheesy sanity check */ + if ( nr_pfns > 1024*1024 ){ + xcio_error(ioctxt, "Invalid state record -- pfn count out of range: %lu", nr_pfns); + goto out; + } + + /* the pfn_to_mfn_frame_list fits in a single page */ + live_pfn_to_mfn_frame_list = + mfn_mapper_map_single(xc_handle, domid, + PAGE_SIZE, PROT_READ, + p_srec->pfn_to_mfn_frame_list ); + + if (!live_pfn_to_mfn_frame_list){ + xcio_error(ioctxt, "Couldn't map pfn_to_mfn_frame_list"); + goto out; + } + + /* Track the mfn_to_pfn table down from the domains PT */ + { + unsigned long *pgd; + unsigned long mfn_to_pfn_table_start_mfn; + + pgd = mfn_mapper_map_single(xc_handle, domid, + PAGE_SIZE, PROT_READ, + ctxt.pt_base>>PAGE_SHIFT); + + mfn_to_pfn_table_start_mfn = + pgd[HYPERVISOR_VIRT_START>>L2_PAGETABLE_SHIFT]>>PAGE_SHIFT; + + live_mfn_to_pfn_table = + mfn_mapper_map_single(xc_handle, ~0UL, + PAGE_SIZE*1024, PROT_READ, + mfn_to_pfn_table_start_mfn ); + } + + /* Map all the frames of the pfn->mfn table. For migrate to succeed, + the guest must not change which frames are used for this purpose. + (its not clear why it would want to change them, and we'll be OK + from a safety POV anyhow. */ + + live_pfn_to_mfn_table = mfn_mapper_map_batch(xc_handle, domid, + PROT_READ, + live_pfn_to_mfn_frame_list, + (nr_pfns+1023)/1024 ); + if( !live_pfn_to_mfn_table ){ + xcio_perror(ioctxt, "Couldn't map pfn_to_mfn table"); + goto out; + } + + + /* Canonicalise the pfn-to-mfn table frame-number list. */ + memcpy( pfn_to_mfn_frame_list, live_pfn_to_mfn_frame_list, PAGE_SIZE ); + for ( i = 0; i < nr_pfns; i += 1024 ){ + if ( !translate_mfn_to_pfn(&pfn_to_mfn_frame_list[i/1024]) ){ + xcio_error(ioctxt, "Frame # in pfn-to-mfn frame list is not in pseudophys"); + goto out; + } + } + + /* At this point, we can start the domain again if we're doing a + live suspend */ + + if( live ){ + if ( xc_shadow_control( xc_handle, domid, + DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY, + NULL, 0, NULL ) < 0 ) { + xcio_error(ioctxt, "Couldn't enable shadow mode"); + goto out; + } + + if ( xc_domain_unpause(xc_handle, domid) < 0 ){ + xcio_error(ioctxt, "Couldn't unpause domain"); + goto out; + } + + last_iter = 0; + sent_last_iter = 1<<20; /* 4GB of pages */ + } else{ + last_iter = 1; + } + + /* calculate the power of 2 order of nr_pfns, e.g. + 15->4 16->4 17->5 */ + for( i=nr_pfns-1, order_nr=0; i ; i>>=1, order_nr++ ); + + /* Setup to_send bitmap */ + { + int sz = (nr_pfns/8) + 8; /* includes slop at end of array */ + + to_send = malloc( sz ); + to_fix = calloc( 1, sz ); + to_skip = malloc( sz ); + + if (!to_send || !to_fix || !to_skip){ + xcio_error(ioctxt, "Couldn't allocate to_send array"); + goto out; + } + + memset( to_send, 0xff, sz ); + + if ( mlock( to_send, sz ) ){ + xcio_perror(ioctxt, "Unable to mlock to_send"); + return 1; + } + + /* (to fix is local only) */ + + if ( mlock( to_skip, sz ) ){ + xcio_perror(ioctxt, "Unable to mlock to_skip"); + return 1; + } + + } + + analysis_phase( xc_handle, domid, nr_pfns, to_skip ); + + /* We want zeroed memory so use calloc rather than malloc. */ + pfn_type = calloc(BATCH_SIZE, sizeof(unsigned long)); + pfn_batch = calloc(BATCH_SIZE, sizeof(unsigned long)); + + if ( (pfn_type == NULL) || (pfn_batch == NULL) ){ + errno = ENOMEM; + goto out; + } + + if ( mlock( pfn_type, BATCH_SIZE * sizeof(unsigned long) ) ){ + xcio_error(ioctxt, "Unable to mlock"); + goto out; + } + + + /* + * Quick belt and braces sanity check. + */ +#if DEBUG + for ( i = 0; i < nr_pfns; i++ ){ + mfn = live_pfn_to_mfn_table[i]; + + if( (live_mfn_to_pfn_table[mfn] != i) && (mfn != 0x80000004) ) + printf("i=0x%x mfn=%x live_mfn_to_pfn_table=%x\n", + i,mfn,live_mfn_to_pfn_table[mfn]); + } +#endif + + /* Map the shared info frame */ + live_shinfo = mfn_mapper_map_single(xc_handle, domid, + PAGE_SIZE, PROT_READ, + shared_info_frame); + + if (!live_shinfo){ + xcio_error(ioctxt, "Couldn't map live_shinfo"); + goto out; + } + + /* Start writing out the saved-domain record. */ + + if ( xcio_write(ioctxt, "LinuxGuestRecord", 16) || + xcio_write(ioctxt, name, sizeof(name)) || + xcio_write(ioctxt, &nr_pfns, sizeof(unsigned long)) || + xcio_write(ioctxt, pfn_to_mfn_frame_list, PAGE_SIZE) ){ + xcio_error(ioctxt, "Error writing header"); + goto out; + } + if(write_vmconfig(ioctxt)){ + xcio_error(ioctxt, "Error writing vmconfig"); + goto out; + } + + print_stats( xc_handle, domid, 0, &stats, 0 ); + + /* Now write out each data page, canonicalising page tables as we go... */ + + while(1){ + unsigned int prev_pc, sent_this_iter, N, batch; + + iter++; + sent_this_iter = 0; + skip_this_iter = 0; + prev_pc = 0; + N=0; + + xcio_info(ioctxt, "Saving memory pages: iter %d 0%%", iter); + + while( N < nr_pfns ){ + unsigned int this_pc = (N * 100) / nr_pfns; + + if ( (this_pc - prev_pc) >= 5 ){ + xcio_info(ioctxt, "\b\b\b\b%3d%%", this_pc); + prev_pc = this_pc; + } + + /* slightly wasteful to peek the whole array evey time, + but this is fast enough for the moment. */ + + if ( !last_iter && + xc_shadow_control(xc_handle, domid, + DOM0_SHADOW_CONTROL_OP_PEEK, + to_skip, nr_pfns, NULL) != nr_pfns ) { + xcio_error(ioctxt, "Error peeking shadow bitmap"); + goto out; + } + + + /* load pfn_type[] with the mfn of all the pages we're doing in + this batch. */ + + for ( batch = 0; batch < BATCH_SIZE && N < nr_pfns ; N++ ) + { + int n = permute(N, nr_pfns, order_nr ); + + if ( 0 && debug ) { + fprintf(stderr,"%d pfn= %08lx mfn= %08lx %d " + " [mfn]= %08lx\n", + iter, (unsigned long)n, live_pfn_to_mfn_table[n], + test_bit(n,to_send), + live_mfn_to_pfn_table[live_pfn_to_mfn_table[n]& + 0xFFFFF]); + } + + if ( !last_iter && + test_bit(n, to_send) && + test_bit(n, to_skip) ) { + skip_this_iter++; /* stats keeping */ + } + + if ( !((test_bit(n, to_send) && !test_bit(n, to_skip)) || + (test_bit(n, to_send) && last_iter) || + (test_bit(n, to_fix) && last_iter)) ) { + continue; + } + + /* we get here if: + 1. page is marked to_send & hasn't already been re-dirtied + 2. (ignore to_skip in last iteration) + 3. add in pages that still need fixup (net bufs) + */ + + pfn_batch[batch] = n; + pfn_type[batch] = live_pfn_to_mfn_table[n]; + + if( pfn_type[batch] == 0x80000004 ){ + /* not currently in pusedo-physical map -- set bit + in to_fix that we must send this page in last_iter + unless its sent sooner anyhow */ + + set_bit( n, to_fix ); + if( iter>1 ) + DDPRINTF("netbuf race: iter %d, pfn %lx. mfn %lx\n", + iter,n,pfn_type[batch]); + continue; + } + + if ( last_iter && + test_bit(n, to_fix) && + !test_bit(n, to_send) ) + { + needed_to_fix++; + DPRINTF("Fix! iter %d, pfn %lx. mfn %lx\n", + iter,n,pfn_type[batch]); + } + + clear_bit(n, to_fix); + + batch++; + } + + DDPRINTF("batch %d:%d (n=%d)\n", iter, batch, n); + + if ( batch == 0 ) + goto skip; /* vanishingly unlikely... */ + + if ( (region_base = mfn_mapper_map_batch(xc_handle, domid, + PROT_READ, + pfn_type, + batch)) == 0 ){ + xcio_perror(ioctxt, "map batch failed"); + goto out; + } + + if ( get_pfn_type_batch(xc_handle, domid, batch, pfn_type) ){ + xcio_error(ioctxt, "get_pfn_type_batch failed"); + goto out; + } + + for ( j = 0; j < batch; j++ ){ + if ( (pfn_type[j] & LTAB_MASK) == XTAB ){ + DDPRINTF("type fail: page %i mfn %08lx\n",j,pfn_type[j]); + continue; + } + + if ( 0 && debug ) + fprintf(stderr, "%d pfn= %08lx mfn= %08lx [mfn]= %08lx" + " sum= %08lx\n", + iter, + (pfn_type[j] & LTAB_MASK) | pfn_batch[j], + pfn_type[j], + live_mfn_to_pfn_table[pfn_type[j]&(~LTAB_MASK)], + csum_page(region_base + (PAGE_SIZE*j))); + + /* canonicalise mfn->pfn */ + pfn_type[j] = (pfn_type[j] & LTAB_MASK) | pfn_batch[j]; + } + + if ( xcio_write(ioctxt, &batch, sizeof(int) ) ){ + xcio_error(ioctxt, "Error when writing to state file (2)"); + goto out; + } + + if ( xcio_write(ioctxt, pfn_type, sizeof(unsigned long)*j ) ){ + xcio_error(ioctxt, "Error when writing to state file (3)"); + goto out; + } + + /* entering this loop, pfn_type is now in pfns (Not mfns) */ + for( j = 0; j < batch; j++ ){ + /* write out pages in batch */ + if( (pfn_type[j] & LTAB_MASK) == XTAB){ + DDPRINTF("SKIP BOGUS page %i mfn %08lx\n",j,pfn_type[j]); + continue; + } + + if ( ((pfn_type[j] & LTAB_MASK) == L1TAB) || + ((pfn_type[j] & LTAB_MASK) == L2TAB) ){ + memcpy(page, region_base + (PAGE_SIZE*j), PAGE_SIZE); + + for ( k = 0; + k < (((pfn_type[j] & LTAB_MASK) == L2TAB) ? + (HYPERVISOR_VIRT_START >> L2_PAGETABLE_SHIFT) : + 1024); + k++ ){ + unsigned long pfn; + + if ( !(page[k] & _PAGE_PRESENT) ) + continue; + + mfn = page[k] >> PAGE_SHIFT; + pfn = live_mfn_to_pfn_table[mfn]; + + if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) ) + { + /* I don't think this should ever happen */ + printf("FNI %d : [%08lx,%d] pte=%08lx, " + "mfn=%08lx, pfn=%08lx [mfn]=%08lx\n", + j, pfn_type[j], k, + page[k], mfn, live_mfn_to_pfn_table[mfn], + (live_mfn_to_pfn_table[mfn]<nr_pfns)? + live_pfn_to_mfn_table[ + live_mfn_to_pfn_table[mfn]] : + 0xdeadbeef); + + pfn = 0; /* be suspicious */ + } + + page[k] &= PAGE_SIZE - 1; + page[k] |= pfn << PAGE_SHIFT; + +#if 0 + printf("L%d i=%d pfn=%d mfn=%d k=%d pte=%08lx " + "xpfn=%d\n", + pfn_type[j]>>28, + j,i,mfn,k,page[k],page[k]>>PAGE_SHIFT); +#endif + + } /* end of page table rewrite for loop */ + + if ( xcio_write(ioctxt, page, PAGE_SIZE) ){ + xcio_error(ioctxt, "Error when writing to state file (4)"); + goto out; + } + + } /* end of it's a PT page */ else { /* normal page */ + + if ( xcio_write(ioctxt, region_base + (PAGE_SIZE*j), + PAGE_SIZE) ){ + xcio_error(ioctxt, "Error when writing to state file (5)"); + goto out; + } + } + } /* end of the write out for this batch */ + + sent_this_iter += batch; + + } /* end of this while loop for this iteration */ + + munmap(region_base, batch*PAGE_SIZE); + + skip: + + total_sent += sent_this_iter; + + xcio_info(ioctxt, "\r %d: sent %d, skipped %d, ", + iter, sent_this_iter, skip_this_iter ); + + if ( last_iter ) { + print_stats( xc_handle, domid, sent_this_iter, &stats, 1); + + xcio_info(ioctxt, "Total pages sent= %d (%.2fx)\n", + total_sent, ((float)total_sent)/nr_pfns ); + xcio_info(ioctxt, "(of which %d were fixups)\n", needed_to_fix ); + } + + if (last_iter && debug){ + int minusone = -1; + memset( to_send, 0xff, (nr_pfns+8)/8 ); + debug = 0; + printf("Entering debug resend-all mode\n"); + + /* send "-1" to put receiver into debug mode */ + if ( xcio_write(ioctxt, &minusone, sizeof(int)) ) + { + xcio_error(ioctxt, "Error when writing to state file (6)"); + goto out; + } + + continue; + } + + if ( last_iter ) break; + + if ( live ) + { + if ( + /* ( sent_this_iter > (sent_last_iter * 0.95) ) || */ + (iter >= max_iters) || + (sent_this_iter+skip_this_iter < 50) || + (total_sent > nr_pfns*max_factor) ) + { + DPRINTF("Start last iteration\n"); + last_iter = 1; + + xc_domain_pause( xc_handle, domid ); + } + + if ( xc_shadow_control( xc_handle, domid, + DOM0_SHADOW_CONTROL_OP_CLEAN2, + to_send, nr_pfns, &stats ) != nr_pfns ) + { + xcio_error(ioctxt, "Error flushing shadow PT"); + goto out; + } + + sent_last_iter = sent_this_iter; + + print_stats( xc_handle, domid, sent_this_iter, &stats, 1); + + } + + + } /* end of while 1 */ + + DPRINTF("All memory is saved\n"); + + /* Success! */ + rc = 0; + + /* Zero terminate */ + if ( xcio_write(ioctxt, &rc, sizeof(int)) ) + { + xcio_error(ioctxt, "Error when writing to state file (6)"); + goto out; + } + + /* Get the final execution context */ + if ( xc_domain_getfullinfo( xc_handle, domid, &op, &ctxt) ) + { + xcio_perror(ioctxt, "Could not get full domain info"); + goto out; + } + + /* Canonicalise the suspend-record frame number. */ + if ( !translate_mfn_to_pfn(&ctxt.cpu_ctxt.esi) ){ + xcio_error(ioctxt, "State record is not in range of pseudophys map"); + goto out; + } + + /* Canonicalise each GDT frame number. */ + for ( i = 0; i < ctxt.gdt_ents; i += 512 ) { + if ( !translate_mfn_to_pfn(&ctxt.gdt_frames[i]) ) { + xcio_error(ioctxt, "GDT frame is not in range of pseudophys map"); + goto out; + } + } + + /* Canonicalise the page table base pointer. */ + if ( !MFN_IS_IN_PSEUDOPHYS_MAP(ctxt.pt_base >> PAGE_SHIFT) ) { + xcio_error(ioctxt, "PT base is not in range of pseudophys map"); + goto out; + } + ctxt.pt_base = live_mfn_to_pfn_table[ctxt.pt_base >> PAGE_SHIFT] << + PAGE_SHIFT; + + if ( xcio_write(ioctxt, &ctxt, sizeof(ctxt)) || + xcio_write(ioctxt, live_shinfo, PAGE_SIZE) ) { + xcio_error(ioctxt, "Error when writing to state file (1)"); + goto out; + } + munmap(live_shinfo, PAGE_SIZE); + + out: + if ( pfn_type != NULL ) free(pfn_type); + DPRINTF("Save exit rc=%d\n",rc); + return !!rc; + +} |