diff options
Diffstat (limited to 'tools/xc/lib/xc_linux_save.c')
-rw-r--r-- | tools/xc/lib/xc_linux_save.c | 156 |
1 files changed, 116 insertions, 40 deletions
diff --git a/tools/xc/lib/xc_linux_save.c b/tools/xc/lib/xc_linux_save.c index 2d31b3dae2..4913a8527f 100644 --- a/tools/xc/lib/xc_linux_save.c +++ b/tools/xc/lib/xc_linux_save.c @@ -64,36 +64,94 @@ /* test_bit */ -inline int test_bit ( int nr, volatile void * addr) +static inline int test_bit ( int nr, volatile void * addr) { return ( ((unsigned long*)addr)[nr/(sizeof(unsigned long)*8)] >> (nr % (sizeof(unsigned long)*8) ) ) & 1; } -inline void clear_bit ( int nr, volatile void * addr) +static inline void clear_bit ( int nr, volatile void * addr) { ((unsigned long*)addr)[nr/(sizeof(unsigned long)*8)] &= ~(1 << (nr % (sizeof(unsigned long)*8) ) ); } -inline void set_bit ( int nr, volatile void * addr) +static inline void set_bit ( int nr, volatile void * addr) { ((unsigned long*)addr)[nr/(sizeof(unsigned long)*8)] |= (1 << (nr % (sizeof(unsigned long)*8) ) ); } +/* + * hweightN: returns the hamming weight (i.e. the number + * of bits set) of a N-bit word + */ + +static inline unsigned int hweight32(unsigned int w) +{ + unsigned int res = (w & 0x55555555) + ((w >> 1) & 0x55555555); + res = (res & 0x33333333) + ((res >> 2) & 0x33333333); + res = (res & 0x0F0F0F0F) + ((res >> 4) & 0x0F0F0F0F); + res = (res & 0x00FF00FF) + ((res >> 8) & 0x00FF00FF); + return (res & 0x0000FFFF) + ((res >> 16) & 0x0000FFFF); +} + +static inline int count_bits ( int nr, volatile void *addr) +{ + int i, count = 0; + unsigned long *p = (unsigned long *)addr; + // we know the array is padded to unsigned long + for(i=0;i<nr/(sizeof(unsigned long)*8);i++,p++) + count += hweight32( *p ); + return count; +} + +static inline int permute( int i, int nr, int order_nr ) +{ + /* Need a simple permutation function so that we scan pages in a + pseudo random order, enabling us to get a better estimate of + the domain's page dirtying rate as we go (there are often + contiguous ranges of pfns that have similar behaviour, and we + want to mix them up. */ + + /* e.g. nr->oder 15->4 16->4 17->5 */ + /* 512MB domain, 128k pages, order 17 */ + + /* + QPONMLKJIHGFEDCBA + QPONMLKJIH + GFEDCBA + */ + + /* + QPONMLKJIHGFEDCBA + EDCBA + QPONM + LKJIHGF + */ -long long tv_to_us( struct timeval *new ) + do + { + i = ( ( i>>(order_nr-10)) | ( i<<10 ) ) & + ((1<<order_nr)-1); + } + while ( i >= nr ); // this won't ever loop if nr is a power of 2 + + return i; +} + +static long long tv_to_us( struct timeval *new ) { return (new->tv_sec * 1000000) + new->tv_usec; } -long long tvdelta( struct timeval *new, struct timeval *old ) +static long long tvdelta( struct timeval *new, struct timeval *old ) { return ((new->tv_sec - old->tv_sec)*1000000 ) + (new->tv_usec - old->tv_usec); } -int track_cpu_usage( int xc_handle, u64 domid, int pages, int print ) +static int track_cpu_usage( int xc_handle, u64 domid, int faults, + int pages_sent, int pages_dirtied, int print ) { static struct timeval wall_last; static long long d0_cpu_last; @@ -123,11 +181,13 @@ int track_cpu_usage( int xc_handle, u64 domid, int pages, int print ) d1_cpu_delta = (d1_cpu_now - d1_cpu_last)/1000; if(print) - printf("interval %lldms, dom0 used %lldms (%d%%), target used %lldms (%d%%), b/w %dMb/s\n", + printf("delta %lldms, dom0 %d%%, target %d%%, sent %dMb/s, dirtied %dMb/s\n", wall_delta, - d0_cpu_delta, (int)((d0_cpu_delta*100)/wall_delta), - d1_cpu_delta, (int)((d1_cpu_delta*100)/wall_delta), - (int)((pages*PAGE_SIZE*8)/(wall_delta*1000))); + (int)((d0_cpu_delta*100)/wall_delta), + (int)((d1_cpu_delta*100)/wall_delta), + (int)((pages_sent*PAGE_SIZE*8)/(wall_delta*1000)), + (int)((pages_dirtied*PAGE_SIZE*8)/(wall_delta*1000)) + ); d0_cpu_last = d0_cpu_now; d1_cpu_last = d1_cpu_now; @@ -144,13 +204,14 @@ int xc_linux_save(int xc_handle, void *writerst ) { dom0_op_t op; - int rc = 1, i, j, k, n, last_iter, iter = 0; + int rc = 1, i, j, k, last_iter, iter = 0; unsigned long mfn; int verbose = flags & XCFLAGS_VERBOSE; int live = flags & XCFLAGS_LIVE; int debug = flags & XCFLAGS_DEBUG; int sent_last_iter, sent_this_iter, skip_this_iter; - + unsigned long dirtied_this_iter, faults_this_iter; + /* Important tuning parameters */ int max_iters = 29; // limit us to 30 times round loop int max_factor = 3; // never send more than 3x nr_pfns @@ -192,6 +253,9 @@ int xc_linux_save(int xc_handle, /* number of pages we're dealing with */ unsigned long nr_pfns; + /* power of 2 order of nr_pfns */ + int order_nr; + /* bitmap of pages: - that should be sent this iteration (unless later marked as skip); - to skip this iteration because already dirty; @@ -310,7 +374,7 @@ int xc_linux_save(int xc_handle, { if ( xc_shadow_control( xc_handle, domid, DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY, - NULL, 0 ) < 0 ) + NULL, 0, NULL, NULL ) < 0 ) { ERROR("Couldn't enable shadow mode"); goto out; @@ -361,6 +425,11 @@ int xc_linux_save(int xc_handle, } + /* calculate the power of 2 order of nr_pfns, e.g. + 15->4 16->4 17->5 */ + for( i=nr_pfns-1, order_nr=0; i ; i>>=1, order_nr++ ); + +printf("nr_pfns=%d order_nr=%d\n",nr_pfns, order_nr); /* We want zeroed memory so use calloc rather than malloc. */ pfn_type = calloc(BATCH_SIZE, sizeof(unsigned long)); @@ -415,25 +484,26 @@ int xc_linux_save(int xc_handle, goto out; } - track_cpu_usage( xc_handle, domid, 0, 0); + track_cpu_usage( xc_handle, domid, 0, 0, 0, 0 ); /* Now write out each data page, canonicalising page tables as we go... */ while(1) { - unsigned int prev_pc, batch, sent_this_iter; + unsigned int prev_pc, sent_this_iter, N, batch; iter++; - sent_this_iter = 0; skip_this_iter = 0; prev_pc = 0; + N=0; + verbose_printf("Saving memory pages: iter %d 0%%", iter); - n=0; - while( n < nr_pfns ) + while( N < nr_pfns ) { - unsigned int this_pc = (n * 100) / nr_pfns; + unsigned int this_pc = (N * 100) / nr_pfns; + if ( (this_pc - prev_pc) >= 5 ) { verbose_printf("\b\b\b\b%3d%%", this_pc); @@ -444,9 +514,9 @@ int xc_linux_save(int xc_handle, but this is fast enough for the moment. */ if ( !last_iter && - xc_shadow_control( xc_handle, domid, - DOM0_SHADOW_CONTROL_OP_PEEK, - to_skip, nr_pfns ) != nr_pfns ) + xc_shadow_control(xc_handle, domid, + DOM0_SHADOW_CONTROL_OP_PEEK, + to_skip, nr_pfns, NULL, NULL) != nr_pfns ) { ERROR("Error peeking shadow bitmap"); goto out; @@ -456,8 +526,9 @@ int xc_linux_save(int xc_handle, /* load pfn_type[] with the mfn of all the pages we're doing in this batch. */ - for( batch = 0; batch < BATCH_SIZE && n < nr_pfns ; n++ ) + for( batch = 0; batch < BATCH_SIZE && N < nr_pfns ; N++ ) { + int n = permute(N, nr_pfns, order_nr ); if(0 && debug) fprintf(stderr,"%d pfn= %08lx mfn= %08lx %d [mfn]= %08lx\n", @@ -528,7 +599,7 @@ int xc_linux_save(int xc_handle, for( j = 0; j < batch; j++ ) { - if((pfn_type[j]>>29) == 7) + if( (pfn_type[j] & LTAB_MASK) == XTAB) { DDPRINTF("type fail: page %i mfn %08lx\n",j,pfn_type[j]); continue; @@ -537,16 +608,16 @@ int xc_linux_save(int xc_handle, if(0 && debug) fprintf(stderr,"%d pfn= %08lx mfn= %08lx [mfn]= %08lx sum= %08lx\n", iter, - (pfn_type[j] & PGT_type_mask) | pfn_batch[j], + (pfn_type[j] & LTAB_MASK) | pfn_batch[j], pfn_type[j], - live_mfn_to_pfn_table[pfn_type[j]&(~PGT_type_mask)], + live_mfn_to_pfn_table[pfn_type[j]&(~LTAB_MASK)], csum_page(region_base + (PAGE_SIZE*j)) ); /* canonicalise mfn->pfn */ - pfn_type[j] = (pfn_type[j] & PGT_type_mask) | + pfn_type[j] = (pfn_type[j] & LTAB_MASK) | pfn_batch[j]; - //live_mfn_to_pfn_table[pfn_type[j]&~PGT_type_mask]; + //live_mfn_to_pfn_table[pfn_type[j]&~LTAB_MASK]; } @@ -568,20 +639,20 @@ int xc_linux_save(int xc_handle, { /* write out pages in batch */ - if((pfn_type[j]>>29) == 7) + if( (pfn_type[j] & LTAB_MASK) == XTAB) { DDPRINTF("SKIP BOGUS page %i mfn %08lx\n",j,pfn_type[j]); continue; } - if ( ((pfn_type[j] & PGT_type_mask) == L1TAB) || - ((pfn_type[j] & PGT_type_mask) == L2TAB) ) + if ( ((pfn_type[j] & LTAB_MASK) == L1TAB) || + ((pfn_type[j] & LTAB_MASK) == L2TAB) ) { memcpy(page, region_base + (PAGE_SIZE*j), PAGE_SIZE); for ( k = 0; - k < (((pfn_type[j] & PGT_type_mask) == L2TAB) ? + k < (((pfn_type[j] & LTAB_MASK) == L2TAB) ? (HYPERVISOR_VIRT_START >> L2_PAGETABLE_SHIFT) : 1024); k++ ) { @@ -610,9 +681,9 @@ int xc_linux_save(int xc_handle, page[k] &= PAGE_SIZE - 1; page[k] |= pfn << PAGE_SHIFT; -#if DEBUG +#if 0 printf("L%d i=%d pfn=%d mfn=%d k=%d pte=%08lx xpfn=%d\n", - pfn_type[j]>>29, + pfn_type[j]>>28, j,i,mfn,k,page[k],page[k]>>PAGE_SHIFT); #endif @@ -646,13 +717,13 @@ int xc_linux_save(int xc_handle, total_sent += sent_this_iter; - verbose_printf("\b\b\b\b100%% (pages sent= %d, skipped= %d )\n", - sent_this_iter, skip_this_iter ); + verbose_printf("\r %d: sent %d, skipped %d, ", + iter, sent_this_iter, skip_this_iter ); - track_cpu_usage( xc_handle, domid, sent_this_iter, 1); - if ( last_iter ) { + track_cpu_usage( xc_handle, domid, 0, sent_this_iter, 0, 1); + verbose_printf("Total pages sent= %d (%.2fx)\n", total_sent, ((float)total_sent)/nr_pfns ); verbose_printf("(of which %d were fixups)\n", needed_to_fix ); @@ -683,7 +754,7 @@ int xc_linux_save(int xc_handle, if ( // ( sent_this_iter > (sent_last_iter * 0.95) ) || (iter >= max_iters) || - (sent_this_iter+skip_this_iter < 10) || + (sent_this_iter+skip_this_iter < 50) || (total_sent > nr_pfns*max_factor) ) { DPRINTF("Start last iteration\n"); @@ -695,7 +766,8 @@ int xc_linux_save(int xc_handle, if ( xc_shadow_control( xc_handle, domid, DOM0_SHADOW_CONTROL_OP_CLEAN2, - to_send, nr_pfns ) != nr_pfns ) + to_send, nr_pfns, &faults_this_iter, + &dirtied_this_iter) != nr_pfns ) { ERROR("Error flushing shadow PT"); goto out; @@ -703,6 +775,10 @@ int xc_linux_save(int xc_handle, sent_last_iter = sent_this_iter; + //dirtied_this_iter = count_bits( nr_pfns, to_send ); + track_cpu_usage( xc_handle, domid, faults_this_iter, + sent_this_iter, dirtied_this_iter, 1); + } |