diff options
Diffstat (limited to 'tools')
-rwxr-xr-x | tools/examples/xc_dom_control.py | 4 | ||||
-rw-r--r-- | tools/xc/lib/xc.h | 10 | ||||
-rw-r--r-- | tools/xc/lib/xc_domain.c | 15 | ||||
-rw-r--r-- | tools/xc/lib/xc_linux_build.c | 2 | ||||
-rw-r--r-- | tools/xc/lib/xc_linux_restore.c | 83 | ||||
-rw-r--r-- | tools/xc/lib/xc_linux_save.c | 546 | ||||
-rw-r--r-- | tools/xc/lib/xc_private.c | 80 | ||||
-rw-r--r-- | tools/xc/lib/xc_private.h | 4 | ||||
-rw-r--r-- | tools/xc/py/Xc.c | 16 | ||||
-rw-r--r-- | tools/xend/lib/utils.c | 5 |
10 files changed, 517 insertions, 248 deletions
diff --git a/tools/examples/xc_dom_control.py b/tools/examples/xc_dom_control.py index 60bd65d0ac..877afa53d1 100755 --- a/tools/examples/xc_dom_control.py +++ b/tools/examples/xc_dom_control.py @@ -139,10 +139,12 @@ elif cmd == 'suspend': xc.domain_stop( dom=dom ) while not xc.domain_getinfo( first_dom=dom, max_doms=1 )[0]['stopped']: - time.sleep(0.1); + print "Sleep..." + time.sleep(0.001); rc = xc.linux_save( dom=dom, state_file=file, progress=1) if rc == 0 : xc.domain_destroy( dom=dom, force=1 ) + else: xc.domain_start( dom=dom ) # sensible for production use elif cmd == 'cpu_bvtslice': if len(sys.argv) < 3: diff --git a/tools/xc/lib/xc.h b/tools/xc/lib/xc.h index a0205bcc6b..2132d6e7c1 100644 --- a/tools/xc/lib/xc.h +++ b/tools/xc/lib/xc.h @@ -57,7 +57,10 @@ int xc_domain_getinfo(int xc_handle, int xc_shadow_control(int xc_handle, u64 domid, - unsigned int sop); + unsigned int sop, + unsigned long *dirty_bitmap, + unsigned long pages); + #define XCFLAGS_VERBOSE 1 #define XCFLAGS_LIVE 2 @@ -247,11 +250,6 @@ int xc_readconsolering(int xc_handle, int xc_physinfo(int xc_handle, xc_physinfo_t *info); - -int xc_shadow_control(int xc_handle, - u64 domid, - unsigned int sop); - int xc_domain_setname(int xc_handle, u64 domid, char *name); diff --git a/tools/xc/lib/xc_domain.c b/tools/xc/lib/xc_domain.c index c26a3f87c3..6d0dd6d0f3 100644 --- a/tools/xc/lib/xc_domain.c +++ b/tools/xc/lib/xc_domain.c @@ -109,13 +109,24 @@ int xc_domain_getinfo(int xc_handle, int xc_shadow_control(int xc_handle, u64 domid, - unsigned int sop) + unsigned int sop, + unsigned long *dirty_bitmap, + unsigned long pages) { + int rc; dom0_op_t op; op.cmd = DOM0_SHADOW_CONTROL; op.u.shadow_control.domain = (domid_t)domid; op.u.shadow_control.op = sop; - return do_dom0_op(xc_handle, &op); + op.u.shadow_control.dirty_bitmap = dirty_bitmap; + op.u.shadow_control.pages = pages; + + rc = do_dom0_op(xc_handle, &op); + + if ( rc == 0 ) + return op.u.shadow_control.pages; + else + return rc; } int xc_domain_setname(int xc_handle, diff --git a/tools/xc/lib/xc_linux_build.c b/tools/xc/lib/xc_linux_build.c index 83debd904d..98a3fb6a60 100644 --- a/tools/xc/lib/xc_linux_build.c +++ b/tools/xc/lib/xc_linux_build.c @@ -284,7 +284,7 @@ static int setup_guestos(int xc_handle, /* shared_info page starts its life empty. */ shared_info = map_pfn_writeable(pm_handle, shared_info_frame); - memset(shared_info, 0, PAGE_SIZE); + memset(shared_info, 0, sizeof(shared_info_t)); /* Mask all upcalls... */ for ( i = 0; i < MAX_VIRT_CPUS; i++ ) shared_info->vcpu_data[i].evtchn_upcall_mask = 1; diff --git a/tools/xc/lib/xc_linux_restore.c b/tools/xc/lib/xc_linux_restore.c index 65ba875aef..1bbc575889 100644 --- a/tools/xc/lib/xc_linux_restore.c +++ b/tools/xc/lib/xc_linux_restore.c @@ -12,6 +12,15 @@ #define MAX_BATCH_SIZE 1024 +#define DEBUG 0 + +#if DEBUG +#define DPRINTF(_f, _a...) printf ( _f , ## _a ) +#else +#define DPRINTF(_f, _a...) ((void)0) +#endif + + /* This may allow us to create a 'quiet' command-line option, if necessary. */ #define verbose_printf(_f, _a...) \ do { \ @@ -116,8 +125,6 @@ int xc_linux_restore(int xc_handle, if ( (*readerfn)(readerst, name, sizeof(name)) || (*readerfn)(readerst, &nr_pfns, sizeof(unsigned long)) || - (*readerfn)(readerst, &ctxt, sizeof(ctxt)) || - (*readerfn)(readerst, shared_info, PAGE_SIZE) || (*readerfn)(readerst, pfn_to_mfn_frame_list, PAGE_SIZE) ) { ERROR("Error when reading from state file"); @@ -181,10 +188,7 @@ int xc_linux_restore(int xc_handle, if ( (pm_handle = init_pfn_mapper((domid_t)dom)) < 0 ) goto out; - /* Copy saved contents of shared-info page. No checking needed. */ - ppage = map_pfn_writeable(pm_handle, shared_info_frame); - memcpy(ppage, shared_info, PAGE_SIZE); - unmap_pfn(pm_handle, ppage); + /* Build the pfn-to-mfn table. We choose MFN ordering returned by Xen. */ if ( get_pfn_list(xc_handle, dom, pfn_to_mfn_table, nr_pfns) != nr_pfns ) @@ -235,9 +239,16 @@ int xc_linux_restore(int xc_handle, goto out; } - //printf("batch=%d\n",j); + DPRINTF("batch %d\n",j); - if(j==0) break; // our work here is done + if (j == 0) + break; // our work here is done + + if( j > MAX_BATCH_SIZE ) + { + ERROR("Max batch size exceeded. Giving up."); + goto out; + } if ( (*readerfn)(readerst, region_pfn_type, j*sizeof(unsigned long)) ) { @@ -247,6 +258,9 @@ int xc_linux_restore(int xc_handle, for(i=0;i<j;i++) { + if ((region_pfn_type[i]>>29) == 7) + continue; + pfn = region_pfn_type[i] & ~PGT_type_mask; mfn = pfn_to_mfn_table[pfn]; @@ -266,8 +280,9 @@ int xc_linux_restore(int xc_handle, unsigned long *ppage; pfn = region_pfn_type[i] & ~PGT_type_mask; - -//if(pfn_type[i])printf("^pfn=%d %08lx\n",pfn,pfn_type[i]); + + if ((region_pfn_type[i]>>29) == 7) + continue; if (pfn>nr_pfns) { @@ -281,8 +296,6 @@ int xc_linux_restore(int xc_handle, mfn = pfn_to_mfn_table[pfn]; -//if(region_pfn_type[i])printf("i=%d pfn=%d mfn=%d type=%lx\n",i,pfn,mfn,region_pfn_type[i]); - ppage = (unsigned long*) (region_base + i*PAGE_SIZE); if ( (*readerfn)(readerst, ppage, PAGE_SIZE) ) @@ -304,21 +317,12 @@ int xc_linux_restore(int xc_handle, { xpfn = ppage[k] >> PAGE_SHIFT; -/*printf("L1 i=%d pfn=%d mfn=%d k=%d pte=%08lx xpfn=%d\n", - i,pfn,mfn,k,ppage[k],xpfn);*/ - if ( xpfn >= nr_pfns ) { - ERROR("Frame number in type %d page table is out of range. i=%d k=%d pfn=%d nr_pfns=%d",region_pfn_type[i]>>29,i,k,xpfn,nr_pfns); + ERROR("Frame number in type %d page table is out of range. i=%d k=%d pfn=0x%x nr_pfns=%d",region_pfn_type[i]>>29,i,k,xpfn,nr_pfns); goto out; } -#if 0 - if ( (region_pfn_type[xpfn] != NONE) && (ppage[k] & _PAGE_RW) ) - { - ERROR("Write access requested for a restricted frame"); - goto out; - } -#endif + ppage[k] &= (PAGE_SIZE - 1) & ~(_PAGE_GLOBAL | _PAGE_PAT); ppage[k] |= pfn_to_mfn_table[xpfn] << PAGE_SHIFT; } @@ -334,9 +338,6 @@ int xc_linux_restore(int xc_handle, { xpfn = ppage[k] >> PAGE_SHIFT; -/*printf("L2 i=%d pfn=%d mfn=%d k=%d pte=%08lx xpfn=%d\n", - i,pfn,mfn,k,ppage[k],xpfn);*/ - if ( xpfn >= nr_pfns ) { ERROR("Frame number in type %d page table is out of range. i=%d k=%d pfn=%d nr_pfns=%d",region_pfn_type[i]>>29,i,k,xpfn,nr_pfns); @@ -360,18 +361,21 @@ int xc_linux_restore(int xc_handle, default: ERROR("Bogus page type %x page table is out of range. i=%d nr_pfns=%d",region_pfn_type[i],i,nr_pfns); goto out; - } + + } // end of page type switch statement if ( add_mmu_update(xc_handle, mmu, (mfn<<PAGE_SHIFT) | MMU_MACHPHYS_UPDATE, pfn) ) goto out; - } + } // end of 'batch' for loop n+=j; // crude stats } + DPRINTF("Received all pages\n"); + mfn_mapper_close( region_mapper ); /* @@ -386,7 +390,10 @@ int xc_linux_restore(int xc_handle, (pfn_to_mfn_table[i]<<PAGE_SHIFT) | MMU_EXTENDED_COMMAND, MMUEXT_PIN_L1_TABLE) ) + { + printf("ERR pin L1 pfn=%lx mfn=%lx\n"); goto out; + } } else if ( pfn_type[i] == L2TAB ) { @@ -394,7 +401,10 @@ int xc_linux_restore(int xc_handle, (pfn_to_mfn_table[i]<<PAGE_SHIFT) | MMU_EXTENDED_COMMAND, MMUEXT_PIN_L2_TABLE) ) + { + printf("ERR pin L2 pfn=%lx mfn=%lx\n"); goto out; + } } } @@ -403,6 +413,15 @@ int xc_linux_restore(int xc_handle, verbose_printf("\b\b\b\b100%%\nMemory reloaded.\n"); + + if ( (*readerfn)(readerst, &ctxt, sizeof(ctxt)) || + (*readerfn)(readerst, shared_info, PAGE_SIZE) ) + { + ERROR("Error when reading from state file"); + goto out; + } + + /* Uncanonicalise the suspend-record frame number and poke resume rec. */ pfn = ctxt.cpu_ctxt.esi; if ( (pfn >= nr_pfns) || (pfn_type[pfn] != NONE) ) @@ -445,9 +464,13 @@ int xc_linux_restore(int xc_handle, } ctxt.pt_base = pfn_to_mfn_table[pfn] << PAGE_SHIFT; - /* Uncanonicalise the pfn-to-mfn table frame-number list. */ + /* Copy saved contents of shared-info page. No checking needed. */ + ppage = map_pfn_writeable(pm_handle, shared_info_frame); + memcpy(ppage, shared_info, sizeof(shared_info_t)); + unmap_pfn(pm_handle, ppage); + /* Uncanonicalise the pfn-to-mfn table frame-number list. */ if ( (mapper_handle1 = mfn_mapper_init(xc_handle, dom, 1024*1024, PROT_WRITE )) == NULL ) @@ -520,6 +543,8 @@ int xc_linux_restore(int xc_handle, op.u.builddomain.ctxt = &ctxt; rc = do_dom0_op(xc_handle, &op); + DPRINTF("Everything OK!\n"); + out: if ( mmu != NULL ) free(mmu); diff --git a/tools/xc/lib/xc_linux_save.c b/tools/xc/lib/xc_linux_save.c index 02e3ffc352..37dd7c6fce 100644 --- a/tools/xc/lib/xc_linux_save.c +++ b/tools/xc/lib/xc_linux_save.c @@ -11,6 +11,14 @@ #define BATCH_SIZE 1024 /* 1024 pages (4MB) at a time */ +#define DEBUG 0 + +#if DEBUG +#define DPRINTF(_f, _a...) printf ( _f , ## _a ) +#else +#define DPRINTF(_f, _a...) ((void)0) +#endif + /* This may allow us to create a 'quiet' command-line option, if necessary. */ #define verbose_printf(_f, _a...) \ do { \ @@ -22,11 +30,17 @@ /* * Returns TRUE if the given machine frame number has a unique mapping * in the guest's pseudophysical map. + * 0x80000000-3 mark the shared_info, and blk/net rings */ #define MFN_IS_IN_PSEUDOPHYS_MAP(_mfn) \ - (((_mfn) < (1024*1024)) && \ - (live_pfn_to_mfn_table[live_mfn_to_pfn_table[_mfn]] == (_mfn))) - + (((_mfn) < (1024*1024)) && \ + ( ( (live_mfn_to_pfn_table[_mfn] < nr_pfns) && \ + (live_pfn_to_mfn_table[live_mfn_to_pfn_table[_mfn]] == (_mfn)) ) || \ +\ + (live_mfn_to_pfn_table[_mfn] >= 0x80000000 && \ + live_mfn_to_pfn_table[_mfn] <= 0x80000003 ) || \ + live_pfn_to_mfn_table[live_mfn_to_pfn_table[_mfn]] == 0x80000004 ) ) + /* Returns TRUE if MFN is successfully converted to a PFN. */ #define translate_mfn_to_pfn(_pmfn) \ ({ \ @@ -40,6 +54,14 @@ }) +/* test_bit */ +inline int test_bit ( int nr, volatile void * addr) +{ + return ( ((unsigned long*)addr)[nr/(sizeof(unsigned long)*8)] >> + (nr % (sizeof(unsigned long)*8) ) ) & 1; +} + + int xc_linux_save(int xc_handle, u64 domid, unsigned int flags, @@ -47,14 +69,11 @@ int xc_linux_save(int xc_handle, void *writerst ) { dom0_op_t op; - int rc = 1, i, j, k, n; + int rc = 1, i, j, k, n, last_iter, iter = 0; unsigned long mfn; - unsigned int prev_pc, this_pc; int verbose = flags & XCFLAGS_VERBOSE; - //int live = flags & XCFLAGS_LIVE; - - /* state of the new MFN mapper */ - mfn_mapper_t *mapper_handle1, *mapper_handle2; + int live = flags & XCFLAGS_LIVE; + int sent_last_iter, sent_this_iter, max_iters; /* Remember if we stopped the guest, so we can restart it on exit. */ int we_stopped_it = 0; @@ -90,8 +109,13 @@ int xc_linux_save(int xc_handle, unsigned char *region_base; /* A temporary mapping, and a copy, of the guest's suspend record. */ - suspend_record_t *p_srec, srec; + suspend_record_t *p_srec; + /* number of pages we're dealing with */ + unsigned long nr_pfns; + + /* bitmap of pages left to send */ + unsigned long *to_send; if ( mlock(&ctxt, sizeof(ctxt) ) ) { @@ -129,7 +153,8 @@ int xc_linux_save(int xc_handle, goto out; } - sleep(1); + usleep(1000); // 1ms + printf("Sleep for 1ms\n"); } /* A cheesy test to see whether the domain contains valid state. */ @@ -139,11 +164,10 @@ int xc_linux_save(int xc_handle, goto out; } - /* Map the suspend-record MFN to pin it. The page must be owned by domid for this to succeed. */ p_srec = mfn_mapper_map_single(xc_handle, domid, - sizeof(srec), PROT_READ, + sizeof(*p_srec), PROT_READ, ctxt.cpu_ctxt.esi ); if (!p_srec) @@ -152,10 +176,10 @@ int xc_linux_save(int xc_handle, goto out; } - memcpy( &srec, p_srec, sizeof(srec) ); + nr_pfns = p_srec->nr_pfns; /* cheesy sanity check */ - if ( srec.nr_pfns > 1024*1024 ) + if ( nr_pfns > 1024*1024 ) { ERROR("Invalid state record -- pfn count out of range"); goto out; @@ -165,55 +189,13 @@ int xc_linux_save(int xc_handle, live_pfn_to_mfn_frame_list = mfn_mapper_map_single(xc_handle, domid, PAGE_SIZE, PROT_READ, - srec.pfn_to_mfn_frame_list ); + p_srec->pfn_to_mfn_frame_list ); if (!live_pfn_to_mfn_frame_list) { ERROR("Couldn't map pfn_to_mfn_frame_list"); goto out; } - - - if ( (mapper_handle1 = mfn_mapper_init(xc_handle, domid, - 1024*1024, PROT_READ )) - == NULL ) - goto out; - - for ( i = 0; i < (srec.nr_pfns+1023)/1024; i++ ) - { - /* Grab a copy of the pfn-to-mfn table frame list. - This has the effect of preventing the page from being freed and - given to another domain. (though the domain is stopped anyway...) */ - mfn_mapper_queue_entry( mapper_handle1, i<<PAGE_SHIFT, - live_pfn_to_mfn_frame_list[i], - PAGE_SIZE ); - } - - if ( mfn_mapper_flush_queue(mapper_handle1) ) - { - ERROR("Couldn't map pfn_to_mfn table"); - goto out; - } - - live_pfn_to_mfn_table = mfn_mapper_base( mapper_handle1 ); - - - - /* We want zeroed memory so use calloc rather than malloc. */ - pfn_type = calloc(BATCH_SIZE, sizeof(unsigned long)); - - if ( (pfn_type == NULL) ) - { - errno = ENOMEM; - goto out; - } - - if ( mlock( pfn_type, BATCH_SIZE * sizeof(unsigned long) ) ) - { - ERROR("Unable to mlock"); - goto out; - } - /* Track the mfn_to_pfn table down from the domains PT */ { @@ -233,58 +215,112 @@ int xc_linux_save(int xc_handle, mfn_to_pfn_table_start_mfn ); } + /* Map all the frames of the pfn->mfn table. For migrate to succeed, + the guest must not change which frames are used for this purpose. + (its not clear why it would want to change them, and we'll be OK + from a safety POV anyhow. */ + + live_pfn_to_mfn_table = mfn_mapper_map_batch( xc_handle, domid, + PROT_READ, + live_pfn_to_mfn_frame_list, + (nr_pfns+1023)/1024 ); + if( !live_pfn_to_mfn_table ) + { + PERROR("Couldn't map pfn_to_mfn table"); + goto out; + } - /* - * Quick belt and braces sanity check. - */ - for ( i = 0; i < srec.nr_pfns; i++ ) + /* Canonicalise the pfn-to-mfn table frame-number list. */ + memcpy( pfn_to_mfn_frame_list, live_pfn_to_mfn_frame_list, PAGE_SIZE ); + for ( i = 0; i < nr_pfns; i += 1024 ) { - mfn = live_pfn_to_mfn_table[i]; + if ( !translate_mfn_to_pfn(&pfn_to_mfn_frame_list[i/1024]) ) + { + ERROR("Frame # in pfn-to-mfn frame list is not in pseudophys"); + goto out; + } + } - if( live_mfn_to_pfn_table[mfn] != i ) - printf("i=%d mfn=%d live_mfn_to_pfn_table=%d\n", - i,mfn,live_mfn_to_pfn_table[mfn]); + /* At this point, we can start the domain again if we're doing a + live suspend */ + + if( live ) + { + if ( xc_shadow_control( xc_handle, domid, + DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY, + NULL, 0 ) < 0 ) + { + ERROR("Couldn't enable shadow mode"); + goto out; + } + + if ( xc_domain_start( xc_handle, domid ) < 0 ) + { + ERROR("Couldn't restart domain"); + goto out; + } + + last_iter = 0; + sent_last_iter = 1<<20; // 4GB's worth of pages + max_iters = 9; // limit us to 10 time round loop } + else + last_iter = 1; - /* Canonicalise the suspend-record frame number. */ - if ( !translate_mfn_to_pfn(&ctxt.cpu_ctxt.esi) ) + /* Setup to_send bitmap */ { - ERROR("State record is not in range of pseudophys map"); - goto out; + int sz = (nr_pfns/8) + 8; // includes slop at end of array + + to_send = malloc( sz ); + + if (!to_send) + { + ERROR("Couldn't allocate to_send array"); + goto out; + } + memset( to_send, 0xff, sz ); + + if ( mlock( to_send, sz ) ) + { + PERROR("Unable to mlock to_send"); + return 1; + } } - /* Canonicalise each GDT frame number. */ - for ( i = 0; i < ctxt.gdt_ents; i += 512 ) + + /* We want zeroed memory so use calloc rather than malloc. */ + pfn_type = calloc(BATCH_SIZE, sizeof(unsigned long)); + + if ( (pfn_type == NULL) ) { - if ( !translate_mfn_to_pfn(&ctxt.gdt_frames[i]) ) - { - ERROR("GDT frame is not in range of pseudophys map"); - goto out; - } + errno = ENOMEM; + goto out; } - /* Canonicalise the page table base pointer. */ - if ( !MFN_IS_IN_PSEUDOPHYS_MAP(ctxt.pt_base >> PAGE_SHIFT) ) + if ( mlock( pfn_type, BATCH_SIZE * sizeof(unsigned long) ) ) { - ERROR("PT base is not in range of pseudophys map"); - goto out; + ERROR("Unable to mlock"); + goto out; } - ctxt.pt_base = live_mfn_to_pfn_table[ctxt.pt_base >> PAGE_SHIFT] << PAGE_SHIFT; - /* Canonicalise the pfn-to-mfn table frame-number list. */ - memcpy( pfn_to_mfn_frame_list, live_pfn_to_mfn_frame_list, PAGE_SIZE ); - for ( i = 0; i < srec.nr_pfns; i += 1024 ) + + /* + * Quick belt and braces sanity check. + */ +#if DEBUG + for ( i = 0; i < nr_pfns; i++ ) { - if ( !translate_mfn_to_pfn(&pfn_to_mfn_frame_list[i/1024]) ) - { - ERROR("Frame # in pfn-to-mfn frame list is not in pseudophys"); - goto out; - } + mfn = live_pfn_to_mfn_table[i]; + + if( (live_mfn_to_pfn_table[mfn] != i) && (mfn != 0x80000004) ) + printf("i=0x%x mfn=%x live_mfn_to_pfn_table=%x\n", + i,mfn,live_mfn_to_pfn_table[mfn]); } +#endif - /* Start writing out the saved-domain record. */ + /* Map the shared info frame */ live_shinfo = mfn_mapper_map_single(xc_handle, domid, PAGE_SIZE, PROT_READ, shared_info_frame); @@ -295,163 +331,271 @@ int xc_linux_save(int xc_handle, goto out; } + /* Start writing out the saved-domain record. */ + if ( (*writerfn)(writerst, "LinuxGuestRecord", 16) || (*writerfn)(writerst, name, sizeof(name)) || - (*writerfn)(writerst, &srec.nr_pfns, sizeof(unsigned long)) || - (*writerfn)(writerst, &ctxt, sizeof(ctxt)) || - (*writerfn)(writerst, live_shinfo, PAGE_SIZE) || + (*writerfn)(writerst, &nr_pfns, sizeof(unsigned long)) || (*writerfn)(writerst, pfn_to_mfn_frame_list, PAGE_SIZE) ) { ERROR("Error when writing to state file (1)"); goto out; } - munmap(live_shinfo, PAGE_SIZE); - - verbose_printf("Saving memory pages: 0%%"); - - if ( (mapper_handle2 = mfn_mapper_init(xc_handle, domid, - BATCH_SIZE*4096, PROT_READ )) - == NULL ) - goto out; - - region_base = mfn_mapper_base( mapper_handle2 ); /* Now write out each data page, canonicalising page tables as we go... */ - prev_pc = 0; - for ( n = 0; n < srec.nr_pfns; ) - { - this_pc = (n * 100) / srec.nr_pfns; - if ( (this_pc - prev_pc) >= 5 ) - { - verbose_printf("\b\b\b\b%3d%%", this_pc); - prev_pc = this_pc; - } - - for( j = 0, i = n; j < BATCH_SIZE && i < srec.nr_pfns ; j++, i++ ) - { - pfn_type[j] = live_pfn_to_mfn_table[i]; - } + while(1) + { + unsigned int prev_pc, batch, sent_this_iter; - for( j = 0, i = n; j < BATCH_SIZE && i < srec.nr_pfns ; j++, i++ ) - { - /* queue up mappings for all of the pages in this batch */ + iter++; -//printf("region n=%d j=%d i=%d mfn=%d\n",n,j,i,live_pfn_to_mfn_table[i]); - mfn_mapper_queue_entry( mapper_handle2, j<<PAGE_SHIFT, - live_pfn_to_mfn_table[i], - PAGE_SIZE ); - } + sent_this_iter = 0; + prev_pc = 0; + verbose_printf("Saving memory pages: iter %d 0%%", iter); - if( mfn_mapper_flush_queue(mapper_handle2) ) - { - ERROR("Couldn't map page region"); - goto out; - } - - if ( get_pfn_type_batch(xc_handle, domid, j, pfn_type) ) - { - ERROR("get_pfn_type_batch failed"); - goto out; - } - - for( j = 0, i = n; j < BATCH_SIZE && i < srec.nr_pfns ; j++, i++ ) + n=0; + while( n < nr_pfns ) { - if((pfn_type[j]>>29) == 7) + unsigned int this_pc = (n * 100) / nr_pfns; + if ( (this_pc - prev_pc) >= 5 ) { - ERROR("bogus page"); - goto out; + verbose_printf("\b\b\b\b%3d%%", this_pc); + prev_pc = this_pc; } - /* canonicalise mfn->pfn */ - pfn_type[j] = (pfn_type[j] & PGT_type_mask) | - live_mfn_to_pfn_table[pfn_type[j]&~PGT_type_mask]; - -/* if(pfn_type[j]>>29) - printf("i=%d type=%d\n",i,pfn_type[i]); */ - } - - if ( (*writerfn)(writerst, &j, sizeof(int) ) ) - { - ERROR("Error when writing to state file (2)"); - goto out; - } + /* load pfn_type[] with the mfn of all the pages we're doing in + this batch. */ - if ( (*writerfn)(writerst, pfn_type, sizeof(unsigned long)*j ) ) - { - ERROR("Error when writing to state file (3)"); - goto out; - } + for( batch = 0; batch < BATCH_SIZE && n < nr_pfns ; n++ ) + { + if ( !test_bit(n, to_send ) ) continue; + pfn_type[batch] = live_pfn_to_mfn_table[n]; - for( j = 0, i = n; j < BATCH_SIZE && i < srec.nr_pfns ; j++, i++ ) - { - /* write out pages in batch */ + if( pfn_type[batch] == 0x80000004 ) + { + DPRINTF("Skip netbuf pfn %lx. mfn %lx\n",n,pfn_type[batch]); + continue; + } - if ( ((pfn_type[j] & PGT_type_mask) == L1TAB) || - ((pfn_type[j] & PGT_type_mask) == L2TAB) ) + if(iter>1) { DPRINTF("pfn=%x mfn=%x\n",n,pfn_type[batch]); } + + batch++; + } + + DPRINTF("batch %d:%d (n=%d)\n",iter,batch,n); + + if(batch == 0) goto skip; // vanishingly unlikely... + + if ( (region_base = mfn_mapper_map_batch( xc_handle, domid, + PROT_READ, + pfn_type, + batch )) == 0) + { + PERROR("map batch failed"); + goto out; + } + + if ( get_pfn_type_batch(xc_handle, domid, batch, pfn_type) ) { + ERROR("get_pfn_type_batch failed"); + goto out; + } + + for( j = 0; j < batch; j++ ) + { + if((pfn_type[j]>>29) == 7) + { + DPRINTF("type fail: page %i mfn %08lx\n",j,pfn_type[j]); + continue; + } - memcpy(page, region_base + (PAGE_SIZE*j), PAGE_SIZE); + /* canonicalise mfn->pfn */ + pfn_type[j] = (pfn_type[j] & PGT_type_mask) | + live_mfn_to_pfn_table[pfn_type[j]&~PGT_type_mask]; + } + + + if ( (*writerfn)(writerst, &batch, sizeof(int) ) ) + { + ERROR("Error when writing to state file (2)"); + goto out; + } - for ( k = 0; - k < (((pfn_type[j] & PGT_type_mask) == L2TAB) ? - (HYPERVISOR_VIRT_START >> L2_PAGETABLE_SHIFT) : 1024); - k++ ) + if ( (*writerfn)(writerst, pfn_type, sizeof(unsigned long)*j ) ) + { + ERROR("Error when writing to state file (3)"); + goto out; + } + + /* entering this loop, pfn_type is now in pfns (Not mfns) */ + for( j = 0; j < batch; j++ ) + { + /* write out pages in batch */ + + if((pfn_type[j]>>29) == 7) { - if ( !(page[k] & _PAGE_PRESENT) ) continue; - mfn = page[k] >> PAGE_SHIFT; - - if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) ) + DPRINTF("SKIP BOGUS page %i mfn %08lx\n",j,pfn_type[j]); + continue; + } + + if ( ((pfn_type[j] & PGT_type_mask) == L1TAB) || + ((pfn_type[j] & PGT_type_mask) == L2TAB) ) + { + + memcpy(page, region_base + (PAGE_SIZE*j), PAGE_SIZE); + + for ( k = 0; + k < (((pfn_type[j] & PGT_type_mask) == L2TAB) ? + (HYPERVISOR_VIRT_START >> L2_PAGETABLE_SHIFT) : 1024); + k++ ) + { + unsigned long pfn; + + if ( !(page[k] & _PAGE_PRESENT) ) continue; + mfn = page[k] >> PAGE_SHIFT; + pfn = live_mfn_to_pfn_table[mfn]; + + if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) ) + { + // I don't think this should ever happen + + printf("FNI %d : [%08lx,%d] pte=%08lx, mfn=%08lx, pfn=%08lx [mfn]=%08lx\n", + j, pfn_type[j], k, + page[k], mfn, live_mfn_to_pfn_table[mfn], + (live_mfn_to_pfn_table[mfn]<nr_pfns)? + live_pfn_to_mfn_table[live_mfn_to_pfn_table[mfn]]: 0xdeadbeef); + + pfn = 0; // be suspicious, very suspicious + + //goto out; // let's try our luck + + + } + page[k] &= PAGE_SIZE - 1; + page[k] |= pfn << PAGE_SHIFT; + +#if DEBUG + printf("L%d i=%d pfn=%d mfn=%d k=%d pte=%08lx xpfn=%d\n", + pfn_type[j]>>29, + j,i,mfn,k,page[k],page[k]>>PAGE_SHIFT); +#endif + + } /* end of page table rewrite for loop */ + + if ( (*writerfn)(writerst, page, PAGE_SIZE) ) + { + ERROR("Error when writing to state file (4)"); + goto out; + } + + } /* end of it's a PT page */ + else + { /* normal page */ + if ( (*writerfn)(writerst, region_base + (PAGE_SIZE*j), PAGE_SIZE) ) { - ERROR("Frame number in pagetable page is invalid"); + ERROR("Error when writing to state file (5)"); goto out; } - page[k] &= PAGE_SIZE - 1; - page[k] |= live_mfn_to_pfn_table[mfn] << PAGE_SHIFT; + } + } /* end of the write out for this batch */ + + sent_this_iter += batch; - /* - printf("L%d i=%d pfn=%d mfn=%d k=%d pte=%08lx xpfn=%d\n", - pfn_type[j]>>29, - j,i,mfn,k,page[k],page[k]>>PAGE_SHIFT); - */ + } /* end of this while loop for this iteration */ - } + munmap(region_base, batch*PAGE_SIZE); - if ( (*writerfn)(writerst, page, PAGE_SIZE) ) - { - ERROR("Error when writing to state file (4)"); - goto out; - } + skip: + + verbose_printf("\b\b\b\b100%% (%d pages)\n", sent_this_iter ); + + if ( last_iter ) + break; + if ( live ) + { + if ( ( sent_this_iter > (sent_last_iter * 0.95) ) || + (iter >= max_iters) || (sent_this_iter < 10) ) + { + printf("Start last iteration\n"); + last_iter = 1; - } - else + xc_domain_stop_sync( xc_handle, domid ); + + } + + if ( xc_shadow_control( xc_handle, domid, + DOM0_SHADOW_CONTROL_OP_CLEAN, + to_send, nr_pfns ) != nr_pfns ) { - if ( (*writerfn)(writerst, region_base + (PAGE_SIZE*j), PAGE_SIZE) ) - { - ERROR("Error when writing to state file (5)"); - goto out; - } + ERROR("Error flushing shadow PT"); + goto out; } + + sent_last_iter = sent_this_iter; } - - n+=j; /* i is the master loop counter */ - } - verbose_printf("\b\b\b\b100%%\nMemory saved.\n"); + + } /* end of while 1 */ + + DPRINTF("All memory is saved\n"); /* Success! */ rc = 0; - + /* Zero terminate */ if ( (*writerfn)(writerst, &rc, sizeof(int)) ) { ERROR("Error when writing to state file (6)"); goto out; } - + + /* Get the final execution context */ + op.cmd = DOM0_GETDOMAININFO; + op.u.getdomaininfo.domain = (domid_t)domid; + op.u.getdomaininfo.ctxt = &ctxt; + if ( (do_dom0_op(xc_handle, &op) < 0) || + ((u64)op.u.getdomaininfo.domain != domid) ) + { + PERROR("Could not get info on domain"); + goto out; + } + + /* Canonicalise the suspend-record frame number. */ + if ( !translate_mfn_to_pfn(&ctxt.cpu_ctxt.esi) ) + { + ERROR("State record is not in range of pseudophys map"); + goto out; + } + + /* Canonicalise each GDT frame number. */ + for ( i = 0; i < ctxt.gdt_ents; i += 512 ) + { + if ( !translate_mfn_to_pfn(&ctxt.gdt_frames[i]) ) + { + ERROR("GDT frame is not in range of pseudophys map"); + goto out; + } + } + + /* Canonicalise the page table base pointer. */ + if ( !MFN_IS_IN_PSEUDOPHYS_MAP(ctxt.pt_base >> PAGE_SHIFT) ) + { + ERROR("PT base is not in range of pseudophys map"); + goto out; + } + ctxt.pt_base = live_mfn_to_pfn_table[ctxt.pt_base >> PAGE_SHIFT] << PAGE_SHIFT; + + if ( (*writerfn)(writerst, &ctxt, sizeof(ctxt)) || + (*writerfn)(writerst, live_shinfo, PAGE_SIZE) ) + { + ERROR("Error when writing to state file (1)"); + goto out; + } + munmap(live_shinfo, PAGE_SIZE); out: /* Restart the domain if we had to stop it to save its state. */ diff --git a/tools/xc/lib/xc_private.c b/tools/xc/lib/xc_private.c index d137176ca8..41eb2e744a 100644 --- a/tools/xc/lib/xc_private.c +++ b/tools/xc/lib/xc_private.c @@ -47,6 +47,31 @@ void unmap_pfn(int pm_handle, void *vaddr) /*******************/ +void * mfn_mapper_map_batch(int xc_handle, domid_t dom, int prot, + unsigned long *arr, int num ) +{ + privcmd_mmapbatch_t ioctlx; + void *addr; + addr = mmap( NULL, num*PAGE_SIZE, prot, MAP_SHARED, xc_handle, 0 ); + if (addr) + { + ioctlx.num=num; + ioctlx.dom=dom; + ioctlx.addr=(unsigned long)addr; + ioctlx.arr=arr; + if ( ioctl( xc_handle, IOCTL_PRIVCMD_MMAPBATCH, &ioctlx ) <0 ) + { + perror("XXXXXXXX"); + munmap(addr, num*PAGE_SIZE); + return 0; + } + } + return addr; + +} + +/*******************/ + void * mfn_mapper_map_single(int xc_handle, domid_t dom, int size, int prot, unsigned long mfn ) @@ -64,7 +89,10 @@ void * mfn_mapper_map_single(int xc_handle, domid_t dom, entry.mfn=mfn; entry.npages=(size+PAGE_SIZE-1)>>PAGE_SHIFT; if ( ioctl( xc_handle, IOCTL_PRIVCMD_MMAP, &ioctlx ) <0 ) + { + munmap(addr, size); return 0; + } } return addr; } @@ -295,7 +323,7 @@ static int flush_mmu_updates(int xc_handle, mmu_t *mmu) hypercall.op = __HYPERVISOR_mmu_update; hypercall.arg[0] = (unsigned long)mmu->updates; - hypercall.arg[1] = (unsigned long)mmu->idx; + hypercall.arg[1] = (unsigned long)&(mmu->idx); if ( mlock(mmu->updates, sizeof(mmu->updates)) != 0 ) { @@ -342,3 +370,53 @@ int finish_mmu_updates(int xc_handle, mmu_t *mmu) { return flush_mmu_updates(xc_handle, mmu); } + + +/***********************************************************/ + +/* this function is a hack until we get proper synchronous domain stop */ + +int xc_domain_stop_sync( int xc_handle, domid_t domid ) +{ + dom0_op_t op; + int i; + + + op.cmd = DOM0_STOPDOMAIN; + op.u.stopdomain.domain = (domid_t)domid; + if ( do_dom0_op(xc_handle, &op) != 0 ) + { + PERROR("Stopping target domain failed"); + goto out; + } + + usleep(100); // 100us + + for(i=0;;i++) + { + if (i>0) + if (i==1) printf("Sleep."); + else printf("."); + + op.cmd = DOM0_GETDOMAININFO; + op.u.getdomaininfo.domain = (domid_t)domid; + op.u.getdomaininfo.ctxt = NULL; + if ( (do_dom0_op(xc_handle, &op) < 0) || + ((u64)op.u.getdomaininfo.domain != domid) ) + { + PERROR("Could not get info on domain"); + goto out; + } + + if ( op.u.getdomaininfo.state == DOMSTATE_STOPPED ) + { + printf("\nDomain %lld stopped\n",domid); + return 0; + } + + usleep(1000); + } + +out: + return -1; +} diff --git a/tools/xc/lib/xc_private.h b/tools/xc/lib/xc_private.h index 3a2e3ea9f1..e3eff85e59 100644 --- a/tools/xc/lib/xc_private.h +++ b/tools/xc/lib/xc_private.h @@ -232,6 +232,9 @@ typedef struct mfn_mapper { void * mfn_mapper_map_single(int xc_handle, domid_t dom, int size, int prot, unsigned long mfn ); +void * mfn_mapper_map_batch(int xc_handle, domid_t dom, int prot, + unsigned long *arr, int num ); + mfn_mapper_t * mfn_mapper_init(int xc_handle, domid_t dom, int size, int prot); void * mfn_mapper_base(mfn_mapper_t *t); @@ -245,5 +248,6 @@ void * mfn_mapper_queue_entry(mfn_mapper_t *t, int offset, /*********************/ +int xc_domain_stop_sync( int xc_handle, domid_t dom ); #endif /* __XC_PRIVATE_H__ */ diff --git a/tools/xc/py/Xc.c b/tools/xc/py/Xc.c index 974ad994b7..b2ae143eda 100644 --- a/tools/xc/py/Xc.c +++ b/tools/xc/py/Xc.c @@ -191,17 +191,17 @@ static PyObject *pyxc_linux_save(PyObject *self, u64 dom; char *state_file; - int progress = 1; + int progress = 1, live = -1; unsigned int flags = 0; - static char *kwd_list[] = { "dom", "state_file", "progress", NULL }; + static char *kwd_list[] = { "dom", "state_file", "progress", "live", NULL }; - if ( !PyArg_ParseTupleAndKeywords(args, kwds, "Ls|i", kwd_list, - &dom, &state_file, &progress) ) + if ( !PyArg_ParseTupleAndKeywords(args, kwds, "Ls|ii", kwd_list, + &dom, &state_file, &progress, &live) ) return NULL; - if ( progress ) - flags |= XCFLAGS_VERBOSE; + if (progress) flags |= XCFLAGS_VERBOSE; + if (live == 1) flags |= XCFLAGS_LIVE; if ( strncmp(state_file,"tcp:", strlen("tcp:")) == 0 ) { @@ -226,6 +226,8 @@ static PyObject *pyxc_linux_save(PyObject *self, return 0; } + if (live == -1) flags |= XCFLAGS_LIVE; // default to live for tcp + strncpy( server, state_file+strlen("tcp://"), max_namelen); server[max_namelen-1]='\0'; if ( (port_s = strchr(server,':')) != NULL ) @@ -1270,7 +1272,7 @@ static PyObject *pyxc_shadow_control(PyObject *self, &dom, &op) ) return NULL; - if ( xc_shadow_control(xc->xc_handle, dom, op) != 0 ) + if ( xc_shadow_control(xc->xc_handle, dom, op, NULL, 0) < 0 ) return PyErr_SetFromErrno(xc_error); Py_INCREF(zero); diff --git a/tools/xend/lib/utils.c b/tools/xend/lib/utils.c index 297976e9be..441b62f153 100644 --- a/tools/xend/lib/utils.c +++ b/tools/xend/lib/utils.c @@ -723,6 +723,11 @@ static PyObject *xu_port_new(PyObject *self, PyObject *args) goto fail4; } + xup->interface->tx_resp_prod = 0; + xup->interface->rx_req_prod = 0; + xup->interface->tx_req_prod = 0; + xup->interface->rx_resp_prod = 0; + xup->tx_req_cons = 0; xup->tx_resp_prod = 0; xup->rx_req_prod = 0; |