about summary refs log tree commit diff stats
path: root/tools
diff options
context:
space:
mode:
Diffstat (limited to 'tools')
-rwxr-xr-x tools/examples/xc_dom_control.py 4
-rw-r--r-- tools/xc/lib/xc.h 10
-rw-r--r-- tools/xc/lib/xc_domain.c 15
-rw-r--r-- tools/xc/lib/xc_linux_build.c 2
-rw-r--r-- tools/xc/lib/xc_linux_restore.c 83
-rw-r--r-- tools/xc/lib/xc_linux_save.c 546
-rw-r--r-- tools/xc/lib/xc_private.c 80
-rw-r--r-- tools/xc/lib/xc_private.h 4
-rw-r--r-- tools/xc/py/Xc.c 16
-rw-r--r-- tools/xend/lib/utils.c 5
10 files changed, 517 insertions, 248 deletions
diff --git a/tools/examples/xc_dom_control.py b/tools/examples/xc_dom_control.py
index 60bd65d0ac..877afa53d1 100755
--- a/tools/examples/xc_dom_control.py
+++ b/tools/examples/xc_dom_control.py
@@ -139,10 +139,12 @@ elif cmd == 'suspend':
xc.domain_stop( dom=dom )
while not xc.domain_getinfo( first_dom=dom, max_doms=1 )[0]['stopped']:
- time.sleep(0.1);
+ print "Sleep..."
+ time.sleep(0.001);
rc = xc.linux_save( dom=dom, state_file=file, progress=1)
if rc == 0 : xc.domain_destroy( dom=dom, force=1 )
+ else: xc.domain_start( dom=dom ) # sensible for production use
elif cmd == 'cpu_bvtslice':
if len(sys.argv) < 3:
diff --git a/tools/xc/lib/xc.h b/tools/xc/lib/xc.h
index a0205bcc6b..2132d6e7c1 100644
--- a/tools/xc/lib/xc.h
+++ b/tools/xc/lib/xc.h
@@ -57,7 +57,10 @@ int xc_domain_getinfo(int xc_handle,
int xc_shadow_control(int xc_handle,
u64 domid,
- unsigned int sop);
+ unsigned int sop,
+ unsigned long *dirty_bitmap,
+ unsigned long pages);
+
#define XCFLAGS_VERBOSE 1
#define XCFLAGS_LIVE 2
@@ -247,11 +250,6 @@ int xc_readconsolering(int xc_handle,
int xc_physinfo(int xc_handle,
xc_physinfo_t *info);
-
-int xc_shadow_control(int xc_handle,
- u64 domid,
- unsigned int sop);
-
int xc_domain_setname(int xc_handle,
u64 domid,
char *name);
diff --git a/tools/xc/lib/xc_domain.c b/tools/xc/lib/xc_domain.c
index c26a3f87c3..6d0dd6d0f3 100644
--- a/tools/xc/lib/xc_domain.c
+++ b/tools/xc/lib/xc_domain.c
@@ -109,13 +109,24 @@ int xc_domain_getinfo(int xc_handle,
int xc_shadow_control(int xc_handle,
u64 domid,
- unsigned int sop)
+ unsigned int sop,
+ unsigned long *dirty_bitmap,
+ unsigned long pages)
{
+ int rc;
dom0_op_t op;
op.cmd = DOM0_SHADOW_CONTROL;
op.u.shadow_control.domain = (domid_t)domid;
op.u.shadow_control.op = sop;
- return do_dom0_op(xc_handle, &op);
+ op.u.shadow_control.dirty_bitmap = dirty_bitmap;
+ op.u.shadow_control.pages = pages;
+
+ rc = do_dom0_op(xc_handle, &op);
+
+ if ( rc == 0 )
+ return op.u.shadow_control.pages;
+ else
+ return rc;
}
int xc_domain_setname(int xc_handle,
diff --git a/tools/xc/lib/xc_linux_build.c b/tools/xc/lib/xc_linux_build.c
index 83debd904d..98a3fb6a60 100644
--- a/tools/xc/lib/xc_linux_build.c
+++ b/tools/xc/lib/xc_linux_build.c
@@ -284,7 +284,7 @@ static int setup_guestos(int xc_handle,
/* shared_info page starts its life empty. */
shared_info = map_pfn_writeable(pm_handle, shared_info_frame);
- memset(shared_info, 0, PAGE_SIZE);
+ memset(shared_info, 0, sizeof(shared_info_t));
/* Mask all upcalls... */
for ( i = 0; i < MAX_VIRT_CPUS; i++ )
shared_info->vcpu_data[i].evtchn_upcall_mask = 1;
diff --git a/tools/xc/lib/xc_linux_restore.c b/tools/xc/lib/xc_linux_restore.c
index 65ba875aef..1bbc575889 100644
--- a/tools/xc/lib/xc_linux_restore.c
+++ b/tools/xc/lib/xc_linux_restore.c
@@ -12,6 +12,15 @@
#define MAX_BATCH_SIZE 1024
+#define DEBUG 0
+
+#if DEBUG
+#define DPRINTF(_f, _a...) printf ( _f , ## _a )
+#else
+#define DPRINTF(_f, _a...) ((void)0)
+#endif
+
+
/* This may allow us to create a 'quiet' command-line option, if necessary. */
#define verbose_printf(_f, _a...) \
do { \
@@ -116,8 +125,6 @@ int xc_linux_restore(int xc_handle,
if ( (*readerfn)(readerst, name, sizeof(name)) ||
(*readerfn)(readerst, &nr_pfns, sizeof(unsigned long)) ||
- (*readerfn)(readerst, &ctxt, sizeof(ctxt)) ||
- (*readerfn)(readerst, shared_info, PAGE_SIZE) ||
(*readerfn)(readerst, pfn_to_mfn_frame_list, PAGE_SIZE) )
{
ERROR("Error when reading from state file");
@@ -181,10 +188,7 @@ int xc_linux_restore(int xc_handle,
if ( (pm_handle = init_pfn_mapper((domid_t)dom)) < 0 )
goto out;
- /* Copy saved contents of shared-info page. No checking needed. */
- ppage = map_pfn_writeable(pm_handle, shared_info_frame);
- memcpy(ppage, shared_info, PAGE_SIZE);
- unmap_pfn(pm_handle, ppage);
+
/* Build the pfn-to-mfn table. We choose MFN ordering returned by Xen. */
if ( get_pfn_list(xc_handle, dom, pfn_to_mfn_table, nr_pfns) != nr_pfns )
@@ -235,9 +239,16 @@ int xc_linux_restore(int xc_handle,
goto out;
}
- //printf("batch=%d\n",j);
+ DPRINTF("batch %d\n",j);
- if(j==0) break; // our work here is done
+ if (j == 0)
+ break; // our work here is done
+
+ if( j > MAX_BATCH_SIZE )
+ {
+ ERROR("Max batch size exceeded. Giving up.");
+ goto out;
+ }
if ( (*readerfn)(readerst, region_pfn_type, j*sizeof(unsigned long)) )
{
@@ -247,6 +258,9 @@ int xc_linux_restore(int xc_handle,
for(i=0;i<j;i++)
{
+ if ((region_pfn_type[i]>>29) == 7)
+ continue;
+
pfn = region_pfn_type[i] & ~PGT_type_mask;
mfn = pfn_to_mfn_table[pfn];
@@ -266,8 +280,9 @@ int xc_linux_restore(int xc_handle,
unsigned long *ppage;
pfn = region_pfn_type[i] & ~PGT_type_mask;
-
-//if(pfn_type[i])printf("^pfn=%d %08lx\n",pfn,pfn_type[i]);
+
+ if ((region_pfn_type[i]>>29) == 7)
+ continue;
if (pfn>nr_pfns)
{
@@ -281,8 +296,6 @@ int xc_linux_restore(int xc_handle,
mfn = pfn_to_mfn_table[pfn];
-//if(region_pfn_type[i])printf("i=%d pfn=%d mfn=%d type=%lx\n",i,pfn,mfn,region_pfn_type[i]);
-
ppage = (unsigned long*) (region_base + i*PAGE_SIZE);
if ( (*readerfn)(readerst, ppage, PAGE_SIZE) )
@@ -304,21 +317,12 @@ int xc_linux_restore(int xc_handle,
{
xpfn = ppage[k] >> PAGE_SHIFT;
-/*printf("L1 i=%d pfn=%d mfn=%d k=%d pte=%08lx xpfn=%d\n",
- i,pfn,mfn,k,ppage[k],xpfn);*/
-
if ( xpfn >= nr_pfns )
{
- ERROR("Frame number in type %d page table is out of range. i=%d k=%d pfn=%d nr_pfns=%d",region_pfn_type[i]>>29,i,k,xpfn,nr_pfns);
+ ERROR("Frame number in type %d page table is out of range. i=%d k=%d pfn=0x%x nr_pfns=%d",region_pfn_type[i]>>29,i,k,xpfn,nr_pfns);
goto out;
}
-#if 0
- if ( (region_pfn_type[xpfn] != NONE) && (ppage[k] & _PAGE_RW) )
- {
- ERROR("Write access requested for a restricted frame");
- goto out;
- }
-#endif
+
ppage[k] &= (PAGE_SIZE - 1) & ~(_PAGE_GLOBAL | _PAGE_PAT);
ppage[k] |= pfn_to_mfn_table[xpfn] << PAGE_SHIFT;
}
@@ -334,9 +338,6 @@ int xc_linux_restore(int xc_handle,
{
xpfn = ppage[k] >> PAGE_SHIFT;
-/*printf("L2 i=%d pfn=%d mfn=%d k=%d pte=%08lx xpfn=%d\n",
- i,pfn,mfn,k,ppage[k],xpfn);*/
-
if ( xpfn >= nr_pfns )
{
ERROR("Frame number in type %d page table is out of range. i=%d k=%d pfn=%d nr_pfns=%d",region_pfn_type[i]>>29,i,k,xpfn,nr_pfns);
@@ -360,18 +361,21 @@ int xc_linux_restore(int xc_handle,
default:
ERROR("Bogus page type %x page table is out of range. i=%d nr_pfns=%d",region_pfn_type[i],i,nr_pfns);
goto out;
- }
+
+ } // end of page type switch statement
if ( add_mmu_update(xc_handle, mmu,
(mfn<<PAGE_SHIFT) | MMU_MACHPHYS_UPDATE, pfn) )
goto out;
- }
+ } // end of 'batch' for loop
n+=j; // crude stats
}
+ DPRINTF("Received all pages\n");
+
mfn_mapper_close( region_mapper );
/*
@@ -386,7 +390,10 @@ int xc_linux_restore(int xc_handle,
(pfn_to_mfn_table[i]<<PAGE_SHIFT) |
MMU_EXTENDED_COMMAND,
MMUEXT_PIN_L1_TABLE) )
+ {
+ printf("ERR pin L1 pfn=%lx mfn=%lx\n");
goto out;
+ }
}
else if ( pfn_type[i] == L2TAB )
{
@@ -394,7 +401,10 @@ int xc_linux_restore(int xc_handle,
(pfn_to_mfn_table[i]<<PAGE_SHIFT) |
MMU_EXTENDED_COMMAND,
MMUEXT_PIN_L2_TABLE) )
+ {
+ printf("ERR pin L2 pfn=%lx mfn=%lx\n");
goto out;
+ }
}
}
@@ -403,6 +413,15 @@ int xc_linux_restore(int xc_handle,
verbose_printf("\b\b\b\b100%%\nMemory reloaded.\n");
+
+ if ( (*readerfn)(readerst, &ctxt, sizeof(ctxt)) ||
+ (*readerfn)(readerst, shared_info, PAGE_SIZE) )
+ {
+ ERROR("Error when reading from state file");
+ goto out;
+ }
+
+
/* Uncanonicalise the suspend-record frame number and poke resume rec. */
pfn = ctxt.cpu_ctxt.esi;
if ( (pfn >= nr_pfns) || (pfn_type[pfn] != NONE) )
@@ -445,9 +464,13 @@ int xc_linux_restore(int xc_handle,
}
ctxt.pt_base = pfn_to_mfn_table[pfn] << PAGE_SHIFT;
- /* Uncanonicalise the pfn-to-mfn table frame-number list. */
+ /* Copy saved contents of shared-info page. No checking needed. */
+ ppage = map_pfn_writeable(pm_handle, shared_info_frame);
+ memcpy(ppage, shared_info, sizeof(shared_info_t));
+ unmap_pfn(pm_handle, ppage);
+ /* Uncanonicalise the pfn-to-mfn table frame-number list. */
if ( (mapper_handle1 = mfn_mapper_init(xc_handle, dom,
1024*1024, PROT_WRITE ))
== NULL )
@@ -520,6 +543,8 @@ int xc_linux_restore(int xc_handle,
op.u.builddomain.ctxt = &ctxt;
rc = do_dom0_op(xc_handle, &op);
+ DPRINTF("Everything OK!\n");
+
out:
if ( mmu != NULL )
free(mmu);
diff --git a/tools/xc/lib/xc_linux_save.c b/tools/xc/lib/xc_linux_save.c
index 02e3ffc352..37dd7c6fce 100644
--- a/tools/xc/lib/xc_linux_save.c
+++ b/tools/xc/lib/xc_linux_save.c
@@ -11,6 +11,14 @@
#define BATCH_SIZE 1024 /* 1024 pages (4MB) at a time */
+#define DEBUG 0
+
+#if DEBUG
+#define DPRINTF(_f, _a...) printf ( _f , ## _a )
+#else
+#define DPRINTF(_f, _a...) ((void)0)
+#endif
+
/* This may allow us to create a 'quiet' command-line option, if necessary. */
#define verbose_printf(_f, _a...) \
do { \
@@ -22,11 +30,17 @@
/*
* Returns TRUE if the given machine frame number has a unique mapping
* in the guest's pseudophysical map.
+ * 0x80000000-3 mark the shared_info, and blk/net rings
*/
#define MFN_IS_IN_PSEUDOPHYS_MAP(_mfn) \
- (((_mfn) < (1024*1024)) && \
- (live_pfn_to_mfn_table[live_mfn_to_pfn_table[_mfn]] == (_mfn)))
-
+ (((_mfn) < (1024*1024)) && \
+ ( ( (live_mfn_to_pfn_table[_mfn] < nr_pfns) && \
+ (live_pfn_to_mfn_table[live_mfn_to_pfn_table[_mfn]] == (_mfn)) ) || \
+\
+ (live_mfn_to_pfn_table[_mfn] >= 0x80000000 && \
+ live_mfn_to_pfn_table[_mfn] <= 0x80000003 ) || \
+ live_pfn_to_mfn_table[live_mfn_to_pfn_table[_mfn]] == 0x80000004 ) )
+
/* Returns TRUE if MFN is successfully converted to a PFN. */
#define translate_mfn_to_pfn(_pmfn) \
({ \
@@ -40,6 +54,14 @@
})
+/* test_bit */
+inline int test_bit ( int nr, volatile void * addr)
+{
+ return ( ((unsigned long*)addr)[nr/(sizeof(unsigned long)*8)] >>
+ (nr % (sizeof(unsigned long)*8) ) ) & 1;
+}
+
+
int xc_linux_save(int xc_handle,
u64 domid,
unsigned int flags,
@@ -47,14 +69,11 @@ int xc_linux_save(int xc_handle,
void *writerst )
{
dom0_op_t op;
- int rc = 1, i, j, k, n;
+ int rc = 1, i, j, k, n, last_iter, iter = 0;
unsigned long mfn;
- unsigned int prev_pc, this_pc;
int verbose = flags & XCFLAGS_VERBOSE;
- //int live = flags & XCFLAGS_LIVE;
-
- /* state of the new MFN mapper */
- mfn_mapper_t *mapper_handle1, *mapper_handle2;
+ int live = flags & XCFLAGS_LIVE;
+ int sent_last_iter, sent_this_iter, max_iters;
/* Remember if we stopped the guest, so we can restart it on exit. */
int we_stopped_it = 0;
@@ -90,8 +109,13 @@ int xc_linux_save(int xc_handle,
unsigned char *region_base;
/* A temporary mapping, and a copy, of the guest's suspend record. */
- suspend_record_t *p_srec, srec;
+ suspend_record_t *p_srec;
+ /* number of pages we're dealing with */
+ unsigned long nr_pfns;
+
+ /* bitmap of pages left to send */
+ unsigned long *to_send;
if ( mlock(&ctxt, sizeof(ctxt) ) )
{
@@ -129,7 +153,8 @@ int xc_linux_save(int xc_handle,
goto out;
}
- sleep(1);
+ usleep(1000); // 1ms
+ printf("Sleep for 1ms\n");
}
/* A cheesy test to see whether the domain contains valid state. */
@@ -139,11 +164,10 @@ int xc_linux_save(int xc_handle,
goto out;
}
-
/* Map the suspend-record MFN to pin it. The page must be owned by
domid for this to succeed. */
p_srec = mfn_mapper_map_single(xc_handle, domid,
- sizeof(srec), PROT_READ,
+ sizeof(*p_srec), PROT_READ,
ctxt.cpu_ctxt.esi );
if (!p_srec)
@@ -152,10 +176,10 @@ int xc_linux_save(int xc_handle,
goto out;
}
- memcpy( &srec, p_srec, sizeof(srec) );
+ nr_pfns = p_srec->nr_pfns;
/* cheesy sanity check */
- if ( srec.nr_pfns > 1024*1024 )
+ if ( nr_pfns > 1024*1024 )
{
ERROR("Invalid state record -- pfn count out of range");
goto out;
@@ -165,55 +189,13 @@ int xc_linux_save(int xc_handle,
live_pfn_to_mfn_frame_list =
mfn_mapper_map_single(xc_handle, domid,
PAGE_SIZE, PROT_READ,
- srec.pfn_to_mfn_frame_list );
+ p_srec->pfn_to_mfn_frame_list );
if (!live_pfn_to_mfn_frame_list)
{
ERROR("Couldn't map pfn_to_mfn_frame_list");
goto out;
}
-
-
- if ( (mapper_handle1 = mfn_mapper_init(xc_handle, domid,
- 1024*1024, PROT_READ ))
- == NULL )
- goto out;
-
- for ( i = 0; i < (srec.nr_pfns+1023)/1024; i++ )
- {
- /* Grab a copy of the pfn-to-mfn table frame list.
- This has the effect of preventing the page from being freed and
- given to another domain. (though the domain is stopped anyway...) */
- mfn_mapper_queue_entry( mapper_handle1, i<<PAGE_SHIFT,
- live_pfn_to_mfn_frame_list[i],
- PAGE_SIZE );
- }
-
- if ( mfn_mapper_flush_queue(mapper_handle1) )
- {
- ERROR("Couldn't map pfn_to_mfn table");
- goto out;
- }
-
- live_pfn_to_mfn_table = mfn_mapper_base( mapper_handle1 );
-
-
-
- /* We want zeroed memory so use calloc rather than malloc. */
- pfn_type = calloc(BATCH_SIZE, sizeof(unsigned long));
-
- if ( (pfn_type == NULL) )
- {
- errno = ENOMEM;
- goto out;
- }
-
- if ( mlock( pfn_type, BATCH_SIZE * sizeof(unsigned long) ) )
- {
- ERROR("Unable to mlock");
- goto out;
- }
-
/* Track the mfn_to_pfn table down from the domains PT */
{
@@ -233,58 +215,112 @@ int xc_linux_save(int xc_handle,
mfn_to_pfn_table_start_mfn );
}
+ /* Map all the frames of the pfn->mfn table. For migrate to succeed,
+ the guest must not change which frames are used for this purpose.
+ (its not clear why it would want to change them, and we'll be OK
+ from a safety POV anyhow. */
+
+ live_pfn_to_mfn_table = mfn_mapper_map_batch( xc_handle, domid,
+ PROT_READ,
+ live_pfn_to_mfn_frame_list,
+ (nr_pfns+1023)/1024 );
+ if( !live_pfn_to_mfn_table )
+ {
+ PERROR("Couldn't map pfn_to_mfn table");
+ goto out;
+ }
- /*
- * Quick belt and braces sanity check.
- */
- for ( i = 0; i < srec.nr_pfns; i++ )
+ /* Canonicalise the pfn-to-mfn table frame-number list. */
+ memcpy( pfn_to_mfn_frame_list, live_pfn_to_mfn_frame_list, PAGE_SIZE );
+ for ( i = 0; i < nr_pfns; i += 1024 )
{
- mfn = live_pfn_to_mfn_table[i];
+ if ( !translate_mfn_to_pfn(&pfn_to_mfn_frame_list[i/1024]) )
+ {
+ ERROR("Frame # in pfn-to-mfn frame list is not in pseudophys");
+ goto out;
+ }
+ }
- if( live_mfn_to_pfn_table[mfn] != i )
- printf("i=%d mfn=%d live_mfn_to_pfn_table=%d\n",
- i,mfn,live_mfn_to_pfn_table[mfn]);
+ /* At this point, we can start the domain again if we're doing a
+ live suspend */
+
+ if( live )
+ {
+ if ( xc_shadow_control( xc_handle, domid,
+ DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY,
+ NULL, 0 ) < 0 )
+ {
+ ERROR("Couldn't enable shadow mode");
+ goto out;
+ }
+
+ if ( xc_domain_start( xc_handle, domid ) < 0 )
+ {
+ ERROR("Couldn't restart domain");
+ goto out;
+ }
+
+ last_iter = 0;
+ sent_last_iter = 1<<20; // 4GB's worth of pages
+ max_iters = 9; // limit us to 10 time round loop
}
+ else
+ last_iter = 1;
- /* Canonicalise the suspend-record frame number. */
- if ( !translate_mfn_to_pfn(&ctxt.cpu_ctxt.esi) )
+ /* Setup to_send bitmap */
{
- ERROR("State record is not in range of pseudophys map");
- goto out;
+ int sz = (nr_pfns/8) + 8; // includes slop at end of array
+
+ to_send = malloc( sz );
+
+ if (!to_send)
+ {
+ ERROR("Couldn't allocate to_send array");
+ goto out;
+ }
+ memset( to_send, 0xff, sz );
+
+ if ( mlock( to_send, sz ) )
+ {
+ PERROR("Unable to mlock to_send");
+ return 1;
+ }
}
- /* Canonicalise each GDT frame number. */
- for ( i = 0; i < ctxt.gdt_ents; i += 512 )
+
+ /* We want zeroed memory so use calloc rather than malloc. */
+ pfn_type = calloc(BATCH_SIZE, sizeof(unsigned long));
+
+ if ( (pfn_type == NULL) )
{
- if ( !translate_mfn_to_pfn(&ctxt.gdt_frames[i]) )
- {
- ERROR("GDT frame is not in range of pseudophys map");
- goto out;
- }
+ errno = ENOMEM;
+ goto out;
}
- /* Canonicalise the page table base pointer. */
- if ( !MFN_IS_IN_PSEUDOPHYS_MAP(ctxt.pt_base >> PAGE_SHIFT) )
+ if ( mlock( pfn_type, BATCH_SIZE * sizeof(unsigned long) ) )
{
- ERROR("PT base is not in range of pseudophys map");
- goto out;
+ ERROR("Unable to mlock");
+ goto out;
}
- ctxt.pt_base = live_mfn_to_pfn_table[ctxt.pt_base >> PAGE_SHIFT] << PAGE_SHIFT;
- /* Canonicalise the pfn-to-mfn table frame-number list. */
- memcpy( pfn_to_mfn_frame_list, live_pfn_to_mfn_frame_list, PAGE_SIZE );
- for ( i = 0; i < srec.nr_pfns; i += 1024 )
+
+ /*
+ * Quick belt and braces sanity check.
+ */
+#if DEBUG
+ for ( i = 0; i < nr_pfns; i++ )
{
- if ( !translate_mfn_to_pfn(&pfn_to_mfn_frame_list[i/1024]) )
- {
- ERROR("Frame # in pfn-to-mfn frame list is not in pseudophys");
- goto out;
- }
+ mfn = live_pfn_to_mfn_table[i];
+
+ if( (live_mfn_to_pfn_table[mfn] != i) && (mfn != 0x80000004) )
+ printf("i=0x%x mfn=%x live_mfn_to_pfn_table=%x\n",
+ i,mfn,live_mfn_to_pfn_table[mfn]);
}
+#endif
- /* Start writing out the saved-domain record. */
+ /* Map the shared info frame */
live_shinfo = mfn_mapper_map_single(xc_handle, domid,
PAGE_SIZE, PROT_READ,
shared_info_frame);
@@ -295,163 +331,271 @@ int xc_linux_save(int xc_handle,
goto out;
}
+ /* Start writing out the saved-domain record. */
+
if ( (*writerfn)(writerst, "LinuxGuestRecord", 16) ||
(*writerfn)(writerst, name, sizeof(name)) ||
- (*writerfn)(writerst, &srec.nr_pfns, sizeof(unsigned long)) ||
- (*writerfn)(writerst, &ctxt, sizeof(ctxt)) ||
- (*writerfn)(writerst, live_shinfo, PAGE_SIZE) ||
+ (*writerfn)(writerst, &nr_pfns, sizeof(unsigned long)) ||
(*writerfn)(writerst, pfn_to_mfn_frame_list, PAGE_SIZE) )
{
ERROR("Error when writing to state file (1)");
goto out;
}
- munmap(live_shinfo, PAGE_SIZE);
-
- verbose_printf("Saving memory pages: 0%%");
-
- if ( (mapper_handle2 = mfn_mapper_init(xc_handle, domid,
- BATCH_SIZE*4096, PROT_READ ))
- == NULL )
- goto out;
-
- region_base = mfn_mapper_base( mapper_handle2 );
/* Now write out each data page, canonicalising page tables as we go... */
- prev_pc = 0;
- for ( n = 0; n < srec.nr_pfns; )
- {
- this_pc = (n * 100) / srec.nr_pfns;
- if ( (this_pc - prev_pc) >= 5 )
- {
- verbose_printf("\b\b\b\b%3d%%", this_pc);
- prev_pc = this_pc;
- }
-
- for( j = 0, i = n; j < BATCH_SIZE && i < srec.nr_pfns ; j++, i++ )
- {
- pfn_type[j] = live_pfn_to_mfn_table[i];
- }
+ while(1)
+ {
+ unsigned int prev_pc, batch, sent_this_iter;
- for( j = 0, i = n; j < BATCH_SIZE && i < srec.nr_pfns ; j++, i++ )
- {
- /* queue up mappings for all of the pages in this batch */
+ iter++;
-//printf("region n=%d j=%d i=%d mfn=%d\n",n,j,i,live_pfn_to_mfn_table[i]);
- mfn_mapper_queue_entry( mapper_handle2, j<<PAGE_SHIFT,
- live_pfn_to_mfn_table[i],
- PAGE_SIZE );
- }
+ sent_this_iter = 0;
+ prev_pc = 0;
+ verbose_printf("Saving memory pages: iter %d 0%%", iter);
- if( mfn_mapper_flush_queue(mapper_handle2) )
- {
- ERROR("Couldn't map page region");
- goto out;
- }
-
- if ( get_pfn_type_batch(xc_handle, domid, j, pfn_type) )
- {
- ERROR("get_pfn_type_batch failed");
- goto out;
- }
-
- for( j = 0, i = n; j < BATCH_SIZE && i < srec.nr_pfns ; j++, i++ )
+ n=0;
+ while( n < nr_pfns )
{
- if((pfn_type[j]>>29) == 7)
+ unsigned int this_pc = (n * 100) / nr_pfns;
+ if ( (this_pc - prev_pc) >= 5 )
{
- ERROR("bogus page");
- goto out;
+ verbose_printf("\b\b\b\b%3d%%", this_pc);
+ prev_pc = this_pc;
}
- /* canonicalise mfn->pfn */
- pfn_type[j] = (pfn_type[j] & PGT_type_mask) |
- live_mfn_to_pfn_table[pfn_type[j]&~PGT_type_mask];
-
-/* if(pfn_type[j]>>29)
- printf("i=%d type=%d\n",i,pfn_type[i]); */
- }
-
- if ( (*writerfn)(writerst, &j, sizeof(int) ) )
- {
- ERROR("Error when writing to state file (2)");
- goto out;
- }
+ /* load pfn_type[] with the mfn of all the pages we're doing in
+ this batch. */
- if ( (*writerfn)(writerst, pfn_type, sizeof(unsigned long)*j ) )
- {
- ERROR("Error when writing to state file (3)");
- goto out;
- }
+ for( batch = 0; batch < BATCH_SIZE && n < nr_pfns ; n++ )
+ {
+ if ( !test_bit(n, to_send ) ) continue;
+ pfn_type[batch] = live_pfn_to_mfn_table[n];
- for( j = 0, i = n; j < BATCH_SIZE && i < srec.nr_pfns ; j++, i++ )
- {
- /* write out pages in batch */
+ if( pfn_type[batch] == 0x80000004 )
+ {
+ DPRINTF("Skip netbuf pfn %lx. mfn %lx\n",n,pfn_type[batch]);
+ continue;
+ }
- if ( ((pfn_type[j] & PGT_type_mask) == L1TAB) ||
- ((pfn_type[j] & PGT_type_mask) == L2TAB) )
+ if(iter>1) { DPRINTF("pfn=%x mfn=%x\n",n,pfn_type[batch]); }
+
+ batch++;
+ }
+
+ DPRINTF("batch %d:%d (n=%d)\n",iter,batch,n);
+
+ if(batch == 0) goto skip; // vanishingly unlikely...
+
+ if ( (region_base = mfn_mapper_map_batch( xc_handle, domid,
+ PROT_READ,
+ pfn_type,
+ batch )) == 0)
+ {
+ PERROR("map batch failed");
+ goto out;
+ }
+
+ if ( get_pfn_type_batch(xc_handle, domid, batch, pfn_type) )
{
+ ERROR("get_pfn_type_batch failed");
+ goto out;
+ }
+
+ for( j = 0; j < batch; j++ )
+ {
+ if((pfn_type[j]>>29) == 7)
+ {
+ DPRINTF("type fail: page %i mfn %08lx\n",j,pfn_type[j]);
+ continue;
+ }
- memcpy(page, region_base + (PAGE_SIZE*j), PAGE_SIZE);
+ /* canonicalise mfn->pfn */
+ pfn_type[j] = (pfn_type[j] & PGT_type_mask) |
+ live_mfn_to_pfn_table[pfn_type[j]&~PGT_type_mask];
+ }
+
+
+ if ( (*writerfn)(writerst, &batch, sizeof(int) ) )
+ {
+ ERROR("Error when writing to state file (2)");
+ goto out;
+ }
- for ( k = 0;
- k < (((pfn_type[j] & PGT_type_mask) == L2TAB) ?
- (HYPERVISOR_VIRT_START >> L2_PAGETABLE_SHIFT) : 1024);
- k++ )
+ if ( (*writerfn)(writerst, pfn_type, sizeof(unsigned long)*j ) )
+ {
+ ERROR("Error when writing to state file (3)");
+ goto out;
+ }
+
+ /* entering this loop, pfn_type is now in pfns (Not mfns) */
+ for( j = 0; j < batch; j++ )
+ {
+ /* write out pages in batch */
+
+ if((pfn_type[j]>>29) == 7)
{
- if ( !(page[k] & _PAGE_PRESENT) ) continue;
- mfn = page[k] >> PAGE_SHIFT;
-
- if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) )
+ DPRINTF("SKIP BOGUS page %i mfn %08lx\n",j,pfn_type[j]);
+ continue;
+ }
+
+ if ( ((pfn_type[j] & PGT_type_mask) == L1TAB) ||
+ ((pfn_type[j] & PGT_type_mask) == L2TAB) )
+ {
+
+ memcpy(page, region_base + (PAGE_SIZE*j), PAGE_SIZE);
+
+ for ( k = 0;
+ k < (((pfn_type[j] & PGT_type_mask) == L2TAB) ?
+ (HYPERVISOR_VIRT_START >> L2_PAGETABLE_SHIFT) : 1024);
+ k++ )
+ {
+ unsigned long pfn;
+
+ if ( !(page[k] & _PAGE_PRESENT) ) continue;
+ mfn = page[k] >> PAGE_SHIFT;
+ pfn = live_mfn_to_pfn_table[mfn];
+
+ if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) )
+ {
+ // I don't think this should ever happen
+
+ printf("FNI %d : [%08lx,%d] pte=%08lx, mfn=%08lx, pfn=%08lx [mfn]=%08lx\n",
+ j, pfn_type[j], k,
+ page[k], mfn, live_mfn_to_pfn_table[mfn],
+ (live_mfn_to_pfn_table[mfn]<nr_pfns)?
+ live_pfn_to_mfn_table[live_mfn_to_pfn_table[mfn]]: 0xdeadbeef);
+
+ pfn = 0; // be suspicious, very suspicious
+
+ //goto out; // let's try our luck
+
+
+ }
+ page[k] &= PAGE_SIZE - 1;
+ page[k] |= pfn << PAGE_SHIFT;
+
+#if DEBUG
+ printf("L%d i=%d pfn=%d mfn=%d k=%d pte=%08lx xpfn=%d\n",
+ pfn_type[j]>>29,
+ j,i,mfn,k,page[k],page[k]>>PAGE_SHIFT);
+#endif
+
+ } /* end of page table rewrite for loop */
+
+ if ( (*writerfn)(writerst, page, PAGE_SIZE) )
+ {
+ ERROR("Error when writing to state file (4)");
+ goto out;
+ }
+
+ } /* end of it's a PT page */
+ else
+ { /* normal page */
+ if ( (*writerfn)(writerst, region_base + (PAGE_SIZE*j), PAGE_SIZE) )
{
- ERROR("Frame number in pagetable page is invalid");
+ ERROR("Error when writing to state file (5)");
goto out;
}
- page[k] &= PAGE_SIZE - 1;
- page[k] |= live_mfn_to_pfn_table[mfn] << PAGE_SHIFT;
+ }
+ } /* end of the write out for this batch */
+
+ sent_this_iter += batch;
- /*
- printf("L%d i=%d pfn=%d mfn=%d k=%d pte=%08lx xpfn=%d\n",
- pfn_type[j]>>29,
- j,i,mfn,k,page[k],page[k]>>PAGE_SHIFT);
- */
+ } /* end of this while loop for this iteration */
- }
+ munmap(region_base, batch*PAGE_SIZE);
- if ( (*writerfn)(writerst, page, PAGE_SIZE) )
- {
- ERROR("Error when writing to state file (4)");
- goto out;
- }
+ skip:
+
+ verbose_printf("\b\b\b\b100%% (%d pages)\n", sent_this_iter );
+
+ if ( last_iter )
+ break;
+ if ( live )
+ {
+ if ( ( sent_this_iter > (sent_last_iter * 0.95) ) ||
+ (iter >= max_iters) || (sent_this_iter < 10) )
+ {
+ printf("Start last iteration\n");
+ last_iter = 1;
- }
- else
+ xc_domain_stop_sync( xc_handle, domid );
+
+ }
+
+ if ( xc_shadow_control( xc_handle, domid,
+ DOM0_SHADOW_CONTROL_OP_CLEAN,
+ to_send, nr_pfns ) != nr_pfns )
{
- if ( (*writerfn)(writerst, region_base + (PAGE_SIZE*j), PAGE_SIZE) )
- {
- ERROR("Error when writing to state file (5)");
- goto out;
- }
+ ERROR("Error flushing shadow PT");
+ goto out;
}
+
+ sent_last_iter = sent_this_iter;
}
-
- n+=j; /* i is the master loop counter */
- }
- verbose_printf("\b\b\b\b100%%\nMemory saved.\n");
+
+ } /* end of while 1 */
+
+ DPRINTF("All memory is saved\n");
/* Success! */
rc = 0;
-
+
/* Zero terminate */
if ( (*writerfn)(writerst, &rc, sizeof(int)) )
{
ERROR("Error when writing to state file (6)");
goto out;
}
-
+
+ /* Get the final execution context */
+ op.cmd = DOM0_GETDOMAININFO;
+ op.u.getdomaininfo.domain = (domid_t)domid;
+ op.u.getdomaininfo.ctxt = &ctxt;
+ if ( (do_dom0_op(xc_handle, &op) < 0) ||
+ ((u64)op.u.getdomaininfo.domain != domid) )
+ {
+ PERROR("Could not get info on domain");
+ goto out;
+ }
+
+ /* Canonicalise the suspend-record frame number. */
+ if ( !translate_mfn_to_pfn(&ctxt.cpu_ctxt.esi) )
+ {
+ ERROR("State record is not in range of pseudophys map");
+ goto out;
+ }
+
+ /* Canonicalise each GDT frame number. */
+ for ( i = 0; i < ctxt.gdt_ents; i += 512 )
+ {
+ if ( !translate_mfn_to_pfn(&ctxt.gdt_frames[i]) )
+ {
+ ERROR("GDT frame is not in range of pseudophys map");
+ goto out;
+ }
+ }
+
+ /* Canonicalise the page table base pointer. */
+ if ( !MFN_IS_IN_PSEUDOPHYS_MAP(ctxt.pt_base >> PAGE_SHIFT) )
+ {
+ ERROR("PT base is not in range of pseudophys map");
+ goto out;
+ }
+ ctxt.pt_base = live_mfn_to_pfn_table[ctxt.pt_base >> PAGE_SHIFT] << PAGE_SHIFT;
+
+ if ( (*writerfn)(writerst, &ctxt, sizeof(ctxt)) ||
+ (*writerfn)(writerst, live_shinfo, PAGE_SIZE) )
+ {
+ ERROR("Error when writing to state file (1)");
+ goto out;
+ }
+ munmap(live_shinfo, PAGE_SIZE);
out:
/* Restart the domain if we had to stop it to save its state. */
diff --git a/tools/xc/lib/xc_private.c b/tools/xc/lib/xc_private.c
index d137176ca8..41eb2e744a 100644
--- a/tools/xc/lib/xc_private.c
+++ b/tools/xc/lib/xc_private.c
@@ -47,6 +47,31 @@ void unmap_pfn(int pm_handle, void *vaddr)
/*******************/
+void * mfn_mapper_map_batch(int xc_handle, domid_t dom, int prot,
+ unsigned long *arr, int num )
+{
+ privcmd_mmapbatch_t ioctlx;
+ void *addr;
+ addr = mmap( NULL, num*PAGE_SIZE, prot, MAP_SHARED, xc_handle, 0 );
+ if (addr)
+ {
+ ioctlx.num=num;
+ ioctlx.dom=dom;
+ ioctlx.addr=(unsigned long)addr;
+ ioctlx.arr=arr;
+ if ( ioctl( xc_handle, IOCTL_PRIVCMD_MMAPBATCH, &ioctlx ) <0 )
+ {
+ perror("XXXXXXXX");
+ munmap(addr, num*PAGE_SIZE);
+ return 0;
+ }
+ }
+ return addr;
+
+}
+
+/*******************/
+
void * mfn_mapper_map_single(int xc_handle, domid_t dom,
int size, int prot,
unsigned long mfn )
@@ -64,7 +89,10 @@ void * mfn_mapper_map_single(int xc_handle, domid_t dom,
entry.mfn=mfn;
entry.npages=(size+PAGE_SIZE-1)>>PAGE_SHIFT;
if ( ioctl( xc_handle, IOCTL_PRIVCMD_MMAP, &ioctlx ) <0 )
+ {
+ munmap(addr, size);
return 0;
+ }
}
return addr;
}
@@ -295,7 +323,7 @@ static int flush_mmu_updates(int xc_handle, mmu_t *mmu)
hypercall.op = __HYPERVISOR_mmu_update;
hypercall.arg[0] = (unsigned long)mmu->updates;
- hypercall.arg[1] = (unsigned long)mmu->idx;
+ hypercall.arg[1] = (unsigned long)&(mmu->idx);
if ( mlock(mmu->updates, sizeof(mmu->updates)) != 0 )
{
@@ -342,3 +370,53 @@ int finish_mmu_updates(int xc_handle, mmu_t *mmu)
{
return flush_mmu_updates(xc_handle, mmu);
}
+
+
+/***********************************************************/
+
+/* this function is a hack until we get proper synchronous domain stop */
+
+int xc_domain_stop_sync( int xc_handle, domid_t domid )
+{
+ dom0_op_t op;
+ int i;
+
+
+ op.cmd = DOM0_STOPDOMAIN;
+ op.u.stopdomain.domain = (domid_t)domid;
+ if ( do_dom0_op(xc_handle, &op) != 0 )
+ {
+ PERROR("Stopping target domain failed");
+ goto out;
+ }
+
+ usleep(100); // 100us
+
+ for(i=0;;i++)
+ {
+ if (i>0)
+ if (i==1) printf("Sleep.");
+ else printf(".");
+
+ op.cmd = DOM0_GETDOMAININFO;
+ op.u.getdomaininfo.domain = (domid_t)domid;
+ op.u.getdomaininfo.ctxt = NULL;
+ if ( (do_dom0_op(xc_handle, &op) < 0) ||
+ ((u64)op.u.getdomaininfo.domain != domid) )
+ {
+ PERROR("Could not get info on domain");
+ goto out;
+ }
+
+ if ( op.u.getdomaininfo.state == DOMSTATE_STOPPED )
+ {
+ printf("\nDomain %lld stopped\n",domid);
+ return 0;
+ }
+
+ usleep(1000);
+ }
+
+out:
+ return -1;
+}
diff --git a/tools/xc/lib/xc_private.h b/tools/xc/lib/xc_private.h
index 3a2e3ea9f1..e3eff85e59 100644
--- a/tools/xc/lib/xc_private.h
+++ b/tools/xc/lib/xc_private.h
@@ -232,6 +232,9 @@ typedef struct mfn_mapper {
void * mfn_mapper_map_single(int xc_handle, domid_t dom, int size, int prot,
unsigned long mfn );
+void * mfn_mapper_map_batch(int xc_handle, domid_t dom, int prot,
+ unsigned long *arr, int num );
+
mfn_mapper_t * mfn_mapper_init(int xc_handle, domid_t dom, int size, int prot);
void * mfn_mapper_base(mfn_mapper_t *t);
@@ -245,5 +248,6 @@ void * mfn_mapper_queue_entry(mfn_mapper_t *t, int offset,
/*********************/
+int xc_domain_stop_sync( int xc_handle, domid_t dom );
#endif /* __XC_PRIVATE_H__ */
diff --git a/tools/xc/py/Xc.c b/tools/xc/py/Xc.c
index 974ad994b7..b2ae143eda 100644
--- a/tools/xc/py/Xc.c
+++ b/tools/xc/py/Xc.c
@@ -191,17 +191,17 @@ static PyObject *pyxc_linux_save(PyObject *self,
u64 dom;
char *state_file;
- int progress = 1;
+ int progress = 1, live = -1;
unsigned int flags = 0;
- static char *kwd_list[] = { "dom", "state_file", "progress", NULL };
+ static char *kwd_list[] = { "dom", "state_file", "progress", "live", NULL };
- if ( !PyArg_ParseTupleAndKeywords(args, kwds, "Ls|i", kwd_list,
- &dom, &state_file, &progress) )
+ if ( !PyArg_ParseTupleAndKeywords(args, kwds, "Ls|ii", kwd_list,
+ &dom, &state_file, &progress, &live) )
return NULL;
- if ( progress )
- flags |= XCFLAGS_VERBOSE;
+ if (progress) flags |= XCFLAGS_VERBOSE;
+ if (live == 1) flags |= XCFLAGS_LIVE;
if ( strncmp(state_file,"tcp:", strlen("tcp:")) == 0 )
{
@@ -226,6 +226,8 @@ static PyObject *pyxc_linux_save(PyObject *self,
return 0;
}
+ if (live == -1) flags |= XCFLAGS_LIVE; // default to live for tcp
+
strncpy( server, state_file+strlen("tcp://"), max_namelen);
server[max_namelen-1]='\0';
if ( (port_s = strchr(server,':')) != NULL )
@@ -1270,7 +1272,7 @@ static PyObject *pyxc_shadow_control(PyObject *self,
&dom, &op) )
return NULL;
- if ( xc_shadow_control(xc->xc_handle, dom, op) != 0 )
+ if ( xc_shadow_control(xc->xc_handle, dom, op, NULL, 0) < 0 )
return PyErr_SetFromErrno(xc_error);
Py_INCREF(zero);
diff --git a/tools/xend/lib/utils.c b/tools/xend/lib/utils.c
index 297976e9be..441b62f153 100644
--- a/tools/xend/lib/utils.c
+++ b/tools/xend/lib/utils.c
@@ -723,6 +723,11 @@ static PyObject *xu_port_new(PyObject *self, PyObject *args)
goto fail4;
}
+ xup->interface->tx_resp_prod = 0;
+ xup->interface->rx_req_prod = 0;
+ xup->interface->tx_req_prod = 0;
+ xup->interface->rx_resp_prod = 0;
+
xup->tx_req_cons = 0;
xup->tx_resp_prod = 0;
xup->rx_req_prod = 0;