aboutsummaryrefslogtreecommitdiffstats
path: root/tools/xc/lib/xc_linux_save.c
diff options
context:
space:
mode:
Diffstat (limited to 'tools/xc/lib/xc_linux_save.c')
-rw-r--r--tools/xc/lib/xc_linux_save.c392
1 files changed, 246 insertions, 146 deletions
diff --git a/tools/xc/lib/xc_linux_save.c b/tools/xc/lib/xc_linux_save.c
index dc759f546c..88ed9e15d7 100644
--- a/tools/xc/lib/xc_linux_save.c
+++ b/tools/xc/lib/xc_linux_save.c
@@ -10,6 +10,8 @@
#include <asm-xen/suspend.h>
#include <zlib.h>
+#define BATCH_SIZE 1024 /* 1024 pages (4MB) at a time */
+
/* This may allow us to create a 'quiet' command-line option, if necessary. */
#define verbose_printf(_f, _a...) \
do { \
@@ -24,7 +26,7 @@
*/
#define MFN_IS_IN_PSEUDOPHYS_MAP(_mfn) \
(((_mfn) < (1024*1024)) && \
- (pfn_to_mfn_table[mfn_to_pfn_table[_mfn]] == (_mfn)))
+ (live_pfn_to_mfn_table[live_mfn_to_pfn_table[_mfn]] == (_mfn)))
/* Returns TRUE if MFN is successfully converted to a PFN. */
#define translate_mfn_to_pfn(_pmfn) \
@@ -34,37 +36,11 @@
if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) ) \
_res = 0; \
else \
- *(_pmfn) = mfn_to_pfn_table[mfn]; \
+ *(_pmfn) = live_mfn_to_pfn_table[mfn]; \
_res; \
})
-static int check_pfn_ownership(int xc_handle,
- unsigned long mfn,
- u64 dom)
-{
- dom0_op_t op;
- op.cmd = DOM0_GETPAGEFRAMEINFO;
- op.u.getpageframeinfo.pfn = mfn;
- op.u.getpageframeinfo.domain = (domid_t)dom;
- return (do_dom0_op(xc_handle, &op) >= 0);
-}
-#define GETPFN_ERR (~0U)
-static unsigned int get_pfn_type(int xc_handle,
- unsigned long mfn,
- u64 dom)
-{
- dom0_op_t op;
- op.cmd = DOM0_GETPAGEFRAMEINFO;
- op.u.getpageframeinfo.pfn = mfn;
- op.u.getpageframeinfo.domain = (domid_t)dom;
- if ( do_dom0_op(xc_handle, &op) < 0 )
- {
- PERROR("Unexpected failure when getting page frame info!");
- return GETPFN_ERR;
- }
- return op.u.getpageframeinfo.type;
-}
static int checked_write(gzFile fd, void *buf, size_t count)
{
@@ -80,10 +56,13 @@ int xc_linux_save(int xc_handle,
int verbose)
{
dom0_op_t op;
- int rc = 1, i, j;
+ int rc = 1, i, j, k, n;
unsigned long mfn;
unsigned int prev_pc, this_pc;
+ /* state of the new MFN mapper */
+ mfn_mapper_t *mapper_handle1, *mapper_handle2;
+
/* Remember if we stopped the guest, so we can restart it on exit. */
int we_stopped_it = 0;
@@ -100,18 +79,23 @@ int xc_linux_save(int xc_handle,
unsigned long *pfn_type = NULL;
/* A temporary mapping, and a copy, of one frame of guest memory. */
- unsigned long *ppage, page[1024];
+ unsigned long page[1024];
- /* A temporary mapping, and a copy, of the pfn-to-mfn table frame list. */
- unsigned long *p_pfn_to_mfn_frame_list, pfn_to_mfn_frame_list[1024];
- /* A temporary mapping of one frame in the above list. */
- unsigned long *pfn_to_mfn_frame;
+ /* A copy of the pfn-to-mfn table frame list. */
+ unsigned long *live_pfn_to_mfn_frame_list;
+ unsigned long pfn_to_mfn_frame_list[1024];
- /* A table mapping each PFN to its current MFN. */
- unsigned long *pfn_to_mfn_table = NULL;
- /* A table mapping each current MFN to its canonical PFN. */
- unsigned long *mfn_to_pfn_table = NULL;
+ /* Live mapping of the table mapping each PFN to its current MFN. */
+ unsigned long *live_pfn_to_mfn_table = NULL;
+ /* Live mapping of system MFN to PFN table. */
+ unsigned long *live_mfn_to_pfn_table = NULL;
+ /* Live mapping of shared info structure */
+ unsigned long *live_shinfo;
+
+ /* base of the region in which domain memory is mapped */
+ unsigned char *region_base;
+
/* A temporary mapping, and a copy, of the guest's suspend record. */
suspend_record_t *p_srec, srec;
@@ -138,11 +122,18 @@ int xc_linux_save(int xc_handle,
return 1;
}
+ if ( mlock(&ctxt, sizeof(ctxt) ) )
+ {
+ PERROR("Unable to mlock ctxt");
+ return 1;
+ }
+
/* Ensure that the domain exists, and that it is stopped. */
for ( ; ; )
{
op.cmd = DOM0_GETDOMAININFO;
op.u.getdomaininfo.domain = (domid_t)domid;
+ op.u.getdomaininfo.ctxt = &ctxt;
if ( (do_dom0_op(xc_handle, &op) < 0) ||
((u64)op.u.getdomaininfo.domain != domid) )
{
@@ -150,7 +141,6 @@ int xc_linux_save(int xc_handle,
goto out;
}
- memcpy(&ctxt, &op.u.getdomaininfo.ctxt, sizeof(ctxt));
memcpy(name, op.u.getdomaininfo.name, sizeof(name));
shared_info_frame = op.u.getdomaininfo.shared_info_frame;
@@ -178,99 +168,115 @@ int xc_linux_save(int xc_handle,
goto out;
}
- if ( (pm_handle = init_pfn_mapper((domid_t)domid)) < 0 )
- goto out;
- /* Is the suspend-record MFN actually valid for this domain? */
- if ( !check_pfn_ownership(xc_handle, ctxt.cpu_ctxt.esi, domid) )
+ /* Map the suspend-record MFN to pin it. The page must be owned by
+ domid for this to succeed. */
+ p_srec = mfn_mapper_map_single(xc_handle, domid,
+ sizeof(srec), PROT_READ,
+ ctxt.cpu_ctxt.esi );
+
+ if (!p_srec)
{
- ERROR("Invalid state record pointer");
+ ERROR("Couldn't map state record");
goto out;
}
- /* If the suspend-record MFN is okay then grab a copy of it to @srec. */
- p_srec = map_pfn_readonly(pm_handle, ctxt.cpu_ctxt.esi);
- memcpy(&srec, p_srec, sizeof(srec));
- unmap_pfn(pm_handle, p_srec);
+ memcpy( &srec, p_srec, sizeof(srec) );
+ /* cheesy sanity check */
if ( srec.nr_pfns > 1024*1024 )
{
ERROR("Invalid state record -- pfn count out of range");
goto out;
}
- if ( !check_pfn_ownership(xc_handle, srec.pfn_to_mfn_frame_list, domid) )
+ /* the pfn_to_mfn_frame_list fits in a single page */
+ live_pfn_to_mfn_frame_list =
+ mfn_mapper_map_single(xc_handle, domid,
+ PAGE_SIZE, PROT_READ,
+ srec.pfn_to_mfn_frame_list );
+
+ if (!live_pfn_to_mfn_frame_list)
{
- ERROR("Invalid pfn-to-mfn frame list pointer");
+ ERROR("Couldn't map pfn_to_mfn_frame_list");
goto out;
}
+
+
+ if ( (mapper_handle1 = mfn_mapper_init(xc_handle, domid,
+ 1024*1024, PROT_READ ))
+ == NULL )
+ goto out;
+
+ for ( i = 0; i < (srec.nr_pfns+1023)/1024; i++ )
+ {
+ /* Grab a copy of the pfn-to-mfn table frame list.
+ This has the effect of preventing the page from being freed and
+ given to another domain. (though the domain is stopped anyway...) */
+ mfn_mapper_queue_entry( mapper_handle1, i<<PAGE_SHIFT,
+ live_pfn_to_mfn_frame_list[i],
+ PAGE_SIZE );
+ }
+
+ if ( mfn_mapper_flush_queue(mapper_handle1) )
+ {
+ ERROR("Couldn't map pfn_to_mfn table");
+ goto out;
+ }
+
+ live_pfn_to_mfn_table = mfn_mapper_base( mapper_handle1 );
+
- /* Grab a copy of the pfn-to-mfn table frame list. */
- p_pfn_to_mfn_frame_list = map_pfn_readonly(
- pm_handle, srec.pfn_to_mfn_frame_list);
- memcpy(pfn_to_mfn_frame_list, p_pfn_to_mfn_frame_list, PAGE_SIZE);
- unmap_pfn(pm_handle, p_pfn_to_mfn_frame_list);
/* We want zeroed memory so use calloc rather than malloc. */
- mfn_to_pfn_table = calloc(1, 4 * 1024 * 1024);
- pfn_to_mfn_table = calloc(1, 4 * srec.nr_pfns);
- pfn_type = calloc(1, 4 * srec.nr_pfns);
+ pfn_type = calloc(BATCH_SIZE, sizeof(unsigned long));
- if ( (mfn_to_pfn_table == NULL) ||
- (pfn_to_mfn_table == NULL) ||
- (pfn_type == NULL) )
+ if ( (pfn_type == NULL) )
{
errno = ENOMEM;
goto out;
}
+ if ( mlock( pfn_type, BATCH_SIZE * sizeof(unsigned long) ) )
+ {
+ ERROR("Unable to mlock");
+ goto out;
+ }
- /*
- * Construct the local pfn-to-mfn and mfn-to-pfn tables. On exit from this
- * loop we have each MFN mapped at most once. Note that there may be MFNs
- * that aren't mapped at all: we detect these by MFN_IS_IN_PSEUDOPHYS_MAP.
- */
- pfn_to_mfn_frame = NULL;
- for ( i = 0; i < srec.nr_pfns; i++ )
+
+ /* Track the mfn_to_pfn table down from the domains PT */
{
- /* Each frameful of table frames must be checked & mapped on demand. */
- if ( (i & 1023) == 0 )
- {
- mfn = pfn_to_mfn_frame_list[i/1024];
- if ( !check_pfn_ownership(xc_handle, mfn, domid) )
- {
- ERROR("Invalid frame number if pfn-to-mfn frame list");
- goto out;
- }
- if ( pfn_to_mfn_frame != NULL )
- unmap_pfn(pm_handle, pfn_to_mfn_frame);
- pfn_to_mfn_frame = map_pfn_readonly(pm_handle, mfn);
- }
-
- mfn = pfn_to_mfn_frame[i & 1023];
+ unsigned long *pgd;
+ unsigned long mfn_to_pfn_table_start_mfn;
- if ( !check_pfn_ownership(xc_handle, mfn, domid) )
- {
- ERROR("Invalid frame specified with pfn-to-mfn table");
- goto out;
- }
+ pgd = mfn_mapper_map_single(xc_handle, domid,
+ PAGE_SIZE, PROT_READ,
+ ctxt.pt_base>>PAGE_SHIFT);
- /* Did we map this MFN already? That would be invalid! */
- if ( MFN_IS_IN_PSEUDOPHYS_MAP(mfn) )
- {
- ERROR("A machine frame appears twice in pseudophys space");
- goto out;
- }
+ mfn_to_pfn_table_start_mfn =
+ pgd[HYPERVISOR_VIRT_START>>L2_PAGETABLE_SHIFT]>>PAGE_SHIFT;
+
+ live_mfn_to_pfn_table =
+ mfn_mapper_map_single(xc_handle, ~0ULL,
+ PAGE_SIZE*1024, PROT_READ,
+ mfn_to_pfn_table_start_mfn );
+ }
- pfn_to_mfn_table[i] = mfn;
- mfn_to_pfn_table[mfn] = i;
- /* Query page type by MFN, but store it by PFN. */
- if ( (pfn_type[i] = get_pfn_type(xc_handle, mfn, domid)) ==
- GETPFN_ERR )
- goto out;
+ /*
+ * Quick belt and braces sanity check.
+ */
+
+ for ( i = 0; i < srec.nr_pfns; i++ )
+ {
+ mfn = live_pfn_to_mfn_table[i];
+
+ if( live_mfn_to_pfn_table[mfn] != i )
+ printf("i=%d mfn=%d live_mfn_to_pfn_table=%d\n",
+ i,mfn,live_mfn_to_pfn_table[mfn]);
}
+
/* Canonicalise the suspend-record frame number. */
if ( !translate_mfn_to_pfn(&ctxt.cpu_ctxt.esi) )
{
@@ -294,9 +300,10 @@ int xc_linux_save(int xc_handle,
ERROR("PT base is not in range of pseudophys map");
goto out;
}
- ctxt.pt_base = mfn_to_pfn_table[ctxt.pt_base >> PAGE_SHIFT] << PAGE_SHIFT;
+ ctxt.pt_base = live_mfn_to_pfn_table[ctxt.pt_base >> PAGE_SHIFT] << PAGE_SHIFT;
/* Canonicalise the pfn-to-mfn table frame-number list. */
+ memcpy( pfn_to_mfn_frame_list, live_pfn_to_mfn_frame_list, PAGE_SIZE );
for ( i = 0; i < srec.nr_pfns; i += 1024 )
{
if ( !translate_mfn_to_pfn(&pfn_to_mfn_frame_list[i/1024]) )
@@ -307,63 +314,152 @@ int xc_linux_save(int xc_handle,
}
/* Start writing out the saved-domain record. */
- ppage = map_pfn_readonly(pm_handle, shared_info_frame);
+ live_shinfo = mfn_mapper_map_single(xc_handle, domid,
+ PAGE_SIZE, PROT_READ,
+ shared_info_frame);
+
+ if (!live_shinfo)
+ {
+ ERROR("Couldn't map live_shinfo");
+ goto out;
+ }
+
if ( !checked_write(gfd, "LinuxGuestRecord", 16) ||
!checked_write(gfd, name, sizeof(name)) ||
!checked_write(gfd, &srec.nr_pfns, sizeof(unsigned long)) ||
!checked_write(gfd, &ctxt, sizeof(ctxt)) ||
- !checked_write(gfd, ppage, PAGE_SIZE) ||
- !checked_write(gfd, pfn_to_mfn_frame_list, PAGE_SIZE) ||
- !checked_write(gfd, pfn_type, 4 * srec.nr_pfns) )
+ !checked_write(gfd, live_shinfo, PAGE_SIZE) ||
+ !checked_write(gfd, pfn_to_mfn_frame_list, PAGE_SIZE) )
{
ERROR("Error when writing to state file");
goto out;
}
- unmap_pfn(pm_handle, ppage);
+ munmap(live_shinfo, PAGE_SIZE);
verbose_printf("Saving memory pages: 0%%");
+ if ( (mapper_handle2 = mfn_mapper_init(xc_handle, domid,
+ BATCH_SIZE*4096, PROT_READ ))
+ == NULL )
+ goto out;
+
+ region_base = mfn_mapper_base( mapper_handle2 );
+
/* Now write out each data page, canonicalising page tables as we go... */
prev_pc = 0;
- for ( i = 0; i < srec.nr_pfns; i++ )
+ for ( n = 0; n < srec.nr_pfns; )
{
- this_pc = (i * 100) / srec.nr_pfns;
+ this_pc = (n * 100) / srec.nr_pfns;
if ( (this_pc - prev_pc) >= 5 )
{
verbose_printf("\b\b\b\b%3d%%", this_pc);
prev_pc = this_pc;
}
- mfn = pfn_to_mfn_table[i];
-
- ppage = map_pfn_readonly(pm_handle, mfn);
- memcpy(page, ppage, PAGE_SIZE);
- unmap_pfn(pm_handle, ppage);
-
- if ( (pfn_type[i] == L1TAB) || (pfn_type[i] == L2TAB) )
- {
- for ( j = 0;
- j < ((pfn_type[i] == L2TAB) ?
- (HYPERVISOR_VIRT_START >> L2_PAGETABLE_SHIFT) : 1024);
- j++ )
- {
- if ( !(page[j] & _PAGE_PRESENT) ) continue;
- mfn = page[j] >> PAGE_SHIFT;
- if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) )
- {
- ERROR("Frame number in pagetable page is invalid");
- goto out;
- }
- page[j] &= PAGE_SIZE - 1;
- page[j] |= mfn_to_pfn_table[mfn] << PAGE_SHIFT;
- }
- }
-
- if ( !checked_write(gfd, page, PAGE_SIZE) )
- {
- ERROR("Error when writing to state file");
- goto out;
- }
+ for( j = 0, i = n; j < BATCH_SIZE && i < srec.nr_pfns ; j++, i++ )
+ {
+ pfn_type[j] = live_pfn_to_mfn_table[i];
+ }
+
+
+ for( j = 0, i = n; j < BATCH_SIZE && i < srec.nr_pfns ; j++, i++ )
+ {
+ /* queue up mappings for all of the pages in this batch */
+
+//printf("region n=%d j=%d i=%d mfn=%d\n",n,j,i,live_pfn_to_mfn_table[i]);
+ mfn_mapper_queue_entry( mapper_handle2, j<<PAGE_SHIFT,
+ live_pfn_to_mfn_table[i],
+ PAGE_SIZE );
+ }
+
+ if( mfn_mapper_flush_queue(mapper_handle2) )
+ {
+ ERROR("Couldn't map page region");
+ goto out;
+ }
+
+ if ( get_pfn_type_batch(xc_handle, domid, j, pfn_type) )
+ {
+ ERROR("get_pfn_type_batch failed");
+ goto out;
+ }
+
+ for( j = 0, i = n; j < BATCH_SIZE && i < srec.nr_pfns ; j++, i++ )
+ {
+ if((pfn_type[j]>>29) == 7)
+ {
+ ERROR("bogus page");
+ goto out;
+ }
+
+ /* canonicalise mfn->pfn */
+ pfn_type[j] = (pfn_type[j] & PGT_type_mask) |
+ live_mfn_to_pfn_table[pfn_type[j]&~PGT_type_mask];
+
+/* if(pfn_type[j]>>29)
+ printf("i=%d type=%d\n",i,pfn_type[i]); */
+ }
+
+
+ if ( !checked_write(gfd, &j, sizeof(int) ) )
+ {
+ ERROR("Error when writing to state file");
+ goto out;
+ }
+
+ if ( !checked_write(gfd, pfn_type, sizeof(unsigned long)*j ) )
+ {
+ ERROR("Error when writing to state file");
+ goto out;
+ }
+
+
+ for( j = 0, i = n; j < BATCH_SIZE && i < srec.nr_pfns ; j++, i++ )
+ {
+ /* write out pages in batch */
+
+ if ( ((pfn_type[j] & PGT_type_mask) == L1TAB) ||
+ ((pfn_type[j] & PGT_type_mask) == L2TAB) )
+ {
+
+ memcpy(page, region_base + (PAGE_SIZE*j), PAGE_SIZE);
+
+ for ( k = 0;
+ k < (((pfn_type[j] & PGT_type_mask) == L2TAB) ?
+ (HYPERVISOR_VIRT_START >> L2_PAGETABLE_SHIFT) : 1024);
+ k++ )
+ {
+ if ( !(page[k] & _PAGE_PRESENT) ) continue;
+ mfn = page[k] >> PAGE_SHIFT;
+ if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) )
+ {
+ ERROR("Frame number in pagetable page is invalid");
+ goto out;
+ }
+ page[k] &= PAGE_SIZE - 1;
+ page[k] |= live_mfn_to_pfn_table[mfn] << PAGE_SHIFT;
+
+ }
+
+ if ( !checked_write(gfd, page, PAGE_SIZE) )
+ {
+ ERROR("Error when writing to state file");
+ goto out;
+ }
+
+
+ }
+ else
+ {
+ if ( !checked_write(gfd, region_base + (PAGE_SIZE*j), PAGE_SIZE) )
+ {
+ ERROR("Error when writing to state file");
+ goto out;
+ }
+ }
+ }
+
+ n+=j; /* i is the master loop counter */
}
verbose_printf("\b\b\b\b100%%\nMemory saved.\n");
@@ -371,10 +467,19 @@ int xc_linux_save(int xc_handle,
/* Success! */
rc = 0;
- out:
+ /* Zero terminate */
+ if ( !checked_write(gfd, &rc, sizeof(int)) )
+ {
+ ERROR("Error when writing to state file");
+ goto out;
+ }
+
+
+out:
/* Restart the domain if we had to stop it to save its state. */
if ( we_stopped_it )
{
+ printf("Restart domain\n");
op.cmd = DOM0_STARTDOMAIN;
op.u.startdomain.domain = (domid_t)domid;
(void)do_dom0_op(xc_handle, &op);
@@ -382,13 +487,6 @@ int xc_linux_save(int xc_handle,
gzclose(gfd);
- if ( pm_handle >= 0 )
- (void)close_pfn_mapper(pm_handle);
-
- if ( pfn_to_mfn_table != NULL )
- free(pfn_to_mfn_table);
- if ( mfn_to_pfn_table != NULL )
- free(mfn_to_pfn_table);
if ( pfn_type != NULL )
free(pfn_type);
@@ -397,4 +495,6 @@ int xc_linux_save(int xc_handle,
unlink(state_file);
return !!rc;
+
+
}