aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authoriap10@labyrinth.cl.cam.ac.uk <iap10@labyrinth.cl.cam.ac.uk>2004-05-11 14:57:44 +0000
committeriap10@labyrinth.cl.cam.ac.uk <iap10@labyrinth.cl.cam.ac.uk>2004-05-11 14:57:44 +0000
commit07588dc73f0be59b52b968f7b88e43798bbe5e51 (patch)
tree1efd062adfd783ffd2d9e6a9bbcc7cdc858c9cdf
parenta8e33843ee074cd479e0cb69a7e379587cdca5bb (diff)
downloadxen-07588dc73f0be59b52b968f7b88e43798bbe5e51.tar.gz
xen-07588dc73f0be59b52b968f7b88e43798bbe5e51.tar.bz2
xen-07588dc73f0be59b52b968f7b88e43798bbe5e51.zip
bitkeeper revision 1.896 (40a0e9e8M0uaTwE5LBe9sIhr2vdX7Q)
Live migration initial checkin.
-rwxr-xr-xtools/examples/xc_dom_control.py4
-rw-r--r--tools/xc/lib/xc.h10
-rw-r--r--tools/xc/lib/xc_domain.c15
-rw-r--r--tools/xc/lib/xc_linux_build.c2
-rw-r--r--tools/xc/lib/xc_linux_restore.c40
-rw-r--r--tools/xc/lib/xc_linux_save.c536
-rw-r--r--tools/xc/lib/xc_private.c74
-rw-r--r--tools/xc/lib/xc_private.h4
-rw-r--r--tools/xc/py/Xc.c11
-rw-r--r--tools/xend/lib/utils.c5
-rw-r--r--xen/common/dom0_ops.c6
-rw-r--r--xen/common/domain.c12
-rw-r--r--xen/common/memory.c20
-rw-r--r--xen/common/network.c3
-rw-r--r--xen/common/shadow.c72
-rw-r--r--xen/drivers/block/xen_block.c14
-rw-r--r--xen/include/asm-i386/processor.h2
-rw-r--r--xen/include/hypervisor-ifs/dom0_ops.h3
-rw-r--r--xen/include/xen/mm.h6
-rw-r--r--xen/include/xen/shadow.h33
-rw-r--r--xen/net/dev.c26
-rw-r--r--xenolinux-2.4.26-sparse/arch/xen/drivers/block/block.c4
-rw-r--r--xenolinux-2.4.26-sparse/arch/xen/drivers/dom0/core.c89
-rw-r--r--xenolinux-2.4.26-sparse/arch/xen/drivers/network/network.c8
-rw-r--r--xenolinux-2.4.26-sparse/arch/xen/kernel/setup.c10
-rw-r--r--xenolinux-2.4.26-sparse/arch/xen/kernel/time.c5
-rw-r--r--xenolinux-2.4.26-sparse/arch/xen/kernel/traps.c21
-rw-r--r--xenolinux-2.4.26-sparse/arch/xen/mm/hypervisor.c10
-rw-r--r--xenolinux-2.4.26-sparse/arch/xen/mm/ioremap.c154
-rw-r--r--xenolinux-2.4.26-sparse/include/asm-xen/hypervisor.h2
-rw-r--r--xenolinux-2.4.26-sparse/include/asm-xen/pgalloc.h7
-rw-r--r--xenolinux-2.4.26-sparse/include/asm-xen/proc_cmd.h9
32 files changed, 871 insertions, 346 deletions
diff --git a/tools/examples/xc_dom_control.py b/tools/examples/xc_dom_control.py
index 4f0bd5de52..d6cae4f720 100755
--- a/tools/examples/xc_dom_control.py
+++ b/tools/examples/xc_dom_control.py
@@ -139,10 +139,12 @@ elif cmd == 'suspend':
xc.domain_stop( dom=dom )
while not xc.domain_getinfo( first_dom=dom, max_doms=1 )[0]['stopped']:
- time.sleep(0.1);
+ print "Sleep..."
+ time.sleep(0.001);
rc = xc.linux_save( dom=dom, state_file=file, progress=1)
if rc == 0 : xc.domain_destroy( dom=dom, force=1 )
+ else: xc.domain_start( dom=dom ) # sensible for production use
elif cmd == 'cpu_bvtslice':
if len(sys.argv) < 3:
diff --git a/tools/xc/lib/xc.h b/tools/xc/lib/xc.h
index a0205bcc6b..2132d6e7c1 100644
--- a/tools/xc/lib/xc.h
+++ b/tools/xc/lib/xc.h
@@ -57,7 +57,10 @@ int xc_domain_getinfo(int xc_handle,
int xc_shadow_control(int xc_handle,
u64 domid,
- unsigned int sop);
+ unsigned int sop,
+ unsigned long *dirty_bitmap,
+ unsigned long pages);
+
#define XCFLAGS_VERBOSE 1
#define XCFLAGS_LIVE 2
@@ -247,11 +250,6 @@ int xc_readconsolering(int xc_handle,
int xc_physinfo(int xc_handle,
xc_physinfo_t *info);
-
-int xc_shadow_control(int xc_handle,
- u64 domid,
- unsigned int sop);
-
int xc_domain_setname(int xc_handle,
u64 domid,
char *name);
diff --git a/tools/xc/lib/xc_domain.c b/tools/xc/lib/xc_domain.c
index c26a3f87c3..6d0dd6d0f3 100644
--- a/tools/xc/lib/xc_domain.c
+++ b/tools/xc/lib/xc_domain.c
@@ -109,13 +109,24 @@ int xc_domain_getinfo(int xc_handle,
int xc_shadow_control(int xc_handle,
u64 domid,
- unsigned int sop)
+ unsigned int sop,
+ unsigned long *dirty_bitmap,
+ unsigned long pages)
{
+ int rc;
dom0_op_t op;
op.cmd = DOM0_SHADOW_CONTROL;
op.u.shadow_control.domain = (domid_t)domid;
op.u.shadow_control.op = sop;
- return do_dom0_op(xc_handle, &op);
+ op.u.shadow_control.dirty_bitmap = dirty_bitmap;
+ op.u.shadow_control.pages = pages;
+
+ rc = do_dom0_op(xc_handle, &op);
+
+ if ( rc == 0 )
+ return op.u.shadow_control.pages;
+ else
+ return rc;
}
int xc_domain_setname(int xc_handle,
diff --git a/tools/xc/lib/xc_linux_build.c b/tools/xc/lib/xc_linux_build.c
index 83debd904d..98a3fb6a60 100644
--- a/tools/xc/lib/xc_linux_build.c
+++ b/tools/xc/lib/xc_linux_build.c
@@ -284,7 +284,7 @@ static int setup_guestos(int xc_handle,
/* shared_info page starts its life empty. */
shared_info = map_pfn_writeable(pm_handle, shared_info_frame);
- memset(shared_info, 0, PAGE_SIZE);
+ memset(shared_info, 0, sizeof(shared_info_t));
/* Mask all upcalls... */
for ( i = 0; i < MAX_VIRT_CPUS; i++ )
shared_info->vcpu_data[i].evtchn_upcall_mask = 1;
diff --git a/tools/xc/lib/xc_linux_restore.c b/tools/xc/lib/xc_linux_restore.c
index 861d38a5f7..9b1532159f 100644
--- a/tools/xc/lib/xc_linux_restore.c
+++ b/tools/xc/lib/xc_linux_restore.c
@@ -230,9 +230,16 @@ int xc_linux_restore(int xc_handle,
goto out;
}
- //printf("batch=%d\n",j);
+ printf("batch %d\n",j);
- if(j==0) break; // our work here is done
+ if (j == 0)
+ break; // our work here is done
+
+ if( j > MAX_BATCH_SIZE )
+ {
+ ERROR("Max batch size exceeded. Giving up.");
+ goto out;
+ }
if ( (*readerfn)(readerst, region_pfn_type, j*sizeof(unsigned long)) )
{
@@ -242,6 +249,9 @@ int xc_linux_restore(int xc_handle,
for(i=0;i<j;i++)
{
+ if ((region_pfn_type[i]>>29) == 7)
+ continue;
+
pfn = region_pfn_type[i] & ~PGT_type_mask;
mfn = pfn_to_mfn_table[pfn];
@@ -261,9 +271,15 @@ int xc_linux_restore(int xc_handle,
unsigned long *ppage;
pfn = region_pfn_type[i] & ~PGT_type_mask;
+
+//if(n>=nr_pfns || ((region_pfn_type[i] & PGT_type_mask) == L2TAB) ) printf("pfn=%08lx mfn=%x\n",region_pfn_type[i],pfn_to_mfn_table[pfn]);
+
//if(pfn_type[i])printf("^pfn=%d %08lx\n",pfn,pfn_type[i]);
+ if ((region_pfn_type[i]>>29) == 7)
+ continue;
+
if (pfn>nr_pfns)
{
ERROR("pfn out of range");
@@ -304,7 +320,7 @@ int xc_linux_restore(int xc_handle,
if ( xpfn >= nr_pfns )
{
- ERROR("Frame number in type %d page table is out of range. i=%d k=%d pfn=%d nr_pfns=%d",region_pfn_type[i]>>29,i,k,xpfn,nr_pfns);
+ ERROR("Frame number in type %d page table is out of range. i=%d k=%d pfn=0x%x nr_pfns=%d",region_pfn_type[i]>>29,i,k,xpfn,nr_pfns);
goto out;
}
#if 0
@@ -355,17 +371,19 @@ int xc_linux_restore(int xc_handle,
default:
ERROR("Bogus page type %x page table is out of range. i=%d nr_pfns=%d",region_pfn_type[i],i,nr_pfns);
goto out;
- }
+
+ } // end of page type switch statement
if ( add_mmu_update(xc_handle, mmu,
(mfn<<PAGE_SHIFT) | MMU_MACHPHYS_UPDATE, pfn) )
goto out;
- }
+ } // end of 'batch' for loop
n+=j; // crude stats
}
+printf("RECEIVED ALL PAGES\n");
mfn_mapper_close( region_mapper );
@@ -381,7 +399,10 @@ int xc_linux_restore(int xc_handle,
(pfn_to_mfn_table[i]<<PAGE_SHIFT) |
MMU_EXTENDED_COMMAND,
MMUEXT_PIN_L1_TABLE) )
+ {
+ printf("ERR pin L1 pfn=%lx mfn=%lx\n");
goto out;
+ }
}
else if ( pfn_type[i] == L2TAB )
{
@@ -389,7 +410,10 @@ int xc_linux_restore(int xc_handle,
(pfn_to_mfn_table[i]<<PAGE_SHIFT) |
MMU_EXTENDED_COMMAND,
MMUEXT_PIN_L2_TABLE) )
+ {
+ printf("ERR pin L2 pfn=%lx mfn=%lx\n");
goto out;
+ }
}
}
@@ -421,6 +445,8 @@ int xc_linux_restore(int xc_handle,
p_srec->resume_info.flags = 0;
unmap_pfn(pm_handle, p_srec);
+printf("new shared info is %lx\n", shared_info_frame);
+
/* Uncanonicalise each GDT frame number. */
if ( ctxt.gdt_ents > 8192 )
{
@@ -451,7 +477,7 @@ int xc_linux_restore(int xc_handle,
/* Copy saved contents of shared-info page. No checking needed. */
ppage = map_pfn_writeable(pm_handle, shared_info_frame);
- memcpy(ppage, shared_info, PAGE_SIZE);
+ memcpy(ppage, shared_info, sizeof(shared_info_t));
unmap_pfn(pm_handle, ppage);
@@ -528,7 +554,9 @@ int xc_linux_restore(int xc_handle,
op.u.builddomain.ctxt = &ctxt;
rc = do_dom0_op(xc_handle, &op);
+printf("NORMAL EXIT RESTORE\n");
out:
+printf("EXIT RESTORE\n");
if ( mmu != NULL )
free(mmu);
diff --git a/tools/xc/lib/xc_linux_save.c b/tools/xc/lib/xc_linux_save.c
index 02e3ffc352..cbb1d66fd1 100644
--- a/tools/xc/lib/xc_linux_save.c
+++ b/tools/xc/lib/xc_linux_save.c
@@ -22,11 +22,17 @@
/*
* Returns TRUE if the given machine frame number has a unique mapping
* in the guest's pseudophysical map.
+ * 0x80000000-3 mark the shared_info, and blk/net rings
*/
#define MFN_IS_IN_PSEUDOPHYS_MAP(_mfn) \
- (((_mfn) < (1024*1024)) && \
- (live_pfn_to_mfn_table[live_mfn_to_pfn_table[_mfn]] == (_mfn)))
-
+ (((_mfn) < (1024*1024)) && \
+ ( ( (live_mfn_to_pfn_table[_mfn] < nr_pfns) && \
+ (live_pfn_to_mfn_table[live_mfn_to_pfn_table[_mfn]] == (_mfn)) ) || \
+\
+ (live_mfn_to_pfn_table[_mfn] >= 0x80000000 && \
+ live_mfn_to_pfn_table[_mfn] <= 0x80000003 ) || \
+ live_pfn_to_mfn_table[live_mfn_to_pfn_table[_mfn]] == 0x80000004 ) )
+
/* Returns TRUE if MFN is successfully converted to a PFN. */
#define translate_mfn_to_pfn(_pmfn) \
({ \
@@ -40,6 +46,14 @@
})
+/* test_bit */
+inline int test_bit ( int nr, volatile void * addr)
+{
+ return ( ((unsigned long*)addr)[nr/(sizeof(unsigned long)*8)] >>
+ (nr % (sizeof(unsigned long)*8) ) ) & 1;
+}
+
+
int xc_linux_save(int xc_handle,
u64 domid,
unsigned int flags,
@@ -47,14 +61,11 @@ int xc_linux_save(int xc_handle,
void *writerst )
{
dom0_op_t op;
- int rc = 1, i, j, k, n;
+ int rc = 1, i, j, k, n, last_iter, iter = 0;
unsigned long mfn;
- unsigned int prev_pc, this_pc;
int verbose = flags & XCFLAGS_VERBOSE;
- //int live = flags & XCFLAGS_LIVE;
-
- /* state of the new MFN mapper */
- mfn_mapper_t *mapper_handle1, *mapper_handle2;
+ int live = 1; //flags & XCFLAGS_LIVE; // XXXXXXXXXXXXXXXXXXX
+ int sent_last_iter, sent_this_iter, max_iters;
/* Remember if we stopped the guest, so we can restart it on exit. */
int we_stopped_it = 0;
@@ -90,8 +101,13 @@ int xc_linux_save(int xc_handle,
unsigned char *region_base;
/* A temporary mapping, and a copy, of the guest's suspend record. */
- suspend_record_t *p_srec, srec;
+ suspend_record_t *p_srec;
+
+ /* number of pages we're dealing with */
+ unsigned long nr_pfns;
+ /* bitmap of pages left to send */
+ unsigned long *to_send;
if ( mlock(&ctxt, sizeof(ctxt) ) )
{
@@ -129,21 +145,24 @@ int xc_linux_save(int xc_handle,
goto out;
}
- sleep(1);
+ usleep(1000); // 1ms
+ printf("Sleep for 1ms\n");
}
+#if 1
/* A cheesy test to see whether the domain contains valid state. */
if ( ctxt.pt_base == 0 )
{
ERROR("Domain is not in a valid Linux guest OS state");
goto out;
}
+#endif
/* Map the suspend-record MFN to pin it. The page must be owned by
domid for this to succeed. */
p_srec = mfn_mapper_map_single(xc_handle, domid,
- sizeof(srec), PROT_READ,
+ sizeof(*p_srec), PROT_READ,
ctxt.cpu_ctxt.esi );
if (!p_srec)
@@ -152,10 +171,10 @@ int xc_linux_save(int xc_handle,
goto out;
}
- memcpy( &srec, p_srec, sizeof(srec) );
+ nr_pfns = p_srec->nr_pfns;
/* cheesy sanity check */
- if ( srec.nr_pfns > 1024*1024 )
+ if ( nr_pfns > 1024*1024 )
{
ERROR("Invalid state record -- pfn count out of range");
goto out;
@@ -165,55 +184,13 @@ int xc_linux_save(int xc_handle,
live_pfn_to_mfn_frame_list =
mfn_mapper_map_single(xc_handle, domid,
PAGE_SIZE, PROT_READ,
- srec.pfn_to_mfn_frame_list );
+ p_srec->pfn_to_mfn_frame_list );
if (!live_pfn_to_mfn_frame_list)
{
ERROR("Couldn't map pfn_to_mfn_frame_list");
goto out;
}
-
-
- if ( (mapper_handle1 = mfn_mapper_init(xc_handle, domid,
- 1024*1024, PROT_READ ))
- == NULL )
- goto out;
-
- for ( i = 0; i < (srec.nr_pfns+1023)/1024; i++ )
- {
- /* Grab a copy of the pfn-to-mfn table frame list.
- This has the effect of preventing the page from being freed and
- given to another domain. (though the domain is stopped anyway...) */
- mfn_mapper_queue_entry( mapper_handle1, i<<PAGE_SHIFT,
- live_pfn_to_mfn_frame_list[i],
- PAGE_SIZE );
- }
-
- if ( mfn_mapper_flush_queue(mapper_handle1) )
- {
- ERROR("Couldn't map pfn_to_mfn table");
- goto out;
- }
-
- live_pfn_to_mfn_table = mfn_mapper_base( mapper_handle1 );
-
-
-
- /* We want zeroed memory so use calloc rather than malloc. */
- pfn_type = calloc(BATCH_SIZE, sizeof(unsigned long));
-
- if ( (pfn_type == NULL) )
- {
- errno = ENOMEM;
- goto out;
- }
-
- if ( mlock( pfn_type, BATCH_SIZE * sizeof(unsigned long) ) )
- {
- ERROR("Unable to mlock");
- goto out;
- }
-
/* Track the mfn_to_pfn table down from the domains PT */
{
@@ -233,49 +210,106 @@ int xc_linux_save(int xc_handle,
mfn_to_pfn_table_start_mfn );
}
+ /* Map all the frames of the pfn->mfn table. For migrate to succeed,
+ the guest must not change which frames are used for this purpose.
+ (its not clear why it would want to change them, and we'll be OK
+ from a safety POV anyhow. */
- /*
- * Quick belt and braces sanity check.
- */
+ live_pfn_to_mfn_table = mfn_mapper_map_batch( xc_handle, domid,
+ PROT_READ,
+ live_pfn_to_mfn_frame_list,
+ (nr_pfns+1023)/1024 );
+ if( !live_pfn_to_mfn_table )
+ {
+ PERROR("Couldn't map pfn_to_mfn table");
+ goto out;
+ }
+
+ for(i=0;i<(nr_pfns+1023)/1024 ;i++)
+ printf("LF: %d %x\n",i,live_pfn_to_mfn_frame_list[i]);
- for ( i = 0; i < srec.nr_pfns; i++ )
+
+ /* At this point, we can start the domain again if we're doign a
+ live suspend */
+
+ if( live )
+ {
+#if 1
+ if ( xc_shadow_control( xc_handle, domid,
+ DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY,
+ NULL, 0 ) < 0 )
+ {
+ ERROR("Couldn't enable shadow mode");
+ goto out;
+ }
+#endif
+ if ( xc_domain_start( xc_handle, domid ) < 0 )
+ {
+ ERROR("Couldn't restart domain");
+ goto out;
+ }
+//exit(-1);
+ last_iter = 0;
+ sent_last_iter = 1<<20; // 4GB's worth of pages
+ max_iters = 8; // limit us to 9 time round loop
+ }
+ else
+ last_iter = 1;
+
+
+ /* Setup to_send bitmap */
{
- mfn = live_pfn_to_mfn_table[i];
+ int sz = (nr_pfns/8) + 8; // includes slop at end of array
+
+ to_send = malloc( sz );
- if( live_mfn_to_pfn_table[mfn] != i )
- printf("i=%d mfn=%d live_mfn_to_pfn_table=%d\n",
- i,mfn,live_mfn_to_pfn_table[mfn]);
+ if (!to_send)
+ {
+ ERROR("Couldn't allocate to_send array");
+ goto out;
+ }
+ memset( to_send, 0xff, sz );
+
+ if ( mlock( to_send, sz ) )
+ {
+ PERROR("Unable to mlock to_send");
+ return 1;
+ }
}
- /* Canonicalise the suspend-record frame number. */
- if ( !translate_mfn_to_pfn(&ctxt.cpu_ctxt.esi) )
+ /* We want zeroed memory so use calloc rather than malloc. */
+ pfn_type = calloc(BATCH_SIZE, sizeof(unsigned long));
+
+ if ( (pfn_type == NULL) )
{
- ERROR("State record is not in range of pseudophys map");
+ errno = ENOMEM;
goto out;
}
- /* Canonicalise each GDT frame number. */
- for ( i = 0; i < ctxt.gdt_ents; i += 512 )
+ if ( mlock( pfn_type, BATCH_SIZE * sizeof(unsigned long) ) )
{
- if ( !translate_mfn_to_pfn(&ctxt.gdt_frames[i]) )
- {
- ERROR("GDT frame is not in range of pseudophys map");
- goto out;
- }
+ ERROR("Unable to mlock");
+ goto out;
}
- /* Canonicalise the page table base pointer. */
- if ( !MFN_IS_IN_PSEUDOPHYS_MAP(ctxt.pt_base >> PAGE_SHIFT) )
+
+ /*
+ * Quick belt and braces sanity check.
+ */
+
+ for ( i = 0; i < nr_pfns; i++ )
{
- ERROR("PT base is not in range of pseudophys map");
- goto out;
+ mfn = live_pfn_to_mfn_table[i];
+
+ if( (live_mfn_to_pfn_table[mfn] != i) && (mfn != 0x80000004) )
+ printf("i=0x%x mfn=%x live_mfn_to_pfn_table=%x\n",
+ i,mfn,live_mfn_to_pfn_table[mfn]);
}
- ctxt.pt_base = live_mfn_to_pfn_table[ctxt.pt_base >> PAGE_SHIFT] << PAGE_SHIFT;
/* Canonicalise the pfn-to-mfn table frame-number list. */
memcpy( pfn_to_mfn_frame_list, live_pfn_to_mfn_frame_list, PAGE_SIZE );
- for ( i = 0; i < srec.nr_pfns; i += 1024 )
+ for ( i = 0; i < nr_pfns; i += 1024 )
{
if ( !translate_mfn_to_pfn(&pfn_to_mfn_frame_list[i/1024]) )
{
@@ -284,7 +318,7 @@ int xc_linux_save(int xc_handle,
}
}
- /* Start writing out the saved-domain record. */
+ /* Map the shared info frame */
live_shinfo = mfn_mapper_map_single(xc_handle, domid,
PAGE_SIZE, PROT_READ,
shared_info_frame);
@@ -295,164 +329,290 @@ int xc_linux_save(int xc_handle,
goto out;
}
+ /* Start writing out the saved-domain record. */
+
if ( (*writerfn)(writerst, "LinuxGuestRecord", 16) ||
(*writerfn)(writerst, name, sizeof(name)) ||
- (*writerfn)(writerst, &srec.nr_pfns, sizeof(unsigned long)) ||
- (*writerfn)(writerst, &ctxt, sizeof(ctxt)) ||
- (*writerfn)(writerst, live_shinfo, PAGE_SIZE) ||
+ (*writerfn)(writerst, &nr_pfns, sizeof(unsigned long)) ||
(*writerfn)(writerst, pfn_to_mfn_frame_list, PAGE_SIZE) )
{
ERROR("Error when writing to state file (1)");
goto out;
}
- munmap(live_shinfo, PAGE_SIZE);
-
- verbose_printf("Saving memory pages: 0%%");
-
- if ( (mapper_handle2 = mfn_mapper_init(xc_handle, domid,
- BATCH_SIZE*4096, PROT_READ ))
- == NULL )
- goto out;
-
- region_base = mfn_mapper_base( mapper_handle2 );
/* Now write out each data page, canonicalising page tables as we go... */
- prev_pc = 0;
- for ( n = 0; n < srec.nr_pfns; )
+
+ while(1)
{
- this_pc = (n * 100) / srec.nr_pfns;
- if ( (this_pc - prev_pc) >= 5 )
- {
- verbose_printf("\b\b\b\b%3d%%", this_pc);
- prev_pc = this_pc;
- }
+ unsigned int prev_pc, batch, sent_this_iter;
- for( j = 0, i = n; j < BATCH_SIZE && i < srec.nr_pfns ; j++, i++ )
- {
- pfn_type[j] = live_pfn_to_mfn_table[i];
- }
+ iter++;
+ sent_this_iter = 0;
+ prev_pc = 0;
+ verbose_printf("Saving memory pages: iter %d 0%%", iter);
- for( j = 0, i = n; j < BATCH_SIZE && i < srec.nr_pfns ; j++, i++ )
+ n=0;
+ while( n < nr_pfns )
{
- /* queue up mappings for all of the pages in this batch */
+ unsigned int this_pc = (n * 100) / nr_pfns;
+ if ( (this_pc - prev_pc) >= 5 )
+ {
+ verbose_printf("\b\b\b\b%3d%%", this_pc);
+ prev_pc = this_pc;
+ }
-//printf("region n=%d j=%d i=%d mfn=%d\n",n,j,i,live_pfn_to_mfn_table[i]);
- mfn_mapper_queue_entry( mapper_handle2, j<<PAGE_SHIFT,
- live_pfn_to_mfn_table[i],
- PAGE_SIZE );
- }
- if( mfn_mapper_flush_queue(mapper_handle2) )
- {
- ERROR("Couldn't map page region");
- goto out;
- }
+ /* load pfn_type[] with the mfn of all the pages we're doing in
+ this batch. */
- if ( get_pfn_type_batch(xc_handle, domid, j, pfn_type) )
- {
- ERROR("get_pfn_type_batch failed");
- goto out;
- }
-
- for( j = 0, i = n; j < BATCH_SIZE && i < srec.nr_pfns ; j++, i++ )
- {
- if((pfn_type[j]>>29) == 7)
+ for( batch = 0; batch < BATCH_SIZE && n < nr_pfns ; n++ )
{
- ERROR("bogus page");
- goto out;
- }
+ if ( !test_bit(n, to_send ) ) continue;
- /* canonicalise mfn->pfn */
- pfn_type[j] = (pfn_type[j] & PGT_type_mask) |
- live_mfn_to_pfn_table[pfn_type[j]&~PGT_type_mask];
-
-/* if(pfn_type[j]>>29)
- printf("i=%d type=%d\n",i,pfn_type[i]); */
- }
+ pfn_type[batch] = live_pfn_to_mfn_table[n];
+ if( pfn_type[batch] == 0x80000004 )
+ {
+ //printf("Skip netbuf pfn %lx. mfn %lx\n",n,pfn_type[batch]);
+ continue;
+ }
- if ( (*writerfn)(writerst, &j, sizeof(int) ) )
- {
- ERROR("Error when writing to state file (2)");
- goto out;
- }
+//if(iter>1) printf("pfn=%x mfn=%x\n",n,pfn_type[batch]);
+
+ batch++;
+ }
- if ( (*writerfn)(writerst, pfn_type, sizeof(unsigned long)*j ) )
- {
- ERROR("Error when writing to state file (3)");
- goto out;
- }
+ for( j = 0; j < batch; j++ )
+ {
+ if( (pfn_type[j] &0xfffff) == 0x0000004 )
+ {
+ printf("XXXXXXXXSkip netbuf entry %d mfn %lx\n",j,pfn_type[j]);
+ }
- for( j = 0, i = n; j < BATCH_SIZE && i < srec.nr_pfns ; j++, i++ )
- {
- /* write out pages in batch */
+
+ }
- if ( ((pfn_type[j] & PGT_type_mask) == L1TAB) ||
- ((pfn_type[j] & PGT_type_mask) == L2TAB) )
+
+ printf("batch %d:%d (n=%d)\n",iter,batch,n);
+
+ if(batch == 0) goto skip; // vanishingly unlikely...
+
+ if ( (region_base = mfn_mapper_map_batch( xc_handle, domid,
+ PROT_READ,
+ pfn_type,
+ batch )) == 0)
+ {
+ PERROR("map batch failed");
+ goto out;
+ }
+
+ if ( get_pfn_type_batch(xc_handle, domid, batch, pfn_type) )
{
+ ERROR("get_pfn_type_batch failed");
+ goto out;
+ }
+
+ for( j = 0; j < batch; j++ )
+ {
+ if((pfn_type[j]>>29) == 7)
+ {
+ //printf("type fail: page %i mfn %08lx\n",j,pfn_type[j]);
+ continue;
+ }
+//if((pfn_type[j] & PGT_type_mask) == L2TAB) printf("L2 pfn=%08lx mfn=%lx\n",pfn_type[j],live_mfn_to_pfn_table[pfn_type[j]&~PGT_type_mask]);
- memcpy(page, region_base + (PAGE_SIZE*j), PAGE_SIZE);
+ /* canonicalise mfn->pfn */
+ pfn_type[j] = (pfn_type[j] & PGT_type_mask) |
+ live_mfn_to_pfn_table[pfn_type[j]&~PGT_type_mask];
+ }
+
+
+ if ( (*writerfn)(writerst, &batch, sizeof(int) ) )
+ {
+ ERROR("Error when writing to state file (2)");
+ goto out;
+ }
- for ( k = 0;
- k < (((pfn_type[j] & PGT_type_mask) == L2TAB) ?
- (HYPERVISOR_VIRT_START >> L2_PAGETABLE_SHIFT) : 1024);
- k++ )
+ if ( (*writerfn)(writerst, pfn_type, sizeof(unsigned long)*j ) )
+ {
+ ERROR("Error when writing to state file (3)");
+ goto out;
+ }
+
+ /* entering this loop, pfn_type is now in pfns (Not mfns) */
+ for( j = 0; j < batch; j++ )
+ {
+ /* write out pages in batch */
+
+ if((pfn_type[j]>>29) == 7)
{
- if ( !(page[k] & _PAGE_PRESENT) ) continue;
- mfn = page[k] >> PAGE_SHIFT;
-
- if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) )
+ //printf("SKIP BOGUS page %i mfn %08lx\n",j,pfn_type[j]);
+ continue;
+ }
+
+ if ( ((pfn_type[j] & PGT_type_mask) == L1TAB) ||
+ ((pfn_type[j] & PGT_type_mask) == L2TAB) )
+ {
+
+ memcpy(page, region_base + (PAGE_SIZE*j), PAGE_SIZE);
+
+ for ( k = 0;
+ k < (((pfn_type[j] & PGT_type_mask) == L2TAB) ?
+ (HYPERVISOR_VIRT_START >> L2_PAGETABLE_SHIFT) : 1024);
+ k++ )
{
- ERROR("Frame number in pagetable page is invalid");
+ unsigned long pfn;
+
+ if ( !(page[k] & _PAGE_PRESENT) ) continue;
+ mfn = page[k] >> PAGE_SHIFT;
+ pfn = live_mfn_to_pfn_table[mfn];
+
+ if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) )
+ {
+ printf("FNI %d : [%08lx,%d] pte=%08lx, mfn=%08lx, pfn=%08lx [mfn]=%08lx\n",
+ j, pfn_type[j], k,
+ page[k], mfn, live_mfn_to_pfn_table[mfn],
+ (live_mfn_to_pfn_table[mfn]<nr_pfns)?
+ live_pfn_to_mfn_table[live_mfn_to_pfn_table[mfn]]: 0xdeadbeef);
+ pfn = 0; // be suspicious
+
+// ERROR("Frame number in pagetable page is invalid");
+// goto out;
+
+
+ }
+ page[k] &= PAGE_SIZE - 1;
+ page[k] |= pfn << PAGE_SHIFT;
+
+ /*
+ printf("L%d i=%d pfn=%d mfn=%d k=%d pte=%08lx xpfn=%d\n",
+ pfn_type[j]>>29,
+ j,i,mfn,k,page[k],page[k]>>PAGE_SHIFT);
+ */
+
+ } /* end of page table rewrite for loop */
+
+ if ( (*writerfn)(writerst, page, PAGE_SIZE) )
+ {
+ ERROR("Error when writing to state file (4)");
+ goto out;
+ }
+
+ } /* end of it's a PT page */
+ else
+ { /* normal page */
+ if ( (*writerfn)(writerst, region_base + (PAGE_SIZE*j), PAGE_SIZE) )
+ {
+ ERROR("Error when writing to state file (5)");
goto out;
}
- page[k] &= PAGE_SIZE - 1;
- page[k] |= live_mfn_to_pfn_table[mfn] << PAGE_SHIFT;
-
- /*
- printf("L%d i=%d pfn=%d mfn=%d k=%d pte=%08lx xpfn=%d\n",
- pfn_type[j]>>29,
- j,i,mfn,k,page[k],page[k]>>PAGE_SHIFT);
- */
-
}
+ } /* end of the write out for this batch */
+
+ sent_this_iter += batch;
- if ( (*writerfn)(writerst, page, PAGE_SIZE) )
- {
- ERROR("Error when writing to state file (4)");
- goto out;
- }
+ } /* end of this while loop for this iteration */
+ munmap(region_base, batch*PAGE_SIZE);
+
+ skip:
+
+ verbose_printf("\b\b\b\b100%% (%d pages)\n", sent_this_iter );
+
+ if ( last_iter )
+ break;
+ if ( live )
+ {
+ if ( sent_this_iter < (sent_last_iter * 0.95) && iter < max_iters )
+ {
+ // we seem to be doing OK, keep going
}
else
{
- if ( (*writerfn)(writerst, region_base + (PAGE_SIZE*j), PAGE_SIZE) )
- {
- ERROR("Error when writing to state file (5)");
- goto out;
- }
+ printf("Start last iteration\n");
+ last_iter = 1;
+
+ xc_domain_stop_sync( xc_handle, domid );
+
+ }
+
+ if ( xc_shadow_control( xc_handle, domid,
+ DOM0_SHADOW_CONTROL_OP_CLEAN,
+ to_send, nr_pfns ) != nr_pfns )
+ {
+ ERROR("Error flushing shadow PT");
+ goto out;
}
+
+#if 0
+ if(last_iter) memset(to_send, 0xff, (nr_pfns+7)/8 );
+#endif
+
+ sent_last_iter = sent_this_iter;
}
-
- n+=j; /* i is the master loop counter */
- }
- verbose_printf("\b\b\b\b100%%\nMemory saved.\n");
+
+ } /* end of while 1 */
+
+printf("All memory is saved\n");
/* Success! */
rc = 0;
-
+
/* Zero terminate */
if ( (*writerfn)(writerst, &rc, sizeof(int)) )
{
ERROR("Error when writing to state file (6)");
goto out;
}
-
+ /* Get the final execution context */
+ op.cmd = DOM0_GETDOMAININFO;
+ op.u.getdomaininfo.domain = (domid_t)domid;
+ op.u.getdomaininfo.ctxt = &ctxt;
+ if ( (do_dom0_op(xc_handle, &op) < 0) ||
+ ((u64)op.u.getdomaininfo.domain != domid) )
+ {
+ PERROR("Could not get info on domain");
+ goto out;
+ }
+printf("A\n");
+ /* Canonicalise the suspend-record frame number. */
+ if ( !translate_mfn_to_pfn(&ctxt.cpu_ctxt.esi) )
+ {
+ ERROR("State record is not in range of pseudophys map");
+ goto out;
+ }
+printf("B\n");
+ /* Canonicalise each GDT frame number. */
+ for ( i = 0; i < ctxt.gdt_ents; i += 512 )
+ {
+ if ( !translate_mfn_to_pfn(&ctxt.gdt_frames[i]) )
+ {
+ ERROR("GDT frame is not in range of pseudophys map");
+ goto out;
+ }
+ }
+printf("C\n");
+ /* Canonicalise the page table base pointer. */
+ if ( !MFN_IS_IN_PSEUDOPHYS_MAP(ctxt.pt_base >> PAGE_SHIFT) )
+ {
+ ERROR("PT base is not in range of pseudophys map");
+ goto out;
+ }
+ ctxt.pt_base = live_mfn_to_pfn_table[ctxt.pt_base >> PAGE_SHIFT] << PAGE_SHIFT;
+printf("D\n");
+ if ( (*writerfn)(writerst, &ctxt, sizeof(ctxt)) ||
+ (*writerfn)(writerst, live_shinfo, PAGE_SIZE) )
+ {
+ ERROR("Error when writing to state file (1)");
+ goto out;
+ }
+ munmap(live_shinfo, PAGE_SIZE);
+printf("E\n");
out:
/* Restart the domain if we had to stop it to save its state. */
if ( we_stopped_it )
diff --git a/tools/xc/lib/xc_private.c b/tools/xc/lib/xc_private.c
index d137176ca8..47931f28ec 100644
--- a/tools/xc/lib/xc_private.c
+++ b/tools/xc/lib/xc_private.c
@@ -47,6 +47,31 @@ void unmap_pfn(int pm_handle, void *vaddr)
/*******************/
+void * mfn_mapper_map_batch(int xc_handle, domid_t dom, int prot,
+ unsigned long *arr, int num )
+{
+ privcmd_mmapbatch_t ioctlx;
+ void *addr;
+ addr = mmap( NULL, num*PAGE_SIZE, prot, MAP_SHARED, xc_handle, 0 );
+ if (addr)
+ {
+ ioctlx.num=num;
+ ioctlx.dom=dom;
+ ioctlx.addr=(unsigned long)addr;
+ ioctlx.arr=arr;
+ if ( ioctl( xc_handle, IOCTL_PRIVCMD_MMAPBATCH, &ioctlx ) <0 )
+ {
+ perror("XXXXXXXX");
+ munmap(addr, num*PAGE_SIZE);
+ return 0;
+ }
+ }
+ return addr;
+
+}
+
+/*******************/
+
void * mfn_mapper_map_single(int xc_handle, domid_t dom,
int size, int prot,
unsigned long mfn )
@@ -64,7 +89,10 @@ void * mfn_mapper_map_single(int xc_handle, domid_t dom,
entry.mfn=mfn;
entry.npages=(size+PAGE_SIZE-1)>>PAGE_SHIFT;
if ( ioctl( xc_handle, IOCTL_PRIVCMD_MMAP, &ioctlx ) <0 )
+ {
+ munmap(addr, size);
return 0;
+ }
}
return addr;
}
@@ -295,7 +323,7 @@ static int flush_mmu_updates(int xc_handle, mmu_t *mmu)
hypercall.op = __HYPERVISOR_mmu_update;
hypercall.arg[0] = (unsigned long)mmu->updates;
- hypercall.arg[1] = (unsigned long)mmu->idx;
+ hypercall.arg[1] = (unsigned long)&(mmu->idx);
if ( mlock(mmu->updates, sizeof(mmu->updates)) != 0 )
{
@@ -342,3 +370,47 @@ int finish_mmu_updates(int xc_handle, mmu_t *mmu)
{
return flush_mmu_updates(xc_handle, mmu);
}
+
+
+/***********************************************************/
+
+/* this function is a hack until we get proper synchronous domain stop */
+
+int xc_domain_stop_sync( int xc_handle, domid_t domid )
+{
+ dom0_op_t op;
+
+ while (1)
+ {
+ op.cmd = DOM0_STOPDOMAIN;
+ op.u.stopdomain.domain = (domid_t)domid;
+ if ( do_dom0_op(xc_handle, &op) != 0 )
+ {
+ PERROR("Stopping target domain failed");
+ goto out;
+ }
+
+ usleep(1000); // 1ms
+ printf("Sleep for 1ms\n");
+
+ op.cmd = DOM0_GETDOMAININFO;
+ op.u.getdomaininfo.domain = (domid_t)domid;
+ op.u.getdomaininfo.ctxt = NULL;
+ if ( (do_dom0_op(xc_handle, &op) < 0) ||
+ ((u64)op.u.getdomaininfo.domain != domid) )
+ {
+ PERROR("Could not get info on domain");
+ goto out;
+ }
+
+ if ( op.u.getdomaininfo.state == DOMSTATE_STOPPED )
+ {
+ printf("Domain %lld stopped\n",domid);
+ return 0;
+ }
+
+ }
+
+out:
+ return -1;
+}
diff --git a/tools/xc/lib/xc_private.h b/tools/xc/lib/xc_private.h
index 3a2e3ea9f1..e3eff85e59 100644
--- a/tools/xc/lib/xc_private.h
+++ b/tools/xc/lib/xc_private.h
@@ -232,6 +232,9 @@ typedef struct mfn_mapper {
void * mfn_mapper_map_single(int xc_handle, domid_t dom, int size, int prot,
unsigned long mfn );
+void * mfn_mapper_map_batch(int xc_handle, domid_t dom, int prot,
+ unsigned long *arr, int num );
+
mfn_mapper_t * mfn_mapper_init(int xc_handle, domid_t dom, int size, int prot);
void * mfn_mapper_base(mfn_mapper_t *t);
@@ -245,5 +248,6 @@ void * mfn_mapper_queue_entry(mfn_mapper_t *t, int offset,
/*********************/
+int xc_domain_stop_sync( int xc_handle, domid_t dom );
#endif /* __XC_PRIVATE_H__ */
diff --git a/tools/xc/py/Xc.c b/tools/xc/py/Xc.c
index 929e9f3104..97bff12492 100644
--- a/tools/xc/py/Xc.c
+++ b/tools/xc/py/Xc.c
@@ -190,16 +190,17 @@ static PyObject *pyxc_linux_save(PyObject *self,
u64 dom;
char *state_file;
- int progress = 1;
+ int progress = 1, live = 0;
unsigned int flags = 0;
- static char *kwd_list[] = { "dom", "state_file", "progress", NULL };
+ static char *kwd_list[] = { "dom", "state_file", "progress", "live", NULL };
- if ( !PyArg_ParseTupleAndKeywords(args, kwds, "Ls|i", kwd_list,
- &dom, &state_file, &progress) )
+ if ( !PyArg_ParseTupleAndKeywords(args, kwds, "Ls|ii", kwd_list,
+ &dom, &state_file, &progress, &live) )
return NULL;
if (progress) flags |= XCFLAGS_VERBOSE;
+ if (live) flags |= XCFLAGS_LIVE;
if (strncmp(state_file,"tcp:", strlen("tcp:")) == 0)
{
@@ -1273,7 +1274,7 @@ static PyObject *pyxc_shadow_control(PyObject *self,
&dom, &op) )
return NULL;
- if ( xc_shadow_control(xc->xc_handle, dom, op) != 0 )
+ if ( xc_shadow_control(xc->xc_handle, dom, op, NULL, 0) < 0 )
return PyErr_SetFromErrno(xc_error);
Py_INCREF(zero);
diff --git a/tools/xend/lib/utils.c b/tools/xend/lib/utils.c
index 297976e9be..441b62f153 100644
--- a/tools/xend/lib/utils.c
+++ b/tools/xend/lib/utils.c
@@ -723,6 +723,11 @@ static PyObject *xu_port_new(PyObject *self, PyObject *args)
goto fail4;
}
+ xup->interface->tx_resp_prod = 0;
+ xup->interface->rx_req_prod = 0;
+ xup->interface->tx_req_prod = 0;
+ xup->interface->rx_resp_prod = 0;
+
xup->tx_req_cons = 0;
xup->tx_resp_prod = 0;
xup->rx_req_prod = 0;
diff --git a/xen/common/dom0_ops.c b/xen/common/dom0_ops.c
index 9370a61a8d..dee7552bdd 100644
--- a/xen/common/dom0_ops.c
+++ b/xen/common/dom0_ops.c
@@ -525,10 +525,10 @@ long do_dom0_op(dom0_op_t *u_dom0_op)
p = find_domain_by_id( op->u.shadow_control.domain );
if ( p )
{
- ret = shadow_mode_control(p, op->u.shadow_control.op );
+ ret = shadow_mode_control(p, &op->u.shadow_control );
put_task_struct(p);
- }
-
+ copy_to_user(u_dom0_op, op, sizeof(*op));
+ }
}
break;
diff --git a/xen/common/domain.c b/xen/common/domain.c
index a9c40ae98f..b9e8150bfb 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -89,9 +89,15 @@ struct task_struct *do_createdomain(domid_t dom_id, unsigned int cpu)
memset(p->shared_info, 0, PAGE_SIZE);
SHARE_PFN_WITH_DOMAIN(virt_to_page(p->shared_info), p);
+ machine_to_phys_mapping[virt_to_phys(p->shared_info) >> PAGE_SHIFT] =
+ 0x80000000UL; // set m2p table to magic marker (helps debug)
+
p->mm.perdomain_pt = (l1_pgentry_t *)get_free_page(GFP_KERNEL);
memset(p->mm.perdomain_pt, 0, PAGE_SIZE);
+ machine_to_phys_mapping[virt_to_phys(p->mm.perdomain_pt) >> PAGE_SHIFT] =
+ 0x0fffdeadUL; // set m2p table to magic marker (helps debug)
+
init_blkdev_info(p);
/* Per-domain PCI-device list. */
@@ -486,6 +492,7 @@ void free_all_dom_mem(struct task_struct *p)
unsigned int alloc_new_dom_mem(struct task_struct *p, unsigned int kbytes)
{
unsigned int alloc_pfns, nr_pages;
+ struct pfn_info *page;
nr_pages = (kbytes + ((PAGE_SIZE-1)>>10)) >> (PAGE_SHIFT - 10);
p->max_pages = nr_pages; /* this can now be controlled independently */
@@ -493,13 +500,16 @@ unsigned int alloc_new_dom_mem(struct task_struct *p, unsigned int kbytes)
/* grow the allocation if necessary */
for ( alloc_pfns = p->tot_pages; alloc_pfns < nr_pages; alloc_pfns++ )
{
- if ( unlikely(alloc_domain_page(p) == NULL) ||
+ if ( unlikely((page=alloc_domain_page(p)) == NULL) ||
unlikely(free_pfns < (SLACK_DOMAIN_MEM_KILOBYTES >>
(PAGE_SHIFT-10))) )
{
free_all_dom_mem(p);
return -ENOMEM;
}
+
+ /* initialise to machine_to_phys_mapping table to likely pfn */
+ machine_to_phys_mapping[page-frame_table] = alloc_pfns;
}
p->tot_pages = nr_pages;
diff --git a/xen/common/memory.c b/xen/common/memory.c
index ed2e5b6e17..243875f22e 100644
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -213,7 +213,12 @@ void __init init_frametable(unsigned long nr_pages)
belonging to the machine_to_phys_mapping to CPU0 idle task */
mfn = virt_to_phys((void *)RDWR_MPT_VIRT_START)>>PAGE_SHIFT;
-// for(i=0;i<nr_pages;i+=1024,mfn++)
+
+ /* initialise to a magic of 0x55555555 so easier to spot bugs later */
+ memset( machine_to_phys_mapping, 0x55, 4*1024*1024 );
+
+ /* The array is sized for a 4GB machine regardless of actual mem size.
+ This costs 4MB -- may want to fix some day */
for(i=0;i<1024*1024;i+=1024,mfn++)
{
frame_table[mfn].count_and_flags = 1 | PGC_allocated;
@@ -325,7 +330,7 @@ static int get_page_from_pagenr(unsigned long page_nr, struct task_struct *p)
if ( unlikely(!get_page(page, p)) )
{
- MEM_LOG("Could not get page ref for pfn %08lx\n", page_nr);
+ MEM_LOG("Could not get page ref for pfn %08lx", page_nr);
return 0;
}
@@ -944,8 +949,9 @@ static int do_extended_command(unsigned long ptr, unsigned long val)
}
-int do_mmu_update(mmu_update_t *ureqs, int count)
+int do_mmu_update(mmu_update_t *ureqs, int * p_count)
{
+ int count;
mmu_update_t req;
unsigned long va = 0, deferred_ops, pfn, prev_pfn = 0;
struct pfn_info *page;
@@ -954,6 +960,11 @@ int do_mmu_update(mmu_update_t *ureqs, int count)
unsigned long prev_spfn = 0;
l1_pgentry_t *prev_spl1e = 0;
+ if ( unlikely( get_user(count, p_count) ) )
+ {
+ return -EFAULT;
+ }
+
perfc_incrc(calls_to_mmu_update);
perfc_addc(num_page_updates, count);
@@ -1110,6 +1121,9 @@ int do_mmu_update(mmu_update_t *ureqs, int count)
percpu_info[cpu].gps = percpu_info[cpu].pts = NULL;
}
+ if ( unlikely(rc) )
+ put_user( count, p_count );
+
return rc;
}
diff --git a/xen/common/network.c b/xen/common/network.c
index 2f9051d9e5..befc929474 100644
--- a/xen/common/network.c
+++ b/xen/common/network.c
@@ -111,6 +111,9 @@ net_vif_t *create_net_vif(domid_t dom)
clear_page(new_ring);
SHARE_PFN_WITH_DOMAIN(virt_to_page(new_ring), p);
+ machine_to_phys_mapping[virt_to_phys(new_ring)>>PAGE_SHIFT] =
+ 0x80000001; // magic value aids debugging
+
/*
* Fill in the new vif struct. Note that, while the vif's refcnt is
* non-zero, we hold a reference to the task structure.
diff --git a/xen/common/shadow.c b/xen/common/shadow.c
index 1144c0e65e..fe142e3ee9 100644
--- a/xen/common/shadow.c
+++ b/xen/common/shadow.c
@@ -123,6 +123,7 @@ static inline int shadow_page_op( struct mm_struct *m, unsigned int op,
}
return work;
}
+
static void __scan_shadow_table( struct mm_struct *m, unsigned int op )
{
int j, work=0;
@@ -150,7 +151,7 @@ static void __scan_shadow_table( struct mm_struct *m, unsigned int op )
}
shadow_audit(m,0);
}
- SH_LOG("Scan shadow table. Work=%d l1=%d l2=%d", work, perfc_value(shadow_l1_pages), perfc_value(shadow_l2_pages));
+ SH_VLOG("Scan shadow table. Work=%d l1=%d l2=%d", work, perfc_value(shadow_l1_pages), perfc_value(shadow_l2_pages));
}
@@ -160,7 +161,6 @@ int shadow_mode_enable( struct task_struct *p, unsigned int mode )
struct shadow_status **fptr;
int i;
-
spin_lock_init(&m->shadow_lock);
spin_lock(&m->shadow_lock);
@@ -217,7 +217,6 @@ int shadow_mode_enable( struct task_struct *p, unsigned int mode )
// call shadow_mk_pagetable
shadow_mk_pagetable( m );
-
return 0;
nomem:
@@ -260,9 +259,12 @@ void shadow_mode_disable( struct task_struct *p )
kfree( &m->shadow_ht[0] );
}
-static void shadow_mode_table_op( struct task_struct *p, unsigned int op )
+static int shadow_mode_table_op( struct task_struct *p,
+ dom0_shadow_control_t *sc )
{
+ unsigned int op = sc->op;
struct mm_struct *m = &p->mm;
+ int rc = 0;
// since Dom0 did the hypercall, we should be running with it's page
// tables right now. Calling flush on yourself would be really
@@ -271,13 +273,13 @@ static void shadow_mode_table_op( struct task_struct *p, unsigned int op )
if ( m == &current->mm )
{
printk("Don't try and flush your own page tables!\n");
- return;
+ return -EINVAL;
}
spin_lock(&m->shadow_lock);
- SH_LOG("shadow mode table op %08lx %08lx count %d",pagetable_val( m->pagetable),pagetable_val(m->shadow_table), m->shadow_page_count);
+ SH_VLOG("shadow mode table op %08lx %08lx count %d",pagetable_val( m->pagetable),pagetable_val(m->shadow_table), m->shadow_page_count);
shadow_audit(m,1);
@@ -288,27 +290,60 @@ static void shadow_mode_table_op( struct task_struct *p, unsigned int op )
break;
case DOM0_SHADOW_CONTROL_OP_CLEAN:
- __scan_shadow_table( m, op );
- // we used to bzero dirty bitmap here, but now leave this to user space
- // if we were double buffering we'd do the flip here
+ {
+ int i;
+
+ __scan_shadow_table( m, op );
+
+ if( p->tot_pages > sc->pages ||
+ !sc->dirty_bitmap || !p->mm.shadow_dirty_bitmap )
+ {
+ rc = -EINVAL;
+ goto out;
+ }
+
+ sc->pages = p->tot_pages;
+
+#define chunk (8*1024) // do this in 1KB chunks for L1 cache
+
+ for(i=0;i<p->tot_pages;i+=chunk)
+ {
+ int bytes = (( ((p->tot_pages-i) > (chunk))?
+ (chunk):(p->tot_pages-i) ) + 7) / 8;
+
+ copy_to_user( sc->dirty_bitmap + (i/(8*sizeof(unsigned long))),
+ p->mm.shadow_dirty_bitmap +(i/(8*sizeof(unsigned long))),
+ bytes );
+
+ memset( p->mm.shadow_dirty_bitmap +(i/(8*sizeof(unsigned long))),
+ 0, bytes);
+ }
+
break;
+ }
}
+
+out:
+
spin_unlock(&m->shadow_lock);
- SH_LOG("shadow mode table op : page count %d", m->shadow_page_count);
+ SH_VLOG("shadow mode table op : page count %d", m->shadow_page_count);
shadow_audit(m,1);
// call shadow_mk_pagetable
shadow_mk_pagetable( m );
+ return rc;
}
-int shadow_mode_control( struct task_struct *p, unsigned int op )
+int shadow_mode_control( struct task_struct *p, dom0_shadow_control_t *sc )
{
int we_paused = 0;
+ unsigned int cmd = sc->op;
+ int rc = 0;
// don't call if already shadowed...
@@ -321,18 +356,23 @@ int shadow_mode_control( struct task_struct *p, unsigned int op )
we_paused = 1;
}
- if ( p->mm.shadow_mode && op == DOM0_SHADOW_CONTROL_OP_OFF )
+ if ( p->mm.shadow_mode && cmd == DOM0_SHADOW_CONTROL_OP_OFF )
{
shadow_mode_disable(p);
}
- else if ( op == DOM0_SHADOW_CONTROL_OP_ENABLE_TEST )
+ else if ( cmd == DOM0_SHADOW_CONTROL_OP_ENABLE_TEST )
{
if(p->mm.shadow_mode) shadow_mode_disable(p);
shadow_mode_enable(p, SHM_test);
}
- else if ( p->mm.shadow_mode && op >= DOM0_SHADOW_CONTROL_OP_FLUSH && op<=DOM0_SHADOW_CONTROL_OP_CLEAN )
+ else if ( cmd == DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY )
{
- shadow_mode_table_op(p, op);
+ if(p->mm.shadow_mode) shadow_mode_disable(p);
+ shadow_mode_enable(p, SHM_logdirty);
+ }
+ else if ( p->mm.shadow_mode && cmd >= DOM0_SHADOW_CONTROL_OP_FLUSH && cmd<=DOM0_SHADOW_CONTROL_OP_CLEAN )
+ {
+ rc = shadow_mode_table_op(p, sc);
}
else
{
@@ -341,7 +381,7 @@ int shadow_mode_control( struct task_struct *p, unsigned int op )
}
if ( we_paused ) wake_up(p);
- return 0;
+ return rc;
}
diff --git a/xen/drivers/block/xen_block.c b/xen/drivers/block/xen_block.c
index f44902b1c9..6901262cb8 100644
--- a/xen/drivers/block/xen_block.c
+++ b/xen/drivers/block/xen_block.c
@@ -19,6 +19,7 @@
#include <xen/interrupt.h>
#include <xen/vbd.h>
#include <xen/slab.h>
+#include <xen/shadow.h>
/*
* These are rather arbitrary. They are fairly large because adjacent requests
@@ -358,9 +359,18 @@ static void unlock_buffer(unsigned long buffer,
pfn < ((buffer + size + PAGE_SIZE - 1) >> PAGE_SHIFT);
pfn++ )
{
+
+ /* Find the domain from the frame_table. Yuk... */
+ struct task_struct *p = frame_table[pfn].u.domain;
+
+ if( p->mm.shadow_mode == SHM_logdirty )
+ mark_dirty( &p->mm, pfn );
+
+
if ( writeable_buffer )
put_page_type(&frame_table[pfn]);
put_page(&frame_table[pfn]);
+
}
}
@@ -597,6 +607,10 @@ void init_blkdev_info(struct task_struct *p)
p->blk_ring_base = (blk_ring_t *)get_free_page(GFP_KERNEL);
clear_page(p->blk_ring_base);
SHARE_PFN_WITH_DOMAIN(virt_to_page(p->blk_ring_base), p);
+
+ machine_to_phys_mapping[virt_to_phys(p->blk_ring_base)>>PAGE_SHIFT] =
+ 0x80000002; // magic value aids debugging
+
p->blkdev_list.next = NULL;
spin_lock_init(&p->vbd_lock);
}
diff --git a/xen/include/asm-i386/processor.h b/xen/include/asm-i386/processor.h
index 2968e2e4e9..c16d476016 100644
--- a/xen/include/asm-i386/processor.h
+++ b/xen/include/asm-i386/processor.h
@@ -449,7 +449,7 @@ struct mm_struct {
struct shadow_status *shadow_ht;
struct shadow_status *shadow_ht_free;
struct shadow_status *shadow_ht_extras; /* extra allocation units */
- unsigned int *shadow_dirty_bitmap;
+ unsigned long *shadow_dirty_bitmap;
unsigned int shadow_dirty_bitmap_size; /* in pages, bit per page */
unsigned int shadow_page_count;
unsigned int shadow_max_page_count;
diff --git a/xen/include/hypervisor-ifs/dom0_ops.h b/xen/include/hypervisor-ifs/dom0_ops.h
index 024e75ad83..0027e9df29 100644
--- a/xen/include/hypervisor-ifs/dom0_ops.h
+++ b/xen/include/hypervisor-ifs/dom0_ops.h
@@ -243,6 +243,9 @@ typedef struct dom0_shadow_control_st
/* IN variables. */
domid_t domain;
int op;
+ unsigned long *dirty_bitmap; // pointer to mlocked buffer
+ /* IN/OUT variables */
+ unsigned long pages; // size of buffer, updated with actual size
} dom0_shadow_control_t;
#define DOM0_SETDOMAINNAME 26
diff --git a/xen/include/xen/mm.h b/xen/include/xen/mm.h
index 628d20c17e..c132ad9662 100644
--- a/xen/include/xen/mm.h
+++ b/xen/include/xen/mm.h
@@ -164,8 +164,8 @@ static inline int get_page(struct pfn_info *page,
unlikely(x & PGC_zombie) || /* Zombie? */
unlikely(p != domain) ) /* Wrong owner? */
{
- DPRINTK("Error pfn %08lx: ed=%p(%lld), sd=%p(%lld), caf=%08x\n",
- page_to_pfn(page), domain, (domain)?domain->domain:1234, p, (p)?p->domain:1234, x);
+ DPRINTK("Error pfn %08lx: ed=%p(%lld), sd=%p(%lld), caf=%08x, taf=%08x\n",
+ page_to_pfn(page), domain, (domain)?domain->domain:999, p, (p && !((x & PGC_count_mask) == 0))?p->domain:999, x, page->type_and_flags);
return 0;
}
__asm__ __volatile__(
@@ -314,7 +314,7 @@ int check_descriptor(unsigned long a, unsigned long b);
#define machine_to_phys_mapping ((unsigned long *)RDWR_MPT_VIRT_START)
/* Part of the domain API. */
-int do_mmu_update(mmu_update_t *updates, int count);
+int do_mmu_update(mmu_update_t *updates, int *count);
#define DEFAULT_GDT_ENTRIES ((LAST_RESERVED_GDT_ENTRY*8)+7)
#define DEFAULT_GDT_ADDRESS ((unsigned long)gdt_table)
diff --git a/xen/include/xen/shadow.h b/xen/include/xen/shadow.h
index fba6fe3dfd..01b46301aa 100644
--- a/xen/include/xen/shadow.h
+++ b/xen/include/xen/shadow.h
@@ -23,7 +23,7 @@
#define shadow_linear_pg_table ((l1_pgentry_t *)SH_LINEAR_PT_VIRT_START)
#define shadow_linear_l2_table ((l2_pgentry_t *)(SH_LINEAR_PT_VIRT_START+(SH_LINEAR_PT_VIRT_START>>(L2_PAGETABLE_SHIFT-L1_PAGETABLE_SHIFT))))
-extern int shadow_mode_control( struct task_struct *p, unsigned int op );
+extern int shadow_mode_control( struct task_struct *p, dom0_shadow_control_t *sc );
extern int shadow_fault( unsigned long va, long error_code );
extern void shadow_l1_normal_pt_update( unsigned long pa, unsigned long gpte,
unsigned long *prev_spfn_ptr,
@@ -50,7 +50,7 @@ struct shadow_status {
#ifndef NDEBUG
#define SH_LOG(_f, _a...) \
- printk("DOM%llu: (file=shadow.c, line=%d) " _f "\n", \
+ printk("DOM%lld: (file=shadow.c, line=%d) " _f "\n", \
current->domain , __LINE__ , ## _a )
#else
#define SH_LOG(_f, _a...)
@@ -58,7 +58,7 @@ struct shadow_status {
#if SHADOW_DEBUG
#define SH_VLOG(_f, _a...) \
- printk("DOM%llu: (file=shadow.c, line=%d) " _f "\n", \
+ printk("DOM%lld: (file=shadow.c, line=%d) " _f "\n", \
current->domain , __LINE__ , ## _a )
#else
#define SH_VLOG(_f, _a...)
@@ -66,19 +66,27 @@ struct shadow_status {
#if 0
#define SH_VVLOG(_f, _a...) \
- printk("DOM%llu: (file=shadow.c, line=%d) " _f "\n", \
+ printk("DOM%lld: (file=shadow.c, line=%d) " _f "\n", \
current->domain , __LINE__ , ## _a )
#else
#define SH_VVLOG(_f, _a...)
#endif
-
/************************************************************************/
static inline void mark_dirty( struct mm_struct *m, unsigned int mfn )
{
- unsigned int pfn = machine_to_phys_mapping[mfn];
+ unsigned int pfn;
+
+ pfn = machine_to_phys_mapping[mfn];
+
+ /* We use values with the top bit set to mark MFNs that aren't
+ really part of the domain's pseudo-physical memory map e.g.
+ the shared info frame. Nothing to do here...
+ */
+ if ( unlikely(pfn & 0x80000000U) ) return;
+
ASSERT(m->shadow_dirty_bitmap);
if( likely(pfn<m->shadow_dirty_bitmap_size) )
{
@@ -91,7 +99,14 @@ static inline void mark_dirty( struct mm_struct *m, unsigned int mfn )
}
else
{
- SH_LOG("mark_dirty pfn out of range attempt!");
+ extern void show_traceX(void);
+ SH_LOG("mark_dirty OOR! mfn=%x pfn=%x max=%x (mm %p)",
+ mfn, pfn, m->shadow_dirty_bitmap_size, m );
+ SH_LOG("dom=%lld caf=%08x taf=%08x\n",
+ frame_table[mfn].u.domain->domain,
+ frame_table[mfn].count_and_flags,
+ frame_table[mfn].type_and_flags );
+ //show_traceX();
}
}
@@ -116,7 +131,7 @@ static inline void l1pte_write_fault( struct mm_struct *m,
spte = gpte;
gpte |= _PAGE_DIRTY | _PAGE_ACCESSED;
spte |= _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED;
- mark_dirty( m, gpte >> PAGE_SHIFT );
+ mark_dirty( m, (gpte >> PAGE_SHIFT) );
break;
}
@@ -343,7 +358,7 @@ static inline unsigned long get_shadow_status( struct mm_struct *m,
if( m->shadow_mode == SHM_logdirty )
mark_dirty( m, gpfn );
-
+
spin_lock(&m->shadow_lock);
res = __shadow_status( m, gpfn );
if (!res) spin_unlock(&m->shadow_lock);
diff --git a/xen/net/dev.c b/xen/net/dev.c
index 5ab01092f0..0252568131 100644
--- a/xen/net/dev.c
+++ b/xen/net/dev.c
@@ -547,6 +547,9 @@ void deliver_packet(struct sk_buff *skb, net_vif_t *vif)
goto out;
}
+ machine_to_phys_mapping[new_page - frame_table] =
+ machine_to_phys_mapping[old_page - frame_table];
+
if ( p->mm.shadow_mode &&
(spte_pfn=get_shadow_status(&p->mm, pte_page-frame_table)) )
{
@@ -557,17 +560,15 @@ void deliver_packet(struct sk_buff *skb, net_vif_t *vif)
*sptr = new_pte;
unmap_domain_mem(sptr);
- if( p->mm.shadow_mode == SHM_logdirty )
- mark_dirty( &p->mm, new_page-frame_table );
-
put_shadow_status(&p->mm);
}
-
- machine_to_phys_mapping[new_page - frame_table]
- = machine_to_phys_mapping[old_page - frame_table];
unmap_domain_mem(ptep);
+ /* if in shadow mode, mark the buffer as dirty */
+ if( p->mm.shadow_mode == SHM_logdirty )
+ mark_dirty( &p->mm, (new_page-frame_table) );
+
/* Updates must happen before releasing the descriptor. */
smp_wmb();
@@ -2143,8 +2144,6 @@ static void get_rx_bufs(net_vif_t *vif)
put_page_and_type(pte_page);
make_rx_response(vif, rx.id, 0, RING_STATUS_BAD_PAGE, 0);
goto rx_unmap_and_continue;
-
- /* XXX IAP should SHADOW_CONFIG do something here? */
}
/*
@@ -2156,9 +2155,11 @@ static void get_rx_bufs(net_vif_t *vif)
0) !=
(PGC_allocated | PGC_tlb_flush_on_type_change | 2)) )
{
- DPRINTK("Page held more than once %08x %s\n",
+ DPRINTK("Page held more than once mfn=%x %08x %s\n",
+ buf_page-frame_table,
buf_page->count_and_flags,
(buf_page->u.domain)?buf_page->u.domain->name:"None");
+
if ( !get_page_type(buf_page, PGT_writeable_page) )
put_page(buf_page);
else if ( cmpxchg(ptep, pte & ~_PAGE_PRESENT, pte) !=
@@ -2264,6 +2265,13 @@ long flush_bufs_for_vif(net_vif_t *vif)
put_page_and_type(&frame_table[rx->pte_ptr >> PAGE_SHIFT]);
+ /* if in shadow mode, mark the PTE as dirty */
+ if( p->mm.shadow_mode == SHM_logdirty )
+ mark_dirty( &p->mm, rx->pte_ptr>>PAGE_SHIFT );
+ /* assume the shadow page table is about to be blown away,
+ and that it's not worth marking the buffer as dirty */
+
+
make_rx_response(vif, rx->id, 0, RING_STATUS_DROPPED, 0);
}
vif->rx_cons = i;
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/block/block.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/block/block.c
index d00dd98f7b..43a6a23479 100644
--- a/xenolinux-2.4.26-sparse/arch/xen/drivers/block/block.c
+++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/block/block.c
@@ -527,8 +527,6 @@ static void reset_xlblk_interface(void)
{
block_io_op_t op;
- nr_pending = 0;
-
op.cmd = BLOCK_IO_OP_RESET;
if ( HYPERVISOR_block_io_op(&op) != 0 )
printk(KERN_ALERT "Possible blkdev trouble: couldn't reset ring\n");
@@ -549,6 +547,8 @@ int __init xlblk_init(void)
{
int error;
+ nr_pending = 0;
+
reset_xlblk_interface();
xlblk_response_irq = bind_virq_to_irq(VIRQ_BLKDEV);
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/dom0/core.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/dom0/core.c
index ccda2c2022..2fc577061e 100644
--- a/xenolinux-2.4.26-sparse/arch/xen/drivers/dom0/core.c
+++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/dom0/core.c
@@ -103,12 +103,12 @@ static int privcmd_ioctl(struct inode *inode, struct file *file,
if (msg[j].va + (msg[j].npages<<PAGE_SHIFT) > vma->vm_end)
return -EINVAL;
- if (rc = direct_remap_area_pages(vma->vm_mm,
+ if ( (rc = direct_remap_area_pages(vma->vm_mm,
msg[j].va&PAGE_MASK,
msg[j].mfn<<PAGE_SHIFT,
msg[j].npages<<PAGE_SHIFT,
vma->vm_page_prot,
- mmapcmd.dom))
+ mmapcmd.dom)) <0)
return rc;
}
}
@@ -116,6 +116,91 @@ static int privcmd_ioctl(struct inode *inode, struct file *file,
}
break;
+ case IOCTL_PRIVCMD_MMAPBATCH:
+ {
+#define MAX_DIRECTMAP_MMU_QUEUE 130
+ mmu_update_t u[MAX_DIRECTMAP_MMU_QUEUE], *w, *v;
+ privcmd_mmapbatch_t m;
+ struct vm_area_struct *vma = NULL;
+ unsigned long *p, addr;
+ unsigned long mfn;
+ int i;
+
+ if ( copy_from_user(&m, (void *)data, sizeof(m)) )
+ { ret = -EFAULT; goto batch_err; }
+
+ vma = find_vma( current->mm, m.addr );
+
+ if (!vma)
+ { ret = -EINVAL; goto batch_err; }
+
+ if (m.addr > PAGE_OFFSET)
+ { ret = -EFAULT; goto batch_err; }
+
+ if (m.addr + (m.num<<PAGE_SHIFT) > vma->vm_end)
+ { ret = -EFAULT; goto batch_err; }
+
+ // everything fits inside the vma
+
+//printk("direct_r_a_p sx=%ld address=%lx macaddr=%lx dom=%lld\n",size,address,machine_addr,domid);
+// memset( u, 0, sizeof(mmu_update_t)*MAX_DIRECTMAP_MMU_QUEUE );// XXX
+
+
+ if ( m.dom != 0 )
+ {
+ u[0].val = (unsigned long)(m.dom<<16) & ~0xFFFFUL;
+ u[0].ptr = (unsigned long)(m.dom<< 0) & ~0xFFFFUL;
+ u[1].val = (unsigned long)(m.dom>>16) & ~0xFFFFUL;
+ u[1].ptr = (unsigned long)(m.dom>>32) & ~0xFFFFUL;
+ u[0].ptr |= MMU_EXTENDED_COMMAND;
+ u[0].val |= MMUEXT_SET_SUBJECTDOM_L;
+ u[1].ptr |= MMU_EXTENDED_COMMAND;
+ u[1].val |= MMUEXT_SET_SUBJECTDOM_H;
+ v = w = &u[2];
+ }
+ else
+ {
+ v = w = &u[0];
+ }
+
+ p = m.arr;
+ addr = m.addr;
+//printk("BATCH: arr=%p addr=%lx num=%d u=%p,w=%p\n",p,addr,m.num,u,w);
+ for (i=0; i<m.num; i++, addr+=PAGE_SIZE, p++)
+ {
+ unsigned int count;
+ if ( get_user(mfn, p) ) return -EFAULT;
+
+ v->val = (mfn << PAGE_SHIFT) | pgprot_val(vma->vm_page_prot) |
+ _PAGE_IO;
+
+ __direct_remap_area_pages( vma->vm_mm,
+ addr,
+ PAGE_SIZE,
+ v);
+ v++;
+ count = v-u;
+//printk("Q i=%d mfn=%x co=%d v=%p : %lx %lx\n",i,mfn,count,v, w->val,w->ptr);
+
+ if ( HYPERVISOR_mmu_update(u, &count) < 0 )
+ {
+ //printk("Fail %d->%d mfn=%lx\n",v-u,count, w->val);
+ put_user( 0xe0000000 | mfn, p );
+ }
+ v=w;
+ }
+ ret = 0;
+ break;
+
+ batch_err:
+ printk("batch_err ret=%d vma=%p addr=%lx num=%d arr=%lx %lx-%lx\n",
+ ret, vma, m.addr, m.num, m.arr, vma->vm_start, vma->vm_end);
+ break;
+ }
+ break;
+
+
+
default:
ret = -EINVAL;
break;
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/network/network.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/network/network.c
index ef54ff7fe9..daa8441d7b 100644
--- a/xenolinux-2.4.26-sparse/arch/xen/drivers/network/network.c
+++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/network/network.c
@@ -248,6 +248,8 @@ static void network_alloc_rx_buffers(struct net_device *dev)
np->net_ring->rx_ring[MASK_NET_RX_IDX(i)].req.addr =
virt_to_machine(get_ppte(skb->head));
+ /* Shadow optimisation: disown this page from p->m map */
+ phys_to_machine_mapping[virt_to_phys(skb->head)>>PAGE_SHIFT] = 0x80000004;
np->rx_bufs_to_notify++;
}
while ( (++i - np->rx_resp_cons) != XENNET_RX_RING_SIZE );
@@ -364,6 +366,9 @@ static inline void _network_interrupt(struct net_device *dev)
skb = np->rx_skbs[rx->id];
ADD_ID_TO_FREELIST(np->rx_skbs, rx->id);
+ phys_to_machine_mapping[virt_to_phys(skb->head) >> PAGE_SHIFT] =
+ (*(unsigned long *)get_ppte(skb->head)) >> PAGE_SHIFT;
+
if ( unlikely(rx->status != RING_STATUS_OK) )
{
/* Gate this error. We get a (valid) slew of them on suspend. */
@@ -382,9 +387,6 @@ static inline void _network_interrupt(struct net_device *dev)
skb_shinfo(skb)->nr_frags = 0;
skb_shinfo(skb)->frag_list = NULL;
- phys_to_machine_mapping[virt_to_phys(skb->head) >> PAGE_SHIFT] =
- (*(unsigned long *)get_ppte(skb->head)) >> PAGE_SHIFT;
-
skb->data = skb->tail = skb->head + rx->offset;
skb_put(skb, rx->size);
skb->protocol = eth_type_trans(skb, dev);
diff --git a/xenolinux-2.4.26-sparse/arch/xen/kernel/setup.c b/xenolinux-2.4.26-sparse/arch/xen/kernel/setup.c
index 6be85db7f1..b06c6c26b0 100644
--- a/xenolinux-2.4.26-sparse/arch/xen/kernel/setup.c
+++ b/xenolinux-2.4.26-sparse/arch/xen/kernel/setup.c
@@ -1161,11 +1161,11 @@ static void stop_task(void *unused)
virt_to_machine(pfn_to_mfn_frame_list) >> PAGE_SHIFT;
suspend_record->nr_pfns = max_pfn;
- j = 0;
- for ( i = 0; i < max_pfn; i += (PAGE_SIZE / sizeof(unsigned long)) )
- pfn_to_mfn_frame_list[j++] =
+ for ( i=0, j=0; i < max_pfn; i+=(PAGE_SIZE/sizeof(unsigned long)), j++ )
+ {
+ pfn_to_mfn_frame_list[j] =
virt_to_machine(&phys_to_machine_mapping[i]) >> PAGE_SHIFT;
-
+ }
/*
* NB. This is /not/ a full dev_close() as that loses route information!
* Instead we do essentialy the same as dev_close() but without notifying
@@ -1207,7 +1207,9 @@ static void stop_task(void *unused)
memcpy(&start_info, &suspend_record->resume_info, sizeof(start_info));
set_fixmap(FIX_SHARED_INFO, start_info.shared_info);
+
HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
+
memset(empty_zero_page, 0, PAGE_SIZE);
irq_resume();
diff --git a/xenolinux-2.4.26-sparse/arch/xen/kernel/time.c b/xenolinux-2.4.26-sparse/arch/xen/kernel/time.c
index 52920cd0fc..3291a0338d 100644
--- a/xenolinux-2.4.26-sparse/arch/xen/kernel/time.c
+++ b/xenolinux-2.4.26-sparse/arch/xen/kernel/time.c
@@ -62,6 +62,7 @@
#include <linux/smp.h>
#include <linux/irq.h>
#include <linux/sysctl.h>
+#include <linux/sysrq.h>
spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED;
extern rwlock_t xtime_lock;
@@ -581,6 +582,10 @@ static void dbg_time_int(int irq, void *dev_id, struct pt_regs *ptregs)
timer->expires,(u32)(t_st>>32), (u32)t_st);
printk(KERN_ALERT "time: processed_system_time=0x%X:%08X\n",
(u32)(processed_system_time>>32), (u32)processed_system_time);
+
+
+ handle_sysrq('t',NULL,NULL,NULL);
+
}
static struct irqaction dbg_time = {
diff --git a/xenolinux-2.4.26-sparse/arch/xen/kernel/traps.c b/xenolinux-2.4.26-sparse/arch/xen/kernel/traps.c
index 0337cae1ca..78dbb9ef23 100644
--- a/xenolinux-2.4.26-sparse/arch/xen/kernel/traps.c
+++ b/xenolinux-2.4.26-sparse/arch/xen/kernel/traps.c
@@ -317,16 +317,17 @@ asmlinkage void do_general_protection(struct pt_regs * regs, long error_code)
__asm__ __volatile__ ( "sldt %0" : "=r" (ldt) );
if ( ldt == 0 )
{
- mmu_update_t u;
- u.ptr = MMU_EXTENDED_COMMAND;
- u.ptr |= (unsigned long)&default_ldt[0];
- u.val = MMUEXT_SET_LDT | (5 << MMUEXT_CMD_SHIFT);
- if ( unlikely(HYPERVISOR_mmu_update(&u, 1) < 0) )
- {
- show_trace(NULL);
- panic("Failed to install default LDT");
- }
- return;
+ int count = 1;
+ mmu_update_t u;
+ u.ptr = MMU_EXTENDED_COMMAND;
+ u.ptr |= (unsigned long)&default_ldt[0];
+ u.val = MMUEXT_SET_LDT | (5 << MMUEXT_CMD_SHIFT);
+ if ( unlikely(HYPERVISOR_mmu_update(&u, &count) < 0) )
+ {
+ show_trace(NULL);
+ panic("Failed to install default LDT");
+ }
+ return;
}
}
diff --git a/xenolinux-2.4.26-sparse/arch/xen/mm/hypervisor.c b/xenolinux-2.4.26-sparse/arch/xen/mm/hypervisor.c
index c6dc710576..daa5ee1d73 100644
--- a/xenolinux-2.4.26-sparse/arch/xen/mm/hypervisor.c
+++ b/xenolinux-2.4.26-sparse/arch/xen/mm/hypervisor.c
@@ -37,12 +37,13 @@ static void DEBUG_allow_pt_reads(void)
int i;
for ( i = idx-1; i >= 0; i-- )
{
+ int count = 1;
pte = update_debug_queue[i].ptep;
if ( pte == NULL ) continue;
update_debug_queue[i].ptep = NULL;
update.ptr = virt_to_machine(pte);
update.val = update_debug_queue[i].pteval;
- HYPERVISOR_mmu_update(&update, 1);
+ HYPERVISOR_mmu_update(&update, &count);
}
}
static void DEBUG_disallow_pt_read(unsigned long va)
@@ -51,6 +52,7 @@ static void DEBUG_disallow_pt_read(unsigned long va)
pmd_t *pmd;
pgd_t *pgd;
unsigned long pteval;
+ int count = 1;
/*
* We may fault because of an already outstanding update.
* That's okay -- it'll get fixed up in the fault handler.
@@ -62,7 +64,7 @@ static void DEBUG_disallow_pt_read(unsigned long va)
update.ptr = virt_to_machine(pte);
pteval = *(unsigned long *)pte;
update.val = pteval & ~_PAGE_PRESENT;
- HYPERVISOR_mmu_update(&update, 1);
+ HYPERVISOR_mmu_update(&update, &count);
update_debug_queue[idx].ptep = pte;
update_debug_queue[idx].pteval = pteval;
}
@@ -100,7 +102,7 @@ void MULTICALL_flush_page_update_queue(void)
wmb(); /* Make sure index is cleared first to avoid double updates. */
queue_multicall2(__HYPERVISOR_mmu_update,
(unsigned long)update_queue,
- _idx);
+ &_idx);
}
spin_unlock_irqrestore(&update_lock, flags);
}
@@ -116,7 +118,7 @@ static inline void __flush_page_update_queue(void)
#endif
idx = 0;
wmb(); /* Make sure index is cleared first to avoid double updates. */
- if ( unlikely(HYPERVISOR_mmu_update(update_queue, _idx) < 0) )
+ if ( unlikely(HYPERVISOR_mmu_update(update_queue, &_idx) < 0) )
panic("Failed to execute MMU updates");
}
diff --git a/xenolinux-2.4.26-sparse/arch/xen/mm/ioremap.c b/xenolinux-2.4.26-sparse/arch/xen/mm/ioremap.c
index 28a0a4071a..dbe706bb95 100644
--- a/xenolinux-2.4.26-sparse/arch/xen/mm/ioremap.c
+++ b/xenolinux-2.4.26-sparse/arch/xen/mm/ioremap.c
@@ -27,16 +27,12 @@
#define direct_mk_pte_phys(physpage, pgprot) \
__direct_mk_pte((physpage) >> PAGE_SHIFT, pgprot)
-static inline int direct_remap_area_pte(pte_t *pte,
+static inline void direct_remap_area_pte(pte_t *pte,
unsigned long address,
unsigned long size,
- unsigned long machine_addr,
- pgprot_t prot,
- domid_t domid)
+ mmu_update_t **v)
{
unsigned long end;
-#define MAX_DIRECTMAP_MMU_QUEUE 130
- mmu_update_t u[MAX_DIRECTMAP_MMU_QUEUE], *v, *w;
address &= ~PMD_MASK;
end = address + size;
@@ -45,95 +41,55 @@ static inline int direct_remap_area_pte(pte_t *pte,
if (address >= end)
BUG();
- /* If not I/O mapping then specify General-Purpose Subject Domain (GPS). */
- if ( domid != 0 )
- {
- u[0].val = (unsigned long)(domid<<16) & ~0xFFFFUL;
- u[0].ptr = (unsigned long)(domid<< 0) & ~0xFFFFUL;
- u[1].val = (unsigned long)(domid>>16) & ~0xFFFFUL;
- u[1].ptr = (unsigned long)(domid>>32) & ~0xFFFFUL;
- u[0].ptr |= MMU_EXTENDED_COMMAND;
- u[0].val |= MMUEXT_SET_SUBJECTDOM_L;
- u[1].ptr |= MMU_EXTENDED_COMMAND;
- u[1].val |= MMUEXT_SET_SUBJECTDOM_H;
- v = w = &u[2];
- }
- else
- {
- v = w = &u[0];
- }
-
do {
- if ( (v-u) == MAX_DIRECTMAP_MMU_QUEUE )
- {
- if ( HYPERVISOR_mmu_update(u, MAX_DIRECTMAP_MMU_QUEUE) < 0 )
- return -EFAULT;
- v = w;
- }
-#if 0 /* thanks to new ioctl mmaping interface this is no longer a bug */
+#if 0 // XXX
if (!pte_none(*pte)) {
printk("direct_remap_area_pte: page already exists\n");
BUG();
}
#endif
- v->ptr = virt_to_machine(pte);
- v->val = (machine_addr & PAGE_MASK) | pgprot_val(prot) | _PAGE_IO;
- v++;
+ (*v)->ptr = virt_to_machine(pte);
+ (*v)++;
address += PAGE_SIZE;
- machine_addr += PAGE_SIZE;
pte++;
} while (address && (address < end));
-
- if ( ((v-w) != 0) && (HYPERVISOR_mmu_update(u, v-u) < 0) )
- return -EFAULT;
-
- return 0;
+ return ;
}
static inline int direct_remap_area_pmd(struct mm_struct *mm,
pmd_t *pmd,
unsigned long address,
unsigned long size,
- unsigned long machine_addr,
- pgprot_t prot,
- domid_t domid)
+ mmu_update_t **v)
{
- int error = 0;
unsigned long end;
address &= ~PGDIR_MASK;
end = address + size;
if (end > PGDIR_SIZE)
end = PGDIR_SIZE;
- machine_addr -= address;
if (address >= end)
BUG();
do {
pte_t * pte = pte_alloc(mm, pmd, address);
if (!pte)
return -ENOMEM;
- error = direct_remap_area_pte(pte, address, end - address,
- address + machine_addr, prot, domid);
- if ( error )
- break;
+ direct_remap_area_pte(pte, address, end - address, v);
+
address = (address + PMD_SIZE) & PMD_MASK;
pmd++;
} while (address && (address < end));
- return error;
+ return 0;
}
-int direct_remap_area_pages(struct mm_struct *mm,
- unsigned long address,
- unsigned long machine_addr,
- unsigned long size,
- pgprot_t prot,
- domid_t domid)
+int __direct_remap_area_pages(struct mm_struct *mm,
+ unsigned long address,
+ unsigned long size,
+ mmu_update_t *v)
{
- int error = 0;
pgd_t * dir;
unsigned long end = address + size;
- machine_addr -= address;
dir = pgd_offset(mm, address);
flush_cache_all();
if (address >= end)
@@ -141,21 +97,89 @@ int direct_remap_area_pages(struct mm_struct *mm,
spin_lock(&mm->page_table_lock);
do {
pmd_t *pmd = pmd_alloc(mm, dir, address);
- error = -ENOMEM;
if (!pmd)
- break;
- error = direct_remap_area_pmd(mm, pmd, address, end - address,
- machine_addr + address, prot, domid);
- if (error)
- break;
+ return -ENOMEM;
+ direct_remap_area_pmd(mm, pmd, address, end - address, &v);
address = (address + PGDIR_SIZE) & PGDIR_MASK;
dir++;
+
} while (address && (address < end));
spin_unlock(&mm->page_table_lock);
flush_tlb_all();
- return error;
+ return 0;
}
+
+int direct_remap_area_pages(struct mm_struct *mm,
+ unsigned long address,
+ unsigned long machine_addr,
+ unsigned long size,
+ pgprot_t prot,
+ domid_t domid)
+{
+ int i, count;
+ unsigned long start_address;
+#define MAX_DIRECTMAP_MMU_QUEUE 130
+ mmu_update_t u[MAX_DIRECTMAP_MMU_QUEUE], *w, *v;
+
+ if ( domid != 0 )
+ {
+ u[0].val = (unsigned long)(domid<<16) & ~0xFFFFUL;
+ u[0].ptr = (unsigned long)(domid<< 0) & ~0xFFFFUL;
+ u[1].val = (unsigned long)(domid>>16) & ~0xFFFFUL;
+ u[1].ptr = (unsigned long)(domid>>32) & ~0xFFFFUL;
+ u[0].ptr |= MMU_EXTENDED_COMMAND;
+ u[0].val |= MMUEXT_SET_SUBJECTDOM_L;
+ u[1].ptr |= MMU_EXTENDED_COMMAND;
+ u[1].val |= MMUEXT_SET_SUBJECTDOM_H;
+ v = w = &u[2];
+ }
+ else
+ {
+ v = w = &u[0];
+ }
+
+ start_address = address;
+
+ for(i=0; i<size;
+ i+=PAGE_SIZE, machine_addr+=PAGE_SIZE, address+=PAGE_SIZE, v++)
+ {
+ if( (v-u) == MAX_DIRECTMAP_MMU_QUEUE )
+ {
+ /* get the ptep's filled in */
+ __direct_remap_area_pages( mm,
+ start_address,
+ address-start_address,
+ w);
+
+ count = v-u;
+ if ( HYPERVISOR_mmu_update(u, &count) < 0 )
+ return -EFAULT;
+ v=w;
+ start_address = address;
+ }
+
+ /* fill in the machine addresses */
+ v->val = (machine_addr & PAGE_MASK) | pgprot_val(prot) | _PAGE_IO;
+ }
+
+ if(v!=w)
+ {
+ /* get the ptep's filled in */
+ __direct_remap_area_pages( mm,
+ start_address,
+ address-start_address,
+ w);
+ count = v-u;
+ if ( HYPERVISOR_mmu_update(u, &count) < 0 )
+ return -EFAULT;
+
+ }
+
+ return 0;
+}
+
+
#endif /* CONFIG_XEN_PRIVILEGED_GUEST */
diff --git a/xenolinux-2.4.26-sparse/include/asm-xen/hypervisor.h b/xenolinux-2.4.26-sparse/include/asm-xen/hypervisor.h
index c454728c0e..e8b2bc40b0 100644
--- a/xenolinux-2.4.26-sparse/include/asm-xen/hypervisor.h
+++ b/xenolinux-2.4.26-sparse/include/asm-xen/hypervisor.h
@@ -153,7 +153,7 @@ static inline int HYPERVISOR_set_trap_table(trap_info_t *table)
return ret;
}
-static inline int HYPERVISOR_mmu_update(mmu_update_t *req, int count)
+static inline int HYPERVISOR_mmu_update(mmu_update_t *req, int *count)
{
int ret;
__asm__ __volatile__ (
diff --git a/xenolinux-2.4.26-sparse/include/asm-xen/pgalloc.h b/xenolinux-2.4.26-sparse/include/asm-xen/pgalloc.h
index d853a3f2af..143beeeef5 100644
--- a/xenolinux-2.4.26-sparse/include/asm-xen/pgalloc.h
+++ b/xenolinux-2.4.26-sparse/include/asm-xen/pgalloc.h
@@ -276,4 +276,11 @@ extern int direct_remap_area_pages(struct mm_struct *mm,
pgprot_t prot,
domid_t domid);
+extern int __direct_remap_area_pages(struct mm_struct *mm,
+ unsigned long address,
+ unsigned long size,
+ mmu_update_t *v);
+
+
+
#endif /* _I386_PGALLOC_H */
diff --git a/xenolinux-2.4.26-sparse/include/asm-xen/proc_cmd.h b/xenolinux-2.4.26-sparse/include/asm-xen/proc_cmd.h
index 3bf03c6064..08e452de15 100644
--- a/xenolinux-2.4.26-sparse/include/asm-xen/proc_cmd.h
+++ b/xenolinux-2.4.26-sparse/include/asm-xen/proc_cmd.h
@@ -25,6 +25,13 @@ typedef struct privcmd_mmap {
privcmd_mmap_entry_t *entry;
} privcmd_mmap_t;
+typedef struct privcmd_mmapbatch {
+ int num; // number of pages to populate
+ domid_t dom; // target domain
+ unsigned long addr; // virtual address
+ unsigned long *arr; // array of mfns - top nibble set on err
+} privcmd_mmapbatch_t;
+
typedef struct privcmd_blkmsg
{
unsigned long op;
@@ -50,5 +57,7 @@ typedef struct privcmd_blkmsg
_IOC(_IOC_NONE, 'P', 1, 0)
#define IOCTL_PRIVCMD_MMAP \
_IOC(_IOC_NONE, 'P', 2, sizeof(privcmd_mmap_t))
+#define IOCTL_PRIVCMD_MMAPBATCH \
+ _IOC(_IOC_NONE, 'P', 2, sizeof(privcmd_mmapbatch_t))
#endif /* __PROC_CMD_H__ */