diff options
-rwxr-xr-x | tools/examples/xc_dom_control.py | 10 | ||||
-rw-r--r-- | tools/xc/lib/xc_linux_restore.c | 7 | ||||
-rw-r--r-- | tools/xc/lib/xc_linux_save.c | 69 | ||||
-rw-r--r-- | tools/xc/py/Xc.c | 30 | ||||
-rw-r--r-- | xen/common/shadow.c | 56 | ||||
-rw-r--r-- | xen/include/hypervisor-ifs/dom0_ops.h | 3 |
6 files changed, 124 insertions, 51 deletions
diff --git a/tools/examples/xc_dom_control.py b/tools/examples/xc_dom_control.py index 2da4ddaf7b..9feab009e3 100755 --- a/tools/examples/xc_dom_control.py +++ b/tools/examples/xc_dom_control.py @@ -136,17 +136,7 @@ elif cmd == 'suspend': pid = int(fd.readline()) os.kill(pid, signal.SIGTERM) - """ - xc.domain_stop( dom=dom ) - XXX - while not xc.domain_getinfo( first_dom=dom, max_doms=1 )[0]['stopped']: - print "Sleep..." - time.sleep(0.001); - """ - rc = xc.linux_save( dom=dom, state_file=file, progress=1) - if rc == 0 : xc.domain_destroy( dom=dom, force=1 ) - else: xc.domain_start( dom=dom ) # sensible for production use elif cmd == 'cpu_bvtslice': if len(sys.argv) < 3: diff --git a/tools/xc/lib/xc_linux_restore.c b/tools/xc/lib/xc_linux_restore.c index d66e22fd0a..4e89b5715f 100644 --- a/tools/xc/lib/xc_linux_restore.c +++ b/tools/xc/lib/xc_linux_restore.c @@ -592,14 +592,11 @@ int xc_linux_restore(int xc_handle, out: - if ( rc != 0 ) // destroy is something went wrong + if ( rc != 0 ) // destroy if something went wrong { if ( dom != 0 ) { - op.cmd = DOM0_DESTROYDOMAIN; - op.u.destroydomain.domain = (domid_t)dom; - op.u.destroydomain.force = 1; - (void)do_dom0_op(xc_handle, &op); + xc_domain_destroy( xc_handle, dom, 1 ); } } diff --git a/tools/xc/lib/xc_linux_save.c b/tools/xc/lib/xc_linux_save.c index 64625c53f6..8bcd207d7f 100644 --- a/tools/xc/lib/xc_linux_save.c +++ b/tools/xc/lib/xc_linux_save.c @@ -95,7 +95,7 @@ int xc_linux_save(int xc_handle, int verbose = flags & XCFLAGS_VERBOSE; int live = flags & XCFLAGS_LIVE; int debug = flags & XCFLAGS_DEBUG; - int sent_last_iter, sent_this_iter, max_iters; + int sent_last_iter, sent_this_iter, skip_this_iter, max_iters; /* Remember if we stopped the guest, so we can restart it on exit. */ int we_stopped_it = 0; @@ -137,8 +137,11 @@ int xc_linux_save(int xc_handle, /* number of pages we're dealing with */ unsigned long nr_pfns; - /* bitmap of pages left to send */ - unsigned long *to_send, *to_fix; + /* bitmap of pages: + - that should be sent this iteration (unless later marked as skip); + - to skip this iteration because already dirty; + - to fixup by sending at the end if not already resent; */ + unsigned long *to_send, *to_skip, *to_fix; int needed_to_fix = 0; int total_sent = 0; @@ -289,7 +292,7 @@ int xc_linux_save(int xc_handle, last_iter = 0; sent_last_iter = 1<<20; // 4GB's worth of pages - max_iters = 9; // limit us to 10 time round loop + max_iters = 19; // limit us to 20 times round loop } else last_iter = 1; @@ -301,12 +304,14 @@ int xc_linux_save(int xc_handle, to_send = malloc( sz ); to_fix = calloc( 1, sz ); + to_skip = malloc( sz ); - if (!to_send || !to_fix) + if (!to_send || !to_fix || !to_skip) { ERROR("Couldn't allocate to_send array"); goto out; } + memset( to_send, 0xff, sz ); if ( mlock( to_send, sz ) ) @@ -314,6 +319,15 @@ int xc_linux_save(int xc_handle, PERROR("Unable to mlock to_send"); return 1; } + + /* (to fix is local only) */ + + if ( mlock( to_skip, sz ) ) + { + PERROR("Unable to mlock to_skip"); + return 1; + } + } @@ -379,6 +393,7 @@ int xc_linux_save(int xc_handle, iter++; sent_this_iter = 0; + skip_this_iter = 0; prev_pc = 0; verbose_printf("Saving memory pages: iter %d 0%%", iter); @@ -392,6 +407,18 @@ int xc_linux_save(int xc_handle, prev_pc = this_pc; } + /* slightly wasteful to peek the whole array evey time, + but this is fast enough for the moment. */ + + if ( !last_iter && + xc_shadow_control( xc_handle, domid, + DOM0_SHADOW_CONTROL_OP_PEEK, + to_skip, nr_pfns ) != nr_pfns ) + { + ERROR("Error peeking shadow bitmap"); + goto out; + } + /* load pfn_type[] with the mfn of all the pages we're doing in this batch. */ @@ -405,15 +432,29 @@ int xc_linux_save(int xc_handle, test_bit(n,to_send), live_mfn_to_pfn_table[live_pfn_to_mfn_table[n]&0xFFFFF]); + if (!last_iter && test_bit(n, to_send) && test_bit(n, to_skip)) + skip_this_iter++; // stats keeping + + if (! ( (test_bit(n, to_send) && !test_bit(n, to_skip)) || + (test_bit(n, to_send) && last_iter) || + (test_bit(n, to_fix) && last_iter) ) ) + continue; - if ( !test_bit(n, to_send ) && - !( last_iter && test_bit(n, to_fix ) ) ) continue; + /* we get here if: + 1. page is marked to_send & hasn't already been re-dirtied + 2. (ignore to_skip in last iteration) + 3. add in pages that still need fixup (net bufs) + */ pfn_batch[batch] = n; pfn_type[batch] = live_pfn_to_mfn_table[n]; if( pfn_type[batch] == 0x80000004 ) { + /* not currently in pusedo-physical map -- set bit + in to_fix that we must send this page in last_iter + unless its sent sooner anyhow */ + set_bit( n, to_fix ); if( iter>1 ) DDPRINTF("Urk! netbuf race: iter %d, pfn %lx. mfn %lx\n", @@ -572,7 +613,8 @@ int xc_linux_save(int xc_handle, total_sent += sent_this_iter; - verbose_printf("\b\b\b\b100%% (%d pages)\n", sent_this_iter ); + verbose_printf("\b\b\b\b100%% (pages sent= %d, skipped= %d )\n", + sent_this_iter, skip_this_iter ); if ( last_iter ) { @@ -604,7 +646,8 @@ int xc_linux_save(int xc_handle, if ( live ) { if ( ( sent_this_iter > (sent_last_iter * 0.95) ) || - (iter >= max_iters) || (sent_this_iter < 10) ) + (iter >= max_iters) || (sent_this_iter < 10) || + (total_sent > nr_pfns*2) ) { DPRINTF("Start last iteration\n"); last_iter = 1; @@ -685,14 +728,6 @@ int xc_linux_save(int xc_handle, munmap(live_shinfo, PAGE_SIZE); out: - /* Restart the domain if we had to stop it to save its state. */ - if ( we_stopped_it ) - { - printf("Restart domain\n"); - op.cmd = DOM0_STARTDOMAIN; - op.u.startdomain.domain = (domid_t)domid; - (void)do_dom0_op(xc_handle, &op); - } if ( pfn_type != NULL ) free(pfn_type); diff --git a/tools/xc/py/Xc.c b/tools/xc/py/Xc.c index 7bb1d877bd..8a6e3b22ef 100644 --- a/tools/xc/py/Xc.c +++ b/tools/xc/py/Xc.c @@ -214,6 +214,7 @@ static PyObject *pyxc_linux_save(PyObject *self, struct hostent *h; struct sockaddr_in s; int sockbufsize; + int rc = -1; int writerfn(void *fd, const void *buf, size_t count) { @@ -257,12 +258,24 @@ static PyObject *pyxc_linux_save(PyObject *self, if ( xc_linux_save(xc->xc_handle, dom, flags, writerfn, (void*)sd) == 0 ) { - close(sd); - Py_INCREF(zero); - return zero; + if ( read( sd, &rc, sizeof(int) ) != sizeof(int) ) + goto serr; + + if ( rc == 0 ) + { + printf("Migration succesful -- destroy local copy\n"); + xc_domain_destroy( xc->xc_handle, dom, 1 ); + close(sd); + Py_INCREF(zero); + return zero; + } + else + errno = rc; } serr: + printf("Migration failed -- restart local copy\n"); + xc_domain_start( xc->xc_handle, dom ); PyErr_SetFromErrno(xc_error); if ( sd >= 0 ) close(sd); return NULL; @@ -355,7 +368,7 @@ static PyObject *pyxc_linux_restore(PyObject *self, struct sockaddr_in s, d, p; socklen_t dlen, plen; int sockbufsize; - int on = 1; + int on = 1, rc = -1; int readerfn(void *fd, void *buf, size_t count) { @@ -413,13 +426,18 @@ static PyObject *pyxc_linux_restore(PyObject *self, sizeof sockbufsize) < 0 ) goto serr; - if ( xc_linux_restore(xc->xc_handle, dom, flags, - readerfn, (void*)sd, &dom) == 0 ) + rc = xc_linux_restore(xc->xc_handle, dom, flags, + readerfn, (void*)sd, &dom); + + write( sd, &rc, sizeof(int) ); + + if (rc == 0) { close(sd); Py_INCREF(zero); return zero; } + errno = rc; serr: PyErr_SetFromErrno(xc_error); diff --git a/xen/common/shadow.c b/xen/common/shadow.c index f82502d2b4..62081df926 100644 --- a/xen/common/shadow.c +++ b/xen/common/shadow.c @@ -110,10 +110,10 @@ static void __free_shadow_table( struct mm_struct *m ) } static inline int shadow_page_op( struct mm_struct *m, unsigned int op, - struct pfn_info *spfn_info ) + struct pfn_info *spfn_info, int *work ) { - int work = 0; unsigned int spfn = spfn_info-frame_table; + int restart = 0; switch( op ) { @@ -129,7 +129,7 @@ static inline int shadow_page_op( struct mm_struct *m, unsigned int op, { if ( (spl1e[i] & _PAGE_PRESENT ) && (spl1e[i] & _PAGE_RW) ) { - work++; + *work++; spl1e[i] &= ~_PAGE_RW; } } @@ -138,14 +138,36 @@ static inline int shadow_page_op( struct mm_struct *m, unsigned int op, } break; + case DOM0_SHADOW_CONTROL_OP_CLEAN2: + { + if ( (spfn_info->type_and_flags & PGT_type_mask) == + PGT_l1_page_table ) + { + delete_shadow_status( m, frame_table-spfn_info ); + restart = 1; // we need to go to start of list again + } + else if ( (spfn_info->type_and_flags & PGT_type_mask) == + PGT_l2_page_table ) + { + unsigned long * spl1e = map_domain_mem( spfn<<PAGE_SHIFT ); + memset( spl1e, 0, DOMAIN_ENTRIES_PER_L2_PAGETABLE * sizeof(*spl1e) ); + unmap_domain_mem( spl1e ); + } + else + BUG(); + } + break; + + + } - return work; + return restart; } static void __scan_shadow_table( struct mm_struct *m, unsigned int op ) { int j, work=0; - struct shadow_status *a; + struct shadow_status *a, *next; // the code assumes you're not using the page tables i.e. // the domain is stopped and cr3 is something else!! @@ -156,16 +178,25 @@ static void __scan_shadow_table( struct mm_struct *m, unsigned int op ) for(j=0;j<shadow_ht_buckets;j++) { - a = &m->shadow_ht[j]; + retry: + a = &m->shadow_ht[j]; + next = a->next; if (a->pfn) { - work += shadow_page_op( m, op, &frame_table[a->spfn_and_flags & PSH_pfn_mask] ); + if ( shadow_page_op( m, op, + &frame_table[a->spfn_and_flags & PSH_pfn_mask], + &work ) ) + goto retry; } - a=a->next; + a=next; while(a) { - work += shadow_page_op( m, op, &frame_table[a->spfn_and_flags & PSH_pfn_mask] ); - a=a->next; + next = a->next; + if ( shadow_page_op( m, op, + &frame_table[a->spfn_and_flags & PSH_pfn_mask], + &work ) ) + goto retry; + a=next; } shadow_audit(m,0); } @@ -304,7 +335,8 @@ static int shadow_mode_table_op( struct task_struct *p, __free_shadow_table( m ); break; - case DOM0_SHADOW_CONTROL_OP_CLEAN: + case DOM0_SHADOW_CONTROL_OP_CLEAN: // zero all-non hypervisor + case DOM0_SHADOW_CONTROL_OP_CLEAN2: // zero all L2, free L1s { int i,j,zero=1; @@ -418,7 +450,7 @@ int shadow_mode_control( struct task_struct *p, dom0_shadow_control_t *sc ) if(p->mm.shadow_mode) shadow_mode_disable(p); shadow_mode_enable(p, SHM_logdirty); } - else if ( p->mm.shadow_mode && cmd >= DOM0_SHADOW_CONTROL_OP_FLUSH && cmd<=DOM0_SHADOW_CONTROL_OP_PEEK ) + else if ( p->mm.shadow_mode && cmd >= DOM0_SHADOW_CONTROL_OP_FLUSH && cmd<=DOM0_SHADOW_CONTROL_OP_CLEAN2 ) { rc = shadow_mode_table_op(p, sc); } diff --git a/xen/include/hypervisor-ifs/dom0_ops.h b/xen/include/hypervisor-ifs/dom0_ops.h index 113f6bb02b..58b1480525 100644 --- a/xen/include/hypervisor-ifs/dom0_ops.h +++ b/xen/include/hypervisor-ifs/dom0_ops.h @@ -236,9 +236,10 @@ typedef struct dom0_sched_id_st #define DOM0_SHADOW_CONTROL_OP_OFF 0 #define DOM0_SHADOW_CONTROL_OP_ENABLE_TEST 1 #define DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY 2 -#define DOM0_SHADOW_CONTROL_OP_FLUSH 10 +#define DOM0_SHADOW_CONTROL_OP_FLUSH 10 /* table ops */ #define DOM0_SHADOW_CONTROL_OP_CLEAN 11 #define DOM0_SHADOW_CONTROL_OP_PEEK 12 +#define DOM0_SHADOW_CONTROL_OP_CLEAN2 13 typedef struct dom0_shadow_control_st { /* IN variables. */ |