author     iap10@labyrinth.cl.cam.ac.uk <iap10@labyrinth.cl.cam.ac.uk>  2004-05-20 17:43:25 +0000
committer  iap10@labyrinth.cl.cam.ac.uk <iap10@labyrinth.cl.cam.ac.uk>  2004-05-20 17:43:25 +0000
commit     b65519b91f0dbc706aa372fa165cdd535e572ab9 (patch)
tree       73be7b710de0dcf211eb9b47772a223655fe792f /xen
parent     c5d63dfc853a34d5af27b54d4456fbff11bf9cd8 (diff)
parent     6bf53ef08d47db840c3d18ed5eb0e1fcf1dda730 (diff)
bitkeeper revision 1.917 (40acee3d26HD4ugLPjY-eN66o0rNIg)
Merge labyrinth.cl.cam.ac.uk:/auto/groups/xeno/BK/xeno.bk into labyrinth.cl.cam.ac.uk:/auto/groups/xeno/users/iap10/xeno-clone/xeno.bk
Diffstat (limited to 'xen')
-rw-r--r--  xen/common/dom0_ops.c                   22
-rw-r--r--  xen/common/domain.c                     10
-rw-r--r--  xen/common/schedule.c                    5
-rw-r--r--  xen/common/shadow.c                    145
-rw-r--r--  xen/common/trace.c                       2
-rw-r--r--  xen/include/hypervisor-ifs/dom0_ops.h    7
-rw-r--r--  xen/include/hypervisor-ifs/trace.h       5
-rw-r--r--  xen/include/xen/trace.h                 20
8 files changed, 160 insertions, 56 deletions
diff --git a/xen/common/dom0_ops.c b/xen/common/dom0_ops.c
index 32fa532c9a..780e76e638 100644
--- a/xen/common/dom0_ops.c
+++ b/xen/common/dom0_ops.c
@@ -22,8 +22,8 @@
#include <hypervisor-ifs/sched_ctl.h>
-#define TRC_DOM0OP_START_BASE 0x00020000
-#define TRC_DOM0OP_FINISH_BASE 0x00030000
+#define TRC_DOM0OP_ENTER_BASE 0x00020000
+#define TRC_DOM0OP_LEAVE_BASE 0x00030000
extern unsigned int alloc_new_dom_mem(struct task_struct *, unsigned int);
@@ -64,7 +64,7 @@ long do_dom0_op(dom0_op_t *u_dom0_op)
return -EACCES;
}
- TRACE_5D( TRC_DOM0OP_START_BASE + op->cmd,
+ TRACE_5D( TRC_DOM0OP_ENTER_BASE + op->cmd,
0, op->u.dummy[0], op->u.dummy[1], op->u.dummy[2], op->u.dummy[3] );
switch ( op->cmd )
@@ -102,6 +102,20 @@ long do_dom0_op(dom0_op_t *u_dom0_op)
case DOM0_STOPDOMAIN:
{
ret = stop_other_domain(op->u.stopdomain.domain);
+
+ /* This is grim, but it helps live migration. It's also unsafe
+ in the strict sense, as we don't explicitly set a timeout;
+ but dom0 is bound to have other timers going off that will
+ wake us back up.
+ We go to sleep so that the other domain can stop more quickly,
+ giving less total downtime during a migration.
+ */
+ if( ret == 0 && op->u.stopdomain.sync == 1 )
+ {
+ extern long do_block( void );
+ printk("T\n");
+ do_block(); // Yuk...
+ }
}
break;
@@ -668,7 +682,7 @@ long do_dom0_op(dom0_op_t *u_dom0_op)
}
- TRACE_5D( TRC_DOM0OP_FINISH_BASE + op->cmd, ret,
+ TRACE_5D( TRC_DOM0OP_LEAVE_BASE + op->cmd, ret,
op->u.dummy[0], op->u.dummy[1], op->u.dummy[2], op->u.dummy[3] );
diff --git a/xen/common/domain.c b/xen/common/domain.c
index ee11f20fcd..52becabf5c 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -266,6 +266,16 @@ void stop_domain(void)
unlazy_fpu(current);
wmb(); /* All CPUs must see saved info in state TASK_STOPPED. */
set_current_state(TASK_STOPPED);
+
+ /* OK, this is grim, but it helps speed up live migration. When a
+ domain stops, kick dom0. */
+ {
+ struct task_struct *p;
+ printk("S\n");
+ guest_schedule_to_run( p = find_domain_by_id(0ULL) );
+ put_task_struct(p);
+ }
+
__enter_scheduler();
}
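Taken together, the dom0_ops.c and domain.c hunks above form a crude sleep/wake handshake: dom0 calls do_block() after requesting the stop, and the stopping domain kicks dom0 back onto the runqueue from stop_domain(). A minimal user-space sketch of the same shape, using a condition variable in place of the scheduler (all names here are illustrative, not Xen's):

#include <pthread.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  stopped_cv = PTHREAD_COND_INITIALIZER;
static int domain_stopped;

/* dom0 side: request the stop, then sleep until the domain reports in.
 * (The real code has no condition to re-check and no timeout, hence its
 * reliance on other timers to wake it -- the "grim" part above.) */
static void stop_domain_sync(void)
{
    pthread_mutex_lock(&lock);
    /* request_stop();  -- stands in for stop_other_domain() */
    while (!domain_stopped)
        pthread_cond_wait(&stopped_cv, &lock);   /* like do_block() */
    pthread_mutex_unlock(&lock);
}

/* stopping-domain side: mark ourselves stopped, then kick the waiter,
 * as the new code in stop_domain() kicks dom0. */
static void notify_stopped(void)
{
    pthread_mutex_lock(&lock);
    domain_stopped = 1;
    pthread_cond_signal(&stopped_cv);  /* like guest_schedule_to_run() */
    pthread_mutex_unlock(&lock);
}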
diff --git a/xen/common/schedule.c b/xen/common/schedule.c
index 5aa12bf8cb..b5e4219d6c 100644
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -27,6 +27,9 @@
#include <xen/perfc.h>
#include <xen/sched-if.h>
#include <hypervisor-ifs/sched_ctl.h>
+
+#undef TRACE_BUFFER
+
#include <xen/trace.h>
/*#define WAKEUP_HISTO*/
@@ -216,7 +219,7 @@ void wake_up(struct task_struct *p)
/*
* Block the currently-executing domain until a pertinent event occurs.
*/
-static long do_block(void)
+long do_block(void)
{
ASSERT(current->domain != IDLE_DOMAIN_ID);
current->shared_info->vcpu_data[0].evtchn_upcall_mask = 0;
diff --git a/xen/common/shadow.c b/xen/common/shadow.c
index 216c3deda1..f222419b25 100644
--- a/xen/common/shadow.c
+++ b/xen/common/shadow.c
@@ -109,43 +109,68 @@ static void __free_shadow_table( struct mm_struct *m )
SH_LOG("Free shadow table. Freed= %d",free);
}
-static inline int shadow_page_op( struct mm_struct *m, unsigned int op,
- struct pfn_info *spfn_info )
+
+#define TABLE_OP_ZERO_L2 1
+#define TABLE_OP_ZERO_L1 2
+#define TABLE_OP_FREE_L1 3
+
+static inline int shadow_page_op( struct mm_struct *m, unsigned int op,
+ unsigned int gpfn,
+ struct pfn_info *spfn_info, int *work )
{
- int work = 0;
unsigned int spfn = spfn_info-frame_table;
+ int restart = 0;
switch( op )
{
- case DOM0_SHADOW_CONTROL_OP_CLEAN:
- {
- int i;
- if ( (spfn_info->type_and_flags & PGT_type_mask) ==
+ case TABLE_OP_ZERO_L2:
+ {
+ if ( (spfn_info->type_and_flags & PGT_type_mask) ==
+ PGT_l2_page_table )
+ {
+ unsigned long * spl1e = map_domain_mem( spfn<<PAGE_SHIFT );
+ memset( spl1e, 0, DOMAIN_ENTRIES_PER_L2_PAGETABLE * sizeof(*spl1e) );
+ unmap_domain_mem( spl1e );
+ }
+ }
+ break;
+
+ case TABLE_OP_ZERO_L1:
+ {
+ if ( (spfn_info->type_and_flags & PGT_type_mask) ==
PGT_l1_page_table )
- {
- unsigned long * spl1e = map_domain_mem( spfn<<PAGE_SHIFT );
-
- for (i=0;i<ENTRIES_PER_L1_PAGETABLE;i++)
- {
- if ( (spl1e[i] & _PAGE_PRESENT ) && (spl1e[i] & _PAGE_RW) )
- {
- work++;
- spl1e[i] &= ~_PAGE_RW;
- }
- }
- unmap_domain_mem( spl1e );
- }
+ {
+ unsigned long * spl1e = map_domain_mem( spfn<<PAGE_SHIFT );
+ memset( spl1e, 0, ENTRIES_PER_L1_PAGETABLE * sizeof(*spl1e) );
+ unmap_domain_mem( spl1e );
+ }
}
break;
+ case TABLE_OP_FREE_L1:
+ {
+ if ( (spfn_info->type_and_flags & PGT_type_mask) ==
+ PGT_l1_page_table )
+ {
+ // lock is already held
+ delete_shadow_status( m, gpfn );
+ restart = 1; // we need to go to start of list again
+ }
+ }
+
+ break;
+
+ default:
+ BUG();
+
}
- return work;
+ return restart;
}
static void __scan_shadow_table( struct mm_struct *m, unsigned int op )
{
int j, work=0;
- struct shadow_status *a;
+ struct shadow_status *a, *next;
// the code assumes you're not using the page tables i.e.
// the domain is stopped and cr3 is something else!!
@@ -156,16 +181,25 @@ static void __scan_shadow_table( struct mm_struct *m, unsigned int op )
for(j=0;j<shadow_ht_buckets;j++)
{
- a = &m->shadow_ht[j];
+ retry:
+ a = &m->shadow_ht[j];
+ next = a->next;
if (a->pfn)
{
- work += shadow_page_op( m, op, &frame_table[a->spfn_and_flags & PSH_pfn_mask] );
+ if ( shadow_page_op( m, op, a->pfn,
+ &frame_table[a->spfn_and_flags & PSH_pfn_mask],
+ &work ) )
+ goto retry;
}
- a=a->next;
+ a=next;
while(a)
{
- work += shadow_page_op( m, op, &frame_table[a->spfn_and_flags & PSH_pfn_mask] );
- a=a->next;
+ next = a->next;
+ if ( shadow_page_op( m, op, a->pfn,
+ &frame_table[a->spfn_and_flags & PSH_pfn_mask],
+ &work ) )
+ goto retry;
+ a=next;
}
shadow_audit(m,0);
}
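The retry logic above exists because TABLE_OP_FREE_L1 can unlink the very entry being visited: the scan snapshots the successor before each call and, whenever an op reports a deletion, restarts the whole bucket rather than trusting a stale chain. The same idiom on an ordinary singly linked list, with illustrative types and a pointer-to-pointer walk in place of Xen's embedded first entry:

#include <stdlib.h>

struct entry { unsigned long key; struct entry *next; };

/* Unlink every entry matching 'key', rescanning from the head after
 * each deletion so a mutated chain is never walked via stale links. */
static void delete_matching(struct entry **head, unsigned long key)
{
retry:
    for (struct entry **pp = head; *pp != NULL; pp = &(*pp)->next)
    {
        if ((*pp)->key == key)
        {
            struct entry *victim = *pp;
            *pp = victim->next;   /* unlink */
            free(victim);
            goto retry;           /* chain changed: start over */
        }
    }
}

Strictly, a pointer-to-pointer walk would survive a single unlink without the rescan; the hypervisor restarts anyway because its first entry is embedded in the hash array and delete_shadow_status() may reshuffle the chain.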
@@ -301,16 +335,29 @@ static int shadow_mode_table_op( struct task_struct *p,
switch(op)
{
case DOM0_SHADOW_CONTROL_OP_FLUSH:
- __free_shadow_table( m );
+ // XXX THIS IS VERY DANGEROUS : MUST ENSURE THE PTs ARE NOT IN USE ON
+ // OTHER CPU -- fix when we get sched sync pause.
+ __free_shadow_table( m );
break;
- case DOM0_SHADOW_CONTROL_OP_CLEAN:
+ case DOM0_SHADOW_CONTROL_OP_CLEAN: // zero all non-hypervisor entries
+ {
+ __scan_shadow_table( m, TABLE_OP_ZERO_L2 );
+ __scan_shadow_table( m, TABLE_OP_ZERO_L1 );
+
+ goto send_bitmap;
+ }
+
+
+ case DOM0_SHADOW_CONTROL_OP_CLEAN2: // zero all L2, free L1s
{
int i,j,zero=1;
- __scan_shadow_table( m, op );
- // __free_shadow_table( m );
-
+ __scan_shadow_table( m, TABLE_OP_ZERO_L2 );
+ __scan_shadow_table( m, TABLE_OP_FREE_L1 );
+
+ send_bitmap:
+
if( p->tot_pages > sc->pages ||
!sc->dirty_bitmap || !p->mm.shadow_dirty_bitmap )
{
@@ -350,6 +397,38 @@ static int shadow_mode_table_op( struct task_struct *p,
break;
}
+
+ case DOM0_SHADOW_CONTROL_OP_PEEK:
+ {
+ int i;
+
+ if( p->tot_pages > sc->pages ||
+ !sc->dirty_bitmap || !p->mm.shadow_dirty_bitmap )
+ {
+ rc = -EINVAL;
+ goto out;
+ }
+
+ sc->pages = p->tot_pages;
+
+#define chunk (8*1024) // do this in 1KB chunks for L1 cache
+
+ for(i=0;i<p->tot_pages;i+=chunk)
+ {
+ int bytes = (( ((p->tot_pages-i) > (chunk))?
+ (chunk):(p->tot_pages-i) ) + 7) / 8;
+
+ copy_to_user( sc->dirty_bitmap + (i/(8*sizeof(unsigned long))),
+ p->mm.shadow_dirty_bitmap +(i/(8*sizeof(unsigned long))),
+ bytes );
+ }
+
+ break;
+ }
+
+ default:
+ BUG();
+
}
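The PEEK handler copies the dirty bitmap out in bounded pieces: with chunk = 8*1024 pages, each copy_to_user() moves at most 8*1024/8 = 1024 bytes, so the working set stays L1-resident, as the comment notes. A sketch of the same chunking arithmetic with illustrative names:

#include <string.h>

#define CHUNK_PAGES (8 * 1024)   /* pages per iteration -> 1KB of bitmap */

/* Copy a dirty bitmap covering 'tot_pages' pages, 1KB at a time. The
 * final iteration rounds the trailing partial byte up, as the
 * (pages + 7) / 8 in the hypervisor code does. */
static void copy_bitmap_chunked(unsigned char *dst,
                                const unsigned char *src,
                                unsigned long tot_pages)
{
    for (unsigned long i = 0; i < tot_pages; i += CHUNK_PAGES)
    {
        unsigned long pages = tot_pages - i;
        if (pages > CHUNK_PAGES)
            pages = CHUNK_PAGES;
        memcpy(dst + i / 8, src + i / 8, (pages + 7) / 8);
    }
}

For tot_pages = 20000, for instance, the loop issues copies of 1024, 1024 and (3616 + 7)/8 = 452 bytes.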
@@ -386,7 +465,7 @@ int shadow_mode_control( struct task_struct *p, dom0_shadow_control_t *sc )
if(p->mm.shadow_mode) shadow_mode_disable(p);
shadow_mode_enable(p, SHM_logdirty);
}
- else if ( p->mm.shadow_mode && cmd >= DOM0_SHADOW_CONTROL_OP_FLUSH && cmd<=DOM0_SHADOW_CONTROL_OP_CLEAN )
+ else if ( p->mm.shadow_mode && cmd >= DOM0_SHADOW_CONTROL_OP_FLUSH && cmd<=DOM0_SHADOW_CONTROL_OP_CLEAN2 )
{
rc = shadow_mode_table_op(p, sc);
}
diff --git a/xen/common/trace.c b/xen/common/trace.c
index 0140e7444a..e79c97ee54 100644
--- a/xen/common/trace.c
+++ b/xen/common/trace.c
@@ -27,7 +27,6 @@
#include <xen/sched.h>
#include <xen/slab.h>
#include <xen/smp.h>
-#include <xen/spinlock.h>
#include <xen/trace.h>
#include <xen/errno.h>
#include <asm/atomic.h>
@@ -86,7 +85,6 @@ void init_trace_bufs(void)
/* For use in Xen. */
buf->vdata = (struct t_rec *)(buf+1);
buf->head_ptr = buf->vdata;
- spin_lock_init(&buf->lock);
/* For use in user space. */
buf->data = (struct t_rec *)__pa(buf->vdata);
diff --git a/xen/include/hypervisor-ifs/dom0_ops.h b/xen/include/hypervisor-ifs/dom0_ops.h
index 879a728bd4..2a17605bf2 100644
--- a/xen/include/hypervisor-ifs/dom0_ops.h
+++ b/xen/include/hypervisor-ifs/dom0_ops.h
@@ -74,6 +74,9 @@ typedef struct dom0_stopdomain_st
{
/* IN parameters. */
domid_t domain;
+ /* Hack to indicate that you want to wait for the other domain --
+ replace with a proper synchronous stop soon! */
+ int sync;
} dom0_stopdomain_t;
#define DOM0_GETDOMAININFO 12
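A hypothetical caller's view of the new flag (the hypercall wrapper below is illustrative, not part of this patch): a migration tool sets sync so that do_dom0_op() blocks in do_block() until the stopping domain kicks dom0 from stop_domain().

#include <string.h>

/* Illustrative only: issue_dom0_op() stands in for whatever userland
 * plumbing delivers a dom0_op_t to the hypervisor. */
extern long issue_dom0_op(dom0_op_t *op);

static long stop_domain_for_migrate(domid_t domain)
{
    dom0_op_t op;

    memset(&op, 0, sizeof(op));
    op.cmd = DOM0_STOPDOMAIN;
    op.u.stopdomain.domain = domain;
    op.u.stopdomain.sync   = 1;    /* wait until the domain has stopped */

    return issue_dom0_op(&op);
}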
@@ -236,8 +239,10 @@ typedef struct dom0_sched_id_st
#define DOM0_SHADOW_CONTROL_OP_OFF 0
#define DOM0_SHADOW_CONTROL_OP_ENABLE_TEST 1
#define DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY 2
-#define DOM0_SHADOW_CONTROL_OP_FLUSH 10
+#define DOM0_SHADOW_CONTROL_OP_FLUSH 10 /* table ops */
#define DOM0_SHADOW_CONTROL_OP_CLEAN 11
+#define DOM0_SHADOW_CONTROL_OP_PEEK 12
+#define DOM0_SHADOW_CONTROL_OP_CLEAN2 13
typedef struct dom0_shadow_control_st
{
/* IN variables. */
diff --git a/xen/include/hypervisor-ifs/trace.h b/xen/include/hypervisor-ifs/trace.h
index 4d267ba101..d201eceb88 100644
--- a/xen/include/hypervisor-ifs/trace.h
+++ b/xen/include/hypervisor-ifs/trace.h
@@ -20,13 +20,12 @@ struct t_buf {
struct t_rec *data; /* pointer to data area. physical address
* for convenience in user space code */
- unsigned int size; /* size of the data area, in t_recs */
- unsigned int head; /* array index of the most recent record */
+ unsigned long size; /* size of the data area, in t_recs */
+ unsigned long head; /* array index of the most recent record */
#ifdef __KERNEL__
struct t_rec *head_ptr; /* pointer to the head record */
struct t_rec *vdata; /* virtual address pointer to data */
- spinlock_t lock; /* ensure mutually exlusive access (for inserts) */
#endif
/* never add anything here - the kernel stuff must be the last elements */
diff --git a/xen/include/xen/trace.h b/xen/include/xen/trace.h
index 00b18b0211..782023f76e 100644
--- a/xen/include/xen/trace.h
+++ b/xen/include/xen/trace.h
@@ -61,10 +61,12 @@ static inline int trace(u32 event, u32 d1, u32 d2, u32 d3, u32 d4, u32 d5)
if ( !tb_init_done )
return -1;
+
buf = t_bufs[smp_processor_id()];
- rec = buf->head_ptr;
- spin_lock_irqsave(&buf->lock, flags);
+ local_irq_save(flags);
+
+ rec = buf->head_ptr;
rdtscll(rec->cycles);
rec->event = event;
@@ -76,18 +78,12 @@ static inline int trace(u32 event, u32 d1, u32 d2, u32 d3, u32 d4, u32 d5)
wmb(); /* above must be visible before reader sees index updated */
- if ( likely(buf->head_ptr < (buf->vdata + buf->size - 1)) )
- {
- buf->head_ptr++;
- buf->head++;
- }
- else
- {
- buf->head = 0;
+ buf->head_ptr++;
+ buf->head++;
+ if ( buf->head_ptr == (buf->vdata + (buf->size-1)) )
buf->head_ptr = buf->vdata;
- }
- spin_unlock_irqrestore(&buf->lock, flags);
+ local_irq_restore(flags);
return 0;
}
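With one trace buffer per CPU there is only ever one producer per buffer, so the spinlock removed throughout this patch was pure overhead: masking local interrupts is enough to make the insert atomic on its own CPU. A minimal sketch of the resulting per-CPU ring insert, with illustrative types and assuming the hypervisor's local_irq_save/restore:

/* One producer per CPU: interrupts off is the only exclusion needed. */
struct ring {
    unsigned long head;     /* free-running record count */
    unsigned long size;     /* capacity in records */
    unsigned int *slots;    /* record storage */
};

static inline void ring_insert(struct ring *r, unsigned int rec)
{
    unsigned long flags;

    local_irq_save(flags);          /* no spinlock: buffer is per-CPU */
    r->slots[r->head % r->size] = rec;
    r->head++;                      /* a reader reduces this mod size */
    local_irq_restore(flags);
}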