author     cl349@firebug.cl.cam.ac.uk <cl349@firebug.cl.cam.ac.uk>  2005-08-17 08:27:16 +0000
committer  cl349@firebug.cl.cam.ac.uk <cl349@firebug.cl.cam.ac.uk>  2005-08-17 08:27:16 +0000
commit     f5eca0c88e319bb76137e30a780f66e84225a543 (patch)
tree       a403fe6d5fd24c90b899859501ca445616a842af
parent     72593432a519af4b6eed451fa4653992eb46755e (diff)
parent     8b799c9ee64bcc7a9883aca42978c77717f0de67 (diff)
merge?
-rwxr-xr-x  tools/examples/network-bridge    |   1
-rw-r--r--  tools/python/xen/xm/create.py    |   2
-rw-r--r--  xen/arch/ia64/xenmisc.c          |  31
-rw-r--r--  xen/arch/x86/domain.c            | 156
-rw-r--r--  xen/arch/x86/vmx.c               |   2
-rw-r--r--  xen/common/schedule.c            |  15
-rw-r--r--  xen/include/asm-x86/vmx_vmcs.h   |   4
-rw-r--r--  xen/include/xen/sched.h          |  24
8 files changed, 144 insertions(+), 91 deletions(-)
diff --git a/tools/examples/network-bridge b/tools/examples/network-bridge
index 24df59809a..bb4055c0f0 100755
--- a/tools/examples/network-bridge
+++ b/tools/examples/network-bridge
@@ -187,6 +187,7 @@ op_start () {
ifconfig ${netdev} 0.0.0.0 down
fi
fi
+ del_addrs ${netdev}
ip link set ${netdev} name p${netdev}
ip link set veth0 name ${netdev}
ifconfig p${netdev} -arp down
diff --git a/tools/python/xen/xm/create.py b/tools/python/xen/xm/create.py
index 72aa2a3f63..1d37f222ed 100644
--- a/tools/python/xen/xm/create.py
+++ b/tools/python/xen/xm/create.py
@@ -380,7 +380,6 @@ def randomMAC():
@return: MAC address string
"""
- random.seed()
mac = [ 0xaa, 0x00, 0x00,
random.randint(0x00, 0x7f),
random.randint(0x00, 0xff),
@@ -689,6 +688,7 @@ def balloon_out(dom0_min_mem, opts):
del xc
def main(argv):
+ random.seed()
opts = gopts
args = opts.parse(argv)
if opts.vals.help:
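
The create.py hunks above move random.seed() out of randomMAC() and into main(), so the generator is seeded once per xm invocation rather than on every MAC generation. Below is a minimal C sketch of the same seed-once pattern; random_mac() is a hypothetical stand-in for create.py's randomMAC(), and the remark about repeated values describes C's clock-seeded srand(), offered only as an analogy.

    #include <stdio.h>
    #include <stdlib.h>
    #include <time.h>

    /* Generate a Xen-style locally administered MAC (aa:00:00:xx:xx:xx).
     * Hypothetical helper mirroring the randomMAC() logic in create.py. */
    static void random_mac(unsigned char mac[6])
    {
        mac[0] = 0xaa; mac[1] = 0x00; mac[2] = 0x00;
        mac[3] = rand() & 0x7f;
        mac[4] = rand() & 0xff;
        mac[5] = rand() & 0xff;
    }

    int main(void)
    {
        /* Seed once per process, as the patch does by moving random.seed()
         * into main().  Re-seeding from a low-resolution clock inside
         * random_mac() could repeat values for calls made within the same
         * clock tick. */
        srand((unsigned)time(NULL));

        unsigned char mac[6];
        for (int i = 0; i < 3; i++) {
            random_mac(mac);
            printf("%02x:%02x:%02x:%02x:%02x:%02x\n",
                   mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
        }
        return 0;
    }
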
diff --git a/xen/arch/ia64/xenmisc.c b/xen/arch/ia64/xenmisc.c
index 1f2b9c423d..6a79cd9b0b 100644
--- a/xen/arch/ia64/xenmisc.c
+++ b/xen/arch/ia64/xenmisc.c
@@ -280,7 +280,6 @@ void cs01foo(void) {}
unsigned long context_switch_count = 0;
-// context_switch
void context_switch(struct vcpu *prev, struct vcpu *next)
{
//printk("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n");
@@ -290,22 +289,14 @@ void context_switch(struct vcpu *prev, struct vcpu *next)
//if (prev->domain->domain_id == 0 && next->domain->domain_id == 1) cs01foo();
//printk("@@sw %d->%d\n",prev->domain->domain_id,next->domain->domain_id);
#ifdef CONFIG_VTI
- unsigned long psr;
- /* Interrupt is enabled after next task is chosen.
- * So we have to disable it for stack switch.
- */
- local_irq_save(psr);
vtm_domain_out(prev);
- /* Housekeeping for prev domain */
-#endif // CONFIG_VTI
-
+#endif
context_switch_count++;
switch_to(prev,next,prev);
#ifdef CONFIG_VTI
- /* Post-setup for new domain */
vtm_domain_in(current);
- local_irq_restore(psr);
-#endif // CONFIG_VTI
+#endif
+
// leave this debug for now: it acts as a heartbeat when more than
// one domain is active
{
@@ -315,25 +306,27 @@ int id = ((struct vcpu *)current)->domain->domain_id & 0xf;
if (!cnt[id]--) { printk("%x",id); cnt[id] = 500000; }
if (!i--) { printk("+",id); i = 1000000; }
}
- clear_bit(_VCPUF_running, &prev->vcpu_flags);
- //if (!is_idle_task(next->domain) )
- //send_guest_virq(next, VIRQ_TIMER);
+
#ifdef CONFIG_VTI
if (VMX_DOMAIN(current))
vmx_load_all_rr(current);
- return;
-#else // CONFIG_VTI
+#else
if (!is_idle_task(current->domain)) {
load_region_regs(current);
if (vcpu_timer_expired(current)) vcpu_pend_timer(current);
}
if (vcpu_timer_expired(current)) vcpu_pend_timer(current);
-#endif // CONFIG_VTI
+#endif
+}
+
+void context_switch_finalise(struct vcpu *next)
+{
+ /* nothing to do */
}
void continue_running(struct vcpu *same)
{
- /* nothing to do */
+ /* nothing to do */
}
void panic_domain(struct pt_regs *regs, const char *fmt, ...)
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 598c2095ee..ad8716aac6 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -48,6 +48,8 @@ boolean_param("noreboot", opt_noreboot);
struct percpu_ctxt {
struct vcpu *curr_vcpu;
+ unsigned int context_not_finalised;
+ unsigned int dirty_segment_mask;
} __cacheline_aligned;
static struct percpu_ctxt percpu_ctxt[NR_CPUS];
@@ -541,51 +543,59 @@ void toggle_guest_mode(struct vcpu *v)
__r; })
#if CONFIG_VMX
-#define load_msrs(_p, _n) if (vmx_switch_on) vmx_load_msrs((_p), (_n))
+#define load_msrs(n) if (vmx_switch_on) vmx_load_msrs(n)
#else
-#define load_msrs(_p, _n) ((void)0)
+#define load_msrs(n) ((void)0)
#endif
-static void load_segments(struct vcpu *p, struct vcpu *n)
+/*
+ * save_segments() writes a mask of segments which are dirty (non-zero),
+ * allowing load_segments() to avoid some expensive segment loads and
+ * MSR writes.
+ */
+#define DIRTY_DS 0x01
+#define DIRTY_ES 0x02
+#define DIRTY_FS 0x04
+#define DIRTY_GS 0x08
+#define DIRTY_FS_BASE 0x10
+#define DIRTY_GS_BASE_USER 0x20
+
+static void load_segments(struct vcpu *n)
{
- struct vcpu_guest_context *pctxt = &p->arch.guest_context;
struct vcpu_guest_context *nctxt = &n->arch.guest_context;
int all_segs_okay = 1;
+ unsigned int dirty_segment_mask, cpu = smp_processor_id();
+
+ /* Load and clear the dirty segment mask. */
+ dirty_segment_mask = percpu_ctxt[cpu].dirty_segment_mask;
+ percpu_ctxt[cpu].dirty_segment_mask = 0;
/* Either selector != 0 ==> reload. */
- if ( unlikely(pctxt->user_regs.ds | nctxt->user_regs.ds) )
+ if ( unlikely((dirty_segment_mask & DIRTY_DS) | nctxt->user_regs.ds) )
all_segs_okay &= loadsegment(ds, nctxt->user_regs.ds);
/* Either selector != 0 ==> reload. */
- if ( unlikely(pctxt->user_regs.es | nctxt->user_regs.es) )
+ if ( unlikely((dirty_segment_mask & DIRTY_ES) | nctxt->user_regs.es) )
all_segs_okay &= loadsegment(es, nctxt->user_regs.es);
/*
* Either selector != 0 ==> reload.
* Also reload to reset FS_BASE if it was non-zero.
*/
- if ( unlikely(pctxt->user_regs.fs |
- pctxt->fs_base |
+ if ( unlikely((dirty_segment_mask & (DIRTY_FS | DIRTY_FS_BASE)) |
nctxt->user_regs.fs) )
- {
all_segs_okay &= loadsegment(fs, nctxt->user_regs.fs);
- if ( pctxt->user_regs.fs ) /* != 0 selector kills fs_base */
- pctxt->fs_base = 0;
- }
/*
* Either selector != 0 ==> reload.
* Also reload to reset GS_BASE if it was non-zero.
*/
- if ( unlikely(pctxt->user_regs.gs |
- pctxt->gs_base_user |
+ if ( unlikely((dirty_segment_mask & (DIRTY_GS | DIRTY_GS_BASE_USER)) |
nctxt->user_regs.gs) )
{
/* Reset GS_BASE with user %gs? */
- if ( pctxt->user_regs.gs || !nctxt->gs_base_user )
+ if ( (dirty_segment_mask & DIRTY_GS) || !nctxt->gs_base_user )
all_segs_okay &= loadsegment(gs, nctxt->user_regs.gs);
- if ( pctxt->user_regs.gs ) /* != 0 selector kills gs_base_user */
- pctxt->gs_base_user = 0;
}
/* This can only be non-zero if selector is NULL. */
@@ -650,7 +660,9 @@ static void load_segments(struct vcpu *p, struct vcpu *n)
static void save_segments(struct vcpu *v)
{
- struct cpu_user_regs *regs = &v->arch.guest_context.user_regs;
+ struct vcpu_guest_context *ctxt = &v->arch.guest_context;
+ struct cpu_user_regs *regs = &ctxt->user_regs;
+ unsigned int dirty_segment_mask = 0;
if ( VMX_DOMAIN(v) )
rdmsrl(MSR_SHADOW_GS_BASE, v->arch.arch_vmx.msr_content.shadow_gs);
@@ -659,18 +671,34 @@ static void save_segments(struct vcpu *v)
__asm__ __volatile__ ( "movl %%es,%0" : "=m" (regs->es) );
__asm__ __volatile__ ( "movl %%fs,%0" : "=m" (regs->fs) );
__asm__ __volatile__ ( "movl %%gs,%0" : "=m" (regs->gs) );
-}
-static void clear_segments(void)
-{
- __asm__ __volatile__ (
- " movl %0,%%ds; "
- " movl %0,%%es; "
- " movl %0,%%fs; "
- " movl %0,%%gs; "
- ""safe_swapgs" "
- " movl %0,%%gs"
- : : "r" (0) );
+ if ( regs->ds )
+ dirty_segment_mask |= DIRTY_DS;
+
+ if ( regs->es )
+ dirty_segment_mask |= DIRTY_ES;
+
+ if ( regs->fs )
+ {
+ dirty_segment_mask |= DIRTY_FS;
+ ctxt->fs_base = 0; /* != 0 selector kills fs_base */
+ }
+ else if ( ctxt->fs_base )
+ {
+ dirty_segment_mask |= DIRTY_FS_BASE;
+ }
+
+ if ( regs->gs )
+ {
+ dirty_segment_mask |= DIRTY_GS;
+ ctxt->gs_base_user = 0; /* != 0 selector kills gs_base_user */
+ }
+ else if ( ctxt->gs_base_user )
+ {
+ dirty_segment_mask |= DIRTY_GS_BASE_USER;
+ }
+
+ percpu_ctxt[smp_processor_id()].dirty_segment_mask = dirty_segment_mask;
}
long do_switch_to_user(void)
@@ -706,10 +734,9 @@ long do_switch_to_user(void)
#elif defined(__i386__)
-#define load_segments(_p, _n) ((void)0)
-#define load_msrs(_p, _n) ((void)0)
-#define save_segments(_p) ((void)0)
-#define clear_segments() ((void)0)
+#define load_segments(n) ((void)0)
+#define load_msrs(n) ((void)0)
+#define save_segments(p) ((void)0)
static inline void switch_kernel_stack(struct vcpu *n, unsigned int cpu)
{
@@ -726,9 +753,9 @@ static inline void switch_kernel_stack(struct vcpu *n, unsigned int cpu)
static void __context_switch(void)
{
struct cpu_user_regs *stack_regs = guest_cpu_user_regs();
- unsigned int cpu = smp_processor_id();
- struct vcpu *p = percpu_ctxt[cpu].curr_vcpu;
- struct vcpu *n = current;
+ unsigned int cpu = smp_processor_id();
+ struct vcpu *p = percpu_ctxt[cpu].curr_vcpu;
+ struct vcpu *n = current;
if ( !is_idle_task(p->domain) )
{
@@ -786,23 +813,31 @@ static void __context_switch(void)
void context_switch(struct vcpu *prev, struct vcpu *next)
{
- struct vcpu *realprev;
+ unsigned int cpu = smp_processor_id();
- local_irq_disable();
+ ASSERT(!local_irq_is_enabled());
set_current(next);
- if ( ((realprev = percpu_ctxt[smp_processor_id()].curr_vcpu) == next) ||
- is_idle_task(next->domain) )
+ if ( (percpu_ctxt[cpu].curr_vcpu != next) && !is_idle_task(next->domain) )
{
- local_irq_enable();
+ __context_switch();
+ percpu_ctxt[cpu].context_not_finalised = 1;
}
- else
+}
+
+void context_switch_finalise(struct vcpu *next)
+{
+ unsigned int cpu = smp_processor_id();
+
+ ASSERT(local_irq_is_enabled());
+
+ if ( percpu_ctxt[cpu].context_not_finalised )
{
- __context_switch();
+ percpu_ctxt[cpu].context_not_finalised = 0;
+
+ BUG_ON(percpu_ctxt[cpu].curr_vcpu != next);
- local_irq_enable();
-
if ( VMX_DOMAIN(next) )
{
vmx_restore_msrs(next);
@@ -810,19 +845,11 @@ void context_switch(struct vcpu *prev, struct vcpu *next)
else
{
load_LDT(next);
- load_segments(realprev, next);
- load_msrs(realprev, next);
+ load_segments(next);
+ load_msrs(next);
}
}
- /*
- * We do this late on because it doesn't need to be protected by the
- * schedule_lock, and because we want this to be the very last use of
- * 'prev' (after this point, a dying domain's info structure may be freed
- * without warning).
- */
- clear_bit(_VCPUF_running, &prev->vcpu_flags);
-
schedule_tail(next);
BUG();
}
@@ -835,12 +862,19 @@ void continue_running(struct vcpu *same)
int __sync_lazy_execstate(void)
{
- if ( percpu_ctxt[smp_processor_id()].curr_vcpu == current )
- return 0;
- __context_switch();
- load_LDT(current);
- clear_segments();
- return 1;
+ unsigned long flags;
+ int switch_required;
+
+ local_irq_save(flags);
+
+ switch_required = (percpu_ctxt[smp_processor_id()].curr_vcpu != current);
+
+ if ( switch_required )
+ __context_switch();
+
+ local_irq_restore(flags);
+
+ return switch_required;
}
void sync_lazy_execstate_cpu(unsigned int cpu)
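
The domain.c changes above replace the old compare-against-the-previous-VCPU logic with a per-CPU dirty segment mask: save_segments() records which selectors were live when the outgoing context was saved, and load_segments() reloads only what that mask or the incoming context requires. The following is a self-contained sketch of that bookkeeping with simplified, hypothetical types; the real code also tracks FS/GS base dirtiness and uses loadsegment() rather than printf().

    #include <stdio.h>

    /* Dirty-mask bits, mirroring the DIRTY_* constants added to domain.c. */
    #define DIRTY_DS 0x01
    #define DIRTY_ES 0x02
    #define DIRTY_FS 0x04
    #define DIRTY_GS 0x08

    struct guest_segs { unsigned short ds, es, fs, gs; };

    /* Per-CPU state, analogous to percpu_ctxt[].dirty_segment_mask. */
    static unsigned int dirty_segment_mask;

    /* Record which selectors were non-zero while the outgoing context ran. */
    static void save_segments(const struct guest_segs *prev)
    {
        unsigned int mask = 0;
        if (prev->ds) mask |= DIRTY_DS;
        if (prev->es) mask |= DIRTY_ES;
        if (prev->fs) mask |= DIRTY_FS;
        if (prev->gs) mask |= DIRTY_GS;
        dirty_segment_mask = mask;
    }

    /* Reload a selector only if the CPU register may be stale (dirty) or the
     * incoming context wants a non-zero selector. */
    static void load_segments(const struct guest_segs *next)
    {
        unsigned int mask = dirty_segment_mask;
        dirty_segment_mask = 0;

        if ((mask & DIRTY_DS) | next->ds)
            printf("reload ds = %#x\n", next->ds);  /* stands in for loadsegment() */
        if ((mask & DIRTY_ES) | next->es)
            printf("reload es = %#x\n", next->es);
        if ((mask & DIRTY_FS) | next->fs)
            printf("reload fs = %#x\n", next->fs);
        if ((mask & DIRTY_GS) | next->gs)
            printf("reload gs = %#x\n", next->gs);
    }

    int main(void)
    {
        struct guest_segs prev = { .ds = 0x2b }, next = { 0 };
        save_segments(&prev);   /* only DS was live */
        load_segments(&next);   /* DS is reloaded once; ES/FS/GS are skipped */
        return 0;
    }
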
diff --git a/xen/arch/x86/vmx.c b/xen/arch/x86/vmx.c
index bf9fc928df..f7c39b8346 100644
--- a/xen/arch/x86/vmx.c
+++ b/xen/arch/x86/vmx.c
@@ -65,7 +65,7 @@ static u32 msr_data_index[VMX_MSR_COUNT] =
* are not modified once set for generic domains, we don't save them,
* but simply reset them to the values set at percpu_traps_init().
*/
-void vmx_load_msrs(struct vcpu *p, struct vcpu *n)
+void vmx_load_msrs(struct vcpu *n)
{
struct msr_state *host_state;
host_state = &percpu_msr[smp_processor_id()];
diff --git a/xen/common/schedule.c b/xen/common/schedule.c
index 422f4f48ae..6cf1b7858d 100644
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -474,13 +474,14 @@ static void __enter_scheduler(void)
set_ac_timer(&schedule_data[cpu].s_timer, now + r_time);
- /* Must be protected by the schedule_lock! */
- set_bit(_VCPUF_running, &next->vcpu_flags);
-
- spin_unlock_irq(&schedule_data[cpu].schedule_lock);
-
if ( unlikely(prev == next) )
+ {
+ spin_unlock_irq(&schedule_data[cpu].schedule_lock);
return continue_running(prev);
+ }
+
+ clear_bit(_VCPUF_running, &prev->vcpu_flags);
+ set_bit(_VCPUF_running, &next->vcpu_flags);
perfc_incrc(sched_ctx);
@@ -517,6 +518,10 @@ static void __enter_scheduler(void)
next->domain->domain_id, next->vcpu_id);
context_switch(prev, next);
+
+ spin_unlock_irq(&schedule_data[cpu].schedule_lock);
+
+ context_switch_finalise(next);
}
/* No locking needed -- pointer comparison is safe :-) */
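
The schedule.c hunk above reorders things so that the running flags are flipped and context_switch() is invoked while the per-CPU schedule_lock is still held with interrupts off; only afterwards is the lock dropped and context_switch_finalise() run with interrupts enabled. A minimal sketch of that calling sequence follows; the pthread mutex and integer VCPU ids are simplified stand-ins for the scheduler's spin_lock_irq() and struct vcpu.

    #include <pthread.h>
    #include <stdio.h>

    /* Stand-ins for the scheduler's per-CPU lock and the two switch phases. */
    static pthread_mutex_t schedule_lock = PTHREAD_MUTEX_INITIALIZER;

    static void context_switch(int prev, int next)
    {
        /* Phase 1: runs with the lock held ("interrupts off" in the
         * hypervisor); commits only work that must not race the scheduler. */
        printf("switch %d -> %d (lock held)\n", prev, next);
    }

    static void context_switch_finalise(int next)
    {
        /* Phase 2: runs after the lock is dropped; may do work that needs
         * interrupts enabled, e.g. the segment/MSR loads on x86. */
        printf("finalise %d (lock released)\n", next);
    }

    static void enter_scheduler(int prev, int next)
    {
        pthread_mutex_lock(&schedule_lock);

        if (prev == next) {
            pthread_mutex_unlock(&schedule_lock);
            return;                 /* continue_running() path */
        }

        /* prev stops running, next starts: decided under the lock. */
        context_switch(prev, next);

        pthread_mutex_unlock(&schedule_lock);
        context_switch_finalise(next);
    }

    int main(void)
    {
        enter_scheduler(0, 1);
        enter_scheduler(1, 1);
        return 0;
    }
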
diff --git a/xen/include/asm-x86/vmx_vmcs.h b/xen/include/asm-x86/vmx_vmcs.h
index 987e4e79e6..49a7d1c0bf 100644
--- a/xen/include/asm-x86/vmx_vmcs.h
+++ b/xen/include/asm-x86/vmx_vmcs.h
@@ -28,10 +28,10 @@ extern int start_vmx(void);
extern void stop_vmx(void);
#if defined (__x86_64__)
-extern void vmx_load_msrs(struct vcpu *p, struct vcpu *n);
+extern void vmx_load_msrs(struct vcpu *n);
void vmx_restore_msrs(struct vcpu *d);
#else
-#define vmx_load_msrs(_p, _n) ((void)0)
+#define vmx_load_msrs(_n) ((void)0)
#define vmx_restore_msrs(_v) ((void)0)
#endif
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 45e04fc93d..e0ea2f756d 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -258,12 +258,32 @@ extern void sync_lazy_execstate_mask(cpumask_t mask);
extern void sync_lazy_execstate_all(void);
extern int __sync_lazy_execstate(void);
-/* Called by the scheduler to switch to another vcpu. */
+/*
+ * Called by the scheduler to switch to another VCPU. On entry, although
+ * VCPUF_running is no longer asserted for @prev, its context is still running
+ * on the local CPU and is not committed to memory. The local scheduler lock
+ * is therefore still held, and interrupts are disabled, because the local CPU
+ * is in an inconsistent state.
+ *
+ * The callee must ensure that the local CPU is no longer running in @prev's
+ * context, and that the context is saved to memory, before returning.
+ * Alternatively, if implementing lazy context switching, it suffices to ensure
+ * that invoking __sync_lazy_execstate() will switch and commit @prev's state.
+ */
extern void context_switch(
struct vcpu *prev,
struct vcpu *next);
-/* Called by the scheduler to continue running the current vcpu. */
+/*
+ * On some architectures (notably x86) it is not possible to entirely load
+ * @next's context with interrupts disabled. These may implement a function to
+ * finalise loading the new context after interrupts are re-enabled. This
+ * function is not given @prev and is not permitted to access it.
+ */
+extern void context_switch_finalise(
+ struct vcpu *next);
+
+/* Called by the scheduler to continue running the current VCPU. */
extern void continue_running(
struct vcpu *same);
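
For an architecture that can commit the whole switch with interrupts disabled, the contract documented above lets context_switch_finalise() be a no-op, as the ia64 stub earlier in this patch does. A hypothetical, simplified skeleton of such a port:

    #include <stdio.h>

    /* Simplified, hypothetical port: all state is committed in
     * context_switch(), so the finalise hook has nothing left to do. */
    struct vcpu { int id; };

    static void arch_save_state(struct vcpu *v) { printf("save vcpu %d\n", v->id); }
    static void arch_load_state(struct vcpu *v) { printf("load vcpu %d\n", v->id); }

    void context_switch(struct vcpu *prev, struct vcpu *next)
    {
        /* Entered with the scheduler lock held and interrupts disabled;
         * prev's context must be saved to memory before this returns. */
        arch_save_state(prev);
        arch_load_state(next);
    }

    void context_switch_finalise(struct vcpu *next)
    {
        /* Called after the lock is dropped and interrupts are re-enabled;
         * nothing was deferred, so nothing to do. */
        (void)next;
    }

    int main(void)
    {
        struct vcpu a = { 0 }, b = { 1 };
        context_switch(&a, &b);
        context_switch_finalise(&b);
        return 0;
    }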