author     cl349@firebug.cl.cam.ac.uk <cl349@firebug.cl.cam.ac.uk>  2005-08-17 08:27:16 +0000
committer  cl349@firebug.cl.cam.ac.uk <cl349@firebug.cl.cam.ac.uk>  2005-08-17 08:27:16 +0000
commit     f5eca0c88e319bb76137e30a780f66e84225a543 (patch)
tree       a403fe6d5fd24c90b899859501ca445616a842af
parent     72593432a519af4b6eed451fa4653992eb46755e (diff)
parent     8b799c9ee64bcc7a9883aca42978c77717f0de67 (diff)
merge?
-rwxr-xr-x  tools/examples/network-bridge    |   1
-rw-r--r--  tools/python/xen/xm/create.py    |   2
-rw-r--r--  xen/arch/ia64/xenmisc.c          |  31
-rw-r--r--  xen/arch/x86/domain.c            | 156
-rw-r--r--  xen/arch/x86/vmx.c               |   2
-rw-r--r--  xen/common/schedule.c            |  15
-rw-r--r--  xen/include/asm-x86/vmx_vmcs.h   |   4
-rw-r--r--  xen/include/xen/sched.h          |  24
8 files changed, 144 insertions(+), 91 deletions(-)
diff --git a/tools/examples/network-bridge b/tools/examples/network-bridge
index 24df59809a..bb4055c0f0 100755
--- a/tools/examples/network-bridge
+++ b/tools/examples/network-bridge
@@ -187,6 +187,7 @@ op_start () {
ifconfig ${netdev} 0.0.0.0 down
fi
fi
+ del_addrs ${netdev}
ip link set ${netdev} name p${netdev}
ip link set veth0 name ${netdev}
ifconfig p${netdev} -arp down
diff --git a/tools/python/xen/xm/create.py b/tools/python/xen/xm/create.py
index 72aa2a3f63..1d37f222ed 100644
--- a/tools/python/xen/xm/create.py
+++ b/tools/python/xen/xm/create.py
@@ -380,7 +380,6 @@ def randomMAC():
@return: MAC address string
"""
- random.seed()
mac = [ 0xaa, 0x00, 0x00,
random.randint(0x00, 0x7f),
random.randint(0x00, 0xff),
@@ -689,6 +688,7 @@ def balloon_out(dom0_min_mem, opts):
del xc
def main(argv):
+ random.seed()
opts = gopts
args = opts.parse(argv)
if opts.vals.help:
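
The create.py hunks above move random.seed() out of randomMAC() and into main(), so the generator is seeded once per xm invocation rather than on every MAC generation. Below is a minimal C sketch of the same seed-once pattern; random_mac() is a hypothetical stand-in for create.py's randomMAC(), and the remark about repeated values describes C's clock-seeded srand(), offered only as an analogy.

    #include <stdio.h>
    #include <stdlib.h>
    #include <time.h>

    /* Generate a Xen-style locally administered MAC (aa:00:00:xx:xx:xx).
     * Hypothetical helper mirroring the randomMAC() logic in create.py. */
    static void random_mac(unsigned char mac[6])
    {
        mac[0] = 0xaa; mac[1] = 0x00; mac[2] = 0x00;
        mac[3] = rand() & 0x7f;
        mac[4] = rand() & 0xff;
        mac[5] = rand() & 0xff;
    }

    int main(void)
    {
        /* Seed once per process, as the patch does by moving random.seed()
         * into main().  Re-seeding from a low-resolution clock inside
         * random_mac() could repeat values for calls made within the same
         * clock tick. */
        srand((unsigned)time(NULL));

        unsigned char mac[6];
        for (int i = 0; i < 3; i++) {
            random_mac(mac);
            printf("%02x:%02x:%02x:%02x:%02x:%02x\n",
                   mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
        }
        return 0;
    }
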
diff --git a/xen/arch/ia64/xenmisc.c b/xen/arch/ia64/xenmisc.c
index 1f2b9c423d..6a79cd9b0b 100644
--- a/xen/arch/ia64/xenmisc.c
+++ b/xen/arch/ia64/xenmisc.c
@@ -280,7 +280,6 @@ void cs01foo(void) {}
unsigned long context_switch_count = 0;
-// context_switch
void context_switch(struct vcpu *prev, struct vcpu *next)
{
//printk("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n");
@@ -290,22 +289,14 @@ void context_switch(struct vcpu *prev, struct vcpu *next)
//if (prev->domain->domain_id == 0 && next->domain->domain_id == 1) cs01foo();
//printk("@@sw %d->%d\n",prev->domain->domain_id,next->domain->domain_id);
#ifdef CONFIG_VTI
- unsigned long psr;
- /* Interrupt is enabled after next task is chosen.
- * So we have to disable it for stack switch.
- */
- local_irq_save(psr);
vtm_domain_out(prev);
- /* Housekeeping for prev domain */
-#endif // CONFIG_VTI
-
+#endif
context_switch_count++;
switch_to(prev,next,prev);
#ifdef CONFIG_VTI
- /* Post-setup for new domain */
vtm_domain_in(current);
- local_irq_restore(psr);
-#endif // CONFIG_VTI
+#endif
+
// leave this debug for now: it acts as a heartbeat when more than
// one domain is active
{
@@ -315,25 +306,27 @@ int id = ((struct vcpu *)current)->domain->domain_id & 0xf;
if (!cnt[id]--) { printk("%x",id); cnt[id] = 500000; }
if (!i--) { printk("+",id); i = 1000000; }
}
- clear_bit(_VCPUF_running, &prev->vcpu_flags);
- //if (!is_idle_task(next->domain) )
- //send_guest_virq(next, VIRQ_TIMER);
+
#ifdef CONFIG_VTI
if (VMX_DOMAIN(current))
vmx_load_all_rr(current);
- return;
-#else // CONFIG_VTI
+#else
if (!is_idle_task(current->domain)) {
load_region_regs(current);
if (vcpu_timer_expired(current)) vcpu_pend_timer(current);
}
if (vcpu_timer_expired(current)) vcpu_pend_timer(current);
-#endif // CONFIG_VTI
+#endif
+}
+
+void context_switch_finalise(struct vcpu *next)
+{
+ /* nothing to do */
}
void continue_running(struct vcpu *same)
{
- /* nothing to do */
+ /* nothing to do */
}
void panic_domain(struct pt_regs *regs, const char *fmt, ...)
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 598c2095ee..ad8716aac6 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -48,6 +48,8 @@ boolean_param("noreboot", opt_noreboot);
struct percpu_ctxt {
struct vcpu *curr_vcpu;
+ unsigned int context_not_finalised;
+ unsigned int dirty_segment_mask;
} __cacheline_aligned;
static struct percpu_ctxt percpu_ctxt[NR_CPUS];
@@ -541,51 +543,59 @@ void toggle_guest_mode(struct vcpu *v)
__r; })
#if CONFIG_VMX
-#define load_msrs(_p, _n) if (vmx_switch_on) vmx_load_msrs((_p), (_n))
+#define load_msrs(n) if (vmx_switch_on) vmx_load_msrs(n)
#else
-#define load_msrs(_p, _n) ((void)0)
+#define load_msrs(n) ((void)0)
#endif
-static void load_segments(struct vcpu *p, struct vcpu *n)
+/*
+ * save_segments() writes a mask of segments which are dirty (non-zero),
+ * allowing load_segments() to avoid some expensive segment loads and
+ * MSR writes.
+ */
+#define DIRTY_DS 0x01
+#define DIRTY_ES 0x02
+#define DIRTY_FS 0x04
+#define DIRTY_GS 0x08
+#define DIRTY_FS_BASE 0x10
+#define DIRTY_GS_BASE_USER 0x20
+
+static void load_segments(struct vcpu *n)
{
- struct vcpu_guest_context *pctxt = &p->arch.guest_context;
struct vcpu_guest_context *nctxt = &n->arch.guest_context;
int all_segs_okay = 1;
+ unsigned int dirty_segment_mask, cpu = smp_processor_id();
+
+ /* Load and clear the dirty segment mask. */
+ dirty_segment_mask = percpu_ctxt[cpu].dirty_segment_mask;
+ percpu_ctxt[cpu].dirty_segment_mask = 0;
/* Either selector != 0 ==> reload. */
- if ( unlikely(pctxt->user_regs.ds | nctxt->user_regs.ds) )
+ if ( unlikely((dirty_segment_mask & DIRTY_DS) | nctxt->user_regs.ds) )
all_segs_okay &= loadsegment(ds, nctxt->user_regs.ds);
/* Either selector != 0 ==> reload. */
- if ( unlikely(pctxt->user_regs.es | nctxt->user_regs.es) )
+ if ( unlikely((dirty_segment_mask & DIRTY_ES) | nctxt->user_regs.es) )
all_segs_okay &= loadsegment(es, nctxt->user_regs.es);
/*
* Either selector != 0 ==> reload.
* Also reload to reset FS_BASE if it was non-zero.
*/
- if ( unlikely(pctxt->user_regs.fs |
- pctxt->fs_base |
+ if ( unlikely((dirty_segment_mask & (DIRTY_FS | DIRTY_FS_BASE)) |
nctxt->user_regs.fs) )
- {
all_segs_okay &= loadsegment(fs, nctxt->user_regs.fs);
- if ( pctxt->user_regs.fs ) /* != 0 selector kills fs_base */
- pctxt->fs_base = 0;
- }
/*
* Either selector != 0 ==> reload.
* Also reload to reset GS_BASE if it was non-zero.
*/
- if ( unlikely(pctxt->user_regs.gs |
- pctxt->gs_base_user |
+ if ( unlikely((dirty_segment_mask & (DIRTY_GS | DIRTY_GS_BASE_USER)) |
nctxt->user_regs.gs) )
{
/* Reset GS_BASE with user %gs? */
- if ( pctxt->user_regs.gs || !nctxt->gs_base_user )
+ if ( (dirty_segment_mask & DIRTY_GS) || !nctxt->gs_base_user )
all_segs_okay &= loadsegment(gs, nctxt->user_regs.gs);
- if ( pctxt->user_regs.gs ) /* != 0 selector kills gs_base_user */
- pctxt->gs_base_user = 0;
}
/* This can only be non-zero if selector is NULL. */
@@ -650,7 +660,9 @@ static void load_segments(struct vcpu *p, struct vcpu *n)
static void save_segments(struct vcpu *v)
{
- struct cpu_user_regs *regs = &v->arch.guest_context.user_regs;
+ struct vcpu_guest_context *ctxt = &v->arch.guest_context;
+ struct cpu_user_regs *regs = &ctxt->user_regs;
+ unsigned int dirty_segment_mask = 0;
if ( VMX_DOMAIN(v) )
rdmsrl(MSR_SHADOW_GS_BASE, v->arch.arch_vmx.msr_content.shadow_gs);
@@ -659,18 +671,34 @@ static void save_segments(struct vcpu *v)
__asm__ __volatile__ ( "movl %%es,%0" : "=m" (regs->es) );
__asm__ __volatile__ ( "movl %%fs,%0" : "=m" (regs->fs) );
__asm__ __volatile__ ( "movl %%gs,%0" : "=m" (regs->gs) );
-}
-static void clear_segments(void)
-{
- __asm__ __volatile__ (
- " movl %0,%%ds; "
- " movl %0,%%es; "
- " movl %0,%%fs; "
- " movl %0,%%gs; "
- ""safe_swapgs" "
- " movl %0,%%gs"
- : : "r" (0) );
+ if ( regs->ds )
+ dirty_segment_mask |= DIRTY_DS;
+
+ if ( regs->es )
+ dirty_segment_mask |= DIRTY_ES;
+
+ if ( regs->fs )
+ {
+ dirty_segment_mask |= DIRTY_FS;
+ ctxt->fs_base = 0; /* != 0 selector kills fs_base */
+ }
+ else if ( ctxt->fs_base )
+ {
+ dirty_segment_mask |= DIRTY_FS_BASE;
+ }
+
+ if ( regs->gs )
+ {
+ dirty_segment_mask |= DIRTY_GS;
+ ctxt->gs_base_user = 0; /* != 0 selector kills gs_base_user */
+ }
+ else if ( ctxt->gs_base_user )
+ {
+ dirty_segment_mask |= DIRTY_GS_BASE_USER;
+ }
+
+ percpu_ctxt[smp_processor_id()].dirty_segment_mask = dirty_segment_mask;
}
long do_switch_to_user(void)
@@ -706,10 +734,9 @@ long do_switch_to_user(void)
#elif defined(__i386__)
-#define load_segments(_p, _n) ((void)0)
-#define load_msrs(_p, _n) ((void)0)
-#define save_segments(_p) ((void)0)
-#define clear_segments() ((void)0)
+#define load_segments(n) ((void)0)
+#define load_msrs(n) ((void)0)
+#define save_segments(p) ((void)0)
static inline void switch_kernel_stack(struct vcpu *n, unsigned int cpu)
{
@@ -726,9 +753,9 @@ static inline void switch_kernel_stack(struct vcpu *n, unsigned int cpu)
static void __context_switch(void)
{
struct cpu_user_regs *stack_regs = guest_cpu_user_regs();
- unsigned int cpu = smp_processor_id();
- struct vcpu *p = percpu_ctxt[cpu].curr_vcpu;
- struct vcpu *n = current;
+ unsigned int cpu = smp_processor_id();
+ struct vcpu *p = percpu_ctxt[cpu].curr_vcpu;
+ struct vcpu *n = current;
if ( !is_idle_task(p->domain) )
{
@@ -786,23 +813,31 @@ static void __context_switch(void)
void context_switch(struct vcpu *prev, struct vcpu *next)
{
- struct vcpu *realprev;
+ unsigned int cpu = smp_processor_id();
- local_irq_disable();
+ ASSERT(!local_irq_is_enabled());
set_current(next);
- if ( ((realprev = percpu_ctxt[smp_processor_id()].curr_vcpu) == next) ||
- is_idle_task(next->domain) )
+ if ( (percpu_ctxt[cpu].curr_vcpu != next) && !is_idle_task(next->domain) )
{
- local_irq_enable();
+ __context_switch();
+ percpu_ctxt[cpu].context_not_finalised = 1;
}
- else
+}
+
+void context_switch_finalise(struct vcpu *next)
+{
+ unsigned int cpu = smp_processor_id();
+
+ ASSERT(local_irq_is_enabled());
+
+ if ( percpu_ctxt[cpu].context_not_finalised )
{
- __context_switch();
+ percpu_ctxt[cpu].context_not_finalised = 0;
+
+ BUG_ON(percpu_ctxt[cpu].curr_vcpu != next);
- local_irq_enable();
-
if ( VMX_DOMAIN(next) )
{
vmx_restore_msrs(next);
@@ -810,19 +845,11 @@ void context_switch(struct vcpu *prev, struct vcpu *next)
else
{
load_LDT(next);
- load_segments(realprev, next);
- load_msrs(realprev, next);
+ load_segments(next);
+ load_msrs(next);
}
}
- /*
- * We do this late on because it doesn't need to be protected by the
- * schedule_lock, and because we want this to be the very last use of
- * 'prev' (after this point, a dying domain's info structure may be freed
- * without warning).
- */
- clear_bit(_VCPUF_running, &prev->vcpu_flags);
-
schedule_tail(next);
BUG();
}
@@ -835,12 +862,19 @@ void continue_running(struct vcpu *same)
int __sync_lazy_execstate(void)
{
- if ( percpu_ctxt[smp_processor_id()].curr_vcpu == current )
- return 0;
- __context_switch();
- load_LDT(current);
- clear_segments();
- return 1;
+ unsigned long flags;
+ int switch_required;
+
+ local_irq_save(flags);
+
+ switch_required = (percpu_ctxt[smp_processor_id()].curr_vcpu != current);
+
+ if ( switch_required )
+ __context_switch();
+
+ local_irq_restore(flags);
+
+ return switch_required;
}
void sync_lazy_execstate_cpu(unsigned int cpu)
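
The domain.c changes above replace the old compare-against-the-previous-VCPU logic with a per-CPU dirty segment mask: save_segments() records which selectors were live when the outgoing context was saved, and load_segments() reloads only what that mask or the incoming context requires. The following is a self-contained sketch of that bookkeeping with simplified, hypothetical types; the real code also tracks FS/GS base dirtiness and uses loadsegment() rather than printf().

    #include <stdio.h>

    /* Dirty-mask bits, mirroring the DIRTY_* constants added to domain.c. */
    #define DIRTY_DS 0x01
    #define DIRTY_ES 0x02
    #define DIRTY_FS 0x04
    #define DIRTY_GS 0x08

    struct guest_segs { unsigned short ds, es, fs, gs; };

    /* Per-CPU state, analogous to percpu_ctxt[].dirty_segment_mask. */
    static unsigned int dirty_segment_mask;

    /* Record which selectors were non-zero while the outgoing context ran. */
    static void save_segments(const struct guest_segs *prev)
    {
        unsigned int mask = 0;
        if (prev->ds) mask |= DIRTY_DS;
        if (prev->es) mask |= DIRTY_ES;
        if (prev->fs) mask |= DIRTY_FS;
        if (prev->gs) mask |= DIRTY_GS;
        dirty_segment_mask = mask;
    }

    /* Reload a selector only if the CPU register may be stale (dirty) or the
     * incoming context wants a non-zero selector. */
    static void load_segments(const struct guest_segs *next)
    {
        unsigned int mask = dirty_segment_mask;
        dirty_segment_mask = 0;

        if ((mask & DIRTY_DS) | next->ds)
            printf("reload ds = %#x\n", next->ds);  /* stands in for loadsegment() */
        if ((mask & DIRTY_ES) | next->es)
            printf("reload es = %#x\n", next->es);
        if ((mask & DIRTY_FS) | next->fs)
            printf("reload fs = %#x\n", next->fs);
        if ((mask & DIRTY_GS) | next->gs)
            printf("reload gs = %#x\n", next->gs);
    }

    int main(void)
    {
        struct guest_segs prev = { .ds = 0x2b }, next = { 0 };
        save_segments(&prev);   /* only DS was live */
        load_segments(&next);   /* DS is reloaded once; ES/FS/GS are skipped */
        return 0;
    }
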
diff --git a/xen/arch/x86/vmx.c b/xen/arch/x86/vmx.c
index bf9fc928df..f7c39b8346 100644
--- a/xen/arch/x86/vmx.c
+++ b/xen/arch/x86/vmx.c
@@ -65,7 +65,7 @@ static u32 msr_data_index[VMX_MSR_COUNT] =
* are not modified once set for generic domains, we don't save them,
* but simply reset them to the values set at percpu_traps_init().
*/
-void vmx_load_msrs(struct vcpu *p, struct vcpu *n)
+void vmx_load_msrs(struct vcpu *n)
{
struct msr_state *host_state;
host_state = &percpu_msr[smp_processor_id()];
diff --git a/xen/common/schedule.c b/xen/common/schedule.c
index 422f4f48ae..6cf1b7858d 100644
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -474,13 +474,14 @@ static void __enter_scheduler(void)
set_ac_timer(&schedule_data[cpu].s_timer, now + r_time);
- /* Must be protected by the schedule_lock! */
- set_bit(_VCPUF_running, &next->vcpu_flags);
-
- spin_unlock_irq(&schedule_data[cpu].schedule_lock);
-
if ( unlikely(prev == next) )
+ {
+ spin_unlock_irq(&schedule_data[cpu].schedule_lock);
return continue_running(prev);
+ }
+
+ clear_bit(_VCPUF_running, &prev->vcpu_flags);
+ set_bit(_VCPUF_running, &next->vcpu_flags);
perfc_incrc(sched_ctx);
@@ -517,6 +518,10 @@ static void __enter_scheduler(void)
next->domain->domain_id, next->vcpu_id);
context_switch(prev, next);
+
+ spin_unlock_irq(&schedule_data[cpu].schedule_lock);
+
+ context_switch_finalise(next);
}
/* No locking needed -- pointer comparison is safe :-) */
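
The schedule.c hunk above reorders things so that the running flags are flipped and context_switch() is invoked while the per-CPU schedule_lock is still held with interrupts off; only afterwards is the lock dropped and context_switch_finalise() run with interrupts enabled. A minimal sketch of that calling sequence follows; the pthread mutex and integer VCPU ids are simplified stand-ins for the scheduler's spin_lock_irq() and struct vcpu.

    #include <pthread.h>
    #include <stdio.h>

    /* Stand-ins for the scheduler's per-CPU lock and the two switch phases. */
    static pthread_mutex_t schedule_lock = PTHREAD_MUTEX_INITIALIZER;

    static void context_switch(int prev, int next)
    {
        /* Phase 1: runs with the lock held ("interrupts off" in the
         * hypervisor); commits only work that must not race the scheduler. */
        printf("switch %d -> %d (lock held)\n", prev, next);
    }

    static void context_switch_finalise(int next)
    {
        /* Phase 2: runs after the lock is dropped; may do work that needs
         * interrupts enabled, e.g. the segment/MSR loads on x86. */
        printf("finalise %d (lock released)\n", next);
    }

    static void enter_scheduler(int prev, int next)
    {
        pthread_mutex_lock(&schedule_lock);

        if (prev == next) {
            pthread_mutex_unlock(&schedule_lock);
            return;                 /* continue_running() path */
        }

        /* prev stops running, next starts: decided under the lock. */
        context_switch(prev, next);

        pthread_mutex_unlock(&schedule_lock);
        context_switch_finalise(next);
    }

    int main(void)
    {
        enter_scheduler(0, 1);
        enter_scheduler(1, 1);
        return 0;
    }
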
diff --git a/xen/include/asm-x86/vmx_vmcs.h b/xen/include/asm-x86/vmx_vmcs.h
index 987e4e79e6..49a7d1c0bf 100644
--- a/xen/include/asm-x86/vmx_vmcs.h
+++ b/xen/include/asm-x86/vmx_vmcs.h
@@ -28,10 +28,10 @@ extern int start_vmx(void);
extern void stop_vmx(void);
#if defined (__x86_64__)
-extern void vmx_load_msrs(struct vcpu *p, struct vcpu *n);
+extern void vmx_load_msrs(struct vcpu *n);
void vmx_restore_msrs(struct vcpu *d);
#else
-#define vmx_load_msrs(_p, _n) ((void)0)
+#define vmx_load_msrs(_n) ((void)0)
#define vmx_restore_msrs(_v) ((void)0)
#endif
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 45e04fc93d..e0ea2f756d 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -258,12 +258,32 @@ extern void sync_lazy_execstate_mask(cpumask_t mask);
extern void sync_lazy_execstate_all(void);
extern int __sync_lazy_execstate(void);
-/* Called by the scheduler to switch to another vcpu. */
+/*
+ * Called by the scheduler to switch to another VCPU. On entry, although
+ * VCPUF_running is no longer asserted for @prev, its context is still running
+ * on the local CPU and is not committed to memory. The local scheduler lock
+ * is therefore still held, and interrupts are disabled, because the local CPU
+ * is in an inconsistent state.
+ *
+ * The callee must ensure that the local CPU is no longer running in @prev's
+ * context, and that the context is saved to memory, before returning.
+ * Alternatively, if implementing lazy context switching, it suffices to ensure
+ * that invoking __sync_lazy_execstate() will switch and commit @prev's state.
+ */
extern void context_switch(
struct vcpu *prev,
struct vcpu *next);
-/* Called by the scheduler to continue running the current vcpu. */
+/*
+ * On some architectures (notably x86) it is not possible to entirely load
+ * @next's context with interrupts disabled. These may implement a function to
+ * finalise loading the new context after interrupts are re-enabled. This
+ * function is not given @prev and is not permitted to access it.
+ */
+extern void context_switch_finalise(
+ struct vcpu *next);
+
+/* Called by the scheduler to continue running the current VCPU. */
extern void continue_running(
struct vcpu *same);
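
For an architecture that can commit the whole switch with interrupts disabled, the contract documented above lets context_switch_finalise() be a no-op, as the ia64 stub earlier in this patch does. A hypothetical, simplified skeleton of such a port:

    #include <stdio.h>

    /* Simplified, hypothetical port: all state is committed in
     * context_switch(), so the finalise hook has nothing left to do. */
    struct vcpu { int id; };

    static void arch_save_state(struct vcpu *v) { printf("save vcpu %d\n", v->id); }
    static void arch_load_state(struct vcpu *v) { printf("load vcpu %d\n", v->id); }

    void context_switch(struct vcpu *prev, struct vcpu *next)
    {
        /* Entered with the scheduler lock held and interrupts disabled;
         * prev's context must be saved to memory before this returns. */
        arch_save_state(prev);
        arch_load_state(next);
    }

    void context_switch_finalise(struct vcpu *next)
    {
        /* Called after the lock is dropped and interrupts are re-enabled;
         * nothing was deferred, so nothing to do. */
        (void)next;
    }

    int main(void)
    {
        struct vcpu a = { 0 }, b = { 1 };
        context_switch(&a, &b);
        context_switch_finalise(&b);
        return 0;
    }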