author    Keir Fraser <keir.fraser@citrix.com>  2010-02-16 09:27:45 +0000
committer Keir Fraser <keir.fraser@citrix.com>  2010-02-16 09:27:45 +0000
commit    8a85ee5b553427d80bb959697ff1e8f382e9594b
tree      bdc59fff7425870bd319e18fe1dff054d24b3adc
parent    82dc4c9fd8b278972528b70fdfdf3d316e413642
cpuidle: do not enter deep C state if there is urgent VCPU
When a VCPU is polling on an event channel, it usually has an urgent task to run (e.g. it is spinning on a lock); in that case it is better for the cpuidle driver not to enter a deep C state. This patch fixes an issue where SLES 11 SP1 domain0 hangs on boxes with a large number of CPUs (>= 64 CPUs).

Signed-off-by: Yu Ke <ke.yu@intel.com>
Signed-off-by: Tian Kevin <kevin.tian@intel.com>
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
-rw-r--r--  xen/arch/x86/acpi/cpu_idle.c  | 50
-rw-r--r--  xen/common/sched_credit.c     |  1
-rw-r--r--  xen/common/schedule.c         | 47
-rw-r--r--  xen/include/xen/sched-if.h    |  1
-rw-r--r--  xen/include/xen/sched.h       |  2
5 files changed, 77 insertions(+), 24 deletions(-)
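For readers who want the shape of the change at a glance, here is a minimal, self-contained sketch of the mechanism in plain C. It is not the patch: NR_CPUS, vcpu_set_urgent() and pick_idle_state() are illustrative stand-ins. In the real patch the counter is schedule_data.urgent_count, maintained from vcpu_runstate_change(), vcpu_migrate() and sched_destroy_vcpu(), and checked in acpi_processor_idle() via sched_has_urgent_vcpu().

/* Sketch only: a per-CPU count of "urgent" (event-channel-polling) vcpus,
 * consulted by the idle path before choosing a deep C state. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define NR_CPUS 4                       /* illustrative, not Xen's value */

static atomic_int urgent_count[NR_CPUS];

/* Called when a vcpu on 'cpu' starts or stops polling an event channel. */
static void vcpu_set_urgent(int cpu, bool urgent)
{
    if ( urgent )
        atomic_fetch_add(&urgent_count[cpu], 1);
    else
        atomic_fetch_sub(&urgent_count[cpu], 1);
}

/* Idle-path decision: skip deep C states while any urgent vcpu is
 * accounted to this CPU, so the poller wakes with low latency. */
static const char *pick_idle_state(int cpu, int max_cstate)
{
    if ( max_cstate > 0 && atomic_load(&urgent_count[cpu]) == 0 )
        return "deep C state (C2/C3)";
    return "C1 halt only";
}

int main(void)
{
    vcpu_set_urgent(1, true);                          /* a vcpu polls on CPU1 */
    printf("cpu0 idles in: %s\n", pick_idle_state(0, 3));
    printf("cpu1 idles in: %s\n", pick_idle_state(1, 3));
    vcpu_set_urgent(1, false);                         /* polling finished */
    printf("cpu1 idles in: %s\n", pick_idle_state(1, 3));
    return 0;
}

Keeping the state as a per-CPU atomic counter is what makes the real check cheap: sched_has_urgent_vcpu() is a single atomic_read() on the idle path, and the counter is only updated on runstate changes, migration and vcpu destruction rather than by scanning poll_mask from the idle loop.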
diff --git a/xen/arch/x86/acpi/cpu_idle.c b/xen/arch/x86/acpi/cpu_idle.c
index 45030ba402..f06580fbb4 100644
--- a/xen/arch/x86/acpi/cpu_idle.c
+++ b/xen/arch/x86/acpi/cpu_idle.c
@@ -41,6 +41,7 @@
#include <xen/keyhandler.h>
#include <xen/cpuidle.h>
#include <xen/trace.h>
+#include <xen/sched-if.h>
#include <asm/cache.h>
#include <asm/io.h>
#include <asm/hpet.h>
@@ -216,6 +217,15 @@ static inline void trace_exit_reason(u32 *irq_traced)
}
}
+/* A vcpu is urgent if it is polling an event channel.
+ *
+ * If an urgent vcpu exists, the CPU should not enter a deep C state.
+ */
+static int sched_has_urgent_vcpu(void)
+{
+ return atomic_read(&this_cpu(schedule_data).urgent_count);
+}
+
static void acpi_processor_idle(void)
{
struct acpi_processor_power *power = processor_powers[smp_processor_id()];
@@ -226,27 +236,7 @@ static void acpi_processor_idle(void)
u32 exp = 0, pred = 0;
u32 irq_traced[4] = { 0 };
- cpufreq_dbs_timer_suspend();
-
- sched_tick_suspend();
- /* sched_tick_suspend() can raise TIMER_SOFTIRQ. Process it now. */
- process_pending_softirqs();
-
- /*
- * Interrupts must be disabled during bus mastering calculations and
- * for C2/C3 transitions.
- */
- local_irq_disable();
-
- if ( softirq_pending(smp_processor_id()) )
- {
- local_irq_enable();
- sched_tick_resume();
- cpufreq_dbs_timer_resume();
- return;
- }
-
- if ( max_cstate > 0 && power &&
+ if ( max_cstate > 0 && power && !sched_has_urgent_vcpu() &&
(next_state = cpuidle_current_governor->select(power)) > 0 )
{
cx = &power->states[next_state];
@@ -263,6 +253,24 @@ static void acpi_processor_idle(void)
pm_idle_save();
else
acpi_safe_halt();
+ return;
+ }
+
+ cpufreq_dbs_timer_suspend();
+
+ sched_tick_suspend();
+ /* sched_tick_suspend() can raise TIMER_SOFTIRQ. Process it now. */
+ process_pending_softirqs();
+
+ /*
+ * Interrupts must be disabled during bus mastering calculations and
+ * for C2/C3 transitions.
+ */
+ local_irq_disable();
+
+ if ( softirq_pending(smp_processor_id()) )
+ {
+ local_irq_enable();
sched_tick_resume();
cpufreq_dbs_timer_resume();
return;
diff --git a/xen/common/sched_credit.c b/xen/common/sched_credit.c
index b0ccb0ccac..914022ebb9 100644
--- a/xen/common/sched_credit.c
+++ b/xen/common/sched_credit.c
@@ -1060,6 +1060,7 @@ csched_runq_steal(int peer_cpu, int cpu, int pri)
/* We got a candidate. Grab it! */
CSCHED_VCPU_STAT_CRANK(speer, migrate_q);
CSCHED_STAT_CRANK(migrate_queued);
+ BUG_ON(vc->is_urgent);
__runq_remove(speer);
vc->processor = cpu;
return speer;
diff --git a/xen/common/schedule.c b/xen/common/schedule.c
index 3b4be55d79..d02eb1f0af 100644
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -100,6 +100,29 @@ static inline void trace_continue_running(struct vcpu *v)
(unsigned char *)&d);
}
+static inline void vcpu_urgent_count_update(struct vcpu *v)
+{
+ if ( is_idle_vcpu(v) )
+ return;
+
+ if ( unlikely(v->is_urgent) )
+ {
+ if ( !test_bit(v->vcpu_id, v->domain->poll_mask) )
+ {
+ v->is_urgent = 0;
+ atomic_dec(&per_cpu(schedule_data,v->processor).urgent_count);
+ }
+ }
+ else
+ {
+ if ( unlikely(test_bit(v->vcpu_id, v->domain->poll_mask)) )
+ {
+ v->is_urgent = 1;
+ atomic_inc(&per_cpu(schedule_data,v->processor).urgent_count);
+ }
+ }
+}
+
static inline void vcpu_runstate_change(
struct vcpu *v, int new_state, s_time_t new_entry_time)
{
@@ -108,6 +131,8 @@ static inline void vcpu_runstate_change(
ASSERT(v->runstate.state != new_state);
ASSERT(spin_is_locked(&per_cpu(schedule_data,v->processor).schedule_lock));
+ vcpu_urgent_count_update(v);
+
trace_runstate_change(v, new_state);
delta = new_entry_time - v->runstate.state_entry_time;
@@ -188,6 +213,8 @@ void sched_destroy_vcpu(struct vcpu *v)
kill_timer(&v->periodic_timer);
kill_timer(&v->singleshot_timer);
kill_timer(&v->poll_timer);
+ if ( test_and_clear_bool(v->is_urgent) )
+ atomic_dec(&per_cpu(schedule_data, v->processor).urgent_count);
SCHED_OP(destroy_vcpu, v);
}
@@ -277,7 +304,7 @@ void vcpu_unblock(struct vcpu *v)
static void vcpu_migrate(struct vcpu *v)
{
unsigned long flags;
- int old_cpu;
+ int old_cpu, new_cpu;
vcpu_schedule_lock_irqsave(v, flags);
@@ -293,9 +320,23 @@ static void vcpu_migrate(struct vcpu *v)
return;
}
- /* Switch to new CPU, then unlock old CPU. */
+ /* Select new CPU. */
old_cpu = v->processor;
- v->processor = SCHED_OP(pick_cpu, v);
+ new_cpu = SCHED_OP(pick_cpu, v);
+
+ /*
+ * Transfer urgency status to new CPU before switching CPUs, as once
+ * the switch occurs, v->is_urgent is no longer protected by the per-CPU
+ * scheduler lock we are holding.
+ */
+ if ( unlikely(v->is_urgent) && (old_cpu != new_cpu) )
+ {
+ atomic_inc(&per_cpu(schedule_data, new_cpu).urgent_count);
+ atomic_dec(&per_cpu(schedule_data, old_cpu).urgent_count);
+ }
+
+ /* Switch to new CPU, then unlock old CPU. */
+ v->processor = new_cpu;
spin_unlock_irqrestore(
&per_cpu(schedule_data, old_cpu).schedule_lock, flags);
diff --git a/xen/include/xen/sched-if.h b/xen/include/xen/sched-if.h
index 5caf8245d2..ed05757730 100644
--- a/xen/include/xen/sched-if.h
+++ b/xen/include/xen/sched-if.h
@@ -16,6 +16,7 @@ struct schedule_data {
struct vcpu *idle; /* idle task for this cpu */
void *sched_priv;
struct timer s_timer; /* scheduling timer */
+ atomic_t urgent_count; /* how many urgent vcpus */
} __cacheline_aligned;
DECLARE_PER_CPU(struct schedule_data, schedule_data);
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index d9180773f9..2b2eca3bb8 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -115,6 +115,8 @@ struct vcpu
bool_t is_initialised;
/* Currently running on a CPU? */
bool_t is_running;
+ /* VCPU should wake fast (do not deep sleep the CPU). */
+ bool_t is_urgent;
#ifdef VCPU_TRAP_LAST
#define VCPU_TRAP_NONE 0