author    Keir Fraser <keir.fraser@citrix.com>  2009-04-14 11:20:55 +0100
committer Keir Fraser <keir.fraser@citrix.com>  2009-04-14 11:20:55 +0100
commit    382b95f627a91a75545799f36534dcf6d145381e (patch)
tree      5995c7c9678ecfb097519dd6268bf6ab6993174b
parent    891410cbc469d93c3c6bb102ac83ea2036c79983 (diff)
Fix cpufreq HW-ALL coordination handling
Currently cpufreq HW-ALL coordination is handled the same way as SW-ALL.
However, SW-ALL generates more IPIs, which is bad for cpuidle. This patch
implements HW-ALL coordination differently from SW-ALL, to improve
performance and reduce IPIs. The HW-ALL dbs timer is also suspended and
resumed around idle.

Signed-off-by: Yu, Ke <ke.yu@intel.com>
Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com>
Signed-off-by: Tian, Kevin <kevin.tian@intel.com>
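The bulk of the IPI saving is visible in drv_write() below: under HW-ALL
coordination the request mask can be just the local CPU, in which case the
P-state write is done in place instead of through on_selected_cpus(). A
minimal standalone sketch of that decision follows; the mask type and all
helpers here are stubs (not the Xen primitives) so the control flow compiles
on its own.

#include <stdint.h>
#include <stdio.h>

typedef uint64_t cpumask_t;                 /* stand-in for Xen's cpumask_t */

static unsigned int smp_processor_id(void) { return 0; }            /* stub */

static unsigned int cpus_weight(cpumask_t m)
{
    unsigned int w = 0;
    for (; m; m &= m - 1)                   /* count set bits */
        w++;
    return w;
}

static int cpu_isset(unsigned int cpu, cpumask_t m)
{
    return (m >> cpu) & 1;
}

static void do_drv_write(void *cmd)
{
    (void)cmd;                 /* in Xen: the actual wrmsr for the P-state */
    printf("local MSR write, no IPI\n");
}

static void on_selected_cpus(cpumask_t mask, void (*fn)(void *), void *cmd)
{
    /* In Xen this raises IPIs so every CPU in the mask runs fn(). */
    printf("IPI broadcast to %u CPU(s)\n", cpus_weight(mask));
    fn(cmd);
}

static void drv_write(cpumask_t mask, void *cmd)
{
    /* HW-ALL: hardware coordinates the domain, so a request from the local
     * CPU alone suffices -- skip the cross-CPU IPIs that the SW-ALL path
     * needs and that wake CPUs out of deep C-states. */
    if (cpus_weight(mask) == 1 && cpu_isset(smp_processor_id(), mask))
        do_drv_write(cmd);
    else
        on_selected_cpus(mask, do_drv_write, cmd);
}

int main(void)
{
    drv_write((cpumask_t)1 << 0, NULL);   /* only the local CPU: fast path */
    drv_write((cpumask_t)0xf, NULL);      /* whole domain: IPI path        */
    return 0;
}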
-rw-r--r--  xen/arch/x86/acpi/cpu_idle.c            |   7
-rw-r--r--  xen/arch/x86/acpi/cpufreq/cpufreq.c     |   6
-rw-r--r--  xen/drivers/cpufreq/cpufreq.c           | 125
-rw-r--r--  xen/drivers/cpufreq/cpufreq_ondemand.c  |  41
-rw-r--r--  xen/include/acpi/cpufreq/cpufreq.h      |   4
5 files changed, 124 insertions(+), 59 deletions(-)
diff --git a/xen/arch/x86/acpi/cpu_idle.c b/xen/arch/x86/acpi/cpu_idle.c
index a7ca18826e..856572a089 100644
--- a/xen/arch/x86/acpi/cpu_idle.c
+++ b/xen/arch/x86/acpi/cpu_idle.c
@@ -47,6 +47,7 @@
#include <asm/processor.h>
#include <public/platform.h>
#include <public/sysctl.h>
+#include <acpi/cpufreq/cpufreq.h>
/*#define DEBUG_PM_CX*/
@@ -195,6 +196,8 @@ static void acpi_processor_idle(void)
int sleep_ticks = 0;
u32 t1, t2 = 0;
+ cpufreq_dbs_timer_suspend();
+
sched_tick_suspend();
/*
* sched_tick_suspend may raise TIMER_SOFTIRQ by __stop_timer,
@@ -214,6 +217,7 @@ static void acpi_processor_idle(void)
{
local_irq_enable();
sched_tick_resume();
+ cpufreq_dbs_timer_resume();
return;
}
@@ -234,6 +238,7 @@ static void acpi_processor_idle(void)
else
acpi_safe_halt();
sched_tick_resume();
+ cpufreq_dbs_timer_resume();
return;
}
@@ -341,6 +346,7 @@ static void acpi_processor_idle(void)
default:
local_irq_enable();
sched_tick_resume();
+ cpufreq_dbs_timer_resume();
return;
}
@@ -352,6 +358,7 @@ static void acpi_processor_idle(void)
}
sched_tick_resume();
+ cpufreq_dbs_timer_resume();
if ( cpuidle_current_governor->reflect )
cpuidle_current_governor->reflect(power);
diff --git a/xen/arch/x86/acpi/cpufreq/cpufreq.c b/xen/arch/x86/acpi/cpufreq/cpufreq.c
index cda7fb40aa..1631a30935 100644
--- a/xen/arch/x86/acpi/cpufreq/cpufreq.c
+++ b/xen/arch/x86/acpi/cpufreq/cpufreq.c
@@ -191,7 +191,11 @@ static void drv_read(struct drv_cmd *cmd)
static void drv_write(struct drv_cmd *cmd)
{
- on_selected_cpus( cmd->mask, do_drv_write, (void *)cmd, 0, 0);
+ if ((cpus_weight(cmd->mask) == 1) &&
+ cpu_isset(smp_processor_id(), cmd->mask))
+ do_drv_write((void *)cmd);
+ else
+ on_selected_cpus( cmd->mask, do_drv_write, (void *)cmd, 0, 0);
}
static u32 get_cur_val(cpumask_t mask)
diff --git a/xen/drivers/cpufreq/cpufreq.c b/xen/drivers/cpufreq/cpufreq.c
index efb805b01c..39cc7eba61 100644
--- a/xen/drivers/cpufreq/cpufreq.c
+++ b/xen/drivers/cpufreq/cpufreq.c
@@ -130,7 +130,7 @@ int cpufreq_add_cpu(unsigned int cpu)
int ret = 0;
unsigned int firstcpu;
unsigned int dom, domexist = 0;
- unsigned int j;
+ unsigned int hw_all = 0;
struct list_head *pos;
struct cpufreq_dom *cpufreq_dom = NULL;
struct cpufreq_policy new_policy;
@@ -146,9 +146,8 @@ int cpufreq_add_cpu(unsigned int cpu)
if (cpufreq_cpu_policy[cpu])
return 0;
- ret = cpufreq_statistic_init(cpu);
- if (ret)
- return ret;
+ if (perf->shared_type == CPUFREQ_SHARED_TYPE_HW)
+ hw_all = 1;
dom = perf->domain_info.domain;
@@ -160,61 +159,57 @@ int cpufreq_add_cpu(unsigned int cpu)
}
}
- if (domexist) {
- /* share policy with the first cpu since on same boat */
- firstcpu = first_cpu(cpufreq_dom->map);
- policy = cpufreq_cpu_policy[firstcpu];
-
- cpufreq_cpu_policy[cpu] = policy;
- cpu_set(cpu, cpufreq_dom->map);
- cpu_set(cpu, policy->cpus);
-
- /* domain coordination sanity check */
- if ((perf->domain_info.coord_type !=
- processor_pminfo[firstcpu]->perf.domain_info.coord_type) ||
- (perf->domain_info.num_processors !=
- processor_pminfo[firstcpu]->perf.domain_info.num_processors)) {
- ret = -EINVAL;
- goto err2;
- }
-
- printk(KERN_EMERG"adding CPU %u\n", cpu);
- } else {
+ if (!domexist) {
cpufreq_dom = xmalloc(struct cpufreq_dom);
- if (!cpufreq_dom) {
- cpufreq_statistic_exit(cpu);
+ if (!cpufreq_dom)
return -ENOMEM;
- }
+
memset(cpufreq_dom, 0, sizeof(struct cpufreq_dom));
cpufreq_dom->dom = dom;
- cpu_set(cpu, cpufreq_dom->map);
list_add(&cpufreq_dom->node, &cpufreq_dom_list_head);
+ } else {
+ /* domain sanity check under whatever coordination type */
+ firstcpu = first_cpu(cpufreq_dom->map);
+ if ((perf->domain_info.coord_type !=
+ processor_pminfo[firstcpu]->perf.domain_info.coord_type) ||
+ (perf->domain_info.num_processors !=
+ processor_pminfo[firstcpu]->perf.domain_info.num_processors)) {
+ return -EINVAL;
+ }
+ }
- /* for the first cpu, setup policy and do init work */
+ if (!domexist || hw_all) {
policy = xmalloc(struct cpufreq_policy);
- if (!policy) {
- list_del(&cpufreq_dom->node);
- xfree(cpufreq_dom);
- cpufreq_statistic_exit(cpu);
- return -ENOMEM;
- }
+ if (!policy)
+ ret = -ENOMEM;
+
memset(policy, 0, sizeof(struct cpufreq_policy));
policy->cpu = cpu;
- cpu_set(cpu, policy->cpus);
cpufreq_cpu_policy[cpu] = policy;
ret = cpufreq_driver->init(policy);
- if (ret)
- goto err1;
+ if (ret) {
+ xfree(policy);
+ return ret;
+ }
printk(KERN_EMERG"CPU %u initialization completed\n", cpu);
+ } else {
+ firstcpu = first_cpu(cpufreq_dom->map);
+ policy = cpufreq_cpu_policy[firstcpu];
+
+ cpufreq_cpu_policy[cpu] = policy;
+ printk(KERN_EMERG"adding CPU %u\n", cpu);
}
- /*
- * After get full cpumap of the coordination domain,
- * we can safely start gov here.
- */
- if (cpus_weight(cpufreq_dom->map) ==
- perf->domain_info.num_processors) {
+ cpu_set(cpu, policy->cpus);
+ cpu_set(cpu, cpufreq_dom->map);
+
+ ret = cpufreq_statistic_init(cpu);
+ if (ret)
+ goto err1;
+
+ if (hw_all ||
+ (cpus_weight(cpufreq_dom->map) == perf->domain_info.num_processors)) {
memcpy(&new_policy, policy, sizeof(struct cpufreq_policy));
policy->governor = NULL;
@@ -240,22 +235,29 @@ int cpufreq_add_cpu(unsigned int cpu)
return 0;
err2:
- cpufreq_driver->exit(policy);
+ cpufreq_statistic_exit(cpu);
err1:
- for_each_cpu_mask(j, cpufreq_dom->map) {
- cpufreq_cpu_policy[j] = NULL;
- cpufreq_statistic_exit(j);
+ cpufreq_cpu_policy[cpu] = NULL;
+ cpu_clear(cpu, policy->cpus);
+ cpu_clear(cpu, cpufreq_dom->map);
+
+ if (cpus_empty(policy->cpus)) {
+ cpufreq_driver->exit(policy);
+ xfree(policy);
+ }
+
+ if (cpus_empty(cpufreq_dom->map)) {
+ list_del(&cpufreq_dom->node);
+ xfree(cpufreq_dom);
}
- list_del(&cpufreq_dom->node);
- xfree(cpufreq_dom);
- xfree(policy);
return ret;
}
int cpufreq_del_cpu(unsigned int cpu)
{
unsigned int dom, domexist = 0;
+ unsigned int hw_all = 0;
struct list_head *pos;
struct cpufreq_dom *cpufreq_dom = NULL;
struct cpufreq_policy *policy;
@@ -270,6 +272,9 @@ int cpufreq_del_cpu(unsigned int cpu)
if (!cpufreq_cpu_policy[cpu])
return 0;
+ if (perf->shared_type == CPUFREQ_SHARED_TYPE_HW)
+ hw_all = 1;
+
dom = perf->domain_info.domain;
policy = cpufreq_cpu_policy[cpu];
@@ -284,23 +289,27 @@ int cpufreq_del_cpu(unsigned int cpu)
if (!domexist)
return -EINVAL;
- /* for the first cpu of the domain, stop gov */
- if (cpus_weight(cpufreq_dom->map) ==
- perf->domain_info.num_processors)
+ /* for HW_ALL, stop gov for each core of the _PSD domain */
+ /* for SW_ALL & SW_ANY, stop gov for the 1st core of the _PSD domain */
+ if (hw_all ||
+ (cpus_weight(cpufreq_dom->map) == perf->domain_info.num_processors))
__cpufreq_governor(policy, CPUFREQ_GOV_STOP);
+ cpufreq_statistic_exit(cpu);
cpufreq_cpu_policy[cpu] = NULL;
cpu_clear(cpu, policy->cpus);
cpu_clear(cpu, cpufreq_dom->map);
- cpufreq_statistic_exit(cpu);
+
+ if (cpus_empty(policy->cpus)) {
+ cpufreq_driver->exit(policy);
+ xfree(policy);
+ }
/* for the last cpu of the domain, clean room */
/* It's safe here to free freq_table, drv_data and policy */
- if (!cpus_weight(cpufreq_dom->map)) {
- cpufreq_driver->exit(policy);
+ if (cpus_empty(cpufreq_dom->map)) {
list_del(&cpufreq_dom->node);
xfree(cpufreq_dom);
- xfree(policy);
}
printk(KERN_EMERG"deleting CPU %u\n", cpu);
diff --git a/xen/drivers/cpufreq/cpufreq_ondemand.c b/xen/drivers/cpufreq/cpufreq_ondemand.c
index 74dd74ea0e..a4ff4f9848 100644
--- a/xen/drivers/cpufreq/cpufreq_ondemand.c
+++ b/xen/drivers/cpufreq/cpufreq_ondemand.c
@@ -190,6 +190,12 @@ static void dbs_timer_init(struct cpu_dbs_info_s *dbs_info)
(void *)dbs_info, dbs_info->cpu);
set_timer(&dbs_timer[dbs_info->cpu], NOW()+dbs_tuners_ins.sampling_rate);
+
+ if ( processor_pminfo[dbs_info->cpu]->perf.shared_type
+ == CPUFREQ_SHARED_TYPE_HW )
+ {
+ dbs_info->stoppable = 1;
+ }
}
static void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info)
@@ -337,3 +343,38 @@ static void __exit cpufreq_gov_dbs_exit(void)
cpufreq_unregister_governor(&cpufreq_gov_dbs);
}
__exitcall(cpufreq_gov_dbs_exit);
+
+void cpufreq_dbs_timer_suspend(void)
+{
+ int cpu;
+
+ cpu = smp_processor_id();
+
+ if ( per_cpu(cpu_dbs_info,cpu).stoppable )
+ {
+ stop_timer( &dbs_timer[cpu] );
+ }
+}
+
+void cpufreq_dbs_timer_resume(void)
+{
+ int cpu;
+ struct timer* t;
+ s_time_t now;
+
+ cpu = smp_processor_id();
+
+ if ( per_cpu(cpu_dbs_info,cpu).stoppable )
+ {
+ now = NOW();
+ t = &dbs_timer[cpu];
+ if (t->expires <= now)
+ {
+ t->function(t->data);
+ }
+ else
+ {
+ set_timer(t, align_timer(now , dbs_tuners_ins.sampling_rate));
+ }
+ }
+}
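The resume path above deliberately re-arms with align_timer() instead of a
plain NOW() + sampling_rate, so a timer that slept through several periods
comes back in phase with the sampling grid. A minimal sketch of that
rounding, under the assumption that align_timer() rounds the first tick up
to the next multiple of the period (the name and types below are stand-ins):

#include <stdint.h>
#include <stdio.h>

typedef int64_t s_time_t;          /* Xen's signed nanosecond time type */

static s_time_t align_timer_sketch(s_time_t firsttick, uint64_t period)
{
    if (period == 0)
        return firsttick;
    /* Round up to the next period boundary rather than starting a fresh
     * period at wakeup time. */
    return firsttick + period - (firsttick % period);
}

int main(void)
{
    /* e.g. NOW() = 12345 ns into a 10000 ns sampling period: the next
     * expiry lands on the 20000 ns boundary. */
    printf("%lld\n", (long long)align_timer_sketch(12345, 10000));
    return 0;
}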
diff --git a/xen/include/acpi/cpufreq/cpufreq.h b/xen/include/acpi/cpufreq/cpufreq.h
index 8423664efe..2f24c4fed6 100644
--- a/xen/include/acpi/cpufreq/cpufreq.h
+++ b/xen/include/acpi/cpufreq/cpufreq.h
@@ -221,6 +221,7 @@ struct cpu_dbs_info_s {
struct cpufreq_frequency_table *freq_table;
int cpu;
unsigned int enable:1;
+ unsigned int stoppable:1;
};
int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event);
@@ -232,4 +233,7 @@ int write_ondemand_sampling_rate(unsigned int sampling_rate);
int write_ondemand_up_threshold(unsigned int up_threshold);
int write_userspace_scaling_setspeed(unsigned int cpu, unsigned int freq);
+
+void cpufreq_dbs_timer_suspend(void);
+void cpufreq_dbs_timer_resume(void);
#endif /* __XEN_CPUFREQ_PM_H__ */