diff options
author | Keir Fraser <keir.fraser@citrix.com> | 2009-04-14 11:20:55 +0100 |
---|---|---|
committer | Keir Fraser <keir.fraser@citrix.com> | 2009-04-14 11:20:55 +0100 |
commit | 382b95f627a91a75545799f36534dcf6d145381e (patch) | |
tree | 5995c7c9678ecfb097519dd6268bf6ab6993174b | |
parent | 891410cbc469d93c3c6bb102ac83ea2036c79983 (diff) | |
download | xen-382b95f627a91a75545799f36534dcf6d145381e.tar.gz xen-382b95f627a91a75545799f36534dcf6d145381e.tar.bz2 xen-382b95f627a91a75545799f36534dcf6d145381e.zip |
Fix cpufreq HW-ALL coordination handling
Currently, cpufreq HW-ALL coordination is handled the same way as SW-ALL.
However, SW-ALL handling generates more IPIs, which is bad for cpuidle.
This patch handles HW-ALL coordination differently from SW-ALL,
to improve performance and reduce IPIs. We also suspend the HW-ALL
dbs timer when a CPU enters idle and resume it when the CPU leaves idle.
Signed-off-by: Yu, Ke <ke.yu@intel.com>
Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com>
Signed-off-by: Tian, Kevin <kevin.tian@intel.com>
-rw-r--r-- | xen/arch/x86/acpi/cpu_idle.c | 7 | ||||
-rw-r--r-- | xen/arch/x86/acpi/cpufreq/cpufreq.c | 6 | ||||
-rw-r--r-- | xen/drivers/cpufreq/cpufreq.c | 125 | ||||
-rw-r--r-- | xen/drivers/cpufreq/cpufreq_ondemand.c | 41 | ||||
-rw-r--r-- | xen/include/acpi/cpufreq/cpufreq.h | 4 |
5 files changed, 124 insertions, 59 deletions
diff --git a/xen/arch/x86/acpi/cpu_idle.c b/xen/arch/x86/acpi/cpu_idle.c index a7ca18826e..856572a089 100644 --- a/xen/arch/x86/acpi/cpu_idle.c +++ b/xen/arch/x86/acpi/cpu_idle.c @@ -47,6 +47,7 @@ #include <asm/processor.h> #include <public/platform.h> #include <public/sysctl.h> +#include <acpi/cpufreq/cpufreq.h> /*#define DEBUG_PM_CX*/ @@ -195,6 +196,8 @@ static void acpi_processor_idle(void) int sleep_ticks = 0; u32 t1, t2 = 0; + cpufreq_dbs_timer_suspend(); + sched_tick_suspend(); /* * sched_tick_suspend may raise TIMER_SOFTIRQ by __stop_timer, @@ -214,6 +217,7 @@ static void acpi_processor_idle(void) { local_irq_enable(); sched_tick_resume(); + cpufreq_dbs_timer_resume(); return; } @@ -234,6 +238,7 @@ static void acpi_processor_idle(void) else acpi_safe_halt(); sched_tick_resume(); + cpufreq_dbs_timer_resume(); return; } @@ -341,6 +346,7 @@ static void acpi_processor_idle(void) default: local_irq_enable(); sched_tick_resume(); + cpufreq_dbs_timer_resume(); return; } @@ -352,6 +358,7 @@ static void acpi_processor_idle(void) } sched_tick_resume(); + cpufreq_dbs_timer_resume(); if ( cpuidle_current_governor->reflect ) cpuidle_current_governor->reflect(power); diff --git a/xen/arch/x86/acpi/cpufreq/cpufreq.c b/xen/arch/x86/acpi/cpufreq/cpufreq.c index cda7fb40aa..1631a30935 100644 --- a/xen/arch/x86/acpi/cpufreq/cpufreq.c +++ b/xen/arch/x86/acpi/cpufreq/cpufreq.c @@ -191,7 +191,11 @@ static void drv_read(struct drv_cmd *cmd) static void drv_write(struct drv_cmd *cmd) { - on_selected_cpus( cmd->mask, do_drv_write, (void *)cmd, 0, 0); + if ((cpus_weight(cmd->mask) == 1) && + cpu_isset(smp_processor_id(), cmd->mask)) + do_drv_write((void *)cmd); + else + on_selected_cpus( cmd->mask, do_drv_write, (void *)cmd, 0, 0); } static u32 get_cur_val(cpumask_t mask) diff --git a/xen/drivers/cpufreq/cpufreq.c b/xen/drivers/cpufreq/cpufreq.c index efb805b01c..39cc7eba61 100644 --- a/xen/drivers/cpufreq/cpufreq.c +++ b/xen/drivers/cpufreq/cpufreq.c @@ -130,7 +130,7 @@ int 
cpufreq_add_cpu(unsigned int cpu) int ret = 0; unsigned int firstcpu; unsigned int dom, domexist = 0; - unsigned int j; + unsigned int hw_all = 0; struct list_head *pos; struct cpufreq_dom *cpufreq_dom = NULL; struct cpufreq_policy new_policy; @@ -146,9 +146,8 @@ int cpufreq_add_cpu(unsigned int cpu) if (cpufreq_cpu_policy[cpu]) return 0; - ret = cpufreq_statistic_init(cpu); - if (ret) - return ret; + if (perf->shared_type == CPUFREQ_SHARED_TYPE_HW) + hw_all = 1; dom = perf->domain_info.domain; @@ -160,61 +159,57 @@ int cpufreq_add_cpu(unsigned int cpu) } } - if (domexist) { - /* share policy with the first cpu since on same boat */ - firstcpu = first_cpu(cpufreq_dom->map); - policy = cpufreq_cpu_policy[firstcpu]; - - cpufreq_cpu_policy[cpu] = policy; - cpu_set(cpu, cpufreq_dom->map); - cpu_set(cpu, policy->cpus); - - /* domain coordination sanity check */ - if ((perf->domain_info.coord_type != - processor_pminfo[firstcpu]->perf.domain_info.coord_type) || - (perf->domain_info.num_processors != - processor_pminfo[firstcpu]->perf.domain_info.num_processors)) { - ret = -EINVAL; - goto err2; - } - - printk(KERN_EMERG"adding CPU %u\n", cpu); - } else { + if (!domexist) { cpufreq_dom = xmalloc(struct cpufreq_dom); - if (!cpufreq_dom) { - cpufreq_statistic_exit(cpu); + if (!cpufreq_dom) return -ENOMEM; - } + memset(cpufreq_dom, 0, sizeof(struct cpufreq_dom)); cpufreq_dom->dom = dom; - cpu_set(cpu, cpufreq_dom->map); list_add(&cpufreq_dom->node, &cpufreq_dom_list_head); + } else { + /* domain sanity check under whatever coordination type */ + firstcpu = first_cpu(cpufreq_dom->map); + if ((perf->domain_info.coord_type != + processor_pminfo[firstcpu]->perf.domain_info.coord_type) || + (perf->domain_info.num_processors != + processor_pminfo[firstcpu]->perf.domain_info.num_processors)) { + return -EINVAL; + } + } - /* for the first cpu, setup policy and do init work */ + if (!domexist || hw_all) { policy = xmalloc(struct cpufreq_policy); - if (!policy) { - 
list_del(&cpufreq_dom->node); - xfree(cpufreq_dom); - cpufreq_statistic_exit(cpu); - return -ENOMEM; - } + if (!policy) + ret = -ENOMEM; + memset(policy, 0, sizeof(struct cpufreq_policy)); policy->cpu = cpu; - cpu_set(cpu, policy->cpus); cpufreq_cpu_policy[cpu] = policy; ret = cpufreq_driver->init(policy); - if (ret) - goto err1; + if (ret) { + xfree(policy); + return ret; + } printk(KERN_EMERG"CPU %u initialization completed\n", cpu); + } else { + firstcpu = first_cpu(cpufreq_dom->map); + policy = cpufreq_cpu_policy[firstcpu]; + + cpufreq_cpu_policy[cpu] = policy; + printk(KERN_EMERG"adding CPU %u\n", cpu); } - /* - * After get full cpumap of the coordination domain, - * we can safely start gov here. - */ - if (cpus_weight(cpufreq_dom->map) == - perf->domain_info.num_processors) { + cpu_set(cpu, policy->cpus); + cpu_set(cpu, cpufreq_dom->map); + + ret = cpufreq_statistic_init(cpu); + if (ret) + goto err1; + + if (hw_all || + (cpus_weight(cpufreq_dom->map) == perf->domain_info.num_processors)) { memcpy(&new_policy, policy, sizeof(struct cpufreq_policy)); policy->governor = NULL; @@ -240,22 +235,29 @@ int cpufreq_add_cpu(unsigned int cpu) return 0; err2: - cpufreq_driver->exit(policy); + cpufreq_statistic_exit(cpu); err1: - for_each_cpu_mask(j, cpufreq_dom->map) { - cpufreq_cpu_policy[j] = NULL; - cpufreq_statistic_exit(j); + cpufreq_cpu_policy[cpu] = NULL; + cpu_clear(cpu, policy->cpus); + cpu_clear(cpu, cpufreq_dom->map); + + if (cpus_empty(policy->cpus)) { + cpufreq_driver->exit(policy); + xfree(policy); + } + + if (cpus_empty(cpufreq_dom->map)) { + list_del(&cpufreq_dom->node); + xfree(cpufreq_dom); } - list_del(&cpufreq_dom->node); - xfree(cpufreq_dom); - xfree(policy); return ret; } int cpufreq_del_cpu(unsigned int cpu) { unsigned int dom, domexist = 0; + unsigned int hw_all = 0; struct list_head *pos; struct cpufreq_dom *cpufreq_dom = NULL; struct cpufreq_policy *policy; @@ -270,6 +272,9 @@ int cpufreq_del_cpu(unsigned int cpu) if (!cpufreq_cpu_policy[cpu]) 
return 0; + if (perf->shared_type == CPUFREQ_SHARED_TYPE_HW) + hw_all = 1; + dom = perf->domain_info.domain; policy = cpufreq_cpu_policy[cpu]; @@ -284,23 +289,27 @@ int cpufreq_del_cpu(unsigned int cpu) if (!domexist) return -EINVAL; - /* for the first cpu of the domain, stop gov */ - if (cpus_weight(cpufreq_dom->map) == - perf->domain_info.num_processors) + /* for HW_ALL, stop gov for each core of the _PSD domain */ + /* for SW_ALL & SW_ANY, stop gov for the 1st core of the _PSD domain */ + if (hw_all || + (cpus_weight(cpufreq_dom->map) == perf->domain_info.num_processors)) __cpufreq_governor(policy, CPUFREQ_GOV_STOP); + cpufreq_statistic_exit(cpu); cpufreq_cpu_policy[cpu] = NULL; cpu_clear(cpu, policy->cpus); cpu_clear(cpu, cpufreq_dom->map); - cpufreq_statistic_exit(cpu); + + if (cpus_empty(policy->cpus)) { + cpufreq_driver->exit(policy); + xfree(policy); + } /* for the last cpu of the domain, clean room */ /* It's safe here to free freq_table, drv_data and policy */ - if (!cpus_weight(cpufreq_dom->map)) { - cpufreq_driver->exit(policy); + if (cpus_empty(cpufreq_dom->map)) { list_del(&cpufreq_dom->node); xfree(cpufreq_dom); - xfree(policy); } printk(KERN_EMERG"deleting CPU %u\n", cpu); diff --git a/xen/drivers/cpufreq/cpufreq_ondemand.c b/xen/drivers/cpufreq/cpufreq_ondemand.c index 74dd74ea0e..a4ff4f9848 100644 --- a/xen/drivers/cpufreq/cpufreq_ondemand.c +++ b/xen/drivers/cpufreq/cpufreq_ondemand.c @@ -190,6 +190,12 @@ static void dbs_timer_init(struct cpu_dbs_info_s *dbs_info) (void *)dbs_info, dbs_info->cpu); set_timer(&dbs_timer[dbs_info->cpu], NOW()+dbs_tuners_ins.sampling_rate); + + if ( processor_pminfo[dbs_info->cpu]->perf.shared_type + == CPUFREQ_SHARED_TYPE_HW ) + { + dbs_info->stoppable = 1; + } } static void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info) @@ -337,3 +343,38 @@ static void __exit cpufreq_gov_dbs_exit(void) cpufreq_unregister_governor(&cpufreq_gov_dbs); } __exitcall(cpufreq_gov_dbs_exit); + +void cpufreq_dbs_timer_suspend(void) +{ + 
int cpu; + + cpu = smp_processor_id(); + + if ( per_cpu(cpu_dbs_info,cpu).stoppable ) + { + stop_timer( &dbs_timer[cpu] ); + } +} + +void cpufreq_dbs_timer_resume(void) +{ + int cpu; + struct timer* t; + s_time_t now; + + cpu = smp_processor_id(); + + if ( per_cpu(cpu_dbs_info,cpu).stoppable ) + { + now = NOW(); + t = &dbs_timer[cpu]; + if (t->expires <= now) + { + t->function(t->data); + } + else + { + set_timer(t, align_timer(now , dbs_tuners_ins.sampling_rate)); + } + } +} diff --git a/xen/include/acpi/cpufreq/cpufreq.h b/xen/include/acpi/cpufreq/cpufreq.h index 8423664efe..2f24c4fed6 100644 --- a/xen/include/acpi/cpufreq/cpufreq.h +++ b/xen/include/acpi/cpufreq/cpufreq.h @@ -221,6 +221,7 @@ struct cpu_dbs_info_s { struct cpufreq_frequency_table *freq_table; int cpu; unsigned int enable:1; + unsigned int stoppable:1; }; int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event); @@ -232,4 +233,7 @@ int write_ondemand_sampling_rate(unsigned int sampling_rate); int write_ondemand_up_threshold(unsigned int up_threshold); int write_userspace_scaling_setspeed(unsigned int cpu, unsigned int freq); + +void cpufreq_dbs_timer_suspend(void); +void cpufreq_dbs_timer_resume(void); #endif /* __XEN_CPUFREQ_PM_H__ */ |