/*
 *  xen/arch/x86/acpi/cpufreq/cpufreq_ondemand.c
 *
 *  Copyright (C)  2001 Russell King
 *            (C)  2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>.
 *                      Jun Nakajima <jun.nakajima@intel.com>
 *             Feb 2008 Liu Jinsong <jinsong.liu@intel.com>
 *             Porting cpufreq_ondemand.c from Linux 2.6.23 to Xen hypervisor
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <xen/types.h>
#include <xen/init.h>
#include <xen/percpu.h>
#include <xen/cpumask.h>
#include <xen/sched.h>
#include <xen/timer.h>
#include <acpi/cpufreq/cpufreq.h>

#define DEF_FREQUENCY_UP_THRESHOLD              (80)
#define MIN_FREQUENCY_UP_THRESHOLD              (11)
#define MAX_FREQUENCY_UP_THRESHOLD              (100)

#define MIN_DBS_INTERVAL                        (MICROSECS(100))
#define MIN_SAMPLING_RATE_RATIO                 (2)
#define MIN_SAMPLING_MILLISECS                  (MIN_SAMPLING_RATE_RATIO * 10)
#define MIN_STAT_SAMPLING_RATE                  \
    (MIN_SAMPLING_MILLISECS * MILLISECS(1))
#define MIN_SAMPLING_RATE                       \
    (def_sampling_rate / MIN_SAMPLING_RATE_RATIO)
#define MAX_SAMPLING_RATE                       (500 * def_sampling_rate)
#define DEF_SAMPLING_RATE_LATENCY_MULTIPLIER    (1000)
#define TRANSITION_LATENCY_LIMIT                (10 * 1000)

static uint64_t def_sampling_rate;
static uint64_t usr_sampling_rate;

/* Sampling types */
enum { DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE };

static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info);

static unsigned int dbs_enable;    /* number of CPUs using this policy */

static struct dbs_tuners {
    uint64_t     sampling_rate;
    unsigned int up_threshold;
    unsigned int powersave_bias;
} dbs_tuners_ins = {
    .sampling_rate = 0,
    .up_threshold = DEF_FREQUENCY_UP_THRESHOLD,
    .powersave_bias = 0,
};

static DEFINE_PER_CPU(struct timer, dbs_timer);

/* Sampling rates are stored internally in nanoseconds (NOW() units). */
int write_ondemand_sampling_rate(unsigned int sampling_rate)
{
    if ( (sampling_rate > MAX_SAMPLING_RATE / MICROSECS(1)) ||
         (sampling_rate < MIN_SAMPLING_RATE / MICROSECS(1)) )
        return -EINVAL;

    dbs_tuners_ins.sampling_rate = sampling_rate * MICROSECS(1);
    return 0;
}

int write_ondemand_up_threshold(unsigned int up_threshold)
{
    if ( (up_threshold > MAX_FREQUENCY_UP_THRESHOLD) ||
         (up_threshold < MIN_FREQUENCY_UP_THRESHOLD) )
        return -EINVAL;

    dbs_tuners_ins.up_threshold = up_threshold;
    return 0;
}

int get_cpufreq_ondemand_para(uint32_t *sampling_rate_max,
                              uint32_t *sampling_rate_min,
                              uint32_t *sampling_rate,
                              uint32_t *up_threshold)
{
    if ( !sampling_rate_max || !sampling_rate_min ||
         !sampling_rate || !up_threshold )
        return -EINVAL;

    *sampling_rate_max = MAX_SAMPLING_RATE / MICROSECS(1);
    *sampling_rate_min = MIN_SAMPLING_RATE / MICROSECS(1);
    *sampling_rate = dbs_tuners_ins.sampling_rate / MICROSECS(1);
    *up_threshold = dbs_tuners_ins.up_threshold;

    return 0;
}

static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
{
    uint64_t cur_ns, total_ns;
    uint64_t max_load_freq = 0;
    struct cpufreq_policy *policy;
    unsigned int max;
    unsigned int j;

    if (!this_dbs_info->enable)
        return;

    policy = this_dbs_info->cur_policy;
    max = policy->max;

    if (unlikely(policy->resume)) {
        __cpufreq_driver_target(policy, max, CPUFREQ_RELATION_H);
        return;
    }

    cur_ns = NOW();
    total_ns = cur_ns - this_dbs_info->prev_cpu_wall;
    this_dbs_info->prev_cpu_wall = NOW();

    if (total_ns < MIN_DBS_INTERVAL)
        return;

    /* Get Idle Time */
    for_each_cpu(j, policy->cpus) {
        uint64_t idle_ns, total_idle_ns;
        uint64_t load, load_freq, freq_avg;
        struct cpu_dbs_info_s *j_dbs_info;

        j_dbs_info = &per_cpu(cpu_dbs_info, j);
        total_idle_ns = get_cpu_idle_time(j);
        idle_ns = total_idle_ns - j_dbs_info->prev_cpu_idle;
        j_dbs_info->prev_cpu_idle = total_idle_ns;

        if (unlikely(total_ns < idle_ns))
            continue;

        load = 100 * (total_ns - idle_ns) / total_ns;

        freq_avg = cpufreq_driver_getavg(j, GOV_GETAVG);

        load_freq = load * freq_avg;
        if (load_freq > max_load_freq)
            max_load_freq = load_freq;
    }
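    /*
     * max_load_freq is (load in percent) * (average frequency), so
     * comparing it against up_threshold * policy->cur below asks
     * whether the busiest CPU in this policy is more than
     * up_threshold percent loaded at the current frequency.
     */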
    /* Check for frequency increase */
    if (max_load_freq > (uint64_t) dbs_tuners_ins.up_threshold * policy->cur) {
        /* if we are already at full speed then break out early */
        if (policy->cur == max)
            return;
        __cpufreq_driver_target(policy, max, CPUFREQ_RELATION_H);
        return;
    }

    /* Check for frequency decrease */
    /* if we cannot reduce the frequency anymore, break out early */
    if (policy->cur == policy->min)
        return;

    /*
     * The optimal frequency is the lowest frequency that can support
     * the current CPU usage without triggering the up policy. To be
     * safe, we focus 10 points under the threshold.
     */
    if (max_load_freq <
        (uint64_t) (dbs_tuners_ins.up_threshold - 10) * policy->cur) {
        uint64_t freq_next;

        freq_next = max_load_freq / (dbs_tuners_ins.up_threshold - 10);

        __cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_L);
    }
}

static void do_dbs_timer(void *dbs)
{
    struct cpu_dbs_info_s *dbs_info = (struct cpu_dbs_info_s *)dbs;

    if (!dbs_info->enable)
        return;

    dbs_check_cpu(dbs_info);

    /* Re-arm, aligned to the sampling period, for the next sample. */
    set_timer(&per_cpu(dbs_timer, dbs_info->cpu),
              align_timer(NOW(), dbs_tuners_ins.sampling_rate));
}

static void dbs_timer_init(struct cpu_dbs_info_s *dbs_info)
{
    dbs_info->enable = 1;

    init_timer(&per_cpu(dbs_timer, dbs_info->cpu),
               do_dbs_timer, (void *)dbs_info, dbs_info->cpu);

    set_timer(&per_cpu(dbs_timer, dbs_info->cpu),
              NOW() + dbs_tuners_ins.sampling_rate);

    if ( processor_pminfo[dbs_info->cpu]->perf.shared_type
         == CPUFREQ_SHARED_TYPE_HW )
        dbs_info->stoppable = 1;
}

static void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info)
{
    dbs_info->enable = 0;
    dbs_info->stoppable = 0;
    kill_timer(&per_cpu(dbs_timer, dbs_info->cpu));
}

int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event)
{
    unsigned int cpu = policy->cpu;
    struct cpu_dbs_info_s *this_dbs_info;
    unsigned int j;

    this_dbs_info = &per_cpu(cpu_dbs_info, cpu);

    switch (event) {
    case CPUFREQ_GOV_START:
        if ((!cpu_online(cpu)) || (!policy->cur))
            return -EINVAL;

        if (policy->cpuinfo.transition_latency >
            (TRANSITION_LATENCY_LIMIT * 1000)) {
            printk(KERN_WARNING "ondemand governor failed to load "
                   "due to too long transition latency\n");
            return -EINVAL;
        }
        if (this_dbs_info->enable)
            /* Already enabled */
            break;

        dbs_enable++;

        for_each_cpu(j, policy->cpus) {
            struct cpu_dbs_info_s *j_dbs_info;

            j_dbs_info = &per_cpu(cpu_dbs_info, j);
            j_dbs_info->cur_policy = policy;

            j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j);
            j_dbs_info->prev_cpu_wall = NOW();
        }
        this_dbs_info->cpu = cpu;

        /*
         * Start the timer schedule work when this governor
         * is used for the first time.
         */
        if ((dbs_enable == 1) && !dbs_tuners_ins.sampling_rate) {
            def_sampling_rate = (uint64_t) policy->cpuinfo.transition_latency *
                DEF_SAMPLING_RATE_LATENCY_MULTIPLIER;

            if (def_sampling_rate < MIN_STAT_SAMPLING_RATE)
                def_sampling_rate = MIN_STAT_SAMPLING_RATE;

            if (!usr_sampling_rate)
                dbs_tuners_ins.sampling_rate = def_sampling_rate;
            else if (usr_sampling_rate < MIN_SAMPLING_RATE) {
                printk(KERN_WARNING "cpufreq/ondemand: "
                       "specified sampling rate too low, using %"PRIu64"\n",
                       MIN_SAMPLING_RATE);
                dbs_tuners_ins.sampling_rate = MIN_SAMPLING_RATE;
            } else if (usr_sampling_rate > MAX_SAMPLING_RATE) {
                printk(KERN_WARNING "cpufreq/ondemand: "
                       "specified sampling rate too high, using %"PRIu64"\n",
                       MAX_SAMPLING_RATE);
                dbs_tuners_ins.sampling_rate = MAX_SAMPLING_RATE;
            } else
                dbs_tuners_ins.sampling_rate = usr_sampling_rate;
        }
        dbs_timer_init(this_dbs_info);

        break;

    case CPUFREQ_GOV_STOP:
        dbs_timer_exit(this_dbs_info);
        dbs_enable--;

        break;
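    /*
     * GOV_LIMITS: the policy's min/max bounds have changed; if the
     * frequency we are currently running at falls outside the new
     * bounds, clamp it back into range straight away.
     */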
    case CPUFREQ_GOV_LIMITS:
        if ( this_dbs_info->cur_policy == NULL )
        {
            printk(KERN_WARNING "CPU%d ondemand governor not started yet, "
                   "unable to GOV_LIMIT\n", cpu);
            return -EINVAL;
        }
        if (policy->max < this_dbs_info->cur_policy->cur)
            __cpufreq_driver_target(this_dbs_info->cur_policy,
                                    policy->max, CPUFREQ_RELATION_H);
        else if (policy->min > this_dbs_info->cur_policy->cur)
            __cpufreq_driver_target(this_dbs_info->cur_policy,
                                    policy->min, CPUFREQ_RELATION_L);
        break;
    }
    return 0;
}

static bool_t __init cpufreq_dbs_handle_option(const char *name,
                                               const char *val)
{
    if ( !strcmp(name, "rate") && val )
    {
        usr_sampling_rate = simple_strtoull(val, NULL, 0) * MICROSECS(1);
    }
    else if ( !strcmp(name, "up_threshold") && val )
    {
        unsigned long tmp = simple_strtoul(val, NULL, 0);

        if ( tmp < MIN_FREQUENCY_UP_THRESHOLD )
        {
            printk(XENLOG_WARNING "cpufreq/ondemand: "
                   "specified threshold too low, using %d\n",
                   MIN_FREQUENCY_UP_THRESHOLD);
            tmp = MIN_FREQUENCY_UP_THRESHOLD;
        }
        else if ( tmp > MAX_FREQUENCY_UP_THRESHOLD )
        {
            printk(XENLOG_WARNING "cpufreq/ondemand: "
                   "specified threshold too high, using %d\n",
                   MAX_FREQUENCY_UP_THRESHOLD);
            tmp = MAX_FREQUENCY_UP_THRESHOLD;
        }
        dbs_tuners_ins.up_threshold = tmp;
    }
    else if ( !strcmp(name, "bias") && val )
    {
        unsigned long tmp = simple_strtoul(val, NULL, 0);

        if ( tmp > 1000 )
        {
            printk(XENLOG_WARNING "cpufreq/ondemand: "
                   "specified bias too high, using 1000\n");
            tmp = 1000;
        }
        dbs_tuners_ins.powersave_bias = tmp;
    }
    else
        return 0;

    return 1;
}

struct cpufreq_governor cpufreq_gov_dbs = {
    .name = "ondemand",
    .governor = cpufreq_governor_dbs,
    .handle_option = cpufreq_dbs_handle_option
};

static int __init cpufreq_gov_dbs_init(void)
{
    return cpufreq_register_governor(&cpufreq_gov_dbs);
}
__initcall(cpufreq_gov_dbs_init);

void cpufreq_dbs_timer_suspend(void)
{
    int cpu;

    cpu = smp_processor_id();

    if ( per_cpu(cpu_dbs_info, cpu).stoppable )
        stop_timer(&per_cpu(dbs_timer, cpu));
}

void cpufreq_dbs_timer_resume(void)
{
    int cpu;
    struct timer *t;
    s_time_t now;

    cpu = smp_processor_id();

    if ( per_cpu(cpu_dbs_info, cpu).stoppable )
    {
        now = NOW();
        t = &per_cpu(dbs_timer, cpu);
        /* Fire a missed sample immediately, otherwise re-arm the timer. */
        if ( t->expires <= now )
            t->function(t->data);
        else
            set_timer(t, align_timer(now, dbs_tuners_ins.sampling_rate));
    }
}