author     root <root@artemis.panaceas.org>   2015-12-25 04:40:36 +0000
committer  root <root@artemis.panaceas.org>   2015-12-25 04:40:36 +0000
commit     849369d6c66d3054688672f97d31fceb8e8230fb
tree       6135abc790ca67dedbe07c39806591e70eda81ce /drivers/cpuidle
initial_commit
Diffstat (limited to 'drivers/cpuidle')

 -rw-r--r--  drivers/cpuidle/Kconfig            |  20
 -rw-r--r--  drivers/cpuidle/Makefile           |   5
 -rw-r--r--  drivers/cpuidle/cpuidle.c          | 441
 -rw-r--r--  drivers/cpuidle/cpuidle.h          |  32
 -rw-r--r--  drivers/cpuidle/driver.c           |  68
 -rw-r--r--  drivers/cpuidle/governor.c         | 138
 -rw-r--r--  drivers/cpuidle/governors/Makefile |   6
 -rw-r--r--  drivers/cpuidle/governors/ladder.c | 182
 -rw-r--r--  drivers/cpuidle/governors/menu.c   | 429
 -rw-r--r--  drivers/cpuidle/sysfs.c            | 387

 10 files changed, 1708 insertions(+), 0 deletions(-)
diff --git a/drivers/cpuidle/Kconfig b/drivers/cpuidle/Kconfig
new file mode 100644
index 00000000..7dbc4a83
--- /dev/null
+++ b/drivers/cpuidle/Kconfig
@@ -0,0 +1,20 @@
+
+config CPU_IDLE
+	bool "CPU idle PM support"
+	default ACPI
+	help
+	  CPU idle is a generic framework for supporting software-controlled
+	  idle processor power management. It includes modular cross-platform
+	  governors that can be swapped during runtime.
+
+	  If you're using an ACPI-enabled platform, you should say Y here.
+
+config CPU_IDLE_GOV_LADDER
+	bool
+	depends on CPU_IDLE
+	default y
+
+config CPU_IDLE_GOV_MENU
+	bool
+	depends on CPU_IDLE && NO_HZ
+	default y

diff --git a/drivers/cpuidle/Makefile b/drivers/cpuidle/Makefile
new file mode 100644
index 00000000..5634f883
--- /dev/null
+++ b/drivers/cpuidle/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for cpuidle.
+#
+
+obj-y += cpuidle.o driver.o governor.o sysfs.o governors/
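
Nothing in this directory defines idle states itself; a platform driver fills in a struct cpuidle_device and registers it with the core added below. As a rough orientation before the core file, here is a minimal, hypothetical consumer of the 2.6.x-era API introduced by this commit. The driver name, the idle routine, and all latency/residency numbers are invented; cpu_do_idle() stands in for whatever the architecture uses to wait for an interrupt, and a real driver would register one device per CPU rather than just the boot CPU:

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/smp.h>
#include <linux/percpu.h>
#include <linux/ktime.h>
#include <linux/cpuidle.h>

/* Hypothetical single-state "wait for interrupt" entry routine. */
static int demo_enter_wfi(struct cpuidle_device *dev,
			  struct cpuidle_state *state)
{
	ktime_t t1 = ktime_get();

	cpu_do_idle();	/* arch-specific WFI; placeholder */

	/* ->enter reports the measured residency in microseconds */
	return (int)ktime_to_us(ktime_sub(ktime_get(), t1));
}

static struct cpuidle_driver demo_idle_driver = {
	.name  = "demo_idle",
	.owner = THIS_MODULE,
};

static DEFINE_PER_CPU(struct cpuidle_device, demo_idle_dev);

static int __init demo_idle_init(void)
{
	struct cpuidle_device *dev = &per_cpu(demo_idle_dev, smp_processor_id());
	struct cpuidle_state *st = &dev->states[0];
	int ret;

	ret = cpuidle_register_driver(&demo_idle_driver);
	if (ret)
		return ret;

	snprintf(st->name, CPUIDLE_NAME_LEN, "WFI");
	snprintf(st->desc, CPUIDLE_DESC_LEN, "wait for interrupt");
	st->exit_latency = 1;		/* us, made up */
	st->target_residency = 10;	/* us, made up */
	st->flags = CPUIDLE_FLAG_TIME_VALID;
	st->enter = demo_enter_wfi;

	dev->state_count = 1;
	dev->cpu = smp_processor_id();

	return cpuidle_register_device(dev);
}
module_init(demo_idle_init);
MODULE_LICENSE("GPL");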
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
new file mode 100644
index 00000000..bf509245
--- /dev/null
+++ b/drivers/cpuidle/cpuidle.c
@@ -0,0 +1,441 @@
+/*
+ * cpuidle.c - core cpuidle infrastructure
+ *
+ * (C) 2006-2007 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
+ *               Shaohua Li <shaohua.li@intel.com>
+ *               Adam Belay <abelay@novell.com>
+ *
+ * This code is licenced under the GPL.
+ */
+
+#include <linux/kernel.h>
+#include <linux/mutex.h>
+#include <linux/sched.h>
+#include <linux/notifier.h>
+#include <linux/pm_qos_params.h>
+#include <linux/cpu.h>
+#include <linux/cpuidle.h>
+#include <linux/ktime.h>
+#include <linux/hrtimer.h>
+#include <trace/events/power.h>
+
+#include "cpuidle.h"
+
+DEFINE_PER_CPU(struct cpuidle_device *, cpuidle_devices);
+
+DEFINE_MUTEX(cpuidle_lock);
+LIST_HEAD(cpuidle_detected_devices);
+static void (*pm_idle_old)(void);
+
+static int enabled_devices;
+
+#if defined(CONFIG_ARCH_HAS_CPU_IDLE_WAIT)
+static void cpuidle_kick_cpus(void)
+{
+	cpu_idle_wait();
+}
+#elif defined(CONFIG_SMP)
+# error "Arch needs cpu_idle_wait() equivalent here"
+#else /* !CONFIG_ARCH_HAS_CPU_IDLE_WAIT && !CONFIG_SMP */
+static void cpuidle_kick_cpus(void) {}
+#endif
+
+static int __cpuidle_register_device(struct cpuidle_device *dev);
+
+/**
+ * cpuidle_idle_call - the main idle loop
+ *
+ * NOTE: no locks or semaphores should be used here
+ */
+static void cpuidle_idle_call(void)
+{
+	struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices);
+	struct cpuidle_state *target_state;
+	int next_state;
+
+	/* check if the device is ready */
+	if (!dev || !dev->enabled) {
+		if (pm_idle_old)
+			pm_idle_old();
+		else
+#if defined(CONFIG_ARCH_HAS_DEFAULT_IDLE)
+			default_idle();
+#else
+			local_irq_enable();
+#endif
+		return;
+	}
+
+#if 0
+	/* shows regressions, re-enable for 2.6.29 */
+	/*
+	 * run any timers that can be run now, at this point
+	 * before calculating the idle duration etc.
+	 */
+	hrtimer_peek_ahead_timers();
+#endif
+
+	/*
+	 * Call the device's prepare function before calling the
+	 * governor's select function. ->prepare gives the device's
+	 * cpuidle driver a chance to update any dynamic information
+	 * of its cpuidle states for the current idle period, e.g.
+	 * state availability, latencies, residencies, etc.
+	 */
+	if (dev->prepare)
+		dev->prepare(dev);
+
+	/* ask the governor for the next state */
+	next_state = cpuidle_curr_governor->select(dev);
+	if (need_resched()) {
+		local_irq_enable();
+		return;
+	}
+
+	target_state = &dev->states[next_state];
+
+	/* enter the state and update stats */
+	dev->last_state = target_state;
+
+	trace_power_start(POWER_CSTATE, next_state, dev->cpu);
+	trace_cpu_idle(next_state, dev->cpu);
+
+	dev->last_residency = target_state->enter(dev, target_state);
+
+	trace_power_end(dev->cpu);
+	trace_cpu_idle(PWR_EVENT_EXIT, dev->cpu);
+
+	if (dev->last_state)
+		target_state = dev->last_state;
+
+	target_state->time += (unsigned long long)dev->last_residency;
+	target_state->usage++;
+
+	/* give the governor an opportunity to reflect on the outcome */
+	if (cpuidle_curr_governor->reflect)
+		cpuidle_curr_governor->reflect(dev);
+}
+
+/**
+ * cpuidle_install_idle_handler - installs the cpuidle idle loop handler
+ */
+void cpuidle_install_idle_handler(void)
+{
+	if (enabled_devices && (pm_idle != cpuidle_idle_call)) {
+		/* Make sure all changes finished before we switch to new idle */
+		smp_wmb();
+		pm_idle = cpuidle_idle_call;
+	}
+}
+
+/**
+ * cpuidle_uninstall_idle_handler - uninstalls the cpuidle idle loop handler
+ */
+void cpuidle_uninstall_idle_handler(void)
+{
+	if (enabled_devices && pm_idle_old && (pm_idle != pm_idle_old)) {
+		pm_idle = pm_idle_old;
+		cpuidle_kick_cpus();
+	}
+}
+
+/**
+ * cpuidle_pause_and_lock - temporarily disables CPUIDLE
+ */
+void cpuidle_pause_and_lock(void)
+{
+	mutex_lock(&cpuidle_lock);
+	cpuidle_uninstall_idle_handler();
+}
+
+EXPORT_SYMBOL_GPL(cpuidle_pause_and_lock);
+
+/**
+ * cpuidle_resume_and_unlock - resumes CPUIDLE operation
+ */
+void cpuidle_resume_and_unlock(void)
+{
+	cpuidle_install_idle_handler();
+	mutex_unlock(&cpuidle_lock);
+}
+
+EXPORT_SYMBOL_GPL(cpuidle_resume_and_unlock);
+
+#ifdef CONFIG_ARCH_HAS_CPU_RELAX
+static int poll_idle(struct cpuidle_device *dev, struct cpuidle_state *st)
+{
+	ktime_t t1, t2;
+	s64 diff;
+	int ret;
+
+	t1 = ktime_get();
+	local_irq_enable();
+	while (!need_resched())
+		cpu_relax();
+
+	t2 = ktime_get();
+	diff = ktime_to_us(ktime_sub(t2, t1));
+	if (diff > INT_MAX)
+		diff = INT_MAX;
+
+	ret = (int) diff;
+	return ret;
+}
+
+static void poll_idle_init(struct cpuidle_device *dev)
+{
+	struct cpuidle_state *state = &dev->states[0];
+
+	cpuidle_set_statedata(state, NULL);
+
+	snprintf(state->name, CPUIDLE_NAME_LEN, "POLL");
+	snprintf(state->desc, CPUIDLE_DESC_LEN, "CPUIDLE CORE POLL IDLE");
+	state->exit_latency = 0;
+	state->target_residency = 0;
+	state->power_usage = -1;
+	state->flags = 0;
+	state->enter = poll_idle;
+}
+#else
+static void poll_idle_init(struct cpuidle_device *dev) {}
+#endif /* CONFIG_ARCH_HAS_CPU_RELAX */
+
+/**
+ * cpuidle_enable_device - enables idle PM for a CPU
+ * @dev: the CPU
+ *
+ * This function must be called between cpuidle_pause_and_lock and
+ * cpuidle_resume_and_unlock when used externally.
+ */
+int cpuidle_enable_device(struct cpuidle_device *dev)
+{
+	int ret, i;
+
+	if (dev->enabled)
+		return 0;
+	if (!cpuidle_get_driver() || !cpuidle_curr_governor)
+		return -EIO;
+	if (!dev->state_count)
+		return -EINVAL;
+
+	if (dev->registered == 0) {
+		ret = __cpuidle_register_device(dev);
+		if (ret)
+			return ret;
+	}
+
+	poll_idle_init(dev);
+
+	if ((ret = cpuidle_add_state_sysfs(dev)))
+		return ret;
+
+	if (cpuidle_curr_governor->enable &&
+	    (ret = cpuidle_curr_governor->enable(dev)))
+		goto fail_sysfs;
+
+	for (i = 0; i < dev->state_count; i++) {
+		dev->states[i].usage = 0;
+		dev->states[i].time = 0;
+	}
+	dev->last_residency = 0;
+	dev->last_state = NULL;
+
+	smp_wmb();
+
+	dev->enabled = 1;
+
+	enabled_devices++;
+	return 0;
+
+fail_sysfs:
+	cpuidle_remove_state_sysfs(dev);
+
+	return ret;
+}
+
+EXPORT_SYMBOL_GPL(cpuidle_enable_device);
+
+/**
+ * cpuidle_disable_device - disables idle PM for a CPU
+ * @dev: the CPU
+ *
+ * This function must be called between cpuidle_pause_and_lock and
+ * cpuidle_resume_and_unlock when used externally.
+ */
+void cpuidle_disable_device(struct cpuidle_device *dev)
+{
+	if (!dev->enabled)
+		return;
+	if (!cpuidle_get_driver() || !cpuidle_curr_governor)
+		return;
+
+	dev->enabled = 0;
+
+	if (cpuidle_curr_governor->disable)
+		cpuidle_curr_governor->disable(dev);
+
+	cpuidle_remove_state_sysfs(dev);
+	enabled_devices--;
+}
+
+EXPORT_SYMBOL_GPL(cpuidle_disable_device);
+
+/**
+ * __cpuidle_register_device - internal register function called before register
+ * and enable routines
+ * @dev: the cpu
+ *
+ * cpuidle_lock mutex must be held before this is called
+ */
+static int __cpuidle_register_device(struct cpuidle_device *dev)
+{
+	int ret;
+	struct sys_device *sys_dev = get_cpu_sysdev((unsigned long)dev->cpu);
+	struct cpuidle_driver *cpuidle_driver = cpuidle_get_driver();
+
+	if (!sys_dev)
+		return -EINVAL;
+	if (!try_module_get(cpuidle_driver->owner))
+		return -EINVAL;
+
+	init_completion(&dev->kobj_unregister);
+
+	/*
+	 * cpuidle driver should set the dev->power_specified bit
+	 * before registering the device if the driver provides
+	 * power_usage numbers.
+	 *
+	 * For those devices whose ->power_specified is not set,
+	 * we fill in power_usage with decreasing values as the
+	 * cpuidle code has an implicit assumption that state Cn
+	 * uses less power than C(n-1).
+	 *
+	 * With CONFIG_ARCH_HAS_CPU_RELAX, C0 is already assigned
+	 * a power value of -1. So we use -2, -3, etc, for other
+	 * c-states.
+	 */
+	if (!dev->power_specified) {
+		int i;
+		for (i = CPUIDLE_DRIVER_STATE_START; i < dev->state_count; i++)
+			dev->states[i].power_usage = -1 - i;
+	}
+
+	per_cpu(cpuidle_devices, dev->cpu) = dev;
+	list_add(&dev->device_list, &cpuidle_detected_devices);
+	if ((ret = cpuidle_add_sysfs(sys_dev))) {
+		module_put(cpuidle_driver->owner);
+		return ret;
+	}
+
+	dev->registered = 1;
+	return 0;
+}
+
+/**
+ * cpuidle_register_device - registers a CPU's idle PM feature
+ * @dev: the cpu
+ */
+int cpuidle_register_device(struct cpuidle_device *dev)
+{
+	int ret;
+
+	mutex_lock(&cpuidle_lock);
+
+	if ((ret = __cpuidle_register_device(dev))) {
+		mutex_unlock(&cpuidle_lock);
+		return ret;
+	}
+
+	cpuidle_enable_device(dev);
+	cpuidle_install_idle_handler();
+
+	mutex_unlock(&cpuidle_lock);
+
+	return 0;
+
+}
+
+EXPORT_SYMBOL_GPL(cpuidle_register_device);
+
+/**
+ * cpuidle_unregister_device - unregisters a CPU's idle PM feature
+ * @dev: the cpu
+ */
+void cpuidle_unregister_device(struct cpuidle_device *dev)
+{
+	struct sys_device *sys_dev = get_cpu_sysdev((unsigned long)dev->cpu);
+	struct cpuidle_driver *cpuidle_driver = cpuidle_get_driver();
+
+	if (dev->registered == 0)
+		return;
+
+	cpuidle_pause_and_lock();
+
+	cpuidle_disable_device(dev);
+
+	cpuidle_remove_sysfs(sys_dev);
+	list_del(&dev->device_list);
+	wait_for_completion(&dev->kobj_unregister);
+	per_cpu(cpuidle_devices, dev->cpu) = NULL;
+
+	cpuidle_resume_and_unlock();
+
+	module_put(cpuidle_driver->owner);
+}
+
+EXPORT_SYMBOL_GPL(cpuidle_unregister_device);
+
+#ifdef CONFIG_SMP
+
+static void smp_callback(void *v)
+{
+	/* we already woke the CPU up, nothing more to do */
+}
+
+/*
+ * This function gets called when a part of the kernel has a new latency
+ * requirement. This means we need to get all processors out of their C-state,
+ * and then recalculate a new suitable C-state. Just do a cross-cpu IPI; that
+ * wakes them all right up.
+ */
+static int cpuidle_latency_notify(struct notifier_block *b,
+		unsigned long l, void *v)
+{
+	smp_call_function(smp_callback, NULL, 1);
+	return NOTIFY_OK;
+}
+
+static struct notifier_block cpuidle_latency_notifier = {
+	.notifier_call = cpuidle_latency_notify,
+};
+
+static inline void latency_notifier_init(struct notifier_block *n)
+{
+	pm_qos_add_notifier(PM_QOS_CPU_DMA_LATENCY, n);
+}
+
+#else /* CONFIG_SMP */
+
+#define latency_notifier_init(x) do { } while (0)
+
+#endif /* CONFIG_SMP */
+
+/**
+ * cpuidle_init - core initializer
+ */
+static int __init cpuidle_init(void)
+{
+	int ret;
+
+	pm_idle_old = pm_idle;
+
+	ret = cpuidle_add_class_sysfs(&cpu_sysdev_class);
+	if (ret)
+		return ret;
+
+	latency_notifier_init(&cpuidle_latency_notifier);
+
+	return 0;
+}
+
+core_initcall(cpuidle_init);
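
The latency notifier registered by cpuidle_init is the bridge to PM QoS: whenever any kernel user tightens the PM_QOS_CPU_DMA_LATENCY bound, the cross-CPU IPI above kicks every processor out of its current C-state so the governors re-select under the new limit. For orientation, here is a hedged sketch of the requester side, using the pm_qos_params API of this kernel generation; the demo driver and the 20 us figure are invented:

#include <linux/module.h>
#include <linux/pm_qos_params.h>

/* Hypothetical driver that cannot tolerate deep C-state exit latency
 * while a transfer is in flight. */
static struct pm_qos_request_list demo_qos;

static void demo_transfer_start(void)
{
	/* Cap wakeup latency at 20 us; this fires the notifier chain in
	 * cpuidle.c, which IPIs all CPUs out of their current C-state. */
	pm_qos_update_request(&demo_qos, 20);
}

static void demo_transfer_done(void)
{
	/* Drop the constraint; deep states become eligible again. */
	pm_qos_update_request(&demo_qos, PM_QOS_DEFAULT_VALUE);
}

static int __init demo_qos_init(void)
{
	pm_qos_add_request(&demo_qos, PM_QOS_CPU_DMA_LATENCY,
			   PM_QOS_DEFAULT_VALUE);
	return 0;
}

static void __exit demo_qos_exit(void)
{
	pm_qos_remove_request(&demo_qos);
}

module_init(demo_qos_init);
module_exit(demo_qos_exit);
MODULE_LICENSE("GPL");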
diff --git a/drivers/cpuidle/cpuidle.h b/drivers/cpuidle/cpuidle.h
new file mode 100644
index 00000000..33e50d55
--- /dev/null
+++ b/drivers/cpuidle/cpuidle.h
@@ -0,0 +1,32 @@
+/*
+ * cpuidle.h - The internal header file
+ */
+
+#ifndef __DRIVER_CPUIDLE_H
+#define __DRIVER_CPUIDLE_H
+
+#include <linux/sysdev.h>
+
+/* For internal use only */
+extern struct cpuidle_governor *cpuidle_curr_governor;
+extern struct list_head cpuidle_governors;
+extern struct list_head cpuidle_detected_devices;
+extern struct mutex cpuidle_lock;
+extern spinlock_t cpuidle_driver_lock;
+
+/* idle loop */
+extern void cpuidle_install_idle_handler(void);
+extern void cpuidle_uninstall_idle_handler(void);
+
+/* governors */
+extern int cpuidle_switch_governor(struct cpuidle_governor *gov);
+
+/* sysfs */
+extern int cpuidle_add_class_sysfs(struct sysdev_class *cls);
+extern void cpuidle_remove_class_sysfs(struct sysdev_class *cls);
+extern int cpuidle_add_state_sysfs(struct cpuidle_device *device);
+extern void cpuidle_remove_state_sysfs(struct cpuidle_device *device);
+extern int cpuidle_add_sysfs(struct sys_device *sysdev);
+extern void cpuidle_remove_sysfs(struct sys_device *sysdev);
+
+#endif /* __DRIVER_CPUIDLE_H */

diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c
new file mode 100644
index 00000000..fd1601e3
--- /dev/null
+++ b/drivers/cpuidle/driver.c
@@ -0,0 +1,68 @@
+/*
+ * driver.c - driver support
+ *
+ * (C) 2006-2007 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
+ *               Shaohua Li <shaohua.li@intel.com>
+ *               Adam Belay <abelay@novell.com>
+ *
+ * This code is licenced under the GPL.
+ */
+
+#include <linux/mutex.h>
+#include <linux/module.h>
+#include <linux/cpuidle.h>
+
+#include "cpuidle.h"
+
+static struct cpuidle_driver *cpuidle_curr_driver;
+DEFINE_SPINLOCK(cpuidle_driver_lock);
+
+/**
+ * cpuidle_register_driver - registers a driver
+ * @drv: the driver
+ */
+int cpuidle_register_driver(struct cpuidle_driver *drv)
+{
+	if (!drv)
+		return -EINVAL;
+
+	spin_lock(&cpuidle_driver_lock);
+	if (cpuidle_curr_driver) {
+		spin_unlock(&cpuidle_driver_lock);
+		return -EBUSY;
+	}
+	cpuidle_curr_driver = drv;
+	spin_unlock(&cpuidle_driver_lock);
+
+	return 0;
+}
+
+EXPORT_SYMBOL_GPL(cpuidle_register_driver);
+
+/**
+ * cpuidle_get_driver - return the current driver
+ */
+struct cpuidle_driver *cpuidle_get_driver(void)
+{
+	return cpuidle_curr_driver;
+}
+EXPORT_SYMBOL_GPL(cpuidle_get_driver);
+
+/**
+ * cpuidle_unregister_driver - unregisters a driver
+ * @drv: the driver
+ */
+void cpuidle_unregister_driver(struct cpuidle_driver *drv)
+{
+	if (drv != cpuidle_curr_driver) {
+		WARN(1, "invalid cpuidle_unregister_driver(%s)\n",
+			drv->name);
+		return;
+	}
+
+	spin_lock(&cpuidle_driver_lock);
+	cpuidle_curr_driver = NULL;
+	spin_unlock(&cpuidle_driver_lock);
+}
+
+EXPORT_SYMBOL_GPL(cpuidle_unregister_driver);

diff --git a/drivers/cpuidle/governor.c b/drivers/cpuidle/governor.c
new file mode 100644
index 00000000..724c164d
--- /dev/null
+++ b/drivers/cpuidle/governor.c
@@ -0,0 +1,138 @@
+/*
+ * governor.c - governor support
+ *
+ * (C) 2006-2007 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
+ *               Shaohua Li <shaohua.li@intel.com>
+ *               Adam Belay <abelay@novell.com>
+ *
+ * This code is licenced under the GPL.
+ */
+
+#include <linux/mutex.h>
+#include <linux/module.h>
+#include <linux/cpuidle.h>
+
+#include "cpuidle.h"
+
+LIST_HEAD(cpuidle_governors);
+struct cpuidle_governor *cpuidle_curr_governor;
+
+/**
+ * __cpuidle_find_governor - finds a governor of the specified name
+ * @str: the name
+ *
+ * Must be called with cpuidle_lock acquired.
+ */
+static struct cpuidle_governor * __cpuidle_find_governor(const char *str)
+{
+	struct cpuidle_governor *gov;
+
+	list_for_each_entry(gov, &cpuidle_governors, governor_list)
+		if (!strnicmp(str, gov->name, CPUIDLE_NAME_LEN))
+			return gov;
+
+	return NULL;
+}
+
+/**
+ * cpuidle_switch_governor - changes the governor
+ * @gov: the new target governor
+ *
+ * NOTE: "gov" can be NULL to specify disabled
+ * Must be called with cpuidle_lock acquired.
+ */
+int cpuidle_switch_governor(struct cpuidle_governor *gov)
+{
+	struct cpuidle_device *dev;
+
+	if (gov == cpuidle_curr_governor)
+		return 0;
+
+	cpuidle_uninstall_idle_handler();
+
+	if (cpuidle_curr_governor) {
+		list_for_each_entry(dev, &cpuidle_detected_devices, device_list)
+			cpuidle_disable_device(dev);
+		module_put(cpuidle_curr_governor->owner);
+	}
+
+	cpuidle_curr_governor = gov;
+
+	if (gov) {
+		if (!try_module_get(cpuidle_curr_governor->owner))
+			return -EINVAL;
+		list_for_each_entry(dev, &cpuidle_detected_devices, device_list)
+			cpuidle_enable_device(dev);
+		cpuidle_install_idle_handler();
+		printk(KERN_INFO "cpuidle: using governor %s\n", gov->name);
+	}
+
+	return 0;
+}
+
+/**
+ * cpuidle_register_governor - registers a governor
+ * @gov: the governor
+ */
+int cpuidle_register_governor(struct cpuidle_governor *gov)
+{
+	int ret = -EEXIST;
+
+	if (!gov || !gov->select)
+		return -EINVAL;
+
+	mutex_lock(&cpuidle_lock);
+	if (__cpuidle_find_governor(gov->name) == NULL) {
+		ret = 0;
+		list_add_tail(&gov->governor_list, &cpuidle_governors);
+		if (!cpuidle_curr_governor ||
+		    cpuidle_curr_governor->rating < gov->rating)
+			cpuidle_switch_governor(gov);
+	}
+	mutex_unlock(&cpuidle_lock);
+
+	return ret;
+}
+
+/**
+ * cpuidle_replace_governor - find a replacement governor
+ * @exclude_rating: the rating that will be skipped while looking for
+ * new governor.
+ */
+static struct cpuidle_governor *cpuidle_replace_governor(int exclude_rating)
+{
+	struct cpuidle_governor *gov;
+	struct cpuidle_governor *ret_gov = NULL;
+	unsigned int max_rating = 0;
+
+	list_for_each_entry(gov, &cpuidle_governors, governor_list) {
+		if (gov->rating == exclude_rating)
+			continue;
+		if (gov->rating > max_rating) {
+			max_rating = gov->rating;
+			ret_gov = gov;
+		}
+	}
+
+	return ret_gov;
+}
+
+/**
+ * cpuidle_unregister_governor - unregisters a governor
+ * @gov: the governor
+ */
+void cpuidle_unregister_governor(struct cpuidle_governor *gov)
+{
+	if (!gov)
+		return;
+
+	mutex_lock(&cpuidle_lock);
+	if (gov == cpuidle_curr_governor) {
+		struct cpuidle_governor *new_gov;
+		new_gov = cpuidle_replace_governor(gov->rating);
+		cpuidle_switch_governor(new_gov);
+	}
+	list_del(&gov->governor_list);
+	mutex_unlock(&cpuidle_lock);
+}
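
Note that governor selection is rating-driven: cpuidle_register_governor only switches to a newcomer whose rating beats the incumbent's, and the ladder and menu governors below register with ratings 10 and 20. A hypothetical skeleton of a third governor, shown only to illustrate the contract (the name and the deliberately trivial policy are invented; ->select must return a valid index into dev->states[]):

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/cpuidle.h>

/* Toy policy: always pick the shallowest real C-state. */
static int always_c1_select(struct cpuidle_device *dev)
{
	return CPUIDLE_DRIVER_STATE_START;
}

static struct cpuidle_governor always_c1_governor = {
	.name   = "always_c1",
	.rating = 5,	/* below ladder (10) and menu (20), so it only becomes
			 * current if explicitly chosen via sysfs */
	.select = always_c1_select,
	.owner  = THIS_MODULE,
};

static int __init always_c1_init(void)
{
	return cpuidle_register_governor(&always_c1_governor);
}
module_init(always_c1_init);
MODULE_LICENSE("GPL");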
diff --git a/drivers/cpuidle/governors/Makefile b/drivers/cpuidle/governors/Makefile
new file mode 100644
index 00000000..1b512722
--- /dev/null
+++ b/drivers/cpuidle/governors/Makefile
@@ -0,0 +1,6 @@
+#
+# Makefile for cpuidle governors.
+#
+
+obj-$(CONFIG_CPU_IDLE_GOV_LADDER) += ladder.o
+obj-$(CONFIG_CPU_IDLE_GOV_MENU) += menu.o

diff --git a/drivers/cpuidle/governors/ladder.c b/drivers/cpuidle/governors/ladder.c
new file mode 100644
index 00000000..12c98900
--- /dev/null
+++ b/drivers/cpuidle/governors/ladder.c
@@ -0,0 +1,182 @@
+/*
+ * ladder.c - the residency ladder algorithm
+ *
+ * Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
+ * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
+ * Copyright (C) 2004, 2005 Dominik Brodowski <linux@brodo.de>
+ *
+ * (C) 2006-2007 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
+ *               Shaohua Li <shaohua.li@intel.com>
+ *               Adam Belay <abelay@novell.com>
+ *
+ * This code is licenced under the GPL.
+ */
+
+#include <linux/kernel.h>
+#include <linux/cpuidle.h>
+#include <linux/pm_qos_params.h>
+#include <linux/moduleparam.h>
+#include <linux/jiffies.h>
+
+#include <asm/io.h>
+#include <asm/uaccess.h>
+
+#define PROMOTION_COUNT 4
+#define DEMOTION_COUNT 1
+
+struct ladder_device_state {
+	struct {
+		u32 promotion_count;
+		u32 demotion_count;
+		u32 promotion_time;
+		u32 demotion_time;
+	} threshold;
+	struct {
+		int promotion_count;
+		int demotion_count;
+	} stats;
+};
+
+struct ladder_device {
+	struct ladder_device_state states[CPUIDLE_STATE_MAX];
+	int last_state_idx;
+};
+
+static DEFINE_PER_CPU(struct ladder_device, ladder_devices);
+
+/**
+ * ladder_do_selection - prepares private data for a state change
+ * @ldev: the ladder device
+ * @old_idx: the current state index
+ * @new_idx: the new target state index
+ */
+static inline void ladder_do_selection(struct ladder_device *ldev,
+				       int old_idx, int new_idx)
+{
+	ldev->states[old_idx].stats.promotion_count = 0;
+	ldev->states[old_idx].stats.demotion_count = 0;
+	ldev->last_state_idx = new_idx;
+}
+
+/**
+ * ladder_select_state - selects the next state to enter
+ * @dev: the CPU
+ */
+static int ladder_select_state(struct cpuidle_device *dev)
+{
+	struct ladder_device *ldev = &__get_cpu_var(ladder_devices);
+	struct ladder_device_state *last_state;
+	int last_residency, last_idx = ldev->last_state_idx;
+	int latency_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY);
+
+	/* Special case when user has set very strict latency requirement */
+	if (unlikely(latency_req == 0)) {
+		ladder_do_selection(ldev, last_idx, 0);
+		return 0;
+	}
+
+	last_state = &ldev->states[last_idx];
+
+	if (dev->states[last_idx].flags & CPUIDLE_FLAG_TIME_VALID)
+		last_residency = cpuidle_get_last_residency(dev) - dev->states[last_idx].exit_latency;
+	else
+		last_residency = last_state->threshold.promotion_time + 1;
+
+	/* consider promotion */
+	if (last_idx < dev->state_count - 1 &&
+	    last_residency > last_state->threshold.promotion_time &&
+	    dev->states[last_idx + 1].exit_latency <= latency_req) {
+		last_state->stats.promotion_count++;
+		last_state->stats.demotion_count = 0;
+		if (last_state->stats.promotion_count >= last_state->threshold.promotion_count) {
+			ladder_do_selection(ldev, last_idx, last_idx + 1);
+			return last_idx + 1;
+		}
+	}
+
+	/* consider demotion */
+	if (last_idx > CPUIDLE_DRIVER_STATE_START &&
+	    dev->states[last_idx].exit_latency > latency_req) {
+		int i;
+
+		for (i = last_idx - 1; i > CPUIDLE_DRIVER_STATE_START; i--) {
+			if (dev->states[i].exit_latency <= latency_req)
+				break;
+		}
+		ladder_do_selection(ldev, last_idx, i);
+		return i;
+	}
+
+	if (last_idx > CPUIDLE_DRIVER_STATE_START &&
+	    last_residency < last_state->threshold.demotion_time) {
+		last_state->stats.demotion_count++;
+		last_state->stats.promotion_count = 0;
+		if (last_state->stats.demotion_count >= last_state->threshold.demotion_count) {
+			ladder_do_selection(ldev, last_idx, last_idx - 1);
+			return last_idx - 1;
+		}
+	}
+
+	/* otherwise remain at the current state */
+	return last_idx;
+}
+
+/**
+ * ladder_enable_device - setup for the governor
+ * @dev: the CPU
+ */
+static int ladder_enable_device(struct cpuidle_device *dev)
+{
+	int i;
+	struct ladder_device *ldev = &per_cpu(ladder_devices, dev->cpu);
+	struct ladder_device_state *lstate;
+	struct cpuidle_state *state;
+
+	ldev->last_state_idx = CPUIDLE_DRIVER_STATE_START;
+
+	for (i = 0; i < dev->state_count; i++) {
+		state = &dev->states[i];
+		lstate = &ldev->states[i];
+
+		lstate->stats.promotion_count = 0;
+		lstate->stats.demotion_count = 0;
+
+		lstate->threshold.promotion_count = PROMOTION_COUNT;
+		lstate->threshold.demotion_count = DEMOTION_COUNT;
+
+		if (i < dev->state_count - 1)
+			lstate->threshold.promotion_time = state->exit_latency;
+		if (i > 0)
+			lstate->threshold.demotion_time = state->exit_latency;
+	}
+
+	return 0;
+}
+
+static struct cpuidle_governor ladder_governor = {
+	.name =		"ladder",
+	.rating =	10,
+	.enable =	ladder_enable_device,
+	.select =	ladder_select_state,
+	.owner =	THIS_MODULE,
+};
+
+/**
+ * init_ladder - initializes the governor
+ */
+static int __init init_ladder(void)
+{
+	return cpuidle_register_governor(&ladder_governor);
+}
+
+/**
+ * exit_ladder - exits the governor
+ */
+static void __exit exit_ladder(void)
+{
+	cpuidle_unregister_governor(&ladder_governor);
+}
+
+MODULE_LICENSE("GPL");
+module_init(init_ladder);
+module_exit(exit_ladder);
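
Ladder's policy boils down to two counters per rung: promote after PROMOTION_COUNT (4) consecutive idle periods whose residency beat the promotion threshold, demote as soon as DEMOTION_COUNT (1) period falls short. A stand-alone userspace sketch of just that counting logic, with invented thresholds and an invented residency trace (the real governor additionally honours PM QoS latency limits and exit latencies):

#include <stdio.h>

#define PROMOTION_COUNT 4
#define DEMOTION_COUNT  1

int main(void)
{
	/* One rung: promote above 100 us residency, demote below 100 us. */
	const int promotion_time = 100, demotion_time = 100;
	/* Invented residency measurements for successive idle periods. */
	const int residency[] = { 150, 180, 120, 160, 40 };
	int promo = 0, demo = 0, state = 1;

	for (unsigned i = 0; i < sizeof(residency) / sizeof(residency[0]); i++) {
		if (residency[i] > promotion_time) {
			promo++;
			demo = 0;
			if (promo >= PROMOTION_COUNT) {
				state++;	/* climb one rung */
				promo = 0;
			}
		} else if (residency[i] < demotion_time) {
			demo++;
			promo = 0;
			if (demo >= DEMOTION_COUNT) {
				state--;	/* fall one rung */
				demo = 0;
			}
		}
		printf("idle %u: residency=%3dus -> state C%d\n",
		       i, residency[i], state);
	}
	/* prints promotion to C2 after four long idles, then demotion */
	return 0;
}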
diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
new file mode 100644
index 00000000..e2f72719
--- /dev/null
+++ b/drivers/cpuidle/governors/menu.c
@@ -0,0 +1,429 @@
+/*
+ * menu.c - the menu idle governor
+ *
+ * Copyright (C) 2006-2007 Adam Belay <abelay@novell.com>
+ * Copyright (C) 2009 Intel Corporation
+ * Author:
+ *        Arjan van de Ven <arjan@linux.intel.com>
+ *
+ * This code is licenced under the GPL version 2 as described
+ * in the COPYING file that accompanies the Linux Kernel.
+ */
+
+#include <linux/kernel.h>
+#include <linux/cpuidle.h>
+#include <linux/pm_qos_params.h>
+#include <linux/time.h>
+#include <linux/ktime.h>
+#include <linux/hrtimer.h>
+#include <linux/tick.h>
+#include <linux/sched.h>
+#include <linux/math64.h>
+
+#define BUCKETS 12
+#define INTERVALS 8
+#define RESOLUTION 1024
+#define DECAY 8
+#define MAX_INTERESTING 50000
+#define STDDEV_THRESH 400
+
+
+/*
+ * Concepts and ideas behind the menu governor
+ *
+ * For the menu governor, there are 3 decision factors for picking a C
+ * state:
+ * 1) Energy break even point
+ * 2) Performance impact
+ * 3) Latency tolerance (from pmqos infrastructure)
+ * These three factors are treated independently.
+ *
+ * Energy break even point
+ * -----------------------
+ * C state entry and exit have an energy cost, and a certain amount of time in
+ * the C state is required to actually break even on this cost. CPUIDLE
+ * provides us this duration in the "target_residency" field. So all that we
+ * need is a good prediction of how long we'll be idle. Like the traditional
+ * menu governor, we start with the actual known "next timer event" time.
+ *
+ * Since there are other sources of wakeups (interrupts for example) than
+ * the next timer event, this estimation is rather optimistic. To get a
+ * more realistic estimate, a correction factor is applied to the estimate,
+ * that is based on historic behavior. For example, if in the past the actual
+ * duration always was 50% of the next timer tick, the correction factor will
+ * be 0.5.
+ *
+ * menu uses a running average for this correction factor, however it uses a
+ * set of factors, not just a single factor. This stems from the realization
+ * that the ratio is dependent on the order of magnitude of the expected
+ * duration; if we expect 500 milliseconds of idle time the likelihood of
+ * getting an interrupt very early is much higher than if we expect 50 micro
+ * seconds of idle time. A second independent factor that has big impact on
+ * the actual factor is if there is (disk) IO outstanding or not.
+ * (as a special twist, we consider every sleep longer than 50 milliseconds
+ * as perfect; there are no power gains for sleeping longer than this)
+ *
+ * For these two reasons we keep an array of 12 independent factors, that gets
+ * indexed based on the magnitude of the expected duration as well as the
+ * "is IO outstanding" property.
+ *
+ * Repeatable-interval-detector
+ * ----------------------------
+ * There are some cases where "next timer" is a completely unusable predictor:
+ * Those cases where the interval is fixed, for example due to hardware
+ * interrupt mitigation, but also due to fixed transfer rate devices such as
+ * mice.
+ * For this, we use a different predictor: We track the duration of the last 8
+ * intervals and if the standard deviation of these 8 intervals is below a
+ * threshold value, we use the average of these intervals as prediction.
+ *
+ * Limiting Performance Impact
+ * ---------------------------
+ * C states, especially those with large exit latencies, can have a real
+ * noticeable impact on workloads, which is not acceptable for most sysadmins,
+ * and in addition, less performance has a power price of its own.
+ *
+ * As a general rule of thumb, menu assumes that the following heuristic
+ * holds:
+ *     The busier the system, the less impact of C states is acceptable
+ *
+ * This rule-of-thumb is implemented using a performance-multiplier:
+ * If the exit latency times the performance multiplier is longer than
+ * the predicted duration, the C state is not considered a candidate
+ * for selection due to a too high performance impact. So the higher
+ * this multiplier is, the longer we need to be idle to pick a deep C
+ * state, and thus the less likely a busy CPU will hit such a deep
+ * C state.
+ *
+ * Two factors are used in determining this multiplier:
+ * a value of 10 is added for each point of "per cpu load average" we have.
+ * a value of 5 points is added for each process that is waiting for
+ * IO on this CPU.
+ * (these values are experimentally determined)
+ *
+ * The load average factor gives a longer term (few seconds) input to the
+ * decision, while the iowait value gives a cpu local instantaneous input.
+ * The iowait factor may look low, but realize that this is also already
+ * represented in the system load average.
+ *
+ */
+
+struct menu_device {
+	int		last_state_idx;
+	int		needs_update;
+
+	unsigned int	expected_us;
+	u64		predicted_us;
+	unsigned int	exit_us;
+	unsigned int	bucket;
+	u64		correction_factor[BUCKETS];
+	u32		intervals[INTERVALS];
+	int		interval_ptr;
+};
+
+
+#define LOAD_INT(x) ((x) >> FSHIFT)
+#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
+
+static int get_loadavg(void)
+{
+	unsigned long this = this_cpu_load();
+
+
+	return LOAD_INT(this) * 10 + LOAD_FRAC(this) / 10;
+}
+
+static inline int which_bucket(unsigned int duration)
+{
+	int bucket = 0;
+
+	/*
+	 * We keep two groups of stats; one with IO pending, one without.
+	 * This allows us to calculate
+	 * E(duration)|iowait
+	 */
+	if (nr_iowait_cpu(smp_processor_id()))
+		bucket = BUCKETS/2;
+
+	if (duration < 10)
+		return bucket;
+	if (duration < 100)
+		return bucket + 1;
+	if (duration < 1000)
+		return bucket + 2;
+	if (duration < 10000)
+		return bucket + 3;
+	if (duration < 100000)
+		return bucket + 4;
+	return bucket + 5;
+}
+
+/*
+ * Return a multiplier for the exit latency that is intended
+ * to take performance requirements into account.
+ * The more performance critical we estimate the system
+ * to be, the higher this multiplier, and thus the higher
+ * the barrier to go to an expensive C state.
+ */
+static inline int performance_multiplier(void)
+{
+	int mult = 1;
+
+	/* for higher loadavg, we are more reluctant */
+
+	/*
+	 * this doesn't work as intended - it is almost always 0, but can
+	 * sometimes, depending on workload, spike very high into the hundreds
+	 * even when the average cpu load is under 10%.
+	 */
+	/* mult += 2 * get_loadavg(); */
+
+	/* for IO wait tasks (per cpu!) we add 5x each */
+	mult += 10 * nr_iowait_cpu(smp_processor_id());
+
+	return mult;
+}
+
+static DEFINE_PER_CPU(struct menu_device, menu_devices);
+
+static void menu_update(struct cpuidle_device *dev);
+
+/* This implements DIV_ROUND_CLOSEST but avoids 64 bit division */
+static u64 div_round64(u64 dividend, u32 divisor)
+{
+	return div_u64(dividend + (divisor / 2), divisor);
+}
+
+/*
+ * Try detecting repeating patterns by keeping track of the last 8
+ * intervals, and checking if the standard deviation of that set
+ * of points is below a threshold. If it is... then use the
+ * average of these 8 points as the estimated value.
+ */
+static void detect_repeating_patterns(struct menu_device *data)
+{
+	int i;
+	uint64_t avg = 0;
+	uint64_t stddev = 0; /* contains the square of the std deviation */
+
+	/* first calculate average and standard deviation of the past */
+	for (i = 0; i < INTERVALS; i++)
+		avg += data->intervals[i];
+	avg = avg / INTERVALS;
+
+	/* if the avg is beyond the known next tick, it's worthless */
+	if (avg > data->expected_us)
+		return;
+
+	for (i = 0; i < INTERVALS; i++)
+		stddev += (data->intervals[i] - avg) *
+			  (data->intervals[i] - avg);
+
+	stddev = stddev / INTERVALS;
+
+	/*
+	 * now.. if stddev is small.. then assume we have a
+	 * repeating pattern and predict we keep doing this.
+	 */
+
+	if (avg && stddev < STDDEV_THRESH)
+		data->predicted_us = avg;
+}
+
+/**
+ * menu_select - selects the next idle state to enter
+ * @dev: the CPU
+ */
+static int menu_select(struct cpuidle_device *dev)
+{
+	struct menu_device *data = &__get_cpu_var(menu_devices);
+	int latency_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY);
+	unsigned int power_usage = -1;
+	int i;
+	int multiplier;
+	struct timespec t;
+
+	if (data->needs_update) {
+		menu_update(dev);
+		data->needs_update = 0;
+	}
+
+	data->last_state_idx = 0;
+	data->exit_us = 0;
+
+	/* Special case when user has set very strict latency requirement */
+	if (unlikely(latency_req == 0))
+		return 0;
+
+	/* determine the expected residency time, round up */
+	t = ktime_to_timespec(tick_nohz_get_sleep_length());
+	data->expected_us =
+		t.tv_sec * USEC_PER_SEC + t.tv_nsec / NSEC_PER_USEC;
+
+
+	data->bucket = which_bucket(data->expected_us);
+
+	multiplier = performance_multiplier();
+
+	/*
+	 * if the correction factor is 0 (eg first time init or cpu hotplug
+	 * etc), we actually want to start out with a unity factor.
+	 */
+	if (data->correction_factor[data->bucket] == 0)
+		data->correction_factor[data->bucket] = RESOLUTION * DECAY;
+
+	/* Make sure to round up for half microseconds */
+	data->predicted_us = div_round64(data->expected_us * data->correction_factor[data->bucket],
+					 RESOLUTION * DECAY);
+
+	detect_repeating_patterns(data);
+
+	/*
+	 * We want to default to C1 (hlt), not to busy polling
+	 * unless the timer is happening really really soon.
+	 */
+	if (data->expected_us > 5)
+		data->last_state_idx = CPUIDLE_DRIVER_STATE_START;
+
+	/*
+	 * Find the idle state with the lowest power while satisfying
+	 * our constraints.
+	 */
+	for (i = CPUIDLE_DRIVER_STATE_START; i < dev->state_count; i++) {
+		struct cpuidle_state *s = &dev->states[i];
+
+		if (s->flags & CPUIDLE_FLAG_IGNORE)
+			continue;
+		if (s->target_residency > data->predicted_us)
+			continue;
+		if (s->exit_latency > latency_req)
+			continue;
+		if (s->exit_latency * multiplier > data->predicted_us)
+			continue;
+
+		if (s->power_usage < power_usage) {
+			power_usage = s->power_usage;
+			data->last_state_idx = i;
+			data->exit_us = s->exit_latency;
+		}
+	}
+
+	return data->last_state_idx;
+}
+
+/**
+ * menu_reflect - records that data structures need update
+ * @dev: the CPU
+ *
+ * NOTE: it's important to be fast here because this operation will add to
+ * the overall exit latency.
+ */
+static void menu_reflect(struct cpuidle_device *dev)
+{
+	struct menu_device *data = &__get_cpu_var(menu_devices);
+	data->needs_update = 1;
+}
+
+/**
+ * menu_update - attempts to guess what happened after entry
+ * @dev: the CPU
+ */
+static void menu_update(struct cpuidle_device *dev)
+{
+	struct menu_device *data = &__get_cpu_var(menu_devices);
+	int last_idx = data->last_state_idx;
+	unsigned int last_idle_us = cpuidle_get_last_residency(dev);
+	struct cpuidle_state *target = &dev->states[last_idx];
+	unsigned int measured_us;
+	u64 new_factor;
+
+	/*
+	 * Ugh, this idle state doesn't support residency measurements, so we
+	 * are basically lost in the dark. As a compromise, assume we slept
+	 * for the whole expected time.
+	 */
+	if (unlikely(!(target->flags & CPUIDLE_FLAG_TIME_VALID)))
+		last_idle_us = data->expected_us;
+
+
+	measured_us = last_idle_us;
+
+	/*
+	 * We correct for the exit latency; we are assuming here that the
+	 * exit latency happens after the event that we're interested in.
+	 */
+	if (measured_us > data->exit_us)
+		measured_us -= data->exit_us;
+
+
+	/* update our correction ratio */
+
+	new_factor = data->correction_factor[data->bucket]
+			* (DECAY - 1) / DECAY;
+
+	if (data->expected_us > 0 && measured_us < MAX_INTERESTING)
+		new_factor += RESOLUTION * measured_us / data->expected_us;
+	else
+		/*
+		 * we were idle so long that we count it as a perfect
+		 * prediction
+		 */
+		new_factor += RESOLUTION;
+
+	/*
+	 * We don't want 0 as factor; we always want at least
+	 * a tiny bit of estimated time.
+	 */
+	if (new_factor == 0)
+		new_factor = 1;
+
+	data->correction_factor[data->bucket] = new_factor;
+
+	/* update the repeating-pattern data */
+	data->intervals[data->interval_ptr++] = last_idle_us;
+	if (data->interval_ptr >= INTERVALS)
+		data->interval_ptr = 0;
+}
+
+/**
+ * menu_enable_device - scans a CPU's states and does setup
+ * @dev: the CPU
+ */
+static int menu_enable_device(struct cpuidle_device *dev)
+{
+	struct menu_device *data = &per_cpu(menu_devices, dev->cpu);
+
+	memset(data, 0, sizeof(struct menu_device));
+
+	return 0;
+}
+
+static struct cpuidle_governor menu_governor = {
+	.name =		"menu",
+	.rating =	20,
+	.enable =	menu_enable_device,
+	.select =	menu_select,
+	.reflect =	menu_reflect,
+	.owner =	THIS_MODULE,
+};
+
+/**
+ * init_menu - initializes the governor
+ */
+static int __init init_menu(void)
+{
+	return cpuidle_register_governor(&menu_governor);
+}
+
+/**
+ * exit_menu - exits the governor
+ */
+static void __exit exit_menu(void)
+{
+	cpuidle_unregister_governor(&menu_governor);
+}
+
+MODULE_LICENSE("GPL");
+module_init(init_menu);
+module_exit(exit_menu);
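
The prediction machinery above is plain fixed-point arithmetic: each bucket's correction factor is a ratio scaled by RESOLUTION * DECAY (8192), decayed by 7/8 on every update and topped up with the latest measured/expected ratio. A small stand-alone program walking through that update with made-up numbers (the governor additionally applies detect_repeating_patterns and the performance multiplier before picking a state):

#include <stdio.h>
#include <stdint.h>

#define RESOLUTION 1024
#define DECAY 8

/* DIV_ROUND_CLOSEST without 64-bit division, as in menu.c */
static uint64_t div_round64(uint64_t dividend, uint32_t divisor)
{
	return (dividend + divisor / 2) / divisor;
}

int main(void)
{
	/* start at unity: predicted == expected */
	uint64_t factor = RESOLUTION * DECAY;	/* 8192 */
	/* invented history: timer says 1000 us away, but we keep waking at 500 us */
	const unsigned expected_us = 1000, measured_us = 500;

	for (int i = 0; i < 16; i++) {
		/* decay the old estimate, mix in the new measured/expected ratio */
		factor = factor * (DECAY - 1) / DECAY
			 + (uint64_t)RESOLUTION * measured_us / expected_us;
		if (factor == 0)
			factor = 1;	/* never predict zero idle time */

		uint64_t predicted = div_round64((uint64_t)expected_us * factor,
						 RESOLUTION * DECAY);
		printf("update %2d: factor=%5llu predicted=%4llu us\n",
		       i, (unsigned long long)factor,
		       (unsigned long long)predicted);
	}
	/* factor converges toward 4096 (= 0.5 * RESOLUTION * DECAY),
	 * so the predicted idle time converges toward 500 us */
	return 0;
}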
diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c
new file mode 100644
index 00000000..be7917ec
--- /dev/null
+++ b/drivers/cpuidle/sysfs.c
@@ -0,0 +1,387 @@
+/*
+ * sysfs.c - sysfs support
+ *
+ * (C) 2006-2007 Shaohua Li <shaohua.li@intel.com>
+ *
+ * This code is licenced under the GPL.
+ */
+
+#include <linux/kernel.h>
+#include <linux/cpuidle.h>
+#include <linux/sysfs.h>
+#include <linux/slab.h>
+#include <linux/cpu.h>
+
+#include "cpuidle.h"
+
+static unsigned int sysfs_switch;
+static int __init cpuidle_sysfs_setup(char *unused)
+{
+	sysfs_switch = 1;
+	return 1;
+}
+__setup("cpuidle_sysfs_switch", cpuidle_sysfs_setup);
+
+static ssize_t show_available_governors(struct sysdev_class *class,
+					struct sysdev_class_attribute *attr,
+					char *buf)
+{
+	ssize_t i = 0;
+	struct cpuidle_governor *tmp;
+
+	mutex_lock(&cpuidle_lock);
+	list_for_each_entry(tmp, &cpuidle_governors, governor_list) {
+		if (i >= (ssize_t) ((PAGE_SIZE/sizeof(char)) - CPUIDLE_NAME_LEN - 2))
+			goto out;
+		i += scnprintf(&buf[i], CPUIDLE_NAME_LEN, "%s ", tmp->name);
+	}
+
+out:
+	i+= sprintf(&buf[i], "\n");
+	mutex_unlock(&cpuidle_lock);
+	return i;
+}
+
+static ssize_t show_current_driver(struct sysdev_class *class,
+				   struct sysdev_class_attribute *attr,
+				   char *buf)
+{
+	ssize_t ret;
+	struct cpuidle_driver *cpuidle_driver = cpuidle_get_driver();
+
+	spin_lock(&cpuidle_driver_lock);
+	if (cpuidle_driver)
+		ret = sprintf(buf, "%s\n", cpuidle_driver->name);
+	else
+		ret = sprintf(buf, "none\n");
+	spin_unlock(&cpuidle_driver_lock);
+
+	return ret;
+}
+
+static ssize_t show_current_governor(struct sysdev_class *class,
+				     struct sysdev_class_attribute *attr,
+				     char *buf)
+{
+	ssize_t ret;
+
+	mutex_lock(&cpuidle_lock);
+	if (cpuidle_curr_governor)
+		ret = sprintf(buf, "%s\n", cpuidle_curr_governor->name);
+	else
+		ret = sprintf(buf, "none\n");
+	mutex_unlock(&cpuidle_lock);
+
+	return ret;
+}
+
+static ssize_t store_current_governor(struct sysdev_class *class,
+				      struct sysdev_class_attribute *attr,
+				      const char *buf, size_t count)
+{
+	char gov_name[CPUIDLE_NAME_LEN];
+	int ret = -EINVAL;
+	size_t len = count;
+	struct cpuidle_governor *gov;
+
+	if (!len || len >= sizeof(gov_name))
+		return -EINVAL;
+
+	memcpy(gov_name, buf, len);
+	gov_name[len] = '\0';
+	if (gov_name[len - 1] == '\n')
+		gov_name[--len] = '\0';
+
+	mutex_lock(&cpuidle_lock);
+
+	list_for_each_entry(gov, &cpuidle_governors, governor_list) {
+		if (strlen(gov->name) == len && !strcmp(gov->name, gov_name)) {
+			ret = cpuidle_switch_governor(gov);
+			break;
+		}
+	}
+
+	mutex_unlock(&cpuidle_lock);
+
+	if (ret)
+		return ret;
+	else
+		return count;
+}
+
+static SYSDEV_CLASS_ATTR(current_driver, 0444, show_current_driver, NULL);
+static SYSDEV_CLASS_ATTR(current_governor_ro, 0444, show_current_governor,
+			 NULL);
+
+static struct attribute *cpuclass_default_attrs[] = {
+	&attr_current_driver.attr,
+	&attr_current_governor_ro.attr,
+	NULL
+};
+
+static SYSDEV_CLASS_ATTR(available_governors, 0444, show_available_governors,
+			 NULL);
+static SYSDEV_CLASS_ATTR(current_governor, 0644, show_current_governor,
+			 store_current_governor);
+
+static struct attribute *cpuclass_switch_attrs[] = {
+	&attr_available_governors.attr,
+	&attr_current_driver.attr,
+	&attr_current_governor.attr,
+	NULL
+};
+
+static struct attribute_group cpuclass_attr_group = {
+	.attrs = cpuclass_default_attrs,
+	.name = "cpuidle",
+};
+
+/**
+ * cpuidle_add_class_sysfs - add CPU global sysfs attributes
+ */
+int cpuidle_add_class_sysfs(struct sysdev_class *cls)
+{
+	if (sysfs_switch)
+		cpuclass_attr_group.attrs = cpuclass_switch_attrs;
+
+	return sysfs_create_group(&cls->kset.kobj, &cpuclass_attr_group);
+}
+
+/**
+ * cpuidle_remove_class_sysfs - remove CPU global sysfs attributes
+ */
+void cpuidle_remove_class_sysfs(struct sysdev_class *cls)
+{
+	sysfs_remove_group(&cls->kset.kobj, &cpuclass_attr_group);
+}
+
+struct cpuidle_attr {
+	struct attribute attr;
+	ssize_t (*show)(struct cpuidle_device *, char *);
+	ssize_t (*store)(struct cpuidle_device *, const char *, size_t count);
+};
+
+#define define_one_ro(_name, show) \
+	static struct cpuidle_attr attr_##_name = __ATTR(_name, 0444, show, NULL)
+#define define_one_rw(_name, show, store) \
+	static struct cpuidle_attr attr_##_name = __ATTR(_name, 0644, show, store)
+
+#define kobj_to_cpuidledev(k) container_of(k, struct cpuidle_device, kobj)
+#define attr_to_cpuidleattr(a) container_of(a, struct cpuidle_attr, attr)
+static ssize_t cpuidle_show(struct kobject * kobj, struct attribute * attr ,char * buf)
+{
+	int ret = -EIO;
+	struct cpuidle_device *dev = kobj_to_cpuidledev(kobj);
+	struct cpuidle_attr * cattr = attr_to_cpuidleattr(attr);
+
+	if (cattr->show) {
+		mutex_lock(&cpuidle_lock);
+		ret = cattr->show(dev, buf);
+		mutex_unlock(&cpuidle_lock);
+	}
+	return ret;
+}
+
+static ssize_t cpuidle_store(struct kobject * kobj, struct attribute * attr,
+			     const char * buf, size_t count)
+{
+	int ret = -EIO;
+	struct cpuidle_device *dev = kobj_to_cpuidledev(kobj);
+	struct cpuidle_attr * cattr = attr_to_cpuidleattr(attr);
+
+	if (cattr->store) {
+		mutex_lock(&cpuidle_lock);
+		ret = cattr->store(dev, buf, count);
+		mutex_unlock(&cpuidle_lock);
+	}
+	return ret;
+}
+
+static const struct sysfs_ops cpuidle_sysfs_ops = {
+	.show = cpuidle_show,
+	.store = cpuidle_store,
+};
+
+static void cpuidle_sysfs_release(struct kobject *kobj)
+{
+	struct cpuidle_device *dev = kobj_to_cpuidledev(kobj);
+
+	complete(&dev->kobj_unregister);
+}
+
+static struct kobj_type ktype_cpuidle = {
+	.sysfs_ops = &cpuidle_sysfs_ops,
+	.release = cpuidle_sysfs_release,
+};
+
+struct cpuidle_state_attr {
+	struct attribute attr;
+	ssize_t (*show)(struct cpuidle_state *, char *);
+	ssize_t (*store)(struct cpuidle_state *, const char *, size_t);
+};
+
+#define define_one_state_ro(_name, show) \
+static struct cpuidle_state_attr attr_##_name = __ATTR(_name, 0444, show, NULL)
+#define define_show_state_function(_name) \
+static ssize_t show_state_##_name(struct cpuidle_state *state, char *buf) \
+{ \
+	return sprintf(buf, "%u\n", state->_name);\
+}
+
+#define define_show_state_ull_function(_name) \
+static ssize_t show_state_##_name(struct cpuidle_state *state, char *buf) \
+{ \
+	return sprintf(buf, "%llu\n", state->_name);\
+}
+
+#define define_show_state_str_function(_name) \
+static ssize_t show_state_##_name(struct cpuidle_state *state, char *buf) \
+{ \
+	if (state->_name[0] == '\0')\
+		return sprintf(buf, "<null>\n");\
+	return sprintf(buf, "%s\n", state->_name);\
+}
+
+define_show_state_function(exit_latency)
+define_show_state_function(power_usage)
+define_show_state_ull_function(usage)
+define_show_state_ull_function(time)
+define_show_state_str_function(name)
+define_show_state_str_function(desc)
+
+define_one_state_ro(name, show_state_name);
+define_one_state_ro(desc, show_state_desc);
+define_one_state_ro(latency, show_state_exit_latency);
+define_one_state_ro(power, show_state_power_usage);
+define_one_state_ro(usage, show_state_usage);
+define_one_state_ro(time, show_state_time);
+
+static struct attribute *cpuidle_state_default_attrs[] = {
+	&attr_name.attr,
+	&attr_desc.attr,
+	&attr_latency.attr,
+	&attr_power.attr,
+	&attr_usage.attr,
+	&attr_time.attr,
+	NULL
+};
+
+#define kobj_to_state_obj(k) container_of(k, struct cpuidle_state_kobj, kobj)
+#define kobj_to_state(k) (kobj_to_state_obj(k)->state)
+#define attr_to_stateattr(a) container_of(a, struct cpuidle_state_attr, attr)
+static ssize_t cpuidle_state_show(struct kobject * kobj,
+	struct attribute * attr ,char * buf)
+{
+	int ret = -EIO;
+	struct cpuidle_state *state = kobj_to_state(kobj);
+	struct cpuidle_state_attr * cattr = attr_to_stateattr(attr);
+
+	if (cattr->show)
+		ret = cattr->show(state, buf);
+
+	return ret;
+}
+
+static const struct sysfs_ops cpuidle_state_sysfs_ops = {
+	.show = cpuidle_state_show,
+};
+
+static void cpuidle_state_sysfs_release(struct kobject *kobj)
+{
+	struct cpuidle_state_kobj *state_obj = kobj_to_state_obj(kobj);
+
+	complete(&state_obj->kobj_unregister);
+}
+
+static struct kobj_type ktype_state_cpuidle = {
+	.sysfs_ops = &cpuidle_state_sysfs_ops,
+	.default_attrs = cpuidle_state_default_attrs,
+	.release = cpuidle_state_sysfs_release,
+};
+
+static inline void cpuidle_free_state_kobj(struct cpuidle_device *device, int i)
+{
+	kobject_put(&device->kobjs[i]->kobj);
+	wait_for_completion(&device->kobjs[i]->kobj_unregister);
+	kfree(device->kobjs[i]);
+	device->kobjs[i] = NULL;
+}
+
+/**
+ * cpuidle_add_state_sysfs - adds per-state sysfs attributes
+ * @device: the target device
+ */
+int cpuidle_add_state_sysfs(struct cpuidle_device *device)
+{
+	int i, ret = -ENOMEM;
+	struct cpuidle_state_kobj *kobj;
+
+	/* state statistics */
+	for (i = 0; i < device->state_count; i++) {
+		kobj = kzalloc(sizeof(struct cpuidle_state_kobj), GFP_KERNEL);
+		if (!kobj)
+			goto error_state;
+		kobj->state = &device->states[i];
+		init_completion(&kobj->kobj_unregister);
+
+		ret = kobject_init_and_add(&kobj->kobj, &ktype_state_cpuidle, &device->kobj,
+					   "state%d", i);
+		if (ret) {
+			kfree(kobj);
+			goto error_state;
+		}
+		kobject_uevent(&kobj->kobj, KOBJ_ADD);
+		device->kobjs[i] = kobj;
+	}
+
+	return 0;
+
+error_state:
+	for (i = i - 1; i >= 0; i--)
+		cpuidle_free_state_kobj(device, i);
+	return ret;
+}
+
+/**
+ * cpuidle_remove_state_sysfs - removes per-state sysfs attributes
+ * @device: the target device
+ */
+void cpuidle_remove_state_sysfs(struct cpuidle_device *device)
+{
+	int i;
+
+	for (i = 0; i < device->state_count; i++)
+		cpuidle_free_state_kobj(device, i);
+}
+
+/**
+ * cpuidle_add_sysfs - creates a sysfs instance for the target device
+ * @sysdev: the target device
+ */
+int cpuidle_add_sysfs(struct sys_device *sysdev)
+{
+	int cpu = sysdev->id;
+	struct cpuidle_device *dev;
+	int error;
+
+	dev = per_cpu(cpuidle_devices, cpu);
+	error = kobject_init_and_add(&dev->kobj, &ktype_cpuidle, &sysdev->kobj,
+				     "cpuidle");
+	if (!error)
+		kobject_uevent(&dev->kobj, KOBJ_ADD);
+	return error;
+}
+
+/**
+ * cpuidle_remove_sysfs - deletes a sysfs instance on the target device
+ * @sysdev: the target device
+ */
+void cpuidle_remove_sysfs(struct sys_device *sysdev)
+{
+	int cpu = sysdev->id;
+	struct cpuidle_device *dev;
+
+	dev = per_cpu(cpuidle_devices, cpu);
+	kobject_put(&dev->kobj);
+}
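
Since these attributes hang off the cpu sysdev class, on a kernel of this vintage they surface as /sys/devices/system/cpu/cpuidle/{current_driver,current_governor_ro} globally (plus available_governors and a writable current_governor when booted with cpuidle_sysfs_switch) and as /sys/devices/system/cpu/cpuN/cpuidle/stateN/{name,desc,latency,power,usage,time} per state. A small userspace reader, assuming cpu0 exists and CPU_IDLE is enabled:

#include <stdio.h>

static void dump(const char *path)
{
	char line[128];
	FILE *f = fopen(path, "r");

	if (!f) {
		printf("%-60s <unavailable>\n", path);
		return;
	}
	if (fgets(line, sizeof(line), f))
		printf("%-60s %s", path, line);	/* value keeps its '\n' */
	fclose(f);
}

int main(void)
{
	/* global attributes from the cpuidle class group */
	dump("/sys/devices/system/cpu/cpuidle/current_driver");
	dump("/sys/devices/system/cpu/cpuidle/current_governor_ro");

	/* per-state statistics for cpu0's first two states */
	for (int s = 0; s < 2; s++) {
		char path[96];

		snprintf(path, sizeof(path),
			 "/sys/devices/system/cpu/cpu0/cpuidle/state%d/name", s);
		dump(path);
		snprintf(path, sizeof(path),
			 "/sys/devices/system/cpu/cpu0/cpuidle/state%d/usage", s);
		dump(path);
	}
	return 0;
}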