Diffstat (limited to 'kernel/power')
-rw-r--r-- | kernel/power/Kconfig | 342
-rw-r--r-- | kernel/power/Makefile | 23
-rw-r--r-- | kernel/power/block_io.c | 103
-rw-r--r-- | kernel/power/console.c | 35
-rw-r--r-- | kernel/power/consoleearlysuspend.c | 78
-rw-r--r-- | kernel/power/cpufreq_earlysuspend.c | 67
-rw-r--r-- | kernel/power/cpufreq_governor_chg.c | 138
-rw-r--r-- | kernel/power/cpuhotplug_earlysuspend.c | 111
-rw-r--r-- | kernel/power/earlysuspend.c | 187
-rw-r--r-- | kernel/power/fbearlysuspend.c | 153
-rw-r--r-- | kernel/power/hibernate.c | 1067
-rw-r--r-- | kernel/power/main.c | 453
-rw-r--r-- | kernel/power/power.h | 271
-rw-r--r-- | kernel/power/poweroff.c | 46
-rw-r--r-- | kernel/power/process.c | 206
-rw-r--r-- | kernel/power/snapshot.c | 2325
-rw-r--r-- | kernel/power/suspend.c | 335
-rw-r--r-- | kernel/power/suspend_test.c | 188
-rw-r--r-- | kernel/power/suspend_time.c | 111
-rw-r--r-- | kernel/power/swap.c | 989
-rw-r--r-- | kernel/power/user.c | 486
-rw-r--r-- | kernel/power/userwakelock.c | 219
-rw-r--r-- | kernel/power/wakelock.c | 634 |
23 files changed, 8567 insertions, 0 deletions
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig new file mode 100644 index 00000000..f6f69736 --- /dev/null +++ b/kernel/power/Kconfig @@ -0,0 +1,342 @@ +config SUSPEND + bool "Suspend to RAM and standby" + depends on ARCH_SUSPEND_POSSIBLE + default y + ---help--- + Allow the system to enter sleep states in which main memory is + powered and thus its contents are preserved, such as the + suspend-to-RAM state (e.g. the ACPI S3 state). + +config PM_TEST_SUSPEND + bool "Test suspend/resume and wakealarm during bootup" + depends on SUSPEND && PM_DEBUG && RTC_CLASS=y + ---help--- + This option will let you suspend your machine during bootup, and + make it wake up a few seconds later using an RTC wakeup alarm. + Enable this with a kernel parameter like "test_suspend=mem". + + You probably want to have your system's RTC driver statically + linked, ensuring that it's available when this test runs. + +config SUSPEND_DEVICE_TIME_DEBUG + bool "Warn when device suspend/resume takes too much time" + depends on SUSPEND && PM_DEBUG + default n + ---help--- + This option will enable a timing function to check device + suspend time consumption. If a device takes more time than + the threshold (default 0.5 ms), it will print the device and + bus name on the console. You can change the threshold + on-the-fly by modifying /sys/power/time_threshold; the time unit + is microseconds. + + This option is only for debugging purposes. If in doubt, say N. + +config SUSPEND_FREEZER + bool "Enable freezer for suspend to RAM/standby" \ + if ARCH_WANTS_FREEZER_CONTROL || BROKEN + depends on SUSPEND + default y + help + This allows you to turn off the freezer for suspend. If this is + done, no tasks are frozen for suspend to RAM/standby. + + Turning OFF this setting is NOT recommended! If in doubt, say Y. + +config HAS_WAKELOCK + bool + +config HAS_EARLYSUSPEND + bool + +config CPUFREQ_GOV_ON_EARLYSUPSEND + bool "Use conservative cpu frequency governor when device enters early suspend" + depends on HAS_EARLYSUSPEND && CPU_FREQ + default n + help + Also restores the original cpu frequency governor when the device is resumed + +config CPUHOTPLUG_EARLYSUSPEND + bool "hotplug cpu when device enters early suspend" + depends on HAS_EARLYSUSPEND && SMP + default n + help + Restores the original number of online cpus when the device is resumed + +config WAKELOCK + bool "Wake lock" + depends on PM && RTC_CLASS + default n + select HAS_WAKELOCK + ---help--- + Enable wakelocks. When user space requests a sleep state, the + sleep request will be delayed until no wake locks are held. + +config WAKELOCK_STAT + bool "Wake lock stats" + depends on WAKELOCK + default y + ---help--- + Report wake lock stats in /proc/wakelocks + +config USER_WAKELOCK + bool "Userspace wake locks" + depends on WAKELOCK + default y + ---help--- + User-space wake lock api. Write "lockname" or "lockname timeout" + to /sys/power/wake_lock to lock and, if needed, create a wake lock. + Write "lockname" to /sys/power/wake_unlock to unlock a user wake + lock. + +config EARLYSUSPEND + bool "Early suspend" + depends on WAKELOCK + default y + select HAS_EARLYSUSPEND + ---help--- + Call early suspend handlers when the user requested sleep state + changes.
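The USER_WAKELOCK help text above describes the /sys/power/wake_lock and /sys/power/wake_unlock sysfs interface. A minimal user-space sketch of that usage follows; it is illustrative only, the lock name "mylock" and the timeout value are made up, and the timeout unit is assumed to be nanoseconds as parsed by the Android userwakelock code added in this change.

    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    /* Write a value to a sysfs file and report failure via errno. */
    static int write_str(const char *path, const char *val)
    {
            int fd = open(path, O_WRONLY);
            if (fd < 0)
                    return -1;
            if (write(fd, val, strlen(val)) < 0) {
                    close(fd);
                    return -1;
            }
            return close(fd);
    }

    int main(void)
    {
            /* Take a wake lock named "mylock" with a timeout
             * (assumed to be 5 s expressed in nanoseconds). */
            if (write_str("/sys/power/wake_lock", "mylock 5000000000"))
                    perror("wake_lock");

            /* Release the lock early by writing just its name. */
            if (write_str("/sys/power/wake_unlock", "mylock"))
                    perror("wake_unlock");
            return 0;
    }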
+ +choice + prompt "User-space screen access" + default FB_EARLYSUSPEND if !FRAMEBUFFER_CONSOLE + default CONSOLE_EARLYSUSPEND + depends on HAS_EARLYSUSPEND + + config NO_USER_SPACE_SCREEN_ACCESS_CONTROL + bool "None" + + config CONSOLE_EARLYSUSPEND + bool "Console switch on early-suspend" + depends on HAS_EARLYSUSPEND && VT + ---help--- + Register early suspend handler to perform a console switch to + when user-space should stop drawing to the screen and a switch + back when it should resume. + + config FB_EARLYSUSPEND + bool "Sysfs interface" + depends on HAS_EARLYSUSPEND + ---help--- + Register early suspend handler that notifies and waits for + user-space through sysfs when user-space should stop drawing + to the screen and notifies user-space when it should resume. +endchoice + +config HIBERNATE_CALLBACKS + bool + +config HIBERNATION + bool "Hibernation (aka 'suspend to disk')" + depends on SWAP && ARCH_HIBERNATION_POSSIBLE + select HIBERNATE_CALLBACKS + select LZO_COMPRESS + select LZO_DECOMPRESS + ---help--- + Enable the suspend to disk (STD) functionality, which is usually + called "hibernation" in user interfaces. STD checkpoints the + system and powers it off; and restores that checkpoint on reboot. + + You can suspend your machine with 'echo disk > /sys/power/state' + after placing resume=/dev/swappartition on the kernel command line + in your bootloader's configuration file. + + Alternatively, you can use the additional userland tools available + from <http://suspend.sf.net>. + + In principle it does not require ACPI or APM, although for example + ACPI will be used for the final steps when it is available. One + of the reasons to use software suspend is that the firmware hooks + for suspend states like suspend-to-RAM (STR) often don't work very + well with Linux. + + It creates an image which is saved in your active swap. Upon the next + boot, pass the 'resume=/dev/swappartition' argument to the kernel to + have it detect the saved image, restore memory state from it, and + continue to run as before. If you do not want the previous state to + be reloaded, then use the 'noresume' kernel command line argument. + Note, however, that fsck will be run on your filesystems and you will + need to run mkswap against the swap partition used for the suspend. + + It also works with swap files to a limited extent (for details see + <file:Documentation/power/swsusp-and-swap-files.txt>). + + Right now you may boot without resuming and resume later but in the + meantime you cannot use the swap partition(s)/file(s) involved in + suspending. Also in this case you must not use the filesystems + that were mounted before the suspend. In particular, you MUST NOT + MOUNT any journaled filesystems mounted before the suspend or they + will get corrupted in a nasty way. + + For more information take a look at <file:Documentation/power/swsusp.txt>. + +config PM_STD_PARTITION + string "Default resume partition" + depends on HIBERNATION + default "" + ---help--- + The default resume partition is the partition that the suspend- + to-disk implementation will look for a suspended disk image. + + The partition specified here will be different for almost every user. + It should be a valid swap partition (at least for now) that is turned + on before suspending. + + The partition specified can be overridden by specifying: + + resume=/dev/<other device> + + which will set the resume partition to the device specified. + + Note there is currently not a way to specify which device to save the + suspended image to. 
It will simply pick the first available swap + device. + +config PM_SLEEP + def_bool y + depends on SUSPEND || HIBERNATE_CALLBACKS + +config PM_SLEEP_SMP + def_bool y + depends on SMP + depends on ARCH_SUSPEND_POSSIBLE || ARCH_HIBERNATION_POSSIBLE + depends on PM_SLEEP + select HOTPLUG + select HOTPLUG_CPU + +config PM_RUNTIME + bool "Run-time PM core functionality" + depends on !IA64_HP_SIM + ---help--- + Enable functionality allowing I/O devices to be put into energy-saving + (low power) states at run time (or autosuspended) after a specified + period of inactivity and woken up in response to a hardware-generated + wake-up event or a driver's request. + + Hardware support is generally required for this functionality to work + and the bus type drivers of the buses the devices are on are + responsible for the actual handling of the autosuspend requests and + wake-up events. + +config PM + def_bool y + depends on PM_SLEEP || PM_RUNTIME + +config PM_DEBUG + bool "Power Management Debug Support" + depends on PM + ---help--- + This option enables various debugging support in the Power Management + code. This is helpful when debugging and reporting PM bugs, like + suspend support. + +config PM_ADVANCED_DEBUG + bool "Extra PM attributes in sysfs for low-level debugging/testing" + depends on PM_DEBUG + ---help--- + Add extra sysfs attributes allowing one to access some Power Management + fields of device objects from user space. If you are not a kernel + developer interested in debugging/testing Power Management, say "no". + +config PM_TEST_SUSPEND + bool "Test suspend/resume and wakealarm during bootup" + depends on SUSPEND && PM_DEBUG && RTC_CLASS=y + ---help--- + This option will let you suspend your machine during bootup, and + make it wake up a few seconds later using an RTC wakeup alarm. + Enable this with a kernel parameter like "test_suspend=mem". + + You probably want to have your system's RTC driver statically + linked, ensuring that it's available when this test runs. + +config CAN_PM_TRACE + def_bool y + depends on PM_DEBUG && PM_SLEEP + +config PM_TRACE + bool + help + This enables code to save the last PM event point across + reboot. The architecture needs to support this, x86 for + example does by saving things in the RTC, see below. + + The architecture specific code must provide the extern + functions from <linux/resume-trace.h> as well as the + <asm/resume-trace.h> header with a TRACE_RESUME() macro. + + The way the information is presented is architecture- + dependent, x86 will print the information during a + late_initcall. + +config PM_TRACE_RTC + bool "Suspend/resume event tracing" + depends on CAN_PM_TRACE + depends on X86 + select PM_TRACE + ---help--- + This enables some cheesy code to save the last PM event point in the + RTC across reboots, so that you can debug a machine that just hangs + during suspend (or more commonly, during resume). + + To use this debugging feature you should attempt to suspend the + machine, reboot it and then run + + dmesg -s 1000000 | grep 'hash matches' + + CAUTION: this option will cause your machine's real-time clock to be + set to an invalid time after a resume. + +config APM_EMULATION + tristate "Advanced Power Management Emulation" + depends on PM && SYS_SUPPORTS_APM_EMULATION + help + APM is a BIOS specification for saving power using several different + techniques. This is mostly useful for battery powered laptops with + APM compliant BIOSes. 
If you say Y here, the system time will be + reset after a RESUME operation, the /proc/apm device will provide + battery status information, and user-space programs will receive + notification of APM "events" (e.g. battery status change). + + In order to use APM, you will need supporting software. For location + and more information, read <file:Documentation/power/pm.txt> and the + Battery Powered Linux mini-HOWTO, available from + <http://www.tldp.org/docs.html#howto>. + + This driver does not spin down disk drives (see the hdparm(8) + manpage ("man 8 hdparm") for that), and it doesn't turn off + VESA-compliant "green" monitors. + + Generally, if you don't have a battery in your machine, there isn't + much point in using this driver and you should say N. If you get + random kernel OOPSes or reboots that don't seem to be related to + anything, try disabling/enabling this option (or disabling/enabling + APM in your BIOS). + +config ARCH_HAS_OPP + bool + +config PM_OPP + bool "Operating Performance Point (OPP) Layer library" + depends on ARCH_HAS_OPP + ---help--- + SOCs have a standard set of tuples consisting of frequency and + voltage pairs that the device will support per voltage domain. This + is called Operating Performance Point or OPP. The actual definitions + of OPP varies over silicon within the same family of devices. + + OPP layer organizes the data internally using device pointers + representing individual voltage domains and provides SOC + implementations a ready to use framework to manage OPPs. + For more information, read <file:Documentation/power/opp.txt> + +config PM_RUNTIME_CLK + def_bool y + depends on PM_RUNTIME && HAVE_CLK + +config SUSPEND_TIME + bool "Log time spent in suspend" + ---help--- + Prints the time spent in suspend in the kernel log, and + keeps statistics on the time spent in suspend in + /sys/kernel/debug/suspend_time diff --git a/kernel/power/Makefile b/kernel/power/Makefile new file mode 100644 index 00000000..8de92715 --- /dev/null +++ b/kernel/power/Makefile @@ -0,0 +1,23 @@ + +ccflags-$(CONFIG_PM_DEBUG) := -DDEBUG + +obj-$(CONFIG_PM) += main.o +obj-$(CONFIG_PM_SLEEP) += console.o +obj-$(CONFIG_FREEZER) += process.o +obj-$(CONFIG_SUSPEND) += suspend.o +obj-$(CONFIG_PM_TEST_SUSPEND) += suspend_test.o +obj-$(CONFIG_HIBERNATION) += hibernate.o snapshot.o swap.o user.o \ + block_io.o +obj-$(CONFIG_WAKELOCK) += wakelock.o +obj-$(CONFIG_USER_WAKELOCK) += userwakelock.o +obj-$(CONFIG_EARLYSUSPEND) += earlysuspend.o +obj-$(CONFIG_CONSOLE_EARLYSUSPEND) += consoleearlysuspend.o +obj-$(CONFIG_FB_EARLYSUSPEND) += fbearlysuspend.o +obj-$(CONFIG_CPUFREQ_GOV_ON_EARLYSUPSEND) += cpufreq_earlysuspend.o +obj-$(CONFIG_CPUHOTPLUG_EARLYSUSPEND) += cpuhotplug_earlysuspend.o +obj-$(CONFIG_SUSPEND_TIME) += suspend_time.o + +obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o + +obj-$(CONFIG_CPU_FREQ) += cpufreq_governor_chg.o + diff --git a/kernel/power/block_io.c b/kernel/power/block_io.c new file mode 100644 index 00000000..d09dd10c --- /dev/null +++ b/kernel/power/block_io.c @@ -0,0 +1,103 @@ +/* + * This file provides functions for block I/O operations on swap/file. + * + * Copyright (C) 1998,2001-2005 Pavel Machek <pavel@ucw.cz> + * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl> + * + * This file is released under the GPLv2. + */ + +#include <linux/bio.h> +#include <linux/kernel.h> +#include <linux/pagemap.h> +#include <linux/swap.h> + +#include "power.h" + +/** + * submit - submit BIO request. + * @rw: READ or WRITE. + * @off physical offset of page. 
+ * @page: page we're reading or writing. + * @bio_chain: list of pending biod (for async reading) + * + * Straight from the textbook - allocate and initialize the bio. + * If we're reading, make sure the page is marked as dirty. + * Then submit it and, if @bio_chain == NULL, wait. + */ +static int submit(int rw, struct block_device *bdev, sector_t sector, + struct page *page, struct bio **bio_chain) +{ + const int bio_rw = rw | REQ_SYNC; + struct bio *bio; + + bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1); + bio->bi_sector = sector; + bio->bi_bdev = bdev; + bio->bi_end_io = end_swap_bio_read; + + if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { + printk(KERN_ERR "PM: Adding page to bio failed at %llu\n", + (unsigned long long)sector); + bio_put(bio); + return -EFAULT; + } + + lock_page(page); + bio_get(bio); + + if (bio_chain == NULL) { + submit_bio(bio_rw, bio); + wait_on_page_locked(page); + if (rw == READ) + bio_set_pages_dirty(bio); + bio_put(bio); + } else { + if (rw == READ) + get_page(page); /* These pages are freed later */ + bio->bi_private = *bio_chain; + *bio_chain = bio; + submit_bio(bio_rw, bio); + } + return 0; +} + +int hib_bio_read_page(pgoff_t page_off, void *addr, struct bio **bio_chain) +{ + return submit(READ, hib_resume_bdev, page_off * (PAGE_SIZE >> 9), + virt_to_page(addr), bio_chain); +} + +int hib_bio_write_page(pgoff_t page_off, void *addr, struct bio **bio_chain) +{ + return submit(WRITE, hib_resume_bdev, page_off * (PAGE_SIZE >> 9), + virt_to_page(addr), bio_chain); +} + +int hib_wait_on_bio_chain(struct bio **bio_chain) +{ + struct bio *bio; + struct bio *next_bio; + int ret = 0; + + if (bio_chain == NULL) + return 0; + + bio = *bio_chain; + if (bio == NULL) + return 0; + while (bio) { + struct page *page; + + next_bio = bio->bi_private; + page = bio->bi_io_vec[0].bv_page; + wait_on_page_locked(page); + if (!PageUptodate(page) || PageError(page)) + ret = -EIO; + put_page(page); + bio_put(bio); + bio = next_bio; + } + *bio_chain = NULL; + return ret; +} diff --git a/kernel/power/console.c b/kernel/power/console.c new file mode 100644 index 00000000..218e5af9 --- /dev/null +++ b/kernel/power/console.c @@ -0,0 +1,35 @@ +/* + * drivers/power/process.c - Functions for saving/restoring console. + * + * Originally from swsusp. + */ + +#include <linux/vt_kern.h> +#include <linux/kbd_kern.h> +#include <linux/vt.h> +#include <linux/module.h> +#include "power.h" + +#if defined(CONFIG_VT) && defined(CONFIG_VT_CONSOLE) +#define SUSPEND_CONSOLE (MAX_NR_CONSOLES-1) + +static int orig_fgconsole, orig_kmsg; + +int pm_prepare_console(void) +{ + orig_fgconsole = vt_move_to_console(SUSPEND_CONSOLE, 1); + if (orig_fgconsole < 0) + return 1; + + orig_kmsg = vt_kmsg_redirect(SUSPEND_CONSOLE); + return 0; +} + +void pm_restore_console(void) +{ + if (orig_fgconsole >= 0) { + vt_move_to_console(orig_fgconsole, 0); + vt_kmsg_redirect(orig_kmsg); + } +} +#endif diff --git a/kernel/power/consoleearlysuspend.c b/kernel/power/consoleearlysuspend.c new file mode 100644 index 00000000..a3edcb26 --- /dev/null +++ b/kernel/power/consoleearlysuspend.c @@ -0,0 +1,78 @@ +/* kernel/power/consoleearlysuspend.c + * + * Copyright (C) 2005-2008 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <linux/console.h> +#include <linux/earlysuspend.h> +#include <linux/kbd_kern.h> +#include <linux/module.h> +#include <linux/vt_kern.h> +#include <linux/wait.h> + +#define EARLY_SUSPEND_CONSOLE (MAX_NR_CONSOLES-1) + +static int orig_fgconsole; +static void console_early_suspend(struct early_suspend *h) +{ + acquire_console_sem(); + orig_fgconsole = fg_console; + if (vc_allocate(EARLY_SUSPEND_CONSOLE)) + goto err; + if (set_console(EARLY_SUSPEND_CONSOLE)) + goto err; + release_console_sem(); + + if (vt_waitactive(EARLY_SUSPEND_CONSOLE + 1)) + pr_warning("console_early_suspend: Can't switch VCs.\n"); + return; +err: + pr_warning("console_early_suspend: Can't set console\n"); + release_console_sem(); +} + +static void console_late_resume(struct early_suspend *h) +{ + int ret; + acquire_console_sem(); + ret = set_console(orig_fgconsole); + release_console_sem(); + if (ret) { + pr_warning("console_late_resume: Can't set console.\n"); + return; + } + + if (vt_waitactive(orig_fgconsole + 1)) + pr_warning("console_late_resume: Can't switch VCs.\n"); +} + +static struct early_suspend console_early_suspend_desc = { + .level = EARLY_SUSPEND_LEVEL_STOP_DRAWING, + .suspend = console_early_suspend, + .resume = console_late_resume, +}; + +static int __init console_early_suspend_init(void) +{ + register_early_suspend(&console_early_suspend_desc); + return 0; +} + +static void __exit console_early_suspend_exit(void) +{ + unregister_early_suspend(&console_early_suspend_desc); +} + +module_init(console_early_suspend_init); +module_exit(console_early_suspend_exit); + diff --git a/kernel/power/cpufreq_earlysuspend.c b/kernel/power/cpufreq_earlysuspend.c new file mode 100644 index 00000000..65faf3b9 --- /dev/null +++ b/kernel/power/cpufreq_earlysuspend.c @@ -0,0 +1,67 @@ +/* + * Copyright (C) 2012 Freescale Semiconductor, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#include <linux/fs.h> +#include <linux/earlysuspend.h> +#include <linux/cpufreq.h> + +extern void cpufreq_save_default_governor(void); +extern void cpufreq_restore_default_governor(void); +extern void cpufreq_set_conservative_governor_param(int up_th, int down_th); +extern void cpufreq_set_performance_governor(void); +extern void cpufreq_set_conservative_governor(void); + +#define SET_GOVERNOR_TO_PERFORMANCE 1 + +static void cpufreq_early_suspend(struct early_suspend *p) +{ + cpufreq_save_default_governor(); +#ifdef SET_GOVERNOR_TO_PERFORMANCE//[ + cpufreq_set_performance_governor(); +#else //][SET_GOVERNOR_TO_PERFORMANCE + cpufreq_set_conservative_governor(); + cpufreq_set_conservative_governor_param( + SET_CONSERVATIVE_GOVERNOR_UP_THRESHOLD, + SET_CONSERVATIVE_GOVERNOR_DOWN_THRESHOLD); +#endif //]SET_GOVERNOR_TO_PERFORMANCE +} + +static void cpufreq_late_resume(struct early_suspend *p) +{ + cpufreq_restore_default_governor(); +} + +struct early_suspend cpufreq_earlysuspend = { + .level = EARLY_SUSPEND_LEVEL_POST_DISABLE_FB, + .suspend = cpufreq_early_suspend, + .resume = cpufreq_late_resume, +}; + +static int __init cpufreq_on_earlysuspend_init(void) +{ + register_early_suspend(&cpufreq_earlysuspend); + return 0; +} + +static void __exit cpufreq_on_earlysuspend_exit(void) +{ + unregister_early_suspend(&cpufreq_earlysuspend); +} + +module_init(cpufreq_on_earlysuspend_init); +module_exit(cpufreq_on_earlysuspend_exit); diff --git a/kernel/power/cpufreq_governor_chg.c b/kernel/power/cpufreq_governor_chg.c new file mode 100644 index 00000000..719b5f99 --- /dev/null +++ b/kernel/power/cpufreq_governor_chg.c @@ -0,0 +1,138 @@ + +#include <linux/fs.h> +#include <linux/cpufreq.h> +#if 0 +extern void cpufreq_save_default_governor(void); +extern void cpufreq_restore_default_governor(void); +extern void cpufreq_set_conservative_governor(void); +extern void cpufreq_set_performance_governor(void); +extern void cpufreq_set_conservative_governor_param(int up_th, int down_th); +#endif + +#define GOV_CHG_DBG 1 + +#define SET_CONSERVATIVE_GOVERNOR_UP_THRESHOLD 95 +#define SET_CONSERVATIVE_GOVERNOR_DOWN_THRESHOLD 50 + +static char cpufreq_gov_default[32]; + +static char *sz_cpufreq_gov_performance = "performance"; +static char *sz_cpufreq_gov_conservative = "conservative"; + +static char *cpufreq_sysfs_place_holder = "/sys/devices/system/cpu/cpu%i/cpufreq/scaling_governor"; +static char *cpufreq_gov_conservative_param = "/sys/devices/system/cpu/cpufreq/conservative/%s"; + +static void cpufreq_set_governor(char *governor) +{ + struct file *scaling_gov = NULL; + char buf[128]; + int i; + loff_t offset = 0; + + if (governor == NULL) + return; + + for_each_online_cpu(i) { + sprintf(buf, cpufreq_sysfs_place_holder, i); + scaling_gov = filp_open(buf, O_RDWR, 0); + if (scaling_gov != NULL) { + if (scaling_gov->f_op != NULL && + scaling_gov->f_op->write != NULL) + { + scaling_gov->f_op->write(scaling_gov, + governor, + strlen(governor), + &offset); +#ifdef GOV_CHG_DBG//[ + printk("%s():set policy \"%s\"\n",__FUNCTION__,governor); +#endif //]GOV_CHG_DBG + } + else + pr_err("f_op might be null\n"); + + filp_close(scaling_gov, NULL); + } else { + pr_err("%s. 
Can't open %s\n", __func__, buf); + } + } +} + +void cpufreq_save_default_governor(void) +{ + int ret; + struct cpufreq_policy current_policy; + ret = cpufreq_get_policy(¤t_policy, 0); + if (ret < 0) + pr_err("%s: cpufreq_get_policy got error", __func__); + memcpy(cpufreq_gov_default, current_policy.governor->name, 32); +#ifdef GOV_CHG_DBG//[ + printk("%s():save policy \"%s\"\n",__FUNCTION__,cpufreq_gov_default); +#endif //]GOV_CHG_DBG +} + +void cpufreq_restore_default_governor(void) +{ + cpufreq_set_governor(cpufreq_gov_default); +#ifdef GOV_CHG_DBG//[ + printk("%s():restore policy \"%s\"\n",__FUNCTION__,cpufreq_gov_default); +#endif //]GOV_CHG_DBG +} + +void cpufreq_set_conservative_governor_param(int up_th, int down_th) +{ + struct file *gov_param = NULL; + static char buf[128], parm[8]; + loff_t offset = 0; + + if (up_th <= down_th) { + printk(KERN_ERR "%s: up_th(%d) is lesser than down_th(%d)\n", + __func__, up_th, down_th); + return; + } + + sprintf(parm, "%d", up_th); + sprintf(buf, cpufreq_gov_conservative_param , "up_threshold"); + gov_param = filp_open(buf, O_RDONLY, 0); + if (gov_param != NULL) { + if (gov_param->f_op != NULL && + gov_param->f_op->write != NULL) + gov_param->f_op->write(gov_param, + parm, + strlen(parm), + &offset); + else + pr_err("f_op might be null\n"); + + filp_close(gov_param, NULL); + } else { + pr_err("%s. Can't open %s\n", __func__, buf); + } + + sprintf(parm, "%d", down_th); + sprintf(buf, cpufreq_gov_conservative_param , "down_threshold"); + gov_param = filp_open(buf, O_RDONLY, 0); + if (gov_param != NULL) { + if (gov_param->f_op != NULL && + gov_param->f_op->write != NULL) + gov_param->f_op->write(gov_param, + parm, + strlen(parm), + &offset); + else + pr_err("f_op might be null\n"); + + filp_close(gov_param, NULL); + } else { + pr_err("%s. Can't open %s\n", __func__, buf); + } +} +void cpufreq_set_performance_governor(void) +{ + cpufreq_set_governor(sz_cpufreq_gov_performance); +} +void cpufreq_set_conservative_governor(void) +{ + cpufreq_set_governor(sz_cpufreq_gov_conservative); +} + + diff --git a/kernel/power/cpuhotplug_earlysuspend.c b/kernel/power/cpuhotplug_earlysuspend.c new file mode 100644 index 00000000..4b12c7e9 --- /dev/null +++ b/kernel/power/cpuhotplug_earlysuspend.c @@ -0,0 +1,111 @@ +/* + * Copyright (C) 2011-2012 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include <linux/earlysuspend.h> +#include <linux/cpu.h> + +enum { + DEBUG_USER_STATE = 1U << 0, + DEBUG_SUSPEND = 1U << 2, + DEBUG_VERBOSE = 1U << 3, +}; +static int debug_mask = DEBUG_USER_STATE; +module_param_named(debug_mask, debug_mask, int, S_IRUGO | S_IWUSR | S_IWGRP); + +static DEFINE_PER_CPU(int, tag); +static struct work_struct cpu_up_work; +static struct workqueue_struct *cpu_op_workqueue; + + +static void earlysuspend_cpu_op(int cpu, bool status) +{ + /* tag the cpu is onling/offline */ + if (status) { + per_cpu(tag, cpu) = 0; + cpu_up(cpu); + } else { + per_cpu(tag, cpu) = 1; + cpu_down(cpu); + } +} + +static void cpuhotplug_early_suspend(struct early_suspend *p) +{ + int first_cpu, cpu; + /* skip the first cpu, cpu0 always online */ + first_cpu = cpumask_first(cpu_online_mask); + for_each_possible_cpu(cpu) { + if (cpu == first_cpu) + continue; + if (cpu_online(cpu)) + earlysuspend_cpu_op(cpu, false); + } +} + +static void cpu_up_work_func(struct work_struct *work) +{ + int first_cpu, c; + /* skip the first cpu, cpu0 always online */ + first_cpu = cpumask_first(cpu_online_mask); + for_each_possible_cpu(c) { + if (c == first_cpu) + continue; + if (debug_mask & DEBUG_SUSPEND) + pr_info(" %s: CPU%d tag %d\n", __func__, c, + per_cpu(tag, c)); + if (!cpu_online(c) && per_cpu(tag, c)) + earlysuspend_cpu_op(c, true); + } +} + + +static void cpuhotplug_late_resume(struct early_suspend *p) +{ + if (debug_mask & DEBUG_SUSPEND) + pr_info(" %s: bootup secondary cpus\n", __func__); + queue_work(cpu_op_workqueue, &cpu_up_work); + +} + +struct early_suspend cpuhotplug_earlysuspend = { + .level = EARLY_SUSPEND_LEVEL_DISABLE_FB, + .suspend = cpuhotplug_early_suspend, + .resume = cpuhotplug_late_resume, +}; + + +static int __init cpuhotplug_earlysuspend_init(void) +{ + cpu_op_workqueue = create_workqueue("cpu hotplug earlysuspend wq"); + INIT_WORK(&cpu_up_work, cpu_up_work_func); + + register_early_suspend(&cpuhotplug_earlysuspend); + return 0; +} + +static void __exit cpuhotplug_earlysuspend_exit(void) +{ + if (NULL != cpu_op_workqueue) + destroy_workqueue(cpu_op_workqueue); + unregister_early_suspend(&cpuhotplug_earlysuspend); +} + +module_init(cpuhotplug_earlysuspend_init); +module_exit(cpuhotplug_earlysuspend_exit); +MODULE_AUTHOR("Freescale Semiconductor, Inc."); diff --git a/kernel/power/earlysuspend.c b/kernel/power/earlysuspend.c new file mode 100644 index 00000000..b15f02eb --- /dev/null +++ b/kernel/power/earlysuspend.c @@ -0,0 +1,187 @@ +/* kernel/power/earlysuspend.c + * + * Copyright (C) 2005-2008 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#include <linux/earlysuspend.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/rtc.h> +#include <linux/syscalls.h> /* sys_sync */ +#include <linux/wakelock.h> +#include <linux/workqueue.h> + +#include "power.h" + +enum { + DEBUG_USER_STATE = 1U << 0, + DEBUG_SUSPEND = 1U << 2, + DEBUG_VERBOSE = 1U << 3, +}; +static int debug_mask = DEBUG_USER_STATE; +module_param_named(debug_mask, debug_mask, int, S_IRUGO | S_IWUSR | S_IWGRP); + +static DEFINE_MUTEX(early_suspend_lock); +static LIST_HEAD(early_suspend_handlers); +static void early_suspend(struct work_struct *work); +static void late_resume(struct work_struct *work); +static DECLARE_WORK(early_suspend_work, early_suspend); +static DECLARE_WORK(late_resume_work, late_resume); +static DEFINE_SPINLOCK(state_lock); +enum { + SUSPEND_REQUESTED = 0x1, + SUSPENDED = 0x2, + SUSPEND_REQUESTED_AND_SUSPENDED = SUSPEND_REQUESTED | SUSPENDED, +}; +static int state; + +void register_early_suspend(struct early_suspend *handler) +{ + struct list_head *pos; + + mutex_lock(&early_suspend_lock); + list_for_each(pos, &early_suspend_handlers) { + struct early_suspend *e; + e = list_entry(pos, struct early_suspend, link); + if (e->level > handler->level) + break; + } + list_add_tail(&handler->link, pos); + if ((state & SUSPENDED) && handler->suspend) + handler->suspend(handler); + mutex_unlock(&early_suspend_lock); +} +EXPORT_SYMBOL(register_early_suspend); + +void unregister_early_suspend(struct early_suspend *handler) +{ + mutex_lock(&early_suspend_lock); + list_del(&handler->link); + mutex_unlock(&early_suspend_lock); +} +EXPORT_SYMBOL(unregister_early_suspend); + +static void early_suspend(struct work_struct *work) +{ + struct early_suspend *pos; + unsigned long irqflags; + int abort = 0; + + mutex_lock(&early_suspend_lock); + spin_lock_irqsave(&state_lock, irqflags); + if (state == SUSPEND_REQUESTED) + state |= SUSPENDED; + else + abort = 1; + spin_unlock_irqrestore(&state_lock, irqflags); + + if (abort) { + if (debug_mask & DEBUG_SUSPEND) + pr_info("early_suspend: abort, state %d\n", state); + mutex_unlock(&early_suspend_lock); + goto abort; + } + + if (debug_mask & DEBUG_SUSPEND) + pr_info("early_suspend: call handlers\n"); + list_for_each_entry(pos, &early_suspend_handlers, link) { + if (pos->suspend != NULL) { + if (debug_mask & DEBUG_VERBOSE) + pr_info("early_suspend: calling %pf\n", pos->suspend); + pos->suspend(pos); + } + } + mutex_unlock(&early_suspend_lock); + + if (debug_mask & DEBUG_SUSPEND) + pr_info("early_suspend: sync\n"); + + sys_sync(); +abort: + spin_lock_irqsave(&state_lock, irqflags); + if (state == SUSPEND_REQUESTED_AND_SUSPENDED) + wake_unlock(&main_wake_lock); + spin_unlock_irqrestore(&state_lock, irqflags); +} + +static void late_resume(struct work_struct *work) +{ + struct early_suspend *pos; + unsigned long irqflags; + int abort = 0; + + mutex_lock(&early_suspend_lock); + spin_lock_irqsave(&state_lock, irqflags); + if (state == SUSPENDED) + state &= ~SUSPENDED; + else + abort = 1; + spin_unlock_irqrestore(&state_lock, irqflags); + + if (abort) { + if (debug_mask & DEBUG_SUSPEND) + pr_info("late_resume: abort, state %d\n", state); + goto abort; + } + if (debug_mask & DEBUG_SUSPEND) + pr_info("late_resume: call handlers\n"); + list_for_each_entry_reverse(pos, &early_suspend_handlers, link) { + if (pos->resume != NULL) { + if (debug_mask & DEBUG_VERBOSE) + pr_info("late_resume: calling %pf\n", pos->resume); + + pos->resume(pos); + } + } + if (debug_mask & DEBUG_SUSPEND) + pr_info("late_resume: 
done\n"); +abort: + mutex_unlock(&early_suspend_lock); +} + +void request_suspend_state(suspend_state_t new_state) +{ + unsigned long irqflags; + int old_sleep; + + spin_lock_irqsave(&state_lock, irqflags); + old_sleep = state & SUSPEND_REQUESTED; + if (debug_mask & DEBUG_USER_STATE) { + struct timespec ts; + struct rtc_time tm; + getnstimeofday(&ts); + rtc_time_to_tm(ts.tv_sec, &tm); + pr_info("request_suspend_state: %s (%d->%d) at %lld " + "(%d-%02d-%02d %02d:%02d:%02d.%09lu UTC)\n", + new_state != PM_SUSPEND_ON ? "sleep" : "wakeup", + requested_suspend_state, new_state, + ktime_to_ns(ktime_get()), + tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, + tm.tm_hour, tm.tm_min, tm.tm_sec, ts.tv_nsec); + } + if (!old_sleep && new_state != PM_SUSPEND_ON) { + state |= SUSPEND_REQUESTED; + queue_work(suspend_work_queue, &early_suspend_work); + } else if (old_sleep && new_state == PM_SUSPEND_ON) { + state &= ~SUSPEND_REQUESTED; + wake_lock(&main_wake_lock); + queue_work(suspend_work_queue, &late_resume_work); + } + requested_suspend_state = new_state; + spin_unlock_irqrestore(&state_lock, irqflags); +} + +suspend_state_t get_suspend_state(void) +{ + return requested_suspend_state; +} diff --git a/kernel/power/fbearlysuspend.c b/kernel/power/fbearlysuspend.c new file mode 100644 index 00000000..15137650 --- /dev/null +++ b/kernel/power/fbearlysuspend.c @@ -0,0 +1,153 @@ +/* kernel/power/fbearlysuspend.c + * + * Copyright (C) 2005-2008 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#include <linux/earlysuspend.h> +#include <linux/module.h> +#include <linux/wait.h> + +#include "power.h" + +static wait_queue_head_t fb_state_wq; +static DEFINE_SPINLOCK(fb_state_lock); +static enum { + FB_STATE_STOPPED_DRAWING, + FB_STATE_REQUEST_STOP_DRAWING, + FB_STATE_DRAWING_OK, +} fb_state; + +/* tell userspace to stop drawing, wait for it to stop */ +static void stop_drawing_early_suspend(struct early_suspend *h) +{ + int ret; + unsigned long irq_flags; + + spin_lock_irqsave(&fb_state_lock, irq_flags); + fb_state = FB_STATE_REQUEST_STOP_DRAWING; + spin_unlock_irqrestore(&fb_state_lock, irq_flags); + + wake_up_all(&fb_state_wq); + ret = wait_event_timeout(fb_state_wq, + fb_state == FB_STATE_STOPPED_DRAWING, + HZ); + if (unlikely(fb_state != FB_STATE_STOPPED_DRAWING)) + pr_warning("stop_drawing_early_suspend: timeout waiting for " + "userspace to stop drawing\n"); +} + +/* tell userspace to start drawing */ +static void start_drawing_late_resume(struct early_suspend *h) +{ + unsigned long irq_flags; + + spin_lock_irqsave(&fb_state_lock, irq_flags); + fb_state = FB_STATE_DRAWING_OK; + spin_unlock_irqrestore(&fb_state_lock, irq_flags); + wake_up(&fb_state_wq); +} + +static struct early_suspend stop_drawing_early_suspend_desc = { + .level = EARLY_SUSPEND_LEVEL_STOP_DRAWING, + .suspend = stop_drawing_early_suspend, + .resume = start_drawing_late_resume, +}; + +static ssize_t wait_for_fb_sleep_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + char *s = buf; + int ret; + + ret = wait_event_interruptible(fb_state_wq, + fb_state != FB_STATE_DRAWING_OK); + if (ret && fb_state == FB_STATE_DRAWING_OK) + return ret; + else + s += sprintf(buf, "sleeping"); + return s - buf; +} + +static ssize_t wait_for_fb_wake_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + char *s = buf; + int ret; + unsigned long irq_flags; + + spin_lock_irqsave(&fb_state_lock, irq_flags); + if (fb_state == FB_STATE_REQUEST_STOP_DRAWING) { + fb_state = FB_STATE_STOPPED_DRAWING; + wake_up(&fb_state_wq); + } + spin_unlock_irqrestore(&fb_state_lock, irq_flags); + + ret = wait_event_interruptible(fb_state_wq, + fb_state == FB_STATE_DRAWING_OK); + if (ret && fb_state != FB_STATE_DRAWING_OK) + return ret; + else + s += sprintf(buf, "awake"); + + return s - buf; +} + +#define power_ro_attr(_name) \ +static struct kobj_attribute _name##_attr = { \ + .attr = { \ + .name = __stringify(_name), \ + .mode = 0444, \ + }, \ + .show = _name##_show, \ + .store = NULL, \ +} + +power_ro_attr(wait_for_fb_sleep); +power_ro_attr(wait_for_fb_wake); + +static struct attribute *g[] = { + &wait_for_fb_sleep_attr.attr, + &wait_for_fb_wake_attr.attr, + NULL, +}; + +static struct attribute_group attr_group = { + .attrs = g, +}; + +static int __init android_power_init(void) +{ + int ret; + + init_waitqueue_head(&fb_state_wq); + fb_state = FB_STATE_DRAWING_OK; + + ret = sysfs_create_group(power_kobj, &attr_group); + if (ret) { + pr_err("android_power_init: sysfs_create_group failed\n"); + return ret; + } + + register_early_suspend(&stop_drawing_early_suspend_desc); + return 0; +} + +static void __exit android_power_exit(void) +{ + unregister_early_suspend(&stop_drawing_early_suspend_desc); + sysfs_remove_group(power_kobj, &attr_group); +} + +module_init(android_power_init); +module_exit(android_power_exit); + diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c new file mode 100644 index 00000000..8884c276 --- /dev/null +++ b/kernel/power/hibernate.c @@ -0,0 +1,1067 @@ +/* + * 
kernel/power/hibernate.c - Hibernation (a.k.a suspend-to-disk) support. + * + * Copyright (c) 2003 Patrick Mochel + * Copyright (c) 2003 Open Source Development Lab + * Copyright (c) 2004 Pavel Machek <pavel@ucw.cz> + * Copyright (c) 2009 Rafael J. Wysocki, Novell Inc. + * + * This file is released under the GPLv2. + */ + +#include <linux/suspend.h> +#include <linux/syscalls.h> +#include <linux/reboot.h> +#include <linux/string.h> +#include <linux/device.h> +#include <linux/kmod.h> +#include <linux/delay.h> +#include <linux/fs.h> +#include <linux/mount.h> +#include <linux/pm.h> +#include <linux/console.h> +#include <linux/cpu.h> +#include <linux/freezer.h> +#include <linux/gfp.h> +#include <linux/syscore_ops.h> +#include <scsi/scsi_scan.h> + +#include "power.h" + + +static int nocompress = 0; +static int noresume = 0; +static char resume_file[256] = CONFIG_PM_STD_PARTITION; +dev_t swsusp_resume_device; +sector_t swsusp_resume_block; +int in_suspend __nosavedata = 0; + +enum { + HIBERNATION_INVALID, + HIBERNATION_PLATFORM, + HIBERNATION_TEST, + HIBERNATION_TESTPROC, + HIBERNATION_SHUTDOWN, + HIBERNATION_REBOOT, + /* keep last */ + __HIBERNATION_AFTER_LAST +}; +#define HIBERNATION_MAX (__HIBERNATION_AFTER_LAST-1) +#define HIBERNATION_FIRST (HIBERNATION_INVALID + 1) + +static int hibernation_mode = HIBERNATION_SHUTDOWN; + +static const struct platform_hibernation_ops *hibernation_ops; + +/** + * hibernation_set_ops - Set the global hibernate operations. + * @ops: Hibernation operations to use in subsequent hibernation transitions. + */ +void hibernation_set_ops(const struct platform_hibernation_ops *ops) +{ + if (ops && !(ops->begin && ops->end && ops->pre_snapshot + && ops->prepare && ops->finish && ops->enter && ops->pre_restore + && ops->restore_cleanup && ops->leave)) { + WARN_ON(1); + return; + } + mutex_lock(&pm_mutex); + hibernation_ops = ops; + if (ops) + hibernation_mode = HIBERNATION_PLATFORM; + else if (hibernation_mode == HIBERNATION_PLATFORM) + hibernation_mode = HIBERNATION_SHUTDOWN; + + mutex_unlock(&pm_mutex); +} + +static bool entering_platform_hibernation; + +bool system_entering_hibernation(void) +{ + return entering_platform_hibernation; +} +EXPORT_SYMBOL(system_entering_hibernation); + +#ifdef CONFIG_PM_DEBUG +static void hibernation_debug_sleep(void) +{ + printk(KERN_INFO "hibernation debug: Waiting for 5 seconds.\n"); + mdelay(5000); +} + +static int hibernation_testmode(int mode) +{ + if (hibernation_mode == mode) { + hibernation_debug_sleep(); + return 1; + } + return 0; +} + +static int hibernation_test(int level) +{ + if (pm_test_level == level) { + hibernation_debug_sleep(); + return 1; + } + return 0; +} +#else /* !CONFIG_PM_DEBUG */ +static int hibernation_testmode(int mode) { return 0; } +static int hibernation_test(int level) { return 0; } +#endif /* !CONFIG_PM_DEBUG */ + +/** + * platform_begin - Call platform to start hibernation. + * @platform_mode: Whether or not to use the platform driver. + */ +static int platform_begin(int platform_mode) +{ + return (platform_mode && hibernation_ops) ? + hibernation_ops->begin() : 0; +} + +/** + * platform_end - Call platform to finish transition to the working state. + * @platform_mode: Whether or not to use the platform driver. + */ +static void platform_end(int platform_mode) +{ + if (platform_mode && hibernation_ops) + hibernation_ops->end(); +} + +/** + * platform_pre_snapshot - Call platform to prepare the machine for hibernation. + * @platform_mode: Whether or not to use the platform driver. 
+ * + * Use the platform driver to prepare the system for creating a hibernate image, + * if so configured, and return an error code if that fails. + */ + +static int platform_pre_snapshot(int platform_mode) +{ + return (platform_mode && hibernation_ops) ? + hibernation_ops->pre_snapshot() : 0; +} + +/** + * platform_leave - Call platform to prepare a transition to the working state. + * @platform_mode: Whether or not to use the platform driver. + * + * Use the platform driver prepare to prepare the machine for switching to the + * normal mode of operation. + * + * This routine is called on one CPU with interrupts disabled. + */ +static void platform_leave(int platform_mode) +{ + if (platform_mode && hibernation_ops) + hibernation_ops->leave(); +} + +/** + * platform_finish - Call platform to switch the system to the working state. + * @platform_mode: Whether or not to use the platform driver. + * + * Use the platform driver to switch the machine to the normal mode of + * operation. + * + * This routine must be called after platform_prepare(). + */ +static void platform_finish(int platform_mode) +{ + if (platform_mode && hibernation_ops) + hibernation_ops->finish(); +} + +/** + * platform_pre_restore - Prepare for hibernate image restoration. + * @platform_mode: Whether or not to use the platform driver. + * + * Use the platform driver to prepare the system for resume from a hibernation + * image. + * + * If the restore fails after this function has been called, + * platform_restore_cleanup() must be called. + */ +static int platform_pre_restore(int platform_mode) +{ + return (platform_mode && hibernation_ops) ? + hibernation_ops->pre_restore() : 0; +} + +/** + * platform_restore_cleanup - Switch to the working state after failing restore. + * @platform_mode: Whether or not to use the platform driver. + * + * Use the platform driver to switch the system to the normal mode of operation + * after a failing restore. + * + * If platform_pre_restore() has been called before the failing restore, this + * function must be called too, regardless of the result of + * platform_pre_restore(). + */ +static void platform_restore_cleanup(int platform_mode) +{ + if (platform_mode && hibernation_ops) + hibernation_ops->restore_cleanup(); +} + +/** + * platform_recover - Recover from a failure to suspend devices. + * @platform_mode: Whether or not to use the platform driver. + */ +static void platform_recover(int platform_mode) +{ + if (platform_mode && hibernation_ops && hibernation_ops->recover) + hibernation_ops->recover(); +} + +/** + * swsusp_show_speed - Print time elapsed between two events during hibernation. + * @start: Starting event. + * @stop: Final event. + * @nr_pages: Number of memory pages processed between @start and @stop. + * @msg: Additional diagnostic message to print. + */ +void swsusp_show_speed(struct timeval *start, struct timeval *stop, + unsigned nr_pages, char *msg) +{ + s64 elapsed_centisecs64; + int centisecs; + int k; + int kps; + + elapsed_centisecs64 = timeval_to_ns(stop) - timeval_to_ns(start); + do_div(elapsed_centisecs64, NSEC_PER_SEC / 100); + centisecs = elapsed_centisecs64; + if (centisecs == 0) + centisecs = 1; /* avoid div-by-zero */ + k = nr_pages * (PAGE_SIZE / 1024); + kps = (k * 100) / centisecs; + printk(KERN_INFO "PM: %s %d kbytes in %d.%02d seconds (%d.%02d MB/s)\n", + msg, k, + centisecs / 100, centisecs % 100, + kps / 1000, (kps % 1000) / 10); +} + +/** + * create_image - Create a hibernation image. 
+ * @platform_mode: Whether or not to use the platform driver. + * + * Execute device drivers' .freeze_noirq() callbacks, create a hibernation image + * and execute the drivers' .thaw_noirq() callbacks. + * + * Control reappears in this routine after the subsequent restore. + */ +static int create_image(int platform_mode) +{ + int error; + + error = dpm_suspend_noirq(PMSG_FREEZE); + if (error) { + printk(KERN_ERR "PM: Some devices failed to power down, " + "aborting hibernation\n"); + return error; + } + + error = platform_pre_snapshot(platform_mode); + if (error || hibernation_test(TEST_PLATFORM)) + goto Platform_finish; + + error = disable_nonboot_cpus(); + if (error || hibernation_test(TEST_CPUS) + || hibernation_testmode(HIBERNATION_TEST)) + goto Enable_cpus; + + local_irq_disable(); + + error = syscore_suspend(); + if (error) { + printk(KERN_ERR "PM: Some system devices failed to power down, " + "aborting hibernation\n"); + goto Enable_irqs; + } + + if (hibernation_test(TEST_CORE) || pm_wakeup_pending()) + goto Power_up; + + in_suspend = 1; + save_processor_state(); + error = swsusp_arch_suspend(); + if (error) + printk(KERN_ERR "PM: Error %d creating hibernation image\n", + error); + /* Restore control flow magically appears here */ + restore_processor_state(); + if (!in_suspend) { + events_check_enabled = false; + platform_leave(platform_mode); + } + + Power_up: + syscore_resume(); + + Enable_irqs: + local_irq_enable(); + + Enable_cpus: + enable_nonboot_cpus(); + + Platform_finish: + platform_finish(platform_mode); + + dpm_resume_noirq(in_suspend ? + (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE); + + return error; +} + +/** + * hibernation_snapshot - Quiesce devices and create a hibernation image. + * @platform_mode: If set, use platform driver to prepare for the transition. + * + * This routine must be called with pm_mutex held. + */ +int hibernation_snapshot(int platform_mode) +{ + pm_message_t msg = PMSG_RECOVER; + int error; + + error = platform_begin(platform_mode); + if (error) + goto Close; + + error = dpm_prepare(PMSG_FREEZE); + if (error) + goto Complete_devices; + + /* Preallocate image memory before shutting down devices. */ + error = hibernate_preallocate_memory(); + if (error) + goto Complete_devices; + + suspend_console(); + pm_restrict_gfp_mask(); + error = dpm_suspend(PMSG_FREEZE); + if (error) + goto Recover_platform; + + if (hibernation_test(TEST_DEVICES)) + goto Recover_platform; + + error = create_image(platform_mode); + /* + * Control returns here (1) after the image has been created or the + * image creation has failed and (2) after a successful restore. + */ + + Resume_devices: + /* We may need to release the preallocated image pages here. */ + if (error || !in_suspend) + swsusp_free(); + + msg = in_suspend ? (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE; + dpm_resume(msg); + + if (error || !in_suspend) + pm_restore_gfp_mask(); + + resume_console(); + + Complete_devices: + dpm_complete(msg); + + Close: + platform_end(platform_mode); + return error; + + Recover_platform: + platform_recover(platform_mode); + goto Resume_devices; +} + +/** + * resume_target_kernel - Restore system state from a hibernation image. + * @platform_mode: Whether or not to use the platform driver. + * + * Execute device drivers' .freeze_noirq() callbacks, restore the contents of + * highmem that have not been restored yet from the image and run the low-level + * code that will restore the remaining contents of memory and switch to the + * just restored target kernel. 
+ */ +static int resume_target_kernel(bool platform_mode) +{ + int error; + + error = dpm_suspend_noirq(PMSG_QUIESCE); + if (error) { + printk(KERN_ERR "PM: Some devices failed to power down, " + "aborting resume\n"); + return error; + } + + error = platform_pre_restore(platform_mode); + if (error) + goto Cleanup; + + error = disable_nonboot_cpus(); + if (error) + goto Enable_cpus; + + local_irq_disable(); + + error = syscore_suspend(); + if (error) + goto Enable_irqs; + + save_processor_state(); + error = restore_highmem(); + if (!error) { + error = swsusp_arch_resume(); + /* + * The code below is only ever reached in case of a failure. + * Otherwise, execution continues at the place where + * swsusp_arch_suspend() was called. + */ + BUG_ON(!error); + /* + * This call to restore_highmem() reverts the changes made by + * the previous one. + */ + restore_highmem(); + } + /* + * The only reason why swsusp_arch_resume() can fail is memory being + * very tight, so we have to free it as soon as we can to avoid + * subsequent failures. + */ + swsusp_free(); + restore_processor_state(); + touch_softlockup_watchdog(); + + syscore_resume(); + + Enable_irqs: + local_irq_enable(); + + Enable_cpus: + enable_nonboot_cpus(); + + Cleanup: + platform_restore_cleanup(platform_mode); + + dpm_resume_noirq(PMSG_RECOVER); + + return error; +} + +/** + * hibernation_restore - Quiesce devices and restore from a hibernation image. + * @platform_mode: If set, use platform driver to prepare for the transition. + * + * This routine must be called with pm_mutex held. If it is successful, control + * reappears in the restored target kernel in hibernation_snaphot(). + */ +int hibernation_restore(int platform_mode) +{ + int error; + + pm_prepare_console(); + suspend_console(); + pm_restrict_gfp_mask(); + error = dpm_suspend_start(PMSG_QUIESCE); + if (!error) { + error = resume_target_kernel(platform_mode); + dpm_resume_end(PMSG_RECOVER); + } + pm_restore_gfp_mask(); + resume_console(); + pm_restore_console(); + return error; +} + +/** + * hibernation_platform_enter - Power off the system using the platform driver. + */ +int hibernation_platform_enter(void) +{ + int error; + + if (!hibernation_ops) + return -ENOSYS; + + /* + * We have cancelled the power transition by running + * hibernation_ops->finish() before saving the image, so we should let + * the firmware know that we're going to enter the sleep state after all + */ + error = hibernation_ops->begin(); + if (error) + goto Close; + + entering_platform_hibernation = true; + suspend_console(); + error = dpm_suspend_start(PMSG_HIBERNATE); + if (error) { + if (hibernation_ops->recover) + hibernation_ops->recover(); + goto Resume_devices; + } + + error = dpm_suspend_noirq(PMSG_HIBERNATE); + if (error) + goto Resume_devices; + + error = hibernation_ops->prepare(); + if (error) + goto Platform_finish; + + error = disable_nonboot_cpus(); + if (error) + goto Platform_finish; + + local_irq_disable(); + syscore_suspend(); + if (pm_wakeup_pending()) { + error = -EAGAIN; + goto Power_up; + } + + hibernation_ops->enter(); + /* We should never get here */ + while (1); + + Power_up: + syscore_resume(); + local_irq_enable(); + enable_nonboot_cpus(); + + Platform_finish: + hibernation_ops->finish(); + + dpm_resume_noirq(PMSG_RESTORE); + + Resume_devices: + entering_platform_hibernation = false; + dpm_resume_end(PMSG_RESTORE); + resume_console(); + + Close: + hibernation_ops->end(); + + return error; +} + +/** + * power_down - Shut the machine down for hibernation. 
+ * + * Use the platform driver, if configured, to put the system into the sleep + * state corresponding to hibernation, or try to power it off or reboot, + * depending on the value of hibernation_mode. + */ +static void power_down(void) +{ + switch (hibernation_mode) { + case HIBERNATION_TEST: + case HIBERNATION_TESTPROC: + break; + case HIBERNATION_REBOOT: + kernel_restart(NULL); + break; + case HIBERNATION_PLATFORM: + hibernation_platform_enter(); + case HIBERNATION_SHUTDOWN: + kernel_power_off(); + break; + } + kernel_halt(); + /* + * Valid image is on the disk, if we continue we risk serious data + * corruption after resume. + */ + printk(KERN_CRIT "PM: Please power down manually\n"); + while(1); +} + +static int prepare_processes(void) +{ + int error = 0; + + if (freeze_processes()) { + error = -EBUSY; + thaw_processes(); + } + return error; +} + +/** + * hibernate - Carry out system hibernation, including saving the image. + */ +int hibernate(void) +{ + int error; + + mutex_lock(&pm_mutex); + /* The snapshot device should not be opened while we're running */ + if (!atomic_add_unless(&snapshot_device_available, -1, 0)) { + error = -EBUSY; + goto Unlock; + } + + pm_prepare_console(); + error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE); + if (error) + goto Exit; + + error = usermodehelper_disable(); + if (error) + goto Exit; + + /* Allocate memory management structures */ + error = create_basic_memory_bitmaps(); + if (error) + goto Enable_umh; + + printk(KERN_INFO "PM: Syncing filesystems ... "); + sys_sync(); + printk("done.\n"); + + error = prepare_processes(); + if (error) + goto Free_bitmaps; + + if (hibernation_test(TEST_FREEZER)) + goto Thaw; + + if (hibernation_testmode(HIBERNATION_TESTPROC)) + goto Thaw; + + error = hibernation_snapshot(hibernation_mode == HIBERNATION_PLATFORM); + if (error) + goto Thaw; + + if (in_suspend) { + unsigned int flags = 0; + + if (hibernation_mode == HIBERNATION_PLATFORM) + flags |= SF_PLATFORM_MODE; + if (nocompress) + flags |= SF_NOCOMPRESS_MODE; + pr_debug("PM: writing image.\n"); + error = swsusp_write(flags); + swsusp_free(); + if (!error) + power_down(); + in_suspend = 0; + pm_restore_gfp_mask(); + } else { + pr_debug("PM: Image restored successfully.\n"); + } + + Thaw: + thaw_processes(); + Free_bitmaps: + free_basic_memory_bitmaps(); + Enable_umh: + usermodehelper_enable(); + Exit: + pm_notifier_call_chain(PM_POST_HIBERNATION); + pm_restore_console(); + atomic_inc(&snapshot_device_available); + Unlock: + mutex_unlock(&pm_mutex); + return error; +} + + +/** + * software_resume - Resume from a saved hibernation image. + * + * This routine is called as a late initcall, when all devices have been + * discovered and initialized already. + * + * The image reading code is called to see if there is a hibernation image + * available for reading. If that is the case, devices are quiesced and the + * contents of memory is restored from the saved image. + * + * If this is successful, control reappears in the restored target kernel in + * hibernation_snaphot() which returns to hibernate(). Otherwise, the routine + * attempts to recover gracefully and make the kernel return to the normal mode + * of operation. + */ +static int software_resume(void) +{ + int error; + unsigned int flags; + + /* + * If the user said "noresume".. bail out early. + */ + if (noresume) + return 0; + + /* + * name_to_dev_t() below takes a sysfs buffer mutex when sysfs + * is configured into the kernel. 
Since the regular hibernate + * trigger path is via sysfs which takes a buffer mutex before + * calling hibernate functions (which take pm_mutex) this can + * cause lockdep to complain about a possible ABBA deadlock + * which cannot happen since we're in the boot code here and + * sysfs can't be invoked yet. Therefore, we use a subclass + * here to avoid lockdep complaining. + */ + mutex_lock_nested(&pm_mutex, SINGLE_DEPTH_NESTING); + + if (swsusp_resume_device) + goto Check_image; + + if (!strlen(resume_file)) { + error = -ENOENT; + goto Unlock; + } + + pr_debug("PM: Checking hibernation image partition %s\n", resume_file); + + /* Check if the device is there */ + swsusp_resume_device = name_to_dev_t(resume_file); + if (!swsusp_resume_device) { + /* + * Some device discovery might still be in progress; we need + * to wait for this to finish. + */ + wait_for_device_probe(); + /* + * We can't depend on SCSI devices being available after loading + * one of their modules until scsi_complete_async_scans() is + * called and the resume device usually is a SCSI one. + */ + scsi_complete_async_scans(); + + swsusp_resume_device = name_to_dev_t(resume_file); + if (!swsusp_resume_device) { + error = -ENODEV; + goto Unlock; + } + } + + Check_image: + pr_debug("PM: Hibernation image partition %d:%d present\n", + MAJOR(swsusp_resume_device), MINOR(swsusp_resume_device)); + + pr_debug("PM: Looking for hibernation image.\n"); + error = swsusp_check(); + if (error) + goto Unlock; + + /* The snapshot device should not be opened while we're running */ + if (!atomic_add_unless(&snapshot_device_available, -1, 0)) { + error = -EBUSY; + swsusp_close(FMODE_READ); + goto Unlock; + } + + pm_prepare_console(); + error = pm_notifier_call_chain(PM_RESTORE_PREPARE); + if (error) + goto close_finish; + + error = usermodehelper_disable(); + if (error) + goto close_finish; + + error = create_basic_memory_bitmaps(); + if (error) + goto close_finish; + + pr_debug("PM: Preparing processes for restore.\n"); + error = prepare_processes(); + if (error) { + swsusp_close(FMODE_READ); + goto Done; + } + + pr_debug("PM: Loading hibernation image.\n"); + + error = swsusp_read(&flags); + swsusp_close(FMODE_READ); + if (!error) + hibernation_restore(flags & SF_PLATFORM_MODE); + + printk(KERN_ERR "PM: Failed to load hibernation image, recovering.\n"); + swsusp_free(); + thaw_processes(); + Done: + free_basic_memory_bitmaps(); + usermodehelper_enable(); + Finish: + pm_notifier_call_chain(PM_POST_RESTORE); + pm_restore_console(); + atomic_inc(&snapshot_device_available); + /* For success case, the suspend path will release the lock */ + Unlock: + mutex_unlock(&pm_mutex); + pr_debug("PM: Hibernation image not present or could not be loaded.\n"); + return error; +close_finish: + swsusp_close(FMODE_READ); + goto Finish; +} + +late_initcall(software_resume); + + +static const char * const hibernation_modes[] = { + [HIBERNATION_PLATFORM] = "platform", + [HIBERNATION_SHUTDOWN] = "shutdown", + [HIBERNATION_REBOOT] = "reboot", + [HIBERNATION_TEST] = "test", + [HIBERNATION_TESTPROC] = "testproc", +}; + +/* + * /sys/power/disk - Control hibernation mode. + * + * Hibernation can be handled in several ways. There are a few different ways + * to put the system into the sleep state: using the platform driver (e.g. ACPI + * or other hibernation_ops), powering it off or rebooting it (for testing + * mostly), or using one of the two available test modes. 
+ * + * The sysfs file /sys/power/disk provides an interface for selecting the + * hibernation mode to use. Reading from this file causes the available modes + * to be printed. There are 5 modes that can be supported: + * + * 'platform' + * 'shutdown' + * 'reboot' + * 'test' + * 'testproc' + * + * If a platform hibernation driver is in use, 'platform' will be supported + * and will be used by default. Otherwise, 'shutdown' will be used by default. + * The selected option (i.e. the one corresponding to the current value of + * hibernation_mode) is enclosed by a square bracket. + * + * To select a given hibernation mode it is necessary to write the mode's + * string representation (as returned by reading from /sys/power/disk) back + * into /sys/power/disk. + */ + +static ssize_t disk_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + int i; + char *start = buf; + + for (i = HIBERNATION_FIRST; i <= HIBERNATION_MAX; i++) { + if (!hibernation_modes[i]) + continue; + switch (i) { + case HIBERNATION_SHUTDOWN: + case HIBERNATION_REBOOT: + case HIBERNATION_TEST: + case HIBERNATION_TESTPROC: + break; + case HIBERNATION_PLATFORM: + if (hibernation_ops) + break; + /* not a valid mode, continue with loop */ + continue; + } + if (i == hibernation_mode) + buf += sprintf(buf, "[%s] ", hibernation_modes[i]); + else + buf += sprintf(buf, "%s ", hibernation_modes[i]); + } + buf += sprintf(buf, "\n"); + return buf-start; +} + +static ssize_t disk_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t n) +{ + int error = 0; + int i; + int len; + char *p; + int mode = HIBERNATION_INVALID; + + p = memchr(buf, '\n', n); + len = p ? p - buf : n; + + mutex_lock(&pm_mutex); + for (i = HIBERNATION_FIRST; i <= HIBERNATION_MAX; i++) { + if (len == strlen(hibernation_modes[i]) + && !strncmp(buf, hibernation_modes[i], len)) { + mode = i; + break; + } + } + if (mode != HIBERNATION_INVALID) { + switch (mode) { + case HIBERNATION_SHUTDOWN: + case HIBERNATION_REBOOT: + case HIBERNATION_TEST: + case HIBERNATION_TESTPROC: + hibernation_mode = mode; + break; + case HIBERNATION_PLATFORM: + if (hibernation_ops) + hibernation_mode = mode; + else + error = -EINVAL; + } + } else + error = -EINVAL; + + if (!error) + pr_debug("PM: Hibernation mode set to '%s'\n", + hibernation_modes[mode]); + mutex_unlock(&pm_mutex); + return error ? 
error : n; +} + +power_attr(disk); + +static ssize_t resume_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + return sprintf(buf,"%d:%d\n", MAJOR(swsusp_resume_device), + MINOR(swsusp_resume_device)); +} + +static ssize_t resume_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t n) +{ + unsigned int maj, min; + dev_t res; + int ret = -EINVAL; + + if (sscanf(buf, "%u:%u", &maj, &min) != 2) + goto out; + + res = MKDEV(maj,min); + if (maj != MAJOR(res) || min != MINOR(res)) + goto out; + + mutex_lock(&pm_mutex); + swsusp_resume_device = res; + mutex_unlock(&pm_mutex); + printk(KERN_INFO "PM: Starting manual resume from disk\n"); + noresume = 0; + software_resume(); + ret = n; + out: + return ret; +} + +power_attr(resume); + +static ssize_t image_size_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + return sprintf(buf, "%lu\n", image_size); +} + +static ssize_t image_size_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t n) +{ + unsigned long size; + + if (sscanf(buf, "%lu", &size) == 1) { + image_size = size; + return n; + } + + return -EINVAL; +} + +power_attr(image_size); + +static ssize_t reserved_size_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "%lu\n", reserved_size); +} + +static ssize_t reserved_size_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t n) +{ + unsigned long size; + + if (sscanf(buf, "%lu", &size) == 1) { + reserved_size = size; + return n; + } + + return -EINVAL; +} + +power_attr(reserved_size); + +static struct attribute * g[] = { + &disk_attr.attr, + &resume_attr.attr, + &image_size_attr.attr, + &reserved_size_attr.attr, + NULL, +}; + + +static struct attribute_group attr_group = { + .attrs = g, +}; + + +static int __init pm_disk_init(void) +{ + return sysfs_create_group(power_kobj, &attr_group); +} + +core_initcall(pm_disk_init); + + +static int __init resume_setup(char *str) +{ + if (noresume) + return 1; + + strncpy( resume_file, str, 255 ); + return 1; +} + +static int __init resume_offset_setup(char *str) +{ + unsigned long long offset; + + if (noresume) + return 1; + + if (sscanf(str, "%llu", &offset) == 1) + swsusp_resume_block = offset; + + return 1; +} + +static int __init hibernate_setup(char *str) +{ + if (!strncmp(str, "noresume", 8)) + noresume = 1; + else if (!strncmp(str, "nocompress", 10)) + nocompress = 1; + return 1; +} + +static int __init noresume_setup(char *str) +{ + noresume = 1; + return 1; +} + +__setup("noresume", noresume_setup); +__setup("resume_offset=", resume_offset_setup); +__setup("resume=", resume_setup); +__setup("hibernate=", hibernate_setup); diff --git a/kernel/power/main.c b/kernel/power/main.c new file mode 100644 index 00000000..88de0d86 --- /dev/null +++ b/kernel/power/main.c @@ -0,0 +1,453 @@ +/* + * kernel/power/main.c - PM subsystem core functionality. 
+ * + * Copyright (c) 2003 Patrick Mochel + * Copyright (c) 2003 Open Source Development Lab + * + * This file is released under the GPLv2 + * + */ + +#include <linux/kobject.h> +#include <linux/string.h> +#include <linux/resume-trace.h> +#include <linux/workqueue.h> + +#include "power.h" + +DEFINE_MUTEX(pm_mutex); +EXPORT_SYMBOL(pm_mutex); + +#include "../../drivers/misc/ntx-misc.h" +#include "../../arch/arm/mach-mx6/ntx_hwconfig.h" +extern volatile NTX_HWCONFIG *gptHWCFG; + +#ifdef CONFIG_PM_SLEEP + +/* Routines for PM-transition notifications */ + +static BLOCKING_NOTIFIER_HEAD(pm_chain_head); + +int register_pm_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_register(&pm_chain_head, nb); +} +EXPORT_SYMBOL_GPL(register_pm_notifier); + +int unregister_pm_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_unregister(&pm_chain_head, nb); +} +EXPORT_SYMBOL_GPL(unregister_pm_notifier); + +int pm_notifier_call_chain(unsigned long val) +{ + return (blocking_notifier_call_chain(&pm_chain_head, val, NULL) + == NOTIFY_BAD) ? -EINVAL : 0; +} + +/* If set, devices may be suspended and resumed asynchronously. */ +int pm_async_enabled = 1; + +static ssize_t pm_async_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + return sprintf(buf, "%d\n", pm_async_enabled); +} + +static ssize_t pm_async_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t n) +{ + unsigned long val; + + if (strict_strtoul(buf, 10, &val)) + return -EINVAL; + + if (val > 1) + return -EINVAL; + + pm_async_enabled = val; + return n; +} + +power_attr(pm_async); + +#ifdef CONFIG_PM_DEBUG +int pm_test_level = TEST_NONE; + +static const char * const pm_tests[__TEST_AFTER_LAST] = { + [TEST_NONE] = "none", + [TEST_CORE] = "core", + [TEST_CPUS] = "processors", + [TEST_PLATFORM] = "platform", + [TEST_DEVICES] = "devices", + [TEST_FREEZER] = "freezer", +}; + +static ssize_t pm_test_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + char *s = buf; + int level; + + for (level = TEST_FIRST; level <= TEST_MAX; level++) + if (pm_tests[level]) { + if (level == pm_test_level) + s += sprintf(s, "[%s] ", pm_tests[level]); + else + s += sprintf(s, "%s ", pm_tests[level]); + } + + if (s != buf) + /* convert the last space to a newline */ + *(s-1) = '\n'; + + return (s - buf); +} + +static ssize_t pm_test_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t n) +{ + const char * const *s; + int level; + char *p; + int len; + int error = -EINVAL; + + p = memchr(buf, '\n', n); + len = p ? p - buf : n; + + mutex_lock(&pm_mutex); + + level = TEST_FIRST; + for (s = &pm_tests[level]; level <= TEST_MAX; s++, level++) + if (*s && len == strlen(*s) && !strncmp(buf, *s, len)) { + pm_test_level = level; + error = 0; + break; + } + + mutex_unlock(&pm_mutex); + + return error ? error : n; +} + +power_attr(pm_test); +#endif /* CONFIG_PM_DEBUG */ + +#endif /* CONFIG_PM_SLEEP */ + +struct kobject *power_kobj; + +/** + * state - control system power state. + * + * show() returns what states are supported, which is hard-coded to + * 'standby' (Power-On Suspend), 'mem' (Suspend-to-RAM), and + * 'disk' (Suspend-to-Disk). + * + * store() accepts one of those strings, translates it into the + * proper enumerated value, and initiates a suspend transition. 
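+ *
+ * As a usage illustration only (the exact set of states listed depends on
+ * platform support and the kernel configuration), a user-space trigger
+ * typically looks like:
+ *
+ *	# cat /sys/power/state
+ *	standby mem disk
+ *	# echo mem > /sys/power/state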
+ */ +static ssize_t state_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + char *s = buf; +#ifdef CONFIG_SUSPEND + int i; + + for (i = 0; i < PM_SUSPEND_MAX; i++) { + if (pm_states[i] && valid_state(i)) + s += sprintf(s,"%s ", pm_states[i]); + } +#endif +#ifdef CONFIG_HIBERNATION + s += sprintf(s, "%s\n", "disk"); +#else + if (s != buf) + /* convert the last space to a newline */ + *(s-1) = '\n'; +#endif + return (s - buf); +} + +static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t n) +{ +#ifdef CONFIG_SUSPEND +#ifdef CONFIG_EARLYSUSPEND + suspend_state_t state = PM_SUSPEND_ON; +#else + suspend_state_t state = PM_SUSPEND_STANDBY; +#endif + const char * const *s; +#endif + char *p; + int len; + int error = -EINVAL; + + p = memchr(buf, '\n', n); + len = p ? p - buf : n; + + /* First, check if we are requested to hibernate */ + if (len == 4 && !strncmp(buf, "disk", len)) { + error = hibernate(); + goto Exit; + } + +#ifdef CONFIG_SUSPEND + for (s = &pm_states[state]; state < PM_SUSPEND_MAX; s++, state++) { + if (*s && len == strlen(*s) && !strncmp(buf, *s, len)) + break; + } + if (state < PM_SUSPEND_MAX && *s) +#ifdef CONFIG_EARLYSUSPEND + if (state == PM_SUSPEND_ON || valid_state(state)) { + error = 0; + request_suspend_state(state); + } +#else + error = enter_state(state); +#endif +#endif + + Exit: + return error ? error : n; +} + +power_attr(state); + +extern int gSleep_Mode_Suspend; +extern int ntx_get_homepad_enabled_status(void); +static ssize_t state_extended_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + char *s = buf; + s += sprintf(s, "%d\n", gSleep_Mode_Suspend); + return (s - buf); +} + +static ssize_t state_extended_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t n) +{ + if ('1' == *buf) { + gSleep_Mode_Suspend = 1; + if(36==gptHWCFG->m_val.bPCB || 40==gptHWCFG->m_val.bPCB || 49==gptHWCFG->m_val.bPCB) { + // E60Q3X/E60Q5X/E60QDX + msp430_homepad_enable(0); + } + } + else { + gSleep_Mode_Suspend = 0; +// printk ("[%s-%d] %s() %d\n",__FILE__,__LINE__,__func__,gSleep_Mode_Suspend); + if(36==gptHWCFG->m_val.bPCB || 40==gptHWCFG->m_val.bPCB || 49==gptHWCFG->m_val.bPCB) { + // E60Q3X/E60Q5X/E60QDX + if(0!=ntx_get_homepad_enabled_status()){ + msp430_homepad_enable(2); + } + } + } + + return n; +} + +//power_attr(state_extended); +static struct kobj_attribute state_extended_attr = { + .attr = { + .name = "state-extended", + .mode = 0644, + }, + .show = state_extended_show, + .store = state_extended_store, +}; + +#ifdef CONFIG_PM_SLEEP +/* + * The 'wakeup_count' attribute, along with the functions defined in + * drivers/base/power/wakeup.c, provides a means by which wakeup events can be + * handled in a non-racy way. + * + * If a wakeup event occurs when the system is in a sleep state, it simply is + * woken up. In turn, if an event that would wake the system up from a sleep + * state occurs when it is undergoing a transition to that sleep state, the + * transition should be aborted. Moreover, if such an event occurs when the + * system is in the working state, an attempt to start a transition to the + * given sleep state should fail during certain period after the detection of + * the event. 
Using the 'state' attribute alone is not sufficient to satisfy + * these requirements, because a wakeup event may occur exactly when 'state' + * is being written to and may be delivered to user space right before it is + * frozen, so the event will remain only partially processed until the system is + * woken up by another event. In particular, it won't cause the transition to + * a sleep state to be aborted. + * + * This difficulty may be overcome if user space uses 'wakeup_count' before + * writing to 'state'. It first should read from 'wakeup_count' and store + * the read value. Then, after carrying out its own preparations for the system + * transition to a sleep state, it should write the stored value to + * 'wakeup_count'. If that fails, at least one wakeup event has occurred since + * 'wakeup_count' was read and 'state' should not be written to. Otherwise, it + * is allowed to write to 'state', but the transition will be aborted if there + * are any wakeup events detected after 'wakeup_count' was written to. + */ + +static ssize_t wakeup_count_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + unsigned int val; + + return pm_get_wakeup_count(&val) ? sprintf(buf, "%u\n", val) : -EINTR; +} + +static ssize_t wakeup_count_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t n) +{ + unsigned int val; + + if (sscanf(buf, "%u", &val) == 1) { + if (pm_save_wakeup_count(val)) + return n; + } + return -EINVAL; +} + +power_attr(wakeup_count); +#endif /* CONFIG_PM_SLEEP */ + +#ifdef CONFIG_PM_TRACE +int pm_trace_enabled; + +static ssize_t pm_trace_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + return sprintf(buf, "%d\n", pm_trace_enabled); +} + +static ssize_t +pm_trace_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t n) +{ + int val; + + if (sscanf(buf, "%d", &val) == 1) { + pm_trace_enabled = !!val; + return n; + } + return -EINVAL; +} + +power_attr(pm_trace); + +static ssize_t pm_trace_dev_match_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + return show_trace_dev_match(buf, PAGE_SIZE); +} + +static ssize_t +pm_trace_dev_match_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t n) +{ + return -EINVAL; +} + +power_attr(pm_trace_dev_match); + +#endif /* CONFIG_PM_TRACE */ + +#ifdef CONFIG_SUSPEND_DEVICE_TIME_DEBUG +/* + * threshold of device suspend time consumption in microsecond(0.5ms), the + * driver suspend/resume time longer than this threshold will be + * print to console, 0 to disable */ +int device_suspend_time_threshold; + +static ssize_t +device_suspend_time_threshold_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + if (device_suspend_time_threshold == 0) + return sprintf(buf, "off\n"); + else + return sprintf(buf, "%d usecs\n", + device_suspend_time_threshold); +} + +static ssize_t +device_suspend_time_threshold_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t n) +{ + int val; + if (sscanf(buf, "%d", &val) > 0) { + device_suspend_time_threshold = val; + return n; + } + return -EINVAL; +} +power_attr(device_suspend_time_threshold); +#endif + +#ifdef CONFIG_USER_WAKELOCK +power_attr(wake_lock); +power_attr(wake_unlock); +#endif + +static struct attribute * g[] = { + &state_attr.attr, +#ifdef CONFIG_PM_TRACE + &pm_trace_attr.attr, + &pm_trace_dev_match_attr.attr, +#endif +#ifdef CONFIG_SUSPEND_DEVICE_TIME_DEBUG + 
&device_suspend_time_threshold_attr.attr, +#endif +#ifdef CONFIG_PM_SLEEP + &pm_async_attr.attr, + &wakeup_count_attr.attr, +#ifdef CONFIG_PM_DEBUG + &pm_test_attr.attr, +#endif +#ifdef CONFIG_USER_WAKELOCK + &wake_lock_attr.attr, + &wake_unlock_attr.attr, +#endif +#endif + &state_extended_attr.attr, + NULL, +}; + +static struct attribute_group attr_group = { + .attrs = g, +}; + +#ifdef CONFIG_PM_RUNTIME +struct workqueue_struct *pm_wq; +EXPORT_SYMBOL_GPL(pm_wq); + +static int __init pm_start_workqueue(void) +{ + pm_wq = alloc_workqueue("pm", WQ_FREEZABLE, 0); + + return pm_wq ? 0 : -ENOMEM; +} +#else +static inline int pm_start_workqueue(void) { return 0; } +#endif + +static int __init pm_init(void) +{ + int error = pm_start_workqueue(); + if (error) + return error; + hibernate_image_size_init(); + hibernate_reserved_size_init(); + power_kobj = kobject_create_and_add("power", NULL); + if (!power_kobj) + return -ENOMEM; + return sysfs_create_group(power_kobj, &attr_group); +} + +core_initcall(pm_init); diff --git a/kernel/power/power.h b/kernel/power/power.h new file mode 100644 index 00000000..b6b90064 --- /dev/null +++ b/kernel/power/power.h @@ -0,0 +1,271 @@ +#include <linux/suspend.h> +#include <linux/suspend_ioctls.h> +#include <linux/utsname.h> +#include <linux/freezer.h> + +struct swsusp_info { + struct new_utsname uts; + u32 version_code; + unsigned long num_physpages; + int cpus; + unsigned long image_pages; + unsigned long pages; + unsigned long size; +} __attribute__((aligned(PAGE_SIZE))); + +#ifdef CONFIG_HIBERNATION +/* kernel/power/snapshot.c */ +extern void __init hibernate_reserved_size_init(void); +extern void __init hibernate_image_size_init(void); + +#ifdef CONFIG_ARCH_HIBERNATION_HEADER +/* Maximum size of architecture specific data in a hibernation header */ +#define MAX_ARCH_HEADER_SIZE (sizeof(struct new_utsname) + 4) + +extern int arch_hibernation_header_save(void *addr, unsigned int max_size); +extern int arch_hibernation_header_restore(void *addr); + +static inline int init_header_complete(struct swsusp_info *info) +{ + return arch_hibernation_header_save(info, MAX_ARCH_HEADER_SIZE); +} + +static inline char *check_image_kernel(struct swsusp_info *info) +{ + return arch_hibernation_header_restore(info) ? + "architecture specific data" : NULL; +} +#endif /* CONFIG_ARCH_HIBERNATION_HEADER */ + +/* + * Keep some memory free so that I/O operations can succeed without paging + * [Might this be more than 4 MB?] + */ +#define PAGES_FOR_IO ((4096 * 1024) >> PAGE_SHIFT) + +/* + * Keep 1 MB of memory free so that device drivers can allocate some pages in + * their .suspend() routines without breaking the suspend to disk. 
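+ *
+ * SPARE_PAGES below is that 1 MB expressed in page frames; it is also the
+ * default value of /sys/power/reserved_size (see
+ * hibernate_reserved_size_init() in snapshot.c).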
+ */ +#define SPARE_PAGES ((1024 * 1024) >> PAGE_SHIFT) + +/* kernel/power/hibernate.c */ +extern int hibernation_snapshot(int platform_mode); +extern int hibernation_restore(int platform_mode); +extern int hibernation_platform_enter(void); + +#else /* !CONFIG_HIBERNATION */ + +static inline void hibernate_reserved_size_init(void) {} +static inline void hibernate_image_size_init(void) {} +#endif /* !CONFIG_HIBERNATION */ + +extern int pfn_is_nosave(unsigned long); + +#define power_attr(_name) \ +static struct kobj_attribute _name##_attr = { \ + .attr = { \ + .name = __stringify(_name), \ + .mode = 0644, \ + }, \ + .show = _name##_show, \ + .store = _name##_store, \ +} + +/* Preferred image size in bytes (default 500 MB) */ +extern unsigned long image_size; +/* Size of memory reserved for drivers (default SPARE_PAGES x PAGE_SIZE) */ +extern unsigned long reserved_size; +extern int in_suspend; +extern dev_t swsusp_resume_device; +extern sector_t swsusp_resume_block; + +extern asmlinkage int swsusp_arch_suspend(void); +extern asmlinkage int swsusp_arch_resume(void); + +extern int create_basic_memory_bitmaps(void); +extern void free_basic_memory_bitmaps(void); +extern int hibernate_preallocate_memory(void); + +/** + * Auxiliary structure used for reading the snapshot image data and + * metadata from and writing them to the list of page backup entries + * (PBEs) which is the main data structure of swsusp. + * + * Using struct snapshot_handle we can transfer the image, including its + * metadata, as a continuous sequence of bytes with the help of + * snapshot_read_next() and snapshot_write_next(). + * + * The code that writes the image to a storage or transfers it to + * the user land is required to use snapshot_read_next() for this + * purpose and it should not make any assumptions regarding the internal + * structure of the image. Similarly, the code that reads the image from + * a storage or transfers it from the user land is required to use + * snapshot_write_next(). + * + * This may allow us to change the internal structure of the image + * in the future with considerably less effort. + */ + +struct snapshot_handle { + unsigned int cur; /* number of the block of PAGE_SIZE bytes the + * next operation will refer to (ie. current) + */ + void *buffer; /* address of the block to read from + * or write to + */ + int sync_read; /* Set to one to notify the caller of + * snapshot_write_next() that it may + * need to call wait_on_bio_chain() + */ +}; + +/* This macro returns the address from/to which the caller of + * snapshot_read_next()/snapshot_write_next() is allowed to + * read/write data after the function returns + */ +#define data_of(handle) ((handle).buffer) + +extern unsigned int snapshot_additional_pages(struct zone *zone); +extern unsigned long snapshot_get_image_size(void); +extern int snapshot_read_next(struct snapshot_handle *handle); +extern int snapshot_write_next(struct snapshot_handle *handle); +extern void snapshot_write_finalize(struct snapshot_handle *handle); +extern int snapshot_image_loaded(struct snapshot_handle *handle); + +/* If unset, the snapshot device cannot be open. */ +extern atomic_t snapshot_device_available; + +extern sector_t alloc_swapdev_block(int swap); +extern void free_all_swap_pages(int swap); +extern int swsusp_swap_in_use(void); + +/* + * Flags that can be passed from the hibernatig hernel to the "boot" kernel in + * the image header. 
+ */ +#define SF_PLATFORM_MODE 1 +#define SF_NOCOMPRESS_MODE 2 + +/* kernel/power/hibernate.c */ +extern int swsusp_check(void); +extern void swsusp_free(void); +extern int swsusp_read(unsigned int *flags_p); +extern int swsusp_write(unsigned int flags); +extern void swsusp_close(fmode_t); + +/* kernel/power/block_io.c */ +extern struct block_device *hib_resume_bdev; + +extern int hib_bio_read_page(pgoff_t page_off, void *addr, + struct bio **bio_chain); +extern int hib_bio_write_page(pgoff_t page_off, void *addr, + struct bio **bio_chain); +extern int hib_wait_on_bio_chain(struct bio **bio_chain); + +struct timeval; +/* kernel/power/swsusp.c */ +extern void swsusp_show_speed(struct timeval *, struct timeval *, + unsigned int, char *); + +#ifdef CONFIG_SUSPEND +/* kernel/power/suspend.c */ +extern const char *const pm_states[]; + +extern bool valid_state(suspend_state_t state); +extern int suspend_devices_and_enter(suspend_state_t state); +extern int enter_state(suspend_state_t state); +#else /* !CONFIG_SUSPEND */ +static inline int suspend_devices_and_enter(suspend_state_t state) +{ + return -ENOSYS; +} +static inline int enter_state(suspend_state_t state) { return -ENOSYS; } +static inline bool valid_state(suspend_state_t state) { return false; } +#endif /* !CONFIG_SUSPEND */ + +#ifdef CONFIG_PM_TEST_SUSPEND +/* kernel/power/suspend_test.c */ +extern void suspend_test_start(void); +extern void suspend_test_finish(const char *label); +#else /* !CONFIG_PM_TEST_SUSPEND */ +static inline void suspend_test_start(void) {} +static inline void suspend_test_finish(const char *label) {} +#endif /* !CONFIG_PM_TEST_SUSPEND */ + +#ifdef CONFIG_PM_SLEEP +/* kernel/power/main.c */ +extern int pm_notifier_call_chain(unsigned long val); +#endif + +#ifdef CONFIG_HIGHMEM +int restore_highmem(void); +#else +static inline unsigned int count_highmem_pages(void) { return 0; } +static inline int restore_highmem(void) { return 0; } +#endif + +/* + * Suspend test levels + */ +enum { + /* keep first */ + TEST_NONE, + TEST_CORE, + TEST_CPUS, + TEST_PLATFORM, + TEST_DEVICES, + TEST_FREEZER, + /* keep last */ + __TEST_AFTER_LAST +}; + +#define TEST_FIRST TEST_NONE +#define TEST_MAX (__TEST_AFTER_LAST - 1) + +extern int pm_test_level; + +#ifdef CONFIG_SUSPEND_FREEZER +static inline int suspend_freeze_processes(void) +{ + return freeze_processes(); +} + +static inline void suspend_thaw_processes(void) +{ + thaw_processes(); +} +#else +static inline int suspend_freeze_processes(void) +{ + return 0; +} + +static inline void suspend_thaw_processes(void) +{ +} +#endif + +#ifdef CONFIG_WAKELOCK +/* kernel/power/wakelock.c */ +extern struct workqueue_struct *suspend_work_queue; +extern struct wake_lock main_wake_lock; +extern suspend_state_t requested_suspend_state; +#endif + +#ifdef CONFIG_USER_WAKELOCK +ssize_t wake_lock_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf); +ssize_t wake_lock_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t n); +ssize_t wake_unlock_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf); +ssize_t wake_unlock_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t n); +#endif + +#ifdef CONFIG_EARLYSUSPEND +/* kernel/power/earlysuspend.c */ +void request_suspend_state(suspend_state_t state); +suspend_state_t get_suspend_state(void); +#endif diff --git a/kernel/power/poweroff.c b/kernel/power/poweroff.c new file mode 100644 index 00000000..d5235937 --- /dev/null +++ b/kernel/power/poweroff.c @@ -0,0 
+1,46 @@ +/* + * poweroff.c - sysrq handler to gracefully power down machine. + * + * This file is released under the GPL v2 + */ + +#include <linux/kernel.h> +#include <linux/sysrq.h> +#include <linux/init.h> +#include <linux/pm.h> +#include <linux/workqueue.h> +#include <linux/reboot.h> +#include <linux/cpumask.h> + +/* + * When the user hits Sys-Rq o to power down the machine this is the + * callback we use. + */ + +static void do_poweroff(struct work_struct *dummy) +{ + kernel_power_off(); +} + +static DECLARE_WORK(poweroff_work, do_poweroff); + +static void handle_poweroff(int key) +{ + /* run sysrq poweroff on boot cpu */ + schedule_work_on(cpumask_first(cpu_online_mask), &poweroff_work); +} + +static struct sysrq_key_op sysrq_poweroff_op = { + .handler = handle_poweroff, + .help_msg = "powerOff", + .action_msg = "Power Off", + .enable_mask = SYSRQ_ENABLE_BOOT, +}; + +static int pm_sysrq_init(void) +{ + register_sysrq_key('o', &sysrq_poweroff_op); + return 0; +} + +subsys_initcall(pm_sysrq_init); diff --git a/kernel/power/process.c b/kernel/power/process.c new file mode 100644 index 00000000..31338cde --- /dev/null +++ b/kernel/power/process.c @@ -0,0 +1,206 @@ +/* + * drivers/power/process.c - Functions for starting/stopping processes on + * suspend transitions. + * + * Originally from swsusp. + */ + + +#undef DEBUG + +#include <linux/interrupt.h> +#include <linux/oom.h> +#include <linux/suspend.h> +#include <linux/module.h> +#include <linux/syscalls.h> +#include <linux/freezer.h> +#include <linux/delay.h> +#include <linux/workqueue.h> +#include <linux/wakelock.h> + +/* + * Timeout for stopping processes + */ +#define TIMEOUT (20 * HZ) + +static inline int freezable(struct task_struct * p) +{ + if ((p == current) || + (p->flags & PF_NOFREEZE) || + (p->exit_state != 0)) + return 0; + return 1; +} + +static int try_to_freeze_tasks(bool sig_only) +{ + struct task_struct *g, *p; + unsigned long end_time; + unsigned int todo; + bool wq_busy = false; + struct timeval start, end; + u64 elapsed_csecs64; + unsigned int elapsed_csecs; + bool wakeup = false; + + do_gettimeofday(&start); + + end_time = jiffies + TIMEOUT; + + if (!sig_only) + freeze_workqueues_begin(); + + while (true) { + todo = 0; + read_lock(&tasklist_lock); + do_each_thread(g, p) { + if (frozen(p) || !freezable(p)) + continue; + + if (!freeze_task(p, sig_only)) + continue; + + /* + * Now that we've done set_freeze_flag, don't + * perturb a task in TASK_STOPPED or TASK_TRACED. + * It is "frozen enough". If the task does wake + * up, it will immediately call try_to_freeze. + * + * Because freeze_task() goes through p's + * scheduler lock after setting TIF_FREEZE, it's + * guaranteed that either we see TASK_RUNNING or + * try_to_stop() after schedule() in ptrace/signal + * stop sees TIF_FREEZE. + */ + if (!task_is_stopped_or_traced(p) && + !freezer_should_skip(p)) + todo++; + } while_each_thread(g, p); + read_unlock(&tasklist_lock); + + if (!sig_only) { + wq_busy = freeze_workqueues_busy(); + todo += wq_busy; + } + + if (todo && has_wake_lock(WAKE_LOCK_SUSPEND)) { + wakeup = 1; + break; + } + if (!todo || time_after(jiffies, end_time)) + break; + + if (pm_wakeup_pending()) { + wakeup = true; + break; + } + + /* + * We need to retry, but first give the freezing tasks some + * time to enter the regrigerator. 
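+ * In practice the loop re-checks every 10 ms until all tasks are frozen,
+ * a wakeup source or wake lock aborts the freeze, or the 20 second
+ * TIMEOUT expires.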
+ */ + msleep(10); + } + + do_gettimeofday(&end); + elapsed_csecs64 = timeval_to_ns(&end) - timeval_to_ns(&start); + do_div(elapsed_csecs64, NSEC_PER_SEC / 100); + elapsed_csecs = elapsed_csecs64; + + if (todo) { + /* This does not unfreeze processes that are already frozen + * (we have slightly ugly calling convention in that respect, + * and caller must call thaw_processes() if something fails), + * but it cleans up leftover PF_FREEZE requests. + */ + if(wakeup) { + printk("\n"); + printk(KERN_ERR "Freezing of %s aborted\n", + sig_only ? "user space " : "tasks "); + } + else { + printk("\n"); + printk(KERN_ERR "Freezing of tasks failed after %d.%02d seconds " + "(%d tasks refusing to freeze, wq_busy=%d):\n", + elapsed_csecs / 100, elapsed_csecs % 100, + todo - wq_busy, wq_busy); + } + thaw_workqueues(); + + read_lock(&tasklist_lock); + do_each_thread(g, p) { + task_lock(p); + if (freezing(p) && !freezer_should_skip(p) && + elapsed_csecs > 100) + sched_show_task(p); + cancel_freezing(p); + task_unlock(p); + } while_each_thread(g, p); + read_unlock(&tasklist_lock); + } else { + printk("(elapsed %d.%02d seconds) ", elapsed_csecs / 100, + elapsed_csecs % 100); + } + + return todo ? -EBUSY : 0; +} + +/** + * freeze_processes - tell processes to enter the refrigerator + */ +int freeze_processes(void) +{ + int error; + + printk("Freezing user space processes ... "); + error = try_to_freeze_tasks(true); + if (error) + goto Exit; + printk("done.\n"); + + printk("Freezing remaining freezable tasks ... "); + error = try_to_freeze_tasks(false); + if (error) + goto Exit; + printk("done."); + + oom_killer_disable(); + Exit: + BUG_ON(in_atomic()); + printk("\n"); + + return error; +} + +static void thaw_tasks(bool nosig_only) +{ + struct task_struct *g, *p; + + read_lock(&tasklist_lock); + do_each_thread(g, p) { + if (!freezable(p)) + continue; + + if (nosig_only && should_send_signal(p)) + continue; + + if (cgroup_freezing_or_frozen(p)) + continue; + + thaw_process(p); + } while_each_thread(g, p); + read_unlock(&tasklist_lock); +} + +void thaw_processes(void) +{ + oom_killer_enable(); + + printk("Restarting tasks ... "); + thaw_workqueues(); + thaw_tasks(true); + thaw_tasks(false); + schedule(); + printk("done.\n"); +} + diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c new file mode 100644 index 00000000..06efa54f --- /dev/null +++ b/kernel/power/snapshot.c @@ -0,0 +1,2325 @@ +/* + * linux/kernel/power/snapshot.c + * + * This file provides system snapshot/restore functionality for swsusp. + * + * Copyright (C) 1998-2005 Pavel Machek <pavel@ucw.cz> + * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl> + * + * This file is released under the GPLv2. 
+ * + */ + +#include <linux/version.h> +#include <linux/module.h> +#include <linux/mm.h> +#include <linux/suspend.h> +#include <linux/delay.h> +#include <linux/bitops.h> +#include <linux/spinlock.h> +#include <linux/kernel.h> +#include <linux/pm.h> +#include <linux/device.h> +#include <linux/init.h> +#include <linux/bootmem.h> +#include <linux/syscalls.h> +#include <linux/console.h> +#include <linux/highmem.h> +#include <linux/list.h> +#include <linux/slab.h> + +#include <asm/uaccess.h> +#include <asm/mmu_context.h> +#include <asm/pgtable.h> +#include <asm/tlbflush.h> +#include <asm/io.h> + +#include "power.h" + +static int swsusp_page_is_free(struct page *); +static void swsusp_set_page_forbidden(struct page *); +static void swsusp_unset_page_forbidden(struct page *); + +/* + * Number of bytes to reserve for memory allocations made by device drivers + * from their ->freeze() and ->freeze_noirq() callbacks so that they don't + * cause image creation to fail (tunable via /sys/power/reserved_size). + */ +unsigned long reserved_size; + +void __init hibernate_reserved_size_init(void) +{ + reserved_size = SPARE_PAGES * PAGE_SIZE; +} + +/* + * Preferred image size in bytes (tunable via /sys/power/image_size). + * When it is set to N, swsusp will do its best to ensure the image + * size will not exceed N bytes, but if that is impossible, it will + * try to create the smallest image possible. + */ +unsigned long image_size; + +void __init hibernate_image_size_init(void) +{ + image_size = ((totalram_pages * 2) / 5) * PAGE_SIZE; +} + +/* List of PBEs needed for restoring the pages that were allocated before + * the suspend and included in the suspend image, but have also been + * allocated by the "resume" kernel, so their contents cannot be written + * directly to their "original" page frames. + */ +struct pbe *restore_pblist; + +/* Pointer to an auxiliary buffer (1 page) */ +static void *buffer; + +/** + * @safe_needed - on resume, for storing the PBE list and the image, + * we can only use memory pages that do not conflict with the pages + * used before suspend. The unsafe pages have PageNosaveFree set + * and we count them using unsafe_pages. + * + * Each allocated image page is marked as PageNosave and PageNosaveFree + * so that swsusp_free() can release it. 
+ */ + +#define PG_ANY 0 +#define PG_SAFE 1 +#define PG_UNSAFE_CLEAR 1 +#define PG_UNSAFE_KEEP 0 + +static unsigned int allocated_unsafe_pages; + +static void *get_image_page(gfp_t gfp_mask, int safe_needed) +{ + void *res; + + res = (void *)get_zeroed_page(gfp_mask); + if (safe_needed) + while (res && swsusp_page_is_free(virt_to_page(res))) { + /* The page is unsafe, mark it for swsusp_free() */ + swsusp_set_page_forbidden(virt_to_page(res)); + allocated_unsafe_pages++; + res = (void *)get_zeroed_page(gfp_mask); + } + if (res) { + swsusp_set_page_forbidden(virt_to_page(res)); + swsusp_set_page_free(virt_to_page(res)); + } + return res; +} + +unsigned long get_safe_page(gfp_t gfp_mask) +{ + return (unsigned long)get_image_page(gfp_mask, PG_SAFE); +} + +static struct page *alloc_image_page(gfp_t gfp_mask) +{ + struct page *page; + + page = alloc_page(gfp_mask); + if (page) { + swsusp_set_page_forbidden(page); + swsusp_set_page_free(page); + } + return page; +} + +/** + * free_image_page - free page represented by @addr, allocated with + * get_image_page (page flags set by it must be cleared) + */ + +static inline void free_image_page(void *addr, int clear_nosave_free) +{ + struct page *page; + + BUG_ON(!virt_addr_valid(addr)); + + page = virt_to_page(addr); + + swsusp_unset_page_forbidden(page); + if (clear_nosave_free) + swsusp_unset_page_free(page); + + __free_page(page); +} + +/* struct linked_page is used to build chains of pages */ + +#define LINKED_PAGE_DATA_SIZE (PAGE_SIZE - sizeof(void *)) + +struct linked_page { + struct linked_page *next; + char data[LINKED_PAGE_DATA_SIZE]; +} __attribute__((packed)); + +static inline void +free_list_of_pages(struct linked_page *list, int clear_page_nosave) +{ + while (list) { + struct linked_page *lp = list->next; + + free_image_page(list, clear_page_nosave); + list = lp; + } +} + +/** + * struct chain_allocator is used for allocating small objects out of + * a linked list of pages called 'the chain'. + * + * The chain grows each time when there is no room for a new object in + * the current page. The allocated objects cannot be freed individually. + * It is only possible to free them all at once, by freeing the entire + * chain. + * + * NOTE: The chain allocator may be inefficient if the allocated objects + * are not much smaller than PAGE_SIZE. + */ + +struct chain_allocator { + struct linked_page *chain; /* the chain */ + unsigned int used_space; /* total size of objects allocated out + * of the current page + */ + gfp_t gfp_mask; /* mask for allocating pages */ + int safe_needed; /* if set, only "safe" pages are allocated */ +}; + +static void +chain_init(struct chain_allocator *ca, gfp_t gfp_mask, int safe_needed) +{ + ca->chain = NULL; + ca->used_space = LINKED_PAGE_DATA_SIZE; + ca->gfp_mask = gfp_mask; + ca->safe_needed = safe_needed; +} + +static void *chain_alloc(struct chain_allocator *ca, unsigned int size) +{ + void *ret; + + if (LINKED_PAGE_DATA_SIZE - ca->used_space < size) { + struct linked_page *lp; + + lp = get_image_page(ca->gfp_mask, ca->safe_needed); + if (!lp) + return NULL; + + lp->next = ca->chain; + ca->chain = lp; + ca->used_space = 0; + } + ret = ca->chain->data + ca->used_space; + ca->used_space += size; + return ret; +} + +/** + * Data types related to memory bitmaps. + * + * Memory bitmap is a structure consiting of many linked lists of + * objects. The main list's elements are of type struct zone_bitmap + * and each of them corresonds to one zone. 
For each zone bitmap + * object there is a list of objects of type struct bm_block that + * represent each blocks of bitmap in which information is stored. + * + * struct memory_bitmap contains a pointer to the main list of zone + * bitmap objects, a struct bm_position used for browsing the bitmap, + * and a pointer to the list of pages used for allocating all of the + * zone bitmap objects and bitmap block objects. + * + * NOTE: It has to be possible to lay out the bitmap in memory + * using only allocations of order 0. Additionally, the bitmap is + * designed to work with arbitrary number of zones (this is over the + * top for now, but let's avoid making unnecessary assumptions ;-). + * + * struct zone_bitmap contains a pointer to a list of bitmap block + * objects and a pointer to the bitmap block object that has been + * most recently used for setting bits. Additionally, it contains the + * pfns that correspond to the start and end of the represented zone. + * + * struct bm_block contains a pointer to the memory page in which + * information is stored (in the form of a block of bitmap) + * It also contains the pfns that correspond to the start and end of + * the represented memory area. + */ + +#define BM_END_OF_MAP (~0UL) + +#define BM_BITS_PER_BLOCK (PAGE_SIZE * BITS_PER_BYTE) + +struct bm_block { + struct list_head hook; /* hook into a list of bitmap blocks */ + unsigned long start_pfn; /* pfn represented by the first bit */ + unsigned long end_pfn; /* pfn represented by the last bit plus 1 */ + unsigned long *data; /* bitmap representing pages */ +}; + +static inline unsigned long bm_block_bits(struct bm_block *bb) +{ + return bb->end_pfn - bb->start_pfn; +} + +/* strcut bm_position is used for browsing memory bitmaps */ + +struct bm_position { + struct bm_block *block; + int bit; +}; + +struct memory_bitmap { + struct list_head blocks; /* list of bitmap blocks */ + struct linked_page *p_list; /* list of pages used to store zone + * bitmap objects and bitmap block + * objects + */ + struct bm_position cur; /* most recently used bit position */ +}; + +/* Functions that operate on memory bitmaps */ + +static void memory_bm_position_reset(struct memory_bitmap *bm) +{ + bm->cur.block = list_entry(bm->blocks.next, struct bm_block, hook); + bm->cur.bit = 0; +} + +static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free); + +/** + * create_bm_block_list - create a list of block bitmap objects + * @pages - number of pages to track + * @list - list to put the allocated blocks into + * @ca - chain allocator to be used for allocating memory + */ +static int create_bm_block_list(unsigned long pages, + struct list_head *list, + struct chain_allocator *ca) +{ + unsigned int nr_blocks = DIV_ROUND_UP(pages, BM_BITS_PER_BLOCK); + + while (nr_blocks-- > 0) { + struct bm_block *bb; + + bb = chain_alloc(ca, sizeof(struct bm_block)); + if (!bb) + return -ENOMEM; + list_add(&bb->hook, list); + } + + return 0; +} + +struct mem_extent { + struct list_head hook; + unsigned long start; + unsigned long end; +}; + +/** + * free_mem_extents - free a list of memory extents + * @list - list of extents to empty + */ +static void free_mem_extents(struct list_head *list) +{ + struct mem_extent *ext, *aux; + + list_for_each_entry_safe(ext, aux, list, hook) { + list_del(&ext->hook); + kfree(ext); + } +} + +/** + * create_mem_extents - create a list of memory extents representing + * contiguous ranges of PFNs + * @list - list to put the extents into + * @gfp_mask - mask to use for memory allocations 
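+ *
+ * Returns 0 on success or -ENOMEM if an extent cannot be allocated; in the
+ * failure case any extents already added to @list are freed again.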
+ */ +static int create_mem_extents(struct list_head *list, gfp_t gfp_mask) +{ + struct zone *zone; + + INIT_LIST_HEAD(list); + + for_each_populated_zone(zone) { + unsigned long zone_start, zone_end; + struct mem_extent *ext, *cur, *aux; + + zone_start = zone->zone_start_pfn; + zone_end = zone->zone_start_pfn + zone->spanned_pages; + + list_for_each_entry(ext, list, hook) + if (zone_start <= ext->end) + break; + + if (&ext->hook == list || zone_end < ext->start) { + /* New extent is necessary */ + struct mem_extent *new_ext; + + new_ext = kzalloc(sizeof(struct mem_extent), gfp_mask); + if (!new_ext) { + free_mem_extents(list); + return -ENOMEM; + } + new_ext->start = zone_start; + new_ext->end = zone_end; + list_add_tail(&new_ext->hook, &ext->hook); + continue; + } + + /* Merge this zone's range of PFNs with the existing one */ + if (zone_start < ext->start) + ext->start = zone_start; + if (zone_end > ext->end) + ext->end = zone_end; + + /* More merging may be possible */ + cur = ext; + list_for_each_entry_safe_continue(cur, aux, list, hook) { + if (zone_end < cur->start) + break; + if (zone_end < cur->end) + ext->end = cur->end; + list_del(&cur->hook); + kfree(cur); + } + } + + return 0; +} + +/** + * memory_bm_create - allocate memory for a memory bitmap + */ +static int +memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed) +{ + struct chain_allocator ca; + struct list_head mem_extents; + struct mem_extent *ext; + int error; + + chain_init(&ca, gfp_mask, safe_needed); + INIT_LIST_HEAD(&bm->blocks); + + error = create_mem_extents(&mem_extents, gfp_mask); + if (error) + return error; + + list_for_each_entry(ext, &mem_extents, hook) { + struct bm_block *bb; + unsigned long pfn = ext->start; + unsigned long pages = ext->end - ext->start; + + bb = list_entry(bm->blocks.prev, struct bm_block, hook); + + error = create_bm_block_list(pages, bm->blocks.prev, &ca); + if (error) + goto Error; + + list_for_each_entry_continue(bb, &bm->blocks, hook) { + bb->data = get_image_page(gfp_mask, safe_needed); + if (!bb->data) { + error = -ENOMEM; + goto Error; + } + + bb->start_pfn = pfn; + if (pages >= BM_BITS_PER_BLOCK) { + pfn += BM_BITS_PER_BLOCK; + pages -= BM_BITS_PER_BLOCK; + } else { + /* This is executed only once in the loop */ + pfn += pages; + } + bb->end_pfn = pfn; + } + } + + bm->p_list = ca.chain; + memory_bm_position_reset(bm); + Exit: + free_mem_extents(&mem_extents); + return error; + + Error: + bm->p_list = ca.chain; + memory_bm_free(bm, PG_UNSAFE_CLEAR); + goto Exit; +} + +/** + * memory_bm_free - free memory occupied by the memory bitmap @bm + */ +static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free) +{ + struct bm_block *bb; + + list_for_each_entry(bb, &bm->blocks, hook) + if (bb->data) + free_image_page(bb->data, clear_nosave_free); + + free_list_of_pages(bm->p_list, clear_nosave_free); + + INIT_LIST_HEAD(&bm->blocks); +} + +/** + * memory_bm_find_bit - find the bit in the bitmap @bm that corresponds + * to given pfn. The cur_zone_bm member of @bm and the cur_block member + * of @bm->cur_zone_bm are updated. + */ +static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn, + void **addr, unsigned int *bit_nr) +{ + struct bm_block *bb; + + /* + * Check if the pfn corresponds to the current bitmap block and find + * the block where it fits if this is not the case. 
+ */ + bb = bm->cur.block; + if (pfn < bb->start_pfn) + list_for_each_entry_continue_reverse(bb, &bm->blocks, hook) + if (pfn >= bb->start_pfn) + break; + + if (pfn >= bb->end_pfn) + list_for_each_entry_continue(bb, &bm->blocks, hook) + if (pfn >= bb->start_pfn && pfn < bb->end_pfn) + break; + + if (&bb->hook == &bm->blocks) + return -EFAULT; + + /* The block has been found */ + bm->cur.block = bb; + pfn -= bb->start_pfn; + bm->cur.bit = pfn + 1; + *bit_nr = pfn; + *addr = bb->data; + return 0; +} + +static void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn) +{ + void *addr; + unsigned int bit; + int error; + + error = memory_bm_find_bit(bm, pfn, &addr, &bit); + BUG_ON(error); + set_bit(bit, addr); +} + +static int mem_bm_set_bit_check(struct memory_bitmap *bm, unsigned long pfn) +{ + void *addr; + unsigned int bit; + int error; + + error = memory_bm_find_bit(bm, pfn, &addr, &bit); + if (!error) + set_bit(bit, addr); + return error; +} + +static void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn) +{ + void *addr; + unsigned int bit; + int error; + + error = memory_bm_find_bit(bm, pfn, &addr, &bit); + BUG_ON(error); + clear_bit(bit, addr); +} + +static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn) +{ + void *addr; + unsigned int bit; + int error; + + error = memory_bm_find_bit(bm, pfn, &addr, &bit); + BUG_ON(error); + return test_bit(bit, addr); +} + +static bool memory_bm_pfn_present(struct memory_bitmap *bm, unsigned long pfn) +{ + void *addr; + unsigned int bit; + + return !memory_bm_find_bit(bm, pfn, &addr, &bit); +} + +/** + * memory_bm_next_pfn - find the pfn that corresponds to the next set bit + * in the bitmap @bm. If the pfn cannot be found, BM_END_OF_MAP is + * returned. + * + * It is required to run memory_bm_position_reset() before the first call to + * this function. + */ + +static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm) +{ + struct bm_block *bb; + int bit; + + bb = bm->cur.block; + do { + bit = bm->cur.bit; + bit = find_next_bit(bb->data, bm_block_bits(bb), bit); + if (bit < bm_block_bits(bb)) + goto Return_pfn; + + bb = list_entry(bb->hook.next, struct bm_block, hook); + bm->cur.block = bb; + bm->cur.bit = 0; + } while (&bb->hook != &bm->blocks); + + memory_bm_position_reset(bm); + return BM_END_OF_MAP; + + Return_pfn: + bm->cur.bit = bit + 1; + return bb->start_pfn + bit; +} + +/** + * This structure represents a range of page frames the contents of which + * should not be saved during the suspend. 
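+ *
+ * Illustration only: platform or arch setup code would typically mark such
+ * a range early during boot with something like
+ *
+ *	register_nosave_region(start_pfn, end_pfn);
+ *
+ * where start_pfn/end_pfn are the (hypothetical) bounds of the page frame
+ * range to be skipped; see __register_nosave_region() below.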
+ */ + +struct nosave_region { + struct list_head list; + unsigned long start_pfn; + unsigned long end_pfn; +}; + +static LIST_HEAD(nosave_regions); + +/** + * register_nosave_region - register a range of page frames the contents + * of which should not be saved during the suspend (to be used in the early + * initialization code) + */ + +void __init +__register_nosave_region(unsigned long start_pfn, unsigned long end_pfn, + int use_kmalloc) +{ + struct nosave_region *region; + + if (start_pfn >= end_pfn) + return; + + if (!list_empty(&nosave_regions)) { + /* Try to extend the previous region (they should be sorted) */ + region = list_entry(nosave_regions.prev, + struct nosave_region, list); + if (region->end_pfn == start_pfn) { + region->end_pfn = end_pfn; + goto Report; + } + } + if (use_kmalloc) { + /* during init, this shouldn't fail */ + region = kmalloc(sizeof(struct nosave_region), GFP_KERNEL); + BUG_ON(!region); + } else + /* This allocation cannot fail */ + region = alloc_bootmem(sizeof(struct nosave_region)); + region->start_pfn = start_pfn; + region->end_pfn = end_pfn; + list_add_tail(®ion->list, &nosave_regions); + Report: + printk(KERN_INFO "PM: Registered nosave memory: %016lx - %016lx\n", + start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT); +} + +/* + * Set bits in this map correspond to the page frames the contents of which + * should not be saved during the suspend. + */ +static struct memory_bitmap *forbidden_pages_map; + +/* Set bits in this map correspond to free page frames. */ +static struct memory_bitmap *free_pages_map; + +/* + * Each page frame allocated for creating the image is marked by setting the + * corresponding bits in forbidden_pages_map and free_pages_map simultaneously + */ + +void swsusp_set_page_free(struct page *page) +{ + if (free_pages_map) + memory_bm_set_bit(free_pages_map, page_to_pfn(page)); +} + +static int swsusp_page_is_free(struct page *page) +{ + return free_pages_map ? + memory_bm_test_bit(free_pages_map, page_to_pfn(page)) : 0; +} + +void swsusp_unset_page_free(struct page *page) +{ + if (free_pages_map) + memory_bm_clear_bit(free_pages_map, page_to_pfn(page)); +} + +static void swsusp_set_page_forbidden(struct page *page) +{ + if (forbidden_pages_map) + memory_bm_set_bit(forbidden_pages_map, page_to_pfn(page)); +} + +int swsusp_page_is_forbidden(struct page *page) +{ + return forbidden_pages_map ? + memory_bm_test_bit(forbidden_pages_map, page_to_pfn(page)) : 0; +} + +static void swsusp_unset_page_forbidden(struct page *page) +{ + if (forbidden_pages_map) + memory_bm_clear_bit(forbidden_pages_map, page_to_pfn(page)); +} + +/** + * mark_nosave_pages - set bits corresponding to the page frames the + * contents of which should not be saved in a given bitmap. + */ + +static void mark_nosave_pages(struct memory_bitmap *bm) +{ + struct nosave_region *region; + + if (list_empty(&nosave_regions)) + return; + + list_for_each_entry(region, &nosave_regions, list) { + unsigned long pfn; + + pr_debug("PM: Marking nosave pages: %016lx - %016lx\n", + region->start_pfn << PAGE_SHIFT, + region->end_pfn << PAGE_SHIFT); + + for (pfn = region->start_pfn; pfn < region->end_pfn; pfn++) + if (pfn_valid(pfn)) { + /* + * It is safe to ignore the result of + * mem_bm_set_bit_check() here, since we won't + * touch the PFNs for which the error is + * returned anyway. + */ + mem_bm_set_bit_check(bm, pfn); + } + } +} + +/** + * create_basic_memory_bitmaps - create bitmaps needed for marking page + * frames that should not be saved and free page frames. 
The pointers + * forbidden_pages_map and free_pages_map are only modified if everything + * goes well, because we don't want the bits to be used before both bitmaps + * are set up. + */ + +int create_basic_memory_bitmaps(void) +{ + struct memory_bitmap *bm1, *bm2; + int error = 0; + + BUG_ON(forbidden_pages_map || free_pages_map); + + bm1 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL); + if (!bm1) + return -ENOMEM; + + error = memory_bm_create(bm1, GFP_KERNEL, PG_ANY); + if (error) + goto Free_first_object; + + bm2 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL); + if (!bm2) + goto Free_first_bitmap; + + error = memory_bm_create(bm2, GFP_KERNEL, PG_ANY); + if (error) + goto Free_second_object; + + forbidden_pages_map = bm1; + free_pages_map = bm2; + mark_nosave_pages(forbidden_pages_map); + + pr_debug("PM: Basic memory bitmaps created\n"); + + return 0; + + Free_second_object: + kfree(bm2); + Free_first_bitmap: + memory_bm_free(bm1, PG_UNSAFE_CLEAR); + Free_first_object: + kfree(bm1); + return -ENOMEM; +} + +/** + * free_basic_memory_bitmaps - free memory bitmaps allocated by + * create_basic_memory_bitmaps(). The auxiliary pointers are necessary + * so that the bitmaps themselves are not referred to while they are being + * freed. + */ + +void free_basic_memory_bitmaps(void) +{ + struct memory_bitmap *bm1, *bm2; + + BUG_ON(!(forbidden_pages_map && free_pages_map)); + + bm1 = forbidden_pages_map; + bm2 = free_pages_map; + forbidden_pages_map = NULL; + free_pages_map = NULL; + memory_bm_free(bm1, PG_UNSAFE_CLEAR); + kfree(bm1); + memory_bm_free(bm2, PG_UNSAFE_CLEAR); + kfree(bm2); + + pr_debug("PM: Basic memory bitmaps freed\n"); +} + +/** + * snapshot_additional_pages - estimate the number of additional pages + * be needed for setting up the suspend image data structures for given + * zone (usually the returned value is greater than the exact number) + */ + +unsigned int snapshot_additional_pages(struct zone *zone) +{ + unsigned int res; + + res = DIV_ROUND_UP(zone->spanned_pages, BM_BITS_PER_BLOCK); + res += DIV_ROUND_UP(res * sizeof(struct bm_block), PAGE_SIZE); + return 2 * res; +} + +#ifdef CONFIG_HIGHMEM +/** + * count_free_highmem_pages - compute the total number of free highmem + * pages, system-wide. + */ + +static unsigned int count_free_highmem_pages(void) +{ + struct zone *zone; + unsigned int cnt = 0; + + for_each_populated_zone(zone) + if (is_highmem(zone)) + cnt += zone_page_state(zone, NR_FREE_PAGES); + + return cnt; +} + +/** + * saveable_highmem_page - Determine whether a highmem page should be + * included in the suspend image. + * + * We should save the page if it isn't Nosave or NosaveFree, or Reserved, + * and it isn't a part of a free chunk of pages. + */ +static struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn) +{ + struct page *page; + + if (!pfn_valid(pfn)) + return NULL; + + page = pfn_to_page(pfn); + if (page_zone(page) != zone) + return NULL; + + BUG_ON(!PageHighMem(page)); + + if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page) || + PageReserved(page)) + return NULL; + + return page; +} + +/** + * count_highmem_pages - compute the total number of saveable highmem + * pages. 
+ */ + +static unsigned int count_highmem_pages(void) +{ + struct zone *zone; + unsigned int n = 0; + + for_each_populated_zone(zone) { + unsigned long pfn, max_zone_pfn; + + if (!is_highmem(zone)) + continue; + + mark_free_pages(zone); + max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; + for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) + if (saveable_highmem_page(zone, pfn)) + n++; + } + return n; +} +#else +static inline void *saveable_highmem_page(struct zone *z, unsigned long p) +{ + return NULL; +} +#endif /* CONFIG_HIGHMEM */ + +/** + * saveable_page - Determine whether a non-highmem page should be included + * in the suspend image. + * + * We should save the page if it isn't Nosave, and is not in the range + * of pages statically defined as 'unsaveable', and it isn't a part of + * a free chunk of pages. + */ +static struct page *saveable_page(struct zone *zone, unsigned long pfn) +{ + struct page *page; + + if (!pfn_valid(pfn)) + return NULL; + + page = pfn_to_page(pfn); + if (page_zone(page) != zone) + return NULL; + + BUG_ON(PageHighMem(page)); + + if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page)) + return NULL; + + if (PageReserved(page) + && (!kernel_page_present(page) || pfn_is_nosave(pfn))) + return NULL; + + return page; +} + +/** + * count_data_pages - compute the total number of saveable non-highmem + * pages. + */ + +static unsigned int count_data_pages(void) +{ + struct zone *zone; + unsigned long pfn, max_zone_pfn; + unsigned int n = 0; + + for_each_populated_zone(zone) { + if (is_highmem(zone)) + continue; + + mark_free_pages(zone); + max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; + for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) + if (saveable_page(zone, pfn)) + n++; + } + return n; +} + +/* This is needed, because copy_page and memcpy are not usable for copying + * task structs. + */ +static inline void do_copy_page(long *dst, long *src) +{ + int n; + + for (n = PAGE_SIZE / sizeof(long); n; n--) + *dst++ = *src++; +} + + +/** + * safe_copy_page - check if the page we are going to copy is marked as + * present in the kernel page tables (this always is the case if + * CONFIG_DEBUG_PAGEALLOC is not set and in that case + * kernel_page_present() always returns 'true'). + */ +static void safe_copy_page(void *dst, struct page *s_page) +{ + if (kernel_page_present(s_page)) { + do_copy_page(dst, page_address(s_page)); + } else { + kernel_map_pages(s_page, 1, 1); + do_copy_page(dst, page_address(s_page)); + kernel_map_pages(s_page, 1, 0); + } +} + + +#ifdef CONFIG_HIGHMEM +static inline struct page * +page_is_saveable(struct zone *zone, unsigned long pfn) +{ + return is_highmem(zone) ? 
+ saveable_highmem_page(zone, pfn) : saveable_page(zone, pfn); +} + +static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) +{ + struct page *s_page, *d_page; + void *src, *dst; + + s_page = pfn_to_page(src_pfn); + d_page = pfn_to_page(dst_pfn); + if (PageHighMem(s_page)) { + src = kmap_atomic(s_page, KM_USER0); + dst = kmap_atomic(d_page, KM_USER1); + do_copy_page(dst, src); + kunmap_atomic(dst, KM_USER1); + kunmap_atomic(src, KM_USER0); + } else { + if (PageHighMem(d_page)) { + /* Page pointed to by src may contain some kernel + * data modified by kmap_atomic() + */ + safe_copy_page(buffer, s_page); + dst = kmap_atomic(d_page, KM_USER0); + copy_page(dst, buffer); + kunmap_atomic(dst, KM_USER0); + } else { + safe_copy_page(page_address(d_page), s_page); + } + } +} +#else +#define page_is_saveable(zone, pfn) saveable_page(zone, pfn) + +static inline void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) +{ + safe_copy_page(page_address(pfn_to_page(dst_pfn)), + pfn_to_page(src_pfn)); +} +#endif /* CONFIG_HIGHMEM */ + +static void +copy_data_pages(struct memory_bitmap *copy_bm, struct memory_bitmap *orig_bm) +{ + struct zone *zone; + unsigned long pfn; + + for_each_populated_zone(zone) { + unsigned long max_zone_pfn; + + mark_free_pages(zone); + max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; + for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) + if (page_is_saveable(zone, pfn)) + memory_bm_set_bit(orig_bm, pfn); + } + memory_bm_position_reset(orig_bm); + memory_bm_position_reset(copy_bm); + for(;;) { + pfn = memory_bm_next_pfn(orig_bm); + if (unlikely(pfn == BM_END_OF_MAP)) + break; + copy_data_page(memory_bm_next_pfn(copy_bm), pfn); + } +} + +/* Total number of image pages */ +static unsigned int nr_copy_pages; +/* Number of pages needed for saving the original pfns of the image pages */ +static unsigned int nr_meta_pages; +/* + * Numbers of normal and highmem page frames allocated for hibernation image + * before suspending devices. + */ +unsigned int alloc_normal, alloc_highmem; +/* + * Memory bitmap used for marking saveable pages (during hibernation) or + * hibernation image pages (during restore) + */ +static struct memory_bitmap orig_bm; +/* + * Memory bitmap used during hibernation for marking allocated page frames that + * will contain copies of saveable pages. During restore it is initially used + * for marking hibernation image pages, but then the set bits from it are + * duplicated in @orig_bm and it is released. On highmem systems it is next + * used for marking "safe" highmem pages, but it has to be reinitialized for + * this purpose. + */ +static struct memory_bitmap copy_bm; + +/** + * swsusp_free - free pages allocated for the suspend. + * + * Suspend pages are alocated before the atomic copy is made, so we + * need to release them after the resume. + */ + +void swsusp_free(void) +{ + struct zone *zone; + unsigned long pfn, max_zone_pfn; + + for_each_populated_zone(zone) { + max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; + for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) + if (pfn_valid(pfn)) { + struct page *page = pfn_to_page(pfn); + + if (swsusp_page_is_forbidden(page) && + swsusp_page_is_free(page)) { + swsusp_unset_page_forbidden(page); + swsusp_unset_page_free(page); + __free_page(page); + } + } + } + nr_copy_pages = 0; + nr_meta_pages = 0; + restore_pblist = NULL; + buffer = NULL; + alloc_normal = 0; + alloc_highmem = 0; +} + +/* Helper functions used for the shrinking of memory. 
*/
+
+#define GFP_IMAGE	(GFP_KERNEL | __GFP_NOWARN)
+
+/**
+ * preallocate_image_pages - Allocate a number of pages for hibernation image
+ * @nr_pages: Number of page frames to allocate.
+ * @mask: GFP flags to use for the allocation.
+ *
+ * Return value: Number of page frames actually allocated
+ */
+static unsigned long preallocate_image_pages(unsigned long nr_pages, gfp_t mask)
+{
+	unsigned long nr_alloc = 0;
+
+	while (nr_pages > 0) {
+		struct page *page;
+
+		page = alloc_image_page(mask);
+		if (!page)
+			break;
+		memory_bm_set_bit(&copy_bm, page_to_pfn(page));
+		if (PageHighMem(page))
+			alloc_highmem++;
+		else
+			alloc_normal++;
+		nr_pages--;
+		nr_alloc++;
+	}
+
+	return nr_alloc;
+}
+
+static unsigned long preallocate_image_memory(unsigned long nr_pages,
+					      unsigned long avail_normal)
+{
+	unsigned long alloc;
+
+	if (avail_normal <= alloc_normal)
+		return 0;
+
+	alloc = avail_normal - alloc_normal;
+	if (nr_pages < alloc)
+		alloc = nr_pages;
+
+	return preallocate_image_pages(alloc, GFP_IMAGE);
+}
+
+#ifdef CONFIG_HIGHMEM
+static unsigned long preallocate_image_highmem(unsigned long nr_pages)
+{
+	return preallocate_image_pages(nr_pages, GFP_IMAGE | __GFP_HIGHMEM);
+}
+
+/**
+ * __fraction - Compute (an approximation of) x * (multiplier / base)
+ */
+static unsigned long __fraction(u64 x, u64 multiplier, u64 base)
+{
+	x *= multiplier;
+	do_div(x, base);
+	return (unsigned long)x;
+}
+
+static unsigned long preallocate_highmem_fraction(unsigned long nr_pages,
+						unsigned long highmem,
+						unsigned long total)
+{
+	unsigned long alloc = __fraction(nr_pages, highmem, total);
+
+	return preallocate_image_pages(alloc, GFP_IMAGE | __GFP_HIGHMEM);
+}
+#else /* CONFIG_HIGHMEM */
+static inline unsigned long preallocate_image_highmem(unsigned long nr_pages)
+{
+	return 0;
+}
+
+static inline unsigned long preallocate_highmem_fraction(unsigned long nr_pages,
+						unsigned long highmem,
+						unsigned long total)
+{
+	return 0;
+}
+#endif /* CONFIG_HIGHMEM */
+
+/**
+ * free_unnecessary_pages - Release preallocated pages not needed for the image
+ */
+static void free_unnecessary_pages(void)
+{
+	unsigned long save, to_free_normal, to_free_highmem;
+
+	save = count_data_pages();
+	if (alloc_normal >= save) {
+		to_free_normal = alloc_normal - save;
+		save = 0;
+	} else {
+		to_free_normal = 0;
+		save -= alloc_normal;
+	}
+	save += count_highmem_pages();
+	if (alloc_highmem >= save) {
+		to_free_highmem = alloc_highmem - save;
+	} else {
+		to_free_highmem = 0;
+		save -= alloc_highmem;
+		if (to_free_normal > save)
+			to_free_normal -= save;
+		else
+			to_free_normal = 0;
+	}
+
+	memory_bm_position_reset(&copy_bm);
+
+	while (to_free_normal > 0 || to_free_highmem > 0) {
+		unsigned long pfn = memory_bm_next_pfn(&copy_bm);
+		struct page *page = pfn_to_page(pfn);
+
+		if (PageHighMem(page)) {
+			if (!to_free_highmem)
+				continue;
+			to_free_highmem--;
+			alloc_highmem--;
+		} else {
+			if (!to_free_normal)
+				continue;
+			to_free_normal--;
+			alloc_normal--;
+		}
+		memory_bm_clear_bit(&copy_bm, pfn);
+		swsusp_unset_page_forbidden(page);
+		swsusp_unset_page_free(page);
+		__free_page(page);
+	}
+}
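The highmem/lowmem split above is purely proportional: of the page frames still to be preallocated, roughly highmem/total are taken from highmem. A minimal user-space sketch of that arithmetic, with made-up numbers and plain 64-bit division standing in for the kernel's do_div():

#include <stdio.h>
#include <stdint.h>

/* Same idea as __fraction() above: x * (multiplier / base), computed as a
 * 64-bit multiply followed by a divide so precision is not lost early. */
static unsigned long fraction(uint64_t x, uint64_t multiplier, uint64_t base)
{
	return (unsigned long)(x * multiplier / base);
}

int main(void)
{
	unsigned long nr_pages = 100000;	/* frames still to preallocate  */
	unsigned long highmem  = 300000;	/* highmem frames in the system */
	unsigned long total    = 400000;	/* all usable page frames       */
	unsigned long from_highmem = fraction(nr_pages, highmem, total);

	/* 100000 * 300000 / 400000 = 75000 frames come from highmem */
	printf("highmem share: %lu, lowmem share: %lu\n",
	       from_highmem, nr_pages - from_highmem);
	return 0;
}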
+/**
+ * minimum_image_size - Estimate the minimum acceptable size of an image
+ * @saveable: Number of saveable pages in the system.
+ *
+ * We want to avoid attempting to free too much memory too hard, so estimate the
+ * minimum acceptable size of a hibernation image to use as the lower limit for
+ * preallocating memory.
+ *
+ * We assume that the minimum image size should be proportional to
+ *
+ * [number of saveable pages] - [number of pages that can be freed in theory]
+ *
+ * where the second term is the sum of (1) reclaimable slab pages, (2) active
+ * and (3) inactive anonymous pages, (4) active and (5) inactive file pages,
+ * minus mapped file pages.
+ */
+static unsigned long minimum_image_size(unsigned long saveable)
+{
+	unsigned long size;
+
+	size = global_page_state(NR_SLAB_RECLAIMABLE)
+		+ global_page_state(NR_ACTIVE_ANON)
+		+ global_page_state(NR_INACTIVE_ANON)
+		+ global_page_state(NR_ACTIVE_FILE)
+		+ global_page_state(NR_INACTIVE_FILE)
+		- global_page_state(NR_FILE_MAPPED);
+
+	return saveable <= size ? 0 : saveable - size;
+}
+
+/**
+ * hibernate_preallocate_memory - Preallocate memory for hibernation image
+ *
+ * To create a hibernation image it is necessary to make a copy of every page
+ * frame in use.  We also need a number of page frames to be free during
+ * hibernation for allocations made while saving the image and for device
+ * drivers, in case they need to allocate memory from their hibernation
+ * callbacks (these two numbers are given by PAGES_FOR_IO, which is a rough
+ * estimate, and reserved_size divided by PAGE_SIZE, which is tunable through
+ * /sys/power/reserved_size, respectively).  To make this happen, we compute the
+ * total number of available page frames and allocate at least
+ *
+ * ([page frames total] + PAGES_FOR_IO + [metadata pages]) / 2
+ *  + 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE)
+ *
+ * of them, which corresponds to the maximum size of a hibernation image.
+ *
+ * If image_size is set below the number following from the above formula,
+ * the preallocation of memory is continued until the total number of saveable
+ * pages in the system is below the requested image size or the minimum
+ * acceptable image size returned by minimum_image_size(), whichever is greater.
+ */
+int hibernate_preallocate_memory(void)
+{
+	struct zone *zone;
+	unsigned long saveable, size, max_size, count, highmem, pages = 0;
+	unsigned long alloc, save_highmem, pages_highmem, avail_normal;
+	struct timeval start, stop;
+	int error;
+
+	printk(KERN_INFO "PM: Preallocating image memory... ");
+	do_gettimeofday(&start);
+
+	error = memory_bm_create(&orig_bm, GFP_IMAGE, PG_ANY);
+	if (error)
+		goto err_out;
+
+	error = memory_bm_create(&copy_bm, GFP_IMAGE, PG_ANY);
+	if (error)
+		goto err_out;
+
+	alloc_normal = 0;
+	alloc_highmem = 0;
+
+	/* Count the number of saveable data pages. */
+	save_highmem = count_highmem_pages();
+	saveable = count_data_pages();
+
+	/*
+	 * Compute the total number of page frames we can use (count) and the
+	 * number of pages needed for image metadata (size).
+	 */
+	count = saveable;
+	saveable += save_highmem;
+	highmem = save_highmem;
+	size = 0;
+	for_each_populated_zone(zone) {
+		size += snapshot_additional_pages(zone);
+		if (is_highmem(zone))
+			highmem += zone_page_state(zone, NR_FREE_PAGES);
+		else
+			count += zone_page_state(zone, NR_FREE_PAGES);
+	}
+	avail_normal = count;
+	count += highmem;
+	count -= totalreserve_pages;
+
+	/* Compute the maximum number of saveable pages to leave in memory. */
+	max_size = (count - (size + PAGES_FOR_IO)) / 2
+			- 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE);
+	/* Compute the desired number of image pages specified by image_size.
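+	 * For example, an image_size of 500 MB corresponds to
+	 * DIV_ROUND_UP(500 << 20, 4096) = 128000 page frames of 4 KiB each.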
*/ + size = DIV_ROUND_UP(image_size, PAGE_SIZE); + if (size > max_size) + size = max_size; + /* + * If the desired number of image pages is at least as large as the + * current number of saveable pages in memory, allocate page frames for + * the image and we're done. + */ + if (size >= saveable) { + pages = preallocate_image_highmem(save_highmem); + pages += preallocate_image_memory(saveable - pages, avail_normal); + goto out; + } + + /* Estimate the minimum size of the image. */ + pages = minimum_image_size(saveable); + /* + * To avoid excessive pressure on the normal zone, leave room in it to + * accommodate an image of the minimum size (unless it's already too + * small, in which case don't preallocate pages from it at all). + */ + if (avail_normal > pages) + avail_normal -= pages; + else + avail_normal = 0; + if (size < pages) + size = min_t(unsigned long, pages, max_size); + + /* + * Let the memory management subsystem know that we're going to need a + * large number of page frames to allocate and make it free some memory. + * NOTE: If this is not done, performance will be hurt badly in some + * test cases. + */ + shrink_all_memory(saveable - size); + + /* + * The number of saveable pages in memory was too high, so apply some + * pressure to decrease it. First, make room for the largest possible + * image and fail if that doesn't work. Next, try to decrease the size + * of the image as much as indicated by 'size' using allocations from + * highmem and non-highmem zones separately. + */ + pages_highmem = preallocate_image_highmem(highmem / 2); + alloc = (count - max_size) - pages_highmem; + pages = preallocate_image_memory(alloc, avail_normal); + if (pages < alloc) { + /* We have exhausted non-highmem pages, try highmem. */ + alloc -= pages; + pages += pages_highmem; + pages_highmem = preallocate_image_highmem(alloc); + if (pages_highmem < alloc) + goto err_out; + pages += pages_highmem; + /* + * size is the desired number of saveable pages to leave in + * memory, so try to preallocate (all memory - size) pages. + */ + alloc = (count - pages) - size; + pages += preallocate_image_highmem(alloc); + } else { + /* + * There are approximately max_size saveable pages at this point + * and we want to reduce this number down to size. + */ + alloc = max_size - size; + size = preallocate_highmem_fraction(alloc, highmem, count); + pages_highmem += size; + alloc -= size; + size = preallocate_image_memory(alloc, avail_normal); + pages_highmem += preallocate_image_highmem(alloc - size); + pages += pages_highmem + size; + } + + /* + * We only need as many page frames for the image as there are saveable + * pages in memory, but we have allocated more. Release the excessive + * ones now. + */ + free_unnecessary_pages(); + + out: + do_gettimeofday(&stop); + printk(KERN_CONT "done (allocated %lu pages)\n", pages); + swsusp_show_speed(&start, &stop, pages, "Allocated"); + + return 0; + + err_out: + printk(KERN_CONT "\n"); + swsusp_free(); + return -ENOMEM; +} + +#ifdef CONFIG_HIGHMEM +/** + * count_pages_for_highmem - compute the number of non-highmem pages + * that will be necessary for creating copies of highmem pages. 
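+ *
+ * For example, if the image contains 1000 highmem pages but only 600
+ * highmem page frames are free (or already preallocated), copies of the
+ * remaining 400 pages have to be placed in non-highmem page frames.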
+ */ + +static unsigned int count_pages_for_highmem(unsigned int nr_highmem) +{ + unsigned int free_highmem = count_free_highmem_pages() + alloc_highmem; + + if (free_highmem >= nr_highmem) + nr_highmem = 0; + else + nr_highmem -= free_highmem; + + return nr_highmem; +} +#else +static unsigned int +count_pages_for_highmem(unsigned int nr_highmem) { return 0; } +#endif /* CONFIG_HIGHMEM */ + +/** + * enough_free_mem - Make sure we have enough free memory for the + * snapshot image. + */ + +static int enough_free_mem(unsigned int nr_pages, unsigned int nr_highmem) +{ + struct zone *zone; + unsigned int free = alloc_normal; + + for_each_populated_zone(zone) + if (!is_highmem(zone)) + free += zone_page_state(zone, NR_FREE_PAGES); + + nr_pages += count_pages_for_highmem(nr_highmem); + pr_debug("PM: Normal pages needed: %u + %u, available pages: %u\n", + nr_pages, PAGES_FOR_IO, free); + + return free > nr_pages + PAGES_FOR_IO; +} + +#ifdef CONFIG_HIGHMEM +/** + * get_highmem_buffer - if there are some highmem pages in the suspend + * image, we may need the buffer to copy them and/or load their data. + */ + +static inline int get_highmem_buffer(int safe_needed) +{ + buffer = get_image_page(GFP_ATOMIC | __GFP_COLD, safe_needed); + return buffer ? 0 : -ENOMEM; +} + +/** + * alloc_highmem_image_pages - allocate some highmem pages for the image. + * Try to allocate as many pages as needed, but if the number of free + * highmem pages is lesser than that, allocate them all. + */ + +static inline unsigned int +alloc_highmem_pages(struct memory_bitmap *bm, unsigned int nr_highmem) +{ + unsigned int to_alloc = count_free_highmem_pages(); + + if (to_alloc > nr_highmem) + to_alloc = nr_highmem; + + nr_highmem -= to_alloc; + while (to_alloc-- > 0) { + struct page *page; + + page = alloc_image_page(__GFP_HIGHMEM); + memory_bm_set_bit(bm, page_to_pfn(page)); + } + return nr_highmem; +} +#else +static inline int get_highmem_buffer(int safe_needed) { return 0; } + +static inline unsigned int +alloc_highmem_pages(struct memory_bitmap *bm, unsigned int n) { return 0; } +#endif /* CONFIG_HIGHMEM */ + +/** + * swsusp_alloc - allocate memory for the suspend image + * + * We first try to allocate as many highmem pages as there are + * saveable highmem pages in the system. If that fails, we allocate + * non-highmem pages for the copies of the remaining highmem ones. + * + * In this approach it is likely that the copies of highmem pages will + * also be located in the high memory, because of the way in which + * copy_data_pages() works. 
+ */
+
+static int
+swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm,
+		unsigned int nr_pages, unsigned int nr_highmem)
+{
+	if (nr_highmem > 0) {
+		if (get_highmem_buffer(PG_ANY))
+			goto err_out;
+		if (nr_highmem > alloc_highmem) {
+			nr_highmem -= alloc_highmem;
+			nr_pages += alloc_highmem_pages(copy_bm, nr_highmem);
+		}
+	}
+	if (nr_pages > alloc_normal) {
+		nr_pages -= alloc_normal;
+		while (nr_pages-- > 0) {
+			struct page *page;
+
+			page = alloc_image_page(GFP_ATOMIC | __GFP_COLD);
+			if (!page)
+				goto err_out;
+			memory_bm_set_bit(copy_bm, page_to_pfn(page));
+		}
+	}
+
+	return 0;
+
+ err_out:
+	swsusp_free();
+	return -ENOMEM;
+}
+
+asmlinkage int swsusp_save(void)
+{
+	unsigned int nr_pages, nr_highmem;
+
+	printk(KERN_INFO "PM: Creating hibernation image:\n");
+
+	drain_local_pages(NULL);
+	nr_pages = count_data_pages();
+	nr_highmem = count_highmem_pages();
+	printk(KERN_INFO "PM: Need to copy %u pages\n", nr_pages + nr_highmem);
+
+	if (!enough_free_mem(nr_pages, nr_highmem)) {
+		printk(KERN_ERR "PM: Not enough free memory\n");
+		return -ENOMEM;
+	}
+
+	if (swsusp_alloc(&orig_bm, &copy_bm, nr_pages, nr_highmem)) {
+		printk(KERN_ERR "PM: Memory allocation failed\n");
+		return -ENOMEM;
+	}
+
+	/* During allocating of suspend pagedir, new cold pages may appear.
+	 * Kill them.
+	 */
+	drain_local_pages(NULL);
+	copy_data_pages(&copy_bm, &orig_bm);
+
+	/*
+	 * End of critical section. From now on, we can write to memory,
+	 * but we should not touch disk. This specially means we must _not_
+	 * touch swap space! Except we must write out our image of course.
+	 */
+
+	nr_pages += nr_highmem;
+	nr_copy_pages = nr_pages;
+	nr_meta_pages = DIV_ROUND_UP(nr_pages * sizeof(long), PAGE_SIZE);
+
+	printk(KERN_INFO "PM: Hibernation image created (%d pages copied)\n",
+		nr_pages);
+
+	return 0;
+}
+
+#ifndef CONFIG_ARCH_HIBERNATION_HEADER
+static int init_header_complete(struct swsusp_info *info)
+{
+	memcpy(&info->uts, init_utsname(), sizeof(struct new_utsname));
+	info->version_code = LINUX_VERSION_CODE;
+	return 0;
+}
+
+static char *check_image_kernel(struct swsusp_info *info)
+{
+	if (info->version_code != LINUX_VERSION_CODE)
+		return "kernel version";
+	if (strcmp(info->uts.sysname, init_utsname()->sysname))
+		return "system type";
+	if (strcmp(info->uts.release, init_utsname()->release))
+		return "kernel release";
+	if (strcmp(info->uts.version, init_utsname()->version))
+		return "version";
+	if (strcmp(info->uts.machine, init_utsname()->machine))
+		return "machine";
+	return NULL;
+}
+#endif /* CONFIG_ARCH_HIBERNATION_HEADER */
+
+unsigned long snapshot_get_image_size(void)
+{
+	return nr_copy_pages + nr_meta_pages + 1;
+}
+
+static int init_header(struct swsusp_info *info)
+{
+	memset(info, 0, sizeof(struct swsusp_info));
+	info->num_physpages = num_physpages;
+	info->image_pages = nr_copy_pages;
+	info->pages = snapshot_get_image_size();
+	info->size = info->pages;
+	info->size <<= PAGE_SHIFT;
+	return init_header_complete(info);
+}
+
+/**
+ * pack_pfns - pfns corresponding to the set bits found in the bitmap @bm
+ * are stored in the array @buf[] (1 page at a time)
+ */
+
+static inline void
+pack_pfns(unsigned long *buf, struct memory_bitmap *bm)
+{
+	int j;
+
+	for (j = 0; j < PAGE_SIZE / sizeof(long); j++) {
+		buf[j] = memory_bm_next_pfn(bm);
+		if (unlikely(buf[j] == BM_END_OF_MAP))
+			break;
+	}
+}
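The accounting at the end of swsusp_save() and in snapshot_get_image_size() above is plain arithmetic: each copied page contributes one pfn (an unsigned long) to the metadata that pack_pfns() later writes out one page at a time. A minimal stand-alone sketch with made-up numbers, assuming 4 KiB pages and 8-byte longs:

#include <stdio.h>

#define PAGE_SIZE	4096UL
#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

int main(void)
{
	unsigned long nr_copy_pages = 100000;	/* data pages in the image */
	unsigned long nr_meta_pages =
		DIV_ROUND_UP(nr_copy_pages * sizeof(long), PAGE_SIZE);

	/* 100000 pfns * 8 bytes -> 196 metadata pages; the image consists of
	 * data pages + metadata pages + 1 header page. */
	printf("%lu data + %lu meta + 1 header = %lu pages total\n",
	       nr_copy_pages, nr_meta_pages, nr_copy_pages + nr_meta_pages + 1);
	return 0;
}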
+/**
+ * snapshot_read_next - used for reading the system memory snapshot.
+ *
+ * On the first call to it @handle should point to a zeroed
+ * snapshot_handle structure. The structure gets updated and a pointer
+ * to it should be passed to this function every next time.
+ *
+ * On success the function returns a positive number. Then, the caller
+ * is allowed to read up to the returned number of bytes from the memory
+ * location computed by the data_of() macro.
+ *
+ * The function returns 0 to indicate the end of data stream condition,
+ * and a negative number is returned on error. In such cases the
+ * structure pointed to by @handle is not updated and should not be used
+ * any more.
+ */
+
+int snapshot_read_next(struct snapshot_handle *handle)
+{
+	if (handle->cur > nr_meta_pages + nr_copy_pages)
+		return 0;
+
+	if (!buffer) {
+		/* This makes the buffer be freed by swsusp_free() */
+		buffer = get_image_page(GFP_ATOMIC, PG_ANY);
+		if (!buffer)
+			return -ENOMEM;
+	}
+	if (!handle->cur) {
+		int error;
+
+		error = init_header((struct swsusp_info *)buffer);
+		if (error)
+			return error;
+		handle->buffer = buffer;
+		memory_bm_position_reset(&orig_bm);
+		memory_bm_position_reset(&copy_bm);
+	} else if (handle->cur <= nr_meta_pages) {
+		clear_page(buffer);
+		pack_pfns(buffer, &orig_bm);
+	} else {
+		struct page *page;
+
+		page = pfn_to_page(memory_bm_next_pfn(&copy_bm));
+		if (PageHighMem(page)) {
+			/* Highmem pages are copied to the buffer,
+			 * because we can't return with a kmapped
+			 * highmem page (we may not be called again).
+			 */
+			void *kaddr;
+
+			kaddr = kmap_atomic(page, KM_USER0);
+			copy_page(buffer, kaddr);
+			kunmap_atomic(kaddr, KM_USER0);
+			handle->buffer = buffer;
+		} else {
+			handle->buffer = page_address(page);
+		}
+	}
+	handle->cur++;
+	return PAGE_SIZE;
+}
+
+/**
+ * mark_unsafe_pages - mark the pages that cannot be used for storing
+ * the image during resume, because they conflict with the pages that
+ * had been used before suspend
+ */
+
+static int mark_unsafe_pages(struct memory_bitmap *bm)
+{
+	struct zone *zone;
+	unsigned long pfn, max_zone_pfn;
+
+	/* Clear page flags */
+	for_each_populated_zone(zone) {
+		max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
+		for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
+			if (pfn_valid(pfn))
+				swsusp_unset_page_free(pfn_to_page(pfn));
+	}
+
+	/* Mark pages that correspond to the "original" pfns as "unsafe" */
+	memory_bm_position_reset(bm);
+	do {
+		pfn = memory_bm_next_pfn(bm);
+		if (likely(pfn != BM_END_OF_MAP)) {
+			if (likely(pfn_valid(pfn)))
+				swsusp_set_page_free(pfn_to_page(pfn));
+			else
+				return -EFAULT;
+		}
+	} while (pfn != BM_END_OF_MAP);
+
+	allocated_unsafe_pages = 0;
+
+	return 0;
+}
+
+static void
+duplicate_memory_bitmap(struct memory_bitmap *dst, struct memory_bitmap *src)
+{
+	unsigned long pfn;
+
+	memory_bm_position_reset(src);
+	pfn = memory_bm_next_pfn(src);
+	while (pfn != BM_END_OF_MAP) {
+		memory_bm_set_bit(dst, pfn);
+		pfn = memory_bm_next_pfn(src);
+	}
+}
+
+static int check_header(struct swsusp_info *info)
+{
+	char *reason;
+
+	reason = check_image_kernel(info);
+	if (!reason && info->num_physpages != num_physpages)
+		reason = "memory size";
+	if (reason) {
+		printk(KERN_ERR "PM: Image mismatch: %s\n", reason);
+		return -EPERM;
+	}
+	return 0;
+}
+
+/**
+ * load_header - check the image header and copy data from it
+ */
+
+static int
+load_header(struct swsusp_info *info)
+{
+	int error;
+
+	restore_pblist = NULL;
+	error = check_header(info);
+	if (!error) {
+		nr_copy_pages = info->image_pages;
+		nr_meta_pages = info->pages - info->image_pages - 1;
+	}
+	return error;
+}
+
+/**
+ * unpack_orig_pfns - for each element of @buf[] (1 page at a time) set
+ *
the corresponding bit in the memory bitmap @bm + */ +static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm) +{ + int j; + + for (j = 0; j < PAGE_SIZE / sizeof(long); j++) { + if (unlikely(buf[j] == BM_END_OF_MAP)) + break; + + if (memory_bm_pfn_present(bm, buf[j])) + memory_bm_set_bit(bm, buf[j]); + else + return -EFAULT; + } + + return 0; +} + +/* List of "safe" pages that may be used to store data loaded from the suspend + * image + */ +static struct linked_page *safe_pages_list; + +#ifdef CONFIG_HIGHMEM +/* struct highmem_pbe is used for creating the list of highmem pages that + * should be restored atomically during the resume from disk, because the page + * frames they have occupied before the suspend are in use. + */ +struct highmem_pbe { + struct page *copy_page; /* data is here now */ + struct page *orig_page; /* data was here before the suspend */ + struct highmem_pbe *next; +}; + +/* List of highmem PBEs needed for restoring the highmem pages that were + * allocated before the suspend and included in the suspend image, but have + * also been allocated by the "resume" kernel, so their contents cannot be + * written directly to their "original" page frames. + */ +static struct highmem_pbe *highmem_pblist; + +/** + * count_highmem_image_pages - compute the number of highmem pages in the + * suspend image. The bits in the memory bitmap @bm that correspond to the + * image pages are assumed to be set. + */ + +static unsigned int count_highmem_image_pages(struct memory_bitmap *bm) +{ + unsigned long pfn; + unsigned int cnt = 0; + + memory_bm_position_reset(bm); + pfn = memory_bm_next_pfn(bm); + while (pfn != BM_END_OF_MAP) { + if (PageHighMem(pfn_to_page(pfn))) + cnt++; + + pfn = memory_bm_next_pfn(bm); + } + return cnt; +} + +/** + * prepare_highmem_image - try to allocate as many highmem pages as + * there are highmem image pages (@nr_highmem_p points to the variable + * containing the number of highmem image pages). The pages that are + * "safe" (ie. will not be overwritten when the suspend image is + * restored) have the corresponding bits set in @bm (it must be + * unitialized). + * + * NOTE: This function should not be called if there are no highmem + * image pages. + */ + +static unsigned int safe_highmem_pages; + +static struct memory_bitmap *safe_highmem_bm; + +static int +prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p) +{ + unsigned int to_alloc; + + if (memory_bm_create(bm, GFP_ATOMIC, PG_SAFE)) + return -ENOMEM; + + if (get_highmem_buffer(PG_SAFE)) + return -ENOMEM; + + to_alloc = count_free_highmem_pages(); + if (to_alloc > *nr_highmem_p) + to_alloc = *nr_highmem_p; + else + *nr_highmem_p = to_alloc; + + safe_highmem_pages = 0; + while (to_alloc-- > 0) { + struct page *page; + + page = alloc_page(__GFP_HIGHMEM); + if (!swsusp_page_is_free(page)) { + /* The page is "safe", set its bit the bitmap */ + memory_bm_set_bit(bm, page_to_pfn(page)); + safe_highmem_pages++; + } + /* Mark the page as allocated */ + swsusp_set_page_forbidden(page); + swsusp_set_page_free(page); + } + memory_bm_position_reset(bm); + safe_highmem_bm = bm; + return 0; +} + +/** + * get_highmem_page_buffer - for given highmem image page find the buffer + * that suspend_write_next() should set for its caller to write to. + * + * If the page is to be saved to its "original" page frame or a copy of + * the page is to be made in the highmem, @buffer is returned. 
Otherwise, + * the copy of the page is to be made in normal memory, so the address of + * the copy is returned. + * + * If @buffer is returned, the caller of suspend_write_next() will write + * the page's contents to @buffer, so they will have to be copied to the + * right location on the next call to suspend_write_next() and it is done + * with the help of copy_last_highmem_page(). For this purpose, if + * @buffer is returned, @last_highmem page is set to the page to which + * the data will have to be copied from @buffer. + */ + +static struct page *last_highmem_page; + +static void * +get_highmem_page_buffer(struct page *page, struct chain_allocator *ca) +{ + struct highmem_pbe *pbe; + void *kaddr; + + if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page)) { + /* We have allocated the "original" page frame and we can + * use it directly to store the loaded page. + */ + last_highmem_page = page; + return buffer; + } + /* The "original" page frame has not been allocated and we have to + * use a "safe" page frame to store the loaded page. + */ + pbe = chain_alloc(ca, sizeof(struct highmem_pbe)); + if (!pbe) { + swsusp_free(); + return ERR_PTR(-ENOMEM); + } + pbe->orig_page = page; + if (safe_highmem_pages > 0) { + struct page *tmp; + + /* Copy of the page will be stored in high memory */ + kaddr = buffer; + tmp = pfn_to_page(memory_bm_next_pfn(safe_highmem_bm)); + safe_highmem_pages--; + last_highmem_page = tmp; + pbe->copy_page = tmp; + } else { + /* Copy of the page will be stored in normal memory */ + kaddr = safe_pages_list; + safe_pages_list = safe_pages_list->next; + pbe->copy_page = virt_to_page(kaddr); + } + pbe->next = highmem_pblist; + highmem_pblist = pbe; + return kaddr; +} + +/** + * copy_last_highmem_page - copy the contents of a highmem image from + * @buffer, where the caller of snapshot_write_next() has place them, + * to the right location represented by @last_highmem_page . + */ + +static void copy_last_highmem_page(void) +{ + if (last_highmem_page) { + void *dst; + + dst = kmap_atomic(last_highmem_page, KM_USER0); + copy_page(dst, buffer); + kunmap_atomic(dst, KM_USER0); + last_highmem_page = NULL; + } +} + +static inline int last_highmem_page_copied(void) +{ + return !last_highmem_page; +} + +static inline void free_highmem_data(void) +{ + if (safe_highmem_bm) + memory_bm_free(safe_highmem_bm, PG_UNSAFE_CLEAR); + + if (buffer) + free_image_page(buffer, PG_UNSAFE_CLEAR); +} +#else +static inline int get_safe_write_buffer(void) { return 0; } + +static unsigned int +count_highmem_image_pages(struct memory_bitmap *bm) { return 0; } + +static inline int +prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p) +{ + return 0; +} + +static inline void * +get_highmem_page_buffer(struct page *page, struct chain_allocator *ca) +{ + return ERR_PTR(-EINVAL); +} + +static inline void copy_last_highmem_page(void) {} +static inline int last_highmem_page_copied(void) { return 1; } +static inline void free_highmem_data(void) {} +#endif /* CONFIG_HIGHMEM */ + +/** + * prepare_image - use the memory bitmap @bm to mark the pages that will + * be overwritten in the process of restoring the system memory state + * from the suspend image ("unsafe" pages) and allocate memory for the + * image. + * + * The idea is to allocate a new memory bitmap first and then allocate + * as many pages as needed for the image data, but not to assign these + * pages to specific tasks initially. 
Instead, we just mark them as + * allocated and create a lists of "safe" pages that will be used + * later. On systems with high memory a list of "safe" highmem pages is + * also created. + */ + +#define PBES_PER_LINKED_PAGE (LINKED_PAGE_DATA_SIZE / sizeof(struct pbe)) + +static int +prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm) +{ + unsigned int nr_pages, nr_highmem; + struct linked_page *sp_list, *lp; + int error; + + /* If there is no highmem, the buffer will not be necessary */ + free_image_page(buffer, PG_UNSAFE_CLEAR); + buffer = NULL; + + nr_highmem = count_highmem_image_pages(bm); + error = mark_unsafe_pages(bm); + if (error) + goto Free; + + error = memory_bm_create(new_bm, GFP_ATOMIC, PG_SAFE); + if (error) + goto Free; + + duplicate_memory_bitmap(new_bm, bm); + memory_bm_free(bm, PG_UNSAFE_KEEP); + if (nr_highmem > 0) { + error = prepare_highmem_image(bm, &nr_highmem); + if (error) + goto Free; + } + /* Reserve some safe pages for potential later use. + * + * NOTE: This way we make sure there will be enough safe pages for the + * chain_alloc() in get_buffer(). It is a bit wasteful, but + * nr_copy_pages cannot be greater than 50% of the memory anyway. + */ + sp_list = NULL; + /* nr_copy_pages cannot be lesser than allocated_unsafe_pages */ + nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages; + nr_pages = DIV_ROUND_UP(nr_pages, PBES_PER_LINKED_PAGE); + while (nr_pages > 0) { + lp = get_image_page(GFP_ATOMIC, PG_SAFE); + if (!lp) { + error = -ENOMEM; + goto Free; + } + lp->next = sp_list; + sp_list = lp; + nr_pages--; + } + /* Preallocate memory for the image */ + safe_pages_list = NULL; + nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages; + while (nr_pages > 0) { + lp = (struct linked_page *)get_zeroed_page(GFP_ATOMIC); + if (!lp) { + error = -ENOMEM; + goto Free; + } + if (!swsusp_page_is_free(virt_to_page(lp))) { + /* The page is "safe", add it to the list */ + lp->next = safe_pages_list; + safe_pages_list = lp; + } + /* Mark the page as allocated */ + swsusp_set_page_forbidden(virt_to_page(lp)); + swsusp_set_page_free(virt_to_page(lp)); + nr_pages--; + } + /* Free the reserved safe pages so that chain_alloc() can use them */ + while (sp_list) { + lp = sp_list->next; + free_image_page(sp_list, PG_UNSAFE_CLEAR); + sp_list = lp; + } + return 0; + + Free: + swsusp_free(); + return error; +} + +/** + * get_buffer - compute the address that snapshot_write_next() should + * set for its caller to write to. + */ + +static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca) +{ + struct pbe *pbe; + struct page *page; + unsigned long pfn = memory_bm_next_pfn(bm); + + if (pfn == BM_END_OF_MAP) + return ERR_PTR(-EFAULT); + + page = pfn_to_page(pfn); + if (PageHighMem(page)) + return get_highmem_page_buffer(page, ca); + + if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page)) + /* We have allocated the "original" page frame and we can + * use it directly to store the loaded page. + */ + return page_address(page); + + /* The "original" page frame has not been allocated and we have to + * use a "safe" page frame to store the loaded page. 
+	 */
+	pbe = chain_alloc(ca, sizeof(struct pbe));
+	if (!pbe) {
+		swsusp_free();
+		return ERR_PTR(-ENOMEM);
+	}
+	pbe->orig_address = page_address(page);
+	pbe->address = safe_pages_list;
+	safe_pages_list = safe_pages_list->next;
+	pbe->next = restore_pblist;
+	restore_pblist = pbe;
+	return pbe->address;
+}
+
+/**
+ * snapshot_write_next - used for writing the system memory snapshot.
+ *
+ * On the first call to it @handle should point to a zeroed
+ * snapshot_handle structure. The structure gets updated and a pointer
+ * to it should be passed to this function every next time.
+ *
+ * On success the function returns a positive number. Then, the caller
+ * is allowed to write up to the returned number of bytes to the memory
+ * location computed by the data_of() macro.
+ *
+ * The function returns 0 to indicate the "end of file" condition,
+ * and a negative number is returned on error. In such cases the
+ * structure pointed to by @handle is not updated and should not be used
+ * any more.
+ */
+
+int snapshot_write_next(struct snapshot_handle *handle)
+{
+	static struct chain_allocator ca;
+	int error = 0;
+
+	/* Check if we have already loaded the entire image */
+	if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages)
+		return 0;
+
+	handle->sync_read = 1;
+
+	if (!handle->cur) {
+		if (!buffer)
+			/* This makes the buffer be freed by swsusp_free() */
+			buffer = get_image_page(GFP_ATOMIC, PG_ANY);
+
+		if (!buffer)
+			return -ENOMEM;
+
+		handle->buffer = buffer;
+	} else if (handle->cur == 1) {
+		error = load_header(buffer);
+		if (error)
+			return error;
+
+		error = memory_bm_create(&copy_bm, GFP_ATOMIC, PG_ANY);
+		if (error)
+			return error;
+
+	} else if (handle->cur <= nr_meta_pages + 1) {
+		error = unpack_orig_pfns(buffer, &copy_bm);
+		if (error)
+			return error;
+
+		if (handle->cur == nr_meta_pages + 1) {
+			error = prepare_image(&orig_bm, &copy_bm);
+			if (error)
+				return error;
+
+			chain_init(&ca, GFP_ATOMIC, PG_SAFE);
+			memory_bm_position_reset(&orig_bm);
+			restore_pblist = NULL;
+			handle->buffer = get_buffer(&orig_bm, &ca);
+			handle->sync_read = 0;
+			if (IS_ERR(handle->buffer))
+				return PTR_ERR(handle->buffer);
+		}
+	} else {
+		copy_last_highmem_page();
+		handle->buffer = get_buffer(&orig_bm, &ca);
+		if (IS_ERR(handle->buffer))
+			return PTR_ERR(handle->buffer);
+		if (handle->buffer != buffer)
+			handle->sync_read = 0;
+	}
+	handle->cur++;
+	return PAGE_SIZE;
+}
+
+/**
+ * snapshot_write_finalize - must be called after the last call to
+ * snapshot_write_next() in case the last page in the image happens
+ * to be a highmem page and its contents should be stored in the
+ * highmem. Additionally, it releases the memory that will not be
+ * used any more.
+ */ + +void snapshot_write_finalize(struct snapshot_handle *handle) +{ + copy_last_highmem_page(); + /* Free only if we have loaded the image entirely */ + if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages) { + memory_bm_free(&orig_bm, PG_UNSAFE_CLEAR); + free_highmem_data(); + } +} + +int snapshot_image_loaded(struct snapshot_handle *handle) +{ + return !(!nr_copy_pages || !last_highmem_page_copied() || + handle->cur <= nr_meta_pages + nr_copy_pages); +} + +#ifdef CONFIG_HIGHMEM +/* Assumes that @buf is ready and points to a "safe" page */ +static inline void +swap_two_pages_data(struct page *p1, struct page *p2, void *buf) +{ + void *kaddr1, *kaddr2; + + kaddr1 = kmap_atomic(p1, KM_USER0); + kaddr2 = kmap_atomic(p2, KM_USER1); + copy_page(buf, kaddr1); + copy_page(kaddr1, kaddr2); + copy_page(kaddr2, buf); + kunmap_atomic(kaddr2, KM_USER1); + kunmap_atomic(kaddr1, KM_USER0); +} + +/** + * restore_highmem - for each highmem page that was allocated before + * the suspend and included in the suspend image, and also has been + * allocated by the "resume" kernel swap its current (ie. "before + * resume") contents with the previous (ie. "before suspend") one. + * + * If the resume eventually fails, we can call this function once + * again and restore the "before resume" highmem state. + */ + +int restore_highmem(void) +{ + struct highmem_pbe *pbe = highmem_pblist; + void *buf; + + if (!pbe) + return 0; + + buf = get_image_page(GFP_ATOMIC, PG_SAFE); + if (!buf) + return -ENOMEM; + + while (pbe) { + swap_two_pages_data(pbe->copy_page, pbe->orig_page, buf); + pbe = pbe->next; + } + free_image_page(buf, PG_UNSAFE_CLEAR); + return 0; +} +#endif /* CONFIG_HIGHMEM */ diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c new file mode 100644 index 00000000..3b06b54c --- /dev/null +++ b/kernel/power/suspend.c @@ -0,0 +1,335 @@ +/* + * kernel/power/suspend.c - Suspend to RAM and standby functionality. + * + * Copyright (c) 2003 Patrick Mochel + * Copyright (c) 2003 Open Source Development Lab + * Copyright (c) 2009 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc. + * + * This file is released under the GPLv2. + */ + +#include <linux/string.h> +#include <linux/delay.h> +#include <linux/errno.h> +#include <linux/init.h> +#include <linux/console.h> +#include <linux/cpu.h> +#include <linux/syscalls.h> +#include <linux/gfp.h> +#include <linux/io.h> +#include <linux/kernel.h> +#include <linux/list.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/suspend.h> +#include <linux/syscore_ops.h> +#include <trace/events/power.h> + +#include "power.h" +extern void cpufreq_save_default_governor(void); +extern void cpufreq_restore_default_governor(void); +extern void cpufreq_set_conservative_governor(void); +extern void cpufreq_set_performance_governor(void); +extern void cpufreq_set_conservative_governor_param(int up_th, int down_th); + +const char *const pm_states[PM_SUSPEND_MAX] = { +#ifdef CONFIG_EARLYSUSPEND + [PM_SUSPEND_ON] = "on", +#endif + [PM_SUSPEND_STANDBY] = "standby", + [PM_SUSPEND_MEM] = "mem", +}; + +static const struct platform_suspend_ops *suspend_ops; + +/** + * suspend_set_ops - Set the global suspend method table. + * @ops: Pointer to ops structure. 
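+ *
+ * This is normally called once by platform code during initialization,
+ * e.g. an ACPI system registering something like
+ * suspend_set_ops(&acpi_suspend_ops); valid_state() and suspend_enter()
+ * below then dispatch through these callbacks.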
+ */ +void suspend_set_ops(const struct platform_suspend_ops *ops) +{ + mutex_lock(&pm_mutex); + suspend_ops = ops; + mutex_unlock(&pm_mutex); +} + +bool valid_state(suspend_state_t state) +{ + /* + * All states need lowlevel support and need to be valid to the lowlevel + * implementation, no valid callback implies that none are valid. + */ + return suspend_ops && suspend_ops->valid && suspend_ops->valid(state); +} + +/** + * suspend_valid_only_mem - generic memory-only valid callback + * + * Platform drivers that implement mem suspend only and only need + * to check for that in their .valid callback can use this instead + * of rolling their own .valid callback. + */ +int suspend_valid_only_mem(suspend_state_t state) +{ + return state == PM_SUSPEND_MEM; +} + +static int suspend_test(int level) +{ +#ifdef CONFIG_PM_DEBUG + if (pm_test_level == level) { + printk(KERN_INFO "suspend debug: Waiting for 5 seconds.\n"); + mdelay(5000); + return 1; + } +#endif /* !CONFIG_PM_DEBUG */ + return 0; +} + +/** + * suspend_prepare - Do prep work before entering low-power state. + * + * This is common code that is called for each state that we're entering. + * Run suspend notifiers, allocate a console and stop all processes. + */ +static int suspend_prepare(void) +{ + int error; + + if (!suspend_ops || !suspend_ops->enter) + return -EPERM; + + pm_prepare_console(); + + error = pm_notifier_call_chain(PM_SUSPEND_PREPARE); + if (error) + goto Finish; + + error = usermodehelper_disable(); + if (error) + goto Finish; + + error = suspend_freeze_processes(); + if (!error) + return 0; + + suspend_thaw_processes(); + usermodehelper_enable(); + Finish: + pm_notifier_call_chain(PM_POST_SUSPEND); + pm_restore_console(); + return error; +} + +/* default implementation */ +void __attribute__ ((weak)) arch_suspend_disable_irqs(void) +{ + local_irq_disable(); +} + +/* default implementation */ +void __attribute__ ((weak)) arch_suspend_enable_irqs(void) +{ + local_irq_enable(); +} + +/** + * suspend_enter - enter the desired system sleep state. + * @state: state to enter + * + * This function should be called after devices have been suspended. + */ +static int suspend_enter(suspend_state_t state) +{ + int error; + + if (suspend_ops->prepare) { + error = suspend_ops->prepare(); + if (error) + goto Platform_finish; + } + + error = dpm_suspend_noirq(PMSG_SUSPEND); + if (error) { + printk(KERN_ERR "PM: Some devices failed to power down\n"); + goto Platform_finish; + } + + if (suspend_ops->prepare_late) { + error = suspend_ops->prepare_late(); + if (error) + goto Platform_wake; + } + + if (suspend_test(TEST_PLATFORM)) + goto Platform_wake; + + error = disable_nonboot_cpus(); + if (error || suspend_test(TEST_CPUS)) + goto Enable_cpus; + + arch_suspend_disable_irqs(); + BUG_ON(!irqs_disabled()); + + error = syscore_suspend(); + if (!error) { + if (!(suspend_test(TEST_CORE) || pm_wakeup_pending())) { + error = suspend_ops->enter(state); + events_check_enabled = false; + } + syscore_resume(); + } + + arch_suspend_enable_irqs(); + BUG_ON(irqs_disabled()); + + Enable_cpus: + enable_nonboot_cpus(); + + Platform_wake: + if (suspend_ops->wake) + suspend_ops->wake(); + + dpm_resume_noirq(PMSG_RESUME); + + Platform_finish: + if (suspend_ops->finish) + suspend_ops->finish(); + + return error; +} + +/** + * suspend_devices_and_enter - suspend devices and enter the desired system + * sleep state. 
+ * @state: state to enter + */ +int suspend_devices_and_enter(suspend_state_t state) +{ + int error; + + if (!suspend_ops) + return -ENOSYS; + + trace_machine_suspend(state); + if (suspend_ops->begin) { + error = suspend_ops->begin(state); + if (error) + goto Close; + } + suspend_console(); + suspend_test_start(); + error = dpm_suspend_start(PMSG_SUSPEND); + if (error) { + printk(KERN_ERR "PM: Some devices failed to suspend\n"); + goto Recover_platform; + } + suspend_test_finish("suspend devices"); + if (suspend_test(TEST_DEVICES)) + goto Recover_platform; + + error = suspend_enter(state); + + Resume_devices: + suspend_test_start(); + dpm_resume_end(PMSG_RESUME); + suspend_test_finish("resume devices"); + resume_console(); + Close: + if (suspend_ops->end) + suspend_ops->end(); + trace_machine_suspend(PWR_EVENT_EXIT); + return error; + + Recover_platform: + if (suspend_ops->recover) + suspend_ops->recover(); + goto Resume_devices; +} + +/** + * suspend_finish - Do final work before exiting suspend sequence. + * + * Call platform code to clean up, restart processes, and free the + * console that we've allocated. This is not called for suspend-to-disk. + */ +static void suspend_finish(void) +{ + suspend_thaw_processes(); + usermodehelper_enable(); + pm_notifier_call_chain(PM_POST_SUSPEND); + pm_restore_console(); +} + +/** + * enter_state - Do common work of entering low-power state. + * @state: pm_state structure for state we're entering. + * + * Make sure we're the only ones trying to enter a sleep state. Fail + * if someone has beat us to it, since we don't want anything weird to + * happen when we wake up. + * Then, do the setup for suspend, enter the state, and cleaup (after + * we've woken up). + */ +int enter_state(suspend_state_t state) +{ + int error; + + if (!valid_state(state)) + return -ENODEV; + +#ifndef CONFIG_CPUFREQ_GOV_ON_EARLYSUPSEND//[ + cpufreq_save_default_governor(); + cpufreq_set_performance_governor(); +#endif //] CONFIG_CPUFREQ_GOV_ON_EARLYSUPSEND + + + if (!mutex_trylock(&pm_mutex)) + return -EBUSY; + + printk(KERN_INFO "PM: Syncing filesystems ... "); + sys_sync(); + printk("done.\n"); + + pr_debug("PM: Preparing system for %s sleep\n", pm_states[state]); + + error = suspend_prepare(); + if (error) + goto Unlock; + + if (suspend_test(TEST_FREEZER)) + goto Finish; + + pr_debug("PM: Entering %s sleep\n", pm_states[state]); + pm_restrict_gfp_mask(); + error = suspend_devices_and_enter(state); + pm_restore_gfp_mask(); + + Finish: + pr_debug("PM: Finishing wakeup.\n"); + suspend_finish(); + Unlock: + + mutex_unlock(&pm_mutex); + +#ifndef CONFIG_CPUFREQ_GOV_ON_EARLYSUPSEND//[ + cpufreq_restore_default_governor(); +#endif //] CONFIG_CPUFREQ_GOV_ON_EARLYSUPSEND + + return error; +} + +/** + * pm_suspend - Externally visible function for suspending system. + * @state: Enumerated value of state to enter. + * + * Determine whether or not value is within range, get state + * structure, and enter (above). + */ +int pm_suspend(suspend_state_t state) +{ + if (state > PM_SUSPEND_ON && state < PM_SUSPEND_MAX) + return enter_state(state); + return -EINVAL; +} +EXPORT_SYMBOL(pm_suspend); diff --git a/kernel/power/suspend_test.c b/kernel/power/suspend_test.c new file mode 100644 index 00000000..25596e45 --- /dev/null +++ b/kernel/power/suspend_test.c @@ -0,0 +1,188 @@ +/* + * kernel/power/suspend_test.c - Suspend to RAM and standby test facility. + * + * Copyright (c) 2009 Pavel Machek <pavel@ucw.cz> + * + * This file is released under the GPLv2. 
+ */ + +#include <linux/init.h> +#include <linux/rtc.h> + +#include "power.h" + +/* + * We test the system suspend code by setting an RTC wakealarm a short + * time in the future, then suspending. Suspending the devices won't + * normally take long ... some systems only need a few milliseconds. + * + * The time it takes is system-specific though, so when we test this + * during system bootup we allow a LOT of time. + */ +#define TEST_SUSPEND_SECONDS 10 + +static unsigned long suspend_test_start_time; + +void suspend_test_start(void) +{ + /* FIXME Use better timebase than "jiffies", ideally a clocksource. + * What we want is a hardware counter that will work correctly even + * during the irqs-are-off stages of the suspend/resume cycle... + */ + suspend_test_start_time = jiffies; +} + +void suspend_test_finish(const char *label) +{ + long nj = jiffies - suspend_test_start_time; + unsigned msec; + + msec = jiffies_to_msecs(abs(nj)); + pr_info("PM: %s took %d.%03d seconds\n", label, + msec / 1000, msec % 1000); + + /* Warning on suspend means the RTC alarm period needs to be + * larger -- the system was sooo slooowwww to suspend that the + * alarm (should have) fired before the system went to sleep! + * + * Warning on either suspend or resume also means the system + * has some performance issues. The stack dump of a WARN_ON + * is more likely to get the right attention than a printk... + */ + WARN(msec > (TEST_SUSPEND_SECONDS * 1000), + "Component: %s, time: %u\n", label, msec); +} + +/* + * To test system suspend, we need a hands-off mechanism to resume the + * system. RTCs wake alarms are a common self-contained mechanism. + */ + +static void __init test_wakealarm(struct rtc_device *rtc, suspend_state_t state) +{ + static char err_readtime[] __initdata = + KERN_ERR "PM: can't read %s time, err %d\n"; + static char err_wakealarm [] __initdata = + KERN_ERR "PM: can't set %s wakealarm, err %d\n"; + static char err_suspend[] __initdata = + KERN_ERR "PM: suspend test failed, error %d\n"; + static char info_test[] __initdata = + KERN_INFO "PM: test RTC wakeup from '%s' suspend\n"; + + unsigned long now; + struct rtc_wkalrm alm; + int status; + + /* this may fail if the RTC hasn't been initialized */ + status = rtc_read_time(rtc, &alm.time); + if (status < 0) { + printk(err_readtime, dev_name(&rtc->dev), status); + return; + } + rtc_tm_to_time(&alm.time, &now); + + memset(&alm, 0, sizeof alm); + rtc_time_to_tm(now + TEST_SUSPEND_SECONDS, &alm.time); + alm.enabled = true; + + status = rtc_set_alarm(rtc, &alm); + if (status < 0) { + printk(err_wakealarm, dev_name(&rtc->dev), status); + return; + } + + if (state == PM_SUSPEND_MEM) { + printk(info_test, pm_states[state]); + status = pm_suspend(state); + if (status == -ENODEV) + state = PM_SUSPEND_STANDBY; + } + if (state == PM_SUSPEND_STANDBY) { + printk(info_test, pm_states[state]); + status = pm_suspend(state); + } + if (status < 0) + printk(err_suspend, status); + + /* Some platforms can't detect that the alarm triggered the + * wakeup, or (accordingly) disable it after it afterwards. + * It's supposed to give oneshot behavior; cope. 
+ */ + alm.enabled = false; + rtc_set_alarm(rtc, &alm); +} + +static int __init has_wakealarm(struct device *dev, void *name_ptr) +{ + struct rtc_device *candidate = to_rtc_device(dev); + + if (!candidate->ops->set_alarm) + return 0; + if (!device_may_wakeup(candidate->dev.parent)) + return 0; + + *(const char **)name_ptr = dev_name(dev); + return 1; +} + +/* + * Kernel options like "test_suspend=mem" force suspend/resume sanity tests + * at startup time. They're normally disabled, for faster boot and because + * we can't know which states really work on this particular system. + */ +static suspend_state_t test_state __initdata = PM_SUSPEND_ON; + +static char warn_bad_state[] __initdata = + KERN_WARNING "PM: can't test '%s' suspend state\n"; + +static int __init setup_test_suspend(char *value) +{ + unsigned i; + + /* "=mem" ==> "mem" */ + value++; + for (i = 0; i < PM_SUSPEND_MAX; i++) { + if (!pm_states[i]) + continue; + if (strcmp(pm_states[i], value) != 0) + continue; + test_state = (__force suspend_state_t) i; + return 0; + } + printk(warn_bad_state, value); + return 0; +} +__setup("test_suspend", setup_test_suspend); + +static int __init test_suspend(void) +{ + static char warn_no_rtc[] __initdata = + KERN_WARNING "PM: no wakealarm-capable RTC driver is ready\n"; + + char *pony = NULL; + struct rtc_device *rtc = NULL; + + /* PM is initialized by now; is that state testable? */ + if (test_state == PM_SUSPEND_ON) + goto done; + if (!valid_state(test_state)) { + printk(warn_bad_state, pm_states[test_state]); + goto done; + } + + /* RTCs have initialized by now too ... can we use one? */ + class_find_device(rtc_class, NULL, &pony, has_wakealarm); + if (pony) + rtc = rtc_class_open(pony); + if (!rtc) { + printk(warn_no_rtc); + goto done; + } + + /* go for it */ + test_wakealarm(rtc, test_state); + rtc_class_close(rtc); +done: + return 0; +} +late_initcall(test_suspend); diff --git a/kernel/power/suspend_time.c b/kernel/power/suspend_time.c new file mode 100644 index 00000000..d2a65da9 --- /dev/null +++ b/kernel/power/suspend_time.c @@ -0,0 +1,111 @@ +/* + * debugfs file to track time spent in suspend + * + * Copyright (c) 2011, Google, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include <linux/debugfs.h> +#include <linux/err.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/seq_file.h> +#include <linux/syscore_ops.h> +#include <linux/time.h> + +static struct timespec suspend_time_before; +static unsigned int time_in_suspend_bins[32]; + +#ifdef CONFIG_DEBUG_FS +static int suspend_time_debug_show(struct seq_file *s, void *data) +{ + int bin; + seq_printf(s, "time (secs) count\n"); + seq_printf(s, "------------------\n"); + for (bin = 0; bin < 32; bin++) { + if (time_in_suspend_bins[bin] == 0) + continue; + seq_printf(s, "%4d - %4d %4u\n", + bin ? 
1 << (bin - 1) : 0, 1 << bin, + time_in_suspend_bins[bin]); + } + return 0; +} + +static int suspend_time_debug_open(struct inode *inode, struct file *file) +{ + return single_open(file, suspend_time_debug_show, NULL); +} + +static const struct file_operations suspend_time_debug_fops = { + .open = suspend_time_debug_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int __init suspend_time_debug_init(void) +{ + struct dentry *d; + + d = debugfs_create_file("suspend_time", 0755, NULL, NULL, + &suspend_time_debug_fops); + if (!d) { + pr_err("Failed to create suspend_time debug file\n"); + return -ENOMEM; + } + + return 0; +} + +late_initcall(suspend_time_debug_init); +#endif + +static int suspend_time_syscore_suspend(void) +{ + read_persistent_clock(&suspend_time_before); + + return 0; +} + +static void suspend_time_syscore_resume(void) +{ + struct timespec after; + + read_persistent_clock(&after); + + after = timespec_sub(after, suspend_time_before); + + time_in_suspend_bins[fls(after.tv_sec)]++; + + pr_info("Suspended for %lu.%03lu seconds\n", after.tv_sec, + after.tv_nsec / NSEC_PER_MSEC); +} + +static struct syscore_ops suspend_time_syscore_ops = { + .suspend = suspend_time_syscore_suspend, + .resume = suspend_time_syscore_resume, +}; + +static int suspend_time_syscore_init(void) +{ + register_syscore_ops(&suspend_time_syscore_ops); + + return 0; +} + +static void suspend_time_syscore_exit(void) +{ + unregister_syscore_ops(&suspend_time_syscore_ops); +} +module_init(suspend_time_syscore_init); +module_exit(suspend_time_syscore_exit); diff --git a/kernel/power/swap.c b/kernel/power/swap.c new file mode 100644 index 00000000..7c97c3a0 --- /dev/null +++ b/kernel/power/swap.c @@ -0,0 +1,989 @@ +/* + * linux/kernel/power/swap.c + * + * This file provides functions for reading the suspend image from + * and writing it to a swap partition. + * + * Copyright (C) 1998,2001-2005 Pavel Machek <pavel@ucw.cz> + * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl> + * Copyright (C) 2010 Bojan Smojver <bojan@rexursive.com> + * + * This file is released under the GPLv2. + * + */ + +#include <linux/module.h> +#include <linux/file.h> +#include <linux/delay.h> +#include <linux/bitops.h> +#include <linux/genhd.h> +#include <linux/device.h> +#include <linux/buffer_head.h> +#include <linux/bio.h> +#include <linux/blkdev.h> +#include <linux/swap.h> +#include <linux/swapops.h> +#include <linux/pm.h> +#include <linux/slab.h> +#include <linux/lzo.h> +#include <linux/vmalloc.h> + +#include "power.h" + +#define HIBERNATE_SIG "S1SUSPEND" + +/* + * The swap map is a data structure used for keeping track of each page + * written to a swap partition. It consists of many swap_map_page + * structures that contain each an array of MAP_PAGE_ENTRIES swap entries. + * These structures are stored on the swap and linked together with the + * help of the .next_swap member. + * + * The swap map is created during suspend. The swap map pages are + * allocated and populated one at a time, so we only need one memory + * page to set up the entire structure. + * + * During resume we also only need to use one swap_map_page structure + * at a time. 
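+ *
+ * As a rough example: with 4 KiB pages and an 8-byte sector_t, one
+ * swap_map_page holds 4096 / 8 - 1 = 511 image-page sectors plus the
+ * sector of the next map page, so an image of N pages needs roughly
+ * DIV_ROUND_UP(N, 511) extra map pages on swap.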
+ */ + +#define MAP_PAGE_ENTRIES (PAGE_SIZE / sizeof(sector_t) - 1) + +struct swap_map_page { + sector_t entries[MAP_PAGE_ENTRIES]; + sector_t next_swap; +}; + +/** + * The swap_map_handle structure is used for handling swap in + * a file-alike way + */ + +struct swap_map_handle { + struct swap_map_page *cur; + sector_t cur_swap; + sector_t first_sector; + unsigned int k; +}; + +struct swsusp_header { + char reserved[PAGE_SIZE - 20 - sizeof(sector_t) - sizeof(int)]; + sector_t image; + unsigned int flags; /* Flags to pass to the "boot" kernel */ + char orig_sig[10]; + char sig[10]; +} __attribute__((packed)); + +static struct swsusp_header *swsusp_header; + +/** + * The following functions are used for tracing the allocated + * swap pages, so that they can be freed in case of an error. + */ + +struct swsusp_extent { + struct rb_node node; + unsigned long start; + unsigned long end; +}; + +static struct rb_root swsusp_extents = RB_ROOT; + +static int swsusp_extents_insert(unsigned long swap_offset) +{ + struct rb_node **new = &(swsusp_extents.rb_node); + struct rb_node *parent = NULL; + struct swsusp_extent *ext; + + /* Figure out where to put the new node */ + while (*new) { + ext = container_of(*new, struct swsusp_extent, node); + parent = *new; + if (swap_offset < ext->start) { + /* Try to merge */ + if (swap_offset == ext->start - 1) { + ext->start--; + return 0; + } + new = &((*new)->rb_left); + } else if (swap_offset > ext->end) { + /* Try to merge */ + if (swap_offset == ext->end + 1) { + ext->end++; + return 0; + } + new = &((*new)->rb_right); + } else { + /* It already is in the tree */ + return -EINVAL; + } + } + /* Add the new node and rebalance the tree. */ + ext = kzalloc(sizeof(struct swsusp_extent), GFP_KERNEL); + if (!ext) + return -ENOMEM; + + ext->start = swap_offset; + ext->end = swap_offset; + rb_link_node(&ext->node, parent, new); + rb_insert_color(&ext->node, &swsusp_extents); + return 0; +} + +/** + * alloc_swapdev_block - allocate a swap page and register that it has + * been allocated, so that it can be freed in case of an error. + */ + +sector_t alloc_swapdev_block(int swap) +{ + unsigned long offset; + + offset = swp_offset(get_swap_page_of_type(swap)); + if (offset) { + if (swsusp_extents_insert(offset)) + swap_free(swp_entry(swap, offset)); + else + return swapdev_block(swap, offset); + } + return 0; +} + +/** + * free_all_swap_pages - free swap pages allocated for saving image data. + * It also frees the extents used to register which swap entries had been + * allocated. 
+ */ + +void free_all_swap_pages(int swap) +{ + struct rb_node *node; + + while ((node = swsusp_extents.rb_node)) { + struct swsusp_extent *ext; + unsigned long offset; + + ext = container_of(node, struct swsusp_extent, node); + rb_erase(node, &swsusp_extents); + for (offset = ext->start; offset <= ext->end; offset++) + swap_free(swp_entry(swap, offset)); + + kfree(ext); + } +} + +int swsusp_swap_in_use(void) +{ + return (swsusp_extents.rb_node != NULL); +} + +/* + * General things + */ + +static unsigned short root_swap = 0xffff; +struct block_device *hib_resume_bdev; + +/* + * Saving part + */ + +static int mark_swapfiles(struct swap_map_handle *handle, unsigned int flags) +{ + int error; + + hib_bio_read_page(swsusp_resume_block, swsusp_header, NULL); + if (!memcmp("SWAP-SPACE",swsusp_header->sig, 10) || + !memcmp("SWAPSPACE2",swsusp_header->sig, 10)) { + memcpy(swsusp_header->orig_sig,swsusp_header->sig, 10); + memcpy(swsusp_header->sig, HIBERNATE_SIG, 10); + swsusp_header->image = handle->first_sector; + swsusp_header->flags = flags; + error = hib_bio_write_page(swsusp_resume_block, + swsusp_header, NULL); + } else { + printk(KERN_ERR "PM: Swap header not found!\n"); + error = -ENODEV; + } + return error; +} + +/** + * swsusp_swap_check - check if the resume device is a swap device + * and get its index (if so) + * + * This is called before saving image + */ +static int swsusp_swap_check(void) +{ + int res; + + res = swap_type_of(swsusp_resume_device, swsusp_resume_block, + &hib_resume_bdev); + if (res < 0) + return res; + + root_swap = res; + res = blkdev_get(hib_resume_bdev, FMODE_WRITE, NULL); + if (res) + return res; + + res = set_blocksize(hib_resume_bdev, PAGE_SIZE); + if (res < 0) + blkdev_put(hib_resume_bdev, FMODE_WRITE); + + return res; +} + +/** + * write_page - Write one page to given swap location. + * @buf: Address we're writing. + * @offset: Offset of the swap page we're writing to. 
+ * @bio_chain: Link the next write BIO here + */ + +static int write_page(void *buf, sector_t offset, struct bio **bio_chain) +{ + void *src; + + if (!offset) + return -ENOSPC; + + if (bio_chain) { + src = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH); + if (src) { + copy_page(src, buf); + } else { + WARN_ON_ONCE(1); + bio_chain = NULL; /* Go synchronous */ + src = buf; + } + } else { + src = buf; + } + return hib_bio_write_page(offset, src, bio_chain); +} + +static void release_swap_writer(struct swap_map_handle *handle) +{ + if (handle->cur) + free_page((unsigned long)handle->cur); + handle->cur = NULL; +} + +static int get_swap_writer(struct swap_map_handle *handle) +{ + int ret; + + ret = swsusp_swap_check(); + if (ret) { + if (ret != -ENOSPC) + printk(KERN_ERR "PM: Cannot find swap device, try " + "swapon -a.\n"); + return ret; + } + handle->cur = (struct swap_map_page *)get_zeroed_page(GFP_KERNEL); + if (!handle->cur) { + ret = -ENOMEM; + goto err_close; + } + handle->cur_swap = alloc_swapdev_block(root_swap); + if (!handle->cur_swap) { + ret = -ENOSPC; + goto err_rel; + } + handle->k = 0; + handle->first_sector = handle->cur_swap; + return 0; +err_rel: + release_swap_writer(handle); +err_close: + swsusp_close(FMODE_WRITE); + return ret; +} + +static int swap_write_page(struct swap_map_handle *handle, void *buf, + struct bio **bio_chain) +{ + int error = 0; + sector_t offset; + + if (!handle->cur) + return -EINVAL; + offset = alloc_swapdev_block(root_swap); + error = write_page(buf, offset, bio_chain); + if (error) + return error; + handle->cur->entries[handle->k++] = offset; + if (handle->k >= MAP_PAGE_ENTRIES) { + error = hib_wait_on_bio_chain(bio_chain); + if (error) + goto out; + offset = alloc_swapdev_block(root_swap); + if (!offset) + return -ENOSPC; + handle->cur->next_swap = offset; + error = write_page(handle->cur, handle->cur_swap, NULL); + if (error) + goto out; + clear_page(handle->cur); + handle->cur_swap = offset; + handle->k = 0; + } + out: + return error; +} + +static int flush_swap_writer(struct swap_map_handle *handle) +{ + if (handle->cur && handle->cur_swap) + return write_page(handle->cur, handle->cur_swap, NULL); + else + return -EINVAL; +} + +static int swap_writer_finish(struct swap_map_handle *handle, + unsigned int flags, int error) +{ + if (!error) { + flush_swap_writer(handle); + printk(KERN_INFO "PM: S"); + error = mark_swapfiles(handle, flags); + printk("|\n"); + } + + if (error) + free_all_swap_pages(root_swap); + release_swap_writer(handle); + swsusp_close(FMODE_WRITE); + + return error; +} + +/* We need to remember how much compressed data we need to read. */ +#define LZO_HEADER sizeof(size_t) + +/* Number of pages/bytes we'll compress at one time. */ +#define LZO_UNC_PAGES 32 +#define LZO_UNC_SIZE (LZO_UNC_PAGES * PAGE_SIZE) + +/* Number of pages/bytes we need for compressed data (worst case). */ +#define LZO_CMP_PAGES DIV_ROUND_UP(lzo1x_worst_compress(LZO_UNC_SIZE) + \ + LZO_HEADER, PAGE_SIZE) +#define LZO_CMP_SIZE (LZO_CMP_PAGES * PAGE_SIZE) + +/** + * save_image - save the suspend image data + */ + +static int save_image(struct swap_map_handle *handle, + struct snapshot_handle *snapshot, + unsigned int nr_to_write) +{ + unsigned int m; + int ret; + int nr_pages; + int err2; + struct bio *bio; + struct timeval start; + struct timeval stop; + + printk(KERN_INFO "PM: Saving image data pages (%u pages) ... 
", + nr_to_write); + m = nr_to_write / 100; + if (!m) + m = 1; + nr_pages = 0; + bio = NULL; + do_gettimeofday(&start); + while (1) { + ret = snapshot_read_next(snapshot); + if (ret <= 0) + break; + ret = swap_write_page(handle, data_of(*snapshot), &bio); + if (ret) + break; + if (!(nr_pages % m)) + printk(KERN_CONT "\b\b\b\b%3d%%", nr_pages / m); + nr_pages++; + } + err2 = hib_wait_on_bio_chain(&bio); + do_gettimeofday(&stop); + if (!ret) + ret = err2; + if (!ret) + printk(KERN_CONT "\b\b\b\bdone\n"); + else + printk(KERN_CONT "\n"); + swsusp_show_speed(&start, &stop, nr_to_write, "Wrote"); + return ret; +} + + +/** + * save_image_lzo - Save the suspend image data compressed with LZO. + * @handle: Swap mam handle to use for saving the image. + * @snapshot: Image to read data from. + * @nr_to_write: Number of pages to save. + */ +static int save_image_lzo(struct swap_map_handle *handle, + struct snapshot_handle *snapshot, + unsigned int nr_to_write) +{ + unsigned int m; + int ret = 0; + int nr_pages; + int err2; + struct bio *bio; + struct timeval start; + struct timeval stop; + size_t off, unc_len, cmp_len; + unsigned char *unc, *cmp, *wrk, *page; + + page = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH); + if (!page) { + printk(KERN_ERR "PM: Failed to allocate LZO page\n"); + return -ENOMEM; + } + + wrk = vmalloc(LZO1X_1_MEM_COMPRESS); + if (!wrk) { + printk(KERN_ERR "PM: Failed to allocate LZO workspace\n"); + free_page((unsigned long)page); + return -ENOMEM; + } + + unc = vmalloc(LZO_UNC_SIZE); + if (!unc) { + printk(KERN_ERR "PM: Failed to allocate LZO uncompressed\n"); + vfree(wrk); + free_page((unsigned long)page); + return -ENOMEM; + } + + cmp = vmalloc(LZO_CMP_SIZE); + if (!cmp) { + printk(KERN_ERR "PM: Failed to allocate LZO compressed\n"); + vfree(unc); + vfree(wrk); + free_page((unsigned long)page); + return -ENOMEM; + } + + printk(KERN_INFO + "PM: Compressing and saving image data (%u pages) ... ", + nr_to_write); + m = nr_to_write / 100; + if (!m) + m = 1; + nr_pages = 0; + bio = NULL; + do_gettimeofday(&start); + for (;;) { + for (off = 0; off < LZO_UNC_SIZE; off += PAGE_SIZE) { + ret = snapshot_read_next(snapshot); + if (ret < 0) + goto out_finish; + + if (!ret) + break; + + memcpy(unc + off, data_of(*snapshot), PAGE_SIZE); + + if (!(nr_pages % m)) + printk(KERN_CONT "\b\b\b\b%3d%%", nr_pages / m); + nr_pages++; + } + + if (!off) + break; + + unc_len = off; + ret = lzo1x_1_compress(unc, unc_len, + cmp + LZO_HEADER, &cmp_len, wrk); + if (ret < 0) { + printk(KERN_ERR "PM: LZO compression failed\n"); + break; + } + + if (unlikely(!cmp_len || + cmp_len > lzo1x_worst_compress(unc_len))) { + printk(KERN_ERR "PM: Invalid LZO compressed length\n"); + ret = -1; + break; + } + + *(size_t *)cmp = cmp_len; + + /* + * Given we are writing one page at a time to disk, we copy + * that much from the buffer, although the last bit will likely + * be smaller than full page. This is OK - we saved the length + * of the compressed data, so any garbage at the end will be + * discarded when we read it. 
+ */ + for (off = 0; off < LZO_HEADER + cmp_len; off += PAGE_SIZE) { + memcpy(page, cmp + off, PAGE_SIZE); + + ret = swap_write_page(handle, page, &bio); + if (ret) + goto out_finish; + } + } + +out_finish: + err2 = hib_wait_on_bio_chain(&bio); + do_gettimeofday(&stop); + if (!ret) + ret = err2; + if (!ret) + printk(KERN_CONT "\b\b\b\bdone\n"); + else + printk(KERN_CONT "\n"); + swsusp_show_speed(&start, &stop, nr_to_write, "Wrote"); + + vfree(cmp); + vfree(unc); + vfree(wrk); + free_page((unsigned long)page); + + return ret; +} + +/** + * enough_swap - Make sure we have enough swap to save the image. + * + * Returns TRUE or FALSE after checking the total amount of swap + * space avaiable from the resume partition. + */ + +static int enough_swap(unsigned int nr_pages, unsigned int flags) +{ + unsigned int free_swap = count_swap_pages(root_swap, 1); + unsigned int required; + + pr_debug("PM: Free swap pages: %u\n", free_swap); + + required = PAGES_FOR_IO + ((flags & SF_NOCOMPRESS_MODE) ? + nr_pages : (nr_pages * LZO_CMP_PAGES) / LZO_UNC_PAGES + 1); + return free_swap > required; +} + +/** + * swsusp_write - Write entire image and metadata. + * @flags: flags to pass to the "boot" kernel in the image header + * + * It is important _NOT_ to umount filesystems at this point. We want + * them synced (in case something goes wrong) but we DO not want to mark + * filesystem clean: it is not. (And it does not matter, if we resume + * correctly, we'll mark system clean, anyway.) + */ + +int swsusp_write(unsigned int flags) +{ + struct swap_map_handle handle; + struct snapshot_handle snapshot; + struct swsusp_info *header; + unsigned long pages; + int error; + + pages = snapshot_get_image_size(); + error = get_swap_writer(&handle); + if (error) { + printk(KERN_ERR "PM: Cannot get swap writer\n"); + return error; + } + if (!enough_swap(pages, flags)) { + printk(KERN_ERR "PM: Not enough free swap\n"); + error = -ENOSPC; + goto out_finish; + } + memset(&snapshot, 0, sizeof(struct snapshot_handle)); + error = snapshot_read_next(&snapshot); + if (error < PAGE_SIZE) { + if (error >= 0) + error = -EFAULT; + + goto out_finish; + } + header = (struct swsusp_info *)data_of(snapshot); + error = swap_write_page(&handle, header, NULL); + if (!error) { + error = (flags & SF_NOCOMPRESS_MODE) ? + save_image(&handle, &snapshot, pages - 1) : + save_image_lzo(&handle, &snapshot, pages - 1); + } +out_finish: + error = swap_writer_finish(&handle, flags, error); + return error; +} + +/** + * The following functions allow us to read data using a swap map + * in a file-alike way + */ + +static void release_swap_reader(struct swap_map_handle *handle) +{ + if (handle->cur) + free_page((unsigned long)handle->cur); + handle->cur = NULL; +} + +static int get_swap_reader(struct swap_map_handle *handle, + unsigned int *flags_p) +{ + int error; + + *flags_p = swsusp_header->flags; + + if (!swsusp_header->image) /* how can this happen? 
*/ + return -EINVAL; + + handle->cur = (struct swap_map_page *)get_zeroed_page(__GFP_WAIT | __GFP_HIGH); + if (!handle->cur) + return -ENOMEM; + + error = hib_bio_read_page(swsusp_header->image, handle->cur, NULL); + if (error) { + release_swap_reader(handle); + return error; + } + handle->k = 0; + return 0; +} + +static int swap_read_page(struct swap_map_handle *handle, void *buf, + struct bio **bio_chain) +{ + sector_t offset; + int error; + + if (!handle->cur) + return -EINVAL; + offset = handle->cur->entries[handle->k]; + if (!offset) + return -EFAULT; + error = hib_bio_read_page(offset, buf, bio_chain); + if (error) + return error; + if (++handle->k >= MAP_PAGE_ENTRIES) { + error = hib_wait_on_bio_chain(bio_chain); + handle->k = 0; + offset = handle->cur->next_swap; + if (!offset) + release_swap_reader(handle); + else if (!error) + error = hib_bio_read_page(offset, handle->cur, NULL); + } + return error; +} + +static int swap_reader_finish(struct swap_map_handle *handle) +{ + release_swap_reader(handle); + + return 0; +} + +/** + * load_image - load the image using the swap map handle + * @handle and the snapshot handle @snapshot + * (assume there are @nr_pages pages to load) + */ + +static int load_image(struct swap_map_handle *handle, + struct snapshot_handle *snapshot, + unsigned int nr_to_read) +{ + unsigned int m; + int error = 0; + struct timeval start; + struct timeval stop; + struct bio *bio; + int err2; + unsigned nr_pages; + + printk(KERN_INFO "PM: Loading image data pages (%u pages) ... ", + nr_to_read); + m = nr_to_read / 100; + if (!m) + m = 1; + nr_pages = 0; + bio = NULL; + do_gettimeofday(&start); + for ( ; ; ) { + error = snapshot_write_next(snapshot); + if (error <= 0) + break; + error = swap_read_page(handle, data_of(*snapshot), &bio); + if (error) + break; + if (snapshot->sync_read) + error = hib_wait_on_bio_chain(&bio); + if (error) + break; + if (!(nr_pages % m)) + printk("\b\b\b\b%3d%%", nr_pages / m); + nr_pages++; + } + err2 = hib_wait_on_bio_chain(&bio); + do_gettimeofday(&stop); + if (!error) + error = err2; + if (!error) { + printk("\b\b\b\bdone\n"); + snapshot_write_finalize(snapshot); + if (!snapshot_image_loaded(snapshot)) + error = -ENODATA; + } else + printk("\n"); + swsusp_show_speed(&start, &stop, nr_to_read, "Read"); + return error; +} + +/** + * load_image_lzo - Load compressed image data and decompress them with LZO. + * @handle: Swap map handle to use for loading data. + * @snapshot: Image to copy uncompressed data into. + * @nr_to_read: Number of pages to load. 
+ */ +static int load_image_lzo(struct swap_map_handle *handle, + struct snapshot_handle *snapshot, + unsigned int nr_to_read) +{ + unsigned int m; + int error = 0; + struct bio *bio; + struct timeval start; + struct timeval stop; + unsigned nr_pages; + size_t i, off, unc_len, cmp_len; + unsigned char *unc, *cmp, *page[LZO_CMP_PAGES]; + + for (i = 0; i < LZO_CMP_PAGES; i++) { + page[i] = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH); + if (!page[i]) { + printk(KERN_ERR "PM: Failed to allocate LZO page\n"); + + while (i) + free_page((unsigned long)page[--i]); + + return -ENOMEM; + } + } + + unc = vmalloc(LZO_UNC_SIZE); + if (!unc) { + printk(KERN_ERR "PM: Failed to allocate LZO uncompressed\n"); + + for (i = 0; i < LZO_CMP_PAGES; i++) + free_page((unsigned long)page[i]); + + return -ENOMEM; + } + + cmp = vmalloc(LZO_CMP_SIZE); + if (!cmp) { + printk(KERN_ERR "PM: Failed to allocate LZO compressed\n"); + + vfree(unc); + for (i = 0; i < LZO_CMP_PAGES; i++) + free_page((unsigned long)page[i]); + + return -ENOMEM; + } + + printk(KERN_INFO + "PM: Loading and decompressing image data (%u pages) ... ", + nr_to_read); + m = nr_to_read / 100; + if (!m) + m = 1; + nr_pages = 0; + bio = NULL; + do_gettimeofday(&start); + + error = snapshot_write_next(snapshot); + if (error <= 0) + goto out_finish; + + for (;;) { + error = swap_read_page(handle, page[0], NULL); /* sync */ + if (error) + break; + + cmp_len = *(size_t *)page[0]; + if (unlikely(!cmp_len || + cmp_len > lzo1x_worst_compress(LZO_UNC_SIZE))) { + printk(KERN_ERR "PM: Invalid LZO compressed length\n"); + error = -1; + break; + } + + for (off = PAGE_SIZE, i = 1; + off < LZO_HEADER + cmp_len; off += PAGE_SIZE, i++) { + error = swap_read_page(handle, page[i], &bio); + if (error) + goto out_finish; + } + + error = hib_wait_on_bio_chain(&bio); /* need all data now */ + if (error) + goto out_finish; + + for (off = 0, i = 0; + off < LZO_HEADER + cmp_len; off += PAGE_SIZE, i++) { + memcpy(cmp + off, page[i], PAGE_SIZE); + } + + unc_len = LZO_UNC_SIZE; + error = lzo1x_decompress_safe(cmp + LZO_HEADER, cmp_len, + unc, &unc_len); + if (error < 0) { + printk(KERN_ERR "PM: LZO decompression failed\n"); + break; + } + + if (unlikely(!unc_len || + unc_len > LZO_UNC_SIZE || + unc_len & (PAGE_SIZE - 1))) { + printk(KERN_ERR "PM: Invalid LZO uncompressed length\n"); + error = -1; + break; + } + + for (off = 0; off < unc_len; off += PAGE_SIZE) { + memcpy(data_of(*snapshot), unc + off, PAGE_SIZE); + + if (!(nr_pages % m)) + printk("\b\b\b\b%3d%%", nr_pages / m); + nr_pages++; + + error = snapshot_write_next(snapshot); + if (error <= 0) + goto out_finish; + } + } + +out_finish: + do_gettimeofday(&stop); + if (!error) { + printk("\b\b\b\bdone\n"); + snapshot_write_finalize(snapshot); + if (!snapshot_image_loaded(snapshot)) + error = -ENODATA; + } else + printk("\n"); + swsusp_show_speed(&start, &stop, nr_to_read, "Read"); + + vfree(cmp); + vfree(unc); + for (i = 0; i < LZO_CMP_PAGES; i++) + free_page((unsigned long)page[i]); + + return error; +} + +/** + * swsusp_read - read the hibernation image. + * @flags_p: flags passed by the "frozen" kernel in the image header should + * be written into this memory location + */ + +int swsusp_read(unsigned int *flags_p) +{ + int error; + struct swap_map_handle handle; + struct snapshot_handle snapshot; + struct swsusp_info *header; + + memset(&snapshot, 0, sizeof(struct snapshot_handle)); + error = snapshot_write_next(&snapshot); + if (error < PAGE_SIZE) + return error < 0 ? 
error : -EFAULT; + header = (struct swsusp_info *)data_of(snapshot); + error = get_swap_reader(&handle, flags_p); + if (error) + goto end; + if (!error) + error = swap_read_page(&handle, header, NULL); + if (!error) { + error = (*flags_p & SF_NOCOMPRESS_MODE) ? + load_image(&handle, &snapshot, header->pages - 1) : + load_image_lzo(&handle, &snapshot, header->pages - 1); + } + swap_reader_finish(&handle); +end: + if (!error) + pr_debug("PM: Image successfully loaded\n"); + else + pr_debug("PM: Error %d resuming\n", error); + return error; +} + +/** + * swsusp_check - Check for swsusp signature in the resume device + */ + +int swsusp_check(void) +{ + int error; + + hib_resume_bdev = blkdev_get_by_dev(swsusp_resume_device, + FMODE_READ, NULL); + if (!IS_ERR(hib_resume_bdev)) { + set_blocksize(hib_resume_bdev, PAGE_SIZE); + clear_page(swsusp_header); + error = hib_bio_read_page(swsusp_resume_block, + swsusp_header, NULL); + if (error) + goto put; + + if (!memcmp(HIBERNATE_SIG, swsusp_header->sig, 10)) { + memcpy(swsusp_header->sig, swsusp_header->orig_sig, 10); + /* Reset swap signature now */ + error = hib_bio_write_page(swsusp_resume_block, + swsusp_header, NULL); + } else { + error = -EINVAL; + } + +put: + if (error) + blkdev_put(hib_resume_bdev, FMODE_READ); + else + pr_debug("PM: Image signature found, resuming\n"); + } else { + error = PTR_ERR(hib_resume_bdev); + } + + if (error) + pr_debug("PM: Image not found (code %d)\n", error); + + return error; +} + +/** + * swsusp_close - close swap device. + */ + +void swsusp_close(fmode_t mode) +{ + if (IS_ERR(hib_resume_bdev)) { + pr_debug("PM: Image device not initialised\n"); + return; + } + + blkdev_put(hib_resume_bdev, mode); +} + +static int swsusp_header_init(void) +{ + swsusp_header = (struct swsusp_header*) __get_free_page(GFP_KERNEL); + if (!swsusp_header) + panic("Could not allocate memory for swsusp_header\n"); + return 0; +} + +core_initcall(swsusp_header_init); diff --git a/kernel/power/user.c b/kernel/power/user.c new file mode 100644 index 00000000..42ddbc6f --- /dev/null +++ b/kernel/power/user.c @@ -0,0 +1,486 @@ +/* + * linux/kernel/power/user.c + * + * This file provides the user space interface for software suspend/resume. + * + * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl> + * + * This file is released under the GPLv2. + * + */ + +#include <linux/suspend.h> +#include <linux/syscalls.h> +#include <linux/reboot.h> +#include <linux/string.h> +#include <linux/device.h> +#include <linux/miscdevice.h> +#include <linux/mm.h> +#include <linux/swap.h> +#include <linux/swapops.h> +#include <linux/pm.h> +#include <linux/fs.h> +#include <linux/console.h> +#include <linux/cpu.h> +#include <linux/freezer.h> +#include <scsi/scsi_scan.h> + +#include <asm/uaccess.h> + +#include "power.h" + +/* + * NOTE: The SNAPSHOT_SET_SWAP_FILE and SNAPSHOT_PMOPS ioctls are obsolete and + * will be removed in the future. They are only preserved here for + * compatibility with existing userland utilities. + */ +#define SNAPSHOT_SET_SWAP_FILE _IOW(SNAPSHOT_IOC_MAGIC, 10, unsigned int) +#define SNAPSHOT_PMOPS _IOW(SNAPSHOT_IOC_MAGIC, 12, unsigned int) + +#define PMOPS_PREPARE 1 +#define PMOPS_ENTER 2 +#define PMOPS_FINISH 3 + +/* + * NOTE: The following ioctl definitions are wrong and have been replaced with + * correct ones. They are only preserved here for compatibility with existing + * userland utilities and will be removed in the future. 
+ */ +#define SNAPSHOT_ATOMIC_SNAPSHOT _IOW(SNAPSHOT_IOC_MAGIC, 3, void *) +#define SNAPSHOT_SET_IMAGE_SIZE _IOW(SNAPSHOT_IOC_MAGIC, 6, unsigned long) +#define SNAPSHOT_AVAIL_SWAP _IOR(SNAPSHOT_IOC_MAGIC, 7, void *) +#define SNAPSHOT_GET_SWAP_PAGE _IOR(SNAPSHOT_IOC_MAGIC, 8, void *) + + +#define SNAPSHOT_MINOR 231 + +static struct snapshot_data { + struct snapshot_handle handle; + int swap; + int mode; + char frozen; + char ready; + char platform_support; +} snapshot_state; + +atomic_t snapshot_device_available = ATOMIC_INIT(1); + +static int snapshot_open(struct inode *inode, struct file *filp) +{ + struct snapshot_data *data; + int error; + + mutex_lock(&pm_mutex); + + if (!atomic_add_unless(&snapshot_device_available, -1, 0)) { + error = -EBUSY; + goto Unlock; + } + + if ((filp->f_flags & O_ACCMODE) == O_RDWR) { + atomic_inc(&snapshot_device_available); + error = -ENOSYS; + goto Unlock; + } + if(create_basic_memory_bitmaps()) { + atomic_inc(&snapshot_device_available); + error = -ENOMEM; + goto Unlock; + } + nonseekable_open(inode, filp); + data = &snapshot_state; + filp->private_data = data; + memset(&data->handle, 0, sizeof(struct snapshot_handle)); + if ((filp->f_flags & O_ACCMODE) == O_RDONLY) { + /* Hibernating. The image device should be accessible. */ + data->swap = swsusp_resume_device ? + swap_type_of(swsusp_resume_device, 0, NULL) : -1; + data->mode = O_RDONLY; + error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE); + if (error) + pm_notifier_call_chain(PM_POST_HIBERNATION); + } else { + /* + * Resuming. We may need to wait for the image device to + * appear. + */ + wait_for_device_probe(); + scsi_complete_async_scans(); + + data->swap = -1; + data->mode = O_WRONLY; + error = pm_notifier_call_chain(PM_RESTORE_PREPARE); + if (error) + pm_notifier_call_chain(PM_POST_RESTORE); + } + if (error) { + free_basic_memory_bitmaps(); + atomic_inc(&snapshot_device_available); + } + data->frozen = 0; + data->ready = 0; + data->platform_support = 0; + + Unlock: + mutex_unlock(&pm_mutex); + + return error; +} + +static int snapshot_release(struct inode *inode, struct file *filp) +{ + struct snapshot_data *data; + + mutex_lock(&pm_mutex); + + swsusp_free(); + free_basic_memory_bitmaps(); + data = filp->private_data; + free_all_swap_pages(data->swap); + if (data->frozen) { + pm_restore_gfp_mask(); + thaw_processes(); + } + pm_notifier_call_chain(data->mode == O_RDONLY ? + PM_POST_HIBERNATION : PM_POST_RESTORE); + atomic_inc(&snapshot_device_available); + + mutex_unlock(&pm_mutex); + + return 0; +} + +static ssize_t snapshot_read(struct file *filp, char __user *buf, + size_t count, loff_t *offp) +{ + struct snapshot_data *data; + ssize_t res; + loff_t pg_offp = *offp & ~PAGE_MASK; + + mutex_lock(&pm_mutex); + + data = filp->private_data; + if (!data->ready) { + res = -ENODATA; + goto Unlock; + } + if (!pg_offp) { /* on page boundary? 
*/ + res = snapshot_read_next(&data->handle); + if (res <= 0) + goto Unlock; + } else { + res = PAGE_SIZE - pg_offp; + } + + res = simple_read_from_buffer(buf, count, &pg_offp, + data_of(data->handle), res); + if (res > 0) + *offp += res; + + Unlock: + mutex_unlock(&pm_mutex); + + return res; +} + +static ssize_t snapshot_write(struct file *filp, const char __user *buf, + size_t count, loff_t *offp) +{ + struct snapshot_data *data; + ssize_t res; + loff_t pg_offp = *offp & ~PAGE_MASK; + + mutex_lock(&pm_mutex); + + data = filp->private_data; + + if (!pg_offp) { + res = snapshot_write_next(&data->handle); + if (res <= 0) + goto unlock; + } else { + res = PAGE_SIZE - pg_offp; + } + + res = simple_write_to_buffer(data_of(data->handle), res, &pg_offp, + buf, count); + if (res > 0) + *offp += res; +unlock: + mutex_unlock(&pm_mutex); + + return res; +} + +static void snapshot_deprecated_ioctl(unsigned int cmd) +{ + if (printk_ratelimit()) + printk(KERN_NOTICE "%pf: ioctl '%.8x' is deprecated and will " + "be removed soon, update your suspend-to-disk " + "utilities\n", + __builtin_return_address(0), cmd); +} + +static long snapshot_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) +{ + int error = 0; + struct snapshot_data *data; + loff_t size; + sector_t offset; + + if (_IOC_TYPE(cmd) != SNAPSHOT_IOC_MAGIC) + return -ENOTTY; + if (_IOC_NR(cmd) > SNAPSHOT_IOC_MAXNR) + return -ENOTTY; + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (!mutex_trylock(&pm_mutex)) + return -EBUSY; + + data = filp->private_data; + + switch (cmd) { + + case SNAPSHOT_FREEZE: + if (data->frozen) + break; + + printk("Syncing filesystems ... "); + sys_sync(); + printk("done.\n"); + + error = usermodehelper_disable(); + if (error) + break; + + error = freeze_processes(); + if (error) { + thaw_processes(); + usermodehelper_enable(); + } + if (!error) + data->frozen = 1; + break; + + case SNAPSHOT_UNFREEZE: + if (!data->frozen || data->ready) + break; + pm_restore_gfp_mask(); + thaw_processes(); + usermodehelper_enable(); + data->frozen = 0; + break; + + case SNAPSHOT_ATOMIC_SNAPSHOT: + snapshot_deprecated_ioctl(cmd); + case SNAPSHOT_CREATE_IMAGE: + if (data->mode != O_RDONLY || !data->frozen || data->ready) { + error = -EPERM; + break; + } + pm_restore_gfp_mask(); + error = hibernation_snapshot(data->platform_support); + if (!error) + error = put_user(in_suspend, (int __user *)arg); + if (!error) + data->ready = 1; + break; + + case SNAPSHOT_ATOMIC_RESTORE: + snapshot_write_finalize(&data->handle); + if (data->mode != O_WRONLY || !data->frozen || + !snapshot_image_loaded(&data->handle)) { + error = -EPERM; + break; + } + error = hibernation_restore(data->platform_support); + break; + + case SNAPSHOT_FREE: + swsusp_free(); + memset(&data->handle, 0, sizeof(struct snapshot_handle)); + data->ready = 0; + break; + + case SNAPSHOT_SET_IMAGE_SIZE: + snapshot_deprecated_ioctl(cmd); + case SNAPSHOT_PREF_IMAGE_SIZE: + image_size = arg; + break; + + case SNAPSHOT_GET_IMAGE_SIZE: + if (!data->ready) { + error = -ENODATA; + break; + } + size = snapshot_get_image_size(); + size <<= PAGE_SHIFT; + error = put_user(size, (loff_t __user *)arg); + break; + + case SNAPSHOT_AVAIL_SWAP: + snapshot_deprecated_ioctl(cmd); + case SNAPSHOT_AVAIL_SWAP_SIZE: + size = count_swap_pages(data->swap, 1); + size <<= PAGE_SHIFT; + error = put_user(size, (loff_t __user *)arg); + break; + + case SNAPSHOT_GET_SWAP_PAGE: + snapshot_deprecated_ioctl(cmd); + case SNAPSHOT_ALLOC_SWAP_PAGE: + if (data->swap < 0 || data->swap >= MAX_SWAPFILES) { + 
error = -ENODEV; + break; + } + offset = alloc_swapdev_block(data->swap); + if (offset) { + offset <<= PAGE_SHIFT; + error = put_user(offset, (loff_t __user *)arg); + } else { + error = -ENOSPC; + } + break; + + case SNAPSHOT_FREE_SWAP_PAGES: + if (data->swap < 0 || data->swap >= MAX_SWAPFILES) { + error = -ENODEV; + break; + } + free_all_swap_pages(data->swap); + break; + + case SNAPSHOT_SET_SWAP_FILE: /* This ioctl is deprecated */ + snapshot_deprecated_ioctl(cmd); + if (!swsusp_swap_in_use()) { + /* + * User space encodes device types as two-byte values, + * so we need to recode them + */ + if (old_decode_dev(arg)) { + data->swap = swap_type_of(old_decode_dev(arg), + 0, NULL); + if (data->swap < 0) + error = -ENODEV; + } else { + data->swap = -1; + error = -EINVAL; + } + } else { + error = -EPERM; + } + break; + + case SNAPSHOT_S2RAM: + if (!data->frozen) { + error = -EPERM; + break; + } + /* + * Tasks are frozen and the notifiers have been called with + * PM_HIBERNATION_PREPARE + */ + error = suspend_devices_and_enter(PM_SUSPEND_MEM); + data->ready = 0; + break; + + case SNAPSHOT_PLATFORM_SUPPORT: + data->platform_support = !!arg; + break; + + case SNAPSHOT_POWER_OFF: + if (data->platform_support) + error = hibernation_platform_enter(); + break; + + case SNAPSHOT_PMOPS: /* This ioctl is deprecated */ + snapshot_deprecated_ioctl(cmd); + error = -EINVAL; + + switch (arg) { + + case PMOPS_PREPARE: + data->platform_support = 1; + error = 0; + break; + + case PMOPS_ENTER: + if (data->platform_support) + error = hibernation_platform_enter(); + break; + + case PMOPS_FINISH: + if (data->platform_support) + error = 0; + break; + + default: + printk(KERN_ERR "SNAPSHOT_PMOPS: invalid argument %ld\n", arg); + + } + break; + + case SNAPSHOT_SET_SWAP_AREA: + if (swsusp_swap_in_use()) { + error = -EPERM; + } else { + struct resume_swap_area swap_area; + dev_t swdev; + + error = copy_from_user(&swap_area, (void __user *)arg, + sizeof(struct resume_swap_area)); + if (error) { + error = -EFAULT; + break; + } + + /* + * User space encodes device types as two-byte values, + * so we need to recode them + */ + swdev = new_decode_dev(swap_area.dev); + if (swdev) { + offset = swap_area.offset; + data->swap = swap_type_of(swdev, offset, NULL); + if (data->swap < 0) + error = -ENODEV; + } else { + data->swap = -1; + error = -EINVAL; + } + } + break; + + default: + error = -ENOTTY; + + } + + mutex_unlock(&pm_mutex); + + return error; +} + +static const struct file_operations snapshot_fops = { + .open = snapshot_open, + .release = snapshot_release, + .read = snapshot_read, + .write = snapshot_write, + .llseek = no_llseek, + .unlocked_ioctl = snapshot_ioctl, +}; + +static struct miscdevice snapshot_device = { + .minor = SNAPSHOT_MINOR, + .name = "snapshot", + .fops = &snapshot_fops, +}; + +static int __init snapshot_device_init(void) +{ + return misc_register(&snapshot_device); +}; + +device_initcall(snapshot_device_init); diff --git a/kernel/power/userwakelock.c b/kernel/power/userwakelock.c new file mode 100644 index 00000000..a28a8db4 --- /dev/null +++ b/kernel/power/userwakelock.c @@ -0,0 +1,219 @@ +/* kernel/power/userwakelock.c + * + * Copyright (C) 2005-2008 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <linux/ctype.h> +#include <linux/module.h> +#include <linux/wakelock.h> +#include <linux/slab.h> + +#include "power.h" + +enum { + DEBUG_FAILURE = BIT(0), + DEBUG_ERROR = BIT(1), + DEBUG_NEW = BIT(2), + DEBUG_ACCESS = BIT(3), + DEBUG_LOOKUP = BIT(4), +}; +static int debug_mask = DEBUG_FAILURE; +module_param_named(debug_mask, debug_mask, int, S_IRUGO | S_IWUSR | S_IWGRP); + +static DEFINE_MUTEX(tree_lock); + +struct user_wake_lock { + struct rb_node node; + struct wake_lock wake_lock; + char name[0]; +}; +struct rb_root user_wake_locks; + +static struct user_wake_lock *lookup_wake_lock_name( + const char *buf, int allocate, long *timeoutptr) +{ + struct rb_node **p = &user_wake_locks.rb_node; + struct rb_node *parent = NULL; + struct user_wake_lock *l; + int diff; + u64 timeout; + int name_len; + const char *arg; + + /* Find length of lock name and start of optional timeout string */ + arg = buf; + while (*arg && !isspace(*arg)) + arg++; + name_len = arg - buf; + if (!name_len) + goto bad_arg; + while (isspace(*arg)) + arg++; + + /* Process timeout string */ + if (timeoutptr && *arg) { + timeout = simple_strtoull(arg, (char **)&arg, 0); + while (isspace(*arg)) + arg++; + if (*arg) + goto bad_arg; + /* convert timeout from nanoseconds to jiffies > 0 */ + timeout += (NSEC_PER_SEC / HZ) - 1; + do_div(timeout, (NSEC_PER_SEC / HZ)); + if (timeout <= 0) + timeout = 1; + *timeoutptr = timeout; + } else if (*arg) + goto bad_arg; + else if (timeoutptr) + *timeoutptr = 0; + + /* Lookup wake lock in rbtree */ + while (*p) { + parent = *p; + l = rb_entry(parent, struct user_wake_lock, node); + diff = strncmp(buf, l->name, name_len); + if (!diff && l->name[name_len]) + diff = -1; + if (debug_mask & DEBUG_ERROR) + pr_info("lookup_wake_lock_name: compare %.*s %s %d\n", + name_len, buf, l->name, diff); + + if (diff < 0) + p = &(*p)->rb_left; + else if (diff > 0) + p = &(*p)->rb_right; + else + return l; + } + + /* Allocate and add new wakelock to rbtree */ + if (!allocate) { + if (debug_mask & DEBUG_ERROR) + pr_info("lookup_wake_lock_name: %.*s not found\n", + name_len, buf); + return ERR_PTR(-EINVAL); + } + l = kzalloc(sizeof(*l) + name_len + 1, GFP_KERNEL); + if (l == NULL) { + if (debug_mask & DEBUG_FAILURE) + pr_err("lookup_wake_lock_name: failed to allocate " + "memory for %.*s\n", name_len, buf); + return ERR_PTR(-ENOMEM); + } + memcpy(l->name, buf, name_len); + if (debug_mask & DEBUG_NEW) + pr_info("lookup_wake_lock_name: new wake lock %s\n", l->name); + wake_lock_init(&l->wake_lock, WAKE_LOCK_SUSPEND, l->name); + rb_link_node(&l->node, parent, p); + rb_insert_color(&l->node, &user_wake_locks); + return l; + +bad_arg: + if (debug_mask & DEBUG_ERROR) + pr_info("lookup_wake_lock_name: wake lock, %.*s, bad arg, %s\n", + name_len, buf, arg); + return ERR_PTR(-EINVAL); +} + +ssize_t wake_lock_show( + struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + char *s = buf; + char *end = buf + PAGE_SIZE; + struct rb_node *n; + struct user_wake_lock *l; + + mutex_lock(&tree_lock); + + for (n = rb_first(&user_wake_locks); n != NULL; n = rb_next(n)) { + l = rb_entry(n, struct user_wake_lock, node); + if (wake_lock_active(&l->wake_lock)) + s += scnprintf(s, end - s, "%s ", l->name); + } + s += scnprintf(s, end - s, "\n"); + + 
mutex_unlock(&tree_lock); + return (s - buf); +} + +ssize_t wake_lock_store( + struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t n) +{ + long timeout; + struct user_wake_lock *l; + + mutex_lock(&tree_lock); + l = lookup_wake_lock_name(buf, 1, &timeout); + if (IS_ERR(l)) { + n = PTR_ERR(l); + goto bad_name; + } + + if (debug_mask & DEBUG_ACCESS) + pr_info("wake_lock_store: %s, timeout %ld\n", l->name, timeout); + + if (timeout) + wake_lock_timeout(&l->wake_lock, timeout); + else + wake_lock(&l->wake_lock); +bad_name: + mutex_unlock(&tree_lock); + return n; +} + + +ssize_t wake_unlock_show( + struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + char *s = buf; + char *end = buf + PAGE_SIZE; + struct rb_node *n; + struct user_wake_lock *l; + + mutex_lock(&tree_lock); + + for (n = rb_first(&user_wake_locks); n != NULL; n = rb_next(n)) { + l = rb_entry(n, struct user_wake_lock, node); + if (!wake_lock_active(&l->wake_lock)) + s += scnprintf(s, end - s, "%s ", l->name); + } + s += scnprintf(s, end - s, "\n"); + + mutex_unlock(&tree_lock); + return (s - buf); +} + +ssize_t wake_unlock_store( + struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t n) +{ + struct user_wake_lock *l; + + mutex_lock(&tree_lock); + l = lookup_wake_lock_name(buf, 0, NULL); + if (IS_ERR(l)) { + n = PTR_ERR(l); + goto not_found; + } + + if (debug_mask & DEBUG_ACCESS) + pr_info("wake_unlock_store: %s\n", l->name); + + wake_unlock(&l->wake_lock); +not_found: + mutex_unlock(&tree_lock); + return n; +} + diff --git a/kernel/power/wakelock.c b/kernel/power/wakelock.c new file mode 100644 index 00000000..81e1b7c6 --- /dev/null +++ b/kernel/power/wakelock.c @@ -0,0 +1,634 @@ +/* kernel/power/wakelock.c + * + * Copyright (C) 2005-2008 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/rtc.h> +#include <linux/suspend.h> +#include <linux/syscalls.h> /* sys_sync */ +#include <linux/wakelock.h> +#ifdef CONFIG_WAKELOCK_STAT +#include <linux/proc_fs.h> +#endif +#include "power.h" + +enum { + DEBUG_EXIT_SUSPEND = 1U << 0, + DEBUG_WAKEUP = 1U << 1, + DEBUG_SUSPEND = 1U << 2, + DEBUG_EXPIRE = 1U << 3, + DEBUG_WAKE_LOCK = 1U << 4, +}; +static int debug_mask = DEBUG_EXIT_SUSPEND | DEBUG_WAKEUP; +module_param_named(debug_mask, debug_mask, int, S_IRUGO | S_IWUSR | S_IWGRP); + +#define WAKE_LOCK_TYPE_MASK (0x0f) +#define WAKE_LOCK_INITIALIZED (1U << 8) +#define WAKE_LOCK_ACTIVE (1U << 9) +#define WAKE_LOCK_AUTO_EXPIRE (1U << 10) +#define WAKE_LOCK_PREVENTING_SUSPEND (1U << 11) + +static DEFINE_SPINLOCK(list_lock); +static LIST_HEAD(inactive_locks); +static struct list_head active_wake_locks[WAKE_LOCK_TYPE_COUNT]; +static int current_event_num; +struct workqueue_struct *suspend_work_queue; +struct wake_lock main_wake_lock; +suspend_state_t requested_suspend_state = PM_SUSPEND_MEM; +static struct wake_lock unknown_wakeup; +static struct wake_lock suspend_backoff_lock; + +#define SUSPEND_BACKOFF_THRESHOLD 10 +#define SUSPEND_BACKOFF_INTERVAL 10000 + +static unsigned suspend_short_count; + +#ifdef CONFIG_WAKELOCK_STAT +static struct wake_lock deleted_wake_locks; +static ktime_t last_sleep_time_update; +static int wait_for_wakeup; + +int get_expired_time(struct wake_lock *lock, ktime_t *expire_time) +{ + struct timespec ts; + struct timespec kt; + struct timespec tomono; + struct timespec delta; + struct timespec sleep; + long timeout; + + if (!(lock->flags & WAKE_LOCK_AUTO_EXPIRE)) + return 0; + get_xtime_and_monotonic_and_sleep_offset(&kt, &tomono, &sleep); + timeout = lock->expires - jiffies; + if (timeout > 0) + return 0; + jiffies_to_timespec(-timeout, &delta); + set_normalized_timespec(&ts, kt.tv_sec + tomono.tv_sec - delta.tv_sec, + kt.tv_nsec + tomono.tv_nsec - delta.tv_nsec); + *expire_time = timespec_to_ktime(ts); + return 1; +} + + +static int print_lock_stat(struct seq_file *m, struct wake_lock *lock) +{ + int lock_count = lock->stat.count; + int expire_count = lock->stat.expire_count; + ktime_t active_time = ktime_set(0, 0); + ktime_t total_time = lock->stat.total_time; + ktime_t max_time = lock->stat.max_time; + + ktime_t prevent_suspend_time = lock->stat.prevent_suspend_time; + if (lock->flags & WAKE_LOCK_ACTIVE) { + ktime_t now, add_time; + int expired = get_expired_time(lock, &now); + if (!expired) + now = ktime_get(); + add_time = ktime_sub(now, lock->stat.last_time); + lock_count++; + if (!expired) + active_time = add_time; + else + expire_count++; + total_time = ktime_add(total_time, add_time); + if (lock->flags & WAKE_LOCK_PREVENTING_SUSPEND) + prevent_suspend_time = ktime_add(prevent_suspend_time, + ktime_sub(now, last_sleep_time_update)); + if (add_time.tv64 > max_time.tv64) + max_time = add_time; + } + + return seq_printf(m, + "\"%s\"\t%d\t%d\t%d\t%lld\t%lld\t%lld\t%lld\t%lld\n", + lock->name, lock_count, expire_count, + lock->stat.wakeup_count, ktime_to_ns(active_time), + ktime_to_ns(total_time), + ktime_to_ns(prevent_suspend_time), ktime_to_ns(max_time), + ktime_to_ns(lock->stat.last_time)); +} + +static int wakelock_stats_show(struct seq_file *m, void *unused) +{ + unsigned long irqflags; + struct wake_lock *lock; + int ret; + int type; + + spin_lock_irqsave(&list_lock, irqflags); + + ret = seq_puts(m, "name\tcount\texpire_count\twake_count\tactive_since" + 
"\ttotal_time\tsleep_time\tmax_time\tlast_change\n"); + list_for_each_entry(lock, &inactive_locks, link) + ret = print_lock_stat(m, lock); + for (type = 0; type < WAKE_LOCK_TYPE_COUNT; type++) { + list_for_each_entry(lock, &active_wake_locks[type], link) + ret = print_lock_stat(m, lock); + } + spin_unlock_irqrestore(&list_lock, irqflags); + return 0; +} + +static void wake_unlock_stat_locked(struct wake_lock *lock, int expired) +{ + ktime_t duration; + ktime_t now; + if (!(lock->flags & WAKE_LOCK_ACTIVE)) + return; + if (get_expired_time(lock, &now)) + expired = 1; + else + now = ktime_get(); + lock->stat.count++; + if (expired) + lock->stat.expire_count++; + duration = ktime_sub(now, lock->stat.last_time); + lock->stat.total_time = ktime_add(lock->stat.total_time, duration); + if (ktime_to_ns(duration) > ktime_to_ns(lock->stat.max_time)) + lock->stat.max_time = duration; + lock->stat.last_time = ktime_get(); + if (lock->flags & WAKE_LOCK_PREVENTING_SUSPEND) { + duration = ktime_sub(now, last_sleep_time_update); + lock->stat.prevent_suspend_time = ktime_add( + lock->stat.prevent_suspend_time, duration); + lock->flags &= ~WAKE_LOCK_PREVENTING_SUSPEND; + } +} + +static void update_sleep_wait_stats_locked(int done) +{ + struct wake_lock *lock; + ktime_t now, etime, elapsed, add; + int expired; + + now = ktime_get(); + elapsed = ktime_sub(now, last_sleep_time_update); + list_for_each_entry(lock, &active_wake_locks[WAKE_LOCK_SUSPEND], link) { + expired = get_expired_time(lock, &etime); + if (lock->flags & WAKE_LOCK_PREVENTING_SUSPEND) { + if (expired) + add = ktime_sub(etime, last_sleep_time_update); + else + add = elapsed; + lock->stat.prevent_suspend_time = ktime_add( + lock->stat.prevent_suspend_time, add); + } + if (done || expired) + lock->flags &= ~WAKE_LOCK_PREVENTING_SUSPEND; + else + lock->flags |= WAKE_LOCK_PREVENTING_SUSPEND; + } + last_sleep_time_update = now; +} +#endif + + +static void expire_wake_lock(struct wake_lock *lock) +{ +#ifdef CONFIG_WAKELOCK_STAT + wake_unlock_stat_locked(lock, 1); +#endif + lock->flags &= ~(WAKE_LOCK_ACTIVE | WAKE_LOCK_AUTO_EXPIRE); + list_del(&lock->link); + list_add(&lock->link, &inactive_locks); + if (debug_mask & (DEBUG_WAKE_LOCK | DEBUG_EXPIRE)) + pr_info("expired wake lock %s\n", lock->name); +} + +/* Caller must acquire the list_lock spinlock */ +static void print_active_locks(int type) +{ + struct wake_lock *lock; + bool print_expired = true; + + BUG_ON(type >= WAKE_LOCK_TYPE_COUNT); + list_for_each_entry(lock, &active_wake_locks[type], link) { + if (lock->flags & WAKE_LOCK_AUTO_EXPIRE) { + long timeout = lock->expires - jiffies; + if (timeout > 0) + pr_info("active wake lock %s, time left %ld\n", + lock->name, timeout); + else if (print_expired) + pr_info("wake lock %s, expired\n", lock->name); + } else { + pr_info("active wake lock %s\n", lock->name); + if (!(debug_mask & DEBUG_EXPIRE)) + print_expired = false; + } + } +} + +static long has_wake_lock_locked(int type) +{ + struct wake_lock *lock, *n; + long max_timeout = 0; + + BUG_ON(type >= WAKE_LOCK_TYPE_COUNT); + list_for_each_entry_safe(lock, n, &active_wake_locks[type], link) { + if (lock->flags & WAKE_LOCK_AUTO_EXPIRE) { + long timeout = lock->expires - jiffies; + if (timeout <= 0) + expire_wake_lock(lock); + else if (timeout > max_timeout) + max_timeout = timeout; + } else + return -1; + } + return max_timeout; +} + +long has_wake_lock(int type) +{ + long ret; + unsigned long irqflags; + spin_lock_irqsave(&list_lock, irqflags); + ret = has_wake_lock_locked(type); + if (ret && 
(debug_mask & DEBUG_WAKEUP) && type == WAKE_LOCK_SUSPEND) + print_active_locks(type); + spin_unlock_irqrestore(&list_lock, irqflags); + return ret; +} + +static void suspend_backoff(void) +{ + pr_info("suspend: too many immediate wakeups, back off\n"); + wake_lock_timeout(&suspend_backoff_lock, + msecs_to_jiffies(SUSPEND_BACKOFF_INTERVAL)); +} + +static void suspend(struct work_struct *work) +{ + int ret; + int entry_event_num; + struct timespec ts_entry, ts_exit; + + if (has_wake_lock(WAKE_LOCK_SUSPEND)) { + if (debug_mask & DEBUG_SUSPEND) + pr_info("suspend: abort suspend\n"); + return; + } + + entry_event_num = current_event_num; + sys_sync(); + if (debug_mask & DEBUG_SUSPEND) + pr_info("suspend: enter suspend\n"); + getnstimeofday(&ts_entry); + ret = pm_suspend(requested_suspend_state); + getnstimeofday(&ts_exit); + + if (debug_mask & DEBUG_EXIT_SUSPEND) { + struct rtc_time tm; + rtc_time_to_tm(ts_exit.tv_sec, &tm); + pr_info("suspend: exit suspend, ret = %d " + "(%d-%02d-%02d %02d:%02d:%02d.%09lu UTC)\n", ret, + tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, + tm.tm_hour, tm.tm_min, tm.tm_sec, ts_exit.tv_nsec); + } + + if (ts_exit.tv_sec - ts_entry.tv_sec <= 1) { + ++suspend_short_count; + + if (suspend_short_count == SUSPEND_BACKOFF_THRESHOLD) { + suspend_backoff(); + suspend_short_count = 0; + } + } else { + suspend_short_count = 0; + } + + if (current_event_num == entry_event_num) { + if (debug_mask & DEBUG_SUSPEND) + pr_info("suspend: pm_suspend returned with no event\n"); + wake_lock_timeout(&unknown_wakeup, HZ / 2); + } +} +static DECLARE_WORK(suspend_work, suspend); + +static void expire_wake_locks(unsigned long data) +{ + long has_lock; + unsigned long irqflags; + if (debug_mask & DEBUG_EXPIRE) + pr_info("expire_wake_locks: start\n"); + spin_lock_irqsave(&list_lock, irqflags); + if (debug_mask & DEBUG_SUSPEND) + print_active_locks(WAKE_LOCK_SUSPEND); + has_lock = has_wake_lock_locked(WAKE_LOCK_SUSPEND); + if (debug_mask & DEBUG_EXPIRE) + pr_info("expire_wake_locks: done, has_lock %ld\n", has_lock); + if (has_lock == 0) + queue_work(suspend_work_queue, &suspend_work); + spin_unlock_irqrestore(&list_lock, irqflags); +} +static DEFINE_TIMER(expire_timer, expire_wake_locks, 0, 0); + +static int power_suspend_late(struct device *dev) +{ + int ret = has_wake_lock(WAKE_LOCK_SUSPEND) ? 
-EAGAIN : 0; +#ifdef CONFIG_WAKELOCK_STAT + wait_for_wakeup = !ret; +#endif + if (debug_mask & DEBUG_SUSPEND) + pr_info("power_suspend_late return %d\n", ret); + return ret; +} + +static struct dev_pm_ops power_driver_pm_ops = { + .suspend_noirq = power_suspend_late, +}; + +static struct platform_driver power_driver = { + .driver.name = "power", + .driver.pm = &power_driver_pm_ops, +}; +static struct platform_device power_device = { + .name = "power", +}; + +void wake_lock_init(struct wake_lock *lock, int type, const char *name) +{ + unsigned long irqflags = 0; + + if (name) + lock->name = name; + BUG_ON(!lock->name); + + if (debug_mask & DEBUG_WAKE_LOCK) + pr_info("wake_lock_init name=%s\n", lock->name); +#ifdef CONFIG_WAKELOCK_STAT + lock->stat.count = 0; + lock->stat.expire_count = 0; + lock->stat.wakeup_count = 0; + lock->stat.total_time = ktime_set(0, 0); + lock->stat.prevent_suspend_time = ktime_set(0, 0); + lock->stat.max_time = ktime_set(0, 0); + lock->stat.last_time = ktime_set(0, 0); +#endif + lock->flags = (type & WAKE_LOCK_TYPE_MASK) | WAKE_LOCK_INITIALIZED; + + INIT_LIST_HEAD(&lock->link); + spin_lock_irqsave(&list_lock, irqflags); + list_add(&lock->link, &inactive_locks); + spin_unlock_irqrestore(&list_lock, irqflags); +} +EXPORT_SYMBOL(wake_lock_init); + +void wake_lock_destroy(struct wake_lock *lock) +{ + unsigned long irqflags; + if (debug_mask & DEBUG_WAKE_LOCK) + pr_info("wake_lock_destroy name=%s\n", lock->name); + spin_lock_irqsave(&list_lock, irqflags); + lock->flags &= ~WAKE_LOCK_INITIALIZED; +#ifdef CONFIG_WAKELOCK_STAT + if (lock->stat.count) { + deleted_wake_locks.stat.count += lock->stat.count; + deleted_wake_locks.stat.expire_count += lock->stat.expire_count; + deleted_wake_locks.stat.total_time = + ktime_add(deleted_wake_locks.stat.total_time, + lock->stat.total_time); + deleted_wake_locks.stat.prevent_suspend_time = + ktime_add(deleted_wake_locks.stat.prevent_suspend_time, + lock->stat.prevent_suspend_time); + deleted_wake_locks.stat.max_time = + ktime_add(deleted_wake_locks.stat.max_time, + lock->stat.max_time); + } +#endif + list_del(&lock->link); + spin_unlock_irqrestore(&list_lock, irqflags); +} +EXPORT_SYMBOL(wake_lock_destroy); + +static void wake_lock_internal( + struct wake_lock *lock, long timeout, int has_timeout) +{ + int type; + unsigned long irqflags; + long expire_in; + + spin_lock_irqsave(&list_lock, irqflags); + type = lock->flags & WAKE_LOCK_TYPE_MASK; + BUG_ON(type >= WAKE_LOCK_TYPE_COUNT); + BUG_ON(!(lock->flags & WAKE_LOCK_INITIALIZED)); +#ifdef CONFIG_WAKELOCK_STAT + if (type == WAKE_LOCK_SUSPEND && wait_for_wakeup) { + if (debug_mask & DEBUG_WAKEUP) + pr_info("wakeup wake lock: %s\n", lock->name); + wait_for_wakeup = 0; + lock->stat.wakeup_count++; + } + if ((lock->flags & WAKE_LOCK_AUTO_EXPIRE) && + (long)(lock->expires - jiffies) <= 0) { + wake_unlock_stat_locked(lock, 0); + lock->stat.last_time = ktime_get(); + } +#endif + if (!(lock->flags & WAKE_LOCK_ACTIVE)) { + lock->flags |= WAKE_LOCK_ACTIVE; +#ifdef CONFIG_WAKELOCK_STAT + lock->stat.last_time = ktime_get(); +#endif + } + list_del(&lock->link); + if (has_timeout) { + if (debug_mask & DEBUG_WAKE_LOCK) + pr_info("wake_lock: %s, type %d, timeout %ld.%03lu\n", + lock->name, type, timeout / HZ, + (timeout % HZ) * MSEC_PER_SEC / HZ); + lock->expires = jiffies + timeout; + lock->flags |= WAKE_LOCK_AUTO_EXPIRE; + list_add_tail(&lock->link, &active_wake_locks[type]); + } else { + if (debug_mask & DEBUG_WAKE_LOCK) + pr_info("wake_lock: %s, type %d\n", lock->name, type); + lock->expires = 
LONG_MAX; + lock->flags &= ~WAKE_LOCK_AUTO_EXPIRE; + list_add(&lock->link, &active_wake_locks[type]); + } + if (type == WAKE_LOCK_SUSPEND) { + current_event_num++; +#ifdef CONFIG_WAKELOCK_STAT + if (lock == &main_wake_lock) + update_sleep_wait_stats_locked(1); + else if (!wake_lock_active(&main_wake_lock)) + update_sleep_wait_stats_locked(0); +#endif + if (has_timeout) + expire_in = has_wake_lock_locked(type); + else + expire_in = -1; + if (expire_in > 0) { + if (debug_mask & DEBUG_EXPIRE) + pr_info("wake_lock: %s, start expire timer, " + "%ld\n", lock->name, expire_in); + mod_timer(&expire_timer, jiffies + expire_in); + } else { + if (del_timer(&expire_timer)) + if (debug_mask & DEBUG_EXPIRE) + pr_info("wake_lock: %s, stop expire timer\n", + lock->name); + if (expire_in == 0) + queue_work(suspend_work_queue, &suspend_work); + } + } + spin_unlock_irqrestore(&list_lock, irqflags); +} + +void wake_lock(struct wake_lock *lock) +{ + wake_lock_internal(lock, 0, 0); +} +EXPORT_SYMBOL(wake_lock); + +void wake_lock_timeout(struct wake_lock *lock, long timeout) +{ + wake_lock_internal(lock, timeout, 1); +} +EXPORT_SYMBOL(wake_lock_timeout); + +void wake_unlock(struct wake_lock *lock) +{ + int type; + unsigned long irqflags; + spin_lock_irqsave(&list_lock, irqflags); + type = lock->flags & WAKE_LOCK_TYPE_MASK; +#ifdef CONFIG_WAKELOCK_STAT + wake_unlock_stat_locked(lock, 0); +#endif + if (debug_mask & DEBUG_WAKE_LOCK) + pr_info("wake_unlock: %s\n", lock->name); + lock->flags &= ~(WAKE_LOCK_ACTIVE | WAKE_LOCK_AUTO_EXPIRE); + list_del(&lock->link); + list_add(&lock->link, &inactive_locks); + if (type == WAKE_LOCK_SUSPEND) { + long has_lock = has_wake_lock_locked(type); + if (has_lock > 0) { + if (debug_mask & DEBUG_EXPIRE) + pr_info("wake_unlock: %s, start expire timer, " + "%ld\n", lock->name, has_lock); + mod_timer(&expire_timer, jiffies + has_lock); + } else { + if (del_timer(&expire_timer)) + if (debug_mask & DEBUG_EXPIRE) + pr_info("wake_unlock: %s, stop expire " + "timer\n", lock->name); + if (has_lock == 0) + queue_work(suspend_work_queue, &suspend_work); + } + if (lock == &main_wake_lock) { + if (debug_mask & DEBUG_SUSPEND) + print_active_locks(WAKE_LOCK_SUSPEND); +#ifdef CONFIG_WAKELOCK_STAT + update_sleep_wait_stats_locked(0); +#endif + } + } + spin_unlock_irqrestore(&list_lock, irqflags); +} +EXPORT_SYMBOL(wake_unlock); + +int wake_lock_active(struct wake_lock *lock) +{ + return !!(lock->flags & WAKE_LOCK_ACTIVE); +} +EXPORT_SYMBOL(wake_lock_active); + +static int wakelock_stats_open(struct inode *inode, struct file *file) +{ + return single_open(file, wakelock_stats_show, NULL); +} + +static const struct file_operations wakelock_stats_fops = { + .owner = THIS_MODULE, + .open = wakelock_stats_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int __init wakelocks_init(void) +{ + int ret; + int i; + + for (i = 0; i < ARRAY_SIZE(active_wake_locks); i++) + INIT_LIST_HEAD(&active_wake_locks[i]); + +#ifdef CONFIG_WAKELOCK_STAT + wake_lock_init(&deleted_wake_locks, WAKE_LOCK_SUSPEND, + "deleted_wake_locks"); +#endif + wake_lock_init(&main_wake_lock, WAKE_LOCK_SUSPEND, "main"); + wake_lock(&main_wake_lock); + wake_lock_init(&unknown_wakeup, WAKE_LOCK_SUSPEND, "unknown_wakeups"); + wake_lock_init(&suspend_backoff_lock, WAKE_LOCK_SUSPEND, + "suspend_backoff"); + + ret = platform_device_register(&power_device); + if (ret) { + pr_err("wakelocks_init: platform_device_register failed\n"); + goto err_platform_device_register; + } + ret = 
platform_driver_register(&power_driver); + if (ret) { + pr_err("wakelocks_init: platform_driver_register failed\n"); + goto err_platform_driver_register; + } + + suspend_work_queue = create_singlethread_workqueue("suspend"); + if (suspend_work_queue == NULL) { + ret = -ENOMEM; + goto err_suspend_work_queue; + } + +#ifdef CONFIG_WAKELOCK_STAT + proc_create("wakelocks", S_IRUGO, NULL, &wakelock_stats_fops); +#endif + + return 0; + +err_suspend_work_queue: + platform_driver_unregister(&power_driver); +err_platform_driver_register: + platform_device_unregister(&power_device); +err_platform_device_register: + wake_lock_destroy(&suspend_backoff_lock); + wake_lock_destroy(&unknown_wakeup); + wake_lock_destroy(&main_wake_lock); +#ifdef CONFIG_WAKELOCK_STAT + wake_lock_destroy(&deleted_wake_locks); +#endif + return ret; +} + +static void __exit wakelocks_exit(void) +{ +#ifdef CONFIG_WAKELOCK_STAT + remove_proc_entry("wakelocks", NULL); +#endif + destroy_workqueue(suspend_work_queue); + platform_driver_unregister(&power_driver); + platform_device_unregister(&power_device); + wake_lock_destroy(&suspend_backoff_lock); + wake_lock_destroy(&unknown_wakeup); + wake_lock_destroy(&main_wake_lock); +#ifdef CONFIG_WAKELOCK_STAT + wake_lock_destroy(&deleted_wake_locks); +#endif +} + +core_initcall(wakelocks_init); +module_exit(wakelocks_exit); |
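
A minimal sketch, not part of the patch above, of how a driver might use the wakelock API that kernel/power/wakelock.c exports (wake_lock_init, wake_lock_timeout, wake_lock_active, wake_unlock, wake_lock_destroy). The "example" lock name, the one-second timeout, and the module wrapper are assumptions made for illustration only:

    #include <linux/module.h>
    #include <linux/jiffies.h>
    #include <linux/wakelock.h>

    static struct wake_lock example_wake_lock;

    static int __init example_init(void)
    {
            /* Register the lock; WAKE_LOCK_SUSPEND locks hold off suspend. */
            wake_lock_init(&example_wake_lock, WAKE_LOCK_SUSPEND, "example");
            return 0;
    }

    /*
     * Intended to be called from the driver's wakeup path, e.g. an interrupt
     * handler (the wakelock code takes its list_lock with IRQs disabled, so
     * this is safe in that context).
     */
    static void example_wakeup_event(void)
    {
            /* Hold off suspend for up to one second; the timeout is in jiffies. */
            wake_lock_timeout(&example_wake_lock, HZ);
    }

    /* Intended to be called once the event has been handled in process context. */
    static void example_work_done(void)
    {
            if (wake_lock_active(&example_wake_lock))
                    wake_unlock(&example_wake_lock);
    }

    static void __exit example_exit(void)
    {
            wake_lock_destroy(&example_wake_lock);
    }

    module_init(example_init);
    module_exit(example_exit);
    MODULE_LICENSE("GPL");

User space follows the same pattern through kernel/power/userwakelock.c: wake_lock_store() accepts "lockname" or "lockname <timeout in nanoseconds>" and creates the lock on first use, while wake_unlock_store() releases it by name.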