aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/cpufreq
diff options
context:
space:
mode:
authorroot <root@artemis.panaceas.org>2015-12-25 04:40:36 +0000
committerroot <root@artemis.panaceas.org>2015-12-25 04:40:36 +0000
commit849369d6c66d3054688672f97d31fceb8e8230fb (patch)
tree6135abc790ca67dedbe07c39806591e70eda81ce /drivers/cpufreq
downloadlinux-3.0.35-kobo-849369d6c66d3054688672f97d31fceb8e8230fb.tar.gz
linux-3.0.35-kobo-849369d6c66d3054688672f97d31fceb8e8230fb.tar.bz2
linux-3.0.35-kobo-849369d6c66d3054688672f97d31fceb8e8230fb.zip
initial_commit
Diffstat (limited to 'drivers/cpufreq')
-rw-r--r--drivers/cpufreq/Kconfig215
-rw-r--r--drivers/cpufreq/Kconfig.x86255
-rw-r--r--drivers/cpufreq/Makefile44
-rw-r--r--drivers/cpufreq/acpi-cpufreq.c773
-rw-r--r--drivers/cpufreq/cpufreq-nforce2.c444
-rw-r--r--drivers/cpufreq/cpufreq.c1897
-rw-r--r--drivers/cpufreq/cpufreq_conservative.c630
-rw-r--r--drivers/cpufreq/cpufreq_interactive.c832
-rw-r--r--drivers/cpufreq/cpufreq_ondemand.c759
-rw-r--r--drivers/cpufreq/cpufreq_performance.c63
-rw-r--r--drivers/cpufreq/cpufreq_powersave.c65
-rw-r--r--drivers/cpufreq/cpufreq_stats.c427
-rw-r--r--drivers/cpufreq/cpufreq_userspace.c220
-rw-r--r--drivers/cpufreq/db8500-cpufreq.c169
-rw-r--r--drivers/cpufreq/e_powersaver.c367
-rw-r--r--drivers/cpufreq/elanfreq.c309
-rw-r--r--drivers/cpufreq/freq_table.c236
-rw-r--r--drivers/cpufreq/gx-suspmod.c514
-rw-r--r--drivers/cpufreq/longhaul.c1024
-rw-r--r--drivers/cpufreq/longhaul.h353
-rw-r--r--drivers/cpufreq/longrun.c324
-rw-r--r--drivers/cpufreq/mperf.c51
-rw-r--r--drivers/cpufreq/mperf.h9
-rw-r--r--drivers/cpufreq/p4-clockmod.c329
-rw-r--r--drivers/cpufreq/pcc-cpufreq.c624
-rw-r--r--drivers/cpufreq/powernow-k6.c261
-rw-r--r--drivers/cpufreq/powernow-k7.c747
-rw-r--r--drivers/cpufreq/powernow-k7.h43
-rw-r--r--drivers/cpufreq/powernow-k8.c1619
-rw-r--r--drivers/cpufreq/powernow-k8.h222
-rw-r--r--drivers/cpufreq/sc520_freq.c192
-rw-r--r--drivers/cpufreq/speedstep-centrino.c633
-rw-r--r--drivers/cpufreq/speedstep-ich.c448
-rw-r--r--drivers/cpufreq/speedstep-lib.c478
-rw-r--r--drivers/cpufreq/speedstep-lib.h49
-rw-r--r--drivers/cpufreq/speedstep-smi.c464
36 files changed, 16089 insertions, 0 deletions
diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
new file mode 100644
index 00000000..19470885
--- /dev/null
+++ b/drivers/cpufreq/Kconfig
@@ -0,0 +1,215 @@
+menu "CPU Frequency scaling"
+
+config CPU_FREQ
+ bool "CPU Frequency scaling"
+ help
+ CPU Frequency scaling allows you to change the clock speed of
+ CPUs on the fly. This is a nice method to save power, because
+ the lower the CPU clock speed, the less power the CPU consumes.
+
+ Note that this driver doesn't automatically change the CPU
+ clock speed, you need to either enable a dynamic cpufreq governor
+ (see below) after boot, or use a userspace tool.
+
+ For details, take a look at <file:Documentation/cpu-freq>.
+
+ If in doubt, say N.
+
+if CPU_FREQ
+
+config CPU_FREQ_TABLE
+ tristate
+
+config CPU_FREQ_STAT
+ tristate "CPU frequency translation statistics"
+ select CPU_FREQ_TABLE
+ default y
+ help
+ This driver exports CPU frequency statistics information through sysfs
+ file system.
+
+ To compile this driver as a module, choose M here: the
+ module will be called cpufreq_stats.
+
+ If in doubt, say N.
+
+config CPU_FREQ_STAT_DETAILS
+ bool "CPU frequency translation statistics details"
+ depends on CPU_FREQ_STAT
+ help
+ This will show detail CPU frequency translation table in sysfs file
+ system.
+
+ If in doubt, say N.
+
+choice
+ prompt "Default CPUFreq governor"
+ default CPU_FREQ_DEFAULT_GOV_USERSPACE if CPU_FREQ_SA1100 || CPU_FREQ_SA1110
+ default CPU_FREQ_DEFAULT_GOV_PERFORMANCE
+ help
+ This option sets which CPUFreq governor shall be loaded at
+ startup. If in doubt, select 'performance'.
+
+config CPU_FREQ_DEFAULT_GOV_PERFORMANCE
+ bool "performance"
+ select CPU_FREQ_GOV_PERFORMANCE
+ help
+ Use the CPUFreq governor 'performance' as default. This sets
+ the frequency statically to the highest frequency supported by
+ the CPU.
+
+config CPU_FREQ_DEFAULT_GOV_POWERSAVE
+ bool "powersave"
+ depends on EXPERT
+ select CPU_FREQ_GOV_POWERSAVE
+ help
+ Use the CPUFreq governor 'powersave' as default. This sets
+ the frequency statically to the lowest frequency supported by
+ the CPU.
+
+config CPU_FREQ_DEFAULT_GOV_USERSPACE
+ bool "userspace"
+ select CPU_FREQ_GOV_USERSPACE
+ help
+ Use the CPUFreq governor 'userspace' as default. This allows
+ you to set the CPU frequency manually or when a userspace
+ program shall be able to set the CPU dynamically without having
+ to enable the userspace governor manually.
+
+config CPU_FREQ_DEFAULT_GOV_ONDEMAND
+ bool "ondemand"
+ select CPU_FREQ_GOV_ONDEMAND
+ select CPU_FREQ_GOV_PERFORMANCE
+ help
+ Use the CPUFreq governor 'ondemand' as default. This allows
+ you to get a full dynamic frequency capable system by simply
+ loading your cpufreq low-level hardware driver.
+ Be aware that not all cpufreq drivers support the ondemand
+ governor. If unsure have a look at the help section of the
+ driver. Fallback governor will be the performance governor.
+
+config CPU_FREQ_DEFAULT_GOV_CONSERVATIVE
+ bool "conservative"
+ select CPU_FREQ_GOV_CONSERVATIVE
+ select CPU_FREQ_GOV_PERFORMANCE
+ help
+ Use the CPUFreq governor 'conservative' as default. This allows
+ you to get a full dynamic frequency capable system by simply
+ loading your cpufreq low-level hardware driver.
+ Be aware that not all cpufreq drivers support the conservative
+ governor. If unsure have a look at the help section of the
+ driver. Fallback governor will be the performance governor.
+
+config CPU_FREQ_DEFAULT_GOV_INTERACTIVE
+ bool "interactive"
+ select CPU_FREQ_GOV_INTERACTIVE
+ help
+ Use the CPUFreq governor 'interactive' as default. This allows
+ you to get a full dynamic cpu frequency capable system by simply
+ loading your cpufreq low-level hardware driver, using the
+ 'interactive' governor for latency-sensitive workloads.
+
+endchoice
+
+config CPU_FREQ_GOV_PERFORMANCE
+ tristate "'performance' governor"
+ help
+ This cpufreq governor sets the frequency statically to the
+ highest available CPU frequency.
+
+ To compile this driver as a module, choose M here: the
+ module will be called cpufreq_performance.
+
+ If in doubt, say Y.
+
+config CPU_FREQ_GOV_POWERSAVE
+ tristate "'powersave' governor"
+ help
+ This cpufreq governor sets the frequency statically to the
+ lowest available CPU frequency.
+
+ To compile this driver as a module, choose M here: the
+ module will be called cpufreq_powersave.
+
+ If in doubt, say Y.
+
+config CPU_FREQ_GOV_USERSPACE
+ tristate "'userspace' governor for userspace frequency scaling"
+ help
+ Enable this cpufreq governor when you either want to set the
+ CPU frequency manually or when a userspace program shall
+ be able to set the CPU dynamically, like on LART
+ <http://www.lartmaker.nl/>.
+
+ To compile this driver as a module, choose M here: the
+ module will be called cpufreq_userspace.
+
+ For details, take a look at <file:Documentation/cpu-freq/>.
+
+ If in doubt, say Y.
+
+config CPU_FREQ_GOV_ONDEMAND
+ tristate "'ondemand' cpufreq policy governor"
+ select CPU_FREQ_TABLE
+ help
+ 'ondemand' - This driver adds a dynamic cpufreq policy governor.
+ The governor does a periodic polling and
+ changes frequency based on the CPU utilization.
+ The support for this governor depends on CPU capability to
+ do fast frequency switching (i.e, very low latency frequency
+ transitions).
+
+ To compile this driver as a module, choose M here: the
+ module will be called cpufreq_ondemand.
+
+ For details, take a look at linux/Documentation/cpu-freq.
+
+ If in doubt, say N.
+
+config CPU_FREQ_GOV_INTERACTIVE
+ tristate "'interactive' cpufreq policy governor"
+ help
+ 'interactive' - This driver adds a dynamic cpufreq policy governor
+ designed for latency-sensitive workloads.
+
+ This governor attempts to reduce the latency of clock
+ increases so that the system is more responsive to
+ interactive workloads.
+
+ To compile this driver as a module, choose M here: the
+ module will be called cpufreq_interactive.
+
+ For details, take a look at linux/Documentation/cpu-freq.
+
+ If in doubt, say N.
+
+config CPU_FREQ_GOV_CONSERVATIVE
+ tristate "'conservative' cpufreq governor"
+ depends on CPU_FREQ
+ help
+ 'conservative' - this driver is rather similar to the 'ondemand'
+ governor both in its source code and its purpose, the difference is
+ its optimisation for better suitability in a battery powered
+ environment. The frequency is gracefully increased and decreased
+ rather than jumping to 100% when speed is required.
+
+ If you have a desktop machine then you should really be considering
+ the 'ondemand' governor instead, however if you are using a laptop,
+ PDA or even an AMD64 based computer (due to the unacceptable
+ step-by-step latency issues between the minimum and maximum frequency
+ transitions in the CPU) you will probably want to use this governor.
+
+ To compile this driver as a module, choose M here: the
+ module will be called cpufreq_conservative.
+
+ For details, take a look at linux/Documentation/cpu-freq.
+
+ If in doubt, say N.
+
+menu "x86 CPU frequency scaling drivers"
+depends on X86
+source "drivers/cpufreq/Kconfig.x86"
+endmenu
+
+endif
+endmenu
diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86
new file mode 100644
index 00000000..78ff7ee4
--- /dev/null
+++ b/drivers/cpufreq/Kconfig.x86
@@ -0,0 +1,255 @@
+#
+# x86 CPU Frequency scaling drivers
+#
+
+config X86_PCC_CPUFREQ
+ tristate "Processor Clocking Control interface driver"
+ depends on ACPI && ACPI_PROCESSOR
+ help
+ This driver adds support for the PCC interface.
+
+ For details, take a look at:
+ <file:Documentation/cpu-freq/pcc-cpufreq.txt>.
+
+ To compile this driver as a module, choose M here: the
+ module will be called pcc-cpufreq.
+
+ If in doubt, say N.
+
+config X86_ACPI_CPUFREQ
+ tristate "ACPI Processor P-States driver"
+ select CPU_FREQ_TABLE
+ depends on ACPI_PROCESSOR
+ help
+ This driver adds a CPUFreq driver which utilizes the ACPI
+ Processor Performance States.
+ This driver also supports Intel Enhanced Speedstep.
+
+ To compile this driver as a module, choose M here: the
+ module will be called acpi-cpufreq.
+
+ For details, take a look at <file:Documentation/cpu-freq/>.
+
+ If in doubt, say N.
+
+config ELAN_CPUFREQ
+ tristate "AMD Elan SC400 and SC410"
+ select CPU_FREQ_TABLE
+ depends on MELAN
+ ---help---
+ This adds the CPUFreq driver for AMD Elan SC400 and SC410
+ processors.
+
+ You need to specify the processor maximum speed as boot
+ parameter: elanfreq=maxspeed (in kHz) or as module
+ parameter "max_freq".
+
+ For details, take a look at <file:Documentation/cpu-freq/>.
+
+ If in doubt, say N.
+
+config SC520_CPUFREQ
+ tristate "AMD Elan SC520"
+ select CPU_FREQ_TABLE
+ depends on MELAN
+ ---help---
+ This adds the CPUFreq driver for AMD Elan SC520 processor.
+
+ For details, take a look at <file:Documentation/cpu-freq/>.
+
+ If in doubt, say N.
+
+
+config X86_POWERNOW_K6
+ tristate "AMD Mobile K6-2/K6-3 PowerNow!"
+ select CPU_FREQ_TABLE
+ depends on X86_32
+ help
+ This adds the CPUFreq driver for mobile AMD K6-2+ and mobile
+ AMD K6-3+ processors.
+
+ For details, take a look at <file:Documentation/cpu-freq/>.
+
+ If in doubt, say N.
+
+config X86_POWERNOW_K7
+ tristate "AMD Mobile Athlon/Duron PowerNow!"
+ select CPU_FREQ_TABLE
+ depends on X86_32
+ help
+ This adds the CPUFreq driver for mobile AMD K7 mobile processors.
+
+ For details, take a look at <file:Documentation/cpu-freq/>.
+
+ If in doubt, say N.
+
+config X86_POWERNOW_K7_ACPI
+ bool
+ depends on X86_POWERNOW_K7 && ACPI_PROCESSOR
+ depends on !(X86_POWERNOW_K7 = y && ACPI_PROCESSOR = m)
+ depends on X86_32
+ default y
+
+config X86_POWERNOW_K8
+ tristate "AMD Opteron/Athlon64 PowerNow!"
+ select CPU_FREQ_TABLE
+ depends on ACPI && ACPI_PROCESSOR
+ help
+ This adds the CPUFreq driver for K8/K10 Opteron/Athlon64 processors.
+
+ To compile this driver as a module, choose M here: the
+ module will be called powernow-k8.
+
+ For details, take a look at <file:Documentation/cpu-freq/>.
+
+config X86_GX_SUSPMOD
+ tristate "Cyrix MediaGX/NatSemi Geode Suspend Modulation"
+ depends on X86_32 && PCI
+ help
+ This add the CPUFreq driver for NatSemi Geode processors which
+ support suspend modulation.
+
+ For details, take a look at <file:Documentation/cpu-freq/>.
+
+ If in doubt, say N.
+
+config X86_SPEEDSTEP_CENTRINO
+ tristate "Intel Enhanced SpeedStep (deprecated)"
+ select CPU_FREQ_TABLE
+ select X86_SPEEDSTEP_CENTRINO_TABLE if X86_32
+ depends on X86_32 || (X86_64 && ACPI_PROCESSOR)
+ help
+ This is deprecated and this functionality is now merged into
+ acpi_cpufreq (X86_ACPI_CPUFREQ). Use that driver instead of
+ speedstep_centrino.
+ This adds the CPUFreq driver for Enhanced SpeedStep enabled
+ mobile CPUs. This means Intel Pentium M (Centrino) CPUs
+ or 64bit enabled Intel Xeons.
+
+ To compile this driver as a module, choose M here: the
+ module will be called speedstep-centrino.
+
+ For details, take a look at <file:Documentation/cpu-freq/>.
+
+ If in doubt, say N.
+
+config X86_SPEEDSTEP_CENTRINO_TABLE
+ bool "Built-in tables for Banias CPUs"
+ depends on X86_32 && X86_SPEEDSTEP_CENTRINO
+ default y
+ help
+ Use built-in tables for Banias CPUs if ACPI encoding
+ is not available.
+
+ If in doubt, say N.
+
+config X86_SPEEDSTEP_ICH
+ tristate "Intel Speedstep on ICH-M chipsets (ioport interface)"
+ select CPU_FREQ_TABLE
+ depends on X86_32
+ help
+ This adds the CPUFreq driver for certain mobile Intel Pentium III
+ (Coppermine), all mobile Intel Pentium III-M (Tualatin) and all
+ mobile Intel Pentium 4 P4-M on systems which have an Intel ICH2,
+ ICH3 or ICH4 southbridge.
+
+ For details, take a look at <file:Documentation/cpu-freq/>.
+
+ If in doubt, say N.
+
+config X86_SPEEDSTEP_SMI
+ tristate "Intel SpeedStep on 440BX/ZX/MX chipsets (SMI interface)"
+ select CPU_FREQ_TABLE
+ depends on X86_32 && EXPERIMENTAL
+ help
+ This adds the CPUFreq driver for certain mobile Intel Pentium III
+ (Coppermine), all mobile Intel Pentium III-M (Tualatin)
+ on systems which have an Intel 440BX/ZX/MX southbridge.
+
+ For details, take a look at <file:Documentation/cpu-freq/>.
+
+ If in doubt, say N.
+
+config X86_P4_CLOCKMOD
+ tristate "Intel Pentium 4 clock modulation"
+ select CPU_FREQ_TABLE
+ help
+ This adds the CPUFreq driver for Intel Pentium 4 / XEON
+ processors. When enabled it will lower CPU temperature by skipping
+ clocks.
+
+ This driver should be only used in exceptional
+ circumstances when very low power is needed because it causes severe
+ slowdowns and noticeable latencies. Normally Speedstep should be used
+ instead.
+
+ To compile this driver as a module, choose M here: the
+ module will be called p4-clockmod.
+
+ For details, take a look at <file:Documentation/cpu-freq/>.
+
+ Unless you are absolutely sure say N.
+
+config X86_CPUFREQ_NFORCE2
+ tristate "nVidia nForce2 FSB changing"
+ depends on X86_32 && EXPERIMENTAL
+ help
+ This adds the CPUFreq driver for FSB changing on nVidia nForce2
+ platforms.
+
+ For details, take a look at <file:Documentation/cpu-freq/>.
+
+ If in doubt, say N.
+
+config X86_LONGRUN
+ tristate "Transmeta LongRun"
+ depends on X86_32
+ help
+ This adds the CPUFreq driver for Transmeta Crusoe and Efficeon processors
+ which support LongRun.
+
+ For details, take a look at <file:Documentation/cpu-freq/>.
+
+ If in doubt, say N.
+
+config X86_LONGHAUL
+ tristate "VIA Cyrix III Longhaul"
+ select CPU_FREQ_TABLE
+ depends on X86_32 && ACPI_PROCESSOR
+ help
+ This adds the CPUFreq driver for VIA Samuel/CyrixIII,
+ VIA Cyrix Samuel/C3, VIA Cyrix Ezra and VIA Cyrix Ezra-T
+ processors.
+
+ For details, take a look at <file:Documentation/cpu-freq/>.
+
+ If in doubt, say N.
+
+config X86_E_POWERSAVER
+ tristate "VIA C7 Enhanced PowerSaver (DANGEROUS)"
+ select CPU_FREQ_TABLE
+ depends on X86_32 && EXPERIMENTAL
+ help
+ This adds the CPUFreq driver for VIA C7 processors. However, this driver
+ does not have any safeguards to prevent operating the CPU out of spec
+ and is thus considered dangerous. Please use the regular ACPI cpufreq
+ driver, enabled by CONFIG_X86_ACPI_CPUFREQ.
+
+ If in doubt, say N.
+
+comment "shared options"
+
+config X86_SPEEDSTEP_LIB
+ tristate
+ default (X86_SPEEDSTEP_ICH || X86_SPEEDSTEP_SMI || X86_P4_CLOCKMOD)
+
+config X86_SPEEDSTEP_RELAXED_CAP_CHECK
+ bool "Relaxed speedstep capability checks"
+ depends on X86_32 && (X86_SPEEDSTEP_SMI || X86_SPEEDSTEP_ICH)
+ help
+ Don't perform all checks for a speedstep capable system which would
+ normally be done. Some ancient or strange systems, though speedstep
+ capable, don't always indicate that they are speedstep capable. This
+ option lets the probing code bypass some of those checks if the
+ parameter "relaxed_check=1" is passed to the module.
+
diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
new file mode 100644
index 00000000..c371bb46
--- /dev/null
+++ b/drivers/cpufreq/Makefile
@@ -0,0 +1,44 @@
+# CPUfreq core
+obj-$(CONFIG_CPU_FREQ) += cpufreq.o
+# CPUfreq stats
+obj-$(CONFIG_CPU_FREQ_STAT) += cpufreq_stats.o
+
+# CPUfreq governors
+obj-$(CONFIG_CPU_FREQ_GOV_PERFORMANCE) += cpufreq_performance.o
+obj-$(CONFIG_CPU_FREQ_GOV_POWERSAVE) += cpufreq_powersave.o
+obj-$(CONFIG_CPU_FREQ_GOV_USERSPACE) += cpufreq_userspace.o
+obj-$(CONFIG_CPU_FREQ_GOV_ONDEMAND) += cpufreq_ondemand.o
+obj-$(CONFIG_CPU_FREQ_GOV_CONSERVATIVE) += cpufreq_conservative.o
+obj-$(CONFIG_CPU_FREQ_GOV_INTERACTIVE) += cpufreq_interactive.o
+
+# CPUfreq cross-arch helpers
+obj-$(CONFIG_CPU_FREQ_TABLE) += freq_table.o
+
+##################################################################################d
+# x86 drivers.
+# Link order matters. K8 is preferred to ACPI because of firmware bugs in early
+# K8 systems. ACPI is preferred to all other hardware-specific drivers.
+# speedstep-* is preferred over p4-clockmod.
+
+obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o mperf.o
+obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o mperf.o
+obj-$(CONFIG_X86_PCC_CPUFREQ) += pcc-cpufreq.o
+obj-$(CONFIG_X86_POWERNOW_K6) += powernow-k6.o
+obj-$(CONFIG_X86_POWERNOW_K7) += powernow-k7.o
+obj-$(CONFIG_X86_LONGHAUL) += longhaul.o
+obj-$(CONFIG_X86_E_POWERSAVER) += e_powersaver.o
+obj-$(CONFIG_ELAN_CPUFREQ) += elanfreq.o
+obj-$(CONFIG_SC520_CPUFREQ) += sc520_freq.o
+obj-$(CONFIG_X86_LONGRUN) += longrun.o
+obj-$(CONFIG_X86_GX_SUSPMOD) += gx-suspmod.o
+obj-$(CONFIG_X86_SPEEDSTEP_ICH) += speedstep-ich.o
+obj-$(CONFIG_X86_SPEEDSTEP_LIB) += speedstep-lib.o
+obj-$(CONFIG_X86_SPEEDSTEP_SMI) += speedstep-smi.o
+obj-$(CONFIG_X86_SPEEDSTEP_CENTRINO) += speedstep-centrino.o
+obj-$(CONFIG_X86_P4_CLOCKMOD) += p4-clockmod.o
+obj-$(CONFIG_X86_CPUFREQ_NFORCE2) += cpufreq-nforce2.o
+
+##################################################################################d
+
+# ARM SoC drivers
+obj-$(CONFIG_UX500_SOC_DB8500) += db8500-cpufreq.o
diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
new file mode 100644
index 00000000..596d5dd3
--- /dev/null
+++ b/drivers/cpufreq/acpi-cpufreq.c
@@ -0,0 +1,773 @@
+/*
+ * acpi-cpufreq.c - ACPI Processor P-States Driver
+ *
+ * Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
+ * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
+ * Copyright (C) 2002 - 2004 Dominik Brodowski <linux@brodo.de>
+ * Copyright (C) 2006 Denis Sadykov <denis.m.sadykov@intel.com>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or (at
+ * your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <linux/sched.h>
+#include <linux/cpufreq.h>
+#include <linux/compiler.h>
+#include <linux/dmi.h>
+#include <linux/slab.h>
+
+#include <linux/acpi.h>
+#include <linux/io.h>
+#include <linux/delay.h>
+#include <linux/uaccess.h>
+
+#include <acpi/processor.h>
+
+#include <asm/msr.h>
+#include <asm/processor.h>
+#include <asm/cpufeature.h>
+#include "mperf.h"
+
+MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski");
+MODULE_DESCRIPTION("ACPI Processor P-States Driver");
+MODULE_LICENSE("GPL");
+
+enum {
+ UNDEFINED_CAPABLE = 0,
+ SYSTEM_INTEL_MSR_CAPABLE,
+ SYSTEM_IO_CAPABLE,
+};
+
+#define INTEL_MSR_RANGE (0xffff)
+
+struct acpi_cpufreq_data {
+ struct acpi_processor_performance *acpi_data;
+ struct cpufreq_frequency_table *freq_table;
+ unsigned int resume;
+ unsigned int cpu_feature;
+};
+
+static DEFINE_PER_CPU(struct acpi_cpufreq_data *, acfreq_data);
+
+/* acpi_perf_data is a pointer to percpu data. */
+static struct acpi_processor_performance __percpu *acpi_perf_data;
+
+static struct cpufreq_driver acpi_cpufreq_driver;
+
+static unsigned int acpi_pstate_strict;
+
+static int check_est_cpu(unsigned int cpuid)
+{
+ struct cpuinfo_x86 *cpu = &cpu_data(cpuid);
+
+ return cpu_has(cpu, X86_FEATURE_EST);
+}
+
+static unsigned extract_io(u32 value, struct acpi_cpufreq_data *data)
+{
+ struct acpi_processor_performance *perf;
+ int i;
+
+ perf = data->acpi_data;
+
+ for (i = 0; i < perf->state_count; i++) {
+ if (value == perf->states[i].status)
+ return data->freq_table[i].frequency;
+ }
+ return 0;
+}
+
+static unsigned extract_msr(u32 msr, struct acpi_cpufreq_data *data)
+{
+ int i;
+ struct acpi_processor_performance *perf;
+
+ msr &= INTEL_MSR_RANGE;
+ perf = data->acpi_data;
+
+ for (i = 0; data->freq_table[i].frequency != CPUFREQ_TABLE_END; i++) {
+ if (msr == perf->states[data->freq_table[i].index].status)
+ return data->freq_table[i].frequency;
+ }
+ return data->freq_table[0].frequency;
+}
+
+static unsigned extract_freq(u32 val, struct acpi_cpufreq_data *data)
+{
+ switch (data->cpu_feature) {
+ case SYSTEM_INTEL_MSR_CAPABLE:
+ return extract_msr(val, data);
+ case SYSTEM_IO_CAPABLE:
+ return extract_io(val, data);
+ default:
+ return 0;
+ }
+}
+
+struct msr_addr {
+ u32 reg;
+};
+
+struct io_addr {
+ u16 port;
+ u8 bit_width;
+};
+
+struct drv_cmd {
+ unsigned int type;
+ const struct cpumask *mask;
+ union {
+ struct msr_addr msr;
+ struct io_addr io;
+ } addr;
+ u32 val;
+};
+
+/* Called via smp_call_function_single(), on the target CPU */
+static void do_drv_read(void *_cmd)
+{
+ struct drv_cmd *cmd = _cmd;
+ u32 h;
+
+ switch (cmd->type) {
+ case SYSTEM_INTEL_MSR_CAPABLE:
+ rdmsr(cmd->addr.msr.reg, cmd->val, h);
+ break;
+ case SYSTEM_IO_CAPABLE:
+ acpi_os_read_port((acpi_io_address)cmd->addr.io.port,
+ &cmd->val,
+ (u32)cmd->addr.io.bit_width);
+ break;
+ default:
+ break;
+ }
+}
+
+/* Called via smp_call_function_many(), on the target CPUs */
+static void do_drv_write(void *_cmd)
+{
+ struct drv_cmd *cmd = _cmd;
+ u32 lo, hi;
+
+ switch (cmd->type) {
+ case SYSTEM_INTEL_MSR_CAPABLE:
+ rdmsr(cmd->addr.msr.reg, lo, hi);
+ lo = (lo & ~INTEL_MSR_RANGE) | (cmd->val & INTEL_MSR_RANGE);
+ wrmsr(cmd->addr.msr.reg, lo, hi);
+ break;
+ case SYSTEM_IO_CAPABLE:
+ acpi_os_write_port((acpi_io_address)cmd->addr.io.port,
+ cmd->val,
+ (u32)cmd->addr.io.bit_width);
+ break;
+ default:
+ break;
+ }
+}
+
+static void drv_read(struct drv_cmd *cmd)
+{
+ int err;
+ cmd->val = 0;
+
+ err = smp_call_function_any(cmd->mask, do_drv_read, cmd, 1);
+ WARN_ON_ONCE(err); /* smp_call_function_any() was buggy? */
+}
+
+static void drv_write(struct drv_cmd *cmd)
+{
+ int this_cpu;
+
+ this_cpu = get_cpu();
+ if (cpumask_test_cpu(this_cpu, cmd->mask))
+ do_drv_write(cmd);
+ smp_call_function_many(cmd->mask, do_drv_write, cmd, 1);
+ put_cpu();
+}
+
+static u32 get_cur_val(const struct cpumask *mask)
+{
+ struct acpi_processor_performance *perf;
+ struct drv_cmd cmd;
+
+ if (unlikely(cpumask_empty(mask)))
+ return 0;
+
+ switch (per_cpu(acfreq_data, cpumask_first(mask))->cpu_feature) {
+ case SYSTEM_INTEL_MSR_CAPABLE:
+ cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
+ cmd.addr.msr.reg = MSR_IA32_PERF_STATUS;
+ break;
+ case SYSTEM_IO_CAPABLE:
+ cmd.type = SYSTEM_IO_CAPABLE;
+ perf = per_cpu(acfreq_data, cpumask_first(mask))->acpi_data;
+ cmd.addr.io.port = perf->control_register.address;
+ cmd.addr.io.bit_width = perf->control_register.bit_width;
+ break;
+ default:
+ return 0;
+ }
+
+ cmd.mask = mask;
+ drv_read(&cmd);
+
+ pr_debug("get_cur_val = %u\n", cmd.val);
+
+ return cmd.val;
+}
+
+static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
+{
+ struct acpi_cpufreq_data *data = per_cpu(acfreq_data, cpu);
+ unsigned int freq;
+ unsigned int cached_freq;
+
+ pr_debug("get_cur_freq_on_cpu (%d)\n", cpu);
+
+ if (unlikely(data == NULL ||
+ data->acpi_data == NULL || data->freq_table == NULL)) {
+ return 0;
+ }
+
+ cached_freq = data->freq_table[data->acpi_data->state].frequency;
+ freq = extract_freq(get_cur_val(cpumask_of(cpu)), data);
+ if (freq != cached_freq) {
+ /*
+ * The dreaded BIOS frequency change behind our back.
+ * Force set the frequency on next target call.
+ */
+ data->resume = 1;
+ }
+
+ pr_debug("cur freq = %u\n", freq);
+
+ return freq;
+}
+
+static unsigned int check_freqs(const struct cpumask *mask, unsigned int freq,
+ struct acpi_cpufreq_data *data)
+{
+ unsigned int cur_freq;
+ unsigned int i;
+
+ for (i = 0; i < 100; i++) {
+ cur_freq = extract_freq(get_cur_val(mask), data);
+ if (cur_freq == freq)
+ return 1;
+ udelay(10);
+ }
+ return 0;
+}
+
+static int acpi_cpufreq_target(struct cpufreq_policy *policy,
+ unsigned int target_freq, unsigned int relation)
+{
+ struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
+ struct acpi_processor_performance *perf;
+ struct cpufreq_freqs freqs;
+ struct drv_cmd cmd;
+ unsigned int next_state = 0; /* Index into freq_table */
+ unsigned int next_perf_state = 0; /* Index into perf table */
+ unsigned int i;
+ int result = 0;
+
+ pr_debug("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu);
+
+ if (unlikely(data == NULL ||
+ data->acpi_data == NULL || data->freq_table == NULL)) {
+ return -ENODEV;
+ }
+
+ perf = data->acpi_data;
+ result = cpufreq_frequency_table_target(policy,
+ data->freq_table,
+ target_freq,
+ relation, &next_state);
+ if (unlikely(result)) {
+ result = -ENODEV;
+ goto out;
+ }
+
+ next_perf_state = data->freq_table[next_state].index;
+ if (perf->state == next_perf_state) {
+ if (unlikely(data->resume)) {
+ pr_debug("Called after resume, resetting to P%d\n",
+ next_perf_state);
+ data->resume = 0;
+ } else {
+ pr_debug("Already at target state (P%d)\n",
+ next_perf_state);
+ goto out;
+ }
+ }
+
+ switch (data->cpu_feature) {
+ case SYSTEM_INTEL_MSR_CAPABLE:
+ cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
+ cmd.addr.msr.reg = MSR_IA32_PERF_CTL;
+ cmd.val = (u32) perf->states[next_perf_state].control;
+ break;
+ case SYSTEM_IO_CAPABLE:
+ cmd.type = SYSTEM_IO_CAPABLE;
+ cmd.addr.io.port = perf->control_register.address;
+ cmd.addr.io.bit_width = perf->control_register.bit_width;
+ cmd.val = (u32) perf->states[next_perf_state].control;
+ break;
+ default:
+ result = -ENODEV;
+ goto out;
+ }
+
+ /* cpufreq holds the hotplug lock, so we are safe from here on */
+ if (policy->shared_type != CPUFREQ_SHARED_TYPE_ANY)
+ cmd.mask = policy->cpus;
+ else
+ cmd.mask = cpumask_of(policy->cpu);
+
+ freqs.old = perf->states[perf->state].core_frequency * 1000;
+ freqs.new = data->freq_table[next_state].frequency;
+ for_each_cpu(i, policy->cpus) {
+ freqs.cpu = i;
+ cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+ }
+
+ drv_write(&cmd);
+
+ if (acpi_pstate_strict) {
+ if (!check_freqs(cmd.mask, freqs.new, data)) {
+ pr_debug("acpi_cpufreq_target failed (%d)\n",
+ policy->cpu);
+ result = -EAGAIN;
+ goto out;
+ }
+ }
+
+ for_each_cpu(i, policy->cpus) {
+ freqs.cpu = i;
+ cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+ }
+ perf->state = next_perf_state;
+
+out:
+ return result;
+}
+
+static int acpi_cpufreq_verify(struct cpufreq_policy *policy)
+{
+ struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
+
+ pr_debug("acpi_cpufreq_verify\n");
+
+ return cpufreq_frequency_table_verify(policy, data->freq_table);
+}
+
+static unsigned long
+acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu)
+{
+ struct acpi_processor_performance *perf = data->acpi_data;
+
+ if (cpu_khz) {
+ /* search the closest match to cpu_khz */
+ unsigned int i;
+ unsigned long freq;
+ unsigned long freqn = perf->states[0].core_frequency * 1000;
+
+ for (i = 0; i < (perf->state_count-1); i++) {
+ freq = freqn;
+ freqn = perf->states[i+1].core_frequency * 1000;
+ if ((2 * cpu_khz) > (freqn + freq)) {
+ perf->state = i;
+ return freq;
+ }
+ }
+ perf->state = perf->state_count-1;
+ return freqn;
+ } else {
+ /* assume CPU is at P0... */
+ perf->state = 0;
+ return perf->states[0].core_frequency * 1000;
+ }
+}
+
+static void free_acpi_perf_data(void)
+{
+ unsigned int i;
+
+ /* Freeing a NULL pointer is OK, and alloc_percpu zeroes. */
+ for_each_possible_cpu(i)
+ free_cpumask_var(per_cpu_ptr(acpi_perf_data, i)
+ ->shared_cpu_map);
+ free_percpu(acpi_perf_data);
+}
+
+/*
+ * acpi_cpufreq_early_init - initialize ACPI P-States library
+ *
+ * Initialize the ACPI P-States library (drivers/acpi/processor_perflib.c)
+ * in order to determine correct frequency and voltage pairings. We can
+ * do _PDC and _PSD and find out the processor dependency for the
+ * actual init that will happen later...
+ */
+static int __init acpi_cpufreq_early_init(void)
+{
+ unsigned int i;
+ pr_debug("acpi_cpufreq_early_init\n");
+
+ acpi_perf_data = alloc_percpu(struct acpi_processor_performance);
+ if (!acpi_perf_data) {
+ pr_debug("Memory allocation error for acpi_perf_data.\n");
+ return -ENOMEM;
+ }
+ for_each_possible_cpu(i) {
+ if (!zalloc_cpumask_var_node(
+ &per_cpu_ptr(acpi_perf_data, i)->shared_cpu_map,
+ GFP_KERNEL, cpu_to_node(i))) {
+
+ /* Freeing a NULL pointer is OK: alloc_percpu zeroes. */
+ free_acpi_perf_data();
+ return -ENOMEM;
+ }
+ }
+
+ /* Do initialization in ACPI core */
+ acpi_processor_preregister_performance(acpi_perf_data);
+ return 0;
+}
+
+#ifdef CONFIG_SMP
+/*
+ * Some BIOSes do SW_ANY coordination internally, either set it up in hw
+ * or do it in BIOS firmware and won't inform about it to OS. If not
+ * detected, this has a side effect of making CPU run at a different speed
+ * than OS intended it to run at. Detect it and handle it cleanly.
+ */
+static int bios_with_sw_any_bug;
+
+static int sw_any_bug_found(const struct dmi_system_id *d)
+{
+ bios_with_sw_any_bug = 1;
+ return 0;
+}
+
+static const struct dmi_system_id sw_any_bug_dmi_table[] = {
+ {
+ .callback = sw_any_bug_found,
+ .ident = "Supermicro Server X6DLP",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Supermicro"),
+ DMI_MATCH(DMI_BIOS_VERSION, "080010"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "X6DLP"),
+ },
+ },
+ { }
+};
+
+static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c)
+{
+ /* Intel Xeon Processor 7100 Series Specification Update
+ * http://www.intel.com/Assets/PDF/specupdate/314554.pdf
+ * AL30: A Machine Check Exception (MCE) Occurring during an
+ * Enhanced Intel SpeedStep Technology Ratio Change May Cause
+ * Both Processor Cores to Lock Up. */
+ if (c->x86_vendor == X86_VENDOR_INTEL) {
+ if ((c->x86 == 15) &&
+ (c->x86_model == 6) &&
+ (c->x86_mask == 8)) {
+ printk(KERN_INFO "acpi-cpufreq: Intel(R) "
+ "Xeon(R) 7100 Errata AL30, processors may "
+ "lock up on frequency changes: disabling "
+ "acpi-cpufreq.\n");
+ return -ENODEV;
+ }
+ }
+ return 0;
+}
+#endif
+
+static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
+{
+ unsigned int i;
+ unsigned int valid_states = 0;
+ unsigned int cpu = policy->cpu;
+ struct acpi_cpufreq_data *data;
+ unsigned int result = 0;
+ struct cpuinfo_x86 *c = &cpu_data(policy->cpu);
+ struct acpi_processor_performance *perf;
+#ifdef CONFIG_SMP
+ static int blacklisted;
+#endif
+
+ pr_debug("acpi_cpufreq_cpu_init\n");
+
+#ifdef CONFIG_SMP
+ if (blacklisted)
+ return blacklisted;
+ blacklisted = acpi_cpufreq_blacklist(c);
+ if (blacklisted)
+ return blacklisted;
+#endif
+
+ data = kzalloc(sizeof(struct acpi_cpufreq_data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ data->acpi_data = per_cpu_ptr(acpi_perf_data, cpu);
+ per_cpu(acfreq_data, cpu) = data;
+
+ if (cpu_has(c, X86_FEATURE_CONSTANT_TSC))
+ acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS;
+
+ result = acpi_processor_register_performance(data->acpi_data, cpu);
+ if (result)
+ goto err_free;
+
+ perf = data->acpi_data;
+ policy->shared_type = perf->shared_type;
+
+ /*
+ * Will let policy->cpus know about dependency only when software
+ * coordination is required.
+ */
+ if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL ||
+ policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) {
+ cpumask_copy(policy->cpus, perf->shared_cpu_map);
+ }
+ cpumask_copy(policy->related_cpus, perf->shared_cpu_map);
+
+#ifdef CONFIG_SMP
+ dmi_check_system(sw_any_bug_dmi_table);
+ if (bios_with_sw_any_bug && cpumask_weight(policy->cpus) == 1) {
+ policy->shared_type = CPUFREQ_SHARED_TYPE_ALL;
+ cpumask_copy(policy->cpus, cpu_core_mask(cpu));
+ }
+#endif
+
+ /* capability check */
+ if (perf->state_count <= 1) {
+ pr_debug("No P-States\n");
+ result = -ENODEV;
+ goto err_unreg;
+ }
+
+ if (perf->control_register.space_id != perf->status_register.space_id) {
+ result = -ENODEV;
+ goto err_unreg;
+ }
+
+ switch (perf->control_register.space_id) {
+ case ACPI_ADR_SPACE_SYSTEM_IO:
+ pr_debug("SYSTEM IO addr space\n");
+ data->cpu_feature = SYSTEM_IO_CAPABLE;
+ break;
+ case ACPI_ADR_SPACE_FIXED_HARDWARE:
+ pr_debug("HARDWARE addr space\n");
+ if (!check_est_cpu(cpu)) {
+ result = -ENODEV;
+ goto err_unreg;
+ }
+ data->cpu_feature = SYSTEM_INTEL_MSR_CAPABLE;
+ break;
+ default:
+ pr_debug("Unknown addr space %d\n",
+ (u32) (perf->control_register.space_id));
+ result = -ENODEV;
+ goto err_unreg;
+ }
+
+ data->freq_table = kmalloc(sizeof(struct cpufreq_frequency_table) *
+ (perf->state_count+1), GFP_KERNEL);
+ if (!data->freq_table) {
+ result = -ENOMEM;
+ goto err_unreg;
+ }
+
+ /* detect transition latency */
+ policy->cpuinfo.transition_latency = 0;
+ for (i = 0; i < perf->state_count; i++) {
+ if ((perf->states[i].transition_latency * 1000) >
+ policy->cpuinfo.transition_latency)
+ policy->cpuinfo.transition_latency =
+ perf->states[i].transition_latency * 1000;
+ }
+
+ /* Check for high latency (>20uS) from buggy BIOSes, like on T42 */
+ if (perf->control_register.space_id == ACPI_ADR_SPACE_FIXED_HARDWARE &&
+ policy->cpuinfo.transition_latency > 20 * 1000) {
+ policy->cpuinfo.transition_latency = 20 * 1000;
+ printk_once(KERN_INFO
+ "P-state transition latency capped at 20 uS\n");
+ }
+
+ /* table init */
+ for (i = 0; i < perf->state_count; i++) {
+ if (i > 0 && perf->states[i].core_frequency >=
+ data->freq_table[valid_states-1].frequency / 1000)
+ continue;
+
+ data->freq_table[valid_states].index = i;
+ data->freq_table[valid_states].frequency =
+ perf->states[i].core_frequency * 1000;
+ valid_states++;
+ }
+ data->freq_table[valid_states].frequency = CPUFREQ_TABLE_END;
+ perf->state = 0;
+
+ result = cpufreq_frequency_table_cpuinfo(policy, data->freq_table);
+ if (result)
+ goto err_freqfree;
+
+ if (perf->states[0].core_frequency * 1000 != policy->cpuinfo.max_freq)
+ printk(KERN_WARNING FW_WARN "P-state 0 is not max freq\n");
+
+ switch (perf->control_register.space_id) {
+ case ACPI_ADR_SPACE_SYSTEM_IO:
+ /* Current speed is unknown and not detectable by IO port */
+ policy->cur = acpi_cpufreq_guess_freq(data, policy->cpu);
+ break;
+ case ACPI_ADR_SPACE_FIXED_HARDWARE:
+ acpi_cpufreq_driver.get = get_cur_freq_on_cpu;
+ policy->cur = get_cur_freq_on_cpu(cpu);
+ break;
+ default:
+ break;
+ }
+
+ /* notify BIOS that we exist */
+ acpi_processor_notify_smm(THIS_MODULE);
+
+ /* Check for APERF/MPERF support in hardware */
+ if (cpu_has(c, X86_FEATURE_APERFMPERF))
+ acpi_cpufreq_driver.getavg = cpufreq_get_measured_perf;
+
+ pr_debug("CPU%u - ACPI performance management activated.\n", cpu);
+ for (i = 0; i < perf->state_count; i++)
+ pr_debug(" %cP%d: %d MHz, %d mW, %d uS\n",
+ (i == perf->state ? '*' : ' '), i,
+ (u32) perf->states[i].core_frequency,
+ (u32) perf->states[i].power,
+ (u32) perf->states[i].transition_latency);
+
+ cpufreq_frequency_table_get_attr(data->freq_table, policy->cpu);
+
+ /*
+ * the first call to ->target() should result in us actually
+ * writing something to the appropriate registers.
+ */
+ data->resume = 1;
+
+ return result;
+
+err_freqfree:
+ kfree(data->freq_table);
+err_unreg:
+ acpi_processor_unregister_performance(perf, cpu);
+err_free:
+ kfree(data);
+ per_cpu(acfreq_data, cpu) = NULL;
+
+ return result;
+}
+
+static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy)
+{
+ struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
+
+ pr_debug("acpi_cpufreq_cpu_exit\n");
+
+ if (data) {
+ cpufreq_frequency_table_put_attr(policy->cpu);
+ per_cpu(acfreq_data, policy->cpu) = NULL;
+ acpi_processor_unregister_performance(data->acpi_data,
+ policy->cpu);
+ kfree(data->freq_table);
+ kfree(data);
+ }
+
+ return 0;
+}
+
+static int acpi_cpufreq_resume(struct cpufreq_policy *policy)
+{
+ struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
+
+ pr_debug("acpi_cpufreq_resume\n");
+
+ data->resume = 1;
+
+ return 0;
+}
+
+static struct freq_attr *acpi_cpufreq_attr[] = {
+ &cpufreq_freq_attr_scaling_available_freqs,
+ NULL,
+};
+
+static struct cpufreq_driver acpi_cpufreq_driver = {
+ .verify = acpi_cpufreq_verify,
+ .target = acpi_cpufreq_target,
+ .bios_limit = acpi_processor_get_bios_limit,
+ .init = acpi_cpufreq_cpu_init,
+ .exit = acpi_cpufreq_cpu_exit,
+ .resume = acpi_cpufreq_resume,
+ .name = "acpi-cpufreq",
+ .owner = THIS_MODULE,
+ .attr = acpi_cpufreq_attr,
+};
+
+static int __init acpi_cpufreq_init(void)
+{
+ int ret;
+
+ if (acpi_disabled)
+ return 0;
+
+ pr_debug("acpi_cpufreq_init\n");
+
+ ret = acpi_cpufreq_early_init();
+ if (ret)
+ return ret;
+
+ ret = cpufreq_register_driver(&acpi_cpufreq_driver);
+ if (ret)
+ free_acpi_perf_data();
+
+ return ret;
+}
+
+static void __exit acpi_cpufreq_exit(void)
+{
+ pr_debug("acpi_cpufreq_exit\n");
+
+ cpufreq_unregister_driver(&acpi_cpufreq_driver);
+
+ free_acpi_perf_data();
+}
+
+module_param(acpi_pstate_strict, uint, 0644);
+MODULE_PARM_DESC(acpi_pstate_strict,
+ "value 0 or non-zero. non-zero -> strict ACPI checks are "
+ "performed during frequency changes.");
+
+late_initcall(acpi_cpufreq_init);
+module_exit(acpi_cpufreq_exit);
+
+MODULE_ALIAS("acpi");
diff --git a/drivers/cpufreq/cpufreq-nforce2.c b/drivers/cpufreq/cpufreq-nforce2.c
new file mode 100644
index 00000000..7bac8088
--- /dev/null
+++ b/drivers/cpufreq/cpufreq-nforce2.c
@@ -0,0 +1,444 @@
+/*
+ * (C) 2004-2006 Sebastian Witt <se.witt@gmx.net>
+ *
+ * Licensed under the terms of the GNU GPL License version 2.
+ * Based upon reverse engineered information
+ *
+ * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/init.h>
+#include <linux/cpufreq.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+
+#define NFORCE2_XTAL 25
+#define NFORCE2_BOOTFSB 0x48
+#define NFORCE2_PLLENABLE 0xa8
+#define NFORCE2_PLLREG 0xa4
+#define NFORCE2_PLLADR 0xa0
+#define NFORCE2_PLL(mul, div) (0x100000 | (mul << 8) | div)
+
+#define NFORCE2_MIN_FSB 50
+#define NFORCE2_SAFE_DISTANCE 50
+
+/* Delay in ms between FSB changes */
+/* #define NFORCE2_DELAY 10 */
+
+/*
+ * nforce2_chipset:
+ * FSB is changed using the chipset
+ */
+static struct pci_dev *nforce2_dev;
+
+/* fid:
+ * multiplier * 10
+ */
+static int fid;
+
+/* min_fsb, max_fsb:
+ * minimum and maximum FSB (= FSB at boot time)
+ */
+static int min_fsb;
+static int max_fsb;
+
+MODULE_AUTHOR("Sebastian Witt <se.witt@gmx.net>");
+MODULE_DESCRIPTION("nForce2 FSB changing cpufreq driver");
+MODULE_LICENSE("GPL");
+
+module_param(fid, int, 0444);
+module_param(min_fsb, int, 0444);
+
+MODULE_PARM_DESC(fid, "CPU multiplier to use (11.5 = 115)");
+MODULE_PARM_DESC(min_fsb,
+ "Minimum FSB to use, if not defined: current FSB - 50");
+
+#define PFX "cpufreq-nforce2: "
+
+/**
+ * nforce2_calc_fsb - calculate FSB
+ * @pll: PLL value
+ *
+ * Calculates FSB from PLL value
+ */
+static int nforce2_calc_fsb(int pll)
+{
+ unsigned char mul, div;
+
+ mul = (pll >> 8) & 0xff;
+ div = pll & 0xff;
+
+ if (div > 0)
+ return NFORCE2_XTAL * mul / div;
+
+ return 0;
+}
+
+/**
+ * nforce2_calc_pll - calculate PLL value
+ * @fsb: FSB
+ *
+ * Calculate PLL value for given FSB
+ */
+static int nforce2_calc_pll(unsigned int fsb)
+{
+ unsigned char xmul, xdiv;
+ unsigned char mul = 0, div = 0;
+ int tried = 0;
+
+ /* Try to calculate multiplier and divider up to 4 times */
+ while (((mul == 0) || (div == 0)) && (tried <= 3)) {
+ for (xdiv = 2; xdiv <= 0x80; xdiv++)
+ for (xmul = 1; xmul <= 0xfe; xmul++)
+ if (nforce2_calc_fsb(NFORCE2_PLL(xmul, xdiv)) ==
+ fsb + tried) {
+ mul = xmul;
+ div = xdiv;
+ }
+ tried++;
+ }
+
+ if ((mul == 0) || (div == 0))
+ return -1;
+
+ return NFORCE2_PLL(mul, div);
+}
+
+/**
+ * nforce2_write_pll - write PLL value to chipset
+ * @pll: PLL value
+ *
+ * Writes new FSB PLL value to chipset
+ */
+static void nforce2_write_pll(int pll)
+{
+ int temp;
+
+ /* Set the pll addr. to 0x00 */
+ pci_write_config_dword(nforce2_dev, NFORCE2_PLLADR, 0);
+
+ /* Now write the value in all 64 registers */
+ for (temp = 0; temp <= 0x3f; temp++)
+ pci_write_config_dword(nforce2_dev, NFORCE2_PLLREG, pll);
+
+ return;
+}
+
+/**
+ * nforce2_fsb_read - Read FSB
+ *
+ * Read FSB from chipset
+ * If bootfsb != 0, return FSB at boot-time
+ */
+static unsigned int nforce2_fsb_read(int bootfsb)
+{
+ struct pci_dev *nforce2_sub5;
+ u32 fsb, temp = 0;
+
+ /* Get chipset boot FSB from subdevice 5 (FSB at boot-time) */
+ nforce2_sub5 = pci_get_subsys(PCI_VENDOR_ID_NVIDIA, 0x01EF,
+ PCI_ANY_ID, PCI_ANY_ID, NULL);
+ if (!nforce2_sub5)
+ return 0;
+
+ pci_read_config_dword(nforce2_sub5, NFORCE2_BOOTFSB, &fsb);
+ fsb /= 1000000;
+
+ /* Check if PLL register is already set */
+ pci_read_config_byte(nforce2_dev, NFORCE2_PLLENABLE, (u8 *)&temp);
+
+ if (bootfsb || !temp)
+ return fsb;
+
+ /* Use PLL register FSB value */
+ pci_read_config_dword(nforce2_dev, NFORCE2_PLLREG, &temp);
+ fsb = nforce2_calc_fsb(temp);
+
+ return fsb;
+}
+
+/**
+ * nforce2_set_fsb - set new FSB
+ * @fsb: New FSB
+ *
+ * Sets new FSB
+ */
+static int nforce2_set_fsb(unsigned int fsb)
+{
+ u32 temp = 0;
+ unsigned int tfsb;
+ int diff;
+ int pll = 0;
+
+ if ((fsb > max_fsb) || (fsb < NFORCE2_MIN_FSB)) {
+ printk(KERN_ERR PFX "FSB %d is out of range!\n", fsb);
+ return -EINVAL;
+ }
+
+ tfsb = nforce2_fsb_read(0);
+ if (!tfsb) {
+ printk(KERN_ERR PFX "Error while reading the FSB\n");
+ return -EINVAL;
+ }
+
+ /* First write? Then set actual value */
+ pci_read_config_byte(nforce2_dev, NFORCE2_PLLENABLE, (u8 *)&temp);
+ if (!temp) {
+ pll = nforce2_calc_pll(tfsb);
+
+ if (pll < 0)
+ return -EINVAL;
+
+ nforce2_write_pll(pll);
+ }
+
+ /* Enable write access */
+ temp = 0x01;
+ pci_write_config_byte(nforce2_dev, NFORCE2_PLLENABLE, (u8)temp);
+
+ diff = tfsb - fsb;
+
+ if (!diff)
+ return 0;
+
+ while ((tfsb != fsb) && (tfsb <= max_fsb) && (tfsb >= min_fsb)) {
+ if (diff < 0)
+ tfsb++;
+ else
+ tfsb--;
+
+ /* Calculate the PLL reg. value */
+ pll = nforce2_calc_pll(tfsb);
+ if (pll == -1)
+ return -EINVAL;
+
+ nforce2_write_pll(pll);
+#ifdef NFORCE2_DELAY
+ mdelay(NFORCE2_DELAY);
+#endif
+ }
+
+ temp = 0x40;
+ pci_write_config_byte(nforce2_dev, NFORCE2_PLLADR, (u8)temp);
+
+ return 0;
+}
+
+/**
+ * nforce2_get - get the CPU frequency
+ * @cpu: CPU number
+ *
+ * Returns the CPU frequency
+ */
+static unsigned int nforce2_get(unsigned int cpu)
+{
+ if (cpu)
+ return 0;
+ return nforce2_fsb_read(0) * fid * 100;
+}
+
+/**
+ * nforce2_target - set a new CPUFreq policy
+ * @policy: new policy
+ * @target_freq: the target frequency
+ * @relation: how that frequency relates to achieved frequency
+ * (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
+ *
+ * Sets a new CPUFreq policy.
+ */
+static int nforce2_target(struct cpufreq_policy *policy,
+ unsigned int target_freq, unsigned int relation)
+{
+/* unsigned long flags; */
+ struct cpufreq_freqs freqs;
+ unsigned int target_fsb;
+
+ if ((target_freq > policy->max) || (target_freq < policy->min))
+ return -EINVAL;
+
+ target_fsb = target_freq / (fid * 100);
+
+ freqs.old = nforce2_get(policy->cpu);
+ freqs.new = target_fsb * fid * 100;
+ freqs.cpu = 0; /* Only one CPU on nForce2 platforms */
+
+ if (freqs.old == freqs.new)
+ return 0;
+
+ pr_debug("Old CPU frequency %d kHz, new %d kHz\n",
+ freqs.old, freqs.new);
+
+ cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+
+ /* Disable IRQs */
+ /* local_irq_save(flags); */
+
+ if (nforce2_set_fsb(target_fsb) < 0)
+ printk(KERN_ERR PFX "Changing FSB to %d failed\n",
+ target_fsb);
+ else
+ pr_debug("Changed FSB successfully to %d\n",
+ target_fsb);
+
+ /* Enable IRQs */
+ /* local_irq_restore(flags); */
+
+ cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+
+ return 0;
+}
+
+/**
+ * nforce2_verify - verifies a new CPUFreq policy
+ * @policy: new policy
+ */
+static int nforce2_verify(struct cpufreq_policy *policy)
+{
+ unsigned int fsb_pol_max;
+
+ fsb_pol_max = policy->max / (fid * 100);
+
+ if (policy->min < (fsb_pol_max * fid * 100))
+ policy->max = (fsb_pol_max + 1) * fid * 100;
+
+ cpufreq_verify_within_limits(policy,
+ policy->cpuinfo.min_freq,
+ policy->cpuinfo.max_freq);
+ return 0;
+}
+
+static int nforce2_cpu_init(struct cpufreq_policy *policy)
+{
+ unsigned int fsb;
+ unsigned int rfid;
+
+ /* capability check */
+ if (policy->cpu != 0)
+ return -ENODEV;
+
+ /* Get current FSB */
+ fsb = nforce2_fsb_read(0);
+
+ if (!fsb)
+ return -EIO;
+
+ /* FIX: Get FID from CPU */
+ if (!fid) {
+ if (!cpu_khz) {
+ printk(KERN_WARNING PFX
+ "cpu_khz not set, can't calculate multiplier!\n");
+ return -ENODEV;
+ }
+
+ fid = cpu_khz / (fsb * 100);
+ rfid = fid % 5;
+
+ if (rfid) {
+ if (rfid > 2)
+ fid += 5 - rfid;
+ else
+ fid -= rfid;
+ }
+ }
+
+ printk(KERN_INFO PFX "FSB currently at %i MHz, FID %d.%d\n", fsb,
+ fid / 10, fid % 10);
+
+ /* Set maximum FSB to FSB at boot time */
+ max_fsb = nforce2_fsb_read(1);
+
+ if (!max_fsb)
+ return -EIO;
+
+ if (!min_fsb)
+ min_fsb = max_fsb - NFORCE2_SAFE_DISTANCE;
+
+ if (min_fsb < NFORCE2_MIN_FSB)
+ min_fsb = NFORCE2_MIN_FSB;
+
+ /* cpuinfo and default policy values */
+ policy->cpuinfo.min_freq = min_fsb * fid * 100;
+ policy->cpuinfo.max_freq = max_fsb * fid * 100;
+ policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
+ policy->cur = nforce2_get(policy->cpu);
+ policy->min = policy->cpuinfo.min_freq;
+ policy->max = policy->cpuinfo.max_freq;
+
+ return 0;
+}
+
+static int nforce2_cpu_exit(struct cpufreq_policy *policy)
+{
+ return 0;
+}
+
+static struct cpufreq_driver nforce2_driver = {
+ .name = "nforce2",
+ .verify = nforce2_verify,
+ .target = nforce2_target,
+ .get = nforce2_get,
+ .init = nforce2_cpu_init,
+ .exit = nforce2_cpu_exit,
+ .owner = THIS_MODULE,
+};
+
+/**
+ * nforce2_detect_chipset - detect the Southbridge which contains FSB PLL logic
+ *
+ * Detects nForce2 A2 and C1 stepping
+ *
+ */
+static int nforce2_detect_chipset(void)
+{
+ nforce2_dev = pci_get_subsys(PCI_VENDOR_ID_NVIDIA,
+ PCI_DEVICE_ID_NVIDIA_NFORCE2,
+ PCI_ANY_ID, PCI_ANY_ID, NULL);
+
+ if (nforce2_dev == NULL)
+ return -ENODEV;
+
+ printk(KERN_INFO PFX "Detected nForce2 chipset revision %X\n",
+ nforce2_dev->revision);
+ printk(KERN_INFO PFX
+ "FSB changing is maybe unstable and can lead to "
+ "crashes and data loss.\n");
+
+ return 0;
+}
+
+/**
+ * nforce2_init - initializes the nForce2 CPUFreq driver
+ *
+ * Initializes the nForce2 FSB support. Returns -ENODEV on unsupported
+ * devices, -EINVAL on problems during initiatization, and zero on
+ * success.
+ */
+static int __init nforce2_init(void)
+{
+ /* TODO: do we need to detect the processor? */
+
+ /* detect chipset */
+ if (nforce2_detect_chipset()) {
+ printk(KERN_INFO PFX "No nForce2 chipset.\n");
+ return -ENODEV;
+ }
+
+ return cpufreq_register_driver(&nforce2_driver);
+}
+
+/**
+ * nforce2_exit - unregisters cpufreq module
+ *
+ * Unregisters nForce2 FSB change support.
+ */
+static void __exit nforce2_exit(void)
+{
+ cpufreq_unregister_driver(&nforce2_driver);
+}
+
+module_init(nforce2_init);
+module_exit(nforce2_exit);
+
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
new file mode 100644
index 00000000..0a5bea9e
--- /dev/null
+++ b/drivers/cpufreq/cpufreq.c
@@ -0,0 +1,1897 @@
+/*
+ * linux/drivers/cpufreq/cpufreq.c
+ *
+ * Copyright (C) 2001 Russell King
+ * (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de>
+ *
+ * Oct 2005 - Ashok Raj <ashok.raj@intel.com>
+ * Added handling for CPU hotplug
+ * Feb 2006 - Jacob Shin <jacob.shin@amd.com>
+ * Fix handling for CPU hotplug -- affected CPUs
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/notifier.h>
+#include <linux/cpufreq.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/cpu.h>
+#include <linux/completion.h>
+#include <linux/mutex.h>
+#include <linux/syscore_ops.h>
+
+#include <trace/events/power.h>
+
+/**
+ * The "cpufreq driver" - the arch- or hardware-dependent low
+ * level driver of CPUFreq support, and its spinlock. This lock
+ * also protects the cpufreq_cpu_data array.
+ */
+static struct cpufreq_driver *cpufreq_driver;
+static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data);
+#ifdef CONFIG_HOTPLUG_CPU
+/* This one keeps track of the previously set governor of a removed CPU */
+static DEFINE_PER_CPU(char[CPUFREQ_NAME_LEN], cpufreq_cpu_governor);
+#endif
+static DEFINE_SPINLOCK(cpufreq_driver_lock);
+
+/*
+ * cpu_policy_rwsem is a per CPU reader-writer semaphore designed to cure
+ * all cpufreq/hotplug/workqueue/etc related lock issues.
+ *
+ * The rules for this semaphore:
+ * - Any routine that wants to read from the policy structure will
+ * do a down_read on this semaphore.
+ * - Any routine that will write to the policy structure and/or may take away
+ * the policy altogether (eg. CPU hotplug), will hold this lock in write
+ * mode before doing so.
+ *
+ * Additional rules:
+ * - All holders of the lock should check to make sure that the CPU they
+ * are concerned with are online after they get the lock.
+ * - Governor routines that can be called in cpufreq hotplug path should not
+ * take this sem as top level hotplug notifier handler takes this.
+ * - Lock should not be held across
+ * __cpufreq_governor(data, CPUFREQ_GOV_STOP);
+ */
+static DEFINE_PER_CPU(int, cpufreq_policy_cpu);
+static DEFINE_PER_CPU(struct rw_semaphore, cpu_policy_rwsem);
+
+#define lock_policy_rwsem(mode, cpu) \
+static int lock_policy_rwsem_##mode \
+(int cpu) \
+{ \
+ int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu); \
+ BUG_ON(policy_cpu == -1); \
+ down_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu)); \
+ if (unlikely(!cpu_online(cpu))) { \
+ up_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu)); \
+ return -1; \
+ } \
+ \
+ return 0; \
+}
+
+lock_policy_rwsem(read, cpu);
+
+lock_policy_rwsem(write, cpu);
+
+static void unlock_policy_rwsem_read(int cpu)
+{
+ int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu);
+ BUG_ON(policy_cpu == -1);
+ up_read(&per_cpu(cpu_policy_rwsem, policy_cpu));
+}
+
+static void unlock_policy_rwsem_write(int cpu)
+{
+ int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu);
+ BUG_ON(policy_cpu == -1);
+ up_write(&per_cpu(cpu_policy_rwsem, policy_cpu));
+}
+
+
+/* internal prototypes */
+static int __cpufreq_governor(struct cpufreq_policy *policy,
+ unsigned int event);
+static unsigned int __cpufreq_get(unsigned int cpu);
+static void handle_update(struct work_struct *work);
+
+/**
+ * Two notifier lists: the "policy" list is involved in the
+ * validation process for a new CPU frequency policy; the
+ * "transition" list for kernel code that needs to handle
+ * changes to devices when the CPU clock speed changes.
+ * The mutex locks both lists.
+ */
+static BLOCKING_NOTIFIER_HEAD(cpufreq_policy_notifier_list);
+static struct srcu_notifier_head cpufreq_transition_notifier_list;
+
+static bool init_cpufreq_transition_notifier_list_called;
+static int __init init_cpufreq_transition_notifier_list(void)
+{
+ srcu_init_notifier_head(&cpufreq_transition_notifier_list);
+ init_cpufreq_transition_notifier_list_called = true;
+ return 0;
+}
+pure_initcall(init_cpufreq_transition_notifier_list);
+
+static LIST_HEAD(cpufreq_governor_list);
+static DEFINE_MUTEX(cpufreq_governor_mutex);
+
+struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu)
+{
+ struct cpufreq_policy *data;
+ unsigned long flags;
+
+ if (cpu >= nr_cpu_ids)
+ goto err_out;
+
+ /* get the cpufreq driver */
+ spin_lock_irqsave(&cpufreq_driver_lock, flags);
+
+ if (!cpufreq_driver)
+ goto err_out_unlock;
+
+ if (!try_module_get(cpufreq_driver->owner))
+ goto err_out_unlock;
+
+
+ /* get the CPU */
+ data = per_cpu(cpufreq_cpu_data, cpu);
+
+ if (!data)
+ goto err_out_put_module;
+
+ if (!kobject_get(&data->kobj))
+ goto err_out_put_module;
+
+ spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
+ return data;
+
+err_out_put_module:
+ module_put(cpufreq_driver->owner);
+err_out_unlock:
+ spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
+err_out:
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(cpufreq_cpu_get);
+
+
+void cpufreq_cpu_put(struct cpufreq_policy *data)
+{
+ kobject_put(&data->kobj);
+ module_put(cpufreq_driver->owner);
+}
+EXPORT_SYMBOL_GPL(cpufreq_cpu_put);
+
+
+/*********************************************************************
+ * EXTERNALLY AFFECTING FREQUENCY CHANGES *
+ *********************************************************************/
+
+/**
+ * adjust_jiffies - adjust the system "loops_per_jiffy"
+ *
+ * This function alters the system "loops_per_jiffy" for the clock
+ * speed change. Note that loops_per_jiffy cannot be updated on SMP
+ * systems as each CPU might be scaled differently. So, use the arch
+ * per-CPU loops_per_jiffy value wherever possible.
+ */
+#ifndef CONFIG_SMP
+static unsigned long l_p_j_ref;
+static unsigned int l_p_j_ref_freq;
+
+static void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
+{
+ if (ci->flags & CPUFREQ_CONST_LOOPS)
+ return;
+
+ if (!l_p_j_ref_freq) {
+ l_p_j_ref = loops_per_jiffy;
+ l_p_j_ref_freq = ci->old;
+ pr_debug("saving %lu as reference value for loops_per_jiffy; "
+ "freq is %u kHz\n", l_p_j_ref, l_p_j_ref_freq);
+ }
+ if ((val == CPUFREQ_PRECHANGE && ci->old < ci->new) ||
+ (val == CPUFREQ_POSTCHANGE && ci->old > ci->new) ||
+ (val == CPUFREQ_RESUMECHANGE || val == CPUFREQ_SUSPENDCHANGE)) {
+ loops_per_jiffy = cpufreq_scale(l_p_j_ref, l_p_j_ref_freq,
+ ci->new);
+ pr_debug("scaling loops_per_jiffy to %lu "
+ "for frequency %u kHz\n", loops_per_jiffy, ci->new);
+ }
+}
+#else
+static inline void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
+{
+ return;
+}
+#endif
+
+
+/**
+ * cpufreq_notify_transition - call notifier chain and adjust_jiffies
+ * on frequency transition.
+ *
+ * This function calls the transition notifiers and the "adjust_jiffies"
+ * function. It is called twice on all CPU frequency changes that have
+ * external effects.
+ */
+void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state)
+{
+ struct cpufreq_policy *policy;
+
+ BUG_ON(irqs_disabled());
+
+ freqs->flags = cpufreq_driver->flags;
+ pr_debug("notification %u of frequency transition to %u kHz\n",
+ state, freqs->new);
+
+ policy = per_cpu(cpufreq_cpu_data, freqs->cpu);
+ switch (state) {
+
+ case CPUFREQ_PRECHANGE:
+ /* detect if the driver reported a value as "old frequency"
+ * which is not equal to what the cpufreq core thinks is
+ * "old frequency".
+ */
+ if (!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
+ if ((policy) && (policy->cpu == freqs->cpu) &&
+ (policy->cur) && (policy->cur != freqs->old)) {
+ pr_debug("Warning: CPU frequency is"
+ " %u, cpufreq assumed %u kHz.\n",
+ freqs->old, policy->cur);
+ freqs->old = policy->cur;
+ }
+ }
+ srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
+ CPUFREQ_PRECHANGE, freqs);
+ adjust_jiffies(CPUFREQ_PRECHANGE, freqs);
+ break;
+
+ case CPUFREQ_POSTCHANGE:
+ adjust_jiffies(CPUFREQ_POSTCHANGE, freqs);
+ pr_debug("FREQ: %lu - CPU: %lu", (unsigned long)freqs->new,
+ (unsigned long)freqs->cpu);
+ trace_power_frequency(POWER_PSTATE, freqs->new, freqs->cpu);
+ trace_cpu_frequency(freqs->new, freqs->cpu);
+ srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
+ CPUFREQ_POSTCHANGE, freqs);
+ if (likely(policy) && likely(policy->cpu == freqs->cpu))
+ policy->cur = freqs->new;
+ break;
+ }
+}
+EXPORT_SYMBOL_GPL(cpufreq_notify_transition);
+
+
+
+/*********************************************************************
+ * SYSFS INTERFACE *
+ *********************************************************************/
+
+static struct cpufreq_governor *__find_governor(const char *str_governor)
+{
+ struct cpufreq_governor *t;
+
+ list_for_each_entry(t, &cpufreq_governor_list, governor_list)
+ if (!strnicmp(str_governor, t->name, CPUFREQ_NAME_LEN))
+ return t;
+
+ return NULL;
+}
+
+/**
+ * cpufreq_parse_governor - parse a governor string
+ */
+static int cpufreq_parse_governor(char *str_governor, unsigned int *policy,
+ struct cpufreq_governor **governor)
+{
+ int err = -EINVAL;
+
+ if (!cpufreq_driver)
+ goto out;
+
+ if (cpufreq_driver->setpolicy) {
+ if (!strnicmp(str_governor, "performance", CPUFREQ_NAME_LEN)) {
+ *policy = CPUFREQ_POLICY_PERFORMANCE;
+ err = 0;
+ } else if (!strnicmp(str_governor, "powersave",
+ CPUFREQ_NAME_LEN)) {
+ *policy = CPUFREQ_POLICY_POWERSAVE;
+ err = 0;
+ }
+ } else if (cpufreq_driver->target) {
+ struct cpufreq_governor *t;
+
+ mutex_lock(&cpufreq_governor_mutex);
+
+ t = __find_governor(str_governor);
+
+ if (t == NULL) {
+ int ret;
+
+ mutex_unlock(&cpufreq_governor_mutex);
+ ret = request_module("cpufreq_%s", str_governor);
+ mutex_lock(&cpufreq_governor_mutex);
+
+ if (ret == 0)
+ t = __find_governor(str_governor);
+ }
+
+ if (t != NULL) {
+ *governor = t;
+ err = 0;
+ }
+
+ mutex_unlock(&cpufreq_governor_mutex);
+ }
+out:
+ return err;
+}
+
+
+/**
+ * cpufreq_per_cpu_attr_read() / show_##file_name() -
+ * print out cpufreq information
+ *
+ * Write out information from cpufreq_driver->policy[cpu]; object must be
+ * "unsigned int".
+ */
+
+#define show_one(file_name, object) \
+static ssize_t show_##file_name \
+(struct cpufreq_policy *policy, char *buf) \
+{ \
+ return sprintf(buf, "%u\n", policy->object); \
+}
+
+show_one(cpuinfo_min_freq, cpuinfo.min_freq);
+show_one(cpuinfo_max_freq, cpuinfo.max_freq);
+show_one(cpuinfo_transition_latency, cpuinfo.transition_latency);
+show_one(scaling_min_freq, min);
+show_one(scaling_max_freq, max);
+show_one(scaling_cur_freq, cur);
+
+static int __cpufreq_set_policy(struct cpufreq_policy *data,
+ struct cpufreq_policy *policy);
+
+/**
+ * cpufreq_per_cpu_attr_write() / store_##file_name() - sysfs write access
+ */
+#define store_one(file_name, object) \
+static ssize_t store_##file_name \
+(struct cpufreq_policy *policy, const char *buf, size_t count) \
+{ \
+ unsigned int ret = -EINVAL; \
+ struct cpufreq_policy new_policy; \
+ \
+ ret = cpufreq_get_policy(&new_policy, policy->cpu); \
+ if (ret) \
+ return -EINVAL; \
+ \
+ ret = sscanf(buf, "%u", &new_policy.object); \
+ if (ret != 1) \
+ return -EINVAL; \
+ \
+ ret = __cpufreq_set_policy(policy, &new_policy); \
+ policy->user_policy.object = policy->object; \
+ \
+ return ret ? ret : count; \
+}
+
+store_one(scaling_min_freq, min);
+store_one(scaling_max_freq, max);
+
+/**
+ * show_cpuinfo_cur_freq - current CPU frequency as detected by hardware
+ */
+static ssize_t show_cpuinfo_cur_freq(struct cpufreq_policy *policy,
+ char *buf)
+{
+ unsigned int cur_freq = __cpufreq_get(policy->cpu);
+ if (!cur_freq)
+ return sprintf(buf, "<unknown>");
+ return sprintf(buf, "%u\n", cur_freq);
+}
+
+
+/**
+ * show_scaling_governor - show the current policy for the specified CPU
+ */
+static ssize_t show_scaling_governor(struct cpufreq_policy *policy, char *buf)
+{
+ if (policy->policy == CPUFREQ_POLICY_POWERSAVE)
+ return sprintf(buf, "powersave\n");
+ else if (policy->policy == CPUFREQ_POLICY_PERFORMANCE)
+ return sprintf(buf, "performance\n");
+ else if (policy->governor)
+ return scnprintf(buf, CPUFREQ_NAME_LEN, "%s\n",
+ policy->governor->name);
+ return -EINVAL;
+}
+
+
+/**
+ * store_scaling_governor - store policy for the specified CPU
+ */
+static ssize_t store_scaling_governor(struct cpufreq_policy *policy,
+ const char *buf, size_t count)
+{
+ unsigned int ret = -EINVAL;
+ char str_governor[16];
+ struct cpufreq_policy new_policy;
+
+ ret = cpufreq_get_policy(&new_policy, policy->cpu);
+ if (ret)
+ return ret;
+
+ ret = sscanf(buf, "%15s", str_governor);
+ if (ret != 1)
+ return -EINVAL;
+
+ if (cpufreq_parse_governor(str_governor, &new_policy.policy,
+ &new_policy.governor))
+ return -EINVAL;
+
+ /* Do not use cpufreq_set_policy here or the user_policy.max
+ will be wrongly overridden */
+ ret = __cpufreq_set_policy(policy, &new_policy);
+
+ policy->user_policy.policy = policy->policy;
+ policy->user_policy.governor = policy->governor;
+
+ if (ret)
+ return ret;
+ else
+ return count;
+}
+
+/**
+ * show_scaling_driver - show the cpufreq driver currently loaded
+ */
+static ssize_t show_scaling_driver(struct cpufreq_policy *policy, char *buf)
+{
+ return scnprintf(buf, CPUFREQ_NAME_LEN, "%s\n", cpufreq_driver->name);
+}
+
+/**
+ * show_scaling_available_governors - show the available CPUfreq governors
+ */
+static ssize_t show_scaling_available_governors(struct cpufreq_policy *policy,
+ char *buf)
+{
+ ssize_t i = 0;
+ struct cpufreq_governor *t;
+
+ if (!cpufreq_driver->target) {
+ i += sprintf(buf, "performance powersave");
+ goto out;
+ }
+
+ list_for_each_entry(t, &cpufreq_governor_list, governor_list) {
+ if (i >= (ssize_t) ((PAGE_SIZE / sizeof(char))
+ - (CPUFREQ_NAME_LEN + 2)))
+ goto out;
+ i += scnprintf(&buf[i], CPUFREQ_NAME_LEN, "%s ", t->name);
+ }
+out:
+ i += sprintf(&buf[i], "\n");
+ return i;
+}
+
+static ssize_t show_cpus(const struct cpumask *mask, char *buf)
+{
+ ssize_t i = 0;
+ unsigned int cpu;
+
+ for_each_cpu(cpu, mask) {
+ if (i)
+ i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), " ");
+ i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), "%u", cpu);
+ if (i >= (PAGE_SIZE - 5))
+ break;
+ }
+ i += sprintf(&buf[i], "\n");
+ return i;
+}
+
+/**
+ * show_related_cpus - show the CPUs affected by each transition even if
+ * hw coordination is in use
+ */
+static ssize_t show_related_cpus(struct cpufreq_policy *policy, char *buf)
+{
+ if (cpumask_empty(policy->related_cpus))
+ return show_cpus(policy->cpus, buf);
+ return show_cpus(policy->related_cpus, buf);
+}
+
+/**
+ * show_affected_cpus - show the CPUs affected by each transition
+ */
+static ssize_t show_affected_cpus(struct cpufreq_policy *policy, char *buf)
+{
+ return show_cpus(policy->cpus, buf);
+}
+
+static ssize_t store_scaling_setspeed(struct cpufreq_policy *policy,
+ const char *buf, size_t count)
+{
+ unsigned int freq = 0;
+ unsigned int ret;
+
+ if (!policy->governor || !policy->governor->store_setspeed)
+ return -EINVAL;
+
+ ret = sscanf(buf, "%u", &freq);
+ if (ret != 1)
+ return -EINVAL;
+
+ policy->governor->store_setspeed(policy, freq);
+
+ return count;
+}
+
+static ssize_t show_scaling_setspeed(struct cpufreq_policy *policy, char *buf)
+{
+ if (!policy->governor || !policy->governor->show_setspeed)
+ return sprintf(buf, "<unsupported>\n");
+
+ return policy->governor->show_setspeed(policy, buf);
+}
+
+/**
+ * show_scaling_driver - show the current cpufreq HW/BIOS limitation
+ */
+static ssize_t show_bios_limit(struct cpufreq_policy *policy, char *buf)
+{
+ unsigned int limit;
+ int ret;
+ if (cpufreq_driver->bios_limit) {
+ ret = cpufreq_driver->bios_limit(policy->cpu, &limit);
+ if (!ret)
+ return sprintf(buf, "%u\n", limit);
+ }
+ return sprintf(buf, "%u\n", policy->cpuinfo.max_freq);
+}
+
+cpufreq_freq_attr_ro_perm(cpuinfo_cur_freq, 0400);
+cpufreq_freq_attr_ro(cpuinfo_min_freq);
+cpufreq_freq_attr_ro(cpuinfo_max_freq);
+cpufreq_freq_attr_ro(cpuinfo_transition_latency);
+cpufreq_freq_attr_ro(scaling_available_governors);
+cpufreq_freq_attr_ro(scaling_driver);
+cpufreq_freq_attr_ro(scaling_cur_freq);
+cpufreq_freq_attr_ro(bios_limit);
+cpufreq_freq_attr_ro(related_cpus);
+cpufreq_freq_attr_ro(affected_cpus);
+cpufreq_freq_attr_rw(scaling_min_freq);
+cpufreq_freq_attr_rw(scaling_max_freq);
+cpufreq_freq_attr_rw(scaling_governor);
+cpufreq_freq_attr_rw(scaling_setspeed);
+
+static struct attribute *default_attrs[] = {
+ &cpuinfo_min_freq.attr,
+ &cpuinfo_max_freq.attr,
+ &cpuinfo_transition_latency.attr,
+ &scaling_min_freq.attr,
+ &scaling_max_freq.attr,
+ &affected_cpus.attr,
+ &related_cpus.attr,
+ &scaling_governor.attr,
+ &scaling_driver.attr,
+ &scaling_available_governors.attr,
+ &scaling_setspeed.attr,
+ NULL
+};
+
+struct kobject *cpufreq_global_kobject;
+EXPORT_SYMBOL(cpufreq_global_kobject);
+
+#define to_policy(k) container_of(k, struct cpufreq_policy, kobj)
+#define to_attr(a) container_of(a, struct freq_attr, attr)
+
+static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
+{
+ struct cpufreq_policy *policy = to_policy(kobj);
+ struct freq_attr *fattr = to_attr(attr);
+ ssize_t ret = -EINVAL;
+ policy = cpufreq_cpu_get(policy->cpu);
+ if (!policy)
+ goto no_policy;
+
+ if (lock_policy_rwsem_read(policy->cpu) < 0)
+ goto fail;
+
+ if (fattr->show)
+ ret = fattr->show(policy, buf);
+ else
+ ret = -EIO;
+
+ unlock_policy_rwsem_read(policy->cpu);
+fail:
+ cpufreq_cpu_put(policy);
+no_policy:
+ return ret;
+}
+
+static ssize_t store(struct kobject *kobj, struct attribute *attr,
+ const char *buf, size_t count)
+{
+ struct cpufreq_policy *policy = to_policy(kobj);
+ struct freq_attr *fattr = to_attr(attr);
+ ssize_t ret = -EINVAL;
+ policy = cpufreq_cpu_get(policy->cpu);
+ if (!policy)
+ goto no_policy;
+
+ if (lock_policy_rwsem_write(policy->cpu) < 0)
+ goto fail;
+
+ if (fattr->store)
+ ret = fattr->store(policy, buf, count);
+ else
+ ret = -EIO;
+
+ unlock_policy_rwsem_write(policy->cpu);
+fail:
+ cpufreq_cpu_put(policy);
+no_policy:
+ return ret;
+}
+
+static void cpufreq_sysfs_release(struct kobject *kobj)
+{
+ struct cpufreq_policy *policy = to_policy(kobj);
+ pr_debug("last reference is dropped\n");
+ complete(&policy->kobj_unregister);
+}
+
+static const struct sysfs_ops sysfs_ops = {
+ .show = show,
+ .store = store,
+};
+
+static struct kobj_type ktype_cpufreq = {
+ .sysfs_ops = &sysfs_ops,
+ .default_attrs = default_attrs,
+ .release = cpufreq_sysfs_release,
+};
+
+/*
+ * Returns:
+ * Negative: Failure
+ * 0: Success
+ * Positive: When we have a managed CPU and the sysfs got symlinked
+ */
+static int cpufreq_add_dev_policy(unsigned int cpu,
+ struct cpufreq_policy *policy,
+ struct sys_device *sys_dev)
+{
+ int ret = 0;
+#ifdef CONFIG_SMP
+ unsigned long flags;
+ unsigned int j;
+#ifdef CONFIG_HOTPLUG_CPU
+ struct cpufreq_governor *gov;
+
+ gov = __find_governor(per_cpu(cpufreq_cpu_governor, cpu));
+ if (gov) {
+ policy->governor = gov;
+ pr_debug("Restoring governor %s for cpu %d\n",
+ policy->governor->name, cpu);
+ }
+#endif
+
+ for_each_cpu(j, policy->cpus) {
+ struct cpufreq_policy *managed_policy;
+
+ if (cpu == j)
+ continue;
+
+ /* Check for existing affected CPUs.
+ * They may not be aware of it due to CPU Hotplug.
+ * cpufreq_cpu_put is called when the device is removed
+ * in __cpufreq_remove_dev()
+ */
+ managed_policy = cpufreq_cpu_get(j);
+ if (unlikely(managed_policy)) {
+
+ /* Set proper policy_cpu */
+ unlock_policy_rwsem_write(cpu);
+ per_cpu(cpufreq_policy_cpu, cpu) = managed_policy->cpu;
+
+ if (lock_policy_rwsem_write(cpu) < 0) {
+ /* Should not go through policy unlock path */
+ if (cpufreq_driver->exit)
+ cpufreq_driver->exit(policy);
+ cpufreq_cpu_put(managed_policy);
+ return -EBUSY;
+ }
+
+ spin_lock_irqsave(&cpufreq_driver_lock, flags);
+ cpumask_copy(managed_policy->cpus, policy->cpus);
+ per_cpu(cpufreq_cpu_data, cpu) = managed_policy;
+ spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
+
+ pr_debug("CPU already managed, adding link\n");
+ ret = sysfs_create_link(&sys_dev->kobj,
+ &managed_policy->kobj,
+ "cpufreq");
+ if (ret)
+ cpufreq_cpu_put(managed_policy);
+ /*
+ * Success. We only needed to be added to the mask.
+ * Call driver->exit() because only the cpu parent of
+ * the kobj needed to call init().
+ */
+ if (cpufreq_driver->exit)
+ cpufreq_driver->exit(policy);
+
+ if (!ret)
+ return 1;
+ else
+ return ret;
+ }
+ }
+#endif
+ return ret;
+}
+
+
+/* symlink affected CPUs */
+static int cpufreq_add_dev_symlink(unsigned int cpu,
+ struct cpufreq_policy *policy)
+{
+ unsigned int j;
+ int ret = 0;
+
+ for_each_cpu(j, policy->cpus) {
+ struct cpufreq_policy *managed_policy;
+ struct sys_device *cpu_sys_dev;
+
+ if (j == cpu)
+ continue;
+ if (!cpu_online(j))
+ continue;
+
+ pr_debug("CPU %u already managed, adding link\n", j);
+ managed_policy = cpufreq_cpu_get(cpu);
+ cpu_sys_dev = get_cpu_sysdev(j);
+ ret = sysfs_create_link(&cpu_sys_dev->kobj, &policy->kobj,
+ "cpufreq");
+ if (ret) {
+ cpufreq_cpu_put(managed_policy);
+ return ret;
+ }
+ }
+ return ret;
+}
+
+static int cpufreq_add_dev_interface(unsigned int cpu,
+ struct cpufreq_policy *policy,
+ struct sys_device *sys_dev)
+{
+ struct cpufreq_policy new_policy;
+ struct freq_attr **drv_attr;
+ unsigned long flags;
+ int ret = 0;
+ unsigned int j;
+
+ /* prepare interface data */
+ ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq,
+ &sys_dev->kobj, "cpufreq");
+ if (ret)
+ return ret;
+
+ /* set up files for this cpu device */
+ drv_attr = cpufreq_driver->attr;
+ while ((drv_attr) && (*drv_attr)) {
+ ret = sysfs_create_file(&policy->kobj, &((*drv_attr)->attr));
+ if (ret)
+ goto err_out_kobj_put;
+ drv_attr++;
+ }
+ if (cpufreq_driver->get) {
+ ret = sysfs_create_file(&policy->kobj, &cpuinfo_cur_freq.attr);
+ if (ret)
+ goto err_out_kobj_put;
+ }
+ if (cpufreq_driver->target) {
+ ret = sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr);
+ if (ret)
+ goto err_out_kobj_put;
+ }
+ if (cpufreq_driver->bios_limit) {
+ ret = sysfs_create_file(&policy->kobj, &bios_limit.attr);
+ if (ret)
+ goto err_out_kobj_put;
+ }
+
+ spin_lock_irqsave(&cpufreq_driver_lock, flags);
+ for_each_cpu(j, policy->cpus) {
+ if (!cpu_online(j))
+ continue;
+ per_cpu(cpufreq_cpu_data, j) = policy;
+ per_cpu(cpufreq_policy_cpu, j) = policy->cpu;
+ }
+ spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
+
+ ret = cpufreq_add_dev_symlink(cpu, policy);
+ if (ret)
+ goto err_out_kobj_put;
+
+ memcpy(&new_policy, policy, sizeof(struct cpufreq_policy));
+ /* assure that the starting sequence is run in __cpufreq_set_policy */
+ policy->governor = NULL;
+
+ /* set default policy */
+ ret = __cpufreq_set_policy(policy, &new_policy);
+ policy->user_policy.policy = policy->policy;
+ policy->user_policy.governor = policy->governor;
+
+ if (ret) {
+ pr_debug("setting policy failed\n");
+ if (cpufreq_driver->exit)
+ cpufreq_driver->exit(policy);
+ }
+ return ret;
+
+err_out_kobj_put:
+ kobject_put(&policy->kobj);
+ wait_for_completion(&policy->kobj_unregister);
+ return ret;
+}
+
+
+/**
+ * cpufreq_add_dev - add a CPU device
+ *
+ * Adds the cpufreq interface for a CPU device.
+ *
+ * The Oracle says: try running cpufreq registration/unregistration concurrently
+ * with with cpu hotplugging and all hell will break loose. Tried to clean this
+ * mess up, but more thorough testing is needed. - Mathieu
+ */
+static int cpufreq_add_dev(struct sys_device *sys_dev)
+{
+ unsigned int cpu = sys_dev->id;
+ int ret = 0, found = 0;
+ struct cpufreq_policy *policy;
+ unsigned long flags;
+ unsigned int j;
+#ifdef CONFIG_HOTPLUG_CPU
+ int sibling;
+#endif
+
+ if (cpu_is_offline(cpu))
+ return 0;
+
+ pr_debug("adding CPU %u\n", cpu);
+
+#ifdef CONFIG_SMP
+ /* check whether a different CPU already registered this
+ * CPU because it is in the same boat. */
+ policy = cpufreq_cpu_get(cpu);
+ if (unlikely(policy)) {
+ cpufreq_cpu_put(policy);
+ return 0;
+ }
+#endif
+
+ if (!try_module_get(cpufreq_driver->owner)) {
+ ret = -EINVAL;
+ goto module_out;
+ }
+
+ ret = -ENOMEM;
+ policy = kzalloc(sizeof(struct cpufreq_policy), GFP_KERNEL);
+ if (!policy)
+ goto nomem_out;
+
+ if (!alloc_cpumask_var(&policy->cpus, GFP_KERNEL))
+ goto err_free_policy;
+
+ if (!zalloc_cpumask_var(&policy->related_cpus, GFP_KERNEL))
+ goto err_free_cpumask;
+
+ policy->cpu = cpu;
+ cpumask_copy(policy->cpus, cpumask_of(cpu));
+
+ /* Initially set CPU itself as the policy_cpu */
+ per_cpu(cpufreq_policy_cpu, cpu) = cpu;
+ ret = (lock_policy_rwsem_write(cpu) < 0);
+ WARN_ON(ret);
+
+ init_completion(&policy->kobj_unregister);
+ INIT_WORK(&policy->update, handle_update);
+
+ /* Set governor before ->init, so that driver could check it */
+#ifdef CONFIG_HOTPLUG_CPU
+ for_each_online_cpu(sibling) {
+ struct cpufreq_policy *cp = per_cpu(cpufreq_cpu_data, sibling);
+ if (cp && cp->governor &&
+ (cpumask_test_cpu(cpu, cp->related_cpus))) {
+ policy->governor = cp->governor;
+ found = 1;
+ break;
+ }
+ }
+#endif
+ if (!found)
+ policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
+ /* call driver. From then on the cpufreq must be able
+ * to accept all calls to ->verify and ->setpolicy for this CPU
+ */
+ ret = cpufreq_driver->init(policy);
+ if (ret) {
+ pr_debug("initialization failed\n");
+ goto err_unlock_policy;
+ }
+ policy->user_policy.min = policy->min;
+ policy->user_policy.max = policy->max;
+
+ blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
+ CPUFREQ_START, policy);
+
+ ret = cpufreq_add_dev_policy(cpu, policy, sys_dev);
+ if (ret) {
+ if (ret > 0)
+ /* This is a managed cpu, symlink created,
+ exit with 0 */
+ ret = 0;
+ goto err_unlock_policy;
+ }
+
+ ret = cpufreq_add_dev_interface(cpu, policy, sys_dev);
+ if (ret)
+ goto err_out_unregister;
+
+ unlock_policy_rwsem_write(cpu);
+
+ kobject_uevent(&policy->kobj, KOBJ_ADD);
+ module_put(cpufreq_driver->owner);
+ pr_debug("initialization complete\n");
+
+ return 0;
+
+
+err_out_unregister:
+ spin_lock_irqsave(&cpufreq_driver_lock, flags);
+ for_each_cpu(j, policy->cpus)
+ per_cpu(cpufreq_cpu_data, j) = NULL;
+ spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
+
+ kobject_put(&policy->kobj);
+ wait_for_completion(&policy->kobj_unregister);
+
+err_unlock_policy:
+ unlock_policy_rwsem_write(cpu);
+ free_cpumask_var(policy->related_cpus);
+err_free_cpumask:
+ free_cpumask_var(policy->cpus);
+err_free_policy:
+ kfree(policy);
+nomem_out:
+ module_put(cpufreq_driver->owner);
+module_out:
+ return ret;
+}
+
+
+/**
+ * __cpufreq_remove_dev - remove a CPU device
+ *
+ * Removes the cpufreq interface for a CPU device.
+ * Caller should already have policy_rwsem in write mode for this CPU.
+ * This routine frees the rwsem before returning.
+ */
+static int __cpufreq_remove_dev(struct sys_device *sys_dev)
+{
+ unsigned int cpu = sys_dev->id;
+ unsigned long flags;
+ struct cpufreq_policy *data;
+ struct kobject *kobj;
+ struct completion *cmp;
+#ifdef CONFIG_SMP
+ struct sys_device *cpu_sys_dev;
+ unsigned int j;
+#endif
+
+ pr_debug("unregistering CPU %u\n", cpu);
+
+ spin_lock_irqsave(&cpufreq_driver_lock, flags);
+ data = per_cpu(cpufreq_cpu_data, cpu);
+
+ if (!data) {
+ spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
+ unlock_policy_rwsem_write(cpu);
+ return -EINVAL;
+ }
+ per_cpu(cpufreq_cpu_data, cpu) = NULL;
+
+
+#ifdef CONFIG_SMP
+ /* if this isn't the CPU which is the parent of the kobj, we
+ * only need to unlink, put and exit
+ */
+ if (unlikely(cpu != data->cpu)) {
+ pr_debug("removing link\n");
+ cpumask_clear_cpu(cpu, data->cpus);
+ spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
+ kobj = &sys_dev->kobj;
+ cpufreq_cpu_put(data);
+ unlock_policy_rwsem_write(cpu);
+ sysfs_remove_link(kobj, "cpufreq");
+ return 0;
+ }
+#endif
+
+#ifdef CONFIG_SMP
+
+#ifdef CONFIG_HOTPLUG_CPU
+ strncpy(per_cpu(cpufreq_cpu_governor, cpu), data->governor->name,
+ CPUFREQ_NAME_LEN);
+#endif
+
+ /* if we have other CPUs still registered, we need to unlink them,
+ * or else wait_for_completion below will lock up. Clean the
+ * per_cpu(cpufreq_cpu_data) while holding the lock, and remove
+ * the sysfs links afterwards.
+ */
+ if (unlikely(cpumask_weight(data->cpus) > 1)) {
+ for_each_cpu(j, data->cpus) {
+ if (j == cpu)
+ continue;
+ per_cpu(cpufreq_cpu_data, j) = NULL;
+ }
+ }
+
+ spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
+
+ if (unlikely(cpumask_weight(data->cpus) > 1)) {
+ for_each_cpu(j, data->cpus) {
+ if (j == cpu)
+ continue;
+ pr_debug("removing link for cpu %u\n", j);
+#ifdef CONFIG_HOTPLUG_CPU
+ strncpy(per_cpu(cpufreq_cpu_governor, j),
+ data->governor->name, CPUFREQ_NAME_LEN);
+#endif
+ cpu_sys_dev = get_cpu_sysdev(j);
+ kobj = &cpu_sys_dev->kobj;
+ unlock_policy_rwsem_write(cpu);
+ sysfs_remove_link(kobj, "cpufreq");
+ lock_policy_rwsem_write(cpu);
+ cpufreq_cpu_put(data);
+ }
+ }
+#else
+ spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
+#endif
+
+ if (cpufreq_driver->target)
+ __cpufreq_governor(data, CPUFREQ_GOV_STOP);
+
+ kobj = &data->kobj;
+ cmp = &data->kobj_unregister;
+ unlock_policy_rwsem_write(cpu);
+ kobject_put(kobj);
+
+ /* we need to make sure that the underlying kobj is actually
+ * not referenced anymore by anybody before we proceed with
+ * unloading.
+ */
+ pr_debug("waiting for dropping of refcount\n");
+ wait_for_completion(cmp);
+ pr_debug("wait complete\n");
+
+ lock_policy_rwsem_write(cpu);
+ if (cpufreq_driver->exit)
+ cpufreq_driver->exit(data);
+ unlock_policy_rwsem_write(cpu);
+
+#ifdef CONFIG_HOTPLUG_CPU
+ /* when the CPU which is the parent of the kobj is hotplugged
+ * offline, check for siblings, and create cpufreq sysfs interface
+ * and symlinks
+ */
+ if (unlikely(cpumask_weight(data->cpus) > 1)) {
+ /* first sibling now owns the new sysfs dir */
+ cpumask_clear_cpu(cpu, data->cpus);
+ cpufreq_add_dev(get_cpu_sysdev(cpumask_first(data->cpus)));
+
+ /* finally remove our own symlink */
+ lock_policy_rwsem_write(cpu);
+ __cpufreq_remove_dev(sys_dev);
+ }
+#endif
+
+ free_cpumask_var(data->related_cpus);
+ free_cpumask_var(data->cpus);
+ kfree(data);
+
+ return 0;
+}
+
+
+static int cpufreq_remove_dev(struct sys_device *sys_dev)
+{
+ unsigned int cpu = sys_dev->id;
+ int retval;
+
+ if (cpu_is_offline(cpu))
+ return 0;
+
+ if (unlikely(lock_policy_rwsem_write(cpu)))
+ BUG();
+
+ retval = __cpufreq_remove_dev(sys_dev);
+ return retval;
+}
+
+
+static void handle_update(struct work_struct *work)
+{
+ struct cpufreq_policy *policy =
+ container_of(work, struct cpufreq_policy, update);
+ unsigned int cpu = policy->cpu;
+ pr_debug("handle_update for cpu %u called\n", cpu);
+ cpufreq_update_policy(cpu);
+}
+
+/**
+ * cpufreq_out_of_sync - If actual and saved CPU frequency differs, we're in deep trouble.
+ * @cpu: cpu number
+ * @old_freq: CPU frequency the kernel thinks the CPU runs at
+ * @new_freq: CPU frequency the CPU actually runs at
+ *
+ * We adjust to current frequency first, and need to clean up later.
+ * So either call to cpufreq_update_policy() or schedule handle_update()).
+ */
+static void cpufreq_out_of_sync(unsigned int cpu, unsigned int old_freq,
+ unsigned int new_freq)
+{
+ struct cpufreq_freqs freqs;
+
+ pr_debug("Warning: CPU frequency out of sync: cpufreq and timing "
+ "core thinks of %u, is %u kHz.\n", old_freq, new_freq);
+
+ freqs.cpu = cpu;
+ freqs.old = old_freq;
+ freqs.new = new_freq;
+ cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+ cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+}
+
+
+/**
+ * cpufreq_quick_get - get the CPU frequency (in kHz) from policy->cur
+ * @cpu: CPU number
+ *
+ * This is the last known freq, without actually getting it from the driver.
+ * Return value will be same as what is shown in scaling_cur_freq in sysfs.
+ */
+unsigned int cpufreq_quick_get(unsigned int cpu)
+{
+ struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
+ unsigned int ret_freq = 0;
+
+ if (policy) {
+ ret_freq = policy->cur;
+ cpufreq_cpu_put(policy);
+ }
+
+ return ret_freq;
+}
+EXPORT_SYMBOL(cpufreq_quick_get);
+
+
+static unsigned int __cpufreq_get(unsigned int cpu)
+{
+ struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu);
+ unsigned int ret_freq = 0;
+
+ if (!cpufreq_driver->get)
+ return ret_freq;
+
+ ret_freq = cpufreq_driver->get(cpu);
+
+ if (ret_freq && policy->cur &&
+ !(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
+ /* verify no discrepancy between actual and
+ saved value exists */
+ if (unlikely(ret_freq != policy->cur)) {
+ cpufreq_out_of_sync(cpu, policy->cur, ret_freq);
+ schedule_work(&policy->update);
+ }
+ }
+
+ return ret_freq;
+}
+
+/**
+ * cpufreq_get - get the current CPU frequency (in kHz)
+ * @cpu: CPU number
+ *
+ * Get the CPU current (static) CPU frequency
+ */
+unsigned int cpufreq_get(unsigned int cpu)
+{
+ unsigned int ret_freq = 0;
+ struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
+
+ if (!policy)
+ goto out;
+
+ if (unlikely(lock_policy_rwsem_read(cpu)))
+ goto out_policy;
+
+ ret_freq = __cpufreq_get(cpu);
+
+ unlock_policy_rwsem_read(cpu);
+
+out_policy:
+ cpufreq_cpu_put(policy);
+out:
+ return ret_freq;
+}
+EXPORT_SYMBOL(cpufreq_get);
+
+static struct sysdev_driver cpufreq_sysdev_driver = {
+ .add = cpufreq_add_dev,
+ .remove = cpufreq_remove_dev,
+};
+
+
+/**
+ * cpufreq_bp_suspend - Prepare the boot CPU for system suspend.
+ *
+ * This function is only executed for the boot processor. The other CPUs
+ * have been put offline by means of CPU hotplug.
+ */
+static int cpufreq_bp_suspend(void)
+{
+ int ret = 0;
+
+ int cpu = smp_processor_id();
+ struct cpufreq_policy *cpu_policy;
+
+ pr_debug("suspending cpu %u\n", cpu);
+
+ /* If there's no policy for the boot CPU, we have nothing to do. */
+ cpu_policy = cpufreq_cpu_get(cpu);
+ if (!cpu_policy)
+ return 0;
+
+ if (cpufreq_driver->suspend) {
+ ret = cpufreq_driver->suspend(cpu_policy);
+ if (ret)
+ printk(KERN_ERR "cpufreq: suspend failed in ->suspend "
+ "step on CPU %u\n", cpu_policy->cpu);
+ }
+
+ cpufreq_cpu_put(cpu_policy);
+ return ret;
+}
+
+/**
+ * cpufreq_bp_resume - Restore proper frequency handling of the boot CPU.
+ *
+ * 1.) resume CPUfreq hardware support (cpufreq_driver->resume())
+ * 2.) schedule call cpufreq_update_policy() ASAP as interrupts are
+ * restored. It will verify that the current freq is in sync with
+ * what we believe it to be. This is a bit later than when it
+ * should be, but nonethteless it's better than calling
+ * cpufreq_driver->get() here which might re-enable interrupts...
+ *
+ * This function is only executed for the boot CPU. The other CPUs have not
+ * been turned on yet.
+ */
+static void cpufreq_bp_resume(void)
+{
+ int ret = 0;
+
+ int cpu = smp_processor_id();
+ struct cpufreq_policy *cpu_policy;
+
+ pr_debug("resuming cpu %u\n", cpu);
+
+ /* If there's no policy for the boot CPU, we have nothing to do. */
+ cpu_policy = cpufreq_cpu_get(cpu);
+ if (!cpu_policy)
+ return;
+
+ if (cpufreq_driver->resume) {
+ ret = cpufreq_driver->resume(cpu_policy);
+ if (ret) {
+ printk(KERN_ERR "cpufreq: resume failed in ->resume "
+ "step on CPU %u\n", cpu_policy->cpu);
+ goto fail;
+ }
+ }
+
+ schedule_work(&cpu_policy->update);
+
+fail:
+ cpufreq_cpu_put(cpu_policy);
+}
+
+static struct syscore_ops cpufreq_syscore_ops = {
+ .suspend = cpufreq_bp_suspend,
+ .resume = cpufreq_bp_resume,
+};
+
+
+/*********************************************************************
+ * NOTIFIER LISTS INTERFACE *
+ *********************************************************************/
+
+/**
+ * cpufreq_register_notifier - register a driver with cpufreq
+ * @nb: notifier function to register
+ * @list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER
+ *
+ * Add a driver to one of two lists: either a list of drivers that
+ * are notified about clock rate changes (once before and once after
+ * the transition), or a list of drivers that are notified about
+ * changes in cpufreq policy.
+ *
+ * This function may sleep, and has the same return conditions as
+ * blocking_notifier_chain_register.
+ */
+int cpufreq_register_notifier(struct notifier_block *nb, unsigned int list)
+{
+ int ret;
+
+ WARN_ON(!init_cpufreq_transition_notifier_list_called);
+
+ switch (list) {
+ case CPUFREQ_TRANSITION_NOTIFIER:
+ ret = srcu_notifier_chain_register(
+ &cpufreq_transition_notifier_list, nb);
+ break;
+ case CPUFREQ_POLICY_NOTIFIER:
+ ret = blocking_notifier_chain_register(
+ &cpufreq_policy_notifier_list, nb);
+ break;
+ default:
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL(cpufreq_register_notifier);
+
+
+/**
+ * cpufreq_unregister_notifier - unregister a driver with cpufreq
+ * @nb: notifier block to be unregistered
+ * @list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER
+ *
+ * Remove a driver from the CPU frequency notifier list.
+ *
+ * This function may sleep, and has the same return conditions as
+ * blocking_notifier_chain_unregister.
+ */
+int cpufreq_unregister_notifier(struct notifier_block *nb, unsigned int list)
+{
+ int ret;
+
+ switch (list) {
+ case CPUFREQ_TRANSITION_NOTIFIER:
+ ret = srcu_notifier_chain_unregister(
+ &cpufreq_transition_notifier_list, nb);
+ break;
+ case CPUFREQ_POLICY_NOTIFIER:
+ ret = blocking_notifier_chain_unregister(
+ &cpufreq_policy_notifier_list, nb);
+ break;
+ default:
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL(cpufreq_unregister_notifier);
+
+
+/*********************************************************************
+ * GOVERNORS *
+ *********************************************************************/
+
+
+int __cpufreq_driver_target(struct cpufreq_policy *policy,
+ unsigned int target_freq,
+ unsigned int relation)
+{
+ int retval = -EINVAL;
+
+ pr_debug("target for CPU %u: %u kHz, relation %u\n", policy->cpu,
+ target_freq, relation);
+ if (cpu_online(policy->cpu) && cpufreq_driver->target)
+ retval = cpufreq_driver->target(policy, target_freq, relation);
+
+ return retval;
+}
+EXPORT_SYMBOL_GPL(__cpufreq_driver_target);
+
+int cpufreq_driver_target(struct cpufreq_policy *policy,
+ unsigned int target_freq,
+ unsigned int relation)
+{
+ int ret = -EINVAL;
+
+ policy = cpufreq_cpu_get(policy->cpu);
+ if (!policy)
+ goto no_policy;
+
+ if (unlikely(lock_policy_rwsem_write(policy->cpu)))
+ goto fail;
+
+ ret = __cpufreq_driver_target(policy, target_freq, relation);
+
+ unlock_policy_rwsem_write(policy->cpu);
+
+fail:
+ cpufreq_cpu_put(policy);
+no_policy:
+ return ret;
+}
+EXPORT_SYMBOL_GPL(cpufreq_driver_target);
+
+int __cpufreq_driver_getavg(struct cpufreq_policy *policy, unsigned int cpu)
+{
+ int ret = 0;
+
+ policy = cpufreq_cpu_get(policy->cpu);
+ if (!policy)
+ return -EINVAL;
+
+ if (cpu_online(cpu) && cpufreq_driver->getavg)
+ ret = cpufreq_driver->getavg(policy, cpu);
+
+ cpufreq_cpu_put(policy);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(__cpufreq_driver_getavg);
+
+/*
+ * when "event" is CPUFREQ_GOV_LIMITS
+ */
+
+static int __cpufreq_governor(struct cpufreq_policy *policy,
+ unsigned int event)
+{
+ int ret;
+
+ /* Only must be defined when default governor is known to have latency
+ restrictions, like e.g. conservative or ondemand.
+ That this is the case is already ensured in Kconfig
+ */
+#ifdef CONFIG_CPU_FREQ_GOV_PERFORMANCE
+ struct cpufreq_governor *gov = &cpufreq_gov_performance;
+#else
+ struct cpufreq_governor *gov = NULL;
+#endif
+
+ if (policy->governor->max_transition_latency &&
+ policy->cpuinfo.transition_latency >
+ policy->governor->max_transition_latency) {
+ if (!gov)
+ return -EINVAL;
+ else {
+ printk(KERN_WARNING "%s governor failed, too long"
+ " transition latency of HW, fallback"
+ " to %s governor\n",
+ policy->governor->name,
+ gov->name);
+ policy->governor = gov;
+ }
+ }
+
+ if (!try_module_get(policy->governor->owner))
+ return -EINVAL;
+
+ pr_debug("__cpufreq_governor for CPU %u, event %u\n",
+ policy->cpu, event);
+ ret = policy->governor->governor(policy, event);
+
+ /* we keep one module reference alive for
+ each CPU governed by this CPU */
+ if ((event != CPUFREQ_GOV_START) || ret)
+ module_put(policy->governor->owner);
+ if ((event == CPUFREQ_GOV_STOP) && !ret)
+ module_put(policy->governor->owner);
+
+ return ret;
+}
+
+
+int cpufreq_register_governor(struct cpufreq_governor *governor)
+{
+ int err;
+
+ if (!governor)
+ return -EINVAL;
+
+ mutex_lock(&cpufreq_governor_mutex);
+
+ err = -EBUSY;
+ if (__find_governor(governor->name) == NULL) {
+ err = 0;
+ list_add(&governor->governor_list, &cpufreq_governor_list);
+ }
+
+ mutex_unlock(&cpufreq_governor_mutex);
+ return err;
+}
+EXPORT_SYMBOL_GPL(cpufreq_register_governor);
+
+
+void cpufreq_unregister_governor(struct cpufreq_governor *governor)
+{
+#ifdef CONFIG_HOTPLUG_CPU
+ int cpu;
+#endif
+
+ if (!governor)
+ return;
+
+#ifdef CONFIG_HOTPLUG_CPU
+ for_each_present_cpu(cpu) {
+ if (cpu_online(cpu))
+ continue;
+ if (!strcmp(per_cpu(cpufreq_cpu_governor, cpu), governor->name))
+ strcpy(per_cpu(cpufreq_cpu_governor, cpu), "\0");
+ }
+#endif
+
+ mutex_lock(&cpufreq_governor_mutex);
+ list_del(&governor->governor_list);
+ mutex_unlock(&cpufreq_governor_mutex);
+ return;
+}
+EXPORT_SYMBOL_GPL(cpufreq_unregister_governor);
+
+
+
+/*********************************************************************
+ * POLICY INTERFACE *
+ *********************************************************************/
+
+/**
+ * cpufreq_get_policy - get the current cpufreq_policy
+ * @policy: struct cpufreq_policy into which the current cpufreq_policy
+ * is written
+ *
+ * Reads the current cpufreq policy.
+ */
+int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu)
+{
+ struct cpufreq_policy *cpu_policy;
+ if (!policy)
+ return -EINVAL;
+
+ cpu_policy = cpufreq_cpu_get(cpu);
+ if (!cpu_policy)
+ return -EINVAL;
+
+ memcpy(policy, cpu_policy, sizeof(struct cpufreq_policy));
+
+ cpufreq_cpu_put(cpu_policy);
+ return 0;
+}
+EXPORT_SYMBOL(cpufreq_get_policy);
+
+
+/*
+ * data : current policy.
+ * policy : policy to be set.
+ */
+static int __cpufreq_set_policy(struct cpufreq_policy *data,
+ struct cpufreq_policy *policy)
+{
+ int ret = 0;
+
+ pr_debug("setting new policy for CPU %u: %u - %u kHz\n", policy->cpu,
+ policy->min, policy->max);
+
+ memcpy(&policy->cpuinfo, &data->cpuinfo,
+ sizeof(struct cpufreq_cpuinfo));
+
+ if (policy->min > data->max || policy->max < data->min) {
+ ret = -EINVAL;
+ goto error_out;
+ }
+
+ /* verify the cpu speed can be set within this limit */
+ ret = cpufreq_driver->verify(policy);
+ if (ret)
+ goto error_out;
+
+ /* adjust if necessary - all reasons */
+ blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
+ CPUFREQ_ADJUST, policy);
+
+ /* adjust if necessary - hardware incompatibility*/
+ blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
+ CPUFREQ_INCOMPATIBLE, policy);
+
+ /* verify the cpu speed can be set within this limit,
+ which might be different to the first one */
+ ret = cpufreq_driver->verify(policy);
+ if (ret)
+ goto error_out;
+
+ /* notification of the new policy */
+ blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
+ CPUFREQ_NOTIFY, policy);
+
+ data->min = policy->min;
+ data->max = policy->max;
+
+ pr_debug("new min and max freqs are %u - %u kHz\n",
+ data->min, data->max);
+
+ if (cpufreq_driver->setpolicy) {
+ data->policy = policy->policy;
+ pr_debug("setting range\n");
+ ret = cpufreq_driver->setpolicy(policy);
+ } else {
+ if (policy->governor != data->governor) {
+ /* save old, working values */
+ struct cpufreq_governor *old_gov = data->governor;
+
+ pr_debug("governor switch\n");
+
+ /* end old governor */
+ if (data->governor)
+ __cpufreq_governor(data, CPUFREQ_GOV_STOP);
+
+ /* start new governor */
+ data->governor = policy->governor;
+ if (__cpufreq_governor(data, CPUFREQ_GOV_START)) {
+ /* new governor failed, so re-start old one */
+ pr_debug("starting governor %s failed\n",
+ data->governor->name);
+ if (old_gov) {
+ data->governor = old_gov;
+ __cpufreq_governor(data,
+ CPUFREQ_GOV_START);
+ }
+ ret = -EINVAL;
+ goto error_out;
+ }
+ /* might be a policy change, too, so fall through */
+ }
+ pr_debug("governor: change or update limits\n");
+ __cpufreq_governor(data, CPUFREQ_GOV_LIMITS);
+ }
+
+error_out:
+ return ret;
+}
+
+/**
+ * cpufreq_update_policy - re-evaluate an existing cpufreq policy
+ * @cpu: CPU which shall be re-evaluated
+ *
+ * Useful for policy notifiers which have different necessities
+ * at different times.
+ */
+int cpufreq_update_policy(unsigned int cpu)
+{
+ struct cpufreq_policy *data = cpufreq_cpu_get(cpu);
+ struct cpufreq_policy policy;
+ int ret;
+
+ if (!data) {
+ ret = -ENODEV;
+ goto no_policy;
+ }
+
+ if (unlikely(lock_policy_rwsem_write(cpu))) {
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ pr_debug("updating policy for CPU %u\n", cpu);
+ memcpy(&policy, data, sizeof(struct cpufreq_policy));
+ policy.min = data->user_policy.min;
+ policy.max = data->user_policy.max;
+ policy.policy = data->user_policy.policy;
+ policy.governor = data->user_policy.governor;
+
+ /* BIOS might change freq behind our back
+ -> ask driver for current freq and notify governors about a change */
+ if (cpufreq_driver->get) {
+ policy.cur = cpufreq_driver->get(cpu);
+ if (!data->cur) {
+ pr_debug("Driver did not initialize current freq");
+ data->cur = policy.cur;
+ } else {
+ if (data->cur != policy.cur)
+ cpufreq_out_of_sync(cpu, data->cur,
+ policy.cur);
+ }
+ }
+
+ ret = __cpufreq_set_policy(data, &policy);
+
+ unlock_policy_rwsem_write(cpu);
+
+fail:
+ cpufreq_cpu_put(data);
+no_policy:
+ return ret;
+}
+EXPORT_SYMBOL(cpufreq_update_policy);
+
+static int __cpuinit cpufreq_cpu_callback(struct notifier_block *nfb,
+ unsigned long action, void *hcpu)
+{
+ unsigned int cpu = (unsigned long)hcpu;
+ struct sys_device *sys_dev;
+
+ sys_dev = get_cpu_sysdev(cpu);
+ if (sys_dev) {
+ switch (action) {
+ case CPU_ONLINE:
+ case CPU_ONLINE_FROZEN:
+ cpufreq_add_dev(sys_dev);
+ break;
+ case CPU_DOWN_PREPARE:
+ case CPU_DOWN_PREPARE_FROZEN:
+ if (unlikely(lock_policy_rwsem_write(cpu)))
+ BUG();
+
+ __cpufreq_remove_dev(sys_dev);
+ break;
+ case CPU_DOWN_FAILED:
+ case CPU_DOWN_FAILED_FROZEN:
+ cpufreq_add_dev(sys_dev);
+ break;
+ }
+ }
+ return NOTIFY_OK;
+}
+
+static struct notifier_block __refdata cpufreq_cpu_notifier = {
+ .notifier_call = cpufreq_cpu_callback,
+};
+
+/*********************************************************************
+ * REGISTER / UNREGISTER CPUFREQ DRIVER *
+ *********************************************************************/
+
+/**
+ * cpufreq_register_driver - register a CPU Frequency driver
+ * @driver_data: A struct cpufreq_driver containing the values#
+ * submitted by the CPU Frequency driver.
+ *
+ * Registers a CPU Frequency driver to this core code. This code
+ * returns zero on success, -EBUSY when another driver got here first
+ * (and isn't unregistered in the meantime).
+ *
+ */
+int cpufreq_register_driver(struct cpufreq_driver *driver_data)
+{
+ unsigned long flags;
+ int ret;
+
+ if (!driver_data || !driver_data->verify || !driver_data->init ||
+ ((!driver_data->setpolicy) && (!driver_data->target)))
+ return -EINVAL;
+
+ pr_debug("trying to register driver %s\n", driver_data->name);
+
+ if (driver_data->setpolicy)
+ driver_data->flags |= CPUFREQ_CONST_LOOPS;
+
+ spin_lock_irqsave(&cpufreq_driver_lock, flags);
+ if (cpufreq_driver) {
+ spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
+ return -EBUSY;
+ }
+ cpufreq_driver = driver_data;
+ spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
+
+ ret = sysdev_driver_register(&cpu_sysdev_class,
+ &cpufreq_sysdev_driver);
+ if (ret)
+ goto err_null_driver;
+
+ if (!(cpufreq_driver->flags & CPUFREQ_STICKY)) {
+ int i;
+ ret = -ENODEV;
+
+ /* check for at least one working CPU */
+ for (i = 0; i < nr_cpu_ids; i++)
+ if (cpu_possible(i) && per_cpu(cpufreq_cpu_data, i)) {
+ ret = 0;
+ break;
+ }
+
+ /* if all ->init() calls failed, unregister */
+ if (ret) {
+ pr_debug("no CPU initialized for driver %s\n",
+ driver_data->name);
+ goto err_sysdev_unreg;
+ }
+ }
+
+ register_hotcpu_notifier(&cpufreq_cpu_notifier);
+ pr_debug("driver %s up and running\n", driver_data->name);
+
+ return 0;
+err_sysdev_unreg:
+ sysdev_driver_unregister(&cpu_sysdev_class,
+ &cpufreq_sysdev_driver);
+err_null_driver:
+ spin_lock_irqsave(&cpufreq_driver_lock, flags);
+ cpufreq_driver = NULL;
+ spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(cpufreq_register_driver);
+
+
+/**
+ * cpufreq_unregister_driver - unregister the current CPUFreq driver
+ *
+ * Unregister the current CPUFreq driver. Only call this if you have
+ * the right to do so, i.e. if you have succeeded in initialising before!
+ * Returns zero if successful, and -EINVAL if the cpufreq_driver is
+ * currently not initialised.
+ */
+int cpufreq_unregister_driver(struct cpufreq_driver *driver)
+{
+ unsigned long flags;
+
+ if (!cpufreq_driver || (driver != cpufreq_driver))
+ return -EINVAL;
+
+ pr_debug("unregistering driver %s\n", driver->name);
+
+ sysdev_driver_unregister(&cpu_sysdev_class, &cpufreq_sysdev_driver);
+ unregister_hotcpu_notifier(&cpufreq_cpu_notifier);
+
+ spin_lock_irqsave(&cpufreq_driver_lock, flags);
+ cpufreq_driver = NULL;
+ spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(cpufreq_unregister_driver);
+
+static int __init cpufreq_core_init(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ per_cpu(cpufreq_policy_cpu, cpu) = -1;
+ init_rwsem(&per_cpu(cpu_policy_rwsem, cpu));
+ }
+
+ cpufreq_global_kobject = kobject_create_and_add("cpufreq",
+ &cpu_sysdev_class.kset.kobj);
+ BUG_ON(!cpufreq_global_kobject);
+ register_syscore_ops(&cpufreq_syscore_ops);
+
+ return 0;
+}
+core_initcall(cpufreq_core_init);
diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c
new file mode 100644
index 00000000..fbd15688
--- /dev/null
+++ b/drivers/cpufreq/cpufreq_conservative.c
@@ -0,0 +1,630 @@
+/*
+ * drivers/cpufreq/cpufreq_conservative.c
+ *
+ * Copyright (C) 2001 Russell King
+ * (C) 2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>.
+ * Jun Nakajima <jun.nakajima@intel.com>
+ * (C) 2009 Alexander Clouter <alex@digriz.org.uk>
+ * Copyright (C) 2012 Freescale Semiconductor, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/cpufreq.h>
+#include <linux/cpu.h>
+#include <linux/jiffies.h>
+#include <linux/kernel_stat.h>
+#include <linux/mutex.h>
+#include <linux/hrtimer.h>
+#include <linux/tick.h>
+#include <linux/ktime.h>
+#include <linux/sched.h>
+
+/*
+ * dbs is used in this file as a shortform for demandbased switching
+ * It helps to keep variable names smaller, simpler
+ */
+
+#define DEF_FREQUENCY_UP_THRESHOLD (80)
+#define DEF_FREQUENCY_DOWN_THRESHOLD (20)
+
+/*
+ * The polling frequency of this governor depends on the capability of
+ * the processor. Default polling frequency is 1000 times the transition
+ * latency of the processor. The governor will work on any processor with
+ * transition latency <= 10mS, using appropriate sampling
+ * rate.
+ * For CPUs with transition latency > 10mS (mostly drivers with CPUFREQ_ETERNAL)
+ * this governor will not work.
+ * All times here are in uS.
+ */
+#define MIN_SAMPLING_RATE_RATIO (2)
+
+static unsigned int min_sampling_rate;
+
+#define LATENCY_MULTIPLIER (1000)
+#define MIN_LATENCY_MULTIPLIER (100)
+#define DEF_SAMPLING_DOWN_FACTOR (1)
+#define MAX_SAMPLING_DOWN_FACTOR (10)
+#define TRANSITION_LATENCY_LIMIT (10 * 1000 * 1000)
+
+static void do_dbs_timer(struct work_struct *work);
+
+struct cpu_dbs_info_s {
+ cputime64_t prev_cpu_idle;
+ cputime64_t prev_cpu_wall;
+ cputime64_t prev_cpu_nice;
+ struct cpufreq_policy *cur_policy;
+ struct delayed_work work;
+ unsigned int down_skip;
+ unsigned int requested_freq;
+ int cpu;
+ unsigned int enable:1;
+ /*
+ * percpu mutex that serializes governor limit change with
+ * do_dbs_timer invocation. We do not want do_dbs_timer to run
+ * when user is changing the governor or limits.
+ */
+ struct mutex timer_mutex;
+};
+static DEFINE_PER_CPU(struct cpu_dbs_info_s, cs_cpu_dbs_info);
+
+static unsigned int dbs_enable; /* number of CPUs using this policy */
+
+/*
+ * dbs_mutex protects dbs_enable in governor start/stop.
+ */
+static DEFINE_MUTEX(dbs_mutex);
+
+static struct dbs_tuners {
+ unsigned int sampling_rate;
+ unsigned int sampling_down_factor;
+ unsigned int up_threshold;
+ unsigned int down_threshold;
+ unsigned int ignore_nice;
+ unsigned int freq_step;
+} dbs_tuners_ins = {
+ .up_threshold = DEF_FREQUENCY_UP_THRESHOLD,
+ .down_threshold = DEF_FREQUENCY_DOWN_THRESHOLD,
+ .sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR,
+ .ignore_nice = 0,
+ .freq_step = 5,
+};
+
+static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu,
+ cputime64_t *wall)
+{
+ cputime64_t idle_time;
+ cputime64_t cur_wall_time;
+ cputime64_t busy_time;
+
+ cur_wall_time = jiffies64_to_cputime64(get_jiffies_64());
+ busy_time = cputime64_add(kstat_cpu(cpu).cpustat.user,
+ kstat_cpu(cpu).cpustat.system);
+
+ busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.irq);
+ busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.softirq);
+ busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.steal);
+ busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.nice);
+
+ idle_time = cputime64_sub(cur_wall_time, busy_time);
+ if (wall)
+ *wall = (cputime64_t)jiffies_to_usecs(cur_wall_time);
+
+ return (cputime64_t)jiffies_to_usecs(idle_time);
+}
+
+static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall)
+{
+ u64 idle_time = get_cpu_idle_time_us(cpu, wall);
+
+ if (idle_time == -1ULL)
+ return get_cpu_idle_time_jiffy(cpu, wall);
+
+ return idle_time;
+}
+
+/* keep track of frequency transitions */
+static int
+dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
+ void *data)
+{
+ struct cpufreq_freqs *freq = data;
+ struct cpu_dbs_info_s *this_dbs_info = &per_cpu(cs_cpu_dbs_info,
+ freq->cpu);
+
+ struct cpufreq_policy *policy;
+
+ if (!this_dbs_info->enable)
+ return 0;
+
+ policy = this_dbs_info->cur_policy;
+
+ /*
+ * we only care if our internally tracked freq moves outside
+ * the 'valid' ranges of freqency available to us otherwise
+ * we do not change it
+ */
+ if (this_dbs_info->requested_freq > policy->max
+ || this_dbs_info->requested_freq < policy->min)
+ this_dbs_info->requested_freq = freq->new;
+
+ return 0;
+}
+
+static struct notifier_block dbs_cpufreq_notifier_block = {
+ .notifier_call = dbs_cpufreq_notifier
+};
+
+/************************** sysfs interface ************************/
+static ssize_t show_sampling_rate_min(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ return sprintf(buf, "%u\n", min_sampling_rate);
+}
+
+define_one_global_ro(sampling_rate_min);
+
+/* cpufreq_conservative Governor Tunables */
+#define show_one(file_name, object) \
+static ssize_t show_##file_name \
+(struct kobject *kobj, struct attribute *attr, char *buf) \
+{ \
+ return sprintf(buf, "%u\n", dbs_tuners_ins.object); \
+}
+show_one(sampling_rate, sampling_rate);
+show_one(sampling_down_factor, sampling_down_factor);
+show_one(up_threshold, up_threshold);
+show_one(down_threshold, down_threshold);
+show_one(ignore_nice_load, ignore_nice);
+show_one(freq_step, freq_step);
+
+static ssize_t store_sampling_down_factor(struct kobject *a,
+ struct attribute *b,
+ const char *buf, size_t count)
+{
+ unsigned int input;
+ int ret;
+ ret = sscanf(buf, "%u", &input);
+
+ if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1)
+ return -EINVAL;
+
+ dbs_tuners_ins.sampling_down_factor = input;
+ return count;
+}
+
+static ssize_t store_sampling_rate(struct kobject *a, struct attribute *b,
+ const char *buf, size_t count)
+{
+ unsigned int input;
+ int ret;
+ ret = sscanf(buf, "%u", &input);
+
+ if (ret != 1)
+ return -EINVAL;
+
+ dbs_tuners_ins.sampling_rate = max(input, min_sampling_rate);
+ return count;
+}
+
+static ssize_t store_up_threshold(struct kobject *a, struct attribute *b,
+ const char *buf, size_t count)
+{
+ unsigned int input;
+ int ret;
+ ret = sscanf(buf, "%u", &input);
+
+ if (ret != 1 || input > 100 ||
+ input <= dbs_tuners_ins.down_threshold)
+ return -EINVAL;
+
+ dbs_tuners_ins.up_threshold = input;
+ return count;
+}
+
+static ssize_t store_down_threshold(struct kobject *a, struct attribute *b,
+ const char *buf, size_t count)
+{
+ unsigned int input;
+ int ret;
+ ret = sscanf(buf, "%u", &input);
+
+ /* cannot be lower than 11 otherwise freq will not fall */
+ if (ret != 1 || input < 11 || input > 100 ||
+ input >= dbs_tuners_ins.up_threshold)
+ return -EINVAL;
+
+ dbs_tuners_ins.down_threshold = input;
+ return count;
+}
+
+static ssize_t store_ignore_nice_load(struct kobject *a, struct attribute *b,
+ const char *buf, size_t count)
+{
+ unsigned int input;
+ int ret;
+
+ unsigned int j;
+
+ ret = sscanf(buf, "%u", &input);
+ if (ret != 1)
+ return -EINVAL;
+
+ if (input > 1)
+ input = 1;
+
+ if (input == dbs_tuners_ins.ignore_nice) /* nothing to do */
+ return count;
+
+ dbs_tuners_ins.ignore_nice = input;
+
+ /* we need to re-evaluate prev_cpu_idle */
+ for_each_online_cpu(j) {
+ struct cpu_dbs_info_s *dbs_info;
+ dbs_info = &per_cpu(cs_cpu_dbs_info, j);
+ dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
+ &dbs_info->prev_cpu_wall);
+ if (dbs_tuners_ins.ignore_nice)
+ dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice;
+ }
+ return count;
+}
+
+static ssize_t store_freq_step(struct kobject *a, struct attribute *b,
+ const char *buf, size_t count)
+{
+ unsigned int input;
+ int ret;
+ ret = sscanf(buf, "%u", &input);
+
+ if (ret != 1)
+ return -EINVAL;
+
+ if (input > 100)
+ input = 100;
+
+ /* no need to test here if freq_step is zero as the user might actually
+ * want this, they would be crazy though :) */
+ dbs_tuners_ins.freq_step = input;
+ return count;
+}
+
+define_one_global_rw(sampling_rate);
+define_one_global_rw(sampling_down_factor);
+define_one_global_rw(up_threshold);
+define_one_global_rw(down_threshold);
+define_one_global_rw(ignore_nice_load);
+define_one_global_rw(freq_step);
+
+static struct attribute *dbs_attributes[] = {
+ &sampling_rate_min.attr,
+ &sampling_rate.attr,
+ &sampling_down_factor.attr,
+ &up_threshold.attr,
+ &down_threshold.attr,
+ &ignore_nice_load.attr,
+ &freq_step.attr,
+ NULL
+};
+
+static struct attribute_group dbs_attr_group = {
+ .attrs = dbs_attributes,
+ .name = "conservative",
+};
+
+/************************** sysfs end ************************/
+
+static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
+{
+ unsigned int load = 0;
+ unsigned int max_load = 0;
+ unsigned int freq_target;
+
+ struct cpufreq_policy *policy;
+ unsigned int j;
+
+ policy = this_dbs_info->cur_policy;
+
+ /*
+ * Every sampling_rate, we check, if current idle time is less
+ * than 20% (default), then we try to increase frequency
+ * Every sampling_rate*sampling_down_factor, we check, if current
+ * idle time is more than 80%, then we try to decrease frequency
+ *
+ * Any frequency increase takes it to the maximum frequency.
+ * Frequency reduction happens at minimum steps of
+ * 5% (default) of maximum frequency
+ */
+
+ /* Get Absolute Load */
+ for_each_online_cpu(j) {
+ struct cpu_dbs_info_s *j_dbs_info;
+ cputime64_t cur_wall_time, cur_idle_time;
+ unsigned int idle_time, wall_time;
+
+ j_dbs_info = &per_cpu(cs_cpu_dbs_info, j);
+
+ cur_idle_time = get_cpu_idle_time(j, &cur_wall_time);
+
+ wall_time = (unsigned int) cputime64_sub(cur_wall_time,
+ j_dbs_info->prev_cpu_wall);
+ j_dbs_info->prev_cpu_wall = cur_wall_time;
+
+ idle_time = (unsigned int) cputime64_sub(cur_idle_time,
+ j_dbs_info->prev_cpu_idle);
+ j_dbs_info->prev_cpu_idle = cur_idle_time;
+
+ if (dbs_tuners_ins.ignore_nice) {
+ cputime64_t cur_nice;
+ unsigned long cur_nice_jiffies;
+
+ cur_nice = cputime64_sub(kstat_cpu(j).cpustat.nice,
+ j_dbs_info->prev_cpu_nice);
+ /*
+ * Assumption: nice time between sampling periods will
+ * be less than 2^32 jiffies for 32 bit sys
+ */
+ cur_nice_jiffies = (unsigned long)
+ cputime64_to_jiffies64(cur_nice);
+
+ j_dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice;
+ idle_time += jiffies_to_usecs(cur_nice_jiffies);
+ }
+
+ if (unlikely(!wall_time || wall_time < idle_time))
+ continue;
+
+ load = 100 * (wall_time - idle_time) / wall_time;
+
+ if (load > max_load)
+ max_load = load;
+ }
+
+ /*
+ * break out if we 'cannot' reduce the speed as the user might
+ * want freq_step to be zero
+ */
+ if (dbs_tuners_ins.freq_step == 0)
+ return;
+
+ /* Check for frequency increase */
+ if (max_load > dbs_tuners_ins.up_threshold) {
+ this_dbs_info->down_skip = 0;
+
+ /* if we are already at full speed then break out early */
+ if (this_dbs_info->requested_freq == policy->max)
+ return;
+
+#if 1
+ this_dbs_info->requested_freq = policy->max;
+#else
+ freq_target = (dbs_tuners_ins.freq_step * policy->max) / 100;
+
+ /* max freq cannot be less than 100. But who knows.... */
+ if (unlikely(freq_target == 0))
+ freq_target = 5;
+
+ this_dbs_info->requested_freq += freq_target;
+ if (this_dbs_info->requested_freq > policy->max)
+ this_dbs_info->requested_freq = policy->max;
+#endif
+
+ __cpufreq_driver_target(policy, this_dbs_info->requested_freq,
+ CPUFREQ_RELATION_H);
+ return;
+ }
+
+ /*
+ * The optimal frequency is the frequency that is the lowest that
+ * can support the current CPU usage without triggering the up
+ * policy. To be safe, we focus 10 points under the threshold.
+ */
+ if (max_load < (dbs_tuners_ins.down_threshold - 10)) {
+ freq_target = (dbs_tuners_ins.freq_step * policy->max) / 100;
+
+ this_dbs_info->requested_freq -= freq_target;
+ if (this_dbs_info->requested_freq < policy->min)
+ this_dbs_info->requested_freq = policy->min;
+
+ /*
+ * if we cannot reduce the frequency anymore, break out early
+ */
+ if (policy->cur == policy->min)
+ return;
+
+ __cpufreq_driver_target(policy, this_dbs_info->requested_freq,
+ CPUFREQ_RELATION_H);
+ return;
+ }
+}
+
+static void do_dbs_timer(struct work_struct *work)
+{
+ struct cpu_dbs_info_s *dbs_info =
+ container_of(work, struct cpu_dbs_info_s, work.work);
+ unsigned int cpu = dbs_info->cpu;
+
+ /* We want all CPUs to do sampling nearly on same jiffy */
+ int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
+
+ delay -= jiffies % delay;
+
+ mutex_lock(&dbs_info->timer_mutex);
+
+ dbs_check_cpu(dbs_info);
+
+ schedule_delayed_work_on(cpu, &dbs_info->work, delay);
+ mutex_unlock(&dbs_info->timer_mutex);
+}
+
+static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info)
+{
+ /* We want all CPUs to do sampling nearly on same jiffy */
+ int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
+ delay -= jiffies % delay;
+
+ dbs_info->enable = 1;
+ INIT_DELAYED_WORK_DEFERRABLE(&dbs_info->work, do_dbs_timer);
+ schedule_delayed_work_on(dbs_info->cpu, &dbs_info->work, delay);
+}
+
+static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info)
+{
+ dbs_info->enable = 0;
+ cancel_delayed_work_sync(&dbs_info->work);
+}
+
+static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
+ unsigned int event)
+{
+ unsigned int cpu = policy->cpu;
+ struct cpu_dbs_info_s *this_dbs_info;
+ unsigned int j;
+ int rc;
+
+ this_dbs_info = &per_cpu(cs_cpu_dbs_info, cpu);
+
+ switch (event) {
+ case CPUFREQ_GOV_START:
+ if ((!cpu_online(cpu)) || (!policy->cur))
+ return -EINVAL;
+
+ mutex_lock(&dbs_mutex);
+
+ for_each_online_cpu(j) {
+ struct cpu_dbs_info_s *j_dbs_info;
+ j_dbs_info = &per_cpu(cs_cpu_dbs_info, j);
+ j_dbs_info->cur_policy = policy;
+
+ j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
+ &j_dbs_info->prev_cpu_wall);
+ if (dbs_tuners_ins.ignore_nice) {
+ j_dbs_info->prev_cpu_nice =
+ kstat_cpu(j).cpustat.nice;
+ }
+ }
+ this_dbs_info->down_skip = 0;
+ this_dbs_info->requested_freq = policy->cur;
+
+ mutex_init(&this_dbs_info->timer_mutex);
+ dbs_enable++;
+ /*
+ * Start the timerschedule work, when this governor
+ * is used for first time
+ */
+ if (dbs_enable == 1) {
+ unsigned int latency;
+ /* policy latency is in nS. Convert it to uS first */
+ latency = policy->cpuinfo.transition_latency / 1000;
+ if (latency == 0)
+ latency = 1;
+
+ rc = sysfs_create_group(cpufreq_global_kobject,
+ &dbs_attr_group);
+ if (rc) {
+ mutex_unlock(&dbs_mutex);
+ return rc;
+ }
+
+ /*
+ * conservative does not implement micro like ondemand
+ * governor, thus we are bound to jiffes/HZ
+ */
+ min_sampling_rate =
+ MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10);
+ /* Bring kernel and HW constraints together */
+ min_sampling_rate = max(min_sampling_rate,
+ MIN_LATENCY_MULTIPLIER * latency);
+ dbs_tuners_ins.sampling_rate =
+ max(min_sampling_rate,
+ latency * LATENCY_MULTIPLIER);
+
+ cpufreq_register_notifier(
+ &dbs_cpufreq_notifier_block,
+ CPUFREQ_TRANSITION_NOTIFIER);
+ }
+ mutex_unlock(&dbs_mutex);
+
+ dbs_timer_init(this_dbs_info);
+
+ break;
+
+ case CPUFREQ_GOV_STOP:
+ dbs_timer_exit(this_dbs_info);
+
+ mutex_lock(&dbs_mutex);
+ dbs_enable--;
+ mutex_destroy(&this_dbs_info->timer_mutex);
+
+ /*
+ * Stop the timerschedule work, when this governor
+ * is used for first time
+ */
+ if (dbs_enable == 0)
+ cpufreq_unregister_notifier(
+ &dbs_cpufreq_notifier_block,
+ CPUFREQ_TRANSITION_NOTIFIER);
+
+ mutex_unlock(&dbs_mutex);
+ if (!dbs_enable)
+ sysfs_remove_group(cpufreq_global_kobject,
+ &dbs_attr_group);
+
+ break;
+
+ case CPUFREQ_GOV_LIMITS:
+ mutex_lock(&this_dbs_info->timer_mutex);
+ if (policy->max < this_dbs_info->cur_policy->cur)
+ __cpufreq_driver_target(
+ this_dbs_info->cur_policy,
+ policy->max, CPUFREQ_RELATION_H);
+ else if (policy->min > this_dbs_info->cur_policy->cur)
+ __cpufreq_driver_target(
+ this_dbs_info->cur_policy,
+ policy->min, CPUFREQ_RELATION_L);
+ mutex_unlock(&this_dbs_info->timer_mutex);
+
+ break;
+ }
+ return 0;
+}
+
+#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE
+static
+#endif
+struct cpufreq_governor cpufreq_gov_conservative = {
+ .name = "conservative",
+ .governor = cpufreq_governor_dbs,
+ .max_transition_latency = TRANSITION_LATENCY_LIMIT,
+ .owner = THIS_MODULE,
+};
+
+static int __init cpufreq_gov_dbs_init(void)
+{
+ return cpufreq_register_governor(&cpufreq_gov_conservative);
+}
+
+static void __exit cpufreq_gov_dbs_exit(void)
+{
+ cpufreq_unregister_governor(&cpufreq_gov_conservative);
+}
+
+
+MODULE_AUTHOR("Alexander Clouter <alex@digriz.org.uk>");
+MODULE_DESCRIPTION("'cpufreq_conservative' - A dynamic cpufreq governor for "
+ "Low Latency Frequency Transition capable processors "
+ "optimised for use in a battery environment");
+MODULE_LICENSE("GPL");
+
+#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE
+fs_initcall(cpufreq_gov_dbs_init);
+#else
+module_init(cpufreq_gov_dbs_init);
+#endif
+module_exit(cpufreq_gov_dbs_exit);
diff --git a/drivers/cpufreq/cpufreq_interactive.c b/drivers/cpufreq/cpufreq_interactive.c
new file mode 100644
index 00000000..2db752c1
--- /dev/null
+++ b/drivers/cpufreq/cpufreq_interactive.c
@@ -0,0 +1,832 @@
+/*
+ * drivers/cpufreq/cpufreq_interactive.c
+ *
+ * Copyright (C) 2010 Google, Inc.
+ * Copyright (C) 2012 Freescale Semiconductor, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * Author: Mike Chan (mike@android.com)
+ *
+ */
+
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/cpufreq.h>
+#include <linux/mutex.h>
+#include <linux/sched.h>
+#include <linux/tick.h>
+#include <linux/time.h>
+#include <linux/timer.h>
+#include <linux/workqueue.h>
+#include <linux/kthread.h>
+#include <linux/mutex.h>
+#include <linux/kernel_stat.h>
+
+#include <asm/cputime.h>
+
+static atomic_t active_count = ATOMIC_INIT(0);
+
+struct cpufreq_interactive_cpuinfo {
+ struct timer_list cpu_timer;
+ int timer_idlecancel;
+ u64 time_in_idle;
+ u64 idle_exit_time;
+ u64 timer_run_time;
+ int idling;
+ u64 freq_change_time;
+ u64 freq_change_time_in_idle;
+ struct cpufreq_policy *policy;
+ struct cpufreq_frequency_table *freq_table;
+ unsigned int target_freq;
+ int governor_enabled;
+};
+
+static DEFINE_PER_CPU(struct cpufreq_interactive_cpuinfo, cpuinfo);
+
+/* Workqueues handle frequency scaling */
+static struct task_struct *up_task;
+static struct workqueue_struct *down_wq;
+static struct work_struct freq_scale_down_work;
+static cpumask_t up_cpumask;
+static spinlock_t up_cpumask_lock;
+static cpumask_t down_cpumask;
+static spinlock_t down_cpumask_lock;
+static struct mutex set_speed_lock;
+
+/* Hi speed to bump to from lo speed when load burst (default max) */
+static u64 hispeed_freq;
+
+/* Go to hi speed when CPU load at or above this value. */
+#define DEFAULT_GO_HISPEED_LOAD 95
+static unsigned long go_hispeed_load;
+
+/*
+ * The minimum amount of time to spend at a frequency before we can ramp down.
+ */
+#define DEFAULT_MIN_SAMPLE_TIME (20 * USEC_PER_MSEC)
+static unsigned long min_sample_time;
+
+/*
+ * The sample rate of the timer used to increase frequency
+ */
+#define DEFAULT_TIMER_RATE (50 * USEC_PER_MSEC)
+#define CPUFREQ_IRQ_LEN 60
+#define CPUFREQ_NOTE_LEN 120
+static unsigned long timer_rate;
+
+static int cpufreq_governor_interactive(struct cpufreq_policy *policy,
+ unsigned int event);
+
+#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE
+static
+#endif
+struct cpufreq_governor cpufreq_gov_interactive = {
+ .name = "interactive",
+ .governor = cpufreq_governor_interactive,
+ .max_transition_latency = 10000000,
+ .owner = THIS_MODULE,
+};
+
+static struct irq_tuner irq_tuner_ins[MAX_CPUFREQ_IRQ_NUMBER];
+static struct irq_desc *cpufreq_irq_desc[MAX_CPUFREQ_IRQ_NUMBER];
+
+static bool cpufreq_interactive_check_irq(void)
+{
+ bool val;
+ unsigned int irq_count = 0, i;
+ static unsigned int irq_count_start[MAX_CPUFREQ_IRQ_NUMBER];
+ static unsigned int irq_count_end[MAX_CPUFREQ_IRQ_NUMBER];
+
+ val = false;
+ for (i = 0; i < MAX_CPUFREQ_IRQ_NUMBER; i++) {
+ if (irq_tuner_ins[i].irq_number == 0)
+ break;
+ if (!irq_tuner_ins[i].enable)
+ continue;
+ if (irq_count_start[i] == 0)
+ irq_count_start[i] = cpufreq_irq_desc[i] &&
+ cpufreq_irq_desc[i]->kstat_irqs ?
+ *per_cpu_ptr(cpufreq_irq_desc[i]->kstat_irqs, 0) : 0;
+ else if (irq_count_end[i] == 0)
+ irq_count_end[i] = cpufreq_irq_desc[i] &&
+ cpufreq_irq_desc[i]->kstat_irqs ?
+ *per_cpu_ptr(cpufreq_irq_desc[i]->kstat_irqs, 0) : 0;
+ else {
+ irq_count = irq_count_end[i] - irq_count_start[i];
+ irq_count_start[i] = irq_count_end[i];
+ irq_count_end[i] = 0;
+ }
+ if (irq_count > irq_tuner_ins[i].up_threshold) {
+ irq_count = 0;
+ val = true;
+ break;
+ }
+ }
+
+ return val;
+}
+static void cpufreq_interactive_timer(unsigned long data)
+{
+ unsigned int delta_idle;
+ unsigned int delta_time;
+ int cpu_load;
+ int load_since_change;
+ u64 time_in_idle;
+ u64 idle_exit_time;
+ struct cpufreq_interactive_cpuinfo *pcpu =
+ &per_cpu(cpuinfo, data);
+ u64 now_idle;
+ unsigned int new_freq;
+ unsigned int index;
+ unsigned long flags;
+ bool irq_load;
+
+ smp_rmb();
+
+ if (!pcpu->governor_enabled)
+ goto exit;
+
+ /*
+ * Once pcpu->timer_run_time is updated to >= pcpu->idle_exit_time,
+ * this lets idle exit know the current idle time sample has
+ * been processed, and idle exit can generate a new sample and
+ * re-arm the timer. This prevents a concurrent idle
+ * exit on that CPU from writing a new set of info at the same time
+ * the timer function runs (the timer function can't use that info
+ * until more time passes).
+ */
+ time_in_idle = pcpu->time_in_idle;
+ idle_exit_time = pcpu->idle_exit_time;
+ now_idle = get_cpu_idle_time_us(data, &pcpu->timer_run_time);
+ smp_wmb();
+
+ /* If we raced with cancelling a timer, skip. */
+ if (!idle_exit_time)
+ goto exit;
+
+ delta_idle = (unsigned int) cputime64_sub(now_idle, time_in_idle);
+ delta_time = (unsigned int) cputime64_sub(pcpu->timer_run_time,
+ idle_exit_time);
+
+ /*
+ * If timer ran less than 1ms after short-term sample started, retry.
+ */
+ if (delta_time < 1000)
+ goto rearm;
+
+ if (delta_idle > delta_time)
+ cpu_load = 0;
+ else
+ cpu_load = 100 * (delta_time - delta_idle) / delta_time;
+
+ delta_idle = (unsigned int) cputime64_sub(now_idle,
+ pcpu->freq_change_time_in_idle);
+ delta_time = (unsigned int) cputime64_sub(pcpu->timer_run_time,
+ pcpu->freq_change_time);
+
+ if ((delta_time == 0) || (delta_idle > delta_time))
+ load_since_change = 0;
+ else
+ load_since_change =
+ 100 * (delta_time - delta_idle) / delta_time;
+
+ /*
+ * Choose greater of short-term load (since last idle timer
+ * started or timer function re-armed itself) or long-term load
+ * (since last frequency change).
+ */
+ if (load_since_change > cpu_load)
+ cpu_load = load_since_change;
+
+ irq_load = cpufreq_interactive_check_irq();
+ if (cpu_load >= go_hispeed_load || irq_load) {
+ if (pcpu->policy->cur == pcpu->policy->min)
+ new_freq = hispeed_freq;
+ else
+ new_freq = pcpu->policy->max * cpu_load / 100;
+ if (irq_load)
+ new_freq = hispeed_freq;
+ } else {
+ new_freq = pcpu->policy->cur * cpu_load / 100;
+ }
+
+ if (cpufreq_frequency_table_target(pcpu->policy, pcpu->freq_table,
+ new_freq, CPUFREQ_RELATION_H,
+ &index)) {
+ pr_warn_once("timer %d: cpufreq_frequency_table_target error\n",
+ (int) data);
+ goto rearm;
+ }
+
+ new_freq = pcpu->freq_table[index].frequency;
+ if (pcpu->target_freq == new_freq)
+ goto rearm_if_notmax;
+
+ /*
+ * Do not scale down unless we have been at this frequency for the
+ * minimum sample time.
+ */
+ if (new_freq < pcpu->target_freq) {
+ if (cputime64_sub(pcpu->timer_run_time, pcpu->freq_change_time)
+ < min_sample_time)
+ goto rearm;
+ }
+
+ if (new_freq < pcpu->target_freq) {
+ pcpu->target_freq = new_freq;
+ spin_lock_irqsave(&down_cpumask_lock, flags);
+ cpumask_set_cpu(data, &down_cpumask);
+ spin_unlock_irqrestore(&down_cpumask_lock, flags);
+ queue_work(down_wq, &freq_scale_down_work);
+ } else {
+ pcpu->target_freq = new_freq;
+ spin_lock_irqsave(&up_cpumask_lock, flags);
+ cpumask_set_cpu(data, &up_cpumask);
+ spin_unlock_irqrestore(&up_cpumask_lock, flags);
+ wake_up_process(up_task);
+ }
+
+rearm_if_notmax:
+ /*
+ * Already set max speed and don't see a need to change that,
+ * wait until next idle to re-evaluate, don't need timer.
+ */
+ if (pcpu->target_freq == pcpu->policy->max)
+ goto exit;
+
+rearm:
+ if (!timer_pending(&pcpu->cpu_timer)) {
+ /*
+ * If already at min: if that CPU is idle, don't set timer.
+ * Else cancel the timer if that CPU goes idle. We don't
+ * need to re-evaluate speed until the next idle exit.
+ */
+ if (pcpu->target_freq == pcpu->policy->min) {
+ smp_rmb();
+
+ if (pcpu->idling)
+ goto exit;
+
+ pcpu->timer_idlecancel = 1;
+ }
+
+ pcpu->time_in_idle = get_cpu_idle_time_us(
+ data, &pcpu->idle_exit_time);
+ mod_timer(&pcpu->cpu_timer,
+ jiffies + usecs_to_jiffies(timer_rate));
+ }
+
+exit:
+ return;
+}
+
+static void cpufreq_interactive_idle_start(void)
+{
+ struct cpufreq_interactive_cpuinfo *pcpu =
+ &per_cpu(cpuinfo, smp_processor_id());
+ int pending;
+
+ pcpu->idling = 1;
+ smp_wmb();
+ if (!pcpu->governor_enabled)
+ return;
+ pending = timer_pending(&pcpu->cpu_timer);
+
+ if (pcpu->target_freq != pcpu->policy->min) {
+#ifdef CONFIG_SMP
+ /*
+ * Entering idle while not at lowest speed. On some
+ * platforms this can hold the other CPU(s) at that speed
+ * even though the CPU is idle. Set a timer to re-evaluate
+ * speed so this idle CPU doesn't hold the other CPUs above
+ * min indefinitely. This should probably be a quirk of
+ * the CPUFreq driver.
+ */
+ if (!pending) {
+ pcpu->time_in_idle = get_cpu_idle_time_us(
+ smp_processor_id(), &pcpu->idle_exit_time);
+ pcpu->timer_idlecancel = 0;
+ mod_timer(&pcpu->cpu_timer,
+ jiffies + usecs_to_jiffies(timer_rate));
+ }
+#endif
+ } else {
+ /*
+ * If at min speed and entering idle after load has
+ * already been evaluated, and a timer has been set just in
+ * case the CPU suddenly goes busy, cancel that timer. The
+ * CPU didn't go busy; we'll recheck things upon idle exit.
+ */
+ if (pending && pcpu->timer_idlecancel) {
+ del_timer(&pcpu->cpu_timer);
+ /*
+ * Ensure last timer run time is after current idle
+ * sample start time, so next idle exit will always
+ * start a new idle sampling period.
+ */
+ pcpu->idle_exit_time = 0;
+ pcpu->timer_idlecancel = 0;
+ }
+ }
+
+}
+
+static void cpufreq_interactive_idle_end(void)
+{
+ struct cpufreq_interactive_cpuinfo *pcpu =
+ &per_cpu(cpuinfo, smp_processor_id());
+
+ pcpu->idling = 0;
+ smp_wmb();
+
+ /*
+ * Arm the timer for 1-2 ticks later if not already, and if the timer
+ * function has already processed the previous load sampling
+ * interval. (If the timer is not pending but has not processed
+ * the previous interval, it is probably racing with us on another
+ * CPU. Let it compute load based on the previous sample and then
+ * re-arm the timer for another interval when it's done, rather
+ * than updating the interval start time to be "now", which doesn't
+ * give the timer function enough time to make a decision on this
+ * run.)
+ */
+ if (timer_pending(&pcpu->cpu_timer) == 0 &&
+ pcpu->timer_run_time >= pcpu->idle_exit_time &&
+ pcpu->governor_enabled) {
+ pcpu->time_in_idle =
+ get_cpu_idle_time_us(smp_processor_id(),
+ &pcpu->idle_exit_time);
+ pcpu->timer_idlecancel = 0;
+ mod_timer(&pcpu->cpu_timer,
+ jiffies + usecs_to_jiffies(timer_rate));
+ }
+
+}
+
+static int cpufreq_interactive_up_task(void *data)
+{
+ unsigned int cpu;
+ unsigned long flags;
+ struct cpufreq_interactive_cpuinfo *pcpu;
+
+ while (1) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ spin_lock_irqsave(&up_cpumask_lock, flags);
+
+ if (cpumask_empty(&up_cpumask)) {
+ spin_unlock_irqrestore(&up_cpumask_lock, flags);
+ schedule();
+
+ if (kthread_should_stop())
+ break;
+
+ spin_lock_irqsave(&up_cpumask_lock, flags);
+ }
+
+ set_current_state(TASK_RUNNING);
+ cpumask_clear(&up_cpumask);
+ spin_unlock_irqrestore(&up_cpumask_lock, flags);
+
+ for_each_online_cpu(cpu) {
+ unsigned int j;
+ unsigned int max_freq = 0;
+
+ pcpu = &per_cpu(cpuinfo, cpu);
+ smp_rmb();
+
+ if (!pcpu->governor_enabled)
+ continue;
+
+ mutex_lock(&set_speed_lock);
+
+ for_each_online_cpu(j) {
+ struct cpufreq_interactive_cpuinfo *pjcpu =
+ &per_cpu(cpuinfo, j);
+ if (pjcpu->target_freq > max_freq)
+ max_freq = pjcpu->target_freq;
+ }
+ if (max_freq != pcpu->policy->cur)
+ __cpufreq_driver_target(pcpu->policy,
+ max_freq,
+ CPUFREQ_RELATION_H);
+ mutex_unlock(&set_speed_lock);
+
+ pcpu->freq_change_time_in_idle =
+ get_cpu_idle_time_us(cpu,
+ &pcpu->freq_change_time);
+ }
+ }
+
+ return 0;
+}
+
+static void cpufreq_interactive_freq_down(struct work_struct *work)
+{
+ unsigned int cpu;
+ unsigned long flags;
+ struct cpufreq_interactive_cpuinfo *pcpu;
+
+ spin_lock_irqsave(&down_cpumask_lock, flags);
+ cpumask_clear(&down_cpumask);
+ spin_unlock_irqrestore(&down_cpumask_lock, flags);
+
+ for_each_online_cpu(cpu) {
+ unsigned int j;
+ unsigned int max_freq = 0;
+
+ pcpu = &per_cpu(cpuinfo, cpu);
+ smp_rmb();
+
+ if (!pcpu->governor_enabled)
+ continue;
+
+ mutex_lock(&set_speed_lock);
+
+ for_each_online_cpu(j) {
+ struct cpufreq_interactive_cpuinfo *pjcpu =
+ &per_cpu(cpuinfo, j);
+
+ if (pjcpu->target_freq > max_freq)
+ max_freq = pjcpu->target_freq;
+ }
+
+ if (max_freq != pcpu->policy->cur)
+ __cpufreq_driver_target(pcpu->policy, max_freq,
+ CPUFREQ_RELATION_H);
+
+ mutex_unlock(&set_speed_lock);
+ pcpu->freq_change_time_in_idle =
+ get_cpu_idle_time_us(cpu,
+ &pcpu->freq_change_time);
+ }
+}
+
+static ssize_t show_hispeed_freq(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ return sprintf(buf, "%llu\n", hispeed_freq);
+}
+
+static ssize_t store_hispeed_freq(struct kobject *kobj,
+ struct attribute *attr, const char *buf,
+ size_t count)
+{
+ int ret;
+ u64 val;
+
+ ret = strict_strtoull(buf, 0, &val);
+ if (ret < 0)
+ return ret;
+ hispeed_freq = val;
+ return count;
+}
+
+static struct global_attr hispeed_freq_attr = __ATTR(hispeed_freq, 0644,
+ show_hispeed_freq, store_hispeed_freq);
+
+
+static ssize_t show_go_hispeed_load(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ return sprintf(buf, "%lu\n", go_hispeed_load);
+}
+
+static ssize_t store_go_hispeed_load(struct kobject *kobj,
+ struct attribute *attr, const char *buf, size_t count)
+{
+ int ret;
+ unsigned long val;
+
+ ret = strict_strtoul(buf, 0, &val);
+ if (ret < 0)
+ return ret;
+ go_hispeed_load = val;
+ return count;
+}
+
+static struct global_attr go_hispeed_load_attr = __ATTR(go_hispeed_load, 0644,
+ show_go_hispeed_load, store_go_hispeed_load);
+
+static ssize_t show_min_sample_time(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ return sprintf(buf, "%lu\n", min_sample_time);
+}
+
+static ssize_t store_min_sample_time(struct kobject *kobj,
+ struct attribute *attr, const char *buf, size_t count)
+{
+ int ret;
+ unsigned long val;
+
+ ret = strict_strtoul(buf, 0, &val);
+ if (ret < 0)
+ return ret;
+ min_sample_time = val;
+ return count;
+}
+
+static struct global_attr min_sample_time_attr = __ATTR(min_sample_time, 0644,
+ show_min_sample_time, store_min_sample_time);
+
+static ssize_t show_timer_rate(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ return sprintf(buf, "%lu\n", timer_rate);
+}
+
+static ssize_t store_timer_rate(struct kobject *kobj,
+ struct attribute *attr, const char *buf, size_t count)
+{
+ int ret;
+ unsigned long val;
+
+ ret = strict_strtoul(buf, 0, &val);
+ if (ret < 0)
+ return ret;
+ timer_rate = val;
+ return count;
+}
+
+static struct global_attr timer_rate_attr = __ATTR(timer_rate, 0644,
+ show_timer_rate, store_timer_rate);
+
+
+static ssize_t show_irq_param(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ int i, j = 0;
+ j += scnprintf(&buf[j], CPUFREQ_NOTE_LEN, "Change irq setting by echo a data, format: 0xAABBBC, AA:irq number, BBB:up_threshold, C:enable\n");
+ for (i = 0; i < MAX_CPUFREQ_IRQ_NUMBER; i++) {
+ if (irq_tuner_ins[i].irq_number != 0)
+ j += scnprintf(&buf[j], CPUFREQ_IRQ_LEN, "irq number: 0x%x, up_threshold 0x%x, %s\n", irq_tuner_ins[i].irq_number, irq_tuner_ins[i].up_threshold, irq_tuner_ins[i].enable ? "enabled" : "disabled");
+ }
+
+ return j;
+}
+
+static ssize_t store_irq_param(struct kobject *kobj,
+ struct attribute *attr, const char *buf, size_t count)
+{
+ int ret, i;
+ unsigned long val;
+
+ ret = strict_strtoul(buf, 0, &val);
+ if (ret < 0)
+ return ret;
+
+ for (i = 0; i < MAX_CPUFREQ_IRQ_NUMBER; i++) {
+ if (irq_tuner_ins[i].irq_number == val >> 16)
+ break;
+ }
+
+ if (i >= MAX_CPUFREQ_IRQ_NUMBER) {
+ printk(KERN_WARNING "Invalid irq number!\n");
+ return -EINVAL;
+ }
+ irq_tuner_ins[i].irq_number = val >> 16;
+ irq_tuner_ins[i].up_threshold = (val & 0xFFF0) >> 4;
+ irq_tuner_ins[i].enable = (val & 0xF) ? true : false;
+
+ return count;
+}
+
+
+static struct global_attr irq_param_attr = __ATTR(irq_scaling, 0644,
+ show_irq_param, store_irq_param);
+
+static struct attribute *interactive_attributes[] = {
+ &hispeed_freq_attr.attr,
+ &go_hispeed_load_attr.attr,
+ &min_sample_time_attr.attr,
+ &timer_rate_attr.attr,
+ &irq_param_attr.attr,
+ NULL,
+};
+
+static struct attribute_group interactive_attr_group = {
+ .attrs = interactive_attributes,
+ .name = "interactive",
+};
+
+static int cpufreq_governor_interactive(struct cpufreq_policy *policy,
+ unsigned int event)
+{
+ int rc;
+ unsigned int j;
+ struct cpufreq_interactive_cpuinfo *pcpu;
+ struct cpufreq_frequency_table *freq_table;
+
+ switch (event) {
+ case CPUFREQ_GOV_START:
+ if (!cpu_online(policy->cpu))
+ return -EINVAL;
+
+ freq_table =
+ cpufreq_frequency_get_table(policy->cpu);
+
+ for_each_cpu(j, policy->cpus) {
+ pcpu = &per_cpu(cpuinfo, j);
+ pcpu->policy = policy;
+ if (pcpu->idling)
+ pcpu->target_freq = policy->min;
+ else
+ pcpu->target_freq = policy->cur;
+
+ pcpu->freq_table = freq_table;
+ pcpu->freq_change_time_in_idle =
+ get_cpu_idle_time_us(j,
+ &pcpu->freq_change_time);
+ pcpu->governor_enabled = 1;
+ smp_wmb();
+ }
+
+ if (!hispeed_freq)
+ hispeed_freq = policy->max;
+
+ /*
+ * Do not register the idle hook and create sysfs
+ * entries if we have already done so.
+ */
+ if (atomic_inc_return(&active_count) > 1)
+ return 0;
+
+ rc = sysfs_create_group(cpufreq_global_kobject,
+ &interactive_attr_group);
+ if (rc)
+ return rc;
+
+ break;
+
+ case CPUFREQ_GOV_STOP:
+ for_each_cpu(j, policy->cpus) {
+ pcpu = &per_cpu(cpuinfo, j);
+ pcpu->governor_enabled = 0;
+ smp_wmb();
+ del_timer_sync(&pcpu->cpu_timer);
+
+ /*
+ * Reset idle exit time since we may cancel the timer
+ * before it can run after the last idle exit time,
+ * to avoid tripping the check in idle exit for a timer
+ * that is trying to run.
+ */
+ pcpu->idle_exit_time = 0;
+ }
+
+ flush_work(&freq_scale_down_work);
+ if (atomic_dec_return(&active_count) > 0)
+ return 0;
+
+ sysfs_remove_group(cpufreq_global_kobject,
+ &interactive_attr_group);
+
+ break;
+
+ case CPUFREQ_GOV_LIMITS:
+ if (policy->max < policy->cur)
+ __cpufreq_driver_target(policy,
+ policy->max, CPUFREQ_RELATION_H);
+ else if (policy->min > policy->cur)
+ __cpufreq_driver_target(policy,
+ policy->min, CPUFREQ_RELATION_L);
+ break;
+ }
+ return 0;
+}
+
+static int cpufreq_interactive_idle_notifier(struct notifier_block *nb,
+ unsigned long val,
+ void *data)
+{
+ switch (val) {
+ case IDLE_START:
+ cpufreq_interactive_idle_start();
+ break;
+ case IDLE_END:
+ cpufreq_interactive_idle_end();
+ break;
+ }
+
+ return 0;
+}
+
+static struct notifier_block cpufreq_interactive_idle_nb = {
+ .notifier_call = cpufreq_interactive_idle_notifier,
+};
+
+static int __init cpufreq_interactive_init(void)
+{
+ unsigned int i;
+ struct cpufreq_interactive_cpuinfo *pcpu;
+ struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
+
+ go_hispeed_load = DEFAULT_GO_HISPEED_LOAD;
+ min_sample_time = DEFAULT_MIN_SAMPLE_TIME;
+ timer_rate = DEFAULT_TIMER_RATE;
+
+ /* Initalize per-cpu timers */
+ for_each_possible_cpu(i) {
+ pcpu = &per_cpu(cpuinfo, i);
+ init_timer(&pcpu->cpu_timer);
+ pcpu->cpu_timer.function = cpufreq_interactive_timer;
+ pcpu->cpu_timer.data = i;
+ }
+
+ up_task = kthread_create(cpufreq_interactive_up_task, NULL,
+ "kinteractiveup");
+ if (IS_ERR(up_task))
+ return PTR_ERR(up_task);
+
+ sched_setscheduler_nocheck(up_task, SCHED_FIFO, &param);
+ get_task_struct(up_task);
+
+ /* No rescuer thread, bind to CPU queuing the work for possibly
+ warm cache (probably doesn't matter much). */
+ down_wq = alloc_workqueue("kinteractive_down", 0, 1);
+
+ if (!down_wq)
+ goto err_freeuptask;
+
+ INIT_WORK(&freq_scale_down_work,
+ cpufreq_interactive_freq_down);
+
+ spin_lock_init(&up_cpumask_lock);
+ spin_lock_init(&down_cpumask_lock);
+ mutex_init(&set_speed_lock);
+
+ idle_notifier_register(&cpufreq_interactive_idle_nb);
+
+ return cpufreq_register_governor(&cpufreq_gov_interactive);
+
+err_freeuptask:
+ put_task_struct(up_task);
+ return -ENOMEM;
+}
+
+int cpufreq_gov_irq_tuner_register(struct irq_tuner dbs_irq_tuner)
+{
+ int i, ret = 0;
+ static bool init_flag;
+
+ /* Init the global irq_tuner_ins structure */
+ if (!init_flag) {
+ for (i = 0; i < MAX_CPUFREQ_IRQ_NUMBER; i++) {
+ irq_tuner_ins[i].irq_number = 0;
+ irq_tuner_ins[i].up_threshold = 0;
+ irq_tuner_ins[i].enable = 0;
+ }
+ init_flag = true;
+ }
+
+ if (dbs_irq_tuner.irq_number == 0)
+ return -EINVAL;
+ /* Find an unused struct */
+ for (i = 0; i < MAX_CPUFREQ_IRQ_NUMBER; i++) {
+ if (irq_tuner_ins[i].irq_number != 0)
+ continue;
+ else
+ break;
+ }
+ /* Check index */
+ if (i >= MAX_CPUFREQ_IRQ_NUMBER) {
+ printk(KERN_WARNING "Too many irq number requested!\n");
+ return -EINVAL;
+ }
+
+ irq_tuner_ins[i].irq_number = dbs_irq_tuner.irq_number;
+ irq_tuner_ins[i].up_threshold = dbs_irq_tuner.up_threshold;
+ irq_tuner_ins[i].enable = dbs_irq_tuner.enable;
+ cpufreq_irq_desc[i] = irq_to_desc(irq_tuner_ins[i].irq_number);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(cpufreq_gov_irq_tuner_register);
+#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE
+late_initcall(cpufreq_interactive_init);
+#else
+module_init(cpufreq_interactive_init);
+#endif
+
+static void __exit cpufreq_interactive_exit(void)
+{
+ cpufreq_unregister_governor(&cpufreq_gov_interactive);
+ kthread_stop(up_task);
+ put_task_struct(up_task);
+ destroy_workqueue(down_wq);
+}
+
+module_exit(cpufreq_interactive_exit);
+
+MODULE_AUTHOR("Mike Chan <mike@android.com>");
+MODULE_DESCRIPTION("'cpufreq_interactive' - A cpufreq governor for "
+ "Latency sensitive workloads");
+MODULE_LICENSE("GPL");
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
new file mode 100644
index 00000000..891360ed
--- /dev/null
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -0,0 +1,759 @@
+/*
+ * drivers/cpufreq/cpufreq_ondemand.c
+ *
+ * Copyright (C) 2001 Russell King
+ * (C) 2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>.
+ * Jun Nakajima <jun.nakajima@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/cpufreq.h>
+#include <linux/cpu.h>
+#include <linux/jiffies.h>
+#include <linux/kernel_stat.h>
+#include <linux/mutex.h>
+#include <linux/hrtimer.h>
+#include <linux/tick.h>
+#include <linux/ktime.h>
+#include <linux/sched.h>
+
+/*
+ * dbs is used in this file as a shortform for demandbased switching
+ * It helps to keep variable names smaller, simpler
+ */
+
+#define DEF_FREQUENCY_DOWN_DIFFERENTIAL (10)
+#define DEF_FREQUENCY_UP_THRESHOLD (80)
+#define DEF_SAMPLING_DOWN_FACTOR (1)
+#define MAX_SAMPLING_DOWN_FACTOR (100000)
+#define MICRO_FREQUENCY_DOWN_DIFFERENTIAL (3)
+#define MICRO_FREQUENCY_UP_THRESHOLD (95)
+#define MICRO_FREQUENCY_MIN_SAMPLE_RATE (10000)
+#define MIN_FREQUENCY_UP_THRESHOLD (11)
+#define MAX_FREQUENCY_UP_THRESHOLD (100)
+
+/*
+ * The polling frequency of this governor depends on the capability of
+ * the processor. Default polling frequency is 1000 times the transition
+ * latency of the processor. The governor will work on any processor with
+ * transition latency <= 10mS, using appropriate sampling
+ * rate.
+ * For CPUs with transition latency > 10mS (mostly drivers with CPUFREQ_ETERNAL)
+ * this governor will not work.
+ * All times here are in uS.
+ */
+#define MIN_SAMPLING_RATE_RATIO (2)
+
+static unsigned int min_sampling_rate;
+
+#define LATENCY_MULTIPLIER (1000)
+#define MIN_LATENCY_MULTIPLIER (100)
+#define TRANSITION_LATENCY_LIMIT (10 * 1000 * 1000)
+
+static void do_dbs_timer(struct work_struct *work);
+static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
+ unsigned int event);
+
+#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND
+static
+#endif
+struct cpufreq_governor cpufreq_gov_ondemand = {
+ .name = "ondemand",
+ .governor = cpufreq_governor_dbs,
+ .max_transition_latency = TRANSITION_LATENCY_LIMIT,
+ .owner = THIS_MODULE,
+};
+
+/* Sampling types */
+enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE};
+
+struct cpu_dbs_info_s {
+ cputime64_t prev_cpu_idle;
+ cputime64_t prev_cpu_iowait;
+ cputime64_t prev_cpu_wall;
+ cputime64_t prev_cpu_nice;
+ struct cpufreq_policy *cur_policy;
+ struct delayed_work work;
+ struct cpufreq_frequency_table *freq_table;
+ unsigned int freq_lo;
+ unsigned int freq_lo_jiffies;
+ unsigned int freq_hi_jiffies;
+ unsigned int rate_mult;
+ int cpu;
+ unsigned int sample_type:1;
+ /*
+ * percpu mutex that serializes governor limit change with
+ * do_dbs_timer invocation. We do not want do_dbs_timer to run
+ * when user is changing the governor or limits.
+ */
+ struct mutex timer_mutex;
+};
+static DEFINE_PER_CPU(struct cpu_dbs_info_s, od_cpu_dbs_info);
+
+static unsigned int dbs_enable; /* number of CPUs using this policy */
+
+/*
+ * dbs_mutex protects dbs_enable in governor start/stop.
+ */
+static DEFINE_MUTEX(dbs_mutex);
+
+static struct dbs_tuners {
+ unsigned int sampling_rate;
+ unsigned int up_threshold;
+ unsigned int down_differential;
+ unsigned int ignore_nice;
+ unsigned int sampling_down_factor;
+ unsigned int powersave_bias;
+ unsigned int io_is_busy;
+} dbs_tuners_ins = {
+ .up_threshold = DEF_FREQUENCY_UP_THRESHOLD,
+ .sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR,
+ .down_differential = DEF_FREQUENCY_DOWN_DIFFERENTIAL,
+ .ignore_nice = 0,
+ .powersave_bias = 0,
+};
+
+static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu,
+ cputime64_t *wall)
+{
+ cputime64_t idle_time;
+ cputime64_t cur_wall_time;
+ cputime64_t busy_time;
+
+ cur_wall_time = jiffies64_to_cputime64(get_jiffies_64());
+ busy_time = cputime64_add(kstat_cpu(cpu).cpustat.user,
+ kstat_cpu(cpu).cpustat.system);
+
+ busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.irq);
+ busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.softirq);
+ busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.steal);
+ busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.nice);
+
+ idle_time = cputime64_sub(cur_wall_time, busy_time);
+ if (wall)
+ *wall = (cputime64_t)jiffies_to_usecs(cur_wall_time);
+
+ return (cputime64_t)jiffies_to_usecs(idle_time);
+}
+
+static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall)
+{
+ u64 idle_time = get_cpu_idle_time_us(cpu, wall);
+
+ if (idle_time == -1ULL)
+ return get_cpu_idle_time_jiffy(cpu, wall);
+
+ return idle_time;
+}
+
+static inline cputime64_t get_cpu_iowait_time(unsigned int cpu, cputime64_t *wall)
+{
+ u64 iowait_time = get_cpu_iowait_time_us(cpu, wall);
+
+ if (iowait_time == -1ULL)
+ return 0;
+
+ return iowait_time;
+}
+
+/*
+ * Find right freq to be set now with powersave_bias on.
+ * Returns the freq_hi to be used right now and will set freq_hi_jiffies,
+ * freq_lo, and freq_lo_jiffies in percpu area for averaging freqs.
+ */
+static unsigned int powersave_bias_target(struct cpufreq_policy *policy,
+ unsigned int freq_next,
+ unsigned int relation)
+{
+ unsigned int freq_req, freq_reduc, freq_avg;
+ unsigned int freq_hi, freq_lo;
+ unsigned int index = 0;
+ unsigned int jiffies_total, jiffies_hi, jiffies_lo;
+ struct cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info,
+ policy->cpu);
+
+ if (!dbs_info->freq_table) {
+ dbs_info->freq_lo = 0;
+ dbs_info->freq_lo_jiffies = 0;
+ return freq_next;
+ }
+
+ cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_next,
+ relation, &index);
+ freq_req = dbs_info->freq_table[index].frequency;
+ freq_reduc = freq_req * dbs_tuners_ins.powersave_bias / 1000;
+ freq_avg = freq_req - freq_reduc;
+
+ /* Find freq bounds for freq_avg in freq_table */
+ index = 0;
+ cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_avg,
+ CPUFREQ_RELATION_H, &index);
+ freq_lo = dbs_info->freq_table[index].frequency;
+ index = 0;
+ cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_avg,
+ CPUFREQ_RELATION_L, &index);
+ freq_hi = dbs_info->freq_table[index].frequency;
+
+ /* Find out how long we have to be in hi and lo freqs */
+ if (freq_hi == freq_lo) {
+ dbs_info->freq_lo = 0;
+ dbs_info->freq_lo_jiffies = 0;
+ return freq_lo;
+ }
+ jiffies_total = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
+ jiffies_hi = (freq_avg - freq_lo) * jiffies_total;
+ jiffies_hi += ((freq_hi - freq_lo) / 2);
+ jiffies_hi /= (freq_hi - freq_lo);
+ jiffies_lo = jiffies_total - jiffies_hi;
+ dbs_info->freq_lo = freq_lo;
+ dbs_info->freq_lo_jiffies = jiffies_lo;
+ dbs_info->freq_hi_jiffies = jiffies_hi;
+ return freq_hi;
+}
+
+static void ondemand_powersave_bias_init_cpu(int cpu)
+{
+ struct cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu);
+ dbs_info->freq_table = cpufreq_frequency_get_table(cpu);
+ dbs_info->freq_lo = 0;
+}
+
+static void ondemand_powersave_bias_init(void)
+{
+ int i;
+ for_each_online_cpu(i) {
+ ondemand_powersave_bias_init_cpu(i);
+ }
+}
+
+/************************** sysfs interface ************************/
+
+static ssize_t show_sampling_rate_min(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ return sprintf(buf, "%u\n", min_sampling_rate);
+}
+
+define_one_global_ro(sampling_rate_min);
+
+/* cpufreq_ondemand Governor Tunables */
+#define show_one(file_name, object) \
+static ssize_t show_##file_name \
+(struct kobject *kobj, struct attribute *attr, char *buf) \
+{ \
+ return sprintf(buf, "%u\n", dbs_tuners_ins.object); \
+}
+show_one(sampling_rate, sampling_rate);
+show_one(io_is_busy, io_is_busy);
+show_one(up_threshold, up_threshold);
+show_one(sampling_down_factor, sampling_down_factor);
+show_one(ignore_nice_load, ignore_nice);
+show_one(powersave_bias, powersave_bias);
+
+static ssize_t store_sampling_rate(struct kobject *a, struct attribute *b,
+ const char *buf, size_t count)
+{
+ unsigned int input;
+ int ret;
+ ret = sscanf(buf, "%u", &input);
+ if (ret != 1)
+ return -EINVAL;
+ dbs_tuners_ins.sampling_rate = max(input, min_sampling_rate);
+ return count;
+}
+
+static ssize_t store_io_is_busy(struct kobject *a, struct attribute *b,
+ const char *buf, size_t count)
+{
+ unsigned int input;
+ int ret;
+
+ ret = sscanf(buf, "%u", &input);
+ if (ret != 1)
+ return -EINVAL;
+ dbs_tuners_ins.io_is_busy = !!input;
+ return count;
+}
+
+static ssize_t store_up_threshold(struct kobject *a, struct attribute *b,
+ const char *buf, size_t count)
+{
+ unsigned int input;
+ int ret;
+ ret = sscanf(buf, "%u", &input);
+
+ if (ret != 1 || input > MAX_FREQUENCY_UP_THRESHOLD ||
+ input < MIN_FREQUENCY_UP_THRESHOLD) {
+ return -EINVAL;
+ }
+ dbs_tuners_ins.up_threshold = input;
+ return count;
+}
+
+static ssize_t store_sampling_down_factor(struct kobject *a,
+ struct attribute *b, const char *buf, size_t count)
+{
+ unsigned int input, j;
+ int ret;
+ ret = sscanf(buf, "%u", &input);
+
+ if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1)
+ return -EINVAL;
+ dbs_tuners_ins.sampling_down_factor = input;
+
+ /* Reset down sampling multiplier in case it was active */
+ for_each_online_cpu(j) {
+ struct cpu_dbs_info_s *dbs_info;
+ dbs_info = &per_cpu(od_cpu_dbs_info, j);
+ dbs_info->rate_mult = 1;
+ }
+ return count;
+}
+
+static ssize_t store_ignore_nice_load(struct kobject *a, struct attribute *b,
+ const char *buf, size_t count)
+{
+ unsigned int input;
+ int ret;
+
+ unsigned int j;
+
+ ret = sscanf(buf, "%u", &input);
+ if (ret != 1)
+ return -EINVAL;
+
+ if (input > 1)
+ input = 1;
+
+ if (input == dbs_tuners_ins.ignore_nice) { /* nothing to do */
+ return count;
+ }
+ dbs_tuners_ins.ignore_nice = input;
+
+ /* we need to re-evaluate prev_cpu_idle */
+ for_each_online_cpu(j) {
+ struct cpu_dbs_info_s *dbs_info;
+ dbs_info = &per_cpu(od_cpu_dbs_info, j);
+ dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
+ &dbs_info->prev_cpu_wall);
+ if (dbs_tuners_ins.ignore_nice)
+ dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice;
+
+ }
+ return count;
+}
+
+static ssize_t store_powersave_bias(struct kobject *a, struct attribute *b,
+ const char *buf, size_t count)
+{
+ unsigned int input;
+ int ret;
+ ret = sscanf(buf, "%u", &input);
+
+ if (ret != 1)
+ return -EINVAL;
+
+ if (input > 1000)
+ input = 1000;
+
+ dbs_tuners_ins.powersave_bias = input;
+ ondemand_powersave_bias_init();
+ return count;
+}
+
+define_one_global_rw(sampling_rate);
+define_one_global_rw(io_is_busy);
+define_one_global_rw(up_threshold);
+define_one_global_rw(sampling_down_factor);
+define_one_global_rw(ignore_nice_load);
+define_one_global_rw(powersave_bias);
+
+static struct attribute *dbs_attributes[] = {
+ &sampling_rate_min.attr,
+ &sampling_rate.attr,
+ &up_threshold.attr,
+ &sampling_down_factor.attr,
+ &ignore_nice_load.attr,
+ &powersave_bias.attr,
+ &io_is_busy.attr,
+ NULL
+};
+
+static struct attribute_group dbs_attr_group = {
+ .attrs = dbs_attributes,
+ .name = "ondemand",
+};
+
+/************************** sysfs end ************************/
+
+static void dbs_freq_increase(struct cpufreq_policy *p, unsigned int freq)
+{
+ if (dbs_tuners_ins.powersave_bias)
+ freq = powersave_bias_target(p, freq, CPUFREQ_RELATION_H);
+ else if (p->cur == p->max)
+ return;
+
+ __cpufreq_driver_target(p, freq, dbs_tuners_ins.powersave_bias ?
+ CPUFREQ_RELATION_L : CPUFREQ_RELATION_H);
+}
+
+static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
+{
+ unsigned int max_load_freq;
+
+ struct cpufreq_policy *policy;
+ unsigned int j;
+
+ this_dbs_info->freq_lo = 0;
+ policy = this_dbs_info->cur_policy;
+
+ /*
+ * Every sampling_rate, we check, if current idle time is less
+ * than 20% (default), then we try to increase frequency
+ * Every sampling_rate, we look for a the lowest
+ * frequency which can sustain the load while keeping idle time over
+ * 30%. If such a frequency exist, we try to decrease to this frequency.
+ *
+ * Any frequency increase takes it to the maximum frequency.
+ * Frequency reduction happens at minimum steps of
+ * 5% (default) of current frequency
+ */
+
+ /* Get Absolute Load - in terms of freq */
+ max_load_freq = 0;
+
+ for_each_cpu(j, policy->cpus) {
+ struct cpu_dbs_info_s *j_dbs_info;
+ cputime64_t cur_wall_time, cur_idle_time, cur_iowait_time;
+ unsigned int idle_time, wall_time, iowait_time;
+ unsigned int load, load_freq;
+ int freq_avg;
+
+ j_dbs_info = &per_cpu(od_cpu_dbs_info, j);
+
+ cur_idle_time = get_cpu_idle_time(j, &cur_wall_time);
+ cur_iowait_time = get_cpu_iowait_time(j, &cur_wall_time);
+
+ wall_time = (unsigned int) cputime64_sub(cur_wall_time,
+ j_dbs_info->prev_cpu_wall);
+ j_dbs_info->prev_cpu_wall = cur_wall_time;
+
+ idle_time = (unsigned int) cputime64_sub(cur_idle_time,
+ j_dbs_info->prev_cpu_idle);
+ j_dbs_info->prev_cpu_idle = cur_idle_time;
+
+ iowait_time = (unsigned int) cputime64_sub(cur_iowait_time,
+ j_dbs_info->prev_cpu_iowait);
+ j_dbs_info->prev_cpu_iowait = cur_iowait_time;
+
+ if (dbs_tuners_ins.ignore_nice) {
+ cputime64_t cur_nice;
+ unsigned long cur_nice_jiffies;
+
+ cur_nice = cputime64_sub(kstat_cpu(j).cpustat.nice,
+ j_dbs_info->prev_cpu_nice);
+ /*
+ * Assumption: nice time between sampling periods will
+ * be less than 2^32 jiffies for 32 bit sys
+ */
+ cur_nice_jiffies = (unsigned long)
+ cputime64_to_jiffies64(cur_nice);
+
+ j_dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice;
+ idle_time += jiffies_to_usecs(cur_nice_jiffies);
+ }
+
+ /*
+ * For the purpose of ondemand, waiting for disk IO is an
+ * indication that you're performance critical, and not that
+ * the system is actually idle. So subtract the iowait time
+ * from the cpu idle time.
+ */
+
+ if (dbs_tuners_ins.io_is_busy && idle_time >= iowait_time)
+ idle_time -= iowait_time;
+
+ if (unlikely(!wall_time || wall_time < idle_time))
+ continue;
+
+ load = 100 * (wall_time - idle_time) / wall_time;
+
+ freq_avg = __cpufreq_driver_getavg(policy, j);
+ if (freq_avg <= 0)
+ freq_avg = policy->cur;
+
+ load_freq = load * freq_avg;
+ if (load_freq > max_load_freq)
+ max_load_freq = load_freq;
+ }
+
+ /* Check for frequency increase */
+ if (max_load_freq > dbs_tuners_ins.up_threshold * policy->cur) {
+ /* If switching to max speed, apply sampling_down_factor */
+ if (policy->cur < policy->max)
+ this_dbs_info->rate_mult =
+ dbs_tuners_ins.sampling_down_factor;
+ dbs_freq_increase(policy, policy->max);
+ return;
+ }
+
+ /* Check for frequency decrease */
+ /* if we cannot reduce the frequency anymore, break out early */
+ if (policy->cur == policy->min)
+ return;
+
+ /*
+ * The optimal frequency is the frequency that is the lowest that
+ * can support the current CPU usage without triggering the up
+ * policy. To be safe, we focus 10 points under the threshold.
+ */
+ if (max_load_freq <
+ (dbs_tuners_ins.up_threshold - dbs_tuners_ins.down_differential) *
+ policy->cur) {
+ unsigned int freq_next;
+ freq_next = max_load_freq /
+ (dbs_tuners_ins.up_threshold -
+ dbs_tuners_ins.down_differential);
+
+ /* No longer fully busy, reset rate_mult */
+ this_dbs_info->rate_mult = 1;
+
+ if (freq_next < policy->min)
+ freq_next = policy->min;
+
+ if (!dbs_tuners_ins.powersave_bias) {
+ __cpufreq_driver_target(policy, freq_next,
+ CPUFREQ_RELATION_L);
+ } else {
+ int freq = powersave_bias_target(policy, freq_next,
+ CPUFREQ_RELATION_L);
+ __cpufreq_driver_target(policy, freq,
+ CPUFREQ_RELATION_L);
+ }
+ }
+}
+
+static void do_dbs_timer(struct work_struct *work)
+{
+ struct cpu_dbs_info_s *dbs_info =
+ container_of(work, struct cpu_dbs_info_s, work.work);
+ unsigned int cpu = dbs_info->cpu;
+ int sample_type = dbs_info->sample_type;
+
+ int delay;
+
+ mutex_lock(&dbs_info->timer_mutex);
+
+ /* Common NORMAL_SAMPLE setup */
+ dbs_info->sample_type = DBS_NORMAL_SAMPLE;
+ if (!dbs_tuners_ins.powersave_bias ||
+ sample_type == DBS_NORMAL_SAMPLE) {
+ dbs_check_cpu(dbs_info);
+ if (dbs_info->freq_lo) {
+ /* Setup timer for SUB_SAMPLE */
+ dbs_info->sample_type = DBS_SUB_SAMPLE;
+ delay = dbs_info->freq_hi_jiffies;
+ } else {
+ /* We want all CPUs to do sampling nearly on
+ * same jiffy
+ */
+ delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate
+ * dbs_info->rate_mult);
+
+ if (num_online_cpus() > 1)
+ delay -= jiffies % delay;
+ }
+ } else {
+ __cpufreq_driver_target(dbs_info->cur_policy,
+ dbs_info->freq_lo, CPUFREQ_RELATION_H);
+ delay = dbs_info->freq_lo_jiffies;
+ }
+ schedule_delayed_work_on(cpu, &dbs_info->work, delay);
+ mutex_unlock(&dbs_info->timer_mutex);
+}
+
+static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info)
+{
+ /* We want all CPUs to do sampling nearly on same jiffy */
+ int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
+
+ if (num_online_cpus() > 1)
+ delay -= jiffies % delay;
+
+ dbs_info->sample_type = DBS_NORMAL_SAMPLE;
+ INIT_DELAYED_WORK_DEFERRABLE(&dbs_info->work, do_dbs_timer);
+ schedule_delayed_work_on(dbs_info->cpu, &dbs_info->work, delay);
+}
+
+static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info)
+{
+ cancel_delayed_work_sync(&dbs_info->work);
+}
+
+/*
+ * Not all CPUs want IO time to be accounted as busy; this dependson how
+ * efficient idling at a higher frequency/voltage is.
+ * Pavel Machek says this is not so for various generations of AMD and old
+ * Intel systems.
+ * Mike Chan (androidlcom) calis this is also not true for ARM.
+ * Because of this, whitelist specific known (series) of CPUs by default, and
+ * leave all others up to the user.
+ */
+static int should_io_be_busy(void)
+{
+#if defined(CONFIG_X86)
+ /*
+ * For Intel, Core 2 (model 15) andl later have an efficient idle.
+ */
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
+ boot_cpu_data.x86 == 6 &&
+ boot_cpu_data.x86_model >= 15)
+ return 1;
+#endif
+ return 0;
+}
+
+static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
+ unsigned int event)
+{
+ unsigned int cpu = policy->cpu;
+ struct cpu_dbs_info_s *this_dbs_info;
+ unsigned int j;
+ int rc;
+
+ this_dbs_info = &per_cpu(od_cpu_dbs_info, cpu);
+
+ switch (event) {
+ case CPUFREQ_GOV_START:
+ if ((!cpu_online(cpu)) || (!policy->cur))
+ return -EINVAL;
+
+ mutex_lock(&dbs_mutex);
+
+ dbs_enable++;
+ for_each_cpu(j, policy->cpus) {
+ struct cpu_dbs_info_s *j_dbs_info;
+ j_dbs_info = &per_cpu(od_cpu_dbs_info, j);
+ j_dbs_info->cur_policy = policy;
+
+ j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
+ &j_dbs_info->prev_cpu_wall);
+ if (dbs_tuners_ins.ignore_nice) {
+ j_dbs_info->prev_cpu_nice =
+ kstat_cpu(j).cpustat.nice;
+ }
+ }
+ this_dbs_info->cpu = cpu;
+ this_dbs_info->rate_mult = 1;
+ ondemand_powersave_bias_init_cpu(cpu);
+ /*
+ * Start the timerschedule work, when this governor
+ * is used for first time
+ */
+ if (dbs_enable == 1) {
+ unsigned int latency;
+
+ rc = sysfs_create_group(cpufreq_global_kobject,
+ &dbs_attr_group);
+ if (rc) {
+ mutex_unlock(&dbs_mutex);
+ return rc;
+ }
+
+ /* policy latency is in nS. Convert it to uS first */
+ latency = policy->cpuinfo.transition_latency / 1000;
+ if (latency == 0)
+ latency = 1;
+ /* Bring kernel and HW constraints together */
+ min_sampling_rate = max(min_sampling_rate,
+ MIN_LATENCY_MULTIPLIER * latency);
+ dbs_tuners_ins.sampling_rate =
+ max(min_sampling_rate,
+ latency * LATENCY_MULTIPLIER);
+ dbs_tuners_ins.io_is_busy = should_io_be_busy();
+ }
+ mutex_unlock(&dbs_mutex);
+
+ mutex_init(&this_dbs_info->timer_mutex);
+ dbs_timer_init(this_dbs_info);
+ break;
+
+ case CPUFREQ_GOV_STOP:
+ dbs_timer_exit(this_dbs_info);
+
+ mutex_lock(&dbs_mutex);
+ mutex_destroy(&this_dbs_info->timer_mutex);
+ dbs_enable--;
+ mutex_unlock(&dbs_mutex);
+ if (!dbs_enable)
+ sysfs_remove_group(cpufreq_global_kobject,
+ &dbs_attr_group);
+
+ break;
+
+ case CPUFREQ_GOV_LIMITS:
+ mutex_lock(&this_dbs_info->timer_mutex);
+ if (policy->max < this_dbs_info->cur_policy->cur)
+ __cpufreq_driver_target(this_dbs_info->cur_policy,
+ policy->max, CPUFREQ_RELATION_H);
+ else if (policy->min > this_dbs_info->cur_policy->cur)
+ __cpufreq_driver_target(this_dbs_info->cur_policy,
+ policy->min, CPUFREQ_RELATION_L);
+ mutex_unlock(&this_dbs_info->timer_mutex);
+ break;
+ }
+ return 0;
+}
+
+static int __init cpufreq_gov_dbs_init(void)
+{
+ cputime64_t wall;
+ u64 idle_time;
+ int cpu = get_cpu();
+
+ idle_time = get_cpu_idle_time_us(cpu, &wall);
+ put_cpu();
+ if (idle_time != -1ULL) {
+ /* Idle micro accounting is supported. Use finer thresholds */
+ dbs_tuners_ins.up_threshold = MICRO_FREQUENCY_UP_THRESHOLD;
+ dbs_tuners_ins.down_differential =
+ MICRO_FREQUENCY_DOWN_DIFFERENTIAL;
+ /*
+ * In no_hz/micro accounting case we set the minimum frequency
+ * not depending on HZ, but fixed (very low). The deferred
+ * timer might skip some samples if idle/sleeping as needed.
+ */
+ min_sampling_rate = MICRO_FREQUENCY_MIN_SAMPLE_RATE;
+ } else {
+ /* For correct statistics, we need 10 ticks for each measure */
+ min_sampling_rate =
+ MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10);
+ }
+
+ return cpufreq_register_governor(&cpufreq_gov_ondemand);
+}
+
+static void __exit cpufreq_gov_dbs_exit(void)
+{
+ cpufreq_unregister_governor(&cpufreq_gov_ondemand);
+}
+
+
+MODULE_AUTHOR("Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>");
+MODULE_AUTHOR("Alexey Starikovskiy <alexey.y.starikovskiy@intel.com>");
+MODULE_DESCRIPTION("'cpufreq_ondemand' - A dynamic cpufreq governor for "
+ "Low Latency Frequency Transition capable processors");
+MODULE_LICENSE("GPL");
+
+#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND
+fs_initcall(cpufreq_gov_dbs_init);
+#else
+module_init(cpufreq_gov_dbs_init);
+#endif
+module_exit(cpufreq_gov_dbs_exit);
diff --git a/drivers/cpufreq/cpufreq_performance.c b/drivers/cpufreq/cpufreq_performance.c
new file mode 100644
index 00000000..f13a8a9a
--- /dev/null
+++ b/drivers/cpufreq/cpufreq_performance.c
@@ -0,0 +1,63 @@
+/*
+ * linux/drivers/cpufreq/cpufreq_performance.c
+ *
+ * Copyright (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/cpufreq.h>
+#include <linux/init.h>
+
+
+static int cpufreq_governor_performance(struct cpufreq_policy *policy,
+ unsigned int event)
+{
+ switch (event) {
+ case CPUFREQ_GOV_START:
+ case CPUFREQ_GOV_LIMITS:
+ pr_debug("setting to %u kHz because of event %u\n",
+ policy->max, event);
+ __cpufreq_driver_target(policy, policy->max,
+ CPUFREQ_RELATION_H);
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+#ifdef CONFIG_CPU_FREQ_GOV_PERFORMANCE_MODULE
+static
+#endif
+struct cpufreq_governor cpufreq_gov_performance = {
+ .name = "performance",
+ .governor = cpufreq_governor_performance,
+ .owner = THIS_MODULE,
+};
+
+
+static int __init cpufreq_gov_performance_init(void)
+{
+ return cpufreq_register_governor(&cpufreq_gov_performance);
+}
+
+
+static void __exit cpufreq_gov_performance_exit(void)
+{
+ cpufreq_unregister_governor(&cpufreq_gov_performance);
+}
+
+
+MODULE_AUTHOR("Dominik Brodowski <linux@brodo.de>");
+MODULE_DESCRIPTION("CPUfreq policy governor 'performance'");
+MODULE_LICENSE("GPL");
+
+fs_initcall(cpufreq_gov_performance_init);
+module_exit(cpufreq_gov_performance_exit);
diff --git a/drivers/cpufreq/cpufreq_powersave.c b/drivers/cpufreq/cpufreq_powersave.c
new file mode 100644
index 00000000..4c2eb512
--- /dev/null
+++ b/drivers/cpufreq/cpufreq_powersave.c
@@ -0,0 +1,65 @@
+/*
+ * linux/drivers/cpufreq/cpufreq_powersave.c
+ *
+ * Copyright (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/cpufreq.h>
+#include <linux/init.h>
+
+static int cpufreq_governor_powersave(struct cpufreq_policy *policy,
+ unsigned int event)
+{
+ switch (event) {
+ case CPUFREQ_GOV_START:
+ case CPUFREQ_GOV_LIMITS:
+ pr_debug("setting to %u kHz because of event %u\n",
+ policy->min, event);
+ __cpufreq_driver_target(policy, policy->min,
+ CPUFREQ_RELATION_L);
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE
+static
+#endif
+struct cpufreq_governor cpufreq_gov_powersave = {
+ .name = "powersave",
+ .governor = cpufreq_governor_powersave,
+ .owner = THIS_MODULE,
+};
+
+static int __init cpufreq_gov_powersave_init(void)
+{
+ return cpufreq_register_governor(&cpufreq_gov_powersave);
+}
+
+
+static void __exit cpufreq_gov_powersave_exit(void)
+{
+ cpufreq_unregister_governor(&cpufreq_gov_powersave);
+}
+
+
+MODULE_AUTHOR("Dominik Brodowski <linux@brodo.de>");
+MODULE_DESCRIPTION("CPUfreq policy governor 'powersave'");
+MODULE_LICENSE("GPL");
+
+#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE
+fs_initcall(cpufreq_gov_powersave_init);
+#else
+module_init(cpufreq_gov_powersave_init);
+#endif
+module_exit(cpufreq_gov_powersave_exit);
diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c
new file mode 100644
index 00000000..c315ec9d
--- /dev/null
+++ b/drivers/cpufreq/cpufreq_stats.c
@@ -0,0 +1,427 @@
+/*
+ * drivers/cpufreq/cpufreq_stats.c
+ *
+ * Copyright (C) 2003-2004 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>.
+ * (C) 2004 Zou Nan hai <nanhai.zou@intel.com>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/sysdev.h>
+#include <linux/cpu.h>
+#include <linux/sysfs.h>
+#include <linux/cpufreq.h>
+#include <linux/jiffies.h>
+#include <linux/percpu.h>
+#include <linux/kobject.h>
+#include <linux/spinlock.h>
+#include <linux/notifier.h>
+#include <asm/cputime.h>
+
+static spinlock_t cpufreq_stats_lock;
+
+#define CPUFREQ_STATDEVICE_ATTR(_name, _mode, _show) \
+static struct freq_attr _attr_##_name = {\
+ .attr = {.name = __stringify(_name), .mode = _mode, }, \
+ .show = _show,\
+};
+
+struct cpufreq_stats {
+ unsigned int cpu;
+ unsigned int total_trans;
+ unsigned long long last_time;
+ unsigned int max_state;
+ unsigned int state_num;
+ unsigned int last_index;
+ cputime64_t *time_in_state;
+ unsigned int *freq_table;
+#ifdef CONFIG_CPU_FREQ_STAT_DETAILS
+ unsigned int *trans_table;
+#endif
+};
+
+static DEFINE_PER_CPU(struct cpufreq_stats *, cpufreq_stats_table);
+
+struct cpufreq_stats_attribute {
+ struct attribute attr;
+ ssize_t(*show) (struct cpufreq_stats *, char *);
+};
+
+static int cpufreq_stats_update(unsigned int cpu)
+{
+ struct cpufreq_stats *stat;
+ unsigned long long cur_time;
+
+ cur_time = get_jiffies_64();
+ spin_lock(&cpufreq_stats_lock);
+ stat = per_cpu(cpufreq_stats_table, cpu);
+ if (stat->time_in_state)
+ stat->time_in_state[stat->last_index] =
+ cputime64_add(stat->time_in_state[stat->last_index],
+ cputime_sub(cur_time, stat->last_time));
+ stat->last_time = cur_time;
+ spin_unlock(&cpufreq_stats_lock);
+ return 0;
+}
+
+static ssize_t show_total_trans(struct cpufreq_policy *policy, char *buf)
+{
+ struct cpufreq_stats *stat = per_cpu(cpufreq_stats_table, policy->cpu);
+ if (!stat)
+ return 0;
+ return sprintf(buf, "%d\n",
+ per_cpu(cpufreq_stats_table, stat->cpu)->total_trans);
+}
+
+static ssize_t show_time_in_state(struct cpufreq_policy *policy, char *buf)
+{
+ ssize_t len = 0;
+ int i;
+ struct cpufreq_stats *stat = per_cpu(cpufreq_stats_table, policy->cpu);
+ if (!stat)
+ return 0;
+ cpufreq_stats_update(stat->cpu);
+ for (i = 0; i < stat->state_num; i++) {
+ len += sprintf(buf + len, "%u %llu\n", stat->freq_table[i],
+ (unsigned long long)
+ cputime64_to_clock_t(stat->time_in_state[i]));
+ }
+ return len;
+}
+
+#ifdef CONFIG_CPU_FREQ_STAT_DETAILS
+static ssize_t show_trans_table(struct cpufreq_policy *policy, char *buf)
+{
+ ssize_t len = 0;
+ int i, j;
+
+ struct cpufreq_stats *stat = per_cpu(cpufreq_stats_table, policy->cpu);
+ if (!stat)
+ return 0;
+ cpufreq_stats_update(stat->cpu);
+ len += snprintf(buf + len, PAGE_SIZE - len, " From : To\n");
+ len += snprintf(buf + len, PAGE_SIZE - len, " : ");
+ for (i = 0; i < stat->state_num; i++) {
+ if (len >= PAGE_SIZE)
+ break;
+ len += snprintf(buf + len, PAGE_SIZE - len, "%9u ",
+ stat->freq_table[i]);
+ }
+ if (len >= PAGE_SIZE)
+ return PAGE_SIZE;
+
+ len += snprintf(buf + len, PAGE_SIZE - len, "\n");
+
+ for (i = 0; i < stat->state_num; i++) {
+ if (len >= PAGE_SIZE)
+ break;
+
+ len += snprintf(buf + len, PAGE_SIZE - len, "%9u: ",
+ stat->freq_table[i]);
+
+ for (j = 0; j < stat->state_num; j++) {
+ if (len >= PAGE_SIZE)
+ break;
+ len += snprintf(buf + len, PAGE_SIZE - len, "%9u ",
+ stat->trans_table[i*stat->max_state+j]);
+ }
+ if (len >= PAGE_SIZE)
+ break;
+ len += snprintf(buf + len, PAGE_SIZE - len, "\n");
+ }
+ if (len >= PAGE_SIZE)
+ return PAGE_SIZE;
+ return len;
+}
+CPUFREQ_STATDEVICE_ATTR(trans_table, 0444, show_trans_table);
+#endif
+
+CPUFREQ_STATDEVICE_ATTR(total_trans, 0444, show_total_trans);
+CPUFREQ_STATDEVICE_ATTR(time_in_state, 0444, show_time_in_state);
+
+static struct attribute *default_attrs[] = {
+ &_attr_total_trans.attr,
+ &_attr_time_in_state.attr,
+#ifdef CONFIG_CPU_FREQ_STAT_DETAILS
+ &_attr_trans_table.attr,
+#endif
+ NULL
+};
+static struct attribute_group stats_attr_group = {
+ .attrs = default_attrs,
+ .name = "stats"
+};
+
+static int freq_table_get_index(struct cpufreq_stats *stat, unsigned int freq)
+{
+ int index;
+ for (index = 0; index < stat->max_state; index++)
+ if (stat->freq_table[index] == freq)
+ return index;
+ return -1;
+}
+
+/* should be called late in the CPU removal sequence so that the stats
+ * memory is still available in case someone tries to use it.
+ */
+static void cpufreq_stats_free_table(unsigned int cpu)
+{
+ struct cpufreq_stats *stat = per_cpu(cpufreq_stats_table, cpu);
+ if (stat) {
+ kfree(stat->time_in_state);
+ kfree(stat);
+ }
+ per_cpu(cpufreq_stats_table, cpu) = NULL;
+}
+
+/* must be called early in the CPU removal sequence (before
+ * cpufreq_remove_dev) so that policy is still valid.
+ */
+static void cpufreq_stats_free_sysfs(unsigned int cpu)
+{
+ struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
+ if (policy && policy->cpu == cpu)
+ sysfs_remove_group(&policy->kobj, &stats_attr_group);
+ if (policy)
+ cpufreq_cpu_put(policy);
+}
+
+static int cpufreq_stats_create_table(struct cpufreq_policy *policy,
+ struct cpufreq_frequency_table *table)
+{
+ unsigned int i, j, count = 0, ret = 0;
+ struct cpufreq_stats *stat;
+ struct cpufreq_policy *data;
+ unsigned int alloc_size;
+ unsigned int cpu = policy->cpu;
+ if (per_cpu(cpufreq_stats_table, cpu))
+ return -EBUSY;
+ stat = kzalloc(sizeof(struct cpufreq_stats), GFP_KERNEL);
+ if ((stat) == NULL)
+ return -ENOMEM;
+
+ data = cpufreq_cpu_get(cpu);
+ if (data == NULL) {
+ ret = -EINVAL;
+ goto error_get_fail;
+ }
+
+ ret = sysfs_create_group(&data->kobj, &stats_attr_group);
+ if (ret)
+ goto error_out;
+
+ stat->cpu = cpu;
+ per_cpu(cpufreq_stats_table, cpu) = stat;
+
+ for (i = 0; table[i].frequency != CPUFREQ_TABLE_END; i++) {
+ unsigned int freq = table[i].frequency;
+ if (freq == CPUFREQ_ENTRY_INVALID)
+ continue;
+ count++;
+ }
+
+ alloc_size = count * sizeof(int) + count * sizeof(cputime64_t);
+
+#ifdef CONFIG_CPU_FREQ_STAT_DETAILS
+ alloc_size += count * count * sizeof(int);
+#endif
+ stat->max_state = count;
+ stat->time_in_state = kzalloc(alloc_size, GFP_KERNEL);
+ if (!stat->time_in_state) {
+ ret = -ENOMEM;
+ goto error_out;
+ }
+ stat->freq_table = (unsigned int *)(stat->time_in_state + count);
+
+#ifdef CONFIG_CPU_FREQ_STAT_DETAILS
+ stat->trans_table = stat->freq_table + count;
+#endif
+ j = 0;
+ for (i = 0; table[i].frequency != CPUFREQ_TABLE_END; i++) {
+ unsigned int freq = table[i].frequency;
+ if (freq == CPUFREQ_ENTRY_INVALID)
+ continue;
+ if (freq_table_get_index(stat, freq) == -1)
+ stat->freq_table[j++] = freq;
+ }
+ stat->state_num = j;
+ spin_lock(&cpufreq_stats_lock);
+ stat->last_time = get_jiffies_64();
+ stat->last_index = freq_table_get_index(stat, policy->cur);
+ spin_unlock(&cpufreq_stats_lock);
+ cpufreq_cpu_put(data);
+ return 0;
+error_out:
+ cpufreq_cpu_put(data);
+error_get_fail:
+ kfree(stat);
+ per_cpu(cpufreq_stats_table, cpu) = NULL;
+ return ret;
+}
+
+static int cpufreq_stat_notifier_policy(struct notifier_block *nb,
+ unsigned long val, void *data)
+{
+ int ret;
+ struct cpufreq_policy *policy = data;
+ struct cpufreq_frequency_table *table;
+ unsigned int cpu = policy->cpu;
+ if (val != CPUFREQ_NOTIFY)
+ return 0;
+ table = cpufreq_frequency_get_table(cpu);
+ if (!table)
+ return 0;
+ ret = cpufreq_stats_create_table(policy, table);
+ if (ret)
+ return ret;
+ return 0;
+}
+
+static int cpufreq_stat_notifier_trans(struct notifier_block *nb,
+ unsigned long val, void *data)
+{
+ struct cpufreq_freqs *freq = data;
+ struct cpufreq_stats *stat;
+ int old_index, new_index;
+
+ if (val != CPUFREQ_POSTCHANGE)
+ return 0;
+
+ stat = per_cpu(cpufreq_stats_table, freq->cpu);
+ if (!stat)
+ return 0;
+
+ old_index = stat->last_index;
+ new_index = freq_table_get_index(stat, freq->new);
+
+ /* We can't do stat->time_in_state[-1]= .. */
+ if (old_index == -1 || new_index == -1)
+ return 0;
+
+ cpufreq_stats_update(freq->cpu);
+
+ if (old_index == new_index)
+ return 0;
+
+ spin_lock(&cpufreq_stats_lock);
+ stat->last_index = new_index;
+#ifdef CONFIG_CPU_FREQ_STAT_DETAILS
+ stat->trans_table[old_index * stat->max_state + new_index]++;
+#endif
+ stat->total_trans++;
+ spin_unlock(&cpufreq_stats_lock);
+ return 0;
+}
+
+static int cpufreq_stats_create_table_cpu(unsigned int cpu)
+{
+ struct cpufreq_policy *policy;
+ struct cpufreq_frequency_table *table;
+ int ret = -ENODEV;
+
+ policy = cpufreq_cpu_get(cpu);
+ if (!policy)
+ return -ENODEV;
+
+ table = cpufreq_frequency_get_table(cpu);
+ if (!table)
+ goto out;
+
+ ret = cpufreq_stats_create_table(policy, table);
+
+out:
+ cpufreq_cpu_put(policy);
+ return ret;
+}
+
+static int __cpuinit cpufreq_stat_cpu_callback(struct notifier_block *nfb,
+ unsigned long action,
+ void *hcpu)
+{
+ unsigned int cpu = (unsigned long)hcpu;
+
+ switch (action) {
+ case CPU_ONLINE:
+ case CPU_ONLINE_FROZEN:
+ cpufreq_update_policy(cpu);
+ break;
+ case CPU_DOWN_PREPARE:
+ cpufreq_stats_free_sysfs(cpu);
+ break;
+ case CPU_DEAD:
+ case CPU_DEAD_FROZEN:
+ cpufreq_stats_free_table(cpu);
+ break;
+ case CPU_DOWN_FAILED:
+ case CPU_DOWN_FAILED_FROZEN:
+ cpufreq_stats_create_table_cpu(cpu);
+ break;
+ }
+ return NOTIFY_OK;
+}
+
+/* priority=1 so this will get called before cpufreq_remove_dev */
+static struct notifier_block cpufreq_stat_cpu_notifier __refdata = {
+ .notifier_call = cpufreq_stat_cpu_callback,
+ .priority = 1,
+};
+
+static struct notifier_block notifier_policy_block = {
+ .notifier_call = cpufreq_stat_notifier_policy
+};
+
+static struct notifier_block notifier_trans_block = {
+ .notifier_call = cpufreq_stat_notifier_trans
+};
+
+static int __init cpufreq_stats_init(void)
+{
+ int ret;
+ unsigned int cpu;
+
+ spin_lock_init(&cpufreq_stats_lock);
+ ret = cpufreq_register_notifier(&notifier_policy_block,
+ CPUFREQ_POLICY_NOTIFIER);
+ if (ret)
+ return ret;
+
+ ret = cpufreq_register_notifier(&notifier_trans_block,
+ CPUFREQ_TRANSITION_NOTIFIER);
+ if (ret) {
+ cpufreq_unregister_notifier(&notifier_policy_block,
+ CPUFREQ_POLICY_NOTIFIER);
+ return ret;
+ }
+
+ register_hotcpu_notifier(&cpufreq_stat_cpu_notifier);
+ for_each_online_cpu(cpu) {
+ cpufreq_update_policy(cpu);
+ }
+ return 0;
+}
+static void __exit cpufreq_stats_exit(void)
+{
+ unsigned int cpu;
+
+ cpufreq_unregister_notifier(&notifier_policy_block,
+ CPUFREQ_POLICY_NOTIFIER);
+ cpufreq_unregister_notifier(&notifier_trans_block,
+ CPUFREQ_TRANSITION_NOTIFIER);
+ unregister_hotcpu_notifier(&cpufreq_stat_cpu_notifier);
+ for_each_online_cpu(cpu) {
+ cpufreq_stats_free_table(cpu);
+ cpufreq_stats_free_sysfs(cpu);
+ }
+}
+
+MODULE_AUTHOR("Zou Nan hai <nanhai.zou@intel.com>");
+MODULE_DESCRIPTION("'cpufreq_stats' - A driver to export cpufreq stats "
+ "through sysfs filesystem");
+MODULE_LICENSE("GPL");
+
+module_init(cpufreq_stats_init);
+module_exit(cpufreq_stats_exit);
diff --git a/drivers/cpufreq/cpufreq_userspace.c b/drivers/cpufreq/cpufreq_userspace.c
new file mode 100644
index 00000000..f2310159
--- /dev/null
+++ b/drivers/cpufreq/cpufreq_userspace.c
@@ -0,0 +1,220 @@
+
+/*
+ * linux/drivers/cpufreq/cpufreq_userspace.c
+ *
+ * Copyright (C) 2001 Russell King
+ * (C) 2002 - 2004 Dominik Brodowski <linux@brodo.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/smp.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+#include <linux/cpufreq.h>
+#include <linux/cpu.h>
+#include <linux/types.h>
+#include <linux/fs.h>
+#include <linux/sysfs.h>
+#include <linux/mutex.h>
+
+/**
+ * A few values needed by the userspace governor
+ */
+static DEFINE_PER_CPU(unsigned int, cpu_max_freq);
+static DEFINE_PER_CPU(unsigned int, cpu_min_freq);
+static DEFINE_PER_CPU(unsigned int, cpu_cur_freq); /* current CPU freq */
+static DEFINE_PER_CPU(unsigned int, cpu_set_freq); /* CPU freq desired by
+ userspace */
+static DEFINE_PER_CPU(unsigned int, cpu_is_managed);
+
+static DEFINE_MUTEX(userspace_mutex);
+static int cpus_using_userspace_governor;
+
+/* keep track of frequency transitions */
+static int
+userspace_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
+ void *data)
+{
+ struct cpufreq_freqs *freq = data;
+
+ if (!per_cpu(cpu_is_managed, freq->cpu))
+ return 0;
+
+ pr_debug("saving cpu_cur_freq of cpu %u to be %u kHz\n",
+ freq->cpu, freq->new);
+ per_cpu(cpu_cur_freq, freq->cpu) = freq->new;
+
+ return 0;
+}
+
+static struct notifier_block userspace_cpufreq_notifier_block = {
+ .notifier_call = userspace_cpufreq_notifier
+};
+
+
+/**
+ * cpufreq_set - set the CPU frequency
+ * @policy: pointer to policy struct where freq is being set
+ * @freq: target frequency in kHz
+ *
+ * Sets the CPU frequency to freq.
+ */
+static int cpufreq_set(struct cpufreq_policy *policy, unsigned int freq)
+{
+ int ret = -EINVAL;
+
+ pr_debug("cpufreq_set for cpu %u, freq %u kHz\n", policy->cpu, freq);
+
+ mutex_lock(&userspace_mutex);
+ if (!per_cpu(cpu_is_managed, policy->cpu))
+ goto err;
+
+ per_cpu(cpu_set_freq, policy->cpu) = freq;
+
+ if (freq < per_cpu(cpu_min_freq, policy->cpu))
+ freq = per_cpu(cpu_min_freq, policy->cpu);
+ if (freq > per_cpu(cpu_max_freq, policy->cpu))
+ freq = per_cpu(cpu_max_freq, policy->cpu);
+
+ /*
+ * We're safe from concurrent calls to ->target() here
+ * as we hold the userspace_mutex lock. If we were calling
+ * cpufreq_driver_target, a deadlock situation might occur:
+ * A: cpufreq_set (lock userspace_mutex) ->
+ * cpufreq_driver_target(lock policy->lock)
+ * B: cpufreq_set_policy(lock policy->lock) ->
+ * __cpufreq_governor ->
+ * cpufreq_governor_userspace (lock userspace_mutex)
+ */
+ ret = __cpufreq_driver_target(policy, freq, CPUFREQ_RELATION_L);
+
+ err:
+ mutex_unlock(&userspace_mutex);
+ return ret;
+}
+
+
+static ssize_t show_speed(struct cpufreq_policy *policy, char *buf)
+{
+ return sprintf(buf, "%u\n", per_cpu(cpu_cur_freq, policy->cpu));
+}
+
+static int cpufreq_governor_userspace(struct cpufreq_policy *policy,
+ unsigned int event)
+{
+ unsigned int cpu = policy->cpu;
+ int rc = 0;
+
+ switch (event) {
+ case CPUFREQ_GOV_START:
+ if (!cpu_online(cpu))
+ return -EINVAL;
+ BUG_ON(!policy->cur);
+ mutex_lock(&userspace_mutex);
+
+ if (cpus_using_userspace_governor == 0) {
+ cpufreq_register_notifier(
+ &userspace_cpufreq_notifier_block,
+ CPUFREQ_TRANSITION_NOTIFIER);
+ }
+ cpus_using_userspace_governor++;
+
+ per_cpu(cpu_is_managed, cpu) = 1;
+ per_cpu(cpu_min_freq, cpu) = policy->min;
+ per_cpu(cpu_max_freq, cpu) = policy->max;
+ per_cpu(cpu_cur_freq, cpu) = policy->cur;
+ per_cpu(cpu_set_freq, cpu) = policy->cur;
+ pr_debug("managing cpu %u started "
+ "(%u - %u kHz, currently %u kHz)\n",
+ cpu,
+ per_cpu(cpu_min_freq, cpu),
+ per_cpu(cpu_max_freq, cpu),
+ per_cpu(cpu_cur_freq, cpu));
+
+ mutex_unlock(&userspace_mutex);
+ break;
+ case CPUFREQ_GOV_STOP:
+ mutex_lock(&userspace_mutex);
+ cpus_using_userspace_governor--;
+ if (cpus_using_userspace_governor == 0) {
+ cpufreq_unregister_notifier(
+ &userspace_cpufreq_notifier_block,
+ CPUFREQ_TRANSITION_NOTIFIER);
+ }
+
+ per_cpu(cpu_is_managed, cpu) = 0;
+ per_cpu(cpu_min_freq, cpu) = 0;
+ per_cpu(cpu_max_freq, cpu) = 0;
+ per_cpu(cpu_set_freq, cpu) = 0;
+ pr_debug("managing cpu %u stopped\n", cpu);
+ mutex_unlock(&userspace_mutex);
+ break;
+ case CPUFREQ_GOV_LIMITS:
+ mutex_lock(&userspace_mutex);
+ pr_debug("limit event for cpu %u: %u - %u kHz, "
+ "currently %u kHz, last set to %u kHz\n",
+ cpu, policy->min, policy->max,
+ per_cpu(cpu_cur_freq, cpu),
+ per_cpu(cpu_set_freq, cpu));
+ if (policy->max < per_cpu(cpu_set_freq, cpu)) {
+ __cpufreq_driver_target(policy, policy->max,
+ CPUFREQ_RELATION_H);
+ } else if (policy->min > per_cpu(cpu_set_freq, cpu)) {
+ __cpufreq_driver_target(policy, policy->min,
+ CPUFREQ_RELATION_L);
+ } else {
+ __cpufreq_driver_target(policy,
+ per_cpu(cpu_set_freq, cpu),
+ CPUFREQ_RELATION_L);
+ }
+ per_cpu(cpu_min_freq, cpu) = policy->min;
+ per_cpu(cpu_max_freq, cpu) = policy->max;
+ per_cpu(cpu_cur_freq, cpu) = policy->cur;
+ mutex_unlock(&userspace_mutex);
+ break;
+ }
+ return rc;
+}
+
+
+#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE
+static
+#endif
+struct cpufreq_governor cpufreq_gov_userspace = {
+ .name = "userspace",
+ .governor = cpufreq_governor_userspace,
+ .store_setspeed = cpufreq_set,
+ .show_setspeed = show_speed,
+ .owner = THIS_MODULE,
+};
+
+static int __init cpufreq_gov_userspace_init(void)
+{
+ return cpufreq_register_governor(&cpufreq_gov_userspace);
+}
+
+
+static void __exit cpufreq_gov_userspace_exit(void)
+{
+ cpufreq_unregister_governor(&cpufreq_gov_userspace);
+}
+
+
+MODULE_AUTHOR("Dominik Brodowski <linux@brodo.de>, "
+ "Russell King <rmk@arm.linux.org.uk>");
+MODULE_DESCRIPTION("CPUfreq policy governor 'userspace'");
+MODULE_LICENSE("GPL");
+
+#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE
+fs_initcall(cpufreq_gov_userspace_init);
+#else
+module_init(cpufreq_gov_userspace_init);
+#endif
+module_exit(cpufreq_gov_userspace_exit);
diff --git a/drivers/cpufreq/db8500-cpufreq.c b/drivers/cpufreq/db8500-cpufreq.c
new file mode 100644
index 00000000..d90456a8
--- /dev/null
+++ b/drivers/cpufreq/db8500-cpufreq.c
@@ -0,0 +1,169 @@
+/*
+ * Copyright (C) STMicroelectronics 2009
+ * Copyright (C) ST-Ericsson SA 2010
+ *
+ * License Terms: GNU General Public License v2
+ * Author: Sundar Iyer <sundar.iyer@stericsson.com>
+ * Author: Martin Persson <martin.persson@stericsson.com>
+ * Author: Jonas Aaberg <jonas.aberg@stericsson.com>
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/cpufreq.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/mfd/db8500-prcmu.h>
+#include <mach/id.h>
+
+static struct cpufreq_frequency_table freq_table[] = {
+ [0] = {
+ .index = 0,
+ .frequency = 300000,
+ },
+ [1] = {
+ .index = 1,
+ .frequency = 600000,
+ },
+ [2] = {
+ /* Used for MAX_OPP, if available */
+ .index = 2,
+ .frequency = CPUFREQ_TABLE_END,
+ },
+ [3] = {
+ .index = 3,
+ .frequency = CPUFREQ_TABLE_END,
+ },
+};
+
+static enum arm_opp idx2opp[] = {
+ ARM_50_OPP,
+ ARM_100_OPP,
+ ARM_MAX_OPP
+};
+
+static struct freq_attr *db8500_cpufreq_attr[] = {
+ &cpufreq_freq_attr_scaling_available_freqs,
+ NULL,
+};
+
+static int db8500_cpufreq_verify_speed(struct cpufreq_policy *policy)
+{
+ return cpufreq_frequency_table_verify(policy, freq_table);
+}
+
+static int db8500_cpufreq_target(struct cpufreq_policy *policy,
+ unsigned int target_freq,
+ unsigned int relation)
+{
+ struct cpufreq_freqs freqs;
+ unsigned int idx;
+
+ /* scale the target frequency to one of the extremes supported */
+ if (target_freq < policy->cpuinfo.min_freq)
+ target_freq = policy->cpuinfo.min_freq;
+ if (target_freq > policy->cpuinfo.max_freq)
+ target_freq = policy->cpuinfo.max_freq;
+
+ /* Lookup the next frequency */
+ if (cpufreq_frequency_table_target
+ (policy, freq_table, target_freq, relation, &idx)) {
+ return -EINVAL;
+ }
+
+ freqs.old = policy->cur;
+ freqs.new = freq_table[idx].frequency;
+ freqs.cpu = policy->cpu;
+
+ if (freqs.old == freqs.new)
+ return 0;
+
+ /* pre-change notification */
+ cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+
+ /* request the PRCM unit for opp change */
+ if (prcmu_set_arm_opp(idx2opp[idx])) {
+ pr_err("db8500-cpufreq: Failed to set OPP level\n");
+ return -EINVAL;
+ }
+
+ /* post change notification */
+ cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+
+ return 0;
+}
+
+static unsigned int db8500_cpufreq_getspeed(unsigned int cpu)
+{
+ int i;
+ /* request the prcm to get the current ARM opp */
+ for (i = 0; prcmu_get_arm_opp() != idx2opp[i]; i++)
+ ;
+ return freq_table[i].frequency;
+}
+
+static int __cpuinit db8500_cpufreq_init(struct cpufreq_policy *policy)
+{
+ int res;
+ int i;
+
+ BUILD_BUG_ON(ARRAY_SIZE(idx2opp) + 1 != ARRAY_SIZE(freq_table));
+
+ if (cpu_is_u8500v2() && !prcmu_is_u8400()) {
+ freq_table[0].frequency = 400000;
+ freq_table[1].frequency = 800000;
+ if (prcmu_has_arm_maxopp())
+ freq_table[2].frequency = 1000000;
+ }
+
+ /* get policy fields based on the table */
+ res = cpufreq_frequency_table_cpuinfo(policy, freq_table);
+ if (!res)
+ cpufreq_frequency_table_get_attr(freq_table, policy->cpu);
+ else {
+ pr_err("db8500-cpufreq : Failed to read policy table\n");
+ return res;
+ }
+
+ policy->min = policy->cpuinfo.min_freq;
+ policy->max = policy->cpuinfo.max_freq;
+ policy->cur = db8500_cpufreq_getspeed(policy->cpu);
+
+ for (i = 0; freq_table[i].frequency != policy->cur; i++)
+ ;
+
+ policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
+
+ /*
+ * FIXME : Need to take time measurement across the target()
+ * function with no/some/all drivers in the notification
+ * list.
+ */
+ policy->cpuinfo.transition_latency = 20 * 1000; /* in ns */
+
+ /* policy sharing between dual CPUs */
+ cpumask_copy(policy->cpus, &cpu_present_map);
+
+ policy->shared_type = CPUFREQ_SHARED_TYPE_ALL;
+
+ return 0;
+}
+
+static struct cpufreq_driver db8500_cpufreq_driver = {
+ .flags = CPUFREQ_STICKY,
+ .verify = db8500_cpufreq_verify_speed,
+ .target = db8500_cpufreq_target,
+ .get = db8500_cpufreq_getspeed,
+ .init = db8500_cpufreq_init,
+ .name = "DB8500",
+ .attr = db8500_cpufreq_attr,
+};
+
+static int __init db8500_cpufreq_register(void)
+{
+ if (!cpu_is_u8500v20_or_later())
+ return -ENODEV;
+
+ pr_info("cpufreq for DB8500 started\n");
+ return cpufreq_register_driver(&db8500_cpufreq_driver);
+}
+device_initcall(db8500_cpufreq_register);
diff --git a/drivers/cpufreq/e_powersaver.c b/drivers/cpufreq/e_powersaver.c
new file mode 100644
index 00000000..35a257dd
--- /dev/null
+++ b/drivers/cpufreq/e_powersaver.c
@@ -0,0 +1,367 @@
+/*
+ * Based on documentation provided by Dave Jones. Thanks!
+ *
+ * Licensed under the terms of the GNU GPL License version 2.
+ *
+ * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/cpufreq.h>
+#include <linux/ioport.h>
+#include <linux/slab.h>
+#include <linux/timex.h>
+#include <linux/io.h>
+#include <linux/delay.h>
+
+#include <asm/msr.h>
+#include <asm/tsc.h>
+
+#define EPS_BRAND_C7M 0
+#define EPS_BRAND_C7 1
+#define EPS_BRAND_EDEN 2
+#define EPS_BRAND_C3 3
+#define EPS_BRAND_C7D 4
+
+struct eps_cpu_data {
+ u32 fsb;
+ struct cpufreq_frequency_table freq_table[];
+};
+
+static struct eps_cpu_data *eps_cpu[NR_CPUS];
+
+
+static unsigned int eps_get(unsigned int cpu)
+{
+ struct eps_cpu_data *centaur;
+ u32 lo, hi;
+
+ if (cpu)
+ return 0;
+ centaur = eps_cpu[cpu];
+ if (centaur == NULL)
+ return 0;
+
+ /* Return current frequency */
+ rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
+ return centaur->fsb * ((lo >> 8) & 0xff);
+}
+
+static int eps_set_state(struct eps_cpu_data *centaur,
+ unsigned int cpu,
+ u32 dest_state)
+{
+ struct cpufreq_freqs freqs;
+ u32 lo, hi;
+ int err = 0;
+ int i;
+
+ freqs.old = eps_get(cpu);
+ freqs.new = centaur->fsb * ((dest_state >> 8) & 0xff);
+ freqs.cpu = cpu;
+ cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+
+ /* Wait while CPU is busy */
+ rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
+ i = 0;
+ while (lo & ((1 << 16) | (1 << 17))) {
+ udelay(16);
+ rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
+ i++;
+ if (unlikely(i > 64)) {
+ err = -ENODEV;
+ goto postchange;
+ }
+ }
+ /* Set new multiplier and voltage */
+ wrmsr(MSR_IA32_PERF_CTL, dest_state & 0xffff, 0);
+ /* Wait until transition end */
+ i = 0;
+ do {
+ udelay(16);
+ rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
+ i++;
+ if (unlikely(i > 64)) {
+ err = -ENODEV;
+ goto postchange;
+ }
+ } while (lo & ((1 << 16) | (1 << 17)));
+
+ /* Return current frequency */
+postchange:
+ rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
+ freqs.new = centaur->fsb * ((lo >> 8) & 0xff);
+
+#ifdef DEBUG
+ {
+ u8 current_multiplier, current_voltage;
+
+ /* Print voltage and multiplier */
+ rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
+ current_voltage = lo & 0xff;
+ printk(KERN_INFO "eps: Current voltage = %dmV\n",
+ current_voltage * 16 + 700);
+ current_multiplier = (lo >> 8) & 0xff;
+ printk(KERN_INFO "eps: Current multiplier = %d\n",
+ current_multiplier);
+ }
+#endif
+ cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+ return err;
+}
+
+static int eps_target(struct cpufreq_policy *policy,
+ unsigned int target_freq,
+ unsigned int relation)
+{
+ struct eps_cpu_data *centaur;
+ unsigned int newstate = 0;
+ unsigned int cpu = policy->cpu;
+ unsigned int dest_state;
+ int ret;
+
+ if (unlikely(eps_cpu[cpu] == NULL))
+ return -ENODEV;
+ centaur = eps_cpu[cpu];
+
+ if (unlikely(cpufreq_frequency_table_target(policy,
+ &eps_cpu[cpu]->freq_table[0],
+ target_freq,
+ relation,
+ &newstate))) {
+ return -EINVAL;
+ }
+
+ /* Make frequency transition */
+ dest_state = centaur->freq_table[newstate].index & 0xffff;
+ ret = eps_set_state(centaur, cpu, dest_state);
+ if (ret)
+ printk(KERN_ERR "eps: Timeout!\n");
+ return ret;
+}
+
+static int eps_verify(struct cpufreq_policy *policy)
+{
+ return cpufreq_frequency_table_verify(policy,
+ &eps_cpu[policy->cpu]->freq_table[0]);
+}
+
+static int eps_cpu_init(struct cpufreq_policy *policy)
+{
+ unsigned int i;
+ u32 lo, hi;
+ u64 val;
+ u8 current_multiplier, current_voltage;
+ u8 max_multiplier, max_voltage;
+ u8 min_multiplier, min_voltage;
+ u8 brand = 0;
+ u32 fsb;
+ struct eps_cpu_data *centaur;
+ struct cpuinfo_x86 *c = &cpu_data(0);
+ struct cpufreq_frequency_table *f_table;
+ int k, step, voltage;
+ int ret;
+ int states;
+
+ if (policy->cpu != 0)
+ return -ENODEV;
+
+ /* Check brand */
+ printk(KERN_INFO "eps: Detected VIA ");
+
+ switch (c->x86_model) {
+ case 10:
+ rdmsr(0x1153, lo, hi);
+ brand = (((lo >> 2) ^ lo) >> 18) & 3;
+ printk(KERN_CONT "Model A ");
+ break;
+ case 13:
+ rdmsr(0x1154, lo, hi);
+ brand = (((lo >> 4) ^ (lo >> 2))) & 0x000000ff;
+ printk(KERN_CONT "Model D ");
+ break;
+ }
+
+ switch (brand) {
+ case EPS_BRAND_C7M:
+ printk(KERN_CONT "C7-M\n");
+ break;
+ case EPS_BRAND_C7:
+ printk(KERN_CONT "C7\n");
+ break;
+ case EPS_BRAND_EDEN:
+ printk(KERN_CONT "Eden\n");
+ break;
+ case EPS_BRAND_C7D:
+ printk(KERN_CONT "C7-D\n");
+ break;
+ case EPS_BRAND_C3:
+ printk(KERN_CONT "C3\n");
+ return -ENODEV;
+ break;
+ }
+ /* Enable Enhanced PowerSaver */
+ rdmsrl(MSR_IA32_MISC_ENABLE, val);
+ if (!(val & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) {
+ val |= MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP;
+ wrmsrl(MSR_IA32_MISC_ENABLE, val);
+ /* Can be locked at 0 */
+ rdmsrl(MSR_IA32_MISC_ENABLE, val);
+ if (!(val & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) {
+ printk(KERN_INFO "eps: Can't enable Enhanced PowerSaver\n");
+ return -ENODEV;
+ }
+ }
+
+ /* Print voltage and multiplier */
+ rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
+ current_voltage = lo & 0xff;
+ printk(KERN_INFO "eps: Current voltage = %dmV\n",
+ current_voltage * 16 + 700);
+ current_multiplier = (lo >> 8) & 0xff;
+ printk(KERN_INFO "eps: Current multiplier = %d\n", current_multiplier);
+
+ /* Print limits */
+ max_voltage = hi & 0xff;
+ printk(KERN_INFO "eps: Highest voltage = %dmV\n",
+ max_voltage * 16 + 700);
+ max_multiplier = (hi >> 8) & 0xff;
+ printk(KERN_INFO "eps: Highest multiplier = %d\n", max_multiplier);
+ min_voltage = (hi >> 16) & 0xff;
+ printk(KERN_INFO "eps: Lowest voltage = %dmV\n",
+ min_voltage * 16 + 700);
+ min_multiplier = (hi >> 24) & 0xff;
+ printk(KERN_INFO "eps: Lowest multiplier = %d\n", min_multiplier);
+
+ /* Sanity checks */
+ if (current_multiplier == 0 || max_multiplier == 0
+ || min_multiplier == 0)
+ return -EINVAL;
+ if (current_multiplier > max_multiplier
+ || max_multiplier <= min_multiplier)
+ return -EINVAL;
+ if (current_voltage > 0x1f || max_voltage > 0x1f)
+ return -EINVAL;
+ if (max_voltage < min_voltage)
+ return -EINVAL;
+
+ /* Calc FSB speed */
+ fsb = cpu_khz / current_multiplier;
+ /* Calc number of p-states supported */
+ if (brand == EPS_BRAND_C7M)
+ states = max_multiplier - min_multiplier + 1;
+ else
+ states = 2;
+
+ /* Allocate private data and frequency table for current cpu */
+ centaur = kzalloc(sizeof(struct eps_cpu_data)
+ + (states + 1) * sizeof(struct cpufreq_frequency_table),
+ GFP_KERNEL);
+ if (!centaur)
+ return -ENOMEM;
+ eps_cpu[0] = centaur;
+
+ /* Copy basic values */
+ centaur->fsb = fsb;
+
+ /* Fill frequency and MSR value table */
+ f_table = &centaur->freq_table[0];
+ if (brand != EPS_BRAND_C7M) {
+ f_table[0].frequency = fsb * min_multiplier;
+ f_table[0].index = (min_multiplier << 8) | min_voltage;
+ f_table[1].frequency = fsb * max_multiplier;
+ f_table[1].index = (max_multiplier << 8) | max_voltage;
+ f_table[2].frequency = CPUFREQ_TABLE_END;
+ } else {
+ k = 0;
+ step = ((max_voltage - min_voltage) * 256)
+ / (max_multiplier - min_multiplier);
+ for (i = min_multiplier; i <= max_multiplier; i++) {
+ voltage = (k * step) / 256 + min_voltage;
+ f_table[k].frequency = fsb * i;
+ f_table[k].index = (i << 8) | voltage;
+ k++;
+ }
+ f_table[k].frequency = CPUFREQ_TABLE_END;
+ }
+
+ policy->cpuinfo.transition_latency = 140000; /* 844mV -> 700mV in ns */
+ policy->cur = fsb * current_multiplier;
+
+ ret = cpufreq_frequency_table_cpuinfo(policy, &centaur->freq_table[0]);
+ if (ret) {
+ kfree(centaur);
+ return ret;
+ }
+
+ cpufreq_frequency_table_get_attr(&centaur->freq_table[0], policy->cpu);
+ return 0;
+}
+
+static int eps_cpu_exit(struct cpufreq_policy *policy)
+{
+ unsigned int cpu = policy->cpu;
+ struct eps_cpu_data *centaur;
+ u32 lo, hi;
+
+ if (eps_cpu[cpu] == NULL)
+ return -ENODEV;
+ centaur = eps_cpu[cpu];
+
+ /* Get max frequency */
+ rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
+ /* Set max frequency */
+ eps_set_state(centaur, cpu, hi & 0xffff);
+ /* Bye */
+ cpufreq_frequency_table_put_attr(policy->cpu);
+ kfree(eps_cpu[cpu]);
+ eps_cpu[cpu] = NULL;
+ return 0;
+}
+
+static struct freq_attr *eps_attr[] = {
+ &cpufreq_freq_attr_scaling_available_freqs,
+ NULL,
+};
+
+static struct cpufreq_driver eps_driver = {
+ .verify = eps_verify,
+ .target = eps_target,
+ .init = eps_cpu_init,
+ .exit = eps_cpu_exit,
+ .get = eps_get,
+ .name = "e_powersaver",
+ .owner = THIS_MODULE,
+ .attr = eps_attr,
+};
+
+static int __init eps_init(void)
+{
+ struct cpuinfo_x86 *c = &cpu_data(0);
+
+ /* This driver will work only on Centaur C7 processors with
+ * Enhanced SpeedStep/PowerSaver registers */
+ if (c->x86_vendor != X86_VENDOR_CENTAUR
+ || c->x86 != 6 || c->x86_model < 10)
+ return -ENODEV;
+ if (!cpu_has(c, X86_FEATURE_EST))
+ return -ENODEV;
+
+ if (cpufreq_register_driver(&eps_driver))
+ return -EINVAL;
+ return 0;
+}
+
+static void __exit eps_exit(void)
+{
+ cpufreq_unregister_driver(&eps_driver);
+}
+
+MODULE_AUTHOR("Rafal Bilski <rafalbilski@interia.pl>");
+MODULE_DESCRIPTION("Enhanced PowerSaver driver for VIA C7 CPU's.");
+MODULE_LICENSE("GPL");
+
+module_init(eps_init);
+module_exit(eps_exit);
diff --git a/drivers/cpufreq/elanfreq.c b/drivers/cpufreq/elanfreq.c
new file mode 100644
index 00000000..c587db47
--- /dev/null
+++ b/drivers/cpufreq/elanfreq.c
@@ -0,0 +1,309 @@
+/*
+ * elanfreq: cpufreq driver for the AMD ELAN family
+ *
+ * (c) Copyright 2002 Robert Schwebel <r.schwebel@pengutronix.de>
+ *
+ * Parts of this code are (c) Sven Geggus <sven@geggus.net>
+ *
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * 2002-02-13: - initial revision for 2.4.18-pre9 by Robert Schwebel
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+
+#include <linux/delay.h>
+#include <linux/cpufreq.h>
+
+#include <asm/msr.h>
+#include <linux/timex.h>
+#include <linux/io.h>
+
+#define REG_CSCIR 0x22 /* Chip Setup and Control Index Register */
+#define REG_CSCDR 0x23 /* Chip Setup and Control Data Register */
+
+/* Module parameter */
+static int max_freq;
+
+struct s_elan_multiplier {
+ int clock; /* frequency in kHz */
+ int val40h; /* PMU Force Mode register */
+ int val80h; /* CPU Clock Speed Register */
+};
+
+/*
+ * It is important that the frequencies
+ * are listed in ascending order here!
+ */
+static struct s_elan_multiplier elan_multiplier[] = {
+ {1000, 0x02, 0x18},
+ {2000, 0x02, 0x10},
+ {4000, 0x02, 0x08},
+ {8000, 0x00, 0x00},
+ {16000, 0x00, 0x02},
+ {33000, 0x00, 0x04},
+ {66000, 0x01, 0x04},
+ {99000, 0x01, 0x05}
+};
+
+static struct cpufreq_frequency_table elanfreq_table[] = {
+ {0, 1000},
+ {1, 2000},
+ {2, 4000},
+ {3, 8000},
+ {4, 16000},
+ {5, 33000},
+ {6, 66000},
+ {7, 99000},
+ {0, CPUFREQ_TABLE_END},
+};
+
+
+/**
+ * elanfreq_get_cpu_frequency: determine current cpu speed
+ *
+ * Finds out at which frequency the CPU of the Elan SOC runs
+ * at the moment. Frequencies from 1 to 33 MHz are generated
+ * the normal way, 66 and 99 MHz are called "Hyperspeed Mode"
+ * and have the rest of the chip running with 33 MHz.
+ */
+
+static unsigned int elanfreq_get_cpu_frequency(unsigned int cpu)
+{
+ u8 clockspeed_reg; /* Clock Speed Register */
+
+ local_irq_disable();
+ outb_p(0x80, REG_CSCIR);
+ clockspeed_reg = inb_p(REG_CSCDR);
+ local_irq_enable();
+
+ if ((clockspeed_reg & 0xE0) == 0xE0)
+ return 0;
+
+ /* Are we in CPU clock multiplied mode (66/99 MHz)? */
+ if ((clockspeed_reg & 0xE0) == 0xC0) {
+ if ((clockspeed_reg & 0x01) == 0)
+ return 66000;
+ else
+ return 99000;
+ }
+
+ /* 33 MHz is not 32 MHz... */
+ if ((clockspeed_reg & 0xE0) == 0xA0)
+ return 33000;
+
+ return (1<<((clockspeed_reg & 0xE0) >> 5)) * 1000;
+}
+
+
+/**
+ * elanfreq_set_cpu_frequency: Change the CPU core frequency
+ * @cpu: cpu number
+ * @freq: frequency in kHz
+ *
+ * This function takes a frequency value and changes the CPU frequency
+ * according to this. Note that the frequency has to be checked by
+ * elanfreq_validatespeed() for correctness!
+ *
+ * There is no return value.
+ */
+
+static void elanfreq_set_cpu_state(unsigned int state)
+{
+ struct cpufreq_freqs freqs;
+
+ freqs.old = elanfreq_get_cpu_frequency(0);
+ freqs.new = elan_multiplier[state].clock;
+ freqs.cpu = 0; /* elanfreq.c is UP only driver */
+
+ cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+
+ printk(KERN_INFO "elanfreq: attempting to set frequency to %i kHz\n",
+ elan_multiplier[state].clock);
+
+
+ /*
+ * Access to the Elan's internal registers is indexed via
+ * 0x22: Chip Setup & Control Register Index Register (CSCI)
+ * 0x23: Chip Setup & Control Register Data Register (CSCD)
+ *
+ */
+
+ /*
+ * 0x40 is the Power Management Unit's Force Mode Register.
+ * Bit 6 enables Hyperspeed Mode (66/100 MHz core frequency)
+ */
+
+ local_irq_disable();
+ outb_p(0x40, REG_CSCIR); /* Disable hyperspeed mode */
+ outb_p(0x00, REG_CSCDR);
+ local_irq_enable(); /* wait till internal pipelines and */
+ udelay(1000); /* buffers have cleaned up */
+
+ local_irq_disable();
+
+ /* now, set the CPU clock speed register (0x80) */
+ outb_p(0x80, REG_CSCIR);
+ outb_p(elan_multiplier[state].val80h, REG_CSCDR);
+
+ /* now, the hyperspeed bit in PMU Force Mode Register (0x40) */
+ outb_p(0x40, REG_CSCIR);
+ outb_p(elan_multiplier[state].val40h, REG_CSCDR);
+ udelay(10000);
+ local_irq_enable();
+
+ cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+};
+
+
+/**
+ * elanfreq_validatespeed: test if frequency range is valid
+ * @policy: the policy to validate
+ *
+ * This function checks if a given frequency range in kHz is valid
+ * for the hardware supported by the driver.
+ */
+
+static int elanfreq_verify(struct cpufreq_policy *policy)
+{
+ return cpufreq_frequency_table_verify(policy, &elanfreq_table[0]);
+}
+
+static int elanfreq_target(struct cpufreq_policy *policy,
+ unsigned int target_freq,
+ unsigned int relation)
+{
+ unsigned int newstate = 0;
+
+ if (cpufreq_frequency_table_target(policy, &elanfreq_table[0],
+ target_freq, relation, &newstate))
+ return -EINVAL;
+
+ elanfreq_set_cpu_state(newstate);
+
+ return 0;
+}
+
+
+/*
+ * Module init and exit code
+ */
+
+static int elanfreq_cpu_init(struct cpufreq_policy *policy)
+{
+ struct cpuinfo_x86 *c = &cpu_data(0);
+ unsigned int i;
+ int result;
+
+ /* capability check */
+ if ((c->x86_vendor != X86_VENDOR_AMD) ||
+ (c->x86 != 4) || (c->x86_model != 10))
+ return -ENODEV;
+
+ /* max freq */
+ if (!max_freq)
+ max_freq = elanfreq_get_cpu_frequency(0);
+
+ /* table init */
+ for (i = 0; (elanfreq_table[i].frequency != CPUFREQ_TABLE_END); i++) {
+ if (elanfreq_table[i].frequency > max_freq)
+ elanfreq_table[i].frequency = CPUFREQ_ENTRY_INVALID;
+ }
+
+ /* cpuinfo and default policy values */
+ policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
+ policy->cur = elanfreq_get_cpu_frequency(0);
+
+ result = cpufreq_frequency_table_cpuinfo(policy, elanfreq_table);
+ if (result)
+ return result;
+
+ cpufreq_frequency_table_get_attr(elanfreq_table, policy->cpu);
+ return 0;
+}
+
+
+static int elanfreq_cpu_exit(struct cpufreq_policy *policy)
+{
+ cpufreq_frequency_table_put_attr(policy->cpu);
+ return 0;
+}
+
+
+#ifndef MODULE
+/**
+ * elanfreq_setup - elanfreq command line parameter parsing
+ *
+ * elanfreq command line parameter. Use:
+ * elanfreq=66000
+ * to set the maximum CPU frequency to 66 MHz. Note that in
+ * case you do not give this boot parameter, the maximum
+ * frequency will fall back to _current_ CPU frequency which
+ * might be lower. If you build this as a module, use the
+ * max_freq module parameter instead.
+ */
+static int __init elanfreq_setup(char *str)
+{
+ max_freq = simple_strtoul(str, &str, 0);
+ printk(KERN_WARNING "You're using the deprecated elanfreq command line option. Use elanfreq.max_freq instead, please!\n");
+ return 1;
+}
+__setup("elanfreq=", elanfreq_setup);
+#endif
+
+
+static struct freq_attr *elanfreq_attr[] = {
+ &cpufreq_freq_attr_scaling_available_freqs,
+ NULL,
+};
+
+
+static struct cpufreq_driver elanfreq_driver = {
+ .get = elanfreq_get_cpu_frequency,
+ .verify = elanfreq_verify,
+ .target = elanfreq_target,
+ .init = elanfreq_cpu_init,
+ .exit = elanfreq_cpu_exit,
+ .name = "elanfreq",
+ .owner = THIS_MODULE,
+ .attr = elanfreq_attr,
+};
+
+
+static int __init elanfreq_init(void)
+{
+ struct cpuinfo_x86 *c = &cpu_data(0);
+
+ /* Test if we have the right hardware */
+ if ((c->x86_vendor != X86_VENDOR_AMD) ||
+ (c->x86 != 4) || (c->x86_model != 10)) {
+ printk(KERN_INFO "elanfreq: error: no Elan processor found!\n");
+ return -ENODEV;
+ }
+ return cpufreq_register_driver(&elanfreq_driver);
+}
+
+
+static void __exit elanfreq_exit(void)
+{
+ cpufreq_unregister_driver(&elanfreq_driver);
+}
+
+
+module_param(max_freq, int, 0444);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Robert Schwebel <r.schwebel@pengutronix.de>, "
+ "Sven Geggus <sven@geggus.net>");
+MODULE_DESCRIPTION("cpufreq driver for AMD's Elan CPUs");
+
+module_init(elanfreq_init);
+module_exit(elanfreq_exit);
diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c
new file mode 100644
index 00000000..90431cb9
--- /dev/null
+++ b/drivers/cpufreq/freq_table.c
@@ -0,0 +1,236 @@
+/*
+ * linux/drivers/cpufreq/freq_table.c
+ *
+ * Copyright (C) 2002 - 2003 Dominik Brodowski
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/cpufreq.h>
+
+/*********************************************************************
+ * FREQUENCY TABLE HELPERS *
+ *********************************************************************/
+
+int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy,
+ struct cpufreq_frequency_table *table)
+{
+ unsigned int min_freq = ~0;
+ unsigned int max_freq = 0;
+ unsigned int i;
+
+ for (i = 0; (table[i].frequency != CPUFREQ_TABLE_END); i++) {
+ unsigned int freq = table[i].frequency;
+ if (freq == CPUFREQ_ENTRY_INVALID) {
+ pr_debug("table entry %u is invalid, skipping\n", i);
+
+ continue;
+ }
+ pr_debug("table entry %u: %u kHz, %u index\n",
+ i, freq, table[i].index);
+ if (freq < min_freq)
+ min_freq = freq;
+ if (freq > max_freq)
+ max_freq = freq;
+ }
+
+ policy->min = policy->cpuinfo.min_freq = min_freq;
+ policy->max = policy->cpuinfo.max_freq = max_freq;
+
+ if (policy->min == ~0)
+ return -EINVAL;
+ else
+ return 0;
+}
+EXPORT_SYMBOL_GPL(cpufreq_frequency_table_cpuinfo);
+
+
+int cpufreq_frequency_table_verify(struct cpufreq_policy *policy,
+ struct cpufreq_frequency_table *table)
+{
+ unsigned int next_larger = ~0;
+ unsigned int i;
+ unsigned int count = 0;
+
+ pr_debug("request for verification of policy (%u - %u kHz) for cpu %u\n",
+ policy->min, policy->max, policy->cpu);
+
+ if (!cpu_online(policy->cpu))
+ return -EINVAL;
+
+ cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq,
+ policy->cpuinfo.max_freq);
+
+ for (i = 0; (table[i].frequency != CPUFREQ_TABLE_END); i++) {
+ unsigned int freq = table[i].frequency;
+ if (freq == CPUFREQ_ENTRY_INVALID)
+ continue;
+ if ((freq >= policy->min) && (freq <= policy->max))
+ count++;
+ else if ((next_larger > freq) && (freq > policy->max))
+ next_larger = freq;
+ }
+
+ if (!count)
+ policy->max = next_larger;
+
+ cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq,
+ policy->cpuinfo.max_freq);
+
+ pr_debug("verification lead to (%u - %u kHz) for cpu %u\n",
+ policy->min, policy->max, policy->cpu);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(cpufreq_frequency_table_verify);
+
+
+int cpufreq_frequency_table_target(struct cpufreq_policy *policy,
+ struct cpufreq_frequency_table *table,
+ unsigned int target_freq,
+ unsigned int relation,
+ unsigned int *index)
+{
+ struct cpufreq_frequency_table optimal = {
+ .index = ~0,
+ .frequency = 0,
+ };
+ struct cpufreq_frequency_table suboptimal = {
+ .index = ~0,
+ .frequency = 0,
+ };
+ unsigned int i;
+
+ pr_debug("request for target %u kHz (relation: %u) for cpu %u\n",
+ target_freq, relation, policy->cpu);
+
+ switch (relation) {
+ case CPUFREQ_RELATION_H:
+ suboptimal.frequency = ~0;
+ break;
+ case CPUFREQ_RELATION_L:
+ optimal.frequency = ~0;
+ break;
+ }
+
+ if (!cpu_online(policy->cpu))
+ return -EINVAL;
+
+ for (i = 0; (table[i].frequency != CPUFREQ_TABLE_END); i++) {
+ unsigned int freq = table[i].frequency;
+ if (freq == CPUFREQ_ENTRY_INVALID)
+ continue;
+ if ((freq < policy->min) || (freq > policy->max))
+ continue;
+ switch (relation) {
+ case CPUFREQ_RELATION_H:
+ if (freq <= target_freq) {
+ if (freq >= optimal.frequency) {
+ optimal.frequency = freq;
+ optimal.index = i;
+ }
+ } else {
+ if (freq <= suboptimal.frequency) {
+ suboptimal.frequency = freq;
+ suboptimal.index = i;
+ }
+ }
+ break;
+ case CPUFREQ_RELATION_L:
+ if (freq >= target_freq) {
+ if (freq <= optimal.frequency) {
+ optimal.frequency = freq;
+ optimal.index = i;
+ }
+ } else {
+ if (freq >= suboptimal.frequency) {
+ suboptimal.frequency = freq;
+ suboptimal.index = i;
+ }
+ }
+ break;
+ }
+ }
+ if (optimal.index > i) {
+ if (suboptimal.index > i)
+ return -EINVAL;
+ *index = suboptimal.index;
+ } else
+ *index = optimal.index;
+
+ pr_debug("target is %u (%u kHz, %u)\n", *index, table[*index].frequency,
+ table[*index].index);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(cpufreq_frequency_table_target);
+
+static DEFINE_PER_CPU(struct cpufreq_frequency_table *, cpufreq_show_table);
+/**
+ * show_available_freqs - show available frequencies for the specified CPU
+ */
+static ssize_t show_available_freqs(struct cpufreq_policy *policy, char *buf)
+{
+ unsigned int i = 0;
+ unsigned int cpu = policy->cpu;
+ ssize_t count = 0;
+ struct cpufreq_frequency_table *table;
+
+ if (!per_cpu(cpufreq_show_table, cpu))
+ return -ENODEV;
+
+ table = per_cpu(cpufreq_show_table, cpu);
+
+ for (i = 0; (table[i].frequency != CPUFREQ_TABLE_END); i++) {
+ if (table[i].frequency == CPUFREQ_ENTRY_INVALID)
+ continue;
+ count += sprintf(&buf[count], "%d ", table[i].frequency);
+ }
+ count += sprintf(&buf[count], "\n");
+
+ return count;
+
+}
+
+struct freq_attr cpufreq_freq_attr_scaling_available_freqs = {
+ .attr = { .name = "scaling_available_frequencies",
+ .mode = 0444,
+ },
+ .show = show_available_freqs,
+};
+EXPORT_SYMBOL_GPL(cpufreq_freq_attr_scaling_available_freqs);
+
+/*
+ * if you use these, you must assure that the frequency table is valid
+ * all the time between get_attr and put_attr!
+ */
+void cpufreq_frequency_table_get_attr(struct cpufreq_frequency_table *table,
+ unsigned int cpu)
+{
+ pr_debug("setting show_table for cpu %u to %p\n", cpu, table);
+ per_cpu(cpufreq_show_table, cpu) = table;
+}
+EXPORT_SYMBOL_GPL(cpufreq_frequency_table_get_attr);
+
+void cpufreq_frequency_table_put_attr(unsigned int cpu)
+{
+ pr_debug("clearing show_table for cpu %u\n", cpu);
+ per_cpu(cpufreq_show_table, cpu) = NULL;
+}
+EXPORT_SYMBOL_GPL(cpufreq_frequency_table_put_attr);
+
+struct cpufreq_frequency_table *cpufreq_frequency_get_table(unsigned int cpu)
+{
+ return per_cpu(cpufreq_show_table, cpu);
+}
+EXPORT_SYMBOL_GPL(cpufreq_frequency_get_table);
+
+MODULE_AUTHOR("Dominik Brodowski <linux@brodo.de>");
+MODULE_DESCRIPTION("CPUfreq frequency table helpers");
+MODULE_LICENSE("GPL");
diff --git a/drivers/cpufreq/gx-suspmod.c b/drivers/cpufreq/gx-suspmod.c
new file mode 100644
index 00000000..ffe1f2c9
--- /dev/null
+++ b/drivers/cpufreq/gx-suspmod.c
@@ -0,0 +1,514 @@
+/*
+ * Cyrix MediaGX and NatSemi Geode Suspend Modulation
+ * (C) 2002 Zwane Mwaikambo <zwane@commfireservices.com>
+ * (C) 2002 Hiroshi Miura <miura@da-cha.org>
+ * All Rights Reserved
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation
+ *
+ * The author(s) of this software shall not be held liable for damages
+ * of any nature resulting due to the use of this software. This
+ * software is provided AS-IS with no warranties.
+ *
+ * Theoretical note:
+ *
+ * (see Geode(tm) CS5530 manual (rev.4.1) page.56)
+ *
+ * CPU frequency control on NatSemi Geode GX1/GXLV processor and CS55x0
+ * are based on Suspend Modulation.
+ *
+ * Suspend Modulation works by asserting and de-asserting the SUSP# pin
+ * to CPU(GX1/GXLV) for configurable durations. When asserting SUSP#
+ * the CPU enters an idle state. GX1 stops its core clock when SUSP# is
+ * asserted then power consumption is reduced.
+ *
+ * Suspend Modulation's OFF/ON duration are configurable
+ * with 'Suspend Modulation OFF Count Register'
+ * and 'Suspend Modulation ON Count Register'.
+ * These registers are 8bit counters that represent the number of
+ * 32us intervals which the SUSP# pin is asserted(ON)/de-asserted(OFF)
+ * to the processor.
+ *
+ * These counters define a ratio which is the effective frequency
+ * of operation of the system.
+ *
+ * OFF Count
+ * F_eff = Fgx * ----------------------
+ * OFF Count + ON Count
+ *
+ * 0 <= On Count, Off Count <= 255
+ *
+ * From these limits, we can get register values
+ *
+ * off_duration + on_duration <= MAX_DURATION
+ * on_duration = off_duration * (stock_freq - freq) / freq
+ *
+ * off_duration = (freq * DURATION) / stock_freq
+ * on_duration = DURATION - off_duration
+ *
+ *
+ *---------------------------------------------------------------------------
+ *
+ * ChangeLog:
+ * Dec. 12, 2003 Hiroshi Miura <miura@da-cha.org>
+ * - fix on/off register mistake
+ * - fix cpu_khz calc when it stops cpu modulation.
+ *
+ * Dec. 11, 2002 Hiroshi Miura <miura@da-cha.org>
+ * - rewrite for Cyrix MediaGX Cx5510/5520 and
+ * NatSemi Geode Cs5530(A).
+ *
+ * Jul. ??, 2002 Zwane Mwaikambo <zwane@commfireservices.com>
+ * - cs5530_mod patch for 2.4.19-rc1.
+ *
+ *---------------------------------------------------------------------------
+ *
+ * Todo
+ * Test on machines with 5510, 5530, 5530A
+ */
+
+/************************************************************************
+ * Suspend Modulation - Definitions *
+ ************************************************************************/
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <linux/cpufreq.h>
+#include <linux/pci.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+
+#include <asm/processor-cyrix.h>
+
+/* PCI config registers, all at F0 */
+#define PCI_PMER1 0x80 /* power management enable register 1 */
+#define PCI_PMER2 0x81 /* power management enable register 2 */
+#define PCI_PMER3 0x82 /* power management enable register 3 */
+#define PCI_IRQTC 0x8c /* irq speedup timer counter register:typical 2 to 4ms */
+#define PCI_VIDTC 0x8d /* video speedup timer counter register: typical 50 to 100ms */
+#define PCI_MODOFF 0x94 /* suspend modulation OFF counter register, 1 = 32us */
+#define PCI_MODON 0x95 /* suspend modulation ON counter register */
+#define PCI_SUSCFG 0x96 /* suspend configuration register */
+
+/* PMER1 bits */
+#define GPM (1<<0) /* global power management */
+#define GIT (1<<1) /* globally enable PM device idle timers */
+#define GTR (1<<2) /* globally enable IO traps */
+#define IRQ_SPDUP (1<<3) /* disable clock throttle during interrupt handling */
+#define VID_SPDUP (1<<4) /* disable clock throttle during vga video handling */
+
+/* SUSCFG bits */
+#define SUSMOD (1<<0) /* enable/disable suspend modulation */
+/* the below is supported only with cs5530 (after rev.1.2)/cs5530A */
+#define SMISPDUP (1<<1) /* select how SMI re-enable suspend modulation: */
+ /* IRQTC timer or read SMI speedup disable reg.(F1BAR[08-09h]) */
+#define SUSCFG (1<<2) /* enable powering down a GXLV processor. "Special 3Volt Suspend" mode */
+/* the below is supported only with cs5530A */
+#define PWRSVE_ISA (1<<3) /* stop ISA clock */
+#define PWRSVE (1<<4) /* active idle */
+
+struct gxfreq_params {
+ u8 on_duration;
+ u8 off_duration;
+ u8 pci_suscfg;
+ u8 pci_pmer1;
+ u8 pci_pmer2;
+ struct pci_dev *cs55x0;
+};
+
+static struct gxfreq_params *gx_params;
+static int stock_freq;
+
+/* PCI bus clock - defaults to 30.000 if cpu_khz is not available */
+static int pci_busclk;
+module_param(pci_busclk, int, 0444);
+
+/* maximum duration for which the cpu may be suspended
+ * (32us * MAX_DURATION). If no parameter is given, this defaults
+ * to 255.
+ * Note that this leads to a maximum of 8 ms(!) where the CPU clock
+ * is suspended -- processing power is just 0.39% of what it used to be,
+ * though. 781.25 kHz(!) for a 200 MHz processor -- wow. */
+static int max_duration = 255;
+module_param(max_duration, int, 0444);
+
+/* For the default policy, we want at least some processing power
+ * - let's say 5%. (min = maxfreq / POLICY_MIN_DIV)
+ */
+#define POLICY_MIN_DIV 20
+
+
+/**
+ * we can detect a core multipiler from dir0_lsb
+ * from GX1 datasheet p.56,
+ * MULT[3:0]:
+ * 0000 = SYSCLK multiplied by 4 (test only)
+ * 0001 = SYSCLK multiplied by 10
+ * 0010 = SYSCLK multiplied by 4
+ * 0011 = SYSCLK multiplied by 6
+ * 0100 = SYSCLK multiplied by 9
+ * 0101 = SYSCLK multiplied by 5
+ * 0110 = SYSCLK multiplied by 7
+ * 0111 = SYSCLK multiplied by 8
+ * of 33.3MHz
+ **/
+static int gx_freq_mult[16] = {
+ 4, 10, 4, 6, 9, 5, 7, 8,
+ 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+
+/****************************************************************
+ * Low Level chipset interface *
+ ****************************************************************/
+static struct pci_device_id gx_chipset_tbl[] __initdata = {
+ { PCI_VDEVICE(CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY), },
+ { PCI_VDEVICE(CYRIX, PCI_DEVICE_ID_CYRIX_5520), },
+ { PCI_VDEVICE(CYRIX, PCI_DEVICE_ID_CYRIX_5510), },
+ { 0, },
+};
+
+static void gx_write_byte(int reg, int value)
+{
+ pci_write_config_byte(gx_params->cs55x0, reg, value);
+}
+
+/**
+ * gx_detect_chipset:
+ *
+ **/
+static __init struct pci_dev *gx_detect_chipset(void)
+{
+ struct pci_dev *gx_pci = NULL;
+
+ /* check if CPU is a MediaGX or a Geode. */
+ if ((boot_cpu_data.x86_vendor != X86_VENDOR_NSC) &&
+ (boot_cpu_data.x86_vendor != X86_VENDOR_CYRIX)) {
+ pr_debug("error: no MediaGX/Geode processor found!\n");
+ return NULL;
+ }
+
+ /* detect which companion chip is used */
+ for_each_pci_dev(gx_pci) {
+ if ((pci_match_id(gx_chipset_tbl, gx_pci)) != NULL)
+ return gx_pci;
+ }
+
+ pr_debug("error: no supported chipset found!\n");
+ return NULL;
+}
+
+/**
+ * gx_get_cpuspeed:
+ *
+ * Finds out at which efficient frequency the Cyrix MediaGX/NatSemi
+ * Geode CPU runs.
+ */
+static unsigned int gx_get_cpuspeed(unsigned int cpu)
+{
+ if ((gx_params->pci_suscfg & SUSMOD) == 0)
+ return stock_freq;
+
+ return (stock_freq * gx_params->off_duration)
+ / (gx_params->on_duration + gx_params->off_duration);
+}
+
+/**
+ * gx_validate_speed:
+ * determine current cpu speed
+ *
+ **/
+
+static unsigned int gx_validate_speed(unsigned int khz, u8 *on_duration,
+ u8 *off_duration)
+{
+ unsigned int i;
+ u8 tmp_on, tmp_off;
+ int old_tmp_freq = stock_freq;
+ int tmp_freq;
+
+ *off_duration = 1;
+ *on_duration = 0;
+
+ for (i = max_duration; i > 0; i--) {
+ tmp_off = ((khz * i) / stock_freq) & 0xff;
+ tmp_on = i - tmp_off;
+ tmp_freq = (stock_freq * tmp_off) / i;
+ /* if this relation is closer to khz, use this. If it's equal,
+ * prefer it, too - lower latency */
+ if (abs(tmp_freq - khz) <= abs(old_tmp_freq - khz)) {
+ *on_duration = tmp_on;
+ *off_duration = tmp_off;
+ old_tmp_freq = tmp_freq;
+ }
+ }
+
+ return old_tmp_freq;
+}
+
+
+/**
+ * gx_set_cpuspeed:
+ * set cpu speed in khz.
+ **/
+
+static void gx_set_cpuspeed(unsigned int khz)
+{
+ u8 suscfg, pmer1;
+ unsigned int new_khz;
+ unsigned long flags;
+ struct cpufreq_freqs freqs;
+
+ freqs.cpu = 0;
+ freqs.old = gx_get_cpuspeed(0);
+
+ new_khz = gx_validate_speed(khz, &gx_params->on_duration,
+ &gx_params->off_duration);
+
+ freqs.new = new_khz;
+
+ cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+ local_irq_save(flags);
+
+
+
+ if (new_khz != stock_freq) {
+ /* if new khz == 100% of CPU speed, it is special case */
+ switch (gx_params->cs55x0->device) {
+ case PCI_DEVICE_ID_CYRIX_5530_LEGACY:
+ pmer1 = gx_params->pci_pmer1 | IRQ_SPDUP | VID_SPDUP;
+ /* FIXME: need to test other values -- Zwane,Miura */
+ /* typical 2 to 4ms */
+ gx_write_byte(PCI_IRQTC, 4);
+ /* typical 50 to 100ms */
+ gx_write_byte(PCI_VIDTC, 100);
+ gx_write_byte(PCI_PMER1, pmer1);
+
+ if (gx_params->cs55x0->revision < 0x10) {
+ /* CS5530(rev 1.2, 1.3) */
+ suscfg = gx_params->pci_suscfg|SUSMOD;
+ } else {
+ /* CS5530A,B.. */
+ suscfg = gx_params->pci_suscfg|SUSMOD|PWRSVE;
+ }
+ break;
+ case PCI_DEVICE_ID_CYRIX_5520:
+ case PCI_DEVICE_ID_CYRIX_5510:
+ suscfg = gx_params->pci_suscfg | SUSMOD;
+ break;
+ default:
+ local_irq_restore(flags);
+ pr_debug("fatal: try to set unknown chipset.\n");
+ return;
+ }
+ } else {
+ suscfg = gx_params->pci_suscfg & ~(SUSMOD);
+ gx_params->off_duration = 0;
+ gx_params->on_duration = 0;
+ pr_debug("suspend modulation disabled: cpu runs 100%% speed.\n");
+ }
+
+ gx_write_byte(PCI_MODOFF, gx_params->off_duration);
+ gx_write_byte(PCI_MODON, gx_params->on_duration);
+
+ gx_write_byte(PCI_SUSCFG, suscfg);
+ pci_read_config_byte(gx_params->cs55x0, PCI_SUSCFG, &suscfg);
+
+ local_irq_restore(flags);
+
+ gx_params->pci_suscfg = suscfg;
+
+ cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+
+ pr_debug("suspend modulation w/ duration of ON:%d us, OFF:%d us\n",
+ gx_params->on_duration * 32, gx_params->off_duration * 32);
+ pr_debug("suspend modulation w/ clock speed: %d kHz.\n", freqs.new);
+}
+
+/****************************************************************
+ * High level functions *
+ ****************************************************************/
+
+/*
+ * cpufreq_gx_verify: test if frequency range is valid
+ *
+ * This function checks if a given frequency range in kHz is valid
+ * for the hardware supported by the driver.
+ */
+
+static int cpufreq_gx_verify(struct cpufreq_policy *policy)
+{
+ unsigned int tmp_freq = 0;
+ u8 tmp1, tmp2;
+
+ if (!stock_freq || !policy)
+ return -EINVAL;
+
+ policy->cpu = 0;
+ cpufreq_verify_within_limits(policy, (stock_freq / max_duration),
+ stock_freq);
+
+ /* it needs to be assured that at least one supported frequency is
+ * within policy->min and policy->max. If it is not, policy->max
+ * needs to be increased until one freuqency is supported.
+ * policy->min may not be decreased, though. This way we guarantee a
+ * specific processing capacity.
+ */
+ tmp_freq = gx_validate_speed(policy->min, &tmp1, &tmp2);
+ if (tmp_freq < policy->min)
+ tmp_freq += stock_freq / max_duration;
+ policy->min = tmp_freq;
+ if (policy->min > policy->max)
+ policy->max = tmp_freq;
+ tmp_freq = gx_validate_speed(policy->max, &tmp1, &tmp2);
+ if (tmp_freq > policy->max)
+ tmp_freq -= stock_freq / max_duration;
+ policy->max = tmp_freq;
+ if (policy->max < policy->min)
+ policy->max = policy->min;
+ cpufreq_verify_within_limits(policy, (stock_freq / max_duration),
+ stock_freq);
+
+ return 0;
+}
+
+/*
+ * cpufreq_gx_target:
+ *
+ */
+static int cpufreq_gx_target(struct cpufreq_policy *policy,
+ unsigned int target_freq,
+ unsigned int relation)
+{
+ u8 tmp1, tmp2;
+ unsigned int tmp_freq;
+
+ if (!stock_freq || !policy)
+ return -EINVAL;
+
+ policy->cpu = 0;
+
+ tmp_freq = gx_validate_speed(target_freq, &tmp1, &tmp2);
+ while (tmp_freq < policy->min) {
+ tmp_freq += stock_freq / max_duration;
+ tmp_freq = gx_validate_speed(tmp_freq, &tmp1, &tmp2);
+ }
+ while (tmp_freq > policy->max) {
+ tmp_freq -= stock_freq / max_duration;
+ tmp_freq = gx_validate_speed(tmp_freq, &tmp1, &tmp2);
+ }
+
+ gx_set_cpuspeed(tmp_freq);
+
+ return 0;
+}
+
+static int cpufreq_gx_cpu_init(struct cpufreq_policy *policy)
+{
+ unsigned int maxfreq, curfreq;
+
+ if (!policy || policy->cpu != 0)
+ return -ENODEV;
+
+ /* determine maximum frequency */
+ if (pci_busclk)
+ maxfreq = pci_busclk * gx_freq_mult[getCx86(CX86_DIR1) & 0x0f];
+ else if (cpu_khz)
+ maxfreq = cpu_khz;
+ else
+ maxfreq = 30000 * gx_freq_mult[getCx86(CX86_DIR1) & 0x0f];
+
+ stock_freq = maxfreq;
+ curfreq = gx_get_cpuspeed(0);
+
+ pr_debug("cpu max frequency is %d.\n", maxfreq);
+ pr_debug("cpu current frequency is %dkHz.\n", curfreq);
+
+ /* setup basic struct for cpufreq API */
+ policy->cpu = 0;
+
+ if (max_duration < POLICY_MIN_DIV)
+ policy->min = maxfreq / max_duration;
+ else
+ policy->min = maxfreq / POLICY_MIN_DIV;
+ policy->max = maxfreq;
+ policy->cur = curfreq;
+ policy->cpuinfo.min_freq = maxfreq / max_duration;
+ policy->cpuinfo.max_freq = maxfreq;
+ policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
+
+ return 0;
+}
+
+/*
+ * cpufreq_gx_init:
+ * MediaGX/Geode GX initialize cpufreq driver
+ */
+static struct cpufreq_driver gx_suspmod_driver = {
+ .get = gx_get_cpuspeed,
+ .verify = cpufreq_gx_verify,
+ .target = cpufreq_gx_target,
+ .init = cpufreq_gx_cpu_init,
+ .name = "gx-suspmod",
+ .owner = THIS_MODULE,
+};
+
+static int __init cpufreq_gx_init(void)
+{
+ int ret;
+ struct gxfreq_params *params;
+ struct pci_dev *gx_pci;
+
+ /* Test if we have the right hardware */
+ gx_pci = gx_detect_chipset();
+ if (gx_pci == NULL)
+ return -ENODEV;
+
+ /* check whether module parameters are sane */
+ if (max_duration > 0xff)
+ max_duration = 0xff;
+
+ pr_debug("geode suspend modulation available.\n");
+
+ params = kzalloc(sizeof(struct gxfreq_params), GFP_KERNEL);
+ if (params == NULL)
+ return -ENOMEM;
+
+ params->cs55x0 = gx_pci;
+ gx_params = params;
+
+ /* keep cs55x0 configurations */
+ pci_read_config_byte(params->cs55x0, PCI_SUSCFG, &(params->pci_suscfg));
+ pci_read_config_byte(params->cs55x0, PCI_PMER1, &(params->pci_pmer1));
+ pci_read_config_byte(params->cs55x0, PCI_PMER2, &(params->pci_pmer2));
+ pci_read_config_byte(params->cs55x0, PCI_MODON, &(params->on_duration));
+ pci_read_config_byte(params->cs55x0, PCI_MODOFF,
+ &(params->off_duration));
+
+ ret = cpufreq_register_driver(&gx_suspmod_driver);
+ if (ret) {
+ kfree(params);
+ return ret; /* register error! */
+ }
+
+ return 0;
+}
+
+static void __exit cpufreq_gx_exit(void)
+{
+ cpufreq_unregister_driver(&gx_suspmod_driver);
+ pci_dev_put(gx_params->cs55x0);
+ kfree(gx_params);
+}
+
+MODULE_AUTHOR("Hiroshi Miura <miura@da-cha.org>");
+MODULE_DESCRIPTION("Cpufreq driver for Cyrix MediaGX and NatSemi Geode");
+MODULE_LICENSE("GPL");
+
+module_init(cpufreq_gx_init);
+module_exit(cpufreq_gx_exit);
+
diff --git a/drivers/cpufreq/longhaul.c b/drivers/cpufreq/longhaul.c
new file mode 100644
index 00000000..f47d26e2
--- /dev/null
+++ b/drivers/cpufreq/longhaul.c
@@ -0,0 +1,1024 @@
+/*
+ * (C) 2001-2004 Dave Jones. <davej@redhat.com>
+ * (C) 2002 Padraig Brady. <padraig@antefacto.com>
+ *
+ * Licensed under the terms of the GNU GPL License version 2.
+ * Based upon datasheets & sample CPUs kindly provided by VIA.
+ *
+ * VIA have currently 3 different versions of Longhaul.
+ * Version 1 (Longhaul) uses the BCR2 MSR at 0x1147.
+ * It is present only in Samuel 1 (C5A), Samuel 2 (C5B) stepping 0.
+ * Version 2 of longhaul is backward compatible with v1, but adds
+ * LONGHAUL MSR for purpose of both frequency and voltage scaling.
+ * Present in Samuel 2 (steppings 1-7 only) (C5B), and Ezra (C5C).
+ * Version 3 of longhaul got renamed to Powersaver and redesigned
+ * to use only the POWERSAVER MSR at 0x110a.
+ * It is present in Ezra-T (C5M), Nehemiah (C5X) and above.
+ * It's pretty much the same feature wise to longhaul v2, though
+ * there is provision for scaling FSB too, but this doesn't work
+ * too well in practice so we don't even try to use this.
+ *
+ * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/init.h>
+#include <linux/cpufreq.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/delay.h>
+#include <linux/timex.h>
+#include <linux/io.h>
+#include <linux/acpi.h>
+
+#include <asm/msr.h>
+#include <acpi/processor.h>
+
+#include "longhaul.h"
+
+#define PFX "longhaul: "
+
+#define TYPE_LONGHAUL_V1 1
+#define TYPE_LONGHAUL_V2 2
+#define TYPE_POWERSAVER 3
+
+#define CPU_SAMUEL 1
+#define CPU_SAMUEL2 2
+#define CPU_EZRA 3
+#define CPU_EZRA_T 4
+#define CPU_NEHEMIAH 5
+#define CPU_NEHEMIAH_C 6
+
+/* Flags */
+#define USE_ACPI_C3 (1 << 1)
+#define USE_NORTHBRIDGE (1 << 2)
+
+static int cpu_model;
+static unsigned int numscales = 16;
+static unsigned int fsb;
+
+static const struct mV_pos *vrm_mV_table;
+static const unsigned char *mV_vrm_table;
+
+static unsigned int highest_speed, lowest_speed; /* kHz */
+static unsigned int minmult, maxmult;
+static int can_scale_voltage;
+static struct acpi_processor *pr;
+static struct acpi_processor_cx *cx;
+static u32 acpi_regs_addr;
+static u8 longhaul_flags;
+static unsigned int longhaul_index;
+
+/* Module parameters */
+static int scale_voltage;
+static int disable_acpi_c3;
+static int revid_errata;
+
+
+/* Clock ratios multiplied by 10 */
+static int mults[32];
+static int eblcr[32];
+static int longhaul_version;
+static struct cpufreq_frequency_table *longhaul_table;
+
+static char speedbuffer[8];
+
+static char *print_speed(int speed)
+{
+ if (speed < 1000) {
+ snprintf(speedbuffer, sizeof(speedbuffer), "%dMHz", speed);
+ return speedbuffer;
+ }
+
+ if (speed%1000 == 0)
+ snprintf(speedbuffer, sizeof(speedbuffer),
+ "%dGHz", speed/1000);
+ else
+ snprintf(speedbuffer, sizeof(speedbuffer),
+ "%d.%dGHz", speed/1000, (speed%1000)/100);
+
+ return speedbuffer;
+}
+
+
+static unsigned int calc_speed(int mult)
+{
+ int khz;
+ khz = (mult/10)*fsb;
+ if (mult%10)
+ khz += fsb/2;
+ khz *= 1000;
+ return khz;
+}
+
+
+static int longhaul_get_cpu_mult(void)
+{
+ unsigned long invalue = 0, lo, hi;
+
+ rdmsr(MSR_IA32_EBL_CR_POWERON, lo, hi);
+ invalue = (lo & (1<<22|1<<23|1<<24|1<<25))>>22;
+ if (longhaul_version == TYPE_LONGHAUL_V2 ||
+ longhaul_version == TYPE_POWERSAVER) {
+ if (lo & (1<<27))
+ invalue += 16;
+ }
+ return eblcr[invalue];
+}
+
+/* For processor with BCR2 MSR */
+
+static void do_longhaul1(unsigned int mults_index)
+{
+ union msr_bcr2 bcr2;
+
+ rdmsrl(MSR_VIA_BCR2, bcr2.val);
+ /* Enable software clock multiplier */
+ bcr2.bits.ESOFTBF = 1;
+ bcr2.bits.CLOCKMUL = mults_index & 0xff;
+
+ /* Sync to timer tick */
+ safe_halt();
+ /* Change frequency on next halt or sleep */
+ wrmsrl(MSR_VIA_BCR2, bcr2.val);
+ /* Invoke transition */
+ ACPI_FLUSH_CPU_CACHE();
+ halt();
+
+ /* Disable software clock multiplier */
+ local_irq_disable();
+ rdmsrl(MSR_VIA_BCR2, bcr2.val);
+ bcr2.bits.ESOFTBF = 0;
+ wrmsrl(MSR_VIA_BCR2, bcr2.val);
+}
+
+/* For processor with Longhaul MSR */
+
+static void do_powersaver(int cx_address, unsigned int mults_index,
+ unsigned int dir)
+{
+ union msr_longhaul longhaul;
+ u32 t;
+
+ rdmsrl(MSR_VIA_LONGHAUL, longhaul.val);
+ /* Setup new frequency */
+ if (!revid_errata)
+ longhaul.bits.RevisionKey = longhaul.bits.RevisionID;
+ else
+ longhaul.bits.RevisionKey = 0;
+ longhaul.bits.SoftBusRatio = mults_index & 0xf;
+ longhaul.bits.SoftBusRatio4 = (mults_index & 0x10) >> 4;
+ /* Setup new voltage */
+ if (can_scale_voltage)
+ longhaul.bits.SoftVID = (mults_index >> 8) & 0x1f;
+ /* Sync to timer tick */
+ safe_halt();
+ /* Raise voltage if necessary */
+ if (can_scale_voltage && dir) {
+ longhaul.bits.EnableSoftVID = 1;
+ wrmsrl(MSR_VIA_LONGHAUL, longhaul.val);
+ /* Change voltage */
+ if (!cx_address) {
+ ACPI_FLUSH_CPU_CACHE();
+ halt();
+ } else {
+ ACPI_FLUSH_CPU_CACHE();
+ /* Invoke C3 */
+ inb(cx_address);
+ /* Dummy op - must do something useless after P_LVL3
+ * read */
+ t = inl(acpi_gbl_FADT.xpm_timer_block.address);
+ }
+ longhaul.bits.EnableSoftVID = 0;
+ wrmsrl(MSR_VIA_LONGHAUL, longhaul.val);
+ }
+
+ /* Change frequency on next halt or sleep */
+ longhaul.bits.EnableSoftBusRatio = 1;
+ wrmsrl(MSR_VIA_LONGHAUL, longhaul.val);
+ if (!cx_address) {
+ ACPI_FLUSH_CPU_CACHE();
+ halt();
+ } else {
+ ACPI_FLUSH_CPU_CACHE();
+ /* Invoke C3 */
+ inb(cx_address);
+ /* Dummy op - must do something useless after P_LVL3 read */
+ t = inl(acpi_gbl_FADT.xpm_timer_block.address);
+ }
+ /* Disable bus ratio bit */
+ longhaul.bits.EnableSoftBusRatio = 0;
+ wrmsrl(MSR_VIA_LONGHAUL, longhaul.val);
+
+ /* Reduce voltage if necessary */
+ if (can_scale_voltage && !dir) {
+ longhaul.bits.EnableSoftVID = 1;
+ wrmsrl(MSR_VIA_LONGHAUL, longhaul.val);
+ /* Change voltage */
+ if (!cx_address) {
+ ACPI_FLUSH_CPU_CACHE();
+ halt();
+ } else {
+ ACPI_FLUSH_CPU_CACHE();
+ /* Invoke C3 */
+ inb(cx_address);
+ /* Dummy op - must do something useless after P_LVL3
+ * read */
+ t = inl(acpi_gbl_FADT.xpm_timer_block.address);
+ }
+ longhaul.bits.EnableSoftVID = 0;
+ wrmsrl(MSR_VIA_LONGHAUL, longhaul.val);
+ }
+}
+
+/**
+ * longhaul_set_cpu_frequency()
+ * @mults_index : bitpattern of the new multiplier.
+ *
+ * Sets a new clock ratio.
+ */
+
+static void longhaul_setstate(unsigned int table_index)
+{
+ unsigned int mults_index;
+ int speed, mult;
+ struct cpufreq_freqs freqs;
+ unsigned long flags;
+ unsigned int pic1_mask, pic2_mask;
+ u16 bm_status = 0;
+ u32 bm_timeout = 1000;
+ unsigned int dir = 0;
+
+ mults_index = longhaul_table[table_index].index;
+ /* Safety precautions */
+ mult = mults[mults_index & 0x1f];
+ if (mult == -1)
+ return;
+ speed = calc_speed(mult);
+ if ((speed > highest_speed) || (speed < lowest_speed))
+ return;
+ /* Voltage transition before frequency transition? */
+ if (can_scale_voltage && longhaul_index < table_index)
+ dir = 1;
+
+ freqs.old = calc_speed(longhaul_get_cpu_mult());
+ freqs.new = speed;
+ freqs.cpu = 0; /* longhaul.c is UP only driver */
+
+ cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+
+ pr_debug("Setting to FSB:%dMHz Mult:%d.%dx (%s)\n",
+ fsb, mult/10, mult%10, print_speed(speed/1000));
+retry_loop:
+ preempt_disable();
+ local_irq_save(flags);
+
+ pic2_mask = inb(0xA1);
+ pic1_mask = inb(0x21); /* works on C3. save mask. */
+ outb(0xFF, 0xA1); /* Overkill */
+ outb(0xFE, 0x21); /* TMR0 only */
+
+ /* Wait while PCI bus is busy. */
+ if (acpi_regs_addr && (longhaul_flags & USE_NORTHBRIDGE
+ || ((pr != NULL) && pr->flags.bm_control))) {
+ bm_status = inw(acpi_regs_addr);
+ bm_status &= 1 << 4;
+ while (bm_status && bm_timeout) {
+ outw(1 << 4, acpi_regs_addr);
+ bm_timeout--;
+ bm_status = inw(acpi_regs_addr);
+ bm_status &= 1 << 4;
+ }
+ }
+
+ if (longhaul_flags & USE_NORTHBRIDGE) {
+ /* Disable AGP and PCI arbiters */
+ outb(3, 0x22);
+ } else if ((pr != NULL) && pr->flags.bm_control) {
+ /* Disable bus master arbitration */
+ acpi_write_bit_register(ACPI_BITREG_ARB_DISABLE, 1);
+ }
+ switch (longhaul_version) {
+
+ /*
+ * Longhaul v1. (Samuel[C5A] and Samuel2 stepping 0[C5B])
+ * Software controlled multipliers only.
+ */
+ case TYPE_LONGHAUL_V1:
+ do_longhaul1(mults_index);
+ break;
+
+ /*
+ * Longhaul v2 appears in Samuel2 Steppings 1->7 [C5B] and Ezra [C5C]
+ *
+ * Longhaul v3 (aka Powersaver). (Ezra-T [C5M] & Nehemiah [C5N])
+ * Nehemiah can do FSB scaling too, but this has never been proven
+ * to work in practice.
+ */
+ case TYPE_LONGHAUL_V2:
+ case TYPE_POWERSAVER:
+ if (longhaul_flags & USE_ACPI_C3) {
+ /* Don't allow wakeup */
+ acpi_write_bit_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
+ do_powersaver(cx->address, mults_index, dir);
+ } else {
+ do_powersaver(0, mults_index, dir);
+ }
+ break;
+ }
+
+ if (longhaul_flags & USE_NORTHBRIDGE) {
+ /* Enable arbiters */
+ outb(0, 0x22);
+ } else if ((pr != NULL) && pr->flags.bm_control) {
+ /* Enable bus master arbitration */
+ acpi_write_bit_register(ACPI_BITREG_ARB_DISABLE, 0);
+ }
+ outb(pic2_mask, 0xA1); /* restore mask */
+ outb(pic1_mask, 0x21);
+
+ local_irq_restore(flags);
+ preempt_enable();
+
+ freqs.new = calc_speed(longhaul_get_cpu_mult());
+ /* Check if requested frequency is set. */
+ if (unlikely(freqs.new != speed)) {
+ printk(KERN_INFO PFX "Failed to set requested frequency!\n");
+ /* Revision ID = 1 but processor is expecting revision key
+ * equal to 0. Jumpers at the bottom of processor will change
+ * multiplier and FSB, but will not change bits in Longhaul
+ * MSR nor enable voltage scaling. */
+ if (!revid_errata) {
+ printk(KERN_INFO PFX "Enabling \"Ignore Revision ID\" "
+ "option.\n");
+ revid_errata = 1;
+ msleep(200);
+ goto retry_loop;
+ }
+ /* Why ACPI C3 sometimes doesn't work is a mystery for me.
+ * But it does happen. Processor is entering ACPI C3 state,
+ * but it doesn't change frequency. I tried poking various
+ * bits in northbridge registers, but without success. */
+ if (longhaul_flags & USE_ACPI_C3) {
+ printk(KERN_INFO PFX "Disabling ACPI C3 support.\n");
+ longhaul_flags &= ~USE_ACPI_C3;
+ if (revid_errata) {
+ printk(KERN_INFO PFX "Disabling \"Ignore "
+ "Revision ID\" option.\n");
+ revid_errata = 0;
+ }
+ msleep(200);
+ goto retry_loop;
+ }
+ /* This shouldn't happen. Longhaul ver. 2 was reported not
+ * working on processors without voltage scaling, but with
+ * RevID = 1. RevID errata will make things right. Just
+ * to be 100% sure. */
+ if (longhaul_version == TYPE_LONGHAUL_V2) {
+ printk(KERN_INFO PFX "Switching to Longhaul ver. 1\n");
+ longhaul_version = TYPE_LONGHAUL_V1;
+ msleep(200);
+ goto retry_loop;
+ }
+ }
+ /* Report true CPU frequency */
+ cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+
+ if (!bm_timeout)
+ printk(KERN_INFO PFX "Warning: Timeout while waiting for "
+ "idle PCI bus.\n");
+}
+
+/*
+ * Centaur decided to make life a little more tricky.
+ * Only longhaul v1 is allowed to read EBLCR BSEL[0:1].
+ * Samuel2 and above have to try and guess what the FSB is.
+ * We do this by assuming we booted at maximum multiplier, and interpolate
+ * between that value multiplied by possible FSBs and cpu_mhz which
+ * was calculated at boot time. Really ugly, but no other way to do this.
+ */
+
+#define ROUNDING 0xf
+
+static int guess_fsb(int mult)
+{
+ int speed = cpu_khz / 1000;
+ int i;
+ int speeds[] = { 666, 1000, 1333, 2000 };
+ int f_max, f_min;
+
+ for (i = 0; i < 4; i++) {
+ f_max = ((speeds[i] * mult) + 50) / 100;
+ f_max += (ROUNDING / 2);
+ f_min = f_max - ROUNDING;
+ if ((speed <= f_max) && (speed >= f_min))
+ return speeds[i] / 10;
+ }
+ return 0;
+}
+
+
+static int __cpuinit longhaul_get_ranges(void)
+{
+ unsigned int i, j, k = 0;
+ unsigned int ratio;
+ int mult;
+
+ /* Get current frequency */
+ mult = longhaul_get_cpu_mult();
+ if (mult == -1) {
+ printk(KERN_INFO PFX "Invalid (reserved) multiplier!\n");
+ return -EINVAL;
+ }
+ fsb = guess_fsb(mult);
+ if (fsb == 0) {
+ printk(KERN_INFO PFX "Invalid (reserved) FSB!\n");
+ return -EINVAL;
+ }
+ /* Get max multiplier - as we always did.
+ * Longhaul MSR is useful only when voltage scaling is enabled.
+ * C3 is booting at max anyway. */
+ maxmult = mult;
+ /* Get min multiplier */
+ switch (cpu_model) {
+ case CPU_NEHEMIAH:
+ minmult = 50;
+ break;
+ case CPU_NEHEMIAH_C:
+ minmult = 40;
+ break;
+ default:
+ minmult = 30;
+ break;
+ }
+
+ pr_debug("MinMult:%d.%dx MaxMult:%d.%dx\n",
+ minmult/10, minmult%10, maxmult/10, maxmult%10);
+
+ highest_speed = calc_speed(maxmult);
+ lowest_speed = calc_speed(minmult);
+ pr_debug("FSB:%dMHz Lowest speed: %s Highest speed:%s\n", fsb,
+ print_speed(lowest_speed/1000),
+ print_speed(highest_speed/1000));
+
+ if (lowest_speed == highest_speed) {
+ printk(KERN_INFO PFX "highestspeed == lowest, aborting.\n");
+ return -EINVAL;
+ }
+ if (lowest_speed > highest_speed) {
+ printk(KERN_INFO PFX "nonsense! lowest (%d > %d) !\n",
+ lowest_speed, highest_speed);
+ return -EINVAL;
+ }
+
+ longhaul_table = kmalloc((numscales + 1) * sizeof(*longhaul_table),
+ GFP_KERNEL);
+ if (!longhaul_table)
+ return -ENOMEM;
+
+ for (j = 0; j < numscales; j++) {
+ ratio = mults[j];
+ if (ratio == -1)
+ continue;
+ if (ratio > maxmult || ratio < minmult)
+ continue;
+ longhaul_table[k].frequency = calc_speed(ratio);
+ longhaul_table[k].index = j;
+ k++;
+ }
+ if (k <= 1) {
+ kfree(longhaul_table);
+ return -ENODEV;
+ }
+ /* Sort */
+ for (j = 0; j < k - 1; j++) {
+ unsigned int min_f, min_i;
+ min_f = longhaul_table[j].frequency;
+ min_i = j;
+ for (i = j + 1; i < k; i++) {
+ if (longhaul_table[i].frequency < min_f) {
+ min_f = longhaul_table[i].frequency;
+ min_i = i;
+ }
+ }
+ if (min_i != j) {
+ swap(longhaul_table[j].frequency,
+ longhaul_table[min_i].frequency);
+ swap(longhaul_table[j].index,
+ longhaul_table[min_i].index);
+ }
+ }
+
+ longhaul_table[k].frequency = CPUFREQ_TABLE_END;
+
+ /* Find index we are running on */
+ for (j = 0; j < k; j++) {
+ if (mults[longhaul_table[j].index & 0x1f] == mult) {
+ longhaul_index = j;
+ break;
+ }
+ }
+ return 0;
+}
+
+
+static void __cpuinit longhaul_setup_voltagescaling(void)
+{
+ union msr_longhaul longhaul;
+ struct mV_pos minvid, maxvid, vid;
+ unsigned int j, speed, pos, kHz_step, numvscales;
+ int min_vid_speed;
+
+ rdmsrl(MSR_VIA_LONGHAUL, longhaul.val);
+ if (!(longhaul.bits.RevisionID & 1)) {
+ printk(KERN_INFO PFX "Voltage scaling not supported by CPU.\n");
+ return;
+ }
+
+ if (!longhaul.bits.VRMRev) {
+ printk(KERN_INFO PFX "VRM 8.5\n");
+ vrm_mV_table = &vrm85_mV[0];
+ mV_vrm_table = &mV_vrm85[0];
+ } else {
+ printk(KERN_INFO PFX "Mobile VRM\n");
+ if (cpu_model < CPU_NEHEMIAH)
+ return;
+ vrm_mV_table = &mobilevrm_mV[0];
+ mV_vrm_table = &mV_mobilevrm[0];
+ }
+
+ minvid = vrm_mV_table[longhaul.bits.MinimumVID];
+ maxvid = vrm_mV_table[longhaul.bits.MaximumVID];
+
+ if (minvid.mV == 0 || maxvid.mV == 0 || minvid.mV > maxvid.mV) {
+ printk(KERN_INFO PFX "Bogus values Min:%d.%03d Max:%d.%03d. "
+ "Voltage scaling disabled.\n",
+ minvid.mV/1000, minvid.mV%1000,
+ maxvid.mV/1000, maxvid.mV%1000);
+ return;
+ }
+
+ if (minvid.mV == maxvid.mV) {
+ printk(KERN_INFO PFX "Claims to support voltage scaling but "
+ "min & max are both %d.%03d. "
+ "Voltage scaling disabled\n",
+ maxvid.mV/1000, maxvid.mV%1000);
+ return;
+ }
+
+ /* How many voltage steps*/
+ numvscales = maxvid.pos - minvid.pos + 1;
+ printk(KERN_INFO PFX
+ "Max VID=%d.%03d "
+ "Min VID=%d.%03d, "
+ "%d possible voltage scales\n",
+ maxvid.mV/1000, maxvid.mV%1000,
+ minvid.mV/1000, minvid.mV%1000,
+ numvscales);
+
+ /* Calculate max frequency at min voltage */
+ j = longhaul.bits.MinMHzBR;
+ if (longhaul.bits.MinMHzBR4)
+ j += 16;
+ min_vid_speed = eblcr[j];
+ if (min_vid_speed == -1)
+ return;
+ switch (longhaul.bits.MinMHzFSB) {
+ case 0:
+ min_vid_speed *= 13333;
+ break;
+ case 1:
+ min_vid_speed *= 10000;
+ break;
+ case 3:
+ min_vid_speed *= 6666;
+ break;
+ default:
+ return;
+ break;
+ }
+ if (min_vid_speed >= highest_speed)
+ return;
+ /* Calculate kHz for one voltage step */
+ kHz_step = (highest_speed - min_vid_speed) / numvscales;
+
+ j = 0;
+ while (longhaul_table[j].frequency != CPUFREQ_TABLE_END) {
+ speed = longhaul_table[j].frequency;
+ if (speed > min_vid_speed)
+ pos = (speed - min_vid_speed) / kHz_step + minvid.pos;
+ else
+ pos = minvid.pos;
+ longhaul_table[j].index |= mV_vrm_table[pos] << 8;
+ vid = vrm_mV_table[mV_vrm_table[pos]];
+ printk(KERN_INFO PFX "f: %d kHz, index: %d, vid: %d mV\n",
+ speed, j, vid.mV);
+ j++;
+ }
+
+ can_scale_voltage = 1;
+ printk(KERN_INFO PFX "Voltage scaling enabled.\n");
+}
+
+
+static int longhaul_verify(struct cpufreq_policy *policy)
+{
+ return cpufreq_frequency_table_verify(policy, longhaul_table);
+}
+
+
+static int longhaul_target(struct cpufreq_policy *policy,
+ unsigned int target_freq, unsigned int relation)
+{
+ unsigned int table_index = 0;
+ unsigned int i;
+ unsigned int dir = 0;
+ u8 vid, current_vid;
+
+ if (cpufreq_frequency_table_target(policy, longhaul_table, target_freq,
+ relation, &table_index))
+ return -EINVAL;
+
+ /* Don't set same frequency again */
+ if (longhaul_index == table_index)
+ return 0;
+
+ if (!can_scale_voltage)
+ longhaul_setstate(table_index);
+ else {
+ /* On test system voltage transitions exceeding single
+ * step up or down were turning motherboard off. Both
+ * "ondemand" and "userspace" are unsafe. C7 is doing
+ * this in hardware, C3 is old and we need to do this
+ * in software. */
+ i = longhaul_index;
+ current_vid = (longhaul_table[longhaul_index].index >> 8);
+ current_vid &= 0x1f;
+ if (table_index > longhaul_index)
+ dir = 1;
+ while (i != table_index) {
+ vid = (longhaul_table[i].index >> 8) & 0x1f;
+ if (vid != current_vid) {
+ longhaul_setstate(i);
+ current_vid = vid;
+ msleep(200);
+ }
+ if (dir)
+ i++;
+ else
+ i--;
+ }
+ longhaul_setstate(table_index);
+ }
+ longhaul_index = table_index;
+ return 0;
+}
+
+
+static unsigned int longhaul_get(unsigned int cpu)
+{
+ if (cpu)
+ return 0;
+ return calc_speed(longhaul_get_cpu_mult());
+}
+
+static acpi_status longhaul_walk_callback(acpi_handle obj_handle,
+ u32 nesting_level,
+ void *context, void **return_value)
+{
+ struct acpi_device *d;
+
+ if (acpi_bus_get_device(obj_handle, &d))
+ return 0;
+
+ *return_value = acpi_driver_data(d);
+ return 1;
+}
+
+/* VIA don't support PM2 reg, but have something similar */
+static int enable_arbiter_disable(void)
+{
+ struct pci_dev *dev;
+ int status = 1;
+ int reg;
+ u8 pci_cmd;
+
+ /* Find PLE133 host bridge */
+ reg = 0x78;
+ dev = pci_get_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8601_0,
+ NULL);
+ /* Find PM133/VT8605 host bridge */
+ if (dev == NULL)
+ dev = pci_get_device(PCI_VENDOR_ID_VIA,
+ PCI_DEVICE_ID_VIA_8605_0, NULL);
+ /* Find CLE266 host bridge */
+ if (dev == NULL) {
+ reg = 0x76;
+ dev = pci_get_device(PCI_VENDOR_ID_VIA,
+ PCI_DEVICE_ID_VIA_862X_0, NULL);
+ /* Find CN400 V-Link host bridge */
+ if (dev == NULL)
+ dev = pci_get_device(PCI_VENDOR_ID_VIA, 0x7259, NULL);
+ }
+ if (dev != NULL) {
+ /* Enable access to port 0x22 */
+ pci_read_config_byte(dev, reg, &pci_cmd);
+ if (!(pci_cmd & 1<<7)) {
+ pci_cmd |= 1<<7;
+ pci_write_config_byte(dev, reg, pci_cmd);
+ pci_read_config_byte(dev, reg, &pci_cmd);
+ if (!(pci_cmd & 1<<7)) {
+ printk(KERN_ERR PFX
+ "Can't enable access to port 0x22.\n");
+ status = 0;
+ }
+ }
+ pci_dev_put(dev);
+ return status;
+ }
+ return 0;
+}
+
+static int longhaul_setup_southbridge(void)
+{
+ struct pci_dev *dev;
+ u8 pci_cmd;
+
+ /* Find VT8235 southbridge */
+ dev = pci_get_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8235, NULL);
+ if (dev == NULL)
+ /* Find VT8237 southbridge */
+ dev = pci_get_device(PCI_VENDOR_ID_VIA,
+ PCI_DEVICE_ID_VIA_8237, NULL);
+ if (dev != NULL) {
+ /* Set transition time to max */
+ pci_read_config_byte(dev, 0xec, &pci_cmd);
+ pci_cmd &= ~(1 << 2);
+ pci_write_config_byte(dev, 0xec, pci_cmd);
+ pci_read_config_byte(dev, 0xe4, &pci_cmd);
+ pci_cmd &= ~(1 << 7);
+ pci_write_config_byte(dev, 0xe4, pci_cmd);
+ pci_read_config_byte(dev, 0xe5, &pci_cmd);
+ pci_cmd |= 1 << 7;
+ pci_write_config_byte(dev, 0xe5, pci_cmd);
+ /* Get address of ACPI registers block*/
+ pci_read_config_byte(dev, 0x81, &pci_cmd);
+ if (pci_cmd & 1 << 7) {
+ pci_read_config_dword(dev, 0x88, &acpi_regs_addr);
+ acpi_regs_addr &= 0xff00;
+ printk(KERN_INFO PFX "ACPI I/O at 0x%x\n",
+ acpi_regs_addr);
+ }
+
+ pci_dev_put(dev);
+ return 1;
+ }
+ return 0;
+}
+
+static int __cpuinit longhaul_cpu_init(struct cpufreq_policy *policy)
+{
+ struct cpuinfo_x86 *c = &cpu_data(0);
+ char *cpuname = NULL;
+ int ret;
+ u32 lo, hi;
+
+ /* Check what we have on this motherboard */
+ switch (c->x86_model) {
+ case 6:
+ cpu_model = CPU_SAMUEL;
+ cpuname = "C3 'Samuel' [C5A]";
+ longhaul_version = TYPE_LONGHAUL_V1;
+ memcpy(mults, samuel1_mults, sizeof(samuel1_mults));
+ memcpy(eblcr, samuel1_eblcr, sizeof(samuel1_eblcr));
+ break;
+
+ case 7:
+ switch (c->x86_mask) {
+ case 0:
+ longhaul_version = TYPE_LONGHAUL_V1;
+ cpu_model = CPU_SAMUEL2;
+ cpuname = "C3 'Samuel 2' [C5B]";
+ /* Note, this is not a typo, early Samuel2's had
+ * Samuel1 ratios. */
+ memcpy(mults, samuel1_mults, sizeof(samuel1_mults));
+ memcpy(eblcr, samuel2_eblcr, sizeof(samuel2_eblcr));
+ break;
+ case 1 ... 15:
+ longhaul_version = TYPE_LONGHAUL_V2;
+ if (c->x86_mask < 8) {
+ cpu_model = CPU_SAMUEL2;
+ cpuname = "C3 'Samuel 2' [C5B]";
+ } else {
+ cpu_model = CPU_EZRA;
+ cpuname = "C3 'Ezra' [C5C]";
+ }
+ memcpy(mults, ezra_mults, sizeof(ezra_mults));
+ memcpy(eblcr, ezra_eblcr, sizeof(ezra_eblcr));
+ break;
+ }
+ break;
+
+ case 8:
+ cpu_model = CPU_EZRA_T;
+ cpuname = "C3 'Ezra-T' [C5M]";
+ longhaul_version = TYPE_POWERSAVER;
+ numscales = 32;
+ memcpy(mults, ezrat_mults, sizeof(ezrat_mults));
+ memcpy(eblcr, ezrat_eblcr, sizeof(ezrat_eblcr));
+ break;
+
+ case 9:
+ longhaul_version = TYPE_POWERSAVER;
+ numscales = 32;
+ memcpy(mults, nehemiah_mults, sizeof(nehemiah_mults));
+ memcpy(eblcr, nehemiah_eblcr, sizeof(nehemiah_eblcr));
+ switch (c->x86_mask) {
+ case 0 ... 1:
+ cpu_model = CPU_NEHEMIAH;
+ cpuname = "C3 'Nehemiah A' [C5XLOE]";
+ break;
+ case 2 ... 4:
+ cpu_model = CPU_NEHEMIAH;
+ cpuname = "C3 'Nehemiah B' [C5XLOH]";
+ break;
+ case 5 ... 15:
+ cpu_model = CPU_NEHEMIAH_C;
+ cpuname = "C3 'Nehemiah C' [C5P]";
+ break;
+ }
+ break;
+
+ default:
+ cpuname = "Unknown";
+ break;
+ }
+ /* Check Longhaul ver. 2 */
+ if (longhaul_version == TYPE_LONGHAUL_V2) {
+ rdmsr(MSR_VIA_LONGHAUL, lo, hi);
+ if (lo == 0 && hi == 0)
+ /* Looks like MSR isn't present */
+ longhaul_version = TYPE_LONGHAUL_V1;
+ }
+
+ printk(KERN_INFO PFX "VIA %s CPU detected. ", cpuname);
+ switch (longhaul_version) {
+ case TYPE_LONGHAUL_V1:
+ case TYPE_LONGHAUL_V2:
+ printk(KERN_CONT "Longhaul v%d supported.\n", longhaul_version);
+ break;
+ case TYPE_POWERSAVER:
+ printk(KERN_CONT "Powersaver supported.\n");
+ break;
+ };
+
+ /* Doesn't hurt */
+ longhaul_setup_southbridge();
+
+ /* Find ACPI data for processor */
+ acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT,
+ ACPI_UINT32_MAX, &longhaul_walk_callback, NULL,
+ NULL, (void *)&pr);
+
+ /* Check ACPI support for C3 state */
+ if (pr != NULL && longhaul_version == TYPE_POWERSAVER) {
+ cx = &pr->power.states[ACPI_STATE_C3];
+ if (cx->address > 0 && cx->latency <= 1000)
+ longhaul_flags |= USE_ACPI_C3;
+ }
+ /* Disable if it isn't working */
+ if (disable_acpi_c3)
+ longhaul_flags &= ~USE_ACPI_C3;
+ /* Check if northbridge is friendly */
+ if (enable_arbiter_disable())
+ longhaul_flags |= USE_NORTHBRIDGE;
+
+ /* Check ACPI support for bus master arbiter disable */
+ if (!(longhaul_flags & USE_ACPI_C3
+ || longhaul_flags & USE_NORTHBRIDGE)
+ && ((pr == NULL) || !(pr->flags.bm_control))) {
+ printk(KERN_ERR PFX
+ "No ACPI support. Unsupported northbridge.\n");
+ return -ENODEV;
+ }
+
+ if (longhaul_flags & USE_NORTHBRIDGE)
+ printk(KERN_INFO PFX "Using northbridge support.\n");
+ if (longhaul_flags & USE_ACPI_C3)
+ printk(KERN_INFO PFX "Using ACPI support.\n");
+
+ ret = longhaul_get_ranges();
+ if (ret != 0)
+ return ret;
+
+ if ((longhaul_version != TYPE_LONGHAUL_V1) && (scale_voltage != 0))
+ longhaul_setup_voltagescaling();
+
+ policy->cpuinfo.transition_latency = 200000; /* nsec */
+ policy->cur = calc_speed(longhaul_get_cpu_mult());
+
+ ret = cpufreq_frequency_table_cpuinfo(policy, longhaul_table);
+ if (ret)
+ return ret;
+
+ cpufreq_frequency_table_get_attr(longhaul_table, policy->cpu);
+
+ return 0;
+}
+
+static int __devexit longhaul_cpu_exit(struct cpufreq_policy *policy)
+{
+ cpufreq_frequency_table_put_attr(policy->cpu);
+ return 0;
+}
+
+static struct freq_attr *longhaul_attr[] = {
+ &cpufreq_freq_attr_scaling_available_freqs,
+ NULL,
+};
+
+static struct cpufreq_driver longhaul_driver = {
+ .verify = longhaul_verify,
+ .target = longhaul_target,
+ .get = longhaul_get,
+ .init = longhaul_cpu_init,
+ .exit = __devexit_p(longhaul_cpu_exit),
+ .name = "longhaul",
+ .owner = THIS_MODULE,
+ .attr = longhaul_attr,
+};
+
+
+static int __init longhaul_init(void)
+{
+ struct cpuinfo_x86 *c = &cpu_data(0);
+
+ if (c->x86_vendor != X86_VENDOR_CENTAUR || c->x86 != 6)
+ return -ENODEV;
+
+#ifdef CONFIG_SMP
+ if (num_online_cpus() > 1) {
+ printk(KERN_ERR PFX "More than 1 CPU detected, "
+ "longhaul disabled.\n");
+ return -ENODEV;
+ }
+#endif
+#ifdef CONFIG_X86_IO_APIC
+ if (cpu_has_apic) {
+ printk(KERN_ERR PFX "APIC detected. Longhaul is currently "
+ "broken in this configuration.\n");
+ return -ENODEV;
+ }
+#endif
+ switch (c->x86_model) {
+ case 6 ... 9:
+ return cpufreq_register_driver(&longhaul_driver);
+ case 10:
+ printk(KERN_ERR PFX "Use acpi-cpufreq driver for VIA C7\n");
+ default:
+ ;
+ }
+
+ return -ENODEV;
+}
+
+
+static void __exit longhaul_exit(void)
+{
+ int i;
+
+ for (i = 0; i < numscales; i++) {
+ if (mults[i] == maxmult) {
+ longhaul_setstate(i);
+ break;
+ }
+ }
+
+ cpufreq_unregister_driver(&longhaul_driver);
+ kfree(longhaul_table);
+}
+
+/* Even if BIOS is exporting ACPI C3 state, and it is used
+ * with success when CPU is idle, this state doesn't
+ * trigger frequency transition in some cases. */
+module_param(disable_acpi_c3, int, 0644);
+MODULE_PARM_DESC(disable_acpi_c3, "Don't use ACPI C3 support");
+/* Change CPU voltage with frequency. Very useful to save
+ * power, but most VIA C3 processors aren't supporting it. */
+module_param(scale_voltage, int, 0644);
+MODULE_PARM_DESC(scale_voltage, "Scale voltage of processor");
+/* Force revision key to 0 for processors which doesn't
+ * support voltage scaling, but are introducing itself as
+ * such. */
+module_param(revid_errata, int, 0644);
+MODULE_PARM_DESC(revid_errata, "Ignore CPU Revision ID");
+
+MODULE_AUTHOR("Dave Jones <davej@redhat.com>");
+MODULE_DESCRIPTION("Longhaul driver for VIA Cyrix processors.");
+MODULE_LICENSE("GPL");
+
+late_initcall(longhaul_init);
+module_exit(longhaul_exit);
diff --git a/drivers/cpufreq/longhaul.h b/drivers/cpufreq/longhaul.h
new file mode 100644
index 00000000..cbf48fbc
--- /dev/null
+++ b/drivers/cpufreq/longhaul.h
@@ -0,0 +1,353 @@
+/*
+ * longhaul.h
+ * (C) 2003 Dave Jones.
+ *
+ * Licensed under the terms of the GNU GPL License version 2.
+ *
+ * VIA-specific information
+ */
+
+union msr_bcr2 {
+ struct {
+ unsigned Reseved:19, // 18:0
+ ESOFTBF:1, // 19
+ Reserved2:3, // 22:20
+ CLOCKMUL:4, // 26:23
+ Reserved3:5; // 31:27
+ } bits;
+ unsigned long val;
+};
+
+union msr_longhaul {
+ struct {
+ unsigned RevisionID:4, // 3:0
+ RevisionKey:4, // 7:4
+ EnableSoftBusRatio:1, // 8
+ EnableSoftVID:1, // 9
+ EnableSoftBSEL:1, // 10
+ Reserved:3, // 11:13
+ SoftBusRatio4:1, // 14
+ VRMRev:1, // 15
+ SoftBusRatio:4, // 19:16
+ SoftVID:5, // 24:20
+ Reserved2:3, // 27:25
+ SoftBSEL:2, // 29:28
+ Reserved3:2, // 31:30
+ MaxMHzBR:4, // 35:32
+ MaximumVID:5, // 40:36
+ MaxMHzFSB:2, // 42:41
+ MaxMHzBR4:1, // 43
+ Reserved4:4, // 47:44
+ MinMHzBR:4, // 51:48
+ MinimumVID:5, // 56:52
+ MinMHzFSB:2, // 58:57
+ MinMHzBR4:1, // 59
+ Reserved5:4; // 63:60
+ } bits;
+ unsigned long long val;
+};
+
+/*
+ * Clock ratio tables. Div/Mod by 10 to get ratio.
+ * The eblcr values specify the ratio read from the CPU.
+ * The mults values specify what to write to the CPU.
+ */
+
+/*
+ * VIA C3 Samuel 1 & Samuel 2 (stepping 0)
+ */
+static const int __cpuinitdata samuel1_mults[16] = {
+ -1, /* 0000 -> RESERVED */
+ 30, /* 0001 -> 3.0x */
+ 40, /* 0010 -> 4.0x */
+ -1, /* 0011 -> RESERVED */
+ -1, /* 0100 -> RESERVED */
+ 35, /* 0101 -> 3.5x */
+ 45, /* 0110 -> 4.5x */
+ 55, /* 0111 -> 5.5x */
+ 60, /* 1000 -> 6.0x */
+ 70, /* 1001 -> 7.0x */
+ 80, /* 1010 -> 8.0x */
+ 50, /* 1011 -> 5.0x */
+ 65, /* 1100 -> 6.5x */
+ 75, /* 1101 -> 7.5x */
+ -1, /* 1110 -> RESERVED */
+ -1, /* 1111 -> RESERVED */
+};
+
+static const int __cpuinitdata samuel1_eblcr[16] = {
+ 50, /* 0000 -> RESERVED */
+ 30, /* 0001 -> 3.0x */
+ 40, /* 0010 -> 4.0x */
+ -1, /* 0011 -> RESERVED */
+ 55, /* 0100 -> 5.5x */
+ 35, /* 0101 -> 3.5x */
+ 45, /* 0110 -> 4.5x */
+ -1, /* 0111 -> RESERVED */
+ -1, /* 1000 -> RESERVED */
+ 70, /* 1001 -> 7.0x */
+ 80, /* 1010 -> 8.0x */
+ 60, /* 1011 -> 6.0x */
+ -1, /* 1100 -> RESERVED */
+ 75, /* 1101 -> 7.5x */
+ -1, /* 1110 -> RESERVED */
+ 65, /* 1111 -> 6.5x */
+};
+
+/*
+ * VIA C3 Samuel2 Stepping 1->15
+ */
+static const int __cpuinitdata samuel2_eblcr[16] = {
+ 50, /* 0000 -> 5.0x */
+ 30, /* 0001 -> 3.0x */
+ 40, /* 0010 -> 4.0x */
+ 100, /* 0011 -> 10.0x */
+ 55, /* 0100 -> 5.5x */
+ 35, /* 0101 -> 3.5x */
+ 45, /* 0110 -> 4.5x */
+ 110, /* 0111 -> 11.0x */
+ 90, /* 1000 -> 9.0x */
+ 70, /* 1001 -> 7.0x */
+ 80, /* 1010 -> 8.0x */
+ 60, /* 1011 -> 6.0x */
+ 120, /* 1100 -> 12.0x */
+ 75, /* 1101 -> 7.5x */
+ 130, /* 1110 -> 13.0x */
+ 65, /* 1111 -> 6.5x */
+};
+
+/*
+ * VIA C3 Ezra
+ */
+static const int __cpuinitdata ezra_mults[16] = {
+ 100, /* 0000 -> 10.0x */
+ 30, /* 0001 -> 3.0x */
+ 40, /* 0010 -> 4.0x */
+ 90, /* 0011 -> 9.0x */
+ 95, /* 0100 -> 9.5x */
+ 35, /* 0101 -> 3.5x */
+ 45, /* 0110 -> 4.5x */
+ 55, /* 0111 -> 5.5x */
+ 60, /* 1000 -> 6.0x */
+ 70, /* 1001 -> 7.0x */
+ 80, /* 1010 -> 8.0x */
+ 50, /* 1011 -> 5.0x */
+ 65, /* 1100 -> 6.5x */
+ 75, /* 1101 -> 7.5x */
+ 85, /* 1110 -> 8.5x */
+ 120, /* 1111 -> 12.0x */
+};
+
+static const int __cpuinitdata ezra_eblcr[16] = {
+ 50, /* 0000 -> 5.0x */
+ 30, /* 0001 -> 3.0x */
+ 40, /* 0010 -> 4.0x */
+ 100, /* 0011 -> 10.0x */
+ 55, /* 0100 -> 5.5x */
+ 35, /* 0101 -> 3.5x */
+ 45, /* 0110 -> 4.5x */
+ 95, /* 0111 -> 9.5x */
+ 90, /* 1000 -> 9.0x */
+ 70, /* 1001 -> 7.0x */
+ 80, /* 1010 -> 8.0x */
+ 60, /* 1011 -> 6.0x */
+ 120, /* 1100 -> 12.0x */
+ 75, /* 1101 -> 7.5x */
+ 85, /* 1110 -> 8.5x */
+ 65, /* 1111 -> 6.5x */
+};
+
+/*
+ * VIA C3 (Ezra-T) [C5M].
+ */
+static const int __cpuinitdata ezrat_mults[32] = {
+ 100, /* 0000 -> 10.0x */
+ 30, /* 0001 -> 3.0x */
+ 40, /* 0010 -> 4.0x */
+ 90, /* 0011 -> 9.0x */
+ 95, /* 0100 -> 9.5x */
+ 35, /* 0101 -> 3.5x */
+ 45, /* 0110 -> 4.5x */
+ 55, /* 0111 -> 5.5x */
+ 60, /* 1000 -> 6.0x */
+ 70, /* 1001 -> 7.0x */
+ 80, /* 1010 -> 8.0x */
+ 50, /* 1011 -> 5.0x */
+ 65, /* 1100 -> 6.5x */
+ 75, /* 1101 -> 7.5x */
+ 85, /* 1110 -> 8.5x */
+ 120, /* 1111 -> 12.0x */
+
+ -1, /* 0000 -> RESERVED (10.0x) */
+ 110, /* 0001 -> 11.0x */
+ -1, /* 0010 -> 12.0x */
+ -1, /* 0011 -> RESERVED (9.0x)*/
+ 105, /* 0100 -> 10.5x */
+ 115, /* 0101 -> 11.5x */
+ 125, /* 0110 -> 12.5x */
+ 135, /* 0111 -> 13.5x */
+ 140, /* 1000 -> 14.0x */
+ 150, /* 1001 -> 15.0x */
+ 160, /* 1010 -> 16.0x */
+ 130, /* 1011 -> 13.0x */
+ 145, /* 1100 -> 14.5x */
+ 155, /* 1101 -> 15.5x */
+ -1, /* 1110 -> RESERVED (13.0x) */
+ -1, /* 1111 -> RESERVED (12.0x) */
+};
+
+static const int __cpuinitdata ezrat_eblcr[32] = {
+ 50, /* 0000 -> 5.0x */
+ 30, /* 0001 -> 3.0x */
+ 40, /* 0010 -> 4.0x */
+ 100, /* 0011 -> 10.0x */
+ 55, /* 0100 -> 5.5x */
+ 35, /* 0101 -> 3.5x */
+ 45, /* 0110 -> 4.5x */
+ 95, /* 0111 -> 9.5x */
+ 90, /* 1000 -> 9.0x */
+ 70, /* 1001 -> 7.0x */
+ 80, /* 1010 -> 8.0x */
+ 60, /* 1011 -> 6.0x */
+ 120, /* 1100 -> 12.0x */
+ 75, /* 1101 -> 7.5x */
+ 85, /* 1110 -> 8.5x */
+ 65, /* 1111 -> 6.5x */
+
+ -1, /* 0000 -> RESERVED (9.0x) */
+ 110, /* 0001 -> 11.0x */
+ 120, /* 0010 -> 12.0x */
+ -1, /* 0011 -> RESERVED (10.0x)*/
+ 135, /* 0100 -> 13.5x */
+ 115, /* 0101 -> 11.5x */
+ 125, /* 0110 -> 12.5x */
+ 105, /* 0111 -> 10.5x */
+ 130, /* 1000 -> 13.0x */
+ 150, /* 1001 -> 15.0x */
+ 160, /* 1010 -> 16.0x */
+ 140, /* 1011 -> 14.0x */
+ -1, /* 1100 -> RESERVED (12.0x) */
+ 155, /* 1101 -> 15.5x */
+ -1, /* 1110 -> RESERVED (13.0x) */
+ 145, /* 1111 -> 14.5x */
+};
+
+/*
+ * VIA C3 Nehemiah */
+
+static const int __cpuinitdata nehemiah_mults[32] = {
+ 100, /* 0000 -> 10.0x */
+ -1, /* 0001 -> 16.0x */
+ 40, /* 0010 -> 4.0x */
+ 90, /* 0011 -> 9.0x */
+ 95, /* 0100 -> 9.5x */
+ -1, /* 0101 -> RESERVED */
+ 45, /* 0110 -> 4.5x */
+ 55, /* 0111 -> 5.5x */
+ 60, /* 1000 -> 6.0x */
+ 70, /* 1001 -> 7.0x */
+ 80, /* 1010 -> 8.0x */
+ 50, /* 1011 -> 5.0x */
+ 65, /* 1100 -> 6.5x */
+ 75, /* 1101 -> 7.5x */
+ 85, /* 1110 -> 8.5x */
+ 120, /* 1111 -> 12.0x */
+ -1, /* 0000 -> 10.0x */
+ 110, /* 0001 -> 11.0x */
+ -1, /* 0010 -> 12.0x */
+ -1, /* 0011 -> 9.0x */
+ 105, /* 0100 -> 10.5x */
+ 115, /* 0101 -> 11.5x */
+ 125, /* 0110 -> 12.5x */
+ 135, /* 0111 -> 13.5x */
+ 140, /* 1000 -> 14.0x */
+ 150, /* 1001 -> 15.0x */
+ 160, /* 1010 -> 16.0x */
+ 130, /* 1011 -> 13.0x */
+ 145, /* 1100 -> 14.5x */
+ 155, /* 1101 -> 15.5x */
+ -1, /* 1110 -> RESERVED (13.0x) */
+ -1, /* 1111 -> 12.0x */
+};
+
+static const int __cpuinitdata nehemiah_eblcr[32] = {
+ 50, /* 0000 -> 5.0x */
+ 160, /* 0001 -> 16.0x */
+ 40, /* 0010 -> 4.0x */
+ 100, /* 0011 -> 10.0x */
+ 55, /* 0100 -> 5.5x */
+ -1, /* 0101 -> RESERVED */
+ 45, /* 0110 -> 4.5x */
+ 95, /* 0111 -> 9.5x */
+ 90, /* 1000 -> 9.0x */
+ 70, /* 1001 -> 7.0x */
+ 80, /* 1010 -> 8.0x */
+ 60, /* 1011 -> 6.0x */
+ 120, /* 1100 -> 12.0x */
+ 75, /* 1101 -> 7.5x */
+ 85, /* 1110 -> 8.5x */
+ 65, /* 1111 -> 6.5x */
+ 90, /* 0000 -> 9.0x */
+ 110, /* 0001 -> 11.0x */
+ 120, /* 0010 -> 12.0x */
+ 100, /* 0011 -> 10.0x */
+ 135, /* 0100 -> 13.5x */
+ 115, /* 0101 -> 11.5x */
+ 125, /* 0110 -> 12.5x */
+ 105, /* 0111 -> 10.5x */
+ 130, /* 1000 -> 13.0x */
+ 150, /* 1001 -> 15.0x */
+ 160, /* 1010 -> 16.0x */
+ 140, /* 1011 -> 14.0x */
+ 120, /* 1100 -> 12.0x */
+ 155, /* 1101 -> 15.5x */
+ -1, /* 1110 -> RESERVED (13.0x) */
+ 145 /* 1111 -> 14.5x */
+};
+
+/*
+ * Voltage scales. Div/Mod by 1000 to get actual voltage.
+ * Which scale to use depends on the VRM type in use.
+ */
+
+struct mV_pos {
+ unsigned short mV;
+ unsigned short pos;
+};
+
+static const struct mV_pos __cpuinitdata vrm85_mV[32] = {
+ {1250, 8}, {1200, 6}, {1150, 4}, {1100, 2},
+ {1050, 0}, {1800, 30}, {1750, 28}, {1700, 26},
+ {1650, 24}, {1600, 22}, {1550, 20}, {1500, 18},
+ {1450, 16}, {1400, 14}, {1350, 12}, {1300, 10},
+ {1275, 9}, {1225, 7}, {1175, 5}, {1125, 3},
+ {1075, 1}, {1825, 31}, {1775, 29}, {1725, 27},
+ {1675, 25}, {1625, 23}, {1575, 21}, {1525, 19},
+ {1475, 17}, {1425, 15}, {1375, 13}, {1325, 11}
+};
+
+static const unsigned char __cpuinitdata mV_vrm85[32] = {
+ 0x04, 0x14, 0x03, 0x13, 0x02, 0x12, 0x01, 0x11,
+ 0x00, 0x10, 0x0f, 0x1f, 0x0e, 0x1e, 0x0d, 0x1d,
+ 0x0c, 0x1c, 0x0b, 0x1b, 0x0a, 0x1a, 0x09, 0x19,
+ 0x08, 0x18, 0x07, 0x17, 0x06, 0x16, 0x05, 0x15
+};
+
+static const struct mV_pos __cpuinitdata mobilevrm_mV[32] = {
+ {1750, 31}, {1700, 30}, {1650, 29}, {1600, 28},
+ {1550, 27}, {1500, 26}, {1450, 25}, {1400, 24},
+ {1350, 23}, {1300, 22}, {1250, 21}, {1200, 20},
+ {1150, 19}, {1100, 18}, {1050, 17}, {1000, 16},
+ {975, 15}, {950, 14}, {925, 13}, {900, 12},
+ {875, 11}, {850, 10}, {825, 9}, {800, 8},
+ {775, 7}, {750, 6}, {725, 5}, {700, 4},
+ {675, 3}, {650, 2}, {625, 1}, {600, 0}
+};
+
+static const unsigned char __cpuinitdata mV_mobilevrm[32] = {
+ 0x1f, 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18,
+ 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10,
+ 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08,
+ 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00
+};
+
diff --git a/drivers/cpufreq/longrun.c b/drivers/cpufreq/longrun.c
new file mode 100644
index 00000000..34ea359b
--- /dev/null
+++ b/drivers/cpufreq/longrun.c
@@ -0,0 +1,324 @@
+/*
+ * (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de>
+ *
+ * Licensed under the terms of the GNU GPL License version 2.
+ *
+ * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/cpufreq.h>
+#include <linux/timex.h>
+
+#include <asm/msr.h>
+#include <asm/processor.h>
+
+static struct cpufreq_driver longrun_driver;
+
+/**
+ * longrun_{low,high}_freq is needed for the conversion of cpufreq kHz
+ * values into per cent values. In TMTA microcode, the following is valid:
+ * performance_pctg = (current_freq - low_freq)/(high_freq - low_freq)
+ */
+static unsigned int longrun_low_freq, longrun_high_freq;
+
+
+/**
+ * longrun_get_policy - get the current LongRun policy
+ * @policy: struct cpufreq_policy where current policy is written into
+ *
+ * Reads the current LongRun policy by access to MSR_TMTA_LONGRUN_FLAGS
+ * and MSR_TMTA_LONGRUN_CTRL
+ */
+static void __cpuinit longrun_get_policy(struct cpufreq_policy *policy)
+{
+ u32 msr_lo, msr_hi;
+
+ rdmsr(MSR_TMTA_LONGRUN_FLAGS, msr_lo, msr_hi);
+ pr_debug("longrun flags are %x - %x\n", msr_lo, msr_hi);
+ if (msr_lo & 0x01)
+ policy->policy = CPUFREQ_POLICY_PERFORMANCE;
+ else
+ policy->policy = CPUFREQ_POLICY_POWERSAVE;
+
+ rdmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi);
+ pr_debug("longrun ctrl is %x - %x\n", msr_lo, msr_hi);
+ msr_lo &= 0x0000007F;
+ msr_hi &= 0x0000007F;
+
+ if (longrun_high_freq <= longrun_low_freq) {
+ /* Assume degenerate Longrun table */
+ policy->min = policy->max = longrun_high_freq;
+ } else {
+ policy->min = longrun_low_freq + msr_lo *
+ ((longrun_high_freq - longrun_low_freq) / 100);
+ policy->max = longrun_low_freq + msr_hi *
+ ((longrun_high_freq - longrun_low_freq) / 100);
+ }
+ policy->cpu = 0;
+}
+
+
+/**
+ * longrun_set_policy - sets a new CPUFreq policy
+ * @policy: new policy
+ *
+ * Sets a new CPUFreq policy on LongRun-capable processors. This function
+ * has to be called with cpufreq_driver locked.
+ */
+static int longrun_set_policy(struct cpufreq_policy *policy)
+{
+ u32 msr_lo, msr_hi;
+ u32 pctg_lo, pctg_hi;
+
+ if (!policy)
+ return -EINVAL;
+
+ if (longrun_high_freq <= longrun_low_freq) {
+ /* Assume degenerate Longrun table */
+ pctg_lo = pctg_hi = 100;
+ } else {
+ pctg_lo = (policy->min - longrun_low_freq) /
+ ((longrun_high_freq - longrun_low_freq) / 100);
+ pctg_hi = (policy->max - longrun_low_freq) /
+ ((longrun_high_freq - longrun_low_freq) / 100);
+ }
+
+ if (pctg_hi > 100)
+ pctg_hi = 100;
+ if (pctg_lo > pctg_hi)
+ pctg_lo = pctg_hi;
+
+ /* performance or economy mode */
+ rdmsr(MSR_TMTA_LONGRUN_FLAGS, msr_lo, msr_hi);
+ msr_lo &= 0xFFFFFFFE;
+ switch (policy->policy) {
+ case CPUFREQ_POLICY_PERFORMANCE:
+ msr_lo |= 0x00000001;
+ break;
+ case CPUFREQ_POLICY_POWERSAVE:
+ break;
+ }
+ wrmsr(MSR_TMTA_LONGRUN_FLAGS, msr_lo, msr_hi);
+
+ /* lower and upper boundary */
+ rdmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi);
+ msr_lo &= 0xFFFFFF80;
+ msr_hi &= 0xFFFFFF80;
+ msr_lo |= pctg_lo;
+ msr_hi |= pctg_hi;
+ wrmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi);
+
+ return 0;
+}
+
+
+/**
+ * longrun_verify_poliy - verifies a new CPUFreq policy
+ * @policy: the policy to verify
+ *
+ * Validates a new CPUFreq policy. This function has to be called with
+ * cpufreq_driver locked.
+ */
+static int longrun_verify_policy(struct cpufreq_policy *policy)
+{
+ if (!policy)
+ return -EINVAL;
+
+ policy->cpu = 0;
+ cpufreq_verify_within_limits(policy,
+ policy->cpuinfo.min_freq,
+ policy->cpuinfo.max_freq);
+
+ if ((policy->policy != CPUFREQ_POLICY_POWERSAVE) &&
+ (policy->policy != CPUFREQ_POLICY_PERFORMANCE))
+ return -EINVAL;
+
+ return 0;
+}
+
+static unsigned int longrun_get(unsigned int cpu)
+{
+ u32 eax, ebx, ecx, edx;
+
+ if (cpu)
+ return 0;
+
+ cpuid(0x80860007, &eax, &ebx, &ecx, &edx);
+ pr_debug("cpuid eax is %u\n", eax);
+
+ return eax * 1000;
+}
+
+/**
+ * longrun_determine_freqs - determines the lowest and highest possible core frequency
+ * @low_freq: an int to put the lowest frequency into
+ * @high_freq: an int to put the highest frequency into
+ *
+ * Determines the lowest and highest possible core frequencies on this CPU.
+ * This is necessary to calculate the performance percentage according to
+ * TMTA rules:
+ * performance_pctg = (target_freq - low_freq)/(high_freq - low_freq)
+ */
+static int __cpuinit longrun_determine_freqs(unsigned int *low_freq,
+ unsigned int *high_freq)
+{
+ u32 msr_lo, msr_hi;
+ u32 save_lo, save_hi;
+ u32 eax, ebx, ecx, edx;
+ u32 try_hi;
+ struct cpuinfo_x86 *c = &cpu_data(0);
+
+ if (!low_freq || !high_freq)
+ return -EINVAL;
+
+ if (cpu_has(c, X86_FEATURE_LRTI)) {
+ /* if the LongRun Table Interface is present, the
+ * detection is a bit easier:
+ * For minimum frequency, read out the maximum
+ * level (msr_hi), write that into "currently
+ * selected level", and read out the frequency.
+ * For maximum frequency, read out level zero.
+ */
+ /* minimum */
+ rdmsr(MSR_TMTA_LRTI_READOUT, msr_lo, msr_hi);
+ wrmsr(MSR_TMTA_LRTI_READOUT, msr_hi, msr_hi);
+ rdmsr(MSR_TMTA_LRTI_VOLT_MHZ, msr_lo, msr_hi);
+ *low_freq = msr_lo * 1000; /* to kHz */
+
+ /* maximum */
+ wrmsr(MSR_TMTA_LRTI_READOUT, 0, msr_hi);
+ rdmsr(MSR_TMTA_LRTI_VOLT_MHZ, msr_lo, msr_hi);
+ *high_freq = msr_lo * 1000; /* to kHz */
+
+ pr_debug("longrun table interface told %u - %u kHz\n",
+ *low_freq, *high_freq);
+
+ if (*low_freq > *high_freq)
+ *low_freq = *high_freq;
+ return 0;
+ }
+
+ /* set the upper border to the value determined during TSC init */
+ *high_freq = (cpu_khz / 1000);
+ *high_freq = *high_freq * 1000;
+ pr_debug("high frequency is %u kHz\n", *high_freq);
+
+ /* get current borders */
+ rdmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi);
+ save_lo = msr_lo & 0x0000007F;
+ save_hi = msr_hi & 0x0000007F;
+
+ /* if current perf_pctg is larger than 90%, we need to decrease the
+ * upper limit to make the calculation more accurate.
+ */
+ cpuid(0x80860007, &eax, &ebx, &ecx, &edx);
+ /* try decreasing in 10% steps, some processors react only
+ * on some barrier values */
+ for (try_hi = 80; try_hi > 0 && ecx > 90; try_hi -= 10) {
+ /* set to 0 to try_hi perf_pctg */
+ msr_lo &= 0xFFFFFF80;
+ msr_hi &= 0xFFFFFF80;
+ msr_hi |= try_hi;
+ wrmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi);
+
+ /* read out current core MHz and current perf_pctg */
+ cpuid(0x80860007, &eax, &ebx, &ecx, &edx);
+
+ /* restore values */
+ wrmsr(MSR_TMTA_LONGRUN_CTRL, save_lo, save_hi);
+ }
+ pr_debug("percentage is %u %%, freq is %u MHz\n", ecx, eax);
+
+ /* performance_pctg = (current_freq - low_freq)/(high_freq - low_freq)
+ * eqals
+ * low_freq * (1 - perf_pctg) = (cur_freq - high_freq * perf_pctg)
+ *
+ * high_freq * perf_pctg is stored tempoarily into "ebx".
+ */
+ ebx = (((cpu_khz / 1000) * ecx) / 100); /* to MHz */
+
+ if ((ecx > 95) || (ecx == 0) || (eax < ebx))
+ return -EIO;
+
+ edx = ((eax - ebx) * 100) / (100 - ecx);
+ *low_freq = edx * 1000; /* back to kHz */
+
+ pr_debug("low frequency is %u kHz\n", *low_freq);
+
+ if (*low_freq > *high_freq)
+ *low_freq = *high_freq;
+
+ return 0;
+}
+
+
+static int __cpuinit longrun_cpu_init(struct cpufreq_policy *policy)
+{
+ int result = 0;
+
+ /* capability check */
+ if (policy->cpu != 0)
+ return -ENODEV;
+
+ /* detect low and high frequency */
+ result = longrun_determine_freqs(&longrun_low_freq, &longrun_high_freq);
+ if (result)
+ return result;
+
+ /* cpuinfo and default policy values */
+ policy->cpuinfo.min_freq = longrun_low_freq;
+ policy->cpuinfo.max_freq = longrun_high_freq;
+ policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
+ longrun_get_policy(policy);
+
+ return 0;
+}
+
+
+static struct cpufreq_driver longrun_driver = {
+ .flags = CPUFREQ_CONST_LOOPS,
+ .verify = longrun_verify_policy,
+ .setpolicy = longrun_set_policy,
+ .get = longrun_get,
+ .init = longrun_cpu_init,
+ .name = "longrun",
+ .owner = THIS_MODULE,
+};
+
+
+/**
+ * longrun_init - initializes the Transmeta Crusoe LongRun CPUFreq driver
+ *
+ * Initializes the LongRun support.
+ */
+static int __init longrun_init(void)
+{
+ struct cpuinfo_x86 *c = &cpu_data(0);
+
+ if (c->x86_vendor != X86_VENDOR_TRANSMETA ||
+ !cpu_has(c, X86_FEATURE_LONGRUN))
+ return -ENODEV;
+
+ return cpufreq_register_driver(&longrun_driver);
+}
+
+
+/**
+ * longrun_exit - unregisters LongRun support
+ */
+static void __exit longrun_exit(void)
+{
+ cpufreq_unregister_driver(&longrun_driver);
+}
+
+
+MODULE_AUTHOR("Dominik Brodowski <linux@brodo.de>");
+MODULE_DESCRIPTION("LongRun driver for Transmeta Crusoe and "
+ "Efficeon processors.");
+MODULE_LICENSE("GPL");
+
+module_init(longrun_init);
+module_exit(longrun_exit);
diff --git a/drivers/cpufreq/mperf.c b/drivers/cpufreq/mperf.c
new file mode 100644
index 00000000..911e1930
--- /dev/null
+++ b/drivers/cpufreq/mperf.c
@@ -0,0 +1,51 @@
+#include <linux/kernel.h>
+#include <linux/smp.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/cpufreq.h>
+#include <linux/slab.h>
+
+#include "mperf.h"
+
+static DEFINE_PER_CPU(struct aperfmperf, acfreq_old_perf);
+
+/* Called via smp_call_function_single(), on the target CPU */
+static void read_measured_perf_ctrs(void *_cur)
+{
+ struct aperfmperf *am = _cur;
+
+ get_aperfmperf(am);
+}
+
+/*
+ * Return the measured active (C0) frequency on this CPU since last call
+ * to this function.
+ * Input: cpu number
+ * Return: Average CPU frequency in terms of max frequency (zero on error)
+ *
+ * We use IA32_MPERF and IA32_APERF MSRs to get the measured performance
+ * over a period of time, while CPU is in C0 state.
+ * IA32_MPERF counts at the rate of max advertised frequency
+ * IA32_APERF counts at the rate of actual CPU frequency
+ * Only IA32_APERF/IA32_MPERF ratio is architecturally defined and
+ * no meaning should be associated with absolute values of these MSRs.
+ */
+unsigned int cpufreq_get_measured_perf(struct cpufreq_policy *policy,
+ unsigned int cpu)
+{
+ struct aperfmperf perf;
+ unsigned long ratio;
+ unsigned int retval;
+
+ if (smp_call_function_single(cpu, read_measured_perf_ctrs, &perf, 1))
+ return 0;
+
+ ratio = calc_aperfmperf_ratio(&per_cpu(acfreq_old_perf, cpu), &perf);
+ per_cpu(acfreq_old_perf, cpu) = perf;
+
+ retval = (policy->cpuinfo.max_freq * ratio) >> APERFMPERF_SHIFT;
+
+ return retval;
+}
+EXPORT_SYMBOL_GPL(cpufreq_get_measured_perf);
+MODULE_LICENSE("GPL");
diff --git a/drivers/cpufreq/mperf.h b/drivers/cpufreq/mperf.h
new file mode 100644
index 00000000..5dbf2950
--- /dev/null
+++ b/drivers/cpufreq/mperf.h
@@ -0,0 +1,9 @@
+/*
+ * (c) 2010 Advanced Micro Devices, Inc.
+ * Your use of this code is subject to the terms and conditions of the
+ * GNU general public license version 2. See "COPYING" or
+ * http://www.gnu.org/licenses/gpl.html
+ */
+
+unsigned int cpufreq_get_measured_perf(struct cpufreq_policy *policy,
+ unsigned int cpu);
diff --git a/drivers/cpufreq/p4-clockmod.c b/drivers/cpufreq/p4-clockmod.c
new file mode 100644
index 00000000..6be3e076
--- /dev/null
+++ b/drivers/cpufreq/p4-clockmod.c
@@ -0,0 +1,329 @@
+/*
+ * Pentium 4/Xeon CPU on demand clock modulation/speed scaling
+ * (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de>
+ * (C) 2002 Zwane Mwaikambo <zwane@commfireservices.com>
+ * (C) 2002 Arjan van de Ven <arjanv@redhat.com>
+ * (C) 2002 Tora T. Engstad
+ * All Rights Reserved
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * The author(s) of this software shall not be held liable for damages
+ * of any nature resulting due to the use of this software. This
+ * software is provided AS-IS with no warranties.
+ *
+ * Date Errata Description
+ * 20020525 N44, O17 12.5% or 25% DC causes lockup
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <linux/cpufreq.h>
+#include <linux/cpumask.h>
+#include <linux/timex.h>
+
+#include <asm/processor.h>
+#include <asm/msr.h>
+#include <asm/timer.h>
+
+#include "speedstep-lib.h"
+
+#define PFX "p4-clockmod: "
+
+/*
+ * Duty Cycle (3bits), note DC_DISABLE is not specified in
+ * intel docs i just use it to mean disable
+ */
+enum {
+ DC_RESV, DC_DFLT, DC_25PT, DC_38PT, DC_50PT,
+ DC_64PT, DC_75PT, DC_88PT, DC_DISABLE
+};
+
+#define DC_ENTRIES 8
+
+
+static int has_N44_O17_errata[NR_CPUS];
+static unsigned int stock_freq;
+static struct cpufreq_driver p4clockmod_driver;
+static unsigned int cpufreq_p4_get(unsigned int cpu);
+
+static int cpufreq_p4_setdc(unsigned int cpu, unsigned int newstate)
+{
+ u32 l, h;
+
+ if (!cpu_online(cpu) ||
+ (newstate > DC_DISABLE) || (newstate == DC_RESV))
+ return -EINVAL;
+
+ rdmsr_on_cpu(cpu, MSR_IA32_THERM_STATUS, &l, &h);
+
+ if (l & 0x01)
+ pr_debug("CPU#%d currently thermal throttled\n", cpu);
+
+ if (has_N44_O17_errata[cpu] &&
+ (newstate == DC_25PT || newstate == DC_DFLT))
+ newstate = DC_38PT;
+
+ rdmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, &l, &h);
+ if (newstate == DC_DISABLE) {
+ pr_debug("CPU#%d disabling modulation\n", cpu);
+ wrmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, l & ~(1<<4), h);
+ } else {
+ pr_debug("CPU#%d setting duty cycle to %d%%\n",
+ cpu, ((125 * newstate) / 10));
+ /* bits 63 - 5 : reserved
+ * bit 4 : enable/disable
+ * bits 3-1 : duty cycle
+ * bit 0 : reserved
+ */
+ l = (l & ~14);
+ l = l | (1<<4) | ((newstate & 0x7)<<1);
+ wrmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, l, h);
+ }
+
+ return 0;
+}
+
+
+static struct cpufreq_frequency_table p4clockmod_table[] = {
+ {DC_RESV, CPUFREQ_ENTRY_INVALID},
+ {DC_DFLT, 0},
+ {DC_25PT, 0},
+ {DC_38PT, 0},
+ {DC_50PT, 0},
+ {DC_64PT, 0},
+ {DC_75PT, 0},
+ {DC_88PT, 0},
+ {DC_DISABLE, 0},
+ {DC_RESV, CPUFREQ_TABLE_END},
+};
+
+
+static int cpufreq_p4_target(struct cpufreq_policy *policy,
+ unsigned int target_freq,
+ unsigned int relation)
+{
+ unsigned int newstate = DC_RESV;
+ struct cpufreq_freqs freqs;
+ int i;
+
+ if (cpufreq_frequency_table_target(policy, &p4clockmod_table[0],
+ target_freq, relation, &newstate))
+ return -EINVAL;
+
+ freqs.old = cpufreq_p4_get(policy->cpu);
+ freqs.new = stock_freq * p4clockmod_table[newstate].index / 8;
+
+ if (freqs.new == freqs.old)
+ return 0;
+
+ /* notifiers */
+ for_each_cpu(i, policy->cpus) {
+ freqs.cpu = i;
+ cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+ }
+
+ /* run on each logical CPU,
+ * see section 13.15.3 of IA32 Intel Architecture Software
+ * Developer's Manual, Volume 3
+ */
+ for_each_cpu(i, policy->cpus)
+ cpufreq_p4_setdc(i, p4clockmod_table[newstate].index);
+
+ /* notifiers */
+ for_each_cpu(i, policy->cpus) {
+ freqs.cpu = i;
+ cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+ }
+
+ return 0;
+}
+
+
+static int cpufreq_p4_verify(struct cpufreq_policy *policy)
+{
+ return cpufreq_frequency_table_verify(policy, &p4clockmod_table[0]);
+}
+
+
+static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c)
+{
+ if (c->x86 == 0x06) {
+ if (cpu_has(c, X86_FEATURE_EST))
+ printk_once(KERN_WARNING PFX "Warning: EST-capable "
+ "CPU detected. The acpi-cpufreq module offers "
+ "voltage scaling in addition to frequency "
+ "scaling. You should use that instead of "
+ "p4-clockmod, if possible.\n");
+ switch (c->x86_model) {
+ case 0x0E: /* Core */
+ case 0x0F: /* Core Duo */
+ case 0x16: /* Celeron Core */
+ case 0x1C: /* Atom */
+ p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS;
+ return speedstep_get_frequency(SPEEDSTEP_CPU_PCORE);
+ case 0x0D: /* Pentium M (Dothan) */
+ p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS;
+ /* fall through */
+ case 0x09: /* Pentium M (Banias) */
+ return speedstep_get_frequency(SPEEDSTEP_CPU_PM);
+ }
+ }
+
+ if (c->x86 != 0xF)
+ return 0;
+
+ /* on P-4s, the TSC runs with constant frequency independent whether
+ * throttling is active or not. */
+ p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS;
+
+ if (speedstep_detect_processor() == SPEEDSTEP_CPU_P4M) {
+ printk(KERN_WARNING PFX "Warning: Pentium 4-M detected. "
+ "The speedstep-ich or acpi cpufreq modules offer "
+ "voltage scaling in addition of frequency scaling. "
+ "You should use either one instead of p4-clockmod, "
+ "if possible.\n");
+ return speedstep_get_frequency(SPEEDSTEP_CPU_P4M);
+ }
+
+ return speedstep_get_frequency(SPEEDSTEP_CPU_P4D);
+}
+
+
+
+static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy)
+{
+ struct cpuinfo_x86 *c = &cpu_data(policy->cpu);
+ int cpuid = 0;
+ unsigned int i;
+
+#ifdef CONFIG_SMP
+ cpumask_copy(policy->cpus, cpu_sibling_mask(policy->cpu));
+#endif
+
+ /* Errata workaround */
+ cpuid = (c->x86 << 8) | (c->x86_model << 4) | c->x86_mask;
+ switch (cpuid) {
+ case 0x0f07:
+ case 0x0f0a:
+ case 0x0f11:
+ case 0x0f12:
+ has_N44_O17_errata[policy->cpu] = 1;
+ pr_debug("has errata -- disabling low frequencies\n");
+ }
+
+ if (speedstep_detect_processor() == SPEEDSTEP_CPU_P4D &&
+ c->x86_model < 2) {
+ /* switch to maximum frequency and measure result */
+ cpufreq_p4_setdc(policy->cpu, DC_DISABLE);
+ recalibrate_cpu_khz();
+ }
+ /* get max frequency */
+ stock_freq = cpufreq_p4_get_frequency(c);
+ if (!stock_freq)
+ return -EINVAL;
+
+ /* table init */
+ for (i = 1; (p4clockmod_table[i].frequency != CPUFREQ_TABLE_END); i++) {
+ if ((i < 2) && (has_N44_O17_errata[policy->cpu]))
+ p4clockmod_table[i].frequency = CPUFREQ_ENTRY_INVALID;
+ else
+ p4clockmod_table[i].frequency = (stock_freq * i)/8;
+ }
+ cpufreq_frequency_table_get_attr(p4clockmod_table, policy->cpu);
+
+ /* cpuinfo and default policy values */
+
+ /* the transition latency is set to be 1 higher than the maximum
+ * transition latency of the ondemand governor */
+ policy->cpuinfo.transition_latency = 10000001;
+ policy->cur = stock_freq;
+
+ return cpufreq_frequency_table_cpuinfo(policy, &p4clockmod_table[0]);
+}
+
+
+static int cpufreq_p4_cpu_exit(struct cpufreq_policy *policy)
+{
+ cpufreq_frequency_table_put_attr(policy->cpu);
+ return 0;
+}
+
+static unsigned int cpufreq_p4_get(unsigned int cpu)
+{
+ u32 l, h;
+
+ rdmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, &l, &h);
+
+ if (l & 0x10) {
+ l = l >> 1;
+ l &= 0x7;
+ } else
+ l = DC_DISABLE;
+
+ if (l != DC_DISABLE)
+ return stock_freq * l / 8;
+
+ return stock_freq;
+}
+
+static struct freq_attr *p4clockmod_attr[] = {
+ &cpufreq_freq_attr_scaling_available_freqs,
+ NULL,
+};
+
+static struct cpufreq_driver p4clockmod_driver = {
+ .verify = cpufreq_p4_verify,
+ .target = cpufreq_p4_target,
+ .init = cpufreq_p4_cpu_init,
+ .exit = cpufreq_p4_cpu_exit,
+ .get = cpufreq_p4_get,
+ .name = "p4-clockmod",
+ .owner = THIS_MODULE,
+ .attr = p4clockmod_attr,
+};
+
+
+static int __init cpufreq_p4_init(void)
+{
+ struct cpuinfo_x86 *c = &cpu_data(0);
+ int ret;
+
+ /*
+ * THERM_CONTROL is architectural for IA32 now, so
+ * we can rely on the capability checks
+ */
+ if (c->x86_vendor != X86_VENDOR_INTEL)
+ return -ENODEV;
+
+ if (!test_cpu_cap(c, X86_FEATURE_ACPI) ||
+ !test_cpu_cap(c, X86_FEATURE_ACC))
+ return -ENODEV;
+
+ ret = cpufreq_register_driver(&p4clockmod_driver);
+ if (!ret)
+ printk(KERN_INFO PFX "P4/Xeon(TM) CPU On-Demand Clock "
+ "Modulation available\n");
+
+ return ret;
+}
+
+
+static void __exit cpufreq_p4_exit(void)
+{
+ cpufreq_unregister_driver(&p4clockmod_driver);
+}
+
+
+MODULE_AUTHOR("Zwane Mwaikambo <zwane@commfireservices.com>");
+MODULE_DESCRIPTION("cpufreq driver for Pentium(TM) 4/Xeon(TM)");
+MODULE_LICENSE("GPL");
+
+late_initcall(cpufreq_p4_init);
+module_exit(cpufreq_p4_exit);
diff --git a/drivers/cpufreq/pcc-cpufreq.c b/drivers/cpufreq/pcc-cpufreq.c
new file mode 100644
index 00000000..cdc02ac8
--- /dev/null
+++ b/drivers/cpufreq/pcc-cpufreq.c
@@ -0,0 +1,624 @@
+/*
+ * pcc-cpufreq.c - Processor Clocking Control firmware cpufreq interface
+ *
+ * Copyright (C) 2009 Red Hat, Matthew Garrett <mjg@redhat.com>
+ * Copyright (C) 2009 Hewlett-Packard Development Company, L.P.
+ * Nagananda Chumbalkar <nagananda.chumbalkar@hp.com>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or NON
+ * INFRINGEMENT. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <linux/sched.h>
+#include <linux/cpufreq.h>
+#include <linux/compiler.h>
+#include <linux/slab.h>
+
+#include <linux/acpi.h>
+#include <linux/io.h>
+#include <linux/spinlock.h>
+#include <linux/uaccess.h>
+
+#include <acpi/processor.h>
+
+#define PCC_VERSION "1.10.00"
+#define POLL_LOOPS 300
+
+#define CMD_COMPLETE 0x1
+#define CMD_GET_FREQ 0x0
+#define CMD_SET_FREQ 0x1
+
+#define BUF_SZ 4
+
+struct pcc_register_resource {
+ u8 descriptor;
+ u16 length;
+ u8 space_id;
+ u8 bit_width;
+ u8 bit_offset;
+ u8 access_size;
+ u64 address;
+} __attribute__ ((packed));
+
+struct pcc_memory_resource {
+ u8 descriptor;
+ u16 length;
+ u8 space_id;
+ u8 resource_usage;
+ u8 type_specific;
+ u64 granularity;
+ u64 minimum;
+ u64 maximum;
+ u64 translation_offset;
+ u64 address_length;
+} __attribute__ ((packed));
+
+static struct cpufreq_driver pcc_cpufreq_driver;
+
+struct pcc_header {
+ u32 signature;
+ u16 length;
+ u8 major;
+ u8 minor;
+ u32 features;
+ u16 command;
+ u16 status;
+ u32 latency;
+ u32 minimum_time;
+ u32 maximum_time;
+ u32 nominal;
+ u32 throttled_frequency;
+ u32 minimum_frequency;
+};
+
+static void __iomem *pcch_virt_addr;
+static struct pcc_header __iomem *pcch_hdr;
+
+static DEFINE_SPINLOCK(pcc_lock);
+
+static struct acpi_generic_address doorbell;
+
+static u64 doorbell_preserve;
+static u64 doorbell_write;
+
+static u8 OSC_UUID[16] = {0x9F, 0x2C, 0x9B, 0x63, 0x91, 0x70, 0x1f, 0x49,
+ 0xBB, 0x4F, 0xA5, 0x98, 0x2F, 0xA1, 0xB5, 0x46};
+
+struct pcc_cpu {
+ u32 input_offset;
+ u32 output_offset;
+};
+
+static struct pcc_cpu __percpu *pcc_cpu_info;
+
+static int pcc_cpufreq_verify(struct cpufreq_policy *policy)
+{
+ cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq,
+ policy->cpuinfo.max_freq);
+ return 0;
+}
+
+static inline void pcc_cmd(void)
+{
+ u64 doorbell_value;
+ int i;
+
+ acpi_read(&doorbell_value, &doorbell);
+ acpi_write((doorbell_value & doorbell_preserve) | doorbell_write,
+ &doorbell);
+
+ for (i = 0; i < POLL_LOOPS; i++) {
+ if (ioread16(&pcch_hdr->status) & CMD_COMPLETE)
+ break;
+ }
+}
+
+static inline void pcc_clear_mapping(void)
+{
+ if (pcch_virt_addr)
+ iounmap(pcch_virt_addr);
+ pcch_virt_addr = NULL;
+}
+
+static unsigned int pcc_get_freq(unsigned int cpu)
+{
+ struct pcc_cpu *pcc_cpu_data;
+ unsigned int curr_freq;
+ unsigned int freq_limit;
+ u16 status;
+ u32 input_buffer;
+ u32 output_buffer;
+
+ spin_lock(&pcc_lock);
+
+ pr_debug("get: get_freq for CPU %d\n", cpu);
+ pcc_cpu_data = per_cpu_ptr(pcc_cpu_info, cpu);
+
+ input_buffer = 0x1;
+ iowrite32(input_buffer,
+ (pcch_virt_addr + pcc_cpu_data->input_offset));
+ iowrite16(CMD_GET_FREQ, &pcch_hdr->command);
+
+ pcc_cmd();
+
+ output_buffer =
+ ioread32(pcch_virt_addr + pcc_cpu_data->output_offset);
+
+ /* Clear the input buffer - we are done with the current command */
+ memset_io((pcch_virt_addr + pcc_cpu_data->input_offset), 0, BUF_SZ);
+
+ status = ioread16(&pcch_hdr->status);
+ if (status != CMD_COMPLETE) {
+ pr_debug("get: FAILED: for CPU %d, status is %d\n",
+ cpu, status);
+ goto cmd_incomplete;
+ }
+ iowrite16(0, &pcch_hdr->status);
+ curr_freq = (((ioread32(&pcch_hdr->nominal) * (output_buffer & 0xff))
+ / 100) * 1000);
+
+ pr_debug("get: SUCCESS: (virtual) output_offset for cpu %d is "
+ "0x%p, contains a value of: 0x%x. Speed is: %d MHz\n",
+ cpu, (pcch_virt_addr + pcc_cpu_data->output_offset),
+ output_buffer, curr_freq);
+
+ freq_limit = (output_buffer >> 8) & 0xff;
+ if (freq_limit != 0xff) {
+ pr_debug("get: frequency for cpu %d is being temporarily"
+ " capped at %d\n", cpu, curr_freq);
+ }
+
+ spin_unlock(&pcc_lock);
+ return curr_freq;
+
+cmd_incomplete:
+ iowrite16(0, &pcch_hdr->status);
+ spin_unlock(&pcc_lock);
+ return 0;
+}
+
+static int pcc_cpufreq_target(struct cpufreq_policy *policy,
+ unsigned int target_freq,
+ unsigned int relation)
+{
+ struct pcc_cpu *pcc_cpu_data;
+ struct cpufreq_freqs freqs;
+ u16 status;
+ u32 input_buffer;
+ int cpu;
+
+ spin_lock(&pcc_lock);
+ cpu = policy->cpu;
+ pcc_cpu_data = per_cpu_ptr(pcc_cpu_info, cpu);
+
+ pr_debug("target: CPU %d should go to target freq: %d "
+ "(virtual) input_offset is 0x%p\n",
+ cpu, target_freq,
+ (pcch_virt_addr + pcc_cpu_data->input_offset));
+
+ freqs.new = target_freq;
+ freqs.cpu = cpu;
+ cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+
+ input_buffer = 0x1 | (((target_freq * 100)
+ / (ioread32(&pcch_hdr->nominal) * 1000)) << 8);
+ iowrite32(input_buffer,
+ (pcch_virt_addr + pcc_cpu_data->input_offset));
+ iowrite16(CMD_SET_FREQ, &pcch_hdr->command);
+
+ pcc_cmd();
+
+ /* Clear the input buffer - we are done with the current command */
+ memset_io((pcch_virt_addr + pcc_cpu_data->input_offset), 0, BUF_SZ);
+
+ status = ioread16(&pcch_hdr->status);
+ if (status != CMD_COMPLETE) {
+ pr_debug("target: FAILED for cpu %d, with status: 0x%x\n",
+ cpu, status);
+ goto cmd_incomplete;
+ }
+ iowrite16(0, &pcch_hdr->status);
+
+ cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+ pr_debug("target: was SUCCESSFUL for cpu %d\n", cpu);
+ spin_unlock(&pcc_lock);
+
+ return 0;
+
+cmd_incomplete:
+ iowrite16(0, &pcch_hdr->status);
+ spin_unlock(&pcc_lock);
+ return -EINVAL;
+}
+
+static int pcc_get_offset(int cpu)
+{
+ acpi_status status;
+ struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL};
+ union acpi_object *pccp, *offset;
+ struct pcc_cpu *pcc_cpu_data;
+ struct acpi_processor *pr;
+ int ret = 0;
+
+ pr = per_cpu(processors, cpu);
+ pcc_cpu_data = per_cpu_ptr(pcc_cpu_info, cpu);
+
+ if (!pr)
+ return -ENODEV;
+
+ status = acpi_evaluate_object(pr->handle, "PCCP", NULL, &buffer);
+ if (ACPI_FAILURE(status))
+ return -ENODEV;
+
+ pccp = buffer.pointer;
+ if (!pccp || pccp->type != ACPI_TYPE_PACKAGE) {
+ ret = -ENODEV;
+ goto out_free;
+ };
+
+ offset = &(pccp->package.elements[0]);
+ if (!offset || offset->type != ACPI_TYPE_INTEGER) {
+ ret = -ENODEV;
+ goto out_free;
+ }
+
+ pcc_cpu_data->input_offset = offset->integer.value;
+
+ offset = &(pccp->package.elements[1]);
+ if (!offset || offset->type != ACPI_TYPE_INTEGER) {
+ ret = -ENODEV;
+ goto out_free;
+ }
+
+ pcc_cpu_data->output_offset = offset->integer.value;
+
+ memset_io((pcch_virt_addr + pcc_cpu_data->input_offset), 0, BUF_SZ);
+ memset_io((pcch_virt_addr + pcc_cpu_data->output_offset), 0, BUF_SZ);
+
+ pr_debug("pcc_get_offset: for CPU %d: pcc_cpu_data "
+ "input_offset: 0x%x, pcc_cpu_data output_offset: 0x%x\n",
+ cpu, pcc_cpu_data->input_offset, pcc_cpu_data->output_offset);
+out_free:
+ kfree(buffer.pointer);
+ return ret;
+}
+
+static int __init pcc_cpufreq_do_osc(acpi_handle *handle)
+{
+ acpi_status status;
+ struct acpi_object_list input;
+ struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL};
+ union acpi_object in_params[4];
+ union acpi_object *out_obj;
+ u32 capabilities[2];
+ u32 errors;
+ u32 supported;
+ int ret = 0;
+
+ input.count = 4;
+ input.pointer = in_params;
+ in_params[0].type = ACPI_TYPE_BUFFER;
+ in_params[0].buffer.length = 16;
+ in_params[0].buffer.pointer = OSC_UUID;
+ in_params[1].type = ACPI_TYPE_INTEGER;
+ in_params[1].integer.value = 1;
+ in_params[2].type = ACPI_TYPE_INTEGER;
+ in_params[2].integer.value = 2;
+ in_params[3].type = ACPI_TYPE_BUFFER;
+ in_params[3].buffer.length = 8;
+ in_params[3].buffer.pointer = (u8 *)&capabilities;
+
+ capabilities[0] = OSC_QUERY_ENABLE;
+ capabilities[1] = 0x1;
+
+ status = acpi_evaluate_object(*handle, "_OSC", &input, &output);
+ if (ACPI_FAILURE(status))
+ return -ENODEV;
+
+ if (!output.length)
+ return -ENODEV;
+
+ out_obj = output.pointer;
+ if (out_obj->type != ACPI_TYPE_BUFFER) {
+ ret = -ENODEV;
+ goto out_free;
+ }
+
+ errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0);
+ if (errors) {
+ ret = -ENODEV;
+ goto out_free;
+ }
+
+ supported = *((u32 *)(out_obj->buffer.pointer + 4));
+ if (!(supported & 0x1)) {
+ ret = -ENODEV;
+ goto out_free;
+ }
+
+ kfree(output.pointer);
+ capabilities[0] = 0x0;
+ capabilities[1] = 0x1;
+
+ status = acpi_evaluate_object(*handle, "_OSC", &input, &output);
+ if (ACPI_FAILURE(status))
+ return -ENODEV;
+
+ if (!output.length)
+ return -ENODEV;
+
+ out_obj = output.pointer;
+ if (out_obj->type != ACPI_TYPE_BUFFER) {
+ ret = -ENODEV;
+ goto out_free;
+ }
+
+ errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0);
+ if (errors) {
+ ret = -ENODEV;
+ goto out_free;
+ }
+
+ supported = *((u32 *)(out_obj->buffer.pointer + 4));
+ if (!(supported & 0x1)) {
+ ret = -ENODEV;
+ goto out_free;
+ }
+
+out_free:
+ kfree(output.pointer);
+ return ret;
+}
+
+static int __init pcc_cpufreq_probe(void)
+{
+ acpi_status status;
+ struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL};
+ struct pcc_memory_resource *mem_resource;
+ struct pcc_register_resource *reg_resource;
+ union acpi_object *out_obj, *member;
+ acpi_handle handle, osc_handle, pcch_handle;
+ int ret = 0;
+
+ status = acpi_get_handle(NULL, "\\_SB", &handle);
+ if (ACPI_FAILURE(status))
+ return -ENODEV;
+
+ status = acpi_get_handle(handle, "PCCH", &pcch_handle);
+ if (ACPI_FAILURE(status))
+ return -ENODEV;
+
+ status = acpi_get_handle(handle, "_OSC", &osc_handle);
+ if (ACPI_SUCCESS(status)) {
+ ret = pcc_cpufreq_do_osc(&osc_handle);
+ if (ret)
+ pr_debug("probe: _OSC evaluation did not succeed\n");
+ /* Firmware's use of _OSC is optional */
+ ret = 0;
+ }
+
+ status = acpi_evaluate_object(handle, "PCCH", NULL, &output);
+ if (ACPI_FAILURE(status))
+ return -ENODEV;
+
+ out_obj = output.pointer;
+ if (out_obj->type != ACPI_TYPE_PACKAGE) {
+ ret = -ENODEV;
+ goto out_free;
+ }
+
+ member = &out_obj->package.elements[0];
+ if (member->type != ACPI_TYPE_BUFFER) {
+ ret = -ENODEV;
+ goto out_free;
+ }
+
+ mem_resource = (struct pcc_memory_resource *)member->buffer.pointer;
+
+ pr_debug("probe: mem_resource descriptor: 0x%x,"
+ " length: %d, space_id: %d, resource_usage: %d,"
+ " type_specific: %d, granularity: 0x%llx,"
+ " minimum: 0x%llx, maximum: 0x%llx,"
+ " translation_offset: 0x%llx, address_length: 0x%llx\n",
+ mem_resource->descriptor, mem_resource->length,
+ mem_resource->space_id, mem_resource->resource_usage,
+ mem_resource->type_specific, mem_resource->granularity,
+ mem_resource->minimum, mem_resource->maximum,
+ mem_resource->translation_offset,
+ mem_resource->address_length);
+
+ if (mem_resource->space_id != ACPI_ADR_SPACE_SYSTEM_MEMORY) {
+ ret = -ENODEV;
+ goto out_free;
+ }
+
+ pcch_virt_addr = ioremap_nocache(mem_resource->minimum,
+ mem_resource->address_length);
+ if (pcch_virt_addr == NULL) {
+ pr_debug("probe: could not map shared mem region\n");
+ goto out_free;
+ }
+ pcch_hdr = pcch_virt_addr;
+
+ pr_debug("probe: PCCH header (virtual) addr: 0x%p\n", pcch_hdr);
+ pr_debug("probe: PCCH header is at physical address: 0x%llx,"
+ " signature: 0x%x, length: %d bytes, major: %d, minor: %d,"
+ " supported features: 0x%x, command field: 0x%x,"
+ " status field: 0x%x, nominal latency: %d us\n",
+ mem_resource->minimum, ioread32(&pcch_hdr->signature),
+ ioread16(&pcch_hdr->length), ioread8(&pcch_hdr->major),
+ ioread8(&pcch_hdr->minor), ioread32(&pcch_hdr->features),
+ ioread16(&pcch_hdr->command), ioread16(&pcch_hdr->status),
+ ioread32(&pcch_hdr->latency));
+
+ pr_debug("probe: min time between commands: %d us,"
+ " max time between commands: %d us,"
+ " nominal CPU frequency: %d MHz,"
+ " minimum CPU frequency: %d MHz,"
+ " minimum CPU frequency without throttling: %d MHz\n",
+ ioread32(&pcch_hdr->minimum_time),
+ ioread32(&pcch_hdr->maximum_time),
+ ioread32(&pcch_hdr->nominal),
+ ioread32(&pcch_hdr->throttled_frequency),
+ ioread32(&pcch_hdr->minimum_frequency));
+
+ member = &out_obj->package.elements[1];
+ if (member->type != ACPI_TYPE_BUFFER) {
+ ret = -ENODEV;
+ goto pcch_free;
+ }
+
+ reg_resource = (struct pcc_register_resource *)member->buffer.pointer;
+
+ doorbell.space_id = reg_resource->space_id;
+ doorbell.bit_width = reg_resource->bit_width;
+ doorbell.bit_offset = reg_resource->bit_offset;
+ doorbell.access_width = 64;
+ doorbell.address = reg_resource->address;
+
+ pr_debug("probe: doorbell: space_id is %d, bit_width is %d, "
+ "bit_offset is %d, access_width is %d, address is 0x%llx\n",
+ doorbell.space_id, doorbell.bit_width, doorbell.bit_offset,
+ doorbell.access_width, reg_resource->address);
+
+ member = &out_obj->package.elements[2];
+ if (member->type != ACPI_TYPE_INTEGER) {
+ ret = -ENODEV;
+ goto pcch_free;
+ }
+
+ doorbell_preserve = member->integer.value;
+
+ member = &out_obj->package.elements[3];
+ if (member->type != ACPI_TYPE_INTEGER) {
+ ret = -ENODEV;
+ goto pcch_free;
+ }
+
+ doorbell_write = member->integer.value;
+
+ pr_debug("probe: doorbell_preserve: 0x%llx,"
+ " doorbell_write: 0x%llx\n",
+ doorbell_preserve, doorbell_write);
+
+ pcc_cpu_info = alloc_percpu(struct pcc_cpu);
+ if (!pcc_cpu_info) {
+ ret = -ENOMEM;
+ goto pcch_free;
+ }
+
+ printk(KERN_DEBUG "pcc-cpufreq: (v%s) driver loaded with frequency"
+ " limits: %d MHz, %d MHz\n", PCC_VERSION,
+ ioread32(&pcch_hdr->minimum_frequency),
+ ioread32(&pcch_hdr->nominal));
+ kfree(output.pointer);
+ return ret;
+pcch_free:
+ pcc_clear_mapping();
+out_free:
+ kfree(output.pointer);
+ return ret;
+}
+
+static int pcc_cpufreq_cpu_init(struct cpufreq_policy *policy)
+{
+ unsigned int cpu = policy->cpu;
+ unsigned int result = 0;
+
+ if (!pcch_virt_addr) {
+ result = -1;
+ goto out;
+ }
+
+ result = pcc_get_offset(cpu);
+ if (result) {
+ pr_debug("init: PCCP evaluation failed\n");
+ goto out;
+ }
+
+ policy->max = policy->cpuinfo.max_freq =
+ ioread32(&pcch_hdr->nominal) * 1000;
+ policy->min = policy->cpuinfo.min_freq =
+ ioread32(&pcch_hdr->minimum_frequency) * 1000;
+ policy->cur = pcc_get_freq(cpu);
+
+ if (!policy->cur) {
+ pr_debug("init: Unable to get current CPU frequency\n");
+ result = -EINVAL;
+ goto out;
+ }
+
+ pr_debug("init: policy->max is %d, policy->min is %d\n",
+ policy->max, policy->min);
+out:
+ return result;
+}
+
+static int pcc_cpufreq_cpu_exit(struct cpufreq_policy *policy)
+{
+ return 0;
+}
+
+static struct cpufreq_driver pcc_cpufreq_driver = {
+ .flags = CPUFREQ_CONST_LOOPS,
+ .get = pcc_get_freq,
+ .verify = pcc_cpufreq_verify,
+ .target = pcc_cpufreq_target,
+ .init = pcc_cpufreq_cpu_init,
+ .exit = pcc_cpufreq_cpu_exit,
+ .name = "pcc-cpufreq",
+ .owner = THIS_MODULE,
+};
+
+static int __init pcc_cpufreq_init(void)
+{
+ int ret;
+
+ if (acpi_disabled)
+ return 0;
+
+ ret = pcc_cpufreq_probe();
+ if (ret) {
+ pr_debug("pcc_cpufreq_init: PCCH evaluation failed\n");
+ return ret;
+ }
+
+ ret = cpufreq_register_driver(&pcc_cpufreq_driver);
+
+ return ret;
+}
+
+static void __exit pcc_cpufreq_exit(void)
+{
+ cpufreq_unregister_driver(&pcc_cpufreq_driver);
+
+ pcc_clear_mapping();
+
+ free_percpu(pcc_cpu_info);
+}
+
+MODULE_AUTHOR("Matthew Garrett, Naga Chumbalkar");
+MODULE_VERSION(PCC_VERSION);
+MODULE_DESCRIPTION("Processor Clocking Control interface driver");
+MODULE_LICENSE("GPL");
+
+late_initcall(pcc_cpufreq_init);
+module_exit(pcc_cpufreq_exit);
diff --git a/drivers/cpufreq/powernow-k6.c b/drivers/cpufreq/powernow-k6.c
new file mode 100644
index 00000000..b3379d6a
--- /dev/null
+++ b/drivers/cpufreq/powernow-k6.c
@@ -0,0 +1,261 @@
+/*
+ * This file was based upon code in Powertweak Linux (http://powertweak.sf.net)
+ * (C) 2000-2003 Dave Jones, Arjan van de Ven, Janne Pänkälä,
+ * Dominik Brodowski.
+ *
+ * Licensed under the terms of the GNU GPL License version 2.
+ *
+ * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/cpufreq.h>
+#include <linux/ioport.h>
+#include <linux/timex.h>
+#include <linux/io.h>
+
+#include <asm/msr.h>
+
+#define POWERNOW_IOPORT 0xfff0 /* it doesn't matter where, as long
+ as it is unused */
+
+#define PFX "powernow-k6: "
+static unsigned int busfreq; /* FSB, in 10 kHz */
+static unsigned int max_multiplier;
+
+
+/* Clock ratio multiplied by 10 - see table 27 in AMD#23446 */
+static struct cpufreq_frequency_table clock_ratio[] = {
+ {45, /* 000 -> 4.5x */ 0},
+ {50, /* 001 -> 5.0x */ 0},
+ {40, /* 010 -> 4.0x */ 0},
+ {55, /* 011 -> 5.5x */ 0},
+ {20, /* 100 -> 2.0x */ 0},
+ {30, /* 101 -> 3.0x */ 0},
+ {60, /* 110 -> 6.0x */ 0},
+ {35, /* 111 -> 3.5x */ 0},
+ {0, CPUFREQ_TABLE_END}
+};
+
+
+/**
+ * powernow_k6_get_cpu_multiplier - returns the current FSB multiplier
+ *
+ * Returns the current setting of the frequency multiplier. Core clock
+ * speed is frequency of the Front-Side Bus multiplied with this value.
+ */
+static int powernow_k6_get_cpu_multiplier(void)
+{
+ u64 invalue = 0;
+ u32 msrval;
+
+ msrval = POWERNOW_IOPORT + 0x1;
+ wrmsr(MSR_K6_EPMR, msrval, 0); /* enable the PowerNow port */
+ invalue = inl(POWERNOW_IOPORT + 0x8);
+ msrval = POWERNOW_IOPORT + 0x0;
+ wrmsr(MSR_K6_EPMR, msrval, 0); /* disable it again */
+
+ return clock_ratio[(invalue >> 5)&7].index;
+}
+
+
+/**
+ * powernow_k6_set_state - set the PowerNow! multiplier
+ * @best_i: clock_ratio[best_i] is the target multiplier
+ *
+ * Tries to change the PowerNow! multiplier
+ */
+static void powernow_k6_set_state(unsigned int best_i)
+{
+ unsigned long outvalue = 0, invalue = 0;
+ unsigned long msrval;
+ struct cpufreq_freqs freqs;
+
+ if (clock_ratio[best_i].index > max_multiplier) {
+ printk(KERN_ERR PFX "invalid target frequency\n");
+ return;
+ }
+
+ freqs.old = busfreq * powernow_k6_get_cpu_multiplier();
+ freqs.new = busfreq * clock_ratio[best_i].index;
+ freqs.cpu = 0; /* powernow-k6.c is UP only driver */
+
+ cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+
+ /* we now need to transform best_i to the BVC format, see AMD#23446 */
+
+ outvalue = (1<<12) | (1<<10) | (1<<9) | (best_i<<5);
+
+ msrval = POWERNOW_IOPORT + 0x1;
+ wrmsr(MSR_K6_EPMR, msrval, 0); /* enable the PowerNow port */
+ invalue = inl(POWERNOW_IOPORT + 0x8);
+ invalue = invalue & 0xf;
+ outvalue = outvalue | invalue;
+ outl(outvalue , (POWERNOW_IOPORT + 0x8));
+ msrval = POWERNOW_IOPORT + 0x0;
+ wrmsr(MSR_K6_EPMR, msrval, 0); /* disable it again */
+
+ cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+
+ return;
+}
+
+
+/**
+ * powernow_k6_verify - verifies a new CPUfreq policy
+ * @policy: new policy
+ *
+ * Policy must be within lowest and highest possible CPU Frequency,
+ * and at least one possible state must be within min and max.
+ */
+static int powernow_k6_verify(struct cpufreq_policy *policy)
+{
+ return cpufreq_frequency_table_verify(policy, &clock_ratio[0]);
+}
+
+
+/**
+ * powernow_k6_setpolicy - sets a new CPUFreq policy
+ * @policy: new policy
+ * @target_freq: the target frequency
+ * @relation: how that frequency relates to achieved frequency
+ * (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
+ *
+ * sets a new CPUFreq policy
+ */
+static int powernow_k6_target(struct cpufreq_policy *policy,
+ unsigned int target_freq,
+ unsigned int relation)
+{
+ unsigned int newstate = 0;
+
+ if (cpufreq_frequency_table_target(policy, &clock_ratio[0],
+ target_freq, relation, &newstate))
+ return -EINVAL;
+
+ powernow_k6_set_state(newstate);
+
+ return 0;
+}
+
+
+static int powernow_k6_cpu_init(struct cpufreq_policy *policy)
+{
+ unsigned int i, f;
+ int result;
+
+ if (policy->cpu != 0)
+ return -ENODEV;
+
+ /* get frequencies */
+ max_multiplier = powernow_k6_get_cpu_multiplier();
+ busfreq = cpu_khz / max_multiplier;
+
+ /* table init */
+ for (i = 0; (clock_ratio[i].frequency != CPUFREQ_TABLE_END); i++) {
+ f = clock_ratio[i].index;
+ if (f > max_multiplier)
+ clock_ratio[i].frequency = CPUFREQ_ENTRY_INVALID;
+ else
+ clock_ratio[i].frequency = busfreq * f;
+ }
+
+ /* cpuinfo and default policy values */
+ policy->cpuinfo.transition_latency = 200000;
+ policy->cur = busfreq * max_multiplier;
+
+ result = cpufreq_frequency_table_cpuinfo(policy, clock_ratio);
+ if (result)
+ return result;
+
+ cpufreq_frequency_table_get_attr(clock_ratio, policy->cpu);
+
+ return 0;
+}
+
+
+static int powernow_k6_cpu_exit(struct cpufreq_policy *policy)
+{
+ unsigned int i;
+ for (i = 0; i < 8; i++) {
+ if (i == max_multiplier)
+ powernow_k6_set_state(i);
+ }
+ cpufreq_frequency_table_put_attr(policy->cpu);
+ return 0;
+}
+
+static unsigned int powernow_k6_get(unsigned int cpu)
+{
+ unsigned int ret;
+ ret = (busfreq * powernow_k6_get_cpu_multiplier());
+ return ret;
+}
+
+static struct freq_attr *powernow_k6_attr[] = {
+ &cpufreq_freq_attr_scaling_available_freqs,
+ NULL,
+};
+
+static struct cpufreq_driver powernow_k6_driver = {
+ .verify = powernow_k6_verify,
+ .target = powernow_k6_target,
+ .init = powernow_k6_cpu_init,
+ .exit = powernow_k6_cpu_exit,
+ .get = powernow_k6_get,
+ .name = "powernow-k6",
+ .owner = THIS_MODULE,
+ .attr = powernow_k6_attr,
+};
+
+
+/**
+ * powernow_k6_init - initializes the k6 PowerNow! CPUFreq driver
+ *
+ * Initializes the K6 PowerNow! support. Returns -ENODEV on unsupported
+ * devices, -EINVAL or -ENOMEM on problems during initiatization, and zero
+ * on success.
+ */
+static int __init powernow_k6_init(void)
+{
+ struct cpuinfo_x86 *c = &cpu_data(0);
+
+ if ((c->x86_vendor != X86_VENDOR_AMD) || (c->x86 != 5) ||
+ ((c->x86_model != 12) && (c->x86_model != 13)))
+ return -ENODEV;
+
+ if (!request_region(POWERNOW_IOPORT, 16, "PowerNow!")) {
+ printk(KERN_INFO PFX "PowerNow IOPORT region already used.\n");
+ return -EIO;
+ }
+
+ if (cpufreq_register_driver(&powernow_k6_driver)) {
+ release_region(POWERNOW_IOPORT, 16);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+
+/**
+ * powernow_k6_exit - unregisters AMD K6-2+/3+ PowerNow! support
+ *
+ * Unregisters AMD K6-2+ / K6-3+ PowerNow! support.
+ */
+static void __exit powernow_k6_exit(void)
+{
+ cpufreq_unregister_driver(&powernow_k6_driver);
+ release_region(POWERNOW_IOPORT, 16);
+}
+
+
+MODULE_AUTHOR("Arjan van de Ven, Dave Jones <davej@redhat.com>, "
+ "Dominik Brodowski <linux@brodo.de>");
+MODULE_DESCRIPTION("PowerNow! driver for AMD K6-2+ / K6-3+ processors.");
+MODULE_LICENSE("GPL");
+
+module_init(powernow_k6_init);
+module_exit(powernow_k6_exit);
diff --git a/drivers/cpufreq/powernow-k7.c b/drivers/cpufreq/powernow-k7.c
new file mode 100644
index 00000000..d71d9f37
--- /dev/null
+++ b/drivers/cpufreq/powernow-k7.c
@@ -0,0 +1,747 @@
+/*
+ * AMD K7 Powernow driver.
+ * (C) 2003 Dave Jones on behalf of SuSE Labs.
+ * (C) 2003-2004 Dave Jones <davej@redhat.com>
+ *
+ * Licensed under the terms of the GNU GPL License version 2.
+ * Based upon datasheets & sample CPUs kindly provided by AMD.
+ *
+ * Errata 5:
+ * CPU may fail to execute a FID/VID change in presence of interrupt.
+ * - We cli/sti on stepping A0 CPUs around the FID/VID transition.
+ * Errata 15:
+ * CPU with half frequency multipliers may hang upon wakeup from disconnect.
+ * - We disable half multipliers if ACPI is used on A0 stepping CPUs.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/init.h>
+#include <linux/cpufreq.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/dmi.h>
+#include <linux/timex.h>
+#include <linux/io.h>
+
+#include <asm/timer.h> /* Needed for recalibrate_cpu_khz() */
+#include <asm/msr.h>
+#include <asm/system.h>
+
+#ifdef CONFIG_X86_POWERNOW_K7_ACPI
+#include <linux/acpi.h>
+#include <acpi/processor.h>
+#endif
+
+#include "powernow-k7.h"
+
+#define PFX "powernow: "
+
+
+struct psb_s {
+ u8 signature[10];
+ u8 tableversion;
+ u8 flags;
+ u16 settlingtime;
+ u8 reserved1;
+ u8 numpst;
+};
+
+struct pst_s {
+ u32 cpuid;
+ u8 fsbspeed;
+ u8 maxfid;
+ u8 startvid;
+ u8 numpstates;
+};
+
+#ifdef CONFIG_X86_POWERNOW_K7_ACPI
+union powernow_acpi_control_t {
+ struct {
+ unsigned long fid:5,
+ vid:5,
+ sgtc:20,
+ res1:2;
+ } bits;
+ unsigned long val;
+};
+#endif
+
+/* divide by 1000 to get VCore voltage in V. */
+static const int mobile_vid_table[32] = {
+ 2000, 1950, 1900, 1850, 1800, 1750, 1700, 1650,
+ 1600, 1550, 1500, 1450, 1400, 1350, 1300, 0,
+ 1275, 1250, 1225, 1200, 1175, 1150, 1125, 1100,
+ 1075, 1050, 1025, 1000, 975, 950, 925, 0,
+};
+
+/* divide by 10 to get FID. */
+static const int fid_codes[32] = {
+ 110, 115, 120, 125, 50, 55, 60, 65,
+ 70, 75, 80, 85, 90, 95, 100, 105,
+ 30, 190, 40, 200, 130, 135, 140, 210,
+ 150, 225, 160, 165, 170, 180, -1, -1,
+};
+
+/* This parameter is used in order to force ACPI instead of legacy method for
+ * configuration purpose.
+ */
+
+static int acpi_force;
+
+static struct cpufreq_frequency_table *powernow_table;
+
+static unsigned int can_scale_bus;
+static unsigned int can_scale_vid;
+static unsigned int minimum_speed = -1;
+static unsigned int maximum_speed;
+static unsigned int number_scales;
+static unsigned int fsb;
+static unsigned int latency;
+static char have_a0;
+
+static int check_fsb(unsigned int fsbspeed)
+{
+ int delta;
+ unsigned int f = fsb / 1000;
+
+ delta = (fsbspeed > f) ? fsbspeed - f : f - fsbspeed;
+ return delta < 5;
+}
+
+static int check_powernow(void)
+{
+ struct cpuinfo_x86 *c = &cpu_data(0);
+ unsigned int maxei, eax, ebx, ecx, edx;
+
+ if ((c->x86_vendor != X86_VENDOR_AMD) || (c->x86 != 6)) {
+#ifdef MODULE
+ printk(KERN_INFO PFX "This module only works with "
+ "AMD K7 CPUs\n");
+#endif
+ return 0;
+ }
+
+ /* Get maximum capabilities */
+ maxei = cpuid_eax(0x80000000);
+ if (maxei < 0x80000007) { /* Any powernow info ? */
+#ifdef MODULE
+ printk(KERN_INFO PFX "No powernow capabilities detected\n");
+#endif
+ return 0;
+ }
+
+ if ((c->x86_model == 6) && (c->x86_mask == 0)) {
+ printk(KERN_INFO PFX "K7 660[A0] core detected, "
+ "enabling errata workarounds\n");
+ have_a0 = 1;
+ }
+
+ cpuid(0x80000007, &eax, &ebx, &ecx, &edx);
+
+ /* Check we can actually do something before we say anything.*/
+ if (!(edx & (1 << 1 | 1 << 2)))
+ return 0;
+
+ printk(KERN_INFO PFX "PowerNOW! Technology present. Can scale: ");
+
+ if (edx & 1 << 1) {
+ printk("frequency");
+ can_scale_bus = 1;
+ }
+
+ if ((edx & (1 << 1 | 1 << 2)) == 0x6)
+ printk(" and ");
+
+ if (edx & 1 << 2) {
+ printk("voltage");
+ can_scale_vid = 1;
+ }
+
+ printk(".\n");
+ return 1;
+}
+
+#ifdef CONFIG_X86_POWERNOW_K7_ACPI
+static void invalidate_entry(unsigned int entry)
+{
+ powernow_table[entry].frequency = CPUFREQ_ENTRY_INVALID;
+}
+#endif
+
+static int get_ranges(unsigned char *pst)
+{
+ unsigned int j;
+ unsigned int speed;
+ u8 fid, vid;
+
+ powernow_table = kzalloc((sizeof(struct cpufreq_frequency_table) *
+ (number_scales + 1)), GFP_KERNEL);
+ if (!powernow_table)
+ return -ENOMEM;
+
+ for (j = 0 ; j < number_scales; j++) {
+ fid = *pst++;
+
+ powernow_table[j].frequency = (fsb * fid_codes[fid]) / 10;
+ powernow_table[j].index = fid; /* lower 8 bits */
+
+ speed = powernow_table[j].frequency;
+
+ if ((fid_codes[fid] % 10) == 5) {
+#ifdef CONFIG_X86_POWERNOW_K7_ACPI
+ if (have_a0 == 1)
+ invalidate_entry(j);
+#endif
+ }
+
+ if (speed < minimum_speed)
+ minimum_speed = speed;
+ if (speed > maximum_speed)
+ maximum_speed = speed;
+
+ vid = *pst++;
+ powernow_table[j].index |= (vid << 8); /* upper 8 bits */
+
+ pr_debug(" FID: 0x%x (%d.%dx [%dMHz]) "
+ "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10,
+ fid_codes[fid] % 10, speed/1000, vid,
+ mobile_vid_table[vid]/1000,
+ mobile_vid_table[vid]%1000);
+ }
+ powernow_table[number_scales].frequency = CPUFREQ_TABLE_END;
+ powernow_table[number_scales].index = 0;
+
+ return 0;
+}
+
+
+static void change_FID(int fid)
+{
+ union msr_fidvidctl fidvidctl;
+
+ rdmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val);
+ if (fidvidctl.bits.FID != fid) {
+ fidvidctl.bits.SGTC = latency;
+ fidvidctl.bits.FID = fid;
+ fidvidctl.bits.VIDC = 0;
+ fidvidctl.bits.FIDC = 1;
+ wrmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val);
+ }
+}
+
+
+static void change_VID(int vid)
+{
+ union msr_fidvidctl fidvidctl;
+
+ rdmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val);
+ if (fidvidctl.bits.VID != vid) {
+ fidvidctl.bits.SGTC = latency;
+ fidvidctl.bits.VID = vid;
+ fidvidctl.bits.FIDC = 0;
+ fidvidctl.bits.VIDC = 1;
+ wrmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val);
+ }
+}
+
+
+static void change_speed(unsigned int index)
+{
+ u8 fid, vid;
+ struct cpufreq_freqs freqs;
+ union msr_fidvidstatus fidvidstatus;
+ int cfid;
+
+ /* fid are the lower 8 bits of the index we stored into
+ * the cpufreq frequency table in powernow_decode_bios,
+ * vid are the upper 8 bits.
+ */
+
+ fid = powernow_table[index].index & 0xFF;
+ vid = (powernow_table[index].index & 0xFF00) >> 8;
+
+ freqs.cpu = 0;
+
+ rdmsrl(MSR_K7_FID_VID_STATUS, fidvidstatus.val);
+ cfid = fidvidstatus.bits.CFID;
+ freqs.old = fsb * fid_codes[cfid] / 10;
+
+ freqs.new = powernow_table[index].frequency;
+
+ cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+
+ /* Now do the magic poking into the MSRs. */
+
+ if (have_a0 == 1) /* A0 errata 5 */
+ local_irq_disable();
+
+ if (freqs.old > freqs.new) {
+ /* Going down, so change FID first */
+ change_FID(fid);
+ change_VID(vid);
+ } else {
+ /* Going up, so change VID first */
+ change_VID(vid);
+ change_FID(fid);
+ }
+
+
+ if (have_a0 == 1)
+ local_irq_enable();
+
+ cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+}
+
+
+#ifdef CONFIG_X86_POWERNOW_K7_ACPI
+
+static struct acpi_processor_performance *acpi_processor_perf;
+
+static int powernow_acpi_init(void)
+{
+ int i;
+ int retval = 0;
+ union powernow_acpi_control_t pc;
+
+ if (acpi_processor_perf != NULL && powernow_table != NULL) {
+ retval = -EINVAL;
+ goto err0;
+ }
+
+ acpi_processor_perf = kzalloc(sizeof(struct acpi_processor_performance),
+ GFP_KERNEL);
+ if (!acpi_processor_perf) {
+ retval = -ENOMEM;
+ goto err0;
+ }
+
+ if (!zalloc_cpumask_var(&acpi_processor_perf->shared_cpu_map,
+ GFP_KERNEL)) {
+ retval = -ENOMEM;
+ goto err05;
+ }
+
+ if (acpi_processor_register_performance(acpi_processor_perf, 0)) {
+ retval = -EIO;
+ goto err1;
+ }
+
+ if (acpi_processor_perf->control_register.space_id !=
+ ACPI_ADR_SPACE_FIXED_HARDWARE) {
+ retval = -ENODEV;
+ goto err2;
+ }
+
+ if (acpi_processor_perf->status_register.space_id !=
+ ACPI_ADR_SPACE_FIXED_HARDWARE) {
+ retval = -ENODEV;
+ goto err2;
+ }
+
+ number_scales = acpi_processor_perf->state_count;
+
+ if (number_scales < 2) {
+ retval = -ENODEV;
+ goto err2;
+ }
+
+ powernow_table = kzalloc((sizeof(struct cpufreq_frequency_table) *
+ (number_scales + 1)), GFP_KERNEL);
+ if (!powernow_table) {
+ retval = -ENOMEM;
+ goto err2;
+ }
+
+ pc.val = (unsigned long) acpi_processor_perf->states[0].control;
+ for (i = 0; i < number_scales; i++) {
+ u8 fid, vid;
+ struct acpi_processor_px *state =
+ &acpi_processor_perf->states[i];
+ unsigned int speed, speed_mhz;
+
+ pc.val = (unsigned long) state->control;
+ pr_debug("acpi: P%d: %d MHz %d mW %d uS control %08x SGTC %d\n",
+ i,
+ (u32) state->core_frequency,
+ (u32) state->power,
+ (u32) state->transition_latency,
+ (u32) state->control,
+ pc.bits.sgtc);
+
+ vid = pc.bits.vid;
+ fid = pc.bits.fid;
+
+ powernow_table[i].frequency = fsb * fid_codes[fid] / 10;
+ powernow_table[i].index = fid; /* lower 8 bits */
+ powernow_table[i].index |= (vid << 8); /* upper 8 bits */
+
+ speed = powernow_table[i].frequency;
+ speed_mhz = speed / 1000;
+
+ /* processor_perflib will multiply the MHz value by 1000 to
+ * get a KHz value (e.g. 1266000). However, powernow-k7 works
+ * with true KHz values (e.g. 1266768). To ensure that all
+ * powernow frequencies are available, we must ensure that
+ * ACPI doesn't restrict them, so we round up the MHz value
+ * to ensure that perflib's computed KHz value is greater than
+ * or equal to powernow's KHz value.
+ */
+ if (speed % 1000 > 0)
+ speed_mhz++;
+
+ if ((fid_codes[fid] % 10) == 5) {
+ if (have_a0 == 1)
+ invalidate_entry(i);
+ }
+
+ pr_debug(" FID: 0x%x (%d.%dx [%dMHz]) "
+ "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10,
+ fid_codes[fid] % 10, speed_mhz, vid,
+ mobile_vid_table[vid]/1000,
+ mobile_vid_table[vid]%1000);
+
+ if (state->core_frequency != speed_mhz) {
+ state->core_frequency = speed_mhz;
+ pr_debug(" Corrected ACPI frequency to %d\n",
+ speed_mhz);
+ }
+
+ if (latency < pc.bits.sgtc)
+ latency = pc.bits.sgtc;
+
+ if (speed < minimum_speed)
+ minimum_speed = speed;
+ if (speed > maximum_speed)
+ maximum_speed = speed;
+ }
+
+ powernow_table[i].frequency = CPUFREQ_TABLE_END;
+ powernow_table[i].index = 0;
+
+ /* notify BIOS that we exist */
+ acpi_processor_notify_smm(THIS_MODULE);
+
+ return 0;
+
+err2:
+ acpi_processor_unregister_performance(acpi_processor_perf, 0);
+err1:
+ free_cpumask_var(acpi_processor_perf->shared_cpu_map);
+err05:
+ kfree(acpi_processor_perf);
+err0:
+ printk(KERN_WARNING PFX "ACPI perflib can not be used on "
+ "this platform\n");
+ acpi_processor_perf = NULL;
+ return retval;
+}
+#else
+static int powernow_acpi_init(void)
+{
+ printk(KERN_INFO PFX "no support for ACPI processor found."
+ " Please recompile your kernel with ACPI processor\n");
+ return -EINVAL;
+}
+#endif
+
+static void print_pst_entry(struct pst_s *pst, unsigned int j)
+{
+ pr_debug("PST:%d (@%p)\n", j, pst);
+ pr_debug(" cpuid: 0x%x fsb: %d maxFID: 0x%x startvid: 0x%x\n",
+ pst->cpuid, pst->fsbspeed, pst->maxfid, pst->startvid);
+}
+
+static int powernow_decode_bios(int maxfid, int startvid)
+{
+ struct psb_s *psb;
+ struct pst_s *pst;
+ unsigned int i, j;
+ unsigned char *p;
+ unsigned int etuple;
+ unsigned int ret;
+
+ etuple = cpuid_eax(0x80000001);
+
+ for (i = 0xC0000; i < 0xffff0 ; i += 16) {
+
+ p = phys_to_virt(i);
+
+ if (memcmp(p, "AMDK7PNOW!", 10) == 0) {
+ pr_debug("Found PSB header at %p\n", p);
+ psb = (struct psb_s *) p;
+ pr_debug("Table version: 0x%x\n", psb->tableversion);
+ if (psb->tableversion != 0x12) {
+ printk(KERN_INFO PFX "Sorry, only v1.2 tables"
+ " supported right now\n");
+ return -ENODEV;
+ }
+
+ pr_debug("Flags: 0x%x\n", psb->flags);
+ if ((psb->flags & 1) == 0)
+ pr_debug("Mobile voltage regulator\n");
+ else
+ pr_debug("Desktop voltage regulator\n");
+
+ latency = psb->settlingtime;
+ if (latency < 100) {
+ printk(KERN_INFO PFX "BIOS set settling time "
+ "to %d microseconds. "
+ "Should be at least 100. "
+ "Correcting.\n", latency);
+ latency = 100;
+ }
+ pr_debug("Settling Time: %d microseconds.\n",
+ psb->settlingtime);
+ pr_debug("Has %d PST tables. (Only dumping ones "
+ "relevant to this CPU).\n",
+ psb->numpst);
+
+ p += sizeof(struct psb_s);
+
+ pst = (struct pst_s *) p;
+
+ for (j = 0; j < psb->numpst; j++) {
+ pst = (struct pst_s *) p;
+ number_scales = pst->numpstates;
+
+ if ((etuple == pst->cpuid) &&
+ check_fsb(pst->fsbspeed) &&
+ (maxfid == pst->maxfid) &&
+ (startvid == pst->startvid)) {
+ print_pst_entry(pst, j);
+ p = (char *)pst + sizeof(struct pst_s);
+ ret = get_ranges(p);
+ return ret;
+ } else {
+ unsigned int k;
+ p = (char *)pst + sizeof(struct pst_s);
+ for (k = 0; k < number_scales; k++)
+ p += 2;
+ }
+ }
+ printk(KERN_INFO PFX "No PST tables match this cpuid "
+ "(0x%x)\n", etuple);
+ printk(KERN_INFO PFX "This is indicative of a broken "
+ "BIOS.\n");
+
+ return -EINVAL;
+ }
+ p++;
+ }
+
+ return -ENODEV;
+}
+
+
+static int powernow_target(struct cpufreq_policy *policy,
+ unsigned int target_freq,
+ unsigned int relation)
+{
+ unsigned int newstate;
+
+ if (cpufreq_frequency_table_target(policy, powernow_table, target_freq,
+ relation, &newstate))
+ return -EINVAL;
+
+ change_speed(newstate);
+
+ return 0;
+}
+
+
+static int powernow_verify(struct cpufreq_policy *policy)
+{
+ return cpufreq_frequency_table_verify(policy, powernow_table);
+}
+
+/*
+ * We use the fact that the bus frequency is somehow
+ * a multiple of 100000/3 khz, then we compute sgtc according
+ * to this multiple.
+ * That way, we match more how AMD thinks all of that work.
+ * We will then get the same kind of behaviour already tested under
+ * the "well-known" other OS.
+ */
+static int __cpuinit fixup_sgtc(void)
+{
+ unsigned int sgtc;
+ unsigned int m;
+
+ m = fsb / 3333;
+ if ((m % 10) >= 5)
+ m += 5;
+
+ m /= 10;
+
+ sgtc = 100 * m * latency;
+ sgtc = sgtc / 3;
+ if (sgtc > 0xfffff) {
+ printk(KERN_WARNING PFX "SGTC too large %d\n", sgtc);
+ sgtc = 0xfffff;
+ }
+ return sgtc;
+}
+
+static unsigned int powernow_get(unsigned int cpu)
+{
+ union msr_fidvidstatus fidvidstatus;
+ unsigned int cfid;
+
+ if (cpu)
+ return 0;
+ rdmsrl(MSR_K7_FID_VID_STATUS, fidvidstatus.val);
+ cfid = fidvidstatus.bits.CFID;
+
+ return fsb * fid_codes[cfid] / 10;
+}
+
+
+static int __cpuinit acer_cpufreq_pst(const struct dmi_system_id *d)
+{
+ printk(KERN_WARNING PFX
+ "%s laptop with broken PST tables in BIOS detected.\n",
+ d->ident);
+ printk(KERN_WARNING PFX
+ "You need to downgrade to 3A21 (09/09/2002), or try a newer "
+ "BIOS than 3A71 (01/20/2003)\n");
+ printk(KERN_WARNING PFX
+ "cpufreq scaling has been disabled as a result of this.\n");
+ return 0;
+}
+
+/*
+ * Some Athlon laptops have really fucked PST tables.
+ * A BIOS update is all that can save them.
+ * Mention this, and disable cpufreq.
+ */
+static struct dmi_system_id __cpuinitdata powernow_dmi_table[] = {
+ {
+ .callback = acer_cpufreq_pst,
+ .ident = "Acer Aspire",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Insyde Software"),
+ DMI_MATCH(DMI_BIOS_VERSION, "3A71"),
+ },
+ },
+ { }
+};
+
+static int __cpuinit powernow_cpu_init(struct cpufreq_policy *policy)
+{
+ union msr_fidvidstatus fidvidstatus;
+ int result;
+
+ if (policy->cpu != 0)
+ return -ENODEV;
+
+ rdmsrl(MSR_K7_FID_VID_STATUS, fidvidstatus.val);
+
+ recalibrate_cpu_khz();
+
+ fsb = (10 * cpu_khz) / fid_codes[fidvidstatus.bits.CFID];
+ if (!fsb) {
+ printk(KERN_WARNING PFX "can not determine bus frequency\n");
+ return -EINVAL;
+ }
+ pr_debug("FSB: %3dMHz\n", fsb/1000);
+
+ if (dmi_check_system(powernow_dmi_table) || acpi_force) {
+ printk(KERN_INFO PFX "PSB/PST known to be broken. "
+ "Trying ACPI instead\n");
+ result = powernow_acpi_init();
+ } else {
+ result = powernow_decode_bios(fidvidstatus.bits.MFID,
+ fidvidstatus.bits.SVID);
+ if (result) {
+ printk(KERN_INFO PFX "Trying ACPI perflib\n");
+ maximum_speed = 0;
+ minimum_speed = -1;
+ latency = 0;
+ result = powernow_acpi_init();
+ if (result) {
+ printk(KERN_INFO PFX
+ "ACPI and legacy methods failed\n");
+ }
+ } else {
+ /* SGTC use the bus clock as timer */
+ latency = fixup_sgtc();
+ printk(KERN_INFO PFX "SGTC: %d\n", latency);
+ }
+ }
+
+ if (result)
+ return result;
+
+ printk(KERN_INFO PFX "Minimum speed %d MHz. Maximum speed %d MHz.\n",
+ minimum_speed/1000, maximum_speed/1000);
+
+ policy->cpuinfo.transition_latency =
+ cpufreq_scale(2000000UL, fsb, latency);
+
+ policy->cur = powernow_get(0);
+
+ cpufreq_frequency_table_get_attr(powernow_table, policy->cpu);
+
+ return cpufreq_frequency_table_cpuinfo(policy, powernow_table);
+}
+
+static int powernow_cpu_exit(struct cpufreq_policy *policy)
+{
+ cpufreq_frequency_table_put_attr(policy->cpu);
+
+#ifdef CONFIG_X86_POWERNOW_K7_ACPI
+ if (acpi_processor_perf) {
+ acpi_processor_unregister_performance(acpi_processor_perf, 0);
+ free_cpumask_var(acpi_processor_perf->shared_cpu_map);
+ kfree(acpi_processor_perf);
+ }
+#endif
+
+ kfree(powernow_table);
+ return 0;
+}
+
+static struct freq_attr *powernow_table_attr[] = {
+ &cpufreq_freq_attr_scaling_available_freqs,
+ NULL,
+};
+
+static struct cpufreq_driver powernow_driver = {
+ .verify = powernow_verify,
+ .target = powernow_target,
+ .get = powernow_get,
+#ifdef CONFIG_X86_POWERNOW_K7_ACPI
+ .bios_limit = acpi_processor_get_bios_limit,
+#endif
+ .init = powernow_cpu_init,
+ .exit = powernow_cpu_exit,
+ .name = "powernow-k7",
+ .owner = THIS_MODULE,
+ .attr = powernow_table_attr,
+};
+
+static int __init powernow_init(void)
+{
+ if (check_powernow() == 0)
+ return -ENODEV;
+ return cpufreq_register_driver(&powernow_driver);
+}
+
+
+static void __exit powernow_exit(void)
+{
+ cpufreq_unregister_driver(&powernow_driver);
+}
+
+module_param(acpi_force, int, 0444);
+MODULE_PARM_DESC(acpi_force, "Force ACPI to be used.");
+
+MODULE_AUTHOR("Dave Jones <davej@redhat.com>");
+MODULE_DESCRIPTION("Powernow driver for AMD K7 processors.");
+MODULE_LICENSE("GPL");
+
+late_initcall(powernow_init);
+module_exit(powernow_exit);
+
diff --git a/drivers/cpufreq/powernow-k7.h b/drivers/cpufreq/powernow-k7.h
new file mode 100644
index 00000000..35fb4eaf
--- /dev/null
+++ b/drivers/cpufreq/powernow-k7.h
@@ -0,0 +1,43 @@
+/*
+ * (C) 2003 Dave Jones.
+ *
+ * Licensed under the terms of the GNU GPL License version 2.
+ *
+ * AMD-specific information
+ *
+ */
+
+union msr_fidvidctl {
+ struct {
+ unsigned FID:5, // 4:0
+ reserved1:3, // 7:5
+ VID:5, // 12:8
+ reserved2:3, // 15:13
+ FIDC:1, // 16
+ VIDC:1, // 17
+ reserved3:2, // 19:18
+ FIDCHGRATIO:1, // 20
+ reserved4:11, // 31-21
+ SGTC:20, // 32:51
+ reserved5:12; // 63:52
+ } bits;
+ unsigned long long val;
+};
+
+union msr_fidvidstatus {
+ struct {
+ unsigned CFID:5, // 4:0
+ reserved1:3, // 7:5
+ SFID:5, // 12:8
+ reserved2:3, // 15:13
+ MFID:5, // 20:16
+ reserved3:11, // 31:21
+ CVID:5, // 36:32
+ reserved4:3, // 39:37
+ SVID:5, // 44:40
+ reserved5:3, // 47:45
+ MVID:5, // 52:48
+ reserved6:11; // 63:53
+ } bits;
+ unsigned long long val;
+};
diff --git a/drivers/cpufreq/powernow-k8.c b/drivers/cpufreq/powernow-k8.c
new file mode 100644
index 00000000..ad683ec2
--- /dev/null
+++ b/drivers/cpufreq/powernow-k8.c
@@ -0,0 +1,1619 @@
+/*
+ * (c) 2003-2010 Advanced Micro Devices, Inc.
+ * Your use of this code is subject to the terms and conditions of the
+ * GNU general public license version 2. See "COPYING" or
+ * http://www.gnu.org/licenses/gpl.html
+ *
+ * Support : mark.langsdorf@amd.com
+ *
+ * Based on the powernow-k7.c module written by Dave Jones.
+ * (C) 2003 Dave Jones on behalf of SuSE Labs
+ * (C) 2004 Dominik Brodowski <linux@brodo.de>
+ * (C) 2004 Pavel Machek <pavel@ucw.cz>
+ * Licensed under the terms of the GNU GPL License version 2.
+ * Based upon datasheets & sample CPUs kindly provided by AMD.
+ *
+ * Valuable input gratefully received from Dave Jones, Pavel Machek,
+ * Dominik Brodowski, Jacob Shin, and others.
+ * Originally developed by Paul Devriendt.
+ * Processor information obtained from Chapter 9 (Power and Thermal Management)
+ * of the "BIOS and Kernel Developer's Guide for the AMD Athlon 64 and AMD
+ * Opteron Processors" available for download from www.amd.com
+ *
+ * Tables for specific CPUs can be inferred from
+ * http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/30430.pdf
+ */
+
+#include <linux/kernel.h>
+#include <linux/smp.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/cpufreq.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/cpumask.h>
+#include <linux/sched.h> /* for current / set_cpus_allowed() */
+#include <linux/io.h>
+#include <linux/delay.h>
+
+#include <asm/msr.h>
+
+#include <linux/acpi.h>
+#include <linux/mutex.h>
+#include <acpi/processor.h>
+
+#define PFX "powernow-k8: "
+#define VERSION "version 2.20.00"
+#include "powernow-k8.h"
+#include "mperf.h"
+
+/* serialize freq changes */
+static DEFINE_MUTEX(fidvid_mutex);
+
+static DEFINE_PER_CPU(struct powernow_k8_data *, powernow_data);
+
+static int cpu_family = CPU_OPTERON;
+
+/* array to map SW pstate number to acpi state */
+static u32 ps_to_as[8];
+
+/* core performance boost */
+static bool cpb_capable, cpb_enabled;
+static struct msr __percpu *msrs;
+
+static struct cpufreq_driver cpufreq_amd64_driver;
+
+#ifndef CONFIG_SMP
+static inline const struct cpumask *cpu_core_mask(int cpu)
+{
+ return cpumask_of(0);
+}
+#endif
+
+/* Return a frequency in MHz, given an input fid */
+static u32 find_freq_from_fid(u32 fid)
+{
+ return 800 + (fid * 100);
+}
+
+/* Return a frequency in KHz, given an input fid */
+static u32 find_khz_freq_from_fid(u32 fid)
+{
+ return 1000 * find_freq_from_fid(fid);
+}
+
+static u32 find_khz_freq_from_pstate(struct cpufreq_frequency_table *data,
+ u32 pstate)
+{
+ return data[ps_to_as[pstate]].frequency;
+}
+
+/* Return the vco fid for an input fid
+ *
+ * Each "low" fid has corresponding "high" fid, and you can get to "low" fids
+ * only from corresponding high fids. This returns "high" fid corresponding to
+ * "low" one.
+ */
+static u32 convert_fid_to_vco_fid(u32 fid)
+{
+ if (fid < HI_FID_TABLE_BOTTOM)
+ return 8 + (2 * fid);
+ else
+ return fid;
+}
+
+/*
+ * Return 1 if the pending bit is set. Unless we just instructed the processor
+ * to transition to a new state, seeing this bit set is really bad news.
+ */
+static int pending_bit_stuck(void)
+{
+ u32 lo, hi;
+
+ if (cpu_family == CPU_HW_PSTATE)
+ return 0;
+
+ rdmsr(MSR_FIDVID_STATUS, lo, hi);
+ return lo & MSR_S_LO_CHANGE_PENDING ? 1 : 0;
+}
+
+/*
+ * Update the global current fid / vid values from the status msr.
+ * Returns 1 on error.
+ */
+static int query_current_values_with_pending_wait(struct powernow_k8_data *data)
+{
+ u32 lo, hi;
+ u32 i = 0;
+
+ if (cpu_family == CPU_HW_PSTATE) {
+ rdmsr(MSR_PSTATE_STATUS, lo, hi);
+ i = lo & HW_PSTATE_MASK;
+ data->currpstate = i;
+
+ /*
+ * a workaround for family 11h erratum 311 might cause
+ * an "out-of-range Pstate if the core is in Pstate-0
+ */
+ if ((boot_cpu_data.x86 == 0x11) && (i >= data->numps))
+ data->currpstate = HW_PSTATE_0;
+
+ return 0;
+ }
+ do {
+ if (i++ > 10000) {
+ pr_debug("detected change pending stuck\n");
+ return 1;
+ }
+ rdmsr(MSR_FIDVID_STATUS, lo, hi);
+ } while (lo & MSR_S_LO_CHANGE_PENDING);
+
+ data->currvid = hi & MSR_S_HI_CURRENT_VID;
+ data->currfid = lo & MSR_S_LO_CURRENT_FID;
+
+ return 0;
+}
+
+/* the isochronous relief time */
+static void count_off_irt(struct powernow_k8_data *data)
+{
+ udelay((1 << data->irt) * 10);
+ return;
+}
+
+/* the voltage stabilization time */
+static void count_off_vst(struct powernow_k8_data *data)
+{
+ udelay(data->vstable * VST_UNITS_20US);
+ return;
+}
+
+/* need to init the control msr to a safe value (for each cpu) */
+static void fidvid_msr_init(void)
+{
+ u32 lo, hi;
+ u8 fid, vid;
+
+ rdmsr(MSR_FIDVID_STATUS, lo, hi);
+ vid = hi & MSR_S_HI_CURRENT_VID;
+ fid = lo & MSR_S_LO_CURRENT_FID;
+ lo = fid | (vid << MSR_C_LO_VID_SHIFT);
+ hi = MSR_C_HI_STP_GNT_BENIGN;
+ pr_debug("cpu%d, init lo 0x%x, hi 0x%x\n", smp_processor_id(), lo, hi);
+ wrmsr(MSR_FIDVID_CTL, lo, hi);
+}
+
+/* write the new fid value along with the other control fields to the msr */
+static int write_new_fid(struct powernow_k8_data *data, u32 fid)
+{
+ u32 lo;
+ u32 savevid = data->currvid;
+ u32 i = 0;
+
+ if ((fid & INVALID_FID_MASK) || (data->currvid & INVALID_VID_MASK)) {
+ printk(KERN_ERR PFX "internal error - overflow on fid write\n");
+ return 1;
+ }
+
+ lo = fid;
+ lo |= (data->currvid << MSR_C_LO_VID_SHIFT);
+ lo |= MSR_C_LO_INIT_FID_VID;
+
+ pr_debug("writing fid 0x%x, lo 0x%x, hi 0x%x\n",
+ fid, lo, data->plllock * PLL_LOCK_CONVERSION);
+
+ do {
+ wrmsr(MSR_FIDVID_CTL, lo, data->plllock * PLL_LOCK_CONVERSION);
+ if (i++ > 100) {
+ printk(KERN_ERR PFX
+ "Hardware error - pending bit very stuck - "
+ "no further pstate changes possible\n");
+ return 1;
+ }
+ } while (query_current_values_with_pending_wait(data));
+
+ count_off_irt(data);
+
+ if (savevid != data->currvid) {
+ printk(KERN_ERR PFX
+ "vid change on fid trans, old 0x%x, new 0x%x\n",
+ savevid, data->currvid);
+ return 1;
+ }
+
+ if (fid != data->currfid) {
+ printk(KERN_ERR PFX
+ "fid trans failed, fid 0x%x, curr 0x%x\n", fid,
+ data->currfid);
+ return 1;
+ }
+
+ return 0;
+}
+
+/* Write a new vid to the hardware */
+static int write_new_vid(struct powernow_k8_data *data, u32 vid)
+{
+ u32 lo;
+ u32 savefid = data->currfid;
+ int i = 0;
+
+ if ((data->currfid & INVALID_FID_MASK) || (vid & INVALID_VID_MASK)) {
+ printk(KERN_ERR PFX "internal error - overflow on vid write\n");
+ return 1;
+ }
+
+ lo = data->currfid;
+ lo |= (vid << MSR_C_LO_VID_SHIFT);
+ lo |= MSR_C_LO_INIT_FID_VID;
+
+ pr_debug("writing vid 0x%x, lo 0x%x, hi 0x%x\n",
+ vid, lo, STOP_GRANT_5NS);
+
+ do {
+ wrmsr(MSR_FIDVID_CTL, lo, STOP_GRANT_5NS);
+ if (i++ > 100) {
+ printk(KERN_ERR PFX "internal error - pending bit "
+ "very stuck - no further pstate "
+ "changes possible\n");
+ return 1;
+ }
+ } while (query_current_values_with_pending_wait(data));
+
+ if (savefid != data->currfid) {
+ printk(KERN_ERR PFX "fid changed on vid trans, old "
+ "0x%x new 0x%x\n",
+ savefid, data->currfid);
+ return 1;
+ }
+
+ if (vid != data->currvid) {
+ printk(KERN_ERR PFX "vid trans failed, vid 0x%x, "
+ "curr 0x%x\n",
+ vid, data->currvid);
+ return 1;
+ }
+
+ return 0;
+}
+
+/*
+ * Reduce the vid by the max of step or reqvid.
+ * Decreasing vid codes represent increasing voltages:
+ * vid of 0 is 1.550V, vid of 0x1e is 0.800V, vid of VID_OFF is off.
+ */
+static int decrease_vid_code_by_step(struct powernow_k8_data *data,
+ u32 reqvid, u32 step)
+{
+ if ((data->currvid - reqvid) > step)
+ reqvid = data->currvid - step;
+
+ if (write_new_vid(data, reqvid))
+ return 1;
+
+ count_off_vst(data);
+
+ return 0;
+}
+
+/* Change hardware pstate by single MSR write */
+static int transition_pstate(struct powernow_k8_data *data, u32 pstate)
+{
+ wrmsr(MSR_PSTATE_CTRL, pstate, 0);
+ data->currpstate = pstate;
+ return 0;
+}
+
+/* Change Opteron/Athlon64 fid and vid, by the 3 phases. */
+static int transition_fid_vid(struct powernow_k8_data *data,
+ u32 reqfid, u32 reqvid)
+{
+ if (core_voltage_pre_transition(data, reqvid, reqfid))
+ return 1;
+
+ if (core_frequency_transition(data, reqfid))
+ return 1;
+
+ if (core_voltage_post_transition(data, reqvid))
+ return 1;
+
+ if (query_current_values_with_pending_wait(data))
+ return 1;
+
+ if ((reqfid != data->currfid) || (reqvid != data->currvid)) {
+ printk(KERN_ERR PFX "failed (cpu%d): req 0x%x 0x%x, "
+ "curr 0x%x 0x%x\n",
+ smp_processor_id(),
+ reqfid, reqvid, data->currfid, data->currvid);
+ return 1;
+ }
+
+ pr_debug("transitioned (cpu%d): new fid 0x%x, vid 0x%x\n",
+ smp_processor_id(), data->currfid, data->currvid);
+
+ return 0;
+}
+
+/* Phase 1 - core voltage transition ... setup voltage */
+static int core_voltage_pre_transition(struct powernow_k8_data *data,
+ u32 reqvid, u32 reqfid)
+{
+ u32 rvosteps = data->rvo;
+ u32 savefid = data->currfid;
+ u32 maxvid, lo, rvomult = 1;
+
+ pr_debug("ph1 (cpu%d): start, currfid 0x%x, currvid 0x%x, "
+ "reqvid 0x%x, rvo 0x%x\n",
+ smp_processor_id(),
+ data->currfid, data->currvid, reqvid, data->rvo);
+
+ if ((savefid < LO_FID_TABLE_TOP) && (reqfid < LO_FID_TABLE_TOP))
+ rvomult = 2;
+ rvosteps *= rvomult;
+ rdmsr(MSR_FIDVID_STATUS, lo, maxvid);
+ maxvid = 0x1f & (maxvid >> 16);
+ pr_debug("ph1 maxvid=0x%x\n", maxvid);
+ if (reqvid < maxvid) /* lower numbers are higher voltages */
+ reqvid = maxvid;
+
+ while (data->currvid > reqvid) {
+ pr_debug("ph1: curr 0x%x, req vid 0x%x\n",
+ data->currvid, reqvid);
+ if (decrease_vid_code_by_step(data, reqvid, data->vidmvs))
+ return 1;
+ }
+
+ while ((rvosteps > 0) &&
+ ((rvomult * data->rvo + data->currvid) > reqvid)) {
+ if (data->currvid == maxvid) {
+ rvosteps = 0;
+ } else {
+ pr_debug("ph1: changing vid for rvo, req 0x%x\n",
+ data->currvid - 1);
+ if (decrease_vid_code_by_step(data, data->currvid-1, 1))
+ return 1;
+ rvosteps--;
+ }
+ }
+
+ if (query_current_values_with_pending_wait(data))
+ return 1;
+
+ if (savefid != data->currfid) {
+ printk(KERN_ERR PFX "ph1 err, currfid changed 0x%x\n",
+ data->currfid);
+ return 1;
+ }
+
+ pr_debug("ph1 complete, currfid 0x%x, currvid 0x%x\n",
+ data->currfid, data->currvid);
+
+ return 0;
+}
+
+/* Phase 2 - core frequency transition */
+static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid)
+{
+ u32 vcoreqfid, vcocurrfid, vcofiddiff;
+ u32 fid_interval, savevid = data->currvid;
+
+ if (data->currfid == reqfid) {
+ printk(KERN_ERR PFX "ph2 null fid transition 0x%x\n",
+ data->currfid);
+ return 0;
+ }
+
+ pr_debug("ph2 (cpu%d): starting, currfid 0x%x, currvid 0x%x, "
+ "reqfid 0x%x\n",
+ smp_processor_id(),
+ data->currfid, data->currvid, reqfid);
+
+ vcoreqfid = convert_fid_to_vco_fid(reqfid);
+ vcocurrfid = convert_fid_to_vco_fid(data->currfid);
+ vcofiddiff = vcocurrfid > vcoreqfid ? vcocurrfid - vcoreqfid
+ : vcoreqfid - vcocurrfid;
+
+ if ((reqfid <= LO_FID_TABLE_TOP) && (data->currfid <= LO_FID_TABLE_TOP))
+ vcofiddiff = 0;
+
+ while (vcofiddiff > 2) {
+ (data->currfid & 1) ? (fid_interval = 1) : (fid_interval = 2);
+
+ if (reqfid > data->currfid) {
+ if (data->currfid > LO_FID_TABLE_TOP) {
+ if (write_new_fid(data,
+ data->currfid + fid_interval))
+ return 1;
+ } else {
+ if (write_new_fid
+ (data,
+ 2 + convert_fid_to_vco_fid(data->currfid)))
+ return 1;
+ }
+ } else {
+ if (write_new_fid(data, data->currfid - fid_interval))
+ return 1;
+ }
+
+ vcocurrfid = convert_fid_to_vco_fid(data->currfid);
+ vcofiddiff = vcocurrfid > vcoreqfid ? vcocurrfid - vcoreqfid
+ : vcoreqfid - vcocurrfid;
+ }
+
+ if (write_new_fid(data, reqfid))
+ return 1;
+
+ if (query_current_values_with_pending_wait(data))
+ return 1;
+
+ if (data->currfid != reqfid) {
+ printk(KERN_ERR PFX
+ "ph2: mismatch, failed fid transition, "
+ "curr 0x%x, req 0x%x\n",
+ data->currfid, reqfid);
+ return 1;
+ }
+
+ if (savevid != data->currvid) {
+ printk(KERN_ERR PFX "ph2: vid changed, save 0x%x, curr 0x%x\n",
+ savevid, data->currvid);
+ return 1;
+ }
+
+ pr_debug("ph2 complete, currfid 0x%x, currvid 0x%x\n",
+ data->currfid, data->currvid);
+
+ return 0;
+}
+
+/* Phase 3 - core voltage transition flow ... jump to the final vid. */
+static int core_voltage_post_transition(struct powernow_k8_data *data,
+ u32 reqvid)
+{
+ u32 savefid = data->currfid;
+ u32 savereqvid = reqvid;
+
+ pr_debug("ph3 (cpu%d): starting, currfid 0x%x, currvid 0x%x\n",
+ smp_processor_id(),
+ data->currfid, data->currvid);
+
+ if (reqvid != data->currvid) {
+ if (write_new_vid(data, reqvid))
+ return 1;
+
+ if (savefid != data->currfid) {
+ printk(KERN_ERR PFX
+ "ph3: bad fid change, save 0x%x, curr 0x%x\n",
+ savefid, data->currfid);
+ return 1;
+ }
+
+ if (data->currvid != reqvid) {
+ printk(KERN_ERR PFX
+ "ph3: failed vid transition\n, "
+ "req 0x%x, curr 0x%x",
+ reqvid, data->currvid);
+ return 1;
+ }
+ }
+
+ if (query_current_values_with_pending_wait(data))
+ return 1;
+
+ if (savereqvid != data->currvid) {
+ pr_debug("ph3 failed, currvid 0x%x\n", data->currvid);
+ return 1;
+ }
+
+ if (savefid != data->currfid) {
+ pr_debug("ph3 failed, currfid changed 0x%x\n",
+ data->currfid);
+ return 1;
+ }
+
+ pr_debug("ph3 complete, currfid 0x%x, currvid 0x%x\n",
+ data->currfid, data->currvid);
+
+ return 0;
+}
+
+static void check_supported_cpu(void *_rc)
+{
+ u32 eax, ebx, ecx, edx;
+ int *rc = _rc;
+
+ *rc = -ENODEV;
+
+ if (__this_cpu_read(cpu_info.x86_vendor) != X86_VENDOR_AMD)
+ return;
+
+ eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE);
+ if (((eax & CPUID_XFAM) != CPUID_XFAM_K8) &&
+ ((eax & CPUID_XFAM) < CPUID_XFAM_10H))
+ return;
+
+ if ((eax & CPUID_XFAM) == CPUID_XFAM_K8) {
+ if (((eax & CPUID_USE_XFAM_XMOD) != CPUID_USE_XFAM_XMOD) ||
+ ((eax & CPUID_XMOD) > CPUID_XMOD_REV_MASK)) {
+ printk(KERN_INFO PFX
+ "Processor cpuid %x not supported\n", eax);
+ return;
+ }
+
+ eax = cpuid_eax(CPUID_GET_MAX_CAPABILITIES);
+ if (eax < CPUID_FREQ_VOLT_CAPABILITIES) {
+ printk(KERN_INFO PFX
+ "No frequency change capabilities detected\n");
+ return;
+ }
+
+ cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx);
+ if ((edx & P_STATE_TRANSITION_CAPABLE)
+ != P_STATE_TRANSITION_CAPABLE) {
+ printk(KERN_INFO PFX
+ "Power state transitions not supported\n");
+ return;
+ }
+ } else { /* must be a HW Pstate capable processor */
+ cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx);
+ if ((edx & USE_HW_PSTATE) == USE_HW_PSTATE)
+ cpu_family = CPU_HW_PSTATE;
+ else
+ return;
+ }
+
+ *rc = 0;
+}
+
+static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst,
+ u8 maxvid)
+{
+ unsigned int j;
+ u8 lastfid = 0xff;
+
+ for (j = 0; j < data->numps; j++) {
+ if (pst[j].vid > LEAST_VID) {
+ printk(KERN_ERR FW_BUG PFX "vid %d invalid : 0x%x\n",
+ j, pst[j].vid);
+ return -EINVAL;
+ }
+ if (pst[j].vid < data->rvo) {
+ /* vid + rvo >= 0 */
+ printk(KERN_ERR FW_BUG PFX "0 vid exceeded with pstate"
+ " %d\n", j);
+ return -ENODEV;
+ }
+ if (pst[j].vid < maxvid + data->rvo) {
+ /* vid + rvo >= maxvid */
+ printk(KERN_ERR FW_BUG PFX "maxvid exceeded with pstate"
+ " %d\n", j);
+ return -ENODEV;
+ }
+ if (pst[j].fid > MAX_FID) {
+ printk(KERN_ERR FW_BUG PFX "maxfid exceeded with pstate"
+ " %d\n", j);
+ return -ENODEV;
+ }
+ if (j && (pst[j].fid < HI_FID_TABLE_BOTTOM)) {
+ /* Only first fid is allowed to be in "low" range */
+ printk(KERN_ERR FW_BUG PFX "two low fids - %d : "
+ "0x%x\n", j, pst[j].fid);
+ return -EINVAL;
+ }
+ if (pst[j].fid < lastfid)
+ lastfid = pst[j].fid;
+ }
+ if (lastfid & 1) {
+ printk(KERN_ERR FW_BUG PFX "lastfid invalid\n");
+ return -EINVAL;
+ }
+ if (lastfid > LO_FID_TABLE_TOP)
+ printk(KERN_INFO FW_BUG PFX
+ "first fid not from lo freq table\n");
+
+ return 0;
+}
+
+static void invalidate_entry(struct cpufreq_frequency_table *powernow_table,
+ unsigned int entry)
+{
+ powernow_table[entry].frequency = CPUFREQ_ENTRY_INVALID;
+}
+
+static void print_basics(struct powernow_k8_data *data)
+{
+ int j;
+ for (j = 0; j < data->numps; j++) {
+ if (data->powernow_table[j].frequency !=
+ CPUFREQ_ENTRY_INVALID) {
+ if (cpu_family == CPU_HW_PSTATE) {
+ printk(KERN_INFO PFX
+ " %d : pstate %d (%d MHz)\n", j,
+ data->powernow_table[j].index,
+ data->powernow_table[j].frequency/1000);
+ } else {
+ printk(KERN_INFO PFX
+ "fid 0x%x (%d MHz), vid 0x%x\n",
+ data->powernow_table[j].index & 0xff,
+ data->powernow_table[j].frequency/1000,
+ data->powernow_table[j].index >> 8);
+ }
+ }
+ }
+ if (data->batps)
+ printk(KERN_INFO PFX "Only %d pstates on battery\n",
+ data->batps);
+}
+
+static u32 freq_from_fid_did(u32 fid, u32 did)
+{
+ u32 mhz = 0;
+
+ if (boot_cpu_data.x86 == 0x10)
+ mhz = (100 * (fid + 0x10)) >> did;
+ else if (boot_cpu_data.x86 == 0x11)
+ mhz = (100 * (fid + 8)) >> did;
+ else
+ BUG();
+
+ return mhz * 1000;
+}
+
+static int fill_powernow_table(struct powernow_k8_data *data,
+ struct pst_s *pst, u8 maxvid)
+{
+ struct cpufreq_frequency_table *powernow_table;
+ unsigned int j;
+
+ if (data->batps) {
+ /* use ACPI support to get full speed on mains power */
+ printk(KERN_WARNING PFX
+ "Only %d pstates usable (use ACPI driver for full "
+ "range\n", data->batps);
+ data->numps = data->batps;
+ }
+
+ for (j = 1; j < data->numps; j++) {
+ if (pst[j-1].fid >= pst[j].fid) {
+ printk(KERN_ERR PFX "PST out of sequence\n");
+ return -EINVAL;
+ }
+ }
+
+ if (data->numps < 2) {
+ printk(KERN_ERR PFX "no p states to transition\n");
+ return -ENODEV;
+ }
+
+ if (check_pst_table(data, pst, maxvid))
+ return -EINVAL;
+
+ powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table)
+ * (data->numps + 1)), GFP_KERNEL);
+ if (!powernow_table) {
+ printk(KERN_ERR PFX "powernow_table memory alloc failure\n");
+ return -ENOMEM;
+ }
+
+ for (j = 0; j < data->numps; j++) {
+ int freq;
+ powernow_table[j].index = pst[j].fid; /* lower 8 bits */
+ powernow_table[j].index |= (pst[j].vid << 8); /* upper 8 bits */
+ freq = find_khz_freq_from_fid(pst[j].fid);
+ powernow_table[j].frequency = freq;
+ }
+ powernow_table[data->numps].frequency = CPUFREQ_TABLE_END;
+ powernow_table[data->numps].index = 0;
+
+ if (query_current_values_with_pending_wait(data)) {
+ kfree(powernow_table);
+ return -EIO;
+ }
+
+ pr_debug("cfid 0x%x, cvid 0x%x\n", data->currfid, data->currvid);
+ data->powernow_table = powernow_table;
+ if (cpumask_first(cpu_core_mask(data->cpu)) == data->cpu)
+ print_basics(data);
+
+ for (j = 0; j < data->numps; j++)
+ if ((pst[j].fid == data->currfid) &&
+ (pst[j].vid == data->currvid))
+ return 0;
+
+ pr_debug("currfid/vid do not match PST, ignoring\n");
+ return 0;
+}
+
+/* Find and validate the PSB/PST table in BIOS. */
+static int find_psb_table(struct powernow_k8_data *data)
+{
+ struct psb_s *psb;
+ unsigned int i;
+ u32 mvs;
+ u8 maxvid;
+ u32 cpst = 0;
+ u32 thiscpuid;
+
+ for (i = 0xc0000; i < 0xffff0; i += 0x10) {
+ /* Scan BIOS looking for the signature. */
+ /* It can not be at ffff0 - it is too big. */
+
+ psb = phys_to_virt(i);
+ if (memcmp(psb, PSB_ID_STRING, PSB_ID_STRING_LEN) != 0)
+ continue;
+
+ pr_debug("found PSB header at 0x%p\n", psb);
+
+ pr_debug("table vers: 0x%x\n", psb->tableversion);
+ if (psb->tableversion != PSB_VERSION_1_4) {
+ printk(KERN_ERR FW_BUG PFX "PSB table is not v1.4\n");
+ return -ENODEV;
+ }
+
+ pr_debug("flags: 0x%x\n", psb->flags1);
+ if (psb->flags1) {
+ printk(KERN_ERR FW_BUG PFX "unknown flags\n");
+ return -ENODEV;
+ }
+
+ data->vstable = psb->vstable;
+ pr_debug("voltage stabilization time: %d(*20us)\n",
+ data->vstable);
+
+ pr_debug("flags2: 0x%x\n", psb->flags2);
+ data->rvo = psb->flags2 & 3;
+ data->irt = ((psb->flags2) >> 2) & 3;
+ mvs = ((psb->flags2) >> 4) & 3;
+ data->vidmvs = 1 << mvs;
+ data->batps = ((psb->flags2) >> 6) & 3;
+
+ pr_debug("ramp voltage offset: %d\n", data->rvo);
+ pr_debug("isochronous relief time: %d\n", data->irt);
+ pr_debug("maximum voltage step: %d - 0x%x\n", mvs, data->vidmvs);
+
+ pr_debug("numpst: 0x%x\n", psb->num_tables);
+ cpst = psb->num_tables;
+ if ((psb->cpuid == 0x00000fc0) ||
+ (psb->cpuid == 0x00000fe0)) {
+ thiscpuid = cpuid_eax(CPUID_PROCESSOR_SIGNATURE);
+ if ((thiscpuid == 0x00000fc0) ||
+ (thiscpuid == 0x00000fe0))
+ cpst = 1;
+ }
+ if (cpst != 1) {
+ printk(KERN_ERR FW_BUG PFX "numpst must be 1\n");
+ return -ENODEV;
+ }
+
+ data->plllock = psb->plllocktime;
+ pr_debug("plllocktime: 0x%x (units 1us)\n", psb->plllocktime);
+ pr_debug("maxfid: 0x%x\n", psb->maxfid);
+ pr_debug("maxvid: 0x%x\n", psb->maxvid);
+ maxvid = psb->maxvid;
+
+ data->numps = psb->numps;
+ pr_debug("numpstates: 0x%x\n", data->numps);
+ return fill_powernow_table(data,
+ (struct pst_s *)(psb+1), maxvid);
+ }
+ /*
+ * If you see this message, complain to BIOS manufacturer. If
+ * he tells you "we do not support Linux" or some similar
+ * nonsense, remember that Windows 2000 uses the same legacy
+ * mechanism that the old Linux PSB driver uses. Tell them it
+ * is broken with Windows 2000.
+ *
+ * The reference to the AMD documentation is chapter 9 in the
+ * BIOS and Kernel Developer's Guide, which is available on
+ * www.amd.com
+ */
+ printk(KERN_ERR FW_BUG PFX "No PSB or ACPI _PSS objects\n");
+ printk(KERN_ERR PFX "Make sure that your BIOS is up to date"
+ " and Cool'N'Quiet support is enabled in BIOS setup\n");
+ return -ENODEV;
+}
+
+static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data,
+ unsigned int index)
+{
+ u64 control;
+
+ if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE))
+ return;
+
+ control = data->acpi_data.states[index].control;
+ data->irt = (control >> IRT_SHIFT) & IRT_MASK;
+ data->rvo = (control >> RVO_SHIFT) & RVO_MASK;
+ data->exttype = (control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK;
+ data->plllock = (control >> PLL_L_SHIFT) & PLL_L_MASK;
+ data->vidmvs = 1 << ((control >> MVS_SHIFT) & MVS_MASK);
+ data->vstable = (control >> VST_SHIFT) & VST_MASK;
+}
+
+static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
+{
+ struct cpufreq_frequency_table *powernow_table;
+ int ret_val = -ENODEV;
+ u64 control, status;
+
+ if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) {
+ pr_debug("register performance failed: bad ACPI data\n");
+ return -EIO;
+ }
+
+ /* verify the data contained in the ACPI structures */
+ if (data->acpi_data.state_count <= 1) {
+ pr_debug("No ACPI P-States\n");
+ goto err_out;
+ }
+
+ control = data->acpi_data.control_register.space_id;
+ status = data->acpi_data.status_register.space_id;
+
+ if ((control != ACPI_ADR_SPACE_FIXED_HARDWARE) ||
+ (status != ACPI_ADR_SPACE_FIXED_HARDWARE)) {
+ pr_debug("Invalid control/status registers (%llx - %llx)\n",
+ control, status);
+ goto err_out;
+ }
+
+ /* fill in data->powernow_table */
+ powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table)
+ * (data->acpi_data.state_count + 1)), GFP_KERNEL);
+ if (!powernow_table) {
+ pr_debug("powernow_table memory alloc failure\n");
+ goto err_out;
+ }
+
+ /* fill in data */
+ data->numps = data->acpi_data.state_count;
+ powernow_k8_acpi_pst_values(data, 0);
+
+ if (cpu_family == CPU_HW_PSTATE)
+ ret_val = fill_powernow_table_pstate(data, powernow_table);
+ else
+ ret_val = fill_powernow_table_fidvid(data, powernow_table);
+ if (ret_val)
+ goto err_out_mem;
+
+ powernow_table[data->acpi_data.state_count].frequency =
+ CPUFREQ_TABLE_END;
+ powernow_table[data->acpi_data.state_count].index = 0;
+ data->powernow_table = powernow_table;
+
+ if (cpumask_first(cpu_core_mask(data->cpu)) == data->cpu)
+ print_basics(data);
+
+ /* notify BIOS that we exist */
+ acpi_processor_notify_smm(THIS_MODULE);
+
+ if (!zalloc_cpumask_var(&data->acpi_data.shared_cpu_map, GFP_KERNEL)) {
+ printk(KERN_ERR PFX
+ "unable to alloc powernow_k8_data cpumask\n");
+ ret_val = -ENOMEM;
+ goto err_out_mem;
+ }
+
+ return 0;
+
+err_out_mem:
+ kfree(powernow_table);
+
+err_out:
+ acpi_processor_unregister_performance(&data->acpi_data, data->cpu);
+
+ /* data->acpi_data.state_count informs us at ->exit()
+ * whether ACPI was used */
+ data->acpi_data.state_count = 0;
+
+ return ret_val;
+}
+
+static int fill_powernow_table_pstate(struct powernow_k8_data *data,
+ struct cpufreq_frequency_table *powernow_table)
+{
+ int i;
+ u32 hi = 0, lo = 0;
+ rdmsr(MSR_PSTATE_CUR_LIMIT, lo, hi);
+ data->max_hw_pstate = (lo & HW_PSTATE_MAX_MASK) >> HW_PSTATE_MAX_SHIFT;
+
+ for (i = 0; i < data->acpi_data.state_count; i++) {
+ u32 index;
+
+ index = data->acpi_data.states[i].control & HW_PSTATE_MASK;
+ if (index > data->max_hw_pstate) {
+ printk(KERN_ERR PFX "invalid pstate %d - "
+ "bad value %d.\n", i, index);
+ printk(KERN_ERR PFX "Please report to BIOS "
+ "manufacturer\n");
+ invalidate_entry(powernow_table, i);
+ continue;
+ }
+
+ ps_to_as[index] = i;
+
+ /* Frequency may be rounded for these */
+ if ((boot_cpu_data.x86 == 0x10 && boot_cpu_data.x86_model < 10)
+ || boot_cpu_data.x86 == 0x11) {
+
+ rdmsr(MSR_PSTATE_DEF_BASE + index, lo, hi);
+ if (!(hi & HW_PSTATE_VALID_MASK)) {
+ pr_debug("invalid pstate %d, ignoring\n", index);
+ invalidate_entry(powernow_table, i);
+ continue;
+ }
+
+ powernow_table[i].frequency =
+ freq_from_fid_did(lo & 0x3f, (lo >> 6) & 7);
+ } else
+ powernow_table[i].frequency =
+ data->acpi_data.states[i].core_frequency * 1000;
+
+ powernow_table[i].index = index;
+ }
+ return 0;
+}
+
+static int fill_powernow_table_fidvid(struct powernow_k8_data *data,
+ struct cpufreq_frequency_table *powernow_table)
+{
+ int i;
+
+ for (i = 0; i < data->acpi_data.state_count; i++) {
+ u32 fid;
+ u32 vid;
+ u32 freq, index;
+ u64 status, control;
+
+ if (data->exttype) {
+ status = data->acpi_data.states[i].status;
+ fid = status & EXT_FID_MASK;
+ vid = (status >> VID_SHIFT) & EXT_VID_MASK;
+ } else {
+ control = data->acpi_data.states[i].control;
+ fid = control & FID_MASK;
+ vid = (control >> VID_SHIFT) & VID_MASK;
+ }
+
+ pr_debug(" %d : fid 0x%x, vid 0x%x\n", i, fid, vid);
+
+ index = fid | (vid<<8);
+ powernow_table[i].index = index;
+
+ freq = find_khz_freq_from_fid(fid);
+ powernow_table[i].frequency = freq;
+
+ /* verify frequency is OK */
+ if ((freq > (MAX_FREQ * 1000)) || (freq < (MIN_FREQ * 1000))) {
+ pr_debug("invalid freq %u kHz, ignoring\n", freq);
+ invalidate_entry(powernow_table, i);
+ continue;
+ }
+
+ /* verify voltage is OK -
+ * BIOSs are using "off" to indicate invalid */
+ if (vid == VID_OFF) {
+ pr_debug("invalid vid %u, ignoring\n", vid);
+ invalidate_entry(powernow_table, i);
+ continue;
+ }
+
+ if (freq != (data->acpi_data.states[i].core_frequency * 1000)) {
+ printk(KERN_INFO PFX "invalid freq entries "
+ "%u kHz vs. %u kHz\n", freq,
+ (unsigned int)
+ (data->acpi_data.states[i].core_frequency
+ * 1000));
+ invalidate_entry(powernow_table, i);
+ continue;
+ }
+ }
+ return 0;
+}
+
+static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data)
+{
+ if (data->acpi_data.state_count)
+ acpi_processor_unregister_performance(&data->acpi_data,
+ data->cpu);
+ free_cpumask_var(data->acpi_data.shared_cpu_map);
+}
+
+static int get_transition_latency(struct powernow_k8_data *data)
+{
+ int max_latency = 0;
+ int i;
+ for (i = 0; i < data->acpi_data.state_count; i++) {
+ int cur_latency = data->acpi_data.states[i].transition_latency
+ + data->acpi_data.states[i].bus_master_latency;
+ if (cur_latency > max_latency)
+ max_latency = cur_latency;
+ }
+ if (max_latency == 0) {
+ /*
+ * Fam 11h and later may return 0 as transition latency. This
+ * is intended and means "very fast". While cpufreq core and
+ * governors currently can handle that gracefully, better set it
+ * to 1 to avoid problems in the future.
+ */
+ if (boot_cpu_data.x86 < 0x11)
+ printk(KERN_ERR FW_WARN PFX "Invalid zero transition "
+ "latency\n");
+ max_latency = 1;
+ }
+ /* value in usecs, needs to be in nanoseconds */
+ return 1000 * max_latency;
+}
+
+/* Take a frequency, and issue the fid/vid transition command */
+static int transition_frequency_fidvid(struct powernow_k8_data *data,
+ unsigned int index)
+{
+ u32 fid = 0;
+ u32 vid = 0;
+ int res, i;
+ struct cpufreq_freqs freqs;
+
+ pr_debug("cpu %d transition to index %u\n", smp_processor_id(), index);
+
+ /* fid/vid correctness check for k8 */
+ /* fid are the lower 8 bits of the index we stored into
+ * the cpufreq frequency table in find_psb_table, vid
+ * are the upper 8 bits.
+ */
+ fid = data->powernow_table[index].index & 0xFF;
+ vid = (data->powernow_table[index].index & 0xFF00) >> 8;
+
+ pr_debug("table matched fid 0x%x, giving vid 0x%x\n", fid, vid);
+
+ if (query_current_values_with_pending_wait(data))
+ return 1;
+
+ if ((data->currvid == vid) && (data->currfid == fid)) {
+ pr_debug("target matches current values (fid 0x%x, vid 0x%x)\n",
+ fid, vid);
+ return 0;
+ }
+
+ pr_debug("cpu %d, changing to fid 0x%x, vid 0x%x\n",
+ smp_processor_id(), fid, vid);
+ freqs.old = find_khz_freq_from_fid(data->currfid);
+ freqs.new = find_khz_freq_from_fid(fid);
+
+ for_each_cpu(i, data->available_cores) {
+ freqs.cpu = i;
+ cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+ }
+
+ res = transition_fid_vid(data, fid, vid);
+ if (res)
+ return res;
+
+ freqs.new = find_khz_freq_from_fid(data->currfid);
+
+ for_each_cpu(i, data->available_cores) {
+ freqs.cpu = i;
+ cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+ }
+ return res;
+}
+
+/* Take a frequency, and issue the hardware pstate transition command */
+static int transition_frequency_pstate(struct powernow_k8_data *data,
+ unsigned int index)
+{
+ u32 pstate = 0;
+ int res, i;
+ struct cpufreq_freqs freqs;
+
+ pr_debug("cpu %d transition to index %u\n", smp_processor_id(), index);
+
+ /* get MSR index for hardware pstate transition */
+ pstate = index & HW_PSTATE_MASK;
+ if (pstate > data->max_hw_pstate)
+ return -EINVAL;
+
+ freqs.old = find_khz_freq_from_pstate(data->powernow_table,
+ data->currpstate);
+ freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate);
+
+ for_each_cpu(i, data->available_cores) {
+ freqs.cpu = i;
+ cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+ }
+
+ res = transition_pstate(data, pstate);
+ freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate);
+
+ for_each_cpu(i, data->available_cores) {
+ freqs.cpu = i;
+ cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+ }
+ return res;
+}
+
+/* Driver entry point to switch to the target frequency */
+static int powernowk8_target(struct cpufreq_policy *pol,
+ unsigned targfreq, unsigned relation)
+{
+ cpumask_var_t oldmask;
+ struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu);
+ u32 checkfid;
+ u32 checkvid;
+ unsigned int newstate;
+ int ret = -EIO;
+
+ if (!data)
+ return -EINVAL;
+
+ checkfid = data->currfid;
+ checkvid = data->currvid;
+
+ /* only run on specific CPU from here on. */
+ /* This is poor form: use a workqueue or smp_call_function_single */
+ if (!alloc_cpumask_var(&oldmask, GFP_KERNEL))
+ return -ENOMEM;
+
+ cpumask_copy(oldmask, tsk_cpus_allowed(current));
+ set_cpus_allowed_ptr(current, cpumask_of(pol->cpu));
+
+ if (smp_processor_id() != pol->cpu) {
+ printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu);
+ goto err_out;
+ }
+
+ if (pending_bit_stuck()) {
+ printk(KERN_ERR PFX "failing targ, change pending bit set\n");
+ goto err_out;
+ }
+
+ pr_debug("targ: cpu %d, %d kHz, min %d, max %d, relation %d\n",
+ pol->cpu, targfreq, pol->min, pol->max, relation);
+
+ if (query_current_values_with_pending_wait(data))
+ goto err_out;
+
+ if (cpu_family != CPU_HW_PSTATE) {
+ pr_debug("targ: curr fid 0x%x, vid 0x%x\n",
+ data->currfid, data->currvid);
+
+ if ((checkvid != data->currvid) ||
+ (checkfid != data->currfid)) {
+ printk(KERN_INFO PFX
+ "error - out of sync, fix 0x%x 0x%x, "
+ "vid 0x%x 0x%x\n",
+ checkfid, data->currfid,
+ checkvid, data->currvid);
+ }
+ }
+
+ if (cpufreq_frequency_table_target(pol, data->powernow_table,
+ targfreq, relation, &newstate))
+ goto err_out;
+
+ mutex_lock(&fidvid_mutex);
+
+ powernow_k8_acpi_pst_values(data, newstate);
+
+ if (cpu_family == CPU_HW_PSTATE)
+ ret = transition_frequency_pstate(data,
+ data->powernow_table[newstate].index);
+ else
+ ret = transition_frequency_fidvid(data, newstate);
+ if (ret) {
+ printk(KERN_ERR PFX "transition frequency failed\n");
+ ret = 1;
+ mutex_unlock(&fidvid_mutex);
+ goto err_out;
+ }
+ mutex_unlock(&fidvid_mutex);
+
+ if (cpu_family == CPU_HW_PSTATE)
+ pol->cur = find_khz_freq_from_pstate(data->powernow_table,
+ data->powernow_table[newstate].index);
+ else
+ pol->cur = find_khz_freq_from_fid(data->currfid);
+ ret = 0;
+
+err_out:
+ set_cpus_allowed_ptr(current, oldmask);
+ free_cpumask_var(oldmask);
+ return ret;
+}
+
+/* Driver entry point to verify the policy and range of frequencies */
+static int powernowk8_verify(struct cpufreq_policy *pol)
+{
+ struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu);
+
+ if (!data)
+ return -EINVAL;
+
+ return cpufreq_frequency_table_verify(pol, data->powernow_table);
+}
+
+struct init_on_cpu {
+ struct powernow_k8_data *data;
+ int rc;
+};
+
+static void __cpuinit powernowk8_cpu_init_on_cpu(void *_init_on_cpu)
+{
+ struct init_on_cpu *init_on_cpu = _init_on_cpu;
+
+ if (pending_bit_stuck()) {
+ printk(KERN_ERR PFX "failing init, change pending bit set\n");
+ init_on_cpu->rc = -ENODEV;
+ return;
+ }
+
+ if (query_current_values_with_pending_wait(init_on_cpu->data)) {
+ init_on_cpu->rc = -ENODEV;
+ return;
+ }
+
+ if (cpu_family == CPU_OPTERON)
+ fidvid_msr_init();
+
+ init_on_cpu->rc = 0;
+}
+
+/* per CPU init entry point to the driver */
+static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
+{
+ static const char ACPI_PSS_BIOS_BUG_MSG[] =
+ KERN_ERR FW_BUG PFX "No compatible ACPI _PSS objects found.\n"
+ FW_BUG PFX "Try again with latest BIOS.\n";
+ struct powernow_k8_data *data;
+ struct init_on_cpu init_on_cpu;
+ int rc;
+ struct cpuinfo_x86 *c = &cpu_data(pol->cpu);
+
+ if (!cpu_online(pol->cpu))
+ return -ENODEV;
+
+ smp_call_function_single(pol->cpu, check_supported_cpu, &rc, 1);
+ if (rc)
+ return -ENODEV;
+
+ data = kzalloc(sizeof(struct powernow_k8_data), GFP_KERNEL);
+ if (!data) {
+ printk(KERN_ERR PFX "unable to alloc powernow_k8_data");
+ return -ENOMEM;
+ }
+
+ data->cpu = pol->cpu;
+ data->currpstate = HW_PSTATE_INVALID;
+
+ if (powernow_k8_cpu_init_acpi(data)) {
+ /*
+ * Use the PSB BIOS structure. This is only available on
+ * an UP version, and is deprecated by AMD.
+ */
+ if (num_online_cpus() != 1) {
+ printk_once(ACPI_PSS_BIOS_BUG_MSG);
+ goto err_out;
+ }
+ if (pol->cpu != 0) {
+ printk(KERN_ERR FW_BUG PFX "No ACPI _PSS objects for "
+ "CPU other than CPU0. Complain to your BIOS "
+ "vendor.\n");
+ goto err_out;
+ }
+ rc = find_psb_table(data);
+ if (rc)
+ goto err_out;
+
+ /* Take a crude guess here.
+ * That guess was in microseconds, so multiply with 1000 */
+ pol->cpuinfo.transition_latency = (
+ ((data->rvo + 8) * data->vstable * VST_UNITS_20US) +
+ ((1 << data->irt) * 30)) * 1000;
+ } else /* ACPI _PSS objects available */
+ pol->cpuinfo.transition_latency = get_transition_latency(data);
+
+ /* only run on specific CPU from here on */
+ init_on_cpu.data = data;
+ smp_call_function_single(data->cpu, powernowk8_cpu_init_on_cpu,
+ &init_on_cpu, 1);
+ rc = init_on_cpu.rc;
+ if (rc != 0)
+ goto err_out_exit_acpi;
+
+ if (cpu_family == CPU_HW_PSTATE)
+ cpumask_copy(pol->cpus, cpumask_of(pol->cpu));
+ else
+ cpumask_copy(pol->cpus, cpu_core_mask(pol->cpu));
+ data->available_cores = pol->cpus;
+
+ if (cpu_family == CPU_HW_PSTATE)
+ pol->cur = find_khz_freq_from_pstate(data->powernow_table,
+ data->currpstate);
+ else
+ pol->cur = find_khz_freq_from_fid(data->currfid);
+ pr_debug("policy current frequency %d kHz\n", pol->cur);
+
+ /* min/max the cpu is capable of */
+ if (cpufreq_frequency_table_cpuinfo(pol, data->powernow_table)) {
+ printk(KERN_ERR FW_BUG PFX "invalid powernow_table\n");
+ powernow_k8_cpu_exit_acpi(data);
+ kfree(data->powernow_table);
+ kfree(data);
+ return -EINVAL;
+ }
+
+ /* Check for APERF/MPERF support in hardware */
+ if (cpu_has(c, X86_FEATURE_APERFMPERF))
+ cpufreq_amd64_driver.getavg = cpufreq_get_measured_perf;
+
+ cpufreq_frequency_table_get_attr(data->powernow_table, pol->cpu);
+
+ if (cpu_family == CPU_HW_PSTATE)
+ pr_debug("cpu_init done, current pstate 0x%x\n",
+ data->currpstate);
+ else
+ pr_debug("cpu_init done, current fid 0x%x, vid 0x%x\n",
+ data->currfid, data->currvid);
+
+ per_cpu(powernow_data, pol->cpu) = data;
+
+ return 0;
+
+err_out_exit_acpi:
+ powernow_k8_cpu_exit_acpi(data);
+
+err_out:
+ kfree(data);
+ return -ENODEV;
+}
+
+static int __devexit powernowk8_cpu_exit(struct cpufreq_policy *pol)
+{
+ struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu);
+
+ if (!data)
+ return -EINVAL;
+
+ powernow_k8_cpu_exit_acpi(data);
+
+ cpufreq_frequency_table_put_attr(pol->cpu);
+
+ kfree(data->powernow_table);
+ kfree(data);
+ per_cpu(powernow_data, pol->cpu) = NULL;
+
+ return 0;
+}
+
+static void query_values_on_cpu(void *_err)
+{
+ int *err = _err;
+ struct powernow_k8_data *data = __this_cpu_read(powernow_data);
+
+ *err = query_current_values_with_pending_wait(data);
+}
+
+static unsigned int powernowk8_get(unsigned int cpu)
+{
+ struct powernow_k8_data *data = per_cpu(powernow_data, cpu);
+ unsigned int khz = 0;
+ int err;
+
+ if (!data)
+ return 0;
+
+ smp_call_function_single(cpu, query_values_on_cpu, &err, true);
+ if (err)
+ goto out;
+
+ if (cpu_family == CPU_HW_PSTATE)
+ khz = find_khz_freq_from_pstate(data->powernow_table,
+ data->currpstate);
+ else
+ khz = find_khz_freq_from_fid(data->currfid);
+
+
+out:
+ return khz;
+}
+
+static void _cpb_toggle_msrs(bool t)
+{
+ int cpu;
+
+ get_online_cpus();
+
+ rdmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs);
+
+ for_each_cpu(cpu, cpu_online_mask) {
+ struct msr *reg = per_cpu_ptr(msrs, cpu);
+ if (t)
+ reg->l &= ~BIT(25);
+ else
+ reg->l |= BIT(25);
+ }
+ wrmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs);
+
+ put_online_cpus();
+}
+
+/*
+ * Switch on/off core performance boosting.
+ *
+ * 0=disable
+ * 1=enable.
+ */
+static void cpb_toggle(bool t)
+{
+ if (!cpb_capable)
+ return;
+
+ if (t && !cpb_enabled) {
+ cpb_enabled = true;
+ _cpb_toggle_msrs(t);
+ printk(KERN_INFO PFX "Core Boosting enabled.\n");
+ } else if (!t && cpb_enabled) {
+ cpb_enabled = false;
+ _cpb_toggle_msrs(t);
+ printk(KERN_INFO PFX "Core Boosting disabled.\n");
+ }
+}
+
+static ssize_t store_cpb(struct cpufreq_policy *policy, const char *buf,
+ size_t count)
+{
+ int ret = -EINVAL;
+ unsigned long val = 0;
+
+ ret = strict_strtoul(buf, 10, &val);
+ if (!ret && (val == 0 || val == 1) && cpb_capable)
+ cpb_toggle(val);
+ else
+ return -EINVAL;
+
+ return count;
+}
+
+static ssize_t show_cpb(struct cpufreq_policy *policy, char *buf)
+{
+ return sprintf(buf, "%u\n", cpb_enabled);
+}
+
+#define define_one_rw(_name) \
+static struct freq_attr _name = \
+__ATTR(_name, 0644, show_##_name, store_##_name)
+
+define_one_rw(cpb);
+
+static struct freq_attr *powernow_k8_attr[] = {
+ &cpufreq_freq_attr_scaling_available_freqs,
+ &cpb,
+ NULL,
+};
+
+static struct cpufreq_driver cpufreq_amd64_driver = {
+ .verify = powernowk8_verify,
+ .target = powernowk8_target,
+ .bios_limit = acpi_processor_get_bios_limit,
+ .init = powernowk8_cpu_init,
+ .exit = __devexit_p(powernowk8_cpu_exit),
+ .get = powernowk8_get,
+ .name = "powernow-k8",
+ .owner = THIS_MODULE,
+ .attr = powernow_k8_attr,
+};
+
+/*
+ * Clear the boost-disable flag on the CPU_DOWN path so that this cpu
+ * cannot block the remaining ones from boosting. On the CPU_UP path we
+ * simply keep the boost-disable flag in sync with the current global
+ * state.
+ */
+static int cpb_notify(struct notifier_block *nb, unsigned long action,
+ void *hcpu)
+{
+ unsigned cpu = (long)hcpu;
+ u32 lo, hi;
+
+ switch (action) {
+ case CPU_UP_PREPARE:
+ case CPU_UP_PREPARE_FROZEN:
+
+ if (!cpb_enabled) {
+ rdmsr_on_cpu(cpu, MSR_K7_HWCR, &lo, &hi);
+ lo |= BIT(25);
+ wrmsr_on_cpu(cpu, MSR_K7_HWCR, lo, hi);
+ }
+ break;
+
+ case CPU_DOWN_PREPARE:
+ case CPU_DOWN_PREPARE_FROZEN:
+ rdmsr_on_cpu(cpu, MSR_K7_HWCR, &lo, &hi);
+ lo &= ~BIT(25);
+ wrmsr_on_cpu(cpu, MSR_K7_HWCR, lo, hi);
+ break;
+
+ default:
+ break;
+ }
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block cpb_nb = {
+ .notifier_call = cpb_notify,
+};
+
+/* driver entry point for init */
+static int __cpuinit powernowk8_init(void)
+{
+ unsigned int i, supported_cpus = 0, cpu;
+ int rv;
+
+ for_each_online_cpu(i) {
+ int rc;
+ smp_call_function_single(i, check_supported_cpu, &rc, 1);
+ if (rc == 0)
+ supported_cpus++;
+ }
+
+ if (supported_cpus != num_online_cpus())
+ return -ENODEV;
+
+ printk(KERN_INFO PFX "Found %d %s (%d cpu cores) (" VERSION ")\n",
+ num_online_nodes(), boot_cpu_data.x86_model_id, supported_cpus);
+
+ if (boot_cpu_has(X86_FEATURE_CPB)) {
+
+ cpb_capable = true;
+
+ msrs = msrs_alloc();
+ if (!msrs) {
+ printk(KERN_ERR "%s: Error allocating msrs!\n", __func__);
+ return -ENOMEM;
+ }
+
+ register_cpu_notifier(&cpb_nb);
+
+ rdmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs);
+
+ for_each_cpu(cpu, cpu_online_mask) {
+ struct msr *reg = per_cpu_ptr(msrs, cpu);
+ cpb_enabled |= !(!!(reg->l & BIT(25)));
+ }
+
+ printk(KERN_INFO PFX "Core Performance Boosting: %s.\n",
+ (cpb_enabled ? "on" : "off"));
+ }
+
+ rv = cpufreq_register_driver(&cpufreq_amd64_driver);
+ if (rv < 0 && boot_cpu_has(X86_FEATURE_CPB)) {
+ unregister_cpu_notifier(&cpb_nb);
+ msrs_free(msrs);
+ msrs = NULL;
+ }
+ return rv;
+}
+
+/* driver entry point for term */
+static void __exit powernowk8_exit(void)
+{
+ pr_debug("exit\n");
+
+ if (boot_cpu_has(X86_FEATURE_CPB)) {
+ msrs_free(msrs);
+ msrs = NULL;
+
+ unregister_cpu_notifier(&cpb_nb);
+ }
+
+ cpufreq_unregister_driver(&cpufreq_amd64_driver);
+}
+
+MODULE_AUTHOR("Paul Devriendt <paul.devriendt@amd.com> and "
+ "Mark Langsdorf <mark.langsdorf@amd.com>");
+MODULE_DESCRIPTION("AMD Athlon 64 and Opteron processor frequency driver.");
+MODULE_LICENSE("GPL");
+
+late_initcall(powernowk8_init);
+module_exit(powernowk8_exit);
diff --git a/drivers/cpufreq/powernow-k8.h b/drivers/cpufreq/powernow-k8.h
new file mode 100644
index 00000000..3744d26c
--- /dev/null
+++ b/drivers/cpufreq/powernow-k8.h
@@ -0,0 +1,222 @@
+/*
+ * (c) 2003-2006 Advanced Micro Devices, Inc.
+ * Your use of this code is subject to the terms and conditions of the
+ * GNU general public license version 2. See "COPYING" or
+ * http://www.gnu.org/licenses/gpl.html
+ */
+
+enum pstate {
+ HW_PSTATE_INVALID = 0xff,
+ HW_PSTATE_0 = 0,
+ HW_PSTATE_1 = 1,
+ HW_PSTATE_2 = 2,
+ HW_PSTATE_3 = 3,
+ HW_PSTATE_4 = 4,
+ HW_PSTATE_5 = 5,
+ HW_PSTATE_6 = 6,
+ HW_PSTATE_7 = 7,
+};
+
+struct powernow_k8_data {
+ unsigned int cpu;
+
+ u32 numps; /* number of p-states */
+ u32 batps; /* number of p-states supported on battery */
+ u32 max_hw_pstate; /* maximum legal hardware pstate */
+
+ /* these values are constant when the PSB is used to determine
+ * vid/fid pairings, but are modified during the ->target() call
+ * when ACPI is used */
+ u32 rvo; /* ramp voltage offset */
+ u32 irt; /* isochronous relief time */
+ u32 vidmvs; /* usable value calculated from mvs */
+ u32 vstable; /* voltage stabilization time, units 20 us */
+ u32 plllock; /* pll lock time, units 1 us */
+ u32 exttype; /* extended interface = 1 */
+
+ /* keep track of the current fid / vid or pstate */
+ u32 currvid;
+ u32 currfid;
+ enum pstate currpstate;
+
+ /* the powernow_table includes all frequency and vid/fid pairings:
+ * fid are the lower 8 bits of the index, vid are the upper 8 bits.
+ * frequency is in kHz */
+ struct cpufreq_frequency_table *powernow_table;
+
+ /* the acpi table needs to be kept. it's only available if ACPI was
+ * used to determine valid frequency/vid/fid states */
+ struct acpi_processor_performance acpi_data;
+
+ /* we need to keep track of associated cores, but let cpufreq
+ * handle hotplug events - so just point at cpufreq pol->cpus
+ * structure */
+ struct cpumask *available_cores;
+};
+
+/* processor's cpuid instruction support */
+#define CPUID_PROCESSOR_SIGNATURE 1 /* function 1 */
+#define CPUID_XFAM 0x0ff00000 /* extended family */
+#define CPUID_XFAM_K8 0
+#define CPUID_XMOD 0x000f0000 /* extended model */
+#define CPUID_XMOD_REV_MASK 0x000c0000
+#define CPUID_XFAM_10H 0x00100000 /* family 0x10 */
+#define CPUID_USE_XFAM_XMOD 0x00000f00
+#define CPUID_GET_MAX_CAPABILITIES 0x80000000
+#define CPUID_FREQ_VOLT_CAPABILITIES 0x80000007
+#define P_STATE_TRANSITION_CAPABLE 6
+
+/* Model Specific Registers for p-state transitions. MSRs are 64-bit. For */
+/* writes (wrmsr - opcode 0f 30), the register number is placed in ecx, and */
+/* the value to write is placed in edx:eax. For reads (rdmsr - opcode 0f 32), */
+/* the register number is placed in ecx, and the data is returned in edx:eax. */
+
+#define MSR_FIDVID_CTL 0xc0010041
+#define MSR_FIDVID_STATUS 0xc0010042
+
+/* Field definitions within the FID VID Low Control MSR : */
+#define MSR_C_LO_INIT_FID_VID 0x00010000
+#define MSR_C_LO_NEW_VID 0x00003f00
+#define MSR_C_LO_NEW_FID 0x0000003f
+#define MSR_C_LO_VID_SHIFT 8
+
+/* Field definitions within the FID VID High Control MSR : */
+#define MSR_C_HI_STP_GNT_TO 0x000fffff
+
+/* Field definitions within the FID VID Low Status MSR : */
+#define MSR_S_LO_CHANGE_PENDING 0x80000000 /* cleared when completed */
+#define MSR_S_LO_MAX_RAMP_VID 0x3f000000
+#define MSR_S_LO_MAX_FID 0x003f0000
+#define MSR_S_LO_START_FID 0x00003f00
+#define MSR_S_LO_CURRENT_FID 0x0000003f
+
+/* Field definitions within the FID VID High Status MSR : */
+#define MSR_S_HI_MIN_WORKING_VID 0x3f000000
+#define MSR_S_HI_MAX_WORKING_VID 0x003f0000
+#define MSR_S_HI_START_VID 0x00003f00
+#define MSR_S_HI_CURRENT_VID 0x0000003f
+#define MSR_C_HI_STP_GNT_BENIGN 0x00000001
+
+
+/* Hardware Pstate _PSS and MSR definitions */
+#define USE_HW_PSTATE 0x00000080
+#define HW_PSTATE_MASK 0x00000007
+#define HW_PSTATE_VALID_MASK 0x80000000
+#define HW_PSTATE_MAX_MASK 0x000000f0
+#define HW_PSTATE_MAX_SHIFT 4
+#define MSR_PSTATE_DEF_BASE 0xc0010064 /* base of Pstate MSRs */
+#define MSR_PSTATE_STATUS 0xc0010063 /* Pstate Status MSR */
+#define MSR_PSTATE_CTRL 0xc0010062 /* Pstate control MSR */
+#define MSR_PSTATE_CUR_LIMIT 0xc0010061 /* pstate current limit MSR */
+
+/* define the two driver architectures */
+#define CPU_OPTERON 0
+#define CPU_HW_PSTATE 1
+
+
+/*
+ * There are restrictions frequencies have to follow:
+ * - only 1 entry in the low fid table ( <=1.4GHz )
+ * - lowest entry in the high fid table must be >= 2 * the entry in the
+ * low fid table
+ * - lowest entry in the high fid table must be a <= 200MHz + 2 * the entry
+ * in the low fid table
+ * - the parts can only step at <= 200 MHz intervals, odd fid values are
+ * supported in revision G and later revisions.
+ * - lowest frequency must be >= interprocessor hypertransport link speed
+ * (only applies to MP systems obviously)
+ */
+
+/* fids (frequency identifiers) are arranged in 2 tables - lo and hi */
+#define LO_FID_TABLE_TOP 7 /* fid values marking the boundary */
+#define HI_FID_TABLE_BOTTOM 8 /* between the low and high tables */
+
+#define LO_VCOFREQ_TABLE_TOP 1400 /* corresponding vco frequency values */
+#define HI_VCOFREQ_TABLE_BOTTOM 1600
+
+#define MIN_FREQ_RESOLUTION 200 /* fids jump by 2 matching freq jumps by 200 */
+
+#define MAX_FID 0x2a /* Spec only gives FID values as far as 5 GHz */
+#define LEAST_VID 0x3e /* Lowest (numerically highest) useful vid value */
+
+#define MIN_FREQ 800 /* Min and max freqs, per spec */
+#define MAX_FREQ 5000
+
+#define INVALID_FID_MASK 0xffffffc0 /* not a valid fid if these bits are set */
+#define INVALID_VID_MASK 0xffffffc0 /* not a valid vid if these bits are set */
+
+#define VID_OFF 0x3f
+
+#define STOP_GRANT_5NS 1 /* min poss memory access latency for voltage change */
+
+#define PLL_LOCK_CONVERSION (1000/5) /* ms to ns, then divide by clock period */
+
+#define MAXIMUM_VID_STEPS 1 /* Current cpus only allow a single step of 25mV */
+#define VST_UNITS_20US 20 /* Voltage Stabilization Time is in units of 20us */
+
+/*
+ * Most values of interest are encoded in a single field of the _PSS
+ * entries: the "control" value.
+ */
+
+#define IRT_SHIFT 30
+#define RVO_SHIFT 28
+#define EXT_TYPE_SHIFT 27
+#define PLL_L_SHIFT 20
+#define MVS_SHIFT 18
+#define VST_SHIFT 11
+#define VID_SHIFT 6
+#define IRT_MASK 3
+#define RVO_MASK 3
+#define EXT_TYPE_MASK 1
+#define PLL_L_MASK 0x7f
+#define MVS_MASK 3
+#define VST_MASK 0x7f
+#define VID_MASK 0x1f
+#define FID_MASK 0x1f
+#define EXT_VID_MASK 0x3f
+#define EXT_FID_MASK 0x3f
+
+
+/*
+ * Version 1.4 of the PSB table. This table is constructed by BIOS and is
+ * to tell the OS's power management driver which VIDs and FIDs are
+ * supported by this particular processor.
+ * If the data in the PSB / PST is wrong, then this driver will program the
+ * wrong values into hardware, which is very likely to lead to a crash.
+ */
+
+#define PSB_ID_STRING "AMDK7PNOW!"
+#define PSB_ID_STRING_LEN 10
+
+#define PSB_VERSION_1_4 0x14
+
+struct psb_s {
+ u8 signature[10];
+ u8 tableversion;
+ u8 flags1;
+ u16 vstable;
+ u8 flags2;
+ u8 num_tables;
+ u32 cpuid;
+ u8 plllocktime;
+ u8 maxfid;
+ u8 maxvid;
+ u8 numps;
+};
+
+/* Pairs of fid/vid values are appended to the version 1.4 PSB table. */
+struct pst_s {
+ u8 fid;
+ u8 vid;
+};
+
+static int core_voltage_pre_transition(struct powernow_k8_data *data,
+ u32 reqvid, u32 regfid);
+static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvid);
+static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid);
+
+static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index);
+
+static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table);
+static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table);
diff --git a/drivers/cpufreq/sc520_freq.c b/drivers/cpufreq/sc520_freq.c
new file mode 100644
index 00000000..1e205e6b
--- /dev/null
+++ b/drivers/cpufreq/sc520_freq.c
@@ -0,0 +1,192 @@
+/*
+ * sc520_freq.c: cpufreq driver for the AMD Elan sc520
+ *
+ * Copyright (C) 2005 Sean Young <sean@mess.org>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Based on elanfreq.c
+ *
+ * 2005-03-30: - initial revision
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+
+#include <linux/delay.h>
+#include <linux/cpufreq.h>
+#include <linux/timex.h>
+#include <linux/io.h>
+
+#include <asm/msr.h>
+
+#define MMCR_BASE 0xfffef000 /* The default base address */
+#define OFFS_CPUCTL 0x2 /* CPU Control Register */
+
+static __u8 __iomem *cpuctl;
+
+#define PFX "sc520_freq: "
+
+static struct cpufreq_frequency_table sc520_freq_table[] = {
+ {0x01, 100000},
+ {0x02, 133000},
+ {0, CPUFREQ_TABLE_END},
+};
+
+static unsigned int sc520_freq_get_cpu_frequency(unsigned int cpu)
+{
+ u8 clockspeed_reg = *cpuctl;
+
+ switch (clockspeed_reg & 0x03) {
+ default:
+ printk(KERN_ERR PFX "error: cpuctl register has unexpected "
+ "value %02x\n", clockspeed_reg);
+ case 0x01:
+ return 100000;
+ case 0x02:
+ return 133000;
+ }
+}
+
+static void sc520_freq_set_cpu_state(unsigned int state)
+{
+
+ struct cpufreq_freqs freqs;
+ u8 clockspeed_reg;
+
+ freqs.old = sc520_freq_get_cpu_frequency(0);
+ freqs.new = sc520_freq_table[state].frequency;
+ freqs.cpu = 0; /* AMD Elan is UP */
+
+ cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+
+ pr_debug("attempting to set frequency to %i kHz\n",
+ sc520_freq_table[state].frequency);
+
+ local_irq_disable();
+
+ clockspeed_reg = *cpuctl & ~0x03;
+ *cpuctl = clockspeed_reg | sc520_freq_table[state].index;
+
+ local_irq_enable();
+
+ cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+};
+
+static int sc520_freq_verify(struct cpufreq_policy *policy)
+{
+ return cpufreq_frequency_table_verify(policy, &sc520_freq_table[0]);
+}
+
+static int sc520_freq_target(struct cpufreq_policy *policy,
+ unsigned int target_freq,
+ unsigned int relation)
+{
+ unsigned int newstate = 0;
+
+ if (cpufreq_frequency_table_target(policy, sc520_freq_table,
+ target_freq, relation, &newstate))
+ return -EINVAL;
+
+ sc520_freq_set_cpu_state(newstate);
+
+ return 0;
+}
+
+
+/*
+ * Module init and exit code
+ */
+
+static int sc520_freq_cpu_init(struct cpufreq_policy *policy)
+{
+ struct cpuinfo_x86 *c = &cpu_data(0);
+ int result;
+
+ /* capability check */
+ if (c->x86_vendor != X86_VENDOR_AMD ||
+ c->x86 != 4 || c->x86_model != 9)
+ return -ENODEV;
+
+ /* cpuinfo and default policy values */
+ policy->cpuinfo.transition_latency = 1000000; /* 1ms */
+ policy->cur = sc520_freq_get_cpu_frequency(0);
+
+ result = cpufreq_frequency_table_cpuinfo(policy, sc520_freq_table);
+ if (result)
+ return result;
+
+ cpufreq_frequency_table_get_attr(sc520_freq_table, policy->cpu);
+
+ return 0;
+}
+
+
+static int sc520_freq_cpu_exit(struct cpufreq_policy *policy)
+{
+ cpufreq_frequency_table_put_attr(policy->cpu);
+ return 0;
+}
+
+
+static struct freq_attr *sc520_freq_attr[] = {
+ &cpufreq_freq_attr_scaling_available_freqs,
+ NULL,
+};
+
+
+static struct cpufreq_driver sc520_freq_driver = {
+ .get = sc520_freq_get_cpu_frequency,
+ .verify = sc520_freq_verify,
+ .target = sc520_freq_target,
+ .init = sc520_freq_cpu_init,
+ .exit = sc520_freq_cpu_exit,
+ .name = "sc520_freq",
+ .owner = THIS_MODULE,
+ .attr = sc520_freq_attr,
+};
+
+
+static int __init sc520_freq_init(void)
+{
+ struct cpuinfo_x86 *c = &cpu_data(0);
+ int err;
+
+ /* Test if we have the right hardware */
+ if (c->x86_vendor != X86_VENDOR_AMD ||
+ c->x86 != 4 || c->x86_model != 9) {
+ pr_debug("no Elan SC520 processor found!\n");
+ return -ENODEV;
+ }
+ cpuctl = ioremap((unsigned long)(MMCR_BASE + OFFS_CPUCTL), 1);
+ if (!cpuctl) {
+ printk(KERN_ERR "sc520_freq: error: failed to remap memory\n");
+ return -ENOMEM;
+ }
+
+ err = cpufreq_register_driver(&sc520_freq_driver);
+ if (err)
+ iounmap(cpuctl);
+
+ return err;
+}
+
+
+static void __exit sc520_freq_exit(void)
+{
+ cpufreq_unregister_driver(&sc520_freq_driver);
+ iounmap(cpuctl);
+}
+
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Sean Young <sean@mess.org>");
+MODULE_DESCRIPTION("cpufreq driver for AMD's Elan sc520 CPU");
+
+module_init(sc520_freq_init);
+module_exit(sc520_freq_exit);
+
diff --git a/drivers/cpufreq/speedstep-centrino.c b/drivers/cpufreq/speedstep-centrino.c
new file mode 100644
index 00000000..6ea3455d
--- /dev/null
+++ b/drivers/cpufreq/speedstep-centrino.c
@@ -0,0 +1,633 @@
+/*
+ * cpufreq driver for Enhanced SpeedStep, as found in Intel's Pentium
+ * M (part of the Centrino chipset).
+ *
+ * Since the original Pentium M, most new Intel CPUs support Enhanced
+ * SpeedStep.
+ *
+ * Despite the "SpeedStep" in the name, this is almost entirely unlike
+ * traditional SpeedStep.
+ *
+ * Modelled on speedstep.c
+ *
+ * Copyright (C) 2003 Jeremy Fitzhardinge <jeremy@goop.org>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/cpufreq.h>
+#include <linux/sched.h> /* current */
+#include <linux/delay.h>
+#include <linux/compiler.h>
+#include <linux/gfp.h>
+
+#include <asm/msr.h>
+#include <asm/processor.h>
+#include <asm/cpufeature.h>
+
+#define PFX "speedstep-centrino: "
+#define MAINTAINER "cpufreq@vger.kernel.org"
+
+#define INTEL_MSR_RANGE (0xffff)
+
+struct cpu_id
+{
+ __u8 x86; /* CPU family */
+ __u8 x86_model; /* model */
+ __u8 x86_mask; /* stepping */
+};
+
+enum {
+ CPU_BANIAS,
+ CPU_DOTHAN_A1,
+ CPU_DOTHAN_A2,
+ CPU_DOTHAN_B0,
+ CPU_MP4HT_D0,
+ CPU_MP4HT_E0,
+};
+
+static const struct cpu_id cpu_ids[] = {
+ [CPU_BANIAS] = { 6, 9, 5 },
+ [CPU_DOTHAN_A1] = { 6, 13, 1 },
+ [CPU_DOTHAN_A2] = { 6, 13, 2 },
+ [CPU_DOTHAN_B0] = { 6, 13, 6 },
+ [CPU_MP4HT_D0] = {15, 3, 4 },
+ [CPU_MP4HT_E0] = {15, 4, 1 },
+};
+#define N_IDS ARRAY_SIZE(cpu_ids)
+
+struct cpu_model
+{
+ const struct cpu_id *cpu_id;
+ const char *model_name;
+ unsigned max_freq; /* max clock in kHz */
+
+ struct cpufreq_frequency_table *op_points; /* clock/voltage pairs */
+};
+static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c,
+ const struct cpu_id *x);
+
+/* Operating points for current CPU */
+static DEFINE_PER_CPU(struct cpu_model *, centrino_model);
+static DEFINE_PER_CPU(const struct cpu_id *, centrino_cpu);
+
+static struct cpufreq_driver centrino_driver;
+
+#ifdef CONFIG_X86_SPEEDSTEP_CENTRINO_TABLE
+
+/* Computes the correct form for IA32_PERF_CTL MSR for a particular
+ frequency/voltage operating point; frequency in MHz, volts in mV.
+ This is stored as "index" in the structure. */
+#define OP(mhz, mv) \
+ { \
+ .frequency = (mhz) * 1000, \
+ .index = (((mhz)/100) << 8) | ((mv - 700) / 16) \
+ }
+
+/*
+ * These voltage tables were derived from the Intel Pentium M
+ * datasheet, document 25261202.pdf, Table 5. I have verified they
+ * are consistent with my IBM ThinkPad X31, which has a 1.3GHz Pentium
+ * M.
+ */
+
+/* Ultra Low Voltage Intel Pentium M processor 900MHz (Banias) */
+static struct cpufreq_frequency_table banias_900[] =
+{
+ OP(600, 844),
+ OP(800, 988),
+ OP(900, 1004),
+ { .frequency = CPUFREQ_TABLE_END }
+};
+
+/* Ultra Low Voltage Intel Pentium M processor 1000MHz (Banias) */
+static struct cpufreq_frequency_table banias_1000[] =
+{
+ OP(600, 844),
+ OP(800, 972),
+ OP(900, 988),
+ OP(1000, 1004),
+ { .frequency = CPUFREQ_TABLE_END }
+};
+
+/* Low Voltage Intel Pentium M processor 1.10GHz (Banias) */
+static struct cpufreq_frequency_table banias_1100[] =
+{
+ OP( 600, 956),
+ OP( 800, 1020),
+ OP( 900, 1100),
+ OP(1000, 1164),
+ OP(1100, 1180),
+ { .frequency = CPUFREQ_TABLE_END }
+};
+
+
+/* Low Voltage Intel Pentium M processor 1.20GHz (Banias) */
+static struct cpufreq_frequency_table banias_1200[] =
+{
+ OP( 600, 956),
+ OP( 800, 1004),
+ OP( 900, 1020),
+ OP(1000, 1100),
+ OP(1100, 1164),
+ OP(1200, 1180),
+ { .frequency = CPUFREQ_TABLE_END }
+};
+
+/* Intel Pentium M processor 1.30GHz (Banias) */
+static struct cpufreq_frequency_table banias_1300[] =
+{
+ OP( 600, 956),
+ OP( 800, 1260),
+ OP(1000, 1292),
+ OP(1200, 1356),
+ OP(1300, 1388),
+ { .frequency = CPUFREQ_TABLE_END }
+};
+
+/* Intel Pentium M processor 1.40GHz (Banias) */
+static struct cpufreq_frequency_table banias_1400[] =
+{
+ OP( 600, 956),
+ OP( 800, 1180),
+ OP(1000, 1308),
+ OP(1200, 1436),
+ OP(1400, 1484),
+ { .frequency = CPUFREQ_TABLE_END }
+};
+
+/* Intel Pentium M processor 1.50GHz (Banias) */
+static struct cpufreq_frequency_table banias_1500[] =
+{
+ OP( 600, 956),
+ OP( 800, 1116),
+ OP(1000, 1228),
+ OP(1200, 1356),
+ OP(1400, 1452),
+ OP(1500, 1484),
+ { .frequency = CPUFREQ_TABLE_END }
+};
+
+/* Intel Pentium M processor 1.60GHz (Banias) */
+static struct cpufreq_frequency_table banias_1600[] =
+{
+ OP( 600, 956),
+ OP( 800, 1036),
+ OP(1000, 1164),
+ OP(1200, 1276),
+ OP(1400, 1420),
+ OP(1600, 1484),
+ { .frequency = CPUFREQ_TABLE_END }
+};
+
+/* Intel Pentium M processor 1.70GHz (Banias) */
+static struct cpufreq_frequency_table banias_1700[] =
+{
+ OP( 600, 956),
+ OP( 800, 1004),
+ OP(1000, 1116),
+ OP(1200, 1228),
+ OP(1400, 1308),
+ OP(1700, 1484),
+ { .frequency = CPUFREQ_TABLE_END }
+};
+#undef OP
+
+#define _BANIAS(cpuid, max, name) \
+{ .cpu_id = cpuid, \
+ .model_name = "Intel(R) Pentium(R) M processor " name "MHz", \
+ .max_freq = (max)*1000, \
+ .op_points = banias_##max, \
+}
+#define BANIAS(max) _BANIAS(&cpu_ids[CPU_BANIAS], max, #max)
+
+/* CPU models, their operating frequency range, and freq/voltage
+ operating points */
+static struct cpu_model models[] =
+{
+ _BANIAS(&cpu_ids[CPU_BANIAS], 900, " 900"),
+ BANIAS(1000),
+ BANIAS(1100),
+ BANIAS(1200),
+ BANIAS(1300),
+ BANIAS(1400),
+ BANIAS(1500),
+ BANIAS(1600),
+ BANIAS(1700),
+
+ /* NULL model_name is a wildcard */
+ { &cpu_ids[CPU_DOTHAN_A1], NULL, 0, NULL },
+ { &cpu_ids[CPU_DOTHAN_A2], NULL, 0, NULL },
+ { &cpu_ids[CPU_DOTHAN_B0], NULL, 0, NULL },
+ { &cpu_ids[CPU_MP4HT_D0], NULL, 0, NULL },
+ { &cpu_ids[CPU_MP4HT_E0], NULL, 0, NULL },
+
+ { NULL, }
+};
+#undef _BANIAS
+#undef BANIAS
+
+static int centrino_cpu_init_table(struct cpufreq_policy *policy)
+{
+ struct cpuinfo_x86 *cpu = &cpu_data(policy->cpu);
+ struct cpu_model *model;
+
+ for(model = models; model->cpu_id != NULL; model++)
+ if (centrino_verify_cpu_id(cpu, model->cpu_id) &&
+ (model->model_name == NULL ||
+ strcmp(cpu->x86_model_id, model->model_name) == 0))
+ break;
+
+ if (model->cpu_id == NULL) {
+ /* No match at all */
+ pr_debug("no support for CPU model \"%s\": "
+ "send /proc/cpuinfo to " MAINTAINER "\n",
+ cpu->x86_model_id);
+ return -ENOENT;
+ }
+
+ if (model->op_points == NULL) {
+ /* Matched a non-match */
+ pr_debug("no table support for CPU model \"%s\"\n",
+ cpu->x86_model_id);
+ pr_debug("try using the acpi-cpufreq driver\n");
+ return -ENOENT;
+ }
+
+ per_cpu(centrino_model, policy->cpu) = model;
+
+ pr_debug("found \"%s\": max frequency: %dkHz\n",
+ model->model_name, model->max_freq);
+
+ return 0;
+}
+
+#else
+static inline int centrino_cpu_init_table(struct cpufreq_policy *policy)
+{
+ return -ENODEV;
+}
+#endif /* CONFIG_X86_SPEEDSTEP_CENTRINO_TABLE */
+
+static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c,
+ const struct cpu_id *x)
+{
+ if ((c->x86 == x->x86) &&
+ (c->x86_model == x->x86_model) &&
+ (c->x86_mask == x->x86_mask))
+ return 1;
+ return 0;
+}
+
+/* To be called only after centrino_model is initialized */
+static unsigned extract_clock(unsigned msr, unsigned int cpu, int failsafe)
+{
+ int i;
+
+ /*
+ * Extract clock in kHz from PERF_CTL value
+ * for centrino, as some DSDTs are buggy.
+ * Ideally, this can be done using the acpi_data structure.
+ */
+ if ((per_cpu(centrino_cpu, cpu) == &cpu_ids[CPU_BANIAS]) ||
+ (per_cpu(centrino_cpu, cpu) == &cpu_ids[CPU_DOTHAN_A1]) ||
+ (per_cpu(centrino_cpu, cpu) == &cpu_ids[CPU_DOTHAN_B0])) {
+ msr = (msr >> 8) & 0xff;
+ return msr * 100000;
+ }
+
+ if ((!per_cpu(centrino_model, cpu)) ||
+ (!per_cpu(centrino_model, cpu)->op_points))
+ return 0;
+
+ msr &= 0xffff;
+ for (i = 0;
+ per_cpu(centrino_model, cpu)->op_points[i].frequency
+ != CPUFREQ_TABLE_END;
+ i++) {
+ if (msr == per_cpu(centrino_model, cpu)->op_points[i].index)
+ return per_cpu(centrino_model, cpu)->
+ op_points[i].frequency;
+ }
+ if (failsafe)
+ return per_cpu(centrino_model, cpu)->op_points[i-1].frequency;
+ else
+ return 0;
+}
+
+/* Return the current CPU frequency in kHz */
+static unsigned int get_cur_freq(unsigned int cpu)
+{
+ unsigned l, h;
+ unsigned clock_freq;
+
+ rdmsr_on_cpu(cpu, MSR_IA32_PERF_STATUS, &l, &h);
+ clock_freq = extract_clock(l, cpu, 0);
+
+ if (unlikely(clock_freq == 0)) {
+ /*
+ * On some CPUs, we can see transient MSR values (which are
+ * not present in _PSS), while CPU is doing some automatic
+ * P-state transition (like TM2). Get the last freq set
+ * in PERF_CTL.
+ */
+ rdmsr_on_cpu(cpu, MSR_IA32_PERF_CTL, &l, &h);
+ clock_freq = extract_clock(l, cpu, 1);
+ }
+ return clock_freq;
+}
+
+
+static int centrino_cpu_init(struct cpufreq_policy *policy)
+{
+ struct cpuinfo_x86 *cpu = &cpu_data(policy->cpu);
+ unsigned freq;
+ unsigned l, h;
+ int ret;
+ int i;
+
+ /* Only Intel makes Enhanced Speedstep-capable CPUs */
+ if (cpu->x86_vendor != X86_VENDOR_INTEL ||
+ !cpu_has(cpu, X86_FEATURE_EST))
+ return -ENODEV;
+
+ if (cpu_has(cpu, X86_FEATURE_CONSTANT_TSC))
+ centrino_driver.flags |= CPUFREQ_CONST_LOOPS;
+
+ if (policy->cpu != 0)
+ return -ENODEV;
+
+ for (i = 0; i < N_IDS; i++)
+ if (centrino_verify_cpu_id(cpu, &cpu_ids[i]))
+ break;
+
+ if (i != N_IDS)
+ per_cpu(centrino_cpu, policy->cpu) = &cpu_ids[i];
+
+ if (!per_cpu(centrino_cpu, policy->cpu)) {
+ pr_debug("found unsupported CPU with "
+ "Enhanced SpeedStep: send /proc/cpuinfo to "
+ MAINTAINER "\n");
+ return -ENODEV;
+ }
+
+ if (centrino_cpu_init_table(policy)) {
+ return -ENODEV;
+ }
+
+ /* Check to see if Enhanced SpeedStep is enabled, and try to
+ enable it if not. */
+ rdmsr(MSR_IA32_MISC_ENABLE, l, h);
+
+ if (!(l & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) {
+ l |= MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP;
+ pr_debug("trying to enable Enhanced SpeedStep (%x)\n", l);
+ wrmsr(MSR_IA32_MISC_ENABLE, l, h);
+
+ /* check to see if it stuck */
+ rdmsr(MSR_IA32_MISC_ENABLE, l, h);
+ if (!(l & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) {
+ printk(KERN_INFO PFX
+ "couldn't enable Enhanced SpeedStep\n");
+ return -ENODEV;
+ }
+ }
+
+ freq = get_cur_freq(policy->cpu);
+ policy->cpuinfo.transition_latency = 10000;
+ /* 10uS transition latency */
+ policy->cur = freq;
+
+ pr_debug("centrino_cpu_init: cur=%dkHz\n", policy->cur);
+
+ ret = cpufreq_frequency_table_cpuinfo(policy,
+ per_cpu(centrino_model, policy->cpu)->op_points);
+ if (ret)
+ return (ret);
+
+ cpufreq_frequency_table_get_attr(
+ per_cpu(centrino_model, policy->cpu)->op_points, policy->cpu);
+
+ return 0;
+}
+
+static int centrino_cpu_exit(struct cpufreq_policy *policy)
+{
+ unsigned int cpu = policy->cpu;
+
+ if (!per_cpu(centrino_model, cpu))
+ return -ENODEV;
+
+ cpufreq_frequency_table_put_attr(cpu);
+
+ per_cpu(centrino_model, cpu) = NULL;
+
+ return 0;
+}
+
+/**
+ * centrino_verify - verifies a new CPUFreq policy
+ * @policy: new policy
+ *
+ * Limit must be within this model's frequency range at least one
+ * border included.
+ */
+static int centrino_verify (struct cpufreq_policy *policy)
+{
+ return cpufreq_frequency_table_verify(policy,
+ per_cpu(centrino_model, policy->cpu)->op_points);
+}
+
+/**
+ * centrino_setpolicy - set a new CPUFreq policy
+ * @policy: new policy
+ * @target_freq: the target frequency
+ * @relation: how that frequency relates to achieved frequency
+ * (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
+ *
+ * Sets a new CPUFreq policy.
+ */
+static int centrino_target (struct cpufreq_policy *policy,
+ unsigned int target_freq,
+ unsigned int relation)
+{
+ unsigned int newstate = 0;
+ unsigned int msr, oldmsr = 0, h = 0, cpu = policy->cpu;
+ struct cpufreq_freqs freqs;
+ int retval = 0;
+ unsigned int j, k, first_cpu, tmp;
+ cpumask_var_t covered_cpus;
+
+ if (unlikely(!zalloc_cpumask_var(&covered_cpus, GFP_KERNEL)))
+ return -ENOMEM;
+
+ if (unlikely(per_cpu(centrino_model, cpu) == NULL)) {
+ retval = -ENODEV;
+ goto out;
+ }
+
+ if (unlikely(cpufreq_frequency_table_target(policy,
+ per_cpu(centrino_model, cpu)->op_points,
+ target_freq,
+ relation,
+ &newstate))) {
+ retval = -EINVAL;
+ goto out;
+ }
+
+ first_cpu = 1;
+ for_each_cpu(j, policy->cpus) {
+ int good_cpu;
+
+ /* cpufreq holds the hotplug lock, so we are safe here */
+ if (!cpu_online(j))
+ continue;
+
+ /*
+ * Support for SMP systems.
+ * Make sure we are running on CPU that wants to change freq
+ */
+ if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY)
+ good_cpu = cpumask_any_and(policy->cpus,
+ cpu_online_mask);
+ else
+ good_cpu = j;
+
+ if (good_cpu >= nr_cpu_ids) {
+ pr_debug("couldn't limit to CPUs in this domain\n");
+ retval = -EAGAIN;
+ if (first_cpu) {
+ /* We haven't started the transition yet. */
+ goto out;
+ }
+ break;
+ }
+
+ msr = per_cpu(centrino_model, cpu)->op_points[newstate].index;
+
+ if (first_cpu) {
+ rdmsr_on_cpu(good_cpu, MSR_IA32_PERF_CTL, &oldmsr, &h);
+ if (msr == (oldmsr & 0xffff)) {
+ pr_debug("no change needed - msr was and needs "
+ "to be %x\n", oldmsr);
+ retval = 0;
+ goto out;
+ }
+
+ freqs.old = extract_clock(oldmsr, cpu, 0);
+ freqs.new = extract_clock(msr, cpu, 0);
+
+ pr_debug("target=%dkHz old=%d new=%d msr=%04x\n",
+ target_freq, freqs.old, freqs.new, msr);
+
+ for_each_cpu(k, policy->cpus) {
+ if (!cpu_online(k))
+ continue;
+ freqs.cpu = k;
+ cpufreq_notify_transition(&freqs,
+ CPUFREQ_PRECHANGE);
+ }
+
+ first_cpu = 0;
+ /* all but 16 LSB are reserved, treat them with care */
+ oldmsr &= ~0xffff;
+ msr &= 0xffff;
+ oldmsr |= msr;
+ }
+
+ wrmsr_on_cpu(good_cpu, MSR_IA32_PERF_CTL, oldmsr, h);
+ if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY)
+ break;
+
+ cpumask_set_cpu(j, covered_cpus);
+ }
+
+ for_each_cpu(k, policy->cpus) {
+ if (!cpu_online(k))
+ continue;
+ freqs.cpu = k;
+ cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+ }
+
+ if (unlikely(retval)) {
+ /*
+ * We have failed halfway through the frequency change.
+ * We have sent callbacks to policy->cpus and
+ * MSRs have already been written on coverd_cpus.
+ * Best effort undo..
+ */
+
+ for_each_cpu(j, covered_cpus)
+ wrmsr_on_cpu(j, MSR_IA32_PERF_CTL, oldmsr, h);
+
+ tmp = freqs.new;
+ freqs.new = freqs.old;
+ freqs.old = tmp;
+ for_each_cpu(j, policy->cpus) {
+ if (!cpu_online(j))
+ continue;
+ cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+ cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+ }
+ }
+ retval = 0;
+
+out:
+ free_cpumask_var(covered_cpus);
+ return retval;
+}
+
+static struct freq_attr* centrino_attr[] = {
+ &cpufreq_freq_attr_scaling_available_freqs,
+ NULL,
+};
+
+static struct cpufreq_driver centrino_driver = {
+ .name = "centrino", /* should be speedstep-centrino,
+ but there's a 16 char limit */
+ .init = centrino_cpu_init,
+ .exit = centrino_cpu_exit,
+ .verify = centrino_verify,
+ .target = centrino_target,
+ .get = get_cur_freq,
+ .attr = centrino_attr,
+ .owner = THIS_MODULE,
+};
+
+
+/**
+ * centrino_init - initializes the Enhanced SpeedStep CPUFreq driver
+ *
+ * Initializes the Enhanced SpeedStep support. Returns -ENODEV on
+ * unsupported devices, -ENOENT if there's no voltage table for this
+ * particular CPU model, -EINVAL on problems during initiatization,
+ * and zero on success.
+ *
+ * This is quite picky. Not only does the CPU have to advertise the
+ * "est" flag in the cpuid capability flags, we look for a specific
+ * CPU model and stepping, and we need to have the exact model name in
+ * our voltage tables. That is, be paranoid about not releasing
+ * someone's valuable magic smoke.
+ */
+static int __init centrino_init(void)
+{
+ struct cpuinfo_x86 *cpu = &cpu_data(0);
+
+ if (!cpu_has(cpu, X86_FEATURE_EST))
+ return -ENODEV;
+
+ return cpufreq_register_driver(&centrino_driver);
+}
+
+static void __exit centrino_exit(void)
+{
+ cpufreq_unregister_driver(&centrino_driver);
+}
+
+MODULE_AUTHOR ("Jeremy Fitzhardinge <jeremy@goop.org>");
+MODULE_DESCRIPTION ("Enhanced SpeedStep driver for Intel Pentium M processors.");
+MODULE_LICENSE ("GPL");
+
+late_initcall(centrino_init);
+module_exit(centrino_exit);
diff --git a/drivers/cpufreq/speedstep-ich.c b/drivers/cpufreq/speedstep-ich.c
new file mode 100644
index 00000000..a748ce78
--- /dev/null
+++ b/drivers/cpufreq/speedstep-ich.c
@@ -0,0 +1,448 @@
+/*
+ * (C) 2001 Dave Jones, Arjan van de ven.
+ * (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de>
+ *
+ * Licensed under the terms of the GNU GPL License version 2.
+ * Based upon reverse engineered information, and on Intel documentation
+ * for chipsets ICH2-M and ICH3-M.
+ *
+ * Many thanks to Ducrot Bruno for finding and fixing the last
+ * "missing link" for ICH2-M/ICH3-M support, and to Thomas Winkler
+ * for extensive testing.
+ *
+ * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
+ */
+
+
+/*********************************************************************
+ * SPEEDSTEP - DEFINITIONS *
+ *********************************************************************/
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/cpufreq.h>
+#include <linux/pci.h>
+#include <linux/sched.h>
+
+#include "speedstep-lib.h"
+
+
+/* speedstep_chipset:
+ * It is necessary to know which chipset is used. As accesses to
+ * this device occur at various places in this module, we need a
+ * static struct pci_dev * pointing to that device.
+ */
+static struct pci_dev *speedstep_chipset_dev;
+
+
+/* speedstep_processor
+ */
+static enum speedstep_processor speedstep_processor;
+
+static u32 pmbase;
+
+/*
+ * There are only two frequency states for each processor. Values
+ * are in kHz for the time being.
+ */
+static struct cpufreq_frequency_table speedstep_freqs[] = {
+ {SPEEDSTEP_HIGH, 0},
+ {SPEEDSTEP_LOW, 0},
+ {0, CPUFREQ_TABLE_END},
+};
+
+
+/**
+ * speedstep_find_register - read the PMBASE address
+ *
+ * Returns: -ENODEV if no register could be found
+ */
+static int speedstep_find_register(void)
+{
+ if (!speedstep_chipset_dev)
+ return -ENODEV;
+
+ /* get PMBASE */
+ pci_read_config_dword(speedstep_chipset_dev, 0x40, &pmbase);
+ if (!(pmbase & 0x01)) {
+ printk(KERN_ERR "speedstep-ich: could not find speedstep register\n");
+ return -ENODEV;
+ }
+
+ pmbase &= 0xFFFFFFFE;
+ if (!pmbase) {
+ printk(KERN_ERR "speedstep-ich: could not find speedstep register\n");
+ return -ENODEV;
+ }
+
+ pr_debug("pmbase is 0x%x\n", pmbase);
+ return 0;
+}
+
+/**
+ * speedstep_set_state - set the SpeedStep state
+ * @state: new processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH)
+ *
+ * Tries to change the SpeedStep state. Can be called from
+ * smp_call_function_single.
+ */
+static void speedstep_set_state(unsigned int state)
+{
+ u8 pm2_blk;
+ u8 value;
+ unsigned long flags;
+
+ if (state > 0x1)
+ return;
+
+ /* Disable IRQs */
+ local_irq_save(flags);
+
+ /* read state */
+ value = inb(pmbase + 0x50);
+
+ pr_debug("read at pmbase 0x%x + 0x50 returned 0x%x\n", pmbase, value);
+
+ /* write new state */
+ value &= 0xFE;
+ value |= state;
+
+ pr_debug("writing 0x%x to pmbase 0x%x + 0x50\n", value, pmbase);
+
+ /* Disable bus master arbitration */
+ pm2_blk = inb(pmbase + 0x20);
+ pm2_blk |= 0x01;
+ outb(pm2_blk, (pmbase + 0x20));
+
+ /* Actual transition */
+ outb(value, (pmbase + 0x50));
+
+ /* Restore bus master arbitration */
+ pm2_blk &= 0xfe;
+ outb(pm2_blk, (pmbase + 0x20));
+
+ /* check if transition was successful */
+ value = inb(pmbase + 0x50);
+
+ /* Enable IRQs */
+ local_irq_restore(flags);
+
+ pr_debug("read at pmbase 0x%x + 0x50 returned 0x%x\n", pmbase, value);
+
+ if (state == (value & 0x1))
+ pr_debug("change to %u MHz succeeded\n",
+ speedstep_get_frequency(speedstep_processor) / 1000);
+ else
+ printk(KERN_ERR "cpufreq: change failed - I/O error\n");
+
+ return;
+}
+
+/* Wrapper for smp_call_function_single. */
+static void _speedstep_set_state(void *_state)
+{
+ speedstep_set_state(*(unsigned int *)_state);
+}
+
+/**
+ * speedstep_activate - activate SpeedStep control in the chipset
+ *
+ * Tries to activate the SpeedStep status and control registers.
+ * Returns -EINVAL on an unsupported chipset, and zero on success.
+ */
+static int speedstep_activate(void)
+{
+ u16 value = 0;
+
+ if (!speedstep_chipset_dev)
+ return -EINVAL;
+
+ pci_read_config_word(speedstep_chipset_dev, 0x00A0, &value);
+ if (!(value & 0x08)) {
+ value |= 0x08;
+ pr_debug("activating SpeedStep (TM) registers\n");
+ pci_write_config_word(speedstep_chipset_dev, 0x00A0, value);
+ }
+
+ return 0;
+}
+
+
+/**
+ * speedstep_detect_chipset - detect the Southbridge which contains SpeedStep logic
+ *
+ * Detects ICH2-M, ICH3-M and ICH4-M so far. The pci_dev points to
+ * the LPC bridge / PM module which contains all power-management
+ * functions. Returns the SPEEDSTEP_CHIPSET_-number for the detected
+ * chipset, or zero on failure.
+ */
+static unsigned int speedstep_detect_chipset(void)
+{
+ speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL,
+ PCI_DEVICE_ID_INTEL_82801DB_12,
+ PCI_ANY_ID, PCI_ANY_ID,
+ NULL);
+ if (speedstep_chipset_dev)
+ return 4; /* 4-M */
+
+ speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL,
+ PCI_DEVICE_ID_INTEL_82801CA_12,
+ PCI_ANY_ID, PCI_ANY_ID,
+ NULL);
+ if (speedstep_chipset_dev)
+ return 3; /* 3-M */
+
+
+ speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL,
+ PCI_DEVICE_ID_INTEL_82801BA_10,
+ PCI_ANY_ID, PCI_ANY_ID,
+ NULL);
+ if (speedstep_chipset_dev) {
+ /* speedstep.c causes lockups on Dell Inspirons 8000 and
+ * 8100 which use a pretty old revision of the 82815
+ * host brige. Abort on these systems.
+ */
+ static struct pci_dev *hostbridge;
+
+ hostbridge = pci_get_subsys(PCI_VENDOR_ID_INTEL,
+ PCI_DEVICE_ID_INTEL_82815_MC,
+ PCI_ANY_ID, PCI_ANY_ID,
+ NULL);
+
+ if (!hostbridge)
+ return 2; /* 2-M */
+
+ if (hostbridge->revision < 5) {
+ pr_debug("hostbridge does not support speedstep\n");
+ speedstep_chipset_dev = NULL;
+ pci_dev_put(hostbridge);
+ return 0;
+ }
+
+ pci_dev_put(hostbridge);
+ return 2; /* 2-M */
+ }
+
+ return 0;
+}
+
+static void get_freq_data(void *_speed)
+{
+ unsigned int *speed = _speed;
+
+ *speed = speedstep_get_frequency(speedstep_processor);
+}
+
+static unsigned int speedstep_get(unsigned int cpu)
+{
+ unsigned int speed;
+
+ /* You're supposed to ensure CPU is online. */
+ if (smp_call_function_single(cpu, get_freq_data, &speed, 1) != 0)
+ BUG();
+
+ pr_debug("detected %u kHz as current frequency\n", speed);
+ return speed;
+}
+
+/**
+ * speedstep_target - set a new CPUFreq policy
+ * @policy: new policy
+ * @target_freq: the target frequency
+ * @relation: how that frequency relates to achieved frequency
+ * (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
+ *
+ * Sets a new CPUFreq policy.
+ */
+static int speedstep_target(struct cpufreq_policy *policy,
+ unsigned int target_freq,
+ unsigned int relation)
+{
+ unsigned int newstate = 0, policy_cpu;
+ struct cpufreq_freqs freqs;
+ int i;
+
+ if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0],
+ target_freq, relation, &newstate))
+ return -EINVAL;
+
+ policy_cpu = cpumask_any_and(policy->cpus, cpu_online_mask);
+ freqs.old = speedstep_get(policy_cpu);
+ freqs.new = speedstep_freqs[newstate].frequency;
+ freqs.cpu = policy->cpu;
+
+ pr_debug("transiting from %u to %u kHz\n", freqs.old, freqs.new);
+
+ /* no transition necessary */
+ if (freqs.old == freqs.new)
+ return 0;
+
+ for_each_cpu(i, policy->cpus) {
+ freqs.cpu = i;
+ cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+ }
+
+ smp_call_function_single(policy_cpu, _speedstep_set_state, &newstate,
+ true);
+
+ for_each_cpu(i, policy->cpus) {
+ freqs.cpu = i;
+ cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+ }
+
+ return 0;
+}
+
+
+/**
+ * speedstep_verify - verifies a new CPUFreq policy
+ * @policy: new policy
+ *
+ * Limit must be within speedstep_low_freq and speedstep_high_freq, with
+ * at least one border included.
+ */
+static int speedstep_verify(struct cpufreq_policy *policy)
+{
+ return cpufreq_frequency_table_verify(policy, &speedstep_freqs[0]);
+}
+
+struct get_freqs {
+ struct cpufreq_policy *policy;
+ int ret;
+};
+
+static void get_freqs_on_cpu(void *_get_freqs)
+{
+ struct get_freqs *get_freqs = _get_freqs;
+
+ get_freqs->ret =
+ speedstep_get_freqs(speedstep_processor,
+ &speedstep_freqs[SPEEDSTEP_LOW].frequency,
+ &speedstep_freqs[SPEEDSTEP_HIGH].frequency,
+ &get_freqs->policy->cpuinfo.transition_latency,
+ &speedstep_set_state);
+}
+
+static int speedstep_cpu_init(struct cpufreq_policy *policy)
+{
+ int result;
+ unsigned int policy_cpu, speed;
+ struct get_freqs gf;
+
+ /* only run on CPU to be set, or on its sibling */
+#ifdef CONFIG_SMP
+ cpumask_copy(policy->cpus, cpu_sibling_mask(policy->cpu));
+#endif
+ policy_cpu = cpumask_any_and(policy->cpus, cpu_online_mask);
+
+ /* detect low and high frequency and transition latency */
+ gf.policy = policy;
+ smp_call_function_single(policy_cpu, get_freqs_on_cpu, &gf, 1);
+ if (gf.ret)
+ return gf.ret;
+
+ /* get current speed setting */
+ speed = speedstep_get(policy_cpu);
+ if (!speed)
+ return -EIO;
+
+ pr_debug("currently at %s speed setting - %i MHz\n",
+ (speed == speedstep_freqs[SPEEDSTEP_LOW].frequency)
+ ? "low" : "high",
+ (speed / 1000));
+
+ /* cpuinfo and default policy values */
+ policy->cur = speed;
+
+ result = cpufreq_frequency_table_cpuinfo(policy, speedstep_freqs);
+ if (result)
+ return result;
+
+ cpufreq_frequency_table_get_attr(speedstep_freqs, policy->cpu);
+
+ return 0;
+}
+
+
+static int speedstep_cpu_exit(struct cpufreq_policy *policy)
+{
+ cpufreq_frequency_table_put_attr(policy->cpu);
+ return 0;
+}
+
+static struct freq_attr *speedstep_attr[] = {
+ &cpufreq_freq_attr_scaling_available_freqs,
+ NULL,
+};
+
+
+static struct cpufreq_driver speedstep_driver = {
+ .name = "speedstep-ich",
+ .verify = speedstep_verify,
+ .target = speedstep_target,
+ .init = speedstep_cpu_init,
+ .exit = speedstep_cpu_exit,
+ .get = speedstep_get,
+ .owner = THIS_MODULE,
+ .attr = speedstep_attr,
+};
+
+
+/**
+ * speedstep_init - initializes the SpeedStep CPUFreq driver
+ *
+ * Initializes the SpeedStep support. Returns -ENODEV on unsupported
+ * devices, -EINVAL on problems during initiatization, and zero on
+ * success.
+ */
+static int __init speedstep_init(void)
+{
+ /* detect processor */
+ speedstep_processor = speedstep_detect_processor();
+ if (!speedstep_processor) {
+ pr_debug("Intel(R) SpeedStep(TM) capable processor "
+ "not found\n");
+ return -ENODEV;
+ }
+
+ /* detect chipset */
+ if (!speedstep_detect_chipset()) {
+ pr_debug("Intel(R) SpeedStep(TM) for this chipset not "
+ "(yet) available.\n");
+ return -ENODEV;
+ }
+
+ /* activate speedstep support */
+ if (speedstep_activate()) {
+ pci_dev_put(speedstep_chipset_dev);
+ return -EINVAL;
+ }
+
+ if (speedstep_find_register())
+ return -ENODEV;
+
+ return cpufreq_register_driver(&speedstep_driver);
+}
+
+
+/**
+ * speedstep_exit - unregisters SpeedStep support
+ *
+ * Unregisters SpeedStep support.
+ */
+static void __exit speedstep_exit(void)
+{
+ pci_dev_put(speedstep_chipset_dev);
+ cpufreq_unregister_driver(&speedstep_driver);
+}
+
+
+MODULE_AUTHOR("Dave Jones <davej@redhat.com>, "
+ "Dominik Brodowski <linux@brodo.de>");
+MODULE_DESCRIPTION("Speedstep driver for Intel mobile processors on chipsets "
+ "with ICH-M southbridges.");
+MODULE_LICENSE("GPL");
+
+module_init(speedstep_init);
+module_exit(speedstep_exit);
diff --git a/drivers/cpufreq/speedstep-lib.c b/drivers/cpufreq/speedstep-lib.c
new file mode 100644
index 00000000..8af2d2fd
--- /dev/null
+++ b/drivers/cpufreq/speedstep-lib.c
@@ -0,0 +1,478 @@
+/*
+ * (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de>
+ *
+ * Licensed under the terms of the GNU GPL License version 2.
+ *
+ * Library for common functions for Intel SpeedStep v.1 and v.2 support
+ *
+ * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/init.h>
+#include <linux/cpufreq.h>
+
+#include <asm/msr.h>
+#include <asm/tsc.h>
+#include "speedstep-lib.h"
+
+#define PFX "speedstep-lib: "
+
+#ifdef CONFIG_X86_SPEEDSTEP_RELAXED_CAP_CHECK
+static int relaxed_check;
+#else
+#define relaxed_check 0
+#endif
+
+/*********************************************************************
+ * GET PROCESSOR CORE SPEED IN KHZ *
+ *********************************************************************/
+
+static unsigned int pentium3_get_frequency(enum speedstep_processor processor)
+{
+ /* See table 14 of p3_ds.pdf and table 22 of 29834003.pdf */
+ struct {
+ unsigned int ratio; /* Frequency Multiplier (x10) */
+ u8 bitmap; /* power on configuration bits
+ [27, 25:22] (in MSR 0x2a) */
+ } msr_decode_mult[] = {
+ { 30, 0x01 },
+ { 35, 0x05 },
+ { 40, 0x02 },
+ { 45, 0x06 },
+ { 50, 0x00 },
+ { 55, 0x04 },
+ { 60, 0x0b },
+ { 65, 0x0f },
+ { 70, 0x09 },
+ { 75, 0x0d },
+ { 80, 0x0a },
+ { 85, 0x26 },
+ { 90, 0x20 },
+ { 100, 0x2b },
+ { 0, 0xff } /* error or unknown value */
+ };
+
+ /* PIII(-M) FSB settings: see table b1-b of 24547206.pdf */
+ struct {
+ unsigned int value; /* Front Side Bus speed in MHz */
+ u8 bitmap; /* power on configuration bits [18: 19]
+ (in MSR 0x2a) */
+ } msr_decode_fsb[] = {
+ { 66, 0x0 },
+ { 100, 0x2 },
+ { 133, 0x1 },
+ { 0, 0xff}
+ };
+
+ u32 msr_lo, msr_tmp;
+ int i = 0, j = 0;
+
+ /* read MSR 0x2a - we only need the low 32 bits */
+ rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp);
+ pr_debug("P3 - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", msr_lo, msr_tmp);
+ msr_tmp = msr_lo;
+
+ /* decode the FSB */
+ msr_tmp &= 0x00c0000;
+ msr_tmp >>= 18;
+ while (msr_tmp != msr_decode_fsb[i].bitmap) {
+ if (msr_decode_fsb[i].bitmap == 0xff)
+ return 0;
+ i++;
+ }
+
+ /* decode the multiplier */
+ if (processor == SPEEDSTEP_CPU_PIII_C_EARLY) {
+ pr_debug("workaround for early PIIIs\n");
+ msr_lo &= 0x03c00000;
+ } else
+ msr_lo &= 0x0bc00000;
+ msr_lo >>= 22;
+ while (msr_lo != msr_decode_mult[j].bitmap) {
+ if (msr_decode_mult[j].bitmap == 0xff)
+ return 0;
+ j++;
+ }
+
+ pr_debug("speed is %u\n",
+ (msr_decode_mult[j].ratio * msr_decode_fsb[i].value * 100));
+
+ return msr_decode_mult[j].ratio * msr_decode_fsb[i].value * 100;
+}
+
+
+static unsigned int pentiumM_get_frequency(void)
+{
+ u32 msr_lo, msr_tmp;
+
+ rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp);
+ pr_debug("PM - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", msr_lo, msr_tmp);
+
+ /* see table B-2 of 24547212.pdf */
+ if (msr_lo & 0x00040000) {
+ printk(KERN_DEBUG PFX "PM - invalid FSB: 0x%x 0x%x\n",
+ msr_lo, msr_tmp);
+ return 0;
+ }
+
+ msr_tmp = (msr_lo >> 22) & 0x1f;
+ pr_debug("bits 22-26 are 0x%x, speed is %u\n",
+ msr_tmp, (msr_tmp * 100 * 1000));
+
+ return msr_tmp * 100 * 1000;
+}
+
+static unsigned int pentium_core_get_frequency(void)
+{
+ u32 fsb = 0;
+ u32 msr_lo, msr_tmp;
+ int ret;
+
+ rdmsr(MSR_FSB_FREQ, msr_lo, msr_tmp);
+ /* see table B-2 of 25366920.pdf */
+ switch (msr_lo & 0x07) {
+ case 5:
+ fsb = 100000;
+ break;
+ case 1:
+ fsb = 133333;
+ break;
+ case 3:
+ fsb = 166667;
+ break;
+ case 2:
+ fsb = 200000;
+ break;
+ case 0:
+ fsb = 266667;
+ break;
+ case 4:
+ fsb = 333333;
+ break;
+ default:
+ printk(KERN_ERR "PCORE - MSR_FSB_FREQ undefined value");
+ }
+
+ rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp);
+ pr_debug("PCORE - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n",
+ msr_lo, msr_tmp);
+
+ msr_tmp = (msr_lo >> 22) & 0x1f;
+ pr_debug("bits 22-26 are 0x%x, speed is %u\n",
+ msr_tmp, (msr_tmp * fsb));
+
+ ret = (msr_tmp * fsb);
+ return ret;
+}
+
+
+static unsigned int pentium4_get_frequency(void)
+{
+ struct cpuinfo_x86 *c = &boot_cpu_data;
+ u32 msr_lo, msr_hi, mult;
+ unsigned int fsb = 0;
+ unsigned int ret;
+ u8 fsb_code;
+
+ /* Pentium 4 Model 0 and 1 do not have the Core Clock Frequency
+ * to System Bus Frequency Ratio Field in the Processor Frequency
+ * Configuration Register of the MSR. Therefore the current
+ * frequency cannot be calculated and has to be measured.
+ */
+ if (c->x86_model < 2)
+ return cpu_khz;
+
+ rdmsr(0x2c, msr_lo, msr_hi);
+
+ pr_debug("P4 - MSR_EBC_FREQUENCY_ID: 0x%x 0x%x\n", msr_lo, msr_hi);
+
+ /* decode the FSB: see IA-32 Intel (C) Architecture Software
+ * Developer's Manual, Volume 3: System Prgramming Guide,
+ * revision #12 in Table B-1: MSRs in the Pentium 4 and
+ * Intel Xeon Processors, on page B-4 and B-5.
+ */
+ fsb_code = (msr_lo >> 16) & 0x7;
+ switch (fsb_code) {
+ case 0:
+ fsb = 100 * 1000;
+ break;
+ case 1:
+ fsb = 13333 * 10;
+ break;
+ case 2:
+ fsb = 200 * 1000;
+ break;
+ }
+
+ if (!fsb)
+ printk(KERN_DEBUG PFX "couldn't detect FSB speed. "
+ "Please send an e-mail to <linux@brodo.de>\n");
+
+ /* Multiplier. */
+ mult = msr_lo >> 24;
+
+ pr_debug("P4 - FSB %u kHz; Multiplier %u; Speed %u kHz\n",
+ fsb, mult, (fsb * mult));
+
+ ret = (fsb * mult);
+ return ret;
+}
+
+
+/* Warning: may get called from smp_call_function_single. */
+unsigned int speedstep_get_frequency(enum speedstep_processor processor)
+{
+ switch (processor) {
+ case SPEEDSTEP_CPU_PCORE:
+ return pentium_core_get_frequency();
+ case SPEEDSTEP_CPU_PM:
+ return pentiumM_get_frequency();
+ case SPEEDSTEP_CPU_P4D:
+ case SPEEDSTEP_CPU_P4M:
+ return pentium4_get_frequency();
+ case SPEEDSTEP_CPU_PIII_T:
+ case SPEEDSTEP_CPU_PIII_C:
+ case SPEEDSTEP_CPU_PIII_C_EARLY:
+ return pentium3_get_frequency(processor);
+ default:
+ return 0;
+ };
+ return 0;
+}
+EXPORT_SYMBOL_GPL(speedstep_get_frequency);
+
+
+/*********************************************************************
+ * DETECT SPEEDSTEP-CAPABLE PROCESSOR *
+ *********************************************************************/
+
+unsigned int speedstep_detect_processor(void)
+{
+ struct cpuinfo_x86 *c = &cpu_data(0);
+ u32 ebx, msr_lo, msr_hi;
+
+ pr_debug("x86: %x, model: %x\n", c->x86, c->x86_model);
+
+ if ((c->x86_vendor != X86_VENDOR_INTEL) ||
+ ((c->x86 != 6) && (c->x86 != 0xF)))
+ return 0;
+
+ if (c->x86 == 0xF) {
+ /* Intel Mobile Pentium 4-M
+ * or Intel Mobile Pentium 4 with 533 MHz FSB */
+ if (c->x86_model != 2)
+ return 0;
+
+ ebx = cpuid_ebx(0x00000001);
+ ebx &= 0x000000FF;
+
+ pr_debug("ebx value is %x, x86_mask is %x\n", ebx, c->x86_mask);
+
+ switch (c->x86_mask) {
+ case 4:
+ /*
+ * B-stepping [M-P4-M]
+ * sample has ebx = 0x0f, production has 0x0e.
+ */
+ if ((ebx == 0x0e) || (ebx == 0x0f))
+ return SPEEDSTEP_CPU_P4M;
+ break;
+ case 7:
+ /*
+ * C-stepping [M-P4-M]
+ * needs to have ebx=0x0e, else it's a celeron:
+ * cf. 25130917.pdf / page 7, footnote 5 even
+ * though 25072120.pdf / page 7 doesn't say
+ * samples are only of B-stepping...
+ */
+ if (ebx == 0x0e)
+ return SPEEDSTEP_CPU_P4M;
+ break;
+ case 9:
+ /*
+ * D-stepping [M-P4-M or M-P4/533]
+ *
+ * this is totally strange: CPUID 0x0F29 is
+ * used by M-P4-M, M-P4/533 and(!) Celeron CPUs.
+ * The latter need to be sorted out as they don't
+ * support speedstep.
+ * Celerons with CPUID 0x0F29 may have either
+ * ebx=0x8 or 0xf -- 25130917.pdf doesn't say anything
+ * specific.
+ * M-P4-Ms may have either ebx=0xe or 0xf [see above]
+ * M-P4/533 have either ebx=0xe or 0xf. [25317607.pdf]
+ * also, M-P4M HTs have ebx=0x8, too
+ * For now, they are distinguished by the model_id
+ * string
+ */
+ if ((ebx == 0x0e) ||
+ (strstr(c->x86_model_id,
+ "Mobile Intel(R) Pentium(R) 4") != NULL))
+ return SPEEDSTEP_CPU_P4M;
+ break;
+ default:
+ break;
+ }
+ return 0;
+ }
+
+ switch (c->x86_model) {
+ case 0x0B: /* Intel PIII [Tualatin] */
+ /* cpuid_ebx(1) is 0x04 for desktop PIII,
+ * 0x06 for mobile PIII-M */
+ ebx = cpuid_ebx(0x00000001);
+ pr_debug("ebx is %x\n", ebx);
+
+ ebx &= 0x000000FF;
+
+ if (ebx != 0x06)
+ return 0;
+
+ /* So far all PIII-M processors support SpeedStep. See
+ * Intel's 24540640.pdf of June 2003
+ */
+ return SPEEDSTEP_CPU_PIII_T;
+
+ case 0x08: /* Intel PIII [Coppermine] */
+
+ /* all mobile PIII Coppermines have FSB 100 MHz
+ * ==> sort out a few desktop PIIIs. */
+ rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_hi);
+ pr_debug("Coppermine: MSR_IA32_EBL_CR_POWERON is 0x%x, 0x%x\n",
+ msr_lo, msr_hi);
+ msr_lo &= 0x00c0000;
+ if (msr_lo != 0x0080000)
+ return 0;
+
+ /*
+ * If the processor is a mobile version,
+ * platform ID has bit 50 set
+ * it has SpeedStep technology if either
+ * bit 56 or 57 is set
+ */
+ rdmsr(MSR_IA32_PLATFORM_ID, msr_lo, msr_hi);
+ pr_debug("Coppermine: MSR_IA32_PLATFORM ID is 0x%x, 0x%x\n",
+ msr_lo, msr_hi);
+ if ((msr_hi & (1<<18)) &&
+ (relaxed_check ? 1 : (msr_hi & (3<<24)))) {
+ if (c->x86_mask == 0x01) {
+ pr_debug("early PIII version\n");
+ return SPEEDSTEP_CPU_PIII_C_EARLY;
+ } else
+ return SPEEDSTEP_CPU_PIII_C;
+ }
+
+ default:
+ return 0;
+ }
+}
+EXPORT_SYMBOL_GPL(speedstep_detect_processor);
+
+
+/*********************************************************************
+ * DETECT SPEEDSTEP SPEEDS *
+ *********************************************************************/
+
+unsigned int speedstep_get_freqs(enum speedstep_processor processor,
+ unsigned int *low_speed,
+ unsigned int *high_speed,
+ unsigned int *transition_latency,
+ void (*set_state) (unsigned int state))
+{
+ unsigned int prev_speed;
+ unsigned int ret = 0;
+ unsigned long flags;
+ struct timeval tv1, tv2;
+
+ if ((!processor) || (!low_speed) || (!high_speed) || (!set_state))
+ return -EINVAL;
+
+ pr_debug("trying to determine both speeds\n");
+
+ /* get current speed */
+ prev_speed = speedstep_get_frequency(processor);
+ if (!prev_speed)
+ return -EIO;
+
+ pr_debug("previous speed is %u\n", prev_speed);
+
+ local_irq_save(flags);
+
+ /* switch to low state */
+ set_state(SPEEDSTEP_LOW);
+ *low_speed = speedstep_get_frequency(processor);
+ if (!*low_speed) {
+ ret = -EIO;
+ goto out;
+ }
+
+ pr_debug("low speed is %u\n", *low_speed);
+
+ /* start latency measurement */
+ if (transition_latency)
+ do_gettimeofday(&tv1);
+
+ /* switch to high state */
+ set_state(SPEEDSTEP_HIGH);
+
+ /* end latency measurement */
+ if (transition_latency)
+ do_gettimeofday(&tv2);
+
+ *high_speed = speedstep_get_frequency(processor);
+ if (!*high_speed) {
+ ret = -EIO;
+ goto out;
+ }
+
+ pr_debug("high speed is %u\n", *high_speed);
+
+ if (*low_speed == *high_speed) {
+ ret = -ENODEV;
+ goto out;
+ }
+
+ /* switch to previous state, if necessary */
+ if (*high_speed != prev_speed)
+ set_state(SPEEDSTEP_LOW);
+
+ if (transition_latency) {
+ *transition_latency = (tv2.tv_sec - tv1.tv_sec) * USEC_PER_SEC +
+ tv2.tv_usec - tv1.tv_usec;
+ pr_debug("transition latency is %u uSec\n", *transition_latency);
+
+ /* convert uSec to nSec and add 20% for safety reasons */
+ *transition_latency *= 1200;
+
+ /* check if the latency measurement is too high or too low
+ * and set it to a safe value (500uSec) in that case
+ */
+ if (*transition_latency > 10000000 ||
+ *transition_latency < 50000) {
+ printk(KERN_WARNING PFX "frequency transition "
+ "measured seems out of range (%u "
+ "nSec), falling back to a safe one of"
+ "%u nSec.\n",
+ *transition_latency, 500000);
+ *transition_latency = 500000;
+ }
+ }
+
+out:
+ local_irq_restore(flags);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(speedstep_get_freqs);
+
+#ifdef CONFIG_X86_SPEEDSTEP_RELAXED_CAP_CHECK
+module_param(relaxed_check, int, 0444);
+MODULE_PARM_DESC(relaxed_check,
+ "Don't do all checks for speedstep capability.");
+#endif
+
+MODULE_AUTHOR("Dominik Brodowski <linux@brodo.de>");
+MODULE_DESCRIPTION("Library for Intel SpeedStep 1 or 2 cpufreq drivers.");
+MODULE_LICENSE("GPL");
diff --git a/drivers/cpufreq/speedstep-lib.h b/drivers/cpufreq/speedstep-lib.h
new file mode 100644
index 00000000..70d9cea1
--- /dev/null
+++ b/drivers/cpufreq/speedstep-lib.h
@@ -0,0 +1,49 @@
+/*
+ * (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de>
+ *
+ * Licensed under the terms of the GNU GPL License version 2.
+ *
+ * Library for common functions for Intel SpeedStep v.1 and v.2 support
+ *
+ * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
+ */
+
+
+
+/* processors */
+enum speedstep_processor {
+ SPEEDSTEP_CPU_PIII_C_EARLY = 0x00000001, /* Coppermine core */
+ SPEEDSTEP_CPU_PIII_C = 0x00000002, /* Coppermine core */
+ SPEEDSTEP_CPU_PIII_T = 0x00000003, /* Tualatin core */
+ SPEEDSTEP_CPU_P4M = 0x00000004, /* P4-M */
+/* the following processors are not speedstep-capable and are not auto-detected
+ * in speedstep_detect_processor(). However, their speed can be detected using
+ * the speedstep_get_frequency() call. */
+ SPEEDSTEP_CPU_PM = 0xFFFFFF03, /* Pentium M */
+ SPEEDSTEP_CPU_P4D = 0xFFFFFF04, /* desktop P4 */
+ SPEEDSTEP_CPU_PCORE = 0xFFFFFF05, /* Core */
+};
+
+/* speedstep states -- only two of them */
+
+#define SPEEDSTEP_HIGH 0x00000000
+#define SPEEDSTEP_LOW 0x00000001
+
+
+/* detect a speedstep-capable processor */
+extern enum speedstep_processor speedstep_detect_processor(void);
+
+/* detect the current speed (in khz) of the processor */
+extern unsigned int speedstep_get_frequency(enum speedstep_processor processor);
+
+
+/* detect the low and high speeds of the processor. The callback
+ * set_state"'s first argument is either SPEEDSTEP_HIGH or
+ * SPEEDSTEP_LOW; the second argument is zero so that no
+ * cpufreq_notify_transition calls are initiated.
+ */
+extern unsigned int speedstep_get_freqs(enum speedstep_processor processor,
+ unsigned int *low_speed,
+ unsigned int *high_speed,
+ unsigned int *transition_latency,
+ void (*set_state) (unsigned int state));
diff --git a/drivers/cpufreq/speedstep-smi.c b/drivers/cpufreq/speedstep-smi.c
new file mode 100644
index 00000000..c76ead34
--- /dev/null
+++ b/drivers/cpufreq/speedstep-smi.c
@@ -0,0 +1,464 @@
+/*
+ * Intel SpeedStep SMI driver.
+ *
+ * (C) 2003 Hiroshi Miura <miura@da-cha.org>
+ *
+ * Licensed under the terms of the GNU GPL License version 2.
+ *
+ */
+
+
+/*********************************************************************
+ * SPEEDSTEP - DEFINITIONS *
+ *********************************************************************/
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/init.h>
+#include <linux/cpufreq.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <asm/ist.h>
+
+#include "speedstep-lib.h"
+
+/* speedstep system management interface port/command.
+ *
+ * These parameters are got from IST-SMI BIOS call.
+ * If user gives it, these are used.
+ *
+ */
+static int smi_port;
+static int smi_cmd;
+static unsigned int smi_sig;
+
+/* info about the processor */
+static enum speedstep_processor speedstep_processor;
+
+/*
+ * There are only two frequency states for each processor. Values
+ * are in kHz for the time being.
+ */
+static struct cpufreq_frequency_table speedstep_freqs[] = {
+ {SPEEDSTEP_HIGH, 0},
+ {SPEEDSTEP_LOW, 0},
+ {0, CPUFREQ_TABLE_END},
+};
+
+#define GET_SPEEDSTEP_OWNER 0
+#define GET_SPEEDSTEP_STATE 1
+#define SET_SPEEDSTEP_STATE 2
+#define GET_SPEEDSTEP_FREQS 4
+
+/* how often shall the SMI call be tried if it failed, e.g. because
+ * of DMA activity going on? */
+#define SMI_TRIES 5
+
+/**
+ * speedstep_smi_ownership
+ */
+static int speedstep_smi_ownership(void)
+{
+ u32 command, result, magic, dummy;
+ u32 function = GET_SPEEDSTEP_OWNER;
+ unsigned char magic_data[] = "Copyright (c) 1999 Intel Corporation";
+
+ command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);
+ magic = virt_to_phys(magic_data);
+
+ pr_debug("trying to obtain ownership with command %x at port %x\n",
+ command, smi_port);
+
+ __asm__ __volatile__(
+ "push %%ebp\n"
+ "out %%al, (%%dx)\n"
+ "pop %%ebp\n"
+ : "=D" (result),
+ "=a" (dummy), "=b" (dummy), "=c" (dummy), "=d" (dummy),
+ "=S" (dummy)
+ : "a" (command), "b" (function), "c" (0), "d" (smi_port),
+ "D" (0), "S" (magic)
+ : "memory"
+ );
+
+ pr_debug("result is %x\n", result);
+
+ return result;
+}
+
+/**
+ * speedstep_smi_get_freqs - get SpeedStep preferred & current freq.
+ * @low: the low frequency value is placed here
+ * @high: the high frequency value is placed here
+ *
+ * Only available on later SpeedStep-enabled systems, returns false results or
+ * even hangs [cf. bugme.osdl.org # 1422] on earlier systems. Empirical testing
+ * shows that the latter occurs if !(ist_info.event & 0xFFFF).
+ */
+static int speedstep_smi_get_freqs(unsigned int *low, unsigned int *high)
+{
+ u32 command, result = 0, edi, high_mhz, low_mhz, dummy;
+ u32 state = 0;
+ u32 function = GET_SPEEDSTEP_FREQS;
+
+ if (!(ist_info.event & 0xFFFF)) {
+ pr_debug("bug #1422 -- can't read freqs from BIOS\n");
+ return -ENODEV;
+ }
+
+ command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);
+
+ pr_debug("trying to determine frequencies with command %x at port %x\n",
+ command, smi_port);
+
+ __asm__ __volatile__(
+ "push %%ebp\n"
+ "out %%al, (%%dx)\n"
+ "pop %%ebp"
+ : "=a" (result),
+ "=b" (high_mhz),
+ "=c" (low_mhz),
+ "=d" (state), "=D" (edi), "=S" (dummy)
+ : "a" (command),
+ "b" (function),
+ "c" (state),
+ "d" (smi_port), "S" (0), "D" (0)
+ );
+
+ pr_debug("result %x, low_freq %u, high_freq %u\n",
+ result, low_mhz, high_mhz);
+
+ /* abort if results are obviously incorrect... */
+ if ((high_mhz + low_mhz) < 600)
+ return -EINVAL;
+
+ *high = high_mhz * 1000;
+ *low = low_mhz * 1000;
+
+ return result;
+}
+
+/**
+ * speedstep_get_state - set the SpeedStep state
+ * @state: processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH)
+ *
+ */
+static int speedstep_get_state(void)
+{
+ u32 function = GET_SPEEDSTEP_STATE;
+ u32 result, state, edi, command, dummy;
+
+ command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);
+
+ pr_debug("trying to determine current setting with command %x "
+ "at port %x\n", command, smi_port);
+
+ __asm__ __volatile__(
+ "push %%ebp\n"
+ "out %%al, (%%dx)\n"
+ "pop %%ebp\n"
+ : "=a" (result),
+ "=b" (state), "=D" (edi),
+ "=c" (dummy), "=d" (dummy), "=S" (dummy)
+ : "a" (command), "b" (function), "c" (0),
+ "d" (smi_port), "S" (0), "D" (0)
+ );
+
+ pr_debug("state is %x, result is %x\n", state, result);
+
+ return state & 1;
+}
+
+
+/**
+ * speedstep_set_state - set the SpeedStep state
+ * @state: new processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH)
+ *
+ */
+static void speedstep_set_state(unsigned int state)
+{
+ unsigned int result = 0, command, new_state, dummy;
+ unsigned long flags;
+ unsigned int function = SET_SPEEDSTEP_STATE;
+ unsigned int retry = 0;
+
+ if (state > 0x1)
+ return;
+
+ /* Disable IRQs */
+ local_irq_save(flags);
+
+ command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);
+
+ pr_debug("trying to set frequency to state %u "
+ "with command %x at port %x\n",
+ state, command, smi_port);
+
+ do {
+ if (retry) {
+ pr_debug("retry %u, previous result %u, waiting...\n",
+ retry, result);
+ mdelay(retry * 50);
+ }
+ retry++;
+ __asm__ __volatile__(
+ "push %%ebp\n"
+ "out %%al, (%%dx)\n"
+ "pop %%ebp"
+ : "=b" (new_state), "=D" (result),
+ "=c" (dummy), "=a" (dummy),
+ "=d" (dummy), "=S" (dummy)
+ : "a" (command), "b" (function), "c" (state),
+ "d" (smi_port), "S" (0), "D" (0)
+ );
+ } while ((new_state != state) && (retry <= SMI_TRIES));
+
+ /* enable IRQs */
+ local_irq_restore(flags);
+
+ if (new_state == state)
+ pr_debug("change to %u MHz succeeded after %u tries "
+ "with result %u\n",
+ (speedstep_freqs[new_state].frequency / 1000),
+ retry, result);
+ else
+ printk(KERN_ERR "cpufreq: change to state %u "
+ "failed with new_state %u and result %u\n",
+ state, new_state, result);
+
+ return;
+}
+
+
+/**
+ * speedstep_target - set a new CPUFreq policy
+ * @policy: new policy
+ * @target_freq: new freq
+ * @relation:
+ *
+ * Sets a new CPUFreq policy/freq.
+ */
+static int speedstep_target(struct cpufreq_policy *policy,
+ unsigned int target_freq, unsigned int relation)
+{
+ unsigned int newstate = 0;
+ struct cpufreq_freqs freqs;
+
+ if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0],
+ target_freq, relation, &newstate))
+ return -EINVAL;
+
+ freqs.old = speedstep_freqs[speedstep_get_state()].frequency;
+ freqs.new = speedstep_freqs[newstate].frequency;
+ freqs.cpu = 0; /* speedstep.c is UP only driver */
+
+ if (freqs.old == freqs.new)
+ return 0;
+
+ cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+ speedstep_set_state(newstate);
+ cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+
+ return 0;
+}
+
+
+/**
+ * speedstep_verify - verifies a new CPUFreq policy
+ * @policy: new policy
+ *
+ * Limit must be within speedstep_low_freq and speedstep_high_freq, with
+ * at least one border included.
+ */
+static int speedstep_verify(struct cpufreq_policy *policy)
+{
+ return cpufreq_frequency_table_verify(policy, &speedstep_freqs[0]);
+}
+
+
+static int speedstep_cpu_init(struct cpufreq_policy *policy)
+{
+ int result;
+ unsigned int speed, state;
+ unsigned int *low, *high;
+
+ /* capability check */
+ if (policy->cpu != 0)
+ return -ENODEV;
+
+ result = speedstep_smi_ownership();
+ if (result) {
+ pr_debug("fails in acquiring ownership of a SMI interface.\n");
+ return -EINVAL;
+ }
+
+ /* detect low and high frequency */
+ low = &speedstep_freqs[SPEEDSTEP_LOW].frequency;
+ high = &speedstep_freqs[SPEEDSTEP_HIGH].frequency;
+
+ result = speedstep_smi_get_freqs(low, high);
+ if (result) {
+ /* fall back to speedstep_lib.c dection mechanism:
+ * try both states out */
+ pr_debug("could not detect low and high frequencies "
+ "by SMI call.\n");
+ result = speedstep_get_freqs(speedstep_processor,
+ low, high,
+ NULL,
+ &speedstep_set_state);
+
+ if (result) {
+ pr_debug("could not detect two different speeds"
+ " -- aborting.\n");
+ return result;
+ } else
+ pr_debug("workaround worked.\n");
+ }
+
+ /* get current speed setting */
+ state = speedstep_get_state();
+ speed = speedstep_freqs[state].frequency;
+
+ pr_debug("currently at %s speed setting - %i MHz\n",
+ (speed == speedstep_freqs[SPEEDSTEP_LOW].frequency)
+ ? "low" : "high",
+ (speed / 1000));
+
+ /* cpuinfo and default policy values */
+ policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
+ policy->cur = speed;
+
+ result = cpufreq_frequency_table_cpuinfo(policy, speedstep_freqs);
+ if (result)
+ return result;
+
+ cpufreq_frequency_table_get_attr(speedstep_freqs, policy->cpu);
+
+ return 0;
+}
+
+static int speedstep_cpu_exit(struct cpufreq_policy *policy)
+{
+ cpufreq_frequency_table_put_attr(policy->cpu);
+ return 0;
+}
+
+static unsigned int speedstep_get(unsigned int cpu)
+{
+ if (cpu)
+ return -ENODEV;
+ return speedstep_get_frequency(speedstep_processor);
+}
+
+
+static int speedstep_resume(struct cpufreq_policy *policy)
+{
+ int result = speedstep_smi_ownership();
+
+ if (result)
+ pr_debug("fails in re-acquiring ownership of a SMI interface.\n");
+
+ return result;
+}
+
+static struct freq_attr *speedstep_attr[] = {
+ &cpufreq_freq_attr_scaling_available_freqs,
+ NULL,
+};
+
+static struct cpufreq_driver speedstep_driver = {
+ .name = "speedstep-smi",
+ .verify = speedstep_verify,
+ .target = speedstep_target,
+ .init = speedstep_cpu_init,
+ .exit = speedstep_cpu_exit,
+ .get = speedstep_get,
+ .resume = speedstep_resume,
+ .owner = THIS_MODULE,
+ .attr = speedstep_attr,
+};
+
+/**
+ * speedstep_init - initializes the SpeedStep CPUFreq driver
+ *
+ * Initializes the SpeedStep support. Returns -ENODEV on unsupported
+ * BIOS, -EINVAL on problems during initiatization, and zero on
+ * success.
+ */
+static int __init speedstep_init(void)
+{
+ speedstep_processor = speedstep_detect_processor();
+
+ switch (speedstep_processor) {
+ case SPEEDSTEP_CPU_PIII_T:
+ case SPEEDSTEP_CPU_PIII_C:
+ case SPEEDSTEP_CPU_PIII_C_EARLY:
+ break;
+ default:
+ speedstep_processor = 0;
+ }
+
+ if (!speedstep_processor) {
+ pr_debug("No supported Intel CPU detected.\n");
+ return -ENODEV;
+ }
+
+ pr_debug("signature:0x%.8ulx, command:0x%.8ulx, "
+ "event:0x%.8ulx, perf_level:0x%.8ulx.\n",
+ ist_info.signature, ist_info.command,
+ ist_info.event, ist_info.perf_level);
+
+ /* Error if no IST-SMI BIOS or no PARM
+ sig= 'ISGE' aka 'Intel Speedstep Gate E' */
+ if ((ist_info.signature != 0x47534943) && (
+ (smi_port == 0) || (smi_cmd == 0)))
+ return -ENODEV;
+
+ if (smi_sig == 1)
+ smi_sig = 0x47534943;
+ else
+ smi_sig = ist_info.signature;
+
+ /* setup smi_port from MODLULE_PARM or BIOS */
+ if ((smi_port > 0xff) || (smi_port < 0))
+ return -EINVAL;
+ else if (smi_port == 0)
+ smi_port = ist_info.command & 0xff;
+
+ if ((smi_cmd > 0xff) || (smi_cmd < 0))
+ return -EINVAL;
+ else if (smi_cmd == 0)
+ smi_cmd = (ist_info.command >> 16) & 0xff;
+
+ return cpufreq_register_driver(&speedstep_driver);
+}
+
+
+/**
+ * speedstep_exit - unregisters SpeedStep support
+ *
+ * Unregisters SpeedStep support.
+ */
+static void __exit speedstep_exit(void)
+{
+ cpufreq_unregister_driver(&speedstep_driver);
+}
+
+module_param(smi_port, int, 0444);
+module_param(smi_cmd, int, 0444);
+module_param(smi_sig, uint, 0444);
+
+MODULE_PARM_DESC(smi_port, "Override the BIOS-given IST port with this value "
+ "-- Intel's default setting is 0xb2");
+MODULE_PARM_DESC(smi_cmd, "Override the BIOS-given IST command with this value "
+ "-- Intel's default setting is 0x82");
+MODULE_PARM_DESC(smi_sig, "Set to 1 to fake the IST signature when using the "
+ "SMI interface.");
+
+MODULE_AUTHOR("Hiroshi Miura");
+MODULE_DESCRIPTION("Speedstep driver for IST applet SMI interface.");
+MODULE_LICENSE("GPL");
+
+module_init(speedstep_init);
+module_exit(speedstep_exit);