aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>2014-04-01 03:13:26 -0400
committerRafael J. Wysocki <rafael.j.wysocki@intel.com>2014-04-07 08:35:27 -0400
commitb3d627a5f2bf1a9a486f65af6f7c2ce0e09b3d12 (patch)
tree3b04d01b8e7ed6b48c28fed17479eec7225e3a75
parent0ca97886fece9e1acd71ade4ca3a250945c8fc8b (diff)
cpufreq: powernv: cpufreq driver for powernv platform
Backend driver to dynamically set voltage and frequency on IBM POWER non-virtualized platforms. Power management SPRs are used to set the required PState. This driver works in conjunction with cpufreq governors like 'ondemand' to provide a demand based frequency and voltage setting on IBM POWER non-virtualized platforms. PState table is obtained from OPAL v3 firmware through device tree. powernv_cpufreq back-end driver would parse the relevant device-tree nodes and initialise the cpufreq subsystem on powernv platform. The code was originally written by svaidy@linux.vnet.ibm.com. Over time it was modified to accommodate bug-fixes as well as updates to the cpu-freq core. Relevant portions of the change logs corresponding to those modifications are noted below: * The policy->cpus needs to be populated in a hotplug-invariant manner instead of using cpu_sibling_mask() which varies with cpu-hotplug. This is because the cpufreq core code copies this content into policy->related_cpus mask which should not vary on cpu-hotplug. [Authored by srivatsa.bhat@linux.vnet.ibm.com] * Create a helper routine that can return the cpu-frequency for the corresponding pstate_id. Also, cache the values of the pstate_max, pstate_min and pstate_nominal and nr_pstates in a static structure so that they can be reused in the future to perform any validations. [Authored by ego@linux.vnet.ibm.com] * Create a driver attribute named cpuinfo_nominal_freq which creates a sysfs read-only file named cpuinfo_nominal_freq. Export the frequency corresponding to the nominal_pstate through this interface. Nominal frequency is the highest non-turbo frequency for the platform. This is generally used for setting governor policies from user space for optimal energy efficiency. [Authored by ego@linux.vnet.ibm.com] * Implement a powernv_cpufreq_get(unsigned int cpu) method which will return the current operating frequency. 
Export this via the sysfs interface cpuinfo_cur_freq by setting powernv_cpufreq_driver.get to powernv_cpufreq_get(). [Authored by ego@linux.vnet.ibm.com] [Change log updated by ego@linux.vnet.ibm.com] Reviewed-by: Preeti U Murthy <preeti@linux.vnet.ibm.com> Signed-off-by: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com> Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com> Signed-off-by: Anton Blanchard <anton@samba.org> Signed-off-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
-rw-r--r--arch/powerpc/include/asm/reg.h4
-rw-r--r--drivers/cpufreq/Kconfig.powerpc8
-rw-r--r--drivers/cpufreq/Makefile1
-rw-r--r--drivers/cpufreq/powernv-cpufreq.c342
4 files changed, 355 insertions, 0 deletions
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 1a36b8ede417..2189f8f2ca88 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -271,6 +271,10 @@
271#define SPRN_HSRR1 0x13B /* Hypervisor Save/Restore 1 */ 271#define SPRN_HSRR1 0x13B /* Hypervisor Save/Restore 1 */
272#define SPRN_IC 0x350 /* Virtual Instruction Count */ 272#define SPRN_IC 0x350 /* Virtual Instruction Count */
273#define SPRN_VTB 0x351 /* Virtual Time Base */ 273#define SPRN_VTB 0x351 /* Virtual Time Base */
274#define SPRN_PMICR 0x354 /* Power Management Idle Control Reg */
275#define SPRN_PMSR 0x355 /* Power Management Status Reg */
276#define SPRN_PMCR 0x374 /* Power Management Control Register */
277
274/* HFSCR and FSCR bit numbers are the same */ 278/* HFSCR and FSCR bit numbers are the same */
275#define FSCR_TAR_LG 8 /* Enable Target Address Register */ 279#define FSCR_TAR_LG 8 /* Enable Target Address Register */
276#define FSCR_EBB_LG 7 /* Enable Event Based Branching */ 280#define FSCR_EBB_LG 7 /* Enable Event Based Branching */
diff --git a/drivers/cpufreq/Kconfig.powerpc b/drivers/cpufreq/Kconfig.powerpc
index ca0021a96e19..72564b701b4a 100644
--- a/drivers/cpufreq/Kconfig.powerpc
+++ b/drivers/cpufreq/Kconfig.powerpc
@@ -54,3 +54,11 @@ config PPC_PASEMI_CPUFREQ
54 help 54 help
55 This adds the support for frequency switching on PA Semi 55 This adds the support for frequency switching on PA Semi
56 PWRficient processors. 56 PWRficient processors.
57
58config POWERNV_CPUFREQ
59 tristate "CPU frequency scaling for IBM POWERNV platform"
60 depends on PPC_POWERNV
61 default y
62 help
63 This adds support for CPU frequency switching on IBM POWERNV
64 platform
diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
index 74945652dd7a..0dbb963c1aef 100644
--- a/drivers/cpufreq/Makefile
+++ b/drivers/cpufreq/Makefile
@@ -86,6 +86,7 @@ obj-$(CONFIG_PPC_CORENET_CPUFREQ) += ppc-corenet-cpufreq.o
86obj-$(CONFIG_CPU_FREQ_PMAC) += pmac32-cpufreq.o 86obj-$(CONFIG_CPU_FREQ_PMAC) += pmac32-cpufreq.o
87obj-$(CONFIG_CPU_FREQ_PMAC64) += pmac64-cpufreq.o 87obj-$(CONFIG_CPU_FREQ_PMAC64) += pmac64-cpufreq.o
88obj-$(CONFIG_PPC_PASEMI_CPUFREQ) += pasemi-cpufreq.o 88obj-$(CONFIG_PPC_PASEMI_CPUFREQ) += pasemi-cpufreq.o
89obj-$(CONFIG_POWERNV_CPUFREQ) += powernv-cpufreq.o
89 90
90################################################################################## 91##################################################################################
91# Other platform drivers 92# Other platform drivers
diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c
new file mode 100644
index 000000000000..e1e519703dfe
--- /dev/null
+++ b/drivers/cpufreq/powernv-cpufreq.c
@@ -0,0 +1,342 @@
1/*
2 * POWERNV cpufreq driver for the IBM POWER processors
3 *
4 * (C) Copyright IBM 2014
5 *
6 * Author: Vaidyanathan Srinivasan <svaidy at linux.vnet.ibm.com>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2, or (at your option)
11 * any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 */
19
20#define pr_fmt(fmt) "powernv-cpufreq: " fmt
21
22#include <linux/kernel.h>
23#include <linux/sysfs.h>
24#include <linux/cpumask.h>
25#include <linux/module.h>
26#include <linux/cpufreq.h>
27#include <linux/smp.h>
28#include <linux/of.h>
29
30#include <asm/cputhreads.h>
31#include <asm/reg.h>
32
/* Capacity (in pstate entries) of the two static tables below. */
#define POWERNV_MAX_PSTATES 256

/*
 * Parallel tables filled in by init_powernv_pstates():
 *   powernv_freqs[i].frequency - frequency of the i-th firmware entry, in kHz
 *   powernv_pstate_ids[i]      - pstate id of that same entry
 * The extra (+1) slot leaves room for the CPUFREQ_TABLE_END marker.
 */
static struct cpufreq_frequency_table powernv_freqs[POWERNV_MAX_PSTATES+1];
static int powernv_pstate_ids[POWERNV_MAX_PSTATES+1];
37
/*
 * Cached pstate limits, filled in once by init_powernv_pstates().
 *
 * The pstate ids form a run of contiguous integers bounded by @min
 * (smallest id) and @max (largest id). @nominal is the highest
 * non-turbo pstate of the platform and @nr_pstates is the number of
 * entries that were read from the firmware tables.
 */
struct powernv_pstate_info {
	int min;
	int max;
	int nominal;
	int nr_pstates;
};

static struct powernv_pstate_info powernv_pstate_info;
52
53/*
54 * Initialize the freq table based on data obtained
55 * from the firmware passed via device-tree
56 */
57static int init_powernv_pstates(void)
58{
59 struct device_node *power_mgt;
60 int i, pstate_min, pstate_max, pstate_nominal, nr_pstates = 0;
61 const __be32 *pstate_ids, *pstate_freqs;
62 u32 len_ids, len_freqs;
63
64 power_mgt = of_find_node_by_path("/ibm,opal/power-mgt");
65 if (!power_mgt) {
66 pr_warn("power-mgt node not found\n");
67 return -ENODEV;
68 }
69
70 if (of_property_read_u32(power_mgt, "ibm,pstate-min", &pstate_min)) {
71 pr_warn("ibm,pstate-min node not found\n");
72 return -ENODEV;
73 }
74
75 if (of_property_read_u32(power_mgt, "ibm,pstate-max", &pstate_max)) {
76 pr_warn("ibm,pstate-max node not found\n");
77 return -ENODEV;
78 }
79
80 if (of_property_read_u32(power_mgt, "ibm,pstate-nominal",
81 &pstate_nominal)) {
82 pr_warn("ibm,pstate-nominal not found\n");
83 return -ENODEV;
84 }
85 pr_info("cpufreq pstate min %d nominal %d max %d\n", pstate_min,
86 pstate_nominal, pstate_max);
87
88 pstate_ids = of_get_property(power_mgt, "ibm,pstate-ids", &len_ids);
89 if (!pstate_ids) {
90 pr_warn("ibm,pstate-ids not found\n");
91 return -ENODEV;
92 }
93
94 pstate_freqs = of_get_property(power_mgt, "ibm,pstate-frequencies-mhz",
95 &len_freqs);
96 if (!pstate_freqs) {
97 pr_warn("ibm,pstate-frequencies-mhz not found\n");
98 return -ENODEV;
99 }
100
101 WARN_ON(len_ids != len_freqs);
102 nr_pstates = min(len_ids, len_freqs) / sizeof(u32);
103 if (!nr_pstates) {
104 pr_warn("No PStates found\n");
105 return -ENODEV;
106 }
107
108 pr_debug("NR PStates %d\n", nr_pstates);
109 for (i = 0; i < nr_pstates; i++) {
110 u32 id = be32_to_cpu(pstate_ids[i]);
111 u32 freq = be32_to_cpu(pstate_freqs[i]);
112
113 pr_debug("PState id %d freq %d MHz\n", id, freq);
114 powernv_freqs[i].frequency = freq * 1000; /* kHz */
115 powernv_pstate_ids[i] = id;
116 }
117 /* End of list marker entry */
118 powernv_freqs[i].frequency = CPUFREQ_TABLE_END;
119
120 powernv_pstate_info.min = pstate_min;
121 powernv_pstate_info.max = pstate_max;
122 powernv_pstate_info.nominal = pstate_nominal;
123 powernv_pstate_info.nr_pstates = nr_pstates;
124
125 return 0;
126}
127
128/* Returns the CPU frequency corresponding to the pstate_id. */
129static unsigned int pstate_id_to_freq(int pstate_id)
130{
131 int i;
132
133 i = powernv_pstate_info.max - pstate_id;
134 BUG_ON(i >= powernv_pstate_info.nr_pstates || i < 0);
135
136 return powernv_freqs[i].frequency;
137}
138
139/*
140 * cpuinfo_nominal_freq_show - Show the nominal CPU frequency as indicated by
141 * the firmware
142 */
143static ssize_t cpuinfo_nominal_freq_show(struct cpufreq_policy *policy,
144 char *buf)
145{
146 return sprintf(buf, "%u\n",
147 pstate_id_to_freq(powernv_pstate_info.nominal));
148}
149
/* Read-only attribute "cpuinfo_nominal_freq", served by cpuinfo_nominal_freq_show(). */
struct freq_attr cpufreq_freq_attr_cpuinfo_nominal_freq =
	__ATTR_RO(cpuinfo_nominal_freq);

/* NULL-terminated attribute list referenced by powernv_cpufreq_driver.attr. */
static struct freq_attr *powernv_cpu_freq_attr[] = {
	&cpufreq_freq_attr_scaling_available_freqs,
	&cpufreq_freq_attr_cpuinfo_nominal_freq,
	NULL,
};
158
159/* Helper routines */
160
161/* Access helpers to power mgt SPR */
162
163static inline unsigned long get_pmspr(unsigned long sprn)
164{
165 switch (sprn) {
166 case SPRN_PMCR:
167 return mfspr(SPRN_PMCR);
168
169 case SPRN_PMICR:
170 return mfspr(SPRN_PMICR);
171
172 case SPRN_PMSR:
173 return mfspr(SPRN_PMSR);
174 }
175 BUG();
176}
177
178static inline void set_pmspr(unsigned long sprn, unsigned long val)
179{
180 switch (sprn) {
181 case SPRN_PMCR:
182 mtspr(SPRN_PMCR, val);
183 return;
184
185 case SPRN_PMICR:
186 mtspr(SPRN_PMICR, val);
187 return;
188 }
189 BUG();
190}
191
/*
 * Use objects of this type to query/update
 * pstates on a remote CPU via smp_call_function.
 *
 * powernv_read_cpu_freq() fills in both fields; set_pstate() only reads
 * pstate_id.
 */
struct powernv_smp_call_data {
	unsigned int freq;	/* frequency (kHz) matching pstate_id */
	int pstate_id;		/* pstate id read from, or to be written to, the CPU */
};
200
201/*
202 * powernv_read_cpu_freq: Reads the current frequency on this CPU.
203 *
204 * Called via smp_call_function.
205 *
206 * Note: The caller of the smp_call_function should pass an argument of
207 * the type 'struct powernv_smp_call_data *' along with this function.
208 *
209 * The current frequency on this CPU will be returned via
210 * ((struct powernv_smp_call_data *)arg)->freq;
211 */
212static void powernv_read_cpu_freq(void *arg)
213{
214 unsigned long pmspr_val;
215 s8 local_pstate_id;
216 struct powernv_smp_call_data *freq_data = arg;
217
218 pmspr_val = get_pmspr(SPRN_PMSR);
219
220 /*
221 * The local pstate id corresponds bits 48..55 in the PMSR.
222 * Note: Watch out for the sign!
223 */
224 local_pstate_id = (pmspr_val >> 48) & 0xFF;
225 freq_data->pstate_id = local_pstate_id;
226 freq_data->freq = pstate_id_to_freq(freq_data->pstate_id);
227
228 pr_debug("cpu %d pmsr %016lX pstate_id %d frequency %d kHz\n",
229 raw_smp_processor_id(), pmspr_val, freq_data->pstate_id,
230 freq_data->freq);
231}
232
233/*
234 * powernv_cpufreq_get: Returns the CPU frequency as reported by the
235 * firmware for CPU 'cpu'. This value is reported through the sysfs
236 * file cpuinfo_cur_freq.
237 */
238unsigned int powernv_cpufreq_get(unsigned int cpu)
239{
240 struct powernv_smp_call_data freq_data;
241
242 smp_call_function_any(cpu_sibling_mask(cpu), powernv_read_cpu_freq,
243 &freq_data, 1);
244
245 return freq_data.freq;
246}
247
248/*
249 * set_pstate: Sets the pstate on this CPU.
250 *
251 * This is called via an smp_call_function.
252 *
253 * The caller must ensure that freq_data is of the type
254 * (struct powernv_smp_call_data *) and the pstate_id which needs to be set
255 * on this CPU should be present in freq_data->pstate_id.
256 */
257static void set_pstate(void *freq_data)
258{
259 unsigned long val;
260 unsigned long pstate_ul =
261 ((struct powernv_smp_call_data *) freq_data)->pstate_id;
262
263 val = get_pmspr(SPRN_PMCR);
264 val = val & 0x0000FFFFFFFFFFFFULL;
265
266 pstate_ul = pstate_ul & 0xFF;
267
268 /* Set both global(bits 56..63) and local(bits 48..55) PStates */
269 val = val | (pstate_ul << 56) | (pstate_ul << 48);
270
271 pr_debug("Setting cpu %d pmcr to %016lX\n",
272 raw_smp_processor_id(), val);
273 set_pmspr(SPRN_PMCR, val);
274}
275
276/*
277 * powernv_cpufreq_target_index: Sets the frequency corresponding to
278 * the cpufreq table entry indexed by new_index on the cpus in the
279 * mask policy->cpus
280 */
281static int powernv_cpufreq_target_index(struct cpufreq_policy *policy,
282 unsigned int new_index)
283{
284 struct powernv_smp_call_data freq_data;
285
286 freq_data.pstate_id = powernv_pstate_ids[new_index];
287
288 /*
289 * Use smp_call_function to send IPI and execute the
290 * mtspr on target CPU. We could do that without IPI
291 * if current CPU is within policy->cpus (core)
292 */
293 smp_call_function_any(policy->cpus, set_pstate, &freq_data, 1);
294
295 return 0;
296}
297
298static int powernv_cpufreq_cpu_init(struct cpufreq_policy *policy)
299{
300 int base, i;
301
302 base = cpu_first_thread_sibling(policy->cpu);
303
304 for (i = 0; i < threads_per_core; i++)
305 cpumask_set_cpu(base + i, policy->cpus);
306
307 return cpufreq_table_validate_and_show(policy, powernv_freqs);
308}
309
/*
 * Driver descriptor that hooks the callbacks defined in this file into the
 * cpufreq core; registered by powernv_cpufreq_init().
 */
static struct cpufreq_driver powernv_cpufreq_driver = {
	.name = "powernv-cpufreq",
	.flags = CPUFREQ_CONST_LOOPS,
	.init = powernv_cpufreq_cpu_init,		/* per-policy setup */
	.verify = cpufreq_generic_frequency_table_verify,
	.target_index = powernv_cpufreq_target_index,	/* program a table entry */
	.get = powernv_cpufreq_get,			/* read back current frequency */
	.attr = powernv_cpu_freq_attr,			/* extra sysfs attributes */
};
319
320static int __init powernv_cpufreq_init(void)
321{
322 int rc = 0;
323
324 /* Discover pstates from device tree and init */
325 rc = init_powernv_pstates();
326 if (rc) {
327 pr_info("powernv-cpufreq disabled. System does not support PState control\n");
328 return rc;
329 }
330
331 return cpufreq_register_driver(&powernv_cpufreq_driver);
332}
333module_init(powernv_cpufreq_init);
334
/* Module exit point: unregisters the driver registered by powernv_cpufreq_init(). */
static void __exit powernv_cpufreq_exit(void)
{
	cpufreq_unregister_driver(&powernv_cpufreq_driver);
}
module_exit(powernv_cpufreq_exit);
340
341MODULE_LICENSE("GPL");
342MODULE_AUTHOR("Vaidyanathan Srinivasan <svaidy at linux.vnet.ibm.com>");