path: root/drivers/cpufreq
author	Viresh Kumar <viresh.kumar@linaro.org>	2012-10-25 18:47:42 -0400
committer	Rafael J. Wysocki <rafael.j.wysocki@intel.com>	2012-11-14 18:33:07 -0500
commit	4471a34f9a1f2da220272e823bdb8e8fa83a7661 (patch)
tree	d63e8c16a4b40da97b558d4b955f8e64157b8900 /drivers/cpufreq
parent	0676f7f2e7d2adec11f40320ca43a8897b8ef906 (diff)
cpufreq: governors: remove redundant code
The ondemand governor was written first, and the conservative governor was later written using its code. The conservative governor reused much of the ondemand governor's code, but as a copy of that code rather than as routines shared by both governors, which added redundancy that is difficult to maintain. This patch moves the common parts of both governors to cpufreq_governor.c to address the issues mentioned above. It should not change anything from a functionality point of view.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
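To make the refactoring concrete, below is a minimal, standalone C sketch of the pattern the patch introduces: a per-governor descriptor (here a stripped-down dbs_data) carries the tunables and a gov_check_cpu callback, so the shared code computes the load once and dispatches to the governor-specific policy. This is not kernel code and the signatures are simplified for illustration; the real structures and callbacks live in cpufreq_governor.h and cpufreq_governor.c in the diff below.

/*
 * Standalone model of the common-governor dispatch pattern.
 * Illustrative only; real kernel code differs in signatures and locking.
 */
#include <stdio.h>

struct dbs_data {
	const char *name;
	unsigned int up_threshold;
	/* governor-specific load check, supplied by each governor */
	void (*gov_check_cpu)(struct dbs_data *dbs_data, unsigned int load);
};

/* Common code: evaluate load once, hand it to the owning governor */
static void dbs_check_cpu(struct dbs_data *dbs_data, unsigned int load)
{
	dbs_data->gov_check_cpu(dbs_data, load);
}

/* "ondemand"-style policy: jump straight to the maximum frequency */
static void od_check_cpu(struct dbs_data *dbs_data, unsigned int load)
{
	if (load > dbs_data->up_threshold)
		printf("%s: load %u%% -> go to max frequency\n",
		       dbs_data->name, load);
}

/* "conservative"-style policy: raise the frequency in small steps */
static void cs_check_cpu(struct dbs_data *dbs_data, unsigned int load)
{
	if (load > dbs_data->up_threshold)
		printf("%s: load %u%% -> step frequency up by 5%%\n",
		       dbs_data->name, load);
}

int main(void)
{
	struct dbs_data od = { "ondemand", 80, od_check_cpu };
	struct dbs_data cs = { "conservative", 80, cs_check_cpu };

	dbs_check_cpu(&od, 95);	/* common path, ondemand behaviour */
	dbs_check_cpu(&cs, 95);	/* common path, conservative behaviour */
	return 0;
}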
Diffstat (limited to 'drivers/cpufreq')
-rw-r--r--	drivers/cpufreq/cpufreq_conservative.c	548
-rw-r--r--	drivers/cpufreq/cpufreq_governor.c	276
-rw-r--r--	drivers/cpufreq/cpufreq_governor.h	177
-rw-r--r--	drivers/cpufreq/cpufreq_ondemand.c	698
4 files changed, 832 insertions, 867 deletions
diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c
index 181abad07266..64ef737e7e72 100644
--- a/drivers/cpufreq/cpufreq_conservative.c
+++ b/drivers/cpufreq/cpufreq_conservative.c
@@ -11,83 +11,30 @@
11 * published by the Free Software Foundation. 11 * published by the Free Software Foundation.
12 */ 12 */
13 13
14#include <linux/kernel.h>
15#include <linux/module.h>
16#include <linux/init.h>
17#include <linux/cpufreq.h> 14#include <linux/cpufreq.h>
18#include <linux/cpu.h> 15#include <linux/init.h>
19#include <linux/jiffies.h> 16#include <linux/kernel.h>
20#include <linux/kernel_stat.h> 17#include <linux/kernel_stat.h>
18#include <linux/kobject.h>
19#include <linux/module.h>
21#include <linux/mutex.h> 20#include <linux/mutex.h>
22#include <linux/hrtimer.h> 21#include <linux/notifier.h>
23#include <linux/tick.h> 22#include <linux/percpu-defs.h>
24#include <linux/ktime.h> 23#include <linux/sysfs.h>
25#include <linux/sched.h> 24#include <linux/types.h>
26 25
27/* 26#include "cpufreq_governor.h"
28 * dbs is used in this file as a shortform for demandbased switching
29 * It helps to keep variable names smaller, simpler
30 */
31 27
28/* Conservative governor macors */
32#define DEF_FREQUENCY_UP_THRESHOLD (80) 29#define DEF_FREQUENCY_UP_THRESHOLD (80)
33#define DEF_FREQUENCY_DOWN_THRESHOLD (20) 30#define DEF_FREQUENCY_DOWN_THRESHOLD (20)
34
35/*
36 * The polling frequency of this governor depends on the capability of
37 * the processor. Default polling frequency is 1000 times the transition
38 * latency of the processor. The governor will work on any processor with
39 * transition latency <= 10mS, using appropriate sampling
40 * rate.
41 * For CPUs with transition latency > 10mS (mostly drivers with CPUFREQ_ETERNAL)
42 * this governor will not work.
43 * All times here are in uS.
44 */
45#define MIN_SAMPLING_RATE_RATIO (2)
46
47static unsigned int min_sampling_rate;
48
49#define LATENCY_MULTIPLIER (1000)
50#define MIN_LATENCY_MULTIPLIER (100)
51#define DEF_SAMPLING_DOWN_FACTOR (1) 31#define DEF_SAMPLING_DOWN_FACTOR (1)
52#define MAX_SAMPLING_DOWN_FACTOR (10) 32#define MAX_SAMPLING_DOWN_FACTOR (10)
53#define TRANSITION_LATENCY_LIMIT (10 * 1000 * 1000)
54
55static void do_dbs_timer(struct work_struct *work);
56
57struct cpu_dbs_info_s {
58 cputime64_t prev_cpu_idle;
59 cputime64_t prev_cpu_wall;
60 cputime64_t prev_cpu_nice;
61 struct cpufreq_policy *cur_policy;
62 struct delayed_work work;
63 unsigned int down_skip;
64 unsigned int requested_freq;
65 int cpu;
66 unsigned int enable:1;
67 /*
68 * percpu mutex that serializes governor limit change with
69 * do_dbs_timer invocation. We do not want do_dbs_timer to run
70 * when user is changing the governor or limits.
71 */
72 struct mutex timer_mutex;
73};
74static DEFINE_PER_CPU(struct cpu_dbs_info_s, cs_cpu_dbs_info);
75 33
76static unsigned int dbs_enable; /* number of CPUs using this policy */ 34static struct dbs_data cs_dbs_data;
35static DEFINE_PER_CPU(struct cs_cpu_dbs_info_s, cs_cpu_dbs_info);
77 36
78/* 37static struct cs_dbs_tuners cs_tuners = {
79 * dbs_mutex protects dbs_enable in governor start/stop.
80 */
81static DEFINE_MUTEX(dbs_mutex);
82
83static struct dbs_tuners {
84 unsigned int sampling_rate;
85 unsigned int sampling_down_factor;
86 unsigned int up_threshold;
87 unsigned int down_threshold;
88 unsigned int ignore_nice;
89 unsigned int freq_step;
90} dbs_tuners_ins = {
91 .up_threshold = DEF_FREQUENCY_UP_THRESHOLD, 38 .up_threshold = DEF_FREQUENCY_UP_THRESHOLD,
92 .down_threshold = DEF_FREQUENCY_DOWN_THRESHOLD, 39 .down_threshold = DEF_FREQUENCY_DOWN_THRESHOLD,
93 .sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR, 40 .sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR,
@@ -95,61 +42,121 @@ static struct dbs_tuners {
95 .freq_step = 5, 42 .freq_step = 5,
96}; 43};
97 44
98/* keep track of frequency transitions */ 45/*
99static int 46 * Every sampling_rate, we check, if current idle time is less than 20%
100dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val, 47 * (default), then we try to increase frequency Every sampling_rate *
101 void *data) 48 * sampling_down_factor, we check, if current idle time is more than 80%, then
49 * we try to decrease frequency
50 *
51 * Any frequency increase takes it to the maximum frequency. Frequency reduction
52 * happens at minimum steps of 5% (default) of maximum frequency
53 */
54static void cs_check_cpu(int cpu, unsigned int load)
102{ 55{
103 struct cpufreq_freqs *freq = data; 56 struct cs_cpu_dbs_info_s *dbs_info = &per_cpu(cs_cpu_dbs_info, cpu);
104 struct cpu_dbs_info_s *this_dbs_info = &per_cpu(cs_cpu_dbs_info, 57 struct cpufreq_policy *policy = dbs_info->cdbs.cur_policy;
105 freq->cpu); 58 unsigned int freq_target;
59
60 /*
61 * break out if we 'cannot' reduce the speed as the user might
62 * want freq_step to be zero
63 */
64 if (cs_tuners.freq_step == 0)
65 return;
66
67 /* Check for frequency increase */
68 if (load > cs_tuners.up_threshold) {
69 dbs_info->down_skip = 0;
70
71 /* if we are already at full speed then break out early */
72 if (dbs_info->requested_freq == policy->max)
73 return;
74
75 freq_target = (cs_tuners.freq_step * policy->max) / 100;
76
77 /* max freq cannot be less than 100. But who knows.... */
78 if (unlikely(freq_target == 0))
79 freq_target = 5;
80
81 dbs_info->requested_freq += freq_target;
82 if (dbs_info->requested_freq > policy->max)
83 dbs_info->requested_freq = policy->max;
106 84
85 __cpufreq_driver_target(policy, dbs_info->requested_freq,
86 CPUFREQ_RELATION_H);
87 return;
88 }
89
90 /*
91 * The optimal frequency is the frequency that is the lowest that can
92 * support the current CPU usage without triggering the up policy. To be
93 * safe, we focus 10 points under the threshold.
94 */
95 if (load < (cs_tuners.down_threshold - 10)) {
96 freq_target = (cs_tuners.freq_step * policy->max) / 100;
97
98 dbs_info->requested_freq -= freq_target;
99 if (dbs_info->requested_freq < policy->min)
100 dbs_info->requested_freq = policy->min;
101
102 /*
103 * if we cannot reduce the frequency anymore, break out early
104 */
105 if (policy->cur == policy->min)
106 return;
107
108 __cpufreq_driver_target(policy, dbs_info->requested_freq,
109 CPUFREQ_RELATION_H);
110 return;
111 }
112}
113
114static void cs_dbs_timer(struct work_struct *work)
115{
116 struct cs_cpu_dbs_info_s *dbs_info = container_of(work,
117 struct cs_cpu_dbs_info_s, cdbs.work.work);
118 unsigned int cpu = dbs_info->cdbs.cpu;
119 int delay = delay_for_sampling_rate(cs_tuners.sampling_rate);
120
121 mutex_lock(&dbs_info->cdbs.timer_mutex);
122
123 dbs_check_cpu(&cs_dbs_data, cpu);
124
125 schedule_delayed_work_on(cpu, &dbs_info->cdbs.work, delay);
126 mutex_unlock(&dbs_info->cdbs.timer_mutex);
127}
128
129static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
130 void *data)
131{
132 struct cpufreq_freqs *freq = data;
133 struct cs_cpu_dbs_info_s *dbs_info =
134 &per_cpu(cs_cpu_dbs_info, freq->cpu);
107 struct cpufreq_policy *policy; 135 struct cpufreq_policy *policy;
108 136
109 if (!this_dbs_info->enable) 137 if (!dbs_info->enable)
110 return 0; 138 return 0;
111 139
112 policy = this_dbs_info->cur_policy; 140 policy = dbs_info->cdbs.cur_policy;
113 141
114 /* 142 /*
115 * we only care if our internally tracked freq moves outside 143 * we only care if our internally tracked freq moves outside the 'valid'
116 * the 'valid' ranges of freqency available to us otherwise 144 * ranges of freqency available to us otherwise we do not change it
117 * we do not change it
118 */ 145 */
119 if (this_dbs_info->requested_freq > policy->max 146 if (dbs_info->requested_freq > policy->max
120 || this_dbs_info->requested_freq < policy->min) 147 || dbs_info->requested_freq < policy->min)
121 this_dbs_info->requested_freq = freq->new; 148 dbs_info->requested_freq = freq->new;
122 149
123 return 0; 150 return 0;
124} 151}
125 152
126static struct notifier_block dbs_cpufreq_notifier_block = {
127 .notifier_call = dbs_cpufreq_notifier
128};
129
130/************************** sysfs interface ************************/ 153/************************** sysfs interface ************************/
131static ssize_t show_sampling_rate_min(struct kobject *kobj, 154static ssize_t show_sampling_rate_min(struct kobject *kobj,
132 struct attribute *attr, char *buf) 155 struct attribute *attr, char *buf)
133{ 156{
134 return sprintf(buf, "%u\n", min_sampling_rate); 157 return sprintf(buf, "%u\n", cs_dbs_data.min_sampling_rate);
135} 158}
136 159
137define_one_global_ro(sampling_rate_min);
138
139/* cpufreq_conservative Governor Tunables */
140#define show_one(file_name, object) \
141static ssize_t show_##file_name \
142(struct kobject *kobj, struct attribute *attr, char *buf) \
143{ \
144 return sprintf(buf, "%u\n", dbs_tuners_ins.object); \
145}
146show_one(sampling_rate, sampling_rate);
147show_one(sampling_down_factor, sampling_down_factor);
148show_one(up_threshold, up_threshold);
149show_one(down_threshold, down_threshold);
150show_one(ignore_nice_load, ignore_nice);
151show_one(freq_step, freq_step);
152
153static ssize_t store_sampling_down_factor(struct kobject *a, 160static ssize_t store_sampling_down_factor(struct kobject *a,
154 struct attribute *b, 161 struct attribute *b,
155 const char *buf, size_t count) 162 const char *buf, size_t count)
@@ -161,7 +168,7 @@ static ssize_t store_sampling_down_factor(struct kobject *a,
161 if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1) 168 if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1)
162 return -EINVAL; 169 return -EINVAL;
163 170
164 dbs_tuners_ins.sampling_down_factor = input; 171 cs_tuners.sampling_down_factor = input;
165 return count; 172 return count;
166} 173}
167 174
@@ -175,7 +182,7 @@ static ssize_t store_sampling_rate(struct kobject *a, struct attribute *b,
175 if (ret != 1) 182 if (ret != 1)
176 return -EINVAL; 183 return -EINVAL;
177 184
178 dbs_tuners_ins.sampling_rate = max(input, min_sampling_rate); 185 cs_tuners.sampling_rate = max(input, cs_dbs_data.min_sampling_rate);
179 return count; 186 return count;
180} 187}
181 188
@@ -186,11 +193,10 @@ static ssize_t store_up_threshold(struct kobject *a, struct attribute *b,
186 int ret; 193 int ret;
187 ret = sscanf(buf, "%u", &input); 194 ret = sscanf(buf, "%u", &input);
188 195
189 if (ret != 1 || input > 100 || 196 if (ret != 1 || input > 100 || input <= cs_tuners.down_threshold)
190 input <= dbs_tuners_ins.down_threshold)
191 return -EINVAL; 197 return -EINVAL;
192 198
193 dbs_tuners_ins.up_threshold = input; 199 cs_tuners.up_threshold = input;
194 return count; 200 return count;
195} 201}
196 202
@@ -203,21 +209,19 @@ static ssize_t store_down_threshold(struct kobject *a, struct attribute *b,
203 209
204 /* cannot be lower than 11 otherwise freq will not fall */ 210 /* cannot be lower than 11 otherwise freq will not fall */
205 if (ret != 1 || input < 11 || input > 100 || 211 if (ret != 1 || input < 11 || input > 100 ||
206 input >= dbs_tuners_ins.up_threshold) 212 input >= cs_tuners.up_threshold)
207 return -EINVAL; 213 return -EINVAL;
208 214
209 dbs_tuners_ins.down_threshold = input; 215 cs_tuners.down_threshold = input;
210 return count; 216 return count;
211} 217}
212 218
213static ssize_t store_ignore_nice_load(struct kobject *a, struct attribute *b, 219static ssize_t store_ignore_nice_load(struct kobject *a, struct attribute *b,
214 const char *buf, size_t count) 220 const char *buf, size_t count)
215{ 221{
216 unsigned int input; 222 unsigned int input, j;
217 int ret; 223 int ret;
218 224
219 unsigned int j;
220
221 ret = sscanf(buf, "%u", &input); 225 ret = sscanf(buf, "%u", &input);
222 if (ret != 1) 226 if (ret != 1)
223 return -EINVAL; 227 return -EINVAL;
@@ -225,19 +229,20 @@ static ssize_t store_ignore_nice_load(struct kobject *a, struct attribute *b,
225 if (input > 1) 229 if (input > 1)
226 input = 1; 230 input = 1;
227 231
228 if (input == dbs_tuners_ins.ignore_nice) /* nothing to do */ 232 if (input == cs_tuners.ignore_nice) /* nothing to do */
229 return count; 233 return count;
230 234
231 dbs_tuners_ins.ignore_nice = input; 235 cs_tuners.ignore_nice = input;
232 236
233 /* we need to re-evaluate prev_cpu_idle */ 237 /* we need to re-evaluate prev_cpu_idle */
234 for_each_online_cpu(j) { 238 for_each_online_cpu(j) {
235 struct cpu_dbs_info_s *dbs_info; 239 struct cs_cpu_dbs_info_s *dbs_info;
236 dbs_info = &per_cpu(cs_cpu_dbs_info, j); 240 dbs_info = &per_cpu(cs_cpu_dbs_info, j);
237 dbs_info->prev_cpu_idle = get_cpu_idle_time(j, 241 dbs_info->cdbs.prev_cpu_idle = get_cpu_idle_time(j,
238 &dbs_info->prev_cpu_wall); 242 &dbs_info->cdbs.prev_cpu_wall);
239 if (dbs_tuners_ins.ignore_nice) 243 if (cs_tuners.ignore_nice)
240 dbs_info->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; 244 dbs_info->cdbs.prev_cpu_nice =
245 kcpustat_cpu(j).cpustat[CPUTIME_NICE];
241 } 246 }
242 return count; 247 return count;
243} 248}
@@ -255,18 +260,28 @@ static ssize_t store_freq_step(struct kobject *a, struct attribute *b,
255 if (input > 100) 260 if (input > 100)
256 input = 100; 261 input = 100;
257 262
258 /* no need to test here if freq_step is zero as the user might actually 263 /*
259 * want this, they would be crazy though :) */ 264 * no need to test here if freq_step is zero as the user might actually
260 dbs_tuners_ins.freq_step = input; 265 * want this, they would be crazy though :)
266 */
267 cs_tuners.freq_step = input;
261 return count; 268 return count;
262} 269}
263 270
271show_one(cs, sampling_rate, sampling_rate);
272show_one(cs, sampling_down_factor, sampling_down_factor);
273show_one(cs, up_threshold, up_threshold);
274show_one(cs, down_threshold, down_threshold);
275show_one(cs, ignore_nice_load, ignore_nice);
276show_one(cs, freq_step, freq_step);
277
264define_one_global_rw(sampling_rate); 278define_one_global_rw(sampling_rate);
265define_one_global_rw(sampling_down_factor); 279define_one_global_rw(sampling_down_factor);
266define_one_global_rw(up_threshold); 280define_one_global_rw(up_threshold);
267define_one_global_rw(down_threshold); 281define_one_global_rw(down_threshold);
268define_one_global_rw(ignore_nice_load); 282define_one_global_rw(ignore_nice_load);
269define_one_global_rw(freq_step); 283define_one_global_rw(freq_step);
284define_one_global_ro(sampling_rate_min);
270 285
271static struct attribute *dbs_attributes[] = { 286static struct attribute *dbs_attributes[] = {
272 &sampling_rate_min.attr, 287 &sampling_rate_min.attr,
@@ -279,283 +294,38 @@ static struct attribute *dbs_attributes[] = {
279 NULL 294 NULL
280}; 295};
281 296
282static struct attribute_group dbs_attr_group = { 297static struct attribute_group cs_attr_group = {
283 .attrs = dbs_attributes, 298 .attrs = dbs_attributes,
284 .name = "conservative", 299 .name = "conservative",
285}; 300};
286 301
287/************************** sysfs end ************************/ 302/************************** sysfs end ************************/
288 303
289static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) 304define_get_cpu_dbs_routines(cs_cpu_dbs_info);
290{
291 unsigned int load = 0;
292 unsigned int max_load = 0;
293 unsigned int freq_target;
294
295 struct cpufreq_policy *policy;
296 unsigned int j;
297
298 policy = this_dbs_info->cur_policy;
299
300 /*
301 * Every sampling_rate, we check, if current idle time is less
302 * than 20% (default), then we try to increase frequency
303 * Every sampling_rate*sampling_down_factor, we check, if current
304 * idle time is more than 80%, then we try to decrease frequency
305 *
306 * Any frequency increase takes it to the maximum frequency.
307 * Frequency reduction happens at minimum steps of
308 * 5% (default) of maximum frequency
309 */
310
311 /* Get Absolute Load */
312 for_each_cpu(j, policy->cpus) {
313 struct cpu_dbs_info_s *j_dbs_info;
314 cputime64_t cur_wall_time, cur_idle_time;
315 unsigned int idle_time, wall_time;
316
317 j_dbs_info = &per_cpu(cs_cpu_dbs_info, j);
318
319 cur_idle_time = get_cpu_idle_time(j, &cur_wall_time);
320
321 wall_time = (unsigned int)
322 (cur_wall_time - j_dbs_info->prev_cpu_wall);
323 j_dbs_info->prev_cpu_wall = cur_wall_time;
324
325 idle_time = (unsigned int)
326 (cur_idle_time - j_dbs_info->prev_cpu_idle);
327 j_dbs_info->prev_cpu_idle = cur_idle_time;
328
329 if (dbs_tuners_ins.ignore_nice) {
330 u64 cur_nice;
331 unsigned long cur_nice_jiffies;
332
333 cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE] -
334 j_dbs_info->prev_cpu_nice;
335 /*
336 * Assumption: nice time between sampling periods will
337 * be less than 2^32 jiffies for 32 bit sys
338 */
339 cur_nice_jiffies = (unsigned long)
340 cputime64_to_jiffies64(cur_nice);
341
342 j_dbs_info->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE];
343 idle_time += jiffies_to_usecs(cur_nice_jiffies);
344 }
345
346 if (unlikely(!wall_time || wall_time < idle_time))
347 continue;
348
349 load = 100 * (wall_time - idle_time) / wall_time;
350
351 if (load > max_load)
352 max_load = load;
353 }
354 305
355 /* 306static struct notifier_block cs_cpufreq_notifier_block = {
356 * break out if we 'cannot' reduce the speed as the user might 307 .notifier_call = dbs_cpufreq_notifier,
357 * want freq_step to be zero 308};
358 */
359 if (dbs_tuners_ins.freq_step == 0)
360 return;
361
362 /* Check for frequency increase */
363 if (max_load > dbs_tuners_ins.up_threshold) {
364 this_dbs_info->down_skip = 0;
365
366 /* if we are already at full speed then break out early */
367 if (this_dbs_info->requested_freq == policy->max)
368 return;
369
370 freq_target = (dbs_tuners_ins.freq_step * policy->max) / 100;
371
372 /* max freq cannot be less than 100. But who knows.... */
373 if (unlikely(freq_target == 0))
374 freq_target = 5;
375
376 this_dbs_info->requested_freq += freq_target;
377 if (this_dbs_info->requested_freq > policy->max)
378 this_dbs_info->requested_freq = policy->max;
379
380 __cpufreq_driver_target(policy, this_dbs_info->requested_freq,
381 CPUFREQ_RELATION_H);
382 return;
383 }
384
385 /*
386 * The optimal frequency is the frequency that is the lowest that
387 * can support the current CPU usage without triggering the up
388 * policy. To be safe, we focus 10 points under the threshold.
389 */
390 if (max_load < (dbs_tuners_ins.down_threshold - 10)) {
391 freq_target = (dbs_tuners_ins.freq_step * policy->max) / 100;
392
393 this_dbs_info->requested_freq -= freq_target;
394 if (this_dbs_info->requested_freq < policy->min)
395 this_dbs_info->requested_freq = policy->min;
396
397 /*
398 * if we cannot reduce the frequency anymore, break out early
399 */
400 if (policy->cur == policy->min)
401 return;
402
403 __cpufreq_driver_target(policy, this_dbs_info->requested_freq,
404 CPUFREQ_RELATION_H);
405 return;
406 }
407}
408
409static void do_dbs_timer(struct work_struct *work)
410{
411 struct cpu_dbs_info_s *dbs_info =
412 container_of(work, struct cpu_dbs_info_s, work.work);
413 unsigned int cpu = dbs_info->cpu;
414
415 /* We want all CPUs to do sampling nearly on same jiffy */
416 int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
417
418 delay -= jiffies % delay;
419
420 mutex_lock(&dbs_info->timer_mutex);
421
422 dbs_check_cpu(dbs_info);
423
424 schedule_delayed_work_on(cpu, &dbs_info->work, delay);
425 mutex_unlock(&dbs_info->timer_mutex);
426}
427
428static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info)
429{
430 /* We want all CPUs to do sampling nearly on same jiffy */
431 int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
432 delay -= jiffies % delay;
433 309
434 dbs_info->enable = 1; 310static struct cs_ops cs_ops = {
435 INIT_DEFERRABLE_WORK(&dbs_info->work, do_dbs_timer); 311 .notifier_block = &cs_cpufreq_notifier_block,
436 schedule_delayed_work_on(dbs_info->cpu, &dbs_info->work, delay); 312};
437}
438 313
439static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info) 314static struct dbs_data cs_dbs_data = {
440{ 315 .governor = GOV_CONSERVATIVE,
441 dbs_info->enable = 0; 316 .attr_group = &cs_attr_group,
442 cancel_delayed_work_sync(&dbs_info->work); 317 .tuners = &cs_tuners,
443} 318 .get_cpu_cdbs = get_cpu_cdbs,
319 .get_cpu_dbs_info_s = get_cpu_dbs_info_s,
320 .gov_dbs_timer = cs_dbs_timer,
321 .gov_check_cpu = cs_check_cpu,
322 .gov_ops = &cs_ops,
323};
444 324
445static int cpufreq_governor_dbs(struct cpufreq_policy *policy, 325static int cs_cpufreq_governor_dbs(struct cpufreq_policy *policy,
446 unsigned int event) 326 unsigned int event)
447{ 327{
448 unsigned int cpu = policy->cpu; 328 return cpufreq_governor_dbs(&cs_dbs_data, policy, event);
449 struct cpu_dbs_info_s *this_dbs_info;
450 unsigned int j;
451 int rc;
452
453 this_dbs_info = &per_cpu(cs_cpu_dbs_info, cpu);
454
455 switch (event) {
456 case CPUFREQ_GOV_START:
457 if ((!cpu_online(cpu)) || (!policy->cur))
458 return -EINVAL;
459
460 mutex_lock(&dbs_mutex);
461
462 for_each_cpu(j, policy->cpus) {
463 struct cpu_dbs_info_s *j_dbs_info;
464 j_dbs_info = &per_cpu(cs_cpu_dbs_info, j);
465 j_dbs_info->cur_policy = policy;
466
467 j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
468 &j_dbs_info->prev_cpu_wall);
469 if (dbs_tuners_ins.ignore_nice)
470 j_dbs_info->prev_cpu_nice =
471 kcpustat_cpu(j).cpustat[CPUTIME_NICE];
472 }
473 this_dbs_info->cpu = cpu;
474 this_dbs_info->down_skip = 0;
475 this_dbs_info->requested_freq = policy->cur;
476
477 mutex_init(&this_dbs_info->timer_mutex);
478 dbs_enable++;
479 /*
480 * Start the timerschedule work, when this governor
481 * is used for first time
482 */
483 if (dbs_enable == 1) {
484 unsigned int latency;
485 /* policy latency is in nS. Convert it to uS first */
486 latency = policy->cpuinfo.transition_latency / 1000;
487 if (latency == 0)
488 latency = 1;
489
490 rc = sysfs_create_group(cpufreq_global_kobject,
491 &dbs_attr_group);
492 if (rc) {
493 mutex_unlock(&dbs_mutex);
494 return rc;
495 }
496
497 /*
498 * conservative does not implement micro like ondemand
499 * governor, thus we are bound to jiffes/HZ
500 */
501 min_sampling_rate =
502 MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10);
503 /* Bring kernel and HW constraints together */
504 min_sampling_rate = max(min_sampling_rate,
505 MIN_LATENCY_MULTIPLIER * latency);
506 dbs_tuners_ins.sampling_rate =
507 max(min_sampling_rate,
508 latency * LATENCY_MULTIPLIER);
509
510 cpufreq_register_notifier(
511 &dbs_cpufreq_notifier_block,
512 CPUFREQ_TRANSITION_NOTIFIER);
513 }
514 mutex_unlock(&dbs_mutex);
515
516 dbs_timer_init(this_dbs_info);
517
518 break;
519
520 case CPUFREQ_GOV_STOP:
521 dbs_timer_exit(this_dbs_info);
522
523 mutex_lock(&dbs_mutex);
524 dbs_enable--;
525 mutex_destroy(&this_dbs_info->timer_mutex);
526
527 /*
528 * Stop the timerschedule work, when this governor
529 * is used for first time
530 */
531 if (dbs_enable == 0)
532 cpufreq_unregister_notifier(
533 &dbs_cpufreq_notifier_block,
534 CPUFREQ_TRANSITION_NOTIFIER);
535
536 mutex_unlock(&dbs_mutex);
537 if (!dbs_enable)
538 sysfs_remove_group(cpufreq_global_kobject,
539 &dbs_attr_group);
540
541 break;
542
543 case CPUFREQ_GOV_LIMITS:
544 mutex_lock(&this_dbs_info->timer_mutex);
545 if (policy->max < this_dbs_info->cur_policy->cur)
546 __cpufreq_driver_target(
547 this_dbs_info->cur_policy,
548 policy->max, CPUFREQ_RELATION_H);
549 else if (policy->min > this_dbs_info->cur_policy->cur)
550 __cpufreq_driver_target(
551 this_dbs_info->cur_policy,
552 policy->min, CPUFREQ_RELATION_L);
553 dbs_check_cpu(this_dbs_info);
554 mutex_unlock(&this_dbs_info->timer_mutex);
555
556 break;
557 }
558 return 0;
559} 329}
560 330
561#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE 331#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE
@@ -563,13 +333,14 @@ static
563#endif 333#endif
564struct cpufreq_governor cpufreq_gov_conservative = { 334struct cpufreq_governor cpufreq_gov_conservative = {
565 .name = "conservative", 335 .name = "conservative",
566 .governor = cpufreq_governor_dbs, 336 .governor = cs_cpufreq_governor_dbs,
567 .max_transition_latency = TRANSITION_LATENCY_LIMIT, 337 .max_transition_latency = TRANSITION_LATENCY_LIMIT,
568 .owner = THIS_MODULE, 338 .owner = THIS_MODULE,
569}; 339};
570 340
571static int __init cpufreq_gov_dbs_init(void) 341static int __init cpufreq_gov_dbs_init(void)
572{ 342{
343 mutex_init(&cs_dbs_data.mutex);
573 return cpufreq_register_governor(&cpufreq_gov_conservative); 344 return cpufreq_register_governor(&cpufreq_gov_conservative);
574} 345}
575 346
@@ -578,7 +349,6 @@ static void __exit cpufreq_gov_dbs_exit(void)
578 cpufreq_unregister_governor(&cpufreq_gov_conservative); 349 cpufreq_unregister_governor(&cpufreq_gov_conservative);
579} 350}
580 351
581
582MODULE_AUTHOR("Alexander Clouter <alex@digriz.org.uk>"); 352MODULE_AUTHOR("Alexander Clouter <alex@digriz.org.uk>");
583MODULE_DESCRIPTION("'cpufreq_conservative' - A dynamic cpufreq governor for " 353MODULE_DESCRIPTION("'cpufreq_conservative' - A dynamic cpufreq governor for "
584 "Low Latency Frequency Transition capable processors " 354 "Low Latency Frequency Transition capable processors "
diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index 679842a8d34a..5ea2c829a796 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -3,19 +3,31 @@
3 * 3 *
4 * CPUFREQ governors common code 4 * CPUFREQ governors common code
5 * 5 *
6 * Copyright (C) 2001 Russell King
7 * (C) 2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>.
8 * (C) 2003 Jun Nakajima <jun.nakajima@intel.com>
9 * (C) 2009 Alexander Clouter <alex@digriz.org.uk>
10 * (c) 2012 Viresh Kumar <viresh.kumar@linaro.org>
11 *
6 * This program is free software; you can redistribute it and/or modify 12 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as 13 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation. 14 * published by the Free Software Foundation.
9 */ 15 */
10 16
17#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
18
11#include <asm/cputime.h> 19#include <asm/cputime.h>
20#include <linux/cpufreq.h>
21#include <linux/cpumask.h>
12#include <linux/export.h> 22#include <linux/export.h>
13#include <linux/kernel_stat.h> 23#include <linux/kernel_stat.h>
24#include <linux/mutex.h>
14#include <linux/tick.h> 25#include <linux/tick.h>
15#include <linux/types.h> 26#include <linux/types.h>
16/* 27#include <linux/workqueue.h>
17 * Code picked from earlier governer implementations 28
18 */ 29#include "cpufreq_governor.h"
30
19static inline u64 get_cpu_idle_time_jiffy(unsigned int cpu, u64 *wall) 31static inline u64 get_cpu_idle_time_jiffy(unsigned int cpu, u64 *wall)
20{ 32{
21 u64 idle_time; 33 u64 idle_time;
@@ -33,9 +45,9 @@ static inline u64 get_cpu_idle_time_jiffy(unsigned int cpu, u64 *wall)
33 45
34 idle_time = cur_wall_time - busy_time; 46 idle_time = cur_wall_time - busy_time;
35 if (wall) 47 if (wall)
36 *wall = cputime_to_usecs(cur_wall_time); 48 *wall = jiffies_to_usecs(cur_wall_time);
37 49
38 return cputime_to_usecs(idle_time); 50 return jiffies_to_usecs(idle_time);
39} 51}
40 52
41cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall) 53cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall)
@@ -50,3 +62,257 @@ cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall)
50 return idle_time; 62 return idle_time;
51} 63}
52EXPORT_SYMBOL_GPL(get_cpu_idle_time); 64EXPORT_SYMBOL_GPL(get_cpu_idle_time);
65
66void dbs_check_cpu(struct dbs_data *dbs_data, int cpu)
67{
68 struct cpu_dbs_common_info *cdbs = dbs_data->get_cpu_cdbs(cpu);
69 struct od_dbs_tuners *od_tuners = dbs_data->tuners;
70 struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
71 struct cpufreq_policy *policy;
72 unsigned int max_load = 0;
73 unsigned int ignore_nice;
74 unsigned int j;
75
76 if (dbs_data->governor == GOV_ONDEMAND)
77 ignore_nice = od_tuners->ignore_nice;
78 else
79 ignore_nice = cs_tuners->ignore_nice;
80
81 policy = cdbs->cur_policy;
82
83 /* Get Absolute Load (in terms of freq for ondemand gov) */
84 for_each_cpu(j, policy->cpus) {
85 struct cpu_dbs_common_info *j_cdbs;
86 cputime64_t cur_wall_time, cur_idle_time, cur_iowait_time;
87 unsigned int idle_time, wall_time, iowait_time;
88 unsigned int load;
89
90 j_cdbs = dbs_data->get_cpu_cdbs(j);
91
92 cur_idle_time = get_cpu_idle_time(j, &cur_wall_time);
93
94 wall_time = (unsigned int)
95 (cur_wall_time - j_cdbs->prev_cpu_wall);
96 j_cdbs->prev_cpu_wall = cur_wall_time;
97
98 idle_time = (unsigned int)
99 (cur_idle_time - j_cdbs->prev_cpu_idle);
100 j_cdbs->prev_cpu_idle = cur_idle_time;
101
102 if (ignore_nice) {
103 u64 cur_nice;
104 unsigned long cur_nice_jiffies;
105
106 cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE] -
107 cdbs->prev_cpu_nice;
108 /*
109 * Assumption: nice time between sampling periods will
110 * be less than 2^32 jiffies for 32 bit sys
111 */
112 cur_nice_jiffies = (unsigned long)
113 cputime64_to_jiffies64(cur_nice);
114
115 cdbs->prev_cpu_nice =
116 kcpustat_cpu(j).cpustat[CPUTIME_NICE];
117 idle_time += jiffies_to_usecs(cur_nice_jiffies);
118 }
119
120 if (dbs_data->governor == GOV_ONDEMAND) {
121 struct od_cpu_dbs_info_s *od_j_dbs_info =
122 dbs_data->get_cpu_dbs_info_s(cpu);
123
124 cur_iowait_time = get_cpu_iowait_time_us(j,
125 &cur_wall_time);
126 if (cur_iowait_time == -1ULL)
127 cur_iowait_time = 0;
128
129 iowait_time = (unsigned int) (cur_iowait_time -
130 od_j_dbs_info->prev_cpu_iowait);
131 od_j_dbs_info->prev_cpu_iowait = cur_iowait_time;
132
133 /*
134 * For the purpose of ondemand, waiting for disk IO is
135 * an indication that you're performance critical, and
136 * not that the system is actually idle. So subtract the
137 * iowait time from the cpu idle time.
138 */
139 if (od_tuners->io_is_busy && idle_time >= iowait_time)
140 idle_time -= iowait_time;
141 }
142
143 if (unlikely(!wall_time || wall_time < idle_time))
144 continue;
145
146 load = 100 * (wall_time - idle_time) / wall_time;
147
148 if (dbs_data->governor == GOV_ONDEMAND) {
149 int freq_avg = __cpufreq_driver_getavg(policy, j);
150 if (freq_avg <= 0)
151 freq_avg = policy->cur;
152
153 load *= freq_avg;
154 }
155
156 if (load > max_load)
157 max_load = load;
158 }
159
160 dbs_data->gov_check_cpu(cpu, max_load);
161}
162EXPORT_SYMBOL_GPL(dbs_check_cpu);
163
164static inline void dbs_timer_init(struct dbs_data *dbs_data,
165 struct cpu_dbs_common_info *cdbs, unsigned int sampling_rate)
166{
167 int delay = delay_for_sampling_rate(sampling_rate);
168
169 INIT_DEFERRABLE_WORK(&cdbs->work, dbs_data->gov_dbs_timer);
170 schedule_delayed_work_on(cdbs->cpu, &cdbs->work, delay);
171}
172
173static inline void dbs_timer_exit(struct cpu_dbs_common_info *cdbs)
174{
175 cancel_delayed_work_sync(&cdbs->work);
176}
177
178int cpufreq_governor_dbs(struct dbs_data *dbs_data,
179 struct cpufreq_policy *policy, unsigned int event)
180{
181 struct od_cpu_dbs_info_s *od_dbs_info = NULL;
182 struct cs_cpu_dbs_info_s *cs_dbs_info = NULL;
183 struct od_dbs_tuners *od_tuners = dbs_data->tuners;
184 struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
185 struct cpu_dbs_common_info *cpu_cdbs;
186 unsigned int *sampling_rate, latency, ignore_nice, j, cpu = policy->cpu;
187 int rc;
188
189 cpu_cdbs = dbs_data->get_cpu_cdbs(cpu);
190
191 if (dbs_data->governor == GOV_CONSERVATIVE) {
192 cs_dbs_info = dbs_data->get_cpu_dbs_info_s(cpu);
193 sampling_rate = &cs_tuners->sampling_rate;
194 ignore_nice = cs_tuners->ignore_nice;
195 } else {
196 od_dbs_info = dbs_data->get_cpu_dbs_info_s(cpu);
197 sampling_rate = &od_tuners->sampling_rate;
198 ignore_nice = od_tuners->ignore_nice;
199 }
200
201 switch (event) {
202 case CPUFREQ_GOV_START:
203 if ((!cpu_online(cpu)) || (!policy->cur))
204 return -EINVAL;
205
206 mutex_lock(&dbs_data->mutex);
207
208 dbs_data->enable++;
209 cpu_cdbs->cpu = cpu;
210 for_each_cpu(j, policy->cpus) {
211 struct cpu_dbs_common_info *j_cdbs;
212 j_cdbs = dbs_data->get_cpu_cdbs(j);
213
214 j_cdbs->cur_policy = policy;
215 j_cdbs->prev_cpu_idle = get_cpu_idle_time(j,
216 &j_cdbs->prev_cpu_wall);
217 if (ignore_nice)
218 j_cdbs->prev_cpu_nice =
219 kcpustat_cpu(j).cpustat[CPUTIME_NICE];
220 }
221
222 /*
223 * Start the timerschedule work, when this governor is used for
224 * first time
225 */
226 if (dbs_data->enable != 1)
227 goto second_time;
228
229 rc = sysfs_create_group(cpufreq_global_kobject,
230 dbs_data->attr_group);
231 if (rc) {
232 mutex_unlock(&dbs_data->mutex);
233 return rc;
234 }
235
236 /* policy latency is in nS. Convert it to uS first */
237 latency = policy->cpuinfo.transition_latency / 1000;
238 if (latency == 0)
239 latency = 1;
240
241 /*
242 * conservative does not implement micro like ondemand
243 * governor, thus we are bound to jiffes/HZ
244 */
245 if (dbs_data->governor == GOV_CONSERVATIVE) {
246 struct cs_ops *ops = dbs_data->gov_ops;
247
248 cpufreq_register_notifier(ops->notifier_block,
249 CPUFREQ_TRANSITION_NOTIFIER);
250
251 dbs_data->min_sampling_rate = MIN_SAMPLING_RATE_RATIO *
252 jiffies_to_usecs(10);
253 } else {
254 struct od_ops *ops = dbs_data->gov_ops;
255
256 od_tuners->io_is_busy = ops->io_busy();
257 }
258
259 /* Bring kernel and HW constraints together */
260 dbs_data->min_sampling_rate = max(dbs_data->min_sampling_rate,
261 MIN_LATENCY_MULTIPLIER * latency);
262 *sampling_rate = max(dbs_data->min_sampling_rate, latency *
263 LATENCY_MULTIPLIER);
264
265second_time:
266 if (dbs_data->governor == GOV_CONSERVATIVE) {
267 cs_dbs_info->down_skip = 0;
268 cs_dbs_info->enable = 1;
269 cs_dbs_info->requested_freq = policy->cur;
270 } else {
271 struct od_ops *ops = dbs_data->gov_ops;
272 od_dbs_info->rate_mult = 1;
273 od_dbs_info->sample_type = OD_NORMAL_SAMPLE;
274 ops->powersave_bias_init_cpu(cpu);
275 }
276 mutex_unlock(&dbs_data->mutex);
277
278 mutex_init(&cpu_cdbs->timer_mutex);
279 dbs_timer_init(dbs_data, cpu_cdbs, *sampling_rate);
280 break;
281
282 case CPUFREQ_GOV_STOP:
283 if (dbs_data->governor == GOV_CONSERVATIVE)
284 cs_dbs_info->enable = 0;
285
286 dbs_timer_exit(cpu_cdbs);
287
288 mutex_lock(&dbs_data->mutex);
289 mutex_destroy(&cpu_cdbs->timer_mutex);
290 dbs_data->enable--;
291 if (!dbs_data->enable) {
292 struct cs_ops *ops = dbs_data->gov_ops;
293
294 sysfs_remove_group(cpufreq_global_kobject,
295 dbs_data->attr_group);
296 if (dbs_data->governor == GOV_CONSERVATIVE)
297 cpufreq_unregister_notifier(ops->notifier_block,
298 CPUFREQ_TRANSITION_NOTIFIER);
299 }
300 mutex_unlock(&dbs_data->mutex);
301
302 break;
303
304 case CPUFREQ_GOV_LIMITS:
305 mutex_lock(&cpu_cdbs->timer_mutex);
306 if (policy->max < cpu_cdbs->cur_policy->cur)
307 __cpufreq_driver_target(cpu_cdbs->cur_policy,
308 policy->max, CPUFREQ_RELATION_H);
309 else if (policy->min > cpu_cdbs->cur_policy->cur)
310 __cpufreq_driver_target(cpu_cdbs->cur_policy,
311 policy->min, CPUFREQ_RELATION_L);
312 dbs_check_cpu(dbs_data, cpu);
313 mutex_unlock(&cpu_cdbs->timer_mutex);
314 break;
315 }
316 return 0;
317}
318EXPORT_SYMBOL_GPL(cpufreq_governor_dbs);
diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h
new file mode 100644
index 000000000000..34e14adfc3f9
--- /dev/null
+++ b/drivers/cpufreq/cpufreq_governor.h
@@ -0,0 +1,177 @@
1/*
2 * drivers/cpufreq/cpufreq_governor.h
3 *
4 * Header file for CPUFreq governors common code
5 *
6 * Copyright (C) 2001 Russell King
7 * (C) 2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>.
8 * (C) 2003 Jun Nakajima <jun.nakajima@intel.com>
9 * (C) 2009 Alexander Clouter <alex@digriz.org.uk>
10 * (c) 2012 Viresh Kumar <viresh.kumar@linaro.org>
11 *
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License version 2 as
14 * published by the Free Software Foundation.
15 */
16
17#ifndef _CPUFREQ_GOVERNER_H
18#define _CPUFREQ_GOVERNER_H
19
20#include <asm/cputime.h>
21#include <linux/cpufreq.h>
22#include <linux/kobject.h>
23#include <linux/mutex.h>
24#include <linux/workqueue.h>
25#include <linux/sysfs.h>
26
27/*
28 * The polling frequency depends on the capability of the processor. Default
29 * polling frequency is 1000 times the transition latency of the processor. The
30 * governor will work on any processor with transition latency <= 10mS, using
31 * appropriate sampling rate.
32 *
33 * For CPUs with transition latency > 10mS (mostly drivers with CPUFREQ_ETERNAL)
34 * this governor will not work. All times here are in uS.
35 */
36#define MIN_SAMPLING_RATE_RATIO (2)
37#define LATENCY_MULTIPLIER (1000)
38#define MIN_LATENCY_MULTIPLIER (100)
39#define TRANSITION_LATENCY_LIMIT (10 * 1000 * 1000)
40
41/* Ondemand Sampling types */
42enum {OD_NORMAL_SAMPLE, OD_SUB_SAMPLE};
43
44/* Macro creating sysfs show routines */
45#define show_one(_gov, file_name, object) \
46static ssize_t show_##file_name \
47(struct kobject *kobj, struct attribute *attr, char *buf) \
48{ \
49 return sprintf(buf, "%u\n", _gov##_tuners.object); \
50}
51
52#define define_get_cpu_dbs_routines(_dbs_info) \
53static struct cpu_dbs_common_info *get_cpu_cdbs(int cpu) \
54{ \
55 return &per_cpu(_dbs_info, cpu).cdbs; \
56} \
57 \
58static void *get_cpu_dbs_info_s(int cpu) \
59{ \
60 return &per_cpu(_dbs_info, cpu); \
61}
62
63/*
64 * Abbreviations:
65 * dbs: used as a shortform for demand based switching It helps to keep variable
66 * names smaller, simpler
67 * cdbs: common dbs
68 * on_*: On-demand governor
69 * cs_*: Conservative governor
70 */
71
72/* Per cpu structures */
73struct cpu_dbs_common_info {
74 int cpu;
75 cputime64_t prev_cpu_idle;
76 cputime64_t prev_cpu_wall;
77 cputime64_t prev_cpu_nice;
78 struct cpufreq_policy *cur_policy;
79 struct delayed_work work;
80 /*
81 * percpu mutex that serializes governor limit change with gov_dbs_timer
82 * invocation. We do not want gov_dbs_timer to run when user is changing
83 * the governor or limits.
84 */
85 struct mutex timer_mutex;
86};
87
88struct od_cpu_dbs_info_s {
89 struct cpu_dbs_common_info cdbs;
90 cputime64_t prev_cpu_iowait;
91 struct cpufreq_frequency_table *freq_table;
92 unsigned int freq_lo;
93 unsigned int freq_lo_jiffies;
94 unsigned int freq_hi_jiffies;
95 unsigned int rate_mult;
96 unsigned int sample_type:1;
97};
98
99struct cs_cpu_dbs_info_s {
100 struct cpu_dbs_common_info cdbs;
101 unsigned int down_skip;
102 unsigned int requested_freq;
103 unsigned int enable:1;
104};
105
106/* Governers sysfs tunables */
107struct od_dbs_tuners {
108 unsigned int ignore_nice;
109 unsigned int sampling_rate;
110 unsigned int sampling_down_factor;
111 unsigned int up_threshold;
112 unsigned int down_differential;
113 unsigned int powersave_bias;
114 unsigned int io_is_busy;
115};
116
117struct cs_dbs_tuners {
118 unsigned int ignore_nice;
119 unsigned int sampling_rate;
120 unsigned int sampling_down_factor;
121 unsigned int up_threshold;
122 unsigned int down_threshold;
123 unsigned int freq_step;
124};
125
126/* Per Governer data */
127struct dbs_data {
128 /* Common across governors */
129 #define GOV_ONDEMAND 0
130 #define GOV_CONSERVATIVE 1
131 int governor;
132 unsigned int min_sampling_rate;
133 unsigned int enable; /* number of CPUs using this policy */
134 struct attribute_group *attr_group;
135 void *tuners;
136
137 /* dbs_mutex protects dbs_enable in governor start/stop */
138 struct mutex mutex;
139
140 struct cpu_dbs_common_info *(*get_cpu_cdbs)(int cpu);
141 void *(*get_cpu_dbs_info_s)(int cpu);
142 void (*gov_dbs_timer)(struct work_struct *work);
143 void (*gov_check_cpu)(int cpu, unsigned int load);
144
145 /* Governor specific ops, see below */
146 void *gov_ops;
147};
148
149/* Governor specific ops, will be passed to dbs_data->gov_ops */
150struct od_ops {
151 int (*io_busy)(void);
152 void (*powersave_bias_init_cpu)(int cpu);
153 unsigned int (*powersave_bias_target)(struct cpufreq_policy *policy,
154 unsigned int freq_next, unsigned int relation);
155 void (*freq_increase)(struct cpufreq_policy *p, unsigned int freq);
156};
157
158struct cs_ops {
159 struct notifier_block *notifier_block;
160};
161
162static inline int delay_for_sampling_rate(unsigned int sampling_rate)
163{
164 int delay = usecs_to_jiffies(sampling_rate);
165
166 /* We want all CPUs to do sampling nearly on same jiffy */
167 if (num_online_cpus() > 1)
168 delay -= jiffies % delay;
169
170 return delay;
171}
172
173cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall);
174void dbs_check_cpu(struct dbs_data *dbs_data, int cpu);
175int cpufreq_governor_dbs(struct dbs_data *dbs_data,
176 struct cpufreq_policy *policy, unsigned int event);
177#endif /* _CPUFREQ_GOVERNER_H */
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index d7f774bb49dd..bdaab9206303 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -10,24 +10,23 @@
10 * published by the Free Software Foundation. 10 * published by the Free Software Foundation.
11 */ 11 */
12 12
13#include <linux/kernel.h> 13#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14#include <linux/module.h> 14
15#include <linux/init.h>
16#include <linux/cpufreq.h> 15#include <linux/cpufreq.h>
17#include <linux/cpu.h> 16#include <linux/init.h>
18#include <linux/jiffies.h> 17#include <linux/kernel.h>
19#include <linux/kernel_stat.h> 18#include <linux/kernel_stat.h>
19#include <linux/kobject.h>
20#include <linux/module.h>
20#include <linux/mutex.h> 21#include <linux/mutex.h>
21#include <linux/hrtimer.h> 22#include <linux/percpu-defs.h>
23#include <linux/sysfs.h>
22#include <linux/tick.h> 24#include <linux/tick.h>
23#include <linux/ktime.h> 25#include <linux/types.h>
24#include <linux/sched.h>
25 26
26/* 27#include "cpufreq_governor.h"
27 * dbs is used in this file as a shortform for demandbased switching
28 * It helps to keep variable names smaller, simpler
29 */
30 28
29/* On-demand governor macors */
31#define DEF_FREQUENCY_DOWN_DIFFERENTIAL (10) 30#define DEF_FREQUENCY_DOWN_DIFFERENTIAL (10)
32#define DEF_FREQUENCY_UP_THRESHOLD (80) 31#define DEF_FREQUENCY_UP_THRESHOLD (80)
33#define DEF_SAMPLING_DOWN_FACTOR (1) 32#define DEF_SAMPLING_DOWN_FACTOR (1)
@@ -38,80 +37,10 @@
38#define MIN_FREQUENCY_UP_THRESHOLD (11) 37#define MIN_FREQUENCY_UP_THRESHOLD (11)
39#define MAX_FREQUENCY_UP_THRESHOLD (100) 38#define MAX_FREQUENCY_UP_THRESHOLD (100)
40 39
41/* 40static struct dbs_data od_dbs_data;
42 * The polling frequency of this governor depends on the capability of 41static DEFINE_PER_CPU(struct od_cpu_dbs_info_s, od_cpu_dbs_info);
43 * the processor. Default polling frequency is 1000 times the transition
44 * latency of the processor. The governor will work on any processor with
45 * transition latency <= 10mS, using appropriate sampling
46 * rate.
47 * For CPUs with transition latency > 10mS (mostly drivers with CPUFREQ_ETERNAL)
48 * this governor will not work.
49 * All times here are in uS.
50 */
51#define MIN_SAMPLING_RATE_RATIO (2)
52
53static unsigned int min_sampling_rate;
54
55#define LATENCY_MULTIPLIER (1000)
56#define MIN_LATENCY_MULTIPLIER (100)
57#define TRANSITION_LATENCY_LIMIT (10 * 1000 * 1000)
58
59static void do_dbs_timer(struct work_struct *work);
60static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
61 unsigned int event);
62
63#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND
64static
65#endif
66struct cpufreq_governor cpufreq_gov_ondemand = {
67 .name = "ondemand",
68 .governor = cpufreq_governor_dbs,
69 .max_transition_latency = TRANSITION_LATENCY_LIMIT,
70 .owner = THIS_MODULE,
71};
72
73/* Sampling types */
74enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE};
75
76struct cpu_dbs_info_s {
77 cputime64_t prev_cpu_idle;
78 cputime64_t prev_cpu_iowait;
79 cputime64_t prev_cpu_wall;
80 cputime64_t prev_cpu_nice;
81 struct cpufreq_policy *cur_policy;
82 struct delayed_work work;
83 struct cpufreq_frequency_table *freq_table;
84 unsigned int freq_lo;
85 unsigned int freq_lo_jiffies;
86 unsigned int freq_hi_jiffies;
87 unsigned int rate_mult;
88 int cpu;
89 unsigned int sample_type:1;
90 /*
91 * percpu mutex that serializes governor limit change with
92 * do_dbs_timer invocation. We do not want do_dbs_timer to run
93 * when user is changing the governor or limits.
94 */
95 struct mutex timer_mutex;
96};
97static DEFINE_PER_CPU(struct cpu_dbs_info_s, od_cpu_dbs_info);
98
99static unsigned int dbs_enable; /* number of CPUs using this policy */
100 42
101/* 43static struct od_dbs_tuners od_tuners = {
102 * dbs_mutex protects dbs_enable in governor start/stop.
103 */
104static DEFINE_MUTEX(dbs_mutex);
105
106static struct dbs_tuners {
107 unsigned int sampling_rate;
108 unsigned int up_threshold;
109 unsigned int down_differential;
110 unsigned int ignore_nice;
111 unsigned int sampling_down_factor;
112 unsigned int powersave_bias;
113 unsigned int io_is_busy;
114} dbs_tuners_ins = {
115 .up_threshold = DEF_FREQUENCY_UP_THRESHOLD, 44 .up_threshold = DEF_FREQUENCY_UP_THRESHOLD,
116 .sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR, 45 .sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR,
117 .down_differential = DEF_FREQUENCY_DOWN_DIFFERENTIAL, 46 .down_differential = DEF_FREQUENCY_DOWN_DIFFERENTIAL,
@@ -119,14 +48,35 @@ static struct dbs_tuners {
119 .powersave_bias = 0, 48 .powersave_bias = 0,
120}; 49};
121 50
122static inline cputime64_t get_cpu_iowait_time(unsigned int cpu, cputime64_t *wall) 51static void ondemand_powersave_bias_init_cpu(int cpu)
123{ 52{
124 u64 iowait_time = get_cpu_iowait_time_us(cpu, wall); 53 struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu);
125 54
126 if (iowait_time == -1ULL) 55 dbs_info->freq_table = cpufreq_frequency_get_table(cpu);
127 return 0; 56 dbs_info->freq_lo = 0;
57}
128 58
129 return iowait_time; 59/*
60 * Not all CPUs want IO time to be accounted as busy; this depends on how
61 * efficient idling at a higher frequency/voltage is.
62 * Pavel Machek says this is not so for various generations of AMD and old
63 * Intel systems.
64 * Mike Chan (androidlcom) calis this is also not true for ARM.
65 * Because of this, whitelist specific known (series) of CPUs by default, and
66 * leave all others up to the user.
67 */
68static int should_io_be_busy(void)
69{
70#if defined(CONFIG_X86)
71 /*
72 * For Intel, Core 2 (model 15) andl later have an efficient idle.
73 */
74 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
75 boot_cpu_data.x86 == 6 &&
76 boot_cpu_data.x86_model >= 15)
77 return 1;
78#endif
79 return 0;
130} 80}
131 81
132/* 82/*
@@ -135,14 +85,13 @@ static inline cputime64_t get_cpu_iowait_time(unsigned int cpu, cputime64_t *wal
135 * freq_lo, and freq_lo_jiffies in percpu area for averaging freqs. 85 * freq_lo, and freq_lo_jiffies in percpu area for averaging freqs.
136 */ 86 */
137static unsigned int powersave_bias_target(struct cpufreq_policy *policy, 87static unsigned int powersave_bias_target(struct cpufreq_policy *policy,
138 unsigned int freq_next, 88 unsigned int freq_next, unsigned int relation)
139 unsigned int relation)
140{ 89{
141 unsigned int freq_req, freq_reduc, freq_avg; 90 unsigned int freq_req, freq_reduc, freq_avg;
142 unsigned int freq_hi, freq_lo; 91 unsigned int freq_hi, freq_lo;
143 unsigned int index = 0; 92 unsigned int index = 0;
144 unsigned int jiffies_total, jiffies_hi, jiffies_lo; 93 unsigned int jiffies_total, jiffies_hi, jiffies_lo;
145 struct cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, 94 struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info,
146 policy->cpu); 95 policy->cpu);
147 96
148 if (!dbs_info->freq_table) { 97 if (!dbs_info->freq_table) {
@@ -154,7 +103,7 @@ static unsigned int powersave_bias_target(struct cpufreq_policy *policy,
154 cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_next, 103 cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_next,
155 relation, &index); 104 relation, &index);
156 freq_req = dbs_info->freq_table[index].frequency; 105 freq_req = dbs_info->freq_table[index].frequency;
157 freq_reduc = freq_req * dbs_tuners_ins.powersave_bias / 1000; 106 freq_reduc = freq_req * od_tuners.powersave_bias / 1000;
158 freq_avg = freq_req - freq_reduc; 107 freq_avg = freq_req - freq_reduc;
159 108
160 /* Find freq bounds for freq_avg in freq_table */ 109 /* Find freq bounds for freq_avg in freq_table */
@@ -173,7 +122,7 @@ static unsigned int powersave_bias_target(struct cpufreq_policy *policy,
173 dbs_info->freq_lo_jiffies = 0; 122 dbs_info->freq_lo_jiffies = 0;
174 return freq_lo; 123 return freq_lo;
175 } 124 }
176 jiffies_total = usecs_to_jiffies(dbs_tuners_ins.sampling_rate); 125 jiffies_total = usecs_to_jiffies(od_tuners.sampling_rate);
177 jiffies_hi = (freq_avg - freq_lo) * jiffies_total; 126 jiffies_hi = (freq_avg - freq_lo) * jiffies_total;
178 jiffies_hi += ((freq_hi - freq_lo) / 2); 127 jiffies_hi += ((freq_hi - freq_lo) / 2);
179 jiffies_hi /= (freq_hi - freq_lo); 128 jiffies_hi /= (freq_hi - freq_lo);
@@ -184,13 +133,6 @@ static unsigned int powersave_bias_target(struct cpufreq_policy *policy,
184 return freq_hi; 133 return freq_hi;
185} 134}
186 135
187static void ondemand_powersave_bias_init_cpu(int cpu)
188{
189 struct cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu);
190 dbs_info->freq_table = cpufreq_frequency_get_table(cpu);
191 dbs_info->freq_lo = 0;
192}
193
194static void ondemand_powersave_bias_init(void) 136static void ondemand_powersave_bias_init(void)
195{ 137{
196 int i; 138 int i;
@@ -199,53 +141,138 @@ static void ondemand_powersave_bias_init(void)
199 } 141 }
200} 142}
201 143
202/************************** sysfs interface ************************/ 144static void dbs_freq_increase(struct cpufreq_policy *p, unsigned int freq)
145{
146 if (od_tuners.powersave_bias)
147 freq = powersave_bias_target(p, freq, CPUFREQ_RELATION_H);
148 else if (p->cur == p->max)
149 return;
203 150
204static ssize_t show_sampling_rate_min(struct kobject *kobj, 151 __cpufreq_driver_target(p, freq, od_tuners.powersave_bias ?
205 struct attribute *attr, char *buf) 152 CPUFREQ_RELATION_L : CPUFREQ_RELATION_H);
153}
154
155/*
156 * Every sampling_rate, we check, if current idle time is less than 20%
157 * (default), then we try to increase frequency Every sampling_rate, we look for
158 * a the lowest frequency which can sustain the load while keeping idle time
159 * over 30%. If such a frequency exist, we try to decrease to this frequency.
160 *
161 * Any frequency increase takes it to the maximum frequency. Frequency reduction
162 * happens at minimum steps of 5% (default) of current frequency
163 */
164static void od_check_cpu(int cpu, unsigned int load_freq)
206{ 165{
207 return sprintf(buf, "%u\n", min_sampling_rate); 166 struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu);
167 struct cpufreq_policy *policy = dbs_info->cdbs.cur_policy;
168
169 dbs_info->freq_lo = 0;
170
171 /* Check for frequency increase */
172 if (load_freq > od_tuners.up_threshold * policy->cur) {
173 /* If switching to max speed, apply sampling_down_factor */
174 if (policy->cur < policy->max)
175 dbs_info->rate_mult =
176 od_tuners.sampling_down_factor;
177 dbs_freq_increase(policy, policy->max);
178 return;
179 }
180
181 /* Check for frequency decrease */
182 /* if we cannot reduce the frequency anymore, break out early */
183 if (policy->cur == policy->min)
184 return;
185
186 /*
187 * The optimal frequency is the frequency that is the lowest that can
188 * support the current CPU usage without triggering the up policy. To be
189 * safe, we focus 10 points under the threshold.
190 */
191 if (load_freq < (od_tuners.up_threshold - od_tuners.down_differential) *
192 policy->cur) {
193 unsigned int freq_next;
194 freq_next = load_freq / (od_tuners.up_threshold -
195 od_tuners.down_differential);
196
197 /* No longer fully busy, reset rate_mult */
198 dbs_info->rate_mult = 1;
199
200 if (freq_next < policy->min)
201 freq_next = policy->min;
202
203 if (!od_tuners.powersave_bias) {
204 __cpufreq_driver_target(policy, freq_next,
205 CPUFREQ_RELATION_L);
206 } else {
207 int freq = powersave_bias_target(policy, freq_next,
208 CPUFREQ_RELATION_L);
209 __cpufreq_driver_target(policy, freq,
210 CPUFREQ_RELATION_L);
211 }
212 }
208} 213}
209 214
210define_one_global_ro(sampling_rate_min); 215static void od_dbs_timer(struct work_struct *work)
216{
217 struct od_cpu_dbs_info_s *dbs_info =
218 container_of(work, struct od_cpu_dbs_info_s, cdbs.work.work);
219 unsigned int cpu = dbs_info->cdbs.cpu;
220 int delay, sample_type = dbs_info->sample_type;
211 221
212/* cpufreq_ondemand Governor Tunables */ 222 mutex_lock(&dbs_info->cdbs.timer_mutex);
213#define show_one(file_name, object) \ 223
214static ssize_t show_##file_name \ 224 /* Common NORMAL_SAMPLE setup */
215(struct kobject *kobj, struct attribute *attr, char *buf) \ 225 dbs_info->sample_type = OD_NORMAL_SAMPLE;
216{ \ 226 if (sample_type == OD_SUB_SAMPLE) {
217 return sprintf(buf, "%u\n", dbs_tuners_ins.object); \ 227 delay = dbs_info->freq_lo_jiffies;
228 __cpufreq_driver_target(dbs_info->cdbs.cur_policy,
229 dbs_info->freq_lo, CPUFREQ_RELATION_H);
230 } else {
231 dbs_check_cpu(&od_dbs_data, cpu);
232 if (dbs_info->freq_lo) {
233 /* Setup timer for SUB_SAMPLE */
234 dbs_info->sample_type = OD_SUB_SAMPLE;
235 delay = dbs_info->freq_hi_jiffies;
236 } else {
237 delay = delay_for_sampling_rate(dbs_info->rate_mult);
238 }
239 }
240
241 schedule_delayed_work_on(cpu, &dbs_info->cdbs.work, delay);
242 mutex_unlock(&dbs_info->cdbs.timer_mutex);
243}
244
245/************************** sysfs interface ************************/
246
247static ssize_t show_sampling_rate_min(struct kobject *kobj,
248 struct attribute *attr, char *buf)
249{
250 return sprintf(buf, "%u\n", od_dbs_data.min_sampling_rate);
218} 251}
219show_one(sampling_rate, sampling_rate);
220show_one(io_is_busy, io_is_busy);
221show_one(up_threshold, up_threshold);
222show_one(sampling_down_factor, sampling_down_factor);
223show_one(ignore_nice_load, ignore_nice);
224show_one(powersave_bias, powersave_bias);
225 252
226/** 253/**
227 * update_sampling_rate - update sampling rate effective immediately if needed. 254 * update_sampling_rate - update sampling rate effective immediately if needed.
228 * @new_rate: new sampling rate 255 * @new_rate: new sampling rate
229 * 256 *
230 * If new rate is smaller than the old, simply updaing 257 * If new rate is smaller than the old, simply updaing
231 * dbs_tuners_int.sampling_rate might not be appropriate. For example, 258 * dbs_tuners_int.sampling_rate might not be appropriate. For example, if the
232 * if the original sampling_rate was 1 second and the requested new sampling 259 * original sampling_rate was 1 second and the requested new sampling rate is 10
233 * rate is 10 ms because the user needs immediate reaction from ondemand 260 * ms because the user needs immediate reaction from ondemand governor, but not
234 * governor, but not sure if higher frequency will be required or not, 261 * sure if higher frequency will be required or not, then, the governor may
235 * then, the governor may change the sampling rate too late; up to 1 second 262 * change the sampling rate too late; up to 1 second later. Thus, if we are
236 * later. Thus, if we are reducing the sampling rate, we need to make the 263 * reducing the sampling rate, we need to make the new value effective
237 * new value effective immediately. 264 * immediately.
238 */ 265 */
239static void update_sampling_rate(unsigned int new_rate) 266static void update_sampling_rate(unsigned int new_rate)
240{ 267{
241 int cpu; 268 int cpu;
242 269
243 dbs_tuners_ins.sampling_rate = new_rate 270 od_tuners.sampling_rate = new_rate = max(new_rate,
244 = max(new_rate, min_sampling_rate); 271 od_dbs_data.min_sampling_rate);
245 272
246 for_each_online_cpu(cpu) { 273 for_each_online_cpu(cpu) {
247 struct cpufreq_policy *policy; 274 struct cpufreq_policy *policy;
248 struct cpu_dbs_info_s *dbs_info; 275 struct od_cpu_dbs_info_s *dbs_info;
249 unsigned long next_sampling, appointed_at; 276 unsigned long next_sampling, appointed_at;
250 277
251 policy = cpufreq_cpu_get(cpu); 278 policy = cpufreq_cpu_get(cpu);
@@ -254,28 +281,28 @@ static void update_sampling_rate(unsigned int new_rate)
254 dbs_info = &per_cpu(od_cpu_dbs_info, policy->cpu); 281 dbs_info = &per_cpu(od_cpu_dbs_info, policy->cpu);
255 cpufreq_cpu_put(policy); 282 cpufreq_cpu_put(policy);
256 283
257 mutex_lock(&dbs_info->timer_mutex); 284 mutex_lock(&dbs_info->cdbs.timer_mutex);
258 285
259 if (!delayed_work_pending(&dbs_info->work)) { 286 if (!delayed_work_pending(&dbs_info->cdbs.work)) {
260 mutex_unlock(&dbs_info->timer_mutex); 287 mutex_unlock(&dbs_info->cdbs.timer_mutex);
261 continue; 288 continue;
262 } 289 }
263 290
264 next_sampling = jiffies + usecs_to_jiffies(new_rate); 291 next_sampling = jiffies + usecs_to_jiffies(new_rate);
265 appointed_at = dbs_info->work.timer.expires; 292 appointed_at = dbs_info->cdbs.work.timer.expires;
266
267 293
268 if (time_before(next_sampling, appointed_at)) { 294 if (time_before(next_sampling, appointed_at)) {
269 295
270 mutex_unlock(&dbs_info->timer_mutex); 296 mutex_unlock(&dbs_info->cdbs.timer_mutex);
271 cancel_delayed_work_sync(&dbs_info->work); 297 cancel_delayed_work_sync(&dbs_info->cdbs.work);
272 mutex_lock(&dbs_info->timer_mutex); 298 mutex_lock(&dbs_info->cdbs.timer_mutex);
273 299
274 schedule_delayed_work_on(dbs_info->cpu, &dbs_info->work, 300 schedule_delayed_work_on(dbs_info->cdbs.cpu,
275 usecs_to_jiffies(new_rate)); 301 &dbs_info->cdbs.work,
302 usecs_to_jiffies(new_rate));
276 303
277 } 304 }
278 mutex_unlock(&dbs_info->timer_mutex); 305 mutex_unlock(&dbs_info->cdbs.timer_mutex);
279 } 306 }
280} 307}
281 308
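To make the example in the comment above concrete, here is a minimal sketch of the decision the loop takes; the values and variable names are illustrative only and do not appear in the source:

	/* old sampling_rate was 1 s, the requested one is 10 ms (both in us) */
	unsigned int old_rate = 1000000;
	unsigned int new_rate = 10000;

	unsigned long next_sampling = jiffies + usecs_to_jiffies(new_rate);
	unsigned long appointed_at  = jiffies + usecs_to_jiffies(old_rate);

	/* true here: the new sample point comes well before the pending
	 * expiry, so the delayed work is cancelled and requeued to run
	 * usecs_to_jiffies(new_rate) jiffies from now */
	bool reschedule = time_before(next_sampling, appointed_at);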
@@ -300,7 +327,7 @@ static ssize_t store_io_is_busy(struct kobject *a, struct attribute *b,
300 ret = sscanf(buf, "%u", &input); 327 ret = sscanf(buf, "%u", &input);
301 if (ret != 1) 328 if (ret != 1)
302 return -EINVAL; 329 return -EINVAL;
303 dbs_tuners_ins.io_is_busy = !!input; 330 od_tuners.io_is_busy = !!input;
304 return count; 331 return count;
305} 332}
306 333
@@ -315,7 +342,7 @@ static ssize_t store_up_threshold(struct kobject *a, struct attribute *b,
315 input < MIN_FREQUENCY_UP_THRESHOLD) { 342 input < MIN_FREQUENCY_UP_THRESHOLD) {
316 return -EINVAL; 343 return -EINVAL;
317 } 344 }
318 dbs_tuners_ins.up_threshold = input; 345 od_tuners.up_threshold = input;
319 return count; 346 return count;
320} 347}
321 348
@@ -328,12 +355,12 @@ static ssize_t store_sampling_down_factor(struct kobject *a,
328 355
329 if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1) 356 if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1)
330 return -EINVAL; 357 return -EINVAL;
331 dbs_tuners_ins.sampling_down_factor = input; 358 od_tuners.sampling_down_factor = input;
332 359
333 /* Reset down sampling multiplier in case it was active */ 360 /* Reset down sampling multiplier in case it was active */
334 for_each_online_cpu(j) { 361 for_each_online_cpu(j) {
335 struct cpu_dbs_info_s *dbs_info; 362 struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info,
336 dbs_info = &per_cpu(od_cpu_dbs_info, j); 363 j);
337 dbs_info->rate_mult = 1; 364 dbs_info->rate_mult = 1;
338 } 365 }
339 return count; 366 return count;
@@ -354,19 +381,20 @@ static ssize_t store_ignore_nice_load(struct kobject *a, struct attribute *b,
354 if (input > 1) 381 if (input > 1)
355 input = 1; 382 input = 1;
356 383
357 if (input == dbs_tuners_ins.ignore_nice) { /* nothing to do */ 384 if (input == od_tuners.ignore_nice) { /* nothing to do */
358 return count; 385 return count;
359 } 386 }
360 dbs_tuners_ins.ignore_nice = input; 387 od_tuners.ignore_nice = input;
361 388
362 /* we need to re-evaluate prev_cpu_idle */ 389 /* we need to re-evaluate prev_cpu_idle */
363 for_each_online_cpu(j) { 390 for_each_online_cpu(j) {
364 struct cpu_dbs_info_s *dbs_info; 391 struct od_cpu_dbs_info_s *dbs_info;
365 dbs_info = &per_cpu(od_cpu_dbs_info, j); 392 dbs_info = &per_cpu(od_cpu_dbs_info, j);
366 dbs_info->prev_cpu_idle = get_cpu_idle_time(j, 393 dbs_info->cdbs.prev_cpu_idle = get_cpu_idle_time(j,
367 &dbs_info->prev_cpu_wall); 394 &dbs_info->cdbs.prev_cpu_wall);
368 if (dbs_tuners_ins.ignore_nice) 395 if (od_tuners.ignore_nice)
369 dbs_info->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; 396 dbs_info->cdbs.prev_cpu_nice =
397 kcpustat_cpu(j).cpustat[CPUTIME_NICE];
370 398
371 } 399 }
372 return count; 400 return count;
@@ -385,17 +413,25 @@ static ssize_t store_powersave_bias(struct kobject *a, struct attribute *b,
385 if (input > 1000) 413 if (input > 1000)
386 input = 1000; 414 input = 1000;
387 415
388 dbs_tuners_ins.powersave_bias = input; 416 od_tuners.powersave_bias = input;
389 ondemand_powersave_bias_init(); 417 ondemand_powersave_bias_init();
390 return count; 418 return count;
391} 419}
392 420
421show_one(od, sampling_rate, sampling_rate);
422show_one(od, io_is_busy, io_is_busy);
423show_one(od, up_threshold, up_threshold);
424show_one(od, sampling_down_factor, sampling_down_factor);
425show_one(od, ignore_nice_load, ignore_nice);
426show_one(od, powersave_bias, powersave_bias);
427
393define_one_global_rw(sampling_rate); 428define_one_global_rw(sampling_rate);
394define_one_global_rw(io_is_busy); 429define_one_global_rw(io_is_busy);
395define_one_global_rw(up_threshold); 430define_one_global_rw(up_threshold);
396define_one_global_rw(sampling_down_factor); 431define_one_global_rw(sampling_down_factor);
397define_one_global_rw(ignore_nice_load); 432define_one_global_rw(ignore_nice_load);
398define_one_global_rw(powersave_bias); 433define_one_global_rw(powersave_bias);
434define_one_global_ro(sampling_rate_min);
399 435
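show_one() is now taken from cpufreq_governor.h and gains the governor prefix as its first argument, so show_one(od, sampling_rate, sampling_rate) generates show_sampling_rate() reading od_tuners.sampling_rate. The new header is not shown in this hunk; based on the old macro above, the generalized form is presumably along these lines (a sketch, not the verbatim header):

#define show_one(_gov, file_name, object)				\
static ssize_t show_##file_name						\
(struct kobject *kobj, struct attribute *attr, char *buf)		\
{									\
	return sprintf(buf, "%u\n", _gov##_tuners.object);		\
}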
400static struct attribute *dbs_attributes[] = { 436static struct attribute *dbs_attributes[] = {
401 &sampling_rate_min.attr, 437 &sampling_rate_min.attr,
@@ -408,354 +444,71 @@ static struct attribute *dbs_attributes[] = {
408 NULL 444 NULL
409}; 445};
410 446
411static struct attribute_group dbs_attr_group = { 447static struct attribute_group od_attr_group = {
412 .attrs = dbs_attributes, 448 .attrs = dbs_attributes,
413 .name = "ondemand", 449 .name = "ondemand",
414}; 450};
415 451
416/************************** sysfs end ************************/ 452/************************** sysfs end ************************/
417 453
418static void dbs_freq_increase(struct cpufreq_policy *p, unsigned int freq) 454define_get_cpu_dbs_routines(od_cpu_dbs_info);
419{
420 if (dbs_tuners_ins.powersave_bias)
421 freq = powersave_bias_target(p, freq, CPUFREQ_RELATION_H);
422 else if (p->cur == p->max)
423 return;
424
425 __cpufreq_driver_target(p, freq, dbs_tuners_ins.powersave_bias ?
426 CPUFREQ_RELATION_L : CPUFREQ_RELATION_H);
427}
428
429static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
430{
431 unsigned int max_load_freq;
432
433 struct cpufreq_policy *policy;
434 unsigned int j;
435
436 this_dbs_info->freq_lo = 0;
437 policy = this_dbs_info->cur_policy;
438
439 /*
440 * Every sampling_rate, we check if current idle time is less
441 * than 20% (default); if so, we try to increase the frequency.
442 * Every sampling_rate, we look for the lowest
443 * frequency which can sustain the load while keeping idle time over
444 * 30%. If such a frequency exists, we try to decrease to this frequency.
445 *
446 * Any frequency increase takes it to the maximum frequency.
447 * Frequency reduction happens at minimum steps of
448 * 5% (default) of current frequency
449 */
450
451 /* Get Absolute Load - in terms of freq */
452 max_load_freq = 0;
453
454 for_each_cpu(j, policy->cpus) {
455 struct cpu_dbs_info_s *j_dbs_info;
456 cputime64_t cur_wall_time, cur_idle_time, cur_iowait_time;
457 unsigned int idle_time, wall_time, iowait_time;
458 unsigned int load, load_freq;
459 int freq_avg;
460
461 j_dbs_info = &per_cpu(od_cpu_dbs_info, j);
462
463 cur_idle_time = get_cpu_idle_time(j, &cur_wall_time);
464 cur_iowait_time = get_cpu_iowait_time(j, &cur_wall_time);
465
466 wall_time = (unsigned int)
467 (cur_wall_time - j_dbs_info->prev_cpu_wall);
468 j_dbs_info->prev_cpu_wall = cur_wall_time;
469
470 idle_time = (unsigned int)
471 (cur_idle_time - j_dbs_info->prev_cpu_idle);
472 j_dbs_info->prev_cpu_idle = cur_idle_time;
473
474 iowait_time = (unsigned int)
475 (cur_iowait_time - j_dbs_info->prev_cpu_iowait);
476 j_dbs_info->prev_cpu_iowait = cur_iowait_time;
477
478 if (dbs_tuners_ins.ignore_nice) {
479 u64 cur_nice;
480 unsigned long cur_nice_jiffies;
481
482 cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE] -
483 j_dbs_info->prev_cpu_nice;
484 /*
485 * Assumption: nice time between sampling periods will
486 * be less than 2^32 jiffies for 32 bit sys
487 */
488 cur_nice_jiffies = (unsigned long)
489 cputime64_to_jiffies64(cur_nice);
490
491 j_dbs_info->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE];
492 idle_time += jiffies_to_usecs(cur_nice_jiffies);
493 }
494
495 /*
496 * For the purpose of ondemand, waiting for disk IO is an
497 * indication that you're performance critical, and not that
498 * the system is actually idle. So subtract the iowait time
499 * from the cpu idle time.
500 */
501
502 if (dbs_tuners_ins.io_is_busy && idle_time >= iowait_time)
503 idle_time -= iowait_time;
504 455
505 if (unlikely(!wall_time || wall_time < idle_time)) 456static struct od_ops od_ops = {
506 continue; 457 .io_busy = should_io_be_busy,
507 458 .powersave_bias_init_cpu = ondemand_powersave_bias_init_cpu,
508 load = 100 * (wall_time - idle_time) / wall_time; 459 .powersave_bias_target = powersave_bias_target,
509 460 .freq_increase = dbs_freq_increase,
510 freq_avg = __cpufreq_driver_getavg(policy, j); 461};
511 if (freq_avg <= 0)
512 freq_avg = policy->cur;
513
514 load_freq = load * freq_avg;
515 if (load_freq > max_load_freq)
516 max_load_freq = load_freq;
517 }
518
519 /* Check for frequency increase */
520 if (max_load_freq > dbs_tuners_ins.up_threshold * policy->cur) {
521 /* If switching to max speed, apply sampling_down_factor */
522 if (policy->cur < policy->max)
523 this_dbs_info->rate_mult =
524 dbs_tuners_ins.sampling_down_factor;
525 dbs_freq_increase(policy, policy->max);
526 return;
527 }
528
529 /* Check for frequency decrease */
530 /* if we cannot reduce the frequency anymore, break out early */
531 if (policy->cur == policy->min)
532 return;
533
534 /*
535 * The optimal frequency is the frequency that is the lowest that
536 * can support the current CPU usage without triggering the up
537 * policy. To be safe, we focus 10 points under the threshold.
538 */
539 if (max_load_freq <
540 (dbs_tuners_ins.up_threshold - dbs_tuners_ins.down_differential) *
541 policy->cur) {
542 unsigned int freq_next;
543 freq_next = max_load_freq /
544 (dbs_tuners_ins.up_threshold -
545 dbs_tuners_ins.down_differential);
546
547 /* No longer fully busy, reset rate_mult */
548 this_dbs_info->rate_mult = 1;
549
550 if (freq_next < policy->min)
551 freq_next = policy->min;
552
553 if (!dbs_tuners_ins.powersave_bias) {
554 __cpufreq_driver_target(policy, freq_next,
555 CPUFREQ_RELATION_L);
556 } else {
557 int freq = powersave_bias_target(policy, freq_next,
558 CPUFREQ_RELATION_L);
559 __cpufreq_driver_target(policy, freq,
560 CPUFREQ_RELATION_L);
561 }
562 }
563}
564
565static void do_dbs_timer(struct work_struct *work)
566{
567 struct cpu_dbs_info_s *dbs_info =
568 container_of(work, struct cpu_dbs_info_s, work.work);
569 unsigned int cpu = dbs_info->cpu;
570 int sample_type = dbs_info->sample_type;
571
572 int delay;
573
574 mutex_lock(&dbs_info->timer_mutex);
575
576 /* Common NORMAL_SAMPLE setup */
577 dbs_info->sample_type = DBS_NORMAL_SAMPLE;
578 if (!dbs_tuners_ins.powersave_bias ||
579 sample_type == DBS_NORMAL_SAMPLE) {
580 dbs_check_cpu(dbs_info);
581 if (dbs_info->freq_lo) {
582 /* Setup timer for SUB_SAMPLE */
583 dbs_info->sample_type = DBS_SUB_SAMPLE;
584 delay = dbs_info->freq_hi_jiffies;
585 } else {
586 /* We want all CPUs to do sampling nearly on
587 * same jiffy
588 */
589 delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate
590 * dbs_info->rate_mult);
591
592 if (num_online_cpus() > 1)
593 delay -= jiffies % delay;
594 }
595 } else {
596 __cpufreq_driver_target(dbs_info->cur_policy,
597 dbs_info->freq_lo, CPUFREQ_RELATION_H);
598 delay = dbs_info->freq_lo_jiffies;
599 }
600 schedule_delayed_work_on(cpu, &dbs_info->work, delay);
601 mutex_unlock(&dbs_info->timer_mutex);
602}
603
604static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info)
605{
606 /* We want all CPUs to do sampling nearly on same jiffy */
607 int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
608
609 if (num_online_cpus() > 1)
610 delay -= jiffies % delay;
611 462
612 dbs_info->sample_type = DBS_NORMAL_SAMPLE; 463static struct dbs_data od_dbs_data = {
613 INIT_DEFERRABLE_WORK(&dbs_info->work, do_dbs_timer); 464 .governor = GOV_ONDEMAND,
614 schedule_delayed_work_on(dbs_info->cpu, &dbs_info->work, delay); 465 .attr_group = &od_attr_group,
615} 466 .tuners = &od_tuners,
467 .get_cpu_cdbs = get_cpu_cdbs,
468 .get_cpu_dbs_info_s = get_cpu_dbs_info_s,
469 .gov_dbs_timer = od_dbs_timer,
470 .gov_check_cpu = od_check_cpu,
471 .gov_ops = &od_ops,
472};
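define_get_cpu_dbs_routines(od_cpu_dbs_info) and the .get_cpu_cdbs / .get_cpu_dbs_info_s callbacks wired up above also come from the new cpufreq_governor.h, outside this hunk. The macro plausibly expands to a pair of per-CPU accessors along these lines (struct names are inferred from the cdbs member used throughout and may differ in the real header):

#define define_get_cpu_dbs_routines(_dbs_info)				\
static struct cpu_dbs_common_info *get_cpu_cdbs(int cpu)		\
{									\
	return &per_cpu(_dbs_info, cpu).cdbs;				\
}									\
									\
static void *get_cpu_dbs_info_s(int cpu)				\
{									\
	return &per_cpu(_dbs_info, cpu);				\
}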
616 473
617static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info) 474static int od_cpufreq_governor_dbs(struct cpufreq_policy *policy,
475 unsigned int event)
618{ 476{
619 cancel_delayed_work_sync(&dbs_info->work); 477 return cpufreq_governor_dbs(&od_dbs_data, policy, event);
620} 478}
621 479
622/* 480#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND
623 * Not all CPUs want IO time to be accounted as busy; this depends on how 481static
624 * efficient idling at a higher frequency/voltage is.
625 * Pavel Machek says this is not so for various generations of AMD and old
626 * Intel systems.
627 * Mike Chan (android.com) says this is also not true for ARM. 485 .governor = od_cpufreq_governor_dbs,
628 * Because of this, whitelist specific known (series) of CPUs by default, and
629 * leave all others up to the user.
630 */
631static int should_io_be_busy(void)
632{
633#if defined(CONFIG_X86)
634 /*
635 * For Intel, Core 2 (model 15) and later have an efficient idle.
636 */
637 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
638 boot_cpu_data.x86 == 6 &&
639 boot_cpu_data.x86_model >= 15)
640 return 1;
641#endif 482#endif
642 return 0; 483struct cpufreq_governor cpufreq_gov_ondemand = {
643} 484 .name = "ondemand",
644 485 .governor = od_cpufreq_governor_dbs,
645static int cpufreq_governor_dbs(struct cpufreq_policy *policy, 486 .max_transition_latency = TRANSITION_LATENCY_LIMIT,
646 unsigned int event) 487 .owner = THIS_MODULE,
647{ 488};
648 unsigned int cpu = policy->cpu;
649 struct cpu_dbs_info_s *this_dbs_info;
650 unsigned int j;
651 int rc;
652
653 this_dbs_info = &per_cpu(od_cpu_dbs_info, cpu);
654
655 switch (event) {
656 case CPUFREQ_GOV_START:
657 if ((!cpu_online(cpu)) || (!policy->cur))
658 return -EINVAL;
659
660 mutex_lock(&dbs_mutex);
661
662 dbs_enable++;
663 for_each_cpu(j, policy->cpus) {
664 struct cpu_dbs_info_s *j_dbs_info;
665 j_dbs_info = &per_cpu(od_cpu_dbs_info, j);
666 j_dbs_info->cur_policy = policy;
667
668 j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
669 &j_dbs_info->prev_cpu_wall);
670 if (dbs_tuners_ins.ignore_nice)
671 j_dbs_info->prev_cpu_nice =
672 kcpustat_cpu(j).cpustat[CPUTIME_NICE];
673 }
674 this_dbs_info->cpu = cpu;
675 this_dbs_info->rate_mult = 1;
676 ondemand_powersave_bias_init_cpu(cpu);
677 /*
678 * Start the timer schedule work when this governor
679 * is used for the first time
680 */
681 if (dbs_enable == 1) {
682 unsigned int latency;
683
684 rc = sysfs_create_group(cpufreq_global_kobject,
685 &dbs_attr_group);
686 if (rc) {
687 mutex_unlock(&dbs_mutex);
688 return rc;
689 }
690
691 /* policy latency is in nS. Convert it to uS first */
692 latency = policy->cpuinfo.transition_latency / 1000;
693 if (latency == 0)
694 latency = 1;
695 /* Bring kernel and HW constraints together */
696 min_sampling_rate = max(min_sampling_rate,
697 MIN_LATENCY_MULTIPLIER * latency);
698 dbs_tuners_ins.sampling_rate =
699 max(min_sampling_rate,
700 latency * LATENCY_MULTIPLIER);
701 dbs_tuners_ins.io_is_busy = should_io_be_busy();
702 }
703 mutex_unlock(&dbs_mutex);
704
705 mutex_init(&this_dbs_info->timer_mutex);
706 dbs_timer_init(this_dbs_info);
707 break;
708
709 case CPUFREQ_GOV_STOP:
710 dbs_timer_exit(this_dbs_info);
711
712 mutex_lock(&dbs_mutex);
713 mutex_destroy(&this_dbs_info->timer_mutex);
714 dbs_enable--;
715 mutex_unlock(&dbs_mutex);
716 if (!dbs_enable)
717 sysfs_remove_group(cpufreq_global_kobject,
718 &dbs_attr_group);
719
720 break;
721
722 case CPUFREQ_GOV_LIMITS:
723 mutex_lock(&this_dbs_info->timer_mutex);
724 if (policy->max < this_dbs_info->cur_policy->cur)
725 __cpufreq_driver_target(this_dbs_info->cur_policy,
726 policy->max, CPUFREQ_RELATION_H);
727 else if (policy->min > this_dbs_info->cur_policy->cur)
728 __cpufreq_driver_target(this_dbs_info->cur_policy,
729 policy->min, CPUFREQ_RELATION_L);
730 dbs_check_cpu(this_dbs_info);
731 mutex_unlock(&this_dbs_info->timer_mutex);
732 break;
733 }
734 return 0;
735}
736 489
737static int __init cpufreq_gov_dbs_init(void) 490static int __init cpufreq_gov_dbs_init(void)
738{ 491{
739 u64 idle_time; 492 u64 idle_time;
740 int cpu = get_cpu(); 493 int cpu = get_cpu();
741 494
495 mutex_init(&od_dbs_data.mutex);
742 idle_time = get_cpu_idle_time_us(cpu, NULL); 496 idle_time = get_cpu_idle_time_us(cpu, NULL);
743 put_cpu(); 497 put_cpu();
744 if (idle_time != -1ULL) { 498 if (idle_time != -1ULL) {
745 /* Idle micro accounting is supported. Use finer thresholds */ 499 /* Idle micro accounting is supported. Use finer thresholds */
746 dbs_tuners_ins.up_threshold = MICRO_FREQUENCY_UP_THRESHOLD; 500 od_tuners.up_threshold = MICRO_FREQUENCY_UP_THRESHOLD;
747 dbs_tuners_ins.down_differential = 501 od_tuners.down_differential = MICRO_FREQUENCY_DOWN_DIFFERENTIAL;
748 MICRO_FREQUENCY_DOWN_DIFFERENTIAL;
749 /* 502 /*
750 * In nohz/micro accounting case we set the minimum frequency 503 * In nohz/micro accounting case we set the minimum frequency
751 * not depending on HZ, but fixed (very low). The deferred 504 * not depending on HZ, but fixed (very low). The deferred
752 * timer might skip some samples if idle/sleeping as needed. 505 * timer might skip some samples if idle/sleeping as needed.
753 */ 506 */
754 min_sampling_rate = MICRO_FREQUENCY_MIN_SAMPLE_RATE; 507 od_dbs_data.min_sampling_rate = MICRO_FREQUENCY_MIN_SAMPLE_RATE;
755 } else { 508 } else {
756 /* For correct statistics, we need 10 ticks for each measure */ 509 /* For correct statistics, we need 10 ticks for each measure */
757 min_sampling_rate = 510 od_dbs_data.min_sampling_rate = MIN_SAMPLING_RATE_RATIO *
758 MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10); 511 jiffies_to_usecs(10);
759 } 512 }
760 513
761 return cpufreq_register_governor(&cpufreq_gov_ondemand); 514 return cpufreq_register_governor(&cpufreq_gov_ondemand);
@@ -766,7 +519,6 @@ static void __exit cpufreq_gov_dbs_exit(void)
766 cpufreq_unregister_governor(&cpufreq_gov_ondemand); 519 cpufreq_unregister_governor(&cpufreq_gov_ondemand);
767} 520}
768 521
769
770MODULE_AUTHOR("Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>"); 522MODULE_AUTHOR("Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>");
771MODULE_AUTHOR("Alexey Starikovskiy <alexey.y.starikovskiy@intel.com>"); 523MODULE_AUTHOR("Alexey Starikovskiy <alexey.y.starikovskiy@intel.com>");
772MODULE_DESCRIPTION("'cpufreq_ondemand' - A dynamic cpufreq governor for " 524MODULE_DESCRIPTION("'cpufreq_ondemand' - A dynamic cpufreq governor for "