author    Dave Jones <davej@redhat.com>    2005-05-31 22:03:47 -0400
committer Dave Jones <davej@redhat.com>    2005-05-31 22:03:47 -0400
commit    b9170836d1aa4ded7cc1ac1cb8fbc7867061c98c (patch)
tree      87fbac643c392c8ba2459158f78671c356e8dd4a /drivers/cpufreq/cpufreq_conservative.c
parent    b53cc6ead046093477ec7a3354d620337101ea5b (diff)
[CPUFREQ] Conservative cpufreq governor
A new cpufreq module, based on the ondemand one with my additional patches just posted. This one is more suitable for battery environments, where it's probably more appealing to have the CPU frequency gracefully increase and decrease rather than flip between the min and max frequencies.

N.B. Bruno Ducrot pointed out that AMD64s "do have unacceptable latency between min and max freq transition, due to the step-by-step requirements (200MHz IIRC)", so AMD64 users would probably benefit from this too.

Signed-off-by: Alexander Clouter <alex-kernel@digriz.org.uk>
Signed-off-by: Dave Jones <davej@redhat.com>
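As a rough illustration of the stepping behaviour (a standalone sketch with hypothetical frequencies in kHz; it only mirrors the freq_step arithmetic used in the diff below):

	#include <stdio.h>

	int main(void)
	{
		unsigned int max = 2000000, min = 1000000; /* kHz, hypothetical */
		unsigned int freq_step = 5;	/* percent, the governor's default */
		unsigned int step = (freq_step * max) / 100;
		unsigned int cur = min, samples = 0;

		while (cur < max) {		/* one step per sampling period */
			cur += step;
			if (cur > max)
				cur = max;
			samples++;
		}
		printf("%u kHz per step, full ramp in %u samples\n", step, samples);
		return 0;
	}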
Diffstat (limited to 'drivers/cpufreq/cpufreq_conservative.c')
-rw-r--r--  drivers/cpufreq/cpufreq_conservative.c  613
1 file changed, 613 insertions, 0 deletions
diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c
new file mode 100644
index 000000000000..dd2f5b272a4d
--- /dev/null
+++ b/drivers/cpufreq/cpufreq_conservative.c
@@ -0,0 +1,613 @@
/*
 *  drivers/cpufreq/cpufreq_conservative.c
 *
 *  Copyright (C)  2001 Russell King
 *            (C)  2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>.
 *                      Jun Nakajima <jun.nakajima@intel.com>
 *            (C)  2004 Alexander Clouter <alex-kernel@digriz.org.uk>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/ctype.h>
#include <linux/cpufreq.h>
#include <linux/sysctl.h>
#include <linux/types.h>
#include <linux/fs.h>
#include <linux/sysfs.h>
#include <linux/sched.h>
#include <linux/kmod.h>
#include <linux/workqueue.h>
#include <linux/jiffies.h>
#include <linux/kernel_stat.h>
#include <linux/percpu.h>
/*
 * dbs is used in this file as shorthand for demand-based switching;
 * it keeps variable names short and simple.
 */

#define DEF_FREQUENCY_UP_THRESHOLD		(80)
#define MIN_FREQUENCY_UP_THRESHOLD		(0)
#define MAX_FREQUENCY_UP_THRESHOLD		(100)

#define DEF_FREQUENCY_DOWN_THRESHOLD		(20)
#define MIN_FREQUENCY_DOWN_THRESHOLD		(0)
#define MAX_FREQUENCY_DOWN_THRESHOLD		(100)

/*
 * The polling frequency of this governor depends on the capability of
 * the processor.  The default sampling rate is derived from the
 * processor's transition latency, scaled by
 * DEF_SAMPLING_RATE_LATENCY_MULTIPLIER below.  The governor will work
 * on any processor with transition latency <= 10 ms, using an
 * appropriate sampling rate.
 * For CPUs with transition latency > 10 ms (mostly drivers with
 * CPUFREQ_ETERNAL) this governor will not work.
 * All times here are in us.
 */
static unsigned int def_sampling_rate;
#define MIN_SAMPLING_RATE			(def_sampling_rate / 2)
#define MAX_SAMPLING_RATE			(500 * def_sampling_rate)
#define DEF_SAMPLING_RATE_LATENCY_MULTIPLIER	(100000)
#define DEF_SAMPLING_DOWN_FACTOR		(5)
#define TRANSITION_LATENCY_LIMIT		(10 * 1000)
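/*
 * Worked example (illustrative numbers, not from the original source):
 * a driver reporting a transition latency of 1000 ns (the lower clamp
 * applied in cpufreq_governor_dbs below) gives
 *	def_sampling_rate = (1000 / 1000) * 100000 = 100000 us (100 ms),
 * so MIN_SAMPLING_RATE = 50000 us and MAX_SAMPLING_RATE = 50000000 us.
 */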

static void do_dbs_timer(void *data);

struct cpu_dbs_info_s {
	struct cpufreq_policy	*cur_policy;
	unsigned int		prev_cpu_idle_up;
	unsigned int		prev_cpu_idle_down;
	unsigned int		enable;
};
static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info);

static unsigned int dbs_enable;	/* number of CPUs using this policy */

static DECLARE_MUTEX(dbs_sem);
static DECLARE_WORK(dbs_work, do_dbs_timer, NULL);

struct dbs_tuners {
	unsigned int		sampling_rate;
	unsigned int		sampling_down_factor;
	unsigned int		up_threshold;
	unsigned int		down_threshold;
	unsigned int		ignore_nice;
	unsigned int		freq_step;
};

static struct dbs_tuners dbs_tuners_ins = {
	.up_threshold		= DEF_FREQUENCY_UP_THRESHOLD,
	.down_threshold		= DEF_FREQUENCY_DOWN_THRESHOLD,
	.sampling_down_factor	= DEF_SAMPLING_DOWN_FACTOR,
};

/************************** sysfs interface ************************/
static ssize_t show_sampling_rate_max(struct cpufreq_policy *policy, char *buf)
{
	return sprintf(buf, "%u\n", MAX_SAMPLING_RATE);
}

static ssize_t show_sampling_rate_min(struct cpufreq_policy *policy, char *buf)
{
	return sprintf(buf, "%u\n", MIN_SAMPLING_RATE);
}

#define define_one_ro(_name)		\
static struct freq_attr _name =		\
__ATTR(_name, 0444, show_##_name, NULL)

define_one_ro(sampling_rate_max);
define_one_ro(sampling_rate_min);

/* cpufreq_conservative Governor Tunables */
#define show_one(file_name, object)				\
static ssize_t show_##file_name					\
(struct cpufreq_policy *unused, char *buf)			\
{								\
	return sprintf(buf, "%u\n", dbs_tuners_ins.object);	\
}
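/*
 * For example, show_one(sampling_rate, sampling_rate) below expands to:
 *
 *	static ssize_t show_sampling_rate
 *	(struct cpufreq_policy *unused, char *buf)
 *	{
 *		return sprintf(buf, "%u\n", dbs_tuners_ins.sampling_rate);
 *	}
 */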
show_one(sampling_rate, sampling_rate);
show_one(sampling_down_factor, sampling_down_factor);
show_one(up_threshold, up_threshold);
show_one(down_threshold, down_threshold);
show_one(ignore_nice, ignore_nice);
show_one(freq_step, freq_step);

static ssize_t store_sampling_down_factor(struct cpufreq_policy *unused,
		const char *buf, size_t count)
{
	unsigned int input;
	int ret;

	ret = sscanf(buf, "%u", &input);
	if (ret != 1)
		return -EINVAL;

	down(&dbs_sem);
	dbs_tuners_ins.sampling_down_factor = input;
	up(&dbs_sem);

	return count;
}

static ssize_t store_sampling_rate(struct cpufreq_policy *unused,
		const char *buf, size_t count)
{
	unsigned int input;
	int ret;

	ret = sscanf(buf, "%u", &input);

	down(&dbs_sem);
	if (ret != 1 || input > MAX_SAMPLING_RATE || input < MIN_SAMPLING_RATE) {
		up(&dbs_sem);
		return -EINVAL;
	}

	dbs_tuners_ins.sampling_rate = input;
	up(&dbs_sem);

	return count;
}

static ssize_t store_up_threshold(struct cpufreq_policy *unused,
		const char *buf, size_t count)
{
	unsigned int input;
	int ret;

	ret = sscanf(buf, "%u", &input);

	down(&dbs_sem);
	if (ret != 1 || input > MAX_FREQUENCY_UP_THRESHOLD ||
	    input < MIN_FREQUENCY_UP_THRESHOLD ||
	    input <= dbs_tuners_ins.down_threshold) {
		up(&dbs_sem);
		return -EINVAL;
	}

	dbs_tuners_ins.up_threshold = input;
	up(&dbs_sem);

	return count;
}

static ssize_t store_down_threshold(struct cpufreq_policy *unused,
		const char *buf, size_t count)
{
	unsigned int input;
	int ret;

	ret = sscanf(buf, "%u", &input);

	down(&dbs_sem);
	if (ret != 1 || input > MAX_FREQUENCY_DOWN_THRESHOLD ||
	    input < MIN_FREQUENCY_DOWN_THRESHOLD ||
	    input >= dbs_tuners_ins.up_threshold) {
		up(&dbs_sem);
		return -EINVAL;
	}

	dbs_tuners_ins.down_threshold = input;
	up(&dbs_sem);

	return count;
}

static ssize_t store_ignore_nice(struct cpufreq_policy *policy,
		const char *buf, size_t count)
{
	unsigned int input;
	int ret;
	unsigned int j;

	ret = sscanf(buf, "%u", &input);
	if (ret != 1)
		return -EINVAL;

	if (input > 1)
		input = 1;

	down(&dbs_sem);
	if (input == dbs_tuners_ins.ignore_nice) {	/* nothing to do */
		up(&dbs_sem);
		return count;
	}
	dbs_tuners_ins.ignore_nice = input;

	/* we need to re-evaluate prev_cpu_idle_up and prev_cpu_idle_down */
	for_each_cpu_mask(j, policy->cpus) {
		struct cpu_dbs_info_s *j_dbs_info;
		j_dbs_info = &per_cpu(cpu_dbs_info, j);
		j_dbs_info->cur_policy = policy;

		j_dbs_info->prev_cpu_idle_up =
			kstat_cpu(j).cpustat.idle +
			kstat_cpu(j).cpustat.iowait +
			(!dbs_tuners_ins.ignore_nice
			 ? kstat_cpu(j).cpustat.nice : 0);
		j_dbs_info->prev_cpu_idle_down = j_dbs_info->prev_cpu_idle_up;
	}
	up(&dbs_sem);

	return count;
}

static ssize_t store_freq_step(struct cpufreq_policy *policy,
		const char *buf, size_t count)
{
	unsigned int input;
	int ret;

	ret = sscanf(buf, "%u", &input);
	if (ret != 1)
		return -EINVAL;

	if (input > 100)
		input = 100;

	/* no need to test here if freq_step is zero as the user might actually
	 * want this, they would be crazy though :) */
	down(&dbs_sem);
	dbs_tuners_ins.freq_step = input;
	up(&dbs_sem);

	return count;
}

#define define_one_rw(_name)		\
static struct freq_attr _name =		\
__ATTR(_name, 0644, show_##_name, store_##_name)

define_one_rw(sampling_rate);
define_one_rw(sampling_down_factor);
define_one_rw(up_threshold);
define_one_rw(down_threshold);
define_one_rw(ignore_nice);
define_one_rw(freq_step);

static struct attribute *dbs_attributes[] = {
	&sampling_rate_max.attr,
	&sampling_rate_min.attr,
	&sampling_rate.attr,
	&sampling_down_factor.attr,
	&up_threshold.attr,
	&down_threshold.attr,
	&ignore_nice.attr,
	&freq_step.attr,
	NULL
};

static struct attribute_group dbs_attr_group = {
	.attrs = dbs_attributes,
	.name = "conservative",
};
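/*
 * With the group name above, these tunables show up under
 * /sys/devices/system/cpu/cpuN/cpufreq/conservative/ once the governor
 * is attached to a policy (the usual policy sysfs location for this
 * kernel generation).
 */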

/************************** sysfs end ************************/

static void dbs_check_cpu(int cpu)
{
	unsigned int idle_ticks, up_idle_ticks, down_idle_ticks;
	unsigned int total_idle_ticks;
	unsigned int freq_step;
	unsigned int freq_down_sampling_rate;
	static int down_skip[NR_CPUS];
	static int requested_freq[NR_CPUS];
	static unsigned short init_flag = 0;
	struct cpu_dbs_info_s *this_dbs_info;
	struct cpu_dbs_info_s *dbs_info;

	struct cpufreq_policy *policy;
	unsigned int j;

	this_dbs_info = &per_cpu(cpu_dbs_info, cpu);
	if (!this_dbs_info->enable)
		return;

	policy = this_dbs_info->cur_policy;

	if (init_flag == 0) {
		for ( /* NULL */ ; init_flag < NR_CPUS; init_flag++) {
			dbs_info = &per_cpu(cpu_dbs_info, init_flag);
			/* seed each CPU's own slot, not just the current CPU's */
			requested_freq[init_flag] = dbs_info->cur_policy->cur;
		}
		init_flag = 1;
	}

	/*
	 * The default safe range is 20% to 80%.
	 * Every sampling_rate, we check
	 *	- If current idle time is less than 20%, then we try to
	 *	  increase frequency
	 * Every sampling_rate*sampling_down_factor, we check
	 *	- If current idle time is more than 80%, then we try to
	 *	  decrease frequency
	 *
	 * Frequency increases and decreases both happen in steps of
	 * freq_step percent (5% by default) of the maximum frequency
	 */
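	/*
	 * Worked example of the threshold arithmetic below (illustrative,
	 * assuming HZ=1000): sampling_rate = 100000 us is 100 jiffies per
	 * sample, so with up_threshold = 80,
	 *	up_idle_ticks = (100 - 80) * 100 = 2000,
	 * and an increase is attempted once idle_ticks * 100 drops below
	 * that, i.e. the CPU was idle for fewer than 20 of the last 100
	 * ticks.  The down path is analogous over a window that is
	 * sampling_down_factor times longer.
	 */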

	/* Check for frequency increase */
	total_idle_ticks = kstat_cpu(cpu).cpustat.idle +
		kstat_cpu(cpu).cpustat.iowait;
	/* consider 'nice' tasks as 'idle' time too if required */
	if (dbs_tuners_ins.ignore_nice == 0)
		total_idle_ticks += kstat_cpu(cpu).cpustat.nice;
	idle_ticks = total_idle_ticks -
		this_dbs_info->prev_cpu_idle_up;
	this_dbs_info->prev_cpu_idle_up = total_idle_ticks;

	for_each_cpu_mask(j, policy->cpus) {
		unsigned int tmp_idle_ticks;
		struct cpu_dbs_info_s *j_dbs_info;

		if (j == cpu)
			continue;

		j_dbs_info = &per_cpu(cpu_dbs_info, j);
		/* Check for frequency increase */
		total_idle_ticks = kstat_cpu(j).cpustat.idle +
			kstat_cpu(j).cpustat.iowait;
		/* consider 'nice' too? */
		if (dbs_tuners_ins.ignore_nice == 0)
			total_idle_ticks += kstat_cpu(j).cpustat.nice;
		tmp_idle_ticks = total_idle_ticks -
			j_dbs_info->prev_cpu_idle_up;
		j_dbs_info->prev_cpu_idle_up = total_idle_ticks;

		if (tmp_idle_ticks < idle_ticks)
			idle_ticks = tmp_idle_ticks;
	}

	/* Scale idle ticks by 100 and compare with up and down ticks */
	idle_ticks *= 100;
	up_idle_ticks = (100 - dbs_tuners_ins.up_threshold) *
		usecs_to_jiffies(dbs_tuners_ins.sampling_rate);

	if (idle_ticks < up_idle_ticks) {
		/* if we are already at full speed then break out early */
		if (requested_freq[cpu] == policy->max)
			return;

		freq_step = (dbs_tuners_ins.freq_step * policy->max) / 100;

		/* max freq cannot be less than 100. But who knows.... */
		if (unlikely(freq_step == 0))
			freq_step = 5;

		requested_freq[cpu] += freq_step;
		if (requested_freq[cpu] > policy->max)
			requested_freq[cpu] = policy->max;

		__cpufreq_driver_target(policy, requested_freq[cpu],
			CPUFREQ_RELATION_H);
		down_skip[cpu] = 0;
		this_dbs_info->prev_cpu_idle_down = total_idle_ticks;
		return;
	}

	/* Check for frequency decrease */
	down_skip[cpu]++;
	if (down_skip[cpu] < dbs_tuners_ins.sampling_down_factor)
		return;

	total_idle_ticks = kstat_cpu(cpu).cpustat.idle +
		kstat_cpu(cpu).cpustat.iowait;
	/* consider 'nice' too? */
	if (dbs_tuners_ins.ignore_nice == 0)
		total_idle_ticks += kstat_cpu(cpu).cpustat.nice;
	idle_ticks = total_idle_ticks -
		this_dbs_info->prev_cpu_idle_down;
	this_dbs_info->prev_cpu_idle_down = total_idle_ticks;

	for_each_cpu_mask(j, policy->cpus) {
		unsigned int tmp_idle_ticks;
		struct cpu_dbs_info_s *j_dbs_info;

		if (j == cpu)
			continue;

		j_dbs_info = &per_cpu(cpu_dbs_info, j);
		/* Check for frequency decrease */
		total_idle_ticks = kstat_cpu(j).cpustat.idle +
			kstat_cpu(j).cpustat.iowait;
		/* consider 'nice' too? */
		if (dbs_tuners_ins.ignore_nice == 0)
			total_idle_ticks += kstat_cpu(j).cpustat.nice;
		tmp_idle_ticks = total_idle_ticks -
			j_dbs_info->prev_cpu_idle_down;
		j_dbs_info->prev_cpu_idle_down = total_idle_ticks;

		if (tmp_idle_ticks < idle_ticks)
			idle_ticks = tmp_idle_ticks;
	}

	/* Scale idle ticks by 100 and compare with up and down ticks */
	idle_ticks *= 100;
	down_skip[cpu] = 0;

	freq_down_sampling_rate = dbs_tuners_ins.sampling_rate *
		dbs_tuners_ins.sampling_down_factor;
	down_idle_ticks = (100 - dbs_tuners_ins.down_threshold) *
		usecs_to_jiffies(freq_down_sampling_rate);

	if (idle_ticks > down_idle_ticks) {
		/* if we are already at the lowest speed then break out early
		 * or if we 'cannot' reduce the speed as the user might want
		 * freq_step to be zero */
		if (requested_freq[cpu] == policy->min
		    || dbs_tuners_ins.freq_step == 0)
			return;

		freq_step = (dbs_tuners_ins.freq_step * policy->max) / 100;

		/* max freq cannot be less than 100. But who knows.... */
		if (unlikely(freq_step == 0))
			freq_step = 5;

		requested_freq[cpu] -= freq_step;
		if (requested_freq[cpu] < policy->min)
			requested_freq[cpu] = policy->min;

		__cpufreq_driver_target(policy,
			requested_freq[cpu],
			CPUFREQ_RELATION_H);
		return;
	}
}

static void do_dbs_timer(void *data)
{
	int i;

	down(&dbs_sem);
	for_each_online_cpu(i)
		dbs_check_cpu(i);
	schedule_delayed_work(&dbs_work,
			usecs_to_jiffies(dbs_tuners_ins.sampling_rate));
	up(&dbs_sem);
}

static inline void dbs_timer_init(void)
{
	INIT_WORK(&dbs_work, do_dbs_timer, NULL);
	schedule_delayed_work(&dbs_work,
			usecs_to_jiffies(dbs_tuners_ins.sampling_rate));
	return;
}

static inline void dbs_timer_exit(void)
{
	cancel_delayed_work(&dbs_work);
	return;
}

static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
		unsigned int event)
{
	unsigned int cpu = policy->cpu;
	struct cpu_dbs_info_s *this_dbs_info;
	unsigned int j;

	this_dbs_info = &per_cpu(cpu_dbs_info, cpu);

	switch (event) {
	case CPUFREQ_GOV_START:
		if ((!cpu_online(cpu)) || (!policy->cur))
			return -EINVAL;

		if (policy->cpuinfo.transition_latency >
				(TRANSITION_LATENCY_LIMIT * 1000))
			return -EINVAL;
		if (this_dbs_info->enable) /* Already enabled */
			break;

		down(&dbs_sem);
		for_each_cpu_mask(j, policy->cpus) {
			struct cpu_dbs_info_s *j_dbs_info;
			j_dbs_info = &per_cpu(cpu_dbs_info, j);
			j_dbs_info->cur_policy = policy;

			j_dbs_info->prev_cpu_idle_up =
				kstat_cpu(j).cpustat.idle +
				kstat_cpu(j).cpustat.iowait +
				(!dbs_tuners_ins.ignore_nice
				 ? kstat_cpu(j).cpustat.nice : 0);
			j_dbs_info->prev_cpu_idle_down
				= j_dbs_info->prev_cpu_idle_up;
		}
		this_dbs_info->enable = 1;
		sysfs_create_group(&policy->kobj, &dbs_attr_group);
		dbs_enable++;
		/*
		 * Start the timer-scheduled work when this governor
		 * is used for the first time
		 */
		if (dbs_enable == 1) {
			unsigned int latency;
			/* policy latency is in ns. Convert it to us first */
			latency = policy->cpuinfo.transition_latency;
			if (latency < 1000)
				latency = 1000;

			def_sampling_rate = (latency / 1000) *
					DEF_SAMPLING_RATE_LATENCY_MULTIPLIER;
			dbs_tuners_ins.sampling_rate = def_sampling_rate;
			dbs_tuners_ins.ignore_nice = 0;
			dbs_tuners_ins.freq_step = 5;

			dbs_timer_init();
		}

		up(&dbs_sem);
		break;

	case CPUFREQ_GOV_STOP:
		down(&dbs_sem);
		this_dbs_info->enable = 0;
		sysfs_remove_group(&policy->kobj, &dbs_attr_group);
		dbs_enable--;
		/*
		 * Stop the timer-scheduled work when this governor
		 * is no longer used by any CPU
		 */
		if (dbs_enable == 0)
			dbs_timer_exit();

		up(&dbs_sem);
		break;

	case CPUFREQ_GOV_LIMITS:
		down(&dbs_sem);
		if (policy->max < this_dbs_info->cur_policy->cur)
			__cpufreq_driver_target(
					this_dbs_info->cur_policy,
					policy->max, CPUFREQ_RELATION_H);
		else if (policy->min > this_dbs_info->cur_policy->cur)
			__cpufreq_driver_target(
					this_dbs_info->cur_policy,
					policy->min, CPUFREQ_RELATION_L);
		up(&dbs_sem);
		break;
	}
	return 0;
}

static struct cpufreq_governor cpufreq_gov_dbs = {
	.name		= "conservative",
	.governor	= cpufreq_governor_dbs,
	.owner		= THIS_MODULE,
};

static int __init cpufreq_gov_dbs_init(void)
{
	return cpufreq_register_governor(&cpufreq_gov_dbs);
}

static void __exit cpufreq_gov_dbs_exit(void)
{
	/* Make sure that the scheduled work is indeed not running */
	flush_scheduled_work();

	cpufreq_unregister_governor(&cpufreq_gov_dbs);
}

MODULE_AUTHOR("Alexander Clouter <alex-kernel@digriz.org.uk>");
MODULE_DESCRIPTION("'cpufreq_conservative' - A dynamic cpufreq governor for "
		"Low Latency Frequency Transition capable processors "
		"optimised for use in a battery environment");
MODULE_LICENSE("GPL");

module_init(cpufreq_gov_dbs_init);
module_exit(cpufreq_gov_dbs_exit);
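A minimal userspace sketch for selecting and tuning the governor from C (assumes the sysfs layout of this kernel generation, root privileges, and that cpu0 is cpufreq-managed; the paths and threshold values are illustrative, not part of the patch):

	#include <stdio.h>

	/* Write a string to a sysfs attribute; returns 0 on success. */
	static int sysfs_write(const char *path, const char *val)
	{
		FILE *f = fopen(path, "w");

		if (!f)
			return -1;
		fputs(val, f);
		return fclose(f);
	}

	int main(void)
	{
		/* Select the governor, then widen its idle thresholds. */
		if (sysfs_write("/sys/devices/system/cpu/cpu0/cpufreq/scaling_governor",
				"conservative"))
			perror("scaling_governor");
		if (sysfs_write("/sys/devices/system/cpu/cpu0/cpufreq/conservative/up_threshold",
				"90"))
			perror("up_threshold");
		if (sysfs_write("/sys/devices/system/cpu/cpu0/cpufreq/conservative/down_threshold",
				"30"))
			perror("down_threshold");
		return 0;
	}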