author		Anson Huang <b20788@freescale.com>	2013-07-30 14:29:26 -0400
committer	Nitin Garg <nitin.garg@freescale.com>	2014-04-16 09:00:47 -0400
commit		dba66b56b70741c7b8e950d952b7580a9cf7c3df (patch)
tree		e1ee9d39c77e2d70402e3f36a4b47cba6e039968
parent		3b1087692afbf476df0fcc68cb24405568ac36e9 (diff)
ENGR00273073-2 cpufreq: add interactive governor for cpufreq
cpufreq: interactive: New 'interactive' governor

This governor is designed for latency-sensitive workloads, such as
interactive user interfaces. The interactive governor aims to be
significantly more responsive, ramping the CPU up quickly when
CPU-intensive activity begins.

Existing governors sample CPU load at a particular rate, typically
every X ms. This can lead to under-powering UI threads for the period
of time during which the user begins interacting with a previously-idle
system until the next sample period happens.

The 'interactive' governor uses a different approach. Instead of
sampling the CPU at a specified rate, the governor will check whether
to scale the CPU frequency up soon after coming out of idle. When the
CPU comes out of idle, a timer is configured to fire within 1-2 ticks.
If the CPU is very busy from exiting idle to when the timer fires, then
we assume the CPU is underpowered and ramp to MAX speed.

If the CPU was not sufficiently busy to immediately ramp to MAX speed,
then the governor evaluates the CPU load since the last speed
adjustment, choosing the highest value between that longer-term load
and the short-term load since idle exit to determine the CPU speed to
ramp to.

A realtime thread is used for scaling up, giving the remaining tasks
the CPU performance benefit, unlike existing governors which are more
likely to schedule ramp-up work to occur after performance-starved
tasks have completed.

The tunables for this governor are:

/sys/devices/system/cpu/cpufreq/interactive/min_sample_time:
	The minimum amount of time to spend at the current frequency
	before ramping down. This is to ensure that the governor has
	seen enough historic CPU load data to determine the
	appropriate workload.

/sys/devices/system/cpu/cpufreq/interactive/go_hispeed_load:
	The CPU load at which to ramp to max speed.

Signed-off-by: Mike Chan <mike@android.com>
Signed-off-by: Todd Poynor <toddpoynor@google.com>
Signed-off-by: Allen Martin <amartin@nvidia.com> (submitted improvements)
Signed-off-by: Axel Haslam <axelhaslam@ti.com> (submitted improvements)
Signed-off-by: Anton Vorontsov <anton.vorontsov@linaro.org>
Signed-off-by: Anson Huang <b20788@freescale.com>
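As a usage illustration (not part of this patch), the sketch below selects the
governor and writes two of its tunables from userspace. The CPU number, the
values, and the write_sysfs() helper are assumptions made for the example; it
needs root privileges and a kernel with CONFIG_CPU_FREQ_GOV_INTERACTIVE
enabled (or the cpufreq_interactive module loaded).

/* Hypothetical usage sketch for the sysfs interface described above. */
#include <stdio.h>

static int write_sysfs(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f) {
		perror(path);
		return -1;
	}
	fputs(val, f);
	fclose(f);
	return 0;
}

int main(void)
{
	/* Switch CPU0's policy to the 'interactive' governor. */
	write_sysfs("/sys/devices/system/cpu/cpu0/cpufreq/scaling_governor",
		    "interactive");
	/* Hold a frequency at least 20 ms before ramping down; the value is
	 * in microseconds, matching DEFAULT_MIN_SAMPLE_TIME in the patch. */
	write_sysfs("/sys/devices/system/cpu/cpufreq/interactive/min_sample_time",
		    "20000");
	/* Ramp to hispeed_freq once CPU load reaches 95%. */
	write_sysfs("/sys/devices/system/cpu/cpufreq/interactive/go_hispeed_load",
		    "95");
	return 0;
}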
-rw-r--r--	drivers/cpufreq/Kconfig			 29
-rw-r--r--	drivers/cpufreq/Makefile		  1
-rw-r--r--	drivers/cpufreq/cpufreq_interactive.c	705
-rw-r--r--	include/linux/cpufreq.h			  3
4 files changed, 738 insertions(+), 0 deletions(-)
diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
index 534fcb825153..4fe6bd38c116 100644
--- a/drivers/cpufreq/Kconfig
+++ b/drivers/cpufreq/Kconfig
@@ -102,6 +102,18 @@ config CPU_FREQ_DEFAULT_GOV_CONSERVATIVE
 	  Be aware that not all cpufreq drivers support the conservative
 	  governor. If unsure have a look at the help section of the
 	  driver. Fallback governor will be the performance governor.
+
+config CPU_FREQ_DEFAULT_GOV_INTERACTIVE
+	bool "interactive"
+	select CPU_FREQ_GOV_INTERACTIVE
+	select CPU_FREQ_GOV_PERFORMANCE
+	help
+	  Use the CPUFreq governor 'interactive' as default. This allows
+	  you to get a full dynamic cpu frequency capable system by simply
+	  loading your cpufreq low-level hardware driver, using the
+	  'interactive' governor for latency-sensitive workloads. Fallback
+	  governor will be the performance governor.
+
 endchoice
 
 config CPU_FREQ_GOV_PERFORMANCE
@@ -184,6 +196,23 @@ config CPU_FREQ_GOV_CONSERVATIVE
 
 	  If in doubt, say N.
 
+config CPU_FREQ_GOV_INTERACTIVE
+	tristate "'interactive' cpufreq policy governor"
+	help
+	  'interactive' - This driver adds a dynamic cpufreq policy governor
+	  designed for latency-sensitive workloads.
+
+	  This governor attempts to reduce the latency of clock
+	  increases so that the system is more responsive to
+	  interactive workloads.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called cpufreq_interactive.
+
+	  For details, take a look at linux/Documentation/cpu-freq.
+
+	  If in doubt, say N.
+
 config GENERIC_CPUFREQ_CPU0
 	tristate "Generic CPU0 cpufreq driver"
 	depends on HAVE_CLK && REGULATOR && PM_OPP && OF
diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
index 315b9231feb1..49e83b62900d 100644
--- a/drivers/cpufreq/Makefile
+++ b/drivers/cpufreq/Makefile
@@ -9,6 +9,7 @@ obj-$(CONFIG_CPU_FREQ_GOV_POWERSAVE) += cpufreq_powersave.o
 obj-$(CONFIG_CPU_FREQ_GOV_USERSPACE)	+= cpufreq_userspace.o
 obj-$(CONFIG_CPU_FREQ_GOV_ONDEMAND)	+= cpufreq_ondemand.o
 obj-$(CONFIG_CPU_FREQ_GOV_CONSERVATIVE)	+= cpufreq_conservative.o
+obj-$(CONFIG_CPU_FREQ_GOV_INTERACTIVE)	+= cpufreq_interactive.o
 obj-$(CONFIG_CPU_FREQ_GOV_COMMON)	+= cpufreq_governor.o
 
 # CPUfreq cross-arch helpers
diff --git a/drivers/cpufreq/cpufreq_interactive.c b/drivers/cpufreq/cpufreq_interactive.c
new file mode 100644
index 000000000000..9a6f64f56962
--- /dev/null
+++ b/drivers/cpufreq/cpufreq_interactive.c
@@ -0,0 +1,705 @@
+/*
+ * drivers/cpufreq/cpufreq_interactive.c
+ *
+ * Copyright (C) 2010 Google, Inc.
+ * Copyright (C) 2012-2013 Freescale Semiconductor, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * Author: Mike Chan (mike@android.com)
+ *
+ */
+
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/cpufreq.h>
+#include <linux/mutex.h>
+#include <linux/sched.h>
+#include <linux/tick.h>
+#include <linux/time.h>
+#include <linux/timer.h>
+#include <linux/workqueue.h>
+#include <linux/kthread.h>
+#include <linux/mutex.h>
+#include <linux/kernel_stat.h>
+#include <linux/module.h>
+#include <asm/cputime.h>
+
+static atomic_t active_count = ATOMIC_INIT(0);
+
+struct cpufreq_interactive_cpuinfo {
+	struct timer_list cpu_timer;
+	int timer_idlecancel;
+	u64 time_in_idle;
+	u64 idle_exit_time;
+	u64 timer_run_time;
+	int idling;
+	u64 freq_change_time;
+	u64 freq_change_time_in_idle;
+	struct cpufreq_policy *policy;
+	struct cpufreq_frequency_table *freq_table;
+	unsigned int target_freq;
+	int governor_enabled;
+};
+
+static DEFINE_PER_CPU(struct cpufreq_interactive_cpuinfo, cpuinfo);
+
+/* Workqueues handle frequency scaling */
+static struct task_struct *up_task;
+static struct workqueue_struct *down_wq;
+static struct work_struct freq_scale_down_work;
+static cpumask_t up_cpumask;
+static spinlock_t up_cpumask_lock;
+static cpumask_t down_cpumask;
+static spinlock_t down_cpumask_lock;
+static struct mutex set_speed_lock;
+
+/* Hi speed to bump to from lo speed when load burst (default max) */
+static u64 hispeed_freq;
+
+/* Go to hi speed when CPU load at or above this value. */
+#define DEFAULT_GO_HISPEED_LOAD 95
+static unsigned long go_hispeed_load;
+
+/*
+ * The minimum amount of time to spend at a frequency before we can ramp down.
+ */
+#define DEFAULT_MIN_SAMPLE_TIME (20 * USEC_PER_MSEC)
+static unsigned long min_sample_time;
+
+/*
+ * The sample rate of the timer used to increase frequency
+ */
+#define DEFAULT_TIMER_RATE (50 * USEC_PER_MSEC)
+#define CPUFREQ_IRQ_LEN 60
+#define CPUFREQ_NOTE_LEN 120
+static unsigned long timer_rate;
+
+static int cpufreq_governor_interactive(struct cpufreq_policy *policy,
+		unsigned int event);
+
+#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE
+static
+#endif
+struct cpufreq_governor cpufreq_gov_interactive = {
+	.name = "interactive",
+	.governor = cpufreq_governor_interactive,
+	.max_transition_latency = 10000000,
+	.owner = THIS_MODULE,
+};
+
+static void cpufreq_interactive_timer(unsigned long data)
+{
+	unsigned int delta_idle;
+	unsigned int delta_time;
+	int cpu_load;
+	int load_since_change;
+	u64 time_in_idle;
+	u64 idle_exit_time;
+	struct cpufreq_interactive_cpuinfo *pcpu =
+		&per_cpu(cpuinfo, data);
+	u64 now_idle;
+	unsigned int new_freq;
+	unsigned int index;
+	unsigned long flags;
+
+	smp_rmb();
+
+	if (!pcpu->governor_enabled)
+		goto exit;
+
+	/*
+	 * Once pcpu->timer_run_time is updated to >= pcpu->idle_exit_time,
+	 * this lets idle exit know the current idle time sample has
+	 * been processed, and idle exit can generate a new sample and
+	 * re-arm the timer.  This prevents a concurrent idle
+	 * exit on that CPU from writing a new set of info at the same time
+	 * the timer function runs (the timer function can't use that info
+	 * until more time passes).
+	 */
+	time_in_idle = pcpu->time_in_idle;
+	idle_exit_time = pcpu->idle_exit_time;
+	now_idle = get_cpu_idle_time_us(data, &pcpu->timer_run_time);
+	smp_wmb();
+
+	/* If we raced with cancelling a timer, skip. */
+	if (!idle_exit_time)
+		goto exit;
+
+	delta_idle = (unsigned int)(now_idle - time_in_idle);
+	delta_time = (unsigned int)(pcpu->timer_run_time - idle_exit_time);
+
+	/*
+	 * If timer ran less than 1ms after short-term sample started, retry.
+	 */
+	if (delta_time < 1000)
+		goto rearm;
+
+	if (delta_idle > delta_time)
+		cpu_load = 0;
+	else
+		cpu_load = 100 * (delta_time - delta_idle) / delta_time;
+
+	delta_idle = (unsigned int)(now_idle - pcpu->freq_change_time_in_idle);
+	delta_time = (unsigned int)(pcpu->timer_run_time -
+				    pcpu->freq_change_time);
+
+	if ((delta_time == 0) || (delta_idle > delta_time))
+		load_since_change = 0;
+	else
+		load_since_change =
+			100 * (delta_time - delta_idle) / delta_time;
+
+	/*
+	 * Choose greater of short-term load (since last idle timer
+	 * started or timer function re-armed itself) or long-term load
+	 * (since last frequency change).
+	 */
+	if (load_since_change > cpu_load)
+		cpu_load = load_since_change;
+
+	if (cpu_load >= go_hispeed_load) {
+		if (pcpu->policy->cur == pcpu->policy->min)
+			new_freq = hispeed_freq;
+		else
+			new_freq = pcpu->policy->max * cpu_load / 100;
+	} else {
+		new_freq = pcpu->policy->cur * cpu_load / 100;
+	}
+
+	if (cpufreq_frequency_table_target(pcpu->policy, pcpu->freq_table,
+					   new_freq, CPUFREQ_RELATION_H,
+					   &index)) {
+		pr_warn_once("timer %d: cpufreq_frequency_table_target error\n",
+			     (int) data);
+		goto rearm;
+	}
+
+	new_freq = pcpu->freq_table[index].frequency;
+	if (pcpu->target_freq == new_freq)
+		goto rearm_if_notmax;
+
+	/*
+	 * Do not scale down unless we have been at this frequency for the
+	 * minimum sample time.
+	 */
+	if (new_freq < pcpu->target_freq) {
+		if ((pcpu->timer_run_time - pcpu->freq_change_time)
+		    < min_sample_time)
+			goto rearm;
+	}
+
+	if (new_freq < pcpu->target_freq) {
+		pcpu->target_freq = new_freq;
+		spin_lock_irqsave(&down_cpumask_lock, flags);
+		cpumask_set_cpu(data, &down_cpumask);
+		spin_unlock_irqrestore(&down_cpumask_lock, flags);
+		queue_work(down_wq, &freq_scale_down_work);
+	} else {
+		pcpu->target_freq = new_freq;
+		spin_lock_irqsave(&up_cpumask_lock, flags);
+		cpumask_set_cpu(data, &up_cpumask);
+		spin_unlock_irqrestore(&up_cpumask_lock, flags);
+		wake_up_process(up_task);
+	}
+
+rearm_if_notmax:
+	/*
+	 * Already set max speed and don't see a need to change that,
+	 * wait until next idle to re-evaluate, don't need timer.
+	 */
+	if (pcpu->target_freq == pcpu->policy->max)
+		goto exit;
+
+rearm:
+	if (!timer_pending(&pcpu->cpu_timer)) {
+		/*
+		 * If already at min: if that CPU is idle, don't set timer.
+		 * Else cancel the timer if that CPU goes idle.  We don't
+		 * need to re-evaluate speed until the next idle exit.
+		 */
+		if (pcpu->target_freq == pcpu->policy->min) {
+			smp_rmb();
+
+			if (pcpu->idling)
+				goto exit;
+
+			pcpu->timer_idlecancel = 1;
+		}
+
+		pcpu->time_in_idle = get_cpu_idle_time_us(
+			data, &pcpu->idle_exit_time);
+		mod_timer(&pcpu->cpu_timer,
+			  jiffies + usecs_to_jiffies(timer_rate));
+	}
+
+exit:
+	return;
+}
+
+static void cpufreq_interactive_idle_start(void)
+{
+	struct cpufreq_interactive_cpuinfo *pcpu =
+		&per_cpu(cpuinfo, smp_processor_id());
+	int pending;
+
+	pcpu->idling = 1;
+	smp_wmb();
+	if (!pcpu->governor_enabled)
+		return;
+	pending = timer_pending(&pcpu->cpu_timer);
+
+	if (pcpu->target_freq != pcpu->policy->min) {
+#ifdef CONFIG_SMP
+		/*
+		 * Entering idle while not at lowest speed.  On some
+		 * platforms this can hold the other CPU(s) at that speed
+		 * even though the CPU is idle.  Set a timer to re-evaluate
+		 * speed so this idle CPU doesn't hold the other CPUs above
+		 * min indefinitely.  This should probably be a quirk of
+		 * the CPUFreq driver.
+		 */
+		if (!pending) {
+			pcpu->time_in_idle = get_cpu_idle_time_us(
+				smp_processor_id(), &pcpu->idle_exit_time);
+			pcpu->timer_idlecancel = 0;
+			mod_timer(&pcpu->cpu_timer,
+				  jiffies + usecs_to_jiffies(timer_rate));
+		}
+#endif
+	} else {
+		/*
+		 * If at min speed and entering idle after load has
+		 * already been evaluated, and a timer has been set just in
+		 * case the CPU suddenly goes busy, cancel that timer.  The
+		 * CPU didn't go busy; we'll recheck things upon idle exit.
+		 */
+		if (pending && pcpu->timer_idlecancel) {
+			del_timer(&pcpu->cpu_timer);
+			/*
+			 * Ensure last timer run time is after current idle
+			 * sample start time, so next idle exit will always
+			 * start a new idle sampling period.
+			 */
+			pcpu->idle_exit_time = 0;
+			pcpu->timer_idlecancel = 0;
+		}
+	}
+
+}
+
+static void cpufreq_interactive_idle_end(void)
+{
+	struct cpufreq_interactive_cpuinfo *pcpu =
+		&per_cpu(cpuinfo, smp_processor_id());
+
+	pcpu->idling = 0;
+	smp_wmb();
+
+	/*
+	 * Arm the timer for 1-2 ticks later if not already, and if the timer
+	 * function has already processed the previous load sampling
+	 * interval.  (If the timer is not pending but has not processed
+	 * the previous interval, it is probably racing with us on another
+	 * CPU.  Let it compute load based on the previous sample and then
+	 * re-arm the timer for another interval when it's done, rather
+	 * than updating the interval start time to be "now", which doesn't
+	 * give the timer function enough time to make a decision on this
+	 * run.)
+	 */
+	if (timer_pending(&pcpu->cpu_timer) == 0 &&
+	    pcpu->timer_run_time >= pcpu->idle_exit_time &&
+	    pcpu->governor_enabled) {
+		pcpu->time_in_idle =
+			get_cpu_idle_time_us(smp_processor_id(),
+					     &pcpu->idle_exit_time);
+		pcpu->timer_idlecancel = 0;
+		mod_timer(&pcpu->cpu_timer,
+			  jiffies + usecs_to_jiffies(timer_rate));
+	}
+
+}
+
+static int cpufreq_interactive_up_task(void *data)
+{
+	unsigned int cpu;
+	unsigned long flags;
+	struct cpufreq_interactive_cpuinfo *pcpu;
+
+	while (1) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		spin_lock_irqsave(&up_cpumask_lock, flags);
+
+		if (cpumask_empty(&up_cpumask)) {
+			spin_unlock_irqrestore(&up_cpumask_lock, flags);
+			schedule();
+
+			if (kthread_should_stop())
+				break;
+
+			spin_lock_irqsave(&up_cpumask_lock, flags);
+		}
+
+		set_current_state(TASK_RUNNING);
+		cpumask_clear(&up_cpumask);
+		spin_unlock_irqrestore(&up_cpumask_lock, flags);
+
+		for_each_online_cpu(cpu) {
+			unsigned int j;
+			unsigned int max_freq = 0;
+
+			pcpu = &per_cpu(cpuinfo, cpu);
+			smp_rmb();
+
+			if (!pcpu->governor_enabled)
+				continue;
+
+			mutex_lock(&set_speed_lock);
+
+			for_each_online_cpu(j) {
+				struct cpufreq_interactive_cpuinfo *pjcpu =
+					&per_cpu(cpuinfo, j);
+				if (pjcpu->target_freq > max_freq)
+					max_freq = pjcpu->target_freq;
+			}
+			if (max_freq != pcpu->policy->cur)
+				__cpufreq_driver_target(pcpu->policy,
+							max_freq,
+							CPUFREQ_RELATION_H);
+			mutex_unlock(&set_speed_lock);
+
+			pcpu->freq_change_time_in_idle =
+				get_cpu_idle_time_us(cpu,
+						     &pcpu->freq_change_time);
+		}
+	}
+
+	return 0;
+}
+
+static void cpufreq_interactive_freq_down(struct work_struct *work)
+{
+	unsigned int cpu;
+	unsigned long flags;
+	struct cpufreq_interactive_cpuinfo *pcpu;
+
+	spin_lock_irqsave(&down_cpumask_lock, flags);
+	cpumask_clear(&down_cpumask);
+	spin_unlock_irqrestore(&down_cpumask_lock, flags);
+
+	for_each_online_cpu(cpu) {
+		unsigned int j;
+		unsigned int max_freq = 0;
+
+		pcpu = &per_cpu(cpuinfo, cpu);
+		smp_rmb();
+
+		if (!pcpu->governor_enabled)
+			continue;
+
+		mutex_lock(&set_speed_lock);
+
+		for_each_online_cpu(j) {
+			struct cpufreq_interactive_cpuinfo *pjcpu =
+				&per_cpu(cpuinfo, j);
+
+			if (pjcpu->target_freq > max_freq)
+				max_freq = pjcpu->target_freq;
+		}
+
+		if (max_freq != pcpu->policy->cur)
+			__cpufreq_driver_target(pcpu->policy, max_freq,
+						CPUFREQ_RELATION_H);
+
+		mutex_unlock(&set_speed_lock);
+		pcpu->freq_change_time_in_idle =
+			get_cpu_idle_time_us(cpu,
+					     &pcpu->freq_change_time);
+	}
+}
+
+static ssize_t show_hispeed_freq(struct kobject *kobj,
+				 struct attribute *attr, char *buf)
+{
+	return sprintf(buf, "%llu\n", hispeed_freq);
+}
+
+static ssize_t store_hispeed_freq(struct kobject *kobj,
+				  struct attribute *attr, const char *buf,
+				  size_t count)
+{
+	int ret;
+	u64 val;
+
+	ret = strict_strtoull(buf, 0, &val);
+	if (ret < 0)
+		return ret;
+	hispeed_freq = val;
+	return count;
+}
+
+static struct global_attr hispeed_freq_attr = __ATTR(hispeed_freq, 0644,
+		show_hispeed_freq, store_hispeed_freq);
+
+
+static ssize_t show_go_hispeed_load(struct kobject *kobj,
+				    struct attribute *attr, char *buf)
+{
+	return sprintf(buf, "%lu\n", go_hispeed_load);
+}
+
+static ssize_t store_go_hispeed_load(struct kobject *kobj,
+			struct attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned long val;
+
+	ret = strict_strtoul(buf, 0, &val);
+	if (ret < 0)
+		return ret;
+	go_hispeed_load = val;
+	return count;
+}
+
+static struct global_attr go_hispeed_load_attr = __ATTR(go_hispeed_load, 0644,
+		show_go_hispeed_load, store_go_hispeed_load);
+
+static ssize_t show_min_sample_time(struct kobject *kobj,
+				    struct attribute *attr, char *buf)
+{
+	return sprintf(buf, "%lu\n", min_sample_time);
+}
+
+static ssize_t store_min_sample_time(struct kobject *kobj,
+			struct attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned long val;
+
+	ret = strict_strtoul(buf, 0, &val);
+	if (ret < 0)
+		return ret;
+	min_sample_time = val;
+	return count;
+}
+
+static struct global_attr min_sample_time_attr = __ATTR(min_sample_time, 0644,
+		show_min_sample_time, store_min_sample_time);
+
+static ssize_t show_timer_rate(struct kobject *kobj,
+			       struct attribute *attr, char *buf)
+{
+	return sprintf(buf, "%lu\n", timer_rate);
+}
+
+static ssize_t store_timer_rate(struct kobject *kobj,
+			struct attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned long val;
+
+	ret = strict_strtoul(buf, 0, &val);
+	if (ret < 0)
+		return ret;
+	timer_rate = val;
+	return count;
+}
+
+static struct global_attr timer_rate_attr = __ATTR(timer_rate, 0644,
+		show_timer_rate, store_timer_rate);
+
+static struct attribute *interactive_attributes[] = {
+	&hispeed_freq_attr.attr,
+	&go_hispeed_load_attr.attr,
+	&min_sample_time_attr.attr,
+	&timer_rate_attr.attr,
+	NULL,
+};
+
+static struct attribute_group interactive_attr_group = {
+	.attrs = interactive_attributes,
+	.name = "interactive",
+};
+
+static int cpufreq_governor_interactive(struct cpufreq_policy *policy,
+		unsigned int event)
+{
+	int rc;
+	unsigned int j;
+	struct cpufreq_interactive_cpuinfo *pcpu;
+	struct cpufreq_frequency_table *freq_table;
+
+	switch (event) {
+	case CPUFREQ_GOV_START:
+		if (!cpu_online(policy->cpu))
+			return -EINVAL;
+
+		freq_table =
+			cpufreq_frequency_get_table(policy->cpu);
+
+		for_each_cpu(j, policy->cpus) {
+			pcpu = &per_cpu(cpuinfo, j);
+			pcpu->policy = policy;
+			if (pcpu->idling)
+				pcpu->target_freq = policy->min;
+			else
+				pcpu->target_freq = policy->cur;
+
+			pcpu->freq_table = freq_table;
+			pcpu->freq_change_time_in_idle =
+				get_cpu_idle_time_us(j,
+					&pcpu->freq_change_time);
+			pcpu->governor_enabled = 1;
+			smp_wmb();
+		}
+
+		if (!hispeed_freq)
+			hispeed_freq = policy->max;
+
+		/*
+		 * Do not register the idle hook and create sysfs
+		 * entries if we have already done so.
+		 */
+		if (atomic_inc_return(&active_count) > 1)
+			return 0;
+
+		rc = sysfs_create_group(cpufreq_global_kobject,
+				&interactive_attr_group);
+		if (rc)
+			return rc;
+
+		break;
+
+	case CPUFREQ_GOV_STOP:
+		for_each_cpu(j, policy->cpus) {
+			pcpu = &per_cpu(cpuinfo, j);
+			pcpu->governor_enabled = 0;
+			smp_wmb();
+			del_timer_sync(&pcpu->cpu_timer);
+
+			/*
+			 * Reset idle exit time since we may cancel the timer
+			 * before it can run after the last idle exit time,
+			 * to avoid tripping the check in idle exit for a timer
+			 * that is trying to run.
+			 */
+			pcpu->idle_exit_time = 0;
+		}
+
+		flush_work(&freq_scale_down_work);
+		if (atomic_dec_return(&active_count) > 0)
+			return 0;
+
+		sysfs_remove_group(cpufreq_global_kobject,
+				&interactive_attr_group);
+
+		break;
+
+	case CPUFREQ_GOV_LIMITS:
+		if (policy->max < policy->cur)
+			__cpufreq_driver_target(policy,
+					policy->max, CPUFREQ_RELATION_H);
+		else if (policy->min > policy->cur)
+			__cpufreq_driver_target(policy,
+					policy->min, CPUFREQ_RELATION_L);
+		break;
+	}
+	return 0;
+}
+
+static int cpufreq_interactive_idle_notifier(struct notifier_block *nb,
+					     unsigned long val,
+					     void *data)
+{
+	switch (val) {
+	case IDLE_START:
+		cpufreq_interactive_idle_start();
+		break;
+	case IDLE_END:
+		cpufreq_interactive_idle_end();
+		break;
+	}
+
+	return 0;
+}
+
+static struct notifier_block cpufreq_interactive_idle_nb = {
+	.notifier_call = cpufreq_interactive_idle_notifier,
+};
+
+static int __init cpufreq_interactive_init(void)
+{
+	unsigned int i;
+	struct cpufreq_interactive_cpuinfo *pcpu;
+	struct sched_param param = { .sched_priority = 99 };
+
+	go_hispeed_load = DEFAULT_GO_HISPEED_LOAD;
+	min_sample_time = DEFAULT_MIN_SAMPLE_TIME;
+	timer_rate = DEFAULT_TIMER_RATE;
+
+	/* Initialize per-cpu timers */
+	for_each_possible_cpu(i) {
+		pcpu = &per_cpu(cpuinfo, i);
+		init_timer(&pcpu->cpu_timer);
+		pcpu->cpu_timer.function = cpufreq_interactive_timer;
+		pcpu->cpu_timer.data = i;
+	}
+
+	up_task = kthread_create(cpufreq_interactive_up_task, NULL,
+				 "kinteractiveup");
+	if (IS_ERR(up_task))
+		return PTR_ERR(up_task);
+
+	sched_setscheduler_nocheck(up_task, SCHED_FIFO, &param);
+	get_task_struct(up_task);
+
+	/* No rescuer thread, bind to CPU queuing the work for possibly
+	   warm cache (probably doesn't matter much). */
+	down_wq = alloc_workqueue("kinteractive_down", 0, 1);
+
+	if (!down_wq)
+		goto err_freeuptask;
+
+	INIT_WORK(&freq_scale_down_work,
+		  cpufreq_interactive_freq_down);
+
+	spin_lock_init(&up_cpumask_lock);
+	spin_lock_init(&down_cpumask_lock);
+	mutex_init(&set_speed_lock);
+
+	idle_notifier_register(&cpufreq_interactive_idle_nb);
+
+	return cpufreq_register_governor(&cpufreq_gov_interactive);
+
+err_freeuptask:
+	put_task_struct(up_task);
+	return -ENOMEM;
+}
+
+#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE
+late_initcall(cpufreq_interactive_init);
+#else
+module_init(cpufreq_interactive_init);
+#endif
+
+static void __exit cpufreq_interactive_exit(void)
+{
+	cpufreq_unregister_governor(&cpufreq_gov_interactive);
+	kthread_stop(up_task);
+	put_task_struct(up_task);
+	destroy_workqueue(down_wq);
+}
+
+module_exit(cpufreq_interactive_exit);
+
+MODULE_AUTHOR("Mike Chan <mike@android.com>");
+MODULE_DESCRIPTION("'cpufreq_interactive' - A cpufreq governor for "
+	"Latency sensitive workloads");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 037d36ae63e5..2c35d1e79e33 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -393,6 +393,9 @@ extern struct cpufreq_governor cpufreq_gov_ondemand;
 #elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE)
 extern struct cpufreq_governor cpufreq_gov_conservative;
 #define CPUFREQ_DEFAULT_GOVERNOR	(&cpufreq_gov_conservative)
+#elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE)
+extern struct cpufreq_governor cpufreq_gov_interactive;
+#define CPUFREQ_DEFAULT_GOVERNOR	(&cpufreq_gov_interactive)
 #endif
 
 