path: root/drivers/cpufreq/cpufreq_ondemand.c
author    Viresh Kumar <viresh.kumar@linaro.org>    2012-10-25 18:47:42 -0400
committer Rafael J. Wysocki <rafael.j.wysocki@intel.com>    2012-11-14 18:33:07 -0500
commit    4471a34f9a1f2da220272e823bdb8e8fa83a7661 (patch)
tree      d63e8c16a4b40da97b558d4b955f8e64157b8900 /drivers/cpufreq/cpufreq_ondemand.c
parent    0676f7f2e7d2adec11f40320ca43a8897b8ef906 (diff)
cpufreq: governors: remove redundant code
Initially the ondemand governor was written, and the conservative governor was later created from its code. The conservative governor reused a lot of ondemand's code, but as a copy rather than as routines shared by both governors, which added redundant code that is difficult to maintain. This patch is an attempt to move the common parts of both governors into cpufreq_governor.c to address the issues mentioned above. It should not change anything from a functionality point of view.
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Diffstat (limited to 'drivers/cpufreq/cpufreq_ondemand.c')
-rw-r--r-- drivers/cpufreq/cpufreq_ondemand.c | 698
1 file changed, 225 insertions(+), 473 deletions(-)
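To illustrate the refactoring the commit message describes, here is a small standalone C sketch of the pattern the patch introduces: the common sampling path lives in one place and reaches governor-specific policy through callbacks in a per-governor descriptor, the way the new od_dbs_data descriptor below wires up od_check_cpu and od_dbs_timer for the shared code in cpufreq_governor.c. This is not kernel code; the struct layout and all names here are simplified, illustrative assumptions.

#include <stdio.h>

/* Per-governor descriptor: the common code only sees this interface. */
struct dbs_data_sketch {
	const char *name;
	unsigned int up_threshold;	/* governor tunable */
	void (*gov_check_cpu)(struct dbs_data_sketch *dbs, unsigned int load);
};

/* ondemand-style decision: jump to the maximum when load crosses the threshold. */
static void od_check_cpu_sketch(struct dbs_data_sketch *dbs, unsigned int load)
{
	if (load > dbs->up_threshold)
		printf("%s: load %u%% > %u%%, request max frequency\n",
		       dbs->name, load, dbs->up_threshold);
	else
		printf("%s: load %u%%, look for a lower sustainable frequency\n",
		       dbs->name, load);
}

/* Shared sampling path: the part this patch moves into cpufreq_governor.c. */
static void dbs_check_cpu_sketch(struct dbs_data_sketch *dbs, unsigned int load)
{
	/* common load estimation would happen here */
	dbs->gov_check_cpu(dbs, load);
}

int main(void)
{
	struct dbs_data_sketch od = {
		.name = "ondemand",
		.up_threshold = 80,
		.gov_check_cpu = od_check_cpu_sketch,
	};

	dbs_check_cpu_sketch(&od, 95);	/* above threshold: go to max */
	dbs_check_cpu_sketch(&od, 40);	/* below threshold: scale down */
	return 0;
}

A conservative-style governor would plug a different gov_check_cpu into the same kind of descriptor, which is why the copy-pasted routines in both governors can be deleted.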
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index d7f774bb49dd..bdaab9206303 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -10,24 +10,23 @@
10 * published by the Free Software Foundation. 10 * published by the Free Software Foundation.
11 */ 11 */
12 12
13#include <linux/kernel.h> 13#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14#include <linux/module.h> 14
15#include <linux/init.h>
16#include <linux/cpufreq.h> 15#include <linux/cpufreq.h>
17#include <linux/cpu.h> 16#include <linux/init.h>
18#include <linux/jiffies.h> 17#include <linux/kernel.h>
19#include <linux/kernel_stat.h> 18#include <linux/kernel_stat.h>
19#include <linux/kobject.h>
20#include <linux/module.h>
20#include <linux/mutex.h> 21#include <linux/mutex.h>
21#include <linux/hrtimer.h> 22#include <linux/percpu-defs.h>
23#include <linux/sysfs.h>
22#include <linux/tick.h> 24#include <linux/tick.h>
23#include <linux/ktime.h> 25#include <linux/types.h>
24#include <linux/sched.h>
25 26
26/* 27#include "cpufreq_governor.h"
27 * dbs is used in this file as a short form for demand-based switching
28 * It helps to keep variable names smaller, simpler
29 */
30 28
 29/* On-demand governor macros */
31#define DEF_FREQUENCY_DOWN_DIFFERENTIAL (10) 30#define DEF_FREQUENCY_DOWN_DIFFERENTIAL (10)
32#define DEF_FREQUENCY_UP_THRESHOLD (80) 31#define DEF_FREQUENCY_UP_THRESHOLD (80)
33#define DEF_SAMPLING_DOWN_FACTOR (1) 32#define DEF_SAMPLING_DOWN_FACTOR (1)
@@ -38,80 +37,10 @@
38#define MIN_FREQUENCY_UP_THRESHOLD (11) 37#define MIN_FREQUENCY_UP_THRESHOLD (11)
39#define MAX_FREQUENCY_UP_THRESHOLD (100) 38#define MAX_FREQUENCY_UP_THRESHOLD (100)
40 39
41/* 40static struct dbs_data od_dbs_data;
42 * The polling frequency of this governor depends on the capability of 41static DEFINE_PER_CPU(struct od_cpu_dbs_info_s, od_cpu_dbs_info);
43 * the processor. Default polling frequency is 1000 times the transition
44 * latency of the processor. The governor will work on any processor with
45 * transition latency <= 10mS, using appropriate sampling
46 * rate.
47 * For CPUs with transition latency > 10mS (mostly drivers with CPUFREQ_ETERNAL)
48 * this governor will not work.
49 * All times here are in uS.
50 */
51#define MIN_SAMPLING_RATE_RATIO (2)
52
53static unsigned int min_sampling_rate;
54
55#define LATENCY_MULTIPLIER (1000)
56#define MIN_LATENCY_MULTIPLIER (100)
57#define TRANSITION_LATENCY_LIMIT (10 * 1000 * 1000)
58
59static void do_dbs_timer(struct work_struct *work);
60static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
61 unsigned int event);
62
63#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND
64static
65#endif
66struct cpufreq_governor cpufreq_gov_ondemand = {
67 .name = "ondemand",
68 .governor = cpufreq_governor_dbs,
69 .max_transition_latency = TRANSITION_LATENCY_LIMIT,
70 .owner = THIS_MODULE,
71};
72
73/* Sampling types */
74enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE};
75
76struct cpu_dbs_info_s {
77 cputime64_t prev_cpu_idle;
78 cputime64_t prev_cpu_iowait;
79 cputime64_t prev_cpu_wall;
80 cputime64_t prev_cpu_nice;
81 struct cpufreq_policy *cur_policy;
82 struct delayed_work work;
83 struct cpufreq_frequency_table *freq_table;
84 unsigned int freq_lo;
85 unsigned int freq_lo_jiffies;
86 unsigned int freq_hi_jiffies;
87 unsigned int rate_mult;
88 int cpu;
89 unsigned int sample_type:1;
90 /*
91 * percpu mutex that serializes governor limit change with
92 * do_dbs_timer invocation. We do not want do_dbs_timer to run
93 * when user is changing the governor or limits.
94 */
95 struct mutex timer_mutex;
96};
97static DEFINE_PER_CPU(struct cpu_dbs_info_s, od_cpu_dbs_info);
98
99static unsigned int dbs_enable; /* number of CPUs using this policy */
100 42
101/* 43static struct od_dbs_tuners od_tuners = {
102 * dbs_mutex protects dbs_enable in governor start/stop.
103 */
104static DEFINE_MUTEX(dbs_mutex);
105
106static struct dbs_tuners {
107 unsigned int sampling_rate;
108 unsigned int up_threshold;
109 unsigned int down_differential;
110 unsigned int ignore_nice;
111 unsigned int sampling_down_factor;
112 unsigned int powersave_bias;
113 unsigned int io_is_busy;
114} dbs_tuners_ins = {
115 .up_threshold = DEF_FREQUENCY_UP_THRESHOLD, 44 .up_threshold = DEF_FREQUENCY_UP_THRESHOLD,
116 .sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR, 45 .sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR,
117 .down_differential = DEF_FREQUENCY_DOWN_DIFFERENTIAL, 46 .down_differential = DEF_FREQUENCY_DOWN_DIFFERENTIAL,
@@ -119,14 +48,35 @@ static struct dbs_tuners {
119 .powersave_bias = 0, 48 .powersave_bias = 0,
120}; 49};
121 50
122static inline cputime64_t get_cpu_iowait_time(unsigned int cpu, cputime64_t *wall) 51static void ondemand_powersave_bias_init_cpu(int cpu)
123{ 52{
124 u64 iowait_time = get_cpu_iowait_time_us(cpu, wall); 53 struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu);
125 54
126 if (iowait_time == -1ULL) 55 dbs_info->freq_table = cpufreq_frequency_get_table(cpu);
127 return 0; 56 dbs_info->freq_lo = 0;
57}
128 58
129 return iowait_time; 59/*
60 * Not all CPUs want IO time to be accounted as busy; this depends on how
61 * efficient idling at a higher frequency/voltage is.
62 * Pavel Machek says this is not so for various generations of AMD and old
63 * Intel systems.
 64 * Mike Chan (android.com) claims this is also not true for ARM.
65 * Because of this, whitelist specific known (series) of CPUs by default, and
66 * leave all others up to the user.
67 */
68static int should_io_be_busy(void)
69{
70#if defined(CONFIG_X86)
71 /*
 72 * For Intel, Core 2 (model 15) and later have an efficient idle.
73 */
74 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
75 boot_cpu_data.x86 == 6 &&
76 boot_cpu_data.x86_model >= 15)
77 return 1;
78#endif
79 return 0;
130} 80}
131 81
132/* 82/*
@@ -135,14 +85,13 @@ static inline cputime64_t get_cpu_iowait_time(unsigned int cpu, cputime64_t *wal
135 * freq_lo, and freq_lo_jiffies in percpu area for averaging freqs. 85 * freq_lo, and freq_lo_jiffies in percpu area for averaging freqs.
136 */ 86 */
137static unsigned int powersave_bias_target(struct cpufreq_policy *policy, 87static unsigned int powersave_bias_target(struct cpufreq_policy *policy,
138 unsigned int freq_next, 88 unsigned int freq_next, unsigned int relation)
139 unsigned int relation)
140{ 89{
141 unsigned int freq_req, freq_reduc, freq_avg; 90 unsigned int freq_req, freq_reduc, freq_avg;
142 unsigned int freq_hi, freq_lo; 91 unsigned int freq_hi, freq_lo;
143 unsigned int index = 0; 92 unsigned int index = 0;
144 unsigned int jiffies_total, jiffies_hi, jiffies_lo; 93 unsigned int jiffies_total, jiffies_hi, jiffies_lo;
145 struct cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, 94 struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info,
146 policy->cpu); 95 policy->cpu);
147 96
148 if (!dbs_info->freq_table) { 97 if (!dbs_info->freq_table) {
@@ -154,7 +103,7 @@ static unsigned int powersave_bias_target(struct cpufreq_policy *policy,
154 cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_next, 103 cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_next,
155 relation, &index); 104 relation, &index);
156 freq_req = dbs_info->freq_table[index].frequency; 105 freq_req = dbs_info->freq_table[index].frequency;
157 freq_reduc = freq_req * dbs_tuners_ins.powersave_bias / 1000; 106 freq_reduc = freq_req * od_tuners.powersave_bias / 1000;
158 freq_avg = freq_req - freq_reduc; 107 freq_avg = freq_req - freq_reduc;
159 108
160 /* Find freq bounds for freq_avg in freq_table */ 109 /* Find freq bounds for freq_avg in freq_table */
@@ -173,7 +122,7 @@ static unsigned int powersave_bias_target(struct cpufreq_policy *policy,
173 dbs_info->freq_lo_jiffies = 0; 122 dbs_info->freq_lo_jiffies = 0;
174 return freq_lo; 123 return freq_lo;
175 } 124 }
176 jiffies_total = usecs_to_jiffies(dbs_tuners_ins.sampling_rate); 125 jiffies_total = usecs_to_jiffies(od_tuners.sampling_rate);
177 jiffies_hi = (freq_avg - freq_lo) * jiffies_total; 126 jiffies_hi = (freq_avg - freq_lo) * jiffies_total;
178 jiffies_hi += ((freq_hi - freq_lo) / 2); 127 jiffies_hi += ((freq_hi - freq_lo) / 2);
179 jiffies_hi /= (freq_hi - freq_lo); 128 jiffies_hi /= (freq_hi - freq_lo);
@@ -184,13 +133,6 @@ static unsigned int powersave_bias_target(struct cpufreq_policy *policy,
184 return freq_hi; 133 return freq_hi;
185} 134}
186 135
187static void ondemand_powersave_bias_init_cpu(int cpu)
188{
189 struct cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu);
190 dbs_info->freq_table = cpufreq_frequency_get_table(cpu);
191 dbs_info->freq_lo = 0;
192}
193
194static void ondemand_powersave_bias_init(void) 136static void ondemand_powersave_bias_init(void)
195{ 137{
196 int i; 138 int i;
@@ -199,53 +141,138 @@ static void ondemand_powersave_bias_init(void)
199 } 141 }
200} 142}
201 143
202/************************** sysfs interface ************************/ 144static void dbs_freq_increase(struct cpufreq_policy *p, unsigned int freq)
145{
146 if (od_tuners.powersave_bias)
147 freq = powersave_bias_target(p, freq, CPUFREQ_RELATION_H);
148 else if (p->cur == p->max)
149 return;
203 150
204static ssize_t show_sampling_rate_min(struct kobject *kobj, 151 __cpufreq_driver_target(p, freq, od_tuners.powersave_bias ?
205 struct attribute *attr, char *buf) 152 CPUFREQ_RELATION_L : CPUFREQ_RELATION_H);
153}
154
155/*
156 * Every sampling_rate, we check, if current idle time is less than 20%
 157 * (default), then we try to increase frequency. Every sampling_rate, we look for
 158 * the lowest frequency which can sustain the load while keeping idle time
 159 * over 30%. If such a frequency exists, we try to decrease to this frequency.
160 *
161 * Any frequency increase takes it to the maximum frequency. Frequency reduction
162 * happens at minimum steps of 5% (default) of current frequency
163 */
164static void od_check_cpu(int cpu, unsigned int load_freq)
206{ 165{
207 return sprintf(buf, "%u\n", min_sampling_rate); 166 struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu);
167 struct cpufreq_policy *policy = dbs_info->cdbs.cur_policy;
168
169 dbs_info->freq_lo = 0;
170
171 /* Check for frequency increase */
172 if (load_freq > od_tuners.up_threshold * policy->cur) {
173 /* If switching to max speed, apply sampling_down_factor */
174 if (policy->cur < policy->max)
175 dbs_info->rate_mult =
176 od_tuners.sampling_down_factor;
177 dbs_freq_increase(policy, policy->max);
178 return;
179 }
180
181 /* Check for frequency decrease */
182 /* if we cannot reduce the frequency anymore, break out early */
183 if (policy->cur == policy->min)
184 return;
185
186 /*
187 * The optimal frequency is the frequency that is the lowest that can
188 * support the current CPU usage without triggering the up policy. To be
189 * safe, we focus 10 points under the threshold.
190 */
191 if (load_freq < (od_tuners.up_threshold - od_tuners.down_differential) *
192 policy->cur) {
193 unsigned int freq_next;
194 freq_next = load_freq / (od_tuners.up_threshold -
195 od_tuners.down_differential);
196
197 /* No longer fully busy, reset rate_mult */
198 dbs_info->rate_mult = 1;
199
200 if (freq_next < policy->min)
201 freq_next = policy->min;
202
203 if (!od_tuners.powersave_bias) {
204 __cpufreq_driver_target(policy, freq_next,
205 CPUFREQ_RELATION_L);
206 } else {
207 int freq = powersave_bias_target(policy, freq_next,
208 CPUFREQ_RELATION_L);
209 __cpufreq_driver_target(policy, freq,
210 CPUFREQ_RELATION_L);
211 }
212 }
208} 213}
209 214
210define_one_global_ro(sampling_rate_min); 215static void od_dbs_timer(struct work_struct *work)
216{
217 struct od_cpu_dbs_info_s *dbs_info =
218 container_of(work, struct od_cpu_dbs_info_s, cdbs.work.work);
219 unsigned int cpu = dbs_info->cdbs.cpu;
220 int delay, sample_type = dbs_info->sample_type;
211 221
212/* cpufreq_ondemand Governor Tunables */ 222 mutex_lock(&dbs_info->cdbs.timer_mutex);
213#define show_one(file_name, object) \ 223
214static ssize_t show_##file_name \ 224 /* Common NORMAL_SAMPLE setup */
215(struct kobject *kobj, struct attribute *attr, char *buf) \ 225 dbs_info->sample_type = OD_NORMAL_SAMPLE;
216{ \ 226 if (sample_type == OD_SUB_SAMPLE) {
217 return sprintf(buf, "%u\n", dbs_tuners_ins.object); \ 227 delay = dbs_info->freq_lo_jiffies;
228 __cpufreq_driver_target(dbs_info->cdbs.cur_policy,
229 dbs_info->freq_lo, CPUFREQ_RELATION_H);
230 } else {
231 dbs_check_cpu(&od_dbs_data, cpu);
232 if (dbs_info->freq_lo) {
233 /* Setup timer for SUB_SAMPLE */
234 dbs_info->sample_type = OD_SUB_SAMPLE;
235 delay = dbs_info->freq_hi_jiffies;
236 } else {
237 delay = delay_for_sampling_rate(dbs_info->rate_mult);
238 }
239 }
240
241 schedule_delayed_work_on(cpu, &dbs_info->cdbs.work, delay);
242 mutex_unlock(&dbs_info->cdbs.timer_mutex);
243}
244
245/************************** sysfs interface ************************/
246
247static ssize_t show_sampling_rate_min(struct kobject *kobj,
248 struct attribute *attr, char *buf)
249{
250 return sprintf(buf, "%u\n", od_dbs_data.min_sampling_rate);
218} 251}
219show_one(sampling_rate, sampling_rate);
220show_one(io_is_busy, io_is_busy);
221show_one(up_threshold, up_threshold);
222show_one(sampling_down_factor, sampling_down_factor);
223show_one(ignore_nice_load, ignore_nice);
224show_one(powersave_bias, powersave_bias);
225 252
226/** 253/**
227 * update_sampling_rate - update sampling rate effective immediately if needed. 254 * update_sampling_rate - update sampling rate effective immediately if needed.
228 * @new_rate: new sampling rate 255 * @new_rate: new sampling rate
229 * 256 *
 230 * If new rate is smaller than the old, simply updating 257 * If new rate is smaller than the old, simply updating
 231 * dbs_tuners_ins.sampling_rate might not be appropriate. For example, 258 * od_tuners.sampling_rate might not be appropriate. For example, if the
 232 * if the original sampling_rate was 1 second and the requested new sampling 259 * original sampling_rate was 1 second and the requested new sampling rate is 10
 233 * rate is 10 ms because the user needs immediate reaction from ondemand 260 * ms because the user needs immediate reaction from ondemand governor, but not
 234 * governor, but not sure if higher frequency will be required or not, 261 * sure if higher frequency will be required or not, then, the governor may
 235 * then, the governor may change the sampling rate too late; up to 1 second 262 * change the sampling rate too late; up to 1 second later. Thus, if we are
 236 * later. Thus, if we are reducing the sampling rate, we need to make the 263 * reducing the sampling rate, we need to make the new value effective
 237 * new value effective immediately. 264 * immediately.
238 */ 265 */
239static void update_sampling_rate(unsigned int new_rate) 266static void update_sampling_rate(unsigned int new_rate)
240{ 267{
241 int cpu; 268 int cpu;
242 269
243 dbs_tuners_ins.sampling_rate = new_rate 270 od_tuners.sampling_rate = new_rate = max(new_rate,
244 = max(new_rate, min_sampling_rate); 271 od_dbs_data.min_sampling_rate);
245 272
246 for_each_online_cpu(cpu) { 273 for_each_online_cpu(cpu) {
247 struct cpufreq_policy *policy; 274 struct cpufreq_policy *policy;
248 struct cpu_dbs_info_s *dbs_info; 275 struct od_cpu_dbs_info_s *dbs_info;
249 unsigned long next_sampling, appointed_at; 276 unsigned long next_sampling, appointed_at;
250 277
251 policy = cpufreq_cpu_get(cpu); 278 policy = cpufreq_cpu_get(cpu);
@@ -254,28 +281,28 @@ static void update_sampling_rate(unsigned int new_rate)
254 dbs_info = &per_cpu(od_cpu_dbs_info, policy->cpu); 281 dbs_info = &per_cpu(od_cpu_dbs_info, policy->cpu);
255 cpufreq_cpu_put(policy); 282 cpufreq_cpu_put(policy);
256 283
257 mutex_lock(&dbs_info->timer_mutex); 284 mutex_lock(&dbs_info->cdbs.timer_mutex);
258 285
259 if (!delayed_work_pending(&dbs_info->work)) { 286 if (!delayed_work_pending(&dbs_info->cdbs.work)) {
260 mutex_unlock(&dbs_info->timer_mutex); 287 mutex_unlock(&dbs_info->cdbs.timer_mutex);
261 continue; 288 continue;
262 } 289 }
263 290
264 next_sampling = jiffies + usecs_to_jiffies(new_rate); 291 next_sampling = jiffies + usecs_to_jiffies(new_rate);
265 appointed_at = dbs_info->work.timer.expires; 292 appointed_at = dbs_info->cdbs.work.timer.expires;
266
267 293
268 if (time_before(next_sampling, appointed_at)) { 294 if (time_before(next_sampling, appointed_at)) {
269 295
270 mutex_unlock(&dbs_info->timer_mutex); 296 mutex_unlock(&dbs_info->cdbs.timer_mutex);
271 cancel_delayed_work_sync(&dbs_info->work); 297 cancel_delayed_work_sync(&dbs_info->cdbs.work);
272 mutex_lock(&dbs_info->timer_mutex); 298 mutex_lock(&dbs_info->cdbs.timer_mutex);
273 299
274 schedule_delayed_work_on(dbs_info->cpu, &dbs_info->work, 300 schedule_delayed_work_on(dbs_info->cdbs.cpu,
275 usecs_to_jiffies(new_rate)); 301 &dbs_info->cdbs.work,
302 usecs_to_jiffies(new_rate));
276 303
277 } 304 }
278 mutex_unlock(&dbs_info->timer_mutex); 305 mutex_unlock(&dbs_info->cdbs.timer_mutex);
279 } 306 }
280} 307}
281 308
@@ -300,7 +327,7 @@ static ssize_t store_io_is_busy(struct kobject *a, struct attribute *b,
300 ret = sscanf(buf, "%u", &input); 327 ret = sscanf(buf, "%u", &input);
301 if (ret != 1) 328 if (ret != 1)
302 return -EINVAL; 329 return -EINVAL;
303 dbs_tuners_ins.io_is_busy = !!input; 330 od_tuners.io_is_busy = !!input;
304 return count; 331 return count;
305} 332}
306 333
@@ -315,7 +342,7 @@ static ssize_t store_up_threshold(struct kobject *a, struct attribute *b,
315 input < MIN_FREQUENCY_UP_THRESHOLD) { 342 input < MIN_FREQUENCY_UP_THRESHOLD) {
316 return -EINVAL; 343 return -EINVAL;
317 } 344 }
318 dbs_tuners_ins.up_threshold = input; 345 od_tuners.up_threshold = input;
319 return count; 346 return count;
320} 347}
321 348
@@ -328,12 +355,12 @@ static ssize_t store_sampling_down_factor(struct kobject *a,
328 355
329 if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1) 356 if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1)
330 return -EINVAL; 357 return -EINVAL;
331 dbs_tuners_ins.sampling_down_factor = input; 358 od_tuners.sampling_down_factor = input;
332 359
333 /* Reset down sampling multiplier in case it was active */ 360 /* Reset down sampling multiplier in case it was active */
334 for_each_online_cpu(j) { 361 for_each_online_cpu(j) {
335 struct cpu_dbs_info_s *dbs_info; 362 struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info,
336 dbs_info = &per_cpu(od_cpu_dbs_info, j); 363 j);
337 dbs_info->rate_mult = 1; 364 dbs_info->rate_mult = 1;
338 } 365 }
339 return count; 366 return count;
@@ -354,19 +381,20 @@ static ssize_t store_ignore_nice_load(struct kobject *a, struct attribute *b,
354 if (input > 1) 381 if (input > 1)
355 input = 1; 382 input = 1;
356 383
357 if (input == dbs_tuners_ins.ignore_nice) { /* nothing to do */ 384 if (input == od_tuners.ignore_nice) { /* nothing to do */
358 return count; 385 return count;
359 } 386 }
360 dbs_tuners_ins.ignore_nice = input; 387 od_tuners.ignore_nice = input;
361 388
362 /* we need to re-evaluate prev_cpu_idle */ 389 /* we need to re-evaluate prev_cpu_idle */
363 for_each_online_cpu(j) { 390 for_each_online_cpu(j) {
364 struct cpu_dbs_info_s *dbs_info; 391 struct od_cpu_dbs_info_s *dbs_info;
365 dbs_info = &per_cpu(od_cpu_dbs_info, j); 392 dbs_info = &per_cpu(od_cpu_dbs_info, j);
366 dbs_info->prev_cpu_idle = get_cpu_idle_time(j, 393 dbs_info->cdbs.prev_cpu_idle = get_cpu_idle_time(j,
367 &dbs_info->prev_cpu_wall); 394 &dbs_info->cdbs.prev_cpu_wall);
368 if (dbs_tuners_ins.ignore_nice) 395 if (od_tuners.ignore_nice)
369 dbs_info->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; 396 dbs_info->cdbs.prev_cpu_nice =
397 kcpustat_cpu(j).cpustat[CPUTIME_NICE];
370 398
371 } 399 }
372 return count; 400 return count;
@@ -385,17 +413,25 @@ static ssize_t store_powersave_bias(struct kobject *a, struct attribute *b,
385 if (input > 1000) 413 if (input > 1000)
386 input = 1000; 414 input = 1000;
387 415
388 dbs_tuners_ins.powersave_bias = input; 416 od_tuners.powersave_bias = input;
389 ondemand_powersave_bias_init(); 417 ondemand_powersave_bias_init();
390 return count; 418 return count;
391} 419}
392 420
421show_one(od, sampling_rate, sampling_rate);
422show_one(od, io_is_busy, io_is_busy);
423show_one(od, up_threshold, up_threshold);
424show_one(od, sampling_down_factor, sampling_down_factor);
425show_one(od, ignore_nice_load, ignore_nice);
426show_one(od, powersave_bias, powersave_bias);
427
393define_one_global_rw(sampling_rate); 428define_one_global_rw(sampling_rate);
394define_one_global_rw(io_is_busy); 429define_one_global_rw(io_is_busy);
395define_one_global_rw(up_threshold); 430define_one_global_rw(up_threshold);
396define_one_global_rw(sampling_down_factor); 431define_one_global_rw(sampling_down_factor);
397define_one_global_rw(ignore_nice_load); 432define_one_global_rw(ignore_nice_load);
398define_one_global_rw(powersave_bias); 433define_one_global_rw(powersave_bias);
434define_one_global_ro(sampling_rate_min);
399 435
400static struct attribute *dbs_attributes[] = { 436static struct attribute *dbs_attributes[] = {
401 &sampling_rate_min.attr, 437 &sampling_rate_min.attr,
@@ -408,354 +444,71 @@ static struct attribute *dbs_attributes[] = {
408 NULL 444 NULL
409}; 445};
410 446
411static struct attribute_group dbs_attr_group = { 447static struct attribute_group od_attr_group = {
412 .attrs = dbs_attributes, 448 .attrs = dbs_attributes,
413 .name = "ondemand", 449 .name = "ondemand",
414}; 450};
415 451
416/************************** sysfs end ************************/ 452/************************** sysfs end ************************/
417 453
418static void dbs_freq_increase(struct cpufreq_policy *p, unsigned int freq) 454define_get_cpu_dbs_routines(od_cpu_dbs_info);
419{
420 if (dbs_tuners_ins.powersave_bias)
421 freq = powersave_bias_target(p, freq, CPUFREQ_RELATION_H);
422 else if (p->cur == p->max)
423 return;
424
425 __cpufreq_driver_target(p, freq, dbs_tuners_ins.powersave_bias ?
426 CPUFREQ_RELATION_L : CPUFREQ_RELATION_H);
427}
428
429static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
430{
431 unsigned int max_load_freq;
432
433 struct cpufreq_policy *policy;
434 unsigned int j;
435
436 this_dbs_info->freq_lo = 0;
437 policy = this_dbs_info->cur_policy;
438
439 /*
440 * Every sampling_rate, we check, if current idle time is less
441 * than 20% (default), then we try to increase frequency
 442 * Every sampling_rate, we look for the lowest
 443 * frequency which can sustain the load while keeping idle time over
 444 * 30%. If such a frequency exists, we try to decrease to this frequency.
445 *
446 * Any frequency increase takes it to the maximum frequency.
447 * Frequency reduction happens at minimum steps of
448 * 5% (default) of current frequency
449 */
450
451 /* Get Absolute Load - in terms of freq */
452 max_load_freq = 0;
453
454 for_each_cpu(j, policy->cpus) {
455 struct cpu_dbs_info_s *j_dbs_info;
456 cputime64_t cur_wall_time, cur_idle_time, cur_iowait_time;
457 unsigned int idle_time, wall_time, iowait_time;
458 unsigned int load, load_freq;
459 int freq_avg;
460
461 j_dbs_info = &per_cpu(od_cpu_dbs_info, j);
462
463 cur_idle_time = get_cpu_idle_time(j, &cur_wall_time);
464 cur_iowait_time = get_cpu_iowait_time(j, &cur_wall_time);
465
466 wall_time = (unsigned int)
467 (cur_wall_time - j_dbs_info->prev_cpu_wall);
468 j_dbs_info->prev_cpu_wall = cur_wall_time;
469
470 idle_time = (unsigned int)
471 (cur_idle_time - j_dbs_info->prev_cpu_idle);
472 j_dbs_info->prev_cpu_idle = cur_idle_time;
473
474 iowait_time = (unsigned int)
475 (cur_iowait_time - j_dbs_info->prev_cpu_iowait);
476 j_dbs_info->prev_cpu_iowait = cur_iowait_time;
477
478 if (dbs_tuners_ins.ignore_nice) {
479 u64 cur_nice;
480 unsigned long cur_nice_jiffies;
481
482 cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE] -
483 j_dbs_info->prev_cpu_nice;
484 /*
485 * Assumption: nice time between sampling periods will
486 * be less than 2^32 jiffies for 32 bit sys
487 */
488 cur_nice_jiffies = (unsigned long)
489 cputime64_to_jiffies64(cur_nice);
490
491 j_dbs_info->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE];
492 idle_time += jiffies_to_usecs(cur_nice_jiffies);
493 }
494
495 /*
496 * For the purpose of ondemand, waiting for disk IO is an
497 * indication that you're performance critical, and not that
498 * the system is actually idle. So subtract the iowait time
499 * from the cpu idle time.
500 */
501
502 if (dbs_tuners_ins.io_is_busy && idle_time >= iowait_time)
503 idle_time -= iowait_time;
504 455
505 if (unlikely(!wall_time || wall_time < idle_time)) 456static struct od_ops od_ops = {
506 continue; 457 .io_busy = should_io_be_busy,
507 458 .powersave_bias_init_cpu = ondemand_powersave_bias_init_cpu,
508 load = 100 * (wall_time - idle_time) / wall_time; 459 .powersave_bias_target = powersave_bias_target,
509 460 .freq_increase = dbs_freq_increase,
510 freq_avg = __cpufreq_driver_getavg(policy, j); 461};
511 if (freq_avg <= 0)
512 freq_avg = policy->cur;
513
514 load_freq = load * freq_avg;
515 if (load_freq > max_load_freq)
516 max_load_freq = load_freq;
517 }
518
519 /* Check for frequency increase */
520 if (max_load_freq > dbs_tuners_ins.up_threshold * policy->cur) {
521 /* If switching to max speed, apply sampling_down_factor */
522 if (policy->cur < policy->max)
523 this_dbs_info->rate_mult =
524 dbs_tuners_ins.sampling_down_factor;
525 dbs_freq_increase(policy, policy->max);
526 return;
527 }
528
529 /* Check for frequency decrease */
530 /* if we cannot reduce the frequency anymore, break out early */
531 if (policy->cur == policy->min)
532 return;
533
534 /*
535 * The optimal frequency is the frequency that is the lowest that
536 * can support the current CPU usage without triggering the up
537 * policy. To be safe, we focus 10 points under the threshold.
538 */
539 if (max_load_freq <
540 (dbs_tuners_ins.up_threshold - dbs_tuners_ins.down_differential) *
541 policy->cur) {
542 unsigned int freq_next;
543 freq_next = max_load_freq /
544 (dbs_tuners_ins.up_threshold -
545 dbs_tuners_ins.down_differential);
546
547 /* No longer fully busy, reset rate_mult */
548 this_dbs_info->rate_mult = 1;
549
550 if (freq_next < policy->min)
551 freq_next = policy->min;
552
553 if (!dbs_tuners_ins.powersave_bias) {
554 __cpufreq_driver_target(policy, freq_next,
555 CPUFREQ_RELATION_L);
556 } else {
557 int freq = powersave_bias_target(policy, freq_next,
558 CPUFREQ_RELATION_L);
559 __cpufreq_driver_target(policy, freq,
560 CPUFREQ_RELATION_L);
561 }
562 }
563}
564
565static void do_dbs_timer(struct work_struct *work)
566{
567 struct cpu_dbs_info_s *dbs_info =
568 container_of(work, struct cpu_dbs_info_s, work.work);
569 unsigned int cpu = dbs_info->cpu;
570 int sample_type = dbs_info->sample_type;
571
572 int delay;
573
574 mutex_lock(&dbs_info->timer_mutex);
575
576 /* Common NORMAL_SAMPLE setup */
577 dbs_info->sample_type = DBS_NORMAL_SAMPLE;
578 if (!dbs_tuners_ins.powersave_bias ||
579 sample_type == DBS_NORMAL_SAMPLE) {
580 dbs_check_cpu(dbs_info);
581 if (dbs_info->freq_lo) {
582 /* Setup timer for SUB_SAMPLE */
583 dbs_info->sample_type = DBS_SUB_SAMPLE;
584 delay = dbs_info->freq_hi_jiffies;
585 } else {
586 /* We want all CPUs to do sampling nearly on
587 * same jiffy
588 */
589 delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate
590 * dbs_info->rate_mult);
591
592 if (num_online_cpus() > 1)
593 delay -= jiffies % delay;
594 }
595 } else {
596 __cpufreq_driver_target(dbs_info->cur_policy,
597 dbs_info->freq_lo, CPUFREQ_RELATION_H);
598 delay = dbs_info->freq_lo_jiffies;
599 }
600 schedule_delayed_work_on(cpu, &dbs_info->work, delay);
601 mutex_unlock(&dbs_info->timer_mutex);
602}
603
604static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info)
605{
606 /* We want all CPUs to do sampling nearly on same jiffy */
607 int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
608
609 if (num_online_cpus() > 1)
610 delay -= jiffies % delay;
611 462
612 dbs_info->sample_type = DBS_NORMAL_SAMPLE; 463static struct dbs_data od_dbs_data = {
613 INIT_DEFERRABLE_WORK(&dbs_info->work, do_dbs_timer); 464 .governor = GOV_ONDEMAND,
614 schedule_delayed_work_on(dbs_info->cpu, &dbs_info->work, delay); 465 .attr_group = &od_attr_group,
615} 466 .tuners = &od_tuners,
467 .get_cpu_cdbs = get_cpu_cdbs,
468 .get_cpu_dbs_info_s = get_cpu_dbs_info_s,
469 .gov_dbs_timer = od_dbs_timer,
470 .gov_check_cpu = od_check_cpu,
471 .gov_ops = &od_ops,
472};
616 473
617static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info) 474static int od_cpufreq_governor_dbs(struct cpufreq_policy *policy,
475 unsigned int event)
618{ 476{
619 cancel_delayed_work_sync(&dbs_info->work); 477 return cpufreq_governor_dbs(&od_dbs_data, policy, event);
620} 478}
621 479
622/* 480#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND
 623 * Not all CPUs want IO time to be accounted as busy; this depends on how 481static
624 * efficient idling at a higher frequency/voltage is.
625 * Pavel Machek says this is not so for various generations of AMD and old
626 * Intel systems.
 627 * Mike Chan (android.com) claims this is also not true for ARM.
628 * Because of this, whitelist specific known (series) of CPUs by default, and
629 * leave all others up to the user.
630 */
631static int should_io_be_busy(void)
632{
633#if defined(CONFIG_X86)
634 /*
 635 * For Intel, Core 2 (model 15) and later have an efficient idle.
636 */
637 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
638 boot_cpu_data.x86 == 6 &&
639 boot_cpu_data.x86_model >= 15)
640 return 1;
641#endif 482#endif
642 return 0; 483struct cpufreq_governor cpufreq_gov_ondemand = {
643} 484 .name = "ondemand",
644 485 .governor = od_cpufreq_governor_dbs,
645static int cpufreq_governor_dbs(struct cpufreq_policy *policy, 486 .max_transition_latency = TRANSITION_LATENCY_LIMIT,
646 unsigned int event) 487 .owner = THIS_MODULE,
647{ 488};
648 unsigned int cpu = policy->cpu;
649 struct cpu_dbs_info_s *this_dbs_info;
650 unsigned int j;
651 int rc;
652
653 this_dbs_info = &per_cpu(od_cpu_dbs_info, cpu);
654
655 switch (event) {
656 case CPUFREQ_GOV_START:
657 if ((!cpu_online(cpu)) || (!policy->cur))
658 return -EINVAL;
659
660 mutex_lock(&dbs_mutex);
661
662 dbs_enable++;
663 for_each_cpu(j, policy->cpus) {
664 struct cpu_dbs_info_s *j_dbs_info;
665 j_dbs_info = &per_cpu(od_cpu_dbs_info, j);
666 j_dbs_info->cur_policy = policy;
667
668 j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
669 &j_dbs_info->prev_cpu_wall);
670 if (dbs_tuners_ins.ignore_nice)
671 j_dbs_info->prev_cpu_nice =
672 kcpustat_cpu(j).cpustat[CPUTIME_NICE];
673 }
674 this_dbs_info->cpu = cpu;
675 this_dbs_info->rate_mult = 1;
676 ondemand_powersave_bias_init_cpu(cpu);
677 /*
678 * Start the timerschedule work, when this governor
679 * is used for first time
680 */
681 if (dbs_enable == 1) {
682 unsigned int latency;
683
684 rc = sysfs_create_group(cpufreq_global_kobject,
685 &dbs_attr_group);
686 if (rc) {
687 mutex_unlock(&dbs_mutex);
688 return rc;
689 }
690
691 /* policy latency is in nS. Convert it to uS first */
692 latency = policy->cpuinfo.transition_latency / 1000;
693 if (latency == 0)
694 latency = 1;
695 /* Bring kernel and HW constraints together */
696 min_sampling_rate = max(min_sampling_rate,
697 MIN_LATENCY_MULTIPLIER * latency);
698 dbs_tuners_ins.sampling_rate =
699 max(min_sampling_rate,
700 latency * LATENCY_MULTIPLIER);
701 dbs_tuners_ins.io_is_busy = should_io_be_busy();
702 }
703 mutex_unlock(&dbs_mutex);
704
705 mutex_init(&this_dbs_info->timer_mutex);
706 dbs_timer_init(this_dbs_info);
707 break;
708
709 case CPUFREQ_GOV_STOP:
710 dbs_timer_exit(this_dbs_info);
711
712 mutex_lock(&dbs_mutex);
713 mutex_destroy(&this_dbs_info->timer_mutex);
714 dbs_enable--;
715 mutex_unlock(&dbs_mutex);
716 if (!dbs_enable)
717 sysfs_remove_group(cpufreq_global_kobject,
718 &dbs_attr_group);
719
720 break;
721
722 case CPUFREQ_GOV_LIMITS:
723 mutex_lock(&this_dbs_info->timer_mutex);
724 if (policy->max < this_dbs_info->cur_policy->cur)
725 __cpufreq_driver_target(this_dbs_info->cur_policy,
726 policy->max, CPUFREQ_RELATION_H);
727 else if (policy->min > this_dbs_info->cur_policy->cur)
728 __cpufreq_driver_target(this_dbs_info->cur_policy,
729 policy->min, CPUFREQ_RELATION_L);
730 dbs_check_cpu(this_dbs_info);
731 mutex_unlock(&this_dbs_info->timer_mutex);
732 break;
733 }
734 return 0;
735}
736 489
737static int __init cpufreq_gov_dbs_init(void) 490static int __init cpufreq_gov_dbs_init(void)
738{ 491{
739 u64 idle_time; 492 u64 idle_time;
740 int cpu = get_cpu(); 493 int cpu = get_cpu();
741 494
495 mutex_init(&od_dbs_data.mutex);
742 idle_time = get_cpu_idle_time_us(cpu, NULL); 496 idle_time = get_cpu_idle_time_us(cpu, NULL);
743 put_cpu(); 497 put_cpu();
744 if (idle_time != -1ULL) { 498 if (idle_time != -1ULL) {
745 /* Idle micro accounting is supported. Use finer thresholds */ 499 /* Idle micro accounting is supported. Use finer thresholds */
746 dbs_tuners_ins.up_threshold = MICRO_FREQUENCY_UP_THRESHOLD; 500 od_tuners.up_threshold = MICRO_FREQUENCY_UP_THRESHOLD;
747 dbs_tuners_ins.down_differential = 501 od_tuners.down_differential = MICRO_FREQUENCY_DOWN_DIFFERENTIAL;
748 MICRO_FREQUENCY_DOWN_DIFFERENTIAL;
749 /* 502 /*
750 * In nohz/micro accounting case we set the minimum frequency 503 * In nohz/micro accounting case we set the minimum frequency
751 * not depending on HZ, but fixed (very low). The deferred 504 * not depending on HZ, but fixed (very low). The deferred
752 * timer might skip some samples if idle/sleeping as needed. 505 * timer might skip some samples if idle/sleeping as needed.
753 */ 506 */
754 min_sampling_rate = MICRO_FREQUENCY_MIN_SAMPLE_RATE; 507 od_dbs_data.min_sampling_rate = MICRO_FREQUENCY_MIN_SAMPLE_RATE;
755 } else { 508 } else {
756 /* For correct statistics, we need 10 ticks for each measure */ 509 /* For correct statistics, we need 10 ticks for each measure */
757 min_sampling_rate = 510 od_dbs_data.min_sampling_rate = MIN_SAMPLING_RATE_RATIO *
758 MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10); 511 jiffies_to_usecs(10);
759 } 512 }
760 513
761 return cpufreq_register_governor(&cpufreq_gov_ondemand); 514 return cpufreq_register_governor(&cpufreq_gov_ondemand);
@@ -766,7 +519,6 @@ static void __exit cpufreq_gov_dbs_exit(void)
766 cpufreq_unregister_governor(&cpufreq_gov_ondemand); 519 cpufreq_unregister_governor(&cpufreq_gov_ondemand);
767} 520}
768 521
769
770MODULE_AUTHOR("Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>"); 522MODULE_AUTHOR("Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>");
771MODULE_AUTHOR("Alexey Starikovskiy <alexey.y.starikovskiy@intel.com>"); 523MODULE_AUTHOR("Alexey Starikovskiy <alexey.y.starikovskiy@intel.com>");
772MODULE_DESCRIPTION("'cpufreq_ondemand' - A dynamic cpufreq governor for " 524MODULE_DESCRIPTION("'cpufreq_ondemand' - A dynamic cpufreq governor for "