diff options
-rw-r--r-- | drivers/cpufreq/cpufreq_conservative.c | 328 |
1 files changed, 188 insertions, 140 deletions
diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index a16a5b8c1dc5..c9bd0c55ad1e 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c | |||
@@ -13,22 +13,17 @@ | |||
13 | 13 | ||
14 | #include <linux/kernel.h> | 14 | #include <linux/kernel.h> |
15 | #include <linux/module.h> | 15 | #include <linux/module.h> |
16 | #include <linux/smp.h> | ||
17 | #include <linux/init.h> | 16 | #include <linux/init.h> |
18 | #include <linux/interrupt.h> | ||
19 | #include <linux/ctype.h> | ||
20 | #include <linux/cpufreq.h> | 17 | #include <linux/cpufreq.h> |
21 | #include <linux/sysctl.h> | ||
22 | #include <linux/types.h> | ||
23 | #include <linux/fs.h> | ||
24 | #include <linux/sysfs.h> | ||
25 | #include <linux/cpu.h> | 18 | #include <linux/cpu.h> |
26 | #include <linux/kmod.h> | ||
27 | #include <linux/workqueue.h> | ||
28 | #include <linux/jiffies.h> | 19 | #include <linux/jiffies.h> |
29 | #include <linux/kernel_stat.h> | 20 | #include <linux/kernel_stat.h> |
30 | #include <linux/percpu.h> | ||
31 | #include <linux/mutex.h> | 21 | #include <linux/mutex.h> |
22 | #include <linux/hrtimer.h> | ||
23 | #include <linux/tick.h> | ||
24 | #include <linux/ktime.h> | ||
25 | #include <linux/sched.h> | ||
26 | |||
32 | /* | 27 | /* |
33 | * dbs is used in this file as a shortform for demandbased switching | 28 | * dbs is used in this file as a shortform for demandbased switching |
34 | * It helps to keep variable names smaller, simpler | 29 | * It helps to keep variable names smaller, simpler |
@@ -43,14 +38,14 @@ | |||
43 | * latency of the processor. The governor will work on any processor with | 38 | * latency of the processor. The governor will work on any processor with |
44 | * transition latency <= 10mS, using appropriate sampling | 39 | * transition latency <= 10mS, using appropriate sampling |
45 | * rate. | 40 | * rate. |
46 | * For CPUs with transition latency > 10mS (mostly drivers | 41 | * For CPUs with transition latency > 10mS (mostly drivers with CPUFREQ_ETERNAL) |
47 | * with CPUFREQ_ETERNAL), this governor will not work. | 42 | * this governor will not work. |
48 | * All times here are in uS. | 43 | * All times here are in uS. |
49 | */ | 44 | */ |
50 | static unsigned int def_sampling_rate; | 45 | static unsigned int def_sampling_rate; |
51 | #define MIN_SAMPLING_RATE_RATIO (2) | 46 | #define MIN_SAMPLING_RATE_RATIO (2) |
52 | /* for correct statistics, we need at least 10 ticks between each measure */ | 47 | /* for correct statistics, we need at least 10 ticks between each measure */ |
53 | #define MIN_STAT_SAMPLING_RATE \ | 48 | #define MIN_STAT_SAMPLING_RATE \ |
54 | (MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10)) | 49 | (MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10)) |
55 | #define MIN_SAMPLING_RATE \ | 50 | #define MIN_SAMPLING_RATE \ |
56 | (def_sampling_rate / MIN_SAMPLING_RATE_RATIO) | 51 | (def_sampling_rate / MIN_SAMPLING_RATE_RATIO) |
@@ -75,12 +70,15 @@ static unsigned int minimum_sampling_rate(void) | |||
75 | static void do_dbs_timer(struct work_struct *work); | 70 | static void do_dbs_timer(struct work_struct *work); |
76 | 71 | ||
77 | struct cpu_dbs_info_s { | 72 | struct cpu_dbs_info_s { |
73 | cputime64_t prev_cpu_idle; | ||
74 | cputime64_t prev_cpu_wall; | ||
75 | cputime64_t prev_cpu_nice; | ||
78 | struct cpufreq_policy *cur_policy; | 76 | struct cpufreq_policy *cur_policy; |
79 | unsigned int prev_cpu_idle_up; | 77 | struct delayed_work work; |
80 | unsigned int prev_cpu_idle_down; | ||
81 | unsigned int enable; | ||
82 | unsigned int down_skip; | 78 | unsigned int down_skip; |
83 | unsigned int requested_freq; | 79 | unsigned int requested_freq; |
80 | int cpu; | ||
81 | unsigned int enable:1; | ||
84 | }; | 82 | }; |
85 | static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info); | 83 | static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info); |
86 | 84 | ||
@@ -95,18 +93,17 @@ static unsigned int dbs_enable; /* number of CPUs using this policy */ | |||
95 | * is recursive for the same process. -Venki | 93 | * is recursive for the same process. -Venki |
96 | */ | 94 | */ |
97 | static DEFINE_MUTEX(dbs_mutex); | 95 | static DEFINE_MUTEX(dbs_mutex); |
98 | static DECLARE_DELAYED_WORK(dbs_work, do_dbs_timer); | ||
99 | 96 | ||
100 | struct dbs_tuners { | 97 | static struct workqueue_struct *kconservative_wq; |
98 | |||
99 | static struct dbs_tuners { | ||
101 | unsigned int sampling_rate; | 100 | unsigned int sampling_rate; |
102 | unsigned int sampling_down_factor; | 101 | unsigned int sampling_down_factor; |
103 | unsigned int up_threshold; | 102 | unsigned int up_threshold; |
104 | unsigned int down_threshold; | 103 | unsigned int down_threshold; |
105 | unsigned int ignore_nice; | 104 | unsigned int ignore_nice; |
106 | unsigned int freq_step; | 105 | unsigned int freq_step; |
107 | }; | 106 | } dbs_tuners_ins = { |
108 | |||
109 | static struct dbs_tuners dbs_tuners_ins = { | ||
110 | .up_threshold = DEF_FREQUENCY_UP_THRESHOLD, | 107 | .up_threshold = DEF_FREQUENCY_UP_THRESHOLD, |
111 | .down_threshold = DEF_FREQUENCY_DOWN_THRESHOLD, | 108 | .down_threshold = DEF_FREQUENCY_DOWN_THRESHOLD, |
112 | .sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR, | 109 | .sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR, |
@@ -114,18 +111,37 @@ static struct dbs_tuners dbs_tuners_ins = { | |||
114 | .freq_step = 5, | 111 | .freq_step = 5, |
115 | }; | 112 | }; |
116 | 113 | ||
117 | static inline unsigned int get_cpu_idle_time(unsigned int cpu) | 114 | static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu, |
115 | cputime64_t *wall) | ||
118 | { | 116 | { |
119 | unsigned int add_nice = 0, ret; | 117 | cputime64_t idle_time; |
118 | cputime64_t cur_wall_time; | ||
119 | cputime64_t busy_time; | ||
120 | |||
121 | cur_wall_time = jiffies64_to_cputime64(get_jiffies_64()); | ||
122 | busy_time = cputime64_add(kstat_cpu(cpu).cpustat.user, | ||
123 | kstat_cpu(cpu).cpustat.system); | ||
120 | 124 | ||
121 | if (dbs_tuners_ins.ignore_nice) | 125 | busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.irq); |
122 | add_nice = kstat_cpu(cpu).cpustat.nice; | 126 | busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.softirq); |
127 | busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.steal); | ||
128 | busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.nice); | ||
123 | 129 | ||
124 | ret = kstat_cpu(cpu).cpustat.idle + | 130 | idle_time = cputime64_sub(cur_wall_time, busy_time); |
125 | kstat_cpu(cpu).cpustat.iowait + | 131 | if (wall) |
126 | add_nice; | 132 | *wall = cur_wall_time; |
127 | 133 | ||
128 | return ret; | 134 | return idle_time; |
135 | } | ||
136 | |||
137 | static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall) | ||
138 | { | ||
139 | u64 idle_time = get_cpu_idle_time_us(cpu, wall); | ||
140 | |||
141 | if (idle_time == -1ULL) | ||
142 | return get_cpu_idle_time_jiffy(cpu, wall); | ||
143 | |||
144 | return idle_time; | ||
129 | } | 145 | } |
130 | 146 | ||
131 | /* keep track of frequency transitions */ | 147 | /* keep track of frequency transitions */ |
@@ -186,8 +202,8 @@ static ssize_t show_sampling_rate_min(struct cpufreq_policy *policy, char *buf) | |||
186 | return sprintf(buf, "%u\n", MIN_SAMPLING_RATE); | 202 | return sprintf(buf, "%u\n", MIN_SAMPLING_RATE); |
187 | } | 203 | } |
188 | 204 | ||
189 | #define define_one_ro(_name) \ | 205 | #define define_one_ro(_name) \ |
190 | static struct freq_attr _name = \ | 206 | static struct freq_attr _name = \ |
191 | __ATTR(_name, 0444, show_##_name, NULL) | 207 | __ATTR(_name, 0444, show_##_name, NULL) |
192 | 208 | ||
193 | define_one_ro(sampling_rate_max); | 209 | define_one_ro(sampling_rate_max); |
@@ -213,6 +229,7 @@ static ssize_t store_sampling_down_factor(struct cpufreq_policy *unused, | |||
213 | unsigned int input; | 229 | unsigned int input; |
214 | int ret; | 230 | int ret; |
215 | ret = sscanf(buf, "%u", &input); | 231 | ret = sscanf(buf, "%u", &input); |
232 | |||
216 | if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1) | 233 | if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1) |
217 | return -EINVAL; | 234 | return -EINVAL; |
218 | 235 | ||
@@ -230,11 +247,10 @@ static ssize_t store_sampling_rate(struct cpufreq_policy *unused, | |||
230 | int ret; | 247 | int ret; |
231 | ret = sscanf(buf, "%u", &input); | 248 | ret = sscanf(buf, "%u", &input); |
232 | 249 | ||
233 | mutex_lock(&dbs_mutex); | 250 | if (ret != 1) |
234 | if (ret != 1) { | ||
235 | mutex_unlock(&dbs_mutex); | ||
236 | return -EINVAL; | 251 | return -EINVAL; |
237 | } | 252 | |
253 | mutex_lock(&dbs_mutex); | ||
238 | dbs_tuners_ins.sampling_rate = max(input, minimum_sampling_rate()); | 254 | dbs_tuners_ins.sampling_rate = max(input, minimum_sampling_rate()); |
239 | mutex_unlock(&dbs_mutex); | 255 | mutex_unlock(&dbs_mutex); |
240 | 256 | ||
@@ -250,7 +266,7 @@ static ssize_t store_up_threshold(struct cpufreq_policy *unused, | |||
250 | 266 | ||
251 | mutex_lock(&dbs_mutex); | 267 | mutex_lock(&dbs_mutex); |
252 | if (ret != 1 || input > 100 || | 268 | if (ret != 1 || input > 100 || |
253 | input <= dbs_tuners_ins.down_threshold) { | 269 | input <= dbs_tuners_ins.down_threshold) { |
254 | mutex_unlock(&dbs_mutex); | 270 | mutex_unlock(&dbs_mutex); |
255 | return -EINVAL; | 271 | return -EINVAL; |
256 | } | 272 | } |
@@ -269,7 +285,9 @@ static ssize_t store_down_threshold(struct cpufreq_policy *unused, | |||
269 | ret = sscanf(buf, "%u", &input); | 285 | ret = sscanf(buf, "%u", &input); |
270 | 286 | ||
271 | mutex_lock(&dbs_mutex); | 287 | mutex_lock(&dbs_mutex); |
272 | if (ret != 1 || input > 100 || input >= dbs_tuners_ins.up_threshold) { | 288 | /* cannot be lower than 11 otherwise freq will not fall */ |
289 | if (ret != 1 || input < 11 || input > 100 || | ||
290 | input >= dbs_tuners_ins.up_threshold) { | ||
273 | mutex_unlock(&dbs_mutex); | 291 | mutex_unlock(&dbs_mutex); |
274 | return -EINVAL; | 292 | return -EINVAL; |
275 | } | 293 | } |
@@ -302,12 +320,14 @@ static ssize_t store_ignore_nice_load(struct cpufreq_policy *policy, | |||
302 | } | 320 | } |
303 | dbs_tuners_ins.ignore_nice = input; | 321 | dbs_tuners_ins.ignore_nice = input; |
304 | 322 | ||
305 | /* we need to re-evaluate prev_cpu_idle_up and prev_cpu_idle_down */ | 323 | /* we need to re-evaluate prev_cpu_idle */ |
306 | for_each_online_cpu(j) { | 324 | for_each_online_cpu(j) { |
307 | struct cpu_dbs_info_s *j_dbs_info; | 325 | struct cpu_dbs_info_s *dbs_info; |
308 | j_dbs_info = &per_cpu(cpu_dbs_info, j); | 326 | dbs_info = &per_cpu(cpu_dbs_info, j); |
309 | j_dbs_info->prev_cpu_idle_up = get_cpu_idle_time(j); | 327 | dbs_info->prev_cpu_idle = get_cpu_idle_time(j, |
310 | j_dbs_info->prev_cpu_idle_down = j_dbs_info->prev_cpu_idle_up; | 328 | &dbs_info->prev_cpu_wall); |
329 | if (dbs_tuners_ins.ignore_nice) | ||
330 | dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice; | ||
311 | } | 331 | } |
312 | mutex_unlock(&dbs_mutex); | 332 | mutex_unlock(&dbs_mutex); |
313 | 333 | ||
@@ -319,7 +339,6 @@ static ssize_t store_freq_step(struct cpufreq_policy *policy, | |||
319 | { | 339 | { |
320 | unsigned int input; | 340 | unsigned int input; |
321 | int ret; | 341 | int ret; |
322 | |||
323 | ret = sscanf(buf, "%u", &input); | 342 | ret = sscanf(buf, "%u", &input); |
324 | 343 | ||
325 | if (ret != 1) | 344 | if (ret != 1) |
@@ -367,55 +386,78 @@ static struct attribute_group dbs_attr_group = { | |||
367 | 386 | ||
368 | /************************** sysfs end ************************/ | 387 | /************************** sysfs end ************************/ |
369 | 388 | ||
370 | static void dbs_check_cpu(int cpu) | 389 | static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) |
371 | { | 390 | { |
372 | unsigned int idle_ticks, up_idle_ticks, down_idle_ticks; | 391 | unsigned int load = 0; |
373 | unsigned int tmp_idle_ticks, total_idle_ticks; | ||
374 | unsigned int freq_target; | 392 | unsigned int freq_target; |
375 | unsigned int freq_down_sampling_rate; | ||
376 | struct cpu_dbs_info_s *this_dbs_info = &per_cpu(cpu_dbs_info, cpu); | ||
377 | struct cpufreq_policy *policy; | ||
378 | 393 | ||
379 | if (!this_dbs_info->enable) | 394 | struct cpufreq_policy *policy; |
380 | return; | 395 | unsigned int j; |
381 | 396 | ||
382 | policy = this_dbs_info->cur_policy; | 397 | policy = this_dbs_info->cur_policy; |
383 | 398 | ||
384 | /* | 399 | /* |
385 | * The default safe range is 20% to 80% | 400 | * Every sampling_rate, we check, if current idle time is less |
386 | * Every sampling_rate, we check | 401 | * than 20% (default), then we try to increase frequency |
387 | * - If current idle time is less than 20%, then we try to | 402 | * Every sampling_rate*sampling_down_factor, we check, if current |
388 | * increase frequency | 403 | * idle time is more than 80%, then we try to decrease frequency |
389 | * Every sampling_rate*sampling_down_factor, we check | ||
390 | * - If current idle time is more than 80%, then we try to | ||
391 | * decrease frequency | ||
392 | * | 404 | * |
393 | * Any frequency increase takes it to the maximum frequency. | 405 | * Any frequency increase takes it to the maximum frequency. |
394 | * Frequency reduction happens at minimum steps of | 406 | * Frequency reduction happens at minimum steps of |
395 | * 5% (default) of max_frequency | 407 | * 5% (default) of maximum frequency |
396 | */ | 408 | */ |
397 | 409 | ||
398 | /* Check for frequency increase */ | 410 | /* Get Absolute Load */ |
399 | idle_ticks = UINT_MAX; | 411 | for_each_cpu(j, policy->cpus) { |
412 | struct cpu_dbs_info_s *j_dbs_info; | ||
413 | cputime64_t cur_wall_time, cur_idle_time; | ||
414 | unsigned int idle_time, wall_time; | ||
400 | 415 | ||
401 | /* Check for frequency increase */ | 416 | j_dbs_info = &per_cpu(cpu_dbs_info, j); |
402 | total_idle_ticks = get_cpu_idle_time(cpu); | 417 | |
403 | tmp_idle_ticks = total_idle_ticks - | 418 | cur_idle_time = get_cpu_idle_time(j, &cur_wall_time); |
404 | this_dbs_info->prev_cpu_idle_up; | 419 | |
405 | this_dbs_info->prev_cpu_idle_up = total_idle_ticks; | 420 | wall_time = (unsigned int) cputime64_sub(cur_wall_time, |
421 | j_dbs_info->prev_cpu_wall); | ||
422 | j_dbs_info->prev_cpu_wall = cur_wall_time; | ||
406 | 423 | ||
407 | if (tmp_idle_ticks < idle_ticks) | 424 | idle_time = (unsigned int) cputime64_sub(cur_idle_time, |
408 | idle_ticks = tmp_idle_ticks; | 425 | j_dbs_info->prev_cpu_idle); |
426 | j_dbs_info->prev_cpu_idle = cur_idle_time; | ||
409 | 427 | ||
410 | /* Scale idle ticks by 100 and compare with up and down ticks */ | 428 | if (dbs_tuners_ins.ignore_nice) { |
411 | idle_ticks *= 100; | 429 | cputime64_t cur_nice; |
412 | up_idle_ticks = (100 - dbs_tuners_ins.up_threshold) * | 430 | unsigned long cur_nice_jiffies; |
413 | usecs_to_jiffies(dbs_tuners_ins.sampling_rate); | 431 | |
432 | cur_nice = cputime64_sub(kstat_cpu(j).cpustat.nice, | ||
433 | j_dbs_info->prev_cpu_nice); | ||
434 | /* | ||
435 | * Assumption: nice time between sampling periods will | ||
436 | * be less than 2^32 jiffies for 32 bit sys | ||
437 | */ | ||
438 | cur_nice_jiffies = (unsigned long) | ||
439 | cputime64_to_jiffies64(cur_nice); | ||
440 | |||
441 | j_dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice; | ||
442 | idle_time += jiffies_to_usecs(cur_nice_jiffies); | ||
443 | } | ||
444 | |||
445 | if (unlikely(!wall_time || wall_time < idle_time)) | ||
446 | continue; | ||
447 | |||
448 | load = 100 * (wall_time - idle_time) / wall_time; | ||
449 | } | ||
450 | |||
451 | /* | ||
452 | * break out if we 'cannot' reduce the speed as the user might | ||
453 | * want freq_step to be zero | ||
454 | */ | ||
455 | if (dbs_tuners_ins.freq_step == 0) | ||
456 | return; | ||
414 | 457 | ||
415 | if (idle_ticks < up_idle_ticks) { | 458 | /* Check for frequency increase */ |
459 | if (load > dbs_tuners_ins.up_threshold) { | ||
416 | this_dbs_info->down_skip = 0; | 460 | this_dbs_info->down_skip = 0; |
417 | this_dbs_info->prev_cpu_idle_down = | ||
418 | this_dbs_info->prev_cpu_idle_up; | ||
419 | 461 | ||
420 | /* if we are already at full speed then break out early */ | 462 | /* if we are already at full speed then break out early */ |
421 | if (this_dbs_info->requested_freq == policy->max) | 463 | if (this_dbs_info->requested_freq == policy->max) |
@@ -436,49 +478,24 @@ static void dbs_check_cpu(int cpu) | |||
436 | return; | 478 | return; |
437 | } | 479 | } |
438 | 480 | ||
439 | /* Check for frequency decrease */ | 481 | /* |
440 | this_dbs_info->down_skip++; | 482 | * The optimal frequency is the frequency that is the lowest that |
441 | if (this_dbs_info->down_skip < dbs_tuners_ins.sampling_down_factor) | 483 | * can support the current CPU usage without triggering the up |
442 | return; | 484 | * policy. To be safe, we focus 10 points under the threshold. |
443 | 485 | */ | |
444 | /* Check for frequency decrease */ | 486 | if (load < (dbs_tuners_ins.down_threshold - 10)) { |
445 | total_idle_ticks = this_dbs_info->prev_cpu_idle_up; | ||
446 | tmp_idle_ticks = total_idle_ticks - | ||
447 | this_dbs_info->prev_cpu_idle_down; | ||
448 | this_dbs_info->prev_cpu_idle_down = total_idle_ticks; | ||
449 | |||
450 | if (tmp_idle_ticks < idle_ticks) | ||
451 | idle_ticks = tmp_idle_ticks; | ||
452 | |||
453 | /* Scale idle ticks by 100 and compare with up and down ticks */ | ||
454 | idle_ticks *= 100; | ||
455 | this_dbs_info->down_skip = 0; | ||
456 | |||
457 | freq_down_sampling_rate = dbs_tuners_ins.sampling_rate * | ||
458 | dbs_tuners_ins.sampling_down_factor; | ||
459 | down_idle_ticks = (100 - dbs_tuners_ins.down_threshold) * | ||
460 | usecs_to_jiffies(freq_down_sampling_rate); | ||
461 | |||
462 | if (idle_ticks > down_idle_ticks) { | ||
463 | /* | ||
464 | * if we are already at the lowest speed then break out early | ||
465 | * or if we 'cannot' reduce the speed as the user might want | ||
466 | * freq_target to be zero | ||
467 | */ | ||
468 | if (this_dbs_info->requested_freq == policy->min | ||
469 | || dbs_tuners_ins.freq_step == 0) | ||
470 | return; | ||
471 | |||
472 | freq_target = (dbs_tuners_ins.freq_step * policy->max) / 100; | 487 | freq_target = (dbs_tuners_ins.freq_step * policy->max) / 100; |
473 | 488 | ||
474 | /* max freq cannot be less than 100. But who knows.... */ | ||
475 | if (unlikely(freq_target == 0)) | ||
476 | freq_target = 5; | ||
477 | |||
478 | this_dbs_info->requested_freq -= freq_target; | 489 | this_dbs_info->requested_freq -= freq_target; |
479 | if (this_dbs_info->requested_freq < policy->min) | 490 | if (this_dbs_info->requested_freq < policy->min) |
480 | this_dbs_info->requested_freq = policy->min; | 491 | this_dbs_info->requested_freq = policy->min; |
481 | 492 | ||
493 | /* | ||
494 | * if we cannot reduce the frequency anymore, break out early | ||
495 | */ | ||
496 | if (policy->cur == policy->min) | ||
497 | return; | ||
498 | |||
482 | __cpufreq_driver_target(policy, this_dbs_info->requested_freq, | 499 | __cpufreq_driver_target(policy, this_dbs_info->requested_freq, |
483 | CPUFREQ_RELATION_H); | 500 | CPUFREQ_RELATION_H); |
484 | return; | 501 | return; |
@@ -487,27 +504,45 @@ static void dbs_check_cpu(int cpu) | |||
487 | 504 | ||
488 | static void do_dbs_timer(struct work_struct *work) | 505 | static void do_dbs_timer(struct work_struct *work) |
489 | { | 506 | { |
490 | int i; | 507 | struct cpu_dbs_info_s *dbs_info = |
491 | mutex_lock(&dbs_mutex); | 508 | container_of(work, struct cpu_dbs_info_s, work.work); |
492 | for_each_online_cpu(i) | 509 | unsigned int cpu = dbs_info->cpu; |
493 | dbs_check_cpu(i); | 510 | |
494 | schedule_delayed_work(&dbs_work, | 511 | /* We want all CPUs to do sampling nearly on same jiffy */ |
495 | usecs_to_jiffies(dbs_tuners_ins.sampling_rate)); | 512 | int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate); |
496 | mutex_unlock(&dbs_mutex); | 513 | |
514 | delay -= jiffies % delay; | ||
515 | |||
516 | if (lock_policy_rwsem_write(cpu) < 0) | ||
517 | return; | ||
518 | |||
519 | if (!dbs_info->enable) { | ||
520 | unlock_policy_rwsem_write(cpu); | ||
521 | return; | ||
522 | } | ||
523 | |||
524 | dbs_check_cpu(dbs_info); | ||
525 | |||
526 | queue_delayed_work_on(cpu, kconservative_wq, &dbs_info->work, delay); | ||
527 | unlock_policy_rwsem_write(cpu); | ||
497 | } | 528 | } |
498 | 529 | ||
499 | static inline void dbs_timer_init(void) | 530 | static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info) |
500 | { | 531 | { |
501 | init_timer_deferrable(&dbs_work.timer); | 532 | /* We want all CPUs to do sampling nearly on same jiffy */ |
502 | schedule_delayed_work(&dbs_work, | 533 | int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate); |
503 | usecs_to_jiffies(dbs_tuners_ins.sampling_rate)); | 534 | delay -= jiffies % delay; |
504 | return; | 535 | |
536 | dbs_info->enable = 1; | ||
537 | INIT_DELAYED_WORK_DEFERRABLE(&dbs_info->work, do_dbs_timer); | ||
538 | queue_delayed_work_on(dbs_info->cpu, kconservative_wq, &dbs_info->work, | ||
539 | delay); | ||
505 | } | 540 | } |
506 | 541 | ||
507 | static inline void dbs_timer_exit(void) | 542 | static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info) |
508 | { | 543 | { |
509 | cancel_delayed_work(&dbs_work); | 544 | dbs_info->enable = 0; |
510 | return; | 545 | cancel_delayed_work(&dbs_info->work); |
511 | } | 546 | } |
512 | 547 | ||
513 | static int cpufreq_governor_dbs(struct cpufreq_policy *policy, | 548 | static int cpufreq_governor_dbs(struct cpufreq_policy *policy, |
@@ -541,11 +576,13 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, | |||
541 | j_dbs_info = &per_cpu(cpu_dbs_info, j); | 576 | j_dbs_info = &per_cpu(cpu_dbs_info, j); |
542 | j_dbs_info->cur_policy = policy; | 577 | j_dbs_info->cur_policy = policy; |
543 | 578 | ||
544 | j_dbs_info->prev_cpu_idle_up = get_cpu_idle_time(cpu); | 579 | j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j, |
545 | j_dbs_info->prev_cpu_idle_down | 580 | &j_dbs_info->prev_cpu_wall); |
546 | = j_dbs_info->prev_cpu_idle_up; | 581 | if (dbs_tuners_ins.ignore_nice) { |
582 | j_dbs_info->prev_cpu_nice = | ||
583 | kstat_cpu(j).cpustat.nice; | ||
584 | } | ||
547 | } | 585 | } |
548 | this_dbs_info->enable = 1; | ||
549 | this_dbs_info->down_skip = 0; | 586 | this_dbs_info->down_skip = 0; |
550 | this_dbs_info->requested_freq = policy->cur; | 587 | this_dbs_info->requested_freq = policy->cur; |
551 | 588 | ||
@@ -567,30 +604,30 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, | |||
567 | 604 | ||
568 | dbs_tuners_ins.sampling_rate = def_sampling_rate; | 605 | dbs_tuners_ins.sampling_rate = def_sampling_rate; |
569 | 606 | ||
570 | dbs_timer_init(); | ||
571 | cpufreq_register_notifier( | 607 | cpufreq_register_notifier( |
572 | &dbs_cpufreq_notifier_block, | 608 | &dbs_cpufreq_notifier_block, |
573 | CPUFREQ_TRANSITION_NOTIFIER); | 609 | CPUFREQ_TRANSITION_NOTIFIER); |
574 | } | 610 | } |
611 | dbs_timer_init(this_dbs_info); | ||
575 | 612 | ||
576 | mutex_unlock(&dbs_mutex); | 613 | mutex_unlock(&dbs_mutex); |
614 | |||
577 | break; | 615 | break; |
578 | 616 | ||
579 | case CPUFREQ_GOV_STOP: | 617 | case CPUFREQ_GOV_STOP: |
580 | mutex_lock(&dbs_mutex); | 618 | mutex_lock(&dbs_mutex); |
581 | this_dbs_info->enable = 0; | 619 | dbs_timer_exit(this_dbs_info); |
582 | sysfs_remove_group(&policy->kobj, &dbs_attr_group); | 620 | sysfs_remove_group(&policy->kobj, &dbs_attr_group); |
583 | dbs_enable--; | 621 | dbs_enable--; |
622 | |||
584 | /* | 623 | /* |
585 | * Stop the timerschedule work, when this governor | 624 | * Stop the timerschedule work, when this governor |
586 | * is used for first time | 625 | * is used for first time |
587 | */ | 626 | */ |
588 | if (dbs_enable == 0) { | 627 | if (dbs_enable == 0) |
589 | dbs_timer_exit(); | ||
590 | cpufreq_unregister_notifier( | 628 | cpufreq_unregister_notifier( |
591 | &dbs_cpufreq_notifier_block, | 629 | &dbs_cpufreq_notifier_block, |
592 | CPUFREQ_TRANSITION_NOTIFIER); | 630 | CPUFREQ_TRANSITION_NOTIFIER); |
593 | } | ||
594 | 631 | ||
595 | mutex_unlock(&dbs_mutex); | 632 | mutex_unlock(&dbs_mutex); |
596 | 633 | ||
@@ -607,6 +644,7 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, | |||
607 | this_dbs_info->cur_policy, | 644 | this_dbs_info->cur_policy, |
608 | policy->min, CPUFREQ_RELATION_L); | 645 | policy->min, CPUFREQ_RELATION_L); |
609 | mutex_unlock(&dbs_mutex); | 646 | mutex_unlock(&dbs_mutex); |
647 | |||
610 | break; | 648 | break; |
611 | } | 649 | } |
612 | return 0; | 650 | return 0; |
@@ -624,15 +662,25 @@ struct cpufreq_governor cpufreq_gov_conservative = { | |||
624 | 662 | ||
625 | static int __init cpufreq_gov_dbs_init(void) | 663 | static int __init cpufreq_gov_dbs_init(void) |
626 | { | 664 | { |
627 | return cpufreq_register_governor(&cpufreq_gov_conservative); | 665 | int err; |
666 | |||
667 | kconservative_wq = create_workqueue("kconservative"); | ||
668 | if (!kconservative_wq) { | ||
669 | printk(KERN_ERR "Creation of kconservative failed\n"); | ||
670 | return -EFAULT; | ||
671 | } | ||
672 | |||
673 | err = cpufreq_register_governor(&cpufreq_gov_conservative); | ||
674 | if (err) | ||
675 | destroy_workqueue(kconservative_wq); | ||
676 | |||
677 | return err; | ||
628 | } | 678 | } |
629 | 679 | ||
630 | static void __exit cpufreq_gov_dbs_exit(void) | 680 | static void __exit cpufreq_gov_dbs_exit(void) |
631 | { | 681 | { |
632 | /* Make sure that the scheduled work is indeed not running */ | ||
633 | flush_scheduled_work(); | ||
634 | |||
635 | cpufreq_unregister_governor(&cpufreq_gov_conservative); | 682 | cpufreq_unregister_governor(&cpufreq_gov_conservative); |
683 | destroy_workqueue(kconservative_wq); | ||
636 | } | 684 | } |
637 | 685 | ||
638 | 686 | ||