aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/cpufreq
diff options
context:
space:
mode:
author Arjan van de Ven <arjan@linux.intel.com> 2010-05-09 11:26:06 -0400
committer Ingo Molnar <mingo@elte.hu> 2010-05-09 13:35:27 -0400
commit 6b8fcd9029f217a9ecce822db645e19111c11080 (patch)
tree 1754333ce16418b5ffffddc75bfa1f0bf486f09f /drivers/cpufreq
parent 0224cf4c5ee0d7faec83956b8e21f7d89e3df3bd (diff)
ondemand: Solve a big performance issue by counting IOWAIT time as busy
The ondemand cpufreq governor uses CPU busy time (e.g. not-idle time) as a measure for scaling the CPU frequency up or down. If the CPU is busy, the CPU frequency scales up; if it's idle, the CPU frequency scales down. Effectively, it uses the CPU busy time as a proxy variable for the more nebulous "how critical is performance right now" question. This algorithm falls flat on its face in the face of workloads where you're alternately disk and CPU bound, such as the ever popular "git grep", but also things like startup of programs and maildir-using email clients... much to the chagrin of Andrew Morton. This patch changes the ondemand algorithm to count iowait time as busy, not idle, time. As shown in the breakdown cases above, iowait is often performance critical, and by counting iowait, the proxy variable becomes a more accurate representation of the "how critical is performance" question. The problem and fix are both verified with the "perf timechart" tool. Signed-off-by: Arjan van de Ven <arjan@linux.intel.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Dave Jones <davej@redhat.com> Reviewed-by: Rik van Riel <riel@redhat.com> Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl> LKML-Reference: <20100509082606.3d9f00d0@infradead.org> Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'drivers/cpufreq')
-rw-r--r-- drivers/cpufreq/cpufreq_ondemand.c 30
1 files changed, 28 insertions, 2 deletions
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index bd444dc93cf2..ed472f8dfb72 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -73,6 +73,7 @@ enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE};
73 73
74struct cpu_dbs_info_s { 74struct cpu_dbs_info_s {
75 cputime64_t prev_cpu_idle; 75 cputime64_t prev_cpu_idle;
76 cputime64_t prev_cpu_iowait;
76 cputime64_t prev_cpu_wall; 77 cputime64_t prev_cpu_wall;
77 cputime64_t prev_cpu_nice; 78 cputime64_t prev_cpu_nice;
78 struct cpufreq_policy *cur_policy; 79 struct cpufreq_policy *cur_policy;
@@ -148,6 +149,16 @@ static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall)
148 return idle_time; 149 return idle_time;
149} 150}
150 151
152static inline cputime64_t get_cpu_iowait_time(unsigned int cpu, cputime64_t *wall)
153{
154 u64 iowait_time = get_cpu_iowait_time_us(cpu, wall);
155
156 if (iowait_time == -1ULL)
157 return 0;
158
159 return iowait_time;
160}
161
151/* 162/*
152 * Find right freq to be set now with powersave_bias on. 163 * Find right freq to be set now with powersave_bias on.
153 * Returns the freq_hi to be used right now and will set freq_hi_jiffies, 164 * Returns the freq_hi to be used right now and will set freq_hi_jiffies,
@@ -470,14 +481,15 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
470 481
471 for_each_cpu(j, policy->cpus) { 482 for_each_cpu(j, policy->cpus) {
472 struct cpu_dbs_info_s *j_dbs_info; 483 struct cpu_dbs_info_s *j_dbs_info;
473 cputime64_t cur_wall_time, cur_idle_time; 484 cputime64_t cur_wall_time, cur_idle_time, cur_iowait_time;
474 unsigned int idle_time, wall_time; 485 unsigned int idle_time, wall_time, iowait_time;
475 unsigned int load, load_freq; 486 unsigned int load, load_freq;
476 int freq_avg; 487 int freq_avg;
477 488
478 j_dbs_info = &per_cpu(od_cpu_dbs_info, j); 489 j_dbs_info = &per_cpu(od_cpu_dbs_info, j);
479 490
480 cur_idle_time = get_cpu_idle_time(j, &cur_wall_time); 491 cur_idle_time = get_cpu_idle_time(j, &cur_wall_time);
492 cur_iowait_time = get_cpu_iowait_time(j, &cur_wall_time);
481 493
482 wall_time = (unsigned int) cputime64_sub(cur_wall_time, 494 wall_time = (unsigned int) cputime64_sub(cur_wall_time,
483 j_dbs_info->prev_cpu_wall); 495 j_dbs_info->prev_cpu_wall);
@@ -487,6 +499,10 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
487 j_dbs_info->prev_cpu_idle); 499 j_dbs_info->prev_cpu_idle);
488 j_dbs_info->prev_cpu_idle = cur_idle_time; 500 j_dbs_info->prev_cpu_idle = cur_idle_time;
489 501
502 iowait_time = (unsigned int) cputime64_sub(cur_iowait_time,
503 j_dbs_info->prev_cpu_iowait);
504 j_dbs_info->prev_cpu_iowait = cur_iowait_time;
505
490 if (dbs_tuners_ins.ignore_nice) { 506 if (dbs_tuners_ins.ignore_nice) {
491 cputime64_t cur_nice; 507 cputime64_t cur_nice;
492 unsigned long cur_nice_jiffies; 508 unsigned long cur_nice_jiffies;
@@ -504,6 +520,16 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
504 idle_time += jiffies_to_usecs(cur_nice_jiffies); 520 idle_time += jiffies_to_usecs(cur_nice_jiffies);
505 } 521 }
506 522
523 /*
524 * For the purpose of ondemand, waiting for disk IO is an
525 * indication that you're performance critical, and not that
526 * the system is actually idle. So subtract the iowait time
527 * from the cpu idle time.
528 */
529
530 if (idle_time >= iowait_time)
531 idle_time -= iowait_time;
532
507 if (unlikely(!wall_time || wall_time < idle_time)) 533 if (unlikely(!wall_time || wall_time < idle_time))
508 continue; 534 continue;
509 535