aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/cpuidle/governors/menu.c
diff options
context:
space:
mode:
authorYouquan Song <youquan.song@intel.com>2012-10-26 06:26:41 -0400
committerRafael J. Wysocki <rafael.j.wysocki@intel.com>2012-11-14 18:34:19 -0500
commit69a37beabf1f0a6705c08e879bdd5d82ff6486c4 (patch)
treecbe66278435ee12182915832584a12cd1a83b114 /drivers/cpuidle/governors/menu.c
parente45a00d679a788217f35ee4214a32d6d1924160b (diff)
cpuidle: Quickly notice prediction failure for repeat mode
Predicting the future is difficult, and when the cpuidle governor's prediction fails, the governor may choose a shallower C-state than it should. Quickly noticing such a failure is therefore important for power saving. The cpuidle menu governor has a method for detecting a repeating pattern: if the last 8 C-state residencies are consecutive and the same or very close, it predicts that the next C-state residency will be the same. A real case is the turbostat utility (tools/power/x86/turbostat) in kernel 3.3 or earlier. On Sandybridge, turbostat reads 10 registers one by one, so it generates 10 IPIs to wake up idle CPUs. The cpuidle menu governor then predicts repeat mode, i.e. that another IPI will wake the idle CPU soon, so it keeps the idle CPU in the C1 state even though the CPU is totally idle. However, after reading the 10 registers turbostat sleeps for 5 seconds by default, so the idle CPU stays in C1 for a long time, until a break event occurs, although it is idle. On an idle Sandybridge system, run "./turbostat -v" and notice that deep C-state residency fluctuates between 70% and 99%. With the patched kernel, deep C-state residency stays at >99.98%. In this patch, a timer is added when the menu governor detects repeat mode and chooses a shallow C-state. The timer is set to a timeout value that is greater than the predicted time, and we conclude that the repeat mode prediction has failed if the timer fires. When repeat mode happens as expected, the timer does not fire; the CPU is woken up from the C-state and cancels the timer itself. When repeat mode does not happen, the timer expires, and the menu governor quickly notices that the repeat mode prediction failed and re-evaluates whether a deeper C-state is possible.
Below is another case that clearly shows the benefit of the patch: #include <stdlib.h> #include <stdio.h> #include <unistd.h> #include <signal.h> #include <sys/time.h> #include <time.h> #include <pthread.h> volatile int * shutdown; volatile long * count; int delay = 20; int loop = 8; void usage(void) { fprintf(stderr, "Usage: idle_predict [options]\n" " --help -h Print this help\n" " --thread -n Thread number\n" " --loop -l Loop times in shallow Cstate\n" " --delay -t Sleep time (uS)in shallow Cstate\n"); } void *simple_loop() { int idle_num = 1; while (!(*shutdown)) { *count = *count + 1; if (idle_num % loop) usleep(delay); else { /* sleep 1 second */ usleep(1000000); idle_num = 0; } idle_num++; } } static void sighand(int sig) { *shutdown = 1; } int main(int argc, char *argv[]) { sigset_t sigset; int signum = SIGALRM; int i, c, er = 0, thread_num = 8; pthread_t pt[1024]; static char optstr[] = "n:l:t:h:"; while ((c = getopt(argc, argv, optstr)) != EOF) switch (c) { case 'n': thread_num = atoi(optarg); break; case 'l': loop = atoi(optarg); break; case 't': delay = atoi(optarg); break; case 'h': default: usage(); exit(1); } printf("thread=%d,loop=%d,delay=%d\n",thread_num,loop,delay); count = malloc(sizeof(long)); shutdown = malloc(sizeof(int)); *count = 0; *shutdown = 0; sigemptyset(&sigset); sigaddset(&sigset, signum); sigprocmask (SIG_BLOCK, &sigset, NULL); signal(SIGINT, sighand); signal(SIGTERM, sighand); for(i = 0; i < thread_num ; i++) pthread_create(&pt[i], NULL, simple_loop, NULL); for (i = 0; i < thread_num; i++) pthread_join(pt[i], NULL); exit(0); } Get powertop V2 from git://github.com/fenrus75/powertop and build it. Build the above test application, then run it. The test platform can be Intel Sandybridge or another recent platform. #./idle_predict -l 10 & #./powertop We will find that deep C-state residency fluctuates between 40% and 100% and that much time is spent in the C1 state.
This is because the menu governor wrongly predicts that repeat mode continues, so it chooses the shallow C1 state even though it has the chance to sleep for 1 second in a deep C-state. With the patched kernel, deep C-state residency stays at >99.6%. Signed-off-by: Rik van Riel <riel@redhat.com> Signed-off-by: Youquan Song <youquan.song@intel.com> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Diffstat (limited to 'drivers/cpuidle/governors/menu.c')
-rw-r--r--drivers/cpuidle/governors/menu.c75
1 files changed, 70 insertions, 5 deletions
diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index 5b1f2c372c1f..37c0ff6c805c 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -28,6 +28,13 @@
28#define MAX_INTERESTING 50000 28#define MAX_INTERESTING 50000
29#define STDDEV_THRESH 400 29#define STDDEV_THRESH 400
30 30
31/* 60 * 60 > STDDEV_THRESH * INTERVALS = 400 * 8 */
32#define MAX_DEVIATION 60
33
34static DEFINE_PER_CPU(struct hrtimer, menu_hrtimer);
35static DEFINE_PER_CPU(int, hrtimer_status);
36/* menu hrtimer mode */
37enum {MENU_HRTIMER_STOP, MENU_HRTIMER_REPEAT};
31 38
32/* 39/*
33 * Concepts and ideas behind the menu governor 40 * Concepts and ideas behind the menu governor
@@ -191,17 +198,42 @@ static u64 div_round64(u64 dividend, u32 divisor)
191 return div_u64(dividend + (divisor / 2), divisor); 198 return div_u64(dividend + (divisor / 2), divisor);
192} 199}
193 200
201/* Cancel the hrtimer if it is not triggered yet */
202void menu_hrtimer_cancel(void)
203{
204 int cpu = smp_processor_id();
205 struct hrtimer *hrtmr = &per_cpu(menu_hrtimer, cpu);
206
207 /* The timer is still not time out*/
208 if (per_cpu(hrtimer_status, cpu)) {
209 hrtimer_cancel(hrtmr);
210 per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_STOP;
211 }
212}
213EXPORT_SYMBOL_GPL(menu_hrtimer_cancel);
214
215/* Call back for hrtimer is triggered */
216static enum hrtimer_restart menu_hrtimer_notify(struct hrtimer *hrtimer)
217{
218 int cpu = smp_processor_id();
219
220 per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_STOP;
221
222 return HRTIMER_NORESTART;
223}
224
194/* 225/*
195 * Try detecting repeating patterns by keeping track of the last 8 226 * Try detecting repeating patterns by keeping track of the last 8
196 * intervals, and checking if the standard deviation of that set 227 * intervals, and checking if the standard deviation of that set
197 * of points is below a threshold. If it is... then use the 228 * of points is below a threshold. If it is... then use the
198 * average of these 8 points as the estimated value. 229 * average of these 8 points as the estimated value.
199 */ 230 */
200static void detect_repeating_patterns(struct menu_device *data) 231static int detect_repeating_patterns(struct menu_device *data)
201{ 232{
202 int i; 233 int i;
203 uint64_t avg = 0; 234 uint64_t avg = 0;
204 uint64_t stddev = 0; /* contains the square of the std deviation */ 235 uint64_t stddev = 0; /* contains the square of the std deviation */
236 int ret = 0;
205 237
206 /* first calculate average and standard deviation of the past */ 238 /* first calculate average and standard deviation of the past */
207 for (i = 0; i < INTERVALS; i++) 239 for (i = 0; i < INTERVALS; i++)
@@ -210,7 +242,7 @@ static void detect_repeating_patterns(struct menu_device *data)
210 242
211 /* if the avg is beyond the known next tick, it's worthless */ 243 /* if the avg is beyond the known next tick, it's worthless */
212 if (avg > data->expected_us) 244 if (avg > data->expected_us)
213 return; 245 return 0;
214 246
215 for (i = 0; i < INTERVALS; i++) 247 for (i = 0; i < INTERVALS; i++)
216 stddev += (data->intervals[i] - avg) * 248 stddev += (data->intervals[i] - avg) *
@@ -223,8 +255,12 @@ static void detect_repeating_patterns(struct menu_device *data)
223 * repeating pattern and predict we keep doing this. 255 * repeating pattern and predict we keep doing this.
224 */ 256 */
225 257
226 if (avg && stddev < STDDEV_THRESH) 258 if (avg && stddev < STDDEV_THRESH) {
227 data->predicted_us = avg; 259 data->predicted_us = avg;
260 ret = 1;
261 }
262
263 return ret;
228} 264}
229 265
230/** 266/**
@@ -240,6 +276,9 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
240 int i; 276 int i;
241 int multiplier; 277 int multiplier;
242 struct timespec t; 278 struct timespec t;
279 int repeat = 0, low_predicted = 0;
280 int cpu = smp_processor_id();
281 struct hrtimer *hrtmr = &per_cpu(menu_hrtimer, cpu);
243 282
244 if (data->needs_update) { 283 if (data->needs_update) {
245 menu_update(drv, dev); 284 menu_update(drv, dev);
@@ -274,7 +313,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
274 data->predicted_us = div_round64(data->expected_us * data->correction_factor[data->bucket], 313 data->predicted_us = div_round64(data->expected_us * data->correction_factor[data->bucket],
275 RESOLUTION * DECAY); 314 RESOLUTION * DECAY);
276 315
277 detect_repeating_patterns(data); 316 repeat = detect_repeating_patterns(data);
278 317
279 /* 318 /*
280 * We want to default to C1 (hlt), not to busy polling 319 * We want to default to C1 (hlt), not to busy polling
@@ -295,8 +334,10 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
295 334
296 if (s->disabled || su->disable) 335 if (s->disabled || su->disable)
297 continue; 336 continue;
298 if (s->target_residency > data->predicted_us) 337 if (s->target_residency > data->predicted_us) {
338 low_predicted = 1;
299 continue; 339 continue;
340 }
300 if (s->exit_latency > latency_req) 341 if (s->exit_latency > latency_req)
301 continue; 342 continue;
302 if (s->exit_latency * multiplier > data->predicted_us) 343 if (s->exit_latency * multiplier > data->predicted_us)
@@ -309,6 +350,27 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
309 } 350 }
310 } 351 }
311 352
353 /* not deepest C-state chosen for low predicted residency */
354 if (low_predicted) {
355 unsigned int timer_us = 0;
356
357 /*
358 * Set a timer to detect whether this sleep is much
359 * longer than repeat mode predicted. If the timer
360 * triggers, the code will evaluate whether to put
361 * the CPU into a deeper C-state.
362 * The timer is cancelled on CPU wakeup.
363 */
364 timer_us = 2 * (data->predicted_us + MAX_DEVIATION);
365
366 if (repeat && (4 * timer_us < data->expected_us)) {
367 hrtimer_start(hrtmr, ns_to_ktime(1000 * timer_us),
368 HRTIMER_MODE_REL_PINNED);
369 /* In repeat case, menu hrtimer is started */
370 per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_REPEAT;
371 }
372 }
373
312 return data->last_state_idx; 374 return data->last_state_idx;
313} 375}
314 376
@@ -399,6 +461,9 @@ static int menu_enable_device(struct cpuidle_driver *drv,
399 struct cpuidle_device *dev) 461 struct cpuidle_device *dev)
400{ 462{
401 struct menu_device *data = &per_cpu(menu_devices, dev->cpu); 463 struct menu_device *data = &per_cpu(menu_devices, dev->cpu);
464 struct hrtimer *t = &per_cpu(menu_hrtimer, dev->cpu);
465 hrtimer_init(t, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
466 t->function = menu_hrtimer_notify;
402 467
403 memset(data, 0, sizeof(struct menu_device)); 468 memset(data, 0, sizeof(struct menu_device));
404 469