author     Rafael J. Wysocki <rafael.j.wysocki@intel.com>  2013-07-26 19:41:34 -0400
committer  Rafael J. Wysocki <rafael.j.wysocki@intel.com>  2013-07-29 07:32:29 -0400
commit     148519120c6d1f19ad53349683aeae9f228b0b8d
tree       6d585444bbc27d2752ac1eb69180b7312150c5a5 /drivers/cpuidle
parent     228b30234f258a193317874854eee1ca7807186e
Revert "cpuidle: Quickly notice prediction failure for repeat mode"
Revert commit 69a37bea (cpuidle: Quickly notice prediction failure for
repeat mode), because it has been identified as the source of a
significant performance regression in v3.8 and later as explained by
Jeremy Eder:
We believe we've identified a particular commit to the cpuidle code
that seems to be impacting performance of a variety of workloads.
The simplest way to reproduce is with the netperf TCP_RR test, so
we're using that on a pair of Sandy Bridge based servers. We also
have data from a large database setup where performance is also
measurably/positively impacted, though that test data isn't easily
shareable.
Included below are test results from 3 test kernels:
kernel       reverts
-----------------------------------------------------------
1) vanilla   upstream (no reverts)
2) perfteam2 reverts e11538d1f03914eb92af5a1a378375c05ae8520c
3) test      reverts 69a37beabf1f0a6705c08e879bdd5d82ff6486c4
             and e11538d1f03914eb92af5a1a378375c05ae8520c
In summary, netperf TCP_RR numbers improve by approximately 4%
after reverting 69a37beabf1f0a6705c08e879bdd5d82ff6486c4. When
69a37beabf1f0a6705c08e879bdd5d82ff6486c4 is included, C0 residency
never seems to get above 40%. Taking that patch out gets C0 near
100% quite often, and performance increases.
The data below are histograms of %c0 residency, sampled at 1-second
intervals (using turbostat) while under netperf load.
- If you look at the first 4 histograms, you can see %c0 residency
almost entirely in the 30-40% bin.
- The last pair, which reverts 69a37beabf1f0a6705c08e879bdd5d82ff6486c4,
shows %c0 in the 80-100% bins.
Below each kernel name are the netperf TCP_RR trans/s numbers for
that kernel that can be disclosed publicly, comparing the 3 test
kernels. We ran a 4th test with the vanilla kernel where we also
set /dev/cpu_dma_latency=0, to show the overall impact: it boosts
single-threaded TCP_RR performance by over 11% above baseline.
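As an aside (not part of the original message): the "c0 lock" in that
4th test works by holding a PM QoS request open. A process opens
/dev/cpu_dma_latency, writes a 32-bit latency bound of 0, and keeps the
file descriptor open; the constraint is dropped as soon as the fd is
closed. A minimal userspace sketch, assuming the standard binary-write
interface of that device:

/*
 * Hold CPUs out of deep C-states for the lifetime of this process by
 * pinning the PM QoS cpu_dma_latency constraint to 0.
 */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int32_t latency_us = 0;	/* 0: forbid any C-state exit latency */
	int fd = open("/dev/cpu_dma_latency", O_WRONLY);

	if (fd < 0) {
		perror("open /dev/cpu_dma_latency");
		return 1;
	}
	if (write(fd, &latency_us, sizeof(latency_us)) != sizeof(latency_us)) {
		perror("write");
		close(fd);
		return 1;
	}
	pause();	/* hold the QoS request open until killed */
	return 0;
}

While such a process runs, turbostat shows the CPUs held in C0, which
is what the "c0 lock" row below reflects.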
3.10-rc2 vanilla RX + c0 lock (/dev/cpu_dma_latency=0):
TCP_RR trans/s 54323.78
-----------------------------------------------------------
3.10-rc2 vanilla RX (no reverts)
TCP_RR trans/s 48192.47
Receiver %c0
0.0000 - 10.0000 [ 1]: *
10.0000 - 20.0000 [ 0]:
20.0000 - 30.0000 [ 0]:
30.0000 - 40.0000 [ 59]: ***********************************************************
40.0000 - 50.0000 [ 1]: *
50.0000 - 60.0000 [ 0]:
60.0000 - 70.0000 [ 0]:
70.0000 - 80.0000 [ 0]:
80.0000 - 90.0000 [ 0]:
90.0000 - 100.0000 [ 0]:
Sender %c0
0.0000 - 10.0000 [ 1]: *
10.0000 - 20.0000 [ 0]:
20.0000 - 30.0000 [ 0]:
30.0000 - 40.0000 [ 11]: ***********
40.0000 - 50.0000 [ 49]: *************************************************
50.0000 - 60.0000 [ 0]:
60.0000 - 70.0000 [ 0]:
70.0000 - 80.0000 [ 0]:
80.0000 - 90.0000 [ 0]:
90.0000 - 100.0000 [ 0]:
-----------------------------------------------------------
3.10-rc2 perfteam2 RX (reverts commit
e11538d1f03914eb92af5a1a378375c05ae8520c)
TCP_RR trans/s 49698.69
Receiver %c0
0.0000 - 10.0000 [ 1]: *
10.0000 - 20.0000 [ 1]: *
20.0000 - 30.0000 [ 0]:
30.0000 - 40.0000 [ 59]: ***********************************************************
40.0000 - 50.0000 [ 0]:
50.0000 - 60.0000 [ 0]:
60.0000 - 70.0000 [ 0]:
70.0000 - 80.0000 [ 0]:
80.0000 - 90.0000 [ 0]:
90.0000 - 100.0000 [ 0]:
Sender %c0
0.0000 - 10.0000 [ 1]: *
10.0000 - 20.0000 [ 0]:
20.0000 - 30.0000 [ 0]:
30.0000 - 40.0000 [ 2]: **
40.0000 - 50.0000 [ 58]: **********************************************************
50.0000 - 60.0000 [ 0]:
60.0000 - 70.0000 [ 0]:
70.0000 - 80.0000 [ 0]:
80.0000 - 90.0000 [ 0]:
90.0000 - 100.0000 [ 0]:
-----------------------------------------------------------
3.10-rc2 test RX (reverts 69a37beabf1f0a6705c08e879bdd5d82ff6486c4
and e11538d1f03914eb92af5a1a378375c05ae8520c)
TCP_RR trans/s 47766.95
Receiver %c0
0.0000 - 10.0000 [ 1]: *
10.0000 - 20.0000 [ 1]: *
20.0000 - 30.0000 [ 0]:
30.0000 - 40.0000 [ 27]: ***************************
40.0000 - 50.0000 [ 2]: **
50.0000 - 60.0000 [ 0]:
60.0000 - 70.0000 [ 2]: **
70.0000 - 80.0000 [ 0]:
80.0000 - 90.0000 [ 0]:
90.0000 - 100.0000 [ 28]: ****************************
Sender %c0
0.0000 - 10.0000 [ 1]: *
10.0000 - 20.0000 [ 0]:
20.0000 - 30.0000 [ 0]:
30.0000 - 40.0000 [ 11]: ***********
40.0000 - 50.0000 [ 0]:
50.0000 - 60.0000 [ 1]: *
60.0000 - 70.0000 [ 0]:
70.0000 - 80.0000 [ 3]: ***
80.0000 - 90.0000 [ 7]: *******
90.0000 - 100.0000 [ 38]: **************************************
These results demonstrate that reverting commit
69a37beabf1f0a6705c08e879bdd5d82ff6486c4 restores the CPU's tendency
to stay in more responsive, performant C-states, and thus yields
measurably better performance.
Requested-by: Jeremy Eder <jeder@redhat.com>
Tested-by: Len Brown <len.brown@intel.com>
Cc: 3.8+ <stable@vger.kernel.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Diffstat (limited to 'drivers/cpuidle')
 drivers/cpuidle/governors/menu.c | 73
 1 file changed, 4 insertions(+), 69 deletions(-)
diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index b69a87e22155..bc580b67a652 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -28,13 +28,6 @@
 #define MAX_INTERESTING 50000
 #define STDDEV_THRESH 400
 
-/* 60 * 60 > STDDEV_THRESH * INTERVALS = 400 * 8 */
-#define MAX_DEVIATION 60
-
-static DEFINE_PER_CPU(struct hrtimer, menu_hrtimer);
-static DEFINE_PER_CPU(int, hrtimer_status);
-/* menu hrtimer mode */
-enum {MENU_HRTIMER_STOP, MENU_HRTIMER_REPEAT};
 
 /*
  * Concepts and ideas behind the menu governor
@@ -198,42 +191,17 @@ static u64 div_round64(u64 dividend, u32 divisor)
 	return div_u64(dividend + (divisor / 2), divisor);
 }
 
-/* Cancel the hrtimer if it is not triggered yet */
-void menu_hrtimer_cancel(void)
-{
-	int cpu = smp_processor_id();
-	struct hrtimer *hrtmr = &per_cpu(menu_hrtimer, cpu);
-
-	/* The timer is still not time out*/
-	if (per_cpu(hrtimer_status, cpu)) {
-		hrtimer_cancel(hrtmr);
-		per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_STOP;
-	}
-}
-EXPORT_SYMBOL_GPL(menu_hrtimer_cancel);
-
-/* Call back for hrtimer is triggered */
-static enum hrtimer_restart menu_hrtimer_notify(struct hrtimer *hrtimer)
-{
-	int cpu = smp_processor_id();
-
-	per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_STOP;
-
-	return HRTIMER_NORESTART;
-}
-
 /*
  * Try detecting repeating patterns by keeping track of the last 8
  * intervals, and checking if the standard deviation of that set
  * of points is below a threshold. If it is... then use the
  * average of these 8 points as the estimated value.
  */
-static u32 get_typical_interval(struct menu_device *data)
+static void get_typical_interval(struct menu_device *data)
 {
 	int i = 0, divisor = 0;
 	uint64_t max = 0, avg = 0, stddev = 0;
 	int64_t thresh = LLONG_MAX; /* Discard outliers above this value. */
-	unsigned int ret = 0;
 
 again:
 
@@ -274,16 +242,13 @@ again:
 	if (((avg > stddev * 6) && (divisor * 4 >= INTERVALS * 3))
 							|| stddev <= 20) {
 		data->predicted_us = avg;
-		ret = 1;
-		return ret;
+		return;
 
 	} else if ((divisor * 4) > INTERVALS * 3) {
 		/* Exclude the max interval */
 		thresh = max - 1;
 		goto again;
 	}
-
-	return ret;
 }
 
 /**
@@ -298,9 +263,6 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 	int i;
 	int multiplier;
 	struct timespec t;
-	int repeat = 0, low_predicted = 0;
-	int cpu = smp_processor_id();
-	struct hrtimer *hrtmr = &per_cpu(menu_hrtimer, cpu);
 
 	if (data->needs_update) {
 		menu_update(drv, dev);
@@ -335,7 +297,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 	data->predicted_us = div_round64(data->expected_us * data->correction_factor[data->bucket],
 					 RESOLUTION * DECAY);
 
-	repeat = get_typical_interval(data);
+	get_typical_interval(data);
 
 	/*
 	 * We want to default to C1 (hlt), not to busy polling
@@ -356,10 +318,8 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 
 		if (s->disabled || su->disable)
 			continue;
-		if (s->target_residency > data->predicted_us) {
-			low_predicted = 1;
+		if (s->target_residency > data->predicted_us)
 			continue;
-		}
 		if (s->exit_latency > latency_req)
 			continue;
 		if (s->exit_latency * multiplier > data->predicted_us)
@@ -369,28 +329,6 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 		data->exit_us = s->exit_latency;
 	}
 
-	/* not deepest C-state chosen for low predicted residency */
-	if (low_predicted) {
-		unsigned int timer_us = 0;
-
-		/*
-		 * Set a timer to detect whether this sleep is much
-		 * longer than repeat mode predicted. If the timer
-		 * triggers, the code will evaluate whether to put
-		 * the CPU into a deeper C-state.
-		 * The timer is cancelled on CPU wakeup.
-		 */
-		timer_us = 2 * (data->predicted_us + MAX_DEVIATION);
-
-		if (repeat && (4 * timer_us < data->expected_us)) {
-			RCU_NONIDLE(hrtimer_start(hrtmr,
-				ns_to_ktime(1000 * timer_us),
-				HRTIMER_MODE_REL_PINNED));
-			/* In repeat case, menu hrtimer is started */
-			per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_REPEAT;
-		}
-	}
-
 	return data->last_state_idx;
 }
 
@@ -481,9 +419,6 @@ static int menu_enable_device(struct cpuidle_driver *drv,
 		struct cpuidle_device *dev)
 {
 	struct menu_device *data = &per_cpu(menu_devices, dev->cpu);
-	struct hrtimer *t = &per_cpu(menu_hrtimer, dev->cpu);
-	hrtimer_init(t, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
-	t->function = menu_hrtimer_notify;
 
 	memset(data, 0, sizeof(struct menu_device));
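
For reference (an addition, not part of the commit), the pattern
detection that survives this revert can be sketched as a self-contained
userspace program: keep the last 8 intervals, average the samples below
an outlier threshold, and accept the average only when the spread is
small, retrying with the largest sample excluded. The kernel works in
integer microseconds; this sketch avoids an integer square root by
comparing squared quantities (avg > 6*sigma becomes avg*avg >
36*variance), so its arithmetic differs in detail from
get_typical_interval() above. Names and the demo main() are
illustrative only.

#include <stdint.h>
#include <stdio.h>

#define INTERVALS 8	/* same window size the menu governor uses */

/*
 * Return a predicted next interval (microseconds) when the recent
 * intervals form a stable repeating pattern, or 0 when they do not.
 */
static uint64_t typical_interval(const uint32_t intervals[INTERVALS])
{
	uint64_t thresh = UINT64_MAX;	/* discard outliers above this */

again:	;
	uint64_t max = 0, avg = 0, variance = 0;
	int divisor = 0;

	/* First pass: average of the samples below the threshold. */
	for (int i = 0; i < INTERVALS; i++) {
		uint64_t value = intervals[i];
		if (value <= thresh) {
			avg += value;
			divisor++;
			if (value > max)
				max = value;
		}
	}
	avg /= divisor;

	/* Second pass: variance of the same samples. */
	for (int i = 0; i < INTERVALS; i++) {
		uint64_t value = intervals[i];
		if (value <= thresh) {
			int64_t diff = (int64_t)value - (int64_t)avg;
			variance += (uint64_t)(diff * diff);
		}
	}
	variance /= divisor;

	/*
	 * Accept the average if the spread is small relative to it (and
	 * most samples survived the outlier filter), or tiny in absolute
	 * terms (variance <= 400 means sigma <= 20us).
	 */
	if ((avg * avg > variance * 36 && divisor * 4 >= INTERVALS * 3)
	    || variance <= 400)
		return avg;

	/* Otherwise drop the largest remaining sample and retry. */
	if (divisor * 4 > INTERVALS * 3) {
		thresh = max - 1;
		goto again;
	}
	return 0;
}

int main(void)
{
	/* Seven ~500us wakeups plus one 9ms outlier: expect ~500. */
	const uint32_t samples[INTERVALS] =
		{ 500, 502, 498, 501, 9000, 499, 500, 503 };

	printf("predicted interval: %llu us\n",
	       (unsigned long long)typical_interval(samples));
	return 0;
}

Run on the sample above, the first pass rejects the 9000us outlier and
the second pass accepts the remaining seven samples, printing
"predicted interval: 500 us".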