author    | Peter Zijlstra <peterz@infradead.org>           | 2012-03-01 09:04:46 -0500
committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2012-05-07 11:56:50 -0400
commit    | 7bfac470b517b18d496e96acc90be58353df2159 (patch)
tree      | e7daf536ab63883eafc14e076b848bb331f8a618
parent    | aef49be82379e995b42648f36dd02d70f979ef2a (diff)
sched: Fix nohz load accounting -- again!
commit c308b56b5398779cd3da0f62ab26b0453494c3d4 upstream.
[backported to 3.0 by Kerin Millar <kerframil@gmail.com>]
Various people reported nohz load tracking still being wrecked, but Doug
spotted the actual problem. We fold the nohz remainder in too soon,
causing us to lose samples and under-account.
So instead of playing catch-up up-front, always do a single load-fold
with whatever state we encounter and only then fold the nohz remainder
and play catch-up.
Reported-by: Doug Smythies <dsmythies@telus.net>
Reported-by: Lesław Kopeć <leslaw.kopec@nasza-klasa.pl>
Reported-by: Aman Gupta <aman@tmm1.net>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/n/tip-4v31etnhgg9kwd6ocgx3rxl8@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: Kerin Millar <kerframil@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
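
The patch reorders existing load-average bookkeeping rather than introducing new math. To make that math concrete, the following stand-alone sketch mirrors the fixed-point helpers the patch builds on (calc_load() and calc_load_n() from kernel/sched.c, with the kernel's FSHIFT/FIXED_1/EXP_1 constants) and replays the patched ordering in user space: one ordinary fold with the pre-idle state, then a single n-period catch-up. The main() scenario and its numbers are purely illustrative, not part of the commit.

/*
 * Illustrative user-space sketch of the fixed-point load-average math
 * this patch reorders.  The helpers mirror kernel/sched.c; main() and
 * its values are made up for demonstration.
 */
#include <stdio.h>

#define FSHIFT   11                 /* bits of fractional precision */
#define FIXED_1  (1 << FSHIFT)      /* 1.0 in fixed point */
#define EXP_1    1884               /* 1/exp(5s/1min) in fixed point */

/* One sampling period: load = load*exp + active*(1 - exp) */
static unsigned long calc_load(unsigned long load, unsigned long exp,
                               unsigned long active)
{
        load *= exp;
        load += active * (FIXED_1 - exp);
        return load >> FSHIFT;
}

/* x^n in fixed point, by squaring; used to age the average over n periods */
static unsigned long fixed_power_int(unsigned long x, unsigned int frac_bits,
                                     unsigned int n)
{
        unsigned long result = 1UL << frac_bits;

        while (n) {
                if (n & 1) {
                        result *= x;
                        result += 1UL << (frac_bits - 1);
                        result >>= frac_bits;
                }
                n >>= 1;
                if (!n)
                        break;
                x *= x;
                x += 1UL << (frac_bits - 1);
                x >>= frac_bits;
        }
        return result;
}

/* Fold n missed periods in a single step */
static unsigned long calc_load_n(unsigned long load, unsigned long exp,
                                 unsigned long active, unsigned int n)
{
        return calc_load(load, fixed_power_int(exp, FSHIFT, n), active);
}

int main(void)
{
        unsigned long avenrun = 2 * FIXED_1;    /* ~2.00 load before idling */

        /*
         * Patched ordering: first one regular fold with the state found at
         * wakeup (the two tasks that ran before the CPU went idle), and
         * only then age the remaining idle periods with the folded (now
         * zero) nohz contribution.  Folding the nohz state first would
         * drop that pre-idle sample.
         */
        avenrun = calc_load(avenrun, EXP_1, 2 * FIXED_1);  /* regular sample */
        avenrun = calc_load_n(avenrun, EXP_1, 0, 3);       /* 3-period catch-up */

        printf("1-min load after wakeup: %lu.%02lu\n",
               avenrun >> FSHIFT, ((avenrun & (FIXED_1 - 1)) * 100) >> FSHIFT);
        return 0;
}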
-rw-r--r-- | kernel/sched.c | 53
1 file changed, 26 insertions(+), 27 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index 063d7a496f4..03dff14b44e 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3392,13 +3392,10 @@ calc_load_n(unsigned long load, unsigned long exp,
  * Once we've updated the global active value, we need to apply the exponential
  * weights adjusted to the number of cycles missed.
  */
-static void calc_global_nohz(unsigned long ticks)
+static void calc_global_nohz(void)
 {
 	long delta, active, n;
 
-	if (time_before(jiffies, calc_load_update))
-		return;
-
 	/*
 	 * If we crossed a calc_load_update boundary, make sure to fold
 	 * any pending idle changes, the respective CPUs might have
@@ -3410,31 +3407,25 @@ static void calc_global_nohz(unsigned long ticks)
 		atomic_long_add(delta, &calc_load_tasks);
 
 	/*
-	 * If we were idle for multiple load cycles, apply them.
+	 * It could be the one fold was all it took, we done!
 	 */
-	if (ticks >= LOAD_FREQ) {
-		n = ticks / LOAD_FREQ;
+	if (time_before(jiffies, calc_load_update + 10))
+		return;
 
-		active = atomic_long_read(&calc_load_tasks);
-		active = active > 0 ? active * FIXED_1 : 0;
+	/*
+	 * Catch-up, fold however many we are behind still
+	 */
+	delta = jiffies - calc_load_update - 10;
+	n = 1 + (delta / LOAD_FREQ);
 
-		avenrun[0] = calc_load_n(avenrun[0], EXP_1, active, n);
-		avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n);
-		avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n);
+	active = atomic_long_read(&calc_load_tasks);
+	active = active > 0 ? active * FIXED_1 : 0;
 
-		calc_load_update += n * LOAD_FREQ;
-	}
+	avenrun[0] = calc_load_n(avenrun[0], EXP_1, active, n);
+	avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n);
+	avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n);
 
-	/*
-	 * Its possible the remainder of the above division also crosses
-	 * a LOAD_FREQ period, the regular check in calc_global_load()
-	 * which comes after this will take care of that.
-	 *
-	 * Consider us being 11 ticks before a cycle completion, and us
-	 * sleeping for 4*LOAD_FREQ + 22 ticks, then the above code will
-	 * age us 4 cycles, and the test in calc_global_load() will
-	 * pick up the final one.
-	 */
+	calc_load_update += n * LOAD_FREQ;
 }
 #else
 static void calc_load_account_idle(struct rq *this_rq)
@@ -3446,7 +3437,7 @@ static inline long calc_load_fold_idle(void)
 	return 0;
 }
 
-static void calc_global_nohz(unsigned long ticks)
+static void calc_global_nohz(void)
 {
 }
 #endif
@@ -3474,8 +3465,6 @@ void calc_global_load(unsigned long ticks)
 {
 	long active;
 
-	calc_global_nohz(ticks);
-
 	if (time_before(jiffies, calc_load_update + 10))
 		return;
 
@@ -3487,6 +3476,16 @@ void calc_global_load(unsigned long ticks)
 	avenrun[2] = calc_load(avenrun[2], EXP_15, active);
 
 	calc_load_update += LOAD_FREQ;
+
+	/*
+	 * Account one period with whatever state we found before
+	 * folding in the nohz state and ageing the entire idle period.
+	 *
+	 * This avoids loosing a sample when we go idle between
+	 * calc_load_account_active() (10 ticks ago) and now and thus
+	 * under-accounting.
+	 */
+	calc_global_nohz();
 }
 
 /*
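
A worked example of the new catch-up arithmetic, assuming HZ=100 so that LOAD_FREQ = 5*HZ + 1 = 501 ticks (the numbers are illustrative only): suppose a CPU goes fully idle with the next sample due at calc_load_update = U and wakes at jiffies = U + 1600. The regular path in calc_global_load() folds one sample with whatever state it finds and advances calc_load_update to U + 501. calc_global_nohz() then folds the pending idle delta, computes delta = 1600 - 501 - 10 = 1089 and n = 1 + 1089/501 = 3, ages avenrun[] over those three periods in a single calc_load_n() step, and leaves calc_load_update at U + 2004, the next due sample. All four boundaries crossed while idle are accounted for, the first of them with the pre-idle task count, which is exactly the sample the old ordering lost.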