author     Peter Zijlstra <peterz@infradead.org>            2012-03-01 09:04:46 -0500
committer  Greg Kroah-Hartman <gregkh@linuxfoundation.org>  2012-05-07 11:56:50 -0400
commit     7bfac470b517b18d496e96acc90be58353df2159 (patch)
tree       e7daf536ab63883eafc14e076b848bb331f8a618
parent     aef49be82379e995b42648f36dd02d70f979ef2a (diff)
sched: Fix nohz load accounting -- again!
commit c308b56b5398779cd3da0f62ab26b0453494c3d4 upstream.

[ backported to 3.0 by Kerin Millar <kerframil@gmail.com> ]

Various people reported nohz load tracking still being wrecked, but Doug
spotted the actual problem: we fold the nohz remainder in too soon, causing
us to lose samples and under-account.

So instead of playing catch-up up-front, always do a single load-fold with
whatever state we encounter and only then fold the nohz remainder and play
catch-up.

Reported-by: Doug Smythies <dsmythies@telus.net>
Reported-by: Lesław Kopeć <leslaw.kopec@nasza-klasa.pl>
Reported-by: Aman Gupta <aman@tmm1.net>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/n/tip-4v31etnhgg9kwd6ocgx3rxl8@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: Kerin Millar <kerframil@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
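For context, avenrun[] is an exponentially decayed fixed-point average: every
LOAD_FREQ interval calc_load() folds in the current count of runnable tasks,
and calc_load_n() ages the average over n missed intervals at once by raising
the per-interval decay factor to the n-th power. The user-space sketch below
only illustrates that arithmetic and is not the kernel source; FSHIFT, FIXED_1
and EXP_1 carry their usual values from this era, and fixed_power() is a
simplified stand-in for the kernel's internal power helper.

/*
 * Minimal user-space sketch (not the kernel code) of the fixed-point
 * load-average update that the patch below relies on.
 */
#include <stdio.h>

#define FSHIFT   11                 /* bits of fixed-point precision        */
#define FIXED_1  (1UL << FSHIFT)    /* 1.0 in fixed-point                   */
#define EXP_1    1884               /* ~ FIXED_1 / exp(5 sec / 1 min)       */

/* one load-average step: load = load*exp + active*(1 - exp) */
static unsigned long calc_load(unsigned long load, unsigned long exp,
			       unsigned long active)
{
	load *= exp;
	load += active * (FIXED_1 - exp);
	return load >> FSHIFT;
}

/* exp^n in fixed-point, by square-and-multiply */
static unsigned long fixed_power(unsigned long x, unsigned int n)
{
	unsigned long result = FIXED_1;

	while (n) {
		if (n & 1)
			result = (result * x) >> FSHIFT;
		x = (x * x) >> FSHIFT;
		n >>= 1;
	}
	return result;
}

/* apply n missed LOAD_FREQ intervals in one go */
static unsigned long calc_load_n(unsigned long load, unsigned long exp,
				 unsigned long active, unsigned int n)
{
	return calc_load(load, fixed_power(exp, n), active);
}

int main(void)
{
	unsigned long load = 2 * FIXED_1;	/* 1-min load average of 2.00 */
	unsigned long active = 0;		/* idle: no runnable tasks    */

	/* age the average over 12 idle 5-second intervals (~1 minute) */
	load = calc_load_n(load, EXP_1, active, 12);
	printf("1-min load after an idle minute: %.2f\n",
	       (double)load / FIXED_1);
	return 0;
}

With active = 0 for a dozen idle intervals, the 1-minute figure decays from
2.00 to roughly 0.73; this ageing is what the patch now performs only after
the regular fold, so the pending sample is not consumed early.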
-rw-r--r--  kernel/sched.c | 53
1 file changed, 26 insertions(+), 27 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index 063d7a496f4..03dff14b44e 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3392,13 +3392,10 @@ calc_load_n(unsigned long load, unsigned long exp,
  * Once we've updated the global active value, we need to apply the exponential
  * weights adjusted to the number of cycles missed.
  */
-static void calc_global_nohz(unsigned long ticks)
+static void calc_global_nohz(void)
 {
 	long delta, active, n;
 
-	if (time_before(jiffies, calc_load_update))
-		return;
-
 	/*
 	 * If we crossed a calc_load_update boundary, make sure to fold
 	 * any pending idle changes, the respective CPUs might have
@@ -3410,31 +3407,25 @@ static void calc_global_nohz(unsigned long ticks)
 		atomic_long_add(delta, &calc_load_tasks);
 
 	/*
-	 * If we were idle for multiple load cycles, apply them.
+	 * It could be the one fold was all it took, we done!
 	 */
-	if (ticks >= LOAD_FREQ) {
-		n = ticks / LOAD_FREQ;
+	if (time_before(jiffies, calc_load_update + 10))
+		return;
 
-		active = atomic_long_read(&calc_load_tasks);
-		active = active > 0 ? active * FIXED_1 : 0;
+	/*
+	 * Catch-up, fold however many we are behind still
+	 */
+	delta = jiffies - calc_load_update - 10;
+	n = 1 + (delta / LOAD_FREQ);
 
-		avenrun[0] = calc_load_n(avenrun[0], EXP_1, active, n);
-		avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n);
-		avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n);
+	active = atomic_long_read(&calc_load_tasks);
+	active = active > 0 ? active * FIXED_1 : 0;
 
-		calc_load_update += n * LOAD_FREQ;
-	}
+	avenrun[0] = calc_load_n(avenrun[0], EXP_1, active, n);
+	avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n);
+	avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n);
 
-	/*
-	 * Its possible the remainder of the above division also crosses
-	 * a LOAD_FREQ period, the regular check in calc_global_load()
-	 * which comes after this will take care of that.
-	 *
-	 * Consider us being 11 ticks before a cycle completion, and us
-	 * sleeping for 4*LOAD_FREQ + 22 ticks, then the above code will
-	 * age us 4 cycles, and the test in calc_global_load() will
-	 * pick up the final one.
-	 */
+	calc_load_update += n * LOAD_FREQ;
 }
 #else
 static void calc_load_account_idle(struct rq *this_rq)
@@ -3446,7 +3437,7 @@ static inline long calc_load_fold_idle(void)
 	return 0;
 }
 
-static void calc_global_nohz(unsigned long ticks)
+static void calc_global_nohz(void)
 {
 }
 #endif
@@ -3474,8 +3465,6 @@ void calc_global_load(unsigned long ticks)
 {
 	long active;
 
-	calc_global_nohz(ticks);
-
 	if (time_before(jiffies, calc_load_update + 10))
 		return;
 
@@ -3487,6 +3476,16 @@ void calc_global_load(unsigned long ticks)
 	avenrun[2] = calc_load(avenrun[2], EXP_15, active);
 
 	calc_load_update += LOAD_FREQ;
+
+	/*
+	 * Account one period with whatever state we found before
+	 * folding in the nohz state and ageing the entire idle period.
+	 *
+	 * This avoids loosing a sample when we go idle between
+	 * calc_load_account_active() (10 ticks ago) and now and thus
+	 * under-accounting.
+	 */
+	calc_global_nohz();
 }
 
 /*