path: root/kernel/sched
author    Peter Zijlstra <peterz@infradead.org>    2012-03-01 09:04:46 -0500
committer Ingo Molnar <mingo@elte.hu>              2012-03-12 15:43:17 -0400
commit    c308b56b5398779cd3da0f62ab26b0453494c3d4 (patch)
tree      b2d90bd6f8ebbbeaae97f4ef319f7e22fcc77436 /kernel/sched
parent    8e3fabfde445a872c8aec2296846badf24d7c8b4 (diff)
sched: Fix nohz load accounting -- again!
Various people reported nohz load tracking still being wrecked, but Doug spotted the actual problem. We fold the nohz remainder in too soon, causing us to lose samples and under-account. So instead of playing catch-up up-front, always do a single load-fold with whatever state we encounter and only then fold the nohz remainder and play catch-up.

Reported-by: Doug Smythies <dsmythies@telus.net>
Reported-by: Lesław Kopeć <leslaw.kopec@nasza-klasa.pl>
Reported-by: Aman Gupta <aman@tmm1.net>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/n/tip-4v31etnhgg9kwd6ocgx3rxl8@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
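For reference, below is a minimal userspace sketch of the fixed-point load-average math whose ordering the patch changes; it is not the kernel code itself. FSHIFT, FIXED_1 and EXP_1 follow the definitions in include/linux/sched.h, while the LOAD_FREQ value, the loop-based calc_load_n() and the main() scenario are simplified stand-ins chosen for illustration. It folds one ordinary sample with whatever state was found, and only then catches up on the remaining idle periods, which is the ordering the patch establishes.

/*
 * Simplified model of the load-average fold + nohz catch-up.
 * Not the kernel implementation; constants EXP_1/FIXED_1/FSHIFT match
 * include/linux/sched.h, LOAD_FREQ is an assumed stand-in value.
 */
#include <stdio.h>

#define FSHIFT    11
#define FIXED_1   (1 << FSHIFT)   /* 1.0 in 11-bit fixed point */
#define EXP_1     1884            /* 1/exp(5 sec / 1 min) in fixed point */
#define LOAD_FREQ 5000            /* stand-in: ticks per sample period */

/* One period of exponential decay: avg = avg*exp + active*(1 - exp) */
static unsigned long calc_load(unsigned long load, unsigned long exp,
			       unsigned long active)
{
	load *= exp;
	load += active * (FIXED_1 - exp);
	return load >> FSHIFT;
}

/* Apply n periods at once (the kernel computes exp^n via fixed_power_int()) */
static unsigned long calc_load_n(unsigned long load, unsigned long exp,
				 unsigned long active, unsigned int n)
{
	while (n--)
		load = calc_load(load, exp, active);
	return load;
}

int main(void)
{
	unsigned long avenrun = 2 * FIXED_1;          /* 1-min load of 2.00 */
	unsigned long idle_ticks = 3 * LOAD_FREQ + 42; /* slept ~3 periods */

	/* 1) fold a single period with whatever state we woke up with */
	avenrun = calc_load(avenrun, EXP_1, 0);

	/* 2) only then catch up on the full idle periods we are behind */
	unsigned int n = idle_ticks / LOAD_FREQ;
	avenrun = calc_load_n(avenrun, EXP_1, 0, n);

	printf("1-min load after idle: %lu.%02lu\n",
	       avenrun >> FSHIFT,
	       ((avenrun & (FIXED_1 - 1)) * 100) >> FSHIFT);
	return 0;
}

The loop in calc_load_n() is only there to keep the sketch short; the kernel folds n periods in one step by raising the decay factor to the n-th power with fixed_power_int().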
Diffstat (limited to 'kernel/sched')
-rw-r--r--  kernel/sched/core.c | 53
1 file changed, 26 insertions, 27 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 47614a5cdd47..e3ccc13c4caa 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2266,13 +2266,10 @@ calc_load_n(unsigned long load, unsigned long exp,
  * Once we've updated the global active value, we need to apply the exponential
  * weights adjusted to the number of cycles missed.
  */
-static void calc_global_nohz(unsigned long ticks)
+static void calc_global_nohz(void)
 {
 	long delta, active, n;
 
-	if (time_before(jiffies, calc_load_update))
-		return;
-
 	/*
 	 * If we crossed a calc_load_update boundary, make sure to fold
 	 * any pending idle changes, the respective CPUs might have
@@ -2284,31 +2281,25 @@ static void calc_global_nohz(unsigned long ticks)
 	atomic_long_add(delta, &calc_load_tasks);
 
 	/*
-	 * If we were idle for multiple load cycles, apply them.
+	 * It could be the one fold was all it took, we done!
 	 */
-	if (ticks >= LOAD_FREQ) {
-		n = ticks / LOAD_FREQ;
+	if (time_before(jiffies, calc_load_update + 10))
+		return;
 
-		active = atomic_long_read(&calc_load_tasks);
-		active = active > 0 ? active * FIXED_1 : 0;
+	/*
+	 * Catch-up, fold however many we are behind still
+	 */
+	delta = jiffies - calc_load_update - 10;
+	n = 1 + (delta / LOAD_FREQ);
 
-		avenrun[0] = calc_load_n(avenrun[0], EXP_1, active, n);
-		avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n);
-		avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n);
+	active = atomic_long_read(&calc_load_tasks);
+	active = active > 0 ? active * FIXED_1 : 0;
 
-		calc_load_update += n * LOAD_FREQ;
-	}
+	avenrun[0] = calc_load_n(avenrun[0], EXP_1, active, n);
+	avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n);
+	avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n);
 
-	/*
-	 * Its possible the remainder of the above division also crosses
-	 * a LOAD_FREQ period, the regular check in calc_global_load()
-	 * which comes after this will take care of that.
-	 *
-	 * Consider us being 11 ticks before a cycle completion, and us
-	 * sleeping for 4*LOAD_FREQ + 22 ticks, then the above code will
-	 * age us 4 cycles, and the test in calc_global_load() will
-	 * pick up the final one.
-	 */
+	calc_load_update += n * LOAD_FREQ;
 }
 #else
 void calc_load_account_idle(struct rq *this_rq)
@@ -2320,7 +2311,7 @@ static inline long calc_load_fold_idle(void)
 	return 0;
 }
 
-static void calc_global_nohz(unsigned long ticks)
+static void calc_global_nohz(void)
 {
 }
 #endif
@@ -2348,8 +2339,6 @@ void calc_global_load(unsigned long ticks)
 {
 	long active;
 
-	calc_global_nohz(ticks);
-
 	if (time_before(jiffies, calc_load_update + 10))
 		return;
 
@@ -2361,6 +2350,16 @@ void calc_global_load(unsigned long ticks)
 	avenrun[2] = calc_load(avenrun[2], EXP_15, active);
 
 	calc_load_update += LOAD_FREQ;
+
+	/*
+	 * Account one period with whatever state we found before
+	 * folding in the nohz state and ageing the entire idle period.
+	 *
+	 * This avoids loosing a sample when we go idle between
+	 * calc_load_account_active() (10 ticks ago) and now and thus
+	 * under-accounting.
+	 */
+	calc_global_nohz();
 }
 
 /*