author    Peter Zijlstra <a.p.zijlstra@chello.nl>  2012-05-17 11:15:29 -0400
committer Ingo Molnar <mingo@kernel.org>           2012-05-30 08:02:16 -0400
commit    5aaa0b7a2ed5b12692c9ffb5222182bd558d3146 (patch)
tree      0334762e9d0e773acf21e61c682f895c25201c89 /kernel/sched
parent    9f646389aa7727a2fd8f9ae6337b92af9cfbc264 (diff)
sched/nohz: Fix rq->cpu_load calculations some more
Follow up on commit 556061b00 ("sched/nohz: Fix rq->cpu_load[] calculations")
since while that fixed the busy case it regressed the mostly idle case.

Add a callback from the nohz exit to also age the rq->cpu_load[] array. This
closes the hole where either there was no nohz load balance pass during the
nohz, or there was a 'significant' amount of idle time between the last nohz
balance and the nohz exit.

So we'll update unconditionally from the tick to not insert any accidental 0
load periods while busy, and we try and catch up from nohz idle balance and
nohz exit. Both these are still prone to missing a jiffy, but that has always
been the case.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: pjt@google.com
Cc: Venkatesh Pallipadi <venki@google.com>
Link: http://lkml.kernel.org/n/tip-kt0trz0apodbf84ucjfdbr1a@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
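As a rough illustration of what "age the rq->cpu_load[] array" means above,
here is a toy userspace model -- not the kernel's __update_cpu_load()
implementation; the array depth, decay weights, and helper name are
illustrative only. Index i holds an exponential average in which history
keeps weight (2^i - 1)/2^i per tick; a nohz period of 'pending' ticks is
replayed as (pending - 1) ticks that each observed zero load, followed by
one normal update with the current load:

    #include <stdio.h>

    #define LOAD_IDX_MAX 5              /* stand-in for the cpu_load[] depth */

    static unsigned long cpu_load[LOAD_IDX_MAX];

    /* Catch up after 'pending' missed ticks, then fold in cur_load once. */
    static void age_cpu_load(unsigned long cur_load, unsigned long pending)
    {
            unsigned long j;
            int i;

            cpu_load[0] = cur_load;     /* index 0 tracks the latest sample */
            for (i = 1; i < LOAD_IDX_MAX; i++) {
                    unsigned long scale = 1UL << i;
                    unsigned long load = cpu_load[i];

                    /* each missed tick decays the average toward zero */
                    for (j = 1; j < pending; j++)
                            load = (load * (scale - 1)) >> i;
                    cpu_load[i] = (load * (scale - 1) + cur_load) >> i;
            }
    }

    int main(void)
    {
            int i;

            for (i = 0; i < LOAD_IDX_MAX; i++)
                    cpu_load[i] = 1024; /* we were busy before going idle */

            age_cpu_load(0, 8);         /* nohz exit after missing 8 ticks */

            for (i = 0; i < LOAD_IDX_MAX; i++)
                    printf("cpu_load[%d] = %lu\n", i, cpu_load[i]);
            return 0;
    }

In this model, updating unconditionally from every tick keeps the averages
from ever seeing an accidental zero-load period while busy; the two nohz
paths only make up for ticks that genuinely did not happen.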
Diffstat (limited to 'kernel/sched')
-rw-r--r--  kernel/sched/core.c  |  53
1 file changed, 43 insertions(+), 10 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 39eb6011bc38..75844a8f9aeb 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2517,25 +2517,32 @@ static void __update_cpu_load(struct rq *this_rq, unsigned long this_load,
 	sched_avg_update(this_rq);
 }
 
+#ifdef CONFIG_NO_HZ
+/*
+ * There is no sane way to deal with nohz on smp when using jiffies because the
+ * cpu doing the jiffies update might drift wrt the cpu doing the jiffy reading
+ * causing off-by-one errors in observed deltas; {0,2} instead of {1,1}.
+ *
+ * Therefore we cannot use the delta approach from the regular tick since that
+ * would seriously skew the load calculation. However we'll make do for those
+ * updates happening while idle (nohz_idle_balance) or coming out of idle
+ * (tick_nohz_idle_exit).
+ *
+ * This means we might still be one tick off for nohz periods.
+ */
+
 /*
  * Called from nohz_idle_balance() to update the load ratings before doing the
  * idle balance.
  */
 void update_idle_cpu_load(struct rq *this_rq)
 {
-	unsigned long curr_jiffies = jiffies;
+	unsigned long curr_jiffies = ACCESS_ONCE(jiffies);
 	unsigned long load = this_rq->load.weight;
 	unsigned long pending_updates;
 
 	/*
-	 * Bloody broken means of dealing with nohz, but better than nothing..
-	 * jiffies is updated by one cpu, another cpu can drift wrt the jiffy
-	 * update and see 0 difference the one time and 2 the next, even though
-	 * we ticked at roughtly the same rate.
-	 *
-	 * Hence we only use this from nohz_idle_balance() and skip this
-	 * nonsense when called from the scheduler_tick() since that's
-	 * guaranteed a stable rate.
+	 * bail if there's load or we're actually up-to-date.
 	 */
 	if (load || curr_jiffies == this_rq->last_load_update_tick)
 		return;
@@ -2547,12 +2554,38 @@ void update_idle_cpu_load(struct rq *this_rq)
 }
 
 /*
+ * Called from tick_nohz_idle_exit() -- try and fix up the ticks we missed.
+ */
+void update_cpu_load_nohz(void)
+{
+	struct rq *this_rq = this_rq();
+	unsigned long curr_jiffies = ACCESS_ONCE(jiffies);
+	unsigned long pending_updates;
+
+	if (curr_jiffies == this_rq->last_load_update_tick)
+		return;
+
+	raw_spin_lock(&this_rq->lock);
+	pending_updates = curr_jiffies - this_rq->last_load_update_tick;
+	if (pending_updates) {
+		this_rq->last_load_update_tick = curr_jiffies;
+		/*
+		 * We were idle, this means load 0, the current load might be
+		 * !0 due to remote wakeups and the sort.
+		 */
+		__update_cpu_load(this_rq, 0, pending_updates);
+	}
+	raw_spin_unlock(&this_rq->lock);
+}
+#endif /* CONFIG_NO_HZ */
+
+/*
  * Called from scheduler_tick()
  */
 static void update_cpu_load_active(struct rq *this_rq)
 {
 	/*
-	 * See the mess in update_idle_cpu_load().
+	 * See the mess around update_idle_cpu_load() / update_cpu_load_nohz().
 	 */
 	this_rq->last_load_update_tick = jiffies;
 	__update_cpu_load(this_rq, this_rq->load.weight, 1);
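This page's diffstat is limited to kernel/sched, so the caller of the new
update_cpu_load_nohz() is not shown here. Reading the function above, its
pending_updates count is derived from jiffies, so it can only catch up fully
if jiffies is already current when it runs; presumably the nohz exit path
invokes it right after bringing jiffies forward. A minimal stubbed sketch of
that assumed ordering -- every helper except update_cpu_load_nohz() and
tick_nohz_idle_exit() is a stand-in, not a real kernel function:

    #include <stdio.h>

    /* Stand-in: the real exit path first brings jiffies up to date. */
    static void update_jiffies(void)
    {
            printf("jiffies brought current\n");
    }

    /* Stand-in body; the real function ages rq->cpu_load[] as shown above. */
    static void update_cpu_load_nohz(void)
    {
            printf("cpu_load[] aged for the missed ticks\n");
    }

    static void tick_nohz_idle_exit(void)
    {
            update_jiffies();           /* pending_updates needs fresh jiffies... */
            update_cpu_load_nohz();     /* ...before the catch-up runs */
    }

    int main(void)
    {
            tick_nohz_idle_exit();
            return 0;
    }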