author     Peter Zijlstra <a.p.zijlstra@chello.nl>  2012-05-17 11:15:29 -0400
committer  Ingo Molnar <mingo@kernel.org>           2012-05-30 08:02:16 -0400
commit     5aaa0b7a2ed5b12692c9ffb5222182bd558d3146 (patch)
tree       0334762e9d0e773acf21e61c682f895c25201c89 /kernel/sched
parent     9f646389aa7727a2fd8f9ae6337b92af9cfbc264 (diff)
sched/nohz: Fix rq->cpu_load calculations some more
Follow up on commit 556061b00 ("sched/nohz: Fix rq->cpu_load[]
calculations"): while that change fixed the busy case, it regressed
the mostly idle case.
Add a callback from the nohz exit to also age the rq->cpu_load[]
array. This closes the hole where either there was no nohz load
balance pass during the nohz period, or there was a 'significant'
amount of idle time between the last nohz balance and the nohz exit.
So we'll update unconditionally from the tick to avoid inserting any
accidental 0-load periods while busy, and we try to catch up from the
nohz idle balance and the nohz exit. Both of these are still prone to
missing a jiffy, but that has always been the case.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: pjt@google.com
Cc: Venkatesh Pallipadi <venki@google.com>
Link: http://lkml.kernel.org/n/tip-kt0trz0apodbf84ucjfdbr1a@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel/sched')
-rw-r--r--  kernel/sched/core.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++----------
1 file changed, 43 insertions(+), 10 deletions(-)
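The aging that both catch-up paths rely on happens in __update_cpu_load(): cpu_load[0] tracks the instantaneous load, and each higher index i is a longer-horizon average updated as (old * (2^i - 1) + new) >> i per tick. Below is a minimal, runnable userspace model of that scheme, a sketch rather than kernel code: the kernel's decay_load_missed() (added in 556061b00) folds the missed-tick replay into precomputed decay tables instead of the naive loop used here, and everything outside the update formula is illustrative.

#include <stdio.h>

#define CPU_LOAD_IDX_MAX 5

static unsigned long cpu_load[CPU_LOAD_IDX_MAX];

/* One tick of aging: index 0 is instantaneous, higher indexes decay slower. */
static void age_one_tick(unsigned long this_load)
{
	unsigned long scale;
	int i;

	cpu_load[0] = this_load;
	for (i = 1, scale = 2; i < CPU_LOAD_IDX_MAX; i++, scale += scale) {
		unsigned long old_load = cpu_load[i];
		unsigned long new_load = this_load;

		/* round up when rising so the average can actually reach it */
		if (new_load > old_load)
			new_load += scale - 1;

		cpu_load[i] = (old_load * (scale - 1) + new_load) >> i;
	}
}

/* Naive catch-up: replay every missed nohz tick as a zero-load tick. */
static void age_missed_ticks(unsigned long pending_updates)
{
	while (pending_updates--)
		age_one_tick(0);
}

int main(void)
{
	int i;

	for (i = 0; i < 10; i++)
		age_one_tick(1024);	/* busy for 10 ticks */
	age_missed_ticks(4);		/* then 4 ticks spent in nohz idle */

	for (i = 0; i < CPU_LOAD_IDX_MAX; i++)
		printf("cpu_load[%d] = %lu\n", i, cpu_load[i]);
	return 0;
}

With pending_updates ticks of idle time, the new paths in the diff below call __update_cpu_load(rq, 0, pending_updates) so the averages decay toward zero instead of silently freezing at their pre-idle values.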
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 39eb6011bc38..75844a8f9aeb 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2517,25 +2517,32 @@ static void __update_cpu_load(struct rq *this_rq, unsigned long this_load,
 	sched_avg_update(this_rq);
 }
 
+#ifdef CONFIG_NO_HZ
+/*
+ * There is no sane way to deal with nohz on smp when using jiffies because the
+ * cpu doing the jiffies update might drift wrt the cpu doing the jiffy reading
+ * causing off-by-one errors in observed deltas; {0,2} instead of {1,1}.
+ *
+ * Therefore we cannot use the delta approach from the regular tick since that
+ * would seriously skew the load calculation. However we'll make do for those
+ * updates happening while idle (nohz_idle_balance) or coming out of idle
+ * (tick_nohz_idle_exit).
+ *
+ * This means we might still be one tick off for nohz periods.
+ */
+
 /*
  * Called from nohz_idle_balance() to update the load ratings before doing the
  * idle balance.
  */
 void update_idle_cpu_load(struct rq *this_rq)
 {
-	unsigned long curr_jiffies = jiffies;
+	unsigned long curr_jiffies = ACCESS_ONCE(jiffies);
 	unsigned long load = this_rq->load.weight;
 	unsigned long pending_updates;
 
 	/*
-	 * Bloody broken means of dealing with nohz, but better than nothing..
-	 * jiffies is updated by one cpu, another cpu can drift wrt the jiffy
-	 * update and see 0 difference the one time and 2 the next, even though
-	 * we ticked at roughtly the same rate.
-	 *
-	 * Hence we only use this from nohz_idle_balance() and skip this
-	 * nonsense when called from the scheduler_tick() since that's
-	 * guaranteed a stable rate.
+	 * bail if there's load or we're actually up-to-date.
 	 */
 	if (load || curr_jiffies == this_rq->last_load_update_tick)
 		return;
@@ -2547,12 +2554,38 @@ void update_idle_cpu_load(struct rq *this_rq)
 }
 
 /*
+ * Called from tick_nohz_idle_exit() -- try and fix up the ticks we missed.
+ */
+void update_cpu_load_nohz(void)
+{
+	struct rq *this_rq = this_rq();
+	unsigned long curr_jiffies = ACCESS_ONCE(jiffies);
+	unsigned long pending_updates;
+
+	if (curr_jiffies == this_rq->last_load_update_tick)
+		return;
+
+	raw_spin_lock(&this_rq->lock);
+	pending_updates = curr_jiffies - this_rq->last_load_update_tick;
+	if (pending_updates) {
+		this_rq->last_load_update_tick = curr_jiffies;
+		/*
+		 * We were idle, this means load 0, the current load might be
+		 * !0 due to remote wakeups and the sort.
+		 */
+		__update_cpu_load(this_rq, 0, pending_updates);
+	}
+	raw_spin_unlock(&this_rq->lock);
+}
+#endif /* CONFIG_NO_HZ */
+
+/*
  * Called from scheduler_tick()
  */
 static void update_cpu_load_active(struct rq *this_rq)
 {
 	/*
-	 * See the mess in update_idle_cpu_load().
+	 * See the mess around update_idle_cpu_load() / update_cpu_load_nohz().
 	 */
 	this_rq->last_load_update_tick = jiffies;
 	__update_cpu_load(this_rq, this_rq->load.weight, 1);
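The {0,2}-instead-of-{1,1} problem that the new comment block describes is easy to reproduce outside the kernel: one CPU advances jiffies while another samples it at nearly the same rate but with a slight phase drift, so per-sample deltas occasionally read 0 and then 2. A runnable toy model follows (times in ms, HZ=100; the concrete numbers are made up for illustration):

#include <stdio.h>

int main(void)
{
	const double tick = 10.0;          /* jiffies advances every 10 ms (HZ=100) */
	const double reader_period = 10.4; /* the observer drifts slightly */
	double t = 9.8;                    /* initial phase offset */
	long prev = 0;
	int i;

	for (i = 0; i < 6; i++, t += reader_period) {
		long jiffies = (long)(t / tick); /* value the reader would see */

		printf("sample %d: jiffies=%ld delta=%ld\n",
		       i, jiffies, jiffies - prev);
		prev = jiffies;
	}
	return 0;	/* deltas come out {0,2,1,1,1,1} -- not a steady {1,1,...} */
}

This is why update_cpu_load_nohz() takes a single ACCESS_ONCE(jiffies) snapshot, computes pending_updates from it, and catches up in one __update_cpu_load() call on the way out of tick_nohz_idle_exit(), rather than trusting tick-by-tick deltas while the tick is stopped.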