diff options
| field | value | date |
|---|---|---|
| author | Peter Zijlstra <a.p.zijlstra@chello.nl> | 2012-05-17 11:15:29 -0400 |
| committer | Ingo Molnar <mingo@kernel.org> | 2012-05-30 08:02:16 -0400 |
| commit | 5aaa0b7a2ed5b12692c9ffb5222182bd558d3146 (patch) | |
| tree | 0334762e9d0e773acf21e61c682f895c25201c89 /kernel | |
| parent | 9f646389aa7727a2fd8f9ae6337b92af9cfbc264 (diff) | |
sched/nohz: Fix rq->cpu_load calculations some more
Follow up on commit 556061b00 ("sched/nohz: Fix rq->cpu_load[]
calculations") since while that fixed the busy case it regressed the
mostly idle case.
Add a callback from the nohz exit to also age the rq->cpu_load[]
array. This closes the hole where either there was no nohz load
balance pass during the nohz, or there was a 'significant' amount of
idle time between the last nohz balance and the nohz exit.
So we'll update unconditionally from the tick to not insert any
accidental 0 load periods while busy, and we try and catch up from
nohz idle balance and nohz exit. Both these are still prone to missing
a jiffy, but that has always been the case.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: pjt@google.com
Cc: Venkatesh Pallipadi <venki@google.com>
Link: http://lkml.kernel.org/n/tip-kt0trz0apodbf84ucjfdbr1a@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/sched/core.c | 53 |
| -rw-r--r-- | kernel/time/tick-sched.c | 1 |
2 files changed, 44 insertions(+), 10 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 39eb6011bc38..75844a8f9aeb 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c | |||
| @@ -2517,25 +2517,32 @@ static void __update_cpu_load(struct rq *this_rq, unsigned long this_load, | |||
| 2517 | sched_avg_update(this_rq); | 2517 | sched_avg_update(this_rq); |
| 2518 | } | 2518 | } |
| 2519 | 2519 | ||
| 2520 | #ifdef CONFIG_NO_HZ | ||
| 2521 | /* | ||
| 2522 | * There is no sane way to deal with nohz on smp when using jiffies because the | ||
| 2523 | * cpu doing the jiffies update might drift wrt the cpu doing the jiffy reading | ||
| 2524 | * causing off-by-one errors in observed deltas; {0,2} instead of {1,1}. | ||
| 2525 | * | ||
| 2526 | * Therefore we cannot use the delta approach from the regular tick since that | ||
| 2527 | * would seriously skew the load calculation. However we'll make do for those | ||
| 2528 | * updates happening while idle (nohz_idle_balance) or coming out of idle | ||
| 2529 | * (tick_nohz_idle_exit). | ||
| 2530 | * | ||
| 2531 | * This means we might still be one tick off for nohz periods. | ||
| 2532 | */ | ||
| 2533 | |||
| 2520 | /* | 2534 | /* |
| 2521 | * Called from nohz_idle_balance() to update the load ratings before doing the | 2535 | * Called from nohz_idle_balance() to update the load ratings before doing the |
| 2522 | * idle balance. | 2536 | * idle balance. |
| 2523 | */ | 2537 | */ |
| 2524 | void update_idle_cpu_load(struct rq *this_rq) | 2538 | void update_idle_cpu_load(struct rq *this_rq) |
| 2525 | { | 2539 | { |
| 2526 | unsigned long curr_jiffies = jiffies; | 2540 | unsigned long curr_jiffies = ACCESS_ONCE(jiffies); |
| 2527 | unsigned long load = this_rq->load.weight; | 2541 | unsigned long load = this_rq->load.weight; |
| 2528 | unsigned long pending_updates; | 2542 | unsigned long pending_updates; |
| 2529 | 2543 | ||
| 2530 | /* | 2544 | /* |
| 2531 | * Bloody broken means of dealing with nohz, but better than nothing.. | 2545 | * bail if there's load or we're actually up-to-date. |
| 2532 | * jiffies is updated by one cpu, another cpu can drift wrt the jiffy | ||
| 2533 | * update and see 0 difference the one time and 2 the next, even though | ||
| 2534 | * we ticked at roughtly the same rate. | ||
| 2535 | * | ||
| 2536 | * Hence we only use this from nohz_idle_balance() and skip this | ||
| 2537 | * nonsense when called from the scheduler_tick() since that's | ||
| 2538 | * guaranteed a stable rate. | ||
| 2539 | */ | 2546 | */ |
| 2540 | if (load || curr_jiffies == this_rq->last_load_update_tick) | 2547 | if (load || curr_jiffies == this_rq->last_load_update_tick) |
| 2541 | return; | 2548 | return; |
| @@ -2547,12 +2554,38 @@ void update_idle_cpu_load(struct rq *this_rq) | |||
| 2547 | } | 2554 | } |
| 2548 | 2555 | ||
| 2549 | /* | 2556 | /* |
| 2557 | * Called from tick_nohz_idle_exit() -- try and fix up the ticks we missed. | ||
| 2558 | */ | ||
| 2559 | void update_cpu_load_nohz(void) | ||
| 2560 | { | ||
| 2561 | struct rq *this_rq = this_rq(); | ||
| 2562 | unsigned long curr_jiffies = ACCESS_ONCE(jiffies); | ||
| 2563 | unsigned long pending_updates; | ||
| 2564 | |||
| 2565 | if (curr_jiffies == this_rq->last_load_update_tick) | ||
| 2566 | return; | ||
| 2567 | |||
| 2568 | raw_spin_lock(&this_rq->lock); | ||
| 2569 | pending_updates = curr_jiffies - this_rq->last_load_update_tick; | ||
| 2570 | if (pending_updates) { | ||
| 2571 | this_rq->last_load_update_tick = curr_jiffies; | ||
| 2572 | /* | ||
| 2573 | * We were idle, this means load 0, the current load might be | ||
| 2574 | * !0 due to remote wakeups and the sort. | ||
| 2575 | */ | ||
| 2576 | __update_cpu_load(this_rq, 0, pending_updates); | ||
| 2577 | } | ||
| 2578 | raw_spin_unlock(&this_rq->lock); | ||
| 2579 | } | ||
| 2580 | #endif /* CONFIG_NO_HZ */ | ||
| 2581 | |||
| 2582 | /* | ||
| 2550 | * Called from scheduler_tick() | 2583 | * Called from scheduler_tick() |
| 2551 | */ | 2584 | */ |
| 2552 | static void update_cpu_load_active(struct rq *this_rq) | 2585 | static void update_cpu_load_active(struct rq *this_rq) |
| 2553 | { | 2586 | { |
| 2554 | /* | 2587 | /* |
| 2555 | * See the mess in update_idle_cpu_load(). | 2588 | * See the mess around update_idle_cpu_load() / update_cpu_load_nohz(). |
| 2556 | */ | 2589 | */ |
| 2557 | this_rq->last_load_update_tick = jiffies; | 2590 | this_rq->last_load_update_tick = jiffies; |
| 2558 | __update_cpu_load(this_rq, this_rq->load.weight, 1); | 2591 | __update_cpu_load(this_rq, this_rq->load.weight, 1); |
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 6a3a5b9ff561..0c927cd85345 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c | |||
| @@ -576,6 +576,7 @@ void tick_nohz_idle_exit(void) | |||
| 576 | /* Update jiffies first */ | 576 | /* Update jiffies first */ |
| 577 | select_nohz_load_balancer(0); | 577 | select_nohz_load_balancer(0); |
| 578 | tick_do_update_jiffies64(now); | 578 | tick_do_update_jiffies64(now); |
| 579 | update_cpu_load_nohz(); | ||
| 579 | 580 | ||
| 580 | #ifndef CONFIG_VIRT_CPU_ACCOUNTING | 581 | #ifndef CONFIG_VIRT_CPU_ACCOUNTING |
| 581 | /* | 582 | /* |
