author:    Peter Zijlstra <a.p.zijlstra@chello.nl>  2012-05-11 11:31:26 -0400
committer: Ingo Molnar <mingo@kernel.org>  2012-05-14 09:05:27 -0400
commit:    556061b00c9f2fd6a5524b6bde823ef12f299ecf (patch)
tree:      087891d70dbcd97cd23ac3eb92fad6a905c0f527 /kernel/sched
parent:    870a0bb5d636156502769233d02a0d5791d4366a (diff)
sched/nohz: Fix rq->cpu_load[] calculations
While investigating why the load-balancer did funny things, I found that the rq->cpu_load[] tables were completely screwy; a bit more digging revealed that the updates that got through were missing ticks followed by a catch-up of 2 ticks.

The catch-up assumes the cpu was idle during that time (since only nohz can cause missed ticks and the machine is idle etc.), which means that especially the higher indices were significantly lower than they ought to be.

The reason for this is that it is not correct to compare against jiffies on every jiffy on any other cpu than the cpu that updates jiffies.

This patch kludges around it by only doing the catch-up stuff from nohz_idle_balance() and doing the regular stuff unconditionally from the tick.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: pjt@google.com
Cc: Venkatesh Pallipadi <venki@google.com>
Link: http://lkml.kernel.org/n/tip-tp4kj18xdd5aj4vvj0qg55s2@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
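To make the failure mode concrete, here is a small userspace model of the cpu_load[] update. This is a hedged sketch, not kernel code: the kernel's catch-up uses the table-driven decay_load_missed(), which this model approximates by folding an explicit zero-load update per "missed" tick; the per-index averaging loop, however, has the same shape as __update_cpu_load(). It shows how a cpu sampling a jiffies counter written by another cpu can see 0 pending updates on one tick and 2 on the next, and how decaying the phantom missed tick as idle drags the higher indices below the true load:

#include <stdio.h>

#define CPU_LOAD_IDX_MAX 5

static unsigned long cpu_load[CPU_LOAD_IDX_MAX];

/*
 * One per-tick update: index i averages with scale 2^i, so higher
 * indices track load over progressively longer horizons.
 */
static void load_update_once(unsigned long this_load)
{
	int i, scale;

	cpu_load[0] = this_load;	/* fasttrack for idx 0 */
	for (i = 1, scale = 2; i < CPU_LOAD_IDX_MAX; i++, scale += scale)
		cpu_load[i] = (cpu_load[i] * (scale - 1) + this_load) / scale;
}

int main(void)
{
	/* jiffies is bumped by another cpu; this cpu only samples it */
	unsigned long jiffies, last_update_tick = 0;
	int i, tick;

	for (i = 0; i < CPU_LOAD_IDX_MAX; i++)
		cpu_load[i] = 1024;	/* steady 100% busy to start with */

	for (tick = 1; tick <= 6; tick++) {
		unsigned long pending, n;

		/*
		 * This cpu ticks at the same rate as the jiffy writer but
		 * drifts across the increment: it samples the same value
		 * twice, then sees it jump by two (0, 2, 2, 4, 4, 6).
		 */
		jiffies = tick & 1 ? tick - 1 : tick;

		pending = jiffies - last_update_tick;
		last_update_tick = jiffies;

		if (!pending)
			continue;	/* old code: update silently dropped */

		/* old code decayed the phantom "missed" tick as pure idle */
		for (n = 1; n < pending; n++)
			load_update_once(0);
		load_update_once(1024);	/* the cpu was in fact 100% busy */

		printf("tick %d: pending=%lu cpu_load[4]=%lu\n",
		       tick, pending, cpu_load[4]);
	}
	return 0;
}

Despite the cpu being 100% busy throughout, cpu_load[4] drifts downward (1024, 964, 910, 863, ...), because every other tick is misread as a missed-and-idle tick.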
Diffstat (limited to 'kernel/sched')
-rw-r--r--  kernel/sched/core.c  | 53
-rw-r--r--  kernel/sched/fair.c  |  2
-rw-r--r--  kernel/sched/sched.h |  2
3 files changed, 41 insertions(+), 16 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 6883d998dc38..860fddfb7bb7 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -692,8 +692,6 @@ int tg_nop(struct task_group *tg, void *data)
 }
 #endif
 
-void update_cpu_load(struct rq *this_rq);
-
 static void set_load_weight(struct task_struct *p)
 {
 	int prio = p->static_prio - MAX_RT_PRIO;
@@ -2486,22 +2484,13 @@ decay_load_missed(unsigned long load, unsigned long missed_updates, int idx)
  * scheduler tick (TICK_NSEC). With tickless idle this will not be called
  * every tick. We fix it up based on jiffies.
  */
-void update_cpu_load(struct rq *this_rq)
+static void __update_cpu_load(struct rq *this_rq, unsigned long this_load,
+			      unsigned long pending_updates)
 {
-	unsigned long this_load = this_rq->load.weight;
-	unsigned long curr_jiffies = jiffies;
-	unsigned long pending_updates;
 	int i, scale;
 
 	this_rq->nr_load_updates++;
 
-	/* Avoid repeated calls on same jiffy, when moving in and out of idle */
-	if (curr_jiffies == this_rq->last_load_update_tick)
-		return;
-
-	pending_updates = curr_jiffies - this_rq->last_load_update_tick;
-	this_rq->last_load_update_tick = curr_jiffies;
-
 	/* Update our load: */
 	this_rq->cpu_load[0] = this_load; /* Fasttrack for idx 0 */
 	for (i = 1, scale = 2; i < CPU_LOAD_IDX_MAX; i++, scale += scale) {
@@ -2526,9 +2515,45 @@ void update_cpu_load(struct rq *this_rq)
 	sched_avg_update(this_rq);
 }
 
+/*
+ * Called from nohz_idle_balance() to update the load ratings before doing the
+ * idle balance.
+ */
+void update_idle_cpu_load(struct rq *this_rq)
+{
+	unsigned long curr_jiffies = jiffies;
+	unsigned long load = this_rq->load.weight;
+	unsigned long pending_updates;
+
+	/*
+	 * Bloody broken means of dealing with nohz, but better than nothing..
+	 * jiffies is updated by one cpu, another cpu can drift wrt the jiffy
+	 * update and see 0 difference the one time and 2 the next, even though
+	 * we ticked at roughly the same rate.
+	 *
+	 * Hence we only use this from nohz_idle_balance() and skip this
+	 * nonsense when called from the scheduler_tick() since that's
+	 * guaranteed a stable rate.
+	 */
+	if (load || curr_jiffies == this_rq->last_load_update_tick)
+		return;
+
+	pending_updates = curr_jiffies - this_rq->last_load_update_tick;
+	this_rq->last_load_update_tick = curr_jiffies;
+
+	__update_cpu_load(this_rq, load, pending_updates);
+}
+
+/*
+ * Called from scheduler_tick()
+ */
 static void update_cpu_load_active(struct rq *this_rq)
 {
-	update_cpu_load(this_rq);
+	/*
+	 * See the mess in update_idle_cpu_load().
+	 */
+	this_rq->last_load_update_tick = jiffies;
+	__update_cpu_load(this_rq, this_rq->load.weight, 1);
 
 	calc_load_account_active(this_rq);
 }
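For the caller-side contract, here is a compilable miniature of the split above. It is a sketch only: one decay slot instead of five, simplified averaging, and our own scaffolding (the struct, the jiffies variable, main()) around the patch's function names. The point is the asymmetry it preserves: the tick path re-stamps last_load_update_tick and folds exactly one update, while only the nohz path computes a jiffy-based catch-up, and only when the cpu is genuinely idle, where jitter in `pending` merely folds load 0 one tick earlier or later.

#include <stdio.h>

/* Stand-ins for kernel state; only the function names mirror the patch. */
static unsigned long jiffies;

struct rq {
	unsigned long load_weight;		/* rq->load.weight stand-in */
	unsigned long cpu_load;			/* one slot instead of five */
	unsigned long last_load_update_tick;
};

/* fold pending_updates ticks of `load` into the running average */
static void __update_cpu_load(struct rq *rq, unsigned long load,
			      unsigned long pending_updates)
{
	while (pending_updates--)
		rq->cpu_load = (rq->cpu_load + load) / 2;
}

/* tick path: the local tick is a stable rate, so never try to catch up */
static void update_cpu_load_active(struct rq *rq)
{
	rq->last_load_update_tick = jiffies;
	__update_cpu_load(rq, rq->load_weight, 1);
}

/* nohz path: catch-up is safe only because an idle cpu folds load 0 */
static void update_idle_cpu_load(struct rq *rq)
{
	unsigned long curr_jiffies = jiffies;
	unsigned long pending = curr_jiffies - rq->last_load_update_tick;

	if (rq->load_weight || !pending)
		return;		/* woke up busy, or same jiffy: skip */

	rq->last_load_update_tick = curr_jiffies;
	__update_cpu_load(rq, 0, pending);
}

int main(void)
{
	struct rq rq = { .load_weight = 1024, .cpu_load = 1024 };

	jiffies = 1; update_cpu_load_active(&rq);	/* normal tick */
	rq.load_weight = 0;				/* cpu goes idle */
	jiffies = 5; update_idle_cpu_load(&rq);		/* catch up 4 ticks */
	printf("cpu_load after idle catch-up: %lu\n", rq.cpu_load);
	return 0;
}

Note the `if (load || ...)` guard: a cpu that wakes up already carrying load skips the catch-up entirely and lets its next scheduler tick re-stamp the counter via the active path.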
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index a259a614b394..124e6b6999a7 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5012,7 +5012,7 @@ static void nohz_idle_balance(int this_cpu, enum cpu_idle_type idle)
 
 		raw_spin_lock_irq(&this_rq->lock);
 		update_rq_clock(this_rq);
-		update_cpu_load(this_rq);
+		update_idle_cpu_load(this_rq);
 		raw_spin_unlock_irq(&this_rq->lock);
 
 		rebalance_domains(balance_cpu, CPU_IDLE);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 7282e7b5f4c7..ba9dccfd24ce 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -876,7 +876,7 @@ extern void resched_cpu(int cpu);
 extern struct rt_bandwidth def_rt_bandwidth;
 extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);
 
-extern void update_cpu_load(struct rq *this_rq);
+extern void update_idle_cpu_load(struct rq *this_rq);
 
 #ifdef CONFIG_CGROUP_CPUACCT
 #include <linux/cgroup.h>