path: root/kernel/sched
author	Peter Zijlstra <a.p.zijlstra@chello.nl>	2012-05-10 18:22:12 -0400
committer	Ingo Molnar <mingo@kernel.org>	2012-05-14 09:05:28 -0400
commit	e44bc5c5d00ee9b56dd87db47ed827d52948b9fa (patch)
tree	e0e5c30591d8bae335a101458f311e5972175a69 /kernel/sched
parent	556061b00c9f2fd6a5524b6bde823ef12f299ecf (diff)
sched/fair: Improve the ->group_imb logic
Group imbalance is meant to deal with situations where affinity masks and sched domains don't align well, such as 3 cpus from one group and 6 from another. In this case the domain-based balancer will want to put an equal number of tasks on each side even though the groups don't have equal cpus.

Currently group_imb is set whenever two cpus of a group have a weight difference of at least one avg task and the heaviest cpu has at least two tasks. A group with the imbalance flag set will always be picked as busiest and a balance pass will be forced.

The problem is that this logic can trigger even when no affinity masks are involved and cause weird balancing decisions. The observed behaviour was a group of 6 cpus where 5 ran 2 tasks and 1 ran 3 tasks: because the load difference was one avg task (all tasks had the same weight) and nr_running was >1, the group_imb logic triggered and pulled more load instead of moving the one excess task to the other group of 6 cpus, which had 5 cpus with 2 tasks and 1 cpu with 1 task.

Curb the group_imb logic by weakening the nr_running condition: also track min_nr_running and use the spread in nr_running over the group instead of the absolute max nr_running.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/n/tip-9s7dedozxo8kjsb9kqlrukkf@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel/sched')
-rw-r--r--	kernel/sched/fair.c	20
1 file changed, 14 insertions, 6 deletions
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 124e6b6999a7..0b42f4487329 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3775,7 +3775,8 @@ static inline void update_sg_lb_stats(struct lb_env *env,
 			int local_group, const struct cpumask *cpus,
 			int *balance, struct sg_lb_stats *sgs)
 {
-	unsigned long load, max_cpu_load, min_cpu_load, max_nr_running;
+	unsigned long nr_running, max_nr_running, min_nr_running;
+	unsigned long load, max_cpu_load, min_cpu_load;
 	unsigned int balance_cpu = -1, first_idle_cpu = 0;
 	unsigned long avg_load_per_task = 0;
 	int i;
@@ -3787,10 +3788,13 @@ static inline void update_sg_lb_stats(struct lb_env *env,
 	max_cpu_load = 0;
 	min_cpu_load = ~0UL;
 	max_nr_running = 0;
+	min_nr_running = ~0UL;
 
 	for_each_cpu_and(i, sched_group_cpus(group), cpus) {
 		struct rq *rq = cpu_rq(i);
 
+		nr_running = rq->nr_running;
+
 		/* Bias balancing toward cpus of our domain */
 		if (local_group) {
 			if (idle_cpu(i) && !first_idle_cpu) {
@@ -3801,16 +3805,19 @@ static inline void update_sg_lb_stats(struct lb_env *env,
 			load = target_load(i, load_idx);
 		} else {
 			load = source_load(i, load_idx);
-			if (load > max_cpu_load) {
+			if (load > max_cpu_load)
 				max_cpu_load = load;
-				max_nr_running = rq->nr_running;
-			}
 			if (min_cpu_load > load)
 				min_cpu_load = load;
+
+			if (nr_running > max_nr_running)
+				max_nr_running = nr_running;
+			if (min_nr_running > nr_running)
+				min_nr_running = nr_running;
 		}
 
 		sgs->group_load += load;
-		sgs->sum_nr_running += rq->nr_running;
+		sgs->sum_nr_running += nr_running;
 		sgs->sum_weighted_load += weighted_cpuload(i);
 		if (idle_cpu(i))
 			sgs->idle_cpus++;
@@ -3848,7 +3855,8 @@ static inline void update_sg_lb_stats(struct lb_env *env,
 	if (sgs->sum_nr_running)
 		avg_load_per_task = sgs->sum_weighted_load / sgs->sum_nr_running;
 
-	if ((max_cpu_load - min_cpu_load) >= avg_load_per_task && max_nr_running > 1)
+	if ((max_cpu_load - min_cpu_load) >= avg_load_per_task &&
+	    (max_nr_running - min_nr_running) > 1)
 		sgs->group_imb = 1;
 
 	sgs->group_capacity = DIV_ROUND_CLOSEST(group->sgp->power,
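
As a worked illustration of the changelog scenario, the following standalone sketch (not kernel code; the per-task weight of 1024 and the main() driver are assumptions for illustration only) evaluates the old and the new group_imb condition for a group of 6 cpus running 2, 2, 2, 2, 2, 3 tasks of equal weight. The old condition flags the group as imbalanced, the new one does not, since the nr_running spread is only 1.

/*
 * Standalone sketch, not kernel code: evaluate the old and the new
 * group_imb condition for the distribution from the changelog
 * (6 cpus, 5 running 2 tasks, 1 running 3 tasks, equal task weight).
 * The per-task weight of 1024 is an assumed value for illustration.
 */
#include <stdio.h>

#define NR_GROUP_CPUS	6
#define TASK_WEIGHT	1024UL	/* assumed weight of each task */

int main(void)
{
	unsigned long nr_tasks[NR_GROUP_CPUS] = { 2, 2, 2, 2, 2, 3 };
	unsigned long load, max_cpu_load = 0, min_cpu_load = ~0UL;
	unsigned long nr_running, max_nr_running = 0, min_nr_running = ~0UL;
	unsigned long sum_load = 0, sum_nr_running = 0, avg_load_per_task = 0;
	int i, old_imb, new_imb;

	for (i = 0; i < NR_GROUP_CPUS; i++) {
		nr_running = nr_tasks[i];
		load = nr_running * TASK_WEIGHT;	/* per-cpu load */

		if (load > max_cpu_load)
			max_cpu_load = load;
		if (min_cpu_load > load)
			min_cpu_load = load;
		if (nr_running > max_nr_running)
			max_nr_running = nr_running;
		if (min_nr_running > nr_running)
			min_nr_running = nr_running;

		sum_load += load;
		sum_nr_running += nr_running;
	}

	if (sum_nr_running)
		avg_load_per_task = sum_load / sum_nr_running;

	/* Old condition: the heaviest cpu merely needs more than one task. */
	old_imb = (max_cpu_load - min_cpu_load) >= avg_load_per_task &&
		  max_nr_running > 1;

	/* New condition: the nr_running spread over the group must exceed one. */
	new_imb = (max_cpu_load - min_cpu_load) >= avg_load_per_task &&
		  (max_nr_running - min_nr_running) > 1;

	printf("old group_imb = %d, new group_imb = %d\n", old_imb, new_imb);
	/* prints: old group_imb = 1, new group_imb = 0 */
	return 0;
}

Under the old check the single excess task is enough to force a balance pass; with the new check the group is only treated as imbalanced once the nr_running spread across its cpus exceeds one task.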