author		Peter Zijlstra <a.p.zijlstra@chello.nl>	2011-02-21 12:56:47 -0500
committer	Ingo Molnar <mingo@elte.hu>	2011-02-23 05:33:57 -0500
commit		866ab43efd325fae8889ea77a744d03f2b957e38 (patch)
tree		450263aa8a30abb4a0ab2812643aa7a83711df05 /kernel/sched_fair.c
parent		cc57aa8f4b3bece8c26c7929728edcc5fa6b5aed (diff)
sched: Fix the group_imb logic
On a 2*6*2 machine something like:

  taskset -c 3-11 bash -c 'for ((i=0;i<9;i++)) do while :; do :; done & done'

_should_ result in 9 busy CPUs, each running 1 task.

However it didn't quite work reliably, most of the time one cpu of the
second socket (6-11) would be idle and one cpu of the first socket (0-5)
would have two tasks on it.

The group_imb logic is supposed to deal with this and detect when a
particular group is imbalanced (like in our case, 0-2 are idle but 3-5
will have 4 tasks on them).

The detection phase needed a bit of a tweak as it was too weak: it
required a difference of more than 2 avg task weights between the idle
and busy cpus in the group, which won't trigger for our test-case. So
cure that to be one or more avg task weights of difference between cpus.

Once the detection phase worked, it was then defeated by the f_b_g()
tests trying to avoid ping-pongs. In particular, this_load >= max_load
triggered because the pulling cpu (the (first) idle cpu on the second
socket, say 6) would find this_load to be 5 and max_load to be 4 (there'd
be 5 tasks running on our socket and only 4 on the other socket).

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Nikhil Rao <ncrao@google.com>
Cc: Venkatesh Pallipadi <venki@google.com>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Mike Galbraith <efault@gmx.de>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
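For illustration, a minimal user-space sketch (not kernel code; the nice-0
task weight of 1024, i.e. SCHED_LOAD_SCALE, is an assumption) that plugs the
test-case numbers into both forms of the detection test. In the overloaded
socket, 4 tasks across cpus 3-5 give avg_load_per_task = 1024; the cpu
carrying two tasks reports max_cpu_load = 2048 and an idle cpu reports
min_cpu_load = 0, so the old test (2048 > 2048) stays quiet while the new
one (2048 >= 1024) fires:

#include <stdio.h>

int main(void)
{
	/* Assumed nice-0 task weight, mirroring SCHED_LOAD_SCALE (1024). */
	unsigned long task_weight = 1024;

	/* First socket in the test-case: 4 tasks on cpus 3-5, cpus 0-2 idle. */
	unsigned long sum_weighted_load = 4 * task_weight;
	unsigned long sum_nr_running = 4;
	unsigned long avg_load_per_task = sum_weighted_load / sum_nr_running;
	unsigned long max_cpu_load = 2 * task_weight;	/* cpu running 2 tasks */
	unsigned long min_cpu_load = 0;			/* idle cpu */

	int old_imb = (max_cpu_load - min_cpu_load) > 2 * avg_load_per_task;
	int new_imb = (max_cpu_load - min_cpu_load) >= avg_load_per_task;

	printf("old test: %d, new test: %d\n", old_imb, new_imb);	/* 0, 1 */
	return 0;
}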
Diffstat (limited to 'kernel/sched_fair.c')
-rw-r--r--	kernel/sched_fair.c	12
1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 03496ebc4553..3a88dee165c0 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -2743,7 +2743,7 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
 
 	/*
 	 * Consider the group unbalanced when the imbalance is larger
-	 * than the average weight of two tasks.
+	 * than the average weight of a task.
 	 *
 	 * APZ: with cgroup the avg task weight can vary wildly and
 	 * might not be a suitable number - should we keep a
@@ -2753,7 +2753,7 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
 	if (sgs->sum_nr_running)
 		avg_load_per_task = sgs->sum_weighted_load / sgs->sum_nr_running;
 
-	if ((max_cpu_load - min_cpu_load) > 2*avg_load_per_task && max_nr_running > 1)
+	if ((max_cpu_load - min_cpu_load) >= avg_load_per_task && max_nr_running > 1)
 		sgs->group_imb = 1;
 
 	sgs->group_capacity = DIV_ROUND_CLOSEST(group->cpu_power, SCHED_LOAD_SCALE);
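The max_cpu_load/min_cpu_load extremes above are gathered while walking the
group's cpus; the following self-contained sketch (array stand-ins for
runqueues, hypothetical names throughout, not the actual update_sg_lb_stats()
body) shows roughly how those fields could feed the patched test:

#include <stddef.h>

struct sg_stats_sketch {
	unsigned long sum_weighted_load;
	unsigned int sum_nr_running;
	int group_imb;
};

/* Hypothetical stand-in for the per-group scan in update_sg_lb_stats(). */
static void sketch_update_stats(const unsigned long *cpu_load,
				const unsigned int *cpu_nr_running,
				size_t ncpus, struct sg_stats_sketch *sgs)
{
	unsigned long max_cpu_load = 0, min_cpu_load = ~0UL;
	unsigned long avg_load_per_task = 0;
	unsigned int max_nr_running = 0;
	size_t i;

	sgs->sum_weighted_load = 0;
	sgs->sum_nr_running = 0;
	sgs->group_imb = 0;

	if (!ncpus)
		return;

	for (i = 0; i < ncpus; i++) {
		if (cpu_load[i] > max_cpu_load)
			max_cpu_load = cpu_load[i];
		if (cpu_load[i] < min_cpu_load)
			min_cpu_load = cpu_load[i];
		if (cpu_nr_running[i] > max_nr_running)
			max_nr_running = cpu_nr_running[i];
		sgs->sum_weighted_load += cpu_load[i];
		sgs->sum_nr_running += cpu_nr_running[i];
	}

	if (sgs->sum_nr_running)
		avg_load_per_task = sgs->sum_weighted_load / sgs->sum_nr_running;

	/* Patched condition: one avg task weight of spread is enough. */
	if ((max_cpu_load - min_cpu_load) >= avg_load_per_task &&
	    max_nr_running > 1)
		sgs->group_imb = 1;
}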
@@ -3128,6 +3128,14 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
 	if (!sds.busiest || sds.busiest_nr_running == 0)
 		goto out_balanced;
 
+	/*
+	 * If the busiest group is imbalanced the below checks don't
+	 * work because they assume all things are equal, which typically
+	 * isn't true due to cpus_allowed constraints and the like.
+	 */
+	if (sds.group_imb)
+		goto force_balance;
+
 	/* SD_BALANCE_NEWIDLE trumps SMP nice when underutilized */
 	if (idle == CPU_NEWLY_IDLE && sds.this_has_capacity &&
 	    !sds.busiest_has_capacity)
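To see why the early force_balance matters for the commit's example: the
pulling cpu on the second socket computes this_load from 5 running tasks
against a max_load from 4, so the existing this_load >= max_load ping-pong
guard would bail out before any pull happened. A condensed, hypothetical
sketch of the post-patch decision order (the real find_busiest_group()
weighs many more factors):

struct sd_stats_sketch {
	unsigned long this_load;	/* load on the pulling cpu's group */
	unsigned long max_load;		/* load of the busiest group */
	unsigned int busiest_nr_running;
	int group_imb;
};

/* Hypothetical condensation of the post-patch decision order. */
static int sketch_need_balance(const struct sd_stats_sketch *sds)
{
	if (!sds->busiest_nr_running)
		return 0;		/* out_balanced */

	if (sds->group_imb)
		return 1;		/* force_balance, added by this patch */

	if (sds->this_load >= sds->max_load)
		return 0;		/* ping-pong guard that defeated the pull */

	return 1;
}

int main(void)
{
	/* Commit's example: 5 tasks' worth of load here vs 4 on the busy socket. */
	struct sd_stats_sketch sds = { .this_load = 5120, .max_load = 4096,
				       .busiest_nr_running = 4, .group_imb = 1 };
	return !sketch_need_balance(&sds);	/* balances only via group_imb */
}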