author    Peter Zijlstra <a.p.zijlstra@chello.nl>  2012-04-17 07:38:40 -0400
committer Ingo Molnar <mingo@kernel.org>           2012-04-26 06:54:52 -0400
commit    eb95308ee2a69403909e111837b9068c64cfc349 (patch)
tree      92e9a3368e75b94486dc54f2688453fed17d4eed  /kernel/sched/fair.c
parent    2300fd67b4f29eec19addb15a8571837228f63fc (diff)
sched: Fix more load-balancing fallout
Commits 367456c756a6 ("sched: Ditch per cgroup task lists for
load-balancing") and 5d6523ebd ("sched: Fix load-balance wreckage")
left some more wreckage.

By setting loop_max unconditionally to ->nr_running, load-balancing
could take a lot of time on very long runqueues (hackbench!). So keep
the sysctl as the max limit on the number of tasks we'll iterate.

Furthermore, the min-load filter for migration completely fails with
cgroups, since inequality in per-cpu state can easily lead to such
small loads :/

Furthermore, the change to add new tasks to the tail of the queue
instead of the head seems to have some effect.. not quite sure I
understand why.

Combined, these fixes solve the huge hackbench regression reported by
Tim when hackbench is run in a cgroup.

Reported-by: Tim Chen <tim.c.chen@linux.intel.com>
Acked-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Link: http://lkml.kernel.org/r/1335365763.28150.267.camel@twins
[ got rid of the CONFIG_PREEMPT tuning and made small readability edits ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
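
To make the loop_max/loop_break interaction concrete, here is a minimal user-space sketch of the bound this patch introduces. It is not the kernel code: the constants mirror the values used in the diff below, but the task counts and the simplified breather accounting are assumptions made purely for illustration.

#include <stdio.h>

/* Assumed stand-ins for the kernel tunables discussed above. */
static const unsigned int sysctl_sched_nr_migrate = 32;	/* sysctl cap per pass */
static const unsigned int sched_nr_migrate_break  = 32;	/* breather granularity */

static unsigned int min_uint(unsigned int a, unsigned int b)
{
	return a < b ? a : b;
}

/*
 * Model of one move_tasks() pass: before the fix loop_max was simply
 * nr_running, so a hackbench-sized runqueue was scanned end to end;
 * with the fix at most sysctl_sched_nr_migrate tasks are looked at,
 * with a (simulated) breather every sched_nr_migrate_break tasks.
 */
static unsigned int scan_tasks(unsigned int nr_running)
{
	unsigned int loop_max = min_uint(sysctl_sched_nr_migrate, nr_running);
	unsigned int loop, scanned = 0, breathers = 0;

	for (loop = 1; loop <= loop_max; loop++) {
		scanned++;
		if (loop % sched_nr_migrate_break == 0)
			breathers++;	/* kernel: set LBF_NEED_BREAK, drop the lock */
	}
	printf("nr_running=%u scanned=%u breathers=%u\n",
	       nr_running, scanned, breathers);
	return scanned;
}

int main(void)
{
	scan_tasks(10);		/* short runqueue: scans all 10 tasks */
	scan_tasks(10000);	/* hackbench-sized runqueue: capped at 32 */
	return 0;
}
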
Diffstat (limited to 'kernel/sched/fair.c')
-rw-r--r--   kernel/sched/fair.c   18
1 file changed, 10 insertions(+), 8 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 0d97ebdc58f0..e9553640c1c3 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -784,7 +784,7 @@ account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	update_load_add(&rq_of(cfs_rq)->load, se->load.weight);
 #ifdef CONFIG_SMP
 	if (entity_is_task(se))
-		list_add_tail(&se->group_node, &rq_of(cfs_rq)->cfs_tasks);
+		list_add(&se->group_node, &rq_of(cfs_rq)->cfs_tasks);
 #endif
 	cfs_rq->nr_running++;
 }
@@ -3215,6 +3215,8 @@ static int move_one_task(struct lb_env *env)
 
 static unsigned long task_h_load(struct task_struct *p);
 
+static const unsigned int sched_nr_migrate_break = 32;
+
 /*
  * move_tasks tries to move up to load_move weighted load from busiest to
  * this_rq, as part of a balancing operation within domain "sd".
@@ -3242,7 +3244,7 @@ static int move_tasks(struct lb_env *env)
 
 		/* take a breather every nr_migrate tasks */
 		if (env->loop > env->loop_break) {
-			env->loop_break += sysctl_sched_nr_migrate;
+			env->loop_break += sched_nr_migrate_break;
 			env->flags |= LBF_NEED_BREAK;
 			break;
 		}
@@ -3252,7 +3254,7 @@ static int move_tasks(struct lb_env *env)
 
 		load = task_h_load(p);
 
-		if (load < 16 && !env->sd->nr_balance_failed)
+		if (sched_feat(LB_MIN) && load < 16 && !env->sd->nr_balance_failed)
 			goto next;
 
 		if ((load / 2) > env->load_move)
@@ -4407,7 +4409,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
 		.dst_cpu	= this_cpu,
 		.dst_rq		= this_rq,
 		.idle		= idle,
-		.loop_break	= sysctl_sched_nr_migrate,
+		.loop_break	= sched_nr_migrate_break,
 	};
 
 	cpumask_copy(cpus, cpu_active_mask);
@@ -4445,10 +4447,10 @@ redo:
 		 * correctly treated as an imbalance.
 		 */
 		env.flags |= LBF_ALL_PINNED;
 		env.load_move = imbalance;
 		env.src_cpu = busiest->cpu;
 		env.src_rq = busiest;
-		env.loop_max = busiest->nr_running;
+		env.loop_max = min_t(unsigned long, sysctl_sched_nr_migrate, busiest->nr_running);
 
 more_balance:
 		local_irq_save(flags);
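
A side note on the sched_feat(LB_MIN) hunk above: the old unconditional "load < 16" filter misbehaves under cgroups because a task's hierarchical load is scaled by its group's share, so a fully busy task can still report a tiny load. The toy calculation below only illustrates the order of magnitude involved; it is not the kernel's task_h_load() implementation, and the group sizes are made-up numbers.

#include <stdio.h>

#define NICE_0_LOAD	1024	/* weight of a nice-0 task */

/*
 * Toy model of a hierarchical task load: the task's weight scaled by its
 * group's shares relative to the group's total runnable load. This is a
 * deliberate simplification, used only to show the scale involved.
 */
static unsigned long toy_task_h_load(unsigned long task_weight,
				     unsigned long group_shares,
				     unsigned long group_total_load)
{
	return task_weight * group_shares / group_total_load;
}

int main(void)
{
	/* A cgroup with default shares (1024) running 128 nice-0 hackbench
	 * tasks across the machine: each task's hierarchical load comes out
	 * tiny even though every task is busy. */
	unsigned long h_load = toy_task_h_load(NICE_0_LOAD, 1024,
					       128 * NICE_0_LOAD);

	printf("h_load = %lu (below the old unconditional '< 16' filter)\n",
	       h_load);	/* prints 8 */
	return 0;
}
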