aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
author: Peter Zijlstra <a.p.zijlstra@chello.nl> 2011-09-22 09:30:18 -0400
committer: Ingo Molnar <mingo@elte.hu> 2011-12-21 04:34:47 -0500
commit: a195f004e9496b4d99f471bb96e0a0c1af080909 (patch)
tree: 605f04617dc42458025cb4341e9e6407bed8b140 /kernel
parent: 5b54b56be5b540a9cb12682c4d0df5454c098a38 (diff)
sched: Fix load-balance lock-breaking
The current lock break relies on contention on the rq locks, which might never occur because we've got IRQs disabled, or which will occur almost always, because on anything with more than 2 CPUs a synchronized load-balance pass will very likely cause contention on the rq locks. Also, the sched_nr_migrate limit fails when it gets trapped in the loops of either the cgroup muck in load_balance_fair() or the move_tasks() load condition. Instead, use the new lb_flags field to propagate break/abort conditions for all these loops, and create a new loop outside the IRQ-disabled region for when a break is required.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/n/tip-tsceb6w61q0gakmsccix6xxi@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/sched/fair.c 32
1 files changed, 25 insertions, 7 deletions
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index be47ce6da2a5..cea2fa853274 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3132,6 +3132,8 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd)
3132} 3132}
3133 3133
3134#define LBF_ALL_PINNED 0x01 3134#define LBF_ALL_PINNED 0x01
3135#define LBF_NEED_BREAK 0x02
3136#define LBF_ABORT 0x04
3135 3137
3136/* 3138/*
3137 * can_migrate_task - may task p from runqueue rq be migrated to this_cpu? 3139 * can_migrate_task - may task p from runqueue rq be migrated to this_cpu?
@@ -3237,8 +3239,10 @@ balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
3237 goto out; 3239 goto out;
3238 3240
3239 list_for_each_entry_safe(p, n, &busiest_cfs_rq->tasks, se.group_node) { 3241 list_for_each_entry_safe(p, n, &busiest_cfs_rq->tasks, se.group_node) {
3240 if (loops++ > sysctl_sched_nr_migrate) 3242 if (loops++ > sysctl_sched_nr_migrate) {
3243 *lb_flags |= LBF_NEED_BREAK;
3241 break; 3244 break;
3245 }
3242 3246
3243 if ((p->se.load.weight >> 1) > rem_load_move || 3247 if ((p->se.load.weight >> 1) > rem_load_move ||
3244 !can_migrate_task(p, busiest, this_cpu, sd, idle, 3248 !can_migrate_task(p, busiest, this_cpu, sd, idle,
@@ -3255,8 +3259,10 @@ balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
3255 * kernels will stop after the first task is pulled to minimize 3259 * kernels will stop after the first task is pulled to minimize
3256 * the critical section. 3260 * the critical section.
3257 */ 3261 */
3258 if (idle == CPU_NEWLY_IDLE) 3262 if (idle == CPU_NEWLY_IDLE) {
3263 *lb_flags |= LBF_ABORT;
3259 break; 3264 break;
3265 }
3260#endif 3266#endif
3261 3267
3262 /* 3268 /*
@@ -3374,6 +3380,9 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
3374 unsigned long busiest_weight = busiest_cfs_rq->load.weight; 3380 unsigned long busiest_weight = busiest_cfs_rq->load.weight;
3375 u64 rem_load, moved_load; 3381 u64 rem_load, moved_load;
3376 3382
3383 if (*lb_flags & (LBF_NEED_BREAK|LBF_ABORT))
3384 break;
3385
3377 /* 3386 /*
3378 * empty group or part of a throttled hierarchy 3387 * empty group or part of a throttled hierarchy
3379 */ 3388 */
@@ -3440,18 +3449,19 @@ static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
3440 3449
3441 total_load_moved += load_moved; 3450 total_load_moved += load_moved;
3442 3451
3452 if (*lb_flags & (LBF_NEED_BREAK|LBF_ABORT))
3453 break;
3454
3443#ifdef CONFIG_PREEMPT 3455#ifdef CONFIG_PREEMPT
3444 /* 3456 /*
3445 * NEWIDLE balancing is a source of latency, so preemptible 3457 * NEWIDLE balancing is a source of latency, so preemptible
3446 * kernels will stop after the first task is pulled to minimize 3458 * kernels will stop after the first task is pulled to minimize
3447 * the critical section. 3459 * the critical section.
3448 */ 3460 */
3449 if (idle == CPU_NEWLY_IDLE && this_rq->nr_running) 3461 if (idle == CPU_NEWLY_IDLE && this_rq->nr_running) {
3450 break; 3462 *lb_flags |= LBF_ABORT;
3451
3452 if (raw_spin_is_contended(&this_rq->lock) ||
3453 raw_spin_is_contended(&busiest->lock))
3454 break; 3463 break;
3464 }
3455#endif 3465#endif
3456 } while (load_moved && max_load_move > total_load_moved); 3466 } while (load_moved && max_load_move > total_load_moved);
3457 3467
@@ -4496,6 +4506,14 @@ redo:
4496 if (ld_moved && this_cpu != smp_processor_id()) 4506 if (ld_moved && this_cpu != smp_processor_id())
4497 resched_cpu(this_cpu); 4507 resched_cpu(this_cpu);
4498 4508
4509 if (lb_flags & LBF_ABORT)
4510 goto out_balanced;
4511
4512 if (lb_flags & LBF_NEED_BREAK) {
4513 lb_flags &= ~LBF_NEED_BREAK;
4514 goto redo;
4515 }
4516
4499 /* All tasks on this runqueue were pinned by CPU affinity */ 4517 /* All tasks on this runqueue were pinned by CPU affinity */
4500 if (unlikely(lb_flags & LBF_ALL_PINNED)) { 4518 if (unlikely(lb_flags & LBF_ALL_PINNED)) {
4501 cpumask_clear_cpu(cpu_of(busiest), cpus); 4519 cpumask_clear_cpu(cpu_of(busiest), cpus);