path: root/kernel
author	Peter Zijlstra <a.p.zijlstra@chello.nl>	2011-04-05 11:23:54 -0400
committer	Ingo Molnar <mingo@elte.hu>	2011-04-14 02:52:39 -0400
commit	e4a52bcb9a18142d79e231b6733cabdbf2e67c1f (patch)
tree	fcf29647bb6416d826237b90f233b34a169953ab /kernel
parent	8f42ced974df7d5af2de4cf5ea21fe978c7e4478 (diff)
sched: Remove rq->lock from the first half of ttwu()
Currently ttwu() does two rq->lock acquisitions: the first on the task's old rq, held over the p->state fiddling and the load-balance pass, after which it drops the old rq->lock to acquire the new rq->lock.

Having serialized ttwu(), p->sched_class and p->cpus_allowed with p->pi_lock, we can now drop the whole first rq->lock acquisition.

The p->pi_lock serialization of concurrent ttwu() calls protects p->state, which we set to TASK_WAKING to bridge possible p->pi_lock to rq->lock gaps and to serialize set_task_cpu() calls against task_rq_lock().

The p->pi_lock serialization of p->sched_class allows us to call scheduling class methods without holding the rq->lock, and the serialization of p->cpus_allowed allows us to do the load-balancing bits without races.

Reviewed-by: Frank Rowand <frank.rowand@am.sony.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Nick Piggin <npiggin@kernel.dk>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20110405152729.354401150@chello.nl
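For readers following the locking argument: the "bridge" works because task_rq_lock() (reworked earlier in this series) takes p->pi_lock before the rq->lock and re-checks the task's rq, so a waker that holds p->pi_lock with p->state == TASK_WAKING excludes it across the pi_lock-to-rq->lock gap. The following is a minimal sketch of that retry pattern, assuming the kernel/sched.c helpers of this era; it is an illustration, not part of this diff.

/*
 * Sketch (assumption, not taken from this commit): the task_rq_lock()
 * retry loop that "serialize set_task_cpu() calls against task_rq_lock()"
 * refers to.  p->pi_lock is taken first, then the lock of whatever rq the
 * task currently appears on; if the task was migrated in between, both
 * locks are dropped and the lookup retried.
 */
static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
{
	struct rq *rq;

	for (;;) {
		raw_spin_lock_irqsave(&p->pi_lock, *flags);
		rq = task_rq(p);
		raw_spin_lock(&rq->lock);
		if (likely(rq == task_rq(p)))
			return rq;	/* still the task's rq, both locks held */
		raw_spin_unlock(&rq->lock);
		raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
	}
}

With that ordering, ttwu() can call set_task_cpu() while holding only p->pi_lock: any concurrent task_rq_lock() user either serializes on p->pi_lock or retries once it observes the new rq.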
Diffstat (limited to 'kernel')
-rw-r--r--	kernel/sched.c	65
1 file changed, 37 insertions(+), 28 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index 1be1a09b9dc9..871dd9e147a6 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2493,69 +2493,78 @@ ttwu_post_activation(struct task_struct *p, struct rq *rq, int wake_flags)
  * Returns %true if @p was woken up, %false if it was already running
  * or @state didn't match @p's state.
  */
-static int try_to_wake_up(struct task_struct *p, unsigned int state,
-			  int wake_flags)
+static int
+try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 {
-	int cpu, orig_cpu, this_cpu, success = 0;
+	int cpu, this_cpu, success = 0;
 	unsigned long flags;
-	unsigned long en_flags = ENQUEUE_WAKEUP;
 	struct rq *rq;
 
 	this_cpu = get_cpu();
 
 	smp_wmb();
 	raw_spin_lock_irqsave(&p->pi_lock, flags);
-	rq = __task_rq_lock(p);
 	if (!(p->state & state))
 		goto out;
 
 	cpu = task_cpu(p);
 
-	if (p->on_rq)
-		goto out_running;
+	if (p->on_rq) {
+		rq = __task_rq_lock(p);
+		if (p->on_rq)
+			goto out_running;
+		__task_rq_unlock(rq);
+	}
 
-	orig_cpu = cpu;
 #ifdef CONFIG_SMP
-	if (unlikely(task_running(rq, p)))
-		goto out_activate;
+	while (p->on_cpu) {
+#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
+		/*
+		 * If called from interrupt context we could have landed in the
+		 * middle of schedule(), in this case we should take care not
+		 * to spin on ->on_cpu if p is current, since that would
+		 * deadlock.
+		 */
+		if (p == current)
+			goto out_activate;
+#endif
+		cpu_relax();
+	}
+	/*
+	 * Pairs with the smp_wmb() in finish_lock_switch().
+	 */
+	smp_rmb();
 
 	p->sched_contributes_to_load = !!task_contributes_to_load(p);
 	p->state = TASK_WAKING;
 
-	if (p->sched_class->task_waking) {
+	if (p->sched_class->task_waking)
 		p->sched_class->task_waking(p);
-		en_flags |= ENQUEUE_WAKING;
-	}
 
 	cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
-	if (cpu != orig_cpu)
-		set_task_cpu(p, cpu);
-	__task_rq_unlock(rq);
+#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
+out_activate:
+#endif
+#endif /* CONFIG_SMP */
 
 	rq = cpu_rq(cpu);
 	raw_spin_lock(&rq->lock);
 
-	/*
-	 * We migrated the task without holding either rq->lock, however
-	 * since the task is not on the task list itself, nobody else
-	 * will try and migrate the task, hence the rq should match the
-	 * cpu we just moved it to.
-	 */
-	WARN_ON(task_cpu(p) != cpu);
-	WARN_ON(p->state != TASK_WAKING);
+#ifdef CONFIG_SMP
+	if (cpu != task_cpu(p))
+		set_task_cpu(p, cpu);
 
 	if (p->sched_contributes_to_load)
 		rq->nr_uninterruptible--;
+#endif
 
-out_activate:
-#endif /* CONFIG_SMP */
-	ttwu_activate(rq, p, en_flags);
+	ttwu_activate(rq, p, ENQUEUE_WAKEUP | ENQUEUE_WAKING);
 out_running:
 	ttwu_post_activation(p, rq, wake_flags);
 	ttwu_stat(rq, p, cpu, wake_flags);
 	success = 1;
-out:
 	__task_rq_unlock(rq);
+out:
 	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
 	put_cpu();
 