author     Peter Zijlstra <a.p.zijlstra@chello.nl>    2010-02-15 08:45:54 -0500
committer  Thomas Gleixner <tglx@linutronix.de>       2010-02-16 09:13:59 -0500
commit     0970d2992dfd7d5ec2c787417cf464f01eeaf42a
tree       5f0077dcafda733ef3f1e41d218a473978ce6a9f   /kernel/sched.c
parent     9000f05c6d1607f79c0deacf42b09693be673f4c
sched: Fix race between ttwu() and task_rq_lock()
Thomas found that due to ttwu() changing a task's cpu without holding
the rq->lock, task_rq_lock() might end up locking the wrong rq.
Avoid this by serializing against TASK_WAKING.
Reported-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1266241712.15770.420.camel@laptop>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
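
To make the locking rule easier to see outside the kernel tree, here is a small user-space model of the pattern the patch introduces: the waker may move the task to another runqueue without holding any rq lock, but only while the task is marked "waking"; task_rq_lock() spins until the waking mark clears, takes the lock, and re-checks both the mark and the runqueue before returning (the analogue of the rq == task_rq(p) && !task_is_waking(p) test added below). This is an illustrative sketch only, not kernel code: the pthread mutexes, the runqueues[] array, NR_CPUS, and the two test threads are inventions for the example, and it assumes a single waker thread.

/*
 * Toy user-space model of the rule this patch establishes (illustration only:
 * pthreads, runqueues[] and the two test threads are stand-ins, not kernel
 * APIs).  The waker may change the task's cpu without holding any rq lock,
 * but only while the task is marked "waking"; task_rq_lock() spins until
 * "waking" clears, takes the lock and re-checks before returning.
 */
#include <assert.h>
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define NR_CPUS 2
#define ITERS   100000

struct rq { pthread_mutex_t lock; };
static struct rq runqueues[NR_CPUS];

static struct task {
        atomic_bool waking;     /* models p->state == TASK_WAKING */
        atomic_int  cpu;        /* models task_cpu(p)             */
} t;

/* Analogue of task_rq_lock(): return the task's rq with its lock held. */
static struct rq *task_rq_lock(struct task *p)
{
        for (;;) {
                while (atomic_load(&p->waking))
                        ;       /* cpu_relax() in the kernel */

                struct rq *rq = &runqueues[atomic_load(&p->cpu)];
                pthread_mutex_lock(&rq->lock);
                /* re-check: the waker may have started a migration meanwhile */
                if (!atomic_load(&p->waking) &&
                    rq == &runqueues[atomic_load(&p->cpu)])
                        return rq;
                pthread_mutex_unlock(&rq->lock);
        }
}

/* Analogue of the ttwu() path: migrate the task while it is "waking". */
static void *waker(void *arg)
{
        (void)arg;
        for (int i = 0; i < ITERS; i++) {
                struct rq *rq = task_rq_lock(&t);
                atomic_store(&t.waking, true);   /* set under the old rq lock   */
                pthread_mutex_unlock(&rq->lock);

                int cpu = i % NR_CPUS;
                atomic_store(&t.cpu, cpu);       /* "set_task_cpu()", lockless  */

                pthread_mutex_lock(&runqueues[cpu].lock);
                atomic_store(&t.waking, false);  /* clear under the new rq lock */
                pthread_mutex_unlock(&runqueues[cpu].lock);
        }
        return NULL;
}

/* Any other path: with the lock held, the rq must match the task's cpu. */
static void *other(void *arg)
{
        (void)arg;
        for (int i = 0; i < ITERS; i++) {
                struct rq *rq = task_rq_lock(&t);
                assert(rq == &runqueues[atomic_load(&t.cpu)]);
                pthread_mutex_unlock(&rq->lock);
        }
        return NULL;
}

int main(void)
{
        pthread_t a, b;

        for (int i = 0; i < NR_CPUS; i++)
                pthread_mutex_init(&runqueues[i].lock, NULL);
        atomic_init(&t.waking, false);
        atomic_init(&t.cpu, 0);

        pthread_create(&a, NULL, waker, NULL);
        pthread_create(&b, NULL, other, NULL);
        pthread_join(a, NULL);
        pthread_join(b, NULL);
        puts("rq always matched the task's cpu under the lock");
        return 0;
}

Build with cc -std=c11 -pthread. Dropping the re-check under the lock lets other() occasionally grab a stale runqueue, which is the user-space analogue of the wrong-rq race described above.
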
Diffstat (limited to 'kernel/sched.c')
-rw-r--r--   kernel/sched.c   73
1 file changed, 46 insertions, 27 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index 4d78aef4559d..404e2017c0cf 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -941,16 +941,33 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
 #endif /* __ARCH_WANT_UNLOCKED_CTXSW */
 
 /*
+ * Check whether the task is waking, we use this to synchronize against
+ * ttwu() so that task_cpu() reports a stable number.
+ *
+ * We need to make an exception for PF_STARTING tasks because the fork
+ * path might require task_rq_lock() to work, eg. it can call
+ * set_cpus_allowed_ptr() from the cpuset clone_ns code.
+ */
+static inline int task_is_waking(struct task_struct *p)
+{
+        return unlikely((p->state == TASK_WAKING) && !(p->flags & PF_STARTING));
+}
+
+/*
  * __task_rq_lock - lock the runqueue a given task resides on.
  * Must be called interrupts disabled.
  */
 static inline struct rq *__task_rq_lock(struct task_struct *p)
         __acquires(rq->lock)
 {
+        struct rq *rq;
+
         for (;;) {
-                struct rq *rq = task_rq(p);
+                while (task_is_waking(p))
+                        cpu_relax();
+                rq = task_rq(p);
                 raw_spin_lock(&rq->lock);
-                if (likely(rq == task_rq(p)))
+                if (likely(rq == task_rq(p) && !task_is_waking(p)))
                         return rq;
                 raw_spin_unlock(&rq->lock);
         }
@@ -967,10 +984,12 @@ static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
         struct rq *rq;
 
         for (;;) {
+                while (task_is_waking(p))
+                        cpu_relax();
                 local_irq_save(*flags);
                 rq = task_rq(p);
                 raw_spin_lock(&rq->lock);
-                if (likely(rq == task_rq(p)))
+                if (likely(rq == task_rq(p) && !task_is_waking(p)))
                         return rq;
                 raw_spin_unlock_irqrestore(&rq->lock, *flags);
         }
@@ -2408,14 +2427,27 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
         __task_rq_unlock(rq);
 
         cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
-        if (cpu != orig_cpu)
+        if (cpu != orig_cpu) {
+                /*
+                 * Since we migrate the task without holding any rq->lock,
+                 * we need to be careful with task_rq_lock(), since that
+                 * might end up locking an invalid rq.
+                 */
                 set_task_cpu(p, cpu);
+        }
 
-        rq = __task_rq_lock(p);
+        rq = cpu_rq(cpu);
+        raw_spin_lock(&rq->lock);
         update_rq_clock(rq);
 
+        /*
+         * We migrated the task without holding either rq->lock, however
+         * since the task is not on the task list itself, nobody else
+         * will try and migrate the task, hence the rq should match the
+         * cpu we just moved it to.
+         */
+        WARN_ON(task_cpu(p) != cpu);
         WARN_ON(p->state != TASK_WAKING);
-        cpu = task_cpu(p);
 
 #ifdef CONFIG_SCHEDSTATS
         schedstat_inc(rq, ttwu_count);
@@ -2647,7 +2679,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
 {
         unsigned long flags;
         struct rq *rq;
-        int cpu __maybe_unused = get_cpu();
+        int cpu = get_cpu();
 
 #ifdef CONFIG_SMP
         /*
@@ -2663,7 +2695,13 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
         set_task_cpu(p, cpu);
 #endif
 
-        rq = task_rq_lock(p, &flags);
+        /*
+         * Since the task is not on the rq and we still have TASK_WAKING set
+         * nobody else will migrate this task.
+         */
+        rq = cpu_rq(cpu);
+        raw_spin_lock_irqsave(&rq->lock, flags);
+
         BUG_ON(p->state != TASK_WAKING);
         p->state = TASK_RUNNING;
         update_rq_clock(rq);
@@ -7156,27 +7194,8 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
         struct rq *rq;
         int ret = 0;
 
-        /*
-         * Since we rely on wake-ups to migrate sleeping tasks, don't change
-         * the ->cpus_allowed mask from under waking tasks, which would be
-         * possible when we change rq->lock in ttwu(), so synchronize against
-         * TASK_WAKING to avoid that.
-         *
-         * Make an exception for freshly cloned tasks, since cpuset namespaces
-         * might move the task about, we have to validate the target in
-         * wake_up_new_task() anyway since the cpu might have gone away.
-         */
-again:
-        while (p->state == TASK_WAKING && !(p->flags & PF_STARTING))
-                cpu_relax();
-
         rq = task_rq_lock(p, &flags);
 
-        if (p->state == TASK_WAKING && !(p->flags & PF_STARTING)) {
-                task_rq_unlock(rq, &flags);
-                goto again;
-        }
-
         if (!cpumask_intersects(new_mask, cpu_active_mask)) {
                 ret = -EINVAL;
                 goto out;