diff options
author | Dmitry Adamushko <dmitry.adamushko@gmail.com> | 2007-11-15 14:57:40 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2007-11-15 14:57:40 -0500 |
commit | ce96b5ac742801718ae86d2adf0500c5abef3782 (patch) | |
tree | 1ec0bc7d105af9adc3836a5f47a0f9f62031d14f | |
parent | dae51f56204d33444f61d9e7af3ee70aef55daa4 (diff) |
sched: fix __set_task_cpu() SMP race
Grant Wilson has reported rare SCHED_FAIR_USER crashes on his quad-core
system — crashes that can only be explained by runqueue corruption.
There is a narrow SMP race in __set_task_cpu(): after ->cpu is set up to
a new value, task_rq_lock(p, ...) can be successfully executed on another
CPU. We must ensure that updates of per-task data have been completed by
this moment.
This bug has been hiding in the Linux scheduler for an eternity (we never
had any explicit barrier for task->cpu in set_task_cpu() — so the bug was
introduced in 2.5.1), but it only became visible via set_task_cfs_rq()
being accidentally placed after the task->cpu update. It also probably
needs a sufficiently out-of-order CPU to trigger.
Reported-by: Grant Wilson <grant.wilson@zen.co.uk>
Signed-off-by: Dmitry Adamushko <dmitry.adamushko@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- | kernel/sched.c | 20 |
1 files changed, 13 insertions, 7 deletions
diff --git a/kernel/sched.c b/kernel/sched.c index d62b759753f3..db1f71e31310 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -216,15 +216,15 @@ static inline struct task_group *task_group(struct task_struct *p) | |||
216 | } | 216 | } |
217 | 217 | ||
218 | /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */ | 218 | /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */ |
219 | static inline void set_task_cfs_rq(struct task_struct *p) | 219 | static inline void set_task_cfs_rq(struct task_struct *p, unsigned int cpu) |
220 | { | 220 | { |
221 | p->se.cfs_rq = task_group(p)->cfs_rq[task_cpu(p)]; | 221 | p->se.cfs_rq = task_group(p)->cfs_rq[cpu]; |
222 | p->se.parent = task_group(p)->se[task_cpu(p)]; | 222 | p->se.parent = task_group(p)->se[cpu]; |
223 | } | 223 | } |
224 | 224 | ||
225 | #else | 225 | #else |
226 | 226 | ||
227 | static inline void set_task_cfs_rq(struct task_struct *p) { } | 227 | static inline void set_task_cfs_rq(struct task_struct *p, unsigned int cpu) { } |
228 | 228 | ||
229 | #endif /* CONFIG_FAIR_GROUP_SCHED */ | 229 | #endif /* CONFIG_FAIR_GROUP_SCHED */ |
230 | 230 | ||
@@ -1022,10 +1022,16 @@ unsigned long weighted_cpuload(const int cpu) | |||
1022 | 1022 | ||
1023 | static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) | 1023 | static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) |
1024 | { | 1024 | { |
1025 | set_task_cfs_rq(p, cpu); | ||
1025 | #ifdef CONFIG_SMP | 1026 | #ifdef CONFIG_SMP |
1027 | /* | ||
1028 | * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be | ||
1029 | * successfuly executed on another CPU. We must ensure that updates of | ||
1030 | * per-task data have been completed by this moment. | ||
1031 | */ | ||
1032 | smp_wmb(); | ||
1026 | task_thread_info(p)->cpu = cpu; | 1033 | task_thread_info(p)->cpu = cpu; |
1027 | #endif | 1034 | #endif |
1028 | set_task_cfs_rq(p); | ||
1029 | } | 1035 | } |
1030 | 1036 | ||
1031 | #ifdef CONFIG_SMP | 1037 | #ifdef CONFIG_SMP |
@@ -7088,7 +7094,7 @@ void sched_move_task(struct task_struct *tsk) | |||
7088 | rq = task_rq_lock(tsk, &flags); | 7094 | rq = task_rq_lock(tsk, &flags); |
7089 | 7095 | ||
7090 | if (tsk->sched_class != &fair_sched_class) { | 7096 | if (tsk->sched_class != &fair_sched_class) { |
7091 | set_task_cfs_rq(tsk); | 7097 | set_task_cfs_rq(tsk, task_cpu(tsk)); |
7092 | goto done; | 7098 | goto done; |
7093 | } | 7099 | } |
7094 | 7100 | ||
@@ -7103,7 +7109,7 @@ void sched_move_task(struct task_struct *tsk) | |||
7103 | tsk->sched_class->put_prev_task(rq, tsk); | 7109 | tsk->sched_class->put_prev_task(rq, tsk); |
7104 | } | 7110 | } |
7105 | 7111 | ||
7106 | set_task_cfs_rq(tsk); | 7112 | set_task_cfs_rq(tsk, task_cpu(tsk)); |
7107 | 7113 | ||
7108 | if (on_rq) { | 7114 | if (on_rq) { |
7109 | if (unlikely(running)) | 7115 | if (unlikely(running)) |