aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/sched.c
diff options
context:
space:
mode:
authorDmitry Adamushko <dmitry.adamushko@gmail.com>2007-11-15 14:57:40 -0500
committerIngo Molnar <mingo@elte.hu>2007-11-15 14:57:40 -0500
commitce96b5ac742801718ae86d2adf0500c5abef3782 (patch)
tree1ec0bc7d105af9adc3836a5f47a0f9f62031d14f /kernel/sched.c
parentdae51f56204d33444f61d9e7af3ee70aef55daa4 (diff)
sched: fix __set_task_cpu() SMP race
Grant Wilson has reported rare SCHED_FAIR_USER crashes on his quad-core system, which crashes can only be explained via runqueue corruption. there is a narrow SMP race in __set_task_cpu(): after ->cpu is set up to a new value, task_rq_lock(p, ...) can be successfuly executed on another CPU. We must ensure that updates of per-task data have been completed by this moment. this bug has been hiding in the Linux scheduler for an eternity (we never had any explicit barrier for task->cpu in set_task_cpu() - so the bug was introduced in 2.5.1), but only became visible via set_task_cfs_rq() being accidentally put after the task->cpu update. It also probably needs a sufficiently out-of-order CPU to trigger. Reported-by: Grant Wilson <grant.wilson@zen.co.uk> Signed-off-by: Dmitry Adamushko <dmitry.adamushko@gmail.com> Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/sched.c')
-rw-r--r--kernel/sched.c20
1 files changed, 13 insertions, 7 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index d62b759753f3..db1f71e31310 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -216,15 +216,15 @@ static inline struct task_group *task_group(struct task_struct *p)
216} 216}
217 217
218/* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */ 218/* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
219static inline void set_task_cfs_rq(struct task_struct *p) 219static inline void set_task_cfs_rq(struct task_struct *p, unsigned int cpu)
220{ 220{
221 p->se.cfs_rq = task_group(p)->cfs_rq[task_cpu(p)]; 221 p->se.cfs_rq = task_group(p)->cfs_rq[cpu];
222 p->se.parent = task_group(p)->se[task_cpu(p)]; 222 p->se.parent = task_group(p)->se[cpu];
223} 223}
224 224
225#else 225#else
226 226
227static inline void set_task_cfs_rq(struct task_struct *p) { } 227static inline void set_task_cfs_rq(struct task_struct *p, unsigned int cpu) { }
228 228
229#endif /* CONFIG_FAIR_GROUP_SCHED */ 229#endif /* CONFIG_FAIR_GROUP_SCHED */
230 230
@@ -1022,10 +1022,16 @@ unsigned long weighted_cpuload(const int cpu)
1022 1022
1023static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) 1023static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
1024{ 1024{
1025 set_task_cfs_rq(p, cpu);
1025#ifdef CONFIG_SMP 1026#ifdef CONFIG_SMP
1027 /*
1028 * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be
1029 * successfuly executed on another CPU. We must ensure that updates of
1030 * per-task data have been completed by this moment.
1031 */
1032 smp_wmb();
1026 task_thread_info(p)->cpu = cpu; 1033 task_thread_info(p)->cpu = cpu;
1027#endif 1034#endif
1028 set_task_cfs_rq(p);
1029} 1035}
1030 1036
1031#ifdef CONFIG_SMP 1037#ifdef CONFIG_SMP
@@ -7088,7 +7094,7 @@ void sched_move_task(struct task_struct *tsk)
7088 rq = task_rq_lock(tsk, &flags); 7094 rq = task_rq_lock(tsk, &flags);
7089 7095
7090 if (tsk->sched_class != &fair_sched_class) { 7096 if (tsk->sched_class != &fair_sched_class) {
7091 set_task_cfs_rq(tsk); 7097 set_task_cfs_rq(tsk, task_cpu(tsk));
7092 goto done; 7098 goto done;
7093 } 7099 }
7094 7100
@@ -7103,7 +7109,7 @@ void sched_move_task(struct task_struct *tsk)
7103 tsk->sched_class->put_prev_task(rq, tsk); 7109 tsk->sched_class->put_prev_task(rq, tsk);
7104 } 7110 }
7105 7111
7106 set_task_cfs_rq(tsk); 7112 set_task_cfs_rq(tsk, task_cpu(tsk));
7107 7113
7108 if (on_rq) { 7114 if (on_rq) {
7109 if (unlikely(running)) 7115 if (unlikely(running))