path: root/kernel/sched
author	Peter Zijlstra <peterz@infradead.org>	2012-06-22 07:36:05 -0400
committer	Ingo Molnar <mingo@kernel.org>	2012-07-24 07:58:20 -0400
commit	8323f26ce3425460769605a6aece7a174edaa7d1 (patch)
tree	44daa0dafa49cedc9301efd1417c6c2ac338c1c7 /kernel/sched
parent	88b8dac0a14c511ff41486b83a8c3d688936eec0 (diff)
sched: Fix race in task_group()
Stefan reported a crash on a kernel before a3e5d1091c1 ("sched: Don't call task_group() too many times in set_task_rq()"). He found the reason to be that the multiple task_group() invocations in set_task_rq() returned different values.

Looking at all that, I found a lack of serialization and plain wrong comments.

The below tries to fix it using an extra pointer which is updated under the appropriate scheduler locks. It's not pretty, but I can't really see another way given how all the cgroup stuff works.

Reported-and-tested-by: Stefan Bader <stefan.bader@canonical.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1340364965.18025.71.camel@twins
Signed-off-by: Ingo Molnar <mingo@kernel.org>
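As an aside, here is a minimal sketch of the race being fixed, assuming the pre-a3e5d1091c1 shape of set_task_rq() (simplified, illustrative, not the exact upstream code): each task_group() call re-reads the task's cgroup state, and a concurrent cpu_cgroup_attach() can change it between reads, so the fields below can end up pointing into two different task_groups.

	static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
	{
	#ifdef CONFIG_FAIR_GROUP_SCHED
		/* read #1 and read #2 may return different groups */
		p->se.cfs_rq = task_group(p)->cfs_rq[cpu];
		p->se.parent = task_group(p)->se[cpu];
	#endif
	#ifdef CONFIG_RT_GROUP_SCHED
		p->rt.rt_rq  = task_group(p)->rt_rq[cpu];
		p->rt.parent = task_group(p)->rt_se[cpu];
	#endif
	}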
Diffstat (limited to 'kernel/sched')
-rw-r--r--	kernel/sched/core.c	9
-rw-r--r--	kernel/sched/sched.h	23
2 files changed, 18 insertions(+), 14 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 536b213f0ce5..5d011ef4c0df 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1096,7 +1096,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 	 * a task's CPU. ->pi_lock for waking tasks, rq->lock for runnable tasks.
 	 *
 	 * sched_move_task() holds both and thus holding either pins the cgroup,
-	 * see set_task_rq().
+	 * see task_group().
 	 *
 	 * Furthermore, all task_rq users should acquire both locks, see
 	 * task_rq_lock().
@@ -7658,6 +7658,7 @@ void sched_destroy_group(struct task_group *tg)
  */
 void sched_move_task(struct task_struct *tsk)
 {
+	struct task_group *tg;
 	int on_rq, running;
 	unsigned long flags;
 	struct rq *rq;
@@ -7672,6 +7673,12 @@ void sched_move_task(struct task_struct *tsk)
 	if (unlikely(running))
 		tsk->sched_class->put_prev_task(rq, tsk);
 
+	tg = container_of(task_subsys_state_check(tsk, cpu_cgroup_subsys_id,
+				lockdep_is_held(&tsk->sighand->siglock)),
+			  struct task_group, css);
+	tg = autogroup_task_group(tsk, tg);
+	tsk->sched_task_group = tg;
+
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	if (tsk->sched_class->task_move_group)
 		tsk->sched_class->task_move_group(tsk, on_rq);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 55844f24435a..c35a1a7dd4d6 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -538,22 +538,19 @@ extern int group_balance_cpu(struct sched_group *sg);
 /*
  * Return the group to which this tasks belongs.
  *
- * We use task_subsys_state_check() and extend the RCU verification with
- * pi->lock and rq->lock because cpu_cgroup_attach() holds those locks for each
- * task it moves into the cgroup. Therefore by holding either of those locks,
- * we pin the task to the current cgroup.
+ * We cannot use task_subsys_state() and friends because the cgroup
+ * subsystem changes that value before the cgroup_subsys::attach() method
+ * is called, therefore we cannot pin it and might observe the wrong value.
+ *
+ * The same is true for autogroup's p->signal->autogroup->tg, the autogroup
+ * core changes this before calling sched_move_task().
+ *
+ * Instead we use a 'copy' which is updated from sched_move_task() while
+ * holding both task_struct::pi_lock and rq::lock.
  */
 static inline struct task_group *task_group(struct task_struct *p)
 {
-	struct task_group *tg;
-	struct cgroup_subsys_state *css;
-
-	css = task_subsys_state_check(p, cpu_cgroup_subsys_id,
-			lockdep_is_held(&p->pi_lock) ||
-			lockdep_is_held(&task_rq(p)->lock));
-	tg = container_of(css, struct task_group, css);
-
-	return autogroup_task_group(p, tg);
+	return p->sched_task_group;
 }
 
 /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
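For context, a simplified sketch of how the cached pointer is consumed once this patch is applied, assuming the contemporaneous single-lookup set_task_rq() (illustrative only, not part of this diff):

	static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
	{
	#if defined(CONFIG_FAIR_GROUP_SCHED) || defined(CONFIG_RT_GROUP_SCHED)
		/* one read of p->sched_task_group, stable under ->pi_lock or rq->lock */
		struct task_group *tg = task_group(p);
	#endif
	#ifdef CONFIG_FAIR_GROUP_SCHED
		p->se.cfs_rq = tg->cfs_rq[cpu];
		p->se.parent = tg->se[cpu];
	#endif
	#ifdef CONFIG_RT_GROUP_SCHED
		p->rt.rt_rq  = tg->rt_rq[cpu];
		p->rt.parent = tg->rt_se[cpu];
	#endif
	}

Because p->sched_task_group is only written by sched_move_task() while holding both task_struct::pi_lock and rq::lock, any caller holding either lock observes a single, consistent task_group.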