aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorOleg Nesterov <oleg@redhat.com>2016-11-14 13:46:09 -0500
committerIngo Molnar <mingo@kernel.org>2016-11-22 06:33:42 -0500
commit18f649ef344127ef6de23a5a4272dbe2fdb73dde (patch)
tree29890b0385d329d63885baa8e07a53dcddd18b70
parent3b404a519815b9820f73f1ecf404e5546c9270ba (diff)
sched/autogroup: Fix autogroup_move_group() to never skip sched_move_task()
The PF_EXITING check in task_wants_autogroup() is no longer needed. Remove it, but see the next patch. However the comment is correct in that autogroup_move_group() must always change task_group() for every thread so the sysctl_ check is very wrong; we can race with cgroups and even sys_setsid() is not safe because a task running with task_group() == ag->tg must participate in refcounting: int main(void) { int sctl = open("/proc/sys/kernel/sched_autogroup_enabled", O_WRONLY); assert(sctl > 0); if (fork()) { wait(NULL); // destroy the child's ag/tg pause(); } assert(pwrite(sctl, "1\n", 2, 0) == 2); assert(setsid() > 0); if (fork()) pause(); kill(getppid(), SIGKILL); sleep(1); // The child has gone, the grandchild runs with kref == 1 assert(pwrite(sctl, "0\n", 2, 0) == 2); assert(setsid() > 0); // runs with the freed ag/tg for (;;) sleep(1); return 0; } crashes the kernel. It doesn't really need sleep(1), it doesn't matter if autogroup_move_group() actually frees the task_group or this happens later. Reported-by: Vern Lovejoy <vlovejoy@redhat.com> Signed-off-by: Oleg Nesterov <oleg@redhat.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Mike Galbraith <efault@gmx.de> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: hartsjc@redhat.com Cc: vbendel@redhat.com Link: http://lkml.kernel.org/r/20161114184609.GA15965@redhat.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--kernel/sched/auto_group.c23
1 files changed, 12 insertions, 11 deletions
diff --git a/kernel/sched/auto_group.c b/kernel/sched/auto_group.c
index a5d966cb8891..ad2b19ad6ca0 100644
--- a/kernel/sched/auto_group.c
+++ b/kernel/sched/auto_group.c
@@ -111,14 +111,11 @@ bool task_wants_autogroup(struct task_struct *p, struct task_group *tg)
111{ 111{
112 if (tg != &root_task_group) 112 if (tg != &root_task_group)
113 return false; 113 return false;
114
115 /* 114 /*
116 * We can only assume the task group can't go away on us if 115 * If we race with autogroup_move_group() the caller can use the old
117 * autogroup_move_group() can see us on ->thread_group list. 116 * value of signal->autogroup but in this case sched_move_task() will
117 * be called again before autogroup_kref_put().
118 */ 118 */
119 if (p->flags & PF_EXITING)
120 return false;
121
122 return true; 119 return true;
123} 120}
124 121
@@ -138,13 +135,17 @@ autogroup_move_group(struct task_struct *p, struct autogroup *ag)
138 } 135 }
139 136
140 p->signal->autogroup = autogroup_kref_get(ag); 137 p->signal->autogroup = autogroup_kref_get(ag);
141 138 /*
142 if (!READ_ONCE(sysctl_sched_autogroup_enabled)) 139 * We can't avoid sched_move_task() after we changed signal->autogroup,
143 goto out; 140 * this process can already run with task_group() == prev->tg or we can
144 141 * race with cgroup code which can read autogroup = prev under rq->lock.
142 * In the latter case for_each_thread() can not miss a migrating thread,
143 * cpu_cgroup_attach() must not be possible after cgroup_exit() and it
144 * can't be removed from thread list, we hold ->siglock.
145 */
145 for_each_thread(p, t) 146 for_each_thread(p, t)
146 sched_move_task(t); 147 sched_move_task(t);
147out: 148
148 unlock_task_sighand(p, &flags); 149 unlock_task_sighand(p, &flags);
149 autogroup_kref_put(prev); 150 autogroup_kref_put(prev);
150} 151}