diff options
author | Oleg Nesterov <oleg@redhat.com> | 2016-11-14 13:46:09 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2016-11-22 06:33:42 -0500 |
commit | 18f649ef344127ef6de23a5a4272dbe2fdb73dde (patch) | |
tree | 29890b0385d329d63885baa8e07a53dcddd18b70 | |
parent | 3b404a519815b9820f73f1ecf404e5546c9270ba (diff) |
sched/autogroup: Fix autogroup_move_group() to never skip sched_move_task()
The PF_EXITING check in task_wants_autogroup() is no longer needed. Remove
it, but see the next patch.
However the comment is correct in that autogroup_move_group() must always
change task_group() for every thread so the sysctl_ check is very wrong;
we can race with cgroups and even sys_setsid() is not safe because a task
running with task_group() == ag->tg must participate in refcounting:
int main(void)
{
int sctl = open("/proc/sys/kernel/sched_autogroup_enabled", O_WRONLY);
assert(sctl > 0);
if (fork()) {
wait(NULL); // destroy the child's ag/tg
pause();
}
assert(pwrite(sctl, "1\n", 2, 0) == 2);
assert(setsid() > 0);
if (fork())
pause();
kill(getppid(), SIGKILL);
sleep(1);
// The child has gone, the grandchild runs with kref == 1
assert(pwrite(sctl, "0\n", 2, 0) == 2);
assert(setsid() > 0);
// runs with the freed ag/tg
for (;;)
sleep(1);
return 0;
}
crashes the kernel. It doesn't really need sleep(1), it doesn't matter if
autogroup_move_group() actually frees the task_group or this happens later.
Reported-by: Vern Lovejoy <vlovejoy@redhat.com>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: hartsjc@redhat.com
Cc: vbendel@redhat.com
Link: http://lkml.kernel.org/r/20161114184609.GA15965@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r-- | kernel/sched/auto_group.c | 23 |
1 files changed, 12 insertions, 11 deletions
diff --git a/kernel/sched/auto_group.c b/kernel/sched/auto_group.c index a5d966cb8891..ad2b19ad6ca0 100644 --- a/kernel/sched/auto_group.c +++ b/kernel/sched/auto_group.c | |||
@@ -111,14 +111,11 @@ bool task_wants_autogroup(struct task_struct *p, struct task_group *tg) | |||
111 | { | 111 | { |
112 | if (tg != &root_task_group) | 112 | if (tg != &root_task_group) |
113 | return false; | 113 | return false; |
114 | |||
115 | /* | 114 | /* |
116 | * We can only assume the task group can't go away on us if | 115 | * If we race with autogroup_move_group() the caller can use the old |
117 | * autogroup_move_group() can see us on ->thread_group list. | 116 | * value of signal->autogroup but in this case sched_move_task() will |
117 | * be called again before autogroup_kref_put(). | ||
118 | */ | 118 | */ |
119 | if (p->flags & PF_EXITING) | ||
120 | return false; | ||
121 | |||
122 | return true; | 119 | return true; |
123 | } | 120 | } |
124 | 121 | ||
@@ -138,13 +135,17 @@ autogroup_move_group(struct task_struct *p, struct autogroup *ag) | |||
138 | } | 135 | } |
139 | 136 | ||
140 | p->signal->autogroup = autogroup_kref_get(ag); | 137 | p->signal->autogroup = autogroup_kref_get(ag); |
141 | 138 | /* | |
142 | if (!READ_ONCE(sysctl_sched_autogroup_enabled)) | 139 | * We can't avoid sched_move_task() after we changed signal->autogroup, |
143 | goto out; | 140 | * this process can already run with task_group() == prev->tg or we can |
144 | 141 | * race with cgroup code which can read autogroup = prev under rq->lock. | |
142 | * In the latter case for_each_thread() can not miss a migrating thread, | ||
143 | * cpu_cgroup_attach() must not be possible after cgroup_exit() and it | ||
144 | * can't be removed from thread list, we hold ->siglock. | ||
145 | */ | ||
145 | for_each_thread(p, t) | 146 | for_each_thread(p, t) |
146 | sched_move_task(t); | 147 | sched_move_task(t); |
147 | out: | 148 | |
148 | unlock_task_sighand(p, &flags); | 149 | unlock_task_sighand(p, &flags); |
149 | autogroup_kref_put(prev); | 150 | autogroup_kref_put(prev); |
150 | } | 151 | } |