author    Oleg Nesterov <oleg@redhat.com>    2016-05-18 13:02:18 -0400
committer Ingo Molnar <mingo@kernel.org>    2016-06-03 03:18:57 -0400
commit    150593bf869393d10a79f6bd3df2585ecc20a9bb (patch)
tree      cbe9c8bbf903315c0b07397f18a6a97294bab0e7
parent    df55f462b905f3b2d40ec3fb865891382a6ebfb1 (diff)
sched/api: Introduce task_rcu_dereference() and try_get_task_struct()
Generally, a task_struct is only protected by RCU if it was found on an
RCU-protected list (say, via for_each_process() or find_task_by_vpid()).

As Kirill pointed out, rq->curr isn't protected by RCU: the scheduler
drops the (potentially) last reference without an RCU grace period, which
means we need to fix the code that uses foreign_rq->curr under
rcu_read_lock().

Add a new helper which can be used to dereference rq->curr or any other
pointer to task_struct, assuming that it is cleared or updated before the
final put_task_struct(). It returns non-NULL only if the task can't go
away before rcu_read_unlock().

( Also add try_get_task_struct() to make it easier to use this API
  correctly. )

Suggested-by: Kirill Tkhai <ktkhai@parallels.com>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
[ Updated comments; added try_get_task_struct() ]
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Chris Metcalf <cmetcalf@ezchip.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Kirill Tkhai <tkhai@yandex.ru>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vladimir Davydov <vdavydov@parallels.com>
Link: http://lkml.kernel.org/r/20160518170218.GY3192@twins.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
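[ Editor's note: a minimal sketch of the intended calling pattern for the
  new reference-taking helper. The caller, "rq" and inspect_task() are
  hypothetical stand-ins for illustration; they are not part of this patch. ]

        struct task_struct *task;

        /*
         * try_get_task_struct() takes a real reference (get_task_struct())
         * if, and only if, the task was still live when we looked; the
         * caller must pair it with put_task_struct().
         */
        task = try_get_task_struct(&rq->curr);
        if (task) {
                inspect_task(task);     /* hypothetical consumer */
                put_task_struct(task);  /* drop our reference */
        }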
-rw-r--r--  include/linux/sched.h |  3
-rw-r--r--  kernel/exit.c         | 76
2 files changed, 79 insertions(+), 0 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6e42ada26345..dee41bf59e6b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2139,6 +2139,9 @@ static inline void put_task_struct(struct task_struct *t)
 		__put_task_struct(t);
 }
 
+struct task_struct *task_rcu_dereference(struct task_struct **ptask);
+struct task_struct *try_get_task_struct(struct task_struct **ptask);
+
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
 extern void task_cputime(struct task_struct *t,
 			 cputime_t *utime, cputime_t *stime);
diff --git a/kernel/exit.c b/kernel/exit.c
index 9e6e1356e6bb..2fb4d44c51b1 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -211,6 +211,82 @@ repeat:
 }
 
 /*
+ * Note that if this function returns a valid task_struct pointer (!NULL)
+ * task->usage must remain >0 for the duration of the RCU critical section.
+ */
+struct task_struct *task_rcu_dereference(struct task_struct **ptask)
+{
+	struct sighand_struct *sighand;
+	struct task_struct *task;
+
+	/*
+	 * We need to verify that release_task() was not called and thus
+	 * delayed_put_task_struct() can't run and drop the last reference
+	 * before rcu_read_unlock(). We check task->sighand != NULL,
+	 * but we can read the already freed and reused memory.
+	 */
+retry:
+	task = rcu_dereference(*ptask);
+	if (!task)
+		return NULL;
+
+	probe_kernel_address(&task->sighand, sighand);
+
+	/*
+	 * Pairs with atomic_dec_and_test() in put_task_struct(). If this task
+	 * was already freed we can not miss the preceding update of this
+	 * pointer.
+	 */
+	smp_rmb();
+	if (unlikely(task != READ_ONCE(*ptask)))
+		goto retry;
+
+	/*
+	 * We've re-checked that "task == *ptask", now we have two different
+	 * cases:
+	 *
+	 * 1. This is actually the same task/task_struct. In this case
+	 *    sighand != NULL tells us it is still alive.
+	 *
+	 * 2. This is another task which got the same memory for task_struct.
+	 *    We can't know this of course, and we can not trust
+	 *    sighand != NULL.
+	 *
+	 *    In this case we actually return a random value, but this is
+	 *    correct.
+	 *
+	 *    If we return NULL - we can pretend that we actually noticed that
+	 *    *ptask was updated when the previous task has exited. Or pretend
+	 *    that probe_slab_address(&sighand) reads NULL.
+	 *
+	 *    If we return the new task (because sighand is not NULL for any
+	 *    reason) - this is fine too. This (new) task can't go away before
+	 *    another gp pass.
+	 *
+	 *    And note: We could even eliminate the false positive if re-read
+	 *    task->sighand once again to avoid the falsely NULL. But this case
+	 *    is very unlikely so we don't care.
+	 */
+	if (!sighand)
+		return NULL;
+
+	return task;
+}
+
+struct task_struct *try_get_task_struct(struct task_struct **ptask)
+{
+	struct task_struct *task;
+
+	rcu_read_lock();
+	task = task_rcu_dereference(ptask);
+	if (task)
+		get_task_struct(task);
+	rcu_read_unlock();
+
+	return task;
+}
+
+/*
  * Determine if a process group is "orphaned", according to the POSIX
  * definition in 2.2.2.52.  Orphaned process groups are not to be affected
  * by terminal-generated stop signals.  Newly orphaned process groups are
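
[ Editor's note: when the caller only needs the task inside a single RCU
  read-side critical section, task_rcu_dereference() can also be used on
  its own, without taking a reference. A sketch under the same assumptions
  as above ("rq" and inspect_task() are illustrative, not from this patch): ]

        struct task_struct *task;

        rcu_read_lock();
        task = task_rcu_dereference(&rq->curr);
        if (task)
                /* task->usage stays > 0 until rcu_read_unlock() */
                inspect_task(task);
        rcu_read_unlock();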