aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTejun Heo <tj@kernel.org>2011-12-12 21:12:21 -0500
committerTejun Heo <tj@kernel.org>2011-12-12 21:12:21 -0500
commit77e4ef99d1c596a31747668e5fd837f77b6349b6 (patch)
treeb9dcc8d73f9febf6a18edc0b2efa3ecddc2a7a35
parent257058ae2b971646b96ab3a15605ac69186e562a (diff)
threadgroup: extend threadgroup_lock() to cover exit and exec
threadgroup_lock() protected only protected against new addition to the threadgroup, which was inherently somewhat incomplete and problematic for its only user cgroup. On-going migration could race against exec and exit leading to interesting problems - the symmetry between various attach methods, task exiting during method execution, ->exit() racing against attach methods, migrating task switching basic properties during exec and so on. This patch extends threadgroup_lock() such that it protects against all three threadgroup altering operations - fork, exit and exec. For exit, threadgroup_change_begin/end() calls are added to exit_signals around assertion of PF_EXITING. For exec, threadgroup_[un]lock() are updated to also grab and release cred_guard_mutex. With this change, threadgroup_lock() guarantees that the target threadgroup will remain stable - no new task will be added, no new PF_EXITING will be set and exec won't happen. The next patch will update cgroup so that it can take full advantage of this change. -v2: beefed up comment as suggested by Frederic. -v3: narrowed scope of protection in exit path as suggested by Frederic. Signed-off-by: Tejun Heo <tj@kernel.org> Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Acked-by: Li Zefan <lizf@cn.fujitsu.com> Acked-by: Frederic Weisbecker <fweisbec@gmail.com> Cc: Oleg Nesterov <oleg@redhat.com> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Paul Menage <paul@paulmenage.org> Cc: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--include/linux/sched.h47
-rw-r--r--kernel/signal.c10
2 files changed, 51 insertions, 6 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8cd523202a3b..c0c5876c52c0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -635,11 +635,13 @@ struct signal_struct {
635#endif 635#endif
636#ifdef CONFIG_CGROUPS 636#ifdef CONFIG_CGROUPS
637 /* 637 /*
638 * The group_rwsem prevents threads from forking with 638 * group_rwsem prevents new tasks from entering the threadgroup and
639 * CLONE_THREAD while held for writing. Use this for fork-sensitive 639 * member tasks from exiting, more specifically, setting of
640 * threadgroup-wide operations. It's taken for reading in fork.c in 640 * PF_EXITING. fork and exit paths are protected with this rwsem
641 * copy_process(). 641 * using threadgroup_change_begin/end(). Users which require
642 * Currently only needed write-side by cgroups. 642 * threadgroup to remain stable should use threadgroup_[un]lock()
643 * which also takes care of exec path. Currently, cgroup is the
644 * only user.
643 */ 645 */
644 struct rw_semaphore group_rwsem; 646 struct rw_semaphore group_rwsem;
645#endif 647#endif
@@ -2371,7 +2373,6 @@ static inline void unlock_task_sighand(struct task_struct *tsk,
2371 spin_unlock_irqrestore(&tsk->sighand->siglock, *flags); 2373 spin_unlock_irqrestore(&tsk->sighand->siglock, *flags);
2372} 2374}
2373 2375
2374/* See the declaration of group_rwsem in signal_struct. */
2375#ifdef CONFIG_CGROUPS 2376#ifdef CONFIG_CGROUPS
2376static inline void threadgroup_change_begin(struct task_struct *tsk) 2377static inline void threadgroup_change_begin(struct task_struct *tsk)
2377{ 2378{
@@ -2381,13 +2382,47 @@ static inline void threadgroup_change_end(struct task_struct *tsk)
2381{ 2382{
2382 up_read(&tsk->signal->group_rwsem); 2383 up_read(&tsk->signal->group_rwsem);
2383} 2384}
2385
2386/**
2387 * threadgroup_lock - lock threadgroup
2388 * @tsk: member task of the threadgroup to lock
2389 *
2390 * Lock the threadgroup @tsk belongs to. No new task is allowed to enter
2391 * and member tasks aren't allowed to exit (as indicated by PF_EXITING) or
2392 * perform exec. This is useful for cases where the threadgroup needs to
2393 * stay stable across blockable operations.
2394 *
2395 * fork and exit paths explicitly call threadgroup_change_{begin|end}() for
2396 * synchronization. While held, no new task will be added to threadgroup
2397 * and no existing live task will have its PF_EXITING set.
2398 *
2399 * During exec, a task goes and puts its thread group through unusual
2400 * changes. After de-threading, exclusive access is assumed to resources
2401 * which are usually shared by tasks in the same group - e.g. sighand may
2402 * be replaced with a new one. Also, the exec'ing task takes over group
2403 * leader role including its pid. Exclude these changes while locked by
2404 * grabbing cred_guard_mutex which is used to synchronize exec path.
2405 */
2384static inline void threadgroup_lock(struct task_struct *tsk) 2406static inline void threadgroup_lock(struct task_struct *tsk)
2385{ 2407{
2408 /*
2409 * exec uses exit for de-threading nesting group_rwsem inside
2410 * cred_guard_mutex. Grab cred_guard_mutex first.
2411 */
2412 mutex_lock(&tsk->signal->cred_guard_mutex);
2386 down_write(&tsk->signal->group_rwsem); 2413 down_write(&tsk->signal->group_rwsem);
2387} 2414}
2415
2416/**
2417 * threadgroup_unlock - unlock threadgroup
2418 * @tsk: member task of the threadgroup to unlock
2419 *
2420 * Reverse threadgroup_lock().
2421 */
2388static inline void threadgroup_unlock(struct task_struct *tsk) 2422static inline void threadgroup_unlock(struct task_struct *tsk)
2389{ 2423{
2390 up_write(&tsk->signal->group_rwsem); 2424 up_write(&tsk->signal->group_rwsem);
2425 mutex_unlock(&tsk->signal->cred_guard_mutex);
2391} 2426}
2392#else 2427#else
2393static inline void threadgroup_change_begin(struct task_struct *tsk) {} 2428static inline void threadgroup_change_begin(struct task_struct *tsk) {}
diff --git a/kernel/signal.c b/kernel/signal.c
index b3f78d09a105..399c184bf0ae 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2359,8 +2359,15 @@ void exit_signals(struct task_struct *tsk)
2359 int group_stop = 0; 2359 int group_stop = 0;
2360 sigset_t unblocked; 2360 sigset_t unblocked;
2361 2361
2362 /*
2363 * @tsk is about to have PF_EXITING set - lock out users which
2364 * expect stable threadgroup.
2365 */
2366 threadgroup_change_begin(tsk);
2367
2362 if (thread_group_empty(tsk) || signal_group_exit(tsk->signal)) { 2368 if (thread_group_empty(tsk) || signal_group_exit(tsk->signal)) {
2363 tsk->flags |= PF_EXITING; 2369 tsk->flags |= PF_EXITING;
2370 threadgroup_change_end(tsk);
2364 return; 2371 return;
2365 } 2372 }
2366 2373
@@ -2370,6 +2377,9 @@ void exit_signals(struct task_struct *tsk)
2370 * see wants_signal(), do_signal_stop(). 2377 * see wants_signal(), do_signal_stop().
2371 */ 2378 */
2372 tsk->flags |= PF_EXITING; 2379 tsk->flags |= PF_EXITING;
2380
2381 threadgroup_change_end(tsk);
2382
2373 if (!signal_pending(tsk)) 2383 if (!signal_pending(tsk))
2374 goto out; 2384 goto out;
2375 2385