diff options
author | Tejun Heo <tj@kernel.org> | 2011-12-12 21:12:21 -0500 |
---|---|---|
committer | Tejun Heo <tj@kernel.org> | 2011-12-12 21:12:21 -0500 |
commit | 77e4ef99d1c596a31747668e5fd837f77b6349b6 (patch) | |
tree | b9dcc8d73f9febf6a18edc0b2efa3ecddc2a7a35 | |
parent | 257058ae2b971646b96ab3a15605ac69186e562a (diff) |
threadgroup: extend threadgroup_lock() to cover exit and exec
threadgroup_lock() only protected against new addition to
the threadgroup, which was inherently somewhat incomplete and
problematic for its only user cgroup. On-going migration could race
against exec and exit leading to interesting problems - the symmetry
between various attach methods, task exiting during method execution,
->exit() racing against attach methods, migrating task switching basic
properties during exec and so on.
This patch extends threadgroup_lock() such that it protects against
all three threadgroup altering operations - fork, exit and exec. For
exit, threadgroup_change_begin/end() calls are added to exit_signals
around assertion of PF_EXITING. For exec, threadgroup_[un]lock() are
updated to also grab and release cred_guard_mutex.
With this change, threadgroup_lock() guarantees that the target
threadgroup will remain stable - no new task will be added, no new
PF_EXITING will be set and exec won't happen.
The next patch will update cgroup so that it can take full advantage
of this change.
-v2: beefed up comment as suggested by Frederic.
-v3: narrowed scope of protection in exit path as suggested by
Frederic.
Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Li Zefan <lizf@cn.fujitsu.com>
Acked-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Paul Menage <paul@paulmenage.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | include/linux/sched.h | 47 | ||||
-rw-r--r-- | kernel/signal.c | 10 |
2 files changed, 51 insertions, 6 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h index 8cd523202a3b..c0c5876c52c0 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -635,11 +635,13 @@ struct signal_struct { | |||
635 | #endif | 635 | #endif |
636 | #ifdef CONFIG_CGROUPS | 636 | #ifdef CONFIG_CGROUPS |
637 | /* | 637 | /* |
638 | * The group_rwsem prevents threads from forking with | 638 | * group_rwsem prevents new tasks from entering the threadgroup and |
639 | CLONE_THREAD while held for writing. Use this for fork-sensitive | 639 | member tasks from exiting, more specifically, setting of |
640 | * threadgroup-wide operations. It's taken for reading in fork.c in | 640 | * PF_EXITING. fork and exit paths are protected with this rwsem |
641 | * copy_process(). | 641 | * using threadgroup_change_begin/end(). Users which require |
642 | * Currently only needed write-side by cgroups. | 642 | * threadgroup to remain stable should use threadgroup_[un]lock() |
643 | * which also takes care of exec path. Currently, cgroup is the | ||
644 | * only user. | ||
643 | */ | 645 | */ |
644 | struct rw_semaphore group_rwsem; | 646 | struct rw_semaphore group_rwsem; |
645 | #endif | 647 | #endif |
@@ -2371,7 +2373,6 @@ static inline void unlock_task_sighand(struct task_struct *tsk, | |||
2371 | spin_unlock_irqrestore(&tsk->sighand->siglock, *flags); | 2373 | spin_unlock_irqrestore(&tsk->sighand->siglock, *flags); |
2372 | } | 2374 | } |
2373 | 2375 | ||
2374 | /* See the declaration of group_rwsem in signal_struct. */ | ||
2375 | #ifdef CONFIG_CGROUPS | 2376 | #ifdef CONFIG_CGROUPS |
2376 | static inline void threadgroup_change_begin(struct task_struct *tsk) | 2377 | static inline void threadgroup_change_begin(struct task_struct *tsk) |
2377 | { | 2378 | { |
@@ -2381,13 +2382,47 @@ static inline void threadgroup_change_end(struct task_struct *tsk) | |||
2381 | { | 2382 | { |
2382 | up_read(&tsk->signal->group_rwsem); | 2383 | up_read(&tsk->signal->group_rwsem); |
2383 | } | 2384 | } |
2385 | |||
2386 | /** | ||
2387 | * threadgroup_lock - lock threadgroup | ||
2388 | * @tsk: member task of the threadgroup to lock | ||
2389 | * | ||
2390 | * Lock the threadgroup @tsk belongs to. No new task is allowed to enter | ||
2391 | * and member tasks aren't allowed to exit (as indicated by PF_EXITING) or | ||
2392 | * perform exec. This is useful for cases where the threadgroup needs to | ||
2393 | * stay stable across blockable operations. | ||
2394 | * | ||
2395 | * fork and exit paths explicitly call threadgroup_change_{begin|end}() for | ||
2396 | * synchronization. While held, no new task will be added to threadgroup | ||
2397 | * and no existing live task will have its PF_EXITING set. | ||
2398 | * | ||
2399 | * During exec, a task goes and puts its thread group through unusual | ||
2400 | * changes. After de-threading, exclusive access is assumed to resources | ||
2401 | * which are usually shared by tasks in the same group - e.g. sighand may | ||
2402 | * be replaced with a new one. Also, the exec'ing task takes over group | ||
2403 | * leader role including its pid. Exclude these changes while locked by | ||
2404 | * grabbing cred_guard_mutex which is used to synchronize exec path. | ||
2405 | */ | ||
2384 | static inline void threadgroup_lock(struct task_struct *tsk) | 2406 | static inline void threadgroup_lock(struct task_struct *tsk) |
2385 | { | 2407 | { |
2408 | /* | ||
2409 | * exec uses exit for de-threading nesting group_rwsem inside | ||
2410 | * cred_guard_mutex. Grab cred_guard_mutex first. | ||
2411 | */ | ||
2412 | mutex_lock(&tsk->signal->cred_guard_mutex); | ||
2386 | down_write(&tsk->signal->group_rwsem); | 2413 | down_write(&tsk->signal->group_rwsem); |
2387 | } | 2414 | } |
2415 | |||
2416 | /** | ||
2417 | * threadgroup_unlock - unlock threadgroup | ||
2418 | * @tsk: member task of the threadgroup to unlock | ||
2419 | * | ||
2420 | * Reverse threadgroup_lock(). | ||
2421 | */ | ||
2388 | static inline void threadgroup_unlock(struct task_struct *tsk) | 2422 | static inline void threadgroup_unlock(struct task_struct *tsk) |
2389 | { | 2423 | { |
2390 | up_write(&tsk->signal->group_rwsem); | 2424 | up_write(&tsk->signal->group_rwsem); |
2425 | mutex_unlock(&tsk->signal->cred_guard_mutex); | ||
2391 | } | 2426 | } |
2392 | #else | 2427 | #else |
2393 | static inline void threadgroup_change_begin(struct task_struct *tsk) {} | 2428 | static inline void threadgroup_change_begin(struct task_struct *tsk) {} |
diff --git a/kernel/signal.c b/kernel/signal.c index b3f78d09a105..399c184bf0ae 100644 --- a/kernel/signal.c +++ b/kernel/signal.c | |||
@@ -2359,8 +2359,15 @@ void exit_signals(struct task_struct *tsk) | |||
2359 | int group_stop = 0; | 2359 | int group_stop = 0; |
2360 | sigset_t unblocked; | 2360 | sigset_t unblocked; |
2361 | 2361 | ||
2362 | /* | ||
2363 | * @tsk is about to have PF_EXITING set - lock out users which | ||
2364 | * expect stable threadgroup. | ||
2365 | */ | ||
2366 | threadgroup_change_begin(tsk); | ||
2367 | |||
2362 | if (thread_group_empty(tsk) || signal_group_exit(tsk->signal)) { | 2368 | if (thread_group_empty(tsk) || signal_group_exit(tsk->signal)) { |
2363 | tsk->flags |= PF_EXITING; | 2369 | tsk->flags |= PF_EXITING; |
2370 | threadgroup_change_end(tsk); | ||
2364 | return; | 2371 | return; |
2365 | } | 2372 | } |
2366 | 2373 | ||
@@ -2370,6 +2377,9 @@ void exit_signals(struct task_struct *tsk) | |||
2370 | * see wants_signal(), do_signal_stop(). | 2377 | * see wants_signal(), do_signal_stop(). |
2371 | */ | 2378 | */ |
2372 | tsk->flags |= PF_EXITING; | 2379 | tsk->flags |= PF_EXITING; |
2380 | |||
2381 | threadgroup_change_end(tsk); | ||
2382 | |||
2373 | if (!signal_pending(tsk)) | 2383 | if (!signal_pending(tsk)) |
2374 | goto out; | 2384 | goto out; |
2375 | 2385 | ||