diff options
author | Ben Blum <bblum@andrew.cmu.edu> | 2011-05-26 19:25:18 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-05-26 20:12:34 -0400 |
commit | 4714d1d32d97239fb5ae3e10521d3f133a899b66 (patch) | |
tree | fb50707cefc386bf4e87cac9661a38dcfe3192df | |
parent | dcb3a08e69629ea65a3e9647da730bfaf670497d (diff) |
cgroups: read-write lock CLONE_THREAD forking per threadgroup
Adds functionality to read/write lock CLONE_THREAD fork()ing per-threadgroup.
Add an rwsem to each threadgroup's signal_struct, under CONFIG_CGROUPS. It is
taken for reading in the fork path when CLONE_THREAD is set, so that holding
it for writing prevents new threads from being forked into the threadgroup.
If another part of the kernel later wants to use such a locking mechanism,
the CONFIG_CGROUPS ifdefs should be changed to a more general config option
that both CGROUPS and the other subsystem would depend on.
This is a preparatory patch for cgroup-procs-write.patch.
Signed-off-by: Ben Blum <bblum@andrew.cmu.edu>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Matt Helsley <matthltc@us.ibm.com>
Reviewed-by: Paul Menage <menage@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Miao Xie <miaox@cn.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | include/linux/init_task.h | 9 | ||||
-rw-r--r-- | include/linux/sched.h | 36 | ||||
-rw-r--r-- | kernel/fork.c | 10 |
3 files changed, 55 insertions, 0 deletions
diff --git a/include/linux/init_task.h b/include/linux/init_task.h index bafc58c00fc..580f70c0239 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h | |||
@@ -22,6 +22,14 @@ | |||
22 | extern struct files_struct init_files; | 22 | extern struct files_struct init_files; |
23 | extern struct fs_struct init_fs; | 23 | extern struct fs_struct init_fs; |
24 | 24 | ||
25 | #ifdef CONFIG_CGROUPS | ||
26 | #define INIT_THREADGROUP_FORK_LOCK(sig) \ | ||
27 | .threadgroup_fork_lock = \ | ||
28 | __RWSEM_INITIALIZER(sig.threadgroup_fork_lock), | ||
29 | #else | ||
30 | #define INIT_THREADGROUP_FORK_LOCK(sig) | ||
31 | #endif | ||
32 | |||
25 | #define INIT_SIGNALS(sig) { \ | 33 | #define INIT_SIGNALS(sig) { \ |
26 | .nr_threads = 1, \ | 34 | .nr_threads = 1, \ |
27 | .wait_chldexit = __WAIT_QUEUE_HEAD_INITIALIZER(sig.wait_chldexit),\ | 35 | .wait_chldexit = __WAIT_QUEUE_HEAD_INITIALIZER(sig.wait_chldexit),\ |
@@ -38,6 +46,7 @@ extern struct fs_struct init_fs; | |||
38 | }, \ | 46 | }, \ |
39 | .cred_guard_mutex = \ | 47 | .cred_guard_mutex = \ |
40 | __MUTEX_INITIALIZER(sig.cred_guard_mutex), \ | 48 | __MUTEX_INITIALIZER(sig.cred_guard_mutex), \ |
49 | INIT_THREADGROUP_FORK_LOCK(sig) \ | ||
41 | } | 50 | } |
42 | 51 | ||
43 | extern struct nsproxy init_nsproxy; | 52 | extern struct nsproxy init_nsproxy; |
diff --git a/include/linux/sched.h b/include/linux/sched.h index f18300eddfc..dc8871295a5 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -513,6 +513,7 @@ struct thread_group_cputimer { | |||
513 | spinlock_t lock; | 513 | spinlock_t lock; |
514 | }; | 514 | }; |
515 | 515 | ||
516 | #include <linux/rwsem.h> | ||
516 | struct autogroup; | 517 | struct autogroup; |
517 | 518 | ||
518 | /* | 519 | /* |
@@ -632,6 +633,16 @@ struct signal_struct { | |||
632 | unsigned audit_tty; | 633 | unsigned audit_tty; |
633 | struct tty_audit_buf *tty_audit_buf; | 634 | struct tty_audit_buf *tty_audit_buf; |
634 | #endif | 635 | #endif |
636 | #ifdef CONFIG_CGROUPS | ||
637 | /* | ||
638 | * The threadgroup_fork_lock prevents threads from forking with | ||
639 | * CLONE_THREAD while held for writing. Use this for fork-sensitive | ||
640 | * threadgroup-wide operations. It's taken for reading in fork.c in | ||
641 | * copy_process(). | ||
642 | * Currently only needed write-side by cgroups. | ||
643 | */ | ||
644 | struct rw_semaphore threadgroup_fork_lock; | ||
645 | #endif | ||
635 | 646 | ||
636 | int oom_adj; /* OOM kill score adjustment (bit shift) */ | 647 | int oom_adj; /* OOM kill score adjustment (bit shift) */ |
637 | int oom_score_adj; /* OOM kill score adjustment */ | 648 | int oom_score_adj; /* OOM kill score adjustment */ |
@@ -2323,6 +2334,31 @@ static inline void unlock_task_sighand(struct task_struct *tsk, | |||
2323 | spin_unlock_irqrestore(&tsk->sighand->siglock, *flags); | 2334 | spin_unlock_irqrestore(&tsk->sighand->siglock, *flags); |
2324 | } | 2335 | } |
2325 | 2336 | ||
2337 | /* See the declaration of threadgroup_fork_lock in signal_struct. */ | ||
2338 | #ifdef CONFIG_CGROUPS | ||
2339 | static inline void threadgroup_fork_read_lock(struct task_struct *tsk) | ||
2340 | { | ||
2341 | down_read(&tsk->signal->threadgroup_fork_lock); | ||
2342 | } | ||
2343 | static inline void threadgroup_fork_read_unlock(struct task_struct *tsk) | ||
2344 | { | ||
2345 | up_read(&tsk->signal->threadgroup_fork_lock); | ||
2346 | } | ||
2347 | static inline void threadgroup_fork_write_lock(struct task_struct *tsk) | ||
2348 | { | ||
2349 | down_write(&tsk->signal->threadgroup_fork_lock); | ||
2350 | } | ||
2351 | static inline void threadgroup_fork_write_unlock(struct task_struct *tsk) | ||
2352 | { | ||
2353 | up_write(&tsk->signal->threadgroup_fork_lock); | ||
2354 | } | ||
2355 | #else | ||
2356 | static inline void threadgroup_fork_read_lock(struct task_struct *tsk) {} | ||
2357 | static inline void threadgroup_fork_read_unlock(struct task_struct *tsk) {} | ||
2358 | static inline void threadgroup_fork_write_lock(struct task_struct *tsk) {} | ||
2359 | static inline void threadgroup_fork_write_unlock(struct task_struct *tsk) {} | ||
2360 | #endif | ||
2361 | |||
2326 | #ifndef __HAVE_THREAD_FUNCTIONS | 2362 | #ifndef __HAVE_THREAD_FUNCTIONS |
2327 | 2363 | ||
2328 | #define task_thread_info(task) ((struct thread_info *)(task)->stack) | 2364 | #define task_thread_info(task) ((struct thread_info *)(task)->stack) |
diff --git a/kernel/fork.c b/kernel/fork.c index 8e7e135d081..1fa9d940e30 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -957,6 +957,10 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) | |||
957 | tty_audit_fork(sig); | 957 | tty_audit_fork(sig); |
958 | sched_autogroup_fork(sig); | 958 | sched_autogroup_fork(sig); |
959 | 959 | ||
960 | #ifdef CONFIG_CGROUPS | ||
961 | init_rwsem(&sig->threadgroup_fork_lock); | ||
962 | #endif | ||
963 | |||
960 | sig->oom_adj = current->signal->oom_adj; | 964 | sig->oom_adj = current->signal->oom_adj; |
961 | sig->oom_score_adj = current->signal->oom_score_adj; | 965 | sig->oom_score_adj = current->signal->oom_score_adj; |
962 | sig->oom_score_adj_min = current->signal->oom_score_adj_min; | 966 | sig->oom_score_adj_min = current->signal->oom_score_adj_min; |
@@ -1138,6 +1142,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1138 | monotonic_to_bootbased(&p->real_start_time); | 1142 | monotonic_to_bootbased(&p->real_start_time); |
1139 | p->io_context = NULL; | 1143 | p->io_context = NULL; |
1140 | p->audit_context = NULL; | 1144 | p->audit_context = NULL; |
1145 | if (clone_flags & CLONE_THREAD) | ||
1146 | threadgroup_fork_read_lock(current); | ||
1141 | cgroup_fork(p); | 1147 | cgroup_fork(p); |
1142 | #ifdef CONFIG_NUMA | 1148 | #ifdef CONFIG_NUMA |
1143 | p->mempolicy = mpol_dup(p->mempolicy); | 1149 | p->mempolicy = mpol_dup(p->mempolicy); |
@@ -1342,6 +1348,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1342 | write_unlock_irq(&tasklist_lock); | 1348 | write_unlock_irq(&tasklist_lock); |
1343 | proc_fork_connector(p); | 1349 | proc_fork_connector(p); |
1344 | cgroup_post_fork(p); | 1350 | cgroup_post_fork(p); |
1351 | if (clone_flags & CLONE_THREAD) | ||
1352 | threadgroup_fork_read_unlock(current); | ||
1345 | perf_event_fork(p); | 1353 | perf_event_fork(p); |
1346 | return p; | 1354 | return p; |
1347 | 1355 | ||
@@ -1380,6 +1388,8 @@ bad_fork_cleanup_policy: | |||
1380 | mpol_put(p->mempolicy); | 1388 | mpol_put(p->mempolicy); |
1381 | bad_fork_cleanup_cgroup: | 1389 | bad_fork_cleanup_cgroup: |
1382 | #endif | 1390 | #endif |
1391 | if (clone_flags & CLONE_THREAD) | ||
1392 | threadgroup_fork_read_unlock(current); | ||
1383 | cgroup_exit(p, cgroup_callbacks_done); | 1393 | cgroup_exit(p, cgroup_callbacks_done); |
1384 | delayacct_tsk_free(p); | 1394 | delayacct_tsk_free(p); |
1385 | module_put(task_thread_info(p)->exec_domain->module); | 1395 | module_put(task_thread_info(p)->exec_domain->module); |