diff options
-rw-r--r-- | include/linux/cgroup-defs.h | 27 | ||||
-rw-r--r-- | include/linux/init_task.h | 8 | ||||
-rw-r--r-- | include/linux/sched.h | 12 | ||||
-rw-r--r-- | init/Kconfig | 1 | ||||
-rw-r--r-- | kernel/cgroup.c | 77 | ||||
-rw-r--r-- | kernel/fork.c | 4 |
6 files changed, 46 insertions, 83 deletions
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 1b8c93806dbd..7d83d7f73420 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h | |||
@@ -461,8 +461,31 @@ struct cgroup_subsys { | |||
461 | unsigned int depends_on; | 461 | unsigned int depends_on; |
462 | }; | 462 | }; |
463 | 463 | ||
464 | void cgroup_threadgroup_change_begin(struct task_struct *tsk); | 464 | extern struct percpu_rw_semaphore cgroup_threadgroup_rwsem; |
465 | void cgroup_threadgroup_change_end(struct task_struct *tsk); | 465 | |
466 | /** | ||
467 | * cgroup_threadgroup_change_begin - threadgroup exclusion for cgroups | ||
468 | * @tsk: target task | ||
469 | * | ||
470 | * Called from threadgroup_change_begin() and allows cgroup operations to | ||
471 | * synchronize against threadgroup changes using a percpu_rw_semaphore. | ||
472 | */ | ||
473 | static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk) | ||
474 | { | ||
475 | percpu_down_read(&cgroup_threadgroup_rwsem); | ||
476 | } | ||
477 | |||
478 | /** | ||
479 | * cgroup_threadgroup_change_end - threadgroup exclusion for cgroups | ||
480 | * @tsk: target task | ||
481 | * | ||
482 | * Called from threadgroup_change_end(). Counterpart of | ||
483 | * cgroup_threadgroup_change_begin(). | ||
484 | */ | ||
485 | static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) | ||
486 | { | ||
487 | percpu_up_read(&cgroup_threadgroup_rwsem); | ||
488 | } | ||
466 | 489 | ||
467 | #else /* CONFIG_CGROUPS */ | 490 | #else /* CONFIG_CGROUPS */ |
468 | 491 | ||
diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 696d22312b31..0cc0bbf20022 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h | |||
@@ -25,13 +25,6 @@ | |||
25 | extern struct files_struct init_files; | 25 | extern struct files_struct init_files; |
26 | extern struct fs_struct init_fs; | 26 | extern struct fs_struct init_fs; |
27 | 27 | ||
28 | #ifdef CONFIG_CGROUPS | ||
29 | #define INIT_GROUP_RWSEM(sig) \ | ||
30 | .group_rwsem = __RWSEM_INITIALIZER(sig.group_rwsem), | ||
31 | #else | ||
32 | #define INIT_GROUP_RWSEM(sig) | ||
33 | #endif | ||
34 | |||
35 | #ifdef CONFIG_CPUSETS | 28 | #ifdef CONFIG_CPUSETS |
36 | #define INIT_CPUSET_SEQ(tsk) \ | 29 | #define INIT_CPUSET_SEQ(tsk) \ |
37 | .mems_allowed_seq = SEQCNT_ZERO(tsk.mems_allowed_seq), | 30 | .mems_allowed_seq = SEQCNT_ZERO(tsk.mems_allowed_seq), |
@@ -56,7 +49,6 @@ extern struct fs_struct init_fs; | |||
56 | }, \ | 49 | }, \ |
57 | .cred_guard_mutex = \ | 50 | .cred_guard_mutex = \ |
58 | __MUTEX_INITIALIZER(sig.cred_guard_mutex), \ | 51 | __MUTEX_INITIALIZER(sig.cred_guard_mutex), \ |
59 | INIT_GROUP_RWSEM(sig) \ | ||
60 | } | 52 | } |
61 | 53 | ||
62 | extern struct nsproxy init_nsproxy; | 54 | extern struct nsproxy init_nsproxy; |
diff --git a/include/linux/sched.h b/include/linux/sched.h index 5ee290003470..add524a910bd 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -743,18 +743,6 @@ struct signal_struct { | |||
743 | unsigned audit_tty_log_passwd; | 743 | unsigned audit_tty_log_passwd; |
744 | struct tty_audit_buf *tty_audit_buf; | 744 | struct tty_audit_buf *tty_audit_buf; |
745 | #endif | 745 | #endif |
746 | #ifdef CONFIG_CGROUPS | ||
747 | /* | ||
748 | * group_rwsem prevents new tasks from entering the threadgroup and | ||
749 | * member tasks from exiting, more specifically, setting of | ||
750 | * PF_EXITING. fork and exit paths are protected with this rwsem | ||
751 | * using threadgroup_change_begin/end(). Users which require | ||
752 | * threadgroup to remain stable should use threadgroup_[un]lock() | ||
753 | * which also takes care of exec path. Currently, cgroup is the | ||
754 | * only user. | ||
755 | */ | ||
756 | struct rw_semaphore group_rwsem; | ||
757 | #endif | ||
758 | 746 | ||
759 | oom_flags_t oom_flags; | 747 | oom_flags_t oom_flags; |
760 | short oom_score_adj; /* OOM kill score adjustment */ | 748 | short oom_score_adj; /* OOM kill score adjustment */ |
diff --git a/init/Kconfig b/init/Kconfig index dc24dec60232..b9b824bf8f6b 100644 --- a/init/Kconfig +++ b/init/Kconfig | |||
@@ -938,6 +938,7 @@ config NUMA_BALANCING_DEFAULT_ENABLED | |||
938 | menuconfig CGROUPS | 938 | menuconfig CGROUPS |
939 | bool "Control Group support" | 939 | bool "Control Group support" |
940 | select KERNFS | 940 | select KERNFS |
941 | select PERCPU_RWSEM | ||
941 | help | 942 | help |
942 | This option adds support for grouping sets of processes together, for | 943 | This option adds support for grouping sets of processes together, for |
943 | use with process control subsystems such as Cpusets, CFS, memory | 944 | use with process control subsystems such as Cpusets, CFS, memory |
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 980b1f52f39f..77578a169b8c 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
@@ -46,6 +46,7 @@ | |||
46 | #include <linux/slab.h> | 46 | #include <linux/slab.h> |
47 | #include <linux/spinlock.h> | 47 | #include <linux/spinlock.h> |
48 | #include <linux/rwsem.h> | 48 | #include <linux/rwsem.h> |
49 | #include <linux/percpu-rwsem.h> | ||
49 | #include <linux/string.h> | 50 | #include <linux/string.h> |
50 | #include <linux/sort.h> | 51 | #include <linux/sort.h> |
51 | #include <linux/kmod.h> | 52 | #include <linux/kmod.h> |
@@ -103,6 +104,8 @@ static DEFINE_SPINLOCK(cgroup_idr_lock); | |||
103 | */ | 104 | */ |
104 | static DEFINE_SPINLOCK(release_agent_path_lock); | 105 | static DEFINE_SPINLOCK(release_agent_path_lock); |
105 | 106 | ||
107 | struct percpu_rw_semaphore cgroup_threadgroup_rwsem; | ||
108 | |||
106 | #define cgroup_assert_mutex_or_rcu_locked() \ | 109 | #define cgroup_assert_mutex_or_rcu_locked() \ |
107 | rcu_lockdep_assert(rcu_read_lock_held() || \ | 110 | rcu_lockdep_assert(rcu_read_lock_held() || \ |
108 | lockdep_is_held(&cgroup_mutex), \ | 111 | lockdep_is_held(&cgroup_mutex), \ |
@@ -848,48 +851,6 @@ static struct css_set *find_css_set(struct css_set *old_cset, | |||
848 | return cset; | 851 | return cset; |
849 | } | 852 | } |
850 | 853 | ||
851 | void cgroup_threadgroup_change_begin(struct task_struct *tsk) | ||
852 | { | ||
853 | down_read(&tsk->signal->group_rwsem); | ||
854 | } | ||
855 | |||
856 | void cgroup_threadgroup_change_end(struct task_struct *tsk) | ||
857 | { | ||
858 | up_read(&tsk->signal->group_rwsem); | ||
859 | } | ||
860 | |||
861 | /** | ||
862 | * threadgroup_lock - lock threadgroup | ||
863 | * @tsk: member task of the threadgroup to lock | ||
864 | * | ||
865 | * Lock the threadgroup @tsk belongs to. No new task is allowed to enter | ||
866 | * and member tasks aren't allowed to exit (as indicated by PF_EXITING) or | ||
867 | * change ->group_leader/pid. This is useful for cases where the threadgroup | ||
868 | * needs to stay stable across blockable operations. | ||
869 | * | ||
870 | * fork and exit explicitly call threadgroup_change_{begin|end}() for | ||
871 | * synchronization. While held, no new task will be added to threadgroup | ||
872 | * and no existing live task will have its PF_EXITING set. | ||
873 | * | ||
874 | * de_thread() does threadgroup_change_{begin|end}() when a non-leader | ||
875 | * sub-thread becomes a new leader. | ||
876 | */ | ||
877 | static void threadgroup_lock(struct task_struct *tsk) | ||
878 | { | ||
879 | down_write(&tsk->signal->group_rwsem); | ||
880 | } | ||
881 | |||
882 | /** | ||
883 | * threadgroup_unlock - unlock threadgroup | ||
884 | * @tsk: member task of the threadgroup to unlock | ||
885 | * | ||
886 | * Reverse threadgroup_lock(). | ||
887 | */ | ||
888 | static inline void threadgroup_unlock(struct task_struct *tsk) | ||
889 | { | ||
890 | up_write(&tsk->signal->group_rwsem); | ||
891 | } | ||
892 | |||
893 | static struct cgroup_root *cgroup_root_from_kf(struct kernfs_root *kf_root) | 854 | static struct cgroup_root *cgroup_root_from_kf(struct kernfs_root *kf_root) |
894 | { | 855 | { |
895 | struct cgroup *root_cgrp = kf_root->kn->priv; | 856 | struct cgroup *root_cgrp = kf_root->kn->priv; |
@@ -2095,9 +2056,9 @@ static void cgroup_task_migrate(struct cgroup *old_cgrp, | |||
2095 | lockdep_assert_held(&css_set_rwsem); | 2056 | lockdep_assert_held(&css_set_rwsem); |
2096 | 2057 | ||
2097 | /* | 2058 | /* |
2098 | * We are synchronized through threadgroup_lock() against PF_EXITING | 2059 | * We are synchronized through cgroup_threadgroup_rwsem against |
2099 | * setting such that we can't race against cgroup_exit() changing the | 2060 | * PF_EXITING setting such that we can't race against cgroup_exit() |
2100 | * css_set to init_css_set and dropping the old one. | 2061 | * changing the css_set to init_css_set and dropping the old one. |
2101 | */ | 2062 | */ |
2102 | WARN_ON_ONCE(tsk->flags & PF_EXITING); | 2063 | WARN_ON_ONCE(tsk->flags & PF_EXITING); |
2103 | old_cset = task_css_set(tsk); | 2064 | old_cset = task_css_set(tsk); |
@@ -2154,10 +2115,11 @@ static void cgroup_migrate_finish(struct list_head *preloaded_csets) | |||
2154 | * @src_cset and add it to @preloaded_csets, which should later be cleaned | 2115 | * @src_cset and add it to @preloaded_csets, which should later be cleaned |
2155 | * up by cgroup_migrate_finish(). | 2116 | * up by cgroup_migrate_finish(). |
2156 | * | 2117 | * |
2157 | * This function may be called without holding threadgroup_lock even if the | 2118 | * This function may be called without holding cgroup_threadgroup_rwsem |
2158 | * target is a process. Threads may be created and destroyed but as long | 2119 | * even if the target is a process. Threads may be created and destroyed |
2159 | * as cgroup_mutex is not dropped, no new css_set can be put into play and | 2120 | * but as long as cgroup_mutex is not dropped, no new css_set can be put |
2160 | * the preloaded css_sets are guaranteed to cover all migrations. | 2121 | * into play and the preloaded css_sets are guaranteed to cover all |
2122 | * migrations. | ||
2161 | */ | 2123 | */ |
2162 | static void cgroup_migrate_add_src(struct css_set *src_cset, | 2124 | static void cgroup_migrate_add_src(struct css_set *src_cset, |
2163 | struct cgroup *dst_cgrp, | 2125 | struct cgroup *dst_cgrp, |
@@ -2260,7 +2222,7 @@ err: | |||
2260 | * @threadgroup: whether @leader points to the whole process or a single task | 2222 | * @threadgroup: whether @leader points to the whole process or a single task |
2261 | * | 2223 | * |
2262 | * Migrate a process or task denoted by @leader to @cgrp. If migrating a | 2224 | * Migrate a process or task denoted by @leader to @cgrp. If migrating a |
2263 | * process, the caller must be holding threadgroup_lock of @leader. The | 2225 | * process, the caller must be holding cgroup_threadgroup_rwsem. The |
2264 | * caller is also responsible for invoking cgroup_migrate_add_src() and | 2226 | * caller is also responsible for invoking cgroup_migrate_add_src() and |
2265 | * cgroup_migrate_prepare_dst() on the targets before invoking this | 2227 | * cgroup_migrate_prepare_dst() on the targets before invoking this |
2266 | * function and following up with cgroup_migrate_finish(). | 2228 | * function and following up with cgroup_migrate_finish(). |
@@ -2388,7 +2350,7 @@ out_release_tset: | |||
2388 | * @leader: the task or the leader of the threadgroup to be attached | 2350 | * @leader: the task or the leader of the threadgroup to be attached |
2389 | * @threadgroup: attach the whole threadgroup? | 2351 | * @threadgroup: attach the whole threadgroup? |
2390 | * | 2352 | * |
2391 | * Call holding cgroup_mutex and threadgroup_lock of @leader. | 2353 | * Call holding cgroup_mutex and cgroup_threadgroup_rwsem. |
2392 | */ | 2354 | */ |
2393 | static int cgroup_attach_task(struct cgroup *dst_cgrp, | 2355 | static int cgroup_attach_task(struct cgroup *dst_cgrp, |
2394 | struct task_struct *leader, bool threadgroup) | 2356 | struct task_struct *leader, bool threadgroup) |
@@ -2481,7 +2443,7 @@ retry_find_task: | |||
2481 | get_task_struct(tsk); | 2443 | get_task_struct(tsk); |
2482 | rcu_read_unlock(); | 2444 | rcu_read_unlock(); |
2483 | 2445 | ||
2484 | threadgroup_lock(tsk); | 2446 | percpu_down_write(&cgroup_threadgroup_rwsem); |
2485 | if (threadgroup) { | 2447 | if (threadgroup) { |
2486 | if (!thread_group_leader(tsk)) { | 2448 | if (!thread_group_leader(tsk)) { |
2487 | /* | 2449 | /* |
@@ -2491,7 +2453,7 @@ retry_find_task: | |||
2491 | * try again; this is | 2453 | * try again; this is |
2492 | * "double-double-toil-and-trouble-check locking". | 2454 | * "double-double-toil-and-trouble-check locking". |
2493 | */ | 2455 | */ |
2494 | threadgroup_unlock(tsk); | 2456 | percpu_up_write(&cgroup_threadgroup_rwsem); |
2495 | put_task_struct(tsk); | 2457 | put_task_struct(tsk); |
2496 | goto retry_find_task; | 2458 | goto retry_find_task; |
2497 | } | 2459 | } |
@@ -2499,7 +2461,7 @@ retry_find_task: | |||
2499 | 2461 | ||
2500 | ret = cgroup_attach_task(cgrp, tsk, threadgroup); | 2462 | ret = cgroup_attach_task(cgrp, tsk, threadgroup); |
2501 | 2463 | ||
2502 | threadgroup_unlock(tsk); | 2464 | percpu_up_write(&cgroup_threadgroup_rwsem); |
2503 | 2465 | ||
2504 | put_task_struct(tsk); | 2466 | put_task_struct(tsk); |
2505 | out_unlock_cgroup: | 2467 | out_unlock_cgroup: |
@@ -2704,17 +2666,17 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp) | |||
2704 | goto out_finish; | 2666 | goto out_finish; |
2705 | last_task = task; | 2667 | last_task = task; |
2706 | 2668 | ||
2707 | threadgroup_lock(task); | 2669 | percpu_down_write(&cgroup_threadgroup_rwsem); |
2708 | /* raced against de_thread() from another thread? */ | 2670 | /* raced against de_thread() from another thread? */ |
2709 | if (!thread_group_leader(task)) { | 2671 | if (!thread_group_leader(task)) { |
2710 | threadgroup_unlock(task); | 2672 | percpu_up_write(&cgroup_threadgroup_rwsem); |
2711 | put_task_struct(task); | 2673 | put_task_struct(task); |
2712 | continue; | 2674 | continue; |
2713 | } | 2675 | } |
2714 | 2676 | ||
2715 | ret = cgroup_migrate(src_cset->dfl_cgrp, task, true); | 2677 | ret = cgroup_migrate(src_cset->dfl_cgrp, task, true); |
2716 | 2678 | ||
2717 | threadgroup_unlock(task); | 2679 | percpu_up_write(&cgroup_threadgroup_rwsem); |
2718 | put_task_struct(task); | 2680 | put_task_struct(task); |
2719 | 2681 | ||
2720 | if (WARN(ret, "cgroup: failed to update controllers for the default hierarchy (%d), further operations may crash or hang\n", ret)) | 2682 | if (WARN(ret, "cgroup: failed to update controllers for the default hierarchy (%d), further operations may crash or hang\n", ret)) |
@@ -5032,6 +4994,7 @@ int __init cgroup_init(void) | |||
5032 | unsigned long key; | 4994 | unsigned long key; |
5033 | int ssid, err; | 4995 | int ssid, err; |
5034 | 4996 | ||
4997 | BUG_ON(percpu_init_rwsem(&cgroup_threadgroup_rwsem)); | ||
5035 | BUG_ON(cgroup_init_cftypes(NULL, cgroup_dfl_base_files)); | 4998 | BUG_ON(cgroup_init_cftypes(NULL, cgroup_dfl_base_files)); |
5036 | BUG_ON(cgroup_init_cftypes(NULL, cgroup_legacy_base_files)); | 4999 | BUG_ON(cgroup_init_cftypes(NULL, cgroup_legacy_base_files)); |
5037 | 5000 | ||
diff --git a/kernel/fork.c b/kernel/fork.c index 03c1eaaa6ef5..9531275e12a9 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -1144,10 +1144,6 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) | |||
1144 | tty_audit_fork(sig); | 1144 | tty_audit_fork(sig); |
1145 | sched_autogroup_fork(sig); | 1145 | sched_autogroup_fork(sig); |
1146 | 1146 | ||
1147 | #ifdef CONFIG_CGROUPS | ||
1148 | init_rwsem(&sig->group_rwsem); | ||
1149 | #endif | ||
1150 | |||
1151 | sig->oom_score_adj = current->signal->oom_score_adj; | 1147 | sig->oom_score_adj = current->signal->oom_score_adj; |
1152 | sig->oom_score_adj_min = current->signal->oom_score_adj_min; | 1148 | sig->oom_score_adj_min = current->signal->oom_score_adj_min; |
1153 | 1149 | ||