cgroups: revamp subsys array

This patch series provides the ability for cgroup subsystems to be compiled as modules both within and outside the kernel tree. This is mainly useful for classifiers and subsystems that hook into components that are already modules. cls_cgroup and blkio-cgroup serve as the example use cases for this feature.

It provides the interfaces cgroup_load_subsys() and cgroup_unload_subsys(), which modular subsystems can use to register and depart at runtime. The net_cls classifier subsystem serves as the example of a subsystem which can be converted into a module using these changes.

Patch #1 sets up the subsys[] array so its contents can be dynamic as modules appear and (eventually) disappear. Iterations over the array are modified to handle when subsystems are absent, and the dynamic section of the array is protected by cgroup_mutex.

Patch #2 implements an interface for modules to load subsystems, called cgroup_load_subsys, similar to cgroup_init_subsys, and adds a module pointer in struct cgroup_subsys.

Patch #3 adds a mechanism for unloading modular subsystems, which includes a more advanced rework of the rudimentary reference counting introduced in patch 2.

Patch #4 modifies the net_cls subsystem, which already had some module declarations, to be configurable as a module, which also serves as a simple proof-of-concept.

Part of implementing patches 2 and 4 involved updating css pointers in each css_set when the module appears or leaves. In doing this, it was discovered that css_sets always remain linked to the dummy cgroup, regardless of whether or not any subsystems are actually bound to it (i.e., not mounted on an actual hierarchy).
The subsystem loading and unloading code therefore should keep in mind the special cases where the added subsystem is the only one in the dummy cgroup (and therefore all css_sets need to be linked back into it) and where the removed subsys was the only one in the dummy cgroup (and therefore all css_sets should be unlinked from it) - however, as all css_sets always stay attached to the dummy cgroup anyway, these cases are ignored. Any fix that addresses this issue should also make sure these cases are addressed in the subsystem loading and unloading code.

This patch:

Make subsys[] able to be dynamically populated to support modular subsystems.

This patch reworks the way the subsys[] array is used so that subsystems can register themselves after boot time, and enables the internals of cgroups to handle subsystems that are not present or that may appear/disappear.

Signed-off-by: Ben Blum <bblum@andrew.cmu.edu>
Acked-by: Li Zefan <lizf@cn.fujitsu.com>
Cc: Paul Menage <menage@google.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
author: Ben Blum <bblum@andrew.cmu.edu> 2010-03-10 18:22:07 -0500
committer: Linus Torvalds <torvalds@linux-foundation.org> 2010-03-12 18:52:36 -0500
commit: aae8aab40367036931608fdaf9e2dc568b516f19 (patch)
tree: b2a06ee21042eb3972ecd9e4153d61a8f6ed53cb /kernel
parent: d7b9fff711d5e8db8c844161c684017e556c38a0 (diff)
1 files changed, 80 insertions, 16 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index cace83ddbcdc..c92fb9549358 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -57,10 +57,14 @@
 static DEFINE_MUTEX(cgroup_mutex);
-/* Generate an array of cgroup subsystem pointers */
+/*
+ * Generate an array of cgroup subsystem pointers. At boot time, this is
+ * populated up to CGROUP_BUILTIN_SUBSYS_COUNT, and modular subsystems are
+ * registered after that. The mutable section of this array is protected by
+ * cgroup_mutex.
+ */
 #define SUBSYS(_x) &_x ## _subsys,
+static struct cgroup_subsys *subsys[CGROUP_SUBSYS_COUNT] = {
-static struct cgroup_subsys *subsys[] = {
 #include <linux/cgroup_subsys.h>
 };
@@ -448,8 +452,11 @@ static struct css_set *find_existing_css_set(
        struct hlist_node *node;
        struct css_set *cg;
-        /* Built the set of subsystem state objects that we want to
+        /*
-         * see in the new css_set */
+         * Build the set of subsystem state objects that we want to see in the
+         * new css_set. while subsystems can change globally, the entries here
+         * won't change, so no need for locking.
+         */
        for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
                if (root->subsys_bits & (1UL << i)) {
                        /* Subsystem is in this hierarchy. So we want
@@ -884,7 +891,9 @@ void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css)
        css_put(css);
 }
+/*
+ * Call with cgroup_mutex held.
+ */
 static int rebind_subsystems(struct cgroupfs_root *root,
                              unsigned long final_bits)
 {
@@ -892,6 +901,8 @@ static int rebind_subsystems(struct cgroupfs_root *root,
        struct cgroup *cgrp = &root->top_cgroup;
        int i;
+        BUG_ON(!mutex_is_locked(&cgroup_mutex));
        removed_bits = root->actual_subsys_bits & ~final_bits;
        added_bits = final_bits & ~root->actual_subsys_bits;
        /* Check that any added subsystems are currently free */
@@ -900,6 +911,12 @@ static int rebind_subsystems(struct cgroupfs_root *root,
                struct cgroup_subsys *ss = subsys[i];
                if (!(bit & added_bits))
                        continue;
+                /*
+                 * Nobody should tell us to do a subsys that doesn't exist:
+                 * parse_cgroupfs_options should catch that case and refcounts
+                 * ensure that subsystems won't disappear once selected.
+                 */
+                BUG_ON(ss == NULL);
                if (ss->root != &rootnode) {
                        /* Subsystem isn't free */
                        return -EBUSY;
@@ -919,6 +936,7 @@ static int rebind_subsystems(struct cgroupfs_root *root,
                unsigned long bit = 1UL << i;
                if (bit & added_bits) {
                        /* We're binding this subsystem to this hierarchy */
+                        BUG_ON(ss == NULL);
                        BUG_ON(cgrp->subsys[i]);
                        BUG_ON(!dummytop->subsys[i]);
                        BUG_ON(dummytop->subsys[i]->cgroup != dummytop);
@@ -932,6 +950,7 @@ static int rebind_subsystems(struct cgroupfs_root *root,
                        mutex_unlock(&ss->hierarchy_mutex);
                } else if (bit & removed_bits) {
                        /* We're removing this subsystem */
+                        BUG_ON(ss == NULL);
                        BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]);
                        BUG_ON(cgrp->subsys[i]->cgroup != cgrp);
                        mutex_lock(&ss->hierarchy_mutex);
@@ -944,6 +963,7 @@ static int rebind_subsystems(struct cgroupfs_root *root,
                        mutex_unlock(&ss->hierarchy_mutex);
                } else if (bit & final_bits) {
                        /* Subsystem state should already exist */
+                        BUG_ON(ss == NULL);
                        BUG_ON(!cgrp->subsys[i]);
                } else {
                        /* Subsystem state shouldn't exist */
@@ -986,14 +1006,18 @@ struct cgroup_sb_opts {
 };
-/* Convert a hierarchy specifier into a bitmask of subsystems and
+/*
- * flags. */
+ * Convert a hierarchy specifier into a bitmask of subsystems and flags. Call
+ * with cgroup_mutex held to protect the subsys[] array.
+ */
 static int parse_cgroupfs_options(char *data,
                                     struct cgroup_sb_opts *opts)
 {
        char *token, *o = data ?: "all";
        unsigned long mask = (unsigned long)-1;
+        BUG_ON(!mutex_is_locked(&cgroup_mutex));
 #ifdef CONFIG_CPUSETS
        mask = ~(1UL << cpuset_subsys_id);
 #endif
@@ -1009,6 +1033,8 @@ static int parse_cgroupfs_options(char *data,
                        opts->subsys_bits = 0;
                        for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
                                struct cgroup_subsys *ss = subsys[i];
+                                if (ss == NULL)
+                                        continue;
                                if (!ss->disabled)
                                        opts->subsys_bits |= 1ul << i;
                        }
@@ -1053,6 +1079,8 @@ static int parse_cgroupfs_options(char *data,
                        int i;
                        for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
                                ss = subsys[i];
+                                if (ss == NULL)
+                                        continue;
                                if (!strcmp(token, ss->name)) {
                                        if (!ss->disabled)
                                                set_bit(i, &opts->subsys_bits);
@@ -1306,7 +1334,9 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
        struct cgroupfs_root *new_root;
        /* First find the desired set of subsystems */
+        mutex_lock(&cgroup_mutex);
        ret = parse_cgroupfs_options(data, &opts);
+        mutex_unlock(&cgroup_mutex);
        if (ret)
                goto out_err;
@@ -2918,8 +2948,14 @@ static void cgroup_lock_hierarchy(struct cgroupfs_root *root)
        /* We need to take each hierarchy_mutex in a consistent order */
        int i;
+        /*
+         * No worry about a race with rebind_subsystems that might mess up the
+         * locking order, since both parties are under cgroup_mutex.
+         */
        for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
                struct cgroup_subsys *ss = subsys[i];
+                if (ss == NULL)
+                        continue;
                if (ss->root == root)
                        mutex_lock(&ss->hierarchy_mutex);
        }
@@ -2931,6 +2967,8 @@ static void cgroup_unlock_hierarchy(struct cgroupfs_root *root)
        for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
                struct cgroup_subsys *ss = subsys[i];
+                if (ss == NULL)
+                        continue;
                if (ss->root == root)
                        mutex_unlock(&ss->hierarchy_mutex);
        }
@@ -3054,11 +3092,16 @@ static int cgroup_has_css_refs(struct cgroup *cgrp)
         * synchronization other than RCU, and the subsystem linked
         * list isn't RCU-safe */
        int i;
+        /*
+         * We won't need to lock the subsys array, because the subsystems
+         * we're concerned about aren't going anywhere since our cgroup root
+         * has a reference on them.
+         */
        for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
                struct cgroup_subsys *ss = subsys[i];
                struct cgroup_subsys_state *css;
-                /* Skip subsystems not in this hierarchy */
+                /* Skip subsystems not present or not in this hierarchy */
-                if (ss->root != cgrp->root)
+                if (ss == NULL || ss->root != cgrp->root)
                        continue;
                css = cgrp->subsys[ss->subsys_id];
                /* When called from check_for_release() it's possible
@@ -3279,7 +3322,8 @@ int __init cgroup_init_early(void)
        for (i = 0; i < CSS_SET_TABLE_SIZE; i++)
                INIT_HLIST_HEAD(&css_set_table[i]);
-        for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+        /* at bootup time, we don't worry about modular subsystems */
+        for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
                struct cgroup_subsys *ss = subsys[i];
                BUG_ON(!ss->name);
@@ -3314,7 +3358,8 @@ int __init cgroup_init(void)
        if (err)
                return err;
-        for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+        /* at bootup time, we don't worry about modular subsystems */
+        for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
                struct cgroup_subsys *ss = subsys[i];
                if (!ss->early_init)
                        cgroup_init_subsys(ss);
@@ -3423,9 +3468,16 @@ static int proc_cgroupstats_show(struct seq_file *m, void *v)
        int i;
        seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\tenabled\n");
+        /*
+         * ideally we don't want subsystems moving around while we do this.
+         * cgroup_mutex is also necessary to guarantee an atomic snapshot of
+         * subsys/hierarchy state.
+         */
        mutex_lock(&cgroup_mutex);
        for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
                struct cgroup_subsys *ss = subsys[i];
+                if (ss == NULL)
+                        continue;
                seq_printf(m, "%s\t%d\t%d\t%d\n",
                           ss->name, ss->root->hierarchy_id,
                           ss->root->number_of_cgroups, !ss->disabled);
@@ -3483,7 +3535,12 @@ void cgroup_fork_callbacks(struct task_struct *child)
 {
        if (need_forkexit_callback) {
                int i;
-                for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+                /*
+                 * forkexit callbacks are only supported for builtin
+                 * subsystems, and the builtin section of the subsys array is
+                 * immutable, so we don't need to lock the subsys array here.
+                 */
+                for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
                        struct cgroup_subsys *ss = subsys[i];
                        if (ss->fork)
                                ss->fork(ss, child);
@@ -3552,7 +3609,11 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks)
        struct css_set *cg;
        if (run_callbacks && need_forkexit_callback) {
-                for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+                /*
+                 * modular subsystems can't use callbacks, so no need to lock
+                 * the subsys array
+                 */
+                for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
                        struct cgroup_subsys *ss = subsys[i];
                        if (ss->exit)
                                ss->exit(ss, tsk);
@@ -3844,8 +3905,11 @@ static int __init cgroup_disable(char *str)
        while ((token = strsep(&str, ",")) != NULL) {
                if (!*token)
                        continue;
+                /*
-                for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+                 * cgroup_disable, being at boot time, can't know about module
+                 * subsystems, so we don't worry about them.
+                 */
+                for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
                        struct cgroup_subsys *ss = subsys[i];
                        if (!strcmp(token, ss->name)) {
author	Ben Blum <bblum@andrew.cmu.edu>	2010-03-10 18:22:07 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2010-03-12 18:52:36 -0500
commit	aae8aab40367036931608fdaf9e2dc568b516f19 (patch)
tree	b2a06ee21042eb3972ecd9e4153d61a8f6ed53cb /kernel
parent	d7b9fff711d5e8db8c844161c684017e556c38a0 (diff)

diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index cace83ddbcdc..c92fb9549358 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -57,10 +57,14 @@
57		57
58	static DEFINE_MUTEX(cgroup_mutex);	58	static DEFINE_MUTEX(cgroup_mutex);
59		59
60	/* Generate an array of cgroup subsystem pointers */	60	/*
		61	* Generate an array of cgroup subsystem pointers. At boot time, this is
		62	* populated up to CGROUP_BUILTIN_SUBSYS_COUNT, and modular subsystems are
		63	* registered after that. The mutable section of this array is protected by
		64	* cgroup_mutex.
		65	*/
61	#define SUBSYS(_x) &_x ## _subsys,	66	#define SUBSYS(_x) &_x ## _subsys,
62		67	static struct cgroup_subsys *subsys[CGROUP_SUBSYS_COUNT] = {
63	static struct cgroup_subsys *subsys[] = {
64	#include <linux/cgroup_subsys.h>	68	#include <linux/cgroup_subsys.h>
65	};	69	};
66		70
@@ -448,8 +452,11 @@ static struct css_set *find_existing_css_set(
448	struct hlist_node *node;	452	struct hlist_node *node;
449	struct css_set *cg;	453	struct css_set *cg;
450		454
451	/* Built the set of subsystem state objects that we want to	455	/*
452	* see in the new css_set */	456	* Build the set of subsystem state objects that we want to see in the
		457	* new css_set. while subsystems can change globally, the entries here
		458	* won't change, so no need for locking.
		459	*/
453	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {	460	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
454	if (root->subsys_bits & (1UL << i)) {	461	if (root->subsys_bits & (1UL << i)) {
455	/* Subsystem is in this hierarchy. So we want	462	/* Subsystem is in this hierarchy. So we want
@@ -884,7 +891,9 @@ void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css)
884	css_put(css);	891	css_put(css);
885	}	892	}
886		893
887		894	/*
		895	* Call with cgroup_mutex held.
		896	*/
888	static int rebind_subsystems(struct cgroupfs_root *root,	897	static int rebind_subsystems(struct cgroupfs_root *root,
889	unsigned long final_bits)	898	unsigned long final_bits)
890	{	899	{
@@ -892,6 +901,8 @@ static int rebind_subsystems(struct cgroupfs_root *root,
892	struct cgroup *cgrp = &root->top_cgroup;	901	struct cgroup *cgrp = &root->top_cgroup;
893	int i;	902	int i;
894		903
		904	BUG_ON(!mutex_is_locked(&cgroup_mutex));
		905
895	removed_bits = root->actual_subsys_bits & ~final_bits;	906	removed_bits = root->actual_subsys_bits & ~final_bits;
896	added_bits = final_bits & ~root->actual_subsys_bits;	907	added_bits = final_bits & ~root->actual_subsys_bits;
897	/* Check that any added subsystems are currently free */	908	/* Check that any added subsystems are currently free */
@@ -900,6 +911,12 @@ static int rebind_subsystems(struct cgroupfs_root *root,
900	struct cgroup_subsys *ss = subsys[i];	911	struct cgroup_subsys *ss = subsys[i];
901	if (!(bit & added_bits))	912	if (!(bit & added_bits))
902	continue;	913	continue;
		914	/*
		915	* Nobody should tell us to do a subsys that doesn't exist:
		916	* parse_cgroupfs_options should catch that case and refcounts
		917	* ensure that subsystems won't disappear once selected.
		918	*/
		919	BUG_ON(ss == NULL);
903	if (ss->root != &rootnode) {	920	if (ss->root != &rootnode) {
904	/* Subsystem isn't free */	921	/* Subsystem isn't free */
905	return -EBUSY;	922	return -EBUSY;
@@ -919,6 +936,7 @@ static int rebind_subsystems(struct cgroupfs_root *root,
919	unsigned long bit = 1UL << i;	936	unsigned long bit = 1UL << i;
920	if (bit & added_bits) {	937	if (bit & added_bits) {
921	/* We're binding this subsystem to this hierarchy */	938	/* We're binding this subsystem to this hierarchy */
		939	BUG_ON(ss == NULL);
922	BUG_ON(cgrp->subsys[i]);	940	BUG_ON(cgrp->subsys[i]);
923	BUG_ON(!dummytop->subsys[i]);	941	BUG_ON(!dummytop->subsys[i]);
924	BUG_ON(dummytop->subsys[i]->cgroup != dummytop);	942	BUG_ON(dummytop->subsys[i]->cgroup != dummytop);
@@ -932,6 +950,7 @@ static int rebind_subsystems(struct cgroupfs_root *root,
932	mutex_unlock(&ss->hierarchy_mutex);	950	mutex_unlock(&ss->hierarchy_mutex);
933	} else if (bit & removed_bits) {	951	} else if (bit & removed_bits) {
934	/* We're removing this subsystem */	952	/* We're removing this subsystem */
		953	BUG_ON(ss == NULL);
935	BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]);	954	BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]);
936	BUG_ON(cgrp->subsys[i]->cgroup != cgrp);	955	BUG_ON(cgrp->subsys[i]->cgroup != cgrp);
937	mutex_lock(&ss->hierarchy_mutex);	956	mutex_lock(&ss->hierarchy_mutex);
@@ -944,6 +963,7 @@ static int rebind_subsystems(struct cgroupfs_root *root,
944	mutex_unlock(&ss->hierarchy_mutex);	963	mutex_unlock(&ss->hierarchy_mutex);
945	} else if (bit & final_bits) {	964	} else if (bit & final_bits) {
946	/* Subsystem state should already exist */	965	/* Subsystem state should already exist */
		966	BUG_ON(ss == NULL);
947	BUG_ON(!cgrp->subsys[i]);	967	BUG_ON(!cgrp->subsys[i]);
948	} else {	968	} else {
949	/* Subsystem state shouldn't exist */	969	/* Subsystem state shouldn't exist */
@@ -986,14 +1006,18 @@ struct cgroup_sb_opts {
986		1006
987	};	1007	};
988		1008
989	/* Convert a hierarchy specifier into a bitmask of subsystems and	1009	/*
990	* flags. */	1010	* Convert a hierarchy specifier into a bitmask of subsystems and flags. Call
		1011	* with cgroup_mutex held to protect the subsys[] array.
		1012	*/
991	static int parse_cgroupfs_options(char *data,	1013	static int parse_cgroupfs_options(char *data,
992	struct cgroup_sb_opts *opts)	1014	struct cgroup_sb_opts *opts)
993	{	1015	{
994	char *token, *o = data ?: "all";	1016	char *token, *o = data ?: "all";
995	unsigned long mask = (unsigned long)-1;	1017	unsigned long mask = (unsigned long)-1;
996		1018
		1019	BUG_ON(!mutex_is_locked(&cgroup_mutex));
		1020
997	#ifdef CONFIG_CPUSETS	1021	#ifdef CONFIG_CPUSETS
998	mask = ~(1UL << cpuset_subsys_id);	1022	mask = ~(1UL << cpuset_subsys_id);
999	#endif	1023	#endif
@@ -1009,6 +1033,8 @@ static int parse_cgroupfs_options(char *data,
1009	opts->subsys_bits = 0;	1033	opts->subsys_bits = 0;
1010	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {	1034	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
1011	struct cgroup_subsys *ss = subsys[i];	1035	struct cgroup_subsys *ss = subsys[i];
		1036	if (ss == NULL)
		1037	continue;
1012	if (!ss->disabled)	1038	if (!ss->disabled)
1013	opts->subsys_bits |= 1ul << i;	1039	opts->subsys_bits |= 1ul << i;
1014	}	1040	}
@@ -1053,6 +1079,8 @@ static int parse_cgroupfs_options(char *data,
1053	int i;	1079	int i;
1054	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {	1080	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
1055	ss = subsys[i];	1081	ss = subsys[i];
		1082	if (ss == NULL)
		1083	continue;
1056	if (!strcmp(token, ss->name)) {	1084	if (!strcmp(token, ss->name)) {
1057	if (!ss->disabled)	1085	if (!ss->disabled)
1058	set_bit(i, &opts->subsys_bits);	1086	set_bit(i, &opts->subsys_bits);
@@ -1306,7 +1334,9 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
1306	struct cgroupfs_root *new_root;	1334	struct cgroupfs_root *new_root;
1307		1335
1308	/* First find the desired set of subsystems */	1336	/* First find the desired set of subsystems */
		1337	mutex_lock(&cgroup_mutex);
1309	ret = parse_cgroupfs_options(data, &opts);	1338	ret = parse_cgroupfs_options(data, &opts);
		1339	mutex_unlock(&cgroup_mutex);
1310	if (ret)	1340	if (ret)
1311	goto out_err;	1341	goto out_err;
1312		1342
@@ -2918,8 +2948,14 @@ static void cgroup_lock_hierarchy(struct cgroupfs_root *root)
2918	/* We need to take each hierarchy_mutex in a consistent order */	2948	/* We need to take each hierarchy_mutex in a consistent order */
2919	int i;	2949	int i;
2920		2950
		2951	/*
		2952	* No worry about a race with rebind_subsystems that might mess up the
		2953	* locking order, since both parties are under cgroup_mutex.
		2954	*/
2921	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {	2955	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
2922	struct cgroup_subsys *ss = subsys[i];	2956	struct cgroup_subsys *ss = subsys[i];
		2957	if (ss == NULL)
		2958	continue;
2923	if (ss->root == root)	2959	if (ss->root == root)
2924	mutex_lock(&ss->hierarchy_mutex);	2960	mutex_lock(&ss->hierarchy_mutex);
2925	}	2961	}
@@ -2931,6 +2967,8 @@ static void cgroup_unlock_hierarchy(struct cgroupfs_root *root)
2931		2967
2932	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {	2968	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
2933	struct cgroup_subsys *ss = subsys[i];	2969	struct cgroup_subsys *ss = subsys[i];
		2970	if (ss == NULL)
		2971	continue;
2934	if (ss->root == root)	2972	if (ss->root == root)
2935	mutex_unlock(&ss->hierarchy_mutex);	2973	mutex_unlock(&ss->hierarchy_mutex);
2936	}	2974	}
@@ -3054,11 +3092,16 @@ static int cgroup_has_css_refs(struct cgroup *cgrp)
3054	* synchronization other than RCU, and the subsystem linked	3092	* synchronization other than RCU, and the subsystem linked
3055	* list isn't RCU-safe */	3093	* list isn't RCU-safe */
3056	int i;	3094	int i;
		3095	/*
		3096	* We won't need to lock the subsys array, because the subsystems
		3097	* we're concerned about aren't going anywhere since our cgroup root
		3098	* has a reference on them.
		3099	*/
3057	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {	3100	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
3058	struct cgroup_subsys *ss = subsys[i];	3101	struct cgroup_subsys *ss = subsys[i];
3059	struct cgroup_subsys_state *css;	3102	struct cgroup_subsys_state *css;
3060	/* Skip subsystems not in this hierarchy */	3103	/* Skip subsystems not present or not in this hierarchy */
3061	if (ss->root != cgrp->root)	3104	if (ss == NULL || ss->root != cgrp->root)
3062	continue;	3105	continue;
3063	css = cgrp->subsys[ss->subsys_id];	3106	css = cgrp->subsys[ss->subsys_id];
3064	/* When called from check_for_release() it's possible	3107	/* When called from check_for_release() it's possible
@@ -3279,7 +3322,8 @@ int __init cgroup_init_early(void)
3279	for (i = 0; i < CSS_SET_TABLE_SIZE; i++)	3322	for (i = 0; i < CSS_SET_TABLE_SIZE; i++)
3280	INIT_HLIST_HEAD(&css_set_table[i]);	3323	INIT_HLIST_HEAD(&css_set_table[i]);
3281		3324
3282	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {	3325	/* at bootup time, we don't worry about modular subsystems */
		3326	for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
3283	struct cgroup_subsys *ss = subsys[i];	3327	struct cgroup_subsys *ss = subsys[i];
3284		3328
3285	BUG_ON(!ss->name);	3329	BUG_ON(!ss->name);
@@ -3314,7 +3358,8 @@ int __init cgroup_init(void)
3314	if (err)	3358	if (err)
3315	return err;	3359	return err;
3316		3360
3317	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {	3361	/* at bootup time, we don't worry about modular subsystems */
		3362	for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
3318	struct cgroup_subsys *ss = subsys[i];	3363	struct cgroup_subsys *ss = subsys[i];
3319	if (!ss->early_init)	3364	if (!ss->early_init)
3320	cgroup_init_subsys(ss);	3365	cgroup_init_subsys(ss);
@@ -3423,9 +3468,16 @@ static int proc_cgroupstats_show(struct seq_file *m, void *v)
3423	int i;	3468	int i;
3424		3469
3425	seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\tenabled\n");	3470	seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\tenabled\n");
		3471	/*
		3472	* ideally we don't want subsystems moving around while we do this.
		3473	* cgroup_mutex is also necessary to guarantee an atomic snapshot of
		3474	* subsys/hierarchy state.
		3475	*/
3426	mutex_lock(&cgroup_mutex);	3476	mutex_lock(&cgroup_mutex);
3427	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {	3477	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
3428	struct cgroup_subsys *ss = subsys[i];	3478	struct cgroup_subsys *ss = subsys[i];
		3479	if (ss == NULL)
		3480	continue;
3429	seq_printf(m, "%s\t%d\t%d\t%d\n",	3481	seq_printf(m, "%s\t%d\t%d\t%d\n",
3430	ss->name, ss->root->hierarchy_id,	3482	ss->name, ss->root->hierarchy_id,
3431	ss->root->number_of_cgroups, !ss->disabled);	3483	ss->root->number_of_cgroups, !ss->disabled);
@@ -3483,7 +3535,12 @@ void cgroup_fork_callbacks(struct task_struct *child)
3483	{	3535	{
3484	if (need_forkexit_callback) {	3536	if (need_forkexit_callback) {
3485	int i;	3537	int i;
3486	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {	3538	/*
		3539	* forkexit callbacks are only supported for builtin
		3540	* subsystems, and the builtin section of the subsys array is
		3541	* immutable, so we don't need to lock the subsys array here.
		3542	*/
		3543	for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
3487	struct cgroup_subsys *ss = subsys[i];	3544	struct cgroup_subsys *ss = subsys[i];
3488	if (ss->fork)	3545	if (ss->fork)
3489	ss->fork(ss, child);	3546	ss->fork(ss, child);
@@ -3552,7 +3609,11 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks)
3552	struct css_set *cg;	3609	struct css_set *cg;
3553		3610
3554	if (run_callbacks && need_forkexit_callback) {	3611	if (run_callbacks && need_forkexit_callback) {
3555	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {	3612	/*
		3613	* modular subsystems can't use callbacks, so no need to lock
		3614	* the subsys array
		3615	*/
		3616	for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
3556	struct cgroup_subsys *ss = subsys[i];	3617	struct cgroup_subsys *ss = subsys[i];
3557	if (ss->exit)	3618	if (ss->exit)
3558	ss->exit(ss, tsk);	3619	ss->exit(ss, tsk);
@@ -3844,8 +3905,11 @@ static int __init cgroup_disable(char *str)
3844	while ((token = strsep(&str, ",")) != NULL) {	3905	while ((token = strsep(&str, ",")) != NULL) {
3845	if (!*token)	3906	if (!*token)
3846	continue;	3907	continue;
3847		3908	/*
3848	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {	3909	* cgroup_disable, being at boot time, can't know about module
		3910	* subsystems, so we don't worry about them.
		3911	*/
		3912	for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
3849	struct cgroup_subsys *ss = subsys[i];	3913	struct cgroup_subsys *ss = subsys[i];
3850		3914
3851	if (!strcmp(token, ss->name)) {	3915	if (!strcmp(token, ss->name)) {