aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/cgroup.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/cgroup.c')
-rw-r--r--kernel/cgroup.c376
1 files changed, 251 insertions, 125 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 83cd7d041c62..e3045ad4267a 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -88,11 +88,12 @@ static DEFINE_MUTEX(cgroup_root_mutex);
88 88
89/* 89/*
90 * Generate an array of cgroup subsystem pointers. At boot time, this is 90 * Generate an array of cgroup subsystem pointers. At boot time, this is
91 * populated up to CGROUP_BUILTIN_SUBSYS_COUNT, and modular subsystems are 91 * populated with the built in subsystems, and modular subsystems are
92 * registered after that. The mutable section of this array is protected by 92 * registered after that. The mutable section of this array is protected by
93 * cgroup_mutex. 93 * cgroup_mutex.
94 */ 94 */
95#define SUBSYS(_x) &_x ## _subsys, 95#define SUBSYS(_x) [_x ## _subsys_id] = &_x ## _subsys,
96#define IS_SUBSYS_ENABLED(option) IS_BUILTIN(option)
96static struct cgroup_subsys *subsys[CGROUP_SUBSYS_COUNT] = { 97static struct cgroup_subsys *subsys[CGROUP_SUBSYS_COUNT] = {
97#include <linux/cgroup_subsys.h> 98#include <linux/cgroup_subsys.h>
98}; 99};
@@ -111,13 +112,13 @@ struct cgroupfs_root {
111 * The bitmask of subsystems intended to be attached to this 112 * The bitmask of subsystems intended to be attached to this
112 * hierarchy 113 * hierarchy
113 */ 114 */
114 unsigned long subsys_bits; 115 unsigned long subsys_mask;
115 116
116 /* Unique id for this hierarchy. */ 117 /* Unique id for this hierarchy. */
117 int hierarchy_id; 118 int hierarchy_id;
118 119
119 /* The bitmask of subsystems currently attached to this hierarchy */ 120 /* The bitmask of subsystems currently attached to this hierarchy */
120 unsigned long actual_subsys_bits; 121 unsigned long actual_subsys_mask;
121 122
122 /* A list running through the attached subsystems */ 123 /* A list running through the attached subsystems */
123 struct list_head subsys_list; 124 struct list_head subsys_list;
@@ -276,7 +277,8 @@ inline int cgroup_is_removed(const struct cgroup *cgrp)
276 277
277/* bits in struct cgroupfs_root flags field */ 278/* bits in struct cgroupfs_root flags field */
278enum { 279enum {
279 ROOT_NOPREFIX, /* mounted subsystems have no named prefix */ 280 ROOT_NOPREFIX, /* mounted subsystems have no named prefix */
281 ROOT_XATTR, /* supports extended attributes */
280}; 282};
281 283
282static int cgroup_is_releasable(const struct cgroup *cgrp) 284static int cgroup_is_releasable(const struct cgroup *cgrp)
@@ -556,7 +558,7 @@ static struct css_set *find_existing_css_set(
556 * won't change, so no need for locking. 558 * won't change, so no need for locking.
557 */ 559 */
558 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { 560 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
559 if (root->subsys_bits & (1UL << i)) { 561 if (root->subsys_mask & (1UL << i)) {
560 /* Subsystem is in this hierarchy. So we want 562 /* Subsystem is in this hierarchy. So we want
561 * the subsystem state from the new 563 * the subsystem state from the new
562 * cgroup */ 564 * cgroup */
@@ -824,7 +826,8 @@ EXPORT_SYMBOL_GPL(cgroup_unlock);
824static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode); 826static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode);
825static struct dentry *cgroup_lookup(struct inode *, struct dentry *, unsigned int); 827static struct dentry *cgroup_lookup(struct inode *, struct dentry *, unsigned int);
826static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry); 828static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry);
827static int cgroup_populate_dir(struct cgroup *cgrp); 829static int cgroup_populate_dir(struct cgroup *cgrp, bool base_files,
830 unsigned long subsys_mask);
828static const struct inode_operations cgroup_dir_inode_operations; 831static const struct inode_operations cgroup_dir_inode_operations;
829static const struct file_operations proc_cgroupstats_operations; 832static const struct file_operations proc_cgroupstats_operations;
830 833
@@ -888,15 +891,19 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode)
888 */ 891 */
889 BUG_ON(!list_empty(&cgrp->pidlists)); 892 BUG_ON(!list_empty(&cgrp->pidlists));
890 893
894 simple_xattrs_free(&cgrp->xattrs);
895
891 kfree_rcu(cgrp, rcu_head); 896 kfree_rcu(cgrp, rcu_head);
892 } else { 897 } else {
893 struct cfent *cfe = __d_cfe(dentry); 898 struct cfent *cfe = __d_cfe(dentry);
894 struct cgroup *cgrp = dentry->d_parent->d_fsdata; 899 struct cgroup *cgrp = dentry->d_parent->d_fsdata;
900 struct cftype *cft = cfe->type;
895 901
896 WARN_ONCE(!list_empty(&cfe->node) && 902 WARN_ONCE(!list_empty(&cfe->node) &&
897 cgrp != &cgrp->root->top_cgroup, 903 cgrp != &cgrp->root->top_cgroup,
898 "cfe still linked for %s\n", cfe->type->name); 904 "cfe still linked for %s\n", cfe->type->name);
899 kfree(cfe); 905 kfree(cfe);
906 simple_xattrs_free(&cft->xattrs);
900 } 907 }
901 iput(inode); 908 iput(inode);
902} 909}
@@ -939,12 +946,29 @@ static int cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft)
939 return -ENOENT; 946 return -ENOENT;
940} 947}
941 948
942static void cgroup_clear_directory(struct dentry *dir) 949/**
950 * cgroup_clear_directory - selective removal of base and subsystem files
951 * @dir: directory containing the files
952 * @base_files: true if the base files should be removed
953 * @subsys_mask: mask of the subsystem ids whose files should be removed
954 */
955static void cgroup_clear_directory(struct dentry *dir, bool base_files,
956 unsigned long subsys_mask)
943{ 957{
944 struct cgroup *cgrp = __d_cgrp(dir); 958 struct cgroup *cgrp = __d_cgrp(dir);
959 struct cgroup_subsys *ss;
945 960
946 while (!list_empty(&cgrp->files)) 961 for_each_subsys(cgrp->root, ss) {
947 cgroup_rm_file(cgrp, NULL); 962 struct cftype_set *set;
963 if (!test_bit(ss->subsys_id, &subsys_mask))
964 continue;
965 list_for_each_entry(set, &ss->cftsets, node)
966 cgroup_rm_file(cgrp, set->cfts);
967 }
968 if (base_files) {
969 while (!list_empty(&cgrp->files))
970 cgroup_rm_file(cgrp, NULL);
971 }
948} 972}
949 973
950/* 974/*
@@ -953,8 +977,9 @@ static void cgroup_clear_directory(struct dentry *dir)
953static void cgroup_d_remove_dir(struct dentry *dentry) 977static void cgroup_d_remove_dir(struct dentry *dentry)
954{ 978{
955 struct dentry *parent; 979 struct dentry *parent;
980 struct cgroupfs_root *root = dentry->d_sb->s_fs_info;
956 981
957 cgroup_clear_directory(dentry); 982 cgroup_clear_directory(dentry, true, root->subsys_mask);
958 983
959 parent = dentry->d_parent; 984 parent = dentry->d_parent;
960 spin_lock(&parent->d_lock); 985 spin_lock(&parent->d_lock);
@@ -971,22 +996,22 @@ static void cgroup_d_remove_dir(struct dentry *dentry)
971 * returns an error, no reference counts are touched. 996 * returns an error, no reference counts are touched.
972 */ 997 */
973static int rebind_subsystems(struct cgroupfs_root *root, 998static int rebind_subsystems(struct cgroupfs_root *root,
974 unsigned long final_bits) 999 unsigned long final_subsys_mask)
975{ 1000{
976 unsigned long added_bits, removed_bits; 1001 unsigned long added_mask, removed_mask;
977 struct cgroup *cgrp = &root->top_cgroup; 1002 struct cgroup *cgrp = &root->top_cgroup;
978 int i; 1003 int i;
979 1004
980 BUG_ON(!mutex_is_locked(&cgroup_mutex)); 1005 BUG_ON(!mutex_is_locked(&cgroup_mutex));
981 BUG_ON(!mutex_is_locked(&cgroup_root_mutex)); 1006 BUG_ON(!mutex_is_locked(&cgroup_root_mutex));
982 1007
983 removed_bits = root->actual_subsys_bits & ~final_bits; 1008 removed_mask = root->actual_subsys_mask & ~final_subsys_mask;
984 added_bits = final_bits & ~root->actual_subsys_bits; 1009 added_mask = final_subsys_mask & ~root->actual_subsys_mask;
985 /* Check that any added subsystems are currently free */ 1010 /* Check that any added subsystems are currently free */
986 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { 1011 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
987 unsigned long bit = 1UL << i; 1012 unsigned long bit = 1UL << i;
988 struct cgroup_subsys *ss = subsys[i]; 1013 struct cgroup_subsys *ss = subsys[i];
989 if (!(bit & added_bits)) 1014 if (!(bit & added_mask))
990 continue; 1015 continue;
991 /* 1016 /*
992 * Nobody should tell us to do a subsys that doesn't exist: 1017 * Nobody should tell us to do a subsys that doesn't exist:
@@ -1011,7 +1036,7 @@ static int rebind_subsystems(struct cgroupfs_root *root,
1011 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { 1036 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
1012 struct cgroup_subsys *ss = subsys[i]; 1037 struct cgroup_subsys *ss = subsys[i];
1013 unsigned long bit = 1UL << i; 1038 unsigned long bit = 1UL << i;
1014 if (bit & added_bits) { 1039 if (bit & added_mask) {
1015 /* We're binding this subsystem to this hierarchy */ 1040 /* We're binding this subsystem to this hierarchy */
1016 BUG_ON(ss == NULL); 1041 BUG_ON(ss == NULL);
1017 BUG_ON(cgrp->subsys[i]); 1042 BUG_ON(cgrp->subsys[i]);
@@ -1024,7 +1049,7 @@ static int rebind_subsystems(struct cgroupfs_root *root,
1024 if (ss->bind) 1049 if (ss->bind)
1025 ss->bind(cgrp); 1050 ss->bind(cgrp);
1026 /* refcount was already taken, and we're keeping it */ 1051 /* refcount was already taken, and we're keeping it */
1027 } else if (bit & removed_bits) { 1052 } else if (bit & removed_mask) {
1028 /* We're removing this subsystem */ 1053 /* We're removing this subsystem */
1029 BUG_ON(ss == NULL); 1054 BUG_ON(ss == NULL);
1030 BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]); 1055 BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]);
@@ -1037,7 +1062,7 @@ static int rebind_subsystems(struct cgroupfs_root *root,
1037 list_move(&ss->sibling, &rootnode.subsys_list); 1062 list_move(&ss->sibling, &rootnode.subsys_list);
1038 /* subsystem is now free - drop reference on module */ 1063 /* subsystem is now free - drop reference on module */
1039 module_put(ss->module); 1064 module_put(ss->module);
1040 } else if (bit & final_bits) { 1065 } else if (bit & final_subsys_mask) {
1041 /* Subsystem state should already exist */ 1066 /* Subsystem state should already exist */
1042 BUG_ON(ss == NULL); 1067 BUG_ON(ss == NULL);
1043 BUG_ON(!cgrp->subsys[i]); 1068 BUG_ON(!cgrp->subsys[i]);
@@ -1054,7 +1079,7 @@ static int rebind_subsystems(struct cgroupfs_root *root,
1054 BUG_ON(cgrp->subsys[i]); 1079 BUG_ON(cgrp->subsys[i]);
1055 } 1080 }
1056 } 1081 }
1057 root->subsys_bits = root->actual_subsys_bits = final_bits; 1082 root->subsys_mask = root->actual_subsys_mask = final_subsys_mask;
1058 synchronize_rcu(); 1083 synchronize_rcu();
1059 1084
1060 return 0; 1085 return 0;
@@ -1070,6 +1095,8 @@ static int cgroup_show_options(struct seq_file *seq, struct dentry *dentry)
1070 seq_printf(seq, ",%s", ss->name); 1095 seq_printf(seq, ",%s", ss->name);
1071 if (test_bit(ROOT_NOPREFIX, &root->flags)) 1096 if (test_bit(ROOT_NOPREFIX, &root->flags))
1072 seq_puts(seq, ",noprefix"); 1097 seq_puts(seq, ",noprefix");
1098 if (test_bit(ROOT_XATTR, &root->flags))
1099 seq_puts(seq, ",xattr");
1073 if (strlen(root->release_agent_path)) 1100 if (strlen(root->release_agent_path))
1074 seq_printf(seq, ",release_agent=%s", root->release_agent_path); 1101 seq_printf(seq, ",release_agent=%s", root->release_agent_path);
1075 if (clone_children(&root->top_cgroup)) 1102 if (clone_children(&root->top_cgroup))
@@ -1081,7 +1108,7 @@ static int cgroup_show_options(struct seq_file *seq, struct dentry *dentry)
1081} 1108}
1082 1109
1083struct cgroup_sb_opts { 1110struct cgroup_sb_opts {
1084 unsigned long subsys_bits; 1111 unsigned long subsys_mask;
1085 unsigned long flags; 1112 unsigned long flags;
1086 char *release_agent; 1113 char *release_agent;
1087 bool clone_children; 1114 bool clone_children;
@@ -1138,6 +1165,10 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
1138 opts->clone_children = true; 1165 opts->clone_children = true;
1139 continue; 1166 continue;
1140 } 1167 }
1168 if (!strcmp(token, "xattr")) {
1169 set_bit(ROOT_XATTR, &opts->flags);
1170 continue;
1171 }
1141 if (!strncmp(token, "release_agent=", 14)) { 1172 if (!strncmp(token, "release_agent=", 14)) {
1142 /* Specifying two release agents is forbidden */ 1173 /* Specifying two release agents is forbidden */
1143 if (opts->release_agent) 1174 if (opts->release_agent)
@@ -1186,7 +1217,7 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
1186 /* Mutually exclusive option 'all' + subsystem name */ 1217 /* Mutually exclusive option 'all' + subsystem name */
1187 if (all_ss) 1218 if (all_ss)
1188 return -EINVAL; 1219 return -EINVAL;
1189 set_bit(i, &opts->subsys_bits); 1220 set_bit(i, &opts->subsys_mask);
1190 one_ss = true; 1221 one_ss = true;
1191 1222
1192 break; 1223 break;
@@ -1207,7 +1238,7 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
1207 continue; 1238 continue;
1208 if (ss->disabled) 1239 if (ss->disabled)
1209 continue; 1240 continue;
1210 set_bit(i, &opts->subsys_bits); 1241 set_bit(i, &opts->subsys_mask);
1211 } 1242 }
1212 } 1243 }
1213 1244
@@ -1219,19 +1250,19 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
1219 * the cpuset subsystem. 1250 * the cpuset subsystem.
1220 */ 1251 */
1221 if (test_bit(ROOT_NOPREFIX, &opts->flags) && 1252 if (test_bit(ROOT_NOPREFIX, &opts->flags) &&
1222 (opts->subsys_bits & mask)) 1253 (opts->subsys_mask & mask))
1223 return -EINVAL; 1254 return -EINVAL;
1224 1255
1225 1256
1226 /* Can't specify "none" and some subsystems */ 1257 /* Can't specify "none" and some subsystems */
1227 if (opts->subsys_bits && opts->none) 1258 if (opts->subsys_mask && opts->none)
1228 return -EINVAL; 1259 return -EINVAL;
1229 1260
1230 /* 1261 /*
1231 * We either have to specify by name or by subsystems. (So all 1262 * We either have to specify by name or by subsystems. (So all
1232 * empty hierarchies must have a name). 1263 * empty hierarchies must have a name).
1233 */ 1264 */
1234 if (!opts->subsys_bits && !opts->name) 1265 if (!opts->subsys_mask && !opts->name)
1235 return -EINVAL; 1266 return -EINVAL;
1236 1267
1237 /* 1268 /*
@@ -1240,10 +1271,10 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
1240 * take duplicate reference counts on a subsystem that's already used, 1271 * take duplicate reference counts on a subsystem that's already used,
1241 * but rebind_subsystems handles this case. 1272 * but rebind_subsystems handles this case.
1242 */ 1273 */
1243 for (i = CGROUP_BUILTIN_SUBSYS_COUNT; i < CGROUP_SUBSYS_COUNT; i++) { 1274 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
1244 unsigned long bit = 1UL << i; 1275 unsigned long bit = 1UL << i;
1245 1276
1246 if (!(bit & opts->subsys_bits)) 1277 if (!(bit & opts->subsys_mask))
1247 continue; 1278 continue;
1248 if (!try_module_get(subsys[i]->module)) { 1279 if (!try_module_get(subsys[i]->module)) {
1249 module_pin_failed = true; 1280 module_pin_failed = true;
@@ -1256,11 +1287,11 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
1256 * raced with a module_delete call, and to the user this is 1287 * raced with a module_delete call, and to the user this is
1257 * essentially a "subsystem doesn't exist" case. 1288 * essentially a "subsystem doesn't exist" case.
1258 */ 1289 */
1259 for (i--; i >= CGROUP_BUILTIN_SUBSYS_COUNT; i--) { 1290 for (i--; i >= 0; i--) {
1260 /* drop refcounts only on the ones we took */ 1291 /* drop refcounts only on the ones we took */
1261 unsigned long bit = 1UL << i; 1292 unsigned long bit = 1UL << i;
1262 1293
1263 if (!(bit & opts->subsys_bits)) 1294 if (!(bit & opts->subsys_mask))
1264 continue; 1295 continue;
1265 module_put(subsys[i]->module); 1296 module_put(subsys[i]->module);
1266 } 1297 }
@@ -1270,13 +1301,13 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
1270 return 0; 1301 return 0;
1271} 1302}
1272 1303
1273static void drop_parsed_module_refcounts(unsigned long subsys_bits) 1304static void drop_parsed_module_refcounts(unsigned long subsys_mask)
1274{ 1305{
1275 int i; 1306 int i;
1276 for (i = CGROUP_BUILTIN_SUBSYS_COUNT; i < CGROUP_SUBSYS_COUNT; i++) { 1307 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
1277 unsigned long bit = 1UL << i; 1308 unsigned long bit = 1UL << i;
1278 1309
1279 if (!(bit & subsys_bits)) 1310 if (!(bit & subsys_mask))
1280 continue; 1311 continue;
1281 module_put(subsys[i]->module); 1312 module_put(subsys[i]->module);
1282 } 1313 }
@@ -1288,6 +1319,7 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data)
1288 struct cgroupfs_root *root = sb->s_fs_info; 1319 struct cgroupfs_root *root = sb->s_fs_info;
1289 struct cgroup *cgrp = &root->top_cgroup; 1320 struct cgroup *cgrp = &root->top_cgroup;
1290 struct cgroup_sb_opts opts; 1321 struct cgroup_sb_opts opts;
1322 unsigned long added_mask, removed_mask;
1291 1323
1292 mutex_lock(&cgrp->dentry->d_inode->i_mutex); 1324 mutex_lock(&cgrp->dentry->d_inode->i_mutex);
1293 mutex_lock(&cgroup_mutex); 1325 mutex_lock(&cgroup_mutex);
@@ -1299,27 +1331,31 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data)
1299 goto out_unlock; 1331 goto out_unlock;
1300 1332
1301 /* See feature-removal-schedule.txt */ 1333 /* See feature-removal-schedule.txt */
1302 if (opts.subsys_bits != root->actual_subsys_bits || opts.release_agent) 1334 if (opts.subsys_mask != root->actual_subsys_mask || opts.release_agent)
1303 pr_warning("cgroup: option changes via remount are deprecated (pid=%d comm=%s)\n", 1335 pr_warning("cgroup: option changes via remount are deprecated (pid=%d comm=%s)\n",
1304 task_tgid_nr(current), current->comm); 1336 task_tgid_nr(current), current->comm);
1305 1337
1338 added_mask = opts.subsys_mask & ~root->subsys_mask;
1339 removed_mask = root->subsys_mask & ~opts.subsys_mask;
1340
1306 /* Don't allow flags or name to change at remount */ 1341 /* Don't allow flags or name to change at remount */
1307 if (opts.flags != root->flags || 1342 if (opts.flags != root->flags ||
1308 (opts.name && strcmp(opts.name, root->name))) { 1343 (opts.name && strcmp(opts.name, root->name))) {
1309 ret = -EINVAL; 1344 ret = -EINVAL;
1310 drop_parsed_module_refcounts(opts.subsys_bits); 1345 drop_parsed_module_refcounts(opts.subsys_mask);
1311 goto out_unlock; 1346 goto out_unlock;
1312 } 1347 }
1313 1348
1314 ret = rebind_subsystems(root, opts.subsys_bits); 1349 ret = rebind_subsystems(root, opts.subsys_mask);
1315 if (ret) { 1350 if (ret) {
1316 drop_parsed_module_refcounts(opts.subsys_bits); 1351 drop_parsed_module_refcounts(opts.subsys_mask);
1317 goto out_unlock; 1352 goto out_unlock;
1318 } 1353 }
1319 1354
1320 /* clear out any existing files and repopulate subsystem files */ 1355 /* clear out any existing files and repopulate subsystem files */
1321 cgroup_clear_directory(cgrp->dentry); 1356 cgroup_clear_directory(cgrp->dentry, false, removed_mask);
1322 cgroup_populate_dir(cgrp); 1357 /* re-populate subsystem files */
1358 cgroup_populate_dir(cgrp, false, added_mask);
1323 1359
1324 if (opts.release_agent) 1360 if (opts.release_agent)
1325 strcpy(root->release_agent_path, opts.release_agent); 1361 strcpy(root->release_agent_path, opts.release_agent);
@@ -1350,6 +1386,7 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
1350 mutex_init(&cgrp->pidlist_mutex); 1386 mutex_init(&cgrp->pidlist_mutex);
1351 INIT_LIST_HEAD(&cgrp->event_list); 1387 INIT_LIST_HEAD(&cgrp->event_list);
1352 spin_lock_init(&cgrp->event_list_lock); 1388 spin_lock_init(&cgrp->event_list_lock);
1389 simple_xattrs_init(&cgrp->xattrs);
1353} 1390}
1354 1391
1355static void init_cgroup_root(struct cgroupfs_root *root) 1392static void init_cgroup_root(struct cgroupfs_root *root)
@@ -1404,8 +1441,8 @@ static int cgroup_test_super(struct super_block *sb, void *data)
1404 * If we asked for subsystems (or explicitly for no 1441 * If we asked for subsystems (or explicitly for no
1405 * subsystems) then they must match 1442 * subsystems) then they must match
1406 */ 1443 */
1407 if ((opts->subsys_bits || opts->none) 1444 if ((opts->subsys_mask || opts->none)
1408 && (opts->subsys_bits != root->subsys_bits)) 1445 && (opts->subsys_mask != root->subsys_mask))
1409 return 0; 1446 return 0;
1410 1447
1411 return 1; 1448 return 1;
@@ -1415,7 +1452,7 @@ static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts)
1415{ 1452{
1416 struct cgroupfs_root *root; 1453 struct cgroupfs_root *root;
1417 1454
1418 if (!opts->subsys_bits && !opts->none) 1455 if (!opts->subsys_mask && !opts->none)
1419 return NULL; 1456 return NULL;
1420 1457
1421 root = kzalloc(sizeof(*root), GFP_KERNEL); 1458 root = kzalloc(sizeof(*root), GFP_KERNEL);
@@ -1428,7 +1465,7 @@ static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts)
1428 } 1465 }
1429 init_cgroup_root(root); 1466 init_cgroup_root(root);
1430 1467
1431 root->subsys_bits = opts->subsys_bits; 1468 root->subsys_mask = opts->subsys_mask;
1432 root->flags = opts->flags; 1469 root->flags = opts->flags;
1433 if (opts->release_agent) 1470 if (opts->release_agent)
1434 strcpy(root->release_agent_path, opts->release_agent); 1471 strcpy(root->release_agent_path, opts->release_agent);
@@ -1460,7 +1497,7 @@ static int cgroup_set_super(struct super_block *sb, void *data)
1460 if (!opts->new_root) 1497 if (!opts->new_root)
1461 return -EINVAL; 1498 return -EINVAL;
1462 1499
1463 BUG_ON(!opts->subsys_bits && !opts->none); 1500 BUG_ON(!opts->subsys_mask && !opts->none);
1464 1501
1465 ret = set_anon_super(sb, NULL); 1502 ret = set_anon_super(sb, NULL);
1466 if (ret) 1503 if (ret)
@@ -1578,7 +1615,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
1578 if (ret) 1615 if (ret)
1579 goto unlock_drop; 1616 goto unlock_drop;
1580 1617
1581 ret = rebind_subsystems(root, root->subsys_bits); 1618 ret = rebind_subsystems(root, root->subsys_mask);
1582 if (ret == -EBUSY) { 1619 if (ret == -EBUSY) {
1583 free_cg_links(&tmp_cg_links); 1620 free_cg_links(&tmp_cg_links);
1584 goto unlock_drop; 1621 goto unlock_drop;
@@ -1618,7 +1655,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
1618 BUG_ON(root->number_of_cgroups != 1); 1655 BUG_ON(root->number_of_cgroups != 1);
1619 1656
1620 cred = override_creds(&init_cred); 1657 cred = override_creds(&init_cred);
1621 cgroup_populate_dir(root_cgrp); 1658 cgroup_populate_dir(root_cgrp, true, root->subsys_mask);
1622 revert_creds(cred); 1659 revert_creds(cred);
1623 mutex_unlock(&cgroup_root_mutex); 1660 mutex_unlock(&cgroup_root_mutex);
1624 mutex_unlock(&cgroup_mutex); 1661 mutex_unlock(&cgroup_mutex);
@@ -1630,7 +1667,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
1630 */ 1667 */
1631 cgroup_drop_root(opts.new_root); 1668 cgroup_drop_root(opts.new_root);
1632 /* no subsys rebinding, so refcounts don't change */ 1669 /* no subsys rebinding, so refcounts don't change */
1633 drop_parsed_module_refcounts(opts.subsys_bits); 1670 drop_parsed_module_refcounts(opts.subsys_mask);
1634 } 1671 }
1635 1672
1636 kfree(opts.release_agent); 1673 kfree(opts.release_agent);
@@ -1644,7 +1681,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
1644 drop_new_super: 1681 drop_new_super:
1645 deactivate_locked_super(sb); 1682 deactivate_locked_super(sb);
1646 drop_modules: 1683 drop_modules:
1647 drop_parsed_module_refcounts(opts.subsys_bits); 1684 drop_parsed_module_refcounts(opts.subsys_mask);
1648 out_err: 1685 out_err:
1649 kfree(opts.release_agent); 1686 kfree(opts.release_agent);
1650 kfree(opts.name); 1687 kfree(opts.name);
@@ -1694,6 +1731,8 @@ static void cgroup_kill_sb(struct super_block *sb) {
1694 mutex_unlock(&cgroup_root_mutex); 1731 mutex_unlock(&cgroup_root_mutex);
1695 mutex_unlock(&cgroup_mutex); 1732 mutex_unlock(&cgroup_mutex);
1696 1733
1734 simple_xattrs_free(&cgrp->xattrs);
1735
1697 kill_litter_super(sb); 1736 kill_litter_super(sb);
1698 cgroup_drop_root(root); 1737 cgroup_drop_root(root);
1699} 1738}
@@ -2493,6 +2532,64 @@ static int cgroup_rename(struct inode *old_dir, struct dentry *old_dentry,
2493 return simple_rename(old_dir, old_dentry, new_dir, new_dentry); 2532 return simple_rename(old_dir, old_dentry, new_dir, new_dentry);
2494} 2533}
2495 2534
2535static struct simple_xattrs *__d_xattrs(struct dentry *dentry)
2536{
2537 if (S_ISDIR(dentry->d_inode->i_mode))
2538 return &__d_cgrp(dentry)->xattrs;
2539 else
2540 return &__d_cft(dentry)->xattrs;
2541}
2542
2543static inline int xattr_enabled(struct dentry *dentry)
2544{
2545 struct cgroupfs_root *root = dentry->d_sb->s_fs_info;
2546 return test_bit(ROOT_XATTR, &root->flags);
2547}
2548
2549static bool is_valid_xattr(const char *name)
2550{
2551 if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) ||
2552 !strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN))
2553 return true;
2554 return false;
2555}
2556
2557static int cgroup_setxattr(struct dentry *dentry, const char *name,
2558 const void *val, size_t size, int flags)
2559{
2560 if (!xattr_enabled(dentry))
2561 return -EOPNOTSUPP;
2562 if (!is_valid_xattr(name))
2563 return -EINVAL;
2564 return simple_xattr_set(__d_xattrs(dentry), name, val, size, flags);
2565}
2566
2567static int cgroup_removexattr(struct dentry *dentry, const char *name)
2568{
2569 if (!xattr_enabled(dentry))
2570 return -EOPNOTSUPP;
2571 if (!is_valid_xattr(name))
2572 return -EINVAL;
2573 return simple_xattr_remove(__d_xattrs(dentry), name);
2574}
2575
2576static ssize_t cgroup_getxattr(struct dentry *dentry, const char *name,
2577 void *buf, size_t size)
2578{
2579 if (!xattr_enabled(dentry))
2580 return -EOPNOTSUPP;
2581 if (!is_valid_xattr(name))
2582 return -EINVAL;
2583 return simple_xattr_get(__d_xattrs(dentry), name, buf, size);
2584}
2585
2586static ssize_t cgroup_listxattr(struct dentry *dentry, char *buf, size_t size)
2587{
2588 if (!xattr_enabled(dentry))
2589 return -EOPNOTSUPP;
2590 return simple_xattr_list(__d_xattrs(dentry), buf, size);
2591}
2592
2496static const struct file_operations cgroup_file_operations = { 2593static const struct file_operations cgroup_file_operations = {
2497 .read = cgroup_file_read, 2594 .read = cgroup_file_read,
2498 .write = cgroup_file_write, 2595 .write = cgroup_file_write,
@@ -2501,11 +2598,22 @@ static const struct file_operations cgroup_file_operations = {
2501 .release = cgroup_file_release, 2598 .release = cgroup_file_release,
2502}; 2599};
2503 2600
2601static const struct inode_operations cgroup_file_inode_operations = {
2602 .setxattr = cgroup_setxattr,
2603 .getxattr = cgroup_getxattr,
2604 .listxattr = cgroup_listxattr,
2605 .removexattr = cgroup_removexattr,
2606};
2607
2504static const struct inode_operations cgroup_dir_inode_operations = { 2608static const struct inode_operations cgroup_dir_inode_operations = {
2505 .lookup = cgroup_lookup, 2609 .lookup = cgroup_lookup,
2506 .mkdir = cgroup_mkdir, 2610 .mkdir = cgroup_mkdir,
2507 .rmdir = cgroup_rmdir, 2611 .rmdir = cgroup_rmdir,
2508 .rename = cgroup_rename, 2612 .rename = cgroup_rename,
2613 .setxattr = cgroup_setxattr,
2614 .getxattr = cgroup_getxattr,
2615 .listxattr = cgroup_listxattr,
2616 .removexattr = cgroup_removexattr,
2509}; 2617};
2510 2618
2511static struct dentry *cgroup_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) 2619static struct dentry *cgroup_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
@@ -2553,6 +2661,7 @@ static int cgroup_create_file(struct dentry *dentry, umode_t mode,
2553 } else if (S_ISREG(mode)) { 2661 } else if (S_ISREG(mode)) {
2554 inode->i_size = 0; 2662 inode->i_size = 0;
2555 inode->i_fop = &cgroup_file_operations; 2663 inode->i_fop = &cgroup_file_operations;
2664 inode->i_op = &cgroup_file_inode_operations;
2556 } 2665 }
2557 d_instantiate(dentry, inode); 2666 d_instantiate(dentry, inode);
2558 dget(dentry); /* Extra count - pin the dentry in core */ 2667 dget(dentry); /* Extra count - pin the dentry in core */
@@ -2613,7 +2722,7 @@ static umode_t cgroup_file_mode(const struct cftype *cft)
2613} 2722}
2614 2723
2615static int cgroup_add_file(struct cgroup *cgrp, struct cgroup_subsys *subsys, 2724static int cgroup_add_file(struct cgroup *cgrp, struct cgroup_subsys *subsys,
2616 const struct cftype *cft) 2725 struct cftype *cft)
2617{ 2726{
2618 struct dentry *dir = cgrp->dentry; 2727 struct dentry *dir = cgrp->dentry;
2619 struct cgroup *parent = __d_cgrp(dir); 2728 struct cgroup *parent = __d_cgrp(dir);
@@ -2623,6 +2732,8 @@ static int cgroup_add_file(struct cgroup *cgrp, struct cgroup_subsys *subsys,
2623 umode_t mode; 2732 umode_t mode;
2624 char name[MAX_CGROUP_TYPE_NAMELEN + MAX_CFTYPE_NAME + 2] = { 0 }; 2733 char name[MAX_CGROUP_TYPE_NAMELEN + MAX_CFTYPE_NAME + 2] = { 0 };
2625 2734
2735 simple_xattrs_init(&cft->xattrs);
2736
2626 /* does @cft->flags tell us to skip creation on @cgrp? */ 2737 /* does @cft->flags tell us to skip creation on @cgrp? */
2627 if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgrp->parent) 2738 if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgrp->parent)
2628 return 0; 2739 return 0;
@@ -2663,9 +2774,9 @@ out:
2663} 2774}
2664 2775
2665static int cgroup_addrm_files(struct cgroup *cgrp, struct cgroup_subsys *subsys, 2776static int cgroup_addrm_files(struct cgroup *cgrp, struct cgroup_subsys *subsys,
2666 const struct cftype cfts[], bool is_add) 2777 struct cftype cfts[], bool is_add)
2667{ 2778{
2668 const struct cftype *cft; 2779 struct cftype *cft;
2669 int err, ret = 0; 2780 int err, ret = 0;
2670 2781
2671 for (cft = cfts; cft->name[0] != '\0'; cft++) { 2782 for (cft = cfts; cft->name[0] != '\0'; cft++) {
@@ -2699,7 +2810,7 @@ static void cgroup_cfts_prepare(void)
2699} 2810}
2700 2811
2701static void cgroup_cfts_commit(struct cgroup_subsys *ss, 2812static void cgroup_cfts_commit(struct cgroup_subsys *ss,
2702 const struct cftype *cfts, bool is_add) 2813 struct cftype *cfts, bool is_add)
2703 __releases(&cgroup_mutex) __releases(&cgroup_cft_mutex) 2814 __releases(&cgroup_mutex) __releases(&cgroup_cft_mutex)
2704{ 2815{
2705 LIST_HEAD(pending); 2816 LIST_HEAD(pending);
@@ -2750,7 +2861,7 @@ static void cgroup_cfts_commit(struct cgroup_subsys *ss,
2750 * function currently returns 0 as long as @cfts registration is successful 2861 * function currently returns 0 as long as @cfts registration is successful
2751 * even if some file creation attempts on existing cgroups fail. 2862 * even if some file creation attempts on existing cgroups fail.
2752 */ 2863 */
2753int cgroup_add_cftypes(struct cgroup_subsys *ss, const struct cftype *cfts) 2864int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
2754{ 2865{
2755 struct cftype_set *set; 2866 struct cftype_set *set;
2756 2867
@@ -2780,7 +2891,7 @@ EXPORT_SYMBOL_GPL(cgroup_add_cftypes);
2780 * Returns 0 on successful unregistration, -ENOENT if @cfts is not 2891 * Returns 0 on successful unregistration, -ENOENT if @cfts is not
2781 * registered with @ss. 2892 * registered with @ss.
2782 */ 2893 */
2783int cgroup_rm_cftypes(struct cgroup_subsys *ss, const struct cftype *cfts) 2894int cgroup_rm_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
2784{ 2895{
2785 struct cftype_set *set; 2896 struct cftype_set *set;
2786 2897
@@ -3785,18 +3896,29 @@ static struct cftype files[] = {
3785 { } /* terminate */ 3896 { } /* terminate */
3786}; 3897};
3787 3898
3788static int cgroup_populate_dir(struct cgroup *cgrp) 3899/**
3900 * cgroup_populate_dir - selective creation of files in a directory
3901 * @cgrp: target cgroup
3902 * @base_files: true if the base files should be added
3903 * @subsys_mask: mask of the subsystem ids whose files should be added
3904 */
3905static int cgroup_populate_dir(struct cgroup *cgrp, bool base_files,
3906 unsigned long subsys_mask)
3789{ 3907{
3790 int err; 3908 int err;
3791 struct cgroup_subsys *ss; 3909 struct cgroup_subsys *ss;
3792 3910
3793 err = cgroup_addrm_files(cgrp, NULL, files, true); 3911 if (base_files) {
3794 if (err < 0) 3912 err = cgroup_addrm_files(cgrp, NULL, files, true);
3795 return err; 3913 if (err < 0)
3914 return err;
3915 }
3796 3916
3797 /* process cftsets of each subsystem */ 3917 /* process cftsets of each subsystem */
3798 for_each_subsys(cgrp->root, ss) { 3918 for_each_subsys(cgrp->root, ss) {
3799 struct cftype_set *set; 3919 struct cftype_set *set;
3920 if (!test_bit(ss->subsys_id, &subsys_mask))
3921 continue;
3800 3922
3801 list_for_each_entry(set, &ss->cftsets, node) 3923 list_for_each_entry(set, &ss->cftsets, node)
3802 cgroup_addrm_files(cgrp, ss, set->cfts, true); 3924 cgroup_addrm_files(cgrp, ss, set->cfts, true);
@@ -3904,8 +4026,9 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
3904 set_bit(CGRP_CLONE_CHILDREN, &cgrp->flags); 4026 set_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);
3905 4027
3906 for_each_subsys(root, ss) { 4028 for_each_subsys(root, ss) {
3907 struct cgroup_subsys_state *css = ss->create(cgrp); 4029 struct cgroup_subsys_state *css;
3908 4030
4031 css = ss->create(cgrp);
3909 if (IS_ERR(css)) { 4032 if (IS_ERR(css)) {
3910 err = PTR_ERR(css); 4033 err = PTR_ERR(css);
3911 goto err_destroy; 4034 goto err_destroy;
@@ -3919,6 +4042,15 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
3919 /* At error, ->destroy() callback has to free assigned ID. */ 4042 /* At error, ->destroy() callback has to free assigned ID. */
3920 if (clone_children(parent) && ss->post_clone) 4043 if (clone_children(parent) && ss->post_clone)
3921 ss->post_clone(cgrp); 4044 ss->post_clone(cgrp);
4045
4046 if (ss->broken_hierarchy && !ss->warned_broken_hierarchy &&
4047 parent->parent) {
4048 pr_warning("cgroup: %s (%d) created nested cgroup for controller \"%s\" which has incomplete hierarchy support. Nested cgroups may change behavior in the future.\n",
4049 current->comm, current->pid, ss->name);
4050 if (!strcmp(ss->name, "memory"))
4051 pr_warning("cgroup: \"memory\" requires setting use_hierarchy to 1 on the root.\n");
4052 ss->warned_broken_hierarchy = true;
4053 }
3922 } 4054 }
3923 4055
3924 list_add(&cgrp->sibling, &cgrp->parent->children); 4056 list_add(&cgrp->sibling, &cgrp->parent->children);
@@ -3937,7 +4069,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
3937 4069
3938 list_add_tail(&cgrp->allcg_node, &root->allcg_list); 4070 list_add_tail(&cgrp->allcg_node, &root->allcg_list);
3939 4071
3940 err = cgroup_populate_dir(cgrp); 4072 err = cgroup_populate_dir(cgrp, true, root->subsys_mask);
3941 /* If err < 0, we have a half-filled directory - oh well ;) */ 4073 /* If err < 0, we have a half-filled directory - oh well ;) */
3942 4074
3943 mutex_unlock(&cgroup_mutex); 4075 mutex_unlock(&cgroup_mutex);
@@ -4187,8 +4319,7 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
4187 * since cgroup_init_subsys will have already taken care of it. 4319 * since cgroup_init_subsys will have already taken care of it.
4188 */ 4320 */
4189 if (ss->module == NULL) { 4321 if (ss->module == NULL) {
4190 /* a few sanity checks */ 4322 /* a sanity check */
4191 BUG_ON(ss->subsys_id >= CGROUP_BUILTIN_SUBSYS_COUNT);
4192 BUG_ON(subsys[ss->subsys_id] != ss); 4323 BUG_ON(subsys[ss->subsys_id] != ss);
4193 return 0; 4324 return 0;
4194 } 4325 }
@@ -4196,24 +4327,8 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
4196 /* init base cftset */ 4327 /* init base cftset */
4197 cgroup_init_cftsets(ss); 4328 cgroup_init_cftsets(ss);
4198 4329
4199 /*
4200 * need to register a subsys id before anything else - for example,
4201 * init_cgroup_css needs it.
4202 */
4203 mutex_lock(&cgroup_mutex); 4330 mutex_lock(&cgroup_mutex);
4204 /* find the first empty slot in the array */ 4331 subsys[ss->subsys_id] = ss;
4205 for (i = CGROUP_BUILTIN_SUBSYS_COUNT; i < CGROUP_SUBSYS_COUNT; i++) {
4206 if (subsys[i] == NULL)
4207 break;
4208 }
4209 if (i == CGROUP_SUBSYS_COUNT) {
4210 /* maximum number of subsystems already registered! */
4211 mutex_unlock(&cgroup_mutex);
4212 return -EBUSY;
4213 }
4214 /* assign ourselves the subsys_id */
4215 ss->subsys_id = i;
4216 subsys[i] = ss;
4217 4332
4218 /* 4333 /*
4219 * no ss->create seems to need anything important in the ss struct, so 4334 * no ss->create seems to need anything important in the ss struct, so
@@ -4222,7 +4337,7 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
4222 css = ss->create(dummytop); 4337 css = ss->create(dummytop);
4223 if (IS_ERR(css)) { 4338 if (IS_ERR(css)) {
4224 /* failure case - need to deassign the subsys[] slot. */ 4339 /* failure case - need to deassign the subsys[] slot. */
4225 subsys[i] = NULL; 4340 subsys[ss->subsys_id] = NULL;
4226 mutex_unlock(&cgroup_mutex); 4341 mutex_unlock(&cgroup_mutex);
4227 return PTR_ERR(css); 4342 return PTR_ERR(css);
4228 } 4343 }
@@ -4238,7 +4353,7 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
4238 if (ret) { 4353 if (ret) {
4239 dummytop->subsys[ss->subsys_id] = NULL; 4354 dummytop->subsys[ss->subsys_id] = NULL;
4240 ss->destroy(dummytop); 4355 ss->destroy(dummytop);
4241 subsys[i] = NULL; 4356 subsys[ss->subsys_id] = NULL;
4242 mutex_unlock(&cgroup_mutex); 4357 mutex_unlock(&cgroup_mutex);
4243 return ret; 4358 return ret;
4244 } 4359 }
@@ -4305,7 +4420,6 @@ void cgroup_unload_subsys(struct cgroup_subsys *ss)
4305 4420
4306 mutex_lock(&cgroup_mutex); 4421 mutex_lock(&cgroup_mutex);
4307 /* deassign the subsys_id */ 4422 /* deassign the subsys_id */
4308 BUG_ON(ss->subsys_id < CGROUP_BUILTIN_SUBSYS_COUNT);
4309 subsys[ss->subsys_id] = NULL; 4423 subsys[ss->subsys_id] = NULL;
4310 4424
4311 /* remove subsystem from rootnode's list of subsystems */ 4425 /* remove subsystem from rootnode's list of subsystems */
@@ -4368,10 +4482,13 @@ int __init cgroup_init_early(void)
4368 for (i = 0; i < CSS_SET_TABLE_SIZE; i++) 4482 for (i = 0; i < CSS_SET_TABLE_SIZE; i++)
4369 INIT_HLIST_HEAD(&css_set_table[i]); 4483 INIT_HLIST_HEAD(&css_set_table[i]);
4370 4484
4371 /* at bootup time, we don't worry about modular subsystems */ 4485 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
4372 for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
4373 struct cgroup_subsys *ss = subsys[i]; 4486 struct cgroup_subsys *ss = subsys[i];
4374 4487
4488 /* at bootup time, we don't worry about modular subsystems */
4489 if (!ss || ss->module)
4490 continue;
4491
4375 BUG_ON(!ss->name); 4492 BUG_ON(!ss->name);
4376 BUG_ON(strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN); 4493 BUG_ON(strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN);
4377 BUG_ON(!ss->create); 4494 BUG_ON(!ss->create);
@@ -4404,9 +4521,12 @@ int __init cgroup_init(void)
4404 if (err) 4521 if (err)
4405 return err; 4522 return err;
4406 4523
4407 /* at bootup time, we don't worry about modular subsystems */ 4524 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
4408 for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
4409 struct cgroup_subsys *ss = subsys[i]; 4525 struct cgroup_subsys *ss = subsys[i];
4526
4527 /* at bootup time, we don't worry about modular subsystems */
4528 if (!ss || ss->module)
4529 continue;
4410 if (!ss->early_init) 4530 if (!ss->early_init)
4411 cgroup_init_subsys(ss); 4531 cgroup_init_subsys(ss);
4412 if (ss->use_id) 4532 if (ss->use_id)
@@ -4590,41 +4710,19 @@ void cgroup_fork(struct task_struct *child)
4590} 4710}
4591 4711
4592/** 4712/**
4593 * cgroup_fork_callbacks - run fork callbacks
4594 * @child: the new task
4595 *
4596 * Called on a new task very soon before adding it to the
4597 * tasklist. No need to take any locks since no-one can
4598 * be operating on this task.
4599 */
4600void cgroup_fork_callbacks(struct task_struct *child)
4601{
4602 if (need_forkexit_callback) {
4603 int i;
4604 /*
4605 * forkexit callbacks are only supported for builtin
4606 * subsystems, and the builtin section of the subsys array is
4607 * immutable, so we don't need to lock the subsys array here.
4608 */
4609 for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
4610 struct cgroup_subsys *ss = subsys[i];
4611 if (ss->fork)
4612 ss->fork(child);
4613 }
4614 }
4615}
4616
4617/**
4618 * cgroup_post_fork - called on a new task after adding it to the task list 4713 * cgroup_post_fork - called on a new task after adding it to the task list
4619 * @child: the task in question 4714 * @child: the task in question
4620 * 4715 *
4621 * Adds the task to the list running through its css_set if necessary. 4716 * Adds the task to the list running through its css_set if necessary and
4622 * Has to be after the task is visible on the task list in case we race 4717 * calls the subsystem fork() callbacks. Has to be after the task is
4623 * with the first call to cgroup_iter_start() - to guarantee that the 4718 * visible on the task list in case we race with the first call to
4624 * new task ends up on its list. 4719 * cgroup_iter_start() - to guarantee that the new task ends up on its
4720 * list.
4625 */ 4721 */
4626void cgroup_post_fork(struct task_struct *child) 4722void cgroup_post_fork(struct task_struct *child)
4627{ 4723{
4724 int i;
4725
4628 /* 4726 /*
4629 * use_task_css_set_links is set to 1 before we walk the tasklist 4727 * use_task_css_set_links is set to 1 before we walk the tasklist
4630 * under the tasklist_lock and we read it here after we added the child 4728 * under the tasklist_lock and we read it here after we added the child
@@ -4653,7 +4751,30 @@ void cgroup_post_fork(struct task_struct *child)
4653 } 4751 }
4654 write_unlock(&css_set_lock); 4752 write_unlock(&css_set_lock);
4655 } 4753 }
4754
4755 /*
4756 * Call ss->fork(). This must happen after @child is linked on
4757 * css_set; otherwise, @child might change state between ->fork()
4758 * and addition to css_set.
4759 */
4760 if (need_forkexit_callback) {
4761 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
4762 struct cgroup_subsys *ss = subsys[i];
4763
4764 /*
4765 * fork/exit callbacks are supported only for
4766 * builtin subsystems and we don't need further
4767 * synchronization as they never go away.
4768 */
4769 if (!ss || ss->module)
4770 continue;
4771
4772 if (ss->fork)
4773 ss->fork(child);
4774 }
4775 }
4656} 4776}
4777
4657/** 4778/**
4658 * cgroup_exit - detach cgroup from exiting task 4779 * cgroup_exit - detach cgroup from exiting task
4659 * @tsk: pointer to task_struct of exiting process 4780 * @tsk: pointer to task_struct of exiting process
@@ -4712,12 +4833,13 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks)
4712 tsk->cgroups = &init_css_set; 4833 tsk->cgroups = &init_css_set;
4713 4834
4714 if (run_callbacks && need_forkexit_callback) { 4835 if (run_callbacks && need_forkexit_callback) {
4715 /* 4836 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
4716 * modular subsystems can't use callbacks, so no need to lock
4717 * the subsys array
4718 */
4719 for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
4720 struct cgroup_subsys *ss = subsys[i]; 4837 struct cgroup_subsys *ss = subsys[i];
4838
4839 /* modular subsystems can't use callbacks */
4840 if (!ss || ss->module)
4841 continue;
4842
4721 if (ss->exit) { 4843 if (ss->exit) {
4722 struct cgroup *old_cgrp = 4844 struct cgroup *old_cgrp =
4723 rcu_dereference_raw(cg->subsys[i])->cgroup; 4845 rcu_dereference_raw(cg->subsys[i])->cgroup;
@@ -4903,13 +5025,17 @@ static int __init cgroup_disable(char *str)
4903 while ((token = strsep(&str, ",")) != NULL) { 5025 while ((token = strsep(&str, ",")) != NULL) {
4904 if (!*token) 5026 if (!*token)
4905 continue; 5027 continue;
4906 /* 5028 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
4907 * cgroup_disable, being at boot time, can't know about module
4908 * subsystems, so we don't worry about them.
4909 */
4910 for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
4911 struct cgroup_subsys *ss = subsys[i]; 5029 struct cgroup_subsys *ss = subsys[i];
4912 5030
5031 /*
5032 * cgroup_disable, being at boot time, can't
5033 * know about module subsystems, so we don't
5034 * worry about them.
5035 */
5036 if (!ss || ss->module)
5037 continue;
5038
4913 if (!strcmp(token, ss->name)) { 5039 if (!strcmp(token, ss->name)) {
4914 ss->disabled = 1; 5040 ss->disabled = 1;
4915 printk(KERN_INFO "Disabling %s control group" 5041 printk(KERN_INFO "Disabling %s control group"