aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/cgroup.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/cgroup.c')
-rw-r--r--kernel/cgroup.c145
1 files changed, 106 insertions, 39 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 291ba3d04bea..66a416b42c18 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -52,7 +52,6 @@
52#include <linux/cgroupstats.h> 52#include <linux/cgroupstats.h>
53#include <linux/hash.h> 53#include <linux/hash.h>
54#include <linux/namei.h> 54#include <linux/namei.h>
55#include <linux/smp_lock.h>
56#include <linux/pid_namespace.h> 55#include <linux/pid_namespace.h>
57#include <linux/idr.h> 56#include <linux/idr.h>
58#include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */ 57#include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */
@@ -244,6 +243,11 @@ static int notify_on_release(const struct cgroup *cgrp)
244 return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); 243 return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
245} 244}
246 245
246static int clone_children(const struct cgroup *cgrp)
247{
248 return test_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);
249}
250
247/* 251/*
248 * for_each_subsys() allows you to iterate on each subsystem attached to 252 * for_each_subsys() allows you to iterate on each subsystem attached to
249 * an active hierarchy 253 * an active hierarchy
@@ -778,6 +782,7 @@ static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb)
778 struct inode *inode = new_inode(sb); 782 struct inode *inode = new_inode(sb);
779 783
780 if (inode) { 784 if (inode) {
785 inode->i_ino = get_next_ino();
781 inode->i_mode = mode; 786 inode->i_mode = mode;
782 inode->i_uid = current_fsuid(); 787 inode->i_uid = current_fsuid();
783 inode->i_gid = current_fsgid(); 788 inode->i_gid = current_fsgid();
@@ -1040,6 +1045,8 @@ static int cgroup_show_options(struct seq_file *seq, struct vfsmount *vfs)
1040 seq_puts(seq, ",noprefix"); 1045 seq_puts(seq, ",noprefix");
1041 if (strlen(root->release_agent_path)) 1046 if (strlen(root->release_agent_path))
1042 seq_printf(seq, ",release_agent=%s", root->release_agent_path); 1047 seq_printf(seq, ",release_agent=%s", root->release_agent_path);
1048 if (clone_children(&root->top_cgroup))
1049 seq_puts(seq, ",clone_children");
1043 if (strlen(root->name)) 1050 if (strlen(root->name))
1044 seq_printf(seq, ",name=%s", root->name); 1051 seq_printf(seq, ",name=%s", root->name);
1045 mutex_unlock(&cgroup_mutex); 1052 mutex_unlock(&cgroup_mutex);
@@ -1050,6 +1057,7 @@ struct cgroup_sb_opts {
1050 unsigned long subsys_bits; 1057 unsigned long subsys_bits;
1051 unsigned long flags; 1058 unsigned long flags;
1052 char *release_agent; 1059 char *release_agent;
1060 bool clone_children;
1053 char *name; 1061 char *name;
1054 /* User explicitly requested empty subsystem */ 1062 /* User explicitly requested empty subsystem */
1055 bool none; 1063 bool none;
@@ -1066,7 +1074,8 @@ struct cgroup_sb_opts {
1066 */ 1074 */
1067static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) 1075static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
1068{ 1076{
1069 char *token, *o = data ?: "all"; 1077 char *token, *o = data;
1078 bool all_ss = false, one_ss = false;
1070 unsigned long mask = (unsigned long)-1; 1079 unsigned long mask = (unsigned long)-1;
1071 int i; 1080 int i;
1072 bool module_pin_failed = false; 1081 bool module_pin_failed = false;
@@ -1082,22 +1091,27 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
1082 while ((token = strsep(&o, ",")) != NULL) { 1091 while ((token = strsep(&o, ",")) != NULL) {
1083 if (!*token) 1092 if (!*token)
1084 return -EINVAL; 1093 return -EINVAL;
1085 if (!strcmp(token, "all")) { 1094 if (!strcmp(token, "none")) {
1086 /* Add all non-disabled subsystems */
1087 opts->subsys_bits = 0;
1088 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
1089 struct cgroup_subsys *ss = subsys[i];
1090 if (ss == NULL)
1091 continue;
1092 if (!ss->disabled)
1093 opts->subsys_bits |= 1ul << i;
1094 }
1095 } else if (!strcmp(token, "none")) {
1096 /* Explicitly have no subsystems */ 1095 /* Explicitly have no subsystems */
1097 opts->none = true; 1096 opts->none = true;
1098 } else if (!strcmp(token, "noprefix")) { 1097 continue;
1098 }
1099 if (!strcmp(token, "all")) {
1100 /* Mutually exclusive option 'all' + subsystem name */
1101 if (one_ss)
1102 return -EINVAL;
1103 all_ss = true;
1104 continue;
1105 }
1106 if (!strcmp(token, "noprefix")) {
1099 set_bit(ROOT_NOPREFIX, &opts->flags); 1107 set_bit(ROOT_NOPREFIX, &opts->flags);
1100 } else if (!strncmp(token, "release_agent=", 14)) { 1108 continue;
1109 }
1110 if (!strcmp(token, "clone_children")) {
1111 opts->clone_children = true;
1112 continue;
1113 }
1114 if (!strncmp(token, "release_agent=", 14)) {
1101 /* Specifying two release agents is forbidden */ 1115 /* Specifying two release agents is forbidden */
1102 if (opts->release_agent) 1116 if (opts->release_agent)
1103 return -EINVAL; 1117 return -EINVAL;
@@ -1105,7 +1119,9 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
1105 kstrndup(token + 14, PATH_MAX - 1, GFP_KERNEL); 1119 kstrndup(token + 14, PATH_MAX - 1, GFP_KERNEL);
1106 if (!opts->release_agent) 1120 if (!opts->release_agent)
1107 return -ENOMEM; 1121 return -ENOMEM;
1108 } else if (!strncmp(token, "name=", 5)) { 1122 continue;
1123 }
1124 if (!strncmp(token, "name=", 5)) {
1109 const char *name = token + 5; 1125 const char *name = token + 5;
1110 /* Can't specify an empty name */ 1126 /* Can't specify an empty name */
1111 if (!strlen(name)) 1127 if (!strlen(name))
@@ -1127,20 +1143,44 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
1127 GFP_KERNEL); 1143 GFP_KERNEL);
1128 if (!opts->name) 1144 if (!opts->name)
1129 return -ENOMEM; 1145 return -ENOMEM;
1130 } else { 1146
1131 struct cgroup_subsys *ss; 1147 continue;
1132 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { 1148 }
1133 ss = subsys[i]; 1149
1134 if (ss == NULL) 1150 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
1135 continue; 1151 struct cgroup_subsys *ss = subsys[i];
1136 if (!strcmp(token, ss->name)) { 1152 if (ss == NULL)
1137 if (!ss->disabled) 1153 continue;
1138 set_bit(i, &opts->subsys_bits); 1154 if (strcmp(token, ss->name))
1139 break; 1155 continue;
1140 } 1156 if (ss->disabled)
1141 } 1157 continue;
1142 if (i == CGROUP_SUBSYS_COUNT) 1158
1143 return -ENOENT; 1159 /* Mutually exclusive option 'all' + subsystem name */
1160 if (all_ss)
1161 return -EINVAL;
1162 set_bit(i, &opts->subsys_bits);
1163 one_ss = true;
1164
1165 break;
1166 }
1167 if (i == CGROUP_SUBSYS_COUNT)
1168 return -ENOENT;
1169 }
1170
1171 /*
1172 * If the 'all' option was specified select all the subsystems,
1173 * otherwise 'all, 'none' and a subsystem name options were not
1174 * specified, let's default to 'all'
1175 */
1176 if (all_ss || (!all_ss && !one_ss && !opts->none)) {
1177 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
1178 struct cgroup_subsys *ss = subsys[i];
1179 if (ss == NULL)
1180 continue;
1181 if (ss->disabled)
1182 continue;
1183 set_bit(i, &opts->subsys_bits);
1144 } 1184 }
1145 } 1185 }
1146 1186
@@ -1222,7 +1262,6 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data)
1222 struct cgroup *cgrp = &root->top_cgroup; 1262 struct cgroup *cgrp = &root->top_cgroup;
1223 struct cgroup_sb_opts opts; 1263 struct cgroup_sb_opts opts;
1224 1264
1225 lock_kernel();
1226 mutex_lock(&cgrp->dentry->d_inode->i_mutex); 1265 mutex_lock(&cgrp->dentry->d_inode->i_mutex);
1227 mutex_lock(&cgroup_mutex); 1266 mutex_lock(&cgroup_mutex);
1228 1267
@@ -1255,7 +1294,6 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data)
1255 kfree(opts.name); 1294 kfree(opts.name);
1256 mutex_unlock(&cgroup_mutex); 1295 mutex_unlock(&cgroup_mutex);
1257 mutex_unlock(&cgrp->dentry->d_inode->i_mutex); 1296 mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
1258 unlock_kernel();
1259 return ret; 1297 return ret;
1260} 1298}
1261 1299
@@ -1357,6 +1395,8 @@ static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts)
1357 strcpy(root->release_agent_path, opts->release_agent); 1395 strcpy(root->release_agent_path, opts->release_agent);
1358 if (opts->name) 1396 if (opts->name)
1359 strcpy(root->name, opts->name); 1397 strcpy(root->name, opts->name);
1398 if (opts->clone_children)
1399 set_bit(CGRP_CLONE_CHILDREN, &root->top_cgroup.flags);
1360 return root; 1400 return root;
1361} 1401}
1362 1402
@@ -1420,9 +1460,9 @@ static int cgroup_get_rootdir(struct super_block *sb)
1420 return 0; 1460 return 0;
1421} 1461}
1422 1462
1423static int cgroup_get_sb(struct file_system_type *fs_type, 1463static struct dentry *cgroup_mount(struct file_system_type *fs_type,
1424 int flags, const char *unused_dev_name, 1464 int flags, const char *unused_dev_name,
1425 void *data, struct vfsmount *mnt) 1465 void *data)
1426{ 1466{
1427 struct cgroup_sb_opts opts; 1467 struct cgroup_sb_opts opts;
1428 struct cgroupfs_root *root; 1468 struct cgroupfs_root *root;
@@ -1556,10 +1596,9 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
1556 drop_parsed_module_refcounts(opts.subsys_bits); 1596 drop_parsed_module_refcounts(opts.subsys_bits);
1557 } 1597 }
1558 1598
1559 simple_set_mnt(mnt, sb);
1560 kfree(opts.release_agent); 1599 kfree(opts.release_agent);
1561 kfree(opts.name); 1600 kfree(opts.name);
1562 return 0; 1601 return dget(sb->s_root);
1563 1602
1564 drop_new_super: 1603 drop_new_super:
1565 deactivate_locked_super(sb); 1604 deactivate_locked_super(sb);
@@ -1568,8 +1607,7 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
1568 out_err: 1607 out_err:
1569 kfree(opts.release_agent); 1608 kfree(opts.release_agent);
1570 kfree(opts.name); 1609 kfree(opts.name);
1571 1610 return ERR_PTR(ret);
1572 return ret;
1573} 1611}
1574 1612
1575static void cgroup_kill_sb(struct super_block *sb) { 1613static void cgroup_kill_sb(struct super_block *sb) {
@@ -1619,7 +1657,7 @@ static void cgroup_kill_sb(struct super_block *sb) {
1619 1657
1620static struct file_system_type cgroup_fs_type = { 1658static struct file_system_type cgroup_fs_type = {
1621 .name = "cgroup", 1659 .name = "cgroup",
1622 .get_sb = cgroup_get_sb, 1660 .mount = cgroup_mount,
1623 .kill_sb = cgroup_kill_sb, 1661 .kill_sb = cgroup_kill_sb,
1624}; 1662};
1625 1663
@@ -1883,6 +1921,8 @@ static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft,
1883 const char *buffer) 1921 const char *buffer)
1884{ 1922{
1885 BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX); 1923 BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX);
1924 if (strlen(buffer) >= PATH_MAX)
1925 return -EINVAL;
1886 if (!cgroup_lock_live_group(cgrp)) 1926 if (!cgroup_lock_live_group(cgrp))
1887 return -ENODEV; 1927 return -ENODEV;
1888 strcpy(cgrp->root->release_agent_path, buffer); 1928 strcpy(cgrp->root->release_agent_path, buffer);
@@ -3176,6 +3216,23 @@ fail:
3176 return ret; 3216 return ret;
3177} 3217}
3178 3218
3219static u64 cgroup_clone_children_read(struct cgroup *cgrp,
3220 struct cftype *cft)
3221{
3222 return clone_children(cgrp);
3223}
3224
3225static int cgroup_clone_children_write(struct cgroup *cgrp,
3226 struct cftype *cft,
3227 u64 val)
3228{
3229 if (val)
3230 set_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);
3231 else
3232 clear_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);
3233 return 0;
3234}
3235
3179/* 3236/*
3180 * for the common functions, 'private' gives the type of file 3237 * for the common functions, 'private' gives the type of file
3181 */ 3238 */
@@ -3206,6 +3263,11 @@ static struct cftype files[] = {
3206 .write_string = cgroup_write_event_control, 3263 .write_string = cgroup_write_event_control,
3207 .mode = S_IWUGO, 3264 .mode = S_IWUGO,
3208 }, 3265 },
3266 {
3267 .name = "cgroup.clone_children",
3268 .read_u64 = cgroup_clone_children_read,
3269 .write_u64 = cgroup_clone_children_write,
3270 },
3209}; 3271};
3210 3272
3211static struct cftype cft_release_agent = { 3273static struct cftype cft_release_agent = {
@@ -3335,6 +3397,9 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
3335 if (notify_on_release(parent)) 3397 if (notify_on_release(parent))
3336 set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); 3398 set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
3337 3399
3400 if (clone_children(parent))
3401 set_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);
3402
3338 for_each_subsys(root, ss) { 3403 for_each_subsys(root, ss) {
3339 struct cgroup_subsys_state *css = ss->create(ss, cgrp); 3404 struct cgroup_subsys_state *css = ss->create(ss, cgrp);
3340 3405
@@ -3349,6 +3414,8 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
3349 goto err_destroy; 3414 goto err_destroy;
3350 } 3415 }
3351 /* At error, ->destroy() callback has to free assigned ID. */ 3416 /* At error, ->destroy() callback has to free assigned ID. */
3417 if (clone_children(parent) && ss->post_clone)
3418 ss->post_clone(ss, cgrp);
3352 } 3419 }
3353 3420
3354 cgroup_lock_hierarchy(root); 3421 cgroup_lock_hierarchy(root);