aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
author: Daniel Lezcano <daniel.lezcano@free.fr> 2010-10-27 18:33:35 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2010-10-27 21:03:09 -0400
commit: 97978e6d1f2da0073416870410459694fbdbfd9b (patch)
tree: e8ff2fe4119d03fa54a45e8a101adbf9fb91a385 /kernel
parent: 2d3cbf8bc852ac1bc3d098186143c5973f87b753 (diff)
cgroup: add clone_children control file
The ns_cgroup is a control group that interacts with namespaces. When a new namespace is created, a corresponding cgroup is automatically created too. The cgroup name is the pid of the process that called 'unshare' or of the child created by 'clone'. This cgroup is tied to the namespace because it prevents a process from escaping the control group, and it uses the post_clone callback so that the child cgroup inherits the values of the parent cgroup. Unfortunately, the more we use this cgroup, the more problems we face with it: (1) when a process unshares, the cgroup name may conflict with a previous cgroup with the same pid, so unshare or clone returns -EEXIST; (2) cgroup creation is out of control, because an application may create several namespaces, for which the system automatically creates several cgroups behind its back and leaves them on the cgroupfs (e.g. a vrf based on the network namespace); (3) the combination of (1) and (2) forces an administrator to regularly check and clean up these cgroups. This patchset removes the ns_cgroup by adding a new flag to the cgroup and a new cgroupfs mount option. The flag enables copying of the parent cgroup when a child cgroup is created. We can then safely remove the ns_cgroup, as this flag provides compatibility. We now have to manually create a cgroup and add the task to it, which is consistent with the cgroup framework. This patch: Sent as an answer to a previous thread around the ns_cgroup. https://lists.linux-foundation.org/pipermail/containers/2009-June/018627.html It adds a control file 'clone_children' for a cgroup. This control file is a boolean specifying whether the child cgroup should be a clone of the parent cgroup or not. The default value is 'false'. This flag makes the child cgroup call the post_clone callback of every subsystem that provides one. At present, cpuset is the only subsystem that implements the post_clone callback. The option can be set at mount time by specifying the 'clone_children' mount option. 
Signed-off-by: Daniel Lezcano <daniel.lezcano@free.fr> Signed-off-by: Serge E. Hallyn <serge.hallyn@canonical.com> Cc: Eric W. Biederman <ebiederm@xmission.com> Acked-by: Paul Menage <menage@google.com> Reviewed-by: Li Zefan <lizf@cn.fujitsu.com> Cc: Jamal Hadi Salim <hadi@cyberus.ca> Cc: Matt Helsley <matthltc@us.ibm.com> Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/cgroup.c39
1 files changed, 39 insertions, 0 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 9270d532ec3c..4b218a46ddd3 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -243,6 +243,11 @@ static int notify_on_release(const struct cgroup *cgrp)
243 return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); 243 return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
244} 244}
245 245
246static int clone_children(const struct cgroup *cgrp)
247{
248 return test_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);
249}
250
246/* 251/*
247 * for_each_subsys() allows you to iterate on each subsystem attached to 252 * for_each_subsys() allows you to iterate on each subsystem attached to
248 * an active hierarchy 253 * an active hierarchy
@@ -1040,6 +1045,8 @@ static int cgroup_show_options(struct seq_file *seq, struct vfsmount *vfs)
1040 seq_puts(seq, ",noprefix"); 1045 seq_puts(seq, ",noprefix");
1041 if (strlen(root->release_agent_path)) 1046 if (strlen(root->release_agent_path))
1042 seq_printf(seq, ",release_agent=%s", root->release_agent_path); 1047 seq_printf(seq, ",release_agent=%s", root->release_agent_path);
1048 if (clone_children(&root->top_cgroup))
1049 seq_puts(seq, ",clone_children");
1043 if (strlen(root->name)) 1050 if (strlen(root->name))
1044 seq_printf(seq, ",name=%s", root->name); 1051 seq_printf(seq, ",name=%s", root->name);
1045 mutex_unlock(&cgroup_mutex); 1052 mutex_unlock(&cgroup_mutex);
@@ -1050,6 +1057,7 @@ struct cgroup_sb_opts {
1050 unsigned long subsys_bits; 1057 unsigned long subsys_bits;
1051 unsigned long flags; 1058 unsigned long flags;
1052 char *release_agent; 1059 char *release_agent;
1060 bool clone_children;
1053 char *name; 1061 char *name;
1054 /* User explicitly requested empty subsystem */ 1062 /* User explicitly requested empty subsystem */
1055 bool none; 1063 bool none;
@@ -1097,6 +1105,8 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
1097 opts->none = true; 1105 opts->none = true;
1098 } else if (!strcmp(token, "noprefix")) { 1106 } else if (!strcmp(token, "noprefix")) {
1099 set_bit(ROOT_NOPREFIX, &opts->flags); 1107 set_bit(ROOT_NOPREFIX, &opts->flags);
1108 } else if (!strcmp(token, "clone_children")) {
1109 opts->clone_children = true;
1100 } else if (!strncmp(token, "release_agent=", 14)) { 1110 } else if (!strncmp(token, "release_agent=", 14)) {
1101 /* Specifying two release agents is forbidden */ 1111 /* Specifying two release agents is forbidden */
1102 if (opts->release_agent) 1112 if (opts->release_agent)
@@ -1355,6 +1365,8 @@ static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts)
1355 strcpy(root->release_agent_path, opts->release_agent); 1365 strcpy(root->release_agent_path, opts->release_agent);
1356 if (opts->name) 1366 if (opts->name)
1357 strcpy(root->name, opts->name); 1367 strcpy(root->name, opts->name);
1368 if (opts->clone_children)
1369 set_bit(CGRP_CLONE_CHILDREN, &root->top_cgroup.flags);
1358 return root; 1370 return root;
1359} 1371}
1360 1372
@@ -3173,6 +3185,23 @@ fail:
3173 return ret; 3185 return ret;
3174} 3186}
3175 3187
3188static u64 cgroup_clone_children_read(struct cgroup *cgrp,
3189 struct cftype *cft)
3190{
3191 return clone_children(cgrp);
3192}
3193
3194static int cgroup_clone_children_write(struct cgroup *cgrp,
3195 struct cftype *cft,
3196 u64 val)
3197{
3198 if (val)
3199 set_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);
3200 else
3201 clear_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);
3202 return 0;
3203}
3204
3176/* 3205/*
3177 * for the common functions, 'private' gives the type of file 3206 * for the common functions, 'private' gives the type of file
3178 */ 3207 */
@@ -3203,6 +3232,11 @@ static struct cftype files[] = {
3203 .write_string = cgroup_write_event_control, 3232 .write_string = cgroup_write_event_control,
3204 .mode = S_IWUGO, 3233 .mode = S_IWUGO,
3205 }, 3234 },
3235 {
3236 .name = "cgroup.clone_children",
3237 .read_u64 = cgroup_clone_children_read,
3238 .write_u64 = cgroup_clone_children_write,
3239 },
3206}; 3240};
3207 3241
3208static struct cftype cft_release_agent = { 3242static struct cftype cft_release_agent = {
@@ -3332,6 +3366,9 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
3332 if (notify_on_release(parent)) 3366 if (notify_on_release(parent))
3333 set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); 3367 set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
3334 3368
3369 if (clone_children(parent))
3370 set_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);
3371
3335 for_each_subsys(root, ss) { 3372 for_each_subsys(root, ss) {
3336 struct cgroup_subsys_state *css = ss->create(ss, cgrp); 3373 struct cgroup_subsys_state *css = ss->create(ss, cgrp);
3337 3374
@@ -3346,6 +3383,8 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
3346 goto err_destroy; 3383 goto err_destroy;
3347 } 3384 }
3348 /* At error, ->destroy() callback has to free assigned ID. */ 3385 /* At error, ->destroy() callback has to free assigned ID. */
3386 if (clone_children(parent) && ss->post_clone)
3387 ss->post_clone(ss, cgrp);
3349 } 3388 }
3350 3389
3351 cgroup_lock_hierarchy(root); 3390 cgroup_lock_hierarchy(root);