aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Daniel Lezcano <daniel.lezcano@free.fr> 2010-10-27 18:33:35 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2010-10-27 21:03:09 -0400
commit 97978e6d1f2da0073416870410459694fbdbfd9b (patch)
tree e8ff2fe4119d03fa54a45e8a101adbf9fb91a385
parent 2d3cbf8bc852ac1bc3d098186143c5973f87b753 (diff)
cgroup: add clone_children control file
The ns_cgroup is a control group interacting with the namespaces. When a new namespace is created, a corresponding cgroup is automatically created too. The cgroup name is the pid of the process that did 'unshare' or of the child of 'clone'. This cgroup is tied to the namespace because it prevents a process from escaping the control group, and it uses the post_clone callback so that the child cgroup inherits the values of the parent cgroup. Unfortunately, the more we use this cgroup, the more problems we face with it: (1) when a process unshares, the cgroup name may conflict with a previous cgroup with the same pid, so unshare or clone returns -EEXIST; (2) cgroup creation is out of control, because an application may create several namespaces, for which the system will automatically create several cgroups behind its back and leave them on the cgroupfs (e.g. a vrf based on the network namespace); (3) the combination of (1) and (2) forces an administrator to regularly check and clean up these cgroups. This patchset removes the ns_cgroup by adding a new flag to the cgroup and a new cgroupfs mount option. The flag enables the copy of the parent cgroup when a child cgroup is created. We can then safely remove the ns_cgroup, as this flag provides compatibility. We now have to manually create a cgroup and add the task to it, which is consistent with the cgroup framework. This patch: Sent as an answer to a previous thread around the ns_cgroup. https://lists.linux-foundation.org/pipermail/containers/2009-June/018627.html It adds a control file 'clone_children' for a cgroup. This control file is a boolean specifying whether the child cgroup should be a clone of the parent cgroup or not. The default value is 'false'. This flag makes the child cgroup call the post_clone callback of every subsystem, if it is available. At present, the cpuset is the only subsystem which has implemented the post_clone callback. The option can be set at mount time by specifying the 'clone_children' mount option.
Signed-off-by: Daniel Lezcano <daniel.lezcano@free.fr> Signed-off-by: Serge E. Hallyn <serge.hallyn@canonical.com> Cc: Eric W. Biederman <ebiederm@xmission.com> Acked-by: Paul Menage <menage@google.com> Reviewed-by: Li Zefan <lizf@cn.fujitsu.com> Cc: Jamal Hadi Salim <hadi@cyberus.ca> Cc: Matt Helsley <matthltc@us.ibm.com> Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- Documentation/cgroups/cgroups.txt 14
-rw-r--r-- include/linux/cgroup.h 4
-rw-r--r-- kernel/cgroup.c 39
3 files changed, 55 insertions, 2 deletions
diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt
index b34823ff1646..190018b0c649 100644
--- a/Documentation/cgroups/cgroups.txt
+++ b/Documentation/cgroups/cgroups.txt
@@ -18,7 +18,8 @@ CONTENTS:
18 1.2 Why are cgroups needed ? 18 1.2 Why are cgroups needed ?
19 1.3 How are cgroups implemented ? 19 1.3 How are cgroups implemented ?
20 1.4 What does notify_on_release do ? 20 1.4 What does notify_on_release do ?
21 1.5 How do I use cgroups ? 21 1.5 What does clone_children do ?
22 1.6 How do I use cgroups ?
222. Usage Examples and Syntax 232. Usage Examples and Syntax
23 2.1 Basic Usage 24 2.1 Basic Usage
24 2.2 Attaching processes 25 2.2 Attaching processes
@@ -293,7 +294,16 @@ notify_on_release in the root cgroup at system boot is disabled
293value of their parents notify_on_release setting. The default value of 294value of their parents notify_on_release setting. The default value of
294a cgroup hierarchy's release_agent path is empty. 295a cgroup hierarchy's release_agent path is empty.
295 296
2961.5 How do I use cgroups ? 2971.5 What does clone_children do ?
298---------------------------------
299
300If the clone_children flag is enabled (1) in a cgroup, then all
301cgroups created beneath will call the post_clone callbacks for each
302subsystem of the newly created cgroup. Usually when this callback is
303implemented for a subsystem, it copies the values of the parent
304subsystem; this is the case for the cpuset.
305
3061.6 How do I use cgroups ?
297-------------------------- 307--------------------------
298 308
299To start a new job that is to be contained within a cgroup, using 309To start a new job that is to be contained within a cgroup, using
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 709dfb901d11..ed4ba111bc8d 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -154,6 +154,10 @@ enum {
154 * A thread in rmdir() is waiting for this cgroup. 154 * A thread in rmdir() is waiting for this cgroup.
155 */ 155 */
156 CGRP_WAIT_ON_RMDIR, 156 CGRP_WAIT_ON_RMDIR,
157 /*
158 * Clone cgroup values when creating a new child cgroup
159 */
160 CGRP_CLONE_CHILDREN,
157}; 161};
158 162
159/* which pidlist file are we talking about? */ 163/* which pidlist file are we talking about? */
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 9270d532ec3c..4b218a46ddd3 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -243,6 +243,11 @@ static int notify_on_release(const struct cgroup *cgrp)
243 return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); 243 return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
244} 244}
245 245
246static int clone_children(const struct cgroup *cgrp)
247{
248 return test_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);
249}
250
246/* 251/*
247 * for_each_subsys() allows you to iterate on each subsystem attached to 252 * for_each_subsys() allows you to iterate on each subsystem attached to
248 * an active hierarchy 253 * an active hierarchy
@@ -1040,6 +1045,8 @@ static int cgroup_show_options(struct seq_file *seq, struct vfsmount *vfs)
1040 seq_puts(seq, ",noprefix"); 1045 seq_puts(seq, ",noprefix");
1041 if (strlen(root->release_agent_path)) 1046 if (strlen(root->release_agent_path))
1042 seq_printf(seq, ",release_agent=%s", root->release_agent_path); 1047 seq_printf(seq, ",release_agent=%s", root->release_agent_path);
1048 if (clone_children(&root->top_cgroup))
1049 seq_puts(seq, ",clone_children");
1043 if (strlen(root->name)) 1050 if (strlen(root->name))
1044 seq_printf(seq, ",name=%s", root->name); 1051 seq_printf(seq, ",name=%s", root->name);
1045 mutex_unlock(&cgroup_mutex); 1052 mutex_unlock(&cgroup_mutex);
@@ -1050,6 +1057,7 @@ struct cgroup_sb_opts {
1050 unsigned long subsys_bits; 1057 unsigned long subsys_bits;
1051 unsigned long flags; 1058 unsigned long flags;
1052 char *release_agent; 1059 char *release_agent;
1060 bool clone_children;
1053 char *name; 1061 char *name;
1054 /* User explicitly requested empty subsystem */ 1062 /* User explicitly requested empty subsystem */
1055 bool none; 1063 bool none;
@@ -1097,6 +1105,8 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
1097 opts->none = true; 1105 opts->none = true;
1098 } else if (!strcmp(token, "noprefix")) { 1106 } else if (!strcmp(token, "noprefix")) {
1099 set_bit(ROOT_NOPREFIX, &opts->flags); 1107 set_bit(ROOT_NOPREFIX, &opts->flags);
1108 } else if (!strcmp(token, "clone_children")) {
1109 opts->clone_children = true;
1100 } else if (!strncmp(token, "release_agent=", 14)) { 1110 } else if (!strncmp(token, "release_agent=", 14)) {
1101 /* Specifying two release agents is forbidden */ 1111 /* Specifying two release agents is forbidden */
1102 if (opts->release_agent) 1112 if (opts->release_agent)
@@ -1355,6 +1365,8 @@ static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts)
1355 strcpy(root->release_agent_path, opts->release_agent); 1365 strcpy(root->release_agent_path, opts->release_agent);
1356 if (opts->name) 1366 if (opts->name)
1357 strcpy(root->name, opts->name); 1367 strcpy(root->name, opts->name);
1368 if (opts->clone_children)
1369 set_bit(CGRP_CLONE_CHILDREN, &root->top_cgroup.flags);
1358 return root; 1370 return root;
1359} 1371}
1360 1372
@@ -3173,6 +3185,23 @@ fail:
3173 return ret; 3185 return ret;
3174} 3186}
3175 3187
3188static u64 cgroup_clone_children_read(struct cgroup *cgrp,
3189 struct cftype *cft)
3190{
3191 return clone_children(cgrp);
3192}
3193
3194static int cgroup_clone_children_write(struct cgroup *cgrp,
3195 struct cftype *cft,
3196 u64 val)
3197{
3198 if (val)
3199 set_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);
3200 else
3201 clear_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);
3202 return 0;
3203}
3204
3176/* 3205/*
3177 * for the common functions, 'private' gives the type of file 3206 * for the common functions, 'private' gives the type of file
3178 */ 3207 */
@@ -3203,6 +3232,11 @@ static struct cftype files[] = {
3203 .write_string = cgroup_write_event_control, 3232 .write_string = cgroup_write_event_control,
3204 .mode = S_IWUGO, 3233 .mode = S_IWUGO,
3205 }, 3234 },
3235 {
3236 .name = "cgroup.clone_children",
3237 .read_u64 = cgroup_clone_children_read,
3238 .write_u64 = cgroup_clone_children_write,
3239 },
3206}; 3240};
3207 3241
3208static struct cftype cft_release_agent = { 3242static struct cftype cft_release_agent = {
@@ -3332,6 +3366,9 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
3332 if (notify_on_release(parent)) 3366 if (notify_on_release(parent))
3333 set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); 3367 set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
3334 3368
3369 if (clone_children(parent))
3370 set_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);
3371
3335 for_each_subsys(root, ss) { 3372 for_each_subsys(root, ss) {
3336 struct cgroup_subsys_state *css = ss->create(ss, cgrp); 3373 struct cgroup_subsys_state *css = ss->create(ss, cgrp);
3337 3374
@@ -3346,6 +3383,8 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
3346 goto err_destroy; 3383 goto err_destroy;
3347 } 3384 }
3348 /* At error, ->destroy() callback has to free assigned ID. */ 3385 /* At error, ->destroy() callback has to free assigned ID. */
3386 if (clone_children(parent) && ss->post_clone)
3387 ss->post_clone(ss, cgrp);
3349 } 3388 }
3350 3389
3351 cgroup_lock_hierarchy(root); 3390 cgroup_lock_hierarchy(root);