aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTejun Heo <tj@kernel.org>2013-04-14 23:15:26 -0400
committerTejun Heo <tj@kernel.org>2013-04-14 23:15:26 -0400
commit873fe09ea5df6ccf6bb34811d8c9992aacb67598 (patch)
treeef8d3afc31460ed9fdfd16e70e4c0916d42bcbd6
parent25a7e6848db76e22677aff202d9c4ef3503be15b (diff)
cgroup: introduce sane_behavior mount option
It's a sad fact that at this point various cgroup controllers are carrying so many idiosyncrasies and pure insanities that it simply isn't possible to reach any sort of sane consistent behavior while staying fully compatible with what already has been exposed to userland. As we can't break the exposed userland interface, transitioning to sane behaviors can only be done in steps while maintaining backwards compatibility. This patch introduces a new mount option - __DEVEL__sane_behavior - which disables crazy features and enforces consistent behaviors in cgroup core proper and various controllers. As exactly which behaviors it changes is still being determined, the mount option, at this point, is useful only for development of the new behaviors. As such, the mount option is prefixed with __DEVEL__ and generates a warning message when used. Eventually, once we get to the point where all controllers' behaviors are consistent enough to implement unified hierarchy, the __DEVEL__ prefix will be dropped, and more importantly, unified-hierarchy will enforce sane_behavior by default. Maybe we'll be able to completely drop the crazy stuff after a while, maybe not, but we at least have a strategy to move on to saner behaviors. This patch introduces the mount option and changes the following behaviors in cgroup core. * Mount options "noprefix" and "clone_children" are disallowed. Also, cgroupfs file cgroup.clone_children is not created. * When mounting an existing superblock, mount options should match. This is currently pretty crazy. If one mounts a cgroup, creates a subdirectory, unmounts it and then mounts it again with different options, it looks like the new options are applied but they aren't. * Remount is disallowed. The behavior changes are documented in the comment above CGRP_ROOT_SANE_BEHAVIOR enum and will be expanded as different controllers are converted and planned improvements progress. 
v2: Dropped unnecessary explicit file permission setting from the sane_behavior cftype entry as suggested by Li Zefan. Signed-off-by: Tejun Heo <tj@kernel.org> Acked-by: Serge E. Hallyn <serge.hallyn@ubuntu.com> Acked-by: Li Zefan <lizefan@huawei.com> Cc: Michal Hocko <mhocko@suse.cz> Cc: Vivek Goyal <vgoyal@redhat.com>
-rw-r--r--include/linux/cgroup.h43
-rw-r--r--kernel/cgroup.c48
2 files changed, 91 insertions, 0 deletions
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index b21881e1ea08..9c300ad9a911 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -156,6 +156,8 @@ enum {
156 * specified at mount time and thus is implemented here. 156 * specified at mount time and thus is implemented here.
157 */ 157 */
158 CGRP_CPUSET_CLONE_CHILDREN, 158 CGRP_CPUSET_CLONE_CHILDREN,
159 /* see the comment above CGRP_ROOT_SANE_BEHAVIOR for details */
160 CGRP_SANE_BEHAVIOR,
159}; 161};
160 162
161struct cgroup_name { 163struct cgroup_name {
@@ -243,6 +245,37 @@ struct cgroup {
243 245
244/* cgroupfs_root->flags */ 246/* cgroupfs_root->flags */
245enum { 247enum {
248 /*
249 * Unfortunately, cgroup core and various controllers are riddled
250 * with idiosyncrasies and pointless options. The following flag,
251 * when set, will force sane behavior - some options are forced on,
252 * others are disallowed, and some controllers will change their
253 * hierarchical or other behaviors.
254 *
255 * The set of behaviors affected by this flag are still being
256 * determined and developed and the mount option for this flag is
257 * prefixed with __DEVEL__. The prefix will be dropped once we
258 * reach the point where all behaviors are compatible with the
259 * planned unified hierarchy, which will automatically turn on this
260 * flag.
261 *
262 * The following are the behaviors currently affected by this flag.
263 *
264 * - Mount options "noprefix" and "clone_children" are disallowed.
265 * Also, cgroupfs file cgroup.clone_children is not created.
266 *
267 * - When mounting an existing superblock, mount options should
268 * match.
269 *
270 * - Remount is disallowed.
271 *
272 * The following are planned changes.
273 *
274 * - release_agent will be disallowed once replacement notification
275 * mechanism is implemented.
276 */
277 CGRP_ROOT_SANE_BEHAVIOR = (1 << 0),
278
246 CGRP_ROOT_NOPREFIX = (1 << 1), /* mounted subsystems have no named prefix */ 279 CGRP_ROOT_NOPREFIX = (1 << 1), /* mounted subsystems have no named prefix */
247 CGRP_ROOT_XATTR = (1 << 2), /* supports extended attributes */ 280 CGRP_ROOT_XATTR = (1 << 2), /* supports extended attributes */
248}; 281};
@@ -360,6 +393,7 @@ struct cgroup_map_cb {
360/* cftype->flags */ 393/* cftype->flags */
361#define CFTYPE_ONLY_ON_ROOT (1U << 0) /* only create on root cg */ 394#define CFTYPE_ONLY_ON_ROOT (1U << 0) /* only create on root cg */
362#define CFTYPE_NOT_ON_ROOT (1U << 1) /* don't create on root cg */ 395#define CFTYPE_NOT_ON_ROOT (1U << 1) /* don't create on root cg */
396#define CFTYPE_INSANE (1U << 2) /* don't create if sane_behavior */
363 397
364#define MAX_CFTYPE_NAME 64 398#define MAX_CFTYPE_NAME 64
365 399
@@ -486,6 +520,15 @@ struct cgroup_scanner {
486 void *data; 520 void *data;
487}; 521};
488 522
523/*
524 * See the comment above CGRP_ROOT_SANE_BEHAVIOR for details. This
525 * function can be called as long as @cgrp is accessible.
526 */
527static inline bool cgroup_sane_behavior(const struct cgroup *cgrp)
528{
529 return cgrp->root->flags & CGRP_ROOT_SANE_BEHAVIOR;
530}
531
489/* Caller should hold rcu_read_lock() */ 532/* Caller should hold rcu_read_lock() */
490static inline const char *cgroup_name(const struct cgroup *cgrp) 533static inline const char *cgroup_name(const struct cgroup *cgrp)
491{ 534{
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 8b8eb7c168ff..67804590d4b0 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1080,6 +1080,8 @@ static int cgroup_show_options(struct seq_file *seq, struct dentry *dentry)
1080 mutex_lock(&cgroup_root_mutex); 1080 mutex_lock(&cgroup_root_mutex);
1081 for_each_subsys(root, ss) 1081 for_each_subsys(root, ss)
1082 seq_printf(seq, ",%s", ss->name); 1082 seq_printf(seq, ",%s", ss->name);
1083 if (root->flags & CGRP_ROOT_SANE_BEHAVIOR)
1084 seq_puts(seq, ",sane_behavior");
1083 if (root->flags & CGRP_ROOT_NOPREFIX) 1085 if (root->flags & CGRP_ROOT_NOPREFIX)
1084 seq_puts(seq, ",noprefix"); 1086 seq_puts(seq, ",noprefix");
1085 if (root->flags & CGRP_ROOT_XATTR) 1087 if (root->flags & CGRP_ROOT_XATTR)
@@ -1144,6 +1146,10 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
1144 all_ss = true; 1146 all_ss = true;
1145 continue; 1147 continue;
1146 } 1148 }
1149 if (!strcmp(token, "__DEVEL__sane_behavior")) {
1150 opts->flags |= CGRP_ROOT_SANE_BEHAVIOR;
1151 continue;
1152 }
1147 if (!strcmp(token, "noprefix")) { 1153 if (!strcmp(token, "noprefix")) {
1148 opts->flags |= CGRP_ROOT_NOPREFIX; 1154 opts->flags |= CGRP_ROOT_NOPREFIX;
1149 continue; 1155 continue;
@@ -1231,6 +1237,20 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
1231 1237
1232 /* Consistency checks */ 1238 /* Consistency checks */
1233 1239
1240 if (opts->flags & CGRP_ROOT_SANE_BEHAVIOR) {
1241 pr_warning("cgroup: sane_behavior: this is still under development and its behaviors will change, proceed at your own risk\n");
1242
1243 if (opts->flags & CGRP_ROOT_NOPREFIX) {
1244 pr_err("cgroup: sane_behavior: noprefix is not allowed\n");
1245 return -EINVAL;
1246 }
1247
1248 if (opts->cpuset_clone_children) {
1249 pr_err("cgroup: sane_behavior: clone_children is not allowed\n");
1250 return -EINVAL;
1251 }
1252 }
1253
1234 /* 1254 /*
1235 * Option noprefix was introduced just for backward compatibility 1255 * Option noprefix was introduced just for backward compatibility
1236 * with the old cpuset, so we allow noprefix only if mounting just 1256 * with the old cpuset, so we allow noprefix only if mounting just
@@ -1307,6 +1327,11 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data)
1307 struct cgroup_sb_opts opts; 1327 struct cgroup_sb_opts opts;
1308 unsigned long added_mask, removed_mask; 1328 unsigned long added_mask, removed_mask;
1309 1329
1330 if (root->flags & CGRP_ROOT_SANE_BEHAVIOR) {
1331 pr_err("cgroup: sane_behavior: remount is not allowed\n");
1332 return -EINVAL;
1333 }
1334
1310 mutex_lock(&cgrp->dentry->d_inode->i_mutex); 1335 mutex_lock(&cgrp->dentry->d_inode->i_mutex);
1311 mutex_lock(&cgroup_mutex); 1336 mutex_lock(&cgroup_mutex);
1312 mutex_lock(&cgroup_root_mutex); 1337 mutex_lock(&cgroup_root_mutex);
@@ -1657,6 +1682,14 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
1657 * any) is not needed 1682 * any) is not needed
1658 */ 1683 */
1659 cgroup_drop_root(opts.new_root); 1684 cgroup_drop_root(opts.new_root);
1685
1686 if (((root->flags | opts.flags) & CGRP_ROOT_SANE_BEHAVIOR) &&
1687 root->flags != opts.flags) {
1688 pr_err("cgroup: sane_behavior: new mount options should match the existing superblock\n");
1689 ret = -EINVAL;
1690 goto drop_new_super;
1691 }
1692
1660 /* no subsys rebinding, so refcounts don't change */ 1693 /* no subsys rebinding, so refcounts don't change */
1661 drop_parsed_module_refcounts(opts.subsys_mask); 1694 drop_parsed_module_refcounts(opts.subsys_mask);
1662 } 1695 }
@@ -2200,6 +2233,13 @@ static int cgroup_release_agent_show(struct cgroup *cgrp, struct cftype *cft,
2200 return 0; 2233 return 0;
2201} 2234}
2202 2235
2236static int cgroup_sane_behavior_show(struct cgroup *cgrp, struct cftype *cft,
2237 struct seq_file *seq)
2238{
2239 seq_printf(seq, "%d\n", cgroup_sane_behavior(cgrp));
2240 return 0;
2241}
2242
2203/* A buffer size big enough for numbers or short strings */ 2243/* A buffer size big enough for numbers or short strings */
2204#define CGROUP_LOCAL_BUFFER_SIZE 64 2244#define CGROUP_LOCAL_BUFFER_SIZE 64
2205 2245
@@ -2681,6 +2721,8 @@ static int cgroup_addrm_files(struct cgroup *cgrp, struct cgroup_subsys *subsys,
2681 2721
2682 for (cft = cfts; cft->name[0] != '\0'; cft++) { 2722 for (cft = cfts; cft->name[0] != '\0'; cft++) {
2683 /* does cft->flags tell us to skip this file on @cgrp? */ 2723 /* does cft->flags tell us to skip this file on @cgrp? */
2724 if ((cft->flags & CFTYPE_INSANE) && cgroup_sane_behavior(cgrp))
2725 continue;
2684 if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgrp->parent) 2726 if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgrp->parent)
2685 continue; 2727 continue;
2686 if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && cgrp->parent) 2728 if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && cgrp->parent)
@@ -3918,10 +3960,16 @@ static struct cftype files[] = {
3918 }, 3960 },
3919 { 3961 {
3920 .name = "cgroup.clone_children", 3962 .name = "cgroup.clone_children",
3963 .flags = CFTYPE_INSANE,
3921 .read_u64 = cgroup_clone_children_read, 3964 .read_u64 = cgroup_clone_children_read,
3922 .write_u64 = cgroup_clone_children_write, 3965 .write_u64 = cgroup_clone_children_write,
3923 }, 3966 },
3924 { 3967 {
3968 .name = "cgroup.sane_behavior",
3969 .flags = CFTYPE_ONLY_ON_ROOT,
3970 .read_seq_string = cgroup_sane_behavior_show,
3971 },
3972 {
3925 .name = "release_agent", 3973 .name = "release_agent",
3926 .flags = CFTYPE_ONLY_ON_ROOT, 3974 .flags = CFTYPE_ONLY_ON_ROOT,
3927 .read_seq_string = cgroup_release_agent_show, 3975 .read_seq_string = cgroup_release_agent_show,