aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTejun Heo <tj@kernel.org>2012-09-13 15:20:58 -0400
committerTejun Heo <tj@kernel.org>2012-09-14 15:01:16 -0400
commit8c7f6edbda01f1b1a2e60ad61f14fe38023e433b (patch)
treef1db9712b109575cba86e37140e1e4f8a56ca780
parentfbcbe2b3c92ee1c930dcfcf8bb764074c100fd63 (diff)
cgroup: mark subsystems with broken hierarchy support and whine if cgroups are nested for them
Currently, cgroup hierarchy support is a mess. cpu related subsystems behave correctly - configuration, accounting and control on a parent properly cover its children. blkio and freezer completely ignore hierarchy and treat all cgroups as if they're directly under the root cgroup. Others show yet different behaviors. These differing interpretations of cgroup hierarchy make using cgroup confusing and make it impossible to co-mount controllers into the same hierarchy and obtain sane behavior. Eventually, we want full hierarchy support from all subsystems and probably a unified hierarchy. Users using separate hierarchies expecting completely different behaviors depending on the mounted subsystem is detrimental to making any progress on this front. This patch adds cgroup_subsys.broken_hierarchy and sets it to %true for controllers which are lacking in hierarchy support. The goal of this patch is two-fold. * Move users away from using hierarchy on currently non-hierarchical subsystems, so that implementing proper hierarchy support on those doesn't surprise them. * Keep track of which controllers are broken and how, and nudge the subsystems to implement proper hierarchy support. For now, start with a single warning message. We can whine louder later on. v2: Fixed a typo spotted by Michal. Warning message updated. v3: Updated memcg part so that it doesn't generate warning in the cases where .use_hierarchy=false doesn't make the behavior different from root.use_hierarchy=true. Fixed a typo spotted by Glauber. v4: Check ->broken_hierarchy after cgroup creation is complete so that ->create() can affect the result per Michal. Dropped unnecessary memcg root handling per Michal. Signed-off-by: Tejun Heo <tj@kernel.org> Acked-by: Michal Hocko <mhocko@suse.cz> Acked-by: Li Zefan <lizefan@huawei.com> Acked-by: Serge E. 
Hallyn <serue@us.ibm.com> Cc: Glauber Costa <glommer@parallels.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Paul Turner <pjt@google.com> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Thomas Graf <tgraf@suug.ch> Cc: Vivek Goyal <vgoyal@redhat.com> Cc: Paul Mackerras <paulus@samba.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Arnaldo Carvalho de Melo <acme@ghostprotocols.net> Cc: Neil Horman <nhorman@tuxdriver.com> Cc: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
-rw-r--r--block/blk-cgroup.c8
-rw-r--r--include/linux/cgroup.h15
-rw-r--r--kernel/cgroup.c12
-rw-r--r--kernel/cgroup_freezer.c8
-rw-r--r--kernel/events/core.c7
-rw-r--r--mm/memcontrol.c7
-rw-r--r--net/core/netprio_cgroup.c12
-rw-r--r--net/sched/cls_cgroup.c9
-rw-r--r--security/device_cgroup.c9
9 files changed, 85 insertions, 2 deletions
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index f3b44a65fc7..cafcd743118 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -737,6 +737,14 @@ struct cgroup_subsys blkio_subsys = {
737 .subsys_id = blkio_subsys_id, 737 .subsys_id = blkio_subsys_id,
738 .base_cftypes = blkcg_files, 738 .base_cftypes = blkcg_files,
739 .module = THIS_MODULE, 739 .module = THIS_MODULE,
740
741 /*
742 * blkio subsystem is utterly broken in terms of hierarchy support.
743 * It treats all cgroups equally regardless of where they're
744 * located in the hierarchy - all cgroups are treated as if they're
745 * right below the root. Fix it and remove the following.
746 */
747 .broken_hierarchy = true,
740}; 748};
741EXPORT_SYMBOL_GPL(blkio_subsys); 749EXPORT_SYMBOL_GPL(blkio_subsys);
742 750
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index c90eaa80344..68e8df70487 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -496,6 +496,21 @@ struct cgroup_subsys {
496 */ 496 */
497 bool __DEPRECATED_clear_css_refs; 497 bool __DEPRECATED_clear_css_refs;
498 498
499 /*
500 * If %false, this subsystem is properly hierarchical -
501 * configuration, resource accounting and restriction on a parent
502 * cgroup cover those of its children. If %true, hierarchy support
503 * is broken in some ways - some subsystems ignore hierarchy
504 * completely while others are only implemented half-way.
505 *
506 * Creating nested cgroups is discouraged if the subsystem is
507 * broken; cgroup core emits a one-time warning message in such
508 * cases. Eventually, all subsystems will be made properly
509 * hierarchical and this will go away.
510 */
511 bool broken_hierarchy;
512 bool warned_broken_hierarchy;
513
499#define MAX_CGROUP_TYPE_NAMELEN 32 514#define MAX_CGROUP_TYPE_NAMELEN 32
500 const char *name; 515 const char *name;
501 516
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 79818507e44..b7d9606b17d 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -3954,8 +3954,9 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
3954 set_bit(CGRP_CLONE_CHILDREN, &cgrp->flags); 3954 set_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);
3955 3955
3956 for_each_subsys(root, ss) { 3956 for_each_subsys(root, ss) {
3957 struct cgroup_subsys_state *css = ss->create(cgrp); 3957 struct cgroup_subsys_state *css;
3958 3958
3959 css = ss->create(cgrp);
3959 if (IS_ERR(css)) { 3960 if (IS_ERR(css)) {
3960 err = PTR_ERR(css); 3961 err = PTR_ERR(css);
3961 goto err_destroy; 3962 goto err_destroy;
@@ -3969,6 +3970,15 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
3969 /* At error, ->destroy() callback has to free assigned ID. */ 3970 /* At error, ->destroy() callback has to free assigned ID. */
3970 if (clone_children(parent) && ss->post_clone) 3971 if (clone_children(parent) && ss->post_clone)
3971 ss->post_clone(cgrp); 3972 ss->post_clone(cgrp);
3973
3974 if (ss->broken_hierarchy && !ss->warned_broken_hierarchy &&
3975 parent->parent) {
3976 pr_warning("cgroup: %s (%d) created nested cgroup for controller \"%s\" which has incomplete hierarchy support. Nested cgroups may change behavior in the future.\n",
3977 current->comm, current->pid, ss->name);
3978 if (!strcmp(ss->name, "memory"))
3979 pr_warning("cgroup: \"memory\" requires setting use_hierarchy to 1 on the root.\n");
3980 ss->warned_broken_hierarchy = true;
3981 }
3972 } 3982 }
3973 3983
3974 list_add(&cgrp->sibling, &cgrp->parent->children); 3984 list_add(&cgrp->sibling, &cgrp->parent->children);
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index 3649fc6b3ea..b1724ce9898 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -373,4 +373,12 @@ struct cgroup_subsys freezer_subsys = {
373 .can_attach = freezer_can_attach, 373 .can_attach = freezer_can_attach,
374 .fork = freezer_fork, 374 .fork = freezer_fork,
375 .base_cftypes = files, 375 .base_cftypes = files,
376
377 /*
378 * freezer subsys doesn't handle hierarchy at all. Frozen state
379 * should be inherited through the hierarchy - if a parent is
380 * frozen, all its children should be frozen. Fix it and remove
381 * the following.
382 */
383 .broken_hierarchy = true,
376}; 384};
diff --git a/kernel/events/core.c b/kernel/events/core.c
index b7935fcec7d..f18a0a56e5a 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7285,5 +7285,12 @@ struct cgroup_subsys perf_subsys = {
7285 .destroy = perf_cgroup_destroy, 7285 .destroy = perf_cgroup_destroy,
7286 .exit = perf_cgroup_exit, 7286 .exit = perf_cgroup_exit,
7287 .attach = perf_cgroup_attach, 7287 .attach = perf_cgroup_attach,
7288
7289 /*
7290 * perf_event cgroup doesn't handle nesting correctly.
7291 * ctx->nr_cgroups adjustments should be propagated through the
7292 * cgroup hierarchy. Fix it and remove the following.
7293 */
7294 .broken_hierarchy = true,
7288}; 7295};
7289#endif /* CONFIG_CGROUP_PERF */ 7296#endif /* CONFIG_CGROUP_PERF */
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 795e525afab..a72f2ffdc3d 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4973,6 +4973,13 @@ mem_cgroup_create(struct cgroup *cont)
4973 } else { 4973 } else {
4974 res_counter_init(&memcg->res, NULL); 4974 res_counter_init(&memcg->res, NULL);
4975 res_counter_init(&memcg->memsw, NULL); 4975 res_counter_init(&memcg->memsw, NULL);
4976 /*
4977 * Deeper hierarchy with use_hierarchy == false doesn't make
4978 * much sense so let cgroup subsystem know about this
4979 * unfortunate state in our controller.
4980 */
4981 if (parent && parent != root_mem_cgroup)
4982 mem_cgroup_subsys.broken_hierarchy = true;
4976 } 4983 }
4977 memcg->last_scanned_node = MAX_NUMNODES; 4984 memcg->last_scanned_node = MAX_NUMNODES;
4978 INIT_LIST_HEAD(&memcg->oom_notify); 4985 INIT_LIST_HEAD(&memcg->oom_notify);
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index c75e3f9d060..34f3615b30c 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -330,7 +330,17 @@ struct cgroup_subsys net_prio_subsys = {
330 .subsys_id = net_prio_subsys_id, 330 .subsys_id = net_prio_subsys_id,
331#endif 331#endif
332 .base_cftypes = ss_files, 332 .base_cftypes = ss_files,
333 .module = THIS_MODULE 333 .module = THIS_MODULE,
334
335 /*
336 * net_prio has artificial limit on the number of cgroups and
337 * disallows nesting making it impossible to co-mount it with other
338 * hierarchical subsystems. Remove the artificially low PRIOIDX_SZ
339 * limit and properly nest configuration such that children follow
340 * their parents' configurations by default and are allowed to
341 * override and remove the following.
342 */
343 .broken_hierarchy = true,
334}; 344};
335 345
336static int netprio_device_event(struct notifier_block *unused, 346static int netprio_device_event(struct notifier_block *unused,
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 7743ea8d1d3..907daf99ab2 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -82,6 +82,15 @@ struct cgroup_subsys net_cls_subsys = {
82#endif 82#endif
83 .base_cftypes = ss_files, 83 .base_cftypes = ss_files,
84 .module = THIS_MODULE, 84 .module = THIS_MODULE,
85
86 /*
87 * While net_cls cgroup has the rudimentary hierarchy support of
88 * inheriting the parent's classid on cgroup creation, it doesn't
89 * properly propagate config changes in ancestors to their
90 * descendants. A child should follow the parent's configuration
91 * but be allowed to override it. Fix it and remove the following.
92 */
93 .broken_hierarchy = true,
85}; 94};
86 95
87struct cls_cgroup_head { 96struct cls_cgroup_head {
diff --git a/security/device_cgroup.c b/security/device_cgroup.c
index 442204cc22d..4b877a92a7e 100644
--- a/security/device_cgroup.c
+++ b/security/device_cgroup.c
@@ -457,6 +457,15 @@ struct cgroup_subsys devices_subsys = {
457 .destroy = devcgroup_destroy, 457 .destroy = devcgroup_destroy,
458 .subsys_id = devices_subsys_id, 458 .subsys_id = devices_subsys_id,
459 .base_cftypes = dev_cgroup_files, 459 .base_cftypes = dev_cgroup_files,
460
461 /*
462 * While devices cgroup has the rudimentary hierarchy support which
463 * checks the parent's restriction, it doesn't properly propagate
464 * config changes in ancestors to their descendants. A child
465 * should only be allowed to add more restrictions to the parent's
466 * configuration. Fix it and remove the following.
467 */
468 .broken_hierarchy = true,
460}; 469};
461 470
462int __devcgroup_inode_permission(struct inode *inode, int mask) 471int __devcgroup_inode_permission(struct inode *inode, int mask)