aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRoman Gushchin <guro@fb.com>2017-07-28 13:28:44 -0400
committerTejun Heo <tj@kernel.org>2017-08-02 15:05:20 -0400
commit1a926e0bbab83bae8207d05a533173425e0496d1 (patch)
tree8b0d77250135e254907205b4711e7a7b7174b78e
parent0679dee03c6d706d57145ea92c23d08fa10a1999 (diff)
cgroup: implement hierarchy limits
Creating cgroup hierarchies of unreasonable size can affect overall system performance. A user might want to limit the size of cgroup hierarchy. This is especially important if a user is delegating some cgroup sub-tree. To address this issue, introduce an ability to control the size of cgroup hierarchy. The cgroup.max.descendants control file allows to set the maximum allowed number of descendant cgroups. The cgroup.max.depth file controls the maximum depth of the cgroup tree. Both are single value r/w files, with "max" default value. The control files exist on each hierarchy level (including root). When a new cgroup is created, we check the total descendants and depth limits on each level, and if none of them are exceeded, a new cgroup is created. Only alive cgroups are counted, removed (dying) cgroups are ignored. Signed-off-by: Roman Gushchin <guro@fb.com> Suggested-by: Tejun Heo <tj@kernel.org> Signed-off-by: Tejun Heo <tj@kernel.org> Cc: Zefan Li <lizefan@huawei.com> Cc: Waiman Long <longman@redhat.com> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: kernel-team@fb.com Cc: cgroups@vger.kernel.org Cc: linux-doc@vger.kernel.org Cc: linux-kernel@vger.kernel.org
-rw-r--r--Documentation/cgroup-v2.txt14
-rw-r--r--include/linux/cgroup-defs.h5
-rw-r--r--kernel/cgroup/cgroup.c126
3 files changed, 145 insertions, 0 deletions
diff --git a/Documentation/cgroup-v2.txt b/Documentation/cgroup-v2.txt
index dec5afdaa36d..46ec3f76211c 100644
--- a/Documentation/cgroup-v2.txt
+++ b/Documentation/cgroup-v2.txt
@@ -854,6 +854,20 @@ All cgroup core files are prefixed with "cgroup."
854 1 if the cgroup or its descendants contains any live 854 1 if the cgroup or its descendants contains any live
855 processes; otherwise, 0. 855 processes; otherwise, 0.
856 856
857 cgroup.max.descendants
858 A read-write single value file. The default is "max".
859
860 Maximum allowed number of descendant cgroups.
861 If the actual number of descendants is equal or larger,
862 an attempt to create a new cgroup in the hierarchy will fail.
863
864 cgroup.max.depth
865 A read-write single value file. The default is "max".
866
867 Maximum allowed descent depth below the current cgroup.
868 If the actual descent depth is equal or larger,
869 an attempt to create a new child cgroup will fail.
870
857 871
858Controllers 872Controllers
859=========== 873===========
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 58b4c425a155..59e4ad9e7bac 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -273,13 +273,18 @@ struct cgroup {
273 */ 273 */
274 int level; 274 int level;
275 275
276 /* Maximum allowed descent tree depth */
277 int max_depth;
278
276 /* 279 /*
277 * Keep track of total numbers of visible and dying descent cgroups. 280 * Keep track of total numbers of visible and dying descent cgroups.
278 * Dying cgroups are cgroups which were deleted by a user, 281 * Dying cgroups are cgroups which were deleted by a user,
279 * but are still existing because someone else is holding a reference. 282 * but are still existing because someone else is holding a reference.
283 * max_descendants is a maximum allowed number of descent cgroups.
280 */ 284 */
281 int nr_descendants; 285 int nr_descendants;
282 int nr_dying_descendants; 286 int nr_dying_descendants;
287 int max_descendants;
283 288
284 /* 289 /*
285 * Each non-empty css_set associated with this cgroup contributes 290 * Each non-empty css_set associated with this cgroup contributes
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index cfdbb1e780de..0fd9134e1720 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -1827,6 +1827,8 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
1827 cgrp->self.cgroup = cgrp; 1827 cgrp->self.cgroup = cgrp;
1828 cgrp->self.flags |= CSS_ONLINE; 1828 cgrp->self.flags |= CSS_ONLINE;
1829 cgrp->dom_cgrp = cgrp; 1829 cgrp->dom_cgrp = cgrp;
1830 cgrp->max_descendants = INT_MAX;
1831 cgrp->max_depth = INT_MAX;
1830 1832
1831 for_each_subsys(ss, ssid) 1833 for_each_subsys(ss, ssid)
1832 INIT_LIST_HEAD(&cgrp->e_csets[ssid]); 1834 INIT_LIST_HEAD(&cgrp->e_csets[ssid]);
@@ -3209,6 +3211,92 @@ static ssize_t cgroup_type_write(struct kernfs_open_file *of, char *buf,
3209 return ret ?: nbytes; 3211 return ret ?: nbytes;
3210} 3212}
3211 3213
3214static int cgroup_max_descendants_show(struct seq_file *seq, void *v)
3215{
3216 struct cgroup *cgrp = seq_css(seq)->cgroup;
3217 int descendants = READ_ONCE(cgrp->max_descendants);
3218
3219 if (descendants == INT_MAX)
3220 seq_puts(seq, "max\n");
3221 else
3222 seq_printf(seq, "%d\n", descendants);
3223
3224 return 0;
3225}
3226
3227static ssize_t cgroup_max_descendants_write(struct kernfs_open_file *of,
3228 char *buf, size_t nbytes, loff_t off)
3229{
3230 struct cgroup *cgrp;
3231 int descendants;
3232 ssize_t ret;
3233
3234 buf = strstrip(buf);
3235 if (!strcmp(buf, "max")) {
3236 descendants = INT_MAX;
3237 } else {
3238 ret = kstrtoint(buf, 0, &descendants);
3239 if (ret)
3240 return ret;
3241 }
3242
3243 if (descendants < 0 || descendants > INT_MAX)
3244 return -ERANGE;
3245
3246 cgrp = cgroup_kn_lock_live(of->kn, false);
3247 if (!cgrp)
3248 return -ENOENT;
3249
3250 cgrp->max_descendants = descendants;
3251
3252 cgroup_kn_unlock(of->kn);
3253
3254 return nbytes;
3255}
3256
3257static int cgroup_max_depth_show(struct seq_file *seq, void *v)
3258{
3259 struct cgroup *cgrp = seq_css(seq)->cgroup;
3260 int depth = READ_ONCE(cgrp->max_depth);
3261
3262 if (depth == INT_MAX)
3263 seq_puts(seq, "max\n");
3264 else
3265 seq_printf(seq, "%d\n", depth);
3266
3267 return 0;
3268}
3269
3270static ssize_t cgroup_max_depth_write(struct kernfs_open_file *of,
3271 char *buf, size_t nbytes, loff_t off)
3272{
3273 struct cgroup *cgrp;
3274 ssize_t ret;
3275 int depth;
3276
3277 buf = strstrip(buf);
3278 if (!strcmp(buf, "max")) {
3279 depth = INT_MAX;
3280 } else {
3281 ret = kstrtoint(buf, 0, &depth);
3282 if (ret)
3283 return ret;
3284 }
3285
3286 if (depth < 0 || depth > INT_MAX)
3287 return -ERANGE;
3288
3289 cgrp = cgroup_kn_lock_live(of->kn, false);
3290 if (!cgrp)
3291 return -ENOENT;
3292
3293 cgrp->max_depth = depth;
3294
3295 cgroup_kn_unlock(of->kn);
3296
3297 return nbytes;
3298}
3299
3212static int cgroup_events_show(struct seq_file *seq, void *v) 3300static int cgroup_events_show(struct seq_file *seq, void *v)
3213{ 3301{
3214 seq_printf(seq, "populated %d\n", 3302 seq_printf(seq, "populated %d\n",
@@ -4309,6 +4397,16 @@ static struct cftype cgroup_base_files[] = {
4309 .file_offset = offsetof(struct cgroup, events_file), 4397 .file_offset = offsetof(struct cgroup, events_file),
4310 .seq_show = cgroup_events_show, 4398 .seq_show = cgroup_events_show,
4311 }, 4399 },
4400 {
4401 .name = "cgroup.max.descendants",
4402 .seq_show = cgroup_max_descendants_show,
4403 .write = cgroup_max_descendants_write,
4404 },
4405 {
4406 .name = "cgroup.max.depth",
4407 .seq_show = cgroup_max_depth_show,
4408 .write = cgroup_max_depth_write,
4409 },
4312 { } /* terminate */ 4410 { } /* terminate */
4313}; 4411};
4314 4412
@@ -4662,6 +4760,29 @@ out_free_cgrp:
4662 return ERR_PTR(ret); 4760 return ERR_PTR(ret);
4663} 4761}
4664 4762
4763static bool cgroup_check_hierarchy_limits(struct cgroup *parent)
4764{
4765 struct cgroup *cgroup;
4766 int ret = false;
4767 int level = 1;
4768
4769 lockdep_assert_held(&cgroup_mutex);
4770
4771 for (cgroup = parent; cgroup; cgroup = cgroup_parent(cgroup)) {
4772 if (cgroup->nr_descendants >= cgroup->max_descendants)
4773 goto fail;
4774
4775 if (level > cgroup->max_depth)
4776 goto fail;
4777
4778 level++;
4779 }
4780
4781 ret = true;
4782fail:
4783 return ret;
4784}
4785
4665int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, umode_t mode) 4786int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, umode_t mode)
4666{ 4787{
4667 struct cgroup *parent, *cgrp; 4788 struct cgroup *parent, *cgrp;
@@ -4676,6 +4797,11 @@ int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, umode_t mode)
4676 if (!parent) 4797 if (!parent)
4677 return -ENODEV; 4798 return -ENODEV;
4678 4799
4800 if (!cgroup_check_hierarchy_limits(parent)) {
4801 ret = -EAGAIN;
4802 goto out_unlock;
4803 }
4804
4679 cgrp = cgroup_create(parent); 4805 cgrp = cgroup_create(parent);
4680 if (IS_ERR(cgrp)) { 4806 if (IS_ERR(cgrp)) {
4681 ret = PTR_ERR(cgrp); 4807 ret = PTR_ERR(cgrp);