aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Tejun Heo <tj@kernel.org> 2016-09-23 16:55:49 -0400
committer: Tejun Heo <tj@kernel.org> 2016-09-23 16:55:49 -0400
commit: 9157056da8f8c4a6305f15619e269f164b63a6de (patch)
tree: 3b2cea5209fae8b62b46ab19c3ce4d4a897b9b40
parent: 8a15b81741879fa89601b03c0e50b0d780d65bc0 (diff)
cgroup: fix invalid controller enable rejections with cgroup namespace
On the v2 hierarchy, "cgroup.subtree_control" rejects controller enables if the cgroup has processes in it. The enforcement of this logic assumes that the cgroup wouldn't have any css_sets associated with it if there are no tasks in the cgroup, which is no longer true since a79a908fd2b0 ("cgroup: introduce cgroup namespaces").

When a cgroup namespace is created, it pins the css_set of the creating task to use it as the root css_set of the namespace. This extra reference stays as long as the namespace is around and makes "cgroup.subtree_control" think that the namespace root cgroup is not empty even when it is, and thus reject controller enables.

Fix it by making cgroup_subtree_control_write() walk and test emptiness of each css_set instead of testing whether the list_head is empty.

While at it, update the comment of cgroup_task_count() to indicate that the returned value may be higher than the number of tasks, which has always been true due to temporary references and doesn't break anything.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Evgeny Vereshchagin <evvers@ya.ru>
Cc: Serge E. Hallyn <serge.hallyn@ubuntu.com>
Cc: Aditya Kali <adityakali@google.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: stable@vger.kernel.org # v4.6+
Fixes: a79a908fd2b0 ("cgroup: introduce cgroup namespaces")
Link: https://github.com/systemd/systemd/pull/3589#issuecomment-249089541
-rw-r--r-- kernel/cgroup.c 29
1 file changed, 25 insertions, 4 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index d1c51b7f5221..0d4ee1ea5c31 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -3446,9 +3446,28 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
3446 * Except for the root, subtree_control must be zero for a cgroup 3446 * Except for the root, subtree_control must be zero for a cgroup
3447 * with tasks so that child cgroups don't compete against tasks. 3447 * with tasks so that child cgroups don't compete against tasks.
3448 */ 3448 */
3449 if (enable && cgroup_parent(cgrp) && !list_empty(&cgrp->cset_links)) { 3449 if (enable && cgroup_parent(cgrp)) {
3450 ret = -EBUSY; 3450 struct cgrp_cset_link *link;
3451 goto out_unlock; 3451
3452 /*
3453 * Because namespaces pin csets too, @cgrp->cset_links
3454 * might not be empty even when @cgrp is empty. Walk and
3455 * verify each cset.
3456 */
3457 spin_lock_irq(&css_set_lock);
3458
3459 ret = 0;
3460 list_for_each_entry(link, &cgrp->cset_links, cset_link) {
3461 if (css_set_populated(link->cset)) {
3462 ret = -EBUSY;
3463 break;
3464 }
3465 }
3466
3467 spin_unlock_irq(&css_set_lock);
3468
3469 if (ret)
3470 goto out_unlock;
3452 } 3471 }
3453 3472
3454 /* save and update control masks and prepare csses */ 3473 /* save and update control masks and prepare csses */
@@ -3899,7 +3918,9 @@ void cgroup_file_notify(struct cgroup_file *cfile)
3899 * cgroup_task_count - count the number of tasks in a cgroup. 3918 * cgroup_task_count - count the number of tasks in a cgroup.
3900 * @cgrp: the cgroup in question 3919 * @cgrp: the cgroup in question
3901 * 3920 *
3902 * Return the number of tasks in the cgroup. 3921 * Return the number of tasks in the cgroup. The returned number can be
3922 * higher than the actual number of tasks due to css_set references from
3923 * namespace roots and temporary usages.
3903 */ 3924 */
3904static int cgroup_task_count(const struct cgroup *cgrp) 3925static int cgroup_task_count(const struct cgroup *cgrp)
3905{ 3926{