diff options
author | Tejun Heo <tj@kernel.org> | 2016-09-23 16:55:49 -0400 |
---|---|---|
committer | Tejun Heo <tj@kernel.org> | 2016-09-23 16:55:49 -0400 |
commit | 9157056da8f8c4a6305f15619e269f164b63a6de (patch) | |
tree | 3b2cea5209fae8b62b46ab19c3ce4d4a897b9b40 | |
parent | 8a15b81741879fa89601b03c0e50b0d780d65bc0 (diff) |
cgroup: fix invalid controller enable rejections with cgroup namespace
On the v2 hierarchy, "cgroup.subtree_control" rejects controller
enables if the cgroup has processes in it. The enforcement of this
logic assumes that the cgroup wouldn't have any css_sets associated
with it if there are no tasks in the cgroup, which is no longer true
since a79a908fd2b0 ("cgroup: introduce cgroup namespaces").
When a cgroup namespace is created, it pins the css_set of the
creating task to use it as the root css_set of the namespace. This
extra reference stays as long as the namespace is around and makes
"cgroup.subtree_control" think that the namespace root cgroup is not
empty even when it is and thus reject controller enables.
Fix it by making cgroup_subtree_control() walk and test emptiness of
each css_set instead of testing whether the list_head is empty.
While at it, update the comment of cgroup_task_count() to indicate
that the returned value may be higher than the number of tasks, which
has always been true due to temporary references and doesn't break
anything.
Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Evgeny Vereshchagin <evvers@ya.ru>
Cc: Serge E. Hallyn <serge.hallyn@ubuntu.com>
Cc: Aditya Kali <adityakali@google.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: stable@vger.kernel.org # v4.6+
Fixes: a79a908fd2b0 ("cgroup: introduce cgroup namespaces")
Link: https://github.com/systemd/systemd/pull/3589#issuecomment-249089541
-rw-r--r-- | kernel/cgroup.c | 29 |
1 files changed, 25 insertions, 4 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index d1c51b7f5221..0d4ee1ea5c31 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
@@ -3446,9 +3446,28 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, | |||
3446 | * Except for the root, subtree_control must be zero for a cgroup | 3446 | * Except for the root, subtree_control must be zero for a cgroup |
3447 | * with tasks so that child cgroups don't compete against tasks. | 3447 | * with tasks so that child cgroups don't compete against tasks. |
3448 | */ | 3448 | */ |
3449 | if (enable && cgroup_parent(cgrp) && !list_empty(&cgrp->cset_links)) { | 3449 | if (enable && cgroup_parent(cgrp)) { |
3450 | ret = -EBUSY; | 3450 | struct cgrp_cset_link *link; |
3451 | goto out_unlock; | 3451 | |
3452 | /* | ||
3453 | * Because namespaces pin csets too, @cgrp->cset_links | ||
3454 | * might not be empty even when @cgrp is empty. Walk and | ||
3455 | * verify each cset. | ||
3456 | */ | ||
3457 | spin_lock_irq(&css_set_lock); | ||
3458 | |||
3459 | ret = 0; | ||
3460 | list_for_each_entry(link, &cgrp->cset_links, cset_link) { | ||
3461 | if (css_set_populated(link->cset)) { | ||
3462 | ret = -EBUSY; | ||
3463 | break; | ||
3464 | } | ||
3465 | } | ||
3466 | |||
3467 | spin_unlock_irq(&css_set_lock); | ||
3468 | |||
3469 | if (ret) | ||
3470 | goto out_unlock; | ||
3452 | } | 3471 | } |
3453 | 3472 | ||
3454 | /* save and update control masks and prepare csses */ | 3473 | /* save and update control masks and prepare csses */ |
@@ -3899,7 +3918,9 @@ void cgroup_file_notify(struct cgroup_file *cfile) | |||
3899 | * cgroup_task_count - count the number of tasks in a cgroup. | 3918 | * cgroup_task_count - count the number of tasks in a cgroup. |
3900 | * @cgrp: the cgroup in question | 3919 | * @cgrp: the cgroup in question |
3901 | * | 3920 | * |
3902 | * Return the number of tasks in the cgroup. | 3921 | * Return the number of tasks in the cgroup. The returned number can be |
3922 | * higher than the actual number of tasks due to css_set references from | ||
3923 | * namespace roots and temporary usages. | ||
3903 | */ | 3924 | */ |
3904 | static int cgroup_task_count(const struct cgroup *cgrp) | 3925 | static int cgroup_task_count(const struct cgroup *cgrp) |
3905 | { | 3926 | { |