diff options
author | Hugh Dickins <hughd@google.com> | 2013-08-28 19:31:23 -0400 |
---|---|---|
committer | Tejun Heo <tj@kernel.org> | 2013-08-29 11:05:07 -0400 |
commit | bb78a92f47696b2da49f2692b6a9fa56d07c444a (patch) | |
tree | 6750776493dcfd0cd180b7fc0a666406708547c8 /kernel | |
parent | c95389b4cd6a4b52af78bea706a274453e886251 (diff) |
cgroup: fix rmdir EBUSY regression in 3.11
On 3.11-rc we are seeing cgroup directories left behind when they should
have been removed. Here's a trivial reproducer:
cd /sys/fs/cgroup/memory
mkdir parent parent/child; rmdir parent/child parent
rmdir: failed to remove `parent': Device or resource busy
It's because cgroup_destroy_locked() (step 1 of destruction) leaves
cgroup on parent's children list, letting cgroup_offline_fn() (step 2 of
destruction) remove it; but step 2 is run by work queue, which may not
yet have removed the children when parent destruction checks the list.
Fix that by checking through a non-empty list of children: if every one
of them has already been marked CGRP_DEAD, then it's safe to proceed:
those children are invisible to userspace, and should not obstruct rmdir.
(I didn't see any reason to keep the cgrp->children checks under the
unrelated css_set_lock, so moved them out.)
tj: Flattened nested ifs a bit and updated comment so that it's
correct on both for-3.11-fixes and for-3.12.
Signed-off-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/cgroup.c | 19 |
1 files changed, 18 insertions, 1 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 781845a013ab..e91963302c0d 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
@@ -4480,6 +4480,7 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) | |||
4480 | struct dentry *d = cgrp->dentry; | 4480 | struct dentry *d = cgrp->dentry; |
4481 | struct cgroup_event *event, *tmp; | 4481 | struct cgroup_event *event, *tmp; |
4482 | struct cgroup_subsys *ss; | 4482 | struct cgroup_subsys *ss; |
4483 | struct cgroup *child; | ||
4483 | bool empty; | 4484 | bool empty; |
4484 | 4485 | ||
4485 | lockdep_assert_held(&d->d_inode->i_mutex); | 4486 | lockdep_assert_held(&d->d_inode->i_mutex); |
@@ -4490,12 +4491,28 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) | |||
4490 | * @cgrp from being removed while __put_css_set() is in progress. | 4491 | * @cgrp from being removed while __put_css_set() is in progress. |
4491 | */ | 4492 | */ |
4492 | read_lock(&css_set_lock); | 4493 | read_lock(&css_set_lock); |
4493 | empty = list_empty(&cgrp->cset_links) && list_empty(&cgrp->children); | 4494 | empty = list_empty(&cgrp->cset_links); |
4494 | read_unlock(&css_set_lock); | 4495 | read_unlock(&css_set_lock); |
4495 | if (!empty) | 4496 | if (!empty) |
4496 | return -EBUSY; | 4497 | return -EBUSY; |
4497 | 4498 | ||
4498 | /* | 4499 | /* |
4500 | * Make sure there's no live children. We can't test ->children | ||
4501 | * emptiness as dead children linger on it while being destroyed; | ||
4502 | * otherwise, "rmdir parent/child parent" may fail with -EBUSY. | ||
4503 | */ | ||
4504 | empty = true; | ||
4505 | rcu_read_lock(); | ||
4506 | list_for_each_entry_rcu(child, &cgrp->children, sibling) { | ||
4507 | empty = cgroup_is_dead(child); | ||
4508 | if (!empty) | ||
4509 | break; | ||
4510 | } | ||
4511 | rcu_read_unlock(); | ||
4512 | if (!empty) | ||
4513 | return -EBUSY; | ||
4514 | |||
4515 | /* | ||
4499 | * Block new css_tryget() by killing css refcnts. cgroup core | 4516 | * Block new css_tryget() by killing css refcnts. cgroup core |
4500 | * guarantees that, by the time ->css_offline() is invoked, no new | 4517 | * guarantees that, by the time ->css_offline() is invoked, no new |
4501 | * css reference will be given out via css_tryget(). We can't | 4518 | * css reference will be given out via css_tryget(). We can't |