From 967db0ea65b0bf8507a7643ac8f296c4f2c0a834 Mon Sep 17 00:00:00 2001 From: Salman Qazi Date: Wed, 6 Jun 2012 18:51:35 -0700 Subject: cgroup: make sure that decisions in __css_put are atomic __css_put is using atomic_dec on the ref count, and then looking at the ref count to make decisions. This is prone to races, as someone else may decrement ref count between our decrement and our decision. Instead, we should base our decisions on the value that we decremented the ref count to. (This results in an actual race on Google's kernel which I haven't been able to reproduce on the upstream kernel. Having said that, it's still incorrect by inspection). Signed-off-by: Salman Qazi Acked-by: Li Zefan Signed-off-by: Tejun Heo Cc: stable@vger.kernel.org --- kernel/cgroup.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'kernel/cgroup.c') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 72fcd3069a90..ceeafe874b3f 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -4984,8 +4984,7 @@ void __css_put(struct cgroup_subsys_state *css) struct cgroup *cgrp = css->cgroup; rcu_read_lock(); - atomic_dec(&css->refcnt); - switch (css_refcnt(css)) { + switch (atomic_dec_return(&css->refcnt)) { case 1: if (notify_on_release(cgrp)) { set_bit(CGRP_RELEASABLE, &cgrp->flags); -- cgit v1.2.2 From 8e3bbf42c6d73881956863cc3305456afe2bc4ea Mon Sep 17 00:00:00 2001 From: Salman Qazi Date: Thu, 14 Jun 2012 14:55:30 -0700 Subject: cgroups: Account for CSS_DEACT_BIAS in __css_put When we fixed the race between atomic_dec and css_refcnt, we missed the fact that css_refcnt internally subtracts CSS_DEACT_BIAS to get the actual reference count. This can potentially cause a refcount leak if __css_put races with cgroup_clear_css_refs. Signed-off-by: Salman Qazi Acked-by: Li Zefan Signed-off-by: Tejun Heo --- kernel/cgroup.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'kernel/cgroup.c') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index ceeafe874b3f..2097684cf194 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -255,12 +255,17 @@ int cgroup_lock_is_held(void) EXPORT_SYMBOL_GPL(cgroup_lock_is_held); +static int css_unbias_refcnt(int refcnt) +{ + return refcnt >= 0 ? refcnt : refcnt - CSS_DEACT_BIAS; +} + /* the current nr of refs, always >= 0 whether @css is deactivated or not */ static int css_refcnt(struct cgroup_subsys_state *css) { int v = atomic_read(&css->refcnt); - return v >= 0 ? v : v - CSS_DEACT_BIAS; + return css_unbias_refcnt(v); } /* convenient tests for these bits */ @@ -4982,9 +4987,12 @@ EXPORT_SYMBOL_GPL(__css_tryget); void __css_put(struct cgroup_subsys_state *css) { struct cgroup *cgrp = css->cgroup; + int v; rcu_read_lock(); - switch (atomic_dec_return(&css->refcnt)) { + v = css_unbias_refcnt(atomic_dec_return(&css->refcnt)); + + switch (v) { case 1: if (notify_on_release(cgrp)) { set_bit(CGRP_RELEASABLE, &cgrp->flags); -- cgit v1.2.2 From 7db5b3ca0ecdb2e8fad52a4770e4e320e61c77a6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 7 Jul 2012 15:55:47 -0700 Subject: Revert "cgroup: superblock can't be released with active dentries" This reverts commit fa980ca87d15bb8a1317853f257a505990f3ffde. The commit was an attempt to fix a race condition where a cgroup hierarchy may be unmounted with positive dentry reference on root cgroup. While the commit made the race condition slightly more difficult to trigger, the race was still there and could be reliably triggered using a different test case. Revert the incorrect fix. The next commit will describe the race and fix it correctly. Signed-off-by: Tejun Heo LKML-Reference: <4FEEA5CB.8070809@huawei.com> Reported-by: shyju pv Cc: Sasha Levin Acked-by: Li Zefan --- kernel/cgroup.c | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) (limited to 'kernel/cgroup.c') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 2097684cf194..5f134a0e0e3f 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -901,13 +901,10 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode) mutex_unlock(&cgroup_mutex); /* - * We want to drop the active superblock reference from the - * cgroup creation after all the dentry refs are gone - - * kill_sb gets mighty unhappy otherwise. Mark - * dentry->d_fsdata with cgroup_diput() to tell - * cgroup_d_release() to call deactivate_super(). + * Drop the active superblock reference that we took when we + * created the cgroup */ - dentry->d_fsdata = cgroup_diput; + deactivate_super(cgrp->root->sb); /* * if we're getting rid of the cgroup, refcount should ensure @@ -933,13 +930,6 @@ static int cgroup_delete(const struct dentry *d) return 1; } -static void cgroup_d_release(struct dentry *dentry) -{ - /* did cgroup_diput() tell me to deactivate super? */ - if (dentry->d_fsdata == cgroup_diput) - deactivate_super(dentry->d_sb); -} - static void remove_dir(struct dentry *d) { struct dentry *parent = dget(d->d_parent); @@ -1547,7 +1537,6 @@ static int cgroup_get_rootdir(struct super_block *sb) static const struct dentry_operations cgroup_dops = { .d_iput = cgroup_diput, .d_delete = cgroup_delete, - .d_release = cgroup_d_release, }; struct inode *inode = -- cgit v1.2.2 From 5db9a4d99b0157a513944e9a44d29c9cec2e91dc Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 7 Jul 2012 16:08:18 -0700 Subject: cgroup: fix cgroup hierarchy umount race 48ddbe1946 "cgroup: make css->refcnt clearing on cgroup removal optional" allowed a css to linger after the associated cgroup is removed. As a css holds a reference on the cgroup's dentry, it means that cgroup dentries may linger for a while. Destroying a superblock which has dentries with positive refcnts is a critical bug and triggers BUG() in vfs code. As each cgroup dentry holds an s_active reference, any lingering cgroup has both its dentry and the superblock pinned and thus preventing premature release of superblock. Unfortunately, after 48ddbe1946, there's a small window while releasing a cgroup which is directly under the root of the hierarchy. When a cgroup directory is released, vfs layer first deletes the corresponding dentry and then invokes dput() on the parent, which may recurse further, so when a cgroup directly below root cgroup is released, the cgroup is first destroyed - which releases the s_active it was holding - and then the dentry for the root cgroup is dput(). This creates a window where the root dentry's refcnt isn't zero but superblock's s_active is. If umount happens before or during this window, vfs will see the root dentry with non-zero refcnt and trigger BUG(). Before 48ddbe1946, this problem didn't exist because the last dentry reference was guaranteed to be put synchronously from rmdir(2) invocation which holds s_active around the whole process. Fix it by holding an extra superblock->s_active reference across dput() from css release, which is the dput() path added by 48ddbe1946 and the only one which doesn't hold an extra s_active ref across the final cgroup dput(). Signed-off-by: Tejun Heo LKML-Reference: <4FEEA5CB.8070809@huawei.com> Reported-by: shyju pv Tested-by: shyju pv Cc: Sasha Levin Acked-by: Li Zefan --- kernel/cgroup.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'kernel/cgroup.c') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 5f134a0e0e3f..b303dfc7dce0 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -3883,8 +3883,12 @@ static void css_dput_fn(struct work_struct *work) { struct cgroup_subsys_state *css = container_of(work, struct cgroup_subsys_state, dput_work); + struct dentry *dentry = css->cgroup->dentry; + struct super_block *sb = dentry->d_sb; - dput(css->cgroup->dentry); + atomic_inc(&sb->s_active); + dput(dentry); + deactivate_super(sb); } static void init_cgroup_css(struct cgroup_subsys_state *css, -- cgit v1.2.2