author     Tejun Heo <tj@kernel.org>    2012-04-01 15:09:56 -0400
committer  Tejun Heo <tj@kernel.org>    2012-04-01 15:09:56 -0400
commit     28b4c27b8e6bb6d7ff2875281a8484f8898a87ef (patch)
tree       8da3eefc6b98c46a4dbd0fd6e6d6dec6220382df /kernel/cgroup.c
parent     79578621b4847afdef48d19a28d00e3b188c37e1 (diff)
cgroup: use negative bias on css->refcnt to block css_tryget()
When a cgroup is about to be removed, cgroup_clear_css_refs() is called to check and ensure that there are no active css references.  This is currently achieved by dropping the refcnt to zero iff it has only the base ref.  If all css refs could be dropped to zero, ref clearing is successful and CSS_REMOVED is set on all css.  If not, the base ref is restored.  While a css ref is zero w/o CSS_REMOVED set, any css_tryget() attempt on it busy loops so that it is atomic w.r.t. the whole css ref clearing.

This does work but dropping and reinstating the base ref is somewhat hairy and makes it difficult to add more logic to the put path, as there are two of them - the regular css_put() and the reversible base ref clearing.

This patch updates css ref clearing such that blocking new css_tryget() and putting the base ref are separate operations.  CSS_DEACT_BIAS, defined as INT_MIN, is added to css->refcnt and css_tryget() busy loops while refcnt is negative.  After all css refs are deactivated, if they were all one, ref clearing succeeded; CSS_REMOVED is set and the base ref is put using the regular css_put().  Otherwise, CSS_DEACT_BIAS is subtracted from the refcnts and the original positive values are restored.

A css_refcnt() accessor, which always returns the unbiased positive reference count, is added and used to simplify refcnt usages.  While at it, relocate and reformat comments in cgroup_has_css_refs().

This separates css->refcnt deactivation from putting the base ref, which enables the next patch to make ref clearing optional.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizf@cn.fujitsu.com>
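The biasing scheme described above can be modelled outside the kernel.  The following is a minimal userspace sketch, assuming only a C11 compiler and <stdatomic.h>; the names (obj, obj_refcnt, obj_deactivate, obj_tryget, DEACT_BIAS) are invented for the illustration and are not the kernel's API.

/*
 * Minimal userspace model of the negative-bias trick described above.
 * Illustrative only: obj, obj_refcnt, obj_deactivate and obj_tryget are
 * invented names, not the kernel's API.
 */
#include <limits.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define DEACT_BIAS      INT_MIN         /* mirrors CSS_DEACT_BIAS */

struct obj {
        atomic_int refcnt;              /* starts at 1, the base ref */
        atomic_bool removed;            /* stands in for CSS_REMOVED */
};

/* unbiased reference count, always >= 0 whether deactivated or not */
static int obj_refcnt(struct obj *o)
{
        int v = atomic_load(&o->refcnt);

        return v >= 0 ? v : v - DEACT_BIAS;
}

/* block new trygets; true if only the base ref was held at that moment */
static bool obj_deactivate(struct obj *o)
{
        atomic_fetch_add(&o->refcnt, DEACT_BIAS);
        return obj_refcnt(o) == 1;
}

/* spins while the raw counter is biased negative, fails once removal commits */
static bool obj_tryget(struct obj *o)
{
        do {
                int v = obj_refcnt(o);

                /* only succeeds while the raw counter equals the unbiased value */
                if (atomic_compare_exchange_weak(&o->refcnt, &v, v + 1))
                        return true;
        } while (!atomic_load(&o->removed));

        return false;
}

int main(void)
{
        struct obj o;

        atomic_init(&o.refcnt, 1);
        atomic_init(&o.removed, false);

        printf("deactivated with only base ref: %d\n", obj_deactivate(&o)); /* 1 */
        atomic_store(&o.removed, true);         /* commit the removal */
        printf("tryget after removal: %d\n", obj_tryget(&o));               /* 0 */
        return 0;
}

Deactivation pushes the raw counter negative, obj_refcnt() recovers the unbiased value on either side of the bias, and a tryget can only succeed while the raw counter is non-negative - the same three ingredients the patch introduces as CSS_DEACT_BIAS, css_refcnt() and __css_tryget().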
Diffstat (limited to 'kernel/cgroup.c')
-rw-r--r--  kernel/cgroup.c  119
1 file changed, 71 insertions(+), 48 deletions(-)
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 21bba7722350..2eade5186604 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -63,6 +63,9 @@
 
 #include <linux/atomic.h>
 
+/* css deactivation bias, makes css->refcnt negative to deny new trygets */
+#define CSS_DEACT_BIAS   INT_MIN
+
 /*
  * cgroup_mutex is the master lock. Any modification to cgroup or its
  * hierarchy must be performed while holding it.
@@ -251,6 +254,14 @@ int cgroup_lock_is_held(void)
 
 EXPORT_SYMBOL_GPL(cgroup_lock_is_held);
 
+/* the current nr of refs, always >= 0 whether @css is deactivated or not */
+static int css_refcnt(struct cgroup_subsys_state *css)
+{
+        int v = atomic_read(&css->refcnt);
+
+        return v >= 0 ? v : v - CSS_DEACT_BIAS;
+}
+
 /* convenient tests for these bits */
 inline int cgroup_is_removed(const struct cgroup *cgrp)
 {
@@ -4006,18 +4017,19 @@ static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
         return cgroup_create(c_parent, dentry, mode | S_IFDIR);
 }
 
+/*
+ * Check the reference count on each subsystem. Since we already
+ * established that there are no tasks in the cgroup, if the css refcount
+ * is also 1, then there should be no outstanding references, so the
+ * subsystem is safe to destroy. We scan across all subsystems rather than
+ * using the per-hierarchy linked list of mounted subsystems since we can
+ * be called via check_for_release() with no synchronization other than
+ * RCU, and the subsystem linked list isn't RCU-safe.
+ */
 static int cgroup_has_css_refs(struct cgroup *cgrp)
 {
-        /* Check the reference count on each subsystem. Since we
-         * already established that there are no tasks in the
-         * cgroup, if the css refcount is also 1, then there should
-         * be no outstanding references, so the subsystem is safe to
-         * destroy. We scan across all subsystems rather than using
-         * the per-hierarchy linked list of mounted subsystems since
-         * we can be called via check_for_release() with no
-         * synchronization other than RCU, and the subsystem linked
-         * list isn't RCU-safe */
         int i;
+
         /*
          * We won't need to lock the subsys array, because the subsystems
          * we're concerned about aren't going anywhere since our cgroup root
@@ -4026,17 +4038,21 @@ static int cgroup_has_css_refs(struct cgroup *cgrp)
         for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
                 struct cgroup_subsys *ss = subsys[i];
                 struct cgroup_subsys_state *css;
+
                 /* Skip subsystems not present or not in this hierarchy */
                 if (ss == NULL || ss->root != cgrp->root)
                         continue;
+
                 css = cgrp->subsys[ss->subsys_id];
-                /* When called from check_for_release() it's possible
+                /*
+                 * When called from check_for_release() it's possible
                  * that by this point the cgroup has been removed
                  * and the css deleted. But a false-positive doesn't
                  * matter, since it can only happen if the cgroup
                  * has been deleted and hence no longer needs the
-                 * release agent to be called anyway. */
-                if (css && (atomic_read(&css->refcnt) > 1))
+                 * release agent to be called anyway.
+                 */
+                if (css && css_refcnt(css) > 1)
                         return 1;
         }
         return 0;
@@ -4053,44 +4069,37 @@ static int cgroup_clear_css_refs(struct cgroup *cgrp)
         struct cgroup_subsys *ss;
         unsigned long flags;
         bool failed = false;
+
         local_irq_save(flags);
+
+        /*
+         * Block new css_tryget() by deactivating refcnt.  If all refcnts
+         * were 1 at the moment of deactivation, we succeeded.
+         */
         for_each_subsys(cgrp->root, ss) {
                 struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
-                int refcnt;
-                while (1) {
-                        /* We can only remove a CSS with a refcnt==1 */
-                        refcnt = atomic_read(&css->refcnt);
-                        if (refcnt > 1) {
-                                failed = true;
-                                goto done;
-                        }
-                        BUG_ON(!refcnt);
-                        /*
-                         * Drop the refcnt to 0 while we check other
-                         * subsystems. This will cause any racing
-                         * css_tryget() to spin until we set the
-                         * CSS_REMOVED bits or abort
-                         */
-                        if (atomic_cmpxchg(&css->refcnt, refcnt, 0) == refcnt)
-                                break;
-                        cpu_relax();
-                }
+
+                WARN_ON(atomic_read(&css->refcnt) < 0);
+                atomic_add(CSS_DEACT_BIAS, &css->refcnt);
+                failed |= css_refcnt(css) != 1;
         }
- done:
+
+        /*
+         * If succeeded, set REMOVED and put all the base refs; otherwise,
+         * restore refcnts to positive values.  Either way, all in-progress
+         * css_tryget() will be released.
+         */
         for_each_subsys(cgrp->root, ss) {
                 struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
-                if (failed) {
-                        /*
-                         * Restore old refcnt if we previously managed
-                         * to clear it from 1 to 0
-                         */
-                        if (!atomic_read(&css->refcnt))
-                                atomic_set(&css->refcnt, 1);
-                } else {
-                        /* Commit the fact that the CSS is removed */
+
+                if (!failed) {
                         set_bit(CSS_REMOVED, &css->flags);
+                        css_put(css);
+                } else {
+                        atomic_sub(CSS_DEACT_BIAS, &css->refcnt);
                 }
         }
+
         local_irq_restore(flags);
         return !failed;
 }
@@ -4887,13 +4896,28 @@ static void check_for_release(struct cgroup *cgrp)
 }
 
 /* Caller must verify that the css is not for root cgroup */
-void __css_put(struct cgroup_subsys_state *css, int count)
+bool __css_tryget(struct cgroup_subsys_state *css)
+{
+        do {
+                int v = css_refcnt(css);
+
+                if (atomic_cmpxchg(&css->refcnt, v, v + 1) == v)
+                        return true;
+                cpu_relax();
+        } while (!test_bit(CSS_REMOVED, &css->flags));
+
+        return false;
+}
+EXPORT_SYMBOL_GPL(__css_tryget);
+
+/* Caller must verify that the css is not for root cgroup */
+void __css_put(struct cgroup_subsys_state *css)
 {
         struct cgroup *cgrp = css->cgroup;
-        int val;
+
         rcu_read_lock();
-        val = atomic_sub_return(count, &css->refcnt);
-        if (val == 1) {
+        atomic_dec(&css->refcnt);
+        if (css_refcnt(css) == 1) {
                 if (notify_on_release(cgrp)) {
                         set_bit(CGRP_RELEASABLE, &cgrp->flags);
                         check_for_release(cgrp);
@@ -4901,7 +4925,6 @@ void __css_put(struct cgroup_subsys_state *css, int count)
                 cgroup_wakeup_rmdir_waiter(cgrp);
         }
         rcu_read_unlock();
-        WARN_ON_ONCE(val < 1);
 }
 EXPORT_SYMBOL_GPL(__css_put);
 
@@ -5020,7 +5043,7 @@ unsigned short css_id(struct cgroup_subsys_state *css)
          * on this or this is under rcu_read_lock(). Once css->id is allocated,
          * it's unchanged until freed.
          */
-        cssid = rcu_dereference_check(css->id, atomic_read(&css->refcnt));
+        cssid = rcu_dereference_check(css->id, css_refcnt(css));
 
         if (cssid)
                 return cssid->id;
@@ -5032,7 +5055,7 @@ unsigned short css_depth(struct cgroup_subsys_state *css)
 {
         struct css_id *cssid;
 
-        cssid = rcu_dereference_check(css->id, atomic_read(&css->refcnt));
+        cssid = rcu_dereference_check(css->id, css_refcnt(css));
 
         if (cssid)
                 return cssid->depth;
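For completeness, the two-pass shape of the new cgroup_clear_css_refs() above (deactivate every counter first, then either commit removal or roll the bias back) can also be mimicked in userspace.  Again this is only an illustrative sketch under the C11 <stdatomic.h> assumption; the fixed-size arrays and helper names are invented and merely stand in for the per-hierarchy subsystem walk.

/*
 * Userspace sketch of the two-pass clearing scheme: pass one adds the
 * negative bias to every counter (blocking new trygets), pass two either
 * commits removal (every count was exactly 1) or subtracts the bias to
 * restore the original positive values.  Illustrative only, not kernel code.
 */
#include <limits.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define DEACT_BIAS      INT_MIN
#define NR_OBJS         3

static atomic_int refcnt[NR_OBJS];
static atomic_bool removed[NR_OBJS];

/* unbiased view of a possibly deactivated counter */
static int unbiased(int i)
{
        int v = atomic_load(&refcnt[i]);

        return v >= 0 ? v : v - DEACT_BIAS;
}

static bool clear_refs(void)
{
        bool failed = false;
        int i;

        /* pass one: deactivate; note whether anything beyond the base ref is held */
        for (i = 0; i < NR_OBJS; i++) {
                atomic_fetch_add(&refcnt[i], DEACT_BIAS);
                failed |= unbiased(i) != 1;
        }

        /* pass two: commit removal, or subtract the bias to roll back */
        for (i = 0; i < NR_OBJS; i++) {
                if (!failed)
                        atomic_store(&removed[i], true);
                else
                        atomic_fetch_sub(&refcnt[i], DEACT_BIAS);
        }

        return !failed;
}

int main(void)
{
        int i;

        for (i = 0; i < NR_OBJS; i++) {
                atomic_init(&refcnt[i], 1);
                atomic_init(&removed[i], false);
        }
        atomic_fetch_add(&refcnt[1], 1);        /* someone holds an extra ref */

        printf("clear with extra ref held: %d\n", clear_refs());  /* 0 */
        printf("refcnt[1] after rollback:  %d\n", unbiased(1));   /* 2 */

        atomic_fetch_sub(&refcnt[1], 1);        /* drop the extra ref */
        printf("clear after dropping it:   %d\n", clear_refs());  /* 1 */
        return 0;
}

C11 atomic fetch operations on signed integers are defined to wrap, so adding and later subtracting INT_MIN restores the original positive counts exactly; the kernel's atomic_t arithmetic behaves the same way, which is what makes the rollback path in the patch safe.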