about | summary | refs | log | tree | commit | diff | stats
path: root/kernel/cgroup.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/cgroup.c')
-rw-r--r-- kernel/cgroup.c 119
1 files changed, 71 insertions, 48 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 21bba7722350..2eade5186604 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -63,6 +63,9 @@
63 63
64#include <linux/atomic.h> 64#include <linux/atomic.h>
65 65
66/* css deactivation bias, makes css->refcnt negative to deny new trygets */
67#define CSS_DEACT_BIAS INT_MIN
68
66/* 69/*
67 * cgroup_mutex is the master lock. Any modification to cgroup or its 70 * cgroup_mutex is the master lock. Any modification to cgroup or its
68 * hierarchy must be performed while holding it. 71 * hierarchy must be performed while holding it.
@@ -251,6 +254,14 @@ int cgroup_lock_is_held(void)
251 254
252EXPORT_SYMBOL_GPL(cgroup_lock_is_held); 255EXPORT_SYMBOL_GPL(cgroup_lock_is_held);
253 256
257/* the current nr of refs, always >= 0 whether @css is deactivated or not */
258static int css_refcnt(struct cgroup_subsys_state *css)
259{
260 int v = atomic_read(&css->refcnt);
261
262 return v >= 0 ? v : v - CSS_DEACT_BIAS;
263}
264
254/* convenient tests for these bits */ 265/* convenient tests for these bits */
255inline int cgroup_is_removed(const struct cgroup *cgrp) 266inline int cgroup_is_removed(const struct cgroup *cgrp)
256{ 267{
@@ -4006,18 +4017,19 @@ static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
4006 return cgroup_create(c_parent, dentry, mode | S_IFDIR); 4017 return cgroup_create(c_parent, dentry, mode | S_IFDIR);
4007} 4018}
4008 4019
4020/*
4021 * Check the reference count on each subsystem. Since we already
4022 * established that there are no tasks in the cgroup, if the css refcount
4023 * is also 1, then there should be no outstanding references, so the
4024 * subsystem is safe to destroy. We scan across all subsystems rather than
4025 * using the per-hierarchy linked list of mounted subsystems since we can
4026 * be called via check_for_release() with no synchronization other than
4027 * RCU, and the subsystem linked list isn't RCU-safe.
4028 */
4009static int cgroup_has_css_refs(struct cgroup *cgrp) 4029static int cgroup_has_css_refs(struct cgroup *cgrp)
4010{ 4030{
4011 /* Check the reference count on each subsystem. Since we
4012 * already established that there are no tasks in the
4013 * cgroup, if the css refcount is also 1, then there should
4014 * be no outstanding references, so the subsystem is safe to
4015 * destroy. We scan across all subsystems rather than using
4016 * the per-hierarchy linked list of mounted subsystems since
4017 * we can be called via check_for_release() with no
4018 * synchronization other than RCU, and the subsystem linked
4019 * list isn't RCU-safe */
4020 int i; 4031 int i;
4032
4021 /* 4033 /*
4022 * We won't need to lock the subsys array, because the subsystems 4034 * We won't need to lock the subsys array, because the subsystems
4023 * we're concerned about aren't going anywhere since our cgroup root 4035 * we're concerned about aren't going anywhere since our cgroup root
@@ -4026,17 +4038,21 @@ static int cgroup_has_css_refs(struct cgroup *cgrp)
4026 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { 4038 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
4027 struct cgroup_subsys *ss = subsys[i]; 4039 struct cgroup_subsys *ss = subsys[i];
4028 struct cgroup_subsys_state *css; 4040 struct cgroup_subsys_state *css;
4041
4029 /* Skip subsystems not present or not in this hierarchy */ 4042 /* Skip subsystems not present or not in this hierarchy */
4030 if (ss == NULL || ss->root != cgrp->root) 4043 if (ss == NULL || ss->root != cgrp->root)
4031 continue; 4044 continue;
4045
4032 css = cgrp->subsys[ss->subsys_id]; 4046 css = cgrp->subsys[ss->subsys_id];
4033 /* When called from check_for_release() it's possible 4047 /*
4048 * When called from check_for_release() it's possible
4034 * that by this point the cgroup has been removed 4049 * that by this point the cgroup has been removed
4035 * and the css deleted. But a false-positive doesn't 4050 * and the css deleted. But a false-positive doesn't
4036 * matter, since it can only happen if the cgroup 4051 * matter, since it can only happen if the cgroup
4037 * has been deleted and hence no longer needs the 4052 * has been deleted and hence no longer needs the
4038 * release agent to be called anyway. */ 4053 * release agent to be called anyway.
4039 if (css && (atomic_read(&css->refcnt) > 1)) 4054 */
4055 if (css && css_refcnt(css) > 1)
4040 return 1; 4056 return 1;
4041 } 4057 }
4042 return 0; 4058 return 0;
@@ -4053,44 +4069,37 @@ static int cgroup_clear_css_refs(struct cgroup *cgrp)
4053 struct cgroup_subsys *ss; 4069 struct cgroup_subsys *ss;
4054 unsigned long flags; 4070 unsigned long flags;
4055 bool failed = false; 4071 bool failed = false;
4072
4056 local_irq_save(flags); 4073 local_irq_save(flags);
4074
4075 /*
4076 * Block new css_tryget() by deactivating refcnt. If all refcnts
4077 * were 1 at the moment of deactivation, we succeeded.
4078 */
4057 for_each_subsys(cgrp->root, ss) { 4079 for_each_subsys(cgrp->root, ss) {
4058 struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id]; 4080 struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
4059 int refcnt; 4081
4060 while (1) { 4082 WARN_ON(atomic_read(&css->refcnt) < 0);
4061 /* We can only remove a CSS with a refcnt==1 */ 4083 atomic_add(CSS_DEACT_BIAS, &css->refcnt);
4062 refcnt = atomic_read(&css->refcnt); 4084 failed |= css_refcnt(css) != 1;
4063 if (refcnt > 1) {
4064 failed = true;
4065 goto done;
4066 }
4067 BUG_ON(!refcnt);
4068 /*
4069 * Drop the refcnt to 0 while we check other
4070 * subsystems. This will cause any racing
4071 * css_tryget() to spin until we set the
4072 * CSS_REMOVED bits or abort
4073 */
4074 if (atomic_cmpxchg(&css->refcnt, refcnt, 0) == refcnt)
4075 break;
4076 cpu_relax();
4077 }
4078 } 4085 }
4079 done: 4086
4087 /*
4088 * If succeeded, set REMOVED and put all the base refs; otherwise,
4089 * restore refcnts to positive values. Either way, all in-progress
4090 * css_tryget() will be released.
4091 */
4080 for_each_subsys(cgrp->root, ss) { 4092 for_each_subsys(cgrp->root, ss) {
4081 struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id]; 4093 struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
4082 if (failed) { 4094
4083 /* 4095 if (!failed) {
4084 * Restore old refcnt if we previously managed
4085 * to clear it from 1 to 0
4086 */
4087 if (!atomic_read(&css->refcnt))
4088 atomic_set(&css->refcnt, 1);
4089 } else {
4090 /* Commit the fact that the CSS is removed */
4091 set_bit(CSS_REMOVED, &css->flags); 4096 set_bit(CSS_REMOVED, &css->flags);
4097 css_put(css);
4098 } else {
4099 atomic_sub(CSS_DEACT_BIAS, &css->refcnt);
4092 } 4100 }
4093 } 4101 }
4102
4094 local_irq_restore(flags); 4103 local_irq_restore(flags);
4095 return !failed; 4104 return !failed;
4096} 4105}
@@ -4887,13 +4896,28 @@ static void check_for_release(struct cgroup *cgrp)
4887} 4896}
4888 4897
4889/* Caller must verify that the css is not for root cgroup */ 4898/* Caller must verify that the css is not for root cgroup */
4890void __css_put(struct cgroup_subsys_state *css, int count) 4899bool __css_tryget(struct cgroup_subsys_state *css)
4900{
4901 do {
4902 int v = css_refcnt(css);
4903
4904 if (atomic_cmpxchg(&css->refcnt, v, v + 1) == v)
4905 return true;
4906 cpu_relax();
4907 } while (!test_bit(CSS_REMOVED, &css->flags));
4908
4909 return false;
4910}
4911EXPORT_SYMBOL_GPL(__css_tryget);
4912
4913/* Caller must verify that the css is not for root cgroup */
4914void __css_put(struct cgroup_subsys_state *css)
4891{ 4915{
4892 struct cgroup *cgrp = css->cgroup; 4916 struct cgroup *cgrp = css->cgroup;
4893 int val; 4917
4894 rcu_read_lock(); 4918 rcu_read_lock();
4895 val = atomic_sub_return(count, &css->refcnt); 4919 atomic_dec(&css->refcnt);
4896 if (val == 1) { 4920 if (css_refcnt(css) == 1) {
4897 if (notify_on_release(cgrp)) { 4921 if (notify_on_release(cgrp)) {
4898 set_bit(CGRP_RELEASABLE, &cgrp->flags); 4922 set_bit(CGRP_RELEASABLE, &cgrp->flags);
4899 check_for_release(cgrp); 4923 check_for_release(cgrp);
@@ -4901,7 +4925,6 @@ void __css_put(struct cgroup_subsys_state *css, int count)
4901 cgroup_wakeup_rmdir_waiter(cgrp); 4925 cgroup_wakeup_rmdir_waiter(cgrp);
4902 } 4926 }
4903 rcu_read_unlock(); 4927 rcu_read_unlock();
4904 WARN_ON_ONCE(val < 1);
4905} 4928}
4906EXPORT_SYMBOL_GPL(__css_put); 4929EXPORT_SYMBOL_GPL(__css_put);
4907 4930
@@ -5020,7 +5043,7 @@ unsigned short css_id(struct cgroup_subsys_state *css)
5020 * on this or this is under rcu_read_lock(). Once css->id is allocated, 5043 * on this or this is under rcu_read_lock(). Once css->id is allocated,
5021 * it's unchanged until freed. 5044 * it's unchanged until freed.
5022 */ 5045 */
5023 cssid = rcu_dereference_check(css->id, atomic_read(&css->refcnt)); 5046 cssid = rcu_dereference_check(css->id, css_refcnt(css));
5024 5047
5025 if (cssid) 5048 if (cssid)
5026 return cssid->id; 5049 return cssid->id;
@@ -5032,7 +5055,7 @@ unsigned short css_depth(struct cgroup_subsys_state *css)
5032{ 5055{
5033 struct css_id *cssid; 5056 struct css_id *cssid;
5034 5057
5035 cssid = rcu_dereference_check(css->id, atomic_read(&css->refcnt)); 5058 cssid = rcu_dereference_check(css->id, css_refcnt(css));
5036 5059
5037 if (cssid) 5060 if (cssid)
5038 return cssid->depth; 5061 return cssid->depth;