diff options
Diffstat (limited to 'kernel/cgroup.c')
-rw-r--r-- | kernel/cgroup.c | 119 |
1 files changed, 71 insertions, 48 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 21bba7722350..2eade5186604 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -63,6 +63,9 @@ | |||
63 | 63 | ||
64 | #include <linux/atomic.h> | 64 | #include <linux/atomic.h> |
65 | 65 | ||
66 | /* css deactivation bias, makes css->refcnt negative to deny new trygets */ | ||
67 | #define CSS_DEACT_BIAS INT_MIN | ||
68 | |||
66 | /* | 69 | /* |
67 | * cgroup_mutex is the master lock. Any modification to cgroup or its | 70 | * cgroup_mutex is the master lock. Any modification to cgroup or its |
68 | * hierarchy must be performed while holding it. | 71 | * hierarchy must be performed while holding it. |
@@ -251,6 +254,14 @@ int cgroup_lock_is_held(void) | |||
251 | 254 | ||
252 | EXPORT_SYMBOL_GPL(cgroup_lock_is_held); | 255 | EXPORT_SYMBOL_GPL(cgroup_lock_is_held); |
253 | 256 | ||
257 | /* the current nr of refs, always >= 0 whether @css is deactivated or not */ | ||
258 | static int css_refcnt(struct cgroup_subsys_state *css) | ||
259 | { | ||
260 | int v = atomic_read(&css->refcnt); | ||
261 | |||
262 | return v >= 0 ? v : v - CSS_DEACT_BIAS; | ||
263 | } | ||
264 | |||
254 | /* convenient tests for these bits */ | 265 | /* convenient tests for these bits */ |
255 | inline int cgroup_is_removed(const struct cgroup *cgrp) | 266 | inline int cgroup_is_removed(const struct cgroup *cgrp) |
256 | { | 267 | { |
@@ -4006,18 +4017,19 @@ static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) | |||
4006 | return cgroup_create(c_parent, dentry, mode | S_IFDIR); | 4017 | return cgroup_create(c_parent, dentry, mode | S_IFDIR); |
4007 | } | 4018 | } |
4008 | 4019 | ||
4020 | /* | ||
4021 | * Check the reference count on each subsystem. Since we already | ||
4022 | * established that there are no tasks in the cgroup, if the css refcount | ||
4023 | * is also 1, then there should be no outstanding references, so the | ||
4024 | * subsystem is safe to destroy. We scan across all subsystems rather than | ||
4025 | * using the per-hierarchy linked list of mounted subsystems since we can | ||
4026 | * be called via check_for_release() with no synchronization other than | ||
4027 | * RCU, and the subsystem linked list isn't RCU-safe. | ||
4028 | */ | ||
4009 | static int cgroup_has_css_refs(struct cgroup *cgrp) | 4029 | static int cgroup_has_css_refs(struct cgroup *cgrp) |
4010 | { | 4030 | { |
4011 | /* Check the reference count on each subsystem. Since we | ||
4012 | * already established that there are no tasks in the | ||
4013 | * cgroup, if the css refcount is also 1, then there should | ||
4014 | * be no outstanding references, so the subsystem is safe to | ||
4015 | * destroy. We scan across all subsystems rather than using | ||
4016 | * the per-hierarchy linked list of mounted subsystems since | ||
4017 | * we can be called via check_for_release() with no | ||
4018 | * synchronization other than RCU, and the subsystem linked | ||
4019 | * list isn't RCU-safe */ | ||
4020 | int i; | 4031 | int i; |
4032 | |||
4021 | /* | 4033 | /* |
4022 | * We won't need to lock the subsys array, because the subsystems | 4034 | * We won't need to lock the subsys array, because the subsystems |
4023 | * we're concerned about aren't going anywhere since our cgroup root | 4035 | * we're concerned about aren't going anywhere since our cgroup root |
@@ -4026,17 +4038,21 @@ static int cgroup_has_css_refs(struct cgroup *cgrp) | |||
4026 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { | 4038 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { |
4027 | struct cgroup_subsys *ss = subsys[i]; | 4039 | struct cgroup_subsys *ss = subsys[i]; |
4028 | struct cgroup_subsys_state *css; | 4040 | struct cgroup_subsys_state *css; |
4041 | |||
4029 | /* Skip subsystems not present or not in this hierarchy */ | 4042 | /* Skip subsystems not present or not in this hierarchy */ |
4030 | if (ss == NULL || ss->root != cgrp->root) | 4043 | if (ss == NULL || ss->root != cgrp->root) |
4031 | continue; | 4044 | continue; |
4045 | |||
4032 | css = cgrp->subsys[ss->subsys_id]; | 4046 | css = cgrp->subsys[ss->subsys_id]; |
4033 | /* When called from check_for_release() it's possible | 4047 | /* |
4048 | * When called from check_for_release() it's possible | ||
4034 | * that by this point the cgroup has been removed | 4049 | * that by this point the cgroup has been removed |
4035 | * and the css deleted. But a false-positive doesn't | 4050 | * and the css deleted. But a false-positive doesn't |
4036 | * matter, since it can only happen if the cgroup | 4051 | * matter, since it can only happen if the cgroup |
4037 | * has been deleted and hence no longer needs the | 4052 | * has been deleted and hence no longer needs the |
4038 | * release agent to be called anyway. */ | 4053 | * release agent to be called anyway. |
4039 | if (css && (atomic_read(&css->refcnt) > 1)) | 4054 | */ |
4055 | if (css && css_refcnt(css) > 1) | ||
4040 | return 1; | 4056 | return 1; |
4041 | } | 4057 | } |
4042 | return 0; | 4058 | return 0; |
@@ -4053,44 +4069,37 @@ static int cgroup_clear_css_refs(struct cgroup *cgrp) | |||
4053 | struct cgroup_subsys *ss; | 4069 | struct cgroup_subsys *ss; |
4054 | unsigned long flags; | 4070 | unsigned long flags; |
4055 | bool failed = false; | 4071 | bool failed = false; |
4072 | |||
4056 | local_irq_save(flags); | 4073 | local_irq_save(flags); |
4074 | |||
4075 | /* | ||
4076 | * Block new css_tryget() by deactivating refcnt. If all refcnts | ||
4077 | * were 1 at the moment of deactivation, we succeeded. | ||
4078 | */ | ||
4057 | for_each_subsys(cgrp->root, ss) { | 4079 | for_each_subsys(cgrp->root, ss) { |
4058 | struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id]; | 4080 | struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id]; |
4059 | int refcnt; | 4081 | |
4060 | while (1) { | 4082 | WARN_ON(atomic_read(&css->refcnt) < 0); |
4061 | /* We can only remove a CSS with a refcnt==1 */ | 4083 | atomic_add(CSS_DEACT_BIAS, &css->refcnt); |
4062 | refcnt = atomic_read(&css->refcnt); | 4084 | failed |= css_refcnt(css) != 1; |
4063 | if (refcnt > 1) { | ||
4064 | failed = true; | ||
4065 | goto done; | ||
4066 | } | ||
4067 | BUG_ON(!refcnt); | ||
4068 | /* | ||
4069 | * Drop the refcnt to 0 while we check other | ||
4070 | * subsystems. This will cause any racing | ||
4071 | * css_tryget() to spin until we set the | ||
4072 | * CSS_REMOVED bits or abort | ||
4073 | */ | ||
4074 | if (atomic_cmpxchg(&css->refcnt, refcnt, 0) == refcnt) | ||
4075 | break; | ||
4076 | cpu_relax(); | ||
4077 | } | ||
4078 | } | 4085 | } |
4079 | done: | 4086 | |
4087 | /* | ||
4088 | * If succeeded, set REMOVED and put all the base refs; otherwise, | ||
4089 | * restore refcnts to positive values. Either way, all in-progress | ||
4090 | * css_tryget() will be released. | ||
4091 | */ | ||
4080 | for_each_subsys(cgrp->root, ss) { | 4092 | for_each_subsys(cgrp->root, ss) { |
4081 | struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id]; | 4093 | struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id]; |
4082 | if (failed) { | 4094 | |
4083 | /* | 4095 | if (!failed) { |
4084 | * Restore old refcnt if we previously managed | ||
4085 | * to clear it from 1 to 0 | ||
4086 | */ | ||
4087 | if (!atomic_read(&css->refcnt)) | ||
4088 | atomic_set(&css->refcnt, 1); | ||
4089 | } else { | ||
4090 | /* Commit the fact that the CSS is removed */ | ||
4091 | set_bit(CSS_REMOVED, &css->flags); | 4096 | set_bit(CSS_REMOVED, &css->flags); |
4097 | css_put(css); | ||
4098 | } else { | ||
4099 | atomic_sub(CSS_DEACT_BIAS, &css->refcnt); | ||
4092 | } | 4100 | } |
4093 | } | 4101 | } |
4102 | |||
4094 | local_irq_restore(flags); | 4103 | local_irq_restore(flags); |
4095 | return !failed; | 4104 | return !failed; |
4096 | } | 4105 | } |
@@ -4887,13 +4896,28 @@ static void check_for_release(struct cgroup *cgrp) | |||
4887 | } | 4896 | } |
4888 | 4897 | ||
4889 | /* Caller must verify that the css is not for root cgroup */ | 4898 | /* Caller must verify that the css is not for root cgroup */ |
4890 | void __css_put(struct cgroup_subsys_state *css, int count) | 4899 | bool __css_tryget(struct cgroup_subsys_state *css) |
4900 | { | ||
4901 | do { | ||
4902 | int v = css_refcnt(css); | ||
4903 | |||
4904 | if (atomic_cmpxchg(&css->refcnt, v, v + 1) == v) | ||
4905 | return true; | ||
4906 | cpu_relax(); | ||
4907 | } while (!test_bit(CSS_REMOVED, &css->flags)); | ||
4908 | |||
4909 | return false; | ||
4910 | } | ||
4911 | EXPORT_SYMBOL_GPL(__css_tryget); | ||
4912 | |||
4913 | /* Caller must verify that the css is not for root cgroup */ | ||
4914 | void __css_put(struct cgroup_subsys_state *css) | ||
4891 | { | 4915 | { |
4892 | struct cgroup *cgrp = css->cgroup; | 4916 | struct cgroup *cgrp = css->cgroup; |
4893 | int val; | 4917 | |
4894 | rcu_read_lock(); | 4918 | rcu_read_lock(); |
4895 | val = atomic_sub_return(count, &css->refcnt); | 4919 | atomic_dec(&css->refcnt); |
4896 | if (val == 1) { | 4920 | if (css_refcnt(css) == 1) { |
4897 | if (notify_on_release(cgrp)) { | 4921 | if (notify_on_release(cgrp)) { |
4898 | set_bit(CGRP_RELEASABLE, &cgrp->flags); | 4922 | set_bit(CGRP_RELEASABLE, &cgrp->flags); |
4899 | check_for_release(cgrp); | 4923 | check_for_release(cgrp); |
@@ -4901,7 +4925,6 @@ void __css_put(struct cgroup_subsys_state *css, int count) | |||
4901 | cgroup_wakeup_rmdir_waiter(cgrp); | 4925 | cgroup_wakeup_rmdir_waiter(cgrp); |
4902 | } | 4926 | } |
4903 | rcu_read_unlock(); | 4927 | rcu_read_unlock(); |
4904 | WARN_ON_ONCE(val < 1); | ||
4905 | } | 4928 | } |
4906 | EXPORT_SYMBOL_GPL(__css_put); | 4929 | EXPORT_SYMBOL_GPL(__css_put); |
4907 | 4930 | ||
@@ -5020,7 +5043,7 @@ unsigned short css_id(struct cgroup_subsys_state *css) | |||
5020 | * on this or this is under rcu_read_lock(). Once css->id is allocated, | 5043 | * on this or this is under rcu_read_lock(). Once css->id is allocated, |
5021 | * it's unchanged until freed. | 5044 | * it's unchanged until freed. |
5022 | */ | 5045 | */ |
5023 | cssid = rcu_dereference_check(css->id, atomic_read(&css->refcnt)); | 5046 | cssid = rcu_dereference_check(css->id, css_refcnt(css)); |
5024 | 5047 | ||
5025 | if (cssid) | 5048 | if (cssid) |
5026 | return cssid->id; | 5049 | return cssid->id; |
@@ -5032,7 +5055,7 @@ unsigned short css_depth(struct cgroup_subsys_state *css) | |||
5032 | { | 5055 | { |
5033 | struct css_id *cssid; | 5056 | struct css_id *cssid; |
5034 | 5057 | ||
5035 | cssid = rcu_dereference_check(css->id, atomic_read(&css->refcnt)); | 5058 | cssid = rcu_dereference_check(css->id, css_refcnt(css)); |
5036 | 5059 | ||
5037 | if (cssid) | 5060 | if (cssid) |
5038 | return cssid->depth; | 5061 | return cssid->depth; |