diff options
| -rw-r--r-- | include/linux/cgroup.h | 12 | ||||
| -rw-r--r-- | kernel/cgroup.c | 119 |
2 files changed, 75 insertions, 56 deletions
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 028478c6e0c5..be81fafae11f 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h | |||
| @@ -115,16 +115,12 @@ static inline bool css_is_removed(struct cgroup_subsys_state *css) | |||
| 115 | * the css has been destroyed. | 115 | * the css has been destroyed. |
| 116 | */ | 116 | */ |
| 117 | 117 | ||
| 118 | extern bool __css_tryget(struct cgroup_subsys_state *css); | ||
| 118 | static inline bool css_tryget(struct cgroup_subsys_state *css) | 119 | static inline bool css_tryget(struct cgroup_subsys_state *css) |
| 119 | { | 120 | { |
| 120 | if (test_bit(CSS_ROOT, &css->flags)) | 121 | if (test_bit(CSS_ROOT, &css->flags)) |
| 121 | return true; | 122 | return true; |
| 122 | while (!atomic_inc_not_zero(&css->refcnt)) { | 123 | return __css_tryget(css); |
| 123 | if (test_bit(CSS_REMOVED, &css->flags)) | ||
| 124 | return false; | ||
| 125 | cpu_relax(); | ||
| 126 | } | ||
| 127 | return true; | ||
| 128 | } | 124 | } |
| 129 | 125 | ||
| 130 | /* | 126 | /* |
| @@ -132,11 +128,11 @@ static inline bool css_tryget(struct cgroup_subsys_state *css) | |||
| 132 | * css_get() or css_tryget() | 128 | * css_get() or css_tryget() |
| 133 | */ | 129 | */ |
| 134 | 130 | ||
| 135 | extern void __css_put(struct cgroup_subsys_state *css, int count); | 131 | extern void __css_put(struct cgroup_subsys_state *css); |
| 136 | static inline void css_put(struct cgroup_subsys_state *css) | 132 | static inline void css_put(struct cgroup_subsys_state *css) |
| 137 | { | 133 | { |
| 138 | if (!test_bit(CSS_ROOT, &css->flags)) | 134 | if (!test_bit(CSS_ROOT, &css->flags)) |
| 139 | __css_put(css, 1); | 135 | __css_put(css); |
| 140 | } | 136 | } |
| 141 | 137 | ||
| 142 | /* bits in struct cgroup flags field */ | 138 | /* bits in struct cgroup flags field */ |
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 21bba7722350..2eade5186604 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
| @@ -63,6 +63,9 @@ | |||
| 63 | 63 | ||
| 64 | #include <linux/atomic.h> | 64 | #include <linux/atomic.h> |
| 65 | 65 | ||
| 66 | /* css deactivation bias, makes css->refcnt negative to deny new trygets */ | ||
| 67 | #define CSS_DEACT_BIAS INT_MIN | ||
| 68 | |||
| 66 | /* | 69 | /* |
| 67 | * cgroup_mutex is the master lock. Any modification to cgroup or its | 70 | * cgroup_mutex is the master lock. Any modification to cgroup or its |
| 68 | * hierarchy must be performed while holding it. | 71 | * hierarchy must be performed while holding it. |
| @@ -251,6 +254,14 @@ int cgroup_lock_is_held(void) | |||
| 251 | 254 | ||
| 252 | EXPORT_SYMBOL_GPL(cgroup_lock_is_held); | 255 | EXPORT_SYMBOL_GPL(cgroup_lock_is_held); |
| 253 | 256 | ||
| 257 | /* the current nr of refs, always >= 0 whether @css is deactivated or not */ | ||
| 258 | static int css_refcnt(struct cgroup_subsys_state *css) | ||
| 259 | { | ||
| 260 | int v = atomic_read(&css->refcnt); | ||
| 261 | |||
| 262 | return v >= 0 ? v : v - CSS_DEACT_BIAS; | ||
| 263 | } | ||
| 264 | |||
| 254 | /* convenient tests for these bits */ | 265 | /* convenient tests for these bits */ |
| 255 | inline int cgroup_is_removed(const struct cgroup *cgrp) | 266 | inline int cgroup_is_removed(const struct cgroup *cgrp) |
| 256 | { | 267 | { |
| @@ -4006,18 +4017,19 @@ static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) | |||
| 4006 | return cgroup_create(c_parent, dentry, mode | S_IFDIR); | 4017 | return cgroup_create(c_parent, dentry, mode | S_IFDIR); |
| 4007 | } | 4018 | } |
| 4008 | 4019 | ||
| 4020 | /* | ||
| 4021 | * Check the reference count on each subsystem. Since we already | ||
| 4022 | * established that there are no tasks in the cgroup, if the css refcount | ||
| 4023 | * is also 1, then there should be no outstanding references, so the | ||
| 4024 | * subsystem is safe to destroy. We scan across all subsystems rather than | ||
| 4025 | * using the per-hierarchy linked list of mounted subsystems since we can | ||
| 4026 | * be called via check_for_release() with no synchronization other than | ||
| 4027 | * RCU, and the subsystem linked list isn't RCU-safe. | ||
| 4028 | */ | ||
| 4009 | static int cgroup_has_css_refs(struct cgroup *cgrp) | 4029 | static int cgroup_has_css_refs(struct cgroup *cgrp) |
| 4010 | { | 4030 | { |
| 4011 | /* Check the reference count on each subsystem. Since we | ||
| 4012 | * already established that there are no tasks in the | ||
| 4013 | * cgroup, if the css refcount is also 1, then there should | ||
| 4014 | * be no outstanding references, so the subsystem is safe to | ||
| 4015 | * destroy. We scan across all subsystems rather than using | ||
| 4016 | * the per-hierarchy linked list of mounted subsystems since | ||
| 4017 | * we can be called via check_for_release() with no | ||
| 4018 | * synchronization other than RCU, and the subsystem linked | ||
| 4019 | * list isn't RCU-safe */ | ||
| 4020 | int i; | 4031 | int i; |
| 4032 | |||
| 4021 | /* | 4033 | /* |
| 4022 | * We won't need to lock the subsys array, because the subsystems | 4034 | * We won't need to lock the subsys array, because the subsystems |
| 4023 | * we're concerned about aren't going anywhere since our cgroup root | 4035 | * we're concerned about aren't going anywhere since our cgroup root |
| @@ -4026,17 +4038,21 @@ static int cgroup_has_css_refs(struct cgroup *cgrp) | |||
| 4026 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { | 4038 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { |
| 4027 | struct cgroup_subsys *ss = subsys[i]; | 4039 | struct cgroup_subsys *ss = subsys[i]; |
| 4028 | struct cgroup_subsys_state *css; | 4040 | struct cgroup_subsys_state *css; |
| 4041 | |||
| 4029 | /* Skip subsystems not present or not in this hierarchy */ | 4042 | /* Skip subsystems not present or not in this hierarchy */ |
| 4030 | if (ss == NULL || ss->root != cgrp->root) | 4043 | if (ss == NULL || ss->root != cgrp->root) |
| 4031 | continue; | 4044 | continue; |
| 4045 | |||
| 4032 | css = cgrp->subsys[ss->subsys_id]; | 4046 | css = cgrp->subsys[ss->subsys_id]; |
| 4033 | /* When called from check_for_release() it's possible | 4047 | /* |
| 4048 | * When called from check_for_release() it's possible | ||
| 4034 | * that by this point the cgroup has been removed | 4049 | * that by this point the cgroup has been removed |
| 4035 | * and the css deleted. But a false-positive doesn't | 4050 | * and the css deleted. But a false-positive doesn't |
| 4036 | * matter, since it can only happen if the cgroup | 4051 | * matter, since it can only happen if the cgroup |
| 4037 | * has been deleted and hence no longer needs the | 4052 | * has been deleted and hence no longer needs the |
| 4038 | * release agent to be called anyway. */ | 4053 | * release agent to be called anyway. |
| 4039 | if (css && (atomic_read(&css->refcnt) > 1)) | 4054 | */ |
| 4055 | if (css && css_refcnt(css) > 1) | ||
| 4040 | return 1; | 4056 | return 1; |
| 4041 | } | 4057 | } |
| 4042 | return 0; | 4058 | return 0; |
| @@ -4053,44 +4069,37 @@ static int cgroup_clear_css_refs(struct cgroup *cgrp) | |||
| 4053 | struct cgroup_subsys *ss; | 4069 | struct cgroup_subsys *ss; |
| 4054 | unsigned long flags; | 4070 | unsigned long flags; |
| 4055 | bool failed = false; | 4071 | bool failed = false; |
| 4072 | |||
| 4056 | local_irq_save(flags); | 4073 | local_irq_save(flags); |
| 4074 | |||
| 4075 | /* | ||
| 4076 | * Block new css_tryget() by deactivating refcnt. If all refcnts | ||
| 4077 | * were 1 at the moment of deactivation, we succeeded. | ||
| 4078 | */ | ||
| 4057 | for_each_subsys(cgrp->root, ss) { | 4079 | for_each_subsys(cgrp->root, ss) { |
| 4058 | struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id]; | 4080 | struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id]; |
| 4059 | int refcnt; | 4081 | |
| 4060 | while (1) { | 4082 | WARN_ON(atomic_read(&css->refcnt) < 0); |
| 4061 | /* We can only remove a CSS with a refcnt==1 */ | 4083 | atomic_add(CSS_DEACT_BIAS, &css->refcnt); |
| 4062 | refcnt = atomic_read(&css->refcnt); | 4084 | failed |= css_refcnt(css) != 1; |
| 4063 | if (refcnt > 1) { | ||
| 4064 | failed = true; | ||
| 4065 | goto done; | ||
| 4066 | } | ||
| 4067 | BUG_ON(!refcnt); | ||
| 4068 | /* | ||
| 4069 | * Drop the refcnt to 0 while we check other | ||
| 4070 | * subsystems. This will cause any racing | ||
| 4071 | * css_tryget() to spin until we set the | ||
| 4072 | * CSS_REMOVED bits or abort | ||
| 4073 | */ | ||
| 4074 | if (atomic_cmpxchg(&css->refcnt, refcnt, 0) == refcnt) | ||
| 4075 | break; | ||
| 4076 | cpu_relax(); | ||
| 4077 | } | ||
| 4078 | } | 4085 | } |
| 4079 | done: | 4086 | |
| 4087 | /* | ||
| 4088 | * If succeeded, set REMOVED and put all the base refs; otherwise, | ||
| 4089 | * restore refcnts to positive values. Either way, all in-progress | ||
| 4090 | * css_tryget() will be released. | ||
| 4091 | */ | ||
| 4080 | for_each_subsys(cgrp->root, ss) { | 4092 | for_each_subsys(cgrp->root, ss) { |
| 4081 | struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id]; | 4093 | struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id]; |
| 4082 | if (failed) { | 4094 | |
| 4083 | /* | 4095 | if (!failed) { |
| 4084 | * Restore old refcnt if we previously managed | ||
| 4085 | * to clear it from 1 to 0 | ||
| 4086 | */ | ||
| 4087 | if (!atomic_read(&css->refcnt)) | ||
| 4088 | atomic_set(&css->refcnt, 1); | ||
| 4089 | } else { | ||
| 4090 | /* Commit the fact that the CSS is removed */ | ||
| 4091 | set_bit(CSS_REMOVED, &css->flags); | 4096 | set_bit(CSS_REMOVED, &css->flags); |
| 4097 | css_put(css); | ||
| 4098 | } else { | ||
| 4099 | atomic_sub(CSS_DEACT_BIAS, &css->refcnt); | ||
| 4092 | } | 4100 | } |
| 4093 | } | 4101 | } |
| 4102 | |||
| 4094 | local_irq_restore(flags); | 4103 | local_irq_restore(flags); |
| 4095 | return !failed; | 4104 | return !failed; |
| 4096 | } | 4105 | } |
| @@ -4887,13 +4896,28 @@ static void check_for_release(struct cgroup *cgrp) | |||
| 4887 | } | 4896 | } |
| 4888 | 4897 | ||
| 4889 | /* Caller must verify that the css is not for root cgroup */ | 4898 | /* Caller must verify that the css is not for root cgroup */ |
| 4890 | void __css_put(struct cgroup_subsys_state *css, int count) | 4899 | bool __css_tryget(struct cgroup_subsys_state *css) |
| 4900 | { | ||
| 4901 | do { | ||
| 4902 | int v = css_refcnt(css); | ||
| 4903 | |||
| 4904 | if (atomic_cmpxchg(&css->refcnt, v, v + 1) == v) | ||
| 4905 | return true; | ||
| 4906 | cpu_relax(); | ||
| 4907 | } while (!test_bit(CSS_REMOVED, &css->flags)); | ||
| 4908 | |||
| 4909 | return false; | ||
| 4910 | } | ||
| 4911 | EXPORT_SYMBOL_GPL(__css_tryget); | ||
| 4912 | |||
| 4913 | /* Caller must verify that the css is not for root cgroup */ | ||
| 4914 | void __css_put(struct cgroup_subsys_state *css) | ||
| 4891 | { | 4915 | { |
| 4892 | struct cgroup *cgrp = css->cgroup; | 4916 | struct cgroup *cgrp = css->cgroup; |
| 4893 | int val; | 4917 | |
| 4894 | rcu_read_lock(); | 4918 | rcu_read_lock(); |
| 4895 | val = atomic_sub_return(count, &css->refcnt); | 4919 | atomic_dec(&css->refcnt); |
| 4896 | if (val == 1) { | 4920 | if (css_refcnt(css) == 1) { |
| 4897 | if (notify_on_release(cgrp)) { | 4921 | if (notify_on_release(cgrp)) { |
| 4898 | set_bit(CGRP_RELEASABLE, &cgrp->flags); | 4922 | set_bit(CGRP_RELEASABLE, &cgrp->flags); |
| 4899 | check_for_release(cgrp); | 4923 | check_for_release(cgrp); |
| @@ -4901,7 +4925,6 @@ void __css_put(struct cgroup_subsys_state *css, int count) | |||
| 4901 | cgroup_wakeup_rmdir_waiter(cgrp); | 4925 | cgroup_wakeup_rmdir_waiter(cgrp); |
| 4902 | } | 4926 | } |
| 4903 | rcu_read_unlock(); | 4927 | rcu_read_unlock(); |
| 4904 | WARN_ON_ONCE(val < 1); | ||
| 4905 | } | 4928 | } |
| 4906 | EXPORT_SYMBOL_GPL(__css_put); | 4929 | EXPORT_SYMBOL_GPL(__css_put); |
| 4907 | 4930 | ||
| @@ -5020,7 +5043,7 @@ unsigned short css_id(struct cgroup_subsys_state *css) | |||
| 5020 | * on this or this is under rcu_read_lock(). Once css->id is allocated, | 5043 | * on this or this is under rcu_read_lock(). Once css->id is allocated, |
| 5021 | * it's unchanged until freed. | 5044 | * it's unchanged until freed. |
| 5022 | */ | 5045 | */ |
| 5023 | cssid = rcu_dereference_check(css->id, atomic_read(&css->refcnt)); | 5046 | cssid = rcu_dereference_check(css->id, css_refcnt(css)); |
| 5024 | 5047 | ||
| 5025 | if (cssid) | 5048 | if (cssid) |
| 5026 | return cssid->id; | 5049 | return cssid->id; |
| @@ -5032,7 +5055,7 @@ unsigned short css_depth(struct cgroup_subsys_state *css) | |||
| 5032 | { | 5055 | { |
| 5033 | struct css_id *cssid; | 5056 | struct css_id *cssid; |
| 5034 | 5057 | ||
| 5035 | cssid = rcu_dereference_check(css->id, atomic_read(&css->refcnt)); | 5058 | cssid = rcu_dereference_check(css->id, css_refcnt(css)); |
| 5036 | 5059 | ||
| 5037 | if (cssid) | 5060 | if (cssid) |
| 5038 | return cssid->depth; | 5061 | return cssid->depth; |
