diff options
author | Li Zefan <lizefan@huawei.com> | 2013-03-01 02:06:07 -0500 |
---|---|---|
committer | Tejun Heo <tj@kernel.org> | 2013-03-04 13:04:54 -0500 |
commit | f50daa704f36a6544a902c52b6cf37b0493dfc5d (patch) | |
tree | 157d40fe53c6cde3aa2c2644b35dbe687e547dcc /kernel | |
parent | f440d98f8ebab02a768c1de17395e4239af9a97d (diff) |
cgroup: no need to check css refs for release notification
We no longer fail rmdir() when there're still css refs, so we don't
need to check css refs in check_for_release().
This also voids a bug. cgroup_has_css_refs() accesses subsys[i]
without cgroup_mutex, so it can race with cgroup_unload_subsys().
cgroup_has_css_refs()
...
if (ss == NULL || ss->root != cgrp->root)
if ss pointers to net_cls_subsys, and cls_cgroup module is unloaded
right after the former check but before the latter, the memory that
net_cls_subsys resides has become invalid.
Signed-off-by: Li Zefan <lizefan@huawei.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/cgroup.c | 67 |
1 files changed, 8 insertions, 59 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 50682168abc2..9df799d5d31c 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
@@ -4341,47 +4341,6 @@ static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) | |||
4341 | return cgroup_create(c_parent, dentry, mode | S_IFDIR); | 4341 | return cgroup_create(c_parent, dentry, mode | S_IFDIR); |
4342 | } | 4342 | } |
4343 | 4343 | ||
4344 | /* | ||
4345 | * Check the reference count on each subsystem. Since we already | ||
4346 | * established that there are no tasks in the cgroup, if the css refcount | ||
4347 | * is also 1, then there should be no outstanding references, so the | ||
4348 | * subsystem is safe to destroy. We scan across all subsystems rather than | ||
4349 | * using the per-hierarchy linked list of mounted subsystems since we can | ||
4350 | * be called via check_for_release() with no synchronization other than | ||
4351 | * RCU, and the subsystem linked list isn't RCU-safe. | ||
4352 | */ | ||
4353 | static int cgroup_has_css_refs(struct cgroup *cgrp) | ||
4354 | { | ||
4355 | int i; | ||
4356 | |||
4357 | /* | ||
4358 | * We won't need to lock the subsys array, because the subsystems | ||
4359 | * we're concerned about aren't going anywhere since our cgroup root | ||
4360 | * has a reference on them. | ||
4361 | */ | ||
4362 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { | ||
4363 | struct cgroup_subsys *ss = subsys[i]; | ||
4364 | struct cgroup_subsys_state *css; | ||
4365 | |||
4366 | /* Skip subsystems not present or not in this hierarchy */ | ||
4367 | if (ss == NULL || ss->root != cgrp->root) | ||
4368 | continue; | ||
4369 | |||
4370 | css = cgrp->subsys[ss->subsys_id]; | ||
4371 | /* | ||
4372 | * When called from check_for_release() it's possible | ||
4373 | * that by this point the cgroup has been removed | ||
4374 | * and the css deleted. But a false-positive doesn't | ||
4375 | * matter, since it can only happen if the cgroup | ||
4376 | * has been deleted and hence no longer needs the | ||
4377 | * release agent to be called anyway. | ||
4378 | */ | ||
4379 | if (css && css_refcnt(css) > 1) | ||
4380 | return 1; | ||
4381 | } | ||
4382 | return 0; | ||
4383 | } | ||
4384 | |||
4385 | static int cgroup_destroy_locked(struct cgroup *cgrp) | 4344 | static int cgroup_destroy_locked(struct cgroup *cgrp) |
4386 | __releases(&cgroup_mutex) __acquires(&cgroup_mutex) | 4345 | __releases(&cgroup_mutex) __acquires(&cgroup_mutex) |
4387 | { | 4346 | { |
@@ -5108,12 +5067,15 @@ static void check_for_release(struct cgroup *cgrp) | |||
5108 | { | 5067 | { |
5109 | /* All of these checks rely on RCU to keep the cgroup | 5068 | /* All of these checks rely on RCU to keep the cgroup |
5110 | * structure alive */ | 5069 | * structure alive */ |
5111 | if (cgroup_is_releasable(cgrp) && !atomic_read(&cgrp->count) | 5070 | if (cgroup_is_releasable(cgrp) && |
5112 | && list_empty(&cgrp->children) && !cgroup_has_css_refs(cgrp)) { | 5071 | !atomic_read(&cgrp->count) && list_empty(&cgrp->children)) { |
5113 | /* Control Group is currently removeable. If it's not | 5072 | /* |
5073 | * Control Group is currently removeable. If it's not | ||
5114 | * already queued for a userspace notification, queue | 5074 | * already queued for a userspace notification, queue |
5115 | * it now */ | 5075 | * it now |
5076 | */ | ||
5116 | int need_schedule_work = 0; | 5077 | int need_schedule_work = 0; |
5078 | |||
5117 | raw_spin_lock(&release_list_lock); | 5079 | raw_spin_lock(&release_list_lock); |
5118 | if (!cgroup_is_removed(cgrp) && | 5080 | if (!cgroup_is_removed(cgrp) && |
5119 | list_empty(&cgrp->release_list)) { | 5081 | list_empty(&cgrp->release_list)) { |
@@ -5146,24 +5108,11 @@ EXPORT_SYMBOL_GPL(__css_tryget); | |||
5146 | /* Caller must verify that the css is not for root cgroup */ | 5108 | /* Caller must verify that the css is not for root cgroup */ |
5147 | void __css_put(struct cgroup_subsys_state *css) | 5109 | void __css_put(struct cgroup_subsys_state *css) |
5148 | { | 5110 | { |
5149 | struct cgroup *cgrp = css->cgroup; | ||
5150 | int v; | 5111 | int v; |
5151 | 5112 | ||
5152 | rcu_read_lock(); | ||
5153 | v = css_unbias_refcnt(atomic_dec_return(&css->refcnt)); | 5113 | v = css_unbias_refcnt(atomic_dec_return(&css->refcnt)); |
5154 | 5114 | if (v == 0) | |
5155 | switch (v) { | ||
5156 | case 1: | ||
5157 | if (notify_on_release(cgrp)) { | ||
5158 | set_bit(CGRP_RELEASABLE, &cgrp->flags); | ||
5159 | check_for_release(cgrp); | ||
5160 | } | ||
5161 | break; | ||
5162 | case 0: | ||
5163 | schedule_work(&css->dput_work); | 5115 | schedule_work(&css->dput_work); |
5164 | break; | ||
5165 | } | ||
5166 | rcu_read_unlock(); | ||
5167 | } | 5116 | } |
5168 | EXPORT_SYMBOL_GPL(__css_put); | 5117 | EXPORT_SYMBOL_GPL(__css_put); |
5169 | 5118 | ||