aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLai Jiangshan <laijs@cn.fujitsu.com>2008-10-18 23:28:03 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2008-10-20 11:52:38 -0400
commit146aa1bd0511f88ddb4e92fafa2b8aad4f2f65f3 (patch)
treed7deb46b9a38f82f109b2126317899efbbce41c2
parent248736c2a57206388c86f8cdd3392ee986e84f9f (diff)
cgroups: fix probable race with put_css_set[_taskexit] and find_css_set
put_css_set_taskexit may be called when find_css_set is called on other cpu. And the race will occur: put_css_set_taskexit side find_css_set side | atomic_dec_and_test(&kref->refcount) | /* kref->refcount = 0 */ | .................................................................... | read_lock(&css_set_lock) | find_existing_css_set | get_css_set | read_unlock(&css_set_lock); .................................................................... __release_css_set | .................................................................... | /* use a released css_set */ | [put_css_set is the same. But in the current code, all put_css_set are put into cgroup mutex critical region as the same as find_css_set.] [akpm@linux-foundation.org: repair comments] [menage@google.com: eliminate race in css_set refcounting] Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com> Cc: Balbir Singh <balbir@in.ibm.com> Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Signed-off-by: Paul Menage <menage@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--include/linux/cgroup.h3
-rw-r--r--kernel/cgroup.c43
-rw-r--r--kernel/cgroup_debug.c4
3 files changed, 23 insertions, 27 deletions
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 30934e4bfaab..7166023e07d2 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -9,7 +9,6 @@
9 */ 9 */
10 10
11#include <linux/sched.h> 11#include <linux/sched.h>
12#include <linux/kref.h>
13#include <linux/cpumask.h> 12#include <linux/cpumask.h>
14#include <linux/nodemask.h> 13#include <linux/nodemask.h>
15#include <linux/rcupdate.h> 14#include <linux/rcupdate.h>
@@ -149,7 +148,7 @@ struct cgroup {
149struct css_set { 148struct css_set {
150 149
151 /* Reference count */ 150 /* Reference count */
152 struct kref ref; 151 atomic_t refcount;
153 152
154 /* 153 /*
155 * List running through all cgroup groups in the same hash 154 * List running through all cgroup groups in the same hash
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 8c6e1c17e6d3..1e49218457e0 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -241,7 +241,6 @@ static void unlink_css_set(struct css_set *cg)
241 struct cg_cgroup_link *link; 241 struct cg_cgroup_link *link;
242 struct cg_cgroup_link *saved_link; 242 struct cg_cgroup_link *saved_link;
243 243
244 write_lock(&css_set_lock);
245 hlist_del(&cg->hlist); 244 hlist_del(&cg->hlist);
246 css_set_count--; 245 css_set_count--;
247 246
@@ -251,16 +250,25 @@ static void unlink_css_set(struct css_set *cg)
251 list_del(&link->cgrp_link_list); 250 list_del(&link->cgrp_link_list);
252 kfree(link); 251 kfree(link);
253 } 252 }
254
255 write_unlock(&css_set_lock);
256} 253}
257 254
258static void __release_css_set(struct kref *k, int taskexit) 255static void __put_css_set(struct css_set *cg, int taskexit)
259{ 256{
260 int i; 257 int i;
261 struct css_set *cg = container_of(k, struct css_set, ref); 258 /*
262 259 * Ensure that the refcount doesn't hit zero while any readers
260 * can see it. Similar to atomic_dec_and_lock(), but for an
261 * rwlock
262 */
263 if (atomic_add_unless(&cg->refcount, -1, 1))
264 return;
265 write_lock(&css_set_lock);
266 if (!atomic_dec_and_test(&cg->refcount)) {
267 write_unlock(&css_set_lock);
268 return;
269 }
263 unlink_css_set(cg); 270 unlink_css_set(cg);
271 write_unlock(&css_set_lock);
264 272
265 rcu_read_lock(); 273 rcu_read_lock();
266 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { 274 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
@@ -276,32 +284,22 @@ static void __release_css_set(struct kref *k, int taskexit)
276 kfree(cg); 284 kfree(cg);
277} 285}
278 286
279static void release_css_set(struct kref *k)
280{
281 __release_css_set(k, 0);
282}
283
284static void release_css_set_taskexit(struct kref *k)
285{
286 __release_css_set(k, 1);
287}
288
289/* 287/*
290 * refcounted get/put for css_set objects 288 * refcounted get/put for css_set objects
291 */ 289 */
292static inline void get_css_set(struct css_set *cg) 290static inline void get_css_set(struct css_set *cg)
293{ 291{
294 kref_get(&cg->ref); 292 atomic_inc(&cg->refcount);
295} 293}
296 294
297static inline void put_css_set(struct css_set *cg) 295static inline void put_css_set(struct css_set *cg)
298{ 296{
299 kref_put(&cg->ref, release_css_set); 297 __put_css_set(cg, 0);
300} 298}
301 299
302static inline void put_css_set_taskexit(struct css_set *cg) 300static inline void put_css_set_taskexit(struct css_set *cg)
303{ 301{
304 kref_put(&cg->ref, release_css_set_taskexit); 302 __put_css_set(cg, 1);
305} 303}
306 304
307/* 305/*
@@ -427,7 +425,7 @@ static struct css_set *find_css_set(
427 return NULL; 425 return NULL;
428 } 426 }
429 427
430 kref_init(&res->ref); 428 atomic_set(&res->refcount, 1);
431 INIT_LIST_HEAD(&res->cg_links); 429 INIT_LIST_HEAD(&res->cg_links);
432 INIT_LIST_HEAD(&res->tasks); 430 INIT_LIST_HEAD(&res->tasks);
433 INIT_HLIST_NODE(&res->hlist); 431 INIT_HLIST_NODE(&res->hlist);
@@ -1728,7 +1726,7 @@ int cgroup_task_count(const struct cgroup *cgrp)
1728 1726
1729 read_lock(&css_set_lock); 1727 read_lock(&css_set_lock);
1730 list_for_each_entry(link, &cgrp->css_sets, cgrp_link_list) { 1728 list_for_each_entry(link, &cgrp->css_sets, cgrp_link_list) {
1731 count += atomic_read(&link->cg->ref.refcount); 1729 count += atomic_read(&link->cg->refcount);
1732 } 1730 }
1733 read_unlock(&css_set_lock); 1731 read_unlock(&css_set_lock);
1734 return count; 1732 return count;
@@ -2495,8 +2493,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
2495int __init cgroup_init_early(void) 2493int __init cgroup_init_early(void)
2496{ 2494{
2497 int i; 2495 int i;
2498 kref_init(&init_css_set.ref); 2496 atomic_set(&init_css_set.refcount, 1);
2499 kref_get(&init_css_set.ref);
2500 INIT_LIST_HEAD(&init_css_set.cg_links); 2497 INIT_LIST_HEAD(&init_css_set.cg_links);
2501 INIT_LIST_HEAD(&init_css_set.tasks); 2498 INIT_LIST_HEAD(&init_css_set.tasks);
2502 INIT_HLIST_NODE(&init_css_set.hlist); 2499 INIT_HLIST_NODE(&init_css_set.hlist);
diff --git a/kernel/cgroup_debug.c b/kernel/cgroup_debug.c
index c3dc3aba4c02..daca6209202d 100644
--- a/kernel/cgroup_debug.c
+++ b/kernel/cgroup_debug.c
@@ -57,7 +57,7 @@ static u64 current_css_set_refcount_read(struct cgroup *cont,
57 u64 count; 57 u64 count;
58 58
59 rcu_read_lock(); 59 rcu_read_lock();
60 count = atomic_read(&current->cgroups->ref.refcount); 60 count = atomic_read(&current->cgroups->refcount);
61 rcu_read_unlock(); 61 rcu_read_unlock();
62 return count; 62 return count;
63} 63}
@@ -90,7 +90,7 @@ static struct cftype files[] = {
90 { 90 {
91 .name = "releasable", 91 .name = "releasable",
92 .read_u64 = releasable_read, 92 .read_u64 = releasable_read,
93 } 93 },
94}; 94};
95 95
96static int debug_populate(struct cgroup_subsys *ss, struct cgroup *cont) 96static int debug_populate(struct cgroup_subsys *ss, struct cgroup *cont)