diff options
author | KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> | 2010-05-11 17:06:59 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-05-11 20:33:42 -0400 |
commit | 747388d78a0ae768fd82b55c4ed38aa646a72364 (patch) | |
tree | 03efabf9761209cffef4e8da9b6989887f76428e | |
parent | 7f0f15464185a92f9d8791ad231bcd7bf6df54e4 (diff) |
memcg: fix css_is_ancestor() RCU locking
Some callers (in memcontrol.c) call css_is_ancestor() without holding
rcu_read_lock(). Because css_is_ancestor() has to access RCU-protected
data, it should run under rcu_read_lock().
This patch makes css_is_ancestor() itself access the RCU-protected area
safely. (At the very least, "root" can have refcnt==0 if it is not an
ancestor of "child", so we need rcu_read_lock().)
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | kernel/cgroup.c | 31 | ||||
-rw-r--r-- | mm/memcontrol.c | 4 |
2 files changed, 26 insertions, 9 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 6db8b7f297a1..6d870f2d1228 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
@@ -4464,15 +4464,36 @@ unsigned short css_depth(struct cgroup_subsys_state *css) | |||
4464 | } | 4464 | } |
4465 | EXPORT_SYMBOL_GPL(css_depth); | 4465 | EXPORT_SYMBOL_GPL(css_depth); |
4466 | 4466 | ||
4467 | /** | ||
4468 | * css_is_ancestor - test "root" css is an ancestor of "child" | ||
4469 | * @child: the css to be tested. | ||
4470 | * @root: the css supposed to be an ancestor of the child. | |
4471 | * | ||
4472 | * Returns true if "root" is an ancestor of "child" in its hierarchy. Because | |
4473 | * this function reads css->id, it uses rcu_dereference() and rcu_read_lock(). | |
4474 | * But, considering usual usage, the csses should be valid objects after the test. | |
4475 | * Assuming that the caller will do some action to the child if this | |
4476 | * returns true, the caller must take "child"'s reference count. | |
4477 | * If "child" is valid object and this returns true, "root" is valid, too. | ||
4478 | */ | ||
4479 | |||
4467 | bool css_is_ancestor(struct cgroup_subsys_state *child, | 4480 | bool css_is_ancestor(struct cgroup_subsys_state *child, |
4468 | const struct cgroup_subsys_state *root) | 4481 | const struct cgroup_subsys_state *root) |
4469 | { | 4482 | { |
4470 | struct css_id *child_id = rcu_dereference(child->id); | 4483 | struct css_id *child_id; |
4471 | struct css_id *root_id = rcu_dereference(root->id); | 4484 | struct css_id *root_id; |
4485 | bool ret = true; | ||
4472 | 4486 | ||
4473 | if (!child_id || !root_id || (child_id->depth < root_id->depth)) | 4487 | rcu_read_lock(); |
4474 | return false; | 4488 | child_id = rcu_dereference(child->id); |
4475 | return child_id->stack[root_id->depth] == root_id->id; | 4489 | root_id = rcu_dereference(root->id); |
4490 | if (!child_id | ||
4491 | || !root_id | ||
4492 | || (child_id->depth < root_id->depth) | ||
4493 | || (child_id->stack[root_id->depth] != root_id->id)) | ||
4494 | ret = false; | ||
4495 | rcu_read_unlock(); | ||
4496 | return ret; | ||
4476 | } | 4497 | } |
4477 | 4498 | ||
4478 | static void __free_css_id_cb(struct rcu_head *head) | 4499 | static void __free_css_id_cb(struct rcu_head *head) |
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 595d03f33b2c..8a79a6f0f029 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -811,12 +811,10 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem) | |||
811 | * enabled in "curr" and "curr" is a child of "mem" in *cgroup* | 811 | * enabled in "curr" and "curr" is a child of "mem" in *cgroup* |
812 | * hierarchy(even if use_hierarchy is disabled in "mem"). | 812 | * hierarchy(even if use_hierarchy is disabled in "mem"). |
813 | */ | 813 | */ |
814 | rcu_read_lock(); | ||
815 | if (mem->use_hierarchy) | 814 | if (mem->use_hierarchy) |
816 | ret = css_is_ancestor(&curr->css, &mem->css); | 815 | ret = css_is_ancestor(&curr->css, &mem->css); |
817 | else | 816 | else |
818 | ret = (curr == mem); | 817 | ret = (curr == mem); |
819 | rcu_read_unlock(); | ||
820 | css_put(&curr->css); | 818 | css_put(&curr->css); |
821 | return ret; | 819 | return ret; |
822 | } | 820 | } |
@@ -1603,7 +1601,6 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm, | |||
1603 | * There is a small race that "from" or "to" can be | 1601 | * There is a small race that "from" or "to" can be |
1604 | * freed by rmdir, so we use css_tryget(). | 1602 | * freed by rmdir, so we use css_tryget(). |
1605 | */ | 1603 | */ |
1606 | rcu_read_lock(); | ||
1607 | from = mc.from; | 1604 | from = mc.from; |
1608 | to = mc.to; | 1605 | to = mc.to; |
1609 | if (from && css_tryget(&from->css)) { | 1606 | if (from && css_tryget(&from->css)) { |
@@ -1624,7 +1621,6 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm, | |||
1624 | do_continue = (to == mem_over_limit); | 1621 | do_continue = (to == mem_over_limit); |
1625 | css_put(&to->css); | 1622 | css_put(&to->css); |
1626 | } | 1623 | } |
1627 | rcu_read_unlock(); | ||
1628 | if (do_continue) { | 1624 | if (do_continue) { |
1629 | DEFINE_WAIT(wait); | 1625 | DEFINE_WAIT(wait); |
1630 | prepare_to_wait(&mc.waitq, &wait, | 1626 | prepare_to_wait(&mc.waitq, &wait, |