aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> 2010-05-11 17:06:59 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2010-05-11 20:33:42 -0400
commit: 747388d78a0ae768fd82b55c4ed38aa646a72364 (patch)
tree: 03efabf9761209cffef4e8da9b6989887f76428e
parent: 7f0f15464185a92f9d8791ad231bcd7bf6df54e4 (diff)
memcg: fix css_is_ancestor() RCU locking
Some callers (in memcontrol.c) call css_is_ancestor() without holding rcu_read_lock(). Because css_is_ancestor() has to access RCU-protected data, it must run under rcu_read_lock(). This patch makes css_is_ancestor() take rcu_read_lock() itself, so that its access to the RCU-protected area is safe regardless of the caller. (At least, "root" can have refcnt==0 if it's not an ancestor of "child". So, we need rcu_read_lock().) Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp> Cc: Balbir Singh <balbir@linux.vnet.ibm.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- kernel/cgroup.c 31
-rw-r--r-- mm/memcontrol.c 4
2 files changed, 26 insertions, 9 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 6db8b7f297a1..6d870f2d1228 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -4464,15 +4464,36 @@ unsigned short css_depth(struct cgroup_subsys_state *css)
4464} 4464}
4465EXPORT_SYMBOL_GPL(css_depth); 4465EXPORT_SYMBOL_GPL(css_depth);
4466 4466
4467/**
4468 * css_is_ancestor - test "root" css is an ancestor of "child"
4469 * @child: the css to be tested.
4470 * @root: the css supposed to be an ancestor of the child.
4471 *
4472 * Returns true if "root" is an ancestor of "child" in its hierarchy. Because
4473 * this function reads css->id, it uses rcu_dereference() and rcu_read_lock().
4474 * But, considering usual usage, the csses should be valid objects after test.
4475 * Assuming that the caller will do some action to the child if this returns
4476 * true, the caller must take "child"'s reference count.
4477 * If "child" is valid object and this returns true, "root" is valid, too.
4478 */
4479
4467bool css_is_ancestor(struct cgroup_subsys_state *child, 4480bool css_is_ancestor(struct cgroup_subsys_state *child,
4468 const struct cgroup_subsys_state *root) 4481 const struct cgroup_subsys_state *root)
4469{ 4482{
4470 struct css_id *child_id = rcu_dereference(child->id); 4483 struct css_id *child_id;
4471 struct css_id *root_id = rcu_dereference(root->id); 4484 struct css_id *root_id;
4485 bool ret = true;
4472 4486
4473 if (!child_id || !root_id || (child_id->depth < root_id->depth)) 4487 rcu_read_lock();
4474 return false; 4488 child_id = rcu_dereference(child->id);
4475 return child_id->stack[root_id->depth] == root_id->id; 4489 root_id = rcu_dereference(root->id);
4490 if (!child_id
4491 || !root_id
4492 || (child_id->depth < root_id->depth)
4493 || (child_id->stack[root_id->depth] != root_id->id))
4494 ret = false;
4495 rcu_read_unlock();
4496 return ret;
4476} 4497}
4477 4498
4478static void __free_css_id_cb(struct rcu_head *head) 4499static void __free_css_id_cb(struct rcu_head *head)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 595d03f33b2c..8a79a6f0f029 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -811,12 +811,10 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem)
811 * enabled in "curr" and "curr" is a child of "mem" in *cgroup* 811 * enabled in "curr" and "curr" is a child of "mem" in *cgroup*
812 * hierarchy(even if use_hierarchy is disabled in "mem"). 812 * hierarchy(even if use_hierarchy is disabled in "mem").
813 */ 813 */
814 rcu_read_lock();
815 if (mem->use_hierarchy) 814 if (mem->use_hierarchy)
816 ret = css_is_ancestor(&curr->css, &mem->css); 815 ret = css_is_ancestor(&curr->css, &mem->css);
817 else 816 else
818 ret = (curr == mem); 817 ret = (curr == mem);
819 rcu_read_unlock();
820 css_put(&curr->css); 818 css_put(&curr->css);
821 return ret; 819 return ret;
822} 820}
@@ -1603,7 +1601,6 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
1603 * There is a small race that "from" or "to" can be 1601 * There is a small race that "from" or "to" can be
1604 * freed by rmdir, so we use css_tryget(). 1602 * freed by rmdir, so we use css_tryget().
1605 */ 1603 */
1606 rcu_read_lock();
1607 from = mc.from; 1604 from = mc.from;
1608 to = mc.to; 1605 to = mc.to;
1609 if (from && css_tryget(&from->css)) { 1606 if (from && css_tryget(&from->css)) {
@@ -1624,7 +1621,6 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
1624 do_continue = (to == mem_over_limit); 1621 do_continue = (to == mem_over_limit);
1625 css_put(&to->css); 1622 css_put(&to->css);
1626 } 1623 }
1627 rcu_read_unlock();
1628 if (do_continue) { 1624 if (do_continue) {
1629 DEFINE_WAIT(wait); 1625 DEFINE_WAIT(wait);
1630 prepare_to_wait(&mc.waitq, &wait, 1626 prepare_to_wait(&mc.waitq, &wait,