summaryrefslogtreecommitdiffstats
path: root/kernel/cgroup.c
diff options
context:
space:
mode:
authorLi Zefan <lizefan@huawei.com>2013-12-16 22:13:39 -0500
committerTejun Heo <tj@kernel.org>2013-12-17 08:11:52 -0500
commitc1a71504e9715812a2d15e7c03b5aa147ae70ded (patch)
tree5b58cb4434889ffe3038f6a87fdb1b7cf85dfe2f /kernel/cgroup.c
parent266ccd505e8acb98717819cef9d91d66c7b237cc (diff)
cgroup: don't recycle cgroup id until all csses' have been destroyed
Hugh reported this bug: > CONFIG_MEMCG_SWAP is broken in 3.13-rc. Try something like this: > > mkdir -p /tmp/tmpfs /tmp/memcg > mount -t tmpfs -o size=1G tmpfs /tmp/tmpfs > mount -t cgroup -o memory memcg /tmp/memcg > mkdir /tmp/memcg/old > echo 512M >/tmp/memcg/old/memory.limit_in_bytes > echo $$ >/tmp/memcg/old/tasks > cp /dev/zero /tmp/tmpfs/zero 2>/dev/null > echo $$ >/tmp/memcg/tasks > rmdir /tmp/memcg/old > sleep 1 # let rmdir work complete > mkdir /tmp/memcg/new > umount /tmp/tmpfs > dmesg | grep WARNING > rmdir /tmp/memcg/new > umount /tmp/memcg > > Shows lots of WARNING: CPU: 1 PID: 1006 at kernel/res_counter.c:91 > res_counter_uncharge_locked+0x1f/0x2f() > > Breakage comes from 34c00c319ce7 ("memcg: convert to use cgroup id"). > > The lifetime of a cgroup id is different from the lifetime of the > css id it replaced: memsw's css_get()s do nothing to hold on to the > old cgroup id, it soon gets recycled to a new cgroup, which then > mysteriously inherits the old's swap, without any charge for it. Instead of removing cgroup id right after all the csses have been offlined, we should do that after csses have been destroyed. To make sure an invalid css pointer won't be returned after the css is destroyed, make sure css_from_id() returns NULL in this case. tj: Updated comment to note planned changes for cgrp->id. Reported-by: Hugh Dickins <hughd@google.com> Signed-off-by: Li Zefan <lizefan@huawei.com> Reviewed-by: Michal Hocko <mhocko@suse.cz> Signed-off-by: Tejun Heo <tj@kernel.org>
Diffstat (limited to 'kernel/cgroup.c')
-rw-r--r--kernel/cgroup.c19
1 files changed, 11 insertions, 8 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index bcb1755f410a..bc1dcabe9217 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -890,6 +890,16 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode)
890 struct cgroup *cgrp = dentry->d_fsdata; 890 struct cgroup *cgrp = dentry->d_fsdata;
891 891
892 BUG_ON(!(cgroup_is_dead(cgrp))); 892 BUG_ON(!(cgroup_is_dead(cgrp)));
893
894 /*
895 * XXX: cgrp->id is only used to look up css's. As cgroup
896 * and css's lifetimes will be decoupled, it should be made
897 * per-subsystem and moved to css->id so that lookups are
898 * successful until the target css is released.
899 */
900 idr_remove(&cgrp->root->cgroup_idr, cgrp->id);
901 cgrp->id = -1;
902
893 call_rcu(&cgrp->rcu_head, cgroup_free_rcu); 903 call_rcu(&cgrp->rcu_head, cgroup_free_rcu);
894 } else { 904 } else {
895 struct cfent *cfe = __d_cfe(dentry); 905 struct cfent *cfe = __d_cfe(dentry);
@@ -4268,6 +4278,7 @@ static void css_release(struct percpu_ref *ref)
4268 struct cgroup_subsys_state *css = 4278 struct cgroup_subsys_state *css =
4269 container_of(ref, struct cgroup_subsys_state, refcnt); 4279 container_of(ref, struct cgroup_subsys_state, refcnt);
4270 4280
4281 rcu_assign_pointer(css->cgroup->subsys[css->ss->subsys_id], NULL);
4271 call_rcu(&css->rcu_head, css_free_rcu_fn); 4282 call_rcu(&css->rcu_head, css_free_rcu_fn);
4272} 4283}
4273 4284
@@ -4733,14 +4744,6 @@ static void cgroup_destroy_css_killed(struct cgroup *cgrp)
4733 /* delete this cgroup from parent->children */ 4744 /* delete this cgroup from parent->children */
4734 list_del_rcu(&cgrp->sibling); 4745 list_del_rcu(&cgrp->sibling);
4735 4746
4736 /*
4737 * We should remove the cgroup object from idr before its grace
4738 * period starts, so we won't be looking up a cgroup while the
4739 * cgroup is being freed.
4740 */
4741 idr_remove(&cgrp->root->cgroup_idr, cgrp->id);
4742 cgrp->id = -1;
4743
4744 dput(d); 4747 dput(d);
4745 4748
4746 set_bit(CGRP_RELEASABLE, &parent->flags); 4749 set_bit(CGRP_RELEASABLE, &parent->flags);