cgroup: remove cgroupfs_root->refcnt

Currently, cgroupfs_root and its ->top_cgroup are separately reference counted and the latter's refcount is ignored. There's no reason to do this separately. This patch removes cgroupfs_root->refcnt and destroys cgroupfs_root when the top_cgroup is released.

* cgroup_put() is updated to skip the cgroup_is_dead() test for top cgroups. cgroup_free_fn() is updated to handle root destruction when releasing a top cgroup.

* As root destruction is now bounced through cgroup destruction, it is asynchronous. Update cgroup_mount() so that it waits for a pending release, which is currently implemented using msleep(). Converting this to a proper wait_queue isn't hard but is likely unnecessary.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
author: Tejun Heo <tj@kernel.org> 2014-02-12 09:29:50 -0500
committer: Tejun Heo <tj@kernel.org> 2014-02-12 09:29:50 -0500
commit: 776f02fa4e1ad70557c0318c70ce928e0642bee0 (patch)
tree: f2080066461b0ef75a964a8ef2b9a2243d7b5389 /kernel/cgroup.c
parent: 3c9c825b8b50de7dbb015e6bfc04bb2da79364d9 (diff)
1 files changed, 38 insertions, 48 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index cffdb6e2ad08..03845c5d082b 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -53,6 +53,7 @@
 #include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */
 #include <linux/flex_array.h> /* used in cgroup_attach_task */
 #include <linux/kthread.h>
+#include <linux/delay.h>
 #include <linux/atomic.h>
@@ -728,37 +729,16 @@ static void cgroup_free_root(struct cgroupfs_root *root)
        }
 }
-static void cgroup_get_root(struct cgroupfs_root *root)
+static void cgroup_destroy_root(struct cgroupfs_root *root)
-{
-        /*
-         * The caller must ensure that @root is alive, which can be
-         * achieved by holding a ref on one of the member cgroups or
-         * following a registered reference to @root while holding
-         * cgroup_tree_mutex.
-         */
-        WARN_ON_ONCE(atomic_read(&root->refcnt) <= 0);
-        atomic_inc(&root->refcnt);
-}
-static void cgroup_put_root(struct cgroupfs_root *root)
 {
        struct cgroup *cgrp = &root->top_cgroup;
        struct cgrp_cset_link *link, *tmp_link;
        int ret;
-        /*
-         * @root's refcnt reaching zero and its deregistration should be
-         * atomic w.r.t. cgroup_tree_mutex.  This ensures that
-         * cgroup_get_root() is safe to invoke if @root is registered.
-         */
        mutex_lock(&cgroup_tree_mutex);
-        if (!atomic_dec_and_test(&root->refcnt)) {
-                mutex_unlock(&cgroup_tree_mutex);
-                return;
-        }
        mutex_lock(&cgroup_mutex);
-        BUG_ON(atomic_read(&root->nr_cgrps) != 1);
+        BUG_ON(atomic_read(&root->nr_cgrps));
        BUG_ON(!list_empty(&cgrp->children));
        /* Rebind all subsystems back to the default hierarchy */
@@ -929,21 +909,24 @@ static void cgroup_free_fn(struct work_struct *work)
        struct cgroup *cgrp = container_of(work, struct cgroup, destroy_work);
        atomic_dec(&cgrp->root->nr_cgrps);
-        /*
-         * We get a ref to the parent, and put the ref when this cgroup is
-         * being freed, so it's guaranteed that the parent won't be
-         * destroyed before its children.
-         */
-        cgroup_put(cgrp->parent);
-        /* put the root reference that we took when we created the cgroup */
-        cgroup_put_root(cgrp->root);
        cgroup_pidlist_destroy_all(cgrp);
-        kernfs_put(cgrp->kn);
+        if (cgrp->parent) {
-        kfree(cgrp);
+                /*
+                 * We get a ref to the parent, and put the ref when this
+                 * cgroup is being freed, so it's guaranteed that the
+                 * parent won't be destroyed before its children.
+                 */
+                cgroup_put(cgrp->parent);
+                kernfs_put(cgrp->kn);
+                kfree(cgrp);
+        } else {
+                /*
+                 * This is top cgroup's refcnt reaching zero, which
+                 * indicates that the root should be released.
+                 */
+                cgroup_destroy_root(cgrp->root);
+        }
 }
 static void cgroup_free_rcu(struct rcu_head *head)
@@ -965,7 +948,7 @@ static void cgroup_put(struct cgroup *cgrp)
 {
        if (!atomic_dec_and_test(&cgrp->refcnt))
                return;
-        if (WARN_ON_ONCE(!cgroup_is_dead(cgrp)))
+        if (WARN_ON_ONCE(cgrp->parent && !cgroup_is_dead(cgrp)))
                return;
        /*
@@ -1356,7 +1339,6 @@ static void init_cgroup_root(struct cgroupfs_root *root)
 {
        struct cgroup *cgrp = &root->top_cgroup;
-        atomic_set(&root->refcnt, 1);
        INIT_LIST_HEAD(&root->root_list);
        atomic_set(&root->nr_cgrps, 1);
        cgrp->root = root;
@@ -1485,7 +1467,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
        struct cgroup_sb_opts opts;
        struct dentry *dentry;
        int ret;
+retry:
        mutex_lock(&cgroup_tree_mutex);
        mutex_lock(&cgroup_mutex);
@@ -1531,7 +1513,21 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
                        }
                }
-                cgroup_get_root(root);
+                /*
+                 * A root's lifetime is governed by its top cgroup.  Zero
+                 * ref indicate that the root is being destroyed.  Wait for
+                 * destruction to complete so that the subsystems are free.
+                 * We can use wait_queue for the wait but this path is
+                 * super cold.  Let's just sleep for a bit and retry.
+                 */
+                if (!atomic_inc_not_zero(&root->top_cgroup.refcnt)) {
+                        mutex_unlock(&cgroup_mutex);
+                        mutex_unlock(&cgroup_tree_mutex);
+                        msleep(10);
+                        goto retry;
+                }
+                ret = 0;
                goto out_unlock;
        }
@@ -1558,7 +1554,7 @@ out_unlock:
        dentry = kernfs_mount(fs_type, flags, root->kf_root);
        if (IS_ERR(dentry))
-                cgroup_put_root(root);
+                cgroup_put(&root->top_cgroup);
        return dentry;
 }
@@ -1567,7 +1563,7 @@ static void cgroup_kill_sb(struct super_block *sb)
        struct kernfs_root *kf_root = kernfs_root_from_sb(sb);
        struct cgroupfs_root *root = cgroup_root_from_kf(kf_root);
-        cgroup_put_root(root);
+        cgroup_put(&root->top_cgroup);
        kernfs_kill_sb(sb);
 }
@@ -3708,12 +3704,6 @@ static long cgroup_create(struct cgroup *parent, const char *name,
        /* allocation complete, commit to creation */
        list_add_tail_rcu(&cgrp->sibling, &cgrp->parent->children);
        atomic_inc(&root->nr_cgrps);
-        /*
-         * Grab a reference on the root and parent so that they don't get
-         * deleted while there are child cgroups.
-         */
-        cgroup_get_root(root);
        cgroup_get(parent);
        /*
author	Tejun Heo <tj@kernel.org>	2014-02-12 09:29:50 -0500
committer	Tejun Heo <tj@kernel.org>	2014-02-12 09:29:50 -0500
commit	776f02fa4e1ad70557c0318c70ce928e0642bee0 (patch)
tree	f2080066461b0ef75a964a8ef2b9a2243d7b5389 /kernel/cgroup.c
parent	3c9c825b8b50de7dbb015e6bfc04bb2da79364d9 (diff)

diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index cffdb6e2ad08..03845c5d082b 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -53,6 +53,7 @@
53	#include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */	53	#include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */
54	#include <linux/flex_array.h> /* used in cgroup_attach_task */	54	#include <linux/flex_array.h> /* used in cgroup_attach_task */
55	#include <linux/kthread.h>	55	#include <linux/kthread.h>
		56	#include <linux/delay.h>
56		57
57	#include <linux/atomic.h>	58	#include <linux/atomic.h>
58		59
@@ -728,37 +729,16 @@ static void cgroup_free_root(struct cgroupfs_root *root)
728	}	729	}
729	}	730	}
730		731
731	static void cgroup_get_root(struct cgroupfs_root *root)	732	static void cgroup_destroy_root(struct cgroupfs_root *root)
732	{
733	/*
734	* The caller must ensure that @root is alive, which can be
735	* achieved by holding a ref on one of the member cgroups or
736	* following a registered reference to @root while holding
737	* cgroup_tree_mutex.
738	*/
739	WARN_ON_ONCE(atomic_read(&root->refcnt) <= 0);
740	atomic_inc(&root->refcnt);
741	}
742
743	static void cgroup_put_root(struct cgroupfs_root *root)
744	{	733	{
745	struct cgroup *cgrp = &root->top_cgroup;	734	struct cgroup *cgrp = &root->top_cgroup;
746	struct cgrp_cset_link *link, *tmp_link;	735	struct cgrp_cset_link *link, *tmp_link;
747	int ret;	736	int ret;
748		737
749	/*
750	* @root's refcnt reaching zero and its deregistration should be
751	* atomic w.r.t. cgroup_tree_mutex. This ensures that
752	* cgroup_get_root() is safe to invoke if @root is registered.
753	*/
754	mutex_lock(&cgroup_tree_mutex);	738	mutex_lock(&cgroup_tree_mutex);
755	if (!atomic_dec_and_test(&root->refcnt)) {
756	mutex_unlock(&cgroup_tree_mutex);
757	return;
758	}
759	mutex_lock(&cgroup_mutex);	739	mutex_lock(&cgroup_mutex);
760		740
761	BUG_ON(atomic_read(&root->nr_cgrps) != 1);	741	BUG_ON(atomic_read(&root->nr_cgrps));
762	BUG_ON(!list_empty(&cgrp->children));	742	BUG_ON(!list_empty(&cgrp->children));
763		743
764	/* Rebind all subsystems back to the default hierarchy */	744	/* Rebind all subsystems back to the default hierarchy */
@@ -929,21 +909,24 @@ static void cgroup_free_fn(struct work_struct *work)
929	struct cgroup *cgrp = container_of(work, struct cgroup, destroy_work);	909	struct cgroup *cgrp = container_of(work, struct cgroup, destroy_work);
930		910
931	atomic_dec(&cgrp->root->nr_cgrps);	911	atomic_dec(&cgrp->root->nr_cgrps);
932
933	/*
934	* We get a ref to the parent, and put the ref when this cgroup is
935	* being freed, so it's guaranteed that the parent won't be
936	* destroyed before its children.
937	*/
938	cgroup_put(cgrp->parent);
939
940	/* put the root reference that we took when we created the cgroup */
941	cgroup_put_root(cgrp->root);
942
943	cgroup_pidlist_destroy_all(cgrp);	912	cgroup_pidlist_destroy_all(cgrp);
944		913
945	kernfs_put(cgrp->kn);	914	if (cgrp->parent) {
946	kfree(cgrp);	915	/*
		916	* We get a ref to the parent, and put the ref when this
		917	* cgroup is being freed, so it's guaranteed that the
		918	* parent won't be destroyed before its children.
		919	*/
		920	cgroup_put(cgrp->parent);
		921	kernfs_put(cgrp->kn);
		922	kfree(cgrp);
		923	} else {
		924	/*
		925	* This is top cgroup's refcnt reaching zero, which
		926	* indicates that the root should be released.
		927	*/
		928	cgroup_destroy_root(cgrp->root);
		929	}
947	}	930	}
948		931
949	static void cgroup_free_rcu(struct rcu_head *head)	932	static void cgroup_free_rcu(struct rcu_head *head)
@@ -965,7 +948,7 @@ static void cgroup_put(struct cgroup *cgrp)
965	{	948	{
966	if (!atomic_dec_and_test(&cgrp->refcnt))	949	if (!atomic_dec_and_test(&cgrp->refcnt))
967	return;	950	return;
968	if (WARN_ON_ONCE(!cgroup_is_dead(cgrp)))	951	if (WARN_ON_ONCE(cgrp->parent && !cgroup_is_dead(cgrp)))
969	return;	952	return;
970		953
971	/*	954	/*
@@ -1356,7 +1339,6 @@ static void init_cgroup_root(struct cgroupfs_root *root)
1356	{	1339	{
1357	struct cgroup *cgrp = &root->top_cgroup;	1340	struct cgroup *cgrp = &root->top_cgroup;
1358		1341
1359	atomic_set(&root->refcnt, 1);
1360	INIT_LIST_HEAD(&root->root_list);	1342	INIT_LIST_HEAD(&root->root_list);
1361	atomic_set(&root->nr_cgrps, 1);	1343	atomic_set(&root->nr_cgrps, 1);
1362	cgrp->root = root;	1344	cgrp->root = root;
@@ -1485,7 +1467,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
1485	struct cgroup_sb_opts opts;	1467	struct cgroup_sb_opts opts;
1486	struct dentry *dentry;	1468	struct dentry *dentry;
1487	int ret;	1469	int ret;
1488		1470	retry:
1489	mutex_lock(&cgroup_tree_mutex);	1471	mutex_lock(&cgroup_tree_mutex);
1490	mutex_lock(&cgroup_mutex);	1472	mutex_lock(&cgroup_mutex);
1491		1473
@@ -1531,7 +1513,21 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
1531	}	1513	}
1532	}	1514	}
1533		1515
1534	cgroup_get_root(root);	1516	/*
		1517	* A root's lifetime is governed by its top cgroup. Zero
		1518	* ref indicate that the root is being destroyed. Wait for
		1519	* destruction to complete so that the subsystems are free.
		1520	* We can use wait_queue for the wait but this path is
		1521	* super cold. Let's just sleep for a bit and retry.
		1522	*/
		1523	if (!atomic_inc_not_zero(&root->top_cgroup.refcnt)) {
		1524	mutex_unlock(&cgroup_mutex);
		1525	mutex_unlock(&cgroup_tree_mutex);
		1526	msleep(10);
		1527	goto retry;
		1528	}
		1529
		1530	ret = 0;
1535	goto out_unlock;	1531	goto out_unlock;
1536	}	1532	}
1537		1533
@@ -1558,7 +1554,7 @@ out_unlock:
1558		1554
1559	dentry = kernfs_mount(fs_type, flags, root->kf_root);	1555	dentry = kernfs_mount(fs_type, flags, root->kf_root);
1560	if (IS_ERR(dentry))	1556	if (IS_ERR(dentry))
1561	cgroup_put_root(root);	1557	cgroup_put(&root->top_cgroup);
1562	return dentry;	1558	return dentry;
1563	}	1559	}
1564		1560
@@ -1567,7 +1563,7 @@ static void cgroup_kill_sb(struct super_block *sb)
1567	struct kernfs_root *kf_root = kernfs_root_from_sb(sb);	1563	struct kernfs_root *kf_root = kernfs_root_from_sb(sb);
1568	struct cgroupfs_root *root = cgroup_root_from_kf(kf_root);	1564	struct cgroupfs_root *root = cgroup_root_from_kf(kf_root);
1569		1565
1570	cgroup_put_root(root);	1566	cgroup_put(&root->top_cgroup);
1571	kernfs_kill_sb(sb);	1567	kernfs_kill_sb(sb);
1572	}	1568	}
1573		1569
@@ -3708,12 +3704,6 @@ static long cgroup_create(struct cgroup *parent, const char *name,
3708	/* allocation complete, commit to creation */	3704	/* allocation complete, commit to creation */
3709	list_add_tail_rcu(&cgrp->sibling, &cgrp->parent->children);	3705	list_add_tail_rcu(&cgrp->sibling, &cgrp->parent->children);
3710	atomic_inc(&root->nr_cgrps);	3706	atomic_inc(&root->nr_cgrps);
3711
3712	/*
3713	* Grab a reference on the root and parent so that they don't get
3714	* deleted while there are child cgroups.
3715	*/
3716	cgroup_get_root(root);
3717	cgroup_get(parent);	3707	cgroup_get(parent);
3718		3708
3719	/*	3709	/*