diff options
author | Tejun Heo <tj@kernel.org> | 2014-02-12 09:29:50 -0500 |
---|---|---|
committer | Tejun Heo <tj@kernel.org> | 2014-02-12 09:29:50 -0500 |
commit | 776f02fa4e1ad70557c0318c70ce928e0642bee0 (patch) | |
tree | f2080066461b0ef75a964a8ef2b9a2243d7b5389 /kernel/cgroup.c | |
parent | 3c9c825b8b50de7dbb015e6bfc04bb2da79364d9 (diff) |
cgroup: remove cgroupfs_root->refcnt
Currently, cgroupfs_root and its ->top_cgroup are separately reference
counted and the latter's refcount is ignored. There's no reason to do this
separately. This patch removes cgroupfs_root->refcnt and destroys
cgroupfs_root when the top_cgroup is released.
* cgroup_put() updated to ignore cgroup_is_dead() test for top
cgroups. cgroup_free_fn() updated to handle root destruction when
releasing a top cgroup.
* As root destruction is now bounced through cgroup destruction, it is
asynchronous. Update cgroup_mount() so that it waits for a pending
release, which is currently implemented using msleep(). Converting
this to a proper wait_queue isn't hard but is likely unnecessary.
Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
Diffstat (limited to 'kernel/cgroup.c')
-rw-r--r-- | kernel/cgroup.c | 86 |
1 files changed, 38 insertions, 48 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index cffdb6e2ad08..03845c5d082b 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
@@ -53,6 +53,7 @@ | |||
53 | #include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */ | 53 | #include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */ |
54 | #include <linux/flex_array.h> /* used in cgroup_attach_task */ | 54 | #include <linux/flex_array.h> /* used in cgroup_attach_task */ |
55 | #include <linux/kthread.h> | 55 | #include <linux/kthread.h> |
56 | #include <linux/delay.h> | ||
56 | 57 | ||
57 | #include <linux/atomic.h> | 58 | #include <linux/atomic.h> |
58 | 59 | ||
@@ -728,37 +729,16 @@ static void cgroup_free_root(struct cgroupfs_root *root) | |||
728 | } | 729 | } |
729 | } | 730 | } |
730 | 731 | ||
731 | static void cgroup_get_root(struct cgroupfs_root *root) | 732 | static void cgroup_destroy_root(struct cgroupfs_root *root) |
732 | { | ||
733 | /* | ||
734 | * The caller must ensure that @root is alive, which can be | ||
735 | * achieved by holding a ref on one of the member cgroups or | ||
736 | * following a registered reference to @root while holding | ||
737 | * cgroup_tree_mutex. | ||
738 | */ | ||
739 | WARN_ON_ONCE(atomic_read(&root->refcnt) <= 0); | ||
740 | atomic_inc(&root->refcnt); | ||
741 | } | ||
742 | |||
743 | static void cgroup_put_root(struct cgroupfs_root *root) | ||
744 | { | 733 | { |
745 | struct cgroup *cgrp = &root->top_cgroup; | 734 | struct cgroup *cgrp = &root->top_cgroup; |
746 | struct cgrp_cset_link *link, *tmp_link; | 735 | struct cgrp_cset_link *link, *tmp_link; |
747 | int ret; | 736 | int ret; |
748 | 737 | ||
749 | /* | ||
750 | * @root's refcnt reaching zero and its deregistration should be | ||
751 | * atomic w.r.t. cgroup_tree_mutex. This ensures that | ||
752 | * cgroup_get_root() is safe to invoke if @root is registered. | ||
753 | */ | ||
754 | mutex_lock(&cgroup_tree_mutex); | 738 | mutex_lock(&cgroup_tree_mutex); |
755 | if (!atomic_dec_and_test(&root->refcnt)) { | ||
756 | mutex_unlock(&cgroup_tree_mutex); | ||
757 | return; | ||
758 | } | ||
759 | mutex_lock(&cgroup_mutex); | 739 | mutex_lock(&cgroup_mutex); |
760 | 740 | ||
761 | BUG_ON(atomic_read(&root->nr_cgrps) != 1); | 741 | BUG_ON(atomic_read(&root->nr_cgrps)); |
762 | BUG_ON(!list_empty(&cgrp->children)); | 742 | BUG_ON(!list_empty(&cgrp->children)); |
763 | 743 | ||
764 | /* Rebind all subsystems back to the default hierarchy */ | 744 | /* Rebind all subsystems back to the default hierarchy */ |
@@ -929,21 +909,24 @@ static void cgroup_free_fn(struct work_struct *work) | |||
929 | struct cgroup *cgrp = container_of(work, struct cgroup, destroy_work); | 909 | struct cgroup *cgrp = container_of(work, struct cgroup, destroy_work); |
930 | 910 | ||
931 | atomic_dec(&cgrp->root->nr_cgrps); | 911 | atomic_dec(&cgrp->root->nr_cgrps); |
932 | |||
933 | /* | ||
934 | * We get a ref to the parent, and put the ref when this cgroup is | ||
935 | * being freed, so it's guaranteed that the parent won't be | ||
936 | * destroyed before its children. | ||
937 | */ | ||
938 | cgroup_put(cgrp->parent); | ||
939 | |||
940 | /* put the root reference that we took when we created the cgroup */ | ||
941 | cgroup_put_root(cgrp->root); | ||
942 | |||
943 | cgroup_pidlist_destroy_all(cgrp); | 912 | cgroup_pidlist_destroy_all(cgrp); |
944 | 913 | ||
945 | kernfs_put(cgrp->kn); | 914 | if (cgrp->parent) { |
946 | kfree(cgrp); | 915 | /* |
916 | * We get a ref to the parent, and put the ref when this | ||
917 | * cgroup is being freed, so it's guaranteed that the | ||
918 | * parent won't be destroyed before its children. | ||
919 | */ | ||
920 | cgroup_put(cgrp->parent); | ||
921 | kernfs_put(cgrp->kn); | ||
922 | kfree(cgrp); | ||
923 | } else { | ||
924 | /* | ||
925 | * This is top cgroup's refcnt reaching zero, which | ||
926 | * indicates that the root should be released. | ||
927 | */ | ||
928 | cgroup_destroy_root(cgrp->root); | ||
929 | } | ||
947 | } | 930 | } |
948 | 931 | ||
949 | static void cgroup_free_rcu(struct rcu_head *head) | 932 | static void cgroup_free_rcu(struct rcu_head *head) |
@@ -965,7 +948,7 @@ static void cgroup_put(struct cgroup *cgrp) | |||
965 | { | 948 | { |
966 | if (!atomic_dec_and_test(&cgrp->refcnt)) | 949 | if (!atomic_dec_and_test(&cgrp->refcnt)) |
967 | return; | 950 | return; |
968 | if (WARN_ON_ONCE(!cgroup_is_dead(cgrp))) | 951 | if (WARN_ON_ONCE(cgrp->parent && !cgroup_is_dead(cgrp))) |
969 | return; | 952 | return; |
970 | 953 | ||
971 | /* | 954 | /* |
@@ -1356,7 +1339,6 @@ static void init_cgroup_root(struct cgroupfs_root *root) | |||
1356 | { | 1339 | { |
1357 | struct cgroup *cgrp = &root->top_cgroup; | 1340 | struct cgroup *cgrp = &root->top_cgroup; |
1358 | 1341 | ||
1359 | atomic_set(&root->refcnt, 1); | ||
1360 | INIT_LIST_HEAD(&root->root_list); | 1342 | INIT_LIST_HEAD(&root->root_list); |
1361 | atomic_set(&root->nr_cgrps, 1); | 1343 | atomic_set(&root->nr_cgrps, 1); |
1362 | cgrp->root = root; | 1344 | cgrp->root = root; |
@@ -1485,7 +1467,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
1485 | struct cgroup_sb_opts opts; | 1467 | struct cgroup_sb_opts opts; |
1486 | struct dentry *dentry; | 1468 | struct dentry *dentry; |
1487 | int ret; | 1469 | int ret; |
1488 | 1470 | retry: | |
1489 | mutex_lock(&cgroup_tree_mutex); | 1471 | mutex_lock(&cgroup_tree_mutex); |
1490 | mutex_lock(&cgroup_mutex); | 1472 | mutex_lock(&cgroup_mutex); |
1491 | 1473 | ||
@@ -1531,7 +1513,21 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
1531 | } | 1513 | } |
1532 | } | 1514 | } |
1533 | 1515 | ||
1534 | cgroup_get_root(root); | 1516 | /* |
1517 | * A root's lifetime is governed by its top cgroup. Zero | ||
1518 | * ref indicate that the root is being destroyed. Wait for | ||
1519 | * destruction to complete so that the subsystems are free. | ||
1520 | * We can use wait_queue for the wait but this path is | ||
1521 | * super cold. Let's just sleep for a bit and retry. | ||
1522 | */ | ||
1523 | if (!atomic_inc_not_zero(&root->top_cgroup.refcnt)) { | ||
1524 | mutex_unlock(&cgroup_mutex); | ||
1525 | mutex_unlock(&cgroup_tree_mutex); | ||
1526 | msleep(10); | ||
1527 | goto retry; | ||
1528 | } | ||
1529 | |||
1530 | ret = 0; | ||
1535 | goto out_unlock; | 1531 | goto out_unlock; |
1536 | } | 1532 | } |
1537 | 1533 | ||
@@ -1558,7 +1554,7 @@ out_unlock: | |||
1558 | 1554 | ||
1559 | dentry = kernfs_mount(fs_type, flags, root->kf_root); | 1555 | dentry = kernfs_mount(fs_type, flags, root->kf_root); |
1560 | if (IS_ERR(dentry)) | 1556 | if (IS_ERR(dentry)) |
1561 | cgroup_put_root(root); | 1557 | cgroup_put(&root->top_cgroup); |
1562 | return dentry; | 1558 | return dentry; |
1563 | } | 1559 | } |
1564 | 1560 | ||
@@ -1567,7 +1563,7 @@ static void cgroup_kill_sb(struct super_block *sb) | |||
1567 | struct kernfs_root *kf_root = kernfs_root_from_sb(sb); | 1563 | struct kernfs_root *kf_root = kernfs_root_from_sb(sb); |
1568 | struct cgroupfs_root *root = cgroup_root_from_kf(kf_root); | 1564 | struct cgroupfs_root *root = cgroup_root_from_kf(kf_root); |
1569 | 1565 | ||
1570 | cgroup_put_root(root); | 1566 | cgroup_put(&root->top_cgroup); |
1571 | kernfs_kill_sb(sb); | 1567 | kernfs_kill_sb(sb); |
1572 | } | 1568 | } |
1573 | 1569 | ||
@@ -3708,12 +3704,6 @@ static long cgroup_create(struct cgroup *parent, const char *name, | |||
3708 | /* allocation complete, commit to creation */ | 3704 | /* allocation complete, commit to creation */ |
3709 | list_add_tail_rcu(&cgrp->sibling, &cgrp->parent->children); | 3705 | list_add_tail_rcu(&cgrp->sibling, &cgrp->parent->children); |
3710 | atomic_inc(&root->nr_cgrps); | 3706 | atomic_inc(&root->nr_cgrps); |
3711 | |||
3712 | /* | ||
3713 | * Grab a reference on the root and parent so that they don't get | ||
3714 | * deleted while there are child cgroups. | ||
3715 | */ | ||
3716 | cgroup_get_root(root); | ||
3717 | cgroup_get(parent); | 3707 | cgroup_get(parent); |
3718 | 3708 | ||
3719 | /* | 3709 | /* |