diff options
author | Li Zefan <lizefan@huawei.com> | 2014-06-29 23:50:59 -0400 |
---|---|---|
committer | Tejun Heo <tj@kernel.org> | 2014-06-30 10:16:26 -0400 |
commit | 3a32bd72d77058d768dbb38183ad517f720dd1bc (patch) | |
tree | dc67b70a50e1dc649b059113bb8303f420192eca /kernel | |
parent | 4e26445faad366d67d7723622bf6a60a6f0f5993 (diff) |
cgroup: fix a race between cgroup_mount() and cgroup_kill_sb()
We've converted cgroup to kernfs so cgroup won't be intertwined with
vfs objects and locking, but there are dark areas.
Run two instances of this script concurrently:
for ((; ;))
{
mount -t cgroup -o cpuacct xxx /cgroup
umount /cgroup
}
After a while, I saw two mount processes stuck retrying, because
they were waiting for a subsystem to become free, but the root associated
with this subsystem never got freed.
This can happen if thread A is in the process of killing the superblock but
hasn't called percpu_ref_kill() yet, and at this time thread B is mounting
the same cgroup root, finds the root in the root list and performs
percpu_ref_tryget_live().
To fix this, we try to increase both the refcnt of the superblock and the
percpu refcnt of cgroup root.
v2:
- we should try to get both the superblock refcnt and cgroup_root refcnt,
because cgroup_root may have no superblock associated with it.
- adjust/add comments.
tj: Updated comments. Renamed @sb to @pinned_sb.
Cc: <stable@vger.kernel.org> # 3.15
Signed-off-by: Li Zefan <lizefan@huawei.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/cgroup.c | 33 |
1 files changed, 26 insertions, 7 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 64068667be84..70776aec2562 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
@@ -1648,6 +1648,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
1648 | int flags, const char *unused_dev_name, | 1648 | int flags, const char *unused_dev_name, |
1649 | void *data) | 1649 | void *data) |
1650 | { | 1650 | { |
1651 | struct super_block *pinned_sb = NULL; | ||
1651 | struct cgroup_subsys *ss; | 1652 | struct cgroup_subsys *ss; |
1652 | struct cgroup_root *root; | 1653 | struct cgroup_root *root; |
1653 | struct cgroup_sb_opts opts; | 1654 | struct cgroup_sb_opts opts; |
@@ -1740,15 +1741,23 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
1740 | } | 1741 | } |
1741 | 1742 | ||
1742 | /* | 1743 | /* |
1743 | * A root's lifetime is governed by its root cgroup. | 1744 | * We want to reuse @root whose lifetime is governed by its |
1744 | * tryget_live failure indicate that the root is being | 1745 | * ->cgrp. Let's check whether @root is alive and keep it |
1745 | * destroyed. Wait for destruction to complete so that the | 1746 | * that way. As cgroup_kill_sb() can happen anytime, we |
1746 | * subsystems are free. We can use wait_queue for the wait | 1747 | * want to block it by pinning the sb so that @root doesn't |
1747 | * but this path is super cold. Let's just sleep for a bit | 1748 | * get killed before mount is complete. |
1748 | * and retry. | 1749 | * |
1750 | * With the sb pinned, tryget_live can reliably indicate | ||
1751 | * whether @root can be reused. If it's being killed, | ||
1752 | * drain it. We can use wait_queue for the wait but this | ||
1753 | * path is super cold. Let's just sleep a bit and retry. | ||
1749 | */ | 1754 | */ |
1750 | if (!percpu_ref_tryget_live(&root->cgrp.self.refcnt)) { | 1755 | pinned_sb = kernfs_pin_sb(root->kf_root, NULL); |
1756 | if (IS_ERR(pinned_sb) || | ||
1757 | !percpu_ref_tryget_live(&root->cgrp.self.refcnt)) { | ||
1751 | mutex_unlock(&cgroup_mutex); | 1758 | mutex_unlock(&cgroup_mutex); |
1759 | if (!IS_ERR_OR_NULL(pinned_sb)) | ||
1760 | deactivate_super(pinned_sb); | ||
1752 | msleep(10); | 1761 | msleep(10); |
1753 | ret = restart_syscall(); | 1762 | ret = restart_syscall(); |
1754 | goto out_free; | 1763 | goto out_free; |
@@ -1793,6 +1802,16 @@ out_free: | |||
1793 | CGROUP_SUPER_MAGIC, &new_sb); | 1802 | CGROUP_SUPER_MAGIC, &new_sb); |
1794 | if (IS_ERR(dentry) || !new_sb) | 1803 | if (IS_ERR(dentry) || !new_sb) |
1795 | cgroup_put(&root->cgrp); | 1804 | cgroup_put(&root->cgrp); |
1805 | |||
1806 | /* | ||
1807 | * If @pinned_sb, we're reusing an existing root and holding an | ||
1808 | * extra ref on its sb. Mount is complete. Put the extra ref. | ||
1809 | */ | ||
1810 | if (pinned_sb) { | ||
1811 | WARN_ON(new_sb); | ||
1812 | deactivate_super(pinned_sb); | ||
1813 | } | ||
1814 | |||
1796 | return dentry; | 1815 | return dentry; |
1797 | } | 1816 | } |
1798 | 1817 | ||