path: root/fs/namespace.c
author	Eric W. Biederman <ebiederm@xmission.com>	2017-01-20 00:28:35 -0500
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>	2017-03-14 22:02:43 -0400
commit	808e83e5add13152bd4a88346487de68395c136b (patch)
tree	08b80d9d58c501135fa9bee58ee0f4052adfa9ec /fs/namespace.c
parent	c9b3f3173fa5178df2c8fe2279442f48faaf7a98 (diff)
mnt: Tuck mounts under others instead of creating shadow/side mounts.
commit 1064f874abc0d05eeed8993815f584d847b72486 upstream.

Ever since mount propagation was introduced, in cases where a mount is propagated to a parent mount and mountpoint pair that is already in use, the code has placed the new mount behind the old mount in the mount hash table. This implementation detail is problematic as it allows creating arbitrary length mount hash chains.

Furthermore it invalidates the constraint maintained elsewhere in the mount code that a parent mount and mountpoint pair will have exactly one mount upon them, making it hard to deal with and to talk about this special case in the mount code.

Modify mount propagation to notice when there is already a mount at the parent mount and mountpoint where a new mount is propagating to, and place that preexisting mount on top of the new mount.

Modify unmount propagation to notice when a mount that is being unmounted has another mount on top of it (and no other children), and to replace the unmounted mount with the mount on top of it.

Move the MNT_UMOUNT test from __lookup_mnt_last into __propagate_umount, as that is the only call of __lookup_mnt_last where MNT_UMOUNT may be set on any mount visible in the mount hash table.

These modifications allow:
- __lookup_mnt_last to be removed.
- attach_shadows to be renamed __attach_mnt and its shadow handling to be removed.
- commit_tree to be simplified.
- copy_tree to be simplified.

The result is an easier to understand tree of mounts that does not allow creation of arbitrary length hash chains in the mount hash table.

The result is also a very slight userspace-visible difference in semantics. The following two cases now behave identically, where before order mattered:

case 1: (explicit user action)
	B is a slave of A
	mount something on A/a, it will propagate to B/a
	and then mount something on B/a

case 2: (tucked mount)
	B is a slave of A
	mount something on B/a
	and then mount something on A/a

Historically umount A/a would fail in case 1 and succeed in case 2. Now umount A/a succeeds in both configurations.

This very small change in semantics appears, if anything, to be a bug fix to me, and my survey of userspace leads me to believe that no programs will notice or care about this subtle semantic change.

v2: Updated mnt_change_mountpoint to not call dput or mntput and instead to decrement the counts directly. It is guaranteed that there will be other references when mnt_change_mountpoint is called, so this is safe.

v3: Moved put_mountpoint under mount_lock in attach_recursive_mnt, as the locking in fs/namespace.c changed between v2 and v3.

v4: Reworked the logic in propagate_mount_busy and __propagate_umount that detects when a mount completely covers another mount.

v5: Removed unnecessary tests whose result is always true in find_topper and attach_recursive_mnt.

v6: Document the user space visible semantic difference.

Fixes: b90fa9ae8f51 ("[PATCH] shared mount handling: bind and rbind")
Tested-by: Andrei Vagin <avagin@virtuozzo.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
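To make the userspace-visible difference concrete, the sketch below (not part of the patch) sets up "case 1" with the mount(2) and umount2(2) syscalls. It is only an illustration under assumptions: the /tmp/A and /tmp/B directories and the tmpfs instances stand in for A, B and "something" above, and it must run as root. With tucked mounts the final umount of A/a is expected to succeed, where kernels without this change returned EBUSY.

/* Hypothetical standalone demo, not part of this commit. */
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mount.h>
#include <sys/stat.h>

static void check(int ret, const char *what)
{
	if (ret < 0) {
		perror(what);
		exit(EXIT_FAILURE);
	}
}

int main(void)
{
	/* Work in a private mount namespace so nothing leaks to the host. */
	check(unshare(CLONE_NEWNS), "unshare");
	check(mount("none", "/", NULL, MS_REC | MS_PRIVATE, NULL), "make / private");

	/* Placeholder directories; EEXIST is fine, so no error check. */
	mkdir("/tmp/A", 0755);
	mkdir("/tmp/B", 0755);

	/* A is a shared mount; B is a bind of A downgraded to a slave. */
	check(mount("tmpfs", "/tmp/A", "tmpfs", 0, NULL), "mount A");
	check(mount("none", "/tmp/A", NULL, MS_SHARED, NULL), "share A");
	check(mount("/tmp/A", "/tmp/B", NULL, MS_BIND, NULL), "bind A onto B");
	check(mount("none", "/tmp/B", NULL, MS_SLAVE, NULL), "enslave B");

	mkdir("/tmp/A/a", 0755);

	/* case 1: mount on A/a (propagates to B/a), then mount on B/a. */
	check(mount("tmpfs", "/tmp/A/a", "tmpfs", 0, NULL), "mount A/a");
	check(mount("tmpfs", "/tmp/B/a", "tmpfs", 0, NULL), "mount B/a");

	/*
	 * Historically this failed with EBUSY because the copy propagated
	 * to B/a is covered by the explicit B/a mount; with tucked mounts
	 * it succeeds, just as in case 2.
	 */
	if (umount2("/tmp/A/a", 0) < 0)
		perror("umount A/a");
	else
		printf("umount A/a succeeded\n");

	return 0;
}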
Diffstat (limited to 'fs/namespace.c')
-rw-r--r--	fs/namespace.c	110
1 file changed, 60 insertions(+), 50 deletions(-)
diff --git a/fs/namespace.c b/fs/namespace.c
index 7cea503ae06d..ea751263fefa 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -641,28 +641,6 @@ struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
 }
 
 /*
- * find the last mount at @dentry on vfsmount @mnt.
- * mount_lock must be held.
- */
-struct mount *__lookup_mnt_last(struct vfsmount *mnt, struct dentry *dentry)
-{
-	struct mount *p, *res = NULL;
-	p = __lookup_mnt(mnt, dentry);
-	if (!p)
-		goto out;
-	if (!(p->mnt.mnt_flags & MNT_UMOUNT))
-		res = p;
-	hlist_for_each_entry_continue(p, mnt_hash) {
-		if (&p->mnt_parent->mnt != mnt || p->mnt_mountpoint != dentry)
-			break;
-		if (!(p->mnt.mnt_flags & MNT_UMOUNT))
-			res = p;
-	}
-out:
-	return res;
-}
-
-/*
  * lookup_mnt - Return the first child mount mounted at path
  *
  * "First" means first mounted chronologically. If you create the
@@ -882,6 +860,13 @@ void mnt_set_mountpoint(struct mount *mnt,
 	hlist_add_head(&child_mnt->mnt_mp_list, &mp->m_list);
 }
 
+static void __attach_mnt(struct mount *mnt, struct mount *parent)
+{
+	hlist_add_head_rcu(&mnt->mnt_hash,
+			   m_hash(&parent->mnt, mnt->mnt_mountpoint));
+	list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
+}
+
 /*
  * vfsmount lock must be held for write
  */
@@ -890,28 +875,45 @@ static void attach_mnt(struct mount *mnt,
 		      struct mountpoint *mp)
 {
 	mnt_set_mountpoint(parent, mp, mnt);
-	hlist_add_head_rcu(&mnt->mnt_hash, m_hash(&parent->mnt, mp->m_dentry));
-	list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
+	__attach_mnt(mnt, parent);
 }
 
-static void attach_shadowed(struct mount *mnt,
-			struct mount *parent,
-			struct mount *shadows)
+void mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp, struct mount *mnt)
 {
-	if (shadows) {
-		hlist_add_behind_rcu(&mnt->mnt_hash, &shadows->mnt_hash);
-		list_add(&mnt->mnt_child, &shadows->mnt_child);
-	} else {
-		hlist_add_head_rcu(&mnt->mnt_hash,
-				m_hash(&parent->mnt, mnt->mnt_mountpoint));
-		list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
-	}
+	struct mountpoint *old_mp = mnt->mnt_mp;
+	struct dentry *old_mountpoint = mnt->mnt_mountpoint;
+	struct mount *old_parent = mnt->mnt_parent;
+
+	list_del_init(&mnt->mnt_child);
+	hlist_del_init(&mnt->mnt_mp_list);
+	hlist_del_init_rcu(&mnt->mnt_hash);
+
+	attach_mnt(mnt, parent, mp);
+
+	put_mountpoint(old_mp);
+
+	/*
+	 * Safely avoid even the suggestion this code might sleep or
+	 * lock the mount hash by taking advantage of the knowledge that
+	 * mnt_change_mountpoint will not release the final reference
+	 * to a mountpoint.
+	 *
+	 * During mounting, the mount passed in as the parent mount will
+	 * continue to use the old mountpoint and during unmounting, the
+	 * old mountpoint will continue to exist until namespace_unlock,
+	 * which happens well after mnt_change_mountpoint.
+	 */
+	spin_lock(&old_mountpoint->d_lock);
+	old_mountpoint->d_lockref.count--;
+	spin_unlock(&old_mountpoint->d_lock);
+
+	mnt_add_count(old_parent, -1);
 }
 
 /*
  * vfsmount lock must be held for write
  */
-static void commit_tree(struct mount *mnt, struct mount *shadows)
+static void commit_tree(struct mount *mnt)
 {
 	struct mount *parent = mnt->mnt_parent;
 	struct mount *m;
@@ -929,7 +931,7 @@ static void commit_tree(struct mount *mnt, struct mount *shadows)
 	n->mounts += n->pending_mounts;
 	n->pending_mounts = 0;
 
-	attach_shadowed(mnt, parent, shadows);
+	__attach_mnt(mnt, parent);
 	touch_mnt_namespace(n);
 }
 
@@ -1737,7 +1739,6 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
 			continue;
 
 		for (s = r; s; s = next_mnt(s, r)) {
-			struct mount *t = NULL;
 			if (!(flag & CL_COPY_UNBINDABLE) &&
 			    IS_MNT_UNBINDABLE(s)) {
 				s = skip_mnt_tree(s);
@@ -1759,14 +1760,7 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
 				goto out;
 			lock_mount_hash();
 			list_add_tail(&q->mnt_list, &res->mnt_list);
-			mnt_set_mountpoint(parent, p->mnt_mp, q);
-			if (!list_empty(&parent->mnt_mounts)) {
-				t = list_last_entry(&parent->mnt_mounts,
-						    struct mount, mnt_child);
-				if (t->mnt_mp != p->mnt_mp)
-					t = NULL;
-			}
-			attach_shadowed(q, parent, t);
+			attach_mnt(q, parent, p->mnt_mp);
 			unlock_mount_hash();
 		}
 	}
@@ -1967,10 +1961,18 @@ static int attach_recursive_mnt(struct mount *source_mnt,
 {
 	HLIST_HEAD(tree_list);
 	struct mnt_namespace *ns = dest_mnt->mnt_ns;
+	struct mountpoint *smp;
 	struct mount *child, *p;
 	struct hlist_node *n;
 	int err;
 
+	/* Preallocate a mountpoint in case the new mounts need
+	 * to be tucked under other mounts.
+	 */
+	smp = get_mountpoint(source_mnt->mnt.mnt_root);
+	if (IS_ERR(smp))
+		return PTR_ERR(smp);
+
 	/* Is there space to add these mounts to the mount namespace? */
 	if (!parent_path) {
 		err = count_mounts(ns, source_mnt);
@@ -1997,16 +1999,19 @@ static int attach_recursive_mnt(struct mount *source_mnt,
 		touch_mnt_namespace(source_mnt->mnt_ns);
 	} else {
 		mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt);
-		commit_tree(source_mnt, NULL);
+		commit_tree(source_mnt);
 	}
 
 	hlist_for_each_entry_safe(child, n, &tree_list, mnt_hash) {
 		struct mount *q;
 		hlist_del_init(&child->mnt_hash);
-		q = __lookup_mnt_last(&child->mnt_parent->mnt,
-				      child->mnt_mountpoint);
-		commit_tree(child, q);
+		q = __lookup_mnt(&child->mnt_parent->mnt,
+				 child->mnt_mountpoint);
+		if (q)
+			mnt_change_mountpoint(child, smp, q);
+		commit_tree(child);
 	}
+	put_mountpoint(smp);
 	unlock_mount_hash();
 
 	return 0;
@@ -2021,6 +2026,11 @@ static int attach_recursive_mnt(struct mount *source_mnt,
 	cleanup_group_ids(source_mnt, NULL);
  out:
 	ns->pending_mounts = 0;
+
+	read_seqlock_excl(&mount_lock);
+	put_mountpoint(smp);
+	read_sequnlock_excl(&mount_lock);
+
 	return err;
 }
 