path: root/fs/namespace.c
author	Eric W. Biederman <ebiederm@xmission.com>	2017-01-20 00:28:35 -0500
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>	2017-03-14 22:02:43 -0400
commit	808e83e5add13152bd4a88346487de68395c136b (patch)
tree	08b80d9d58c501135fa9bee58ee0f4052adfa9ec /fs/namespace.c
parent	c9b3f3173fa5178df2c8fe2279442f48faaf7a98 (diff)
mnt: Tuck mounts under others instead of creating shadow/side mounts.
commit 1064f874abc0d05eeed8993815f584d847b72486 upstream.

Ever since mount propagation was introduced, in cases where a mount is propagated to a parent mount and mountpoint pair that is already in use, the code has placed the new mount behind the old mount in the mount hash table. This implementation detail is problematic as it allows creating arbitrary length mount hash chains.

Furthermore it invalidates the constraint maintained elsewhere in the mount code that a parent mount and mountpoint pair will have exactly one mount upon them, making it hard to deal with and to talk about this special case in the mount code.

Modify mount propagation to notice when there is already a mount at the parent mount and mountpoint where a new mount is propagating to, and place that preexisting mount on top of the new mount.

Modify unmount propagation to notice when a mount that is being unmounted has another mount on top of it (and no other children), and to replace the unmounted mount with the mount on top of it.

Move the MNT_UMOUNT test from __lookup_mnt_last into __propagate_umount, as that is the only call of __lookup_mnt_last where MNT_UMOUNT may be set on any mount visible in the mount hash table.

These modifications allow:
- __lookup_mnt_last to be removed.
- attach_shadows to be renamed __attach_mnt and its shadow handling to be removed.
- commit_tree to be simplified.
- copy_tree to be simplified.

The result is an easier to understand tree of mounts that does not allow creation of arbitrary length hash chains in the mount hash table.

The result is also a very slight userspace-visible difference in semantics. The following two cases now behave identically, where before order mattered:

case 1: (explicit user action)
	B is a slave of A
	mount something on A/a, it will propagate to B/a
	and then mount something on B/a

case 2: (tucked mount)
	B is a slave of A
	mount something on B/a
	and then mount something on A/a

Historically umount A/a would fail in case 1 and succeed in case 2. Now umount A/a succeeds in both configurations.

This very small change in semantics appears, if anything, to be a bug fix to me, and my survey of userspace leads me to believe that no programs will notice or care about this subtle semantic change.

v2: Updated mnt_change_mountpoint to not call dput or mntput and instead to decrement the counts directly. It is guaranteed that there will be other references when mnt_change_mountpoint is called, so this is safe.

v3: Moved put_mountpoint under mount_lock in attach_recursive_mnt, as the locking in fs/namespace.c changed between v2 and v3.

v4: Reworked the logic in propagate_mount_busy and __propagate_umount that detects when a mount completely covers another mount.

v5: Removed unnecessary tests whose result is always true in find_topper and attach_recursive_mnt.

v6: Document the user space visible semantic difference.

Fixes: b90fa9ae8f51 ("[PATCH] shared mount handling: bind and rbind")
Tested-by: Andrei Vagin <avagin@virtuozzo.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
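To make the userspace-visible difference concrete, the sketch below (not part of the patch) sets up "case 1" with the mount(2) and umount2(2) syscalls. It is only an illustration under assumptions: the /tmp/A and /tmp/B directories and the tmpfs instances stand in for A, B and "something" above, and it must run as root. With tucked mounts the final umount of A/a is expected to succeed, where kernels without this change returned EBUSY.

/* Hypothetical standalone demo, not part of this commit. */
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mount.h>
#include <sys/stat.h>

static void check(int ret, const char *what)
{
	if (ret < 0) {
		perror(what);
		exit(EXIT_FAILURE);
	}
}

int main(void)
{
	/* Work in a private mount namespace so nothing leaks to the host. */
	check(unshare(CLONE_NEWNS), "unshare");
	check(mount("none", "/", NULL, MS_REC | MS_PRIVATE, NULL), "make / private");

	/* Placeholder directories; EEXIST is fine, so no error check. */
	mkdir("/tmp/A", 0755);
	mkdir("/tmp/B", 0755);

	/* A is a shared mount; B is a bind of A downgraded to a slave. */
	check(mount("tmpfs", "/tmp/A", "tmpfs", 0, NULL), "mount A");
	check(mount("none", "/tmp/A", NULL, MS_SHARED, NULL), "share A");
	check(mount("/tmp/A", "/tmp/B", NULL, MS_BIND, NULL), "bind A onto B");
	check(mount("none", "/tmp/B", NULL, MS_SLAVE, NULL), "enslave B");

	mkdir("/tmp/A/a", 0755);

	/* case 1: mount on A/a (propagates to B/a), then mount on B/a. */
	check(mount("tmpfs", "/tmp/A/a", "tmpfs", 0, NULL), "mount A/a");
	check(mount("tmpfs", "/tmp/B/a", "tmpfs", 0, NULL), "mount B/a");

	/*
	 * Historically this failed with EBUSY because the copy propagated
	 * to B/a is covered by the explicit B/a mount; with tucked mounts
	 * it succeeds, just as in case 2.
	 */
	if (umount2("/tmp/A/a", 0) < 0)
		perror("umount A/a");
	else
		printf("umount A/a succeeded\n");

	return 0;
}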
Diffstat (limited to 'fs/namespace.c')
-rw-r--r--	fs/namespace.c	110
1 file changed, 60 insertions(+), 50 deletions(-)
diff --git a/fs/namespace.c b/fs/namespace.c
index 7cea503ae06d..ea751263fefa 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -641,28 +641,6 @@ struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
 }
 
 /*
- * find the last mount at @dentry on vfsmount @mnt.
- * mount_lock must be held.
- */
-struct mount *__lookup_mnt_last(struct vfsmount *mnt, struct dentry *dentry)
-{
-	struct mount *p, *res = NULL;
-	p = __lookup_mnt(mnt, dentry);
-	if (!p)
-		goto out;
-	if (!(p->mnt.mnt_flags & MNT_UMOUNT))
-		res = p;
-	hlist_for_each_entry_continue(p, mnt_hash) {
-		if (&p->mnt_parent->mnt != mnt || p->mnt_mountpoint != dentry)
-			break;
-		if (!(p->mnt.mnt_flags & MNT_UMOUNT))
-			res = p;
-	}
-out:
-	return res;
-}
-
-/*
  * lookup_mnt - Return the first child mount mounted at path
  *
  * "First" means first mounted chronologically. If you create the
@@ -882,6 +860,13 @@ void mnt_set_mountpoint(struct mount *mnt,
 	hlist_add_head(&child_mnt->mnt_mp_list, &mp->m_list);
 }
 
+static void __attach_mnt(struct mount *mnt, struct mount *parent)
+{
+	hlist_add_head_rcu(&mnt->mnt_hash,
+			   m_hash(&parent->mnt, mnt->mnt_mountpoint));
+	list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
+}
+
 /*
  * vfsmount lock must be held for write
  */
@@ -890,28 +875,45 @@ static void attach_mnt(struct mount *mnt,
 		      struct mountpoint *mp)
 {
 	mnt_set_mountpoint(parent, mp, mnt);
-	hlist_add_head_rcu(&mnt->mnt_hash, m_hash(&parent->mnt, mp->m_dentry));
-	list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
+	__attach_mnt(mnt, parent);
 }
 
-static void attach_shadowed(struct mount *mnt,
-			struct mount *parent,
-			struct mount *shadows)
+void mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp, struct mount *mnt)
 {
-	if (shadows) {
-		hlist_add_behind_rcu(&mnt->mnt_hash, &shadows->mnt_hash);
-		list_add(&mnt->mnt_child, &shadows->mnt_child);
-	} else {
-		hlist_add_head_rcu(&mnt->mnt_hash,
-				m_hash(&parent->mnt, mnt->mnt_mountpoint));
-		list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
-	}
+	struct mountpoint *old_mp = mnt->mnt_mp;
+	struct dentry *old_mountpoint = mnt->mnt_mountpoint;
+	struct mount *old_parent = mnt->mnt_parent;
+
+	list_del_init(&mnt->mnt_child);
+	hlist_del_init(&mnt->mnt_mp_list);
+	hlist_del_init_rcu(&mnt->mnt_hash);
+
+	attach_mnt(mnt, parent, mp);
+
+	put_mountpoint(old_mp);
+
+	/*
+	 * Safely avoid even the suggestion this code might sleep or
+	 * lock the mount hash by taking advantage of the knowledge that
+	 * mnt_change_mountpoint will not release the final reference
+	 * to a mountpoint.
+	 *
+	 * During mounting, the mount passed in as the parent mount will
+	 * continue to use the old mountpoint and during unmounting, the
+	 * old mountpoint will continue to exist until namespace_unlock,
+	 * which happens well after mnt_change_mountpoint.
+	 */
+	spin_lock(&old_mountpoint->d_lock);
+	old_mountpoint->d_lockref.count--;
+	spin_unlock(&old_mountpoint->d_lock);
+
+	mnt_add_count(old_parent, -1);
 }
 
 /*
  * vfsmount lock must be held for write
  */
-static void commit_tree(struct mount *mnt, struct mount *shadows)
+static void commit_tree(struct mount *mnt)
 {
 	struct mount *parent = mnt->mnt_parent;
 	struct mount *m;
@@ -929,7 +931,7 @@ static void commit_tree(struct mount *mnt, struct mount *shadows)
 	n->mounts += n->pending_mounts;
 	n->pending_mounts = 0;
 
-	attach_shadowed(mnt, parent, shadows);
+	__attach_mnt(mnt, parent);
 	touch_mnt_namespace(n);
 }
 
@@ -1737,7 +1739,6 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
 			continue;
 
 		for (s = r; s; s = next_mnt(s, r)) {
-			struct mount *t = NULL;
 			if (!(flag & CL_COPY_UNBINDABLE) &&
 			    IS_MNT_UNBINDABLE(s)) {
 				s = skip_mnt_tree(s);
@@ -1759,14 +1760,7 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
 				goto out;
 			lock_mount_hash();
 			list_add_tail(&q->mnt_list, &res->mnt_list);
-			mnt_set_mountpoint(parent, p->mnt_mp, q);
-			if (!list_empty(&parent->mnt_mounts)) {
-				t = list_last_entry(&parent->mnt_mounts,
-						    struct mount, mnt_child);
-				if (t->mnt_mp != p->mnt_mp)
-					t = NULL;
-			}
-			attach_shadowed(q, parent, t);
+			attach_mnt(q, parent, p->mnt_mp);
 			unlock_mount_hash();
 		}
 	}
@@ -1967,10 +1961,18 @@ static int attach_recursive_mnt(struct mount *source_mnt,
 {
 	HLIST_HEAD(tree_list);
 	struct mnt_namespace *ns = dest_mnt->mnt_ns;
+	struct mountpoint *smp;
 	struct mount *child, *p;
 	struct hlist_node *n;
 	int err;
 
+	/* Preallocate a mountpoint in case the new mounts need
+	 * to be tucked under other mounts.
+	 */
+	smp = get_mountpoint(source_mnt->mnt.mnt_root);
+	if (IS_ERR(smp))
+		return PTR_ERR(smp);
+
 	/* Is there space to add these mounts to the mount namespace? */
 	if (!parent_path) {
 		err = count_mounts(ns, source_mnt);
@@ -1997,16 +1999,19 @@ static int attach_recursive_mnt(struct mount *source_mnt,
 		touch_mnt_namespace(source_mnt->mnt_ns);
 	} else {
 		mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt);
-		commit_tree(source_mnt, NULL);
+		commit_tree(source_mnt);
 	}
 
 	hlist_for_each_entry_safe(child, n, &tree_list, mnt_hash) {
 		struct mount *q;
 		hlist_del_init(&child->mnt_hash);
-		q = __lookup_mnt_last(&child->mnt_parent->mnt,
-				      child->mnt_mountpoint);
-		commit_tree(child, q);
+		q = __lookup_mnt(&child->mnt_parent->mnt,
+				 child->mnt_mountpoint);
+		if (q)
+			mnt_change_mountpoint(child, smp, q);
+		commit_tree(child);
 	}
+	put_mountpoint(smp);
 	unlock_mount_hash();
 
 	return 0;
@@ -2021,6 +2026,11 @@ static int attach_recursive_mnt(struct mount *source_mnt,
 	cleanup_group_ids(source_mnt, NULL);
  out:
 	ns->pending_mounts = 0;
+
+	read_seqlock_excl(&mount_lock);
+	put_mountpoint(smp);
+	read_sequnlock_excl(&mount_lock);
+
 	return err;
 }
 