author    Eric W. Biederman <ebiederm@xmission.com>  2017-01-20 00:28:35 -0500
committer Eric W. Biederman <ebiederm@xmission.com>  2017-02-03 06:01:06 -0500
commit    1064f874abc0d05eeed8993815f584d847b72486
tree      32b68ec43cccfea8a47474d93f9cf271b033fffc
parent    749860ce242798fb090557a5a7868dee40af9268
mnt: Tuck mounts under others instead of creating shadow/side mounts.
Ever since mount propagation was introduced, in cases where a mount is
propagated to a parent mount and mountpoint pair that is already in use,
the code has placed the new mount behind the old mount in the mount hash
table.

This implementation detail is problematic as it allows creating
arbitrary length mount hash chains.  Furthermore it invalidates the
constraint maintained elsewhere in the mount code that a parent mount
and a mountpoint pair will have exactly one mount upon them, making it
hard to deal with and to talk about this special case in the mount code.

Modify mount propagation to notice when there is already a mount at the
parent mount and mountpoint where a new mount is propagating to, and
place that preexisting mount on top of the new mount.

Modify unmount propagation to notice when a mount that is being
unmounted has another mount on top of it (and no other children), and
to replace the unmounted mount with the mount on top of it.

Move the MNT_UMOUNT test from __lookup_mnt_last into __propagate_umount,
as that is the only call of __lookup_mnt_last where MNT_UMOUNT may be
set on any mount visible in the mount hash table.

These modifications allow:
 - __lookup_mnt_last to be removed.
 - attach_shadows to be renamed __attach_mnt and its shadow handling to
   be removed.
 - commit_tree to be simplified.
 - copy_tree to be simplified.

The result is an easier to understand tree of mounts that does not
allow creation of arbitrary length hash chains in the mount hash table.

The result is also a very slight userspace visible difference in
semantics.  The following two cases now behave identically, where
before order mattered:

case 1: (explicit user action)
	B is a slave of A
	mount something on A/a, it will propagate to B/a
	and then mount something on B/a

case 2: (tucked mount)
	B is a slave of A
	mount something on B/a
	and then mount something on A/a

Historically umount A/a would fail in case 1 and succeed in case 2.
Now umount A/a succeeds in both configurations.

This very small change in semantics appears if anything to be a bug fix
to me, and my survey of userspace leads me to believe that no programs
will notice or care about this subtle semantic change.

v2: Updated mnt_change_mountpoint to not call dput or mntput and
    instead to decrement the counts directly.  It is guaranteed that
    there will be other references when mnt_change_mountpoint is
    called, so this is safe.
v3: Moved put_mountpoint under mount_lock in attach_recursive_mnt,
    as the locking in fs/namespace.c changed between v2 and v3.
v4: Reworked the logic in propagate_mount_busy and __propagate_umount
    that detects when a mount completely covers another mount.
v5: Removed unnecessary tests whose result is always true in
    find_topper and attach_recursive_mnt.
v6: Document the user space visible semantic difference.

Cc: stable@vger.kernel.org
Fixes: b90fa9ae8f51 ("[PATCH] shared mount handling: bind and rbind")
Tested-by: Andrei Vagin <avagin@virtuozzo.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
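To make the two cases concrete, the following minimal C sketch (an
illustration, not part of the patch) reproduces case 1 with the
mount(2) and umount(2) system calls.  The /A and /B directories, the
tmpfs instances, and the omitted error checking on the setup calls are
assumptions made purely for the example; it must run as root:

	#include <stdio.h>
	#include <sys/mount.h>
	#include <sys/stat.h>

	int main(void)
	{
		/* Give /A its own mount with a subdirectory to mount
		 * on, and mark it shared so mounts propagate from it. */
		mount("none", "/A", "tmpfs", 0, NULL);
		mkdir("/A/a", 0755);
		mount(NULL, "/A", NULL, MS_SHARED, NULL);

		/* Make B a slave of A. */
		mount("/A", "/B", NULL, MS_BIND, NULL);
		mount(NULL, "/B", NULL, MS_SLAVE, NULL);

		/* Case 1: mount something on A/a (it propagates to
		 * B/a), and then mount something on B/a. */
		mount("none", "/A/a", "tmpfs", 0, NULL);
		mount("none", "/B/a", "tmpfs", 0, NULL);

		/* Historically this umount failed; with this patch it
		 * succeeds, just as in case 2. */
		if (umount("/A/a") != 0)
			perror("umount /A/a");
		return 0;
	}

Swapping the order of the two mounts on A/a and B/a gives case 2.
After this patch the umount behaves the same either way: the propagated
mount at B/a is unmounted and the single mount covering it (the topper
located by find_topper below) is reattached at B/a in its place.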
-rw-r--r--	fs/mount.h	1
-rw-r--r--	fs/namespace.c	110
-rw-r--r--	fs/pnode.c	61
-rw-r--r--	fs/pnode.h	2
4 files changed, 111 insertions(+), 63 deletions(-)
diff --git a/fs/mount.h b/fs/mount.h
index 2c856fc47ae3..2826543a131d 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -89,7 +89,6 @@ static inline int is_mounted(struct vfsmount *mnt)
 }
 
 extern struct mount *__lookup_mnt(struct vfsmount *, struct dentry *);
-extern struct mount *__lookup_mnt_last(struct vfsmount *, struct dentry *);
 
 extern int __legitimize_mnt(struct vfsmount *, unsigned);
 extern bool legitimize_mnt(struct vfsmount *, unsigned);
diff --git a/fs/namespace.c b/fs/namespace.c
index 089a6b23135a..8bfad42c1ccf 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -637,28 +637,6 @@ struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
 }
 
 /*
- * find the last mount at @dentry on vfsmount @mnt.
- * mount_lock must be held.
- */
-struct mount *__lookup_mnt_last(struct vfsmount *mnt, struct dentry *dentry)
-{
-	struct mount *p, *res = NULL;
-	p = __lookup_mnt(mnt, dentry);
-	if (!p)
-		goto out;
-	if (!(p->mnt.mnt_flags & MNT_UMOUNT))
-		res = p;
-	hlist_for_each_entry_continue(p, mnt_hash) {
-		if (&p->mnt_parent->mnt != mnt || p->mnt_mountpoint != dentry)
-			break;
-		if (!(p->mnt.mnt_flags & MNT_UMOUNT))
-			res = p;
-	}
-out:
-	return res;
-}
-
-/*
  * lookup_mnt - Return the first child mount mounted at path
  *
  * "First" means first mounted chronologically.  If you create the
@@ -878,6 +856,13 @@ void mnt_set_mountpoint(struct mount *mnt,
 	hlist_add_head(&child_mnt->mnt_mp_list, &mp->m_list);
 }
 
+static void __attach_mnt(struct mount *mnt, struct mount *parent)
+{
+	hlist_add_head_rcu(&mnt->mnt_hash,
+			   m_hash(&parent->mnt, mnt->mnt_mountpoint));
+	list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
+}
+
 /*
  * vfsmount lock must be held for write
  */
@@ -886,28 +871,45 @@ static void attach_mnt(struct mount *mnt,
 			struct mountpoint *mp)
 {
 	mnt_set_mountpoint(parent, mp, mnt);
-	hlist_add_head_rcu(&mnt->mnt_hash, m_hash(&parent->mnt, mp->m_dentry));
-	list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
+	__attach_mnt(mnt, parent);
 }
 
-static void attach_shadowed(struct mount *mnt,
-			struct mount *parent,
-			struct mount *shadows)
+void mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp, struct mount *mnt)
 {
-	if (shadows) {
-		hlist_add_behind_rcu(&mnt->mnt_hash, &shadows->mnt_hash);
-		list_add(&mnt->mnt_child, &shadows->mnt_child);
-	} else {
-		hlist_add_head_rcu(&mnt->mnt_hash,
-				m_hash(&parent->mnt, mnt->mnt_mountpoint));
-		list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
-	}
+	struct mountpoint *old_mp = mnt->mnt_mp;
+	struct dentry *old_mountpoint = mnt->mnt_mountpoint;
+	struct mount *old_parent = mnt->mnt_parent;
+
+	list_del_init(&mnt->mnt_child);
+	hlist_del_init(&mnt->mnt_mp_list);
+	hlist_del_init_rcu(&mnt->mnt_hash);
+
+	attach_mnt(mnt, parent, mp);
+
+	put_mountpoint(old_mp);
+
+	/*
+	 * Safely avoid even the suggestion this code might sleep or
+	 * lock the mount hash by taking advantage of the knowledge that
+	 * mnt_change_mountpoint will not release the final reference
+	 * to a mountpoint.
+	 *
+	 * During mounting, the mount passed in as the parent mount will
+	 * continue to use the old mountpoint and during unmounting, the
+	 * old mountpoint will continue to exist until namespace_unlock,
+	 * which happens well after mnt_change_mountpoint.
+	 */
+	spin_lock(&old_mountpoint->d_lock);
+	old_mountpoint->d_lockref.count--;
+	spin_unlock(&old_mountpoint->d_lock);
+
+	mnt_add_count(old_parent, -1);
 }
 
 /*
  * vfsmount lock must be held for write
  */
-static void commit_tree(struct mount *mnt, struct mount *shadows)
+static void commit_tree(struct mount *mnt)
 {
 	struct mount *parent = mnt->mnt_parent;
 	struct mount *m;
@@ -925,7 +927,7 @@ static void commit_tree(struct mount *mnt, struct mount *shadows)
 	n->mounts += n->pending_mounts;
 	n->pending_mounts = 0;
 
-	attach_shadowed(mnt, parent, shadows);
+	__attach_mnt(mnt, parent);
 	touch_mnt_namespace(n);
 }
 
@@ -1779,7 +1781,6 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
 			continue;
 
 		for (s = r; s; s = next_mnt(s, r)) {
-			struct mount *t = NULL;
 			if (!(flag & CL_COPY_UNBINDABLE) &&
 			    IS_MNT_UNBINDABLE(s)) {
 				s = skip_mnt_tree(s);
@@ -1801,14 +1802,7 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
 				goto out;
 			lock_mount_hash();
 			list_add_tail(&q->mnt_list, &res->mnt_list);
-			mnt_set_mountpoint(parent, p->mnt_mp, q);
-			if (!list_empty(&parent->mnt_mounts)) {
-				t = list_last_entry(&parent->mnt_mounts,
-						struct mount, mnt_child);
-				if (t->mnt_mp != p->mnt_mp)
-					t = NULL;
-			}
-			attach_shadowed(q, parent, t);
+			attach_mnt(q, parent, p->mnt_mp);
 			unlock_mount_hash();
 		}
 	}
@@ -2007,10 +2001,18 @@ static int attach_recursive_mnt(struct mount *source_mnt,
 {
 	HLIST_HEAD(tree_list);
 	struct mnt_namespace *ns = dest_mnt->mnt_ns;
+	struct mountpoint *smp;
 	struct mount *child, *p;
 	struct hlist_node *n;
 	int err;
 
+	/* Preallocate a mountpoint in case the new mounts need
+	 * to be tucked under other mounts.
+	 */
+	smp = get_mountpoint(source_mnt->mnt.mnt_root);
+	if (IS_ERR(smp))
+		return PTR_ERR(smp);
+
 	/* Is there space to add these mounts to the mount namespace? */
 	if (!parent_path) {
 		err = count_mounts(ns, source_mnt);
@@ -2037,16 +2039,19 @@ static int attach_recursive_mnt(struct mount *source_mnt,
 		touch_mnt_namespace(source_mnt->mnt_ns);
 	} else {
 		mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt);
-		commit_tree(source_mnt, NULL);
+		commit_tree(source_mnt);
 	}
 
 	hlist_for_each_entry_safe(child, n, &tree_list, mnt_hash) {
 		struct mount *q;
 		hlist_del_init(&child->mnt_hash);
-		q = __lookup_mnt_last(&child->mnt_parent->mnt,
-				      child->mnt_mountpoint);
-		commit_tree(child, q);
+		q = __lookup_mnt(&child->mnt_parent->mnt,
+				 child->mnt_mountpoint);
+		if (q)
+			mnt_change_mountpoint(child, smp, q);
+		commit_tree(child);
 	}
+	put_mountpoint(smp);
 	unlock_mount_hash();
 
 	return 0;
@@ -2061,6 +2066,11 @@ static int attach_recursive_mnt(struct mount *source_mnt,
 	cleanup_group_ids(source_mnt, NULL);
  out:
 	ns->pending_mounts = 0;
+
+	read_seqlock_excl(&mount_lock);
+	put_mountpoint(smp);
+	read_sequnlock_excl(&mount_lock);
+
 	return err;
 }
 
diff --git a/fs/pnode.c b/fs/pnode.c
index 06a793f4ae38..5bc7896d122a 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -322,6 +322,21 @@ out:
 	return ret;
 }
 
+static struct mount *find_topper(struct mount *mnt)
+{
+	/* If there is exactly one mount covering mnt completely return it. */
+	struct mount *child;
+
+	if (!list_is_singular(&mnt->mnt_mounts))
+		return NULL;
+
+	child = list_first_entry(&mnt->mnt_mounts, struct mount, mnt_child);
+	if (child->mnt_mountpoint != mnt->mnt.mnt_root)
+		return NULL;
+
+	return child;
+}
+
 /*
  * return true if the refcount is greater than count
  */
@@ -342,9 +357,8 @@ static inline int do_refcount_check(struct mount *mnt, int count)
  */
 int propagate_mount_busy(struct mount *mnt, int refcnt)
 {
-	struct mount *m, *child;
+	struct mount *m, *child, *topper;
 	struct mount *parent = mnt->mnt_parent;
-	int ret = 0;
 
 	if (mnt == parent)
 		return do_refcount_check(mnt, refcnt);
@@ -359,12 +373,24 @@ int propagate_mount_busy(struct mount *mnt, int refcnt)
 
 	for (m = propagation_next(parent, parent); m;
 	     m = propagation_next(m, parent)) {
-		child = __lookup_mnt_last(&m->mnt, mnt->mnt_mountpoint);
-		if (child && list_empty(&child->mnt_mounts) &&
-		    (ret = do_refcount_check(child, 1)))
-			break;
+		int count = 1;
+		child = __lookup_mnt(&m->mnt, mnt->mnt_mountpoint);
+		if (!child)
+			continue;
+
+		/* Is there exactly one mount on the child that covers
+		 * it completely whose reference should be ignored?
+		 */
+		topper = find_topper(child);
+		if (topper)
+			count += 1;
+		else if (!list_empty(&child->mnt_mounts))
+			continue;
+
+		if (do_refcount_check(child, count))
+			return 1;
 	}
-	return ret;
+	return 0;
 }
 
 /*
@@ -381,7 +407,7 @@ void propagate_mount_unlock(struct mount *mnt)
 
 	for (m = propagation_next(parent, parent); m;
 	     m = propagation_next(m, parent)) {
-		child = __lookup_mnt_last(&m->mnt, mnt->mnt_mountpoint);
+		child = __lookup_mnt(&m->mnt, mnt->mnt_mountpoint);
 		if (child)
 			child->mnt.mnt_flags &= ~MNT_LOCKED;
 	}
@@ -399,9 +425,11 @@ static void mark_umount_candidates(struct mount *mnt)
 
 	for (m = propagation_next(parent, parent); m;
 	     m = propagation_next(m, parent)) {
-		struct mount *child = __lookup_mnt_last(&m->mnt,
+		struct mount *child = __lookup_mnt(&m->mnt,
 						mnt->mnt_mountpoint);
-		if (child && (!IS_MNT_LOCKED(child) || IS_MNT_MARKED(m))) {
+		if (!child || (child->mnt.mnt_flags & MNT_UMOUNT))
+			continue;
+		if (!IS_MNT_LOCKED(child) || IS_MNT_MARKED(m)) {
 			SET_MNT_MARK(child);
 		}
 	}
@@ -420,8 +448,8 @@ static void __propagate_umount(struct mount *mnt)
 
 	for (m = propagation_next(parent, parent); m;
 	     m = propagation_next(m, parent)) {
-
-		struct mount *child = __lookup_mnt_last(&m->mnt,
+		struct mount *topper;
+		struct mount *child = __lookup_mnt(&m->mnt,
 						mnt->mnt_mountpoint);
 		/*
 		 * umount the child only if the child has no children
@@ -430,6 +458,15 @@ static void __propagate_umount(struct mount *mnt)
 		if (!child || !IS_MNT_MARKED(child))
 			continue;
 		CLEAR_MNT_MARK(child);
+
+		/* If there is exactly one mount covering all of child
+		 * replace child with that mount.
+		 */
+		topper = find_topper(child);
+		if (topper)
+			mnt_change_mountpoint(child->mnt_parent, child->mnt_mp,
+					      topper);
+
 		if (list_empty(&child->mnt_mounts)) {
 			list_del_init(&child->mnt_child);
 			child->mnt.mnt_flags |= MNT_UMOUNT;
diff --git a/fs/pnode.h b/fs/pnode.h
index 550f5a8b4fcf..dc87e65becd2 100644
--- a/fs/pnode.h
+++ b/fs/pnode.h
@@ -49,6 +49,8 @@ int get_dominating_id(struct mount *mnt, const struct path *root);
 unsigned int mnt_get_count(struct mount *mnt);
 void mnt_set_mountpoint(struct mount *, struct mountpoint *,
 			struct mount *);
+void mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp,
+			   struct mount *mnt);
 struct mount *copy_tree(struct mount *, struct dentry *, int);
 bool is_path_reachable(struct mount *, struct dentry *,
 		       const struct path *root);