diff options
author | Al Viro <viro@zeniv.linux.org.uk> | 2018-08-09 17:51:32 -0400 |
---|---|---|
committer | Al Viro <viro@zeniv.linux.org.uk> | 2018-08-09 17:51:32 -0400 |
commit | 119e1ef80ecfe0d1deb6378d4ab41f5b71519de1 (patch) | |
tree | ab2f094205bde3da367a4a875d5102157a7f370b | |
parent | 9ea0a46ca2c318fcc449c1e6b62a7230a17888f1 (diff) |
fix __legitimize_mnt()/mntput() race
__legitimize_mnt() has two problems - one is that in case of success
the check of mount_lock is not ordered wrt preceding increment of
refcount, making it possible to have successful __legitimize_mnt()
on one CPU just before the otherwise final mntput() on another,
with __legitimize_mnt() not seeing mntput() taking the lock and
mntput() not seeing the increment done by __legitimize_mnt().
Solved by a pair of barriers.
Another is that failure of __legitimize_mnt() on the second
read_seqretry() leaves us with a reference that'll need to be
dropped by caller; however, if that races with final mntput()
we can end up with caller dropping rcu_read_lock() and doing
mntput() to release that reference - with the first mntput()
having freed the damn thing just as rcu_read_lock() had been
dropped. Solution: in "do mntput() yourself" failure case
grab mount_lock, check if MNT_DOOMED has been set by racing
final mntput() that has missed our increment and if it has -
undo the increment and treat that as "failure, caller doesn't
need to drop anything" case.
It's not easy to hit - the final mntput() has to come right
after the first read_seqretry() in __legitimize_mnt() *and*
manage to miss the increment done by __legitimize_mnt() before
the second read_seqretry() in there. The things that are almost
impossible to hit on bare hardware are not impossible on SMP
KVM, though...
Reported-by: Oleg Nesterov <oleg@redhat.com>
Fixes: 48a066e72d97 ("RCU'd vfsmounts")
Cc: stable@vger.kernel.org
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
-rw-r--r-- | fs/namespace.c | 14 |
1 files changed, 14 insertions, 0 deletions
diff --git a/fs/namespace.c b/fs/namespace.c index d46a951bd541..bd2f4c68506a 100644 --- a/fs/namespace.c +++ b/fs/namespace.c | |||
@@ -659,12 +659,21 @@ int __legitimize_mnt(struct vfsmount *bastard, unsigned seq) | |||
659 | return 0; | 659 | return 0; |
660 | mnt = real_mount(bastard); | 660 | mnt = real_mount(bastard); |
661 | mnt_add_count(mnt, 1); | 661 | mnt_add_count(mnt, 1); |
662 | smp_mb(); // see mntput_no_expire() | ||
662 | if (likely(!read_seqretry(&mount_lock, seq))) | 663 | if (likely(!read_seqretry(&mount_lock, seq))) |
663 | return 0; | 664 | return 0; |
664 | if (bastard->mnt_flags & MNT_SYNC_UMOUNT) { | 665 | if (bastard->mnt_flags & MNT_SYNC_UMOUNT) { |
665 | mnt_add_count(mnt, -1); | 666 | mnt_add_count(mnt, -1); |
666 | return 1; | 667 | return 1; |
667 | } | 668 | } |
669 | lock_mount_hash(); | ||
670 | if (unlikely(bastard->mnt_flags & MNT_DOOMED)) { | ||
671 | mnt_add_count(mnt, -1); | ||
672 | unlock_mount_hash(); | ||
673 | return 1; | ||
674 | } | ||
675 | unlock_mount_hash(); | ||
676 | /* caller will mntput() */ | ||
668 | return -1; | 677 | return -1; |
669 | } | 678 | } |
670 | 679 | ||
@@ -1210,6 +1219,11 @@ static void mntput_no_expire(struct mount *mnt) | |||
1210 | return; | 1219 | return; |
1211 | } | 1220 | } |
1212 | lock_mount_hash(); | 1221 | lock_mount_hash(); |
1222 | /* | ||
1223 | * make sure that if __legitimize_mnt() has not seen us grab | ||
1224 | * mount_lock, we'll see their refcount increment here. | ||
1225 | */ | ||
1226 | smp_mb(); | ||
1213 | mnt_add_count(mnt, -1); | 1227 | mnt_add_count(mnt, -1); |
1214 | if (mnt_get_count(mnt)) { | 1228 | if (mnt_get_count(mnt)) { |
1215 | rcu_read_unlock(); | 1229 | rcu_read_unlock(); |