diff options
author | Al Viro <viro@ZenIV.linux.org.uk> | 2011-06-16 10:10:06 -0400 |
---|---|---|
committer | Al Viro <viro@zeniv.linux.org.uk> | 2011-06-16 11:28:16 -0400 |
commit | 8aef18845266f5c05904c610088f2d1ed58f6be3 (patch) | |
tree | fbdecafcd5e5d15445af18119cc8ee2e9b2cb850 /fs/namei.c | |
parent | 50338b889dc504c69e0cb316ac92d1b9e51f3c8a (diff) |
VFS: Fix vfsmount overput on simultaneous automount
[Kudos to dhowells for tracking that crap down]
If two processes attempt to cause automounting on the same mountpoint at the
same time, the vfsmount holding the mountpoint will be left with one too few
references on it, causing a BUG when the kernel tries to clean up.
The problem is that lock_mount() drops the caller's reference to the
mountpoint's vfsmount in the case where it finds something already mounted on
the mountpoint as it transits to the mounted filesystem and replaces path->mnt
with the new mountpoint vfsmount.
During a pathwalk, however, we don't take a reference on the vfsmount if it is
the same as the one in the nameidata struct, but do_add_mount() doesn't know
this.
The fix is to make sure we have a ref on the vfsmount of the mountpoint before
calling do_add_mount(). However, if lock_mount() doesn't transit, we're then
left with an extra ref on the mountpoint vfsmount which needs releasing.
We can handle that in follow_managed() by not making assumptions about what
we can and what we cannot get from lookup_mnt() as the current code does.
The callers of follow_managed() expect that reference to path->mnt will be
grabbed iff path->mnt has been changed. follow_managed() and follow_automount()
keep track of whether such reference has been grabbed and assume that it'll
happen in those and only those cases that'll have us return with changed
path->mnt. That assumption is almost correct - it breaks in case of
racing automounts and in even harder to hit race between following a mountpoint
and a couple of mount --move. The thing is, we don't need to make that
assumption at all - after the end of loop in follow_manage() we can check
if path->mnt has ended up unchanged and do mntput() if needed.
The BUG can be reproduced with the following test program:
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <sys/wait.h>
int main(int argc, char **argv)
{
int pid, ws;
struct stat buf;
pid = fork();
stat(argv[1], &buf);
if (pid > 0) wait(&ws);
return 0;
}
and the following procedure:
(1) Mount an NFS volume that on the server has something else mounted on a
subdirectory. For instance, I can mount / from my server:
mount warthog:/ /mnt -t nfs4 -r
On the server /data has another filesystem mounted on it, so NFS will see
a change in FSID as it walks down the path, and will mark /mnt/data as
being a mountpoint. This will cause the automount code to be triggered.
!!! Do not look inside the mounted fs at this point !!!
(2) Run the above program on a file within the submount to generate two
simultaneous automount requests:
/tmp/forkstat /mnt/data/testfile
(3) Unmount the automounted submount:
umount /mnt/data
(4) Unmount the original mount:
umount /mnt
At this point the kernel should throw a BUG with something like the
following:
BUG: Dentry ffff880032e3c5c0{i=2,n=} still in use (1) [unmount of nfs4 0:12]
Note that the bug appears on the root dentry of the original mount, not the
mountpoint and not the submount because sys_umount() hasn't got to its final
mntput_no_expire() yet, but this isn't so obvious from the call trace:
[<ffffffff8117cd82>] shrink_dcache_for_umount+0x69/0x82
[<ffffffff8116160e>] generic_shutdown_super+0x37/0x15b
[<ffffffffa00fae56>] ? nfs_super_return_all_delegations+0x2e/0x1b1 [nfs]
[<ffffffff811617f3>] kill_anon_super+0x1d/0x7e
[<ffffffffa00d0be1>] nfs4_kill_super+0x60/0xb6 [nfs]
[<ffffffff81161c17>] deactivate_locked_super+0x34/0x83
[<ffffffff811629ff>] deactivate_super+0x6f/0x7b
[<ffffffff81186261>] mntput_no_expire+0x18d/0x199
[<ffffffff811862a8>] mntput+0x3b/0x44
[<ffffffff81186d87>] release_mounts+0xa2/0xbf
[<ffffffff811876af>] sys_umount+0x47a/0x4ba
[<ffffffff8109e1ca>] ? trace_hardirqs_on_caller+0x1fd/0x22f
[<ffffffff816ea86b>] system_call_fastpath+0x16/0x1b
as do_umount() is inlined. However, you can see release_mounts() in there.
Note also that it may be necessary to have multiple CPU cores to be able to
trigger this bug.
Tested-by: Jeff Layton <jlayton@redhat.com>
Tested-by: Ian Kent <raven@themaw.net>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Diffstat (limited to 'fs/namei.c')
-rw-r--r-- | fs/namei.c | 24 |
1 files changed, 16 insertions, 8 deletions
diff --git a/fs/namei.c b/fs/namei.c index 6301963b161f..9e425e7e6c8f 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -812,6 +812,11 @@ static int follow_automount(struct path *path, unsigned flags, | |||
812 | if (!mnt) /* mount collision */ | 812 | if (!mnt) /* mount collision */ |
813 | return 0; | 813 | return 0; |
814 | 814 | ||
815 | if (!*need_mntput) { | ||
816 | /* lock_mount() may release path->mnt on error */ | ||
817 | mntget(path->mnt); | ||
818 | *need_mntput = true; | ||
819 | } | ||
815 | err = finish_automount(mnt, path); | 820 | err = finish_automount(mnt, path); |
816 | 821 | ||
817 | switch (err) { | 822 | switch (err) { |
@@ -819,12 +824,9 @@ static int follow_automount(struct path *path, unsigned flags, | |||
819 | /* Someone else made a mount here whilst we were busy */ | 824 | /* Someone else made a mount here whilst we were busy */ |
820 | return 0; | 825 | return 0; |
821 | case 0: | 826 | case 0: |
822 | dput(path->dentry); | 827 | path_put(path); |
823 | if (*need_mntput) | ||
824 | mntput(path->mnt); | ||
825 | path->mnt = mnt; | 828 | path->mnt = mnt; |
826 | path->dentry = dget(mnt->mnt_root); | 829 | path->dentry = dget(mnt->mnt_root); |
827 | *need_mntput = true; | ||
828 | return 0; | 830 | return 0; |
829 | default: | 831 | default: |
830 | return err; | 832 | return err; |
@@ -844,9 +846,10 @@ static int follow_automount(struct path *path, unsigned flags, | |||
844 | */ | 846 | */ |
845 | static int follow_managed(struct path *path, unsigned flags) | 847 | static int follow_managed(struct path *path, unsigned flags) |
846 | { | 848 | { |
849 | struct vfsmount *mnt = path->mnt; /* held by caller, must be left alone */ | ||
847 | unsigned managed; | 850 | unsigned managed; |
848 | bool need_mntput = false; | 851 | bool need_mntput = false; |
849 | int ret; | 852 | int ret = 0; |
850 | 853 | ||
851 | /* Given that we're not holding a lock here, we retain the value in a | 854 | /* Given that we're not holding a lock here, we retain the value in a |
852 | * local variable for each dentry as we look at it so that we don't see | 855 | * local variable for each dentry as we look at it so that we don't see |
@@ -861,7 +864,7 @@ static int follow_managed(struct path *path, unsigned flags) | |||
861 | BUG_ON(!path->dentry->d_op->d_manage); | 864 | BUG_ON(!path->dentry->d_op->d_manage); |
862 | ret = path->dentry->d_op->d_manage(path->dentry, false); | 865 | ret = path->dentry->d_op->d_manage(path->dentry, false); |
863 | if (ret < 0) | 866 | if (ret < 0) |
864 | return ret == -EISDIR ? 0 : ret; | 867 | break; |
865 | } | 868 | } |
866 | 869 | ||
867 | /* Transit to a mounted filesystem. */ | 870 | /* Transit to a mounted filesystem. */ |
@@ -887,14 +890,19 @@ static int follow_managed(struct path *path, unsigned flags) | |||
887 | if (managed & DCACHE_NEED_AUTOMOUNT) { | 890 | if (managed & DCACHE_NEED_AUTOMOUNT) { |
888 | ret = follow_automount(path, flags, &need_mntput); | 891 | ret = follow_automount(path, flags, &need_mntput); |
889 | if (ret < 0) | 892 | if (ret < 0) |
890 | return ret == -EISDIR ? 0 : ret; | 893 | break; |
891 | continue; | 894 | continue; |
892 | } | 895 | } |
893 | 896 | ||
894 | /* We didn't change the current path point */ | 897 | /* We didn't change the current path point */ |
895 | break; | 898 | break; |
896 | } | 899 | } |
897 | return 0; | 900 | |
901 | if (need_mntput && path->mnt == mnt) | ||
902 | mntput(path->mnt); | ||
903 | if (ret == -EISDIR) | ||
904 | ret = 0; | ||
905 | return ret; | ||
898 | } | 906 | } |
899 | 907 | ||
900 | int follow_down_one(struct path *path) | 908 | int follow_down_one(struct path *path) |