aboutsummaryrefslogtreecommitdiffstats
path: root/fs/nfsd
diff options
context:
space:
mode:
authorNeilBrown <neilb@suse.de>2016-01-07 16:08:20 -0500
committerAl Viro <viro@zeniv.linux.org.uk>2016-01-09 03:07:52 -0500
commitbbddca8e8fac07ece3938e03526b5d00fa791a4c (patch)
tree5fbe9fe5251f1040bb001377260c56f7330e0459 /fs/nfsd
parentdb39c16724d019029d7533561754d92bef1b389a (diff)
nfsd: don't hold i_mutex over userspace upcalls
We need information about exports when crossing mountpoints during lookup or NFSv4 readdir. If we don't already have that information cached, we may have to ask (and wait for) rpc.mountd. In both cases we currently hold the i_mutex on the parent of the directory we're asking rpc.mountd about. We've seen situations where rpc.mountd performs some operation on that directory that tries to take the i_mutex again, resulting in deadlock. With some care, we may be able to avoid that in rpc.mountd. But it seems better just to avoid holding a mutex while waiting on userspace. It appears that lookup_one_len is pretty much the only operation that needs the i_mutex. So we could just drop the i_mutex elsewhere and do something like mutex_lock() lookup_one_len() mutex_unlock() In many cases though the lookup would have been cached and not required the i_mutex, so it's more efficient to create a lookup_one_len() variant that only takes the i_mutex when necessary. Signed-off-by: NeilBrown <neilb@suse.de> Signed-off-by: J. Bruce Fields <bfields@redhat.com> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Diffstat (limited to 'fs/nfsd')
-rw-r--r--fs/nfsd/nfs3xdr.c2
-rw-r--r--fs/nfsd/nfs4xdr.c8
-rw-r--r--fs/nfsd/vfs.c23
3 files changed, 14 insertions, 19 deletions
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 00575d776d91..2246454dec76 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -823,7 +823,7 @@ compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp,
823 } else 823 } else
824 dchild = dget(dparent); 824 dchild = dget(dparent);
825 } else 825 } else
826 dchild = lookup_one_len(name, dparent, namlen); 826 dchild = lookup_one_len_unlocked(name, dparent, namlen);
827 if (IS_ERR(dchild)) 827 if (IS_ERR(dchild))
828 return rv; 828 return rv;
829 if (d_mountpoint(dchild)) 829 if (d_mountpoint(dchild))
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 51c9e9ca39a4..325521ce389a 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2838,14 +2838,14 @@ nfsd4_encode_dirent_fattr(struct xdr_stream *xdr, struct nfsd4_readdir *cd,
2838 __be32 nfserr; 2838 __be32 nfserr;
2839 int ignore_crossmnt = 0; 2839 int ignore_crossmnt = 0;
2840 2840
2841 dentry = lookup_one_len(name, cd->rd_fhp->fh_dentry, namlen); 2841 dentry = lookup_one_len_unlocked(name, cd->rd_fhp->fh_dentry, namlen);
2842 if (IS_ERR(dentry)) 2842 if (IS_ERR(dentry))
2843 return nfserrno(PTR_ERR(dentry)); 2843 return nfserrno(PTR_ERR(dentry));
2844 if (d_really_is_negative(dentry)) { 2844 if (d_really_is_negative(dentry)) {
2845 /* 2845 /*
2846 * nfsd_buffered_readdir drops the i_mutex between 2846 * we're not holding the i_mutex here, so there's
2847 * readdir and calling this callback, leaving a window 2847 * a window where this directory entry could have gone
2848 * where this directory entry could have gone away. 2848 * away.
2849 */ 2849 */
2850 dput(dentry); 2850 dput(dentry);
2851 return nfserr_noent; 2851 return nfserr_noent;
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 994d66fbb446..4212aaacbb55 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -217,10 +217,16 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
217 host_err = PTR_ERR(dentry); 217 host_err = PTR_ERR(dentry);
218 if (IS_ERR(dentry)) 218 if (IS_ERR(dentry))
219 goto out_nfserr; 219 goto out_nfserr;
220 /*
221 * check if we have crossed a mount point ...
222 */
223 if (nfsd_mountpoint(dentry, exp)) { 220 if (nfsd_mountpoint(dentry, exp)) {
221 /*
222 * We don't need the i_mutex after all. It's
223 * still possible we could open this (regular
224 * files can be mountpoints too), but the
225 * i_mutex is just there to prevent renames of
226 * something that we might be about to delegate,
227 * and a mountpoint won't be renamed:
228 */
229 fh_unlock(fhp);
224 if ((host_err = nfsd_cross_mnt(rqstp, &dentry, &exp))) { 230 if ((host_err = nfsd_cross_mnt(rqstp, &dentry, &exp))) {
225 dput(dentry); 231 dput(dentry);
226 goto out_nfserr; 232 goto out_nfserr;
@@ -1809,7 +1815,6 @@ static __be32 nfsd_buffered_readdir(struct file *file, nfsd_filldir_t func,
1809 offset = *offsetp; 1815 offset = *offsetp;
1810 1816
1811 while (1) { 1817 while (1) {
1812 struct inode *dir_inode = file_inode(file);
1813 unsigned int reclen; 1818 unsigned int reclen;
1814 1819
1815 cdp->err = nfserr_eof; /* will be cleared on successful read */ 1820 cdp->err = nfserr_eof; /* will be cleared on successful read */
@@ -1828,15 +1833,6 @@ static __be32 nfsd_buffered_readdir(struct file *file, nfsd_filldir_t func,
1828 if (!size) 1833 if (!size)
1829 break; 1834 break;
1830 1835
1831 /*
1832 * Various filldir functions may end up calling back into
1833 * lookup_one_len() and the file system's ->lookup() method.
1834 * These expect i_mutex to be held, as it would within readdir.
1835 */
1836 host_err = mutex_lock_killable(&dir_inode->i_mutex);
1837 if (host_err)
1838 break;
1839
1840 de = (struct buffered_dirent *)buf.dirent; 1836 de = (struct buffered_dirent *)buf.dirent;
1841 while (size > 0) { 1837 while (size > 0) {
1842 offset = de->offset; 1838 offset = de->offset;
@@ -1853,7 +1849,6 @@ static __be32 nfsd_buffered_readdir(struct file *file, nfsd_filldir_t func,
1853 size -= reclen; 1849 size -= reclen;
1854 de = (struct buffered_dirent *)((char *)de + reclen); 1850 de = (struct buffered_dirent *)((char *)de + reclen);
1855 } 1851 }
1856 mutex_unlock(&dir_inode->i_mutex);
1857 if (size > 0) /* We bailed out early */ 1852 if (size > 0) /* We bailed out early */
1858 break; 1853 break;
1859 1854