nfsd: don't hold i_mutex over userspace upcalls

We need information about exports when crossing mountpoints during lookup or NFSv4 readdir. If we don't already have that information cached, we may have to ask (and wait for) rpc.mountd.

In both cases we currently hold the i_mutex on the parent of the directory we're asking rpc.mountd about. We've seen situations where rpc.mountd performs some operation on that directory that tries to take the i_mutex again, resulting in deadlock.

With some care, we may be able to avoid that in rpc.mountd. But it seems better just to avoid holding a mutex while waiting on userspace.

It appears that lookup_one_len is pretty much the only operation that needs the i_mutex. So we could just drop the i_mutex elsewhere and do something like

        mutex_lock()
        lookup_one_len()
        mutex_unlock()

In many cases though the lookup would have been cached and not required the i_mutex, so it's more efficient to create a lookup_one_len() variant that only takes the i_mutex when necessary.

Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
author: NeilBrown <neilb@suse.de> 2016-01-07 16:08:20 -0500
committer: Al Viro <viro@zeniv.linux.org.uk> 2016-01-09 03:07:52 -0500
commit: bbddca8e8fac07ece3938e03526b5d00fa791a4c (patch)
tree: 5fbe9fe5251f1040bb001377260c56f7330e0459 /fs/nfsd
parent: db39c16724d019029d7533561754d92bef1b389a (diff)
3 files changed, 14 insertions, 19 deletions
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 00575d776d91..2246454dec76 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -823,7 +823,7 @@ compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp,
                } else
                        dchild = dget(dparent);
        } else
-                dchild = lookup_one_len(name, dparent, namlen);
+                dchild = lookup_one_len_unlocked(name, dparent, namlen);
        if (IS_ERR(dchild))
                return rv;
        if (d_mountpoint(dchild))
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 51c9e9ca39a4..325521ce389a 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2838,14 +2838,14 @@ nfsd4_encode_dirent_fattr(struct xdr_stream *xdr, struct nfsd4_readdir *cd,
        __be32 nfserr;
        int ignore_crossmnt = 0;
-        dentry = lookup_one_len(name, cd->rd_fhp->fh_dentry, namlen);
+        dentry = lookup_one_len_unlocked(name, cd->rd_fhp->fh_dentry, namlen);
        if (IS_ERR(dentry))
                return nfserrno(PTR_ERR(dentry));
        if (d_really_is_negative(dentry)) {
                /*
-                 * nfsd_buffered_readdir drops the i_mutex between
+                 * we're not holding the i_mutex here, so there's
-                 * readdir and calling this callback, leaving a window
+                 * a window where this directory entry could have gone
-                 * where this directory entry could have gone away.
+                 * away.
                 */
                dput(dentry);
                return nfserr_noent;
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 994d66fbb446..4212aaacbb55 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -217,10 +217,16 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
                host_err = PTR_ERR(dentry);
                if (IS_ERR(dentry))
                        goto out_nfserr;
-                /*
-                 * check if we have crossed a mount point ...
-                 */
                if (nfsd_mountpoint(dentry, exp)) {
+                        /*
+                         * We don't need the i_mutex after all.  It's
+                         * still possible we could open this (regular
+                         * files can be mountpoints too), but the
+                         * i_mutex is just there to prevent renames of
+                         * something that we might be about to delegate,
+                         * and a mountpoint won't be renamed:
+                         */
+                        fh_unlock(fhp);
                        if ((host_err = nfsd_cross_mnt(rqstp, &dentry, &exp))) {
                                dput(dentry);
                                goto out_nfserr;
@@ -1809,7 +1815,6 @@ static __be32 nfsd_buffered_readdir(struct file *file, nfsd_filldir_t func,
        offset = *offsetp;
        while (1) {
-                struct inode *dir_inode = file_inode(file);
                unsigned int reclen;
                cdp->err = nfserr_eof; /* will be cleared on successful read */
@@ -1828,15 +1833,6 @@ static __be32 nfsd_buffered_readdir(struct file *file, nfsd_filldir_t func,
                if (!size)
                        break;
-                /*
-                 * Various filldir functions may end up calling back into
-                 * lookup_one_len() and the file system's ->lookup() method.
-                 * These expect i_mutex to be held, as it would within readdir.
-                 */
-                host_err = mutex_lock_killable(&dir_inode->i_mutex);
-                if (host_err)
-                        break;
                de = (struct buffered_dirent *)buf.dirent;
                while (size > 0) {
                        offset = de->offset;
@@ -1853,7 +1849,6 @@ static __be32 nfsd_buffered_readdir(struct file *file, nfsd_filldir_t func,
                        size -= reclen;
                        de = (struct buffered_dirent *)((char *)de + reclen);
                }
-                mutex_unlock(&dir_inode->i_mutex);
                if (size > 0) /* We bailed out early */
                        break;
author	NeilBrown <neilb@suse.de>	2016-01-07 16:08:20 -0500
committer	Al Viro <viro@zeniv.linux.org.uk>	2016-01-09 03:07:52 -0500
commit	bbddca8e8fac07ece3938e03526b5d00fa791a4c (patch)
tree	5fbe9fe5251f1040bb001377260c56f7330e0459 /fs/nfsd
parent	db39c16724d019029d7533561754d92bef1b389a (diff)