aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNeilBrown <neilb@suse.de>2016-01-07 16:08:20 -0500
committerAl Viro <viro@zeniv.linux.org.uk>2016-01-09 03:07:52 -0500
commitbbddca8e8fac07ece3938e03526b5d00fa791a4c (patch)
tree5fbe9fe5251f1040bb001377260c56f7330e0459
parentdb39c16724d019029d7533561754d92bef1b389a (diff)
nfsd: don't hold i_mutex over userspace upcalls
We need information about exports when crossing mountpoints during lookup or NFSv4 readdir. If we don't already have that information cached, we may have to ask (and wait for) rpc.mountd. In both cases we currently hold the i_mutex on the parent of the directory we're asking rpc.mountd about. We've seen situations where rpc.mountd performs some operation on that directory that tries to take the i_mutex again, resulting in deadlock. With some care, we may be able to avoid that in rpc.mountd. But it seems better just to avoid holding a mutex while waiting on userspace. It appears that lookup_one_len is pretty much the only operation that needs the i_mutex. So we could just drop the i_mutex elsewhere and do something like mutex_lock() lookup_one_len() mutex_unlock() In many cases though the lookup would have been cached and not required the i_mutex, so it's more efficient to create a lookup_one_len() variant that only takes the i_mutex when necessary. Signed-off-by: NeilBrown <neilb@suse.de> Signed-off-by: J. Bruce Fields <bfields@redhat.com> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
-rw-r--r--fs/namei.c71
-rw-r--r--fs/nfsd/nfs3xdr.c2
-rw-r--r--fs/nfsd/nfs4xdr.c8
-rw-r--r--fs/nfsd/vfs.c23
-rw-r--r--include/linux/namei.h1
5 files changed, 86 insertions, 19 deletions
diff --git a/fs/namei.c b/fs/namei.c
index 45c702edce3c..1067f7a0287a 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2272,6 +2272,8 @@ EXPORT_SYMBOL(vfs_path_lookup);
2272 * 2272 *
2273 * Note that this routine is purely a helper for filesystem usage and should 2273 * Note that this routine is purely a helper for filesystem usage and should
2274 * not be called by generic code. 2274 * not be called by generic code.
2275 *
2276 * The caller must hold base->i_mutex.
2275 */ 2277 */
2276struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) 2278struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
2277{ 2279{
@@ -2315,6 +2317,75 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
2315} 2317}
2316EXPORT_SYMBOL(lookup_one_len); 2318EXPORT_SYMBOL(lookup_one_len);
2317 2319
2320/**
2321 * lookup_one_len_unlocked - filesystem helper to lookup single pathname component
2322 * @name: pathname component to lookup
2323 * @base: base directory to lookup from
2324 * @len: maximum length @len should be interpreted to
2325 *
2326 * Note that this routine is purely a helper for filesystem usage and should
2327 * not be called by generic code.
2328 *
2329 * Unlike lookup_one_len, it should be called without the parent
2330 * i_mutex held, and will take the i_mutex itself if necessary.
2331 */
2332struct dentry *lookup_one_len_unlocked(const char *name,
2333 struct dentry *base, int len)
2334{
2335 struct qstr this;
2336 unsigned int c;
2337 int err;
2338 struct dentry *ret;
2339
2340 this.name = name;
2341 this.len = len;
2342 this.hash = full_name_hash(name, len);
2343 if (!len)
2344 return ERR_PTR(-EACCES);
2345
2346 if (unlikely(name[0] == '.')) {
2347 if (len < 2 || (len == 2 && name[1] == '.'))
2348 return ERR_PTR(-EACCES);
2349 }
2350
2351 while (len--) {
2352 c = *(const unsigned char *)name++;
2353 if (c == '/' || c == '\0')
2354 return ERR_PTR(-EACCES);
2355 }
2356 /*
2357 * See if the low-level filesystem might want
2358 * to use its own hash..
2359 */
2360 if (base->d_flags & DCACHE_OP_HASH) {
2361 int err = base->d_op->d_hash(base, &this);
2362 if (err < 0)
2363 return ERR_PTR(err);
2364 }
2365
2366 err = inode_permission(base->d_inode, MAY_EXEC);
2367 if (err)
2368 return ERR_PTR(err);
2369
2370 /*
2371 * __d_lookup() is used to try to get a quick answer and avoid the
2372 * mutex. A false-negative does no harm.
2373 */
2374 ret = __d_lookup(base, &this);
2375 if (ret && unlikely(ret->d_flags & DCACHE_OP_REVALIDATE)) {
2376 dput(ret);
2377 ret = NULL;
2378 }
2379 if (ret)
2380 return ret;
2381
2382 mutex_lock(&base->d_inode->i_mutex);
2383 ret = __lookup_hash(&this, base, 0);
2384 mutex_unlock(&base->d_inode->i_mutex);
2385 return ret;
2386}
2387EXPORT_SYMBOL(lookup_one_len_unlocked);
2388
2318int user_path_at_empty(int dfd, const char __user *name, unsigned flags, 2389int user_path_at_empty(int dfd, const char __user *name, unsigned flags,
2319 struct path *path, int *empty) 2390 struct path *path, int *empty)
2320{ 2391{
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 00575d776d91..2246454dec76 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -823,7 +823,7 @@ compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp,
823 } else 823 } else
824 dchild = dget(dparent); 824 dchild = dget(dparent);
825 } else 825 } else
826 dchild = lookup_one_len(name, dparent, namlen); 826 dchild = lookup_one_len_unlocked(name, dparent, namlen);
827 if (IS_ERR(dchild)) 827 if (IS_ERR(dchild))
828 return rv; 828 return rv;
829 if (d_mountpoint(dchild)) 829 if (d_mountpoint(dchild))
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 51c9e9ca39a4..325521ce389a 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2838,14 +2838,14 @@ nfsd4_encode_dirent_fattr(struct xdr_stream *xdr, struct nfsd4_readdir *cd,
2838 __be32 nfserr; 2838 __be32 nfserr;
2839 int ignore_crossmnt = 0; 2839 int ignore_crossmnt = 0;
2840 2840
2841 dentry = lookup_one_len(name, cd->rd_fhp->fh_dentry, namlen); 2841 dentry = lookup_one_len_unlocked(name, cd->rd_fhp->fh_dentry, namlen);
2842 if (IS_ERR(dentry)) 2842 if (IS_ERR(dentry))
2843 return nfserrno(PTR_ERR(dentry)); 2843 return nfserrno(PTR_ERR(dentry));
2844 if (d_really_is_negative(dentry)) { 2844 if (d_really_is_negative(dentry)) {
2845 /* 2845 /*
2846 * nfsd_buffered_readdir drops the i_mutex between 2846 * we're not holding the i_mutex here, so there's
2847 * readdir and calling this callback, leaving a window 2847 * a window where this directory entry could have gone
2848 * where this directory entry could have gone away. 2848 * away.
2849 */ 2849 */
2850 dput(dentry); 2850 dput(dentry);
2851 return nfserr_noent; 2851 return nfserr_noent;
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 994d66fbb446..4212aaacbb55 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -217,10 +217,16 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
217 host_err = PTR_ERR(dentry); 217 host_err = PTR_ERR(dentry);
218 if (IS_ERR(dentry)) 218 if (IS_ERR(dentry))
219 goto out_nfserr; 219 goto out_nfserr;
220 /*
221 * check if we have crossed a mount point ...
222 */
223 if (nfsd_mountpoint(dentry, exp)) { 220 if (nfsd_mountpoint(dentry, exp)) {
221 /*
222 * We don't need the i_mutex after all. It's
223 * still possible we could open this (regular
224 * files can be mountpoints too), but the
225 * i_mutex is just there to prevent renames of
226 * something that we might be about to delegate,
227 * and a mountpoint won't be renamed:
228 */
229 fh_unlock(fhp);
224 if ((host_err = nfsd_cross_mnt(rqstp, &dentry, &exp))) { 230 if ((host_err = nfsd_cross_mnt(rqstp, &dentry, &exp))) {
225 dput(dentry); 231 dput(dentry);
226 goto out_nfserr; 232 goto out_nfserr;
@@ -1809,7 +1815,6 @@ static __be32 nfsd_buffered_readdir(struct file *file, nfsd_filldir_t func,
1809 offset = *offsetp; 1815 offset = *offsetp;
1810 1816
1811 while (1) { 1817 while (1) {
1812 struct inode *dir_inode = file_inode(file);
1813 unsigned int reclen; 1818 unsigned int reclen;
1814 1819
1815 cdp->err = nfserr_eof; /* will be cleared on successful read */ 1820 cdp->err = nfserr_eof; /* will be cleared on successful read */
@@ -1828,15 +1833,6 @@ static __be32 nfsd_buffered_readdir(struct file *file, nfsd_filldir_t func,
1828 if (!size) 1833 if (!size)
1829 break; 1834 break;
1830 1835
1831 /*
1832 * Various filldir functions may end up calling back into
1833 * lookup_one_len() and the file system's ->lookup() method.
1834 * These expect i_mutex to be held, as it would within readdir.
1835 */
1836 host_err = mutex_lock_killable(&dir_inode->i_mutex);
1837 if (host_err)
1838 break;
1839
1840 de = (struct buffered_dirent *)buf.dirent; 1836 de = (struct buffered_dirent *)buf.dirent;
1841 while (size > 0) { 1837 while (size > 0) {
1842 offset = de->offset; 1838 offset = de->offset;
@@ -1853,7 +1849,6 @@ static __be32 nfsd_buffered_readdir(struct file *file, nfsd_filldir_t func,
1853 size -= reclen; 1849 size -= reclen;
1854 de = (struct buffered_dirent *)((char *)de + reclen); 1850 de = (struct buffered_dirent *)((char *)de + reclen);
1855 } 1851 }
1856 mutex_unlock(&dir_inode->i_mutex);
1857 if (size > 0) /* We bailed out early */ 1852 if (size > 0) /* We bailed out early */
1858 break; 1853 break;
1859 1854
diff --git a/include/linux/namei.h b/include/linux/namei.h
index d8c6334cd150..d0f25d81b46a 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -77,6 +77,7 @@ extern struct dentry *kern_path_locked(const char *, struct path *);
77extern int kern_path_mountpoint(int, const char *, struct path *, unsigned int); 77extern int kern_path_mountpoint(int, const char *, struct path *, unsigned int);
78 78
79extern struct dentry *lookup_one_len(const char *, struct dentry *, int); 79extern struct dentry *lookup_one_len(const char *, struct dentry *, int);
80extern struct dentry *lookup_one_len_unlocked(const char *, struct dentry *, int);
80 81
81extern int follow_down_one(struct path *); 82extern int follow_down_one(struct path *);
82extern int follow_down(struct path *); 83extern int follow_down(struct path *);