summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2019-09-27 20:00:27 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2019-09-27 20:00:27 -0400
commit 298fb76a5583900a155d387efaf37a8b39e5dea2 (patch)
tree 55b903ec587e8ec470c13084938303f542139557
parent 8f744bdee4fefb17fac052c7418b830de2b59ac8 (diff)
parent e41f9efb85d38d95744b9f35b9903109032b93d4 (diff)
Merge tag 'nfsd-5.4' of git://linux-nfs.org/~bfields/linux
Pull nfsd updates from Bruce Fields: "Highlights: - Add a new knfsd file cache, so that we don't have to open and close on each (NFSv2/v3) READ or WRITE. This can speed up read and write in some cases. It also replaces our readahead cache. - Prevent silent data loss on write errors, by treating write errors like server reboots for the purposes of write caching, thus forcing clients to resend their writes. - Tweak the code that allocates sessions to be more forgiving, so that NFSv4.1 mounts are less likely to hang when a server already has a lot of clients. - Eliminate an arbitrary limit on NFSv4 ACL sizes; they should now be limited only by the backend filesystem and the maximum RPC size. - Allow the server to enforce use of the correct kerberos credentials when a client reclaims state after a reboot. And some miscellaneous smaller bugfixes and cleanup" * tag 'nfsd-5.4' of git://linux-nfs.org/~bfields/linux: (34 commits) sunrpc: clean up indentation issue nfsd: fix nfs read eof detection nfsd: Make nfsd_reset_boot_verifier_locked static nfsd: degraded slot-count more gracefully as allocation nears exhaustion. nfsd: handle drc over-allocation gracefully. nfsd: add support for upcall version 2 nfsd: add a "GetVersion" upcall for nfsdcld nfsd: Reset the boot verifier on all write I/O errors nfsd: Don't garbage collect files that might contain write errors nfsd: Support the server resetting the boot verifier nfsd: nfsd_file cache entries should be per net namespace nfsd: eliminate an unnecessary acl size limit Deprecate nfsd fault injection nfsd: remove duplicated include from filecache.c nfsd: Fix the documentation for svcxdr_tmpalloc() nfsd: Fix up some unused variable warnings nfsd: close cached files prior to a REMOVE or RENAME that would replace target nfsd: rip out the raparms cache nfsd: have nfsd_test_lock use the nfsd_file cache nfsd: hook up nfs4_preprocess_stateid_op to the nfsd_file cache ...
-rw-r--r--fs/file_table.c1
-rw-r--r--fs/locks.c62
-rw-r--r--fs/nfsd/Kconfig3
-rw-r--r--fs/nfsd/Makefile3
-rw-r--r--fs/nfsd/acl.h8
-rw-r--r--fs/nfsd/blocklayout.c3
-rw-r--r--fs/nfsd/export.c13
-rw-r--r--fs/nfsd/filecache.c934
-rw-r--r--fs/nfsd/filecache.h61
-rw-r--r--fs/nfsd/netns.h4
-rw-r--r--fs/nfsd/nfs3proc.c9
-rw-r--r--fs/nfsd/nfs3xdr.c13
-rw-r--r--fs/nfsd/nfs4callback.c35
-rw-r--r--fs/nfsd/nfs4layouts.c12
-rw-r--r--fs/nfsd/nfs4proc.c97
-rw-r--r--fs/nfsd/nfs4recover.c388
-rw-r--r--fs/nfsd/nfs4state.c239
-rw-r--r--fs/nfsd/nfs4xdr.c56
-rw-r--r--fs/nfsd/nfsctl.c1
-rw-r--r--fs/nfsd/nfsproc.c4
-rw-r--r--fs/nfsd/nfssvc.c48
-rw-r--r--fs/nfsd/state.h13
-rw-r--r--fs/nfsd/trace.h140
-rw-r--r--fs/nfsd/vfs.c351
-rw-r--r--fs/nfsd/vfs.h37
-rw-r--r--fs/nfsd/xdr3.h2
-rw-r--r--fs/nfsd/xdr4.h19
-rw-r--r--fs/notify/fsnotify.h2
-rw-r--r--fs/notify/group.c2
-rw-r--r--fs/notify/mark.c6
-rw-r--r--include/linux/fs.h5
-rw-r--r--include/linux/fsnotify_backend.h2
-rw-r--r--include/linux/sunrpc/cache.h7
-rw-r--r--include/linux/sunrpc/svc_rdma.h6
-rw-r--r--include/uapi/linux/nfsd/cld.h41
-rw-r--r--net/sunrpc/cache.c15
-rw-r--r--net/sunrpc/svc.c4
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma.c7
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_recvfrom.c24
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c6
40 files changed, 2083 insertions, 600 deletions
diff --git a/fs/file_table.c b/fs/file_table.c
index b07b53f24ff5..30d55c9a1744 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -327,6 +327,7 @@ void flush_delayed_fput(void)
327{ 327{
328 delayed_fput(NULL); 328 delayed_fput(NULL);
329} 329}
330EXPORT_SYMBOL_GPL(flush_delayed_fput);
330 331
331static DECLARE_DELAYED_WORK(delayed_fput_work, delayed_fput); 332static DECLARE_DELAYED_WORK(delayed_fput_work, delayed_fput);
332 333
diff --git a/fs/locks.c b/fs/locks.c
index a364ebc5cec3..6970f55daf54 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -212,6 +212,7 @@ struct file_lock_list_struct {
212static DEFINE_PER_CPU(struct file_lock_list_struct, file_lock_list); 212static DEFINE_PER_CPU(struct file_lock_list_struct, file_lock_list);
213DEFINE_STATIC_PERCPU_RWSEM(file_rwsem); 213DEFINE_STATIC_PERCPU_RWSEM(file_rwsem);
214 214
215
215/* 216/*
216 * The blocked_hash is used to find POSIX lock loops for deadlock detection. 217 * The blocked_hash is used to find POSIX lock loops for deadlock detection.
217 * It is protected by blocked_lock_lock. 218 * It is protected by blocked_lock_lock.
@@ -1991,6 +1992,64 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp,
1991} 1992}
1992EXPORT_SYMBOL(generic_setlease); 1993EXPORT_SYMBOL(generic_setlease);
1993 1994
1995#if IS_ENABLED(CONFIG_SRCU)
1996/*
1997 * Kernel subsystems can register to be notified on any attempt to set
1998 * a new lease with the lease_notifier_chain. This is used by (e.g.) nfsd
1999 * to close files that it may have cached when there is an attempt to set a
2000 * conflicting lease.
2001 */
2002static struct srcu_notifier_head lease_notifier_chain;
2003
2004static inline void
2005lease_notifier_chain_init(void)
2006{
2007 srcu_init_notifier_head(&lease_notifier_chain);
2008}
2009
2010static inline void
2011setlease_notifier(long arg, struct file_lock *lease)
2012{
2013 if (arg != F_UNLCK)
2014 srcu_notifier_call_chain(&lease_notifier_chain, arg, lease);
2015}
2016
2017int lease_register_notifier(struct notifier_block *nb)
2018{
2019 return srcu_notifier_chain_register(&lease_notifier_chain, nb);
2020}
2021EXPORT_SYMBOL_GPL(lease_register_notifier);
2022
2023void lease_unregister_notifier(struct notifier_block *nb)
2024{
2025 srcu_notifier_chain_unregister(&lease_notifier_chain, nb);
2026}
2027EXPORT_SYMBOL_GPL(lease_unregister_notifier);
2028
2029#else /* !IS_ENABLED(CONFIG_SRCU) */
2030static inline void
2031lease_notifier_chain_init(void)
2032{
2033}
2034
2035static inline void
2036setlease_notifier(long arg, struct file_lock *lease)
2037{
2038}
2039
2040int lease_register_notifier(struct notifier_block *nb)
2041{
2042 return 0;
2043}
2044EXPORT_SYMBOL_GPL(lease_register_notifier);
2045
2046void lease_unregister_notifier(struct notifier_block *nb)
2047{
2048}
2049EXPORT_SYMBOL_GPL(lease_unregister_notifier);
2050
2051#endif /* IS_ENABLED(CONFIG_SRCU) */
2052
1994/** 2053/**
1995 * vfs_setlease - sets a lease on an open file 2054 * vfs_setlease - sets a lease on an open file
1996 * @filp: file pointer 2055 * @filp: file pointer
@@ -2011,6 +2070,8 @@ EXPORT_SYMBOL(generic_setlease);
2011int 2070int
2012vfs_setlease(struct file *filp, long arg, struct file_lock **lease, void **priv) 2071vfs_setlease(struct file *filp, long arg, struct file_lock **lease, void **priv)
2013{ 2072{
2073 if (lease)
2074 setlease_notifier(arg, *lease);
2014 if (filp->f_op->setlease) 2075 if (filp->f_op->setlease)
2015 return filp->f_op->setlease(filp, arg, lease, priv); 2076 return filp->f_op->setlease(filp, arg, lease, priv);
2016 else 2077 else
@@ -2924,6 +2985,7 @@ static int __init filelock_init(void)
2924 INIT_HLIST_HEAD(&fll->hlist); 2985 INIT_HLIST_HEAD(&fll->hlist);
2925 } 2986 }
2926 2987
2988 lease_notifier_chain_init();
2927 return 0; 2989 return 0;
2928} 2990}
2929core_initcall(filelock_init); 2991core_initcall(filelock_init);
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
index d25f6bbe7006..10cefb0c07c7 100644
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig
@@ -3,6 +3,7 @@ config NFSD
3 tristate "NFS server support" 3 tristate "NFS server support"
4 depends on INET 4 depends on INET
5 depends on FILE_LOCKING 5 depends on FILE_LOCKING
6 depends on FSNOTIFY
6 select LOCKD 7 select LOCKD
7 select SUNRPC 8 select SUNRPC
8 select EXPORTFS 9 select EXPORTFS
@@ -147,7 +148,7 @@ config NFSD_V4_SECURITY_LABEL
147 148
148config NFSD_FAULT_INJECTION 149config NFSD_FAULT_INJECTION
149 bool "NFS server manual fault injection" 150 bool "NFS server manual fault injection"
150 depends on NFSD_V4 && DEBUG_KERNEL && DEBUG_FS 151 depends on NFSD_V4 && DEBUG_KERNEL && DEBUG_FS && BROKEN
151 help 152 help
152 This option enables support for manually injecting faults 153 This option enables support for manually injecting faults
153 into the NFS server. This is intended to be used for 154 into the NFS server. This is intended to be used for
diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile
index 2bfb58eefad1..6a40b1afe703 100644
--- a/fs/nfsd/Makefile
+++ b/fs/nfsd/Makefile
@@ -11,7 +11,8 @@ obj-$(CONFIG_NFSD) += nfsd.o
11nfsd-y += trace.o 11nfsd-y += trace.o
12 12
13nfsd-y += nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \ 13nfsd-y += nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \
14 export.o auth.o lockd.o nfscache.o nfsxdr.o stats.o 14 export.o auth.o lockd.o nfscache.o nfsxdr.o \
15 stats.o filecache.o
15nfsd-$(CONFIG_NFSD_FAULT_INJECTION) += fault_inject.o 16nfsd-$(CONFIG_NFSD_FAULT_INJECTION) += fault_inject.o
16nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o 17nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o
17nfsd-$(CONFIG_NFSD_V3) += nfs3proc.o nfs3xdr.o 18nfsd-$(CONFIG_NFSD_V3) += nfs3proc.o nfs3xdr.o
diff --git a/fs/nfsd/acl.h b/fs/nfsd/acl.h
index 4cd7c69a6cb9..ba14d2f4b64f 100644
--- a/fs/nfsd/acl.h
+++ b/fs/nfsd/acl.h
@@ -39,14 +39,6 @@ struct nfs4_acl;
39struct svc_fh; 39struct svc_fh;
40struct svc_rqst; 40struct svc_rqst;
41 41
42/*
43 * Maximum ACL we'll accept from a client; chosen (somewhat
44 * arbitrarily) so that kmalloc'ing the ACL shouldn't require a
45 * high-order allocation. This allows 204 ACEs on x86_64:
46 */
47#define NFS4_ACL_MAX ((PAGE_SIZE - sizeof(struct nfs4_acl)) \
48 / sizeof(struct nfs4_ace))
49
50int nfs4_acl_bytes(int entries); 42int nfs4_acl_bytes(int entries);
51int nfs4_acl_get_whotype(char *, u32); 43int nfs4_acl_get_whotype(char *, u32);
52__be32 nfs4_acl_write_who(struct xdr_stream *xdr, int who); 44__be32 nfs4_acl_write_who(struct xdr_stream *xdr, int who);
diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
index 66d4c55eb48e..9bbaa671c079 100644
--- a/fs/nfsd/blocklayout.c
+++ b/fs/nfsd/blocklayout.c
@@ -15,6 +15,7 @@
15 15
16#include "blocklayoutxdr.h" 16#include "blocklayoutxdr.h"
17#include "pnfs.h" 17#include "pnfs.h"
18#include "filecache.h"
18 19
19#define NFSDDBG_FACILITY NFSDDBG_PNFS 20#define NFSDDBG_FACILITY NFSDDBG_PNFS
20 21
@@ -404,7 +405,7 @@ static void
404nfsd4_scsi_fence_client(struct nfs4_layout_stateid *ls) 405nfsd4_scsi_fence_client(struct nfs4_layout_stateid *ls)
405{ 406{
406 struct nfs4_client *clp = ls->ls_stid.sc_client; 407 struct nfs4_client *clp = ls->ls_stid.sc_client;
407 struct block_device *bdev = ls->ls_file->f_path.mnt->mnt_sb->s_bdev; 408 struct block_device *bdev = ls->ls_file->nf_file->f_path.mnt->mnt_sb->s_bdev;
408 409
409 bdev->bd_disk->fops->pr_ops->pr_preempt(bdev, NFSD_MDS_PR_KEY, 410 bdev->bd_disk->fops->pr_ops->pr_preempt(bdev, NFSD_MDS_PR_KEY,
410 nfsd4_scsi_pr_key(clp), 0, true); 411 nfsd4_scsi_pr_key(clp), 0, true);
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index baa01956a5b3..15422c951fd1 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -22,6 +22,7 @@
22#include "nfsfh.h" 22#include "nfsfh.h"
23#include "netns.h" 23#include "netns.h"
24#include "pnfs.h" 24#include "pnfs.h"
25#include "filecache.h"
25 26
26#define NFSDDBG_FACILITY NFSDDBG_EXPORT 27#define NFSDDBG_FACILITY NFSDDBG_EXPORT
27 28
@@ -232,6 +233,17 @@ static struct cache_head *expkey_alloc(void)
232 return NULL; 233 return NULL;
233} 234}
234 235
236static void expkey_flush(void)
237{
238 /*
239 * Take the nfsd_mutex here to ensure that the file cache is not
240 * destroyed while we're in the middle of flushing.
241 */
242 mutex_lock(&nfsd_mutex);
243 nfsd_file_cache_purge(current->nsproxy->net_ns);
244 mutex_unlock(&nfsd_mutex);
245}
246
235static const struct cache_detail svc_expkey_cache_template = { 247static const struct cache_detail svc_expkey_cache_template = {
236 .owner = THIS_MODULE, 248 .owner = THIS_MODULE,
237 .hash_size = EXPKEY_HASHMAX, 249 .hash_size = EXPKEY_HASHMAX,
@@ -244,6 +256,7 @@ static const struct cache_detail svc_expkey_cache_template = {
244 .init = expkey_init, 256 .init = expkey_init,
245 .update = expkey_update, 257 .update = expkey_update,
246 .alloc = expkey_alloc, 258 .alloc = expkey_alloc,
259 .flush = expkey_flush,
247}; 260};
248 261
249static int 262static int
diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
new file mode 100644
index 000000000000..ef55e9b1cd4e
--- /dev/null
+++ b/fs/nfsd/filecache.c
@@ -0,0 +1,934 @@
1/*
2 * Open file cache.
3 *
4 * (c) 2015 - Jeff Layton <jeff.layton@primarydata.com>
5 */
6
7#include <linux/hash.h>
8#include <linux/slab.h>
9#include <linux/file.h>
10#include <linux/sched.h>
11#include <linux/list_lru.h>
12#include <linux/fsnotify_backend.h>
13#include <linux/fsnotify.h>
14#include <linux/seq_file.h>
15
16#include "vfs.h"
17#include "nfsd.h"
18#include "nfsfh.h"
19#include "netns.h"
20#include "filecache.h"
21#include "trace.h"
22
23#define NFSDDBG_FACILITY NFSDDBG_FH
24
25/* FIXME: dynamically size this for the machine somehow? */
26#define NFSD_FILE_HASH_BITS 12
27#define NFSD_FILE_HASH_SIZE (1 << NFSD_FILE_HASH_BITS)
28#define NFSD_LAUNDRETTE_DELAY (2 * HZ)
29
30#define NFSD_FILE_LRU_RESCAN (0)
31#define NFSD_FILE_SHUTDOWN (1)
32#define NFSD_FILE_LRU_THRESHOLD (4096UL)
33#define NFSD_FILE_LRU_LIMIT (NFSD_FILE_LRU_THRESHOLD << 2)
34
35/* We only care about NFSD_MAY_READ/WRITE for this cache */
36#define NFSD_FILE_MAY_MASK (NFSD_MAY_READ|NFSD_MAY_WRITE)
37
38struct nfsd_fcache_bucket {
39 struct hlist_head nfb_head;
40 spinlock_t nfb_lock;
41 unsigned int nfb_count;
42 unsigned int nfb_maxcount;
43};
44
45static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits);
46
47static struct kmem_cache *nfsd_file_slab;
48static struct kmem_cache *nfsd_file_mark_slab;
49static struct nfsd_fcache_bucket *nfsd_file_hashtbl;
50static struct list_lru nfsd_file_lru;
51static long nfsd_file_lru_flags;
52static struct fsnotify_group *nfsd_file_fsnotify_group;
53static atomic_long_t nfsd_filecache_count;
54static struct delayed_work nfsd_filecache_laundrette;
55
56enum nfsd_file_laundrette_ctl {
57 NFSD_FILE_LAUNDRETTE_NOFLUSH = 0,
58 NFSD_FILE_LAUNDRETTE_MAY_FLUSH
59};
60
61static void
62nfsd_file_schedule_laundrette(enum nfsd_file_laundrette_ctl ctl)
63{
64 long count = atomic_long_read(&nfsd_filecache_count);
65
66 if (count == 0 || test_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags))
67 return;
68
69 /* Be more aggressive about scanning if over the threshold */
70 if (count > NFSD_FILE_LRU_THRESHOLD)
71 mod_delayed_work(system_wq, &nfsd_filecache_laundrette, 0);
72 else
73 schedule_delayed_work(&nfsd_filecache_laundrette, NFSD_LAUNDRETTE_DELAY);
74
75 if (ctl == NFSD_FILE_LAUNDRETTE_NOFLUSH)
76 return;
77
78 /* ...and don't delay flushing if we're out of control */
79 if (count >= NFSD_FILE_LRU_LIMIT)
80 flush_delayed_work(&nfsd_filecache_laundrette);
81}
82
83static void
84nfsd_file_slab_free(struct rcu_head *rcu)
85{
86 struct nfsd_file *nf = container_of(rcu, struct nfsd_file, nf_rcu);
87
88 put_cred(nf->nf_cred);
89 kmem_cache_free(nfsd_file_slab, nf);
90}
91
92static void
93nfsd_file_mark_free(struct fsnotify_mark *mark)
94{
95 struct nfsd_file_mark *nfm = container_of(mark, struct nfsd_file_mark,
96 nfm_mark);
97
98 kmem_cache_free(nfsd_file_mark_slab, nfm);
99}
100
101static struct nfsd_file_mark *
102nfsd_file_mark_get(struct nfsd_file_mark *nfm)
103{
104 if (!atomic_inc_not_zero(&nfm->nfm_ref))
105 return NULL;
106 return nfm;
107}
108
109static void
110nfsd_file_mark_put(struct nfsd_file_mark *nfm)
111{
112 if (atomic_dec_and_test(&nfm->nfm_ref)) {
113
114 fsnotify_destroy_mark(&nfm->nfm_mark, nfsd_file_fsnotify_group);
115 fsnotify_put_mark(&nfm->nfm_mark);
116 }
117}
118
119static struct nfsd_file_mark *
120nfsd_file_mark_find_or_create(struct nfsd_file *nf)
121{
122 int err;
123 struct fsnotify_mark *mark;
124 struct nfsd_file_mark *nfm = NULL, *new;
125 struct inode *inode = nf->nf_inode;
126
127 do {
128 mutex_lock(&nfsd_file_fsnotify_group->mark_mutex);
129 mark = fsnotify_find_mark(&inode->i_fsnotify_marks,
130 nfsd_file_fsnotify_group);
131 if (mark) {
132 nfm = nfsd_file_mark_get(container_of(mark,
133 struct nfsd_file_mark,
134 nfm_mark));
135 mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex);
136 fsnotify_put_mark(mark);
137 if (likely(nfm))
138 break;
139 } else
140 mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex);
141
142 /* allocate a new nfm */
143 new = kmem_cache_alloc(nfsd_file_mark_slab, GFP_KERNEL);
144 if (!new)
145 return NULL;
146 fsnotify_init_mark(&new->nfm_mark, nfsd_file_fsnotify_group);
147 new->nfm_mark.mask = FS_ATTRIB|FS_DELETE_SELF;
148 atomic_set(&new->nfm_ref, 1);
149
150 err = fsnotify_add_inode_mark(&new->nfm_mark, inode, 0);
151
152 /*
153 * If the add was successful, then return the object.
154 * Otherwise, we need to put the reference we hold on the
155 * nfm_mark. The fsnotify code will take a reference and put
156 * it on failure, so we can't just free it directly. It's also
157 * not safe to call fsnotify_destroy_mark on it as the
158 * mark->group will be NULL. Thus, we can't let the nfm_ref
159 * counter drive the destruction at this point.
160 */
161 if (likely(!err))
162 nfm = new;
163 else
164 fsnotify_put_mark(&new->nfm_mark);
165 } while (unlikely(err == -EEXIST));
166
167 return nfm;
168}
169
170static struct nfsd_file *
171nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval,
172 struct net *net)
173{
174 struct nfsd_file *nf;
175
176 nf = kmem_cache_alloc(nfsd_file_slab, GFP_KERNEL);
177 if (nf) {
178 INIT_HLIST_NODE(&nf->nf_node);
179 INIT_LIST_HEAD(&nf->nf_lru);
180 nf->nf_file = NULL;
181 nf->nf_cred = get_current_cred();
182 nf->nf_net = net;
183 nf->nf_flags = 0;
184 nf->nf_inode = inode;
185 nf->nf_hashval = hashval;
186 atomic_set(&nf->nf_ref, 1);
187 nf->nf_may = may & NFSD_FILE_MAY_MASK;
188 if (may & NFSD_MAY_NOT_BREAK_LEASE) {
189 if (may & NFSD_MAY_WRITE)
190 __set_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags);
191 if (may & NFSD_MAY_READ)
192 __set_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
193 }
194 nf->nf_mark = NULL;
195 trace_nfsd_file_alloc(nf);
196 }
197 return nf;
198}
199
200static bool
201nfsd_file_free(struct nfsd_file *nf)
202{
203 bool flush = false;
204
205 trace_nfsd_file_put_final(nf);
206 if (nf->nf_mark)
207 nfsd_file_mark_put(nf->nf_mark);
208 if (nf->nf_file) {
209 get_file(nf->nf_file);
210 filp_close(nf->nf_file, NULL);
211 fput(nf->nf_file);
212 flush = true;
213 }
214 call_rcu(&nf->nf_rcu, nfsd_file_slab_free);
215 return flush;
216}
217
218static bool
219nfsd_file_check_writeback(struct nfsd_file *nf)
220{
221 struct file *file = nf->nf_file;
222 struct address_space *mapping;
223
224 if (!file || !(file->f_mode & FMODE_WRITE))
225 return false;
226 mapping = file->f_mapping;
227 return mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) ||
228 mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK);
229}
230
231static int
232nfsd_file_check_write_error(struct nfsd_file *nf)
233{
234 struct file *file = nf->nf_file;
235
236 if (!file || !(file->f_mode & FMODE_WRITE))
237 return 0;
238 return filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err));
239}
240
241static bool
242nfsd_file_in_use(struct nfsd_file *nf)
243{
244 return nfsd_file_check_writeback(nf) ||
245 nfsd_file_check_write_error(nf);
246}
247
248static void
249nfsd_file_do_unhash(struct nfsd_file *nf)
250{
251 lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
252
253 trace_nfsd_file_unhash(nf);
254
255 if (nfsd_file_check_write_error(nf))
256 nfsd_reset_boot_verifier(net_generic(nf->nf_net, nfsd_net_id));
257 --nfsd_file_hashtbl[nf->nf_hashval].nfb_count;
258 hlist_del_rcu(&nf->nf_node);
259 if (!list_empty(&nf->nf_lru))
260 list_lru_del(&nfsd_file_lru, &nf->nf_lru);
261 atomic_long_dec(&nfsd_filecache_count);
262}
263
264static bool
265nfsd_file_unhash(struct nfsd_file *nf)
266{
267 if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
268 nfsd_file_do_unhash(nf);
269 return true;
270 }
271 return false;
272}
273
274/*
275 * Return true if the file was unhashed.
276 */
277static bool
278nfsd_file_unhash_and_release_locked(struct nfsd_file *nf, struct list_head *dispose)
279{
280 lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
281
282 trace_nfsd_file_unhash_and_release_locked(nf);
283 if (!nfsd_file_unhash(nf))
284 return false;
285 /* keep final reference for nfsd_file_lru_dispose */
286 if (atomic_add_unless(&nf->nf_ref, -1, 1))
287 return true;
288
289 list_add(&nf->nf_lru, dispose);
290 return true;
291}
292
293static int
294nfsd_file_put_noref(struct nfsd_file *nf)
295{
296 int count;
297 trace_nfsd_file_put(nf);
298
299 count = atomic_dec_return(&nf->nf_ref);
300 if (!count) {
301 WARN_ON(test_bit(NFSD_FILE_HASHED, &nf->nf_flags));
302 nfsd_file_free(nf);
303 }
304 return count;
305}
306
307void
308nfsd_file_put(struct nfsd_file *nf)
309{
310 bool is_hashed = test_bit(NFSD_FILE_HASHED, &nf->nf_flags) != 0;
311 bool unused = !nfsd_file_in_use(nf);
312
313 set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
314 if (nfsd_file_put_noref(nf) == 1 && is_hashed && unused)
315 nfsd_file_schedule_laundrette(NFSD_FILE_LAUNDRETTE_MAY_FLUSH);
316}
317
318struct nfsd_file *
319nfsd_file_get(struct nfsd_file *nf)
320{
321 if (likely(atomic_inc_not_zero(&nf->nf_ref)))
322 return nf;
323 return NULL;
324}
325
326static void
327nfsd_file_dispose_list(struct list_head *dispose)
328{
329 struct nfsd_file *nf;
330
331 while(!list_empty(dispose)) {
332 nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
333 list_del(&nf->nf_lru);
334 nfsd_file_put_noref(nf);
335 }
336}
337
338static void
339nfsd_file_dispose_list_sync(struct list_head *dispose)
340{
341 bool flush = false;
342 struct nfsd_file *nf;
343
344 while(!list_empty(dispose)) {
345 nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
346 list_del(&nf->nf_lru);
347 if (!atomic_dec_and_test(&nf->nf_ref))
348 continue;
349 if (nfsd_file_free(nf))
350 flush = true;
351 }
352 if (flush)
353 flush_delayed_fput();
354}
355
356/*
357 * Note this can deadlock with nfsd_file_cache_purge.
358 */
359static enum lru_status
360nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru,
361 spinlock_t *lock, void *arg)
362 __releases(lock)
363 __acquires(lock)
364{
365 struct list_head *head = arg;
366 struct nfsd_file *nf = list_entry(item, struct nfsd_file, nf_lru);
367
368 /*
369 * Do a lockless refcount check. The hashtable holds one reference, so
370 * we look to see if anything else has a reference, or if any have
371 * been put since the shrinker last ran. Those don't get unhashed and
372 * released.
373 *
374 * Note that in the put path, we set the flag and then decrement the
375 * counter. Here we check the counter and then test and clear the flag.
376 * That order is deliberate to ensure that we can do this locklessly.
377 */
378 if (atomic_read(&nf->nf_ref) > 1)
379 goto out_skip;
380
381 /*
382 * Don't throw out files that are still undergoing I/O or
383 * that have uncleared errors pending.
384 */
385 if (nfsd_file_check_writeback(nf))
386 goto out_skip;
387
388 if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags))
389 goto out_rescan;
390
391 if (!test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags))
392 goto out_skip;
393
394 list_lru_isolate_move(lru, &nf->nf_lru, head);
395 return LRU_REMOVED;
396out_rescan:
397 set_bit(NFSD_FILE_LRU_RESCAN, &nfsd_file_lru_flags);
398out_skip:
399 return LRU_SKIP;
400}
401
402static void
403nfsd_file_lru_dispose(struct list_head *head)
404{
405 while(!list_empty(head)) {
406 struct nfsd_file *nf = list_first_entry(head,
407 struct nfsd_file, nf_lru);
408 list_del_init(&nf->nf_lru);
409 spin_lock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
410 nfsd_file_do_unhash(nf);
411 spin_unlock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
412 nfsd_file_put_noref(nf);
413 }
414}
415
416static unsigned long
417nfsd_file_lru_count(struct shrinker *s, struct shrink_control *sc)
418{
419 return list_lru_count(&nfsd_file_lru);
420}
421
422static unsigned long
423nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc)
424{
425 LIST_HEAD(head);
426 unsigned long ret;
427
428 ret = list_lru_shrink_walk(&nfsd_file_lru, sc, nfsd_file_lru_cb, &head);
429 nfsd_file_lru_dispose(&head);
430 return ret;
431}
432
433static struct shrinker nfsd_file_shrinker = {
434 .scan_objects = nfsd_file_lru_scan,
435 .count_objects = nfsd_file_lru_count,
436 .seeks = 1,
437};
438
439static void
440__nfsd_file_close_inode(struct inode *inode, unsigned int hashval,
441 struct list_head *dispose)
442{
443 struct nfsd_file *nf;
444 struct hlist_node *tmp;
445
446 spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
447 hlist_for_each_entry_safe(nf, tmp, &nfsd_file_hashtbl[hashval].nfb_head, nf_node) {
448 if (inode == nf->nf_inode)
449 nfsd_file_unhash_and_release_locked(nf, dispose);
450 }
451 spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
452}
453
454/**
455 * nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file
456 * @inode: inode of the file to attempt to remove
457 *
458 * Walk the whole hash bucket, looking for any files that correspond to "inode".
459 * If any do, then unhash them and put the hashtable reference to them and
460 * destroy any that had their last reference put. Also ensure that any of the
461 * fputs also have their final __fput done as well.
462 */
463void
464nfsd_file_close_inode_sync(struct inode *inode)
465{
466 unsigned int hashval = (unsigned int)hash_long(inode->i_ino,
467 NFSD_FILE_HASH_BITS);
468 LIST_HEAD(dispose);
469
470 __nfsd_file_close_inode(inode, hashval, &dispose);
471 trace_nfsd_file_close_inode_sync(inode, hashval, !list_empty(&dispose));
472 nfsd_file_dispose_list_sync(&dispose);
473}
474
475/**
476 * nfsd_file_close_inode - attempt to forcibly close a nfsd_file
477 * @inode: inode of the file to attempt to remove
478 *
479 * Walk the whole hash bucket, looking for any files that correspond to "inode".
480 * If any do, then unhash them and put the hashtable reference to them and
481 * destroy any that had their last reference put.
482 */
483static void
484nfsd_file_close_inode(struct inode *inode)
485{
486 unsigned int hashval = (unsigned int)hash_long(inode->i_ino,
487 NFSD_FILE_HASH_BITS);
488 LIST_HEAD(dispose);
489
490 __nfsd_file_close_inode(inode, hashval, &dispose);
491 trace_nfsd_file_close_inode(inode, hashval, !list_empty(&dispose));
492 nfsd_file_dispose_list(&dispose);
493}
494
495/**
496 * nfsd_file_delayed_close - close unused nfsd_files
497 * @work: dummy
498 *
499 * Walk the LRU list and close any entries that have not been used since
500 * the last scan.
501 *
502 * Note this can deadlock with nfsd_file_cache_purge.
503 */
504static void
505nfsd_file_delayed_close(struct work_struct *work)
506{
507 LIST_HEAD(head);
508
509 list_lru_walk(&nfsd_file_lru, nfsd_file_lru_cb, &head, LONG_MAX);
510
511 if (test_and_clear_bit(NFSD_FILE_LRU_RESCAN, &nfsd_file_lru_flags))
512 nfsd_file_schedule_laundrette(NFSD_FILE_LAUNDRETTE_NOFLUSH);
513
514 if (!list_empty(&head)) {
515 nfsd_file_lru_dispose(&head);
516 flush_delayed_fput();
517 }
518}
519
520static int
521nfsd_file_lease_notifier_call(struct notifier_block *nb, unsigned long arg,
522 void *data)
523{
524 struct file_lock *fl = data;
525
526 /* Only close files for F_SETLEASE leases */
527 if (fl->fl_flags & FL_LEASE)
528 nfsd_file_close_inode_sync(file_inode(fl->fl_file));
529 return 0;
530}
531
532static struct notifier_block nfsd_file_lease_notifier = {
533 .notifier_call = nfsd_file_lease_notifier_call,
534};
535
536static int
537nfsd_file_fsnotify_handle_event(struct fsnotify_group *group,
538 struct inode *inode,
539 u32 mask, const void *data, int data_type,
540 const struct qstr *file_name, u32 cookie,
541 struct fsnotify_iter_info *iter_info)
542{
543 trace_nfsd_file_fsnotify_handle_event(inode, mask);
544
545 /* Should be no marks on non-regular files */
546 if (!S_ISREG(inode->i_mode)) {
547 WARN_ON_ONCE(1);
548 return 0;
549 }
550
551 /* don't close files if this was not the last link */
552 if (mask & FS_ATTRIB) {
553 if (inode->i_nlink)
554 return 0;
555 }
556
557 nfsd_file_close_inode(inode);
558 return 0;
559}
560
561
562static const struct fsnotify_ops nfsd_file_fsnotify_ops = {
563 .handle_event = nfsd_file_fsnotify_handle_event,
564 .free_mark = nfsd_file_mark_free,
565};
566
567int
568nfsd_file_cache_init(void)
569{
570 int ret = -ENOMEM;
571 unsigned int i;
572
573 clear_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags);
574
575 if (nfsd_file_hashtbl)
576 return 0;
577
578 nfsd_file_hashtbl = kcalloc(NFSD_FILE_HASH_SIZE,
579 sizeof(*nfsd_file_hashtbl), GFP_KERNEL);
580 if (!nfsd_file_hashtbl) {
581 pr_err("nfsd: unable to allocate nfsd_file_hashtbl\n");
582 goto out_err;
583 }
584
585 nfsd_file_slab = kmem_cache_create("nfsd_file",
586 sizeof(struct nfsd_file), 0, 0, NULL);
587 if (!nfsd_file_slab) {
588 pr_err("nfsd: unable to create nfsd_file_slab\n");
589 goto out_err;
590 }
591
592 nfsd_file_mark_slab = kmem_cache_create("nfsd_file_mark",
593 sizeof(struct nfsd_file_mark), 0, 0, NULL);
594 if (!nfsd_file_mark_slab) {
595 pr_err("nfsd: unable to create nfsd_file_mark_slab\n");
596 goto out_err;
597 }
598
599
600 ret = list_lru_init(&nfsd_file_lru);
601 if (ret) {
602 pr_err("nfsd: failed to init nfsd_file_lru: %d\n", ret);
603 goto out_err;
604 }
605
606 ret = register_shrinker(&nfsd_file_shrinker);
607 if (ret) {
608 pr_err("nfsd: failed to register nfsd_file_shrinker: %d\n", ret);
609 goto out_lru;
610 }
611
612 ret = lease_register_notifier(&nfsd_file_lease_notifier);
613 if (ret) {
614 pr_err("nfsd: unable to register lease notifier: %d\n", ret);
615 goto out_shrinker;
616 }
617
618 nfsd_file_fsnotify_group = fsnotify_alloc_group(&nfsd_file_fsnotify_ops);
619 if (IS_ERR(nfsd_file_fsnotify_group)) {
620 pr_err("nfsd: unable to create fsnotify group: %ld\n",
621 PTR_ERR(nfsd_file_fsnotify_group));
622 nfsd_file_fsnotify_group = NULL;
623 goto out_notifier;
624 }
625
626 for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
627 INIT_HLIST_HEAD(&nfsd_file_hashtbl[i].nfb_head);
628 spin_lock_init(&nfsd_file_hashtbl[i].nfb_lock);
629 }
630
631 INIT_DELAYED_WORK(&nfsd_filecache_laundrette, nfsd_file_delayed_close);
632out:
633 return ret;
634out_notifier:
635 lease_unregister_notifier(&nfsd_file_lease_notifier);
636out_shrinker:
637 unregister_shrinker(&nfsd_file_shrinker);
638out_lru:
639 list_lru_destroy(&nfsd_file_lru);
640out_err:
641 kmem_cache_destroy(nfsd_file_slab);
642 nfsd_file_slab = NULL;
643 kmem_cache_destroy(nfsd_file_mark_slab);
644 nfsd_file_mark_slab = NULL;
645 kfree(nfsd_file_hashtbl);
646 nfsd_file_hashtbl = NULL;
647 goto out;
648}
649
650/*
651 * Note this can deadlock with nfsd_file_lru_cb.
652 */
653void
654nfsd_file_cache_purge(struct net *net)
655{
656 unsigned int i;
657 struct nfsd_file *nf;
658 struct hlist_node *next;
659 LIST_HEAD(dispose);
660 bool del;
661
662 if (!nfsd_file_hashtbl)
663 return;
664
665 for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
666 struct nfsd_fcache_bucket *nfb = &nfsd_file_hashtbl[i];
667
668 spin_lock(&nfb->nfb_lock);
669 hlist_for_each_entry_safe(nf, next, &nfb->nfb_head, nf_node) {
670 if (net && nf->nf_net != net)
671 continue;
672 del = nfsd_file_unhash_and_release_locked(nf, &dispose);
673
674 /*
675 * Deadlock detected! Something marked this entry as
676 * unhashed, but hasn't removed it from the hash list.
677 */
678 WARN_ON_ONCE(!del);
679 }
680 spin_unlock(&nfb->nfb_lock);
681 nfsd_file_dispose_list(&dispose);
682 }
683}
684
/*
 * nfsd_file_cache_shutdown - tear down the nfsd file cache
 *
 * Sets NFSD_FILE_SHUTDOWN in nfsd_file_lru_flags, unregisters the lease
 * notifier and the shrinker, cancels the laundrette work, purges every
 * cached entry (all namespaces), and then releases the LRU, the fsnotify
 * group, both slab caches and the hash table. The global pointers are reset
 * to NULL after they are released.
 *
 * NOTE(review): the local "dispose" list is declared but does not appear to
 * be used anywhere in this function - confirm and consider removing it.
 */
685void
686nfsd_file_cache_shutdown(void)
687{
688 LIST_HEAD(dispose);
689
690 set_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags);
691
692 lease_unregister_notifier(&nfsd_file_lease_notifier);
693 unregister_shrinker(&nfsd_file_shrinker);
694 /*
695 * make sure all callers of nfsd_file_lru_cb are done before
696 * calling nfsd_file_cache_purge
697 */
698 cancel_delayed_work_sync(&nfsd_filecache_laundrette);
699 nfsd_file_cache_purge(NULL);
700 list_lru_destroy(&nfsd_file_lru);
/*
 * NOTE(review): presumably waits for outstanding RCU callbacks that still
 * reference nfsd_file objects before the slab below is destroyed - confirm.
 */
701 rcu_barrier();
702 fsnotify_put_group(nfsd_file_fsnotify_group);
703 nfsd_file_fsnotify_group = NULL;
704 kmem_cache_destroy(nfsd_file_slab);
705 nfsd_file_slab = NULL;
/* Wait for mark destruction before destroying the slab the marks live in. */
706 fsnotify_wait_marks_destroyed();
707 kmem_cache_destroy(nfsd_file_mark_slab);
708 nfsd_file_mark_slab = NULL;
709 kfree(nfsd_file_hashtbl);
710 nfsd_file_hashtbl = NULL;
711}
712
/*
 * nfsd_match_cred - compare the filesystem identity of two credentials
 * @c1: first credential
 * @c2: second credential
 *
 * Returns true when both credentials have the same fsuid, the same fsgid
 * and the same supplementary group list (same count, same gids in the same
 * order). If either group_info pointer is NULL, the credentials match only
 * when both pointers are equal (i.e. both NULL).
 */
713static bool
714nfsd_match_cred(const struct cred *c1, const struct cred *c2)
715{
716 int i;
717
718 if (!uid_eq(c1->fsuid, c2->fsuid))
719 return false;
720 if (!gid_eq(c1->fsgid, c2->fsgid))
721 return false;
/* Avoid dereferencing a NULL group_info below. */
722 if (c1->group_info == NULL || c2->group_info == NULL)
723 return c1->group_info == c2->group_info;
724 if (c1->group_info->ngroups != c2->group_info->ngroups)
725 return false;
/* Position-by-position comparison: the lists must be in the same order. */
726 for (i = 0; i < c1->group_info->ngroups; i++) {
727 if (!gid_eq(c1->group_info->gid[i], c2->group_info->gid[i]))
728 return false;
729 }
730 return true;
731}
732
/*
 * nfsd_file_find_locked - look up a usable nfsd_file in one hash bucket
 * @inode: inode the entry must refer to (compared by pointer)
 * @may_flags: requested access; only the NFSD_FILE_MAY_MASK bits are used
 * @hashval: index of the bucket to search
 * @net: network namespace the entry must belong to
 *
 * Walks the bucket with hlist_for_each_entry_rcu() and returns the first
 * entry that grants at least the requested access bits, matches @inode and
 * @net, has credentials matching current_cred(), and on which
 * nfsd_file_get() succeeds. Returns NULL if no such entry exists.
 *
 * The visible callers invoke this either under rcu_read_lock() or with the
 * bucket's nfb_lock held.
 */
733static struct nfsd_file *
734nfsd_file_find_locked(struct inode *inode, unsigned int may_flags,
735 unsigned int hashval, struct net *net)
736{
737 struct nfsd_file *nf;
738 unsigned char need = may_flags & NFSD_FILE_MAY_MASK;
739
740 hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head,
741 nf_node) {
/* The cached entry must allow every access bit we need. */
742 if ((need & nf->nf_may) != need)
743 continue;
744 if (nf->nf_inode != inode)
745 continue;
746 if (nf->nf_net != net)
747 continue;
748 if (!nfsd_match_cred(nf->nf_cred, current_cred()))
749 continue;
/* If taking a reference fails, keep scanning the chain. */
750 if (nfsd_file_get(nf) != NULL)
751 return nf;
752 }
753 return NULL;
754}
755
756/**
757 * nfsd_file_is_cached - are there any cached open files for this inode?
758 * @inode: inode of the file to check
759 *
760 * Scan the hash bucket for @inode under rcu_read_lock() looking for any
761 * entry whose nf_inode equals @inode. Only the inode is compared; the
 * network namespace, credentials and access mode of the entry are ignored,
 * and no reference is taken.
 *
 * Return: true if at least one matching entry is hashed, false otherwise.
762 */
763bool
764nfsd_file_is_cached(struct inode *inode)
765{
766 bool ret = false;
767 struct nfsd_file *nf;
768 unsigned int hashval;
769
770 hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS);
771
772 rcu_read_lock();
773 hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head,
774 nf_node) {
775 if (inode == nf->nf_inode) {
776 ret = true;
777 break;
778 }
779 }
780 rcu_read_unlock();
781 trace_nfsd_file_is_cached(inode, hashval, (int)ret);
782 return ret;
783}
784
/*
 * nfsd_file_acquire - get a referenced nfsd_file for a file handle
 * @rqstp: the RPC request being processed
 * @fhp: file handle to open; verified here as a regular file
 * @may_flags: NFSD_MAY_* access flags requested by the caller
 * @pnf: on success, set to the acquired nfsd_file
 *
 * Verifies @fhp, then looks for a matching cached entry (same inode, net,
 * credentials and sufficient access). On a hit the caller waits for any
 * in-progress construction to finish, and retries the whole lookup if the
 * entry turned out to be unhashed. On a miss a new entry is allocated,
 * hashed with NFSD_FILE_PENDING set, and the file is opened; waiters are
 * woken once construction has succeeded or failed.
 *
 * Returns nfs_ok with *@pnf set on success. On failure an nfserr value is
 * returned, the reference (if any) is dropped, and *@pnf is not written.
 * Allocation failures are reported as nfserr_jukebox.
 */
785__be32
786nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
787 unsigned int may_flags, struct nfsd_file **pnf)
788{
789 __be32 status;
790 struct net *net = SVC_NET(rqstp);
791 struct nfsd_file *nf, *new;
792 struct inode *inode;
793 unsigned int hashval;
794
795 /* FIXME: skip this if fh_dentry is already set? */
796 status = fh_verify(rqstp, fhp, S_IFREG,
797 may_flags|NFSD_MAY_OWNER_OVERRIDE);
798 if (status != nfs_ok)
799 return status;
800
801 inode = d_inode(fhp->fh_dentry);
802 hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS);
803retry:
/* Fast path: lookup under RCU only, without taking the bucket lock. */
804 rcu_read_lock();
805 nf = nfsd_file_find_locked(inode, may_flags, hashval, net);
806 rcu_read_unlock();
807 if (nf)
808 goto wait_for_construction;
809
810 new = nfsd_file_alloc(inode, may_flags, hashval, net);
811 if (!new) {
812 trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags,
813 NULL, nfserr_jukebox);
814 return nfserr_jukebox;
815 }
816
/* Re-check under the bucket lock: someone may have inserted meanwhile. */
817 spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
818 nf = nfsd_file_find_locked(inode, may_flags, hashval, net);
819 if (nf == NULL)
820 goto open_file;
821 spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
/* Lost the race: use the entry that was found and discard ours. */
822 nfsd_file_slab_free(&new->nf_rcu);
823
824wait_for_construction:
/* Sleep until whoever hashed this entry clears NFSD_FILE_PENDING. */
825 wait_on_bit(&nf->nf_flags, NFSD_FILE_PENDING, TASK_UNINTERRUPTIBLE);
826
827 /* Did construction of this file fail? */
828 if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
829 nfsd_file_put_noref(nf);
830 goto retry;
831 }
832
833 this_cpu_inc(nfsd_file_cache_hits);
834
/*
 * A cached entry may have a pending lease break recorded in its flags.
 * Perform it now unless the caller asked not to break leases; the flags
 * are cleared only if the break succeeds.
 */
835 if (!(may_flags & NFSD_MAY_NOT_BREAK_LEASE)) {
836 bool write = (may_flags & NFSD_MAY_WRITE);
837
838 if (test_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags) ||
839 (test_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags) && write)) {
840 status = nfserrno(nfsd_open_break_lease(
841 file_inode(nf->nf_file), may_flags));
842 if (status == nfs_ok) {
843 clear_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
844 if (write)
845 clear_bit(NFSD_FILE_BREAK_WRITE,
846 &nf->nf_flags);
847 }
848 }
849 }
850out:
/* Hand the reference to the caller on success, drop it on failure. */
851 if (status == nfs_ok) {
852 *pnf = nf;
853 } else {
854 nfsd_file_put(nf);
855 nf = NULL;
856 }
857
858 trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags, nf, status);
859 return status;
/*
 * Reached with the bucket lock held and no matching entry hashed. The new
 * entry is published as HASHED|PENDING before the lock is released, so
 * concurrent lookups find it and wait for the open below to complete.
 */
860open_file:
861 nf = new;
862 /* Take reference for the hashtable */
863 atomic_inc(&nf->nf_ref);
864 __set_bit(NFSD_FILE_HASHED, &nf->nf_flags);
865 __set_bit(NFSD_FILE_PENDING, &nf->nf_flags);
866 list_lru_add(&nfsd_file_lru, &nf->nf_lru);
867 hlist_add_head_rcu(&nf->nf_node, &nfsd_file_hashtbl[hashval].nfb_head);
868 ++nfsd_file_hashtbl[hashval].nfb_count;
869 nfsd_file_hashtbl[hashval].nfb_maxcount = max(nfsd_file_hashtbl[hashval].nfb_maxcount,
870 nfsd_file_hashtbl[hashval].nfb_count);
871 spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
872 atomic_long_inc(&nfsd_filecache_count);
873
/* The open is only attempted once an fsnotify mark has been obtained. */
874 nf->nf_mark = nfsd_file_mark_find_or_create(nf);
875 if (nf->nf_mark)
876 status = nfsd_open_verified(rqstp, fhp, S_IFREG,
877 may_flags, &nf->nf_file);
878 else
879 status = nfserr_jukebox;
880 /*
881 * If construction failed, or we raced with a call to unlink()
882 * then unhash.
883 */
884 if (status != nfs_ok || inode->i_nlink == 0) {
885 bool do_free;
886 spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
887 do_free = nfsd_file_unhash(nf);
888 spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
/* Drop the hashtable's reference if we were the ones to unhash it. */
889 if (do_free)
890 nfsd_file_put_noref(nf);
891 }
/* Construction is finished (either way): release and wake any waiters. */
892 clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags);
893 smp_mb__after_atomic();
894 wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING);
895 goto out;
896}
897
898/*
 * nfsd_file_cache_stats_show - seq_file show routine for file cache stats
 * @m: seq_file to print into
 * @v: unused
 *
 * Prints three labelled lines: the sum of nfb_count over all hash buckets,
 * the largest current nfb_count of any bucket, and the sum of the per-cpu
 * nfsd_file_cache_hits counters over all possible CPUs. If the hash table
 * is not allocated, the first two values are reported as 0. Always
 * returns 0.
 *
899 * Note that fields may be added, removed or reordered in the future. Programs
900 * scraping this file for info should test the labels to ensure they're
901 * getting the correct field.
902 */
903static int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
904{
905 unsigned int i, count = 0, longest = 0;
906 unsigned long hits = 0;
907
908 /*
909 * No need for spinlocks here since we're not terribly interested in
910 * accuracy. We do take the nfsd_mutex simply to ensure that we
911 * don't end up racing with server shutdown
912 */
913 mutex_lock(&nfsd_mutex);
914 if (nfsd_file_hashtbl) {
915 for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
916 count += nfsd_file_hashtbl[i].nfb_count;
917 longest = max(longest, nfsd_file_hashtbl[i].nfb_count);
918 }
919 }
920 mutex_unlock(&nfsd_mutex);
921
922 for_each_possible_cpu(i)
923 hits += per_cpu(nfsd_file_cache_hits, i);
924
925 seq_printf(m, "total entries: %u\n", count);
926 seq_printf(m, "longest chain: %u\n", longest);
927 seq_printf(m, "cache hits: %lu\n", hits);
928 return 0;
929}
930
/*
 * nfsd_file_cache_stats_open - open handler for the file cache stats file
 *
 * Sets up a single-record seq_file backed by nfsd_file_cache_stats_show()
 * with no private data, and returns single_open()'s result.
 */
931int nfsd_file_cache_stats_open(struct inode *inode, struct file *file)
932{
933 return single_open(file, nfsd_file_cache_stats_show, NULL);
934}
diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
new file mode 100644
index 000000000000..851d9abf54c2
--- /dev/null
+++ b/fs/nfsd/filecache.h
@@ -0,0 +1,61 @@
1#ifndef _FS_NFSD_FILECACHE_H
2#define _FS_NFSD_FILECACHE_H
3
4#include <linux/fsnotify_backend.h>
5
6/*
7 * This is the fsnotify_mark container that nfsd attaches to the files that it
8 * is holding open. Note that we have a separate refcount here aside from the
9 * one in the fsnotify_mark. We only want a single fsnotify_mark attached to
10 * the inode, and for each nfsd_file to hold a reference to it.
11 *
12 * The fsnotify_mark is itself refcounted, but that's not sufficient to tell us
13 * how to put that reference. If there are still outstanding nfsd_files that
14 * reference the mark, then we would want to call fsnotify_put_mark on it.
15 * If there were not, then we'd need to call fsnotify_destroy_mark. Since we
16 * can't really tell the difference, we use nfm_ref to keep track of how
17 * many nfsd_files hold references to the mark. When that counter goes to zero
18 * then we know to call fsnotify_destroy_mark on it.
19 */
20struct nfsd_file_mark {
21 struct fsnotify_mark nfm_mark; /* the embedded fsnotify mark */
22 atomic_t nfm_ref; /* number of nfsd_files referencing this mark */
23};
24
25/*
26 * A representation of a file that has been opened by knfsd. These are hashed
27 * in the hashtable by inode number (lookups hash inode->i_ino). Note that
28 * this object doesn't hold a reference to the inode by itself, so the
29 * nf_inode pointer should never be dereferenced, only used for comparison.
 *
 * nf_flags bit usage visible in filecache.c:
 *   NFSD_FILE_HASHED      - set when the entry is inserted in the hash
 *                           table; tested after waiting to detect a failed
 *                           construction
 *   NFSD_FILE_PENDING     - set while the entry is being constructed;
 *                           other acquirers wait on this bit
 *   NFSD_FILE_BREAK_READ,
 *   NFSD_FILE_BREAK_WRITE - cause nfsd_file_acquire() to call
 *                           nfsd_open_break_lease() on a cache hit
30 */
31struct nfsd_file {
32 struct hlist_node nf_node; /* hash bucket chain linkage */
33 struct list_head nf_lru; /* linkage on nfsd_file_lru */
34 struct rcu_head nf_rcu;
35 struct file *nf_file; /* the opened file */
36 const struct cred *nf_cred; /* matched against current_cred() on lookup */
37 struct net *nf_net; /* network namespace the entry belongs to */
38#define NFSD_FILE_HASHED (0)
39#define NFSD_FILE_PENDING (1)
40#define NFSD_FILE_BREAK_READ (2)
41#define NFSD_FILE_BREAK_WRITE (3)
42#define NFSD_FILE_REFERENCED (4)
43 unsigned long nf_flags; /* NFSD_FILE_* bit numbers above */
44 struct inode *nf_inode; /* comparison only; never dereference */
45 unsigned int nf_hashval;
46 atomic_t nf_ref; /* reference count */
47 unsigned char nf_may; /* access bits this open satisfies */
48 struct nfsd_file_mark *nf_mark; /* fsnotify mark container for the inode */
49};
50
51int nfsd_file_cache_init(void);
52void nfsd_file_cache_purge(struct net *);
53void nfsd_file_cache_shutdown(void);
54void nfsd_file_put(struct nfsd_file *nf);
55struct nfsd_file *nfsd_file_get(struct nfsd_file *nf);
56void nfsd_file_close_inode_sync(struct inode *inode);
57bool nfsd_file_is_cached(struct inode *inode);
58__be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
59 unsigned int may_flags, struct nfsd_file **nfp);
60int nfsd_file_cache_stats_open(struct inode *, struct file *);
61#endif /* _FS_NFSD_FILECACHE_H */
diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index bdfe5bcb3dcd..9a4ef815fb8c 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -104,6 +104,7 @@ struct nfsd_net {
104 104
105 /* Time of server startup */ 105 /* Time of server startup */
106 struct timespec64 nfssvc_boot; 106 struct timespec64 nfssvc_boot;
107 seqlock_t boot_lock;
107 108
108 /* 109 /*
109 * Max number of connections this nfsd container will allow. Defaults 110 * Max number of connections this nfsd container will allow. Defaults
@@ -179,4 +180,7 @@ struct nfsd_net {
179extern void nfsd_netns_free_versions(struct nfsd_net *nn); 180extern void nfsd_netns_free_versions(struct nfsd_net *nn);
180 181
181extern unsigned int nfsd_net_id; 182extern unsigned int nfsd_net_id;
183
184void nfsd_copy_boot_verifier(__be32 verf[2], struct nfsd_net *nn);
185void nfsd_reset_boot_verifier(struct nfsd_net *nn);
182#endif /* __NFSD_NETNS_H__ */ 186#endif /* __NFSD_NETNS_H__ */
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 9bc32af4e2da..cea68d8411ac 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -172,13 +172,8 @@ nfsd3_proc_read(struct svc_rqst *rqstp)
172 nfserr = nfsd_read(rqstp, &resp->fh, 172 nfserr = nfsd_read(rqstp, &resp->fh,
173 argp->offset, 173 argp->offset,
174 rqstp->rq_vec, argp->vlen, 174 rqstp->rq_vec, argp->vlen,
175 &resp->count); 175 &resp->count,
176 if (nfserr == 0) { 176 &resp->eof);
177 struct inode *inode = d_inode(resp->fh.fh_dentry);
178 resp->eof = nfsd_eof_on_read(cnt, resp->count, argp->offset,
179 inode->i_size);
180 }
181
182 RETURN_STATUS(nfserr); 177 RETURN_STATUS(nfserr);
183} 178}
184 179
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index fcf31822c74c..86e5658651f1 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -27,6 +27,7 @@ static u32 nfs3_ftypes[] = {
27 NF3SOCK, NF3BAD, NF3LNK, NF3BAD, 27 NF3SOCK, NF3BAD, NF3LNK, NF3BAD,
28}; 28};
29 29
30
30/* 31/*
31 * XDR functions for basic NFS types 32 * XDR functions for basic NFS types
32 */ 33 */
@@ -751,14 +752,16 @@ nfs3svc_encode_writeres(struct svc_rqst *rqstp, __be32 *p)
751{ 752{
752 struct nfsd3_writeres *resp = rqstp->rq_resp; 753 struct nfsd3_writeres *resp = rqstp->rq_resp;
753 struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); 754 struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
755 __be32 verf[2];
754 756
755 p = encode_wcc_data(rqstp, p, &resp->fh); 757 p = encode_wcc_data(rqstp, p, &resp->fh);
756 if (resp->status == 0) { 758 if (resp->status == 0) {
757 *p++ = htonl(resp->count); 759 *p++ = htonl(resp->count);
758 *p++ = htonl(resp->committed); 760 *p++ = htonl(resp->committed);
759 /* unique identifier, y2038 overflow can be ignored */ 761 /* unique identifier, y2038 overflow can be ignored */
760 *p++ = htonl((u32)nn->nfssvc_boot.tv_sec); 762 nfsd_copy_boot_verifier(verf, nn);
761 *p++ = htonl(nn->nfssvc_boot.tv_nsec); 763 *p++ = verf[0];
764 *p++ = verf[1];
762 } 765 }
763 return xdr_ressize_check(rqstp, p); 766 return xdr_ressize_check(rqstp, p);
764} 767}
@@ -1125,13 +1128,15 @@ nfs3svc_encode_commitres(struct svc_rqst *rqstp, __be32 *p)
1125{ 1128{
1126 struct nfsd3_commitres *resp = rqstp->rq_resp; 1129 struct nfsd3_commitres *resp = rqstp->rq_resp;
1127 struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); 1130 struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
1131 __be32 verf[2];
1128 1132
1129 p = encode_wcc_data(rqstp, p, &resp->fh); 1133 p = encode_wcc_data(rqstp, p, &resp->fh);
1130 /* Write verifier */ 1134 /* Write verifier */
1131 if (resp->status == 0) { 1135 if (resp->status == 0) {
1132 /* unique identifier, y2038 overflow can be ignored */ 1136 /* unique identifier, y2038 overflow can be ignored */
1133 *p++ = htonl((u32)nn->nfssvc_boot.tv_sec); 1137 nfsd_copy_boot_verifier(verf, nn);
1134 *p++ = htonl(nn->nfssvc_boot.tv_nsec); 1138 *p++ = verf[0];
1139 *p++ = verf[1];
1135 } 1140 }
1136 return xdr_ressize_check(rqstp, p); 1141 return xdr_ressize_check(rqstp, p);
1137} 1142}
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 397eb7820929..524111420b48 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -512,11 +512,9 @@ static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp,
512 if (unlikely(status)) 512 if (unlikely(status))
513 return status; 513 return status;
514 514
515 if (cb != NULL) { 515 status = decode_cb_sequence4res(xdr, cb);
516 status = decode_cb_sequence4res(xdr, cb); 516 if (unlikely(status || cb->cb_seq_status))
517 if (unlikely(status || cb->cb_seq_status)) 517 return status;
518 return status;
519 }
520 518
521 return decode_cb_op_status(xdr, OP_CB_RECALL, &cb->cb_status); 519 return decode_cb_op_status(xdr, OP_CB_RECALL, &cb->cb_status);
522} 520}
@@ -604,11 +602,10 @@ static int nfs4_xdr_dec_cb_layout(struct rpc_rqst *rqstp,
604 if (unlikely(status)) 602 if (unlikely(status))
605 return status; 603 return status;
606 604
607 if (cb) { 605 status = decode_cb_sequence4res(xdr, cb);
608 status = decode_cb_sequence4res(xdr, cb); 606 if (unlikely(status || cb->cb_seq_status))
609 if (unlikely(status || cb->cb_seq_status)) 607 return status;
610 return status; 608
611 }
612 return decode_cb_op_status(xdr, OP_CB_LAYOUTRECALL, &cb->cb_status); 609 return decode_cb_op_status(xdr, OP_CB_LAYOUTRECALL, &cb->cb_status);
613} 610}
614#endif /* CONFIG_NFSD_PNFS */ 611#endif /* CONFIG_NFSD_PNFS */
@@ -663,11 +660,10 @@ static int nfs4_xdr_dec_cb_notify_lock(struct rpc_rqst *rqstp,
663 if (unlikely(status)) 660 if (unlikely(status))
664 return status; 661 return status;
665 662
666 if (cb) { 663 status = decode_cb_sequence4res(xdr, cb);
667 status = decode_cb_sequence4res(xdr, cb); 664 if (unlikely(status || cb->cb_seq_status))
668 if (unlikely(status || cb->cb_seq_status)) 665 return status;
669 return status; 666
670 }
671 return decode_cb_op_status(xdr, OP_CB_NOTIFY_LOCK, &cb->cb_status); 667 return decode_cb_op_status(xdr, OP_CB_NOTIFY_LOCK, &cb->cb_status);
672} 668}
673 669
@@ -759,11 +755,10 @@ static int nfs4_xdr_dec_cb_offload(struct rpc_rqst *rqstp,
759 if (unlikely(status)) 755 if (unlikely(status))
760 return status; 756 return status;
761 757
762 if (cb) { 758 status = decode_cb_sequence4res(xdr, cb);
763 status = decode_cb_sequence4res(xdr, cb); 759 if (unlikely(status || cb->cb_seq_status))
764 if (unlikely(status || cb->cb_seq_status)) 760 return status;
765 return status; 761
766 }
767 return decode_cb_op_status(xdr, OP_CB_OFFLOAD, &cb->cb_status); 762 return decode_cb_op_status(xdr, OP_CB_OFFLOAD, &cb->cb_status);
768} 763}
769/* 764/*
diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
index a79e24b79095..2681c70283ce 100644
--- a/fs/nfsd/nfs4layouts.c
+++ b/fs/nfsd/nfs4layouts.c
@@ -169,8 +169,8 @@ nfsd4_free_layout_stateid(struct nfs4_stid *stid)
169 spin_unlock(&fp->fi_lock); 169 spin_unlock(&fp->fi_lock);
170 170
171 if (!nfsd4_layout_ops[ls->ls_layout_type]->disable_recalls) 171 if (!nfsd4_layout_ops[ls->ls_layout_type]->disable_recalls)
172 vfs_setlease(ls->ls_file, F_UNLCK, NULL, (void **)&ls); 172 vfs_setlease(ls->ls_file->nf_file, F_UNLCK, NULL, (void **)&ls);
173 fput(ls->ls_file); 173 nfsd_file_put(ls->ls_file);
174 174
175 if (ls->ls_recalled) 175 if (ls->ls_recalled)
176 atomic_dec(&ls->ls_stid.sc_file->fi_lo_recalls); 176 atomic_dec(&ls->ls_stid.sc_file->fi_lo_recalls);
@@ -197,7 +197,7 @@ nfsd4_layout_setlease(struct nfs4_layout_stateid *ls)
197 fl->fl_end = OFFSET_MAX; 197 fl->fl_end = OFFSET_MAX;
198 fl->fl_owner = ls; 198 fl->fl_owner = ls;
199 fl->fl_pid = current->tgid; 199 fl->fl_pid = current->tgid;
200 fl->fl_file = ls->ls_file; 200 fl->fl_file = ls->ls_file->nf_file;
201 201
202 status = vfs_setlease(fl->fl_file, fl->fl_type, &fl, NULL); 202 status = vfs_setlease(fl->fl_file, fl->fl_type, &fl, NULL);
203 if (status) { 203 if (status) {
@@ -236,13 +236,13 @@ nfsd4_alloc_layout_stateid(struct nfsd4_compound_state *cstate,
236 NFSPROC4_CLNT_CB_LAYOUT); 236 NFSPROC4_CLNT_CB_LAYOUT);
237 237
238 if (parent->sc_type == NFS4_DELEG_STID) 238 if (parent->sc_type == NFS4_DELEG_STID)
239 ls->ls_file = get_file(fp->fi_deleg_file); 239 ls->ls_file = nfsd_file_get(fp->fi_deleg_file);
240 else 240 else
241 ls->ls_file = find_any_file(fp); 241 ls->ls_file = find_any_file(fp);
242 BUG_ON(!ls->ls_file); 242 BUG_ON(!ls->ls_file);
243 243
244 if (nfsd4_layout_setlease(ls)) { 244 if (nfsd4_layout_setlease(ls)) {
245 fput(ls->ls_file); 245 nfsd_file_put(ls->ls_file);
246 put_nfs4_file(fp); 246 put_nfs4_file(fp);
247 kmem_cache_free(nfs4_layout_stateid_cache, ls); 247 kmem_cache_free(nfs4_layout_stateid_cache, ls);
248 return NULL; 248 return NULL;
@@ -626,7 +626,7 @@ nfsd4_cb_layout_fail(struct nfs4_layout_stateid *ls)
626 626
627 argv[0] = (char *)nfsd_recall_failed; 627 argv[0] = (char *)nfsd_recall_failed;
628 argv[1] = addr_str; 628 argv[1] = addr_str;
629 argv[2] = ls->ls_file->f_path.mnt->mnt_sb->s_id; 629 argv[2] = ls->ls_file->nf_file->f_path.mnt->mnt_sb->s_id;
630 argv[3] = NULL; 630 argv[3] = NULL;
631 631
632 error = call_usermodehelper(nfsd_recall_failed, argv, envp, 632 error = call_usermodehelper(nfsd_recall_failed, argv, envp,
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 8beda999e134..4e3e77b76411 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -568,17 +568,11 @@ nfsd4_access(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
568 568
569static void gen_boot_verifier(nfs4_verifier *verifier, struct net *net) 569static void gen_boot_verifier(nfs4_verifier *verifier, struct net *net)
570{ 570{
571 __be32 verf[2]; 571 __be32 *verf = (__be32 *)verifier->data;
572 struct nfsd_net *nn = net_generic(net, nfsd_net_id);
573 572
574 /* 573 BUILD_BUG_ON(2*sizeof(*verf) != sizeof(verifier->data));
575 * This is opaque to client, so no need to byte-swap. Use 574
576 * __force to keep sparse happy. y2038 time_t overflow is 575 nfsd_copy_boot_verifier(verf, net_generic(net, nfsd_net_id));
577 * irrelevant in this usage.
578 */
579 verf[0] = (__force __be32)nn->nfssvc_boot.tv_sec;
580 verf[1] = (__force __be32)nn->nfssvc_boot.tv_nsec;
581 memcpy(verifier->data, verf, sizeof(verifier->data));
582} 576}
583 577
584static __be32 578static __be32
@@ -761,7 +755,7 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
761 struct nfsd4_read *read = &u->read; 755 struct nfsd4_read *read = &u->read;
762 __be32 status; 756 __be32 status;
763 757
764 read->rd_filp = NULL; 758 read->rd_nf = NULL;
765 if (read->rd_offset >= OFFSET_MAX) 759 if (read->rd_offset >= OFFSET_MAX)
766 return nfserr_inval; 760 return nfserr_inval;
767 761
@@ -782,7 +776,7 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
782 /* check stateid */ 776 /* check stateid */
783 status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, 777 status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
784 &read->rd_stateid, RD_STATE, 778 &read->rd_stateid, RD_STATE,
785 &read->rd_filp, &read->rd_tmp_file); 779 &read->rd_nf);
786 if (status) { 780 if (status) {
787 dprintk("NFSD: nfsd4_read: couldn't process stateid!\n"); 781 dprintk("NFSD: nfsd4_read: couldn't process stateid!\n");
788 goto out; 782 goto out;
@@ -798,8 +792,8 @@ out:
798static void 792static void
799nfsd4_read_release(union nfsd4_op_u *u) 793nfsd4_read_release(union nfsd4_op_u *u)
800{ 794{
801 if (u->read.rd_filp) 795 if (u->read.rd_nf)
802 fput(u->read.rd_filp); 796 nfsd_file_put(u->read.rd_nf);
803 trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp, 797 trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp,
804 u->read.rd_offset, u->read.rd_length); 798 u->read.rd_offset, u->read.rd_length);
805} 799}
@@ -954,7 +948,7 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
954 if (setattr->sa_iattr.ia_valid & ATTR_SIZE) { 948 if (setattr->sa_iattr.ia_valid & ATTR_SIZE) {
955 status = nfs4_preprocess_stateid_op(rqstp, cstate, 949 status = nfs4_preprocess_stateid_op(rqstp, cstate,
956 &cstate->current_fh, &setattr->sa_stateid, 950 &cstate->current_fh, &setattr->sa_stateid,
957 WR_STATE, NULL, NULL); 951 WR_STATE, NULL);
958 if (status) { 952 if (status) {
959 dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n"); 953 dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n");
960 return status; 954 return status;
@@ -993,7 +987,7 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
993{ 987{
994 struct nfsd4_write *write = &u->write; 988 struct nfsd4_write *write = &u->write;
995 stateid_t *stateid = &write->wr_stateid; 989 stateid_t *stateid = &write->wr_stateid;
996 struct file *filp = NULL; 990 struct nfsd_file *nf = NULL;
997 __be32 status = nfs_ok; 991 __be32 status = nfs_ok;
998 unsigned long cnt; 992 unsigned long cnt;
999 int nvecs; 993 int nvecs;
@@ -1005,7 +999,7 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
1005 trace_nfsd_write_start(rqstp, &cstate->current_fh, 999 trace_nfsd_write_start(rqstp, &cstate->current_fh,
1006 write->wr_offset, cnt); 1000 write->wr_offset, cnt);
1007 status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, 1001 status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
1008 stateid, WR_STATE, &filp, NULL); 1002 stateid, WR_STATE, &nf);
1009 if (status) { 1003 if (status) {
1010 dprintk("NFSD: nfsd4_write: couldn't process stateid!\n"); 1004 dprintk("NFSD: nfsd4_write: couldn't process stateid!\n");
1011 return status; 1005 return status;
@@ -1018,10 +1012,10 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
1018 &write->wr_head, write->wr_buflen); 1012 &write->wr_head, write->wr_buflen);
1019 WARN_ON_ONCE(nvecs > ARRAY_SIZE(rqstp->rq_vec)); 1013 WARN_ON_ONCE(nvecs > ARRAY_SIZE(rqstp->rq_vec));
1020 1014
1021 status = nfsd_vfs_write(rqstp, &cstate->current_fh, filp, 1015 status = nfsd_vfs_write(rqstp, &cstate->current_fh, nf->nf_file,
1022 write->wr_offset, rqstp->rq_vec, nvecs, &cnt, 1016 write->wr_offset, rqstp->rq_vec, nvecs, &cnt,
1023 write->wr_how_written); 1017 write->wr_how_written);
1024 fput(filp); 1018 nfsd_file_put(nf);
1025 1019
1026 write->wr_bytes_written = cnt; 1020 write->wr_bytes_written = cnt;
1027 trace_nfsd_write_done(rqstp, &cstate->current_fh, 1021 trace_nfsd_write_done(rqstp, &cstate->current_fh,
@@ -1031,8 +1025,8 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
1031 1025
1032static __be32 1026static __be32
1033nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, 1027nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
1034 stateid_t *src_stateid, struct file **src, 1028 stateid_t *src_stateid, struct nfsd_file **src,
1035 stateid_t *dst_stateid, struct file **dst) 1029 stateid_t *dst_stateid, struct nfsd_file **dst)
1036{ 1030{
1037 __be32 status; 1031 __be32 status;
1038 1032
@@ -1040,22 +1034,22 @@ nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
1040 return nfserr_nofilehandle; 1034 return nfserr_nofilehandle;
1041 1035
1042 status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->save_fh, 1036 status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->save_fh,
1043 src_stateid, RD_STATE, src, NULL); 1037 src_stateid, RD_STATE, src);
1044 if (status) { 1038 if (status) {
1045 dprintk("NFSD: %s: couldn't process src stateid!\n", __func__); 1039 dprintk("NFSD: %s: couldn't process src stateid!\n", __func__);
1046 goto out; 1040 goto out;
1047 } 1041 }
1048 1042
1049 status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, 1043 status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
1050 dst_stateid, WR_STATE, dst, NULL); 1044 dst_stateid, WR_STATE, dst);
1051 if (status) { 1045 if (status) {
1052 dprintk("NFSD: %s: couldn't process dst stateid!\n", __func__); 1046 dprintk("NFSD: %s: couldn't process dst stateid!\n", __func__);
1053 goto out_put_src; 1047 goto out_put_src;
1054 } 1048 }
1055 1049
1056 /* fix up for NFS-specific error code */ 1050 /* fix up for NFS-specific error code */
1057 if (!S_ISREG(file_inode(*src)->i_mode) || 1051 if (!S_ISREG(file_inode((*src)->nf_file)->i_mode) ||
1058 !S_ISREG(file_inode(*dst)->i_mode)) { 1052 !S_ISREG(file_inode((*dst)->nf_file)->i_mode)) {
1059 status = nfserr_wrong_type; 1053 status = nfserr_wrong_type;
1060 goto out_put_dst; 1054 goto out_put_dst;
1061 } 1055 }
@@ -1063,9 +1057,9 @@ nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
1063out: 1057out:
1064 return status; 1058 return status;
1065out_put_dst: 1059out_put_dst:
1066 fput(*dst); 1060 nfsd_file_put(*dst);
1067out_put_src: 1061out_put_src:
1068 fput(*src); 1062 nfsd_file_put(*src);
1069 goto out; 1063 goto out;
1070} 1064}
1071 1065
@@ -1074,7 +1068,7 @@ nfsd4_clone(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
1074 union nfsd4_op_u *u) 1068 union nfsd4_op_u *u)
1075{ 1069{
1076 struct nfsd4_clone *clone = &u->clone; 1070 struct nfsd4_clone *clone = &u->clone;
1077 struct file *src, *dst; 1071 struct nfsd_file *src, *dst;
1078 __be32 status; 1072 __be32 status;
1079 1073
1080 status = nfsd4_verify_copy(rqstp, cstate, &clone->cl_src_stateid, &src, 1074 status = nfsd4_verify_copy(rqstp, cstate, &clone->cl_src_stateid, &src,
@@ -1082,11 +1076,11 @@ nfsd4_clone(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
1082 if (status) 1076 if (status)
1083 goto out; 1077 goto out;
1084 1078
1085 status = nfsd4_clone_file_range(src, clone->cl_src_pos, 1079 status = nfsd4_clone_file_range(src->nf_file, clone->cl_src_pos,
1086 dst, clone->cl_dst_pos, clone->cl_count); 1080 dst->nf_file, clone->cl_dst_pos, clone->cl_count);
1087 1081
1088 fput(dst); 1082 nfsd_file_put(dst);
1089 fput(src); 1083 nfsd_file_put(src);
1090out: 1084out:
1091 return status; 1085 return status;
1092} 1086}
@@ -1176,8 +1170,9 @@ static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy)
1176 do { 1170 do {
1177 if (kthread_should_stop()) 1171 if (kthread_should_stop())
1178 break; 1172 break;
1179 bytes_copied = nfsd_copy_file_range(copy->file_src, src_pos, 1173 bytes_copied = nfsd_copy_file_range(copy->nf_src->nf_file,
1180 copy->file_dst, dst_pos, bytes_total); 1174 src_pos, copy->nf_dst->nf_file, dst_pos,
1175 bytes_total);
1181 if (bytes_copied <= 0) 1176 if (bytes_copied <= 0)
1182 break; 1177 break;
1183 bytes_total -= bytes_copied; 1178 bytes_total -= bytes_copied;
@@ -1204,8 +1199,8 @@ static __be32 nfsd4_do_copy(struct nfsd4_copy *copy, bool sync)
1204 status = nfs_ok; 1199 status = nfs_ok;
1205 } 1200 }
1206 1201
1207 fput(copy->file_src); 1202 nfsd_file_put(copy->nf_src);
1208 fput(copy->file_dst); 1203 nfsd_file_put(copy->nf_dst);
1209 return status; 1204 return status;
1210} 1205}
1211 1206
@@ -1218,16 +1213,16 @@ static void dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst)
1218 memcpy(&dst->cp_res, &src->cp_res, sizeof(src->cp_res)); 1213 memcpy(&dst->cp_res, &src->cp_res, sizeof(src->cp_res));
1219 memcpy(&dst->fh, &src->fh, sizeof(src->fh)); 1214 memcpy(&dst->fh, &src->fh, sizeof(src->fh));
1220 dst->cp_clp = src->cp_clp; 1215 dst->cp_clp = src->cp_clp;
1221 dst->file_dst = get_file(src->file_dst); 1216 dst->nf_dst = nfsd_file_get(src->nf_dst);
1222 dst->file_src = get_file(src->file_src); 1217 dst->nf_src = nfsd_file_get(src->nf_src);
1223 memcpy(&dst->cp_stateid, &src->cp_stateid, sizeof(src->cp_stateid)); 1218 memcpy(&dst->cp_stateid, &src->cp_stateid, sizeof(src->cp_stateid));
1224} 1219}
1225 1220
1226static void cleanup_async_copy(struct nfsd4_copy *copy) 1221static void cleanup_async_copy(struct nfsd4_copy *copy)
1227{ 1222{
1228 nfs4_free_cp_state(copy); 1223 nfs4_free_cp_state(copy);
1229 fput(copy->file_dst); 1224 nfsd_file_put(copy->nf_dst);
1230 fput(copy->file_src); 1225 nfsd_file_put(copy->nf_src);
1231 spin_lock(&copy->cp_clp->async_lock); 1226 spin_lock(&copy->cp_clp->async_lock);
1232 list_del(&copy->copies); 1227 list_del(&copy->copies);
1233 spin_unlock(&copy->cp_clp->async_lock); 1228 spin_unlock(&copy->cp_clp->async_lock);
@@ -1264,8 +1259,8 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
1264 struct nfsd4_copy *async_copy = NULL; 1259 struct nfsd4_copy *async_copy = NULL;
1265 1260
1266 status = nfsd4_verify_copy(rqstp, cstate, &copy->cp_src_stateid, 1261 status = nfsd4_verify_copy(rqstp, cstate, &copy->cp_src_stateid,
1267 &copy->file_src, &copy->cp_dst_stateid, 1262 &copy->nf_src, &copy->cp_dst_stateid,
1268 &copy->file_dst); 1263 &copy->nf_dst);
1269 if (status) 1264 if (status)
1270 goto out; 1265 goto out;
1271 1266
@@ -1347,21 +1342,21 @@ nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
1347 struct nfsd4_fallocate *fallocate, int flags) 1342 struct nfsd4_fallocate *fallocate, int flags)
1348{ 1343{
1349 __be32 status; 1344 __be32 status;
1350 struct file *file; 1345 struct nfsd_file *nf;
1351 1346
1352 status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, 1347 status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
1353 &fallocate->falloc_stateid, 1348 &fallocate->falloc_stateid,
1354 WR_STATE, &file, NULL); 1349 WR_STATE, &nf);
1355 if (status != nfs_ok) { 1350 if (status != nfs_ok) {
1356 dprintk("NFSD: nfsd4_fallocate: couldn't process stateid!\n"); 1351 dprintk("NFSD: nfsd4_fallocate: couldn't process stateid!\n");
1357 return status; 1352 return status;
1358 } 1353 }
1359 1354
1360 status = nfsd4_vfs_fallocate(rqstp, &cstate->current_fh, file, 1355 status = nfsd4_vfs_fallocate(rqstp, &cstate->current_fh, nf->nf_file,
1361 fallocate->falloc_offset, 1356 fallocate->falloc_offset,
1362 fallocate->falloc_length, 1357 fallocate->falloc_length,
1363 flags); 1358 flags);
1364 fput(file); 1359 nfsd_file_put(nf);
1365 return status; 1360 return status;
1366} 1361}
1367static __be32 1362static __be32
@@ -1406,11 +1401,11 @@ nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
1406 struct nfsd4_seek *seek = &u->seek; 1401 struct nfsd4_seek *seek = &u->seek;
1407 int whence; 1402 int whence;
1408 __be32 status; 1403 __be32 status;
1409 struct file *file; 1404 struct nfsd_file *nf;
1410 1405
1411 status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, 1406 status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
1412 &seek->seek_stateid, 1407 &seek->seek_stateid,
1413 RD_STATE, &file, NULL); 1408 RD_STATE, &nf);
1414 if (status) { 1409 if (status) {
1415 dprintk("NFSD: nfsd4_seek: couldn't process stateid!\n"); 1410 dprintk("NFSD: nfsd4_seek: couldn't process stateid!\n");
1416 return status; 1411 return status;
@@ -1432,14 +1427,14 @@ nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
1432 * Note: This call does change file->f_pos, but nothing in NFSD 1427 * Note: This call does change file->f_pos, but nothing in NFSD
1433 * should ever file->f_pos. 1428 * should ever file->f_pos.
1434 */ 1429 */
1435 seek->seek_pos = vfs_llseek(file, seek->seek_offset, whence); 1430 seek->seek_pos = vfs_llseek(nf->nf_file, seek->seek_offset, whence);
1436 if (seek->seek_pos < 0) 1431 if (seek->seek_pos < 0)
1437 status = nfserrno(seek->seek_pos); 1432 status = nfserrno(seek->seek_pos);
1438 else if (seek->seek_pos >= i_size_read(file_inode(file))) 1433 else if (seek->seek_pos >= i_size_read(file_inode(nf->nf_file)))
1439 seek->seek_eof = true; 1434 seek->seek_eof = true;
1440 1435
1441out: 1436out:
1442 fput(file); 1437 nfsd_file_put(nf);
1443 return status; 1438 return status;
1444} 1439}
1445 1440
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 87679557d0d6..cdc75ad4438b 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -59,8 +59,13 @@ struct nfsd4_client_tracking_ops {
59 void (*remove)(struct nfs4_client *); 59 void (*remove)(struct nfs4_client *);
60 int (*check)(struct nfs4_client *); 60 int (*check)(struct nfs4_client *);
61 void (*grace_done)(struct nfsd_net *); 61 void (*grace_done)(struct nfsd_net *);
62 uint8_t version;
63 size_t msglen;
62}; 64};
63 65
66static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops;
67static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops_v2;
68
64/* Globals */ 69/* Globals */
65static char user_recovery_dirname[PATH_MAX] = "/var/lib/nfs/v4recovery"; 70static char user_recovery_dirname[PATH_MAX] = "/var/lib/nfs/v4recovery";
66 71
@@ -173,6 +178,7 @@ __nfsd4_create_reclaim_record_grace(struct nfs4_client *clp,
173 const char *dname, int len, struct nfsd_net *nn) 178 const char *dname, int len, struct nfsd_net *nn)
174{ 179{
175 struct xdr_netobj name; 180 struct xdr_netobj name;
181 struct xdr_netobj princhash = { .len = 0, .data = NULL };
176 struct nfs4_client_reclaim *crp; 182 struct nfs4_client_reclaim *crp;
177 183
178 name.data = kmemdup(dname, len, GFP_KERNEL); 184 name.data = kmemdup(dname, len, GFP_KERNEL);
@@ -182,7 +188,7 @@ __nfsd4_create_reclaim_record_grace(struct nfs4_client *clp,
182 return; 188 return;
183 } 189 }
184 name.len = len; 190 name.len = len;
185 crp = nfs4_client_to_reclaim(name, nn); 191 crp = nfs4_client_to_reclaim(name, princhash, nn);
186 if (!crp) { 192 if (!crp) {
187 kfree(name.data); 193 kfree(name.data);
188 return; 194 return;
@@ -482,6 +488,7 @@ static int
482load_recdir(struct dentry *parent, struct dentry *child, struct nfsd_net *nn) 488load_recdir(struct dentry *parent, struct dentry *child, struct nfsd_net *nn)
483{ 489{
484 struct xdr_netobj name; 490 struct xdr_netobj name;
491 struct xdr_netobj princhash = { .len = 0, .data = NULL };
485 492
486 if (child->d_name.len != HEXDIR_LEN - 1) { 493 if (child->d_name.len != HEXDIR_LEN - 1) {
487 printk("%s: illegal name %pd in recovery directory\n", 494 printk("%s: illegal name %pd in recovery directory\n",
@@ -496,7 +503,7 @@ load_recdir(struct dentry *parent, struct dentry *child, struct nfsd_net *nn)
496 goto out; 503 goto out;
497 } 504 }
498 name.len = HEXDIR_LEN; 505 name.len = HEXDIR_LEN;
499 if (!nfs4_client_to_reclaim(name, nn)) 506 if (!nfs4_client_to_reclaim(name, princhash, nn))
500 kfree(name.data); 507 kfree(name.data);
501out: 508out:
502 return 0; 509 return 0;
@@ -718,6 +725,8 @@ static const struct nfsd4_client_tracking_ops nfsd4_legacy_tracking_ops = {
718 .remove = nfsd4_remove_clid_dir, 725 .remove = nfsd4_remove_clid_dir,
719 .check = nfsd4_check_legacy_client, 726 .check = nfsd4_check_legacy_client,
720 .grace_done = nfsd4_recdir_purge_old, 727 .grace_done = nfsd4_recdir_purge_old,
728 .version = 1,
729 .msglen = 0,
721}; 730};
722 731
723/* Globals */ 732/* Globals */
@@ -731,25 +740,32 @@ struct cld_net {
731 struct list_head cn_list; 740 struct list_head cn_list;
732 unsigned int cn_xid; 741 unsigned int cn_xid;
733 bool cn_has_legacy; 742 bool cn_has_legacy;
743 struct crypto_shash *cn_tfm;
734}; 744};
735 745
736struct cld_upcall { 746struct cld_upcall {
737 struct list_head cu_list; 747 struct list_head cu_list;
738 struct cld_net *cu_net; 748 struct cld_net *cu_net;
739 struct completion cu_done; 749 struct completion cu_done;
740 struct cld_msg cu_msg; 750 union {
751 struct cld_msg_hdr cu_hdr;
752 struct cld_msg cu_msg;
753 struct cld_msg_v2 cu_msg_v2;
754 } cu_u;
741}; 755};
742 756
743static int 757static int
744__cld_pipe_upcall(struct rpc_pipe *pipe, struct cld_msg *cmsg) 758__cld_pipe_upcall(struct rpc_pipe *pipe, void *cmsg)
745{ 759{
746 int ret; 760 int ret;
747 struct rpc_pipe_msg msg; 761 struct rpc_pipe_msg msg;
748 struct cld_upcall *cup = container_of(cmsg, struct cld_upcall, cu_msg); 762 struct cld_upcall *cup = container_of(cmsg, struct cld_upcall, cu_u);
763 struct nfsd_net *nn = net_generic(pipe->dentry->d_sb->s_fs_info,
764 nfsd_net_id);
749 765
750 memset(&msg, 0, sizeof(msg)); 766 memset(&msg, 0, sizeof(msg));
751 msg.data = cmsg; 767 msg.data = cmsg;
752 msg.len = sizeof(*cmsg); 768 msg.len = nn->client_tracking_ops->msglen;
753 769
754 ret = rpc_queue_upcall(pipe, &msg); 770 ret = rpc_queue_upcall(pipe, &msg);
755 if (ret < 0) { 771 if (ret < 0) {
@@ -765,7 +781,7 @@ out:
765} 781}
766 782
767static int 783static int
768cld_pipe_upcall(struct rpc_pipe *pipe, struct cld_msg *cmsg) 784cld_pipe_upcall(struct rpc_pipe *pipe, void *cmsg)
769{ 785{
770 int ret; 786 int ret;
771 787
@@ -781,11 +797,11 @@ cld_pipe_upcall(struct rpc_pipe *pipe, struct cld_msg *cmsg)
781} 797}
782 798
783static ssize_t 799static ssize_t
784__cld_pipe_inprogress_downcall(const struct cld_msg __user *cmsg, 800__cld_pipe_inprogress_downcall(const struct cld_msg_v2 __user *cmsg,
785 struct nfsd_net *nn) 801 struct nfsd_net *nn)
786{ 802{
787 uint8_t cmd; 803 uint8_t cmd, princhashlen;
788 struct xdr_netobj name; 804 struct xdr_netobj name, princhash = { .len = 0, .data = NULL };
789 uint16_t namelen; 805 uint16_t namelen;
790 struct cld_net *cn = nn->cld_net; 806 struct cld_net *cn = nn->cld_net;
791 807
@@ -794,22 +810,48 @@ __cld_pipe_inprogress_downcall(const struct cld_msg __user *cmsg,
794 return -EFAULT; 810 return -EFAULT;
795 } 811 }
796 if (cmd == Cld_GraceStart) { 812 if (cmd == Cld_GraceStart) {
797 if (get_user(namelen, &cmsg->cm_u.cm_name.cn_len)) 813 if (nn->client_tracking_ops->version >= 2) {
798 return -EFAULT; 814 const struct cld_clntinfo __user *ci;
799 name.data = memdup_user(&cmsg->cm_u.cm_name.cn_id, namelen); 815
800 if (IS_ERR_OR_NULL(name.data)) 816 ci = &cmsg->cm_u.cm_clntinfo;
801 return -EFAULT; 817 if (get_user(namelen, &ci->cc_name.cn_len))
802 name.len = namelen; 818 return -EFAULT;
819 name.data = memdup_user(&ci->cc_name.cn_id, namelen);
820 if (IS_ERR_OR_NULL(name.data))
821 return -EFAULT;
822 name.len = namelen;
823 get_user(princhashlen, &ci->cc_princhash.cp_len);
824 if (princhashlen > 0) {
825 princhash.data = memdup_user(
826 &ci->cc_princhash.cp_data,
827 princhashlen);
828 if (IS_ERR_OR_NULL(princhash.data))
829 return -EFAULT;
830 princhash.len = princhashlen;
831 } else
832 princhash.len = 0;
833 } else {
834 const struct cld_name __user *cnm;
835
836 cnm = &cmsg->cm_u.cm_name;
837 if (get_user(namelen, &cnm->cn_len))
838 return -EFAULT;
839 name.data = memdup_user(&cnm->cn_id, namelen);
840 if (IS_ERR_OR_NULL(name.data))
841 return -EFAULT;
842 name.len = namelen;
843 }
803 if (name.len > 5 && memcmp(name.data, "hash:", 5) == 0) { 844 if (name.len > 5 && memcmp(name.data, "hash:", 5) == 0) {
804 name.len = name.len - 5; 845 name.len = name.len - 5;
805 memmove(name.data, name.data + 5, name.len); 846 memmove(name.data, name.data + 5, name.len);
806 cn->cn_has_legacy = true; 847 cn->cn_has_legacy = true;
807 } 848 }
808 if (!nfs4_client_to_reclaim(name, nn)) { 849 if (!nfs4_client_to_reclaim(name, princhash, nn)) {
809 kfree(name.data); 850 kfree(name.data);
851 kfree(princhash.data);
810 return -EFAULT; 852 return -EFAULT;
811 } 853 }
812 return sizeof(*cmsg); 854 return nn->client_tracking_ops->msglen;
813 } 855 }
814 return -EFAULT; 856 return -EFAULT;
815} 857}
@@ -818,21 +860,22 @@ static ssize_t
818cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) 860cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
819{ 861{
820 struct cld_upcall *tmp, *cup; 862 struct cld_upcall *tmp, *cup;
821 struct cld_msg __user *cmsg = (struct cld_msg __user *)src; 863 struct cld_msg_hdr __user *hdr = (struct cld_msg_hdr __user *)src;
864 struct cld_msg_v2 __user *cmsg = (struct cld_msg_v2 __user *)src;
822 uint32_t xid; 865 uint32_t xid;
823 struct nfsd_net *nn = net_generic(file_inode(filp)->i_sb->s_fs_info, 866 struct nfsd_net *nn = net_generic(file_inode(filp)->i_sb->s_fs_info,
824 nfsd_net_id); 867 nfsd_net_id);
825 struct cld_net *cn = nn->cld_net; 868 struct cld_net *cn = nn->cld_net;
826 int16_t status; 869 int16_t status;
827 870
828 if (mlen != sizeof(*cmsg)) { 871 if (mlen != nn->client_tracking_ops->msglen) {
829 dprintk("%s: got %zu bytes, expected %zu\n", __func__, mlen, 872 dprintk("%s: got %zu bytes, expected %zu\n", __func__, mlen,
830 sizeof(*cmsg)); 873 nn->client_tracking_ops->msglen);
831 return -EINVAL; 874 return -EINVAL;
832 } 875 }
833 876
834 /* copy just the xid so we can try to find that */ 877 /* copy just the xid so we can try to find that */
835 if (copy_from_user(&xid, &cmsg->cm_xid, sizeof(xid)) != 0) { 878 if (copy_from_user(&xid, &hdr->cm_xid, sizeof(xid)) != 0) {
836 dprintk("%s: error when copying xid from userspace", __func__); 879 dprintk("%s: error when copying xid from userspace", __func__);
837 return -EFAULT; 880 return -EFAULT;
838 } 881 }
@@ -842,7 +885,7 @@ cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
842 * list (for -EINPROGRESS, we just want to make sure the xid is 885 * list (for -EINPROGRESS, we just want to make sure the xid is
843 * valid, not remove the upcall from the list) 886 * valid, not remove the upcall from the list)
844 */ 887 */
845 if (get_user(status, &cmsg->cm_status)) { 888 if (get_user(status, &hdr->cm_status)) {
846 dprintk("%s: error when copying status from userspace", __func__); 889 dprintk("%s: error when copying status from userspace", __func__);
847 return -EFAULT; 890 return -EFAULT;
848 } 891 }
@@ -851,7 +894,7 @@ cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
851 cup = NULL; 894 cup = NULL;
852 spin_lock(&cn->cn_lock); 895 spin_lock(&cn->cn_lock);
853 list_for_each_entry(tmp, &cn->cn_list, cu_list) { 896 list_for_each_entry(tmp, &cn->cn_list, cu_list) {
854 if (get_unaligned(&tmp->cu_msg.cm_xid) == xid) { 897 if (get_unaligned(&tmp->cu_u.cu_hdr.cm_xid) == xid) {
855 cup = tmp; 898 cup = tmp;
856 if (status != -EINPROGRESS) 899 if (status != -EINPROGRESS)
857 list_del_init(&cup->cu_list); 900 list_del_init(&cup->cu_list);
@@ -869,7 +912,7 @@ cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
869 if (status == -EINPROGRESS) 912 if (status == -EINPROGRESS)
870 return __cld_pipe_inprogress_downcall(cmsg, nn); 913 return __cld_pipe_inprogress_downcall(cmsg, nn);
871 914
872 if (copy_from_user(&cup->cu_msg, src, mlen) != 0) 915 if (copy_from_user(&cup->cu_u.cu_msg_v2, src, mlen) != 0)
873 return -EFAULT; 916 return -EFAULT;
874 917
875 complete(&cup->cu_done); 918 complete(&cup->cu_done);
@@ -881,7 +924,7 @@ cld_pipe_destroy_msg(struct rpc_pipe_msg *msg)
881{ 924{
882 struct cld_msg *cmsg = msg->data; 925 struct cld_msg *cmsg = msg->data;
883 struct cld_upcall *cup = container_of(cmsg, struct cld_upcall, 926 struct cld_upcall *cup = container_of(cmsg, struct cld_upcall,
884 cu_msg); 927 cu_u.cu_msg);
885 928
886 /* errno >= 0 means we got a downcall */ 929 /* errno >= 0 means we got a downcall */
887 if (msg->errno >= 0) 930 if (msg->errno >= 0)
@@ -1007,14 +1050,17 @@ nfsd4_remove_cld_pipe(struct net *net)
1007 1050
1008 nfsd4_cld_unregister_net(net, cn->cn_pipe); 1051 nfsd4_cld_unregister_net(net, cn->cn_pipe);
1009 rpc_destroy_pipe_data(cn->cn_pipe); 1052 rpc_destroy_pipe_data(cn->cn_pipe);
1053 if (cn->cn_tfm)
1054 crypto_free_shash(cn->cn_tfm);
1010 kfree(nn->cld_net); 1055 kfree(nn->cld_net);
1011 nn->cld_net = NULL; 1056 nn->cld_net = NULL;
1012} 1057}
1013 1058
1014static struct cld_upcall * 1059static struct cld_upcall *
1015alloc_cld_upcall(struct cld_net *cn) 1060alloc_cld_upcall(struct nfsd_net *nn)
1016{ 1061{
1017 struct cld_upcall *new, *tmp; 1062 struct cld_upcall *new, *tmp;
1063 struct cld_net *cn = nn->cld_net;
1018 1064
1019 new = kzalloc(sizeof(*new), GFP_KERNEL); 1065 new = kzalloc(sizeof(*new), GFP_KERNEL);
1020 if (!new) 1066 if (!new)
@@ -1024,20 +1070,20 @@ alloc_cld_upcall(struct cld_net *cn)
1024restart_search: 1070restart_search:
1025 spin_lock(&cn->cn_lock); 1071 spin_lock(&cn->cn_lock);
1026 list_for_each_entry(tmp, &cn->cn_list, cu_list) { 1072 list_for_each_entry(tmp, &cn->cn_list, cu_list) {
1027 if (tmp->cu_msg.cm_xid == cn->cn_xid) { 1073 if (tmp->cu_u.cu_msg.cm_xid == cn->cn_xid) {
1028 cn->cn_xid++; 1074 cn->cn_xid++;
1029 spin_unlock(&cn->cn_lock); 1075 spin_unlock(&cn->cn_lock);
1030 goto restart_search; 1076 goto restart_search;
1031 } 1077 }
1032 } 1078 }
1033 init_completion(&new->cu_done); 1079 init_completion(&new->cu_done);
1034 new->cu_msg.cm_vers = CLD_UPCALL_VERSION; 1080 new->cu_u.cu_msg.cm_vers = nn->client_tracking_ops->version;
1035 put_unaligned(cn->cn_xid++, &new->cu_msg.cm_xid); 1081 put_unaligned(cn->cn_xid++, &new->cu_u.cu_msg.cm_xid);
1036 new->cu_net = cn; 1082 new->cu_net = cn;
1037 list_add(&new->cu_list, &cn->cn_list); 1083 list_add(&new->cu_list, &cn->cn_list);
1038 spin_unlock(&cn->cn_lock); 1084 spin_unlock(&cn->cn_lock);
1039 1085
1040 dprintk("%s: allocated xid %u\n", __func__, new->cu_msg.cm_xid); 1086 dprintk("%s: allocated xid %u\n", __func__, new->cu_u.cu_msg.cm_xid);
1041 1087
1042 return new; 1088 return new;
1043} 1089}
@@ -1066,20 +1112,20 @@ nfsd4_cld_create(struct nfs4_client *clp)
1066 if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) 1112 if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
1067 return; 1113 return;
1068 1114
1069 cup = alloc_cld_upcall(cn); 1115 cup = alloc_cld_upcall(nn);
1070 if (!cup) { 1116 if (!cup) {
1071 ret = -ENOMEM; 1117 ret = -ENOMEM;
1072 goto out_err; 1118 goto out_err;
1073 } 1119 }
1074 1120
1075 cup->cu_msg.cm_cmd = Cld_Create; 1121 cup->cu_u.cu_msg.cm_cmd = Cld_Create;
1076 cup->cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len; 1122 cup->cu_u.cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
1077 memcpy(cup->cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data, 1123 memcpy(cup->cu_u.cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
1078 clp->cl_name.len); 1124 clp->cl_name.len);
1079 1125
1080 ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg); 1126 ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg);
1081 if (!ret) { 1127 if (!ret) {
1082 ret = cup->cu_msg.cm_status; 1128 ret = cup->cu_u.cu_msg.cm_status;
1083 set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags); 1129 set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
1084 } 1130 }
1085 1131
@@ -1092,6 +1138,75 @@ out_err:
1092 1138
1093/* Ask daemon to create a new record */ 1139/* Ask daemon to create a new record */
1094static void 1140static void
1141nfsd4_cld_create_v2(struct nfs4_client *clp)
1142{
1143 int ret;
1144 struct cld_upcall *cup;
1145 struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
1146 struct cld_net *cn = nn->cld_net;
1147 struct cld_msg_v2 *cmsg;
1148 struct crypto_shash *tfm = cn->cn_tfm;
1149 struct xdr_netobj cksum;
1150 char *principal = NULL;
1151 SHASH_DESC_ON_STACK(desc, tfm);
1152
1153 /* Don't upcall if it's already stored */
1154 if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
1155 return;
1156
1157 cup = alloc_cld_upcall(nn);
1158 if (!cup) {
1159 ret = -ENOMEM;
1160 goto out_err;
1161 }
1162
1163 cmsg = &cup->cu_u.cu_msg_v2;
1164 cmsg->cm_cmd = Cld_Create;
1165 cmsg->cm_u.cm_clntinfo.cc_name.cn_len = clp->cl_name.len;
1166 memcpy(cmsg->cm_u.cm_clntinfo.cc_name.cn_id, clp->cl_name.data,
1167 clp->cl_name.len);
1168 if (clp->cl_cred.cr_raw_principal)
1169 principal = clp->cl_cred.cr_raw_principal;
1170 else if (clp->cl_cred.cr_principal)
1171 principal = clp->cl_cred.cr_principal;
1172 if (principal) {
1173 desc->tfm = tfm;
1174 cksum.len = crypto_shash_digestsize(tfm);
1175 cksum.data = kmalloc(cksum.len, GFP_KERNEL);
1176 if (cksum.data == NULL) {
1177 ret = -ENOMEM;
1178 goto out;
1179 }
1180 ret = crypto_shash_digest(desc, principal, strlen(principal),
1181 cksum.data);
1182 shash_desc_zero(desc);
1183 if (ret) {
1184 kfree(cksum.data);
1185 goto out;
1186 }
1187 cmsg->cm_u.cm_clntinfo.cc_princhash.cp_len = cksum.len;
1188 memcpy(cmsg->cm_u.cm_clntinfo.cc_princhash.cp_data,
1189 cksum.data, cksum.len);
1190 kfree(cksum.data);
1191 } else
1192 cmsg->cm_u.cm_clntinfo.cc_princhash.cp_len = 0;
1193
1194 ret = cld_pipe_upcall(cn->cn_pipe, cmsg);
1195 if (!ret) {
1196 ret = cmsg->cm_status;
1197 set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
1198 }
1199
1200out:
1201 free_cld_upcall(cup);
1202out_err:
1203 if (ret)
1204 pr_err("NFSD: Unable to create client record on stable storage: %d\n",
1205 ret);
1206}
1207
1208/* Ask daemon to create a new record */
1209static void
1095nfsd4_cld_remove(struct nfs4_client *clp) 1210nfsd4_cld_remove(struct nfs4_client *clp)
1096{ 1211{
1097 int ret; 1212 int ret;
@@ -1103,20 +1218,20 @@ nfsd4_cld_remove(struct nfs4_client *clp)
1103 if (!test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) 1218 if (!test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
1104 return; 1219 return;
1105 1220
1106 cup = alloc_cld_upcall(cn); 1221 cup = alloc_cld_upcall(nn);
1107 if (!cup) { 1222 if (!cup) {
1108 ret = -ENOMEM; 1223 ret = -ENOMEM;
1109 goto out_err; 1224 goto out_err;
1110 } 1225 }
1111 1226
1112 cup->cu_msg.cm_cmd = Cld_Remove; 1227 cup->cu_u.cu_msg.cm_cmd = Cld_Remove;
1113 cup->cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len; 1228 cup->cu_u.cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
1114 memcpy(cup->cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data, 1229 memcpy(cup->cu_u.cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
1115 clp->cl_name.len); 1230 clp->cl_name.len);
1116 1231
1117 ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg); 1232 ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg);
1118 if (!ret) { 1233 if (!ret) {
1119 ret = cup->cu_msg.cm_status; 1234 ret = cup->cu_u.cu_msg.cm_status;
1120 clear_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags); 1235 clear_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
1121 } 1236 }
1122 1237
@@ -1145,21 +1260,21 @@ nfsd4_cld_check_v0(struct nfs4_client *clp)
1145 if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) 1260 if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
1146 return 0; 1261 return 0;
1147 1262
1148 cup = alloc_cld_upcall(cn); 1263 cup = alloc_cld_upcall(nn);
1149 if (!cup) { 1264 if (!cup) {
1150 printk(KERN_ERR "NFSD: Unable to check client record on " 1265 printk(KERN_ERR "NFSD: Unable to check client record on "
1151 "stable storage: %d\n", -ENOMEM); 1266 "stable storage: %d\n", -ENOMEM);
1152 return -ENOMEM; 1267 return -ENOMEM;
1153 } 1268 }
1154 1269
1155 cup->cu_msg.cm_cmd = Cld_Check; 1270 cup->cu_u.cu_msg.cm_cmd = Cld_Check;
1156 cup->cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len; 1271 cup->cu_u.cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
1157 memcpy(cup->cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data, 1272 memcpy(cup->cu_u.cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
1158 clp->cl_name.len); 1273 clp->cl_name.len);
1159 1274
1160 ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg); 1275 ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg);
1161 if (!ret) { 1276 if (!ret) {
1162 ret = cup->cu_msg.cm_status; 1277 ret = cup->cu_u.cu_msg.cm_status;
1163 set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags); 1278 set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
1164 } 1279 }
1165 1280
@@ -1217,22 +1332,95 @@ found:
1217} 1332}
1218 1333
1219static int 1334static int
1335nfsd4_cld_check_v2(struct nfs4_client *clp)
1336{
1337 struct nfs4_client_reclaim *crp;
1338 struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
1339 struct cld_net *cn = nn->cld_net;
1340 int status;
1341 char dname[HEXDIR_LEN];
1342 struct xdr_netobj name;
1343 struct crypto_shash *tfm = cn->cn_tfm;
1344 struct xdr_netobj cksum;
1345 char *principal = NULL;
1346 SHASH_DESC_ON_STACK(desc, tfm);
1347
1348 /* did we already find that this client is stable? */
1349 if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
1350 return 0;
1351
1352 /* look for it in the reclaim hashtable otherwise */
1353 crp = nfsd4_find_reclaim_client(clp->cl_name, nn);
1354 if (crp)
1355 goto found;
1356
1357 if (cn->cn_has_legacy) {
1358 status = nfs4_make_rec_clidname(dname, &clp->cl_name);
1359 if (status)
1360 return -ENOENT;
1361
1362 name.data = kmemdup(dname, HEXDIR_LEN, GFP_KERNEL);
1363 if (!name.data) {
1364 dprintk("%s: failed to allocate memory for name.data\n",
1365 __func__);
1366 return -ENOENT;
1367 }
1368 name.len = HEXDIR_LEN;
1369 crp = nfsd4_find_reclaim_client(name, nn);
1370 kfree(name.data);
1371 if (crp)
1372 goto found;
1373
1374 }
1375 return -ENOENT;
1376found:
1377 if (crp->cr_princhash.len) {
1378 if (clp->cl_cred.cr_raw_principal)
1379 principal = clp->cl_cred.cr_raw_principal;
1380 else if (clp->cl_cred.cr_principal)
1381 principal = clp->cl_cred.cr_principal;
1382 if (principal == NULL)
1383 return -ENOENT;
1384 desc->tfm = tfm;
1385 cksum.len = crypto_shash_digestsize(tfm);
1386 cksum.data = kmalloc(cksum.len, GFP_KERNEL);
1387 if (cksum.data == NULL)
1388 return -ENOENT;
1389 status = crypto_shash_digest(desc, principal, strlen(principal),
1390 cksum.data);
1391 shash_desc_zero(desc);
1392 if (status) {
1393 kfree(cksum.data);
1394 return -ENOENT;
1395 }
1396 if (memcmp(crp->cr_princhash.data, cksum.data,
1397 crp->cr_princhash.len)) {
1398 kfree(cksum.data);
1399 return -ENOENT;
1400 }
1401 kfree(cksum.data);
1402 }
1403 crp->cr_clp = clp;
1404 return 0;
1405}
1406
1407static int
1220nfsd4_cld_grace_start(struct nfsd_net *nn) 1408nfsd4_cld_grace_start(struct nfsd_net *nn)
1221{ 1409{
1222 int ret; 1410 int ret;
1223 struct cld_upcall *cup; 1411 struct cld_upcall *cup;
1224 struct cld_net *cn = nn->cld_net; 1412 struct cld_net *cn = nn->cld_net;
1225 1413
1226 cup = alloc_cld_upcall(cn); 1414 cup = alloc_cld_upcall(nn);
1227 if (!cup) { 1415 if (!cup) {
1228 ret = -ENOMEM; 1416 ret = -ENOMEM;
1229 goto out_err; 1417 goto out_err;
1230 } 1418 }
1231 1419
1232 cup->cu_msg.cm_cmd = Cld_GraceStart; 1420 cup->cu_u.cu_msg.cm_cmd = Cld_GraceStart;
1233 ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg); 1421 ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg);
1234 if (!ret) 1422 if (!ret)
1235 ret = cup->cu_msg.cm_status; 1423 ret = cup->cu_u.cu_msg.cm_status;
1236 1424
1237 free_cld_upcall(cup); 1425 free_cld_upcall(cup);
1238out_err: 1426out_err:
@@ -1250,17 +1438,17 @@ nfsd4_cld_grace_done_v0(struct nfsd_net *nn)
1250 struct cld_upcall *cup; 1438 struct cld_upcall *cup;
1251 struct cld_net *cn = nn->cld_net; 1439 struct cld_net *cn = nn->cld_net;
1252 1440
1253 cup = alloc_cld_upcall(cn); 1441 cup = alloc_cld_upcall(nn);
1254 if (!cup) { 1442 if (!cup) {
1255 ret = -ENOMEM; 1443 ret = -ENOMEM;
1256 goto out_err; 1444 goto out_err;
1257 } 1445 }
1258 1446
1259 cup->cu_msg.cm_cmd = Cld_GraceDone; 1447 cup->cu_u.cu_msg.cm_cmd = Cld_GraceDone;
1260 cup->cu_msg.cm_u.cm_gracetime = (int64_t)nn->boot_time; 1448 cup->cu_u.cu_msg.cm_u.cm_gracetime = (int64_t)nn->boot_time;
1261 ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg); 1449 ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg);
1262 if (!ret) 1450 if (!ret)
1263 ret = cup->cu_msg.cm_status; 1451 ret = cup->cu_u.cu_msg.cm_status;
1264 1452
1265 free_cld_upcall(cup); 1453 free_cld_upcall(cup);
1266out_err: 1454out_err:
@@ -1279,16 +1467,16 @@ nfsd4_cld_grace_done(struct nfsd_net *nn)
1279 struct cld_upcall *cup; 1467 struct cld_upcall *cup;
1280 struct cld_net *cn = nn->cld_net; 1468 struct cld_net *cn = nn->cld_net;
1281 1469
1282 cup = alloc_cld_upcall(cn); 1470 cup = alloc_cld_upcall(nn);
1283 if (!cup) { 1471 if (!cup) {
1284 ret = -ENOMEM; 1472 ret = -ENOMEM;
1285 goto out_err; 1473 goto out_err;
1286 } 1474 }
1287 1475
1288 cup->cu_msg.cm_cmd = Cld_GraceDone; 1476 cup->cu_u.cu_msg.cm_cmd = Cld_GraceDone;
1289 ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg); 1477 ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg);
1290 if (!ret) 1478 if (!ret)
1291 ret = cup->cu_msg.cm_status; 1479 ret = cup->cu_u.cu_msg.cm_status;
1292 1480
1293 free_cld_upcall(cup); 1481 free_cld_upcall(cup);
1294out_err: 1482out_err:
@@ -1337,6 +1525,53 @@ cld_running(struct nfsd_net *nn)
1337} 1525}
1338 1526
1339static int 1527static int
1528nfsd4_cld_get_version(struct nfsd_net *nn)
1529{
1530 int ret = 0;
1531 struct cld_upcall *cup;
1532 struct cld_net *cn = nn->cld_net;
1533 uint8_t version;
1534
1535 cup = alloc_cld_upcall(nn);
1536 if (!cup) {
1537 ret = -ENOMEM;
1538 goto out_err;
1539 }
1540 cup->cu_u.cu_msg.cm_cmd = Cld_GetVersion;
1541 ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg);
1542 if (!ret) {
1543 ret = cup->cu_u.cu_msg.cm_status;
1544 if (ret)
1545 goto out_free;
1546 version = cup->cu_u.cu_msg.cm_u.cm_version;
1547 dprintk("%s: userspace returned version %u\n",
1548 __func__, version);
1549 if (version < 1)
1550 version = 1;
1551 else if (version > CLD_UPCALL_VERSION)
1552 version = CLD_UPCALL_VERSION;
1553
1554 switch (version) {
1555 case 1:
1556 nn->client_tracking_ops = &nfsd4_cld_tracking_ops;
1557 break;
1558 case 2:
1559 nn->client_tracking_ops = &nfsd4_cld_tracking_ops_v2;
1560 break;
1561 default:
1562 break;
1563 }
1564 }
1565out_free:
1566 free_cld_upcall(cup);
1567out_err:
1568 if (ret)
1569 dprintk("%s: Unable to get version from userspace: %d\n",
1570 __func__, ret);
1571 return ret;
1572}
1573
1574static int
1340nfsd4_cld_tracking_init(struct net *net) 1575nfsd4_cld_tracking_init(struct net *net)
1341{ 1576{
1342 int status; 1577 int status;
@@ -1351,6 +1586,11 @@ nfsd4_cld_tracking_init(struct net *net)
1351 status = __nfsd4_init_cld_pipe(net); 1586 status = __nfsd4_init_cld_pipe(net);
1352 if (status) 1587 if (status)
1353 goto err_shutdown; 1588 goto err_shutdown;
1589 nn->cld_net->cn_tfm = crypto_alloc_shash("sha256", 0, 0);
1590 if (IS_ERR(nn->cld_net->cn_tfm)) {
1591 status = PTR_ERR(nn->cld_net->cn_tfm);
1592 goto err_remove;
1593 }
1354 1594
1355 /* 1595 /*
1356 * rpc pipe upcalls take 30 seconds to time out, so we don't want to 1596 * rpc pipe upcalls take 30 seconds to time out, so we don't want to
@@ -1368,10 +1608,14 @@ nfsd4_cld_tracking_init(struct net *net)
1368 goto err_remove; 1608 goto err_remove;
1369 } 1609 }
1370 1610
1611 status = nfsd4_cld_get_version(nn);
1612 if (status == -EOPNOTSUPP)
1613 pr_warn("NFSD: nfsdcld GetVersion upcall failed. Please upgrade nfsdcld.\n");
1614
1371 status = nfsd4_cld_grace_start(nn); 1615 status = nfsd4_cld_grace_start(nn);
1372 if (status) { 1616 if (status) {
1373 if (status == -EOPNOTSUPP) 1617 if (status == -EOPNOTSUPP)
1374 printk(KERN_WARNING "NFSD: Please upgrade nfsdcld.\n"); 1618 pr_warn("NFSD: nfsdcld GraceStart upcall failed. Please upgrade nfsdcld.\n");
1375 nfs4_release_reclaim(nn); 1619 nfs4_release_reclaim(nn);
1376 goto err_remove; 1620 goto err_remove;
1377 } else 1621 } else
@@ -1403,6 +1647,8 @@ static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops_v0 = {
1403 .remove = nfsd4_cld_remove, 1647 .remove = nfsd4_cld_remove,
1404 .check = nfsd4_cld_check_v0, 1648 .check = nfsd4_cld_check_v0,
1405 .grace_done = nfsd4_cld_grace_done_v0, 1649 .grace_done = nfsd4_cld_grace_done_v0,
1650 .version = 1,
1651 .msglen = sizeof(struct cld_msg),
1406}; 1652};
1407 1653
1408/* For newer nfsdcld's */ 1654/* For newer nfsdcld's */
@@ -1413,6 +1659,20 @@ static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops = {
1413 .remove = nfsd4_cld_remove, 1659 .remove = nfsd4_cld_remove,
1414 .check = nfsd4_cld_check, 1660 .check = nfsd4_cld_check,
1415 .grace_done = nfsd4_cld_grace_done, 1661 .grace_done = nfsd4_cld_grace_done,
1662 .version = 1,
1663 .msglen = sizeof(struct cld_msg),
1664};
1665
1666/* v2 create/check ops include the principal, if available */
1667static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops_v2 = {
1668 .init = nfsd4_cld_tracking_init,
1669 .exit = nfsd4_cld_tracking_exit,
1670 .create = nfsd4_cld_create_v2,
1671 .remove = nfsd4_cld_remove,
1672 .check = nfsd4_cld_check_v2,
1673 .grace_done = nfsd4_cld_grace_done,
1674 .version = 2,
1675 .msglen = sizeof(struct cld_msg_v2),
1416}; 1676};
1417 1677
1418/* upcall via usermodehelper */ 1678/* upcall via usermodehelper */
@@ -1760,6 +2020,8 @@ static const struct nfsd4_client_tracking_ops nfsd4_umh_tracking_ops = {
1760 .remove = nfsd4_umh_cltrack_remove, 2020 .remove = nfsd4_umh_cltrack_remove,
1761 .check = nfsd4_umh_cltrack_check, 2021 .check = nfsd4_umh_cltrack_check,
1762 .grace_done = nfsd4_umh_cltrack_grace_done, 2022 .grace_done = nfsd4_umh_cltrack_grace_done,
2023 .version = 1,
2024 .msglen = 0,
1763}; 2025};
1764 2026
1765int 2027int
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 7857942c5ca6..c65aeaa812d4 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -50,6 +50,7 @@
50 50
51#include "netns.h" 51#include "netns.h"
52#include "pnfs.h" 52#include "pnfs.h"
53#include "filecache.h"
53 54
54#define NFSDDBG_FACILITY NFSDDBG_PROC 55#define NFSDDBG_FACILITY NFSDDBG_PROC
55 56
@@ -429,18 +430,18 @@ put_nfs4_file(struct nfs4_file *fi)
429 } 430 }
430} 431}
431 432
432static struct file * 433static struct nfsd_file *
433__nfs4_get_fd(struct nfs4_file *f, int oflag) 434__nfs4_get_fd(struct nfs4_file *f, int oflag)
434{ 435{
435 if (f->fi_fds[oflag]) 436 if (f->fi_fds[oflag])
436 return get_file(f->fi_fds[oflag]); 437 return nfsd_file_get(f->fi_fds[oflag]);
437 return NULL; 438 return NULL;
438} 439}
439 440
440static struct file * 441static struct nfsd_file *
441find_writeable_file_locked(struct nfs4_file *f) 442find_writeable_file_locked(struct nfs4_file *f)
442{ 443{
443 struct file *ret; 444 struct nfsd_file *ret;
444 445
445 lockdep_assert_held(&f->fi_lock); 446 lockdep_assert_held(&f->fi_lock);
446 447
@@ -450,10 +451,10 @@ find_writeable_file_locked(struct nfs4_file *f)
450 return ret; 451 return ret;
451} 452}
452 453
453static struct file * 454static struct nfsd_file *
454find_writeable_file(struct nfs4_file *f) 455find_writeable_file(struct nfs4_file *f)
455{ 456{
456 struct file *ret; 457 struct nfsd_file *ret;
457 458
458 spin_lock(&f->fi_lock); 459 spin_lock(&f->fi_lock);
459 ret = find_writeable_file_locked(f); 460 ret = find_writeable_file_locked(f);
@@ -462,9 +463,10 @@ find_writeable_file(struct nfs4_file *f)
462 return ret; 463 return ret;
463} 464}
464 465
465static struct file *find_readable_file_locked(struct nfs4_file *f) 466static struct nfsd_file *
467find_readable_file_locked(struct nfs4_file *f)
466{ 468{
467 struct file *ret; 469 struct nfsd_file *ret;
468 470
469 lockdep_assert_held(&f->fi_lock); 471 lockdep_assert_held(&f->fi_lock);
470 472
@@ -474,10 +476,10 @@ static struct file *find_readable_file_locked(struct nfs4_file *f)
474 return ret; 476 return ret;
475} 477}
476 478
477static struct file * 479static struct nfsd_file *
478find_readable_file(struct nfs4_file *f) 480find_readable_file(struct nfs4_file *f)
479{ 481{
480 struct file *ret; 482 struct nfsd_file *ret;
481 483
482 spin_lock(&f->fi_lock); 484 spin_lock(&f->fi_lock);
483 ret = find_readable_file_locked(f); 485 ret = find_readable_file_locked(f);
@@ -486,10 +488,10 @@ find_readable_file(struct nfs4_file *f)
486 return ret; 488 return ret;
487} 489}
488 490
489struct file * 491struct nfsd_file *
490find_any_file(struct nfs4_file *f) 492find_any_file(struct nfs4_file *f)
491{ 493{
492 struct file *ret; 494 struct nfsd_file *ret;
493 495
494 spin_lock(&f->fi_lock); 496 spin_lock(&f->fi_lock);
495 ret = __nfs4_get_fd(f, O_RDWR); 497 ret = __nfs4_get_fd(f, O_RDWR);
@@ -590,17 +592,17 @@ static void __nfs4_file_put_access(struct nfs4_file *fp, int oflag)
590 might_lock(&fp->fi_lock); 592 might_lock(&fp->fi_lock);
591 593
592 if (atomic_dec_and_lock(&fp->fi_access[oflag], &fp->fi_lock)) { 594 if (atomic_dec_and_lock(&fp->fi_access[oflag], &fp->fi_lock)) {
593 struct file *f1 = NULL; 595 struct nfsd_file *f1 = NULL;
594 struct file *f2 = NULL; 596 struct nfsd_file *f2 = NULL;
595 597
596 swap(f1, fp->fi_fds[oflag]); 598 swap(f1, fp->fi_fds[oflag]);
597 if (atomic_read(&fp->fi_access[1 - oflag]) == 0) 599 if (atomic_read(&fp->fi_access[1 - oflag]) == 0)
598 swap(f2, fp->fi_fds[O_RDWR]); 600 swap(f2, fp->fi_fds[O_RDWR]);
599 spin_unlock(&fp->fi_lock); 601 spin_unlock(&fp->fi_lock);
600 if (f1) 602 if (f1)
601 fput(f1); 603 nfsd_file_put(f1);
602 if (f2) 604 if (f2)
603 fput(f2); 605 nfsd_file_put(f2);
604 } 606 }
605} 607}
606 608
@@ -933,25 +935,25 @@ nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid)
933 935
934static void put_deleg_file(struct nfs4_file *fp) 936static void put_deleg_file(struct nfs4_file *fp)
935{ 937{
936 struct file *filp = NULL; 938 struct nfsd_file *nf = NULL;
937 939
938 spin_lock(&fp->fi_lock); 940 spin_lock(&fp->fi_lock);
939 if (--fp->fi_delegees == 0) 941 if (--fp->fi_delegees == 0)
940 swap(filp, fp->fi_deleg_file); 942 swap(nf, fp->fi_deleg_file);
941 spin_unlock(&fp->fi_lock); 943 spin_unlock(&fp->fi_lock);
942 944
943 if (filp) 945 if (nf)
944 fput(filp); 946 nfsd_file_put(nf);
945} 947}
946 948
947static void nfs4_unlock_deleg_lease(struct nfs4_delegation *dp) 949static void nfs4_unlock_deleg_lease(struct nfs4_delegation *dp)
948{ 950{
949 struct nfs4_file *fp = dp->dl_stid.sc_file; 951 struct nfs4_file *fp = dp->dl_stid.sc_file;
950 struct file *filp = fp->fi_deleg_file; 952 struct nfsd_file *nf = fp->fi_deleg_file;
951 953
952 WARN_ON_ONCE(!fp->fi_delegees); 954 WARN_ON_ONCE(!fp->fi_delegees);
953 955
954 vfs_setlease(filp, F_UNLCK, NULL, (void **)&dp); 956 vfs_setlease(nf->nf_file, F_UNLCK, NULL, (void **)&dp);
955 put_deleg_file(fp); 957 put_deleg_file(fp);
956} 958}
957 959
@@ -1289,11 +1291,14 @@ static void nfs4_free_lock_stateid(struct nfs4_stid *stid)
1289{ 1291{
1290 struct nfs4_ol_stateid *stp = openlockstateid(stid); 1292 struct nfs4_ol_stateid *stp = openlockstateid(stid);
1291 struct nfs4_lockowner *lo = lockowner(stp->st_stateowner); 1293 struct nfs4_lockowner *lo = lockowner(stp->st_stateowner);
1292 struct file *file; 1294 struct nfsd_file *nf;
1293 1295
1294 file = find_any_file(stp->st_stid.sc_file); 1296 nf = find_any_file(stp->st_stid.sc_file);
1295 if (file) 1297 if (nf) {
1296 filp_close(file, (fl_owner_t)lo); 1298 get_file(nf->nf_file);
1299 filp_close(nf->nf_file, (fl_owner_t)lo);
1300 nfsd_file_put(nf);
1301 }
1297 nfs4_free_ol_stateid(stid); 1302 nfs4_free_ol_stateid(stid);
1298} 1303}
1299 1304
@@ -1563,21 +1568,39 @@ static inline u32 slot_bytes(struct nfsd4_channel_attrs *ca)
1563 * re-negotiate active sessions and reduce their slot usage to make 1568 * re-negotiate active sessions and reduce their slot usage to make
1564 * room for new connections. For now we just fail the create session. 1569 * room for new connections. For now we just fail the create session.
1565 */ 1570 */
1566static u32 nfsd4_get_drc_mem(struct nfsd4_channel_attrs *ca) 1571static u32 nfsd4_get_drc_mem(struct nfsd4_channel_attrs *ca, struct nfsd_net *nn)
1567{ 1572{
1568 u32 slotsize = slot_bytes(ca); 1573 u32 slotsize = slot_bytes(ca);
1569 u32 num = ca->maxreqs; 1574 u32 num = ca->maxreqs;
1570 unsigned long avail, total_avail; 1575 unsigned long avail, total_avail;
1576 unsigned int scale_factor;
1571 1577
1572 spin_lock(&nfsd_drc_lock); 1578 spin_lock(&nfsd_drc_lock);
1573 total_avail = nfsd_drc_max_mem - nfsd_drc_mem_used; 1579 if (nfsd_drc_max_mem > nfsd_drc_mem_used)
1580 total_avail = nfsd_drc_max_mem - nfsd_drc_mem_used;
1581 else
1582 /* We have handed out more space than we chose in
1583 * set_max_drc() to allow. That isn't really a
1584 * problem as long as that doesn't make us think we
1585 * have lots more due to integer overflow.
1586 */
1587 total_avail = 0;
1574 avail = min((unsigned long)NFSD_MAX_MEM_PER_SESSION, total_avail); 1588 avail = min((unsigned long)NFSD_MAX_MEM_PER_SESSION, total_avail);
1575 /* 1589 /*
1576 * Never use more than a third of the remaining memory, 1590 * Never use more than a fraction of the remaining memory,
1577 * unless it's the only way to give this client a slot: 1591 * unless it's the only way to give this client a slot.
1592 * The chosen fraction is either 1/8 or 1/number of threads,
1593 * whichever is smaller. This ensures there are adequate
1594 * slots to support multiple clients per thread.
1595 * Give the client one slot even if that would require
1596 * over-allocation--it is better than failure.
1578 */ 1597 */
1579 avail = clamp_t(unsigned long, avail, slotsize, total_avail/3); 1598 scale_factor = max_t(unsigned int, 8, nn->nfsd_serv->sv_nrthreads);
1599
1600 avail = clamp_t(unsigned long, avail, slotsize,
1601 total_avail/scale_factor);
1580 num = min_t(int, num, avail / slotsize); 1602 num = min_t(int, num, avail / slotsize);
1603 num = max_t(int, num, 1);
1581 nfsd_drc_mem_used += num * slotsize; 1604 nfsd_drc_mem_used += num * slotsize;
1582 spin_unlock(&nfsd_drc_lock); 1605 spin_unlock(&nfsd_drc_lock);
1583 1606
@@ -2323,9 +2346,9 @@ static void states_stop(struct seq_file *s, void *v)
2323 spin_unlock(&clp->cl_lock); 2346 spin_unlock(&clp->cl_lock);
2324} 2347}
2325 2348
2326static void nfs4_show_superblock(struct seq_file *s, struct file *f) 2349static void nfs4_show_superblock(struct seq_file *s, struct nfsd_file *f)
2327{ 2350{
2328 struct inode *inode = file_inode(f); 2351 struct inode *inode = f->nf_inode;
2329 2352
2330 seq_printf(s, "superblock: \"%02x:%02x:%ld\"", 2353 seq_printf(s, "superblock: \"%02x:%02x:%ld\"",
2331 MAJOR(inode->i_sb->s_dev), 2354 MAJOR(inode->i_sb->s_dev),
@@ -2343,7 +2366,7 @@ static int nfs4_show_open(struct seq_file *s, struct nfs4_stid *st)
2343{ 2366{
2344 struct nfs4_ol_stateid *ols; 2367 struct nfs4_ol_stateid *ols;
2345 struct nfs4_file *nf; 2368 struct nfs4_file *nf;
2346 struct file *file; 2369 struct nfsd_file *file;
2347 struct nfs4_stateowner *oo; 2370 struct nfs4_stateowner *oo;
2348 unsigned int access, deny; 2371 unsigned int access, deny;
2349 2372
@@ -2370,7 +2393,7 @@ static int nfs4_show_open(struct seq_file *s, struct nfs4_stid *st)
2370 seq_printf(s, ", "); 2393 seq_printf(s, ", ");
2371 nfs4_show_owner(s, oo); 2394 nfs4_show_owner(s, oo);
2372 seq_printf(s, " }\n"); 2395 seq_printf(s, " }\n");
2373 fput(file); 2396 nfsd_file_put(file);
2374 2397
2375 return 0; 2398 return 0;
2376} 2399}
@@ -2379,7 +2402,7 @@ static int nfs4_show_lock(struct seq_file *s, struct nfs4_stid *st)
2379{ 2402{
2380 struct nfs4_ol_stateid *ols; 2403 struct nfs4_ol_stateid *ols;
2381 struct nfs4_file *nf; 2404 struct nfs4_file *nf;
2382 struct file *file; 2405 struct nfsd_file *file;
2383 struct nfs4_stateowner *oo; 2406 struct nfs4_stateowner *oo;
2384 2407
2385 ols = openlockstateid(st); 2408 ols = openlockstateid(st);
@@ -2401,7 +2424,7 @@ static int nfs4_show_lock(struct seq_file *s, struct nfs4_stid *st)
2401 seq_printf(s, ", "); 2424 seq_printf(s, ", ");
2402 nfs4_show_owner(s, oo); 2425 nfs4_show_owner(s, oo);
2403 seq_printf(s, " }\n"); 2426 seq_printf(s, " }\n");
2404 fput(file); 2427 nfsd_file_put(file);
2405 2428
2406 return 0; 2429 return 0;
2407} 2430}
@@ -2410,7 +2433,7 @@ static int nfs4_show_deleg(struct seq_file *s, struct nfs4_stid *st)
2410{ 2433{
2411 struct nfs4_delegation *ds; 2434 struct nfs4_delegation *ds;
2412 struct nfs4_file *nf; 2435 struct nfs4_file *nf;
2413 struct file *file; 2436 struct nfsd_file *file;
2414 2437
2415 ds = delegstateid(st); 2438 ds = delegstateid(st);
2416 nf = st->sc_file; 2439 nf = st->sc_file;
@@ -2433,7 +2456,7 @@ static int nfs4_show_deleg(struct seq_file *s, struct nfs4_stid *st)
2433static int nfs4_show_layout(struct seq_file *s, struct nfs4_stid *st) 2456static int nfs4_show_layout(struct seq_file *s, struct nfs4_stid *st)
2434{ 2457{
2435 struct nfs4_layout_stateid *ls; 2458 struct nfs4_layout_stateid *ls;
2436 struct file *file; 2459 struct nfsd_file *file;
2437 2460
2438 ls = container_of(st, struct nfs4_layout_stateid, ls_stid); 2461 ls = container_of(st, struct nfs4_layout_stateid, ls_stid);
2439 file = ls->ls_file; 2462 file = ls->ls_file;
@@ -3169,10 +3192,10 @@ static __be32 check_forechannel_attrs(struct nfsd4_channel_attrs *ca, struct nfs
3169 * performance. When short on memory we therefore prefer to 3192 * performance. When short on memory we therefore prefer to
3170 * decrease number of slots instead of their size. Clients that 3193 * decrease number of slots instead of their size. Clients that
3171 * request larger slots than they need will get poor results: 3194 * request larger slots than they need will get poor results:
3195 * Note that we always allow at least one slot, because our
3196 * accounting is soft and provides no guarantees either way.
3172 */ 3197 */
3173 ca->maxreqs = nfsd4_get_drc_mem(ca); 3198 ca->maxreqs = nfsd4_get_drc_mem(ca, nn);
3174 if (!ca->maxreqs)
3175 return nfserr_jukebox;
3176 3199
3177 return nfs_ok; 3200 return nfs_ok;
3178} 3201}
@@ -4651,7 +4674,7 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp,
4651 struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp, 4674 struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp,
4652 struct nfsd4_open *open) 4675 struct nfsd4_open *open)
4653{ 4676{
4654 struct file *filp = NULL; 4677 struct nfsd_file *nf = NULL;
4655 __be32 status; 4678 __be32 status;
4656 int oflag = nfs4_access_to_omode(open->op_share_access); 4679 int oflag = nfs4_access_to_omode(open->op_share_access);
4657 int access = nfs4_access_to_access(open->op_share_access); 4680 int access = nfs4_access_to_access(open->op_share_access);
@@ -4687,18 +4710,18 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp,
4687 4710
4688 if (!fp->fi_fds[oflag]) { 4711 if (!fp->fi_fds[oflag]) {
4689 spin_unlock(&fp->fi_lock); 4712 spin_unlock(&fp->fi_lock);
4690 status = nfsd_open(rqstp, cur_fh, S_IFREG, access, &filp); 4713 status = nfsd_file_acquire(rqstp, cur_fh, access, &nf);
4691 if (status) 4714 if (status)
4692 goto out_put_access; 4715 goto out_put_access;
4693 spin_lock(&fp->fi_lock); 4716 spin_lock(&fp->fi_lock);
4694 if (!fp->fi_fds[oflag]) { 4717 if (!fp->fi_fds[oflag]) {
4695 fp->fi_fds[oflag] = filp; 4718 fp->fi_fds[oflag] = nf;
4696 filp = NULL; 4719 nf = NULL;
4697 } 4720 }
4698 } 4721 }
4699 spin_unlock(&fp->fi_lock); 4722 spin_unlock(&fp->fi_lock);
4700 if (filp) 4723 if (nf)
4701 fput(filp); 4724 nfsd_file_put(nf);
4702 4725
4703 status = nfsd4_truncate(rqstp, cur_fh, open); 4726 status = nfsd4_truncate(rqstp, cur_fh, open);
4704 if (status) 4727 if (status)
@@ -4767,7 +4790,7 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_delegation *dp,
4767 fl->fl_end = OFFSET_MAX; 4790 fl->fl_end = OFFSET_MAX;
4768 fl->fl_owner = (fl_owner_t)dp; 4791 fl->fl_owner = (fl_owner_t)dp;
4769 fl->fl_pid = current->tgid; 4792 fl->fl_pid = current->tgid;
4770 fl->fl_file = dp->dl_stid.sc_file->fi_deleg_file; 4793 fl->fl_file = dp->dl_stid.sc_file->fi_deleg_file->nf_file;
4771 return fl; 4794 return fl;
4772} 4795}
4773 4796
@@ -4777,7 +4800,7 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
4777{ 4800{
4778 int status = 0; 4801 int status = 0;
4779 struct nfs4_delegation *dp; 4802 struct nfs4_delegation *dp;
4780 struct file *filp; 4803 struct nfsd_file *nf;
4781 struct file_lock *fl; 4804 struct file_lock *fl;
4782 4805
4783 /* 4806 /*
@@ -4788,8 +4811,8 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
4788 if (fp->fi_had_conflict) 4811 if (fp->fi_had_conflict)
4789 return ERR_PTR(-EAGAIN); 4812 return ERR_PTR(-EAGAIN);
4790 4813
4791 filp = find_readable_file(fp); 4814 nf = find_readable_file(fp);
4792 if (!filp) { 4815 if (!nf) {
4793 /* We should always have a readable file here */ 4816 /* We should always have a readable file here */
4794 WARN_ON_ONCE(1); 4817 WARN_ON_ONCE(1);
4795 return ERR_PTR(-EBADF); 4818 return ERR_PTR(-EBADF);
@@ -4799,17 +4822,17 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
4799 if (nfs4_delegation_exists(clp, fp)) 4822 if (nfs4_delegation_exists(clp, fp))
4800 status = -EAGAIN; 4823 status = -EAGAIN;
4801 else if (!fp->fi_deleg_file) { 4824 else if (!fp->fi_deleg_file) {
4802 fp->fi_deleg_file = filp; 4825 fp->fi_deleg_file = nf;
4803 /* increment early to prevent fi_deleg_file from being 4826 /* increment early to prevent fi_deleg_file from being
4804 * cleared */ 4827 * cleared */
4805 fp->fi_delegees = 1; 4828 fp->fi_delegees = 1;
4806 filp = NULL; 4829 nf = NULL;
4807 } else 4830 } else
4808 fp->fi_delegees++; 4831 fp->fi_delegees++;
4809 spin_unlock(&fp->fi_lock); 4832 spin_unlock(&fp->fi_lock);
4810 spin_unlock(&state_lock); 4833 spin_unlock(&state_lock);
4811 if (filp) 4834 if (nf)
4812 fput(filp); 4835 nfsd_file_put(nf);
4813 if (status) 4836 if (status)
4814 return ERR_PTR(status); 4837 return ERR_PTR(status);
4815 4838
@@ -4822,7 +4845,7 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
4822 if (!fl) 4845 if (!fl)
4823 goto out_clnt_odstate; 4846 goto out_clnt_odstate;
4824 4847
4825 status = vfs_setlease(fp->fi_deleg_file, fl->fl_type, &fl, NULL); 4848 status = vfs_setlease(fp->fi_deleg_file->nf_file, fl->fl_type, &fl, NULL);
4826 if (fl) 4849 if (fl)
4827 locks_free_lock(fl); 4850 locks_free_lock(fl);
4828 if (status) 4851 if (status)
@@ -4842,7 +4865,7 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
4842 4865
4843 return dp; 4866 return dp;
4844out_unlock: 4867out_unlock:
4845 vfs_setlease(fp->fi_deleg_file, F_UNLCK, NULL, (void **)&dp); 4868 vfs_setlease(fp->fi_deleg_file->nf_file, F_UNLCK, NULL, (void **)&dp);
4846out_clnt_odstate: 4869out_clnt_odstate:
4847 put_clnt_odstate(dp->dl_clnt_odstate); 4870 put_clnt_odstate(dp->dl_clnt_odstate);
4848 nfs4_put_stid(&dp->dl_stid); 4871 nfs4_put_stid(&dp->dl_stid);
@@ -5513,7 +5536,7 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
5513 return nfs_ok; 5536 return nfs_ok;
5514} 5537}
5515 5538
5516static struct file * 5539static struct nfsd_file *
5517nfs4_find_file(struct nfs4_stid *s, int flags) 5540nfs4_find_file(struct nfs4_stid *s, int flags)
5518{ 5541{
5519 if (!s) 5542 if (!s)
@@ -5523,7 +5546,7 @@ nfs4_find_file(struct nfs4_stid *s, int flags)
5523 case NFS4_DELEG_STID: 5546 case NFS4_DELEG_STID:
5524 if (WARN_ON_ONCE(!s->sc_file->fi_deleg_file)) 5547 if (WARN_ON_ONCE(!s->sc_file->fi_deleg_file))
5525 return NULL; 5548 return NULL;
5526 return get_file(s->sc_file->fi_deleg_file); 5549 return nfsd_file_get(s->sc_file->fi_deleg_file);
5527 case NFS4_OPEN_STID: 5550 case NFS4_OPEN_STID:
5528 case NFS4_LOCK_STID: 5551 case NFS4_LOCK_STID:
5529 if (flags & RD_STATE) 5552 if (flags & RD_STATE)
@@ -5549,32 +5572,28 @@ nfs4_check_olstateid(struct nfs4_ol_stateid *ols, int flags)
5549 5572
5550static __be32 5573static __be32
5551nfs4_check_file(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfs4_stid *s, 5574nfs4_check_file(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfs4_stid *s,
5552 struct file **filpp, bool *tmp_file, int flags) 5575 struct nfsd_file **nfp, int flags)
5553{ 5576{
5554 int acc = (flags & RD_STATE) ? NFSD_MAY_READ : NFSD_MAY_WRITE; 5577 int acc = (flags & RD_STATE) ? NFSD_MAY_READ : NFSD_MAY_WRITE;
5555 struct file *file; 5578 struct nfsd_file *nf;
5556 __be32 status; 5579 __be32 status;
5557 5580
5558 file = nfs4_find_file(s, flags); 5581 nf = nfs4_find_file(s, flags);
5559 if (file) { 5582 if (nf) {
5560 status = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry, 5583 status = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry,
5561 acc | NFSD_MAY_OWNER_OVERRIDE); 5584 acc | NFSD_MAY_OWNER_OVERRIDE);
5562 if (status) { 5585 if (status) {
5563 fput(file); 5586 nfsd_file_put(nf);
5564 return status; 5587 goto out;
5565 } 5588 }
5566
5567 *filpp = file;
5568 } else { 5589 } else {
5569 status = nfsd_open(rqstp, fhp, S_IFREG, acc, filpp); 5590 status = nfsd_file_acquire(rqstp, fhp, acc, &nf);
5570 if (status) 5591 if (status)
5571 return status; 5592 return status;
5572
5573 if (tmp_file)
5574 *tmp_file = true;
5575 } 5593 }
5576 5594 *nfp = nf;
5577 return 0; 5595out:
5596 return status;
5578} 5597}
5579 5598
5580/* 5599/*
@@ -5583,7 +5602,7 @@ nfs4_check_file(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfs4_stid *s,
5583__be32 5602__be32
5584nfs4_preprocess_stateid_op(struct svc_rqst *rqstp, 5603nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
5585 struct nfsd4_compound_state *cstate, struct svc_fh *fhp, 5604 struct nfsd4_compound_state *cstate, struct svc_fh *fhp,
5586 stateid_t *stateid, int flags, struct file **filpp, bool *tmp_file) 5605 stateid_t *stateid, int flags, struct nfsd_file **nfp)
5587{ 5606{
5588 struct inode *ino = d_inode(fhp->fh_dentry); 5607 struct inode *ino = d_inode(fhp->fh_dentry);
5589 struct net *net = SVC_NET(rqstp); 5608 struct net *net = SVC_NET(rqstp);
@@ -5591,10 +5610,8 @@ nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
5591 struct nfs4_stid *s = NULL; 5610 struct nfs4_stid *s = NULL;
5592 __be32 status; 5611 __be32 status;
5593 5612
5594 if (filpp) 5613 if (nfp)
5595 *filpp = NULL; 5614 *nfp = NULL;
5596 if (tmp_file)
5597 *tmp_file = false;
5598 5615
5599 if (grace_disallows_io(net, ino)) 5616 if (grace_disallows_io(net, ino))
5600 return nfserr_grace; 5617 return nfserr_grace;
@@ -5631,8 +5648,8 @@ nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
5631 status = nfs4_check_fh(fhp, s); 5648 status = nfs4_check_fh(fhp, s);
5632 5649
5633done: 5650done:
5634 if (!status && filpp) 5651 if (status == nfs_ok && nfp)
5635 status = nfs4_check_file(rqstp, fhp, s, filpp, tmp_file, flags); 5652 status = nfs4_check_file(rqstp, fhp, s, nfp, flags);
5636out: 5653out:
5637 if (s) 5654 if (s)
5638 nfs4_put_stid(s); 5655 nfs4_put_stid(s);
@@ -6392,7 +6409,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
6392 struct nfs4_ol_stateid *lock_stp = NULL; 6409 struct nfs4_ol_stateid *lock_stp = NULL;
6393 struct nfs4_ol_stateid *open_stp = NULL; 6410 struct nfs4_ol_stateid *open_stp = NULL;
6394 struct nfs4_file *fp; 6411 struct nfs4_file *fp;
6395 struct file *filp = NULL; 6412 struct nfsd_file *nf = NULL;
6396 struct nfsd4_blocked_lock *nbl = NULL; 6413 struct nfsd4_blocked_lock *nbl = NULL;
6397 struct file_lock *file_lock = NULL; 6414 struct file_lock *file_lock = NULL;
6398 struct file_lock *conflock = NULL; 6415 struct file_lock *conflock = NULL;
@@ -6474,8 +6491,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
6474 /* Fallthrough */ 6491 /* Fallthrough */
6475 case NFS4_READ_LT: 6492 case NFS4_READ_LT:
6476 spin_lock(&fp->fi_lock); 6493 spin_lock(&fp->fi_lock);
6477 filp = find_readable_file_locked(fp); 6494 nf = find_readable_file_locked(fp);
6478 if (filp) 6495 if (nf)
6479 get_lock_access(lock_stp, NFS4_SHARE_ACCESS_READ); 6496 get_lock_access(lock_stp, NFS4_SHARE_ACCESS_READ);
6480 spin_unlock(&fp->fi_lock); 6497 spin_unlock(&fp->fi_lock);
6481 fl_type = F_RDLCK; 6498 fl_type = F_RDLCK;
@@ -6486,8 +6503,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
6486 /* Fallthrough */ 6503 /* Fallthrough */
6487 case NFS4_WRITE_LT: 6504 case NFS4_WRITE_LT:
6488 spin_lock(&fp->fi_lock); 6505 spin_lock(&fp->fi_lock);
6489 filp = find_writeable_file_locked(fp); 6506 nf = find_writeable_file_locked(fp);
6490 if (filp) 6507 if (nf)
6491 get_lock_access(lock_stp, NFS4_SHARE_ACCESS_WRITE); 6508 get_lock_access(lock_stp, NFS4_SHARE_ACCESS_WRITE);
6492 spin_unlock(&fp->fi_lock); 6509 spin_unlock(&fp->fi_lock);
6493 fl_type = F_WRLCK; 6510 fl_type = F_WRLCK;
@@ -6497,7 +6514,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
6497 goto out; 6514 goto out;
6498 } 6515 }
6499 6516
6500 if (!filp) { 6517 if (!nf) {
6501 status = nfserr_openmode; 6518 status = nfserr_openmode;
6502 goto out; 6519 goto out;
6503 } 6520 }
@@ -6513,7 +6530,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
6513 file_lock->fl_type = fl_type; 6530 file_lock->fl_type = fl_type;
6514 file_lock->fl_owner = (fl_owner_t)lockowner(nfs4_get_stateowner(&lock_sop->lo_owner)); 6531 file_lock->fl_owner = (fl_owner_t)lockowner(nfs4_get_stateowner(&lock_sop->lo_owner));
6515 file_lock->fl_pid = current->tgid; 6532 file_lock->fl_pid = current->tgid;
6516 file_lock->fl_file = filp; 6533 file_lock->fl_file = nf->nf_file;
6517 file_lock->fl_flags = fl_flags; 6534 file_lock->fl_flags = fl_flags;
6518 file_lock->fl_lmops = &nfsd_posix_mng_ops; 6535 file_lock->fl_lmops = &nfsd_posix_mng_ops;
6519 file_lock->fl_start = lock->lk_offset; 6536 file_lock->fl_start = lock->lk_offset;
@@ -6535,7 +6552,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
6535 spin_unlock(&nn->blocked_locks_lock); 6552 spin_unlock(&nn->blocked_locks_lock);
6536 } 6553 }
6537 6554
6538 err = vfs_lock_file(filp, F_SETLK, file_lock, conflock); 6555 err = vfs_lock_file(nf->nf_file, F_SETLK, file_lock, conflock);
6539 switch (err) { 6556 switch (err) {
6540 case 0: /* success! */ 6557 case 0: /* success! */
6541 nfs4_inc_and_copy_stateid(&lock->lk_resp_stateid, &lock_stp->st_stid); 6558 nfs4_inc_and_copy_stateid(&lock->lk_resp_stateid, &lock_stp->st_stid);
@@ -6570,8 +6587,8 @@ out:
6570 } 6587 }
6571 free_blocked_lock(nbl); 6588 free_blocked_lock(nbl);
6572 } 6589 }
6573 if (filp) 6590 if (nf)
6574 fput(filp); 6591 nfsd_file_put(nf);
6575 if (lock_stp) { 6592 if (lock_stp) {
6576 /* Bump seqid manually if the 4.0 replay owner is openowner */ 6593 /* Bump seqid manually if the 4.0 replay owner is openowner */
6577 if (cstate->replay_owner && 6594 if (cstate->replay_owner &&
@@ -6606,11 +6623,11 @@ out:
6606 */ 6623 */
6607static __be32 nfsd_test_lock(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file_lock *lock) 6624static __be32 nfsd_test_lock(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file_lock *lock)
6608{ 6625{
6609 struct file *file; 6626 struct nfsd_file *nf;
6610 __be32 err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file); 6627 __be32 err = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_READ, &nf);
6611 if (!err) { 6628 if (!err) {
6612 err = nfserrno(vfs_test_lock(file, lock)); 6629 err = nfserrno(vfs_test_lock(nf->nf_file, lock));
6613 fput(file); 6630 nfsd_file_put(nf);
6614 } 6631 }
6615 return err; 6632 return err;
6616} 6633}
@@ -6698,7 +6715,7 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
6698{ 6715{
6699 struct nfsd4_locku *locku = &u->locku; 6716 struct nfsd4_locku *locku = &u->locku;
6700 struct nfs4_ol_stateid *stp; 6717 struct nfs4_ol_stateid *stp;
6701 struct file *filp = NULL; 6718 struct nfsd_file *nf = NULL;
6702 struct file_lock *file_lock = NULL; 6719 struct file_lock *file_lock = NULL;
6703 __be32 status; 6720 __be32 status;
6704 int err; 6721 int err;
@@ -6716,8 +6733,8 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
6716 &stp, nn); 6733 &stp, nn);
6717 if (status) 6734 if (status)
6718 goto out; 6735 goto out;
6719 filp = find_any_file(stp->st_stid.sc_file); 6736 nf = find_any_file(stp->st_stid.sc_file);
6720 if (!filp) { 6737 if (!nf) {
6721 status = nfserr_lock_range; 6738 status = nfserr_lock_range;
6722 goto put_stateid; 6739 goto put_stateid;
6723 } 6740 }
@@ -6725,13 +6742,13 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
6725 if (!file_lock) { 6742 if (!file_lock) {
6726 dprintk("NFSD: %s: unable to allocate lock!\n", __func__); 6743 dprintk("NFSD: %s: unable to allocate lock!\n", __func__);
6727 status = nfserr_jukebox; 6744 status = nfserr_jukebox;
6728 goto fput; 6745 goto put_file;
6729 } 6746 }
6730 6747
6731 file_lock->fl_type = F_UNLCK; 6748 file_lock->fl_type = F_UNLCK;
6732 file_lock->fl_owner = (fl_owner_t)lockowner(nfs4_get_stateowner(stp->st_stateowner)); 6749 file_lock->fl_owner = (fl_owner_t)lockowner(nfs4_get_stateowner(stp->st_stateowner));
6733 file_lock->fl_pid = current->tgid; 6750 file_lock->fl_pid = current->tgid;
6734 file_lock->fl_file = filp; 6751 file_lock->fl_file = nf->nf_file;
6735 file_lock->fl_flags = FL_POSIX; 6752 file_lock->fl_flags = FL_POSIX;
6736 file_lock->fl_lmops = &nfsd_posix_mng_ops; 6753 file_lock->fl_lmops = &nfsd_posix_mng_ops;
6737 file_lock->fl_start = locku->lu_offset; 6754 file_lock->fl_start = locku->lu_offset;
@@ -6740,14 +6757,14 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
6740 locku->lu_length); 6757 locku->lu_length);
6741 nfs4_transform_lock_offset(file_lock); 6758 nfs4_transform_lock_offset(file_lock);
6742 6759
6743 err = vfs_lock_file(filp, F_SETLK, file_lock, NULL); 6760 err = vfs_lock_file(nf->nf_file, F_SETLK, file_lock, NULL);
6744 if (err) { 6761 if (err) {
6745 dprintk("NFSD: nfs4_locku: vfs_lock_file failed!\n"); 6762 dprintk("NFSD: nfs4_locku: vfs_lock_file failed!\n");
6746 goto out_nfserr; 6763 goto out_nfserr;
6747 } 6764 }
6748 nfs4_inc_and_copy_stateid(&locku->lu_stateid, &stp->st_stid); 6765 nfs4_inc_and_copy_stateid(&locku->lu_stateid, &stp->st_stid);
6749fput: 6766put_file:
6750 fput(filp); 6767 nfsd_file_put(nf);
6751put_stateid: 6768put_stateid:
6752 mutex_unlock(&stp->st_mutex); 6769 mutex_unlock(&stp->st_mutex);
6753 nfs4_put_stid(&stp->st_stid); 6770 nfs4_put_stid(&stp->st_stid);
@@ -6759,7 +6776,7 @@ out:
6759 6776
6760out_nfserr: 6777out_nfserr:
6761 status = nfserrno(err); 6778 status = nfserrno(err);
6762 goto fput; 6779 goto put_file;
6763} 6780}
6764 6781
6765/* 6782/*
@@ -6772,17 +6789,17 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
6772{ 6789{
6773 struct file_lock *fl; 6790 struct file_lock *fl;
6774 int status = false; 6791 int status = false;
6775 struct file *filp = find_any_file(fp); 6792 struct nfsd_file *nf = find_any_file(fp);
6776 struct inode *inode; 6793 struct inode *inode;
6777 struct file_lock_context *flctx; 6794 struct file_lock_context *flctx;
6778 6795
6779 if (!filp) { 6796 if (!nf) {
6780 /* Any valid lock stateid should have some sort of access */ 6797 /* Any valid lock stateid should have some sort of access */
6781 WARN_ON_ONCE(1); 6798 WARN_ON_ONCE(1);
6782 return status; 6799 return status;
6783 } 6800 }
6784 6801
6785 inode = locks_inode(filp); 6802 inode = locks_inode(nf->nf_file);
6786 flctx = inode->i_flctx; 6803 flctx = inode->i_flctx;
6787 6804
6788 if (flctx && !list_empty_careful(&flctx->flc_posix)) { 6805 if (flctx && !list_empty_careful(&flctx->flc_posix)) {
@@ -6795,7 +6812,7 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
6795 } 6812 }
6796 spin_unlock(&flctx->flc_lock); 6813 spin_unlock(&flctx->flc_lock);
6797 } 6814 }
6798 fput(filp); 6815 nfsd_file_put(nf);
6799 return status; 6816 return status;
6800} 6817}
6801 6818
@@ -6888,7 +6905,8 @@ nfs4_has_reclaimed_state(struct xdr_netobj name, struct nfsd_net *nn)
6888 * will be freed in nfs4_remove_reclaim_record in the normal case). 6905 * will be freed in nfs4_remove_reclaim_record in the normal case).
6889 */ 6906 */
6890struct nfs4_client_reclaim * 6907struct nfs4_client_reclaim *
6891nfs4_client_to_reclaim(struct xdr_netobj name, struct nfsd_net *nn) 6908nfs4_client_to_reclaim(struct xdr_netobj name, struct xdr_netobj princhash,
6909 struct nfsd_net *nn)
6892{ 6910{
6893 unsigned int strhashval; 6911 unsigned int strhashval;
6894 struct nfs4_client_reclaim *crp; 6912 struct nfs4_client_reclaim *crp;
@@ -6901,6 +6919,8 @@ nfs4_client_to_reclaim(struct xdr_netobj name, struct nfsd_net *nn)
6901 list_add(&crp->cr_strhash, &nn->reclaim_str_hashtbl[strhashval]); 6919 list_add(&crp->cr_strhash, &nn->reclaim_str_hashtbl[strhashval]);
6902 crp->cr_name.data = name.data; 6920 crp->cr_name.data = name.data;
6903 crp->cr_name.len = name.len; 6921 crp->cr_name.len = name.len;
6922 crp->cr_princhash.data = princhash.data;
6923 crp->cr_princhash.len = princhash.len;
6904 crp->cr_clp = NULL; 6924 crp->cr_clp = NULL;
6905 nn->reclaim_str_hashtbl_size++; 6925 nn->reclaim_str_hashtbl_size++;
6906 } 6926 }
@@ -6912,6 +6932,7 @@ nfs4_remove_reclaim_record(struct nfs4_client_reclaim *crp, struct nfsd_net *nn)
6912{ 6932{
6913 list_del(&crp->cr_strhash); 6933 list_del(&crp->cr_strhash);
6914 kfree(crp->cr_name.data); 6934 kfree(crp->cr_name.data);
6935 kfree(crp->cr_princhash.data);
6915 kfree(crp); 6936 kfree(crp);
6916 nn->reclaim_str_hashtbl_size--; 6937 nn->reclaim_str_hashtbl_size--;
6917} 6938}
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 442811809f3d..533d0fc3c96b 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -49,6 +49,7 @@
49#include "cache.h" 49#include "cache.h"
50#include "netns.h" 50#include "netns.h"
51#include "pnfs.h" 51#include "pnfs.h"
52#include "filecache.h"
52 53
53#ifdef CONFIG_NFSD_V4_SECURITY_LABEL 54#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
54#include <linux/security.h> 55#include <linux/security.h>
@@ -203,6 +204,13 @@ static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes)
203 return p; 204 return p;
204} 205}
205 206
207static unsigned int compoundargs_bytes_left(struct nfsd4_compoundargs *argp)
208{
209 unsigned int this = (char *)argp->end - (char *)argp->p;
210
211 return this + argp->pagelen;
212}
213
206static int zero_clientid(clientid_t *clid) 214static int zero_clientid(clientid_t *clid)
207{ 215{
208 return (clid->cl_boot == 0) && (clid->cl_id == 0); 216 return (clid->cl_boot == 0) && (clid->cl_id == 0);
@@ -211,10 +219,10 @@ static int zero_clientid(clientid_t *clid)
211/** 219/**
212 * svcxdr_tmpalloc - allocate memory to be freed after compound processing 220 * svcxdr_tmpalloc - allocate memory to be freed after compound processing
213 * @argp: NFSv4 compound argument structure 221 * @argp: NFSv4 compound argument structure
214 * @p: pointer to be freed (with kfree()) 222 * @len: length of buffer to allocate
215 * 223 *
216 * Marks @p to be freed when processing the compound operation 224 * Allocates a buffer of size @len to be freed when processing the compound
217 * described in @argp finishes. 225 * operation described in @argp finishes.
218 */ 226 */
219static void * 227static void *
220svcxdr_tmpalloc(struct nfsd4_compoundargs *argp, u32 len) 228svcxdr_tmpalloc(struct nfsd4_compoundargs *argp, u32 len)
@@ -347,7 +355,12 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
347 READ_BUF(4); len += 4; 355 READ_BUF(4); len += 4;
348 nace = be32_to_cpup(p++); 356 nace = be32_to_cpup(p++);
349 357
350 if (nace > NFS4_ACL_MAX) 358 if (nace > compoundargs_bytes_left(argp)/20)
359 /*
360 * Even with 4-byte names there wouldn't be
361 * space for that many aces; something fishy is
362 * going on:
363 */
351 return nfserr_fbig; 364 return nfserr_fbig;
352 365
353 *acl = svcxdr_tmpalloc(argp, nfs4_acl_bytes(nace)); 366 *acl = svcxdr_tmpalloc(argp, nfs4_acl_bytes(nace));
@@ -1418,7 +1431,6 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,
1418 struct nfsd4_create_session *sess) 1431 struct nfsd4_create_session *sess)
1419{ 1432{
1420 DECODE_HEAD; 1433 DECODE_HEAD;
1421 u32 dummy;
1422 1434
1423 READ_BUF(16); 1435 READ_BUF(16);
1424 COPYMEM(&sess->clientid, 8); 1436 COPYMEM(&sess->clientid, 8);
@@ -1427,7 +1439,7 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,
1427 1439
1428 /* Fore channel attrs */ 1440 /* Fore channel attrs */
1429 READ_BUF(28); 1441 READ_BUF(28);
1430 dummy = be32_to_cpup(p++); /* headerpadsz is always 0 */ 1442 p++; /* headerpadsz is always 0 */
1431 sess->fore_channel.maxreq_sz = be32_to_cpup(p++); 1443 sess->fore_channel.maxreq_sz = be32_to_cpup(p++);
1432 sess->fore_channel.maxresp_sz = be32_to_cpup(p++); 1444 sess->fore_channel.maxresp_sz = be32_to_cpup(p++);
1433 sess->fore_channel.maxresp_cached = be32_to_cpup(p++); 1445 sess->fore_channel.maxresp_cached = be32_to_cpup(p++);
@@ -1444,7 +1456,7 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,
1444 1456
1445 /* Back channel attrs */ 1457 /* Back channel attrs */
1446 READ_BUF(28); 1458 READ_BUF(28);
1447 dummy = be32_to_cpup(p++); /* headerpadsz is always 0 */ 1459 p++; /* headerpadsz is always 0 */
1448 sess->back_channel.maxreq_sz = be32_to_cpup(p++); 1460 sess->back_channel.maxreq_sz = be32_to_cpup(p++);
1449 sess->back_channel.maxresp_sz = be32_to_cpup(p++); 1461 sess->back_channel.maxresp_sz = be32_to_cpup(p++);
1450 sess->back_channel.maxresp_cached = be32_to_cpup(p++); 1462 sess->back_channel.maxresp_cached = be32_to_cpup(p++);
@@ -1736,7 +1748,6 @@ static __be32
1736nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy) 1748nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy)
1737{ 1749{
1738 DECODE_HEAD; 1750 DECODE_HEAD;
1739 unsigned int tmp;
1740 1751
1741 status = nfsd4_decode_stateid(argp, &copy->cp_src_stateid); 1752 status = nfsd4_decode_stateid(argp, &copy->cp_src_stateid);
1742 if (status) 1753 if (status)
@@ -1751,7 +1762,7 @@ nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy)
1751 p = xdr_decode_hyper(p, &copy->cp_count); 1762 p = xdr_decode_hyper(p, &copy->cp_count);
1752 p++; /* ca_consecutive: we always do consecutive copies */ 1763 p++; /* ca_consecutive: we always do consecutive copies */
1753 copy->cp_synchronous = be32_to_cpup(p++); 1764 copy->cp_synchronous = be32_to_cpup(p++);
1754 tmp = be32_to_cpup(p); /* Source server list not supported */ 1765 /* tmp = be32_to_cpup(p); Source server list not supported */
1755 1766
1756 DECODE_TAIL; 1767 DECODE_TAIL;
1757} 1768}
@@ -3217,9 +3228,8 @@ nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
3217 if (!p) 3228 if (!p)
3218 return nfserr_resource; 3229 return nfserr_resource;
3219 encode_cinfo(p, &create->cr_cinfo); 3230 encode_cinfo(p, &create->cr_cinfo);
3220 nfserr = nfsd4_encode_bitmap(xdr, create->cr_bmval[0], 3231 return nfsd4_encode_bitmap(xdr, create->cr_bmval[0],
3221 create->cr_bmval[1], create->cr_bmval[2]); 3232 create->cr_bmval[1], create->cr_bmval[2]);
3222 return 0;
3223} 3233}
3224 3234
3225static __be32 3235static __be32
@@ -3462,7 +3472,7 @@ static __be32 nfsd4_encode_splice_read(
3462 3472
3463 len = maxcount; 3473 len = maxcount;
3464 nfserr = nfsd_splice_read(read->rd_rqstp, read->rd_fhp, 3474 nfserr = nfsd_splice_read(read->rd_rqstp, read->rd_fhp,
3465 file, read->rd_offset, &maxcount); 3475 file, read->rd_offset, &maxcount, &eof);
3466 read->rd_length = maxcount; 3476 read->rd_length = maxcount;
3467 if (nfserr) { 3477 if (nfserr) {
3468 /* 3478 /*
@@ -3474,9 +3484,6 @@ static __be32 nfsd4_encode_splice_read(
3474 return nfserr; 3484 return nfserr;
3475 } 3485 }
3476 3486
3477 eof = nfsd_eof_on_read(len, maxcount, read->rd_offset,
3478 d_inode(read->rd_fhp->fh_dentry)->i_size);
3479
3480 *(p++) = htonl(eof); 3487 *(p++) = htonl(eof);
3481 *(p++) = htonl(maxcount); 3488 *(p++) = htonl(maxcount);
3482 3489
@@ -3547,15 +3554,13 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
3547 3554
3548 len = maxcount; 3555 len = maxcount;
3549 nfserr = nfsd_readv(resp->rqstp, read->rd_fhp, file, read->rd_offset, 3556 nfserr = nfsd_readv(resp->rqstp, read->rd_fhp, file, read->rd_offset,
3550 resp->rqstp->rq_vec, read->rd_vlen, &maxcount); 3557 resp->rqstp->rq_vec, read->rd_vlen, &maxcount,
3558 &eof);
3551 read->rd_length = maxcount; 3559 read->rd_length = maxcount;
3552 if (nfserr) 3560 if (nfserr)
3553 return nfserr; 3561 return nfserr;
3554 xdr_truncate_encode(xdr, starting_len + 8 + ((maxcount+3)&~3)); 3562 xdr_truncate_encode(xdr, starting_len + 8 + ((maxcount+3)&~3));
3555 3563
3556 eof = nfsd_eof_on_read(len, maxcount, read->rd_offset,
3557 d_inode(read->rd_fhp->fh_dentry)->i_size);
3558
3559 tmp = htonl(eof); 3564 tmp = htonl(eof);
3560 write_bytes_to_xdr_buf(xdr->buf, starting_len , &tmp, 4); 3565 write_bytes_to_xdr_buf(xdr->buf, starting_len , &tmp, 4);
3561 tmp = htonl(maxcount); 3566 tmp = htonl(maxcount);
@@ -3574,11 +3579,14 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
3574{ 3579{
3575 unsigned long maxcount; 3580 unsigned long maxcount;
3576 struct xdr_stream *xdr = &resp->xdr; 3581 struct xdr_stream *xdr = &resp->xdr;
3577 struct file *file = read->rd_filp; 3582 struct file *file;
3578 int starting_len = xdr->buf->len; 3583 int starting_len = xdr->buf->len;
3579 struct raparms *ra = NULL;
3580 __be32 *p; 3584 __be32 *p;
3581 3585
3586 if (nfserr)
3587 return nfserr;
3588 file = read->rd_nf->nf_file;
3589
3582 p = xdr_reserve_space(xdr, 8); /* eof flag and byte count */ 3590 p = xdr_reserve_space(xdr, 8); /* eof flag and byte count */
3583 if (!p) { 3591 if (!p) {
3584 WARN_ON_ONCE(test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags)); 3592 WARN_ON_ONCE(test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags));
@@ -3596,18 +3604,12 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
3596 (xdr->buf->buflen - xdr->buf->len)); 3604 (xdr->buf->buflen - xdr->buf->len));
3597 maxcount = min_t(unsigned long, maxcount, read->rd_length); 3605 maxcount = min_t(unsigned long, maxcount, read->rd_length);
3598 3606
3599 if (read->rd_tmp_file)
3600 ra = nfsd_init_raparms(file);
3601
3602 if (file->f_op->splice_read && 3607 if (file->f_op->splice_read &&
3603 test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags)) 3608 test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags))
3604 nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount); 3609 nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount);
3605 else 3610 else
3606 nfserr = nfsd4_encode_readv(resp, read, file, maxcount); 3611 nfserr = nfsd4_encode_readv(resp, read, file, maxcount);
3607 3612
3608 if (ra)
3609 nfsd_put_raparams(file, ra);
3610
3611 if (nfserr) 3613 if (nfserr)
3612 xdr_truncate_encode(xdr, starting_len); 3614 xdr_truncate_encode(xdr, starting_len);
3613 3615
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 2c215171c0eb..11b42c523f04 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1476,6 +1476,7 @@ static __net_init int nfsd_init_net(struct net *net)
1476 1476
1477 atomic_set(&nn->ntf_refcnt, 0); 1477 atomic_set(&nn->ntf_refcnt, 0);
1478 init_waitqueue_head(&nn->ntf_wq); 1478 init_waitqueue_head(&nn->ntf_wq);
1479 seqlock_init(&nn->boot_lock);
1479 1480
1480 mnt = vfs_kern_mount(&nfsd_fs_type, SB_KERNMOUNT, "nfsd", NULL); 1481 mnt = vfs_kern_mount(&nfsd_fs_type, SB_KERNMOUNT, "nfsd", NULL);
1481 if (IS_ERR(mnt)) { 1482 if (IS_ERR(mnt)) {
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 0d20fd161225..c83ddac22f38 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -172,6 +172,7 @@ nfsd_proc_read(struct svc_rqst *rqstp)
172 struct nfsd_readargs *argp = rqstp->rq_argp; 172 struct nfsd_readargs *argp = rqstp->rq_argp;
173 struct nfsd_readres *resp = rqstp->rq_resp; 173 struct nfsd_readres *resp = rqstp->rq_resp;
174 __be32 nfserr; 174 __be32 nfserr;
175 u32 eof;
175 176
176 dprintk("nfsd: READ %s %d bytes at %d\n", 177 dprintk("nfsd: READ %s %d bytes at %d\n",
177 SVCFH_fmt(&argp->fh), 178 SVCFH_fmt(&argp->fh),
@@ -195,7 +196,8 @@ nfsd_proc_read(struct svc_rqst *rqstp)
195 nfserr = nfsd_read(rqstp, fh_copy(&resp->fh, &argp->fh), 196 nfserr = nfsd_read(rqstp, fh_copy(&resp->fh, &argp->fh),
196 argp->offset, 197 argp->offset,
197 rqstp->rq_vec, argp->vlen, 198 rqstp->rq_vec, argp->vlen,
198 &resp->count); 199 &resp->count,
200 &eof);
199 201
200 if (nfserr) return nfserr; 202 if (nfserr) return nfserr;
201 return fh_getattr(&resp->fh, &resp->stat); 203 return fh_getattr(&resp->fh, &resp->stat);
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 18d94ea984ba..fdf7ed4bd5dd 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -27,6 +27,7 @@
27#include "cache.h" 27#include "cache.h"
28#include "vfs.h" 28#include "vfs.h"
29#include "netns.h" 29#include "netns.h"
30#include "filecache.h"
30 31
31#define NFSDDBG_FACILITY NFSDDBG_SVC 32#define NFSDDBG_FACILITY NFSDDBG_SVC
32 33
@@ -313,22 +314,17 @@ static int nfsd_startup_generic(int nrservs)
313 if (nfsd_users++) 314 if (nfsd_users++)
314 return 0; 315 return 0;
315 316
316 /* 317 ret = nfsd_file_cache_init();
317 * Readahead param cache - will no-op if it already exists.
318 * (Note therefore results will be suboptimal if number of
319 * threads is modified after nfsd start.)
320 */
321 ret = nfsd_racache_init(2*nrservs);
322 if (ret) 318 if (ret)
323 goto dec_users; 319 goto dec_users;
324 320
325 ret = nfs4_state_start(); 321 ret = nfs4_state_start();
326 if (ret) 322 if (ret)
327 goto out_racache; 323 goto out_file_cache;
328 return 0; 324 return 0;
329 325
330out_racache: 326out_file_cache:
331 nfsd_racache_shutdown(); 327 nfsd_file_cache_shutdown();
332dec_users: 328dec_users:
333 nfsd_users--; 329 nfsd_users--;
334 return ret; 330 return ret;
@@ -340,7 +336,7 @@ static void nfsd_shutdown_generic(void)
340 return; 336 return;
341 337
342 nfs4_state_shutdown(); 338 nfs4_state_shutdown();
343 nfsd_racache_shutdown(); 339 nfsd_file_cache_shutdown();
344} 340}
345 341
346static bool nfsd_needs_lockd(struct nfsd_net *nn) 342static bool nfsd_needs_lockd(struct nfsd_net *nn)
@@ -348,6 +344,35 @@ static bool nfsd_needs_lockd(struct nfsd_net *nn)
348 return nfsd_vers(nn, 2, NFSD_TEST) || nfsd_vers(nn, 3, NFSD_TEST); 344 return nfsd_vers(nn, 2, NFSD_TEST) || nfsd_vers(nn, 3, NFSD_TEST);
349} 345}
350 346
347void nfsd_copy_boot_verifier(__be32 verf[2], struct nfsd_net *nn)
348{
349 int seq = 0;
350
351 do {
352 read_seqbegin_or_lock(&nn->boot_lock, &seq);
353 /*
354 * This is opaque to client, so no need to byte-swap. Use
355 * __force to keep sparse happy. y2038 time_t overflow is
356 * irrelevant in this usage
357 */
358 verf[0] = (__force __be32)nn->nfssvc_boot.tv_sec;
359 verf[1] = (__force __be32)nn->nfssvc_boot.tv_nsec;
360 } while (need_seqretry(&nn->boot_lock, seq));
361 done_seqretry(&nn->boot_lock, seq);
362}
363
364static void nfsd_reset_boot_verifier_locked(struct nfsd_net *nn)
365{
366 ktime_get_real_ts64(&nn->nfssvc_boot);
367}
368
369void nfsd_reset_boot_verifier(struct nfsd_net *nn)
370{
371 write_seqlock(&nn->boot_lock);
372 nfsd_reset_boot_verifier_locked(nn);
373 write_sequnlock(&nn->boot_lock);
374}
375
351static int nfsd_startup_net(int nrservs, struct net *net, const struct cred *cred) 376static int nfsd_startup_net(int nrservs, struct net *net, const struct cred *cred)
352{ 377{
353 struct nfsd_net *nn = net_generic(net, nfsd_net_id); 378 struct nfsd_net *nn = net_generic(net, nfsd_net_id);
@@ -391,6 +416,7 @@ static void nfsd_shutdown_net(struct net *net)
391{ 416{
392 struct nfsd_net *nn = net_generic(net, nfsd_net_id); 417 struct nfsd_net *nn = net_generic(net, nfsd_net_id);
393 418
419 nfsd_file_cache_purge(net);
394 nfs4_state_shutdown_net(net); 420 nfs4_state_shutdown_net(net);
395 if (nn->lockd_up) { 421 if (nn->lockd_up) {
396 lockd_down(net); 422 lockd_down(net);
@@ -599,7 +625,7 @@ int nfsd_create_serv(struct net *net)
599#endif 625#endif
600 } 626 }
601 atomic_inc(&nn->ntf_refcnt); 627 atomic_inc(&nn->ntf_refcnt);
602 ktime_get_real_ts64(&nn->nfssvc_boot); /* record boot time */ 628 nfsd_reset_boot_verifier(nn);
603 return 0; 629 return 0;
604} 630}
605 631
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 5dbd16946e8e..46f56afb6cb8 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -378,6 +378,7 @@ struct nfs4_client_reclaim {
378 struct list_head cr_strhash; /* hash by cr_name */ 378 struct list_head cr_strhash; /* hash by cr_name */
379 struct nfs4_client *cr_clp; /* pointer to associated clp */ 379 struct nfs4_client *cr_clp; /* pointer to associated clp */
380 struct xdr_netobj cr_name; /* recovery dir name */ 380 struct xdr_netobj cr_name; /* recovery dir name */
381 struct xdr_netobj cr_princhash;
381}; 382};
382 383
383/* A reasonable value for REPLAY_ISIZE was estimated as follows: 384/* A reasonable value for REPLAY_ISIZE was estimated as follows:
@@ -506,7 +507,7 @@ struct nfs4_file {
506 }; 507 };
507 struct list_head fi_clnt_odstate; 508 struct list_head fi_clnt_odstate;
508 /* One each for O_RDONLY, O_WRONLY, O_RDWR: */ 509 /* One each for O_RDONLY, O_WRONLY, O_RDWR: */
509 struct file * fi_fds[3]; 510 struct nfsd_file *fi_fds[3];
510 /* 511 /*
511 * Each open or lock stateid contributes 0-4 to the counts 512 * Each open or lock stateid contributes 0-4 to the counts
512 * below depending on which bits are set in st_access_bitmap: 513 * below depending on which bits are set in st_access_bitmap:
@@ -516,7 +517,7 @@ struct nfs4_file {
516 */ 517 */
517 atomic_t fi_access[2]; 518 atomic_t fi_access[2];
518 u32 fi_share_deny; 519 u32 fi_share_deny;
519 struct file *fi_deleg_file; 520 struct nfsd_file *fi_deleg_file;
520 int fi_delegees; 521 int fi_delegees;
521 struct knfsd_fh fi_fhandle; 522 struct knfsd_fh fi_fhandle;
522 bool fi_had_conflict; 523 bool fi_had_conflict;
@@ -565,7 +566,7 @@ struct nfs4_layout_stateid {
565 spinlock_t ls_lock; 566 spinlock_t ls_lock;
566 struct list_head ls_layouts; 567 struct list_head ls_layouts;
567 u32 ls_layout_type; 568 u32 ls_layout_type;
568 struct file *ls_file; 569 struct nfsd_file *ls_file;
569 struct nfsd4_callback ls_recall; 570 struct nfsd4_callback ls_recall;
570 stateid_t ls_recall_sid; 571 stateid_t ls_recall_sid;
571 bool ls_recalled; 572 bool ls_recalled;
@@ -616,7 +617,7 @@ struct nfsd4_copy;
616 617
617extern __be32 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp, 618extern __be32 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
618 struct nfsd4_compound_state *cstate, struct svc_fh *fhp, 619 struct nfsd4_compound_state *cstate, struct svc_fh *fhp,
619 stateid_t *stateid, int flags, struct file **filp, bool *tmp_file); 620 stateid_t *stateid, int flags, struct nfsd_file **filp);
620__be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, 621__be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
621 stateid_t *stateid, unsigned char typemask, 622 stateid_t *stateid, unsigned char typemask,
622 struct nfs4_stid **s, struct nfsd_net *nn); 623 struct nfs4_stid **s, struct nfsd_net *nn);
@@ -645,7 +646,7 @@ extern void nfsd4_shutdown_callback(struct nfs4_client *);
645extern void nfsd4_shutdown_copy(struct nfs4_client *clp); 646extern void nfsd4_shutdown_copy(struct nfs4_client *clp);
646extern void nfsd4_prepare_cb_recall(struct nfs4_delegation *dp); 647extern void nfsd4_prepare_cb_recall(struct nfs4_delegation *dp);
647extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(struct xdr_netobj name, 648extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(struct xdr_netobj name,
648 struct nfsd_net *nn); 649 struct xdr_netobj princhash, struct nfsd_net *nn);
649extern bool nfs4_has_reclaimed_state(struct xdr_netobj name, struct nfsd_net *nn); 650extern bool nfs4_has_reclaimed_state(struct xdr_netobj name, struct nfsd_net *nn);
650 651
651struct nfs4_file *find_file(struct knfsd_fh *fh); 652struct nfs4_file *find_file(struct knfsd_fh *fh);
@@ -657,7 +658,7 @@ static inline void get_nfs4_file(struct nfs4_file *fi)
657{ 658{
658 refcount_inc(&fi->fi_ref); 659 refcount_inc(&fi->fi_ref);
659} 660}
660struct file *find_any_file(struct nfs4_file *f); 661struct nfsd_file *find_any_file(struct nfs4_file *f);
661 662
662/* grace period management */ 663/* grace period management */
663void nfsd4_end_grace(struct nfsd_net *nn); 664void nfsd4_end_grace(struct nfsd_net *nn);
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index 80933e4334d8..ffc78a0e28b2 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -126,6 +126,8 @@ DEFINE_NFSD_ERR_EVENT(read_err);
126DEFINE_NFSD_ERR_EVENT(write_err); 126DEFINE_NFSD_ERR_EVENT(write_err);
127 127
128#include "state.h" 128#include "state.h"
129#include "filecache.h"
130#include "vfs.h"
129 131
130DECLARE_EVENT_CLASS(nfsd_stateid_class, 132DECLARE_EVENT_CLASS(nfsd_stateid_class,
131 TP_PROTO(stateid_t *stp), 133 TP_PROTO(stateid_t *stp),
@@ -164,6 +166,144 @@ DEFINE_STATEID_EVENT(layout_recall_done);
164DEFINE_STATEID_EVENT(layout_recall_fail); 166DEFINE_STATEID_EVENT(layout_recall_fail);
165DEFINE_STATEID_EVENT(layout_recall_release); 167DEFINE_STATEID_EVENT(layout_recall_release);
166 168
169#define show_nf_flags(val) \
170 __print_flags(val, "|", \
171 { 1 << NFSD_FILE_HASHED, "HASHED" }, \
172 { 1 << NFSD_FILE_PENDING, "PENDING" }, \
173 { 1 << NFSD_FILE_BREAK_READ, "BREAK_READ" }, \
174 { 1 << NFSD_FILE_BREAK_WRITE, "BREAK_WRITE" }, \
175 { 1 << NFSD_FILE_REFERENCED, "REFERENCED"})
176
177/* FIXME: This should probably be fleshed out in the future. */
178#define show_nf_may(val) \
179 __print_flags(val, "|", \
180 { NFSD_MAY_READ, "READ" }, \
181 { NFSD_MAY_WRITE, "WRITE" }, \
182 { NFSD_MAY_NOT_BREAK_LEASE, "NOT_BREAK_LEASE" })
183
184DECLARE_EVENT_CLASS(nfsd_file_class,
185 TP_PROTO(struct nfsd_file *nf),
186 TP_ARGS(nf),
187 TP_STRUCT__entry(
188 __field(unsigned int, nf_hashval)
189 __field(void *, nf_inode)
190 __field(int, nf_ref)
191 __field(unsigned long, nf_flags)
192 __field(unsigned char, nf_may)
193 __field(struct file *, nf_file)
194 ),
195 TP_fast_assign(
196 __entry->nf_hashval = nf->nf_hashval;
197 __entry->nf_inode = nf->nf_inode;
198 __entry->nf_ref = atomic_read(&nf->nf_ref);
199 __entry->nf_flags = nf->nf_flags;
200 __entry->nf_may = nf->nf_may;
201 __entry->nf_file = nf->nf_file;
202 ),
203 TP_printk("hash=0x%x inode=0x%p ref=%d flags=%s may=%s file=%p",
204 __entry->nf_hashval,
205 __entry->nf_inode,
206 __entry->nf_ref,
207 show_nf_flags(__entry->nf_flags),
208 show_nf_may(__entry->nf_may),
209 __entry->nf_file)
210)
211
212#define DEFINE_NFSD_FILE_EVENT(name) \
213DEFINE_EVENT(nfsd_file_class, name, \
214 TP_PROTO(struct nfsd_file *nf), \
215 TP_ARGS(nf))
216
217DEFINE_NFSD_FILE_EVENT(nfsd_file_alloc);
218DEFINE_NFSD_FILE_EVENT(nfsd_file_put_final);
219DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash);
220DEFINE_NFSD_FILE_EVENT(nfsd_file_put);
221DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash_and_release_locked);
222
223TRACE_EVENT(nfsd_file_acquire,
224 TP_PROTO(struct svc_rqst *rqstp, unsigned int hash,
225 struct inode *inode, unsigned int may_flags,
226 struct nfsd_file *nf, __be32 status),
227
228 TP_ARGS(rqstp, hash, inode, may_flags, nf, status),
229
230 TP_STRUCT__entry(
231 __field(__be32, xid)
232 __field(unsigned int, hash)
233 __field(void *, inode)
234 __field(unsigned int, may_flags)
235 __field(int, nf_ref)
236 __field(unsigned long, nf_flags)
237 __field(unsigned char, nf_may)
238 __field(struct file *, nf_file)
239 __field(__be32, status)
240 ),
241
242 TP_fast_assign(
243 __entry->xid = rqstp->rq_xid;
244 __entry->hash = hash;
245 __entry->inode = inode;
246 __entry->may_flags = may_flags;
247 __entry->nf_ref = nf ? atomic_read(&nf->nf_ref) : 0;
248 __entry->nf_flags = nf ? nf->nf_flags : 0;
249 __entry->nf_may = nf ? nf->nf_may : 0;
250 __entry->nf_file = nf ? nf->nf_file : NULL;
251 __entry->status = status;
252 ),
253
254 TP_printk("xid=0x%x hash=0x%x inode=0x%p may_flags=%s ref=%d nf_flags=%s nf_may=%s nf_file=0x%p status=%u",
255 be32_to_cpu(__entry->xid), __entry->hash, __entry->inode,
256 show_nf_may(__entry->may_flags), __entry->nf_ref,
257 show_nf_flags(__entry->nf_flags),
258 show_nf_may(__entry->nf_may), __entry->nf_file,
259 be32_to_cpu(__entry->status))
260);
261
262DECLARE_EVENT_CLASS(nfsd_file_search_class,
263 TP_PROTO(struct inode *inode, unsigned int hash, int found),
264 TP_ARGS(inode, hash, found),
265 TP_STRUCT__entry(
266 __field(struct inode *, inode)
267 __field(unsigned int, hash)
268 __field(int, found)
269 ),
270 TP_fast_assign(
271 __entry->inode = inode;
272 __entry->hash = hash;
273 __entry->found = found;
274 ),
275 TP_printk("hash=0x%x inode=0x%p found=%d", __entry->hash,
276 __entry->inode, __entry->found)
277);
278
279#define DEFINE_NFSD_FILE_SEARCH_EVENT(name) \
280DEFINE_EVENT(nfsd_file_search_class, name, \
281 TP_PROTO(struct inode *inode, unsigned int hash, int found), \
282 TP_ARGS(inode, hash, found))
283
284DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_close_inode_sync);
285DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_close_inode);
286DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_is_cached);
287
288TRACE_EVENT(nfsd_file_fsnotify_handle_event,
289 TP_PROTO(struct inode *inode, u32 mask),
290 TP_ARGS(inode, mask),
291 TP_STRUCT__entry(
292 __field(struct inode *, inode)
293 __field(unsigned int, nlink)
294 __field(umode_t, mode)
295 __field(u32, mask)
296 ),
297 TP_fast_assign(
298 __entry->inode = inode;
299 __entry->nlink = inode->i_nlink;
300 __entry->mode = inode->i_mode;
301 __entry->mask = mask;
302 ),
303 TP_printk("inode=0x%p nlink=%u mode=0%ho mask=0x%x", __entry->inode,
304 __entry->nlink, __entry->mode, __entry->mask)
305);
306
167#endif /* _NFSD_TRACE_H */ 307#endif /* _NFSD_TRACE_H */
168 308
169#undef TRACE_INCLUDE_PATH 309#undef TRACE_INCLUDE_PATH
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index c85783e536d5..bd0a385df3fc 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -44,38 +44,11 @@
44 44
45#include "nfsd.h" 45#include "nfsd.h"
46#include "vfs.h" 46#include "vfs.h"
47#include "filecache.h"
47#include "trace.h" 48#include "trace.h"
48 49
49#define NFSDDBG_FACILITY NFSDDBG_FILEOP 50#define NFSDDBG_FACILITY NFSDDBG_FILEOP
50 51
51
52/*
53 * This is a cache of readahead params that help us choose the proper
54 * readahead strategy. Initially, we set all readahead parameters to 0
55 * and let the VFS handle things.
56 * If you increase the number of cached files very much, you'll need to
57 * add a hash table here.
58 */
59struct raparms {
60 struct raparms *p_next;
61 unsigned int p_count;
62 ino_t p_ino;
63 dev_t p_dev;
64 int p_set;
65 struct file_ra_state p_ra;
66 unsigned int p_hindex;
67};
68
69struct raparm_hbucket {
70 struct raparms *pb_head;
71 spinlock_t pb_lock;
72} ____cacheline_aligned_in_smp;
73
74#define RAPARM_HASH_BITS 4
75#define RAPARM_HASH_SIZE (1<<RAPARM_HASH_BITS)
76#define RAPARM_HASH_MASK (RAPARM_HASH_SIZE-1)
77static struct raparm_hbucket raparm_hash[RAPARM_HASH_SIZE];
78
79/* 52/*
80 * Called from nfsd_lookup and encode_dirent. Check if we have crossed 53 * Called from nfsd_lookup and encode_dirent. Check if we have crossed
81 * a mount point. 54 * a mount point.
@@ -699,7 +672,7 @@ nfsd_access(struct svc_rqst *rqstp, struct svc_fh *fhp, u32 *access, u32 *suppor
699} 672}
700#endif /* CONFIG_NFSD_V3 */ 673#endif /* CONFIG_NFSD_V3 */
701 674
702static int nfsd_open_break_lease(struct inode *inode, int access) 675int nfsd_open_break_lease(struct inode *inode, int access)
703{ 676{
704 unsigned int mode; 677 unsigned int mode;
705 678
@@ -715,8 +688,8 @@ static int nfsd_open_break_lease(struct inode *inode, int access)
715 * and additional flags. 688 * and additional flags.
716 * N.B. After this call fhp needs an fh_put 689 * N.B. After this call fhp needs an fh_put
717 */ 690 */
718__be32 691static __be32
719nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, 692__nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
720 int may_flags, struct file **filp) 693 int may_flags, struct file **filp)
721{ 694{
722 struct path path; 695 struct path path;
@@ -726,25 +699,6 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
726 __be32 err; 699 __be32 err;
727 int host_err = 0; 700 int host_err = 0;
728 701
729 validate_process_creds();
730
731 /*
732 * If we get here, then the client has already done an "open",
733 * and (hopefully) checked permission - so allow OWNER_OVERRIDE
734 * in case a chmod has now revoked permission.
735 *
736 * Arguably we should also allow the owner override for
737 * directories, but we never have and it doesn't seem to have
738 * caused anyone a problem. If we were to change this, note
739 * also that our filldir callbacks would need a variant of
740 * lookup_one_len that doesn't check permissions.
741 */
742 if (type == S_IFREG)
743 may_flags |= NFSD_MAY_OWNER_OVERRIDE;
744 err = fh_verify(rqstp, fhp, type, may_flags);
745 if (err)
746 goto out;
747
748 path.mnt = fhp->fh_export->ex_path.mnt; 702 path.mnt = fhp->fh_export->ex_path.mnt;
749 path.dentry = fhp->fh_dentry; 703 path.dentry = fhp->fh_dentry;
750 inode = d_inode(path.dentry); 704 inode = d_inode(path.dentry);
@@ -798,67 +752,46 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
798out_nfserr: 752out_nfserr:
799 err = nfserrno(host_err); 753 err = nfserrno(host_err);
800out: 754out:
801 validate_process_creds();
802 return err; 755 return err;
803} 756}
804 757
805struct raparms * 758__be32
806nfsd_init_raparms(struct file *file) 759nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
760 int may_flags, struct file **filp)
807{ 761{
808 struct inode *inode = file_inode(file); 762 __be32 err;
809 dev_t dev = inode->i_sb->s_dev;
810 ino_t ino = inode->i_ino;
811 struct raparms *ra, **rap, **frap = NULL;
812 int depth = 0;
813 unsigned int hash;
814 struct raparm_hbucket *rab;
815
816 hash = jhash_2words(dev, ino, 0xfeedbeef) & RAPARM_HASH_MASK;
817 rab = &raparm_hash[hash];
818
819 spin_lock(&rab->pb_lock);
820 for (rap = &rab->pb_head; (ra = *rap); rap = &ra->p_next) {
821 if (ra->p_ino == ino && ra->p_dev == dev)
822 goto found;
823 depth++;
824 if (ra->p_count == 0)
825 frap = rap;
826 }
827 depth = nfsdstats.ra_size;
828 if (!frap) {
829 spin_unlock(&rab->pb_lock);
830 return NULL;
831 }
832 rap = frap;
833 ra = *frap;
834 ra->p_dev = dev;
835 ra->p_ino = ino;
836 ra->p_set = 0;
837 ra->p_hindex = hash;
838found:
839 if (rap != &rab->pb_head) {
840 *rap = ra->p_next;
841 ra->p_next = rab->pb_head;
842 rab->pb_head = ra;
843 }
844 ra->p_count++;
845 nfsdstats.ra_depth[depth*10/nfsdstats.ra_size]++;
846 spin_unlock(&rab->pb_lock);
847 763
848 if (ra->p_set) 764 validate_process_creds();
849 file->f_ra = ra->p_ra; 765 /*
850 return ra; 766 * If we get here, then the client has already done an "open",
767 * and (hopefully) checked permission - so allow OWNER_OVERRIDE
768 * in case a chmod has now revoked permission.
769 *
770 * Arguably we should also allow the owner override for
771 * directories, but we never have and it doesn't seem to have
772 * caused anyone a problem. If we were to change this, note
773 * also that our filldir callbacks would need a variant of
774 * lookup_one_len that doesn't check permissions.
775 */
776 if (type == S_IFREG)
777 may_flags |= NFSD_MAY_OWNER_OVERRIDE;
778 err = fh_verify(rqstp, fhp, type, may_flags);
779 if (!err)
780 err = __nfsd_open(rqstp, fhp, type, may_flags, filp);
781 validate_process_creds();
782 return err;
851} 783}
852 784
853void nfsd_put_raparams(struct file *file, struct raparms *ra) 785__be32
786nfsd_open_verified(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
787 int may_flags, struct file **filp)
854{ 788{
855 struct raparm_hbucket *rab = &raparm_hash[ra->p_hindex]; 789 __be32 err;
856 790
857 spin_lock(&rab->pb_lock); 791 validate_process_creds();
858 ra->p_ra = file->f_ra; 792 err = __nfsd_open(rqstp, fhp, type, may_flags, filp);
859 ra->p_set = 1; 793 validate_process_creds();
860 ra->p_count--; 794 return err;
861 spin_unlock(&rab->pb_lock);
862} 795}
863 796
864/* 797/*
@@ -901,12 +834,23 @@ static int nfsd_direct_splice_actor(struct pipe_inode_info *pipe,
901 return __splice_from_pipe(pipe, sd, nfsd_splice_actor); 834 return __splice_from_pipe(pipe, sd, nfsd_splice_actor);
902} 835}
903 836
837static u32 nfsd_eof_on_read(struct file *file, loff_t offset, ssize_t len,
838 size_t expected)
839{
840 if (expected != 0 && len == 0)
841 return 1;
842 if (offset+len >= i_size_read(file_inode(file)))
843 return 1;
844 return 0;
845}
846
904static __be32 nfsd_finish_read(struct svc_rqst *rqstp, struct svc_fh *fhp, 847static __be32 nfsd_finish_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
905 struct file *file, loff_t offset, 848 struct file *file, loff_t offset,
906 unsigned long *count, int host_err) 849 unsigned long *count, u32 *eof, ssize_t host_err)
907{ 850{
908 if (host_err >= 0) { 851 if (host_err >= 0) {
909 nfsdstats.io_read += host_err; 852 nfsdstats.io_read += host_err;
853 *eof = nfsd_eof_on_read(file, offset, host_err, *count);
910 *count = host_err; 854 *count = host_err;
911 fsnotify_access(file); 855 fsnotify_access(file);
912 trace_nfsd_read_io_done(rqstp, fhp, offset, *count); 856 trace_nfsd_read_io_done(rqstp, fhp, offset, *count);
@@ -918,7 +862,8 @@ static __be32 nfsd_finish_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
918} 862}
919 863
920__be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp, 864__be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
921 struct file *file, loff_t offset, unsigned long *count) 865 struct file *file, loff_t offset, unsigned long *count,
866 u32 *eof)
922{ 867{
923 struct splice_desc sd = { 868 struct splice_desc sd = {
924 .len = 0, 869 .len = 0,
@@ -926,25 +871,27 @@ __be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
926 .pos = offset, 871 .pos = offset,
927 .u.data = rqstp, 872 .u.data = rqstp,
928 }; 873 };
929 int host_err; 874 ssize_t host_err;
930 875
931 trace_nfsd_read_splice(rqstp, fhp, offset, *count); 876 trace_nfsd_read_splice(rqstp, fhp, offset, *count);
932 rqstp->rq_next_page = rqstp->rq_respages + 1; 877 rqstp->rq_next_page = rqstp->rq_respages + 1;
933 host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor); 878 host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor);
934 return nfsd_finish_read(rqstp, fhp, file, offset, count, host_err); 879 return nfsd_finish_read(rqstp, fhp, file, offset, count, eof, host_err);
935} 880}
936 881
937__be32 nfsd_readv(struct svc_rqst *rqstp, struct svc_fh *fhp, 882__be32 nfsd_readv(struct svc_rqst *rqstp, struct svc_fh *fhp,
938 struct file *file, loff_t offset, 883 struct file *file, loff_t offset,
939 struct kvec *vec, int vlen, unsigned long *count) 884 struct kvec *vec, int vlen, unsigned long *count,
885 u32 *eof)
940{ 886{
941 struct iov_iter iter; 887 struct iov_iter iter;
942 int host_err; 888 loff_t ppos = offset;
889 ssize_t host_err;
943 890
944 trace_nfsd_read_vector(rqstp, fhp, offset, *count); 891 trace_nfsd_read_vector(rqstp, fhp, offset, *count);
945 iov_iter_kvec(&iter, READ, vec, vlen, *count); 892 iov_iter_kvec(&iter, READ, vec, vlen, *count);
946 host_err = vfs_iter_read(file, &iter, &offset, 0); 893 host_err = vfs_iter_read(file, &iter, &ppos, 0);
947 return nfsd_finish_read(rqstp, fhp, file, offset, count, host_err); 894 return nfsd_finish_read(rqstp, fhp, file, offset, count, eof, host_err);
948} 895}
949 896
950/* 897/*
@@ -1025,8 +972,12 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
1025 nfsdstats.io_write += *cnt; 972 nfsdstats.io_write += *cnt;
1026 fsnotify_modify(file); 973 fsnotify_modify(file);
1027 974
1028 if (stable && use_wgather) 975 if (stable && use_wgather) {
1029 host_err = wait_for_concurrent_writes(file); 976 host_err = wait_for_concurrent_writes(file);
977 if (host_err < 0)
978 nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp),
979 nfsd_net_id));
980 }
1030 981
1031out_nfserr: 982out_nfserr:
1032 if (host_err >= 0) { 983 if (host_err >= 0) {
@@ -1047,27 +998,25 @@ out_nfserr:
1047 * N.B. After this call fhp needs an fh_put 998 * N.B. After this call fhp needs an fh_put
1048 */ 999 */
1049__be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, 1000__be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
1050 loff_t offset, struct kvec *vec, int vlen, unsigned long *count) 1001 loff_t offset, struct kvec *vec, int vlen, unsigned long *count,
1002 u32 *eof)
1051{ 1003{
1004 struct nfsd_file *nf;
1052 struct file *file; 1005 struct file *file;
1053 struct raparms *ra;
1054 __be32 err; 1006 __be32 err;
1055 1007
1056 trace_nfsd_read_start(rqstp, fhp, offset, *count); 1008 trace_nfsd_read_start(rqstp, fhp, offset, *count);
1057 err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file); 1009 err = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_READ, &nf);
1058 if (err) 1010 if (err)
1059 return err; 1011 return err;
1060 1012
1061 ra = nfsd_init_raparms(file); 1013 file = nf->nf_file;
1062
1063 if (file->f_op->splice_read && test_bit(RQ_SPLICE_OK, &rqstp->rq_flags)) 1014 if (file->f_op->splice_read && test_bit(RQ_SPLICE_OK, &rqstp->rq_flags))
1064 err = nfsd_splice_read(rqstp, fhp, file, offset, count); 1015 err = nfsd_splice_read(rqstp, fhp, file, offset, count, eof);
1065 else 1016 else
1066 err = nfsd_readv(rqstp, fhp, file, offset, vec, vlen, count); 1017 err = nfsd_readv(rqstp, fhp, file, offset, vec, vlen, count, eof);
1067 1018
1068 if (ra) 1019 nfsd_file_put(nf);
1069 nfsd_put_raparams(file, ra);
1070 fput(file);
1071 1020
1072 trace_nfsd_read_done(rqstp, fhp, offset, *count); 1021 trace_nfsd_read_done(rqstp, fhp, offset, *count);
1073 1022
@@ -1083,17 +1032,18 @@ __be32
1083nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, 1032nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
1084 struct kvec *vec, int vlen, unsigned long *cnt, int stable) 1033 struct kvec *vec, int vlen, unsigned long *cnt, int stable)
1085{ 1034{
1086 struct file *file = NULL; 1035 struct nfsd_file *nf;
1087 __be32 err = 0; 1036 __be32 err;
1088 1037
1089 trace_nfsd_write_start(rqstp, fhp, offset, *cnt); 1038 trace_nfsd_write_start(rqstp, fhp, offset, *cnt);
1090 1039
1091 err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file); 1040 err = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_WRITE, &nf);
1092 if (err) 1041 if (err)
1093 goto out; 1042 goto out;
1094 1043
1095 err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt, stable); 1044 err = nfsd_vfs_write(rqstp, fhp, nf->nf_file, offset, vec,
1096 fput(file); 1045 vlen, cnt, stable);
1046 nfsd_file_put(nf);
1097out: 1047out:
1098 trace_nfsd_write_done(rqstp, fhp, offset, *cnt); 1048 trace_nfsd_write_done(rqstp, fhp, offset, *cnt);
1099 return err; 1049 return err;
@@ -1113,9 +1063,9 @@ __be32
1113nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, 1063nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
1114 loff_t offset, unsigned long count) 1064 loff_t offset, unsigned long count)
1115{ 1065{
1116 struct file *file; 1066 struct nfsd_file *nf;
1117 loff_t end = LLONG_MAX; 1067 loff_t end = LLONG_MAX;
1118 __be32 err = nfserr_inval; 1068 __be32 err = nfserr_inval;
1119 1069
1120 if (offset < 0) 1070 if (offset < 0)
1121 goto out; 1071 goto out;
@@ -1125,20 +1075,27 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
1125 goto out; 1075 goto out;
1126 } 1076 }
1127 1077
1128 err = nfsd_open(rqstp, fhp, S_IFREG, 1078 err = nfsd_file_acquire(rqstp, fhp,
1129 NFSD_MAY_WRITE|NFSD_MAY_NOT_BREAK_LEASE, &file); 1079 NFSD_MAY_WRITE|NFSD_MAY_NOT_BREAK_LEASE, &nf);
1130 if (err) 1080 if (err)
1131 goto out; 1081 goto out;
1132 if (EX_ISSYNC(fhp->fh_export)) { 1082 if (EX_ISSYNC(fhp->fh_export)) {
1133 int err2 = vfs_fsync_range(file, offset, end, 0); 1083 int err2 = vfs_fsync_range(nf->nf_file, offset, end, 0);
1134 1084
1135 if (err2 != -EINVAL) 1085 switch (err2) {
1136 err = nfserrno(err2); 1086 case 0:
1137 else 1087 break;
1088 case -EINVAL:
1138 err = nfserr_notsupp; 1089 err = nfserr_notsupp;
1090 break;
1091 default:
1092 err = nfserrno(err2);
1093 nfsd_reset_boot_verifier(net_generic(nf->nf_net,
1094 nfsd_net_id));
1095 }
1139 } 1096 }
1140 1097
1141 fput(file); 1098 nfsd_file_put(nf);
1142out: 1099out:
1143 return err; 1100 return err;
1144} 1101}
@@ -1659,6 +1616,26 @@ out_nfserr:
1659 goto out_unlock; 1616 goto out_unlock;
1660} 1617}
1661 1618
1619static void
1620nfsd_close_cached_files(struct dentry *dentry)
1621{
1622 struct inode *inode = d_inode(dentry);
1623
1624 if (inode && S_ISREG(inode->i_mode))
1625 nfsd_file_close_inode_sync(inode);
1626}
1627
1628static bool
1629nfsd_has_cached_files(struct dentry *dentry)
1630{
1631 bool ret = false;
1632 struct inode *inode = d_inode(dentry);
1633
1634 if (inode && S_ISREG(inode->i_mode))
1635 ret = nfsd_file_is_cached(inode);
1636 return ret;
1637}
1638
1662/* 1639/*
1663 * Rename a file 1640 * Rename a file
1664 * N.B. After this call _both_ ffhp and tfhp need an fh_put 1641 * N.B. After this call _both_ ffhp and tfhp need an fh_put
@@ -1671,6 +1648,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
1671 struct inode *fdir, *tdir; 1648 struct inode *fdir, *tdir;
1672 __be32 err; 1649 __be32 err;
1673 int host_err; 1650 int host_err;
1651 bool has_cached = false;
1674 1652
1675 err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_REMOVE); 1653 err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_REMOVE);
1676 if (err) 1654 if (err)
@@ -1689,6 +1667,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
1689 if (!flen || isdotent(fname, flen) || !tlen || isdotent(tname, tlen)) 1667 if (!flen || isdotent(fname, flen) || !tlen || isdotent(tname, tlen))
1690 goto out; 1668 goto out;
1691 1669
1670retry:
1692 host_err = fh_want_write(ffhp); 1671 host_err = fh_want_write(ffhp);
1693 if (host_err) { 1672 if (host_err) {
1694 err = nfserrno(host_err); 1673 err = nfserrno(host_err);
@@ -1728,11 +1707,16 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
1728 if (ffhp->fh_export->ex_path.dentry != tfhp->fh_export->ex_path.dentry) 1707 if (ffhp->fh_export->ex_path.dentry != tfhp->fh_export->ex_path.dentry)
1729 goto out_dput_new; 1708 goto out_dput_new;
1730 1709
1731 host_err = vfs_rename(fdir, odentry, tdir, ndentry, NULL, 0); 1710 if (nfsd_has_cached_files(ndentry)) {
1732 if (!host_err) { 1711 has_cached = true;
1733 host_err = commit_metadata(tfhp); 1712 goto out_dput_old;
1734 if (!host_err) 1713 } else {
1735 host_err = commit_metadata(ffhp); 1714 host_err = vfs_rename(fdir, odentry, tdir, ndentry, NULL, 0);
1715 if (!host_err) {
1716 host_err = commit_metadata(tfhp);
1717 if (!host_err)
1718 host_err = commit_metadata(ffhp);
1719 }
1736 } 1720 }
1737 out_dput_new: 1721 out_dput_new:
1738 dput(ndentry); 1722 dput(ndentry);
@@ -1745,12 +1729,26 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
1745 * as that would do the wrong thing if the two directories 1729 * as that would do the wrong thing if the two directories
1746 * were the same, so again we do it by hand. 1730 * were the same, so again we do it by hand.
1747 */ 1731 */
1748 fill_post_wcc(ffhp); 1732 if (!has_cached) {
1749 fill_post_wcc(tfhp); 1733 fill_post_wcc(ffhp);
1734 fill_post_wcc(tfhp);
1735 }
1750 unlock_rename(tdentry, fdentry); 1736 unlock_rename(tdentry, fdentry);
1751 ffhp->fh_locked = tfhp->fh_locked = false; 1737 ffhp->fh_locked = tfhp->fh_locked = false;
1752 fh_drop_write(ffhp); 1738 fh_drop_write(ffhp);
1753 1739
1740 /*
1741 * If the target dentry has cached open files, then we need to try to
1742 * close them prior to doing the rename. Flushing delayed fput
1743 * shouldn't be done with locks held however, so we delay it until this
1744 * point and then reattempt the whole shebang.
1745 */
1746 if (has_cached) {
1747 has_cached = false;
1748 nfsd_close_cached_files(ndentry);
1749 dput(ndentry);
1750 goto retry;
1751 }
1754out: 1752out:
1755 return err; 1753 return err;
1756} 1754}
@@ -1797,10 +1795,13 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
1797 if (!type) 1795 if (!type)
1798 type = d_inode(rdentry)->i_mode & S_IFMT; 1796 type = d_inode(rdentry)->i_mode & S_IFMT;
1799 1797
1800 if (type != S_IFDIR) 1798 if (type != S_IFDIR) {
1799 nfsd_close_cached_files(rdentry);
1801 host_err = vfs_unlink(dirp, rdentry, NULL); 1800 host_err = vfs_unlink(dirp, rdentry, NULL);
1802 else 1801 } else {
1803 host_err = vfs_rmdir(dirp, rdentry); 1802 host_err = vfs_rmdir(dirp, rdentry);
1803 }
1804
1804 if (!host_err) 1805 if (!host_err)
1805 host_err = commit_metadata(fhp); 1806 host_err = commit_metadata(fhp);
1806 dput(rdentry); 1807 dput(rdentry);
@@ -2074,63 +2075,3 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
2074 2075
2075 return err? nfserrno(err) : 0; 2076 return err? nfserrno(err) : 0;
2076} 2077}
2077
2078void
2079nfsd_racache_shutdown(void)
2080{
2081 struct raparms *raparm, *last_raparm;
2082 unsigned int i;
2083
2084 dprintk("nfsd: freeing readahead buffers.\n");
2085
2086 for (i = 0; i < RAPARM_HASH_SIZE; i++) {
2087 raparm = raparm_hash[i].pb_head;
2088 while(raparm) {
2089 last_raparm = raparm;
2090 raparm = raparm->p_next;
2091 kfree(last_raparm);
2092 }
2093 raparm_hash[i].pb_head = NULL;
2094 }
2095}
2096/*
2097 * Initialize readahead param cache
2098 */
2099int
2100nfsd_racache_init(int cache_size)
2101{
2102 int i;
2103 int j = 0;
2104 int nperbucket;
2105 struct raparms **raparm = NULL;
2106
2107
2108 if (raparm_hash[0].pb_head)
2109 return 0;
2110 nperbucket = DIV_ROUND_UP(cache_size, RAPARM_HASH_SIZE);
2111 nperbucket = max(2, nperbucket);
2112 cache_size = nperbucket * RAPARM_HASH_SIZE;
2113
2114 dprintk("nfsd: allocating %d readahead buffers.\n", cache_size);
2115
2116 for (i = 0; i < RAPARM_HASH_SIZE; i++) {
2117 spin_lock_init(&raparm_hash[i].pb_lock);
2118
2119 raparm = &raparm_hash[i].pb_head;
2120 for (j = 0; j < nperbucket; j++) {
2121 *raparm = kzalloc(sizeof(struct raparms), GFP_KERNEL);
2122 if (!*raparm)
2123 goto out_nomem;
2124 raparm = &(*raparm)->p_next;
2125 }
2126 *raparm = NULL;
2127 }
2128
2129 nfsdstats.ra_size = cache_size;
2130 return 0;
2131
2132out_nomem:
2133 dprintk("nfsd: kmalloc failed, freeing readahead buffers\n");
2134 nfsd_racache_shutdown();
2135 return -ENOMEM;
2136}
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index db351247892d..a13fd9d7e1f5 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -40,8 +40,6 @@
40typedef int (*nfsd_filldir_t)(void *, const char *, int, loff_t, u64, unsigned); 40typedef int (*nfsd_filldir_t)(void *, const char *, int, loff_t, u64, unsigned);
41 41
42/* nfsd/vfs.c */ 42/* nfsd/vfs.c */
43int nfsd_racache_init(int);
44void nfsd_racache_shutdown(void);
45int nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp, 43int nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
46 struct svc_export **expp); 44 struct svc_export **expp);
47__be32 nfsd_lookup(struct svc_rqst *, struct svc_fh *, 45__be32 nfsd_lookup(struct svc_rqst *, struct svc_fh *,
@@ -75,18 +73,23 @@ __be32 do_nfsd_create(struct svc_rqst *, struct svc_fh *,
75__be32 nfsd_commit(struct svc_rqst *, struct svc_fh *, 73__be32 nfsd_commit(struct svc_rqst *, struct svc_fh *,
76 loff_t, unsigned long); 74 loff_t, unsigned long);
77#endif /* CONFIG_NFSD_V3 */ 75#endif /* CONFIG_NFSD_V3 */
76int nfsd_open_break_lease(struct inode *, int);
78__be32 nfsd_open(struct svc_rqst *, struct svc_fh *, umode_t, 77__be32 nfsd_open(struct svc_rqst *, struct svc_fh *, umode_t,
79 int, struct file **); 78 int, struct file **);
80struct raparms; 79__be32 nfsd_open_verified(struct svc_rqst *, struct svc_fh *, umode_t,
80 int, struct file **);
81__be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp, 81__be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
82 struct file *file, loff_t offset, 82 struct file *file, loff_t offset,
83 unsigned long *count); 83 unsigned long *count,
84 u32 *eof);
84__be32 nfsd_readv(struct svc_rqst *rqstp, struct svc_fh *fhp, 85__be32 nfsd_readv(struct svc_rqst *rqstp, struct svc_fh *fhp,
85 struct file *file, loff_t offset, 86 struct file *file, loff_t offset,
86 struct kvec *vec, int vlen, 87 struct kvec *vec, int vlen,
87 unsigned long *count); 88 unsigned long *count,
89 u32 *eof);
88__be32 nfsd_read(struct svc_rqst *, struct svc_fh *, 90__be32 nfsd_read(struct svc_rqst *, struct svc_fh *,
89 loff_t, struct kvec *, int, unsigned long *); 91 loff_t, struct kvec *, int, unsigned long *,
92 u32 *eof);
90__be32 nfsd_write(struct svc_rqst *, struct svc_fh *, loff_t, 93__be32 nfsd_write(struct svc_rqst *, struct svc_fh *, loff_t,
91 struct kvec *, int, unsigned long *, int); 94 struct kvec *, int, unsigned long *, int);
92__be32 nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, 95__be32 nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp,
@@ -115,9 +118,6 @@ __be32 nfsd_statfs(struct svc_rqst *, struct svc_fh *,
115__be32 nfsd_permission(struct svc_rqst *, struct svc_export *, 118__be32 nfsd_permission(struct svc_rqst *, struct svc_export *,
116 struct dentry *, int); 119 struct dentry *, int);
117 120
118struct raparms *nfsd_init_raparms(struct file *file);
119void nfsd_put_raparams(struct file *file, struct raparms *ra);
120
121static inline int fh_want_write(struct svc_fh *fh) 121static inline int fh_want_write(struct svc_fh *fh)
122{ 122{
123 int ret; 123 int ret;
@@ -152,23 +152,4 @@ static inline int nfsd_create_is_exclusive(int createmode)
152 || createmode == NFS4_CREATE_EXCLUSIVE4_1; 152 || createmode == NFS4_CREATE_EXCLUSIVE4_1;
153} 153}
154 154
155static inline bool nfsd_eof_on_read(long requested, long read,
156 loff_t offset, loff_t size)
157{
158 /* We assume a short read means eof: */
159 if (requested > read)
160 return true;
161 /*
162 * A non-short read might also reach end of file. The spec
163 * still requires us to set eof in that case.
164 *
165 * Further operations may have modified the file size since
166 * the read, so the following check is not atomic with the read.
167 * We've only seen that cause a problem for a client in the case
168 * where the read returned a count of 0 without setting eof.
169 * That case was fixed by the addition of the above check.
170 */
171 return (offset + read >= size);
172}
173
174#endif /* LINUX_NFSD_VFS_H */ 155#endif /* LINUX_NFSD_VFS_H */
diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h
index 2cb29e961a76..99ff9f403ff1 100644
--- a/fs/nfsd/xdr3.h
+++ b/fs/nfsd/xdr3.h
@@ -151,7 +151,7 @@ struct nfsd3_readres {
151 __be32 status; 151 __be32 status;
152 struct svc_fh fh; 152 struct svc_fh fh;
153 unsigned long count; 153 unsigned long count;
154 int eof; 154 __u32 eof;
155}; 155};
156 156
157struct nfsd3_writeres { 157struct nfsd3_writeres {
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index d64c870f998a..f4737d66ee98 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -273,15 +273,14 @@ struct nfsd4_open_downgrade {
273 273
274 274
275struct nfsd4_read { 275struct nfsd4_read {
276 stateid_t rd_stateid; /* request */ 276 stateid_t rd_stateid; /* request */
277 u64 rd_offset; /* request */ 277 u64 rd_offset; /* request */
278 u32 rd_length; /* request */ 278 u32 rd_length; /* request */
279 int rd_vlen; 279 int rd_vlen;
280 struct file *rd_filp; 280 struct nfsd_file *rd_nf;
281 bool rd_tmp_file;
282 281
283 struct svc_rqst *rd_rqstp; /* response */ 282 struct svc_rqst *rd_rqstp; /* response */
284 struct svc_fh * rd_fhp; /* response */ 283 struct svc_fh *rd_fhp; /* response */
285}; 284};
286 285
287struct nfsd4_readdir { 286struct nfsd4_readdir {
@@ -538,8 +537,8 @@ struct nfsd4_copy {
538 537
539 struct nfs4_client *cp_clp; 538 struct nfs4_client *cp_clp;
540 539
541 struct file *file_src; 540 struct nfsd_file *nf_src;
542 struct file *file_dst; 541 struct nfsd_file *nf_dst;
543 542
544 stateid_t cp_stateid; 543 stateid_t cp_stateid;
545 544
diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h
index 5a00121fb219..f3462828a0e2 100644
--- a/fs/notify/fsnotify.h
+++ b/fs/notify/fsnotify.h
@@ -54,8 +54,6 @@ static inline void fsnotify_clear_marks_by_sb(struct super_block *sb)
54{ 54{
55 fsnotify_destroy_marks(&sb->s_fsnotify_marks); 55 fsnotify_destroy_marks(&sb->s_fsnotify_marks);
56} 56}
57/* Wait until all marks queued for destruction are destroyed */
58extern void fsnotify_wait_marks_destroyed(void);
59 57
60/* 58/*
61 * update the dentry->d_flags of all of inode's children to indicate if inode cares 59 * update the dentry->d_flags of all of inode's children to indicate if inode cares
diff --git a/fs/notify/group.c b/fs/notify/group.c
index 0391190305cc..133f723aca07 100644
--- a/fs/notify/group.c
+++ b/fs/notify/group.c
@@ -108,6 +108,7 @@ void fsnotify_put_group(struct fsnotify_group *group)
108 if (refcount_dec_and_test(&group->refcnt)) 108 if (refcount_dec_and_test(&group->refcnt))
109 fsnotify_final_destroy_group(group); 109 fsnotify_final_destroy_group(group);
110} 110}
111EXPORT_SYMBOL_GPL(fsnotify_put_group);
111 112
112/* 113/*
113 * Create a new fsnotify_group and hold a reference for the group returned. 114 * Create a new fsnotify_group and hold a reference for the group returned.
@@ -137,6 +138,7 @@ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops)
137 138
138 return group; 139 return group;
139} 140}
141EXPORT_SYMBOL_GPL(fsnotify_alloc_group);
140 142
141int fsnotify_fasync(int fd, struct file *file, int on) 143int fsnotify_fasync(int fd, struct file *file, int on)
142{ 144{
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index 99ddd126f6f0..1d96216dffd1 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -276,6 +276,7 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
276 queue_delayed_work(system_unbound_wq, &reaper_work, 276 queue_delayed_work(system_unbound_wq, &reaper_work,
277 FSNOTIFY_REAPER_DELAY); 277 FSNOTIFY_REAPER_DELAY);
278} 278}
279EXPORT_SYMBOL_GPL(fsnotify_put_mark);
279 280
280/* 281/*
281 * Get mark reference when we found the mark via lockless traversal of object 282 * Get mark reference when we found the mark via lockless traversal of object
@@ -430,6 +431,7 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark,
430 mutex_unlock(&group->mark_mutex); 431 mutex_unlock(&group->mark_mutex);
431 fsnotify_free_mark(mark); 432 fsnotify_free_mark(mark);
432} 433}
434EXPORT_SYMBOL_GPL(fsnotify_destroy_mark);
433 435
434/* 436/*
435 * Sorting function for lists of fsnotify marks. 437 * Sorting function for lists of fsnotify marks.
@@ -685,6 +687,7 @@ int fsnotify_add_mark(struct fsnotify_mark *mark, fsnotify_connp_t *connp,
685 mutex_unlock(&group->mark_mutex); 687 mutex_unlock(&group->mark_mutex);
686 return ret; 688 return ret;
687} 689}
690EXPORT_SYMBOL_GPL(fsnotify_add_mark);
688 691
689/* 692/*
690 * Given a list of marks, find the mark associated with given group. If found 693 * Given a list of marks, find the mark associated with given group. If found
@@ -711,6 +714,7 @@ struct fsnotify_mark *fsnotify_find_mark(fsnotify_connp_t *connp,
711 spin_unlock(&conn->lock); 714 spin_unlock(&conn->lock);
712 return NULL; 715 return NULL;
713} 716}
717EXPORT_SYMBOL_GPL(fsnotify_find_mark);
714 718
715/* Clear any marks in a group with given type mask */ 719/* Clear any marks in a group with given type mask */
716void fsnotify_clear_marks_by_group(struct fsnotify_group *group, 720void fsnotify_clear_marks_by_group(struct fsnotify_group *group,
@@ -809,6 +813,7 @@ void fsnotify_init_mark(struct fsnotify_mark *mark,
809 mark->group = group; 813 mark->group = group;
810 WRITE_ONCE(mark->connector, NULL); 814 WRITE_ONCE(mark->connector, NULL);
811} 815}
816EXPORT_SYMBOL_GPL(fsnotify_init_mark);
812 817
813/* 818/*
814 * Destroy all marks in destroy_list, waits for SRCU period to finish before 819 * Destroy all marks in destroy_list, waits for SRCU period to finish before
@@ -837,3 +842,4 @@ void fsnotify_wait_marks_destroyed(void)
837{ 842{
838 flush_delayed_work(&reaper_work); 843 flush_delayed_work(&reaper_work);
839} 844}
845EXPORT_SYMBOL_GPL(fsnotify_wait_marks_destroyed);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index b0c6b0d34d02..e0d909d35763 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1168,6 +1168,11 @@ extern void lease_get_mtime(struct inode *, struct timespec64 *time);
1168extern int generic_setlease(struct file *, long, struct file_lock **, void **priv); 1168extern int generic_setlease(struct file *, long, struct file_lock **, void **priv);
1169extern int vfs_setlease(struct file *, long, struct file_lock **, void **); 1169extern int vfs_setlease(struct file *, long, struct file_lock **, void **);
1170extern int lease_modify(struct file_lock *, int, struct list_head *); 1170extern int lease_modify(struct file_lock *, int, struct list_head *);
1171
1172struct notifier_block;
1173extern int lease_register_notifier(struct notifier_block *);
1174extern void lease_unregister_notifier(struct notifier_block *);
1175
1171struct files_struct; 1176struct files_struct;
1172extern void show_fd_locks(struct seq_file *f, 1177extern void show_fd_locks(struct seq_file *f,
1173 struct file *filp, struct files_struct *files); 1178 struct file *filp, struct files_struct *files);
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 2de3b2ddd19a..1915bdba2fad 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -475,6 +475,8 @@ extern void fsnotify_destroy_mark(struct fsnotify_mark *mark,
475extern void fsnotify_detach_mark(struct fsnotify_mark *mark); 475extern void fsnotify_detach_mark(struct fsnotify_mark *mark);
476/* free mark */ 476/* free mark */
477extern void fsnotify_free_mark(struct fsnotify_mark *mark); 477extern void fsnotify_free_mark(struct fsnotify_mark *mark);
478/* Wait until all marks queued for destruction are destroyed */
479extern void fsnotify_wait_marks_destroyed(void);
478/* run all the marks in a group, and clear all of the marks attached to given object type */ 480/* run all the marks in a group, and clear all of the marks attached to given object type */
479extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group, unsigned int type); 481extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group, unsigned int type);
480/* run all the marks in a group, and clear all of the vfsmount marks */ 482/* run all the marks in a group, and clear all of the vfsmount marks */
diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h
index c7f38e897174..f8603724fbee 100644
--- a/include/linux/sunrpc/cache.h
+++ b/include/linux/sunrpc/cache.h
@@ -87,6 +87,7 @@ struct cache_detail {
87 int has_died); 87 int has_died);
88 88
89 struct cache_head * (*alloc)(void); 89 struct cache_head * (*alloc)(void);
90 void (*flush)(void);
90 int (*match)(struct cache_head *orig, struct cache_head *new); 91 int (*match)(struct cache_head *orig, struct cache_head *new);
91 void (*init)(struct cache_head *orig, struct cache_head *new); 92 void (*init)(struct cache_head *orig, struct cache_head *new);
92 void (*update)(struct cache_head *orig, struct cache_head *new); 93 void (*update)(struct cache_head *orig, struct cache_head *new);
@@ -107,9 +108,9 @@ struct cache_detail {
107 /* fields for communication over channel */ 108 /* fields for communication over channel */
108 struct list_head queue; 109 struct list_head queue;
109 110
110 atomic_t readers; /* how many time is /chennel open */ 111 atomic_t writers; /* how many time is /channel open */
111 time_t last_close; /* if no readers, when did last close */ 112 time_t last_close; /* if no writers, when did last close */
112 time_t last_warn; /* when we last warned about no readers */ 113 time_t last_warn; /* when we last warned about no writers */
113 114
114 union { 115 union {
115 struct proc_dir_entry *procfs; 116 struct proc_dir_entry *procfs;
diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index 981f0d726ad4..40f65888dd38 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -42,6 +42,7 @@
42 42
43#ifndef SVC_RDMA_H 43#ifndef SVC_RDMA_H
44#define SVC_RDMA_H 44#define SVC_RDMA_H
45#include <linux/llist.h>
45#include <linux/sunrpc/xdr.h> 46#include <linux/sunrpc/xdr.h>
46#include <linux/sunrpc/svcsock.h> 47#include <linux/sunrpc/svcsock.h>
47#include <linux/sunrpc/rpc_rdma.h> 48#include <linux/sunrpc/rpc_rdma.h>
@@ -107,8 +108,7 @@ struct svcxprt_rdma {
107 struct list_head sc_read_complete_q; 108 struct list_head sc_read_complete_q;
108 struct work_struct sc_work; 109 struct work_struct sc_work;
109 110
110 spinlock_t sc_recv_lock; 111 struct llist_head sc_recv_ctxts;
111 struct list_head sc_recv_ctxts;
112}; 112};
113/* sc_flags */ 113/* sc_flags */
114#define RDMAXPRT_CONN_PENDING 3 114#define RDMAXPRT_CONN_PENDING 3
@@ -125,6 +125,7 @@ enum {
125#define RPCSVC_MAXPAYLOAD_RDMA RPCSVC_MAXPAYLOAD 125#define RPCSVC_MAXPAYLOAD_RDMA RPCSVC_MAXPAYLOAD
126 126
127struct svc_rdma_recv_ctxt { 127struct svc_rdma_recv_ctxt {
128 struct llist_node rc_node;
128 struct list_head rc_list; 129 struct list_head rc_list;
129 struct ib_recv_wr rc_recv_wr; 130 struct ib_recv_wr rc_recv_wr;
130 struct ib_cqe rc_cqe; 131 struct ib_cqe rc_cqe;
@@ -200,7 +201,6 @@ extern struct svc_xprt_class svc_rdma_bc_class;
200#endif 201#endif
201 202
202/* svc_rdma.c */ 203/* svc_rdma.c */
203extern struct workqueue_struct *svc_rdma_wq;
204extern int svc_rdma_init(void); 204extern int svc_rdma_init(void);
205extern void svc_rdma_cleanup(void); 205extern void svc_rdma_cleanup(void);
206 206
diff --git a/include/uapi/linux/nfsd/cld.h b/include/uapi/linux/nfsd/cld.h
index b1e9de4f07d5..a519313af953 100644
--- a/include/uapi/linux/nfsd/cld.h
+++ b/include/uapi/linux/nfsd/cld.h
@@ -26,17 +26,22 @@
26#include <linux/types.h> 26#include <linux/types.h>
27 27
28/* latest upcall version available */ 28/* latest upcall version available */
29#define CLD_UPCALL_VERSION 1 29#define CLD_UPCALL_VERSION 2
30 30
31/* defined by RFC3530 */ 31/* defined by RFC3530 */
32#define NFS4_OPAQUE_LIMIT 1024 32#define NFS4_OPAQUE_LIMIT 1024
33 33
34#ifndef SHA256_DIGEST_SIZE
35#define SHA256_DIGEST_SIZE 32
36#endif
37
34enum cld_command { 38enum cld_command {
35 Cld_Create, /* create a record for this cm_id */ 39 Cld_Create, /* create a record for this cm_id */
36 Cld_Remove, /* remove record of this cm_id */ 40 Cld_Remove, /* remove record of this cm_id */
37 Cld_Check, /* is this cm_id allowed? */ 41 Cld_Check, /* is this cm_id allowed? */
38 Cld_GraceDone, /* grace period is complete */ 42 Cld_GraceDone, /* grace period is complete */
39 Cld_GraceStart, 43 Cld_GraceStart, /* grace start (upload client records) */
44 Cld_GetVersion, /* query max supported upcall version */
40}; 45};
41 46
42/* representation of long-form NFSv4 client ID */ 47/* representation of long-form NFSv4 client ID */
@@ -45,6 +50,17 @@ struct cld_name {
45 unsigned char cn_id[NFS4_OPAQUE_LIMIT]; /* client-provided */ 50 unsigned char cn_id[NFS4_OPAQUE_LIMIT]; /* client-provided */
46} __attribute__((packed)); 51} __attribute__((packed));
47 52
53/* sha256 hash of the kerberos principal */
54struct cld_princhash {
55 __u8 cp_len; /* length of cp_data */
56 unsigned char cp_data[SHA256_DIGEST_SIZE]; /* hash of principal */
57} __attribute__((packed));
58
59struct cld_clntinfo {
60 struct cld_name cc_name;
61 struct cld_princhash cc_princhash;
62} __attribute__((packed));
63
48/* message struct for communication with userspace */ 64/* message struct for communication with userspace */
49struct cld_msg { 65struct cld_msg {
50 __u8 cm_vers; /* upcall version */ 66 __u8 cm_vers; /* upcall version */
@@ -54,7 +70,28 @@ struct cld_msg {
54 union { 70 union {
55 __s64 cm_gracetime; /* grace period start time */ 71 __s64 cm_gracetime; /* grace period start time */
56 struct cld_name cm_name; 72 struct cld_name cm_name;
73 __u8 cm_version; /* for getting max version */
74 } __attribute__((packed)) cm_u;
75} __attribute__((packed));
76
77/* version 2 message can include hash of kerberos principal */
78struct cld_msg_v2 {
79 __u8 cm_vers; /* upcall version */
80 __u8 cm_cmd; /* upcall command */
81 __s16 cm_status; /* return code */
82 __u32 cm_xid; /* transaction id */
83 union {
84 struct cld_name cm_name;
85 __u8 cm_version; /* for getting max version */
86 struct cld_clntinfo cm_clntinfo; /* name & princ hash */
57 } __attribute__((packed)) cm_u; 87 } __attribute__((packed)) cm_u;
58} __attribute__((packed)); 88} __attribute__((packed));
59 89
90struct cld_msg_hdr {
91 __u8 cm_vers; /* upcall version */
92 __u8 cm_cmd; /* upcall command */
93 __s16 cm_status; /* return code */
94 __u32 cm_xid; /* transaction id */
95} __attribute__((packed));
96
60#endif /* !_NFSD_CLD_H */ 97#endif /* !_NFSD_CLD_H */
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 6f1528f271ee..a349094f6fb7 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -373,7 +373,7 @@ void sunrpc_init_cache_detail(struct cache_detail *cd)
373 spin_lock(&cache_list_lock); 373 spin_lock(&cache_list_lock);
374 cd->nextcheck = 0; 374 cd->nextcheck = 0;
375 cd->entries = 0; 375 cd->entries = 0;
376 atomic_set(&cd->readers, 0); 376 atomic_set(&cd->writers, 0);
377 cd->last_close = 0; 377 cd->last_close = 0;
378 cd->last_warn = -1; 378 cd->last_warn = -1;
379 list_add(&cd->others, &cache_list); 379 list_add(&cd->others, &cache_list);
@@ -1029,11 +1029,13 @@ static int cache_open(struct inode *inode, struct file *filp,
1029 } 1029 }
1030 rp->offset = 0; 1030 rp->offset = 0;
1031 rp->q.reader = 1; 1031 rp->q.reader = 1;
1032 atomic_inc(&cd->readers); 1032
1033 spin_lock(&queue_lock); 1033 spin_lock(&queue_lock);
1034 list_add(&rp->q.list, &cd->queue); 1034 list_add(&rp->q.list, &cd->queue);
1035 spin_unlock(&queue_lock); 1035 spin_unlock(&queue_lock);
1036 } 1036 }
1037 if (filp->f_mode & FMODE_WRITE)
1038 atomic_inc(&cd->writers);
1037 filp->private_data = rp; 1039 filp->private_data = rp;
1038 return 0; 1040 return 0;
1039} 1041}
@@ -1062,8 +1064,10 @@ static int cache_release(struct inode *inode, struct file *filp,
1062 filp->private_data = NULL; 1064 filp->private_data = NULL;
1063 kfree(rp); 1065 kfree(rp);
1064 1066
1067 }
1068 if (filp->f_mode & FMODE_WRITE) {
1069 atomic_dec(&cd->writers);
1065 cd->last_close = seconds_since_boot(); 1070 cd->last_close = seconds_since_boot();
1066 atomic_dec(&cd->readers);
1067 } 1071 }
1068 module_put(cd->owner); 1072 module_put(cd->owner);
1069 return 0; 1073 return 0;
@@ -1171,7 +1175,7 @@ static void warn_no_listener(struct cache_detail *detail)
1171 1175
1172static bool cache_listeners_exist(struct cache_detail *detail) 1176static bool cache_listeners_exist(struct cache_detail *detail)
1173{ 1177{
1174 if (atomic_read(&detail->readers)) 1178 if (atomic_read(&detail->writers))
1175 return true; 1179 return true;
1176 if (detail->last_close == 0) 1180 if (detail->last_close == 0)
1177 /* This cache was never opened */ 1181 /* This cache was never opened */
@@ -1520,6 +1524,9 @@ static ssize_t write_flush(struct file *file, const char __user *buf,
1520 cd->nextcheck = now; 1524 cd->nextcheck = now;
1521 cache_flush(); 1525 cache_flush();
1522 1526
1527 if (cd->flush)
1528 cd->flush();
1529
1523 *ppos += count; 1530 *ppos += count;
1524 return count; 1531 return count;
1525} 1532}
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 220b79988000..d11b70552c33 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1233,8 +1233,8 @@ svc_generic_init_request(struct svc_rqst *rqstp,
1233 1233
1234 if (rqstp->rq_vers >= progp->pg_nvers ) 1234 if (rqstp->rq_vers >= progp->pg_nvers )
1235 goto err_bad_vers; 1235 goto err_bad_vers;
1236 versp = progp->pg_vers[rqstp->rq_vers]; 1236 versp = progp->pg_vers[rqstp->rq_vers];
1237 if (!versp) 1237 if (!versp)
1238 goto err_bad_vers; 1238 goto err_bad_vers;
1239 1239
1240 /* 1240 /*
diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c
index abdb3004a1e3..97bca509a391 100644
--- a/net/sunrpc/xprtrdma/svc_rdma.c
+++ b/net/sunrpc/xprtrdma/svc_rdma.c
@@ -73,8 +73,6 @@ atomic_t rdma_stat_rq_prod;
73atomic_t rdma_stat_sq_poll; 73atomic_t rdma_stat_sq_poll;
74atomic_t rdma_stat_sq_prod; 74atomic_t rdma_stat_sq_prod;
75 75
76struct workqueue_struct *svc_rdma_wq;
77
78/* 76/*
79 * This function implements reading and resetting an atomic_t stat 77 * This function implements reading and resetting an atomic_t stat
80 * variable through read/write to a proc file. Any write to the file 78 * variable through read/write to a proc file. Any write to the file
@@ -230,7 +228,6 @@ static struct ctl_table svcrdma_root_table[] = {
230void svc_rdma_cleanup(void) 228void svc_rdma_cleanup(void)
231{ 229{
232 dprintk("SVCRDMA Module Removed, deregister RPC RDMA transport\n"); 230 dprintk("SVCRDMA Module Removed, deregister RPC RDMA transport\n");
233 destroy_workqueue(svc_rdma_wq);
234 if (svcrdma_table_header) { 231 if (svcrdma_table_header) {
235 unregister_sysctl_table(svcrdma_table_header); 232 unregister_sysctl_table(svcrdma_table_header);
236 svcrdma_table_header = NULL; 233 svcrdma_table_header = NULL;
@@ -246,10 +243,6 @@ int svc_rdma_init(void)
246 dprintk("\tmax_bc_requests : %u\n", svcrdma_max_bc_requests); 243 dprintk("\tmax_bc_requests : %u\n", svcrdma_max_bc_requests);
247 dprintk("\tmax_inline : %d\n", svcrdma_max_req_size); 244 dprintk("\tmax_inline : %d\n", svcrdma_max_req_size);
248 245
249 svc_rdma_wq = alloc_workqueue("svc_rdma", 0, 0);
250 if (!svc_rdma_wq)
251 return -ENOMEM;
252
253 if (!svcrdma_table_header) 246 if (!svcrdma_table_header)
254 svcrdma_table_header = 247 svcrdma_table_header =
255 register_sysctl_table(svcrdma_root_table); 248 register_sysctl_table(svcrdma_root_table);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 65e2fb9aac65..96bccd398469 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -172,9 +172,10 @@ static void svc_rdma_recv_ctxt_destroy(struct svcxprt_rdma *rdma,
172void svc_rdma_recv_ctxts_destroy(struct svcxprt_rdma *rdma) 172void svc_rdma_recv_ctxts_destroy(struct svcxprt_rdma *rdma)
173{ 173{
174 struct svc_rdma_recv_ctxt *ctxt; 174 struct svc_rdma_recv_ctxt *ctxt;
175 struct llist_node *node;
175 176
176 while ((ctxt = svc_rdma_next_recv_ctxt(&rdma->sc_recv_ctxts))) { 177 while ((node = llist_del_first(&rdma->sc_recv_ctxts))) {
177 list_del(&ctxt->rc_list); 178 ctxt = llist_entry(node, struct svc_rdma_recv_ctxt, rc_node);
178 svc_rdma_recv_ctxt_destroy(rdma, ctxt); 179 svc_rdma_recv_ctxt_destroy(rdma, ctxt);
179 } 180 }
180} 181}
@@ -183,21 +184,18 @@ static struct svc_rdma_recv_ctxt *
183svc_rdma_recv_ctxt_get(struct svcxprt_rdma *rdma) 184svc_rdma_recv_ctxt_get(struct svcxprt_rdma *rdma)
184{ 185{
185 struct svc_rdma_recv_ctxt *ctxt; 186 struct svc_rdma_recv_ctxt *ctxt;
187 struct llist_node *node;
186 188
187 spin_lock(&rdma->sc_recv_lock); 189 node = llist_del_first(&rdma->sc_recv_ctxts);
188 ctxt = svc_rdma_next_recv_ctxt(&rdma->sc_recv_ctxts); 190 if (!node)
189 if (!ctxt)
190 goto out_empty; 191 goto out_empty;
191 list_del(&ctxt->rc_list); 192 ctxt = llist_entry(node, struct svc_rdma_recv_ctxt, rc_node);
192 spin_unlock(&rdma->sc_recv_lock);
193 193
194out: 194out:
195 ctxt->rc_page_count = 0; 195 ctxt->rc_page_count = 0;
196 return ctxt; 196 return ctxt;
197 197
198out_empty: 198out_empty:
199 spin_unlock(&rdma->sc_recv_lock);
200
201 ctxt = svc_rdma_recv_ctxt_alloc(rdma); 199 ctxt = svc_rdma_recv_ctxt_alloc(rdma);
202 if (!ctxt) 200 if (!ctxt)
203 return NULL; 201 return NULL;
@@ -218,11 +216,9 @@ void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma,
218 for (i = 0; i < ctxt->rc_page_count; i++) 216 for (i = 0; i < ctxt->rc_page_count; i++)
219 put_page(ctxt->rc_pages[i]); 217 put_page(ctxt->rc_pages[i]);
220 218
221 if (!ctxt->rc_temp) { 219 if (!ctxt->rc_temp)
222 spin_lock(&rdma->sc_recv_lock); 220 llist_add(&ctxt->rc_node, &rdma->sc_recv_ctxts);
223 list_add(&ctxt->rc_list, &rdma->sc_recv_ctxts); 221 else
224 spin_unlock(&rdma->sc_recv_lock);
225 } else
226 svc_rdma_recv_ctxt_destroy(rdma, ctxt); 222 svc_rdma_recv_ctxt_destroy(rdma, ctxt);
227} 223}
228 224
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 4d3db6ee7f09..145a3615c319 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -140,14 +140,13 @@ static struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv,
140 INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q); 140 INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q);
141 INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q); 141 INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q);
142 INIT_LIST_HEAD(&cma_xprt->sc_send_ctxts); 142 INIT_LIST_HEAD(&cma_xprt->sc_send_ctxts);
143 INIT_LIST_HEAD(&cma_xprt->sc_recv_ctxts); 143 init_llist_head(&cma_xprt->sc_recv_ctxts);
144 INIT_LIST_HEAD(&cma_xprt->sc_rw_ctxts); 144 INIT_LIST_HEAD(&cma_xprt->sc_rw_ctxts);
145 init_waitqueue_head(&cma_xprt->sc_send_wait); 145 init_waitqueue_head(&cma_xprt->sc_send_wait);
146 146
147 spin_lock_init(&cma_xprt->sc_lock); 147 spin_lock_init(&cma_xprt->sc_lock);
148 spin_lock_init(&cma_xprt->sc_rq_dto_lock); 148 spin_lock_init(&cma_xprt->sc_rq_dto_lock);
149 spin_lock_init(&cma_xprt->sc_send_lock); 149 spin_lock_init(&cma_xprt->sc_send_lock);
150 spin_lock_init(&cma_xprt->sc_recv_lock);
151 spin_lock_init(&cma_xprt->sc_rw_ctxt_lock); 150 spin_lock_init(&cma_xprt->sc_rw_ctxt_lock);
152 151
153 /* 152 /*
@@ -630,8 +629,9 @@ static void svc_rdma_free(struct svc_xprt *xprt)
630{ 629{
631 struct svcxprt_rdma *rdma = 630 struct svcxprt_rdma *rdma =
632 container_of(xprt, struct svcxprt_rdma, sc_xprt); 631 container_of(xprt, struct svcxprt_rdma, sc_xprt);
632
633 INIT_WORK(&rdma->sc_work, __svc_rdma_free); 633 INIT_WORK(&rdma->sc_work, __svc_rdma_free);
634 queue_work(svc_rdma_wq, &rdma->sc_work); 634 schedule_work(&rdma->sc_work);
635} 635}
636 636
637static int svc_rdma_has_wspace(struct svc_xprt *xprt) 637static int svc_rdma_has_wspace(struct svc_xprt *xprt)