summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJeff Layton <jeff.layton@primarydata.com>2019-08-18 14:18:48 -0400
committerJ. Bruce Fields <bfields@redhat.com>2019-08-19 11:00:39 -0400
commit65294c1f2c5e72b15b76e16c8c8cfd9359fc9f6f (patch)
tree9b088b0f2e2fba280862b489910b1aeb0460c8c3
parent7239a40ca8bfd88dc5d2f66a14882054fe8e3b92 (diff)
nfsd: add a new struct file caching facility to nfsd
Currently, NFSv2/3 reads and writes have to open a file, do the read or write and then close it again for each RPC. This is highly inefficient, especially when the underlying filesystem has a relatively slow open routine. This patch adds a new open file cache to knfsd. Rather than doing an open for each RPC, the read/write handlers can call into this cache to see if there is one already there for the correct filehandle and NFS_MAY_READ/WRITE flags. If there isn't an entry, then we create a new one and attempt to perform the open. If there is, then we wait until the entry is fully instantiated and return it if it is at the end of the wait. If it's not, then we attempt to take over construction. Since the main goal is to speed up NFSv2/3 I/O, we don't want to close these files on last put of these objects. We need to keep them around for a little while since we never know when the next READ/WRITE will come in. Cache entries have a hardcoded 1s timeout, and we have a recurring workqueue job that walks the cache and purges any entries that have expired. Signed-off-by: Jeff Layton <jeff.layton@primarydata.com> Signed-off-by: Weston Andros Adamson <dros@primarydata.com> Signed-off-by: Richard Sharpe <richard.sharpe@primarydata.com> Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com> Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com> Signed-off-by: J. Bruce Fields <bfields@redhat.com>
-rw-r--r--fs/nfsd/Kconfig1
-rw-r--r--fs/nfsd/Makefile3
-rw-r--r--fs/nfsd/export.c13
-rw-r--r--fs/nfsd/filecache.c885
-rw-r--r--fs/nfsd/filecache.h60
-rw-r--r--fs/nfsd/nfssvc.c9
-rw-r--r--fs/nfsd/trace.h140
-rw-r--r--fs/nfsd/vfs.c65
-rw-r--r--fs/nfsd/vfs.h3
9 files changed, 1155 insertions, 24 deletions
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
index d25f6bbe7006..bff8456220e0 100644
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig
@@ -3,6 +3,7 @@ config NFSD
3 tristate "NFS server support" 3 tristate "NFS server support"
4 depends on INET 4 depends on INET
5 depends on FILE_LOCKING 5 depends on FILE_LOCKING
6 depends on FSNOTIFY
6 select LOCKD 7 select LOCKD
7 select SUNRPC 8 select SUNRPC
8 select EXPORTFS 9 select EXPORTFS
diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile
index 2bfb58eefad1..6a40b1afe703 100644
--- a/fs/nfsd/Makefile
+++ b/fs/nfsd/Makefile
@@ -11,7 +11,8 @@ obj-$(CONFIG_NFSD) += nfsd.o
11nfsd-y += trace.o 11nfsd-y += trace.o
12 12
13nfsd-y += nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \ 13nfsd-y += nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \
14 export.o auth.o lockd.o nfscache.o nfsxdr.o stats.o 14 export.o auth.o lockd.o nfscache.o nfsxdr.o \
15 stats.o filecache.o
15nfsd-$(CONFIG_NFSD_FAULT_INJECTION) += fault_inject.o 16nfsd-$(CONFIG_NFSD_FAULT_INJECTION) += fault_inject.o
16nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o 17nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o
17nfsd-$(CONFIG_NFSD_V3) += nfs3proc.o nfs3xdr.o 18nfsd-$(CONFIG_NFSD_V3) += nfs3proc.o nfs3xdr.o
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index baa01956a5b3..052fac64b578 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -22,6 +22,7 @@
22#include "nfsfh.h" 22#include "nfsfh.h"
23#include "netns.h" 23#include "netns.h"
24#include "pnfs.h" 24#include "pnfs.h"
25#include "filecache.h"
25 26
26#define NFSDDBG_FACILITY NFSDDBG_EXPORT 27#define NFSDDBG_FACILITY NFSDDBG_EXPORT
27 28
@@ -232,6 +233,17 @@ static struct cache_head *expkey_alloc(void)
232 return NULL; 233 return NULL;
233} 234}
234 235
/*
 * Flush callback for the expkey cache: purge the nfsd open-file cache
 * whenever the export-key cache is flushed, since cached opens may pin
 * dentries on exports that are going away.
 */
static void expkey_flush(void)
{
	/*
	 * Take the nfsd_mutex here to ensure that the file cache is not
	 * destroyed while we're in the middle of flushing.
	 */
	mutex_lock(&nfsd_mutex);
	nfsd_file_cache_purge();
	mutex_unlock(&nfsd_mutex);
}
246
235static const struct cache_detail svc_expkey_cache_template = { 247static const struct cache_detail svc_expkey_cache_template = {
236 .owner = THIS_MODULE, 248 .owner = THIS_MODULE,
237 .hash_size = EXPKEY_HASHMAX, 249 .hash_size = EXPKEY_HASHMAX,
@@ -244,6 +256,7 @@ static const struct cache_detail svc_expkey_cache_template = {
244 .init = expkey_init, 256 .init = expkey_init,
245 .update = expkey_update, 257 .update = expkey_update,
246 .alloc = expkey_alloc, 258 .alloc = expkey_alloc,
259 .flush = expkey_flush,
247}; 260};
248 261
249static int 262static int
diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
new file mode 100644
index 000000000000..a2fcb251d2f6
--- /dev/null
+++ b/fs/nfsd/filecache.c
@@ -0,0 +1,885 @@
1/*
2 * Open file cache.
3 *
4 * (c) 2015 - Jeff Layton <jeff.layton@primarydata.com>
5 */
6
7#include <linux/hash.h>
8#include <linux/slab.h>
9#include <linux/hash.h>
10#include <linux/file.h>
11#include <linux/sched.h>
12#include <linux/list_lru.h>
13#include <linux/fsnotify_backend.h>
14#include <linux/fsnotify.h>
15#include <linux/seq_file.h>
16
17#include "vfs.h"
18#include "nfsd.h"
19#include "nfsfh.h"
20#include "filecache.h"
21#include "trace.h"
22
23#define NFSDDBG_FACILITY NFSDDBG_FH
24
/* FIXME: dynamically size this for the machine somehow? */
#define NFSD_FILE_HASH_BITS                   12
#define NFSD_FILE_HASH_SIZE                  (1 << NFSD_FILE_HASH_BITS)
/* delay before a scheduled laundrette run (jiffies) */
#define NFSD_LAUNDRETTE_DELAY		     (2 * HZ)

/* bits in nfsd_file_lru_flags */
#define NFSD_FILE_LRU_RESCAN		     (0)
#define NFSD_FILE_SHUTDOWN		     (1)
/* above this many cached files, scan immediately rather than after a delay */
#define NFSD_FILE_LRU_THRESHOLD		     (4096UL)
/* above this many, also synchronously flush the laundrette */
#define NFSD_FILE_LRU_LIMIT		     (NFSD_FILE_LRU_THRESHOLD << 2)

/* We only care about NFSD_MAY_READ/WRITE for this cache */
#define NFSD_FILE_MAY_MASK	(NFSD_MAY_READ|NFSD_MAY_WRITE)

/* One bucket of the open-file hashtable */
struct nfsd_fcache_bucket {
	/* head of the hash chain for this bucket */
	struct hlist_head	nfb_head;
	/* protects nfb_head and the counters below */
	spinlock_t		nfb_lock;
	/* current number of entries in this bucket */
	unsigned int		nfb_count;
	/* high-water mark of nfb_count (stats only) */
	unsigned int		nfb_maxcount;
};
44
/* per-cpu cache-hit counter, summed for the stats file */
static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits);

static struct kmem_cache		*nfsd_file_slab;
static struct kmem_cache		*nfsd_file_mark_slab;
/* the open-file hashtable; NULL when the cache is not initialized */
static struct nfsd_fcache_bucket	*nfsd_file_hashtbl;
/* LRU of hashed entries, walked by the shrinker and the laundrette */
static struct list_lru			nfsd_file_lru;
/* NFSD_FILE_LRU_RESCAN / NFSD_FILE_SHUTDOWN bits */
static long				nfsd_file_lru_flags;
static struct fsnotify_group		*nfsd_file_fsnotify_group;
/* total number of hashed entries across all buckets */
static atomic_long_t			nfsd_filecache_count;
/* delayed work that closes unused cache entries */
static struct delayed_work		nfsd_filecache_laundrette;

/* whether nfsd_file_schedule_laundrette may also flush the work */
enum nfsd_file_laundrette_ctl {
	NFSD_FILE_LAUNDRETTE_NOFLUSH = 0,
	NFSD_FILE_LAUNDRETTE_MAY_FLUSH
};
60
/*
 * Schedule a run of the laundrette to reap unused cache entries. Scanning
 * becomes more aggressive (immediate, and possibly synchronous when @ctl
 * allows it) as the number of cached files grows.
 */
static void
nfsd_file_schedule_laundrette(enum nfsd_file_laundrette_ctl ctl)
{
	long count = atomic_long_read(&nfsd_filecache_count);

	/* nothing cached, or cache is shutting down: nothing to schedule */
	if (count == 0 || test_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags))
		return;

	/* Be more aggressive about scanning if over the threshold */
	if (count > NFSD_FILE_LRU_THRESHOLD)
		mod_delayed_work(system_wq, &nfsd_filecache_laundrette, 0);
	else
		schedule_delayed_work(&nfsd_filecache_laundrette, NFSD_LAUNDRETTE_DELAY);

	if (ctl == NFSD_FILE_LAUNDRETTE_NOFLUSH)
		return;

	/* ...and don't delay flushing if we're out of control */
	if (count >= NFSD_FILE_LRU_LIMIT)
		flush_delayed_work(&nfsd_filecache_laundrette);
}
82
/*
 * RCU callback: final teardown of an nfsd_file after the grace period,
 * so lockless hash walkers can no longer see it.
 */
static void
nfsd_file_slab_free(struct rcu_head *rcu)
{
	struct nfsd_file *nf = container_of(rcu, struct nfsd_file, nf_rcu);

	/* drop the cred reference taken in nfsd_file_alloc() */
	put_cred(nf->nf_cred);
	kmem_cache_free(nfsd_file_slab, nf);
}
91
/*
 * fsnotify ->free_mark callback: release the containing nfsd_file_mark
 * once fsnotify is done with the embedded mark.
 */
static void
nfsd_file_mark_free(struct fsnotify_mark *mark)
{
	struct nfsd_file_mark *nfm = container_of(mark, struct nfsd_file_mark,
						  nfm_mark);

	kmem_cache_free(nfsd_file_mark_slab, nfm);
}
100
101static struct nfsd_file_mark *
102nfsd_file_mark_get(struct nfsd_file_mark *nfm)
103{
104 if (!atomic_inc_not_zero(&nfm->nfm_ref))
105 return NULL;
106 return nfm;
107}
108
/*
 * Drop a reference to @nfm. On the last put, detach the mark from the
 * inode; the final kmem_cache_free happens in nfsd_file_mark_free() once
 * fsnotify drops its own reference.
 */
static void
nfsd_file_mark_put(struct nfsd_file_mark *nfm)
{
	if (atomic_dec_and_test(&nfm->nfm_ref)) {

		fsnotify_destroy_mark(&nfm->nfm_mark, nfsd_file_fsnotify_group);
		fsnotify_put_mark(&nfm->nfm_mark);
	}
}
118
/*
 * Find the nfsd_file_mark attached to nf->nf_inode and take a reference
 * to it, or attach a new one if none exists. Returns NULL on allocation
 * failure or if the mark could not be added. May loop if we race with
 * another task adding a mark (-EEXIST) whose refcount is already falling.
 */
static struct nfsd_file_mark *
nfsd_file_mark_find_or_create(struct nfsd_file *nf)
{
	int			err;
	struct fsnotify_mark	*mark;
	struct nfsd_file_mark	*nfm = NULL, *new;
	struct inode *inode = nf->nf_inode;

	do {
		mutex_lock(&nfsd_file_fsnotify_group->mark_mutex);
		mark = fsnotify_find_mark(&inode->i_fsnotify_marks,
				nfsd_file_fsnotify_group);
		if (mark) {
			/* found one -- try to take our own nfm reference */
			nfm = nfsd_file_mark_get(container_of(mark,
						 struct nfsd_file_mark,
						 nfm_mark));
			mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex);
			/* drop the find_mark reference in either case */
			fsnotify_put_mark(mark);
			if (likely(nfm))
				break;
		} else
			mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex);

		/* allocate a new nfm */
		new = kmem_cache_alloc(nfsd_file_mark_slab, GFP_KERNEL);
		if (!new)
			return NULL;
		fsnotify_init_mark(&new->nfm_mark, nfsd_file_fsnotify_group);
		/* events that indicate the file may have been unlinked */
		new->nfm_mark.mask = FS_ATTRIB|FS_DELETE_SELF;
		atomic_set(&new->nfm_ref, 1);

		err = fsnotify_add_inode_mark(&new->nfm_mark, inode, 0);

		/*
		 * If the add was successful, then return the object.
		 * Otherwise, we need to put the reference we hold on the
		 * nfm_mark. The fsnotify code will take a reference and put
		 * it on failure, so we can't just free it directly. It's also
		 * not safe to call fsnotify_destroy_mark on it as the
		 * mark->group will be NULL. Thus, we can't let the nfm_ref
		 * counter drive the destruction at this point.
		 */
		if (likely(!err))
			nfm = new;
		else
			fsnotify_put_mark(&new->nfm_mark);
	} while (unlikely(err == -EEXIST));

	return nfm;
}
169
/*
 * Allocate and initialize a new cache entry for @inode with the given
 * access bits and hash bucket. The entry is returned with one reference
 * held and is not yet hashed; nf_file is opened later by the caller.
 * Returns NULL on allocation failure.
 */
static struct nfsd_file *
nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval)
{
	struct nfsd_file *nf;

	nf = kmem_cache_alloc(nfsd_file_slab, GFP_KERNEL);
	if (nf) {
		INIT_HLIST_NODE(&nf->nf_node);
		INIT_LIST_HEAD(&nf->nf_lru);
		nf->nf_file = NULL;
		/* entry matches only requests made with the same creds */
		nf->nf_cred = get_current_cred();
		nf->nf_flags = 0;
		nf->nf_inode = inode;
		nf->nf_hashval = hashval;
		/* one reference for the caller */
		atomic_set(&nf->nf_ref, 1);
		nf->nf_may = may & NFSD_FILE_MAY_MASK;
		if (may & NFSD_MAY_NOT_BREAK_LEASE) {
			/*
			 * Remember that lease breaks were skipped, so a later
			 * caller that may break leases can do so.
			 */
			if (may & NFSD_MAY_WRITE)
				__set_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags);
			if (may & NFSD_MAY_READ)
				__set_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
		}
		nf->nf_mark = NULL;
		trace_nfsd_file_alloc(nf);
	}
	return nf;
}
197
/*
 * Tear down an nfsd_file whose refcount has hit zero: drop the mark
 * reference, close the file, and free the entry after an RCU grace
 * period. Returns true if a file was closed, in which case the caller
 * may want to flush_delayed_fput() to force the final __fput.
 */
static bool
nfsd_file_free(struct nfsd_file *nf)
{
	bool flush = false;

	trace_nfsd_file_put_final(nf);
	if (nf->nf_mark)
		nfsd_file_mark_put(nf->nf_mark);
	if (nf->nf_file) {
		/*
		 * Hold an extra reference across filp_close() so the final
		 * fput happens here; it may be deferred (delayed fput), hence
		 * the flush hint to the caller.
		 */
		get_file(nf->nf_file);
		filp_close(nf->nf_file, NULL);
		fput(nf->nf_file);
		flush = true;
	}
	/* freed via RCU so lockless hash walkers can't trip over us */
	call_rcu(&nf->nf_rcu, nfsd_file_slab_free);
	return flush;
}
215
/*
 * Remove @nf from its hash bucket and the LRU, and update the counters.
 * Caller must hold the bucket lock and have cleared NFSD_FILE_HASHED
 * (or otherwise own the hashed state).
 */
static void
nfsd_file_do_unhash(struct nfsd_file *nf)
{
	lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);

	trace_nfsd_file_unhash(nf);

	--nfsd_file_hashtbl[nf->nf_hashval].nfb_count;
	hlist_del_rcu(&nf->nf_node);
	if (!list_empty(&nf->nf_lru))
		list_lru_del(&nfsd_file_lru, &nf->nf_lru);
	atomic_long_dec(&nfsd_filecache_count);
}
229
230static bool
231nfsd_file_unhash(struct nfsd_file *nf)
232{
233 if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
234 nfsd_file_do_unhash(nf);
235 return true;
236 }
237 return false;
238}
239
/*
 * Unhash @nf and drop the hashtable's reference to it. If that was the
 * last reference, the entry is queued on @dispose instead of being freed
 * here, so the caller can free it outside the bucket lock.
 *
 * Return true if the file was unhashed.
 */
static bool
nfsd_file_unhash_and_release_locked(struct nfsd_file *nf, struct list_head *dispose)
{
	lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);

	trace_nfsd_file_unhash_and_release_locked(nf);
	if (!nfsd_file_unhash(nf))
		return false;
	/* keep final reference for nfsd_file_lru_dispose */
	if (atomic_add_unless(&nf->nf_ref, -1, 1))
		return true;

	/* refcount was 1: hand the final reference to the dispose list */
	list_add(&nf->nf_lru, dispose);
	return true;
}
258
/*
 * Drop a reference to @nf without scheduling the laundrette. Frees the
 * entry on the last put (at which point it must already be unhashed).
 * Returns the new reference count.
 */
static int
nfsd_file_put_noref(struct nfsd_file *nf)
{
	int count;
	trace_nfsd_file_put(nf);

	count = atomic_dec_return(&nf->nf_ref);
	if (!count) {
		/* a hashed entry holds a ref, so it can't reach zero hashed */
		WARN_ON(test_bit(NFSD_FILE_HASHED, &nf->nf_flags));
		nfsd_file_free(nf);
	}
	return count;
}
272
/*
 * Drop a reference to @nf. Marks the entry as recently referenced so the
 * LRU scanner gives it another pass, and kicks the laundrette when the
 * hashtable's reference becomes the only one left.
 *
 * Note: REFERENCED is set *before* the decrement; nfsd_file_lru_cb reads
 * the counter first and the flag second, which is what makes its lockless
 * check safe.
 */
void
nfsd_file_put(struct nfsd_file *nf)
{
	bool is_hashed = test_bit(NFSD_FILE_HASHED, &nf->nf_flags) != 0;

	set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
	if (nfsd_file_put_noref(nf) == 1 && is_hashed)
		nfsd_file_schedule_laundrette(NFSD_FILE_LAUNDRETTE_MAY_FLUSH);
}
282
283struct nfsd_file *
284nfsd_file_get(struct nfsd_file *nf)
285{
286 if (likely(atomic_inc_not_zero(&nf->nf_ref)))
287 return nf;
288 return NULL;
289}
290
291static void
292nfsd_file_dispose_list(struct list_head *dispose)
293{
294 struct nfsd_file *nf;
295
296 while(!list_empty(dispose)) {
297 nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
298 list_del(&nf->nf_lru);
299 nfsd_file_put_noref(nf);
300 }
301}
302
/*
 * Like nfsd_file_dispose_list, but also ensure that any files we closed
 * have their final __fput done before returning, by flushing the
 * delayed-fput list when anything was freed.
 */
static void
nfsd_file_dispose_list_sync(struct list_head *dispose)
{
	bool flush = false;
	struct nfsd_file *nf;

	while(!list_empty(dispose)) {
		nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
		list_del(&nf->nf_lru);
		/* not the last reference: someone else will free it */
		if (!atomic_dec_and_test(&nf->nf_ref))
			continue;
		if (nfsd_file_free(nf))
			flush = true;
	}
	if (flush)
		flush_delayed_fput();
}
320
/*
 * LRU-walk isolate callback: move entries that are unused (hashtable
 * reference only) and not recently referenced onto the caller's dispose
 * list, unhashing them in the process.
 *
 * Note this can deadlock with nfsd_file_cache_purge.
 */
static enum lru_status
nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru,
		 spinlock_t *lock, void *arg)
	__releases(lock)
	__acquires(lock)
{
	struct list_head *head = arg;
	struct nfsd_file *nf = list_entry(item, struct nfsd_file, nf_lru);

	/*
	 * Do a lockless refcount check. The hashtable holds one reference, so
	 * we look to see if anything else has a reference, or if any have
	 * been put since the shrinker last ran. Those don't get unhashed and
	 * released.
	 *
	 * Note that in the put path, we set the flag and then decrement the
	 * counter. Here we check the counter and then test and clear the flag.
	 * That order is deliberate to ensure that we can do this locklessly.
	 */
	if (atomic_read(&nf->nf_ref) > 1)
		goto out_skip;
	if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags))
		goto out_rescan;

	/* lost a race with someone else unhashing it */
	if (!test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags))
		goto out_skip;

	list_lru_isolate_move(lru, &nf->nf_lru, head);
	return LRU_REMOVED;
out_rescan:
	/* recently used: ask the laundrette to come back for another pass */
	set_bit(NFSD_FILE_LRU_RESCAN, &nfsd_file_lru_flags);
out_skip:
	return LRU_SKIP;
}
358
/*
 * Finish unhashing the entries isolated by nfsd_file_lru_cb (which only
 * cleared NFSD_FILE_HASHED) and drop the hashtable's reference to each.
 */
static void
nfsd_file_lru_dispose(struct list_head *head)
{
	while(!list_empty(head)) {
		struct nfsd_file *nf = list_first_entry(head,
				struct nfsd_file, nf_lru);
		list_del_init(&nf->nf_lru);
		spin_lock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
		nfsd_file_do_unhash(nf);
		spin_unlock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
		nfsd_file_put_noref(nf);
	}
}
372
373static unsigned long
374nfsd_file_lru_count(struct shrinker *s, struct shrink_control *sc)
375{
376 return list_lru_count(&nfsd_file_lru);
377}
378
/*
 * Shrinker ->scan_objects: isolate unused entries from the LRU and
 * release them. Returns the number of entries freed.
 */
static unsigned long
nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc)
{
	LIST_HEAD(head);
	unsigned long ret;

	ret = list_lru_shrink_walk(&nfsd_file_lru, sc, nfsd_file_lru_cb, &head);
	nfsd_file_lru_dispose(&head);
	return ret;
}
389
/* Let the VM reap unused cache entries under memory pressure */
static struct shrinker	nfsd_file_shrinker = {
	.scan_objects = nfsd_file_lru_scan,
	.count_objects = nfsd_file_lru_count,
	.seeks = 1,
};
395
/*
 * Walk bucket @hashval under its lock and queue every entry for @inode
 * onto @dispose (unhashed, hashtable reference dropped). The caller
 * disposes of the list outside the lock.
 */
static void
__nfsd_file_close_inode(struct inode *inode, unsigned int hashval,
			struct list_head *dispose)
{
	struct nfsd_file	*nf;
	struct hlist_node	*tmp;

	spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
	hlist_for_each_entry_safe(nf, tmp, &nfsd_file_hashtbl[hashval].nfb_head, nf_node) {
		if (inode == nf->nf_inode)
			nfsd_file_unhash_and_release_locked(nf, dispose);
	}
	spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
}
410
/**
 * nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file
 * @inode: inode of the file to attempt to remove
 *
 * Walk the whole hash bucket, looking for any files that correspond to "inode".
 * If any do, then unhash them and put the hashtable reference to them and
 * destroy any that had their last reference put. Also ensure that any of the
 * fputs also have their final __fput done as well.
 */
void
nfsd_file_close_inode_sync(struct inode *inode)
{
	unsigned int		hashval = (unsigned int)hash_long(inode->i_ino,
						NFSD_FILE_HASH_BITS);
	LIST_HEAD(dispose);

	__nfsd_file_close_inode(inode, hashval, &dispose);
	trace_nfsd_file_close_inode_sync(inode, hashval, !list_empty(&dispose));
	nfsd_file_dispose_list_sync(&dispose);
}
431
/**
 * nfsd_file_close_inode - attempt to forcibly close a nfsd_file
 * @inode: inode of the file to attempt to remove
 *
 * Walk the whole hash bucket, looking for any files that correspond to "inode".
 * If any do, then unhash them and put the hashtable reference to them and
 * destroy any that had their last reference put.
 *
 * Unlike nfsd_file_close_inode_sync(), this does not wait for the final
 * __fput of closed files.
 */
static void
nfsd_file_close_inode(struct inode *inode)
{
	unsigned int		hashval = (unsigned int)hash_long(inode->i_ino,
						NFSD_FILE_HASH_BITS);
	LIST_HEAD(dispose);

	__nfsd_file_close_inode(inode, hashval, &dispose);
	trace_nfsd_file_close_inode(inode, hashval, !list_empty(&dispose));
	nfsd_file_dispose_list(&dispose);
}
451
/**
 * nfsd_file_delayed_close - close unused nfsd_files
 * @work: dummy
 *
 * Walk the LRU list and close any entries that have not been used since
 * the last scan.
 *
 * Note this can deadlock with nfsd_file_cache_purge.
 */
static void
nfsd_file_delayed_close(struct work_struct *work)
{
	LIST_HEAD(head);

	list_lru_walk(&nfsd_file_lru, nfsd_file_lru_cb, &head, LONG_MAX);

	/* some entries were recently used: schedule another pass */
	if (test_and_clear_bit(NFSD_FILE_LRU_RESCAN, &nfsd_file_lru_flags))
		nfsd_file_schedule_laundrette(NFSD_FILE_LAUNDRETTE_NOFLUSH);

	if (!list_empty(&head)) {
		nfsd_file_lru_dispose(&head);
		/* force the final __fput of anything we closed */
		flush_delayed_fput();
	}
}
476
/*
 * Lease-setup notifier: when a local process takes an F_SETLEASE lease,
 * close any cached opens for that inode so the cache doesn't hold the
 * file open against the lease holder.
 */
static int
nfsd_file_lease_notifier_call(struct notifier_block *nb, unsigned long arg,
			    void *data)
{
	struct file_lock *fl = data;

	/* Only close files for F_SETLEASE leases */
	if (fl->fl_flags & FL_LEASE)
		nfsd_file_close_inode_sync(file_inode(fl->fl_file));
	return 0;
}
488
/* registered with lease_register_notifier() in nfsd_file_cache_init() */
static struct notifier_block nfsd_file_lease_notifier = {
	.notifier_call = nfsd_file_lease_notifier_call,
};
492
/*
 * fsnotify handler: close cached opens for an inode when it may have
 * been unlinked (FS_DELETE_SELF, or FS_ATTRIB with a zero link count),
 * so the cache doesn't pin deleted files.
 */
static int
nfsd_file_fsnotify_handle_event(struct fsnotify_group *group,
				struct inode *inode,
				u32 mask, const void *data, int data_type,
				const struct qstr *file_name, u32 cookie,
				struct fsnotify_iter_info *iter_info)
{
	trace_nfsd_file_fsnotify_handle_event(inode, mask);

	/* Should be no marks on non-regular files */
	if (!S_ISREG(inode->i_mode)) {
		WARN_ON_ONCE(1);
		return 0;
	}

	/* don't close files if this was not the last link */
	if (mask & FS_ATTRIB) {
		if (inode->i_nlink)
			return 0;
	}

	nfsd_file_close_inode(inode);
	return 0;
}
517
518
519static const struct fsnotify_ops nfsd_file_fsnotify_ops = {
520 .handle_event = nfsd_file_fsnotify_handle_event,
521 .free_mark = nfsd_file_mark_free,
522};
523
/*
 * Set up the open-file cache: hashtable, slabs, LRU, shrinker, lease
 * notifier and fsnotify group. Idempotent: returns 0 immediately if the
 * cache is already initialized. On any failure, everything set up so far
 * is torn down (goto-cleanup ladder) and a negative errno is returned.
 * Called under the nfsd_mutex from nfsd startup.
 */
int
nfsd_file_cache_init(void)
{
	int		ret = -ENOMEM;
	unsigned int	i;

	clear_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags);

	/* already initialized? */
	if (nfsd_file_hashtbl)
		return 0;

	nfsd_file_hashtbl = kcalloc(NFSD_FILE_HASH_SIZE,
				sizeof(*nfsd_file_hashtbl), GFP_KERNEL);
	if (!nfsd_file_hashtbl) {
		pr_err("nfsd: unable to allocate nfsd_file_hashtbl\n");
		goto out_err;
	}

	nfsd_file_slab = kmem_cache_create("nfsd_file",
				sizeof(struct nfsd_file), 0, 0, NULL);
	if (!nfsd_file_slab) {
		pr_err("nfsd: unable to create nfsd_file_slab\n");
		goto out_err;
	}

	nfsd_file_mark_slab = kmem_cache_create("nfsd_file_mark",
					sizeof(struct nfsd_file_mark), 0, 0, NULL);
	if (!nfsd_file_mark_slab) {
		pr_err("nfsd: unable to create nfsd_file_mark_slab\n");
		goto out_err;
	}


	ret = list_lru_init(&nfsd_file_lru);
	if (ret) {
		pr_err("nfsd: failed to init nfsd_file_lru: %d\n", ret);
		goto out_err;
	}

	ret = register_shrinker(&nfsd_file_shrinker);
	if (ret) {
		pr_err("nfsd: failed to register nfsd_file_shrinker: %d\n", ret);
		goto out_lru;
	}

	ret = lease_register_notifier(&nfsd_file_lease_notifier);
	if (ret) {
		pr_err("nfsd: unable to register lease notifier: %d\n", ret);
		goto out_shrinker;
	}

	nfsd_file_fsnotify_group = fsnotify_alloc_group(&nfsd_file_fsnotify_ops);
	if (IS_ERR(nfsd_file_fsnotify_group)) {
		pr_err("nfsd: unable to create fsnotify group: %ld\n",
			PTR_ERR(nfsd_file_fsnotify_group));
		nfsd_file_fsnotify_group = NULL;
		goto out_notifier;
	}

	for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
		INIT_HLIST_HEAD(&nfsd_file_hashtbl[i].nfb_head);
		spin_lock_init(&nfsd_file_hashtbl[i].nfb_lock);
	}

	INIT_DELAYED_WORK(&nfsd_filecache_laundrette, nfsd_file_delayed_close);
out:
	return ret;
out_notifier:
	lease_unregister_notifier(&nfsd_file_lease_notifier);
out_shrinker:
	unregister_shrinker(&nfsd_file_shrinker);
out_lru:
	list_lru_destroy(&nfsd_file_lru);
out_err:
	/* kmem_cache_destroy() and kfree() tolerate NULL */
	kmem_cache_destroy(nfsd_file_slab);
	nfsd_file_slab = NULL;
	kmem_cache_destroy(nfsd_file_mark_slab);
	nfsd_file_mark_slab = NULL;
	kfree(nfsd_file_hashtbl);
	nfsd_file_hashtbl = NULL;
	goto out;
}
606
/*
 * Unhash and release every entry in the cache. Called under the
 * nfsd_mutex (see expkey_flush) or during shutdown.
 *
 * Note this can deadlock with nfsd_file_lru_cb.
 */
void
nfsd_file_cache_purge(void)
{
	unsigned int		i;
	struct nfsd_file	*nf;
	LIST_HEAD(dispose);
	bool del;

	/* cache never initialized (or already torn down)? */
	if (!nfsd_file_hashtbl)
		return;

	for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
		spin_lock(&nfsd_file_hashtbl[i].nfb_lock);
		while(!hlist_empty(&nfsd_file_hashtbl[i].nfb_head)) {
			nf = hlist_entry(nfsd_file_hashtbl[i].nfb_head.first,
					 struct nfsd_file, nf_node);
			del = nfsd_file_unhash_and_release_locked(nf, &dispose);

			/*
			 * Deadlock detected! Something marked this entry as
			 * unhashed, but hasn't removed it from the hash list.
			 */
			WARN_ON_ONCE(!del);
		}
		spin_unlock(&nfsd_file_hashtbl[i].nfb_lock);
		nfsd_file_dispose_list(&dispose);
	}
}
638
/*
 * Tear down the open-file cache: stop new laundrette runs, unregister the
 * notifier and shrinker, purge all entries, then free the supporting
 * infrastructure. The ordering matters: the laundrette must be quiesced
 * before the purge, and RCU/fsnotify must settle before the slabs go.
 * Called under the nfsd_mutex from nfsd shutdown.
 */
void
nfsd_file_cache_shutdown(void)
{
	LIST_HEAD(dispose);

	/* prevent nfsd_file_schedule_laundrette from rearming the work */
	set_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags);

	lease_unregister_notifier(&nfsd_file_lease_notifier);
	unregister_shrinker(&nfsd_file_shrinker);
	/*
	 * make sure all callers of nfsd_file_lru_cb are done before
	 * calling nfsd_file_cache_purge
	 */
	cancel_delayed_work_sync(&nfsd_filecache_laundrette);
	nfsd_file_cache_purge();
	list_lru_destroy(&nfsd_file_lru);
	/* wait for in-flight nfsd_file_slab_free callbacks */
	rcu_barrier();
	fsnotify_put_group(nfsd_file_fsnotify_group);
	nfsd_file_fsnotify_group = NULL;
	kmem_cache_destroy(nfsd_file_slab);
	nfsd_file_slab = NULL;
	/* all marks must be gone before their slab can be destroyed */
	fsnotify_wait_marks_destroyed();
	kmem_cache_destroy(nfsd_file_mark_slab);
	nfsd_file_mark_slab = NULL;
	kfree(nfsd_file_hashtbl);
	nfsd_file_hashtbl = NULL;
}
666
667static bool
668nfsd_match_cred(const struct cred *c1, const struct cred *c2)
669{
670 int i;
671
672 if (!uid_eq(c1->fsuid, c2->fsuid))
673 return false;
674 if (!gid_eq(c1->fsgid, c2->fsgid))
675 return false;
676 if (c1->group_info == NULL || c2->group_info == NULL)
677 return c1->group_info == c2->group_info;
678 if (c1->group_info->ngroups != c2->group_info->ngroups)
679 return false;
680 for (i = 0; i < c1->group_info->ngroups; i++) {
681 if (!gid_eq(c1->group_info->gid[i], c2->group_info->gid[i]))
682 return false;
683 }
684 return true;
685}
686
/*
 * Search bucket @hashval for an entry matching @inode, the requested
 * access bits and the current task's credentials, and take a reference
 * to it. Caller must hold either the bucket lock or the RCU read lock.
 * Returns NULL if no usable entry was found.
 */
static struct nfsd_file *
nfsd_file_find_locked(struct inode *inode, unsigned int may_flags,
			unsigned int hashval)
{
	struct nfsd_file *nf;
	unsigned char need = may_flags & NFSD_FILE_MAY_MASK;

	hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head,
				 nf_node) {
		/* entry must have been opened with at least the needed bits */
		if ((need & nf->nf_may) != need)
			continue;
		if (nf->nf_inode != inode)
			continue;
		if (!nfsd_match_cred(nf->nf_cred, current_cred()))
			continue;
		/* may fail if the entry's refcount already hit zero */
		if (nfsd_file_get(nf) != NULL)
			return nf;
	}
	return NULL;
}
707
/**
 * nfsd_file_is_cached - are there any cached open files for this inode?
 * @inode: inode of the file to check
 *
 * Scan the hashtable for open files that match this inode. Returns true if
 * there are any, and false if not.
 */
bool
nfsd_file_is_cached(struct inode *inode)
{
	bool			ret = false;
	struct nfsd_file	*nf;
	unsigned int		hashval;

	hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS);

	rcu_read_lock();
	hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head,
				 nf_node) {
		if (inode == nf->nf_inode) {
			ret = true;
			break;
		}
	}
	rcu_read_unlock();
	trace_nfsd_file_is_cached(inode, hashval, (int)ret);
	return ret;
}
736
/**
 * nfsd_file_acquire - find or open a cached file for an RPC request
 * @rqstp: the RPC request
 * @fhp: filehandle of the file to open
 * @may_flags: NFSD_MAY_* access bits requested
 * @pnf: on success, receives a referenced nfsd_file with nf_file open
 *
 * Look up an existing cache entry matching the filehandle, access bits
 * and credentials; if none exists, insert a new PENDING entry and open
 * the file. Concurrent requesters for the same entry wait for the opener
 * to finish construction. Returns nfs_ok and sets *pnf on success, or an
 * nfserr status (the caller gets no reference on failure).
 */
__be32
nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
		  unsigned int may_flags, struct nfsd_file **pnf)
{
	__be32	status;
	struct nfsd_file *nf, *new;
	struct inode *inode;
	unsigned int hashval;

	/* FIXME: skip this if fh_dentry is already set? */
	status = fh_verify(rqstp, fhp, S_IFREG,
				may_flags|NFSD_MAY_OWNER_OVERRIDE);
	if (status != nfs_ok)
		return status;

	inode = d_inode(fhp->fh_dentry);
	hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS);
retry:
	/* fast path: lockless lookup under RCU */
	rcu_read_lock();
	nf = nfsd_file_find_locked(inode, may_flags, hashval);
	rcu_read_unlock();
	if (nf)
		goto wait_for_construction;

	/* allocate outside the bucket lock before re-checking */
	new = nfsd_file_alloc(inode, may_flags, hashval);
	if (!new) {
		trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags,
					NULL, nfserr_jukebox);
		return nfserr_jukebox;
	}

	spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
	/* re-check: someone may have inserted one while we allocated */
	nf = nfsd_file_find_locked(inode, may_flags, hashval);
	if (nf == NULL)
		goto open_file;
	spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
	/* lost the race: discard our unused allocation */
	nfsd_file_slab_free(&new->nf_rcu);

wait_for_construction:
	/* wait for the inserting task to finish opening the file */
	wait_on_bit(&nf->nf_flags, NFSD_FILE_PENDING, TASK_UNINTERRUPTIBLE);

	/* Did construction of this file fail? */
	if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
		nfsd_file_put_noref(nf);
		goto retry;
	}

	this_cpu_inc(nfsd_file_cache_hits);

	/*
	 * The cached open may have skipped lease breaks (BREAK_* flags);
	 * if this caller is allowed to break leases, do so now.
	 */
	if (!(may_flags & NFSD_MAY_NOT_BREAK_LEASE)) {
		bool write = (may_flags & NFSD_MAY_WRITE);

		if (test_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags) ||
		    (test_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags) && write)) {
			status = nfserrno(nfsd_open_break_lease(
					file_inode(nf->nf_file), may_flags));
			if (status == nfs_ok) {
				clear_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
				if (write)
					clear_bit(NFSD_FILE_BREAK_WRITE,
						  &nf->nf_flags);
			}
		}
	}
out:
	if (status == nfs_ok) {
		*pnf = nf;
	} else {
		nfsd_file_put(nf);
		nf = NULL;
	}

	trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags, nf, status);
	return status;
open_file:
	nf = new;
	/* Take reference for the hashtable */
	atomic_inc(&nf->nf_ref);
	__set_bit(NFSD_FILE_HASHED, &nf->nf_flags);
	__set_bit(NFSD_FILE_PENDING, &nf->nf_flags);
	list_lru_add(&nfsd_file_lru, &nf->nf_lru);
	hlist_add_head_rcu(&nf->nf_node, &nfsd_file_hashtbl[hashval].nfb_head);
	++nfsd_file_hashtbl[hashval].nfb_count;
	nfsd_file_hashtbl[hashval].nfb_maxcount = max(nfsd_file_hashtbl[hashval].nfb_maxcount,
			nfsd_file_hashtbl[hashval].nfb_count);
	spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
	atomic_long_inc(&nfsd_filecache_count);

	/* attach an fsnotify mark, then actually open the file */
	nf->nf_mark = nfsd_file_mark_find_or_create(nf);
	if (nf->nf_mark)
		status = nfsd_open_verified(rqstp, fhp, S_IFREG,
				may_flags, &nf->nf_file);
	else
		status = nfserr_jukebox;
	/*
	 * If construction failed, or we raced with a call to unlink()
	 * then unhash.
	 */
	if (status != nfs_ok || inode->i_nlink == 0) {
		bool do_free;
		spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
		do_free = nfsd_file_unhash(nf);
		spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
		if (do_free)
			nfsd_file_put_noref(nf);
	}
	/* construction done (or failed): wake any waiters */
	clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags);
	smp_mb__after_atomic();
	wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING);
	goto out;
}
848
/*
 * seq_file show routine for the cache stats: total entries, longest hash
 * chain, and the summed per-cpu hit count.
 *
 * Note that fields may be added, removed or reordered in the future. Programs
 * scraping this file for info should test the labels to ensure they're
 * getting the correct field.
 */
static int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
{
	unsigned int i, count = 0, longest = 0;
	unsigned long hits = 0;

	/*
	 * No need for spinlocks here since we're not terribly interested in
	 * accuracy. We do take the nfsd_mutex simply to ensure that we
	 * don't end up racing with server shutdown
	 */
	mutex_lock(&nfsd_mutex);
	if (nfsd_file_hashtbl) {
		for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
			count += nfsd_file_hashtbl[i].nfb_count;
			longest = max(longest, nfsd_file_hashtbl[i].nfb_count);
		}
	}
	mutex_unlock(&nfsd_mutex);

	for_each_possible_cpu(i)
		hits += per_cpu(nfsd_file_cache_hits, i);

	seq_printf(m, "total entries: %u\n", count);
	seq_printf(m, "longest chain: %u\n", longest);
	seq_printf(m, "cache hits:    %lu\n", hits);
	return 0;
}
881
/* open routine for the procfs/debugfs stats file */
int nfsd_file_cache_stats_open(struct inode *inode, struct file *file)
{
	return single_open(file, nfsd_file_cache_stats_show, NULL);
}
diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
new file mode 100644
index 000000000000..0c0c67166b87
--- /dev/null
+++ b/fs/nfsd/filecache.h
@@ -0,0 +1,60 @@
1#ifndef _FS_NFSD_FILECACHE_H
2#define _FS_NFSD_FILECACHE_H
3
4#include <linux/fsnotify_backend.h>
5
6/*
7 * This is the fsnotify_mark container that nfsd attaches to the files that it
8 * is holding open. Note that we have a separate refcount here aside from the
9 * one in the fsnotify_mark. We only want a single fsnotify_mark attached to
10 * the inode, and for each nfsd_file to hold a reference to it.
11 *
12 * The fsnotify_mark is itself refcounted, but that's not sufficient to tell us
13 * how to put that reference. If there are still outstanding nfsd_files that
14 * reference the mark, then we would want to call fsnotify_put_mark on it.
15 * If there were not, then we'd need to call fsnotify_destroy_mark. Since we
16 * can't really tell the difference, we use the nfm_mark to keep track of how
17 * many nfsd_files hold references to the mark. When that counter goes to zero
18 * then we know to call fsnotify_destroy_mark on it.
19 */
struct nfsd_file_mark {
	/* the fsnotify mark attached to the inode */
	struct fsnotify_mark	nfm_mark;
	/* number of nfsd_files holding a reference to this mark */
	atomic_t		nfm_ref;
};
24
25/*
26 * A representation of a file that has been opened by knfsd. These are hashed
27 * in the hashtable by inode pointer value. Note that this object doesn't
28 * hold a reference to the inode by itself, so the nf_inode pointer should
29 * never be dereferenced, only used for comparison.
30 */
struct nfsd_file {
	/* hash chain linkage, protected by the bucket lock / RCU */
	struct hlist_node	nf_node;
	/* linkage on the file-cache LRU, or a dispose list */
	struct list_head	nf_lru;
	/* deferred free after RCU grace period */
	struct rcu_head		nf_rcu;
	/* the open file; NULL until construction completes */
	struct file		*nf_file;
	/* creds of the task that created the entry */
	const struct cred	*nf_cred;
#define NFSD_FILE_HASHED	(0)
#define NFSD_FILE_PENDING	(1)
#define NFSD_FILE_BREAK_READ	(2)
#define NFSD_FILE_BREAK_WRITE	(3)
#define NFSD_FILE_REFERENCED	(4)
	unsigned long		nf_flags;
	/* for comparison only -- never dereference (no inode ref held) */
	struct inode		*nf_inode;
	/* cached hash bucket index for this entry */
	unsigned int		nf_hashval;
	atomic_t		nf_ref;
	/* NFSD_MAY_READ/WRITE bits the file was opened with */
	unsigned char		nf_may;
	/* fsnotify mark shared by all entries for this inode */
	struct nfsd_file_mark	*nf_mark;
};
49
50int nfsd_file_cache_init(void);
51void nfsd_file_cache_purge(void);
52void nfsd_file_cache_shutdown(void);
53void nfsd_file_put(struct nfsd_file *nf);
54struct nfsd_file *nfsd_file_get(struct nfsd_file *nf);
55void nfsd_file_close_inode_sync(struct inode *inode);
56bool nfsd_file_is_cached(struct inode *inode);
57__be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
58 unsigned int may_flags, struct nfsd_file **nfp);
59int nfsd_file_cache_stats_open(struct inode *, struct file *);
60#endif /* _FS_NFSD_FILECACHE_H */
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 18d94ea984ba..a6b1eab7b722 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -27,6 +27,7 @@
27#include "cache.h" 27#include "cache.h"
28#include "vfs.h" 28#include "vfs.h"
29#include "netns.h" 29#include "netns.h"
30#include "filecache.h"
30 31
31#define NFSDDBG_FACILITY NFSDDBG_SVC 32#define NFSDDBG_FACILITY NFSDDBG_SVC
32 33
@@ -313,6 +314,9 @@ static int nfsd_startup_generic(int nrservs)
313 if (nfsd_users++) 314 if (nfsd_users++)
314 return 0; 315 return 0;
315 316
317 ret = nfsd_file_cache_init();
318 if (ret)
319 goto dec_users;
316 /* 320 /*
317 * Readahead param cache - will no-op if it already exists. 321 * Readahead param cache - will no-op if it already exists.
318 * (Note therefore results will be suboptimal if number of 322 * (Note therefore results will be suboptimal if number of
@@ -320,7 +324,7 @@ static int nfsd_startup_generic(int nrservs)
320 */ 324 */
321 ret = nfsd_racache_init(2*nrservs); 325 ret = nfsd_racache_init(2*nrservs);
322 if (ret) 326 if (ret)
323 goto dec_users; 327 goto out_file_cache;
324 328
325 ret = nfs4_state_start(); 329 ret = nfs4_state_start();
326 if (ret) 330 if (ret)
@@ -329,6 +333,8 @@ static int nfsd_startup_generic(int nrservs)
329 333
330out_racache: 334out_racache:
331 nfsd_racache_shutdown(); 335 nfsd_racache_shutdown();
336out_file_cache:
337 nfsd_file_cache_shutdown();
332dec_users: 338dec_users:
333 nfsd_users--; 339 nfsd_users--;
334 return ret; 340 return ret;
@@ -340,6 +346,7 @@ static void nfsd_shutdown_generic(void)
340 return; 346 return;
341 347
342 nfs4_state_shutdown(); 348 nfs4_state_shutdown();
349 nfsd_file_cache_shutdown();
343 nfsd_racache_shutdown(); 350 nfsd_racache_shutdown();
344} 351}
345 352
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index 80933e4334d8..ffc78a0e28b2 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -126,6 +126,8 @@ DEFINE_NFSD_ERR_EVENT(read_err);
126DEFINE_NFSD_ERR_EVENT(write_err); 126DEFINE_NFSD_ERR_EVENT(write_err);
127 127
128#include "state.h" 128#include "state.h"
129#include "filecache.h"
130#include "vfs.h"
129 131
130DECLARE_EVENT_CLASS(nfsd_stateid_class, 132DECLARE_EVENT_CLASS(nfsd_stateid_class,
131 TP_PROTO(stateid_t *stp), 133 TP_PROTO(stateid_t *stp),
@@ -164,6 +166,144 @@ DEFINE_STATEID_EVENT(layout_recall_done);
164DEFINE_STATEID_EVENT(layout_recall_fail); 166DEFINE_STATEID_EVENT(layout_recall_fail);
165DEFINE_STATEID_EVENT(layout_recall_release); 167DEFINE_STATEID_EVENT(layout_recall_release);
166 168
/* Decode nf_flags bit numbers into symbolic names for trace output. */
169#define show_nf_flags(val) \
170 __print_flags(val, "|", \
171 { 1 << NFSD_FILE_HASHED, "HASHED" }, \
172 { 1 << NFSD_FILE_PENDING, "PENDING" }, \
173 { 1 << NFSD_FILE_BREAK_READ, "BREAK_READ" }, \
174 { 1 << NFSD_FILE_BREAK_WRITE, "BREAK_WRITE" }, \
175 { 1 << NFSD_FILE_REFERENCED, "REFERENCED"})
176
/* Decode NFSD_MAY_* bits -- only the subset the file cache cares about. */
177/* FIXME: This should probably be fleshed out in the future. */
178#define show_nf_may(val) \
179 __print_flags(val, "|", \
180 { NFSD_MAY_READ, "READ" }, \
181 { NFSD_MAY_WRITE, "WRITE" }, \
182 { NFSD_MAY_NOT_BREAK_LEASE, "NOT_BREAK_LEASE" })
183
/*
 * Common trace template: snapshot an nfsd_file's identifying fields.
 * The inode and file pointers are logged for identification only.
 */
184DECLARE_EVENT_CLASS(nfsd_file_class,
185 TP_PROTO(struct nfsd_file *nf),
186 TP_ARGS(nf),
187 TP_STRUCT__entry(
188 __field(unsigned int, nf_hashval)
189 __field(void *, nf_inode)
190 __field(int, nf_ref)
191 __field(unsigned long, nf_flags)
192 __field(unsigned char, nf_may)
193 __field(struct file *, nf_file)
194 ),
195 TP_fast_assign(
196 __entry->nf_hashval = nf->nf_hashval;
197 __entry->nf_inode = nf->nf_inode;
198 __entry->nf_ref = atomic_read(&nf->nf_ref);
199 __entry->nf_flags = nf->nf_flags;
200 __entry->nf_may = nf->nf_may;
201 __entry->nf_file = nf->nf_file;
202 ),
203 TP_printk("hash=0x%x inode=0x%p ref=%d flags=%s may=%s file=%p",
204 __entry->nf_hashval,
205 __entry->nf_inode,
206 __entry->nf_ref,
207 show_nf_flags(__entry->nf_flags),
208 show_nf_may(__entry->nf_may),
209 __entry->nf_file)
210)
211
/* Instantiate single-argument events from nfsd_file_class. */
212#define DEFINE_NFSD_FILE_EVENT(name) \
213DEFINE_EVENT(nfsd_file_class, name, \
214 TP_PROTO(struct nfsd_file *nf), \
215 TP_ARGS(nf))
216
217DEFINE_NFSD_FILE_EVENT(nfsd_file_alloc);
218DEFINE_NFSD_FILE_EVENT(nfsd_file_put_final);
219DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash);
220DEFINE_NFSD_FILE_EVENT(nfsd_file_put);
221DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash_and_release_locked);
222
/*
 * Traces the outcome of a cache lookup/open attempt.  nf may be NULL
 * (e.g. on failure), so its fields are captured defensively below.
 */
223TRACE_EVENT(nfsd_file_acquire,
224 TP_PROTO(struct svc_rqst *rqstp, unsigned int hash,
225 struct inode *inode, unsigned int may_flags,
226 struct nfsd_file *nf, __be32 status),
227
228 TP_ARGS(rqstp, hash, inode, may_flags, nf, status),
229
230 TP_STRUCT__entry(
231 __field(__be32, xid)
232 __field(unsigned int, hash)
233 __field(void *, inode)
234 __field(unsigned int, may_flags)
235 __field(int, nf_ref)
236 __field(unsigned long, nf_flags)
237 __field(unsigned char, nf_may)
238 __field(struct file *, nf_file)
239 __field(__be32, status)
240 ),
241
242 TP_fast_assign(
243 __entry->xid = rqstp->rq_xid;
244 __entry->hash = hash;
245 __entry->inode = inode;
246 __entry->may_flags = may_flags;
247 __entry->nf_ref = nf ? atomic_read(&nf->nf_ref) : 0;
248 __entry->nf_flags = nf ? nf->nf_flags : 0;
249 __entry->nf_may = nf ? nf->nf_may : 0;
250 __entry->nf_file = nf ? nf->nf_file : NULL;
251 __entry->status = status;
252 ),
253
254 TP_printk("xid=0x%x hash=0x%x inode=0x%p may_flags=%s ref=%d nf_flags=%s nf_may=%s nf_file=0x%p status=%u",
255 be32_to_cpu(__entry->xid), __entry->hash, __entry->inode,
256 show_nf_may(__entry->may_flags), __entry->nf_ref,
257 show_nf_flags(__entry->nf_flags),
258 show_nf_may(__entry->nf_may), __entry->nf_file,
259 be32_to_cpu(__entry->status))
260);
261
/* Template for inode-keyed cache searches: records bucket and hit/miss. */
262DECLARE_EVENT_CLASS(nfsd_file_search_class,
263 TP_PROTO(struct inode *inode, unsigned int hash, int found),
264 TP_ARGS(inode, hash, found),
265 TP_STRUCT__entry(
266 __field(struct inode *, inode)
267 __field(unsigned int, hash)
268 __field(int, found)
269 ),
270 TP_fast_assign(
271 __entry->inode = inode;
272 __entry->hash = hash;
273 __entry->found = found;
274 ),
275 TP_printk("hash=0x%x inode=0x%p found=%d", __entry->hash,
276 __entry->inode, __entry->found)
277);
278
279#define DEFINE_NFSD_FILE_SEARCH_EVENT(name) \
280DEFINE_EVENT(nfsd_file_search_class, name, \
281 TP_PROTO(struct inode *inode, unsigned int hash, int found), \
282 TP_ARGS(inode, hash, found))
283
284DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_close_inode_sync);
285DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_close_inode);
286DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_is_cached);
287
/*
 * Logs an fsnotify event delivered for a cached inode: link count,
 * mode, and the fsnotify event mask.
 */
288TRACE_EVENT(nfsd_file_fsnotify_handle_event,
289 TP_PROTO(struct inode *inode, u32 mask),
290 TP_ARGS(inode, mask),
291 TP_STRUCT__entry(
292 __field(struct inode *, inode)
293 __field(unsigned int, nlink)
294 __field(umode_t, mode)
295 __field(u32, mask)
296 ),
297 TP_fast_assign(
298 __entry->inode = inode;
299 __entry->nlink = inode->i_nlink;
300 __entry->mode = inode->i_mode;
301 __entry->mask = mask;
302 ),
303 TP_printk("inode=0x%p nlink=%u mode=0%ho mask=0x%x", __entry->inode,
304 __entry->nlink, __entry->mode, __entry->mask)
305);
306
167#endif /* _NFSD_TRACE_H */ 307#endif /* _NFSD_TRACE_H */
168 308
169#undef TRACE_INCLUDE_PATH 309#undef TRACE_INCLUDE_PATH
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index c85783e536d5..5983206ab036 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -699,7 +699,7 @@ nfsd_access(struct svc_rqst *rqstp, struct svc_fh *fhp, u32 *access, u32 *suppor
699} 699}
700#endif /* CONFIG_NFSD_V3 */ 700#endif /* CONFIG_NFSD_V3 */
701 701
702static int nfsd_open_break_lease(struct inode *inode, int access) 702int nfsd_open_break_lease(struct inode *inode, int access)
703{ 703{
704 unsigned int mode; 704 unsigned int mode;
705 705
@@ -715,8 +715,8 @@ static int nfsd_open_break_lease(struct inode *inode, int access)
715 * and additional flags. 715 * and additional flags.
716 * N.B. After this call fhp needs an fh_put 716 * N.B. After this call fhp needs an fh_put
717 */ 717 */
718__be32 718static __be32
719nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, 719__nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
720 int may_flags, struct file **filp) 720 int may_flags, struct file **filp)
721{ 721{
722 struct path path; 722 struct path path;
@@ -726,25 +726,6 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
726 __be32 err; 726 __be32 err;
727 int host_err = 0; 727 int host_err = 0;
728 728
729 validate_process_creds();
730
731 /*
732 * If we get here, then the client has already done an "open",
733 * and (hopefully) checked permission - so allow OWNER_OVERRIDE
734 * in case a chmod has now revoked permission.
735 *
736 * Arguably we should also allow the owner override for
737 * directories, but we never have and it doesn't seem to have
738 * caused anyone a problem. If we were to change this, note
739 * also that our filldir callbacks would need a variant of
740 * lookup_one_len that doesn't check permissions.
741 */
742 if (type == S_IFREG)
743 may_flags |= NFSD_MAY_OWNER_OVERRIDE;
744 err = fh_verify(rqstp, fhp, type, may_flags);
745 if (err)
746 goto out;
747
748 path.mnt = fhp->fh_export->ex_path.mnt; 729 path.mnt = fhp->fh_export->ex_path.mnt;
749 path.dentry = fhp->fh_dentry; 730 path.dentry = fhp->fh_dentry;
750 inode = d_inode(path.dentry); 731 inode = d_inode(path.dentry);
@@ -798,10 +779,50 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
798out_nfserr: 779out_nfserr:
799 err = nfserrno(host_err); 780 err = nfserrno(host_err);
800out: 781out:
782 return err;
783}
784
785__be32
786nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
787 int may_flags, struct file **filp)
788{
789 __be32 err;
790
791 validate_process_creds();
792 /*
793 * If we get here, then the client has already done an "open",
794 * and (hopefully) checked permission - so allow OWNER_OVERRIDE
795 * in case a chmod has now revoked permission.
796 *
797 * Arguably we should also allow the owner override for
798 * directories, but we never have and it doesn't seem to have
799 * caused anyone a problem. If we were to change this, note
800 * also that our filldir callbacks would need a variant of
801 * lookup_one_len that doesn't check permissions.
802 */
803 if (type == S_IFREG)
804 may_flags |= NFSD_MAY_OWNER_OVERRIDE;
805 err = fh_verify(rqstp, fhp, type, may_flags);
806 if (!err)
807 err = __nfsd_open(rqstp, fhp, type, may_flags, filp);
808 validate_process_creds();
809 return err;
810}
811
812__be32
813nfsd_open_verified(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
814 int may_flags, struct file **filp)
815{
816 __be32 err;
817
818 validate_process_creds();
819 err = __nfsd_open(rqstp, fhp, type, may_flags, filp);
801 validate_process_creds(); 820 validate_process_creds();
802 return err; 821 return err;
803} 822}
804 823
824
825
805struct raparms * 826struct raparms *
806nfsd_init_raparms(struct file *file) 827nfsd_init_raparms(struct file *file)
807{ 828{
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index db351247892d..31fdae34e028 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -75,8 +75,11 @@ __be32 do_nfsd_create(struct svc_rqst *, struct svc_fh *,
75__be32 nfsd_commit(struct svc_rqst *, struct svc_fh *, 75__be32 nfsd_commit(struct svc_rqst *, struct svc_fh *,
76 loff_t, unsigned long); 76 loff_t, unsigned long);
77#endif /* CONFIG_NFSD_V3 */ 77#endif /* CONFIG_NFSD_V3 */
78int nfsd_open_break_lease(struct inode *, int);
78__be32 nfsd_open(struct svc_rqst *, struct svc_fh *, umode_t, 79__be32 nfsd_open(struct svc_rqst *, struct svc_fh *, umode_t,
79 int, struct file **); 80 int, struct file **);
81__be32 nfsd_open_verified(struct svc_rqst *, struct svc_fh *, umode_t,
82 int, struct file **);
80struct raparms; 83struct raparms;
81__be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp, 84__be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
82 struct file *file, loff_t offset, 85 struct file *file, loff_t offset,