Diffstat (limited to 'fs/nfsd/filecache.c')
-rw-r--r--  fs/nfsd/filecache.c  934
1 files changed, 934 insertions, 0 deletions
diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
new file mode 100644
index 000000000000..ef55e9b1cd4e
--- /dev/null
+++ b/fs/nfsd/filecache.c
@@ -0,0 +1,934 @@
/*
 * Open file cache.
 *
 * (c) 2015 - Jeff Layton <jeff.layton@primarydata.com>
 */

#include <linux/hash.h>
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/sched.h>
#include <linux/list_lru.h>
#include <linux/fsnotify_backend.h>
#include <linux/fsnotify.h>
#include <linux/seq_file.h>

#include "vfs.h"
#include "nfsd.h"
#include "nfsfh.h"
#include "netns.h"
#include "filecache.h"
#include "trace.h"

#define NFSDDBG_FACILITY	NFSDDBG_FH

/* FIXME: dynamically size this for the machine somehow? */
#define NFSD_FILE_HASH_BITS	12
#define NFSD_FILE_HASH_SIZE	(1 << NFSD_FILE_HASH_BITS)
#define NFSD_LAUNDRETTE_DELAY	(2 * HZ)

#define NFSD_FILE_LRU_RESCAN	(0)
#define NFSD_FILE_SHUTDOWN	(1)
#define NFSD_FILE_LRU_THRESHOLD	(4096UL)
#define NFSD_FILE_LRU_LIMIT	(NFSD_FILE_LRU_THRESHOLD << 2)

/* We only care about NFSD_MAY_READ/WRITE for this cache */
#define NFSD_FILE_MAY_MASK	(NFSD_MAY_READ|NFSD_MAY_WRITE)

struct nfsd_fcache_bucket {
	struct hlist_head	nfb_head;
	spinlock_t		nfb_lock;
	unsigned int		nfb_count;
	unsigned int		nfb_maxcount;
};

static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits);

static struct kmem_cache		*nfsd_file_slab;
static struct kmem_cache		*nfsd_file_mark_slab;
static struct nfsd_fcache_bucket	*nfsd_file_hashtbl;
static struct list_lru			nfsd_file_lru;
static long				nfsd_file_lru_flags;
static struct fsnotify_group		*nfsd_file_fsnotify_group;
static atomic_long_t			nfsd_filecache_count;
static struct delayed_work		nfsd_filecache_laundrette;

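/*
 * Tells nfsd_file_schedule_laundrette() whether it may also flush the
 * laundrette work synchronously once the cache grows past
 * NFSD_FILE_LRU_LIMIT.
 */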
enum nfsd_file_laundrette_ctl {
	NFSD_FILE_LAUNDRETTE_NOFLUSH = 0,
	NFSD_FILE_LAUNDRETTE_MAY_FLUSH
};

static void
nfsd_file_schedule_laundrette(enum nfsd_file_laundrette_ctl ctl)
{
	long count = atomic_long_read(&nfsd_filecache_count);

	if (count == 0 || test_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags))
		return;

	/* Be more aggressive about scanning if over the threshold */
	if (count > NFSD_FILE_LRU_THRESHOLD)
		mod_delayed_work(system_wq, &nfsd_filecache_laundrette, 0);
	else
		schedule_delayed_work(&nfsd_filecache_laundrette, NFSD_LAUNDRETTE_DELAY);

	if (ctl == NFSD_FILE_LAUNDRETTE_NOFLUSH)
		return;

	/* ...and don't delay flushing if we're out of control */
	if (count >= NFSD_FILE_LRU_LIMIT)
		flush_delayed_work(&nfsd_filecache_laundrette);
}

static void
nfsd_file_slab_free(struct rcu_head *rcu)
{
	struct nfsd_file *nf = container_of(rcu, struct nfsd_file, nf_rcu);

	put_cred(nf->nf_cred);
	kmem_cache_free(nfsd_file_slab, nf);
}

static void
nfsd_file_mark_free(struct fsnotify_mark *mark)
{
	struct nfsd_file_mark *nfm = container_of(mark, struct nfsd_file_mark,
						  nfm_mark);

	kmem_cache_free(nfsd_file_mark_slab, nfm);
}

static struct nfsd_file_mark *
nfsd_file_mark_get(struct nfsd_file_mark *nfm)
{
	if (!atomic_inc_not_zero(&nfm->nfm_ref))
		return NULL;
	return nfm;
}

static void
nfsd_file_mark_put(struct nfsd_file_mark *nfm)
{
	if (atomic_dec_and_test(&nfm->nfm_ref)) {
		fsnotify_destroy_mark(&nfm->nfm_mark, nfsd_file_fsnotify_group);
		fsnotify_put_mark(&nfm->nfm_mark);
	}
}

static struct nfsd_file_mark *
nfsd_file_mark_find_or_create(struct nfsd_file *nf)
{
	int err;
	struct fsnotify_mark *mark;
	struct nfsd_file_mark *nfm = NULL, *new;
	struct inode *inode = nf->nf_inode;

	do {
		mutex_lock(&nfsd_file_fsnotify_group->mark_mutex);
		mark = fsnotify_find_mark(&inode->i_fsnotify_marks,
					  nfsd_file_fsnotify_group);
		if (mark) {
			nfm = nfsd_file_mark_get(container_of(mark,
						 struct nfsd_file_mark,
						 nfm_mark));
			mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex);
			fsnotify_put_mark(mark);
			if (likely(nfm))
				break;
		} else
			mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex);

		/* allocate a new nfm */
		new = kmem_cache_alloc(nfsd_file_mark_slab, GFP_KERNEL);
		if (!new)
			return NULL;
		fsnotify_init_mark(&new->nfm_mark, nfsd_file_fsnotify_group);
		new->nfm_mark.mask = FS_ATTRIB|FS_DELETE_SELF;
		atomic_set(&new->nfm_ref, 1);

		err = fsnotify_add_inode_mark(&new->nfm_mark, inode, 0);

		/*
		 * If the add was successful, then return the object.
		 * Otherwise, we need to put the reference we hold on the
		 * nfm_mark. The fsnotify code will take a reference and put
		 * it on failure, so we can't just free it directly. It's also
		 * not safe to call fsnotify_destroy_mark on it as the
		 * mark->group will be NULL. Thus, we can't let the nfm_ref
		 * counter drive the destruction at this point.
		 */
		if (likely(!err))
			nfm = new;
		else
			fsnotify_put_mark(&new->nfm_mark);
	} while (unlikely(err == -EEXIST));

	return nfm;
}

static struct nfsd_file *
nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval,
		struct net *net)
{
	struct nfsd_file *nf;

	nf = kmem_cache_alloc(nfsd_file_slab, GFP_KERNEL);
	if (nf) {
		INIT_HLIST_NODE(&nf->nf_node);
		INIT_LIST_HEAD(&nf->nf_lru);
		nf->nf_file = NULL;
		nf->nf_cred = get_current_cred();
		nf->nf_net = net;
		nf->nf_flags = 0;
		nf->nf_inode = inode;
		nf->nf_hashval = hashval;
		atomic_set(&nf->nf_ref, 1);
		nf->nf_may = may & NFSD_FILE_MAY_MASK;
		if (may & NFSD_MAY_NOT_BREAK_LEASE) {
			if (may & NFSD_MAY_WRITE)
				__set_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags);
			if (may & NFSD_MAY_READ)
				__set_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
		}
		nf->nf_mark = NULL;
		trace_nfsd_file_alloc(nf);
	}
	return nf;
}

static bool
nfsd_file_free(struct nfsd_file *nf)
{
	bool flush = false;

	trace_nfsd_file_put_final(nf);
	if (nf->nf_mark)
		nfsd_file_mark_put(nf->nf_mark);
	if (nf->nf_file) {
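		/*
		 * Take a reference across filp_close() so the fput() below
		 * drops what is then the last reference. That final
		 * __fput() may be deferred, so return true to tell the
		 * caller to flush_delayed_fput().
		 */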
		get_file(nf->nf_file);
		filp_close(nf->nf_file, NULL);
		fput(nf->nf_file);
		flush = true;
	}
	call_rcu(&nf->nf_rcu, nfsd_file_slab_free);
	return flush;
}

static bool
nfsd_file_check_writeback(struct nfsd_file *nf)
{
	struct file *file = nf->nf_file;
	struct address_space *mapping;

	if (!file || !(file->f_mode & FMODE_WRITE))
		return false;
	mapping = file->f_mapping;
	return mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) ||
		mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK);
}

static int
nfsd_file_check_write_error(struct nfsd_file *nf)
{
	struct file *file = nf->nf_file;

	if (!file || !(file->f_mode & FMODE_WRITE))
		return 0;
	return filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err));
}

static bool
nfsd_file_in_use(struct nfsd_file *nf)
{
	return nfsd_file_check_writeback(nf) ||
		nfsd_file_check_write_error(nf);
}

static void
nfsd_file_do_unhash(struct nfsd_file *nf)
{
	lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);

	trace_nfsd_file_unhash(nf);

	if (nfsd_file_check_write_error(nf))
		nfsd_reset_boot_verifier(net_generic(nf->nf_net, nfsd_net_id));
	--nfsd_file_hashtbl[nf->nf_hashval].nfb_count;
	hlist_del_rcu(&nf->nf_node);
	if (!list_empty(&nf->nf_lru))
		list_lru_del(&nfsd_file_lru, &nf->nf_lru);
	atomic_long_dec(&nfsd_filecache_count);
}

static bool
nfsd_file_unhash(struct nfsd_file *nf)
{
	if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
		nfsd_file_do_unhash(nf);
		return true;
	}
	return false;
}

/*
 * Return true if the file was unhashed.
 */
static bool
nfsd_file_unhash_and_release_locked(struct nfsd_file *nf, struct list_head *dispose)
{
	lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);

	trace_nfsd_file_unhash_and_release_locked(nf);
	if (!nfsd_file_unhash(nf))
		return false;
	/* keep final reference for nfsd_file_lru_dispose */
	if (atomic_add_unless(&nf->nf_ref, -1, 1))
		return true;

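	/*
	 * We hold the last reference: queue the entry so the caller can
	 * drop it outside of the bucket lock.
	 */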
	list_add(&nf->nf_lru, dispose);
	return true;
}

static int
nfsd_file_put_noref(struct nfsd_file *nf)
{
	int count;

	trace_nfsd_file_put(nf);

	count = atomic_dec_return(&nf->nf_ref);
	if (!count) {
		WARN_ON(test_bit(NFSD_FILE_HASHED, &nf->nf_flags));
		nfsd_file_free(nf);
	}
	return count;
}

void
nfsd_file_put(struct nfsd_file *nf)
{
	bool is_hashed = test_bit(NFSD_FILE_HASHED, &nf->nf_flags) != 0;
	bool unused = !nfsd_file_in_use(nf);

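	/* Set the flag before dropping the ref; see nfsd_file_lru_cb() */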
	set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
	if (nfsd_file_put_noref(nf) == 1 && is_hashed && unused)
		nfsd_file_schedule_laundrette(NFSD_FILE_LAUNDRETTE_MAY_FLUSH);
}

struct nfsd_file *
nfsd_file_get(struct nfsd_file *nf)
{
	if (likely(atomic_inc_not_zero(&nf->nf_ref)))
		return nf;
	return NULL;
}

static void
nfsd_file_dispose_list(struct list_head *dispose)
{
	struct nfsd_file *nf;

	while (!list_empty(dispose)) {
		nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
		list_del(&nf->nf_lru);
		nfsd_file_put_noref(nf);
	}
}

static void
nfsd_file_dispose_list_sync(struct list_head *dispose)
{
	bool flush = false;
	struct nfsd_file *nf;

	while (!list_empty(dispose)) {
		nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
		list_del(&nf->nf_lru);
		if (!atomic_dec_and_test(&nf->nf_ref))
			continue;
		if (nfsd_file_free(nf))
			flush = true;
	}
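	/* Make sure any deferred final __fput() has run before returning */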
	if (flush)
		flush_delayed_fput();
}

/*
 * Note this can deadlock with nfsd_file_cache_purge.
 */
static enum lru_status
nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru,
		 spinlock_t *lock, void *arg)
	__releases(lock)
	__acquires(lock)
{
	struct list_head *head = arg;
	struct nfsd_file *nf = list_entry(item, struct nfsd_file, nf_lru);

	/*
	 * Do a lockless refcount check. The hashtable holds one reference, so
	 * we look to see if anything else has a reference, or if any have
	 * been put since the shrinker last ran. Those don't get unhashed and
	 * released.
	 *
	 * Note that in the put path, we set the flag and then decrement the
	 * counter. Here we check the counter and then test and clear the flag.
	 * That order is deliberate to ensure that we can do this locklessly.
	 */
	if (atomic_read(&nf->nf_ref) > 1)
		goto out_skip;

	/*
	 * Don't throw out files that are still undergoing I/O or
	 * that have uncleared errors pending.
	 */
	if (nfsd_file_check_writeback(nf))
		goto out_skip;

	if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags))
		goto out_rescan;

	if (!test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags))
		goto out_skip;

	list_lru_isolate_move(lru, &nf->nf_lru, head);
	return LRU_REMOVED;
out_rescan:
	set_bit(NFSD_FILE_LRU_RESCAN, &nfsd_file_lru_flags);
out_skip:
	return LRU_SKIP;
}

static void
nfsd_file_lru_dispose(struct list_head *head)
{
	while (!list_empty(head)) {
		struct nfsd_file *nf = list_first_entry(head,
				struct nfsd_file, nf_lru);
		list_del_init(&nf->nf_lru);
		spin_lock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
		nfsd_file_do_unhash(nf);
		spin_unlock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
		nfsd_file_put_noref(nf);
	}
}

static unsigned long
nfsd_file_lru_count(struct shrinker *s, struct shrink_control *sc)
{
	return list_lru_count(&nfsd_file_lru);
}

static unsigned long
nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc)
{
	LIST_HEAD(head);
	unsigned long ret;

	ret = list_lru_shrink_walk(&nfsd_file_lru, sc, nfsd_file_lru_cb, &head);
	nfsd_file_lru_dispose(&head);
	return ret;
}

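/*
 * Note: .seeks is set below DEFAULT_SEEKS (2), which should make the VM
 * reclaim entries from this cache relatively aggressively.
 */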
static struct shrinker nfsd_file_shrinker = {
	.scan_objects = nfsd_file_lru_scan,
	.count_objects = nfsd_file_lru_count,
	.seeks = 1,
};

static void
__nfsd_file_close_inode(struct inode *inode, unsigned int hashval,
			struct list_head *dispose)
{
	struct nfsd_file *nf;
	struct hlist_node *tmp;

	spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
	hlist_for_each_entry_safe(nf, tmp, &nfsd_file_hashtbl[hashval].nfb_head, nf_node) {
		if (inode == nf->nf_inode)
			nfsd_file_unhash_and_release_locked(nf, dispose);
	}
	spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
}

/**
 * nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file
 * @inode: inode of the file to attempt to remove
 *
 * Walk the whole hash bucket, looking for any files that correspond to "inode".
 * If any do, then unhash them and put the hashtable reference to them and
 * destroy any that had their last reference put. Also ensure that any of the
 * fputs also have their final __fput done as well.
 */
void
nfsd_file_close_inode_sync(struct inode *inode)
{
	unsigned int hashval = (unsigned int)hash_long(inode->i_ino,
						NFSD_FILE_HASH_BITS);
	LIST_HEAD(dispose);

	__nfsd_file_close_inode(inode, hashval, &dispose);
	trace_nfsd_file_close_inode_sync(inode, hashval, !list_empty(&dispose));
	nfsd_file_dispose_list_sync(&dispose);
}

/**
 * nfsd_file_close_inode - attempt to forcibly close a nfsd_file
 * @inode: inode of the file to attempt to remove
 *
 * Walk the whole hash bucket, looking for any files that correspond to "inode".
 * If any do, then unhash them and put the hashtable reference to them and
 * destroy any that had their last reference put.
 */
static void
nfsd_file_close_inode(struct inode *inode)
{
	unsigned int hashval = (unsigned int)hash_long(inode->i_ino,
						NFSD_FILE_HASH_BITS);
	LIST_HEAD(dispose);

	__nfsd_file_close_inode(inode, hashval, &dispose);
	trace_nfsd_file_close_inode(inode, hashval, !list_empty(&dispose));
	nfsd_file_dispose_list(&dispose);
}

/**
 * nfsd_file_delayed_close - close unused nfsd_files
 * @work: dummy
 *
 * Walk the LRU list and close any entries that have not been used since
 * the last scan.
 *
 * Note this can deadlock with nfsd_file_cache_purge.
 */
static void
nfsd_file_delayed_close(struct work_struct *work)
{
	LIST_HEAD(head);

	list_lru_walk(&nfsd_file_lru, nfsd_file_lru_cb, &head, LONG_MAX);

	if (test_and_clear_bit(NFSD_FILE_LRU_RESCAN, &nfsd_file_lru_flags))
		nfsd_file_schedule_laundrette(NFSD_FILE_LAUNDRETTE_NOFLUSH);

	if (!list_empty(&head)) {
		nfsd_file_lru_dispose(&head);
		flush_delayed_fput();
	}
}

static int
nfsd_file_lease_notifier_call(struct notifier_block *nb, unsigned long arg,
			      void *data)
{
	struct file_lock *fl = data;

	/* Only close files for F_SETLEASE leases */
	if (fl->fl_flags & FL_LEASE)
		nfsd_file_close_inode_sync(file_inode(fl->fl_file));
	return 0;
}

static struct notifier_block nfsd_file_lease_notifier = {
	.notifier_call = nfsd_file_lease_notifier_call,
};

static int
nfsd_file_fsnotify_handle_event(struct fsnotify_group *group,
				struct inode *inode,
				u32 mask, const void *data, int data_type,
				const struct qstr *file_name, u32 cookie,
				struct fsnotify_iter_info *iter_info)
{
	trace_nfsd_file_fsnotify_handle_event(inode, mask);

	/* Should be no marks on non-regular files */
	if (!S_ISREG(inode->i_mode)) {
		WARN_ON_ONCE(1);
		return 0;
	}

	/* don't close files if this was not the last link */
	if (mask & FS_ATTRIB) {
		if (inode->i_nlink)
			return 0;
	}

	nfsd_file_close_inode(inode);
	return 0;
}

static const struct fsnotify_ops nfsd_file_fsnotify_ops = {
	.handle_event = nfsd_file_fsnotify_handle_event,
	.free_mark = nfsd_file_mark_free,
};

int
nfsd_file_cache_init(void)
{
	int ret = -ENOMEM;
	unsigned int i;

	clear_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags);

	if (nfsd_file_hashtbl)
		return 0;

	nfsd_file_hashtbl = kcalloc(NFSD_FILE_HASH_SIZE,
				    sizeof(*nfsd_file_hashtbl), GFP_KERNEL);
	if (!nfsd_file_hashtbl) {
		pr_err("nfsd: unable to allocate nfsd_file_hashtbl\n");
		goto out_err;
	}

	nfsd_file_slab = kmem_cache_create("nfsd_file",
				sizeof(struct nfsd_file), 0, 0, NULL);
	if (!nfsd_file_slab) {
		pr_err("nfsd: unable to create nfsd_file_slab\n");
		goto out_err;
	}

	nfsd_file_mark_slab = kmem_cache_create("nfsd_file_mark",
				sizeof(struct nfsd_file_mark), 0, 0, NULL);
	if (!nfsd_file_mark_slab) {
		pr_err("nfsd: unable to create nfsd_file_mark_slab\n");
		goto out_err;
	}

	ret = list_lru_init(&nfsd_file_lru);
	if (ret) {
		pr_err("nfsd: failed to init nfsd_file_lru: %d\n", ret);
		goto out_err;
	}

	ret = register_shrinker(&nfsd_file_shrinker);
	if (ret) {
		pr_err("nfsd: failed to register nfsd_file_shrinker: %d\n", ret);
		goto out_lru;
	}

	ret = lease_register_notifier(&nfsd_file_lease_notifier);
	if (ret) {
		pr_err("nfsd: unable to register lease notifier: %d\n", ret);
		goto out_shrinker;
	}

	nfsd_file_fsnotify_group = fsnotify_alloc_group(&nfsd_file_fsnotify_ops);
	if (IS_ERR(nfsd_file_fsnotify_group)) {
		pr_err("nfsd: unable to create fsnotify group: %ld\n",
			PTR_ERR(nfsd_file_fsnotify_group));
		nfsd_file_fsnotify_group = NULL;
		goto out_notifier;
	}

	for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
		INIT_HLIST_HEAD(&nfsd_file_hashtbl[i].nfb_head);
		spin_lock_init(&nfsd_file_hashtbl[i].nfb_lock);
	}

	INIT_DELAYED_WORK(&nfsd_filecache_laundrette, nfsd_file_delayed_close);
out:
	return ret;
out_notifier:
	lease_unregister_notifier(&nfsd_file_lease_notifier);
out_shrinker:
	unregister_shrinker(&nfsd_file_shrinker);
out_lru:
	list_lru_destroy(&nfsd_file_lru);
out_err:
	kmem_cache_destroy(nfsd_file_slab);
	nfsd_file_slab = NULL;
	kmem_cache_destroy(nfsd_file_mark_slab);
	nfsd_file_mark_slab = NULL;
	kfree(nfsd_file_hashtbl);
	nfsd_file_hashtbl = NULL;
	goto out;
}

/*
 * Note this can deadlock with nfsd_file_lru_cb.
 */
void
nfsd_file_cache_purge(struct net *net)
{
	unsigned int i;
	struct nfsd_file *nf;
	struct hlist_node *next;
	LIST_HEAD(dispose);
	bool del;

	if (!nfsd_file_hashtbl)
		return;

	for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
		struct nfsd_fcache_bucket *nfb = &nfsd_file_hashtbl[i];

		spin_lock(&nfb->nfb_lock);
		hlist_for_each_entry_safe(nf, next, &nfb->nfb_head, nf_node) {
			if (net && nf->nf_net != net)
				continue;
			del = nfsd_file_unhash_and_release_locked(nf, &dispose);

			/*
			 * Deadlock detected! Something marked this entry as
			 * unhashed, but hasn't removed it from the hash list.
			 */
			WARN_ON_ONCE(!del);
		}
		spin_unlock(&nfb->nfb_lock);
		nfsd_file_dispose_list(&dispose);
	}
}

void
nfsd_file_cache_shutdown(void)
{
	LIST_HEAD(dispose);

	set_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags);

	lease_unregister_notifier(&nfsd_file_lease_notifier);
	unregister_shrinker(&nfsd_file_shrinker);
	/*
	 * make sure all callers of nfsd_file_lru_cb are done before
	 * calling nfsd_file_cache_purge
	 */
	cancel_delayed_work_sync(&nfsd_filecache_laundrette);
	nfsd_file_cache_purge(NULL);
	list_lru_destroy(&nfsd_file_lru);
	rcu_barrier();
	fsnotify_put_group(nfsd_file_fsnotify_group);
	nfsd_file_fsnotify_group = NULL;
	kmem_cache_destroy(nfsd_file_slab);
	nfsd_file_slab = NULL;
	fsnotify_wait_marks_destroyed();
	kmem_cache_destroy(nfsd_file_mark_slab);
	nfsd_file_mark_slab = NULL;
	kfree(nfsd_file_hashtbl);
	nfsd_file_hashtbl = NULL;
}

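/*
 * A cached open is only shared when the current credentials match those
 * used to create it: same fsuid, fsgid and an identical supplementary
 * group list.
 */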
static bool
nfsd_match_cred(const struct cred *c1, const struct cred *c2)
{
	int i;

	if (!uid_eq(c1->fsuid, c2->fsuid))
		return false;
	if (!gid_eq(c1->fsgid, c2->fsgid))
		return false;
	if (c1->group_info == NULL || c2->group_info == NULL)
		return c1->group_info == c2->group_info;
	if (c1->group_info->ngroups != c2->group_info->ngroups)
		return false;
	for (i = 0; i < c1->group_info->ngroups; i++) {
		if (!gid_eq(c1->group_info->gid[i], c2->group_info->gid[i]))
			return false;
	}
	return true;
}

static struct nfsd_file *
nfsd_file_find_locked(struct inode *inode, unsigned int may_flags,
		      unsigned int hashval, struct net *net)
{
	struct nfsd_file *nf;
	unsigned char need = may_flags & NFSD_FILE_MAY_MASK;

	hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head,
				 nf_node) {
		if ((need & nf->nf_may) != need)
			continue;
		if (nf->nf_inode != inode)
			continue;
		if (nf->nf_net != net)
			continue;
		if (!nfsd_match_cred(nf->nf_cred, current_cred()))
			continue;
		if (nfsd_file_get(nf) != NULL)
			return nf;
	}
	return NULL;
}

/**
 * nfsd_file_is_cached - are there any cached open files for this fh?
 * @inode: inode of the file to check
 *
 * Scan the hashtable for open files that match this fh. Returns true if there
 * are any, and false if not.
 */
bool
nfsd_file_is_cached(struct inode *inode)
{
	bool ret = false;
	struct nfsd_file *nf;
	unsigned int hashval;

	hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS);

	rcu_read_lock();
	hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head,
				 nf_node) {
		if (inode == nf->nf_inode) {
			ret = true;
			break;
		}
	}
	rcu_read_unlock();
	trace_nfsd_file_is_cached(inode, hashval, (int)ret);
	return ret;
}

__be32
nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
		  unsigned int may_flags, struct nfsd_file **pnf)
{
	__be32 status;
	struct net *net = SVC_NET(rqstp);
	struct nfsd_file *nf, *new;
	struct inode *inode;
	unsigned int hashval;

	/* FIXME: skip this if fh_dentry is already set? */
	status = fh_verify(rqstp, fhp, S_IFREG,
			   may_flags|NFSD_MAY_OWNER_OVERRIDE);
	if (status != nfs_ok)
		return status;

	inode = d_inode(fhp->fh_dentry);
	hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS);
retry:
	rcu_read_lock();
	nf = nfsd_file_find_locked(inode, may_flags, hashval, net);
	rcu_read_unlock();
	if (nf)
		goto wait_for_construction;

	new = nfsd_file_alloc(inode, may_flags, hashval, net);
	if (!new) {
		trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags,
					NULL, nfserr_jukebox);
		return nfserr_jukebox;
	}

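	/* Recheck under the bucket lock in case we raced with an insertion */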
	spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
	nf = nfsd_file_find_locked(inode, may_flags, hashval, net);
	if (nf == NULL)
		goto open_file;
	spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
	nfsd_file_slab_free(&new->nf_rcu);

wait_for_construction:
	wait_on_bit(&nf->nf_flags, NFSD_FILE_PENDING, TASK_UNINTERRUPTIBLE);

	/* Did construction of this file fail? */
	if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
		nfsd_file_put_noref(nf);
		goto retry;
	}

	this_cpu_inc(nfsd_file_cache_hits);

	if (!(may_flags & NFSD_MAY_NOT_BREAK_LEASE)) {
		bool write = (may_flags & NFSD_MAY_WRITE);

		if (test_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags) ||
		    (test_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags) && write)) {
			status = nfserrno(nfsd_open_break_lease(
					file_inode(nf->nf_file), may_flags));
			if (status == nfs_ok) {
				clear_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
				if (write)
					clear_bit(NFSD_FILE_BREAK_WRITE,
						  &nf->nf_flags);
			}
		}
	}
out:
	if (status == nfs_ok) {
		*pnf = nf;
	} else {
		nfsd_file_put(nf);
		nf = NULL;
	}

	trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags, nf, status);
	return status;
open_file:
	nf = new;
	/* Take reference for the hashtable */
	atomic_inc(&nf->nf_ref);
	__set_bit(NFSD_FILE_HASHED, &nf->nf_flags);
	__set_bit(NFSD_FILE_PENDING, &nf->nf_flags);
	list_lru_add(&nfsd_file_lru, &nf->nf_lru);
	hlist_add_head_rcu(&nf->nf_node, &nfsd_file_hashtbl[hashval].nfb_head);
	++nfsd_file_hashtbl[hashval].nfb_count;
	nfsd_file_hashtbl[hashval].nfb_maxcount = max(nfsd_file_hashtbl[hashval].nfb_maxcount,
			nfsd_file_hashtbl[hashval].nfb_count);
	spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
	atomic_long_inc(&nfsd_filecache_count);

	nf->nf_mark = nfsd_file_mark_find_or_create(nf);
	if (nf->nf_mark)
		status = nfsd_open_verified(rqstp, fhp, S_IFREG,
					    may_flags, &nf->nf_file);
	else
		status = nfserr_jukebox;
	/*
	 * If construction failed, or we raced with a call to unlink()
	 * then unhash.
	 */
	if (status != nfs_ok || inode->i_nlink == 0) {
		bool do_free;

		spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
		do_free = nfsd_file_unhash(nf);
		spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
		if (do_free)
			nfsd_file_put_noref(nf);
	}
	clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags);
	smp_mb__after_atomic();
	wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING);
	goto out;
}

/*
 * Note that fields may be added, removed or reordered in the future. Programs
 * scraping this file for info should test the labels to ensure they're
 * getting the correct field.
 */
static int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
{
	unsigned int i, count = 0, longest = 0;
	unsigned long hits = 0;

	/*
	 * No need for spinlocks here since we're not terribly interested in
	 * accuracy. We do take the nfsd_mutex simply to ensure that we
	 * don't end up racing with server shutdown.
	 */
	mutex_lock(&nfsd_mutex);
	if (nfsd_file_hashtbl) {
		for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
			count += nfsd_file_hashtbl[i].nfb_count;
			longest = max(longest, nfsd_file_hashtbl[i].nfb_count);
		}
	}
	mutex_unlock(&nfsd_mutex);

	for_each_possible_cpu(i)
		hits += per_cpu(nfsd_file_cache_hits, i);

	seq_printf(m, "total entries: %u\n", count);
	seq_printf(m, "longest chain: %u\n", longest);
	seq_printf(m, "cache hits: %lu\n", hits);
	return 0;
}

int nfsd_file_cache_stats_open(struct inode *inode, struct file *file)
{
	return single_open(file, nfsd_file_cache_stats_show, NULL);
}