aboutsummaryrefslogtreecommitdiffstats
path: root/fs/cachefiles
diff options
context:
space:
mode:
authorDavid Howells <dhowells@redhat.com>2012-12-20 16:52:35 -0500
committerDavid Howells <dhowells@redhat.com>2012-12-20 16:58:26 -0500
commit9f10523f891928330b7529da54c1a3cc65180b1a (patch)
tree014731e89d44d1ca86cc665f4d39d8d2c25c69bf /fs/cachefiles
parentef46ed888efb1e8da33be5d33c9b54476289a43b (diff)
FS-Cache: Fix operation state management and accounting
Fix the state management of internal fscache operations and the accounting of what operations are in what states. This is done by: (1) Give struct fscache_operation a enum variable that directly represents the state it's currently in, rather than spreading this knowledge over a bunch of flags, who's processing the operation at the moment and whether it is queued or not. This makes it easier to write assertions to check the state at various points and to prevent invalid state transitions. (2) Add an 'operation complete' state and supply a function to indicate the completion of an operation (fscache_op_complete()) and make things call it. The final call to fscache_put_operation() can then check that an op in the appropriate state (complete or cancelled). (3) Adjust the use of object->n_ops, ->n_in_progress, ->n_exclusive to better govern the state of an object: (a) The ->n_ops is now the number of extant operations on the object and is now decremented by fscache_put_operation() only. (b) The ->n_in_progress is simply the number of objects that have been taken off of the object's pending queue for the purposes of being run. This is decremented by fscache_op_complete() only. (c) The ->n_exclusive is the number of exclusive ops that have been submitted and queued or are in progress. It is decremented by fscache_op_complete() and by fscache_cancel_op(). fscache_put_operation() and fscache_operation_gc() now no longer try to clean up ->n_exclusive and ->n_in_progress. That was leading to double decrements against fscache_cancel_op(). fscache_cancel_op() now no longer decrements ->n_ops. That was leading to double decrements against fscache_put_operation(). fscache_submit_exclusive_op() now decides whether it has to queue an op based on ->n_in_progress being > 0 rather than ->n_ops > 0 as the latter will persist in being true even after all preceding operations have been cancelled or completed. Furthermore, if an object is active and there are runnable ops against it, there must be at least one op running. (4) Add a remaining-pages counter (n_pages) to struct fscache_retrieval and provide a function to record completion of the pages as they complete. When n_pages reaches 0, the operation is deemed to be complete and fscache_op_complete() is called. Add calls to fscache_retrieval_complete() anywhere we've finished with a page we've been given to read or allocate for. This includes places where we just return pages to the netfs for reading from the server and where accessing the cache fails and we discard the proposed netfs page. The bugs in the unfixed state management manifest themselves as oopses like the following where the operation completion gets out of sync with return of the cookie by the netfs. This is possible because the cache unlocks and returns all the netfs pages before recording its completion - which means that there's nothing to stop the netfs discarding them and returning the cookie. FS-Cache: Cookie 'NFS.fh' still has outstanding reads ------------[ cut here ]------------ kernel BUG at fs/fscache/cookie.c:519! invalid opcode: 0000 [#1] SMP CPU 1 Modules linked in: cachefiles nfs fscache auth_rpcgss nfs_acl lockd sunrpc Pid: 400, comm: kswapd0 Not tainted 3.1.0-rc7-fsdevel+ #1090 /DG965RY RIP: 0010:[<ffffffffa007050a>] [<ffffffffa007050a>] __fscache_relinquish_cookie+0x170/0x343 [fscache] RSP: 0018:ffff8800368cfb00 EFLAGS: 00010282 RAX: 000000000000003c RBX: ffff880023cc8790 RCX: 0000000000000000 RDX: 0000000000002f2e RSI: 0000000000000001 RDI: ffffffff813ab86c RBP: ffff8800368cfb50 R08: 0000000000000002 R09: 0000000000000000 R10: ffff88003a1b7890 R11: ffff88001df6e488 R12: ffff880023d8ed98 R13: ffff880023cc8798 R14: 0000000000000004 R15: ffff88003b8bf370 FS: 0000000000000000(0000) GS:ffff88003bd00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 00000000008ba008 CR3: 0000000023d93000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process kswapd0 (pid: 400, threadinfo ffff8800368ce000, task ffff88003b8bf040) Stack: ffff88003b8bf040 ffff88001df6e528 ffff88001df6e528 ffffffffa00b46b0 ffff88003b8bf040 ffff88001df6e488 ffff88001df6e620 ffffffffa00b46b0 ffff88001ebd04c8 0000000000000004 ffff8800368cfb70 ffffffffa00b2c91 Call Trace: [<ffffffffa00b2c91>] nfs_fscache_release_inode_cookie+0x3b/0x47 [nfs] [<ffffffffa008f25f>] nfs_clear_inode+0x3c/0x41 [nfs] [<ffffffffa0090df1>] nfs4_evict_inode+0x2f/0x33 [nfs] [<ffffffff810d8d47>] evict+0xa1/0x15c [<ffffffff810d8e2e>] dispose_list+0x2c/0x38 [<ffffffff810d9ebd>] prune_icache_sb+0x28c/0x29b [<ffffffff810c56b7>] prune_super+0xd5/0x140 [<ffffffff8109b615>] shrink_slab+0x102/0x1ab [<ffffffff8109d690>] balance_pgdat+0x2f2/0x595 [<ffffffff8103e009>] ? process_timeout+0xb/0xb [<ffffffff8109dba3>] kswapd+0x270/0x289 [<ffffffff8104c5ea>] ? __init_waitqueue_head+0x46/0x46 [<ffffffff8109d933>] ? balance_pgdat+0x595/0x595 [<ffffffff8104bf7a>] kthread+0x7f/0x87 [<ffffffff813ad6b4>] kernel_thread_helper+0x4/0x10 [<ffffffff81026b98>] ? finish_task_switch+0x45/0xc0 [<ffffffff813abcdd>] ? retint_restore_args+0xe/0xe [<ffffffff8104befb>] ? __init_kthread_worker+0x53/0x53 [<ffffffff813ad6b0>] ? gs_change+0xb/0xb Signed-off-by: David Howells <dhowells@redhat.com>
Diffstat (limited to 'fs/cachefiles')
-rw-r--r--fs/cachefiles/rdwr.c31
1 files changed, 26 insertions, 5 deletions
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index bf123d9c3206..93a0815e0498 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -197,6 +197,7 @@ static void cachefiles_read_copier(struct fscache_operation *_op)
197 197
198 fscache_end_io(op, monitor->netfs_page, error); 198 fscache_end_io(op, monitor->netfs_page, error);
199 page_cache_release(monitor->netfs_page); 199 page_cache_release(monitor->netfs_page);
200 fscache_retrieval_complete(op, 1);
200 fscache_put_retrieval(op); 201 fscache_put_retrieval(op);
201 kfree(monitor); 202 kfree(monitor);
202 203
@@ -339,6 +340,7 @@ backing_page_already_uptodate:
339 340
340 copy_highpage(netpage, backpage); 341 copy_highpage(netpage, backpage);
341 fscache_end_io(op, netpage, 0); 342 fscache_end_io(op, netpage, 0);
343 fscache_retrieval_complete(op, 1);
342 344
343success: 345success:
344 _debug("success"); 346 _debug("success");
@@ -360,6 +362,7 @@ read_error:
360 goto out; 362 goto out;
361io_error: 363io_error:
362 cachefiles_io_error_obj(object, "Page read error on backing file"); 364 cachefiles_io_error_obj(object, "Page read error on backing file");
365 fscache_retrieval_complete(op, 1);
363 ret = -ENOBUFS; 366 ret = -ENOBUFS;
364 goto out; 367 goto out;
365 368
@@ -369,6 +372,7 @@ nomem_monitor:
369 fscache_put_retrieval(monitor->op); 372 fscache_put_retrieval(monitor->op);
370 kfree(monitor); 373 kfree(monitor);
371nomem: 374nomem:
375 fscache_retrieval_complete(op, 1);
372 _leave(" = -ENOMEM"); 376 _leave(" = -ENOMEM");
373 return -ENOMEM; 377 return -ENOMEM;
374} 378}
@@ -407,7 +411,7 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op,
407 _enter("{%p},{%lx},,,", object, page->index); 411 _enter("{%p},{%lx},,,", object, page->index);
408 412
409 if (!object->backer) 413 if (!object->backer)
410 return -ENOBUFS; 414 goto enobufs;
411 415
412 inode = object->backer->d_inode; 416 inode = object->backer->d_inode;
413 ASSERT(S_ISREG(inode->i_mode)); 417 ASSERT(S_ISREG(inode->i_mode));
@@ -416,7 +420,7 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op,
416 420
417 /* calculate the shift required to use bmap */ 421 /* calculate the shift required to use bmap */
418 if (inode->i_sb->s_blocksize > PAGE_SIZE) 422 if (inode->i_sb->s_blocksize > PAGE_SIZE)
419 return -ENOBUFS; 423 goto enobufs;
420 424
421 shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits; 425 shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits;
422 426
@@ -448,13 +452,19 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op,
448 } else if (cachefiles_has_space(cache, 0, 1) == 0) { 452 } else if (cachefiles_has_space(cache, 0, 1) == 0) {
449 /* there's space in the cache we can use */ 453 /* there's space in the cache we can use */
450 fscache_mark_page_cached(op, page); 454 fscache_mark_page_cached(op, page);
455 fscache_retrieval_complete(op, 1);
451 ret = -ENODATA; 456 ret = -ENODATA;
452 } else { 457 } else {
453 ret = -ENOBUFS; 458 goto enobufs;
454 } 459 }
455 460
456 _leave(" = %d", ret); 461 _leave(" = %d", ret);
457 return ret; 462 return ret;
463
464enobufs:
465 fscache_retrieval_complete(op, 1);
466 _leave(" = -ENOBUFS");
467 return -ENOBUFS;
458} 468}
459 469
460/* 470/*
@@ -632,6 +642,7 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object,
632 642
633 /* the netpage is unlocked and marked up to date here */ 643 /* the netpage is unlocked and marked up to date here */
634 fscache_end_io(op, netpage, 0); 644 fscache_end_io(op, netpage, 0);
645 fscache_retrieval_complete(op, 1);
635 page_cache_release(netpage); 646 page_cache_release(netpage);
636 netpage = NULL; 647 netpage = NULL;
637 continue; 648 continue;
@@ -659,6 +670,7 @@ out:
659 list_for_each_entry_safe(netpage, _n, list, lru) { 670 list_for_each_entry_safe(netpage, _n, list, lru) {
660 list_del(&netpage->lru); 671 list_del(&netpage->lru);
661 page_cache_release(netpage); 672 page_cache_release(netpage);
673 fscache_retrieval_complete(op, 1);
662 } 674 }
663 675
664 _leave(" = %d", ret); 676 _leave(" = %d", ret);
@@ -707,7 +719,7 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op,
707 *nr_pages); 719 *nr_pages);
708 720
709 if (!object->backer) 721 if (!object->backer)
710 return -ENOBUFS; 722 goto all_enobufs;
711 723
712 space = 1; 724 space = 1;
713 if (cachefiles_has_space(cache, 0, *nr_pages) < 0) 725 if (cachefiles_has_space(cache, 0, *nr_pages) < 0)
@@ -720,7 +732,7 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op,
720 732
721 /* calculate the shift required to use bmap */ 733 /* calculate the shift required to use bmap */
722 if (inode->i_sb->s_blocksize > PAGE_SIZE) 734 if (inode->i_sb->s_blocksize > PAGE_SIZE)
723 return -ENOBUFS; 735 goto all_enobufs;
724 736
725 shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits; 737 shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits;
726 738
@@ -760,7 +772,10 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op,
760 nrbackpages++; 772 nrbackpages++;
761 } else if (space && pagevec_add(&pagevec, page) == 0) { 773 } else if (space && pagevec_add(&pagevec, page) == 0) {
762 fscache_mark_pages_cached(op, &pagevec); 774 fscache_mark_pages_cached(op, &pagevec);
775 fscache_retrieval_complete(op, 1);
763 ret = -ENODATA; 776 ret = -ENODATA;
777 } else {
778 fscache_retrieval_complete(op, 1);
764 } 779 }
765 } 780 }
766 781
@@ -781,6 +796,10 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op,
781 _leave(" = %d [nr=%u%s]", 796 _leave(" = %d [nr=%u%s]",
782 ret, *nr_pages, list_empty(pages) ? " empty" : ""); 797 ret, *nr_pages, list_empty(pages) ? " empty" : "");
783 return ret; 798 return ret;
799
800all_enobufs:
801 fscache_retrieval_complete(op, *nr_pages);
802 return -ENOBUFS;
784} 803}
785 804
786/* 805/*
@@ -815,6 +834,7 @@ int cachefiles_allocate_page(struct fscache_retrieval *op,
815 else 834 else
816 ret = -ENOBUFS; 835 ret = -ENOBUFS;
817 836
837 fscache_retrieval_complete(op, 1);
818 _leave(" = %d", ret); 838 _leave(" = %d", ret);
819 return ret; 839 return ret;
820} 840}
@@ -864,6 +884,7 @@ int cachefiles_allocate_pages(struct fscache_retrieval *op,
864 ret = -ENOBUFS; 884 ret = -ENOBUFS;
865 } 885 }
866 886
887 fscache_retrieval_complete(op, *nr_pages);
867 _leave(" = %d", ret); 888 _leave(" = %d", ret);
868 return ret; 889 return ret;
869} 890}