about summary refs log tree commit diff stats
path: root/fs/cachefiles/rdwr.c
diff options
context:
space:
mode:
author    David Howells <dhowells@redhat.com>  2012-12-20 16:52:32 -0500
committer David Howells <dhowells@redhat.com>  2012-12-20 16:54:30 -0500
commit c4d6d8dbf335c7fa47341654a37c53a512b519bb (patch)
tree   14f0b9c7146a39aa3770c26bc7c480cf0d2c4f56 /fs/cachefiles/rdwr.c
parent 1800098549fc310cffffefdcb3722adaad0edda8 (diff)
CacheFiles: Fix the marking of cached pages
Under some circumstances CacheFiles defers the marking of pages with PG_fscache so that it can take advantage of pagevecs to reduce the number of calls to fscache_mark_pages_cached() and the netfs's hook to keep track of this. There are, however, two problems with this: (1) It can lead to the PG_fscache mark being applied _after_ the page is set PG_uptodate and unlocked (by the call to fscache_end_io()). (2) CacheFiles's ref on the page is dropped immediately following fscache_end_io() - and so may not still be held when the mark is applied. This can lead to the page being passed back to the allocator before the mark is applied. Fix this by, where appropriate, marking the page before calling fscache_end_io() and releasing the page. This means that we can't take advantage of pagevecs and have to make a separate call for each page to the marking routines. The symptoms of this are Bad Page state errors cropping up under memory pressure, for example: BUG: Bad page state in process tar pfn:002da page:ffffea0000009fb0 count:0 mapcount:0 mapping: (null) index:0x1447 page flags: 0x1000(private_2) Pid: 4574, comm: tar Tainted: G W 3.1.0-rc4-fsdevel+ #1064 Call Trace: [<ffffffff8109583c>] ? dump_page+0xb9/0xbe [<ffffffff81095916>] bad_page+0xd5/0xea [<ffffffff81095d82>] get_page_from_freelist+0x35b/0x46a [<ffffffff810961f3>] __alloc_pages_nodemask+0x362/0x662 [<ffffffff810989da>] __do_page_cache_readahead+0x13a/0x267 [<ffffffff81098942>] ? __do_page_cache_readahead+0xa2/0x267 [<ffffffff81098d7b>] ra_submit+0x1c/0x20 [<ffffffff8109900a>] ondemand_readahead+0x28b/0x29a [<ffffffff81098ee2>] ? ondemand_readahead+0x163/0x29a [<ffffffff810990ce>] page_cache_sync_readahead+0x38/0x3a [<ffffffff81091d8a>] generic_file_aio_read+0x2ab/0x67e [<ffffffffa008cfbe>] nfs_file_read+0xa4/0xc9 [nfs] [<ffffffff810c22c4>] do_sync_read+0xba/0xfa [<ffffffff81177a47>] ? security_file_permission+0x7b/0x84 [<ffffffff810c25dd>] ? 
rw_verify_area+0xab/0xc8 [<ffffffff810c29a4>] vfs_read+0xaa/0x13a [<ffffffff810c2a79>] sys_read+0x45/0x6c [<ffffffff813ac37b>] system_call_fastpath+0x16/0x1b As can be seen, PG_private_2 (== PG_fscache) is set in the page flags. Instrumenting fscache_mark_pages_cached() to verify whether page->mapping was set appropriately showed that sometimes it wasn't. This led to the discovery that sometimes the page has apparently been reclaimed by the time the marker got to see it. Reported-by: M. Stevens <m@tippett.com> Signed-off-by: David Howells <dhowells@redhat.com> Reviewed-by: Jeff Layton <jlayton@redhat.com>
Diffstat (limited to 'fs/cachefiles/rdwr.c')
-rw-r--r--  fs/cachefiles/rdwr.c | 34
1 file changed, 11 insertions(+), 23 deletions(-)
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index c994691d9445..3367abdcdac4 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -176,9 +176,8 @@ static void cachefiles_read_copier(struct fscache_operation *_op)
176 recheck: 176 recheck:
177 if (PageUptodate(monitor->back_page)) { 177 if (PageUptodate(monitor->back_page)) {
178 copy_highpage(monitor->netfs_page, monitor->back_page); 178 copy_highpage(monitor->netfs_page, monitor->back_page);
179 179 fscache_mark_page_cached(monitor->op,
180 pagevec_add(&pagevec, monitor->netfs_page); 180 monitor->netfs_page);
181 fscache_mark_pages_cached(monitor->op, &pagevec);
182 error = 0; 181 error = 0;
183 } else if (!PageError(monitor->back_page)) { 182 } else if (!PageError(monitor->back_page)) {
184 /* the page has probably been truncated */ 183 /* the page has probably been truncated */
@@ -335,8 +334,7 @@ backing_page_already_present:
335backing_page_already_uptodate: 334backing_page_already_uptodate:
336 _debug("- uptodate"); 335 _debug("- uptodate");
337 336
338 pagevec_add(pagevec, netpage); 337 fscache_mark_page_cached(op, netpage);
339 fscache_mark_pages_cached(op, pagevec);
340 338
341 copy_highpage(netpage, backpage); 339 copy_highpage(netpage, backpage);
342 fscache_end_io(op, netpage, 0); 340 fscache_end_io(op, netpage, 0);
@@ -448,8 +446,7 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op,
448 &pagevec); 446 &pagevec);
449 } else if (cachefiles_has_space(cache, 0, 1) == 0) { 447 } else if (cachefiles_has_space(cache, 0, 1) == 0) {
450 /* there's space in the cache we can use */ 448 /* there's space in the cache we can use */
451 pagevec_add(&pagevec, page); 449 fscache_mark_page_cached(op, page);
452 fscache_mark_pages_cached(op, &pagevec);
453 ret = -ENODATA; 450 ret = -ENODATA;
454 } else { 451 } else {
455 ret = -ENOBUFS; 452 ret = -ENOBUFS;
@@ -465,8 +462,7 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op,
465 */ 462 */
466static int cachefiles_read_backing_file(struct cachefiles_object *object, 463static int cachefiles_read_backing_file(struct cachefiles_object *object,
467 struct fscache_retrieval *op, 464 struct fscache_retrieval *op,
468 struct list_head *list, 465 struct list_head *list)
469 struct pagevec *mark_pvec)
470{ 466{
471 struct cachefiles_one_read *monitor = NULL; 467 struct cachefiles_one_read *monitor = NULL;
472 struct address_space *bmapping = object->backer->d_inode->i_mapping; 468 struct address_space *bmapping = object->backer->d_inode->i_mapping;
@@ -626,13 +622,13 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object,
626 page_cache_release(backpage); 622 page_cache_release(backpage);
627 backpage = NULL; 623 backpage = NULL;
628 624
629 if (!pagevec_add(mark_pvec, netpage)) 625 fscache_mark_page_cached(op, netpage);
630 fscache_mark_pages_cached(op, mark_pvec);
631 626
632 page_cache_get(netpage); 627 page_cache_get(netpage);
633 if (!pagevec_add(&lru_pvec, netpage)) 628 if (!pagevec_add(&lru_pvec, netpage))
634 __pagevec_lru_add_file(&lru_pvec); 629 __pagevec_lru_add_file(&lru_pvec);
635 630
631 /* the netpage is unlocked and marked up to date here */
636 fscache_end_io(op, netpage, 0); 632 fscache_end_io(op, netpage, 0);
637 page_cache_release(netpage); 633 page_cache_release(netpage);
638 netpage = NULL; 634 netpage = NULL;
@@ -775,15 +771,11 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op,
775 /* submit the apparently valid pages to the backing fs to be read from 771 /* submit the apparently valid pages to the backing fs to be read from
776 * disk */ 772 * disk */
777 if (nrbackpages > 0) { 773 if (nrbackpages > 0) {
778 ret2 = cachefiles_read_backing_file(object, op, &backpages, 774 ret2 = cachefiles_read_backing_file(object, op, &backpages);
779 &pagevec);
780 if (ret2 == -ENOMEM || ret2 == -EINTR) 775 if (ret2 == -ENOMEM || ret2 == -EINTR)
781 ret = ret2; 776 ret = ret2;
782 } 777 }
783 778
784 if (pagevec_count(&pagevec) > 0)
785 fscache_mark_pages_cached(op, &pagevec);
786
787 _leave(" = %d [nr=%u%s]", 779 _leave(" = %d [nr=%u%s]",
788 ret, *nr_pages, list_empty(pages) ? " empty" : ""); 780 ret, *nr_pages, list_empty(pages) ? " empty" : "");
789 return ret; 781 return ret;
@@ -806,7 +798,6 @@ int cachefiles_allocate_page(struct fscache_retrieval *op,
806{ 798{
807 struct cachefiles_object *object; 799 struct cachefiles_object *object;
808 struct cachefiles_cache *cache; 800 struct cachefiles_cache *cache;
809 struct pagevec pagevec;
810 int ret; 801 int ret;
811 802
812 object = container_of(op->op.object, 803 object = container_of(op->op.object,
@@ -817,13 +808,10 @@ int cachefiles_allocate_page(struct fscache_retrieval *op,
817 _enter("%p,{%lx},", object, page->index); 808 _enter("%p,{%lx},", object, page->index);
818 809
819 ret = cachefiles_has_space(cache, 0, 1); 810 ret = cachefiles_has_space(cache, 0, 1);
820 if (ret == 0) { 811 if (ret == 0)
821 pagevec_init(&pagevec, 0); 812 fscache_mark_page_cached(op, page);
822 pagevec_add(&pagevec, page); 813 else
823 fscache_mark_pages_cached(op, &pagevec);
824 } else {
825 ret = -ENOBUFS; 814 ret = -ENOBUFS;
826 }
827 815
828 _leave(" = %d", ret); 816 _leave(" = %d", ret);
829 return ret; 817 return ret;