aboutsummaryrefslogtreecommitdiffstats
path: root/fs/nfs
diff options
context:
space:
mode:
author: NeilBrown <neilb@suse.de> 2014-09-23 21:28:32 -0400
committer: Trond Myklebust <trond.myklebust@primarydata.com> 2014-09-25 08:25:28 -0400
commit: 9590544694becc64f4874963dbfc4b4d391024b7 (patch)
tree: e0578565ba34391968b509cc5cc8cc1ae7d89614 /fs/nfs
parent: a4796e37c12e177572b80864cbab9c907ea250b0 (diff)
NFS: avoid deadlocks with loop-back mounted NFS filesystems.
Support for loop-back mounted NFS filesystems is useful when NFS is used to access shared storage in a high-availability cluster.

If the node running the NFS server fails, some other node can mount the filesystem and start providing NFS service. If that node already had the filesystem NFS mounted, it will now have it loop-back mounted.

nfsd can suffer a deadlock when allocating memory and entering direct reclaim. While direct reclaim does not write to the NFS filesystem, it can send and wait for a COMMIT through nfs_release_page().

This patch modifies nfs_release_page() to wait a limited time for the commit to complete - one second. If the commit doesn't complete in this time, nfs_release_page() will fail. This means it might now fail in some cases where it wouldn't before. These cases are only when 'gfp' includes '__GFP_WAIT'.

nfs_release_page() is only called by try_to_release_page(), and that can only be called on an NFS page with required 'gfp' flags from
 - page_cache_pipe_buf_steal() in splice.c
 - shrink_page_list() in vmscan.c
 - invalidate_inode_pages2_range() in truncate.c

The first two handle failure quite safely. The last is only called after ->launder_page() has been called, and that will have waited for the commit to finish already.

So aborting if the commit takes longer than 1 second is perfectly safe.

Signed-off-by: NeilBrown <neilb@suse.de>
Acked-by: Jeff Layton <jlayton@primarydata.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Diffstat (limited to 'fs/nfs')
-rw-r--r--  fs/nfs/file.c   26
-rw-r--r--  fs/nfs/write.c   2
2 files changed, 18 insertions, 10 deletions
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 23e5f0ea5c83..325df0aeab05 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -475,17 +475,23 @@ static int nfs_release_page(struct page *page, gfp_t gfp)
475 475
476 dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page); 476 dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page);
477 477
478 /* Only do I/O if gfp is a superset of GFP_KERNEL, and we're not 478 /* Always try to initiate a 'commit' if relevant, but only
479 * doing this memory reclaim for a fs-related allocation. 479 * wait for it if __GFP_WAIT is set and the calling process is
480 * allowed to block. Even then, only wait 1 second.
481 * Waiting indefinitely can cause deadlocks when the NFS
482 * server is on this machine, and there is no particular need
483 * to wait extensively here. A short wait has the benefit
484 * that someone else can worry about the freezer.
480 */ 485 */
481 if (mapping && (gfp & GFP_KERNEL) == GFP_KERNEL && 486 if (mapping) {
482 !(current->flags & PF_FSTRANS)) { 487 struct nfs_server *nfss = NFS_SERVER(mapping->host);
483 int how = FLUSH_SYNC; 488 nfs_commit_inode(mapping->host, 0);
484 489 if ((gfp & __GFP_WAIT) &&
485 /* Don't let kswapd deadlock waiting for OOM RPC calls */ 490 !current_is_kswapd() &&
486 if (current_is_kswapd()) 491 !(current->flags & PF_FSTRANS)) {
487 how = 0; 492 wait_on_page_bit_killable_timeout(page, PG_private,
488 nfs_commit_inode(mapping->host, how); 493 HZ);
494 }
489 } 495 }
490 /* If PagePrivate() is set, then the page is not freeable */ 496 /* If PagePrivate() is set, then the page is not freeable */
491 if (PagePrivate(page)) 497 if (PagePrivate(page))
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index a623b00530c3..c063a4e70354 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -705,6 +705,8 @@ static void nfs_inode_remove_request(struct nfs_page *req)
705 if (likely(!PageSwapCache(head->wb_page))) { 705 if (likely(!PageSwapCache(head->wb_page))) {
706 set_page_private(head->wb_page, 0); 706 set_page_private(head->wb_page, 0);
707 ClearPagePrivate(head->wb_page); 707 ClearPagePrivate(head->wb_page);
708 smp_mb__after_atomic();
709 wake_up_page(head->wb_page, PG_private);
708 clear_bit(PG_MAPPED, &head->wb_flags); 710 clear_bit(PG_MAPPED, &head->wb_flags);
709 } 711 }
710 nfsi->npages--; 712 nfsi->npages--;