From 4223a4a155f245d41c350ed9eba4fc32e965c4da Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 20 Oct 2009 14:13:46 +0900 Subject: nfs: Fix nfs_parse_mount_options() kfree() leak Fix a (small) memory leak in one of the error paths of the NFS mount options parsing code. Regression introduced in 2.6.30 by commit a67d18f (NFS: load the rpc/rdma transport module automatically). Reported-by: Yinghai Lu Reported-by: Pekka Enberg Signed-off-by: Ingo Molnar Signed-off-by: Trond Myklebust Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- fs/nfs/super.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/nfs') diff --git a/fs/nfs/super.c b/fs/nfs/super.c index a2c18acb8568..90be551b80c1 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1253,6 +1253,7 @@ static int nfs_parse_mount_options(char *raw, default: dfprintk(MOUNT, "NFS: unrecognized " "transport protocol\n"); + kfree(string); return 0; } break; -- cgit v1.2.2 From a8b40bc7e635831b61c43acc71a86d3a68b2dff0 Mon Sep 17 00:00:00 2001 From: Terry Loftin Date: Thu, 22 Oct 2009 21:36:01 -0400 Subject: nfs: Panic when commit fails Actually pass the NFS_FILE_SYNC option to the server to avoid a Panic in nfs_direct_write_complete() when a commit fails. At the end of an nfs write, if the nfs commit fails, all the writes will be rescheduled. They are supposed to be rescheduled as NFS_FILE_SYNC writes, but the rpc_task structure is not completely intialized and so the option is not passed. When the rescheduled writes complete, the return indicates that they are NFS_UNSTABLE and we try to do another commit. This leads to a Panic because the commit data structure pointer was set to null in the initial (failed) commit attempt. Signed-off-by: Terry Loftin Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/nfs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 6c3210099d51..e1d415e97849 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -457,6 +457,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) }; struct rpc_task_setup task_setup_data = { .rpc_client = NFS_CLIENT(inode), + .rpc_message = &msg, .callback_ops = &nfs_write_direct_ops, .workqueue = nfsiod_workqueue, .flags = RPC_TASK_ASYNC, -- cgit v1.2.2 From 52567b03ca38b6e556ced450d64dba8d66e23b0e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 23 Oct 2009 14:46:42 -0400 Subject: NFSv4: Fix a bug when the server returns NFS4ERR_RESOURCE RFC 3530 states that when we recieve the error NFS4ERR_RESOURCE, we are not supposed to bump the sequence number on OPEN, LOCK, LOCKU, CLOSE, etc operations. The problem is that we map that error into EREMOTEIO in the XDR layer, and so the NFSv4 middle-layer routines like seqid_mutating_err(), and nfs_increment_seqid() don't recognise it. The fix is to defer the mapping until after the middle layers have processed the error. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 11 ++++++++--- fs/nfs/nfs4xdr.c | 1 - 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index ed7c269e2514..65c252798861 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -72,12 +72,17 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, /* Prevent leaks of NFSv4 errors into userland */ static int nfs4_map_errors(int err) { - if (err < -1000) { + if (err >= -1000) + return err; + switch (err) { + case -NFS4ERR_RESOURCE: + return -EREMOTEIO; + default: dprintk("%s could not handle NFSv4 error %d\n", __func__, -err); - return -EIO; + break; } - return err; + return -EIO; } /* diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 83ad47cbdd8a..20b4e30e6c82 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -5681,7 +5681,6 @@ static struct { { NFS4ERR_SERVERFAULT, -ESERVERFAULT }, { NFS4ERR_BADTYPE, -EBADTYPE }, { NFS4ERR_LOCKED, -EAGAIN }, - { NFS4ERR_RESOURCE, -EREMOTEIO }, { NFS4ERR_SYMLINK, -ELOOP }, { NFS4ERR_OP_ILLEGAL, -EOPNOTSUPP }, { NFS4ERR_DEADLOCK, -EDEADLK }, -- cgit v1.2.2 From 141aeb9f26f9f12f1584c128ce8697cdffb046e7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 26 Oct 2009 08:09:46 -0400 Subject: NFSv4: Fix two unbalanced put_rpccred() issues. Commits 29fba38b (nfs41: lease renewal) and fc01cea9 (nfs41: sequence operation) introduce a couple of put_rpccred() calls on credentials for which there is no corresponding get_rpccred(). See http://bugzilla.kernel.org/show_bug.cgi?id=14249 Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 65c252798861..ff37454fa783 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3065,9 +3065,6 @@ static void nfs4_renew_done(struct rpc_task *task, void *data) if (time_before(clp->cl_last_renewal,timestamp)) clp->cl_last_renewal = timestamp; spin_unlock(&clp->cl_lock); - dprintk("%s calling put_rpccred on rpc_cred %p\n", __func__, - task->tk_msg.rpc_cred); - put_rpccred(task->tk_msg.rpc_cred); } static const struct rpc_call_ops nfs4_renew_ops = { @@ -4882,7 +4879,6 @@ void nfs41_sequence_call_done(struct rpc_task *task, void *data) nfs41_sequence_free_slot(clp, task->tk_msg.rpc_resp); dprintk("%s rpc_cred %p\n", __func__, task->tk_msg.rpc_cred); - put_rpccred(task->tk_msg.rpc_cred); kfree(task->tk_msg.rpc_argp); kfree(task->tk_msg.rpc_resp); -- cgit v1.2.2 From 9a3936aac133037f65124fcb2d676a6c201a90a4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 26 Oct 2009 08:09:46 -0400 Subject: NFSv4: The link() operation should return any delegation on the file Otherwise, we have to wait for the server to recall it. Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 32062c33c859..7cb298525eef 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1536,6 +1536,8 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) old_dentry->d_parent->d_name.name, old_dentry->d_name.name, dentry->d_parent->d_name.name, dentry->d_name.name); + nfs_inode_return_delegation(inode); + d_drop(dentry); error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name); if (error == 0) { -- cgit v1.2.2 From 96d25e532234bec1a1989e6e1baf702d43a78b0d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 11 Nov 2009 16:15:42 +0900 Subject: NFSv4: Fix a cache validation bug which causes getcwd() to return ENOENT Changeset a65318bf3afc93ce49227e849d213799b072c5fd (NFSv4: Simplify some cache consistency post-op GETATTRs) incorrectly changed the getattr bitmap for readdir(). This causes the readdir() function to fail to return a fileid/inode number, which again exposed a bug in the NFS readdir code that causes spurious ENOENT errors to appear in applications (see http://bugzilla.kernel.org/show_bug.cgi?id=14541). The immediate band aid is to revert the incorrect bitmap change, but more long term, we should change the NFS readdir code to cope with the fact that NFSv4 servers are not required to support fileids/inode numbers. Reported-by: Daniel J Blueman Cc: stable@kernel.org Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index ff37454fa783..741a562177fc 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2767,7 +2767,7 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, .pages = &page, .pgbase = 0, .count = count, - .bitmask = NFS_SERVER(dentry->d_inode)->cache_consistency_bitmask, + .bitmask = NFS_SERVER(dentry->d_inode)->attr_bitmask, }; struct nfs4_readdir_res res; struct rpc_message msg = { -- cgit v1.2.2 From 201a15428bd54f83eccec8b7c64a04b8f9431204 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 19 Nov 2009 18:11:35 +0000 Subject: FS-Cache: Handle pages pending storage that get evicted under OOM conditions Handle netfs pages that the vmscan algorithm wants to evict from the pagecache under OOM conditions, but that are waiting for write to the cache. Under these conditions, vmscan calls the releasepage() function of the netfs, asking if a page can be discarded. The problem is typified by the following trace of a stuck process: kslowd005 D 0000000000000000 0 4253 2 0x00000080 ffff88001b14f370 0000000000000046 ffff880020d0d000 0000000000000007 0000000000000006 0000000000000001 ffff88001b14ffd8 ffff880020d0d2a8 000000000000ddf0 00000000000118c0 00000000000118c0 ffff880020d0d2a8 Call Trace: [] __fscache_wait_on_page_write+0x8b/0xa7 [fscache] [] ? autoremove_wake_function+0x0/0x34 [] ? __fscache_check_page_write+0x63/0x70 [fscache] [] nfs_fscache_release_page+0x4e/0xc4 [nfs] [] nfs_release_page+0x3c/0x41 [nfs] [] try_to_release_page+0x32/0x3b [] shrink_page_list+0x316/0x4ac [] shrink_inactive_list+0x392/0x67c [] ? __mutex_unlock_slowpath+0x100/0x10b [] ? trace_hardirqs_on_caller+0x10c/0x130 [] ? mutex_unlock+0x9/0xb [] shrink_list+0x8d/0x8f [] shrink_zone+0x278/0x33c [] ? ktime_get_ts+0xad/0xba [] try_to_free_pages+0x22e/0x392 [] ? isolate_pages_global+0x0/0x212 [] __alloc_pages_nodemask+0x3dc/0x5cf [] grab_cache_page_write_begin+0x65/0xaa [] ext3_write_begin+0x78/0x1eb [] generic_file_buffered_write+0x109/0x28c [] ? current_fs_time+0x22/0x29 [] __generic_file_aio_write+0x350/0x385 [] ? generic_file_aio_write+0x4a/0xae [] generic_file_aio_write+0x60/0xae [] do_sync_write+0xe3/0x120 [] ? autoremove_wake_function+0x0/0x34 [] ? __dentry_open+0x1a5/0x2b8 [] ? dentry_open+0x82/0x89 [] cachefiles_write_page+0x298/0x335 [cachefiles] [] fscache_write_op+0x178/0x2c2 [fscache] [] fscache_op_execute+0x7a/0xd1 [fscache] [] slow_work_execute+0x18f/0x2d1 [] slow_work_thread+0x1c5/0x308 [] ? autoremove_wake_function+0x0/0x34 [] ? slow_work_thread+0x0/0x308 [] kthread+0x7a/0x82 [] child_rip+0xa/0x20 [] ? restore_args+0x0/0x30 [] ? tg_shares_up+0x171/0x227 [] ? kthread+0x0/0x82 [] ? child_rip+0x0/0x20 In the above backtrace, the following is happening: (1) A page storage operation is being executed by a slow-work thread (fscache_write_op()). (2) FS-Cache farms the operation out to the cache to perform (cachefiles_write_page()). (3) CacheFiles is then calling Ext3 to perform the actual write, using Ext3's standard write (do_sync_write()) under KERNEL_DS directly from the netfs page. (4) However, for Ext3 to perform the write, it must allocate some memory, in particular, it must allocate at least one page cache page into which it can copy the data from the netfs page. (5) Under OOM conditions, the memory allocator can't immediately come up with a page, so it uses vmscan to find something to discard (try_to_free_pages()). (6) vmscan finds a clean netfs page it might be able to discard (possibly the one it's trying to write out). (7) The netfs is called to throw the page away (nfs_release_page()) - but it's called with __GFP_WAIT, so the netfs decides to wait for the store to complete (__fscache_wait_on_page_write()). (8) This blocks a slow-work processing thread - possibly against itself. The system ends up stuck because it can't write out any netfs pages to the cache without allocating more memory. To avoid this, we make FS-Cache cancel some writes that aren't in the middle of actually being performed. This means that some data won't make it into the cache this time. To support this, a new FS-Cache function is added fscache_maybe_release_page() that replaces what the netfs releasepage() functions used to do with respect to the cache. The decisions fscache_maybe_release_page() makes are counted and displayed through /proc/fs/fscache/stats on a line labelled "VmScan". There are four counters provided: "nos=N" - pages that weren't pending storage; "gon=N" - pages that were pending storage when we first looked, but weren't by the time we got the object lock; "bsy=N" - pages that we ignored as they were actively being written when we looked; and "can=N" - pages that we cancelled the storage of. What I'd really like to do is alter the behaviour of the cancellation heuristics, depending on how necessary it is to expel pages. If there are plenty of other pages that aren't waiting to be written to the cache that could be ejected first, then it would be nice to hold up on immediate cancellation of cache writes - but I don't see a way of doing that. Signed-off-by: David Howells --- fs/nfs/fscache.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index 70fad69eb959..fa588006588d 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ -359,17 +359,13 @@ int nfs_fscache_release_page(struct page *page, gfp_t gfp) BUG_ON(!cookie); - if (fscache_check_page_write(cookie, page)) { - if (!(gfp & __GFP_WAIT)) - return 0; - fscache_wait_on_page_write(cookie, page); - } - if (PageFsCache(page)) { dfprintk(FSCACHE, "NFS: fscache releasepage (0x%p/0x%p/0x%p)\n", cookie, page, nfsi); - fscache_uncache_page(cookie, page); + if (!fscache_maybe_release_page(cookie, page, gfp)) + return 0; + nfs_add_fscache_stats(page->mapping->host, NFSIOS_FSCACHE_PAGES_UNCACHED, 1); } -- cgit v1.2.2