From ab09203e302b6e526f6930f3e460064b0f253ae9 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 5 Nov 2009 14:25:10 -0800 Subject: sysctl fs: Remove dead binary sysctl support Now that sys_sysctl is a generic wrapper around /proc/sys .ctl_name and .strategy members of sysctl tables are dead code. Remove them. Cc: Jan Harkes Signed-off-by: Eric W. Biederman --- fs/nfs/sysctl.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/sysctl.c b/fs/nfs/sysctl.c index b62481dabae9..af51e6af2072 100644 --- a/fs/nfs/sysctl.c +++ b/fs/nfs/sysctl.c @@ -22,7 +22,6 @@ static struct ctl_table_header *nfs_callback_sysctl_table; static ctl_table nfs_cb_sysctls[] = { #ifdef CONFIG_NFS_V4 { - .ctl_name = CTL_UNNUMBERED, .procname = "nfs_callback_tcpport", .data = &nfs_callback_set_tcpport, .maxlen = sizeof(int), @@ -32,53 +31,46 @@ static ctl_table nfs_cb_sysctls[] = { .extra2 = (int *)&nfs_set_port_max, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "idmap_cache_timeout", .data = &nfs_idmap_cache_timeout, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, }, #endif { - .ctl_name = CTL_UNNUMBERED, .procname = "nfs_mountpoint_timeout", .data = &nfs_mountpoint_expiry_timeout, .maxlen = sizeof(nfs_mountpoint_expiry_timeout), .mode = 0644, .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "nfs_congestion_kb", .data = &nfs_congestion_kb, .maxlen = sizeof(nfs_congestion_kb), .mode = 0644, .proc_handler = &proc_dointvec, }, - { .ctl_name = 0 } + { } }; static ctl_table nfs_cb_sysctl_dir[] = { { - .ctl_name = CTL_UNNUMBERED, .procname = "nfs", .mode = 0555, .child = nfs_cb_sysctls, }, - { .ctl_name = 0 } + { } }; static ctl_table nfs_cb_sysctl_root[] = { { - .ctl_name = CTL_FS, .procname = "fs", .mode = 0555, .child = nfs_cb_sysctl_dir, }, - { .ctl_name = 0 } + { } }; int nfs_register_sysctl(void) -- cgit v1.2.2 From 6d4561110a3e9fa742aeec6717248a491dfb1878 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 16 Nov 2009 03:11:48 -0800 Subject: sysctl: Drop & in front of every proc_handler. For consistency drop & in front of every proc_handler. Explicity taking the address is unnecessary and it prevents optimizations like stubbing the proc_handlers to NULL. Cc: Alexey Dobriyan Cc: Ingo Molnar Cc: Joe Perches Signed-off-by: Eric W. Biederman --- fs/nfs/sysctl.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/sysctl.c b/fs/nfs/sysctl.c index af51e6af2072..70e1fbbaaeab 100644 --- a/fs/nfs/sysctl.c +++ b/fs/nfs/sysctl.c @@ -26,7 +26,7 @@ static ctl_table nfs_cb_sysctls[] = { .data = &nfs_callback_set_tcpport, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, + .proc_handler = proc_dointvec_minmax, .extra1 = (int *)&nfs_set_port_min, .extra2 = (int *)&nfs_set_port_max, }, @@ -35,7 +35,7 @@ static ctl_table nfs_cb_sysctls[] = { .data = &nfs_idmap_cache_timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, #endif { @@ -43,14 +43,14 @@ static ctl_table nfs_cb_sysctls[] = { .data = &nfs_mountpoint_expiry_timeout, .maxlen = sizeof(nfs_mountpoint_expiry_timeout), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "nfs_congestion_kb", .data = &nfs_congestion_kb, .maxlen = sizeof(nfs_congestion_kb), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { } }; -- cgit v1.2.2 From 201a15428bd54f83eccec8b7c64a04b8f9431204 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 19 Nov 2009 18:11:35 +0000 Subject: FS-Cache: Handle pages pending storage that get evicted under OOM conditions Handle netfs pages that the vmscan algorithm wants to evict from the pagecache under OOM conditions, but that are waiting for write to the cache. Under these conditions, vmscan calls the releasepage() function of the netfs, asking if a page can be discarded. The problem is typified by the following trace of a stuck process: kslowd005 D 0000000000000000 0 4253 2 0x00000080 ffff88001b14f370 0000000000000046 ffff880020d0d000 0000000000000007 0000000000000006 0000000000000001 ffff88001b14ffd8 ffff880020d0d2a8 000000000000ddf0 00000000000118c0 00000000000118c0 ffff880020d0d2a8 Call Trace: [] __fscache_wait_on_page_write+0x8b/0xa7 [fscache] [] ? autoremove_wake_function+0x0/0x34 [] ? __fscache_check_page_write+0x63/0x70 [fscache] [] nfs_fscache_release_page+0x4e/0xc4 [nfs] [] nfs_release_page+0x3c/0x41 [nfs] [] try_to_release_page+0x32/0x3b [] shrink_page_list+0x316/0x4ac [] shrink_inactive_list+0x392/0x67c [] ? __mutex_unlock_slowpath+0x100/0x10b [] ? trace_hardirqs_on_caller+0x10c/0x130 [] ? mutex_unlock+0x9/0xb [] shrink_list+0x8d/0x8f [] shrink_zone+0x278/0x33c [] ? ktime_get_ts+0xad/0xba [] try_to_free_pages+0x22e/0x392 [] ? isolate_pages_global+0x0/0x212 [] __alloc_pages_nodemask+0x3dc/0x5cf [] grab_cache_page_write_begin+0x65/0xaa [] ext3_write_begin+0x78/0x1eb [] generic_file_buffered_write+0x109/0x28c [] ? current_fs_time+0x22/0x29 [] __generic_file_aio_write+0x350/0x385 [] ? generic_file_aio_write+0x4a/0xae [] generic_file_aio_write+0x60/0xae [] do_sync_write+0xe3/0x120 [] ? autoremove_wake_function+0x0/0x34 [] ? __dentry_open+0x1a5/0x2b8 [] ? dentry_open+0x82/0x89 [] cachefiles_write_page+0x298/0x335 [cachefiles] [] fscache_write_op+0x178/0x2c2 [fscache] [] fscache_op_execute+0x7a/0xd1 [fscache] [] slow_work_execute+0x18f/0x2d1 [] slow_work_thread+0x1c5/0x308 [] ? autoremove_wake_function+0x0/0x34 [] ? slow_work_thread+0x0/0x308 [] kthread+0x7a/0x82 [] child_rip+0xa/0x20 [] ? restore_args+0x0/0x30 [] ? tg_shares_up+0x171/0x227 [] ? kthread+0x0/0x82 [] ? child_rip+0x0/0x20 In the above backtrace, the following is happening: (1) A page storage operation is being executed by a slow-work thread (fscache_write_op()). (2) FS-Cache farms the operation out to the cache to perform (cachefiles_write_page()). (3) CacheFiles is then calling Ext3 to perform the actual write, using Ext3's standard write (do_sync_write()) under KERNEL_DS directly from the netfs page. (4) However, for Ext3 to perform the write, it must allocate some memory, in particular, it must allocate at least one page cache page into which it can copy the data from the netfs page. (5) Under OOM conditions, the memory allocator can't immediately come up with a page, so it uses vmscan to find something to discard (try_to_free_pages()). (6) vmscan finds a clean netfs page it might be able to discard (possibly the one it's trying to write out). (7) The netfs is called to throw the page away (nfs_release_page()) - but it's called with __GFP_WAIT, so the netfs decides to wait for the store to complete (__fscache_wait_on_page_write()). (8) This blocks a slow-work processing thread - possibly against itself. The system ends up stuck because it can't write out any netfs pages to the cache without allocating more memory. To avoid this, we make FS-Cache cancel some writes that aren't in the middle of actually being performed. This means that some data won't make it into the cache this time. To support this, a new FS-Cache function is added fscache_maybe_release_page() that replaces what the netfs releasepage() functions used to do with respect to the cache. The decisions fscache_maybe_release_page() makes are counted and displayed through /proc/fs/fscache/stats on a line labelled "VmScan". There are four counters provided: "nos=N" - pages that weren't pending storage; "gon=N" - pages that were pending storage when we first looked, but weren't by the time we got the object lock; "bsy=N" - pages that we ignored as they were actively being written when we looked; and "can=N" - pages that we cancelled the storage of. What I'd really like to do is alter the behaviour of the cancellation heuristics, depending on how necessary it is to expel pages. If there are plenty of other pages that aren't waiting to be written to the cache that could be ejected first, then it would be nice to hold up on immediate cancellation of cache writes - but I don't see a way of doing that. Signed-off-by: David Howells --- fs/nfs/fscache.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index 70fad69eb959..fa588006588d 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ -359,17 +359,13 @@ int nfs_fscache_release_page(struct page *page, gfp_t gfp) BUG_ON(!cookie); - if (fscache_check_page_write(cookie, page)) { - if (!(gfp & __GFP_WAIT)) - return 0; - fscache_wait_on_page_write(cookie, page); - } - if (PageFsCache(page)) { dfprintk(FSCACHE, "NFS: fscache releasepage (0x%p/0x%p/0x%p)\n", cookie, page, nfsi); - fscache_uncache_page(cookie, page); + if (!fscache_maybe_release_page(cookie, page, gfp)) + return 0; + nfs_add_fscache_stats(page->mapping->host, NFSIOS_FSCACHE_PAGES_UNCACHED, 1); } -- cgit v1.2.2 From b17621fed6aa039387e35f9b4d34d98f213e5673 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Thu, 3 Dec 2009 13:54:25 +0100 Subject: writeback: introduce wbc.for_background It will lower the flush priority for NFS, and maybe more in future. Signed-off-by: Wu Fengguang Cc: Trond Myklebust Cc: Jens Axboe Cc: Christoph Hellwig Signed-off-by: Jens Axboe --- fs/nfs/write.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 53eb26c16b50..c84b5cc1a943 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -178,7 +178,7 @@ static int wb_priority(struct writeback_control *wbc) { if (wbc->for_reclaim) return FLUSH_HIGHPRI | FLUSH_STABLE; - if (wbc->for_kupdate) + if (wbc->for_kupdate || wbc->for_background) return FLUSH_LOWPRI; return 0; } -- cgit v1.2.2