author     David Howells <dhowells@redhat.com>	2012-12-05 08:34:49 -0500
committer  David Howells <dhowells@redhat.com>	2012-12-20 17:12:03 -0500
commit     8c209ce721444a61b61d9e772746c721e4d8d1e8
tree       3a38c101c4974d774f62069ac0502e65864a0ea6 /fs/fscache
parent     8d76349d359064859217dc292dc8733e209705af
NFS: nfs_migrate_page() does not wait for FS-Cache to finish with a page
nfs_migrate_page() does not wait for FS-Cache to finish with a page, probably
leading to the following bad-page-state report:
BUG: Bad page state in process python-bin pfn:17d39b
page:ffffea00053649e8 flags:004000000000100c count:0 mapcount:0 mapping:(null)
index:38686 (Tainted: G B ---------------- )
Pid: 31053, comm: python-bin Tainted: G B ----------------
2.6.32-71.24.1.el6.x86_64 #1
Call Trace:
[<ffffffff8111bfe7>] bad_page+0x107/0x160
[<ffffffff8111ee69>] free_hot_cold_page+0x1c9/0x220
[<ffffffff8111ef19>] __pagevec_free+0x59/0xb0
[<ffffffff8104b988>] ? flush_tlb_others_ipi+0x128/0x130
[<ffffffff8112230c>] release_pages+0x21c/0x250
[<ffffffff8115b92a>] ? remove_migration_pte+0x28a/0x2b0
[<ffffffff8115f3f8>] ? mem_cgroup_get_reclaim_stat_from_page+0x18/0x70
[<ffffffff81122687>] ____pagevec_lru_add+0x167/0x180
[<ffffffff811226f8>] __lru_cache_add+0x58/0x70
[<ffffffff81122731>] lru_cache_add_lru+0x21/0x40
[<ffffffff81123f49>] putback_lru_page+0x69/0x100
[<ffffffff8115c0bd>] migrate_pages+0x13d/0x5d0
[<ffffffff81122687>] ? ____pagevec_lru_add+0x167/0x180
[<ffffffff81152ab0>] ? compaction_alloc+0x0/0x370
[<ffffffff8115255c>] compact_zone+0x4cc/0x600
[<ffffffff8111cfac>] ? get_page_from_freelist+0x15c/0x820
[<ffffffff810672f4>] ? check_preempt_wakeup+0x1c4/0x3c0
[<ffffffff8115290e>] compact_zone_order+0x7e/0xb0
[<ffffffff81152a49>] try_to_compact_pages+0x109/0x170
[<ffffffff8111e94d>] __alloc_pages_nodemask+0x5ed/0x850
[<ffffffff814c9136>] ? thread_return+0x4e/0x778
[<ffffffff81150d43>] alloc_pages_vma+0x93/0x150
[<ffffffff81167ea5>] do_huge_pmd_anonymous_page+0x135/0x340
[<ffffffff814cb6f6>] ? rwsem_down_read_failed+0x26/0x30
[<ffffffff81136755>] handle_mm_fault+0x245/0x2b0
[<ffffffff814ce383>] do_page_fault+0x123/0x3a0
[<ffffffff814cbdf5>] page_fault+0x25/0x30
nfs_migrate_page() calls nfs_fscache_release_page(), which doesn't actually wait,
even if __GFP_WAIT is set. The reason it doesn't wait is that
fscache_maybe_release_page() could deadlock the allocator: the work threads
writing to the cache may all end up sleeping on memory allocation.
However, I wonder if that is actually a problem. There are a number of things
I can do to deal with this:
(1) Make nfs_migrate_page() wait.
(2) Make fscache_maybe_release_page() honour the __GFP_WAIT flag.
(3) Set a timeout around the wait.
(4) Make nfs_migrate_page() return an error if the page is still busy.
For the moment, I'll select (2) and (4); a sketch of the NFS-side part of (4) follows below.
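
Option (4) lives on the NFS side, so it does not appear in the fs/fscache
diffstat below. Roughly, it amounts to something like the following sketch of
nfs_migrate_page() (illustration only, assuming nfs_fscache_release_page()
passes its gfp argument through to __fscache_maybe_release_page()):

	/* Sketch of option (4): refuse the migration rather than strip a page
	 * that FS-Cache may still be writing to the cache.
	 */
	int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
			     struct page *page, enum migrate_mode mode)
	{
		/* An attached nfs_page means an in-flight read or write
		 * request; don't try to migrate such a page. */
		if (PagePrivate(page))
			return -EBUSY;

		/* Ask FS-Cache to let go of the page; if it is still busy,
		 * report the page as busy instead of freeing it. */
		if (!nfs_fscache_release_page(page, GFP_KERNEL))
			return -EBUSY;

		return migrate_page(mapping, newpage, page, mode);
	}

With (2) in place, passing GFP_KERNEL here lets __fscache_maybe_release_page()
wait for the pending write before deciding, while (4) ensures that a page that
is still busy is reported as such rather than being freed under FS-Cache.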
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Jeff Layton <jlayton@redhat.com>
Diffstat (limited to 'fs/fscache')
 fs/fscache/internal.h |  1
 fs/fscache/page.c     | 19
 fs/fscache/stats.c    |  6
 3 files changed, 19 insertions(+), 7 deletions(-)
diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h
index dcb3e1d5dbf6..88a48ccb7d9e 100644
--- a/fs/fscache/internal.h
+++ b/fs/fscache/internal.h
@@ -200,6 +200,7 @@ extern atomic_t fscache_n_store_vmscan_not_storing;
 extern atomic_t fscache_n_store_vmscan_gone;
 extern atomic_t fscache_n_store_vmscan_busy;
 extern atomic_t fscache_n_store_vmscan_cancelled;
+extern atomic_t fscache_n_store_vmscan_wait;
 
 extern atomic_t fscache_n_marks;
 extern atomic_t fscache_n_uncaches;
diff --git a/fs/fscache/page.c b/fs/fscache/page.c
index 4dbbca162620..f9b2fb3ae492 100644
--- a/fs/fscache/page.c
+++ b/fs/fscache/page.c
@@ -56,6 +56,7 @@ bool __fscache_maybe_release_page(struct fscache_cookie *cookie,
 
 	_enter("%p,%p,%x", cookie, page, gfp);
 
+try_again:
 	rcu_read_lock();
 	val = radix_tree_lookup(&cookie->stores, page->index);
 	if (!val) {
@@ -104,11 +105,19 @@ bool __fscache_maybe_release_page(struct fscache_cookie *cookie,
 	return true;
 
 page_busy:
-	/* we might want to wait here, but that could deadlock the allocator as
-	 * the work threads writing to the cache may all end up sleeping
-	 * on memory allocation */
-	fscache_stat(&fscache_n_store_vmscan_busy);
-	return false;
+	/* We will wait here if we're allowed to, but that could deadlock the
+	 * allocator as the work threads writing to the cache may all end up
+	 * sleeping on memory allocation, so we may need to impose a timeout
+	 * too. */
+	if (!(gfp & __GFP_WAIT)) {
+		fscache_stat(&fscache_n_store_vmscan_busy);
+		return false;
+	}
+
+	fscache_stat(&fscache_n_store_vmscan_wait);
+	__fscache_wait_on_page_write(cookie, page);
+	gfp &= ~__GFP_WAIT;
+	goto try_again;
 }
 EXPORT_SYMBOL(__fscache_maybe_release_page);
 
diff --git a/fs/fscache/stats.c b/fs/fscache/stats.c
index 51cdaee14109..8179e8bc4a3d 100644
--- a/fs/fscache/stats.c
+++ b/fs/fscache/stats.c
@@ -69,6 +69,7 @@ atomic_t fscache_n_store_vmscan_not_storing;
 atomic_t fscache_n_store_vmscan_gone;
 atomic_t fscache_n_store_vmscan_busy;
 atomic_t fscache_n_store_vmscan_cancelled;
+atomic_t fscache_n_store_vmscan_wait;
 
 atomic_t fscache_n_marks;
 atomic_t fscache_n_uncaches;
@@ -232,11 +233,12 @@ static int fscache_stats_show(struct seq_file *m, void *v)
 		   atomic_read(&fscache_n_store_radix_deletes),
 		   atomic_read(&fscache_n_store_pages_over_limit));
 
-	seq_printf(m, "VmScan : nos=%u gon=%u bsy=%u can=%u\n",
+	seq_printf(m, "VmScan : nos=%u gon=%u bsy=%u can=%u wt=%u\n",
 		   atomic_read(&fscache_n_store_vmscan_not_storing),
 		   atomic_read(&fscache_n_store_vmscan_gone),
 		   atomic_read(&fscache_n_store_vmscan_busy),
-		   atomic_read(&fscache_n_store_vmscan_cancelled));
+		   atomic_read(&fscache_n_store_vmscan_cancelled),
+		   atomic_read(&fscache_n_store_vmscan_wait));
 
 	seq_printf(m, "Ops : pend=%u run=%u enq=%u can=%u rej=%u\n",
 		   atomic_read(&fscache_n_op_pend),