aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Howells <dhowells@redhat.com>2013-05-21 08:44:15 -0400
committerDavid Howells <dhowells@redhat.com>2013-06-19 09:16:47 -0400
commit1bb4b7f98f361132ea322834515334d95b93c184 (patch)
treeebe77590c4d49beaf7c1504475c53d625b03d882
parent2144bc78d41fe31ba58ffdd48571a54d3ca6b5fe (diff)
FS-Cache: The retrieval remaining-pages counter needs to be atomic_t
struct fscache_retrieval contains a count of the number of pages that still need some processing (n_pages). This is decremented as the pages are processed. However, this needs to be atomic as fscache_retrieval_complete() (I think) just occasionally may be called from cachefiles_read_backing_file() and cachefiles_read_copier() simultaneously. This happens when an fscache_read_or_alloc_pages() request containing a lot of pages (say a couple of hundred) is being processed. The read on each backing page is dispatched individually because we need to insert a monitor into the waitqueue to catch when the read completes. However, under low-memory conditions, we might be forced to wait in the allocator - and this gives the I/O on the backing page a chance to complete first. When the I/O completes, fscache_enqueue_retrieval() chucks the retrieval onto the workqueue without waiting for the operation to finish the initial I/O dispatch (we want to release any pages we can as soon as we can), thus both can end up running simultaneously and potentially attempting to partially complete the retrieval simultaneously (ENOMEM may occur, backing pages may already be in the page cache). This was demonstrated by parallelling the non-atomic counter with an atomic counter and printing both of them when the assertion fails. At this point, the atomic counter has reached zero, but the non-atomic counter has not. To fix this, make the counter an atomic_t. This results in the following bug appearing FS-Cache: Assertion failed 3 == 5 is false ------------[ cut here ]------------ kernel BUG at fs/fscache/operation.c:421! or FS-Cache: Assertion failed 3 == 5 is false ------------[ cut here ]------------ kernel BUG at fs/fscache/operation.c:414! With a backtrace like the following: RIP: 0010:[<ffffffffa0211b1d>] fscache_put_operation+0x1ad/0x240 [fscache] Call Trace: [<ffffffffa0213185>] fscache_retrieval_work+0x55/0x270 [fscache] [<ffffffffa0213130>] ? fscache_retrieval_work+0x0/0x270 [fscache] [<ffffffff81090b10>] worker_thread+0x170/0x2a0 [<ffffffff81096d10>] ? autoremove_wake_function+0x0/0x40 [<ffffffff810909a0>] ? worker_thread+0x0/0x2a0 [<ffffffff81096966>] kthread+0x96/0xa0 [<ffffffff8100c0ca>] child_rip+0xa/0x20 [<ffffffff810968d0>] ? kthread+0x0/0xa0 [<ffffffff8100c0c0>] ? child_rip+0x0/0x20 Signed-off-by: David Howells <dhowells@redhat.com> Reviewed-and-tested-By: Milosz Tanski <milosz@adfin.com> Acked-by: Jeff Layton <jlayton@redhat.com>
-rw-r--r--fs/fscache/page.c10
-rw-r--r--include/linux/fscache-cache.h7
2 files changed, 8 insertions, 9 deletions
diff --git a/fs/fscache/page.c b/fs/fscache/page.c
index 780bac6ffde5..d479ab3c63e4 100644
--- a/fs/fscache/page.c
+++ b/fs/fscache/page.c
@@ -235,7 +235,7 @@ static void fscache_release_retrieval_op(struct fscache_operation *_op)
235 235
236 _enter("{OP%x}", op->op.debug_id); 236 _enter("{OP%x}", op->op.debug_id);
237 237
238 ASSERTCMP(op->n_pages, ==, 0); 238 ASSERTCMP(atomic_read(&op->n_pages), ==, 0);
239 239
240 fscache_hist(fscache_retrieval_histogram, op->start_time); 240 fscache_hist(fscache_retrieval_histogram, op->start_time);
241 if (op->context) 241 if (op->context)
@@ -316,7 +316,7 @@ static void fscache_do_cancel_retrieval(struct fscache_operation *_op)
316 struct fscache_retrieval *op = 316 struct fscache_retrieval *op =
317 container_of(_op, struct fscache_retrieval, op); 317 container_of(_op, struct fscache_retrieval, op);
318 318
319 op->n_pages = 0; 319 atomic_set(&op->n_pages, 0);
320} 320}
321 321
322/* 322/*
@@ -406,7 +406,7 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie,
406 _leave(" = -ENOMEM"); 406 _leave(" = -ENOMEM");
407 return -ENOMEM; 407 return -ENOMEM;
408 } 408 }
409 op->n_pages = 1; 409 atomic_set(&op->n_pages, 1);
410 410
411 spin_lock(&cookie->lock); 411 spin_lock(&cookie->lock);
412 412
@@ -533,7 +533,7 @@ int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie,
533 op = fscache_alloc_retrieval(cookie, mapping, end_io_func, context); 533 op = fscache_alloc_retrieval(cookie, mapping, end_io_func, context);
534 if (!op) 534 if (!op)
535 return -ENOMEM; 535 return -ENOMEM;
536 op->n_pages = *nr_pages; 536 atomic_set(&op->n_pages, *nr_pages);
537 537
538 spin_lock(&cookie->lock); 538 spin_lock(&cookie->lock);
539 539
@@ -643,7 +643,7 @@ int __fscache_alloc_page(struct fscache_cookie *cookie,
643 op = fscache_alloc_retrieval(cookie, page->mapping, NULL, NULL); 643 op = fscache_alloc_retrieval(cookie, page->mapping, NULL, NULL);
644 if (!op) 644 if (!op)
645 return -ENOMEM; 645 return -ENOMEM;
646 op->n_pages = 1; 646 atomic_set(&op->n_pages, 1);
647 647
648 spin_lock(&cookie->lock); 648 spin_lock(&cookie->lock);
649 649
diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index d32f70611a00..a9ff9a36b86d 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -151,7 +151,7 @@ struct fscache_retrieval {
151 void *context; /* netfs read context (pinned) */ 151 void *context; /* netfs read context (pinned) */
152 struct list_head to_do; /* list of things to be done by the backend */ 152 struct list_head to_do; /* list of things to be done by the backend */
153 unsigned long start_time; /* time at which retrieval started */ 153 unsigned long start_time; /* time at which retrieval started */
154 unsigned n_pages; /* number of pages to be retrieved */ 154 atomic_t n_pages; /* number of pages to be retrieved */
155}; 155};
156 156
157typedef int (*fscache_page_retrieval_func_t)(struct fscache_retrieval *op, 157typedef int (*fscache_page_retrieval_func_t)(struct fscache_retrieval *op,
@@ -195,15 +195,14 @@ static inline void fscache_enqueue_retrieval(struct fscache_retrieval *op)
195static inline void fscache_retrieval_complete(struct fscache_retrieval *op, 195static inline void fscache_retrieval_complete(struct fscache_retrieval *op,
196 int n_pages) 196 int n_pages)
197{ 197{
198 op->n_pages -= n_pages; 198 atomic_sub(n_pages, &op->n_pages);
199 if (op->n_pages <= 0) 199 if (atomic_read(&op->n_pages) <= 0)
200 fscache_op_complete(&op->op, true); 200 fscache_op_complete(&op->op, true);
201} 201}
202 202
203/** 203/**
204 * fscache_put_retrieval - Drop a reference to a retrieval operation 204 * fscache_put_retrieval - Drop a reference to a retrieval operation
205 * @op: The retrieval operation affected 205 * @op: The retrieval operation affected
206 * @n_pages: The number of pages to account for
207 * 206 *
208 * Drop a reference to a retrieval operation. 207 * Drop a reference to a retrieval operation.
209 */ 208 */