diff options
author | Weston Andros Adamson <dros@primarydata.com> | 2014-07-11 10:20:48 -0400 |
---|---|---|
committer | Trond Myklebust <trond.myklebust@primarydata.com> | 2014-07-12 17:35:46 -0400 |
commit | d458138353726ea6dcbc53ae3597e489d0432c25 (patch) | |
tree | 178e8cb0040307bda0dbd556ad1137b89ca9a10f /fs/nfs | |
parent | 84d3a9a913ba6a90c79b7763d063bb42554a8906 (diff) |
nfs: handle multiple reqs in nfs_page_async_flush
Change nfs_find_and_lock_request so nfs_page_async_flush can handle multiple
requests in a page. There is only one request for a page the first time
nfs_page_async_flush is called, but if a write or commit fails, async_flush
is called again and there may be multiple requests associated with the page.
The solution is to merge all the requests in a page group into a single
request before calling nfs_pageio_add_request.
Rename nfs_find_and_lock_request to nfs_lock_and_join_requests and
change it to first lock all requests for the page, then cancel and merge
all subrequests into the head request.
Signed-off-by: Weston Andros Adamson <dros@primarydata.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Diffstat (limited to 'fs/nfs')
-rw-r--r-- | fs/nfs/internal.h | 1 | ||||
-rw-r--r-- | fs/nfs/pagelist.c | 4 | ||||
-rw-r--r-- | fs/nfs/write.c | 255 |
3 files changed, 235 insertions, 25 deletions
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 82ddbf46660e..f415cbf9f6c3 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h | |||
@@ -244,6 +244,7 @@ void nfs_pgio_data_release(struct nfs_pgio_data *); | |||
244 | int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *); | 244 | int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *); |
245 | int nfs_initiate_pgio(struct rpc_clnt *, struct nfs_pgio_data *, | 245 | int nfs_initiate_pgio(struct rpc_clnt *, struct nfs_pgio_data *, |
246 | const struct rpc_call_ops *, int, int); | 246 | const struct rpc_call_ops *, int, int); |
247 | void nfs_free_request(struct nfs_page *req); | ||
247 | 248 | ||
248 | static inline void nfs_iocounter_init(struct nfs_io_counter *c) | 249 | static inline void nfs_iocounter_init(struct nfs_io_counter *c) |
249 | { | 250 | { |
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 05a63593a61f..0aefc8102b6b 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c | |||
@@ -29,8 +29,6 @@ | |||
29 | static struct kmem_cache *nfs_page_cachep; | 29 | static struct kmem_cache *nfs_page_cachep; |
30 | static const struct rpc_call_ops nfs_pgio_common_ops; | 30 | static const struct rpc_call_ops nfs_pgio_common_ops; |
31 | 31 | ||
32 | static void nfs_free_request(struct nfs_page *); | ||
33 | |||
34 | static bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount) | 32 | static bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount) |
35 | { | 33 | { |
36 | p->npages = pagecount; | 34 | p->npages = pagecount; |
@@ -406,7 +404,7 @@ static void nfs_clear_request(struct nfs_page *req) | |||
406 | * | 404 | * |
407 | * Note: Should never be called with the spinlock held! | 405 | * Note: Should never be called with the spinlock held! |
408 | */ | 406 | */ |
409 | static void nfs_free_request(struct nfs_page *req) | 407 | void nfs_free_request(struct nfs_page *req) |
410 | { | 408 | { |
411 | WARN_ON_ONCE(req->wb_this_page != req); | 409 | WARN_ON_ONCE(req->wb_this_page != req); |
412 | 410 | ||
diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 53c4a9917dac..9f4424c464a0 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c | |||
@@ -46,6 +46,7 @@ static const struct rpc_call_ops nfs_commit_ops; | |||
46 | static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops; | 46 | static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops; |
47 | static const struct nfs_commit_completion_ops nfs_commit_completion_ops; | 47 | static const struct nfs_commit_completion_ops nfs_commit_completion_ops; |
48 | static const struct nfs_rw_ops nfs_rw_write_ops; | 48 | static const struct nfs_rw_ops nfs_rw_write_ops; |
49 | static void nfs_clear_request_commit(struct nfs_page *req); | ||
49 | 50 | ||
50 | static struct kmem_cache *nfs_wdata_cachep; | 51 | static struct kmem_cache *nfs_wdata_cachep; |
51 | static mempool_t *nfs_wdata_mempool; | 52 | static mempool_t *nfs_wdata_mempool; |
@@ -289,36 +290,246 @@ static void nfs_end_page_writeback(struct nfs_page *req) | |||
289 | clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC); | 290 | clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC); |
290 | } | 291 | } |
291 | 292 | ||
292 | static struct nfs_page *nfs_find_and_lock_request(struct page *page, bool nonblock) | 293 | |
294 | /* nfs_page_group_clear_bits | ||
295 | * @req - an nfs request | ||
296 | * clears all page group related bits from @req | ||
297 | */ | ||
298 | static void | ||
299 | nfs_page_group_clear_bits(struct nfs_page *req) | ||
300 | { | ||
301 | clear_bit(PG_TEARDOWN, &req->wb_flags); | ||
302 | clear_bit(PG_UNLOCKPAGE, &req->wb_flags); | ||
303 | clear_bit(PG_UPTODATE, &req->wb_flags); | ||
304 | clear_bit(PG_WB_END, &req->wb_flags); | ||
305 | clear_bit(PG_REMOVE, &req->wb_flags); | ||
306 | } | ||
307 | |||
308 | |||
309 | /* | ||
310 | * nfs_unroll_locks_and_wait - unlock all newly locked reqs and wait on @req | ||
311 | * | ||
312 | * this is a helper function for nfs_lock_and_join_requests | ||
313 | * | ||
314 | * @inode - inode associated with request page group, must be holding inode lock | ||
315 | * @head - head request of page group, must be holding head lock | ||
316 | * @req - request that couldn't lock and needs to wait on the req bit lock | ||
317 | * @nonblock - if true, don't actually wait | ||
318 | * | ||
319 | * NOTE: this must be called holding page_group bit lock and inode spin lock | ||
320 | * and BOTH will be released before returning. | ||
321 | * | ||
322 | * returns 0 on success, < 0 on error. | ||
323 | */ | ||
324 | static int | ||
325 | nfs_unroll_locks_and_wait(struct inode *inode, struct nfs_page *head, | ||
326 | struct nfs_page *req, bool nonblock) | ||
327 | __releases(&inode->i_lock) | ||
328 | { | ||
329 | struct nfs_page *tmp; | ||
330 | int ret; | ||
331 | |||
332 | /* relinquish all the locks successfully grabbed this run */ | ||
333 | for (tmp = head ; tmp != req; tmp = tmp->wb_this_page) | ||
334 | nfs_unlock_request(tmp); | ||
335 | |||
336 | WARN_ON_ONCE(test_bit(PG_TEARDOWN, &req->wb_flags)); | ||
337 | |||
338 | /* grab a ref on the request that will be waited on */ | ||
339 | kref_get(&req->wb_kref); | ||
340 | |||
341 | nfs_page_group_unlock(head); | ||
342 | spin_unlock(&inode->i_lock); | ||
343 | |||
344 | /* release ref from nfs_page_find_head_request_locked */ | ||
345 | nfs_release_request(head); | ||
346 | |||
347 | if (!nonblock) | ||
348 | ret = nfs_wait_on_request(req); | ||
349 | else | ||
350 | ret = -EAGAIN; | ||
351 | nfs_release_request(req); | ||
352 | |||
353 | return ret; | ||
354 | } | ||
355 | |||
356 | /* | ||
357 | * nfs_destroy_unlinked_subrequests - destroy recently unlinked subrequests | ||
358 | * | ||
359 | * @destroy_list - request list (using wb_this_page) terminated by @old_head | ||
360 | * @old_head - the old head of the list | ||
361 | * | ||
362 | * All subrequests must be locked and removed from all lists, so at this point | ||
363 | * they are only "active" in this function, and possibly in nfs_wait_on_request | ||
364 | * with a reference held by some other context. | ||
365 | */ | ||
366 | static void | ||
367 | nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list, | ||
368 | struct nfs_page *old_head) | ||
369 | { | ||
370 | while (destroy_list) { | ||
371 | struct nfs_page *subreq = destroy_list; | ||
372 | |||
373 | destroy_list = (subreq->wb_this_page == old_head) ? | ||
374 | NULL : subreq->wb_this_page; | ||
375 | |||
376 | WARN_ON_ONCE(old_head != subreq->wb_head); | ||
377 | |||
378 | /* make sure old group is not used */ | ||
379 | subreq->wb_head = subreq; | ||
380 | subreq->wb_this_page = subreq; | ||
381 | |||
382 | nfs_clear_request_commit(subreq); | ||
383 | |||
384 | /* subreq is now totally disconnected from page group or any | ||
385 | * write / commit lists. last chance to wake any waiters */ | ||
386 | nfs_unlock_request(subreq); | ||
387 | |||
388 | if (!test_bit(PG_TEARDOWN, &subreq->wb_flags)) { | ||
389 | /* release ref on old head request */ | ||
390 | nfs_release_request(old_head); | ||
391 | |||
392 | nfs_page_group_clear_bits(subreq); | ||
393 | |||
394 | /* release the PG_INODE_REF reference */ | ||
395 | if (test_and_clear_bit(PG_INODE_REF, &subreq->wb_flags)) | ||
396 | nfs_release_request(subreq); | ||
397 | else | ||
398 | WARN_ON_ONCE(1); | ||
399 | } else { | ||
400 | WARN_ON_ONCE(test_bit(PG_CLEAN, &subreq->wb_flags)); | ||
401 | /* zombie requests have already released the last | ||
402 | * reference and were waiting on the rest of the | ||
403 | * group to complete. Since it's no longer part of a | ||
404 | * group, simply free the request */ | ||
405 | nfs_page_group_clear_bits(subreq); | ||
406 | nfs_free_request(subreq); | ||
407 | } | ||
408 | } | ||
409 | } | ||
410 | |||
411 | /* | ||
412 | * nfs_lock_and_join_requests - join all subreqs to the head req and return | ||
413 | * a locked reference, cancelling any pending | ||
414 | * operations for this page. | ||
415 | * | ||
416 | * @page - the page used to lookup the "page group" of nfs_page structures | ||
417 | * @nonblock - if true, don't block waiting for request locks | ||
418 | * | ||
419 | * This function joins all sub requests to the head request by first | ||
420 | * locking all requests in the group, cancelling any pending operations | ||
421 | * and finally updating the head request to cover the whole range covered by | ||
422 | * the (former) group. All subrequests are removed from any write or commit | ||
423 | * lists, unlinked from the group and destroyed. | ||
424 | * | ||
425 | * Returns a locked, referenced pointer to the head request - which after | ||
426 | * this call is guaranteed to be the only request associated with the page. | ||
427 | * Returns NULL if no requests are found for @page, or an ERR_PTR if an | ||
428 | * error was encountered. | ||
429 | */ | ||
430 | static struct nfs_page * | ||
431 | nfs_lock_and_join_requests(struct page *page, bool nonblock) | ||
293 | { | 432 | { |
294 | struct inode *inode = page_file_mapping(page)->host; | 433 | struct inode *inode = page_file_mapping(page)->host; |
295 | struct nfs_page *req; | 434 | struct nfs_page *head, *subreq; |
435 | struct nfs_page *destroy_list = NULL; | ||
436 | unsigned int total_bytes; | ||
296 | int ret; | 437 | int ret; |
297 | 438 | ||
439 | try_again: | ||
440 | total_bytes = 0; | ||
441 | |||
442 | WARN_ON_ONCE(destroy_list); | ||
443 | |||
298 | spin_lock(&inode->i_lock); | 444 | spin_lock(&inode->i_lock); |
299 | for (;;) { | 445 | |
300 | req = nfs_page_find_head_request_locked(NFS_I(inode), page); | 446 | /* |
301 | if (req == NULL) | 447 | * A reference is taken only on the head request which acts as a |
302 | break; | 448 | * reference to the whole page group - the group will not be destroyed |
303 | if (nfs_lock_request(req)) | 449 | * until the head reference is released. |
304 | break; | 450 | */ |
305 | /* Note: If we hold the page lock, as is the case in nfs_writepage, | 451 | head = nfs_page_find_head_request_locked(NFS_I(inode), page); |
306 | * then the call to nfs_lock_request() will always | 452 | |
307 | * succeed provided that someone hasn't already marked the | 453 | if (!head) { |
308 | * request as dirty (in which case we don't care). | ||
309 | */ | ||
310 | spin_unlock(&inode->i_lock); | 454 | spin_unlock(&inode->i_lock); |
311 | if (!nonblock) | 455 | return NULL; |
312 | ret = nfs_wait_on_request(req); | 456 | } |
313 | else | 457 | |
314 | ret = -EAGAIN; | 458 | /* lock each request in the page group */ |
315 | nfs_release_request(req); | 459 | nfs_page_group_lock(head); |
316 | if (ret != 0) | 460 | subreq = head; |
461 | do { | ||
462 | /* | ||
463 | * Subrequests are always contiguous, non-overlapping | ||
464 | * and in order. If not, it's a programming error. | ||
465 | */ | ||
466 | WARN_ON_ONCE(subreq->wb_offset != | ||
467 | (head->wb_offset + total_bytes)); | ||
468 | |||
469 | /* keep track of how many bytes this group covers */ | ||
470 | total_bytes += subreq->wb_bytes; | ||
471 | |||
472 | if (!nfs_lock_request(subreq)) { | ||
473 | /* releases page group bit lock and | ||
474 | * inode spin lock and all references */ | ||
475 | ret = nfs_unroll_locks_and_wait(inode, head, | ||
476 | subreq, nonblock); | ||
477 | |||
478 | if (ret == 0) | ||
479 | goto try_again; | ||
480 | |||
317 | return ERR_PTR(ret); | 481 | return ERR_PTR(ret); |
318 | spin_lock(&inode->i_lock); | 482 | } |
483 | |||
484 | subreq = subreq->wb_this_page; | ||
485 | } while (subreq != head); | ||
486 | |||
487 | /* Now that all requests are locked, make sure they aren't on any list. | ||
488 | * Commit list removal accounting is done after locks are dropped */ | ||
489 | subreq = head; | ||
490 | do { | ||
491 | nfs_list_remove_request(subreq); | ||
492 | subreq = subreq->wb_this_page; | ||
493 | } while (subreq != head); | ||
494 | |||
495 | /* unlink subrequests from head, destroy them later */ | ||
496 | if (head->wb_this_page != head) { | ||
497 | /* destroy list will be terminated by head */ | ||
498 | destroy_list = head->wb_this_page; | ||
499 | head->wb_this_page = head; | ||
500 | |||
501 | /* change head request to cover whole range that | ||
502 | * the former page group covered */ | ||
503 | head->wb_bytes = total_bytes; | ||
319 | } | 504 | } |
505 | |||
506 | /* | ||
507 | * prepare head request to be added to new pgio descriptor | ||
508 | */ | ||
509 | nfs_page_group_clear_bits(head); | ||
510 | |||
511 | /* | ||
512 | * some part of the group was still on the inode list - otherwise | ||
513 | * the group wouldn't be involved in async write. | ||
514 | * grab a reference for the head request, iff it needs one. | ||
515 | */ | ||
516 | if (!test_and_set_bit(PG_INODE_REF, &head->wb_flags)) | ||
517 | kref_get(&head->wb_kref); | ||
518 | |||
519 | nfs_page_group_unlock(head); | ||
520 | |||
521 | /* drop lock to clear_request_commit the head req and clean up | ||
522 | * requests on destroy list */ | ||
320 | spin_unlock(&inode->i_lock); | 523 | spin_unlock(&inode->i_lock); |
321 | return req; | 524 | |
525 | nfs_destroy_unlinked_subrequests(destroy_list, head); | ||
526 | |||
527 | /* clean up commit list state */ | ||
528 | nfs_clear_request_commit(head); | ||
529 | |||
530 | /* still holds ref on head from nfs_page_find_head_request_locked | ||
531 | * and still has lock on head from lock loop */ | ||
532 | return head; | ||
322 | } | 533 | } |
323 | 534 | ||
324 | /* | 535 | /* |
@@ -331,7 +542,7 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, | |||
331 | struct nfs_page *req; | 542 | struct nfs_page *req; |
332 | int ret = 0; | 543 | int ret = 0; |
333 | 544 | ||
334 | req = nfs_find_and_lock_request(page, nonblock); | 545 | req = nfs_lock_and_join_requests(page, nonblock); |
335 | if (!req) | 546 | if (!req) |
336 | goto out; | 547 | goto out; |
337 | ret = PTR_ERR(req); | 548 | ret = PTR_ERR(req); |