aboutsummaryrefslogtreecommitdiffstats
path: root/fs/nfs
diff options
context:
space:
mode:
authorWeston Andros Adamson <dros@primarydata.com>2014-07-11 10:20:48 -0400
committerTrond Myklebust <trond.myklebust@primarydata.com>2014-07-12 17:35:46 -0400
commitd458138353726ea6dcbc53ae3597e489d0432c25 (patch)
tree178e8cb0040307bda0dbd556ad1137b89ca9a10f /fs/nfs
parent84d3a9a913ba6a90c79b7763d063bb42554a8906 (diff)
nfs: handle multiple reqs in nfs_page_async_flush
Change nfs_find_and_lock_request so nfs_page_async_flush can handle multiple requests in a page. There is only one request for a page the first time nfs_page_async_flush is called, but if a write or commit fails, async_flush is called again and there may be multiple requests associated with the page. The solution is to merge all the requests in a page group into a single request before calling nfs_pageio_add_request. Rename nfs_find_and_lock_request to nfs_lock_and_join_requests and change it to first lock all requests for the page, then cancel and merge all subrequests into the head request. Signed-off-by: Weston Andros Adamson <dros@primarydata.com> Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Diffstat (limited to 'fs/nfs')
-rw-r--r--fs/nfs/internal.h1
-rw-r--r--fs/nfs/pagelist.c4
-rw-r--r--fs/nfs/write.c255
3 files changed, 235 insertions, 25 deletions
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 82ddbf46660e..f415cbf9f6c3 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -244,6 +244,7 @@ void nfs_pgio_data_release(struct nfs_pgio_data *);
244int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *); 244int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *);
245int nfs_initiate_pgio(struct rpc_clnt *, struct nfs_pgio_data *, 245int nfs_initiate_pgio(struct rpc_clnt *, struct nfs_pgio_data *,
246 const struct rpc_call_ops *, int, int); 246 const struct rpc_call_ops *, int, int);
247void nfs_free_request(struct nfs_page *req);
247 248
248static inline void nfs_iocounter_init(struct nfs_io_counter *c) 249static inline void nfs_iocounter_init(struct nfs_io_counter *c)
249{ 250{
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 05a63593a61f..0aefc8102b6b 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -29,8 +29,6 @@
29static struct kmem_cache *nfs_page_cachep; 29static struct kmem_cache *nfs_page_cachep;
30static const struct rpc_call_ops nfs_pgio_common_ops; 30static const struct rpc_call_ops nfs_pgio_common_ops;
31 31
32static void nfs_free_request(struct nfs_page *);
33
34static bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount) 32static bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount)
35{ 33{
36 p->npages = pagecount; 34 p->npages = pagecount;
@@ -406,7 +404,7 @@ static void nfs_clear_request(struct nfs_page *req)
406 * 404 *
407 * Note: Should never be called with the spinlock held! 405 * Note: Should never be called with the spinlock held!
408 */ 406 */
409static void nfs_free_request(struct nfs_page *req) 407void nfs_free_request(struct nfs_page *req)
410{ 408{
411 WARN_ON_ONCE(req->wb_this_page != req); 409 WARN_ON_ONCE(req->wb_this_page != req);
412 410
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 53c4a9917dac..9f4424c464a0 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -46,6 +46,7 @@ static const struct rpc_call_ops nfs_commit_ops;
46static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops; 46static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops;
47static const struct nfs_commit_completion_ops nfs_commit_completion_ops; 47static const struct nfs_commit_completion_ops nfs_commit_completion_ops;
48static const struct nfs_rw_ops nfs_rw_write_ops; 48static const struct nfs_rw_ops nfs_rw_write_ops;
49static void nfs_clear_request_commit(struct nfs_page *req);
49 50
50static struct kmem_cache *nfs_wdata_cachep; 51static struct kmem_cache *nfs_wdata_cachep;
51static mempool_t *nfs_wdata_mempool; 52static mempool_t *nfs_wdata_mempool;
@@ -289,36 +290,246 @@ static void nfs_end_page_writeback(struct nfs_page *req)
289 clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC); 290 clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC);
290} 291}
291 292
292static struct nfs_page *nfs_find_and_lock_request(struct page *page, bool nonblock) 293
294/* nfs_page_group_clear_bits
295 * @req - an nfs request
296 * clears all page group related bits from @req
297 */
298static void
299nfs_page_group_clear_bits(struct nfs_page *req)
300{
301 clear_bit(PG_TEARDOWN, &req->wb_flags);
302 clear_bit(PG_UNLOCKPAGE, &req->wb_flags);
303 clear_bit(PG_UPTODATE, &req->wb_flags);
304 clear_bit(PG_WB_END, &req->wb_flags);
305 clear_bit(PG_REMOVE, &req->wb_flags);
306}
307
308
309/*
310 * nfs_unroll_locks_and_wait - unlock all newly locked reqs and wait on @req
311 *
312 * this is a helper function for nfs_lock_and_join_requests
313 *
314 * @inode - inode associated with request page group, must be holding inode lock
315 * @head - head request of page group, must be holding head lock
316 * @req - request that couldn't lock and needs to wait on the req bit lock
317 * @nonblock - if true, don't actually wait
318 *
319 * NOTE: this must be called holding page_group bit lock and inode spin lock
320 * and BOTH will be released before returning.
321 *
322 * returns 0 on success, < 0 on error.
323 */
324static int
325nfs_unroll_locks_and_wait(struct inode *inode, struct nfs_page *head,
326 struct nfs_page *req, bool nonblock)
327 __releases(&inode->i_lock)
328{
329 struct nfs_page *tmp;
330 int ret;
331
332 /* relinquish all the locks successfully grabbed this run */
333 for (tmp = head ; tmp != req; tmp = tmp->wb_this_page)
334 nfs_unlock_request(tmp);
335
336 WARN_ON_ONCE(test_bit(PG_TEARDOWN, &req->wb_flags));
337
338 /* grab a ref on the request that will be waited on */
339 kref_get(&req->wb_kref);
340
341 nfs_page_group_unlock(head);
342 spin_unlock(&inode->i_lock);
343
344 /* release ref from nfs_page_find_head_request_locked */
345 nfs_release_request(head);
346
347 if (!nonblock)
348 ret = nfs_wait_on_request(req);
349 else
350 ret = -EAGAIN;
351 nfs_release_request(req);
352
353 return ret;
354}
355
356/*
357 * nfs_destroy_unlinked_subrequests - destroy recently unlinked subrequests
358 *
359 * @destroy_list - request list (using wb_this_page) terminated by @old_head
360 * @old_head - the old head of the list
361 *
362 * All subrequests must be locked and removed from all lists, so at this point
363 * they are only "active" in this function, and possibly in nfs_wait_on_request
364 * with a reference held by some other context.
365 */
366static void
367nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list,
368 struct nfs_page *old_head)
369{
370 while (destroy_list) {
371 struct nfs_page *subreq = destroy_list;
372
373 destroy_list = (subreq->wb_this_page == old_head) ?
374 NULL : subreq->wb_this_page;
375
376 WARN_ON_ONCE(old_head != subreq->wb_head);
377
378 /* make sure old group is not used */
379 subreq->wb_head = subreq;
380 subreq->wb_this_page = subreq;
381
382 nfs_clear_request_commit(subreq);
383
384 /* subreq is now totally disconnected from page group or any
385 * write / commit lists. last chance to wake any waiters */
386 nfs_unlock_request(subreq);
387
388 if (!test_bit(PG_TEARDOWN, &subreq->wb_flags)) {
389 /* release ref on old head request */
390 nfs_release_request(old_head);
391
392 nfs_page_group_clear_bits(subreq);
393
394 /* release the PG_INODE_REF reference */
395 if (test_and_clear_bit(PG_INODE_REF, &subreq->wb_flags))
396 nfs_release_request(subreq);
397 else
398 WARN_ON_ONCE(1);
399 } else {
400 WARN_ON_ONCE(test_bit(PG_CLEAN, &subreq->wb_flags));
401 /* zombie requests have already released the last
402 * reference and were waiting on the rest of the
403 * group to complete. Since it's no longer part of a
404 * group, simply free the request */
405 nfs_page_group_clear_bits(subreq);
406 nfs_free_request(subreq);
407 }
408 }
409}
410
411/*
412 * nfs_lock_and_join_requests - join all subreqs to the head req and return
413 * a locked reference, cancelling any pending
414 * operations for this page.
415 *
416 * @page - the page used to lookup the "page group" of nfs_page structures
417 * @nonblock - if true, don't block waiting for request locks
418 *
419 * This function joins all sub requests to the head request by first
420 * locking all requests in the group, cancelling any pending operations
421 * and finally updating the head request to cover the whole range covered by
422 * the (former) group. All subrequests are removed from any write or commit
423 * lists, unlinked from the group and destroyed.
424 *
425 * Returns a locked, referenced pointer to the head request - which after
426 * this call is guaranteed to be the only request associated with the page.
427 * Returns NULL if no requests are found for @page, or a ERR_PTR if an
428 * error was encountered.
429 */
430static struct nfs_page *
431nfs_lock_and_join_requests(struct page *page, bool nonblock)
293{ 432{
294 struct inode *inode = page_file_mapping(page)->host; 433 struct inode *inode = page_file_mapping(page)->host;
295 struct nfs_page *req; 434 struct nfs_page *head, *subreq;
435 struct nfs_page *destroy_list = NULL;
436 unsigned int total_bytes;
296 int ret; 437 int ret;
297 438
439try_again:
440 total_bytes = 0;
441
442 WARN_ON_ONCE(destroy_list);
443
298 spin_lock(&inode->i_lock); 444 spin_lock(&inode->i_lock);
299 for (;;) { 445
300 req = nfs_page_find_head_request_locked(NFS_I(inode), page); 446 /*
301 if (req == NULL) 447 * A reference is taken only on the head request which acts as a
302 break; 448 * reference to the whole page group - the group will not be destroyed
303 if (nfs_lock_request(req)) 449 * until the head reference is released.
304 break; 450 */
305 /* Note: If we hold the page lock, as is the case in nfs_writepage, 451 head = nfs_page_find_head_request_locked(NFS_I(inode), page);
306 * then the call to nfs_lock_request() will always 452
307 * succeed provided that someone hasn't already marked the 453 if (!head) {
308 * request as dirty (in which case we don't care).
309 */
310 spin_unlock(&inode->i_lock); 454 spin_unlock(&inode->i_lock);
311 if (!nonblock) 455 return NULL;
312 ret = nfs_wait_on_request(req); 456 }
313 else 457
314 ret = -EAGAIN; 458 /* lock each request in the page group */
315 nfs_release_request(req); 459 nfs_page_group_lock(head);
316 if (ret != 0) 460 subreq = head;
461 do {
462 /*
463 * Subrequests are always contiguous, non overlapping
464 * and in order. If not, it's a programming error.
465 */
466 WARN_ON_ONCE(subreq->wb_offset !=
467 (head->wb_offset + total_bytes));
468
469 /* keep track of how many bytes this group covers */
470 total_bytes += subreq->wb_bytes;
471
472 if (!nfs_lock_request(subreq)) {
473 /* releases page group bit lock and
474 * inode spin lock and all references */
475 ret = nfs_unroll_locks_and_wait(inode, head,
476 subreq, nonblock);
477
478 if (ret == 0)
479 goto try_again;
480
317 return ERR_PTR(ret); 481 return ERR_PTR(ret);
318 spin_lock(&inode->i_lock); 482 }
483
484 subreq = subreq->wb_this_page;
485 } while (subreq != head);
486
487 /* Now that all requests are locked, make sure they aren't on any list.
488 * Commit list removal accounting is done after locks are dropped */
489 subreq = head;
490 do {
491 nfs_list_remove_request(subreq);
492 subreq = subreq->wb_this_page;
493 } while (subreq != head);
494
495 /* unlink subrequests from head, destroy them later */
496 if (head->wb_this_page != head) {
497 /* destroy list will be terminated by head */
498 destroy_list = head->wb_this_page;
499 head->wb_this_page = head;
500
501 /* change head request to cover whole range that
502 * the former page group covered */
503 head->wb_bytes = total_bytes;
319 } 504 }
505
506 /*
507 * prepare head request to be added to new pgio descriptor
508 */
509 nfs_page_group_clear_bits(head);
510
511 /*
512 * some part of the group was still on the inode list - otherwise
513 * the group wouldn't be involved in async write.
514 * grab a reference for the head request, iff it needs one.
515 */
516 if (!test_and_set_bit(PG_INODE_REF, &head->wb_flags))
517 kref_get(&head->wb_kref);
518
519 nfs_page_group_unlock(head);
520
521 /* drop lock to clear_request_commit the head req and clean up
522 * requests on destroy list */
320 spin_unlock(&inode->i_lock); 523 spin_unlock(&inode->i_lock);
321 return req; 524
525 nfs_destroy_unlinked_subrequests(destroy_list, head);
526
527 /* clean up commit list state */
528 nfs_clear_request_commit(head);
529
530 /* still holds ref on head from nfs_page_find_head_request_locked
531 * and still has lock on head from lock loop */
532 return head;
322} 533}
323 534
324/* 535/*
@@ -331,7 +542,7 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
331 struct nfs_page *req; 542 struct nfs_page *req;
332 int ret = 0; 543 int ret = 0;
333 544
334 req = nfs_find_and_lock_request(page, nonblock); 545 req = nfs_lock_and_join_requests(page, nonblock);
335 if (!req) 546 if (!req)
336 goto out; 547 goto out;
337 ret = PTR_ERR(req); 548 ret = PTR_ERR(req);