diff options
Diffstat (limited to 'fs/nfs/write.c')
-rw-r--r-- | fs/nfs/write.c | 335 |
1 files changed, 282 insertions, 53 deletions
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 8534ee5c207d..d357728ed8ba 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -46,6 +46,7 @@ static const struct rpc_call_ops nfs_commit_ops; | |||
46 | static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops; | 46 | static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops; |
47 | static const struct nfs_commit_completion_ops nfs_commit_completion_ops; | 47 | static const struct nfs_commit_completion_ops nfs_commit_completion_ops; |
48 | static const struct nfs_rw_ops nfs_rw_write_ops; | 48 | static const struct nfs_rw_ops nfs_rw_write_ops; |
49 | static void nfs_clear_request_commit(struct nfs_page *req); | ||
49 | 50 | ||
50 | static struct kmem_cache *nfs_wdata_cachep; | 51 | static struct kmem_cache *nfs_wdata_cachep; |
51 | static mempool_t *nfs_wdata_mempool; | 52 | static mempool_t *nfs_wdata_mempool; |
@@ -91,8 +92,15 @@ static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error) | |||
91 | set_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); | 92 | set_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); |
92 | } | 93 | } |
93 | 94 | ||
95 | /* | ||
96 | * nfs_page_find_head_request_locked - find head request associated with @page | ||
97 | * | ||
98 | * must be called while holding the inode lock. | ||
99 | * | ||
100 | * returns matching head request with reference held, or NULL if not found. | ||
101 | */ | ||
94 | static struct nfs_page * | 102 | static struct nfs_page * |
95 | nfs_page_find_request_locked(struct nfs_inode *nfsi, struct page *page) | 103 | nfs_page_find_head_request_locked(struct nfs_inode *nfsi, struct page *page) |
96 | { | 104 | { |
97 | struct nfs_page *req = NULL; | 105 | struct nfs_page *req = NULL; |
98 | 106 | ||
@@ -104,25 +112,33 @@ nfs_page_find_request_locked(struct nfs_inode *nfsi, struct page *page) | |||
104 | /* Linearly search the commit list for the correct req */ | 112 | /* Linearly search the commit list for the correct req */ |
105 | list_for_each_entry_safe(freq, t, &nfsi->commit_info.list, wb_list) { | 113 | list_for_each_entry_safe(freq, t, &nfsi->commit_info.list, wb_list) { |
106 | if (freq->wb_page == page) { | 114 | if (freq->wb_page == page) { |
107 | req = freq; | 115 | req = freq->wb_head; |
108 | break; | 116 | break; |
109 | } | 117 | } |
110 | } | 118 | } |
111 | } | 119 | } |
112 | 120 | ||
113 | if (req) | 121 | if (req) { |
122 | WARN_ON_ONCE(req->wb_head != req); | ||
123 | |||
114 | kref_get(&req->wb_kref); | 124 | kref_get(&req->wb_kref); |
125 | } | ||
115 | 126 | ||
116 | return req; | 127 | return req; |
117 | } | 128 | } |
118 | 129 | ||
119 | static struct nfs_page *nfs_page_find_request(struct page *page) | 130 | /* |
131 | * nfs_page_find_head_request - find head request associated with @page | ||
132 | * | ||
133 | * returns matching head request with reference held, or NULL if not found. | ||
134 | */ | ||
135 | static struct nfs_page *nfs_page_find_head_request(struct page *page) | ||
120 | { | 136 | { |
121 | struct inode *inode = page_file_mapping(page)->host; | 137 | struct inode *inode = page_file_mapping(page)->host; |
122 | struct nfs_page *req = NULL; | 138 | struct nfs_page *req = NULL; |
123 | 139 | ||
124 | spin_lock(&inode->i_lock); | 140 | spin_lock(&inode->i_lock); |
125 | req = nfs_page_find_request_locked(NFS_I(inode), page); | 141 | req = nfs_page_find_head_request_locked(NFS_I(inode), page); |
126 | spin_unlock(&inode->i_lock); | 142 | spin_unlock(&inode->i_lock); |
127 | return req; | 143 | return req; |
128 | } | 144 | } |
@@ -274,36 +290,246 @@ static void nfs_end_page_writeback(struct nfs_page *req) | |||
274 | clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC); | 290 | clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC); |
275 | } | 291 | } |
276 | 292 | ||
277 | static struct nfs_page *nfs_find_and_lock_request(struct page *page, bool nonblock) | 293 | |
294 | /* nfs_page_group_clear_bits | ||
295 | * @req - an nfs request | ||
296 | * clears all page group related bits from @req | ||
297 | */ | ||
298 | static void | ||
299 | nfs_page_group_clear_bits(struct nfs_page *req) | ||
300 | { | ||
301 | clear_bit(PG_TEARDOWN, &req->wb_flags); | ||
302 | clear_bit(PG_UNLOCKPAGE, &req->wb_flags); | ||
303 | clear_bit(PG_UPTODATE, &req->wb_flags); | ||
304 | clear_bit(PG_WB_END, &req->wb_flags); | ||
305 | clear_bit(PG_REMOVE, &req->wb_flags); | ||
306 | } | ||
307 | |||
308 | |||
309 | /* | ||
310 | * nfs_unroll_locks_and_wait - unlock all newly locked reqs and wait on @req | ||
311 | * | ||
312 | * this is a helper function for nfs_lock_and_join_requests | ||
313 | * | ||
314 | * @inode - inode associated with request page group, must be holding inode lock | ||
315 | * @head - head request of page group, must be holding head lock | ||
316 | * @req - request that couldn't lock and needs to wait on the req bit lock | ||
317 | * @nonblock - if true, don't actually wait | ||
318 | * | ||
319 | * NOTE: this must be called holding page_group bit lock and inode spin lock | ||
320 | * and BOTH will be released before returning. | ||
321 | * | ||
322 | * returns 0 on success, < 0 on error. | ||
323 | */ | ||
324 | static int | ||
325 | nfs_unroll_locks_and_wait(struct inode *inode, struct nfs_page *head, | ||
326 | struct nfs_page *req, bool nonblock) | ||
327 | __releases(&inode->i_lock) | ||
328 | { | ||
329 | struct nfs_page *tmp; | ||
330 | int ret; | ||
331 | |||
332 | /* relinquish all the locks successfully grabbed this run */ | ||
333 | for (tmp = head ; tmp != req; tmp = tmp->wb_this_page) | ||
334 | nfs_unlock_request(tmp); | ||
335 | |||
336 | WARN_ON_ONCE(test_bit(PG_TEARDOWN, &req->wb_flags)); | ||
337 | |||
338 | /* grab a ref on the request that will be waited on */ | ||
339 | kref_get(&req->wb_kref); | ||
340 | |||
341 | nfs_page_group_unlock(head); | ||
342 | spin_unlock(&inode->i_lock); | ||
343 | |||
344 | /* release ref from nfs_page_find_head_request_locked */ | ||
345 | nfs_release_request(head); | ||
346 | |||
347 | if (!nonblock) | ||
348 | ret = nfs_wait_on_request(req); | ||
349 | else | ||
350 | ret = -EAGAIN; | ||
351 | nfs_release_request(req); | ||
352 | |||
353 | return ret; | ||
354 | } | ||
355 | |||
356 | /* | ||
357 | * nfs_destroy_unlinked_subrequests - destroy recently unlinked subrequests | ||
358 | * | ||
359 | * @destroy_list - request list (using wb_this_page) terminated by @old_head | ||
360 | * @old_head - the old head of the list | ||
361 | * | ||
362 | * All subrequests must be locked and removed from all lists, so at this point | ||
363 | * they are only "active" in this function, and possibly in nfs_wait_on_request | ||
364 | * with a reference held by some other context. | ||
365 | */ | ||
366 | static void | ||
367 | nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list, | ||
368 | struct nfs_page *old_head) | ||
369 | { | ||
370 | while (destroy_list) { | ||
371 | struct nfs_page *subreq = destroy_list; | ||
372 | |||
373 | destroy_list = (subreq->wb_this_page == old_head) ? | ||
374 | NULL : subreq->wb_this_page; | ||
375 | |||
376 | WARN_ON_ONCE(old_head != subreq->wb_head); | ||
377 | |||
378 | /* make sure old group is not used */ | ||
379 | subreq->wb_head = subreq; | ||
380 | subreq->wb_this_page = subreq; | ||
381 | |||
382 | nfs_clear_request_commit(subreq); | ||
383 | |||
384 | /* subreq is now totally disconnected from page group or any | ||
385 | * write / commit lists. last chance to wake any waiters */ | ||
386 | nfs_unlock_request(subreq); | ||
387 | |||
388 | if (!test_bit(PG_TEARDOWN, &subreq->wb_flags)) { | ||
389 | /* release ref on old head request */ | ||
390 | nfs_release_request(old_head); | ||
391 | |||
392 | nfs_page_group_clear_bits(subreq); | ||
393 | |||
394 | /* release the PG_INODE_REF reference */ | ||
395 | if (test_and_clear_bit(PG_INODE_REF, &subreq->wb_flags)) | ||
396 | nfs_release_request(subreq); | ||
397 | else | ||
398 | WARN_ON_ONCE(1); | ||
399 | } else { | ||
400 | WARN_ON_ONCE(test_bit(PG_CLEAN, &subreq->wb_flags)); | ||
401 | /* zombie requests have already released the last | ||
402 | * reference and were waiting on the rest of the | ||
403 | * group to complete. Since it's no longer part of a | ||
404 | * group, simply free the request */ | ||
405 | nfs_page_group_clear_bits(subreq); | ||
406 | nfs_free_request(subreq); | ||
407 | } | ||
408 | } | ||
409 | } | ||
410 | |||
411 | /* | ||
412 | * nfs_lock_and_join_requests - join all subreqs to the head req and return | ||
413 | * a locked reference, cancelling any pending | ||
414 | * operations for this page. | ||
415 | * | ||
416 | * @page - the page used to lookup the "page group" of nfs_page structures | ||
417 | * @nonblock - if true, don't block waiting for request locks | ||
418 | * | ||
419 | * This function joins all sub requests to the head request by first | ||
420 | * locking all requests in the group, cancelling any pending operations | ||
421 | * and finally updating the head request to cover the whole range covered by | ||
422 | * the (former) group. All subrequests are removed from any write or commit | ||
423 | * lists, unlinked from the group and destroyed. | ||
424 | * | ||
425 | * Returns a locked, referenced pointer to the head request - which after | ||
426 | * this call is guaranteed to be the only request associated with the page. | ||
427 | * Returns NULL if no requests are found for @page, or an ERR_PTR if an | |
428 | * error was encountered. | ||
429 | */ | ||
430 | static struct nfs_page * | ||
431 | nfs_lock_and_join_requests(struct page *page, bool nonblock) | ||
278 | { | 432 | { |
279 | struct inode *inode = page_file_mapping(page)->host; | 433 | struct inode *inode = page_file_mapping(page)->host; |
280 | struct nfs_page *req; | 434 | struct nfs_page *head, *subreq; |
435 | struct nfs_page *destroy_list = NULL; | ||
436 | unsigned int total_bytes; | ||
281 | int ret; | 437 | int ret; |
282 | 438 | ||
439 | try_again: | ||
440 | total_bytes = 0; | ||
441 | |||
442 | WARN_ON_ONCE(destroy_list); | ||
443 | |||
283 | spin_lock(&inode->i_lock); | 444 | spin_lock(&inode->i_lock); |
284 | for (;;) { | 445 | |
285 | req = nfs_page_find_request_locked(NFS_I(inode), page); | 446 | /* |
286 | if (req == NULL) | 447 | * A reference is taken only on the head request which acts as a |
287 | break; | 448 | * reference to the whole page group - the group will not be destroyed |
288 | if (nfs_lock_request(req)) | 449 | * until the head reference is released. |
289 | break; | 450 | */ |
290 | /* Note: If we hold the page lock, as is the case in nfs_writepage, | 451 | head = nfs_page_find_head_request_locked(NFS_I(inode), page); |
291 | * then the call to nfs_lock_request() will always | 452 | |
292 | * succeed provided that someone hasn't already marked the | 453 | if (!head) { |
293 | * request as dirty (in which case we don't care). | ||
294 | */ | ||
295 | spin_unlock(&inode->i_lock); | 454 | spin_unlock(&inode->i_lock); |
296 | if (!nonblock) | 455 | return NULL; |
297 | ret = nfs_wait_on_request(req); | 456 | } |
298 | else | 457 | |
299 | ret = -EAGAIN; | 458 | /* lock each request in the page group */ |
300 | nfs_release_request(req); | 459 | nfs_page_group_lock(head); |
301 | if (ret != 0) | 460 | subreq = head; |
461 | do { | ||
462 | /* | ||
463 | * Subrequests are always contiguous, non overlapping | ||
464 | * and in order. If not, it's a programming error. | ||
465 | */ | ||
466 | WARN_ON_ONCE(subreq->wb_offset != | ||
467 | (head->wb_offset + total_bytes)); | ||
468 | |||
469 | /* keep track of how many bytes this group covers */ | ||
470 | total_bytes += subreq->wb_bytes; | ||
471 | |||
472 | if (!nfs_lock_request(subreq)) { | ||
473 | /* releases page group bit lock and | ||
474 | * inode spin lock and all references */ | ||
475 | ret = nfs_unroll_locks_and_wait(inode, head, | ||
476 | subreq, nonblock); | ||
477 | |||
478 | if (ret == 0) | ||
479 | goto try_again; | ||
480 | |||
302 | return ERR_PTR(ret); | 481 | return ERR_PTR(ret); |
303 | spin_lock(&inode->i_lock); | 482 | } |
483 | |||
484 | subreq = subreq->wb_this_page; | ||
485 | } while (subreq != head); | ||
486 | |||
487 | /* Now that all requests are locked, make sure they aren't on any list. | ||
488 | * Commit list removal accounting is done after locks are dropped */ | ||
489 | subreq = head; | ||
490 | do { | ||
491 | nfs_list_remove_request(subreq); | ||
492 | subreq = subreq->wb_this_page; | ||
493 | } while (subreq != head); | ||
494 | |||
495 | /* unlink subrequests from head, destroy them later */ | ||
496 | if (head->wb_this_page != head) { | ||
497 | /* destroy list will be terminated by head */ | ||
498 | destroy_list = head->wb_this_page; | ||
499 | head->wb_this_page = head; | ||
500 | |||
501 | /* change head request to cover whole range that | ||
502 | * the former page group covered */ | ||
503 | head->wb_bytes = total_bytes; | ||
304 | } | 504 | } |
505 | |||
506 | /* | ||
507 | * prepare head request to be added to new pgio descriptor | ||
508 | */ | ||
509 | nfs_page_group_clear_bits(head); | ||
510 | |||
511 | /* | ||
512 | * some part of the group was still on the inode list - otherwise | ||
513 | * the group wouldn't be involved in async write. | ||
514 | * grab a reference for the head request, iff it needs one. | ||
515 | */ | ||
516 | if (!test_and_set_bit(PG_INODE_REF, &head->wb_flags)) | ||
517 | kref_get(&head->wb_kref); | ||
518 | |||
519 | nfs_page_group_unlock(head); | ||
520 | |||
521 | /* drop lock to clear_request_commit the head req and clean up | ||
522 | * requests on destroy list */ | ||
305 | spin_unlock(&inode->i_lock); | 523 | spin_unlock(&inode->i_lock); |
306 | return req; | 524 | |
525 | nfs_destroy_unlinked_subrequests(destroy_list, head); | ||
526 | |||
527 | /* clean up commit list state */ | ||
528 | nfs_clear_request_commit(head); | ||
529 | |||
530 | /* still holds ref on head from nfs_page_find_head_request_locked | ||
531 | * and still has lock on head from lock loop */ | ||
532 | return head; | ||
307 | } | 533 | } |
308 | 534 | ||
309 | /* | 535 | /* |
@@ -316,7 +542,7 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, | |||
316 | struct nfs_page *req; | 542 | struct nfs_page *req; |
317 | int ret = 0; | 543 | int ret = 0; |
318 | 544 | ||
319 | req = nfs_find_and_lock_request(page, nonblock); | 545 | req = nfs_lock_and_join_requests(page, nonblock); |
320 | if (!req) | 546 | if (!req) |
321 | goto out; | 547 | goto out; |
322 | ret = PTR_ERR(req); | 548 | ret = PTR_ERR(req); |
@@ -448,7 +674,9 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) | |||
448 | set_page_private(req->wb_page, (unsigned long)req); | 674 | set_page_private(req->wb_page, (unsigned long)req); |
449 | } | 675 | } |
450 | nfsi->npages++; | 676 | nfsi->npages++; |
451 | set_bit(PG_INODE_REF, &req->wb_flags); | 677 | /* this is a head request for a page group - mark it as having an |
678 | * extra reference so sub groups can follow suit */ | ||
679 | WARN_ON(test_and_set_bit(PG_INODE_REF, &req->wb_flags)); | ||
452 | kref_get(&req->wb_kref); | 680 | kref_get(&req->wb_kref); |
453 | spin_unlock(&inode->i_lock); | 681 | spin_unlock(&inode->i_lock); |
454 | } | 682 | } |
@@ -474,7 +702,9 @@ static void nfs_inode_remove_request(struct nfs_page *req) | |||
474 | nfsi->npages--; | 702 | nfsi->npages--; |
475 | spin_unlock(&inode->i_lock); | 703 | spin_unlock(&inode->i_lock); |
476 | } | 704 | } |
477 | nfs_release_request(req); | 705 | |
706 | if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags)) | ||
707 | nfs_release_request(req); | ||
478 | } | 708 | } |
479 | 709 | ||
480 | static void | 710 | static void |
@@ -636,7 +866,6 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr) | |||
636 | { | 866 | { |
637 | struct nfs_commit_info cinfo; | 867 | struct nfs_commit_info cinfo; |
638 | unsigned long bytes = 0; | 868 | unsigned long bytes = 0; |
639 | bool do_destroy; | ||
640 | 869 | ||
641 | if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) | 870 | if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) |
642 | goto out; | 871 | goto out; |
@@ -662,7 +891,6 @@ remove_req: | |||
662 | next: | 891 | next: |
663 | nfs_unlock_request(req); | 892 | nfs_unlock_request(req); |
664 | nfs_end_page_writeback(req); | 893 | nfs_end_page_writeback(req); |
665 | do_destroy = !nfs_write_need_commit(hdr); | ||
666 | nfs_release_request(req); | 894 | nfs_release_request(req); |
667 | } | 895 | } |
668 | out: | 896 | out: |
@@ -763,7 +991,7 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode, | |||
763 | spin_lock(&inode->i_lock); | 991 | spin_lock(&inode->i_lock); |
764 | 992 | ||
765 | for (;;) { | 993 | for (;;) { |
766 | req = nfs_page_find_request_locked(NFS_I(inode), page); | 994 | req = nfs_page_find_head_request_locked(NFS_I(inode), page); |
767 | if (req == NULL) | 995 | if (req == NULL) |
768 | goto out_unlock; | 996 | goto out_unlock; |
769 | 997 | ||
@@ -871,7 +1099,7 @@ int nfs_flush_incompatible(struct file *file, struct page *page) | |||
871 | * dropped page. | 1099 | * dropped page. |
872 | */ | 1100 | */ |
873 | do { | 1101 | do { |
874 | req = nfs_page_find_request(page); | 1102 | req = nfs_page_find_head_request(page); |
875 | if (req == NULL) | 1103 | if (req == NULL) |
876 | return 0; | 1104 | return 0; |
877 | l_ctx = req->wb_lock_context; | 1105 | l_ctx = req->wb_lock_context; |
@@ -1555,27 +1783,28 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page) | |||
1555 | struct nfs_page *req; | 1783 | struct nfs_page *req; |
1556 | int ret = 0; | 1784 | int ret = 0; |
1557 | 1785 | ||
1558 | for (;;) { | 1786 | wait_on_page_writeback(page); |
1559 | wait_on_page_writeback(page); | 1787 | |
1560 | req = nfs_page_find_request(page); | 1788 | /* blocking call to cancel all requests and join to a single (head) |
1561 | if (req == NULL) | 1789 | * request */ |
1562 | break; | 1790 | req = nfs_lock_and_join_requests(page, false); |
1563 | if (nfs_lock_request(req)) { | 1791 | |
1564 | nfs_clear_request_commit(req); | 1792 | if (IS_ERR(req)) { |
1565 | nfs_inode_remove_request(req); | 1793 | ret = PTR_ERR(req); |
1566 | /* | 1794 | } else if (req) { |
1567 | * In case nfs_inode_remove_request has marked the | 1795 | /* all requests from this page have been cancelled by |
1568 | * page as being dirty | 1796 | * nfs_lock_and_join_requests, so just remove the head |
1569 | */ | 1797 | * request from the inode / page_private pointer and |
1570 | cancel_dirty_page(page, PAGE_CACHE_SIZE); | 1798 | * release it */ |
1571 | nfs_unlock_and_release_request(req); | 1799 | nfs_inode_remove_request(req); |
1572 | break; | 1800 | /* |
1573 | } | 1801 | * In case nfs_inode_remove_request has marked the |
1574 | ret = nfs_wait_on_request(req); | 1802 | * page as being dirty |
1575 | nfs_release_request(req); | 1803 | */ |
1576 | if (ret < 0) | 1804 | cancel_dirty_page(page, PAGE_CACHE_SIZE); |
1577 | break; | 1805 | nfs_unlock_and_release_request(req); |
1578 | } | 1806 | } |
1807 | |||
1579 | return ret; | 1808 | return ret; |
1580 | } | 1809 | } |
1581 | 1810 | ||