aboutsummaryrefslogtreecommitdiffstats
path: root/fs/nfs/write.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/nfs/write.c')
-rw-r--r--fs/nfs/write.c388
1 files changed, 247 insertions, 141 deletions
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index c8278f4046cb..49c715b4ac92 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -28,6 +28,7 @@
28#include "iostat.h" 28#include "iostat.h"
29#include "nfs4_fs.h" 29#include "nfs4_fs.h"
30#include "fscache.h" 30#include "fscache.h"
31#include "pnfs.h"
31 32
32#define NFSDBG_FACILITY NFSDBG_PAGECACHE 33#define NFSDBG_FACILITY NFSDBG_PAGECACHE
33 34
@@ -58,6 +59,7 @@ struct nfs_write_data *nfs_commitdata_alloc(void)
58 } 59 }
59 return p; 60 return p;
60} 61}
62EXPORT_SYMBOL_GPL(nfs_commitdata_alloc);
61 63
62void nfs_commit_free(struct nfs_write_data *p) 64void nfs_commit_free(struct nfs_write_data *p)
63{ 65{
@@ -65,6 +67,7 @@ void nfs_commit_free(struct nfs_write_data *p)
65 kfree(p->pagevec); 67 kfree(p->pagevec);
66 mempool_free(p, nfs_commit_mempool); 68 mempool_free(p, nfs_commit_mempool);
67} 69}
70EXPORT_SYMBOL_GPL(nfs_commit_free);
68 71
69struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount) 72struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
70{ 73{
@@ -96,6 +99,7 @@ void nfs_writedata_free(struct nfs_write_data *p)
96 99
97static void nfs_writedata_release(struct nfs_write_data *wdata) 100static void nfs_writedata_release(struct nfs_write_data *wdata)
98{ 101{
102 put_lseg(wdata->lseg);
99 put_nfs_open_context(wdata->args.context); 103 put_nfs_open_context(wdata->args.context);
100 nfs_writedata_free(wdata); 104 nfs_writedata_free(wdata);
101} 105}
@@ -177,8 +181,8 @@ static int wb_priority(struct writeback_control *wbc)
177 if (wbc->for_reclaim) 181 if (wbc->for_reclaim)
178 return FLUSH_HIGHPRI | FLUSH_STABLE; 182 return FLUSH_HIGHPRI | FLUSH_STABLE;
179 if (wbc->for_kupdate || wbc->for_background) 183 if (wbc->for_kupdate || wbc->for_background)
180 return FLUSH_LOWPRI; 184 return FLUSH_LOWPRI | FLUSH_COND_STABLE;
181 return 0; 185 return FLUSH_COND_STABLE;
182} 186}
183 187
184/* 188/*
@@ -385,11 +389,8 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
385 spin_lock(&inode->i_lock); 389 spin_lock(&inode->i_lock);
386 error = radix_tree_insert(&nfsi->nfs_page_tree, req->wb_index, req); 390 error = radix_tree_insert(&nfsi->nfs_page_tree, req->wb_index, req);
387 BUG_ON(error); 391 BUG_ON(error);
388 if (!nfsi->npages) { 392 if (!nfsi->npages && nfs_have_delegation(inode, FMODE_WRITE))
389 igrab(inode); 393 nfsi->change_attr++;
390 if (nfs_have_delegation(inode, FMODE_WRITE))
391 nfsi->change_attr++;
392 }
393 set_bit(PG_MAPPED, &req->wb_flags); 394 set_bit(PG_MAPPED, &req->wb_flags);
394 SetPagePrivate(req->wb_page); 395 SetPagePrivate(req->wb_page);
395 set_page_private(req->wb_page, (unsigned long)req); 396 set_page_private(req->wb_page, (unsigned long)req);
@@ -419,11 +420,7 @@ static void nfs_inode_remove_request(struct nfs_page *req)
419 clear_bit(PG_MAPPED, &req->wb_flags); 420 clear_bit(PG_MAPPED, &req->wb_flags);
420 radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index); 421 radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index);
421 nfsi->npages--; 422 nfsi->npages--;
422 if (!nfsi->npages) { 423 spin_unlock(&inode->i_lock);
423 spin_unlock(&inode->i_lock);
424 iput(inode);
425 } else
426 spin_unlock(&inode->i_lock);
427 nfs_release_request(req); 424 nfs_release_request(req);
428} 425}
429 426
@@ -439,7 +436,7 @@ nfs_mark_request_dirty(struct nfs_page *req)
439 * Add a request to the inode's commit list. 436 * Add a request to the inode's commit list.
440 */ 437 */
441static void 438static void
442nfs_mark_request_commit(struct nfs_page *req) 439nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
443{ 440{
444 struct inode *inode = req->wb_context->path.dentry->d_inode; 441 struct inode *inode = req->wb_context->path.dentry->d_inode;
445 struct nfs_inode *nfsi = NFS_I(inode); 442 struct nfs_inode *nfsi = NFS_I(inode);
@@ -451,6 +448,7 @@ nfs_mark_request_commit(struct nfs_page *req)
451 NFS_PAGE_TAG_COMMIT); 448 NFS_PAGE_TAG_COMMIT);
452 nfsi->ncommit++; 449 nfsi->ncommit++;
453 spin_unlock(&inode->i_lock); 450 spin_unlock(&inode->i_lock);
451 pnfs_mark_request_commit(req, lseg);
454 inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); 452 inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
455 inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE); 453 inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE);
456 __mark_inode_dirty(inode, I_DIRTY_DATASYNC); 454 __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
@@ -472,14 +470,18 @@ nfs_clear_request_commit(struct nfs_page *req)
472static inline 470static inline
473int nfs_write_need_commit(struct nfs_write_data *data) 471int nfs_write_need_commit(struct nfs_write_data *data)
474{ 472{
475 return data->verf.committed != NFS_FILE_SYNC; 473 if (data->verf.committed == NFS_DATA_SYNC)
474 return data->lseg == NULL;
475 else
476 return data->verf.committed != NFS_FILE_SYNC;
476} 477}
477 478
478static inline 479static inline
479int nfs_reschedule_unstable_write(struct nfs_page *req) 480int nfs_reschedule_unstable_write(struct nfs_page *req,
481 struct nfs_write_data *data)
480{ 482{
481 if (test_and_clear_bit(PG_NEED_COMMIT, &req->wb_flags)) { 483 if (test_and_clear_bit(PG_NEED_COMMIT, &req->wb_flags)) {
482 nfs_mark_request_commit(req); 484 nfs_mark_request_commit(req, data->lseg);
483 return 1; 485 return 1;
484 } 486 }
485 if (test_and_clear_bit(PG_NEED_RESCHED, &req->wb_flags)) { 487 if (test_and_clear_bit(PG_NEED_RESCHED, &req->wb_flags)) {
@@ -490,7 +492,7 @@ int nfs_reschedule_unstable_write(struct nfs_page *req)
490} 492}
491#else 493#else
492static inline void 494static inline void
493nfs_mark_request_commit(struct nfs_page *req) 495nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
494{ 496{
495} 497}
496 498
@@ -507,7 +509,8 @@ int nfs_write_need_commit(struct nfs_write_data *data)
507} 509}
508 510
509static inline 511static inline
510int nfs_reschedule_unstable_write(struct nfs_page *req) 512int nfs_reschedule_unstable_write(struct nfs_page *req,
513 struct nfs_write_data *data)
511{ 514{
512 return 0; 515 return 0;
513} 516}
@@ -539,11 +542,15 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, u
539 if (!nfs_need_commit(nfsi)) 542 if (!nfs_need_commit(nfsi))
540 return 0; 543 return 0;
541 544
545 spin_lock(&inode->i_lock);
542 ret = nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT); 546 ret = nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT);
543 if (ret > 0) 547 if (ret > 0)
544 nfsi->ncommit -= ret; 548 nfsi->ncommit -= ret;
549 spin_unlock(&inode->i_lock);
550
545 if (nfs_need_commit(NFS_I(inode))) 551 if (nfs_need_commit(NFS_I(inode)))
546 __mark_inode_dirty(inode, I_DIRTY_DATASYNC); 552 __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
553
547 return ret; 554 return ret;
548} 555}
549#else 556#else
@@ -610,9 +617,11 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
610 } 617 }
611 618
612 if (nfs_clear_request_commit(req) && 619 if (nfs_clear_request_commit(req) &&
613 radix_tree_tag_clear(&NFS_I(inode)->nfs_page_tree, 620 radix_tree_tag_clear(&NFS_I(inode)->nfs_page_tree,
614 req->wb_index, NFS_PAGE_TAG_COMMIT) != NULL) 621 req->wb_index, NFS_PAGE_TAG_COMMIT) != NULL) {
615 NFS_I(inode)->ncommit--; 622 NFS_I(inode)->ncommit--;
623 pnfs_clear_request_commit(req);
624 }
616 625
617 /* Okay, the request matches. Update the region */ 626 /* Okay, the request matches. Update the region */
618 if (offset < req->wb_offset) { 627 if (offset < req->wb_offset) {
@@ -671,7 +680,6 @@ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
671 req = nfs_setup_write_request(ctx, page, offset, count); 680 req = nfs_setup_write_request(ctx, page, offset, count);
672 if (IS_ERR(req)) 681 if (IS_ERR(req))
673 return PTR_ERR(req); 682 return PTR_ERR(req);
674 nfs_mark_request_dirty(req);
675 /* Update file length */ 683 /* Update file length */
676 nfs_grow_file(page, offset, count); 684 nfs_grow_file(page, offset, count);
677 nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes); 685 nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes);
@@ -760,11 +768,12 @@ int nfs_updatepage(struct file *file, struct page *page,
760 return status; 768 return status;
761} 769}
762 770
763static void nfs_writepage_release(struct nfs_page *req) 771static void nfs_writepage_release(struct nfs_page *req,
772 struct nfs_write_data *data)
764{ 773{
765 struct page *page = req->wb_page; 774 struct page *page = req->wb_page;
766 775
767 if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req)) 776 if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req, data))
768 nfs_inode_remove_request(req); 777 nfs_inode_remove_request(req);
769 nfs_clear_page_tag_locked(req); 778 nfs_clear_page_tag_locked(req);
770 nfs_end_page_writeback(page); 779 nfs_end_page_writeback(page);
@@ -781,25 +790,21 @@ static int flush_task_priority(int how)
781 return RPC_PRIORITY_NORMAL; 790 return RPC_PRIORITY_NORMAL;
782} 791}
783 792
784/* 793int nfs_initiate_write(struct nfs_write_data *data,
785 * Set up the argument/result storage required for the RPC call. 794 struct rpc_clnt *clnt,
786 */ 795 const struct rpc_call_ops *call_ops,
787static int nfs_write_rpcsetup(struct nfs_page *req, 796 int how)
788 struct nfs_write_data *data,
789 const struct rpc_call_ops *call_ops,
790 unsigned int count, unsigned int offset,
791 int how)
792{ 797{
793 struct inode *inode = req->wb_context->path.dentry->d_inode; 798 struct inode *inode = data->inode;
794 int priority = flush_task_priority(how); 799 int priority = flush_task_priority(how);
795 struct rpc_task *task; 800 struct rpc_task *task;
796 struct rpc_message msg = { 801 struct rpc_message msg = {
797 .rpc_argp = &data->args, 802 .rpc_argp = &data->args,
798 .rpc_resp = &data->res, 803 .rpc_resp = &data->res,
799 .rpc_cred = req->wb_context->cred, 804 .rpc_cred = data->cred,
800 }; 805 };
801 struct rpc_task_setup task_setup_data = { 806 struct rpc_task_setup task_setup_data = {
802 .rpc_client = NFS_CLIENT(inode), 807 .rpc_client = clnt,
803 .task = &data->task, 808 .task = &data->task,
804 .rpc_message = &msg, 809 .rpc_message = &msg,
805 .callback_ops = call_ops, 810 .callback_ops = call_ops,
@@ -810,12 +815,52 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
810 }; 815 };
811 int ret = 0; 816 int ret = 0;
812 817
818 /* Set up the initial task struct. */
819 NFS_PROTO(inode)->write_setup(data, &msg);
820
821 dprintk("NFS: %5u initiated write call "
822 "(req %s/%lld, %u bytes @ offset %llu)\n",
823 data->task.tk_pid,
824 inode->i_sb->s_id,
825 (long long)NFS_FILEID(inode),
826 data->args.count,
827 (unsigned long long)data->args.offset);
828
829 task = rpc_run_task(&task_setup_data);
830 if (IS_ERR(task)) {
831 ret = PTR_ERR(task);
832 goto out;
833 }
834 if (how & FLUSH_SYNC) {
835 ret = rpc_wait_for_completion_task(task);
836 if (ret == 0)
837 ret = task->tk_status;
838 }
839 rpc_put_task(task);
840out:
841 return ret;
842}
843EXPORT_SYMBOL_GPL(nfs_initiate_write);
844
845/*
846 * Set up the argument/result storage required for the RPC call.
847 */
848static int nfs_write_rpcsetup(struct nfs_page *req,
849 struct nfs_write_data *data,
850 const struct rpc_call_ops *call_ops,
851 unsigned int count, unsigned int offset,
852 struct pnfs_layout_segment *lseg,
853 int how)
854{
855 struct inode *inode = req->wb_context->path.dentry->d_inode;
856
813 /* Set up the RPC argument and reply structs 857 /* Set up the RPC argument and reply structs
814 * NB: take care not to mess about with data->commit et al. */ 858 * NB: take care not to mess about with data->commit et al. */
815 859
816 data->req = req; 860 data->req = req;
817 data->inode = inode = req->wb_context->path.dentry->d_inode; 861 data->inode = inode = req->wb_context->path.dentry->d_inode;
818 data->cred = msg.rpc_cred; 862 data->cred = req->wb_context->cred;
863 data->lseg = get_lseg(lseg);
819 864
820 data->args.fh = NFS_FH(inode); 865 data->args.fh = NFS_FH(inode);
821 data->args.offset = req_offset(req) + offset; 866 data->args.offset = req_offset(req) + offset;
@@ -825,7 +870,7 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
825 data->args.context = get_nfs_open_context(req->wb_context); 870 data->args.context = get_nfs_open_context(req->wb_context);
826 data->args.lock_context = req->wb_lock_context; 871 data->args.lock_context = req->wb_lock_context;
827 data->args.stable = NFS_UNSTABLE; 872 data->args.stable = NFS_UNSTABLE;
828 if (how & FLUSH_STABLE) { 873 if (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) {
829 data->args.stable = NFS_DATA_SYNC; 874 data->args.stable = NFS_DATA_SYNC;
830 if (!nfs_need_commit(NFS_I(inode))) 875 if (!nfs_need_commit(NFS_I(inode)))
831 data->args.stable = NFS_FILE_SYNC; 876 data->args.stable = NFS_FILE_SYNC;
@@ -836,30 +881,11 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
836 data->res.verf = &data->verf; 881 data->res.verf = &data->verf;
837 nfs_fattr_init(&data->fattr); 882 nfs_fattr_init(&data->fattr);
838 883
839 /* Set up the initial task struct. */ 884 if (data->lseg &&
840 NFS_PROTO(inode)->write_setup(data, &msg); 885 (pnfs_try_to_write_data(data, call_ops, how) == PNFS_ATTEMPTED))
841 886 return 0;
842 dprintk("NFS: %5u initiated write call "
843 "(req %s/%lld, %u bytes @ offset %llu)\n",
844 data->task.tk_pid,
845 inode->i_sb->s_id,
846 (long long)NFS_FILEID(inode),
847 count,
848 (unsigned long long)data->args.offset);
849 887
850 task = rpc_run_task(&task_setup_data); 888 return nfs_initiate_write(data, NFS_CLIENT(inode), call_ops, how);
851 if (IS_ERR(task)) {
852 ret = PTR_ERR(task);
853 goto out;
854 }
855 if (how & FLUSH_SYNC) {
856 ret = rpc_wait_for_completion_task(task);
857 if (ret == 0)
858 ret = task->tk_status;
859 }
860 rpc_put_task(task);
861out:
862 return ret;
863} 889}
864 890
865/* If a nfs_flush_* function fails, it should remove reqs from @head and 891/* If a nfs_flush_* function fails, it should remove reqs from @head and
@@ -879,20 +905,27 @@ static void nfs_redirty_request(struct nfs_page *req)
879 * Generate multiple small requests to write out a single 905 * Generate multiple small requests to write out a single
880 * contiguous dirty area on one page. 906 * contiguous dirty area on one page.
881 */ 907 */
882static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int how) 908static int nfs_flush_multi(struct nfs_pageio_descriptor *desc)
883{ 909{
884 struct nfs_page *req = nfs_list_entry(head->next); 910 struct nfs_page *req = nfs_list_entry(desc->pg_list.next);
885 struct page *page = req->wb_page; 911 struct page *page = req->wb_page;
886 struct nfs_write_data *data; 912 struct nfs_write_data *data;
887 size_t wsize = NFS_SERVER(inode)->wsize, nbytes; 913 size_t wsize = NFS_SERVER(desc->pg_inode)->wsize, nbytes;
888 unsigned int offset; 914 unsigned int offset;
889 int requests = 0; 915 int requests = 0;
890 int ret = 0; 916 int ret = 0;
917 struct pnfs_layout_segment *lseg;
891 LIST_HEAD(list); 918 LIST_HEAD(list);
892 919
893 nfs_list_remove_request(req); 920 nfs_list_remove_request(req);
894 921
895 nbytes = count; 922 if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
923 (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit ||
924 desc->pg_count > wsize))
925 desc->pg_ioflags &= ~FLUSH_COND_STABLE;
926
927
928 nbytes = desc->pg_count;
896 do { 929 do {
897 size_t len = min(nbytes, wsize); 930 size_t len = min(nbytes, wsize);
898 931
@@ -905,9 +938,11 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned
905 } while (nbytes != 0); 938 } while (nbytes != 0);
906 atomic_set(&req->wb_complete, requests); 939 atomic_set(&req->wb_complete, requests);
907 940
941 BUG_ON(desc->pg_lseg);
942 lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW, GFP_NOFS);
908 ClearPageError(page); 943 ClearPageError(page);
909 offset = 0; 944 offset = 0;
910 nbytes = count; 945 nbytes = desc->pg_count;
911 do { 946 do {
912 int ret2; 947 int ret2;
913 948
@@ -919,13 +954,15 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned
919 if (nbytes < wsize) 954 if (nbytes < wsize)
920 wsize = nbytes; 955 wsize = nbytes;
921 ret2 = nfs_write_rpcsetup(req, data, &nfs_write_partial_ops, 956 ret2 = nfs_write_rpcsetup(req, data, &nfs_write_partial_ops,
922 wsize, offset, how); 957 wsize, offset, lseg, desc->pg_ioflags);
923 if (ret == 0) 958 if (ret == 0)
924 ret = ret2; 959 ret = ret2;
925 offset += wsize; 960 offset += wsize;
926 nbytes -= wsize; 961 nbytes -= wsize;
927 } while (nbytes != 0); 962 } while (nbytes != 0);
928 963
964 put_lseg(lseg);
965 desc->pg_lseg = NULL;
929 return ret; 966 return ret;
930 967
931out_bad: 968out_bad:
@@ -946,16 +983,26 @@ out_bad:
946 * This is the case if nfs_updatepage detects a conflicting request 983 * This is the case if nfs_updatepage detects a conflicting request
947 * that has been written but not committed. 984 * that has been written but not committed.
948 */ 985 */
949static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int how) 986static int nfs_flush_one(struct nfs_pageio_descriptor *desc)
950{ 987{
951 struct nfs_page *req; 988 struct nfs_page *req;
952 struct page **pages; 989 struct page **pages;
953 struct nfs_write_data *data; 990 struct nfs_write_data *data;
991 struct list_head *head = &desc->pg_list;
992 struct pnfs_layout_segment *lseg = desc->pg_lseg;
993 int ret;
954 994
955 data = nfs_writedata_alloc(npages); 995 data = nfs_writedata_alloc(nfs_page_array_len(desc->pg_base,
956 if (!data) 996 desc->pg_count));
957 goto out_bad; 997 if (!data) {
958 998 while (!list_empty(head)) {
999 req = nfs_list_entry(head->next);
1000 nfs_list_remove_request(req);
1001 nfs_redirty_request(req);
1002 }
1003 ret = -ENOMEM;
1004 goto out;
1005 }
959 pages = data->pagevec; 1006 pages = data->pagevec;
960 while (!list_empty(head)) { 1007 while (!list_empty(head)) {
961 req = nfs_list_entry(head->next); 1008 req = nfs_list_entry(head->next);
@@ -965,16 +1012,19 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned i
965 *pages++ = req->wb_page; 1012 *pages++ = req->wb_page;
966 } 1013 }
967 req = nfs_list_entry(data->pages.next); 1014 req = nfs_list_entry(data->pages.next);
1015 if ((!lseg) && list_is_singular(&data->pages))
1016 lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW, GFP_NOFS);
1017
1018 if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
1019 (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit))
1020 desc->pg_ioflags &= ~FLUSH_COND_STABLE;
968 1021
969 /* Set up the argument struct */ 1022 /* Set up the argument struct */
970 return nfs_write_rpcsetup(req, data, &nfs_write_full_ops, count, 0, how); 1023 ret = nfs_write_rpcsetup(req, data, &nfs_write_full_ops, desc->pg_count, 0, lseg, desc->pg_ioflags);
971 out_bad: 1024out:
972 while (!list_empty(head)) { 1025 put_lseg(lseg); /* Cleans any gotten in ->pg_test */
973 req = nfs_list_entry(head->next); 1026 desc->pg_lseg = NULL;
974 nfs_list_remove_request(req); 1027 return ret;
975 nfs_redirty_request(req);
976 }
977 return -ENOMEM;
978} 1028}
979 1029
980static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, 1030static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
@@ -982,6 +1032,8 @@ static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
982{ 1032{
983 size_t wsize = NFS_SERVER(inode)->wsize; 1033 size_t wsize = NFS_SERVER(inode)->wsize;
984 1034
1035 pnfs_pageio_init_write(pgio, inode);
1036
985 if (wsize < PAGE_CACHE_SIZE) 1037 if (wsize < PAGE_CACHE_SIZE)
986 nfs_pageio_init(pgio, inode, nfs_flush_multi, wsize, ioflags); 1038 nfs_pageio_init(pgio, inode, nfs_flush_multi, wsize, ioflags);
987 else 1039 else
@@ -1039,7 +1091,7 @@ static void nfs_writeback_release_partial(void *calldata)
1039 1091
1040out: 1092out:
1041 if (atomic_dec_and_test(&req->wb_complete)) 1093 if (atomic_dec_and_test(&req->wb_complete))
1042 nfs_writepage_release(req); 1094 nfs_writepage_release(req, data);
1043 nfs_writedata_release(calldata); 1095 nfs_writedata_release(calldata);
1044} 1096}
1045 1097
@@ -1106,7 +1158,7 @@ static void nfs_writeback_release_full(void *calldata)
1106 1158
1107 if (nfs_write_need_commit(data)) { 1159 if (nfs_write_need_commit(data)) {
1108 memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf)); 1160 memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
1109 nfs_mark_request_commit(req); 1161 nfs_mark_request_commit(req, data->lseg);
1110 dprintk(" marked for commit\n"); 1162 dprintk(" marked for commit\n");
1111 goto next; 1163 goto next;
1112 } 1164 }
@@ -1132,7 +1184,7 @@ static const struct rpc_call_ops nfs_write_full_ops = {
1132/* 1184/*
1133 * This function is called when the WRITE call is complete. 1185 * This function is called when the WRITE call is complete.
1134 */ 1186 */
1135int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) 1187void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1136{ 1188{
1137 struct nfs_writeargs *argp = &data->args; 1189 struct nfs_writeargs *argp = &data->args;
1138 struct nfs_writeres *resp = &data->res; 1190 struct nfs_writeres *resp = &data->res;
@@ -1151,7 +1203,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1151 */ 1203 */
1152 status = NFS_PROTO(data->inode)->write_done(task, data); 1204 status = NFS_PROTO(data->inode)->write_done(task, data);
1153 if (status != 0) 1205 if (status != 0)
1154 return status; 1206 return;
1155 nfs_add_stats(data->inode, NFSIOS_SERVERWRITTENBYTES, resp->count); 1207 nfs_add_stats(data->inode, NFSIOS_SERVERWRITTENBYTES, resp->count);
1156 1208
1157#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) 1209#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
@@ -1166,6 +1218,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1166 */ 1218 */
1167 static unsigned long complain; 1219 static unsigned long complain;
1168 1220
1221 /* Note this will print the MDS for a DS write */
1169 if (time_before(complain, jiffies)) { 1222 if (time_before(complain, jiffies)) {
1170 dprintk("NFS: faulty NFS server %s:" 1223 dprintk("NFS: faulty NFS server %s:"
1171 " (committed = %d) != (stable = %d)\n", 1224 " (committed = %d) != (stable = %d)\n",
@@ -1186,6 +1239,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1186 /* Was this an NFSv2 write or an NFSv3 stable write? */ 1239 /* Was this an NFSv2 write or an NFSv3 stable write? */
1187 if (resp->verf->committed != NFS_UNSTABLE) { 1240 if (resp->verf->committed != NFS_UNSTABLE) {
1188 /* Resend from where the server left off */ 1241 /* Resend from where the server left off */
1242 data->mds_offset += resp->count;
1189 argp->offset += resp->count; 1243 argp->offset += resp->count;
1190 argp->pgbase += resp->count; 1244 argp->pgbase += resp->count;
1191 argp->count -= resp->count; 1245 argp->count -= resp->count;
@@ -1196,7 +1250,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1196 argp->stable = NFS_FILE_SYNC; 1250 argp->stable = NFS_FILE_SYNC;
1197 } 1251 }
1198 nfs_restart_rpc(task, server->nfs_client); 1252 nfs_restart_rpc(task, server->nfs_client);
1199 return -EAGAIN; 1253 return;
1200 } 1254 }
1201 if (time_before(complain, jiffies)) { 1255 if (time_before(complain, jiffies)) {
1202 printk(KERN_WARNING 1256 printk(KERN_WARNING
@@ -1207,64 +1261,89 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1207 /* Can't do anything about it except throw an error. */ 1261 /* Can't do anything about it except throw an error. */
1208 task->tk_status = -EIO; 1262 task->tk_status = -EIO;
1209 } 1263 }
1210 return 0; 1264 return;
1211} 1265}
1212 1266
1213 1267
1214#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) 1268#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
1215static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait) 1269static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait)
1216{ 1270{
1271 int ret;
1272
1217 if (!test_and_set_bit(NFS_INO_COMMIT, &nfsi->flags)) 1273 if (!test_and_set_bit(NFS_INO_COMMIT, &nfsi->flags))
1218 return 1; 1274 return 1;
1219 if (may_wait && !out_of_line_wait_on_bit_lock(&nfsi->flags, 1275 if (!may_wait)
1220 NFS_INO_COMMIT, nfs_wait_bit_killable, 1276 return 0;
1221 TASK_KILLABLE)) 1277 ret = out_of_line_wait_on_bit_lock(&nfsi->flags,
1222 return 1; 1278 NFS_INO_COMMIT,
1223 return 0; 1279 nfs_wait_bit_killable,
1280 TASK_KILLABLE);
1281 return (ret < 0) ? ret : 1;
1224} 1282}
1225 1283
1226static void nfs_commit_clear_lock(struct nfs_inode *nfsi) 1284void nfs_commit_clear_lock(struct nfs_inode *nfsi)
1227{ 1285{
1228 clear_bit(NFS_INO_COMMIT, &nfsi->flags); 1286 clear_bit(NFS_INO_COMMIT, &nfsi->flags);
1229 smp_mb__after_clear_bit(); 1287 smp_mb__after_clear_bit();
1230 wake_up_bit(&nfsi->flags, NFS_INO_COMMIT); 1288 wake_up_bit(&nfsi->flags, NFS_INO_COMMIT);
1231} 1289}
1290EXPORT_SYMBOL_GPL(nfs_commit_clear_lock);
1232 1291
1233 1292void nfs_commitdata_release(void *data)
1234static void nfs_commitdata_release(void *data)
1235{ 1293{
1236 struct nfs_write_data *wdata = data; 1294 struct nfs_write_data *wdata = data;
1237 1295
1296 put_lseg(wdata->lseg);
1238 put_nfs_open_context(wdata->args.context); 1297 put_nfs_open_context(wdata->args.context);
1239 nfs_commit_free(wdata); 1298 nfs_commit_free(wdata);
1240} 1299}
1300EXPORT_SYMBOL_GPL(nfs_commitdata_release);
1241 1301
1242/* 1302int nfs_initiate_commit(struct nfs_write_data *data, struct rpc_clnt *clnt,
1243 * Set up the argument/result storage required for the RPC call. 1303 const struct rpc_call_ops *call_ops,
1244 */ 1304 int how)
1245static int nfs_commit_rpcsetup(struct list_head *head,
1246 struct nfs_write_data *data,
1247 int how)
1248{ 1305{
1249 struct nfs_page *first = nfs_list_entry(head->next);
1250 struct inode *inode = first->wb_context->path.dentry->d_inode;
1251 int priority = flush_task_priority(how);
1252 struct rpc_task *task; 1306 struct rpc_task *task;
1307 int priority = flush_task_priority(how);
1253 struct rpc_message msg = { 1308 struct rpc_message msg = {
1254 .rpc_argp = &data->args, 1309 .rpc_argp = &data->args,
1255 .rpc_resp = &data->res, 1310 .rpc_resp = &data->res,
1256 .rpc_cred = first->wb_context->cred, 1311 .rpc_cred = data->cred,
1257 }; 1312 };
1258 struct rpc_task_setup task_setup_data = { 1313 struct rpc_task_setup task_setup_data = {
1259 .task = &data->task, 1314 .task = &data->task,
1260 .rpc_client = NFS_CLIENT(inode), 1315 .rpc_client = clnt,
1261 .rpc_message = &msg, 1316 .rpc_message = &msg,
1262 .callback_ops = &nfs_commit_ops, 1317 .callback_ops = call_ops,
1263 .callback_data = data, 1318 .callback_data = data,
1264 .workqueue = nfsiod_workqueue, 1319 .workqueue = nfsiod_workqueue,
1265 .flags = RPC_TASK_ASYNC, 1320 .flags = RPC_TASK_ASYNC,
1266 .priority = priority, 1321 .priority = priority,
1267 }; 1322 };
1323 /* Set up the initial task struct. */
1324 NFS_PROTO(data->inode)->commit_setup(data, &msg);
1325
1326 dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
1327
1328 task = rpc_run_task(&task_setup_data);
1329 if (IS_ERR(task))
1330 return PTR_ERR(task);
1331 if (how & FLUSH_SYNC)
1332 rpc_wait_for_completion_task(task);
1333 rpc_put_task(task);
1334 return 0;
1335}
1336EXPORT_SYMBOL_GPL(nfs_initiate_commit);
1337
1338/*
1339 * Set up the argument/result storage required for the RPC call.
1340 */
1341void nfs_init_commit(struct nfs_write_data *data,
1342 struct list_head *head,
1343 struct pnfs_layout_segment *lseg)
1344{
1345 struct nfs_page *first = nfs_list_entry(head->next);
1346 struct inode *inode = first->wb_context->path.dentry->d_inode;
1268 1347
1269 /* Set up the RPC argument and reply structs 1348 /* Set up the RPC argument and reply structs
1270 * NB: take care not to mess about with data->commit et al. */ 1349 * NB: take care not to mess about with data->commit et al. */
@@ -1272,7 +1351,9 @@ static int nfs_commit_rpcsetup(struct list_head *head,
1272 list_splice_init(head, &data->pages); 1351 list_splice_init(head, &data->pages);
1273 1352
1274 data->inode = inode; 1353 data->inode = inode;
1275 data->cred = msg.rpc_cred; 1354 data->cred = first->wb_context->cred;
1355 data->lseg = lseg; /* reference transferred */
1356 data->mds_ops = &nfs_commit_ops;
1276 1357
1277 data->args.fh = NFS_FH(data->inode); 1358 data->args.fh = NFS_FH(data->inode);
1278 /* Note: we always request a commit of the entire inode */ 1359 /* Note: we always request a commit of the entire inode */
@@ -1283,18 +1364,25 @@ static int nfs_commit_rpcsetup(struct list_head *head,
1283 data->res.fattr = &data->fattr; 1364 data->res.fattr = &data->fattr;
1284 data->res.verf = &data->verf; 1365 data->res.verf = &data->verf;
1285 nfs_fattr_init(&data->fattr); 1366 nfs_fattr_init(&data->fattr);
1367}
1368EXPORT_SYMBOL_GPL(nfs_init_commit);
1286 1369
1287 /* Set up the initial task struct. */ 1370void nfs_retry_commit(struct list_head *page_list,
1288 NFS_PROTO(inode)->commit_setup(data, &msg); 1371 struct pnfs_layout_segment *lseg)
1289 1372{
1290 dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid); 1373 struct nfs_page *req;
1291 1374
1292 task = rpc_run_task(&task_setup_data); 1375 while (!list_empty(page_list)) {
1293 if (IS_ERR(task)) 1376 req = nfs_list_entry(page_list->next);
1294 return PTR_ERR(task); 1377 nfs_list_remove_request(req);
1295 rpc_put_task(task); 1378 nfs_mark_request_commit(req, lseg);
1296 return 0; 1379 dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
1380 dec_bdi_stat(req->wb_page->mapping->backing_dev_info,
1381 BDI_RECLAIMABLE);
1382 nfs_clear_page_tag_locked(req);
1383 }
1297} 1384}
1385EXPORT_SYMBOL_GPL(nfs_retry_commit);
1298 1386
1299/* 1387/*
1300 * Commit dirty pages 1388 * Commit dirty pages
@@ -1303,7 +1391,6 @@ static int
1303nfs_commit_list(struct inode *inode, struct list_head *head, int how) 1391nfs_commit_list(struct inode *inode, struct list_head *head, int how)
1304{ 1392{
1305 struct nfs_write_data *data; 1393 struct nfs_write_data *data;
1306 struct nfs_page *req;
1307 1394
1308 data = nfs_commitdata_alloc(); 1395 data = nfs_commitdata_alloc();
1309 1396
@@ -1311,17 +1398,10 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
1311 goto out_bad; 1398 goto out_bad;
1312 1399
1313 /* Set up the argument struct */ 1400 /* Set up the argument struct */
1314 return nfs_commit_rpcsetup(head, data, how); 1401 nfs_init_commit(data, head, NULL);
1402 return nfs_initiate_commit(data, NFS_CLIENT(inode), data->mds_ops, how);
1315 out_bad: 1403 out_bad:
1316 while (!list_empty(head)) { 1404 nfs_retry_commit(head, NULL);
1317 req = nfs_list_entry(head->next);
1318 nfs_list_remove_request(req);
1319 nfs_mark_request_commit(req);
1320 dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
1321 dec_bdi_stat(req->wb_page->mapping->backing_dev_info,
1322 BDI_RECLAIMABLE);
1323 nfs_clear_page_tag_locked(req);
1324 }
1325 nfs_commit_clear_lock(NFS_I(inode)); 1405 nfs_commit_clear_lock(NFS_I(inode));
1326 return -ENOMEM; 1406 return -ENOMEM;
1327} 1407}
@@ -1337,14 +1417,12 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata)
1337 task->tk_pid, task->tk_status); 1417 task->tk_pid, task->tk_status);
1338 1418
1339 /* Call the NFS version-specific code */ 1419 /* Call the NFS version-specific code */
1340 if (NFS_PROTO(data->inode)->commit_done(task, data) != 0) 1420 NFS_PROTO(data->inode)->commit_done(task, data);
1341 return;
1342} 1421}
1343 1422
1344static void nfs_commit_release(void *calldata) 1423void nfs_commit_release_pages(struct nfs_write_data *data)
1345{ 1424{
1346 struct nfs_write_data *data = calldata; 1425 struct nfs_page *req;
1347 struct nfs_page *req;
1348 int status = data->task.tk_status; 1426 int status = data->task.tk_status;
1349 1427
1350 while (!list_empty(&data->pages)) { 1428 while (!list_empty(&data->pages)) {
@@ -1378,6 +1456,14 @@ static void nfs_commit_release(void *calldata)
1378 next: 1456 next:
1379 nfs_clear_page_tag_locked(req); 1457 nfs_clear_page_tag_locked(req);
1380 } 1458 }
1459}
1460EXPORT_SYMBOL_GPL(nfs_commit_release_pages);
1461
1462static void nfs_commit_release(void *calldata)
1463{
1464 struct nfs_write_data *data = calldata;
1465
1466 nfs_commit_release_pages(data);
1381 nfs_commit_clear_lock(NFS_I(data->inode)); 1467 nfs_commit_clear_lock(NFS_I(data->inode));
1382 nfs_commitdata_release(calldata); 1468 nfs_commitdata_release(calldata);
1383} 1469}
@@ -1394,23 +1480,28 @@ int nfs_commit_inode(struct inode *inode, int how)
1394{ 1480{
1395 LIST_HEAD(head); 1481 LIST_HEAD(head);
1396 int may_wait = how & FLUSH_SYNC; 1482 int may_wait = how & FLUSH_SYNC;
1397 int res = 0; 1483 int res;
1398 1484
1399 if (!nfs_commit_set_lock(NFS_I(inode), may_wait)) 1485 res = nfs_commit_set_lock(NFS_I(inode), may_wait);
1486 if (res <= 0)
1400 goto out_mark_dirty; 1487 goto out_mark_dirty;
1401 spin_lock(&inode->i_lock);
1402 res = nfs_scan_commit(inode, &head, 0, 0); 1488 res = nfs_scan_commit(inode, &head, 0, 0);
1403 spin_unlock(&inode->i_lock);
1404 if (res) { 1489 if (res) {
1405 int error = nfs_commit_list(inode, &head, how); 1490 int error;
1491
1492 error = pnfs_commit_list(inode, &head, how);
1493 if (error == PNFS_NOT_ATTEMPTED)
1494 error = nfs_commit_list(inode, &head, how);
1406 if (error < 0) 1495 if (error < 0)
1407 return error; 1496 return error;
1408 if (may_wait) 1497 if (!may_wait)
1409 wait_on_bit(&NFS_I(inode)->flags, NFS_INO_COMMIT,
1410 nfs_wait_bit_killable,
1411 TASK_KILLABLE);
1412 else
1413 goto out_mark_dirty; 1498 goto out_mark_dirty;
1499 error = wait_on_bit(&NFS_I(inode)->flags,
1500 NFS_INO_COMMIT,
1501 nfs_wait_bit_killable,
1502 TASK_KILLABLE);
1503 if (error < 0)
1504 return error;
1414 } else 1505 } else
1415 nfs_commit_clear_lock(NFS_I(inode)); 1506 nfs_commit_clear_lock(NFS_I(inode));
1416 return res; 1507 return res;
@@ -1464,7 +1555,22 @@ static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_contr
1464 1555
1465int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) 1556int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
1466{ 1557{
1467 return nfs_commit_unstable_pages(inode, wbc); 1558 int ret;
1559
1560 ret = nfs_commit_unstable_pages(inode, wbc);
1561 if (ret >= 0 && test_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags)) {
1562 int status;
1563 bool sync = true;
1564
1565 if (wbc->sync_mode == WB_SYNC_NONE || wbc->nonblocking ||
1566 wbc->for_background)
1567 sync = false;
1568
1569 status = pnfs_layoutcommit_inode(inode, sync);
1570 if (status < 0)
1571 return status;
1572 }
1573 return ret;
1468} 1574}
1469 1575
1470/* 1576/*