 fs/nfs/callback_proc.c                     |  52
 fs/nfs/dir.c                               |  21
 fs/nfs/direct.c                            |  48
 fs/nfs/file.c                              |   6
 fs/nfs/filelayout/filelayout.c             |  18
 fs/nfs/flexfilelayout/flexfilelayout.c     | 205
 fs/nfs/flexfilelayout/flexfilelayout.h     |   1
 fs/nfs/flexfilelayout/flexfilelayoutdev.c  |  16
 fs/nfs/inode.c                             |  78
 fs/nfs/internal.h                          |  39
 fs/nfs/nfs42proc.c                         |  29
 fs/nfs/nfs4proc.c                          |  71
 fs/nfs/nfs4sysctl.c                        |   2
 fs/nfs/nfs4trace.c                         |   1
 fs/nfs/nfs4trace.h                         | 431
 fs/nfs/nfstrace.h                          |   1
 fs/nfs/pagelist.c                          | 126
 fs/nfs/pnfs.c                              | 180
 fs/nfs/pnfs.h                              |  54
 fs/nfs/pnfs_nfs.c                          |  10
 fs/nfs/read.c                              |  43
 fs/nfs/write.c                             | 133
 include/linux/nfs4.h                       |  14
 include/linux/nfs_fs.h                     |  23
 include/linux/nfs_fs_sb.h                  |   1
 include/linux/nfs_xdr.h                    |   6
 net/sunrpc/clnt.c                          |   1
 net/sunrpc/xprtrdma/backchannel.c          |  26
 net/sunrpc/xprtrdma/fmr_ops.c              |  64
 net/sunrpc/xprtrdma/frwr_ops.c             | 174
 net/sunrpc/xprtrdma/physical_ops.c         |  13
 net/sunrpc/xprtrdma/rpc_rdma.c             |  16
 net/sunrpc/xprtrdma/transport.c            |   3
 net/sunrpc/xprtrdma/verbs.c                |  16
 net/sunrpc/xprtrdma/xprt_rdma.h            |  14
 net/sunrpc/xprtsock.c                      |  63
 36 files changed, 1480 insertions(+), 519 deletions(-)
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 807eb6ef4f91..f0939d097406 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -83,8 +83,11 @@ __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy,
 
         res = htonl(NFS4ERR_BADHANDLE);
         inode = nfs_delegation_find_inode(cps->clp, &args->fh);
-        if (inode == NULL)
+        if (inode == NULL) {
+                trace_nfs4_cb_recall(cps->clp, &args->fh, NULL,
+                                &args->stateid, -ntohl(res));
                 goto out;
+        }
         /* Set up a helper thread to actually return the delegation */
         switch (nfs_async_inode_return_delegation(inode, &args->stateid)) {
         case 0:
@@ -96,7 +99,8 @@ __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy,
         default:
                 res = htonl(NFS4ERR_RESOURCE);
         }
-        trace_nfs4_recall_delegation(inode, -ntohl(res));
+        trace_nfs4_cb_recall(cps->clp, &args->fh, inode,
+                        &args->stateid, -ntohl(res));
         iput(inode);
 out:
         dprintk("%s: exit with status = %d\n", __func__, ntohl(res));
@@ -160,6 +164,22 @@ static struct pnfs_layout_hdr * get_layout_by_fh(struct nfs_client *clp,
         return lo;
 }
 
+/*
+ * Enforce RFC5661 section 12.5.5.2.1. (Layout Recall and Return Sequencing)
+ */
+static bool pnfs_check_stateid_sequence(struct pnfs_layout_hdr *lo,
+                                        const nfs4_stateid *new)
+{
+        u32 oldseq, newseq;
+
+        oldseq = be32_to_cpu(lo->plh_stateid.seqid);
+        newseq = be32_to_cpu(new->seqid);
+
+        if (newseq > oldseq + 1)
+                return false;
+        return true;
+}
+
 static u32 initiate_file_draining(struct nfs_client *clp,
                                   struct cb_layoutrecallargs *args)
 {
@@ -169,34 +189,52 @@ static u32 initiate_file_draining(struct nfs_client *clp,
         LIST_HEAD(free_me_list);
 
         lo = get_layout_by_fh(clp, &args->cbl_fh, &args->cbl_stateid);
-        if (!lo)
+        if (!lo) {
+                trace_nfs4_cb_layoutrecall_file(clp, &args->cbl_fh, NULL,
+                                &args->cbl_stateid, -rv);
                 goto out;
+        }
 
         ino = lo->plh_inode;
 
         spin_lock(&ino->i_lock);
+        if (!pnfs_check_stateid_sequence(lo, &args->cbl_stateid)) {
+                rv = NFS4ERR_DELAY;
+                goto unlock;
+        }
         pnfs_set_layout_stateid(lo, &args->cbl_stateid, true);
         spin_unlock(&ino->i_lock);
 
         pnfs_layoutcommit_inode(ino, false);
 
         spin_lock(&ino->i_lock);
-        if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) ||
-            pnfs_mark_matching_lsegs_invalid(lo, &free_me_list,
-                                        &args->cbl_range)) {
+        /*
+         * Enforce RFC5661 Section 12.5.5.2.1.5 (Bulk Recall and Return)
+         */
+        if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
                 rv = NFS4ERR_DELAY;
                 goto unlock;
         }
 
+        if (pnfs_mark_matching_lsegs_return(lo, &free_me_list,
+                                        &args->cbl_range)) {
+                rv = NFS4_OK;
+                goto unlock;
+        }
+
         if (NFS_SERVER(ino)->pnfs_curr_ld->return_range) {
                 NFS_SERVER(ino)->pnfs_curr_ld->return_range(lo,
                         &args->cbl_range);
         }
+        pnfs_mark_layout_returned_if_empty(lo);
 unlock:
         spin_unlock(&ino->i_lock);
         pnfs_free_lseg_list(&free_me_list);
+        /* Free all lsegs that are attached to commit buckets */
+        nfs_commit_inode(ino, 0);
         pnfs_put_layout_hdr(lo);
-        trace_nfs4_cb_layoutrecall_inode(clp, &args->cbl_fh, ino, -rv);
+        trace_nfs4_cb_layoutrecall_file(clp, &args->cbl_fh, ino,
+                        &args->cbl_stateid, -rv);
         iput(ino);
 out:
         return rv;
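
The pnfs_check_stateid_sequence() helper added above encodes the RFC 5661 sequencing rule: a layout recall may carry a stateid whose seqid is at most one ahead of the client's copy, and a larger jump means an earlier recall or LAYOUTGET reply is still in flight, so the callback is answered with NFS4ERR_DELAY. A standalone user-space sketch of just that rule (stand-in types, not kernel code):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <arpa/inet.h>          /* ntohl(), standing in for be32_to_cpu() */

    struct stateid {
            uint32_t seqid;         /* big-endian on the wire */
    };

    /* accept only seqids that advance by at most one */
    static bool check_stateid_sequence(const struct stateid *cur,
                                       const struct stateid *new)
    {
            uint32_t oldseq = ntohl(cur->seqid);
            uint32_t newseq = ntohl(new->seqid);

            return newseq <= oldseq + 1;
    }

    int main(void)
    {
            struct stateid cur  = { .seqid = htonl(3) };
            struct stateid next = { .seqid = htonl(4) };    /* in sequence */
            struct stateid gap  = { .seqid = htonl(6) };    /* callbacks lost */

            printf("4 after 3: %s\n",
                   check_stateid_sequence(&cur, &next) ? "process" : "NFS4ERR_DELAY");
            printf("6 after 3: %s\n",
                   check_stateid_sequence(&cur, &gap) ? "process" : "NFS4ERR_DELAY");
            return 0;
    }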
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 8a0530921685..c82a21228a34 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -2431,6 +2431,20 @@ int nfs_may_open(struct inode *inode, struct rpc_cred *cred, int openflags)
 }
 EXPORT_SYMBOL_GPL(nfs_may_open);
 
+static int nfs_execute_ok(struct inode *inode, int mask)
+{
+        struct nfs_server *server = NFS_SERVER(inode);
+        int ret;
+
+        if (mask & MAY_NOT_BLOCK)
+                ret = nfs_revalidate_inode_rcu(server, inode);
+        else
+                ret = nfs_revalidate_inode(server, inode);
+        if (ret == 0 && !execute_ok(inode))
+                ret = -EACCES;
+        return ret;
+}
+
 int nfs_permission(struct inode *inode, int mask)
 {
         struct rpc_cred *cred;
@@ -2448,6 +2462,9 @@ int nfs_permission(struct inode *inode, int mask)
         case S_IFLNK:
                 goto out;
         case S_IFREG:
+                if ((mask & MAY_OPEN) &&
+                   nfs_server_capable(inode, NFS_CAP_ATOMIC_OPEN))
+                        return 0;
                 break;
         case S_IFDIR:
                 /*
@@ -2480,8 +2497,8 @@ force_lookup:
                 res = PTR_ERR(cred);
         }
 out:
-        if (!res && (mask & MAY_EXEC) && !execute_ok(inode))
-                res = -EACCES;
+        if (!res && (mask & MAY_EXEC))
+                res = nfs_execute_ok(inode, mask);
 
         dfprintk(VFS, "NFS: permission(%s/%lu), mask=0x%x, res=%d\n",
                 inode->i_sb->s_id, inode->i_ino, mask, res);
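
nfs_execute_ok() above revalidates the cached attributes before testing the execute bits, choosing nfs_revalidate_inode_rcu() when the caller holds RCU context (MAY_NOT_BLOCK) and the sleeping variant otherwise. The final mode test corresponds to the VFS execute_ok() check, roughly the following user-space analogue (a sketch, not the kernel helper):

    #include <stdbool.h>
    #include <stdio.h>
    #include <sys/stat.h>

    /* execute_ok()-style test: any execute bit, or a directory (search) */
    static bool execute_ok_mode(mode_t mode)
    {
            return (mode & (S_IXUSR | S_IXGRP | S_IXOTH)) || S_ISDIR(mode);
    }

    int main(void)
    {
            printf("0644 regular file: %d\n", execute_ok_mode(S_IFREG | 0644)); /* 0 -> -EACCES */
            printf("0755 regular file: %d\n", execute_ok_mode(S_IFREG | 0755)); /* 1 */
            printf("0755 directory:    %d\n", execute_ok_mode(S_IFDIR | 0755)); /* 1 */
            return 0;
    }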
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 4b1d08f56aba..7ab7ec9f4eed 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -117,12 +117,6 @@ static inline int put_dreq(struct nfs_direct_req *dreq)
         return atomic_dec_and_test(&dreq->io_count);
 }
 
-void nfs_direct_set_resched_writes(struct nfs_direct_req *dreq)
-{
-        dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
-}
-EXPORT_SYMBOL_GPL(nfs_direct_set_resched_writes);
-
 static void
 nfs_direct_good_bytes(struct nfs_direct_req *dreq, struct nfs_pgio_header *hdr)
 {
@@ -670,6 +664,10 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
 
         req = nfs_list_entry(reqs.next);
         nfs_direct_setup_mirroring(dreq, &desc, req);
+        if (desc.pg_error < 0) {
+                list_splice_init(&reqs, &failed);
+                goto out_failed;
+        }
 
         list_for_each_entry_safe(req, tmp, &reqs, wb_list) {
                 if (!nfs_pageio_add_request(&desc, req)) {
@@ -677,13 +675,17 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
                         nfs_list_add_request(req, &failed);
                         spin_lock(cinfo.lock);
                         dreq->flags = 0;
-                        dreq->error = -EIO;
+                        if (desc.pg_error < 0)
+                                dreq->error = desc.pg_error;
+                        else
+                                dreq->error = -EIO;
                         spin_unlock(cinfo.lock);
                 }
                 nfs_release_request(req);
         }
         nfs_pageio_complete(&desc);
 
+out_failed:
         while (!list_empty(&failed)) {
                 req = nfs_list_entry(failed.next);
                 nfs_list_remove_request(req);
@@ -727,14 +729,20 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data)
         nfs_direct_write_complete(dreq, data->inode);
 }
 
-static void nfs_direct_error_cleanup(struct nfs_inode *nfsi)
+static void nfs_direct_resched_write(struct nfs_commit_info *cinfo,
+                struct nfs_page *req)
 {
-        /* There is no lock to clear */
+        struct nfs_direct_req *dreq = cinfo->dreq;
+
+        spin_lock(&dreq->lock);
+        dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
+        spin_unlock(&dreq->lock);
+        nfs_mark_request_commit(req, NULL, cinfo, 0);
 }
 
 static const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops = {
         .completion = nfs_direct_commit_complete,
-        .error_cleanup = nfs_direct_error_cleanup,
+        .resched_write = nfs_direct_resched_write,
 };
 
 static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
@@ -839,10 +847,25 @@ static void nfs_write_sync_pgio_error(struct list_head *head)
         }
 }
 
+static void nfs_direct_write_reschedule_io(struct nfs_pgio_header *hdr)
+{
+        struct nfs_direct_req *dreq = hdr->dreq;
+
+        spin_lock(&dreq->lock);
+        if (dreq->error == 0) {
+                dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
+                /* fake unstable write to let common nfs resend pages */
+                hdr->verf.committed = NFS_UNSTABLE;
+                hdr->good_bytes = hdr->args.count;
+        }
+        spin_unlock(&dreq->lock);
+}
+
 static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = {
         .error_cleanup = nfs_write_sync_pgio_error,
         .init_hdr = nfs_direct_pgio_init,
         .completion = nfs_direct_write_completion,
+        .reschedule_io = nfs_direct_write_reschedule_io,
 };
 
 
@@ -900,6 +923,11 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
                 }
 
                 nfs_direct_setup_mirroring(dreq, &desc, req);
+                if (desc.pg_error < 0) {
+                        nfs_free_request(req);
+                        result = desc.pg_error;
+                        break;
+                }
 
                 nfs_lock_request(req);
                 req->wb_index = pos >> PAGE_SHIFT;
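
The new nfs_direct_write_reschedule_io() relies on a small trick that is easy to miss: by rewriting the completed header as an UNSTABLE write that "succeeded" in full, the generic O_DIRECT completion path queues all of its pages for commit, and the commit/resched machinery then resends them. A minimal sketch of just that state change (stand-in types, not the kernel structures):

    #include <stdio.h>

    enum stable_how { NFS_UNSTABLE, NFS_DATA_SYNC, NFS_FILE_SYNC };

    struct pgio_header {
            enum stable_how committed;
            unsigned int good_bytes;
            unsigned int args_count;
    };

    /* analogue of the body of nfs_direct_write_reschedule_io() */
    static void reschedule_io(struct pgio_header *hdr)
    {
            hdr->committed = NFS_UNSTABLE;     /* force the commit/resend path */
            hdr->good_bytes = hdr->args_count; /* pretend every byte was written */
    }

    int main(void)
    {
            struct pgio_header hdr = { .committed = NFS_FILE_SYNC, .args_count = 4096 };

            reschedule_io(&hdr);
            printf("committed=%d good_bytes=%u\n", hdr.committed, hdr.good_bytes);
            return 0;
    }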
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 93e236429c5d..4ef8f5addcad 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -514,7 +514,7 @@ static void nfs_check_dirty_writeback(struct page *page,
          * so it will not block due to pages that will shortly be freeable.
          */
         nfsi = NFS_I(mapping->host);
-        if (test_bit(NFS_INO_COMMIT, &nfsi->flags)) {
+        if (atomic_read(&nfsi->commit_info.rpcs_out)) {
                 *writeback = true;
                 return;
         }
@@ -545,7 +545,7 @@ static int nfs_launder_page(struct page *page)
                 inode->i_ino, (long long)page_offset(page));
 
         nfs_fscache_wait_on_page_write(nfsi, page);
-        return nfs_wb_page(inode, page);
+        return nfs_wb_launder_page(inode, page);
 }
 
 static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file,
@@ -756,7 +756,7 @@ do_unlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
 
         l_ctx = nfs_get_lock_context(nfs_file_open_context(filp));
         if (!IS_ERR(l_ctx)) {
-                status = nfs_iocounter_wait(&l_ctx->io_count);
+                status = nfs_iocounter_wait(l_ctx);
                 nfs_put_lock_context(l_ctx);
                 if (status < 0)
                         return status;
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index 02ec07973bc4..bb1f4e7a3270 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -202,6 +202,7 @@ static int filelayout_async_handle_error(struct rpc_task *task,
                         task->tk_status);
                 nfs4_mark_deviceid_unavailable(devid);
                 pnfs_error_mark_layout_for_return(inode, lseg);
+                pnfs_set_lo_fail(lseg);
                 rpc_wake_up(&tbl->slot_tbl_waitq);
                 /* fall through */
         default:
@@ -883,13 +884,19 @@ static void
 filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio,
                         struct nfs_page *req)
 {
-        if (!pgio->pg_lseg)
+        if (!pgio->pg_lseg) {
                 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
                                                    req->wb_context,
                                                    0,
                                                    NFS4_MAX_UINT64,
                                                    IOMODE_READ,
                                                    GFP_KERNEL);
+                if (IS_ERR(pgio->pg_lseg)) {
+                        pgio->pg_error = PTR_ERR(pgio->pg_lseg);
+                        pgio->pg_lseg = NULL;
+                        return;
+                }
+        }
         /* If no lseg, fall back to read through mds */
         if (pgio->pg_lseg == NULL)
                 nfs_pageio_reset_read_mds(pgio);
@@ -902,13 +909,20 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
         struct nfs_commit_info cinfo;
         int status;
 
-        if (!pgio->pg_lseg)
+        if (!pgio->pg_lseg) {
                 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
                                                    req->wb_context,
                                                    0,
                                                    NFS4_MAX_UINT64,
                                                    IOMODE_RW,
                                                    GFP_NOFS);
+                if (IS_ERR(pgio->pg_lseg)) {
+                        pgio->pg_error = PTR_ERR(pgio->pg_lseg);
+                        pgio->pg_lseg = NULL;
+                        return;
+                }
+        }
+
         /* If no lseg, fall back to write through mds */
         if (pgio->pg_lseg == NULL)
                 goto out_mds;
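
Both pg_init hunks above follow the same pattern: pnfs_update_layout() may now hand back an ERR_PTR() instead of NULL, and the decoded errno is stored in pg_error so callers can distinguish a hard failure from an ordinary fall-back to the MDS. A user-space sketch of the encoding, with stand-ins mirroring the kernel's <linux/err.h> helpers:

    #include <stdio.h>

    #define MAX_ERRNO 4095

    /* stand-ins for the kernel's ERR_PTR()/PTR_ERR()/IS_ERR() */
    static inline void *ERR_PTR(long error) { return (void *)error; }
    static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
    static inline int IS_ERR(const void *ptr)
    {
            return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
    }

    struct pgio {
            void *pg_lseg;
            int pg_error;
    };

    static void pg_init(struct pgio *pgio, void *lseg)
    {
            pgio->pg_lseg = lseg;
            if (IS_ERR(pgio->pg_lseg)) {
                    pgio->pg_error = (int)PTR_ERR(pgio->pg_lseg);
                    pgio->pg_lseg = NULL;   /* hard error, not a fall-back */
            }
    }

    int main(void)
    {
            struct pgio pgio = { 0 };

            pg_init(&pgio, ERR_PTR(-12));   /* -ENOMEM */
            printf("pg_error=%d pg_lseg=%p\n", pgio.pg_error, pgio.pg_lseg);
            return 0;
    }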
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 03516c80855a..6594e9f903a0 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -145,7 +145,7 @@ static bool ff_mirror_match_fh(const struct nfs4_ff_layout_mirror *m1,
                 return false;
         for (i = 0; i < m1->fh_versions_cnt; i++) {
                 bool found_fh = false;
-                for (j = 0; j < m2->fh_versions_cnt; i++) {
+                for (j = 0; j < m2->fh_versions_cnt; j++) {
                         if (nfs_compare_fh(&m1->fh_versions[i],
                                            &m2->fh_versions[j]) == 0) {
                                 found_fh = true;
@@ -505,9 +505,17 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
         }
 
         p = xdr_inline_decode(&stream, 4);
-        if (p)
-                fls->flags = be32_to_cpup(p);
+        if (!p)
+                goto out_sort_mirrors;
+        fls->flags = be32_to_cpup(p);
+
+        p = xdr_inline_decode(&stream, 4);
+        if (!p)
+                goto out_sort_mirrors;
+        for (i=0; i < fls->mirror_array_cnt; i++)
+                fls->mirror_array[i]->report_interval = be32_to_cpup(p);
 
+out_sort_mirrors:
         ff_layout_sort_mirrors(fls);
         rc = ff_layout_check_layout(lgr);
         if (rc)
@@ -603,7 +611,9 @@ nfs4_ff_layoutstat_start_io(struct nfs4_ff_layout_mirror *mirror,
         mirror->start_time = now;
         if (ktime_equal(mirror->last_report_time, notime))
                 mirror->last_report_time = now;
-        if (layoutstats_timer != 0)
+        if (mirror->report_interval != 0)
+                report_interval = (s64)mirror->report_interval * 1000LL;
+        else if (layoutstats_timer != 0)
                 report_interval = (s64)layoutstats_timer * 1000LL;
         if (ktime_to_ms(ktime_sub(now, mirror->last_report_time)) >=
                         report_interval) {
@@ -785,13 +795,19 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio,
         int ds_idx;
 
         /* Use full layout for now */
-        if (!pgio->pg_lseg)
+        if (!pgio->pg_lseg) {
                 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
                                                    req->wb_context,
                                                    0,
                                                    NFS4_MAX_UINT64,
                                                    IOMODE_READ,
                                                    GFP_KERNEL);
+                if (IS_ERR(pgio->pg_lseg)) {
+                        pgio->pg_error = PTR_ERR(pgio->pg_lseg);
+                        pgio->pg_lseg = NULL;
+                        return;
+                }
+        }
         /* If no lseg, fall back to read through mds */
         if (pgio->pg_lseg == NULL)
                 goto out_mds;
@@ -825,13 +841,19 @@ ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio,
         int i;
         int status;
 
-        if (!pgio->pg_lseg)
+        if (!pgio->pg_lseg) {
                 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
                                                    req->wb_context,
                                                    0,
                                                    NFS4_MAX_UINT64,
                                                    IOMODE_RW,
                                                    GFP_NOFS);
+                if (IS_ERR(pgio->pg_lseg)) {
+                        pgio->pg_error = PTR_ERR(pgio->pg_lseg);
+                        pgio->pg_lseg = NULL;
+                        return;
+                }
+        }
         /* If no lseg, fall back to write through mds */
         if (pgio->pg_lseg == NULL)
                 goto out_mds;
@@ -867,18 +889,25 @@ static unsigned int
 ff_layout_pg_get_mirror_count_write(struct nfs_pageio_descriptor *pgio,
                                     struct nfs_page *req)
 {
-        if (!pgio->pg_lseg)
+        if (!pgio->pg_lseg) {
                 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
                                                    req->wb_context,
                                                    0,
                                                    NFS4_MAX_UINT64,
                                                    IOMODE_RW,
                                                    GFP_NOFS);
+                if (IS_ERR(pgio->pg_lseg)) {
+                        pgio->pg_error = PTR_ERR(pgio->pg_lseg);
+                        pgio->pg_lseg = NULL;
+                        goto out;
+                }
+        }
         if (pgio->pg_lseg)
                 return FF_LAYOUT_MIRROR_COUNT(pgio->pg_lseg);
 
         /* no lseg means that pnfs is not in use, so no mirroring here */
         nfs_pageio_reset_write_mds(pgio);
+out:
         return 1;
 }
 
@@ -912,18 +941,7 @@ static void ff_layout_reset_write(struct nfs_pgio_header *hdr, bool retry_pnfs)
                         hdr->args.count,
                         (unsigned long long)hdr->args.offset);
 
-                if (!hdr->dreq) {
-                        struct nfs_open_context *ctx;
-
-                        ctx = nfs_list_entry(hdr->pages.next)->wb_context;
-                        set_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags);
-                        hdr->completion_ops->error_cleanup(&hdr->pages);
-                } else {
-                        nfs_direct_set_resched_writes(hdr->dreq);
-                        /* fake unstable write to let common nfs resend pages */
-                        hdr->verf.committed = NFS_UNSTABLE;
-                        hdr->good_bytes = hdr->args.count;
-                }
+                hdr->completion_ops->reschedule_io(hdr);
                 return;
         }
 
@@ -1101,7 +1119,7 @@ static int ff_layout_async_handle_error_v3(struct rpc_task *task,
         return -NFS4ERR_RESET_TO_PNFS;
 out_retry:
         task->tk_status = 0;
-        rpc_restart_call(task);
+        rpc_restart_call_prepare(task);
         rpc_delay(task, NFS_JUKEBOX_RETRY_TIME);
         return -EAGAIN;
 }
@@ -1159,6 +1177,14 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg,
                 }
         }
 
+        switch (status) {
+        case NFS4ERR_DELAY:
+        case NFS4ERR_GRACE:
+                return;
+        default:
+                break;
+        }
+
         mirror = FF_LAYOUT_COMP(lseg, idx);
         err = ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout),
                                        mirror, offset, length, status, opnum,
@@ -1242,14 +1268,31 @@ ff_layout_reset_to_mds(struct pnfs_layout_segment *lseg, int idx)
         return ff_layout_test_devid_unavailable(node);
 }
 
-static int ff_layout_read_prepare_common(struct rpc_task *task,
+static void ff_layout_read_record_layoutstats_start(struct rpc_task *task,
                                          struct nfs_pgio_header *hdr)
 {
+        if (test_and_set_bit(NFS_IOHDR_STAT, &hdr->flags))
+                return;
         nfs4_ff_layout_stat_io_start_read(hdr->inode,
                         FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
                         hdr->args.count,
                         task->tk_start);
+}
+
+static void ff_layout_read_record_layoutstats_done(struct rpc_task *task,
+                struct nfs_pgio_header *hdr)
+{
+        if (!test_and_clear_bit(NFS_IOHDR_STAT, &hdr->flags))
+                return;
+        nfs4_ff_layout_stat_io_end_read(task,
+                        FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
+                        hdr->args.count,
+                        hdr->res.count);
+}
 
+static int ff_layout_read_prepare_common(struct rpc_task *task,
+                struct nfs_pgio_header *hdr)
+{
         if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) {
                 rpc_exit(task, -EIO);
                 return -EIO;
@@ -1265,6 +1308,7 @@ static int ff_layout_read_prepare_common(struct rpc_task *task,
         }
         hdr->pgio_done_cb = ff_layout_read_done_cb;
 
+        ff_layout_read_record_layoutstats_start(task, hdr);
         return 0;
 }
 
@@ -1323,10 +1367,6 @@ static void ff_layout_read_call_done(struct rpc_task *task, void *data)
 
         dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status);
 
-        nfs4_ff_layout_stat_io_end_read(task,
-                        FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
-                        hdr->args.count, hdr->res.count);
-
         if (test_bit(NFS_IOHDR_REDO, &hdr->flags) &&
             task->tk_status == 0) {
                 nfs4_sequence_done(task, &hdr->res.seq_res);
@@ -1341,10 +1381,20 @@ static void ff_layout_read_count_stats(struct rpc_task *task, void *data)
 {
         struct nfs_pgio_header *hdr = data;
 
+        ff_layout_read_record_layoutstats_done(task, hdr);
         rpc_count_iostats_metrics(task,
             &NFS_CLIENT(hdr->inode)->cl_metrics[NFSPROC4_CLNT_READ]);
 }
 
+static void ff_layout_read_release(void *data)
+{
+        struct nfs_pgio_header *hdr = data;
+
+        ff_layout_read_record_layoutstats_done(&hdr->task, hdr);
+        pnfs_generic_rw_release(data);
+}
+
+
 static int ff_layout_write_done_cb(struct rpc_task *task,
                                    struct nfs_pgio_header *hdr)
 {
@@ -1362,15 +1412,12 @@ static int ff_layout_write_done_cb(struct rpc_task *task,
 
         switch (err) {
         case -NFS4ERR_RESET_TO_PNFS:
-                pnfs_set_retry_layoutget(hdr->lseg->pls_layout);
                 ff_layout_reset_write(hdr, true);
                 return task->tk_status;
         case -NFS4ERR_RESET_TO_MDS:
-                pnfs_clear_retry_layoutget(hdr->lseg->pls_layout);
                 ff_layout_reset_write(hdr, false);
                 return task->tk_status;
         case -EAGAIN:
-                rpc_restart_call_prepare(task);
                 return -EAGAIN;
         }
 
@@ -1402,11 +1449,9 @@ static int ff_layout_commit_done_cb(struct rpc_task *task,
 
         switch (err) {
         case -NFS4ERR_RESET_TO_PNFS:
-                pnfs_set_retry_layoutget(data->lseg->pls_layout);
                 pnfs_generic_prepare_to_resend_writes(data);
                 return -EAGAIN;
         case -NFS4ERR_RESET_TO_MDS:
-                pnfs_clear_retry_layoutget(data->lseg->pls_layout);
                 pnfs_generic_prepare_to_resend_writes(data);
                 return -EAGAIN;
         case -EAGAIN:
@@ -1421,14 +1466,31 @@ static int ff_layout_commit_done_cb(struct rpc_task *task,
         return 0;
 }
 
-static int ff_layout_write_prepare_common(struct rpc_task *task,
+static void ff_layout_write_record_layoutstats_start(struct rpc_task *task,
                                           struct nfs_pgio_header *hdr)
 {
+        if (test_and_set_bit(NFS_IOHDR_STAT, &hdr->flags))
+                return;
         nfs4_ff_layout_stat_io_start_write(hdr->inode,
                         FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
                         hdr->args.count,
                         task->tk_start);
+}
+
+static void ff_layout_write_record_layoutstats_done(struct rpc_task *task,
+                struct nfs_pgio_header *hdr)
+{
+        if (!test_and_clear_bit(NFS_IOHDR_STAT, &hdr->flags))
+                return;
+        nfs4_ff_layout_stat_io_end_write(task,
+                        FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
+                        hdr->args.count, hdr->res.count,
+                        hdr->res.verf->committed);
+}
 
+static int ff_layout_write_prepare_common(struct rpc_task *task,
+                struct nfs_pgio_header *hdr)
+{
         if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) {
                 rpc_exit(task, -EIO);
                 return -EIO;
@@ -1445,6 +1507,7 @@ static int ff_layout_write_prepare_common(struct rpc_task *task,
                 return -EAGAIN;
         }
 
+        ff_layout_write_record_layoutstats_start(task, hdr);
         return 0;
 }
 
@@ -1480,11 +1543,6 @@ static void ff_layout_write_call_done(struct rpc_task *task, void *data)
 {
         struct nfs_pgio_header *hdr = data;
 
-        nfs4_ff_layout_stat_io_end_write(task,
-                        FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
-                        hdr->args.count, hdr->res.count,
-                        hdr->res.verf->committed);
-
         if (test_bit(NFS_IOHDR_REDO, &hdr->flags) &&
             task->tk_status == 0) {
                 nfs4_sequence_done(task, &hdr->res.seq_res);
@@ -1499,18 +1557,53 @@ static void ff_layout_write_count_stats(struct rpc_task *task, void *data)
 {
         struct nfs_pgio_header *hdr = data;
 
+        ff_layout_write_record_layoutstats_done(task, hdr);
         rpc_count_iostats_metrics(task,
             &NFS_CLIENT(hdr->inode)->cl_metrics[NFSPROC4_CLNT_WRITE]);
 }
 
-static void ff_layout_commit_prepare_common(struct rpc_task *task,
+static void ff_layout_write_release(void *data)
+{
+        struct nfs_pgio_header *hdr = data;
+
+        ff_layout_write_record_layoutstats_done(&hdr->task, hdr);
+        pnfs_generic_rw_release(data);
+}
+
+static void ff_layout_commit_record_layoutstats_start(struct rpc_task *task,
                 struct nfs_commit_data *cdata)
 {
+        if (test_and_set_bit(NFS_IOHDR_STAT, &cdata->flags))
+                return;
         nfs4_ff_layout_stat_io_start_write(cdata->inode,
                         FF_LAYOUT_COMP(cdata->lseg, cdata->ds_commit_index),
                         0, task->tk_start);
 }
 
+static void ff_layout_commit_record_layoutstats_done(struct rpc_task *task,
+                struct nfs_commit_data *cdata)
+{
+        struct nfs_page *req;
+        __u64 count = 0;
+
+        if (!test_and_clear_bit(NFS_IOHDR_STAT, &cdata->flags))
+                return;
+
+        if (task->tk_status == 0) {
+                list_for_each_entry(req, &cdata->pages, wb_list)
+                        count += req->wb_bytes;
+        }
+        nfs4_ff_layout_stat_io_end_write(task,
+                        FF_LAYOUT_COMP(cdata->lseg, cdata->ds_commit_index),
+                        count, count, NFS_FILE_SYNC);
+}
+
+static void ff_layout_commit_prepare_common(struct rpc_task *task,
+                struct nfs_commit_data *cdata)
+{
+        ff_layout_commit_record_layoutstats_start(task, cdata);
+}
+
 static void ff_layout_commit_prepare_v3(struct rpc_task *task, void *data)
 {
         ff_layout_commit_prepare_common(task, data);
@@ -1531,19 +1624,6 @@ static void ff_layout_commit_prepare_v4(struct rpc_task *task, void *data)
 
 static void ff_layout_commit_done(struct rpc_task *task, void *data)
 {
-        struct nfs_commit_data *cdata = data;
-        struct nfs_page *req;
-        __u64 count = 0;
-
-        if (task->tk_status == 0) {
-                list_for_each_entry(req, &cdata->pages, wb_list)
-                        count += req->wb_bytes;
-        }
-
-        nfs4_ff_layout_stat_io_end_write(task,
-                        FF_LAYOUT_COMP(cdata->lseg, cdata->ds_commit_index),
-                        count, count, NFS_FILE_SYNC);
-
         pnfs_generic_write_commit_done(task, data);
 }
 
@@ -1551,50 +1631,59 @@ static void ff_layout_commit_count_stats(struct rpc_task *task, void *data)
 {
         struct nfs_commit_data *cdata = data;
 
+        ff_layout_commit_record_layoutstats_done(task, cdata);
         rpc_count_iostats_metrics(task,
             &NFS_CLIENT(cdata->inode)->cl_metrics[NFSPROC4_CLNT_COMMIT]);
 }
 
+static void ff_layout_commit_release(void *data)
+{
+        struct nfs_commit_data *cdata = data;
+
+        ff_layout_commit_record_layoutstats_done(&cdata->task, cdata);
+        pnfs_generic_commit_release(data);
+}
+
 static const struct rpc_call_ops ff_layout_read_call_ops_v3 = {
         .rpc_call_prepare = ff_layout_read_prepare_v3,
         .rpc_call_done = ff_layout_read_call_done,
         .rpc_count_stats = ff_layout_read_count_stats,
-        .rpc_release = pnfs_generic_rw_release,
+        .rpc_release = ff_layout_read_release,
 };
 
 static const struct rpc_call_ops ff_layout_read_call_ops_v4 = {
         .rpc_call_prepare = ff_layout_read_prepare_v4,
         .rpc_call_done = ff_layout_read_call_done,
         .rpc_count_stats = ff_layout_read_count_stats,
-        .rpc_release = pnfs_generic_rw_release,
+        .rpc_release = ff_layout_read_release,
 };
 
 static const struct rpc_call_ops ff_layout_write_call_ops_v3 = {
         .rpc_call_prepare = ff_layout_write_prepare_v3,
         .rpc_call_done = ff_layout_write_call_done,
         .rpc_count_stats = ff_layout_write_count_stats,
-        .rpc_release = pnfs_generic_rw_release,
+        .rpc_release = ff_layout_write_release,
 };
 
 static const struct rpc_call_ops ff_layout_write_call_ops_v4 = {
         .rpc_call_prepare = ff_layout_write_prepare_v4,
         .rpc_call_done = ff_layout_write_call_done,
         .rpc_count_stats = ff_layout_write_count_stats,
-        .rpc_release = pnfs_generic_rw_release,
+        .rpc_release = ff_layout_write_release,
 };
 
 static const struct rpc_call_ops ff_layout_commit_call_ops_v3 = {
         .rpc_call_prepare = ff_layout_commit_prepare_v3,
         .rpc_call_done = ff_layout_commit_done,
         .rpc_count_stats = ff_layout_commit_count_stats,
-        .rpc_release = pnfs_generic_commit_release,
+        .rpc_release = ff_layout_commit_release,
 };
 
 static const struct rpc_call_ops ff_layout_commit_call_ops_v4 = {
         .rpc_call_prepare = ff_layout_commit_prepare_v4,
         .rpc_call_done = ff_layout_commit_done,
         .rpc_count_stats = ff_layout_commit_count_stats,
-        .rpc_release = pnfs_generic_commit_release,
+        .rpc_release = ff_layout_commit_release,
 };
 
 static enum pnfs_try_status
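
A theme of the flexfiles changes above is that layoutstats start/end accounting can now be reached from two paths: the "end" sample may be taken from rpc_call_done, or, if the RPC never got that far, from rpc_release. The NFS_IOHDR_STAT test_and_set_bit/test_and_clear_bit pair makes each side idempotent so a sample is recorded exactly once. A C11 sketch of the same pattern using an atomic exchange (illustrative only, not the kernel bitops):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    static atomic_bool stat_active;

    static void record_start(void)
    {
            if (atomic_exchange(&stat_active, true))
                    return;                 /* already counted */
            puts("layoutstats: start recorded");
    }

    static void record_done(void)
    {
            if (!atomic_exchange(&stat_active, false))
                    return;                 /* the other path got here first */
            puts("layoutstats: end recorded");
    }

    int main(void)
    {
            record_start();
            record_start();  /* retransmit: ignored */
            record_done();   /* rpc_call_done path */
            record_done();   /* rpc_release path: ignored */
            return 0;
    }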
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h
index 2bb08bc6aaf0..dd353bb7dc0a 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.h
+++ b/fs/nfs/flexfilelayout/flexfilelayout.h
@@ -85,6 +85,7 @@ struct nfs4_ff_layout_mirror {
         struct nfs4_ff_layoutstat write_stat;
         ktime_t start_time;
         ktime_t last_report_time;
+        u32 report_interval;
 };
 
 struct nfs4_ff_layout_segment {
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index e125e55de86d..bd0327541366 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -429,22 +429,14 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
                                  mirror, lseg->pls_range.offset,
                                  lseg->pls_range.length, NFS4ERR_NXIO,
                                  OP_ILLEGAL, GFP_NOIO);
-                if (fail_return) {
-                        pnfs_error_mark_layout_for_return(ino, lseg);
-                        if (ff_layout_has_available_ds(lseg))
-                                pnfs_set_retry_layoutget(lseg->pls_layout);
-                        else
-                                pnfs_clear_retry_layoutget(lseg->pls_layout);
-
-                } else {
+                if (!fail_return) {
                         if (ff_layout_has_available_ds(lseg))
                                 set_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
                                         &lseg->pls_layout->plh_flags);
-                        else {
+                        else
                                 pnfs_error_mark_layout_for_return(ino, lseg);
-                                pnfs_clear_retry_layoutget(lseg->pls_layout);
-                        }
-                }
+                } else
+                        pnfs_error_mark_layout_for_return(ino, lseg);
         }
 out_update_creds:
         if (ff_layout_update_mirror_cred(mirror, ds))
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index bdb4dc7b4ecd..c11e855e0e18 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -71,19 +71,25 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fattr)
         return nfs_fileid_to_ino_t(fattr->fileid);
 }
 
-/**
- * nfs_wait_bit_killable - helper for functions that are sleeping on bit locks
- * @word: long word containing the bit lock
- */
-int nfs_wait_bit_killable(struct wait_bit_key *key, int mode)
+static int nfs_wait_killable(int mode)
 {
         freezable_schedule_unsafe();
         if (signal_pending_state(mode, current))
                 return -ERESTARTSYS;
         return 0;
 }
+
+int nfs_wait_bit_killable(struct wait_bit_key *key, int mode)
+{
+        return nfs_wait_killable(mode);
+}
 EXPORT_SYMBOL_GPL(nfs_wait_bit_killable);
 
+int nfs_wait_atomic_killable(atomic_t *p)
+{
+        return nfs_wait_killable(TASK_KILLABLE);
+}
+
 /**
  * nfs_compat_user_ino64 - returns the user-visible inode number
  * @fileid: 64-bit fileid
@@ -700,7 +706,7 @@ static void nfs_init_lock_context(struct nfs_lock_context *l_ctx)
         l_ctx->lockowner.l_owner = current->files;
         l_ctx->lockowner.l_pid = current->tgid;
         INIT_LIST_HEAD(&l_ctx->list);
-        nfs_iocounter_init(&l_ctx->io_count);
+        atomic_set(&l_ctx->io_count, 0);
 }
 
 static struct nfs_lock_context *__nfs_find_lock_context(struct nfs_open_context *ctx)
@@ -913,6 +919,12 @@ void nfs_file_clear_open_context(struct file *filp)
         if (ctx) {
                 struct inode *inode = d_inode(ctx->dentry);
 
+                /*
+                 * A fatal error occurred on a previous write. Try to
+                 * write back every page again.
+                 */
+                if (ctx->error < 0)
+                        invalidate_inode_pages2(inode->i_mapping);
                 filp->private_data = NULL;
                 spin_lock(&inode->i_lock);
                 list_move_tail(&ctx->list, &NFS_I(inode)->open_files);
@@ -1663,6 +1675,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
         unsigned long invalid = 0;
         unsigned long now = jiffies;
         unsigned long save_cache_validity;
+        bool cache_revalidated = true;
 
         dfprintk(VFS, "NFS: %s(%s/%lu fh_crc=0x%08x ct=%d info=0x%x)\n",
                 __func__, inode->i_sb->s_id, inode->i_ino,
@@ -1724,22 +1737,28 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
                         nfs_force_lookup_revalidate(inode);
                         inode->i_version = fattr->change_attr;
                 }
-        } else
+        } else {
                 nfsi->cache_validity |= save_cache_validity;
+                cache_revalidated = false;
+        }
 
         if (fattr->valid & NFS_ATTR_FATTR_MTIME) {
                 memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
-        } else if (server->caps & NFS_CAP_MTIME)
+        } else if (server->caps & NFS_CAP_MTIME) {
                 nfsi->cache_validity |= save_cache_validity &
                                 (NFS_INO_INVALID_ATTR
                                 | NFS_INO_REVAL_FORCED);
+                cache_revalidated = false;
+        }
 
         if (fattr->valid & NFS_ATTR_FATTR_CTIME) {
                 memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
-        } else if (server->caps & NFS_CAP_CTIME)
+        } else if (server->caps & NFS_CAP_CTIME) {
                 nfsi->cache_validity |= save_cache_validity &
                                 (NFS_INO_INVALID_ATTR
                                 | NFS_INO_REVAL_FORCED);
+                cache_revalidated = false;
+        }
 
         /* Check if our cached file size is stale */
         if (fattr->valid & NFS_ATTR_FATTR_SIZE) {
@@ -1759,19 +1778,23 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
                                         (long long)cur_isize,
                                         (long long)new_isize);
                 }
-        } else
+        } else {
                 nfsi->cache_validity |= save_cache_validity &
                                 (NFS_INO_INVALID_ATTR
                                 | NFS_INO_REVAL_PAGECACHE
                                 | NFS_INO_REVAL_FORCED);
+                cache_revalidated = false;
+        }
 
 
         if (fattr->valid & NFS_ATTR_FATTR_ATIME)
                 memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime));
-        else if (server->caps & NFS_CAP_ATIME)
+        else if (server->caps & NFS_CAP_ATIME) {
                 nfsi->cache_validity |= save_cache_validity &
                                 (NFS_INO_INVALID_ATIME
                                 | NFS_INO_REVAL_FORCED);
+                cache_revalidated = false;
+        }
 
         if (fattr->valid & NFS_ATTR_FATTR_MODE) {
                 if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)) {
@@ -1780,36 +1803,42 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
                         inode->i_mode = newmode;
                         invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
                 }
-        } else if (server->caps & NFS_CAP_MODE)
+        } else if (server->caps & NFS_CAP_MODE) {
                 nfsi->cache_validity |= save_cache_validity &
                                 (NFS_INO_INVALID_ATTR
                                 | NFS_INO_INVALID_ACCESS
                                 | NFS_INO_INVALID_ACL
                                 | NFS_INO_REVAL_FORCED);
+                cache_revalidated = false;
+        }
 
         if (fattr->valid & NFS_ATTR_FATTR_OWNER) {
                 if (!uid_eq(inode->i_uid, fattr->uid)) {
                         invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
                         inode->i_uid = fattr->uid;
                 }
-        } else if (server->caps & NFS_CAP_OWNER)
+        } else if (server->caps & NFS_CAP_OWNER) {
                 nfsi->cache_validity |= save_cache_validity &
                                 (NFS_INO_INVALID_ATTR
                                 | NFS_INO_INVALID_ACCESS
                                 | NFS_INO_INVALID_ACL
                                 | NFS_INO_REVAL_FORCED);
+                cache_revalidated = false;
+        }
 
         if (fattr->valid & NFS_ATTR_FATTR_GROUP) {
                 if (!gid_eq(inode->i_gid, fattr->gid)) {
                         invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
                         inode->i_gid = fattr->gid;
                 }
-        } else if (server->caps & NFS_CAP_OWNER_GROUP)
+        } else if (server->caps & NFS_CAP_OWNER_GROUP) {
                 nfsi->cache_validity |= save_cache_validity &
                                 (NFS_INO_INVALID_ATTR
                                 | NFS_INO_INVALID_ACCESS
                                 | NFS_INO_INVALID_ACL
                                 | NFS_INO_REVAL_FORCED);
+                cache_revalidated = false;
+        }
 
         if (fattr->valid & NFS_ATTR_FATTR_NLINK) {
                 if (inode->i_nlink != fattr->nlink) {
@@ -1818,19 +1847,22 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
                         invalid |= NFS_INO_INVALID_DATA;
                         set_nlink(inode, fattr->nlink);
                 }
-        } else if (server->caps & NFS_CAP_NLINK)
+        } else if (server->caps & NFS_CAP_NLINK) {
                 nfsi->cache_validity |= save_cache_validity &
                                 (NFS_INO_INVALID_ATTR
                                 | NFS_INO_REVAL_FORCED);
+                cache_revalidated = false;
+        }
 
         if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) {
                 /*
                  * report the blocks in 512byte units
                  */
                 inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used);
-        }
-        if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED)
+        } else if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED)
                 inode->i_blocks = fattr->du.nfs2.blocks;
+        else
+                cache_revalidated = false;
 
         /* Update attrtimeo value if we're out of the unstable period */
         if (invalid & NFS_INO_INVALID_ATTR) {
@@ -1840,9 +1872,13 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
                 /* Set barrier to be more recent than all outstanding updates */
                 nfsi->attr_gencount = nfs_inc_attr_generation_counter();
         } else {
-                if (!time_in_range_open(now, nfsi->attrtimeo_timestamp, nfsi->attrtimeo_timestamp + nfsi->attrtimeo)) {
-                        if ((nfsi->attrtimeo <<= 1) > NFS_MAXATTRTIMEO(inode))
-                                nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode);
+                if (cache_revalidated) {
+                        if (!time_in_range_open(now, nfsi->attrtimeo_timestamp,
+                                nfsi->attrtimeo_timestamp + nfsi->attrtimeo)) {
+                                nfsi->attrtimeo <<= 1;
+                                if (nfsi->attrtimeo > NFS_MAXATTRTIMEO(inode))
+                                        nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode);
+                        }
                         nfsi->attrtimeo_timestamp = now;
                 }
                 /* Set the barrier to be more recent than this fattr */
@@ -1851,7 +1887,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
         }
 
         /* Don't declare attrcache up to date if there were no attrs! */
-        if (fattr->valid != 0)
+        if (cache_revalidated)
                 invalid &= ~NFS_INO_INVALID_ATTR;
 
         /* Don't invalidate the data if we were to blame */
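
The attrtimeo hunk above makes the exponential growth of the attribute-cache timeout conditional on cache_revalidated, i.e. the timeout only doubles when the server reply actually revalidated every cached attribute. The growth itself is a simple doubling clamped to the mount's maximum, as in this sketch (acregmin/acregmax-style values assumed):

    #include <stdio.h>

    int main(void)
    {
            unsigned long attrtimeo = 3;       /* e.g. acregmin, seconds */
            const unsigned long maxtimeo = 60; /* e.g. acregmax, seconds */

            for (int i = 0; i < 6; i++) {
                    printf("attrtimeo = %lus\n", attrtimeo);
                    attrtimeo <<= 1;           /* double after a clean revalidation */
                    if (attrtimeo > maxtimeo)
                            attrtimeo = maxtimeo;
            }
            return 0;
    }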
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 9dea85f7f918..4e8cc942336c 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -238,7 +238,7 @@ extern void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
                               struct nfs_pgio_header *hdr,
                               void (*release)(struct nfs_pgio_header *hdr));
 void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos);
-int nfs_iocounter_wait(struct nfs_io_counter *c);
+int nfs_iocounter_wait(struct nfs_lock_context *l_ctx);
 
 extern const struct nfs_pageio_ops nfs_pgio_rw_ops;
 struct nfs_pgio_header *nfs_pgio_header_alloc(const struct nfs_rw_ops *);
@@ -252,18 +252,18 @@ void nfs_free_request(struct nfs_page *req);
 struct nfs_pgio_mirror *
 nfs_pgio_current_mirror(struct nfs_pageio_descriptor *desc);
 
-static inline void nfs_iocounter_init(struct nfs_io_counter *c)
-{
-        c->flags = 0;
-        atomic_set(&c->io_count, 0);
-}
-
 static inline bool nfs_pgio_has_mirroring(struct nfs_pageio_descriptor *desc)
 {
         WARN_ON_ONCE(desc->pg_mirror_count < 1);
         return desc->pg_mirror_count > 1;
 }
 
+static inline bool nfs_match_open_context(const struct nfs_open_context *ctx1,
+                const struct nfs_open_context *ctx2)
+{
+        return ctx1->cred == ctx2->cred && ctx1->state == ctx2->state;
+}
+
 /* nfs2xdr.c */
 extern struct rpc_procinfo nfs_procedures[];
 extern int nfs2_decode_dirent(struct xdr_stream *,
@@ -380,6 +380,7 @@ extern void nfs_clear_inode(struct inode *);
 extern void nfs_evict_inode(struct inode *);
 void nfs_zap_acl_cache(struct inode *inode);
 extern int nfs_wait_bit_killable(struct wait_bit_key *key, int mode);
+extern int nfs_wait_atomic_killable(atomic_t *p);
 
 /* super.c */
 extern const struct super_operations nfs_sops;
@@ -519,7 +520,6 @@ static inline void nfs_inode_dio_wait(struct inode *inode)
         inode_dio_wait(inode);
 }
 extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq);
-extern void nfs_direct_set_resched_writes(struct nfs_direct_req *dreq);
 
 /* nfs4proc.c */
 extern void __nfs4_read_done_cb(struct nfs_pgio_header *);
@@ -696,9 +696,32 @@ static inline u32 nfs_fhandle_hash(const struct nfs_fh *fh)
 {
         return ~crc32_le(0xFFFFFFFF, &fh->data[0], fh->size);
 }
+static inline u32 nfs_stateid_hash(const nfs4_stateid *stateid)
+{
+        return ~crc32_le(0xFFFFFFFF, &stateid->other[0],
+                                NFS4_STATEID_OTHER_SIZE);
+}
 #else
 static inline u32 nfs_fhandle_hash(const struct nfs_fh *fh)
 {
         return 0;
 }
+static inline u32 nfs_stateid_hash(nfs4_stateid *stateid)
+{
+        return 0;
+}
 #endif
+
+static inline bool nfs_error_is_fatal(int err)
+{
+        switch (err) {
+        case -ERESTARTSYS:
+        case -EIO:
+        case -ENOSPC:
+        case -EROFS:
+        case -E2BIG:
+                return true;
+        default:
+                return false;
+        }
+}
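
nfs_error_is_fatal(), added at the end of internal.h above, is the predicate the pageio paths use to decide whether an error should abort further retries rather than be retried or redirected. A standalone restatement (ERESTARTSYS is a kernel-internal errno, so it is defined by hand here):

    #include <errno.h>
    #include <stdbool.h>
    #include <stdio.h>

    #define ERESTARTSYS 512         /* kernel-internal, not in user-space errno.h */

    static bool error_is_fatal(int err)
    {
            switch (err) {
            case -ERESTARTSYS:
            case -EIO:
            case -ENOSPC:
            case -EROFS:
            case -E2BIG:
                    return true;
            default:
                    return false;
            }
    }

    int main(void)
    {
            const int samples[] = { -EIO, -EAGAIN, -ENOSPC, -ETIMEDOUT };

            for (int i = 0; i < 4; i++)
                    printf("%6d fatal? %s\n", samples[i],
                           error_is_fatal(samples[i]) ? "yes" : "no");
            return 0;
    }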
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 6b1ce9825430..6e8174930a48 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -204,6 +204,8 @@ static void
204nfs42_layoutstat_done(struct rpc_task *task, void *calldata) 204nfs42_layoutstat_done(struct rpc_task *task, void *calldata)
205{ 205{
206 struct nfs42_layoutstat_data *data = calldata; 206 struct nfs42_layoutstat_data *data = calldata;
207 struct inode *inode = data->inode;
208 struct pnfs_layout_hdr *lo;
207 209
208 if (!nfs4_sequence_done(task, &data->res.seq_res)) 210 if (!nfs4_sequence_done(task, &data->res.seq_res))
209 return; 211 return;
@@ -211,12 +213,35 @@ nfs42_layoutstat_done(struct rpc_task *task, void *calldata)
211 switch (task->tk_status) { 213 switch (task->tk_status) {
212 case 0: 214 case 0:
213 break; 215 break;
216 case -NFS4ERR_EXPIRED:
217 case -NFS4ERR_STALE_STATEID:
218 case -NFS4ERR_OLD_STATEID:
219 case -NFS4ERR_BAD_STATEID:
220 spin_lock(&inode->i_lock);
221 lo = NFS_I(inode)->layout;
222 if (lo && nfs4_stateid_match(&data->args.stateid,
223 &lo->plh_stateid)) {
224 LIST_HEAD(head);
225
226 /*
227 * Mark the bad layout state as invalid, then retry
228 * with the current stateid.
229 */
230 set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
231 pnfs_mark_matching_lsegs_invalid(lo, &head, NULL);
232 spin_unlock(&inode->i_lock);
233 pnfs_free_lseg_list(&head);
234 } else
235 spin_unlock(&inode->i_lock);
236 break;
214 case -ENOTSUPP: 237 case -ENOTSUPP:
215 case -EOPNOTSUPP: 238 case -EOPNOTSUPP:
216 NFS_SERVER(data->inode)->caps &= ~NFS_CAP_LAYOUTSTATS; 239 NFS_SERVER(inode)->caps &= ~NFS_CAP_LAYOUTSTATS;
217 default: 240 default:
218 dprintk("%s server returns %d\n", __func__, task->tk_status); 241 break;
219 } 242 }
243
244 dprintk("%s server returns %d\n", __func__, task->tk_status);
220} 245}
221 246
222static void 247static void
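
The retry logic above only tears down the layout when the stateid the LAYOUTSTATS call carried still matches the one cached in plh_stateid; otherwise another thread has already refreshed it and there is nothing to do. A stand-alone sketch of that check, assuming nfs4_stateid_match() amounts to a whole-stateid memcmp; the types and values here are simplified stand-ins:

	#include <stdbool.h>
	#include <stdint.h>
	#include <string.h>
	#include <stdio.h>

	typedef struct {
		uint32_t seqid;            /* big-endian on the wire */
		unsigned char other[12];
	} nfs4_stateid;

	static bool nfs4_stateid_match(const nfs4_stateid *a, const nfs4_stateid *b)
	{
		return memcmp(a, b, sizeof(*a)) == 0;
	}

	int main(void)
	{
		nfs4_stateid sent = { .seqid = 1, .other = "layoutstate" };
		nfs4_stateid current = sent;

		/* Server replied e.g. NFS4ERR_OLD_STATEID: invalidate only if
		 * the stateid we sent is still the one we hold. */
		if (nfs4_stateid_match(&sent, &current))
			printf("stale layout stateid: invalidate and retry\n");
		else
			printf("stateid already refreshed: nothing to do\n");
		return 0;
	}
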
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index c57d1332c1c8..4bfc33ad0563 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -208,6 +208,9 @@ static const u32 nfs4_pnfs_open_bitmap[3] = {
208 | FATTR4_WORD1_TIME_METADATA 208 | FATTR4_WORD1_TIME_METADATA
209 | FATTR4_WORD1_TIME_MODIFY, 209 | FATTR4_WORD1_TIME_MODIFY,
210 FATTR4_WORD2_MDSTHRESHOLD 210 FATTR4_WORD2_MDSTHRESHOLD
211#ifdef CONFIG_NFS_V4_SECURITY_LABEL
212 | FATTR4_WORD2_SECURITY_LABEL
213#endif
211}; 214};
212 215
213static const u32 nfs4_open_noattr_bitmap[3] = { 216static const u32 nfs4_open_noattr_bitmap[3] = {
@@ -1385,6 +1388,7 @@ static void __update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_s
1385 * Protect the call to nfs4_state_set_mode_locked and 1388 * Protect the call to nfs4_state_set_mode_locked and
1386 * serialise the stateid update 1389 * serialise the stateid update
1387 */ 1390 */
1391 spin_lock(&state->owner->so_lock);
1388 write_seqlock(&state->seqlock); 1392 write_seqlock(&state->seqlock);
1389 if (deleg_stateid != NULL) { 1393 if (deleg_stateid != NULL) {
1390 nfs4_stateid_copy(&state->stateid, deleg_stateid); 1394 nfs4_stateid_copy(&state->stateid, deleg_stateid);
@@ -1393,7 +1397,6 @@ static void __update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_s
1393 if (open_stateid != NULL) 1397 if (open_stateid != NULL)
1394 nfs_set_open_stateid_locked(state, open_stateid, fmode); 1398 nfs_set_open_stateid_locked(state, open_stateid, fmode);
1395 write_sequnlock(&state->seqlock); 1399 write_sequnlock(&state->seqlock);
1396 spin_lock(&state->owner->so_lock);
1397 update_open_stateflags(state, fmode); 1400 update_open_stateflags(state, fmode);
1398 spin_unlock(&state->owner->so_lock); 1401 spin_unlock(&state->owner->so_lock);
1399} 1402}
@@ -1598,6 +1601,7 @@ _nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data)
1598 1601
1599 if (!data->rpc_done) { 1602 if (!data->rpc_done) {
1600 state = nfs4_try_open_cached(data); 1603 state = nfs4_try_open_cached(data);
1604 trace_nfs4_cached_open(data->state);
1601 goto out; 1605 goto out;
1602 } 1606 }
1603 1607
@@ -2015,6 +2019,7 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
2015 } 2019 }
2016 return; 2020 return;
2017unlock_no_action: 2021unlock_no_action:
2022 trace_nfs4_cached_open(data->state);
2018 rcu_read_unlock(); 2023 rcu_read_unlock();
2019out_no_action: 2024out_no_action:
2020 task->tk_action = NULL; 2025 task->tk_action = NULL;
@@ -2703,6 +2708,7 @@ static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
2703 status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); 2708 status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
2704 if (status == 0 && state != NULL) 2709 if (status == 0 && state != NULL)
2705 renew_lease(server, timestamp); 2710 renew_lease(server, timestamp);
2711 trace_nfs4_setattr(inode, &arg.stateid, status);
2706 return status; 2712 return status;
2707} 2713}
2708 2714
@@ -2719,7 +2725,6 @@ static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
2719 int err; 2725 int err;
2720 do { 2726 do {
2721 err = _nfs4_do_setattr(inode, cred, fattr, sattr, state, ilabel, olabel); 2727 err = _nfs4_do_setattr(inode, cred, fattr, sattr, state, ilabel, olabel);
2722 trace_nfs4_setattr(inode, err);
2723 switch (err) { 2728 switch (err) {
2724 case -NFS4ERR_OPENMODE: 2729 case -NFS4ERR_OPENMODE:
2725 if (!(sattr->ia_valid & ATTR_SIZE)) { 2730 if (!(sattr->ia_valid & ATTR_SIZE)) {
@@ -5048,7 +5053,6 @@ static void nfs4_init_boot_verifier(const struct nfs_client *clp,
5048static int 5053static int
5049nfs4_init_nonuniform_client_string(struct nfs_client *clp) 5054nfs4_init_nonuniform_client_string(struct nfs_client *clp)
5050{ 5055{
5051 int result;
5052 size_t len; 5056 size_t len;
5053 char *str; 5057 char *str;
5054 5058
@@ -5076,7 +5080,7 @@ nfs4_init_nonuniform_client_string(struct nfs_client *clp)
5076 return -ENOMEM; 5080 return -ENOMEM;
5077 5081
5078 rcu_read_lock(); 5082 rcu_read_lock();
5079 result = scnprintf(str, len, "Linux NFSv4.0 %s/%s %s", 5083 scnprintf(str, len, "Linux NFSv4.0 %s/%s %s",
5080 clp->cl_ipaddr, 5084 clp->cl_ipaddr,
5081 rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR), 5085 rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR),
5082 rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_PROTO)); 5086 rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_PROTO));
@@ -5089,7 +5093,6 @@ nfs4_init_nonuniform_client_string(struct nfs_client *clp)
5089static int 5093static int
5090nfs4_init_uniquifier_client_string(struct nfs_client *clp) 5094nfs4_init_uniquifier_client_string(struct nfs_client *clp)
5091{ 5095{
5092 int result;
5093 size_t len; 5096 size_t len;
5094 char *str; 5097 char *str;
5095 5098
@@ -5109,7 +5112,7 @@ nfs4_init_uniquifier_client_string(struct nfs_client *clp)
5109 if (!str) 5112 if (!str)
5110 return -ENOMEM; 5113 return -ENOMEM;
5111 5114
5112 result = scnprintf(str, len, "Linux NFSv%u.%u %s/%s", 5115 scnprintf(str, len, "Linux NFSv%u.%u %s/%s",
5113 clp->rpc_ops->version, clp->cl_minorversion, 5116 clp->rpc_ops->version, clp->cl_minorversion,
5114 nfs4_client_id_uniquifier, 5117 nfs4_client_id_uniquifier,
5115 clp->cl_rpcclient->cl_nodename); 5118 clp->cl_rpcclient->cl_nodename);
@@ -5120,7 +5123,6 @@ nfs4_init_uniquifier_client_string(struct nfs_client *clp)
5120static int 5123static int
5121nfs4_init_uniform_client_string(struct nfs_client *clp) 5124nfs4_init_uniform_client_string(struct nfs_client *clp)
5122{ 5125{
5123 int result;
5124 size_t len; 5126 size_t len;
5125 char *str; 5127 char *str;
5126 5128
@@ -5145,7 +5147,7 @@ nfs4_init_uniform_client_string(struct nfs_client *clp)
5145 if (!str) 5147 if (!str)
5146 return -ENOMEM; 5148 return -ENOMEM;
5147 5149
5148 result = scnprintf(str, len, "Linux NFSv%u.%u %s", 5150 scnprintf(str, len, "Linux NFSv%u.%u %s",
5149 clp->rpc_ops->version, clp->cl_minorversion, 5151 clp->rpc_ops->version, clp->cl_minorversion,
5150 clp->cl_rpcclient->cl_nodename); 5152 clp->cl_rpcclient->cl_nodename);
5151 clp->cl_owner_id = str; 5153 clp->cl_owner_id = str;
@@ -5384,6 +5386,11 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
5384 if (data == NULL) 5386 if (data == NULL)
5385 return -ENOMEM; 5387 return -ENOMEM;
5386 nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); 5388 nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1);
5389
5390 nfs4_state_protect(server->nfs_client,
5391 NFS_SP4_MACH_CRED_CLEANUP,
5392 &task_setup_data.rpc_client, &msg);
5393
5387 data->args.fhandle = &data->fh; 5394 data->args.fhandle = &data->fh;
5388 data->args.stateid = &data->stateid; 5395 data->args.stateid = &data->stateid;
5389 data->args.bitmask = server->cache_consistency_bitmask; 5396 data->args.bitmask = server->cache_consistency_bitmask;
@@ -5426,7 +5433,7 @@ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4
5426 int err; 5433 int err;
5427 do { 5434 do {
5428 err = _nfs4_proc_delegreturn(inode, cred, stateid, issync); 5435 err = _nfs4_proc_delegreturn(inode, cred, stateid, issync);
5429 trace_nfs4_delegreturn(inode, err); 5436 trace_nfs4_delegreturn(inode, stateid, err);
5430 switch (err) { 5437 switch (err) {
5431 case -NFS4ERR_STALE_STATEID: 5438 case -NFS4ERR_STALE_STATEID:
5432 case -NFS4ERR_EXPIRED: 5439 case -NFS4ERR_EXPIRED:
@@ -5936,6 +5943,7 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
5936 data->cancelled = 1; 5943 data->cancelled = 1;
5937 rpc_put_task(task); 5944 rpc_put_task(task);
5938 dprintk("%s: done, ret = %d!\n", __func__, ret); 5945 dprintk("%s: done, ret = %d!\n", __func__, ret);
5946 trace_nfs4_set_lock(fl, state, &data->res.stateid, cmd, ret);
5939 return ret; 5947 return ret;
5940} 5948}
5941 5949
@@ -5952,7 +5960,6 @@ static int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request
5952 if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0) 5960 if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0)
5953 return 0; 5961 return 0;
5954 err = _nfs4_do_setlk(state, F_SETLK, request, NFS_LOCK_RECLAIM); 5962 err = _nfs4_do_setlk(state, F_SETLK, request, NFS_LOCK_RECLAIM);
5955 trace_nfs4_lock_reclaim(request, state, F_SETLK, err);
5956 if (err != -NFS4ERR_DELAY) 5963 if (err != -NFS4ERR_DELAY)
5957 break; 5964 break;
5958 nfs4_handle_exception(server, err, &exception); 5965 nfs4_handle_exception(server, err, &exception);
@@ -5979,7 +5986,6 @@ static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request
5979 if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0) 5986 if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0)
5980 return 0; 5987 return 0;
5981 err = _nfs4_do_setlk(state, F_SETLK, request, NFS_LOCK_EXPIRED); 5988 err = _nfs4_do_setlk(state, F_SETLK, request, NFS_LOCK_EXPIRED);
5982 trace_nfs4_lock_expired(request, state, F_SETLK, err);
5983 switch (err) { 5989 switch (err) {
5984 default: 5990 default:
5985 goto out; 5991 goto out;
@@ -6087,7 +6093,6 @@ static int nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *
6087 6093
6088 do { 6094 do {
6089 err = _nfs4_proc_setlk(state, cmd, request); 6095 err = _nfs4_proc_setlk(state, cmd, request);
6090 trace_nfs4_set_lock(request, state, cmd, err);
6091 if (err == -NFS4ERR_DENIED) 6096 if (err == -NFS4ERR_DENIED)
6092 err = -EAGAIN; 6097 err = -EAGAIN;
6093 err = nfs4_handle_exception(NFS_SERVER(state->inode), 6098 err = nfs4_handle_exception(NFS_SERVER(state->inode),
@@ -6847,10 +6852,13 @@ static const struct nfs41_state_protection nfs4_sp4_mach_cred_request = {
6847 }, 6852 },
6848 .allow.u.words = { 6853 .allow.u.words = {
6849 [0] = 1 << (OP_CLOSE) | 6854 [0] = 1 << (OP_CLOSE) |
6855 1 << (OP_OPEN_DOWNGRADE) |
6850 1 << (OP_LOCKU) | 6856 1 << (OP_LOCKU) |
6857 1 << (OP_DELEGRETURN) |
6851 1 << (OP_COMMIT), 6858 1 << (OP_COMMIT),
6852 [1] = 1 << (OP_SECINFO - 32) | 6859 [1] = 1 << (OP_SECINFO - 32) |
6853 1 << (OP_SECINFO_NO_NAME - 32) | 6860 1 << (OP_SECINFO_NO_NAME - 32) |
6861 1 << (OP_LAYOUTRETURN - 32) |
6854 1 << (OP_TEST_STATEID - 32) | 6862 1 << (OP_TEST_STATEID - 32) |
6855 1 << (OP_FREE_STATEID - 32) | 6863 1 << (OP_FREE_STATEID - 32) |
6856 1 << (OP_WRITE - 32) 6864 1 << (OP_WRITE - 32)
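
The allow list is a 64-bit operation bitmap split across two 32-bit words, which is why ops numbered 32 and above (such as OP_LAYOUTRETURN) are shifted by "OP - 32" into word [1]. A stand-alone sketch of the indexing; the opcode values follow RFC 5661, but the layout here is a simplified stand-in:

	#include <stdint.h>
	#include <stdio.h>
	#include <stdbool.h>

	enum { OP_CLOSE = 4, OP_COMMIT = 5, OP_DELEGRETURN = 8,
	       OP_LOCKU = 14, OP_OPEN_DOWNGRADE = 21, OP_LAYOUTRETURN = 51 };

	/* Word 0 holds ops 0-31, word 1 holds ops 32-63. */
	static bool op_allowed(const uint32_t words[2], int op)
	{
		return words[op / 32] & (1u << (op % 32));
	}

	int main(void)
	{
		uint32_t words[2] = {
			[0] = 1u << OP_CLOSE |
			      1u << OP_OPEN_DOWNGRADE |
			      1u << OP_LOCKU |
			      1u << OP_DELEGRETURN |
			      1u << OP_COMMIT,
			[1] = 1u << (OP_LAYOUTRETURN - 32),
		};

		printf("DELEGRETURN allowed: %d\n",
		       op_allowed(words, OP_DELEGRETURN));
		printf("LAYOUTRETURN allowed: %d\n",
		       op_allowed(words, OP_LAYOUTRETURN));
		return 0;
	}
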
@@ -6915,11 +6923,19 @@ static int nfs4_sp4_select_mode(struct nfs_client *clp,
6915 } 6923 }
6916 6924
6917 if (test_bit(OP_CLOSE, sp->allow.u.longs) && 6925 if (test_bit(OP_CLOSE, sp->allow.u.longs) &&
6926 test_bit(OP_OPEN_DOWNGRADE, sp->allow.u.longs) &&
6927 test_bit(OP_DELEGRETURN, sp->allow.u.longs) &&
6918 test_bit(OP_LOCKU, sp->allow.u.longs)) { 6928 test_bit(OP_LOCKU, sp->allow.u.longs)) {
6919 dfprintk(MOUNT, " cleanup mode enabled\n"); 6929 dfprintk(MOUNT, " cleanup mode enabled\n");
6920 set_bit(NFS_SP4_MACH_CRED_CLEANUP, &clp->cl_sp4_flags); 6930 set_bit(NFS_SP4_MACH_CRED_CLEANUP, &clp->cl_sp4_flags);
6921 } 6931 }
6922 6932
6933 if (test_bit(OP_LAYOUTRETURN, sp->allow.u.longs)) {
6934 dfprintk(MOUNT, " pnfs cleanup mode enabled\n");
6935 set_bit(NFS_SP4_MACH_CRED_PNFS_CLEANUP,
6936 &clp->cl_sp4_flags);
6937 }
6938
6923 if (test_bit(OP_SECINFO, sp->allow.u.longs) && 6939 if (test_bit(OP_SECINFO, sp->allow.u.longs) &&
6924 test_bit(OP_SECINFO_NO_NAME, sp->allow.u.longs)) { 6940 test_bit(OP_SECINFO_NO_NAME, sp->allow.u.longs)) {
6925 dfprintk(MOUNT, " secinfo mode enabled\n"); 6941 dfprintk(MOUNT, " secinfo mode enabled\n");
@@ -7748,6 +7764,7 @@ nfs4_layoutget_prepare(struct rpc_task *task, void *calldata)
7748 struct nfs4_layoutget *lgp = calldata; 7764 struct nfs4_layoutget *lgp = calldata;
7749 struct nfs_server *server = NFS_SERVER(lgp->args.inode); 7765 struct nfs_server *server = NFS_SERVER(lgp->args.inode);
7750 struct nfs4_session *session = nfs4_get_session(server); 7766 struct nfs4_session *session = nfs4_get_session(server);
7767 int ret;
7751 7768
7752 dprintk("--> %s\n", __func__); 7769 dprintk("--> %s\n", __func__);
7753 /* Note there is a race here, where a CB_LAYOUTRECALL can come in 7770 /* Note there is a race here, where a CB_LAYOUTRECALL can come in
@@ -7758,12 +7775,12 @@ nfs4_layoutget_prepare(struct rpc_task *task, void *calldata)
7758 if (nfs41_setup_sequence(session, &lgp->args.seq_args, 7775 if (nfs41_setup_sequence(session, &lgp->args.seq_args,
7759 &lgp->res.seq_res, task)) 7776 &lgp->res.seq_res, task))
7760 return; 7777 return;
7761 if (pnfs_choose_layoutget_stateid(&lgp->args.stateid, 7778 ret = pnfs_choose_layoutget_stateid(&lgp->args.stateid,
7762 NFS_I(lgp->args.inode)->layout, 7779 NFS_I(lgp->args.inode)->layout,
7763 &lgp->args.range, 7780 &lgp->args.range,
7764 lgp->args.ctx->state)) { 7781 lgp->args.ctx->state);
7765 rpc_exit(task, NFS4_OK); 7782 if (ret < 0)
7766 } 7783 rpc_exit(task, ret);
7767} 7784}
7768 7785
7769static void nfs4_layoutget_done(struct rpc_task *task, void *calldata) 7786static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
@@ -7783,6 +7800,15 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
7783 switch (task->tk_status) { 7800 switch (task->tk_status) {
7784 case 0: 7801 case 0:
7785 goto out; 7802 goto out;
7803
7804 /*
 7805 * NFS4ERR_LAYOUTUNAVAILABLE means we are not supposed to use pNFS
 7806 * on this file. Set tk_status to -ENODATA to tell the upper layer to
 7807 * retry the I/O inband through the MDS.
7808 */
7809 case -NFS4ERR_LAYOUTUNAVAILABLE:
7810 task->tk_status = -ENODATA;
7811 goto out;
7786 /* 7812 /*
7787 * NFS4ERR_BADLAYOUT means the MDS cannot return a layout of 7813 * NFS4ERR_BADLAYOUT means the MDS cannot return a layout of
7788 * length lgp->args.minlength != 0 (see RFC5661 section 18.43.3). 7814 * length lgp->args.minlength != 0 (see RFC5661 section 18.43.3).
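
The new case maps NFS4ERR_LAYOUTUNAVAILABLE onto -ENODATA so that callers abandon pNFS for this file and retry through the MDS. A stand-alone sketch of that translation step; the numeric error value is taken from RFC 5661 and the function name is invented:

	#include <errno.h>
	#include <stdio.h>

	#define NFS4ERR_LAYOUTUNAVAILABLE 10059   /* value per RFC 5661 */

	/* Protocol errors arrive negated in tk_status. */
	static int layoutget_status(int tk_status)
	{
		switch (tk_status) {
		case 0:
			return 0;          /* got a layout, use pNFS */
		case -NFS4ERR_LAYOUTUNAVAILABLE:
			return -ENODATA;   /* no pNFS for this file: go inband */
		default:
			return tk_status;  /* other errors handled elsewhere */
		}
	}

	int main(void)
	{
		printf("LAYOUTUNAVAILABLE -> %d (-ENODATA is %d)\n",
		       layoutget_status(-NFS4ERR_LAYOUTUNAVAILABLE), -ENODATA);
		return 0;
	}
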
@@ -7979,6 +8005,7 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags)
7979 trace_nfs4_layoutget(lgp->args.ctx, 8005 trace_nfs4_layoutget(lgp->args.ctx,
7980 &lgp->args.range, 8006 &lgp->args.range,
7981 &lgp->res.range, 8007 &lgp->res.range,
8008 &lgp->res.stateid,
7982 status); 8009 status);
7983 /* if layoutp->len is 0, nfs4_layoutget_prepare called rpc_exit */ 8010 /* if layoutp->len is 0, nfs4_layoutget_prepare called rpc_exit */
7984 if (status == 0 && lgp->res.layoutp->len) 8011 if (status == 0 && lgp->res.layoutp->len)
@@ -8035,11 +8062,11 @@ static void nfs4_layoutreturn_release(void *calldata)
8035 8062
8036 dprintk("--> %s\n", __func__); 8063 dprintk("--> %s\n", __func__);
8037 spin_lock(&lo->plh_inode->i_lock); 8064 spin_lock(&lo->plh_inode->i_lock);
8065 pnfs_mark_matching_lsegs_invalid(lo, &freeme, &lrp->args.range);
8066 pnfs_mark_layout_returned_if_empty(lo);
8038 if (lrp->res.lrs_present) 8067 if (lrp->res.lrs_present)
8039 pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); 8068 pnfs_set_layout_stateid(lo, &lrp->res.stateid, true);
8040 pnfs_mark_matching_lsegs_invalid(lo, &freeme, &lrp->args.range);
8041 pnfs_clear_layoutreturn_waitbit(lo); 8069 pnfs_clear_layoutreturn_waitbit(lo);
8042 lo->plh_block_lgets--;
8043 spin_unlock(&lo->plh_inode->i_lock); 8070 spin_unlock(&lo->plh_inode->i_lock);
8044 pnfs_free_lseg_list(&freeme); 8071 pnfs_free_lseg_list(&freeme);
8045 pnfs_put_layout_hdr(lrp->args.layout); 8072 pnfs_put_layout_hdr(lrp->args.layout);
@@ -8071,6 +8098,10 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync)
8071 }; 8098 };
8072 int status = 0; 8099 int status = 0;
8073 8100
8101 nfs4_state_protect(NFS_SERVER(lrp->args.inode)->nfs_client,
8102 NFS_SP4_MACH_CRED_PNFS_CLEANUP,
8103 &task_setup_data.rpc_client, &msg);
8104
8074 dprintk("--> %s\n", __func__); 8105 dprintk("--> %s\n", __func__);
8075 if (!sync) { 8106 if (!sync) {
8076 lrp->inode = nfs_igrab_and_active(lrp->args.inode); 8107 lrp->inode = nfs_igrab_and_active(lrp->args.inode);
@@ -8086,7 +8117,7 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync)
8086 return PTR_ERR(task); 8117 return PTR_ERR(task);
8087 if (sync) 8118 if (sync)
8088 status = task->tk_status; 8119 status = task->tk_status;
8089 trace_nfs4_layoutreturn(lrp->args.inode, status); 8120 trace_nfs4_layoutreturn(lrp->args.inode, &lrp->args.stateid, status);
8090 dprintk("<-- %s status=%d\n", __func__, status); 8121 dprintk("<-- %s status=%d\n", __func__, status);
8091 rpc_put_task(task); 8122 rpc_put_task(task);
8092 return status; 8123 return status;
@@ -8234,7 +8265,7 @@ nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync)
8234 return PTR_ERR(task); 8265 return PTR_ERR(task);
8235 if (sync) 8266 if (sync)
8236 status = task->tk_status; 8267 status = task->tk_status;
8237 trace_nfs4_layoutcommit(data->args.inode, status); 8268 trace_nfs4_layoutcommit(data->args.inode, &data->args.stateid, status);
8238 dprintk("%s: status %d\n", __func__, status); 8269 dprintk("%s: status %d\n", __func__, status);
8239 rpc_put_task(task); 8270 rpc_put_task(task);
8240 return status; 8271 return status;
diff --git a/fs/nfs/nfs4sysctl.c b/fs/nfs/nfs4sysctl.c
index 0fbd3ab1be22..8693d77c45ea 100644
--- a/fs/nfs/nfs4sysctl.c
+++ b/fs/nfs/nfs4sysctl.c
@@ -12,7 +12,7 @@
12#include "nfs4idmap.h" 12#include "nfs4idmap.h"
13#include "callback.h" 13#include "callback.h"
14 14
15static const int nfs_set_port_min = 0; 15static const int nfs_set_port_min;
16static const int nfs_set_port_max = 65535; 16static const int nfs_set_port_max = 65535;
17static struct ctl_table_header *nfs4_callback_sysctl_table; 17static struct ctl_table_header *nfs4_callback_sysctl_table;
18 18
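
Dropping the "= 0" relies on the C guarantee that objects with static storage duration are zero-initialized, which is also why checkpatch complains about explicit zero initializers on statics. A trivial stand-alone check:

	#include <stdio.h>

	static const int nfs_set_port_min;          /* zero-initialized by C */
	static const int nfs_set_port_max = 65535;

	int main(void)
	{
		printf("port range: %d-%d\n", nfs_set_port_min, nfs_set_port_max);
		return 0;
	}
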
diff --git a/fs/nfs/nfs4trace.c b/fs/nfs/nfs4trace.c
index d774335cc8bc..2850bce19244 100644
--- a/fs/nfs/nfs4trace.c
+++ b/fs/nfs/nfs4trace.c
@@ -6,6 +6,7 @@
6#include "internal.h" 6#include "internal.h"
7#include "nfs4session.h" 7#include "nfs4session.h"
8#include "callback.h" 8#include "callback.h"
9#include "pnfs.h"
9 10
10#define CREATE_TRACE_POINTS 11#define CREATE_TRACE_POINTS
11#include "nfs4trace.h" 12#include "nfs4trace.h"
diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
index 671cf68fe56b..2c8d05dae5b1 100644
--- a/fs/nfs/nfs4trace.h
+++ b/fs/nfs/nfs4trace.h
@@ -321,6 +321,7 @@ TRACE_EVENT(nfs4_sequence_done,
321 __entry->highest_slotid = res->sr_highest_slotid; 321 __entry->highest_slotid = res->sr_highest_slotid;
322 __entry->target_highest_slotid = 322 __entry->target_highest_slotid =
323 res->sr_target_highest_slotid; 323 res->sr_target_highest_slotid;
324 __entry->status_flags = res->sr_status_flags;
324 __entry->error = res->sr_status; 325 __entry->error = res->sr_status;
325 ), 326 ),
326 TP_printk( 327 TP_printk(
@@ -399,6 +400,10 @@ DECLARE_EVENT_CLASS(nfs4_open_event,
399 __field(u64, fileid) 400 __field(u64, fileid)
400 __field(u64, dir) 401 __field(u64, dir)
401 __string(name, ctx->dentry->d_name.name) 402 __string(name, ctx->dentry->d_name.name)
403 __field(int, stateid_seq)
404 __field(u32, stateid_hash)
405 __field(int, openstateid_seq)
406 __field(u32, openstateid_hash)
402 ), 407 ),
403 408
404 TP_fast_assign( 409 TP_fast_assign(
@@ -409,8 +414,22 @@ DECLARE_EVENT_CLASS(nfs4_open_event,
409 __entry->flags = flags; 414 __entry->flags = flags;
410 __entry->fmode = (__force unsigned int)ctx->mode; 415 __entry->fmode = (__force unsigned int)ctx->mode;
411 __entry->dev = ctx->dentry->d_sb->s_dev; 416 __entry->dev = ctx->dentry->d_sb->s_dev;
412 if (!IS_ERR_OR_NULL(state)) 417 if (!IS_ERR_OR_NULL(state)) {
413 inode = state->inode; 418 inode = state->inode;
419 __entry->stateid_seq =
420 be32_to_cpu(state->stateid.seqid);
421 __entry->stateid_hash =
422 nfs_stateid_hash(&state->stateid);
423 __entry->openstateid_seq =
424 be32_to_cpu(state->open_stateid.seqid);
425 __entry->openstateid_hash =
426 nfs_stateid_hash(&state->open_stateid);
427 } else {
428 __entry->stateid_seq = 0;
429 __entry->stateid_hash = 0;
430 __entry->openstateid_seq = 0;
431 __entry->openstateid_hash = 0;
432 }
414 if (inode != NULL) { 433 if (inode != NULL) {
415 __entry->fileid = NFS_FILEID(inode); 434 __entry->fileid = NFS_FILEID(inode);
416 __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); 435 __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
@@ -425,7 +444,8 @@ DECLARE_EVENT_CLASS(nfs4_open_event,
425 TP_printk( 444 TP_printk(
426 "error=%d (%s) flags=%d (%s) fmode=%s " 445 "error=%d (%s) flags=%d (%s) fmode=%s "
427 "fileid=%02x:%02x:%llu fhandle=0x%08x " 446 "fileid=%02x:%02x:%llu fhandle=0x%08x "
428 "name=%02x:%02x:%llu/%s", 447 "name=%02x:%02x:%llu/%s stateid=%d:0x%08x "
448 "openstateid=%d:0x%08x",
429 __entry->error, 449 __entry->error,
430 show_nfsv4_errors(__entry->error), 450 show_nfsv4_errors(__entry->error),
431 __entry->flags, 451 __entry->flags,
@@ -436,7 +456,9 @@ DECLARE_EVENT_CLASS(nfs4_open_event,
436 __entry->fhandle, 456 __entry->fhandle,
437 MAJOR(__entry->dev), MINOR(__entry->dev), 457 MAJOR(__entry->dev), MINOR(__entry->dev),
438 (unsigned long long)__entry->dir, 458 (unsigned long long)__entry->dir,
439 __get_str(name) 459 __get_str(name),
460 __entry->stateid_seq, __entry->stateid_hash,
461 __entry->openstateid_seq, __entry->openstateid_hash
440 ) 462 )
441); 463);
442 464
@@ -452,6 +474,45 @@ DEFINE_NFS4_OPEN_EVENT(nfs4_open_reclaim);
452DEFINE_NFS4_OPEN_EVENT(nfs4_open_expired); 474DEFINE_NFS4_OPEN_EVENT(nfs4_open_expired);
453DEFINE_NFS4_OPEN_EVENT(nfs4_open_file); 475DEFINE_NFS4_OPEN_EVENT(nfs4_open_file);
454 476
477TRACE_EVENT(nfs4_cached_open,
478 TP_PROTO(
479 const struct nfs4_state *state
480 ),
481 TP_ARGS(state),
482 TP_STRUCT__entry(
483 __field(dev_t, dev)
484 __field(u32, fhandle)
485 __field(u64, fileid)
486 __field(unsigned int, fmode)
487 __field(int, stateid_seq)
488 __field(u32, stateid_hash)
489 ),
490
491 TP_fast_assign(
492 const struct inode *inode = state->inode;
493
494 __entry->dev = inode->i_sb->s_dev;
495 __entry->fileid = NFS_FILEID(inode);
496 __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
497 __entry->fmode = (__force unsigned int)state->state;
498 __entry->stateid_seq =
499 be32_to_cpu(state->stateid.seqid);
500 __entry->stateid_hash =
501 nfs_stateid_hash(&state->stateid);
502 ),
503
504 TP_printk(
505 "fmode=%s fileid=%02x:%02x:%llu "
506 "fhandle=0x%08x stateid=%d:0x%08x",
507 __entry->fmode ? show_fmode_flags(__entry->fmode) :
508 "closed",
509 MAJOR(__entry->dev), MINOR(__entry->dev),
510 (unsigned long long)__entry->fileid,
511 __entry->fhandle,
512 __entry->stateid_seq, __entry->stateid_hash
513 )
514);
515
455TRACE_EVENT(nfs4_close, 516TRACE_EVENT(nfs4_close,
456 TP_PROTO( 517 TP_PROTO(
457 const struct nfs4_state *state, 518 const struct nfs4_state *state,
@@ -468,6 +529,8 @@ TRACE_EVENT(nfs4_close,
468 __field(u64, fileid) 529 __field(u64, fileid)
469 __field(unsigned int, fmode) 530 __field(unsigned int, fmode)
470 __field(int, error) 531 __field(int, error)
532 __field(int, stateid_seq)
533 __field(u32, stateid_hash)
471 ), 534 ),
472 535
473 TP_fast_assign( 536 TP_fast_assign(
@@ -478,18 +541,23 @@ TRACE_EVENT(nfs4_close,
478 __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); 541 __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
479 __entry->fmode = (__force unsigned int)state->state; 542 __entry->fmode = (__force unsigned int)state->state;
480 __entry->error = error; 543 __entry->error = error;
544 __entry->stateid_seq =
545 be32_to_cpu(args->stateid.seqid);
546 __entry->stateid_hash =
547 nfs_stateid_hash(&args->stateid);
481 ), 548 ),
482 549
483 TP_printk( 550 TP_printk(
484 "error=%d (%s) fmode=%s fileid=%02x:%02x:%llu " 551 "error=%d (%s) fmode=%s fileid=%02x:%02x:%llu "
485 "fhandle=0x%08x", 552 "fhandle=0x%08x openstateid=%d:0x%08x",
486 __entry->error, 553 __entry->error,
487 show_nfsv4_errors(__entry->error), 554 show_nfsv4_errors(__entry->error),
488 __entry->fmode ? show_fmode_flags(__entry->fmode) : 555 __entry->fmode ? show_fmode_flags(__entry->fmode) :
489 "closed", 556 "closed",
490 MAJOR(__entry->dev), MINOR(__entry->dev), 557 MAJOR(__entry->dev), MINOR(__entry->dev),
491 (unsigned long long)__entry->fileid, 558 (unsigned long long)__entry->fileid,
492 __entry->fhandle 559 __entry->fhandle,
560 __entry->stateid_seq, __entry->stateid_hash
493 ) 561 )
494); 562);
495 563
@@ -523,6 +591,8 @@ DECLARE_EVENT_CLASS(nfs4_lock_event,
523 __field(dev_t, dev) 591 __field(dev_t, dev)
524 __field(u32, fhandle) 592 __field(u32, fhandle)
525 __field(u64, fileid) 593 __field(u64, fileid)
594 __field(int, stateid_seq)
595 __field(u32, stateid_hash)
526 ), 596 ),
527 597
528 TP_fast_assign( 598 TP_fast_assign(
@@ -536,11 +606,16 @@ DECLARE_EVENT_CLASS(nfs4_lock_event,
536 __entry->dev = inode->i_sb->s_dev; 606 __entry->dev = inode->i_sb->s_dev;
537 __entry->fileid = NFS_FILEID(inode); 607 __entry->fileid = NFS_FILEID(inode);
538 __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); 608 __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
609 __entry->stateid_seq =
610 be32_to_cpu(state->stateid.seqid);
611 __entry->stateid_hash =
612 nfs_stateid_hash(&state->stateid);
539 ), 613 ),
540 614
541 TP_printk( 615 TP_printk(
542 "error=%d (%s) cmd=%s:%s range=%lld:%lld " 616 "error=%d (%s) cmd=%s:%s range=%lld:%lld "
543 "fileid=%02x:%02x:%llu fhandle=0x%08x", 617 "fileid=%02x:%02x:%llu fhandle=0x%08x "
618 "stateid=%d:0x%08x",
544 __entry->error, 619 __entry->error,
545 show_nfsv4_errors(__entry->error), 620 show_nfsv4_errors(__entry->error),
546 show_lock_cmd(__entry->cmd), 621 show_lock_cmd(__entry->cmd),
@@ -549,7 +624,8 @@ DECLARE_EVENT_CLASS(nfs4_lock_event,
549 (long long)__entry->end, 624 (long long)__entry->end,
550 MAJOR(__entry->dev), MINOR(__entry->dev), 625 MAJOR(__entry->dev), MINOR(__entry->dev),
551 (unsigned long long)__entry->fileid, 626 (unsigned long long)__entry->fileid,
552 __entry->fhandle 627 __entry->fhandle,
628 __entry->stateid_seq, __entry->stateid_hash
553 ) 629 )
554); 630);
555 631
@@ -563,11 +639,73 @@ DECLARE_EVENT_CLASS(nfs4_lock_event,
563 ), \ 639 ), \
564 TP_ARGS(request, state, cmd, error)) 640 TP_ARGS(request, state, cmd, error))
565DEFINE_NFS4_LOCK_EVENT(nfs4_get_lock); 641DEFINE_NFS4_LOCK_EVENT(nfs4_get_lock);
566DEFINE_NFS4_LOCK_EVENT(nfs4_set_lock);
567DEFINE_NFS4_LOCK_EVENT(nfs4_lock_reclaim);
568DEFINE_NFS4_LOCK_EVENT(nfs4_lock_expired);
569DEFINE_NFS4_LOCK_EVENT(nfs4_unlock); 642DEFINE_NFS4_LOCK_EVENT(nfs4_unlock);
570 643
644TRACE_EVENT(nfs4_set_lock,
645 TP_PROTO(
646 const struct file_lock *request,
647 const struct nfs4_state *state,
648 const nfs4_stateid *lockstateid,
649 int cmd,
650 int error
651 ),
652
653 TP_ARGS(request, state, lockstateid, cmd, error),
654
655 TP_STRUCT__entry(
656 __field(int, error)
657 __field(int, cmd)
658 __field(char, type)
659 __field(loff_t, start)
660 __field(loff_t, end)
661 __field(dev_t, dev)
662 __field(u32, fhandle)
663 __field(u64, fileid)
664 __field(int, stateid_seq)
665 __field(u32, stateid_hash)
666 __field(int, lockstateid_seq)
667 __field(u32, lockstateid_hash)
668 ),
669
670 TP_fast_assign(
671 const struct inode *inode = state->inode;
672
673 __entry->error = error;
674 __entry->cmd = cmd;
675 __entry->type = request->fl_type;
676 __entry->start = request->fl_start;
677 __entry->end = request->fl_end;
678 __entry->dev = inode->i_sb->s_dev;
679 __entry->fileid = NFS_FILEID(inode);
680 __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
681 __entry->stateid_seq =
682 be32_to_cpu(state->stateid.seqid);
683 __entry->stateid_hash =
684 nfs_stateid_hash(&state->stateid);
685 __entry->lockstateid_seq =
686 be32_to_cpu(lockstateid->seqid);
687 __entry->lockstateid_hash =
688 nfs_stateid_hash(lockstateid);
689 ),
690
691 TP_printk(
692 "error=%d (%s) cmd=%s:%s range=%lld:%lld "
693 "fileid=%02x:%02x:%llu fhandle=0x%08x "
694 "stateid=%d:0x%08x lockstateid=%d:0x%08x",
695 __entry->error,
696 show_nfsv4_errors(__entry->error),
697 show_lock_cmd(__entry->cmd),
698 show_lock_type(__entry->type),
699 (long long)__entry->start,
700 (long long)__entry->end,
701 MAJOR(__entry->dev), MINOR(__entry->dev),
702 (unsigned long long)__entry->fileid,
703 __entry->fhandle,
704 __entry->stateid_seq, __entry->stateid_hash,
705 __entry->lockstateid_seq, __entry->lockstateid_hash
706 )
707);
708
571DECLARE_EVENT_CLASS(nfs4_set_delegation_event, 709DECLARE_EVENT_CLASS(nfs4_set_delegation_event,
572 TP_PROTO( 710 TP_PROTO(
573 const struct inode *inode, 711 const struct inode *inode,
@@ -621,20 +759,28 @@ TRACE_EVENT(nfs4_delegreturn_exit,
621 __field(dev_t, dev) 759 __field(dev_t, dev)
622 __field(u32, fhandle) 760 __field(u32, fhandle)
623 __field(int, error) 761 __field(int, error)
762 __field(int, stateid_seq)
763 __field(u32, stateid_hash)
624 ), 764 ),
625 765
626 TP_fast_assign( 766 TP_fast_assign(
627 __entry->dev = res->server->s_dev; 767 __entry->dev = res->server->s_dev;
628 __entry->fhandle = nfs_fhandle_hash(args->fhandle); 768 __entry->fhandle = nfs_fhandle_hash(args->fhandle);
629 __entry->error = error; 769 __entry->error = error;
770 __entry->stateid_seq =
771 be32_to_cpu(args->stateid->seqid);
772 __entry->stateid_hash =
773 nfs_stateid_hash(args->stateid);
630 ), 774 ),
631 775
632 TP_printk( 776 TP_printk(
633 "error=%d (%s) dev=%02x:%02x fhandle=0x%08x", 777 "error=%d (%s) dev=%02x:%02x fhandle=0x%08x "
778 "stateid=%d:0x%08x",
634 __entry->error, 779 __entry->error,
635 show_nfsv4_errors(__entry->error), 780 show_nfsv4_errors(__entry->error),
636 MAJOR(__entry->dev), MINOR(__entry->dev), 781 MAJOR(__entry->dev), MINOR(__entry->dev),
637 __entry->fhandle 782 __entry->fhandle,
783 __entry->stateid_seq, __entry->stateid_hash
638 ) 784 )
639); 785);
640 786
@@ -653,6 +799,8 @@ DECLARE_EVENT_CLASS(nfs4_test_stateid_event,
653 __field(dev_t, dev) 799 __field(dev_t, dev)
654 __field(u32, fhandle) 800 __field(u32, fhandle)
655 __field(u64, fileid) 801 __field(u64, fileid)
802 __field(int, stateid_seq)
803 __field(u32, stateid_hash)
656 ), 804 ),
657 805
658 TP_fast_assign( 806 TP_fast_assign(
@@ -662,15 +810,21 @@ DECLARE_EVENT_CLASS(nfs4_test_stateid_event,
662 __entry->dev = inode->i_sb->s_dev; 810 __entry->dev = inode->i_sb->s_dev;
663 __entry->fileid = NFS_FILEID(inode); 811 __entry->fileid = NFS_FILEID(inode);
664 __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); 812 __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
813 __entry->stateid_seq =
814 be32_to_cpu(state->stateid.seqid);
815 __entry->stateid_hash =
816 nfs_stateid_hash(&state->stateid);
665 ), 817 ),
666 818
667 TP_printk( 819 TP_printk(
668 "error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x", 820 "error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
821 "stateid=%d:0x%08x",
669 __entry->error, 822 __entry->error,
670 show_nfsv4_errors(__entry->error), 823 show_nfsv4_errors(__entry->error),
671 MAJOR(__entry->dev), MINOR(__entry->dev), 824 MAJOR(__entry->dev), MINOR(__entry->dev),
672 (unsigned long long)__entry->fileid, 825 (unsigned long long)__entry->fileid,
673 __entry->fhandle 826 __entry->fhandle,
827 __entry->stateid_seq, __entry->stateid_hash
674 ) 828 )
675); 829);
676 830
@@ -820,7 +974,6 @@ DECLARE_EVENT_CLASS(nfs4_inode_event,
820 ), \ 974 ), \
821 TP_ARGS(inode, error)) 975 TP_ARGS(inode, error))
822 976
823DEFINE_NFS4_INODE_EVENT(nfs4_setattr);
824DEFINE_NFS4_INODE_EVENT(nfs4_access); 977DEFINE_NFS4_INODE_EVENT(nfs4_access);
825DEFINE_NFS4_INODE_EVENT(nfs4_readlink); 978DEFINE_NFS4_INODE_EVENT(nfs4_readlink);
826DEFINE_NFS4_INODE_EVENT(nfs4_readdir); 979DEFINE_NFS4_INODE_EVENT(nfs4_readdir);
@@ -830,8 +983,59 @@ DEFINE_NFS4_INODE_EVENT(nfs4_set_acl);
830DEFINE_NFS4_INODE_EVENT(nfs4_get_security_label); 983DEFINE_NFS4_INODE_EVENT(nfs4_get_security_label);
831DEFINE_NFS4_INODE_EVENT(nfs4_set_security_label); 984DEFINE_NFS4_INODE_EVENT(nfs4_set_security_label);
832#endif /* CONFIG_NFS_V4_SECURITY_LABEL */ 985#endif /* CONFIG_NFS_V4_SECURITY_LABEL */
833DEFINE_NFS4_INODE_EVENT(nfs4_recall_delegation); 986
834DEFINE_NFS4_INODE_EVENT(nfs4_delegreturn); 987DECLARE_EVENT_CLASS(nfs4_inode_stateid_event,
988 TP_PROTO(
989 const struct inode *inode,
990 const nfs4_stateid *stateid,
991 int error
992 ),
993
994 TP_ARGS(inode, stateid, error),
995
996 TP_STRUCT__entry(
997 __field(dev_t, dev)
998 __field(u32, fhandle)
999 __field(u64, fileid)
1000 __field(int, error)
1001 __field(int, stateid_seq)
1002 __field(u32, stateid_hash)
1003 ),
1004
1005 TP_fast_assign(
1006 __entry->dev = inode->i_sb->s_dev;
1007 __entry->fileid = NFS_FILEID(inode);
1008 __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
1009 __entry->error = error;
1010 __entry->stateid_seq =
1011 be32_to_cpu(stateid->seqid);
1012 __entry->stateid_hash =
1013 nfs_stateid_hash(stateid);
1014 ),
1015
1016 TP_printk(
1017 "error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
1018 "stateid=%d:0x%08x",
1019 __entry->error,
1020 show_nfsv4_errors(__entry->error),
1021 MAJOR(__entry->dev), MINOR(__entry->dev),
1022 (unsigned long long)__entry->fileid,
1023 __entry->fhandle,
1024 __entry->stateid_seq, __entry->stateid_hash
1025 )
1026);
1027
1028#define DEFINE_NFS4_INODE_STATEID_EVENT(name) \
1029 DEFINE_EVENT(nfs4_inode_stateid_event, name, \
1030 TP_PROTO( \
1031 const struct inode *inode, \
1032 const nfs4_stateid *stateid, \
1033 int error \
1034 ), \
1035 TP_ARGS(inode, stateid, error))
1036
1037DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_setattr);
1038DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_delegreturn);
835 1039
836DECLARE_EVENT_CLASS(nfs4_getattr_event, 1040DECLARE_EVENT_CLASS(nfs4_getattr_event,
837 TP_PROTO( 1041 TP_PROTO(
@@ -941,8 +1145,74 @@ DECLARE_EVENT_CLASS(nfs4_inode_callback_event,
941 ), \ 1145 ), \
942 TP_ARGS(clp, fhandle, inode, error)) 1146 TP_ARGS(clp, fhandle, inode, error))
943DEFINE_NFS4_INODE_CALLBACK_EVENT(nfs4_cb_getattr); 1147DEFINE_NFS4_INODE_CALLBACK_EVENT(nfs4_cb_getattr);
944DEFINE_NFS4_INODE_CALLBACK_EVENT(nfs4_cb_layoutrecall_inode);
945 1148
1149DECLARE_EVENT_CLASS(nfs4_inode_stateid_callback_event,
1150 TP_PROTO(
1151 const struct nfs_client *clp,
1152 const struct nfs_fh *fhandle,
1153 const struct inode *inode,
1154 const nfs4_stateid *stateid,
1155 int error
1156 ),
1157
1158 TP_ARGS(clp, fhandle, inode, stateid, error),
1159
1160 TP_STRUCT__entry(
1161 __field(int, error)
1162 __field(dev_t, dev)
1163 __field(u32, fhandle)
1164 __field(u64, fileid)
1165 __string(dstaddr, clp ?
1166 rpc_peeraddr2str(clp->cl_rpcclient,
1167 RPC_DISPLAY_ADDR) : "unknown")
1168 __field(int, stateid_seq)
1169 __field(u32, stateid_hash)
1170 ),
1171
1172 TP_fast_assign(
1173 __entry->error = error;
1174 __entry->fhandle = nfs_fhandle_hash(fhandle);
1175 if (inode != NULL) {
1176 __entry->fileid = NFS_FILEID(inode);
1177 __entry->dev = inode->i_sb->s_dev;
1178 } else {
1179 __entry->fileid = 0;
1180 __entry->dev = 0;
1181 }
1182 __assign_str(dstaddr, clp ?
1183 rpc_peeraddr2str(clp->cl_rpcclient,
1184 RPC_DISPLAY_ADDR) : "unknown")
1185 __entry->stateid_seq =
1186 be32_to_cpu(stateid->seqid);
1187 __entry->stateid_hash =
1188 nfs_stateid_hash(stateid);
1189 ),
1190
1191 TP_printk(
1192 "error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
1193 "stateid=%d:0x%08x dstaddr=%s",
1194 __entry->error,
1195 show_nfsv4_errors(__entry->error),
1196 MAJOR(__entry->dev), MINOR(__entry->dev),
1197 (unsigned long long)__entry->fileid,
1198 __entry->fhandle,
1199 __entry->stateid_seq, __entry->stateid_hash,
1200 __get_str(dstaddr)
1201 )
1202);
1203
1204#define DEFINE_NFS4_INODE_STATEID_CALLBACK_EVENT(name) \
1205 DEFINE_EVENT(nfs4_inode_stateid_callback_event, name, \
1206 TP_PROTO( \
1207 const struct nfs_client *clp, \
1208 const struct nfs_fh *fhandle, \
1209 const struct inode *inode, \
1210 const nfs4_stateid *stateid, \
1211 int error \
1212 ), \
1213 TP_ARGS(clp, fhandle, inode, stateid, error))
1214DEFINE_NFS4_INODE_STATEID_CALLBACK_EVENT(nfs4_cb_recall);
1215DEFINE_NFS4_INODE_STATEID_CALLBACK_EVENT(nfs4_cb_layoutrecall_file);
946 1216
947DECLARE_EVENT_CLASS(nfs4_idmap_event, 1217DECLARE_EVENT_CLASS(nfs4_idmap_event,
948 TP_PROTO( 1218 TP_PROTO(
@@ -1005,28 +1275,37 @@ DECLARE_EVENT_CLASS(nfs4_read_event,
1005 __field(loff_t, offset) 1275 __field(loff_t, offset)
1006 __field(size_t, count) 1276 __field(size_t, count)
1007 __field(int, error) 1277 __field(int, error)
1278 __field(int, stateid_seq)
1279 __field(u32, stateid_hash)
1008 ), 1280 ),
1009 1281
1010 TP_fast_assign( 1282 TP_fast_assign(
1011 const struct inode *inode = hdr->inode; 1283 const struct inode *inode = hdr->inode;
1284 const struct nfs4_state *state =
1285 hdr->args.context->state;
1012 __entry->dev = inode->i_sb->s_dev; 1286 __entry->dev = inode->i_sb->s_dev;
1013 __entry->fileid = NFS_FILEID(inode); 1287 __entry->fileid = NFS_FILEID(inode);
1014 __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); 1288 __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
1015 __entry->offset = hdr->args.offset; 1289 __entry->offset = hdr->args.offset;
1016 __entry->count = hdr->args.count; 1290 __entry->count = hdr->args.count;
1017 __entry->error = error; 1291 __entry->error = error;
1292 __entry->stateid_seq =
1293 be32_to_cpu(state->stateid.seqid);
1294 __entry->stateid_hash =
1295 nfs_stateid_hash(&state->stateid);
1018 ), 1296 ),
1019 1297
1020 TP_printk( 1298 TP_printk(
1021 "error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " 1299 "error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
1022 "offset=%lld count=%zu", 1300 "offset=%lld count=%zu stateid=%d:0x%08x",
1023 __entry->error, 1301 __entry->error,
1024 show_nfsv4_errors(__entry->error), 1302 show_nfsv4_errors(__entry->error),
1025 MAJOR(__entry->dev), MINOR(__entry->dev), 1303 MAJOR(__entry->dev), MINOR(__entry->dev),
1026 (unsigned long long)__entry->fileid, 1304 (unsigned long long)__entry->fileid,
1027 __entry->fhandle, 1305 __entry->fhandle,
1028 (long long)__entry->offset, 1306 (long long)__entry->offset,
1029 __entry->count 1307 __entry->count,
1308 __entry->stateid_seq, __entry->stateid_hash
1030 ) 1309 )
1031); 1310);
1032#define DEFINE_NFS4_READ_EVENT(name) \ 1311#define DEFINE_NFS4_READ_EVENT(name) \
@@ -1056,28 +1335,37 @@ DECLARE_EVENT_CLASS(nfs4_write_event,
1056 __field(loff_t, offset) 1335 __field(loff_t, offset)
1057 __field(size_t, count) 1336 __field(size_t, count)
1058 __field(int, error) 1337 __field(int, error)
1338 __field(int, stateid_seq)
1339 __field(u32, stateid_hash)
1059 ), 1340 ),
1060 1341
1061 TP_fast_assign( 1342 TP_fast_assign(
1062 const struct inode *inode = hdr->inode; 1343 const struct inode *inode = hdr->inode;
1344 const struct nfs4_state *state =
1345 hdr->args.context->state;
1063 __entry->dev = inode->i_sb->s_dev; 1346 __entry->dev = inode->i_sb->s_dev;
1064 __entry->fileid = NFS_FILEID(inode); 1347 __entry->fileid = NFS_FILEID(inode);
1065 __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); 1348 __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
1066 __entry->offset = hdr->args.offset; 1349 __entry->offset = hdr->args.offset;
1067 __entry->count = hdr->args.count; 1350 __entry->count = hdr->args.count;
1068 __entry->error = error; 1351 __entry->error = error;
1352 __entry->stateid_seq =
1353 be32_to_cpu(state->stateid.seqid);
1354 __entry->stateid_hash =
1355 nfs_stateid_hash(&state->stateid);
1069 ), 1356 ),
1070 1357
1071 TP_printk( 1358 TP_printk(
1072 "error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " 1359 "error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
1073 "offset=%lld count=%zu", 1360 "offset=%lld count=%zu stateid=%d:0x%08x",
1074 __entry->error, 1361 __entry->error,
1075 show_nfsv4_errors(__entry->error), 1362 show_nfsv4_errors(__entry->error),
1076 MAJOR(__entry->dev), MINOR(__entry->dev), 1363 MAJOR(__entry->dev), MINOR(__entry->dev),
1077 (unsigned long long)__entry->fileid, 1364 (unsigned long long)__entry->fileid,
1078 __entry->fhandle, 1365 __entry->fhandle,
1079 (long long)__entry->offset, 1366 (long long)__entry->offset,
1080 __entry->count 1367 __entry->count,
1368 __entry->stateid_seq, __entry->stateid_hash
1081 ) 1369 )
1082); 1370);
1083 1371
@@ -1154,10 +1442,11 @@ TRACE_EVENT(nfs4_layoutget,
1154 const struct nfs_open_context *ctx, 1442 const struct nfs_open_context *ctx,
1155 const struct pnfs_layout_range *args, 1443 const struct pnfs_layout_range *args,
1156 const struct pnfs_layout_range *res, 1444 const struct pnfs_layout_range *res,
1445 const nfs4_stateid *layout_stateid,
1157 int error 1446 int error
1158 ), 1447 ),
1159 1448
1160 TP_ARGS(ctx, args, res, error), 1449 TP_ARGS(ctx, args, res, layout_stateid, error),
1161 1450
1162 TP_STRUCT__entry( 1451 TP_STRUCT__entry(
1163 __field(dev_t, dev) 1452 __field(dev_t, dev)
@@ -1167,10 +1456,15 @@ TRACE_EVENT(nfs4_layoutget,
1167 __field(u64, offset) 1456 __field(u64, offset)
1168 __field(u64, count) 1457 __field(u64, count)
1169 __field(int, error) 1458 __field(int, error)
1459 __field(int, stateid_seq)
1460 __field(u32, stateid_hash)
1461 __field(int, layoutstateid_seq)
1462 __field(u32, layoutstateid_hash)
1170 ), 1463 ),
1171 1464
1172 TP_fast_assign( 1465 TP_fast_assign(
1173 const struct inode *inode = d_inode(ctx->dentry); 1466 const struct inode *inode = d_inode(ctx->dentry);
1467 const struct nfs4_state *state = ctx->state;
1174 __entry->dev = inode->i_sb->s_dev; 1468 __entry->dev = inode->i_sb->s_dev;
1175 __entry->fileid = NFS_FILEID(inode); 1469 __entry->fileid = NFS_FILEID(inode);
1176 __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); 1470 __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
@@ -1178,11 +1472,25 @@ TRACE_EVENT(nfs4_layoutget,
1178 __entry->offset = args->offset; 1472 __entry->offset = args->offset;
1179 __entry->count = args->length; 1473 __entry->count = args->length;
1180 __entry->error = error; 1474 __entry->error = error;
1475 __entry->stateid_seq =
1476 be32_to_cpu(state->stateid.seqid);
1477 __entry->stateid_hash =
1478 nfs_stateid_hash(&state->stateid);
1479 if (!error) {
1480 __entry->layoutstateid_seq =
1481 be32_to_cpu(layout_stateid->seqid);
1482 __entry->layoutstateid_hash =
1483 nfs_stateid_hash(layout_stateid);
1484 } else {
1485 __entry->layoutstateid_seq = 0;
1486 __entry->layoutstateid_hash = 0;
1487 }
1181 ), 1488 ),
1182 1489
1183 TP_printk( 1490 TP_printk(
1184 "error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " 1491 "error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
1185 "iomode=%s offset=%llu count=%llu", 1492 "iomode=%s offset=%llu count=%llu stateid=%d:0x%08x "
1493 "layoutstateid=%d:0x%08x",
1186 __entry->error, 1494 __entry->error,
1187 show_nfsv4_errors(__entry->error), 1495 show_nfsv4_errors(__entry->error),
1188 MAJOR(__entry->dev), MINOR(__entry->dev), 1496 MAJOR(__entry->dev), MINOR(__entry->dev),
@@ -1190,14 +1498,83 @@ TRACE_EVENT(nfs4_layoutget,
1190 __entry->fhandle, 1498 __entry->fhandle,
1191 show_pnfs_iomode(__entry->iomode), 1499 show_pnfs_iomode(__entry->iomode),
1192 (unsigned long long)__entry->offset, 1500 (unsigned long long)__entry->offset,
1193 (unsigned long long)__entry->count 1501 (unsigned long long)__entry->count,
1502 __entry->stateid_seq, __entry->stateid_hash,
1503 __entry->layoutstateid_seq, __entry->layoutstateid_hash
1194 ) 1504 )
1195); 1505);
1196 1506
1197DEFINE_NFS4_INODE_EVENT(nfs4_layoutcommit); 1507DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_layoutcommit);
1198DEFINE_NFS4_INODE_EVENT(nfs4_layoutreturn); 1508DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_layoutreturn);
1199DEFINE_NFS4_INODE_EVENT(nfs4_layoutreturn_on_close); 1509DEFINE_NFS4_INODE_EVENT(nfs4_layoutreturn_on_close);
1200 1510
1511#define show_pnfs_update_layout_reason(reason) \
1512 __print_symbolic(reason, \
1513 { PNFS_UPDATE_LAYOUT_UNKNOWN, "unknown" }, \
1514 { PNFS_UPDATE_LAYOUT_NO_PNFS, "no pnfs" }, \
1515 { PNFS_UPDATE_LAYOUT_RD_ZEROLEN, "read+zerolen" }, \
1516 { PNFS_UPDATE_LAYOUT_MDSTHRESH, "mdsthresh" }, \
1517 { PNFS_UPDATE_LAYOUT_NOMEM, "nomem" }, \
1518 { PNFS_UPDATE_LAYOUT_BULK_RECALL, "bulk recall" }, \
1519 { PNFS_UPDATE_LAYOUT_IO_TEST_FAIL, "io test fail" }, \
1520 { PNFS_UPDATE_LAYOUT_FOUND_CACHED, "found cached" }, \
1521 { PNFS_UPDATE_LAYOUT_RETURN, "layoutreturn" }, \
1522 { PNFS_UPDATE_LAYOUT_BLOCKED, "layouts blocked" }, \
1523 { PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET, "sent layoutget" })
1524
1525TRACE_EVENT(pnfs_update_layout,
1526 TP_PROTO(struct inode *inode,
1527 loff_t pos,
1528 u64 count,
1529 enum pnfs_iomode iomode,
1530 struct pnfs_layout_hdr *lo,
1531 enum pnfs_update_layout_reason reason
1532 ),
1533 TP_ARGS(inode, pos, count, iomode, lo, reason),
1534 TP_STRUCT__entry(
1535 __field(dev_t, dev)
1536 __field(u64, fileid)
1537 __field(u32, fhandle)
1538 __field(loff_t, pos)
1539 __field(u64, count)
1540 __field(enum pnfs_iomode, iomode)
1541 __field(int, layoutstateid_seq)
1542 __field(u32, layoutstateid_hash)
1543 __field(enum pnfs_update_layout_reason, reason)
1544 ),
1545 TP_fast_assign(
1546 __entry->dev = inode->i_sb->s_dev;
1547 __entry->fileid = NFS_FILEID(inode);
1548 __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
1549 __entry->pos = pos;
1550 __entry->count = count;
1551 __entry->iomode = iomode;
1552 __entry->reason = reason;
1553 if (lo != NULL) {
1554 __entry->layoutstateid_seq =
1555 be32_to_cpu(lo->plh_stateid.seqid);
1556 __entry->layoutstateid_hash =
1557 nfs_stateid_hash(&lo->plh_stateid);
1558 } else {
1559 __entry->layoutstateid_seq = 0;
1560 __entry->layoutstateid_hash = 0;
1561 }
1562 ),
1563 TP_printk(
1564 "fileid=%02x:%02x:%llu fhandle=0x%08x "
1565 "iomode=%s pos=%llu count=%llu "
1566 "layoutstateid=%d:0x%08x (%s)",
1567 MAJOR(__entry->dev), MINOR(__entry->dev),
1568 (unsigned long long)__entry->fileid,
1569 __entry->fhandle,
1570 show_pnfs_iomode(__entry->iomode),
1571 (unsigned long long)__entry->pos,
1572 (unsigned long long)__entry->count,
1573 __entry->layoutstateid_seq, __entry->layoutstateid_hash,
1574 show_pnfs_update_layout_reason(__entry->reason)
1575 )
1576);
1577
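
In trace output, __print_symbolic() renders the stored reason enum as the short label on the right of each pair above. A stand-alone analogue of that lookup; the enum values are stand-ins and only the label strings mirror the tracepoint:

	#include <stdio.h>

	enum pnfs_update_layout_reason {
		PNFS_UPDATE_LAYOUT_UNKNOWN,
		PNFS_UPDATE_LAYOUT_NO_PNFS,
		PNFS_UPDATE_LAYOUT_RD_ZEROLEN,
		PNFS_UPDATE_LAYOUT_MDSTHRESH,
		PNFS_UPDATE_LAYOUT_NOMEM,
		PNFS_UPDATE_LAYOUT_BULK_RECALL,
		PNFS_UPDATE_LAYOUT_IO_TEST_FAIL,
		PNFS_UPDATE_LAYOUT_FOUND_CACHED,
		PNFS_UPDATE_LAYOUT_RETURN,
		PNFS_UPDATE_LAYOUT_BLOCKED,
		PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET,
	};

	static const char *reason_str(enum pnfs_update_layout_reason r)
	{
		static const char * const names[] = {
			"unknown", "no pnfs", "read+zerolen", "mdsthresh",
			"nomem", "bulk recall", "io test fail", "found cached",
			"layoutreturn", "layouts blocked", "sent layoutget",
		};

		return (unsigned int)r < sizeof(names) / sizeof(names[0]) ?
			names[r] : "?";
	}

	int main(void)
	{
		printf("reason=%s\n", reason_str(PNFS_UPDATE_LAYOUT_FOUND_CACHED));
		return 0;
	}
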
1201#endif /* CONFIG_NFS_V4_1 */ 1578#endif /* CONFIG_NFS_V4_1 */
1202 1579
1203#endif /* _TRACE_NFS4_H */ 1580#endif /* _TRACE_NFS4_H */
diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h
index 59f838cdc009..9f80a086b612 100644
--- a/fs/nfs/nfstrace.h
+++ b/fs/nfs/nfstrace.h
@@ -39,7 +39,6 @@
39 { 1 << NFS_INO_INVALIDATING, "INVALIDATING" }, \ 39 { 1 << NFS_INO_INVALIDATING, "INVALIDATING" }, \
40 { 1 << NFS_INO_FLUSHING, "FLUSHING" }, \ 40 { 1 << NFS_INO_FLUSHING, "FLUSHING" }, \
41 { 1 << NFS_INO_FSCACHE, "FSCACHE" }, \ 41 { 1 << NFS_INO_FSCACHE, "FSCACHE" }, \
42 { 1 << NFS_INO_COMMIT, "COMMIT" }, \
43 { 1 << NFS_INO_LAYOUTCOMMIT, "NEED_LAYOUTCOMMIT" }, \ 42 { 1 << NFS_INO_LAYOUTCOMMIT, "NEED_LAYOUTCOMMIT" }, \
44 { 1 << NFS_INO_LAYOUTCOMMITTING, "LAYOUTCOMMIT" }) 43 { 1 << NFS_INO_LAYOUTCOMMITTING, "LAYOUTCOMMIT" })
45 44
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 452a011ba0d8..8ce4f61cbaa5 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -101,53 +101,18 @@ nfs_page_free(struct nfs_page *p)
101 kmem_cache_free(nfs_page_cachep, p); 101 kmem_cache_free(nfs_page_cachep, p);
102} 102}
103 103
104static void
105nfs_iocounter_inc(struct nfs_io_counter *c)
106{
107 atomic_inc(&c->io_count);
108}
109
110static void
111nfs_iocounter_dec(struct nfs_io_counter *c)
112{
113 if (atomic_dec_and_test(&c->io_count)) {
114 clear_bit(NFS_IO_INPROGRESS, &c->flags);
115 smp_mb__after_atomic();
116 wake_up_bit(&c->flags, NFS_IO_INPROGRESS);
117 }
118}
119
120static int
121__nfs_iocounter_wait(struct nfs_io_counter *c)
122{
123 wait_queue_head_t *wq = bit_waitqueue(&c->flags, NFS_IO_INPROGRESS);
124 DEFINE_WAIT_BIT(q, &c->flags, NFS_IO_INPROGRESS);
125 int ret = 0;
126
127 do {
128 prepare_to_wait(wq, &q.wait, TASK_KILLABLE);
129 set_bit(NFS_IO_INPROGRESS, &c->flags);
130 if (atomic_read(&c->io_count) == 0)
131 break;
132 ret = nfs_wait_bit_killable(&q.key, TASK_KILLABLE);
133 } while (atomic_read(&c->io_count) != 0 && !ret);
134 finish_wait(wq, &q.wait);
135 return ret;
136}
137
138/** 104/**
139 * nfs_iocounter_wait - wait for i/o to complete 105 * nfs_iocounter_wait - wait for i/o to complete
140 * @c: nfs_io_counter to use 106 * @l_ctx: nfs_lock_context with io_counter to use
141 * 107 *
142 * returns -ERESTARTSYS if interrupted by a fatal signal. 108 * returns -ERESTARTSYS if interrupted by a fatal signal.
143 * Otherwise returns 0 once the io_count hits 0. 109 * Otherwise returns 0 once the io_count hits 0.
144 */ 110 */
145int 111int
146nfs_iocounter_wait(struct nfs_io_counter *c) 112nfs_iocounter_wait(struct nfs_lock_context *l_ctx)
147{ 113{
148 if (atomic_read(&c->io_count) == 0) 114 return wait_on_atomic_t(&l_ctx->io_count, nfs_wait_atomic_killable,
149 return 0; 115 TASK_KILLABLE);
150 return __nfs_iocounter_wait(c);
151} 116}
152 117
153/* 118/*
@@ -370,7 +335,7 @@ nfs_create_request(struct nfs_open_context *ctx, struct page *page,
370 return ERR_CAST(l_ctx); 335 return ERR_CAST(l_ctx);
371 } 336 }
372 req->wb_lock_context = l_ctx; 337 req->wb_lock_context = l_ctx;
373 nfs_iocounter_inc(&l_ctx->io_count); 338 atomic_inc(&l_ctx->io_count);
374 339
375 /* Initialize the request struct. Initially, we assume a 340 /* Initialize the request struct. Initially, we assume a
376 * long write-back delay. This will be adjusted in 341 * long write-back delay. This will be adjusted in
@@ -431,7 +396,8 @@ static void nfs_clear_request(struct nfs_page *req)
431 req->wb_page = NULL; 396 req->wb_page = NULL;
432 } 397 }
433 if (l_ctx != NULL) { 398 if (l_ctx != NULL) {
434 nfs_iocounter_dec(&l_ctx->io_count); 399 if (atomic_dec_and_test(&l_ctx->io_count))
400 wake_up_atomic_t(&l_ctx->io_count);
435 nfs_put_lock_context(l_ctx); 401 nfs_put_lock_context(l_ctx);
436 req->wb_lock_context = NULL; 402 req->wb_lock_context = NULL;
437 } 403 }
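
Together with the wait_on_atomic_t() call in nfs_iocounter_wait() above, this is the standard pairing of that API: the final atomic_dec_and_test() wakes any waiter parked on the counter, and an action callback decides how the waiter sleeps. A kernel-style sketch of the pairing, not buildable outside a kernel tree; the demo_* names are invented and only approximate the nfs_wait_atomic_killable() this series adds to internal.h:

	#include <linux/atomic.h>
	#include <linux/wait.h>
	#include <linux/sched.h>
	#include <linux/errno.h>

	/* Waker side: the last reference out wakes anyone sleeping on the
	 * counter's address. */
	static void demo_io_put(atomic_t *io_count)
	{
		if (atomic_dec_and_test(io_count))
			wake_up_atomic_t(io_count);
	}

	/* Action callback: we are already in TASK_KILLABLE when called, so
	 * just schedule and report fatal signals. */
	static int demo_wait_atomic_killable(atomic_t *p)
	{
		schedule();
		if (fatal_signal_pending(current))
			return -ERESTARTSYS;
		return 0;
	}

	/* Waiter side: returns 0 once io_count hits zero, -ERESTARTSYS if a
	 * fatal signal interrupts the wait. */
	static int demo_io_wait(atomic_t *io_count)
	{
		return wait_on_atomic_t(io_count, demo_wait_atomic_killable,
					TASK_KILLABLE);
	}
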
@@ -664,22 +630,11 @@ EXPORT_SYMBOL_GPL(nfs_initiate_pgio);
664 * @desc: IO descriptor 630 * @desc: IO descriptor
665 * @hdr: pageio header 631 * @hdr: pageio header
666 */ 632 */
667static int nfs_pgio_error(struct nfs_pageio_descriptor *desc, 633static void nfs_pgio_error(struct nfs_pgio_header *hdr)
668 struct nfs_pgio_header *hdr)
669{ 634{
670 struct nfs_pgio_mirror *mirror;
671 u32 midx;
672
673 set_bit(NFS_IOHDR_REDO, &hdr->flags); 635 set_bit(NFS_IOHDR_REDO, &hdr->flags);
674 nfs_pgio_data_destroy(hdr); 636 nfs_pgio_data_destroy(hdr);
675 hdr->completion_ops->completion(hdr); 637 hdr->completion_ops->completion(hdr);
676 /* TODO: Make sure it's right to clean up all mirrors here
677 * and not just hdr->pgio_mirror_idx */
678 for (midx = 0; midx < desc->pg_mirror_count; midx++) {
679 mirror = &desc->pg_mirrors[midx];
680 desc->pg_completion_ops->error_cleanup(&mirror->pg_list);
681 }
682 return -ENOMEM;
683} 638}
684 639
685/** 640/**
@@ -800,8 +755,11 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc,
800 unsigned int pagecount, pageused; 755 unsigned int pagecount, pageused;
801 756
802 pagecount = nfs_page_array_len(mirror->pg_base, mirror->pg_count); 757 pagecount = nfs_page_array_len(mirror->pg_base, mirror->pg_count);
803 if (!nfs_pgarray_set(&hdr->page_array, pagecount)) 758 if (!nfs_pgarray_set(&hdr->page_array, pagecount)) {
804 return nfs_pgio_error(desc, hdr); 759 nfs_pgio_error(hdr);
760 desc->pg_error = -ENOMEM;
761 return desc->pg_error;
762 }
805 763
806 nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq); 764 nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq);
807 pages = hdr->page_array.pagevec; 765 pages = hdr->page_array.pagevec;
@@ -819,8 +777,11 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc,
819 *pages++ = last_page = req->wb_page; 777 *pages++ = last_page = req->wb_page;
820 } 778 }
821 } 779 }
822 if (WARN_ON_ONCE(pageused != pagecount)) 780 if (WARN_ON_ONCE(pageused != pagecount)) {
823 return nfs_pgio_error(desc, hdr); 781 nfs_pgio_error(hdr);
782 desc->pg_error = -EINVAL;
783 return desc->pg_error;
784 }
824 785
825 if ((desc->pg_ioflags & FLUSH_COND_STABLE) && 786 if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
826 (desc->pg_moreio || nfs_reqs_to_commit(&cinfo))) 787 (desc->pg_moreio || nfs_reqs_to_commit(&cinfo)))
@@ -835,18 +796,13 @@ EXPORT_SYMBOL_GPL(nfs_generic_pgio);
835 796
836static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc) 797static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc)
837{ 798{
838 struct nfs_pgio_mirror *mirror;
839 struct nfs_pgio_header *hdr; 799 struct nfs_pgio_header *hdr;
840 int ret; 800 int ret;
841 801
842 mirror = nfs_pgio_current_mirror(desc);
843
844 hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); 802 hdr = nfs_pgio_header_alloc(desc->pg_rw_ops);
845 if (!hdr) { 803 if (!hdr) {
846 /* TODO: make sure this is right with mirroring - or 804 desc->pg_error = -ENOMEM;
847 * should it back out all mirrors? */ 805 return desc->pg_error;
848 desc->pg_completion_ops->error_cleanup(&mirror->pg_list);
849 return -ENOMEM;
850 } 806 }
851 nfs_pgheader_init(desc, hdr, nfs_pgio_header_free); 807 nfs_pgheader_init(desc, hdr, nfs_pgio_header_free);
852 ret = nfs_generic_pgio(desc, hdr); 808 ret = nfs_generic_pgio(desc, hdr);
@@ -874,6 +830,9 @@ static int nfs_pageio_setup_mirroring(struct nfs_pageio_descriptor *pgio,
874 830
875 mirror_count = pgio->pg_ops->pg_get_mirror_count(pgio, req); 831 mirror_count = pgio->pg_ops->pg_get_mirror_count(pgio, req);
876 832
833 if (pgio->pg_error < 0)
834 return pgio->pg_error;
835
877 if (!mirror_count || mirror_count > NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX) 836 if (!mirror_count || mirror_count > NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX)
878 return -EINVAL; 837 return -EINVAL;
879 838
@@ -903,12 +862,6 @@ static void nfs_pageio_cleanup_mirroring(struct nfs_pageio_descriptor *pgio)
903 pgio->pg_mirrors_dynamic = NULL; 862 pgio->pg_mirrors_dynamic = NULL;
904} 863}
905 864
906static bool nfs_match_open_context(const struct nfs_open_context *ctx1,
907 const struct nfs_open_context *ctx2)
908{
909 return ctx1->cred == ctx2->cred && ctx1->state == ctx2->state;
910}
911
912static bool nfs_match_lock_context(const struct nfs_lock_context *l1, 865static bool nfs_match_lock_context(const struct nfs_lock_context *l1,
913 const struct nfs_lock_context *l2) 866 const struct nfs_lock_context *l2)
914{ 867{
@@ -982,6 +935,8 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc,
982 } else { 935 } else {
983 if (desc->pg_ops->pg_init) 936 if (desc->pg_ops->pg_init)
984 desc->pg_ops->pg_init(desc, req); 937 desc->pg_ops->pg_init(desc, req);
938 if (desc->pg_error < 0)
939 return 0;
985 mirror->pg_base = req->wb_pgbase; 940 mirror->pg_base = req->wb_pgbase;
986 } 941 }
987 if (!nfs_can_coalesce_requests(prev, req, desc)) 942 if (!nfs_can_coalesce_requests(prev, req, desc))
@@ -1147,6 +1102,8 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
1147 bytes = req->wb_bytes; 1102 bytes = req->wb_bytes;
1148 1103
1149 nfs_pageio_setup_mirroring(desc, req); 1104 nfs_pageio_setup_mirroring(desc, req);
1105 if (desc->pg_error < 0)
1106 goto out_failed;
1150 1107
1151 for (midx = 0; midx < desc->pg_mirror_count; midx++) { 1108 for (midx = 0; midx < desc->pg_mirror_count; midx++) {
1152 if (midx) { 1109 if (midx) {
@@ -1163,7 +1120,8 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
1163 1120
1164 if (IS_ERR(dupreq)) { 1121 if (IS_ERR(dupreq)) {
1165 nfs_page_group_unlock(req); 1122 nfs_page_group_unlock(req);
1166 return 0; 1123 desc->pg_error = PTR_ERR(dupreq);
1124 goto out_failed;
1167 } 1125 }
1168 1126
1169 nfs_lock_request(dupreq); 1127 nfs_lock_request(dupreq);
@@ -1176,10 +1134,32 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
1176 if (nfs_pgio_has_mirroring(desc)) 1134 if (nfs_pgio_has_mirroring(desc))
1177 desc->pg_mirror_idx = midx; 1135 desc->pg_mirror_idx = midx;
1178 if (!nfs_pageio_add_request_mirror(desc, dupreq)) 1136 if (!nfs_pageio_add_request_mirror(desc, dupreq))
1179 return 0; 1137 goto out_failed;
1180 } 1138 }
1181 1139
1182 return 1; 1140 return 1;
1141
1142out_failed:
1143 /*
1144 * We might have failed before sending any reqs over the wire.
1145 * Clean up the rest of the reqs in the mirror's pg_list.
1146 */
1147 if (desc->pg_error) {
1148 struct nfs_pgio_mirror *mirror;
1149 void (*func)(struct list_head *);
1150
1151 /* remember fatal errors */
1152 if (nfs_error_is_fatal(desc->pg_error))
1153 mapping_set_error(desc->pg_inode->i_mapping,
1154 desc->pg_error);
1155
1156 func = desc->pg_completion_ops->error_cleanup;
1157 for (midx = 0; midx < desc->pg_mirror_count; midx++) {
1158 mirror = &desc->pg_mirrors[midx];
1159 func(&mirror->pg_list);
1160 }
1161 }
1162 return 0;
1183} 1163}
1184 1164
1185/* 1165/*
@@ -1232,7 +1212,7 @@ int nfs_pageio_resend(struct nfs_pageio_descriptor *desc,
1232 nfs_pageio_complete(desc); 1212 nfs_pageio_complete(desc);
1233 if (!list_empty(&failed)) { 1213 if (!list_empty(&failed)) {
1234 list_move(&failed, &hdr->pages); 1214 list_move(&failed, &hdr->pages);
1235 return -EIO; 1215 return desc->pg_error < 0 ? desc->pg_error : -EIO;
1236 } 1216 }
1237 return 0; 1217 return 0;
1238} 1218}
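
Note on the pagelist.c changes above: instead of silently returning 0 and leaving the caller guessing, failures are now recorded in desc->pg_error, and nfs_pageio_add_request() purges every mirror's request list on the way out. A minimal user-space sketch of that pattern follows; the struct and function names mirror the kernel code but this is not kernel source, and the mapping_set_error() step for fatal errors is omitted.

#include <stdio.h>
#include <errno.h>

#define MIRROR_MAX 2

struct mirror { int pending; };

struct pgio_desc {
	int pg_error;
	int pg_mirror_count;
	struct mirror pg_mirrors[MIRROR_MAX];
};

static void error_cleanup(struct mirror *m)
{
	m->pending = 0;	/* drop the requests still queued on this mirror */
}

/* On failure, remember the error on the descriptor and purge every
 * mirror's list -- the shape of the new out_failed: path above. */
static int add_request(struct pgio_desc *desc, int err)
{
	if (err) {
		desc->pg_error = err;
		for (int i = 0; i < desc->pg_mirror_count; i++)
			error_cleanup(&desc->pg_mirrors[i]);
		return 0;	/* request was not added */
	}
	return 1;
}

int main(void)
{
	struct pgio_desc desc = { .pg_mirror_count = 2,
				  .pg_mirrors = { {1}, {1} } };

	if (!add_request(&desc, -EIO))
		printf("add failed, pg_error=%d\n", desc.pg_error);
	return 0;
}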
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index bec0384499f7..a3592cc34a20 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -53,7 +53,7 @@ static DEFINE_SPINLOCK(pnfs_spinlock);
53static LIST_HEAD(pnfs_modules_tbl); 53static LIST_HEAD(pnfs_modules_tbl);
54 54
55static int 55static int
56pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, nfs4_stateid stateid, 56pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, const nfs4_stateid *stateid,
57 enum pnfs_iomode iomode, bool sync); 57 enum pnfs_iomode iomode, bool sync);
58 58
59/* Return the registered pnfs layout driver module matching given id */ 59/* Return the registered pnfs layout driver module matching given id */
@@ -385,13 +385,13 @@ static void pnfs_layoutreturn_before_put_lseg(struct pnfs_layout_segment *lseg,
385 enum pnfs_iomode iomode; 385 enum pnfs_iomode iomode;
386 bool send; 386 bool send;
387 387
388 stateid = lo->plh_stateid; 388 nfs4_stateid_copy(&stateid, &lo->plh_stateid);
389 iomode = lo->plh_return_iomode; 389 iomode = lo->plh_return_iomode;
390 send = pnfs_prepare_layoutreturn(lo); 390 send = pnfs_prepare_layoutreturn(lo);
391 spin_unlock(&inode->i_lock); 391 spin_unlock(&inode->i_lock);
392 if (send) { 392 if (send) {
393 /* Send an async layoutreturn so we dont deadlock */ 393 /* Send an async layoutreturn so we dont deadlock */
394 pnfs_send_layoutreturn(lo, stateid, iomode, false); 394 pnfs_send_layoutreturn(lo, &stateid, iomode, false);
395 } 395 }
396 } else 396 } else
397 spin_unlock(&inode->i_lock); 397 spin_unlock(&inode->i_lock);
@@ -566,10 +566,10 @@ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg,
566int 566int
567pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, 567pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
568 struct list_head *tmp_list, 568 struct list_head *tmp_list,
569 struct pnfs_layout_range *recall_range) 569 const struct pnfs_layout_range *recall_range)
570{ 570{
571 struct pnfs_layout_segment *lseg, *next; 571 struct pnfs_layout_segment *lseg, *next;
572 int invalid = 0, removed = 0; 572 int remaining = 0;
573 573
574 dprintk("%s:Begin lo %p\n", __func__, lo); 574 dprintk("%s:Begin lo %p\n", __func__, lo);
575 575
@@ -582,11 +582,11 @@ pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
582 "offset %llu length %llu\n", __func__, 582 "offset %llu length %llu\n", __func__,
583 lseg, lseg->pls_range.iomode, lseg->pls_range.offset, 583 lseg, lseg->pls_range.iomode, lseg->pls_range.offset,
584 lseg->pls_range.length); 584 lseg->pls_range.length);
585 invalid++; 585 if (!mark_lseg_invalid(lseg, tmp_list))
586 removed += mark_lseg_invalid(lseg, tmp_list); 586 remaining++;
587 } 587 }
588 dprintk("%s:Return %i\n", __func__, invalid - removed); 588 dprintk("%s:Return %i\n", __func__, remaining);
589 return invalid - removed; 589 return remaining;
590} 590}
591 591
592/* note free_me must contain lsegs from a single layout_hdr */ 592/* note free_me must contain lsegs from a single layout_hdr */
@@ -618,7 +618,6 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
618 pnfs_get_layout_hdr(lo); 618 pnfs_get_layout_hdr(lo);
619 pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RO_FAILED); 619 pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RO_FAILED);
620 pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RW_FAILED); 620 pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RW_FAILED);
621 pnfs_clear_retry_layoutget(lo);
622 spin_unlock(&nfsi->vfs_inode.i_lock); 621 spin_unlock(&nfsi->vfs_inode.i_lock);
623 pnfs_free_lseg_list(&tmp_list); 622 pnfs_free_lseg_list(&tmp_list);
624 pnfs_put_layout_hdr(lo); 623 pnfs_put_layout_hdr(lo);
@@ -703,6 +702,8 @@ pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list,
703 ret = -EAGAIN; 702 ret = -EAGAIN;
704 spin_unlock(&inode->i_lock); 703 spin_unlock(&inode->i_lock);
705 pnfs_free_lseg_list(&lseg_list); 704 pnfs_free_lseg_list(&lseg_list);
705 /* Free all lsegs that are attached to commit buckets */
706 nfs_commit_inode(inode, 0);
706 pnfs_put_layout_hdr(lo); 707 pnfs_put_layout_hdr(lo);
707 iput(inode); 708 iput(inode);
708 } 709 }
@@ -826,7 +827,7 @@ pnfs_layoutgets_blocked(const struct pnfs_layout_hdr *lo)
826 827
827int 828int
828pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo, 829pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
829 struct pnfs_layout_range *range, 830 const struct pnfs_layout_range *range,
830 struct nfs4_state *open_state) 831 struct nfs4_state *open_state)
831{ 832{
832 int status = 0; 833 int status = 0;
@@ -861,7 +862,7 @@ pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
861static struct pnfs_layout_segment * 862static struct pnfs_layout_segment *
862send_layoutget(struct pnfs_layout_hdr *lo, 863send_layoutget(struct pnfs_layout_hdr *lo,
863 struct nfs_open_context *ctx, 864 struct nfs_open_context *ctx,
864 struct pnfs_layout_range *range, 865 const struct pnfs_layout_range *range,
865 gfp_t gfp_flags) 866 gfp_t gfp_flags)
866{ 867{
867 struct inode *ino = lo->plh_inode; 868 struct inode *ino = lo->plh_inode;
@@ -894,7 +895,7 @@ send_layoutget(struct pnfs_layout_hdr *lo,
894 lgp->args.minlength = i_size - range->offset; 895 lgp->args.minlength = i_size - range->offset;
895 } 896 }
896 lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE; 897 lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
897 lgp->args.range = *range; 898 pnfs_copy_range(&lgp->args.range, range);
898 lgp->args.type = server->pnfs_curr_ld->id; 899 lgp->args.type = server->pnfs_curr_ld->id;
899 lgp->args.inode = ino; 900 lgp->args.inode = ino;
900 lgp->args.ctx = get_nfs_open_context(ctx); 901 lgp->args.ctx = get_nfs_open_context(ctx);
@@ -904,17 +905,9 @@ send_layoutget(struct pnfs_layout_hdr *lo,
904 lseg = nfs4_proc_layoutget(lgp, gfp_flags); 905 lseg = nfs4_proc_layoutget(lgp, gfp_flags);
905 } while (lseg == ERR_PTR(-EAGAIN)); 906 } while (lseg == ERR_PTR(-EAGAIN));
906 907
907 if (IS_ERR(lseg)) { 908 if (IS_ERR(lseg) && !nfs_error_is_fatal(PTR_ERR(lseg)))
908 switch (PTR_ERR(lseg)) { 909 lseg = NULL;
909 case -ENOMEM: 910 else
910 case -ERESTARTSYS:
911 break;
912 default:
913 /* remember that LAYOUTGET failed and suspend trying */
914 pnfs_layout_io_set_failed(lo, range->iomode);
915 }
916 return NULL;
917 } else
918 pnfs_layout_clear_fail_bit(lo, 911 pnfs_layout_clear_fail_bit(lo,
919 pnfs_iomode_to_fail_bit(range->iomode)); 912 pnfs_iomode_to_fail_bit(range->iomode));
920 913
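
The send_layoutget() hunk above collapses non-fatal LAYOUTGET errors to a NULL lseg (so the caller falls back to I/O through the MDS) while letting fatal errors propagate as an ERR_PTR. A small runnable sketch of that filter, with stand-ins for the kernel's ERR_PTR helpers; error_is_fatal() here is an assumed shape, the real nfs_error_is_fatal() in fs/nfs/internal.h may list different errnos.

#include <stdio.h>
#include <stdbool.h>
#include <stdint.h>
#include <errno.h>

#define MAX_ERRNO 4095
static void *ERR_PTR(long err) { return (void *)err; }
static long PTR_ERR(const void *p) { return (long)p; }
static bool IS_ERR(const void *p)
{
	return (uintptr_t)p >= (uintptr_t)-MAX_ERRNO;
}

/* Assumption: fatal means the I/O cannot meaningfully be retried. */
static bool error_is_fatal(long err)
{
	switch (err) {
	case -EIO: case -EROFS: case -ENOSPC: case -E2BIG:
		return true;
	default:
		return false;
	}
}

int main(void)
{
	void *lseg = ERR_PTR(-ENOMEM);	/* pretend LAYOUTGET failed softly */

	if (IS_ERR(lseg) && !error_is_fatal(PTR_ERR(lseg)))
		lseg = NULL;		/* fall back to the MDS path */

	printf("%s\n", lseg == NULL ? "retry via MDS" : "fatal error");
	return 0;
}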
@@ -945,7 +938,7 @@ void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo)
945} 938}
946 939
947static int 940static int
948pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, nfs4_stateid stateid, 941pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, const nfs4_stateid *stateid,
949 enum pnfs_iomode iomode, bool sync) 942 enum pnfs_iomode iomode, bool sync)
950{ 943{
951 struct inode *ino = lo->plh_inode; 944 struct inode *ino = lo->plh_inode;
@@ -962,7 +955,7 @@ pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, nfs4_stateid stateid,
962 goto out; 955 goto out;
963 } 956 }
964 957
965 lrp->args.stateid = stateid; 958 nfs4_stateid_copy(&lrp->args.stateid, stateid);
966 lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id; 959 lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id;
967 lrp->args.inode = ino; 960 lrp->args.inode = ino;
968 lrp->args.range.iomode = iomode; 961 lrp->args.range.iomode = iomode;
@@ -1005,7 +998,7 @@ _pnfs_return_layout(struct inode *ino)
1005 dprintk("NFS: %s no layout to return\n", __func__); 998 dprintk("NFS: %s no layout to return\n", __func__);
1006 goto out; 999 goto out;
1007 } 1000 }
1008 stateid = nfsi->layout->plh_stateid; 1001 nfs4_stateid_copy(&stateid, &nfsi->layout->plh_stateid);
1009 /* Reference matched in nfs4_layoutreturn_release */ 1002 /* Reference matched in nfs4_layoutreturn_release */
1010 pnfs_get_layout_hdr(lo); 1003 pnfs_get_layout_hdr(lo);
1011 empty = list_empty(&lo->plh_segs); 1004 empty = list_empty(&lo->plh_segs);
@@ -1033,7 +1026,7 @@ _pnfs_return_layout(struct inode *ino)
1033 spin_unlock(&ino->i_lock); 1026 spin_unlock(&ino->i_lock);
1034 pnfs_free_lseg_list(&tmp_list); 1027 pnfs_free_lseg_list(&tmp_list);
1035 if (send) 1028 if (send)
1036 status = pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, true); 1029 status = pnfs_send_layoutreturn(lo, &stateid, IOMODE_ANY, true);
1037out_put_layout_hdr: 1030out_put_layout_hdr:
1038 pnfs_put_layout_hdr(lo); 1031 pnfs_put_layout_hdr(lo);
1039out: 1032out:
@@ -1096,13 +1089,12 @@ bool pnfs_roc(struct inode *ino)
1096 goto out_noroc; 1089 goto out_noroc;
1097 } 1090 }
1098 1091
1099 stateid = lo->plh_stateid; 1092 nfs4_stateid_copy(&stateid, &lo->plh_stateid);
1100 /* always send layoutreturn if being marked so */ 1093 /* always send layoutreturn if being marked so */
1101 if (test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, 1094 if (test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
1102 &lo->plh_flags)) 1095 &lo->plh_flags))
1103 layoutreturn = pnfs_prepare_layoutreturn(lo); 1096 layoutreturn = pnfs_prepare_layoutreturn(lo);
1104 1097
1105 pnfs_clear_retry_layoutget(lo);
1106 list_for_each_entry_safe(lseg, tmp, &lo->plh_segs, pls_list) 1098 list_for_each_entry_safe(lseg, tmp, &lo->plh_segs, pls_list)
1107 /* If we are sending layoutreturn, invalidate all valid lsegs */ 1099 /* If we are sending layoutreturn, invalidate all valid lsegs */
1108 if (layoutreturn || test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) { 1100 if (layoutreturn || test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) {
@@ -1124,7 +1116,7 @@ out_noroc:
1124 pnfs_free_lseg_list(&tmp_list); 1116 pnfs_free_lseg_list(&tmp_list);
1125 pnfs_layoutcommit_inode(ino, true); 1117 pnfs_layoutcommit_inode(ino, true);
1126 if (layoutreturn) 1118 if (layoutreturn)
1127 pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, true); 1119 pnfs_send_layoutreturn(lo, &stateid, IOMODE_ANY, true);
1128 return roc; 1120 return roc;
1129} 1121}
1130 1122
@@ -1149,6 +1141,7 @@ void pnfs_roc_set_barrier(struct inode *ino, u32 barrier)
1149 1141
1150 spin_lock(&ino->i_lock); 1142 spin_lock(&ino->i_lock);
1151 lo = NFS_I(ino)->layout; 1143 lo = NFS_I(ino)->layout;
1144 pnfs_mark_layout_returned_if_empty(lo);
1152 if (pnfs_seqid_is_newer(barrier, lo->plh_barrier)) 1145 if (pnfs_seqid_is_newer(barrier, lo->plh_barrier))
1153 lo->plh_barrier = barrier; 1146 lo->plh_barrier = barrier;
1154 spin_unlock(&ino->i_lock); 1147 spin_unlock(&ino->i_lock);
@@ -1465,25 +1458,15 @@ static bool pnfs_within_mdsthreshold(struct nfs_open_context *ctx,
1465 return ret; 1458 return ret;
1466} 1459}
1467 1460
1468/* stop waiting if someone clears NFS_LAYOUT_RETRY_LAYOUTGET bit. */
1469static int pnfs_layoutget_retry_bit_wait(struct wait_bit_key *key, int mode)
1470{
1471 if (!test_bit(NFS_LAYOUT_RETRY_LAYOUTGET, key->flags))
1472 return 1;
1473 return nfs_wait_bit_killable(key, mode);
1474}
1475
1476static bool pnfs_prepare_to_retry_layoutget(struct pnfs_layout_hdr *lo) 1461static bool pnfs_prepare_to_retry_layoutget(struct pnfs_layout_hdr *lo)
1477{ 1462{
1478 if (!pnfs_should_retry_layoutget(lo))
1479 return false;
1480 /* 1463 /*
1481 * send layoutcommit as it can hold up layoutreturn due to lseg 1464 * send layoutcommit as it can hold up layoutreturn due to lseg
1482 * reference 1465 * reference
1483 */ 1466 */
1484 pnfs_layoutcommit_inode(lo->plh_inode, false); 1467 pnfs_layoutcommit_inode(lo->plh_inode, false);
1485 return !wait_on_bit_action(&lo->plh_flags, NFS_LAYOUT_RETURN, 1468 return !wait_on_bit_action(&lo->plh_flags, NFS_LAYOUT_RETURN,
1486 pnfs_layoutget_retry_bit_wait, 1469 nfs_wait_bit_killable,
1487 TASK_UNINTERRUPTIBLE); 1470 TASK_UNINTERRUPTIBLE);
1488} 1471}
1489 1472
@@ -1520,14 +1503,23 @@ pnfs_update_layout(struct inode *ino,
1520 struct pnfs_layout_segment *lseg = NULL; 1503 struct pnfs_layout_segment *lseg = NULL;
1521 bool first; 1504 bool first;
1522 1505
1523 if (!pnfs_enabled_sb(NFS_SERVER(ino))) 1506 if (!pnfs_enabled_sb(NFS_SERVER(ino))) {
1507 trace_pnfs_update_layout(ino, pos, count, iomode, NULL,
1508 PNFS_UPDATE_LAYOUT_NO_PNFS);
1524 goto out; 1509 goto out;
1510 }
1525 1511
1526 if (iomode == IOMODE_READ && i_size_read(ino) == 0) 1512 if (iomode == IOMODE_READ && i_size_read(ino) == 0) {
1513 trace_pnfs_update_layout(ino, pos, count, iomode, NULL,
1514 PNFS_UPDATE_LAYOUT_RD_ZEROLEN);
1527 goto out; 1515 goto out;
1516 }
1528 1517
1529 if (pnfs_within_mdsthreshold(ctx, ino, iomode)) 1518 if (pnfs_within_mdsthreshold(ctx, ino, iomode)) {
1519 trace_pnfs_update_layout(ino, pos, count, iomode, NULL,
1520 PNFS_UPDATE_LAYOUT_MDSTHRESH);
1530 goto out; 1521 goto out;
1522 }
1531 1523
1532lookup_again: 1524lookup_again:
1533 first = false; 1525 first = false;
@@ -1535,19 +1527,25 @@ lookup_again:
1535 lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags); 1527 lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags);
1536 if (lo == NULL) { 1528 if (lo == NULL) {
1537 spin_unlock(&ino->i_lock); 1529 spin_unlock(&ino->i_lock);
1530 trace_pnfs_update_layout(ino, pos, count, iomode, NULL,
1531 PNFS_UPDATE_LAYOUT_NOMEM);
1538 goto out; 1532 goto out;
1539 } 1533 }
1540 1534
1541 /* Do we even need to bother with this? */ 1535 /* Do we even need to bother with this? */
1542 if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) { 1536 if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
1537 trace_pnfs_update_layout(ino, pos, count, iomode, lo,
1538 PNFS_UPDATE_LAYOUT_BULK_RECALL);
1543 dprintk("%s matches recall, use MDS\n", __func__); 1539 dprintk("%s matches recall, use MDS\n", __func__);
1544 goto out_unlock; 1540 goto out_unlock;
1545 } 1541 }
1546 1542
1547 /* if LAYOUTGET already failed once we don't try again */ 1543 /* if LAYOUTGET already failed once we don't try again */
1548 if (pnfs_layout_io_test_failed(lo, iomode) && 1544 if (pnfs_layout_io_test_failed(lo, iomode)) {
1549 !pnfs_should_retry_layoutget(lo)) 1545 trace_pnfs_update_layout(ino, pos, count, iomode, lo,
1546 PNFS_UPDATE_LAYOUT_IO_TEST_FAIL);
1550 goto out_unlock; 1547 goto out_unlock;
1548 }
1551 1549
1552 first = list_empty(&lo->plh_segs); 1550 first = list_empty(&lo->plh_segs);
1553 if (first) { 1551 if (first) {
@@ -1567,8 +1565,11 @@ lookup_again:
1567 * already exists 1565 * already exists
1568 */ 1566 */
1569 lseg = pnfs_find_lseg(lo, &arg); 1567 lseg = pnfs_find_lseg(lo, &arg);
1570 if (lseg) 1568 if (lseg) {
1569 trace_pnfs_update_layout(ino, pos, count, iomode, lo,
1570 PNFS_UPDATE_LAYOUT_FOUND_CACHED);
1571 goto out_unlock; 1571 goto out_unlock;
1572 }
1572 } 1573 }
1573 1574
1574 /* 1575 /*
@@ -1585,11 +1586,16 @@ lookup_again:
1585 dprintk("%s retrying\n", __func__); 1586 dprintk("%s retrying\n", __func__);
1586 goto lookup_again; 1587 goto lookup_again;
1587 } 1588 }
1589 trace_pnfs_update_layout(ino, pos, count, iomode, lo,
1590 PNFS_UPDATE_LAYOUT_RETURN);
1588 goto out_put_layout_hdr; 1591 goto out_put_layout_hdr;
1589 } 1592 }
1590 1593
1591 if (pnfs_layoutgets_blocked(lo)) 1594 if (pnfs_layoutgets_blocked(lo)) {
1595 trace_pnfs_update_layout(ino, pos, count, iomode, lo,
1596 PNFS_UPDATE_LAYOUT_BLOCKED);
1592 goto out_unlock; 1597 goto out_unlock;
1598 }
1593 atomic_inc(&lo->plh_outstanding); 1599 atomic_inc(&lo->plh_outstanding);
1594 spin_unlock(&ino->i_lock); 1600 spin_unlock(&ino->i_lock);
1595 1601
@@ -1612,8 +1618,9 @@ lookup_again:
1612 arg.length = PAGE_CACHE_ALIGN(arg.length); 1618 arg.length = PAGE_CACHE_ALIGN(arg.length);
1613 1619
1614 lseg = send_layoutget(lo, ctx, &arg, gfp_flags); 1620 lseg = send_layoutget(lo, ctx, &arg, gfp_flags);
1615 pnfs_clear_retry_layoutget(lo);
1616 atomic_dec(&lo->plh_outstanding); 1621 atomic_dec(&lo->plh_outstanding);
1622 trace_pnfs_update_layout(ino, pos, count, iomode, lo,
1623 PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET);
1617out_put_layout_hdr: 1624out_put_layout_hdr:
1618 if (first) 1625 if (first)
1619 pnfs_clear_first_layoutget(lo); 1626 pnfs_clear_first_layoutget(lo);
@@ -1623,7 +1630,7 @@ out:
1623 "(%s, offset: %llu, length: %llu)\n", 1630 "(%s, offset: %llu, length: %llu)\n",
1624 __func__, ino->i_sb->s_id, 1631 __func__, ino->i_sb->s_id,
1625 (unsigned long long)NFS_FILEID(ino), 1632 (unsigned long long)NFS_FILEID(ino),
1626 lseg == NULL ? "not found" : "found", 1633 IS_ERR_OR_NULL(lseg) ? "not found" : "found",
1627 iomode==IOMODE_RW ? "read/write" : "read-only", 1634 iomode==IOMODE_RW ? "read/write" : "read-only",
1628 (unsigned long long)pos, 1635 (unsigned long long)pos,
1629 (unsigned long long)count); 1636 (unsigned long long)count);
@@ -1730,16 +1737,29 @@ out_forget_reply:
1730} 1737}
1731 1738
1732static void 1739static void
1740pnfs_set_plh_return_iomode(struct pnfs_layout_hdr *lo, enum pnfs_iomode iomode)
1741{
1742 if (lo->plh_return_iomode == iomode)
1743 return;
1744 if (lo->plh_return_iomode != 0)
1745 iomode = IOMODE_ANY;
1746 lo->plh_return_iomode = iomode;
1747}
1748
1749int
1733pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo, 1750pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
1734 struct list_head *tmp_list, 1751 struct list_head *tmp_list,
1735 struct pnfs_layout_range *return_range) 1752 const struct pnfs_layout_range *return_range)
1736{ 1753{
1737 struct pnfs_layout_segment *lseg, *next; 1754 struct pnfs_layout_segment *lseg, *next;
1755 int remaining = 0;
1738 1756
1739 dprintk("%s:Begin lo %p\n", __func__, lo); 1757 dprintk("%s:Begin lo %p\n", __func__, lo);
1740 1758
1741 if (list_empty(&lo->plh_segs)) 1759 if (list_empty(&lo->plh_segs))
1742 return; 1760 return 0;
1761
1762 assert_spin_locked(&lo->plh_inode->i_lock);
1743 1763
1744 list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) 1764 list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
1745 if (should_free_lseg(&lseg->pls_range, return_range)) { 1765 if (should_free_lseg(&lseg->pls_range, return_range)) {
@@ -1749,38 +1769,47 @@ pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
1749 lseg->pls_range.offset, 1769 lseg->pls_range.offset,
1750 lseg->pls_range.length); 1770 lseg->pls_range.length);
1751 set_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags); 1771 set_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags);
1752 mark_lseg_invalid(lseg, tmp_list); 1772 pnfs_set_plh_return_iomode(lo, return_range->iomode);
1773 if (!mark_lseg_invalid(lseg, tmp_list))
1774 remaining++;
1753 set_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, 1775 set_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
1754 &lo->plh_flags); 1776 &lo->plh_flags);
1755 } 1777 }
1778 return remaining;
1756} 1779}
1757 1780
1758void pnfs_error_mark_layout_for_return(struct inode *inode, 1781void pnfs_error_mark_layout_for_return(struct inode *inode,
1759 struct pnfs_layout_segment *lseg) 1782 struct pnfs_layout_segment *lseg)
1760{ 1783{
1761 struct pnfs_layout_hdr *lo = NFS_I(inode)->layout; 1784 struct pnfs_layout_hdr *lo = NFS_I(inode)->layout;
1762 int iomode = pnfs_iomode_to_fail_bit(lseg->pls_range.iomode);
1763 struct pnfs_layout_range range = { 1785 struct pnfs_layout_range range = {
1764 .iomode = lseg->pls_range.iomode, 1786 .iomode = lseg->pls_range.iomode,
1765 .offset = 0, 1787 .offset = 0,
1766 .length = NFS4_MAX_UINT64, 1788 .length = NFS4_MAX_UINT64,
1767 }; 1789 };
1768 LIST_HEAD(free_me); 1790 LIST_HEAD(free_me);
1791 bool return_now = false;
1769 1792
1770 spin_lock(&inode->i_lock); 1793 spin_lock(&inode->i_lock);
1771 /* set failure bit so that pnfs path will be retried later */ 1794 pnfs_set_plh_return_iomode(lo, range.iomode);
1772 pnfs_layout_set_fail_bit(lo, iomode);
1773 if (lo->plh_return_iomode == 0)
1774 lo->plh_return_iomode = range.iomode;
1775 else if (lo->plh_return_iomode != range.iomode)
1776 lo->plh_return_iomode = IOMODE_ANY;
1777 /* 1795 /*
1778 * mark all matching lsegs so that we are sure to have no live 1796 * mark all matching lsegs so that we are sure to have no live
1779 * segments at hand when sending layoutreturn. See pnfs_put_lseg() 1797 * segments at hand when sending layoutreturn. See pnfs_put_lseg()
1780 * for how it works. 1798 * for how it works.
1781 */ 1799 */
1782 pnfs_mark_matching_lsegs_return(lo, &free_me, &range); 1800 if (!pnfs_mark_matching_lsegs_return(lo, &free_me, &range)) {
1783 spin_unlock(&inode->i_lock); 1801 nfs4_stateid stateid;
1802 enum pnfs_iomode iomode = lo->plh_return_iomode;
1803
1804 nfs4_stateid_copy(&stateid, &lo->plh_stateid);
1805 return_now = pnfs_prepare_layoutreturn(lo);
1806 spin_unlock(&inode->i_lock);
1807 if (return_now)
1808 pnfs_send_layoutreturn(lo, &stateid, iomode, false);
1809 } else {
1810 spin_unlock(&inode->i_lock);
1811 nfs_commit_inode(inode, 0);
1812 }
1784 pnfs_free_lseg_list(&free_me); 1813 pnfs_free_lseg_list(&free_me);
1785} 1814}
1786EXPORT_SYMBOL_GPL(pnfs_error_mark_layout_for_return); 1815EXPORT_SYMBOL_GPL(pnfs_error_mark_layout_for_return);
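
The rewritten pnfs_error_mark_layout_for_return() above branches on the new "remaining" count: if no matching lseg is still in use, the LAYOUTRETURN goes out immediately; otherwise outstanding WRITEs are committed first so the remaining references can drain. A bare control-flow sketch of that decision, stripped of the locking and stateid handling:

#include <stdio.h>

/* Stand-in for pnfs_mark_matching_lsegs_return(): returns how many
 * matching lsegs are still in use (the "remaining" count above). */
static int mark_matching_lsegs_return(int in_use)
{
	return in_use;
}

int main(void)
{
	int remaining = mark_matching_lsegs_return(0);

	if (remaining == 0)
		printf("send LAYOUTRETURN now\n");
	else
		printf("commit first; LAYOUTRETURN once refs drain\n");
	return 0;
}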
@@ -1802,6 +1831,11 @@ pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *r
1802 rd_size, 1831 rd_size,
1803 IOMODE_READ, 1832 IOMODE_READ,
1804 GFP_KERNEL); 1833 GFP_KERNEL);
1834 if (IS_ERR(pgio->pg_lseg)) {
1835 pgio->pg_error = PTR_ERR(pgio->pg_lseg);
1836 pgio->pg_lseg = NULL;
1837 return;
1838 }
1805 } 1839 }
1806 /* If no lseg, fall back to read through mds */ 1840 /* If no lseg, fall back to read through mds */
1807 if (pgio->pg_lseg == NULL) 1841 if (pgio->pg_lseg == NULL)
@@ -1814,13 +1848,19 @@ void
1814pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, 1848pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio,
1815 struct nfs_page *req, u64 wb_size) 1849 struct nfs_page *req, u64 wb_size)
1816{ 1850{
1817 if (pgio->pg_lseg == NULL) 1851 if (pgio->pg_lseg == NULL) {
1818 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, 1852 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
1819 req->wb_context, 1853 req->wb_context,
1820 req_offset(req), 1854 req_offset(req),
1821 wb_size, 1855 wb_size,
1822 IOMODE_RW, 1856 IOMODE_RW,
1823 GFP_NOFS); 1857 GFP_NOFS);
1858 if (IS_ERR(pgio->pg_lseg)) {
1859 pgio->pg_error = PTR_ERR(pgio->pg_lseg);
1860 pgio->pg_lseg = NULL;
1861 return;
1862 }
1863 }
1824 /* If no lseg, fall back to write through mds */ 1864 /* If no lseg, fall back to write through mds */
1825 if (pgio->pg_lseg == NULL) 1865 if (pgio->pg_lseg == NULL)
1826 nfs_pageio_reset_write_mds(pgio); 1866 nfs_pageio_reset_write_mds(pgio);
@@ -1988,15 +2028,13 @@ static void pnfs_writehdr_free(struct nfs_pgio_header *hdr)
1988int 2028int
1989pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) 2029pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
1990{ 2030{
1991 struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc);
1992
1993 struct nfs_pgio_header *hdr; 2031 struct nfs_pgio_header *hdr;
1994 int ret; 2032 int ret;
1995 2033
1996 hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); 2034 hdr = nfs_pgio_header_alloc(desc->pg_rw_ops);
1997 if (!hdr) { 2035 if (!hdr) {
1998 desc->pg_completion_ops->error_cleanup(&mirror->pg_list); 2036 desc->pg_error = -ENOMEM;
1999 return -ENOMEM; 2037 return desc->pg_error;
2000 } 2038 }
2001 nfs_pgheader_init(desc, hdr, pnfs_writehdr_free); 2039 nfs_pgheader_init(desc, hdr, pnfs_writehdr_free);
2002 2040
@@ -2119,15 +2157,13 @@ static void pnfs_readhdr_free(struct nfs_pgio_header *hdr)
2119int 2157int
2120pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) 2158pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
2121{ 2159{
2122 struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc);
2123
2124 struct nfs_pgio_header *hdr; 2160 struct nfs_pgio_header *hdr;
2125 int ret; 2161 int ret;
2126 2162
2127 hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); 2163 hdr = nfs_pgio_header_alloc(desc->pg_rw_ops);
2128 if (!hdr) { 2164 if (!hdr) {
2129 desc->pg_completion_ops->error_cleanup(&mirror->pg_list); 2165 desc->pg_error = -ENOMEM;
2130 return -ENOMEM; 2166 return desc->pg_error;
2131 } 2167 }
2132 nfs_pgheader_init(desc, hdr, pnfs_readhdr_free); 2168 nfs_pgheader_init(desc, hdr, pnfs_readhdr_free);
2133 hdr->lseg = pnfs_get_lseg(desc->pg_lseg); 2169 hdr->lseg = pnfs_get_lseg(desc->pg_lseg);
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index d1990e90e7a0..9f4e2a47f4aa 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -98,7 +98,6 @@ enum {
98 NFS_LAYOUT_RETURN_BEFORE_CLOSE, /* Return this layout before close */ 98 NFS_LAYOUT_RETURN_BEFORE_CLOSE, /* Return this layout before close */
99 NFS_LAYOUT_INVALID_STID, /* layout stateid id is invalid */ 99 NFS_LAYOUT_INVALID_STID, /* layout stateid id is invalid */
100 NFS_LAYOUT_FIRST_LAYOUTGET, /* Serialize first layoutget */ 100 NFS_LAYOUT_FIRST_LAYOUTGET, /* Serialize first layoutget */
101 NFS_LAYOUT_RETRY_LAYOUTGET, /* Retry layoutget */
102}; 101};
103 102
104enum layoutdriver_policy_flags { 103enum layoutdriver_policy_flags {
@@ -261,11 +260,14 @@ void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
261 bool update_barrier); 260 bool update_barrier);
262int pnfs_choose_layoutget_stateid(nfs4_stateid *dst, 261int pnfs_choose_layoutget_stateid(nfs4_stateid *dst,
263 struct pnfs_layout_hdr *lo, 262 struct pnfs_layout_hdr *lo,
264 struct pnfs_layout_range *range, 263 const struct pnfs_layout_range *range,
265 struct nfs4_state *open_state); 264 struct nfs4_state *open_state);
266int pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, 265int pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
267 struct list_head *tmp_list, 266 struct list_head *tmp_list,
268 struct pnfs_layout_range *recall_range); 267 const struct pnfs_layout_range *recall_range);
268int pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
269 struct list_head *tmp_list,
270 const struct pnfs_layout_range *recall_range);
269bool pnfs_roc(struct inode *ino); 271bool pnfs_roc(struct inode *ino);
270void pnfs_roc_release(struct inode *ino); 272void pnfs_roc_release(struct inode *ino);
271void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); 273void pnfs_roc_set_barrier(struct inode *ino, u32 barrier);
@@ -379,26 +381,6 @@ nfs4_get_deviceid(struct nfs4_deviceid_node *d)
379 return d; 381 return d;
380} 382}
381 383
382static inline void pnfs_set_retry_layoutget(struct pnfs_layout_hdr *lo)
383{
384 if (!test_and_set_bit(NFS_LAYOUT_RETRY_LAYOUTGET, &lo->plh_flags))
385 atomic_inc(&lo->plh_refcount);
386}
387
388static inline void pnfs_clear_retry_layoutget(struct pnfs_layout_hdr *lo)
389{
390 if (test_and_clear_bit(NFS_LAYOUT_RETRY_LAYOUTGET, &lo->plh_flags)) {
391 atomic_dec(&lo->plh_refcount);
392 /* wake up waiters for LAYOUTRETURN as that is not needed */
393 wake_up_bit(&lo->plh_flags, NFS_LAYOUT_RETURN);
394 }
395}
396
397static inline bool pnfs_should_retry_layoutget(struct pnfs_layout_hdr *lo)
398{
399 return test_bit(NFS_LAYOUT_RETRY_LAYOUTGET, &lo->plh_flags);
400}
401
402static inline struct pnfs_layout_segment * 384static inline struct pnfs_layout_segment *
403pnfs_get_lseg(struct pnfs_layout_segment *lseg) 385pnfs_get_lseg(struct pnfs_layout_segment *lseg)
404{ 386{
@@ -409,6 +391,12 @@ pnfs_get_lseg(struct pnfs_layout_segment *lseg)
409 return lseg; 391 return lseg;
410} 392}
411 393
394static inline bool
395pnfs_is_valid_lseg(struct pnfs_layout_segment *lseg)
396{
397 return test_bit(NFS_LSEG_VALID, &lseg->pls_flags) != 0;
398}
399
412/* Return true if a layout driver is being used for this mountpoint */ 400/* Return true if a layout driver is being used for this mountpoint */
413static inline int pnfs_enabled_sb(struct nfs_server *nfss) 401static inline int pnfs_enabled_sb(struct nfs_server *nfss)
414{ 402{
@@ -556,6 +544,26 @@ pnfs_calc_offset_length(u64 offset, u64 end)
556 return 1 + end - offset; 544 return 1 + end - offset;
557} 545}
558 546
547/**
548 * pnfs_mark_layout_returned_if_empty - marks the layout as returned
549 * @lo: layout header
550 *
551 * Note: Caller must hold inode->i_lock
552 */
553static inline void
554pnfs_mark_layout_returned_if_empty(struct pnfs_layout_hdr *lo)
555{
556 if (list_empty(&lo->plh_segs))
557 set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
558}
559
560static inline void
561pnfs_copy_range(struct pnfs_layout_range *dst,
562 const struct pnfs_layout_range *src)
563{
564 memcpy(dst, src, sizeof(*dst));
565}
566
559extern unsigned int layoutstats_timer; 567extern unsigned int layoutstats_timer;
560 568
561#ifdef NFS_DEBUG 569#ifdef NFS_DEBUG
diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index 24655b807d44..81ac6480f9e7 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -266,17 +266,14 @@ pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
266 } else { 266 } else {
267 nfs_retry_commit(mds_pages, NULL, cinfo, 0); 267 nfs_retry_commit(mds_pages, NULL, cinfo, 0);
268 pnfs_generic_retry_commit(cinfo, 0); 268 pnfs_generic_retry_commit(cinfo, 0);
269 cinfo->completion_ops->error_cleanup(NFS_I(inode));
270 return -ENOMEM; 269 return -ENOMEM;
271 } 270 }
272 } 271 }
273 272
274 nreq += pnfs_generic_alloc_ds_commits(cinfo, &list); 273 nreq += pnfs_generic_alloc_ds_commits(cinfo, &list);
275 274
276 if (nreq == 0) { 275 if (nreq == 0)
277 cinfo->completion_ops->error_cleanup(NFS_I(inode));
278 goto out; 276 goto out;
279 }
280 277
281 atomic_add(nreq, &cinfo->mds->rpcs_out); 278 atomic_add(nreq, &cinfo->mds->rpcs_out);
282 279
@@ -871,6 +868,11 @@ pnfs_layout_mark_request_commit(struct nfs_page *req,
871 buckets = cinfo->ds->buckets; 868 buckets = cinfo->ds->buckets;
872 list = &buckets[ds_commit_idx].written; 869 list = &buckets[ds_commit_idx].written;
873 if (list_empty(list)) { 870 if (list_empty(list)) {
871 if (!pnfs_is_valid_lseg(lseg)) {
872 spin_unlock(cinfo->lock);
873 cinfo->completion_ops->resched_write(cinfo, req);
874 return;
875 }
874 /* Non-empty buckets hold a reference on the lseg. That ref 876 /* Non-empty buckets hold a reference on the lseg. That ref
875 * is normally transferred to the COMMIT call and released 877 * is normally transferred to the COMMIT call and released
876 * there. It could also be released if the last req is pulled 878 * there. It could also be released if the last req is pulled
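
The guard added to pnfs_layout_mark_request_commit() above exploits the invariant described in that comment: only the first request into an empty bucket pins the lseg, so that is the one place where the lseg's validity must be rechecked; a stale lseg sends the write back through the MDS via the new resched_write hook. A toy model of the invariant:

#include <stdio.h>
#include <stdbool.h>

struct lseg { bool valid; int refs; };

static bool bucket_add(struct lseg *lseg, int *bucket_len)
{
	if (*bucket_len == 0) {		/* empty bucket would pin the lseg */
		if (!lseg->valid)
			return false;	/* caller must resched_write() */
		lseg->refs++;		/* non-empty buckets hold a ref */
	}
	(*bucket_len)++;
	return true;
}

int main(void)
{
	struct lseg stale = { .valid = false, .refs = 1 };
	int len = 0;

	if (!bucket_add(&stale, &len))
		printf("lseg invalid: reschedule write through MDS\n");
	return 0;
}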
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 0a5e33f33b5c..eb31e23e7def 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -85,6 +85,23 @@ void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio)
85} 85}
86EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds); 86EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds);
87 87
88static void nfs_readpage_release(struct nfs_page *req)
89{
90 struct inode *inode = d_inode(req->wb_context->dentry);
91
92 dprintk("NFS: read done (%s/%llu %d@%lld)\n", inode->i_sb->s_id,
93 (unsigned long long)NFS_FILEID(inode), req->wb_bytes,
94 (long long)req_offset(req));
95
96 if (nfs_page_group_sync_on_bit(req, PG_UNLOCKPAGE)) {
97 if (PageUptodate(req->wb_page))
98 nfs_readpage_to_fscache(inode, req->wb_page, 0);
99
100 unlock_page(req->wb_page);
101 }
102 nfs_release_request(req);
103}
104
88int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, 105int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
89 struct page *page) 106 struct page *page)
90{ 107{
@@ -106,7 +123,10 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
106 123
107 nfs_pageio_init_read(&pgio, inode, false, 124 nfs_pageio_init_read(&pgio, inode, false,
108 &nfs_async_read_completion_ops); 125 &nfs_async_read_completion_ops);
109 nfs_pageio_add_request(&pgio, new); 126 if (!nfs_pageio_add_request(&pgio, new)) {
127 nfs_list_remove_request(new);
128 nfs_readpage_release(new);
129 }
110 nfs_pageio_complete(&pgio); 130 nfs_pageio_complete(&pgio);
111 131
112 /* It doesn't make sense to do mirrored reads! */ 132 /* It doesn't make sense to do mirrored reads! */
@@ -115,24 +135,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
115 pgm = &pgio.pg_mirrors[0]; 135 pgm = &pgio.pg_mirrors[0];
116 NFS_I(inode)->read_io += pgm->pg_bytes_written; 136 NFS_I(inode)->read_io += pgm->pg_bytes_written;
117 137
118 return 0; 138 return pgio.pg_error < 0 ? pgio.pg_error : 0;
119}
120
121static void nfs_readpage_release(struct nfs_page *req)
122{
123 struct inode *inode = d_inode(req->wb_context->dentry);
124
125 dprintk("NFS: read done (%s/%llu %d@%lld)\n", inode->i_sb->s_id,
126 (unsigned long long)NFS_FILEID(inode), req->wb_bytes,
127 (long long)req_offset(req));
128
129 if (nfs_page_group_sync_on_bit(req, PG_UNLOCKPAGE)) {
130 if (PageUptodate(req->wb_page))
131 nfs_readpage_to_fscache(inode, req->wb_page, 0);
132
133 unlock_page(req->wb_page);
134 }
135 nfs_release_request(req);
136} 139}
137 140
138static void nfs_page_group_set_uptodate(struct nfs_page *req) 141static void nfs_page_group_set_uptodate(struct nfs_page *req)
@@ -361,6 +364,8 @@ readpage_async_filler(void *data, struct page *page)
361 if (len < PAGE_CACHE_SIZE) 364 if (len < PAGE_CACHE_SIZE)
362 zero_user_segment(page, len, PAGE_CACHE_SIZE); 365 zero_user_segment(page, len, PAGE_CACHE_SIZE);
363 if (!nfs_pageio_add_request(desc->pgio, new)) { 366 if (!nfs_pageio_add_request(desc->pgio, new)) {
367 nfs_list_remove_request(new);
368 nfs_readpage_release(new);
364 error = desc->pgio->pg_error; 369 error = desc->pgio->pg_error;
365 goto out_unlock; 370 goto out_unlock;
366 } 371 }
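
Both read.c call sites above follow the same new ownership rule: a failed nfs_pageio_add_request() leaves the request with the caller, who must unlink it (nfs_list_remove_request) and drop its reference (nfs_readpage_release) before surfacing pg_error. A minimal sketch of that rule, with refcounting reduced to a plain counter:

#include <stdio.h>
#include <stdlib.h>

struct nfs_page { int refs; };

static void release_request(struct nfs_page *req)
{
	if (--req->refs == 0)
		free(req);
}

/* Returns 1 if added; 0 on failure, in which case the request is
 * still owned by the caller. */
static int pageio_add_request(struct nfs_page *req, int simulate_err)
{
	return (req && !simulate_err) ? 1 : 0;
}

int main(void)
{
	struct nfs_page *new = malloc(sizeof(*new));

	if (!new)
		return 1;
	new->refs = 1;
	if (!pageio_add_request(new, 1)) {
		release_request(new);	/* caller-side cleanup */
		printf("request released after failed add\n");
	}
	return 0;
}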
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 7b9316406930..ce43cd6d88c6 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -21,6 +21,8 @@
21#include <linux/nfs_page.h> 21#include <linux/nfs_page.h>
22#include <linux/backing-dev.h> 22#include <linux/backing-dev.h>
23#include <linux/export.h> 23#include <linux/export.h>
24#include <linux/freezer.h>
25#include <linux/wait.h>
24 26
25#include <asm/uaccess.h> 27#include <asm/uaccess.h>
26 28
@@ -244,11 +246,9 @@ static int wb_priority(struct writeback_control *wbc)
244{ 246{
245 int ret = 0; 247 int ret = 0;
246 if (wbc->for_reclaim) 248 if (wbc->for_reclaim)
247 return FLUSH_HIGHPRI | FLUSH_STABLE; 249 return FLUSH_HIGHPRI | FLUSH_COND_STABLE;
248 if (wbc->sync_mode == WB_SYNC_ALL) 250 if (wbc->sync_mode == WB_SYNC_ALL)
249 ret = FLUSH_COND_STABLE; 251 ret = FLUSH_COND_STABLE;
250 if (wbc->for_kupdate || wbc->for_background)
251 ret |= FLUSH_LOWPRI;
252 return ret; 252 return ret;
253} 253}
254 254
@@ -545,12 +545,22 @@ try_again:
545 return head; 545 return head;
546} 546}
547 547
548static void nfs_write_error_remove_page(struct nfs_page *req)
549{
550 nfs_unlock_request(req);
551 nfs_end_page_writeback(req);
552 nfs_release_request(req);
553 generic_error_remove_page(page_file_mapping(req->wb_page),
554 req->wb_page);
555}
556
548/* 557/*
549 * Find an associated nfs write request, and prepare to flush it out 558 * Find an associated nfs write request, and prepare to flush it out
550 * May return an error if the user signalled nfs_wait_on_request(). 559 * May return an error if the user signalled nfs_wait_on_request().
551 */ 560 */
552static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, 561static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
553 struct page *page, bool nonblock) 562 struct page *page, bool nonblock,
563 bool launder)
554{ 564{
555 struct nfs_page *req; 565 struct nfs_page *req;
556 int ret = 0; 566 int ret = 0;
@@ -567,8 +577,21 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
567 577
568 ret = 0; 578 ret = 0;
569 if (!nfs_pageio_add_request(pgio, req)) { 579 if (!nfs_pageio_add_request(pgio, req)) {
570 nfs_redirty_request(req);
571 ret = pgio->pg_error; 580 ret = pgio->pg_error;
581 /*
582 * Remove the problematic req upon fatal errors
583 * in launder case, while other dirty pages can
584 * still be around until they get flushed.
585 */
586 if (nfs_error_is_fatal(ret)) {
587 nfs_context_set_write_error(req->wb_context, ret);
588 if (launder) {
589 nfs_write_error_remove_page(req);
590 goto out;
591 }
592 }
593 nfs_redirty_request(req);
594 ret = -EAGAIN;
572 } else 595 } else
573 nfs_add_stats(page_file_mapping(page)->host, 596 nfs_add_stats(page_file_mapping(page)->host,
574 NFSIOS_WRITEPAGES, 1); 597 NFSIOS_WRITEPAGES, 1);
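
The launder flag threaded through nfs_page_async_flush() above changes only the failure path: a launder (invalidate_inode_pages2) must not leave a poisoned page dirty, so on a fatal error the request and its page are torn out and the error surfaces immediately, whereas every other failure redirties the page and reports -EAGAIN. A sketch of that decision table, modeling the failure path only:

#include <stdio.h>
#include <stdbool.h>
#include <errno.h>

/* Assumed subset of nfs_error_is_fatal(). */
static bool error_is_fatal(int err)
{
	return err == -EIO || err == -ENOSPC;
}

static int flush_outcome(int err, bool launder)
{
	if (error_is_fatal(err) && launder)
		return err;	/* page removed; error surfaces now */
	return -EAGAIN;		/* page redirtied for a later flush */
}

int main(void)
{
	printf("launder + EIO -> %d\n", flush_outcome(-EIO, true));
	printf("background    -> %d\n", flush_outcome(-EIO, false));
	return 0;
}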
@@ -576,12 +599,14 @@ out:
576 return ret; 599 return ret;
577} 600}
578 601
579static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, struct nfs_pageio_descriptor *pgio) 602static int nfs_do_writepage(struct page *page, struct writeback_control *wbc,
603 struct nfs_pageio_descriptor *pgio, bool launder)
580{ 604{
581 int ret; 605 int ret;
582 606
583 nfs_pageio_cond_complete(pgio, page_file_index(page)); 607 nfs_pageio_cond_complete(pgio, page_file_index(page));
584 ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE); 608 ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE,
609 launder);
585 if (ret == -EAGAIN) { 610 if (ret == -EAGAIN) {
586 redirty_page_for_writepage(wbc, page); 611 redirty_page_for_writepage(wbc, page);
587 ret = 0; 612 ret = 0;
@@ -592,7 +617,9 @@ static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, st
592/* 617/*
593 * Write an mmapped page to the server. 618 * Write an mmapped page to the server.
594 */ 619 */
595static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc) 620static int nfs_writepage_locked(struct page *page,
621 struct writeback_control *wbc,
622 bool launder)
596{ 623{
597 struct nfs_pageio_descriptor pgio; 624 struct nfs_pageio_descriptor pgio;
598 struct inode *inode = page_file_mapping(page)->host; 625 struct inode *inode = page_file_mapping(page)->host;
@@ -601,7 +628,7 @@ static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc
601 nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE); 628 nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
602 nfs_pageio_init_write(&pgio, inode, wb_priority(wbc), 629 nfs_pageio_init_write(&pgio, inode, wb_priority(wbc),
603 false, &nfs_async_write_completion_ops); 630 false, &nfs_async_write_completion_ops);
604 err = nfs_do_writepage(page, wbc, &pgio); 631 err = nfs_do_writepage(page, wbc, &pgio, launder);
605 nfs_pageio_complete(&pgio); 632 nfs_pageio_complete(&pgio);
606 if (err < 0) 633 if (err < 0)
607 return err; 634 return err;
@@ -614,7 +641,7 @@ int nfs_writepage(struct page *page, struct writeback_control *wbc)
614{ 641{
615 int ret; 642 int ret;
616 643
617 ret = nfs_writepage_locked(page, wbc); 644 ret = nfs_writepage_locked(page, wbc, false);
618 unlock_page(page); 645 unlock_page(page);
619 return ret; 646 return ret;
620} 647}
@@ -623,7 +650,7 @@ static int nfs_writepages_callback(struct page *page, struct writeback_control *
623{ 650{
624 int ret; 651 int ret;
625 652
626 ret = nfs_do_writepage(page, wbc, data); 653 ret = nfs_do_writepage(page, wbc, data, false);
627 unlock_page(page); 654 unlock_page(page);
628 return ret; 655 return ret;
629} 656}
@@ -1128,7 +1155,8 @@ int nfs_flush_incompatible(struct file *file, struct page *page)
1128 if (req == NULL) 1155 if (req == NULL)
1129 return 0; 1156 return 0;
1130 l_ctx = req->wb_lock_context; 1157 l_ctx = req->wb_lock_context;
1131 do_flush = req->wb_page != page || req->wb_context != ctx; 1158 do_flush = req->wb_page != page ||
1159 !nfs_match_open_context(req->wb_context, ctx);
1132 /* for now, flush if more than 1 request in page_group */ 1160 /* for now, flush if more than 1 request in page_group */
1133 do_flush |= req->wb_this_page != req; 1161 do_flush |= req->wb_this_page != req;
1134 if (l_ctx && flctx && 1162 if (l_ctx && flctx &&
@@ -1326,9 +1354,15 @@ static void nfs_async_write_error(struct list_head *head)
1326 } 1354 }
1327} 1355}
1328 1356
1357static void nfs_async_write_reschedule_io(struct nfs_pgio_header *hdr)
1358{
1359 nfs_async_write_error(&hdr->pages);
1360}
1361
1329static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops = { 1362static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops = {
1330 .error_cleanup = nfs_async_write_error, 1363 .error_cleanup = nfs_async_write_error,
1331 .completion = nfs_write_completion, 1364 .completion = nfs_write_completion,
1365 .reschedule_io = nfs_async_write_reschedule_io,
1332}; 1366};
1333 1367
1334void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, 1368void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
@@ -1529,27 +1563,21 @@ static void nfs_writeback_result(struct rpc_task *task,
1529 } 1563 }
1530} 1564}
1531 1565
1532 1566static int wait_on_commit(struct nfs_mds_commit_info *cinfo)
1533static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait)
1534{ 1567{
1535 int ret; 1568 return wait_on_atomic_t(&cinfo->rpcs_out,
1569 nfs_wait_atomic_killable, TASK_KILLABLE);
1570}
1536 1571
1537 if (!test_and_set_bit(NFS_INO_COMMIT, &nfsi->flags)) 1572static void nfs_commit_begin(struct nfs_mds_commit_info *cinfo)
1538 return 1; 1573{
1539 if (!may_wait) 1574 atomic_inc(&cinfo->rpcs_out);
1540 return 0;
1541 ret = out_of_line_wait_on_bit_lock(&nfsi->flags,
1542 NFS_INO_COMMIT,
1543 nfs_wait_bit_killable,
1544 TASK_KILLABLE);
1545 return (ret < 0) ? ret : 1;
1546} 1575}
1547 1576
1548static void nfs_commit_clear_lock(struct nfs_inode *nfsi) 1577static void nfs_commit_end(struct nfs_mds_commit_info *cinfo)
1549{ 1578{
1550 clear_bit(NFS_INO_COMMIT, &nfsi->flags); 1579 if (atomic_dec_and_test(&cinfo->rpcs_out))
1551 smp_mb__after_atomic(); 1580 wake_up_atomic_t(&cinfo->rpcs_out);
1552 wake_up_bit(&nfsi->flags, NFS_INO_COMMIT);
1553} 1581}
1554 1582
1555void nfs_commitdata_release(struct nfs_commit_data *data) 1583void nfs_commitdata_release(struct nfs_commit_data *data)
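
The hunk above replaces the NFS_INO_COMMIT bit-lock with a plain counter of in-flight commit RPCs: nfs_commit_begin() increments rpcs_out, nfs_commit_end() decrements it and wakes waiters at zero, and wait_on_commit() blocks until the counter drains. The kernel does this with wait_on_atomic_t/wake_up_atomic_t (killable); the pthread model below expresses the same gating with a mutex and condvar, minus killability:

#include <pthread.h>
#include <stdio.h>

struct commit_info {
	int rpcs_out;
	pthread_mutex_t lock;
	pthread_cond_t done;
};

static void commit_begin(struct commit_info *c)
{
	pthread_mutex_lock(&c->lock);
	c->rpcs_out++;			/* atomic_inc(&cinfo->rpcs_out) */
	pthread_mutex_unlock(&c->lock);
}

static void commit_end(struct commit_info *c)
{
	pthread_mutex_lock(&c->lock);
	if (--c->rpcs_out == 0)		/* atomic_dec_and_test() */
		pthread_cond_broadcast(&c->done); /* wake_up_atomic_t() */
	pthread_mutex_unlock(&c->lock);
}

static void wait_on_commit(struct commit_info *c)
{
	pthread_mutex_lock(&c->lock);
	while (c->rpcs_out != 0)
		pthread_cond_wait(&c->done, &c->lock);
	pthread_mutex_unlock(&c->lock);
}

int main(void)
{
	struct commit_info c = { 0, PTHREAD_MUTEX_INITIALIZER,
				 PTHREAD_COND_INITIALIZER };

	commit_begin(&c);
	commit_end(&c);
	wait_on_commit(&c);	/* returns at once: rpcs_out == 0 */
	printf("all commits drained\n");
	return 0;
}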
@@ -1666,6 +1694,13 @@ void nfs_retry_commit(struct list_head *page_list,
1666} 1694}
1667EXPORT_SYMBOL_GPL(nfs_retry_commit); 1695EXPORT_SYMBOL_GPL(nfs_retry_commit);
1668 1696
1697static void
1698nfs_commit_resched_write(struct nfs_commit_info *cinfo,
1699 struct nfs_page *req)
1700{
1701 __set_page_dirty_nobuffers(req->wb_page);
1702}
1703
1669/* 1704/*
1670 * Commit dirty pages 1705 * Commit dirty pages
1671 */ 1706 */
@@ -1687,7 +1722,6 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how,
1687 data->mds_ops, how, 0); 1722 data->mds_ops, how, 0);
1688 out_bad: 1723 out_bad:
1689 nfs_retry_commit(head, NULL, cinfo, 0); 1724 nfs_retry_commit(head, NULL, cinfo, 0);
1690 cinfo->completion_ops->error_cleanup(NFS_I(inode));
1691 return -ENOMEM; 1725 return -ENOMEM;
1692} 1726}
1693 1727
@@ -1749,8 +1783,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
1749 clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC); 1783 clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC);
1750 1784
1751 nfs_init_cinfo(&cinfo, data->inode, data->dreq); 1785 nfs_init_cinfo(&cinfo, data->inode, data->dreq);
1752 if (atomic_dec_and_test(&cinfo.mds->rpcs_out)) 1786 nfs_commit_end(cinfo.mds);
1753 nfs_commit_clear_lock(NFS_I(data->inode));
1754} 1787}
1755 1788
1756static void nfs_commit_release(void *calldata) 1789static void nfs_commit_release(void *calldata)
@@ -1769,7 +1802,7 @@ static const struct rpc_call_ops nfs_commit_ops = {
1769 1802
1770static const struct nfs_commit_completion_ops nfs_commit_completion_ops = { 1803static const struct nfs_commit_completion_ops nfs_commit_completion_ops = {
1771 .completion = nfs_commit_release_pages, 1804 .completion = nfs_commit_release_pages,
1772 .error_cleanup = nfs_commit_clear_lock, 1805 .resched_write = nfs_commit_resched_write,
1773}; 1806};
1774 1807
1775int nfs_generic_commit_list(struct inode *inode, struct list_head *head, 1808int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
@@ -1788,30 +1821,25 @@ int nfs_commit_inode(struct inode *inode, int how)
1788 LIST_HEAD(head); 1821 LIST_HEAD(head);
1789 struct nfs_commit_info cinfo; 1822 struct nfs_commit_info cinfo;
1790 int may_wait = how & FLUSH_SYNC; 1823 int may_wait = how & FLUSH_SYNC;
1824 int error = 0;
1791 int res; 1825 int res;
1792 1826
1793 res = nfs_commit_set_lock(NFS_I(inode), may_wait);
1794 if (res <= 0)
1795 goto out_mark_dirty;
1796 nfs_init_cinfo_from_inode(&cinfo, inode); 1827 nfs_init_cinfo_from_inode(&cinfo, inode);
1828 nfs_commit_begin(cinfo.mds);
1797 res = nfs_scan_commit(inode, &head, &cinfo); 1829 res = nfs_scan_commit(inode, &head, &cinfo);
1798 if (res) { 1830 if (res)
1799 int error;
1800
1801 error = nfs_generic_commit_list(inode, &head, how, &cinfo); 1831 error = nfs_generic_commit_list(inode, &head, how, &cinfo);
1802 if (error < 0) 1832 nfs_commit_end(cinfo.mds);
1803 return error; 1833 if (error < 0)
1804 if (!may_wait) 1834 goto out_error;
1805 goto out_mark_dirty; 1835 if (!may_wait)
1806 error = wait_on_bit_action(&NFS_I(inode)->flags, 1836 goto out_mark_dirty;
1807 NFS_INO_COMMIT, 1837 error = wait_on_commit(cinfo.mds);
1808 nfs_wait_bit_killable, 1838 if (error < 0)
1809 TASK_KILLABLE); 1839 return error;
1810 if (error < 0)
1811 return error;
1812 } else
1813 nfs_commit_clear_lock(NFS_I(inode));
1814 return res; 1840 return res;
1841out_error:
1842 res = error;
1815 /* Note: If we exit without ensuring that the commit is complete, 1843 /* Note: If we exit without ensuring that the commit is complete,
1816 * we must mark the inode as dirty. Otherwise, future calls to 1844 * we must mark the inode as dirty. Otherwise, future calls to
1817 * sync_inode() with the WB_SYNC_ALL flag set will fail to ensure 1845 * sync_inode() with the WB_SYNC_ALL flag set will fail to ensure
@@ -1821,6 +1849,7 @@ out_mark_dirty:
1821 __mark_inode_dirty(inode, I_DIRTY_DATASYNC); 1849 __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
1822 return res; 1850 return res;
1823} 1851}
1852EXPORT_SYMBOL_GPL(nfs_commit_inode);
1824 1853
1825int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) 1854int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
1826{ 1855{
@@ -1911,7 +1940,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
1911/* 1940/*
1912 * Write back all requests on one page - we do this before reading it. 1941 * Write back all requests on one page - we do this before reading it.
1913 */ 1942 */
1914int nfs_wb_page(struct inode *inode, struct page *page) 1943int nfs_wb_single_page(struct inode *inode, struct page *page, bool launder)
1915{ 1944{
1916 loff_t range_start = page_file_offset(page); 1945 loff_t range_start = page_file_offset(page);
1917 loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1); 1946 loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
@@ -1928,7 +1957,7 @@ int nfs_wb_page(struct inode *inode, struct page *page)
1928 for (;;) { 1957 for (;;) {
1929 wait_on_page_writeback(page); 1958 wait_on_page_writeback(page);
1930 if (clear_page_dirty_for_io(page)) { 1959 if (clear_page_dirty_for_io(page)) {
1931 ret = nfs_writepage_locked(page, &wbc); 1960 ret = nfs_writepage_locked(page, &wbc, launder);
1932 if (ret < 0) 1961 if (ret < 0)
1933 goto out_error; 1962 goto out_error;
1934 continue; 1963 continue;
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 43aeabd4b968..d6f9b4e6006d 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -592,4 +592,18 @@ enum data_content4 {
592 NFS4_CONTENT_HOLE = 1, 592 NFS4_CONTENT_HOLE = 1,
593}; 593};
594 594
595enum pnfs_update_layout_reason {
596 PNFS_UPDATE_LAYOUT_UNKNOWN = 0,
597 PNFS_UPDATE_LAYOUT_NO_PNFS,
598 PNFS_UPDATE_LAYOUT_RD_ZEROLEN,
599 PNFS_UPDATE_LAYOUT_MDSTHRESH,
600 PNFS_UPDATE_LAYOUT_NOMEM,
601 PNFS_UPDATE_LAYOUT_BULK_RECALL,
602 PNFS_UPDATE_LAYOUT_IO_TEST_FAIL,
603 PNFS_UPDATE_LAYOUT_FOUND_CACHED,
604 PNFS_UPDATE_LAYOUT_RETURN,
605 PNFS_UPDATE_LAYOUT_BLOCKED,
606 PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET,
607};
608
595#endif 609#endif
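
The new enum above feeds the trace_pnfs_update_layout tracepoint, so every early exit from pnfs_update_layout() records why pNFS was not used. A runnable reason-to-string mapper in the spirit of what a __print_symbolic table would give ftrace; the string names here are illustrative, the actual kernel strings in fs/nfs/nfs4trace.h may differ:

#include <stdio.h>

enum pnfs_update_layout_reason {
	PNFS_UPDATE_LAYOUT_UNKNOWN = 0,
	PNFS_UPDATE_LAYOUT_NO_PNFS,
	PNFS_UPDATE_LAYOUT_RD_ZEROLEN,
	PNFS_UPDATE_LAYOUT_MDSTHRESH,
	PNFS_UPDATE_LAYOUT_NOMEM,
	PNFS_UPDATE_LAYOUT_BULK_RECALL,
	PNFS_UPDATE_LAYOUT_IO_TEST_FAIL,
	PNFS_UPDATE_LAYOUT_FOUND_CACHED,
	PNFS_UPDATE_LAYOUT_RETURN,
	PNFS_UPDATE_LAYOUT_BLOCKED,
	PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET,
};

static const char *reason_name(enum pnfs_update_layout_reason r)
{
	static const char *names[] = {
		"unknown", "no pnfs", "read+zerolen", "mdsthreshold",
		"nomem", "bulk recall", "io test fail", "found cached",
		"layoutreturn", "layoutget blocked", "sent layoutget",
	};
	return names[r];
}

int main(void)
{
	printf("%s\n", reason_name(PNFS_UPDATE_LAYOUT_FOUND_CACHED));
	return 0;
}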
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 37a3d2981352..48e0320cd643 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -60,18 +60,12 @@ struct nfs_lockowner {
60 pid_t l_pid; 60 pid_t l_pid;
61}; 61};
62 62
63#define NFS_IO_INPROGRESS 0
64struct nfs_io_counter {
65 unsigned long flags;
66 atomic_t io_count;
67};
68
69struct nfs_lock_context { 63struct nfs_lock_context {
70 atomic_t count; 64 atomic_t count;
71 struct list_head list; 65 struct list_head list;
72 struct nfs_open_context *open_context; 66 struct nfs_open_context *open_context;
73 struct nfs_lockowner lockowner; 67 struct nfs_lockowner lockowner;
74 struct nfs_io_counter io_count; 68 atomic_t io_count;
75}; 69};
76 70
77struct nfs4_state; 71struct nfs4_state;
@@ -216,7 +210,6 @@ struct nfs_inode {
216#define NFS_INO_FLUSHING (4) /* inode is flushing out data */ 210#define NFS_INO_FLUSHING (4) /* inode is flushing out data */
217#define NFS_INO_FSCACHE (5) /* inode can be cached by FS-Cache */ 211#define NFS_INO_FSCACHE (5) /* inode can be cached by FS-Cache */
218#define NFS_INO_FSCACHE_LOCK (6) /* FS-Cache cookie management lock */ 212#define NFS_INO_FSCACHE_LOCK (6) /* FS-Cache cookie management lock */
219#define NFS_INO_COMMIT (7) /* inode is committing unstable writes */
220#define NFS_INO_LAYOUTCOMMIT (9) /* layoutcommit required */ 213#define NFS_INO_LAYOUTCOMMIT (9) /* layoutcommit required */
221#define NFS_INO_LAYOUTCOMMITTING (10) /* layoutcommit inflight */ 214#define NFS_INO_LAYOUTCOMMITTING (10) /* layoutcommit inflight */
222#define NFS_INO_LAYOUTSTATS (11) /* layoutstats inflight */ 215#define NFS_INO_LAYOUTSTATS (11) /* layoutstats inflight */
@@ -518,13 +511,25 @@ extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned
518 */ 511 */
519extern int nfs_sync_inode(struct inode *inode); 512extern int nfs_sync_inode(struct inode *inode);
520extern int nfs_wb_all(struct inode *inode); 513extern int nfs_wb_all(struct inode *inode);
521extern int nfs_wb_page(struct inode *inode, struct page* page); 514extern int nfs_wb_single_page(struct inode *inode, struct page *page, bool launder);
522extern int nfs_wb_page_cancel(struct inode *inode, struct page* page); 515extern int nfs_wb_page_cancel(struct inode *inode, struct page* page);
523extern int nfs_commit_inode(struct inode *, int); 516extern int nfs_commit_inode(struct inode *, int);
524extern struct nfs_commit_data *nfs_commitdata_alloc(void); 517extern struct nfs_commit_data *nfs_commitdata_alloc(void);
525extern void nfs_commit_free(struct nfs_commit_data *data); 518extern void nfs_commit_free(struct nfs_commit_data *data);
526 519
527static inline int 520static inline int
521nfs_wb_launder_page(struct inode *inode, struct page *page)
522{
523 return nfs_wb_single_page(inode, page, true);
524}
525
526static inline int
527nfs_wb_page(struct inode *inode, struct page *page)
528{
529 return nfs_wb_single_page(inode, page, false);
530}
531
532static inline int
528nfs_have_writebacks(struct inode *inode) 533nfs_have_writebacks(struct inode *inode)
529{ 534{
530 return NFS_I(inode)->nrequests != 0; 535 return NFS_I(inode)->nrequests != 0;
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 2469ab0bb3a1..7fcc13c8cf1f 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -102,6 +102,7 @@ struct nfs_client {
102#define NFS_SP4_MACH_CRED_STATEID 4 /* TEST_STATEID and FREE_STATEID */ 102#define NFS_SP4_MACH_CRED_STATEID 4 /* TEST_STATEID and FREE_STATEID */
103#define NFS_SP4_MACH_CRED_WRITE 5 /* WRITE */ 103#define NFS_SP4_MACH_CRED_WRITE 5 /* WRITE */
104#define NFS_SP4_MACH_CRED_COMMIT 6 /* COMMIT */ 104#define NFS_SP4_MACH_CRED_COMMIT 6 /* COMMIT */
105#define NFS_SP4_MACH_CRED_PNFS_CLEANUP 7 /* LAYOUTRETURN */
105#endif /* CONFIG_NFS_V4 */ 106#endif /* CONFIG_NFS_V4 */
106 107
107 /* Our own IP address, as a null-terminated string. 108 /* Our own IP address, as a null-terminated string.
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 11bbae44f4cb..791098a08a87 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1375,6 +1375,7 @@ enum {
1375 NFS_IOHDR_ERROR = 0, 1375 NFS_IOHDR_ERROR = 0,
1376 NFS_IOHDR_EOF, 1376 NFS_IOHDR_EOF,
1377 NFS_IOHDR_REDO, 1377 NFS_IOHDR_REDO,
1378 NFS_IOHDR_STAT,
1378}; 1379};
1379 1380
1380struct nfs_pgio_header { 1381struct nfs_pgio_header {
@@ -1420,11 +1421,12 @@ struct nfs_mds_commit_info {
1420 struct list_head list; 1421 struct list_head list;
1421}; 1422};
1422 1423
1424struct nfs_commit_info;
1423struct nfs_commit_data; 1425struct nfs_commit_data;
1424struct nfs_inode; 1426struct nfs_inode;
1425struct nfs_commit_completion_ops { 1427struct nfs_commit_completion_ops {
1426 void (*error_cleanup) (struct nfs_inode *nfsi);
1427 void (*completion) (struct nfs_commit_data *data); 1428 void (*completion) (struct nfs_commit_data *data);
1429 void (*resched_write) (struct nfs_commit_info *, struct nfs_page *);
1428}; 1430};
1429 1431
1430struct nfs_commit_info { 1432struct nfs_commit_info {
@@ -1454,12 +1456,14 @@ struct nfs_commit_data {
1454 const struct rpc_call_ops *mds_ops; 1456 const struct rpc_call_ops *mds_ops;
1455 const struct nfs_commit_completion_ops *completion_ops; 1457 const struct nfs_commit_completion_ops *completion_ops;
1456 int (*commit_done_cb) (struct rpc_task *task, struct nfs_commit_data *data); 1458 int (*commit_done_cb) (struct rpc_task *task, struct nfs_commit_data *data);
1459 unsigned long flags;
1457}; 1460};
1458 1461
1459struct nfs_pgio_completion_ops { 1462struct nfs_pgio_completion_ops {
1460 void (*error_cleanup)(struct list_head *head); 1463 void (*error_cleanup)(struct list_head *head);
1461 void (*init_hdr)(struct nfs_pgio_header *hdr); 1464 void (*init_hdr)(struct nfs_pgio_header *hdr);
1462 void (*completion)(struct nfs_pgio_header *hdr); 1465 void (*completion)(struct nfs_pgio_header *hdr);
1466 void (*reschedule_io)(struct nfs_pgio_header *hdr);
1463}; 1467};
1464 1468
1465struct nfs_unlinkdata { 1469struct nfs_unlinkdata {
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 23608eb0ded2..b7f21044f4d8 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1217,6 +1217,7 @@ static int rpc_anyaddr(int family, struct sockaddr *buf, size_t buflen)
1217 return -EINVAL; 1217 return -EINVAL;
1218 memcpy(buf, &rpc_in6addr_loopback, 1218 memcpy(buf, &rpc_in6addr_loopback,
1219 sizeof(rpc_in6addr_loopback)); 1219 sizeof(rpc_in6addr_loopback));
1220 break;
1220 default: 1221 default:
1221 dprintk("RPC: %s: address family not supported\n", 1222 dprintk("RPC: %s: address family not supported\n",
1222 __func__); 1223 __func__);
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index 2dcb44f69e53..cc1251d07297 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -15,7 +15,7 @@
15# define RPCDBG_FACILITY RPCDBG_TRANS 15# define RPCDBG_FACILITY RPCDBG_TRANS
16#endif 16#endif
17 17
18#define RPCRDMA_BACKCHANNEL_DEBUG 18#undef RPCRDMA_BACKCHANNEL_DEBUG
19 19
20static void rpcrdma_bc_free_rqst(struct rpcrdma_xprt *r_xprt, 20static void rpcrdma_bc_free_rqst(struct rpcrdma_xprt *r_xprt,
21 struct rpc_rqst *rqst) 21 struct rpc_rqst *rqst)
@@ -42,8 +42,8 @@ static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt,
42 size_t size; 42 size_t size;
43 43
44 req = rpcrdma_create_req(r_xprt); 44 req = rpcrdma_create_req(r_xprt);
45 if (!req) 45 if (IS_ERR(req))
46 return -ENOMEM; 46 return PTR_ERR(req);
47 req->rl_backchannel = true; 47 req->rl_backchannel = true;
48 48
49 size = RPCRDMA_INLINE_WRITE_THRESHOLD(rqst); 49 size = RPCRDMA_INLINE_WRITE_THRESHOLD(rqst);
@@ -84,9 +84,7 @@ out_fail:
84static int rpcrdma_bc_setup_reps(struct rpcrdma_xprt *r_xprt, 84static int rpcrdma_bc_setup_reps(struct rpcrdma_xprt *r_xprt,
85 unsigned int count) 85 unsigned int count)
86{ 86{
87 struct rpcrdma_buffer *buffers = &r_xprt->rx_buf;
88 struct rpcrdma_rep *rep; 87 struct rpcrdma_rep *rep;
89 unsigned long flags;
90 int rc = 0; 88 int rc = 0;
91 89
92 while (count--) { 90 while (count--) {
@@ -98,9 +96,7 @@ static int rpcrdma_bc_setup_reps(struct rpcrdma_xprt *r_xprt,
98 break; 96 break;
99 } 97 }
100 98
101 spin_lock_irqsave(&buffers->rb_lock, flags); 99 rpcrdma_recv_buffer_put(rep);
102 list_add(&rep->rr_list, &buffers->rb_recv_bufs);
103 spin_unlock_irqrestore(&buffers->rb_lock, flags);
104 } 100 }
105 101
106 return rc; 102 return rc;
@@ -140,6 +136,7 @@ int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs)
140 __func__); 136 __func__);
141 goto out_free; 137 goto out_free;
142 } 138 }
139 dprintk("RPC: %s: new rqst %p\n", __func__, rqst);
143 140
144 rqst->rq_xprt = &r_xprt->rx_xprt; 141 rqst->rq_xprt = &r_xprt->rx_xprt;
145 INIT_LIST_HEAD(&rqst->rq_list); 142 INIT_LIST_HEAD(&rqst->rq_list);
@@ -220,12 +217,14 @@ int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
220 217
221 rpclen = rqst->rq_svec[0].iov_len; 218 rpclen = rqst->rq_svec[0].iov_len;
222 219
220#ifdef RPCRDMA_BACKCHANNEL_DEBUG
223 pr_info("RPC: %s: rpclen %zd headerp 0x%p lkey 0x%x\n", 221 pr_info("RPC: %s: rpclen %zd headerp 0x%p lkey 0x%x\n",
224 __func__, rpclen, headerp, rdmab_lkey(req->rl_rdmabuf)); 222 __func__, rpclen, headerp, rdmab_lkey(req->rl_rdmabuf));
225 pr_info("RPC: %s: RPC/RDMA: %*ph\n", 223 pr_info("RPC: %s: RPC/RDMA: %*ph\n",
226 __func__, (int)RPCRDMA_HDRLEN_MIN, headerp); 224 __func__, (int)RPCRDMA_HDRLEN_MIN, headerp);
227 pr_info("RPC: %s: RPC: %*ph\n", 225 pr_info("RPC: %s: RPC: %*ph\n",
228 __func__, (int)rpclen, rqst->rq_svec[0].iov_base); 226 __func__, (int)rpclen, rqst->rq_svec[0].iov_base);
227#endif
229 228
230 req->rl_send_iov[0].addr = rdmab_addr(req->rl_rdmabuf); 229 req->rl_send_iov[0].addr = rdmab_addr(req->rl_rdmabuf);
231 req->rl_send_iov[0].length = RPCRDMA_HDRLEN_MIN; 230 req->rl_send_iov[0].length = RPCRDMA_HDRLEN_MIN;
@@ -269,6 +268,9 @@ void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst)
269{ 268{
270 struct rpc_xprt *xprt = rqst->rq_xprt; 269 struct rpc_xprt *xprt = rqst->rq_xprt;
271 270
271 dprintk("RPC: %s: freeing rqst %p (req %p)\n",
272 __func__, rqst, rpcr_to_rdmar(rqst));
273
272 smp_mb__before_atomic(); 274 smp_mb__before_atomic();
273 WARN_ON_ONCE(!test_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state)); 275 WARN_ON_ONCE(!test_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state));
274 clear_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state); 276 clear_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
@@ -333,9 +335,7 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
333 struct rpc_rqst, rq_bc_pa_list); 335 struct rpc_rqst, rq_bc_pa_list);
334 list_del(&rqst->rq_bc_pa_list); 336 list_del(&rqst->rq_bc_pa_list);
335 spin_unlock(&xprt->bc_pa_lock); 337 spin_unlock(&xprt->bc_pa_lock);
336#ifdef RPCRDMA_BACKCHANNEL_DEBUG 338 dprintk("RPC: %s: using rqst %p\n", __func__, rqst);
337 pr_info("RPC: %s: using rqst %p\n", __func__, rqst);
338#endif
339 339
340 /* Prepare rqst */ 340 /* Prepare rqst */
341 rqst->rq_reply_bytes_recvd = 0; 341 rqst->rq_reply_bytes_recvd = 0;
@@ -355,10 +355,8 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
355 * direction reply. 355 * direction reply.
356 */ 356 */
357 req = rpcr_to_rdmar(rqst); 357 req = rpcr_to_rdmar(rqst);
358#ifdef RPCRDMA_BACKCHANNEL_DEBUG 358 dprintk("RPC: %s: attaching rep %p to req %p\n",
359 pr_info("RPC: %s: attaching rep %p to req %p\n",
360 __func__, rep, req); 359 __func__, rep, req);
361#endif
362 req->rl_reply = rep; 360 req->rl_reply = rep;
363 361
364 /* Defeat the retransmit detection logic in send_request */ 362 /* Defeat the retransmit detection logic in send_request */
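Two conventions show up in the backchannel hunks: debug output now compiles away unless RPCRDMA_BACKCHANNEL_DEBUG is defined, and rpcrdma_create_req() failures are reported as ERR_PTR-encoded errnos rather than NULL. A minimal sketch of the ERR_PTR pattern the IS_ERR()/PTR_ERR() hunk adopts; thing_create() is a hypothetical stand-in for the real allocator.

#include <linux/err.h>
#include <linux/slab.h>

struct thing {
	int id;
};

/* Allocators that can fail for more than one reason encode the errno
 * in the returned pointer instead of collapsing everything to NULL. */
static struct thing *thing_create(void)
{
	struct thing *t = kzalloc(sizeof(*t), GFP_KERNEL);

	if (!t)
		return ERR_PTR(-ENOMEM);
	return t;
}

static int thing_setup(void)
{
	struct thing *t = thing_create();

	if (IS_ERR(t))
		return PTR_ERR(t);	/* propagate the encoded errno */
	kfree(t);
	return 0;
}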
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index f1e8dafbd507..c14f3a4bff68 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -179,6 +179,69 @@ out_maperr:
179 return rc; 179 return rc;
180} 180}
181 181
182static void
183__fmr_dma_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
184{
185 struct ib_device *device = r_xprt->rx_ia.ri_device;
186 struct rpcrdma_mw *mw = seg->rl_mw;
187 int nsegs = seg->mr_nsegs;
188
189 seg->rl_mw = NULL;
190
191 while (nsegs--)
192 rpcrdma_unmap_one(device, seg++);
193
194 rpcrdma_put_mw(r_xprt, mw);
195}
196
197/* Invalidate all memory regions that were registered for "req".
198 *
199 * Sleeps until it is safe for the host CPU to access the
200 * previously mapped memory regions.
201 */
202static void
203fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
204{
205 struct rpcrdma_mr_seg *seg;
206 unsigned int i, nchunks;
207 struct rpcrdma_mw *mw;
208 LIST_HEAD(unmap_list);
209 int rc;
210
211 dprintk("RPC: %s: req %p\n", __func__, req);
212
213 /* ORDER: Invalidate all of the req's MRs first
214 *
215 * ib_unmap_fmr() is slow, so use a single call instead
216 * of one call per mapped MR.
217 */
218 for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) {
219 seg = &req->rl_segments[i];
220 mw = seg->rl_mw;
221
222 list_add(&mw->r.fmr.fmr->list, &unmap_list);
223
224 i += seg->mr_nsegs;
225 }
226 rc = ib_unmap_fmr(&unmap_list);
227 if (rc)
228 pr_warn("%s: ib_unmap_fmr failed (%i)\n", __func__, rc);
229
230 /* ORDER: Now DMA unmap all of the req's MRs, and return
231 * them to the free MW list.
232 */
233 for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) {
234 seg = &req->rl_segments[i];
235
236 __fmr_dma_unmap(r_xprt, seg);
237
238 i += seg->mr_nsegs;
239 seg->mr_nsegs = 0;
240 }
241
242 req->rl_nchunks = 0;
243}
244
182/* Use the ib_unmap_fmr() verb to prevent further remote 245/* Use the ib_unmap_fmr() verb to prevent further remote
183 * access via RDMA READ or RDMA WRITE. 246 * access via RDMA READ or RDMA WRITE.
184 */ 247 */
@@ -231,6 +294,7 @@ fmr_op_destroy(struct rpcrdma_buffer *buf)
231 294
232const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = { 295const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
233 .ro_map = fmr_op_map, 296 .ro_map = fmr_op_map,
297 .ro_unmap_sync = fmr_op_unmap_sync,
234 .ro_unmap = fmr_op_unmap, 298 .ro_unmap = fmr_op_unmap,
235 .ro_open = fmr_op_open, 299 .ro_open = fmr_op_open,
236 .ro_maxpages = fmr_op_maxpages, 300 .ro_maxpages = fmr_op_maxpages,
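fmr_op_unmap_sync() above works in two ordered phases: chain every FMR belonging to the request onto one list and invalidate them with a single ib_unmap_fmr() call, then DMA-unmap the segments and recycle the MWs. Here is a sketch of the batching idiom the first phase relies on; batch_unmap() and its parameters are illustrative assumptions, not the driver's names.

#include <linux/list.h>
#include <rdma/ib_verbs.h>

/* ib_unmap_fmr() takes a list of FMRs linked through fmr->list, so n
 * regions cost one slow verb call instead of n of them. */
static int batch_unmap(struct ib_fmr **fmrs, unsigned int n)
{
	LIST_HEAD(unmap_list);
	unsigned int i;

	for (i = 0; i < n; i++)
		list_add(&fmrs[i]->list, &unmap_list);

	return ib_unmap_fmr(&unmap_list);
}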
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 88cf9e7269c2..c6836844bd0e 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -245,12 +245,14 @@ frwr_op_maxpages(struct rpcrdma_xprt *r_xprt)
245 rpcrdma_max_segments(r_xprt) * ia->ri_max_frmr_depth); 245 rpcrdma_max_segments(r_xprt) * ia->ri_max_frmr_depth);
246} 246}
247 247
248/* If FAST_REG or LOCAL_INV failed, indicate the frmr needs to be reset. */ 248/* If FAST_REG or LOCAL_INV failed, indicate the frmr needs
249 * to be reset.
250 *
251 * WARNING: Only wr_id and status are reliable at this point
252 */
249static void 253static void
250frwr_sendcompletion(struct ib_wc *wc) 254__frwr_sendcompletion_flush(struct ib_wc *wc, struct rpcrdma_mw *r)
251{ 255{
252 struct rpcrdma_mw *r;
253
254 if (likely(wc->status == IB_WC_SUCCESS)) 256 if (likely(wc->status == IB_WC_SUCCESS))
255 return; 257 return;
256 258
@@ -261,9 +263,23 @@ frwr_sendcompletion(struct ib_wc *wc)
261 else 263 else
262 pr_warn("RPC: %s: frmr %p error, status %s (%d)\n", 264 pr_warn("RPC: %s: frmr %p error, status %s (%d)\n",
263 __func__, r, ib_wc_status_msg(wc->status), wc->status); 265 __func__, r, ib_wc_status_msg(wc->status), wc->status);
266
264 r->r.frmr.fr_state = FRMR_IS_STALE; 267 r->r.frmr.fr_state = FRMR_IS_STALE;
265} 268}
266 269
270static void
271frwr_sendcompletion(struct ib_wc *wc)
272{
273 struct rpcrdma_mw *r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
274 struct rpcrdma_frmr *f = &r->r.frmr;
275
276 if (unlikely(wc->status != IB_WC_SUCCESS))
277 __frwr_sendcompletion_flush(wc, r);
278
279 if (f->fr_waiter)
280 complete(&f->fr_linv_done);
281}
282
267static int 283static int
268frwr_op_init(struct rpcrdma_xprt *r_xprt) 284frwr_op_init(struct rpcrdma_xprt *r_xprt)
269{ 285{
@@ -319,7 +335,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
319 struct rpcrdma_mw *mw; 335 struct rpcrdma_mw *mw;
320 struct rpcrdma_frmr *frmr; 336 struct rpcrdma_frmr *frmr;
321 struct ib_mr *mr; 337 struct ib_mr *mr;
322 struct ib_reg_wr reg_wr; 338 struct ib_reg_wr *reg_wr;
323 struct ib_send_wr *bad_wr; 339 struct ib_send_wr *bad_wr;
324 int rc, i, n, dma_nents; 340 int rc, i, n, dma_nents;
325 u8 key; 341 u8 key;
@@ -335,7 +351,9 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
335 } while (mw->r.frmr.fr_state != FRMR_IS_INVALID); 351 } while (mw->r.frmr.fr_state != FRMR_IS_INVALID);
336 frmr = &mw->r.frmr; 352 frmr = &mw->r.frmr;
337 frmr->fr_state = FRMR_IS_VALID; 353 frmr->fr_state = FRMR_IS_VALID;
354 frmr->fr_waiter = false;
338 mr = frmr->fr_mr; 355 mr = frmr->fr_mr;
356 reg_wr = &frmr->fr_regwr;
339 357
340 if (nsegs > ia->ri_max_frmr_depth) 358 if (nsegs > ia->ri_max_frmr_depth)
341 nsegs = ia->ri_max_frmr_depth; 359 nsegs = ia->ri_max_frmr_depth;
@@ -381,19 +399,19 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
381 key = (u8)(mr->rkey & 0x000000FF); 399 key = (u8)(mr->rkey & 0x000000FF);
382 ib_update_fast_reg_key(mr, ++key); 400 ib_update_fast_reg_key(mr, ++key);
383 401
384 reg_wr.wr.next = NULL; 402 reg_wr->wr.next = NULL;
385 reg_wr.wr.opcode = IB_WR_REG_MR; 403 reg_wr->wr.opcode = IB_WR_REG_MR;
386 reg_wr.wr.wr_id = (uintptr_t)mw; 404 reg_wr->wr.wr_id = (uintptr_t)mw;
387 reg_wr.wr.num_sge = 0; 405 reg_wr->wr.num_sge = 0;
388 reg_wr.wr.send_flags = 0; 406 reg_wr->wr.send_flags = 0;
389 reg_wr.mr = mr; 407 reg_wr->mr = mr;
390 reg_wr.key = mr->rkey; 408 reg_wr->key = mr->rkey;
391 reg_wr.access = writing ? 409 reg_wr->access = writing ?
392 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : 410 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
393 IB_ACCESS_REMOTE_READ; 411 IB_ACCESS_REMOTE_READ;
394 412
395 DECR_CQCOUNT(&r_xprt->rx_ep); 413 DECR_CQCOUNT(&r_xprt->rx_ep);
396 rc = ib_post_send(ia->ri_id->qp, &reg_wr.wr, &bad_wr); 414 rc = ib_post_send(ia->ri_id->qp, &reg_wr->wr, &bad_wr);
397 if (rc) 415 if (rc)
398 goto out_senderr; 416 goto out_senderr;
399 417
@@ -413,6 +431,116 @@ out_senderr:
413 return rc; 431 return rc;
414} 432}
415 433
434static struct ib_send_wr *
435__frwr_prepare_linv_wr(struct rpcrdma_mr_seg *seg)
436{
437 struct rpcrdma_mw *mw = seg->rl_mw;
438 struct rpcrdma_frmr *f = &mw->r.frmr;
439 struct ib_send_wr *invalidate_wr;
440
441 f->fr_waiter = false;
442 f->fr_state = FRMR_IS_INVALID;
443 invalidate_wr = &f->fr_invwr;
444
445 memset(invalidate_wr, 0, sizeof(*invalidate_wr));
446 invalidate_wr->wr_id = (unsigned long)(void *)mw;
447 invalidate_wr->opcode = IB_WR_LOCAL_INV;
448 invalidate_wr->ex.invalidate_rkey = f->fr_mr->rkey;
449
450 return invalidate_wr;
451}
452
453static void
454__frwr_dma_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
455 int rc)
456{
457 struct ib_device *device = r_xprt->rx_ia.ri_device;
458 struct rpcrdma_mw *mw = seg->rl_mw;
459 struct rpcrdma_frmr *f = &mw->r.frmr;
460
461 seg->rl_mw = NULL;
462
463 ib_dma_unmap_sg(device, f->sg, f->sg_nents, seg->mr_dir);
464
465 if (!rc)
466 rpcrdma_put_mw(r_xprt, mw);
467 else
468 __frwr_queue_recovery(mw);
469}
470
471/* Invalidate all memory regions that were registered for "req".
472 *
473 * Sleeps until it is safe for the host CPU to access the
474 * previously mapped memory regions.
475 */
476static void
477frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
478{
479 struct ib_send_wr *invalidate_wrs, *pos, *prev, *bad_wr;
480 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
481 struct rpcrdma_mr_seg *seg;
482 unsigned int i, nchunks;
483 struct rpcrdma_frmr *f;
484 int rc;
485
486 dprintk("RPC: %s: req %p\n", __func__, req);
487
488 /* ORDER: Invalidate all of the req's MRs first
489 *
490 * Chain the LOCAL_INV Work Requests and post them with
491 * a single ib_post_send() call.
492 */
493 invalidate_wrs = pos = prev = NULL;
494 seg = NULL;
495 for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) {
496 seg = &req->rl_segments[i];
497
498 pos = __frwr_prepare_linv_wr(seg);
499
500 if (!invalidate_wrs)
501 invalidate_wrs = pos;
502 else
503 prev->next = pos;
504 prev = pos;
505
506 i += seg->mr_nsegs;
507 }
508 f = &seg->rl_mw->r.frmr;
509
510 /* Strong send queue ordering guarantees that when the
511 * last WR in the chain completes, all WRs in the chain
512 * are complete.
513 */
514 f->fr_invwr.send_flags = IB_SEND_SIGNALED;
515 f->fr_waiter = true;
516 init_completion(&f->fr_linv_done);
517 INIT_CQCOUNT(&r_xprt->rx_ep);
518
519 /* Transport disconnect drains the receive CQ before it
520 * replaces the QP. The RPC reply handler won't call us
521 * unless ri_id->qp is a valid pointer.
522 */
523 rc = ib_post_send(ia->ri_id->qp, invalidate_wrs, &bad_wr);
524 if (rc)
525 pr_warn("%s: ib_post_send failed %i\n", __func__, rc);
526
527 wait_for_completion(&f->fr_linv_done);
528
529 /* ORDER: Now DMA unmap all of the req's MRs, and return
530 * them to the free MW list.
531 */
532 for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) {
533 seg = &req->rl_segments[i];
534
535 __frwr_dma_unmap(r_xprt, seg, rc);
536
537 i += seg->mr_nsegs;
538 seg->mr_nsegs = 0;
539 }
540
541 req->rl_nchunks = 0;
542}
543
416/* Post a LOCAL_INV Work Request to prevent further remote access 544/* Post a LOCAL_INV Work Request to prevent further remote access
417 * via RDMA READ or RDMA WRITE. 545 * via RDMA READ or RDMA WRITE.
418 */ 546 */
@@ -423,23 +551,24 @@ frwr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
423 struct rpcrdma_ia *ia = &r_xprt->rx_ia; 551 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
424 struct rpcrdma_mw *mw = seg1->rl_mw; 552 struct rpcrdma_mw *mw = seg1->rl_mw;
425 struct rpcrdma_frmr *frmr = &mw->r.frmr; 553 struct rpcrdma_frmr *frmr = &mw->r.frmr;
426 struct ib_send_wr invalidate_wr, *bad_wr; 554 struct ib_send_wr *invalidate_wr, *bad_wr;
427 int rc, nsegs = seg->mr_nsegs; 555 int rc, nsegs = seg->mr_nsegs;
428 556
429 dprintk("RPC: %s: FRMR %p\n", __func__, mw); 557 dprintk("RPC: %s: FRMR %p\n", __func__, mw);
430 558
431 seg1->rl_mw = NULL; 559 seg1->rl_mw = NULL;
432 frmr->fr_state = FRMR_IS_INVALID; 560 frmr->fr_state = FRMR_IS_INVALID;
561 invalidate_wr = &mw->r.frmr.fr_invwr;
433 562
434 memset(&invalidate_wr, 0, sizeof(invalidate_wr)); 563 memset(invalidate_wr, 0, sizeof(*invalidate_wr));
435 invalidate_wr.wr_id = (unsigned long)(void *)mw; 564 invalidate_wr->wr_id = (uintptr_t)mw;
436 invalidate_wr.opcode = IB_WR_LOCAL_INV; 565 invalidate_wr->opcode = IB_WR_LOCAL_INV;
437 invalidate_wr.ex.invalidate_rkey = frmr->fr_mr->rkey; 566 invalidate_wr->ex.invalidate_rkey = frmr->fr_mr->rkey;
438 DECR_CQCOUNT(&r_xprt->rx_ep); 567 DECR_CQCOUNT(&r_xprt->rx_ep);
439 568
440 ib_dma_unmap_sg(ia->ri_device, frmr->sg, frmr->sg_nents, seg1->mr_dir); 569 ib_dma_unmap_sg(ia->ri_device, frmr->sg, frmr->sg_nents, seg1->mr_dir);
441 read_lock(&ia->ri_qplock); 570 read_lock(&ia->ri_qplock);
442 rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr); 571 rc = ib_post_send(ia->ri_id->qp, invalidate_wr, &bad_wr);
443 read_unlock(&ia->ri_qplock); 572 read_unlock(&ia->ri_qplock);
444 if (rc) 573 if (rc)
445 goto out_err; 574 goto out_err;
@@ -471,6 +600,7 @@ frwr_op_destroy(struct rpcrdma_buffer *buf)
471 600
472const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = { 601const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
473 .ro_map = frwr_op_map, 602 .ro_map = frwr_op_map,
603 .ro_unmap_sync = frwr_op_unmap_sync,
474 .ro_unmap = frwr_op_unmap, 604 .ro_unmap = frwr_op_unmap,
475 .ro_open = frwr_op_open, 605 .ro_open = frwr_op_open,
476 .ro_maxpages = frwr_op_maxpages, 606 .ro_maxpages = frwr_op_maxpages,
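frwr_op_unmap_sync() leans on send-queue ordering: only the last LOCAL_INV in the chain is posted with IB_SEND_SIGNALED, and its completion implies every earlier WR has completed too, so one wakeup fences the whole request. A reduced sketch of that signal-the-last-WR handshake follows; the struct and function names here are generic, not the driver's.

#include <linux/completion.h>

struct inv_waiter {
	bool waiter;			/* mirrors frmr->fr_waiter */
	struct completion done;		/* mirrors frmr->fr_linv_done */
};

/* Runs in the send-completion handler for the final, signaled WR. */
static void inv_send_completion(struct inv_waiter *w)
{
	if (w->waiter)
		complete(&w->done);
}

/* Runs in the process that posted the chained LOCAL_INV WRs. */
static void inv_wait(struct inv_waiter *w)
{
	w->waiter = true;
	init_completion(&w->done);
	/* ... ib_post_send() the chain, last WR signaled ... */
	wait_for_completion(&w->done);
	/* every MR in the chain is now invalid; CPU access is safe */
}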
diff --git a/net/sunrpc/xprtrdma/physical_ops.c b/net/sunrpc/xprtrdma/physical_ops.c
index 617b76f22154..dbb302ecf590 100644
--- a/net/sunrpc/xprtrdma/physical_ops.c
+++ b/net/sunrpc/xprtrdma/physical_ops.c
@@ -83,6 +83,18 @@ physical_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
83 return 1; 83 return 1;
84} 84}
85 85
86/* DMA unmap all memory regions that were mapped for "req".
87 */
88static void
89physical_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
90{
91 struct ib_device *device = r_xprt->rx_ia.ri_device;
92 unsigned int i;
93
94 for (i = 0; req->rl_nchunks; --req->rl_nchunks)
95 rpcrdma_unmap_one(device, &req->rl_segments[i++]);
96}
97
86static void 98static void
87physical_op_destroy(struct rpcrdma_buffer *buf) 99physical_op_destroy(struct rpcrdma_buffer *buf)
88{ 100{
@@ -90,6 +102,7 @@ physical_op_destroy(struct rpcrdma_buffer *buf)
90 102
91const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops = { 103const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops = {
92 .ro_map = physical_op_map, 104 .ro_map = physical_op_map,
105 .ro_unmap_sync = physical_op_unmap_sync,
93 .ro_unmap = physical_op_unmap, 106 .ro_unmap = physical_op_unmap,
94 .ro_open = physical_op_open, 107 .ro_open = physical_op_open,
95 .ro_maxpages = physical_op_maxpages, 108 .ro_maxpages = physical_op_maxpages,
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index c10d9699441c..0f28f2d743ed 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -804,6 +804,11 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep)
804 if (req->rl_reply) 804 if (req->rl_reply)
805 goto out_duplicate; 805 goto out_duplicate;
806 806
807 /* Sanity checking has passed. We are now committed
808 * to complete this transaction.
809 */
810 list_del_init(&rqst->rq_list);
811 spin_unlock_bh(&xprt->transport_lock);
807 dprintk("RPC: %s: reply 0x%p completes request 0x%p\n" 812 dprintk("RPC: %s: reply 0x%p completes request 0x%p\n"
808 " RPC request 0x%p xid 0x%08x\n", 813 " RPC request 0x%p xid 0x%08x\n",
809 __func__, rep, req, rqst, 814 __func__, rep, req, rqst,
@@ -888,12 +893,23 @@ badheader:
888 break; 893 break;
889 } 894 }
890 895
896 /* Invalidate and flush the data payloads before waking the
897 * waiting application. This guarantees the memory region is
898 * properly fenced from the server before the application
899 * accesses the data. It also ensures proper send flow
900 * control: waking the next RPC waits until this RPC has
901 * relinquished all its Send Queue entries.
902 */
903 if (req->rl_nchunks)
904 r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt, req);
905
891 credits = be32_to_cpu(headerp->rm_credit); 906 credits = be32_to_cpu(headerp->rm_credit);
892 if (credits == 0) 907 if (credits == 0)
893 credits = 1; /* don't deadlock */ 908 credits = 1; /* don't deadlock */
894 else if (credits > r_xprt->rx_buf.rb_max_requests) 909 else if (credits > r_xprt->rx_buf.rb_max_requests)
895 credits = r_xprt->rx_buf.rb_max_requests; 910 credits = r_xprt->rx_buf.rb_max_requests;
896 911
912 spin_lock_bh(&xprt->transport_lock);
897 cwnd = xprt->cwnd; 913 cwnd = xprt->cwnd;
898 xprt->cwnd = credits << RPC_CWNDSHIFT; 914 xprt->cwnd = credits << RPC_CWNDSHIFT;
899 if (xprt->cwnd > cwnd) 915 if (xprt->cwnd > cwnd)
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 8c545f7d7525..740bddcf3488 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -576,6 +576,9 @@ xprt_rdma_free(void *buffer)
576 576
577 rb = container_of(buffer, struct rpcrdma_regbuf, rg_base[0]); 577 rb = container_of(buffer, struct rpcrdma_regbuf, rg_base[0]);
578 req = rb->rg_owner; 578 req = rb->rg_owner;
579 if (req->rl_backchannel)
580 return;
581
579 r_xprt = container_of(req->rl_buffer, struct rpcrdma_xprt, rx_buf); 582 r_xprt = container_of(req->rl_buffer, struct rpcrdma_xprt, rx_buf);
580 583
581 dprintk("RPC: %s: called on 0x%p\n", __func__, req->rl_reply); 584 dprintk("RPC: %s: called on 0x%p\n", __func__, req->rl_reply);
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index eadd1655145a..732c71ce5dca 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -616,10 +616,8 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
616 616
617 /* set trigger for requesting send completion */ 617 /* set trigger for requesting send completion */
618 ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1; 618 ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1;
619 if (ep->rep_cqinit > RPCRDMA_MAX_UNSIGNALED_SENDS) 619 if (ep->rep_cqinit <= 2)
620 ep->rep_cqinit = RPCRDMA_MAX_UNSIGNALED_SENDS; 620 ep->rep_cqinit = 0; /* always signal? */
621 else if (ep->rep_cqinit <= 2)
622 ep->rep_cqinit = 0;
623 INIT_CQCOUNT(ep); 621 INIT_CQCOUNT(ep);
624 init_waitqueue_head(&ep->rep_connect_wait); 622 init_waitqueue_head(&ep->rep_connect_wait);
625 INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker); 623 INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker);
@@ -852,10 +850,11 @@ retry:
852 850
853 if (extras) { 851 if (extras) {
854 rc = rpcrdma_ep_post_extra_recv(r_xprt, extras); 852 rc = rpcrdma_ep_post_extra_recv(r_xprt, extras);
855 if (rc) 853 if (rc) {
856 pr_warn("%s: rpcrdma_ep_post_extra_recv: %i\n", 854 pr_warn("%s: rpcrdma_ep_post_extra_recv: %i\n",
857 __func__, rc); 855 __func__, rc);
858 rc = 0; 856 rc = 0;
857 }
859 } 858 }
860 } 859 }
861 860
@@ -1337,15 +1336,14 @@ rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *r_xprt, unsigned int count)
1337 struct rpcrdma_ia *ia = &r_xprt->rx_ia; 1336 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
1338 struct rpcrdma_ep *ep = &r_xprt->rx_ep; 1337 struct rpcrdma_ep *ep = &r_xprt->rx_ep;
1339 struct rpcrdma_rep *rep; 1338 struct rpcrdma_rep *rep;
1340 unsigned long flags;
1341 int rc; 1339 int rc;
1342 1340
1343 while (count--) { 1341 while (count--) {
1344 spin_lock_irqsave(&buffers->rb_lock, flags); 1342 spin_lock(&buffers->rb_lock);
1345 if (list_empty(&buffers->rb_recv_bufs)) 1343 if (list_empty(&buffers->rb_recv_bufs))
1346 goto out_reqbuf; 1344 goto out_reqbuf;
1347 rep = rpcrdma_buffer_get_rep_locked(buffers); 1345 rep = rpcrdma_buffer_get_rep_locked(buffers);
1348 spin_unlock_irqrestore(&buffers->rb_lock, flags); 1346 spin_unlock(&buffers->rb_lock);
1349 1347
1350 rc = rpcrdma_ep_post_recv(ia, ep, rep); 1348 rc = rpcrdma_ep_post_recv(ia, ep, rep);
1351 if (rc) 1349 if (rc)
@@ -1355,7 +1353,7 @@ rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *r_xprt, unsigned int count)
1355 return 0; 1353 return 0;
1356 1354
1357out_reqbuf: 1355out_reqbuf:
1358 spin_unlock_irqrestore(&buffers->rb_lock, flags); 1356 spin_unlock(&buffers->rb_lock);
1359 pr_warn("%s: no extra receive buffers\n", __func__); 1357 pr_warn("%s: no extra receive buffers\n", __func__);
1360 return -ENOMEM; 1358 return -ENOMEM;
1361 1359
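The verbs.c locking hunks are a context cleanup: rb_lock is no longer taken from interrupt context, so the _irqsave/_irqrestore variants and their on-stack flags word can be dropped in favor of plain spin_lock()/spin_unlock(). A minimal sketch of the cheaper form, with hypothetical names:

#include <linux/list.h>
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(example_lock);
static LIST_HEAD(example_bufs);

/* Safe only while no irq-context path ever takes example_lock. */
static void example_put_buf(struct list_head *buf)
{
	spin_lock(&example_lock);	/* no local_irq_save() overhead */
	list_add(buf, &example_bufs);
	spin_unlock(&example_lock);
}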
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index ac7f8d4f632a..728101ddc44b 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -88,12 +88,6 @@ struct rpcrdma_ep {
88 struct delayed_work rep_connect_worker; 88 struct delayed_work rep_connect_worker;
89}; 89};
90 90
91/*
92 * Force a signaled SEND Work Request every so often,
93 * in case the provider needs to do some housekeeping.
94 */
95#define RPCRDMA_MAX_UNSIGNALED_SENDS (32)
96
97#define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit) 91#define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit)
98#define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount) 92#define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount)
99 93
@@ -207,6 +201,12 @@ struct rpcrdma_frmr {
207 enum rpcrdma_frmr_state fr_state; 201 enum rpcrdma_frmr_state fr_state;
208 struct work_struct fr_work; 202 struct work_struct fr_work;
209 struct rpcrdma_xprt *fr_xprt; 203 struct rpcrdma_xprt *fr_xprt;
204 bool fr_waiter;
 205 struct completion fr_linv_done;
206 union {
207 struct ib_reg_wr fr_regwr;
208 struct ib_send_wr fr_invwr;
209 };
210}; 210};
211 211
212struct rpcrdma_fmr { 212struct rpcrdma_fmr {
@@ -364,6 +364,8 @@ struct rpcrdma_xprt;
364struct rpcrdma_memreg_ops { 364struct rpcrdma_memreg_ops {
365 int (*ro_map)(struct rpcrdma_xprt *, 365 int (*ro_map)(struct rpcrdma_xprt *,
366 struct rpcrdma_mr_seg *, int, bool); 366 struct rpcrdma_mr_seg *, int, bool);
367 void (*ro_unmap_sync)(struct rpcrdma_xprt *,
368 struct rpcrdma_req *);
367 int (*ro_unmap)(struct rpcrdma_xprt *, 369 int (*ro_unmap)(struct rpcrdma_xprt *,
368 struct rpcrdma_mr_seg *); 370 struct rpcrdma_mr_seg *);
369 int (*ro_open)(struct rpcrdma_ia *, 371 int (*ro_open)(struct rpcrdma_ia *,
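The xprt_rdma.h union is safe because an FRMR's lifecycle alternates strictly between registration (FAST_REG on the map path) and invalidation (LOCAL_INV on the unmap path); the two work requests are never in flight at the same time, so they can share storage. A sketch with a hypothetical struct name:

#include <rdma/ib_verbs.h>

/* Overlaying the two WRs saves the larger footprint per MR and lets
 * both paths reuse pre-allocated memory instead of building the WR
 * on the stack each time. */
struct example_frmr {
	union {
		struct ib_reg_wr	regwr;	/* map path only */
		struct ib_send_wr	invwr;	/* unmap path only */
	};
};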
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 2ffaf6a79499..fde2138b81e7 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -398,7 +398,6 @@ static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen,
398 if (unlikely(!sock)) 398 if (unlikely(!sock))
399 return -ENOTSOCK; 399 return -ENOTSOCK;
400 400
401 clear_bit(SOCKWQ_ASYNC_NOSPACE, &sock->flags);
402 if (base != 0) { 401 if (base != 0) {
403 addr = NULL; 402 addr = NULL;
404 addrlen = 0; 403 addrlen = 0;
@@ -442,7 +441,6 @@ static void xs_nospace_callback(struct rpc_task *task)
442 struct sock_xprt *transport = container_of(task->tk_rqstp->rq_xprt, struct sock_xprt, xprt); 441 struct sock_xprt *transport = container_of(task->tk_rqstp->rq_xprt, struct sock_xprt, xprt);
443 442
444 transport->inet->sk_write_pending--; 443 transport->inet->sk_write_pending--;
445 clear_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags);
446} 444}
447 445
448/** 446/**
@@ -467,20 +465,11 @@ static int xs_nospace(struct rpc_task *task)
467 465
468 /* Don't race with disconnect */ 466 /* Don't race with disconnect */
469 if (xprt_connected(xprt)) { 467 if (xprt_connected(xprt)) {
470 if (test_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags)) { 468 /* wait for more buffer space */
471 /* 469 sk->sk_write_pending++;
472 * Notify TCP that we're limited by the application 470 xprt_wait_for_buffer_space(task, xs_nospace_callback);
473 * window size 471 } else
474 */
475 set_bit(SOCK_NOSPACE, &transport->sock->flags);
476 sk->sk_write_pending++;
477 /* ...and wait for more buffer space */
478 xprt_wait_for_buffer_space(task, xs_nospace_callback);
479 }
480 } else {
481 clear_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags);
482 ret = -ENOTCONN; 472 ret = -ENOTCONN;
483 }
484 473
485 spin_unlock_bh(&xprt->transport_lock); 474 spin_unlock_bh(&xprt->transport_lock);
486 475
@@ -616,9 +605,6 @@ process_status:
616 case -EAGAIN: 605 case -EAGAIN:
617 status = xs_nospace(task); 606 status = xs_nospace(task);
618 break; 607 break;
619 default:
620 dprintk("RPC: sendmsg returned unrecognized error %d\n",
621 -status);
622 case -ENETUNREACH: 608 case -ENETUNREACH:
623 case -ENOBUFS: 609 case -ENOBUFS:
624 case -EPIPE: 610 case -EPIPE:
@@ -626,7 +612,10 @@ process_status:
626 case -EPERM: 612 case -EPERM:
627 /* When the server has died, an ICMP port unreachable message 613 /* When the server has died, an ICMP port unreachable message
628 * prompts ECONNREFUSED. */ 614 * prompts ECONNREFUSED. */
629 clear_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags); 615 break;
616 default:
617 dprintk("RPC: sendmsg returned unrecognized error %d\n",
618 -status);
630 } 619 }
631 620
632 return status; 621 return status;
@@ -706,16 +695,16 @@ static int xs_tcp_send_request(struct rpc_task *task)
706 case -EAGAIN: 695 case -EAGAIN:
707 status = xs_nospace(task); 696 status = xs_nospace(task);
708 break; 697 break;
709 default:
710 dprintk("RPC: sendmsg returned unrecognized error %d\n",
711 -status);
712 case -ECONNRESET: 698 case -ECONNRESET:
713 case -ECONNREFUSED: 699 case -ECONNREFUSED:
714 case -ENOTCONN: 700 case -ENOTCONN:
715 case -EADDRINUSE: 701 case -EADDRINUSE:
716 case -ENOBUFS: 702 case -ENOBUFS:
717 case -EPIPE: 703 case -EPIPE:
718 clear_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags); 704 break;
705 default:
706 dprintk("RPC: sendmsg returned unrecognized error %d\n",
707 -status);
719 } 708 }
720 709
721 return status; 710 return status;
@@ -1609,19 +1598,23 @@ static void xs_tcp_state_change(struct sock *sk)
1609 1598
1610static void xs_write_space(struct sock *sk) 1599static void xs_write_space(struct sock *sk)
1611{ 1600{
1612 struct socket *sock; 1601 struct socket_wq *wq;
1613 struct rpc_xprt *xprt; 1602 struct rpc_xprt *xprt;
1614 1603
1615 if (unlikely(!(sock = sk->sk_socket))) 1604 if (!sk->sk_socket)
1616 return; 1605 return;
1617 clear_bit(SOCK_NOSPACE, &sock->flags); 1606 clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1618 1607
1619 if (unlikely(!(xprt = xprt_from_sock(sk)))) 1608 if (unlikely(!(xprt = xprt_from_sock(sk))))
1620 return; 1609 return;
1621 if (test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &sock->flags) == 0) 1610 rcu_read_lock();
1622 return; 1611 wq = rcu_dereference(sk->sk_wq);
1612 if (!wq || test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags) == 0)
1613 goto out;
1623 1614
1624 xprt_write_space(xprt); 1615 xprt_write_space(xprt);
1616out:
1617 rcu_read_unlock();
1625} 1618}
1626 1619
1627/** 1620/**
@@ -1907,18 +1900,6 @@ static inline void xs_reclassify_socket(int family, struct socket *sock)
1907 } 1900 }
1908} 1901}
1909#else 1902#else
1910static inline void xs_reclassify_socketu(struct socket *sock)
1911{
1912}
1913
1914static inline void xs_reclassify_socket4(struct socket *sock)
1915{
1916}
1917
1918static inline void xs_reclassify_socket6(struct socket *sock)
1919{
1920}
1921
1922static inline void xs_reclassify_socket(int family, struct socket *sock) 1903static inline void xs_reclassify_socket(int family, struct socket *sock)
1923{ 1904{
1924} 1905}
@@ -2008,7 +1989,7 @@ static int xs_local_setup_socket(struct sock_xprt *transport)
2008 "transport socket (%d).\n", -status); 1989 "transport socket (%d).\n", -status);
2009 goto out; 1990 goto out;
2010 } 1991 }
2011 xs_reclassify_socketu(sock); 1992 xs_reclassify_socket(AF_LOCAL, sock);
2012 1993
2013 dprintk("RPC: worker connecting xprt %p via AF_LOCAL to %s\n", 1994 dprintk("RPC: worker connecting xprt %p via AF_LOCAL to %s\n",
2014 xprt, xprt->address_strings[RPC_DISPLAY_ADDR]); 1995 xprt, xprt->address_strings[RPC_DISPLAY_ADDR]);
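xs_write_space() now reaches the socket wait-queue flags through the RCU-protected sk->sk_wq pointer instead of via sk->sk_socket, which can be torn down concurrently. A condensed sketch of the dereference pattern the final hunk establishes; example_write_space() is illustrative, not the kernel's function.

#include <net/sock.h>

static void example_write_space(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);	/* valid only inside RCU */
	if (wq && test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags)) {
		/* safe to report write space to the transport here */
	}
	rcu_read_unlock();
}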