diff options
36 files changed, 1480 insertions, 519 deletions
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 807eb6ef4f91..f0939d097406 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c | |||
@@ -83,8 +83,11 @@ __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy, | |||
83 | 83 | ||
84 | res = htonl(NFS4ERR_BADHANDLE); | 84 | res = htonl(NFS4ERR_BADHANDLE); |
85 | inode = nfs_delegation_find_inode(cps->clp, &args->fh); | 85 | inode = nfs_delegation_find_inode(cps->clp, &args->fh); |
86 | if (inode == NULL) | 86 | if (inode == NULL) { |
87 | trace_nfs4_cb_recall(cps->clp, &args->fh, NULL, | ||
88 | &args->stateid, -ntohl(res)); | ||
87 | goto out; | 89 | goto out; |
90 | } | ||
88 | /* Set up a helper thread to actually return the delegation */ | 91 | /* Set up a helper thread to actually return the delegation */ |
89 | switch (nfs_async_inode_return_delegation(inode, &args->stateid)) { | 92 | switch (nfs_async_inode_return_delegation(inode, &args->stateid)) { |
90 | case 0: | 93 | case 0: |
@@ -96,7 +99,8 @@ __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy, | |||
96 | default: | 99 | default: |
97 | res = htonl(NFS4ERR_RESOURCE); | 100 | res = htonl(NFS4ERR_RESOURCE); |
98 | } | 101 | } |
99 | trace_nfs4_recall_delegation(inode, -ntohl(res)); | 102 | trace_nfs4_cb_recall(cps->clp, &args->fh, inode, |
103 | &args->stateid, -ntohl(res)); | ||
100 | iput(inode); | 104 | iput(inode); |
101 | out: | 105 | out: |
102 | dprintk("%s: exit with status = %d\n", __func__, ntohl(res)); | 106 | dprintk("%s: exit with status = %d\n", __func__, ntohl(res)); |
@@ -160,6 +164,22 @@ static struct pnfs_layout_hdr * get_layout_by_fh(struct nfs_client *clp, | |||
160 | return lo; | 164 | return lo; |
161 | } | 165 | } |
162 | 166 | ||
167 | /* | ||
168 | * Enforce RFC5661 section 12.5.5.2.1. (Layout Recall and Return Sequencing) | ||
169 | */ | ||
170 | static bool pnfs_check_stateid_sequence(struct pnfs_layout_hdr *lo, | ||
171 | const nfs4_stateid *new) | ||
172 | { | ||
173 | u32 oldseq, newseq; | ||
174 | |||
175 | oldseq = be32_to_cpu(lo->plh_stateid.seqid); | ||
176 | newseq = be32_to_cpu(new->seqid); | ||
177 | |||
178 | if (newseq > oldseq + 1) | ||
179 | return false; | ||
180 | return true; | ||
181 | } | ||
182 | |||
163 | static u32 initiate_file_draining(struct nfs_client *clp, | 183 | static u32 initiate_file_draining(struct nfs_client *clp, |
164 | struct cb_layoutrecallargs *args) | 184 | struct cb_layoutrecallargs *args) |
165 | { | 185 | { |
@@ -169,34 +189,52 @@ static u32 initiate_file_draining(struct nfs_client *clp, | |||
169 | LIST_HEAD(free_me_list); | 189 | LIST_HEAD(free_me_list); |
170 | 190 | ||
171 | lo = get_layout_by_fh(clp, &args->cbl_fh, &args->cbl_stateid); | 191 | lo = get_layout_by_fh(clp, &args->cbl_fh, &args->cbl_stateid); |
172 | if (!lo) | 192 | if (!lo) { |
193 | trace_nfs4_cb_layoutrecall_file(clp, &args->cbl_fh, NULL, | ||
194 | &args->cbl_stateid, -rv); | ||
173 | goto out; | 195 | goto out; |
196 | } | ||
174 | 197 | ||
175 | ino = lo->plh_inode; | 198 | ino = lo->plh_inode; |
176 | 199 | ||
177 | spin_lock(&ino->i_lock); | 200 | spin_lock(&ino->i_lock); |
201 | if (!pnfs_check_stateid_sequence(lo, &args->cbl_stateid)) { | ||
202 | rv = NFS4ERR_DELAY; | ||
203 | goto unlock; | ||
204 | } | ||
178 | pnfs_set_layout_stateid(lo, &args->cbl_stateid, true); | 205 | pnfs_set_layout_stateid(lo, &args->cbl_stateid, true); |
179 | spin_unlock(&ino->i_lock); | 206 | spin_unlock(&ino->i_lock); |
180 | 207 | ||
181 | pnfs_layoutcommit_inode(ino, false); | 208 | pnfs_layoutcommit_inode(ino, false); |
182 | 209 | ||
183 | spin_lock(&ino->i_lock); | 210 | spin_lock(&ino->i_lock); |
184 | if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) || | 211 | /* |
185 | pnfs_mark_matching_lsegs_invalid(lo, &free_me_list, | 212 | * Enforce RFC5661 Section 12.5.5.2.1.5 (Bulk Recall and Return) |
186 | &args->cbl_range)) { | 213 | */ |
214 | if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) { | ||
187 | rv = NFS4ERR_DELAY; | 215 | rv = NFS4ERR_DELAY; |
188 | goto unlock; | 216 | goto unlock; |
189 | } | 217 | } |
190 | 218 | ||
219 | if (pnfs_mark_matching_lsegs_return(lo, &free_me_list, | ||
220 | &args->cbl_range)) { | ||
221 | rv = NFS4_OK; | ||
222 | goto unlock; | ||
223 | } | ||
224 | |||
191 | if (NFS_SERVER(ino)->pnfs_curr_ld->return_range) { | 225 | if (NFS_SERVER(ino)->pnfs_curr_ld->return_range) { |
192 | NFS_SERVER(ino)->pnfs_curr_ld->return_range(lo, | 226 | NFS_SERVER(ino)->pnfs_curr_ld->return_range(lo, |
193 | &args->cbl_range); | 227 | &args->cbl_range); |
194 | } | 228 | } |
229 | pnfs_mark_layout_returned_if_empty(lo); | ||
195 | unlock: | 230 | unlock: |
196 | spin_unlock(&ino->i_lock); | 231 | spin_unlock(&ino->i_lock); |
197 | pnfs_free_lseg_list(&free_me_list); | 232 | pnfs_free_lseg_list(&free_me_list); |
233 | /* Free all lsegs that are attached to commit buckets */ | ||
234 | nfs_commit_inode(ino, 0); | ||
198 | pnfs_put_layout_hdr(lo); | 235 | pnfs_put_layout_hdr(lo); |
199 | trace_nfs4_cb_layoutrecall_inode(clp, &args->cbl_fh, ino, -rv); | 236 | trace_nfs4_cb_layoutrecall_file(clp, &args->cbl_fh, ino, |
237 | &args->cbl_stateid, -rv); | ||
200 | iput(ino); | 238 | iput(ino); |
201 | out: | 239 | out: |
202 | return rv; | 240 | return rv; |
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 8a0530921685..c82a21228a34 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c | |||
@@ -2431,6 +2431,20 @@ int nfs_may_open(struct inode *inode, struct rpc_cred *cred, int openflags) | |||
2431 | } | 2431 | } |
2432 | EXPORT_SYMBOL_GPL(nfs_may_open); | 2432 | EXPORT_SYMBOL_GPL(nfs_may_open); |
2433 | 2433 | ||
2434 | static int nfs_execute_ok(struct inode *inode, int mask) | ||
2435 | { | ||
2436 | struct nfs_server *server = NFS_SERVER(inode); | ||
2437 | int ret; | ||
2438 | |||
2439 | if (mask & MAY_NOT_BLOCK) | ||
2440 | ret = nfs_revalidate_inode_rcu(server, inode); | ||
2441 | else | ||
2442 | ret = nfs_revalidate_inode(server, inode); | ||
2443 | if (ret == 0 && !execute_ok(inode)) | ||
2444 | ret = -EACCES; | ||
2445 | return ret; | ||
2446 | } | ||
2447 | |||
2434 | int nfs_permission(struct inode *inode, int mask) | 2448 | int nfs_permission(struct inode *inode, int mask) |
2435 | { | 2449 | { |
2436 | struct rpc_cred *cred; | 2450 | struct rpc_cred *cred; |
@@ -2448,6 +2462,9 @@ int nfs_permission(struct inode *inode, int mask) | |||
2448 | case S_IFLNK: | 2462 | case S_IFLNK: |
2449 | goto out; | 2463 | goto out; |
2450 | case S_IFREG: | 2464 | case S_IFREG: |
2465 | if ((mask & MAY_OPEN) && | ||
2466 | nfs_server_capable(inode, NFS_CAP_ATOMIC_OPEN)) | ||
2467 | return 0; | ||
2451 | break; | 2468 | break; |
2452 | case S_IFDIR: | 2469 | case S_IFDIR: |
2453 | /* | 2470 | /* |
@@ -2480,8 +2497,8 @@ force_lookup: | |||
2480 | res = PTR_ERR(cred); | 2497 | res = PTR_ERR(cred); |
2481 | } | 2498 | } |
2482 | out: | 2499 | out: |
2483 | if (!res && (mask & MAY_EXEC) && !execute_ok(inode)) | 2500 | if (!res && (mask & MAY_EXEC)) |
2484 | res = -EACCES; | 2501 | res = nfs_execute_ok(inode, mask); |
2485 | 2502 | ||
2486 | dfprintk(VFS, "NFS: permission(%s/%lu), mask=0x%x, res=%d\n", | 2503 | dfprintk(VFS, "NFS: permission(%s/%lu), mask=0x%x, res=%d\n", |
2487 | inode->i_sb->s_id, inode->i_ino, mask, res); | 2504 | inode->i_sb->s_id, inode->i_ino, mask, res); |
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 4b1d08f56aba..7ab7ec9f4eed 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c | |||
@@ -117,12 +117,6 @@ static inline int put_dreq(struct nfs_direct_req *dreq) | |||
117 | return atomic_dec_and_test(&dreq->io_count); | 117 | return atomic_dec_and_test(&dreq->io_count); |
118 | } | 118 | } |
119 | 119 | ||
120 | void nfs_direct_set_resched_writes(struct nfs_direct_req *dreq) | ||
121 | { | ||
122 | dreq->flags = NFS_ODIRECT_RESCHED_WRITES; | ||
123 | } | ||
124 | EXPORT_SYMBOL_GPL(nfs_direct_set_resched_writes); | ||
125 | |||
126 | static void | 120 | static void |
127 | nfs_direct_good_bytes(struct nfs_direct_req *dreq, struct nfs_pgio_header *hdr) | 121 | nfs_direct_good_bytes(struct nfs_direct_req *dreq, struct nfs_pgio_header *hdr) |
128 | { | 122 | { |
@@ -670,6 +664,10 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) | |||
670 | 664 | ||
671 | req = nfs_list_entry(reqs.next); | 665 | req = nfs_list_entry(reqs.next); |
672 | nfs_direct_setup_mirroring(dreq, &desc, req); | 666 | nfs_direct_setup_mirroring(dreq, &desc, req); |
667 | if (desc.pg_error < 0) { | ||
668 | list_splice_init(&reqs, &failed); | ||
669 | goto out_failed; | ||
670 | } | ||
673 | 671 | ||
674 | list_for_each_entry_safe(req, tmp, &reqs, wb_list) { | 672 | list_for_each_entry_safe(req, tmp, &reqs, wb_list) { |
675 | if (!nfs_pageio_add_request(&desc, req)) { | 673 | if (!nfs_pageio_add_request(&desc, req)) { |
@@ -677,13 +675,17 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) | |||
677 | nfs_list_add_request(req, &failed); | 675 | nfs_list_add_request(req, &failed); |
678 | spin_lock(cinfo.lock); | 676 | spin_lock(cinfo.lock); |
679 | dreq->flags = 0; | 677 | dreq->flags = 0; |
680 | dreq->error = -EIO; | 678 | if (desc.pg_error < 0) |
679 | dreq->error = desc.pg_error; | ||
680 | else | ||
681 | dreq->error = -EIO; | ||
681 | spin_unlock(cinfo.lock); | 682 | spin_unlock(cinfo.lock); |
682 | } | 683 | } |
683 | nfs_release_request(req); | 684 | nfs_release_request(req); |
684 | } | 685 | } |
685 | nfs_pageio_complete(&desc); | 686 | nfs_pageio_complete(&desc); |
686 | 687 | ||
688 | out_failed: | ||
687 | while (!list_empty(&failed)) { | 689 | while (!list_empty(&failed)) { |
688 | req = nfs_list_entry(failed.next); | 690 | req = nfs_list_entry(failed.next); |
689 | nfs_list_remove_request(req); | 691 | nfs_list_remove_request(req); |
@@ -727,14 +729,20 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data) | |||
727 | nfs_direct_write_complete(dreq, data->inode); | 729 | nfs_direct_write_complete(dreq, data->inode); |
728 | } | 730 | } |
729 | 731 | ||
730 | static void nfs_direct_error_cleanup(struct nfs_inode *nfsi) | 732 | static void nfs_direct_resched_write(struct nfs_commit_info *cinfo, |
733 | struct nfs_page *req) | ||
731 | { | 734 | { |
732 | /* There is no lock to clear */ | 735 | struct nfs_direct_req *dreq = cinfo->dreq; |
736 | |||
737 | spin_lock(&dreq->lock); | ||
738 | dreq->flags = NFS_ODIRECT_RESCHED_WRITES; | ||
739 | spin_unlock(&dreq->lock); | ||
740 | nfs_mark_request_commit(req, NULL, cinfo, 0); | ||
733 | } | 741 | } |
734 | 742 | ||
735 | static const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops = { | 743 | static const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops = { |
736 | .completion = nfs_direct_commit_complete, | 744 | .completion = nfs_direct_commit_complete, |
737 | .error_cleanup = nfs_direct_error_cleanup, | 745 | .resched_write = nfs_direct_resched_write, |
738 | }; | 746 | }; |
739 | 747 | ||
740 | static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) | 748 | static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) |
@@ -839,10 +847,25 @@ static void nfs_write_sync_pgio_error(struct list_head *head) | |||
839 | } | 847 | } |
840 | } | 848 | } |
841 | 849 | ||
850 | static void nfs_direct_write_reschedule_io(struct nfs_pgio_header *hdr) | ||
851 | { | ||
852 | struct nfs_direct_req *dreq = hdr->dreq; | ||
853 | |||
854 | spin_lock(&dreq->lock); | ||
855 | if (dreq->error == 0) { | ||
856 | dreq->flags = NFS_ODIRECT_RESCHED_WRITES; | ||
857 | /* fake unstable write to let common nfs resend pages */ | ||
858 | hdr->verf.committed = NFS_UNSTABLE; | ||
859 | hdr->good_bytes = hdr->args.count; | ||
860 | } | ||
861 | spin_unlock(&dreq->lock); | ||
862 | } | ||
863 | |||
842 | static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = { | 864 | static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = { |
843 | .error_cleanup = nfs_write_sync_pgio_error, | 865 | .error_cleanup = nfs_write_sync_pgio_error, |
844 | .init_hdr = nfs_direct_pgio_init, | 866 | .init_hdr = nfs_direct_pgio_init, |
845 | .completion = nfs_direct_write_completion, | 867 | .completion = nfs_direct_write_completion, |
868 | .reschedule_io = nfs_direct_write_reschedule_io, | ||
846 | }; | 869 | }; |
847 | 870 | ||
848 | 871 | ||
@@ -900,6 +923,11 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, | |||
900 | } | 923 | } |
901 | 924 | ||
902 | nfs_direct_setup_mirroring(dreq, &desc, req); | 925 | nfs_direct_setup_mirroring(dreq, &desc, req); |
926 | if (desc.pg_error < 0) { | ||
927 | nfs_free_request(req); | ||
928 | result = desc.pg_error; | ||
929 | break; | ||
930 | } | ||
903 | 931 | ||
904 | nfs_lock_request(req); | 932 | nfs_lock_request(req); |
905 | req->wb_index = pos >> PAGE_SHIFT; | 933 | req->wb_index = pos >> PAGE_SHIFT; |
diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 93e236429c5d..4ef8f5addcad 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c | |||
@@ -514,7 +514,7 @@ static void nfs_check_dirty_writeback(struct page *page, | |||
514 | * so it will not block due to pages that will shortly be freeable. | 514 | * so it will not block due to pages that will shortly be freeable. |
515 | */ | 515 | */ |
516 | nfsi = NFS_I(mapping->host); | 516 | nfsi = NFS_I(mapping->host); |
517 | if (test_bit(NFS_INO_COMMIT, &nfsi->flags)) { | 517 | if (atomic_read(&nfsi->commit_info.rpcs_out)) { |
518 | *writeback = true; | 518 | *writeback = true; |
519 | return; | 519 | return; |
520 | } | 520 | } |
@@ -545,7 +545,7 @@ static int nfs_launder_page(struct page *page) | |||
545 | inode->i_ino, (long long)page_offset(page)); | 545 | inode->i_ino, (long long)page_offset(page)); |
546 | 546 | ||
547 | nfs_fscache_wait_on_page_write(nfsi, page); | 547 | nfs_fscache_wait_on_page_write(nfsi, page); |
548 | return nfs_wb_page(inode, page); | 548 | return nfs_wb_launder_page(inode, page); |
549 | } | 549 | } |
550 | 550 | ||
551 | static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file, | 551 | static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file, |
@@ -756,7 +756,7 @@ do_unlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) | |||
756 | 756 | ||
757 | l_ctx = nfs_get_lock_context(nfs_file_open_context(filp)); | 757 | l_ctx = nfs_get_lock_context(nfs_file_open_context(filp)); |
758 | if (!IS_ERR(l_ctx)) { | 758 | if (!IS_ERR(l_ctx)) { |
759 | status = nfs_iocounter_wait(&l_ctx->io_count); | 759 | status = nfs_iocounter_wait(l_ctx); |
760 | nfs_put_lock_context(l_ctx); | 760 | nfs_put_lock_context(l_ctx); |
761 | if (status < 0) | 761 | if (status < 0) |
762 | return status; | 762 | return status; |
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index 02ec07973bc4..bb1f4e7a3270 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c | |||
@@ -202,6 +202,7 @@ static int filelayout_async_handle_error(struct rpc_task *task, | |||
202 | task->tk_status); | 202 | task->tk_status); |
203 | nfs4_mark_deviceid_unavailable(devid); | 203 | nfs4_mark_deviceid_unavailable(devid); |
204 | pnfs_error_mark_layout_for_return(inode, lseg); | 204 | pnfs_error_mark_layout_for_return(inode, lseg); |
205 | pnfs_set_lo_fail(lseg); | ||
205 | rpc_wake_up(&tbl->slot_tbl_waitq); | 206 | rpc_wake_up(&tbl->slot_tbl_waitq); |
206 | /* fall through */ | 207 | /* fall through */ |
207 | default: | 208 | default: |
@@ -883,13 +884,19 @@ static void | |||
883 | filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio, | 884 | filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio, |
884 | struct nfs_page *req) | 885 | struct nfs_page *req) |
885 | { | 886 | { |
886 | if (!pgio->pg_lseg) | 887 | if (!pgio->pg_lseg) { |
887 | pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, | 888 | pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, |
888 | req->wb_context, | 889 | req->wb_context, |
889 | 0, | 890 | 0, |
890 | NFS4_MAX_UINT64, | 891 | NFS4_MAX_UINT64, |
891 | IOMODE_READ, | 892 | IOMODE_READ, |
892 | GFP_KERNEL); | 893 | GFP_KERNEL); |
894 | if (IS_ERR(pgio->pg_lseg)) { | ||
895 | pgio->pg_error = PTR_ERR(pgio->pg_lseg); | ||
896 | pgio->pg_lseg = NULL; | ||
897 | return; | ||
898 | } | ||
899 | } | ||
893 | /* If no lseg, fall back to read through mds */ | 900 | /* If no lseg, fall back to read through mds */ |
894 | if (pgio->pg_lseg == NULL) | 901 | if (pgio->pg_lseg == NULL) |
895 | nfs_pageio_reset_read_mds(pgio); | 902 | nfs_pageio_reset_read_mds(pgio); |
@@ -902,13 +909,20 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio, | |||
902 | struct nfs_commit_info cinfo; | 909 | struct nfs_commit_info cinfo; |
903 | int status; | 910 | int status; |
904 | 911 | ||
905 | if (!pgio->pg_lseg) | 912 | if (!pgio->pg_lseg) { |
906 | pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, | 913 | pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, |
907 | req->wb_context, | 914 | req->wb_context, |
908 | 0, | 915 | 0, |
909 | NFS4_MAX_UINT64, | 916 | NFS4_MAX_UINT64, |
910 | IOMODE_RW, | 917 | IOMODE_RW, |
911 | GFP_NOFS); | 918 | GFP_NOFS); |
919 | if (IS_ERR(pgio->pg_lseg)) { | ||
920 | pgio->pg_error = PTR_ERR(pgio->pg_lseg); | ||
921 | pgio->pg_lseg = NULL; | ||
922 | return; | ||
923 | } | ||
924 | } | ||
925 | |||
912 | /* If no lseg, fall back to write through mds */ | 926 | /* If no lseg, fall back to write through mds */ |
913 | if (pgio->pg_lseg == NULL) | 927 | if (pgio->pg_lseg == NULL) |
914 | goto out_mds; | 928 | goto out_mds; |
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 03516c80855a..6594e9f903a0 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c | |||
@@ -145,7 +145,7 @@ static bool ff_mirror_match_fh(const struct nfs4_ff_layout_mirror *m1, | |||
145 | return false; | 145 | return false; |
146 | for (i = 0; i < m1->fh_versions_cnt; i++) { | 146 | for (i = 0; i < m1->fh_versions_cnt; i++) { |
147 | bool found_fh = false; | 147 | bool found_fh = false; |
148 | for (j = 0; j < m2->fh_versions_cnt; i++) { | 148 | for (j = 0; j < m2->fh_versions_cnt; j++) { |
149 | if (nfs_compare_fh(&m1->fh_versions[i], | 149 | if (nfs_compare_fh(&m1->fh_versions[i], |
150 | &m2->fh_versions[j]) == 0) { | 150 | &m2->fh_versions[j]) == 0) { |
151 | found_fh = true; | 151 | found_fh = true; |
@@ -505,9 +505,17 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh, | |||
505 | } | 505 | } |
506 | 506 | ||
507 | p = xdr_inline_decode(&stream, 4); | 507 | p = xdr_inline_decode(&stream, 4); |
508 | if (p) | 508 | if (!p) |
509 | fls->flags = be32_to_cpup(p); | 509 | goto out_sort_mirrors; |
510 | fls->flags = be32_to_cpup(p); | ||
511 | |||
512 | p = xdr_inline_decode(&stream, 4); | ||
513 | if (!p) | ||
514 | goto out_sort_mirrors; | ||
515 | for (i=0; i < fls->mirror_array_cnt; i++) | ||
516 | fls->mirror_array[i]->report_interval = be32_to_cpup(p); | ||
510 | 517 | ||
518 | out_sort_mirrors: | ||
511 | ff_layout_sort_mirrors(fls); | 519 | ff_layout_sort_mirrors(fls); |
512 | rc = ff_layout_check_layout(lgr); | 520 | rc = ff_layout_check_layout(lgr); |
513 | if (rc) | 521 | if (rc) |
@@ -603,7 +611,9 @@ nfs4_ff_layoutstat_start_io(struct nfs4_ff_layout_mirror *mirror, | |||
603 | mirror->start_time = now; | 611 | mirror->start_time = now; |
604 | if (ktime_equal(mirror->last_report_time, notime)) | 612 | if (ktime_equal(mirror->last_report_time, notime)) |
605 | mirror->last_report_time = now; | 613 | mirror->last_report_time = now; |
606 | if (layoutstats_timer != 0) | 614 | if (mirror->report_interval != 0) |
615 | report_interval = (s64)mirror->report_interval * 1000LL; | ||
616 | else if (layoutstats_timer != 0) | ||
607 | report_interval = (s64)layoutstats_timer * 1000LL; | 617 | report_interval = (s64)layoutstats_timer * 1000LL; |
608 | if (ktime_to_ms(ktime_sub(now, mirror->last_report_time)) >= | 618 | if (ktime_to_ms(ktime_sub(now, mirror->last_report_time)) >= |
609 | report_interval) { | 619 | report_interval) { |
@@ -785,13 +795,19 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio, | |||
785 | int ds_idx; | 795 | int ds_idx; |
786 | 796 | ||
787 | /* Use full layout for now */ | 797 | /* Use full layout for now */ |
788 | if (!pgio->pg_lseg) | 798 | if (!pgio->pg_lseg) { |
789 | pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, | 799 | pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, |
790 | req->wb_context, | 800 | req->wb_context, |
791 | 0, | 801 | 0, |
792 | NFS4_MAX_UINT64, | 802 | NFS4_MAX_UINT64, |
793 | IOMODE_READ, | 803 | IOMODE_READ, |
794 | GFP_KERNEL); | 804 | GFP_KERNEL); |
805 | if (IS_ERR(pgio->pg_lseg)) { | ||
806 | pgio->pg_error = PTR_ERR(pgio->pg_lseg); | ||
807 | pgio->pg_lseg = NULL; | ||
808 | return; | ||
809 | } | ||
810 | } | ||
795 | /* If no lseg, fall back to read through mds */ | 811 | /* If no lseg, fall back to read through mds */ |
796 | if (pgio->pg_lseg == NULL) | 812 | if (pgio->pg_lseg == NULL) |
797 | goto out_mds; | 813 | goto out_mds; |
@@ -825,13 +841,19 @@ ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio, | |||
825 | int i; | 841 | int i; |
826 | int status; | 842 | int status; |
827 | 843 | ||
828 | if (!pgio->pg_lseg) | 844 | if (!pgio->pg_lseg) { |
829 | pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, | 845 | pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, |
830 | req->wb_context, | 846 | req->wb_context, |
831 | 0, | 847 | 0, |
832 | NFS4_MAX_UINT64, | 848 | NFS4_MAX_UINT64, |
833 | IOMODE_RW, | 849 | IOMODE_RW, |
834 | GFP_NOFS); | 850 | GFP_NOFS); |
851 | if (IS_ERR(pgio->pg_lseg)) { | ||
852 | pgio->pg_error = PTR_ERR(pgio->pg_lseg); | ||
853 | pgio->pg_lseg = NULL; | ||
854 | return; | ||
855 | } | ||
856 | } | ||
835 | /* If no lseg, fall back to write through mds */ | 857 | /* If no lseg, fall back to write through mds */ |
836 | if (pgio->pg_lseg == NULL) | 858 | if (pgio->pg_lseg == NULL) |
837 | goto out_mds; | 859 | goto out_mds; |
@@ -867,18 +889,25 @@ static unsigned int | |||
867 | ff_layout_pg_get_mirror_count_write(struct nfs_pageio_descriptor *pgio, | 889 | ff_layout_pg_get_mirror_count_write(struct nfs_pageio_descriptor *pgio, |
868 | struct nfs_page *req) | 890 | struct nfs_page *req) |
869 | { | 891 | { |
870 | if (!pgio->pg_lseg) | 892 | if (!pgio->pg_lseg) { |
871 | pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, | 893 | pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, |
872 | req->wb_context, | 894 | req->wb_context, |
873 | 0, | 895 | 0, |
874 | NFS4_MAX_UINT64, | 896 | NFS4_MAX_UINT64, |
875 | IOMODE_RW, | 897 | IOMODE_RW, |
876 | GFP_NOFS); | 898 | GFP_NOFS); |
899 | if (IS_ERR(pgio->pg_lseg)) { | ||
900 | pgio->pg_error = PTR_ERR(pgio->pg_lseg); | ||
901 | pgio->pg_lseg = NULL; | ||
902 | goto out; | ||
903 | } | ||
904 | } | ||
877 | if (pgio->pg_lseg) | 905 | if (pgio->pg_lseg) |
878 | return FF_LAYOUT_MIRROR_COUNT(pgio->pg_lseg); | 906 | return FF_LAYOUT_MIRROR_COUNT(pgio->pg_lseg); |
879 | 907 | ||
880 | /* no lseg means that pnfs is not in use, so no mirroring here */ | 908 | /* no lseg means that pnfs is not in use, so no mirroring here */ |
881 | nfs_pageio_reset_write_mds(pgio); | 909 | nfs_pageio_reset_write_mds(pgio); |
910 | out: | ||
882 | return 1; | 911 | return 1; |
883 | } | 912 | } |
884 | 913 | ||
@@ -912,18 +941,7 @@ static void ff_layout_reset_write(struct nfs_pgio_header *hdr, bool retry_pnfs) | |||
912 | hdr->args.count, | 941 | hdr->args.count, |
913 | (unsigned long long)hdr->args.offset); | 942 | (unsigned long long)hdr->args.offset); |
914 | 943 | ||
915 | if (!hdr->dreq) { | 944 | hdr->completion_ops->reschedule_io(hdr); |
916 | struct nfs_open_context *ctx; | ||
917 | |||
918 | ctx = nfs_list_entry(hdr->pages.next)->wb_context; | ||
919 | set_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags); | ||
920 | hdr->completion_ops->error_cleanup(&hdr->pages); | ||
921 | } else { | ||
922 | nfs_direct_set_resched_writes(hdr->dreq); | ||
923 | /* fake unstable write to let common nfs resend pages */ | ||
924 | hdr->verf.committed = NFS_UNSTABLE; | ||
925 | hdr->good_bytes = hdr->args.count; | ||
926 | } | ||
927 | return; | 945 | return; |
928 | } | 946 | } |
929 | 947 | ||
@@ -1101,7 +1119,7 @@ static int ff_layout_async_handle_error_v3(struct rpc_task *task, | |||
1101 | return -NFS4ERR_RESET_TO_PNFS; | 1119 | return -NFS4ERR_RESET_TO_PNFS; |
1102 | out_retry: | 1120 | out_retry: |
1103 | task->tk_status = 0; | 1121 | task->tk_status = 0; |
1104 | rpc_restart_call(task); | 1122 | rpc_restart_call_prepare(task); |
1105 | rpc_delay(task, NFS_JUKEBOX_RETRY_TIME); | 1123 | rpc_delay(task, NFS_JUKEBOX_RETRY_TIME); |
1106 | return -EAGAIN; | 1124 | return -EAGAIN; |
1107 | } | 1125 | } |
@@ -1159,6 +1177,14 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg, | |||
1159 | } | 1177 | } |
1160 | } | 1178 | } |
1161 | 1179 | ||
1180 | switch (status) { | ||
1181 | case NFS4ERR_DELAY: | ||
1182 | case NFS4ERR_GRACE: | ||
1183 | return; | ||
1184 | default: | ||
1185 | break; | ||
1186 | } | ||
1187 | |||
1162 | mirror = FF_LAYOUT_COMP(lseg, idx); | 1188 | mirror = FF_LAYOUT_COMP(lseg, idx); |
1163 | err = ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout), | 1189 | err = ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout), |
1164 | mirror, offset, length, status, opnum, | 1190 | mirror, offset, length, status, opnum, |
@@ -1242,14 +1268,31 @@ ff_layout_reset_to_mds(struct pnfs_layout_segment *lseg, int idx) | |||
1242 | return ff_layout_test_devid_unavailable(node); | 1268 | return ff_layout_test_devid_unavailable(node); |
1243 | } | 1269 | } |
1244 | 1270 | ||
1245 | static int ff_layout_read_prepare_common(struct rpc_task *task, | 1271 | static void ff_layout_read_record_layoutstats_start(struct rpc_task *task, |
1246 | struct nfs_pgio_header *hdr) | 1272 | struct nfs_pgio_header *hdr) |
1247 | { | 1273 | { |
1274 | if (test_and_set_bit(NFS_IOHDR_STAT, &hdr->flags)) | ||
1275 | return; | ||
1248 | nfs4_ff_layout_stat_io_start_read(hdr->inode, | 1276 | nfs4_ff_layout_stat_io_start_read(hdr->inode, |
1249 | FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx), | 1277 | FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx), |
1250 | hdr->args.count, | 1278 | hdr->args.count, |
1251 | task->tk_start); | 1279 | task->tk_start); |
1280 | } | ||
1281 | |||
1282 | static void ff_layout_read_record_layoutstats_done(struct rpc_task *task, | ||
1283 | struct nfs_pgio_header *hdr) | ||
1284 | { | ||
1285 | if (!test_and_clear_bit(NFS_IOHDR_STAT, &hdr->flags)) | ||
1286 | return; | ||
1287 | nfs4_ff_layout_stat_io_end_read(task, | ||
1288 | FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx), | ||
1289 | hdr->args.count, | ||
1290 | hdr->res.count); | ||
1291 | } | ||
1252 | 1292 | ||
1293 | static int ff_layout_read_prepare_common(struct rpc_task *task, | ||
1294 | struct nfs_pgio_header *hdr) | ||
1295 | { | ||
1253 | if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) { | 1296 | if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) { |
1254 | rpc_exit(task, -EIO); | 1297 | rpc_exit(task, -EIO); |
1255 | return -EIO; | 1298 | return -EIO; |
@@ -1265,6 +1308,7 @@ static int ff_layout_read_prepare_common(struct rpc_task *task, | |||
1265 | } | 1308 | } |
1266 | hdr->pgio_done_cb = ff_layout_read_done_cb; | 1309 | hdr->pgio_done_cb = ff_layout_read_done_cb; |
1267 | 1310 | ||
1311 | ff_layout_read_record_layoutstats_start(task, hdr); | ||
1268 | return 0; | 1312 | return 0; |
1269 | } | 1313 | } |
1270 | 1314 | ||
@@ -1323,10 +1367,6 @@ static void ff_layout_read_call_done(struct rpc_task *task, void *data) | |||
1323 | 1367 | ||
1324 | dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status); | 1368 | dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status); |
1325 | 1369 | ||
1326 | nfs4_ff_layout_stat_io_end_read(task, | ||
1327 | FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx), | ||
1328 | hdr->args.count, hdr->res.count); | ||
1329 | |||
1330 | if (test_bit(NFS_IOHDR_REDO, &hdr->flags) && | 1370 | if (test_bit(NFS_IOHDR_REDO, &hdr->flags) && |
1331 | task->tk_status == 0) { | 1371 | task->tk_status == 0) { |
1332 | nfs4_sequence_done(task, &hdr->res.seq_res); | 1372 | nfs4_sequence_done(task, &hdr->res.seq_res); |
@@ -1341,10 +1381,20 @@ static void ff_layout_read_count_stats(struct rpc_task *task, void *data) | |||
1341 | { | 1381 | { |
1342 | struct nfs_pgio_header *hdr = data; | 1382 | struct nfs_pgio_header *hdr = data; |
1343 | 1383 | ||
1384 | ff_layout_read_record_layoutstats_done(task, hdr); | ||
1344 | rpc_count_iostats_metrics(task, | 1385 | rpc_count_iostats_metrics(task, |
1345 | &NFS_CLIENT(hdr->inode)->cl_metrics[NFSPROC4_CLNT_READ]); | 1386 | &NFS_CLIENT(hdr->inode)->cl_metrics[NFSPROC4_CLNT_READ]); |
1346 | } | 1387 | } |
1347 | 1388 | ||
1389 | static void ff_layout_read_release(void *data) | ||
1390 | { | ||
1391 | struct nfs_pgio_header *hdr = data; | ||
1392 | |||
1393 | ff_layout_read_record_layoutstats_done(&hdr->task, hdr); | ||
1394 | pnfs_generic_rw_release(data); | ||
1395 | } | ||
1396 | |||
1397 | |||
1348 | static int ff_layout_write_done_cb(struct rpc_task *task, | 1398 | static int ff_layout_write_done_cb(struct rpc_task *task, |
1349 | struct nfs_pgio_header *hdr) | 1399 | struct nfs_pgio_header *hdr) |
1350 | { | 1400 | { |
@@ -1362,15 +1412,12 @@ static int ff_layout_write_done_cb(struct rpc_task *task, | |||
1362 | 1412 | ||
1363 | switch (err) { | 1413 | switch (err) { |
1364 | case -NFS4ERR_RESET_TO_PNFS: | 1414 | case -NFS4ERR_RESET_TO_PNFS: |
1365 | pnfs_set_retry_layoutget(hdr->lseg->pls_layout); | ||
1366 | ff_layout_reset_write(hdr, true); | 1415 | ff_layout_reset_write(hdr, true); |
1367 | return task->tk_status; | 1416 | return task->tk_status; |
1368 | case -NFS4ERR_RESET_TO_MDS: | 1417 | case -NFS4ERR_RESET_TO_MDS: |
1369 | pnfs_clear_retry_layoutget(hdr->lseg->pls_layout); | ||
1370 | ff_layout_reset_write(hdr, false); | 1418 | ff_layout_reset_write(hdr, false); |
1371 | return task->tk_status; | 1419 | return task->tk_status; |
1372 | case -EAGAIN: | 1420 | case -EAGAIN: |
1373 | rpc_restart_call_prepare(task); | ||
1374 | return -EAGAIN; | 1421 | return -EAGAIN; |
1375 | } | 1422 | } |
1376 | 1423 | ||
@@ -1402,11 +1449,9 @@ static int ff_layout_commit_done_cb(struct rpc_task *task, | |||
1402 | 1449 | ||
1403 | switch (err) { | 1450 | switch (err) { |
1404 | case -NFS4ERR_RESET_TO_PNFS: | 1451 | case -NFS4ERR_RESET_TO_PNFS: |
1405 | pnfs_set_retry_layoutget(data->lseg->pls_layout); | ||
1406 | pnfs_generic_prepare_to_resend_writes(data); | 1452 | pnfs_generic_prepare_to_resend_writes(data); |
1407 | return -EAGAIN; | 1453 | return -EAGAIN; |
1408 | case -NFS4ERR_RESET_TO_MDS: | 1454 | case -NFS4ERR_RESET_TO_MDS: |
1409 | pnfs_clear_retry_layoutget(data->lseg->pls_layout); | ||
1410 | pnfs_generic_prepare_to_resend_writes(data); | 1455 | pnfs_generic_prepare_to_resend_writes(data); |
1411 | return -EAGAIN; | 1456 | return -EAGAIN; |
1412 | case -EAGAIN: | 1457 | case -EAGAIN: |
@@ -1421,14 +1466,31 @@ static int ff_layout_commit_done_cb(struct rpc_task *task, | |||
1421 | return 0; | 1466 | return 0; |
1422 | } | 1467 | } |
1423 | 1468 | ||
1424 | static int ff_layout_write_prepare_common(struct rpc_task *task, | 1469 | static void ff_layout_write_record_layoutstats_start(struct rpc_task *task, |
1425 | struct nfs_pgio_header *hdr) | 1470 | struct nfs_pgio_header *hdr) |
1426 | { | 1471 | { |
1472 | if (test_and_set_bit(NFS_IOHDR_STAT, &hdr->flags)) | ||
1473 | return; | ||
1427 | nfs4_ff_layout_stat_io_start_write(hdr->inode, | 1474 | nfs4_ff_layout_stat_io_start_write(hdr->inode, |
1428 | FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx), | 1475 | FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx), |
1429 | hdr->args.count, | 1476 | hdr->args.count, |
1430 | task->tk_start); | 1477 | task->tk_start); |
1478 | } | ||
1479 | |||
1480 | static void ff_layout_write_record_layoutstats_done(struct rpc_task *task, | ||
1481 | struct nfs_pgio_header *hdr) | ||
1482 | { | ||
1483 | if (!test_and_clear_bit(NFS_IOHDR_STAT, &hdr->flags)) | ||
1484 | return; | ||
1485 | nfs4_ff_layout_stat_io_end_write(task, | ||
1486 | FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx), | ||
1487 | hdr->args.count, hdr->res.count, | ||
1488 | hdr->res.verf->committed); | ||
1489 | } | ||
1431 | 1490 | ||
1491 | static int ff_layout_write_prepare_common(struct rpc_task *task, | ||
1492 | struct nfs_pgio_header *hdr) | ||
1493 | { | ||
1432 | if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) { | 1494 | if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) { |
1433 | rpc_exit(task, -EIO); | 1495 | rpc_exit(task, -EIO); |
1434 | return -EIO; | 1496 | return -EIO; |
@@ -1445,6 +1507,7 @@ static int ff_layout_write_prepare_common(struct rpc_task *task, | |||
1445 | return -EAGAIN; | 1507 | return -EAGAIN; |
1446 | } | 1508 | } |
1447 | 1509 | ||
1510 | ff_layout_write_record_layoutstats_start(task, hdr); | ||
1448 | return 0; | 1511 | return 0; |
1449 | } | 1512 | } |
1450 | 1513 | ||
@@ -1480,11 +1543,6 @@ static void ff_layout_write_call_done(struct rpc_task *task, void *data) | |||
1480 | { | 1543 | { |
1481 | struct nfs_pgio_header *hdr = data; | 1544 | struct nfs_pgio_header *hdr = data; |
1482 | 1545 | ||
1483 | nfs4_ff_layout_stat_io_end_write(task, | ||
1484 | FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx), | ||
1485 | hdr->args.count, hdr->res.count, | ||
1486 | hdr->res.verf->committed); | ||
1487 | |||
1488 | if (test_bit(NFS_IOHDR_REDO, &hdr->flags) && | 1546 | if (test_bit(NFS_IOHDR_REDO, &hdr->flags) && |
1489 | task->tk_status == 0) { | 1547 | task->tk_status == 0) { |
1490 | nfs4_sequence_done(task, &hdr->res.seq_res); | 1548 | nfs4_sequence_done(task, &hdr->res.seq_res); |
@@ -1499,18 +1557,53 @@ static void ff_layout_write_count_stats(struct rpc_task *task, void *data) | |||
1499 | { | 1557 | { |
1500 | struct nfs_pgio_header *hdr = data; | 1558 | struct nfs_pgio_header *hdr = data; |
1501 | 1559 | ||
1560 | ff_layout_write_record_layoutstats_done(task, hdr); | ||
1502 | rpc_count_iostats_metrics(task, | 1561 | rpc_count_iostats_metrics(task, |
1503 | &NFS_CLIENT(hdr->inode)->cl_metrics[NFSPROC4_CLNT_WRITE]); | 1562 | &NFS_CLIENT(hdr->inode)->cl_metrics[NFSPROC4_CLNT_WRITE]); |
1504 | } | 1563 | } |
1505 | 1564 | ||
1506 | static void ff_layout_commit_prepare_common(struct rpc_task *task, | 1565 | static void ff_layout_write_release(void *data) |
1566 | { | ||
1567 | struct nfs_pgio_header *hdr = data; | ||
1568 | |||
1569 | ff_layout_write_record_layoutstats_done(&hdr->task, hdr); | ||
1570 | pnfs_generic_rw_release(data); | ||
1571 | } | ||
1572 | |||
1573 | static void ff_layout_commit_record_layoutstats_start(struct rpc_task *task, | ||
1507 | struct nfs_commit_data *cdata) | 1574 | struct nfs_commit_data *cdata) |
1508 | { | 1575 | { |
1576 | if (test_and_set_bit(NFS_IOHDR_STAT, &cdata->flags)) | ||
1577 | return; | ||
1509 | nfs4_ff_layout_stat_io_start_write(cdata->inode, | 1578 | nfs4_ff_layout_stat_io_start_write(cdata->inode, |
1510 | FF_LAYOUT_COMP(cdata->lseg, cdata->ds_commit_index), | 1579 | FF_LAYOUT_COMP(cdata->lseg, cdata->ds_commit_index), |
1511 | 0, task->tk_start); | 1580 | 0, task->tk_start); |
1512 | } | 1581 | } |
1513 | 1582 | ||
1583 | static void ff_layout_commit_record_layoutstats_done(struct rpc_task *task, | ||
1584 | struct nfs_commit_data *cdata) | ||
1585 | { | ||
1586 | struct nfs_page *req; | ||
1587 | __u64 count = 0; | ||
1588 | |||
1589 | if (!test_and_clear_bit(NFS_IOHDR_STAT, &cdata->flags)) | ||
1590 | return; | ||
1591 | |||
1592 | if (task->tk_status == 0) { | ||
1593 | list_for_each_entry(req, &cdata->pages, wb_list) | ||
1594 | count += req->wb_bytes; | ||
1595 | } | ||
1596 | nfs4_ff_layout_stat_io_end_write(task, | ||
1597 | FF_LAYOUT_COMP(cdata->lseg, cdata->ds_commit_index), | ||
1598 | count, count, NFS_FILE_SYNC); | ||
1599 | } | ||
1600 | |||
1601 | static void ff_layout_commit_prepare_common(struct rpc_task *task, | ||
1602 | struct nfs_commit_data *cdata) | ||
1603 | { | ||
1604 | ff_layout_commit_record_layoutstats_start(task, cdata); | ||
1605 | } | ||
1606 | |||
1514 | static void ff_layout_commit_prepare_v3(struct rpc_task *task, void *data) | 1607 | static void ff_layout_commit_prepare_v3(struct rpc_task *task, void *data) |
1515 | { | 1608 | { |
1516 | ff_layout_commit_prepare_common(task, data); | 1609 | ff_layout_commit_prepare_common(task, data); |
@@ -1531,19 +1624,6 @@ static void ff_layout_commit_prepare_v4(struct rpc_task *task, void *data) | |||
1531 | 1624 | ||
1532 | static void ff_layout_commit_done(struct rpc_task *task, void *data) | 1625 | static void ff_layout_commit_done(struct rpc_task *task, void *data) |
1533 | { | 1626 | { |
1534 | struct nfs_commit_data *cdata = data; | ||
1535 | struct nfs_page *req; | ||
1536 | __u64 count = 0; | ||
1537 | |||
1538 | if (task->tk_status == 0) { | ||
1539 | list_for_each_entry(req, &cdata->pages, wb_list) | ||
1540 | count += req->wb_bytes; | ||
1541 | } | ||
1542 | |||
1543 | nfs4_ff_layout_stat_io_end_write(task, | ||
1544 | FF_LAYOUT_COMP(cdata->lseg, cdata->ds_commit_index), | ||
1545 | count, count, NFS_FILE_SYNC); | ||
1546 | |||
1547 | pnfs_generic_write_commit_done(task, data); | 1627 | pnfs_generic_write_commit_done(task, data); |
1548 | } | 1628 | } |
1549 | 1629 | ||
@@ -1551,50 +1631,59 @@ static void ff_layout_commit_count_stats(struct rpc_task *task, void *data) | |||
1551 | { | 1631 | { |
1552 | struct nfs_commit_data *cdata = data; | 1632 | struct nfs_commit_data *cdata = data; |
1553 | 1633 | ||
1634 | ff_layout_commit_record_layoutstats_done(task, cdata); | ||
1554 | rpc_count_iostats_metrics(task, | 1635 | rpc_count_iostats_metrics(task, |
1555 | &NFS_CLIENT(cdata->inode)->cl_metrics[NFSPROC4_CLNT_COMMIT]); | 1636 | &NFS_CLIENT(cdata->inode)->cl_metrics[NFSPROC4_CLNT_COMMIT]); |
1556 | } | 1637 | } |
1557 | 1638 | ||
1639 | static void ff_layout_commit_release(void *data) | ||
1640 | { | ||
1641 | struct nfs_commit_data *cdata = data; | ||
1642 | |||
1643 | ff_layout_commit_record_layoutstats_done(&cdata->task, cdata); | ||
1644 | pnfs_generic_commit_release(data); | ||
1645 | } | ||
1646 | |||
1558 | static const struct rpc_call_ops ff_layout_read_call_ops_v3 = { | 1647 | static const struct rpc_call_ops ff_layout_read_call_ops_v3 = { |
1559 | .rpc_call_prepare = ff_layout_read_prepare_v3, | 1648 | .rpc_call_prepare = ff_layout_read_prepare_v3, |
1560 | .rpc_call_done = ff_layout_read_call_done, | 1649 | .rpc_call_done = ff_layout_read_call_done, |
1561 | .rpc_count_stats = ff_layout_read_count_stats, | 1650 | .rpc_count_stats = ff_layout_read_count_stats, |
1562 | .rpc_release = pnfs_generic_rw_release, | 1651 | .rpc_release = ff_layout_read_release, |
1563 | }; | 1652 | }; |
1564 | 1653 | ||
1565 | static const struct rpc_call_ops ff_layout_read_call_ops_v4 = { | 1654 | static const struct rpc_call_ops ff_layout_read_call_ops_v4 = { |
1566 | .rpc_call_prepare = ff_layout_read_prepare_v4, | 1655 | .rpc_call_prepare = ff_layout_read_prepare_v4, |
1567 | .rpc_call_done = ff_layout_read_call_done, | 1656 | .rpc_call_done = ff_layout_read_call_done, |
1568 | .rpc_count_stats = ff_layout_read_count_stats, | 1657 | .rpc_count_stats = ff_layout_read_count_stats, |
1569 | .rpc_release = pnfs_generic_rw_release, | 1658 | .rpc_release = ff_layout_read_release, |
1570 | }; | 1659 | }; |
1571 | 1660 | ||
1572 | static const struct rpc_call_ops ff_layout_write_call_ops_v3 = { | 1661 | static const struct rpc_call_ops ff_layout_write_call_ops_v3 = { |
1573 | .rpc_call_prepare = ff_layout_write_prepare_v3, | 1662 | .rpc_call_prepare = ff_layout_write_prepare_v3, |
1574 | .rpc_call_done = ff_layout_write_call_done, | 1663 | .rpc_call_done = ff_layout_write_call_done, |
1575 | .rpc_count_stats = ff_layout_write_count_stats, | 1664 | .rpc_count_stats = ff_layout_write_count_stats, |
1576 | .rpc_release = pnfs_generic_rw_release, | 1665 | .rpc_release = ff_layout_write_release, |
1577 | }; | 1666 | }; |
1578 | 1667 | ||
1579 | static const struct rpc_call_ops ff_layout_write_call_ops_v4 = { | 1668 | static const struct rpc_call_ops ff_layout_write_call_ops_v4 = { |
1580 | .rpc_call_prepare = ff_layout_write_prepare_v4, | 1669 | .rpc_call_prepare = ff_layout_write_prepare_v4, |
1581 | .rpc_call_done = ff_layout_write_call_done, | 1670 | .rpc_call_done = ff_layout_write_call_done, |
1582 | .rpc_count_stats = ff_layout_write_count_stats, | 1671 | .rpc_count_stats = ff_layout_write_count_stats, |
1583 | .rpc_release = pnfs_generic_rw_release, | 1672 | .rpc_release = ff_layout_write_release, |
1584 | }; | 1673 | }; |
1585 | 1674 | ||
1586 | static const struct rpc_call_ops ff_layout_commit_call_ops_v3 = { | 1675 | static const struct rpc_call_ops ff_layout_commit_call_ops_v3 = { |
1587 | .rpc_call_prepare = ff_layout_commit_prepare_v3, | 1676 | .rpc_call_prepare = ff_layout_commit_prepare_v3, |
1588 | .rpc_call_done = ff_layout_commit_done, | 1677 | .rpc_call_done = ff_layout_commit_done, |
1589 | .rpc_count_stats = ff_layout_commit_count_stats, | 1678 | .rpc_count_stats = ff_layout_commit_count_stats, |
1590 | .rpc_release = pnfs_generic_commit_release, | 1679 | .rpc_release = ff_layout_commit_release, |
1591 | }; | 1680 | }; |
1592 | 1681 | ||
1593 | static const struct rpc_call_ops ff_layout_commit_call_ops_v4 = { | 1682 | static const struct rpc_call_ops ff_layout_commit_call_ops_v4 = { |
1594 | .rpc_call_prepare = ff_layout_commit_prepare_v4, | 1683 | .rpc_call_prepare = ff_layout_commit_prepare_v4, |
1595 | .rpc_call_done = ff_layout_commit_done, | 1684 | .rpc_call_done = ff_layout_commit_done, |
1596 | .rpc_count_stats = ff_layout_commit_count_stats, | 1685 | .rpc_count_stats = ff_layout_commit_count_stats, |
1597 | .rpc_release = pnfs_generic_commit_release, | 1686 | .rpc_release = ff_layout_commit_release, |
1598 | }; | 1687 | }; |
1599 | 1688 | ||
1600 | static enum pnfs_try_status | 1689 | static enum pnfs_try_status |
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h index 2bb08bc6aaf0..dd353bb7dc0a 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.h +++ b/fs/nfs/flexfilelayout/flexfilelayout.h | |||
@@ -85,6 +85,7 @@ struct nfs4_ff_layout_mirror { | |||
85 | struct nfs4_ff_layoutstat write_stat; | 85 | struct nfs4_ff_layoutstat write_stat; |
86 | ktime_t start_time; | 86 | ktime_t start_time; |
87 | ktime_t last_report_time; | 87 | ktime_t last_report_time; |
88 | u32 report_interval; | ||
88 | }; | 89 | }; |
89 | 90 | ||
90 | struct nfs4_ff_layout_segment { | 91 | struct nfs4_ff_layout_segment { |
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index e125e55de86d..bd0327541366 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c | |||
@@ -429,22 +429,14 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx, | |||
429 | mirror, lseg->pls_range.offset, | 429 | mirror, lseg->pls_range.offset, |
430 | lseg->pls_range.length, NFS4ERR_NXIO, | 430 | lseg->pls_range.length, NFS4ERR_NXIO, |
431 | OP_ILLEGAL, GFP_NOIO); | 431 | OP_ILLEGAL, GFP_NOIO); |
432 | if (fail_return) { | 432 | if (!fail_return) { |
433 | pnfs_error_mark_layout_for_return(ino, lseg); | ||
434 | if (ff_layout_has_available_ds(lseg)) | ||
435 | pnfs_set_retry_layoutget(lseg->pls_layout); | ||
436 | else | ||
437 | pnfs_clear_retry_layoutget(lseg->pls_layout); | ||
438 | |||
439 | } else { | ||
440 | if (ff_layout_has_available_ds(lseg)) | 433 | if (ff_layout_has_available_ds(lseg)) |
441 | set_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, | 434 | set_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, |
442 | &lseg->pls_layout->plh_flags); | 435 | &lseg->pls_layout->plh_flags); |
443 | else { | 436 | else |
444 | pnfs_error_mark_layout_for_return(ino, lseg); | 437 | pnfs_error_mark_layout_for_return(ino, lseg); |
445 | pnfs_clear_retry_layoutget(lseg->pls_layout); | 438 | } else |
446 | } | 439 | pnfs_error_mark_layout_for_return(ino, lseg); |
447 | } | ||
448 | } | 440 | } |
449 | out_update_creds: | 441 | out_update_creds: |
450 | if (ff_layout_update_mirror_cred(mirror, ds)) | 442 | if (ff_layout_update_mirror_cred(mirror, ds)) |
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index bdb4dc7b4ecd..c11e855e0e18 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c | |||
@@ -71,19 +71,25 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fattr) | |||
71 | return nfs_fileid_to_ino_t(fattr->fileid); | 71 | return nfs_fileid_to_ino_t(fattr->fileid); |
72 | } | 72 | } |
73 | 73 | ||
74 | /** | 74 | static int nfs_wait_killable(int mode) |
75 | * nfs_wait_bit_killable - helper for functions that are sleeping on bit locks | ||
76 | * @word: long word containing the bit lock | ||
77 | */ | ||
78 | int nfs_wait_bit_killable(struct wait_bit_key *key, int mode) | ||
79 | { | 75 | { |
80 | freezable_schedule_unsafe(); | 76 | freezable_schedule_unsafe(); |
81 | if (signal_pending_state(mode, current)) | 77 | if (signal_pending_state(mode, current)) |
82 | return -ERESTARTSYS; | 78 | return -ERESTARTSYS; |
83 | return 0; | 79 | return 0; |
84 | } | 80 | } |
81 | |||
82 | int nfs_wait_bit_killable(struct wait_bit_key *key, int mode) | ||
83 | { | ||
84 | return nfs_wait_killable(mode); | ||
85 | } | ||
85 | EXPORT_SYMBOL_GPL(nfs_wait_bit_killable); | 86 | EXPORT_SYMBOL_GPL(nfs_wait_bit_killable); |
86 | 87 | ||
88 | int nfs_wait_atomic_killable(atomic_t *p) | ||
89 | { | ||
90 | return nfs_wait_killable(TASK_KILLABLE); | ||
91 | } | ||
92 | |||
87 | /** | 93 | /** |
88 | * nfs_compat_user_ino64 - returns the user-visible inode number | 94 | * nfs_compat_user_ino64 - returns the user-visible inode number |
89 | * @fileid: 64-bit fileid | 95 | * @fileid: 64-bit fileid |
@@ -700,7 +706,7 @@ static void nfs_init_lock_context(struct nfs_lock_context *l_ctx) | |||
700 | l_ctx->lockowner.l_owner = current->files; | 706 | l_ctx->lockowner.l_owner = current->files; |
701 | l_ctx->lockowner.l_pid = current->tgid; | 707 | l_ctx->lockowner.l_pid = current->tgid; |
702 | INIT_LIST_HEAD(&l_ctx->list); | 708 | INIT_LIST_HEAD(&l_ctx->list); |
703 | nfs_iocounter_init(&l_ctx->io_count); | 709 | atomic_set(&l_ctx->io_count, 0); |
704 | } | 710 | } |
705 | 711 | ||
706 | static struct nfs_lock_context *__nfs_find_lock_context(struct nfs_open_context *ctx) | 712 | static struct nfs_lock_context *__nfs_find_lock_context(struct nfs_open_context *ctx) |
@@ -913,6 +919,12 @@ void nfs_file_clear_open_context(struct file *filp) | |||
913 | if (ctx) { | 919 | if (ctx) { |
914 | struct inode *inode = d_inode(ctx->dentry); | 920 | struct inode *inode = d_inode(ctx->dentry); |
915 | 921 | ||
922 | /* | ||
923 | * We fatal error on write before. Try to writeback | ||
924 | * every page again. | ||
925 | */ | ||
926 | if (ctx->error < 0) | ||
927 | invalidate_inode_pages2(inode->i_mapping); | ||
916 | filp->private_data = NULL; | 928 | filp->private_data = NULL; |
917 | spin_lock(&inode->i_lock); | 929 | spin_lock(&inode->i_lock); |
918 | list_move_tail(&ctx->list, &NFS_I(inode)->open_files); | 930 | list_move_tail(&ctx->list, &NFS_I(inode)->open_files); |
@@ -1663,6 +1675,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
1663 | unsigned long invalid = 0; | 1675 | unsigned long invalid = 0; |
1664 | unsigned long now = jiffies; | 1676 | unsigned long now = jiffies; |
1665 | unsigned long save_cache_validity; | 1677 | unsigned long save_cache_validity; |
1678 | bool cache_revalidated = true; | ||
1666 | 1679 | ||
1667 | dfprintk(VFS, "NFS: %s(%s/%lu fh_crc=0x%08x ct=%d info=0x%x)\n", | 1680 | dfprintk(VFS, "NFS: %s(%s/%lu fh_crc=0x%08x ct=%d info=0x%x)\n", |
1668 | __func__, inode->i_sb->s_id, inode->i_ino, | 1681 | __func__, inode->i_sb->s_id, inode->i_ino, |
@@ -1724,22 +1737,28 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
1724 | nfs_force_lookup_revalidate(inode); | 1737 | nfs_force_lookup_revalidate(inode); |
1725 | inode->i_version = fattr->change_attr; | 1738 | inode->i_version = fattr->change_attr; |
1726 | } | 1739 | } |
1727 | } else | 1740 | } else { |
1728 | nfsi->cache_validity |= save_cache_validity; | 1741 | nfsi->cache_validity |= save_cache_validity; |
1742 | cache_revalidated = false; | ||
1743 | } | ||
1729 | 1744 | ||
1730 | if (fattr->valid & NFS_ATTR_FATTR_MTIME) { | 1745 | if (fattr->valid & NFS_ATTR_FATTR_MTIME) { |
1731 | memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); | 1746 | memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); |
1732 | } else if (server->caps & NFS_CAP_MTIME) | 1747 | } else if (server->caps & NFS_CAP_MTIME) { |
1733 | nfsi->cache_validity |= save_cache_validity & | 1748 | nfsi->cache_validity |= save_cache_validity & |
1734 | (NFS_INO_INVALID_ATTR | 1749 | (NFS_INO_INVALID_ATTR |
1735 | | NFS_INO_REVAL_FORCED); | 1750 | | NFS_INO_REVAL_FORCED); |
1751 | cache_revalidated = false; | ||
1752 | } | ||
1736 | 1753 | ||
1737 | if (fattr->valid & NFS_ATTR_FATTR_CTIME) { | 1754 | if (fattr->valid & NFS_ATTR_FATTR_CTIME) { |
1738 | memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); | 1755 | memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); |
1739 | } else if (server->caps & NFS_CAP_CTIME) | 1756 | } else if (server->caps & NFS_CAP_CTIME) { |
1740 | nfsi->cache_validity |= save_cache_validity & | 1757 | nfsi->cache_validity |= save_cache_validity & |
1741 | (NFS_INO_INVALID_ATTR | 1758 | (NFS_INO_INVALID_ATTR |
1742 | | NFS_INO_REVAL_FORCED); | 1759 | | NFS_INO_REVAL_FORCED); |
1760 | cache_revalidated = false; | ||
1761 | } | ||
1743 | 1762 | ||
1744 | /* Check if our cached file size is stale */ | 1763 | /* Check if our cached file size is stale */ |
1745 | if (fattr->valid & NFS_ATTR_FATTR_SIZE) { | 1764 | if (fattr->valid & NFS_ATTR_FATTR_SIZE) { |
@@ -1759,19 +1778,23 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
1759 | (long long)cur_isize, | 1778 | (long long)cur_isize, |
1760 | (long long)new_isize); | 1779 | (long long)new_isize); |
1761 | } | 1780 | } |
1762 | } else | 1781 | } else { |
1763 | nfsi->cache_validity |= save_cache_validity & | 1782 | nfsi->cache_validity |= save_cache_validity & |
1764 | (NFS_INO_INVALID_ATTR | 1783 | (NFS_INO_INVALID_ATTR |
1765 | | NFS_INO_REVAL_PAGECACHE | 1784 | | NFS_INO_REVAL_PAGECACHE |
1766 | | NFS_INO_REVAL_FORCED); | 1785 | | NFS_INO_REVAL_FORCED); |
1786 | cache_revalidated = false; | ||
1787 | } | ||
1767 | 1788 | ||
1768 | 1789 | ||
1769 | if (fattr->valid & NFS_ATTR_FATTR_ATIME) | 1790 | if (fattr->valid & NFS_ATTR_FATTR_ATIME) |
1770 | memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime)); | 1791 | memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime)); |
1771 | else if (server->caps & NFS_CAP_ATIME) | 1792 | else if (server->caps & NFS_CAP_ATIME) { |
1772 | nfsi->cache_validity |= save_cache_validity & | 1793 | nfsi->cache_validity |= save_cache_validity & |
1773 | (NFS_INO_INVALID_ATIME | 1794 | (NFS_INO_INVALID_ATIME |
1774 | | NFS_INO_REVAL_FORCED); | 1795 | | NFS_INO_REVAL_FORCED); |
1796 | cache_revalidated = false; | ||
1797 | } | ||
1775 | 1798 | ||
1776 | if (fattr->valid & NFS_ATTR_FATTR_MODE) { | 1799 | if (fattr->valid & NFS_ATTR_FATTR_MODE) { |
1777 | if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)) { | 1800 | if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)) { |
@@ -1780,36 +1803,42 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
1780 | inode->i_mode = newmode; | 1803 | inode->i_mode = newmode; |
1781 | invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; | 1804 | invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; |
1782 | } | 1805 | } |
1783 | } else if (server->caps & NFS_CAP_MODE) | 1806 | } else if (server->caps & NFS_CAP_MODE) { |
1784 | nfsi->cache_validity |= save_cache_validity & | 1807 | nfsi->cache_validity |= save_cache_validity & |
1785 | (NFS_INO_INVALID_ATTR | 1808 | (NFS_INO_INVALID_ATTR |
1786 | | NFS_INO_INVALID_ACCESS | 1809 | | NFS_INO_INVALID_ACCESS |
1787 | | NFS_INO_INVALID_ACL | 1810 | | NFS_INO_INVALID_ACL |
1788 | | NFS_INO_REVAL_FORCED); | 1811 | | NFS_INO_REVAL_FORCED); |
1812 | cache_revalidated = false; | ||
1813 | } | ||
1789 | 1814 | ||
1790 | if (fattr->valid & NFS_ATTR_FATTR_OWNER) { | 1815 | if (fattr->valid & NFS_ATTR_FATTR_OWNER) { |
1791 | if (!uid_eq(inode->i_uid, fattr->uid)) { | 1816 | if (!uid_eq(inode->i_uid, fattr->uid)) { |
1792 | invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; | 1817 | invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; |
1793 | inode->i_uid = fattr->uid; | 1818 | inode->i_uid = fattr->uid; |
1794 | } | 1819 | } |
1795 | } else if (server->caps & NFS_CAP_OWNER) | 1820 | } else if (server->caps & NFS_CAP_OWNER) { |
1796 | nfsi->cache_validity |= save_cache_validity & | 1821 | nfsi->cache_validity |= save_cache_validity & |
1797 | (NFS_INO_INVALID_ATTR | 1822 | (NFS_INO_INVALID_ATTR |
1798 | | NFS_INO_INVALID_ACCESS | 1823 | | NFS_INO_INVALID_ACCESS |
1799 | | NFS_INO_INVALID_ACL | 1824 | | NFS_INO_INVALID_ACL |
1800 | | NFS_INO_REVAL_FORCED); | 1825 | | NFS_INO_REVAL_FORCED); |
1826 | cache_revalidated = false; | ||
1827 | } | ||
1801 | 1828 | ||
1802 | if (fattr->valid & NFS_ATTR_FATTR_GROUP) { | 1829 | if (fattr->valid & NFS_ATTR_FATTR_GROUP) { |
1803 | if (!gid_eq(inode->i_gid, fattr->gid)) { | 1830 | if (!gid_eq(inode->i_gid, fattr->gid)) { |
1804 | invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; | 1831 | invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; |
1805 | inode->i_gid = fattr->gid; | 1832 | inode->i_gid = fattr->gid; |
1806 | } | 1833 | } |
1807 | } else if (server->caps & NFS_CAP_OWNER_GROUP) | 1834 | } else if (server->caps & NFS_CAP_OWNER_GROUP) { |
1808 | nfsi->cache_validity |= save_cache_validity & | 1835 | nfsi->cache_validity |= save_cache_validity & |
1809 | (NFS_INO_INVALID_ATTR | 1836 | (NFS_INO_INVALID_ATTR |
1810 | | NFS_INO_INVALID_ACCESS | 1837 | | NFS_INO_INVALID_ACCESS |
1811 | | NFS_INO_INVALID_ACL | 1838 | | NFS_INO_INVALID_ACL |
1812 | | NFS_INO_REVAL_FORCED); | 1839 | | NFS_INO_REVAL_FORCED); |
1840 | cache_revalidated = false; | ||
1841 | } | ||
1813 | 1842 | ||
1814 | if (fattr->valid & NFS_ATTR_FATTR_NLINK) { | 1843 | if (fattr->valid & NFS_ATTR_FATTR_NLINK) { |
1815 | if (inode->i_nlink != fattr->nlink) { | 1844 | if (inode->i_nlink != fattr->nlink) { |
@@ -1818,19 +1847,22 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
1818 | invalid |= NFS_INO_INVALID_DATA; | 1847 | invalid |= NFS_INO_INVALID_DATA; |
1819 | set_nlink(inode, fattr->nlink); | 1848 | set_nlink(inode, fattr->nlink); |
1820 | } | 1849 | } |
1821 | } else if (server->caps & NFS_CAP_NLINK) | 1850 | } else if (server->caps & NFS_CAP_NLINK) { |
1822 | nfsi->cache_validity |= save_cache_validity & | 1851 | nfsi->cache_validity |= save_cache_validity & |
1823 | (NFS_INO_INVALID_ATTR | 1852 | (NFS_INO_INVALID_ATTR |
1824 | | NFS_INO_REVAL_FORCED); | 1853 | | NFS_INO_REVAL_FORCED); |
1854 | cache_revalidated = false; | ||
1855 | } | ||
1825 | 1856 | ||
1826 | if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) { | 1857 | if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) { |
1827 | /* | 1858 | /* |
1828 | * report the blocks in 512byte units | 1859 | * report the blocks in 512byte units |
1829 | */ | 1860 | */ |
1830 | inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used); | 1861 | inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used); |
1831 | } | 1862 | } else if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED) |
1832 | if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED) | ||
1833 | inode->i_blocks = fattr->du.nfs2.blocks; | 1863 | inode->i_blocks = fattr->du.nfs2.blocks; |
1864 | else | ||
1865 | cache_revalidated = false; | ||
1834 | 1866 | ||
1835 | /* Update attrtimeo value if we're out of the unstable period */ | 1867 | /* Update attrtimeo value if we're out of the unstable period */ |
1836 | if (invalid & NFS_INO_INVALID_ATTR) { | 1868 | if (invalid & NFS_INO_INVALID_ATTR) { |
@@ -1840,9 +1872,13 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
1840 | /* Set barrier to be more recent than all outstanding updates */ | 1872 | /* Set barrier to be more recent than all outstanding updates */ |
1841 | nfsi->attr_gencount = nfs_inc_attr_generation_counter(); | 1873 | nfsi->attr_gencount = nfs_inc_attr_generation_counter(); |
1842 | } else { | 1874 | } else { |
1843 | if (!time_in_range_open(now, nfsi->attrtimeo_timestamp, nfsi->attrtimeo_timestamp + nfsi->attrtimeo)) { | 1875 | if (cache_revalidated) { |
1844 | if ((nfsi->attrtimeo <<= 1) > NFS_MAXATTRTIMEO(inode)) | 1876 | if (!time_in_range_open(now, nfsi->attrtimeo_timestamp, |
1845 | nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode); | 1877 | nfsi->attrtimeo_timestamp + nfsi->attrtimeo)) { |
1878 | nfsi->attrtimeo <<= 1; | ||
1879 | if (nfsi->attrtimeo > NFS_MAXATTRTIMEO(inode)) | ||
1880 | nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode); | ||
1881 | } | ||
1846 | nfsi->attrtimeo_timestamp = now; | 1882 | nfsi->attrtimeo_timestamp = now; |
1847 | } | 1883 | } |
1848 | /* Set the barrier to be more recent than this fattr */ | 1884 | /* Set the barrier to be more recent than this fattr */ |
@@ -1851,7 +1887,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
1851 | } | 1887 | } |
1852 | 1888 | ||
1853 | /* Don't declare attrcache up to date if there were no attrs! */ | 1889 | /* Don't declare attrcache up to date if there were no attrs! */ |
1854 | if (fattr->valid != 0) | 1890 | if (cache_revalidated) |
1855 | invalid &= ~NFS_INO_INVALID_ATTR; | 1891 | invalid &= ~NFS_INO_INVALID_ATTR; |
1856 | 1892 | ||
1857 | /* Don't invalidate the data if we were to blame */ | 1893 | /* Don't invalidate the data if we were to blame */ |
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 9dea85f7f918..4e8cc942336c 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h | |||
@@ -238,7 +238,7 @@ extern void nfs_pgheader_init(struct nfs_pageio_descriptor *desc, | |||
238 | struct nfs_pgio_header *hdr, | 238 | struct nfs_pgio_header *hdr, |
239 | void (*release)(struct nfs_pgio_header *hdr)); | 239 | void (*release)(struct nfs_pgio_header *hdr)); |
240 | void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos); | 240 | void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos); |
241 | int nfs_iocounter_wait(struct nfs_io_counter *c); | 241 | int nfs_iocounter_wait(struct nfs_lock_context *l_ctx); |
242 | 242 | ||
243 | extern const struct nfs_pageio_ops nfs_pgio_rw_ops; | 243 | extern const struct nfs_pageio_ops nfs_pgio_rw_ops; |
244 | struct nfs_pgio_header *nfs_pgio_header_alloc(const struct nfs_rw_ops *); | 244 | struct nfs_pgio_header *nfs_pgio_header_alloc(const struct nfs_rw_ops *); |
@@ -252,18 +252,18 @@ void nfs_free_request(struct nfs_page *req); | |||
252 | struct nfs_pgio_mirror * | 252 | struct nfs_pgio_mirror * |
253 | nfs_pgio_current_mirror(struct nfs_pageio_descriptor *desc); | 253 | nfs_pgio_current_mirror(struct nfs_pageio_descriptor *desc); |
254 | 254 | ||
255 | static inline void nfs_iocounter_init(struct nfs_io_counter *c) | ||
256 | { | ||
257 | c->flags = 0; | ||
258 | atomic_set(&c->io_count, 0); | ||
259 | } | ||
260 | |||
261 | static inline bool nfs_pgio_has_mirroring(struct nfs_pageio_descriptor *desc) | 255 | static inline bool nfs_pgio_has_mirroring(struct nfs_pageio_descriptor *desc) |
262 | { | 256 | { |
263 | WARN_ON_ONCE(desc->pg_mirror_count < 1); | 257 | WARN_ON_ONCE(desc->pg_mirror_count < 1); |
264 | return desc->pg_mirror_count > 1; | 258 | return desc->pg_mirror_count > 1; |
265 | } | 259 | } |
266 | 260 | ||
261 | static inline bool nfs_match_open_context(const struct nfs_open_context *ctx1, | ||
262 | const struct nfs_open_context *ctx2) | ||
263 | { | ||
264 | return ctx1->cred == ctx2->cred && ctx1->state == ctx2->state; | ||
265 | } | ||
266 | |||
267 | /* nfs2xdr.c */ | 267 | /* nfs2xdr.c */ |
268 | extern struct rpc_procinfo nfs_procedures[]; | 268 | extern struct rpc_procinfo nfs_procedures[]; |
269 | extern int nfs2_decode_dirent(struct xdr_stream *, | 269 | extern int nfs2_decode_dirent(struct xdr_stream *, |
@@ -380,6 +380,7 @@ extern void nfs_clear_inode(struct inode *); | |||
380 | extern void nfs_evict_inode(struct inode *); | 380 | extern void nfs_evict_inode(struct inode *); |
381 | void nfs_zap_acl_cache(struct inode *inode); | 381 | void nfs_zap_acl_cache(struct inode *inode); |
382 | extern int nfs_wait_bit_killable(struct wait_bit_key *key, int mode); | 382 | extern int nfs_wait_bit_killable(struct wait_bit_key *key, int mode); |
383 | extern int nfs_wait_atomic_killable(atomic_t *p); | ||
383 | 384 | ||
384 | /* super.c */ | 385 | /* super.c */ |
385 | extern const struct super_operations nfs_sops; | 386 | extern const struct super_operations nfs_sops; |
@@ -519,7 +520,6 @@ static inline void nfs_inode_dio_wait(struct inode *inode) | |||
519 | inode_dio_wait(inode); | 520 | inode_dio_wait(inode); |
520 | } | 521 | } |
521 | extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq); | 522 | extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq); |
522 | extern void nfs_direct_set_resched_writes(struct nfs_direct_req *dreq); | ||
523 | 523 | ||
524 | /* nfs4proc.c */ | 524 | /* nfs4proc.c */ |
525 | extern void __nfs4_read_done_cb(struct nfs_pgio_header *); | 525 | extern void __nfs4_read_done_cb(struct nfs_pgio_header *); |
@@ -696,9 +696,32 @@ static inline u32 nfs_fhandle_hash(const struct nfs_fh *fh) | |||
696 | { | 696 | { |
697 | return ~crc32_le(0xFFFFFFFF, &fh->data[0], fh->size); | 697 | return ~crc32_le(0xFFFFFFFF, &fh->data[0], fh->size); |
698 | } | 698 | } |
699 | static inline u32 nfs_stateid_hash(const nfs4_stateid *stateid) | ||
700 | { | ||
701 | return ~crc32_le(0xFFFFFFFF, &stateid->other[0], | ||
702 | NFS4_STATEID_OTHER_SIZE); | ||
703 | } | ||
699 | #else | 704 | #else |
700 | static inline u32 nfs_fhandle_hash(const struct nfs_fh *fh) | 705 | static inline u32 nfs_fhandle_hash(const struct nfs_fh *fh) |
701 | { | 706 | { |
702 | return 0; | 707 | return 0; |
703 | } | 708 | } |
709 | static inline u32 nfs_stateid_hash(nfs4_stateid *stateid) | ||
710 | { | ||
711 | return 0; | ||
712 | } | ||
704 | #endif | 713 | #endif |
714 | |||
715 | static inline bool nfs_error_is_fatal(int err) | ||
716 | { | ||
717 | switch (err) { | ||
718 | case -ERESTARTSYS: | ||
719 | case -EIO: | ||
720 | case -ENOSPC: | ||
721 | case -EROFS: | ||
722 | case -E2BIG: | ||
723 | return true; | ||
724 | default: | ||
725 | return false; | ||
726 | } | ||
727 | } | ||
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 6b1ce9825430..6e8174930a48 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c | |||
@@ -204,6 +204,8 @@ static void | |||
204 | nfs42_layoutstat_done(struct rpc_task *task, void *calldata) | 204 | nfs42_layoutstat_done(struct rpc_task *task, void *calldata) |
205 | { | 205 | { |
206 | struct nfs42_layoutstat_data *data = calldata; | 206 | struct nfs42_layoutstat_data *data = calldata; |
207 | struct inode *inode = data->inode; | ||
208 | struct pnfs_layout_hdr *lo; | ||
207 | 209 | ||
208 | if (!nfs4_sequence_done(task, &data->res.seq_res)) | 210 | if (!nfs4_sequence_done(task, &data->res.seq_res)) |
209 | return; | 211 | return; |
@@ -211,12 +213,35 @@ nfs42_layoutstat_done(struct rpc_task *task, void *calldata) | |||
211 | switch (task->tk_status) { | 213 | switch (task->tk_status) { |
212 | case 0: | 214 | case 0: |
213 | break; | 215 | break; |
216 | case -NFS4ERR_EXPIRED: | ||
217 | case -NFS4ERR_STALE_STATEID: | ||
218 | case -NFS4ERR_OLD_STATEID: | ||
219 | case -NFS4ERR_BAD_STATEID: | ||
220 | spin_lock(&inode->i_lock); | ||
221 | lo = NFS_I(inode)->layout; | ||
222 | if (lo && nfs4_stateid_match(&data->args.stateid, | ||
223 | &lo->plh_stateid)) { | ||
224 | LIST_HEAD(head); | ||
225 | |||
226 | /* | ||
227 | * Mark the bad layout state as invalid, then retry | ||
228 | * with the current stateid. | ||
229 | */ | ||
230 | set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); | ||
231 | pnfs_mark_matching_lsegs_invalid(lo, &head, NULL); | ||
232 | spin_unlock(&inode->i_lock); | ||
233 | pnfs_free_lseg_list(&head); | ||
234 | } else | ||
235 | spin_unlock(&inode->i_lock); | ||
236 | break; | ||
214 | case -ENOTSUPP: | 237 | case -ENOTSUPP: |
215 | case -EOPNOTSUPP: | 238 | case -EOPNOTSUPP: |
216 | NFS_SERVER(data->inode)->caps &= ~NFS_CAP_LAYOUTSTATS; | 239 | NFS_SERVER(inode)->caps &= ~NFS_CAP_LAYOUTSTATS; |
217 | default: | 240 | default: |
218 | dprintk("%s server returns %d\n", __func__, task->tk_status); | 241 | break; |
219 | } | 242 | } |
243 | |||
244 | dprintk("%s server returns %d\n", __func__, task->tk_status); | ||
220 | } | 245 | } |
221 | 246 | ||
222 | static void | 247 | static void |
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index c57d1332c1c8..4bfc33ad0563 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c | |||
@@ -208,6 +208,9 @@ static const u32 nfs4_pnfs_open_bitmap[3] = { | |||
208 | | FATTR4_WORD1_TIME_METADATA | 208 | | FATTR4_WORD1_TIME_METADATA |
209 | | FATTR4_WORD1_TIME_MODIFY, | 209 | | FATTR4_WORD1_TIME_MODIFY, |
210 | FATTR4_WORD2_MDSTHRESHOLD | 210 | FATTR4_WORD2_MDSTHRESHOLD |
211 | #ifdef CONFIG_NFS_V4_SECURITY_LABEL | ||
212 | | FATTR4_WORD2_SECURITY_LABEL | ||
213 | #endif | ||
211 | }; | 214 | }; |
212 | 215 | ||
213 | static const u32 nfs4_open_noattr_bitmap[3] = { | 216 | static const u32 nfs4_open_noattr_bitmap[3] = { |
@@ -1385,6 +1388,7 @@ static void __update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_s | |||
1385 | * Protect the call to nfs4_state_set_mode_locked and | 1388 | * Protect the call to nfs4_state_set_mode_locked and |
1386 | * serialise the stateid update | 1389 | * serialise the stateid update |
1387 | */ | 1390 | */ |
1391 | spin_lock(&state->owner->so_lock); | ||
1388 | write_seqlock(&state->seqlock); | 1392 | write_seqlock(&state->seqlock); |
1389 | if (deleg_stateid != NULL) { | 1393 | if (deleg_stateid != NULL) { |
1390 | nfs4_stateid_copy(&state->stateid, deleg_stateid); | 1394 | nfs4_stateid_copy(&state->stateid, deleg_stateid); |
@@ -1393,7 +1397,6 @@ static void __update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_s | |||
1393 | if (open_stateid != NULL) | 1397 | if (open_stateid != NULL) |
1394 | nfs_set_open_stateid_locked(state, open_stateid, fmode); | 1398 | nfs_set_open_stateid_locked(state, open_stateid, fmode); |
1395 | write_sequnlock(&state->seqlock); | 1399 | write_sequnlock(&state->seqlock); |
1396 | spin_lock(&state->owner->so_lock); | ||
1397 | update_open_stateflags(state, fmode); | 1400 | update_open_stateflags(state, fmode); |
1398 | spin_unlock(&state->owner->so_lock); | 1401 | spin_unlock(&state->owner->so_lock); |
1399 | } | 1402 | } |
@@ -1598,6 +1601,7 @@ _nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data) | |||
1598 | 1601 | ||
1599 | if (!data->rpc_done) { | 1602 | if (!data->rpc_done) { |
1600 | state = nfs4_try_open_cached(data); | 1603 | state = nfs4_try_open_cached(data); |
1604 | trace_nfs4_cached_open(data->state); | ||
1601 | goto out; | 1605 | goto out; |
1602 | } | 1606 | } |
1603 | 1607 | ||
@@ -2015,6 +2019,7 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata) | |||
2015 | } | 2019 | } |
2016 | return; | 2020 | return; |
2017 | unlock_no_action: | 2021 | unlock_no_action: |
2022 | trace_nfs4_cached_open(data->state); | ||
2018 | rcu_read_unlock(); | 2023 | rcu_read_unlock(); |
2019 | out_no_action: | 2024 | out_no_action: |
2020 | task->tk_action = NULL; | 2025 | task->tk_action = NULL; |
@@ -2703,6 +2708,7 @@ static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, | |||
2703 | status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); | 2708 | status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); |
2704 | if (status == 0 && state != NULL) | 2709 | if (status == 0 && state != NULL) |
2705 | renew_lease(server, timestamp); | 2710 | renew_lease(server, timestamp); |
2711 | trace_nfs4_setattr(inode, &arg.stateid, status); | ||
2706 | return status; | 2712 | return status; |
2707 | } | 2713 | } |
2708 | 2714 | ||
@@ -2719,7 +2725,6 @@ static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, | |||
2719 | int err; | 2725 | int err; |
2720 | do { | 2726 | do { |
2721 | err = _nfs4_do_setattr(inode, cred, fattr, sattr, state, ilabel, olabel); | 2727 | err = _nfs4_do_setattr(inode, cred, fattr, sattr, state, ilabel, olabel); |
2722 | trace_nfs4_setattr(inode, err); | ||
2723 | switch (err) { | 2728 | switch (err) { |
2724 | case -NFS4ERR_OPENMODE: | 2729 | case -NFS4ERR_OPENMODE: |
2725 | if (!(sattr->ia_valid & ATTR_SIZE)) { | 2730 | if (!(sattr->ia_valid & ATTR_SIZE)) { |
@@ -5048,7 +5053,6 @@ static void nfs4_init_boot_verifier(const struct nfs_client *clp, | |||
5048 | static int | 5053 | static int |
5049 | nfs4_init_nonuniform_client_string(struct nfs_client *clp) | 5054 | nfs4_init_nonuniform_client_string(struct nfs_client *clp) |
5050 | { | 5055 | { |
5051 | int result; | ||
5052 | size_t len; | 5056 | size_t len; |
5053 | char *str; | 5057 | char *str; |
5054 | 5058 | ||
@@ -5076,7 +5080,7 @@ nfs4_init_nonuniform_client_string(struct nfs_client *clp) | |||
5076 | return -ENOMEM; | 5080 | return -ENOMEM; |
5077 | 5081 | ||
5078 | rcu_read_lock(); | 5082 | rcu_read_lock(); |
5079 | result = scnprintf(str, len, "Linux NFSv4.0 %s/%s %s", | 5083 | scnprintf(str, len, "Linux NFSv4.0 %s/%s %s", |
5080 | clp->cl_ipaddr, | 5084 | clp->cl_ipaddr, |
5081 | rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR), | 5085 | rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR), |
5082 | rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_PROTO)); | 5086 | rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_PROTO)); |
@@ -5089,7 +5093,6 @@ nfs4_init_nonuniform_client_string(struct nfs_client *clp) | |||
5089 | static int | 5093 | static int |
5090 | nfs4_init_uniquifier_client_string(struct nfs_client *clp) | 5094 | nfs4_init_uniquifier_client_string(struct nfs_client *clp) |
5091 | { | 5095 | { |
5092 | int result; | ||
5093 | size_t len; | 5096 | size_t len; |
5094 | char *str; | 5097 | char *str; |
5095 | 5098 | ||
@@ -5109,7 +5112,7 @@ nfs4_init_uniquifier_client_string(struct nfs_client *clp) | |||
5109 | if (!str) | 5112 | if (!str) |
5110 | return -ENOMEM; | 5113 | return -ENOMEM; |
5111 | 5114 | ||
5112 | result = scnprintf(str, len, "Linux NFSv%u.%u %s/%s", | 5115 | scnprintf(str, len, "Linux NFSv%u.%u %s/%s", |
5113 | clp->rpc_ops->version, clp->cl_minorversion, | 5116 | clp->rpc_ops->version, clp->cl_minorversion, |
5114 | nfs4_client_id_uniquifier, | 5117 | nfs4_client_id_uniquifier, |
5115 | clp->cl_rpcclient->cl_nodename); | 5118 | clp->cl_rpcclient->cl_nodename); |
@@ -5120,7 +5123,6 @@ nfs4_init_uniquifier_client_string(struct nfs_client *clp) | |||
5120 | static int | 5123 | static int |
5121 | nfs4_init_uniform_client_string(struct nfs_client *clp) | 5124 | nfs4_init_uniform_client_string(struct nfs_client *clp) |
5122 | { | 5125 | { |
5123 | int result; | ||
5124 | size_t len; | 5126 | size_t len; |
5125 | char *str; | 5127 | char *str; |
5126 | 5128 | ||
@@ -5145,7 +5147,7 @@ nfs4_init_uniform_client_string(struct nfs_client *clp) | |||
5145 | if (!str) | 5147 | if (!str) |
5146 | return -ENOMEM; | 5148 | return -ENOMEM; |
5147 | 5149 | ||
5148 | result = scnprintf(str, len, "Linux NFSv%u.%u %s", | 5150 | scnprintf(str, len, "Linux NFSv%u.%u %s", |
5149 | clp->rpc_ops->version, clp->cl_minorversion, | 5151 | clp->rpc_ops->version, clp->cl_minorversion, |
5150 | clp->cl_rpcclient->cl_nodename); | 5152 | clp->cl_rpcclient->cl_nodename); |
5151 | clp->cl_owner_id = str; | 5153 | clp->cl_owner_id = str; |
@@ -5384,6 +5386,11 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co | |||
5384 | if (data == NULL) | 5386 | if (data == NULL) |
5385 | return -ENOMEM; | 5387 | return -ENOMEM; |
5386 | nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); | 5388 | nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); |
5389 | |||
5390 | nfs4_state_protect(server->nfs_client, | ||
5391 | NFS_SP4_MACH_CRED_CLEANUP, | ||
5392 | &task_setup_data.rpc_client, &msg); | ||
5393 | |||
5387 | data->args.fhandle = &data->fh; | 5394 | data->args.fhandle = &data->fh; |
5388 | data->args.stateid = &data->stateid; | 5395 | data->args.stateid = &data->stateid; |
5389 | data->args.bitmask = server->cache_consistency_bitmask; | 5396 | data->args.bitmask = server->cache_consistency_bitmask; |
@@ -5426,7 +5433,7 @@ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4 | |||
5426 | int err; | 5433 | int err; |
5427 | do { | 5434 | do { |
5428 | err = _nfs4_proc_delegreturn(inode, cred, stateid, issync); | 5435 | err = _nfs4_proc_delegreturn(inode, cred, stateid, issync); |
5429 | trace_nfs4_delegreturn(inode, err); | 5436 | trace_nfs4_delegreturn(inode, stateid, err); |
5430 | switch (err) { | 5437 | switch (err) { |
5431 | case -NFS4ERR_STALE_STATEID: | 5438 | case -NFS4ERR_STALE_STATEID: |
5432 | case -NFS4ERR_EXPIRED: | 5439 | case -NFS4ERR_EXPIRED: |
@@ -5936,6 +5943,7 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f | |||
5936 | data->cancelled = 1; | 5943 | data->cancelled = 1; |
5937 | rpc_put_task(task); | 5944 | rpc_put_task(task); |
5938 | dprintk("%s: done, ret = %d!\n", __func__, ret); | 5945 | dprintk("%s: done, ret = %d!\n", __func__, ret); |
5946 | trace_nfs4_set_lock(fl, state, &data->res.stateid, cmd, ret); | ||
5939 | return ret; | 5947 | return ret; |
5940 | } | 5948 | } |
5941 | 5949 | ||
@@ -5952,7 +5960,6 @@ static int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request | |||
5952 | if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0) | 5960 | if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0) |
5953 | return 0; | 5961 | return 0; |
5954 | err = _nfs4_do_setlk(state, F_SETLK, request, NFS_LOCK_RECLAIM); | 5962 | err = _nfs4_do_setlk(state, F_SETLK, request, NFS_LOCK_RECLAIM); |
5955 | trace_nfs4_lock_reclaim(request, state, F_SETLK, err); | ||
5956 | if (err != -NFS4ERR_DELAY) | 5963 | if (err != -NFS4ERR_DELAY) |
5957 | break; | 5964 | break; |
5958 | nfs4_handle_exception(server, err, &exception); | 5965 | nfs4_handle_exception(server, err, &exception); |
@@ -5979,7 +5986,6 @@ static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request | |||
5979 | if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0) | 5986 | if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0) |
5980 | return 0; | 5987 | return 0; |
5981 | err = _nfs4_do_setlk(state, F_SETLK, request, NFS_LOCK_EXPIRED); | 5988 | err = _nfs4_do_setlk(state, F_SETLK, request, NFS_LOCK_EXPIRED); |
5982 | trace_nfs4_lock_expired(request, state, F_SETLK, err); | ||
5983 | switch (err) { | 5989 | switch (err) { |
5984 | default: | 5990 | default: |
5985 | goto out; | 5991 | goto out; |
@@ -6087,7 +6093,6 @@ static int nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock * | |||
6087 | 6093 | ||
6088 | do { | 6094 | do { |
6089 | err = _nfs4_proc_setlk(state, cmd, request); | 6095 | err = _nfs4_proc_setlk(state, cmd, request); |
6090 | trace_nfs4_set_lock(request, state, cmd, err); | ||
6091 | if (err == -NFS4ERR_DENIED) | 6096 | if (err == -NFS4ERR_DENIED) |
6092 | err = -EAGAIN; | 6097 | err = -EAGAIN; |
6093 | err = nfs4_handle_exception(NFS_SERVER(state->inode), | 6098 | err = nfs4_handle_exception(NFS_SERVER(state->inode), |
@@ -6847,10 +6852,13 @@ static const struct nfs41_state_protection nfs4_sp4_mach_cred_request = { | |||
6847 | }, | 6852 | }, |
6848 | .allow.u.words = { | 6853 | .allow.u.words = { |
6849 | [0] = 1 << (OP_CLOSE) | | 6854 | [0] = 1 << (OP_CLOSE) | |
6855 | 1 << (OP_OPEN_DOWNGRADE) | | ||
6850 | 1 << (OP_LOCKU) | | 6856 | 1 << (OP_LOCKU) | |
6857 | 1 << (OP_DELEGRETURN) | | ||
6851 | 1 << (OP_COMMIT), | 6858 | 1 << (OP_COMMIT), |
6852 | [1] = 1 << (OP_SECINFO - 32) | | 6859 | [1] = 1 << (OP_SECINFO - 32) | |
6853 | 1 << (OP_SECINFO_NO_NAME - 32) | | 6860 | 1 << (OP_SECINFO_NO_NAME - 32) | |
6861 | 1 << (OP_LAYOUTRETURN - 32) | | ||
6854 | 1 << (OP_TEST_STATEID - 32) | | 6862 | 1 << (OP_TEST_STATEID - 32) | |
6855 | 1 << (OP_FREE_STATEID - 32) | | 6863 | 1 << (OP_FREE_STATEID - 32) | |
6856 | 1 << (OP_WRITE - 32) | 6864 | 1 << (OP_WRITE - 32) |
@@ -6915,11 +6923,19 @@ static int nfs4_sp4_select_mode(struct nfs_client *clp, | |||
6915 | } | 6923 | } |
6916 | 6924 | ||
6917 | if (test_bit(OP_CLOSE, sp->allow.u.longs) && | 6925 | if (test_bit(OP_CLOSE, sp->allow.u.longs) && |
6926 | test_bit(OP_OPEN_DOWNGRADE, sp->allow.u.longs) && | ||
6927 | test_bit(OP_DELEGRETURN, sp->allow.u.longs) && | ||
6918 | test_bit(OP_LOCKU, sp->allow.u.longs)) { | 6928 | test_bit(OP_LOCKU, sp->allow.u.longs)) { |
6919 | dfprintk(MOUNT, " cleanup mode enabled\n"); | 6929 | dfprintk(MOUNT, " cleanup mode enabled\n"); |
6920 | set_bit(NFS_SP4_MACH_CRED_CLEANUP, &clp->cl_sp4_flags); | 6930 | set_bit(NFS_SP4_MACH_CRED_CLEANUP, &clp->cl_sp4_flags); |
6921 | } | 6931 | } |
6922 | 6932 | ||
6933 | if (test_bit(OP_LAYOUTRETURN, sp->allow.u.longs)) { | ||
6934 | dfprintk(MOUNT, " pnfs cleanup mode enabled\n"); | ||
6935 | set_bit(NFS_SP4_MACH_CRED_PNFS_CLEANUP, | ||
6936 | &clp->cl_sp4_flags); | ||
6937 | } | ||
6938 | |||
6923 | if (test_bit(OP_SECINFO, sp->allow.u.longs) && | 6939 | if (test_bit(OP_SECINFO, sp->allow.u.longs) && |
6924 | test_bit(OP_SECINFO_NO_NAME, sp->allow.u.longs)) { | 6940 | test_bit(OP_SECINFO_NO_NAME, sp->allow.u.longs)) { |
6925 | dfprintk(MOUNT, " secinfo mode enabled\n"); | 6941 | dfprintk(MOUNT, " secinfo mode enabled\n"); |
@@ -7748,6 +7764,7 @@ nfs4_layoutget_prepare(struct rpc_task *task, void *calldata) | |||
7748 | struct nfs4_layoutget *lgp = calldata; | 7764 | struct nfs4_layoutget *lgp = calldata; |
7749 | struct nfs_server *server = NFS_SERVER(lgp->args.inode); | 7765 | struct nfs_server *server = NFS_SERVER(lgp->args.inode); |
7750 | struct nfs4_session *session = nfs4_get_session(server); | 7766 | struct nfs4_session *session = nfs4_get_session(server); |
7767 | int ret; | ||
7751 | 7768 | ||
7752 | dprintk("--> %s\n", __func__); | 7769 | dprintk("--> %s\n", __func__); |
7753 | /* Note the is a race here, where a CB_LAYOUTRECALL can come in | 7770 | /* Note the is a race here, where a CB_LAYOUTRECALL can come in |
@@ -7758,12 +7775,12 @@ nfs4_layoutget_prepare(struct rpc_task *task, void *calldata) | |||
7758 | if (nfs41_setup_sequence(session, &lgp->args.seq_args, | 7775 | if (nfs41_setup_sequence(session, &lgp->args.seq_args, |
7759 | &lgp->res.seq_res, task)) | 7776 | &lgp->res.seq_res, task)) |
7760 | return; | 7777 | return; |
7761 | if (pnfs_choose_layoutget_stateid(&lgp->args.stateid, | 7778 | ret = pnfs_choose_layoutget_stateid(&lgp->args.stateid, |
7762 | NFS_I(lgp->args.inode)->layout, | 7779 | NFS_I(lgp->args.inode)->layout, |
7763 | &lgp->args.range, | 7780 | &lgp->args.range, |
7764 | lgp->args.ctx->state)) { | 7781 | lgp->args.ctx->state); |
7765 | rpc_exit(task, NFS4_OK); | 7782 | if (ret < 0) |
7766 | } | 7783 | rpc_exit(task, ret); |
7767 | } | 7784 | } |
7768 | 7785 | ||
7769 | static void nfs4_layoutget_done(struct rpc_task *task, void *calldata) | 7786 | static void nfs4_layoutget_done(struct rpc_task *task, void *calldata) |
@@ -7783,6 +7800,15 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata) | |||
7783 | switch (task->tk_status) { | 7800 | switch (task->tk_status) { |
7784 | case 0: | 7801 | case 0: |
7785 | goto out; | 7802 | goto out; |
7803 | |||
7804 | /* | ||
7805 | * NFS4ERR_LAYOUTUNAVAILABLE means we are not supposed to use pnfs | ||
7806 | * on the file. set tk_status to -ENODATA to tell upper layer to | ||
7807 | * retry go inband. | ||
7808 | */ | ||
7809 | case -NFS4ERR_LAYOUTUNAVAILABLE: | ||
7810 | task->tk_status = -ENODATA; | ||
7811 | goto out; | ||
7786 | /* | 7812 | /* |
7787 | * NFS4ERR_BADLAYOUT means the MDS cannot return a layout of | 7813 | * NFS4ERR_BADLAYOUT means the MDS cannot return a layout of |
7788 | * length lgp->args.minlength != 0 (see RFC5661 section 18.43.3). | 7814 | * length lgp->args.minlength != 0 (see RFC5661 section 18.43.3). |
@@ -7979,6 +8005,7 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) | |||
7979 | trace_nfs4_layoutget(lgp->args.ctx, | 8005 | trace_nfs4_layoutget(lgp->args.ctx, |
7980 | &lgp->args.range, | 8006 | &lgp->args.range, |
7981 | &lgp->res.range, | 8007 | &lgp->res.range, |
8008 | &lgp->res.stateid, | ||
7982 | status); | 8009 | status); |
7983 | /* if layoutp->len is 0, nfs4_layoutget_prepare called rpc_exit */ | 8010 | /* if layoutp->len is 0, nfs4_layoutget_prepare called rpc_exit */ |
7984 | if (status == 0 && lgp->res.layoutp->len) | 8011 | if (status == 0 && lgp->res.layoutp->len) |
@@ -8035,11 +8062,11 @@ static void nfs4_layoutreturn_release(void *calldata) | |||
8035 | 8062 | ||
8036 | dprintk("--> %s\n", __func__); | 8063 | dprintk("--> %s\n", __func__); |
8037 | spin_lock(&lo->plh_inode->i_lock); | 8064 | spin_lock(&lo->plh_inode->i_lock); |
8065 | pnfs_mark_matching_lsegs_invalid(lo, &freeme, &lrp->args.range); | ||
8066 | pnfs_mark_layout_returned_if_empty(lo); | ||
8038 | if (lrp->res.lrs_present) | 8067 | if (lrp->res.lrs_present) |
8039 | pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); | 8068 | pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); |
8040 | pnfs_mark_matching_lsegs_invalid(lo, &freeme, &lrp->args.range); | ||
8041 | pnfs_clear_layoutreturn_waitbit(lo); | 8069 | pnfs_clear_layoutreturn_waitbit(lo); |
8042 | lo->plh_block_lgets--; | ||
8043 | spin_unlock(&lo->plh_inode->i_lock); | 8070 | spin_unlock(&lo->plh_inode->i_lock); |
8044 | pnfs_free_lseg_list(&freeme); | 8071 | pnfs_free_lseg_list(&freeme); |
8045 | pnfs_put_layout_hdr(lrp->args.layout); | 8072 | pnfs_put_layout_hdr(lrp->args.layout); |
@@ -8071,6 +8098,10 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync) | |||
8071 | }; | 8098 | }; |
8072 | int status = 0; | 8099 | int status = 0; |
8073 | 8100 | ||
8101 | nfs4_state_protect(NFS_SERVER(lrp->args.inode)->nfs_client, | ||
8102 | NFS_SP4_MACH_CRED_PNFS_CLEANUP, | ||
8103 | &task_setup_data.rpc_client, &msg); | ||
8104 | |||
8074 | dprintk("--> %s\n", __func__); | 8105 | dprintk("--> %s\n", __func__); |
8075 | if (!sync) { | 8106 | if (!sync) { |
8076 | lrp->inode = nfs_igrab_and_active(lrp->args.inode); | 8107 | lrp->inode = nfs_igrab_and_active(lrp->args.inode); |
@@ -8086,7 +8117,7 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync) | |||
8086 | return PTR_ERR(task); | 8117 | return PTR_ERR(task); |
8087 | if (sync) | 8118 | if (sync) |
8088 | status = task->tk_status; | 8119 | status = task->tk_status; |
8089 | trace_nfs4_layoutreturn(lrp->args.inode, status); | 8120 | trace_nfs4_layoutreturn(lrp->args.inode, &lrp->args.stateid, status); |
8090 | dprintk("<-- %s status=%d\n", __func__, status); | 8121 | dprintk("<-- %s status=%d\n", __func__, status); |
8091 | rpc_put_task(task); | 8122 | rpc_put_task(task); |
8092 | return status; | 8123 | return status; |
@@ -8234,7 +8265,7 @@ nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync) | |||
8234 | return PTR_ERR(task); | 8265 | return PTR_ERR(task); |
8235 | if (sync) | 8266 | if (sync) |
8236 | status = task->tk_status; | 8267 | status = task->tk_status; |
8237 | trace_nfs4_layoutcommit(data->args.inode, status); | 8268 | trace_nfs4_layoutcommit(data->args.inode, &data->args.stateid, status); |
8238 | dprintk("%s: status %d\n", __func__, status); | 8269 | dprintk("%s: status %d\n", __func__, status); |
8239 | rpc_put_task(task); | 8270 | rpc_put_task(task); |
8240 | return status; | 8271 | return status; |
diff --git a/fs/nfs/nfs4sysctl.c b/fs/nfs/nfs4sysctl.c index 0fbd3ab1be22..8693d77c45ea 100644 --- a/fs/nfs/nfs4sysctl.c +++ b/fs/nfs/nfs4sysctl.c | |||
@@ -12,7 +12,7 @@ | |||
12 | #include "nfs4idmap.h" | 12 | #include "nfs4idmap.h" |
13 | #include "callback.h" | 13 | #include "callback.h" |
14 | 14 | ||
15 | static const int nfs_set_port_min = 0; | 15 | static const int nfs_set_port_min; |
16 | static const int nfs_set_port_max = 65535; | 16 | static const int nfs_set_port_max = 65535; |
17 | static struct ctl_table_header *nfs4_callback_sysctl_table; | 17 | static struct ctl_table_header *nfs4_callback_sysctl_table; |
18 | 18 | ||
diff --git a/fs/nfs/nfs4trace.c b/fs/nfs/nfs4trace.c index d774335cc8bc..2850bce19244 100644 --- a/fs/nfs/nfs4trace.c +++ b/fs/nfs/nfs4trace.c | |||
@@ -6,6 +6,7 @@ | |||
6 | #include "internal.h" | 6 | #include "internal.h" |
7 | #include "nfs4session.h" | 7 | #include "nfs4session.h" |
8 | #include "callback.h" | 8 | #include "callback.h" |
9 | #include "pnfs.h" | ||
9 | 10 | ||
10 | #define CREATE_TRACE_POINTS | 11 | #define CREATE_TRACE_POINTS |
11 | #include "nfs4trace.h" | 12 | #include "nfs4trace.h" |
diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index 671cf68fe56b..2c8d05dae5b1 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h | |||
@@ -321,6 +321,7 @@ TRACE_EVENT(nfs4_sequence_done, | |||
321 | __entry->highest_slotid = res->sr_highest_slotid; | 321 | __entry->highest_slotid = res->sr_highest_slotid; |
322 | __entry->target_highest_slotid = | 322 | __entry->target_highest_slotid = |
323 | res->sr_target_highest_slotid; | 323 | res->sr_target_highest_slotid; |
324 | __entry->status_flags = res->sr_status_flags; | ||
324 | __entry->error = res->sr_status; | 325 | __entry->error = res->sr_status; |
325 | ), | 326 | ), |
326 | TP_printk( | 327 | TP_printk( |
@@ -399,6 +400,10 @@ DECLARE_EVENT_CLASS(nfs4_open_event, | |||
399 | __field(u64, fileid) | 400 | __field(u64, fileid) |
400 | __field(u64, dir) | 401 | __field(u64, dir) |
401 | __string(name, ctx->dentry->d_name.name) | 402 | __string(name, ctx->dentry->d_name.name) |
403 | __field(int, stateid_seq) | ||
404 | __field(u32, stateid_hash) | ||
405 | __field(int, openstateid_seq) | ||
406 | __field(u32, openstateid_hash) | ||
402 | ), | 407 | ), |
403 | 408 | ||
404 | TP_fast_assign( | 409 | TP_fast_assign( |
@@ -409,8 +414,22 @@ DECLARE_EVENT_CLASS(nfs4_open_event, | |||
409 | __entry->flags = flags; | 414 | __entry->flags = flags; |
410 | __entry->fmode = (__force unsigned int)ctx->mode; | 415 | __entry->fmode = (__force unsigned int)ctx->mode; |
411 | __entry->dev = ctx->dentry->d_sb->s_dev; | 416 | __entry->dev = ctx->dentry->d_sb->s_dev; |
412 | if (!IS_ERR_OR_NULL(state)) | 417 | if (!IS_ERR_OR_NULL(state)) { |
413 | inode = state->inode; | 418 | inode = state->inode; |
419 | __entry->stateid_seq = | ||
420 | be32_to_cpu(state->stateid.seqid); | ||
421 | __entry->stateid_hash = | ||
422 | nfs_stateid_hash(&state->stateid); | ||
423 | __entry->openstateid_seq = | ||
424 | be32_to_cpu(state->open_stateid.seqid); | ||
425 | __entry->openstateid_hash = | ||
426 | nfs_stateid_hash(&state->open_stateid); | ||
427 | } else { | ||
428 | __entry->stateid_seq = 0; | ||
429 | __entry->stateid_hash = 0; | ||
430 | __entry->openstateid_seq = 0; | ||
431 | __entry->openstateid_hash = 0; | ||
432 | } | ||
414 | if (inode != NULL) { | 433 | if (inode != NULL) { |
415 | __entry->fileid = NFS_FILEID(inode); | 434 | __entry->fileid = NFS_FILEID(inode); |
416 | __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); | 435 | __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); |
@@ -425,7 +444,8 @@ DECLARE_EVENT_CLASS(nfs4_open_event, | |||
425 | TP_printk( | 444 | TP_printk( |
426 | "error=%d (%s) flags=%d (%s) fmode=%s " | 445 | "error=%d (%s) flags=%d (%s) fmode=%s " |
427 | "fileid=%02x:%02x:%llu fhandle=0x%08x " | 446 | "fileid=%02x:%02x:%llu fhandle=0x%08x " |
428 | "name=%02x:%02x:%llu/%s", | 447 | "name=%02x:%02x:%llu/%s stateid=%d:0x%08x " |
448 | "openstateid=%d:0x%08x", | ||
429 | __entry->error, | 449 | __entry->error, |
430 | show_nfsv4_errors(__entry->error), | 450 | show_nfsv4_errors(__entry->error), |
431 | __entry->flags, | 451 | __entry->flags, |
@@ -436,7 +456,9 @@ DECLARE_EVENT_CLASS(nfs4_open_event, | |||
436 | __entry->fhandle, | 456 | __entry->fhandle, |
437 | MAJOR(__entry->dev), MINOR(__entry->dev), | 457 | MAJOR(__entry->dev), MINOR(__entry->dev), |
438 | (unsigned long long)__entry->dir, | 458 | (unsigned long long)__entry->dir, |
439 | __get_str(name) | 459 | __get_str(name), |
460 | __entry->stateid_seq, __entry->stateid_hash, | ||
461 | __entry->openstateid_seq, __entry->openstateid_hash | ||
440 | ) | 462 | ) |
441 | ); | 463 | ); |
442 | 464 | ||
@@ -452,6 +474,45 @@ DEFINE_NFS4_OPEN_EVENT(nfs4_open_reclaim); | |||
452 | DEFINE_NFS4_OPEN_EVENT(nfs4_open_expired); | 474 | DEFINE_NFS4_OPEN_EVENT(nfs4_open_expired); |
453 | DEFINE_NFS4_OPEN_EVENT(nfs4_open_file); | 475 | DEFINE_NFS4_OPEN_EVENT(nfs4_open_file); |
454 | 476 | ||
477 | TRACE_EVENT(nfs4_cached_open, | ||
478 | TP_PROTO( | ||
479 | const struct nfs4_state *state | ||
480 | ), | ||
481 | TP_ARGS(state), | ||
482 | TP_STRUCT__entry( | ||
483 | __field(dev_t, dev) | ||
484 | __field(u32, fhandle) | ||
485 | __field(u64, fileid) | ||
486 | __field(unsigned int, fmode) | ||
487 | __field(int, stateid_seq) | ||
488 | __field(u32, stateid_hash) | ||
489 | ), | ||
490 | |||
491 | TP_fast_assign( | ||
492 | const struct inode *inode = state->inode; | ||
493 | |||
494 | __entry->dev = inode->i_sb->s_dev; | ||
495 | __entry->fileid = NFS_FILEID(inode); | ||
496 | __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); | ||
497 | __entry->fmode = (__force unsigned int)state->state; | ||
498 | __entry->stateid_seq = | ||
499 | be32_to_cpu(state->stateid.seqid); | ||
500 | __entry->stateid_hash = | ||
501 | nfs_stateid_hash(&state->stateid); | ||
502 | ), | ||
503 | |||
504 | TP_printk( | ||
505 | "fmode=%s fileid=%02x:%02x:%llu " | ||
506 | "fhandle=0x%08x stateid=%d:0x%08x", | ||
507 | __entry->fmode ? show_fmode_flags(__entry->fmode) : | ||
508 | "closed", | ||
509 | MAJOR(__entry->dev), MINOR(__entry->dev), | ||
510 | (unsigned long long)__entry->fileid, | ||
511 | __entry->fhandle, | ||
512 | __entry->stateid_seq, __entry->stateid_hash | ||
513 | ) | ||
514 | ); | ||
515 | |||
455 | TRACE_EVENT(nfs4_close, | 516 | TRACE_EVENT(nfs4_close, |
456 | TP_PROTO( | 517 | TP_PROTO( |
457 | const struct nfs4_state *state, | 518 | const struct nfs4_state *state, |
@@ -468,6 +529,8 @@ TRACE_EVENT(nfs4_close, | |||
468 | __field(u64, fileid) | 529 | __field(u64, fileid) |
469 | __field(unsigned int, fmode) | 530 | __field(unsigned int, fmode) |
470 | __field(int, error) | 531 | __field(int, error) |
532 | __field(int, stateid_seq) | ||
533 | __field(u32, stateid_hash) | ||
471 | ), | 534 | ), |
472 | 535 | ||
473 | TP_fast_assign( | 536 | TP_fast_assign( |
@@ -478,18 +541,23 @@ TRACE_EVENT(nfs4_close, | |||
478 | __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); | 541 | __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); |
479 | __entry->fmode = (__force unsigned int)state->state; | 542 | __entry->fmode = (__force unsigned int)state->state; |
480 | __entry->error = error; | 543 | __entry->error = error; |
544 | __entry->stateid_seq = | ||
545 | be32_to_cpu(args->stateid.seqid); | ||
546 | __entry->stateid_hash = | ||
547 | nfs_stateid_hash(&args->stateid); | ||
481 | ), | 548 | ), |
482 | 549 | ||
483 | TP_printk( | 550 | TP_printk( |
484 | "error=%d (%s) fmode=%s fileid=%02x:%02x:%llu " | 551 | "error=%d (%s) fmode=%s fileid=%02x:%02x:%llu " |
485 | "fhandle=0x%08x", | 552 | "fhandle=0x%08x openstateid=%d:0x%08x", |
486 | __entry->error, | 553 | __entry->error, |
487 | show_nfsv4_errors(__entry->error), | 554 | show_nfsv4_errors(__entry->error), |
488 | __entry->fmode ? show_fmode_flags(__entry->fmode) : | 555 | __entry->fmode ? show_fmode_flags(__entry->fmode) : |
489 | "closed", | 556 | "closed", |
490 | MAJOR(__entry->dev), MINOR(__entry->dev), | 557 | MAJOR(__entry->dev), MINOR(__entry->dev), |
491 | (unsigned long long)__entry->fileid, | 558 | (unsigned long long)__entry->fileid, |
492 | __entry->fhandle | 559 | __entry->fhandle, |
560 | __entry->stateid_seq, __entry->stateid_hash | ||
493 | ) | 561 | ) |
494 | ); | 562 | ); |
495 | 563 | ||
@@ -523,6 +591,8 @@ DECLARE_EVENT_CLASS(nfs4_lock_event, | |||
523 | __field(dev_t, dev) | 591 | __field(dev_t, dev) |
524 | __field(u32, fhandle) | 592 | __field(u32, fhandle) |
525 | __field(u64, fileid) | 593 | __field(u64, fileid) |
594 | __field(int, stateid_seq) | ||
595 | __field(u32, stateid_hash) | ||
526 | ), | 596 | ), |
527 | 597 | ||
528 | TP_fast_assign( | 598 | TP_fast_assign( |
@@ -536,11 +606,16 @@ DECLARE_EVENT_CLASS(nfs4_lock_event, | |||
536 | __entry->dev = inode->i_sb->s_dev; | 606 | __entry->dev = inode->i_sb->s_dev; |
537 | __entry->fileid = NFS_FILEID(inode); | 607 | __entry->fileid = NFS_FILEID(inode); |
538 | __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); | 608 | __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); |
609 | __entry->stateid_seq = | ||
610 | be32_to_cpu(state->stateid.seqid); | ||
611 | __entry->stateid_hash = | ||
612 | nfs_stateid_hash(&state->stateid); | ||
539 | ), | 613 | ), |
540 | 614 | ||
541 | TP_printk( | 615 | TP_printk( |
542 | "error=%d (%s) cmd=%s:%s range=%lld:%lld " | 616 | "error=%d (%s) cmd=%s:%s range=%lld:%lld " |
543 | "fileid=%02x:%02x:%llu fhandle=0x%08x", | 617 | "fileid=%02x:%02x:%llu fhandle=0x%08x " |
618 | "stateid=%d:0x%08x", | ||
544 | __entry->error, | 619 | __entry->error, |
545 | show_nfsv4_errors(__entry->error), | 620 | show_nfsv4_errors(__entry->error), |
546 | show_lock_cmd(__entry->cmd), | 621 | show_lock_cmd(__entry->cmd), |
@@ -549,7 +624,8 @@ DECLARE_EVENT_CLASS(nfs4_lock_event, | |||
549 | (long long)__entry->end, | 624 | (long long)__entry->end, |
550 | MAJOR(__entry->dev), MINOR(__entry->dev), | 625 | MAJOR(__entry->dev), MINOR(__entry->dev), |
551 | (unsigned long long)__entry->fileid, | 626 | (unsigned long long)__entry->fileid, |
552 | __entry->fhandle | 627 | __entry->fhandle, |
628 | __entry->stateid_seq, __entry->stateid_hash | ||
553 | ) | 629 | ) |
554 | ); | 630 | ); |
555 | 631 | ||
@@ -563,11 +639,73 @@ DECLARE_EVENT_CLASS(nfs4_lock_event, | |||
563 | ), \ | 639 | ), \ |
564 | TP_ARGS(request, state, cmd, error)) | 640 | TP_ARGS(request, state, cmd, error)) |
565 | DEFINE_NFS4_LOCK_EVENT(nfs4_get_lock); | 641 | DEFINE_NFS4_LOCK_EVENT(nfs4_get_lock); |
566 | DEFINE_NFS4_LOCK_EVENT(nfs4_set_lock); | ||
567 | DEFINE_NFS4_LOCK_EVENT(nfs4_lock_reclaim); | ||
568 | DEFINE_NFS4_LOCK_EVENT(nfs4_lock_expired); | ||
569 | DEFINE_NFS4_LOCK_EVENT(nfs4_unlock); | 642 | DEFINE_NFS4_LOCK_EVENT(nfs4_unlock); |
570 | 643 | ||
644 | TRACE_EVENT(nfs4_set_lock, | ||
645 | TP_PROTO( | ||
646 | const struct file_lock *request, | ||
647 | const struct nfs4_state *state, | ||
648 | const nfs4_stateid *lockstateid, | ||
649 | int cmd, | ||
650 | int error | ||
651 | ), | ||
652 | |||
653 | TP_ARGS(request, state, lockstateid, cmd, error), | ||
654 | |||
655 | TP_STRUCT__entry( | ||
656 | __field(int, error) | ||
657 | __field(int, cmd) | ||
658 | __field(char, type) | ||
659 | __field(loff_t, start) | ||
660 | __field(loff_t, end) | ||
661 | __field(dev_t, dev) | ||
662 | __field(u32, fhandle) | ||
663 | __field(u64, fileid) | ||
664 | __field(int, stateid_seq) | ||
665 | __field(u32, stateid_hash) | ||
666 | __field(int, lockstateid_seq) | ||
667 | __field(u32, lockstateid_hash) | ||
668 | ), | ||
669 | |||
670 | TP_fast_assign( | ||
671 | const struct inode *inode = state->inode; | ||
672 | |||
673 | __entry->error = error; | ||
674 | __entry->cmd = cmd; | ||
675 | __entry->type = request->fl_type; | ||
676 | __entry->start = request->fl_start; | ||
677 | __entry->end = request->fl_end; | ||
678 | __entry->dev = inode->i_sb->s_dev; | ||
679 | __entry->fileid = NFS_FILEID(inode); | ||
680 | __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); | ||
681 | __entry->stateid_seq = | ||
682 | be32_to_cpu(state->stateid.seqid); | ||
683 | __entry->stateid_hash = | ||
684 | nfs_stateid_hash(&state->stateid); | ||
685 | __entry->lockstateid_seq = | ||
686 | be32_to_cpu(lockstateid->seqid); | ||
687 | __entry->lockstateid_hash = | ||
688 | nfs_stateid_hash(lockstateid); | ||
689 | ), | ||
690 | |||
691 | TP_printk( | ||
692 | "error=%d (%s) cmd=%s:%s range=%lld:%lld " | ||
693 | "fileid=%02x:%02x:%llu fhandle=0x%08x " | ||
694 | "stateid=%d:0x%08x lockstateid=%d:0x%08x", | ||
695 | __entry->error, | ||
696 | show_nfsv4_errors(__entry->error), | ||
697 | show_lock_cmd(__entry->cmd), | ||
698 | show_lock_type(__entry->type), | ||
699 | (long long)__entry->start, | ||
700 | (long long)__entry->end, | ||
701 | MAJOR(__entry->dev), MINOR(__entry->dev), | ||
702 | (unsigned long long)__entry->fileid, | ||
703 | __entry->fhandle, | ||
704 | __entry->stateid_seq, __entry->stateid_hash, | ||
705 | __entry->lockstateid_seq, __entry->lockstateid_hash | ||
706 | ) | ||
707 | ); | ||
708 | |||
571 | DECLARE_EVENT_CLASS(nfs4_set_delegation_event, | 709 | DECLARE_EVENT_CLASS(nfs4_set_delegation_event, |
572 | TP_PROTO( | 710 | TP_PROTO( |
573 | const struct inode *inode, | 711 | const struct inode *inode, |
@@ -621,20 +759,28 @@ TRACE_EVENT(nfs4_delegreturn_exit, | |||
621 | __field(dev_t, dev) | 759 | __field(dev_t, dev) |
622 | __field(u32, fhandle) | 760 | __field(u32, fhandle) |
623 | __field(int, error) | 761 | __field(int, error) |
762 | __field(int, stateid_seq) | ||
763 | __field(u32, stateid_hash) | ||
624 | ), | 764 | ), |
625 | 765 | ||
626 | TP_fast_assign( | 766 | TP_fast_assign( |
627 | __entry->dev = res->server->s_dev; | 767 | __entry->dev = res->server->s_dev; |
628 | __entry->fhandle = nfs_fhandle_hash(args->fhandle); | 768 | __entry->fhandle = nfs_fhandle_hash(args->fhandle); |
629 | __entry->error = error; | 769 | __entry->error = error; |
770 | __entry->stateid_seq = | ||
771 | be32_to_cpu(args->stateid->seqid); | ||
772 | __entry->stateid_hash = | ||
773 | nfs_stateid_hash(args->stateid); | ||
630 | ), | 774 | ), |
631 | 775 | ||
632 | TP_printk( | 776 | TP_printk( |
633 | "error=%d (%s) dev=%02x:%02x fhandle=0x%08x", | 777 | "error=%d (%s) dev=%02x:%02x fhandle=0x%08x " |
778 | "stateid=%d:0x%08x", | ||
634 | __entry->error, | 779 | __entry->error, |
635 | show_nfsv4_errors(__entry->error), | 780 | show_nfsv4_errors(__entry->error), |
636 | MAJOR(__entry->dev), MINOR(__entry->dev), | 781 | MAJOR(__entry->dev), MINOR(__entry->dev), |
637 | __entry->fhandle | 782 | __entry->fhandle, |
783 | __entry->stateid_seq, __entry->stateid_hash | ||
638 | ) | 784 | ) |
639 | ); | 785 | ); |
640 | 786 | ||
@@ -653,6 +799,8 @@ DECLARE_EVENT_CLASS(nfs4_test_stateid_event, | |||
653 | __field(dev_t, dev) | 799 | __field(dev_t, dev) |
654 | __field(u32, fhandle) | 800 | __field(u32, fhandle) |
655 | __field(u64, fileid) | 801 | __field(u64, fileid) |
802 | __field(int, stateid_seq) | ||
803 | __field(u32, stateid_hash) | ||
656 | ), | 804 | ), |
657 | 805 | ||
658 | TP_fast_assign( | 806 | TP_fast_assign( |
@@ -662,15 +810,21 @@ DECLARE_EVENT_CLASS(nfs4_test_stateid_event, | |||
662 | __entry->dev = inode->i_sb->s_dev; | 810 | __entry->dev = inode->i_sb->s_dev; |
663 | __entry->fileid = NFS_FILEID(inode); | 811 | __entry->fileid = NFS_FILEID(inode); |
664 | __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); | 812 | __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); |
813 | __entry->stateid_seq = | ||
814 | be32_to_cpu(state->stateid.seqid); | ||
815 | __entry->stateid_hash = | ||
816 | nfs_stateid_hash(&state->stateid); | ||
665 | ), | 817 | ), |
666 | 818 | ||
667 | TP_printk( | 819 | TP_printk( |
668 | "error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x", | 820 | "error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " |
821 | "stateid=%d:0x%08x", | ||
669 | __entry->error, | 822 | __entry->error, |
670 | show_nfsv4_errors(__entry->error), | 823 | show_nfsv4_errors(__entry->error), |
671 | MAJOR(__entry->dev), MINOR(__entry->dev), | 824 | MAJOR(__entry->dev), MINOR(__entry->dev), |
672 | (unsigned long long)__entry->fileid, | 825 | (unsigned long long)__entry->fileid, |
673 | __entry->fhandle | 826 | __entry->fhandle, |
827 | __entry->stateid_seq, __entry->stateid_hash | ||
674 | ) | 828 | ) |
675 | ); | 829 | ); |
676 | 830 | ||
@@ -820,7 +974,6 @@ DECLARE_EVENT_CLASS(nfs4_inode_event, | |||
820 | ), \ | 974 | ), \ |
821 | TP_ARGS(inode, error)) | 975 | TP_ARGS(inode, error)) |
822 | 976 | ||
823 | DEFINE_NFS4_INODE_EVENT(nfs4_setattr); | ||
824 | DEFINE_NFS4_INODE_EVENT(nfs4_access); | 977 | DEFINE_NFS4_INODE_EVENT(nfs4_access); |
825 | DEFINE_NFS4_INODE_EVENT(nfs4_readlink); | 978 | DEFINE_NFS4_INODE_EVENT(nfs4_readlink); |
826 | DEFINE_NFS4_INODE_EVENT(nfs4_readdir); | 979 | DEFINE_NFS4_INODE_EVENT(nfs4_readdir); |
@@ -830,8 +983,59 @@ DEFINE_NFS4_INODE_EVENT(nfs4_set_acl); | |||
830 | DEFINE_NFS4_INODE_EVENT(nfs4_get_security_label); | 983 | DEFINE_NFS4_INODE_EVENT(nfs4_get_security_label); |
831 | DEFINE_NFS4_INODE_EVENT(nfs4_set_security_label); | 984 | DEFINE_NFS4_INODE_EVENT(nfs4_set_security_label); |
832 | #endif /* CONFIG_NFS_V4_SECURITY_LABEL */ | 985 | #endif /* CONFIG_NFS_V4_SECURITY_LABEL */ |
833 | DEFINE_NFS4_INODE_EVENT(nfs4_recall_delegation); | 986 | |
834 | DEFINE_NFS4_INODE_EVENT(nfs4_delegreturn); | 987 | DECLARE_EVENT_CLASS(nfs4_inode_stateid_event, |
988 | TP_PROTO( | ||
989 | const struct inode *inode, | ||
990 | const nfs4_stateid *stateid, | ||
991 | int error | ||
992 | ), | ||
993 | |||
994 | TP_ARGS(inode, stateid, error), | ||
995 | |||
996 | TP_STRUCT__entry( | ||
997 | __field(dev_t, dev) | ||
998 | __field(u32, fhandle) | ||
999 | __field(u64, fileid) | ||
1000 | __field(int, error) | ||
1001 | __field(int, stateid_seq) | ||
1002 | __field(u32, stateid_hash) | ||
1003 | ), | ||
1004 | |||
1005 | TP_fast_assign( | ||
1006 | __entry->dev = inode->i_sb->s_dev; | ||
1007 | __entry->fileid = NFS_FILEID(inode); | ||
1008 | __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); | ||
1009 | __entry->error = error; | ||
1010 | __entry->stateid_seq = | ||
1011 | be32_to_cpu(stateid->seqid); | ||
1012 | __entry->stateid_hash = | ||
1013 | nfs_stateid_hash(stateid); | ||
1014 | ), | ||
1015 | |||
1016 | TP_printk( | ||
1017 | "error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " | ||
1018 | "stateid=%d:0x%08x", | ||
1019 | __entry->error, | ||
1020 | show_nfsv4_errors(__entry->error), | ||
1021 | MAJOR(__entry->dev), MINOR(__entry->dev), | ||
1022 | (unsigned long long)__entry->fileid, | ||
1023 | __entry->fhandle, | ||
1024 | __entry->stateid_seq, __entry->stateid_hash | ||
1025 | ) | ||
1026 | ); | ||
1027 | |||
1028 | #define DEFINE_NFS4_INODE_STATEID_EVENT(name) \ | ||
1029 | DEFINE_EVENT(nfs4_inode_stateid_event, name, \ | ||
1030 | TP_PROTO( \ | ||
1031 | const struct inode *inode, \ | ||
1032 | const nfs4_stateid *stateid, \ | ||
1033 | int error \ | ||
1034 | ), \ | ||
1035 | TP_ARGS(inode, stateid, error)) | ||
1036 | |||
1037 | DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_setattr); | ||
1038 | DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_delegreturn); | ||
835 | 1039 | ||
836 | DECLARE_EVENT_CLASS(nfs4_getattr_event, | 1040 | DECLARE_EVENT_CLASS(nfs4_getattr_event, |
837 | TP_PROTO( | 1041 | TP_PROTO( |
@@ -941,8 +1145,74 @@ DECLARE_EVENT_CLASS(nfs4_inode_callback_event, | |||
941 | ), \ | 1145 | ), \ |
942 | TP_ARGS(clp, fhandle, inode, error)) | 1146 | TP_ARGS(clp, fhandle, inode, error)) |
943 | DEFINE_NFS4_INODE_CALLBACK_EVENT(nfs4_cb_getattr); | 1147 | DEFINE_NFS4_INODE_CALLBACK_EVENT(nfs4_cb_getattr); |
944 | DEFINE_NFS4_INODE_CALLBACK_EVENT(nfs4_cb_layoutrecall_inode); | ||
945 | 1148 | ||
1149 | DECLARE_EVENT_CLASS(nfs4_inode_stateid_callback_event, | ||
1150 | TP_PROTO( | ||
1151 | const struct nfs_client *clp, | ||
1152 | const struct nfs_fh *fhandle, | ||
1153 | const struct inode *inode, | ||
1154 | const nfs4_stateid *stateid, | ||
1155 | int error | ||
1156 | ), | ||
1157 | |||
1158 | TP_ARGS(clp, fhandle, inode, stateid, error), | ||
1159 | |||
1160 | TP_STRUCT__entry( | ||
1161 | __field(int, error) | ||
1162 | __field(dev_t, dev) | ||
1163 | __field(u32, fhandle) | ||
1164 | __field(u64, fileid) | ||
1165 | __string(dstaddr, clp ? | ||
1166 | rpc_peeraddr2str(clp->cl_rpcclient, | ||
1167 | RPC_DISPLAY_ADDR) : "unknown") | ||
1168 | __field(int, stateid_seq) | ||
1169 | __field(u32, stateid_hash) | ||
1170 | ), | ||
1171 | |||
1172 | TP_fast_assign( | ||
1173 | __entry->error = error; | ||
1174 | __entry->fhandle = nfs_fhandle_hash(fhandle); | ||
1175 | if (inode != NULL) { | ||
1176 | __entry->fileid = NFS_FILEID(inode); | ||
1177 | __entry->dev = inode->i_sb->s_dev; | ||
1178 | } else { | ||
1179 | __entry->fileid = 0; | ||
1180 | __entry->dev = 0; | ||
1181 | } | ||
1182 | __assign_str(dstaddr, clp ? | ||
1183 | rpc_peeraddr2str(clp->cl_rpcclient, | ||
1184 | RPC_DISPLAY_ADDR) : "unknown") | ||
1185 | __entry->stateid_seq = | ||
1186 | be32_to_cpu(stateid->seqid); | ||
1187 | __entry->stateid_hash = | ||
1188 | nfs_stateid_hash(stateid); | ||
1189 | ), | ||
1190 | |||
1191 | TP_printk( | ||
1192 | "error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " | ||
1193 | "stateid=%d:0x%08x dstaddr=%s", | ||
1194 | __entry->error, | ||
1195 | show_nfsv4_errors(__entry->error), | ||
1196 | MAJOR(__entry->dev), MINOR(__entry->dev), | ||
1197 | (unsigned long long)__entry->fileid, | ||
1198 | __entry->fhandle, | ||
1199 | __entry->stateid_seq, __entry->stateid_hash, | ||
1200 | __get_str(dstaddr) | ||
1201 | ) | ||
1202 | ); | ||
1203 | |||
1204 | #define DEFINE_NFS4_INODE_STATEID_CALLBACK_EVENT(name) \ | ||
1205 | DEFINE_EVENT(nfs4_inode_stateid_callback_event, name, \ | ||
1206 | TP_PROTO( \ | ||
1207 | const struct nfs_client *clp, \ | ||
1208 | const struct nfs_fh *fhandle, \ | ||
1209 | const struct inode *inode, \ | ||
1210 | const nfs4_stateid *stateid, \ | ||
1211 | int error \ | ||
1212 | ), \ | ||
1213 | TP_ARGS(clp, fhandle, inode, stateid, error)) | ||
1214 | DEFINE_NFS4_INODE_STATEID_CALLBACK_EVENT(nfs4_cb_recall); | ||
1215 | DEFINE_NFS4_INODE_STATEID_CALLBACK_EVENT(nfs4_cb_layoutrecall_file); | ||
946 | 1216 | ||
947 | DECLARE_EVENT_CLASS(nfs4_idmap_event, | 1217 | DECLARE_EVENT_CLASS(nfs4_idmap_event, |
948 | TP_PROTO( | 1218 | TP_PROTO( |
@@ -1005,28 +1275,37 @@ DECLARE_EVENT_CLASS(nfs4_read_event, | |||
1005 | __field(loff_t, offset) | 1275 | __field(loff_t, offset) |
1006 | __field(size_t, count) | 1276 | __field(size_t, count) |
1007 | __field(int, error) | 1277 | __field(int, error) |
1278 | __field(int, stateid_seq) | ||
1279 | __field(u32, stateid_hash) | ||
1008 | ), | 1280 | ), |
1009 | 1281 | ||
1010 | TP_fast_assign( | 1282 | TP_fast_assign( |
1011 | const struct inode *inode = hdr->inode; | 1283 | const struct inode *inode = hdr->inode; |
1284 | const struct nfs4_state *state = | ||
1285 | hdr->args.context->state; | ||
1012 | __entry->dev = inode->i_sb->s_dev; | 1286 | __entry->dev = inode->i_sb->s_dev; |
1013 | __entry->fileid = NFS_FILEID(inode); | 1287 | __entry->fileid = NFS_FILEID(inode); |
1014 | __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); | 1288 | __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); |
1015 | __entry->offset = hdr->args.offset; | 1289 | __entry->offset = hdr->args.offset; |
1016 | __entry->count = hdr->args.count; | 1290 | __entry->count = hdr->args.count; |
1017 | __entry->error = error; | 1291 | __entry->error = error; |
1292 | __entry->stateid_seq = | ||
1293 | be32_to_cpu(state->stateid.seqid); | ||
1294 | __entry->stateid_hash = | ||
1295 | nfs_stateid_hash(&state->stateid); | ||
1018 | ), | 1296 | ), |
1019 | 1297 | ||
1020 | TP_printk( | 1298 | TP_printk( |
1021 | "error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " | 1299 | "error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " |
1022 | "offset=%lld count=%zu", | 1300 | "offset=%lld count=%zu stateid=%d:0x%08x", |
1023 | __entry->error, | 1301 | __entry->error, |
1024 | show_nfsv4_errors(__entry->error), | 1302 | show_nfsv4_errors(__entry->error), |
1025 | MAJOR(__entry->dev), MINOR(__entry->dev), | 1303 | MAJOR(__entry->dev), MINOR(__entry->dev), |
1026 | (unsigned long long)__entry->fileid, | 1304 | (unsigned long long)__entry->fileid, |
1027 | __entry->fhandle, | 1305 | __entry->fhandle, |
1028 | (long long)__entry->offset, | 1306 | (long long)__entry->offset, |
1029 | __entry->count | 1307 | __entry->count, |
1308 | __entry->stateid_seq, __entry->stateid_hash | ||
1030 | ) | 1309 | ) |
1031 | ); | 1310 | ); |
1032 | #define DEFINE_NFS4_READ_EVENT(name) \ | 1311 | #define DEFINE_NFS4_READ_EVENT(name) \ |
@@ -1056,28 +1335,37 @@ DECLARE_EVENT_CLASS(nfs4_write_event, | |||
1056 | __field(loff_t, offset) | 1335 | __field(loff_t, offset) |
1057 | __field(size_t, count) | 1336 | __field(size_t, count) |
1058 | __field(int, error) | 1337 | __field(int, error) |
1338 | __field(int, stateid_seq) | ||
1339 | __field(u32, stateid_hash) | ||
1059 | ), | 1340 | ), |
1060 | 1341 | ||
1061 | TP_fast_assign( | 1342 | TP_fast_assign( |
1062 | const struct inode *inode = hdr->inode; | 1343 | const struct inode *inode = hdr->inode; |
1344 | const struct nfs4_state *state = | ||
1345 | hdr->args.context->state; | ||
1063 | __entry->dev = inode->i_sb->s_dev; | 1346 | __entry->dev = inode->i_sb->s_dev; |
1064 | __entry->fileid = NFS_FILEID(inode); | 1347 | __entry->fileid = NFS_FILEID(inode); |
1065 | __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); | 1348 | __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); |
1066 | __entry->offset = hdr->args.offset; | 1349 | __entry->offset = hdr->args.offset; |
1067 | __entry->count = hdr->args.count; | 1350 | __entry->count = hdr->args.count; |
1068 | __entry->error = error; | 1351 | __entry->error = error; |
1352 | __entry->stateid_seq = | ||
1353 | be32_to_cpu(state->stateid.seqid); | ||
1354 | __entry->stateid_hash = | ||
1355 | nfs_stateid_hash(&state->stateid); | ||
1069 | ), | 1356 | ), |
1070 | 1357 | ||
1071 | TP_printk( | 1358 | TP_printk( |
1072 | "error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " | 1359 | "error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " |
1073 | "offset=%lld count=%zu", | 1360 | "offset=%lld count=%zu stateid=%d:0x%08x", |
1074 | __entry->error, | 1361 | __entry->error, |
1075 | show_nfsv4_errors(__entry->error), | 1362 | show_nfsv4_errors(__entry->error), |
1076 | MAJOR(__entry->dev), MINOR(__entry->dev), | 1363 | MAJOR(__entry->dev), MINOR(__entry->dev), |
1077 | (unsigned long long)__entry->fileid, | 1364 | (unsigned long long)__entry->fileid, |
1078 | __entry->fhandle, | 1365 | __entry->fhandle, |
1079 | (long long)__entry->offset, | 1366 | (long long)__entry->offset, |
1080 | __entry->count | 1367 | __entry->count, |
1368 | __entry->stateid_seq, __entry->stateid_hash | ||
1081 | ) | 1369 | ) |
1082 | ); | 1370 | ); |
1083 | 1371 | ||
@@ -1154,10 +1442,11 @@ TRACE_EVENT(nfs4_layoutget, | |||
1154 | const struct nfs_open_context *ctx, | 1442 | const struct nfs_open_context *ctx, |
1155 | const struct pnfs_layout_range *args, | 1443 | const struct pnfs_layout_range *args, |
1156 | const struct pnfs_layout_range *res, | 1444 | const struct pnfs_layout_range *res, |
1445 | const nfs4_stateid *layout_stateid, | ||
1157 | int error | 1446 | int error |
1158 | ), | 1447 | ), |
1159 | 1448 | ||
1160 | TP_ARGS(ctx, args, res, error), | 1449 | TP_ARGS(ctx, args, res, layout_stateid, error), |
1161 | 1450 | ||
1162 | TP_STRUCT__entry( | 1451 | TP_STRUCT__entry( |
1163 | __field(dev_t, dev) | 1452 | __field(dev_t, dev) |
@@ -1167,10 +1456,15 @@ TRACE_EVENT(nfs4_layoutget, | |||
1167 | __field(u64, offset) | 1456 | __field(u64, offset) |
1168 | __field(u64, count) | 1457 | __field(u64, count) |
1169 | __field(int, error) | 1458 | __field(int, error) |
1459 | __field(int, stateid_seq) | ||
1460 | __field(u32, stateid_hash) | ||
1461 | __field(int, layoutstateid_seq) | ||
1462 | __field(u32, layoutstateid_hash) | ||
1170 | ), | 1463 | ), |
1171 | 1464 | ||
1172 | TP_fast_assign( | 1465 | TP_fast_assign( |
1173 | const struct inode *inode = d_inode(ctx->dentry); | 1466 | const struct inode *inode = d_inode(ctx->dentry); |
1467 | const struct nfs4_state *state = ctx->state; | ||
1174 | __entry->dev = inode->i_sb->s_dev; | 1468 | __entry->dev = inode->i_sb->s_dev; |
1175 | __entry->fileid = NFS_FILEID(inode); | 1469 | __entry->fileid = NFS_FILEID(inode); |
1176 | __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); | 1470 | __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); |
@@ -1178,11 +1472,25 @@ TRACE_EVENT(nfs4_layoutget, | |||
1178 | __entry->offset = args->offset; | 1472 | __entry->offset = args->offset; |
1179 | __entry->count = args->length; | 1473 | __entry->count = args->length; |
1180 | __entry->error = error; | 1474 | __entry->error = error; |
1475 | __entry->stateid_seq = | ||
1476 | be32_to_cpu(state->stateid.seqid); | ||
1477 | __entry->stateid_hash = | ||
1478 | nfs_stateid_hash(&state->stateid); | ||
1479 | if (!error) { | ||
1480 | __entry->layoutstateid_seq = | ||
1481 | be32_to_cpu(layout_stateid->seqid); | ||
1482 | __entry->layoutstateid_hash = | ||
1483 | nfs_stateid_hash(layout_stateid); | ||
1484 | } else { | ||
1485 | __entry->layoutstateid_seq = 0; | ||
1486 | __entry->layoutstateid_hash = 0; | ||
1487 | } | ||
1181 | ), | 1488 | ), |
1182 | 1489 | ||
1183 | TP_printk( | 1490 | TP_printk( |
1184 | "error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " | 1491 | "error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " |
1185 | "iomode=%s offset=%llu count=%llu", | 1492 | "iomode=%s offset=%llu count=%llu stateid=%d:0x%08x " |
1493 | "layoutstateid=%d:0x%08x", | ||
1186 | __entry->error, | 1494 | __entry->error, |
1187 | show_nfsv4_errors(__entry->error), | 1495 | show_nfsv4_errors(__entry->error), |
1188 | MAJOR(__entry->dev), MINOR(__entry->dev), | 1496 | MAJOR(__entry->dev), MINOR(__entry->dev), |
@@ -1190,14 +1498,83 @@ TRACE_EVENT(nfs4_layoutget, | |||
1190 | __entry->fhandle, | 1498 | __entry->fhandle, |
1191 | show_pnfs_iomode(__entry->iomode), | 1499 | show_pnfs_iomode(__entry->iomode), |
1192 | (unsigned long long)__entry->offset, | 1500 | (unsigned long long)__entry->offset, |
1193 | (unsigned long long)__entry->count | 1501 | (unsigned long long)__entry->count, |
1502 | __entry->stateid_seq, __entry->stateid_hash, | ||
1503 | __entry->layoutstateid_seq, __entry->layoutstateid_hash | ||
1194 | ) | 1504 | ) |
1195 | ); | 1505 | ); |
1196 | 1506 | ||
1197 | DEFINE_NFS4_INODE_EVENT(nfs4_layoutcommit); | 1507 | DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_layoutcommit); |
1198 | DEFINE_NFS4_INODE_EVENT(nfs4_layoutreturn); | 1508 | DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_layoutreturn); |
1199 | DEFINE_NFS4_INODE_EVENT(nfs4_layoutreturn_on_close); | 1509 | DEFINE_NFS4_INODE_EVENT(nfs4_layoutreturn_on_close); |
1200 | 1510 | ||
1511 | #define show_pnfs_update_layout_reason(reason) \ | ||
1512 | __print_symbolic(reason, \ | ||
1513 | { PNFS_UPDATE_LAYOUT_UNKNOWN, "unknown" }, \ | ||
1514 | { PNFS_UPDATE_LAYOUT_NO_PNFS, "no pnfs" }, \ | ||
1515 | { PNFS_UPDATE_LAYOUT_RD_ZEROLEN, "read+zerolen" }, \ | ||
1516 | { PNFS_UPDATE_LAYOUT_MDSTHRESH, "mdsthresh" }, \ | ||
1517 | { PNFS_UPDATE_LAYOUT_NOMEM, "nomem" }, \ | ||
1518 | { PNFS_UPDATE_LAYOUT_BULK_RECALL, "bulk recall" }, \ | ||
1519 | { PNFS_UPDATE_LAYOUT_IO_TEST_FAIL, "io test fail" }, \ | ||
1520 | { PNFS_UPDATE_LAYOUT_FOUND_CACHED, "found cached" }, \ | ||
1521 | { PNFS_UPDATE_LAYOUT_RETURN, "layoutreturn" }, \ | ||
1522 | { PNFS_UPDATE_LAYOUT_BLOCKED, "layouts blocked" }, \ | ||
1523 | { PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET, "sent layoutget" }) | ||
1524 | |||
1525 | TRACE_EVENT(pnfs_update_layout, | ||
1526 | TP_PROTO(struct inode *inode, | ||
1527 | loff_t pos, | ||
1528 | u64 count, | ||
1529 | enum pnfs_iomode iomode, | ||
1530 | struct pnfs_layout_hdr *lo, | ||
1531 | enum pnfs_update_layout_reason reason | ||
1532 | ), | ||
1533 | TP_ARGS(inode, pos, count, iomode, lo, reason), | ||
1534 | TP_STRUCT__entry( | ||
1535 | __field(dev_t, dev) | ||
1536 | __field(u64, fileid) | ||
1537 | __field(u32, fhandle) | ||
1538 | __field(loff_t, pos) | ||
1539 | __field(u64, count) | ||
1540 | __field(enum pnfs_iomode, iomode) | ||
1541 | __field(int, layoutstateid_seq) | ||
1542 | __field(u32, layoutstateid_hash) | ||
1543 | __field(enum pnfs_update_layout_reason, reason) | ||
1544 | ), | ||
1545 | TP_fast_assign( | ||
1546 | __entry->dev = inode->i_sb->s_dev; | ||
1547 | __entry->fileid = NFS_FILEID(inode); | ||
1548 | __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); | ||
1549 | __entry->pos = pos; | ||
1550 | __entry->count = count; | ||
1551 | __entry->iomode = iomode; | ||
1552 | __entry->reason = reason; | ||
1553 | if (lo != NULL) { | ||
1554 | __entry->layoutstateid_seq = | ||
1555 | be32_to_cpu(lo->plh_stateid.seqid); | ||
1556 | __entry->layoutstateid_hash = | ||
1557 | nfs_stateid_hash(&lo->plh_stateid); | ||
1558 | } else { | ||
1559 | __entry->layoutstateid_seq = 0; | ||
1560 | __entry->layoutstateid_hash = 0; | ||
1561 | } | ||
1562 | ), | ||
1563 | TP_printk( | ||
1564 | "fileid=%02x:%02x:%llu fhandle=0x%08x " | ||
1565 | "iomode=%s pos=%llu count=%llu " | ||
1566 | "layoutstateid=%d:0x%08x (%s)", | ||
1567 | MAJOR(__entry->dev), MINOR(__entry->dev), | ||
1568 | (unsigned long long)__entry->fileid, | ||
1569 | __entry->fhandle, | ||
1570 | show_pnfs_iomode(__entry->iomode), | ||
1571 | (unsigned long long)__entry->pos, | ||
1572 | (unsigned long long)__entry->count, | ||
1573 | __entry->layoutstateid_seq, __entry->layoutstateid_hash, | ||
1574 | show_pnfs_update_layout_reason(__entry->reason) | ||
1575 | ) | ||
1576 | ); | ||
1577 | |||
1201 | #endif /* CONFIG_NFS_V4_1 */ | 1578 | #endif /* CONFIG_NFS_V4_1 */ |
1202 | 1579 | ||
1203 | #endif /* _TRACE_NFS4_H */ | 1580 | #endif /* _TRACE_NFS4_H */ |
diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h index 59f838cdc009..9f80a086b612 100644 --- a/fs/nfs/nfstrace.h +++ b/fs/nfs/nfstrace.h | |||
@@ -39,7 +39,6 @@ | |||
39 | { 1 << NFS_INO_INVALIDATING, "INVALIDATING" }, \ | 39 | { 1 << NFS_INO_INVALIDATING, "INVALIDATING" }, \ |
40 | { 1 << NFS_INO_FLUSHING, "FLUSHING" }, \ | 40 | { 1 << NFS_INO_FLUSHING, "FLUSHING" }, \ |
41 | { 1 << NFS_INO_FSCACHE, "FSCACHE" }, \ | 41 | { 1 << NFS_INO_FSCACHE, "FSCACHE" }, \ |
42 | { 1 << NFS_INO_COMMIT, "COMMIT" }, \ | ||
43 | { 1 << NFS_INO_LAYOUTCOMMIT, "NEED_LAYOUTCOMMIT" }, \ | 42 | { 1 << NFS_INO_LAYOUTCOMMIT, "NEED_LAYOUTCOMMIT" }, \ |
44 | { 1 << NFS_INO_LAYOUTCOMMITTING, "LAYOUTCOMMIT" }) | 43 | { 1 << NFS_INO_LAYOUTCOMMITTING, "LAYOUTCOMMIT" }) |
45 | 44 | ||
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 452a011ba0d8..8ce4f61cbaa5 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c | |||
@@ -101,53 +101,18 @@ nfs_page_free(struct nfs_page *p) | |||
101 | kmem_cache_free(nfs_page_cachep, p); | 101 | kmem_cache_free(nfs_page_cachep, p); |
102 | } | 102 | } |
103 | 103 | ||
104 | static void | ||
105 | nfs_iocounter_inc(struct nfs_io_counter *c) | ||
106 | { | ||
107 | atomic_inc(&c->io_count); | ||
108 | } | ||
109 | |||
110 | static void | ||
111 | nfs_iocounter_dec(struct nfs_io_counter *c) | ||
112 | { | ||
113 | if (atomic_dec_and_test(&c->io_count)) { | ||
114 | clear_bit(NFS_IO_INPROGRESS, &c->flags); | ||
115 | smp_mb__after_atomic(); | ||
116 | wake_up_bit(&c->flags, NFS_IO_INPROGRESS); | ||
117 | } | ||
118 | } | ||
119 | |||
120 | static int | ||
121 | __nfs_iocounter_wait(struct nfs_io_counter *c) | ||
122 | { | ||
123 | wait_queue_head_t *wq = bit_waitqueue(&c->flags, NFS_IO_INPROGRESS); | ||
124 | DEFINE_WAIT_BIT(q, &c->flags, NFS_IO_INPROGRESS); | ||
125 | int ret = 0; | ||
126 | |||
127 | do { | ||
128 | prepare_to_wait(wq, &q.wait, TASK_KILLABLE); | ||
129 | set_bit(NFS_IO_INPROGRESS, &c->flags); | ||
130 | if (atomic_read(&c->io_count) == 0) | ||
131 | break; | ||
132 | ret = nfs_wait_bit_killable(&q.key, TASK_KILLABLE); | ||
133 | } while (atomic_read(&c->io_count) != 0 && !ret); | ||
134 | finish_wait(wq, &q.wait); | ||
135 | return ret; | ||
136 | } | ||
137 | |||
138 | /** | 104 | /** |
139 | * nfs_iocounter_wait - wait for i/o to complete | 105 | * nfs_iocounter_wait - wait for i/o to complete |
140 | * @c: nfs_io_counter to use | 106 | * @l_ctx: nfs_lock_context with io_counter to use |
141 | * | 107 | * |
142 | * returns -ERESTARTSYS if interrupted by a fatal signal. | 108 | * returns -ERESTARTSYS if interrupted by a fatal signal. |
143 | * Otherwise returns 0 once the io_count hits 0. | 109 | * Otherwise returns 0 once the io_count hits 0. |
144 | */ | 110 | */ |
145 | int | 111 | int |
146 | nfs_iocounter_wait(struct nfs_io_counter *c) | 112 | nfs_iocounter_wait(struct nfs_lock_context *l_ctx) |
147 | { | 113 | { |
148 | if (atomic_read(&c->io_count) == 0) | 114 | return wait_on_atomic_t(&l_ctx->io_count, nfs_wait_atomic_killable, |
149 | return 0; | 115 | TASK_KILLABLE); |
150 | return __nfs_iocounter_wait(c); | ||
151 | } | 116 | } |
152 | 117 | ||
153 | /* | 118 | /* |
@@ -370,7 +335,7 @@ nfs_create_request(struct nfs_open_context *ctx, struct page *page, | |||
370 | return ERR_CAST(l_ctx); | 335 | return ERR_CAST(l_ctx); |
371 | } | 336 | } |
372 | req->wb_lock_context = l_ctx; | 337 | req->wb_lock_context = l_ctx; |
373 | nfs_iocounter_inc(&l_ctx->io_count); | 338 | atomic_inc(&l_ctx->io_count); |
374 | 339 | ||
375 | /* Initialize the request struct. Initially, we assume a | 340 | /* Initialize the request struct. Initially, we assume a |
376 | * long write-back delay. This will be adjusted in | 341 | * long write-back delay. This will be adjusted in |
@@ -431,7 +396,8 @@ static void nfs_clear_request(struct nfs_page *req) | |||
431 | req->wb_page = NULL; | 396 | req->wb_page = NULL; |
432 | } | 397 | } |
433 | if (l_ctx != NULL) { | 398 | if (l_ctx != NULL) { |
434 | nfs_iocounter_dec(&l_ctx->io_count); | 399 | if (atomic_dec_and_test(&l_ctx->io_count)) |
400 | wake_up_atomic_t(&l_ctx->io_count); | ||
435 | nfs_put_lock_context(l_ctx); | 401 | nfs_put_lock_context(l_ctx); |
436 | req->wb_lock_context = NULL; | 402 | req->wb_lock_context = NULL; |
437 | } | 403 | } |
@@ -664,22 +630,11 @@ EXPORT_SYMBOL_GPL(nfs_initiate_pgio); | |||
664 | * @desc: IO descriptor | 630 | * @desc: IO descriptor |
665 | * @hdr: pageio header | 631 | * @hdr: pageio header |
666 | */ | 632 | */ |
667 | static int nfs_pgio_error(struct nfs_pageio_descriptor *desc, | 633 | static void nfs_pgio_error(struct nfs_pgio_header *hdr) |
668 | struct nfs_pgio_header *hdr) | ||
669 | { | 634 | { |
670 | struct nfs_pgio_mirror *mirror; | ||
671 | u32 midx; | ||
672 | |||
673 | set_bit(NFS_IOHDR_REDO, &hdr->flags); | 635 | set_bit(NFS_IOHDR_REDO, &hdr->flags); |
674 | nfs_pgio_data_destroy(hdr); | 636 | nfs_pgio_data_destroy(hdr); |
675 | hdr->completion_ops->completion(hdr); | 637 | hdr->completion_ops->completion(hdr); |
676 | /* TODO: Make sure it's right to clean up all mirrors here | ||
677 | * and not just hdr->pgio_mirror_idx */ | ||
678 | for (midx = 0; midx < desc->pg_mirror_count; midx++) { | ||
679 | mirror = &desc->pg_mirrors[midx]; | ||
680 | desc->pg_completion_ops->error_cleanup(&mirror->pg_list); | ||
681 | } | ||
682 | return -ENOMEM; | ||
683 | } | 638 | } |
684 | 639 | ||
685 | /** | 640 | /** |
@@ -800,8 +755,11 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, | |||
800 | unsigned int pagecount, pageused; | 755 | unsigned int pagecount, pageused; |
801 | 756 | ||
802 | pagecount = nfs_page_array_len(mirror->pg_base, mirror->pg_count); | 757 | pagecount = nfs_page_array_len(mirror->pg_base, mirror->pg_count); |
803 | if (!nfs_pgarray_set(&hdr->page_array, pagecount)) | 758 | if (!nfs_pgarray_set(&hdr->page_array, pagecount)) { |
804 | return nfs_pgio_error(desc, hdr); | 759 | nfs_pgio_error(hdr); |
760 | desc->pg_error = -ENOMEM; | ||
761 | return desc->pg_error; | ||
762 | } | ||
805 | 763 | ||
806 | nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq); | 764 | nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq); |
807 | pages = hdr->page_array.pagevec; | 765 | pages = hdr->page_array.pagevec; |
@@ -819,8 +777,11 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, | |||
819 | *pages++ = last_page = req->wb_page; | 777 | *pages++ = last_page = req->wb_page; |
820 | } | 778 | } |
821 | } | 779 | } |
822 | if (WARN_ON_ONCE(pageused != pagecount)) | 780 | if (WARN_ON_ONCE(pageused != pagecount)) { |
823 | return nfs_pgio_error(desc, hdr); | 781 | nfs_pgio_error(hdr); |
782 | desc->pg_error = -EINVAL; | ||
783 | return desc->pg_error; | ||
784 | } | ||
824 | 785 | ||
825 | if ((desc->pg_ioflags & FLUSH_COND_STABLE) && | 786 | if ((desc->pg_ioflags & FLUSH_COND_STABLE) && |
826 | (desc->pg_moreio || nfs_reqs_to_commit(&cinfo))) | 787 | (desc->pg_moreio || nfs_reqs_to_commit(&cinfo))) |
@@ -835,18 +796,13 @@ EXPORT_SYMBOL_GPL(nfs_generic_pgio); | |||
835 | 796 | ||
836 | static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc) | 797 | static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc) |
837 | { | 798 | { |
838 | struct nfs_pgio_mirror *mirror; | ||
839 | struct nfs_pgio_header *hdr; | 799 | struct nfs_pgio_header *hdr; |
840 | int ret; | 800 | int ret; |
841 | 801 | ||
842 | mirror = nfs_pgio_current_mirror(desc); | ||
843 | |||
844 | hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); | 802 | hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); |
845 | if (!hdr) { | 803 | if (!hdr) { |
846 | /* TODO: make sure this is right with mirroring - or | 804 | desc->pg_error = -ENOMEM; |
847 | * should it back out all mirrors? */ | 805 | return desc->pg_error; |
848 | desc->pg_completion_ops->error_cleanup(&mirror->pg_list); | ||
849 | return -ENOMEM; | ||
850 | } | 806 | } |
851 | nfs_pgheader_init(desc, hdr, nfs_pgio_header_free); | 807 | nfs_pgheader_init(desc, hdr, nfs_pgio_header_free); |
852 | ret = nfs_generic_pgio(desc, hdr); | 808 | ret = nfs_generic_pgio(desc, hdr); |
@@ -874,6 +830,9 @@ static int nfs_pageio_setup_mirroring(struct nfs_pageio_descriptor *pgio, | |||
874 | 830 | ||
875 | mirror_count = pgio->pg_ops->pg_get_mirror_count(pgio, req); | 831 | mirror_count = pgio->pg_ops->pg_get_mirror_count(pgio, req); |
876 | 832 | ||
833 | if (pgio->pg_error < 0) | ||
834 | return pgio->pg_error; | ||
835 | |||
877 | if (!mirror_count || mirror_count > NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX) | 836 | if (!mirror_count || mirror_count > NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX) |
878 | return -EINVAL; | 837 | return -EINVAL; |
879 | 838 | ||
@@ -903,12 +862,6 @@ static void nfs_pageio_cleanup_mirroring(struct nfs_pageio_descriptor *pgio) | |||
903 | pgio->pg_mirrors_dynamic = NULL; | 862 | pgio->pg_mirrors_dynamic = NULL; |
904 | } | 863 | } |
905 | 864 | ||
906 | static bool nfs_match_open_context(const struct nfs_open_context *ctx1, | ||
907 | const struct nfs_open_context *ctx2) | ||
908 | { | ||
909 | return ctx1->cred == ctx2->cred && ctx1->state == ctx2->state; | ||
910 | } | ||
911 | |||
912 | static bool nfs_match_lock_context(const struct nfs_lock_context *l1, | 865 | static bool nfs_match_lock_context(const struct nfs_lock_context *l1, |
913 | const struct nfs_lock_context *l2) | 866 | const struct nfs_lock_context *l2) |
914 | { | 867 | { |
@@ -982,6 +935,8 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc, | |||
982 | } else { | 935 | } else { |
983 | if (desc->pg_ops->pg_init) | 936 | if (desc->pg_ops->pg_init) |
984 | desc->pg_ops->pg_init(desc, req); | 937 | desc->pg_ops->pg_init(desc, req); |
938 | if (desc->pg_error < 0) | ||
939 | return 0; | ||
985 | mirror->pg_base = req->wb_pgbase; | 940 | mirror->pg_base = req->wb_pgbase; |
986 | } | 941 | } |
987 | if (!nfs_can_coalesce_requests(prev, req, desc)) | 942 | if (!nfs_can_coalesce_requests(prev, req, desc)) |
@@ -1147,6 +1102,8 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, | |||
1147 | bytes = req->wb_bytes; | 1102 | bytes = req->wb_bytes; |
1148 | 1103 | ||
1149 | nfs_pageio_setup_mirroring(desc, req); | 1104 | nfs_pageio_setup_mirroring(desc, req); |
1105 | if (desc->pg_error < 0) | ||
1106 | goto out_failed; | ||
1150 | 1107 | ||
1151 | for (midx = 0; midx < desc->pg_mirror_count; midx++) { | 1108 | for (midx = 0; midx < desc->pg_mirror_count; midx++) { |
1152 | if (midx) { | 1109 | if (midx) { |
@@ -1163,7 +1120,8 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, | |||
1163 | 1120 | ||
1164 | if (IS_ERR(dupreq)) { | 1121 | if (IS_ERR(dupreq)) { |
1165 | nfs_page_group_unlock(req); | 1122 | nfs_page_group_unlock(req); |
1166 | return 0; | 1123 | desc->pg_error = PTR_ERR(dupreq); |
1124 | goto out_failed; | ||
1167 | } | 1125 | } |
1168 | 1126 | ||
1169 | nfs_lock_request(dupreq); | 1127 | nfs_lock_request(dupreq); |
@@ -1176,10 +1134,32 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, | |||
1176 | if (nfs_pgio_has_mirroring(desc)) | 1134 | if (nfs_pgio_has_mirroring(desc)) |
1177 | desc->pg_mirror_idx = midx; | 1135 | desc->pg_mirror_idx = midx; |
1178 | if (!nfs_pageio_add_request_mirror(desc, dupreq)) | 1136 | if (!nfs_pageio_add_request_mirror(desc, dupreq)) |
1179 | return 0; | 1137 | goto out_failed; |
1180 | } | 1138 | } |
1181 | 1139 | ||
1182 | return 1; | 1140 | return 1; |
1141 | |||
1142 | out_failed: | ||
1143 | /* | ||
1144 | * We might have failed before sending any reqs over wire. | ||
1145 | * Clean up rest of the reqs in mirror pg_list. | ||
1146 | */ | ||
1147 | if (desc->pg_error) { | ||
1148 | struct nfs_pgio_mirror *mirror; | ||
1149 | void (*func)(struct list_head *); | ||
1150 | |||
1151 | /* remember fatal errors */ | ||
1152 | if (nfs_error_is_fatal(desc->pg_error)) | ||
1153 | mapping_set_error(desc->pg_inode->i_mapping, | ||
1154 | desc->pg_error); | ||
1155 | |||
1156 | func = desc->pg_completion_ops->error_cleanup; | ||
1157 | for (midx = 0; midx < desc->pg_mirror_count; midx++) { | ||
1158 | mirror = &desc->pg_mirrors[midx]; | ||
1159 | func(&mirror->pg_list); | ||
1160 | } | ||
1161 | } | ||
1162 | return 0; | ||
1183 | } | 1163 | } |
1184 | 1164 | ||
1185 | /* | 1165 | /* |
@@ -1232,7 +1212,7 @@ int nfs_pageio_resend(struct nfs_pageio_descriptor *desc, | |||
1232 | nfs_pageio_complete(desc); | 1212 | nfs_pageio_complete(desc); |
1233 | if (!list_empty(&failed)) { | 1213 | if (!list_empty(&failed)) { |
1234 | list_move(&failed, &hdr->pages); | 1214 | list_move(&failed, &hdr->pages); |
1235 | return -EIO; | 1215 | return desc->pg_error < 0 ? desc->pg_error : -EIO; |
1236 | } | 1216 | } |
1237 | return 0; | 1217 | return 0; |
1238 | } | 1218 | } |
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index bec0384499f7..a3592cc34a20 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c | |||
@@ -53,7 +53,7 @@ static DEFINE_SPINLOCK(pnfs_spinlock); | |||
53 | static LIST_HEAD(pnfs_modules_tbl); | 53 | static LIST_HEAD(pnfs_modules_tbl); |
54 | 54 | ||
55 | static int | 55 | static int |
56 | pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, nfs4_stateid stateid, | 56 | pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, const nfs4_stateid *stateid, |
57 | enum pnfs_iomode iomode, bool sync); | 57 | enum pnfs_iomode iomode, bool sync); |
58 | 58 | ||
59 | /* Return the registered pnfs layout driver module matching given id */ | 59 | /* Return the registered pnfs layout driver module matching given id */ |
@@ -385,13 +385,13 @@ static void pnfs_layoutreturn_before_put_lseg(struct pnfs_layout_segment *lseg, | |||
385 | enum pnfs_iomode iomode; | 385 | enum pnfs_iomode iomode; |
386 | bool send; | 386 | bool send; |
387 | 387 | ||
388 | stateid = lo->plh_stateid; | 388 | nfs4_stateid_copy(&stateid, &lo->plh_stateid); |
389 | iomode = lo->plh_return_iomode; | 389 | iomode = lo->plh_return_iomode; |
390 | send = pnfs_prepare_layoutreturn(lo); | 390 | send = pnfs_prepare_layoutreturn(lo); |
391 | spin_unlock(&inode->i_lock); | 391 | spin_unlock(&inode->i_lock); |
392 | if (send) { | 392 | if (send) { |
393 | /* Send an async layoutreturn so we dont deadlock */ | 393 | /* Send an async layoutreturn so we dont deadlock */ |
394 | pnfs_send_layoutreturn(lo, stateid, iomode, false); | 394 | pnfs_send_layoutreturn(lo, &stateid, iomode, false); |
395 | } | 395 | } |
396 | } else | 396 | } else |
397 | spin_unlock(&inode->i_lock); | 397 | spin_unlock(&inode->i_lock); |
@@ -566,10 +566,10 @@ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg, | |||
566 | int | 566 | int |
567 | pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, | 567 | pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, |
568 | struct list_head *tmp_list, | 568 | struct list_head *tmp_list, |
569 | struct pnfs_layout_range *recall_range) | 569 | const struct pnfs_layout_range *recall_range) |
570 | { | 570 | { |
571 | struct pnfs_layout_segment *lseg, *next; | 571 | struct pnfs_layout_segment *lseg, *next; |
572 | int invalid = 0, removed = 0; | 572 | int remaining = 0; |
573 | 573 | ||
574 | dprintk("%s:Begin lo %p\n", __func__, lo); | 574 | dprintk("%s:Begin lo %p\n", __func__, lo); |
575 | 575 | ||
@@ -582,11 +582,11 @@ pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, | |||
582 | "offset %llu length %llu\n", __func__, | 582 | "offset %llu length %llu\n", __func__, |
583 | lseg, lseg->pls_range.iomode, lseg->pls_range.offset, | 583 | lseg, lseg->pls_range.iomode, lseg->pls_range.offset, |
584 | lseg->pls_range.length); | 584 | lseg->pls_range.length); |
585 | invalid++; | 585 | if (!mark_lseg_invalid(lseg, tmp_list)) |
586 | removed += mark_lseg_invalid(lseg, tmp_list); | 586 | remaining++; |
587 | } | 587 | } |
588 | dprintk("%s:Return %i\n", __func__, invalid - removed); | 588 | dprintk("%s:Return %i\n", __func__, remaining); |
589 | return invalid - removed; | 589 | return remaining; |
590 | } | 590 | } |
591 | 591 | ||
592 | /* note free_me must contain lsegs from a single layout_hdr */ | 592 | /* note free_me must contain lsegs from a single layout_hdr */ |
@@ -618,7 +618,6 @@ pnfs_destroy_layout(struct nfs_inode *nfsi) | |||
618 | pnfs_get_layout_hdr(lo); | 618 | pnfs_get_layout_hdr(lo); |
619 | pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RO_FAILED); | 619 | pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RO_FAILED); |
620 | pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RW_FAILED); | 620 | pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RW_FAILED); |
621 | pnfs_clear_retry_layoutget(lo); | ||
622 | spin_unlock(&nfsi->vfs_inode.i_lock); | 621 | spin_unlock(&nfsi->vfs_inode.i_lock); |
623 | pnfs_free_lseg_list(&tmp_list); | 622 | pnfs_free_lseg_list(&tmp_list); |
624 | pnfs_put_layout_hdr(lo); | 623 | pnfs_put_layout_hdr(lo); |
@@ -703,6 +702,8 @@ pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list, | |||
703 | ret = -EAGAIN; | 702 | ret = -EAGAIN; |
704 | spin_unlock(&inode->i_lock); | 703 | spin_unlock(&inode->i_lock); |
705 | pnfs_free_lseg_list(&lseg_list); | 704 | pnfs_free_lseg_list(&lseg_list); |
705 | /* Free all lsegs that are attached to commit buckets */ | ||
706 | nfs_commit_inode(inode, 0); | ||
706 | pnfs_put_layout_hdr(lo); | 707 | pnfs_put_layout_hdr(lo); |
707 | iput(inode); | 708 | iput(inode); |
708 | } | 709 | } |
@@ -826,7 +827,7 @@ pnfs_layoutgets_blocked(const struct pnfs_layout_hdr *lo) | |||
826 | 827 | ||
827 | int | 828 | int |
828 | pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo, | 829 | pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo, |
829 | struct pnfs_layout_range *range, | 830 | const struct pnfs_layout_range *range, |
830 | struct nfs4_state *open_state) | 831 | struct nfs4_state *open_state) |
831 | { | 832 | { |
832 | int status = 0; | 833 | int status = 0; |
@@ -861,7 +862,7 @@ pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo, | |||
861 | static struct pnfs_layout_segment * | 862 | static struct pnfs_layout_segment * |
862 | send_layoutget(struct pnfs_layout_hdr *lo, | 863 | send_layoutget(struct pnfs_layout_hdr *lo, |
863 | struct nfs_open_context *ctx, | 864 | struct nfs_open_context *ctx, |
864 | struct pnfs_layout_range *range, | 865 | const struct pnfs_layout_range *range, |
865 | gfp_t gfp_flags) | 866 | gfp_t gfp_flags) |
866 | { | 867 | { |
867 | struct inode *ino = lo->plh_inode; | 868 | struct inode *ino = lo->plh_inode; |
@@ -894,7 +895,7 @@ send_layoutget(struct pnfs_layout_hdr *lo, | |||
894 | lgp->args.minlength = i_size - range->offset; | 895 | lgp->args.minlength = i_size - range->offset; |
895 | } | 896 | } |
896 | lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE; | 897 | lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE; |
897 | lgp->args.range = *range; | 898 | pnfs_copy_range(&lgp->args.range, range); |
898 | lgp->args.type = server->pnfs_curr_ld->id; | 899 | lgp->args.type = server->pnfs_curr_ld->id; |
899 | lgp->args.inode = ino; | 900 | lgp->args.inode = ino; |
900 | lgp->args.ctx = get_nfs_open_context(ctx); | 901 | lgp->args.ctx = get_nfs_open_context(ctx); |
@@ -904,17 +905,9 @@ send_layoutget(struct pnfs_layout_hdr *lo, | |||
904 | lseg = nfs4_proc_layoutget(lgp, gfp_flags); | 905 | lseg = nfs4_proc_layoutget(lgp, gfp_flags); |
905 | } while (lseg == ERR_PTR(-EAGAIN)); | 906 | } while (lseg == ERR_PTR(-EAGAIN)); |
906 | 907 | ||
907 | if (IS_ERR(lseg)) { | 908 | if (IS_ERR(lseg) && !nfs_error_is_fatal(PTR_ERR(lseg))) |
908 | switch (PTR_ERR(lseg)) { | 909 | lseg = NULL; |
909 | case -ENOMEM: | 910 | else |
910 | case -ERESTARTSYS: | ||
911 | break; | ||
912 | default: | ||
913 | /* remember that LAYOUTGET failed and suspend trying */ | ||
914 | pnfs_layout_io_set_failed(lo, range->iomode); | ||
915 | } | ||
916 | return NULL; | ||
917 | } else | ||
918 | pnfs_layout_clear_fail_bit(lo, | 911 | pnfs_layout_clear_fail_bit(lo, |
919 | pnfs_iomode_to_fail_bit(range->iomode)); | 912 | pnfs_iomode_to_fail_bit(range->iomode)); |
920 | 913 | ||
@@ -945,7 +938,7 @@ void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo) | |||
945 | } | 938 | } |
946 | 939 | ||
947 | static int | 940 | static int |
948 | pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, nfs4_stateid stateid, | 941 | pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, const nfs4_stateid *stateid, |
949 | enum pnfs_iomode iomode, bool sync) | 942 | enum pnfs_iomode iomode, bool sync) |
950 | { | 943 | { |
951 | struct inode *ino = lo->plh_inode; | 944 | struct inode *ino = lo->plh_inode; |
@@ -962,7 +955,7 @@ pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, nfs4_stateid stateid, | |||
962 | goto out; | 955 | goto out; |
963 | } | 956 | } |
964 | 957 | ||
965 | lrp->args.stateid = stateid; | 958 | nfs4_stateid_copy(&lrp->args.stateid, stateid); |
966 | lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id; | 959 | lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id; |
967 | lrp->args.inode = ino; | 960 | lrp->args.inode = ino; |
968 | lrp->args.range.iomode = iomode; | 961 | lrp->args.range.iomode = iomode; |
@@ -1005,7 +998,7 @@ _pnfs_return_layout(struct inode *ino) | |||
1005 | dprintk("NFS: %s no layout to return\n", __func__); | 998 | dprintk("NFS: %s no layout to return\n", __func__); |
1006 | goto out; | 999 | goto out; |
1007 | } | 1000 | } |
1008 | stateid = nfsi->layout->plh_stateid; | 1001 | nfs4_stateid_copy(&stateid, &nfsi->layout->plh_stateid); |
1009 | /* Reference matched in nfs4_layoutreturn_release */ | 1002 | /* Reference matched in nfs4_layoutreturn_release */ |
1010 | pnfs_get_layout_hdr(lo); | 1003 | pnfs_get_layout_hdr(lo); |
1011 | empty = list_empty(&lo->plh_segs); | 1004 | empty = list_empty(&lo->plh_segs); |
@@ -1033,7 +1026,7 @@ _pnfs_return_layout(struct inode *ino) | |||
1033 | spin_unlock(&ino->i_lock); | 1026 | spin_unlock(&ino->i_lock); |
1034 | pnfs_free_lseg_list(&tmp_list); | 1027 | pnfs_free_lseg_list(&tmp_list); |
1035 | if (send) | 1028 | if (send) |
1036 | status = pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, true); | 1029 | status = pnfs_send_layoutreturn(lo, &stateid, IOMODE_ANY, true); |
1037 | out_put_layout_hdr: | 1030 | out_put_layout_hdr: |
1038 | pnfs_put_layout_hdr(lo); | 1031 | pnfs_put_layout_hdr(lo); |
1039 | out: | 1032 | out: |
@@ -1096,13 +1089,12 @@ bool pnfs_roc(struct inode *ino) | |||
1096 | goto out_noroc; | 1089 | goto out_noroc; |
1097 | } | 1090 | } |
1098 | 1091 | ||
1099 | stateid = lo->plh_stateid; | 1092 | nfs4_stateid_copy(&stateid, &lo->plh_stateid); |
1100 | /* always send layoutreturn if being marked so */ | 1093 | /* always send layoutreturn if being marked so */ |
1101 | if (test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, | 1094 | if (test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, |
1102 | &lo->plh_flags)) | 1095 | &lo->plh_flags)) |
1103 | layoutreturn = pnfs_prepare_layoutreturn(lo); | 1096 | layoutreturn = pnfs_prepare_layoutreturn(lo); |
1104 | 1097 | ||
1105 | pnfs_clear_retry_layoutget(lo); | ||
1106 | list_for_each_entry_safe(lseg, tmp, &lo->plh_segs, pls_list) | 1098 | list_for_each_entry_safe(lseg, tmp, &lo->plh_segs, pls_list) |
1107 | /* If we are sending layoutreturn, invalidate all valid lsegs */ | 1099 | /* If we are sending layoutreturn, invalidate all valid lsegs */ |
1108 | if (layoutreturn || test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) { | 1100 | if (layoutreturn || test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) { |
@@ -1124,7 +1116,7 @@ out_noroc: | |||
1124 | pnfs_free_lseg_list(&tmp_list); | 1116 | pnfs_free_lseg_list(&tmp_list); |
1125 | pnfs_layoutcommit_inode(ino, true); | 1117 | pnfs_layoutcommit_inode(ino, true); |
1126 | if (layoutreturn) | 1118 | if (layoutreturn) |
1127 | pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, true); | 1119 | pnfs_send_layoutreturn(lo, &stateid, IOMODE_ANY, true); |
1128 | return roc; | 1120 | return roc; |
1129 | } | 1121 | } |
1130 | 1122 | ||
@@ -1149,6 +1141,7 @@ void pnfs_roc_set_barrier(struct inode *ino, u32 barrier) | |||
1149 | 1141 | ||
1150 | spin_lock(&ino->i_lock); | 1142 | spin_lock(&ino->i_lock); |
1151 | lo = NFS_I(ino)->layout; | 1143 | lo = NFS_I(ino)->layout; |
1144 | pnfs_mark_layout_returned_if_empty(lo); | ||
1152 | if (pnfs_seqid_is_newer(barrier, lo->plh_barrier)) | 1145 | if (pnfs_seqid_is_newer(barrier, lo->plh_barrier)) |
1153 | lo->plh_barrier = barrier; | 1146 | lo->plh_barrier = barrier; |
1154 | spin_unlock(&ino->i_lock); | 1147 | spin_unlock(&ino->i_lock); |
@@ -1465,25 +1458,15 @@ static bool pnfs_within_mdsthreshold(struct nfs_open_context *ctx, | |||
1465 | return ret; | 1458 | return ret; |
1466 | } | 1459 | } |
1467 | 1460 | ||
1468 | /* stop waiting if someone clears NFS_LAYOUT_RETRY_LAYOUTGET bit. */ | ||
1469 | static int pnfs_layoutget_retry_bit_wait(struct wait_bit_key *key, int mode) | ||
1470 | { | ||
1471 | if (!test_bit(NFS_LAYOUT_RETRY_LAYOUTGET, key->flags)) | ||
1472 | return 1; | ||
1473 | return nfs_wait_bit_killable(key, mode); | ||
1474 | } | ||
1475 | |||
1476 | static bool pnfs_prepare_to_retry_layoutget(struct pnfs_layout_hdr *lo) | 1461 | static bool pnfs_prepare_to_retry_layoutget(struct pnfs_layout_hdr *lo) |
1477 | { | 1462 | { |
1478 | if (!pnfs_should_retry_layoutget(lo)) | ||
1479 | return false; | ||
1480 | /* | 1463 | /* |
1481 | * send layoutcommit as it can hold up layoutreturn due to lseg | 1464 | * send layoutcommit as it can hold up layoutreturn due to lseg |
1482 | * reference | 1465 | * reference |
1483 | */ | 1466 | */ |
1484 | pnfs_layoutcommit_inode(lo->plh_inode, false); | 1467 | pnfs_layoutcommit_inode(lo->plh_inode, false); |
1485 | return !wait_on_bit_action(&lo->plh_flags, NFS_LAYOUT_RETURN, | 1468 | return !wait_on_bit_action(&lo->plh_flags, NFS_LAYOUT_RETURN, |
1486 | pnfs_layoutget_retry_bit_wait, | 1469 | nfs_wait_bit_killable, |
1487 | TASK_UNINTERRUPTIBLE); | 1470 | TASK_UNINTERRUPTIBLE); |
1488 | } | 1471 | } |
1489 | 1472 | ||
@@ -1520,14 +1503,23 @@ pnfs_update_layout(struct inode *ino, | |||
1520 | struct pnfs_layout_segment *lseg = NULL; | 1503 | struct pnfs_layout_segment *lseg = NULL; |
1521 | bool first; | 1504 | bool first; |
1522 | 1505 | ||
1523 | if (!pnfs_enabled_sb(NFS_SERVER(ino))) | 1506 | if (!pnfs_enabled_sb(NFS_SERVER(ino))) { |
1507 | trace_pnfs_update_layout(ino, pos, count, iomode, NULL, | ||
1508 | PNFS_UPDATE_LAYOUT_NO_PNFS); | ||
1524 | goto out; | 1509 | goto out; |
1510 | } | ||
1525 | 1511 | ||
1526 | if (iomode == IOMODE_READ && i_size_read(ino) == 0) | 1512 | if (iomode == IOMODE_READ && i_size_read(ino) == 0) { |
1513 | trace_pnfs_update_layout(ino, pos, count, iomode, NULL, | ||
1514 | PNFS_UPDATE_LAYOUT_RD_ZEROLEN); | ||
1527 | goto out; | 1515 | goto out; |
1516 | } | ||
1528 | 1517 | ||
1529 | if (pnfs_within_mdsthreshold(ctx, ino, iomode)) | 1518 | if (pnfs_within_mdsthreshold(ctx, ino, iomode)) { |
1519 | trace_pnfs_update_layout(ino, pos, count, iomode, NULL, | ||
1520 | PNFS_UPDATE_LAYOUT_MDSTHRESH); | ||
1530 | goto out; | 1521 | goto out; |
1522 | } | ||
1531 | 1523 | ||
1532 | lookup_again: | 1524 | lookup_again: |
1533 | first = false; | 1525 | first = false; |
@@ -1535,19 +1527,25 @@ lookup_again: | |||
1535 | lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags); | 1527 | lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags); |
1536 | if (lo == NULL) { | 1528 | if (lo == NULL) { |
1537 | spin_unlock(&ino->i_lock); | 1529 | spin_unlock(&ino->i_lock); |
1530 | trace_pnfs_update_layout(ino, pos, count, iomode, NULL, | ||
1531 | PNFS_UPDATE_LAYOUT_NOMEM); | ||
1538 | goto out; | 1532 | goto out; |
1539 | } | 1533 | } |
1540 | 1534 | ||
1541 | /* Do we even need to bother with this? */ | 1535 | /* Do we even need to bother with this? */ |
1542 | if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) { | 1536 | if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) { |
1537 | trace_pnfs_update_layout(ino, pos, count, iomode, lo, | ||
1538 | PNFS_UPDATE_LAYOUT_BULK_RECALL); | ||
1543 | dprintk("%s matches recall, use MDS\n", __func__); | 1539 | dprintk("%s matches recall, use MDS\n", __func__); |
1544 | goto out_unlock; | 1540 | goto out_unlock; |
1545 | } | 1541 | } |
1546 | 1542 | ||
1547 | /* if LAYOUTGET already failed once we don't try again */ | 1543 | /* if LAYOUTGET already failed once we don't try again */ |
1548 | if (pnfs_layout_io_test_failed(lo, iomode) && | 1544 | if (pnfs_layout_io_test_failed(lo, iomode)) { |
1549 | !pnfs_should_retry_layoutget(lo)) | 1545 | trace_pnfs_update_layout(ino, pos, count, iomode, lo, |
1546 | PNFS_UPDATE_LAYOUT_IO_TEST_FAIL); | ||
1550 | goto out_unlock; | 1547 | goto out_unlock; |
1548 | } | ||
1551 | 1549 | ||
1552 | first = list_empty(&lo->plh_segs); | 1550 | first = list_empty(&lo->plh_segs); |
1553 | if (first) { | 1551 | if (first) { |
@@ -1567,8 +1565,11 @@ lookup_again: | |||
1567 | * already exists | 1565 | * already exists |
1568 | */ | 1566 | */ |
1569 | lseg = pnfs_find_lseg(lo, &arg); | 1567 | lseg = pnfs_find_lseg(lo, &arg); |
1570 | if (lseg) | 1568 | if (lseg) { |
1569 | trace_pnfs_update_layout(ino, pos, count, iomode, lo, | ||
1570 | PNFS_UPDATE_LAYOUT_FOUND_CACHED); | ||
1571 | goto out_unlock; | 1571 | goto out_unlock; |
1572 | } | ||
1572 | } | 1573 | } |
1573 | 1574 | ||
1574 | /* | 1575 | /* |
@@ -1585,11 +1586,16 @@ lookup_again: | |||
1585 | dprintk("%s retrying\n", __func__); | 1586 | dprintk("%s retrying\n", __func__); |
1586 | goto lookup_again; | 1587 | goto lookup_again; |
1587 | } | 1588 | } |
1589 | trace_pnfs_update_layout(ino, pos, count, iomode, lo, | ||
1590 | PNFS_UPDATE_LAYOUT_RETURN); | ||
1588 | goto out_put_layout_hdr; | 1591 | goto out_put_layout_hdr; |
1589 | } | 1592 | } |
1590 | 1593 | ||
1591 | if (pnfs_layoutgets_blocked(lo)) | 1594 | if (pnfs_layoutgets_blocked(lo)) { |
1595 | trace_pnfs_update_layout(ino, pos, count, iomode, lo, | ||
1596 | PNFS_UPDATE_LAYOUT_BLOCKED); | ||
1592 | goto out_unlock; | 1597 | goto out_unlock; |
1598 | } | ||
1593 | atomic_inc(&lo->plh_outstanding); | 1599 | atomic_inc(&lo->plh_outstanding); |
1594 | spin_unlock(&ino->i_lock); | 1600 | spin_unlock(&ino->i_lock); |
1595 | 1601 | ||
@@ -1612,8 +1618,9 @@ lookup_again: | |||
1612 | arg.length = PAGE_CACHE_ALIGN(arg.length); | 1618 | arg.length = PAGE_CACHE_ALIGN(arg.length); |
1613 | 1619 | ||
1614 | lseg = send_layoutget(lo, ctx, &arg, gfp_flags); | 1620 | lseg = send_layoutget(lo, ctx, &arg, gfp_flags); |
1615 | pnfs_clear_retry_layoutget(lo); | ||
1616 | atomic_dec(&lo->plh_outstanding); | 1621 | atomic_dec(&lo->plh_outstanding); |
1622 | trace_pnfs_update_layout(ino, pos, count, iomode, lo, | ||
1623 | PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET); | ||
1617 | out_put_layout_hdr: | 1624 | out_put_layout_hdr: |
1618 | if (first) | 1625 | if (first) |
1619 | pnfs_clear_first_layoutget(lo); | 1626 | pnfs_clear_first_layoutget(lo); |
@@ -1623,7 +1630,7 @@ out: | |||
1623 | "(%s, offset: %llu, length: %llu)\n", | 1630 | "(%s, offset: %llu, length: %llu)\n", |
1624 | __func__, ino->i_sb->s_id, | 1631 | __func__, ino->i_sb->s_id, |
1625 | (unsigned long long)NFS_FILEID(ino), | 1632 | (unsigned long long)NFS_FILEID(ino), |
1626 | lseg == NULL ? "not found" : "found", | 1633 | IS_ERR_OR_NULL(lseg) ? "not found" : "found", |
1627 | iomode==IOMODE_RW ? "read/write" : "read-only", | 1634 | iomode==IOMODE_RW ? "read/write" : "read-only", |
1628 | (unsigned long long)pos, | 1635 | (unsigned long long)pos, |
1629 | (unsigned long long)count); | 1636 | (unsigned long long)count); |
@@ -1730,16 +1737,29 @@ out_forget_reply: | |||
1730 | } | 1737 | } |
1731 | 1738 | ||
1732 | static void | 1739 | static void |
1740 | pnfs_set_plh_return_iomode(struct pnfs_layout_hdr *lo, enum pnfs_iomode iomode) | ||
1741 | { | ||
1742 | if (lo->plh_return_iomode == iomode) | ||
1743 | return; | ||
1744 | if (lo->plh_return_iomode != 0) | ||
1745 | iomode = IOMODE_ANY; | ||
1746 | lo->plh_return_iomode = iomode; | ||
1747 | } | ||
1748 | |||
1749 | int | ||
1733 | pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo, | 1750 | pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo, |
1734 | struct list_head *tmp_list, | 1751 | struct list_head *tmp_list, |
1735 | struct pnfs_layout_range *return_range) | 1752 | const struct pnfs_layout_range *return_range) |
1736 | { | 1753 | { |
1737 | struct pnfs_layout_segment *lseg, *next; | 1754 | struct pnfs_layout_segment *lseg, *next; |
1755 | int remaining = 0; | ||
1738 | 1756 | ||
1739 | dprintk("%s:Begin lo %p\n", __func__, lo); | 1757 | dprintk("%s:Begin lo %p\n", __func__, lo); |
1740 | 1758 | ||
1741 | if (list_empty(&lo->plh_segs)) | 1759 | if (list_empty(&lo->plh_segs)) |
1742 | return; | 1760 | return 0; |
1761 | |||
1762 | assert_spin_locked(&lo->plh_inode->i_lock); | ||
1743 | 1763 | ||
1744 | list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) | 1764 | list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) |
1745 | if (should_free_lseg(&lseg->pls_range, return_range)) { | 1765 | if (should_free_lseg(&lseg->pls_range, return_range)) { |
@@ -1749,38 +1769,47 @@ pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo, | |||
1749 | lseg->pls_range.offset, | 1769 | lseg->pls_range.offset, |
1750 | lseg->pls_range.length); | 1770 | lseg->pls_range.length); |
1751 | set_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags); | 1771 | set_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags); |
1752 | mark_lseg_invalid(lseg, tmp_list); | 1772 | pnfs_set_plh_return_iomode(lo, return_range->iomode); |
1773 | if (!mark_lseg_invalid(lseg, tmp_list)) | ||
1774 | remaining++; | ||
1753 | set_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, | 1775 | set_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, |
1754 | &lo->plh_flags); | 1776 | &lo->plh_flags); |
1755 | } | 1777 | } |
1778 | return remaining; | ||
1756 | } | 1779 | } |
1757 | 1780 | ||
1758 | void pnfs_error_mark_layout_for_return(struct inode *inode, | 1781 | void pnfs_error_mark_layout_for_return(struct inode *inode, |
1759 | struct pnfs_layout_segment *lseg) | 1782 | struct pnfs_layout_segment *lseg) |
1760 | { | 1783 | { |
1761 | struct pnfs_layout_hdr *lo = NFS_I(inode)->layout; | 1784 | struct pnfs_layout_hdr *lo = NFS_I(inode)->layout; |
1762 | int iomode = pnfs_iomode_to_fail_bit(lseg->pls_range.iomode); | ||
1763 | struct pnfs_layout_range range = { | 1785 | struct pnfs_layout_range range = { |
1764 | .iomode = lseg->pls_range.iomode, | 1786 | .iomode = lseg->pls_range.iomode, |
1765 | .offset = 0, | 1787 | .offset = 0, |
1766 | .length = NFS4_MAX_UINT64, | 1788 | .length = NFS4_MAX_UINT64, |
1767 | }; | 1789 | }; |
1768 | LIST_HEAD(free_me); | 1790 | LIST_HEAD(free_me); |
1791 | bool return_now = false; | ||
1769 | 1792 | ||
1770 | spin_lock(&inode->i_lock); | 1793 | spin_lock(&inode->i_lock); |
1771 | /* set failure bit so that pnfs path will be retried later */ | 1794 | pnfs_set_plh_return_iomode(lo, range.iomode); |
1772 | pnfs_layout_set_fail_bit(lo, iomode); | ||
1773 | if (lo->plh_return_iomode == 0) | ||
1774 | lo->plh_return_iomode = range.iomode; | ||
1775 | else if (lo->plh_return_iomode != range.iomode) | ||
1776 | lo->plh_return_iomode = IOMODE_ANY; | ||
1777 | /* | 1795 | /* |
1778 | * mark all matching lsegs so that we are sure to have no live | 1796 | * mark all matching lsegs so that we are sure to have no live |
1779 | * segments at hand when sending layoutreturn. See pnfs_put_lseg() | 1797 | * segments at hand when sending layoutreturn. See pnfs_put_lseg() |
1780 | * for how it works. | 1798 | * for how it works. |
1781 | */ | 1799 | */ |
1782 | pnfs_mark_matching_lsegs_return(lo, &free_me, &range); | 1800 | if (!pnfs_mark_matching_lsegs_return(lo, &free_me, &range)) { |
1783 | spin_unlock(&inode->i_lock); | 1801 | nfs4_stateid stateid; |
1802 | enum pnfs_iomode iomode = lo->plh_return_iomode; | ||
1803 | |||
1804 | nfs4_stateid_copy(&stateid, &lo->plh_stateid); | ||
1805 | return_now = pnfs_prepare_layoutreturn(lo); | ||
1806 | spin_unlock(&inode->i_lock); | ||
1807 | if (return_now) | ||
1808 | pnfs_send_layoutreturn(lo, &stateid, iomode, false); | ||
1809 | } else { | ||
1810 | spin_unlock(&inode->i_lock); | ||
1811 | nfs_commit_inode(inode, 0); | ||
1812 | } | ||
1784 | pnfs_free_lseg_list(&free_me); | 1813 | pnfs_free_lseg_list(&free_me); |
1785 | } | 1814 | } |
1786 | EXPORT_SYMBOL_GPL(pnfs_error_mark_layout_for_return); | 1815 | EXPORT_SYMBOL_GPL(pnfs_error_mark_layout_for_return); |
@@ -1802,6 +1831,11 @@ pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *r | |||
1802 | rd_size, | 1831 | rd_size, |
1803 | IOMODE_READ, | 1832 | IOMODE_READ, |
1804 | GFP_KERNEL); | 1833 | GFP_KERNEL); |
1834 | if (IS_ERR(pgio->pg_lseg)) { | ||
1835 | pgio->pg_error = PTR_ERR(pgio->pg_lseg); | ||
1836 | pgio->pg_lseg = NULL; | ||
1837 | return; | ||
1838 | } | ||
1805 | } | 1839 | } |
1806 | /* If no lseg, fall back to read through mds */ | 1840 | /* If no lseg, fall back to read through mds */ |
1807 | if (pgio->pg_lseg == NULL) | 1841 | if (pgio->pg_lseg == NULL) |
@@ -1814,13 +1848,19 @@ void | |||
1814 | pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, | 1848 | pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, |
1815 | struct nfs_page *req, u64 wb_size) | 1849 | struct nfs_page *req, u64 wb_size) |
1816 | { | 1850 | { |
1817 | if (pgio->pg_lseg == NULL) | 1851 | if (pgio->pg_lseg == NULL) { |
1818 | pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, | 1852 | pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, |
1819 | req->wb_context, | 1853 | req->wb_context, |
1820 | req_offset(req), | 1854 | req_offset(req), |
1821 | wb_size, | 1855 | wb_size, |
1822 | IOMODE_RW, | 1856 | IOMODE_RW, |
1823 | GFP_NOFS); | 1857 | GFP_NOFS); |
1858 | if (IS_ERR(pgio->pg_lseg)) { | ||
1859 | pgio->pg_error = PTR_ERR(pgio->pg_lseg); | ||
1860 | pgio->pg_lseg = NULL; | ||
1861 | return; | ||
1862 | } | ||
1863 | } | ||
1824 | /* If no lseg, fall back to write through mds */ | 1864 | /* If no lseg, fall back to write through mds */ |
1825 | if (pgio->pg_lseg == NULL) | 1865 | if (pgio->pg_lseg == NULL) |
1826 | nfs_pageio_reset_write_mds(pgio); | 1866 | nfs_pageio_reset_write_mds(pgio); |
@@ -1988,15 +2028,13 @@ static void pnfs_writehdr_free(struct nfs_pgio_header *hdr) | |||
1988 | int | 2028 | int |
1989 | pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) | 2029 | pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) |
1990 | { | 2030 | { |
1991 | struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); | ||
1992 | |||
1993 | struct nfs_pgio_header *hdr; | 2031 | struct nfs_pgio_header *hdr; |
1994 | int ret; | 2032 | int ret; |
1995 | 2033 | ||
1996 | hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); | 2034 | hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); |
1997 | if (!hdr) { | 2035 | if (!hdr) { |
1998 | desc->pg_completion_ops->error_cleanup(&mirror->pg_list); | 2036 | desc->pg_error = -ENOMEM; |
1999 | return -ENOMEM; | 2037 | return desc->pg_error; |
2000 | } | 2038 | } |
2001 | nfs_pgheader_init(desc, hdr, pnfs_writehdr_free); | 2039 | nfs_pgheader_init(desc, hdr, pnfs_writehdr_free); |
2002 | 2040 | ||
@@ -2119,15 +2157,13 @@ static void pnfs_readhdr_free(struct nfs_pgio_header *hdr) | |||
2119 | int | 2157 | int |
2120 | pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) | 2158 | pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) |
2121 | { | 2159 | { |
2122 | struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); | ||
2123 | |||
2124 | struct nfs_pgio_header *hdr; | 2160 | struct nfs_pgio_header *hdr; |
2125 | int ret; | 2161 | int ret; |
2126 | 2162 | ||
2127 | hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); | 2163 | hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); |
2128 | if (!hdr) { | 2164 | if (!hdr) { |
2129 | desc->pg_completion_ops->error_cleanup(&mirror->pg_list); | 2165 | desc->pg_error = -ENOMEM; |
2130 | return -ENOMEM; | 2166 | return desc->pg_error; |
2131 | } | 2167 | } |
2132 | nfs_pgheader_init(desc, hdr, pnfs_readhdr_free); | 2168 | nfs_pgheader_init(desc, hdr, pnfs_readhdr_free); |
2133 | hdr->lseg = pnfs_get_lseg(desc->pg_lseg); | 2169 | hdr->lseg = pnfs_get_lseg(desc->pg_lseg); |
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index d1990e90e7a0..9f4e2a47f4aa 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h | |||
@@ -98,7 +98,6 @@ enum { | |||
98 | NFS_LAYOUT_RETURN_BEFORE_CLOSE, /* Return this layout before close */ | 98 | NFS_LAYOUT_RETURN_BEFORE_CLOSE, /* Return this layout before close */ |
99 | NFS_LAYOUT_INVALID_STID, /* layout stateid id is invalid */ | 99 | NFS_LAYOUT_INVALID_STID, /* layout stateid id is invalid */ |
100 | NFS_LAYOUT_FIRST_LAYOUTGET, /* Serialize first layoutget */ | 100 | NFS_LAYOUT_FIRST_LAYOUTGET, /* Serialize first layoutget */ |
101 | NFS_LAYOUT_RETRY_LAYOUTGET, /* Retry layoutget */ | ||
102 | }; | 101 | }; |
103 | 102 | ||
104 | enum layoutdriver_policy_flags { | 103 | enum layoutdriver_policy_flags { |
@@ -261,11 +260,14 @@ void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, | |||
261 | bool update_barrier); | 260 | bool update_barrier); |
262 | int pnfs_choose_layoutget_stateid(nfs4_stateid *dst, | 261 | int pnfs_choose_layoutget_stateid(nfs4_stateid *dst, |
263 | struct pnfs_layout_hdr *lo, | 262 | struct pnfs_layout_hdr *lo, |
264 | struct pnfs_layout_range *range, | 263 | const struct pnfs_layout_range *range, |
265 | struct nfs4_state *open_state); | 264 | struct nfs4_state *open_state); |
266 | int pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, | 265 | int pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, |
267 | struct list_head *tmp_list, | 266 | struct list_head *tmp_list, |
268 | struct pnfs_layout_range *recall_range); | 267 | const struct pnfs_layout_range *recall_range); |
268 | int pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo, | ||
269 | struct list_head *tmp_list, | ||
270 | const struct pnfs_layout_range *recall_range); | ||
269 | bool pnfs_roc(struct inode *ino); | 271 | bool pnfs_roc(struct inode *ino); |
270 | void pnfs_roc_release(struct inode *ino); | 272 | void pnfs_roc_release(struct inode *ino); |
271 | void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); | 273 | void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); |
@@ -379,26 +381,6 @@ nfs4_get_deviceid(struct nfs4_deviceid_node *d) | |||
379 | return d; | 381 | return d; |
380 | } | 382 | } |
381 | 383 | ||
382 | static inline void pnfs_set_retry_layoutget(struct pnfs_layout_hdr *lo) | ||
383 | { | ||
384 | if (!test_and_set_bit(NFS_LAYOUT_RETRY_LAYOUTGET, &lo->plh_flags)) | ||
385 | atomic_inc(&lo->plh_refcount); | ||
386 | } | ||
387 | |||
388 | static inline void pnfs_clear_retry_layoutget(struct pnfs_layout_hdr *lo) | ||
389 | { | ||
390 | if (test_and_clear_bit(NFS_LAYOUT_RETRY_LAYOUTGET, &lo->plh_flags)) { | ||
391 | atomic_dec(&lo->plh_refcount); | ||
392 | /* wake up waiters for LAYOUTRETURN as that is not needed */ | ||
393 | wake_up_bit(&lo->plh_flags, NFS_LAYOUT_RETURN); | ||
394 | } | ||
395 | } | ||
396 | |||
397 | static inline bool pnfs_should_retry_layoutget(struct pnfs_layout_hdr *lo) | ||
398 | { | ||
399 | return test_bit(NFS_LAYOUT_RETRY_LAYOUTGET, &lo->plh_flags); | ||
400 | } | ||
401 | |||
402 | static inline struct pnfs_layout_segment * | 384 | static inline struct pnfs_layout_segment * |
403 | pnfs_get_lseg(struct pnfs_layout_segment *lseg) | 385 | pnfs_get_lseg(struct pnfs_layout_segment *lseg) |
404 | { | 386 | { |
@@ -409,6 +391,12 @@ pnfs_get_lseg(struct pnfs_layout_segment *lseg) | |||
409 | return lseg; | 391 | return lseg; |
410 | } | 392 | } |
411 | 393 | ||
394 | static inline bool | ||
395 | pnfs_is_valid_lseg(struct pnfs_layout_segment *lseg) | ||
396 | { | ||
397 | return test_bit(NFS_LSEG_VALID, &lseg->pls_flags) != 0; | ||
398 | } | ||
399 | |||
412 | /* Return true if a layout driver is being used for this mountpoint */ | 400 | /* Return true if a layout driver is being used for this mountpoint */ |
413 | static inline int pnfs_enabled_sb(struct nfs_server *nfss) | 401 | static inline int pnfs_enabled_sb(struct nfs_server *nfss) |
414 | { | 402 | { |
@@ -556,6 +544,26 @@ pnfs_calc_offset_length(u64 offset, u64 end) | |||
556 | return 1 + end - offset; | 544 | return 1 + end - offset; |
557 | } | 545 | } |
558 | 546 | ||
547 | /** | ||
548 | * pnfs_mark_layout_returned_if_empty - marks the layout as returned | ||
549 | * @lo: layout header | ||
550 | * | ||
551 | * Note: Caller must hold inode->i_lock | ||
552 | */ | ||
553 | static inline void | ||
554 | pnfs_mark_layout_returned_if_empty(struct pnfs_layout_hdr *lo) | ||
555 | { | ||
556 | if (list_empty(&lo->plh_segs)) | ||
557 | set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); | ||
558 | } | ||
559 | |||
560 | static inline void | ||
561 | pnfs_copy_range(struct pnfs_layout_range *dst, | ||
562 | const struct pnfs_layout_range *src) | ||
563 | { | ||
564 | memcpy(dst, src, sizeof(*dst)); | ||
565 | } | ||
566 | |||
559 | extern unsigned int layoutstats_timer; | 567 | extern unsigned int layoutstats_timer; |
560 | 568 | ||
561 | #ifdef NFS_DEBUG | 569 | #ifdef NFS_DEBUG |
diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 24655b807d44..81ac6480f9e7 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c | |||
@@ -266,17 +266,14 @@ pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages, | |||
266 | } else { | 266 | } else { |
267 | nfs_retry_commit(mds_pages, NULL, cinfo, 0); | 267 | nfs_retry_commit(mds_pages, NULL, cinfo, 0); |
268 | pnfs_generic_retry_commit(cinfo, 0); | 268 | pnfs_generic_retry_commit(cinfo, 0); |
269 | cinfo->completion_ops->error_cleanup(NFS_I(inode)); | ||
270 | return -ENOMEM; | 269 | return -ENOMEM; |
271 | } | 270 | } |
272 | } | 271 | } |
273 | 272 | ||
274 | nreq += pnfs_generic_alloc_ds_commits(cinfo, &list); | 273 | nreq += pnfs_generic_alloc_ds_commits(cinfo, &list); |
275 | 274 | ||
276 | if (nreq == 0) { | 275 | if (nreq == 0) |
277 | cinfo->completion_ops->error_cleanup(NFS_I(inode)); | ||
278 | goto out; | 276 | goto out; |
279 | } | ||
280 | 277 | ||
281 | atomic_add(nreq, &cinfo->mds->rpcs_out); | 278 | atomic_add(nreq, &cinfo->mds->rpcs_out); |
282 | 279 | ||
@@ -871,6 +868,11 @@ pnfs_layout_mark_request_commit(struct nfs_page *req, | |||
871 | buckets = cinfo->ds->buckets; | 868 | buckets = cinfo->ds->buckets; |
872 | list = &buckets[ds_commit_idx].written; | 869 | list = &buckets[ds_commit_idx].written; |
873 | if (list_empty(list)) { | 870 | if (list_empty(list)) { |
871 | if (!pnfs_is_valid_lseg(lseg)) { | ||
872 | spin_unlock(cinfo->lock); | ||
873 | cinfo->completion_ops->resched_write(cinfo, req); | ||
874 | return; | ||
875 | } | ||
874 | /* Non-empty buckets hold a reference on the lseg. That ref | 876 | /* Non-empty buckets hold a reference on the lseg. That ref |
875 | * is normally transferred to the COMMIT call and released | 877 | * is normally transferred to the COMMIT call and released |
876 | * there. It could also be released if the last req is pulled | 878 | * there. It could also be released if the last req is pulled |
diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 0a5e33f33b5c..eb31e23e7def 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c | |||
@@ -85,6 +85,23 @@ void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio) | |||
85 | } | 85 | } |
86 | EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds); | 86 | EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds); |
87 | 87 | ||
88 | static void nfs_readpage_release(struct nfs_page *req) | ||
89 | { | ||
90 | struct inode *inode = d_inode(req->wb_context->dentry); | ||
91 | |||
92 | dprintk("NFS: read done (%s/%llu %d@%lld)\n", inode->i_sb->s_id, | ||
93 | (unsigned long long)NFS_FILEID(inode), req->wb_bytes, | ||
94 | (long long)req_offset(req)); | ||
95 | |||
96 | if (nfs_page_group_sync_on_bit(req, PG_UNLOCKPAGE)) { | ||
97 | if (PageUptodate(req->wb_page)) | ||
98 | nfs_readpage_to_fscache(inode, req->wb_page, 0); | ||
99 | |||
100 | unlock_page(req->wb_page); | ||
101 | } | ||
102 | nfs_release_request(req); | ||
103 | } | ||
104 | |||
88 | int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, | 105 | int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, |
89 | struct page *page) | 106 | struct page *page) |
90 | { | 107 | { |
@@ -106,7 +123,10 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, | |||
106 | 123 | ||
107 | nfs_pageio_init_read(&pgio, inode, false, | 124 | nfs_pageio_init_read(&pgio, inode, false, |
108 | &nfs_async_read_completion_ops); | 125 | &nfs_async_read_completion_ops); |
109 | nfs_pageio_add_request(&pgio, new); | 126 | if (!nfs_pageio_add_request(&pgio, new)) { |
127 | nfs_list_remove_request(new); | ||
128 | nfs_readpage_release(new); | ||
129 | } | ||
110 | nfs_pageio_complete(&pgio); | 130 | nfs_pageio_complete(&pgio); |
111 | 131 | ||
112 | /* It doesn't make sense to do mirrored reads! */ | 132 | /* It doesn't make sense to do mirrored reads! */ |
@@ -115,24 +135,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, | |||
115 | pgm = &pgio.pg_mirrors[0]; | 135 | pgm = &pgio.pg_mirrors[0]; |
116 | NFS_I(inode)->read_io += pgm->pg_bytes_written; | 136 | NFS_I(inode)->read_io += pgm->pg_bytes_written; |
117 | 137 | ||
118 | return 0; | 138 | return pgio.pg_error < 0 ? pgio.pg_error : 0; |
119 | } | ||
120 | |||
121 | static void nfs_readpage_release(struct nfs_page *req) | ||
122 | { | ||
123 | struct inode *inode = d_inode(req->wb_context->dentry); | ||
124 | |||
125 | dprintk("NFS: read done (%s/%llu %d@%lld)\n", inode->i_sb->s_id, | ||
126 | (unsigned long long)NFS_FILEID(inode), req->wb_bytes, | ||
127 | (long long)req_offset(req)); | ||
128 | |||
129 | if (nfs_page_group_sync_on_bit(req, PG_UNLOCKPAGE)) { | ||
130 | if (PageUptodate(req->wb_page)) | ||
131 | nfs_readpage_to_fscache(inode, req->wb_page, 0); | ||
132 | |||
133 | unlock_page(req->wb_page); | ||
134 | } | ||
135 | nfs_release_request(req); | ||
136 | } | 139 | } |
137 | 140 | ||
138 | static void nfs_page_group_set_uptodate(struct nfs_page *req) | 141 | static void nfs_page_group_set_uptodate(struct nfs_page *req) |
@@ -361,6 +364,8 @@ readpage_async_filler(void *data, struct page *page) | |||
361 | if (len < PAGE_CACHE_SIZE) | 364 | if (len < PAGE_CACHE_SIZE) |
362 | zero_user_segment(page, len, PAGE_CACHE_SIZE); | 365 | zero_user_segment(page, len, PAGE_CACHE_SIZE); |
363 | if (!nfs_pageio_add_request(desc->pgio, new)) { | 366 | if (!nfs_pageio_add_request(desc->pgio, new)) { |
367 | nfs_list_remove_request(new); | ||
368 | nfs_readpage_release(new); | ||
364 | error = desc->pgio->pg_error; | 369 | error = desc->pgio->pg_error; |
365 | goto out_unlock; | 370 | goto out_unlock; |
366 | } | 371 | } |
diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 7b9316406930..ce43cd6d88c6 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c | |||
@@ -21,6 +21,8 @@ | |||
21 | #include <linux/nfs_page.h> | 21 | #include <linux/nfs_page.h> |
22 | #include <linux/backing-dev.h> | 22 | #include <linux/backing-dev.h> |
23 | #include <linux/export.h> | 23 | #include <linux/export.h> |
24 | #include <linux/freezer.h> | ||
25 | #include <linux/wait.h> | ||
24 | 26 | ||
25 | #include <asm/uaccess.h> | 27 | #include <asm/uaccess.h> |
26 | 28 | ||
@@ -244,11 +246,9 @@ static int wb_priority(struct writeback_control *wbc) | |||
244 | { | 246 | { |
245 | int ret = 0; | 247 | int ret = 0; |
246 | if (wbc->for_reclaim) | 248 | if (wbc->for_reclaim) |
247 | return FLUSH_HIGHPRI | FLUSH_STABLE; | 249 | return FLUSH_HIGHPRI | FLUSH_COND_STABLE; |
248 | if (wbc->sync_mode == WB_SYNC_ALL) | 250 | if (wbc->sync_mode == WB_SYNC_ALL) |
249 | ret = FLUSH_COND_STABLE; | 251 | ret = FLUSH_COND_STABLE; |
250 | if (wbc->for_kupdate || wbc->for_background) | ||
251 | ret |= FLUSH_LOWPRI; | ||
252 | return ret; | 252 | return ret; |
253 | } | 253 | } |
254 | 254 | ||
@@ -545,12 +545,22 @@ try_again: | |||
545 | return head; | 545 | return head; |
546 | } | 546 | } |
547 | 547 | ||
548 | static void nfs_write_error_remove_page(struct nfs_page *req) | ||
549 | { | ||
550 | nfs_unlock_request(req); | ||
551 | nfs_end_page_writeback(req); | ||
552 | nfs_release_request(req); | ||
553 | generic_error_remove_page(page_file_mapping(req->wb_page), | ||
554 | req->wb_page); | ||
555 | } | ||
556 | |||
548 | /* | 557 | /* |
549 | * Find an associated nfs write request, and prepare to flush it out | 558 | * Find an associated nfs write request, and prepare to flush it out |
550 | * May return an error if the user signalled nfs_wait_on_request(). | 559 | * May return an error if the user signalled nfs_wait_on_request(). |
551 | */ | 560 | */ |
552 | static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, | 561 | static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, |
553 | struct page *page, bool nonblock) | 562 | struct page *page, bool nonblock, |
563 | bool launder) | ||
554 | { | 564 | { |
555 | struct nfs_page *req; | 565 | struct nfs_page *req; |
556 | int ret = 0; | 566 | int ret = 0; |
@@ -567,8 +577,21 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, | |||
567 | 577 | ||
568 | ret = 0; | 578 | ret = 0; |
569 | if (!nfs_pageio_add_request(pgio, req)) { | 579 | if (!nfs_pageio_add_request(pgio, req)) { |
570 | nfs_redirty_request(req); | ||
571 | ret = pgio->pg_error; | 580 | ret = pgio->pg_error; |
581 | /* | ||
582 | * Remove the problematic req upon fatal errors | ||
583 | * in launder case, while other dirty pages can | ||
584 | * still be around until they get flushed. | ||
585 | */ | ||
586 | if (nfs_error_is_fatal(ret)) { | ||
587 | nfs_context_set_write_error(req->wb_context, ret); | ||
588 | if (launder) { | ||
589 | nfs_write_error_remove_page(req); | ||
590 | goto out; | ||
591 | } | ||
592 | } | ||
593 | nfs_redirty_request(req); | ||
594 | ret = -EAGAIN; | ||
572 | } else | 595 | } else |
573 | nfs_add_stats(page_file_mapping(page)->host, | 596 | nfs_add_stats(page_file_mapping(page)->host, |
574 | NFSIOS_WRITEPAGES, 1); | 597 | NFSIOS_WRITEPAGES, 1); |
@@ -576,12 +599,14 @@ out: | |||
576 | return ret; | 599 | return ret; |
577 | } | 600 | } |
578 | 601 | ||
579 | static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, struct nfs_pageio_descriptor *pgio) | 602 | static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, |
603 | struct nfs_pageio_descriptor *pgio, bool launder) | ||
580 | { | 604 | { |
581 | int ret; | 605 | int ret; |
582 | 606 | ||
583 | nfs_pageio_cond_complete(pgio, page_file_index(page)); | 607 | nfs_pageio_cond_complete(pgio, page_file_index(page)); |
584 | ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE); | 608 | ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE, |
609 | launder); | ||
585 | if (ret == -EAGAIN) { | 610 | if (ret == -EAGAIN) { |
586 | redirty_page_for_writepage(wbc, page); | 611 | redirty_page_for_writepage(wbc, page); |
587 | ret = 0; | 612 | ret = 0; |
@@ -592,7 +617,9 @@ static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, st | |||
592 | /* | 617 | /* |
593 | * Write an mmapped page to the server. | 618 | * Write an mmapped page to the server. |
594 | */ | 619 | */ |
595 | static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc) | 620 | static int nfs_writepage_locked(struct page *page, |
621 | struct writeback_control *wbc, | ||
622 | bool launder) | ||
596 | { | 623 | { |
597 | struct nfs_pageio_descriptor pgio; | 624 | struct nfs_pageio_descriptor pgio; |
598 | struct inode *inode = page_file_mapping(page)->host; | 625 | struct inode *inode = page_file_mapping(page)->host; |
@@ -601,7 +628,7 @@ static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc | |||
601 | nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE); | 628 | nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE); |
602 | nfs_pageio_init_write(&pgio, inode, wb_priority(wbc), | 629 | nfs_pageio_init_write(&pgio, inode, wb_priority(wbc), |
603 | false, &nfs_async_write_completion_ops); | 630 | false, &nfs_async_write_completion_ops); |
604 | err = nfs_do_writepage(page, wbc, &pgio); | 631 | err = nfs_do_writepage(page, wbc, &pgio, launder); |
605 | nfs_pageio_complete(&pgio); | 632 | nfs_pageio_complete(&pgio); |
606 | if (err < 0) | 633 | if (err < 0) |
607 | return err; | 634 | return err; |
@@ -614,7 +641,7 @@ int nfs_writepage(struct page *page, struct writeback_control *wbc) | |||
614 | { | 641 | { |
615 | int ret; | 642 | int ret; |
616 | 643 | ||
617 | ret = nfs_writepage_locked(page, wbc); | 644 | ret = nfs_writepage_locked(page, wbc, false); |
618 | unlock_page(page); | 645 | unlock_page(page); |
619 | return ret; | 646 | return ret; |
620 | } | 647 | } |
@@ -623,7 +650,7 @@ static int nfs_writepages_callback(struct page *page, struct writeback_control * | |||
623 | { | 650 | { |
624 | int ret; | 651 | int ret; |
625 | 652 | ||
626 | ret = nfs_do_writepage(page, wbc, data); | 653 | ret = nfs_do_writepage(page, wbc, data, false); |
627 | unlock_page(page); | 654 | unlock_page(page); |
628 | return ret; | 655 | return ret; |
629 | } | 656 | } |
@@ -1128,7 +1155,8 @@ int nfs_flush_incompatible(struct file *file, struct page *page) | |||
1128 | if (req == NULL) | 1155 | if (req == NULL) |
1129 | return 0; | 1156 | return 0; |
1130 | l_ctx = req->wb_lock_context; | 1157 | l_ctx = req->wb_lock_context; |
1131 | do_flush = req->wb_page != page || req->wb_context != ctx; | 1158 | do_flush = req->wb_page != page || |
1159 | !nfs_match_open_context(req->wb_context, ctx); | ||
1132 | /* for now, flush if more than 1 request in page_group */ | 1160 | /* for now, flush if more than 1 request in page_group */ |
1133 | do_flush |= req->wb_this_page != req; | 1161 | do_flush |= req->wb_this_page != req; |
1134 | if (l_ctx && flctx && | 1162 | if (l_ctx && flctx && |
@@ -1326,9 +1354,15 @@ static void nfs_async_write_error(struct list_head *head) | |||
1326 | } | 1354 | } |
1327 | } | 1355 | } |
1328 | 1356 | ||
1357 | static void nfs_async_write_reschedule_io(struct nfs_pgio_header *hdr) | ||
1358 | { | ||
1359 | nfs_async_write_error(&hdr->pages); | ||
1360 | } | ||
1361 | |||
1329 | static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops = { | 1362 | static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops = { |
1330 | .error_cleanup = nfs_async_write_error, | 1363 | .error_cleanup = nfs_async_write_error, |
1331 | .completion = nfs_write_completion, | 1364 | .completion = nfs_write_completion, |
1365 | .reschedule_io = nfs_async_write_reschedule_io, | ||
1332 | }; | 1366 | }; |
1333 | 1367 | ||
1334 | void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, | 1368 | void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, |
@@ -1529,27 +1563,21 @@ static void nfs_writeback_result(struct rpc_task *task, | |||
1529 | } | 1563 | } |
1530 | } | 1564 | } |
1531 | 1565 | ||
1532 | 1566 | static int wait_on_commit(struct nfs_mds_commit_info *cinfo) | |
1533 | static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait) | ||
1534 | { | 1567 | { |
1535 | int ret; | 1568 | return wait_on_atomic_t(&cinfo->rpcs_out, |
1569 | nfs_wait_atomic_killable, TASK_KILLABLE); | ||
1570 | } | ||
1536 | 1571 | ||
1537 | if (!test_and_set_bit(NFS_INO_COMMIT, &nfsi->flags)) | 1572 | static void nfs_commit_begin(struct nfs_mds_commit_info *cinfo) |
1538 | return 1; | 1573 | { |
1539 | if (!may_wait) | 1574 | atomic_inc(&cinfo->rpcs_out); |
1540 | return 0; | ||
1541 | ret = out_of_line_wait_on_bit_lock(&nfsi->flags, | ||
1542 | NFS_INO_COMMIT, | ||
1543 | nfs_wait_bit_killable, | ||
1544 | TASK_KILLABLE); | ||
1545 | return (ret < 0) ? ret : 1; | ||
1546 | } | 1575 | } |
1547 | 1576 | ||
1548 | static void nfs_commit_clear_lock(struct nfs_inode *nfsi) | 1577 | static void nfs_commit_end(struct nfs_mds_commit_info *cinfo) |
1549 | { | 1578 | { |
1550 | clear_bit(NFS_INO_COMMIT, &nfsi->flags); | 1579 | if (atomic_dec_and_test(&cinfo->rpcs_out)) |
1551 | smp_mb__after_atomic(); | 1580 | wake_up_atomic_t(&cinfo->rpcs_out); |
1552 | wake_up_bit(&nfsi->flags, NFS_INO_COMMIT); | ||
1553 | } | 1581 | } |
1554 | 1582 | ||
1555 | void nfs_commitdata_release(struct nfs_commit_data *data) | 1583 | void nfs_commitdata_release(struct nfs_commit_data *data) |
@@ -1666,6 +1694,13 @@ void nfs_retry_commit(struct list_head *page_list, | |||
1666 | } | 1694 | } |
1667 | EXPORT_SYMBOL_GPL(nfs_retry_commit); | 1695 | EXPORT_SYMBOL_GPL(nfs_retry_commit); |
1668 | 1696 | ||
1697 | static void | ||
1698 | nfs_commit_resched_write(struct nfs_commit_info *cinfo, | ||
1699 | struct nfs_page *req) | ||
1700 | { | ||
1701 | __set_page_dirty_nobuffers(req->wb_page); | ||
1702 | } | ||
1703 | |||
1669 | /* | 1704 | /* |
1670 | * Commit dirty pages | 1705 | * Commit dirty pages |
1671 | */ | 1706 | */ |
@@ -1687,7 +1722,6 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how, | |||
1687 | data->mds_ops, how, 0); | 1722 | data->mds_ops, how, 0); |
1688 | out_bad: | 1723 | out_bad: |
1689 | nfs_retry_commit(head, NULL, cinfo, 0); | 1724 | nfs_retry_commit(head, NULL, cinfo, 0); |
1690 | cinfo->completion_ops->error_cleanup(NFS_I(inode)); | ||
1691 | return -ENOMEM; | 1725 | return -ENOMEM; |
1692 | } | 1726 | } |
1693 | 1727 | ||
@@ -1749,8 +1783,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) | |||
1749 | clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC); | 1783 | clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC); |
1750 | 1784 | ||
1751 | nfs_init_cinfo(&cinfo, data->inode, data->dreq); | 1785 | nfs_init_cinfo(&cinfo, data->inode, data->dreq); |
1752 | if (atomic_dec_and_test(&cinfo.mds->rpcs_out)) | 1786 | nfs_commit_end(cinfo.mds); |
1753 | nfs_commit_clear_lock(NFS_I(data->inode)); | ||
1754 | } | 1787 | } |
1755 | 1788 | ||
1756 | static void nfs_commit_release(void *calldata) | 1789 | static void nfs_commit_release(void *calldata) |
@@ -1769,7 +1802,7 @@ static const struct rpc_call_ops nfs_commit_ops = { | |||
1769 | 1802 | ||
1770 | static const struct nfs_commit_completion_ops nfs_commit_completion_ops = { | 1803 | static const struct nfs_commit_completion_ops nfs_commit_completion_ops = { |
1771 | .completion = nfs_commit_release_pages, | 1804 | .completion = nfs_commit_release_pages, |
1772 | .error_cleanup = nfs_commit_clear_lock, | 1805 | .resched_write = nfs_commit_resched_write, |
1773 | }; | 1806 | }; |
1774 | 1807 | ||
1775 | int nfs_generic_commit_list(struct inode *inode, struct list_head *head, | 1808 | int nfs_generic_commit_list(struct inode *inode, struct list_head *head, |
@@ -1788,30 +1821,25 @@ int nfs_commit_inode(struct inode *inode, int how) | |||
1788 | LIST_HEAD(head); | 1821 | LIST_HEAD(head); |
1789 | struct nfs_commit_info cinfo; | 1822 | struct nfs_commit_info cinfo; |
1790 | int may_wait = how & FLUSH_SYNC; | 1823 | int may_wait = how & FLUSH_SYNC; |
1824 | int error = 0; | ||
1791 | int res; | 1825 | int res; |
1792 | 1826 | ||
1793 | res = nfs_commit_set_lock(NFS_I(inode), may_wait); | ||
1794 | if (res <= 0) | ||
1795 | goto out_mark_dirty; | ||
1796 | nfs_init_cinfo_from_inode(&cinfo, inode); | 1827 | nfs_init_cinfo_from_inode(&cinfo, inode); |
1828 | nfs_commit_begin(cinfo.mds); | ||
1797 | res = nfs_scan_commit(inode, &head, &cinfo); | 1829 | res = nfs_scan_commit(inode, &head, &cinfo); |
1798 | if (res) { | 1830 | if (res) |
1799 | int error; | ||
1800 | |||
1801 | error = nfs_generic_commit_list(inode, &head, how, &cinfo); | 1831 | error = nfs_generic_commit_list(inode, &head, how, &cinfo); |
1802 | if (error < 0) | 1832 | nfs_commit_end(cinfo.mds); |
1803 | return error; | 1833 | if (error < 0) |
1804 | if (!may_wait) | 1834 | goto out_error; |
1805 | goto out_mark_dirty; | 1835 | if (!may_wait) |
1806 | error = wait_on_bit_action(&NFS_I(inode)->flags, | 1836 | goto out_mark_dirty; |
1807 | NFS_INO_COMMIT, | 1837 | error = wait_on_commit(cinfo.mds); |
1808 | nfs_wait_bit_killable, | 1838 | if (error < 0) |
1809 | TASK_KILLABLE); | 1839 | return error; |
1810 | if (error < 0) | ||
1811 | return error; | ||
1812 | } else | ||
1813 | nfs_commit_clear_lock(NFS_I(inode)); | ||
1814 | return res; | 1840 | return res; |
1841 | out_error: | ||
1842 | res = error; | ||
1815 | /* Note: If we exit without ensuring that the commit is complete, | 1843 | /* Note: If we exit without ensuring that the commit is complete, |
1816 | * we must mark the inode as dirty. Otherwise, future calls to | 1844 | * we must mark the inode as dirty. Otherwise, future calls to |
1817 | * sync_inode() with the WB_SYNC_ALL flag set will fail to ensure | 1845 | * sync_inode() with the WB_SYNC_ALL flag set will fail to ensure |
@@ -1821,6 +1849,7 @@ out_mark_dirty: | |||
1821 | __mark_inode_dirty(inode, I_DIRTY_DATASYNC); | 1849 | __mark_inode_dirty(inode, I_DIRTY_DATASYNC); |
1822 | return res; | 1850 | return res; |
1823 | } | 1851 | } |
1852 | EXPORT_SYMBOL_GPL(nfs_commit_inode); | ||
1824 | 1853 | ||
1825 | int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) | 1854 | int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) |
1826 | { | 1855 | { |
@@ -1911,7 +1940,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page) | |||
1911 | /* | 1940 | /* |
1912 | * Write back all requests on one page - we do this before reading it. | 1941 | * Write back all requests on one page - we do this before reading it. |
1913 | */ | 1942 | */ |
1914 | int nfs_wb_page(struct inode *inode, struct page *page) | 1943 | int nfs_wb_single_page(struct inode *inode, struct page *page, bool launder) |
1915 | { | 1944 | { |
1916 | loff_t range_start = page_file_offset(page); | 1945 | loff_t range_start = page_file_offset(page); |
1917 | loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1); | 1946 | loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1); |
@@ -1928,7 +1957,7 @@ int nfs_wb_page(struct inode *inode, struct page *page) | |||
1928 | for (;;) { | 1957 | for (;;) { |
1929 | wait_on_page_writeback(page); | 1958 | wait_on_page_writeback(page); |
1930 | if (clear_page_dirty_for_io(page)) { | 1959 | if (clear_page_dirty_for_io(page)) { |
1931 | ret = nfs_writepage_locked(page, &wbc); | 1960 | ret = nfs_writepage_locked(page, &wbc, launder); |
1932 | if (ret < 0) | 1961 | if (ret < 0) |
1933 | goto out_error; | 1962 | goto out_error; |
1934 | continue; | 1963 | continue; |
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 43aeabd4b968..d6f9b4e6006d 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h | |||
@@ -592,4 +592,18 @@ enum data_content4 { | |||
592 | NFS4_CONTENT_HOLE = 1, | 592 | NFS4_CONTENT_HOLE = 1, |
593 | }; | 593 | }; |
594 | 594 | ||
595 | enum pnfs_update_layout_reason { | ||
596 | PNFS_UPDATE_LAYOUT_UNKNOWN = 0, | ||
597 | PNFS_UPDATE_LAYOUT_NO_PNFS, | ||
598 | PNFS_UPDATE_LAYOUT_RD_ZEROLEN, | ||
599 | PNFS_UPDATE_LAYOUT_MDSTHRESH, | ||
600 | PNFS_UPDATE_LAYOUT_NOMEM, | ||
601 | PNFS_UPDATE_LAYOUT_BULK_RECALL, | ||
602 | PNFS_UPDATE_LAYOUT_IO_TEST_FAIL, | ||
603 | PNFS_UPDATE_LAYOUT_FOUND_CACHED, | ||
604 | PNFS_UPDATE_LAYOUT_RETURN, | ||
605 | PNFS_UPDATE_LAYOUT_BLOCKED, | ||
606 | PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET, | ||
607 | }; | ||
608 | |||
595 | #endif | 609 | #endif |
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 37a3d2981352..48e0320cd643 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h | |||
@@ -60,18 +60,12 @@ struct nfs_lockowner { | |||
60 | pid_t l_pid; | 60 | pid_t l_pid; |
61 | }; | 61 | }; |
62 | 62 | ||
63 | #define NFS_IO_INPROGRESS 0 | ||
64 | struct nfs_io_counter { | ||
65 | unsigned long flags; | ||
66 | atomic_t io_count; | ||
67 | }; | ||
68 | |||
69 | struct nfs_lock_context { | 63 | struct nfs_lock_context { |
70 | atomic_t count; | 64 | atomic_t count; |
71 | struct list_head list; | 65 | struct list_head list; |
72 | struct nfs_open_context *open_context; | 66 | struct nfs_open_context *open_context; |
73 | struct nfs_lockowner lockowner; | 67 | struct nfs_lockowner lockowner; |
74 | struct nfs_io_counter io_count; | 68 | atomic_t io_count; |
75 | }; | 69 | }; |
76 | 70 | ||
77 | struct nfs4_state; | 71 | struct nfs4_state; |
@@ -216,7 +210,6 @@ struct nfs_inode { | |||
216 | #define NFS_INO_FLUSHING (4) /* inode is flushing out data */ | 210 | #define NFS_INO_FLUSHING (4) /* inode is flushing out data */ |
217 | #define NFS_INO_FSCACHE (5) /* inode can be cached by FS-Cache */ | 211 | #define NFS_INO_FSCACHE (5) /* inode can be cached by FS-Cache */ |
218 | #define NFS_INO_FSCACHE_LOCK (6) /* FS-Cache cookie management lock */ | 212 | #define NFS_INO_FSCACHE_LOCK (6) /* FS-Cache cookie management lock */ |
219 | #define NFS_INO_COMMIT (7) /* inode is committing unstable writes */ | ||
220 | #define NFS_INO_LAYOUTCOMMIT (9) /* layoutcommit required */ | 213 | #define NFS_INO_LAYOUTCOMMIT (9) /* layoutcommit required */ |
221 | #define NFS_INO_LAYOUTCOMMITTING (10) /* layoutcommit inflight */ | 214 | #define NFS_INO_LAYOUTCOMMITTING (10) /* layoutcommit inflight */ |
222 | #define NFS_INO_LAYOUTSTATS (11) /* layoutstats inflight */ | 215 | #define NFS_INO_LAYOUTSTATS (11) /* layoutstats inflight */ |
@@ -518,13 +511,25 @@ extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned | |||
518 | */ | 511 | */ |
519 | extern int nfs_sync_inode(struct inode *inode); | 512 | extern int nfs_sync_inode(struct inode *inode); |
520 | extern int nfs_wb_all(struct inode *inode); | 513 | extern int nfs_wb_all(struct inode *inode); |
521 | extern int nfs_wb_page(struct inode *inode, struct page* page); | 514 | extern int nfs_wb_single_page(struct inode *inode, struct page *page, bool launder); |
522 | extern int nfs_wb_page_cancel(struct inode *inode, struct page* page); | 515 | extern int nfs_wb_page_cancel(struct inode *inode, struct page* page); |
523 | extern int nfs_commit_inode(struct inode *, int); | 516 | extern int nfs_commit_inode(struct inode *, int); |
524 | extern struct nfs_commit_data *nfs_commitdata_alloc(void); | 517 | extern struct nfs_commit_data *nfs_commitdata_alloc(void); |
525 | extern void nfs_commit_free(struct nfs_commit_data *data); | 518 | extern void nfs_commit_free(struct nfs_commit_data *data); |
526 | 519 | ||
527 | static inline int | 520 | static inline int |
521 | nfs_wb_launder_page(struct inode *inode, struct page *page) | ||
522 | { | ||
523 | return nfs_wb_single_page(inode, page, true); | ||
524 | } | ||
525 | |||
526 | static inline int | ||
527 | nfs_wb_page(struct inode *inode, struct page *page) | ||
528 | { | ||
529 | return nfs_wb_single_page(inode, page, false); | ||
530 | } | ||
531 | |||
532 | static inline int | ||
528 | nfs_have_writebacks(struct inode *inode) | 533 | nfs_have_writebacks(struct inode *inode) |
529 | { | 534 | { |
530 | return NFS_I(inode)->nrequests != 0; | 535 | return NFS_I(inode)->nrequests != 0; |
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 2469ab0bb3a1..7fcc13c8cf1f 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h | |||
@@ -102,6 +102,7 @@ struct nfs_client { | |||
102 | #define NFS_SP4_MACH_CRED_STATEID 4 /* TEST_STATEID and FREE_STATEID */ | 102 | #define NFS_SP4_MACH_CRED_STATEID 4 /* TEST_STATEID and FREE_STATEID */ |
103 | #define NFS_SP4_MACH_CRED_WRITE 5 /* WRITE */ | 103 | #define NFS_SP4_MACH_CRED_WRITE 5 /* WRITE */ |
104 | #define NFS_SP4_MACH_CRED_COMMIT 6 /* COMMIT */ | 104 | #define NFS_SP4_MACH_CRED_COMMIT 6 /* COMMIT */ |
105 | #define NFS_SP4_MACH_CRED_PNFS_CLEANUP 7 /* LAYOUTRETURN */ | ||
105 | #endif /* CONFIG_NFS_V4 */ | 106 | #endif /* CONFIG_NFS_V4 */ |
106 | 107 | ||
107 | /* Our own IP address, as a null-terminated string. | 108 | /* Our own IP address, as a null-terminated string. |
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 11bbae44f4cb..791098a08a87 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h | |||
@@ -1375,6 +1375,7 @@ enum { | |||
1375 | NFS_IOHDR_ERROR = 0, | 1375 | NFS_IOHDR_ERROR = 0, |
1376 | NFS_IOHDR_EOF, | 1376 | NFS_IOHDR_EOF, |
1377 | NFS_IOHDR_REDO, | 1377 | NFS_IOHDR_REDO, |
1378 | NFS_IOHDR_STAT, | ||
1378 | }; | 1379 | }; |
1379 | 1380 | ||
1380 | struct nfs_pgio_header { | 1381 | struct nfs_pgio_header { |
@@ -1420,11 +1421,12 @@ struct nfs_mds_commit_info { | |||
1420 | struct list_head list; | 1421 | struct list_head list; |
1421 | }; | 1422 | }; |
1422 | 1423 | ||
1424 | struct nfs_commit_info; | ||
1423 | struct nfs_commit_data; | 1425 | struct nfs_commit_data; |
1424 | struct nfs_inode; | 1426 | struct nfs_inode; |
1425 | struct nfs_commit_completion_ops { | 1427 | struct nfs_commit_completion_ops { |
1426 | void (*error_cleanup) (struct nfs_inode *nfsi); | ||
1427 | void (*completion) (struct nfs_commit_data *data); | 1428 | void (*completion) (struct nfs_commit_data *data); |
1429 | void (*resched_write) (struct nfs_commit_info *, struct nfs_page *); | ||
1428 | }; | 1430 | }; |
1429 | 1431 | ||
1430 | struct nfs_commit_info { | 1432 | struct nfs_commit_info { |
@@ -1454,12 +1456,14 @@ struct nfs_commit_data { | |||
1454 | const struct rpc_call_ops *mds_ops; | 1456 | const struct rpc_call_ops *mds_ops; |
1455 | const struct nfs_commit_completion_ops *completion_ops; | 1457 | const struct nfs_commit_completion_ops *completion_ops; |
1456 | int (*commit_done_cb) (struct rpc_task *task, struct nfs_commit_data *data); | 1458 | int (*commit_done_cb) (struct rpc_task *task, struct nfs_commit_data *data); |
1459 | unsigned long flags; | ||
1457 | }; | 1460 | }; |
1458 | 1461 | ||
1459 | struct nfs_pgio_completion_ops { | 1462 | struct nfs_pgio_completion_ops { |
1460 | void (*error_cleanup)(struct list_head *head); | 1463 | void (*error_cleanup)(struct list_head *head); |
1461 | void (*init_hdr)(struct nfs_pgio_header *hdr); | 1464 | void (*init_hdr)(struct nfs_pgio_header *hdr); |
1462 | void (*completion)(struct nfs_pgio_header *hdr); | 1465 | void (*completion)(struct nfs_pgio_header *hdr); |
1466 | void (*reschedule_io)(struct nfs_pgio_header *hdr); | ||
1463 | }; | 1467 | }; |
1464 | 1468 | ||
1465 | struct nfs_unlinkdata { | 1469 | struct nfs_unlinkdata { |
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 23608eb0ded2..b7f21044f4d8 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c | |||
@@ -1217,6 +1217,7 @@ static int rpc_anyaddr(int family, struct sockaddr *buf, size_t buflen) | |||
1217 | return -EINVAL; | 1217 | return -EINVAL; |
1218 | memcpy(buf, &rpc_in6addr_loopback, | 1218 | memcpy(buf, &rpc_in6addr_loopback, |
1219 | sizeof(rpc_in6addr_loopback)); | 1219 | sizeof(rpc_in6addr_loopback)); |
1220 | break; | ||
1220 | default: | 1221 | default: |
1221 | dprintk("RPC: %s: address family not supported\n", | 1222 | dprintk("RPC: %s: address family not supported\n", |
1222 | __func__); | 1223 | __func__); |
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c index 2dcb44f69e53..cc1251d07297 100644 --- a/net/sunrpc/xprtrdma/backchannel.c +++ b/net/sunrpc/xprtrdma/backchannel.c | |||
@@ -15,7 +15,7 @@ | |||
15 | # define RPCDBG_FACILITY RPCDBG_TRANS | 15 | # define RPCDBG_FACILITY RPCDBG_TRANS |
16 | #endif | 16 | #endif |
17 | 17 | ||
18 | #define RPCRDMA_BACKCHANNEL_DEBUG | 18 | #undef RPCRDMA_BACKCHANNEL_DEBUG |
19 | 19 | ||
20 | static void rpcrdma_bc_free_rqst(struct rpcrdma_xprt *r_xprt, | 20 | static void rpcrdma_bc_free_rqst(struct rpcrdma_xprt *r_xprt, |
21 | struct rpc_rqst *rqst) | 21 | struct rpc_rqst *rqst) |
@@ -42,8 +42,8 @@ static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt, | |||
42 | size_t size; | 42 | size_t size; |
43 | 43 | ||
44 | req = rpcrdma_create_req(r_xprt); | 44 | req = rpcrdma_create_req(r_xprt); |
45 | if (!req) | 45 | if (IS_ERR(req)) |
46 | return -ENOMEM; | 46 | return PTR_ERR(req); |
47 | req->rl_backchannel = true; | 47 | req->rl_backchannel = true; |
48 | 48 | ||
49 | size = RPCRDMA_INLINE_WRITE_THRESHOLD(rqst); | 49 | size = RPCRDMA_INLINE_WRITE_THRESHOLD(rqst); |
@@ -84,9 +84,7 @@ out_fail: | |||
84 | static int rpcrdma_bc_setup_reps(struct rpcrdma_xprt *r_xprt, | 84 | static int rpcrdma_bc_setup_reps(struct rpcrdma_xprt *r_xprt, |
85 | unsigned int count) | 85 | unsigned int count) |
86 | { | 86 | { |
87 | struct rpcrdma_buffer *buffers = &r_xprt->rx_buf; | ||
88 | struct rpcrdma_rep *rep; | 87 | struct rpcrdma_rep *rep; |
89 | unsigned long flags; | ||
90 | int rc = 0; | 88 | int rc = 0; |
91 | 89 | ||
92 | while (count--) { | 90 | while (count--) { |
@@ -98,9 +96,7 @@ static int rpcrdma_bc_setup_reps(struct rpcrdma_xprt *r_xprt, | |||
98 | break; | 96 | break; |
99 | } | 97 | } |
100 | 98 | ||
101 | spin_lock_irqsave(&buffers->rb_lock, flags); | 99 | rpcrdma_recv_buffer_put(rep); |
102 | list_add(&rep->rr_list, &buffers->rb_recv_bufs); | ||
103 | spin_unlock_irqrestore(&buffers->rb_lock, flags); | ||
104 | } | 100 | } |
105 | 101 | ||
106 | return rc; | 102 | return rc; |
@@ -140,6 +136,7 @@ int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs) | |||
140 | __func__); | 136 | __func__); |
141 | goto out_free; | 137 | goto out_free; |
142 | } | 138 | } |
139 | dprintk("RPC: %s: new rqst %p\n", __func__, rqst); | ||
143 | 140 | ||
144 | rqst->rq_xprt = &r_xprt->rx_xprt; | 141 | rqst->rq_xprt = &r_xprt->rx_xprt; |
145 | INIT_LIST_HEAD(&rqst->rq_list); | 142 | INIT_LIST_HEAD(&rqst->rq_list); |
@@ -220,12 +217,14 @@ int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst) | |||
220 | 217 | ||
221 | rpclen = rqst->rq_svec[0].iov_len; | 218 | rpclen = rqst->rq_svec[0].iov_len; |
222 | 219 | ||
220 | #ifdef RPCRDMA_BACKCHANNEL_DEBUG | ||
223 | pr_info("RPC: %s: rpclen %zd headerp 0x%p lkey 0x%x\n", | 221 | pr_info("RPC: %s: rpclen %zd headerp 0x%p lkey 0x%x\n", |
224 | __func__, rpclen, headerp, rdmab_lkey(req->rl_rdmabuf)); | 222 | __func__, rpclen, headerp, rdmab_lkey(req->rl_rdmabuf)); |
225 | pr_info("RPC: %s: RPC/RDMA: %*ph\n", | 223 | pr_info("RPC: %s: RPC/RDMA: %*ph\n", |
226 | __func__, (int)RPCRDMA_HDRLEN_MIN, headerp); | 224 | __func__, (int)RPCRDMA_HDRLEN_MIN, headerp); |
227 | pr_info("RPC: %s: RPC: %*ph\n", | 225 | pr_info("RPC: %s: RPC: %*ph\n", |
228 | __func__, (int)rpclen, rqst->rq_svec[0].iov_base); | 226 | __func__, (int)rpclen, rqst->rq_svec[0].iov_base); |
227 | #endif | ||
229 | 228 | ||
230 | req->rl_send_iov[0].addr = rdmab_addr(req->rl_rdmabuf); | 229 | req->rl_send_iov[0].addr = rdmab_addr(req->rl_rdmabuf); |
231 | req->rl_send_iov[0].length = RPCRDMA_HDRLEN_MIN; | 230 | req->rl_send_iov[0].length = RPCRDMA_HDRLEN_MIN; |
@@ -269,6 +268,9 @@ void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst) | |||
269 | { | 268 | { |
270 | struct rpc_xprt *xprt = rqst->rq_xprt; | 269 | struct rpc_xprt *xprt = rqst->rq_xprt; |
271 | 270 | ||
271 | dprintk("RPC: %s: freeing rqst %p (req %p)\n", | ||
272 | __func__, rqst, rpcr_to_rdmar(rqst)); | ||
273 | |||
272 | smp_mb__before_atomic(); | 274 | smp_mb__before_atomic(); |
273 | WARN_ON_ONCE(!test_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state)); | 275 | WARN_ON_ONCE(!test_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state)); |
274 | clear_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state); | 276 | clear_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state); |
@@ -333,9 +335,7 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt, | |||
333 | struct rpc_rqst, rq_bc_pa_list); | 335 | struct rpc_rqst, rq_bc_pa_list); |
334 | list_del(&rqst->rq_bc_pa_list); | 336 | list_del(&rqst->rq_bc_pa_list); |
335 | spin_unlock(&xprt->bc_pa_lock); | 337 | spin_unlock(&xprt->bc_pa_lock); |
336 | #ifdef RPCRDMA_BACKCHANNEL_DEBUG | 338 | dprintk("RPC: %s: using rqst %p\n", __func__, rqst); |
337 | pr_info("RPC: %s: using rqst %p\n", __func__, rqst); | ||
338 | #endif | ||
339 | 339 | ||
340 | /* Prepare rqst */ | 340 | /* Prepare rqst */ |
341 | rqst->rq_reply_bytes_recvd = 0; | 341 | rqst->rq_reply_bytes_recvd = 0; |
@@ -355,10 +355,8 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt, | |||
355 | * direction reply. | 355 | * direction reply. |
356 | */ | 356 | */ |
357 | req = rpcr_to_rdmar(rqst); | 357 | req = rpcr_to_rdmar(rqst); |
358 | #ifdef RPCRDMA_BACKCHANNEL_DEBUG | 358 | dprintk("RPC: %s: attaching rep %p to req %p\n", |
359 | pr_info("RPC: %s: attaching rep %p to req %p\n", | ||
360 | __func__, rep, req); | 359 | __func__, rep, req); |
361 | #endif | ||
362 | req->rl_reply = rep; | 360 | req->rl_reply = rep; |
363 | 361 | ||
364 | /* Defeat the retransmit detection logic in send_request */ | 362 | /* Defeat the retransmit detection logic in send_request */ |
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c index f1e8dafbd507..c14f3a4bff68 100644 --- a/net/sunrpc/xprtrdma/fmr_ops.c +++ b/net/sunrpc/xprtrdma/fmr_ops.c | |||
@@ -179,6 +179,69 @@ out_maperr: | |||
179 | return rc; | 179 | return rc; |
180 | } | 180 | } |
181 | 181 | ||
182 | static void | ||
183 | __fmr_dma_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg) | ||
184 | { | ||
185 | struct ib_device *device = r_xprt->rx_ia.ri_device; | ||
186 | struct rpcrdma_mw *mw = seg->rl_mw; | ||
187 | int nsegs = seg->mr_nsegs; | ||
188 | |||
189 | seg->rl_mw = NULL; | ||
190 | |||
191 | while (nsegs--) | ||
192 | rpcrdma_unmap_one(device, seg++); | ||
193 | |||
194 | rpcrdma_put_mw(r_xprt, mw); | ||
195 | } | ||
196 | |||
197 | /* Invalidate all memory regions that were registered for "req". | ||
198 | * | ||
199 | * Sleeps until it is safe for the host CPU to access the | ||
200 | * previously mapped memory regions. | ||
201 | */ | ||
202 | static void | ||
203 | fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) | ||
204 | { | ||
205 | struct rpcrdma_mr_seg *seg; | ||
206 | unsigned int i, nchunks; | ||
207 | struct rpcrdma_mw *mw; | ||
208 | LIST_HEAD(unmap_list); | ||
209 | int rc; | ||
210 | |||
211 | dprintk("RPC: %s: req %p\n", __func__, req); | ||
212 | |||
213 | /* ORDER: Invalidate all of the req's MRs first | ||
214 | * | ||
215 | * ib_unmap_fmr() is slow, so use a single call instead | ||
216 | * of one call per mapped MR. | ||
217 | */ | ||
218 | for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) { | ||
219 | seg = &req->rl_segments[i]; | ||
220 | mw = seg->rl_mw; | ||
221 | |||
222 | list_add(&mw->r.fmr.fmr->list, &unmap_list); | ||
223 | |||
224 | i += seg->mr_nsegs; | ||
225 | } | ||
226 | rc = ib_unmap_fmr(&unmap_list); | ||
227 | if (rc) | ||
228 | pr_warn("%s: ib_unmap_fmr failed (%i)\n", __func__, rc); | ||
229 | |||
230 | /* ORDER: Now DMA unmap all of the req's MRs, and return | ||
231 | * them to the free MW list. | ||
232 | */ | ||
233 | for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) { | ||
234 | seg = &req->rl_segments[i]; | ||
235 | |||
236 | __fmr_dma_unmap(r_xprt, seg); | ||
237 | |||
238 | i += seg->mr_nsegs; | ||
239 | seg->mr_nsegs = 0; | ||
240 | } | ||
241 | |||
242 | req->rl_nchunks = 0; | ||
243 | } | ||
244 | |||
182 | /* Use the ib_unmap_fmr() verb to prevent further remote | 245 | /* Use the ib_unmap_fmr() verb to prevent further remote |
183 | * access via RDMA READ or RDMA WRITE. | 246 | * access via RDMA READ or RDMA WRITE. |
184 | */ | 247 | */ |
@@ -231,6 +294,7 @@ fmr_op_destroy(struct rpcrdma_buffer *buf) | |||
231 | 294 | ||
232 | const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = { | 295 | const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = { |
233 | .ro_map = fmr_op_map, | 296 | .ro_map = fmr_op_map, |
297 | .ro_unmap_sync = fmr_op_unmap_sync, | ||
234 | .ro_unmap = fmr_op_unmap, | 298 | .ro_unmap = fmr_op_unmap, |
235 | .ro_open = fmr_op_open, | 299 | .ro_open = fmr_op_open, |
236 | .ro_maxpages = fmr_op_maxpages, | 300 | .ro_maxpages = fmr_op_maxpages, |
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 88cf9e7269c2..c6836844bd0e 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c | |||
@@ -245,12 +245,14 @@ frwr_op_maxpages(struct rpcrdma_xprt *r_xprt) | |||
245 | rpcrdma_max_segments(r_xprt) * ia->ri_max_frmr_depth); | 245 | rpcrdma_max_segments(r_xprt) * ia->ri_max_frmr_depth); |
246 | } | 246 | } |
247 | 247 | ||
248 | /* If FAST_REG or LOCAL_INV failed, indicate the frmr needs to be reset. */ | 248 | /* If FAST_REG or LOCAL_INV failed, indicate the frmr needs |
249 | * to be reset. | ||
250 | * | ||
251 | * WARNING: Only wr_id and status are reliable at this point | ||
252 | */ | ||
249 | static void | 253 | static void |
250 | frwr_sendcompletion(struct ib_wc *wc) | 254 | __frwr_sendcompletion_flush(struct ib_wc *wc, struct rpcrdma_mw *r) |
251 | { | 255 | { |
252 | struct rpcrdma_mw *r; | ||
253 | |||
254 | if (likely(wc->status == IB_WC_SUCCESS)) | 256 | if (likely(wc->status == IB_WC_SUCCESS)) |
255 | return; | 257 | return; |
256 | 258 | ||
@@ -261,9 +263,23 @@ frwr_sendcompletion(struct ib_wc *wc) | |||
261 | else | 263 | else |
262 | pr_warn("RPC: %s: frmr %p error, status %s (%d)\n", | 264 | pr_warn("RPC: %s: frmr %p error, status %s (%d)\n", |
263 | __func__, r, ib_wc_status_msg(wc->status), wc->status); | 265 | __func__, r, ib_wc_status_msg(wc->status), wc->status); |
266 | |||
264 | r->r.frmr.fr_state = FRMR_IS_STALE; | 267 | r->r.frmr.fr_state = FRMR_IS_STALE; |
265 | } | 268 | } |
266 | 269 | ||
270 | static void | ||
271 | frwr_sendcompletion(struct ib_wc *wc) | ||
272 | { | ||
273 | struct rpcrdma_mw *r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id; | ||
274 | struct rpcrdma_frmr *f = &r->r.frmr; | ||
275 | |||
276 | if (unlikely(wc->status != IB_WC_SUCCESS)) | ||
277 | __frwr_sendcompletion_flush(wc, r); | ||
278 | |||
279 | if (f->fr_waiter) | ||
280 | complete(&f->fr_linv_done); | ||
281 | } | ||
282 | |||
267 | static int | 283 | static int |
268 | frwr_op_init(struct rpcrdma_xprt *r_xprt) | 284 | frwr_op_init(struct rpcrdma_xprt *r_xprt) |
269 | { | 285 | { |
@@ -319,7 +335,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, | |||
319 | struct rpcrdma_mw *mw; | 335 | struct rpcrdma_mw *mw; |
320 | struct rpcrdma_frmr *frmr; | 336 | struct rpcrdma_frmr *frmr; |
321 | struct ib_mr *mr; | 337 | struct ib_mr *mr; |
322 | struct ib_reg_wr reg_wr; | 338 | struct ib_reg_wr *reg_wr; |
323 | struct ib_send_wr *bad_wr; | 339 | struct ib_send_wr *bad_wr; |
324 | int rc, i, n, dma_nents; | 340 | int rc, i, n, dma_nents; |
325 | u8 key; | 341 | u8 key; |
@@ -335,7 +351,9 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, | |||
335 | } while (mw->r.frmr.fr_state != FRMR_IS_INVALID); | 351 | } while (mw->r.frmr.fr_state != FRMR_IS_INVALID); |
336 | frmr = &mw->r.frmr; | 352 | frmr = &mw->r.frmr; |
337 | frmr->fr_state = FRMR_IS_VALID; | 353 | frmr->fr_state = FRMR_IS_VALID; |
354 | frmr->fr_waiter = false; | ||
338 | mr = frmr->fr_mr; | 355 | mr = frmr->fr_mr; |
356 | reg_wr = &frmr->fr_regwr; | ||
339 | 357 | ||
340 | if (nsegs > ia->ri_max_frmr_depth) | 358 | if (nsegs > ia->ri_max_frmr_depth) |
341 | nsegs = ia->ri_max_frmr_depth; | 359 | nsegs = ia->ri_max_frmr_depth; |
@@ -381,19 +399,19 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, | |||
381 | key = (u8)(mr->rkey & 0x000000FF); | 399 | key = (u8)(mr->rkey & 0x000000FF); |
382 | ib_update_fast_reg_key(mr, ++key); | 400 | ib_update_fast_reg_key(mr, ++key); |
383 | 401 | ||
384 | reg_wr.wr.next = NULL; | 402 | reg_wr->wr.next = NULL; |
385 | reg_wr.wr.opcode = IB_WR_REG_MR; | 403 | reg_wr->wr.opcode = IB_WR_REG_MR; |
386 | reg_wr.wr.wr_id = (uintptr_t)mw; | 404 | reg_wr->wr.wr_id = (uintptr_t)mw; |
387 | reg_wr.wr.num_sge = 0; | 405 | reg_wr->wr.num_sge = 0; |
388 | reg_wr.wr.send_flags = 0; | 406 | reg_wr->wr.send_flags = 0; |
389 | reg_wr.mr = mr; | 407 | reg_wr->mr = mr; |
390 | reg_wr.key = mr->rkey; | 408 | reg_wr->key = mr->rkey; |
391 | reg_wr.access = writing ? | 409 | reg_wr->access = writing ? |
392 | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : | 410 | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : |
393 | IB_ACCESS_REMOTE_READ; | 411 | IB_ACCESS_REMOTE_READ; |
394 | 412 | ||
395 | DECR_CQCOUNT(&r_xprt->rx_ep); | 413 | DECR_CQCOUNT(&r_xprt->rx_ep); |
396 | rc = ib_post_send(ia->ri_id->qp, &reg_wr.wr, &bad_wr); | 414 | rc = ib_post_send(ia->ri_id->qp, &reg_wr->wr, &bad_wr); |
397 | if (rc) | 415 | if (rc) |
398 | goto out_senderr; | 416 | goto out_senderr; |
399 | 417 | ||
@@ -413,6 +431,116 @@ out_senderr: | |||
413 | return rc; | 431 | return rc; |
414 | } | 432 | } |
415 | 433 | ||
434 | static struct ib_send_wr * | ||
435 | __frwr_prepare_linv_wr(struct rpcrdma_mr_seg *seg) | ||
436 | { | ||
437 | struct rpcrdma_mw *mw = seg->rl_mw; | ||
438 | struct rpcrdma_frmr *f = &mw->r.frmr; | ||
439 | struct ib_send_wr *invalidate_wr; | ||
440 | |||
441 | f->fr_waiter = false; | ||
442 | f->fr_state = FRMR_IS_INVALID; | ||
443 | invalidate_wr = &f->fr_invwr; | ||
444 | |||
445 | memset(invalidate_wr, 0, sizeof(*invalidate_wr)); | ||
446 | invalidate_wr->wr_id = (unsigned long)(void *)mw; | ||
447 | invalidate_wr->opcode = IB_WR_LOCAL_INV; | ||
448 | invalidate_wr->ex.invalidate_rkey = f->fr_mr->rkey; | ||
449 | |||
450 | return invalidate_wr; | ||
451 | } | ||
452 | |||
453 | static void | ||
454 | __frwr_dma_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, | ||
455 | int rc) | ||
456 | { | ||
457 | struct ib_device *device = r_xprt->rx_ia.ri_device; | ||
458 | struct rpcrdma_mw *mw = seg->rl_mw; | ||
459 | struct rpcrdma_frmr *f = &mw->r.frmr; | ||
460 | |||
461 | seg->rl_mw = NULL; | ||
462 | |||
463 | ib_dma_unmap_sg(device, f->sg, f->sg_nents, seg->mr_dir); | ||
464 | |||
465 | if (!rc) | ||
466 | rpcrdma_put_mw(r_xprt, mw); | ||
467 | else | ||
468 | __frwr_queue_recovery(mw); | ||
469 | } | ||
470 | |||
471 | /* Invalidate all memory regions that were registered for "req". | ||
472 | * | ||
473 | * Sleeps until it is safe for the host CPU to access the | ||
474 | * previously mapped memory regions. | ||
475 | */ | ||
476 | static void | ||
477 | frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) | ||
478 | { | ||
479 | struct ib_send_wr *invalidate_wrs, *pos, *prev, *bad_wr; | ||
480 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | ||
481 | struct rpcrdma_mr_seg *seg; | ||
482 | unsigned int i, nchunks; | ||
483 | struct rpcrdma_frmr *f; | ||
484 | int rc; | ||
485 | |||
486 | dprintk("RPC: %s: req %p\n", __func__, req); | ||
487 | |||
488 | /* ORDER: Invalidate all of the req's MRs first | ||
489 | * | ||
490 | * Chain the LOCAL_INV Work Requests and post them with | ||
491 | * a single ib_post_send() call. | ||
492 | */ | ||
493 | invalidate_wrs = pos = prev = NULL; | ||
494 | seg = NULL; | ||
495 | for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) { | ||
496 | seg = &req->rl_segments[i]; | ||
497 | |||
498 | pos = __frwr_prepare_linv_wr(seg); | ||
499 | |||
500 | if (!invalidate_wrs) | ||
501 | invalidate_wrs = pos; | ||
502 | else | ||
503 | prev->next = pos; | ||
504 | prev = pos; | ||
505 | |||
506 | i += seg->mr_nsegs; | ||
507 | } | ||
508 | f = &seg->rl_mw->r.frmr; | ||
509 | |||
510 | /* Strong send queue ordering guarantees that when the | ||
511 | * last WR in the chain completes, all WRs in the chain | ||
512 | * are complete. | ||
513 | */ | ||
514 | f->fr_invwr.send_flags = IB_SEND_SIGNALED; | ||
515 | f->fr_waiter = true; | ||
516 | init_completion(&f->fr_linv_done); | ||
517 | INIT_CQCOUNT(&r_xprt->rx_ep); | ||
518 | |||
519 | /* Transport disconnect drains the receive CQ before it | ||
520 | * replaces the QP. The RPC reply handler won't call us | ||
521 | * unless ri_id->qp is a valid pointer. | ||
522 | */ | ||
523 | rc = ib_post_send(ia->ri_id->qp, invalidate_wrs, &bad_wr); | ||
524 | if (rc) | ||
525 | pr_warn("%s: ib_post_send failed %i\n", __func__, rc); | ||
526 | |||
527 | wait_for_completion(&f->fr_linv_done); | ||
528 | |||
529 | /* ORDER: Now DMA unmap all of the req's MRs, and return | ||
530 | * them to the free MW list. | ||
531 | */ | ||
532 | for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) { | ||
533 | seg = &req->rl_segments[i]; | ||
534 | |||
535 | __frwr_dma_unmap(r_xprt, seg, rc); | ||
536 | |||
537 | i += seg->mr_nsegs; | ||
538 | seg->mr_nsegs = 0; | ||
539 | } | ||
540 | |||
541 | req->rl_nchunks = 0; | ||
542 | } | ||
543 | |||
416 | /* Post a LOCAL_INV Work Request to prevent further remote access | 544 | /* Post a LOCAL_INV Work Request to prevent further remote access |
417 | * via RDMA READ or RDMA WRITE. | 545 | * via RDMA READ or RDMA WRITE. |
418 | */ | 546 | */ |
@@ -423,23 +551,24 @@ frwr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg) | |||
423 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | 551 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; |
424 | struct rpcrdma_mw *mw = seg1->rl_mw; | 552 | struct rpcrdma_mw *mw = seg1->rl_mw; |
425 | struct rpcrdma_frmr *frmr = &mw->r.frmr; | 553 | struct rpcrdma_frmr *frmr = &mw->r.frmr; |
426 | struct ib_send_wr invalidate_wr, *bad_wr; | 554 | struct ib_send_wr *invalidate_wr, *bad_wr; |
427 | int rc, nsegs = seg->mr_nsegs; | 555 | int rc, nsegs = seg->mr_nsegs; |
428 | 556 | ||
429 | dprintk("RPC: %s: FRMR %p\n", __func__, mw); | 557 | dprintk("RPC: %s: FRMR %p\n", __func__, mw); |
430 | 558 | ||
431 | seg1->rl_mw = NULL; | 559 | seg1->rl_mw = NULL; |
432 | frmr->fr_state = FRMR_IS_INVALID; | 560 | frmr->fr_state = FRMR_IS_INVALID; |
561 | invalidate_wr = &mw->r.frmr.fr_invwr; | ||
433 | 562 | ||
434 | memset(&invalidate_wr, 0, sizeof(invalidate_wr)); | 563 | memset(invalidate_wr, 0, sizeof(*invalidate_wr)); |
435 | invalidate_wr.wr_id = (unsigned long)(void *)mw; | 564 | invalidate_wr->wr_id = (uintptr_t)mw; |
436 | invalidate_wr.opcode = IB_WR_LOCAL_INV; | 565 | invalidate_wr->opcode = IB_WR_LOCAL_INV; |
437 | invalidate_wr.ex.invalidate_rkey = frmr->fr_mr->rkey; | 566 | invalidate_wr->ex.invalidate_rkey = frmr->fr_mr->rkey; |
438 | DECR_CQCOUNT(&r_xprt->rx_ep); | 567 | DECR_CQCOUNT(&r_xprt->rx_ep); |
439 | 568 | ||
440 | ib_dma_unmap_sg(ia->ri_device, frmr->sg, frmr->sg_nents, seg1->mr_dir); | 569 | ib_dma_unmap_sg(ia->ri_device, frmr->sg, frmr->sg_nents, seg1->mr_dir); |
441 | read_lock(&ia->ri_qplock); | 570 | read_lock(&ia->ri_qplock); |
442 | rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr); | 571 | rc = ib_post_send(ia->ri_id->qp, invalidate_wr, &bad_wr); |
443 | read_unlock(&ia->ri_qplock); | 572 | read_unlock(&ia->ri_qplock); |
444 | if (rc) | 573 | if (rc) |
445 | goto out_err; | 574 | goto out_err; |
@@ -471,6 +600,7 @@ frwr_op_destroy(struct rpcrdma_buffer *buf) | |||
471 | 600 | ||
472 | const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = { | 601 | const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = { |
473 | .ro_map = frwr_op_map, | 602 | .ro_map = frwr_op_map, |
603 | .ro_unmap_sync = frwr_op_unmap_sync, | ||
474 | .ro_unmap = frwr_op_unmap, | 604 | .ro_unmap = frwr_op_unmap, |
475 | .ro_open = frwr_op_open, | 605 | .ro_open = frwr_op_open, |
476 | .ro_maxpages = frwr_op_maxpages, | 606 | .ro_maxpages = frwr_op_maxpages, |
diff --git a/net/sunrpc/xprtrdma/physical_ops.c b/net/sunrpc/xprtrdma/physical_ops.c index 617b76f22154..dbb302ecf590 100644 --- a/net/sunrpc/xprtrdma/physical_ops.c +++ b/net/sunrpc/xprtrdma/physical_ops.c | |||
@@ -83,6 +83,18 @@ physical_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg) | |||
83 | return 1; | 83 | return 1; |
84 | } | 84 | } |
85 | 85 | ||
86 | /* DMA unmap all memory regions that were mapped for "req". | ||
87 | */ | ||
88 | static void | ||
89 | physical_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) | ||
90 | { | ||
91 | struct ib_device *device = r_xprt->rx_ia.ri_device; | ||
92 | unsigned int i; | ||
93 | |||
94 | for (i = 0; req->rl_nchunks; --req->rl_nchunks) | ||
95 | rpcrdma_unmap_one(device, &req->rl_segments[i++]); | ||
96 | } | ||
97 | |||
86 | static void | 98 | static void |
87 | physical_op_destroy(struct rpcrdma_buffer *buf) | 99 | physical_op_destroy(struct rpcrdma_buffer *buf) |
88 | { | 100 | { |
@@ -90,6 +102,7 @@ physical_op_destroy(struct rpcrdma_buffer *buf) | |||
90 | 102 | ||
91 | const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops = { | 103 | const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops = { |
92 | .ro_map = physical_op_map, | 104 | .ro_map = physical_op_map, |
105 | .ro_unmap_sync = physical_op_unmap_sync, | ||
93 | .ro_unmap = physical_op_unmap, | 106 | .ro_unmap = physical_op_unmap, |
94 | .ro_open = physical_op_open, | 107 | .ro_open = physical_op_open, |
95 | .ro_maxpages = physical_op_maxpages, | 108 | .ro_maxpages = physical_op_maxpages, |
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index c10d9699441c..0f28f2d743ed 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c | |||
@@ -804,6 +804,11 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep) | |||
804 | if (req->rl_reply) | 804 | if (req->rl_reply) |
805 | goto out_duplicate; | 805 | goto out_duplicate; |
806 | 806 | ||
807 | /* Sanity checking has passed. We are now committed | ||
808 | * to complete this transaction. | ||
809 | */ | ||
810 | list_del_init(&rqst->rq_list); | ||
811 | spin_unlock_bh(&xprt->transport_lock); | ||
807 | dprintk("RPC: %s: reply 0x%p completes request 0x%p\n" | 812 | dprintk("RPC: %s: reply 0x%p completes request 0x%p\n" |
808 | " RPC request 0x%p xid 0x%08x\n", | 813 | " RPC request 0x%p xid 0x%08x\n", |
809 | __func__, rep, req, rqst, | 814 | __func__, rep, req, rqst, |
@@ -888,12 +893,23 @@ badheader: | |||
888 | break; | 893 | break; |
889 | } | 894 | } |
890 | 895 | ||
896 | /* Invalidate and flush the data payloads before waking the | ||
897 | * waiting application. This guarantees the memory region is | ||
898 | * properly fenced from the server before the application | ||
899 | * accesses the data. It also ensures proper send flow | ||
900 | * control: waking the next RPC waits until this RPC has | ||
901 | * relinquished all its Send Queue entries. | ||
902 | */ | ||
903 | if (req->rl_nchunks) | ||
904 | r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt, req); | ||
905 | |||
891 | credits = be32_to_cpu(headerp->rm_credit); | 906 | credits = be32_to_cpu(headerp->rm_credit); |
892 | if (credits == 0) | 907 | if (credits == 0) |
893 | credits = 1; /* don't deadlock */ | 908 | credits = 1; /* don't deadlock */ |
894 | else if (credits > r_xprt->rx_buf.rb_max_requests) | 909 | else if (credits > r_xprt->rx_buf.rb_max_requests) |
895 | credits = r_xprt->rx_buf.rb_max_requests; | 910 | credits = r_xprt->rx_buf.rb_max_requests; |
896 | 911 | ||
912 | spin_lock_bh(&xprt->transport_lock); | ||
897 | cwnd = xprt->cwnd; | 913 | cwnd = xprt->cwnd; |
898 | xprt->cwnd = credits << RPC_CWNDSHIFT; | 914 | xprt->cwnd = credits << RPC_CWNDSHIFT; |
899 | if (xprt->cwnd > cwnd) | 915 | if (xprt->cwnd > cwnd) |
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 8c545f7d7525..740bddcf3488 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c | |||
@@ -576,6 +576,9 @@ xprt_rdma_free(void *buffer) | |||
576 | 576 | ||
577 | rb = container_of(buffer, struct rpcrdma_regbuf, rg_base[0]); | 577 | rb = container_of(buffer, struct rpcrdma_regbuf, rg_base[0]); |
578 | req = rb->rg_owner; | 578 | req = rb->rg_owner; |
579 | if (req->rl_backchannel) | ||
580 | return; | ||
581 | |||
579 | r_xprt = container_of(req->rl_buffer, struct rpcrdma_xprt, rx_buf); | 582 | r_xprt = container_of(req->rl_buffer, struct rpcrdma_xprt, rx_buf); |
580 | 583 | ||
581 | dprintk("RPC: %s: called on 0x%p\n", __func__, req->rl_reply); | 584 | dprintk("RPC: %s: called on 0x%p\n", __func__, req->rl_reply); |
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index eadd1655145a..732c71ce5dca 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c | |||
@@ -616,10 +616,8 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, | |||
616 | 616 | ||
617 | /* set trigger for requesting send completion */ | 617 | /* set trigger for requesting send completion */ |
618 | ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1; | 618 | ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1; |
619 | if (ep->rep_cqinit > RPCRDMA_MAX_UNSIGNALED_SENDS) | 619 | if (ep->rep_cqinit <= 2) |
620 | ep->rep_cqinit = RPCRDMA_MAX_UNSIGNALED_SENDS; | 620 | ep->rep_cqinit = 0; /* always signal? */ |
621 | else if (ep->rep_cqinit <= 2) | ||
622 | ep->rep_cqinit = 0; | ||
623 | INIT_CQCOUNT(ep); | 621 | INIT_CQCOUNT(ep); |
624 | init_waitqueue_head(&ep->rep_connect_wait); | 622 | init_waitqueue_head(&ep->rep_connect_wait); |
625 | INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker); | 623 | INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker); |
@@ -852,10 +850,11 @@ retry: | |||
852 | 850 | ||
853 | if (extras) { | 851 | if (extras) { |
854 | rc = rpcrdma_ep_post_extra_recv(r_xprt, extras); | 852 | rc = rpcrdma_ep_post_extra_recv(r_xprt, extras); |
855 | if (rc) | 853 | if (rc) { |
856 | pr_warn("%s: rpcrdma_ep_post_extra_recv: %i\n", | 854 | pr_warn("%s: rpcrdma_ep_post_extra_recv: %i\n", |
857 | __func__, rc); | 855 | __func__, rc); |
858 | rc = 0; | 856 | rc = 0; |
857 | } | ||
859 | } | 858 | } |
860 | } | 859 | } |
861 | 860 | ||
@@ -1337,15 +1336,14 @@ rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *r_xprt, unsigned int count) | |||
1337 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | 1336 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; |
1338 | struct rpcrdma_ep *ep = &r_xprt->rx_ep; | 1337 | struct rpcrdma_ep *ep = &r_xprt->rx_ep; |
1339 | struct rpcrdma_rep *rep; | 1338 | struct rpcrdma_rep *rep; |
1340 | unsigned long flags; | ||
1341 | int rc; | 1339 | int rc; |
1342 | 1340 | ||
1343 | while (count--) { | 1341 | while (count--) { |
1344 | spin_lock_irqsave(&buffers->rb_lock, flags); | 1342 | spin_lock(&buffers->rb_lock); |
1345 | if (list_empty(&buffers->rb_recv_bufs)) | 1343 | if (list_empty(&buffers->rb_recv_bufs)) |
1346 | goto out_reqbuf; | 1344 | goto out_reqbuf; |
1347 | rep = rpcrdma_buffer_get_rep_locked(buffers); | 1345 | rep = rpcrdma_buffer_get_rep_locked(buffers); |
1348 | spin_unlock_irqrestore(&buffers->rb_lock, flags); | 1346 | spin_unlock(&buffers->rb_lock); |
1349 | 1347 | ||
1350 | rc = rpcrdma_ep_post_recv(ia, ep, rep); | 1348 | rc = rpcrdma_ep_post_recv(ia, ep, rep); |
1351 | if (rc) | 1349 | if (rc) |
@@ -1355,7 +1353,7 @@ rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *r_xprt, unsigned int count) | |||
1355 | return 0; | 1353 | return 0; |
1356 | 1354 | ||
1357 | out_reqbuf: | 1355 | out_reqbuf: |
1358 | spin_unlock_irqrestore(&buffers->rb_lock, flags); | 1356 | spin_unlock(&buffers->rb_lock); |
1359 | pr_warn("%s: no extra receive buffers\n", __func__); | 1357 | pr_warn("%s: no extra receive buffers\n", __func__); |
1360 | return -ENOMEM; | 1358 | return -ENOMEM; |
1361 | 1359 | ||
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index ac7f8d4f632a..728101ddc44b 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h | |||
@@ -88,12 +88,6 @@ struct rpcrdma_ep { | |||
88 | struct delayed_work rep_connect_worker; | 88 | struct delayed_work rep_connect_worker; |
89 | }; | 89 | }; |
90 | 90 | ||
91 | /* | ||
92 | * Force a signaled SEND Work Request every so often, | ||
93 | * in case the provider needs to do some housekeeping. | ||
94 | */ | ||
95 | #define RPCRDMA_MAX_UNSIGNALED_SENDS (32) | ||
96 | |||
97 | #define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit) | 91 | #define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit) |
98 | #define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount) | 92 | #define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount) |
99 | 93 | ||
@@ -207,6 +201,12 @@ struct rpcrdma_frmr { | |||
207 | enum rpcrdma_frmr_state fr_state; | 201 | enum rpcrdma_frmr_state fr_state; |
208 | struct work_struct fr_work; | 202 | struct work_struct fr_work; |
209 | struct rpcrdma_xprt *fr_xprt; | 203 | struct rpcrdma_xprt *fr_xprt; |
204 | bool fr_waiter; | ||
205 | struct completion fr_linv_done;; | ||
206 | union { | ||
207 | struct ib_reg_wr fr_regwr; | ||
208 | struct ib_send_wr fr_invwr; | ||
209 | }; | ||
210 | }; | 210 | }; |
211 | 211 | ||
212 | struct rpcrdma_fmr { | 212 | struct rpcrdma_fmr { |
@@ -364,6 +364,8 @@ struct rpcrdma_xprt; | |||
364 | struct rpcrdma_memreg_ops { | 364 | struct rpcrdma_memreg_ops { |
365 | int (*ro_map)(struct rpcrdma_xprt *, | 365 | int (*ro_map)(struct rpcrdma_xprt *, |
366 | struct rpcrdma_mr_seg *, int, bool); | 366 | struct rpcrdma_mr_seg *, int, bool); |
367 | void (*ro_unmap_sync)(struct rpcrdma_xprt *, | ||
368 | struct rpcrdma_req *); | ||
367 | int (*ro_unmap)(struct rpcrdma_xprt *, | 369 | int (*ro_unmap)(struct rpcrdma_xprt *, |
368 | struct rpcrdma_mr_seg *); | 370 | struct rpcrdma_mr_seg *); |
369 | int (*ro_open)(struct rpcrdma_ia *, | 371 | int (*ro_open)(struct rpcrdma_ia *, |
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 2ffaf6a79499..fde2138b81e7 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c | |||
@@ -398,7 +398,6 @@ static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, | |||
398 | if (unlikely(!sock)) | 398 | if (unlikely(!sock)) |
399 | return -ENOTSOCK; | 399 | return -ENOTSOCK; |
400 | 400 | ||
401 | clear_bit(SOCKWQ_ASYNC_NOSPACE, &sock->flags); | ||
402 | if (base != 0) { | 401 | if (base != 0) { |
403 | addr = NULL; | 402 | addr = NULL; |
404 | addrlen = 0; | 403 | addrlen = 0; |
@@ -442,7 +441,6 @@ static void xs_nospace_callback(struct rpc_task *task) | |||
442 | struct sock_xprt *transport = container_of(task->tk_rqstp->rq_xprt, struct sock_xprt, xprt); | 441 | struct sock_xprt *transport = container_of(task->tk_rqstp->rq_xprt, struct sock_xprt, xprt); |
443 | 442 | ||
444 | transport->inet->sk_write_pending--; | 443 | transport->inet->sk_write_pending--; |
445 | clear_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags); | ||
446 | } | 444 | } |
447 | 445 | ||
448 | /** | 446 | /** |
@@ -467,20 +465,11 @@ static int xs_nospace(struct rpc_task *task) | |||
467 | 465 | ||
468 | /* Don't race with disconnect */ | 466 | /* Don't race with disconnect */ |
469 | if (xprt_connected(xprt)) { | 467 | if (xprt_connected(xprt)) { |
470 | if (test_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags)) { | 468 | /* wait for more buffer space */ |
471 | /* | 469 | sk->sk_write_pending++; |
472 | * Notify TCP that we're limited by the application | 470 | xprt_wait_for_buffer_space(task, xs_nospace_callback); |
473 | * window size | 471 | } else |
474 | */ | ||
475 | set_bit(SOCK_NOSPACE, &transport->sock->flags); | ||
476 | sk->sk_write_pending++; | ||
477 | /* ...and wait for more buffer space */ | ||
478 | xprt_wait_for_buffer_space(task, xs_nospace_callback); | ||
479 | } | ||
480 | } else { | ||
481 | clear_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags); | ||
482 | ret = -ENOTCONN; | 472 | ret = -ENOTCONN; |
483 | } | ||
484 | 473 | ||
485 | spin_unlock_bh(&xprt->transport_lock); | 474 | spin_unlock_bh(&xprt->transport_lock); |
486 | 475 | ||
@@ -616,9 +605,6 @@ process_status: | |||
616 | case -EAGAIN: | 605 | case -EAGAIN: |
617 | status = xs_nospace(task); | 606 | status = xs_nospace(task); |
618 | break; | 607 | break; |
619 | default: | ||
620 | dprintk("RPC: sendmsg returned unrecognized error %d\n", | ||
621 | -status); | ||
622 | case -ENETUNREACH: | 608 | case -ENETUNREACH: |
623 | case -ENOBUFS: | 609 | case -ENOBUFS: |
624 | case -EPIPE: | 610 | case -EPIPE: |
@@ -626,7 +612,10 @@ process_status: | |||
626 | case -EPERM: | 612 | case -EPERM: |
627 | /* When the server has died, an ICMP port unreachable message | 613 | /* When the server has died, an ICMP port unreachable message |
628 | * prompts ECONNREFUSED. */ | 614 | * prompts ECONNREFUSED. */ |
629 | clear_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags); | 615 | break; |
616 | default: | ||
617 | dprintk("RPC: sendmsg returned unrecognized error %d\n", | ||
618 | -status); | ||
630 | } | 619 | } |
631 | 620 | ||
632 | return status; | 621 | return status; |
@@ -706,16 +695,16 @@ static int xs_tcp_send_request(struct rpc_task *task) | |||
706 | case -EAGAIN: | 695 | case -EAGAIN: |
707 | status = xs_nospace(task); | 696 | status = xs_nospace(task); |
708 | break; | 697 | break; |
709 | default: | ||
710 | dprintk("RPC: sendmsg returned unrecognized error %d\n", | ||
711 | -status); | ||
712 | case -ECONNRESET: | 698 | case -ECONNRESET: |
713 | case -ECONNREFUSED: | 699 | case -ECONNREFUSED: |
714 | case -ENOTCONN: | 700 | case -ENOTCONN: |
715 | case -EADDRINUSE: | 701 | case -EADDRINUSE: |
716 | case -ENOBUFS: | 702 | case -ENOBUFS: |
717 | case -EPIPE: | 703 | case -EPIPE: |
718 | clear_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags); | 704 | break; |
705 | default: | ||
706 | dprintk("RPC: sendmsg returned unrecognized error %d\n", | ||
707 | -status); | ||
719 | } | 708 | } |
720 | 709 | ||
721 | return status; | 710 | return status; |
@@ -1609,19 +1598,23 @@ static void xs_tcp_state_change(struct sock *sk) | |||
1609 | 1598 | ||
1610 | static void xs_write_space(struct sock *sk) | 1599 | static void xs_write_space(struct sock *sk) |
1611 | { | 1600 | { |
1612 | struct socket *sock; | 1601 | struct socket_wq *wq; |
1613 | struct rpc_xprt *xprt; | 1602 | struct rpc_xprt *xprt; |
1614 | 1603 | ||
1615 | if (unlikely(!(sock = sk->sk_socket))) | 1604 | if (!sk->sk_socket) |
1616 | return; | 1605 | return; |
1617 | clear_bit(SOCK_NOSPACE, &sock->flags); | 1606 | clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags); |
1618 | 1607 | ||
1619 | if (unlikely(!(xprt = xprt_from_sock(sk)))) | 1608 | if (unlikely(!(xprt = xprt_from_sock(sk)))) |
1620 | return; | 1609 | return; |
1621 | if (test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &sock->flags) == 0) | 1610 | rcu_read_lock(); |
1622 | return; | 1611 | wq = rcu_dereference(sk->sk_wq); |
1612 | if (!wq || test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags) == 0) | ||
1613 | goto out; | ||
1623 | 1614 | ||
1624 | xprt_write_space(xprt); | 1615 | xprt_write_space(xprt); |
1616 | out: | ||
1617 | rcu_read_unlock(); | ||
1625 | } | 1618 | } |
1626 | 1619 | ||
1627 | /** | 1620 | /** |
@@ -1907,18 +1900,6 @@ static inline void xs_reclassify_socket(int family, struct socket *sock) | |||
1907 | } | 1900 | } |
1908 | } | 1901 | } |
1909 | #else | 1902 | #else |
1910 | static inline void xs_reclassify_socketu(struct socket *sock) | ||
1911 | { | ||
1912 | } | ||
1913 | |||
1914 | static inline void xs_reclassify_socket4(struct socket *sock) | ||
1915 | { | ||
1916 | } | ||
1917 | |||
1918 | static inline void xs_reclassify_socket6(struct socket *sock) | ||
1919 | { | ||
1920 | } | ||
1921 | |||
1922 | static inline void xs_reclassify_socket(int family, struct socket *sock) | 1903 | static inline void xs_reclassify_socket(int family, struct socket *sock) |
1923 | { | 1904 | { |
1924 | } | 1905 | } |
@@ -2008,7 +1989,7 @@ static int xs_local_setup_socket(struct sock_xprt *transport) | |||
2008 | "transport socket (%d).\n", -status); | 1989 | "transport socket (%d).\n", -status); |
2009 | goto out; | 1990 | goto out; |
2010 | } | 1991 | } |
2011 | xs_reclassify_socketu(sock); | 1992 | xs_reclassify_socket(AF_LOCAL, sock); |
2012 | 1993 | ||
2013 | dprintk("RPC: worker connecting xprt %p via AF_LOCAL to %s\n", | 1994 | dprintk("RPC: worker connecting xprt %p via AF_LOCAL to %s\n", |
2014 | xprt, xprt->address_strings[RPC_DISPLAY_ADDR]); | 1995 | xprt, xprt->address_strings[RPC_DISPLAY_ADDR]); |