 49 files changed, 1764 insertions(+), 899 deletions(-)
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 618ced381a14..aaa2e8d3df6f 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -217,7 +217,8 @@ static u32 initiate_file_draining(struct nfs_client *clp,
 	}
 
 	if (pnfs_mark_matching_lsegs_return(lo, &free_me_list,
-				&args->cbl_range)) {
+				&args->cbl_range,
+				be32_to_cpu(args->cbl_stateid.seqid))) {
 		rv = NFS4_OK;
 		goto unlock;
 	}
@@ -500,8 +501,10 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
 	cps->slot = slot;
 
 	/* The ca_maxresponsesize_cached is 0 with no DRC */
-	if (args->csa_cachethis != 0)
-		return htonl(NFS4ERR_REP_TOO_BIG_TO_CACHE);
+	if (args->csa_cachethis != 0) {
+		status = htonl(NFS4ERR_REP_TOO_BIG_TO_CACHE);
+		goto out_unlock;
+	}
 
 	/*
 	 * Check for pending referring calls. If a match is found, a
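
The nfs4_callback_sequence() hunk above trades an early return for a goto to a common out_unlock label, so whatever the function acquired before this check (the slot, here) is also released on the error path. A minimal self-contained sketch of that single-exit pattern; the lock, names, and error value are illustrative, not the actual callback code:

    #include <pthread.h>

    static pthread_mutex_t tbl_lock = PTHREAD_MUTEX_INITIALIZER;

    /* Once the lock is taken, every exit funnels through out_unlock,
     * mirroring the status/goto conversion in the hunk above. */
    static int sequence_op(int cachethis)
    {
            int status = 0;

            pthread_mutex_lock(&tbl_lock);
            if (cachethis != 0) {
                    status = -1;  /* stand-in for NFS4ERR_REP_TOO_BIG_TO_CACHE */
                    goto out_unlock;
            }
            /* ... further checks that may also fail ... */
    out_unlock:
            pthread_mutex_unlock(&tbl_lock);
            return status;
    }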
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 976c90608e56..d81f96aacd51 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -146,10 +146,16 @@ static __be32 decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid)
 	p = read_buf(xdr, NFS4_STATEID_SIZE);
 	if (unlikely(p == NULL))
 		return htonl(NFS4ERR_RESOURCE);
-	memcpy(stateid, p, NFS4_STATEID_SIZE);
+	memcpy(stateid->data, p, NFS4_STATEID_SIZE);
 	return 0;
 }
 
+static __be32 decode_delegation_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid)
+{
+	stateid->type = NFS4_DELEGATION_STATEID_TYPE;
+	return decode_stateid(xdr, stateid);
+}
+
 static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound_hdr_arg *hdr)
 {
 	__be32 *p;
@@ -211,7 +217,7 @@ static __be32 decode_recall_args(struct svc_rqst *rqstp, struct xdr_stream *xdr,
 	__be32 *p;
 	__be32 status;
 
-	status = decode_stateid(xdr, &args->stateid);
+	status = decode_delegation_stateid(xdr, &args->stateid);
 	if (unlikely(status != 0))
 		goto out;
 	p = read_buf(xdr, 4);
@@ -227,6 +233,11 @@ out:
 }
 
 #if defined(CONFIG_NFS_V4_1)
+static __be32 decode_layout_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid)
+{
+	stateid->type = NFS4_LAYOUT_STATEID_TYPE;
+	return decode_stateid(xdr, stateid);
+}
 
 static __be32 decode_layoutrecall_args(struct svc_rqst *rqstp,
 				       struct xdr_stream *xdr,
@@ -263,7 +274,7 @@ static __be32 decode_layoutrecall_args(struct svc_rqst *rqstp,
 		}
 		p = xdr_decode_hyper(p, &args->cbl_range.offset);
 		p = xdr_decode_hyper(p, &args->cbl_range.length);
-		status = decode_stateid(xdr, &args->cbl_stateid);
+		status = decode_layout_stateid(xdr, &args->cbl_stateid);
 		if (unlikely(status != 0))
 			goto out;
 	} else if (args->cbl_recall_type == RETURN_FSID) {
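
decode_delegation_stateid() and decode_layout_stateid() tag each stateid with its origin before handing off to the shared wire decoder, so later consumers can check what kind of stateid they hold. The pattern generalizes: one decoder for the bytes, thin typed wrappers for the semantics. A self-contained sketch with invented names (NFS4_STATEID_SIZE is 16 bytes on the wire):

    #include <string.h>

    enum stateid_type {
            STATEID_INVALID = 0,
            STATEID_DELEGATION,
            STATEID_LAYOUT,
    };

    struct stateid {
            enum stateid_type type;
            unsigned char data[16];         /* raw wire representation */
    };

    /* generic wire decoder: fills data[], leaves type alone */
    static int decode_stateid(const unsigned char *wire, struct stateid *sid)
    {
            memcpy(sid->data, wire, sizeof(sid->data));
            return 0;
    }

    /* typed wrappers set the type first, then share the decoder */
    static int decode_delegation_stateid(const unsigned char *wire, struct stateid *sid)
    {
            sid->type = STATEID_DELEGATION;
            return decode_stateid(wire, sid);
    }

    static int decode_layout_stateid(const unsigned char *wire, struct stateid *sid)
    {
            sid->type = STATEID_LAYOUT;
            return decode_stateid(wire, sid);
    }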
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 5166adcfc0fb..322c2585bc34 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -875,15 +875,16 @@ int nfs_delegations_present(struct nfs_client *clp)
 
 /**
  * nfs4_copy_delegation_stateid - Copy inode's state ID information
- * @dst: stateid data structure to fill in
  * @inode: inode to check
  * @flags: delegation type requirement
+ * @dst: stateid data structure to fill in
+ * @cred: optional argument to retrieve credential
  *
  * Returns "true" and fills in "dst->data" * if inode had a delegation,
 * otherwise "false" is returned.
 */
-bool nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode,
-		fmode_t flags)
+bool nfs4_copy_delegation_stateid(struct inode *inode, fmode_t flags,
+		nfs4_stateid *dst, struct rpc_cred **cred)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_delegation *delegation;
@@ -896,6 +897,8 @@ bool nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode,
 	if (ret) {
 		nfs4_stateid_copy(dst, &delegation->stateid);
 		nfs_mark_delegation_referenced(delegation);
+		if (cred)
+			*cred = get_rpccred(delegation->cred);
 	}
 	rcu_read_unlock();
 	return ret;
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 333063e032f0..64724d252a79 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -56,7 +56,7 @@ void nfs_delegation_reap_unclaimed(struct nfs_client *clp);
 int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid, int issync);
 int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid, fmode_t type);
 int nfs4_lock_delegation_recall(struct file_lock *fl, struct nfs4_state *state, const nfs4_stateid *stateid);
-bool nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode, fmode_t flags);
+bool nfs4_copy_delegation_stateid(struct inode *inode, fmode_t flags, nfs4_stateid *dst, struct rpc_cred **cred);
 
 void nfs_mark_delegation_referenced(struct nfs_delegation *delegation);
 int nfs4_have_delegation(struct inode *inode, fmode_t flags);
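
Callers of the reordered nfs4_copy_delegation_stateid() that pass a non-NULL cred pointer receive a referenced rpc_cred and become responsible for dropping it. A hypothetical kernel-context caller, sketched only to show the ownership contract; it is not part of this patch:

    /* hypothetical caller - not in this series */
    static int use_delegation(struct inode *inode)
    {
            nfs4_stateid stateid;
            struct rpc_cred *cred = NULL;

            if (!nfs4_copy_delegation_stateid(inode, FMODE_READ, &stateid, &cred))
                    return -ENOENT;         /* no delegation held */

            /* ... issue the RPC using stateid and cred ... */

            if (cred)
                    put_rpccred(cred);      /* balances the get_rpccred() inside */
            return 0;
    }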
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 741a92c470bb..979b3c4dee6a 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -87,6 +87,7 @@ struct nfs_direct_req {
 	int			mirror_count;
 
 	ssize_t			count,		/* bytes actually processed */
+				max_count,	/* max expected count */
 				bytes_left,	/* bytes left to be sent */
 				io_start,	/* start of IO */
 				error;		/* any reported error */
@@ -123,6 +124,8 @@ nfs_direct_good_bytes(struct nfs_direct_req *dreq, struct nfs_pgio_header *hdr)
 	int i;
 	ssize_t count;
 
+	WARN_ON_ONCE(dreq->count >= dreq->max_count);
+
 	if (dreq->mirror_count == 1) {
 		dreq->mirrors[hdr->pgio_mirror_idx].count += hdr->good_bytes;
 		dreq->count += hdr->good_bytes;
@@ -275,7 +278,7 @@ static void nfs_direct_release_pages(struct page **pages, unsigned int npages)
 void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo,
 			      struct nfs_direct_req *dreq)
 {
-	cinfo->lock = &dreq->inode->i_lock;
+	cinfo->inode = dreq->inode;
 	cinfo->mds = &dreq->mds_cinfo;
 	cinfo->ds = &dreq->ds_cinfo;
 	cinfo->dreq = dreq;
@@ -591,7 +594,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter)
 		goto out_unlock;
 
 	dreq->inode = inode;
-	dreq->bytes_left = count;
+	dreq->bytes_left = dreq->max_count = count;
 	dreq->io_start = iocb->ki_pos;
 	dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
 	l_ctx = nfs_get_lock_context(dreq->ctx);
@@ -630,13 +633,13 @@ nfs_direct_write_scan_commit_list(struct inode *inode,
 				  struct list_head *list,
 				  struct nfs_commit_info *cinfo)
 {
-	spin_lock(cinfo->lock);
+	spin_lock(&cinfo->inode->i_lock);
 #ifdef CONFIG_NFS_V4_1
 	if (cinfo->ds != NULL && cinfo->ds->nwritten != 0)
 		NFS_SERVER(inode)->pnfs_curr_ld->recover_commit_reqs(list, cinfo);
 #endif
 	nfs_scan_commit_list(&cinfo->mds->list, list, cinfo, 0);
-	spin_unlock(cinfo->lock);
+	spin_unlock(&cinfo->inode->i_lock);
 }
 
 static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
@@ -671,13 +674,13 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
 		if (!nfs_pageio_add_request(&desc, req)) {
 			nfs_list_remove_request(req);
 			nfs_list_add_request(req, &failed);
-			spin_lock(cinfo.lock);
+			spin_lock(&cinfo.inode->i_lock);
 			dreq->flags = 0;
 			if (desc.pg_error < 0)
 				dreq->error = desc.pg_error;
 			else
 				dreq->error = -EIO;
-			spin_unlock(cinfo.lock);
+			spin_unlock(&cinfo.inode->i_lock);
 		}
 		nfs_release_request(req);
 	}
@@ -1023,7 +1026,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter)
 		goto out_unlock;
 
 	dreq->inode = inode;
-	dreq->bytes_left = iov_iter_count(iter);
+	dreq->bytes_left = dreq->max_count = iov_iter_count(iter);
 	dreq->io_start = pos;
 	dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
 	l_ctx = nfs_get_lock_context(dreq->ctx);
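
The direct I/O hunks replace the cached spinlock pointer in nfs_commit_info with a pointer to the owning inode; all lockers now go through &cinfo->inode->i_lock. Storing the owner rather than one of its members leaves room to change how the owner is locked later. A compact self-contained sketch of the refactor, with invented types:

    #include <pthread.h>

    struct inode {
            pthread_mutex_t i_lock;
            /* ... */
    };

    /* before: struct commit_info { pthread_mutex_t *lock; ... }; */
    struct commit_info {
            struct inode *inode;    /* after: point at the owner instead */
    };

    static void scan_commit_list(struct commit_info *cinfo)
    {
            pthread_mutex_lock(&cinfo->inode->i_lock);
            /* ... walk and transfer commit requests ... */
            pthread_mutex_unlock(&cinfo->inode->i_lock);
    }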
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index 3384dc8e6683..aa59757389dc 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -795,7 +795,7 @@ filelayout_alloc_commit_info(struct pnfs_layout_segment *lseg,
 		buckets[i].direct_verf.committed = NFS_INVALID_STABLE_HOW;
 	}
 
-	spin_lock(cinfo->lock);
+	spin_lock(&cinfo->inode->i_lock);
 	if (cinfo->ds->nbuckets >= size)
 		goto out;
 	for (i = 0; i < cinfo->ds->nbuckets; i++) {
@@ -811,7 +811,7 @@ filelayout_alloc_commit_info(struct pnfs_layout_segment *lseg,
 	swap(cinfo->ds->buckets, buckets);
 	cinfo->ds->nbuckets = size;
 out:
-	spin_unlock(cinfo->lock);
+	spin_unlock(&cinfo->inode->i_lock);
 	kfree(buckets);
 	return 0;
 }
@@ -890,6 +890,7 @@ filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio,
 					   0,
 					   NFS4_MAX_UINT64,
 					   IOMODE_READ,
+					   false,
 					   GFP_KERNEL);
 	if (IS_ERR(pgio->pg_lseg)) {
 		pgio->pg_error = PTR_ERR(pgio->pg_lseg);
@@ -915,6 +916,7 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
 					   0,
 					   NFS4_MAX_UINT64,
 					   IOMODE_RW,
+					   false,
 					   GFP_NOFS);
 	if (IS_ERR(pgio->pg_lseg)) {
 		pgio->pg_error = PTR_ERR(pgio->pg_lseg);
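
Every pnfs_update_layout() call site in this series gains a boolean between the iomode and gfp arguments. The function itself is not in this excerpt; judging from the flexfile changes below, its signature presumably becomes roughly the following, with strict_iomode demanding a segment of exactly the requested iomode instead of accepting an IOMODE_RW segment for a read:

    /* assumed post-series declaration; the real one lives in pnfs.h,
     * which is not part of this excerpt */
    struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
                                                   struct nfs_open_context *ctx,
                                                   loff_t pos,
                                                   u64 count,
                                                   enum pnfs_iomode iomode,
                                                   bool strict_iomode,
                                                   gfp_t gfp_flags);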
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 0cb1abd535e3..0e8018bc9880 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -26,6 +26,8 @@
 
 #define FF_LAYOUT_POLL_RETRY_MAX     (15*HZ)
 
+static struct group_info	*ff_zero_group;
+
 static struct pnfs_layout_hdr *
 ff_layout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags)
 {
@@ -53,14 +55,15 @@ ff_layout_free_layout_hdr(struct pnfs_layout_hdr *lo)
 	kfree(FF_LAYOUT_FROM_HDR(lo));
 }
 
-static int decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid)
+static int decode_pnfs_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid)
 {
 	__be32 *p;
 
 	p = xdr_inline_decode(xdr, NFS4_STATEID_SIZE);
 	if (unlikely(p == NULL))
 		return -ENOBUFS;
-	memcpy(stateid, p, NFS4_STATEID_SIZE);
+	stateid->type = NFS4_PNFS_DS_STATEID_TYPE;
+	memcpy(stateid->data, p, NFS4_STATEID_SIZE);
 	dprintk("%s: stateid id= [%x%x%x%x]\n", __func__,
 		p[0], p[1], p[2], p[3]);
 	return 0;
@@ -211,10 +214,16 @@ static struct nfs4_ff_layout_mirror *ff_layout_alloc_mirror(gfp_t gfp_flags)
 
 static void ff_layout_free_mirror(struct nfs4_ff_layout_mirror *mirror)
 {
+	struct rpc_cred	*cred;
+
 	ff_layout_remove_mirror(mirror);
 	kfree(mirror->fh_versions);
-	if (mirror->cred)
-		put_rpccred(mirror->cred);
+	cred = rcu_access_pointer(mirror->ro_cred);
+	if (cred)
+		put_rpccred(cred);
+	cred = rcu_access_pointer(mirror->rw_cred);
+	if (cred)
+		put_rpccred(cred);
 	nfs4_ff_layout_put_deviceid(mirror->mirror_ds);
 	kfree(mirror);
 }
@@ -290,6 +299,8 @@ ff_lseg_merge(struct pnfs_layout_segment *new,
 {
 	u64 new_end, old_end;
 
+	if (test_bit(NFS_LSEG_LAYOUTRETURN, &old->pls_flags))
+		return false;
 	if (new->pls_range.iomode != old->pls_range.iomode)
 		return false;
 	old_end = pnfs_calc_offset_end(old->pls_range.offset,
@@ -310,8 +321,6 @@ ff_lseg_merge(struct pnfs_layout_segment *new,
 			new_end);
 	if (test_bit(NFS_LSEG_ROC, &old->pls_flags))
 		set_bit(NFS_LSEG_ROC, &new->pls_flags);
-	if (test_bit(NFS_LSEG_LAYOUTRETURN, &old->pls_flags))
-		set_bit(NFS_LSEG_LAYOUTRETURN, &new->pls_flags);
 	return true;
 }
 
@@ -407,8 +416,9 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
 	struct nfs4_ff_layout_mirror *mirror;
 	struct nfs4_deviceid devid;
 	struct nfs4_deviceid_node *idnode;
-	u32 ds_count;
-	u32 fh_count;
+	struct auth_cred acred = { .group_info = ff_zero_group };
+	struct rpc_cred	__rcu *cred;
+	u32 ds_count, fh_count, id;
 	int j;
 
 	rc = -EIO;
@@ -456,7 +466,7 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
 		fls->mirror_array[i]->efficiency = be32_to_cpup(p);
 
 		/* stateid */
-		rc = decode_stateid(&stream, &fls->mirror_array[i]->stateid);
+		rc = decode_pnfs_stateid(&stream, &fls->mirror_array[i]->stateid);
 		if (rc)
 			goto out_err_free;
 
@@ -484,24 +494,49 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
 		fls->mirror_array[i]->fh_versions_cnt = fh_count;
 
 		/* user */
-		rc = decode_name(&stream, &fls->mirror_array[i]->uid);
+		rc = decode_name(&stream, &id);
 		if (rc)
 			goto out_err_free;
 
+		acred.uid = make_kuid(&init_user_ns, id);
+
 		/* group */
-		rc = decode_name(&stream, &fls->mirror_array[i]->gid);
+		rc = decode_name(&stream, &id);
 		if (rc)
 			goto out_err_free;
 
+		acred.gid = make_kgid(&init_user_ns, id);
+
+		/* find the cred for it */
+		rcu_assign_pointer(cred, rpc_lookup_generic_cred(&acred, 0, gfp_flags));
+		if (IS_ERR(cred)) {
+			rc = PTR_ERR(cred);
+			goto out_err_free;
+		}
+
+		if (lgr->range.iomode == IOMODE_READ)
+			rcu_assign_pointer(fls->mirror_array[i]->ro_cred, cred);
+		else
+			rcu_assign_pointer(fls->mirror_array[i]->rw_cred, cred);
+
 		mirror = ff_layout_add_mirror(lh, fls->mirror_array[i]);
 		if (mirror != fls->mirror_array[i]) {
+			/* swap cred ptrs so free_mirror will clean up old */
+			if (lgr->range.iomode == IOMODE_READ) {
+				cred = xchg(&mirror->ro_cred, cred);
+				rcu_assign_pointer(fls->mirror_array[i]->ro_cred, cred);
+			} else {
+				cred = xchg(&mirror->rw_cred, cred);
+				rcu_assign_pointer(fls->mirror_array[i]->rw_cred, cred);
+			}
 			ff_layout_free_mirror(fls->mirror_array[i]);
 			fls->mirror_array[i] = mirror;
 		}
 
-		dprintk("%s: uid %d gid %d\n", __func__,
-			fls->mirror_array[i]->uid,
-			fls->mirror_array[i]->gid);
+		dprintk("%s: iomode %s uid %u gid %u\n", __func__,
+			lgr->range.iomode == IOMODE_READ ? "READ" : "RW",
+			from_kuid(&init_user_ns, acred.uid),
+			from_kgid(&init_user_ns, acred.gid));
 	}
 
 	p = xdr_inline_decode(&stream, 4);
@@ -745,7 +780,7 @@ ff_layout_alloc_commit_info(struct pnfs_layout_segment *lseg,
 	else {
 		int i;
 
-		spin_lock(cinfo->lock);
+		spin_lock(&cinfo->inode->i_lock);
 		if (cinfo->ds->nbuckets != 0)
 			kfree(buckets);
 		else {
@@ -759,7 +794,7 @@ ff_layout_alloc_commit_info(struct pnfs_layout_segment *lseg,
 					NFS_INVALID_STABLE_HOW;
 			}
 		}
-		spin_unlock(cinfo->lock);
+		spin_unlock(&cinfo->inode->i_lock);
 		return 0;
 	}
 }
@@ -786,6 +821,36 @@ ff_layout_choose_best_ds_for_read(struct pnfs_layout_segment *lseg,
 }
 
 static void
+ff_layout_pg_get_read(struct nfs_pageio_descriptor *pgio,
+		      struct nfs_page *req,
+		      bool strict_iomode)
+{
+retry_strict:
+	pnfs_put_lseg(pgio->pg_lseg);
+	pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
+					   req->wb_context,
+					   0,
+					   NFS4_MAX_UINT64,
+					   IOMODE_READ,
+					   strict_iomode,
+					   GFP_KERNEL);
+	if (IS_ERR(pgio->pg_lseg)) {
+		pgio->pg_error = PTR_ERR(pgio->pg_lseg);
+		pgio->pg_lseg = NULL;
+	}
+
+	/* If we don't have checking, do get a IOMODE_RW
+	 * segment, and the server wants to avoid READs
+	 * there, then retry!
+	 */
+	if (pgio->pg_lseg && !strict_iomode &&
+	    ff_layout_avoid_read_on_rw(pgio->pg_lseg)) {
+		strict_iomode = true;
+		goto retry_strict;
+	}
+}
+
+static void
 ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio,
 		       struct nfs_page *req)
 {
@@ -795,26 +860,23 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio,
 	int ds_idx;
 
 	/* Use full layout for now */
-	if (!pgio->pg_lseg) {
-		pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
-						   req->wb_context,
-						   0,
-						   NFS4_MAX_UINT64,
-						   IOMODE_READ,
-						   GFP_KERNEL);
-		if (IS_ERR(pgio->pg_lseg)) {
-			pgio->pg_error = PTR_ERR(pgio->pg_lseg);
-			pgio->pg_lseg = NULL;
-			return;
-		}
-	}
+	if (!pgio->pg_lseg)
+		ff_layout_pg_get_read(pgio, req, false);
+	else if (ff_layout_avoid_read_on_rw(pgio->pg_lseg))
+		ff_layout_pg_get_read(pgio, req, true);
+
 	/* If no lseg, fall back to read through mds */
 	if (pgio->pg_lseg == NULL)
 		goto out_mds;
 
 	ds = ff_layout_choose_best_ds_for_read(pgio->pg_lseg, 0, &ds_idx);
-	if (!ds)
-		goto out_mds;
+	if (!ds) {
+		if (ff_layout_no_fallback_to_mds(pgio->pg_lseg))
+			goto out_pnfs;
+		else
+			goto out_mds;
+	}
+
 	mirror = FF_LAYOUT_COMP(pgio->pg_lseg, ds_idx);
 
 	pgio->pg_mirror_idx = ds_idx;
@@ -828,6 +890,12 @@ out_mds:
 	pnfs_put_lseg(pgio->pg_lseg);
 	pgio->pg_lseg = NULL;
 	nfs_pageio_reset_read_mds(pgio);
+	return;
+
+out_pnfs:
+	pnfs_set_lo_fail(pgio->pg_lseg);
+	pnfs_put_lseg(pgio->pg_lseg);
+	pgio->pg_lseg = NULL;
 }
 
 static void
@@ -847,6 +915,7 @@ ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio,
 					   0,
 					   NFS4_MAX_UINT64,
 					   IOMODE_RW,
+					   false,
 					   GFP_NOFS);
 	if (IS_ERR(pgio->pg_lseg)) {
 		pgio->pg_error = PTR_ERR(pgio->pg_lseg);
@@ -870,8 +939,12 @@ ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio,
 
 	for (i = 0; i < pgio->pg_mirror_count; i++) {
 		ds = nfs4_ff_layout_prepare_ds(pgio->pg_lseg, i, true);
-		if (!ds)
-			goto out_mds;
+		if (!ds) {
+			if (ff_layout_no_fallback_to_mds(pgio->pg_lseg))
+				goto out_pnfs;
+			else
+				goto out_mds;
+		}
 		pgm = &pgio->pg_mirrors[i];
 		mirror = FF_LAYOUT_COMP(pgio->pg_lseg, i);
 		pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].wsize;
@@ -883,6 +956,12 @@ out_mds:
 	pnfs_put_lseg(pgio->pg_lseg);
 	pgio->pg_lseg = NULL;
 	nfs_pageio_reset_write_mds(pgio);
+	return;
+
+out_pnfs:
+	pnfs_set_lo_fail(pgio->pg_lseg);
+	pnfs_put_lseg(pgio->pg_lseg);
+	pgio->pg_lseg = NULL;
 }
 
 static unsigned int
@@ -895,6 +974,7 @@ ff_layout_pg_get_mirror_count_write(struct nfs_pageio_descriptor *pgio,
 					   0,
 					   NFS4_MAX_UINT64,
 					   IOMODE_RW,
+					   false,
 					   GFP_NOFS);
 	if (IS_ERR(pgio->pg_lseg)) {
 		pgio->pg_error = PTR_ERR(pgio->pg_lseg);
@@ -1067,8 +1147,7 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
 		rpc_wake_up(&tbl->slot_tbl_waitq);
 		/* fall through */
 	default:
-		if (ff_layout_no_fallback_to_mds(lseg) ||
-		    ff_layout_has_available_ds(lseg))
+		if (ff_layout_avoid_mds_available_ds(lseg))
 			return -NFS4ERR_RESET_TO_PNFS;
 reset:
 		dprintk("%s Retry through MDS. Error %d\n", __func__,
@@ -1215,8 +1294,6 @@ static int ff_layout_read_done_cb(struct rpc_task *task,
 					   hdr->pgio_mirror_idx + 1,
 					   &hdr->pgio_mirror_idx))
 			goto out_eagain;
-		set_bit(NFS_LAYOUT_RETURN_REQUESTED,
-			&hdr->lseg->pls_layout->plh_flags);
 		pnfs_read_resend_pnfs(hdr);
 		return task->tk_status;
 	case -NFS4ERR_RESET_TO_MDS:
@@ -1260,7 +1337,7 @@ ff_layout_set_layoutcommit(struct nfs_pgio_header *hdr)
 }
 
 static bool
-ff_layout_reset_to_mds(struct pnfs_layout_segment *lseg, int idx)
+ff_layout_device_unavailable(struct pnfs_layout_segment *lseg, int idx)
 {
 	/* No mirroring for now */
 	struct nfs4_deviceid_node *node = FF_LAYOUT_DEVID_NODE(lseg, idx);
@@ -1297,16 +1374,10 @@ static int ff_layout_read_prepare_common(struct rpc_task *task,
 		rpc_exit(task, -EIO);
 		return -EIO;
 	}
-	if (ff_layout_reset_to_mds(hdr->lseg, hdr->pgio_mirror_idx)) {
-		dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid);
-		if (ff_layout_has_available_ds(hdr->lseg))
-			pnfs_read_resend_pnfs(hdr);
-		else
-			ff_layout_reset_read(hdr);
-		rpc_exit(task, 0);
+	if (ff_layout_device_unavailable(hdr->lseg, hdr->pgio_mirror_idx)) {
+		rpc_exit(task, -EHOSTDOWN);
 		return -EAGAIN;
 	}
-	hdr->pgio_done_cb = ff_layout_read_done_cb;
 
 	ff_layout_read_record_layoutstats_start(task, hdr);
 	return 0;
@@ -1496,14 +1567,8 @@ static int ff_layout_write_prepare_common(struct rpc_task *task,
 		return -EIO;
 	}
 
-	if (ff_layout_reset_to_mds(hdr->lseg, hdr->pgio_mirror_idx)) {
-		bool retry_pnfs;
-
-		retry_pnfs = ff_layout_has_available_ds(hdr->lseg);
-		dprintk("%s task %u reset io to %s\n", __func__,
-			task->tk_pid, retry_pnfs ? "pNFS" : "MDS");
-		ff_layout_reset_write(hdr, retry_pnfs);
-		rpc_exit(task, 0);
+	if (ff_layout_device_unavailable(hdr->lseg, hdr->pgio_mirror_idx)) {
+		rpc_exit(task, -EHOSTDOWN);
 		return -EAGAIN;
 	}
 
@@ -1712,7 +1777,7 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
 		goto out_failed;
 
 	ds_cred = ff_layout_get_ds_cred(lseg, idx, hdr->cred);
-	if (IS_ERR(ds_cred))
+	if (!ds_cred)
 		goto out_failed;
 
 	vers = nfs4_ff_layout_ds_version(lseg, idx);
@@ -1720,6 +1785,7 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
 	dprintk("%s USE DS: %s cl_count %d vers %d\n", __func__,
 		ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count), vers);
 
+	hdr->pgio_done_cb = ff_layout_read_done_cb;
 	atomic_inc(&ds->ds_clp->cl_count);
 	hdr->ds_clp = ds->ds_clp;
 	fh = nfs4_ff_layout_select_ds_fh(lseg, idx);
@@ -1737,11 +1803,11 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
 			  vers == 3 ? &ff_layout_read_call_ops_v3 :
 				      &ff_layout_read_call_ops_v4,
 			  0, RPC_TASK_SOFTCONN);
-
+	put_rpccred(ds_cred);
 	return PNFS_ATTEMPTED;
 
 out_failed:
-	if (ff_layout_has_available_ds(lseg))
+	if (ff_layout_avoid_mds_available_ds(lseg))
 		return PNFS_TRY_AGAIN;
 	return PNFS_NOT_ATTEMPTED;
 }
@@ -1769,7 +1835,7 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
 		return PNFS_NOT_ATTEMPTED;
 
 	ds_cred = ff_layout_get_ds_cred(lseg, idx, hdr->cred);
-	if (IS_ERR(ds_cred))
+	if (!ds_cred)
 		return PNFS_NOT_ATTEMPTED;
 
 	vers = nfs4_ff_layout_ds_version(lseg, idx);
@@ -1798,6 +1864,7 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
 			  vers == 3 ? &ff_layout_write_call_ops_v3 :
 				      &ff_layout_write_call_ops_v4,
 			  sync, RPC_TASK_SOFTCONN);
+	put_rpccred(ds_cred);
 	return PNFS_ATTEMPTED;
 }
 
@@ -1824,7 +1891,7 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how)
 	struct rpc_clnt *ds_clnt;
 	struct rpc_cred *ds_cred;
 	u32 idx;
-	int vers;
+	int vers, ret;
 	struct nfs_fh *fh;
 
 	idx = calc_ds_index_from_commit(lseg, data->ds_commit_index);
@@ -1838,7 +1905,7 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how)
 		goto out_err;
 
 	ds_cred = ff_layout_get_ds_cred(lseg, idx, data->cred);
-	if (IS_ERR(ds_cred))
+	if (!ds_cred)
 		goto out_err;
 
 	vers = nfs4_ff_layout_ds_version(lseg, idx);
@@ -1854,10 +1921,12 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how)
 	if (fh)
 		data->args.fh = fh;
 
-	return nfs_initiate_commit(ds_clnt, data, ds->ds_clp->rpc_ops,
-				   vers == 3 ? &ff_layout_commit_call_ops_v3 :
-					       &ff_layout_commit_call_ops_v4,
-				   how, RPC_TASK_SOFTCONN);
+	ret = nfs_initiate_commit(ds_clnt, data, ds->ds_clp->rpc_ops,
+				  vers == 3 ? &ff_layout_commit_call_ops_v3 :
+					      &ff_layout_commit_call_ops_v4,
+				  how, RPC_TASK_SOFTCONN);
+	put_rpccred(ds_cred);
+	return ret;
 out_err:
 	pnfs_generic_prepare_to_resend_writes(data);
 	pnfs_generic_commit_release(data);
@@ -2223,6 +2292,11 @@ static int __init nfs4flexfilelayout_init(void)
 {
 	printk(KERN_INFO "%s: NFSv4 Flexfile Layout Driver Registering...\n",
 	       __func__);
+	if (!ff_zero_group) {
+		ff_zero_group = groups_alloc(0);
+		if (!ff_zero_group)
+			return -ENOMEM;
+	}
 	return pnfs_register_layoutdriver(&flexfilelayout_type);
 }
 
@@ -2231,6 +2305,10 @@ static void __exit nfs4flexfilelayout_exit(void)
 	printk(KERN_INFO "%s: NFSv4 Flexfile Layout Driver Unregistering...\n",
 	       __func__);
 	pnfs_unregister_layoutdriver(&flexfilelayout_type);
+	if (ff_zero_group) {
+		put_group_info(ff_zero_group);
+		ff_zero_group = NULL;
+	}
 }
 
 MODULE_ALIAS("nfs-layouttype4-4");
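
When ff_layout_add_mirror() returns an existing mirror, the new lseg code swaps its freshly looked-up cred into the long-lived mirror with xchg() and parks the previous occupant on the temporary mirror, whose ff_layout_free_mirror() then drops it; exactly one owner holds each reference at every instant. A self-contained userspace sketch of that handoff using C11 atomics (names invented; a real implementation would use atomic refcounts):

    #include <stdatomic.h>
    #include <stdio.h>

    struct cred { int refcount; };

    static void put_cred(struct cred *c)
    {
            if (c && --c->refcount == 0)
                    printf("cred %p released\n", (void *)c);
    }

    /* Publish new_cred in *slot and hand the previous occupant back to
     * the caller, which now owns its reference and must drop it. */
    static struct cred *cred_handoff(_Atomic(struct cred *) *slot,
                                     struct cred *new_cred)
    {
            return atomic_exchange(slot, new_cred);
    }

    int main(void)
    {
            struct cred a = { .refcount = 1 }, b = { .refcount = 1 };
            _Atomic(struct cred *) mirror_cred = &a;

            /* keep the long-lived slot pointing at the new cred and let
             * the temporary object's teardown drop the old one, as
             * ff_layout_alloc_lseg() does with xchg() above */
            put_cred(cred_handoff(&mirror_cred, &b));
            put_cred(atomic_exchange(&mirror_cred, NULL));
            return 0;
    }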
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h
index dd353bb7dc0a..1bcdb15d0c41 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.h
+++ b/fs/nfs/flexfilelayout/flexfilelayout.h
@@ -10,7 +10,8 @@
 #define FS_NFS_NFS4FLEXFILELAYOUT_H
 
 #define FF_FLAGS_NO_LAYOUTCOMMIT 1
 #define FF_FLAGS_NO_IO_THRU_MDS 2
+#define FF_FLAGS_NO_READ_IO 4
 
 #include "../pnfs.h"
 
@@ -76,9 +77,8 @@ struct nfs4_ff_layout_mirror {
 	u32				fh_versions_cnt;
 	struct nfs_fh			*fh_versions;
 	nfs4_stateid			stateid;
-	u32				uid;
-	u32				gid;
-	struct rpc_cred			*cred;
+	struct rpc_cred	__rcu		*ro_cred;
+	struct rpc_cred	__rcu		*rw_cred;
 	atomic_t			ref;
 	spinlock_t			lock;
 	struct nfs4_ff_layoutstat	read_stat;
@@ -154,6 +154,12 @@ ff_layout_no_fallback_to_mds(struct pnfs_layout_segment *lseg)
 }
 
 static inline bool
+ff_layout_no_read_on_rw(struct pnfs_layout_segment *lseg)
+{
+	return FF_LAYOUT_LSEG(lseg)->flags & FF_FLAGS_NO_READ_IO;
+}
+
+static inline bool
 ff_layout_test_devid_unavailable(struct nfs4_deviceid_node *node)
 {
 	return nfs4_test_deviceid_unavailable(node);
@@ -192,4 +198,7 @@ nfs4_ff_find_or_create_ds_client(struct pnfs_layout_segment *lseg,
 struct rpc_cred *ff_layout_get_ds_cred(struct pnfs_layout_segment *lseg,
 				       u32 ds_idx, struct rpc_cred *mdscred);
 bool ff_layout_has_available_ds(struct pnfs_layout_segment *lseg);
+bool ff_layout_avoid_mds_available_ds(struct pnfs_layout_segment *lseg);
+bool ff_layout_avoid_read_on_rw(struct pnfs_layout_segment *lseg);
+
 #endif /* FS_NFS_NFS4FLEXFILELAYOUT_H */
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index add0e5a70bd6..0aa36be71fce 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -228,7 +228,8 @@ ff_ds_error_match(const struct nfs4_ff_layout_ds_err *e1,
 		return e1->opnum < e2->opnum ? -1 : 1;
 	if (e1->status != e2->status)
 		return e1->status < e2->status ? -1 : 1;
-	ret = memcmp(&e1->stateid, &e2->stateid, sizeof(e1->stateid));
+	ret = memcmp(e1->stateid.data, e2->stateid.data,
+		     sizeof(e1->stateid.data));
 	if (ret != 0)
 		return ret;
 	ret = memcmp(&e1->deviceid, &e2->deviceid, sizeof(e1->deviceid));
@@ -302,40 +303,26 @@ int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo,
 	return 0;
 }
 
-/* currently we only support AUTH_NONE and AUTH_SYS */
-static rpc_authflavor_t
-nfs4_ff_layout_choose_authflavor(struct nfs4_ff_layout_mirror *mirror)
+static struct rpc_cred *
+ff_layout_get_mirror_cred(struct nfs4_ff_layout_mirror *mirror, u32 iomode)
 {
-	if (mirror->uid == (u32)-1)
-		return RPC_AUTH_NULL;
-	return RPC_AUTH_UNIX;
-}
+	struct rpc_cred *cred, __rcu **pcred;
 
-/* fetch cred for NFSv3 DS */
-static int ff_layout_update_mirror_cred(struct nfs4_ff_layout_mirror *mirror,
-					struct nfs4_pnfs_ds *ds)
-{
-	if (ds->ds_clp && !mirror->cred &&
-	    mirror->mirror_ds->ds_versions[0].version == 3) {
-		struct rpc_auth *auth = ds->ds_clp->cl_rpcclient->cl_auth;
-		struct rpc_cred *cred;
-		struct auth_cred acred = {
-			.uid = make_kuid(&init_user_ns, mirror->uid),
-			.gid = make_kgid(&init_user_ns, mirror->gid),
-		};
-
-		/* AUTH_NULL ignores acred */
-		cred = auth->au_ops->lookup_cred(auth, &acred, 0);
-		if (IS_ERR(cred)) {
-			dprintk("%s: lookup_cred failed with %ld\n",
-				__func__, PTR_ERR(cred));
-			return PTR_ERR(cred);
-		} else {
-			if (cmpxchg(&mirror->cred, NULL, cred))
-				put_rpccred(cred);
-		}
-	}
-	return 0;
+	if (iomode == IOMODE_READ)
+		pcred = &mirror->ro_cred;
+	else
+		pcred = &mirror->rw_cred;
+
+	rcu_read_lock();
+	do {
+		cred = rcu_dereference(*pcred);
+		if (!cred)
+			break;
+
+		cred = get_rpccred_rcu(cred);
+	} while(!cred);
+	rcu_read_unlock();
+	return cred;
 }
 
 struct nfs_fh *
@@ -356,7 +343,23 @@ out:
 	return fh;
 }
 
-/* Upon return, either ds is connected, or ds is NULL */
+/**
+ * nfs4_ff_layout_prepare_ds - prepare a DS connection for an RPC call
+ * @lseg: the layout segment we're operating on
+ * @ds_idx: index of the DS to use
+ * @fail_return: return layout on connect failure?
+ *
+ * Try to prepare a DS connection to accept an RPC call. This involves
+ * selecting a mirror to use and connecting the client to it if it's not
+ * already connected.
+ *
+ * Since we only need a single functioning mirror to satisfy a read, we don't
+ * want to return the layout if there is one. For writes though, any down
+ * mirror should result in a LAYOUTRETURN. @fail_return is how we distinguish
+ * between the two cases.
+ *
+ * Returns a pointer to a connected DS object on success or NULL on failure.
+ */
 struct nfs4_pnfs_ds *
 nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
 			  bool fail_return)
@@ -367,7 +370,6 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
 	struct inode *ino = lseg->pls_layout->plh_inode;
 	struct nfs_server *s = NFS_SERVER(ino);
 	unsigned int max_payload;
-	rpc_authflavor_t flavor;
 
 	if (!ff_layout_mirror_valid(lseg, mirror)) {
 		pr_err_ratelimited("NFS: %s: No data server for offset index %d\n",
@@ -383,9 +385,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
 	/* matching smp_wmb() in _nfs4_pnfs_v3/4_ds_connect */
 	smp_rmb();
 	if (ds->ds_clp)
-		goto out_update_creds;
-
-	flavor = nfs4_ff_layout_choose_authflavor(mirror);
+		goto out;
 
 	/* FIXME: For now we assume the server sent only one version of NFS
 	 * to use for the DS.
@@ -394,7 +394,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
 			     dataserver_retrans,
 			     mirror->mirror_ds->ds_versions[0].version,
 			     mirror->mirror_ds->ds_versions[0].minor_version,
-			     flavor);
+			     RPC_AUTH_UNIX);
 
 	/* connect success, check rsize/wsize limit */
 	if (ds->ds_clp) {
@@ -410,20 +410,10 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
 					 mirror, lseg->pls_range.offset,
 					 lseg->pls_range.length, NFS4ERR_NXIO,
 					 OP_ILLEGAL, GFP_NOIO);
-		if (!fail_return) {
-			if (ff_layout_has_available_ds(lseg))
-				set_bit(NFS_LAYOUT_RETURN_REQUESTED,
-					&lseg->pls_layout->plh_flags);
-			else
-				pnfs_error_mark_layout_for_return(ino, lseg);
-		} else
+		if (fail_return || !ff_layout_has_available_ds(lseg))
 			pnfs_error_mark_layout_for_return(ino, lseg);
 		ds = NULL;
-		goto out;
 	}
-out_update_creds:
-	if (ff_layout_update_mirror_cred(mirror, ds))
-		ds = NULL;
 out:
 	return ds;
 }
@@ -433,16 +423,15 @@ ff_layout_get_ds_cred(struct pnfs_layout_segment *lseg, u32 ds_idx,
 		       struct rpc_cred *mdscred)
 {
 	struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, ds_idx);
-	struct rpc_cred *cred = ERR_PTR(-EINVAL);
-
-	if (!nfs4_ff_layout_prepare_ds(lseg, ds_idx, true))
-		goto out;
+	struct rpc_cred *cred;
 
-	if (mirror && mirror->cred)
-		cred = mirror->cred;
-	else
-		cred = mdscred;
-out:
+	if (mirror) {
+		cred = ff_layout_get_mirror_cred(mirror, lseg->pls_range.iomode);
+		if (!cred)
+			cred = get_rpccred(mdscred);
+	} else {
+		cred = get_rpccred(mdscred);
+	}
 	return cred;
 }
 
@@ -562,6 +551,18 @@ bool ff_layout_has_available_ds(struct pnfs_layout_segment *lseg)
 	return ff_rw_layout_has_available_ds(lseg);
 }
 
+bool ff_layout_avoid_mds_available_ds(struct pnfs_layout_segment *lseg)
+{
+	return ff_layout_no_fallback_to_mds(lseg) ||
+	       ff_layout_has_available_ds(lseg);
+}
+
+bool ff_layout_avoid_read_on_rw(struct pnfs_layout_segment *lseg)
+{
+	return lseg->pls_range.iomode == IOMODE_RW &&
+	       ff_layout_no_read_on_rw(lseg);
+}
+
 module_param(dataserver_retrans, uint, 0644);
 MODULE_PARM_DESC(dataserver_retrans, "The number of times the NFSv4.1 client "
 		 "retries a request before it attempts further "
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index f1d1d2c472e9..5154fa65a2f2 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -477,6 +477,7 @@ void nfs_mark_request_commit(struct nfs_page *req,
 			     u32 ds_commit_idx);
 int nfs_write_need_commit(struct nfs_pgio_header *);
 void nfs_writeback_update_inode(struct nfs_pgio_header *hdr);
+int nfs_commit_file(struct file *file, struct nfs_write_verifier *verf);
 int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
 			    int how, struct nfs_commit_info *cinfo);
 void nfs_retry_commit(struct list_head *page_list,
diff --git a/fs/nfs/nfs42.h b/fs/nfs/nfs42.h
index b587ccd31083..b6cd15314bab 100644
--- a/fs/nfs/nfs42.h
+++ b/fs/nfs/nfs42.h
@@ -13,6 +13,7 @@
 
 /* nfs4.2proc.c */
 int nfs42_proc_allocate(struct file *, loff_t, loff_t);
+ssize_t nfs42_proc_copy(struct file *, loff_t, struct file *, loff_t, size_t);
 int nfs42_proc_deallocate(struct file *, loff_t, loff_t);
 loff_t nfs42_proc_llseek(struct file *, loff_t, int);
 int nfs42_proc_layoutstats_generic(struct nfs_server *,
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index dff83460e5a6..aa03ed09ba06 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -126,6 +126,111 @@ int nfs42_proc_deallocate(struct file *filep, loff_t offset, loff_t len)
 	return err;
 }
 
+static ssize_t _nfs42_proc_copy(struct file *src, loff_t pos_src,
+				struct nfs_lock_context *src_lock,
+				struct file *dst, loff_t pos_dst,
+				struct nfs_lock_context *dst_lock,
+				size_t count)
+{
+	struct nfs42_copy_args args = {
+		.src_fh		= NFS_FH(file_inode(src)),
+		.src_pos	= pos_src,
+		.dst_fh		= NFS_FH(file_inode(dst)),
+		.dst_pos	= pos_dst,
+		.count		= count,
+	};
+	struct nfs42_copy_res res;
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COPY],
+		.rpc_argp = &args,
+		.rpc_resp = &res,
+	};
+	struct inode *dst_inode = file_inode(dst);
+	struct nfs_server *server = NFS_SERVER(dst_inode);
+	int status;
+
+	status = nfs4_set_rw_stateid(&args.src_stateid, src_lock->open_context,
+				     src_lock, FMODE_READ);
+	if (status)
+		return status;
+
+	status = nfs4_set_rw_stateid(&args.dst_stateid, dst_lock->open_context,
+				     dst_lock, FMODE_WRITE);
+	if (status)
+		return status;
+
+	status = nfs4_call_sync(server->client, server, &msg,
+				&args.seq_args, &res.seq_res, 0);
+	if (status == -ENOTSUPP)
+		server->caps &= ~NFS_CAP_COPY;
+	if (status)
+		return status;
+
+	if (res.write_res.verifier.committed != NFS_FILE_SYNC) {
+		status = nfs_commit_file(dst, &res.write_res.verifier.verifier);
+		if (status)
+			return status;
+	}
+
+	truncate_pagecache_range(dst_inode, pos_dst,
+				 pos_dst + res.write_res.count);
+
+	return res.write_res.count;
+}
+
+ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src,
+			struct file *dst, loff_t pos_dst,
+			size_t count)
+{
+	struct nfs_server *server = NFS_SERVER(file_inode(dst));
+	struct nfs_lock_context *src_lock;
+	struct nfs_lock_context *dst_lock;
+	struct nfs4_exception src_exception = { };
+	struct nfs4_exception dst_exception = { };
+	ssize_t err, err2;
+
+	if (!nfs_server_capable(file_inode(dst), NFS_CAP_COPY))
+		return -EOPNOTSUPP;
+
+	src_lock = nfs_get_lock_context(nfs_file_open_context(src));
+	if (IS_ERR(src_lock))
+		return PTR_ERR(src_lock);
+
+	src_exception.inode = file_inode(src);
+	src_exception.state = src_lock->open_context->state;
+
+	dst_lock = nfs_get_lock_context(nfs_file_open_context(dst));
+	if (IS_ERR(dst_lock)) {
+		err = PTR_ERR(dst_lock);
+		goto out_put_src_lock;
+	}
+
+	dst_exception.inode = file_inode(dst);
+	dst_exception.state = dst_lock->open_context->state;
+
+	do {
+		inode_lock(file_inode(dst));
+		err = _nfs42_proc_copy(src, pos_src, src_lock,
+				       dst, pos_dst, dst_lock, count);
+		inode_unlock(file_inode(dst));
+
+		if (err == -ENOTSUPP) {
+			err = -EOPNOTSUPP;
+			break;
+		}
+
+		err2 = nfs4_handle_exception(server, err, &src_exception);
+		err = nfs4_handle_exception(server, err, &dst_exception);
+		if (!err)
+			err = err2;
+	} while (src_exception.retry || dst_exception.retry);
+
+	nfs_put_lock_context(dst_lock);
+out_put_src_lock:
+	nfs_put_lock_context(src_lock);
+	return err;
+}
+
 static loff_t _nfs42_proc_llseek(struct file *filep,
 		struct nfs_lock_context *lock, loff_t offset, int whence)
 {
@@ -232,7 +337,7 @@ nfs42_layoutstat_done(struct rpc_task *task, void *calldata)
 		 * with the current stateid.
 		 */
 		set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
-		pnfs_mark_matching_lsegs_invalid(lo, &head, NULL);
+		pnfs_mark_matching_lsegs_invalid(lo, &head, NULL, 0);
 		spin_unlock(&inode->i_lock);
 		pnfs_free_lseg_list(&head);
 	} else
diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index 0ca482a51e53..6dc6f2aea0d6 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c | |||
@@ -9,9 +9,22 @@ | |||
9 | #define encode_fallocate_maxsz (encode_stateid_maxsz + \ | 9 | #define encode_fallocate_maxsz (encode_stateid_maxsz + \ |
10 | 2 /* offset */ + \ | 10 | 2 /* offset */ + \ |
11 | 2 /* length */) | 11 | 2 /* length */) |
12 | #define NFS42_WRITE_RES_SIZE (1 /* wr_callback_id size */ +\ | ||
13 | XDR_QUADLEN(NFS4_STATEID_SIZE) + \ | ||
14 | 2 /* wr_count */ + \ | ||
15 | 1 /* wr_committed */ + \ | ||
16 | XDR_QUADLEN(NFS4_VERIFIER_SIZE)) | ||
12 | #define encode_allocate_maxsz (op_encode_hdr_maxsz + \ | 17 | #define encode_allocate_maxsz (op_encode_hdr_maxsz + \ |
13 | encode_fallocate_maxsz) | 18 | encode_fallocate_maxsz) |
14 | #define decode_allocate_maxsz (op_decode_hdr_maxsz) | 19 | #define decode_allocate_maxsz (op_decode_hdr_maxsz) |
20 | #define encode_copy_maxsz (op_encode_hdr_maxsz + \ | ||
21 | XDR_QUADLEN(NFS4_STATEID_SIZE) + \ | ||
22 | XDR_QUADLEN(NFS4_STATEID_SIZE) + \ | ||
23 | 2 /* src offset */ + 2 /* dst offset */ + 2 /* count */ + 1 /* consecutive */ + 1 /* synchronous */ + 1 /* src server list */) | ||
24 | #define decode_copy_maxsz (op_decode_hdr_maxsz + \ | ||
25 | NFS42_WRITE_RES_SIZE + \ | ||
26 | 1 /* cr_consecutive */ + \ | ||
27 | 1 /* cr_synchronous */) | ||
15 | #define encode_deallocate_maxsz (op_encode_hdr_maxsz + \ | 28 | #define encode_deallocate_maxsz (op_encode_hdr_maxsz + \ |
16 | encode_fallocate_maxsz) | 29 | encode_fallocate_maxsz) |
17 | #define decode_deallocate_maxsz (op_decode_hdr_maxsz) | 30 | #define decode_deallocate_maxsz (op_decode_hdr_maxsz) |
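[Editorial sketch] These maxsz macros count 32-bit XDR words, and XDR_QUADLEN rounds a byte length up to whole words. A quick standalone check of the arithmetic; the constants are assumed from include/linux/nfs4.h (16-byte stateids, 8-byte verifier) and XDR_QUADLEN from the sunrpc headers:

#include <stdio.h>

#define XDR_QUADLEN(l)		(((l) + 3) >> 2)	/* bytes -> 4-byte words */
#define NFS4_STATEID_SIZE	16
#define NFS4_VERIFIER_SIZE	8

int main(void)
{
	/* 1 (wr_callback_id size) + stateid + 2 (wr_count) + 1 (wr_committed) + verifier */
	unsigned write_res = 1 + XDR_QUADLEN(NFS4_STATEID_SIZE)
			   + 2 + 1 + XDR_QUADLEN(NFS4_VERIFIER_SIZE);
	/* two stateids + three hypers + three booleans, excluding the op header */
	unsigned copy_args = XDR_QUADLEN(NFS4_STATEID_SIZE) * 2
			   + 2 + 2 + 2 + 1 + 1 + 1;

	printf("NFS42_WRITE_RES_SIZE=%u (expect 10)\n", write_res);
	printf("encode_copy payload=%u words (expect 17)\n", copy_args);
	return 0;
}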
@@ -49,6 +62,16 @@ | |||
49 | decode_putfh_maxsz + \ | 62 | decode_putfh_maxsz + \ |
50 | decode_allocate_maxsz + \ | 63 | decode_allocate_maxsz + \ |
51 | decode_getattr_maxsz) | 64 | decode_getattr_maxsz) |
65 | #define NFS4_enc_copy_sz (compound_encode_hdr_maxsz + \ | ||
66 | encode_putfh_maxsz + \ | ||
67 | encode_savefh_maxsz + \ | ||
68 | encode_putfh_maxsz + \ | ||
69 | encode_copy_maxsz) | ||
70 | #define NFS4_dec_copy_sz (compound_decode_hdr_maxsz + \ | ||
71 | decode_putfh_maxsz + \ | ||
72 | decode_savefh_maxsz + \ | ||
73 | decode_putfh_maxsz + \ | ||
74 | decode_copy_maxsz) | ||
52 | #define NFS4_enc_deallocate_sz (compound_encode_hdr_maxsz + \ | 75 | #define NFS4_enc_deallocate_sz (compound_encode_hdr_maxsz + \ |
53 | encode_putfh_maxsz + \ | 76 | encode_putfh_maxsz + \ |
54 | encode_deallocate_maxsz + \ | 77 | encode_deallocate_maxsz + \ |
@@ -102,6 +125,23 @@ static void encode_allocate(struct xdr_stream *xdr, | |||
102 | encode_fallocate(xdr, args); | 125 | encode_fallocate(xdr, args); |
103 | } | 126 | } |
104 | 127 | ||
128 | static void encode_copy(struct xdr_stream *xdr, | ||
129 | struct nfs42_copy_args *args, | ||
130 | struct compound_hdr *hdr) | ||
131 | { | ||
132 | encode_op_hdr(xdr, OP_COPY, decode_copy_maxsz, hdr); | ||
133 | encode_nfs4_stateid(xdr, &args->src_stateid); | ||
134 | encode_nfs4_stateid(xdr, &args->dst_stateid); | ||
135 | |||
136 | encode_uint64(xdr, args->src_pos); | ||
137 | encode_uint64(xdr, args->dst_pos); | ||
138 | encode_uint64(xdr, args->count); | ||
139 | |||
140 | encode_uint32(xdr, 1); /* consecutive = true */ | ||
141 | encode_uint32(xdr, 1); /* synchronous = true */ | ||
142 | encode_uint32(xdr, 0); /* src server list */ | ||
143 | } | ||
144 | |||
105 | static void encode_deallocate(struct xdr_stream *xdr, | 145 | static void encode_deallocate(struct xdr_stream *xdr, |
106 | struct nfs42_falloc_args *args, | 146 | struct nfs42_falloc_args *args, |
107 | struct compound_hdr *hdr) | 147 | struct compound_hdr *hdr) |
@@ -182,6 +222,26 @@ static void nfs4_xdr_enc_allocate(struct rpc_rqst *req, | |||
182 | } | 222 | } |
183 | 223 | ||
184 | /* | 224 | /* |
225 | * Encode COPY request | ||
226 | */ | ||
227 | static void nfs4_xdr_enc_copy(struct rpc_rqst *req, | ||
228 | struct xdr_stream *xdr, | ||
229 | struct nfs42_copy_args *args) | ||
230 | { | ||
231 | struct compound_hdr hdr = { | ||
232 | .minorversion = nfs4_xdr_minorversion(&args->seq_args), | ||
233 | }; | ||
234 | |||
235 | encode_compound_hdr(xdr, req, &hdr); | ||
236 | encode_sequence(xdr, &args->seq_args, &hdr); | ||
237 | encode_putfh(xdr, args->src_fh, &hdr); | ||
238 | encode_savefh(xdr, &hdr); | ||
239 | encode_putfh(xdr, args->dst_fh, &hdr); | ||
240 | encode_copy(xdr, args, &hdr); | ||
241 | encode_nops(&hdr); | ||
242 | } | ||
243 | |||
244 | /* | ||
185 | * Encode DEALLOCATE request | 245 | * Encode DEALLOCATE request |
186 | */ | 246 | */ |
187 | static void nfs4_xdr_enc_deallocate(struct rpc_rqst *req, | 247 | static void nfs4_xdr_enc_deallocate(struct rpc_rqst *req, |
@@ -266,6 +326,62 @@ static int decode_allocate(struct xdr_stream *xdr, struct nfs42_falloc_res *res) | |||
266 | return decode_op_hdr(xdr, OP_ALLOCATE); | 326 | return decode_op_hdr(xdr, OP_ALLOCATE); |
267 | } | 327 | } |
268 | 328 | ||
329 | static int decode_write_response(struct xdr_stream *xdr, | ||
330 | struct nfs42_write_res *res) | ||
331 | { | ||
332 | __be32 *p; | ||
333 | int stateids; | ||
334 | |||
335 | p = xdr_inline_decode(xdr, 4 + 8 + 4); | ||
336 | if (unlikely(!p)) | ||
337 | goto out_overflow; | ||
338 | |||
339 | stateids = be32_to_cpup(p++); | ||
340 | p = xdr_decode_hyper(p, &res->count); | ||
341 | res->verifier.committed = be32_to_cpup(p); | ||
342 | return decode_verifier(xdr, &res->verifier.verifier); | ||
343 | |||
344 | out_overflow: | ||
345 | print_overflow_msg(__func__, xdr); | ||
346 | return -EIO; | ||
347 | } | ||
348 | |||
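[Editorial sketch] decode_write_response above pulls one word for the stateid count (parsed and discarded, since a synchronous copy carries no callback stateid), a two-word hyper for wr_count, a word for wr_committed, and then the verifier. A standalone sketch of the same big-endian decode; the buffer contents are invented for the demo:

#include <stdint.h>
#include <stdio.h>
#include <arpa/inet.h>

static void decode_hyper(const uint32_t *p, uint64_t *v)
{
	*v = ((uint64_t)ntohl(p[0]) << 32) | ntohl(p[1]);
}

int main(void)
{
	/* invented reply: wr_ids=0, wr_count=0x1000, wr_committed=2 (FILE_SYNC) */
	uint32_t buf[4] = { htonl(0), htonl(0), htonl(0x1000), htonl(2) };
	uint64_t count;

	uint32_t ids = ntohl(buf[0]);		/* like be32_to_cpup(p++) */
	decode_hyper(&buf[1], &count);		/* like xdr_decode_hyper() */
	uint32_t committed = ntohl(buf[3]);

	printf("ids=%u count=%llu committed=%u\n",
	       ids, (unsigned long long)count, committed);
	return 0;
}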
349 | static int decode_copy_requirements(struct xdr_stream *xdr, | ||
350 | struct nfs42_copy_res *res) { | ||
351 | __be32 *p; | ||
352 | |||
353 | p = xdr_inline_decode(xdr, 4 + 4); | ||
354 | if (unlikely(!p)) | ||
355 | goto out_overflow; | ||
356 | |||
357 | res->consecutive = be32_to_cpup(p++); | ||
358 | res->synchronous = be32_to_cpup(p++); | ||
359 | return 0; | ||
360 | out_overflow: | ||
361 | print_overflow_msg(__func__, xdr); | ||
362 | return -EIO; | ||
363 | } | ||
364 | |||
365 | static int decode_copy(struct xdr_stream *xdr, struct nfs42_copy_res *res) | ||
366 | { | ||
367 | int status; | ||
368 | |||
369 | status = decode_op_hdr(xdr, OP_COPY); | ||
370 | if (status == NFS4ERR_OFFLOAD_NO_REQS) { | ||
371 | status = decode_copy_requirements(xdr, res); | ||
372 | if (status) | ||
373 | return status; | ||
374 | return NFS4ERR_OFFLOAD_NO_REQS; | ||
375 | } else if (status) | ||
376 | return status; | ||
377 | |||
378 | status = decode_write_response(xdr, &res->write_res); | ||
379 | if (status) | ||
380 | return status; | ||
381 | |||
382 | return decode_copy_requirements(xdr, res); | ||
383 | } | ||
384 | |||
269 | static int decode_deallocate(struct xdr_stream *xdr, struct nfs42_falloc_res *res) | 385 | static int decode_deallocate(struct xdr_stream *xdr, struct nfs42_falloc_res *res) |
270 | { | 386 | { |
271 | return decode_op_hdr(xdr, OP_DEALLOCATE); | 387 | return decode_op_hdr(xdr, OP_DEALLOCATE); |
@@ -331,6 +447,36 @@ out: | |||
331 | } | 447 | } |
332 | 448 | ||
333 | /* | 449 | /* |
450 | * Decode COPY response | ||
451 | */ | ||
452 | static int nfs4_xdr_dec_copy(struct rpc_rqst *rqstp, | ||
453 | struct xdr_stream *xdr, | ||
454 | struct nfs42_copy_res *res) | ||
455 | { | ||
456 | struct compound_hdr hdr; | ||
457 | int status; | ||
458 | |||
459 | status = decode_compound_hdr(xdr, &hdr); | ||
460 | if (status) | ||
461 | goto out; | ||
462 | status = decode_sequence(xdr, &res->seq_res, rqstp); | ||
463 | if (status) | ||
464 | goto out; | ||
465 | status = decode_putfh(xdr); | ||
466 | if (status) | ||
467 | goto out; | ||
468 | status = decode_savefh(xdr); | ||
469 | if (status) | ||
470 | goto out; | ||
471 | status = decode_putfh(xdr); | ||
472 | if (status) | ||
473 | goto out; | ||
474 | status = decode_copy(xdr, res); | ||
475 | out: | ||
476 | return status; | ||
477 | } | ||
478 | |||
479 | /* | ||
334 | * Decode DEALLOCATE response | 480 | * Decode DEALLOCATE response |
335 | */ | 481 | */ |
336 | static int nfs4_xdr_dec_deallocate(struct rpc_rqst *rqstp, | 482 | static int nfs4_xdr_dec_deallocate(struct rpc_rqst *rqstp, |
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 4afdee420d25..768456fa1b17 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h | |||
@@ -438,8 +438,9 @@ extern void nfs41_handle_server_scope(struct nfs_client *, | |||
438 | struct nfs41_server_scope **); | 438 | struct nfs41_server_scope **); |
439 | extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); | 439 | extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); |
440 | extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); | 440 | extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); |
441 | extern int nfs4_select_rw_stateid(nfs4_stateid *, struct nfs4_state *, | 441 | extern int nfs4_select_rw_stateid(struct nfs4_state *, fmode_t, |
442 | fmode_t, const struct nfs_lockowner *); | 442 | const struct nfs_lockowner *, nfs4_stateid *, |
443 | struct rpc_cred **); | ||
443 | 444 | ||
444 | extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask); | 445 | extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask); |
445 | extern int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task); | 446 | extern int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task); |
@@ -496,12 +497,15 @@ extern struct svc_version nfs4_callback_version4; | |||
496 | 497 | ||
497 | static inline void nfs4_stateid_copy(nfs4_stateid *dst, const nfs4_stateid *src) | 498 | static inline void nfs4_stateid_copy(nfs4_stateid *dst, const nfs4_stateid *src) |
498 | { | 499 | { |
499 | memcpy(dst, src, sizeof(*dst)); | 500 | memcpy(dst->data, src->data, sizeof(dst->data)); |
501 | dst->type = src->type; | ||
500 | } | 502 | } |
501 | 503 | ||
502 | static inline bool nfs4_stateid_match(const nfs4_stateid *dst, const nfs4_stateid *src) | 504 | static inline bool nfs4_stateid_match(const nfs4_stateid *dst, const nfs4_stateid *src) |
503 | { | 505 | { |
504 | return memcmp(dst, src, sizeof(*dst)) == 0; | 506 | if (dst->type != src->type) |
507 | return false; | ||
508 | return memcmp(dst->data, src->data, sizeof(dst->data)) == 0; | ||
505 | } | 509 | } |
506 | 510 | ||
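[Editorial sketch] These two inline helpers are the heart of the typed-stateid change in this series: a match now requires the type label to agree before the raw bytes are compared, so a layout stateid can no longer satisfy a match against, say, an open stateid with identical bytes. A minimal self-contained model (names abbreviated from the nfs4_stateid definitions):

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

enum stateid_type { OPEN_T, LOCK_T, DELEGATION_T, LAYOUT_T };

struct stateid { char data[16]; enum stateid_type type; };

static bool stateid_match(const struct stateid *a, const struct stateid *b)
{
	if (a->type != b->type)
		return false;			/* label must agree first */
	return memcmp(a->data, b->data, sizeof(a->data)) == 0;
}

int main(void)
{
	struct stateid open_sid   = { "same-bits", OPEN_T };
	struct stateid layout_sid = { "same-bits", LAYOUT_T };

	/* identical bytes, different type: no longer a match (prints 0) */
	printf("%d\n", stateid_match(&open_sid, &layout_sid));
	return 0;
}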
507 | static inline bool nfs4_stateid_match_other(const nfs4_stateid *dst, const nfs4_stateid *src) | 511 | static inline bool nfs4_stateid_match_other(const nfs4_stateid *dst, const nfs4_stateid *src) |
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index d0390516467c..014b0e41ace5 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c | |||
@@ -129,6 +129,28 @@ nfs4_file_flush(struct file *file, fl_owner_t id) | |||
129 | } | 129 | } |
130 | 130 | ||
131 | #ifdef CONFIG_NFS_V4_2 | 131 | #ifdef CONFIG_NFS_V4_2 |
132 | static ssize_t nfs4_copy_file_range(struct file *file_in, loff_t pos_in, | ||
133 | struct file *file_out, loff_t pos_out, | ||
134 | size_t count, unsigned int flags) | ||
135 | { | ||
136 | struct inode *in_inode = file_inode(file_in); | ||
137 | struct inode *out_inode = file_inode(file_out); | ||
138 | int ret; | ||
139 | |||
140 | if (in_inode == out_inode) | ||
141 | return -EINVAL; | ||
142 | |||
143 | /* flush any pending writes */ | ||
144 | ret = nfs_sync_inode(in_inode); | ||
145 | if (ret) | ||
146 | return ret; | ||
147 | ret = nfs_sync_inode(out_inode); | ||
148 | if (ret) | ||
149 | return ret; | ||
150 | |||
151 | return nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count); | ||
152 | } | ||
153 | |||
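[Editorial sketch] With nfs4_copy_file_range wired into nfs4_file_operations below, an ordinary copy_file_range(2) call on an NFSv4.2 mount becomes a single server-side COPY instead of the data round-tripping through the client; when the server lacks COPY support the -EOPNOTSUPP return should let the VFS fall back to a generic copy. A minimal caller (glibc 2.27+; the mount paths are examples only):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int in = open("/mnt/nfs/src", O_RDONLY);
	int out = open("/mnt/nfs/dst", O_WRONLY | O_CREAT | O_TRUNC, 0644);

	if (in < 0 || out < 0) {
		perror("open");
		return 1;
	}

	/* One syscall; on a v4.2 mount the data stays on the server. */
	ssize_t n = copy_file_range(in, NULL, out, NULL, 1 << 20, 0);
	if (n < 0)
		perror("copy_file_range");
	else
		printf("copied %zd bytes\n", n);

	close(in);
	close(out);
	return n < 0;
}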
132 | static loff_t nfs4_file_llseek(struct file *filep, loff_t offset, int whence) | 154 | static loff_t nfs4_file_llseek(struct file *filep, loff_t offset, int whence) |
133 | { | 155 | { |
134 | loff_t ret; | 156 | loff_t ret; |
@@ -243,6 +265,7 @@ const struct file_operations nfs4_file_operations = { | |||
243 | .check_flags = nfs_check_flags, | 265 | .check_flags = nfs_check_flags, |
244 | .setlease = simple_nosetlease, | 266 | .setlease = simple_nosetlease, |
245 | #ifdef CONFIG_NFS_V4_2 | 267 | #ifdef CONFIG_NFS_V4_2 |
268 | .copy_file_range = nfs4_copy_file_range, | ||
246 | .llseek = nfs4_file_llseek, | 269 | .llseek = nfs4_file_llseek, |
247 | .fallocate = nfs42_fallocate, | 270 | .fallocate = nfs42_fallocate, |
248 | .clone_file_range = nfs42_clone_file_range, | 271 | .clone_file_range = nfs42_clone_file_range, |
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 084e8570da18..223982eb38c9 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c | |||
@@ -74,6 +74,17 @@ | |||
74 | #define NFS4_POLL_RETRY_MIN (HZ/10) | 74 | #define NFS4_POLL_RETRY_MIN (HZ/10) |
75 | #define NFS4_POLL_RETRY_MAX (15*HZ) | 75 | #define NFS4_POLL_RETRY_MAX (15*HZ) |
76 | 76 | ||
77 | /* file attributes which can be mapped to nfs attributes */ | ||
78 | #define NFS4_VALID_ATTRS (ATTR_MODE \ | ||
79 | | ATTR_UID \ | ||
80 | | ATTR_GID \ | ||
81 | | ATTR_SIZE \ | ||
82 | | ATTR_ATIME \ | ||
83 | | ATTR_MTIME \ | ||
84 | | ATTR_CTIME \ | ||
85 | | ATTR_ATIME_SET \ | ||
86 | | ATTR_MTIME_SET) | ||
87 | |||
77 | struct nfs4_opendata; | 88 | struct nfs4_opendata; |
78 | static int _nfs4_proc_open(struct nfs4_opendata *data); | 89 | static int _nfs4_proc_open(struct nfs4_opendata *data); |
79 | static int _nfs4_recover_proc_open(struct nfs4_opendata *data); | 90 | static int _nfs4_recover_proc_open(struct nfs4_opendata *data); |
@@ -416,6 +427,7 @@ static int nfs4_do_handle_exception(struct nfs_server *server, | |||
416 | case -NFS4ERR_DELAY: | 427 | case -NFS4ERR_DELAY: |
417 | nfs_inc_server_stats(server, NFSIOS_DELAY); | 428 | nfs_inc_server_stats(server, NFSIOS_DELAY); |
418 | case -NFS4ERR_GRACE: | 429 | case -NFS4ERR_GRACE: |
430 | case -NFS4ERR_RECALLCONFLICT: | ||
419 | exception->delay = 1; | 431 | exception->delay = 1; |
420 | return 0; | 432 | return 0; |
421 | 433 | ||
@@ -2558,15 +2570,20 @@ static int _nfs4_do_open(struct inode *dir, | |||
2558 | if ((opendata->o_arg.open_flags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL) && | 2570 | if ((opendata->o_arg.open_flags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL) && |
2559 | (opendata->o_arg.createmode != NFS4_CREATE_GUARDED)) { | 2571 | (opendata->o_arg.createmode != NFS4_CREATE_GUARDED)) { |
2560 | nfs4_exclusive_attrset(opendata, sattr, &label); | 2572 | nfs4_exclusive_attrset(opendata, sattr, &label); |
2561 | 2573 | /* | |
2562 | nfs_fattr_init(opendata->o_res.f_attr); | 2574 | * send create attributes that were not set by open |
2563 | status = nfs4_do_setattr(state->inode, cred, | 2575 | * with an extra setattr. |
2564 | opendata->o_res.f_attr, sattr, | 2576 | */ |
2565 | state, label, olabel); | 2577 | if (sattr->ia_valid & NFS4_VALID_ATTRS) { |
2566 | if (status == 0) { | 2578 | nfs_fattr_init(opendata->o_res.f_attr); |
2567 | nfs_setattr_update_inode(state->inode, sattr, | 2579 | status = nfs4_do_setattr(state->inode, cred, |
2568 | opendata->o_res.f_attr); | 2580 | opendata->o_res.f_attr, sattr, |
2569 | nfs_setsecurity(state->inode, opendata->o_res.f_attr, olabel); | 2581 | state, label, olabel); |
2582 | if (status == 0) { | ||
2583 | nfs_setattr_update_inode(state->inode, sattr, | ||
2584 | opendata->o_res.f_attr); | ||
2585 | nfs_setsecurity(state->inode, opendata->o_res.f_attr, olabel); | ||
2586 | } | ||
2570 | } | 2587 | } |
2571 | } | 2588 | } |
2572 | if (opened && opendata->file_created) | 2589 | if (opened && opendata->file_created) |
@@ -2676,6 +2693,7 @@ static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, | |||
2676 | .rpc_resp = &res, | 2693 | .rpc_resp = &res, |
2677 | .rpc_cred = cred, | 2694 | .rpc_cred = cred, |
2678 | }; | 2695 | }; |
2696 | struct rpc_cred *delegation_cred = NULL; | ||
2679 | unsigned long timestamp = jiffies; | 2697 | unsigned long timestamp = jiffies; |
2680 | fmode_t fmode; | 2698 | fmode_t fmode; |
2681 | bool truncate; | 2699 | bool truncate; |
@@ -2691,7 +2709,7 @@ static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, | |||
2691 | truncate = (sattr->ia_valid & ATTR_SIZE) ? true : false; | 2709 | truncate = (sattr->ia_valid & ATTR_SIZE) ? true : false; |
2692 | fmode = truncate ? FMODE_WRITE : FMODE_READ; | 2710 | fmode = truncate ? FMODE_WRITE : FMODE_READ; |
2693 | 2711 | ||
2694 | if (nfs4_copy_delegation_stateid(&arg.stateid, inode, fmode)) { | 2712 | if (nfs4_copy_delegation_stateid(inode, fmode, &arg.stateid, &delegation_cred)) { |
2695 | /* Use that stateid */ | 2713 | /* Use that stateid */ |
2696 | } else if (truncate && state != NULL) { | 2714 | } else if (truncate && state != NULL) { |
2697 | struct nfs_lockowner lockowner = { | 2715 | struct nfs_lockowner lockowner = { |
@@ -2700,13 +2718,17 @@ static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, | |||
2700 | }; | 2718 | }; |
2701 | if (!nfs4_valid_open_stateid(state)) | 2719 | if (!nfs4_valid_open_stateid(state)) |
2702 | return -EBADF; | 2720 | return -EBADF; |
2703 | if (nfs4_select_rw_stateid(&arg.stateid, state, FMODE_WRITE, | 2721 | if (nfs4_select_rw_stateid(state, FMODE_WRITE, &lockowner, |
2704 | &lockowner) == -EIO) | 2722 | &arg.stateid, &delegation_cred) == -EIO) |
2705 | return -EBADF; | 2723 | return -EBADF; |
2706 | } else | 2724 | } else |
2707 | nfs4_stateid_copy(&arg.stateid, &zero_stateid); | 2725 | nfs4_stateid_copy(&arg.stateid, &zero_stateid); |
2726 | if (delegation_cred) | ||
2727 | msg.rpc_cred = delegation_cred; | ||
2708 | 2728 | ||
2709 | status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); | 2729 | status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); |
2730 | |||
2731 | put_rpccred(delegation_cred); | ||
2710 | if (status == 0 && state != NULL) | 2732 | if (status == 0 && state != NULL) |
2711 | renew_lease(server, timestamp); | 2733 | renew_lease(server, timestamp); |
2712 | trace_nfs4_setattr(inode, &arg.stateid, status); | 2734 | trace_nfs4_setattr(inode, &arg.stateid, status); |
@@ -4285,7 +4307,7 @@ int nfs4_set_rw_stateid(nfs4_stateid *stateid, | |||
4285 | 4307 | ||
4286 | if (l_ctx != NULL) | 4308 | if (l_ctx != NULL) |
4287 | lockowner = &l_ctx->lockowner; | 4309 | lockowner = &l_ctx->lockowner; |
4288 | return nfs4_select_rw_stateid(stateid, ctx->state, fmode, lockowner); | 4310 | return nfs4_select_rw_stateid(ctx->state, fmode, lockowner, stateid, NULL); |
4289 | } | 4311 | } |
4290 | EXPORT_SYMBOL_GPL(nfs4_set_rw_stateid); | 4312 | EXPORT_SYMBOL_GPL(nfs4_set_rw_stateid); |
4291 | 4313 | ||
@@ -6054,6 +6076,7 @@ static int nfs41_lock_expired(struct nfs4_state *state, struct file_lock *reques | |||
6054 | static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) | 6076 | static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) |
6055 | { | 6077 | { |
6056 | struct nfs_inode *nfsi = NFS_I(state->inode); | 6078 | struct nfs_inode *nfsi = NFS_I(state->inode); |
6079 | struct nfs4_state_owner *sp = state->owner; | ||
6057 | unsigned char fl_flags = request->fl_flags; | 6080 | unsigned char fl_flags = request->fl_flags; |
6058 | int status = -ENOLCK; | 6081 | int status = -ENOLCK; |
6059 | 6082 | ||
@@ -6068,6 +6091,7 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock | |||
6068 | status = do_vfs_lock(state->inode, request); | 6091 | status = do_vfs_lock(state->inode, request); |
6069 | if (status < 0) | 6092 | if (status < 0) |
6070 | goto out; | 6093 | goto out; |
6094 | mutex_lock(&sp->so_delegreturn_mutex); | ||
6071 | down_read(&nfsi->rwsem); | 6095 | down_read(&nfsi->rwsem); |
6072 | if (test_bit(NFS_DELEGATED_STATE, &state->flags)) { | 6096 | if (test_bit(NFS_DELEGATED_STATE, &state->flags)) { |
6073 | /* Yes: cache locks! */ | 6097 | /* Yes: cache locks! */ |
@@ -6075,9 +6099,11 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock | |||
6075 | request->fl_flags = fl_flags & ~FL_SLEEP; | 6099 | request->fl_flags = fl_flags & ~FL_SLEEP; |
6076 | status = do_vfs_lock(state->inode, request); | 6100 | status = do_vfs_lock(state->inode, request); |
6077 | up_read(&nfsi->rwsem); | 6101 | up_read(&nfsi->rwsem); |
6102 | mutex_unlock(&sp->so_delegreturn_mutex); | ||
6078 | goto out; | 6103 | goto out; |
6079 | } | 6104 | } |
6080 | up_read(&nfsi->rwsem); | 6105 | up_read(&nfsi->rwsem); |
6106 | mutex_unlock(&sp->so_delegreturn_mutex); | ||
6081 | status = _nfs4_do_setlk(state, cmd, request, NFS_LOCK_NEW); | 6107 | status = _nfs4_do_setlk(state, cmd, request, NFS_LOCK_NEW); |
6082 | out: | 6108 | out: |
6083 | request->fl_flags = fl_flags; | 6109 | request->fl_flags = fl_flags; |
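[Editorial sketch] Taking so_delegreturn_mutex around the delegated-lock path closes a check-then-act race: without it, a concurrent delegation return could strip NFS_DELEGATED_STATE between the test_bit() and the locally cached lock, leaving a lock the server never heard about. A runnable toy of the pattern (compile with -lpthread; the names are simplified, and the assumption is that delegreturn takes the same mutex before clearing the flag):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t delegreturn_mutex = PTHREAD_MUTEX_INITIALIZER;
static bool delegated = true;

static void *delegreturn(void *arg)
{
	pthread_mutex_lock(&delegreturn_mutex);
	delegated = false;		/* delegation handed back */
	pthread_mutex_unlock(&delegreturn_mutex);
	return NULL;
}

int main(void)
{
	pthread_t t;
	pthread_create(&t, NULL, delegreturn, NULL);

	pthread_mutex_lock(&delegreturn_mutex);
	if (delegated)
		printf("cache lock locally\n");	/* safe: return cannot race */
	else
		printf("send LOCK to server\n");
	pthread_mutex_unlock(&delegreturn_mutex);

	pthread_join(t, NULL);
	return 0;
}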
@@ -7351,9 +7377,11 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo) | |||
7351 | * always set csa_cachethis to FALSE because the current implementation | 7377 | * always set csa_cachethis to FALSE because the current implementation |
7352 | * of the back channel DRC only supports caching the CB_SEQUENCE operation. | 7378 | * of the back channel DRC only supports caching the CB_SEQUENCE operation. |
7353 | */ | 7379 | */ |
7354 | static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args) | 7380 | static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args, |
7381 | struct rpc_clnt *clnt) | ||
7355 | { | 7382 | { |
7356 | unsigned int max_rqst_sz, max_resp_sz; | 7383 | unsigned int max_rqst_sz, max_resp_sz; |
7384 | unsigned int max_bc_payload = rpc_max_bc_payload(clnt); | ||
7357 | 7385 | ||
7358 | max_rqst_sz = NFS_MAX_FILE_IO_SIZE + nfs41_maxwrite_overhead; | 7386 | max_rqst_sz = NFS_MAX_FILE_IO_SIZE + nfs41_maxwrite_overhead; |
7359 | max_resp_sz = NFS_MAX_FILE_IO_SIZE + nfs41_maxread_overhead; | 7387 | max_resp_sz = NFS_MAX_FILE_IO_SIZE + nfs41_maxread_overhead; |
@@ -7371,8 +7399,8 @@ static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args) | |||
7371 | args->fc_attrs.max_ops, args->fc_attrs.max_reqs); | 7399 | args->fc_attrs.max_ops, args->fc_attrs.max_reqs); |
7372 | 7400 | ||
7373 | /* Back channel attributes */ | 7401 | /* Back channel attributes */ |
7374 | args->bc_attrs.max_rqst_sz = PAGE_SIZE; | 7402 | args->bc_attrs.max_rqst_sz = max_bc_payload; |
7375 | args->bc_attrs.max_resp_sz = PAGE_SIZE; | 7403 | args->bc_attrs.max_resp_sz = max_bc_payload; |
7376 | args->bc_attrs.max_resp_sz_cached = 0; | 7404 | args->bc_attrs.max_resp_sz_cached = 0; |
7377 | args->bc_attrs.max_ops = NFS4_MAX_BACK_CHANNEL_OPS; | 7405 | args->bc_attrs.max_ops = NFS4_MAX_BACK_CHANNEL_OPS; |
7378 | args->bc_attrs.max_reqs = NFS41_BC_MAX_CALLBACKS; | 7406 | args->bc_attrs.max_reqs = NFS41_BC_MAX_CALLBACKS; |
@@ -7476,7 +7504,7 @@ static int _nfs4_proc_create_session(struct nfs_client *clp, | |||
7476 | }; | 7504 | }; |
7477 | int status; | 7505 | int status; |
7478 | 7506 | ||
7479 | nfs4_init_channel_attrs(&args); | 7507 | nfs4_init_channel_attrs(&args, clp->cl_rpcclient); |
7480 | args.flags = (SESSION4_PERSIST | SESSION4_BACK_CHAN); | 7508 | args.flags = (SESSION4_PERSIST | SESSION4_BACK_CHAN); |
7481 | 7509 | ||
7482 | status = rpc_call_sync(session->clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); | 7510 | status = rpc_call_sync(session->clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); |
@@ -7820,40 +7848,34 @@ nfs4_layoutget_prepare(struct rpc_task *task, void *calldata) | |||
7820 | struct nfs4_layoutget *lgp = calldata; | 7848 | struct nfs4_layoutget *lgp = calldata; |
7821 | struct nfs_server *server = NFS_SERVER(lgp->args.inode); | 7849 | struct nfs_server *server = NFS_SERVER(lgp->args.inode); |
7822 | struct nfs4_session *session = nfs4_get_session(server); | 7850 | struct nfs4_session *session = nfs4_get_session(server); |
7823 | int ret; | ||
7824 | 7851 | ||
7825 | dprintk("--> %s\n", __func__); | 7852 | dprintk("--> %s\n", __func__); |
7826 | /* Note the is a race here, where a CB_LAYOUTRECALL can come in | 7853 | nfs41_setup_sequence(session, &lgp->args.seq_args, |
7827 | * right now covering the LAYOUTGET we are about to send. | 7854 | &lgp->res.seq_res, task); |
7828 | * However, that is not so catastrophic, and there seems | 7855 | dprintk("<-- %s\n", __func__); |
7829 | * to be no way to prevent it completely. | ||
7830 | */ | ||
7831 | if (nfs41_setup_sequence(session, &lgp->args.seq_args, | ||
7832 | &lgp->res.seq_res, task)) | ||
7833 | return; | ||
7834 | ret = pnfs_choose_layoutget_stateid(&lgp->args.stateid, | ||
7835 | NFS_I(lgp->args.inode)->layout, | ||
7836 | &lgp->args.range, | ||
7837 | lgp->args.ctx->state); | ||
7838 | if (ret < 0) | ||
7839 | rpc_exit(task, ret); | ||
7840 | } | 7856 | } |
7841 | 7857 | ||
7842 | static void nfs4_layoutget_done(struct rpc_task *task, void *calldata) | 7858 | static void nfs4_layoutget_done(struct rpc_task *task, void *calldata) |
7843 | { | 7859 | { |
7844 | struct nfs4_layoutget *lgp = calldata; | 7860 | struct nfs4_layoutget *lgp = calldata; |
7861 | |||
7862 | dprintk("--> %s\n", __func__); | ||
7863 | nfs41_sequence_done(task, &lgp->res.seq_res); | ||
7864 | dprintk("<-- %s\n", __func__); | ||
7865 | } | ||
7866 | |||
7867 | static int | ||
7868 | nfs4_layoutget_handle_exception(struct rpc_task *task, | ||
7869 | struct nfs4_layoutget *lgp, struct nfs4_exception *exception) | ||
7870 | { | ||
7845 | struct inode *inode = lgp->args.inode; | 7871 | struct inode *inode = lgp->args.inode; |
7846 | struct nfs_server *server = NFS_SERVER(inode); | 7872 | struct nfs_server *server = NFS_SERVER(inode); |
7847 | struct pnfs_layout_hdr *lo; | 7873 | struct pnfs_layout_hdr *lo; |
7848 | struct nfs4_state *state = NULL; | 7874 | int status = task->tk_status; |
7849 | unsigned long timeo, now, giveup; | ||
7850 | 7875 | ||
7851 | dprintk("--> %s tk_status => %d\n", __func__, -task->tk_status); | 7876 | dprintk("--> %s tk_status => %d\n", __func__, -task->tk_status); |
7852 | 7877 | ||
7853 | if (!nfs41_sequence_done(task, &lgp->res.seq_res)) | 7878 | switch (status) { |
7854 | goto out; | ||
7855 | |||
7856 | switch (task->tk_status) { | ||
7857 | case 0: | 7879 | case 0: |
7858 | goto out; | 7880 | goto out; |
7859 | 7881 | ||
@@ -7863,57 +7885,43 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata) | |||
7863 | * retry go inband. | 7885 | * retry go inband. |
7864 | */ | 7886 | */ |
7865 | case -NFS4ERR_LAYOUTUNAVAILABLE: | 7887 | case -NFS4ERR_LAYOUTUNAVAILABLE: |
7866 | task->tk_status = -ENODATA; | 7888 | status = -ENODATA; |
7867 | goto out; | 7889 | goto out; |
7868 | /* | 7890 | /* |
7869 | * NFS4ERR_BADLAYOUT means the MDS cannot return a layout of | 7891 | * NFS4ERR_BADLAYOUT means the MDS cannot return a layout of |
7870 | * length lgp->args.minlength != 0 (see RFC5661 section 18.43.3). | 7892 | * length lgp->args.minlength != 0 (see RFC5661 section 18.43.3). |
7871 | */ | 7893 | */ |
7872 | case -NFS4ERR_BADLAYOUT: | 7894 | case -NFS4ERR_BADLAYOUT: |
7873 | goto out_overflow; | 7895 | status = -EOVERFLOW; |
7896 | goto out; | ||
7874 | /* | 7897 | /* |
7875 | * NFS4ERR_LAYOUTTRYLATER is a conflict with another client | 7898 | * NFS4ERR_LAYOUTTRYLATER is a conflict with another client |
7876 | * (or clients) writing to the same RAID stripe except when | 7899 | * (or clients) writing to the same RAID stripe except when |
7877 | * the minlength argument is 0 (see RFC5661 section 18.43.3). | 7900 | * the minlength argument is 0 (see RFC5661 section 18.43.3). |
7901 | * | ||
7902 | * Treat it like we would RECALLCONFLICT -- we retry for a little | ||
7903 | * while, and then eventually give up. | ||
7878 | */ | 7904 | */ |
7879 | case -NFS4ERR_LAYOUTTRYLATER: | 7905 | case -NFS4ERR_LAYOUTTRYLATER: |
7880 | if (lgp->args.minlength == 0) | 7906 | if (lgp->args.minlength == 0) { |
7881 | goto out_overflow; | 7907 | status = -EOVERFLOW; |
7882 | /* | 7908 | goto out; |
7883 | * NFS4ERR_RECALLCONFLICT is when conflict with self (must recall | ||
7884 | * existing layout before getting a new one). | ||
7885 | */ | ||
7886 | case -NFS4ERR_RECALLCONFLICT: | ||
7887 | timeo = rpc_get_timeout(task->tk_client); | ||
7888 | giveup = lgp->args.timestamp + timeo; | ||
7889 | now = jiffies; | ||
7890 | if (time_after(giveup, now)) { | ||
7891 | unsigned long delay; | ||
7892 | |||
7893 | /* Delay for: | ||
7894 | * - Not less then NFS4_POLL_RETRY_MIN. | ||
7895 | * - One last time a jiffie before we give up | ||
7896 | * - exponential backoff (time_now minus start_attempt) | ||
7897 | */ | ||
7898 | delay = max_t(unsigned long, NFS4_POLL_RETRY_MIN, | ||
7899 | min((giveup - now - 1), | ||
7900 | now - lgp->args.timestamp)); | ||
7901 | |||
7902 | dprintk("%s: NFS4ERR_RECALLCONFLICT waiting %lu\n", | ||
7903 | __func__, delay); | ||
7904 | rpc_delay(task, delay); | ||
7905 | /* Do not call nfs4_async_handle_error() */ | ||
7906 | goto out_restart; | ||
7907 | } | 7909 | } |
7908 | break; | 7910 | /* Fallthrough */ |
7911 | case -NFS4ERR_RECALLCONFLICT: | ||
7912 | nfs4_handle_exception(server, -NFS4ERR_RECALLCONFLICT, | ||
7913 | exception); | ||
7914 | status = -ERECALLCONFLICT; | ||
7915 | goto out; | ||
7909 | case -NFS4ERR_EXPIRED: | 7916 | case -NFS4ERR_EXPIRED: |
7910 | case -NFS4ERR_BAD_STATEID: | 7917 | case -NFS4ERR_BAD_STATEID: |
7918 | exception->timeout = 0; | ||
7911 | spin_lock(&inode->i_lock); | 7919 | spin_lock(&inode->i_lock); |
7912 | if (nfs4_stateid_match(&lgp->args.stateid, | 7920 | if (nfs4_stateid_match(&lgp->args.stateid, |
7913 | &lgp->args.ctx->state->stateid)) { | 7921 | &lgp->args.ctx->state->stateid)) { |
7914 | spin_unlock(&inode->i_lock); | 7922 | spin_unlock(&inode->i_lock); |
7915 | /* If the open stateid was bad, then recover it. */ | 7923 | /* If the open stateid was bad, then recover it. */ |
7916 | state = lgp->args.ctx->state; | 7924 | exception->state = lgp->args.ctx->state; |
7917 | break; | 7925 | break; |
7918 | } | 7926 | } |
7919 | lo = NFS_I(inode)->layout; | 7927 | lo = NFS_I(inode)->layout; |
@@ -7926,25 +7934,21 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata) | |||
7926 | * with the current stateid. | 7934 | * with the current stateid. |
7927 | */ | 7935 | */ |
7928 | set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); | 7936 | set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); |
7929 | pnfs_mark_matching_lsegs_invalid(lo, &head, NULL); | 7937 | pnfs_mark_matching_lsegs_invalid(lo, &head, NULL, 0); |
7930 | spin_unlock(&inode->i_lock); | 7938 | spin_unlock(&inode->i_lock); |
7931 | pnfs_free_lseg_list(&head); | 7939 | pnfs_free_lseg_list(&head); |
7932 | } else | 7940 | } else |
7933 | spin_unlock(&inode->i_lock); | 7941 | spin_unlock(&inode->i_lock); |
7934 | goto out_restart; | 7942 | status = -EAGAIN; |
7943 | goto out; | ||
7935 | } | 7944 | } |
7936 | if (nfs4_async_handle_error(task, server, state, &lgp->timeout) == -EAGAIN) | 7945 | |
7937 | goto out_restart; | 7946 | status = nfs4_handle_exception(server, status, exception); |
7947 | if (exception->retry) | ||
7948 | status = -EAGAIN; | ||
7938 | out: | 7949 | out: |
7939 | dprintk("<-- %s\n", __func__); | 7950 | dprintk("<-- %s\n", __func__); |
7940 | return; | 7951 | return status; |
7941 | out_restart: | ||
7942 | task->tk_status = 0; | ||
7943 | rpc_restart_call_prepare(task); | ||
7944 | return; | ||
7945 | out_overflow: | ||
7946 | task->tk_status = -EOVERFLOW; | ||
7947 | goto out; | ||
7948 | } | 7952 | } |
7949 | 7953 | ||
7950 | static size_t max_response_pages(struct nfs_server *server) | 7954 | static size_t max_response_pages(struct nfs_server *server) |
@@ -8013,7 +8017,7 @@ static const struct rpc_call_ops nfs4_layoutget_call_ops = { | |||
8013 | }; | 8017 | }; |
8014 | 8018 | ||
8015 | struct pnfs_layout_segment * | 8019 | struct pnfs_layout_segment * |
8016 | nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) | 8020 | nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout, gfp_t gfp_flags) |
8017 | { | 8021 | { |
8018 | struct inode *inode = lgp->args.inode; | 8022 | struct inode *inode = lgp->args.inode; |
8019 | struct nfs_server *server = NFS_SERVER(inode); | 8023 | struct nfs_server *server = NFS_SERVER(inode); |
@@ -8033,6 +8037,7 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) | |||
8033 | .flags = RPC_TASK_ASYNC, | 8037 | .flags = RPC_TASK_ASYNC, |
8034 | }; | 8038 | }; |
8035 | struct pnfs_layout_segment *lseg = NULL; | 8039 | struct pnfs_layout_segment *lseg = NULL; |
8040 | struct nfs4_exception exception = { .timeout = *timeout }; | ||
8036 | int status = 0; | 8041 | int status = 0; |
8037 | 8042 | ||
8038 | dprintk("--> %s\n", __func__); | 8043 | dprintk("--> %s\n", __func__); |
@@ -8046,7 +8051,6 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) | |||
8046 | return ERR_PTR(-ENOMEM); | 8051 | return ERR_PTR(-ENOMEM); |
8047 | } | 8052 | } |
8048 | lgp->args.layout.pglen = max_pages * PAGE_SIZE; | 8053 | lgp->args.layout.pglen = max_pages * PAGE_SIZE; |
8049 | lgp->args.timestamp = jiffies; | ||
8050 | 8054 | ||
8051 | lgp->res.layoutp = &lgp->args.layout; | 8055 | lgp->res.layoutp = &lgp->args.layout; |
8052 | lgp->res.seq_res.sr_slot = NULL; | 8056 | lgp->res.seq_res.sr_slot = NULL; |
@@ -8056,13 +8060,17 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) | |||
8056 | if (IS_ERR(task)) | 8060 | if (IS_ERR(task)) |
8057 | return ERR_CAST(task); | 8061 | return ERR_CAST(task); |
8058 | status = nfs4_wait_for_completion_rpc_task(task); | 8062 | status = nfs4_wait_for_completion_rpc_task(task); |
8059 | if (status == 0) | 8063 | if (status == 0) { |
8060 | status = task->tk_status; | 8064 | status = nfs4_layoutget_handle_exception(task, lgp, &exception); |
8065 | *timeout = exception.timeout; | ||
8066 | } | ||
8067 | |||
8061 | trace_nfs4_layoutget(lgp->args.ctx, | 8068 | trace_nfs4_layoutget(lgp->args.ctx, |
8062 | &lgp->args.range, | 8069 | &lgp->args.range, |
8063 | &lgp->res.range, | 8070 | &lgp->res.range, |
8064 | &lgp->res.stateid, | 8071 | &lgp->res.stateid, |
8065 | status); | 8072 | status); |
8073 | |||
8066 | /* if layoutp->len is 0, nfs4_layoutget_prepare called rpc_exit */ | 8074 | /* if layoutp->len is 0, nfs4_layoutget_prepare called rpc_exit */ |
8067 | if (status == 0 && lgp->res.layoutp->len) | 8075 | if (status == 0 && lgp->res.layoutp->len) |
8068 | lseg = pnfs_layout_process(lgp); | 8076 | lseg = pnfs_layout_process(lgp); |
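[Editorial sketch] After this rework, retryable LAYOUTGET failures surface as -EAGAIN from nfs4_proc_layoutget while the accumulated backoff state rides along in *timeout, so the pnfs caller can loop without resetting the clock each attempt (send_layoutget gains the matching long *timeout parameter later in this patch). A runnable toy of that contract; fake_layoutget() and its backoff numbers are invented for the demo:

#include <errno.h>
#include <stdio.h>

/* Fail twice, growing the caller-held backoff, then succeed. */
static int fake_layoutget(int attempt, long *timeout)
{
	if (attempt < 2) {
		*timeout = *timeout ? *timeout * 2 : 100;
		return -EAGAIN;
	}
	return 0;
}

int main(void)
{
	long timeout = 0;	/* persists across attempts, as in the patch */
	int attempt = 0, status;

	do {
		status = fake_layoutget(attempt++, &timeout);
		printf("attempt=%d status=%d timeout=%ld\n",
		       attempt, status, timeout);
	} while (status == -EAGAIN);
	return 0;
}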
@@ -8118,7 +8126,8 @@ static void nfs4_layoutreturn_release(void *calldata) | |||
8118 | 8126 | ||
8119 | dprintk("--> %s\n", __func__); | 8127 | dprintk("--> %s\n", __func__); |
8120 | spin_lock(&lo->plh_inode->i_lock); | 8128 | spin_lock(&lo->plh_inode->i_lock); |
8121 | pnfs_mark_matching_lsegs_invalid(lo, &freeme, &lrp->args.range); | 8129 | pnfs_mark_matching_lsegs_invalid(lo, &freeme, &lrp->args.range, |
8130 | be32_to_cpu(lrp->args.stateid.seqid)); | ||
8122 | pnfs_mark_layout_returned_if_empty(lo); | 8131 | pnfs_mark_layout_returned_if_empty(lo); |
8123 | if (lrp->res.lrs_present) | 8132 | if (lrp->res.lrs_present) |
8124 | pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); | 8133 | pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); |
@@ -8653,6 +8662,9 @@ nfs41_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp) | |||
8653 | static bool nfs41_match_stateid(const nfs4_stateid *s1, | 8662 | static bool nfs41_match_stateid(const nfs4_stateid *s1, |
8654 | const nfs4_stateid *s2) | 8663 | const nfs4_stateid *s2) |
8655 | { | 8664 | { |
8665 | if (s1->type != s2->type) | ||
8666 | return false; | ||
8667 | |||
8656 | if (memcmp(s1->other, s2->other, sizeof(s1->other)) != 0) | 8668 | if (memcmp(s1->other, s2->other, sizeof(s1->other)) != 0) |
8657 | return false; | 8669 | return false; |
8658 | 8670 | ||
@@ -8793,6 +8805,7 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = { | |||
8793 | | NFS_CAP_STATEID_NFSV41 | 8805 | | NFS_CAP_STATEID_NFSV41 |
8794 | | NFS_CAP_ATOMIC_OPEN_V1 | 8806 | | NFS_CAP_ATOMIC_OPEN_V1 |
8795 | | NFS_CAP_ALLOCATE | 8807 | | NFS_CAP_ALLOCATE |
8808 | | NFS_CAP_COPY | ||
8796 | | NFS_CAP_DEALLOCATE | 8809 | | NFS_CAP_DEALLOCATE |
8797 | | NFS_CAP_SEEK | 8810 | | NFS_CAP_SEEK |
8798 | | NFS_CAP_LAYOUTSTATS | 8811 | | NFS_CAP_LAYOUTSTATS |
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index d854693a15b0..5075592df145 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c | |||
@@ -65,7 +65,10 @@ | |||
65 | 65 | ||
66 | #define OPENOWNER_POOL_SIZE 8 | 66 | #define OPENOWNER_POOL_SIZE 8 |
67 | 67 | ||
68 | const nfs4_stateid zero_stateid; | 68 | const nfs4_stateid zero_stateid = { |
69 | .data = { 0 }, | ||
70 | .type = NFS4_SPECIAL_STATEID_TYPE, | ||
71 | }; | ||
69 | static DEFINE_MUTEX(nfs_clid_init_mutex); | 72 | static DEFINE_MUTEX(nfs_clid_init_mutex); |
70 | 73 | ||
71 | int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred) | 74 | int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred) |
@@ -985,15 +988,20 @@ static void nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state) | |||
985 | * Byte-range lock aware utility to initialize the stateid of read/write | 988 | * Byte-range lock aware utility to initialize the stateid of read/write |
986 | * requests. | 989 | * requests. |
987 | */ | 990 | */ |
988 | int nfs4_select_rw_stateid(nfs4_stateid *dst, struct nfs4_state *state, | 991 | int nfs4_select_rw_stateid(struct nfs4_state *state, |
989 | fmode_t fmode, const struct nfs_lockowner *lockowner) | 992 | fmode_t fmode, const struct nfs_lockowner *lockowner, |
993 | nfs4_stateid *dst, struct rpc_cred **cred) | ||
990 | { | 994 | { |
991 | int ret = nfs4_copy_lock_stateid(dst, state, lockowner); | 995 | int ret; |
996 | |||
997 | if (cred != NULL) | ||
998 | *cred = NULL; | ||
999 | ret = nfs4_copy_lock_stateid(dst, state, lockowner); | ||
992 | if (ret == -EIO) | 1000 | if (ret == -EIO) |
993 | /* A lost lock - don't even consider delegations */ | 1001 | /* A lost lock - don't even consider delegations */ |
994 | goto out; | 1002 | goto out; |
995 | /* returns true if delegation stateid found and copied */ | 1003 | /* returns true if delegation stateid found and copied */ |
996 | if (nfs4_copy_delegation_stateid(dst, state->inode, fmode)) { | 1004 | if (nfs4_copy_delegation_stateid(state->inode, fmode, dst, cred)) { |
997 | ret = 0; | 1005 | ret = 0; |
998 | goto out; | 1006 | goto out; |
999 | } | 1007 | } |
diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index 2c8d05dae5b1..9c150b153782 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h | |||
@@ -1520,6 +1520,8 @@ DEFINE_NFS4_INODE_EVENT(nfs4_layoutreturn_on_close); | |||
1520 | { PNFS_UPDATE_LAYOUT_FOUND_CACHED, "found cached" }, \ | 1520 | { PNFS_UPDATE_LAYOUT_FOUND_CACHED, "found cached" }, \ |
1521 | { PNFS_UPDATE_LAYOUT_RETURN, "layoutreturn" }, \ | 1521 | { PNFS_UPDATE_LAYOUT_RETURN, "layoutreturn" }, \ |
1522 | { PNFS_UPDATE_LAYOUT_BLOCKED, "layouts blocked" }, \ | 1522 | { PNFS_UPDATE_LAYOUT_BLOCKED, "layouts blocked" }, \ |
1523 | { PNFS_UPDATE_LAYOUT_INVALID_OPEN, "invalid open" }, \ | ||
1524 | { PNFS_UPDATE_LAYOUT_RETRY, "retrying" }, \ | ||
1523 | { PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET, "sent layoutget" }) | 1525 | { PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET, "sent layoutget" }) |
1524 | 1526 | ||
1525 | TRACE_EVENT(pnfs_update_layout, | 1527 | TRACE_EVENT(pnfs_update_layout, |
@@ -1528,9 +1530,10 @@ TRACE_EVENT(pnfs_update_layout, | |||
1528 | u64 count, | 1530 | u64 count, |
1529 | enum pnfs_iomode iomode, | 1531 | enum pnfs_iomode iomode, |
1530 | struct pnfs_layout_hdr *lo, | 1532 | struct pnfs_layout_hdr *lo, |
1533 | struct pnfs_layout_segment *lseg, | ||
1531 | enum pnfs_update_layout_reason reason | 1534 | enum pnfs_update_layout_reason reason |
1532 | ), | 1535 | ), |
1533 | TP_ARGS(inode, pos, count, iomode, lo, reason), | 1536 | TP_ARGS(inode, pos, count, iomode, lo, lseg, reason), |
1534 | TP_STRUCT__entry( | 1537 | TP_STRUCT__entry( |
1535 | __field(dev_t, dev) | 1538 | __field(dev_t, dev) |
1536 | __field(u64, fileid) | 1539 | __field(u64, fileid) |
@@ -1540,6 +1543,7 @@ TRACE_EVENT(pnfs_update_layout, | |||
1540 | __field(enum pnfs_iomode, iomode) | 1543 | __field(enum pnfs_iomode, iomode) |
1541 | __field(int, layoutstateid_seq) | 1544 | __field(int, layoutstateid_seq) |
1542 | __field(u32, layoutstateid_hash) | 1545 | __field(u32, layoutstateid_hash) |
1546 | __field(long, lseg) | ||
1543 | __field(enum pnfs_update_layout_reason, reason) | 1547 | __field(enum pnfs_update_layout_reason, reason) |
1544 | ), | 1548 | ), |
1545 | TP_fast_assign( | 1549 | TP_fast_assign( |
@@ -1559,11 +1563,12 @@ TRACE_EVENT(pnfs_update_layout, | |||
1559 | __entry->layoutstateid_seq = 0; | 1563 | __entry->layoutstateid_seq = 0; |
1560 | __entry->layoutstateid_hash = 0; | 1564 | __entry->layoutstateid_hash = 0; |
1561 | } | 1565 | } |
1566 | __entry->lseg = (long)lseg; | ||
1562 | ), | 1567 | ), |
1563 | TP_printk( | 1568 | TP_printk( |
1564 | "fileid=%02x:%02x:%llu fhandle=0x%08x " | 1569 | "fileid=%02x:%02x:%llu fhandle=0x%08x " |
1565 | "iomode=%s pos=%llu count=%llu " | 1570 | "iomode=%s pos=%llu count=%llu " |
1566 | "layoutstateid=%d:0x%08x (%s)", | 1571 | "layoutstateid=%d:0x%08x lseg=0x%lx (%s)", |
1567 | MAJOR(__entry->dev), MINOR(__entry->dev), | 1572 | MAJOR(__entry->dev), MINOR(__entry->dev), |
1568 | (unsigned long long)__entry->fileid, | 1573 | (unsigned long long)__entry->fileid, |
1569 | __entry->fhandle, | 1574 | __entry->fhandle, |
@@ -1571,6 +1576,7 @@ TRACE_EVENT(pnfs_update_layout, | |||
1571 | (unsigned long long)__entry->pos, | 1576 | (unsigned long long)__entry->pos, |
1572 | (unsigned long long)__entry->count, | 1577 | (unsigned long long)__entry->count, |
1573 | __entry->layoutstateid_seq, __entry->layoutstateid_hash, | 1578 | __entry->layoutstateid_seq, __entry->layoutstateid_hash, |
1579 | __entry->lseg, | ||
1574 | show_pnfs_update_layout_reason(__entry->reason) | 1580 | show_pnfs_update_layout_reason(__entry->reason) |
1575 | ) | 1581 | ) |
1576 | ); | 1582 | ); |
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 88474a4fc669..661e753fe1c9 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c | |||
@@ -4270,6 +4270,24 @@ static int decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid) | |||
4270 | return decode_opaque_fixed(xdr, stateid, NFS4_STATEID_SIZE); | 4270 | return decode_opaque_fixed(xdr, stateid, NFS4_STATEID_SIZE); |
4271 | } | 4271 | } |
4272 | 4272 | ||
4273 | static int decode_open_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid) | ||
4274 | { | ||
4275 | stateid->type = NFS4_OPEN_STATEID_TYPE; | ||
4276 | return decode_stateid(xdr, stateid); | ||
4277 | } | ||
4278 | |||
4279 | static int decode_lock_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid) | ||
4280 | { | ||
4281 | stateid->type = NFS4_LOCK_STATEID_TYPE; | ||
4282 | return decode_stateid(xdr, stateid); | ||
4283 | } | ||
4284 | |||
4285 | static int decode_delegation_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid) | ||
4286 | { | ||
4287 | stateid->type = NFS4_DELEGATION_STATEID_TYPE; | ||
4288 | return decode_stateid(xdr, stateid); | ||
4289 | } | ||
4290 | |||
4273 | static int decode_close(struct xdr_stream *xdr, struct nfs_closeres *res) | 4291 | static int decode_close(struct xdr_stream *xdr, struct nfs_closeres *res) |
4274 | { | 4292 | { |
4275 | int status; | 4293 | int status; |
@@ -4278,7 +4296,7 @@ static int decode_close(struct xdr_stream *xdr, struct nfs_closeres *res) | |||
4278 | if (status != -EIO) | 4296 | if (status != -EIO) |
4279 | nfs_increment_open_seqid(status, res->seqid); | 4297 | nfs_increment_open_seqid(status, res->seqid); |
4280 | if (!status) | 4298 | if (!status) |
4281 | status = decode_stateid(xdr, &res->stateid); | 4299 | status = decode_open_stateid(xdr, &res->stateid); |
4282 | return status; | 4300 | return status; |
4283 | } | 4301 | } |
4284 | 4302 | ||
@@ -4937,7 +4955,7 @@ static int decode_lock(struct xdr_stream *xdr, struct nfs_lock_res *res) | |||
4937 | if (status == -EIO) | 4955 | if (status == -EIO) |
4938 | goto out; | 4956 | goto out; |
4939 | if (status == 0) { | 4957 | if (status == 0) { |
4940 | status = decode_stateid(xdr, &res->stateid); | 4958 | status = decode_lock_stateid(xdr, &res->stateid); |
4941 | if (unlikely(status)) | 4959 | if (unlikely(status)) |
4942 | goto out; | 4960 | goto out; |
4943 | } else if (status == -NFS4ERR_DENIED) | 4961 | } else if (status == -NFS4ERR_DENIED) |
@@ -4966,7 +4984,7 @@ static int decode_locku(struct xdr_stream *xdr, struct nfs_locku_res *res) | |||
4966 | if (status != -EIO) | 4984 | if (status != -EIO) |
4967 | nfs_increment_lock_seqid(status, res->seqid); | 4985 | nfs_increment_lock_seqid(status, res->seqid); |
4968 | if (status == 0) | 4986 | if (status == 0) |
4969 | status = decode_stateid(xdr, &res->stateid); | 4987 | status = decode_lock_stateid(xdr, &res->stateid); |
4970 | return status; | 4988 | return status; |
4971 | } | 4989 | } |
4972 | 4990 | ||
@@ -5016,7 +5034,7 @@ static int decode_rw_delegation(struct xdr_stream *xdr, | |||
5016 | __be32 *p; | 5034 | __be32 *p; |
5017 | int status; | 5035 | int status; |
5018 | 5036 | ||
5019 | status = decode_stateid(xdr, &res->delegation); | 5037 | status = decode_delegation_stateid(xdr, &res->delegation); |
5020 | if (unlikely(status)) | 5038 | if (unlikely(status)) |
5021 | return status; | 5039 | return status; |
5022 | p = xdr_inline_decode(xdr, 4); | 5040 | p = xdr_inline_decode(xdr, 4); |
@@ -5096,7 +5114,7 @@ static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res) | |||
5096 | nfs_increment_open_seqid(status, res->seqid); | 5114 | nfs_increment_open_seqid(status, res->seqid); |
5097 | if (status) | 5115 | if (status) |
5098 | return status; | 5116 | return status; |
5099 | status = decode_stateid(xdr, &res->stateid); | 5117 | status = decode_open_stateid(xdr, &res->stateid); |
5100 | if (unlikely(status)) | 5118 | if (unlikely(status)) |
5101 | return status; | 5119 | return status; |
5102 | 5120 | ||
@@ -5136,7 +5154,7 @@ static int decode_open_confirm(struct xdr_stream *xdr, struct nfs_open_confirmre | |||
5136 | if (status != -EIO) | 5154 | if (status != -EIO) |
5137 | nfs_increment_open_seqid(status, res->seqid); | 5155 | nfs_increment_open_seqid(status, res->seqid); |
5138 | if (!status) | 5156 | if (!status) |
5139 | status = decode_stateid(xdr, &res->stateid); | 5157 | status = decode_open_stateid(xdr, &res->stateid); |
5140 | return status; | 5158 | return status; |
5141 | } | 5159 | } |
5142 | 5160 | ||
@@ -5148,7 +5166,7 @@ static int decode_open_downgrade(struct xdr_stream *xdr, struct nfs_closeres *re | |||
5148 | if (status != -EIO) | 5166 | if (status != -EIO) |
5149 | nfs_increment_open_seqid(status, res->seqid); | 5167 | nfs_increment_open_seqid(status, res->seqid); |
5150 | if (!status) | 5168 | if (!status) |
5151 | status = decode_stateid(xdr, &res->stateid); | 5169 | status = decode_open_stateid(xdr, &res->stateid); |
5152 | return status; | 5170 | return status; |
5153 | } | 5171 | } |
5154 | 5172 | ||
@@ -5838,6 +5856,12 @@ out_overflow: | |||
5838 | } | 5856 | } |
5839 | 5857 | ||
5840 | #if defined(CONFIG_NFS_V4_1) | 5858 | #if defined(CONFIG_NFS_V4_1) |
5859 | static int decode_layout_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid) | ||
5860 | { | ||
5861 | stateid->type = NFS4_LAYOUT_STATEID_TYPE; | ||
5862 | return decode_stateid(xdr, stateid); | ||
5863 | } | ||
5864 | |||
5841 | static int decode_getdeviceinfo(struct xdr_stream *xdr, | 5865 | static int decode_getdeviceinfo(struct xdr_stream *xdr, |
5842 | struct nfs4_getdeviceinfo_res *res) | 5866 | struct nfs4_getdeviceinfo_res *res) |
5843 | { | 5867 | { |
@@ -5919,7 +5943,7 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, | |||
5919 | if (unlikely(!p)) | 5943 | if (unlikely(!p)) |
5920 | goto out_overflow; | 5944 | goto out_overflow; |
5921 | res->return_on_close = be32_to_cpup(p); | 5945 | res->return_on_close = be32_to_cpup(p); |
5922 | decode_stateid(xdr, &res->stateid); | 5946 | decode_layout_stateid(xdr, &res->stateid); |
5923 | p = xdr_inline_decode(xdr, 4); | 5947 | p = xdr_inline_decode(xdr, 4); |
5924 | if (unlikely(!p)) | 5948 | if (unlikely(!p)) |
5925 | goto out_overflow; | 5949 | goto out_overflow; |
@@ -5985,7 +6009,7 @@ static int decode_layoutreturn(struct xdr_stream *xdr, | |||
5985 | goto out_overflow; | 6009 | goto out_overflow; |
5986 | res->lrs_present = be32_to_cpup(p); | 6010 | res->lrs_present = be32_to_cpup(p); |
5987 | if (res->lrs_present) | 6011 | if (res->lrs_present) |
5988 | status = decode_stateid(xdr, &res->stateid); | 6012 | status = decode_layout_stateid(xdr, &res->stateid); |
5989 | return status; | 6013 | return status; |
5990 | out_overflow: | 6014 | out_overflow: |
5991 | print_overflow_msg(__func__, xdr); | 6015 | print_overflow_msg(__func__, xdr); |
@@ -7515,6 +7539,7 @@ struct rpc_procinfo nfs4_procedures[] = { | |||
7515 | PROC(DEALLOCATE, enc_deallocate, dec_deallocate), | 7539 | PROC(DEALLOCATE, enc_deallocate, dec_deallocate), |
7516 | PROC(LAYOUTSTATS, enc_layoutstats, dec_layoutstats), | 7540 | PROC(LAYOUTSTATS, enc_layoutstats, dec_layoutstats), |
7517 | PROC(CLONE, enc_clone, dec_clone), | 7541 | PROC(CLONE, enc_clone, dec_clone), |
7542 | PROC(COPY, enc_copy, dec_copy), | ||
7518 | #endif /* CONFIG_NFS_V4_2 */ | 7543 | #endif /* CONFIG_NFS_V4_2 */ |
7519 | }; | 7544 | }; |
7520 | 7545 | ||
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 1f6db4231057..174dd4cf5747 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c | |||
@@ -341,8 +341,10 @@ nfs_create_request(struct nfs_open_context *ctx, struct page *page, | |||
341 | * long write-back delay. This will be adjusted in | 341 | * long write-back delay. This will be adjusted in |
342 | * update_nfs_request below if the region is not locked. */ | 342 | * update_nfs_request below if the region is not locked. */ |
343 | req->wb_page = page; | 343 | req->wb_page = page; |
344 | req->wb_index = page_file_index(page); | 344 | if (page) { |
345 | get_page(page); | 345 | req->wb_index = page_file_index(page); |
346 | get_page(page); | ||
347 | } | ||
346 | req->wb_offset = offset; | 348 | req->wb_offset = offset; |
347 | req->wb_pgbase = offset; | 349 | req->wb_pgbase = offset; |
348 | req->wb_bytes = count; | 350 | req->wb_bytes = count; |
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 89a5ef4df08a..0c7e0d45a4de 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c | |||
@@ -270,7 +270,7 @@ pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo, | |||
270 | }; | 270 | }; |
271 | 271 | ||
272 | set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); | 272 | set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); |
273 | return pnfs_mark_matching_lsegs_invalid(lo, lseg_list, &range); | 273 | return pnfs_mark_matching_lsegs_invalid(lo, lseg_list, &range, 0); |
274 | } | 274 | } |
275 | 275 | ||
276 | static int | 276 | static int |
@@ -308,7 +308,7 @@ pnfs_layout_io_set_failed(struct pnfs_layout_hdr *lo, u32 iomode) | |||
308 | 308 | ||
309 | spin_lock(&inode->i_lock); | 309 | spin_lock(&inode->i_lock); |
310 | pnfs_layout_set_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode)); | 310 | pnfs_layout_set_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode)); |
311 | pnfs_mark_matching_lsegs_invalid(lo, &head, &range); | 311 | pnfs_mark_matching_lsegs_invalid(lo, &head, &range, 0); |
312 | spin_unlock(&inode->i_lock); | 312 | spin_unlock(&inode->i_lock); |
313 | pnfs_free_lseg_list(&head); | 313 | pnfs_free_lseg_list(&head); |
314 | dprintk("%s Setting layout IOMODE_%s fail bit\n", __func__, | 314 | dprintk("%s Setting layout IOMODE_%s fail bit\n", __func__, |
@@ -522,13 +522,35 @@ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg, | |||
522 | return rv; | 522 | return rv; |
523 | } | 523 | } |
524 | 524 | ||
525 | /* Returns count of number of matching invalid lsegs remaining in list | 525 | /* |
526 | * after call. | 526 | * Compare 2 layout stateid sequence ids, to see which is newer, |
527 | * taking into account wraparound issues. | ||
528 | */ | ||
529 | static bool pnfs_seqid_is_newer(u32 s1, u32 s2) | ||
530 | { | ||
531 | return (s32)(s1 - s2) > 0; | ||
532 | } | ||
533 | |||
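[Editorial sketch] pnfs_seqid_is_newer moved up unchanged: the cast-to-signed subtraction yields a wraparound-safe ordering of 32-bit sequence ids, the same serial-number-arithmetic trick RFC 1982 describes. A quick standalone check:

#include <stdint.h>
#include <stdio.h>

static int seqid_is_newer(uint32_t s1, uint32_t s2)
{
	return (int32_t)(s1 - s2) > 0;
}

int main(void)
{
	printf("%d\n", seqid_is_newer(5, 3));		/* 1 */
	printf("%d\n", seqid_is_newer(3, 5));		/* 0 */
	/* 2 is "newer" than 0xfffffffe: the counter wrapped. */
	printf("%d\n", seqid_is_newer(2, 0xfffffffeU));	/* 1 */
	return 0;
}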
534 | /** | ||
535 | * pnfs_mark_matching_lsegs_invalid - tear down lsegs or mark them for later | ||
536 | * @lo: layout header containing the lsegs | ||
537 | * @tmp_list: list head where doomed lsegs should go | ||
538 | * @recall_range: optional recall range argument to match (may be NULL) | ||
539 | * @seq: only invalidate lsegs obtained prior to this sequence (may be 0) | ||
540 | * | ||
541 | * Walk the list of lsegs in the layout header, and tear down any that should | ||
542 | * be destroyed. If "recall_range" is specified then the segment must match | ||
543 | * that range. If "seq" is non-zero, then only match segments that were handed | ||
544 | * out at or before that sequence. | ||
545 | * | ||
546 | * Returns number of matching invalid lsegs remaining in list after scanning | ||
547 | * it and purging them. | ||
527 | */ | 548 | */ |
528 | int | 549 | int |
529 | pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, | 550 | pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, |
530 | struct list_head *tmp_list, | 551 | struct list_head *tmp_list, |
531 | const struct pnfs_layout_range *recall_range) | 552 | const struct pnfs_layout_range *recall_range, |
553 | u32 seq) | ||
532 | { | 554 | { |
533 | struct pnfs_layout_segment *lseg, *next; | 555 | struct pnfs_layout_segment *lseg, *next; |
534 | int remaining = 0; | 556 | int remaining = 0; |
@@ -540,10 +562,12 @@ pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, | |||
540 | list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) | 562 | list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) |
541 | if (!recall_range || | 563 | if (!recall_range || |
542 | should_free_lseg(&lseg->pls_range, recall_range)) { | 564 | should_free_lseg(&lseg->pls_range, recall_range)) { |
543 | dprintk("%s: freeing lseg %p iomode %d " | 565 | if (seq && pnfs_seqid_is_newer(lseg->pls_seq, seq)) |
566 | continue; | ||
567 | dprintk("%s: freeing lseg %p iomode %d seq %u " | ||
544 | "offset %llu length %llu\n", __func__, | 568 | "offset %llu length %llu\n", __func__, |
545 | lseg, lseg->pls_range.iomode, lseg->pls_range.offset, | 569 | lseg, lseg->pls_range.iomode, lseg->pls_seq, |
546 | lseg->pls_range.length); | 570 | lseg->pls_range.offset, lseg->pls_range.length); |
547 | if (!mark_lseg_invalid(lseg, tmp_list)) | 571 | if (!mark_lseg_invalid(lseg, tmp_list)) |
548 | remaining++; | 572 | remaining++; |
549 | } | 573 | } |
@@ -730,15 +754,6 @@ pnfs_destroy_all_layouts(struct nfs_client *clp) | |||
730 | pnfs_destroy_layouts_byclid(clp, false); | 754 | pnfs_destroy_layouts_byclid(clp, false); |
731 | } | 755 | } |
732 | 756 | ||
733 | /* | ||
734 | * Compare 2 layout stateid sequence ids, to see which is newer, | ||
735 | * taking into account wraparound issues. | ||
736 | */ | ||
737 | static bool pnfs_seqid_is_newer(u32 s1, u32 s2) | ||
738 | { | ||
739 | return (s32)(s1 - s2) > 0; | ||
740 | } | ||
741 | |||
742 | /* update lo->plh_stateid with new if is more recent */ | 757 | /* update lo->plh_stateid with new if is more recent */ |
743 | void | 758 | void |
744 | pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new, | 759 | pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new, |
@@ -781,50 +796,22 @@ pnfs_layoutgets_blocked(const struct pnfs_layout_hdr *lo) | |||
781 | test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); | 796 | test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); |
782 | } | 797 | } |
783 | 798 | ||
784 | int | ||
785 | pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo, | ||
786 | const struct pnfs_layout_range *range, | ||
787 | struct nfs4_state *open_state) | ||
788 | { | ||
789 | int status = 0; | ||
790 | |||
791 | dprintk("--> %s\n", __func__); | ||
792 | spin_lock(&lo->plh_inode->i_lock); | ||
793 | if (pnfs_layoutgets_blocked(lo)) { | ||
794 | status = -EAGAIN; | ||
795 | } else if (!nfs4_valid_open_stateid(open_state)) { | ||
796 | status = -EBADF; | ||
797 | } else if (list_empty(&lo->plh_segs) || | ||
798 | test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags)) { | ||
799 | int seq; | ||
800 | |||
801 | do { | ||
802 | seq = read_seqbegin(&open_state->seqlock); | ||
803 | nfs4_stateid_copy(dst, &open_state->stateid); | ||
804 | } while (read_seqretry(&open_state->seqlock, seq)); | ||
805 | } else | ||
806 | nfs4_stateid_copy(dst, &lo->plh_stateid); | ||
807 | spin_unlock(&lo->plh_inode->i_lock); | ||
808 | dprintk("<-- %s\n", __func__); | ||
809 | return status; | ||
810 | } | ||
811 | |||
812 | /* | 799 | /* |
813 | * Get layout from server. | 800 | * Get layout from server. |
814 | * for now, assume that whole file layouts are requested. | 801 | * for now, assume that whole file layouts are requested. |
815 | * arg->offset: 0 | 802 | * arg->offset: 0 |
816 | * arg->length: all ones | 803 | * arg->length: all ones |
817 | */ | 804 | */ |
818 | static struct pnfs_layout_segment * | 805 | static struct pnfs_layout_segment * |
819 | send_layoutget(struct pnfs_layout_hdr *lo, | 806 | send_layoutget(struct pnfs_layout_hdr *lo, |
820 | struct nfs_open_context *ctx, | 807 | struct nfs_open_context *ctx, |
808 | nfs4_stateid *stateid, | ||
821 | const struct pnfs_layout_range *range, | 809 | const struct pnfs_layout_range *range, |
822 | gfp_t gfp_flags) | 810 | long *timeout, gfp_t gfp_flags) |
823 | { | 811 | { |
824 | struct inode *ino = lo->plh_inode; | 812 | struct inode *ino = lo->plh_inode; |
825 | struct nfs_server *server = NFS_SERVER(ino); | 813 | struct nfs_server *server = NFS_SERVER(ino); |
826 | struct nfs4_layoutget *lgp; | 814 | struct nfs4_layoutget *lgp; |
827 | struct pnfs_layout_segment *lseg; | ||
828 | loff_t i_size; | 815 | loff_t i_size; |
829 | 816 | ||
830 | dprintk("--> %s\n", __func__); | 817 | dprintk("--> %s\n", __func__); |
@@ -834,40 +821,31 @@ send_layoutget(struct pnfs_layout_hdr *lo, | |||
834 | * store in lseg. If we race with a concurrent seqid morphing | 821 | * store in lseg. If we race with a concurrent seqid morphing |
835 | * op, then re-send the LAYOUTGET. | 822 | * op, then re-send the LAYOUTGET. |
836 | */ | 823 | */ |
837 | do { | 824 | lgp = kzalloc(sizeof(*lgp), gfp_flags); |
838 | lgp = kzalloc(sizeof(*lgp), gfp_flags); | 825 | if (lgp == NULL) |
839 | if (lgp == NULL) | 826 | return ERR_PTR(-ENOMEM); |
840 | return NULL; | ||
841 | |||
842 | i_size = i_size_read(ino); | ||
843 | |||
844 | lgp->args.minlength = PAGE_SIZE; | ||
845 | if (lgp->args.minlength > range->length) | ||
846 | lgp->args.minlength = range->length; | ||
847 | if (range->iomode == IOMODE_READ) { | ||
848 | if (range->offset >= i_size) | ||
849 | lgp->args.minlength = 0; | ||
850 | else if (i_size - range->offset < lgp->args.minlength) | ||
851 | lgp->args.minlength = i_size - range->offset; | ||
852 | } | ||
853 | lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE; | ||
854 | pnfs_copy_range(&lgp->args.range, range); | ||
855 | lgp->args.type = server->pnfs_curr_ld->id; | ||
856 | lgp->args.inode = ino; | ||
857 | lgp->args.ctx = get_nfs_open_context(ctx); | ||
858 | lgp->gfp_flags = gfp_flags; | ||
859 | lgp->cred = lo->plh_lc_cred; | ||
860 | |||
861 | lseg = nfs4_proc_layoutget(lgp, gfp_flags); | ||
862 | } while (lseg == ERR_PTR(-EAGAIN)); | ||
863 | |||
864 | if (IS_ERR(lseg) && !nfs_error_is_fatal(PTR_ERR(lseg))) | ||
865 | lseg = NULL; | ||
866 | else | ||
867 | pnfs_layout_clear_fail_bit(lo, | ||
868 | pnfs_iomode_to_fail_bit(range->iomode)); | ||
869 | 827 | ||
870 | return lseg; | 828 | i_size = i_size_read(ino); |
829 | |||
830 | lgp->args.minlength = PAGE_SIZE; | ||
831 | if (lgp->args.minlength > range->length) | ||
832 | lgp->args.minlength = range->length; | ||
833 | if (range->iomode == IOMODE_READ) { | ||
834 | if (range->offset >= i_size) | ||
835 | lgp->args.minlength = 0; | ||
836 | else if (i_size - range->offset < lgp->args.minlength) | ||
837 | lgp->args.minlength = i_size - range->offset; | ||
838 | } | ||
839 | lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE; | ||
840 | pnfs_copy_range(&lgp->args.range, range); | ||
841 | lgp->args.type = server->pnfs_curr_ld->id; | ||
842 | lgp->args.inode = ino; | ||
843 | lgp->args.ctx = get_nfs_open_context(ctx); | ||
844 | nfs4_stateid_copy(&lgp->args.stateid, stateid); | ||
845 | lgp->gfp_flags = gfp_flags; | ||
846 | lgp->cred = lo->plh_lc_cred; | ||
847 | |||
848 | return nfs4_proc_layoutget(lgp, timeout, gfp_flags); | ||
871 | } | 849 | } |
872 | 850 | ||
873 | static void pnfs_clear_layoutcommit(struct inode *inode, | 851 | static void pnfs_clear_layoutcommit(struct inode *inode, |
@@ -899,6 +877,7 @@ pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo) | |||
899 | if (test_and_set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) | 877 | if (test_and_set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) |
900 | return false; | 878 | return false; |
901 | lo->plh_return_iomode = 0; | 879 | lo->plh_return_iomode = 0; |
880 | lo->plh_return_seq = 0; | ||
902 | pnfs_get_layout_hdr(lo); | 881 | pnfs_get_layout_hdr(lo); |
903 | clear_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags); | 882 | clear_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags); |
904 | return true; | 883 | return true; |
@@ -969,6 +948,7 @@ static void pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr *lo) | |||
969 | bool send; | 948 | bool send; |
970 | 949 | ||
971 | nfs4_stateid_copy(&stateid, &lo->plh_stateid); | 950 | nfs4_stateid_copy(&stateid, &lo->plh_stateid); |
951 | stateid.seqid = cpu_to_be32(lo->plh_return_seq); | ||
972 | iomode = lo->plh_return_iomode; | 952 | iomode = lo->plh_return_iomode; |
973 | send = pnfs_prepare_layoutreturn(lo); | 953 | send = pnfs_prepare_layoutreturn(lo); |
974 | spin_unlock(&inode->i_lock); | 954 | spin_unlock(&inode->i_lock); |
@@ -1012,7 +992,7 @@ _pnfs_return_layout(struct inode *ino) | |||
1012 | pnfs_get_layout_hdr(lo); | 992 | pnfs_get_layout_hdr(lo); |
1013 | empty = list_empty(&lo->plh_segs); | 993 | empty = list_empty(&lo->plh_segs); |
1014 | pnfs_clear_layoutcommit(ino, &tmp_list); | 994 | pnfs_clear_layoutcommit(ino, &tmp_list); |
1015 | pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL); | 995 | pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL, 0); |
1016 | 996 | ||
1017 | if (NFS_SERVER(ino)->pnfs_curr_ld->return_range) { | 997 | if (NFS_SERVER(ino)->pnfs_curr_ld->return_range) { |
1018 | struct pnfs_layout_range range = { | 998 | struct pnfs_layout_range range = { |
@@ -1341,23 +1321,28 @@ out_existing: | |||
1341 | 1321 | ||
1342 | /* | 1322 | /* |
1343 | * iomode matching rules: | 1323 | * iomode matching rules: |
1344 | * iomode  lseg   match | 1324 | * iomode  lseg    strict  match |
1345 | * -----   -----  ----- | 1325 | *         iomode |
1346 | * ANY     READ   true | 1326 | * -----   -----   ------  ----- |
1347 | * ANY     RW     true | 1327 | * ANY     READ    N/A     true |
1348 | * RW      READ   false | 1328 | * ANY     RW      N/A     true |
1349 | * RW      RW     true | 1329 | * RW      READ    N/A     false |
1350 | * READ    READ   true | 1330 | * RW      RW      N/A     true |
1351 | * READ    RW     true | 1331 | * READ    READ    N/A     true |
1332 | * READ    RW      true    false | ||
1333 | * READ    RW      false   true | ||
1352 | */ | 1334 | */ |
1353 | static bool | 1335 | static bool |
1354 | pnfs_lseg_range_match(const struct pnfs_layout_range *ls_range, | 1336 | pnfs_lseg_range_match(const struct pnfs_layout_range *ls_range, |
1355 | const struct pnfs_layout_range *range) | 1337 | const struct pnfs_layout_range *range, |
1338 | bool strict_iomode) | ||
1356 | { | 1339 | { |
1357 | struct pnfs_layout_range range1; | 1340 | struct pnfs_layout_range range1; |
1358 | 1341 | ||
1359 | if ((range->iomode == IOMODE_RW && | 1342 | if ((range->iomode == IOMODE_RW && |
1360 | ls_range->iomode != IOMODE_RW) || | 1343 | ls_range->iomode != IOMODE_RW) || |
1344 | (range->iomode != ls_range->iomode && | ||
1345 | strict_iomode == true) || | ||
1361 | !pnfs_lseg_range_intersecting(ls_range, range)) | 1346 | !pnfs_lseg_range_intersecting(ls_range, range)) |
1362 | return 0; | 1347 | return 0; |
1363 | 1348 | ||
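Reading the extended table above: the columns are the requested iomode, the cached lseg's iomode, the new strict_iomode flag, and the verdict; only the READ-request/RW-segment pairing changes, and only when the caller asked for a strict match. The iomode half of the predicate, distilled into a compilable sketch (the range-intersection test is left out):

#include <assert.h>
#include <stdbool.h>

enum pnfs_iomode { IOMODE_ANY, IOMODE_READ, IOMODE_RW };

/* iomode half of pnfs_lseg_range_match(): an RW request never accepts
 * a READ segment, and with strict matching a READ request no longer
 * piggybacks on an RW segment either. */
static bool iomode_matches(enum pnfs_iomode req, enum pnfs_iomode lseg,
                           bool strict)
{
        if (req == IOMODE_RW && lseg != IOMODE_RW)
                return false;
        if (strict && req != lseg)
                return false;
        return true;
}

int main(void)
{
        assert(iomode_matches(IOMODE_READ, IOMODE_RW, false)); /* reuse RW */
        assert(!iomode_matches(IOMODE_READ, IOMODE_RW, true)); /* strict */
        assert(!iomode_matches(IOMODE_RW, IOMODE_READ, false));
        assert(iomode_matches(IOMODE_RW, IOMODE_RW, true));
        return 0;
}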
@@ -1372,7 +1357,8 @@ pnfs_lseg_range_match(const struct pnfs_layout_range *ls_range, | |||
1372 | */ | 1357 | */ |
1373 | static struct pnfs_layout_segment * | 1358 | static struct pnfs_layout_segment * |
1374 | pnfs_find_lseg(struct pnfs_layout_hdr *lo, | 1359 | pnfs_find_lseg(struct pnfs_layout_hdr *lo, |
1375 | struct pnfs_layout_range *range) | 1360 | struct pnfs_layout_range *range, |
1361 | bool strict_iomode) | ||
1376 | { | 1362 | { |
1377 | struct pnfs_layout_segment *lseg, *ret = NULL; | 1363 | struct pnfs_layout_segment *lseg, *ret = NULL; |
1378 | 1364 | ||
@@ -1381,7 +1367,8 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo, | |||
1381 | list_for_each_entry(lseg, &lo->plh_segs, pls_list) { | 1367 | list_for_each_entry(lseg, &lo->plh_segs, pls_list) { |
1382 | if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) && | 1368 | if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) && |
1383 | !test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags) && | 1369 | !test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags) && |
1384 | pnfs_lseg_range_match(&lseg->pls_range, range)) { | 1370 | pnfs_lseg_range_match(&lseg->pls_range, range, |
1371 | strict_iomode)) { | ||
1385 | ret = pnfs_get_lseg(lseg); | 1372 | ret = pnfs_get_lseg(lseg); |
1386 | break; | 1373 | break; |
1387 | } | 1374 | } |
@@ -1498,6 +1485,7 @@ pnfs_update_layout(struct inode *ino, | |||
1498 | loff_t pos, | 1485 | loff_t pos, |
1499 | u64 count, | 1486 | u64 count, |
1500 | enum pnfs_iomode iomode, | 1487 | enum pnfs_iomode iomode, |
1488 | bool strict_iomode, | ||
1501 | gfp_t gfp_flags) | 1489 | gfp_t gfp_flags) |
1502 | { | 1490 | { |
1503 | struct pnfs_layout_range arg = { | 1491 | struct pnfs_layout_range arg = { |
@@ -1505,27 +1493,30 @@ pnfs_update_layout(struct inode *ino, | |||
1505 | .offset = pos, | 1493 | .offset = pos, |
1506 | .length = count, | 1494 | .length = count, |
1507 | }; | 1495 | }; |
1508 | unsigned pg_offset; | 1496 | unsigned pg_offset, seq; |
1509 | struct nfs_server *server = NFS_SERVER(ino); | 1497 | struct nfs_server *server = NFS_SERVER(ino); |
1510 | struct nfs_client *clp = server->nfs_client; | 1498 | struct nfs_client *clp = server->nfs_client; |
1511 | struct pnfs_layout_hdr *lo; | 1499 | struct pnfs_layout_hdr *lo = NULL; |
1512 | struct pnfs_layout_segment *lseg = NULL; | 1500 | struct pnfs_layout_segment *lseg = NULL; |
1501 | nfs4_stateid stateid; | ||
1502 | long timeout = 0; | ||
1503 | unsigned long giveup = jiffies + rpc_get_timeout(server->client); | ||
1513 | bool first; | 1504 | bool first; |
1514 | 1505 | ||
1515 | if (!pnfs_enabled_sb(NFS_SERVER(ino))) { | 1506 | if (!pnfs_enabled_sb(NFS_SERVER(ino))) { |
1516 | trace_pnfs_update_layout(ino, pos, count, iomode, NULL, | 1507 | trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, |
1517 | PNFS_UPDATE_LAYOUT_NO_PNFS); | 1508 | PNFS_UPDATE_LAYOUT_NO_PNFS); |
1518 | goto out; | 1509 | goto out; |
1519 | } | 1510 | } |
1520 | 1511 | ||
1521 | if (iomode == IOMODE_READ && i_size_read(ino) == 0) { | 1512 | if (iomode == IOMODE_READ && i_size_read(ino) == 0) { |
1522 | trace_pnfs_update_layout(ino, pos, count, iomode, NULL, | 1513 | trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, |
1523 | PNFS_UPDATE_LAYOUT_RD_ZEROLEN); | 1514 | PNFS_UPDATE_LAYOUT_RD_ZEROLEN); |
1524 | goto out; | 1515 | goto out; |
1525 | } | 1516 | } |
1526 | 1517 | ||
1527 | if (pnfs_within_mdsthreshold(ctx, ino, iomode)) { | 1518 | if (pnfs_within_mdsthreshold(ctx, ino, iomode)) { |
1528 | trace_pnfs_update_layout(ino, pos, count, iomode, NULL, | 1519 | trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, |
1529 | PNFS_UPDATE_LAYOUT_MDSTHRESH); | 1520 | PNFS_UPDATE_LAYOUT_MDSTHRESH); |
1530 | goto out; | 1521 | goto out; |
1531 | } | 1522 | } |
@@ -1536,14 +1527,14 @@ lookup_again: | |||
1536 | lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags); | 1527 | lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags); |
1537 | if (lo == NULL) { | 1528 | if (lo == NULL) { |
1538 | spin_unlock(&ino->i_lock); | 1529 | spin_unlock(&ino->i_lock); |
1539 | trace_pnfs_update_layout(ino, pos, count, iomode, NULL, | 1530 | trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, |
1540 | PNFS_UPDATE_LAYOUT_NOMEM); | 1531 | PNFS_UPDATE_LAYOUT_NOMEM); |
1541 | goto out; | 1532 | goto out; |
1542 | } | 1533 | } |
1543 | 1534 | ||
1544 | /* Do we even need to bother with this? */ | 1535 | /* Do we even need to bother with this? */ |
1545 | if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) { | 1536 | if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) { |
1546 | trace_pnfs_update_layout(ino, pos, count, iomode, lo, | 1537 | trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, |
1547 | PNFS_UPDATE_LAYOUT_BULK_RECALL); | 1538 | PNFS_UPDATE_LAYOUT_BULK_RECALL); |
1548 | dprintk("%s matches recall, use MDS\n", __func__); | 1539 | dprintk("%s matches recall, use MDS\n", __func__); |
1549 | goto out_unlock; | 1540 | goto out_unlock; |
@@ -1551,14 +1542,34 @@ lookup_again: | |||
1551 | 1542 | ||
1552 | /* if LAYOUTGET already failed once we don't try again */ | 1543 | /* if LAYOUTGET already failed once we don't try again */ |
1553 | if (pnfs_layout_io_test_failed(lo, iomode)) { | 1544 | if (pnfs_layout_io_test_failed(lo, iomode)) { |
1554 | trace_pnfs_update_layout(ino, pos, count, iomode, lo, | 1545 | trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, |
1555 | PNFS_UPDATE_LAYOUT_IO_TEST_FAIL); | 1546 | PNFS_UPDATE_LAYOUT_IO_TEST_FAIL); |
1556 | goto out_unlock; | 1547 | goto out_unlock; |
1557 | } | 1548 | } |
1558 | 1549 | ||
1559 | first = list_empty(&lo->plh_segs); | 1550 | lseg = pnfs_find_lseg(lo, &arg, strict_iomode); |
1560 | if (first) { | 1551 | if (lseg) { |
1561 | /* The first layoutget for the file. Need to serialize per | 1552 | trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, |
1553 | PNFS_UPDATE_LAYOUT_FOUND_CACHED); | ||
1554 | goto out_unlock; | ||
1555 | } | ||
1556 | |||
1557 | if (!nfs4_valid_open_stateid(ctx->state)) { | ||
1558 | trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, | ||
1559 | PNFS_UPDATE_LAYOUT_INVALID_OPEN); | ||
1560 | goto out_unlock; | ||
1561 | } | ||
1562 | |||
1563 | /* | ||
1564 | * Choose a stateid for the LAYOUTGET. If we don't have a layout | ||
1565 | * stateid, or it has been invalidated, then we must use the open | ||
1566 | * stateid. | ||
1567 | */ | ||
1568 | if (lo->plh_stateid.seqid == 0 || | ||
1569 | test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags)) { | ||
1570 | |||
1571 | /* | ||
1572 | * The first layoutget for the file. Need to serialize per | ||
1562 | * RFC 5661 Errata 3208. | 1573 | * RFC 5661 Errata 3208. |
1563 | */ | 1574 | */ |
1564 | if (test_and_set_bit(NFS_LAYOUT_FIRST_LAYOUTGET, | 1575 | if (test_and_set_bit(NFS_LAYOUT_FIRST_LAYOUTGET, |
@@ -1567,18 +1578,17 @@ lookup_again: | |||
1567 | wait_on_bit(&lo->plh_flags, NFS_LAYOUT_FIRST_LAYOUTGET, | 1578 | wait_on_bit(&lo->plh_flags, NFS_LAYOUT_FIRST_LAYOUTGET, |
1568 | TASK_UNINTERRUPTIBLE); | 1579 | TASK_UNINTERRUPTIBLE); |
1569 | pnfs_put_layout_hdr(lo); | 1580 | pnfs_put_layout_hdr(lo); |
1581 | dprintk("%s retrying\n", __func__); | ||
1570 | goto lookup_again; | 1582 | goto lookup_again; |
1571 | } | 1583 | } |
1584 | |||
1585 | first = true; | ||
1586 | do { | ||
1587 | seq = read_seqbegin(&ctx->state->seqlock); | ||
1588 | nfs4_stateid_copy(&stateid, &ctx->state->stateid); | ||
1589 | } while (read_seqretry(&ctx->state->seqlock, seq)); | ||
1572 | } else { | 1590 | } else { |
1573 | /* Check to see if the layout for the given range | 1591 | nfs4_stateid_copy(&stateid, &lo->plh_stateid); |
1574 | * already exists | ||
1575 | */ | ||
1576 | lseg = pnfs_find_lseg(lo, &arg); | ||
1577 | if (lseg) { | ||
1578 | trace_pnfs_update_layout(ino, pos, count, iomode, lo, | ||
1579 | PNFS_UPDATE_LAYOUT_FOUND_CACHED); | ||
1580 | goto out_unlock; | ||
1581 | } | ||
1582 | } | 1592 | } |
1583 | 1593 | ||
1584 | /* | 1594 | /* |
@@ -1593,15 +1603,17 @@ lookup_again: | |||
1593 | pnfs_clear_first_layoutget(lo); | 1603 | pnfs_clear_first_layoutget(lo); |
1594 | pnfs_put_layout_hdr(lo); | 1604 | pnfs_put_layout_hdr(lo); |
1595 | dprintk("%s retrying\n", __func__); | 1605 | dprintk("%s retrying\n", __func__); |
1606 | trace_pnfs_update_layout(ino, pos, count, iomode, lo, | ||
1607 | lseg, PNFS_UPDATE_LAYOUT_RETRY); | ||
1596 | goto lookup_again; | 1608 | goto lookup_again; |
1597 | } | 1609 | } |
1598 | trace_pnfs_update_layout(ino, pos, count, iomode, lo, | 1610 | trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, |
1599 | PNFS_UPDATE_LAYOUT_RETURN); | 1611 | PNFS_UPDATE_LAYOUT_RETURN); |
1600 | goto out_put_layout_hdr; | 1612 | goto out_put_layout_hdr; |
1601 | } | 1613 | } |
1602 | 1614 | ||
1603 | if (pnfs_layoutgets_blocked(lo)) { | 1615 | if (pnfs_layoutgets_blocked(lo)) { |
1604 | trace_pnfs_update_layout(ino, pos, count, iomode, lo, | 1616 | trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, |
1605 | PNFS_UPDATE_LAYOUT_BLOCKED); | 1617 | PNFS_UPDATE_LAYOUT_BLOCKED); |
1606 | goto out_unlock; | 1618 | goto out_unlock; |
1607 | } | 1619 | } |
@@ -1626,10 +1638,36 @@ lookup_again: | |||
1626 | if (arg.length != NFS4_MAX_UINT64) | 1638 | if (arg.length != NFS4_MAX_UINT64) |
1627 | arg.length = PAGE_ALIGN(arg.length); | 1639 | arg.length = PAGE_ALIGN(arg.length); |
1628 | 1640 | ||
1629 | lseg = send_layoutget(lo, ctx, &arg, gfp_flags); | 1641 | lseg = send_layoutget(lo, ctx, &stateid, &arg, &timeout, gfp_flags); |
1630 | atomic_dec(&lo->plh_outstanding); | 1642 | trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, |
1631 | trace_pnfs_update_layout(ino, pos, count, iomode, lo, | ||
1632 | PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET); | 1643 | PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET); |
1644 | if (IS_ERR(lseg)) { | ||
1645 | switch(PTR_ERR(lseg)) { | ||
1646 | case -ERECALLCONFLICT: | ||
1647 | if (time_after(jiffies, giveup)) | ||
1648 | lseg = NULL; | ||
1649 | /* Fallthrough */ | ||
1650 | case -EAGAIN: | ||
1651 | if (first) | ||
1652 | pnfs_clear_first_layoutget(lo); | ||
1653 | pnfs_put_layout_hdr(lo); | ||
1654 | if (lseg) { | ||
1655 | trace_pnfs_update_layout(ino, pos, count, | ||
1656 | iomode, lo, lseg, PNFS_UPDATE_LAYOUT_RETRY); | ||
1657 | goto lookup_again; | ||
1658 | } | ||
1659 | /* Fallthrough */ | ||
1660 | default: | ||
1661 | if (!nfs_error_is_fatal(PTR_ERR(lseg))) { | ||
1662 | pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode)); | ||
1663 | lseg = NULL; | ||
1664 | } | ||
1665 | } | ||
1666 | } else { | ||
1667 | pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode)); | ||
1668 | } | ||
1669 | |||
1670 | atomic_dec(&lo->plh_outstanding); | ||
1633 | out_put_layout_hdr: | 1671 | out_put_layout_hdr: |
1634 | if (first) | 1672 | if (first) |
1635 | pnfs_clear_first_layoutget(lo); | 1673 | pnfs_clear_first_layoutget(lo); |
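The switch above is where the retry policy removed from send_layoutget() resurfaces: -EAGAIN always loops back to lookup_again, -ERECALLCONFLICT (errno 530, added to include/linux/errno.h by this series) retries only until the giveup deadline computed from the RPC timeout, and any other non-fatal error just clears the fail bit and falls back to the MDS. Note that the NFS_LAYOUT_FIRST_LAYOUTGET serialization bit must be cleared before the header reference is dropped, since the put may free lo. A rough userspace model of the classification, with illustrative names:

#include <stdbool.h>
#include <stdint.h>

#define ERECALLCONFLICT 530  /* conflict with recalled state (this series) */

/* time_after(a, b) for a wrapping tick counter, as in the kernel. */
static bool time_after(uint32_t a, uint32_t b)
{
        return (int32_t)(b - a) < 0;
}

enum action { RETRY_LAYOUTGET, FALL_BACK_TO_MDS };

/* Model of the new error handling: the deadline is fixed once, before
 * the first attempt, so repeated recall conflicts cannot stall forever. */
static enum action classify(long err, uint32_t now, uint32_t giveup)
{
        switch (err) {
        case -ERECALLCONFLICT:
                if (time_after(now, giveup))
                        return FALL_BACK_TO_MDS;
                /* fall through: still within budget, try again */
        case -EAGAIN:
                return RETRY_LAYOUTGET;
        default:
                return FALL_BACK_TO_MDS;  /* fatal errors surface elsewhere */
        }
}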
@@ -1678,38 +1716,36 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) | |||
1678 | struct pnfs_layout_segment *lseg; | 1716 | struct pnfs_layout_segment *lseg; |
1679 | struct inode *ino = lo->plh_inode; | 1717 | struct inode *ino = lo->plh_inode; |
1680 | LIST_HEAD(free_me); | 1718 | LIST_HEAD(free_me); |
1681 | int status = -EINVAL; | ||
1682 | 1719 | ||
1683 | if (!pnfs_sanity_check_layout_range(&res->range)) | 1720 | if (!pnfs_sanity_check_layout_range(&res->range)) |
1684 | goto out; | 1721 | return ERR_PTR(-EINVAL); |
1685 | 1722 | ||
1686 | /* Inject layout blob into I/O device driver */ | 1723 | /* Inject layout blob into I/O device driver */ |
1687 | lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res, lgp->gfp_flags); | 1724 | lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res, lgp->gfp_flags); |
1688 | if (!lseg || IS_ERR(lseg)) { | 1725 | if (IS_ERR_OR_NULL(lseg)) { |
1689 | if (!lseg) | 1726 | if (!lseg) |
1690 | status = -ENOMEM; | 1727 | lseg = ERR_PTR(-ENOMEM); |
1691 | else | 1728 | |
1692 | status = PTR_ERR(lseg); | 1729 | dprintk("%s: Could not allocate layout: error %ld\n", |
1693 | dprintk("%s: Could not allocate layout: error %d\n", | 1730 | __func__, PTR_ERR(lseg)); |
1694 | __func__, status); | 1731 | return lseg; |
1695 | goto out; | ||
1696 | } | 1732 | } |
1697 | 1733 | ||
1698 | init_lseg(lo, lseg); | 1734 | init_lseg(lo, lseg); |
1699 | lseg->pls_range = res->range; | 1735 | lseg->pls_range = res->range; |
1736 | lseg->pls_seq = be32_to_cpu(res->stateid.seqid); | ||
1700 | 1737 | ||
1701 | spin_lock(&ino->i_lock); | 1738 | spin_lock(&ino->i_lock); |
1702 | if (pnfs_layoutgets_blocked(lo)) { | 1739 | if (pnfs_layoutgets_blocked(lo)) { |
1703 | dprintk("%s forget reply due to state\n", __func__); | 1740 | dprintk("%s forget reply due to state\n", __func__); |
1704 | goto out_forget_reply; | 1741 | goto out_forget; |
1705 | } | 1742 | } |
1706 | 1743 | ||
1707 | if (nfs4_stateid_match_other(&lo->plh_stateid, &res->stateid)) { | 1744 | if (nfs4_stateid_match_other(&lo->plh_stateid, &res->stateid)) { |
1708 | /* existing state ID, make sure the sequence number matches. */ | 1745 | /* existing state ID, make sure the sequence number matches. */ |
1709 | if (pnfs_layout_stateid_blocked(lo, &res->stateid)) { | 1746 | if (pnfs_layout_stateid_blocked(lo, &res->stateid)) { |
1710 | dprintk("%s forget reply due to sequence\n", __func__); | 1747 | dprintk("%s forget reply due to sequence\n", __func__); |
1711 | status = -EAGAIN; | 1748 | goto out_forget; |
1712 | goto out_forget_reply; | ||
1713 | } | 1749 | } |
1714 | pnfs_set_layout_stateid(lo, &res->stateid, false); | 1750 | pnfs_set_layout_stateid(lo, &res->stateid, false); |
1715 | } else { | 1751 | } else { |
@@ -1718,7 +1754,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) | |||
1718 | * inode invalid, and don't bother validating the stateid | 1754 | * inode invalid, and don't bother validating the stateid |
1719 | * sequence number. | 1755 | * sequence number. |
1720 | */ | 1756 | */ |
1721 | pnfs_mark_matching_lsegs_invalid(lo, &free_me, NULL); | 1757 | pnfs_mark_matching_lsegs_invalid(lo, &free_me, NULL, 0); |
1722 | 1758 | ||
1723 | nfs4_stateid_copy(&lo->plh_stateid, &res->stateid); | 1759 | nfs4_stateid_copy(&lo->plh_stateid, &res->stateid); |
1724 | lo->plh_barrier = be32_to_cpu(res->stateid.seqid); | 1760 | lo->plh_barrier = be32_to_cpu(res->stateid.seqid); |
@@ -1735,18 +1771,17 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) | |||
1735 | spin_unlock(&ino->i_lock); | 1771 | spin_unlock(&ino->i_lock); |
1736 | pnfs_free_lseg_list(&free_me); | 1772 | pnfs_free_lseg_list(&free_me); |
1737 | return lseg; | 1773 | return lseg; |
1738 | out: | ||
1739 | return ERR_PTR(status); | ||
1740 | 1774 | ||
1741 | out_forget_reply: | 1775 | out_forget: |
1742 | spin_unlock(&ino->i_lock); | 1776 | spin_unlock(&ino->i_lock); |
1743 | lseg->pls_layout = lo; | 1777 | lseg->pls_layout = lo; |
1744 | NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg); | 1778 | NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg); |
1745 | goto out; | 1779 | return ERR_PTR(-EAGAIN); |
1746 | } | 1780 | } |
1747 | 1781 | ||
1748 | static void | 1782 | static void |
1749 | pnfs_set_plh_return_iomode(struct pnfs_layout_hdr *lo, enum pnfs_iomode iomode) | 1783 | pnfs_set_plh_return_info(struct pnfs_layout_hdr *lo, enum pnfs_iomode iomode, |
1784 | u32 seq) | ||
1750 | { | 1785 | { |
1751 | if (lo->plh_return_iomode == iomode) | 1786 | if (lo->plh_return_iomode == iomode) |
1752 | return; | 1787 | return; |
@@ -1754,6 +1789,8 @@ pnfs_set_plh_return_iomode(struct pnfs_layout_hdr *lo, enum pnfs_iomode iomode) | |||
1754 | iomode = IOMODE_ANY; | 1789 | iomode = IOMODE_ANY; |
1755 | lo->plh_return_iomode = iomode; | 1790 | lo->plh_return_iomode = iomode; |
1756 | set_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags); | 1791 | set_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags); |
1792 | if (!lo->plh_return_seq || pnfs_seqid_is_newer(seq, lo->plh_return_seq)) | ||
1793 | lo->plh_return_seq = seq; | ||
1757 | } | 1794 | } |
1758 | 1795 | ||
1759 | /** | 1796 | /** |
@@ -1769,7 +1806,8 @@ pnfs_set_plh_return_iomode(struct pnfs_layout_hdr *lo, enum pnfs_iomode iomode) | |||
1769 | int | 1806 | int |
1770 | pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo, | 1807 | pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo, |
1771 | struct list_head *tmp_list, | 1808 | struct list_head *tmp_list, |
1772 | const struct pnfs_layout_range *return_range) | 1809 | const struct pnfs_layout_range *return_range, |
1810 | u32 seq) | ||
1773 | { | 1811 | { |
1774 | struct pnfs_layout_segment *lseg, *next; | 1812 | struct pnfs_layout_segment *lseg, *next; |
1775 | int remaining = 0; | 1813 | int remaining = 0; |
@@ -1792,8 +1830,11 @@ pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo, | |||
1792 | continue; | 1830 | continue; |
1793 | remaining++; | 1831 | remaining++; |
1794 | set_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags); | 1832 | set_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags); |
1795 | pnfs_set_plh_return_iomode(lo, return_range->iomode); | ||
1796 | } | 1833 | } |
1834 | |||
1835 | if (remaining) | ||
1836 | pnfs_set_plh_return_info(lo, return_range->iomode, seq); | ||
1837 | |||
1797 | return remaining; | 1838 | return remaining; |
1798 | } | 1839 | } |
1799 | 1840 | ||
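Hoisting pnfs_set_plh_return_info() out of the loop records the return iomode/seq once rather than once per matching segment, and only when the scan actually marked something, so a miss no longer touches the layout's return state. Schematically (a toy model, not the kernel structures):

#include <stdbool.h>
#include <stddef.h>

struct seg { bool valid; bool matches_range; };

/* Toy version of the reordered pnfs_mark_matching_lsegs_return():
 * count the segments marked for return first, then record the return
 * info a single time, and only if the count is non-zero. */
static int mark_matching(struct seg *segs, size_t n, bool *record_return)
{
        int remaining = 0;
        size_t i;

        for (i = 0; i < n; i++)
                if (segs[i].valid && segs[i].matches_range)
                        remaining++;  /* was: record info here, per hit */
        *record_return = remaining != 0;
        return remaining;
}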
@@ -1810,13 +1851,14 @@ void pnfs_error_mark_layout_for_return(struct inode *inode, | |||
1810 | bool return_now = false; | 1851 | bool return_now = false; |
1811 | 1852 | ||
1812 | spin_lock(&inode->i_lock); | 1853 | spin_lock(&inode->i_lock); |
1813 | pnfs_set_plh_return_iomode(lo, range.iomode); | 1854 | pnfs_set_plh_return_info(lo, range.iomode, lseg->pls_seq); |
1814 | /* | 1855 | /* |
1815 | * mark all matching lsegs so that we are sure to have no live | 1856 | * mark all matching lsegs so that we are sure to have no live |
1816 | * segments at hand when sending layoutreturn. See pnfs_put_lseg() | 1857 | * segments at hand when sending layoutreturn. See pnfs_put_lseg() |
1817 | * for how it works. | 1858 | * for how it works. |
1818 | */ | 1859 | */ |
1819 | if (!pnfs_mark_matching_lsegs_return(lo, &free_me, &range)) { | 1860 | if (!pnfs_mark_matching_lsegs_return(lo, &free_me, |
1861 | &range, lseg->pls_seq)) { | ||
1820 | nfs4_stateid stateid; | 1862 | nfs4_stateid stateid; |
1821 | enum pnfs_iomode iomode = lo->plh_return_iomode; | 1863 | enum pnfs_iomode iomode = lo->plh_return_iomode; |
1822 | 1864 | ||
@@ -1849,6 +1891,7 @@ pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *r | |||
1849 | req_offset(req), | 1891 | req_offset(req), |
1850 | rd_size, | 1892 | rd_size, |
1851 | IOMODE_READ, | 1893 | IOMODE_READ, |
1894 | false, | ||
1852 | GFP_KERNEL); | 1895 | GFP_KERNEL); |
1853 | if (IS_ERR(pgio->pg_lseg)) { | 1896 | if (IS_ERR(pgio->pg_lseg)) { |
1854 | pgio->pg_error = PTR_ERR(pgio->pg_lseg); | 1897 | pgio->pg_error = PTR_ERR(pgio->pg_lseg); |
@@ -1873,6 +1916,7 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, | |||
1873 | req_offset(req), | 1916 | req_offset(req), |
1874 | wb_size, | 1917 | wb_size, |
1875 | IOMODE_RW, | 1918 | IOMODE_RW, |
1919 | false, | ||
1876 | GFP_NOFS); | 1920 | GFP_NOFS); |
1877 | if (IS_ERR(pgio->pg_lseg)) { | 1921 | if (IS_ERR(pgio->pg_lseg)) { |
1878 | pgio->pg_error = PTR_ERR(pgio->pg_lseg); | 1922 | pgio->pg_error = PTR_ERR(pgio->pg_lseg); |
@@ -2143,12 +2187,15 @@ pnfs_try_to_read_data(struct nfs_pgio_header *hdr, | |||
2143 | } | 2187 | } |
2144 | 2188 | ||
2145 | /* Resend all requests through pnfs. */ | 2189 | /* Resend all requests through pnfs. */ |
2146 | int pnfs_read_resend_pnfs(struct nfs_pgio_header *hdr) | 2190 | void pnfs_read_resend_pnfs(struct nfs_pgio_header *hdr) |
2147 | { | 2191 | { |
2148 | struct nfs_pageio_descriptor pgio; | 2192 | struct nfs_pageio_descriptor pgio; |
2149 | 2193 | ||
2150 | nfs_pageio_init_read(&pgio, hdr->inode, false, hdr->completion_ops); | 2194 | if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { |
2151 | return nfs_pageio_resend(&pgio, hdr); | 2195 | nfs_pageio_init_read(&pgio, hdr->inode, false, |
2196 | hdr->completion_ops); | ||
2197 | hdr->task.tk_status = nfs_pageio_resend(&pgio, hdr); | ||
2198 | } | ||
2152 | } | 2199 | } |
2153 | EXPORT_SYMBOL_GPL(pnfs_read_resend_pnfs); | 2200 | EXPORT_SYMBOL_GPL(pnfs_read_resend_pnfs); |
2154 | 2201 | ||
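pnfs_read_resend_pnfs() becomes void: the NFS_IOHDR_REDO bit makes the resend a one-shot operation under racing error paths, and the outcome is parked in hdr->task.tk_status for pnfs_do_read() below to inspect. The guard reduces to an atomic test-and-set; a stand-alone sketch using C11 atomics:

#include <stdatomic.h>
#include <stdio.h>

/* Stand-in for the NFS_IOHDR_REDO guard: atomic test-and-set ensures
 * the header is requeued through pNFS at most once even if two error
 * paths race, with the result stored instead of returned. */
struct pgio_header {
        atomic_flag redo;   /* initialised clear */
        int tk_status;
};

static void read_resend_pnfs(struct pgio_header *hdr)
{
        if (!atomic_flag_test_and_set(&hdr->redo)) {
                hdr->tk_status = 0;  /* result of the (modelled) resend */
                printf("resending through pNFS\n");
        }
}

int main(void)
{
        struct pgio_header hdr = { .redo = ATOMIC_FLAG_INIT, .tk_status = 0 };

        read_resend_pnfs(&hdr);  /* performs the resend */
        read_resend_pnfs(&hdr);  /* no-op: already redone */
        return 0;
}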
@@ -2158,12 +2205,11 @@ pnfs_do_read(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr) | |||
2158 | const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; | 2205 | const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; |
2159 | struct pnfs_layout_segment *lseg = desc->pg_lseg; | 2206 | struct pnfs_layout_segment *lseg = desc->pg_lseg; |
2160 | enum pnfs_try_status trypnfs; | 2207 | enum pnfs_try_status trypnfs; |
2161 | int err = 0; | ||
2162 | 2208 | ||
2163 | trypnfs = pnfs_try_to_read_data(hdr, call_ops, lseg); | 2209 | trypnfs = pnfs_try_to_read_data(hdr, call_ops, lseg); |
2164 | if (trypnfs == PNFS_TRY_AGAIN) | 2210 | if (trypnfs == PNFS_TRY_AGAIN) |
2165 | err = pnfs_read_resend_pnfs(hdr); | 2211 | pnfs_read_resend_pnfs(hdr); |
2166 | if (trypnfs == PNFS_NOT_ATTEMPTED || err) | 2212 | if (trypnfs == PNFS_NOT_ATTEMPTED || hdr->task.tk_status) |
2167 | pnfs_read_through_mds(desc, hdr); | 2213 | pnfs_read_through_mds(desc, hdr); |
2168 | } | 2214 | } |
2169 | 2215 | ||
@@ -2405,7 +2451,7 @@ pnfs_report_layoutstat(struct inode *inode, gfp_t gfp_flags) | |||
2405 | spin_lock(&inode->i_lock); | 2451 | spin_lock(&inode->i_lock); |
2406 | if (!NFS_I(inode)->layout) { | 2452 | if (!NFS_I(inode)->layout) { |
2407 | spin_unlock(&inode->i_lock); | 2453 | spin_unlock(&inode->i_lock); |
2408 | goto out; | 2454 | goto out_clear_layoutstats; |
2409 | } | 2455 | } |
2410 | hdr = NFS_I(inode)->layout; | 2456 | hdr = NFS_I(inode)->layout; |
2411 | pnfs_get_layout_hdr(hdr); | 2457 | pnfs_get_layout_hdr(hdr); |
@@ -2434,6 +2480,7 @@ out_free: | |||
2434 | kfree(data); | 2480 | kfree(data); |
2435 | out_put: | 2481 | out_put: |
2436 | pnfs_put_layout_hdr(hdr); | 2482 | pnfs_put_layout_hdr(hdr); |
2483 | out_clear_layoutstats: | ||
2437 | smp_mb__before_atomic(); | 2484 | smp_mb__before_atomic(); |
2438 | clear_bit(NFS_INO_LAYOUTSTATS, &nfsi->flags); | 2485 | clear_bit(NFS_INO_LAYOUTSTATS, &nfsi->flags); |
2439 | smp_mb__after_atomic(); | 2486 | smp_mb__after_atomic(); |
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 1ac1db5f6dad..b21bd0bee784 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h | |||
@@ -64,6 +64,7 @@ struct pnfs_layout_segment { | |||
64 | struct list_head pls_lc_list; | 64 | struct list_head pls_lc_list; |
65 | struct pnfs_layout_range pls_range; | 65 | struct pnfs_layout_range pls_range; |
66 | atomic_t pls_refcount; | 66 | atomic_t pls_refcount; |
67 | u32 pls_seq; | ||
67 | unsigned long pls_flags; | 68 | unsigned long pls_flags; |
68 | struct pnfs_layout_hdr *pls_layout; | 69 | struct pnfs_layout_hdr *pls_layout; |
69 | struct work_struct pls_work; | 70 | struct work_struct pls_work; |
@@ -194,6 +195,7 @@ struct pnfs_layout_hdr { | |||
194 | unsigned long plh_flags; | 195 | unsigned long plh_flags; |
195 | nfs4_stateid plh_stateid; | 196 | nfs4_stateid plh_stateid; |
196 | u32 plh_barrier; /* ignore lower seqids */ | 197 | u32 plh_barrier; /* ignore lower seqids */ |
198 | u32 plh_return_seq; | ||
197 | enum pnfs_iomode plh_return_iomode; | 199 | enum pnfs_iomode plh_return_iomode; |
198 | loff_t plh_lwb; /* last write byte for layoutcommit */ | 200 | loff_t plh_lwb; /* last write byte for layoutcommit */ |
199 | struct rpc_cred *plh_lc_cred; /* layoutcommit cred */ | 201 | struct rpc_cred *plh_lc_cred; /* layoutcommit cred */ |
@@ -226,7 +228,7 @@ extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *); | |||
226 | extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, | 228 | extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, |
227 | struct pnfs_device *dev, | 229 | struct pnfs_device *dev, |
228 | struct rpc_cred *cred); | 230 | struct rpc_cred *cred); |
229 | extern struct pnfs_layout_segment* nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags); | 231 | extern struct pnfs_layout_segment* nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout, gfp_t gfp_flags); |
230 | extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync); | 232 | extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync); |
231 | 233 | ||
232 | /* pnfs.c */ | 234 | /* pnfs.c */ |
@@ -258,16 +260,14 @@ void pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo); | |||
258 | void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, | 260 | void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, |
259 | const nfs4_stateid *new, | 261 | const nfs4_stateid *new, |
260 | bool update_barrier); | 262 | bool update_barrier); |
261 | int pnfs_choose_layoutget_stateid(nfs4_stateid *dst, | ||
262 | struct pnfs_layout_hdr *lo, | ||
263 | const struct pnfs_layout_range *range, | ||
264 | struct nfs4_state *open_state); | ||
265 | int pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, | 263 | int pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, |
266 | struct list_head *tmp_list, | 264 | struct list_head *tmp_list, |
267 | const struct pnfs_layout_range *recall_range); | 265 | const struct pnfs_layout_range *recall_range, |
266 | u32 seq); | ||
268 | int pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo, | 267 | int pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo, |
269 | struct list_head *tmp_list, | 268 | struct list_head *tmp_list, |
270 | const struct pnfs_layout_range *recall_range); | 269 | const struct pnfs_layout_range *recall_range, |
270 | u32 seq); | ||
271 | bool pnfs_roc(struct inode *ino); | 271 | bool pnfs_roc(struct inode *ino); |
272 | void pnfs_roc_release(struct inode *ino); | 272 | void pnfs_roc_release(struct inode *ino); |
273 | void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); | 273 | void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); |
@@ -282,12 +282,13 @@ int _pnfs_return_layout(struct inode *); | |||
282 | int pnfs_commit_and_return_layout(struct inode *); | 282 | int pnfs_commit_and_return_layout(struct inode *); |
283 | void pnfs_ld_write_done(struct nfs_pgio_header *); | 283 | void pnfs_ld_write_done(struct nfs_pgio_header *); |
284 | void pnfs_ld_read_done(struct nfs_pgio_header *); | 284 | void pnfs_ld_read_done(struct nfs_pgio_header *); |
285 | int pnfs_read_resend_pnfs(struct nfs_pgio_header *); | 285 | void pnfs_read_resend_pnfs(struct nfs_pgio_header *); |
286 | struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino, | 286 | struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino, |
287 | struct nfs_open_context *ctx, | 287 | struct nfs_open_context *ctx, |
288 | loff_t pos, | 288 | loff_t pos, |
289 | u64 count, | 289 | u64 count, |
290 | enum pnfs_iomode iomode, | 290 | enum pnfs_iomode iomode, |
291 | bool strict_iomode, | ||
291 | gfp_t gfp_flags); | 292 | gfp_t gfp_flags); |
292 | void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo); | 293 | void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo); |
293 | 294 | ||
diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 4aaed890048f..0dfc476da3e1 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c | |||
@@ -61,7 +61,7 @@ EXPORT_SYMBOL_GPL(pnfs_generic_commit_release); | |||
61 | 61 | ||
62 | /* The generic layer is about to remove the req from the commit list. | 62 | /* The generic layer is about to remove the req from the commit list. |
63 | * If this will make the bucket empty, it will need to put the lseg reference. | 63 | * If this will make the bucket empty, it will need to put the lseg reference. |
64 | * Note this must be called holding the inode (/cinfo) lock | 64 | * Note this must be called holding i_lock |
65 | */ | 65 | */ |
66 | void | 66 | void |
67 | pnfs_generic_clear_request_commit(struct nfs_page *req, | 67 | pnfs_generic_clear_request_commit(struct nfs_page *req, |
@@ -98,7 +98,7 @@ pnfs_generic_transfer_commit_list(struct list_head *src, struct list_head *dst, | |||
98 | if (!nfs_lock_request(req)) | 98 | if (!nfs_lock_request(req)) |
99 | continue; | 99 | continue; |
100 | kref_get(&req->wb_kref); | 100 | kref_get(&req->wb_kref); |
101 | if (cond_resched_lock(cinfo->lock)) | 101 | if (cond_resched_lock(&cinfo->inode->i_lock)) |
102 | list_safe_reset_next(req, tmp, wb_list); | 102 | list_safe_reset_next(req, tmp, wb_list); |
103 | nfs_request_remove_commit_list(req, cinfo); | 103 | nfs_request_remove_commit_list(req, cinfo); |
104 | clear_bit(PG_COMMIT_TO_DS, &req->wb_flags); | 104 | clear_bit(PG_COMMIT_TO_DS, &req->wb_flags); |
@@ -119,7 +119,7 @@ pnfs_generic_scan_ds_commit_list(struct pnfs_commit_bucket *bucket, | |||
119 | struct list_head *dst = &bucket->committing; | 119 | struct list_head *dst = &bucket->committing; |
120 | int ret; | 120 | int ret; |
121 | 121 | ||
122 | lockdep_assert_held(cinfo->lock); | 122 | lockdep_assert_held(&cinfo->inode->i_lock); |
123 | ret = pnfs_generic_transfer_commit_list(src, dst, cinfo, max); | 123 | ret = pnfs_generic_transfer_commit_list(src, dst, cinfo, max); |
124 | if (ret) { | 124 | if (ret) { |
125 | cinfo->ds->nwritten -= ret; | 125 | cinfo->ds->nwritten -= ret; |
@@ -142,7 +142,7 @@ int pnfs_generic_scan_commit_lists(struct nfs_commit_info *cinfo, | |||
142 | { | 142 | { |
143 | int i, rv = 0, cnt; | 143 | int i, rv = 0, cnt; |
144 | 144 | ||
145 | lockdep_assert_held(cinfo->lock); | 145 | lockdep_assert_held(&cinfo->inode->i_lock); |
146 | for (i = 0; i < cinfo->ds->nbuckets && max != 0; i++) { | 146 | for (i = 0; i < cinfo->ds->nbuckets && max != 0; i++) { |
147 | cnt = pnfs_generic_scan_ds_commit_list(&cinfo->ds->buckets[i], | 147 | cnt = pnfs_generic_scan_ds_commit_list(&cinfo->ds->buckets[i], |
148 | cinfo, max); | 148 | cinfo, max); |
@@ -161,16 +161,16 @@ void pnfs_generic_recover_commit_reqs(struct list_head *dst, | |||
161 | struct pnfs_layout_segment *freeme; | 161 | struct pnfs_layout_segment *freeme; |
162 | int i; | 162 | int i; |
163 | 163 | ||
164 | lockdep_assert_held(cinfo->lock); | 164 | lockdep_assert_held(&cinfo->inode->i_lock); |
165 | restart: | 165 | restart: |
166 | for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) { | 166 | for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) { |
167 | if (pnfs_generic_transfer_commit_list(&b->written, dst, | 167 | if (pnfs_generic_transfer_commit_list(&b->written, dst, |
168 | cinfo, 0)) { | 168 | cinfo, 0)) { |
169 | freeme = b->wlseg; | 169 | freeme = b->wlseg; |
170 | b->wlseg = NULL; | 170 | b->wlseg = NULL; |
171 | spin_unlock(cinfo->lock); | 171 | spin_unlock(&cinfo->inode->i_lock); |
172 | pnfs_put_lseg(freeme); | 172 | pnfs_put_lseg(freeme); |
173 | spin_lock(cinfo->lock); | 173 | spin_lock(&cinfo->inode->i_lock); |
174 | goto restart; | 174 | goto restart; |
175 | } | 175 | } |
176 | } | 176 | } |
@@ -186,7 +186,7 @@ static void pnfs_generic_retry_commit(struct nfs_commit_info *cinfo, int idx) | |||
186 | LIST_HEAD(pages); | 186 | LIST_HEAD(pages); |
187 | int i; | 187 | int i; |
188 | 188 | ||
189 | spin_lock(cinfo->lock); | 189 | spin_lock(&cinfo->inode->i_lock); |
190 | for (i = idx; i < fl_cinfo->nbuckets; i++) { | 190 | for (i = idx; i < fl_cinfo->nbuckets; i++) { |
191 | bucket = &fl_cinfo->buckets[i]; | 191 | bucket = &fl_cinfo->buckets[i]; |
192 | if (list_empty(&bucket->committing)) | 192 | if (list_empty(&bucket->committing)) |
@@ -194,12 +194,12 @@ static void pnfs_generic_retry_commit(struct nfs_commit_info *cinfo, int idx) | |||
194 | freeme = bucket->clseg; | 194 | freeme = bucket->clseg; |
195 | bucket->clseg = NULL; | 195 | bucket->clseg = NULL; |
196 | list_splice_init(&bucket->committing, &pages); | 196 | list_splice_init(&bucket->committing, &pages); |
197 | spin_unlock(cinfo->lock); | 197 | spin_unlock(&cinfo->inode->i_lock); |
198 | nfs_retry_commit(&pages, freeme, cinfo, i); | 198 | nfs_retry_commit(&pages, freeme, cinfo, i); |
199 | pnfs_put_lseg(freeme); | 199 | pnfs_put_lseg(freeme); |
200 | spin_lock(cinfo->lock); | 200 | spin_lock(&cinfo->inode->i_lock); |
201 | } | 201 | } |
202 | spin_unlock(cinfo->lock); | 202 | spin_unlock(&cinfo->inode->i_lock); |
203 | } | 203 | } |
204 | 204 | ||
205 | static unsigned int | 205 | static unsigned int |
@@ -238,14 +238,31 @@ void pnfs_fetch_commit_bucket_list(struct list_head *pages, | |||
238 | struct pnfs_commit_bucket *bucket; | 238 | struct pnfs_commit_bucket *bucket; |
239 | 239 | ||
240 | bucket = &cinfo->ds->buckets[data->ds_commit_index]; | 240 | bucket = &cinfo->ds->buckets[data->ds_commit_index]; |
241 | spin_lock(cinfo->lock); | 241 | spin_lock(&cinfo->inode->i_lock); |
242 | list_splice_init(&bucket->committing, pages); | 242 | list_splice_init(&bucket->committing, pages); |
243 | data->lseg = bucket->clseg; | 243 | data->lseg = bucket->clseg; |
244 | bucket->clseg = NULL; | 244 | bucket->clseg = NULL; |
245 | spin_unlock(cinfo->lock); | 245 | spin_unlock(&cinfo->inode->i_lock); |
246 | 246 | ||
247 | } | 247 | } |
248 | 248 | ||
249 | /* Helper function for pnfs_generic_commit_pagelist to catch an empty | ||
250 | * page list. This can happen when two commits race. */ | ||
251 | static bool | ||
252 | pnfs_generic_commit_cancel_empty_pagelist(struct list_head *pages, | ||
253 | struct nfs_commit_data *data, | ||
254 | struct nfs_commit_info *cinfo) | ||
255 | { | ||
256 | if (list_empty(pages)) { | ||
257 | if (atomic_dec_and_test(&cinfo->mds->rpcs_out)) | ||
258 | wake_up_atomic_t(&cinfo->mds->rpcs_out); | ||
259 | nfs_commitdata_release(data); | ||
260 | return true; | ||
261 | } | ||
262 | |||
263 | return false; | ||
264 | } | ||
265 | |||
249 | /* This follows nfs_commit_list pretty closely */ | 266 | /* This follows nfs_commit_list pretty closely */ |
250 | int | 267 | int |
251 | pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages, | 268 | pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages, |
@@ -280,6 +297,11 @@ pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages, | |||
280 | list_for_each_entry_safe(data, tmp, &list, pages) { | 297 | list_for_each_entry_safe(data, tmp, &list, pages) { |
281 | list_del_init(&data->pages); | 298 | list_del_init(&data->pages); |
282 | if (data->ds_commit_index < 0) { | 299 | if (data->ds_commit_index < 0) { |
300 | /* another commit raced with us */ | ||
301 | if (pnfs_generic_commit_cancel_empty_pagelist(mds_pages, | ||
302 | data, cinfo)) | ||
303 | continue; | ||
304 | |||
283 | nfs_init_commit(data, mds_pages, NULL, cinfo); | 305 | nfs_init_commit(data, mds_pages, NULL, cinfo); |
284 | nfs_initiate_commit(NFS_CLIENT(inode), data, | 306 | nfs_initiate_commit(NFS_CLIENT(inode), data, |
285 | NFS_PROTO(data->inode), | 307 | NFS_PROTO(data->inode), |
@@ -288,6 +310,12 @@ pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages, | |||
288 | LIST_HEAD(pages); | 310 | LIST_HEAD(pages); |
289 | 311 | ||
290 | pnfs_fetch_commit_bucket_list(&pages, data, cinfo); | 312 | pnfs_fetch_commit_bucket_list(&pages, data, cinfo); |
313 | |||
314 | /* another commit raced with us */ | ||
315 | if (pnfs_generic_commit_cancel_empty_pagelist(&pages, | ||
316 | data, cinfo)) | ||
317 | continue; | ||
318 | |||
291 | nfs_init_commit(data, &pages, data->lseg, cinfo); | 319 | nfs_init_commit(data, &pages, data->lseg, cinfo); |
292 | initiate_commit(data, how); | 320 | initiate_commit(data, how); |
293 | } | 321 | } |
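The two new call sites above close a window the old code left open: a racing commit can drain the page list between scanning and sending, leaving a fully prepared but empty COMMIT. The helper releases the rpcs_out reference the request held (waking waiters on zero) and frees the commit data instead of issuing an empty RPC. A userspace model of the shape of that cancellation:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Model of pnfs_generic_commit_cancel_empty_pagelist(): if a racing
 * commit already emptied the page list, drop the rpcs_out reference
 * this request was holding and tear down the preallocated data. */
struct commit_info { atomic_int rpcs_out; };

static bool cancel_if_empty(bool pages_empty, struct commit_info *cinfo)
{
        if (!pages_empty)
                return false;
        if (atomic_fetch_sub(&cinfo->rpcs_out, 1) == 1)
                printf("last commit done: wake waiters\n");
        /* nfs_commitdata_release(data) would go here */
        return true;
}

int main(void)
{
        struct commit_info cinfo = { .rpcs_out = 1 };

        if (cancel_if_empty(true, &cinfo))
                printf("empty page list: commit cancelled\n");
        return 0;
}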
@@ -874,12 +902,12 @@ pnfs_layout_mark_request_commit(struct nfs_page *req, | |||
874 | struct list_head *list; | 902 | struct list_head *list; |
875 | struct pnfs_commit_bucket *buckets; | 903 | struct pnfs_commit_bucket *buckets; |
876 | 904 | ||
877 | spin_lock(cinfo->lock); | 905 | spin_lock(&cinfo->inode->i_lock); |
878 | buckets = cinfo->ds->buckets; | 906 | buckets = cinfo->ds->buckets; |
879 | list = &buckets[ds_commit_idx].written; | 907 | list = &buckets[ds_commit_idx].written; |
880 | if (list_empty(list)) { | 908 | if (list_empty(list)) { |
881 | if (!pnfs_is_valid_lseg(lseg)) { | 909 | if (!pnfs_is_valid_lseg(lseg)) { |
882 | spin_unlock(cinfo->lock); | 910 | spin_unlock(&cinfo->inode->i_lock); |
883 | cinfo->completion_ops->resched_write(cinfo, req); | 911 | cinfo->completion_ops->resched_write(cinfo, req); |
884 | return; | 912 | return; |
885 | } | 913 | } |
@@ -896,7 +924,7 @@ pnfs_layout_mark_request_commit(struct nfs_page *req, | |||
896 | cinfo->ds->nwritten++; | 924 | cinfo->ds->nwritten++; |
897 | 925 | ||
898 | nfs_request_add_commit_list_locked(req, list, cinfo); | 926 | nfs_request_add_commit_list_locked(req, list, cinfo); |
899 | spin_unlock(cinfo->lock); | 927 | spin_unlock(&cinfo->inode->i_lock); |
900 | nfs_mark_page_unstable(req->wb_page, cinfo); | 928 | nfs_mark_page_unstable(req->wb_page, cinfo); |
901 | } | 929 | } |
902 | EXPORT_SYMBOL_GPL(pnfs_layout_mark_request_commit); | 930 | EXPORT_SYMBOL_GPL(pnfs_layout_mark_request_commit); |
diff --git a/fs/nfs/super.c b/fs/nfs/super.c index f1268280244e..2137e0202f25 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c | |||
@@ -191,6 +191,7 @@ static const match_table_t nfs_mount_option_tokens = { | |||
191 | 191 | ||
192 | enum { | 192 | enum { |
193 | Opt_xprt_udp, Opt_xprt_udp6, Opt_xprt_tcp, Opt_xprt_tcp6, Opt_xprt_rdma, | 193 | Opt_xprt_udp, Opt_xprt_udp6, Opt_xprt_tcp, Opt_xprt_tcp6, Opt_xprt_rdma, |
194 | Opt_xprt_rdma6, | ||
194 | 195 | ||
195 | Opt_xprt_err | 196 | Opt_xprt_err |
196 | }; | 197 | }; |
@@ -201,6 +202,7 @@ static const match_table_t nfs_xprt_protocol_tokens = { | |||
201 | { Opt_xprt_tcp, "tcp" }, | 202 | { Opt_xprt_tcp, "tcp" }, |
202 | { Opt_xprt_tcp6, "tcp6" }, | 203 | { Opt_xprt_tcp6, "tcp6" }, |
203 | { Opt_xprt_rdma, "rdma" }, | 204 | { Opt_xprt_rdma, "rdma" }, |
205 | { Opt_xprt_rdma6, "rdma6" }, | ||
204 | 206 | ||
205 | { Opt_xprt_err, NULL } | 207 | { Opt_xprt_err, NULL } |
206 | }; | 208 | }; |
@@ -1456,6 +1458,8 @@ static int nfs_parse_mount_options(char *raw, | |||
1456 | mnt->flags |= NFS_MOUNT_TCP; | 1458 | mnt->flags |= NFS_MOUNT_TCP; |
1457 | mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP; | 1459 | mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP; |
1458 | break; | 1460 | break; |
1461 | case Opt_xprt_rdma6: | ||
1462 | protofamily = AF_INET6; | ||
1459 | case Opt_xprt_rdma: | 1463 | case Opt_xprt_rdma: |
1460 | /* vector side protocols to TCP */ | 1464 | /* vector side protocols to TCP */ |
1461 | mnt->flags |= NFS_MOUNT_TCP; | 1465 | mnt->flags |= NFS_MOUNT_TCP; |
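The new Opt_xprt_rdma6 case intentionally has no break: it pins the address family to AF_INET6 and then falls through into the shared rdma setup, the same shape the tcp6/udp6 options use. Practically, mounts can now pass -o proto=rdma6 to force RDMA over IPv6. A stripped-down sketch of the pattern (constants renamed so it stands alone):

#include <stdio.h>

enum family { FAM_UNSPEC, FAM_INET, FAM_INET6 };
enum opt { OPT_RDMA, OPT_RDMA6 };

/* The "6" variant only selects the family, then shares the rest of
 * the setup with the plain option via an intentional fallthrough. */
static enum family parse_rdma_opt(enum opt o)
{
        enum family f = FAM_UNSPEC;

        switch (o) {
        case OPT_RDMA6:
                f = FAM_INET6;
                /* fall through */
        case OPT_RDMA:
                /* common rdma setup: vector side protocols to TCP, etc. */
                break;
        }
        return f;
}

int main(void)
{
        printf("rdma6 -> family %d\n", parse_rdma_opt(OPT_RDMA6));
        printf("rdma  -> family %d\n", parse_rdma_opt(OPT_RDMA));
        return 0;
}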
@@ -2408,6 +2412,11 @@ static int nfs_compare_super_address(struct nfs_server *server1, | |||
2408 | struct nfs_server *server2) | 2412 | struct nfs_server *server2) |
2409 | { | 2413 | { |
2410 | struct sockaddr *sap1, *sap2; | 2414 | struct sockaddr *sap1, *sap2; |
2415 | struct rpc_xprt *xprt1 = server1->client->cl_xprt; | ||
2416 | struct rpc_xprt *xprt2 = server2->client->cl_xprt; | ||
2417 | |||
2418 | if (!net_eq(xprt1->xprt_net, xprt2->xprt_net)) | ||
2419 | return 0; | ||
2411 | 2420 | ||
2412 | sap1 = (struct sockaddr *)&server1->nfs_client->cl_addr; | 2421 | sap1 = (struct sockaddr *)&server1->nfs_client->cl_addr; |
2413 | sap2 = (struct sockaddr *)&server2->nfs_client->cl_addr; | 2422 | sap2 = (struct sockaddr *)&server2->nfs_client->cl_addr; |
diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 5f4fd53e5764..e1c74d3db64d 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c | |||
@@ -245,8 +245,7 @@ static void nfs_mark_uptodate(struct nfs_page *req) | |||
245 | static int wb_priority(struct writeback_control *wbc) | 245 | static int wb_priority(struct writeback_control *wbc) |
246 | { | 246 | { |
247 | int ret = 0; | 247 | int ret = 0; |
248 | if (wbc->for_reclaim) | 248 | |
249 | return FLUSH_HIGHPRI | FLUSH_COND_STABLE; | ||
250 | if (wbc->sync_mode == WB_SYNC_ALL) | 249 | if (wbc->sync_mode == WB_SYNC_ALL) |
251 | ret = FLUSH_COND_STABLE; | 250 | ret = FLUSH_COND_STABLE; |
252 | return ret; | 251 | return ret; |
@@ -737,7 +736,7 @@ static void nfs_inode_remove_request(struct nfs_page *req) | |||
737 | head = req->wb_head; | 736 | head = req->wb_head; |
738 | 737 | ||
739 | spin_lock(&inode->i_lock); | 738 | spin_lock(&inode->i_lock); |
740 | if (likely(!PageSwapCache(head->wb_page))) { | 739 | if (likely(head->wb_page && !PageSwapCache(head->wb_page))) { |
741 | set_page_private(head->wb_page, 0); | 740 | set_page_private(head->wb_page, 0); |
742 | ClearPagePrivate(head->wb_page); | 741 | ClearPagePrivate(head->wb_page); |
743 | smp_mb__after_atomic(); | 742 | smp_mb__after_atomic(); |
@@ -759,7 +758,8 @@ static void nfs_inode_remove_request(struct nfs_page *req) | |||
759 | static void | 758 | static void |
760 | nfs_mark_request_dirty(struct nfs_page *req) | 759 | nfs_mark_request_dirty(struct nfs_page *req) |
761 | { | 760 | { |
762 | __set_page_dirty_nobuffers(req->wb_page); | 761 | if (req->wb_page) |
762 | __set_page_dirty_nobuffers(req->wb_page); | ||
763 | } | 763 | } |
764 | 764 | ||
765 | /* | 765 | /* |
@@ -804,7 +804,7 @@ nfs_page_search_commits_for_head_request_locked(struct nfs_inode *nfsi, | |||
804 | * number of outstanding requests requiring a commit as well as | 804 | * number of outstanding requests requiring a commit as well as |
805 | * the MM page stats. | 805 | * the MM page stats. |
806 | * | 806 | * |
807 | * The caller must hold the cinfo->lock, and the nfs_page lock. | 807 | * The caller must hold cinfo->inode->i_lock, and the nfs_page lock. |
808 | */ | 808 | */ |
809 | void | 809 | void |
810 | nfs_request_add_commit_list_locked(struct nfs_page *req, struct list_head *dst, | 810 | nfs_request_add_commit_list_locked(struct nfs_page *req, struct list_head *dst, |
@@ -832,10 +832,11 @@ EXPORT_SYMBOL_GPL(nfs_request_add_commit_list_locked); | |||
832 | void | 832 | void |
833 | nfs_request_add_commit_list(struct nfs_page *req, struct nfs_commit_info *cinfo) | 833 | nfs_request_add_commit_list(struct nfs_page *req, struct nfs_commit_info *cinfo) |
834 | { | 834 | { |
835 | spin_lock(cinfo->lock); | 835 | spin_lock(&cinfo->inode->i_lock); |
836 | nfs_request_add_commit_list_locked(req, &cinfo->mds->list, cinfo); | 836 | nfs_request_add_commit_list_locked(req, &cinfo->mds->list, cinfo); |
837 | spin_unlock(cinfo->lock); | 837 | spin_unlock(&cinfo->inode->i_lock); |
838 | nfs_mark_page_unstable(req->wb_page, cinfo); | 838 | if (req->wb_page) |
839 | nfs_mark_page_unstable(req->wb_page, cinfo); | ||
839 | } | 840 | } |
840 | EXPORT_SYMBOL_GPL(nfs_request_add_commit_list); | 841 | EXPORT_SYMBOL_GPL(nfs_request_add_commit_list); |
841 | 842 | ||
@@ -864,7 +865,7 @@ EXPORT_SYMBOL_GPL(nfs_request_remove_commit_list); | |||
864 | static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo, | 865 | static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo, |
865 | struct inode *inode) | 866 | struct inode *inode) |
866 | { | 867 | { |
867 | cinfo->lock = &inode->i_lock; | 868 | cinfo->inode = inode; |
868 | cinfo->mds = &NFS_I(inode)->commit_info; | 869 | cinfo->mds = &NFS_I(inode)->commit_info; |
869 | cinfo->ds = pnfs_get_ds_info(inode); | 870 | cinfo->ds = pnfs_get_ds_info(inode); |
870 | cinfo->dreq = NULL; | 871 | cinfo->dreq = NULL; |
@@ -967,7 +968,7 @@ nfs_reqs_to_commit(struct nfs_commit_info *cinfo) | |||
967 | return cinfo->mds->ncommit; | 968 | return cinfo->mds->ncommit; |
968 | } | 969 | } |
969 | 970 | ||
970 | /* cinfo->lock held by caller */ | 971 | /* cinfo->inode->i_lock held by caller */ |
971 | int | 972 | int |
972 | nfs_scan_commit_list(struct list_head *src, struct list_head *dst, | 973 | nfs_scan_commit_list(struct list_head *src, struct list_head *dst, |
973 | struct nfs_commit_info *cinfo, int max) | 974 | struct nfs_commit_info *cinfo, int max) |
@@ -979,7 +980,7 @@ nfs_scan_commit_list(struct list_head *src, struct list_head *dst, | |||
979 | if (!nfs_lock_request(req)) | 980 | if (!nfs_lock_request(req)) |
980 | continue; | 981 | continue; |
981 | kref_get(&req->wb_kref); | 982 | kref_get(&req->wb_kref); |
982 | if (cond_resched_lock(cinfo->lock)) | 983 | if (cond_resched_lock(&cinfo->inode->i_lock)) |
983 | list_safe_reset_next(req, tmp, wb_list); | 984 | list_safe_reset_next(req, tmp, wb_list); |
984 | nfs_request_remove_commit_list(req, cinfo); | 985 | nfs_request_remove_commit_list(req, cinfo); |
985 | nfs_list_add_request(req, dst); | 986 | nfs_list_add_request(req, dst); |
@@ -1005,7 +1006,7 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst, | |||
1005 | { | 1006 | { |
1006 | int ret = 0; | 1007 | int ret = 0; |
1007 | 1008 | ||
1008 | spin_lock(cinfo->lock); | 1009 | spin_lock(&cinfo->inode->i_lock); |
1009 | if (cinfo->mds->ncommit > 0) { | 1010 | if (cinfo->mds->ncommit > 0) { |
1010 | const int max = INT_MAX; | 1011 | const int max = INT_MAX; |
1011 | 1012 | ||
@@ -1013,7 +1014,7 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst, | |||
1013 | cinfo, max); | 1014 | cinfo, max); |
1014 | ret += pnfs_scan_commit_lists(inode, cinfo, max - ret); | 1015 | ret += pnfs_scan_commit_lists(inode, cinfo, max - ret); |
1015 | } | 1016 | } |
1016 | spin_unlock(cinfo->lock); | 1017 | spin_unlock(&cinfo->inode->i_lock); |
1017 | return ret; | 1018 | return ret; |
1018 | } | 1019 | } |
1019 | 1020 | ||
@@ -1709,6 +1710,10 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how, | |||
1709 | { | 1710 | { |
1710 | struct nfs_commit_data *data; | 1711 | struct nfs_commit_data *data; |
1711 | 1712 | ||
1713 | /* another commit raced with us */ | ||
1714 | if (list_empty(head)) | ||
1715 | return 0; | ||
1716 | |||
1712 | data = nfs_commitdata_alloc(); | 1717 | data = nfs_commitdata_alloc(); |
1713 | 1718 | ||
1714 | if (!data) | 1719 | if (!data) |
@@ -1724,6 +1729,36 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how, | |||
1724 | return -ENOMEM; | 1729 | return -ENOMEM; |
1725 | } | 1730 | } |
1726 | 1731 | ||
1732 | int nfs_commit_file(struct file *file, struct nfs_write_verifier *verf) | ||
1733 | { | ||
1734 | struct inode *inode = file_inode(file); | ||
1735 | struct nfs_open_context *open; | ||
1736 | struct nfs_commit_info cinfo; | ||
1737 | struct nfs_page *req; | ||
1738 | int ret; | ||
1739 | |||
1740 | open = get_nfs_open_context(nfs_file_open_context(file)); | ||
1741 | req = nfs_create_request(open, NULL, NULL, 0, i_size_read(inode)); | ||
1742 | if (IS_ERR(req)) { | ||
1743 | ret = PTR_ERR(req); | ||
1744 | goto out_put; | ||
1745 | } | ||
1746 | |||
1747 | nfs_init_cinfo_from_inode(&cinfo, inode); | ||
1748 | |||
1749 | memcpy(&req->wb_verf, verf, sizeof(struct nfs_write_verifier)); | ||
1750 | nfs_request_add_commit_list(req, &cinfo); | ||
1751 | ret = nfs_commit_inode(inode, FLUSH_SYNC); | ||
1752 | if (ret > 0) | ||
1753 | ret = 0; | ||
1754 | |||
1755 | nfs_free_request(req); | ||
1756 | out_put: | ||
1757 | put_nfs_open_context(open); | ||
1758 | return ret; | ||
1759 | } | ||
1760 | EXPORT_SYMBOL_GPL(nfs_commit_file); | ||
1761 | |||
1727 | /* | 1762 | /* |
1728 | * COMMIT call returned | 1763 | * COMMIT call returned |
1729 | */ | 1764 | */ |
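nfs_commit_file() fabricates an nfs_page with no backing page, stamps it with the caller's write verifier, and drives it through the ordinary commit path, apparently as plumbing for the COPY support this series introduces in nfs_xdr.h. Those page-less requests are why wb_page NULL checks now appear throughout this file; the guard pattern, as a toy illustration:

#include <stddef.h>
#include <stdio.h>

struct page;
struct nfs_page_sketch {
        struct page *wb_page;  /* NULL for nfs_commit_file() requests */
};

/* Every page-specific side effect now has to tolerate a missing page;
 * compare nfs_mark_request_dirty() and nfs_commit_release_pages(). */
static void mark_request_dirty(struct nfs_page_sketch *req)
{
        if (req->wb_page)
                printf("redirty page %p\n", (void *)req->wb_page);
        /* page-less commit requests: nothing to redirty */
}

int main(void)
{
        struct nfs_page_sketch pageless = { .wb_page = NULL };

        mark_request_dirty(&pageless);  /* safely a no-op */
        return 0;
}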
@@ -1748,7 +1783,8 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) | |||
1748 | while (!list_empty(&data->pages)) { | 1783 | while (!list_empty(&data->pages)) { |
1749 | req = nfs_list_entry(data->pages.next); | 1784 | req = nfs_list_entry(data->pages.next); |
1750 | nfs_list_remove_request(req); | 1785 | nfs_list_remove_request(req); |
1751 | nfs_clear_page_commit(req->wb_page); | 1786 | if (req->wb_page) |
1787 | nfs_clear_page_commit(req->wb_page); | ||
1752 | 1788 | ||
1753 | dprintk("NFS: commit (%s/%llu %d@%lld)", | 1789 | dprintk("NFS: commit (%s/%llu %d@%lld)", |
1754 | req->wb_context->dentry->d_sb->s_id, | 1790 | req->wb_context->dentry->d_sb->s_id, |
diff --git a/include/linux/errno.h b/include/linux/errno.h index 89627b9187f9..7ce9fb1b7d28 100644 --- a/include/linux/errno.h +++ b/include/linux/errno.h | |||
@@ -28,5 +28,6 @@ | |||
28 | #define EBADTYPE 527 /* Type not supported by server */ | 28 | #define EBADTYPE 527 /* Type not supported by server */ |
29 | #define EJUKEBOX 528 /* Request initiated, but will not complete before timeout */ | 29 | #define EJUKEBOX 528 /* Request initiated, but will not complete before timeout */ |
30 | #define EIOCBQUEUED 529 /* iocb queued, will get completion event */ | 30 | #define EIOCBQUEUED 529 /* iocb queued, will get completion event */ |
31 | #define ERECALLCONFLICT 530 /* conflict with recalled state */ | ||
31 | 32 | ||
32 | #endif | 33 | #endif |
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 011433478a14..bfed6b367350 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h | |||
@@ -50,12 +50,27 @@ struct nfs4_label { | |||
50 | 50 | ||
51 | typedef struct { char data[NFS4_VERIFIER_SIZE]; } nfs4_verifier; | 51 | typedef struct { char data[NFS4_VERIFIER_SIZE]; } nfs4_verifier; |
52 | 52 | ||
53 | struct nfs_stateid4 { | 53 | struct nfs4_stateid_struct { |
54 | __be32 seqid; | 54 | union { |
55 | char other[NFS4_STATEID_OTHER_SIZE]; | 55 | char data[NFS4_STATEID_SIZE]; |
56 | } __attribute__ ((packed)); | 56 | struct { |
57 | __be32 seqid; | ||
58 | char other[NFS4_STATEID_OTHER_SIZE]; | ||
59 | } __attribute__ ((packed)); | ||
60 | }; | ||
61 | |||
62 | enum { | ||
63 | NFS4_INVALID_STATEID_TYPE = 0, | ||
64 | NFS4_SPECIAL_STATEID_TYPE, | ||
65 | NFS4_OPEN_STATEID_TYPE, | ||
66 | NFS4_LOCK_STATEID_TYPE, | ||
67 | NFS4_DELEGATION_STATEID_TYPE, | ||
68 | NFS4_LAYOUT_STATEID_TYPE, | ||
69 | NFS4_PNFS_DS_STATEID_TYPE, | ||
70 | } type; | ||
71 | }; | ||
57 | 72 | ||
58 | typedef struct nfs_stateid4 nfs4_stateid; | 73 | typedef struct nfs4_stateid_struct nfs4_stateid; |
59 | 74 | ||
60 | enum nfs_opnum4 { | 75 | enum nfs_opnum4 { |
61 | OP_ACCESS = 3, | 76 | OP_ACCESS = 3, |
@@ -504,6 +519,7 @@ enum { | |||
504 | NFSPROC4_CLNT_DEALLOCATE, | 519 | NFSPROC4_CLNT_DEALLOCATE, |
505 | NFSPROC4_CLNT_LAYOUTSTATS, | 520 | NFSPROC4_CLNT_LAYOUTSTATS, |
506 | NFSPROC4_CLNT_CLONE, | 521 | NFSPROC4_CLNT_CLONE, |
522 | NFSPROC4_CLNT_COPY, | ||
507 | }; | 523 | }; |
508 | 524 | ||
509 | /* nfs41 types */ | 525 | /* nfs41 types */ |
@@ -621,7 +637,9 @@ enum pnfs_update_layout_reason { | |||
621 | PNFS_UPDATE_LAYOUT_IO_TEST_FAIL, | 637 | PNFS_UPDATE_LAYOUT_IO_TEST_FAIL, |
622 | PNFS_UPDATE_LAYOUT_FOUND_CACHED, | 638 | PNFS_UPDATE_LAYOUT_FOUND_CACHED, |
623 | PNFS_UPDATE_LAYOUT_RETURN, | 639 | PNFS_UPDATE_LAYOUT_RETURN, |
640 | PNFS_UPDATE_LAYOUT_RETRY, | ||
624 | PNFS_UPDATE_LAYOUT_BLOCKED, | 641 | PNFS_UPDATE_LAYOUT_BLOCKED, |
642 | PNFS_UPDATE_LAYOUT_INVALID_OPEN, | ||
625 | PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET, | 643 | PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET, |
626 | }; | 644 | }; |
627 | 645 | ||
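The union preserves the flat data[NFS4_STATEID_SIZE] view, so existing decoders that memcpy into stateid->data keep working, while the seqid/other overlay and the new type tag record which state machine a stateid belongs to. A sketch of consuming the tag (the helper is hypothetical; the enum values are from the hunk above):

	/* Sketch: distinguish, e.g., layout stateids from delegation
	 * stateids after decode (hypothetical helper). */
	static inline bool example_is_layout_stateid(const nfs4_stateid *st)
	{
		return st->type == NFS4_LAYOUT_STATEID_TYPE;
	}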
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 7fcc13c8cf1f..14a762d2734d 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h | |||
@@ -246,5 +246,6 @@ struct nfs_server { | |||
246 | #define NFS_CAP_DEALLOCATE (1U << 21) | 246 | #define NFS_CAP_DEALLOCATE (1U << 21) |
247 | #define NFS_CAP_LAYOUTSTATS (1U << 22) | 247 | #define NFS_CAP_LAYOUTSTATS (1U << 22) |
248 | #define NFS_CAP_CLONE (1U << 23) | 248 | #define NFS_CAP_CLONE (1U << 23) |
249 | #define NFS_CAP_COPY (1U << 24) | ||
249 | 250 | ||
250 | #endif | 251 | #endif |
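NFS_CAP_COPY follows the usual pattern for optional NFSv4.2 operations: set while probing the server, cleared if the server turns out not to support the operation. A sketch of the capability gate (the caller is hypothetical):

	/* Sketch: only attempt a server-side COPY when the capability
	 * bit is set (hypothetical caller). */
	static bool example_can_offload_copy(const struct nfs_server *server)
	{
		return (server->caps & NFS_CAP_COPY) != 0;
	}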
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index ee8491dadbf3..c304a11b5b1a 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h | |||
@@ -233,7 +233,6 @@ struct nfs4_layoutget_args { | |||
233 | struct inode *inode; | 233 | struct inode *inode; |
234 | struct nfs_open_context *ctx; | 234 | struct nfs_open_context *ctx; |
235 | nfs4_stateid stateid; | 235 | nfs4_stateid stateid; |
236 | unsigned long timestamp; | ||
237 | struct nfs4_layoutdriver_data layout; | 236 | struct nfs4_layoutdriver_data layout; |
238 | }; | 237 | }; |
239 | 238 | ||
@@ -251,7 +250,6 @@ struct nfs4_layoutget { | |||
251 | struct nfs4_layoutget_res res; | 250 | struct nfs4_layoutget_res res; |
252 | struct rpc_cred *cred; | 251 | struct rpc_cred *cred; |
253 | gfp_t gfp_flags; | 252 | gfp_t gfp_flags; |
254 | long timeout; | ||
255 | }; | 253 | }; |
256 | 254 | ||
257 | struct nfs4_getdeviceinfo_args { | 255 | struct nfs4_getdeviceinfo_args { |
@@ -1343,6 +1341,32 @@ struct nfs42_falloc_res { | |||
1343 | const struct nfs_server *falloc_server; | 1341 | const struct nfs_server *falloc_server; |
1344 | }; | 1342 | }; |
1345 | 1343 | ||
1344 | struct nfs42_copy_args { | ||
1345 | struct nfs4_sequence_args seq_args; | ||
1346 | |||
1347 | struct nfs_fh *src_fh; | ||
1348 | nfs4_stateid src_stateid; | ||
1349 | u64 src_pos; | ||
1350 | |||
1351 | struct nfs_fh *dst_fh; | ||
1352 | nfs4_stateid dst_stateid; | ||
1353 | u64 dst_pos; | ||
1354 | |||
1355 | u64 count; | ||
1356 | }; | ||
1357 | |||
1358 | struct nfs42_write_res { | ||
1359 | u64 count; | ||
1360 | struct nfs_writeverf verifier; | ||
1361 | }; | ||
1362 | |||
1363 | struct nfs42_copy_res { | ||
1364 | struct nfs4_sequence_res seq_res; | ||
1365 | struct nfs42_write_res write_res; | ||
1366 | bool consecutive; | ||
1367 | bool synchronous; | ||
1368 | }; | ||
1369 | |||
1346 | struct nfs42_seek_args { | 1370 | struct nfs42_seek_args { |
1347 | struct nfs4_sequence_args seq_args; | 1371 | struct nfs4_sequence_args seq_args; |
1348 | 1372 | ||
@@ -1431,7 +1455,7 @@ struct nfs_commit_completion_ops { | |||
1431 | }; | 1455 | }; |
1432 | 1456 | ||
1433 | struct nfs_commit_info { | 1457 | struct nfs_commit_info { |
1434 | spinlock_t *lock; /* inode->i_lock */ | 1458 | struct inode *inode; /* Needed for inode->i_lock */ |
1435 | struct nfs_mds_commit_info *mds; | 1459 | struct nfs_mds_commit_info *mds; |
1436 | struct pnfs_ds_commit_info *ds; | 1460 | struct pnfs_ds_commit_info *ds; |
1437 | struct nfs_direct_req *dreq; /* O_DIRECT request */ | 1461 | struct nfs_direct_req *dreq; /* O_DIRECT request */ |
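The nfs42_copy_args/nfs42_copy_res pair mirrors the NFSv4.2 COPY operation: a (filehandle, stateid, offset) triple for each side, a byte count, and a WRITE-style count-plus-verifier result. The nfs_commit_info change in the same file swaps the raw i_lock pointer for the inode itself, so commit paths can reach the lock and other inode state through one field. A sketch of filling the COPY arguments (the helper is hypothetical; field names are from the hunk above):

	/* Sketch: populate a COPY of "count" bytes from src_pos to
	 * dst_pos (hypothetical helper; stateids set up elsewhere). */
	static void example_fill_copy_args(struct nfs42_copy_args *args,
					   struct nfs_fh *src_fh, u64 src_pos,
					   struct nfs_fh *dst_fh, u64 dst_pos,
					   u64 count)
	{
		args->src_fh = src_fh;
		args->src_pos = src_pos;
		args->dst_fh = dst_fh;
		args->dst_pos = dst_pos;
		args->count = count;
	}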
diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index 6a241a277249..899791573a40 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h | |||
@@ -127,7 +127,7 @@ struct rpc_authops { | |||
127 | void (*destroy)(struct rpc_auth *); | 127 | void (*destroy)(struct rpc_auth *); |
128 | 128 | ||
129 | struct rpc_cred * (*lookup_cred)(struct rpc_auth *, struct auth_cred *, int); | 129 | struct rpc_cred * (*lookup_cred)(struct rpc_auth *, struct auth_cred *, int); |
130 | struct rpc_cred * (*crcreate)(struct rpc_auth*, struct auth_cred *, int); | 130 | struct rpc_cred * (*crcreate)(struct rpc_auth*, struct auth_cred *, int, gfp_t); |
131 | int (*list_pseudoflavors)(rpc_authflavor_t *, int); | 131 | int (*list_pseudoflavors)(rpc_authflavor_t *, int); |
132 | rpc_authflavor_t (*info2flavor)(struct rpcsec_gss_info *); | 132 | rpc_authflavor_t (*info2flavor)(struct rpcsec_gss_info *); |
133 | int (*flavor2info)(rpc_authflavor_t, | 133 | int (*flavor2info)(rpc_authflavor_t, |
@@ -167,6 +167,7 @@ void rpc_destroy_authunix(void); | |||
167 | 167 | ||
168 | struct rpc_cred * rpc_lookup_cred(void); | 168 | struct rpc_cred * rpc_lookup_cred(void); |
169 | struct rpc_cred * rpc_lookup_cred_nonblock(void); | 169 | struct rpc_cred * rpc_lookup_cred_nonblock(void); |
170 | struct rpc_cred * rpc_lookup_generic_cred(struct auth_cred *, int, gfp_t); | ||
170 | struct rpc_cred * rpc_lookup_machine_cred(const char *service_name); | 171 | struct rpc_cred * rpc_lookup_machine_cred(const char *service_name); |
171 | int rpcauth_register(const struct rpc_authops *); | 172 | int rpcauth_register(const struct rpc_authops *); |
172 | int rpcauth_unregister(const struct rpc_authops *); | 173 | int rpcauth_unregister(const struct rpc_authops *); |
@@ -178,7 +179,7 @@ rpc_authflavor_t rpcauth_get_pseudoflavor(rpc_authflavor_t, | |||
178 | int rpcauth_get_gssinfo(rpc_authflavor_t, | 179 | int rpcauth_get_gssinfo(rpc_authflavor_t, |
179 | struct rpcsec_gss_info *); | 180 | struct rpcsec_gss_info *); |
180 | int rpcauth_list_flavors(rpc_authflavor_t *, int); | 181 | int rpcauth_list_flavors(rpc_authflavor_t *, int); |
181 | struct rpc_cred * rpcauth_lookup_credcache(struct rpc_auth *, struct auth_cred *, int); | 182 | struct rpc_cred * rpcauth_lookup_credcache(struct rpc_auth *, struct auth_cred *, int, gfp_t); |
182 | void rpcauth_init_cred(struct rpc_cred *, const struct auth_cred *, struct rpc_auth *, const struct rpc_credops *); | 183 | void rpcauth_init_cred(struct rpc_cred *, const struct auth_cred *, struct rpc_auth *, const struct rpc_credops *); |
183 | struct rpc_cred * rpcauth_lookupcred(struct rpc_auth *, int); | 184 | struct rpc_cred * rpcauth_lookupcred(struct rpc_auth *, int); |
184 | struct rpc_cred * rpcauth_generic_bind_cred(struct rpc_task *, struct rpc_cred *, int); | 185 | struct rpc_cred * rpcauth_generic_bind_cred(struct rpc_task *, struct rpc_cred *, int); |
@@ -201,9 +202,28 @@ char * rpcauth_stringify_acceptor(struct rpc_cred *); | |||
201 | static inline | 202 | static inline |
202 | struct rpc_cred * get_rpccred(struct rpc_cred *cred) | 203 | struct rpc_cred * get_rpccred(struct rpc_cred *cred) |
203 | { | 204 | { |
204 | atomic_inc(&cred->cr_count); | 205 | if (cred != NULL) |
206 | atomic_inc(&cred->cr_count); | ||
205 | return cred; | 207 | return cred; |
206 | } | 208 | } |
207 | 209 | ||
210 | /** | ||
211 | * get_rpccred_rcu - get a reference to a cred using rcu-protected pointer | ||
212 | * @cred: cred of which to take a reference | ||
213 | * | ||
214 | * In some cases, we may have a pointer to a credential to which we | ||
215 | * want to take a reference, but don't already have one. Because these | ||
216 | * objects are freed using RCU, we can access the cr_count while it's | ||
217 | * on its way to destruction and only take a reference if it's not already | ||
218 | * zero. | ||
219 | */ | ||
220 | static inline struct rpc_cred * | ||
221 | get_rpccred_rcu(struct rpc_cred *cred) | ||
222 | { | ||
223 | if (atomic_inc_not_zero(&cred->cr_count)) | ||
224 | return cred; | ||
225 | return NULL; | ||
226 | } | ||
227 | |||
208 | #endif /* __KERNEL__ */ | 228 | #endif /* __KERNEL__ */ |
209 | #endif /* _LINUX_SUNRPC_AUTH_H */ | 229 | #endif /* _LINUX_SUNRPC_AUTH_H */ |
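Together with the NULL check added to get_rpccred(), get_rpccred_rcu() makes credential refcounting safe against racing teardown: it takes a reference only if cr_count has not already dropped to zero. A usage sketch under an assumed RCU-managed pointer (the container struct is hypothetical):

	struct example_ctx {				/* hypothetical */
		struct rpc_cred __rcu *cred;
	};

	/* Sketch: returns a referenced cred, or NULL if the cred is
	 * already on its way to destruction. */
	static struct rpc_cred *example_get_ctx_cred(struct example_ctx *ctx)
	{
		struct rpc_cred *cred;

		rcu_read_lock();
		cred = rcu_dereference(ctx->cred);
		if (cred)
			cred = get_rpccred_rcu(cred);
		rcu_read_unlock();
		return cred;		/* caller drops with put_rpccred() */
	}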
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 9a7ddbaf116e..19c659d1c0f8 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h | |||
@@ -176,6 +176,7 @@ void rpc_setbufsize(struct rpc_clnt *, unsigned int, unsigned int); | |||
176 | int rpc_protocol(struct rpc_clnt *); | 176 | int rpc_protocol(struct rpc_clnt *); |
177 | struct net * rpc_net_ns(struct rpc_clnt *); | 177 | struct net * rpc_net_ns(struct rpc_clnt *); |
178 | size_t rpc_max_payload(struct rpc_clnt *); | 178 | size_t rpc_max_payload(struct rpc_clnt *); |
179 | size_t rpc_max_bc_payload(struct rpc_clnt *); | ||
179 | unsigned long rpc_get_timeout(struct rpc_clnt *clnt); | 180 | unsigned long rpc_get_timeout(struct rpc_clnt *clnt); |
180 | void rpc_force_rebind(struct rpc_clnt *); | 181 | void rpc_force_rebind(struct rpc_clnt *); |
181 | size_t rpc_peeraddr(struct rpc_clnt *, struct sockaddr *, size_t); | 182 | size_t rpc_peeraddr(struct rpc_clnt *, struct sockaddr *, size_t); |
diff --git a/include/linux/sunrpc/msg_prot.h b/include/linux/sunrpc/msg_prot.h index 807371357160..59cbf16eaeb5 100644 --- a/include/linux/sunrpc/msg_prot.h +++ b/include/linux/sunrpc/msg_prot.h | |||
@@ -158,9 +158,9 @@ typedef __be32 rpc_fraghdr; | |||
158 | 158 | ||
159 | /* | 159 | /* |
160 | * Note that RFC 1833 does not put any size restrictions on the | 160 | * Note that RFC 1833 does not put any size restrictions on the |
161 | * netid string, but all currently defined netids fit in 4 bytes. | 161 | * netid string, but all currently defined netids fit in 5 bytes. |
162 | */ | 162 | */ |
163 | #define RPCBIND_MAXNETIDLEN (4u) | 163 | #define RPCBIND_MAXNETIDLEN (5u) |
164 | 164 | ||
165 | /* | 165 | /* |
166 | * Universal addresses are introduced in RFC 1833 and further spelled | 166 | * Universal addresses are introduced in RFC 1833 and further spelled |
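The bump from 4 to 5 presumably makes room for five-byte netids such as "rdma6"; the macro sizes fixed buffers that hold a netid string. A sketch (assumed usage):

	/* Sketch: NUL-terminated storage for any currently defined
	 * netid, e.g. "rdma6". */
	char netid[RPCBIND_MAXNETIDLEN + 1];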
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index fb0d212e0d3a..5aa3834619a8 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h | |||
@@ -142,6 +142,7 @@ struct rpc_xprt_ops { | |||
142 | int (*bc_setup)(struct rpc_xprt *xprt, | 142 | int (*bc_setup)(struct rpc_xprt *xprt, |
143 | unsigned int min_reqs); | 143 | unsigned int min_reqs); |
144 | int (*bc_up)(struct svc_serv *serv, struct net *net); | 144 | int (*bc_up)(struct svc_serv *serv, struct net *net); |
145 | size_t (*bc_maxpayload)(struct rpc_xprt *xprt); | ||
145 | void (*bc_free_rqst)(struct rpc_rqst *rqst); | 146 | void (*bc_free_rqst)(struct rpc_rqst *rqst); |
146 | void (*bc_destroy)(struct rpc_xprt *xprt, | 147 | void (*bc_destroy)(struct rpc_xprt *xprt, |
147 | unsigned int max_reqs); | 148 | unsigned int max_reqs); |
diff --git a/include/linux/sunrpc/xprtrdma.h b/include/linux/sunrpc/xprtrdma.h index 767190b01363..39267dc3486a 100644 --- a/include/linux/sunrpc/xprtrdma.h +++ b/include/linux/sunrpc/xprtrdma.h | |||
@@ -52,7 +52,9 @@ | |||
52 | #define RPCRDMA_DEF_SLOT_TABLE (128U) | 52 | #define RPCRDMA_DEF_SLOT_TABLE (128U) |
53 | #define RPCRDMA_MAX_SLOT_TABLE (256U) | 53 | #define RPCRDMA_MAX_SLOT_TABLE (256U) |
54 | 54 | ||
55 | #define RPCRDMA_DEF_INLINE (1024) /* default inline max */ | 55 | #define RPCRDMA_MIN_INLINE (1024) /* min inline thresh */ |
56 | #define RPCRDMA_DEF_INLINE (1024) /* default inline thresh */ | ||
57 | #define RPCRDMA_MAX_INLINE (3068) /* max inline thresh */ | ||
56 | 58 | ||
57 | /* Memory registration strategies, by number. | 59 | /* Memory registration strategies, by number. |
58 | * This is part of a kernel / user space API. Do not remove. */ | 60 | * This is part of a kernel / user space API. Do not remove. */ |
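With an explicit floor and ceiling, a configured inline threshold can be clamped rather than trusted. A sketch (the requested value is hypothetical, e.g. from a module parameter):

	/* Sketch: keep a requested inline threshold inside the
	 * supported range. */
	static unsigned int example_inline_thresh(unsigned int requested)
	{
		return clamp_t(unsigned int, requested,
			       RPCRDMA_MIN_INLINE, RPCRDMA_MAX_INLINE);
	}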
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 02f53674dc39..040ff627c18a 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c | |||
@@ -543,7 +543,7 @@ rpcauth_cache_enforce_limit(void) | |||
543 | */ | 543 | */ |
544 | struct rpc_cred * | 544 | struct rpc_cred * |
545 | rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred, | 545 | rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred, |
546 | int flags) | 546 | int flags, gfp_t gfp) |
547 | { | 547 | { |
548 | LIST_HEAD(free); | 548 | LIST_HEAD(free); |
549 | struct rpc_cred_cache *cache = auth->au_credcache; | 549 | struct rpc_cred_cache *cache = auth->au_credcache; |
@@ -580,7 +580,7 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred, | |||
580 | if (flags & RPCAUTH_LOOKUP_RCU) | 580 | if (flags & RPCAUTH_LOOKUP_RCU) |
581 | return ERR_PTR(-ECHILD); | 581 | return ERR_PTR(-ECHILD); |
582 | 582 | ||
583 | new = auth->au_ops->crcreate(auth, acred, flags); | 583 | new = auth->au_ops->crcreate(auth, acred, flags, gfp); |
584 | if (IS_ERR(new)) { | 584 | if (IS_ERR(new)) { |
585 | cred = new; | 585 | cred = new; |
586 | goto out; | 586 | goto out; |
@@ -703,8 +703,7 @@ rpcauth_bindcred(struct rpc_task *task, struct rpc_cred *cred, int flags) | |||
703 | new = rpcauth_bind_new_cred(task, lookupflags); | 703 | new = rpcauth_bind_new_cred(task, lookupflags); |
704 | if (IS_ERR(new)) | 704 | if (IS_ERR(new)) |
705 | return PTR_ERR(new); | 705 | return PTR_ERR(new); |
706 | if (req->rq_cred != NULL) | 706 | put_rpccred(req->rq_cred); |
707 | put_rpccred(req->rq_cred); | ||
708 | req->rq_cred = new; | 707 | req->rq_cred = new; |
709 | return 0; | 708 | return 0; |
710 | } | 709 | } |
@@ -712,6 +711,8 @@ rpcauth_bindcred(struct rpc_task *task, struct rpc_cred *cred, int flags) | |||
712 | void | 711 | void |
713 | put_rpccred(struct rpc_cred *cred) | 712 | put_rpccred(struct rpc_cred *cred) |
714 | { | 713 | { |
714 | if (cred == NULL) | ||
715 | return; | ||
715 | /* Fast path for unhashed credentials */ | 716 | /* Fast path for unhashed credentials */ |
716 | if (test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) == 0) { | 717 | if (test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) == 0) { |
717 | if (atomic_dec_and_test(&cred->cr_count)) | 718 | if (atomic_dec_and_test(&cred->cr_count)) |
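The NULL check added to put_rpccred() lets callers drop their own guards, as the rpcauth_bindcred() hunk above already does, and the new gfp argument lets filesystem callers ask for GFP_NOFS so a credential allocation cannot recurse into writeback. A sketch of the simplified calling pattern (hypothetical caller):

	/* Sketch: cred may legitimately still be NULL on this error
	 * path; no guard is needed after this patch. */
	static void example_cleanup(struct rpc_cred *cred)
	{
		put_rpccred(cred);
	}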
diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c index 41248b1820c7..54dd3fdead54 100644 --- a/net/sunrpc/auth_generic.c +++ b/net/sunrpc/auth_generic.c | |||
@@ -38,6 +38,13 @@ struct rpc_cred *rpc_lookup_cred(void) | |||
38 | } | 38 | } |
39 | EXPORT_SYMBOL_GPL(rpc_lookup_cred); | 39 | EXPORT_SYMBOL_GPL(rpc_lookup_cred); |
40 | 40 | ||
41 | struct rpc_cred * | ||
42 | rpc_lookup_generic_cred(struct auth_cred *acred, int flags, gfp_t gfp) | ||
43 | { | ||
44 | return rpcauth_lookup_credcache(&generic_auth, acred, flags, gfp); | ||
45 | } | ||
46 | EXPORT_SYMBOL_GPL(rpc_lookup_generic_cred); | ||
47 | |||
41 | struct rpc_cred *rpc_lookup_cred_nonblock(void) | 48 | struct rpc_cred *rpc_lookup_cred_nonblock(void) |
42 | { | 49 | { |
43 | return rpcauth_lookupcred(&generic_auth, RPCAUTH_LOOKUP_RCU); | 50 | return rpcauth_lookupcred(&generic_auth, RPCAUTH_LOOKUP_RCU); |
@@ -77,15 +84,15 @@ static struct rpc_cred *generic_bind_cred(struct rpc_task *task, | |||
77 | static struct rpc_cred * | 84 | static struct rpc_cred * |
78 | generic_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) | 85 | generic_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) |
79 | { | 86 | { |
80 | return rpcauth_lookup_credcache(&generic_auth, acred, flags); | 87 | return rpcauth_lookup_credcache(&generic_auth, acred, flags, GFP_KERNEL); |
81 | } | 88 | } |
82 | 89 | ||
83 | static struct rpc_cred * | 90 | static struct rpc_cred * |
84 | generic_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) | 91 | generic_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, gfp_t gfp) |
85 | { | 92 | { |
86 | struct generic_cred *gcred; | 93 | struct generic_cred *gcred; |
87 | 94 | ||
88 | gcred = kmalloc(sizeof(*gcred), GFP_KERNEL); | 95 | gcred = kmalloc(sizeof(*gcred), gfp); |
89 | if (gcred == NULL) | 96 | if (gcred == NULL) |
90 | return ERR_PTR(-ENOMEM); | 97 | return ERR_PTR(-ENOMEM); |
91 | 98 | ||
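rpc_lookup_generic_cred() exposes the credcache lookup with a caller-chosen allocation mode, so code on the writeback path can use GFP_NOFS. A lookup sketch for the current task (the helper is hypothetical; the auth_cred fields are standard):

	/* Sketch: look up a generic cred without risking filesystem
	 * recursion during allocation (hypothetical helper). */
	static struct rpc_cred *example_lookup_cred_nofs(void)
	{
		struct auth_cred acred = {
			.uid = current_fsuid(),
			.gid = current_fsgid(),
		};

		return rpc_lookup_generic_cred(&acred, 0, GFP_NOFS);
	}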
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 15612ffa8d57..e64ae93d5b4f 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c | |||
@@ -1299,11 +1299,11 @@ gss_destroy_cred(struct rpc_cred *cred) | |||
1299 | static struct rpc_cred * | 1299 | static struct rpc_cred * |
1300 | gss_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) | 1300 | gss_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) |
1301 | { | 1301 | { |
1302 | return rpcauth_lookup_credcache(auth, acred, flags); | 1302 | return rpcauth_lookup_credcache(auth, acred, flags, GFP_NOFS); |
1303 | } | 1303 | } |
1304 | 1304 | ||
1305 | static struct rpc_cred * | 1305 | static struct rpc_cred * |
1306 | gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) | 1306 | gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, gfp_t gfp) |
1307 | { | 1307 | { |
1308 | struct gss_auth *gss_auth = container_of(auth, struct gss_auth, rpc_auth); | 1308 | struct gss_auth *gss_auth = container_of(auth, struct gss_auth, rpc_auth); |
1309 | struct gss_cred *cred = NULL; | 1309 | struct gss_cred *cred = NULL; |
@@ -1313,7 +1313,7 @@ gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) | |||
1313 | __func__, from_kuid(&init_user_ns, acred->uid), | 1313 | __func__, from_kuid(&init_user_ns, acred->uid), |
1314 | auth->au_flavor); | 1314 | auth->au_flavor); |
1315 | 1315 | ||
1316 | if (!(cred = kzalloc(sizeof(*cred), GFP_NOFS))) | 1316 | if (!(cred = kzalloc(sizeof(*cred), gfp))) |
1317 | goto out_err; | 1317 | goto out_err; |
1318 | 1318 | ||
1319 | rpcauth_init_cred(&cred->gc_base, acred, auth, &gss_credops); | 1319 | rpcauth_init_cred(&cred->gc_base, acred, auth, &gss_credops); |
diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index 0d3dd364c22f..9f65452b7cbc 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c | |||
@@ -52,11 +52,11 @@ unx_destroy(struct rpc_auth *auth) | |||
52 | static struct rpc_cred * | 52 | static struct rpc_cred * |
53 | unx_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) | 53 | unx_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) |
54 | { | 54 | { |
55 | return rpcauth_lookup_credcache(auth, acred, flags); | 55 | return rpcauth_lookup_credcache(auth, acred, flags, GFP_NOFS); |
56 | } | 56 | } |
57 | 57 | ||
58 | static struct rpc_cred * | 58 | static struct rpc_cred * |
59 | unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) | 59 | unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, gfp_t gfp) |
60 | { | 60 | { |
61 | struct unx_cred *cred; | 61 | struct unx_cred *cred; |
62 | unsigned int groups = 0; | 62 | unsigned int groups = 0; |
@@ -66,7 +66,7 @@ unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) | |||
66 | from_kuid(&init_user_ns, acred->uid), | 66 | from_kuid(&init_user_ns, acred->uid), |
67 | from_kgid(&init_user_ns, acred->gid)); | 67 | from_kgid(&init_user_ns, acred->gid)); |
68 | 68 | ||
69 | if (!(cred = kmalloc(sizeof(*cred), GFP_NOFS))) | 69 | if (!(cred = kmalloc(sizeof(*cred), gfp))) |
70 | return ERR_PTR(-ENOMEM); | 70 | return ERR_PTR(-ENOMEM); |
71 | 71 | ||
72 | rpcauth_init_cred(&cred->uc_base, acred, auth, &unix_credops); | 72 | rpcauth_init_cred(&cred->uc_base, acred, auth, &unix_credops); |
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 7e0c9bf22df8..06b4df9faaa1 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c | |||
@@ -1414,6 +1414,23 @@ size_t rpc_max_payload(struct rpc_clnt *clnt) | |||
1414 | EXPORT_SYMBOL_GPL(rpc_max_payload); | 1414 | EXPORT_SYMBOL_GPL(rpc_max_payload); |
1415 | 1415 | ||
1416 | /** | 1416 | /** |
1417 | * rpc_max_bc_payload - Get maximum backchannel payload size, in bytes | ||
1418 | * @clnt: RPC client to query | ||
1419 | */ | ||
1420 | size_t rpc_max_bc_payload(struct rpc_clnt *clnt) | ||
1421 | { | ||
1422 | struct rpc_xprt *xprt; | ||
1423 | size_t ret; | ||
1424 | |||
1425 | rcu_read_lock(); | ||
1426 | xprt = rcu_dereference(clnt->cl_xprt); | ||
1427 | ret = xprt->ops->bc_maxpayload(xprt); | ||
1428 | rcu_read_unlock(); | ||
1429 | return ret; | ||
1430 | } | ||
1431 | EXPORT_SYMBOL_GPL(rpc_max_bc_payload); | ||
1432 | |||
1433 | /** | ||
1417 | * rpc_get_timeout - Get timeout for transport in units of HZ | 1434 | * rpc_get_timeout - Get timeout for transport in units of HZ |
1418 | * @clnt: RPC client to query | 1435 | * @clnt: RPC client to query |
1419 | */ | 1436 | */ |
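rpc_max_bc_payload() gives upper layers, notably NFSv4.1 session negotiation, a transport-accurate backchannel bound instead of a hard-coded one; on RDMA the bound derives from the inline thresholds, as the backchannel.c hunk below shows. A usage sketch (the negotiation context is hypothetical):

	/* Sketch: cap the advertised backchannel response size by what
	 * the transport can actually carry (hypothetical caller). */
	static size_t example_bc_resp_limit(struct rpc_clnt *clnt,
					    size_t wanted)
	{
		return min_t(size_t, wanted, rpc_max_bc_payload(clnt));
	}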
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 6bdb3865212d..c4f3cc0c0775 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c | |||
@@ -797,6 +797,8 @@ void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p) | |||
797 | xdr_set_iov(xdr, buf->head, buf->len); | 797 | xdr_set_iov(xdr, buf->head, buf->len); |
798 | else if (buf->page_len != 0) | 798 | else if (buf->page_len != 0) |
799 | xdr_set_page_base(xdr, 0, buf->len); | 799 | xdr_set_page_base(xdr, 0, buf->len); |
800 | else | ||
801 | xdr_set_iov(xdr, buf->head, buf->len); | ||
800 | if (p != NULL && p > xdr->p && xdr->end >= p) { | 802 | if (p != NULL && p > xdr->p && xdr->end >= p) { |
801 | xdr->nwords -= p - xdr->p; | 803 | xdr->nwords -= p - xdr->p; |
802 | xdr->p = p; | 804 | xdr->p = p; |
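The new else branch covers a decode buffer whose head kvec is empty and which carries no page data, for example a zero-length reply; previously the stream cursor was left unset in that case. A sketch of the situation (hypothetical setup):

	/* Sketch: even an empty xdr_buf now yields a valid, empty
	 * stream positioned at the head. */
	static void example_empty_decode(void)
	{
		struct xdr_buf buf = { .len = 0 };	/* no head, no pages */
		struct xdr_stream xdr;

		xdr_init_decode(&xdr, &buf, NULL);
	}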
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c index 2dcd7640eeb5..87762d976b63 100644 --- a/net/sunrpc/xprtrdma/backchannel.c +++ b/net/sunrpc/xprtrdma/backchannel.c | |||
@@ -192,6 +192,22 @@ int xprt_rdma_bc_up(struct svc_serv *serv, struct net *net) | |||
192 | } | 192 | } |
193 | 193 | ||
194 | /** | 194 | /** |
195 | * xprt_rdma_bc_maxpayload - Return maximum backchannel message size | ||
196 | * @xprt: transport | ||
197 | * | ||
198 | * Returns maximum size, in bytes, of a backchannel message | ||
199 | */ | ||
200 | size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *xprt) | ||
201 | { | ||
202 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); | ||
203 | struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data; | ||
204 | size_t maxmsg; | ||
205 | |||
206 | maxmsg = min_t(unsigned int, cdata->inline_rsize, cdata->inline_wsize); | ||
207 | return maxmsg - RPCRDMA_HDRLEN_MIN; | ||
208 | } | ||
209 | |||
210 | /** | ||
195 | * rpcrdma_bc_marshal_reply - Send backwards direction reply | 211 | * rpcrdma_bc_marshal_reply - Send backwards direction reply |
196 | * @rqst: buffer containing RPC reply data | 212 | * @rqst: buffer containing RPC reply data |
197 | * | 213 | * |
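Worked example of the arithmetic above, assuming common settings: RPCRDMA_HDRLEN_MIN is the minimal seven-word (28-byte) RPC-over-RDMA header, so with the default 1024-byte inline thresholds from xprtrdma.h the backchannel can carry at most min(1024, 1024) - 28 = 996 bytes of RPC message.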
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c index b289e106540b..6326ebe8b595 100644 --- a/net/sunrpc/xprtrdma/fmr_ops.c +++ b/net/sunrpc/xprtrdma/fmr_ops.c | |||
@@ -35,10 +35,71 @@ | |||
35 | /* Maximum scatter/gather per FMR */ | 35 | /* Maximum scatter/gather per FMR */ |
36 | #define RPCRDMA_MAX_FMR_SGES (64) | 36 | #define RPCRDMA_MAX_FMR_SGES (64) |
37 | 37 | ||
38 | static struct workqueue_struct *fmr_recovery_wq; | ||
39 | |||
40 | #define FMR_RECOVERY_WQ_FLAGS (WQ_UNBOUND) | ||
41 | |||
42 | int | ||
43 | fmr_alloc_recovery_wq(void) | ||
44 | { | ||
45 | fmr_recovery_wq = alloc_workqueue("fmr_recovery", FMR_RECOVERY_WQ_FLAGS, 0); | ||
46 | return !fmr_recovery_wq ? -ENOMEM : 0; | ||
47 | } | ||
48 | |||
49 | void | ||
50 | fmr_destroy_recovery_wq(void) | ||
51 | { | ||
52 | struct workqueue_struct *wq; | ||
53 | |||
54 | if (!fmr_recovery_wq) | ||
55 | return; | ||
56 | |||
57 | wq = fmr_recovery_wq; | ||
58 | fmr_recovery_wq = NULL; | ||
59 | destroy_workqueue(wq); | ||
60 | } | ||
61 | |||
62 | static int | ||
63 | __fmr_unmap(struct rpcrdma_mw *mw) | ||
64 | { | ||
65 | LIST_HEAD(l); | ||
66 | |||
67 | list_add(&mw->fmr.fmr->list, &l); | ||
68 | return ib_unmap_fmr(&l); | ||
69 | } | ||
70 | |||
71 | /* Deferred reset of a single FMR. Generate a fresh rkey by | ||
72 | * replacing the MR. There's no recovery if this fails. | ||
73 | */ | ||
74 | static void | ||
75 | __fmr_recovery_worker(struct work_struct *work) | ||
76 | { | ||
77 | struct rpcrdma_mw *mw = container_of(work, struct rpcrdma_mw, | ||
78 | mw_work); | ||
79 | struct rpcrdma_xprt *r_xprt = mw->mw_xprt; | ||
80 | |||
81 | __fmr_unmap(mw); | ||
82 | rpcrdma_put_mw(r_xprt, mw); | ||
83 | return; | ||
84 | } | ||
85 | |||
86 | /* A broken MR was discovered in a context that can't sleep. | ||
87 | * Defer recovery to the recovery worker. | ||
88 | */ | ||
89 | static void | ||
90 | __fmr_queue_recovery(struct rpcrdma_mw *mw) | ||
91 | { | ||
92 | INIT_WORK(&mw->mw_work, __fmr_recovery_worker); | ||
93 | queue_work(fmr_recovery_wq, &mw->mw_work); | ||
94 | } | ||
95 | |||
38 | static int | 96 | static int |
39 | fmr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, | 97 | fmr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, |
40 | struct rpcrdma_create_data_internal *cdata) | 98 | struct rpcrdma_create_data_internal *cdata) |
41 | { | 99 | { |
100 | rpcrdma_set_max_header_sizes(ia, cdata, max_t(unsigned int, 1, | ||
101 | RPCRDMA_MAX_DATA_SEGS / | ||
102 | RPCRDMA_MAX_FMR_SGES)); | ||
42 | return 0; | 103 | return 0; |
43 | } | 104 | } |
44 | 105 | ||
@@ -48,7 +109,7 @@ static size_t | |||
48 | fmr_op_maxpages(struct rpcrdma_xprt *r_xprt) | 109 | fmr_op_maxpages(struct rpcrdma_xprt *r_xprt) |
49 | { | 110 | { |
50 | return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, | 111 | return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, |
51 | rpcrdma_max_segments(r_xprt) * RPCRDMA_MAX_FMR_SGES); | 112 | RPCRDMA_MAX_HDR_SEGS * RPCRDMA_MAX_FMR_SGES); |
52 | } | 113 | } |
53 | 114 | ||
54 | static int | 115 | static int |
@@ -89,6 +150,7 @@ fmr_op_init(struct rpcrdma_xprt *r_xprt) | |||
89 | if (IS_ERR(r->fmr.fmr)) | 150 | if (IS_ERR(r->fmr.fmr)) |
90 | goto out_fmr_err; | 151 | goto out_fmr_err; |
91 | 152 | ||
153 | r->mw_xprt = r_xprt; | ||
92 | list_add(&r->mw_list, &buf->rb_mws); | 154 | list_add(&r->mw_list, &buf->rb_mws); |
93 | list_add(&r->mw_all, &buf->rb_all); | 155 | list_add(&r->mw_all, &buf->rb_all); |
94 | } | 156 | } |
@@ -104,15 +166,6 @@ out: | |||
104 | return rc; | 166 | return rc; |
105 | } | 167 | } |
106 | 168 | ||
107 | static int | ||
108 | __fmr_unmap(struct rpcrdma_mw *r) | ||
109 | { | ||
110 | LIST_HEAD(l); | ||
111 | |||
112 | list_add(&r->fmr.fmr->list, &l); | ||
113 | return ib_unmap_fmr(&l); | ||
114 | } | ||
115 | |||
116 | /* Use the ib_map_phys_fmr() verb to register a memory region | 169 | /* Use the ib_map_phys_fmr() verb to register a memory region |
117 | * for remote access via RDMA READ or RDMA WRITE. | 170 | * for remote access via RDMA READ or RDMA WRITE. |
118 | */ | 171 | */ |
@@ -183,15 +236,10 @@ static void | |||
183 | __fmr_dma_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg) | 236 | __fmr_dma_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg) |
184 | { | 237 | { |
185 | struct ib_device *device = r_xprt->rx_ia.ri_device; | 238 | struct ib_device *device = r_xprt->rx_ia.ri_device; |
186 | struct rpcrdma_mw *mw = seg->rl_mw; | ||
187 | int nsegs = seg->mr_nsegs; | 239 | int nsegs = seg->mr_nsegs; |
188 | 240 | ||
189 | seg->rl_mw = NULL; | ||
190 | |||
191 | while (nsegs--) | 241 | while (nsegs--) |
192 | rpcrdma_unmap_one(device, seg++); | 242 | rpcrdma_unmap_one(device, seg++); |
193 | |||
194 | rpcrdma_put_mw(r_xprt, mw); | ||
195 | } | 243 | } |
196 | 244 | ||
197 | /* Invalidate all memory regions that were registered for "req". | 245 | /* Invalidate all memory regions that were registered for "req". |
@@ -234,42 +282,50 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) | |||
234 | seg = &req->rl_segments[i]; | 282 | seg = &req->rl_segments[i]; |
235 | 283 | ||
236 | __fmr_dma_unmap(r_xprt, seg); | 284 | __fmr_dma_unmap(r_xprt, seg); |
285 | rpcrdma_put_mw(r_xprt, seg->rl_mw); | ||
237 | 286 | ||
238 | i += seg->mr_nsegs; | 287 | i += seg->mr_nsegs; |
239 | seg->mr_nsegs = 0; | 288 | seg->mr_nsegs = 0; |
289 | seg->rl_mw = NULL; | ||
240 | } | 290 | } |
241 | 291 | ||
242 | req->rl_nchunks = 0; | 292 | req->rl_nchunks = 0; |
243 | } | 293 | } |
244 | 294 | ||
245 | /* Use the ib_unmap_fmr() verb to prevent further remote | 295 | /* Use a slow, safe mechanism to invalidate all memory regions |
246 | * access via RDMA READ or RDMA WRITE. | 296 | * that were registered for "req". |
297 | * | ||
298 | * In the asynchronous case, DMA unmapping occurs first here | ||
299 | * because the rpcrdma_mr_seg is released immediately after this | ||
300 | * call. Its contents won't be available in __fmr_dma_unmap later. | ||
301 | * FIXME. | ||
247 | */ | 302 | */ |
248 | static int | 303 | static void |
249 | fmr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg) | 304 | fmr_op_unmap_safe(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, |
305 | bool sync) | ||
250 | { | 306 | { |
251 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | 307 | struct rpcrdma_mr_seg *seg; |
252 | struct rpcrdma_mr_seg *seg1 = seg; | 308 | struct rpcrdma_mw *mw; |
253 | struct rpcrdma_mw *mw = seg1->rl_mw; | 309 | unsigned int i; |
254 | int rc, nsegs = seg->mr_nsegs; | ||
255 | 310 | ||
256 | dprintk("RPC: %s: FMR %p\n", __func__, mw); | 311 | for (i = 0; req->rl_nchunks; req->rl_nchunks--) { |
312 | seg = &req->rl_segments[i]; | ||
313 | mw = seg->rl_mw; | ||
257 | 314 | ||
258 | seg1->rl_mw = NULL; | 315 | if (sync) { |
259 | while (seg1->mr_nsegs--) | 316 | /* ORDER: remote invalidation precedes DMA unmap */ |
260 | rpcrdma_unmap_one(ia->ri_device, seg++); | 317 | __fmr_unmap(mw); |
261 | rc = __fmr_unmap(mw); | 318 | __fmr_dma_unmap(r_xprt, seg); |
262 | if (rc) | 319 | rpcrdma_put_mw(r_xprt, mw); |
263 | goto out_err; | 320 | } else { |
264 | rpcrdma_put_mw(r_xprt, mw); | 321 | __fmr_dma_unmap(r_xprt, seg); |
265 | return nsegs; | 322 | __fmr_queue_recovery(mw); |
323 | } | ||
266 | 324 | ||
267 | out_err: | 325 | i += seg->mr_nsegs; |
268 | /* The FMR is abandoned, but remains in rb_all. fmr_op_destroy | 326 | seg->mr_nsegs = 0; |
269 | * will attempt to release it when the transport is destroyed. | 327 | seg->rl_mw = NULL; |
270 | */ | 328 | } |
271 | dprintk("RPC: %s: ib_unmap_fmr status %i\n", __func__, rc); | ||
272 | return nsegs; | ||
273 | } | 329 | } |
274 | 330 | ||
275 | static void | 331 | static void |
@@ -295,7 +351,7 @@ fmr_op_destroy(struct rpcrdma_buffer *buf) | |||
295 | const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = { | 351 | const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = { |
296 | .ro_map = fmr_op_map, | 352 | .ro_map = fmr_op_map, |
297 | .ro_unmap_sync = fmr_op_unmap_sync, | 353 | .ro_unmap_sync = fmr_op_unmap_sync, |
298 | .ro_unmap = fmr_op_unmap, | 354 | .ro_unmap_safe = fmr_op_unmap_safe, |
299 | .ro_open = fmr_op_open, | 355 | .ro_open = fmr_op_open, |
300 | .ro_maxpages = fmr_op_maxpages, | 356 | .ro_maxpages = fmr_op_maxpages, |
301 | .ro_init = fmr_op_init, | 357 | .ro_init = fmr_op_init, |
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 94c3fa910b85..c0947544babe 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c | |||
@@ -98,6 +98,47 @@ frwr_destroy_recovery_wq(void) | |||
98 | destroy_workqueue(wq); | 98 | destroy_workqueue(wq); |
99 | } | 99 | } |
100 | 100 | ||
101 | static int | ||
102 | __frwr_reset_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r) | ||
103 | { | ||
104 | struct rpcrdma_frmr *f = &r->frmr; | ||
105 | int rc; | ||
106 | |||
107 | rc = ib_dereg_mr(f->fr_mr); | ||
108 | if (rc) { | ||
109 | pr_warn("rpcrdma: ib_dereg_mr status %d, frwr %p orphaned\n", | ||
110 | rc, r); | ||
111 | return rc; | ||
112 | } | ||
113 | |||
114 | f->fr_mr = ib_alloc_mr(ia->ri_pd, IB_MR_TYPE_MEM_REG, | ||
115 | ia->ri_max_frmr_depth); | ||
116 | if (IS_ERR(f->fr_mr)) { | ||
117 | pr_warn("rpcrdma: ib_alloc_mr status %ld, frwr %p orphaned\n", | ||
118 | PTR_ERR(f->fr_mr), r); | ||
119 | return PTR_ERR(f->fr_mr); | ||
120 | } | ||
121 | |||
122 | dprintk("RPC: %s: recovered FRMR %p\n", __func__, r); | ||
123 | f->fr_state = FRMR_IS_INVALID; | ||
124 | return 0; | ||
125 | } | ||
126 | |||
127 | static void | ||
128 | __frwr_reset_and_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mw *mw) | ||
129 | { | ||
130 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | ||
131 | struct rpcrdma_frmr *f = &mw->frmr; | ||
132 | int rc; | ||
133 | |||
134 | rc = __frwr_reset_mr(ia, mw); | ||
135 | ib_dma_unmap_sg(ia->ri_device, f->fr_sg, f->fr_nents, f->fr_dir); | ||
136 | if (rc) | ||
137 | return; | ||
138 | |||
139 | rpcrdma_put_mw(r_xprt, mw); | ||
140 | } | ||
141 | |||
101 | /* Deferred reset of a single FRMR. Generate a fresh rkey by | 142 | /* Deferred reset of a single FRMR. Generate a fresh rkey by |
102 | * replacing the MR. | 143 | * replacing the MR. |
103 | * | 144 | * |
@@ -109,26 +150,10 @@ static void | |||
109 | __frwr_recovery_worker(struct work_struct *work) | 150 | __frwr_recovery_worker(struct work_struct *work) |
110 | { | 151 | { |
111 | struct rpcrdma_mw *r = container_of(work, struct rpcrdma_mw, | 152 | struct rpcrdma_mw *r = container_of(work, struct rpcrdma_mw, |
112 | frmr.fr_work); | 153 | mw_work); |
113 | struct rpcrdma_xprt *r_xprt = r->frmr.fr_xprt; | ||
114 | unsigned int depth = r_xprt->rx_ia.ri_max_frmr_depth; | ||
115 | struct ib_pd *pd = r_xprt->rx_ia.ri_pd; | ||
116 | |||
117 | if (ib_dereg_mr(r->frmr.fr_mr)) | ||
118 | goto out_fail; | ||
119 | 154 | ||
120 | r->frmr.fr_mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, depth); | 155 | __frwr_reset_and_unmap(r->mw_xprt, r); |
121 | if (IS_ERR(r->frmr.fr_mr)) | ||
122 | goto out_fail; | ||
123 | |||
124 | dprintk("RPC: %s: recovered FRMR %p\n", __func__, r); | ||
125 | r->frmr.fr_state = FRMR_IS_INVALID; | ||
126 | rpcrdma_put_mw(r_xprt, r); | ||
127 | return; | 156 | return; |
128 | |||
129 | out_fail: | ||
130 | pr_warn("RPC: %s: FRMR %p unrecovered\n", | ||
131 | __func__, r); | ||
132 | } | 157 | } |
133 | 158 | ||
134 | /* A broken MR was discovered in a context that can't sleep. | 159 | /* A broken MR was discovered in a context that can't sleep. |
@@ -137,8 +162,8 @@ out_fail: | |||
137 | static void | 162 | static void |
138 | __frwr_queue_recovery(struct rpcrdma_mw *r) | 163 | __frwr_queue_recovery(struct rpcrdma_mw *r) |
139 | { | 164 | { |
140 | INIT_WORK(&r->frmr.fr_work, __frwr_recovery_worker); | 165 | INIT_WORK(&r->mw_work, __frwr_recovery_worker); |
141 | queue_work(frwr_recovery_wq, &r->frmr.fr_work); | 166 | queue_work(frwr_recovery_wq, &r->mw_work); |
142 | } | 167 | } |
143 | 168 | ||
144 | static int | 169 | static int |
@@ -152,11 +177,11 @@ __frwr_init(struct rpcrdma_mw *r, struct ib_pd *pd, struct ib_device *device, | |||
152 | if (IS_ERR(f->fr_mr)) | 177 | if (IS_ERR(f->fr_mr)) |
153 | goto out_mr_err; | 178 | goto out_mr_err; |
154 | 179 | ||
155 | f->sg = kcalloc(depth, sizeof(*f->sg), GFP_KERNEL); | 180 | f->fr_sg = kcalloc(depth, sizeof(*f->fr_sg), GFP_KERNEL); |
156 | if (!f->sg) | 181 | if (!f->fr_sg) |
157 | goto out_list_err; | 182 | goto out_list_err; |
158 | 183 | ||
159 | sg_init_table(f->sg, depth); | 184 | sg_init_table(f->fr_sg, depth); |
160 | 185 | ||
161 | init_completion(&f->fr_linv_done); | 186 | init_completion(&f->fr_linv_done); |
162 | 187 | ||
@@ -185,7 +210,7 @@ __frwr_release(struct rpcrdma_mw *r) | |||
185 | if (rc) | 210 | if (rc) |
186 | dprintk("RPC: %s: ib_dereg_mr status %i\n", | 211 | dprintk("RPC: %s: ib_dereg_mr status %i\n", |
187 | __func__, rc); | 212 | __func__, rc); |
188 | kfree(r->frmr.sg); | 213 | kfree(r->frmr.fr_sg); |
189 | } | 214 | } |
190 | 215 | ||
191 | static int | 216 | static int |
@@ -231,6 +256,9 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, | |||
231 | depth; | 256 | depth; |
232 | } | 257 | } |
233 | 258 | ||
259 | rpcrdma_set_max_header_sizes(ia, cdata, max_t(unsigned int, 1, | ||
260 | RPCRDMA_MAX_DATA_SEGS / | ||
261 | ia->ri_max_frmr_depth)); | ||
234 | return 0; | 262 | return 0; |
235 | } | 263 | } |
236 | 264 | ||
@@ -243,7 +271,7 @@ frwr_op_maxpages(struct rpcrdma_xprt *r_xprt) | |||
243 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | 271 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; |
244 | 272 | ||
245 | return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, | 273 | return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, |
246 | rpcrdma_max_segments(r_xprt) * ia->ri_max_frmr_depth); | 274 | RPCRDMA_MAX_HDR_SEGS * ia->ri_max_frmr_depth); |
247 | } | 275 | } |
248 | 276 | ||
249 | static void | 277 | static void |
@@ -350,9 +378,9 @@ frwr_op_init(struct rpcrdma_xprt *r_xprt) | |||
350 | return rc; | 378 | return rc; |
351 | } | 379 | } |
352 | 380 | ||
381 | r->mw_xprt = r_xprt; | ||
353 | list_add(&r->mw_list, &buf->rb_mws); | 382 | list_add(&r->mw_list, &buf->rb_mws); |
354 | list_add(&r->mw_all, &buf->rb_all); | 383 | list_add(&r->mw_all, &buf->rb_all); |
355 | r->frmr.fr_xprt = r_xprt; | ||
356 | } | 384 | } |
357 | 385 | ||
358 | return 0; | 386 | return 0; |
@@ -396,12 +424,12 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, | |||
396 | 424 | ||
397 | for (i = 0; i < nsegs;) { | 425 | for (i = 0; i < nsegs;) { |
398 | if (seg->mr_page) | 426 | if (seg->mr_page) |
399 | sg_set_page(&frmr->sg[i], | 427 | sg_set_page(&frmr->fr_sg[i], |
400 | seg->mr_page, | 428 | seg->mr_page, |
401 | seg->mr_len, | 429 | seg->mr_len, |
402 | offset_in_page(seg->mr_offset)); | 430 | offset_in_page(seg->mr_offset)); |
403 | else | 431 | else |
404 | sg_set_buf(&frmr->sg[i], seg->mr_offset, | 432 | sg_set_buf(&frmr->fr_sg[i], seg->mr_offset, |
405 | seg->mr_len); | 433 | seg->mr_len); |
406 | 434 | ||
407 | ++seg; | 435 | ++seg; |
@@ -412,25 +440,26 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, | |||
412 | offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) | 440 | offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) |
413 | break; | 441 | break; |
414 | } | 442 | } |
415 | frmr->sg_nents = i; | 443 | frmr->fr_nents = i; |
444 | frmr->fr_dir = direction; | ||
416 | 445 | ||
417 | dma_nents = ib_dma_map_sg(device, frmr->sg, frmr->sg_nents, direction); | 446 | dma_nents = ib_dma_map_sg(device, frmr->fr_sg, frmr->fr_nents, direction); |
418 | if (!dma_nents) { | 447 | if (!dma_nents) { |
419 | pr_err("RPC: %s: failed to dma map sg %p sg_nents %u\n", | 448 | pr_err("RPC: %s: failed to dma map sg %p sg_nents %u\n", |
420 | __func__, frmr->sg, frmr->sg_nents); | 449 | __func__, frmr->fr_sg, frmr->fr_nents); |
421 | return -ENOMEM; | 450 | return -ENOMEM; |
422 | } | 451 | } |
423 | 452 | ||
424 | n = ib_map_mr_sg(mr, frmr->sg, frmr->sg_nents, NULL, PAGE_SIZE); | 453 | n = ib_map_mr_sg(mr, frmr->fr_sg, frmr->fr_nents, NULL, PAGE_SIZE); |
425 | if (unlikely(n != frmr->sg_nents)) { | 454 | if (unlikely(n != frmr->fr_nents)) { |
426 | pr_err("RPC: %s: failed to map mr %p (%u/%u)\n", | 455 | pr_err("RPC: %s: failed to map mr %p (%u/%u)\n", |
427 | __func__, frmr->fr_mr, n, frmr->sg_nents); | 456 | __func__, frmr->fr_mr, n, frmr->fr_nents); |
428 | rc = n < 0 ? n : -EINVAL; | 457 | rc = n < 0 ? n : -EINVAL; |
429 | goto out_senderr; | 458 | goto out_senderr; |
430 | } | 459 | } |
431 | 460 | ||
432 | dprintk("RPC: %s: Using frmr %p to map %u segments (%u bytes)\n", | 461 | dprintk("RPC: %s: Using frmr %p to map %u segments (%u bytes)\n", |
433 | __func__, mw, frmr->sg_nents, mr->length); | 462 | __func__, mw, frmr->fr_nents, mr->length); |
434 | 463 | ||
435 | key = (u8)(mr->rkey & 0x000000FF); | 464 | key = (u8)(mr->rkey & 0x000000FF); |
436 | ib_update_fast_reg_key(mr, ++key); | 465 | ib_update_fast_reg_key(mr, ++key); |
@@ -452,18 +481,16 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, | |||
452 | if (rc) | 481 | if (rc) |
453 | goto out_senderr; | 482 | goto out_senderr; |
454 | 483 | ||
455 | seg1->mr_dir = direction; | ||
456 | seg1->rl_mw = mw; | 484 | seg1->rl_mw = mw; |
457 | seg1->mr_rkey = mr->rkey; | 485 | seg1->mr_rkey = mr->rkey; |
458 | seg1->mr_base = mr->iova; | 486 | seg1->mr_base = mr->iova; |
459 | seg1->mr_nsegs = frmr->sg_nents; | 487 | seg1->mr_nsegs = frmr->fr_nents; |
460 | seg1->mr_len = mr->length; | 488 | seg1->mr_len = mr->length; |
461 | 489 | ||
462 | return frmr->sg_nents; | 490 | return frmr->fr_nents; |
463 | 491 | ||
464 | out_senderr: | 492 | out_senderr: |
465 | dprintk("RPC: %s: ib_post_send status %i\n", __func__, rc); | 493 | dprintk("RPC: %s: ib_post_send status %i\n", __func__, rc); |
466 | ib_dma_unmap_sg(device, frmr->sg, dma_nents, direction); | ||
467 | __frwr_queue_recovery(mw); | 494 | __frwr_queue_recovery(mw); |
468 | return rc; | 495 | return rc; |
469 | } | 496 | } |
@@ -487,24 +514,6 @@ __frwr_prepare_linv_wr(struct rpcrdma_mr_seg *seg) | |||
487 | return invalidate_wr; | 514 | return invalidate_wr; |
488 | } | 515 | } |
489 | 516 | ||
490 | static void | ||
491 | __frwr_dma_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, | ||
492 | int rc) | ||
493 | { | ||
494 | struct ib_device *device = r_xprt->rx_ia.ri_device; | ||
495 | struct rpcrdma_mw *mw = seg->rl_mw; | ||
496 | struct rpcrdma_frmr *f = &mw->frmr; | ||
497 | |||
498 | seg->rl_mw = NULL; | ||
499 | |||
500 | ib_dma_unmap_sg(device, f->sg, f->sg_nents, seg->mr_dir); | ||
501 | |||
502 | if (!rc) | ||
503 | rpcrdma_put_mw(r_xprt, mw); | ||
504 | else | ||
505 | __frwr_queue_recovery(mw); | ||
506 | } | ||
507 | |||
508 | /* Invalidate all memory regions that were registered for "req". | 517 | /* Invalidate all memory regions that were registered for "req". |
509 | * | 518 | * |
510 | * Sleeps until it is safe for the host CPU to access the | 519 | * Sleeps until it is safe for the host CPU to access the |
@@ -518,6 +527,7 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) | |||
518 | struct rpcrdma_mr_seg *seg; | 527 | struct rpcrdma_mr_seg *seg; |
519 | unsigned int i, nchunks; | 528 | unsigned int i, nchunks; |
520 | struct rpcrdma_frmr *f; | 529 | struct rpcrdma_frmr *f; |
530 | struct rpcrdma_mw *mw; | ||
521 | int rc; | 531 | int rc; |
522 | 532 | ||
523 | dprintk("RPC: %s: req %p\n", __func__, req); | 533 | dprintk("RPC: %s: req %p\n", __func__, req); |
@@ -558,11 +568,8 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) | |||
558 | * unless ri_id->qp is a valid pointer. | 568 | * unless ri_id->qp is a valid pointer. |
559 | */ | 569 | */ |
560 | rc = ib_post_send(ia->ri_id->qp, invalidate_wrs, &bad_wr); | 570 | rc = ib_post_send(ia->ri_id->qp, invalidate_wrs, &bad_wr); |
561 | if (rc) { | 571 | if (rc) |
562 | pr_warn("%s: ib_post_send failed %i\n", __func__, rc); | 572 | goto reset_mrs; |
563 | rdma_disconnect(ia->ri_id); | ||
564 | goto unmap; | ||
565 | } | ||
566 | 573 | ||
567 | wait_for_completion(&f->fr_linv_done); | 574 | wait_for_completion(&f->fr_linv_done); |
568 | 575 | ||
@@ -572,56 +579,65 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) | |||
572 | unmap: | 579 | unmap: |
573 | for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) { | 580 | for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) { |
574 | seg = &req->rl_segments[i]; | 581 | seg = &req->rl_segments[i]; |
582 | mw = seg->rl_mw; | ||
583 | seg->rl_mw = NULL; | ||
575 | 584 | ||
576 | __frwr_dma_unmap(r_xprt, seg, rc); | 585 | ib_dma_unmap_sg(ia->ri_device, f->fr_sg, f->fr_nents, |
586 | f->fr_dir); | ||
587 | rpcrdma_put_mw(r_xprt, mw); | ||
577 | 588 | ||
578 | i += seg->mr_nsegs; | 589 | i += seg->mr_nsegs; |
579 | seg->mr_nsegs = 0; | 590 | seg->mr_nsegs = 0; |
580 | } | 591 | } |
581 | 592 | ||
582 | req->rl_nchunks = 0; | 593 | req->rl_nchunks = 0; |
583 | } | 594 | return; |
584 | 595 | ||
585 | /* Post a LOCAL_INV Work Request to prevent further remote access | 596 | reset_mrs: |
586 | * via RDMA READ or RDMA WRITE. | 597 | pr_warn("%s: ib_post_send failed %i\n", __func__, rc); |
587 | */ | ||
588 | static int | ||
589 | frwr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg) | ||
590 | { | ||
591 | struct rpcrdma_mr_seg *seg1 = seg; | ||
592 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | ||
593 | struct rpcrdma_mw *mw = seg1->rl_mw; | ||
594 | struct rpcrdma_frmr *frmr = &mw->frmr; | ||
595 | struct ib_send_wr *invalidate_wr, *bad_wr; | ||
596 | int rc, nsegs = seg->mr_nsegs; | ||
597 | 598 | ||
598 | dprintk("RPC: %s: FRMR %p\n", __func__, mw); | 599 | /* Find and reset the MRs in the LOCAL_INV WRs that did not |
600 | * get posted. This is synchronous, and slow. | ||
601 | */ | ||
602 | for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) { | ||
603 | seg = &req->rl_segments[i]; | ||
604 | mw = seg->rl_mw; | ||
605 | f = &mw->frmr; | ||
599 | 606 | ||
600 | seg1->rl_mw = NULL; | 607 | if (mw->frmr.fr_mr->rkey == bad_wr->ex.invalidate_rkey) { |
601 | frmr->fr_state = FRMR_IS_INVALID; | 608 | __frwr_reset_mr(ia, mw); |
602 | invalidate_wr = &mw->frmr.fr_invwr; | 609 | bad_wr = bad_wr->next; |
610 | } | ||
603 | 611 | ||
604 | memset(invalidate_wr, 0, sizeof(*invalidate_wr)); | 612 | i += seg->mr_nsegs; |
605 | frmr->fr_cqe.done = frwr_wc_localinv; | 613 | } |
606 | invalidate_wr->wr_cqe = &frmr->fr_cqe; | 614 | goto unmap; |
607 | invalidate_wr->opcode = IB_WR_LOCAL_INV; | 615 | } |
608 | invalidate_wr->ex.invalidate_rkey = frmr->fr_mr->rkey; | ||
609 | DECR_CQCOUNT(&r_xprt->rx_ep); | ||
610 | 616 | ||
611 | ib_dma_unmap_sg(ia->ri_device, frmr->sg, frmr->sg_nents, seg1->mr_dir); | 617 | /* Use a slow, safe mechanism to invalidate all memory regions |
612 | read_lock(&ia->ri_qplock); | 618 | * that were registered for "req". |
613 | rc = ib_post_send(ia->ri_id->qp, invalidate_wr, &bad_wr); | 619 | */ |
614 | read_unlock(&ia->ri_qplock); | 620 | static void |
615 | if (rc) | 621 | frwr_op_unmap_safe(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, |
616 | goto out_err; | 622 | bool sync) |
623 | { | ||
624 | struct rpcrdma_mr_seg *seg; | ||
625 | struct rpcrdma_mw *mw; | ||
626 | unsigned int i; | ||
617 | 627 | ||
618 | rpcrdma_put_mw(r_xprt, mw); | 628 | for (i = 0; req->rl_nchunks; req->rl_nchunks--) { |
619 | return nsegs; | 629 | seg = &req->rl_segments[i]; |
630 | mw = seg->rl_mw; | ||
620 | 631 | ||
621 | out_err: | 632 | if (sync) |
622 | dprintk("RPC: %s: ib_post_send status %i\n", __func__, rc); | 633 | __frwr_reset_and_unmap(r_xprt, mw); |
623 | __frwr_queue_recovery(mw); | 634 | else |
624 | return nsegs; | 635 | __frwr_queue_recovery(mw); |
636 | |||
637 | i += seg->mr_nsegs; | ||
638 | seg->mr_nsegs = 0; | ||
639 | seg->rl_mw = NULL; | ||
640 | } | ||
625 | } | 641 | } |
626 | 642 | ||
627 | static void | 643 | static void |
@@ -643,7 +659,7 @@ frwr_op_destroy(struct rpcrdma_buffer *buf) | |||
643 | const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = { | 659 | const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = { |
644 | .ro_map = frwr_op_map, | 660 | .ro_map = frwr_op_map, |
645 | .ro_unmap_sync = frwr_op_unmap_sync, | 661 | .ro_unmap_sync = frwr_op_unmap_sync, |
646 | .ro_unmap = frwr_op_unmap, | 662 | .ro_unmap_safe = frwr_op_unmap_safe, |
647 | .ro_open = frwr_op_open, | 663 | .ro_open = frwr_op_open, |
648 | .ro_maxpages = frwr_op_maxpages, | 664 | .ro_maxpages = frwr_op_maxpages, |
649 | .ro_init = frwr_op_init, | 665 | .ro_init = frwr_op_init, |
diff --git a/net/sunrpc/xprtrdma/physical_ops.c b/net/sunrpc/xprtrdma/physical_ops.c index 481b9b6f4a15..3750596cc432 100644 --- a/net/sunrpc/xprtrdma/physical_ops.c +++ b/net/sunrpc/xprtrdma/physical_ops.c | |||
@@ -36,8 +36,11 @@ physical_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, | |||
36 | __func__, PTR_ERR(mr)); | 36 | __func__, PTR_ERR(mr)); |
37 | return -ENOMEM; | 37 | return -ENOMEM; |
38 | } | 38 | } |
39 | |||
40 | ia->ri_dma_mr = mr; | 39 | ia->ri_dma_mr = mr; |
40 | |||
41 | rpcrdma_set_max_header_sizes(ia, cdata, min_t(unsigned int, | ||
42 | RPCRDMA_MAX_DATA_SEGS, | ||
43 | RPCRDMA_MAX_HDR_SEGS)); | ||
41 | return 0; | 44 | return 0; |
42 | } | 45 | } |
43 | 46 | ||
@@ -47,7 +50,7 @@ static size_t | |||
47 | physical_op_maxpages(struct rpcrdma_xprt *r_xprt) | 50 | physical_op_maxpages(struct rpcrdma_xprt *r_xprt) |
48 | { | 51 | { |
49 | return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, | 52 | return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, |
50 | rpcrdma_max_segments(r_xprt)); | 53 | RPCRDMA_MAX_HDR_SEGS); |
51 | } | 54 | } |
52 | 55 | ||
53 | static int | 56 | static int |
@@ -71,17 +74,6 @@ physical_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, | |||
71 | return 1; | 74 | return 1; |
72 | } | 75 | } |
73 | 76 | ||
74 | /* Unmap a memory region, but leave it registered. | ||
75 | */ | ||
76 | static int | ||
77 | physical_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg) | ||
78 | { | ||
79 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | ||
80 | |||
81 | rpcrdma_unmap_one(ia->ri_device, seg); | ||
82 | return 1; | ||
83 | } | ||
84 | |||
85 | /* DMA unmap all memory regions that were mapped for "req". | 77 | /* DMA unmap all memory regions that were mapped for "req". |
86 | */ | 78 | */ |
87 | static void | 79 | static void |
@@ -94,6 +86,25 @@ physical_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) | |||
94 | rpcrdma_unmap_one(device, &req->rl_segments[i++]); | 86 | rpcrdma_unmap_one(device, &req->rl_segments[i++]); |
95 | } | 87 | } |
96 | 88 | ||
89 | /* Use a slow, safe mechanism to invalidate all memory regions | ||
90 | * that were registered for "req". | ||
91 | * | ||
92 | * For physical memory registration, there is no good way to | ||
93 | * fence a single MR that has been advertised to the server. The | ||
94 | * client has already handed the server an R_key that cannot be | ||
95 | * invalidated and is shared by all MRs on this connection. | ||
96 | * Tearing down the PD might be the only safe choice, but it's | ||
97 | * not clear that a freshly acquired DMA R_key would be different | ||
98 | * than the one used by the PD that was just destroyed. | ||
99 | * FIXME. | ||
100 | */ | ||
101 | static void | ||
102 | physical_op_unmap_safe(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, | ||
103 | bool sync) | ||
104 | { | ||
105 | physical_op_unmap_sync(r_xprt, req); | ||
106 | } | ||
107 | |||
97 | static void | 108 | static void |
98 | physical_op_destroy(struct rpcrdma_buffer *buf) | 109 | physical_op_destroy(struct rpcrdma_buffer *buf) |
99 | { | 110 | { |
@@ -102,7 +113,7 @@ physical_op_destroy(struct rpcrdma_buffer *buf) | |||
102 | const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops = { | 113 | const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops = { |
103 | .ro_map = physical_op_map, | 114 | .ro_map = physical_op_map, |
104 | .ro_unmap_sync = physical_op_unmap_sync, | 115 | .ro_unmap_sync = physical_op_unmap_sync, |
105 | .ro_unmap = physical_op_unmap, | 116 | .ro_unmap_safe = physical_op_unmap_safe, |
106 | .ro_open = physical_op_open, | 117 | .ro_open = physical_op_open, |
107 | .ro_maxpages = physical_op_maxpages, | 118 | .ro_maxpages = physical_op_maxpages, |
108 | .ro_init = physical_op_init, | 119 | .ro_init = physical_op_init, |
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 888823bb6dae..35a81096e83d 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c | |||
@@ -61,26 +61,84 @@ enum rpcrdma_chunktype { | |||
61 | rpcrdma_replych | 61 | rpcrdma_replych |
62 | }; | 62 | }; |
63 | 63 | ||
64 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) | ||
65 | static const char transfertypes[][12] = { | 64 | static const char transfertypes[][12] = { |
66 | "pure inline", /* no chunks */ | 65 | "inline", /* no chunks */ |
67 | " read chunk", /* some argument via rdma read */ | 66 | "read list", /* some argument via rdma read */ |
68 | "*read chunk", /* entire request via rdma read */ | 67 | "*read list", /* entire request via rdma read */ |
69 | "write chunk", /* some result via rdma write */ | 68 | "write list", /* some result via rdma write */ |
70 | "reply chunk" /* entire reply via rdma write */ | 69 | "reply chunk" /* entire reply via rdma write */ |
71 | }; | 70 | }; |
72 | #endif | 71 | |
72 | /* Returns size of largest RPC-over-RDMA header in a Call message | ||
73 | * | ||
74 | * The largest Call header contains a full-size Read list and a | ||
75 | * minimal Reply chunk. | ||
76 | */ | ||
77 | static unsigned int rpcrdma_max_call_header_size(unsigned int maxsegs) | ||
78 | { | ||
79 | unsigned int size; | ||
80 | |||
81 | /* Fixed header fields and list discriminators */ | ||
82 | size = RPCRDMA_HDRLEN_MIN; | ||
83 | |||
84 | /* Maximum Read list size */ | ||
85 | maxsegs += 2; /* segment for head and tail buffers */ | ||
86 | size = maxsegs * sizeof(struct rpcrdma_read_chunk); | ||
87 | |||
88 | /* Minimal Reply chunk size */ | ||
89 | size += sizeof(__be32); /* segment count */ | ||
90 | size += sizeof(struct rpcrdma_segment); | ||
91 | size += sizeof(__be32); /* list discriminator */ | ||
92 | |||
93 | dprintk("RPC: %s: max call header size = %u\n", | ||
94 | __func__, size); | ||
95 | return size; | ||
96 | } | ||
97 | |||
98 | /* Returns size of largest RPC-over-RDMA header in a Reply message | ||
99 | * | ||
100 | * There is only one Write list or one Reply chunk per Reply | ||
101 | * message. The larger list is the Write list. | ||
102 | */ | ||
103 | static unsigned int rpcrdma_max_reply_header_size(unsigned int maxsegs) | ||
104 | { | ||
105 | unsigned int size; | ||
106 | |||
107 | /* Fixed header fields and list discriminators */ | ||
108 | size = RPCRDMA_HDRLEN_MIN; | ||
109 | |||
110 | /* Maximum Write list size */ | ||
111 | maxsegs += 2; /* segment for head and tail buffers */ | ||
112 | size = sizeof(__be32); /* segment count */ | ||
113 | size += maxsegs * sizeof(struct rpcrdma_segment); | ||
114 | size += sizeof(__be32); /* list discriminator */ | ||
115 | |||
116 | dprintk("RPC: %s: max reply header size = %u\n", | ||
117 | __func__, size); | ||
118 | return size; | ||
119 | } | ||
120 | |||
121 | void rpcrdma_set_max_header_sizes(struct rpcrdma_ia *ia, | ||
122 | struct rpcrdma_create_data_internal *cdata, | ||
123 | unsigned int maxsegs) | ||
124 | { | ||
125 | ia->ri_max_inline_write = cdata->inline_wsize - | ||
126 | rpcrdma_max_call_header_size(maxsegs); | ||
127 | ia->ri_max_inline_read = cdata->inline_rsize - | ||
128 | rpcrdma_max_reply_header_size(maxsegs); | ||
129 | } | ||
73 | 130 | ||
74 | /* The client can send a request inline as long as the RPCRDMA header | 131 | /* The client can send a request inline as long as the RPCRDMA header |
75 | * plus the RPC call fit under the transport's inline limit. If the | 132 | * plus the RPC call fit under the transport's inline limit. If the |
76 | * combined call message size exceeds that limit, the client must use | 133 | * combined call message size exceeds that limit, the client must use |
77 | * the read chunk list for this operation. | 134 | * the read chunk list for this operation. |
78 | */ | 135 | */ |
79 | static bool rpcrdma_args_inline(struct rpc_rqst *rqst) | 136 | static bool rpcrdma_args_inline(struct rpcrdma_xprt *r_xprt, |
137 | struct rpc_rqst *rqst) | ||
80 | { | 138 | { |
81 | unsigned int callsize = RPCRDMA_HDRLEN_MIN + rqst->rq_snd_buf.len; | 139 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; |
82 | 140 | ||
83 | return callsize <= RPCRDMA_INLINE_WRITE_THRESHOLD(rqst); | 141 | return rqst->rq_snd_buf.len <= ia->ri_max_inline_write; |
84 | } | 142 | } |
85 | 143 | ||
86 | /* The client can't know how large the actual reply will be. Thus it | 144 | /* The client can't know how large the actual reply will be. Thus it |
@@ -89,11 +147,12 @@ static bool rpcrdma_args_inline(struct rpc_rqst *rqst) | |||
89 | * limit, the client must provide a write list or a reply chunk for | 147 | * limit, the client must provide a write list or a reply chunk for |
90 | * this request. | 148 | * this request. |
91 | */ | 149 | */ |
92 | static bool rpcrdma_results_inline(struct rpc_rqst *rqst) | 150 | static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt, |
151 | struct rpc_rqst *rqst) | ||
93 | { | 152 | { |
94 | unsigned int repsize = RPCRDMA_HDRLEN_MIN + rqst->rq_rcv_buf.buflen; | 153 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; |
95 | 154 | ||
96 | return repsize <= RPCRDMA_INLINE_READ_THRESHOLD(rqst); | 155 | return rqst->rq_rcv_buf.buflen <= ia->ri_max_inline_read; |
97 | } | 156 | } |
98 | 157 | ||
99 | static int | 158 | static int |
@@ -226,23 +285,16 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos, | |||
226 | return n; | 285 | return n; |
227 | } | 286 | } |
228 | 287 | ||
229 | /* | 288 | static inline __be32 * |
230 | * Create read/write chunk lists, and reply chunks, for RDMA | 289 | xdr_encode_rdma_segment(__be32 *iptr, struct rpcrdma_mr_seg *seg) |
231 | * | 290 | { |
232 | * Assume check against THRESHOLD has been done, and chunks are required. | 291 | *iptr++ = cpu_to_be32(seg->mr_rkey); |
233 | * Assume only encoding one list entry for read|write chunks. The NFSv3 | 292 | *iptr++ = cpu_to_be32(seg->mr_len); |
234 | * protocol is simple enough to allow this as it only has a single "bulk | 293 | return xdr_encode_hyper(iptr, seg->mr_base); |
235 | * result" in each procedure - complicated NFSv4 COMPOUNDs are not. (The | 294 | } |
236 | * RDMA/Sessions NFSv4 proposal addresses this for future v4 revs.) | 295 | |
237 | * | 296 | /* XDR-encode the Read list. Supports encoding a list of read |
238 | * When used for a single reply chunk (which is a special write | 297 | * segments that belong to a single read chunk. |
239 | * chunk used for the entire reply, rather than just the data), it | ||
240 | * is used primarily for READDIR and READLINK which would otherwise | ||
241 | * be severely size-limited by a small rdma inline read max. The server | ||
242 | * response will come back as an RDMA Write, followed by a message | ||
243 | * of type RDMA_NOMSG carrying the xid and length. As a result, reply | ||
244 | * chunks do not provide data alignment, however they do not require | ||
245 | * "fixup" (moving the response to the upper layer buffer) either. | ||
246 | * | 298 | * |
247 | * Encoding key for single-list chunks (HLOO = Handle32 Length32 Offset64): | 299 | * Encoding key for single-list chunks (HLOO = Handle32 Length32 Offset64): |
248 | * | 300 | * |
@@ -250,131 +302,190 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos, | |||
250 | * N elements, position P (same P for all chunks of same arg!): | 302 | * N elements, position P (same P for all chunks of same arg!): |
251 | * 1 - PHLOO - 1 - PHLOO - ... - 1 - PHLOO - 0 | 303 | * 1 - PHLOO - 1 - PHLOO - ... - 1 - PHLOO - 0 |
252 | * | 304 | * |
305 | * Returns a pointer to the XDR word in the RDMA header following | ||
306 | * the end of the Read list, or an error pointer. | ||
307 | */ | ||
308 | static __be32 * | ||
309 | rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, | ||
310 | struct rpcrdma_req *req, struct rpc_rqst *rqst, | ||
311 | __be32 *iptr, enum rpcrdma_chunktype rtype) | ||
312 | { | ||
313 | struct rpcrdma_mr_seg *seg = req->rl_nextseg; | ||
314 | unsigned int pos; | ||
315 | int n, nsegs; | ||
316 | |||
317 | if (rtype == rpcrdma_noch) { | ||
318 | *iptr++ = xdr_zero; /* item not present */ | ||
319 | return iptr; | ||
320 | } | ||
321 | |||
322 | pos = rqst->rq_snd_buf.head[0].iov_len; | ||
323 | if (rtype == rpcrdma_areadch) | ||
324 | pos = 0; | ||
325 | nsegs = rpcrdma_convert_iovs(&rqst->rq_snd_buf, pos, rtype, seg, | ||
326 | RPCRDMA_MAX_SEGS - req->rl_nchunks); | ||
327 | if (nsegs < 0) | ||
328 | return ERR_PTR(nsegs); | ||
329 | |||
330 | do { | ||
331 | n = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, false); | ||
332 | if (n <= 0) | ||
333 | return ERR_PTR(n); | ||
334 | |||
335 | *iptr++ = xdr_one; /* item present */ | ||
336 | |||
337 | /* All read segments in this chunk | ||
338 | * have the same "position". | ||
339 | */ | ||
340 | *iptr++ = cpu_to_be32(pos); | ||
341 | iptr = xdr_encode_rdma_segment(iptr, seg); | ||
342 | |||
343 | dprintk("RPC: %5u %s: read segment pos %u " | ||
344 | "%d@0x%016llx:0x%08x (%s)\n", | ||
345 | rqst->rq_task->tk_pid, __func__, pos, | ||
346 | seg->mr_len, (unsigned long long)seg->mr_base, | ||
347 | seg->mr_rkey, n < nsegs ? "more" : "last"); | ||
348 | |||
349 | r_xprt->rx_stats.read_chunk_count++; | ||
350 | req->rl_nchunks++; | ||
351 | seg += n; | ||
352 | nsegs -= n; | ||
353 | } while (nsegs); | ||
354 | req->rl_nextseg = seg; | ||
355 | |||
356 | /* Finish Read list */ | ||
357 | *iptr++ = xdr_zero; /* Next item not present */ | ||
358 | return iptr; | ||
359 | } | ||
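As a visualization of what this encoder emits, the sketch below (illustrative only; htonl() stands in for cpu_to_be32(), and struct seg is a stand-in for struct rpcrdma_mr_seg) lays down the 1 - PHLOO - ... - 0 sequence for one read chunk:

    #include <arpa/inet.h> /* htonl() stands in for cpu_to_be32() */
    #include <stdint.h>

    struct seg { uint32_t rkey; uint32_t len; uint64_t base; };

    /* HLOO: Handle32, Length32, Offset64, big-endian on the wire */
    static uint32_t *encode_segment(uint32_t *p, const struct seg *s)
    {
        *p++ = htonl(s->rkey);
        *p++ = htonl(s->len);
        *p++ = htonl((uint32_t)(s->base >> 32)); /* xdr_encode_hyper() */
        *p++ = htonl((uint32_t)s->base);
        return p;
    }

    /* One read chunk at XDR position pos, built from nsegs segments:
     *   1 - P HLOO - 1 - P HLOO - ... - 0
     */
    static uint32_t *encode_read_list(uint32_t *p, uint32_t pos,
                                      const struct seg *segs, int nsegs)
    {
        int i;

        for (i = 0; i < nsegs; i++) {
            *p++ = htonl(1);   /* item present */
            *p++ = htonl(pos); /* every segment repeats the position */
            p = encode_segment(p, &segs[i]);
        }
        *p++ = 0;              /* list terminator: no more items */
        return p;
    }

Each Read list element repeats both the item discriminator and the position, so it costs 24 bytes on the wire versus 16 for a bare Write or Reply segment, which is why the Read list dominates the call header budget computed earlier.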
360 | |||
361 | /* XDR-encode the Write list. Supports encoding a list containing | ||
362 | * one array of plain segments that belong to a single write chunk. | ||
363 | * | ||
364 | * Encoding key for single-list chunks (HLOO = Handle32 Length32 Offset64): | ||
365 | * | ||
253 | * Write chunklist (a list of (one) counted array): | 366 | * Write chunklist (a list of (one) counted array): |
254 | * N elements: | 367 | * N elements: |
255 | * 1 - N - HLOO - HLOO - ... - HLOO - 0 | 368 | * 1 - N - HLOO - HLOO - ... - HLOO - 0 |
256 | * | 369 | * |
370 | * Returns a pointer to the XDR word in the RDMA header following | ||
371 | * the end of the Write list, or an error pointer. | ||
372 | */ | ||
373 | static __be32 * | ||
374 | rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, | ||
375 | struct rpc_rqst *rqst, __be32 *iptr, | ||
376 | enum rpcrdma_chunktype wtype) | ||
377 | { | ||
378 | struct rpcrdma_mr_seg *seg = req->rl_nextseg; | ||
379 | int n, nsegs, nchunks; | ||
380 | __be32 *segcount; | ||
381 | |||
382 | if (wtype != rpcrdma_writech) { | ||
383 | *iptr++ = xdr_zero; /* no Write list present */ | ||
384 | return iptr; | ||
385 | } | ||
386 | |||
387 | nsegs = rpcrdma_convert_iovs(&rqst->rq_rcv_buf, | ||
388 | rqst->rq_rcv_buf.head[0].iov_len, | ||
389 | wtype, seg, | ||
390 | RPCRDMA_MAX_SEGS - req->rl_nchunks); | ||
391 | if (nsegs < 0) | ||
392 | return ERR_PTR(nsegs); | ||
393 | |||
394 | *iptr++ = xdr_one; /* Write list present */ | ||
395 | segcount = iptr++; /* save location of segment count */ | ||
396 | |||
397 | nchunks = 0; | ||
398 | do { | ||
399 | n = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, true); | ||
400 | if (n <= 0) | ||
401 | return ERR_PTR(n); | ||
402 | |||
403 | iptr = xdr_encode_rdma_segment(iptr, seg); | ||
404 | |||
405 | dprintk("RPC: %5u %s: write segment " | ||
406 | "%d@0x016%llx:0x%08x (%s)\n", | ||
407 | rqst->rq_task->tk_pid, __func__, | ||
408 | seg->mr_len, (unsigned long long)seg->mr_base, | ||
409 | seg->mr_rkey, n < nsegs ? "more" : "last"); | ||
410 | |||
411 | r_xprt->rx_stats.write_chunk_count++; | ||
412 | r_xprt->rx_stats.total_rdma_request += seg->mr_len; | ||
413 | req->rl_nchunks++; | ||
414 | nchunks++; | ||
415 | seg += n; | ||
416 | nsegs -= n; | ||
417 | } while (nsegs); | ||
418 | req->rl_nextseg = seg; | ||
419 | |||
420 | /* Update count of segments in this Write chunk */ | ||
421 | *segcount = cpu_to_be32(nchunks); | ||
422 | |||
423 | /* Finish Write list */ | ||
424 | *iptr++ = xdr_zero; /* Next item not present */ | ||
425 | return iptr; | ||
426 | } | ||
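Worth noting is the deferred-count idiom above: segcount is reserved with iptr++ before the mapping loop and backfilled once nchunks is known, avoiding a second pass over the segments. A generic, self-contained sketch of the same pattern (names invented):

    #include <arpa/inet.h>
    #include <stdint.h>

    /* Counted array: 1 - N - item ... item. N is unknown until every
     * item has been mapped and encoded, so its slot is reserved up
     * front and backfilled afterwards -- no second pass needed.
     */
    static uint32_t *encode_counted_array(uint32_t *p,
                                          const uint32_t *items, int n)
    {
        uint32_t *countp;
        int i;

        *p++ = htonl(1);       /* array present */
        countp = p++;          /* reserve the count slot (cf. segcount) */
        for (i = 0; i < n; i++)
            *p++ = htonl(items[i]);
        *countp = htonl((uint32_t)n); /* backfill the element count */
        return p;
    }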
427 | |||
428 | /* XDR-encode the Reply chunk. Supports encoding an array of plain | ||
429 | * segments that belong to a single write (reply) chunk. | ||
430 | * | ||
431 | * Encoding key for single-list chunks (HLOO = Handle32 Length32 Offset64): | ||
432 | * | ||
257 | * Reply chunk (a counted array): | 433 | * Reply chunk (a counted array): |
258 | * N elements: | 434 | * N elements: |
259 | * 1 - N - HLOO - HLOO - ... - HLOO | 435 | * 1 - N - HLOO - HLOO - ... - HLOO |
260 | * | 436 | * |
261 | * Returns positive RPC/RDMA header size, or negative errno. | 437 | * Returns a pointer to the XDR word in the RDMA header following |
438 | * the end of the Reply chunk, or an error pointer. | ||
262 | */ | 439 | */ |
263 | 440 | static __be32 * | |
264 | static ssize_t | 441 | rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, |
265 | rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target, | 442 | struct rpcrdma_req *req, struct rpc_rqst *rqst, |
266 | struct rpcrdma_msg *headerp, enum rpcrdma_chunktype type) | 443 | __be32 *iptr, enum rpcrdma_chunktype wtype) |
267 | { | 444 | { |
268 | struct rpcrdma_req *req = rpcr_to_rdmar(rqst); | 445 | struct rpcrdma_mr_seg *seg = req->rl_nextseg; |
269 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); | 446 | int n, nsegs, nchunks; |
270 | int n, nsegs, nchunks = 0; | 447 | __be32 *segcount; |
271 | unsigned int pos; | ||
272 | struct rpcrdma_mr_seg *seg = req->rl_segments; | ||
273 | struct rpcrdma_read_chunk *cur_rchunk = NULL; | ||
274 | struct rpcrdma_write_array *warray = NULL; | ||
275 | struct rpcrdma_write_chunk *cur_wchunk = NULL; | ||
276 | __be32 *iptr = headerp->rm_body.rm_chunks; | ||
277 | int (*map)(struct rpcrdma_xprt *, struct rpcrdma_mr_seg *, int, bool); | ||
278 | |||
279 | if (type == rpcrdma_readch || type == rpcrdma_areadch) { | ||
280 | /* a read chunk - server will RDMA Read our memory */ | ||
281 | cur_rchunk = (struct rpcrdma_read_chunk *) iptr; | ||
282 | } else { | ||
283 | /* a write or reply chunk - server will RDMA Write our memory */ | ||
284 | *iptr++ = xdr_zero; /* encode a NULL read chunk list */ | ||
285 | if (type == rpcrdma_replych) | ||
286 | *iptr++ = xdr_zero; /* a NULL write chunk list */ | ||
287 | warray = (struct rpcrdma_write_array *) iptr; | ||
288 | cur_wchunk = (struct rpcrdma_write_chunk *) (warray + 1); | ||
289 | } | ||
290 | 448 | ||
291 | if (type == rpcrdma_replych || type == rpcrdma_areadch) | 449 | if (wtype != rpcrdma_replych) { |
292 | pos = 0; | 450 | *iptr++ = xdr_zero; /* no Reply chunk present */ |
293 | else | 451 | return iptr; |
294 | pos = target->head[0].iov_len; | 452 | } |
295 | 453 | ||
296 | nsegs = rpcrdma_convert_iovs(target, pos, type, seg, RPCRDMA_MAX_SEGS); | 454 | nsegs = rpcrdma_convert_iovs(&rqst->rq_rcv_buf, 0, wtype, seg, |
455 | RPCRDMA_MAX_SEGS - req->rl_nchunks); | ||
297 | if (nsegs < 0) | 456 | if (nsegs < 0) |
298 | return nsegs; | 457 | return ERR_PTR(nsegs); |
299 | 458 | ||
300 | map = r_xprt->rx_ia.ri_ops->ro_map; | 459 | *iptr++ = xdr_one; /* Reply chunk present */ |
460 | segcount = iptr++; /* save location of segment count */ | ||
461 | |||
462 | nchunks = 0; | ||
301 | do { | 463 | do { |
302 | n = map(r_xprt, seg, nsegs, cur_wchunk != NULL); | 464 | n = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, true); |
303 | if (n <= 0) | 465 | if (n <= 0) |
304 | goto out; | 466 | return ERR_PTR(n); |
305 | if (cur_rchunk) { /* read */ | 467 | |
306 | cur_rchunk->rc_discrim = xdr_one; | 468 | iptr = xdr_encode_rdma_segment(iptr, seg); |
307 | /* all read chunks have the same "position" */ | 469 | |
308 | cur_rchunk->rc_position = cpu_to_be32(pos); | 470 | dprintk("RPC: %5u %s: reply segment " |
309 | cur_rchunk->rc_target.rs_handle = | 471 | "%d@0x%016llx:0x%08x (%s)\n", |
310 | cpu_to_be32(seg->mr_rkey); | 472 | rqst->rq_task->tk_pid, __func__, |
311 | cur_rchunk->rc_target.rs_length = | 473 | seg->mr_len, (unsigned long long)seg->mr_base, |
312 | cpu_to_be32(seg->mr_len); | 474 | seg->mr_rkey, n < nsegs ? "more" : "last"); |
313 | xdr_encode_hyper( | 475 | |
314 | (__be32 *)&cur_rchunk->rc_target.rs_offset, | 476 | r_xprt->rx_stats.reply_chunk_count++; |
315 | seg->mr_base); | 477 | r_xprt->rx_stats.total_rdma_request += seg->mr_len; |
316 | dprintk("RPC: %s: read chunk " | 478 | req->rl_nchunks++; |
317 | "elem %d@0x%llx:0x%x pos %u (%s)\n", __func__, | ||
318 | seg->mr_len, (unsigned long long)seg->mr_base, | ||
319 | seg->mr_rkey, pos, n < nsegs ? "more" : "last"); | ||
320 | cur_rchunk++; | ||
321 | r_xprt->rx_stats.read_chunk_count++; | ||
322 | } else { /* write/reply */ | ||
323 | cur_wchunk->wc_target.rs_handle = | ||
324 | cpu_to_be32(seg->mr_rkey); | ||
325 | cur_wchunk->wc_target.rs_length = | ||
326 | cpu_to_be32(seg->mr_len); | ||
327 | xdr_encode_hyper( | ||
328 | (__be32 *)&cur_wchunk->wc_target.rs_offset, | ||
329 | seg->mr_base); | ||
330 | dprintk("RPC: %s: %s chunk " | ||
331 | "elem %d@0x%llx:0x%x (%s)\n", __func__, | ||
332 | (type == rpcrdma_replych) ? "reply" : "write", | ||
333 | seg->mr_len, (unsigned long long)seg->mr_base, | ||
334 | seg->mr_rkey, n < nsegs ? "more" : "last"); | ||
335 | cur_wchunk++; | ||
336 | if (type == rpcrdma_replych) | ||
337 | r_xprt->rx_stats.reply_chunk_count++; | ||
338 | else | ||
339 | r_xprt->rx_stats.write_chunk_count++; | ||
340 | r_xprt->rx_stats.total_rdma_request += seg->mr_len; | ||
341 | } | ||
342 | nchunks++; | 479 | nchunks++; |
343 | seg += n; | 480 | seg += n; |
344 | nsegs -= n; | 481 | nsegs -= n; |
345 | } while (nsegs); | 482 | } while (nsegs); |
483 | req->rl_nextseg = seg; | ||
346 | 484 | ||
347 | /* success. all failures return above */ | 485 | /* Update count of segments in the Reply chunk */ |
348 | req->rl_nchunks = nchunks; | 486 | *segcount = cpu_to_be32(nchunks); |
349 | |||
350 | /* | ||
351 | * finish off header. If write, marshal discrim and nchunks. | ||
352 | */ | ||
353 | if (cur_rchunk) { | ||
354 | iptr = (__be32 *) cur_rchunk; | ||
355 | *iptr++ = xdr_zero; /* finish the read chunk list */ | ||
356 | *iptr++ = xdr_zero; /* encode a NULL write chunk list */ | ||
357 | *iptr++ = xdr_zero; /* encode a NULL reply chunk */ | ||
358 | } else { | ||
359 | warray->wc_discrim = xdr_one; | ||
360 | warray->wc_nchunks = cpu_to_be32(nchunks); | ||
361 | iptr = (__be32 *) cur_wchunk; | ||
362 | if (type == rpcrdma_writech) { | ||
363 | *iptr++ = xdr_zero; /* finish the write chunk list */ | ||
364 | *iptr++ = xdr_zero; /* encode a NULL reply chunk */ | ||
365 | } | ||
366 | } | ||
367 | |||
368 | /* | ||
369 | * Return header size. | ||
370 | */ | ||
371 | return (unsigned char *)iptr - (unsigned char *)headerp; | ||
372 | 487 | ||
373 | out: | 488 | return iptr; |
374 | for (pos = 0; nchunks--;) | ||
375 | pos += r_xprt->rx_ia.ri_ops->ro_unmap(r_xprt, | ||
376 | &req->rl_segments[pos]); | ||
377 | return n; | ||
378 | } | 489 | } |
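Note the structural asymmetry among the three encoders: the Read list terminates with an explicit zero item, the Write list appends a zero after its single counted array, but the Reply chunk is a bare counted array with no trailing terminator of its own -- after it, the RPC-over-RDMA header simply ends. In the notation of the comments above:

    Read list:    1 P HLOO - 1 P HLOO - ... - 0
    Write list:   1 N HLOO HLOO ... HLOO - 0
    Reply chunk:  1 N HLOO HLOO ... HLOO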
379 | 490 | ||
380 | /* | 491 | /* |
@@ -440,13 +551,10 @@ static void rpcrdma_inline_pullup(struct rpc_rqst *rqst) | |||
440 | * Marshal a request: the primary job of this routine is to choose | 551 | * Marshal a request: the primary job of this routine is to choose |
441 | * the transfer modes. See comments below. | 552 | * the transfer modes. See comments below. |
442 | * | 553 | * |
443 | * Uses multiple RDMA IOVs for a request: | 554 | * Prepares up to two IOVs per Call message: |
444 | * [0] -- RPC RDMA header, which uses memory from the *start* of the | 555 | * |
445 | * preregistered buffer that already holds the RPC data in | 556 | * [0] -- RPC RDMA header |
446 | * its middle. | 557 | * [1] -- the RPC header/data |
447 | * [1] -- the RPC header/data, marshaled by RPC and the NFS protocol. | ||
448 | * [2] -- optional padding. | ||
449 | * [3] -- if padded, header only in [1] and data here. | ||
450 | * | 558 | * |
451 | * Returns zero on success, otherwise a negative errno. | 559 | * Returns zero on success, otherwise a negative errno. |
452 | */ | 560 | */ |
@@ -457,24 +565,17 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) | |||
457 | struct rpc_xprt *xprt = rqst->rq_xprt; | 565 | struct rpc_xprt *xprt = rqst->rq_xprt; |
458 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); | 566 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); |
459 | struct rpcrdma_req *req = rpcr_to_rdmar(rqst); | 567 | struct rpcrdma_req *req = rpcr_to_rdmar(rqst); |
460 | char *base; | ||
461 | size_t rpclen; | ||
462 | ssize_t hdrlen; | ||
463 | enum rpcrdma_chunktype rtype, wtype; | 568 | enum rpcrdma_chunktype rtype, wtype; |
464 | struct rpcrdma_msg *headerp; | 569 | struct rpcrdma_msg *headerp; |
570 | ssize_t hdrlen; | ||
571 | size_t rpclen; | ||
572 | __be32 *iptr; | ||
465 | 573 | ||
466 | #if defined(CONFIG_SUNRPC_BACKCHANNEL) | 574 | #if defined(CONFIG_SUNRPC_BACKCHANNEL) |
467 | if (test_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state)) | 575 | if (test_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state)) |
468 | return rpcrdma_bc_marshal_reply(rqst); | 576 | return rpcrdma_bc_marshal_reply(rqst); |
469 | #endif | 577 | #endif |
470 | 578 | ||
471 | /* | ||
472 | * rpclen gets amount of data in first buffer, which is the | ||
473 | * pre-registered buffer. | ||
474 | */ | ||
475 | base = rqst->rq_svec[0].iov_base; | ||
476 | rpclen = rqst->rq_svec[0].iov_len; | ||
477 | |||
478 | headerp = rdmab_to_msg(req->rl_rdmabuf); | 579 | headerp = rdmab_to_msg(req->rl_rdmabuf); |
479 | /* don't byte-swap XID, it's already done in request */ | 580 | /* don't byte-swap XID, it's already done in request */ |
480 | headerp->rm_xid = rqst->rq_xid; | 581 | headerp->rm_xid = rqst->rq_xid; |
@@ -485,15 +586,16 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) | |||
485 | /* | 586 | /* |
486 | * Chunks needed for results? | 587 | * Chunks needed for results? |
487 | * | 588 | * |
488 | * o Read ops return data as write chunk(s), header as inline. | ||
489 | * o If the expected result is under the inline threshold, all ops | 589 | * o If the expected result is under the inline threshold, all ops |
490 | * return as inline. | 590 | * return as inline. |
591 | * o Large read ops return data as write chunk(s), header as | ||
592 | * inline. | ||
491 | * o Large non-read ops return as a single reply chunk. | 593 | * o Large non-read ops return as a single reply chunk. |
492 | */ | 594 | */ |
493 | if (rqst->rq_rcv_buf.flags & XDRBUF_READ) | 595 | if (rpcrdma_results_inline(r_xprt, rqst)) |
494 | wtype = rpcrdma_writech; | ||
495 | else if (rpcrdma_results_inline(rqst)) | ||
496 | wtype = rpcrdma_noch; | 596 | wtype = rpcrdma_noch; |
597 | else if (rqst->rq_rcv_buf.flags & XDRBUF_READ) | ||
598 | wtype = rpcrdma_writech; | ||
497 | else | 599 | else |
498 | wtype = rpcrdma_replych; | 600 | wtype = rpcrdma_replych; |
499 | 601 | ||
@@ -511,10 +613,14 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) | |||
511 | * that both has a data payload, and whose non-data arguments | 613 | * that both has a data payload, and whose non-data arguments |
512 | * by themselves are larger than the inline threshold. | 614 | * by themselves are larger than the inline threshold. |
513 | */ | 615 | */ |
514 | if (rpcrdma_args_inline(rqst)) { | 616 | if (rpcrdma_args_inline(r_xprt, rqst)) { |
515 | rtype = rpcrdma_noch; | 617 | rtype = rpcrdma_noch; |
618 | rpcrdma_inline_pullup(rqst); | ||
619 | rpclen = rqst->rq_svec[0].iov_len; | ||
516 | } else if (rqst->rq_snd_buf.flags & XDRBUF_WRITE) { | 620 | } else if (rqst->rq_snd_buf.flags & XDRBUF_WRITE) { |
517 | rtype = rpcrdma_readch; | 621 | rtype = rpcrdma_readch; |
622 | rpclen = rqst->rq_svec[0].iov_len; | ||
623 | rpclen += rpcrdma_tail_pullup(&rqst->rq_snd_buf); | ||
518 | } else { | 624 | } else { |
519 | r_xprt->rx_stats.nomsg_call_count++; | 625 | r_xprt->rx_stats.nomsg_call_count++; |
520 | headerp->rm_type = htonl(RDMA_NOMSG); | 626 | headerp->rm_type = htonl(RDMA_NOMSG); |
@@ -522,57 +628,50 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) | |||
522 | rpclen = 0; | 628 | rpclen = 0; |
523 | } | 629 | } |
524 | 630 | ||
525 | /* The following simplification is not true forever */ | 631 | /* This implementation supports the following combinations |
526 | if (rtype != rpcrdma_noch && wtype == rpcrdma_replych) | 632 | * of chunk lists in one RPC-over-RDMA Call message: |
527 | wtype = rpcrdma_noch; | 633 | * |
528 | if (rtype != rpcrdma_noch && wtype != rpcrdma_noch) { | 634 | * - Read list |
529 | dprintk("RPC: %s: cannot marshal multiple chunk lists\n", | 635 | * - Write list |
530 | __func__); | 636 | * - Reply chunk |
531 | return -EIO; | 637 | * - Read list + Reply chunk |
532 | } | 638 | * |
533 | 639 | * It might not yet support the following combinations: | |
534 | hdrlen = RPCRDMA_HDRLEN_MIN; | 640 | * |
535 | 641 | * - Read list + Write list | |
536 | /* | 642 | * |
537 | * Pull up any extra send data into the preregistered buffer. | 643 | * It does not support the following combinations: |
538 | * When padding is in use and applies to the transfer, insert | 644 | * |
539 | * it and change the message type. | 645 | * - Write list + Reply chunk |
646 | * - Read list + Write list + Reply chunk | ||
647 | * | ||
648 | * This implementation supports only a single chunk in each | ||
649 | * Read or Write list. Thus for example the client cannot | ||
650 | * send a Call message with a Position Zero Read chunk and a | ||
651 | * regular Read chunk at the same time. | ||
540 | */ | 652 | */ |
541 | if (rtype == rpcrdma_noch) { | 653 | req->rl_nchunks = 0; |
542 | 654 | req->rl_nextseg = req->rl_segments; | |
543 | rpcrdma_inline_pullup(rqst); | 655 | iptr = headerp->rm_body.rm_chunks; |
544 | 656 | iptr = rpcrdma_encode_read_list(r_xprt, req, rqst, iptr, rtype); | |
545 | headerp->rm_body.rm_nochunks.rm_empty[0] = xdr_zero; | 657 | if (IS_ERR(iptr)) |
546 | headerp->rm_body.rm_nochunks.rm_empty[1] = xdr_zero; | 658 | goto out_unmap; |
547 | headerp->rm_body.rm_nochunks.rm_empty[2] = xdr_zero; | 659 | iptr = rpcrdma_encode_write_list(r_xprt, req, rqst, iptr, wtype); |
548 | /* new length after pullup */ | 660 | if (IS_ERR(iptr)) |
549 | rpclen = rqst->rq_svec[0].iov_len; | 661 | goto out_unmap; |
550 | } else if (rtype == rpcrdma_readch) | 662 | iptr = rpcrdma_encode_reply_chunk(r_xprt, req, rqst, iptr, wtype); |
551 | rpclen += rpcrdma_tail_pullup(&rqst->rq_snd_buf); | 663 | if (IS_ERR(iptr)) |
552 | if (rtype != rpcrdma_noch) { | 664 | goto out_unmap; |
553 | hdrlen = rpcrdma_create_chunks(rqst, &rqst->rq_snd_buf, | 665 | hdrlen = (unsigned char *)iptr - (unsigned char *)headerp; |
554 | headerp, rtype); | 666 | |
555 | wtype = rtype; /* simplify dprintk */ | 667 | if (hdrlen + rpclen > RPCRDMA_INLINE_WRITE_THRESHOLD(rqst)) |
556 | 668 | goto out_overflow; | |
557 | } else if (wtype != rpcrdma_noch) { | 669 | |
558 | hdrlen = rpcrdma_create_chunks(rqst, &rqst->rq_rcv_buf, | 670 | dprintk("RPC: %5u %s: %s/%s: hdrlen %zd rpclen %zd\n", |
559 | headerp, wtype); | 671 | rqst->rq_task->tk_pid, __func__, |
560 | } | 672 | transfertypes[rtype], transfertypes[wtype], |
561 | if (hdrlen < 0) | 673 | hdrlen, rpclen); |
562 | return hdrlen; | ||
563 | 674 | ||
564 | dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd" | ||
565 | " headerp 0x%p base 0x%p lkey 0x%x\n", | ||
566 | __func__, transfertypes[wtype], hdrlen, rpclen, | ||
567 | headerp, base, rdmab_lkey(req->rl_rdmabuf)); | ||
568 | |||
569 | /* | ||
570 | * initialize send_iov's - normally only two: rdma chunk header and | ||
571 | * single preregistered RPC header buffer, but if padding is present, | ||
572 | * then use a preregistered (and zeroed) pad buffer between the RPC | ||
573 | * header and any write data. In all non-rdma cases, any following | ||
574 | * data has been copied into the RPC header buffer. | ||
575 | */ | ||
576 | req->rl_send_iov[0].addr = rdmab_addr(req->rl_rdmabuf); | 675 | req->rl_send_iov[0].addr = rdmab_addr(req->rl_rdmabuf); |
577 | req->rl_send_iov[0].length = hdrlen; | 676 | req->rl_send_iov[0].length = hdrlen; |
578 | req->rl_send_iov[0].lkey = rdmab_lkey(req->rl_rdmabuf); | 677 | req->rl_send_iov[0].lkey = rdmab_lkey(req->rl_rdmabuf); |
@@ -587,6 +686,18 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) | |||
587 | 686 | ||
588 | req->rl_niovs = 2; | 687 | req->rl_niovs = 2; |
589 | return 0; | 688 | return 0; |
689 | |||
690 | out_overflow: | ||
691 | pr_err("rpcrdma: send overflow: hdrlen %zd rpclen %zu %s/%s\n", | ||
692 | hdrlen, rpclen, transfertypes[rtype], transfertypes[wtype]); | ||
693 | /* Terminate this RPC. Chunks registered above will be | ||
694 | * released by xprt_release -> xprt_rdma_free. | ||
695 | */ | ||
696 | return -EIO; | ||
697 | |||
698 | out_unmap: | ||
699 | r_xprt->rx_ia.ri_ops->ro_unmap_safe(r_xprt, req, false); | ||
700 | return PTR_ERR(iptr); | ||
590 | } | 701 | } |
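The rtype/wtype selection in rpcrdma_marshal_req() boils down to a small decision table. A hedged sketch of that logic (function and parameter names invented for illustration; the RDMA_NOMSG branch corresponds to rpcrdma_areadch, the Position Zero read chunk handled by rpcrdma_encode_read_list()):

    #include <stddef.h>

    enum chunktype { noch, readch, areadch, writech, replych };

    /* Result direction: inline if it fits, Write list for read-like
     * ops (XDRBUF_READ), otherwise a Reply chunk.
     */
    static enum chunktype choose_wtype(size_t rcv_buflen,
                                       size_t max_inline_read,
                                       int is_xdrbuf_read)
    {
        if (rcv_buflen <= max_inline_read)
            return noch;
        return is_xdrbuf_read ? writech : replych;
    }

    /* Argument direction: inline if it fits, Read list for write-like
     * ops (XDRBUF_WRITE), otherwise RDMA_NOMSG with a Position Zero
     * read chunk carrying the entire call.
     */
    static enum chunktype choose_rtype(size_t snd_len,
                                       size_t max_inline_write,
                                       int is_xdrbuf_write)
    {
        if (snd_len <= max_inline_write)
            return noch;
        return is_xdrbuf_write ? readch : areadch;
    }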
591 | 702 | ||
592 | /* | 703 | /* |
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index b1b009f10ea3..99d2e5b72726 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c | |||
@@ -73,6 +73,8 @@ static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR; | |||
73 | 73 | ||
74 | static unsigned int min_slot_table_size = RPCRDMA_MIN_SLOT_TABLE; | 74 | static unsigned int min_slot_table_size = RPCRDMA_MIN_SLOT_TABLE; |
75 | static unsigned int max_slot_table_size = RPCRDMA_MAX_SLOT_TABLE; | 75 | static unsigned int max_slot_table_size = RPCRDMA_MAX_SLOT_TABLE; |
76 | static unsigned int min_inline_size = RPCRDMA_MIN_INLINE; | ||
77 | static unsigned int max_inline_size = RPCRDMA_MAX_INLINE; | ||
76 | static unsigned int zero; | 78 | static unsigned int zero; |
77 | static unsigned int max_padding = PAGE_SIZE; | 79 | static unsigned int max_padding = PAGE_SIZE; |
78 | static unsigned int min_memreg = RPCRDMA_BOUNCEBUFFERS; | 80 | static unsigned int min_memreg = RPCRDMA_BOUNCEBUFFERS; |
@@ -96,6 +98,8 @@ static struct ctl_table xr_tunables_table[] = { | |||
96 | .maxlen = sizeof(unsigned int), | 98 | .maxlen = sizeof(unsigned int), |
97 | .mode = 0644, | 99 | .mode = 0644, |
98 | .proc_handler = proc_dointvec, | 100 | .proc_handler = proc_dointvec, |
101 | .extra1 = &min_inline_size, | ||
102 | .extra2 = &max_inline_size, | ||
99 | }, | 103 | }, |
100 | { | 104 | { |
101 | .procname = "rdma_max_inline_write", | 105 | .procname = "rdma_max_inline_write", |
@@ -103,6 +107,8 @@ static struct ctl_table xr_tunables_table[] = { | |||
103 | .maxlen = sizeof(unsigned int), | 107 | .maxlen = sizeof(unsigned int), |
104 | .mode = 0644, | 108 | .mode = 0644, |
105 | .proc_handler = proc_dointvec, | 109 | .proc_handler = proc_dointvec, |
110 | .extra1 = &min_inline_size, | ||
111 | .extra2 = &max_inline_size, | ||
106 | }, | 112 | }, |
107 | { | 113 | { |
108 | .procname = "rdma_inline_write_padding", | 114 | .procname = "rdma_inline_write_padding", |
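One caveat on the tunable bounds added above: proc_dointvec ignores .extra1/.extra2, so the clamping only takes effect if the handler is proc_dointvec_minmax. A sketch of the bounded form of one entry (the .data symbol name is assumed here, matching the surrounding table's conventions):

    {
        .procname     = "rdma_max_inline_read",
        .data         = &xprt_rdma_max_inline_read, /* assumed symbol name */
        .maxlen       = sizeof(unsigned int),
        .mode         = 0644,
        .proc_handler = proc_dointvec_minmax, /* honors extra1/extra2 */
        .extra1       = &min_inline_size,
        .extra2       = &max_inline_size,
    },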
@@ -508,6 +514,7 @@ xprt_rdma_allocate(struct rpc_task *task, size_t size) | |||
508 | out: | 514 | out: |
509 | dprintk("RPC: %s: size %zd, request 0x%p\n", __func__, size, req); | 515 | dprintk("RPC: %s: size %zd, request 0x%p\n", __func__, size, req); |
510 | req->rl_connect_cookie = 0; /* our reserved value */ | 516 | req->rl_connect_cookie = 0; /* our reserved value */ |
517 | req->rl_task = task; | ||
511 | return req->rl_sendbuf->rg_base; | 518 | return req->rl_sendbuf->rg_base; |
512 | 519 | ||
513 | out_rdmabuf: | 520 | out_rdmabuf: |
@@ -564,7 +571,6 @@ xprt_rdma_free(void *buffer) | |||
564 | struct rpcrdma_req *req; | 571 | struct rpcrdma_req *req; |
565 | struct rpcrdma_xprt *r_xprt; | 572 | struct rpcrdma_xprt *r_xprt; |
566 | struct rpcrdma_regbuf *rb; | 573 | struct rpcrdma_regbuf *rb; |
567 | int i; | ||
568 | 574 | ||
569 | if (buffer == NULL) | 575 | if (buffer == NULL) |
570 | return; | 576 | return; |
@@ -578,11 +584,8 @@ xprt_rdma_free(void *buffer) | |||
578 | 584 | ||
579 | dprintk("RPC: %s: called on 0x%p\n", __func__, req->rl_reply); | 585 | dprintk("RPC: %s: called on 0x%p\n", __func__, req->rl_reply); |
580 | 586 | ||
581 | for (i = 0; req->rl_nchunks;) { | 587 | r_xprt->rx_ia.ri_ops->ro_unmap_safe(r_xprt, req, |
582 | --req->rl_nchunks; | 588 | !RPC_IS_ASYNC(req->rl_task)); |
583 | i += r_xprt->rx_ia.ri_ops->ro_unmap(r_xprt, | ||
584 | &req->rl_segments[i]); | ||
585 | } | ||
586 | 589 | ||
587 | rpcrdma_buffer_put(req); | 590 | rpcrdma_buffer_put(req); |
588 | } | 591 | } |
@@ -707,6 +710,7 @@ static struct rpc_xprt_ops xprt_rdma_procs = { | |||
707 | #if defined(CONFIG_SUNRPC_BACKCHANNEL) | 710 | #if defined(CONFIG_SUNRPC_BACKCHANNEL) |
708 | .bc_setup = xprt_rdma_bc_setup, | 711 | .bc_setup = xprt_rdma_bc_setup, |
709 | .bc_up = xprt_rdma_bc_up, | 712 | .bc_up = xprt_rdma_bc_up, |
713 | .bc_maxpayload = xprt_rdma_bc_maxpayload, | ||
710 | .bc_free_rqst = xprt_rdma_bc_free_rqst, | 714 | .bc_free_rqst = xprt_rdma_bc_free_rqst, |
711 | .bc_destroy = xprt_rdma_bc_destroy, | 715 | .bc_destroy = xprt_rdma_bc_destroy, |
712 | #endif | 716 | #endif |
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index f5ed9f982cd7..b044d98a1370 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c | |||
@@ -203,15 +203,6 @@ out_fail: | |||
203 | goto out_schedule; | 203 | goto out_schedule; |
204 | } | 204 | } |
205 | 205 | ||
206 | static void | ||
207 | rpcrdma_flush_cqs(struct rpcrdma_ep *ep) | ||
208 | { | ||
209 | struct ib_wc wc; | ||
210 | |||
211 | while (ib_poll_cq(ep->rep_attr.recv_cq, 1, &wc) > 0) | ||
212 | rpcrdma_receive_wc(NULL, &wc); | ||
213 | } | ||
214 | |||
215 | static int | 206 | static int |
216 | rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) | 207 | rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) |
217 | { | 208 | { |
@@ -374,23 +365,6 @@ out: | |||
374 | } | 365 | } |
375 | 366 | ||
376 | /* | 367 | /* |
377 | * Drain any cq, prior to teardown. | ||
378 | */ | ||
379 | static void | ||
380 | rpcrdma_clean_cq(struct ib_cq *cq) | ||
381 | { | ||
382 | struct ib_wc wc; | ||
383 | int count = 0; | ||
384 | |||
385 | while (1 == ib_poll_cq(cq, 1, &wc)) | ||
386 | ++count; | ||
387 | |||
388 | if (count) | ||
389 | dprintk("RPC: %s: flushed %d events (last 0x%x)\n", | ||
390 | __func__, count, wc.opcode); | ||
391 | } | ||
392 | |||
393 | /* | ||
394 | * Exported functions. | 368 | * Exported functions. |
395 | */ | 369 | */ |
396 | 370 | ||
@@ -459,7 +433,6 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) | |||
459 | dprintk("RPC: %s: memory registration strategy is '%s'\n", | 433 | dprintk("RPC: %s: memory registration strategy is '%s'\n", |
460 | __func__, ia->ri_ops->ro_displayname); | 434 | __func__, ia->ri_ops->ro_displayname); |
461 | 435 | ||
462 | rwlock_init(&ia->ri_qplock); | ||
463 | return 0; | 436 | return 0; |
464 | 437 | ||
465 | out3: | 438 | out3: |
@@ -515,7 +488,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, | |||
515 | __func__); | 488 | __func__); |
516 | return -ENOMEM; | 489 | return -ENOMEM; |
517 | } | 490 | } |
518 | max_qp_wr = ia->ri_device->attrs.max_qp_wr - RPCRDMA_BACKWARD_WRS; | 491 | max_qp_wr = ia->ri_device->attrs.max_qp_wr - RPCRDMA_BACKWARD_WRS - 1; |
519 | 492 | ||
520 | /* check provider's send/recv wr limits */ | 493 | /* check provider's send/recv wr limits */ |
521 | if (cdata->max_requests > max_qp_wr) | 494 | if (cdata->max_requests > max_qp_wr) |
@@ -526,11 +499,13 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, | |||
526 | ep->rep_attr.srq = NULL; | 499 | ep->rep_attr.srq = NULL; |
527 | ep->rep_attr.cap.max_send_wr = cdata->max_requests; | 500 | ep->rep_attr.cap.max_send_wr = cdata->max_requests; |
528 | ep->rep_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS; | 501 | ep->rep_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS; |
502 | ep->rep_attr.cap.max_send_wr += 1; /* drain cqe */ | ||
529 | rc = ia->ri_ops->ro_open(ia, ep, cdata); | 503 | rc = ia->ri_ops->ro_open(ia, ep, cdata); |
530 | if (rc) | 504 | if (rc) |
531 | return rc; | 505 | return rc; |
532 | ep->rep_attr.cap.max_recv_wr = cdata->max_requests; | 506 | ep->rep_attr.cap.max_recv_wr = cdata->max_requests; |
533 | ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS; | 507 | ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS; |
508 | ep->rep_attr.cap.max_recv_wr += 1; /* drain cqe */ | ||
534 | ep->rep_attr.cap.max_send_sge = RPCRDMA_MAX_IOVS; | 509 | ep->rep_attr.cap.max_send_sge = RPCRDMA_MAX_IOVS; |
535 | ep->rep_attr.cap.max_recv_sge = 1; | 510 | ep->rep_attr.cap.max_recv_sge = 1; |
536 | ep->rep_attr.cap.max_inline_data = 0; | 511 | ep->rep_attr.cap.max_inline_data = 0; |
@@ -578,6 +553,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, | |||
578 | ep->rep_attr.recv_cq = recvcq; | 553 | ep->rep_attr.recv_cq = recvcq; |
579 | 554 | ||
580 | /* Initialize cma parameters */ | 555 | /* Initialize cma parameters */ |
556 | memset(&ep->rep_remote_cma, 0, sizeof(ep->rep_remote_cma)); | ||
581 | 557 | ||
582 | /* RPC/RDMA does not use private data */ | 558 | /* RPC/RDMA does not use private data */ |
583 | ep->rep_remote_cma.private_data = NULL; | 559 | ep->rep_remote_cma.private_data = NULL; |
@@ -591,7 +567,16 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, | |||
591 | ep->rep_remote_cma.responder_resources = | 567 | ep->rep_remote_cma.responder_resources = |
592 | ia->ri_device->attrs.max_qp_rd_atom; | 568 | ia->ri_device->attrs.max_qp_rd_atom; |
593 | 569 | ||
594 | ep->rep_remote_cma.retry_count = 7; | 570 | /* Limit transport retries so client can detect server |
571 | * GID changes quickly. RPC layer handles re-establishing | ||
572 | * transport connection and retransmission. | ||
573 | */ | ||
574 | ep->rep_remote_cma.retry_count = 6; | ||
575 | |||
576 | /* RPC-over-RDMA handles its own flow control. In addition, | ||
577 | * make all RNR NAKs visible so we know that RPC-over-RDMA | ||
578 | * flow control is working correctly (no NAKs should be seen). | ||
579 | */ | ||
595 | ep->rep_remote_cma.flow_control = 0; | 580 | ep->rep_remote_cma.flow_control = 0; |
596 | ep->rep_remote_cma.rnr_retry_count = 0; | 581 | ep->rep_remote_cma.rnr_retry_count = 0; |
597 | 582 | ||
@@ -622,13 +607,8 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) | |||
622 | 607 | ||
623 | cancel_delayed_work_sync(&ep->rep_connect_worker); | 608 | cancel_delayed_work_sync(&ep->rep_connect_worker); |
624 | 609 | ||
625 | if (ia->ri_id->qp) | ||
626 | rpcrdma_ep_disconnect(ep, ia); | ||
627 | |||
628 | rpcrdma_clean_cq(ep->rep_attr.recv_cq); | ||
629 | rpcrdma_clean_cq(ep->rep_attr.send_cq); | ||
630 | |||
631 | if (ia->ri_id->qp) { | 610 | if (ia->ri_id->qp) { |
611 | rpcrdma_ep_disconnect(ep, ia); | ||
632 | rdma_destroy_qp(ia->ri_id); | 612 | rdma_destroy_qp(ia->ri_id); |
633 | ia->ri_id->qp = NULL; | 613 | ia->ri_id->qp = NULL; |
634 | } | 614 | } |
@@ -659,7 +639,6 @@ retry: | |||
659 | dprintk("RPC: %s: reconnecting...\n", __func__); | 639 | dprintk("RPC: %s: reconnecting...\n", __func__); |
660 | 640 | ||
661 | rpcrdma_ep_disconnect(ep, ia); | 641 | rpcrdma_ep_disconnect(ep, ia); |
662 | rpcrdma_flush_cqs(ep); | ||
663 | 642 | ||
664 | xprt = container_of(ia, struct rpcrdma_xprt, rx_ia); | 643 | xprt = container_of(ia, struct rpcrdma_xprt, rx_ia); |
665 | id = rpcrdma_create_id(xprt, ia, | 644 | id = rpcrdma_create_id(xprt, ia, |
@@ -692,10 +671,8 @@ retry: | |||
692 | goto out; | 671 | goto out; |
693 | } | 672 | } |
694 | 673 | ||
695 | write_lock(&ia->ri_qplock); | ||
696 | old = ia->ri_id; | 674 | old = ia->ri_id; |
697 | ia->ri_id = id; | 675 | ia->ri_id = id; |
698 | write_unlock(&ia->ri_qplock); | ||
699 | 676 | ||
700 | rdma_destroy_qp(old); | 677 | rdma_destroy_qp(old); |
701 | rpcrdma_destroy_id(old); | 678 | rpcrdma_destroy_id(old); |
@@ -785,7 +762,6 @@ rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) | |||
785 | { | 762 | { |
786 | int rc; | 763 | int rc; |
787 | 764 | ||
788 | rpcrdma_flush_cqs(ep); | ||
789 | rc = rdma_disconnect(ia->ri_id); | 765 | rc = rdma_disconnect(ia->ri_id); |
790 | if (!rc) { | 766 | if (!rc) { |
791 | /* returns without wait if not connected */ | 767 | /* returns without wait if not connected */ |
@@ -797,6 +773,8 @@ rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) | |||
797 | dprintk("RPC: %s: rdma_disconnect %i\n", __func__, rc); | 773 | dprintk("RPC: %s: rdma_disconnect %i\n", __func__, rc); |
798 | ep->rep_connected = rc; | 774 | ep->rep_connected = rc; |
799 | } | 775 | } |
776 | |||
777 | ib_drain_qp(ia->ri_id->qp); | ||
800 | } | 778 | } |
801 | 779 | ||
802 | struct rpcrdma_req * | 780 | struct rpcrdma_req * |
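The ib_drain_qp() call above replaces the hand-rolled rpcrdma_clean_cq()/rpcrdma_flush_cqs() polling removed earlier in this patch. A minimal teardown-ordering sketch, assuming a connected rdma_cm_id (error handling elided):

    #include <rdma/ib_verbs.h>
    #include <rdma/rdma_cm.h>

    /* Disconnect first so no new WRs are posted, then drain, then
     * destroy. ib_drain_qp() blocks until all outstanding send and
     * receive WRs on the QP have flushed to their CQs -- the same
     * guarantee the removed polling loops approximated by hand.
     */
    static void example_teardown(struct rdma_cm_id *id)
    {
        if (id->qp) {
            rdma_disconnect(id); /* returns without wait if unconnected */
            ib_drain_qp(id->qp);
            rdma_destroy_qp(id);
        }
    }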
@@ -1271,25 +1249,3 @@ out_rc: | |||
1271 | rpcrdma_recv_buffer_put(rep); | 1249 | rpcrdma_recv_buffer_put(rep); |
1272 | return rc; | 1250 | return rc; |
1273 | } | 1251 | } |
1274 | |||
1275 | /* How many chunk list items fit within our inline buffers? | ||
1276 | */ | ||
1277 | unsigned int | ||
1278 | rpcrdma_max_segments(struct rpcrdma_xprt *r_xprt) | ||
1279 | { | ||
1280 | struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data; | ||
1281 | int bytes, segments; | ||
1282 | |||
1283 | bytes = min_t(unsigned int, cdata->inline_wsize, cdata->inline_rsize); | ||
1284 | bytes -= RPCRDMA_HDRLEN_MIN; | ||
1285 | if (bytes < sizeof(struct rpcrdma_segment) * 2) { | ||
1286 | pr_warn("RPC: %s: inline threshold too small\n", | ||
1287 | __func__); | ||
1288 | return 0; | ||
1289 | } | ||
1290 | |||
1291 | segments = 1 << (fls(bytes / sizeof(struct rpcrdma_segment)) - 1); | ||
1292 | dprintk("RPC: %s: max chunk list size = %d segments\n", | ||
1293 | __func__, segments); | ||
1294 | return segments; | ||
1295 | } | ||
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 2ebc743cb96f..95cdc66225ee 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h | |||
@@ -65,7 +65,6 @@ | |||
65 | */ | 65 | */ |
66 | struct rpcrdma_ia { | 66 | struct rpcrdma_ia { |
67 | const struct rpcrdma_memreg_ops *ri_ops; | 67 | const struct rpcrdma_memreg_ops *ri_ops; |
68 | rwlock_t ri_qplock; | ||
69 | struct ib_device *ri_device; | 68 | struct ib_device *ri_device; |
70 | struct rdma_cm_id *ri_id; | 69 | struct rdma_cm_id *ri_id; |
71 | struct ib_pd *ri_pd; | 70 | struct ib_pd *ri_pd; |
@@ -73,6 +72,8 @@ struct rpcrdma_ia { | |||
73 | struct completion ri_done; | 72 | struct completion ri_done; |
74 | int ri_async_rc; | 73 | int ri_async_rc; |
75 | unsigned int ri_max_frmr_depth; | 74 | unsigned int ri_max_frmr_depth; |
75 | unsigned int ri_max_inline_write; | ||
76 | unsigned int ri_max_inline_read; | ||
76 | struct ib_qp_attr ri_qp_attr; | 77 | struct ib_qp_attr ri_qp_attr; |
77 | struct ib_qp_init_attr ri_qp_init_attr; | 78 | struct ib_qp_init_attr ri_qp_init_attr; |
78 | }; | 79 | }; |
@@ -144,6 +145,26 @@ rdmab_to_msg(struct rpcrdma_regbuf *rb) | |||
144 | 145 | ||
145 | #define RPCRDMA_DEF_GFP (GFP_NOIO | __GFP_NOWARN) | 146 | #define RPCRDMA_DEF_GFP (GFP_NOIO | __GFP_NOWARN) |
146 | 147 | ||
148 | /* To ensure a transport can always make forward progress, | ||
149 | * the number of RDMA segments allowed in header chunk lists | ||
150 | * is capped at 8. This prevents less-capable devices and | ||
151 | * memory registrations from overrunning the Send buffer | ||
152 | * while building chunk lists. | ||
153 | * | ||
154 | * Elements of the Read list take up more room than the | ||
155 | * Write list or Reply chunk. 8 read segments means the Read | ||
156 | * list (or Write list or Reply chunk) cannot consume more | ||
157 | * than | ||
158 | * | ||
159 | * ((8 + 2) * read segment size) + 1 XDR word, or 244 bytes. | ||
160 | * | ||
161 | * And the fixed part of the header is another 24 bytes. | ||
162 | * | ||
163 | * The smallest inline threshold is 1024 bytes, ensuring that | ||
164 | * at least 750 bytes are available for RPC messages. | ||
165 | */ | ||
166 | #define RPCRDMA_MAX_HDR_SEGS (8) | ||
167 | |||
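The arithmetic in the comment checks out: a Read list element is discriminator (4) + position (4) + HLOO (4 + 4 + 8) = 24 bytes, and the fixed part is the four-word RPC-over-RDMA header plus the two remaining list discriminators. A compile-time restatement:

    enum {
        READ_SEG_BYTES  = 4 + 4 + 4 + 4 + 8,            /* 1 + P + HLOO = 24 */
        LIST_MAX_BYTES  = (8 + 2) * READ_SEG_BYTES + 4, /* 244 */
        FIXED_HDR_BYTES = 4 * 4 + 2 * 4,                /* 24 */
        MIN_INLINE      = 1024,
        /* 1024 - 244 - 24 = 756, hence "at least 750 bytes" */
        RPC_MIN_AVAIL   = MIN_INLINE - LIST_MAX_BYTES - FIXED_HDR_BYTES,
    };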
147 | /* | 168 | /* |
148 | * struct rpcrdma_rep -- this structure encapsulates state required to recv | 169 | * struct rpcrdma_rep -- this structure encapsulates state required to recv |
149 | * and complete a reply, asynchronously. It needs several pieces of | 170 |
@@ -162,7 +183,9 @@ rdmab_to_msg(struct rpcrdma_regbuf *rb) | |||
162 | */ | 183 | */ |
163 | 184 | ||
164 | #define RPCRDMA_MAX_DATA_SEGS ((1 * 1024 * 1024) / PAGE_SIZE) | 185 | #define RPCRDMA_MAX_DATA_SEGS ((1 * 1024 * 1024) / PAGE_SIZE) |
165 | #define RPCRDMA_MAX_SEGS (RPCRDMA_MAX_DATA_SEGS + 2) /* head+tail = 2 */ | 186 | |
187 | /* data segments + head/tail for Call + head/tail for Reply */ | ||
188 | #define RPCRDMA_MAX_SEGS (RPCRDMA_MAX_DATA_SEGS + 4) | ||
166 | 189 | ||
167 | struct rpcrdma_buffer; | 190 | struct rpcrdma_buffer; |
168 | 191 | ||
@@ -198,14 +221,13 @@ enum rpcrdma_frmr_state { | |||
198 | }; | 221 | }; |
199 | 222 | ||
200 | struct rpcrdma_frmr { | 223 | struct rpcrdma_frmr { |
201 | struct scatterlist *sg; | 224 | struct scatterlist *fr_sg; |
202 | int sg_nents; | 225 | int fr_nents; |
226 | enum dma_data_direction fr_dir; | ||
203 | struct ib_mr *fr_mr; | 227 | struct ib_mr *fr_mr; |
204 | struct ib_cqe fr_cqe; | 228 | struct ib_cqe fr_cqe; |
205 | enum rpcrdma_frmr_state fr_state; | 229 | enum rpcrdma_frmr_state fr_state; |
206 | struct completion fr_linv_done; | 230 | struct completion fr_linv_done; |
207 | struct work_struct fr_work; | ||
208 | struct rpcrdma_xprt *fr_xprt; | ||
209 | union { | 231 | union { |
210 | struct ib_reg_wr fr_regwr; | 232 | struct ib_reg_wr fr_regwr; |
211 | struct ib_send_wr fr_invwr; | 233 | struct ib_send_wr fr_invwr; |
@@ -222,6 +244,8 @@ struct rpcrdma_mw { | |||
222 | struct rpcrdma_fmr fmr; | 244 | struct rpcrdma_fmr fmr; |
223 | struct rpcrdma_frmr frmr; | 245 | struct rpcrdma_frmr frmr; |
224 | }; | 246 | }; |
247 | struct work_struct mw_work; | ||
248 | struct rpcrdma_xprt *mw_xprt; | ||
225 | struct list_head mw_list; | 249 | struct list_head mw_list; |
226 | struct list_head mw_all; | 250 | struct list_head mw_all; |
227 | }; | 251 | }; |
@@ -270,12 +294,14 @@ struct rpcrdma_req { | |||
270 | unsigned int rl_niovs; | 294 | unsigned int rl_niovs; |
271 | unsigned int rl_nchunks; | 295 | unsigned int rl_nchunks; |
272 | unsigned int rl_connect_cookie; | 296 | unsigned int rl_connect_cookie; |
297 | struct rpc_task *rl_task; | ||
273 | struct rpcrdma_buffer *rl_buffer; | 298 | struct rpcrdma_buffer *rl_buffer; |
274 | struct rpcrdma_rep *rl_reply;/* holder for reply buffer */ | 299 | struct rpcrdma_rep *rl_reply;/* holder for reply buffer */ |
275 | struct ib_sge rl_send_iov[RPCRDMA_MAX_IOVS]; | 300 | struct ib_sge rl_send_iov[RPCRDMA_MAX_IOVS]; |
276 | struct rpcrdma_regbuf *rl_rdmabuf; | 301 | struct rpcrdma_regbuf *rl_rdmabuf; |
277 | struct rpcrdma_regbuf *rl_sendbuf; | 302 | struct rpcrdma_regbuf *rl_sendbuf; |
278 | struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS]; | 303 | struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS]; |
304 | struct rpcrdma_mr_seg *rl_nextseg; | ||
279 | 305 | ||
280 | struct ib_cqe rl_cqe; | 306 | struct ib_cqe rl_cqe; |
281 | struct list_head rl_all; | 307 | struct list_head rl_all; |
@@ -372,8 +398,8 @@ struct rpcrdma_memreg_ops { | |||
372 | struct rpcrdma_mr_seg *, int, bool); | 398 | struct rpcrdma_mr_seg *, int, bool); |
373 | void (*ro_unmap_sync)(struct rpcrdma_xprt *, | 399 | void (*ro_unmap_sync)(struct rpcrdma_xprt *, |
374 | struct rpcrdma_req *); | 400 | struct rpcrdma_req *); |
375 | int (*ro_unmap)(struct rpcrdma_xprt *, | 401 | void (*ro_unmap_safe)(struct rpcrdma_xprt *, |
376 | struct rpcrdma_mr_seg *); | 402 | struct rpcrdma_req *, bool); |
377 | int (*ro_open)(struct rpcrdma_ia *, | 403 | int (*ro_open)(struct rpcrdma_ia *, |
378 | struct rpcrdma_ep *, | 404 | struct rpcrdma_ep *, |
379 | struct rpcrdma_create_data_internal *); | 405 | struct rpcrdma_create_data_internal *); |
@@ -456,7 +482,6 @@ struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(struct rpcrdma_ia *, | |||
456 | void rpcrdma_free_regbuf(struct rpcrdma_ia *, | 482 | void rpcrdma_free_regbuf(struct rpcrdma_ia *, |
457 | struct rpcrdma_regbuf *); | 483 | struct rpcrdma_regbuf *); |
458 | 484 | ||
459 | unsigned int rpcrdma_max_segments(struct rpcrdma_xprt *); | ||
460 | int rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *, unsigned int); | 485 | int rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *, unsigned int); |
461 | 486 | ||
462 | int frwr_alloc_recovery_wq(void); | 487 | int frwr_alloc_recovery_wq(void); |
@@ -519,6 +544,9 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *); | |||
519 | * RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c | 544 | * RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c |
520 | */ | 545 | */ |
521 | int rpcrdma_marshal_req(struct rpc_rqst *); | 546 | int rpcrdma_marshal_req(struct rpc_rqst *); |
547 | void rpcrdma_set_max_header_sizes(struct rpcrdma_ia *, | ||
548 | struct rpcrdma_create_data_internal *, | ||
549 | unsigned int); | ||
522 | 550 | ||
523 | /* RPC/RDMA module init - xprtrdma/transport.c | 551 | /* RPC/RDMA module init - xprtrdma/transport.c |
524 | */ | 552 | */ |
@@ -534,6 +562,7 @@ void xprt_rdma_cleanup(void); | |||
534 | #if defined(CONFIG_SUNRPC_BACKCHANNEL) | 562 | #if defined(CONFIG_SUNRPC_BACKCHANNEL) |
535 | int xprt_rdma_bc_setup(struct rpc_xprt *, unsigned int); | 563 | int xprt_rdma_bc_setup(struct rpc_xprt *, unsigned int); |
536 | int xprt_rdma_bc_up(struct svc_serv *, struct net *); | 564 | int xprt_rdma_bc_up(struct svc_serv *, struct net *); |
565 | size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *); | ||
537 | int rpcrdma_bc_post_recv(struct rpcrdma_xprt *, unsigned int); | 566 | int rpcrdma_bc_post_recv(struct rpcrdma_xprt *, unsigned int); |
538 | void rpcrdma_bc_receive_call(struct rpcrdma_xprt *, struct rpcrdma_rep *); | 567 | void rpcrdma_bc_receive_call(struct rpcrdma_xprt *, struct rpcrdma_rep *); |
539 | int rpcrdma_bc_marshal_reply(struct rpc_rqst *); | 568 | int rpcrdma_bc_marshal_reply(struct rpc_rqst *); |
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index b90c5397b5e1..2d3e0c42361e 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c | |||
@@ -1364,6 +1364,11 @@ static int xs_tcp_bc_up(struct svc_serv *serv, struct net *net) | |||
1364 | return ret; | 1364 | return ret; |
1365 | return 0; | 1365 | return 0; |
1366 | } | 1366 | } |
1367 | |||
1368 | static size_t xs_tcp_bc_maxpayload(struct rpc_xprt *xprt) | ||
1369 | { | ||
1370 | return PAGE_SIZE; | ||
1371 | } | ||
1367 | #else | 1372 | #else |
1368 | static inline int _xs_tcp_read_data(struct rpc_xprt *xprt, | 1373 | static inline int _xs_tcp_read_data(struct rpc_xprt *xprt, |
1369 | struct xdr_skb_reader *desc) | 1374 | struct xdr_skb_reader *desc) |
@@ -2661,6 +2666,7 @@ static struct rpc_xprt_ops xs_tcp_ops = { | |||
2661 | #ifdef CONFIG_SUNRPC_BACKCHANNEL | 2666 | #ifdef CONFIG_SUNRPC_BACKCHANNEL |
2662 | .bc_setup = xprt_setup_bc, | 2667 | .bc_setup = xprt_setup_bc, |
2663 | .bc_up = xs_tcp_bc_up, | 2668 | .bc_up = xs_tcp_bc_up, |
2669 | .bc_maxpayload = xs_tcp_bc_maxpayload, | ||
2664 | .bc_free_rqst = xprt_free_bc_rqst, | 2670 | .bc_free_rqst = xprt_free_bc_rqst, |
2665 | .bc_destroy = xprt_destroy_bc, | 2671 | .bc_destroy = xprt_destroy_bc, |
2666 | #endif | 2672 | #endif |