diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-10-14 00:04:42 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-10-14 00:04:42 -0400 |
commit | 2778556474b1996aa68ae61619386b8802733bd8 (patch) | |
tree | cc136dd6d4589073b5aedb28429eb72cdd067106 | |
parent | 35a891be96f1f8e1227e6ad3ca827b8a08ce47ea (diff) | |
parent | 29ae7f9dc21a7dda41d78b27bbda7d427ece8ad4 (diff) |
Merge tag 'nfsd-4.9' of git://linux-nfs.org/~bfields/linux
Pull nfsd updates from Bruce Fields:
"Some RDMA work and some good bugfixes, and two new features that could
benefit from user testing:
- Anna Schumaker contributed a simple NFSv4.2 COPY implementation.
COPY is already supported on the client side, so a call to
copy_file_range() on a recent client should now result in a
server-side copy that doesn't require all the data to make a round
trip to the client and back.
- Jeff Layton implemented callbacks to notify clients when contended
locks become available, which should reduce latency on workloads
with contended locks"
* tag 'nfsd-4.9' of git://linux-nfs.org/~bfields/linux:
NFSD: Implement the COPY call
nfsd: handle EUCLEAN
nfsd: only WARN once on unmapped errors
exportfs: be careful to only return expected errors.
nfsd4: setclientid_confirm with unmatched verifier should fail
nfsd: randomize SETCLIENTID reply to help distinguish servers
nfsd: set the MAY_NOTIFY_LOCK flag in OPEN replies
nfs: add a new NFS4_OPEN_RESULT_MAY_NOTIFY_LOCK constant
nfsd: add a LRU list for blocked locks
nfsd: have nfsd4_lock use blocking locks for v4.1+ locks
nfsd: plumb in a CB_NOTIFY_LOCK operation
NFSD: fix corruption in notifier registration
svcrdma: support Remote Invalidation
svcrdma: Server-side support for rpcrdma_connect_private
rpcrdma: RDMA/CM private message data structure
svcrdma: Skip put_page() when send_reply() fails
svcrdma: Tail iovec leaves an orphaned DMA mapping
nfsd: fix dprintk in nfsd4_encode_getdeviceinfo
nfsd: eliminate cb_minorversion field
nfsd: don't set a FL_LAYOUT lease for flexfiles layouts
-rw-r--r-- | fs/exportfs/expfs.c | 10 | ||||
-rw-r--r-- | fs/nfsd/flexfilelayout.c | 1 | ||||
-rw-r--r-- | fs/nfsd/netns.h | 1 | ||||
-rw-r--r-- | fs/nfsd/nfs4callback.c | 64 | ||||
-rw-r--r-- | fs/nfsd/nfs4layouts.c | 6 | ||||
-rw-r--r-- | fs/nfsd/nfs4proc.c | 90 | ||||
-rw-r--r-- | fs/nfsd/nfs4state.c | 237 | ||||
-rw-r--r-- | fs/nfsd/nfs4xdr.c | 65 | ||||
-rw-r--r-- | fs/nfsd/nfsctl.c | 2 | ||||
-rw-r--r-- | fs/nfsd/nfsproc.c | 3 | ||||
-rw-r--r-- | fs/nfsd/nfssvc.c | 18 | ||||
-rw-r--r-- | fs/nfsd/pnfs.h | 1 | ||||
-rw-r--r-- | fs/nfsd/state.h | 22 | ||||
-rw-r--r-- | fs/nfsd/vfs.c | 16 | ||||
-rw-r--r-- | fs/nfsd/vfs.h | 2 | ||||
-rw-r--r-- | fs/nfsd/xdr4.h | 23 | ||||
-rw-r--r-- | fs/nfsd/xdr4cb.h | 9 | ||||
-rw-r--r-- | include/linux/exportfs.h | 13 | ||||
-rw-r--r-- | include/linux/sunrpc/rpc_rdma.h | 35 | ||||
-rw-r--r-- | include/linux/sunrpc/svc_rdma.h | 10 | ||||
-rw-r--r-- | include/uapi/linux/nfs4.h | 5 | ||||
-rw-r--r-- | net/sunrpc/xprtrdma/svc_rdma_backchannel.c | 2 | ||||
-rw-r--r-- | net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 2 | ||||
-rw-r--r-- | net/sunrpc/xprtrdma/svc_rdma_sendto.c | 82 | ||||
-rw-r--r-- | net/sunrpc/xprtrdma/svc_rdma_transport.c | 60 |
25 files changed, 683 insertions, 96 deletions
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index 207ba8d627ca..a4b531be9168 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c | |||
@@ -428,10 +428,10 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid, | |||
428 | if (!nop || !nop->fh_to_dentry) | 428 | if (!nop || !nop->fh_to_dentry) |
429 | return ERR_PTR(-ESTALE); | 429 | return ERR_PTR(-ESTALE); |
430 | result = nop->fh_to_dentry(mnt->mnt_sb, fid, fh_len, fileid_type); | 430 | result = nop->fh_to_dentry(mnt->mnt_sb, fid, fh_len, fileid_type); |
431 | if (!result) | 431 | if (PTR_ERR(result) == -ENOMEM) |
432 | result = ERR_PTR(-ESTALE); | 432 | return ERR_CAST(result); |
433 | if (IS_ERR(result)) | 433 | if (IS_ERR_OR_NULL(result)) |
434 | return result; | 434 | return ERR_PTR(-ESTALE); |
435 | 435 | ||
436 | if (d_is_dir(result)) { | 436 | if (d_is_dir(result)) { |
437 | /* | 437 | /* |
@@ -541,6 +541,8 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid, | |||
541 | 541 | ||
542 | err_result: | 542 | err_result: |
543 | dput(result); | 543 | dput(result); |
544 | if (err != -ENOMEM) | ||
545 | err = -ESTALE; | ||
544 | return ERR_PTR(err); | 546 | return ERR_PTR(err); |
545 | } | 547 | } |
546 | EXPORT_SYMBOL_GPL(exportfs_decode_fh); | 548 | EXPORT_SYMBOL_GPL(exportfs_decode_fh); |
diff --git a/fs/nfsd/flexfilelayout.c b/fs/nfsd/flexfilelayout.c index df880e9fa71f..b67287383010 100644 --- a/fs/nfsd/flexfilelayout.c +++ b/fs/nfsd/flexfilelayout.c | |||
@@ -126,6 +126,7 @@ nfsd4_ff_proc_getdeviceinfo(struct super_block *sb, struct svc_rqst *rqstp, | |||
126 | const struct nfsd4_layout_ops ff_layout_ops = { | 126 | const struct nfsd4_layout_ops ff_layout_ops = { |
127 | .notify_types = | 127 | .notify_types = |
128 | NOTIFY_DEVICEID4_DELETE | NOTIFY_DEVICEID4_CHANGE, | 128 | NOTIFY_DEVICEID4_DELETE | NOTIFY_DEVICEID4_CHANGE, |
129 | .disable_recalls = true, | ||
129 | .proc_getdeviceinfo = nfsd4_ff_proc_getdeviceinfo, | 130 | .proc_getdeviceinfo = nfsd4_ff_proc_getdeviceinfo, |
130 | .encode_getdeviceinfo = nfsd4_ff_encode_getdeviceinfo, | 131 | .encode_getdeviceinfo = nfsd4_ff_encode_getdeviceinfo, |
131 | .proc_layoutget = nfsd4_ff_proc_layoutget, | 132 | .proc_layoutget = nfsd4_ff_proc_layoutget, |
diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index 5fbf3bbd00d0..b10d557f9c9e 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h | |||
@@ -84,6 +84,7 @@ struct nfsd_net { | |||
84 | struct list_head client_lru; | 84 | struct list_head client_lru; |
85 | struct list_head close_lru; | 85 | struct list_head close_lru; |
86 | struct list_head del_recall_lru; | 86 | struct list_head del_recall_lru; |
87 | struct list_head blocked_locks_lru; | ||
87 | 88 | ||
88 | struct delayed_work laundromat_work; | 89 | struct delayed_work laundromat_work; |
89 | 90 | ||
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 04c68d900324..211dc2aed8e1 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c | |||
@@ -448,7 +448,7 @@ static int decode_cb_sequence4res(struct xdr_stream *xdr, | |||
448 | { | 448 | { |
449 | int status; | 449 | int status; |
450 | 450 | ||
451 | if (cb->cb_minorversion == 0) | 451 | if (cb->cb_clp->cl_minorversion == 0) |
452 | return 0; | 452 | return 0; |
453 | 453 | ||
454 | status = decode_cb_op_status(xdr, OP_CB_SEQUENCE, &cb->cb_seq_status); | 454 | status = decode_cb_op_status(xdr, OP_CB_SEQUENCE, &cb->cb_seq_status); |
@@ -485,7 +485,7 @@ static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr, | |||
485 | const struct nfs4_delegation *dp = cb_to_delegation(cb); | 485 | const struct nfs4_delegation *dp = cb_to_delegation(cb); |
486 | struct nfs4_cb_compound_hdr hdr = { | 486 | struct nfs4_cb_compound_hdr hdr = { |
487 | .ident = cb->cb_clp->cl_cb_ident, | 487 | .ident = cb->cb_clp->cl_cb_ident, |
488 | .minorversion = cb->cb_minorversion, | 488 | .minorversion = cb->cb_clp->cl_minorversion, |
489 | }; | 489 | }; |
490 | 490 | ||
491 | encode_cb_compound4args(xdr, &hdr); | 491 | encode_cb_compound4args(xdr, &hdr); |
@@ -594,7 +594,7 @@ static void nfs4_xdr_enc_cb_layout(struct rpc_rqst *req, | |||
594 | container_of(cb, struct nfs4_layout_stateid, ls_recall); | 594 | container_of(cb, struct nfs4_layout_stateid, ls_recall); |
595 | struct nfs4_cb_compound_hdr hdr = { | 595 | struct nfs4_cb_compound_hdr hdr = { |
596 | .ident = 0, | 596 | .ident = 0, |
597 | .minorversion = cb->cb_minorversion, | 597 | .minorversion = cb->cb_clp->cl_minorversion, |
598 | }; | 598 | }; |
599 | 599 | ||
600 | encode_cb_compound4args(xdr, &hdr); | 600 | encode_cb_compound4args(xdr, &hdr); |
@@ -623,6 +623,62 @@ static int nfs4_xdr_dec_cb_layout(struct rpc_rqst *rqstp, | |||
623 | } | 623 | } |
624 | #endif /* CONFIG_NFSD_PNFS */ | 624 | #endif /* CONFIG_NFSD_PNFS */ |
625 | 625 | ||
626 | static void encode_stateowner(struct xdr_stream *xdr, struct nfs4_stateowner *so) | ||
627 | { | ||
628 | __be32 *p; | ||
629 | |||
630 | p = xdr_reserve_space(xdr, 8 + 4 + so->so_owner.len); | ||
631 | p = xdr_encode_opaque_fixed(p, &so->so_client->cl_clientid, 8); | ||
632 | xdr_encode_opaque(p, so->so_owner.data, so->so_owner.len); | ||
633 | } | ||
634 | |||
635 | static void nfs4_xdr_enc_cb_notify_lock(struct rpc_rqst *req, | ||
636 | struct xdr_stream *xdr, | ||
637 | const struct nfsd4_callback *cb) | ||
638 | { | ||
639 | const struct nfsd4_blocked_lock *nbl = | ||
640 | container_of(cb, struct nfsd4_blocked_lock, nbl_cb); | ||
641 | struct nfs4_lockowner *lo = (struct nfs4_lockowner *)nbl->nbl_lock.fl_owner; | ||
642 | struct nfs4_cb_compound_hdr hdr = { | ||
643 | .ident = 0, | ||
644 | .minorversion = cb->cb_clp->cl_minorversion, | ||
645 | }; | ||
646 | |||
647 | __be32 *p; | ||
648 | |||
649 | BUG_ON(hdr.minorversion == 0); | ||
650 | |||
651 | encode_cb_compound4args(xdr, &hdr); | ||
652 | encode_cb_sequence4args(xdr, cb, &hdr); | ||
653 | |||
654 | p = xdr_reserve_space(xdr, 4); | ||
655 | *p = cpu_to_be32(OP_CB_NOTIFY_LOCK); | ||
656 | encode_nfs_fh4(xdr, &nbl->nbl_fh); | ||
657 | encode_stateowner(xdr, &lo->lo_owner); | ||
658 | hdr.nops++; | ||
659 | |||
660 | encode_cb_nops(&hdr); | ||
661 | } | ||
662 | |||
663 | static int nfs4_xdr_dec_cb_notify_lock(struct rpc_rqst *rqstp, | ||
664 | struct xdr_stream *xdr, | ||
665 | struct nfsd4_callback *cb) | ||
666 | { | ||
667 | struct nfs4_cb_compound_hdr hdr; | ||
668 | int status; | ||
669 | |||
670 | status = decode_cb_compound4res(xdr, &hdr); | ||
671 | if (unlikely(status)) | ||
672 | return status; | ||
673 | |||
674 | if (cb) { | ||
675 | status = decode_cb_sequence4res(xdr, cb); | ||
676 | if (unlikely(status || cb->cb_seq_status)) | ||
677 | return status; | ||
678 | } | ||
679 | return decode_cb_op_status(xdr, OP_CB_NOTIFY_LOCK, &cb->cb_status); | ||
680 | } | ||
681 | |||
626 | /* | 682 | /* |
627 | * RPC procedure tables | 683 | * RPC procedure tables |
628 | */ | 684 | */ |
@@ -643,6 +699,7 @@ static struct rpc_procinfo nfs4_cb_procedures[] = { | |||
643 | #ifdef CONFIG_NFSD_PNFS | 699 | #ifdef CONFIG_NFSD_PNFS |
644 | PROC(CB_LAYOUT, COMPOUND, cb_layout, cb_layout), | 700 | PROC(CB_LAYOUT, COMPOUND, cb_layout, cb_layout), |
645 | #endif | 701 | #endif |
702 | PROC(CB_NOTIFY_LOCK, COMPOUND, cb_notify_lock, cb_notify_lock), | ||
646 | }; | 703 | }; |
647 | 704 | ||
648 | static struct rpc_version nfs_cb_version4 = { | 705 | static struct rpc_version nfs_cb_version4 = { |
@@ -862,7 +919,6 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata) | |||
862 | struct nfs4_client *clp = cb->cb_clp; | 919 | struct nfs4_client *clp = cb->cb_clp; |
863 | u32 minorversion = clp->cl_minorversion; | 920 | u32 minorversion = clp->cl_minorversion; |
864 | 921 | ||
865 | cb->cb_minorversion = minorversion; | ||
866 | /* | 922 | /* |
867 | * cb_seq_status is only set in decode_cb_sequence4res, | 923 | * cb_seq_status is only set in decode_cb_sequence4res, |
868 | * and so will remain 1 if an rpc level failure occurs. | 924 | * and so will remain 1 if an rpc level failure occurs. |
diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c index 2be9602b0221..42aace4fc4c8 100644 --- a/fs/nfsd/nfs4layouts.c +++ b/fs/nfsd/nfs4layouts.c | |||
@@ -174,7 +174,8 @@ nfsd4_free_layout_stateid(struct nfs4_stid *stid) | |||
174 | list_del_init(&ls->ls_perfile); | 174 | list_del_init(&ls->ls_perfile); |
175 | spin_unlock(&fp->fi_lock); | 175 | spin_unlock(&fp->fi_lock); |
176 | 176 | ||
177 | vfs_setlease(ls->ls_file, F_UNLCK, NULL, (void **)&ls); | 177 | if (!nfsd4_layout_ops[ls->ls_layout_type]->disable_recalls) |
178 | vfs_setlease(ls->ls_file, F_UNLCK, NULL, (void **)&ls); | ||
178 | fput(ls->ls_file); | 179 | fput(ls->ls_file); |
179 | 180 | ||
180 | if (ls->ls_recalled) | 181 | if (ls->ls_recalled) |
@@ -189,6 +190,9 @@ nfsd4_layout_setlease(struct nfs4_layout_stateid *ls) | |||
189 | struct file_lock *fl; | 190 | struct file_lock *fl; |
190 | int status; | 191 | int status; |
191 | 192 | ||
193 | if (nfsd4_layout_ops[ls->ls_layout_type]->disable_recalls) | ||
194 | return 0; | ||
195 | |||
192 | fl = locks_alloc_lock(); | 196 | fl = locks_alloc_lock(); |
193 | if (!fl) | 197 | if (!fl) |
194 | return -ENOMEM; | 198 | return -ENOMEM; |
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 1fb222752b2b..abb09b580389 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c | |||
@@ -1010,47 +1010,97 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
1010 | } | 1010 | } |
1011 | 1011 | ||
1012 | static __be32 | 1012 | static __be32 |
1013 | nfsd4_clone(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | 1013 | nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, |
1014 | struct nfsd4_clone *clone) | 1014 | stateid_t *src_stateid, struct file **src, |
1015 | stateid_t *dst_stateid, struct file **dst) | ||
1015 | { | 1016 | { |
1016 | struct file *src, *dst; | ||
1017 | __be32 status; | 1017 | __be32 status; |
1018 | 1018 | ||
1019 | status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->save_fh, | 1019 | status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->save_fh, |
1020 | &clone->cl_src_stateid, RD_STATE, | 1020 | src_stateid, RD_STATE, src, NULL); |
1021 | &src, NULL); | ||
1022 | if (status) { | 1021 | if (status) { |
1023 | dprintk("NFSD: %s: couldn't process src stateid!\n", __func__); | 1022 | dprintk("NFSD: %s: couldn't process src stateid!\n", __func__); |
1024 | goto out; | 1023 | goto out; |
1025 | } | 1024 | } |
1026 | 1025 | ||
1027 | status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, | 1026 | status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, |
1028 | &clone->cl_dst_stateid, WR_STATE, | 1027 | dst_stateid, WR_STATE, dst, NULL); |
1029 | &dst, NULL); | ||
1030 | if (status) { | 1028 | if (status) { |
1031 | dprintk("NFSD: %s: couldn't process dst stateid!\n", __func__); | 1029 | dprintk("NFSD: %s: couldn't process dst stateid!\n", __func__); |
1032 | goto out_put_src; | 1030 | goto out_put_src; |
1033 | } | 1031 | } |
1034 | 1032 | ||
1035 | /* fix up for NFS-specific error code */ | 1033 | /* fix up for NFS-specific error code */ |
1036 | if (!S_ISREG(file_inode(src)->i_mode) || | 1034 | if (!S_ISREG(file_inode(*src)->i_mode) || |
1037 | !S_ISREG(file_inode(dst)->i_mode)) { | 1035 | !S_ISREG(file_inode(*dst)->i_mode)) { |
1038 | status = nfserr_wrong_type; | 1036 | status = nfserr_wrong_type; |
1039 | goto out_put_dst; | 1037 | goto out_put_dst; |
1040 | } | 1038 | } |
1041 | 1039 | ||
1040 | out: | ||
1041 | return status; | ||
1042 | out_put_dst: | ||
1043 | fput(*dst); | ||
1044 | out_put_src: | ||
1045 | fput(*src); | ||
1046 | goto out; | ||
1047 | } | ||
1048 | |||
1049 | static __be32 | ||
1050 | nfsd4_clone(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | ||
1051 | struct nfsd4_clone *clone) | ||
1052 | { | ||
1053 | struct file *src, *dst; | ||
1054 | __be32 status; | ||
1055 | |||
1056 | status = nfsd4_verify_copy(rqstp, cstate, &clone->cl_src_stateid, &src, | ||
1057 | &clone->cl_dst_stateid, &dst); | ||
1058 | if (status) | ||
1059 | goto out; | ||
1060 | |||
1042 | status = nfsd4_clone_file_range(src, clone->cl_src_pos, | 1061 | status = nfsd4_clone_file_range(src, clone->cl_src_pos, |
1043 | dst, clone->cl_dst_pos, clone->cl_count); | 1062 | dst, clone->cl_dst_pos, clone->cl_count); |
1044 | 1063 | ||
1045 | out_put_dst: | ||
1046 | fput(dst); | 1064 | fput(dst); |
1047 | out_put_src: | ||
1048 | fput(src); | 1065 | fput(src); |
1049 | out: | 1066 | out: |
1050 | return status; | 1067 | return status; |
1051 | } | 1068 | } |
1052 | 1069 | ||
1053 | static __be32 | 1070 | static __be32 |
1071 | nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | ||
1072 | struct nfsd4_copy *copy) | ||
1073 | { | ||
1074 | struct file *src, *dst; | ||
1075 | __be32 status; | ||
1076 | ssize_t bytes; | ||
1077 | |||
1078 | status = nfsd4_verify_copy(rqstp, cstate, &copy->cp_src_stateid, &src, | ||
1079 | &copy->cp_dst_stateid, &dst); | ||
1080 | if (status) | ||
1081 | goto out; | ||
1082 | |||
1083 | bytes = nfsd_copy_file_range(src, copy->cp_src_pos, | ||
1084 | dst, copy->cp_dst_pos, copy->cp_count); | ||
1085 | |||
1086 | if (bytes < 0) | ||
1087 | status = nfserrno(bytes); | ||
1088 | else { | ||
1089 | copy->cp_res.wr_bytes_written = bytes; | ||
1090 | copy->cp_res.wr_stable_how = NFS_UNSTABLE; | ||
1091 | copy->cp_consecutive = 1; | ||
1092 | copy->cp_synchronous = 1; | ||
1093 | gen_boot_verifier(&copy->cp_res.wr_verifier, SVC_NET(rqstp)); | ||
1094 | status = nfs_ok; | ||
1095 | } | ||
1096 | |||
1097 | fput(src); | ||
1098 | fput(dst); | ||
1099 | out: | ||
1100 | return status; | ||
1101 | } | ||
1102 | |||
1103 | static __be32 | ||
1054 | nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | 1104 | nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, |
1055 | struct nfsd4_fallocate *fallocate, int flags) | 1105 | struct nfsd4_fallocate *fallocate, int flags) |
1056 | { | 1106 | { |
@@ -1966,6 +2016,18 @@ static inline u32 nfsd4_create_session_rsize(struct svc_rqst *rqstp, struct nfsd | |||
1966 | op_encode_channel_attrs_maxsz) * sizeof(__be32); | 2016 | op_encode_channel_attrs_maxsz) * sizeof(__be32); |
1967 | } | 2017 | } |
1968 | 2018 | ||
2019 | static inline u32 nfsd4_copy_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) | ||
2020 | { | ||
2021 | return (op_encode_hdr_size + | ||
2022 | 1 /* wr_callback */ + | ||
2023 | op_encode_stateid_maxsz /* wr_callback */ + | ||
2024 | 2 /* wr_count */ + | ||
2025 | 1 /* wr_committed */ + | ||
2026 | op_encode_verifier_maxsz + | ||
2027 | 1 /* cr_consecutive */ + | ||
2028 | 1 /* cr_synchronous */) * sizeof(__be32); | ||
2029 | } | ||
2030 | |||
1969 | #ifdef CONFIG_NFSD_PNFS | 2031 | #ifdef CONFIG_NFSD_PNFS |
1970 | /* | 2032 | /* |
1971 | * At this stage we don't really know what layout driver will handle the request, | 2033 | * At this stage we don't really know what layout driver will handle the request, |
@@ -2328,6 +2390,12 @@ static struct nfsd4_operation nfsd4_ops[] = { | |||
2328 | .op_name = "OP_CLONE", | 2390 | .op_name = "OP_CLONE", |
2329 | .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, | 2391 | .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, |
2330 | }, | 2392 | }, |
2393 | [OP_COPY] = { | ||
2394 | .op_func = (nfsd4op_func)nfsd4_copy, | ||
2395 | .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME, | ||
2396 | .op_name = "OP_COPY", | ||
2397 | .op_rsize_bop = (nfsd4op_rsize)nfsd4_copy_rsize, | ||
2398 | }, | ||
2331 | [OP_SEEK] = { | 2399 | [OP_SEEK] = { |
2332 | .op_func = (nfsd4op_func)nfsd4_seek, | 2400 | .op_func = (nfsd4op_func)nfsd4_seek, |
2333 | .op_name = "OP_SEEK", | 2401 | .op_name = "OP_SEEK", |
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 39bfaba9c99c..9752beb78659 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c | |||
@@ -99,6 +99,7 @@ static struct kmem_cache *odstate_slab; | |||
99 | static void free_session(struct nfsd4_session *); | 99 | static void free_session(struct nfsd4_session *); |
100 | 100 | ||
101 | static const struct nfsd4_callback_ops nfsd4_cb_recall_ops; | 101 | static const struct nfsd4_callback_ops nfsd4_cb_recall_ops; |
102 | static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops; | ||
102 | 103 | ||
103 | static bool is_session_dead(struct nfsd4_session *ses) | 104 | static bool is_session_dead(struct nfsd4_session *ses) |
104 | { | 105 | { |
@@ -210,6 +211,85 @@ static void nfsd4_put_session(struct nfsd4_session *ses) | |||
210 | spin_unlock(&nn->client_lock); | 211 | spin_unlock(&nn->client_lock); |
211 | } | 212 | } |
212 | 213 | ||
214 | static struct nfsd4_blocked_lock * | ||
215 | find_blocked_lock(struct nfs4_lockowner *lo, struct knfsd_fh *fh, | ||
216 | struct nfsd_net *nn) | ||
217 | { | ||
218 | struct nfsd4_blocked_lock *cur, *found = NULL; | ||
219 | |||
220 | spin_lock(&nn->client_lock); | ||
221 | list_for_each_entry(cur, &lo->lo_blocked, nbl_list) { | ||
222 | if (fh_match(fh, &cur->nbl_fh)) { | ||
223 | list_del_init(&cur->nbl_list); | ||
224 | list_del_init(&cur->nbl_lru); | ||
225 | found = cur; | ||
226 | break; | ||
227 | } | ||
228 | } | ||
229 | spin_unlock(&nn->client_lock); | ||
230 | if (found) | ||
231 | posix_unblock_lock(&found->nbl_lock); | ||
232 | return found; | ||
233 | } | ||
234 | |||
235 | static struct nfsd4_blocked_lock * | ||
236 | find_or_allocate_block(struct nfs4_lockowner *lo, struct knfsd_fh *fh, | ||
237 | struct nfsd_net *nn) | ||
238 | { | ||
239 | struct nfsd4_blocked_lock *nbl; | ||
240 | |||
241 | nbl = find_blocked_lock(lo, fh, nn); | ||
242 | if (!nbl) { | ||
243 | nbl= kmalloc(sizeof(*nbl), GFP_KERNEL); | ||
244 | if (nbl) { | ||
245 | fh_copy_shallow(&nbl->nbl_fh, fh); | ||
246 | locks_init_lock(&nbl->nbl_lock); | ||
247 | nfsd4_init_cb(&nbl->nbl_cb, lo->lo_owner.so_client, | ||
248 | &nfsd4_cb_notify_lock_ops, | ||
249 | NFSPROC4_CLNT_CB_NOTIFY_LOCK); | ||
250 | } | ||
251 | } | ||
252 | return nbl; | ||
253 | } | ||
254 | |||
255 | static void | ||
256 | free_blocked_lock(struct nfsd4_blocked_lock *nbl) | ||
257 | { | ||
258 | locks_release_private(&nbl->nbl_lock); | ||
259 | kfree(nbl); | ||
260 | } | ||
261 | |||
262 | static int | ||
263 | nfsd4_cb_notify_lock_done(struct nfsd4_callback *cb, struct rpc_task *task) | ||
264 | { | ||
265 | /* | ||
266 | * Since this is just an optimization, we don't try very hard if it | ||
267 | * turns out not to succeed. We'll requeue it on NFS4ERR_DELAY, and | ||
268 | * just quit trying on anything else. | ||
269 | */ | ||
270 | switch (task->tk_status) { | ||
271 | case -NFS4ERR_DELAY: | ||
272 | rpc_delay(task, 1 * HZ); | ||
273 | return 0; | ||
274 | default: | ||
275 | return 1; | ||
276 | } | ||
277 | } | ||
278 | |||
279 | static void | ||
280 | nfsd4_cb_notify_lock_release(struct nfsd4_callback *cb) | ||
281 | { | ||
282 | struct nfsd4_blocked_lock *nbl = container_of(cb, | ||
283 | struct nfsd4_blocked_lock, nbl_cb); | ||
284 | |||
285 | free_blocked_lock(nbl); | ||
286 | } | ||
287 | |||
288 | static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops = { | ||
289 | .done = nfsd4_cb_notify_lock_done, | ||
290 | .release = nfsd4_cb_notify_lock_release, | ||
291 | }; | ||
292 | |||
213 | static inline struct nfs4_stateowner * | 293 | static inline struct nfs4_stateowner * |
214 | nfs4_get_stateowner(struct nfs4_stateowner *sop) | 294 | nfs4_get_stateowner(struct nfs4_stateowner *sop) |
215 | { | 295 | { |
@@ -3224,9 +3304,10 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, | |||
3224 | goto out; | 3304 | goto out; |
3225 | /* cases below refer to rfc 3530 section 14.2.34: */ | 3305 | /* cases below refer to rfc 3530 section 14.2.34: */ |
3226 | if (!unconf || !same_verf(&confirm, &unconf->cl_confirm)) { | 3306 | if (!unconf || !same_verf(&confirm, &unconf->cl_confirm)) { |
3227 | if (conf && !unconf) /* case 2: probable retransmit */ | 3307 | if (conf && same_verf(&confirm, &conf->cl_confirm)) { |
3308 | /* case 2: probable retransmit */ | ||
3228 | status = nfs_ok; | 3309 | status = nfs_ok; |
3229 | else /* case 4: client hasn't noticed we rebooted yet? */ | 3310 | } else /* case 4: client hasn't noticed we rebooted yet? */ |
3230 | status = nfserr_stale_clientid; | 3311 | status = nfserr_stale_clientid; |
3231 | goto out; | 3312 | goto out; |
3232 | } | 3313 | } |
@@ -4410,9 +4491,11 @@ out: | |||
4410 | * To finish the open response, we just need to set the rflags. | 4491 | * To finish the open response, we just need to set the rflags. |
4411 | */ | 4492 | */ |
4412 | open->op_rflags = NFS4_OPEN_RESULT_LOCKTYPE_POSIX; | 4493 | open->op_rflags = NFS4_OPEN_RESULT_LOCKTYPE_POSIX; |
4413 | if (!(open->op_openowner->oo_flags & NFS4_OO_CONFIRMED) && | 4494 | if (nfsd4_has_session(&resp->cstate)) |
4414 | !nfsd4_has_session(&resp->cstate)) | 4495 | open->op_rflags |= NFS4_OPEN_RESULT_MAY_NOTIFY_LOCK; |
4496 | else if (!(open->op_openowner->oo_flags & NFS4_OO_CONFIRMED)) | ||
4415 | open->op_rflags |= NFS4_OPEN_RESULT_CONFIRM; | 4497 | open->op_rflags |= NFS4_OPEN_RESULT_CONFIRM; |
4498 | |||
4416 | if (dp) | 4499 | if (dp) |
4417 | nfs4_put_stid(&dp->dl_stid); | 4500 | nfs4_put_stid(&dp->dl_stid); |
4418 | if (stp) | 4501 | if (stp) |
@@ -4501,6 +4584,7 @@ nfs4_laundromat(struct nfsd_net *nn) | |||
4501 | struct nfs4_openowner *oo; | 4584 | struct nfs4_openowner *oo; |
4502 | struct nfs4_delegation *dp; | 4585 | struct nfs4_delegation *dp; |
4503 | struct nfs4_ol_stateid *stp; | 4586 | struct nfs4_ol_stateid *stp; |
4587 | struct nfsd4_blocked_lock *nbl; | ||
4504 | struct list_head *pos, *next, reaplist; | 4588 | struct list_head *pos, *next, reaplist; |
4505 | time_t cutoff = get_seconds() - nn->nfsd4_lease; | 4589 | time_t cutoff = get_seconds() - nn->nfsd4_lease; |
4506 | time_t t, new_timeo = nn->nfsd4_lease; | 4590 | time_t t, new_timeo = nn->nfsd4_lease; |
@@ -4569,6 +4653,41 @@ nfs4_laundromat(struct nfsd_net *nn) | |||
4569 | } | 4653 | } |
4570 | spin_unlock(&nn->client_lock); | 4654 | spin_unlock(&nn->client_lock); |
4571 | 4655 | ||
4656 | /* | ||
4657 | * It's possible for a client to try and acquire an already held lock | ||
4658 | * that is being held for a long time, and then lose interest in it. | ||
4659 | * So, we clean out any un-revisited request after a lease period | ||
4660 | * under the assumption that the client is no longer interested. | ||
4661 | * | ||
4662 | * RFC5661, sec. 9.6 states that the client must not rely on getting | ||
4663 | * notifications and must continue to poll for locks, even when the | ||
4664 | * server supports them. Thus this shouldn't lead to clients blocking | ||
4665 | * indefinitely once the lock does become free. | ||
4666 | */ | ||
4667 | BUG_ON(!list_empty(&reaplist)); | ||
4668 | spin_lock(&nn->client_lock); | ||
4669 | while (!list_empty(&nn->blocked_locks_lru)) { | ||
4670 | nbl = list_first_entry(&nn->blocked_locks_lru, | ||
4671 | struct nfsd4_blocked_lock, nbl_lru); | ||
4672 | if (time_after((unsigned long)nbl->nbl_time, | ||
4673 | (unsigned long)cutoff)) { | ||
4674 | t = nbl->nbl_time - cutoff; | ||
4675 | new_timeo = min(new_timeo, t); | ||
4676 | break; | ||
4677 | } | ||
4678 | list_move(&nbl->nbl_lru, &reaplist); | ||
4679 | list_del_init(&nbl->nbl_list); | ||
4680 | } | ||
4681 | spin_unlock(&nn->client_lock); | ||
4682 | |||
4683 | while (!list_empty(&reaplist)) { | ||
4684 | nbl = list_first_entry(&nn->blocked_locks_lru, | ||
4685 | struct nfsd4_blocked_lock, nbl_lru); | ||
4686 | list_del_init(&nbl->nbl_lru); | ||
4687 | posix_unblock_lock(&nbl->nbl_lock); | ||
4688 | free_blocked_lock(nbl); | ||
4689 | } | ||
4690 | |||
4572 | new_timeo = max_t(time_t, new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT); | 4691 | new_timeo = max_t(time_t, new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT); |
4573 | return new_timeo; | 4692 | return new_timeo; |
4574 | } | 4693 | } |
@@ -5309,7 +5428,31 @@ nfsd4_fl_put_owner(fl_owner_t owner) | |||
5309 | nfs4_put_stateowner(&lo->lo_owner); | 5428 | nfs4_put_stateowner(&lo->lo_owner); |
5310 | } | 5429 | } |
5311 | 5430 | ||
5431 | static void | ||
5432 | nfsd4_lm_notify(struct file_lock *fl) | ||
5433 | { | ||
5434 | struct nfs4_lockowner *lo = (struct nfs4_lockowner *)fl->fl_owner; | ||
5435 | struct net *net = lo->lo_owner.so_client->net; | ||
5436 | struct nfsd_net *nn = net_generic(net, nfsd_net_id); | ||
5437 | struct nfsd4_blocked_lock *nbl = container_of(fl, | ||
5438 | struct nfsd4_blocked_lock, nbl_lock); | ||
5439 | bool queue = false; | ||
5440 | |||
5441 | /* An empty list means that something else is going to be using it */ | ||
5442 | spin_lock(&nn->client_lock); | ||
5443 | if (!list_empty(&nbl->nbl_list)) { | ||
5444 | list_del_init(&nbl->nbl_list); | ||
5445 | list_del_init(&nbl->nbl_lru); | ||
5446 | queue = true; | ||
5447 | } | ||
5448 | spin_unlock(&nn->client_lock); | ||
5449 | |||
5450 | if (queue) | ||
5451 | nfsd4_run_cb(&nbl->nbl_cb); | ||
5452 | } | ||
5453 | |||
5312 | static const struct lock_manager_operations nfsd_posix_mng_ops = { | 5454 | static const struct lock_manager_operations nfsd_posix_mng_ops = { |
5455 | .lm_notify = nfsd4_lm_notify, | ||
5313 | .lm_get_owner = nfsd4_fl_get_owner, | 5456 | .lm_get_owner = nfsd4_fl_get_owner, |
5314 | .lm_put_owner = nfsd4_fl_put_owner, | 5457 | .lm_put_owner = nfsd4_fl_put_owner, |
5315 | }; | 5458 | }; |
@@ -5407,6 +5550,7 @@ alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, | |||
5407 | lo = alloc_stateowner(lockowner_slab, &lock->lk_new_owner, clp); | 5550 | lo = alloc_stateowner(lockowner_slab, &lock->lk_new_owner, clp); |
5408 | if (!lo) | 5551 | if (!lo) |
5409 | return NULL; | 5552 | return NULL; |
5553 | INIT_LIST_HEAD(&lo->lo_blocked); | ||
5410 | INIT_LIST_HEAD(&lo->lo_owner.so_stateids); | 5554 | INIT_LIST_HEAD(&lo->lo_owner.so_stateids); |
5411 | lo->lo_owner.so_is_open_owner = 0; | 5555 | lo->lo_owner.so_is_open_owner = 0; |
5412 | lo->lo_owner.so_seqid = lock->lk_new_lock_seqid; | 5556 | lo->lo_owner.so_seqid = lock->lk_new_lock_seqid; |
@@ -5588,12 +5732,15 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
5588 | struct nfs4_ol_stateid *open_stp = NULL; | 5732 | struct nfs4_ol_stateid *open_stp = NULL; |
5589 | struct nfs4_file *fp; | 5733 | struct nfs4_file *fp; |
5590 | struct file *filp = NULL; | 5734 | struct file *filp = NULL; |
5735 | struct nfsd4_blocked_lock *nbl = NULL; | ||
5591 | struct file_lock *file_lock = NULL; | 5736 | struct file_lock *file_lock = NULL; |
5592 | struct file_lock *conflock = NULL; | 5737 | struct file_lock *conflock = NULL; |
5593 | __be32 status = 0; | 5738 | __be32 status = 0; |
5594 | int lkflg; | 5739 | int lkflg; |
5595 | int err; | 5740 | int err; |
5596 | bool new = false; | 5741 | bool new = false; |
5742 | unsigned char fl_type; | ||
5743 | unsigned int fl_flags = FL_POSIX; | ||
5597 | struct net *net = SVC_NET(rqstp); | 5744 | struct net *net = SVC_NET(rqstp); |
5598 | struct nfsd_net *nn = net_generic(net, nfsd_net_id); | 5745 | struct nfsd_net *nn = net_generic(net, nfsd_net_id); |
5599 | 5746 | ||
@@ -5658,46 +5805,55 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
5658 | if (!locks_in_grace(net) && lock->lk_reclaim) | 5805 | if (!locks_in_grace(net) && lock->lk_reclaim) |
5659 | goto out; | 5806 | goto out; |
5660 | 5807 | ||
5661 | file_lock = locks_alloc_lock(); | ||
5662 | if (!file_lock) { | ||
5663 | dprintk("NFSD: %s: unable to allocate lock!\n", __func__); | ||
5664 | status = nfserr_jukebox; | ||
5665 | goto out; | ||
5666 | } | ||
5667 | |||
5668 | fp = lock_stp->st_stid.sc_file; | 5808 | fp = lock_stp->st_stid.sc_file; |
5669 | switch (lock->lk_type) { | 5809 | switch (lock->lk_type) { |
5670 | case NFS4_READ_LT: | ||
5671 | case NFS4_READW_LT: | 5810 | case NFS4_READW_LT: |
5811 | if (nfsd4_has_session(cstate)) | ||
5812 | fl_flags |= FL_SLEEP; | ||
5813 | /* Fallthrough */ | ||
5814 | case NFS4_READ_LT: | ||
5672 | spin_lock(&fp->fi_lock); | 5815 | spin_lock(&fp->fi_lock); |
5673 | filp = find_readable_file_locked(fp); | 5816 | filp = find_readable_file_locked(fp); |
5674 | if (filp) | 5817 | if (filp) |
5675 | get_lock_access(lock_stp, NFS4_SHARE_ACCESS_READ); | 5818 | get_lock_access(lock_stp, NFS4_SHARE_ACCESS_READ); |
5676 | spin_unlock(&fp->fi_lock); | 5819 | spin_unlock(&fp->fi_lock); |
5677 | file_lock->fl_type = F_RDLCK; | 5820 | fl_type = F_RDLCK; |
5678 | break; | 5821 | break; |
5679 | case NFS4_WRITE_LT: | ||
5680 | case NFS4_WRITEW_LT: | 5822 | case NFS4_WRITEW_LT: |
5823 | if (nfsd4_has_session(cstate)) | ||
5824 | fl_flags |= FL_SLEEP; | ||
5825 | /* Fallthrough */ | ||
5826 | case NFS4_WRITE_LT: | ||
5681 | spin_lock(&fp->fi_lock); | 5827 | spin_lock(&fp->fi_lock); |
5682 | filp = find_writeable_file_locked(fp); | 5828 | filp = find_writeable_file_locked(fp); |
5683 | if (filp) | 5829 | if (filp) |
5684 | get_lock_access(lock_stp, NFS4_SHARE_ACCESS_WRITE); | 5830 | get_lock_access(lock_stp, NFS4_SHARE_ACCESS_WRITE); |
5685 | spin_unlock(&fp->fi_lock); | 5831 | spin_unlock(&fp->fi_lock); |
5686 | file_lock->fl_type = F_WRLCK; | 5832 | fl_type = F_WRLCK; |
5687 | break; | 5833 | break; |
5688 | default: | 5834 | default: |
5689 | status = nfserr_inval; | 5835 | status = nfserr_inval; |
5690 | goto out; | 5836 | goto out; |
5691 | } | 5837 | } |
5838 | |||
5692 | if (!filp) { | 5839 | if (!filp) { |
5693 | status = nfserr_openmode; | 5840 | status = nfserr_openmode; |
5694 | goto out; | 5841 | goto out; |
5695 | } | 5842 | } |
5696 | 5843 | ||
5844 | nbl = find_or_allocate_block(lock_sop, &fp->fi_fhandle, nn); | ||
5845 | if (!nbl) { | ||
5846 | dprintk("NFSD: %s: unable to allocate block!\n", __func__); | ||
5847 | status = nfserr_jukebox; | ||
5848 | goto out; | ||
5849 | } | ||
5850 | |||
5851 | file_lock = &nbl->nbl_lock; | ||
5852 | file_lock->fl_type = fl_type; | ||
5697 | file_lock->fl_owner = (fl_owner_t)lockowner(nfs4_get_stateowner(&lock_sop->lo_owner)); | 5853 | file_lock->fl_owner = (fl_owner_t)lockowner(nfs4_get_stateowner(&lock_sop->lo_owner)); |
5698 | file_lock->fl_pid = current->tgid; | 5854 | file_lock->fl_pid = current->tgid; |
5699 | file_lock->fl_file = filp; | 5855 | file_lock->fl_file = filp; |
5700 | file_lock->fl_flags = FL_POSIX; | 5856 | file_lock->fl_flags = fl_flags; |
5701 | file_lock->fl_lmops = &nfsd_posix_mng_ops; | 5857 | file_lock->fl_lmops = &nfsd_posix_mng_ops; |
5702 | file_lock->fl_start = lock->lk_offset; | 5858 | file_lock->fl_start = lock->lk_offset; |
5703 | file_lock->fl_end = last_byte_offset(lock->lk_offset, lock->lk_length); | 5859 | file_lock->fl_end = last_byte_offset(lock->lk_offset, lock->lk_length); |
@@ -5710,18 +5866,29 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
5710 | goto out; | 5866 | goto out; |
5711 | } | 5867 | } |
5712 | 5868 | ||
5869 | if (fl_flags & FL_SLEEP) { | ||
5870 | nbl->nbl_time = jiffies; | ||
5871 | spin_lock(&nn->client_lock); | ||
5872 | list_add_tail(&nbl->nbl_list, &lock_sop->lo_blocked); | ||
5873 | list_add_tail(&nbl->nbl_lru, &nn->blocked_locks_lru); | ||
5874 | spin_unlock(&nn->client_lock); | ||
5875 | } | ||
5876 | |||
5713 | err = vfs_lock_file(filp, F_SETLK, file_lock, conflock); | 5877 | err = vfs_lock_file(filp, F_SETLK, file_lock, conflock); |
5714 | switch (-err) { | 5878 | switch (err) { |
5715 | case 0: /* success! */ | 5879 | case 0: /* success! */ |
5716 | nfs4_inc_and_copy_stateid(&lock->lk_resp_stateid, &lock_stp->st_stid); | 5880 | nfs4_inc_and_copy_stateid(&lock->lk_resp_stateid, &lock_stp->st_stid); |
5717 | status = 0; | 5881 | status = 0; |
5718 | break; | 5882 | break; |
5719 | case (EAGAIN): /* conflock holds conflicting lock */ | 5883 | case FILE_LOCK_DEFERRED: |
5884 | nbl = NULL; | ||
5885 | /* Fallthrough */ | ||
5886 | case -EAGAIN: /* conflock holds conflicting lock */ | ||
5720 | status = nfserr_denied; | 5887 | status = nfserr_denied; |
5721 | dprintk("NFSD: nfsd4_lock: conflicting lock found!\n"); | 5888 | dprintk("NFSD: nfsd4_lock: conflicting lock found!\n"); |
5722 | nfs4_set_lock_denied(conflock, &lock->lk_denied); | 5889 | nfs4_set_lock_denied(conflock, &lock->lk_denied); |
5723 | break; | 5890 | break; |
5724 | case (EDEADLK): | 5891 | case -EDEADLK: |
5725 | status = nfserr_deadlock; | 5892 | status = nfserr_deadlock; |
5726 | break; | 5893 | break; |
5727 | default: | 5894 | default: |
@@ -5730,6 +5897,16 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
5730 | break; | 5897 | break; |
5731 | } | 5898 | } |
5732 | out: | 5899 | out: |
5900 | if (nbl) { | ||
5901 | /* dequeue it if we queued it before */ | ||
5902 | if (fl_flags & FL_SLEEP) { | ||
5903 | spin_lock(&nn->client_lock); | ||
5904 | list_del_init(&nbl->nbl_list); | ||
5905 | list_del_init(&nbl->nbl_lru); | ||
5906 | spin_unlock(&nn->client_lock); | ||
5907 | } | ||
5908 | free_blocked_lock(nbl); | ||
5909 | } | ||
5733 | if (filp) | 5910 | if (filp) |
5734 | fput(filp); | 5911 | fput(filp); |
5735 | if (lock_stp) { | 5912 | if (lock_stp) { |
@@ -5753,8 +5930,6 @@ out: | |||
5753 | if (open_stp) | 5930 | if (open_stp) |
5754 | nfs4_put_stid(&open_stp->st_stid); | 5931 | nfs4_put_stid(&open_stp->st_stid); |
5755 | nfsd4_bump_seqid(cstate, status); | 5932 | nfsd4_bump_seqid(cstate, status); |
5756 | if (file_lock) | ||
5757 | locks_free_lock(file_lock); | ||
5758 | if (conflock) | 5933 | if (conflock) |
5759 | locks_free_lock(conflock); | 5934 | locks_free_lock(conflock); |
5760 | return status; | 5935 | return status; |
@@ -6768,6 +6943,7 @@ static int nfs4_state_create_net(struct net *net) | |||
6768 | INIT_LIST_HEAD(&nn->client_lru); | 6943 | INIT_LIST_HEAD(&nn->client_lru); |
6769 | INIT_LIST_HEAD(&nn->close_lru); | 6944 | INIT_LIST_HEAD(&nn->close_lru); |
6770 | INIT_LIST_HEAD(&nn->del_recall_lru); | 6945 | INIT_LIST_HEAD(&nn->del_recall_lru); |
6946 | INIT_LIST_HEAD(&nn->blocked_locks_lru); | ||
6771 | spin_lock_init(&nn->client_lock); | 6947 | spin_lock_init(&nn->client_lock); |
6772 | 6948 | ||
6773 | INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main); | 6949 | INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main); |
@@ -6865,6 +7041,7 @@ nfs4_state_shutdown_net(struct net *net) | |||
6865 | struct nfs4_delegation *dp = NULL; | 7041 | struct nfs4_delegation *dp = NULL; |
6866 | struct list_head *pos, *next, reaplist; | 7042 | struct list_head *pos, *next, reaplist; |
6867 | struct nfsd_net *nn = net_generic(net, nfsd_net_id); | 7043 | struct nfsd_net *nn = net_generic(net, nfsd_net_id); |
7044 | struct nfsd4_blocked_lock *nbl; | ||
6868 | 7045 | ||
6869 | cancel_delayed_work_sync(&nn->laundromat_work); | 7046 | cancel_delayed_work_sync(&nn->laundromat_work); |
6870 | locks_end_grace(&nn->nfsd4_manager); | 7047 | locks_end_grace(&nn->nfsd4_manager); |
@@ -6885,6 +7062,24 @@ nfs4_state_shutdown_net(struct net *net) | |||
6885 | nfs4_put_stid(&dp->dl_stid); | 7062 | nfs4_put_stid(&dp->dl_stid); |
6886 | } | 7063 | } |
6887 | 7064 | ||
7065 | BUG_ON(!list_empty(&reaplist)); | ||
7066 | spin_lock(&nn->client_lock); | ||
7067 | while (!list_empty(&nn->blocked_locks_lru)) { | ||
7068 | nbl = list_first_entry(&nn->blocked_locks_lru, | ||
7069 | struct nfsd4_blocked_lock, nbl_lru); | ||
7070 | list_move(&nbl->nbl_lru, &reaplist); | ||
7071 | list_del_init(&nbl->nbl_list); | ||
7072 | } | ||
7073 | spin_unlock(&nn->client_lock); | ||
7074 | |||
7075 | while (!list_empty(&reaplist)) { | ||
7076 | nbl = list_first_entry(&nn->blocked_locks_lru, | ||
7077 | struct nfsd4_blocked_lock, nbl_lru); | ||
7078 | list_del_init(&nbl->nbl_lru); | ||
7079 | posix_unblock_lock(&nbl->nbl_lock); | ||
7080 | free_blocked_lock(nbl); | ||
7081 | } | ||
7082 | |||
6888 | nfsd4_client_tracking_exit(net); | 7083 | nfsd4_client_tracking_exit(net); |
6889 | nfs4_state_destroy_net(net); | 7084 | nfs4_state_destroy_net(net); |
6890 | } | 7085 | } |
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 0aa0236a1429..c2d2895a1ec1 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c | |||
@@ -1694,6 +1694,30 @@ nfsd4_decode_clone(struct nfsd4_compoundargs *argp, struct nfsd4_clone *clone) | |||
1694 | } | 1694 | } |
1695 | 1695 | ||
1696 | static __be32 | 1696 | static __be32 |
1697 | nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy) | ||
1698 | { | ||
1699 | DECODE_HEAD; | ||
1700 | unsigned int tmp; | ||
1701 | |||
1702 | status = nfsd4_decode_stateid(argp, &copy->cp_src_stateid); | ||
1703 | if (status) | ||
1704 | return status; | ||
1705 | status = nfsd4_decode_stateid(argp, &copy->cp_dst_stateid); | ||
1706 | if (status) | ||
1707 | return status; | ||
1708 | |||
1709 | READ_BUF(8 + 8 + 8 + 4 + 4 + 4); | ||
1710 | p = xdr_decode_hyper(p, &copy->cp_src_pos); | ||
1711 | p = xdr_decode_hyper(p, &copy->cp_dst_pos); | ||
1712 | p = xdr_decode_hyper(p, &copy->cp_count); | ||
1713 | copy->cp_consecutive = be32_to_cpup(p++); | ||
1714 | copy->cp_synchronous = be32_to_cpup(p++); | ||
1715 | tmp = be32_to_cpup(p); /* Source server list not supported */ | ||
1716 | |||
1717 | DECODE_TAIL; | ||
1718 | } | ||
1719 | |||
1720 | static __be32 | ||
1697 | nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek) | 1721 | nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek) |
1698 | { | 1722 | { |
1699 | DECODE_HEAD; | 1723 | DECODE_HEAD; |
@@ -1793,7 +1817,7 @@ static nfsd4_dec nfsd4_dec_ops[] = { | |||
1793 | 1817 | ||
1794 | /* new operations for NFSv4.2 */ | 1818 | /* new operations for NFSv4.2 */ |
1795 | [OP_ALLOCATE] = (nfsd4_dec)nfsd4_decode_fallocate, | 1819 | [OP_ALLOCATE] = (nfsd4_dec)nfsd4_decode_fallocate, |
1796 | [OP_COPY] = (nfsd4_dec)nfsd4_decode_notsupp, | 1820 | [OP_COPY] = (nfsd4_dec)nfsd4_decode_copy, |
1797 | [OP_COPY_NOTIFY] = (nfsd4_dec)nfsd4_decode_notsupp, | 1821 | [OP_COPY_NOTIFY] = (nfsd4_dec)nfsd4_decode_notsupp, |
1798 | [OP_DEALLOCATE] = (nfsd4_dec)nfsd4_decode_fallocate, | 1822 | [OP_DEALLOCATE] = (nfsd4_dec)nfsd4_decode_fallocate, |
1799 | [OP_IO_ADVISE] = (nfsd4_dec)nfsd4_decode_notsupp, | 1823 | [OP_IO_ADVISE] = (nfsd4_dec)nfsd4_decode_notsupp, |
@@ -4062,7 +4086,7 @@ nfsd4_encode_getdeviceinfo(struct nfsd4_compoundres *resp, __be32 nfserr, | |||
4062 | u32 starting_len = xdr->buf->len, needed_len; | 4086 | u32 starting_len = xdr->buf->len, needed_len; |
4063 | __be32 *p; | 4087 | __be32 *p; |
4064 | 4088 | ||
4065 | dprintk("%s: err %d\n", __func__, nfserr); | 4089 | dprintk("%s: err %d\n", __func__, be32_to_cpu(nfserr)); |
4066 | if (nfserr) | 4090 | if (nfserr) |
4067 | goto out; | 4091 | goto out; |
4068 | 4092 | ||
@@ -4202,6 +4226,41 @@ nfsd4_encode_layoutreturn(struct nfsd4_compoundres *resp, __be32 nfserr, | |||
4202 | #endif /* CONFIG_NFSD_PNFS */ | 4226 | #endif /* CONFIG_NFSD_PNFS */ |
4203 | 4227 | ||
4204 | static __be32 | 4228 | static __be32 |
4229 | nfsd42_encode_write_res(struct nfsd4_compoundres *resp, struct nfsd42_write_res *write) | ||
4230 | { | ||
4231 | __be32 *p; | ||
4232 | |||
4233 | p = xdr_reserve_space(&resp->xdr, 4 + 8 + 4 + NFS4_VERIFIER_SIZE); | ||
4234 | if (!p) | ||
4235 | return nfserr_resource; | ||
4236 | |||
4237 | *p++ = cpu_to_be32(0); | ||
4238 | p = xdr_encode_hyper(p, write->wr_bytes_written); | ||
4239 | *p++ = cpu_to_be32(write->wr_stable_how); | ||
4240 | p = xdr_encode_opaque_fixed(p, write->wr_verifier.data, | ||
4241 | NFS4_VERIFIER_SIZE); | ||
4242 | return nfs_ok; | ||
4243 | } | ||
4244 | |||
4245 | static __be32 | ||
4246 | nfsd4_encode_copy(struct nfsd4_compoundres *resp, __be32 nfserr, | ||
4247 | struct nfsd4_copy *copy) | ||
4248 | { | ||
4249 | __be32 *p; | ||
4250 | |||
4251 | if (!nfserr) { | ||
4252 | nfserr = nfsd42_encode_write_res(resp, &copy->cp_res); | ||
4253 | if (nfserr) | ||
4254 | return nfserr; | ||
4255 | |||
4256 | p = xdr_reserve_space(&resp->xdr, 4 + 4); | ||
4257 | *p++ = cpu_to_be32(copy->cp_consecutive); | ||
4258 | *p++ = cpu_to_be32(copy->cp_synchronous); | ||
4259 | } | ||
4260 | return nfserr; | ||
4261 | } | ||
4262 | |||
4263 | static __be32 | ||
4205 | nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr, | 4264 | nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr, |
4206 | struct nfsd4_seek *seek) | 4265 | struct nfsd4_seek *seek) |
4207 | { | 4266 | { |
@@ -4300,7 +4359,7 @@ static nfsd4_enc nfsd4_enc_ops[] = { | |||
4300 | 4359 | ||
4301 | /* NFSv4.2 operations */ | 4360 | /* NFSv4.2 operations */ |
4302 | [OP_ALLOCATE] = (nfsd4_enc)nfsd4_encode_noop, | 4361 | [OP_ALLOCATE] = (nfsd4_enc)nfsd4_encode_noop, |
4303 | [OP_COPY] = (nfsd4_enc)nfsd4_encode_noop, | 4362 | [OP_COPY] = (nfsd4_enc)nfsd4_encode_copy, |
4304 | [OP_COPY_NOTIFY] = (nfsd4_enc)nfsd4_encode_noop, | 4363 | [OP_COPY_NOTIFY] = (nfsd4_enc)nfsd4_encode_noop, |
4305 | [OP_DEALLOCATE] = (nfsd4_enc)nfsd4_encode_noop, | 4364 | [OP_DEALLOCATE] = (nfsd4_enc)nfsd4_encode_noop, |
4306 | [OP_IO_ADVISE] = (nfsd4_enc)nfsd4_encode_noop, | 4365 | [OP_IO_ADVISE] = (nfsd4_enc)nfsd4_encode_noop, |
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 65ad0165a94f..36b2af931e06 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c | |||
@@ -1216,6 +1216,8 @@ static __net_init int nfsd_init_net(struct net *net) | |||
1216 | goto out_idmap_error; | 1216 | goto out_idmap_error; |
1217 | nn->nfsd4_lease = 90; /* default lease time */ | 1217 | nn->nfsd4_lease = 90; /* default lease time */ |
1218 | nn->nfsd4_grace = 90; | 1218 | nn->nfsd4_grace = 90; |
1219 | nn->clverifier_counter = prandom_u32(); | ||
1220 | nn->clientid_counter = prandom_u32(); | ||
1219 | return 0; | 1221 | return 0; |
1220 | 1222 | ||
1221 | out_idmap_error: | 1223 | out_idmap_error: |
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 08188743db53..010aff5c5a79 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c | |||
@@ -789,6 +789,7 @@ nfserrno (int errno) | |||
789 | { nfserr_toosmall, -ETOOSMALL }, | 789 | { nfserr_toosmall, -ETOOSMALL }, |
790 | { nfserr_serverfault, -ESERVERFAULT }, | 790 | { nfserr_serverfault, -ESERVERFAULT }, |
791 | { nfserr_serverfault, -ENFILE }, | 791 | { nfserr_serverfault, -ENFILE }, |
792 | { nfserr_io, -EUCLEAN }, | ||
792 | }; | 793 | }; |
793 | int i; | 794 | int i; |
794 | 795 | ||
@@ -796,7 +797,7 @@ nfserrno (int errno) | |||
796 | if (nfs_errtbl[i].syserr == errno) | 797 | if (nfs_errtbl[i].syserr == errno) |
797 | return nfs_errtbl[i].nfserr; | 798 | return nfs_errtbl[i].nfserr; |
798 | } | 799 | } |
799 | WARN(1, "nfsd: non-standard errno: %d\n", errno); | 800 | WARN_ONCE(1, "nfsd: non-standard errno: %d\n", errno); |
800 | return nfserr_io; | 801 | return nfserr_io; |
801 | } | 802 | } |
802 | 803 | ||
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 45007acaf364..a2b65fc56dd6 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c | |||
@@ -366,14 +366,21 @@ static struct notifier_block nfsd_inet6addr_notifier = { | |||
366 | }; | 366 | }; |
367 | #endif | 367 | #endif |
368 | 368 | ||
369 | /* Only used under nfsd_mutex, so this atomic may be overkill: */ | ||
370 | static atomic_t nfsd_notifier_refcount = ATOMIC_INIT(0); | ||
371 | |||
369 | static void nfsd_last_thread(struct svc_serv *serv, struct net *net) | 372 | static void nfsd_last_thread(struct svc_serv *serv, struct net *net) |
370 | { | 373 | { |
371 | struct nfsd_net *nn = net_generic(net, nfsd_net_id); | 374 | struct nfsd_net *nn = net_generic(net, nfsd_net_id); |
372 | 375 | ||
373 | unregister_inetaddr_notifier(&nfsd_inetaddr_notifier); | 376 | /* check if the notifier still has clients */ |
377 | if (atomic_dec_return(&nfsd_notifier_refcount) == 0) { | ||
378 | unregister_inetaddr_notifier(&nfsd_inetaddr_notifier); | ||
374 | #if IS_ENABLED(CONFIG_IPV6) | 379 | #if IS_ENABLED(CONFIG_IPV6) |
375 | unregister_inet6addr_notifier(&nfsd_inet6addr_notifier); | 380 | unregister_inet6addr_notifier(&nfsd_inet6addr_notifier); |
376 | #endif | 381 | #endif |
382 | } | ||
383 | |||
377 | /* | 384 | /* |
378 | * write_ports can create the server without actually starting | 385 | * write_ports can create the server without actually starting |
379 | * any threads--if we get shut down before any threads are | 386 | * any threads--if we get shut down before any threads are |
@@ -488,10 +495,13 @@ int nfsd_create_serv(struct net *net) | |||
488 | } | 495 | } |
489 | 496 | ||
490 | set_max_drc(); | 497 | set_max_drc(); |
491 | register_inetaddr_notifier(&nfsd_inetaddr_notifier); | 498 | /* check if the notifier is already set */ |
499 | if (atomic_inc_return(&nfsd_notifier_refcount) == 1) { | ||
500 | register_inetaddr_notifier(&nfsd_inetaddr_notifier); | ||
492 | #if IS_ENABLED(CONFIG_IPV6) | 501 | #if IS_ENABLED(CONFIG_IPV6) |
493 | register_inet6addr_notifier(&nfsd_inet6addr_notifier); | 502 | register_inet6addr_notifier(&nfsd_inet6addr_notifier); |
494 | #endif | 503 | #endif |
504 | } | ||
495 | do_gettimeofday(&nn->nfssvc_boot); /* record boot time */ | 505 | do_gettimeofday(&nn->nfssvc_boot); /* record boot time */ |
496 | return 0; | 506 | return 0; |
497 | } | 507 | } |
diff --git a/fs/nfsd/pnfs.h b/fs/nfsd/pnfs.h index 0c2a716e8741..d27a5aa60022 100644 --- a/fs/nfsd/pnfs.h +++ b/fs/nfsd/pnfs.h | |||
@@ -19,6 +19,7 @@ struct nfsd4_deviceid_map { | |||
19 | 19 | ||
20 | struct nfsd4_layout_ops { | 20 | struct nfsd4_layout_ops { |
21 | u32 notify_types; | 21 | u32 notify_types; |
22 | bool disable_recalls; | ||
22 | 23 | ||
23 | __be32 (*proc_getdeviceinfo)(struct super_block *sb, | 24 | __be32 (*proc_getdeviceinfo)(struct super_block *sb, |
24 | struct svc_rqst *rqstp, | 25 | struct svc_rqst *rqstp, |
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index b95adf9a1595..c9399366f9df 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h | |||
@@ -63,7 +63,6 @@ typedef struct { | |||
63 | 63 | ||
64 | struct nfsd4_callback { | 64 | struct nfsd4_callback { |
65 | struct nfs4_client *cb_clp; | 65 | struct nfs4_client *cb_clp; |
66 | u32 cb_minorversion; | ||
67 | struct rpc_message cb_msg; | 66 | struct rpc_message cb_msg; |
68 | const struct nfsd4_callback_ops *cb_ops; | 67 | const struct nfsd4_callback_ops *cb_ops; |
69 | struct work_struct cb_work; | 68 | struct work_struct cb_work; |
@@ -441,11 +440,11 @@ struct nfs4_openowner { | |||
441 | /* | 440 | /* |
442 | * Represents a generic "lockowner". Similar to an openowner. References to it | 441 | * Represents a generic "lockowner". Similar to an openowner. References to it |
443 | * are held by the lock stateids that are created on its behalf. This object is | 442 | * are held by the lock stateids that are created on its behalf. This object is |
444 | * a superset of the nfs4_stateowner struct (or would be if it needed any extra | 443 | * a superset of the nfs4_stateowner struct. |
445 | * fields). | ||
446 | */ | 444 | */ |
447 | struct nfs4_lockowner { | 445 | struct nfs4_lockowner { |
448 | struct nfs4_stateowner lo_owner; /* must be first element */ | 446 | struct nfs4_stateowner lo_owner; /* must be first element */ |
447 | struct list_head lo_blocked; /* blocked file_locks */ | ||
449 | }; | 448 | }; |
450 | 449 | ||
451 | static inline struct nfs4_openowner * openowner(struct nfs4_stateowner *so) | 450 | static inline struct nfs4_openowner * openowner(struct nfs4_stateowner *so) |
@@ -572,6 +571,7 @@ enum nfsd4_cb_op { | |||
572 | NFSPROC4_CLNT_CB_RECALL, | 571 | NFSPROC4_CLNT_CB_RECALL, |
573 | NFSPROC4_CLNT_CB_LAYOUT, | 572 | NFSPROC4_CLNT_CB_LAYOUT, |
574 | NFSPROC4_CLNT_CB_SEQUENCE, | 573 | NFSPROC4_CLNT_CB_SEQUENCE, |
574 | NFSPROC4_CLNT_CB_NOTIFY_LOCK, | ||
575 | }; | 575 | }; |
576 | 576 | ||
577 | /* Returns true iff a is later than b: */ | 577 | /* Returns true iff a is later than b: */ |
@@ -580,6 +580,20 @@ static inline bool nfsd4_stateid_generation_after(stateid_t *a, stateid_t *b) | |||
580 | return (s32)(a->si_generation - b->si_generation) > 0; | 580 | return (s32)(a->si_generation - b->si_generation) > 0; |
581 | } | 581 | } |
582 | 582 | ||
583 | /* | ||
584 | * When a client tries to get a lock on a file, we set one of these objects | ||
585 | * on the blocking lock. When the lock becomes free, we can then issue a | ||
586 | * CB_NOTIFY_LOCK to the server. | ||
587 | */ | ||
588 | struct nfsd4_blocked_lock { | ||
589 | struct list_head nbl_list; | ||
590 | struct list_head nbl_lru; | ||
591 | unsigned long nbl_time; | ||
592 | struct file_lock nbl_lock; | ||
593 | struct knfsd_fh nbl_fh; | ||
594 | struct nfsd4_callback nbl_cb; | ||
595 | }; | ||
596 | |||
583 | struct nfsd4_compound_state; | 597 | struct nfsd4_compound_state; |
584 | struct nfsd_net; | 598 | struct nfsd_net; |
585 | 599 | ||
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index ff476e654b8f..8ca642fe9b21 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c | |||
@@ -513,6 +513,22 @@ __be32 nfsd4_clone_file_range(struct file *src, u64 src_pos, struct file *dst, | |||
513 | count)); | 513 | count)); |
514 | } | 514 | } |
515 | 515 | ||
516 | ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst, | ||
517 | u64 dst_pos, u64 count) | ||
518 | { | ||
519 | |||
520 | /* | ||
521 | * Limit copy to 4MB to prevent indefinitely blocking an nfsd | ||
522 | * thread and client rpc slot. The choice of 4MB is somewhat | ||
523 | * arbitrary. We might instead base this on r/wsize, or make it | ||
524 | * tunable, or use a time instead of a byte limit, or implement | ||
525 | * asynchronous copy. In theory a client could also recognize a | ||
526 | * limit like this and pipeline multiple COPY requests. | ||
527 | */ | ||
528 | count = min_t(u64, count, 1 << 22); | ||
529 | return vfs_copy_file_range(src, src_pos, dst, dst_pos, count, 0); | ||
530 | } | ||
531 | |||
516 | __be32 nfsd4_vfs_fallocate(struct svc_rqst *rqstp, struct svc_fh *fhp, | 532 | __be32 nfsd4_vfs_fallocate(struct svc_rqst *rqstp, struct svc_fh *fhp, |
517 | struct file *file, loff_t offset, loff_t len, | 533 | struct file *file, loff_t offset, loff_t len, |
518 | int flags) | 534 | int flags) |
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index 3cbb1b33777b..0bf9e7bf5800 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h | |||
@@ -96,6 +96,8 @@ __be32 nfsd_symlink(struct svc_rqst *, struct svc_fh *, | |||
96 | struct svc_fh *res); | 96 | struct svc_fh *res); |
97 | __be32 nfsd_link(struct svc_rqst *, struct svc_fh *, | 97 | __be32 nfsd_link(struct svc_rqst *, struct svc_fh *, |
98 | char *, int, struct svc_fh *); | 98 | char *, int, struct svc_fh *); |
99 | ssize_t nfsd_copy_file_range(struct file *, u64, | ||
100 | struct file *, u64, u64); | ||
99 | __be32 nfsd_rename(struct svc_rqst *, | 101 | __be32 nfsd_rename(struct svc_rqst *, |
100 | struct svc_fh *, char *, int, | 102 | struct svc_fh *, char *, int, |
101 | struct svc_fh *, char *, int); | 103 | struct svc_fh *, char *, int); |
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index beea0c5edc51..8fda4abdf3b1 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h | |||
@@ -503,6 +503,28 @@ struct nfsd4_clone { | |||
503 | u64 cl_count; | 503 | u64 cl_count; |
504 | }; | 504 | }; |
505 | 505 | ||
506 | struct nfsd42_write_res { | ||
507 | u64 wr_bytes_written; | ||
508 | u32 wr_stable_how; | ||
509 | nfs4_verifier wr_verifier; | ||
510 | }; | ||
511 | |||
512 | struct nfsd4_copy { | ||
513 | /* request */ | ||
514 | stateid_t cp_src_stateid; | ||
515 | stateid_t cp_dst_stateid; | ||
516 | u64 cp_src_pos; | ||
517 | u64 cp_dst_pos; | ||
518 | u64 cp_count; | ||
519 | |||
520 | /* both */ | ||
521 | bool cp_consecutive; | ||
522 | bool cp_synchronous; | ||
523 | |||
524 | /* response */ | ||
525 | struct nfsd42_write_res cp_res; | ||
526 | }; | ||
527 | |||
506 | struct nfsd4_seek { | 528 | struct nfsd4_seek { |
507 | /* request */ | 529 | /* request */ |
508 | stateid_t seek_stateid; | 530 | stateid_t seek_stateid; |
@@ -568,6 +590,7 @@ struct nfsd4_op { | |||
568 | struct nfsd4_fallocate allocate; | 590 | struct nfsd4_fallocate allocate; |
569 | struct nfsd4_fallocate deallocate; | 591 | struct nfsd4_fallocate deallocate; |
570 | struct nfsd4_clone clone; | 592 | struct nfsd4_clone clone; |
593 | struct nfsd4_copy copy; | ||
571 | struct nfsd4_seek seek; | 594 | struct nfsd4_seek seek; |
572 | } u; | 595 | } u; |
573 | struct nfs4_replay * replay; | 596 | struct nfs4_replay * replay; |
diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h index c47f6fdb111a..49b719dfef95 100644 --- a/fs/nfsd/xdr4cb.h +++ b/fs/nfsd/xdr4cb.h | |||
@@ -28,3 +28,12 @@ | |||
28 | #define NFS4_dec_cb_layout_sz (cb_compound_dec_hdr_sz + \ | 28 | #define NFS4_dec_cb_layout_sz (cb_compound_dec_hdr_sz + \ |
29 | cb_sequence_dec_sz + \ | 29 | cb_sequence_dec_sz + \ |
30 | op_dec_sz) | 30 | op_dec_sz) |
31 | |||
32 | #define NFS4_enc_cb_notify_lock_sz (cb_compound_enc_hdr_sz + \ | ||
33 | cb_sequence_enc_sz + \ | ||
34 | 2 + 1 + \ | ||
35 | XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + \ | ||
36 | enc_nfs4_fh_sz) | ||
37 | #define NFS4_dec_cb_notify_lock_sz (cb_compound_dec_hdr_sz + \ | ||
38 | cb_sequence_dec_sz + \ | ||
39 | op_dec_sz) | ||
diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index b03c0625fa6e..5ab958cdc50b 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h | |||
@@ -157,12 +157,13 @@ struct fid { | |||
157 | * @fh_to_dentry is given a &struct super_block (@sb) and a file handle | 157 | * @fh_to_dentry is given a &struct super_block (@sb) and a file handle |
158 | * fragment (@fh, @fh_len). It should return a &struct dentry which refers | 158 | * fragment (@fh, @fh_len). It should return a &struct dentry which refers |
159 | * to the same file that the file handle fragment refers to. If it cannot, | 159 | * to the same file that the file handle fragment refers to. If it cannot, |
160 | * it should return a %NULL pointer if the file was found but no acceptable | 160 | * it should return a %NULL pointer if the file cannot be found, or an |
161 | * &dentries were available, or an %ERR_PTR error code indicating why it | 161 | * %ERR_PTR error code of %ENOMEM if a memory allocation failure occurred. |
162 | * couldn't be found (e.g. %ENOENT or %ENOMEM). Any suitable dentry can be | 162 | * Any other error code is treated like %NULL, and will cause an %ESTALE error |
163 | * returned including, if necessary, a new dentry created with d_alloc_root. | 163 | * for callers of exportfs_decode_fh(). |
164 | * The caller can then find any other extant dentries by following the | 164 | * Any suitable dentry can be returned including, if necessary, a new dentry |
165 | * d_alias links. | 165 | * created with d_alloc_root. The caller can then find any other extant |
166 | * dentries by following the d_alias links. | ||
166 | * | 167 | * |
167 | * fh_to_parent: | 168 | * fh_to_parent: |
168 | * Same as @fh_to_dentry, except that it returns a pointer to the parent | 169 | * Same as @fh_to_dentry, except that it returns a pointer to the parent |
diff --git a/include/linux/sunrpc/rpc_rdma.h b/include/linux/sunrpc/rpc_rdma.h index 3b1ff38f0c37..a7da6bf56610 100644 --- a/include/linux/sunrpc/rpc_rdma.h +++ b/include/linux/sunrpc/rpc_rdma.h | |||
@@ -41,6 +41,7 @@ | |||
41 | #define _LINUX_SUNRPC_RPC_RDMA_H | 41 | #define _LINUX_SUNRPC_RPC_RDMA_H |
42 | 42 | ||
43 | #include <linux/types.h> | 43 | #include <linux/types.h> |
44 | #include <linux/bitops.h> | ||
44 | 45 | ||
45 | #define RPCRDMA_VERSION 1 | 46 | #define RPCRDMA_VERSION 1 |
46 | #define rpcrdma_version cpu_to_be32(RPCRDMA_VERSION) | 47 | #define rpcrdma_version cpu_to_be32(RPCRDMA_VERSION) |
@@ -129,4 +130,38 @@ enum rpcrdma_proc { | |||
129 | #define rdma_done cpu_to_be32(RDMA_DONE) | 130 | #define rdma_done cpu_to_be32(RDMA_DONE) |
130 | #define rdma_error cpu_to_be32(RDMA_ERROR) | 131 | #define rdma_error cpu_to_be32(RDMA_ERROR) |
131 | 132 | ||
133 | /* | ||
134 | * Private extension to RPC-over-RDMA Version One. | ||
135 | * Message passed during RDMA-CM connection set-up. | ||
136 | * | ||
137 | * Add new fields at the end, and don't permute existing | ||
138 | * fields. | ||
139 | */ | ||
140 | struct rpcrdma_connect_private { | ||
141 | __be32 cp_magic; | ||
142 | u8 cp_version; | ||
143 | u8 cp_flags; | ||
144 | u8 cp_send_size; | ||
145 | u8 cp_recv_size; | ||
146 | } __packed; | ||
147 | |||
148 | #define rpcrdma_cmp_magic __cpu_to_be32(0xf6ab0e18) | ||
149 | |||
150 | enum { | ||
151 | RPCRDMA_CMP_VERSION = 1, | ||
152 | RPCRDMA_CMP_F_SND_W_INV_OK = BIT(0), | ||
153 | }; | ||
154 | |||
155 | static inline u8 | ||
156 | rpcrdma_encode_buffer_size(unsigned int size) | ||
157 | { | ||
158 | return (size >> 10) - 1; | ||
159 | } | ||
160 | |||
161 | static inline unsigned int | ||
162 | rpcrdma_decode_buffer_size(u8 val) | ||
163 | { | ||
164 | return ((unsigned int)val + 1) << 10; | ||
165 | } | ||
166 | |||
132 | #endif /* _LINUX_SUNRPC_RPC_RDMA_H */ | 167 | #endif /* _LINUX_SUNRPC_RPC_RDMA_H */ |
diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index d6917b896d3a..cc3ae16eac68 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h | |||
@@ -86,6 +86,7 @@ struct svc_rdma_op_ctxt { | |||
86 | unsigned long flags; | 86 | unsigned long flags; |
87 | enum dma_data_direction direction; | 87 | enum dma_data_direction direction; |
88 | int count; | 88 | int count; |
89 | unsigned int mapped_sges; | ||
89 | struct ib_sge sge[RPCSVC_MAXPAGES]; | 90 | struct ib_sge sge[RPCSVC_MAXPAGES]; |
90 | struct page *pages[RPCSVC_MAXPAGES]; | 91 | struct page *pages[RPCSVC_MAXPAGES]; |
91 | }; | 92 | }; |
@@ -136,6 +137,7 @@ struct svcxprt_rdma { | |||
136 | int sc_ord; /* RDMA read limit */ | 137 | int sc_ord; /* RDMA read limit */ |
137 | int sc_max_sge; | 138 | int sc_max_sge; |
138 | int sc_max_sge_rd; /* max sge for read target */ | 139 | int sc_max_sge_rd; /* max sge for read target */ |
140 | bool sc_snd_w_inv; /* OK to use Send With Invalidate */ | ||
139 | 141 | ||
140 | atomic_t sc_sq_count; /* Number of SQ WR on queue */ | 142 | atomic_t sc_sq_count; /* Number of SQ WR on queue */ |
141 | unsigned int sc_sq_depth; /* Depth of SQ */ | 143 | unsigned int sc_sq_depth; /* Depth of SQ */ |
@@ -193,6 +195,14 @@ struct svcxprt_rdma { | |||
193 | 195 | ||
194 | #define RPCSVC_MAXPAYLOAD_RDMA RPCSVC_MAXPAYLOAD | 196 | #define RPCSVC_MAXPAYLOAD_RDMA RPCSVC_MAXPAYLOAD |
195 | 197 | ||
198 | /* Track DMA maps for this transport and context */ | ||
199 | static inline void svc_rdma_count_mappings(struct svcxprt_rdma *rdma, | ||
200 | struct svc_rdma_op_ctxt *ctxt) | ||
201 | { | ||
202 | ctxt->mapped_sges++; | ||
203 | atomic_inc(&rdma->sc_dma_used); | ||
204 | } | ||
205 | |||
196 | /* svc_rdma_backchannel.c */ | 206 | /* svc_rdma_backchannel.c */ |
197 | extern int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, | 207 | extern int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, |
198 | struct rpcrdma_msg *rmsgp, | 208 | struct rpcrdma_msg *rmsgp, |
diff --git a/include/uapi/linux/nfs4.h b/include/uapi/linux/nfs4.h index 2b871e0858d9..4ae62796bfde 100644 --- a/include/uapi/linux/nfs4.h +++ b/include/uapi/linux/nfs4.h | |||
@@ -39,8 +39,9 @@ | |||
39 | #define NFS4_FH_VOL_MIGRATION 0x0004 | 39 | #define NFS4_FH_VOL_MIGRATION 0x0004 |
40 | #define NFS4_FH_VOL_RENAME 0x0008 | 40 | #define NFS4_FH_VOL_RENAME 0x0008 |
41 | 41 | ||
42 | #define NFS4_OPEN_RESULT_CONFIRM 0x0002 | 42 | #define NFS4_OPEN_RESULT_CONFIRM 0x0002 |
43 | #define NFS4_OPEN_RESULT_LOCKTYPE_POSIX 0x0004 | 43 | #define NFS4_OPEN_RESULT_LOCKTYPE_POSIX 0x0004 |
44 | #define NFS4_OPEN_RESULT_MAY_NOTIFY_LOCK 0x0020 | ||
44 | 45 | ||
45 | #define NFS4_SHARE_ACCESS_MASK 0x000F | 46 | #define NFS4_SHARE_ACCESS_MASK 0x000F |
46 | #define NFS4_SHARE_ACCESS_READ 0x0001 | 47 | #define NFS4_SHARE_ACCESS_READ 0x0001 |
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c index a2a7519b0f23..cd0c5581498c 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c +++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c | |||
@@ -129,7 +129,7 @@ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma, | |||
129 | ret = -EIO; | 129 | ret = -EIO; |
130 | goto out_unmap; | 130 | goto out_unmap; |
131 | } | 131 | } |
132 | atomic_inc(&rdma->sc_dma_used); | 132 | svc_rdma_count_mappings(rdma, ctxt); |
133 | 133 | ||
134 | memset(&send_wr, 0, sizeof(send_wr)); | 134 | memset(&send_wr, 0, sizeof(send_wr)); |
135 | ctxt->cqe.done = svc_rdma_wc_send; | 135 | ctxt->cqe.done = svc_rdma_wc_send; |
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 2c25606f2561..ad1df979b3f0 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | |||
@@ -159,7 +159,7 @@ int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt, | |||
159 | ctxt->sge[pno].addr); | 159 | ctxt->sge[pno].addr); |
160 | if (ret) | 160 | if (ret) |
161 | goto err; | 161 | goto err; |
162 | atomic_inc(&xprt->sc_dma_used); | 162 | svc_rdma_count_mappings(xprt, ctxt); |
163 | 163 | ||
164 | ctxt->sge[pno].lkey = xprt->sc_pd->local_dma_lkey; | 164 | ctxt->sge[pno].lkey = xprt->sc_pd->local_dma_lkey; |
165 | ctxt->sge[pno].length = len; | 165 | ctxt->sge[pno].length = len; |
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 54d533300620..f5a91edcd233 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c | |||
@@ -225,6 +225,48 @@ svc_rdma_get_reply_array(struct rpcrdma_msg *rmsgp, | |||
225 | return rp_ary; | 225 | return rp_ary; |
226 | } | 226 | } |
227 | 227 | ||
228 | /* RPC-over-RDMA Version One private extension: Remote Invalidation. | ||
229 | * Responder's choice: requester signals it can handle Send With | ||
230 | * Invalidate, and responder chooses one rkey to invalidate. | ||
231 | * | ||
232 | * Find a candidate rkey to invalidate when sending a reply. Picks the | ||
233 | * first rkey it finds in the chunks lists. | ||
234 | * | ||
235 | * Returns zero if RPC's chunk lists are empty. | ||
236 | */ | ||
237 | static u32 svc_rdma_get_inv_rkey(struct rpcrdma_msg *rdma_argp, | ||
238 | struct rpcrdma_write_array *wr_ary, | ||
239 | struct rpcrdma_write_array *rp_ary) | ||
240 | { | ||
241 | struct rpcrdma_read_chunk *rd_ary; | ||
242 | struct rpcrdma_segment *arg_ch; | ||
243 | u32 inv_rkey; | ||
244 | |||
245 | inv_rkey = 0; | ||
246 | |||
247 | rd_ary = svc_rdma_get_read_chunk(rdma_argp); | ||
248 | if (rd_ary) { | ||
249 | inv_rkey = be32_to_cpu(rd_ary->rc_target.rs_handle); | ||
250 | goto out; | ||
251 | } | ||
252 | |||
253 | if (wr_ary && be32_to_cpu(wr_ary->wc_nchunks)) { | ||
254 | arg_ch = &wr_ary->wc_array[0].wc_target; | ||
255 | inv_rkey = be32_to_cpu(arg_ch->rs_handle); | ||
256 | goto out; | ||
257 | } | ||
258 | |||
259 | if (rp_ary && be32_to_cpu(rp_ary->wc_nchunks)) { | ||
260 | arg_ch = &rp_ary->wc_array[0].wc_target; | ||
261 | inv_rkey = be32_to_cpu(arg_ch->rs_handle); | ||
262 | goto out; | ||
263 | } | ||
264 | |||
265 | out: | ||
266 | dprintk("svcrdma: Send With Invalidate rkey=%08x\n", inv_rkey); | ||
267 | return inv_rkey; | ||
268 | } | ||
269 | |||
228 | /* Assumptions: | 270 | /* Assumptions: |
229 | * - The specified write_len can be represented in sc_max_sge * PAGE_SIZE | 271 | * - The specified write_len can be represented in sc_max_sge * PAGE_SIZE |
230 | */ | 272 | */ |
@@ -280,7 +322,7 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, | |||
280 | if (ib_dma_mapping_error(xprt->sc_cm_id->device, | 322 | if (ib_dma_mapping_error(xprt->sc_cm_id->device, |
281 | sge[sge_no].addr)) | 323 | sge[sge_no].addr)) |
282 | goto err; | 324 | goto err; |
283 | atomic_inc(&xprt->sc_dma_used); | 325 | svc_rdma_count_mappings(xprt, ctxt); |
284 | sge[sge_no].lkey = xprt->sc_pd->local_dma_lkey; | 326 | sge[sge_no].lkey = xprt->sc_pd->local_dma_lkey; |
285 | ctxt->count++; | 327 | ctxt->count++; |
286 | sge_off = 0; | 328 | sge_off = 0; |
@@ -464,7 +506,8 @@ static int send_reply(struct svcxprt_rdma *rdma, | |||
464 | struct page *page, | 506 | struct page *page, |
465 | struct rpcrdma_msg *rdma_resp, | 507 | struct rpcrdma_msg *rdma_resp, |
466 | struct svc_rdma_req_map *vec, | 508 | struct svc_rdma_req_map *vec, |
467 | int byte_count) | 509 | int byte_count, |
510 | u32 inv_rkey) | ||
468 | { | 511 | { |
469 | struct svc_rdma_op_ctxt *ctxt; | 512 | struct svc_rdma_op_ctxt *ctxt; |
470 | struct ib_send_wr send_wr; | 513 | struct ib_send_wr send_wr; |
@@ -489,7 +532,7 @@ static int send_reply(struct svcxprt_rdma *rdma, | |||
489 | ctxt->sge[0].length, DMA_TO_DEVICE); | 532 | ctxt->sge[0].length, DMA_TO_DEVICE); |
490 | if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr)) | 533 | if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr)) |
491 | goto err; | 534 | goto err; |
492 | atomic_inc(&rdma->sc_dma_used); | 535 | svc_rdma_count_mappings(rdma, ctxt); |
493 | 536 | ||
494 | ctxt->direction = DMA_TO_DEVICE; | 537 | ctxt->direction = DMA_TO_DEVICE; |
495 | 538 | ||
@@ -505,7 +548,7 @@ static int send_reply(struct svcxprt_rdma *rdma, | |||
505 | if (ib_dma_mapping_error(rdma->sc_cm_id->device, | 548 | if (ib_dma_mapping_error(rdma->sc_cm_id->device, |
506 | ctxt->sge[sge_no].addr)) | 549 | ctxt->sge[sge_no].addr)) |
507 | goto err; | 550 | goto err; |
508 | atomic_inc(&rdma->sc_dma_used); | 551 | svc_rdma_count_mappings(rdma, ctxt); |
509 | ctxt->sge[sge_no].lkey = rdma->sc_pd->local_dma_lkey; | 552 | ctxt->sge[sge_no].lkey = rdma->sc_pd->local_dma_lkey; |
510 | ctxt->sge[sge_no].length = sge_bytes; | 553 | ctxt->sge[sge_no].length = sge_bytes; |
511 | } | 554 | } |
@@ -523,23 +566,9 @@ static int send_reply(struct svcxprt_rdma *rdma, | |||
523 | ctxt->pages[page_no+1] = rqstp->rq_respages[page_no]; | 566 | ctxt->pages[page_no+1] = rqstp->rq_respages[page_no]; |
524 | ctxt->count++; | 567 | ctxt->count++; |
525 | rqstp->rq_respages[page_no] = NULL; | 568 | rqstp->rq_respages[page_no] = NULL; |
526 | /* | ||
527 | * If there are more pages than SGE, terminate SGE | ||
528 | * list so that svc_rdma_unmap_dma doesn't attempt to | ||
529 | * unmap garbage. | ||
530 | */ | ||
531 | if (page_no+1 >= sge_no) | ||
532 | ctxt->sge[page_no+1].length = 0; | ||
533 | } | 569 | } |
534 | rqstp->rq_next_page = rqstp->rq_respages + 1; | 570 | rqstp->rq_next_page = rqstp->rq_respages + 1; |
535 | 571 | ||
536 | /* The loop above bumps sc_dma_used for each sge. The | ||
537 | * xdr_buf.tail gets a separate sge, but resides in the | ||
538 | * same page as xdr_buf.head. Don't count it twice. | ||
539 | */ | ||
540 | if (sge_no > ctxt->count) | ||
541 | atomic_dec(&rdma->sc_dma_used); | ||
542 | |||
543 | if (sge_no > rdma->sc_max_sge) { | 572 | if (sge_no > rdma->sc_max_sge) { |
544 | pr_err("svcrdma: Too many sges (%d)\n", sge_no); | 573 | pr_err("svcrdma: Too many sges (%d)\n", sge_no); |
545 | goto err; | 574 | goto err; |
@@ -549,7 +578,11 @@ static int send_reply(struct svcxprt_rdma *rdma, | |||
549 | send_wr.wr_cqe = &ctxt->cqe; | 578 | send_wr.wr_cqe = &ctxt->cqe; |
550 | send_wr.sg_list = ctxt->sge; | 579 | send_wr.sg_list = ctxt->sge; |
551 | send_wr.num_sge = sge_no; | 580 | send_wr.num_sge = sge_no; |
552 | send_wr.opcode = IB_WR_SEND; | 581 | if (inv_rkey) { |
582 | send_wr.opcode = IB_WR_SEND_WITH_INV; | ||
583 | send_wr.ex.invalidate_rkey = inv_rkey; | ||
584 | } else | ||
585 | send_wr.opcode = IB_WR_SEND; | ||
553 | send_wr.send_flags = IB_SEND_SIGNALED; | 586 | send_wr.send_flags = IB_SEND_SIGNALED; |
554 | 587 | ||
555 | ret = svc_rdma_send(rdma, &send_wr); | 588 | ret = svc_rdma_send(rdma, &send_wr); |
@@ -581,6 +614,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) | |||
581 | int inline_bytes; | 614 | int inline_bytes; |
582 | struct page *res_page; | 615 | struct page *res_page; |
583 | struct svc_rdma_req_map *vec; | 616 | struct svc_rdma_req_map *vec; |
617 | u32 inv_rkey; | ||
584 | 618 | ||
585 | dprintk("svcrdma: sending response for rqstp=%p\n", rqstp); | 619 | dprintk("svcrdma: sending response for rqstp=%p\n", rqstp); |
586 | 620 | ||
@@ -591,6 +625,10 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) | |||
591 | wr_ary = svc_rdma_get_write_array(rdma_argp); | 625 | wr_ary = svc_rdma_get_write_array(rdma_argp); |
592 | rp_ary = svc_rdma_get_reply_array(rdma_argp, wr_ary); | 626 | rp_ary = svc_rdma_get_reply_array(rdma_argp, wr_ary); |
593 | 627 | ||
628 | inv_rkey = 0; | ||
629 | if (rdma->sc_snd_w_inv) | ||
630 | inv_rkey = svc_rdma_get_inv_rkey(rdma_argp, wr_ary, rp_ary); | ||
631 | |||
594 | /* Build an req vec for the XDR */ | 632 | /* Build an req vec for the XDR */ |
595 | vec = svc_rdma_get_req_map(rdma); | 633 | vec = svc_rdma_get_req_map(rdma); |
596 | ret = svc_rdma_map_xdr(rdma, &rqstp->rq_res, vec, wr_ary != NULL); | 634 | ret = svc_rdma_map_xdr(rdma, &rqstp->rq_res, vec, wr_ary != NULL); |
@@ -633,9 +671,9 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) | |||
633 | goto err1; | 671 | goto err1; |
634 | 672 | ||
635 | ret = send_reply(rdma, rqstp, res_page, rdma_resp, vec, | 673 | ret = send_reply(rdma, rqstp, res_page, rdma_resp, vec, |
636 | inline_bytes); | 674 | inline_bytes, inv_rkey); |
637 | if (ret < 0) | 675 | if (ret < 0) |
638 | goto err1; | 676 | goto err0; |
639 | 677 | ||
640 | svc_rdma_put_req_map(rdma, vec); | 678 | svc_rdma_put_req_map(rdma, vec); |
641 | dprintk("svcrdma: send_reply returns %d\n", ret); | 679 | dprintk("svcrdma: send_reply returns %d\n", ret); |
@@ -692,7 +730,7 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp, | |||
692 | svc_rdma_put_context(ctxt, 1); | 730 | svc_rdma_put_context(ctxt, 1); |
693 | return; | 731 | return; |
694 | } | 732 | } |
695 | atomic_inc(&xprt->sc_dma_used); | 733 | svc_rdma_count_mappings(xprt, ctxt); |
696 | 734 | ||
697 | /* Prepare SEND WR */ | 735 | /* Prepare SEND WR */ |
698 | memset(&err_wr, 0, sizeof(err_wr)); | 736 | memset(&err_wr, 0, sizeof(err_wr)); |
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index eb2857f52b05..6864fb967038 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c | |||
@@ -198,6 +198,7 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt) | |||
198 | 198 | ||
199 | out: | 199 | out: |
200 | ctxt->count = 0; | 200 | ctxt->count = 0; |
201 | ctxt->mapped_sges = 0; | ||
201 | ctxt->frmr = NULL; | 202 | ctxt->frmr = NULL; |
202 | return ctxt; | 203 | return ctxt; |
203 | 204 | ||
@@ -221,22 +222,27 @@ out_empty: | |||
221 | void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt) | 222 | void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt) |
222 | { | 223 | { |
223 | struct svcxprt_rdma *xprt = ctxt->xprt; | 224 | struct svcxprt_rdma *xprt = ctxt->xprt; |
224 | int i; | 225 | struct ib_device *device = xprt->sc_cm_id->device; |
225 | for (i = 0; i < ctxt->count && ctxt->sge[i].length; i++) { | 226 | u32 lkey = xprt->sc_pd->local_dma_lkey; |
227 | unsigned int i, count; | ||
228 | |||
229 | for (count = 0, i = 0; i < ctxt->mapped_sges; i++) { | ||
226 | /* | 230 | /* |
227 | * Unmap the DMA addr in the SGE if the lkey matches | 231 | * Unmap the DMA addr in the SGE if the lkey matches |
228 | * the local_dma_lkey, otherwise, ignore it since it is | 232 | * the local_dma_lkey, otherwise, ignore it since it is |
229 | * an FRMR lkey and will be unmapped later when the | 233 | * an FRMR lkey and will be unmapped later when the |
230 | * last WR that uses it completes. | 234 | * last WR that uses it completes. |
231 | */ | 235 | */ |
232 | if (ctxt->sge[i].lkey == xprt->sc_pd->local_dma_lkey) { | 236 | if (ctxt->sge[i].lkey == lkey) { |
233 | atomic_dec(&xprt->sc_dma_used); | 237 | count++; |
234 | ib_dma_unmap_page(xprt->sc_cm_id->device, | 238 | ib_dma_unmap_page(device, |
235 | ctxt->sge[i].addr, | 239 | ctxt->sge[i].addr, |
236 | ctxt->sge[i].length, | 240 | ctxt->sge[i].length, |
237 | ctxt->direction); | 241 | ctxt->direction); |
238 | } | 242 | } |
239 | } | 243 | } |
244 | ctxt->mapped_sges = 0; | ||
245 | atomic_sub(count, &xprt->sc_dma_used); | ||
240 | } | 246 | } |
241 | 247 | ||
242 | void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages) | 248 | void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages) |
@@ -600,7 +606,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt, gfp_t flags) | |||
600 | DMA_FROM_DEVICE); | 606 | DMA_FROM_DEVICE); |
601 | if (ib_dma_mapping_error(xprt->sc_cm_id->device, pa)) | 607 | if (ib_dma_mapping_error(xprt->sc_cm_id->device, pa)) |
602 | goto err_put_ctxt; | 608 | goto err_put_ctxt; |
603 | atomic_inc(&xprt->sc_dma_used); | 609 | svc_rdma_count_mappings(xprt, ctxt); |
604 | ctxt->sge[sge_no].addr = pa; | 610 | ctxt->sge[sge_no].addr = pa; |
605 | ctxt->sge[sge_no].length = PAGE_SIZE; | 611 | ctxt->sge[sge_no].length = PAGE_SIZE; |
606 | ctxt->sge[sge_no].lkey = xprt->sc_pd->local_dma_lkey; | 612 | ctxt->sge[sge_no].lkey = xprt->sc_pd->local_dma_lkey; |
@@ -642,6 +648,26 @@ int svc_rdma_repost_recv(struct svcxprt_rdma *xprt, gfp_t flags) | |||
642 | return ret; | 648 | return ret; |
643 | } | 649 | } |
644 | 650 | ||
651 | static void | ||
652 | svc_rdma_parse_connect_private(struct svcxprt_rdma *newxprt, | ||
653 | struct rdma_conn_param *param) | ||
654 | { | ||
655 | const struct rpcrdma_connect_private *pmsg = param->private_data; | ||
656 | |||
657 | if (pmsg && | ||
658 | pmsg->cp_magic == rpcrdma_cmp_magic && | ||
659 | pmsg->cp_version == RPCRDMA_CMP_VERSION) { | ||
660 | newxprt->sc_snd_w_inv = pmsg->cp_flags & | ||
661 | RPCRDMA_CMP_F_SND_W_INV_OK; | ||
662 | |||
663 | dprintk("svcrdma: client send_size %u, recv_size %u " | ||
664 | "remote inv %ssupported\n", | ||
665 | rpcrdma_decode_buffer_size(pmsg->cp_send_size), | ||
666 | rpcrdma_decode_buffer_size(pmsg->cp_recv_size), | ||
667 | newxprt->sc_snd_w_inv ? "" : "un"); | ||
668 | } | ||
669 | } | ||
670 | |||
645 | /* | 671 | /* |
646 | * This function handles the CONNECT_REQUEST event on a listening | 672 | * This function handles the CONNECT_REQUEST event on a listening |
647 | * endpoint. It is passed the cma_id for the _new_ connection. The context in | 673 | * endpoint. It is passed the cma_id for the _new_ connection. The context in |
@@ -653,7 +679,8 @@ int svc_rdma_repost_recv(struct svcxprt_rdma *xprt, gfp_t flags) | |||
653 | * will call the recvfrom method on the listen xprt which will accept the new | 679 | * will call the recvfrom method on the listen xprt which will accept the new |
654 | * connection. | 680 | * connection. |
655 | */ | 681 | */ |
656 | static void handle_connect_req(struct rdma_cm_id *new_cma_id, size_t client_ird) | 682 | static void handle_connect_req(struct rdma_cm_id *new_cma_id, |
683 | struct rdma_conn_param *param) | ||
657 | { | 684 | { |
658 | struct svcxprt_rdma *listen_xprt = new_cma_id->context; | 685 | struct svcxprt_rdma *listen_xprt = new_cma_id->context; |
659 | struct svcxprt_rdma *newxprt; | 686 | struct svcxprt_rdma *newxprt; |
@@ -669,9 +696,10 @@ static void handle_connect_req(struct rdma_cm_id *new_cma_id, size_t client_ird) | |||
669 | new_cma_id->context = newxprt; | 696 | new_cma_id->context = newxprt; |
670 | dprintk("svcrdma: Creating newxprt=%p, cm_id=%p, listenxprt=%p\n", | 697 | dprintk("svcrdma: Creating newxprt=%p, cm_id=%p, listenxprt=%p\n", |
671 | newxprt, newxprt->sc_cm_id, listen_xprt); | 698 | newxprt, newxprt->sc_cm_id, listen_xprt); |
699 | svc_rdma_parse_connect_private(newxprt, param); | ||
672 | 700 | ||
673 | /* Save client advertised inbound read limit for use later in accept. */ | 701 | /* Save client advertised inbound read limit for use later in accept. */ |
674 | newxprt->sc_ord = client_ird; | 702 | newxprt->sc_ord = param->initiator_depth; |
675 | 703 | ||
676 | /* Set the local and remote addresses in the transport */ | 704 | /* Set the local and remote addresses in the transport */ |
677 | sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr; | 705 | sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr; |
@@ -706,8 +734,7 @@ static int rdma_listen_handler(struct rdma_cm_id *cma_id, | |||
706 | dprintk("svcrdma: Connect request on cma_id=%p, xprt = %p, " | 734 | dprintk("svcrdma: Connect request on cma_id=%p, xprt = %p, " |
707 | "event = %s (%d)\n", cma_id, cma_id->context, | 735 | "event = %s (%d)\n", cma_id, cma_id->context, |
708 | rdma_event_msg(event->event), event->event); | 736 | rdma_event_msg(event->event), event->event); |
709 | handle_connect_req(cma_id, | 737 | handle_connect_req(cma_id, &event->param.conn); |
710 | event->param.conn.initiator_depth); | ||
711 | break; | 738 | break; |
712 | 739 | ||
713 | case RDMA_CM_EVENT_ESTABLISHED: | 740 | case RDMA_CM_EVENT_ESTABLISHED: |
@@ -941,6 +968,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) | |||
941 | struct svcxprt_rdma *listen_rdma; | 968 | struct svcxprt_rdma *listen_rdma; |
942 | struct svcxprt_rdma *newxprt = NULL; | 969 | struct svcxprt_rdma *newxprt = NULL; |
943 | struct rdma_conn_param conn_param; | 970 | struct rdma_conn_param conn_param; |
971 | struct rpcrdma_connect_private pmsg; | ||
944 | struct ib_qp_init_attr qp_attr; | 972 | struct ib_qp_init_attr qp_attr; |
945 | struct ib_device *dev; | 973 | struct ib_device *dev; |
946 | unsigned int i; | 974 | unsigned int i; |
@@ -1070,7 +1098,8 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) | |||
1070 | dev->attrs.max_fast_reg_page_list_len; | 1098 | dev->attrs.max_fast_reg_page_list_len; |
1071 | newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_FAST_REG; | 1099 | newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_FAST_REG; |
1072 | newxprt->sc_reader = rdma_read_chunk_frmr; | 1100 | newxprt->sc_reader = rdma_read_chunk_frmr; |
1073 | } | 1101 | } else |
1102 | newxprt->sc_snd_w_inv = false; | ||
1074 | 1103 | ||
1075 | /* | 1104 | /* |
1076 | * Determine if a DMA MR is required and if so, what privs are required | 1105 | * Determine if a DMA MR is required and if so, what privs are required |
@@ -1094,11 +1123,20 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) | |||
1094 | /* Swap out the handler */ | 1123 | /* Swap out the handler */ |
1095 | newxprt->sc_cm_id->event_handler = rdma_cma_handler; | 1124 | newxprt->sc_cm_id->event_handler = rdma_cma_handler; |
1096 | 1125 | ||
1126 | /* Construct RDMA-CM private message */ | ||
1127 | pmsg.cp_magic = rpcrdma_cmp_magic; | ||
1128 | pmsg.cp_version = RPCRDMA_CMP_VERSION; | ||
1129 | pmsg.cp_flags = 0; | ||
1130 | pmsg.cp_send_size = pmsg.cp_recv_size = | ||
1131 | rpcrdma_encode_buffer_size(newxprt->sc_max_req_size); | ||
1132 | |||
1097 | /* Accept Connection */ | 1133 | /* Accept Connection */ |
1098 | set_bit(RDMAXPRT_CONN_PENDING, &newxprt->sc_flags); | 1134 | set_bit(RDMAXPRT_CONN_PENDING, &newxprt->sc_flags); |
1099 | memset(&conn_param, 0, sizeof conn_param); | 1135 | memset(&conn_param, 0, sizeof conn_param); |
1100 | conn_param.responder_resources = 0; | 1136 | conn_param.responder_resources = 0; |
1101 | conn_param.initiator_depth = newxprt->sc_ord; | 1137 | conn_param.initiator_depth = newxprt->sc_ord; |
1138 | conn_param.private_data = &pmsg; | ||
1139 | conn_param.private_data_len = sizeof(pmsg); | ||
1102 | ret = rdma_accept(newxprt->sc_cm_id, &conn_param); | 1140 | ret = rdma_accept(newxprt->sc_cm_id, &conn_param); |
1103 | if (ret) { | 1141 | if (ret) { |
1104 | dprintk("svcrdma: failed to accept new connection, ret=%d\n", | 1142 | dprintk("svcrdma: failed to accept new connection, ret=%d\n", |