aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2016-10-14 00:04:42 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2016-10-14 00:04:42 -0400
commit2778556474b1996aa68ae61619386b8802733bd8 (patch)
treecc136dd6d4589073b5aedb28429eb72cdd067106
parent35a891be96f1f8e1227e6ad3ca827b8a08ce47ea (diff)
parent29ae7f9dc21a7dda41d78b27bbda7d427ece8ad4 (diff)
Merge tag 'nfsd-4.9' of git://linux-nfs.org/~bfields/linux
Pull nfsd updates from Bruce Fields: "Some RDMA work and some good bugfixes, and two new features that could benefit from user testing: - Anna Schumaker contributed a simple NFSv4.2 COPY implementation. COPY is already supported on the client side, so a call to copy_file_range() on a recent client should now result in a server-side copy that doesn't require all the data to make a round trip to the client and back. - Jeff Layton implemented callbacks to notify clients when contended locks become available, which should reduce latency on workloads with contended locks" * tag 'nfsd-4.9' of git://linux-nfs.org/~bfields/linux: NFSD: Implement the COPY call nfsd: handle EUCLEAN nfsd: only WARN once on unmapped errors exportfs: be careful to only return expected errors. nfsd4: setclientid_confirm with unmatched verifier should fail nfsd: randomize SETCLIENTID reply to help distinguish servers nfsd: set the MAY_NOTIFY_LOCK flag in OPEN replies nfs: add a new NFS4_OPEN_RESULT_MAY_NOTIFY_LOCK constant nfsd: add a LRU list for blocked locks nfsd: have nfsd4_lock use blocking locks for v4.1+ locks nfsd: plumb in a CB_NOTIFY_LOCK operation NFSD: fix corruption in notifier registration svcrdma: support Remote Invalidation svcrdma: Server-side support for rpcrdma_connect_private rpcrdma: RDMA/CM private message data structure svcrdma: Skip put_page() when send_reply() fails svcrdma: Tail iovec leaves an orphaned DMA mapping nfsd: fix dprintk in nfsd4_encode_getdeviceinfo nfsd: eliminate cb_minorversion field nfsd: don't set a FL_LAYOUT lease for flexfiles layouts
-rw-r--r--fs/exportfs/expfs.c10
-rw-r--r--fs/nfsd/flexfilelayout.c1
-rw-r--r--fs/nfsd/netns.h1
-rw-r--r--fs/nfsd/nfs4callback.c64
-rw-r--r--fs/nfsd/nfs4layouts.c6
-rw-r--r--fs/nfsd/nfs4proc.c90
-rw-r--r--fs/nfsd/nfs4state.c237
-rw-r--r--fs/nfsd/nfs4xdr.c65
-rw-r--r--fs/nfsd/nfsctl.c2
-rw-r--r--fs/nfsd/nfsproc.c3
-rw-r--r--fs/nfsd/nfssvc.c18
-rw-r--r--fs/nfsd/pnfs.h1
-rw-r--r--fs/nfsd/state.h22
-rw-r--r--fs/nfsd/vfs.c16
-rw-r--r--fs/nfsd/vfs.h2
-rw-r--r--fs/nfsd/xdr4.h23
-rw-r--r--fs/nfsd/xdr4cb.h9
-rw-r--r--include/linux/exportfs.h13
-rw-r--r--include/linux/sunrpc/rpc_rdma.h35
-rw-r--r--include/linux/sunrpc/svc_rdma.h10
-rw-r--r--include/uapi/linux/nfs4.h5
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_backchannel.c2
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_recvfrom.c2
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_sendto.c82
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c60
25 files changed, 683 insertions, 96 deletions
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 207ba8d627ca..a4b531be9168 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -428,10 +428,10 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid,
428 if (!nop || !nop->fh_to_dentry) 428 if (!nop || !nop->fh_to_dentry)
429 return ERR_PTR(-ESTALE); 429 return ERR_PTR(-ESTALE);
430 result = nop->fh_to_dentry(mnt->mnt_sb, fid, fh_len, fileid_type); 430 result = nop->fh_to_dentry(mnt->mnt_sb, fid, fh_len, fileid_type);
431 if (!result) 431 if (PTR_ERR(result) == -ENOMEM)
432 result = ERR_PTR(-ESTALE); 432 return ERR_CAST(result);
433 if (IS_ERR(result)) 433 if (IS_ERR_OR_NULL(result))
434 return result; 434 return ERR_PTR(-ESTALE);
435 435
436 if (d_is_dir(result)) { 436 if (d_is_dir(result)) {
437 /* 437 /*
@@ -541,6 +541,8 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid,
541 541
542 err_result: 542 err_result:
543 dput(result); 543 dput(result);
544 if (err != -ENOMEM)
545 err = -ESTALE;
544 return ERR_PTR(err); 546 return ERR_PTR(err);
545} 547}
546EXPORT_SYMBOL_GPL(exportfs_decode_fh); 548EXPORT_SYMBOL_GPL(exportfs_decode_fh);
diff --git a/fs/nfsd/flexfilelayout.c b/fs/nfsd/flexfilelayout.c
index df880e9fa71f..b67287383010 100644
--- a/fs/nfsd/flexfilelayout.c
+++ b/fs/nfsd/flexfilelayout.c
@@ -126,6 +126,7 @@ nfsd4_ff_proc_getdeviceinfo(struct super_block *sb, struct svc_rqst *rqstp,
126const struct nfsd4_layout_ops ff_layout_ops = { 126const struct nfsd4_layout_ops ff_layout_ops = {
127 .notify_types = 127 .notify_types =
128 NOTIFY_DEVICEID4_DELETE | NOTIFY_DEVICEID4_CHANGE, 128 NOTIFY_DEVICEID4_DELETE | NOTIFY_DEVICEID4_CHANGE,
129 .disable_recalls = true,
129 .proc_getdeviceinfo = nfsd4_ff_proc_getdeviceinfo, 130 .proc_getdeviceinfo = nfsd4_ff_proc_getdeviceinfo,
130 .encode_getdeviceinfo = nfsd4_ff_encode_getdeviceinfo, 131 .encode_getdeviceinfo = nfsd4_ff_encode_getdeviceinfo,
131 .proc_layoutget = nfsd4_ff_proc_layoutget, 132 .proc_layoutget = nfsd4_ff_proc_layoutget,
diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 5fbf3bbd00d0..b10d557f9c9e 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -84,6 +84,7 @@ struct nfsd_net {
84 struct list_head client_lru; 84 struct list_head client_lru;
85 struct list_head close_lru; 85 struct list_head close_lru;
86 struct list_head del_recall_lru; 86 struct list_head del_recall_lru;
87 struct list_head blocked_locks_lru;
87 88
88 struct delayed_work laundromat_work; 89 struct delayed_work laundromat_work;
89 90
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 04c68d900324..211dc2aed8e1 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -448,7 +448,7 @@ static int decode_cb_sequence4res(struct xdr_stream *xdr,
448{ 448{
449 int status; 449 int status;
450 450
451 if (cb->cb_minorversion == 0) 451 if (cb->cb_clp->cl_minorversion == 0)
452 return 0; 452 return 0;
453 453
454 status = decode_cb_op_status(xdr, OP_CB_SEQUENCE, &cb->cb_seq_status); 454 status = decode_cb_op_status(xdr, OP_CB_SEQUENCE, &cb->cb_seq_status);
@@ -485,7 +485,7 @@ static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr,
485 const struct nfs4_delegation *dp = cb_to_delegation(cb); 485 const struct nfs4_delegation *dp = cb_to_delegation(cb);
486 struct nfs4_cb_compound_hdr hdr = { 486 struct nfs4_cb_compound_hdr hdr = {
487 .ident = cb->cb_clp->cl_cb_ident, 487 .ident = cb->cb_clp->cl_cb_ident,
488 .minorversion = cb->cb_minorversion, 488 .minorversion = cb->cb_clp->cl_minorversion,
489 }; 489 };
490 490
491 encode_cb_compound4args(xdr, &hdr); 491 encode_cb_compound4args(xdr, &hdr);
@@ -594,7 +594,7 @@ static void nfs4_xdr_enc_cb_layout(struct rpc_rqst *req,
594 container_of(cb, struct nfs4_layout_stateid, ls_recall); 594 container_of(cb, struct nfs4_layout_stateid, ls_recall);
595 struct nfs4_cb_compound_hdr hdr = { 595 struct nfs4_cb_compound_hdr hdr = {
596 .ident = 0, 596 .ident = 0,
597 .minorversion = cb->cb_minorversion, 597 .minorversion = cb->cb_clp->cl_minorversion,
598 }; 598 };
599 599
600 encode_cb_compound4args(xdr, &hdr); 600 encode_cb_compound4args(xdr, &hdr);
@@ -623,6 +623,62 @@ static int nfs4_xdr_dec_cb_layout(struct rpc_rqst *rqstp,
623} 623}
624#endif /* CONFIG_NFSD_PNFS */ 624#endif /* CONFIG_NFSD_PNFS */
625 625
626static void encode_stateowner(struct xdr_stream *xdr, struct nfs4_stateowner *so)
627{
628 __be32 *p;
629
630 p = xdr_reserve_space(xdr, 8 + 4 + so->so_owner.len);
631 p = xdr_encode_opaque_fixed(p, &so->so_client->cl_clientid, 8);
632 xdr_encode_opaque(p, so->so_owner.data, so->so_owner.len);
633}
634
635static void nfs4_xdr_enc_cb_notify_lock(struct rpc_rqst *req,
636 struct xdr_stream *xdr,
637 const struct nfsd4_callback *cb)
638{
639 const struct nfsd4_blocked_lock *nbl =
640 container_of(cb, struct nfsd4_blocked_lock, nbl_cb);
641 struct nfs4_lockowner *lo = (struct nfs4_lockowner *)nbl->nbl_lock.fl_owner;
642 struct nfs4_cb_compound_hdr hdr = {
643 .ident = 0,
644 .minorversion = cb->cb_clp->cl_minorversion,
645 };
646
647 __be32 *p;
648
649 BUG_ON(hdr.minorversion == 0);
650
651 encode_cb_compound4args(xdr, &hdr);
652 encode_cb_sequence4args(xdr, cb, &hdr);
653
654 p = xdr_reserve_space(xdr, 4);
655 *p = cpu_to_be32(OP_CB_NOTIFY_LOCK);
656 encode_nfs_fh4(xdr, &nbl->nbl_fh);
657 encode_stateowner(xdr, &lo->lo_owner);
658 hdr.nops++;
659
660 encode_cb_nops(&hdr);
661}
662
663static int nfs4_xdr_dec_cb_notify_lock(struct rpc_rqst *rqstp,
664 struct xdr_stream *xdr,
665 struct nfsd4_callback *cb)
666{
667 struct nfs4_cb_compound_hdr hdr;
668 int status;
669
670 status = decode_cb_compound4res(xdr, &hdr);
671 if (unlikely(status))
672 return status;
673
674 if (cb) {
675 status = decode_cb_sequence4res(xdr, cb);
676 if (unlikely(status || cb->cb_seq_status))
677 return status;
678 }
679 return decode_cb_op_status(xdr, OP_CB_NOTIFY_LOCK, &cb->cb_status);
680}
681
626/* 682/*
627 * RPC procedure tables 683 * RPC procedure tables
628 */ 684 */
@@ -643,6 +699,7 @@ static struct rpc_procinfo nfs4_cb_procedures[] = {
643#ifdef CONFIG_NFSD_PNFS 699#ifdef CONFIG_NFSD_PNFS
644 PROC(CB_LAYOUT, COMPOUND, cb_layout, cb_layout), 700 PROC(CB_LAYOUT, COMPOUND, cb_layout, cb_layout),
645#endif 701#endif
702 PROC(CB_NOTIFY_LOCK, COMPOUND, cb_notify_lock, cb_notify_lock),
646}; 703};
647 704
648static struct rpc_version nfs_cb_version4 = { 705static struct rpc_version nfs_cb_version4 = {
@@ -862,7 +919,6 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata)
862 struct nfs4_client *clp = cb->cb_clp; 919 struct nfs4_client *clp = cb->cb_clp;
863 u32 minorversion = clp->cl_minorversion; 920 u32 minorversion = clp->cl_minorversion;
864 921
865 cb->cb_minorversion = minorversion;
866 /* 922 /*
867 * cb_seq_status is only set in decode_cb_sequence4res, 923 * cb_seq_status is only set in decode_cb_sequence4res,
868 * and so will remain 1 if an rpc level failure occurs. 924 * and so will remain 1 if an rpc level failure occurs.
diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
index 2be9602b0221..42aace4fc4c8 100644
--- a/fs/nfsd/nfs4layouts.c
+++ b/fs/nfsd/nfs4layouts.c
@@ -174,7 +174,8 @@ nfsd4_free_layout_stateid(struct nfs4_stid *stid)
174 list_del_init(&ls->ls_perfile); 174 list_del_init(&ls->ls_perfile);
175 spin_unlock(&fp->fi_lock); 175 spin_unlock(&fp->fi_lock);
176 176
177 vfs_setlease(ls->ls_file, F_UNLCK, NULL, (void **)&ls); 177 if (!nfsd4_layout_ops[ls->ls_layout_type]->disable_recalls)
178 vfs_setlease(ls->ls_file, F_UNLCK, NULL, (void **)&ls);
178 fput(ls->ls_file); 179 fput(ls->ls_file);
179 180
180 if (ls->ls_recalled) 181 if (ls->ls_recalled)
@@ -189,6 +190,9 @@ nfsd4_layout_setlease(struct nfs4_layout_stateid *ls)
189 struct file_lock *fl; 190 struct file_lock *fl;
190 int status; 191 int status;
191 192
193 if (nfsd4_layout_ops[ls->ls_layout_type]->disable_recalls)
194 return 0;
195
192 fl = locks_alloc_lock(); 196 fl = locks_alloc_lock();
193 if (!fl) 197 if (!fl)
194 return -ENOMEM; 198 return -ENOMEM;
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 1fb222752b2b..abb09b580389 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1010,47 +1010,97 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
1010} 1010}
1011 1011
1012static __be32 1012static __be32
1013nfsd4_clone(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, 1013nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
1014 struct nfsd4_clone *clone) 1014 stateid_t *src_stateid, struct file **src,
1015 stateid_t *dst_stateid, struct file **dst)
1015{ 1016{
1016 struct file *src, *dst;
1017 __be32 status; 1017 __be32 status;
1018 1018
1019 status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->save_fh, 1019 status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->save_fh,
1020 &clone->cl_src_stateid, RD_STATE, 1020 src_stateid, RD_STATE, src, NULL);
1021 &src, NULL);
1022 if (status) { 1021 if (status) {
1023 dprintk("NFSD: %s: couldn't process src stateid!\n", __func__); 1022 dprintk("NFSD: %s: couldn't process src stateid!\n", __func__);
1024 goto out; 1023 goto out;
1025 } 1024 }
1026 1025
1027 status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, 1026 status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
1028 &clone->cl_dst_stateid, WR_STATE, 1027 dst_stateid, WR_STATE, dst, NULL);
1029 &dst, NULL);
1030 if (status) { 1028 if (status) {
1031 dprintk("NFSD: %s: couldn't process dst stateid!\n", __func__); 1029 dprintk("NFSD: %s: couldn't process dst stateid!\n", __func__);
1032 goto out_put_src; 1030 goto out_put_src;
1033 } 1031 }
1034 1032
1035 /* fix up for NFS-specific error code */ 1033 /* fix up for NFS-specific error code */
1036 if (!S_ISREG(file_inode(src)->i_mode) || 1034 if (!S_ISREG(file_inode(*src)->i_mode) ||
1037 !S_ISREG(file_inode(dst)->i_mode)) { 1035 !S_ISREG(file_inode(*dst)->i_mode)) {
1038 status = nfserr_wrong_type; 1036 status = nfserr_wrong_type;
1039 goto out_put_dst; 1037 goto out_put_dst;
1040 } 1038 }
1041 1039
1040out:
1041 return status;
1042out_put_dst:
1043 fput(*dst);
1044out_put_src:
1045 fput(*src);
1046 goto out;
1047}
1048
1049static __be32
1050nfsd4_clone(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
1051 struct nfsd4_clone *clone)
1052{
1053 struct file *src, *dst;
1054 __be32 status;
1055
1056 status = nfsd4_verify_copy(rqstp, cstate, &clone->cl_src_stateid, &src,
1057 &clone->cl_dst_stateid, &dst);
1058 if (status)
1059 goto out;
1060
1042 status = nfsd4_clone_file_range(src, clone->cl_src_pos, 1061 status = nfsd4_clone_file_range(src, clone->cl_src_pos,
1043 dst, clone->cl_dst_pos, clone->cl_count); 1062 dst, clone->cl_dst_pos, clone->cl_count);
1044 1063
1045out_put_dst:
1046 fput(dst); 1064 fput(dst);
1047out_put_src:
1048 fput(src); 1065 fput(src);
1049out: 1066out:
1050 return status; 1067 return status;
1051} 1068}
1052 1069
1053static __be32 1070static __be32
1071nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
1072 struct nfsd4_copy *copy)
1073{
1074 struct file *src, *dst;
1075 __be32 status;
1076 ssize_t bytes;
1077
1078 status = nfsd4_verify_copy(rqstp, cstate, &copy->cp_src_stateid, &src,
1079 &copy->cp_dst_stateid, &dst);
1080 if (status)
1081 goto out;
1082
1083 bytes = nfsd_copy_file_range(src, copy->cp_src_pos,
1084 dst, copy->cp_dst_pos, copy->cp_count);
1085
1086 if (bytes < 0)
1087 status = nfserrno(bytes);
1088 else {
1089 copy->cp_res.wr_bytes_written = bytes;
1090 copy->cp_res.wr_stable_how = NFS_UNSTABLE;
1091 copy->cp_consecutive = 1;
1092 copy->cp_synchronous = 1;
1093 gen_boot_verifier(&copy->cp_res.wr_verifier, SVC_NET(rqstp));
1094 status = nfs_ok;
1095 }
1096
1097 fput(src);
1098 fput(dst);
1099out:
1100 return status;
1101}
1102
1103static __be32
1054nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, 1104nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
1055 struct nfsd4_fallocate *fallocate, int flags) 1105 struct nfsd4_fallocate *fallocate, int flags)
1056{ 1106{
@@ -1966,6 +2016,18 @@ static inline u32 nfsd4_create_session_rsize(struct svc_rqst *rqstp, struct nfsd
1966 op_encode_channel_attrs_maxsz) * sizeof(__be32); 2016 op_encode_channel_attrs_maxsz) * sizeof(__be32);
1967} 2017}
1968 2018
2019static inline u32 nfsd4_copy_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
2020{
2021 return (op_encode_hdr_size +
2022 1 /* wr_callback */ +
2023 op_encode_stateid_maxsz /* wr_callback */ +
2024 2 /* wr_count */ +
2025 1 /* wr_committed */ +
2026 op_encode_verifier_maxsz +
2027 1 /* cr_consecutive */ +
2028 1 /* cr_synchronous */) * sizeof(__be32);
2029}
2030
1969#ifdef CONFIG_NFSD_PNFS 2031#ifdef CONFIG_NFSD_PNFS
1970/* 2032/*
1971 * At this stage we don't really know what layout driver will handle the request, 2033 * At this stage we don't really know what layout driver will handle the request,
@@ -2328,6 +2390,12 @@ static struct nfsd4_operation nfsd4_ops[] = {
2328 .op_name = "OP_CLONE", 2390 .op_name = "OP_CLONE",
2329 .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, 2391 .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
2330 }, 2392 },
2393 [OP_COPY] = {
2394 .op_func = (nfsd4op_func)nfsd4_copy,
2395 .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
2396 .op_name = "OP_COPY",
2397 .op_rsize_bop = (nfsd4op_rsize)nfsd4_copy_rsize,
2398 },
2331 [OP_SEEK] = { 2399 [OP_SEEK] = {
2332 .op_func = (nfsd4op_func)nfsd4_seek, 2400 .op_func = (nfsd4op_func)nfsd4_seek,
2333 .op_name = "OP_SEEK", 2401 .op_name = "OP_SEEK",
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 39bfaba9c99c..9752beb78659 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -99,6 +99,7 @@ static struct kmem_cache *odstate_slab;
99static void free_session(struct nfsd4_session *); 99static void free_session(struct nfsd4_session *);
100 100
101static const struct nfsd4_callback_ops nfsd4_cb_recall_ops; 101static const struct nfsd4_callback_ops nfsd4_cb_recall_ops;
102static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops;
102 103
103static bool is_session_dead(struct nfsd4_session *ses) 104static bool is_session_dead(struct nfsd4_session *ses)
104{ 105{
@@ -210,6 +211,85 @@ static void nfsd4_put_session(struct nfsd4_session *ses)
210 spin_unlock(&nn->client_lock); 211 spin_unlock(&nn->client_lock);
211} 212}
212 213
214static struct nfsd4_blocked_lock *
215find_blocked_lock(struct nfs4_lockowner *lo, struct knfsd_fh *fh,
216 struct nfsd_net *nn)
217{
218 struct nfsd4_blocked_lock *cur, *found = NULL;
219
220 spin_lock(&nn->client_lock);
221 list_for_each_entry(cur, &lo->lo_blocked, nbl_list) {
222 if (fh_match(fh, &cur->nbl_fh)) {
223 list_del_init(&cur->nbl_list);
224 list_del_init(&cur->nbl_lru);
225 found = cur;
226 break;
227 }
228 }
229 spin_unlock(&nn->client_lock);
230 if (found)
231 posix_unblock_lock(&found->nbl_lock);
232 return found;
233}
234
235static struct nfsd4_blocked_lock *
236find_or_allocate_block(struct nfs4_lockowner *lo, struct knfsd_fh *fh,
237 struct nfsd_net *nn)
238{
239 struct nfsd4_blocked_lock *nbl;
240
241 nbl = find_blocked_lock(lo, fh, nn);
242 if (!nbl) {
243 nbl= kmalloc(sizeof(*nbl), GFP_KERNEL);
244 if (nbl) {
245 fh_copy_shallow(&nbl->nbl_fh, fh);
246 locks_init_lock(&nbl->nbl_lock);
247 nfsd4_init_cb(&nbl->nbl_cb, lo->lo_owner.so_client,
248 &nfsd4_cb_notify_lock_ops,
249 NFSPROC4_CLNT_CB_NOTIFY_LOCK);
250 }
251 }
252 return nbl;
253}
254
255static void
256free_blocked_lock(struct nfsd4_blocked_lock *nbl)
257{
258 locks_release_private(&nbl->nbl_lock);
259 kfree(nbl);
260}
261
262static int
263nfsd4_cb_notify_lock_done(struct nfsd4_callback *cb, struct rpc_task *task)
264{
265 /*
266 * Since this is just an optimization, we don't try very hard if it
267 * turns out not to succeed. We'll requeue it on NFS4ERR_DELAY, and
268 * just quit trying on anything else.
269 */
270 switch (task->tk_status) {
271 case -NFS4ERR_DELAY:
272 rpc_delay(task, 1 * HZ);
273 return 0;
274 default:
275 return 1;
276 }
277}
278
279static void
280nfsd4_cb_notify_lock_release(struct nfsd4_callback *cb)
281{
282 struct nfsd4_blocked_lock *nbl = container_of(cb,
283 struct nfsd4_blocked_lock, nbl_cb);
284
285 free_blocked_lock(nbl);
286}
287
288static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops = {
289 .done = nfsd4_cb_notify_lock_done,
290 .release = nfsd4_cb_notify_lock_release,
291};
292
213static inline struct nfs4_stateowner * 293static inline struct nfs4_stateowner *
214nfs4_get_stateowner(struct nfs4_stateowner *sop) 294nfs4_get_stateowner(struct nfs4_stateowner *sop)
215{ 295{
@@ -3224,9 +3304,10 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
3224 goto out; 3304 goto out;
3225 /* cases below refer to rfc 3530 section 14.2.34: */ 3305 /* cases below refer to rfc 3530 section 14.2.34: */
3226 if (!unconf || !same_verf(&confirm, &unconf->cl_confirm)) { 3306 if (!unconf || !same_verf(&confirm, &unconf->cl_confirm)) {
3227 if (conf && !unconf) /* case 2: probable retransmit */ 3307 if (conf && same_verf(&confirm, &conf->cl_confirm)) {
3308 /* case 2: probable retransmit */
3228 status = nfs_ok; 3309 status = nfs_ok;
3229 else /* case 4: client hasn't noticed we rebooted yet? */ 3310 } else /* case 4: client hasn't noticed we rebooted yet? */
3230 status = nfserr_stale_clientid; 3311 status = nfserr_stale_clientid;
3231 goto out; 3312 goto out;
3232 } 3313 }
@@ -4410,9 +4491,11 @@ out:
4410 * To finish the open response, we just need to set the rflags. 4491 * To finish the open response, we just need to set the rflags.
4411 */ 4492 */
4412 open->op_rflags = NFS4_OPEN_RESULT_LOCKTYPE_POSIX; 4493 open->op_rflags = NFS4_OPEN_RESULT_LOCKTYPE_POSIX;
4413 if (!(open->op_openowner->oo_flags & NFS4_OO_CONFIRMED) && 4494 if (nfsd4_has_session(&resp->cstate))
4414 !nfsd4_has_session(&resp->cstate)) 4495 open->op_rflags |= NFS4_OPEN_RESULT_MAY_NOTIFY_LOCK;
4496 else if (!(open->op_openowner->oo_flags & NFS4_OO_CONFIRMED))
4415 open->op_rflags |= NFS4_OPEN_RESULT_CONFIRM; 4497 open->op_rflags |= NFS4_OPEN_RESULT_CONFIRM;
4498
4416 if (dp) 4499 if (dp)
4417 nfs4_put_stid(&dp->dl_stid); 4500 nfs4_put_stid(&dp->dl_stid);
4418 if (stp) 4501 if (stp)
@@ -4501,6 +4584,7 @@ nfs4_laundromat(struct nfsd_net *nn)
4501 struct nfs4_openowner *oo; 4584 struct nfs4_openowner *oo;
4502 struct nfs4_delegation *dp; 4585 struct nfs4_delegation *dp;
4503 struct nfs4_ol_stateid *stp; 4586 struct nfs4_ol_stateid *stp;
4587 struct nfsd4_blocked_lock *nbl;
4504 struct list_head *pos, *next, reaplist; 4588 struct list_head *pos, *next, reaplist;
4505 time_t cutoff = get_seconds() - nn->nfsd4_lease; 4589 time_t cutoff = get_seconds() - nn->nfsd4_lease;
4506 time_t t, new_timeo = nn->nfsd4_lease; 4590 time_t t, new_timeo = nn->nfsd4_lease;
@@ -4569,6 +4653,41 @@ nfs4_laundromat(struct nfsd_net *nn)
4569 } 4653 }
4570 spin_unlock(&nn->client_lock); 4654 spin_unlock(&nn->client_lock);
4571 4655
4656 /*
4657 * It's possible for a client to try and acquire an already held lock
4658 * that is being held for a long time, and then lose interest in it.
4659 * So, we clean out any un-revisited request after a lease period
4660 * under the assumption that the client is no longer interested.
4661 *
4662 * RFC5661, sec. 9.6 states that the client must not rely on getting
4663 * notifications and must continue to poll for locks, even when the
4664 * server supports them. Thus this shouldn't lead to clients blocking
4665 * indefinitely once the lock does become free.
4666 */
4667 BUG_ON(!list_empty(&reaplist));
4668 spin_lock(&nn->client_lock);
4669 while (!list_empty(&nn->blocked_locks_lru)) {
4670 nbl = list_first_entry(&nn->blocked_locks_lru,
4671 struct nfsd4_blocked_lock, nbl_lru);
4672 if (time_after((unsigned long)nbl->nbl_time,
4673 (unsigned long)cutoff)) {
4674 t = nbl->nbl_time - cutoff;
4675 new_timeo = min(new_timeo, t);
4676 break;
4677 }
4678 list_move(&nbl->nbl_lru, &reaplist);
4679 list_del_init(&nbl->nbl_list);
4680 }
4681 spin_unlock(&nn->client_lock);
4682
4683 while (!list_empty(&reaplist)) {
4684 nbl = list_first_entry(&nn->blocked_locks_lru,
4685 struct nfsd4_blocked_lock, nbl_lru);
4686 list_del_init(&nbl->nbl_lru);
4687 posix_unblock_lock(&nbl->nbl_lock);
4688 free_blocked_lock(nbl);
4689 }
4690
4572 new_timeo = max_t(time_t, new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT); 4691 new_timeo = max_t(time_t, new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT);
4573 return new_timeo; 4692 return new_timeo;
4574} 4693}
@@ -5309,7 +5428,31 @@ nfsd4_fl_put_owner(fl_owner_t owner)
5309 nfs4_put_stateowner(&lo->lo_owner); 5428 nfs4_put_stateowner(&lo->lo_owner);
5310} 5429}
5311 5430
5431static void
5432nfsd4_lm_notify(struct file_lock *fl)
5433{
5434 struct nfs4_lockowner *lo = (struct nfs4_lockowner *)fl->fl_owner;
5435 struct net *net = lo->lo_owner.so_client->net;
5436 struct nfsd_net *nn = net_generic(net, nfsd_net_id);
5437 struct nfsd4_blocked_lock *nbl = container_of(fl,
5438 struct nfsd4_blocked_lock, nbl_lock);
5439 bool queue = false;
5440
5441 /* An empty list means that something else is going to be using it */
5442 spin_lock(&nn->client_lock);
5443 if (!list_empty(&nbl->nbl_list)) {
5444 list_del_init(&nbl->nbl_list);
5445 list_del_init(&nbl->nbl_lru);
5446 queue = true;
5447 }
5448 spin_unlock(&nn->client_lock);
5449
5450 if (queue)
5451 nfsd4_run_cb(&nbl->nbl_cb);
5452}
5453
5312static const struct lock_manager_operations nfsd_posix_mng_ops = { 5454static const struct lock_manager_operations nfsd_posix_mng_ops = {
5455 .lm_notify = nfsd4_lm_notify,
5313 .lm_get_owner = nfsd4_fl_get_owner, 5456 .lm_get_owner = nfsd4_fl_get_owner,
5314 .lm_put_owner = nfsd4_fl_put_owner, 5457 .lm_put_owner = nfsd4_fl_put_owner,
5315}; 5458};
@@ -5407,6 +5550,7 @@ alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp,
5407 lo = alloc_stateowner(lockowner_slab, &lock->lk_new_owner, clp); 5550 lo = alloc_stateowner(lockowner_slab, &lock->lk_new_owner, clp);
5408 if (!lo) 5551 if (!lo)
5409 return NULL; 5552 return NULL;
5553 INIT_LIST_HEAD(&lo->lo_blocked);
5410 INIT_LIST_HEAD(&lo->lo_owner.so_stateids); 5554 INIT_LIST_HEAD(&lo->lo_owner.so_stateids);
5411 lo->lo_owner.so_is_open_owner = 0; 5555 lo->lo_owner.so_is_open_owner = 0;
5412 lo->lo_owner.so_seqid = lock->lk_new_lock_seqid; 5556 lo->lo_owner.so_seqid = lock->lk_new_lock_seqid;
@@ -5588,12 +5732,15 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
5588 struct nfs4_ol_stateid *open_stp = NULL; 5732 struct nfs4_ol_stateid *open_stp = NULL;
5589 struct nfs4_file *fp; 5733 struct nfs4_file *fp;
5590 struct file *filp = NULL; 5734 struct file *filp = NULL;
5735 struct nfsd4_blocked_lock *nbl = NULL;
5591 struct file_lock *file_lock = NULL; 5736 struct file_lock *file_lock = NULL;
5592 struct file_lock *conflock = NULL; 5737 struct file_lock *conflock = NULL;
5593 __be32 status = 0; 5738 __be32 status = 0;
5594 int lkflg; 5739 int lkflg;
5595 int err; 5740 int err;
5596 bool new = false; 5741 bool new = false;
5742 unsigned char fl_type;
5743 unsigned int fl_flags = FL_POSIX;
5597 struct net *net = SVC_NET(rqstp); 5744 struct net *net = SVC_NET(rqstp);
5598 struct nfsd_net *nn = net_generic(net, nfsd_net_id); 5745 struct nfsd_net *nn = net_generic(net, nfsd_net_id);
5599 5746
@@ -5658,46 +5805,55 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
5658 if (!locks_in_grace(net) && lock->lk_reclaim) 5805 if (!locks_in_grace(net) && lock->lk_reclaim)
5659 goto out; 5806 goto out;
5660 5807
5661 file_lock = locks_alloc_lock();
5662 if (!file_lock) {
5663 dprintk("NFSD: %s: unable to allocate lock!\n", __func__);
5664 status = nfserr_jukebox;
5665 goto out;
5666 }
5667
5668 fp = lock_stp->st_stid.sc_file; 5808 fp = lock_stp->st_stid.sc_file;
5669 switch (lock->lk_type) { 5809 switch (lock->lk_type) {
5670 case NFS4_READ_LT:
5671 case NFS4_READW_LT: 5810 case NFS4_READW_LT:
5811 if (nfsd4_has_session(cstate))
5812 fl_flags |= FL_SLEEP;
5813 /* Fallthrough */
5814 case NFS4_READ_LT:
5672 spin_lock(&fp->fi_lock); 5815 spin_lock(&fp->fi_lock);
5673 filp = find_readable_file_locked(fp); 5816 filp = find_readable_file_locked(fp);
5674 if (filp) 5817 if (filp)
5675 get_lock_access(lock_stp, NFS4_SHARE_ACCESS_READ); 5818 get_lock_access(lock_stp, NFS4_SHARE_ACCESS_READ);
5676 spin_unlock(&fp->fi_lock); 5819 spin_unlock(&fp->fi_lock);
5677 file_lock->fl_type = F_RDLCK; 5820 fl_type = F_RDLCK;
5678 break; 5821 break;
5679 case NFS4_WRITE_LT:
5680 case NFS4_WRITEW_LT: 5822 case NFS4_WRITEW_LT:
5823 if (nfsd4_has_session(cstate))
5824 fl_flags |= FL_SLEEP;
5825 /* Fallthrough */
5826 case NFS4_WRITE_LT:
5681 spin_lock(&fp->fi_lock); 5827 spin_lock(&fp->fi_lock);
5682 filp = find_writeable_file_locked(fp); 5828 filp = find_writeable_file_locked(fp);
5683 if (filp) 5829 if (filp)
5684 get_lock_access(lock_stp, NFS4_SHARE_ACCESS_WRITE); 5830 get_lock_access(lock_stp, NFS4_SHARE_ACCESS_WRITE);
5685 spin_unlock(&fp->fi_lock); 5831 spin_unlock(&fp->fi_lock);
5686 file_lock->fl_type = F_WRLCK; 5832 fl_type = F_WRLCK;
5687 break; 5833 break;
5688 default: 5834 default:
5689 status = nfserr_inval; 5835 status = nfserr_inval;
5690 goto out; 5836 goto out;
5691 } 5837 }
5838
5692 if (!filp) { 5839 if (!filp) {
5693 status = nfserr_openmode; 5840 status = nfserr_openmode;
5694 goto out; 5841 goto out;
5695 } 5842 }
5696 5843
5844 nbl = find_or_allocate_block(lock_sop, &fp->fi_fhandle, nn);
5845 if (!nbl) {
5846 dprintk("NFSD: %s: unable to allocate block!\n", __func__);
5847 status = nfserr_jukebox;
5848 goto out;
5849 }
5850
5851 file_lock = &nbl->nbl_lock;
5852 file_lock->fl_type = fl_type;
5697 file_lock->fl_owner = (fl_owner_t)lockowner(nfs4_get_stateowner(&lock_sop->lo_owner)); 5853 file_lock->fl_owner = (fl_owner_t)lockowner(nfs4_get_stateowner(&lock_sop->lo_owner));
5698 file_lock->fl_pid = current->tgid; 5854 file_lock->fl_pid = current->tgid;
5699 file_lock->fl_file = filp; 5855 file_lock->fl_file = filp;
5700 file_lock->fl_flags = FL_POSIX; 5856 file_lock->fl_flags = fl_flags;
5701 file_lock->fl_lmops = &nfsd_posix_mng_ops; 5857 file_lock->fl_lmops = &nfsd_posix_mng_ops;
5702 file_lock->fl_start = lock->lk_offset; 5858 file_lock->fl_start = lock->lk_offset;
5703 file_lock->fl_end = last_byte_offset(lock->lk_offset, lock->lk_length); 5859 file_lock->fl_end = last_byte_offset(lock->lk_offset, lock->lk_length);
@@ -5710,18 +5866,29 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
5710 goto out; 5866 goto out;
5711 } 5867 }
5712 5868
5869 if (fl_flags & FL_SLEEP) {
5870 nbl->nbl_time = jiffies;
5871 spin_lock(&nn->client_lock);
5872 list_add_tail(&nbl->nbl_list, &lock_sop->lo_blocked);
5873 list_add_tail(&nbl->nbl_lru, &nn->blocked_locks_lru);
5874 spin_unlock(&nn->client_lock);
5875 }
5876
5713 err = vfs_lock_file(filp, F_SETLK, file_lock, conflock); 5877 err = vfs_lock_file(filp, F_SETLK, file_lock, conflock);
5714 switch (-err) { 5878 switch (err) {
5715 case 0: /* success! */ 5879 case 0: /* success! */
5716 nfs4_inc_and_copy_stateid(&lock->lk_resp_stateid, &lock_stp->st_stid); 5880 nfs4_inc_and_copy_stateid(&lock->lk_resp_stateid, &lock_stp->st_stid);
5717 status = 0; 5881 status = 0;
5718 break; 5882 break;
5719 case (EAGAIN): /* conflock holds conflicting lock */ 5883 case FILE_LOCK_DEFERRED:
5884 nbl = NULL;
5885 /* Fallthrough */
5886 case -EAGAIN: /* conflock holds conflicting lock */
5720 status = nfserr_denied; 5887 status = nfserr_denied;
5721 dprintk("NFSD: nfsd4_lock: conflicting lock found!\n"); 5888 dprintk("NFSD: nfsd4_lock: conflicting lock found!\n");
5722 nfs4_set_lock_denied(conflock, &lock->lk_denied); 5889 nfs4_set_lock_denied(conflock, &lock->lk_denied);
5723 break; 5890 break;
5724 case (EDEADLK): 5891 case -EDEADLK:
5725 status = nfserr_deadlock; 5892 status = nfserr_deadlock;
5726 break; 5893 break;
5727 default: 5894 default:
@@ -5730,6 +5897,16 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
5730 break; 5897 break;
5731 } 5898 }
5732out: 5899out:
5900 if (nbl) {
5901 /* dequeue it if we queued it before */
5902 if (fl_flags & FL_SLEEP) {
5903 spin_lock(&nn->client_lock);
5904 list_del_init(&nbl->nbl_list);
5905 list_del_init(&nbl->nbl_lru);
5906 spin_unlock(&nn->client_lock);
5907 }
5908 free_blocked_lock(nbl);
5909 }
5733 if (filp) 5910 if (filp)
5734 fput(filp); 5911 fput(filp);
5735 if (lock_stp) { 5912 if (lock_stp) {
@@ -5753,8 +5930,6 @@ out:
5753 if (open_stp) 5930 if (open_stp)
5754 nfs4_put_stid(&open_stp->st_stid); 5931 nfs4_put_stid(&open_stp->st_stid);
5755 nfsd4_bump_seqid(cstate, status); 5932 nfsd4_bump_seqid(cstate, status);
5756 if (file_lock)
5757 locks_free_lock(file_lock);
5758 if (conflock) 5933 if (conflock)
5759 locks_free_lock(conflock); 5934 locks_free_lock(conflock);
5760 return status; 5935 return status;
@@ -6768,6 +6943,7 @@ static int nfs4_state_create_net(struct net *net)
6768 INIT_LIST_HEAD(&nn->client_lru); 6943 INIT_LIST_HEAD(&nn->client_lru);
6769 INIT_LIST_HEAD(&nn->close_lru); 6944 INIT_LIST_HEAD(&nn->close_lru);
6770 INIT_LIST_HEAD(&nn->del_recall_lru); 6945 INIT_LIST_HEAD(&nn->del_recall_lru);
6946 INIT_LIST_HEAD(&nn->blocked_locks_lru);
6771 spin_lock_init(&nn->client_lock); 6947 spin_lock_init(&nn->client_lock);
6772 6948
6773 INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main); 6949 INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main);
@@ -6865,6 +7041,7 @@ nfs4_state_shutdown_net(struct net *net)
6865 struct nfs4_delegation *dp = NULL; 7041 struct nfs4_delegation *dp = NULL;
6866 struct list_head *pos, *next, reaplist; 7042 struct list_head *pos, *next, reaplist;
6867 struct nfsd_net *nn = net_generic(net, nfsd_net_id); 7043 struct nfsd_net *nn = net_generic(net, nfsd_net_id);
7044 struct nfsd4_blocked_lock *nbl;
6868 7045
6869 cancel_delayed_work_sync(&nn->laundromat_work); 7046 cancel_delayed_work_sync(&nn->laundromat_work);
6870 locks_end_grace(&nn->nfsd4_manager); 7047 locks_end_grace(&nn->nfsd4_manager);
@@ -6885,6 +7062,24 @@ nfs4_state_shutdown_net(struct net *net)
6885 nfs4_put_stid(&dp->dl_stid); 7062 nfs4_put_stid(&dp->dl_stid);
6886 } 7063 }
6887 7064
7065 BUG_ON(!list_empty(&reaplist));
7066 spin_lock(&nn->client_lock);
7067 while (!list_empty(&nn->blocked_locks_lru)) {
7068 nbl = list_first_entry(&nn->blocked_locks_lru,
7069 struct nfsd4_blocked_lock, nbl_lru);
7070 list_move(&nbl->nbl_lru, &reaplist);
7071 list_del_init(&nbl->nbl_list);
7072 }
7073 spin_unlock(&nn->client_lock);
7074
7075 while (!list_empty(&reaplist)) {
 7076 nbl = list_first_entry(&reaplist,
7077 struct nfsd4_blocked_lock, nbl_lru);
7078 list_del_init(&nbl->nbl_lru);
7079 posix_unblock_lock(&nbl->nbl_lock);
7080 free_blocked_lock(nbl);
7081 }
7082
6888 nfsd4_client_tracking_exit(net); 7083 nfsd4_client_tracking_exit(net);
6889 nfs4_state_destroy_net(net); 7084 nfs4_state_destroy_net(net);
6890} 7085}
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 0aa0236a1429..c2d2895a1ec1 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1694,6 +1694,30 @@ nfsd4_decode_clone(struct nfsd4_compoundargs *argp, struct nfsd4_clone *clone)
1694} 1694}
1695 1695
1696static __be32 1696static __be32
1697nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy)
1698{
1699 DECODE_HEAD;
1700 unsigned int tmp;
1701
1702 status = nfsd4_decode_stateid(argp, &copy->cp_src_stateid);
1703 if (status)
1704 return status;
1705 status = nfsd4_decode_stateid(argp, &copy->cp_dst_stateid);
1706 if (status)
1707 return status;
1708
1709 READ_BUF(8 + 8 + 8 + 4 + 4 + 4);
1710 p = xdr_decode_hyper(p, &copy->cp_src_pos);
1711 p = xdr_decode_hyper(p, &copy->cp_dst_pos);
1712 p = xdr_decode_hyper(p, &copy->cp_count);
1713 copy->cp_consecutive = be32_to_cpup(p++);
1714 copy->cp_synchronous = be32_to_cpup(p++);
1715 tmp = be32_to_cpup(p); /* Source server list not supported */
1716
1717 DECODE_TAIL;
1718}
1719
1720static __be32
1697nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek) 1721nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek)
1698{ 1722{
1699 DECODE_HEAD; 1723 DECODE_HEAD;
@@ -1793,7 +1817,7 @@ static nfsd4_dec nfsd4_dec_ops[] = {
1793 1817
1794 /* new operations for NFSv4.2 */ 1818 /* new operations for NFSv4.2 */
1795 [OP_ALLOCATE] = (nfsd4_dec)nfsd4_decode_fallocate, 1819 [OP_ALLOCATE] = (nfsd4_dec)nfsd4_decode_fallocate,
1796 [OP_COPY] = (nfsd4_dec)nfsd4_decode_notsupp, 1820 [OP_COPY] = (nfsd4_dec)nfsd4_decode_copy,
1797 [OP_COPY_NOTIFY] = (nfsd4_dec)nfsd4_decode_notsupp, 1821 [OP_COPY_NOTIFY] = (nfsd4_dec)nfsd4_decode_notsupp,
1798 [OP_DEALLOCATE] = (nfsd4_dec)nfsd4_decode_fallocate, 1822 [OP_DEALLOCATE] = (nfsd4_dec)nfsd4_decode_fallocate,
1799 [OP_IO_ADVISE] = (nfsd4_dec)nfsd4_decode_notsupp, 1823 [OP_IO_ADVISE] = (nfsd4_dec)nfsd4_decode_notsupp,
@@ -4062,7 +4086,7 @@ nfsd4_encode_getdeviceinfo(struct nfsd4_compoundres *resp, __be32 nfserr,
4062 u32 starting_len = xdr->buf->len, needed_len; 4086 u32 starting_len = xdr->buf->len, needed_len;
4063 __be32 *p; 4087 __be32 *p;
4064 4088
4065 dprintk("%s: err %d\n", __func__, nfserr); 4089 dprintk("%s: err %d\n", __func__, be32_to_cpu(nfserr));
4066 if (nfserr) 4090 if (nfserr)
4067 goto out; 4091 goto out;
4068 4092
@@ -4202,6 +4226,41 @@ nfsd4_encode_layoutreturn(struct nfsd4_compoundres *resp, __be32 nfserr,
4202#endif /* CONFIG_NFSD_PNFS */ 4226#endif /* CONFIG_NFSD_PNFS */
4203 4227
4204static __be32 4228static __be32
4229nfsd42_encode_write_res(struct nfsd4_compoundres *resp, struct nfsd42_write_res *write)
4230{
4231 __be32 *p;
4232
4233 p = xdr_reserve_space(&resp->xdr, 4 + 8 + 4 + NFS4_VERIFIER_SIZE);
4234 if (!p)
4235 return nfserr_resource;
4236
4237 *p++ = cpu_to_be32(0);
4238 p = xdr_encode_hyper(p, write->wr_bytes_written);
4239 *p++ = cpu_to_be32(write->wr_stable_how);
4240 p = xdr_encode_opaque_fixed(p, write->wr_verifier.data,
4241 NFS4_VERIFIER_SIZE);
4242 return nfs_ok;
4243}
4244
4245static __be32
4246nfsd4_encode_copy(struct nfsd4_compoundres *resp, __be32 nfserr,
4247 struct nfsd4_copy *copy)
4248{
4249 __be32 *p;
4250
4251 if (!nfserr) {
4252 nfserr = nfsd42_encode_write_res(resp, &copy->cp_res);
4253 if (nfserr)
4254 return nfserr;
4255
 4256 p = xdr_reserve_space(&resp->xdr, 4 + 4);
 4257 if (!p)
 4258 return nfserr_resource;
 4259 *p++ = cpu_to_be32(copy->cp_consecutive);
 4260 *p++ = cpu_to_be32(copy->cp_synchronous);
4259 }
4260 return nfserr;
4261}
4262
4263static __be32
4205nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr, 4264nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr,
4206 struct nfsd4_seek *seek) 4265 struct nfsd4_seek *seek)
4207{ 4266{
@@ -4300,7 +4359,7 @@ static nfsd4_enc nfsd4_enc_ops[] = {
4300 4359
4301 /* NFSv4.2 operations */ 4360 /* NFSv4.2 operations */
4302 [OP_ALLOCATE] = (nfsd4_enc)nfsd4_encode_noop, 4361 [OP_ALLOCATE] = (nfsd4_enc)nfsd4_encode_noop,
4303 [OP_COPY] = (nfsd4_enc)nfsd4_encode_noop, 4362 [OP_COPY] = (nfsd4_enc)nfsd4_encode_copy,
4304 [OP_COPY_NOTIFY] = (nfsd4_enc)nfsd4_encode_noop, 4363 [OP_COPY_NOTIFY] = (nfsd4_enc)nfsd4_encode_noop,
4305 [OP_DEALLOCATE] = (nfsd4_enc)nfsd4_encode_noop, 4364 [OP_DEALLOCATE] = (nfsd4_enc)nfsd4_encode_noop,
4306 [OP_IO_ADVISE] = (nfsd4_enc)nfsd4_encode_noop, 4365 [OP_IO_ADVISE] = (nfsd4_enc)nfsd4_encode_noop,
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 65ad0165a94f..36b2af931e06 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1216,6 +1216,8 @@ static __net_init int nfsd_init_net(struct net *net)
1216 goto out_idmap_error; 1216 goto out_idmap_error;
1217 nn->nfsd4_lease = 90; /* default lease time */ 1217 nn->nfsd4_lease = 90; /* default lease time */
1218 nn->nfsd4_grace = 90; 1218 nn->nfsd4_grace = 90;
1219 nn->clverifier_counter = prandom_u32();
1220 nn->clientid_counter = prandom_u32();
1219 return 0; 1221 return 0;
1220 1222
1221out_idmap_error: 1223out_idmap_error:
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 08188743db53..010aff5c5a79 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -789,6 +789,7 @@ nfserrno (int errno)
789 { nfserr_toosmall, -ETOOSMALL }, 789 { nfserr_toosmall, -ETOOSMALL },
790 { nfserr_serverfault, -ESERVERFAULT }, 790 { nfserr_serverfault, -ESERVERFAULT },
791 { nfserr_serverfault, -ENFILE }, 791 { nfserr_serverfault, -ENFILE },
792 { nfserr_io, -EUCLEAN },
792 }; 793 };
793 int i; 794 int i;
794 795
@@ -796,7 +797,7 @@ nfserrno (int errno)
796 if (nfs_errtbl[i].syserr == errno) 797 if (nfs_errtbl[i].syserr == errno)
797 return nfs_errtbl[i].nfserr; 798 return nfs_errtbl[i].nfserr;
798 } 799 }
799 WARN(1, "nfsd: non-standard errno: %d\n", errno); 800 WARN_ONCE(1, "nfsd: non-standard errno: %d\n", errno);
800 return nfserr_io; 801 return nfserr_io;
801} 802}
802 803
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 45007acaf364..a2b65fc56dd6 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -366,14 +366,21 @@ static struct notifier_block nfsd_inet6addr_notifier = {
366}; 366};
367#endif 367#endif
368 368
369/* Only used under nfsd_mutex, so this atomic may be overkill: */
370static atomic_t nfsd_notifier_refcount = ATOMIC_INIT(0);
371
369static void nfsd_last_thread(struct svc_serv *serv, struct net *net) 372static void nfsd_last_thread(struct svc_serv *serv, struct net *net)
370{ 373{
371 struct nfsd_net *nn = net_generic(net, nfsd_net_id); 374 struct nfsd_net *nn = net_generic(net, nfsd_net_id);
372 375
373 unregister_inetaddr_notifier(&nfsd_inetaddr_notifier); 376 /* check if the notifier still has clients */
377 if (atomic_dec_return(&nfsd_notifier_refcount) == 0) {
378 unregister_inetaddr_notifier(&nfsd_inetaddr_notifier);
374#if IS_ENABLED(CONFIG_IPV6) 379#if IS_ENABLED(CONFIG_IPV6)
375 unregister_inet6addr_notifier(&nfsd_inet6addr_notifier); 380 unregister_inet6addr_notifier(&nfsd_inet6addr_notifier);
376#endif 381#endif
382 }
383
377 /* 384 /*
378 * write_ports can create the server without actually starting 385 * write_ports can create the server without actually starting
379 * any threads--if we get shut down before any threads are 386 * any threads--if we get shut down before any threads are
@@ -488,10 +495,13 @@ int nfsd_create_serv(struct net *net)
488 } 495 }
489 496
490 set_max_drc(); 497 set_max_drc();
491 register_inetaddr_notifier(&nfsd_inetaddr_notifier); 498 /* check if the notifier is already set */
499 if (atomic_inc_return(&nfsd_notifier_refcount) == 1) {
500 register_inetaddr_notifier(&nfsd_inetaddr_notifier);
492#if IS_ENABLED(CONFIG_IPV6) 501#if IS_ENABLED(CONFIG_IPV6)
493 register_inet6addr_notifier(&nfsd_inet6addr_notifier); 502 register_inet6addr_notifier(&nfsd_inet6addr_notifier);
494#endif 503#endif
504 }
495 do_gettimeofday(&nn->nfssvc_boot); /* record boot time */ 505 do_gettimeofday(&nn->nfssvc_boot); /* record boot time */
496 return 0; 506 return 0;
497} 507}
diff --git a/fs/nfsd/pnfs.h b/fs/nfsd/pnfs.h
index 0c2a716e8741..d27a5aa60022 100644
--- a/fs/nfsd/pnfs.h
+++ b/fs/nfsd/pnfs.h
@@ -19,6 +19,7 @@ struct nfsd4_deviceid_map {
19 19
20struct nfsd4_layout_ops { 20struct nfsd4_layout_ops {
21 u32 notify_types; 21 u32 notify_types;
22 bool disable_recalls;
22 23
23 __be32 (*proc_getdeviceinfo)(struct super_block *sb, 24 __be32 (*proc_getdeviceinfo)(struct super_block *sb,
24 struct svc_rqst *rqstp, 25 struct svc_rqst *rqstp,
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index b95adf9a1595..c9399366f9df 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -63,7 +63,6 @@ typedef struct {
63 63
64struct nfsd4_callback { 64struct nfsd4_callback {
65 struct nfs4_client *cb_clp; 65 struct nfs4_client *cb_clp;
66 u32 cb_minorversion;
67 struct rpc_message cb_msg; 66 struct rpc_message cb_msg;
68 const struct nfsd4_callback_ops *cb_ops; 67 const struct nfsd4_callback_ops *cb_ops;
69 struct work_struct cb_work; 68 struct work_struct cb_work;
@@ -441,11 +440,11 @@ struct nfs4_openowner {
441/* 440/*
442 * Represents a generic "lockowner". Similar to an openowner. References to it 441 * Represents a generic "lockowner". Similar to an openowner. References to it
443 * are held by the lock stateids that are created on its behalf. This object is 442 * are held by the lock stateids that are created on its behalf. This object is
444 * a superset of the nfs4_stateowner struct (or would be if it needed any extra 443 * a superset of the nfs4_stateowner struct.
445 * fields).
446 */ 444 */
447struct nfs4_lockowner { 445struct nfs4_lockowner {
448 struct nfs4_stateowner lo_owner; /* must be first element */ 446 struct nfs4_stateowner lo_owner; /* must be first element */
447 struct list_head lo_blocked; /* blocked file_locks */
449}; 448};
450 449
451static inline struct nfs4_openowner * openowner(struct nfs4_stateowner *so) 450static inline struct nfs4_openowner * openowner(struct nfs4_stateowner *so)
@@ -572,6 +571,7 @@ enum nfsd4_cb_op {
572 NFSPROC4_CLNT_CB_RECALL, 571 NFSPROC4_CLNT_CB_RECALL,
573 NFSPROC4_CLNT_CB_LAYOUT, 572 NFSPROC4_CLNT_CB_LAYOUT,
574 NFSPROC4_CLNT_CB_SEQUENCE, 573 NFSPROC4_CLNT_CB_SEQUENCE,
574 NFSPROC4_CLNT_CB_NOTIFY_LOCK,
575}; 575};
576 576
577/* Returns true iff a is later than b: */ 577/* Returns true iff a is later than b: */
@@ -580,6 +580,20 @@ static inline bool nfsd4_stateid_generation_after(stateid_t *a, stateid_t *b)
580 return (s32)(a->si_generation - b->si_generation) > 0; 580 return (s32)(a->si_generation - b->si_generation) > 0;
581} 581}
582 582
583/*
584 * When a client tries to get a lock on a file, we set one of these objects
585 * on the blocking lock. When the lock becomes free, we can then issue a
586 * CB_NOTIFY_LOCK to the server.
587 */
588struct nfsd4_blocked_lock {
589 struct list_head nbl_list;
590 struct list_head nbl_lru;
591 unsigned long nbl_time;
592 struct file_lock nbl_lock;
593 struct knfsd_fh nbl_fh;
594 struct nfsd4_callback nbl_cb;
595};
596
583struct nfsd4_compound_state; 597struct nfsd4_compound_state;
584struct nfsd_net; 598struct nfsd_net;
585 599
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index ff476e654b8f..8ca642fe9b21 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -513,6 +513,22 @@ __be32 nfsd4_clone_file_range(struct file *src, u64 src_pos, struct file *dst,
513 count)); 513 count));
514} 514}
515 515
516ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst,
517 u64 dst_pos, u64 count)
518{
519
520 /*
521 * Limit copy to 4MB to prevent indefinitely blocking an nfsd
522 * thread and client rpc slot. The choice of 4MB is somewhat
523 * arbitrary. We might instead base this on r/wsize, or make it
524 * tunable, or use a time instead of a byte limit, or implement
525 * asynchronous copy. In theory a client could also recognize a
526 * limit like this and pipeline multiple COPY requests.
527 */
528 count = min_t(u64, count, 1 << 22);
529 return vfs_copy_file_range(src, src_pos, dst, dst_pos, count, 0);
530}
531
516__be32 nfsd4_vfs_fallocate(struct svc_rqst *rqstp, struct svc_fh *fhp, 532__be32 nfsd4_vfs_fallocate(struct svc_rqst *rqstp, struct svc_fh *fhp,
517 struct file *file, loff_t offset, loff_t len, 533 struct file *file, loff_t offset, loff_t len,
518 int flags) 534 int flags)
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index 3cbb1b33777b..0bf9e7bf5800 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -96,6 +96,8 @@ __be32 nfsd_symlink(struct svc_rqst *, struct svc_fh *,
96 struct svc_fh *res); 96 struct svc_fh *res);
97__be32 nfsd_link(struct svc_rqst *, struct svc_fh *, 97__be32 nfsd_link(struct svc_rqst *, struct svc_fh *,
98 char *, int, struct svc_fh *); 98 char *, int, struct svc_fh *);
99ssize_t nfsd_copy_file_range(struct file *, u64,
100 struct file *, u64, u64);
99__be32 nfsd_rename(struct svc_rqst *, 101__be32 nfsd_rename(struct svc_rqst *,
100 struct svc_fh *, char *, int, 102 struct svc_fh *, char *, int,
101 struct svc_fh *, char *, int); 103 struct svc_fh *, char *, int);
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index beea0c5edc51..8fda4abdf3b1 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -503,6 +503,28 @@ struct nfsd4_clone {
503 u64 cl_count; 503 u64 cl_count;
504}; 504};
505 505
506struct nfsd42_write_res {
507 u64 wr_bytes_written;
508 u32 wr_stable_how;
509 nfs4_verifier wr_verifier;
510};
511
512struct nfsd4_copy {
513 /* request */
514 stateid_t cp_src_stateid;
515 stateid_t cp_dst_stateid;
516 u64 cp_src_pos;
517 u64 cp_dst_pos;
518 u64 cp_count;
519
520 /* both */
521 bool cp_consecutive;
522 bool cp_synchronous;
523
524 /* response */
525 struct nfsd42_write_res cp_res;
526};
527
506struct nfsd4_seek { 528struct nfsd4_seek {
507 /* request */ 529 /* request */
508 stateid_t seek_stateid; 530 stateid_t seek_stateid;
@@ -568,6 +590,7 @@ struct nfsd4_op {
568 struct nfsd4_fallocate allocate; 590 struct nfsd4_fallocate allocate;
569 struct nfsd4_fallocate deallocate; 591 struct nfsd4_fallocate deallocate;
570 struct nfsd4_clone clone; 592 struct nfsd4_clone clone;
593 struct nfsd4_copy copy;
571 struct nfsd4_seek seek; 594 struct nfsd4_seek seek;
572 } u; 595 } u;
573 struct nfs4_replay * replay; 596 struct nfs4_replay * replay;
diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h
index c47f6fdb111a..49b719dfef95 100644
--- a/fs/nfsd/xdr4cb.h
+++ b/fs/nfsd/xdr4cb.h
@@ -28,3 +28,12 @@
28#define NFS4_dec_cb_layout_sz (cb_compound_dec_hdr_sz + \ 28#define NFS4_dec_cb_layout_sz (cb_compound_dec_hdr_sz + \
29 cb_sequence_dec_sz + \ 29 cb_sequence_dec_sz + \
30 op_dec_sz) 30 op_dec_sz)
31
32#define NFS4_enc_cb_notify_lock_sz (cb_compound_enc_hdr_sz + \
33 cb_sequence_enc_sz + \
34 2 + 1 + \
35 XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + \
36 enc_nfs4_fh_sz)
37#define NFS4_dec_cb_notify_lock_sz (cb_compound_dec_hdr_sz + \
38 cb_sequence_dec_sz + \
39 op_dec_sz)
diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
index b03c0625fa6e..5ab958cdc50b 100644
--- a/include/linux/exportfs.h
+++ b/include/linux/exportfs.h
@@ -157,12 +157,13 @@ struct fid {
157 * @fh_to_dentry is given a &struct super_block (@sb) and a file handle 157 * @fh_to_dentry is given a &struct super_block (@sb) and a file handle
158 * fragment (@fh, @fh_len). It should return a &struct dentry which refers 158 * fragment (@fh, @fh_len). It should return a &struct dentry which refers
159 * to the same file that the file handle fragment refers to. If it cannot, 159 * to the same file that the file handle fragment refers to. If it cannot,
160 * it should return a %NULL pointer if the file was found but no acceptable 160 * it should return a %NULL pointer if the file cannot be found, or an
161 * &dentries were available, or an %ERR_PTR error code indicating why it 161 * %ERR_PTR error code of %ENOMEM if a memory allocation failure occurred.
162 * couldn't be found (e.g. %ENOENT or %ENOMEM). Any suitable dentry can be 162 * Any other error code is treated like %NULL, and will cause an %ESTALE error
163 * returned including, if necessary, a new dentry created with d_alloc_root. 163 * for callers of exportfs_decode_fh().
164 * The caller can then find any other extant dentries by following the 164 * Any suitable dentry can be returned including, if necessary, a new dentry
165 * d_alias links. 165 * created with d_alloc_root. The caller can then find any other extant
166 * dentries by following the d_alias links.
166 * 167 *
167 * fh_to_parent: 168 * fh_to_parent:
168 * Same as @fh_to_dentry, except that it returns a pointer to the parent 169 * Same as @fh_to_dentry, except that it returns a pointer to the parent
diff --git a/include/linux/sunrpc/rpc_rdma.h b/include/linux/sunrpc/rpc_rdma.h
index 3b1ff38f0c37..a7da6bf56610 100644
--- a/include/linux/sunrpc/rpc_rdma.h
+++ b/include/linux/sunrpc/rpc_rdma.h
@@ -41,6 +41,7 @@
41#define _LINUX_SUNRPC_RPC_RDMA_H 41#define _LINUX_SUNRPC_RPC_RDMA_H
42 42
43#include <linux/types.h> 43#include <linux/types.h>
44#include <linux/bitops.h>
44 45
45#define RPCRDMA_VERSION 1 46#define RPCRDMA_VERSION 1
46#define rpcrdma_version cpu_to_be32(RPCRDMA_VERSION) 47#define rpcrdma_version cpu_to_be32(RPCRDMA_VERSION)
@@ -129,4 +130,38 @@ enum rpcrdma_proc {
129#define rdma_done cpu_to_be32(RDMA_DONE) 130#define rdma_done cpu_to_be32(RDMA_DONE)
130#define rdma_error cpu_to_be32(RDMA_ERROR) 131#define rdma_error cpu_to_be32(RDMA_ERROR)
131 132
133/*
134 * Private extension to RPC-over-RDMA Version One.
135 * Message passed during RDMA-CM connection set-up.
136 *
137 * Add new fields at the end, and don't permute existing
138 * fields.
139 */
140struct rpcrdma_connect_private {
141 __be32 cp_magic;
142 u8 cp_version;
143 u8 cp_flags;
144 u8 cp_send_size;
145 u8 cp_recv_size;
146} __packed;
147
148#define rpcrdma_cmp_magic __cpu_to_be32(0xf6ab0e18)
149
150enum {
151 RPCRDMA_CMP_VERSION = 1,
152 RPCRDMA_CMP_F_SND_W_INV_OK = BIT(0),
153};
154
155static inline u8
156rpcrdma_encode_buffer_size(unsigned int size)
157{
158 return (size >> 10) - 1;
159}
160
161static inline unsigned int
162rpcrdma_decode_buffer_size(u8 val)
163{
164 return ((unsigned int)val + 1) << 10;
165}
166
132#endif /* _LINUX_SUNRPC_RPC_RDMA_H */ 167#endif /* _LINUX_SUNRPC_RPC_RDMA_H */
diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index d6917b896d3a..cc3ae16eac68 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -86,6 +86,7 @@ struct svc_rdma_op_ctxt {
86 unsigned long flags; 86 unsigned long flags;
87 enum dma_data_direction direction; 87 enum dma_data_direction direction;
88 int count; 88 int count;
89 unsigned int mapped_sges;
89 struct ib_sge sge[RPCSVC_MAXPAGES]; 90 struct ib_sge sge[RPCSVC_MAXPAGES];
90 struct page *pages[RPCSVC_MAXPAGES]; 91 struct page *pages[RPCSVC_MAXPAGES];
91}; 92};
@@ -136,6 +137,7 @@ struct svcxprt_rdma {
136 int sc_ord; /* RDMA read limit */ 137 int sc_ord; /* RDMA read limit */
137 int sc_max_sge; 138 int sc_max_sge;
138 int sc_max_sge_rd; /* max sge for read target */ 139 int sc_max_sge_rd; /* max sge for read target */
140 bool sc_snd_w_inv; /* OK to use Send With Invalidate */
139 141
140 atomic_t sc_sq_count; /* Number of SQ WR on queue */ 142 atomic_t sc_sq_count; /* Number of SQ WR on queue */
141 unsigned int sc_sq_depth; /* Depth of SQ */ 143 unsigned int sc_sq_depth; /* Depth of SQ */
@@ -193,6 +195,14 @@ struct svcxprt_rdma {
193 195
194#define RPCSVC_MAXPAYLOAD_RDMA RPCSVC_MAXPAYLOAD 196#define RPCSVC_MAXPAYLOAD_RDMA RPCSVC_MAXPAYLOAD
195 197
198/* Track DMA maps for this transport and context */
199static inline void svc_rdma_count_mappings(struct svcxprt_rdma *rdma,
200 struct svc_rdma_op_ctxt *ctxt)
201{
202 ctxt->mapped_sges++;
203 atomic_inc(&rdma->sc_dma_used);
204}
205
196/* svc_rdma_backchannel.c */ 206/* svc_rdma_backchannel.c */
197extern int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, 207extern int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt,
198 struct rpcrdma_msg *rmsgp, 208 struct rpcrdma_msg *rmsgp,
diff --git a/include/uapi/linux/nfs4.h b/include/uapi/linux/nfs4.h
index 2b871e0858d9..4ae62796bfde 100644
--- a/include/uapi/linux/nfs4.h
+++ b/include/uapi/linux/nfs4.h
@@ -39,8 +39,9 @@
39#define NFS4_FH_VOL_MIGRATION 0x0004 39#define NFS4_FH_VOL_MIGRATION 0x0004
40#define NFS4_FH_VOL_RENAME 0x0008 40#define NFS4_FH_VOL_RENAME 0x0008
41 41
42#define NFS4_OPEN_RESULT_CONFIRM 0x0002 42#define NFS4_OPEN_RESULT_CONFIRM 0x0002
43#define NFS4_OPEN_RESULT_LOCKTYPE_POSIX 0x0004 43#define NFS4_OPEN_RESULT_LOCKTYPE_POSIX 0x0004
44#define NFS4_OPEN_RESULT_MAY_NOTIFY_LOCK 0x0020
44 45
45#define NFS4_SHARE_ACCESS_MASK 0x000F 46#define NFS4_SHARE_ACCESS_MASK 0x000F
46#define NFS4_SHARE_ACCESS_READ 0x0001 47#define NFS4_SHARE_ACCESS_READ 0x0001
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index a2a7519b0f23..cd0c5581498c 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -129,7 +129,7 @@ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma,
129 ret = -EIO; 129 ret = -EIO;
130 goto out_unmap; 130 goto out_unmap;
131 } 131 }
132 atomic_inc(&rdma->sc_dma_used); 132 svc_rdma_count_mappings(rdma, ctxt);
133 133
134 memset(&send_wr, 0, sizeof(send_wr)); 134 memset(&send_wr, 0, sizeof(send_wr));
135 ctxt->cqe.done = svc_rdma_wc_send; 135 ctxt->cqe.done = svc_rdma_wc_send;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 2c25606f2561..ad1df979b3f0 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -159,7 +159,7 @@ int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt,
159 ctxt->sge[pno].addr); 159 ctxt->sge[pno].addr);
160 if (ret) 160 if (ret)
161 goto err; 161 goto err;
162 atomic_inc(&xprt->sc_dma_used); 162 svc_rdma_count_mappings(xprt, ctxt);
163 163
164 ctxt->sge[pno].lkey = xprt->sc_pd->local_dma_lkey; 164 ctxt->sge[pno].lkey = xprt->sc_pd->local_dma_lkey;
165 ctxt->sge[pno].length = len; 165 ctxt->sge[pno].length = len;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 54d533300620..f5a91edcd233 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -225,6 +225,48 @@ svc_rdma_get_reply_array(struct rpcrdma_msg *rmsgp,
225 return rp_ary; 225 return rp_ary;
226} 226}
227 227
228/* RPC-over-RDMA Version One private extension: Remote Invalidation.
229 * Responder's choice: requester signals it can handle Send With
230 * Invalidate, and responder chooses one rkey to invalidate.
231 *
232 * Find a candidate rkey to invalidate when sending a reply. Picks the
233 * first rkey it finds in the chunks lists.
234 *
235 * Returns zero if RPC's chunk lists are empty.
236 */
237static u32 svc_rdma_get_inv_rkey(struct rpcrdma_msg *rdma_argp,
238 struct rpcrdma_write_array *wr_ary,
239 struct rpcrdma_write_array *rp_ary)
240{
241 struct rpcrdma_read_chunk *rd_ary;
242 struct rpcrdma_segment *arg_ch;
243 u32 inv_rkey;
244
245 inv_rkey = 0;
246
247 rd_ary = svc_rdma_get_read_chunk(rdma_argp);
248 if (rd_ary) {
249 inv_rkey = be32_to_cpu(rd_ary->rc_target.rs_handle);
250 goto out;
251 }
252
253 if (wr_ary && be32_to_cpu(wr_ary->wc_nchunks)) {
254 arg_ch = &wr_ary->wc_array[0].wc_target;
255 inv_rkey = be32_to_cpu(arg_ch->rs_handle);
256 goto out;
257 }
258
259 if (rp_ary && be32_to_cpu(rp_ary->wc_nchunks)) {
260 arg_ch = &rp_ary->wc_array[0].wc_target;
261 inv_rkey = be32_to_cpu(arg_ch->rs_handle);
262 goto out;
263 }
264
265out:
266 dprintk("svcrdma: Send With Invalidate rkey=%08x\n", inv_rkey);
267 return inv_rkey;
268}
269
228/* Assumptions: 270/* Assumptions:
229 * - The specified write_len can be represented in sc_max_sge * PAGE_SIZE 271 * - The specified write_len can be represented in sc_max_sge * PAGE_SIZE
230 */ 272 */
@@ -280,7 +322,7 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
280 if (ib_dma_mapping_error(xprt->sc_cm_id->device, 322 if (ib_dma_mapping_error(xprt->sc_cm_id->device,
281 sge[sge_no].addr)) 323 sge[sge_no].addr))
282 goto err; 324 goto err;
283 atomic_inc(&xprt->sc_dma_used); 325 svc_rdma_count_mappings(xprt, ctxt);
284 sge[sge_no].lkey = xprt->sc_pd->local_dma_lkey; 326 sge[sge_no].lkey = xprt->sc_pd->local_dma_lkey;
285 ctxt->count++; 327 ctxt->count++;
286 sge_off = 0; 328 sge_off = 0;
@@ -464,7 +506,8 @@ static int send_reply(struct svcxprt_rdma *rdma,
464 struct page *page, 506 struct page *page,
465 struct rpcrdma_msg *rdma_resp, 507 struct rpcrdma_msg *rdma_resp,
466 struct svc_rdma_req_map *vec, 508 struct svc_rdma_req_map *vec,
467 int byte_count) 509 int byte_count,
510 u32 inv_rkey)
468{ 511{
469 struct svc_rdma_op_ctxt *ctxt; 512 struct svc_rdma_op_ctxt *ctxt;
470 struct ib_send_wr send_wr; 513 struct ib_send_wr send_wr;
@@ -489,7 +532,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
489 ctxt->sge[0].length, DMA_TO_DEVICE); 532 ctxt->sge[0].length, DMA_TO_DEVICE);
490 if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr)) 533 if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr))
491 goto err; 534 goto err;
492 atomic_inc(&rdma->sc_dma_used); 535 svc_rdma_count_mappings(rdma, ctxt);
493 536
494 ctxt->direction = DMA_TO_DEVICE; 537 ctxt->direction = DMA_TO_DEVICE;
495 538
@@ -505,7 +548,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
505 if (ib_dma_mapping_error(rdma->sc_cm_id->device, 548 if (ib_dma_mapping_error(rdma->sc_cm_id->device,
506 ctxt->sge[sge_no].addr)) 549 ctxt->sge[sge_no].addr))
507 goto err; 550 goto err;
508 atomic_inc(&rdma->sc_dma_used); 551 svc_rdma_count_mappings(rdma, ctxt);
509 ctxt->sge[sge_no].lkey = rdma->sc_pd->local_dma_lkey; 552 ctxt->sge[sge_no].lkey = rdma->sc_pd->local_dma_lkey;
510 ctxt->sge[sge_no].length = sge_bytes; 553 ctxt->sge[sge_no].length = sge_bytes;
511 } 554 }
@@ -523,23 +566,9 @@ static int send_reply(struct svcxprt_rdma *rdma,
523 ctxt->pages[page_no+1] = rqstp->rq_respages[page_no]; 566 ctxt->pages[page_no+1] = rqstp->rq_respages[page_no];
524 ctxt->count++; 567 ctxt->count++;
525 rqstp->rq_respages[page_no] = NULL; 568 rqstp->rq_respages[page_no] = NULL;
526 /*
527 * If there are more pages than SGE, terminate SGE
528 * list so that svc_rdma_unmap_dma doesn't attempt to
529 * unmap garbage.
530 */
531 if (page_no+1 >= sge_no)
532 ctxt->sge[page_no+1].length = 0;
533 } 569 }
534 rqstp->rq_next_page = rqstp->rq_respages + 1; 570 rqstp->rq_next_page = rqstp->rq_respages + 1;
535 571
536 /* The loop above bumps sc_dma_used for each sge. The
537 * xdr_buf.tail gets a separate sge, but resides in the
538 * same page as xdr_buf.head. Don't count it twice.
539 */
540 if (sge_no > ctxt->count)
541 atomic_dec(&rdma->sc_dma_used);
542
543 if (sge_no > rdma->sc_max_sge) { 572 if (sge_no > rdma->sc_max_sge) {
544 pr_err("svcrdma: Too many sges (%d)\n", sge_no); 573 pr_err("svcrdma: Too many sges (%d)\n", sge_no);
545 goto err; 574 goto err;
@@ -549,7 +578,11 @@ static int send_reply(struct svcxprt_rdma *rdma,
549 send_wr.wr_cqe = &ctxt->cqe; 578 send_wr.wr_cqe = &ctxt->cqe;
550 send_wr.sg_list = ctxt->sge; 579 send_wr.sg_list = ctxt->sge;
551 send_wr.num_sge = sge_no; 580 send_wr.num_sge = sge_no;
552 send_wr.opcode = IB_WR_SEND; 581 if (inv_rkey) {
582 send_wr.opcode = IB_WR_SEND_WITH_INV;
583 send_wr.ex.invalidate_rkey = inv_rkey;
584 } else
585 send_wr.opcode = IB_WR_SEND;
553 send_wr.send_flags = IB_SEND_SIGNALED; 586 send_wr.send_flags = IB_SEND_SIGNALED;
554 587
555 ret = svc_rdma_send(rdma, &send_wr); 588 ret = svc_rdma_send(rdma, &send_wr);
@@ -581,6 +614,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
581 int inline_bytes; 614 int inline_bytes;
582 struct page *res_page; 615 struct page *res_page;
583 struct svc_rdma_req_map *vec; 616 struct svc_rdma_req_map *vec;
617 u32 inv_rkey;
584 618
585 dprintk("svcrdma: sending response for rqstp=%p\n", rqstp); 619 dprintk("svcrdma: sending response for rqstp=%p\n", rqstp);
586 620
@@ -591,6 +625,10 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
591 wr_ary = svc_rdma_get_write_array(rdma_argp); 625 wr_ary = svc_rdma_get_write_array(rdma_argp);
592 rp_ary = svc_rdma_get_reply_array(rdma_argp, wr_ary); 626 rp_ary = svc_rdma_get_reply_array(rdma_argp, wr_ary);
593 627
628 inv_rkey = 0;
629 if (rdma->sc_snd_w_inv)
630 inv_rkey = svc_rdma_get_inv_rkey(rdma_argp, wr_ary, rp_ary);
631
594 /* Build an req vec for the XDR */ 632 /* Build an req vec for the XDR */
595 vec = svc_rdma_get_req_map(rdma); 633 vec = svc_rdma_get_req_map(rdma);
596 ret = svc_rdma_map_xdr(rdma, &rqstp->rq_res, vec, wr_ary != NULL); 634 ret = svc_rdma_map_xdr(rdma, &rqstp->rq_res, vec, wr_ary != NULL);
@@ -633,9 +671,9 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
633 goto err1; 671 goto err1;
634 672
635 ret = send_reply(rdma, rqstp, res_page, rdma_resp, vec, 673 ret = send_reply(rdma, rqstp, res_page, rdma_resp, vec,
636 inline_bytes); 674 inline_bytes, inv_rkey);
637 if (ret < 0) 675 if (ret < 0)
638 goto err1; 676 goto err0;
639 677
640 svc_rdma_put_req_map(rdma, vec); 678 svc_rdma_put_req_map(rdma, vec);
641 dprintk("svcrdma: send_reply returns %d\n", ret); 679 dprintk("svcrdma: send_reply returns %d\n", ret);
@@ -692,7 +730,7 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
692 svc_rdma_put_context(ctxt, 1); 730 svc_rdma_put_context(ctxt, 1);
693 return; 731 return;
694 } 732 }
695 atomic_inc(&xprt->sc_dma_used); 733 svc_rdma_count_mappings(xprt, ctxt);
696 734
697 /* Prepare SEND WR */ 735 /* Prepare SEND WR */
698 memset(&err_wr, 0, sizeof(err_wr)); 736 memset(&err_wr, 0, sizeof(err_wr));
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index eb2857f52b05..6864fb967038 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -198,6 +198,7 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
198 198
199out: 199out:
200 ctxt->count = 0; 200 ctxt->count = 0;
201 ctxt->mapped_sges = 0;
201 ctxt->frmr = NULL; 202 ctxt->frmr = NULL;
202 return ctxt; 203 return ctxt;
203 204
@@ -221,22 +222,27 @@ out_empty:
221void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt) 222void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
222{ 223{
223 struct svcxprt_rdma *xprt = ctxt->xprt; 224 struct svcxprt_rdma *xprt = ctxt->xprt;
224 int i; 225 struct ib_device *device = xprt->sc_cm_id->device;
225 for (i = 0; i < ctxt->count && ctxt->sge[i].length; i++) { 226 u32 lkey = xprt->sc_pd->local_dma_lkey;
227 unsigned int i, count;
228
229 for (count = 0, i = 0; i < ctxt->mapped_sges; i++) {
226 /* 230 /*
227 * Unmap the DMA addr in the SGE if the lkey matches 231 * Unmap the DMA addr in the SGE if the lkey matches
228 * the local_dma_lkey, otherwise, ignore it since it is 232 * the local_dma_lkey, otherwise, ignore it since it is
229 * an FRMR lkey and will be unmapped later when the 233 * an FRMR lkey and will be unmapped later when the
230 * last WR that uses it completes. 234 * last WR that uses it completes.
231 */ 235 */
232 if (ctxt->sge[i].lkey == xprt->sc_pd->local_dma_lkey) { 236 if (ctxt->sge[i].lkey == lkey) {
233 atomic_dec(&xprt->sc_dma_used); 237 count++;
234 ib_dma_unmap_page(xprt->sc_cm_id->device, 238 ib_dma_unmap_page(device,
235 ctxt->sge[i].addr, 239 ctxt->sge[i].addr,
236 ctxt->sge[i].length, 240 ctxt->sge[i].length,
237 ctxt->direction); 241 ctxt->direction);
238 } 242 }
239 } 243 }
244 ctxt->mapped_sges = 0;
245 atomic_sub(count, &xprt->sc_dma_used);
240} 246}
241 247
242void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages) 248void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages)
@@ -600,7 +606,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt, gfp_t flags)
600 DMA_FROM_DEVICE); 606 DMA_FROM_DEVICE);
601 if (ib_dma_mapping_error(xprt->sc_cm_id->device, pa)) 607 if (ib_dma_mapping_error(xprt->sc_cm_id->device, pa))
602 goto err_put_ctxt; 608 goto err_put_ctxt;
603 atomic_inc(&xprt->sc_dma_used); 609 svc_rdma_count_mappings(xprt, ctxt);
604 ctxt->sge[sge_no].addr = pa; 610 ctxt->sge[sge_no].addr = pa;
605 ctxt->sge[sge_no].length = PAGE_SIZE; 611 ctxt->sge[sge_no].length = PAGE_SIZE;
606 ctxt->sge[sge_no].lkey = xprt->sc_pd->local_dma_lkey; 612 ctxt->sge[sge_no].lkey = xprt->sc_pd->local_dma_lkey;
@@ -642,6 +648,26 @@ int svc_rdma_repost_recv(struct svcxprt_rdma *xprt, gfp_t flags)
642 return ret; 648 return ret;
643} 649}
644 650
651static void
652svc_rdma_parse_connect_private(struct svcxprt_rdma *newxprt,
653 struct rdma_conn_param *param)
654{
655 const struct rpcrdma_connect_private *pmsg = param->private_data;
656
657 if (pmsg &&
658 pmsg->cp_magic == rpcrdma_cmp_magic &&
659 pmsg->cp_version == RPCRDMA_CMP_VERSION) {
660 newxprt->sc_snd_w_inv = pmsg->cp_flags &
661 RPCRDMA_CMP_F_SND_W_INV_OK;
662
663 dprintk("svcrdma: client send_size %u, recv_size %u "
664 "remote inv %ssupported\n",
665 rpcrdma_decode_buffer_size(pmsg->cp_send_size),
666 rpcrdma_decode_buffer_size(pmsg->cp_recv_size),
667 newxprt->sc_snd_w_inv ? "" : "un");
668 }
669}
670
645/* 671/*
646 * This function handles the CONNECT_REQUEST event on a listening 672 * This function handles the CONNECT_REQUEST event on a listening
647 * endpoint. It is passed the cma_id for the _new_ connection. The context in 673 * endpoint. It is passed the cma_id for the _new_ connection. The context in
@@ -653,7 +679,8 @@ int svc_rdma_repost_recv(struct svcxprt_rdma *xprt, gfp_t flags)
653 * will call the recvfrom method on the listen xprt which will accept the new 679 * will call the recvfrom method on the listen xprt which will accept the new
654 * connection. 680 * connection.
655 */ 681 */
656static void handle_connect_req(struct rdma_cm_id *new_cma_id, size_t client_ird) 682static void handle_connect_req(struct rdma_cm_id *new_cma_id,
683 struct rdma_conn_param *param)
657{ 684{
658 struct svcxprt_rdma *listen_xprt = new_cma_id->context; 685 struct svcxprt_rdma *listen_xprt = new_cma_id->context;
659 struct svcxprt_rdma *newxprt; 686 struct svcxprt_rdma *newxprt;
@@ -669,9 +696,10 @@ static void handle_connect_req(struct rdma_cm_id *new_cma_id, size_t client_ird)
669 new_cma_id->context = newxprt; 696 new_cma_id->context = newxprt;
670 dprintk("svcrdma: Creating newxprt=%p, cm_id=%p, listenxprt=%p\n", 697 dprintk("svcrdma: Creating newxprt=%p, cm_id=%p, listenxprt=%p\n",
671 newxprt, newxprt->sc_cm_id, listen_xprt); 698 newxprt, newxprt->sc_cm_id, listen_xprt);
699 svc_rdma_parse_connect_private(newxprt, param);
672 700
673 /* Save client advertised inbound read limit for use later in accept. */ 701 /* Save client advertised inbound read limit for use later in accept. */
674 newxprt->sc_ord = client_ird; 702 newxprt->sc_ord = param->initiator_depth;
675 703
676 /* Set the local and remote addresses in the transport */ 704 /* Set the local and remote addresses in the transport */
677 sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr; 705 sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr;
@@ -706,8 +734,7 @@ static int rdma_listen_handler(struct rdma_cm_id *cma_id,
706 dprintk("svcrdma: Connect request on cma_id=%p, xprt = %p, " 734 dprintk("svcrdma: Connect request on cma_id=%p, xprt = %p, "
707 "event = %s (%d)\n", cma_id, cma_id->context, 735 "event = %s (%d)\n", cma_id, cma_id->context,
708 rdma_event_msg(event->event), event->event); 736 rdma_event_msg(event->event), event->event);
709 handle_connect_req(cma_id, 737 handle_connect_req(cma_id, &event->param.conn);
710 event->param.conn.initiator_depth);
711 break; 738 break;
712 739
713 case RDMA_CM_EVENT_ESTABLISHED: 740 case RDMA_CM_EVENT_ESTABLISHED:
@@ -941,6 +968,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
941 struct svcxprt_rdma *listen_rdma; 968 struct svcxprt_rdma *listen_rdma;
942 struct svcxprt_rdma *newxprt = NULL; 969 struct svcxprt_rdma *newxprt = NULL;
943 struct rdma_conn_param conn_param; 970 struct rdma_conn_param conn_param;
971 struct rpcrdma_connect_private pmsg;
944 struct ib_qp_init_attr qp_attr; 972 struct ib_qp_init_attr qp_attr;
945 struct ib_device *dev; 973 struct ib_device *dev;
946 unsigned int i; 974 unsigned int i;
@@ -1070,7 +1098,8 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
1070 dev->attrs.max_fast_reg_page_list_len; 1098 dev->attrs.max_fast_reg_page_list_len;
1071 newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_FAST_REG; 1099 newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_FAST_REG;
1072 newxprt->sc_reader = rdma_read_chunk_frmr; 1100 newxprt->sc_reader = rdma_read_chunk_frmr;
1073 } 1101 } else
1102 newxprt->sc_snd_w_inv = false;
1074 1103
1075 /* 1104 /*
1076 * Determine if a DMA MR is required and if so, what privs are required 1105 * Determine if a DMA MR is required and if so, what privs are required
@@ -1094,11 +1123,20 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
1094 /* Swap out the handler */ 1123 /* Swap out the handler */
1095 newxprt->sc_cm_id->event_handler = rdma_cma_handler; 1124 newxprt->sc_cm_id->event_handler = rdma_cma_handler;
1096 1125
1126 /* Construct RDMA-CM private message */
1127 pmsg.cp_magic = rpcrdma_cmp_magic;
1128 pmsg.cp_version = RPCRDMA_CMP_VERSION;
1129 pmsg.cp_flags = 0;
1130 pmsg.cp_send_size = pmsg.cp_recv_size =
1131 rpcrdma_encode_buffer_size(newxprt->sc_max_req_size);
1132
1097 /* Accept Connection */ 1133 /* Accept Connection */
1098 set_bit(RDMAXPRT_CONN_PENDING, &newxprt->sc_flags); 1134 set_bit(RDMAXPRT_CONN_PENDING, &newxprt->sc_flags);
1099 memset(&conn_param, 0, sizeof conn_param); 1135 memset(&conn_param, 0, sizeof conn_param);
1100 conn_param.responder_resources = 0; 1136 conn_param.responder_resources = 0;
1101 conn_param.initiator_depth = newxprt->sc_ord; 1137 conn_param.initiator_depth = newxprt->sc_ord;
1138 conn_param.private_data = &pmsg;
1139 conn_param.private_data_len = sizeof(pmsg);
1102 ret = rdma_accept(newxprt->sc_cm_id, &conn_param); 1140 ret = rdma_accept(newxprt->sc_cm_id, &conn_param);
1103 if (ret) { 1141 if (ret) {
1104 dprintk("svcrdma: failed to accept new connection, ret=%d\n", 1142 dprintk("svcrdma: failed to accept new connection, ret=%d\n",