author     Trond Myklebust <trond.myklebust@hammerspace.com>  2018-10-18 17:29:00 -0400
committer  Trond Myklebust <trond.myklebust@hammerspace.com>  2018-10-18 17:29:00 -0400
commit     93bdcf9fdb89646e9446fab25aec76a754969619 (patch)
tree       3009b5bc6cc06463772ce1d8b3cb41a0b93dd3f5
parent     826799e66e8683e5698e140bb9ef69afc8c0014e (diff)
parent     470443e0b379b070305629f911cc09562bdf324f (diff)
Merge tag 'nfs-rdma-for-4.20-1' of git://git.linux-nfs.org/projects/anna/linux-nfs
NFS RDMA client updates for Linux 4.20

Stable bugfixes:
- Reset credit grant properly after a disconnect

Other bugfixes and cleanups:
- xprt_release_rqst_cong is called outside of transport_lock
- Create more MRs at a time and toss out old ones during recovery
- Various improvements to the RDMA connection and disconnection code:
  - Improve naming of trace events, functions, and variables
  - Add documenting comments
  - Fix metrics and stats reporting
- Fix a tracepoint sparse warning

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
-rw-r--r--  include/trace/events/rpcrdma.h               18
-rw-r--r--  net/sunrpc/xprt.c                            14
-rw-r--r--  net/sunrpc/xprtrdma/backchannel.c            16
-rw-r--r--  net/sunrpc/xprtrdma/fmr_ops.c               131
-rw-r--r--  net/sunrpc/xprtrdma/frwr_ops.c              137
-rw-r--r--  net/sunrpc/xprtrdma/rpc_rdma.c               20
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_backchannel.c    1
-rw-r--r--  net/sunrpc/xprtrdma/transport.c             110
-rw-r--r--  net/sunrpc/xprtrdma/verbs.c                 178
-rw-r--r--  net/sunrpc/xprtrdma/xprt_rdma.h              18
-rw-r--r--  net/sunrpc/xprtsock.c                        10
11 files changed, 302 insertions, 351 deletions
diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h
index 53df203b8057..b093058f78aa 100644
--- a/include/trace/events/rpcrdma.h
+++ b/include/trace/events/rpcrdma.h
@@ -263,7 +263,7 @@ DECLARE_EVENT_CLASS(xprtrdma_mr,
263); 263);
264 264
265#define DEFINE_MR_EVENT(name) \ 265#define DEFINE_MR_EVENT(name) \
266 DEFINE_EVENT(xprtrdma_mr, name, \ 266 DEFINE_EVENT(xprtrdma_mr, xprtrdma_mr_##name, \
267 TP_PROTO( \ 267 TP_PROTO( \
268 const struct rpcrdma_mr *mr \ 268 const struct rpcrdma_mr *mr \
269 ), \ 269 ), \
@@ -306,7 +306,7 @@ DECLARE_EVENT_CLASS(xprtrdma_cb_event,
306 ** Connection events 306 ** Connection events
307 **/ 307 **/
308 308
309TRACE_EVENT(xprtrdma_conn_upcall, 309TRACE_EVENT(xprtrdma_cm_event,
310 TP_PROTO( 310 TP_PROTO(
311 const struct rpcrdma_xprt *r_xprt, 311 const struct rpcrdma_xprt *r_xprt,
312 struct rdma_cm_event *event 312 struct rdma_cm_event *event
@@ -377,7 +377,7 @@ DEFINE_RXPRT_EVENT(xprtrdma_reinsert);
377DEFINE_RXPRT_EVENT(xprtrdma_reconnect); 377DEFINE_RXPRT_EVENT(xprtrdma_reconnect);
378DEFINE_RXPRT_EVENT(xprtrdma_inject_dsc); 378DEFINE_RXPRT_EVENT(xprtrdma_inject_dsc);
379 379
380TRACE_EVENT(xprtrdma_qp_error, 380TRACE_EVENT(xprtrdma_qp_event,
381 TP_PROTO( 381 TP_PROTO(
382 const struct rpcrdma_xprt *r_xprt, 382 const struct rpcrdma_xprt *r_xprt,
383 const struct ib_event *event 383 const struct ib_event *event
@@ -509,7 +509,7 @@ TRACE_EVENT(xprtrdma_post_send,
509 TP_STRUCT__entry( 509 TP_STRUCT__entry(
510 __field(const void *, req) 510 __field(const void *, req)
511 __field(int, num_sge) 511 __field(int, num_sge)
512 __field(bool, signaled) 512 __field(int, signaled)
513 __field(int, status) 513 __field(int, status)
514 ), 514 ),
515 515
@@ -651,11 +651,11 @@ DEFINE_FRWR_DONE_EVENT(xprtrdma_wc_fastreg);
651DEFINE_FRWR_DONE_EVENT(xprtrdma_wc_li); 651DEFINE_FRWR_DONE_EVENT(xprtrdma_wc_li);
652DEFINE_FRWR_DONE_EVENT(xprtrdma_wc_li_wake); 652DEFINE_FRWR_DONE_EVENT(xprtrdma_wc_li_wake);
653 653
654DEFINE_MR_EVENT(xprtrdma_localinv); 654DEFINE_MR_EVENT(localinv);
655DEFINE_MR_EVENT(xprtrdma_dma_map); 655DEFINE_MR_EVENT(map);
656DEFINE_MR_EVENT(xprtrdma_dma_unmap); 656DEFINE_MR_EVENT(unmap);
657DEFINE_MR_EVENT(xprtrdma_remoteinv); 657DEFINE_MR_EVENT(remoteinv);
658DEFINE_MR_EVENT(xprtrdma_recover_mr); 658DEFINE_MR_EVENT(recycle);
659 659
660/** 660/**
661 ** Reply events 661 ** Reply events
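The DEFINE_MR_EVENT change above works by token pasting: call sites now pass
only the suffix (map, unmap, recycle), and the preprocessor builds the full
xprtrdma_mr_* event name, keeping the MR events under one common prefix. A
minimal userspace sketch of the idiom; the demo function bodies are
hypothetical, only the ##/# mechanics mirror the patch:

	#include <stdio.h>

	/* "##" pastes the suffix into the symbol name; "#" stringizes it. */
	#define DEFINE_MR_EVENT(name) \
		static void trace_xprtrdma_mr_##name(void) \
		{ \
			printf("event: xprtrdma_mr_%s\n", #name); \
		}

	DEFINE_MR_EVENT(map)
	DEFINE_MR_EVENT(unmap)
	DEFINE_MR_EVENT(recycle)

	int main(void)
	{
		trace_xprtrdma_mr_map();
		trace_xprtrdma_mr_unmap();
		trace_xprtrdma_mr_recycle();
		return 0;
	}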
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 7ee9f1e996db..86bea4520c4d 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -834,17 +834,11 @@ void xprt_connect(struct rpc_task *task)
834 834
835static void xprt_connect_status(struct rpc_task *task) 835static void xprt_connect_status(struct rpc_task *task)
836{ 836{
837 struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt; 837 switch (task->tk_status) {
838 838 case 0:
839 if (task->tk_status == 0) {
840 xprt->stat.connect_count++;
841 xprt->stat.connect_time += (long)jiffies - xprt->stat.connect_start;
842 dprintk("RPC: %5u xprt_connect_status: connection established\n", 839 dprintk("RPC: %5u xprt_connect_status: connection established\n",
843 task->tk_pid); 840 task->tk_pid);
844 return; 841 break;
845 }
846
847 switch (task->tk_status) {
848 case -ECONNREFUSED: 842 case -ECONNREFUSED:
849 case -ECONNRESET: 843 case -ECONNRESET:
850 case -ECONNABORTED: 844 case -ECONNABORTED:
@@ -861,7 +855,7 @@ static void xprt_connect_status(struct rpc_task *task)
861 default: 855 default:
862 dprintk("RPC: %5u xprt_connect_status: error %d connecting to " 856 dprintk("RPC: %5u xprt_connect_status: error %d connecting to "
863 "server %s\n", task->tk_pid, -task->tk_status, 857 "server %s\n", task->tk_pid, -task->tk_status,
864 xprt->servername); 858 task->tk_rqstp->rq_xprt->servername);
865 task->tk_status = -EIO; 859 task->tk_status = -EIO;
866 } 860 }
867} 861}
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index e7c445cee16f..e5b367a3e517 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -53,9 +53,9 @@ static int rpcrdma_bc_setup_reqs(struct rpcrdma_xprt *r_xprt,
53 rqst->rq_xprt = xprt; 53 rqst->rq_xprt = xprt;
54 INIT_LIST_HEAD(&rqst->rq_bc_list); 54 INIT_LIST_HEAD(&rqst->rq_bc_list);
55 __set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state); 55 __set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
56 spin_lock_bh(&xprt->bc_pa_lock); 56 spin_lock(&xprt->bc_pa_lock);
57 list_add(&rqst->rq_bc_pa_list, &xprt->bc_pa_list); 57 list_add(&rqst->rq_bc_pa_list, &xprt->bc_pa_list);
58 spin_unlock_bh(&xprt->bc_pa_lock); 58 spin_unlock(&xprt->bc_pa_lock);
59 59
60 size = r_xprt->rx_data.inline_rsize; 60 size = r_xprt->rx_data.inline_rsize;
61 rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, GFP_KERNEL); 61 rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, GFP_KERNEL);
@@ -230,16 +230,16 @@ void xprt_rdma_bc_destroy(struct rpc_xprt *xprt, unsigned int reqs)
230 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); 230 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
231 struct rpc_rqst *rqst, *tmp; 231 struct rpc_rqst *rqst, *tmp;
232 232
233 spin_lock_bh(&xprt->bc_pa_lock); 233 spin_lock(&xprt->bc_pa_lock);
234 list_for_each_entry_safe(rqst, tmp, &xprt->bc_pa_list, rq_bc_pa_list) { 234 list_for_each_entry_safe(rqst, tmp, &xprt->bc_pa_list, rq_bc_pa_list) {
235 list_del(&rqst->rq_bc_pa_list); 235 list_del(&rqst->rq_bc_pa_list);
236 spin_unlock_bh(&xprt->bc_pa_lock); 236 spin_unlock(&xprt->bc_pa_lock);
237 237
238 rpcrdma_bc_free_rqst(r_xprt, rqst); 238 rpcrdma_bc_free_rqst(r_xprt, rqst);
239 239
240 spin_lock_bh(&xprt->bc_pa_lock); 240 spin_lock(&xprt->bc_pa_lock);
241 } 241 }
242 spin_unlock_bh(&xprt->bc_pa_lock); 242 spin_unlock(&xprt->bc_pa_lock);
243} 243}
244 244
245/** 245/**
@@ -257,9 +257,9 @@ void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst)
257 rpcrdma_recv_buffer_put(req->rl_reply); 257 rpcrdma_recv_buffer_put(req->rl_reply);
258 req->rl_reply = NULL; 258 req->rl_reply = NULL;
259 259
260 spin_lock_bh(&xprt->bc_pa_lock); 260 spin_lock(&xprt->bc_pa_lock);
261 list_add_tail(&rqst->rq_bc_pa_list, &xprt->bc_pa_list); 261 list_add_tail(&rqst->rq_bc_pa_list, &xprt->bc_pa_list);
262 spin_unlock_bh(&xprt->bc_pa_lock); 262 spin_unlock(&xprt->bc_pa_lock);
263} 263}
264 264
265/** 265/**
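The backchannel hunks above drop the _bh spinlock variants: bc_pa_lock no
longer needs to shut out softirq context, so the cheaper plain form suffices.
A short illustrative fragment of why the distinction matters, assuming the
standard kernel spinlock API; demo_lock and both functions are hypothetical:

	#include <linux/spinlock.h>

	static DEFINE_SPINLOCK(demo_lock);

	/* If a softirq handler can take demo_lock, process-context users
	 * must disable softirqs too, or a softirq arriving on this CPU
	 * could spin on a lock its own CPU already holds. */
	static void shared_with_softirq(void)
	{
		spin_lock_bh(&demo_lock);
		/* ... touch state also used from bottom-half context ... */
		spin_unlock_bh(&demo_lock);
	}

	/* Once every acquirer runs in process context, as this patch
	 * relies on, the plain variant is enough: */
	static void process_context_only(void)
	{
		spin_lock(&demo_lock);
		/* ... */
		spin_unlock(&demo_lock);
	}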
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index 0f7c465d9a5a..7f5632cd5a48 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -49,46 +49,7 @@ fmr_is_supported(struct rpcrdma_ia *ia)
49 return true; 49 return true;
50} 50}
51 51
52static int 52static void
53fmr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
54{
55 static struct ib_fmr_attr fmr_attr = {
56 .max_pages = RPCRDMA_MAX_FMR_SGES,
57 .max_maps = 1,
58 .page_shift = PAGE_SHIFT
59 };
60
61 mr->fmr.fm_physaddrs = kcalloc(RPCRDMA_MAX_FMR_SGES,
62 sizeof(u64), GFP_KERNEL);
63 if (!mr->fmr.fm_physaddrs)
64 goto out_free;
65
66 mr->mr_sg = kcalloc(RPCRDMA_MAX_FMR_SGES,
67 sizeof(*mr->mr_sg), GFP_KERNEL);
68 if (!mr->mr_sg)
69 goto out_free;
70
71 sg_init_table(mr->mr_sg, RPCRDMA_MAX_FMR_SGES);
72
73 mr->fmr.fm_mr = ib_alloc_fmr(ia->ri_pd, RPCRDMA_FMR_ACCESS_FLAGS,
74 &fmr_attr);
75 if (IS_ERR(mr->fmr.fm_mr))
76 goto out_fmr_err;
77
78 INIT_LIST_HEAD(&mr->mr_list);
79 return 0;
80
81out_fmr_err:
82 dprintk("RPC: %s: ib_alloc_fmr returned %ld\n", __func__,
83 PTR_ERR(mr->fmr.fm_mr));
84
85out_free:
86 kfree(mr->mr_sg);
87 kfree(mr->fmr.fm_physaddrs);
88 return -ENOMEM;
89}
90
91static int
92__fmr_unmap(struct rpcrdma_mr *mr) 53__fmr_unmap(struct rpcrdma_mr *mr)
93{ 54{
94 LIST_HEAD(l); 55 LIST_HEAD(l);
@@ -97,13 +58,16 @@ __fmr_unmap(struct rpcrdma_mr *mr)
97 list_add(&mr->fmr.fm_mr->list, &l); 58 list_add(&mr->fmr.fm_mr->list, &l);
98 rc = ib_unmap_fmr(&l); 59 rc = ib_unmap_fmr(&l);
99 list_del(&mr->fmr.fm_mr->list); 60 list_del(&mr->fmr.fm_mr->list);
100 return rc; 61 if (rc)
62 pr_err("rpcrdma: final ib_unmap_fmr for %p failed %i\n",
63 mr, rc);
101} 64}
102 65
66/* Release an MR.
67 */
103static void 68static void
104fmr_op_release_mr(struct rpcrdma_mr *mr) 69fmr_op_release_mr(struct rpcrdma_mr *mr)
105{ 70{
106 LIST_HEAD(unmap_list);
107 int rc; 71 int rc;
108 72
109 kfree(mr->fmr.fm_physaddrs); 73 kfree(mr->fmr.fm_physaddrs);
@@ -112,10 +76,7 @@ fmr_op_release_mr(struct rpcrdma_mr *mr)
112 /* In case this one was left mapped, try to unmap it 76 /* In case this one was left mapped, try to unmap it
113 * to prevent dealloc_fmr from failing with EBUSY 77 * to prevent dealloc_fmr from failing with EBUSY
114 */ 78 */
115 rc = __fmr_unmap(mr); 79 __fmr_unmap(mr);
116 if (rc)
117 pr_err("rpcrdma: final ib_unmap_fmr for %p failed %i\n",
118 mr, rc);
119 80
120 rc = ib_dealloc_fmr(mr->fmr.fm_mr); 81 rc = ib_dealloc_fmr(mr->fmr.fm_mr);
121 if (rc) 82 if (rc)
@@ -125,40 +86,68 @@ fmr_op_release_mr(struct rpcrdma_mr *mr)
125 kfree(mr); 86 kfree(mr);
126} 87}
127 88
128/* Reset of a single FMR. 89/* MRs are dynamically allocated, so simply clean up and release the MR.
90 * A replacement MR will subsequently be allocated on demand.
129 */ 91 */
130static void 92static void
131fmr_op_recover_mr(struct rpcrdma_mr *mr) 93fmr_mr_recycle_worker(struct work_struct *work)
132{ 94{
95 struct rpcrdma_mr *mr = container_of(work, struct rpcrdma_mr, mr_recycle);
133 struct rpcrdma_xprt *r_xprt = mr->mr_xprt; 96 struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
134 int rc;
135 97
136 /* ORDER: invalidate first */ 98 trace_xprtrdma_mr_recycle(mr);
137 rc = __fmr_unmap(mr);
138 if (rc)
139 goto out_release;
140
141 /* ORDER: then DMA unmap */
142 rpcrdma_mr_unmap_and_put(mr);
143 99
144 r_xprt->rx_stats.mrs_recovered++; 100 trace_xprtrdma_mr_unmap(mr);
145 return;
146
147out_release:
148 pr_err("rpcrdma: FMR reset failed (%d), %p released\n", rc, mr);
149 r_xprt->rx_stats.mrs_orphaned++;
150
151 trace_xprtrdma_dma_unmap(mr);
152 ib_dma_unmap_sg(r_xprt->rx_ia.ri_device, 101 ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
153 mr->mr_sg, mr->mr_nents, mr->mr_dir); 102 mr->mr_sg, mr->mr_nents, mr->mr_dir);
154 103
155 spin_lock(&r_xprt->rx_buf.rb_mrlock); 104 spin_lock(&r_xprt->rx_buf.rb_mrlock);
156 list_del(&mr->mr_all); 105 list_del(&mr->mr_all);
106 r_xprt->rx_stats.mrs_recycled++;
157 spin_unlock(&r_xprt->rx_buf.rb_mrlock); 107 spin_unlock(&r_xprt->rx_buf.rb_mrlock);
158
159 fmr_op_release_mr(mr); 108 fmr_op_release_mr(mr);
160} 109}
161 110
111static int
112fmr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
113{
114 static struct ib_fmr_attr fmr_attr = {
115 .max_pages = RPCRDMA_MAX_FMR_SGES,
116 .max_maps = 1,
117 .page_shift = PAGE_SHIFT
118 };
119
120 mr->fmr.fm_physaddrs = kcalloc(RPCRDMA_MAX_FMR_SGES,
121 sizeof(u64), GFP_KERNEL);
122 if (!mr->fmr.fm_physaddrs)
123 goto out_free;
124
125 mr->mr_sg = kcalloc(RPCRDMA_MAX_FMR_SGES,
126 sizeof(*mr->mr_sg), GFP_KERNEL);
127 if (!mr->mr_sg)
128 goto out_free;
129
130 sg_init_table(mr->mr_sg, RPCRDMA_MAX_FMR_SGES);
131
132 mr->fmr.fm_mr = ib_alloc_fmr(ia->ri_pd, RPCRDMA_FMR_ACCESS_FLAGS,
133 &fmr_attr);
134 if (IS_ERR(mr->fmr.fm_mr))
135 goto out_fmr_err;
136
137 INIT_LIST_HEAD(&mr->mr_list);
138 INIT_WORK(&mr->mr_recycle, fmr_mr_recycle_worker);
139 return 0;
140
141out_fmr_err:
142 dprintk("RPC: %s: ib_alloc_fmr returned %ld\n", __func__,
143 PTR_ERR(mr->fmr.fm_mr));
144
145out_free:
146 kfree(mr->mr_sg);
147 kfree(mr->fmr.fm_physaddrs);
148 return -ENOMEM;
149}
150
162/* On success, sets: 151/* On success, sets:
163 * ep->rep_attr.cap.max_send_wr 152 * ep->rep_attr.cap.max_send_wr
164 * ep->rep_attr.cap.max_recv_wr 153 * ep->rep_attr.cap.max_recv_wr
@@ -187,6 +176,7 @@ fmr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
187 176
188 ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS / 177 ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS /
189 RPCRDMA_MAX_FMR_SGES); 178 RPCRDMA_MAX_FMR_SGES);
179 ia->ri_max_segs += 2; /* segments for head and tail buffers */
190 return 0; 180 return 0;
191} 181}
192 182
@@ -244,7 +234,7 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
244 mr->mr_sg, i, mr->mr_dir); 234 mr->mr_sg, i, mr->mr_dir);
245 if (!mr->mr_nents) 235 if (!mr->mr_nents)
246 goto out_dmamap_err; 236 goto out_dmamap_err;
247 trace_xprtrdma_dma_map(mr); 237 trace_xprtrdma_mr_map(mr);
248 238
249 for (i = 0, dma_pages = mr->fmr.fm_physaddrs; i < mr->mr_nents; i++) 239 for (i = 0, dma_pages = mr->fmr.fm_physaddrs; i < mr->mr_nents; i++)
250 dma_pages[i] = sg_dma_address(&mr->mr_sg[i]); 240 dma_pages[i] = sg_dma_address(&mr->mr_sg[i]);
@@ -305,13 +295,13 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs)
305 list_for_each_entry(mr, mrs, mr_list) { 295 list_for_each_entry(mr, mrs, mr_list) {
306 dprintk("RPC: %s: unmapping fmr %p\n", 296 dprintk("RPC: %s: unmapping fmr %p\n",
307 __func__, &mr->fmr); 297 __func__, &mr->fmr);
308 trace_xprtrdma_localinv(mr); 298 trace_xprtrdma_mr_localinv(mr);
309 list_add_tail(&mr->fmr.fm_mr->list, &unmap_list); 299 list_add_tail(&mr->fmr.fm_mr->list, &unmap_list);
310 } 300 }
311 r_xprt->rx_stats.local_inv_needed++; 301 r_xprt->rx_stats.local_inv_needed++;
312 rc = ib_unmap_fmr(&unmap_list); 302 rc = ib_unmap_fmr(&unmap_list);
313 if (rc) 303 if (rc)
314 goto out_reset; 304 goto out_release;
315 305
316 /* ORDER: Now DMA unmap all of the req's MRs, and return 306 /* ORDER: Now DMA unmap all of the req's MRs, and return
317 * them to the free MW list. 307 * them to the free MW list.
@@ -324,13 +314,13 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs)
324 314
325 return; 315 return;
326 316
327out_reset: 317out_release:
328 pr_err("rpcrdma: ib_unmap_fmr failed (%i)\n", rc); 318 pr_err("rpcrdma: ib_unmap_fmr failed (%i)\n", rc);
329 319
330 while (!list_empty(mrs)) { 320 while (!list_empty(mrs)) {
331 mr = rpcrdma_mr_pop(mrs); 321 mr = rpcrdma_mr_pop(mrs);
332 list_del(&mr->fmr.fm_mr->list); 322 list_del(&mr->fmr.fm_mr->list);
333 fmr_op_recover_mr(mr); 323 rpcrdma_mr_recycle(mr);
334 } 324 }
335} 325}
336 326
@@ -338,7 +328,6 @@ const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
338 .ro_map = fmr_op_map, 328 .ro_map = fmr_op_map,
339 .ro_send = fmr_op_send, 329 .ro_send = fmr_op_send,
340 .ro_unmap_sync = fmr_op_unmap_sync, 330 .ro_unmap_sync = fmr_op_unmap_sync,
341 .ro_recover_mr = fmr_op_recover_mr,
342 .ro_open = fmr_op_open, 331 .ro_open = fmr_op_open,
343 .ro_maxpages = fmr_op_maxpages, 332 .ro_maxpages = fmr_op_maxpages,
344 .ro_init_mr = fmr_op_init_mr, 333 .ro_init_mr = fmr_op_init_mr,
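The fmr_ops.c changes above (and the matching frwr_ops.c changes below)
replace the synchronous ro_recover_mr path with a per-MR recycle worker: a
work_struct is embedded in each MR at init time, any context can kick it, and
the worker climbs back to the MR with container_of to do the teardown in
process context. A minimal sketch of the pattern, assuming the kernel
workqueue API; struct demo_mr and the demo_* helpers are hypothetical
stand-ins for struct rpcrdma_mr:

	#include <linux/workqueue.h>

	struct demo_mr {
		struct work_struct	mr_recycle;
		/* ... mapping and registration state ... */
	};

	static void demo_mr_recycle_worker(struct work_struct *work)
	{
		struct demo_mr *mr =
			container_of(work, struct demo_mr, mr_recycle);

		/* Runs in process context: safe to DMA-unmap, unlink the
		 * MR from the transport's list, and free it. A fresh MR
		 * is allocated later on demand. */
	}

	static void demo_mr_init(struct demo_mr *mr)
	{
		INIT_WORK(&mr->mr_recycle, demo_mr_recycle_worker);
	}

	/* Callers in any context just kick the worker instead of
	 * resetting the MR inline: */
	static inline void demo_mr_recycle(struct demo_mr *mr)
	{
		schedule_work(&mr->mr_recycle);
	}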
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 1bb00dd6ccdb..fc6378cc0c1c 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -97,6 +97,44 @@ out_not_supported:
97 return false; 97 return false;
98} 98}
99 99
100static void
101frwr_op_release_mr(struct rpcrdma_mr *mr)
102{
103 int rc;
104
105 rc = ib_dereg_mr(mr->frwr.fr_mr);
106 if (rc)
107 pr_err("rpcrdma: final ib_dereg_mr for %p returned %i\n",
108 mr, rc);
109 kfree(mr->mr_sg);
110 kfree(mr);
111}
112
113/* MRs are dynamically allocated, so simply clean up and release the MR.
114 * A replacement MR will subsequently be allocated on demand.
115 */
116static void
117frwr_mr_recycle_worker(struct work_struct *work)
118{
119 struct rpcrdma_mr *mr = container_of(work, struct rpcrdma_mr, mr_recycle);
120 enum rpcrdma_frwr_state state = mr->frwr.fr_state;
121 struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
122
123 trace_xprtrdma_mr_recycle(mr);
124
125 if (state != FRWR_FLUSHED_LI) {
126 trace_xprtrdma_mr_unmap(mr);
127 ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
128 mr->mr_sg, mr->mr_nents, mr->mr_dir);
129 }
130
131 spin_lock(&r_xprt->rx_buf.rb_mrlock);
132 list_del(&mr->mr_all);
133 r_xprt->rx_stats.mrs_recycled++;
134 spin_unlock(&r_xprt->rx_buf.rb_mrlock);
135 frwr_op_release_mr(mr);
136}
137
100static int 138static int
101frwr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr) 139frwr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
102{ 140{
@@ -113,6 +151,7 @@ frwr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
113 goto out_list_err; 151 goto out_list_err;
114 152
115 INIT_LIST_HEAD(&mr->mr_list); 153 INIT_LIST_HEAD(&mr->mr_list);
154 INIT_WORK(&mr->mr_recycle, frwr_mr_recycle_worker);
116 sg_init_table(mr->mr_sg, depth); 155 sg_init_table(mr->mr_sg, depth);
117 init_completion(&frwr->fr_linv_done); 156 init_completion(&frwr->fr_linv_done);
118 return 0; 157 return 0;
@@ -131,79 +170,6 @@ out_list_err:
131 return rc; 170 return rc;
132} 171}
133 172
134static void
135frwr_op_release_mr(struct rpcrdma_mr *mr)
136{
137 int rc;
138
139 rc = ib_dereg_mr(mr->frwr.fr_mr);
140 if (rc)
141 pr_err("rpcrdma: final ib_dereg_mr for %p returned %i\n",
142 mr, rc);
143 kfree(mr->mr_sg);
144 kfree(mr);
145}
146
147static int
148__frwr_mr_reset(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
149{
150 struct rpcrdma_frwr *frwr = &mr->frwr;
151 int rc;
152
153 rc = ib_dereg_mr(frwr->fr_mr);
154 if (rc) {
155 pr_warn("rpcrdma: ib_dereg_mr status %d, frwr %p orphaned\n",
156 rc, mr);
157 return rc;
158 }
159
160 frwr->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype,
161 ia->ri_max_frwr_depth);
162 if (IS_ERR(frwr->fr_mr)) {
163 pr_warn("rpcrdma: ib_alloc_mr status %ld, frwr %p orphaned\n",
164 PTR_ERR(frwr->fr_mr), mr);
165 return PTR_ERR(frwr->fr_mr);
166 }
167
168 dprintk("RPC: %s: recovered FRWR %p\n", __func__, frwr);
169 frwr->fr_state = FRWR_IS_INVALID;
170 return 0;
171}
172
173/* Reset of a single FRWR. Generate a fresh rkey by replacing the MR.
174 */
175static void
176frwr_op_recover_mr(struct rpcrdma_mr *mr)
177{
178 enum rpcrdma_frwr_state state = mr->frwr.fr_state;
179 struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
180 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
181 int rc;
182
183 rc = __frwr_mr_reset(ia, mr);
184 if (state != FRWR_FLUSHED_LI) {
185 trace_xprtrdma_dma_unmap(mr);
186 ib_dma_unmap_sg(ia->ri_device,
187 mr->mr_sg, mr->mr_nents, mr->mr_dir);
188 }
189 if (rc)
190 goto out_release;
191
192 rpcrdma_mr_put(mr);
193 r_xprt->rx_stats.mrs_recovered++;
194 return;
195
196out_release:
197 pr_err("rpcrdma: FRWR reset failed %d, %p released\n", rc, mr);
198 r_xprt->rx_stats.mrs_orphaned++;
199
200 spin_lock(&r_xprt->rx_buf.rb_mrlock);
201 list_del(&mr->mr_all);
202 spin_unlock(&r_xprt->rx_buf.rb_mrlock);
203
204 frwr_op_release_mr(mr);
205}
206
207/* On success, sets: 173/* On success, sets:
208 * ep->rep_attr.cap.max_send_wr 174 * ep->rep_attr.cap.max_send_wr
209 * ep->rep_attr.cap.max_recv_wr 175 * ep->rep_attr.cap.max_recv_wr
@@ -276,6 +242,7 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
276 242
277 ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS / 243 ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS /
278 ia->ri_max_frwr_depth); 244 ia->ri_max_frwr_depth);
245 ia->ri_max_segs += 2; /* segments for head and tail buffers */
279 return 0; 246 return 0;
280} 247}
281 248
@@ -384,7 +351,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
384 mr = NULL; 351 mr = NULL;
385 do { 352 do {
386 if (mr) 353 if (mr)
387 rpcrdma_mr_defer_recovery(mr); 354 rpcrdma_mr_recycle(mr);
388 mr = rpcrdma_mr_get(r_xprt); 355 mr = rpcrdma_mr_get(r_xprt);
389 if (!mr) 356 if (!mr)
390 return ERR_PTR(-EAGAIN); 357 return ERR_PTR(-EAGAIN);
@@ -417,7 +384,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
417 mr->mr_nents = ib_dma_map_sg(ia->ri_device, mr->mr_sg, i, mr->mr_dir); 384 mr->mr_nents = ib_dma_map_sg(ia->ri_device, mr->mr_sg, i, mr->mr_dir);
418 if (!mr->mr_nents) 385 if (!mr->mr_nents)
419 goto out_dmamap_err; 386 goto out_dmamap_err;
420 trace_xprtrdma_dma_map(mr); 387 trace_xprtrdma_mr_map(mr);
421 388
422 ibmr = frwr->fr_mr; 389 ibmr = frwr->fr_mr;
423 n = ib_map_mr_sg(ibmr, mr->mr_sg, mr->mr_nents, NULL, PAGE_SIZE); 390 n = ib_map_mr_sg(ibmr, mr->mr_sg, mr->mr_nents, NULL, PAGE_SIZE);
@@ -451,7 +418,7 @@ out_dmamap_err:
451out_mapmr_err: 418out_mapmr_err:
452 pr_err("rpcrdma: failed to map mr %p (%d/%d)\n", 419 pr_err("rpcrdma: failed to map mr %p (%d/%d)\n",
453 frwr->fr_mr, n, mr->mr_nents); 420 frwr->fr_mr, n, mr->mr_nents);
454 rpcrdma_mr_defer_recovery(mr); 421 rpcrdma_mr_recycle(mr);
455 return ERR_PTR(-EIO); 422 return ERR_PTR(-EIO);
456} 423}
457 424
@@ -499,7 +466,7 @@ frwr_op_reminv(struct rpcrdma_rep *rep, struct list_head *mrs)
499 list_for_each_entry(mr, mrs, mr_list) 466 list_for_each_entry(mr, mrs, mr_list)
500 if (mr->mr_handle == rep->rr_inv_rkey) { 467 if (mr->mr_handle == rep->rr_inv_rkey) {
501 list_del_init(&mr->mr_list); 468 list_del_init(&mr->mr_list);
502 trace_xprtrdma_remoteinv(mr); 469 trace_xprtrdma_mr_remoteinv(mr);
503 mr->frwr.fr_state = FRWR_IS_INVALID; 470 mr->frwr.fr_state = FRWR_IS_INVALID;
504 rpcrdma_mr_unmap_and_put(mr); 471 rpcrdma_mr_unmap_and_put(mr);
505 break; /* only one invalidated MR per RPC */ 472 break; /* only one invalidated MR per RPC */
@@ -536,7 +503,7 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs)
536 mr->frwr.fr_state = FRWR_IS_INVALID; 503 mr->frwr.fr_state = FRWR_IS_INVALID;
537 504
538 frwr = &mr->frwr; 505 frwr = &mr->frwr;
539 trace_xprtrdma_localinv(mr); 506 trace_xprtrdma_mr_localinv(mr);
540 507
541 frwr->fr_cqe.done = frwr_wc_localinv; 508 frwr->fr_cqe.done = frwr_wc_localinv;
542 last = &frwr->fr_invwr; 509 last = &frwr->fr_invwr;
@@ -570,7 +537,7 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs)
570 if (bad_wr != first) 537 if (bad_wr != first)
571 wait_for_completion(&frwr->fr_linv_done); 538 wait_for_completion(&frwr->fr_linv_done);
572 if (rc) 539 if (rc)
573 goto reset_mrs; 540 goto out_release;
574 541
575 /* ORDER: Now DMA unmap all of the MRs, and return 542 /* ORDER: Now DMA unmap all of the MRs, and return
576 * them to the free MR list. 543 * them to the free MR list.
@@ -582,22 +549,21 @@ unmap:
582 } 549 }
583 return; 550 return;
584 551
585reset_mrs: 552out_release:
586 pr_err("rpcrdma: FRWR invalidate ib_post_send returned %i\n", rc); 553 pr_err("rpcrdma: FRWR invalidate ib_post_send returned %i\n", rc);
587 554
588 /* Find and reset the MRs in the LOCAL_INV WRs that did not 555 /* Unmap and release the MRs in the LOCAL_INV WRs that did not
589 * get posted. 556 * get posted.
590 */ 557 */
591 while (bad_wr) { 558 while (bad_wr) {
592 frwr = container_of(bad_wr, struct rpcrdma_frwr, 559 frwr = container_of(bad_wr, struct rpcrdma_frwr,
593 fr_invwr); 560 fr_invwr);
594 mr = container_of(frwr, struct rpcrdma_mr, frwr); 561 mr = container_of(frwr, struct rpcrdma_mr, frwr);
595
596 __frwr_mr_reset(ia, mr);
597
598 bad_wr = bad_wr->next; 562 bad_wr = bad_wr->next;
563
564 list_del(&mr->mr_list);
565 frwr_op_release_mr(mr);
599 } 566 }
600 goto unmap;
601} 567}
602 568
603const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = { 569const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
@@ -605,7 +571,6 @@ const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
605 .ro_send = frwr_op_send, 571 .ro_send = frwr_op_send,
606 .ro_reminv = frwr_op_reminv, 572 .ro_reminv = frwr_op_reminv,
607 .ro_unmap_sync = frwr_op_unmap_sync, 573 .ro_unmap_sync = frwr_op_unmap_sync,
608 .ro_recover_mr = frwr_op_recover_mr,
609 .ro_open = frwr_op_open, 574 .ro_open = frwr_op_open,
610 .ro_maxpages = frwr_op_maxpages, 575 .ro_maxpages = frwr_op_maxpages,
611 .ro_init_mr = frwr_op_init_mr, 576 .ro_init_mr = frwr_op_init_mr,
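The new frwr_op_unmap_sync error path walks the returned bad_wr chain and
recovers each MR with two container_of steps, because fr_invwr is embedded in
rpcrdma_frwr, which is embedded in rpcrdma_mr. A runnable userspace model of
that pointer arithmetic; the struct layouts are simplified stand-ins, not the
kernel definitions:

	#include <stddef.h>
	#include <stdio.h>

	#define container_of(ptr, type, member) \
		((type *)((char *)(ptr) - offsetof(type, member)))

	struct send_wr { struct send_wr *next; };
	struct demo_frwr { int fr_state; struct send_wr fr_invwr; };
	struct demo_mr { struct demo_frwr frwr; int mr_handle; };

	int main(void)
	{
		struct demo_mr mr = { .mr_handle = 42 };
		struct send_wr *bad_wr = &mr.frwr.fr_invwr;

		/* bad_wr -> enclosing frwr -> enclosing mr */
		struct demo_frwr *frwr =
			container_of(bad_wr, struct demo_frwr, fr_invwr);
		struct demo_mr *found =
			container_of(frwr, struct demo_mr, frwr);

		printf("recovered mr_handle=%d\n", found->mr_handle);
		return 0;
	}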
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 53fa95d60015..9f53e0240035 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -71,7 +71,6 @@ static unsigned int rpcrdma_max_call_header_size(unsigned int maxsegs)
71 size = RPCRDMA_HDRLEN_MIN; 71 size = RPCRDMA_HDRLEN_MIN;
72 72
73 /* Maximum Read list size */ 73 /* Maximum Read list size */
74 maxsegs += 2; /* segment for head and tail buffers */
75 size = maxsegs * rpcrdma_readchunk_maxsz * sizeof(__be32); 74 size = maxsegs * rpcrdma_readchunk_maxsz * sizeof(__be32);
76 75
77 /* Minimal Read chunk size */ 76 /* Minimal Read chunk size */
@@ -97,7 +96,6 @@ static unsigned int rpcrdma_max_reply_header_size(unsigned int maxsegs)
97 size = RPCRDMA_HDRLEN_MIN; 96 size = RPCRDMA_HDRLEN_MIN;
98 97
99 /* Maximum Write list size */ 98 /* Maximum Write list size */
100 maxsegs += 2; /* segment for head and tail buffers */
101 size = sizeof(__be32); /* segment count */ 99 size = sizeof(__be32); /* segment count */
102 size += maxsegs * rpcrdma_segment_maxsz * sizeof(__be32); 100 size += maxsegs * rpcrdma_segment_maxsz * sizeof(__be32);
103 size += sizeof(__be32); /* list discriminator */ 101 size += sizeof(__be32); /* list discriminator */
@@ -805,7 +803,7 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
805 struct rpcrdma_mr *mr; 803 struct rpcrdma_mr *mr;
806 804
807 mr = rpcrdma_mr_pop(&req->rl_registered); 805 mr = rpcrdma_mr_pop(&req->rl_registered);
808 rpcrdma_mr_defer_recovery(mr); 806 rpcrdma_mr_recycle(mr);
809 } 807 }
810 808
811 /* This implementation supports the following combinations 809 /* This implementation supports the following combinations
@@ -1216,7 +1214,6 @@ void rpcrdma_complete_rqst(struct rpcrdma_rep *rep)
1216 struct rpcrdma_xprt *r_xprt = rep->rr_rxprt; 1214 struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
1217 struct rpc_xprt *xprt = &r_xprt->rx_xprt; 1215 struct rpc_xprt *xprt = &r_xprt->rx_xprt;
1218 struct rpc_rqst *rqst = rep->rr_rqst; 1216 struct rpc_rqst *rqst = rep->rr_rqst;
1219 unsigned long cwnd;
1220 int status; 1217 int status;
1221 1218
1222 xprt->reestablish_timeout = 0; 1219 xprt->reestablish_timeout = 0;
@@ -1239,11 +1236,6 @@ void rpcrdma_complete_rqst(struct rpcrdma_rep *rep)
1239 1236
1240out: 1237out:
1241 spin_lock(&xprt->queue_lock); 1238 spin_lock(&xprt->queue_lock);
1242 cwnd = xprt->cwnd;
1243 xprt->cwnd = r_xprt->rx_buf.rb_credits << RPC_CWNDSHIFT;
1244 if (xprt->cwnd > cwnd)
1245 xprt_release_rqst_cong(rqst->rq_task);
1246
1247 xprt_complete_rqst(rqst->rq_task, status); 1239 xprt_complete_rqst(rqst->rq_task, status);
1248 xprt_unpin_rqst(rqst); 1240 xprt_unpin_rqst(rqst);
1249 spin_unlock(&xprt->queue_lock); 1241 spin_unlock(&xprt->queue_lock);
@@ -1350,14 +1342,18 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
1350 if (!rqst) 1342 if (!rqst)
1351 goto out_norqst; 1343 goto out_norqst;
1352 xprt_pin_rqst(rqst); 1344 xprt_pin_rqst(rqst);
1345 spin_unlock(&xprt->queue_lock);
1353 1346
1354 if (credits == 0) 1347 if (credits == 0)
1355 credits = 1; /* don't deadlock */ 1348 credits = 1; /* don't deadlock */
1356 else if (credits > buf->rb_max_requests) 1349 else if (credits > buf->rb_max_requests)
1357 credits = buf->rb_max_requests; 1350 credits = buf->rb_max_requests;
1358 buf->rb_credits = credits; 1351 if (buf->rb_credits != credits) {
1359 1352 spin_lock_bh(&xprt->transport_lock);
1360 spin_unlock(&xprt->queue_lock); 1353 buf->rb_credits = credits;
1354 xprt->cwnd = credits << RPC_CWNDSHIFT;
1355 spin_unlock_bh(&xprt->transport_lock);
1356 }
1361 1357
1362 req = rpcr_to_rdmar(rqst); 1358 req = rpcr_to_rdmar(rqst);
1363 req->rl_reply = rep; 1359 req->rl_reply = rep;
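With the rpc_rdma.c change above, the credit grant is clamped and the
congestion window updated in the reply handler, under transport_lock and only
when the grant actually changes. A sketch of the clamp-and-scale arithmetic;
RPC_CWNDSHIFT is defined in the kernel's sunrpc headers, and the value below
is only for this demo:

	#include <stdio.h>

	#define RPC_CWNDSHIFT 8U	/* demo value, see linux/sunrpc/xprt.h */

	static unsigned int clamp_credits(unsigned int credits,
					  unsigned int max_requests)
	{
		if (credits == 0)
			credits = 1;		/* don't deadlock */
		else if (credits > max_requests)
			credits = max_requests;
		return credits;
	}

	int main(void)
	{
		unsigned int credits = clamp_credits(0, 128);
		unsigned long cwnd = credits << RPC_CWNDSHIFT;

		printf("credits=%u cwnd=%lu\n", credits, cwnd);
		return 0;
	}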
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index 992312504cfd..d3a1a237cee6 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -242,6 +242,7 @@ static void
242xprt_rdma_bc_close(struct rpc_xprt *xprt) 242xprt_rdma_bc_close(struct rpc_xprt *xprt)
243{ 243{
244 dprintk("svcrdma: %s: xprt %p\n", __func__, xprt); 244 dprintk("svcrdma: %s: xprt %p\n", __func__, xprt);
245 xprt->cwnd = RPC_CWNDSHIFT;
245} 246}
246 247
247static void 248static void
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index a5a6a4a353f2..ae2a83828953 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -225,69 +225,59 @@ xprt_rdma_free_addresses(struct rpc_xprt *xprt)
225 } 225 }
226} 226}
227 227
228void 228/**
229rpcrdma_conn_func(struct rpcrdma_ep *ep) 229 * xprt_rdma_connect_worker - establish connection in the background
230{ 230 * @work: worker thread context
231 schedule_delayed_work(&ep->rep_connect_worker, 0); 231 *
232} 232 * Requester holds the xprt's send lock to prevent activity on this
233 233 * transport while a fresh connection is being established. RPC tasks
234void 234 * sleep on the xprt's pending queue waiting for connect to complete.
235rpcrdma_connect_worker(struct work_struct *work) 235 */
236{
237 struct rpcrdma_ep *ep =
238 container_of(work, struct rpcrdma_ep, rep_connect_worker.work);
239 struct rpcrdma_xprt *r_xprt =
240 container_of(ep, struct rpcrdma_xprt, rx_ep);
241 struct rpc_xprt *xprt = &r_xprt->rx_xprt;
242
243 spin_lock_bh(&xprt->transport_lock);
244 if (ep->rep_connected > 0) {
245 if (!xprt_test_and_set_connected(xprt))
246 xprt_wake_pending_tasks(xprt, 0);
247 } else {
248 if (xprt_test_and_clear_connected(xprt))
249 xprt_wake_pending_tasks(xprt, -ENOTCONN);
250 }
251 spin_unlock_bh(&xprt->transport_lock);
252}
253
254static void 236static void
255xprt_rdma_connect_worker(struct work_struct *work) 237xprt_rdma_connect_worker(struct work_struct *work)
256{ 238{
257 struct rpcrdma_xprt *r_xprt = container_of(work, struct rpcrdma_xprt, 239 struct rpcrdma_xprt *r_xprt = container_of(work, struct rpcrdma_xprt,
258 rx_connect_worker.work); 240 rx_connect_worker.work);
259 struct rpc_xprt *xprt = &r_xprt->rx_xprt; 241 struct rpc_xprt *xprt = &r_xprt->rx_xprt;
260 int rc = 0; 242 int rc;
261
262 xprt_clear_connected(xprt);
263 243
264 rc = rpcrdma_ep_connect(&r_xprt->rx_ep, &r_xprt->rx_ia); 244 rc = rpcrdma_ep_connect(&r_xprt->rx_ep, &r_xprt->rx_ia);
265 if (rc)
266 xprt_wake_pending_tasks(xprt, rc);
267
268 xprt_clear_connecting(xprt); 245 xprt_clear_connecting(xprt);
246 if (r_xprt->rx_ep.rep_connected > 0) {
247 if (!xprt_test_and_set_connected(xprt)) {
248 xprt->stat.connect_count++;
249 xprt->stat.connect_time += (long)jiffies -
250 xprt->stat.connect_start;
251 xprt_wake_pending_tasks(xprt, -EAGAIN);
252 }
253 } else {
254 if (xprt_test_and_clear_connected(xprt))
255 xprt_wake_pending_tasks(xprt, rc);
256 }
269} 257}
270 258
259/**
260 * xprt_rdma_inject_disconnect - inject a connection fault
261 * @xprt: transport context
262 *
263 * If @xprt is connected, disconnect it to simulate spurious connection
264 * loss.
265 */
271static void 266static void
272xprt_rdma_inject_disconnect(struct rpc_xprt *xprt) 267xprt_rdma_inject_disconnect(struct rpc_xprt *xprt)
273{ 268{
274 struct rpcrdma_xprt *r_xprt = container_of(xprt, struct rpcrdma_xprt, 269 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
275 rx_xprt);
276 270
277 trace_xprtrdma_inject_dsc(r_xprt); 271 trace_xprtrdma_inject_dsc(r_xprt);
278 rdma_disconnect(r_xprt->rx_ia.ri_id); 272 rdma_disconnect(r_xprt->rx_ia.ri_id);
279} 273}
280 274
281/* 275/**
282 * xprt_rdma_destroy 276 * xprt_rdma_destroy - Full tear down of transport
277 * @xprt: doomed transport context
283 * 278 *
284 * Destroy the xprt. 279 * Caller guarantees there will be no more calls to us with
285 * Free all memory associated with the object, including its own. 280 * this @xprt.
286 * NOTE: none of the *destroy methods free memory for their top-level
287 * objects, even though they may have allocated it (they do free
288 * private memory). It's up to the caller to handle it. In this
289 * case (RDMA transport), all structure memory is inlined with the
290 * struct rpcrdma_xprt.
291 */ 281 */
292static void 282static void
293xprt_rdma_destroy(struct rpc_xprt *xprt) 283xprt_rdma_destroy(struct rpc_xprt *xprt)
@@ -298,8 +288,6 @@ xprt_rdma_destroy(struct rpc_xprt *xprt)
298 288
299 cancel_delayed_work_sync(&r_xprt->rx_connect_worker); 289 cancel_delayed_work_sync(&r_xprt->rx_connect_worker);
300 290
301 xprt_clear_connected(xprt);
302
303 rpcrdma_ep_destroy(&r_xprt->rx_ep, &r_xprt->rx_ia); 291 rpcrdma_ep_destroy(&r_xprt->rx_ep, &r_xprt->rx_ia);
304 rpcrdma_buffer_destroy(&r_xprt->rx_buf); 292 rpcrdma_buffer_destroy(&r_xprt->rx_buf);
305 rpcrdma_ia_close(&r_xprt->rx_ia); 293 rpcrdma_ia_close(&r_xprt->rx_ia);
@@ -442,11 +430,12 @@ out1:
442} 430}
443 431
444/** 432/**
445 * xprt_rdma_close - Close down RDMA connection 433 * xprt_rdma_close - close a transport connection
446 * @xprt: generic transport to be closed 434 * @xprt: transport context
447 * 435 *
448 * Called during transport shutdown reconnect, or device 436 * Called during transport shutdown, reconnect, or device removal.
449 * removal. Caller holds the transport's write lock. 437 * Caller holds @xprt's send lock to prevent activity on this
438 * transport while the connection is torn down.
450 */ 439 */
451static void 440static void
452xprt_rdma_close(struct rpc_xprt *xprt) 441xprt_rdma_close(struct rpc_xprt *xprt)
@@ -468,6 +457,12 @@ xprt_rdma_close(struct rpc_xprt *xprt)
468 xprt->reestablish_timeout = 0; 457 xprt->reestablish_timeout = 0;
469 xprt_disconnect_done(xprt); 458 xprt_disconnect_done(xprt);
470 rpcrdma_ep_disconnect(ep, ia); 459 rpcrdma_ep_disconnect(ep, ia);
460
461 /* Prepare @xprt for the next connection by reinitializing
462 * its credit grant to one (see RFC 8166, Section 3.3.3).
463 */
464 r_xprt->rx_buf.rb_credits = 1;
465 xprt->cwnd = RPC_CWNDSHIFT;
471} 466}
472 467
473/** 468/**
@@ -519,6 +514,12 @@ xprt_rdma_timer(struct rpc_xprt *xprt, struct rpc_task *task)
519 xprt_force_disconnect(xprt); 514 xprt_force_disconnect(xprt);
520} 515}
521 516
517/**
518 * xprt_rdma_connect - try to establish a transport connection
519 * @xprt: transport state
520 * @task: RPC scheduler context
521 *
522 */
522static void 523static void
523xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task) 524xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task)
524{ 525{
@@ -638,13 +639,6 @@ rpcrdma_get_recvbuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
638 * 0: Success; rq_buffer points to RPC buffer to use 639 * 0: Success; rq_buffer points to RPC buffer to use
639 * ENOMEM: Out of memory, call again later 640 * ENOMEM: Out of memory, call again later
640 * EIO: A permanent error occurred, do not retry 641 * EIO: A permanent error occurred, do not retry
641 *
642 * The RDMA allocate/free functions need the task structure as a place
643 * to hide the struct rpcrdma_req, which is necessary for the actual
644 * send/recv sequence.
645 *
646 * xprt_rdma_allocate provides buffers that are already mapped for
647 * DMA, and a local DMA lkey is provided for each.
648 */ 642 */
649static int 643static int
650xprt_rdma_allocate(struct rpc_task *task) 644xprt_rdma_allocate(struct rpc_task *task)
@@ -768,7 +762,7 @@ void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
768 0, /* need a local port? */ 762 0, /* need a local port? */
769 xprt->stat.bind_count, 763 xprt->stat.bind_count,
770 xprt->stat.connect_count, 764 xprt->stat.connect_count,
771 xprt->stat.connect_time, 765 xprt->stat.connect_time / HZ,
772 idle_time, 766 idle_time,
773 xprt->stat.sends, 767 xprt->stat.sends,
774 xprt->stat.recvs, 768 xprt->stat.recvs,
@@ -788,7 +782,7 @@ void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
788 r_xprt->rx_stats.bad_reply_count, 782 r_xprt->rx_stats.bad_reply_count,
789 r_xprt->rx_stats.nomsg_call_count); 783 r_xprt->rx_stats.nomsg_call_count);
790 seq_printf(seq, "%lu %lu %lu %lu %lu %lu\n", 784 seq_printf(seq, "%lu %lu %lu %lu %lu %lu\n",
791 r_xprt->rx_stats.mrs_recovered, 785 r_xprt->rx_stats.mrs_recycled,
792 r_xprt->rx_stats.mrs_orphaned, 786 r_xprt->rx_stats.mrs_orphaned,
793 r_xprt->rx_stats.mrs_allocated, 787 r_xprt->rx_stats.mrs_allocated,
794 r_xprt->rx_stats.local_inv_needed, 788 r_xprt->rx_stats.local_inv_needed,
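Two connection-state points in the transport.c hunks above are worth calling
out. First, xprt_rdma_close re-arms the credit grant: per RFC 8166, Section
3.3.3 a freshly connected server may have granted only one credit, so the
client must not send a burst based on the stale grant. Second, the connect
worker wakes pending tasks with -EAGAIN on success so they re-drive their
state machine through the new connection. A hedged condensation of that
completion logic, not a drop-in, with error handling trimmed:

	#include <linux/errno.h>
	#include <linux/jiffies.h>
	#include <linux/sunrpc/xprt.h>

	static void demo_connect_done(struct rpc_xprt *xprt,
				      int ep_connected, int rc)
	{
		xprt_clear_connecting(xprt);
		if (ep_connected > 0) {
			if (!xprt_test_and_set_connected(xprt)) {
				/* record stats where the connection
				 * actually comes up, then make waiting
				 * RPCs retry on the fresh connection */
				xprt->stat.connect_count++;
				xprt->stat.connect_time += (long)jiffies -
						xprt->stat.connect_start;
				xprt_wake_pending_tasks(xprt, -EAGAIN);
			}
		} else if (xprt_test_and_clear_connected(xprt)) {
			xprt_wake_pending_tasks(xprt, rc);
		}
	}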
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 956a5ea47b58..3ddba94c939f 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -108,20 +108,48 @@ rpcrdma_destroy_wq(void)
108 } 108 }
109} 109}
110 110
111/**
112 * rpcrdma_disconnect_worker - Force a disconnect
113 * @work: endpoint to be disconnected
114 *
115 * Provider callbacks can possibly run in an IRQ context. This function
116 * is invoked in a worker thread to guarantee that disconnect wake-up
117 * calls are always done in process context.
118 */
119static void
120rpcrdma_disconnect_worker(struct work_struct *work)
121{
122 struct rpcrdma_ep *ep = container_of(work, struct rpcrdma_ep,
123 rep_disconnect_worker.work);
124 struct rpcrdma_xprt *r_xprt =
125 container_of(ep, struct rpcrdma_xprt, rx_ep);
126
127 xprt_force_disconnect(&r_xprt->rx_xprt);
128}
129
130/**
131 * rpcrdma_qp_event_handler - Handle one QP event (error notification)
132 * @event: details of the event
133 * @context: ep that owns QP where event occurred
134 *
135 * Called from the RDMA provider (device driver) possibly in an interrupt
136 * context.
137 */
111static void 138static void
112rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context) 139rpcrdma_qp_event_handler(struct ib_event *event, void *context)
113{ 140{
114 struct rpcrdma_ep *ep = context; 141 struct rpcrdma_ep *ep = context;
115 struct rpcrdma_xprt *r_xprt = container_of(ep, struct rpcrdma_xprt, 142 struct rpcrdma_xprt *r_xprt = container_of(ep, struct rpcrdma_xprt,
116 rx_ep); 143 rx_ep);
117 144
118 trace_xprtrdma_qp_error(r_xprt, event); 145 trace_xprtrdma_qp_event(r_xprt, event);
119 pr_err("rpcrdma: %s on device %s ep %p\n", 146 pr_err("rpcrdma: %s on device %s connected to %s:%s\n",
120 ib_event_msg(event->event), event->device->name, context); 147 ib_event_msg(event->event), event->device->name,
148 rpcrdma_addrstr(r_xprt), rpcrdma_portstr(r_xprt));
121 149
122 if (ep->rep_connected == 1) { 150 if (ep->rep_connected == 1) {
123 ep->rep_connected = -EIO; 151 ep->rep_connected = -EIO;
124 rpcrdma_conn_func(ep); 152 schedule_delayed_work(&ep->rep_disconnect_worker, 0);
125 wake_up_all(&ep->rep_connect_wait); 153 wake_up_all(&ep->rep_connect_wait);
126 } 154 }
127} 155}
@@ -219,38 +247,48 @@ rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt,
219 rpcrdma_set_max_header_sizes(r_xprt); 247 rpcrdma_set_max_header_sizes(r_xprt);
220} 248}
221 249
250/**
251 * rpcrdma_cm_event_handler - Handle RDMA CM events
252 * @id: rdma_cm_id on which an event has occurred
253 * @event: details of the event
254 *
255 * Called with @id's mutex held. Returns 1 if caller should
256 * destroy @id, otherwise 0.
257 */
222static int 258static int
223rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) 259rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
224{ 260{
225 struct rpcrdma_xprt *xprt = id->context; 261 struct rpcrdma_xprt *r_xprt = id->context;
226 struct rpcrdma_ia *ia = &xprt->rx_ia; 262 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
227 struct rpcrdma_ep *ep = &xprt->rx_ep; 263 struct rpcrdma_ep *ep = &r_xprt->rx_ep;
228 int connstate = 0; 264 struct rpc_xprt *xprt = &r_xprt->rx_xprt;
265
266 might_sleep();
229 267
230 trace_xprtrdma_conn_upcall(xprt, event); 268 trace_xprtrdma_cm_event(r_xprt, event);
231 switch (event->event) { 269 switch (event->event) {
232 case RDMA_CM_EVENT_ADDR_RESOLVED: 270 case RDMA_CM_EVENT_ADDR_RESOLVED:
233 case RDMA_CM_EVENT_ROUTE_RESOLVED: 271 case RDMA_CM_EVENT_ROUTE_RESOLVED:
234 ia->ri_async_rc = 0; 272 ia->ri_async_rc = 0;
235 complete(&ia->ri_done); 273 complete(&ia->ri_done);
236 break; 274 return 0;
237 case RDMA_CM_EVENT_ADDR_ERROR: 275 case RDMA_CM_EVENT_ADDR_ERROR:
238 ia->ri_async_rc = -EPROTO; 276 ia->ri_async_rc = -EPROTO;
239 complete(&ia->ri_done); 277 complete(&ia->ri_done);
240 break; 278 return 0;
241 case RDMA_CM_EVENT_ROUTE_ERROR: 279 case RDMA_CM_EVENT_ROUTE_ERROR:
242 ia->ri_async_rc = -ENETUNREACH; 280 ia->ri_async_rc = -ENETUNREACH;
243 complete(&ia->ri_done); 281 complete(&ia->ri_done);
244 break; 282 return 0;
245 case RDMA_CM_EVENT_DEVICE_REMOVAL: 283 case RDMA_CM_EVENT_DEVICE_REMOVAL:
246#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) 284#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
247 pr_info("rpcrdma: removing device %s for %s:%s\n", 285 pr_info("rpcrdma: removing device %s for %s:%s\n",
248 ia->ri_device->name, 286 ia->ri_device->name,
249 rpcrdma_addrstr(xprt), rpcrdma_portstr(xprt)); 287 rpcrdma_addrstr(r_xprt), rpcrdma_portstr(r_xprt));
250#endif 288#endif
251 set_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags); 289 set_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags);
252 ep->rep_connected = -ENODEV; 290 ep->rep_connected = -ENODEV;
253 xprt_force_disconnect(&xprt->rx_xprt); 291 xprt_force_disconnect(xprt);
254 wait_for_completion(&ia->ri_remove_done); 292 wait_for_completion(&ia->ri_remove_done);
255 293
256 ia->ri_id = NULL; 294 ia->ri_id = NULL;
@@ -258,41 +296,40 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
258 /* Return 1 to ensure the core destroys the id. */ 296 /* Return 1 to ensure the core destroys the id. */
259 return 1; 297 return 1;
260 case RDMA_CM_EVENT_ESTABLISHED: 298 case RDMA_CM_EVENT_ESTABLISHED:
261 ++xprt->rx_xprt.connect_cookie; 299 ++xprt->connect_cookie;
262 connstate = 1; 300 ep->rep_connected = 1;
263 rpcrdma_update_connect_private(xprt, &event->param.conn); 301 rpcrdma_update_connect_private(r_xprt, &event->param.conn);
264 goto connected; 302 wake_up_all(&ep->rep_connect_wait);
303 break;
265 case RDMA_CM_EVENT_CONNECT_ERROR: 304 case RDMA_CM_EVENT_CONNECT_ERROR:
266 connstate = -ENOTCONN; 305 ep->rep_connected = -ENOTCONN;
267 goto connected; 306 goto disconnected;
268 case RDMA_CM_EVENT_UNREACHABLE: 307 case RDMA_CM_EVENT_UNREACHABLE:
269 connstate = -ENETUNREACH; 308 ep->rep_connected = -ENETUNREACH;
270 goto connected; 309 goto disconnected;
271 case RDMA_CM_EVENT_REJECTED: 310 case RDMA_CM_EVENT_REJECTED:
272 dprintk("rpcrdma: connection to %s:%s rejected: %s\n", 311 dprintk("rpcrdma: connection to %s:%s rejected: %s\n",
273 rpcrdma_addrstr(xprt), rpcrdma_portstr(xprt), 312 rpcrdma_addrstr(r_xprt), rpcrdma_portstr(r_xprt),
274 rdma_reject_msg(id, event->status)); 313 rdma_reject_msg(id, event->status));
275 connstate = -ECONNREFUSED; 314 ep->rep_connected = -ECONNREFUSED;
276 if (event->status == IB_CM_REJ_STALE_CONN) 315 if (event->status == IB_CM_REJ_STALE_CONN)
277 connstate = -EAGAIN; 316 ep->rep_connected = -EAGAIN;
278 goto connected; 317 goto disconnected;
279 case RDMA_CM_EVENT_DISCONNECTED: 318 case RDMA_CM_EVENT_DISCONNECTED:
280 ++xprt->rx_xprt.connect_cookie; 319 ++xprt->connect_cookie;
281 connstate = -ECONNABORTED; 320 ep->rep_connected = -ECONNABORTED;
282connected: 321disconnected:
283 ep->rep_connected = connstate; 322 xprt_force_disconnect(xprt);
284 rpcrdma_conn_func(ep);
285 wake_up_all(&ep->rep_connect_wait); 323 wake_up_all(&ep->rep_connect_wait);
286 /*FALLTHROUGH*/ 324 break;
287 default: 325 default:
288 dprintk("RPC: %s: %s:%s on %s/%s (ep 0x%p): %s\n",
289 __func__,
290 rpcrdma_addrstr(xprt), rpcrdma_portstr(xprt),
291 ia->ri_device->name, ia->ri_ops->ro_displayname,
292 ep, rdma_event_msg(event->event));
293 break; 326 break;
294 } 327 }
295 328
329 dprintk("RPC: %s: %s:%s on %s/%s: %s\n", __func__,
330 rpcrdma_addrstr(r_xprt), rpcrdma_portstr(r_xprt),
331 ia->ri_device->name, ia->ri_ops->ro_displayname,
332 rdma_event_msg(event->event));
296 return 0; 333 return 0;
297} 334}
298 335
@@ -308,7 +345,7 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, struct rpcrdma_ia *ia)
308 init_completion(&ia->ri_done); 345 init_completion(&ia->ri_done);
309 init_completion(&ia->ri_remove_done); 346 init_completion(&ia->ri_remove_done);
310 347
311 id = rdma_create_id(xprt->rx_xprt.xprt_net, rpcrdma_conn_upcall, 348 id = rdma_create_id(xprt->rx_xprt.xprt_net, rpcrdma_cm_event_handler,
312 xprt, RDMA_PS_TCP, IB_QPT_RC); 349 xprt, RDMA_PS_TCP, IB_QPT_RC);
313 if (IS_ERR(id)) { 350 if (IS_ERR(id)) {
314 rc = PTR_ERR(id); 351 rc = PTR_ERR(id);
@@ -519,7 +556,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
519 if (rc) 556 if (rc)
520 return rc; 557 return rc;
521 558
522 ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall; 559 ep->rep_attr.event_handler = rpcrdma_qp_event_handler;
523 ep->rep_attr.qp_context = ep; 560 ep->rep_attr.qp_context = ep;
524 ep->rep_attr.srq = NULL; 561 ep->rep_attr.srq = NULL;
525 ep->rep_attr.cap.max_send_sge = max_sge; 562 ep->rep_attr.cap.max_send_sge = max_sge;
@@ -542,7 +579,8 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
542 cdata->max_requests >> 2); 579 cdata->max_requests >> 2);
543 ep->rep_send_count = ep->rep_send_batch; 580 ep->rep_send_count = ep->rep_send_batch;
544 init_waitqueue_head(&ep->rep_connect_wait); 581 init_waitqueue_head(&ep->rep_connect_wait);
545 INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker); 582 INIT_DELAYED_WORK(&ep->rep_disconnect_worker,
583 rpcrdma_disconnect_worker);
546 584
547 sendcq = ib_alloc_cq(ia->ri_device, NULL, 585 sendcq = ib_alloc_cq(ia->ri_device, NULL,
548 ep->rep_attr.cap.max_send_wr + 1, 586 ep->rep_attr.cap.max_send_wr + 1,
@@ -615,7 +653,7 @@ out1:
615void 653void
616rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) 654rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
617{ 655{
618 cancel_delayed_work_sync(&ep->rep_connect_worker); 656 cancel_delayed_work_sync(&ep->rep_disconnect_worker);
619 657
620 if (ia->ri_id && ia->ri_id->qp) { 658 if (ia->ri_id && ia->ri_id->qp) {
621 rpcrdma_ep_disconnect(ep, ia); 659 rpcrdma_ep_disconnect(ep, ia);
@@ -728,6 +766,7 @@ rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
728{ 766{
729 struct rpcrdma_xprt *r_xprt = container_of(ia, struct rpcrdma_xprt, 767 struct rpcrdma_xprt *r_xprt = container_of(ia, struct rpcrdma_xprt,
730 rx_ia); 768 rx_ia);
769 struct rpc_xprt *xprt = &r_xprt->rx_xprt;
731 int rc; 770 int rc;
732 771
733retry: 772retry:
@@ -754,6 +793,8 @@ retry:
754 } 793 }
755 794
756 ep->rep_connected = 0; 795 ep->rep_connected = 0;
796 xprt_clear_connected(xprt);
797
757 rpcrdma_post_recvs(r_xprt, true); 798 rpcrdma_post_recvs(r_xprt, true);
758 799
759 rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma); 800 rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma);
@@ -877,7 +918,6 @@ static int rpcrdma_sendctxs_create(struct rpcrdma_xprt *r_xprt)
877 sc->sc_xprt = r_xprt; 918 sc->sc_xprt = r_xprt;
878 buf->rb_sc_ctxs[i] = sc; 919 buf->rb_sc_ctxs[i] = sc;
879 } 920 }
880 buf->rb_flags = 0;
881 921
882 return 0; 922 return 0;
883 923
@@ -978,39 +1018,6 @@ rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc)
978} 1018}
979 1019
980static void 1020static void
981rpcrdma_mr_recovery_worker(struct work_struct *work)
982{
983 struct rpcrdma_buffer *buf = container_of(work, struct rpcrdma_buffer,
984 rb_recovery_worker.work);
985 struct rpcrdma_mr *mr;
986
987 spin_lock(&buf->rb_recovery_lock);
988 while (!list_empty(&buf->rb_stale_mrs)) {
989 mr = rpcrdma_mr_pop(&buf->rb_stale_mrs);
990 spin_unlock(&buf->rb_recovery_lock);
991
992 trace_xprtrdma_recover_mr(mr);
993 mr->mr_xprt->rx_ia.ri_ops->ro_recover_mr(mr);
994
995 spin_lock(&buf->rb_recovery_lock);
996 }
997 spin_unlock(&buf->rb_recovery_lock);
998}
999
1000void
1001rpcrdma_mr_defer_recovery(struct rpcrdma_mr *mr)
1002{
1003 struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
1004 struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
1005
1006 spin_lock(&buf->rb_recovery_lock);
1007 rpcrdma_mr_push(mr, &buf->rb_stale_mrs);
1008 spin_unlock(&buf->rb_recovery_lock);
1009
1010 schedule_delayed_work(&buf->rb_recovery_worker, 0);
1011}
1012
1013static void
1014rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt) 1021rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt)
1015{ 1022{
1016 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 1023 struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
@@ -1019,7 +1026,7 @@ rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt)
1019 LIST_HEAD(free); 1026 LIST_HEAD(free);
1020 LIST_HEAD(all); 1027 LIST_HEAD(all);
1021 1028
1022 for (count = 0; count < 3; count++) { 1029 for (count = 0; count < ia->ri_max_segs; count++) {
1023 struct rpcrdma_mr *mr; 1030 struct rpcrdma_mr *mr;
1024 int rc; 1031 int rc;
1025 1032
@@ -1138,18 +1145,15 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
1138 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 1145 struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
1139 int i, rc; 1146 int i, rc;
1140 1147
1148 buf->rb_flags = 0;
1141 buf->rb_max_requests = r_xprt->rx_data.max_requests; 1149 buf->rb_max_requests = r_xprt->rx_data.max_requests;
1142 buf->rb_bc_srv_max_requests = 0; 1150 buf->rb_bc_srv_max_requests = 0;
1143 spin_lock_init(&buf->rb_mrlock); 1151 spin_lock_init(&buf->rb_mrlock);
1144 spin_lock_init(&buf->rb_lock); 1152 spin_lock_init(&buf->rb_lock);
1145 spin_lock_init(&buf->rb_recovery_lock);
1146 INIT_LIST_HEAD(&buf->rb_mrs); 1153 INIT_LIST_HEAD(&buf->rb_mrs);
1147 INIT_LIST_HEAD(&buf->rb_all); 1154 INIT_LIST_HEAD(&buf->rb_all);
1148 INIT_LIST_HEAD(&buf->rb_stale_mrs);
1149 INIT_DELAYED_WORK(&buf->rb_refresh_worker, 1155 INIT_DELAYED_WORK(&buf->rb_refresh_worker,
1150 rpcrdma_mr_refresh_worker); 1156 rpcrdma_mr_refresh_worker);
1151 INIT_DELAYED_WORK(&buf->rb_recovery_worker,
1152 rpcrdma_mr_recovery_worker);
1153 1157
1154 rpcrdma_mrs_create(r_xprt); 1158 rpcrdma_mrs_create(r_xprt);
1155 1159
@@ -1233,7 +1237,6 @@ rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf)
1233void 1237void
1234rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) 1238rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
1235{ 1239{
1236 cancel_delayed_work_sync(&buf->rb_recovery_worker);
1237 cancel_delayed_work_sync(&buf->rb_refresh_worker); 1240 cancel_delayed_work_sync(&buf->rb_refresh_worker);
1238 1241
1239 rpcrdma_sendctxs_destroy(buf); 1242 rpcrdma_sendctxs_destroy(buf);
@@ -1326,7 +1329,7 @@ rpcrdma_mr_unmap_and_put(struct rpcrdma_mr *mr)
1326{ 1329{
1327 struct rpcrdma_xprt *r_xprt = mr->mr_xprt; 1330 struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
1328 1331
1329 trace_xprtrdma_dma_unmap(mr); 1332 trace_xprtrdma_mr_unmap(mr);
1330 ib_dma_unmap_sg(r_xprt->rx_ia.ri_device, 1333 ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
1331 mr->mr_sg, mr->mr_nents, mr->mr_dir); 1334 mr->mr_sg, mr->mr_nents, mr->mr_dir);
1332 __rpcrdma_mr_put(&r_xprt->rx_buf, mr); 1335 __rpcrdma_mr_put(&r_xprt->rx_buf, mr);
@@ -1518,9 +1521,11 @@ rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp)
1518 struct ib_recv_wr *wr, *bad_wr; 1521 struct ib_recv_wr *wr, *bad_wr;
1519 int needed, count, rc; 1522 int needed, count, rc;
1520 1523
1524 rc = 0;
1525 count = 0;
1521 needed = buf->rb_credits + (buf->rb_bc_srv_max_requests << 1); 1526 needed = buf->rb_credits + (buf->rb_bc_srv_max_requests << 1);
1522 if (buf->rb_posted_receives > needed) 1527 if (buf->rb_posted_receives > needed)
1523 return; 1528 goto out;
1524 needed -= buf->rb_posted_receives; 1529 needed -= buf->rb_posted_receives;
1525 1530
1526 count = 0; 1531 count = 0;
@@ -1556,7 +1561,7 @@ rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp)
1556 --needed; 1561 --needed;
1557 } 1562 }
1558 if (!count) 1563 if (!count)
1559 return; 1564 goto out;
1560 1565
1561 rc = ib_post_recv(r_xprt->rx_ia.ri_id->qp, wr, 1566 rc = ib_post_recv(r_xprt->rx_ia.ri_id->qp, wr,
1562 (const struct ib_recv_wr **)&bad_wr); 1567 (const struct ib_recv_wr **)&bad_wr);
@@ -1570,5 +1575,6 @@ rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp)
1570 } 1575 }
1571 } 1576 }
1572 buf->rb_posted_receives += count; 1577 buf->rb_posted_receives += count;
1578out:
1573 trace_xprtrdma_post_recvs(r_xprt, count, rc); 1579 trace_xprtrdma_post_recvs(r_xprt, count, rc);
1574} 1580}
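The final verbs.c hunk converts rpcrdma_post_recvs to a single-exit shape:
rc and count are initialized up front and every early return is routed
through the out label, so the exit tracepoint always fires with defined
values. A small runnable sketch of the idiom; the demo function and its
printf stand in for the real posting loop and tracepoint:

	#include <stdio.h>

	static void demo_post_recvs(int posted, int needed_hint)
	{
		int rc = 0;		/* values the exit trace reports */
		int count = 0;
		int needed = needed_hint;

		if (posted > needed)
			goto out;	/* early exit is still traced */
		needed -= posted;

		/* ... build and post "needed" receive buffers, counting
		 * successes in "count" and recording failure in "rc" ... */
		count = needed;
	out:
		printf("post_recvs: count=%d rc=%d\n", count, rc);
	}

	int main(void)
	{
		demo_post_recvs(8, 4);
		demo_post_recvs(2, 6);
		return 0;
	}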
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 2ca14f7c2d51..a13ccb643ce0 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -101,7 +101,7 @@ struct rpcrdma_ep {
101 wait_queue_head_t rep_connect_wait; 101 wait_queue_head_t rep_connect_wait;
102 struct rpcrdma_connect_private rep_cm_private; 102 struct rpcrdma_connect_private rep_cm_private;
103 struct rdma_conn_param rep_remote_cma; 103 struct rdma_conn_param rep_remote_cma;
104 struct delayed_work rep_connect_worker; 104 struct delayed_work rep_disconnect_worker;
105}; 105};
106 106
107/* Pre-allocate extra Work Requests for handling backward receives 107/* Pre-allocate extra Work Requests for handling backward receives
@@ -280,6 +280,7 @@ struct rpcrdma_mr {
280 u32 mr_handle; 280 u32 mr_handle;
281 u32 mr_length; 281 u32 mr_length;
282 u64 mr_offset; 282 u64 mr_offset;
283 struct work_struct mr_recycle;
283 struct list_head mr_all; 284 struct list_head mr_all;
284}; 285};
285 286
@@ -411,9 +412,6 @@ struct rpcrdma_buffer {
411 412
412 u32 rb_bc_max_requests; 413 u32 rb_bc_max_requests;
413 414
414 spinlock_t rb_recovery_lock; /* protect rb_stale_mrs */
415 struct list_head rb_stale_mrs;
416 struct delayed_work rb_recovery_worker;
417 struct delayed_work rb_refresh_worker; 415 struct delayed_work rb_refresh_worker;
418}; 416};
419#define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia) 417#define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia)
@@ -452,7 +450,7 @@ struct rpcrdma_stats {
452 unsigned long hardway_register_count; 450 unsigned long hardway_register_count;
453 unsigned long failed_marshal_count; 451 unsigned long failed_marshal_count;
454 unsigned long bad_reply_count; 452 unsigned long bad_reply_count;
455 unsigned long mrs_recovered; 453 unsigned long mrs_recycled;
456 unsigned long mrs_orphaned; 454 unsigned long mrs_orphaned;
457 unsigned long mrs_allocated; 455 unsigned long mrs_allocated;
458 unsigned long empty_sendctx_q; 456 unsigned long empty_sendctx_q;
@@ -481,7 +479,6 @@ struct rpcrdma_memreg_ops {
481 struct list_head *mrs); 479 struct list_head *mrs);
482 void (*ro_unmap_sync)(struct rpcrdma_xprt *, 480 void (*ro_unmap_sync)(struct rpcrdma_xprt *,
483 struct list_head *); 481 struct list_head *);
484 void (*ro_recover_mr)(struct rpcrdma_mr *mr);
485 int (*ro_open)(struct rpcrdma_ia *, 482 int (*ro_open)(struct rpcrdma_ia *,
486 struct rpcrdma_ep *, 483 struct rpcrdma_ep *,
487 struct rpcrdma_create_data_internal *); 484 struct rpcrdma_create_data_internal *);
@@ -559,7 +556,6 @@ int rpcrdma_ep_create(struct rpcrdma_ep *, struct rpcrdma_ia *,
559 struct rpcrdma_create_data_internal *); 556 struct rpcrdma_create_data_internal *);
560void rpcrdma_ep_destroy(struct rpcrdma_ep *, struct rpcrdma_ia *); 557void rpcrdma_ep_destroy(struct rpcrdma_ep *, struct rpcrdma_ia *);
561int rpcrdma_ep_connect(struct rpcrdma_ep *, struct rpcrdma_ia *); 558int rpcrdma_ep_connect(struct rpcrdma_ep *, struct rpcrdma_ia *);
562void rpcrdma_conn_func(struct rpcrdma_ep *ep);
563void rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *); 559void rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *);
564 560
565int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *, 561int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *,
@@ -578,7 +574,12 @@ struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf);
578struct rpcrdma_mr *rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt); 574struct rpcrdma_mr *rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt);
579void rpcrdma_mr_put(struct rpcrdma_mr *mr); 575void rpcrdma_mr_put(struct rpcrdma_mr *mr);
580void rpcrdma_mr_unmap_and_put(struct rpcrdma_mr *mr); 576void rpcrdma_mr_unmap_and_put(struct rpcrdma_mr *mr);
581void rpcrdma_mr_defer_recovery(struct rpcrdma_mr *mr); 577
578static inline void
579rpcrdma_mr_recycle(struct rpcrdma_mr *mr)
580{
581 schedule_work(&mr->mr_recycle);
582}
582 583
583struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *); 584struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *);
584void rpcrdma_buffer_put(struct rpcrdma_req *); 585void rpcrdma_buffer_put(struct rpcrdma_req *);
@@ -652,7 +653,6 @@ static inline void rpcrdma_set_xdrlen(struct xdr_buf *xdr, size_t len)
652extern unsigned int xprt_rdma_max_inline_read; 653extern unsigned int xprt_rdma_max_inline_read;
653void xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap); 654void xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap);
654void xprt_rdma_free_addresses(struct rpc_xprt *xprt); 655void xprt_rdma_free_addresses(struct rpc_xprt *xprt);
655void rpcrdma_connect_worker(struct work_struct *work);
656void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq); 656void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq);
657int xprt_rdma_init(void); 657int xprt_rdma_init(void);
658void xprt_rdma_cleanup(void); 658void xprt_rdma_cleanup(void);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 9bb86cd3ee56..1b51e04d3566 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1452,6 +1452,9 @@ static void xs_tcp_state_change(struct sock *sk)
1452 clear_bit(XPRT_SOCK_CONNECTING, &transport->sock_state); 1452 clear_bit(XPRT_SOCK_CONNECTING, &transport->sock_state);
1453 xprt_clear_connecting(xprt); 1453 xprt_clear_connecting(xprt);
1454 1454
1455 xprt->stat.connect_count++;
1456 xprt->stat.connect_time += (long)jiffies -
1457 xprt->stat.connect_start;
1455 xprt_wake_pending_tasks(xprt, -EAGAIN); 1458 xprt_wake_pending_tasks(xprt, -EAGAIN);
1456 } 1459 }
1457 spin_unlock(&xprt->transport_lock); 1460 spin_unlock(&xprt->transport_lock);
@@ -1909,6 +1912,9 @@ static int xs_local_setup_socket(struct sock_xprt *transport)
1909 case 0: 1912 case 0:
1910 dprintk("RPC: xprt %p connected to %s\n", 1913 dprintk("RPC: xprt %p connected to %s\n",
1911 xprt, xprt->address_strings[RPC_DISPLAY_ADDR]); 1914 xprt, xprt->address_strings[RPC_DISPLAY_ADDR]);
1915 xprt->stat.connect_count++;
1916 xprt->stat.connect_time += (long)jiffies -
1917 xprt->stat.connect_start;
1912 xprt_set_connected(xprt); 1918 xprt_set_connected(xprt);
1913 case -ENOBUFS: 1919 case -ENOBUFS:
1914 break; 1920 break;
@@ -2409,7 +2415,7 @@ static void xs_local_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
2409 "%llu %llu %lu %llu %llu\n", 2415 "%llu %llu %lu %llu %llu\n",
2410 xprt->stat.bind_count, 2416 xprt->stat.bind_count,
2411 xprt->stat.connect_count, 2417 xprt->stat.connect_count,
2412 xprt->stat.connect_time, 2418 xprt->stat.connect_time / HZ,
2413 idle_time, 2419 idle_time,
2414 xprt->stat.sends, 2420 xprt->stat.sends,
2415 xprt->stat.recvs, 2421 xprt->stat.recvs,
@@ -2464,7 +2470,7 @@ static void xs_tcp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
2464 transport->srcport, 2470 transport->srcport,
2465 xprt->stat.bind_count, 2471 xprt->stat.bind_count,
2466 xprt->stat.connect_count, 2472 xprt->stat.connect_count,
2467 xprt->stat.connect_time, 2473 xprt->stat.connect_time / HZ,
2468 idle_time, 2474 idle_time,
2469 xprt->stat.sends, 2475 xprt->stat.sends,
2470 xprt->stat.recvs, 2476 xprt->stat.recvs,
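Across both transport.c and xprtsock.c, connect_time is now reported divided
by HZ, so the stats show cumulative seconds rather than raw jiffies, matching
how the connect_start deltas are accumulated. A userspace analogue of that
tick accounting; HZ here is a demo value, not the kernel's:

	#include <stdio.h>

	#define HZ 250UL	/* demo tick rate */

	int main(void)
	{
		unsigned long connect_start = 1000;	/* ticks at connect start */
		unsigned long jiffies = 2250;		/* ticks at established */
		unsigned long connect_time = 0;

		connect_time += jiffies - connect_start;	/* keep ticks */
		printf("connect_time: %lu ticks = %lu s\n",
		       connect_time, connect_time / HZ);	/* report seconds */
		return 0;
	}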