author     Chuck Lever <chuck.lever@oracle.com>        2016-06-29 13:52:54 -0400
committer  Anna Schumaker <Anna.Schumaker@Netapp.com>  2016-07-11 15:50:43 -0400
commit     505bbe64dd04b105c1377703252758ac56f92485 (patch)
tree       e76e834799250f27b8daf758e785d07908883798 /net
parent     fcdfb968a706b0e80b12832bc30387ee9e0a759e (diff)
xprtrdma: Refactor MR recovery work queues
I found that commit ead3f26e359e ("xprtrdma: Add ro_unmap_safe memreg
method"), which introduces ro_unmap_safe, never wired up the FMR
recovery worker.

The FMR and FRWR recovery work queues both do the same thing. Instead
of setting up separate individual work queues for this, schedule a
delayed worker to deal with them, since recovering MRs is not
performance-critical.

Fixes: ead3f26e359e ("xprtrdma: Add ro_unmap_safe memreg method")
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Tested-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
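In outline, the mechanism that replaces the per-memreg work queues is a single
list of stale MRs hung off rpcrdma_buffer, protected by a spinlock and drained
by one delayed worker that calls back into the registration-mode-specific
->ro_recover_mr method. The sketch below is condensed from the verbs.c hunk
further down (the dprintk and the r_xprt local are dropped); it is illustrative
only, and the diff remains the authoritative version.

/* Condensed from the verbs.c changes below: a broken MR found in a
 * context that cannot sleep is parked on rb_stale_mrs; one delayed
 * worker drains the list and invokes the memreg-specific recovery
 * method (fmr_op_recover_mr or frwr_op_recover_mr).
 */
void
rpcrdma_defer_mr_recovery(struct rpcrdma_mw *mw)
{
	struct rpcrdma_buffer *buf = &mw->mw_xprt->rx_buf;

	spin_lock(&buf->rb_recovery_lock);
	list_add(&mw->mw_list, &buf->rb_stale_mrs);
	spin_unlock(&buf->rb_recovery_lock);

	schedule_delayed_work(&buf->rb_recovery_worker, 0);
}

static void
rpcrdma_mr_recovery_worker(struct work_struct *work)
{
	struct rpcrdma_buffer *buf = container_of(work, struct rpcrdma_buffer,
						  rb_recovery_worker.work);
	struct rpcrdma_mw *mw;

	spin_lock(&buf->rb_recovery_lock);
	while (!list_empty(&buf->rb_stale_mrs)) {
		mw = list_first_entry(&buf->rb_stale_mrs,
				      struct rpcrdma_mw, mw_list);
		list_del_init(&mw->mw_list);
		spin_unlock(&buf->rb_recovery_lock);

		/* may sleep; that is why recovery is deferred to a worker */
		mw->mw_xprt->rx_ia.ri_ops->ro_recover_mr(mw);

		spin_lock(&buf->rb_recovery_lock);
	}
	spin_unlock(&buf->rb_recovery_lock);
}

Because recovering MRs is not performance-critical, scheduling this shared
worker with a zero delay via schedule_delayed_work() is sufficient, and the
transport no longer allocates dedicated fmr/frwr recovery workqueues at
module init (see the transport.c hunks below).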
Diffstat (limited to 'net')
-rw-r--r--  net/sunrpc/xprtrdma/fmr_ops.c    147
-rw-r--r--  net/sunrpc/xprtrdma/frwr_ops.c    82
-rw-r--r--  net/sunrpc/xprtrdma/transport.c   16
-rw-r--r--  net/sunrpc/xprtrdma/verbs.c       43
-rw-r--r--  net/sunrpc/xprtrdma/xprt_rdma.h   13
5 files changed, 135 insertions(+), 166 deletions(-)
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index df5fe1786105..4837ced20b65 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -19,13 +19,6 @@
  * verb (fmr_op_unmap).
  */
 
-/* Transport recovery
- *
- * After a transport reconnect, fmr_op_map re-uses the MR already
- * allocated for the RPC, but generates a fresh rkey then maps the
- * MR again. This process is synchronous.
- */
-
 #include "xprt_rdma.h"
 
 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
@@ -41,30 +34,6 @@ enum {
 	IB_ACCESS_REMOTE_READ,
 };
 
-static struct workqueue_struct *fmr_recovery_wq;
-
-#define FMR_RECOVERY_WQ_FLAGS (WQ_UNBOUND)
-
-int
-fmr_alloc_recovery_wq(void)
-{
-	fmr_recovery_wq = alloc_workqueue("fmr_recovery", WQ_UNBOUND, 0);
-	return !fmr_recovery_wq ? -ENOMEM : 0;
-}
-
-void
-fmr_destroy_recovery_wq(void)
-{
-	struct workqueue_struct *wq;
-
-	if (!fmr_recovery_wq)
-		return;
-
-	wq = fmr_recovery_wq;
-	fmr_recovery_wq = NULL;
-	destroy_workqueue(wq);
-}
-
 static int
 __fmr_init(struct rpcrdma_mw *mw, struct ib_pd *pd)
 {
@@ -116,65 +85,55 @@ __fmr_unmap(struct rpcrdma_mw *mw)
 }
 
 static void
-__fmr_dma_unmap(struct rpcrdma_mw *mw)
-{
-	struct rpcrdma_xprt *r_xprt = mw->mw_xprt;
-
-	ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
-			mw->mw_sg, mw->mw_nents, mw->mw_dir);
-	rpcrdma_put_mw(r_xprt, mw);
-}
-
-static void
-__fmr_reset_and_unmap(struct rpcrdma_mw *mw)
-{
-	int rc;
-
-	/* ORDER */
-	rc = __fmr_unmap(mw);
-	if (rc) {
-		pr_warn("rpcrdma: ib_unmap_fmr status %d, fmr %p orphaned\n",
-			rc, mw);
-		return;
-	}
-	__fmr_dma_unmap(mw);
-}
-
-static void
 __fmr_release(struct rpcrdma_mw *r)
 {
+	LIST_HEAD(unmap_list);
 	int rc;
 
 	kfree(r->fmr.fm_physaddrs);
 	kfree(r->mw_sg);
 
+	/* In case this one was left mapped, try to unmap it
+	 * to prevent dealloc_fmr from failing with EBUSY
+	 */
+	rc = __fmr_unmap(r);
+	if (rc)
+		pr_err("rpcrdma: final ib_unmap_fmr for %p failed %i\n",
+		       r, rc);
+
 	rc = ib_dealloc_fmr(r->fmr.fm_mr);
 	if (rc)
 		pr_err("rpcrdma: final ib_dealloc_fmr for %p returned %i\n",
 		       r, rc);
 }
 
-/* Deferred reset of a single FMR. Generate a fresh rkey by
- * replacing the MR. There's no recovery if this fails.
+/* Reset of a single FMR.
+ *
+ * There's no recovery if this fails. The FMR is abandoned, but
+ * remains in rb_all. It will be cleaned up when the transport is
+ * destroyed.
  */
 static void
-__fmr_recovery_worker(struct work_struct *work)
+fmr_op_recover_mr(struct rpcrdma_mw *mw)
 {
-	struct rpcrdma_mw *mw = container_of(work, struct rpcrdma_mw,
-					     mw_work);
+	struct rpcrdma_xprt *r_xprt = mw->mw_xprt;
+	int rc;
 
-	__fmr_reset_and_unmap(mw);
-	return;
-}
+	/* ORDER: invalidate first */
+	rc = __fmr_unmap(mw);
 
-/* A broken MR was discovered in a context that can't sleep.
- * Defer recovery to the recovery worker.
- */
-static void
-__fmr_queue_recovery(struct rpcrdma_mw *mw)
-{
-	INIT_WORK(&mw->mw_work, __fmr_recovery_worker);
-	queue_work(fmr_recovery_wq, &mw->mw_work);
+	/* ORDER: then DMA unmap */
+	ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
+			mw->mw_sg, mw->mw_nents, mw->mw_dir);
+	if (rc) {
+		pr_err("rpcrdma: FMR reset status %d, %p orphaned\n",
+		       rc, mw);
+		r_xprt->rx_stats.mrs_orphaned++;
+		return;
+	}
+
+	rpcrdma_put_mw(r_xprt, mw);
+	r_xprt->rx_stats.mrs_recovered++;
 }
 
 static int
@@ -245,16 +204,11 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
 
 	mw = seg1->rl_mw;
 	seg1->rl_mw = NULL;
-	if (!mw) {
-		mw = rpcrdma_get_mw(r_xprt);
-		if (!mw)
-			return -ENOMEM;
-	} else {
-		/* this is a retransmit; generate a fresh rkey */
-		rc = __fmr_unmap(mw);
-		if (rc)
-			return rc;
-	}
+	if (mw)
+		rpcrdma_defer_mr_recovery(mw);
+	mw = rpcrdma_get_mw(r_xprt);
+	if (!mw)
+		return -ENOMEM;
 
 	pageoff = offset_in_page(seg1->mr_offset);
 	seg1->mr_offset -= pageoff; /* start of page */
@@ -309,7 +263,7 @@ out_maperr:
 	pr_err("rpcrdma: ib_map_phys_fmr %u@0x%llx+%i (%d) status %i\n",
 	       len, (unsigned long long)dma_pages[0],
 	       pageoff, mw->mw_nents, rc);
-	__fmr_dma_unmap(mw);
+	rpcrdma_defer_mr_recovery(mw);
 	return rc;
 }
 
@@ -332,7 +286,7 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
 	/* ORDER: Invalidate all of the req's MRs first
 	 *
 	 * ib_unmap_fmr() is slow, so use a single call instead
-	 * of one call per mapped MR.
+	 * of one call per mapped FMR.
 	 */
 	for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) {
 		seg = &req->rl_segments[i];
@@ -344,7 +298,7 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
 	}
 	rc = ib_unmap_fmr(&unmap_list);
 	if (rc)
-		pr_warn("%s: ib_unmap_fmr failed (%i)\n", __func__, rc);
+		goto out_reset;
 
 	/* ORDER: Now DMA unmap all of the req's MRs, and return
 	 * them to the free MW list.
@@ -354,7 +308,9 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
 		mw = seg->rl_mw;
 
 		list_del_init(&mw->fmr.fm_mr->list);
-		__fmr_dma_unmap(mw);
+		ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
+				mw->mw_sg, mw->mw_nents, mw->mw_dir);
+		rpcrdma_put_mw(r_xprt, mw);
 
 		i += seg->mr_nsegs;
 		seg->mr_nsegs = 0;
@@ -362,6 +318,20 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
 	}
 
 	req->rl_nchunks = 0;
+	return;
+
+out_reset:
+	pr_err("rpcrdma: ib_unmap_fmr failed (%i)\n", rc);
+
+	for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) {
+		seg = &req->rl_segments[i];
+		mw = seg->rl_mw;
+
+		list_del_init(&mw->fmr.fm_mr->list);
+		fmr_op_recover_mr(mw);
+
+		i += seg->mr_nsegs;
+	}
 }
 
 /* Use a slow, safe mechanism to invalidate all memory regions
@@ -380,9 +350,9 @@ fmr_op_unmap_safe(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
 		mw = seg->rl_mw;
 
 		if (sync)
-			__fmr_reset_and_unmap(mw);
+			fmr_op_recover_mr(mw);
 		else
-			__fmr_queue_recovery(mw);
+			rpcrdma_defer_mr_recovery(mw);
 
 		i += seg->mr_nsegs;
 		seg->mr_nsegs = 0;
@@ -407,6 +377,7 @@ const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
 	.ro_map = fmr_op_map,
 	.ro_unmap_sync = fmr_op_unmap_sync,
 	.ro_unmap_safe = fmr_op_unmap_safe,
+	.ro_recover_mr = fmr_op_recover_mr,
 	.ro_open = fmr_op_open,
 	.ro_maxpages = fmr_op_maxpages,
 	.ro_init = fmr_op_init,
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 9cd60bf0917d..cbb2d05be57f 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -73,31 +73,6 @@
 # define RPCDBG_FACILITY RPCDBG_TRANS
 #endif
 
-static struct workqueue_struct *frwr_recovery_wq;
-
-#define FRWR_RECOVERY_WQ_FLAGS (WQ_UNBOUND | WQ_MEM_RECLAIM)
-
-int
-frwr_alloc_recovery_wq(void)
-{
-	frwr_recovery_wq = alloc_workqueue("frwr_recovery",
-					   FRWR_RECOVERY_WQ_FLAGS, 0);
-	return !frwr_recovery_wq ? -ENOMEM : 0;
-}
-
-void
-frwr_destroy_recovery_wq(void)
-{
-	struct workqueue_struct *wq;
-
-	if (!frwr_recovery_wq)
-		return;
-
-	wq = frwr_recovery_wq;
-	frwr_recovery_wq = NULL;
-	destroy_workqueue(wq);
-}
-
 static int
 __frwr_init(struct rpcrdma_mw *r, struct ib_pd *pd, unsigned int depth)
 {
@@ -168,8 +143,14 @@ __frwr_reset_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r)
 	return 0;
 }
 
+/* Reset of a single FRMR. Generate a fresh rkey by replacing the MR.
+ *
+ * There's no recovery if this fails. The FRMR is abandoned, but
+ * remains in rb_all. It will be cleaned up when the transport is
+ * destroyed.
+ */
 static void
-__frwr_reset_and_unmap(struct rpcrdma_mw *mw)
+frwr_op_recover_mr(struct rpcrdma_mw *mw)
 {
 	struct rpcrdma_xprt *r_xprt = mw->mw_xprt;
 	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
@@ -177,35 +158,15 @@ __frwr_reset_and_unmap(struct rpcrdma_mw *mw)
 
 	rc = __frwr_reset_mr(ia, mw);
 	ib_dma_unmap_sg(ia->ri_device, mw->mw_sg, mw->mw_nents, mw->mw_dir);
-	if (rc)
+	if (rc) {
+		pr_err("rpcrdma: FRMR reset status %d, %p orphaned\n",
+		       rc, mw);
+		r_xprt->rx_stats.mrs_orphaned++;
 		return;
-	rpcrdma_put_mw(r_xprt, mw);
-}
-
-/* Deferred reset of a single FRMR. Generate a fresh rkey by
- * replacing the MR.
- *
- * There's no recovery if this fails. The FRMR is abandoned, but
- * remains in rb_all. It will be cleaned up when the transport is
- * destroyed.
- */
-static void
-__frwr_recovery_worker(struct work_struct *work)
-{
-	struct rpcrdma_mw *r = container_of(work, struct rpcrdma_mw,
-					    mw_work);
-
-	__frwr_reset_and_unmap(r);
-}
+	}
 
-/* A broken MR was discovered in a context that can't sleep.
- * Defer recovery to the recovery worker.
- */
-static void
-__frwr_queue_recovery(struct rpcrdma_mw *r)
-{
-	INIT_WORK(&r->mw_work, __frwr_recovery_worker);
-	queue_work(frwr_recovery_wq, &r->mw_work);
+	rpcrdma_put_mw(r_xprt, mw);
+	r_xprt->rx_stats.mrs_recovered++;
 }
 
 static int
@@ -401,7 +362,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
 	seg1->rl_mw = NULL;
 	do {
 		if (mw)
-			__frwr_queue_recovery(mw);
+			rpcrdma_defer_mr_recovery(mw);
 		mw = rpcrdma_get_mw(r_xprt);
 		if (!mw)
 			return -ENOMEM;
@@ -483,12 +444,11 @@ out_mapmr_err:
 	pr_err("rpcrdma: failed to map mr %p (%u/%u)\n",
 	       frmr->fr_mr, n, mw->mw_nents);
 	rc = n < 0 ? n : -EIO;
-	__frwr_queue_recovery(mw);
+	rpcrdma_defer_mr_recovery(mw);
 	return rc;
 
 out_senderr:
-	pr_err("rpcrdma: ib_post_send status %i\n", rc);
-	__frwr_queue_recovery(mw);
+	rpcrdma_defer_mr_recovery(mw);
 	return rc;
 }
 
@@ -627,9 +587,9 @@ frwr_op_unmap_safe(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
 		mw = seg->rl_mw;
 
 		if (sync)
-			__frwr_reset_and_unmap(mw);
+			frwr_op_recover_mr(mw);
 		else
-			__frwr_queue_recovery(mw);
+			rpcrdma_defer_mr_recovery(mw);
 
 		i += seg->mr_nsegs;
 		seg->mr_nsegs = 0;
@@ -642,9 +602,6 @@ frwr_op_destroy(struct rpcrdma_buffer *buf)
 {
 	struct rpcrdma_mw *r;
 
-	/* Ensure stale MWs for "buf" are no longer in flight */
-	flush_workqueue(frwr_recovery_wq);
-
 	while (!list_empty(&buf->rb_all)) {
 		r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
 		list_del(&r->mw_all);
@@ -657,6 +614,7 @@ const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
 	.ro_map = frwr_op_map,
 	.ro_unmap_sync = frwr_op_unmap_sync,
 	.ro_unmap_safe = frwr_op_unmap_safe,
+	.ro_recover_mr = frwr_op_recover_mr,
 	.ro_open = frwr_op_open,
 	.ro_maxpages = frwr_op_maxpages,
 	.ro_init = frwr_op_init,
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 99d2e5b72726..4c8e7f11b906 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -660,7 +660,7 @@ void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
 		   xprt->stat.bad_xids,
 		   xprt->stat.req_u,
 		   xprt->stat.bklog_u);
-	seq_printf(seq, "%lu %lu %lu %llu %llu %llu %llu %lu %lu %lu %lu\n",
+	seq_printf(seq, "%lu %lu %lu %llu %llu %llu %llu %lu %lu %lu %lu ",
 		   r_xprt->rx_stats.read_chunk_count,
 		   r_xprt->rx_stats.write_chunk_count,
 		   r_xprt->rx_stats.reply_chunk_count,
@@ -672,6 +672,9 @@ void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
 		   r_xprt->rx_stats.failed_marshal_count,
 		   r_xprt->rx_stats.bad_reply_count,
 		   r_xprt->rx_stats.nomsg_call_count);
+	seq_printf(seq, "%lu %lu\n",
+		   r_xprt->rx_stats.mrs_recovered,
+		   r_xprt->rx_stats.mrs_orphaned);
 }
 
 static int
@@ -741,7 +744,6 @@ void xprt_rdma_cleanup(void)
 			__func__, rc);
 
 	rpcrdma_destroy_wq();
-	frwr_destroy_recovery_wq();
 
 	rc = xprt_unregister_transport(&xprt_rdma_bc);
 	if (rc)
@@ -753,20 +755,13 @@ int xprt_rdma_init(void)
 {
 	int rc;
 
-	rc = frwr_alloc_recovery_wq();
-	if (rc)
-		return rc;
-
 	rc = rpcrdma_alloc_wq();
-	if (rc) {
-		frwr_destroy_recovery_wq();
+	if (rc)
 		return rc;
-	}
 
 	rc = xprt_register_transport(&xprt_rdma);
 	if (rc) {
 		rpcrdma_destroy_wq();
-		frwr_destroy_recovery_wq();
 		return rc;
 	}
 
@@ -774,7 +769,6 @@ int xprt_rdma_init(void)
 	if (rc) {
 		xprt_unregister_transport(&xprt_rdma);
 		rpcrdma_destroy_wq();
-		frwr_destroy_recovery_wq();
 		return rc;
 	}
 
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index b044d98a1370..77a371d3cde8 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -777,6 +777,41 @@ rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
 		ib_drain_qp(ia->ri_id->qp);
 }
 
+static void
+rpcrdma_mr_recovery_worker(struct work_struct *work)
+{
+	struct rpcrdma_buffer *buf = container_of(work, struct rpcrdma_buffer,
+						  rb_recovery_worker.work);
+	struct rpcrdma_mw *mw;
+
+	spin_lock(&buf->rb_recovery_lock);
+	while (!list_empty(&buf->rb_stale_mrs)) {
+		mw = list_first_entry(&buf->rb_stale_mrs,
+				      struct rpcrdma_mw, mw_list);
+		list_del_init(&mw->mw_list);
+		spin_unlock(&buf->rb_recovery_lock);
+
+		dprintk("RPC:       %s: recovering MR %p\n", __func__, mw);
+		mw->mw_xprt->rx_ia.ri_ops->ro_recover_mr(mw);
+
+		spin_lock(&buf->rb_recovery_lock);
+	};
+	spin_unlock(&buf->rb_recovery_lock);
+}
+
+void
+rpcrdma_defer_mr_recovery(struct rpcrdma_mw *mw)
+{
+	struct rpcrdma_xprt *r_xprt = mw->mw_xprt;
+	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
+
+	spin_lock(&buf->rb_recovery_lock);
+	list_add(&mw->mw_list, &buf->rb_stale_mrs);
+	spin_unlock(&buf->rb_recovery_lock);
+
+	schedule_delayed_work(&buf->rb_recovery_worker, 0);
+}
+
 struct rpcrdma_req *
 rpcrdma_create_req(struct rpcrdma_xprt *r_xprt)
 {
@@ -837,8 +872,12 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
 
 	buf->rb_max_requests = r_xprt->rx_data.max_requests;
 	buf->rb_bc_srv_max_requests = 0;
-	spin_lock_init(&buf->rb_lock);
 	atomic_set(&buf->rb_credits, 1);
+	spin_lock_init(&buf->rb_lock);
+	spin_lock_init(&buf->rb_recovery_lock);
+	INIT_LIST_HEAD(&buf->rb_stale_mrs);
+	INIT_DELAYED_WORK(&buf->rb_recovery_worker,
+			  rpcrdma_mr_recovery_worker);
 
 	rc = ia->ri_ops->ro_init(r_xprt);
 	if (rc)
@@ -923,6 +962,8 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
 {
 	struct rpcrdma_ia *ia = rdmab_to_ia(buf);
 
+	cancel_delayed_work_sync(&buf->rb_recovery_worker);
+
 	while (!list_empty(&buf->rb_recv_bufs)) {
 		struct rpcrdma_rep *rep;
 
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 04696c046dc5..4e03037d042c 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -245,7 +245,6 @@ struct rpcrdma_mw {
 		struct rpcrdma_fmr fmr;
 		struct rpcrdma_frmr frmr;
 	};
-	struct work_struct mw_work;
 	struct rpcrdma_xprt *mw_xprt;
 	struct list_head mw_all;
 };
@@ -341,6 +340,10 @@ struct rpcrdma_buffer {
 	struct list_head rb_allreqs;
 
 	u32 rb_bc_max_requests;
+
+	spinlock_t rb_recovery_lock; /* protect rb_stale_mrs */
+	struct list_head rb_stale_mrs;
+	struct delayed_work rb_recovery_worker;
 };
 #define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia)
 
@@ -387,6 +390,8 @@ struct rpcrdma_stats {
 	unsigned long bad_reply_count;
 	unsigned long nomsg_call_count;
 	unsigned long bcall_count;
+	unsigned long mrs_recovered;
+	unsigned long mrs_orphaned;
 };
 
 /*
@@ -400,6 +405,7 @@ struct rpcrdma_memreg_ops {
 				  struct rpcrdma_req *);
 	void (*ro_unmap_safe)(struct rpcrdma_xprt *,
 			      struct rpcrdma_req *, bool);
+	void (*ro_recover_mr)(struct rpcrdma_mw *);
 	int (*ro_open)(struct rpcrdma_ia *,
 		       struct rpcrdma_ep *,
 		       struct rpcrdma_create_data_internal *);
@@ -477,6 +483,8 @@ void rpcrdma_buffer_put(struct rpcrdma_req *);
 void rpcrdma_recv_buffer_get(struct rpcrdma_req *);
 void rpcrdma_recv_buffer_put(struct rpcrdma_rep *);
 
+void rpcrdma_defer_mr_recovery(struct rpcrdma_mw *);
+
 struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(struct rpcrdma_ia *,
 					    size_t, gfp_t);
 void rpcrdma_free_regbuf(struct rpcrdma_ia *,
@@ -484,9 +492,6 @@ void rpcrdma_free_regbuf(struct rpcrdma_ia *,
 
 int rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *, unsigned int);
 
-int frwr_alloc_recovery_wq(void);
-void frwr_destroy_recovery_wq(void);
-
 int rpcrdma_alloc_wq(void);
 void rpcrdma_destroy_wq(void);
 