author     Trond Myklebust <trond.myklebust@primarydata.com>   2015-12-28 14:49:14 -0500
committer  Trond Myklebust <trond.myklebust@primarydata.com>   2015-12-28 14:49:14 -0500
commit     8d0ed0ca6381fea4a97f3b0f085cb1dfa0a50ec0
tree       8b7a81597314dfb2b909ac44df21f07d8e5c8fcb /net
parent     d1358917f2eb530bc6a097937302282a428806f8
parent     26ae9d1c5af1b1d669ca1c28fc02bbca3d778d45
Merge tag 'nfs-rdma-4.5' of git://git.linux-nfs.org/projects/anna/nfs-rdma
NFS: NFSoRDMA Client Side Changes
These patches mostly fix send queue ordering issues inside the NFSoRDMA
client, but there are also two patches from Dan Carpenter fixing up smatch
warnings.
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
* tag 'nfs-rdma-4.5' of git://git.linux-nfs.org/projects/anna/nfs-rdma:
xprtrdma: Revert commit e7104a2a9606 ('xprtrdma: Cap req_cqinit').
xprtrdma: Invalidate in the RPC reply handler
xprtrdma: Add ro_unmap_sync method for all-physical registration
xprtrdma: Add ro_unmap_sync method for FMR
xprtrdma: Add ro_unmap_sync method for FRWR
xprtrdma: Introduce ro_unmap_sync method
xprtrdma: Move struct ib_send_wr off the stack
xprtrdma: Disable RPC/RDMA backchannel debugging messages
xprtrdma: xprt_rdma_free() must not release backchannel reqs
xprtrdma: Fix additional uses of spin_lock_irqsave(rb_lock)
xprtrdma: checking for NULL instead of IS_ERR()
xprtrdma: clean up some curly braces
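
For orientation before the diffs: the central API change in this series is a new synchronous invalidation hook, ro_unmap_sync, in the memory-registration ops table. The RPC reply handler now calls it before waking the waiting application, so the payload memory is fenced from the server first and an RPC only completes once its Send Queue entries have been retired. The short C program below is only an editor-added model of that calling shape, with made-up stand-in names (fake_req, memreg_ops, model_unmap_sync); the real definitions are in the xprt_rdma.h, rpc_rdma.c, and *_ops.c hunks further down.

/* Self-contained model (not kernel code) of the ro_unmap_sync idea:
 * the registration-mode ops table gains a hook that returns only after
 * every MR mapped for a request has been invalidated.
 */
#include <stdio.h>

struct fake_req {
        int nchunks;                    /* models req->rl_nchunks */
};

struct memreg_ops {
        /* new hook: block until all MRs for "req" are invalid and the
         * CPU may safely touch the reply memory (models ro_unmap_sync) */
        void (*unmap_sync)(struct fake_req *req);
};

static void model_unmap_sync(struct fake_req *req)
{
        /* The real FRWR version chains LOCAL_INV Work Requests, signals
         * only the last one, and sleeps on a completion; here we simply
         * clear the chunk count. */
        req->nchunks = 0;
}

static const struct memreg_ops ops = {
        .unmap_sync = model_unmap_sync,
};

/* Models the tail of rpcrdma_reply_handler(): invalidate synchronously
 * before the waiting application is allowed to see the data. */
static void reply_handler(struct fake_req *req)
{
        if (req->nchunks)
                ops.unmap_sync(req);
        printf("reply data is now safe for the application\n");
}

int main(void)
{
        struct fake_req req = { .nchunks = 3 };
        reply_handler(&req);
        return 0;
}
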
Diffstat (limited to 'net')
-rw-r--r--  net/sunrpc/xprtrdma/backchannel.c    26
-rw-r--r--  net/sunrpc/xprtrdma/fmr_ops.c        64
-rw-r--r--  net/sunrpc/xprtrdma/frwr_ops.c      174
-rw-r--r--  net/sunrpc/xprtrdma/physical_ops.c   13
-rw-r--r--  net/sunrpc/xprtrdma/rpc_rdma.c       16
-rw-r--r--  net/sunrpc/xprtrdma/transport.c       3
-rw-r--r--  net/sunrpc/xprtrdma/verbs.c          16
-rw-r--r--  net/sunrpc/xprtrdma/xprt_rdma.h      14
8 files changed, 275 insertions(+), 51 deletions(-)
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index 2dcb44f69e53..cc1251d07297 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -15,7 +15,7 @@
 # define RPCDBG_FACILITY RPCDBG_TRANS
 #endif
 
-#define RPCRDMA_BACKCHANNEL_DEBUG
+#undef RPCRDMA_BACKCHANNEL_DEBUG
 
 static void rpcrdma_bc_free_rqst(struct rpcrdma_xprt *r_xprt,
                 struct rpc_rqst *rqst)
@@ -42,8 +42,8 @@ static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt,
         size_t size;
 
         req = rpcrdma_create_req(r_xprt);
-        if (!req)
-                return -ENOMEM;
+        if (IS_ERR(req))
+                return PTR_ERR(req);
         req->rl_backchannel = true;
 
         size = RPCRDMA_INLINE_WRITE_THRESHOLD(rqst);
@@ -84,9 +84,7 @@ out_fail:
 static int rpcrdma_bc_setup_reps(struct rpcrdma_xprt *r_xprt,
                 unsigned int count)
 {
-        struct rpcrdma_buffer *buffers = &r_xprt->rx_buf;
         struct rpcrdma_rep *rep;
-        unsigned long flags;
         int rc = 0;
 
         while (count--) {
@@ -98,9 +96,7 @@ static int rpcrdma_bc_setup_reps(struct rpcrdma_xprt *r_xprt,
                         break;
                 }
 
-                spin_lock_irqsave(&buffers->rb_lock, flags);
-                list_add(&rep->rr_list, &buffers->rb_recv_bufs);
-                spin_unlock_irqrestore(&buffers->rb_lock, flags);
+                rpcrdma_recv_buffer_put(rep);
         }
 
         return rc;
@@ -140,6 +136,7 @@ int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs)
                                 __func__);
                         goto out_free;
                 }
+                dprintk("RPC: %s: new rqst %p\n", __func__, rqst);
 
                 rqst->rq_xprt = &r_xprt->rx_xprt;
                 INIT_LIST_HEAD(&rqst->rq_list);
@@ -220,12 +217,14 @@ int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
 
         rpclen = rqst->rq_svec[0].iov_len;
 
+#ifdef RPCRDMA_BACKCHANNEL_DEBUG
         pr_info("RPC: %s: rpclen %zd headerp 0x%p lkey 0x%x\n",
                 __func__, rpclen, headerp, rdmab_lkey(req->rl_rdmabuf));
         pr_info("RPC: %s: RPC/RDMA: %*ph\n",
                 __func__, (int)RPCRDMA_HDRLEN_MIN, headerp);
         pr_info("RPC: %s: RPC: %*ph\n",
                 __func__, (int)rpclen, rqst->rq_svec[0].iov_base);
+#endif
 
         req->rl_send_iov[0].addr = rdmab_addr(req->rl_rdmabuf);
         req->rl_send_iov[0].length = RPCRDMA_HDRLEN_MIN;
@@ -269,6 +268,9 @@ void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst)
 {
         struct rpc_xprt *xprt = rqst->rq_xprt;
 
+        dprintk("RPC: %s: freeing rqst %p (req %p)\n",
+                __func__, rqst, rpcr_to_rdmar(rqst));
+
         smp_mb__before_atomic();
         WARN_ON_ONCE(!test_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state));
         clear_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
@@ -333,9 +335,7 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
                                 struct rpc_rqst, rq_bc_pa_list);
         list_del(&rqst->rq_bc_pa_list);
         spin_unlock(&xprt->bc_pa_lock);
-#ifdef RPCRDMA_BACKCHANNEL_DEBUG
-        pr_info("RPC: %s: using rqst %p\n", __func__, rqst);
-#endif
+        dprintk("RPC: %s: using rqst %p\n", __func__, rqst);
 
         /* Prepare rqst */
         rqst->rq_reply_bytes_recvd = 0;
@@ -355,10 +355,8 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
          * direction reply.
          */
         req = rpcr_to_rdmar(rqst);
-#ifdef RPCRDMA_BACKCHANNEL_DEBUG
-        pr_info("RPC: %s: attaching rep %p to req %p\n",
-                __func__, rep, req);
-#endif
+        dprintk("RPC: %s: attaching rep %p to req %p\n",
+                __func__, rep, req);
         req->rl_reply = rep;
 
         /* Defeat the retransmit detection logic in send_request */
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index f1e8dafbd507..c14f3a4bff68 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -179,6 +179,69 @@ out_maperr:
         return rc;
 }
 
+static void
+__fmr_dma_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
+{
+        struct ib_device *device = r_xprt->rx_ia.ri_device;
+        struct rpcrdma_mw *mw = seg->rl_mw;
+        int nsegs = seg->mr_nsegs;
+
+        seg->rl_mw = NULL;
+
+        while (nsegs--)
+                rpcrdma_unmap_one(device, seg++);
+
+        rpcrdma_put_mw(r_xprt, mw);
+}
+
+/* Invalidate all memory regions that were registered for "req".
+ *
+ * Sleeps until it is safe for the host CPU to access the
+ * previously mapped memory regions.
+ */
+static void
+fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
+{
+        struct rpcrdma_mr_seg *seg;
+        unsigned int i, nchunks;
+        struct rpcrdma_mw *mw;
+        LIST_HEAD(unmap_list);
+        int rc;
+
+        dprintk("RPC: %s: req %p\n", __func__, req);
+
+        /* ORDER: Invalidate all of the req's MRs first
+         *
+         * ib_unmap_fmr() is slow, so use a single call instead
+         * of one call per mapped MR.
+         */
+        for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) {
+                seg = &req->rl_segments[i];
+                mw = seg->rl_mw;
+
+                list_add(&mw->r.fmr.fmr->list, &unmap_list);
+
+                i += seg->mr_nsegs;
+        }
+        rc = ib_unmap_fmr(&unmap_list);
+        if (rc)
+                pr_warn("%s: ib_unmap_fmr failed (%i)\n", __func__, rc);
+
+        /* ORDER: Now DMA unmap all of the req's MRs, and return
+         * them to the free MW list.
+         */
+        for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) {
+                seg = &req->rl_segments[i];
+
+                __fmr_dma_unmap(r_xprt, seg);
+
+                i += seg->mr_nsegs;
+                seg->mr_nsegs = 0;
+        }
+
+        req->rl_nchunks = 0;
+}
+
 /* Use the ib_unmap_fmr() verb to prevent further remote
  * access via RDMA READ or RDMA WRITE.
  */
@@ -231,6 +294,7 @@ fmr_op_destroy(struct rpcrdma_buffer *buf)
 
 const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
         .ro_map = fmr_op_map,
+        .ro_unmap_sync = fmr_op_unmap_sync,
         .ro_unmap = fmr_op_unmap,
         .ro_open = fmr_op_open,
         .ro_maxpages = fmr_op_maxpages,
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 88cf9e7269c2..c6836844bd0e 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -245,12 +245,14 @@ frwr_op_maxpages(struct rpcrdma_xprt *r_xprt)
                 rpcrdma_max_segments(r_xprt) * ia->ri_max_frmr_depth);
 }
 
-/* If FAST_REG or LOCAL_INV failed, indicate the frmr needs to be reset. */
+/* If FAST_REG or LOCAL_INV failed, indicate the frmr needs
+ * to be reset.
+ *
+ * WARNING: Only wr_id and status are reliable at this point
+ */
 static void
-frwr_sendcompletion(struct ib_wc *wc)
+__frwr_sendcompletion_flush(struct ib_wc *wc, struct rpcrdma_mw *r)
 {
-        struct rpcrdma_mw *r;
-
         if (likely(wc->status == IB_WC_SUCCESS))
                 return;
 
@@ -261,9 +263,23 @@ frwr_sendcompletion(struct ib_wc *wc)
         else
                 pr_warn("RPC: %s: frmr %p error, status %s (%d)\n",
                         __func__, r, ib_wc_status_msg(wc->status), wc->status);
+
         r->r.frmr.fr_state = FRMR_IS_STALE;
 }
 
+static void
+frwr_sendcompletion(struct ib_wc *wc)
+{
+        struct rpcrdma_mw *r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
+        struct rpcrdma_frmr *f = &r->r.frmr;
+
+        if (unlikely(wc->status != IB_WC_SUCCESS))
+                __frwr_sendcompletion_flush(wc, r);
+
+        if (f->fr_waiter)
+                complete(&f->fr_linv_done);
+}
+
 static int
 frwr_op_init(struct rpcrdma_xprt *r_xprt)
 {
@@ -319,7 +335,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
         struct rpcrdma_mw *mw;
         struct rpcrdma_frmr *frmr;
         struct ib_mr *mr;
-        struct ib_reg_wr reg_wr;
+        struct ib_reg_wr *reg_wr;
         struct ib_send_wr *bad_wr;
         int rc, i, n, dma_nents;
         u8 key;
@@ -335,7 +351,9 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
         } while (mw->r.frmr.fr_state != FRMR_IS_INVALID);
         frmr = &mw->r.frmr;
         frmr->fr_state = FRMR_IS_VALID;
+        frmr->fr_waiter = false;
         mr = frmr->fr_mr;
+        reg_wr = &frmr->fr_regwr;
 
         if (nsegs > ia->ri_max_frmr_depth)
                 nsegs = ia->ri_max_frmr_depth;
@@ -381,19 +399,19 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
         key = (u8)(mr->rkey & 0x000000FF);
         ib_update_fast_reg_key(mr, ++key);
 
-        reg_wr.wr.next = NULL;
-        reg_wr.wr.opcode = IB_WR_REG_MR;
-        reg_wr.wr.wr_id = (uintptr_t)mw;
-        reg_wr.wr.num_sge = 0;
-        reg_wr.wr.send_flags = 0;
-        reg_wr.mr = mr;
-        reg_wr.key = mr->rkey;
-        reg_wr.access = writing ?
+        reg_wr->wr.next = NULL;
+        reg_wr->wr.opcode = IB_WR_REG_MR;
+        reg_wr->wr.wr_id = (uintptr_t)mw;
+        reg_wr->wr.num_sge = 0;
+        reg_wr->wr.send_flags = 0;
+        reg_wr->mr = mr;
+        reg_wr->key = mr->rkey;
+        reg_wr->access = writing ?
                         IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
                         IB_ACCESS_REMOTE_READ;
 
         DECR_CQCOUNT(&r_xprt->rx_ep);
-        rc = ib_post_send(ia->ri_id->qp, &reg_wr.wr, &bad_wr);
+        rc = ib_post_send(ia->ri_id->qp, &reg_wr->wr, &bad_wr);
         if (rc)
                 goto out_senderr;
 
@@ -413,6 +431,116 @@ out_senderr:
         return rc;
 }
 
+static struct ib_send_wr *
+__frwr_prepare_linv_wr(struct rpcrdma_mr_seg *seg)
+{
+        struct rpcrdma_mw *mw = seg->rl_mw;
+        struct rpcrdma_frmr *f = &mw->r.frmr;
+        struct ib_send_wr *invalidate_wr;
+
+        f->fr_waiter = false;
+        f->fr_state = FRMR_IS_INVALID;
+        invalidate_wr = &f->fr_invwr;
+
+        memset(invalidate_wr, 0, sizeof(*invalidate_wr));
+        invalidate_wr->wr_id = (unsigned long)(void *)mw;
+        invalidate_wr->opcode = IB_WR_LOCAL_INV;
+        invalidate_wr->ex.invalidate_rkey = f->fr_mr->rkey;
+
+        return invalidate_wr;
+}
+
+static void
+__frwr_dma_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
+                 int rc)
+{
+        struct ib_device *device = r_xprt->rx_ia.ri_device;
+        struct rpcrdma_mw *mw = seg->rl_mw;
+        struct rpcrdma_frmr *f = &mw->r.frmr;
+
+        seg->rl_mw = NULL;
+
+        ib_dma_unmap_sg(device, f->sg, f->sg_nents, seg->mr_dir);
+
+        if (!rc)
+                rpcrdma_put_mw(r_xprt, mw);
+        else
+                __frwr_queue_recovery(mw);
+}
+
+/* Invalidate all memory regions that were registered for "req".
+ *
+ * Sleeps until it is safe for the host CPU to access the
+ * previously mapped memory regions.
+ */
+static void
+frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
+{
+        struct ib_send_wr *invalidate_wrs, *pos, *prev, *bad_wr;
+        struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+        struct rpcrdma_mr_seg *seg;
+        unsigned int i, nchunks;
+        struct rpcrdma_frmr *f;
+        int rc;
+
+        dprintk("RPC: %s: req %p\n", __func__, req);
+
+        /* ORDER: Invalidate all of the req's MRs first
+         *
+         * Chain the LOCAL_INV Work Requests and post them with
+         * a single ib_post_send() call.
+         */
+        invalidate_wrs = pos = prev = NULL;
+        seg = NULL;
+        for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) {
+                seg = &req->rl_segments[i];
+
+                pos = __frwr_prepare_linv_wr(seg);
+
+                if (!invalidate_wrs)
+                        invalidate_wrs = pos;
+                else
+                        prev->next = pos;
+                prev = pos;
+
+                i += seg->mr_nsegs;
+        }
+        f = &seg->rl_mw->r.frmr;
+
+        /* Strong send queue ordering guarantees that when the
+         * last WR in the chain completes, all WRs in the chain
+         * are complete.
+         */
+        f->fr_invwr.send_flags = IB_SEND_SIGNALED;
+        f->fr_waiter = true;
+        init_completion(&f->fr_linv_done);
+        INIT_CQCOUNT(&r_xprt->rx_ep);
+
+        /* Transport disconnect drains the receive CQ before it
+         * replaces the QP. The RPC reply handler won't call us
+         * unless ri_id->qp is a valid pointer.
+         */
+        rc = ib_post_send(ia->ri_id->qp, invalidate_wrs, &bad_wr);
+        if (rc)
+                pr_warn("%s: ib_post_send failed %i\n", __func__, rc);
+
+        wait_for_completion(&f->fr_linv_done);
+
+        /* ORDER: Now DMA unmap all of the req's MRs, and return
+         * them to the free MW list.
+         */
+        for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) {
+                seg = &req->rl_segments[i];
+
+                __frwr_dma_unmap(r_xprt, seg, rc);
+
+                i += seg->mr_nsegs;
+                seg->mr_nsegs = 0;
+        }
+
+        req->rl_nchunks = 0;
+}
+
 /* Post a LOCAL_INV Work Request to prevent further remote access
  * via RDMA READ or RDMA WRITE.
  */
@@ -423,23 +551,24 @@ frwr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
         struct rpcrdma_ia *ia = &r_xprt->rx_ia;
         struct rpcrdma_mw *mw = seg1->rl_mw;
         struct rpcrdma_frmr *frmr = &mw->r.frmr;
-        struct ib_send_wr invalidate_wr, *bad_wr;
+        struct ib_send_wr *invalidate_wr, *bad_wr;
         int rc, nsegs = seg->mr_nsegs;
 
         dprintk("RPC: %s: FRMR %p\n", __func__, mw);
 
         seg1->rl_mw = NULL;
         frmr->fr_state = FRMR_IS_INVALID;
+        invalidate_wr = &mw->r.frmr.fr_invwr;
 
-        memset(&invalidate_wr, 0, sizeof(invalidate_wr));
-        invalidate_wr.wr_id = (unsigned long)(void *)mw;
-        invalidate_wr.opcode = IB_WR_LOCAL_INV;
-        invalidate_wr.ex.invalidate_rkey = frmr->fr_mr->rkey;
+        memset(invalidate_wr, 0, sizeof(*invalidate_wr));
+        invalidate_wr->wr_id = (uintptr_t)mw;
+        invalidate_wr->opcode = IB_WR_LOCAL_INV;
+        invalidate_wr->ex.invalidate_rkey = frmr->fr_mr->rkey;
         DECR_CQCOUNT(&r_xprt->rx_ep);
 
         ib_dma_unmap_sg(ia->ri_device, frmr->sg, frmr->sg_nents, seg1->mr_dir);
         read_lock(&ia->ri_qplock);
-        rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
+        rc = ib_post_send(ia->ri_id->qp, invalidate_wr, &bad_wr);
         read_unlock(&ia->ri_qplock);
         if (rc)
                 goto out_err;
@@ -471,6 +600,7 @@ frwr_op_destroy(struct rpcrdma_buffer *buf)
 
 const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
         .ro_map = frwr_op_map,
+        .ro_unmap_sync = frwr_op_unmap_sync,
         .ro_unmap = frwr_op_unmap,
         .ro_open = frwr_op_open,
         .ro_maxpages = frwr_op_maxpages,
diff --git a/net/sunrpc/xprtrdma/physical_ops.c b/net/sunrpc/xprtrdma/physical_ops.c
index 617b76f22154..dbb302ecf590 100644
--- a/net/sunrpc/xprtrdma/physical_ops.c
+++ b/net/sunrpc/xprtrdma/physical_ops.c
@@ -83,6 +83,18 @@ physical_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
         return 1;
 }
 
+/* DMA unmap all memory regions that were mapped for "req".
+ */
+static void
+physical_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
+{
+        struct ib_device *device = r_xprt->rx_ia.ri_device;
+        unsigned int i;
+
+        for (i = 0; req->rl_nchunks; --req->rl_nchunks)
+                rpcrdma_unmap_one(device, &req->rl_segments[i++]);
+}
+
 static void
 physical_op_destroy(struct rpcrdma_buffer *buf)
 {
@@ -90,6 +102,7 @@ physical_op_destroy(struct rpcrdma_buffer *buf)
 
 const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops = {
         .ro_map = physical_op_map,
+        .ro_unmap_sync = physical_op_unmap_sync,
         .ro_unmap = physical_op_unmap,
         .ro_open = physical_op_open,
         .ro_maxpages = physical_op_maxpages,
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index c10d9699441c..0f28f2d743ed 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -804,6 +804,11 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep)
         if (req->rl_reply)
                 goto out_duplicate;
 
+        /* Sanity checking has passed. We are now committed
+         * to complete this transaction.
+         */
+        list_del_init(&rqst->rq_list);
+        spin_unlock_bh(&xprt->transport_lock);
         dprintk("RPC: %s: reply 0x%p completes request 0x%p\n"
                 " RPC request 0x%p xid 0x%08x\n",
                 __func__, rep, req, rqst,
@@ -888,12 +893,23 @@ badheader:
                 break;
         }
 
+        /* Invalidate and flush the data payloads before waking the
+         * waiting application. This guarantees the memory region is
+         * properly fenced from the server before the application
+         * accesses the data. It also ensures proper send flow
+         * control: waking the next RPC waits until this RPC has
+         * relinquished all its Send Queue entries.
+         */
+        if (req->rl_nchunks)
+                r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt, req);
+
         credits = be32_to_cpu(headerp->rm_credit);
         if (credits == 0)
                 credits = 1; /* don't deadlock */
         else if (credits > r_xprt->rx_buf.rb_max_requests)
                 credits = r_xprt->rx_buf.rb_max_requests;
 
+        spin_lock_bh(&xprt->transport_lock);
         cwnd = xprt->cwnd;
         xprt->cwnd = credits << RPC_CWNDSHIFT;
         if (xprt->cwnd > cwnd)
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 8c545f7d7525..740bddcf3488 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -576,6 +576,9 @@ xprt_rdma_free(void *buffer)
 
         rb = container_of(buffer, struct rpcrdma_regbuf, rg_base[0]);
         req = rb->rg_owner;
+        if (req->rl_backchannel)
+                return;
+
         r_xprt = container_of(req->rl_buffer, struct rpcrdma_xprt, rx_buf);
 
         dprintk("RPC: %s: called on 0x%p\n", __func__, req->rl_reply);
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index eadd1655145a..732c71ce5dca 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -616,10 +616,8 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
 
         /* set trigger for requesting send completion */
         ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1;
-        if (ep->rep_cqinit > RPCRDMA_MAX_UNSIGNALED_SENDS)
-                ep->rep_cqinit = RPCRDMA_MAX_UNSIGNALED_SENDS;
-        else if (ep->rep_cqinit <= 2)
-                ep->rep_cqinit = 0;
+        if (ep->rep_cqinit <= 2)
+                ep->rep_cqinit = 0; /* always signal? */
         INIT_CQCOUNT(ep);
         init_waitqueue_head(&ep->rep_connect_wait);
         INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker);
@@ -852,10 +850,11 @@ retry:
 
         if (extras) {
                 rc = rpcrdma_ep_post_extra_recv(r_xprt, extras);
-                if (rc)
+                if (rc) {
                         pr_warn("%s: rpcrdma_ep_post_extra_recv: %i\n",
                                 __func__, rc);
                         rc = 0;
+                }
         }
 }
 
@@ -1337,15 +1336,14 @@ rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *r_xprt, unsigned int count)
         struct rpcrdma_ia *ia = &r_xprt->rx_ia;
         struct rpcrdma_ep *ep = &r_xprt->rx_ep;
         struct rpcrdma_rep *rep;
-        unsigned long flags;
         int rc;
 
         while (count--) {
-                spin_lock_irqsave(&buffers->rb_lock, flags);
+                spin_lock(&buffers->rb_lock);
                 if (list_empty(&buffers->rb_recv_bufs))
                         goto out_reqbuf;
                 rep = rpcrdma_buffer_get_rep_locked(buffers);
-                spin_unlock_irqrestore(&buffers->rb_lock, flags);
+                spin_unlock(&buffers->rb_lock);
 
                 rc = rpcrdma_ep_post_recv(ia, ep, rep);
                 if (rc)
@@ -1355,7 +1353,7 @@ rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *r_xprt, unsigned int count)
         return 0;
 
 out_reqbuf:
-        spin_unlock_irqrestore(&buffers->rb_lock, flags);
+        spin_unlock(&buffers->rb_lock);
         pr_warn("%s: no extra receive buffers\n", __func__);
         return -ENOMEM;
 
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index ac7f8d4f632a..728101ddc44b 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -88,12 +88,6 @@ struct rpcrdma_ep {
         struct delayed_work rep_connect_worker;
 };
 
-/*
- * Force a signaled SEND Work Request every so often,
- * in case the provider needs to do some housekeeping.
- */
-#define RPCRDMA_MAX_UNSIGNALED_SENDS (32)
-
 #define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit)
 #define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount)
 
@@ -207,6 +201,12 @@ struct rpcrdma_frmr {
         enum rpcrdma_frmr_state fr_state;
         struct work_struct fr_work;
         struct rpcrdma_xprt *fr_xprt;
+        bool fr_waiter;
+        struct completion fr_linv_done;;
+        union {
+                struct ib_reg_wr fr_regwr;
+                struct ib_send_wr fr_invwr;
+        };
 };
 
 struct rpcrdma_fmr {
@@ -364,6 +364,8 @@ struct rpcrdma_xprt;
 struct rpcrdma_memreg_ops {
         int (*ro_map)(struct rpcrdma_xprt *,
                       struct rpcrdma_mr_seg *, int, bool);
+        void (*ro_unmap_sync)(struct rpcrdma_xprt *,
+                              struct rpcrdma_req *);
         int (*ro_unmap)(struct rpcrdma_xprt *,
                         struct rpcrdma_mr_seg *);
         int (*ro_open)(struct rpcrdma_ia *,