-rw-r--r--   include/linux/sunrpc/xprt.h       6
-rw-r--r--   net/sunrpc/xprt.c                28
-rw-r--r--   net/sunrpc/xprtrdma/rpc_rdma.c  119
-rw-r--r--   net/sunrpc/xprtrdma/transport.c  90
-rw-r--r--   net/sunrpc/xprtrdma/verbs.c     753
-rw-r--r--   net/sunrpc/xprtrdma/xprt_rdma.h  17
6 files changed, 411 insertions(+), 602 deletions(-)
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 3e5efb2b236e..5903d2c0ab4d 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
| @@ -24,6 +24,12 @@ | |||
| 24 | #define RPC_MAX_SLOT_TABLE_LIMIT (65536U) | 24 | #define RPC_MAX_SLOT_TABLE_LIMIT (65536U) |
| 25 | #define RPC_MAX_SLOT_TABLE RPC_MAX_SLOT_TABLE_LIMIT | 25 | #define RPC_MAX_SLOT_TABLE RPC_MAX_SLOT_TABLE_LIMIT |
| 26 | 26 | ||
| 27 | #define RPC_CWNDSHIFT (8U) | ||
| 28 | #define RPC_CWNDSCALE (1U << RPC_CWNDSHIFT) | ||
| 29 | #define RPC_INITCWND RPC_CWNDSCALE | ||
| 30 | #define RPC_MAXCWND(xprt) ((xprt)->max_reqs << RPC_CWNDSHIFT) | ||
| 31 | #define RPCXPRT_CONGESTED(xprt) ((xprt)->cong >= (xprt)->cwnd) | ||
| 32 | |||
| 27 | /* | 33 | /* |
| 28 | * This describes a timeout strategy | 34 | * This describes a timeout strategy |
| 29 | */ | 35 | */ |
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index d173f79947c6..2d1d5a643b95 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
| @@ -71,24 +71,6 @@ static void xprt_destroy(struct rpc_xprt *xprt); | |||
| 71 | static DEFINE_SPINLOCK(xprt_list_lock); | 71 | static DEFINE_SPINLOCK(xprt_list_lock); |
| 72 | static LIST_HEAD(xprt_list); | 72 | static LIST_HEAD(xprt_list); |
| 73 | 73 | ||
| 74 | /* | ||
| 75 | * The transport code maintains an estimate on the maximum number of out- | ||
| 76 | * standing RPC requests, using a smoothed version of the congestion | ||
| 77 | * avoidance implemented in 44BSD. This is basically the Van Jacobson | ||
| 78 | * congestion algorithm: If a retransmit occurs, the congestion window is | ||
| 79 | * halved; otherwise, it is incremented by 1/cwnd when | ||
| 80 | * | ||
| 81 | * - a reply is received and | ||
| 82 | * - a full number of requests are outstanding and | ||
| 83 | * - the congestion window hasn't been updated recently. | ||
| 84 | */ | ||
| 85 | #define RPC_CWNDSHIFT (8U) | ||
| 86 | #define RPC_CWNDSCALE (1U << RPC_CWNDSHIFT) | ||
| 87 | #define RPC_INITCWND RPC_CWNDSCALE | ||
| 88 | #define RPC_MAXCWND(xprt) ((xprt)->max_reqs << RPC_CWNDSHIFT) | ||
| 89 | |||
| 90 | #define RPCXPRT_CONGESTED(xprt) ((xprt)->cong >= (xprt)->cwnd) | ||
| 91 | |||
| 92 | /** | 74 | /** |
| 93 | * xprt_register_transport - register a transport implementation | 75 | * xprt_register_transport - register a transport implementation |
| 94 | * @transport: transport to register | 76 | * @transport: transport to register |
| @@ -446,7 +428,15 @@ EXPORT_SYMBOL_GPL(xprt_release_rqst_cong); | |||
| 446 | * @task: recently completed RPC request used to adjust window | 428 | * @task: recently completed RPC request used to adjust window |
| 447 | * @result: result code of completed RPC request | 429 | * @result: result code of completed RPC request |
| 448 | * | 430 | * |
| 449 | * We use a time-smoothed congestion estimator to avoid heavy oscillation. | 431 | * The transport code maintains an estimate on the maximum number of out- |
| 432 | * standing RPC requests, using a smoothed version of the congestion | ||
| 433 | * avoidance implemented in 44BSD. This is basically the Van Jacobson | ||
| 434 | * congestion algorithm: If a retransmit occurs, the congestion window is | ||
| 435 | * halved; otherwise, it is incremented by 1/cwnd when | ||
| 436 | * | ||
| 437 | * - a reply is received and | ||
| 438 | * - a full number of requests are outstanding and | ||
| 439 | * - the congestion window hasn't been updated recently. | ||
| 450 | */ | 440 | */ |
| 451 | void xprt_adjust_cwnd(struct rpc_xprt *xprt, struct rpc_task *task, int result) | 441 | void xprt_adjust_cwnd(struct rpc_xprt *xprt, struct rpc_task *task, int result) |
| 452 | { | 442 | { |
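The block comment moved into the xprt_adjust_cwnd() kerneldoc above describes the estimator that the RPC_CWND* macros (now shared via xprt.h) implement: the window is kept in fixed point, scaled by RPC_CWNDSCALE, so the "increment by 1/cwnd" step stays in integer arithmetic. Below is a minimal user-space sketch of that update, not the kernel routine itself; the real code also requires a full window of requests to be outstanding before growing the window.

#include <errno.h>
#include <stdio.h>

#define RPC_CWNDSHIFT          (8U)
#define RPC_CWNDSCALE          (1U << RPC_CWNDSHIFT)
#define RPC_INITCWND           RPC_CWNDSCALE
#define RPC_MAXCWND(max_reqs)  ((unsigned long)(max_reqs) << RPC_CWNDSHIFT)

/* 44BSD-style smoothed congestion avoidance: a timeout halves the
 * window (never below one request); a successful reply grows it by
 * roughly 1/cwnd of a request, capped at the slot-table size.
 */
static unsigned long adjust_cwnd(unsigned long cwnd, unsigned int max_reqs,
				 int result)
{
	if (result >= 0) {
		cwnd += (RPC_CWNDSCALE * RPC_CWNDSCALE + (cwnd >> 1)) / cwnd;
		if (cwnd > RPC_MAXCWND(max_reqs))
			cwnd = RPC_MAXCWND(max_reqs);
	} else if (result == -ETIMEDOUT) {
		cwnd >>= 1;
		if (cwnd < RPC_CWNDSCALE)
			cwnd = RPC_CWNDSCALE;
	}
	return cwnd;
}

int main(void)
{
	unsigned long cwnd = RPC_INITCWND;
	int i;

	for (i = 0; i < 5; i++)				/* five good replies */
		cwnd = adjust_cwnd(cwnd, 16, 0);
	printf("after 5 replies: cwnd = %lu (%lu requests)\n",
	       cwnd, cwnd >> RPC_CWNDSHIFT);
	cwnd = adjust_cwnd(cwnd, 16, -ETIMEDOUT);	/* one retransmit */
	printf("after a timeout: cwnd = %lu (%lu requests)\n",
	       cwnd, cwnd >> RPC_CWNDSHIFT);
	return 0;
}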
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 96ead526b125..693966d3f33b 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
| @@ -78,8 +78,7 @@ static const char transfertypes[][12] = { | |||
| 78 | * elements. Segments are then coalesced when registered, if possible | 78 | * elements. Segments are then coalesced when registered, if possible |
| 79 | * within the selected memreg mode. | 79 | * within the selected memreg mode. |
| 80 | * | 80 | * |
| 81 | * Note, this routine is never called if the connection's memory | 81 | * Returns positive number of segments converted, or a negative errno. |
| 82 | * registration strategy is 0 (bounce buffers). | ||
| 83 | */ | 82 | */ |
| 84 | 83 | ||
| 85 | static int | 84 | static int |
| @@ -102,10 +101,17 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos, | |||
| 102 | page_base = xdrbuf->page_base & ~PAGE_MASK; | 101 | page_base = xdrbuf->page_base & ~PAGE_MASK; |
| 103 | p = 0; | 102 | p = 0; |
| 104 | while (len && n < nsegs) { | 103 | while (len && n < nsegs) { |
| 104 | if (!ppages[p]) { | ||
| 105 | /* alloc the pagelist for receiving buffer */ | ||
| 106 | ppages[p] = alloc_page(GFP_ATOMIC); | ||
| 107 | if (!ppages[p]) | ||
| 108 | return -ENOMEM; | ||
| 109 | } | ||
| 105 | seg[n].mr_page = ppages[p]; | 110 | seg[n].mr_page = ppages[p]; |
| 106 | seg[n].mr_offset = (void *)(unsigned long) page_base; | 111 | seg[n].mr_offset = (void *)(unsigned long) page_base; |
| 107 | seg[n].mr_len = min_t(u32, PAGE_SIZE - page_base, len); | 112 | seg[n].mr_len = min_t(u32, PAGE_SIZE - page_base, len); |
| 108 | BUG_ON(seg[n].mr_len > PAGE_SIZE); | 113 | if (seg[n].mr_len > PAGE_SIZE) |
| 114 | return -EIO; | ||
| 109 | len -= seg[n].mr_len; | 115 | len -= seg[n].mr_len; |
| 110 | ++n; | 116 | ++n; |
| 111 | ++p; | 117 | ++p; |
| @@ -114,7 +120,7 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos, | |||
| 114 | 120 | ||
| 115 | /* Message overflows the seg array */ | 121 | /* Message overflows the seg array */ |
| 116 | if (len && n == nsegs) | 122 | if (len && n == nsegs) |
| 117 | return 0; | 123 | return -EIO; |
| 118 | 124 | ||
| 119 | if (xdrbuf->tail[0].iov_len) { | 125 | if (xdrbuf->tail[0].iov_len) { |
| 120 | /* the rpcrdma protocol allows us to omit any trailing | 126 | /* the rpcrdma protocol allows us to omit any trailing |
| @@ -123,7 +129,7 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos, | |||
| 123 | return n; | 129 | return n; |
| 124 | if (n == nsegs) | 130 | if (n == nsegs) |
| 125 | /* Tail remains, but we're out of segments */ | 131 | /* Tail remains, but we're out of segments */ |
| 126 | return 0; | 132 | return -EIO; |
| 127 | seg[n].mr_page = NULL; | 133 | seg[n].mr_page = NULL; |
| 128 | seg[n].mr_offset = xdrbuf->tail[0].iov_base; | 134 | seg[n].mr_offset = xdrbuf->tail[0].iov_base; |
| 129 | seg[n].mr_len = xdrbuf->tail[0].iov_len; | 135 | seg[n].mr_len = xdrbuf->tail[0].iov_len; |
| @@ -164,15 +170,17 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos, | |||
| 164 | * Reply chunk (a counted array): | 170 | * Reply chunk (a counted array): |
| 165 | * N elements: | 171 | * N elements: |
| 166 | * 1 - N - HLOO - HLOO - ... - HLOO | 172 | * 1 - N - HLOO - HLOO - ... - HLOO |
| 173 | * | ||
| 174 | * Returns positive RPC/RDMA header size, or negative errno. | ||
| 167 | */ | 175 | */ |
| 168 | 176 | ||
| 169 | static unsigned int | 177 | static ssize_t |
| 170 | rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target, | 178 | rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target, |
| 171 | struct rpcrdma_msg *headerp, enum rpcrdma_chunktype type) | 179 | struct rpcrdma_msg *headerp, enum rpcrdma_chunktype type) |
| 172 | { | 180 | { |
| 173 | struct rpcrdma_req *req = rpcr_to_rdmar(rqst); | 181 | struct rpcrdma_req *req = rpcr_to_rdmar(rqst); |
| 174 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); | 182 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); |
| 175 | int nsegs, nchunks = 0; | 183 | int n, nsegs, nchunks = 0; |
| 176 | unsigned int pos; | 184 | unsigned int pos; |
| 177 | struct rpcrdma_mr_seg *seg = req->rl_segments; | 185 | struct rpcrdma_mr_seg *seg = req->rl_segments; |
| 178 | struct rpcrdma_read_chunk *cur_rchunk = NULL; | 186 | struct rpcrdma_read_chunk *cur_rchunk = NULL; |
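The "HLOO" quads counted in the comment above are the per-segment RDMA descriptors that rpcrdma_create_chunks() emits for each registered segment: a handle (rkey), a byte length, and a 64-bit offset carried as two XDR words. A hedged sketch of that wire layout follows; the struct and field names here are illustrative, not the kernel's own definitions.

#include <stdint.h>
#include <arpa/inet.h>	/* htonl */

/* One "HLOO" element, all fields big-endian on the wire. */
struct hloo_segment {
	uint32_t handle;	/* H: registered memory handle (rkey) */
	uint32_t length;	/* L: segment length in bytes */
	uint32_t offset_hi;	/* O: upper 32 bits of the offset */
	uint32_t offset_lo;	/* O: lower 32 bits of the offset */
};

/* Sketch: emit a counted array (the reply-chunk form described above,
 * "1 - N - HLOO - ... - HLOO") into buf; returns XDR words written.
 */
static unsigned int encode_counted_array(uint32_t *buf,
					 const struct hloo_segment *segs,
					 unsigned int nsegs)
{
	unsigned int i, n = 0;

	buf[n++] = htonl(1);		/* array present */
	buf[n++] = htonl(nsegs);	/* element count  */
	for (i = 0; i < nsegs; i++) {
		buf[n++] = segs[i].handle;	/* already big-endian */
		buf[n++] = segs[i].length;
		buf[n++] = segs[i].offset_hi;
		buf[n++] = segs[i].offset_lo;
	}
	return n;
}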
| @@ -198,12 +206,11 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target, | |||
| 198 | pos = target->head[0].iov_len; | 206 | pos = target->head[0].iov_len; |
| 199 | 207 | ||
| 200 | nsegs = rpcrdma_convert_iovs(target, pos, type, seg, RPCRDMA_MAX_SEGS); | 208 | nsegs = rpcrdma_convert_iovs(target, pos, type, seg, RPCRDMA_MAX_SEGS); |
| 201 | if (nsegs == 0) | 209 | if (nsegs < 0) |
| 202 | return 0; | 210 | return nsegs; |
| 203 | 211 | ||
| 204 | do { | 212 | do { |
| 205 | /* bind/register the memory, then build chunk from result. */ | 213 | n = rpcrdma_register_external(seg, nsegs, |
| 206 | int n = rpcrdma_register_external(seg, nsegs, | ||
| 207 | cur_wchunk != NULL, r_xprt); | 214 | cur_wchunk != NULL, r_xprt); |
| 208 | if (n <= 0) | 215 | if (n <= 0) |
| 209 | goto out; | 216 | goto out; |
| @@ -248,10 +255,6 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target, | |||
| 248 | /* success. all failures return above */ | 255 | /* success. all failures return above */ |
| 249 | req->rl_nchunks = nchunks; | 256 | req->rl_nchunks = nchunks; |
| 250 | 257 | ||
| 251 | BUG_ON(nchunks == 0); | ||
| 252 | BUG_ON((r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_FRMR) | ||
| 253 | && (nchunks > 3)); | ||
| 254 | |||
| 255 | /* | 258 | /* |
| 256 | * finish off header. If write, marshal discrim and nchunks. | 259 | * finish off header. If write, marshal discrim and nchunks. |
| 257 | */ | 260 | */ |
| @@ -278,8 +281,8 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target, | |||
| 278 | out: | 281 | out: |
| 279 | for (pos = 0; nchunks--;) | 282 | for (pos = 0; nchunks--;) |
| 280 | pos += rpcrdma_deregister_external( | 283 | pos += rpcrdma_deregister_external( |
| 281 | &req->rl_segments[pos], r_xprt, NULL); | 284 | &req->rl_segments[pos], r_xprt); |
| 282 | return 0; | 285 | return n; |
| 283 | } | 286 | } |
| 284 | 287 | ||
| 285 | /* | 288 | /* |
| @@ -361,6 +364,8 @@ rpcrdma_inline_pullup(struct rpc_rqst *rqst, int pad) | |||
| 361 | * [1] -- the RPC header/data, marshaled by RPC and the NFS protocol. | 364 | * [1] -- the RPC header/data, marshaled by RPC and the NFS protocol. |
| 362 | * [2] -- optional padding. | 365 | * [2] -- optional padding. |
| 363 | * [3] -- if padded, header only in [1] and data here. | 366 | * [3] -- if padded, header only in [1] and data here. |
| 367 | * | ||
| 368 | * Returns zero on success, otherwise a negative errno. | ||
| 364 | */ | 369 | */ |
| 365 | 370 | ||
| 366 | int | 371 | int |
| @@ -370,7 +375,8 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) | |||
| 370 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); | 375 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); |
| 371 | struct rpcrdma_req *req = rpcr_to_rdmar(rqst); | 376 | struct rpcrdma_req *req = rpcr_to_rdmar(rqst); |
| 372 | char *base; | 377 | char *base; |
| 373 | size_t hdrlen, rpclen, padlen; | 378 | size_t rpclen, padlen; |
| 379 | ssize_t hdrlen; | ||
| 374 | enum rpcrdma_chunktype rtype, wtype; | 380 | enum rpcrdma_chunktype rtype, wtype; |
| 375 | struct rpcrdma_msg *headerp; | 381 | struct rpcrdma_msg *headerp; |
| 376 | 382 | ||
| @@ -441,14 +447,10 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) | |||
| 441 | /* The following simplification is not true forever */ | 447 | /* The following simplification is not true forever */ |
| 442 | if (rtype != rpcrdma_noch && wtype == rpcrdma_replych) | 448 | if (rtype != rpcrdma_noch && wtype == rpcrdma_replych) |
| 443 | wtype = rpcrdma_noch; | 449 | wtype = rpcrdma_noch; |
| 444 | BUG_ON(rtype != rpcrdma_noch && wtype != rpcrdma_noch); | 450 | if (rtype != rpcrdma_noch && wtype != rpcrdma_noch) { |
| 445 | 451 | dprintk("RPC: %s: cannot marshal multiple chunk lists\n", | |
| 446 | if (r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_BOUNCEBUFFERS && | 452 | __func__); |
| 447 | (rtype != rpcrdma_noch || wtype != rpcrdma_noch)) { | 453 | return -EIO; |
| 448 | /* forced to "pure inline"? */ | ||
| 449 | dprintk("RPC: %s: too much data (%d/%d) for inline\n", | ||
| 450 | __func__, rqst->rq_rcv_buf.len, rqst->rq_snd_buf.len); | ||
| 451 | return -1; | ||
| 452 | } | 454 | } |
| 453 | 455 | ||
| 454 | hdrlen = 28; /*sizeof *headerp;*/ | 456 | hdrlen = 28; /*sizeof *headerp;*/ |
| @@ -474,8 +476,11 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) | |||
| 474 | headerp->rm_body.rm_padded.rm_pempty[1] = xdr_zero; | 476 | headerp->rm_body.rm_padded.rm_pempty[1] = xdr_zero; |
| 475 | headerp->rm_body.rm_padded.rm_pempty[2] = xdr_zero; | 477 | headerp->rm_body.rm_padded.rm_pempty[2] = xdr_zero; |
| 476 | hdrlen += 2 * sizeof(u32); /* extra words in padhdr */ | 478 | hdrlen += 2 * sizeof(u32); /* extra words in padhdr */ |
| 477 | BUG_ON(wtype != rpcrdma_noch); | 479 | if (wtype != rpcrdma_noch) { |
| 478 | 480 | dprintk("RPC: %s: invalid chunk list\n", | |
| 481 | __func__); | ||
| 482 | return -EIO; | ||
| 483 | } | ||
| 479 | } else { | 484 | } else { |
| 480 | headerp->rm_body.rm_nochunks.rm_empty[0] = xdr_zero; | 485 | headerp->rm_body.rm_nochunks.rm_empty[0] = xdr_zero; |
| 481 | headerp->rm_body.rm_nochunks.rm_empty[1] = xdr_zero; | 486 | headerp->rm_body.rm_nochunks.rm_empty[1] = xdr_zero; |
| @@ -492,8 +497,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) | |||
| 492 | * on receive. Therefore, we request a reply chunk | 497 | * on receive. Therefore, we request a reply chunk |
| 493 | * for non-writes wherever feasible and efficient. | 498 | * for non-writes wherever feasible and efficient. |
| 494 | */ | 499 | */ |
| 495 | if (wtype == rpcrdma_noch && | 500 | if (wtype == rpcrdma_noch) |
| 496 | r_xprt->rx_ia.ri_memreg_strategy > RPCRDMA_REGISTER) | ||
| 497 | wtype = rpcrdma_replych; | 501 | wtype = rpcrdma_replych; |
| 498 | } | 502 | } |
| 499 | } | 503 | } |
| @@ -511,9 +515,8 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) | |||
| 511 | hdrlen = rpcrdma_create_chunks(rqst, | 515 | hdrlen = rpcrdma_create_chunks(rqst, |
| 512 | &rqst->rq_rcv_buf, headerp, wtype); | 516 | &rqst->rq_rcv_buf, headerp, wtype); |
| 513 | } | 517 | } |
| 514 | 518 | if (hdrlen < 0) | |
| 515 | if (hdrlen == 0) | 519 | return hdrlen; |
| 516 | return -1; | ||
| 517 | 520 | ||
| 518 | dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd padlen %zd" | 521 | dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd padlen %zd" |
| 519 | " headerp 0x%p base 0x%p lkey 0x%x\n", | 522 | " headerp 0x%p base 0x%p lkey 0x%x\n", |
| @@ -680,15 +683,11 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad) | |||
| 680 | rqst->rq_private_buf = rqst->rq_rcv_buf; | 683 | rqst->rq_private_buf = rqst->rq_rcv_buf; |
| 681 | } | 684 | } |
| 682 | 685 | ||
| 683 | /* | ||
| 684 | * This function is called when an async event is posted to | ||
| 685 | * the connection which changes the connection state. All it | ||
| 686 | * does at this point is mark the connection up/down, the rpc | ||
| 687 | * timers do the rest. | ||
| 688 | */ | ||
| 689 | void | 686 | void |
| 690 | rpcrdma_conn_func(struct rpcrdma_ep *ep) | 687 | rpcrdma_connect_worker(struct work_struct *work) |
| 691 | { | 688 | { |
| 689 | struct rpcrdma_ep *ep = | ||
| 690 | container_of(work, struct rpcrdma_ep, rep_connect_worker.work); | ||
| 692 | struct rpc_xprt *xprt = ep->rep_xprt; | 691 | struct rpc_xprt *xprt = ep->rep_xprt; |
| 693 | 692 | ||
| 694 | spin_lock_bh(&xprt->transport_lock); | 693 | spin_lock_bh(&xprt->transport_lock); |
| @@ -705,13 +704,15 @@ rpcrdma_conn_func(struct rpcrdma_ep *ep) | |||
| 705 | } | 704 | } |
| 706 | 705 | ||
| 707 | /* | 706 | /* |
| 708 | * This function is called when memory window unbind which we are waiting | 707 | * This function is called when an async event is posted to |
| 709 | * for completes. Just use rr_func (zeroed by upcall) to signal completion. | 708 | * the connection which changes the connection state. All it |
| 709 | * does at this point is mark the connection up/down, the rpc | ||
| 710 | * timers do the rest. | ||
| 710 | */ | 711 | */ |
| 711 | static void | 712 | void |
| 712 | rpcrdma_unbind_func(struct rpcrdma_rep *rep) | 713 | rpcrdma_conn_func(struct rpcrdma_ep *ep) |
| 713 | { | 714 | { |
| 714 | wake_up(&rep->rr_unbind); | 715 | schedule_delayed_work(&ep->rep_connect_worker, 0); |
| 715 | } | 716 | } |
| 716 | 717 | ||
| 717 | /* | 718 | /* |
| @@ -728,7 +729,8 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep) | |||
| 728 | struct rpc_xprt *xprt = rep->rr_xprt; | 729 | struct rpc_xprt *xprt = rep->rr_xprt; |
| 729 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); | 730 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); |
| 730 | __be32 *iptr; | 731 | __be32 *iptr; |
| 731 | int i, rdmalen, status; | 732 | int rdmalen, status; |
| 733 | unsigned long cwnd; | ||
| 732 | 734 | ||
| 733 | /* Check status. If bad, signal disconnect and return rep to pool */ | 735 | /* Check status. If bad, signal disconnect and return rep to pool */ |
| 734 | if (rep->rr_len == ~0U) { | 736 | if (rep->rr_len == ~0U) { |
| @@ -783,6 +785,7 @@ repost: | |||
| 783 | 785 | ||
| 784 | /* from here on, the reply is no longer an orphan */ | 786 | /* from here on, the reply is no longer an orphan */ |
| 785 | req->rl_reply = rep; | 787 | req->rl_reply = rep; |
| 788 | xprt->reestablish_timeout = 0; | ||
| 786 | 789 | ||
| 787 | /* check for expected message types */ | 790 | /* check for expected message types */ |
| 788 | /* The order of some of these tests is important. */ | 791 | /* The order of some of these tests is important. */ |
| @@ -857,26 +860,10 @@ badheader: | |||
| 857 | break; | 860 | break; |
| 858 | } | 861 | } |
| 859 | 862 | ||
| 860 | /* If using mw bind, start the deregister process now. */ | 863 | cwnd = xprt->cwnd; |
| 861 | /* (Note: if mr_free(), cannot perform it here, in tasklet context) */ | 864 | xprt->cwnd = atomic_read(&r_xprt->rx_buf.rb_credits) << RPC_CWNDSHIFT; |
| 862 | if (req->rl_nchunks) switch (r_xprt->rx_ia.ri_memreg_strategy) { | 865 | if (xprt->cwnd > cwnd) |
| 863 | case RPCRDMA_MEMWINDOWS: | 866 | xprt_release_rqst_cong(rqst->rq_task); |
| 864 | for (i = 0; req->rl_nchunks-- > 1;) | ||
| 865 | i += rpcrdma_deregister_external( | ||
| 866 | &req->rl_segments[i], r_xprt, NULL); | ||
| 867 | /* Optionally wait (not here) for unbinds to complete */ | ||
| 868 | rep->rr_func = rpcrdma_unbind_func; | ||
| 869 | (void) rpcrdma_deregister_external(&req->rl_segments[i], | ||
| 870 | r_xprt, rep); | ||
| 871 | break; | ||
| 872 | case RPCRDMA_MEMWINDOWS_ASYNC: | ||
| 873 | for (i = 0; req->rl_nchunks--;) | ||
| 874 | i += rpcrdma_deregister_external(&req->rl_segments[i], | ||
| 875 | r_xprt, NULL); | ||
| 876 | break; | ||
| 877 | default: | ||
| 878 | break; | ||
| 879 | } | ||
| 880 | 867 | ||
| 881 | dprintk("RPC: %s: xprt_complete_rqst(0x%p, 0x%p, %d)\n", | 868 | dprintk("RPC: %s: xprt_complete_rqst(0x%p, 0x%p, %d)\n", |
| 882 | __func__, xprt, rqst, status); | 869 | __func__, xprt, rqst, status); |
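With the transport's private reserve_xprt removed (see the transport.c changes below), the reply handler above now feeds the server's advertised credit count directly into the generic congestion window: cwnd becomes the credit limit scaled by RPC_CWNDSCALE, and xprt_release_rqst_cong() is kicked whenever the window grows so queued requests can proceed. A minimal sketch of that conversion, assuming the clamped credit value produced in rpcrdma_recvcq_process_wc():

#define RPC_CWNDSHIFT  (8U)
#define RPC_CWNDSCALE  (1U << RPC_CWNDSHIFT)

/* Sketch: map an RPC/RDMA credit count onto the sunrpc congestion
 * window. Returns nonzero when the window grew, i.e. when the caller
 * should release requests that are waiting on congestion.
 */
static int credits_to_cwnd(unsigned long *cwnd, unsigned int credits)
{
	unsigned long old = *cwnd;

	*cwnd = (unsigned long)credits << RPC_CWNDSHIFT;
	return *cwnd > old;
}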
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 1eb9c468d0c9..66f91f0d071a 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
| @@ -149,6 +149,11 @@ static struct ctl_table sunrpc_table[] = { | |||
| 149 | 149 | ||
| 150 | #endif | 150 | #endif |
| 151 | 151 | ||
| 152 | #define RPCRDMA_BIND_TO (60U * HZ) | ||
| 153 | #define RPCRDMA_INIT_REEST_TO (5U * HZ) | ||
| 154 | #define RPCRDMA_MAX_REEST_TO (30U * HZ) | ||
| 155 | #define RPCRDMA_IDLE_DISC_TO (5U * 60 * HZ) | ||
| 156 | |||
| 152 | static struct rpc_xprt_ops xprt_rdma_procs; /* forward reference */ | 157 | static struct rpc_xprt_ops xprt_rdma_procs; /* forward reference */ |
| 153 | 158 | ||
| 154 | static void | 159 | static void |
| @@ -229,7 +234,6 @@ static void | |||
| 229 | xprt_rdma_destroy(struct rpc_xprt *xprt) | 234 | xprt_rdma_destroy(struct rpc_xprt *xprt) |
| 230 | { | 235 | { |
| 231 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); | 236 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); |
| 232 | int rc; | ||
| 233 | 237 | ||
| 234 | dprintk("RPC: %s: called\n", __func__); | 238 | dprintk("RPC: %s: called\n", __func__); |
| 235 | 239 | ||
| @@ -238,10 +242,7 @@ xprt_rdma_destroy(struct rpc_xprt *xprt) | |||
| 238 | xprt_clear_connected(xprt); | 242 | xprt_clear_connected(xprt); |
| 239 | 243 | ||
| 240 | rpcrdma_buffer_destroy(&r_xprt->rx_buf); | 244 | rpcrdma_buffer_destroy(&r_xprt->rx_buf); |
| 241 | rc = rpcrdma_ep_destroy(&r_xprt->rx_ep, &r_xprt->rx_ia); | 245 | rpcrdma_ep_destroy(&r_xprt->rx_ep, &r_xprt->rx_ia); |
| 242 | if (rc) | ||
| 243 | dprintk("RPC: %s: rpcrdma_ep_destroy returned %i\n", | ||
| 244 | __func__, rc); | ||
| 245 | rpcrdma_ia_close(&r_xprt->rx_ia); | 246 | rpcrdma_ia_close(&r_xprt->rx_ia); |
| 246 | 247 | ||
| 247 | xprt_rdma_free_addresses(xprt); | 248 | xprt_rdma_free_addresses(xprt); |
| @@ -289,9 +290,9 @@ xprt_setup_rdma(struct xprt_create *args) | |||
| 289 | 290 | ||
| 290 | /* 60 second timeout, no retries */ | 291 | /* 60 second timeout, no retries */ |
| 291 | xprt->timeout = &xprt_rdma_default_timeout; | 292 | xprt->timeout = &xprt_rdma_default_timeout; |
| 292 | xprt->bind_timeout = (60U * HZ); | 293 | xprt->bind_timeout = RPCRDMA_BIND_TO; |
| 293 | xprt->reestablish_timeout = (5U * HZ); | 294 | xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO; |
| 294 | xprt->idle_timeout = (5U * 60 * HZ); | 295 | xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO; |
| 295 | 296 | ||
| 296 | xprt->resvport = 0; /* privileged port not needed */ | 297 | xprt->resvport = 0; /* privileged port not needed */ |
| 297 | xprt->tsh_size = 0; /* RPC-RDMA handles framing */ | 298 | xprt->tsh_size = 0; /* RPC-RDMA handles framing */ |
| @@ -391,7 +392,7 @@ out4: | |||
| 391 | xprt_rdma_free_addresses(xprt); | 392 | xprt_rdma_free_addresses(xprt); |
| 392 | rc = -EINVAL; | 393 | rc = -EINVAL; |
| 393 | out3: | 394 | out3: |
| 394 | (void) rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia); | 395 | rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia); |
| 395 | out2: | 396 | out2: |
| 396 | rpcrdma_ia_close(&new_xprt->rx_ia); | 397 | rpcrdma_ia_close(&new_xprt->rx_ia); |
| 397 | out1: | 398 | out1: |
| @@ -436,10 +437,10 @@ xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task) | |||
| 436 | schedule_delayed_work(&r_xprt->rdma_connect, | 437 | schedule_delayed_work(&r_xprt->rdma_connect, |
| 437 | xprt->reestablish_timeout); | 438 | xprt->reestablish_timeout); |
| 438 | xprt->reestablish_timeout <<= 1; | 439 | xprt->reestablish_timeout <<= 1; |
| 439 | if (xprt->reestablish_timeout > (30 * HZ)) | 440 | if (xprt->reestablish_timeout > RPCRDMA_MAX_REEST_TO) |
| 440 | xprt->reestablish_timeout = (30 * HZ); | 441 | xprt->reestablish_timeout = RPCRDMA_MAX_REEST_TO; |
| 441 | else if (xprt->reestablish_timeout < (5 * HZ)) | 442 | else if (xprt->reestablish_timeout < RPCRDMA_INIT_REEST_TO) |
| 442 | xprt->reestablish_timeout = (5 * HZ); | 443 | xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO; |
| 443 | } else { | 444 | } else { |
| 444 | schedule_delayed_work(&r_xprt->rdma_connect, 0); | 445 | schedule_delayed_work(&r_xprt->rdma_connect, 0); |
| 445 | if (!RPC_IS_ASYNC(task)) | 446 | if (!RPC_IS_ASYNC(task)) |
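The reconnect path above doubles reestablish_timeout on each attempt and clamps it between the new RPCRDMA_INIT_REEST_TO and RPCRDMA_MAX_REEST_TO constants, so the delays run roughly 5s, 10s, 20s, 30s, 30s, and the rpc_rdma.c hunk earlier resets the timeout to zero as soon as a reply arrives. A sketch of that backoff in isolation, expressed in seconds rather than jiffies to stay self-contained:

#define RPCRDMA_INIT_REEST_TO	5U	/* 5U * HZ in the patch  */
#define RPCRDMA_MAX_REEST_TO	30U	/* 30U * HZ in the patch */

/* Sketch: exponential backoff with a floor and a ceiling. */
static unsigned long next_reestablish_timeout(unsigned long to)
{
	to <<= 1;
	if (to > RPCRDMA_MAX_REEST_TO)
		to = RPCRDMA_MAX_REEST_TO;
	else if (to < RPCRDMA_INIT_REEST_TO)
		to = RPCRDMA_INIT_REEST_TO;
	return to;
}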
| @@ -447,23 +448,6 @@ xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task) | |||
| 447 | } | 448 | } |
| 448 | } | 449 | } |
| 449 | 450 | ||
| 450 | static int | ||
| 451 | xprt_rdma_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task) | ||
| 452 | { | ||
| 453 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); | ||
| 454 | int credits = atomic_read(&r_xprt->rx_buf.rb_credits); | ||
| 455 | |||
| 456 | /* == RPC_CWNDSCALE @ init, but *after* setup */ | ||
| 457 | if (r_xprt->rx_buf.rb_cwndscale == 0UL) { | ||
| 458 | r_xprt->rx_buf.rb_cwndscale = xprt->cwnd; | ||
| 459 | dprintk("RPC: %s: cwndscale %lu\n", __func__, | ||
| 460 | r_xprt->rx_buf.rb_cwndscale); | ||
| 461 | BUG_ON(r_xprt->rx_buf.rb_cwndscale <= 0); | ||
| 462 | } | ||
| 463 | xprt->cwnd = credits * r_xprt->rx_buf.rb_cwndscale; | ||
| 464 | return xprt_reserve_xprt_cong(xprt, task); | ||
| 465 | } | ||
| 466 | |||
| 467 | /* | 451 | /* |
| 468 | * The RDMA allocate/free functions need the task structure as a place | 452 | * The RDMA allocate/free functions need the task structure as a place |
| 469 | * to hide the struct rpcrdma_req, which is necessary for the actual send/recv | 453 | * to hide the struct rpcrdma_req, which is necessary for the actual send/recv |
| @@ -479,7 +463,8 @@ xprt_rdma_allocate(struct rpc_task *task, size_t size) | |||
| 479 | struct rpcrdma_req *req, *nreq; | 463 | struct rpcrdma_req *req, *nreq; |
| 480 | 464 | ||
| 481 | req = rpcrdma_buffer_get(&rpcx_to_rdmax(xprt)->rx_buf); | 465 | req = rpcrdma_buffer_get(&rpcx_to_rdmax(xprt)->rx_buf); |
| 482 | BUG_ON(NULL == req); | 466 | if (req == NULL) |
| 467 | return NULL; | ||
| 483 | 468 | ||
| 484 | if (size > req->rl_size) { | 469 | if (size > req->rl_size) { |
| 485 | dprintk("RPC: %s: size %zd too large for buffer[%zd]: " | 470 | dprintk("RPC: %s: size %zd too large for buffer[%zd]: " |
| @@ -503,18 +488,6 @@ xprt_rdma_allocate(struct rpc_task *task, size_t size) | |||
| 503 | * If the allocation or registration fails, the RPC framework | 488 | * If the allocation or registration fails, the RPC framework |
| 504 | * will (doggedly) retry. | 489 | * will (doggedly) retry. |
| 505 | */ | 490 | */ |
| 506 | if (rpcx_to_rdmax(xprt)->rx_ia.ri_memreg_strategy == | ||
| 507 | RPCRDMA_BOUNCEBUFFERS) { | ||
| 508 | /* forced to "pure inline" */ | ||
| 509 | dprintk("RPC: %s: too much data (%zd) for inline " | ||
| 510 | "(r/w max %d/%d)\n", __func__, size, | ||
| 511 | rpcx_to_rdmad(xprt).inline_rsize, | ||
| 512 | rpcx_to_rdmad(xprt).inline_wsize); | ||
| 513 | size = req->rl_size; | ||
| 514 | rpc_exit(task, -EIO); /* fail the operation */ | ||
| 515 | rpcx_to_rdmax(xprt)->rx_stats.failed_marshal_count++; | ||
| 516 | goto out; | ||
| 517 | } | ||
| 518 | if (task->tk_flags & RPC_TASK_SWAPPER) | 491 | if (task->tk_flags & RPC_TASK_SWAPPER) |
| 519 | nreq = kmalloc(sizeof *req + size, GFP_ATOMIC); | 492 | nreq = kmalloc(sizeof *req + size, GFP_ATOMIC); |
| 520 | else | 493 | else |
| @@ -543,7 +516,6 @@ xprt_rdma_allocate(struct rpc_task *task, size_t size) | |||
| 543 | req = nreq; | 516 | req = nreq; |
| 544 | } | 517 | } |
| 545 | dprintk("RPC: %s: size %zd, request 0x%p\n", __func__, size, req); | 518 | dprintk("RPC: %s: size %zd, request 0x%p\n", __func__, size, req); |
| 546 | out: | ||
| 547 | req->rl_connect_cookie = 0; /* our reserved value */ | 519 | req->rl_connect_cookie = 0; /* our reserved value */ |
| 548 | return req->rl_xdr_buf; | 520 | return req->rl_xdr_buf; |
| 549 | 521 | ||
| @@ -579,9 +551,7 @@ xprt_rdma_free(void *buffer) | |||
| 579 | __func__, rep, (rep && rep->rr_func) ? " (with waiter)" : ""); | 551 | __func__, rep, (rep && rep->rr_func) ? " (with waiter)" : ""); |
| 580 | 552 | ||
| 581 | /* | 553 | /* |
| 582 | * Finish the deregistration. When using mw bind, this was | 554 | * Finish the deregistration. The process is considered |
| 583 | * begun in rpcrdma_reply_handler(). In all other modes, we | ||
| 584 | * do it here, in thread context. The process is considered | ||
| 585 | * complete when the rr_func vector becomes NULL - this | 555 | * complete when the rr_func vector becomes NULL - this |
| 586 | * was put in place during rpcrdma_reply_handler() - the wait | 556 | * was put in place during rpcrdma_reply_handler() - the wait |
| 587 | * call below will not block if the dereg is "done". If | 557 | * call below will not block if the dereg is "done". If |
| @@ -590,12 +560,7 @@ xprt_rdma_free(void *buffer) | |||
| 590 | for (i = 0; req->rl_nchunks;) { | 560 | for (i = 0; req->rl_nchunks;) { |
| 591 | --req->rl_nchunks; | 561 | --req->rl_nchunks; |
| 592 | i += rpcrdma_deregister_external( | 562 | i += rpcrdma_deregister_external( |
| 593 | &req->rl_segments[i], r_xprt, NULL); | 563 | &req->rl_segments[i], r_xprt); |
| 594 | } | ||
| 595 | |||
| 596 | if (rep && wait_event_interruptible(rep->rr_unbind, !rep->rr_func)) { | ||
| 597 | rep->rr_func = NULL; /* abandon the callback */ | ||
| 598 | req->rl_reply = NULL; | ||
| 599 | } | 564 | } |
| 600 | 565 | ||
| 601 | if (req->rl_iov.length == 0) { /* see allocate above */ | 566 | if (req->rl_iov.length == 0) { /* see allocate above */ |
| @@ -630,13 +595,12 @@ xprt_rdma_send_request(struct rpc_task *task) | |||
| 630 | struct rpc_xprt *xprt = rqst->rq_xprt; | 595 | struct rpc_xprt *xprt = rqst->rq_xprt; |
| 631 | struct rpcrdma_req *req = rpcr_to_rdmar(rqst); | 596 | struct rpcrdma_req *req = rpcr_to_rdmar(rqst); |
| 632 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); | 597 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); |
| 598 | int rc; | ||
| 633 | 599 | ||
| 634 | /* marshal the send itself */ | 600 | if (req->rl_niovs == 0) { |
| 635 | if (req->rl_niovs == 0 && rpcrdma_marshal_req(rqst) != 0) { | 601 | rc = rpcrdma_marshal_req(rqst); |
| 636 | r_xprt->rx_stats.failed_marshal_count++; | 602 | if (rc < 0) |
| 637 | dprintk("RPC: %s: rpcrdma_marshal_req failed\n", | 603 | goto failed_marshal; |
| 638 | __func__); | ||
| 639 | return -EIO; | ||
| 640 | } | 604 | } |
| 641 | 605 | ||
| 642 | if (req->rl_reply == NULL) /* e.g. reconnection */ | 606 | if (req->rl_reply == NULL) /* e.g. reconnection */ |
| @@ -660,6 +624,12 @@ xprt_rdma_send_request(struct rpc_task *task) | |||
| 660 | rqst->rq_bytes_sent = 0; | 624 | rqst->rq_bytes_sent = 0; |
| 661 | return 0; | 625 | return 0; |
| 662 | 626 | ||
| 627 | failed_marshal: | ||
| 628 | r_xprt->rx_stats.failed_marshal_count++; | ||
| 629 | dprintk("RPC: %s: rpcrdma_marshal_req failed, status %i\n", | ||
| 630 | __func__, rc); | ||
| 631 | if (rc == -EIO) | ||
| 632 | return -EIO; | ||
| 663 | drop_connection: | 633 | drop_connection: |
| 664 | xprt_disconnect_done(xprt); | 634 | xprt_disconnect_done(xprt); |
| 665 | return -ENOTCONN; /* implies disconnect */ | 635 | return -ENOTCONN; /* implies disconnect */ |
| @@ -705,7 +675,7 @@ static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) | |||
| 705 | */ | 675 | */ |
| 706 | 676 | ||
| 707 | static struct rpc_xprt_ops xprt_rdma_procs = { | 677 | static struct rpc_xprt_ops xprt_rdma_procs = { |
| 708 | .reserve_xprt = xprt_rdma_reserve_xprt, | 678 | .reserve_xprt = xprt_reserve_xprt_cong, |
| 709 | .release_xprt = xprt_release_xprt_cong, /* sunrpc/xprt.c */ | 679 | .release_xprt = xprt_release_xprt_cong, /* sunrpc/xprt.c */ |
| 710 | .alloc_slot = xprt_alloc_slot, | 680 | .alloc_slot = xprt_alloc_slot, |
| 711 | .release_request = xprt_release_rqst_cong, /* ditto */ | 681 | .release_request = xprt_release_rqst_cong, /* ditto */ |
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 93726560eaa8..13dbd1c389ff 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
| @@ -48,8 +48,8 @@ | |||
| 48 | */ | 48 | */ |
| 49 | 49 | ||
| 50 | #include <linux/interrupt.h> | 50 | #include <linux/interrupt.h> |
| 51 | #include <linux/pci.h> /* for Tavor hack below */ | ||
| 52 | #include <linux/slab.h> | 51 | #include <linux/slab.h> |
| 52 | #include <asm/bitops.h> | ||
| 53 | 53 | ||
| 54 | #include "xprt_rdma.h" | 54 | #include "xprt_rdma.h" |
| 55 | 55 | ||
| @@ -142,98 +142,139 @@ rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context) | |||
| 142 | } | 142 | } |
| 143 | } | 143 | } |
| 144 | 144 | ||
| 145 | static inline | 145 | static void |
| 146 | void rpcrdma_event_process(struct ib_wc *wc) | 146 | rpcrdma_sendcq_process_wc(struct ib_wc *wc) |
| 147 | { | 147 | { |
| 148 | struct rpcrdma_mw *frmr; | 148 | struct rpcrdma_mw *frmr = (struct rpcrdma_mw *)(unsigned long)wc->wr_id; |
| 149 | struct rpcrdma_rep *rep = | ||
| 150 | (struct rpcrdma_rep *)(unsigned long) wc->wr_id; | ||
| 151 | 149 | ||
| 152 | dprintk("RPC: %s: event rep %p status %X opcode %X length %u\n", | 150 | dprintk("RPC: %s: frmr %p status %X opcode %d\n", |
| 153 | __func__, rep, wc->status, wc->opcode, wc->byte_len); | 151 | __func__, frmr, wc->status, wc->opcode); |
| 154 | 152 | ||
| 155 | if (!rep) /* send or bind completion that we don't care about */ | 153 | if (wc->wr_id == 0ULL) |
| 156 | return; | 154 | return; |
| 157 | 155 | if (wc->status != IB_WC_SUCCESS) | |
| 158 | if (IB_WC_SUCCESS != wc->status) { | ||
| 159 | dprintk("RPC: %s: WC opcode %d status %X, connection lost\n", | ||
| 160 | __func__, wc->opcode, wc->status); | ||
| 161 | rep->rr_len = ~0U; | ||
| 162 | if (wc->opcode != IB_WC_FAST_REG_MR && wc->opcode != IB_WC_LOCAL_INV) | ||
| 163 | rpcrdma_schedule_tasklet(rep); | ||
| 164 | return; | 156 | return; |
| 165 | } | ||
| 166 | 157 | ||
| 167 | switch (wc->opcode) { | 158 | if (wc->opcode == IB_WC_FAST_REG_MR) |
| 168 | case IB_WC_FAST_REG_MR: | ||
| 169 | frmr = (struct rpcrdma_mw *)(unsigned long)wc->wr_id; | ||
| 170 | frmr->r.frmr.state = FRMR_IS_VALID; | 159 | frmr->r.frmr.state = FRMR_IS_VALID; |
| 171 | break; | 160 | else if (wc->opcode == IB_WC_LOCAL_INV) |
| 172 | case IB_WC_LOCAL_INV: | ||
| 173 | frmr = (struct rpcrdma_mw *)(unsigned long)wc->wr_id; | ||
| 174 | frmr->r.frmr.state = FRMR_IS_INVALID; | 161 | frmr->r.frmr.state = FRMR_IS_INVALID; |
| 175 | break; | ||
| 176 | case IB_WC_RECV: | ||
| 177 | rep->rr_len = wc->byte_len; | ||
| 178 | ib_dma_sync_single_for_cpu( | ||
| 179 | rdmab_to_ia(rep->rr_buffer)->ri_id->device, | ||
| 180 | rep->rr_iov.addr, rep->rr_len, DMA_FROM_DEVICE); | ||
| 181 | /* Keep (only) the most recent credits, after check validity */ | ||
| 182 | if (rep->rr_len >= 16) { | ||
| 183 | struct rpcrdma_msg *p = | ||
| 184 | (struct rpcrdma_msg *) rep->rr_base; | ||
| 185 | unsigned int credits = ntohl(p->rm_credit); | ||
| 186 | if (credits == 0) { | ||
| 187 | dprintk("RPC: %s: server" | ||
| 188 | " dropped credits to 0!\n", __func__); | ||
| 189 | /* don't deadlock */ | ||
| 190 | credits = 1; | ||
| 191 | } else if (credits > rep->rr_buffer->rb_max_requests) { | ||
| 192 | dprintk("RPC: %s: server" | ||
| 193 | " over-crediting: %d (%d)\n", | ||
| 194 | __func__, credits, | ||
| 195 | rep->rr_buffer->rb_max_requests); | ||
| 196 | credits = rep->rr_buffer->rb_max_requests; | ||
| 197 | } | ||
| 198 | atomic_set(&rep->rr_buffer->rb_credits, credits); | ||
| 199 | } | ||
| 200 | /* fall through */ | ||
| 201 | case IB_WC_BIND_MW: | ||
| 202 | rpcrdma_schedule_tasklet(rep); | ||
| 203 | break; | ||
| 204 | default: | ||
| 205 | dprintk("RPC: %s: unexpected WC event %X\n", | ||
| 206 | __func__, wc->opcode); | ||
| 207 | break; | ||
| 208 | } | ||
| 209 | } | 162 | } |
| 210 | 163 | ||
| 211 | static inline int | 164 | static int |
| 212 | rpcrdma_cq_poll(struct ib_cq *cq) | 165 | rpcrdma_sendcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep) |
| 213 | { | 166 | { |
| 214 | struct ib_wc wc; | 167 | struct ib_wc *wcs; |
| 215 | int rc; | 168 | int budget, count, rc; |
| 216 | 169 | ||
| 217 | for (;;) { | 170 | budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE; |
| 218 | rc = ib_poll_cq(cq, 1, &wc); | 171 | do { |
| 219 | if (rc < 0) { | 172 | wcs = ep->rep_send_wcs; |
| 220 | dprintk("RPC: %s: ib_poll_cq failed %i\n", | 173 | |
| 221 | __func__, rc); | 174 | rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs); |
| 175 | if (rc <= 0) | ||
| 222 | return rc; | 176 | return rc; |
| 223 | } | ||
| 224 | if (rc == 0) | ||
| 225 | break; | ||
| 226 | 177 | ||
| 227 | rpcrdma_event_process(&wc); | 178 | count = rc; |
| 179 | while (count-- > 0) | ||
| 180 | rpcrdma_sendcq_process_wc(wcs++); | ||
| 181 | } while (rc == RPCRDMA_POLLSIZE && --budget); | ||
| 182 | return 0; | ||
| 183 | } | ||
| 184 | |||
| 185 | /* | ||
| 186 | * Handle send, fast_reg_mr, and local_inv completions. | ||
| 187 | * | ||
| 188 | * Send events are typically suppressed and thus do not result | ||
| 189 | * in an upcall. Occasionally one is signaled, however. This | ||
| 190 | * prevents the provider's completion queue from wrapping and | ||
| 191 | * losing a completion. | ||
| 192 | */ | ||
| 193 | static void | ||
| 194 | rpcrdma_sendcq_upcall(struct ib_cq *cq, void *cq_context) | ||
| 195 | { | ||
| 196 | struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context; | ||
| 197 | int rc; | ||
| 198 | |||
| 199 | rc = rpcrdma_sendcq_poll(cq, ep); | ||
| 200 | if (rc) { | ||
| 201 | dprintk("RPC: %s: ib_poll_cq failed: %i\n", | ||
| 202 | __func__, rc); | ||
| 203 | return; | ||
| 228 | } | 204 | } |
| 229 | 205 | ||
| 206 | rc = ib_req_notify_cq(cq, | ||
| 207 | IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS); | ||
| 208 | if (rc == 0) | ||
| 209 | return; | ||
| 210 | if (rc < 0) { | ||
| 211 | dprintk("RPC: %s: ib_req_notify_cq failed: %i\n", | ||
| 212 | __func__, rc); | ||
| 213 | return; | ||
| 214 | } | ||
| 215 | |||
| 216 | rpcrdma_sendcq_poll(cq, ep); | ||
| 217 | } | ||
| 218 | |||
| 219 | static void | ||
| 220 | rpcrdma_recvcq_process_wc(struct ib_wc *wc) | ||
| 221 | { | ||
| 222 | struct rpcrdma_rep *rep = | ||
| 223 | (struct rpcrdma_rep *)(unsigned long)wc->wr_id; | ||
| 224 | |||
| 225 | dprintk("RPC: %s: rep %p status %X opcode %X length %u\n", | ||
| 226 | __func__, rep, wc->status, wc->opcode, wc->byte_len); | ||
| 227 | |||
| 228 | if (wc->status != IB_WC_SUCCESS) { | ||
| 229 | rep->rr_len = ~0U; | ||
| 230 | goto out_schedule; | ||
| 231 | } | ||
| 232 | if (wc->opcode != IB_WC_RECV) | ||
| 233 | return; | ||
| 234 | |||
| 235 | rep->rr_len = wc->byte_len; | ||
| 236 | ib_dma_sync_single_for_cpu(rdmab_to_ia(rep->rr_buffer)->ri_id->device, | ||
| 237 | rep->rr_iov.addr, rep->rr_len, DMA_FROM_DEVICE); | ||
| 238 | |||
| 239 | if (rep->rr_len >= 16) { | ||
| 240 | struct rpcrdma_msg *p = (struct rpcrdma_msg *)rep->rr_base; | ||
| 241 | unsigned int credits = ntohl(p->rm_credit); | ||
| 242 | |||
| 243 | if (credits == 0) | ||
| 244 | credits = 1; /* don't deadlock */ | ||
| 245 | else if (credits > rep->rr_buffer->rb_max_requests) | ||
| 246 | credits = rep->rr_buffer->rb_max_requests; | ||
| 247 | atomic_set(&rep->rr_buffer->rb_credits, credits); | ||
| 248 | } | ||
| 249 | |||
| 250 | out_schedule: | ||
| 251 | rpcrdma_schedule_tasklet(rep); | ||
| 252 | } | ||
| 253 | |||
| 254 | static int | ||
| 255 | rpcrdma_recvcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep) | ||
| 256 | { | ||
| 257 | struct ib_wc *wcs; | ||
| 258 | int budget, count, rc; | ||
| 259 | |||
| 260 | budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE; | ||
| 261 | do { | ||
| 262 | wcs = ep->rep_recv_wcs; | ||
| 263 | |||
| 264 | rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs); | ||
| 265 | if (rc <= 0) | ||
| 266 | return rc; | ||
| 267 | |||
| 268 | count = rc; | ||
| 269 | while (count-- > 0) | ||
| 270 | rpcrdma_recvcq_process_wc(wcs++); | ||
| 271 | } while (rc == RPCRDMA_POLLSIZE && --budget); | ||
| 230 | return 0; | 272 | return 0; |
| 231 | } | 273 | } |
| 232 | 274 | ||
| 233 | /* | 275 | /* |
| 234 | * rpcrdma_cq_event_upcall | 276 | * Handle receive completions. |
| 235 | * | 277 | * |
| 236 | * This upcall handles recv, send, bind and unbind events. | ||
| 237 | * It is reentrant but processes single events in order to maintain | 278 | * It is reentrant but processes single events in order to maintain |
| 238 | * ordering of receives to keep server credits. | 279 | * ordering of receives to keep server credits. |
| 239 | * | 280 | * |
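The credit handling hoisted into rpcrdma_recvcq_process_wc() above only trusts the advertised value after checking that at least the four fixed header words (16 bytes: XID, version, credits, type) arrived, and then clamps it so a misbehaving server can neither deadlock the client with zero credits nor overrun the request pool. A sketch of just that clamp, with the 16-byte check made explicit; in the kernel a short header simply leaves the previous credit value in place:

#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>	/* ntohl */

/* Sketch: extract and sanitize the credit value from a received
 * RPC/RDMA header. The credit field is the third 32-bit word, so the
 * 16-byte minimum guarantees it is actually present.
 */
static unsigned int rdma_credits(const uint8_t *hdr, unsigned int len,
				 unsigned int max_requests)
{
	uint32_t credit_be;
	unsigned int credits;

	if (len < 16)
		return 1;		/* fall back to a single credit */

	memcpy(&credit_be, hdr + 8, sizeof(credit_be));
	credits = ntohl(credit_be);
	if (credits == 0)
		credits = 1;		/* don't deadlock */
	else if (credits > max_requests)
		credits = max_requests;	/* don't over-commit the pool */
	return credits;
}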
| @@ -242,26 +283,31 @@ rpcrdma_cq_poll(struct ib_cq *cq) | |||
| 242 | * connection shutdown. That is, the structures required for | 283 | * connection shutdown. That is, the structures required for |
| 243 | * the completion of the reply handler must remain intact until | 284 | * the completion of the reply handler must remain intact until |
| 244 | * all memory has been reclaimed. | 285 | * all memory has been reclaimed. |
| 245 | * | ||
| 246 | * Note that send events are suppressed and do not result in an upcall. | ||
| 247 | */ | 286 | */ |
| 248 | static void | 287 | static void |
| 249 | rpcrdma_cq_event_upcall(struct ib_cq *cq, void *context) | 288 | rpcrdma_recvcq_upcall(struct ib_cq *cq, void *cq_context) |
| 250 | { | 289 | { |
| 290 | struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context; | ||
| 251 | int rc; | 291 | int rc; |
| 252 | 292 | ||
| 253 | rc = rpcrdma_cq_poll(cq); | 293 | rc = rpcrdma_recvcq_poll(cq, ep); |
| 254 | if (rc) | 294 | if (rc) { |
| 295 | dprintk("RPC: %s: ib_poll_cq failed: %i\n", | ||
| 296 | __func__, rc); | ||
| 255 | return; | 297 | return; |
| 298 | } | ||
| 256 | 299 | ||
| 257 | rc = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); | 300 | rc = ib_req_notify_cq(cq, |
| 258 | if (rc) { | 301 | IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS); |
| 259 | dprintk("RPC: %s: ib_req_notify_cq failed %i\n", | 302 | if (rc == 0) |
| 303 | return; | ||
| 304 | if (rc < 0) { | ||
| 305 | dprintk("RPC: %s: ib_req_notify_cq failed: %i\n", | ||
| 260 | __func__, rc); | 306 | __func__, rc); |
| 261 | return; | 307 | return; |
| 262 | } | 308 | } |
| 263 | 309 | ||
| 264 | rpcrdma_cq_poll(cq); | 310 | rpcrdma_recvcq_poll(cq, ep); |
| 265 | } | 311 | } |
| 266 | 312 | ||
| 267 | #ifdef RPC_DEBUG | 313 | #ifdef RPC_DEBUG |
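Both new upcalls above follow the same drain / re-arm / drain-again discipline: poll until the CQ is empty, re-arm notification with IB_CQ_REPORT_MISSED_EVENTS, and if that call reports that completions slipped in meanwhile, poll once more rather than waiting for an interrupt that may never come. A condensed kernel-style sketch of the pattern; process_wc() stands in for the per-CQ handler, error handling is trimmed, and the patch's batched polling (RPCRDMA_POLLSIZE entries under a budget) is reduced to one entry at a time:

/* Sketch of the upcall discipline used by both CQ handlers above. */
static void cq_upcall(struct ib_cq *cq, void *cq_context)
{
	struct ib_wc wc;
	int rc;

	/* 1. Drain whatever is already queued. */
	while (ib_poll_cq(cq, 1, &wc) > 0)
		process_wc(&wc);

	/* 2. Re-arm, asking to be told about completions that raced in. */
	rc = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);

	/* 3. A positive return means events were missed while unarmed:
	 *    drain again now instead of relying on another interrupt.
	 */
	if (rc > 0)
		while (ib_poll_cq(cq, 1, &wc) > 0)
			process_wc(&wc);
}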
| @@ -493,54 +539,32 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) | |||
| 493 | ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey; | 539 | ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey; |
| 494 | } | 540 | } |
| 495 | 541 | ||
| 496 | switch (memreg) { | 542 | if (memreg == RPCRDMA_FRMR) { |
| 497 | case RPCRDMA_MEMWINDOWS: | ||
| 498 | case RPCRDMA_MEMWINDOWS_ASYNC: | ||
| 499 | if (!(devattr.device_cap_flags & IB_DEVICE_MEM_WINDOW)) { | ||
| 500 | dprintk("RPC: %s: MEMWINDOWS registration " | ||
| 501 | "specified but not supported by adapter, " | ||
| 502 | "using slower RPCRDMA_REGISTER\n", | ||
| 503 | __func__); | ||
| 504 | memreg = RPCRDMA_REGISTER; | ||
| 505 | } | ||
| 506 | break; | ||
| 507 | case RPCRDMA_MTHCAFMR: | ||
| 508 | if (!ia->ri_id->device->alloc_fmr) { | ||
| 509 | #if RPCRDMA_PERSISTENT_REGISTRATION | ||
| 510 | dprintk("RPC: %s: MTHCAFMR registration " | ||
| 511 | "specified but not supported by adapter, " | ||
| 512 | "using riskier RPCRDMA_ALLPHYSICAL\n", | ||
| 513 | __func__); | ||
| 514 | memreg = RPCRDMA_ALLPHYSICAL; | ||
| 515 | #else | ||
| 516 | dprintk("RPC: %s: MTHCAFMR registration " | ||
| 517 | "specified but not supported by adapter, " | ||
| 518 | "using slower RPCRDMA_REGISTER\n", | ||
| 519 | __func__); | ||
| 520 | memreg = RPCRDMA_REGISTER; | ||
| 521 | #endif | ||
| 522 | } | ||
| 523 | break; | ||
| 524 | case RPCRDMA_FRMR: | ||
| 525 | /* Requires both frmr reg and local dma lkey */ | 543 | /* Requires both frmr reg and local dma lkey */ |
| 526 | if ((devattr.device_cap_flags & | 544 | if ((devattr.device_cap_flags & |
| 527 | (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) != | 545 | (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) != |
| 528 | (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) { | 546 | (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) { |
| 529 | #if RPCRDMA_PERSISTENT_REGISTRATION | ||
| 530 | dprintk("RPC: %s: FRMR registration " | 547 | dprintk("RPC: %s: FRMR registration " |
| 531 | "specified but not supported by adapter, " | 548 | "not supported by HCA\n", __func__); |
| 532 | "using riskier RPCRDMA_ALLPHYSICAL\n", | 549 | memreg = RPCRDMA_MTHCAFMR; |
| 533 | __func__); | 550 | } else { |
| 551 | /* Mind the ia limit on FRMR page list depth */ | ||
| 552 | ia->ri_max_frmr_depth = min_t(unsigned int, | ||
| 553 | RPCRDMA_MAX_DATA_SEGS, | ||
| 554 | devattr.max_fast_reg_page_list_len); | ||
| 555 | } | ||
| 556 | } | ||
| 557 | if (memreg == RPCRDMA_MTHCAFMR) { | ||
| 558 | if (!ia->ri_id->device->alloc_fmr) { | ||
| 559 | dprintk("RPC: %s: MTHCAFMR registration " | ||
| 560 | "not supported by HCA\n", __func__); | ||
| 561 | #if RPCRDMA_PERSISTENT_REGISTRATION | ||
| 534 | memreg = RPCRDMA_ALLPHYSICAL; | 562 | memreg = RPCRDMA_ALLPHYSICAL; |
| 535 | #else | 563 | #else |
| 536 | dprintk("RPC: %s: FRMR registration " | 564 | rc = -ENOMEM; |
| 537 | "specified but not supported by adapter, " | 565 | goto out2; |
| 538 | "using slower RPCRDMA_REGISTER\n", | ||
| 539 | __func__); | ||
| 540 | memreg = RPCRDMA_REGISTER; | ||
| 541 | #endif | 566 | #endif |
| 542 | } | 567 | } |
| 543 | break; | ||
| 544 | } | 568 | } |
| 545 | 569 | ||
| 546 | /* | 570 | /* |
| @@ -552,8 +576,6 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) | |||
| 552 | * adapter. | 576 | * adapter. |
| 553 | */ | 577 | */ |
| 554 | switch (memreg) { | 578 | switch (memreg) { |
| 555 | case RPCRDMA_BOUNCEBUFFERS: | ||
| 556 | case RPCRDMA_REGISTER: | ||
| 557 | case RPCRDMA_FRMR: | 579 | case RPCRDMA_FRMR: |
| 558 | break; | 580 | break; |
| 559 | #if RPCRDMA_PERSISTENT_REGISTRATION | 581 | #if RPCRDMA_PERSISTENT_REGISTRATION |
| @@ -563,30 +585,26 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) | |||
| 563 | IB_ACCESS_REMOTE_READ; | 585 | IB_ACCESS_REMOTE_READ; |
| 564 | goto register_setup; | 586 | goto register_setup; |
| 565 | #endif | 587 | #endif |
| 566 | case RPCRDMA_MEMWINDOWS_ASYNC: | ||
| 567 | case RPCRDMA_MEMWINDOWS: | ||
| 568 | mem_priv = IB_ACCESS_LOCAL_WRITE | | ||
| 569 | IB_ACCESS_MW_BIND; | ||
| 570 | goto register_setup; | ||
| 571 | case RPCRDMA_MTHCAFMR: | 588 | case RPCRDMA_MTHCAFMR: |
| 572 | if (ia->ri_have_dma_lkey) | 589 | if (ia->ri_have_dma_lkey) |
| 573 | break; | 590 | break; |
| 574 | mem_priv = IB_ACCESS_LOCAL_WRITE; | 591 | mem_priv = IB_ACCESS_LOCAL_WRITE; |
| 592 | #if RPCRDMA_PERSISTENT_REGISTRATION | ||
| 575 | register_setup: | 593 | register_setup: |
| 594 | #endif | ||
| 576 | ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv); | 595 | ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv); |
| 577 | if (IS_ERR(ia->ri_bind_mem)) { | 596 | if (IS_ERR(ia->ri_bind_mem)) { |
| 578 | printk(KERN_ALERT "%s: ib_get_dma_mr for " | 597 | printk(KERN_ALERT "%s: ib_get_dma_mr for " |
| 579 | "phys register failed with %lX\n\t" | 598 | "phys register failed with %lX\n", |
| 580 | "Will continue with degraded performance\n", | ||
| 581 | __func__, PTR_ERR(ia->ri_bind_mem)); | 599 | __func__, PTR_ERR(ia->ri_bind_mem)); |
| 582 | memreg = RPCRDMA_REGISTER; | 600 | rc = -ENOMEM; |
| 583 | ia->ri_bind_mem = NULL; | 601 | goto out2; |
| 584 | } | 602 | } |
| 585 | break; | 603 | break; |
| 586 | default: | 604 | default: |
| 587 | printk(KERN_ERR "%s: invalid memory registration mode %d\n", | 605 | printk(KERN_ERR "RPC: Unsupported memory " |
| 588 | __func__, memreg); | 606 | "registration mode: %d\n", memreg); |
| 589 | rc = -EINVAL; | 607 | rc = -ENOMEM; |
| 590 | goto out2; | 608 | goto out2; |
| 591 | } | 609 | } |
| 592 | dprintk("RPC: %s: memory registration strategy is %d\n", | 610 | dprintk("RPC: %s: memory registration strategy is %d\n", |
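The rewritten strategy selection above no longer falls back to the removed RPCRDMA_REGISTER/BOUNCEBUFFERS modes: FRMR drops to MTHCAFMR when the HCA lacks the fast-registration and local DMA lkey capabilities, MTHCAFMR drops to ALLPHYSICAL only when persistent registration is compiled in, and anything else now fails the setup with -ENOMEM instead of limping along. The decision chain, flattened into a small sketch with the capability checks reduced to booleans for illustration:

enum memreg { MEMREG_FRMR, MEMREG_MTHCAFMR, MEMREG_ALLPHYSICAL, MEMREG_FAIL };

/* Sketch of the fallback order implemented in rpcrdma_ia_open() above. */
static enum memreg pick_memreg(enum memreg wanted,
			       int has_frmr_and_dma_lkey,
			       int has_fmr,
			       int persistent_registration)
{
	if (wanted == MEMREG_FRMR && !has_frmr_and_dma_lkey)
		wanted = MEMREG_MTHCAFMR;	/* FRMR unsupported by HCA */
	if (wanted == MEMREG_MTHCAFMR && !has_fmr) {
		if (!persistent_registration)
			return MEMREG_FAIL;	/* -ENOMEM in the patch */
		wanted = MEMREG_ALLPHYSICAL;	/* riskier, but functional */
	}
	return wanted;
}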
| @@ -640,6 +658,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, | |||
| 640 | struct rpcrdma_create_data_internal *cdata) | 658 | struct rpcrdma_create_data_internal *cdata) |
| 641 | { | 659 | { |
| 642 | struct ib_device_attr devattr; | 660 | struct ib_device_attr devattr; |
| 661 | struct ib_cq *sendcq, *recvcq; | ||
| 643 | int rc, err; | 662 | int rc, err; |
| 644 | 663 | ||
| 645 | rc = ib_query_device(ia->ri_id->device, &devattr); | 664 | rc = ib_query_device(ia->ri_id->device, &devattr); |
| @@ -659,32 +678,42 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, | |||
| 659 | ep->rep_attr.srq = NULL; | 678 | ep->rep_attr.srq = NULL; |
| 660 | ep->rep_attr.cap.max_send_wr = cdata->max_requests; | 679 | ep->rep_attr.cap.max_send_wr = cdata->max_requests; |
| 661 | switch (ia->ri_memreg_strategy) { | 680 | switch (ia->ri_memreg_strategy) { |
| 662 | case RPCRDMA_FRMR: | 681 | case RPCRDMA_FRMR: { |
| 682 | int depth = 7; | ||
| 683 | |||
| 663 | /* Add room for frmr register and invalidate WRs. | 684 | /* Add room for frmr register and invalidate WRs. |
| 664 | * 1. FRMR reg WR for head | 685 | * 1. FRMR reg WR for head |
| 665 | * 2. FRMR invalidate WR for head | 686 | * 2. FRMR invalidate WR for head |
| 666 | * 3. FRMR reg WR for pagelist | 687 | * 3. N FRMR reg WRs for pagelist |
| 667 | * 4. FRMR invalidate WR for pagelist | 688 | * 4. N FRMR invalidate WRs for pagelist |
| 668 | * 5. FRMR reg WR for tail | 689 | * 5. FRMR reg WR for tail |
| 669 | * 6. FRMR invalidate WR for tail | 690 | * 6. FRMR invalidate WR for tail |
| 670 | * 7. The RDMA_SEND WR | 691 | * 7. The RDMA_SEND WR |
| 671 | */ | 692 | */ |
| 672 | ep->rep_attr.cap.max_send_wr *= 7; | 693 | |
| 694 | /* Calculate N if the device max FRMR depth is smaller than | ||
| 695 | * RPCRDMA_MAX_DATA_SEGS. | ||
| 696 | */ | ||
| 697 | if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) { | ||
| 698 | int delta = RPCRDMA_MAX_DATA_SEGS - | ||
| 699 | ia->ri_max_frmr_depth; | ||
| 700 | |||
| 701 | do { | ||
| 702 | depth += 2; /* FRMR reg + invalidate */ | ||
| 703 | delta -= ia->ri_max_frmr_depth; | ||
| 704 | } while (delta > 0); | ||
| 705 | |||
| 706 | } | ||
| 707 | ep->rep_attr.cap.max_send_wr *= depth; | ||
| 673 | if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) { | 708 | if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) { |
| 674 | cdata->max_requests = devattr.max_qp_wr / 7; | 709 | cdata->max_requests = devattr.max_qp_wr / depth; |
| 675 | if (!cdata->max_requests) | 710 | if (!cdata->max_requests) |
| 676 | return -EINVAL; | 711 | return -EINVAL; |
| 677 | ep->rep_attr.cap.max_send_wr = cdata->max_requests * 7; | 712 | ep->rep_attr.cap.max_send_wr = cdata->max_requests * |
| 713 | depth; | ||
| 678 | } | 714 | } |
| 679 | break; | 715 | break; |
| 680 | case RPCRDMA_MEMWINDOWS_ASYNC: | 716 | } |
| 681 | case RPCRDMA_MEMWINDOWS: | ||
| 682 | /* Add room for mw_binds+unbinds - overkill! */ | ||
| 683 | ep->rep_attr.cap.max_send_wr++; | ||
| 684 | ep->rep_attr.cap.max_send_wr *= (2 * RPCRDMA_MAX_SEGS); | ||
| 685 | if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) | ||
| 686 | return -EINVAL; | ||
| 687 | break; | ||
| 688 | default: | 717 | default: |
| 689 | break; | 718 | break; |
| 690 | } | 719 | } |
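The new depth calculation above sizes the send queue for the worst case: 7 WRs per RPC (register plus invalidate for head, pagelist and tail, plus the SEND itself), with two more WRs for every additional FRMR needed when the device's max_fast_reg_page_list_len cannot cover RPCRDMA_MAX_DATA_SEGS in a single registration. A standalone sketch with a worked example; the value 64 for RPCRDMA_MAX_DATA_SEGS is assumed here purely for illustration:

/* Sketch of the send-queue depth computation in rpcrdma_ep_create(). */
static int frmr_send_depth(int max_data_segs, int max_frmr_depth)
{
	int depth = 7;		/* head/pagelist/tail reg+inv + SEND */

	if (max_frmr_depth < max_data_segs) {
		int delta = max_data_segs - max_frmr_depth;

		do {
			depth += 2;	/* one extra FRMR reg + invalidate */
			delta -= max_frmr_depth;
		} while (delta > 0);
	}
	return depth;
}

/* Example: with max_data_segs = 64 and a device limited to
 * max_frmr_depth = 16, delta starts at 48 and the loop runs three
 * times, giving depth = 13; max_send_wr then becomes
 * cdata->max_requests * 13, and max_requests is shrunk if that would
 * exceed the device's max_qp_wr.
 */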
| @@ -705,46 +734,51 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, | |||
| 705 | ep->rep_attr.cap.max_recv_sge); | 734 | ep->rep_attr.cap.max_recv_sge); |
| 706 | 735 | ||
| 707 | /* set trigger for requesting send completion */ | 736 | /* set trigger for requesting send completion */ |
| 708 | ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 /* - 1*/; | 737 | ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1; |
| 709 | switch (ia->ri_memreg_strategy) { | ||
| 710 | case RPCRDMA_MEMWINDOWS_ASYNC: | ||
| 711 | case RPCRDMA_MEMWINDOWS: | ||
| 712 | ep->rep_cqinit -= RPCRDMA_MAX_SEGS; | ||
| 713 | break; | ||
| 714 | default: | ||
| 715 | break; | ||
| 716 | } | ||
| 717 | if (ep->rep_cqinit <= 2) | 738 | if (ep->rep_cqinit <= 2) |
| 718 | ep->rep_cqinit = 0; | 739 | ep->rep_cqinit = 0; |
| 719 | INIT_CQCOUNT(ep); | 740 | INIT_CQCOUNT(ep); |
| 720 | ep->rep_ia = ia; | 741 | ep->rep_ia = ia; |
| 721 | init_waitqueue_head(&ep->rep_connect_wait); | 742 | init_waitqueue_head(&ep->rep_connect_wait); |
| 743 | INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker); | ||
| 722 | 744 | ||
| 723 | /* | 745 | sendcq = ib_create_cq(ia->ri_id->device, rpcrdma_sendcq_upcall, |
| 724 | * Create a single cq for receive dto and mw_bind (only ever | 746 | rpcrdma_cq_async_error_upcall, ep, |
| 725 | * care about unbind, really). Send completions are suppressed. | ||
| 726 | * Use single threaded tasklet upcalls to maintain ordering. | ||
| 727 | */ | ||
| 728 | ep->rep_cq = ib_create_cq(ia->ri_id->device, rpcrdma_cq_event_upcall, | ||
| 729 | rpcrdma_cq_async_error_upcall, NULL, | ||
| 730 | ep->rep_attr.cap.max_recv_wr + | ||
| 731 | ep->rep_attr.cap.max_send_wr + 1, 0); | 747 | ep->rep_attr.cap.max_send_wr + 1, 0); |
| 732 | if (IS_ERR(ep->rep_cq)) { | 748 | if (IS_ERR(sendcq)) { |
| 733 | rc = PTR_ERR(ep->rep_cq); | 749 | rc = PTR_ERR(sendcq); |
| 734 | dprintk("RPC: %s: ib_create_cq failed: %i\n", | 750 | dprintk("RPC: %s: failed to create send CQ: %i\n", |
| 735 | __func__, rc); | 751 | __func__, rc); |
| 736 | goto out1; | 752 | goto out1; |
| 737 | } | 753 | } |
| 738 | 754 | ||
| 739 | rc = ib_req_notify_cq(ep->rep_cq, IB_CQ_NEXT_COMP); | 755 | rc = ib_req_notify_cq(sendcq, IB_CQ_NEXT_COMP); |
| 756 | if (rc) { | ||
| 757 | dprintk("RPC: %s: ib_req_notify_cq failed: %i\n", | ||
| 758 | __func__, rc); | ||
| 759 | goto out2; | ||
| 760 | } | ||
| 761 | |||
| 762 | recvcq = ib_create_cq(ia->ri_id->device, rpcrdma_recvcq_upcall, | ||
| 763 | rpcrdma_cq_async_error_upcall, ep, | ||
| 764 | ep->rep_attr.cap.max_recv_wr + 1, 0); | ||
| 765 | if (IS_ERR(recvcq)) { | ||
| 766 | rc = PTR_ERR(recvcq); | ||
| 767 | dprintk("RPC: %s: failed to create recv CQ: %i\n", | ||
| 768 | __func__, rc); | ||
| 769 | goto out2; | ||
| 770 | } | ||
| 771 | |||
| 772 | rc = ib_req_notify_cq(recvcq, IB_CQ_NEXT_COMP); | ||
| 740 | if (rc) { | 773 | if (rc) { |
| 741 | dprintk("RPC: %s: ib_req_notify_cq failed: %i\n", | 774 | dprintk("RPC: %s: ib_req_notify_cq failed: %i\n", |
| 742 | __func__, rc); | 775 | __func__, rc); |
| 776 | ib_destroy_cq(recvcq); | ||
| 743 | goto out2; | 777 | goto out2; |
| 744 | } | 778 | } |
| 745 | 779 | ||
| 746 | ep->rep_attr.send_cq = ep->rep_cq; | 780 | ep->rep_attr.send_cq = sendcq; |
| 747 | ep->rep_attr.recv_cq = ep->rep_cq; | 781 | ep->rep_attr.recv_cq = recvcq; |
| 748 | 782 | ||
| 749 | /* Initialize cma parameters */ | 783 | /* Initialize cma parameters */ |
| 750 | 784 | ||
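rep_cqinit above is the number of SENDs that may be posted before one must request a completion. With send and receive completions now split onto separate CQs, the send CQ is sized at max_send_wr + 1, so signaling roughly every half queue depth keeps it from wrapping while still suppressing almost all send interrupts, which is the behaviour the rpcrdma_sendcq_upcall() comment earlier describes. A sketch of the counter this implies, assuming INIT_CQCOUNT/DECR_CQCOUNT-style accounting in the post path (that code is not part of this excerpt):

/* Sketch: decide whether the next send WR must be signaled.
 * 'cqcount' starts at cqinit and is decremented per post; when it
 * reaches zero the WR is signaled and the counter is re-armed.
 */
static int send_needs_signal(int *cqcount, int cqinit)
{
	if (cqinit == 0)
		return 1;		/* tiny queue: signal every send */
	if (--(*cqcount) > 0)
		return 0;		/* suppress this completion */
	*cqcount = cqinit;		/* re-arm the counter */
	return 1;			/* set IB_SEND_SIGNALED on this WR */
}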
| @@ -754,9 +788,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, | |||
| 754 | 788 | ||
| 755 | /* Client offers RDMA Read but does not initiate */ | 789 | /* Client offers RDMA Read but does not initiate */ |
| 756 | ep->rep_remote_cma.initiator_depth = 0; | 790 | ep->rep_remote_cma.initiator_depth = 0; |
| 757 | if (ia->ri_memreg_strategy == RPCRDMA_BOUNCEBUFFERS) | 791 | if (devattr.max_qp_rd_atom > 32) /* arbitrary but <= 255 */ |
| 758 | ep->rep_remote_cma.responder_resources = 0; | ||
| 759 | else if (devattr.max_qp_rd_atom > 32) /* arbitrary but <= 255 */ | ||
| 760 | ep->rep_remote_cma.responder_resources = 32; | 792 | ep->rep_remote_cma.responder_resources = 32; |
| 761 | else | 793 | else |
| 762 | ep->rep_remote_cma.responder_resources = devattr.max_qp_rd_atom; | 794 | ep->rep_remote_cma.responder_resources = devattr.max_qp_rd_atom; |
| @@ -768,7 +800,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, | |||
| 768 | return 0; | 800 | return 0; |
| 769 | 801 | ||
| 770 | out2: | 802 | out2: |
| 771 | err = ib_destroy_cq(ep->rep_cq); | 803 | err = ib_destroy_cq(sendcq); |
| 772 | if (err) | 804 | if (err) |
| 773 | dprintk("RPC: %s: ib_destroy_cq returned %i\n", | 805 | dprintk("RPC: %s: ib_destroy_cq returned %i\n", |
| 774 | __func__, err); | 806 | __func__, err); |
| @@ -782,11 +814,8 @@ out1: | |||
| 782 | * Disconnect and destroy endpoint. After this, the only | 814 | * Disconnect and destroy endpoint. After this, the only |
| 783 | * valid operations on the ep are to free it (if dynamically | 815 | * valid operations on the ep are to free it (if dynamically |
| 784 | * allocated) or re-create it. | 816 | * allocated) or re-create it. |
| 785 | * | ||
| 786 | * The caller's error handling must be sure to not leak the endpoint | ||
| 787 | * if this function fails. | ||
| 788 | */ | 817 | */ |
| 789 | int | 818 | void |
| 790 | rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) | 819 | rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) |
| 791 | { | 820 | { |
| 792 | int rc; | 821 | int rc; |
| @@ -794,6 +823,8 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) | |||
| 794 | dprintk("RPC: %s: entering, connected is %d\n", | 823 | dprintk("RPC: %s: entering, connected is %d\n", |
| 795 | __func__, ep->rep_connected); | 824 | __func__, ep->rep_connected); |
| 796 | 825 | ||
| 826 | cancel_delayed_work_sync(&ep->rep_connect_worker); | ||
| 827 | |||
| 797 | if (ia->ri_id->qp) { | 828 | if (ia->ri_id->qp) { |
| 798 | rc = rpcrdma_ep_disconnect(ep, ia); | 829 | rc = rpcrdma_ep_disconnect(ep, ia); |
| 799 | if (rc) | 830 | if (rc) |
| @@ -809,13 +840,17 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) | |||
| 809 | ep->rep_pad_mr = NULL; | 840 | ep->rep_pad_mr = NULL; |
| 810 | } | 841 | } |
| 811 | 842 | ||
| 812 | rpcrdma_clean_cq(ep->rep_cq); | 843 | rpcrdma_clean_cq(ep->rep_attr.recv_cq); |
| 813 | rc = ib_destroy_cq(ep->rep_cq); | 844 | rc = ib_destroy_cq(ep->rep_attr.recv_cq); |
| 814 | if (rc) | 845 | if (rc) |
| 815 | dprintk("RPC: %s: ib_destroy_cq returned %i\n", | 846 | dprintk("RPC: %s: ib_destroy_cq returned %i\n", |
| 816 | __func__, rc); | 847 | __func__, rc); |
| 817 | 848 | ||
| 818 | return rc; | 849 | rpcrdma_clean_cq(ep->rep_attr.send_cq); |
| 850 | rc = ib_destroy_cq(ep->rep_attr.send_cq); | ||
| 851 | if (rc) | ||
| 852 | dprintk("RPC: %s: ib_destroy_cq returned %i\n", | ||
| 853 | __func__, rc); | ||
| 819 | } | 854 | } |
| 820 | 855 | ||
| 821 | /* | 856 | /* |
| @@ -831,17 +866,20 @@ rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) | |||
| 831 | if (ep->rep_connected != 0) { | 866 | if (ep->rep_connected != 0) { |
| 832 | struct rpcrdma_xprt *xprt; | 867 | struct rpcrdma_xprt *xprt; |
| 833 | retry: | 868 | retry: |
| 869 | dprintk("RPC: %s: reconnecting...\n", __func__); | ||
| 834 | rc = rpcrdma_ep_disconnect(ep, ia); | 870 | rc = rpcrdma_ep_disconnect(ep, ia); |
| 835 | if (rc && rc != -ENOTCONN) | 871 | if (rc && rc != -ENOTCONN) |
| 836 | dprintk("RPC: %s: rpcrdma_ep_disconnect" | 872 | dprintk("RPC: %s: rpcrdma_ep_disconnect" |
| 837 | " status %i\n", __func__, rc); | 873 | " status %i\n", __func__, rc); |
| 838 | rpcrdma_clean_cq(ep->rep_cq); | 874 | |
| 875 | rpcrdma_clean_cq(ep->rep_attr.recv_cq); | ||
| 876 | rpcrdma_clean_cq(ep->rep_attr.send_cq); | ||
| 839 | 877 | ||
| 840 | xprt = container_of(ia, struct rpcrdma_xprt, rx_ia); | 878 | xprt = container_of(ia, struct rpcrdma_xprt, rx_ia); |
| 841 | id = rpcrdma_create_id(xprt, ia, | 879 | id = rpcrdma_create_id(xprt, ia, |
| 842 | (struct sockaddr *)&xprt->rx_data.addr); | 880 | (struct sockaddr *)&xprt->rx_data.addr); |
| 843 | if (IS_ERR(id)) { | 881 | if (IS_ERR(id)) { |
| 844 | rc = PTR_ERR(id); | 882 | rc = -EHOSTUNREACH; |
| 845 | goto out; | 883 | goto out; |
| 846 | } | 884 | } |
| 847 | /* TEMP TEMP TEMP - fail if new device: | 885 | /* TEMP TEMP TEMP - fail if new device: |
| @@ -855,35 +893,32 @@ retry: | |||
| 855 | printk("RPC: %s: can't reconnect on " | 893 | printk("RPC: %s: can't reconnect on " |
| 856 | "different device!\n", __func__); | 894 | "different device!\n", __func__); |
| 857 | rdma_destroy_id(id); | 895 | rdma_destroy_id(id); |
| 858 | rc = -ENETDOWN; | 896 | rc = -ENETUNREACH; |
| 859 | goto out; | 897 | goto out; |
| 860 | } | 898 | } |
| 861 | /* END TEMP */ | 899 | /* END TEMP */ |
| 900 | rc = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr); | ||
| 901 | if (rc) { | ||
| 902 | dprintk("RPC: %s: rdma_create_qp failed %i\n", | ||
| 903 | __func__, rc); | ||
| 904 | rdma_destroy_id(id); | ||
| 905 | rc = -ENETUNREACH; | ||
| 906 | goto out; | ||
| 907 | } | ||
| 862 | rdma_destroy_qp(ia->ri_id); | 908 | rdma_destroy_qp(ia->ri_id); |
| 863 | rdma_destroy_id(ia->ri_id); | 909 | rdma_destroy_id(ia->ri_id); |
| 864 | ia->ri_id = id; | 910 | ia->ri_id = id; |
| 911 | } else { | ||
| 912 | dprintk("RPC: %s: connecting...\n", __func__); | ||
| 913 | rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr); | ||
| 914 | if (rc) { | ||
| 915 | dprintk("RPC: %s: rdma_create_qp failed %i\n", | ||
| 916 | __func__, rc); | ||
| 917 | /* do not update ep->rep_connected */ | ||
| 918 | return -ENETUNREACH; | ||
| 919 | } | ||
| 865 | } | 920 | } |
| 866 | 921 | ||
| 867 | rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr); | ||
| 868 | if (rc) { | ||
| 869 | dprintk("RPC: %s: rdma_create_qp failed %i\n", | ||
| 870 | __func__, rc); | ||
| 871 | goto out; | ||
| 872 | } | ||
| 873 | |||
| 874 | /* XXX Tavor device performs badly with 2K MTU! */ | ||
| 875 | if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) { | ||
| 876 | struct pci_dev *pcid = to_pci_dev(ia->ri_id->device->dma_device); | ||
| 877 | if (pcid->device == PCI_DEVICE_ID_MELLANOX_TAVOR && | ||
| 878 | (pcid->vendor == PCI_VENDOR_ID_MELLANOX || | ||
| 879 | pcid->vendor == PCI_VENDOR_ID_TOPSPIN)) { | ||
| 880 | struct ib_qp_attr attr = { | ||
| 881 | .path_mtu = IB_MTU_1024 | ||
| 882 | }; | ||
| 883 | rc = ib_modify_qp(ia->ri_id->qp, &attr, IB_QP_PATH_MTU); | ||
| 884 | } | ||
| 885 | } | ||
| 886 | |||
| 887 | ep->rep_connected = 0; | 922 | ep->rep_connected = 0; |
| 888 | 923 | ||
| 889 | rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma); | 924 | rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma); |
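Note: in the reconnect path above, the replacement QP is now created on the new cm_id before the old QP and cm_id are torn down, so a failed rdma_create_qp() leaves the existing ia->ri_id usable and reports -ENETUNREACH (a failed rpcrdma_create_id() reports -EHOSTUNREACH). The Mellanox Tavor 1K-MTU workaround that used to follow this block is dropped. Condensed, the new ordering looks roughly like this:

	/* Sketch of the reconnect ordering: build the new QP first */
	id = rpcrdma_create_id(xprt, ia, (struct sockaddr *)&xprt->rx_data.addr);
	if (IS_ERR(id))
		return -EHOSTUNREACH;

	rc = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr);
	if (rc) {
		rdma_destroy_id(id);		/* old id and qp are still intact */
		return -ENETUNREACH;
	}

	rdma_destroy_qp(ia->ri_id);		/* retire the old resources last */
	rdma_destroy_id(ia->ri_id);
	ia->ri_id = id;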
| @@ -944,7 +979,8 @@ rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) | |||
| 944 | { | 979 | { |
| 945 | int rc; | 980 | int rc; |
| 946 | 981 | ||
| 947 | rpcrdma_clean_cq(ep->rep_cq); | 982 | rpcrdma_clean_cq(ep->rep_attr.recv_cq); |
| 983 | rpcrdma_clean_cq(ep->rep_attr.send_cq); | ||
| 948 | rc = rdma_disconnect(ia->ri_id); | 984 | rc = rdma_disconnect(ia->ri_id); |
| 949 | if (!rc) { | 985 | if (!rc) { |
| 950 | /* returns without wait if not connected */ | 986 | /* returns without wait if not connected */ |
| @@ -967,7 +1003,7 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, | |||
| 967 | struct rpcrdma_ia *ia, struct rpcrdma_create_data_internal *cdata) | 1003 | struct rpcrdma_ia *ia, struct rpcrdma_create_data_internal *cdata) |
| 968 | { | 1004 | { |
| 969 | char *p; | 1005 | char *p; |
| 970 | size_t len; | 1006 | size_t len, rlen, wlen; |
| 971 | int i, rc; | 1007 | int i, rc; |
| 972 | struct rpcrdma_mw *r; | 1008 | struct rpcrdma_mw *r; |
| 973 | 1009 | ||
| @@ -997,11 +1033,6 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, | |||
| 997 | len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS * | 1033 | len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS * |
| 998 | sizeof(struct rpcrdma_mw); | 1034 | sizeof(struct rpcrdma_mw); |
| 999 | break; | 1035 | break; |
| 1000 | case RPCRDMA_MEMWINDOWS_ASYNC: | ||
| 1001 | case RPCRDMA_MEMWINDOWS: | ||
| 1002 | len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS * | ||
| 1003 | sizeof(struct rpcrdma_mw); | ||
| 1004 | break; | ||
| 1005 | default: | 1036 | default: |
| 1006 | break; | 1037 | break; |
| 1007 | } | 1038 | } |
| @@ -1032,32 +1063,29 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, | |||
| 1032 | } | 1063 | } |
| 1033 | p += cdata->padding; | 1064 | p += cdata->padding; |
| 1034 | 1065 | ||
| 1035 | /* | ||
| 1036 | * Allocate the fmr's, or mw's for mw_bind chunk registration. | ||
| 1037 | * We "cycle" the mw's in order to minimize rkey reuse, | ||
| 1038 | * and also reduce unbind-to-bind collision. | ||
| 1039 | */ | ||
| 1040 | INIT_LIST_HEAD(&buf->rb_mws); | 1066 | INIT_LIST_HEAD(&buf->rb_mws); |
| 1041 | r = (struct rpcrdma_mw *)p; | 1067 | r = (struct rpcrdma_mw *)p; |
| 1042 | switch (ia->ri_memreg_strategy) { | 1068 | switch (ia->ri_memreg_strategy) { |
| 1043 | case RPCRDMA_FRMR: | 1069 | case RPCRDMA_FRMR: |
| 1044 | for (i = buf->rb_max_requests * RPCRDMA_MAX_SEGS; i; i--) { | 1070 | for (i = buf->rb_max_requests * RPCRDMA_MAX_SEGS; i; i--) { |
| 1045 | r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd, | 1071 | r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd, |
| 1046 | RPCRDMA_MAX_SEGS); | 1072 | ia->ri_max_frmr_depth); |
| 1047 | if (IS_ERR(r->r.frmr.fr_mr)) { | 1073 | if (IS_ERR(r->r.frmr.fr_mr)) { |
| 1048 | rc = PTR_ERR(r->r.frmr.fr_mr); | 1074 | rc = PTR_ERR(r->r.frmr.fr_mr); |
| 1049 | dprintk("RPC: %s: ib_alloc_fast_reg_mr" | 1075 | dprintk("RPC: %s: ib_alloc_fast_reg_mr" |
| 1050 | " failed %i\n", __func__, rc); | 1076 | " failed %i\n", __func__, rc); |
| 1051 | goto out; | 1077 | goto out; |
| 1052 | } | 1078 | } |
| 1053 | r->r.frmr.fr_pgl = | 1079 | r->r.frmr.fr_pgl = ib_alloc_fast_reg_page_list( |
| 1054 | ib_alloc_fast_reg_page_list(ia->ri_id->device, | 1080 | ia->ri_id->device, |
| 1055 | RPCRDMA_MAX_SEGS); | 1081 | ia->ri_max_frmr_depth); |
| 1056 | if (IS_ERR(r->r.frmr.fr_pgl)) { | 1082 | if (IS_ERR(r->r.frmr.fr_pgl)) { |
| 1057 | rc = PTR_ERR(r->r.frmr.fr_pgl); | 1083 | rc = PTR_ERR(r->r.frmr.fr_pgl); |
| 1058 | dprintk("RPC: %s: " | 1084 | dprintk("RPC: %s: " |
| 1059 | "ib_alloc_fast_reg_page_list " | 1085 | "ib_alloc_fast_reg_page_list " |
| 1060 | "failed %i\n", __func__, rc); | 1086 | "failed %i\n", __func__, rc); |
| 1087 | |||
| 1088 | ib_dereg_mr(r->r.frmr.fr_mr); | ||
| 1061 | goto out; | 1089 | goto out; |
| 1062 | } | 1090 | } |
| 1063 | list_add(&r->mw_list, &buf->rb_mws); | 1091 | list_add(&r->mw_list, &buf->rb_mws); |
| @@ -1082,21 +1110,6 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, | |||
| 1082 | ++r; | 1110 | ++r; |
| 1083 | } | 1111 | } |
| 1084 | break; | 1112 | break; |
| 1085 | case RPCRDMA_MEMWINDOWS_ASYNC: | ||
| 1086 | case RPCRDMA_MEMWINDOWS: | ||
| 1087 | /* Allocate one extra request's worth, for full cycling */ | ||
| 1088 | for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) { | ||
| 1089 | r->r.mw = ib_alloc_mw(ia->ri_pd, IB_MW_TYPE_1); | ||
| 1090 | if (IS_ERR(r->r.mw)) { | ||
| 1091 | rc = PTR_ERR(r->r.mw); | ||
| 1092 | dprintk("RPC: %s: ib_alloc_mw" | ||
| 1093 | " failed %i\n", __func__, rc); | ||
| 1094 | goto out; | ||
| 1095 | } | ||
| 1096 | list_add(&r->mw_list, &buf->rb_mws); | ||
| 1097 | ++r; | ||
| 1098 | } | ||
| 1099 | break; | ||
| 1100 | default: | 1113 | default: |
| 1101 | break; | 1114 | break; |
| 1102 | } | 1115 | } |
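Note: two details change in the FRMR pool setup above. Each fast-reg MR and its page list are now sized by ia->ri_max_frmr_depth (a per-device limit recorded in the IA; see the xprt_rdma.h hunk below) instead of the fixed RPCRDMA_MAX_SEGS, and a failure to allocate the page list now deregisters the MR it was paired with rather than leaking it. The same ri_max_frmr_depth limit is applied when building fast-reg work requests in rpcrdma_register_frmr_external() further down. A condensed sketch of one pool entry:

	/* Sketch: one FRMR sized to the device's fast-registration depth */
	r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd, ia->ri_max_frmr_depth);
	if (IS_ERR(r->r.frmr.fr_mr))
		goto out;
	r->r.frmr.fr_pgl = ib_alloc_fast_reg_page_list(ia->ri_id->device,
						       ia->ri_max_frmr_depth);
	if (IS_ERR(r->r.frmr.fr_pgl)) {
		ib_dereg_mr(r->r.frmr.fr_mr);	/* don't leak the MR */
		goto out;
	}
	list_add(&r->mw_list, &buf->rb_mws);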
| @@ -1105,16 +1118,16 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, | |||
| 1105 | * Allocate/init the request/reply buffers. Doing this | 1118 | * Allocate/init the request/reply buffers. Doing this |
| 1106 | * using kmalloc for now -- one for each buf. | 1119 | * using kmalloc for now -- one for each buf. |
| 1107 | */ | 1120 | */ |
| 1121 | wlen = 1 << fls(cdata->inline_wsize + sizeof(struct rpcrdma_req)); | ||
| 1122 | rlen = 1 << fls(cdata->inline_rsize + sizeof(struct rpcrdma_rep)); | ||
| 1123 | dprintk("RPC: %s: wlen = %zu, rlen = %zu\n", | ||
| 1124 | __func__, wlen, rlen); | ||
| 1125 | |||
| 1108 | for (i = 0; i < buf->rb_max_requests; i++) { | 1126 | for (i = 0; i < buf->rb_max_requests; i++) { |
| 1109 | struct rpcrdma_req *req; | 1127 | struct rpcrdma_req *req; |
| 1110 | struct rpcrdma_rep *rep; | 1128 | struct rpcrdma_rep *rep; |
| 1111 | 1129 | ||
| 1112 | len = cdata->inline_wsize + sizeof(struct rpcrdma_req); | 1130 | req = kmalloc(wlen, GFP_KERNEL); |
| 1113 | /* RPC layer requests *double* size + 1K RPC_SLACK_SPACE! */ | ||
| 1114 | /* Typical ~2400b, so rounding up saves work later */ | ||
| 1115 | if (len < 4096) | ||
| 1116 | len = 4096; | ||
| 1117 | req = kmalloc(len, GFP_KERNEL); | ||
| 1118 | if (req == NULL) { | 1131 | if (req == NULL) { |
| 1119 | dprintk("RPC: %s: request buffer %d alloc" | 1132 | dprintk("RPC: %s: request buffer %d alloc" |
| 1120 | " failed\n", __func__, i); | 1133 | " failed\n", __func__, i); |
| @@ -1126,16 +1139,16 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, | |||
| 1126 | buf->rb_send_bufs[i]->rl_buffer = buf; | 1139 | buf->rb_send_bufs[i]->rl_buffer = buf; |
| 1127 | 1140 | ||
| 1128 | rc = rpcrdma_register_internal(ia, req->rl_base, | 1141 | rc = rpcrdma_register_internal(ia, req->rl_base, |
| 1129 | len - offsetof(struct rpcrdma_req, rl_base), | 1142 | wlen - offsetof(struct rpcrdma_req, rl_base), |
| 1130 | &buf->rb_send_bufs[i]->rl_handle, | 1143 | &buf->rb_send_bufs[i]->rl_handle, |
| 1131 | &buf->rb_send_bufs[i]->rl_iov); | 1144 | &buf->rb_send_bufs[i]->rl_iov); |
| 1132 | if (rc) | 1145 | if (rc) |
| 1133 | goto out; | 1146 | goto out; |
| 1134 | 1147 | ||
| 1135 | buf->rb_send_bufs[i]->rl_size = len-sizeof(struct rpcrdma_req); | 1148 | buf->rb_send_bufs[i]->rl_size = wlen - |
| 1149 | sizeof(struct rpcrdma_req); | ||
| 1136 | 1150 | ||
| 1137 | len = cdata->inline_rsize + sizeof(struct rpcrdma_rep); | 1151 | rep = kmalloc(rlen, GFP_KERNEL); |
| 1138 | rep = kmalloc(len, GFP_KERNEL); | ||
| 1139 | if (rep == NULL) { | 1152 | if (rep == NULL) { |
| 1140 | dprintk("RPC: %s: reply buffer %d alloc failed\n", | 1153 | dprintk("RPC: %s: reply buffer %d alloc failed\n", |
| 1141 | __func__, i); | 1154 | __func__, i); |
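Note: the hard-coded "round small requests up to 4096" logic is replaced by rounding each buffer size up with fls(): 1 << fls(x) is the smallest power of two strictly greater than x, so a typical ~2.4 KB send buffer still lands on 4 KB while larger inline sizes scale up automatically. A worked example (the struct sizes here are illustrative assumptions, not values from this patch):

	/* fls() returns the index of the highest set bit, so 1 << fls(x)
	 * rounds x up to the next power of two.
	 */
	wlen = 1 << fls(1024 + 1376);	/* inline_wsize + sizeof(struct rpcrdma_req): 2400 -> 4096 */
	rlen = 1 << fls(4096 + 256);	/* inline_rsize + sizeof(struct rpcrdma_rep): 4352 -> 8192 */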
| @@ -1145,10 +1158,9 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, | |||
| 1145 | memset(rep, 0, sizeof(struct rpcrdma_rep)); | 1158 | memset(rep, 0, sizeof(struct rpcrdma_rep)); |
| 1146 | buf->rb_recv_bufs[i] = rep; | 1159 | buf->rb_recv_bufs[i] = rep; |
| 1147 | buf->rb_recv_bufs[i]->rr_buffer = buf; | 1160 | buf->rb_recv_bufs[i]->rr_buffer = buf; |
| 1148 | init_waitqueue_head(&rep->rr_unbind); | ||
| 1149 | 1161 | ||
| 1150 | rc = rpcrdma_register_internal(ia, rep->rr_base, | 1162 | rc = rpcrdma_register_internal(ia, rep->rr_base, |
| 1151 | len - offsetof(struct rpcrdma_rep, rr_base), | 1163 | rlen - offsetof(struct rpcrdma_rep, rr_base), |
| 1152 | &buf->rb_recv_bufs[i]->rr_handle, | 1164 | &buf->rb_recv_bufs[i]->rr_handle, |
| 1153 | &buf->rb_recv_bufs[i]->rr_iov); | 1165 | &buf->rb_recv_bufs[i]->rr_iov); |
| 1154 | if (rc) | 1166 | if (rc) |
| @@ -1179,7 +1191,6 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) | |||
| 1179 | 1191 | ||
| 1180 | /* clean up in reverse order from create | 1192 | /* clean up in reverse order from create |
| 1181 | * 1. recv mr memory (mr free, then kfree) | 1193 | * 1. recv mr memory (mr free, then kfree) |
| 1182 | * 1a. bind mw memory | ||
| 1183 | * 2. send mr memory (mr free, then kfree) | 1194 | * 2. send mr memory (mr free, then kfree) |
| 1184 | * 3. padding (if any) [moved to rpcrdma_ep_destroy] | 1195 | * 3. padding (if any) [moved to rpcrdma_ep_destroy] |
| 1185 | * 4. arrays | 1196 | * 4. arrays |
| @@ -1194,41 +1205,6 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) | |||
| 1194 | kfree(buf->rb_recv_bufs[i]); | 1205 | kfree(buf->rb_recv_bufs[i]); |
| 1195 | } | 1206 | } |
| 1196 | if (buf->rb_send_bufs && buf->rb_send_bufs[i]) { | 1207 | if (buf->rb_send_bufs && buf->rb_send_bufs[i]) { |
| 1197 | while (!list_empty(&buf->rb_mws)) { | ||
| 1198 | r = list_entry(buf->rb_mws.next, | ||
| 1199 | struct rpcrdma_mw, mw_list); | ||
| 1200 | list_del(&r->mw_list); | ||
| 1201 | switch (ia->ri_memreg_strategy) { | ||
| 1202 | case RPCRDMA_FRMR: | ||
| 1203 | rc = ib_dereg_mr(r->r.frmr.fr_mr); | ||
| 1204 | if (rc) | ||
| 1205 | dprintk("RPC: %s:" | ||
| 1206 | " ib_dereg_mr" | ||
| 1207 | " failed %i\n", | ||
| 1208 | __func__, rc); | ||
| 1209 | ib_free_fast_reg_page_list(r->r.frmr.fr_pgl); | ||
| 1210 | break; | ||
| 1211 | case RPCRDMA_MTHCAFMR: | ||
| 1212 | rc = ib_dealloc_fmr(r->r.fmr); | ||
| 1213 | if (rc) | ||
| 1214 | dprintk("RPC: %s:" | ||
| 1215 | " ib_dealloc_fmr" | ||
| 1216 | " failed %i\n", | ||
| 1217 | __func__, rc); | ||
| 1218 | break; | ||
| 1219 | case RPCRDMA_MEMWINDOWS_ASYNC: | ||
| 1220 | case RPCRDMA_MEMWINDOWS: | ||
| 1221 | rc = ib_dealloc_mw(r->r.mw); | ||
| 1222 | if (rc) | ||
| 1223 | dprintk("RPC: %s:" | ||
| 1224 | " ib_dealloc_mw" | ||
| 1225 | " failed %i\n", | ||
| 1226 | __func__, rc); | ||
| 1227 | break; | ||
| 1228 | default: | ||
| 1229 | break; | ||
| 1230 | } | ||
| 1231 | } | ||
| 1232 | rpcrdma_deregister_internal(ia, | 1208 | rpcrdma_deregister_internal(ia, |
| 1233 | buf->rb_send_bufs[i]->rl_handle, | 1209 | buf->rb_send_bufs[i]->rl_handle, |
| 1234 | &buf->rb_send_bufs[i]->rl_iov); | 1210 | &buf->rb_send_bufs[i]->rl_iov); |
| @@ -1236,6 +1212,33 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) | |||
| 1236 | } | 1212 | } |
| 1237 | } | 1213 | } |
| 1238 | 1214 | ||
| 1215 | while (!list_empty(&buf->rb_mws)) { | ||
| 1216 | r = list_entry(buf->rb_mws.next, | ||
| 1217 | struct rpcrdma_mw, mw_list); | ||
| 1218 | list_del(&r->mw_list); | ||
| 1219 | switch (ia->ri_memreg_strategy) { | ||
| 1220 | case RPCRDMA_FRMR: | ||
| 1221 | rc = ib_dereg_mr(r->r.frmr.fr_mr); | ||
| 1222 | if (rc) | ||
| 1223 | dprintk("RPC: %s:" | ||
| 1224 | " ib_dereg_mr" | ||
| 1225 | " failed %i\n", | ||
| 1226 | __func__, rc); | ||
| 1227 | ib_free_fast_reg_page_list(r->r.frmr.fr_pgl); | ||
| 1228 | break; | ||
| 1229 | case RPCRDMA_MTHCAFMR: | ||
| 1230 | rc = ib_dealloc_fmr(r->r.fmr); | ||
| 1231 | if (rc) | ||
| 1232 | dprintk("RPC: %s:" | ||
| 1233 | " ib_dealloc_fmr" | ||
| 1234 | " failed %i\n", | ||
| 1235 | __func__, rc); | ||
| 1236 | break; | ||
| 1237 | default: | ||
| 1238 | break; | ||
| 1239 | } | ||
| 1240 | } | ||
| 1241 | |||
| 1239 | kfree(buf->rb_pool); | 1242 | kfree(buf->rb_pool); |
| 1240 | } | 1243 | } |
| 1241 | 1244 | ||
| @@ -1299,21 +1302,17 @@ rpcrdma_buffer_put(struct rpcrdma_req *req) | |||
| 1299 | int i; | 1302 | int i; |
| 1300 | unsigned long flags; | 1303 | unsigned long flags; |
| 1301 | 1304 | ||
| 1302 | BUG_ON(req->rl_nchunks != 0); | ||
| 1303 | spin_lock_irqsave(&buffers->rb_lock, flags); | 1305 | spin_lock_irqsave(&buffers->rb_lock, flags); |
| 1304 | buffers->rb_send_bufs[--buffers->rb_send_index] = req; | 1306 | buffers->rb_send_bufs[--buffers->rb_send_index] = req; |
| 1305 | req->rl_niovs = 0; | 1307 | req->rl_niovs = 0; |
| 1306 | if (req->rl_reply) { | 1308 | if (req->rl_reply) { |
| 1307 | buffers->rb_recv_bufs[--buffers->rb_recv_index] = req->rl_reply; | 1309 | buffers->rb_recv_bufs[--buffers->rb_recv_index] = req->rl_reply; |
| 1308 | init_waitqueue_head(&req->rl_reply->rr_unbind); | ||
| 1309 | req->rl_reply->rr_func = NULL; | 1310 | req->rl_reply->rr_func = NULL; |
| 1310 | req->rl_reply = NULL; | 1311 | req->rl_reply = NULL; |
| 1311 | } | 1312 | } |
| 1312 | switch (ia->ri_memreg_strategy) { | 1313 | switch (ia->ri_memreg_strategy) { |
| 1313 | case RPCRDMA_FRMR: | 1314 | case RPCRDMA_FRMR: |
| 1314 | case RPCRDMA_MTHCAFMR: | 1315 | case RPCRDMA_MTHCAFMR: |
| 1315 | case RPCRDMA_MEMWINDOWS_ASYNC: | ||
| 1316 | case RPCRDMA_MEMWINDOWS: | ||
| 1317 | /* | 1316 | /* |
| 1318 | * Cycle mw's back in reverse order, and "spin" them. | 1317 | * Cycle mw's back in reverse order, and "spin" them. |
| 1319 | * This delays and scrambles reuse as much as possible. | 1318 | * This delays and scrambles reuse as much as possible. |
| @@ -1358,8 +1357,7 @@ rpcrdma_recv_buffer_get(struct rpcrdma_req *req) | |||
| 1358 | 1357 | ||
| 1359 | /* | 1358 | /* |
| 1360 | * Put reply buffers back into pool when not attached to | 1359 | * Put reply buffers back into pool when not attached to |
| 1361 | * request. This happens in error conditions, and when | 1360 | * request. This happens in error conditions. |
| 1362 | * aborting unbinds. Pre-decrement counter/array index. | ||
| 1363 | */ | 1361 | */ |
| 1364 | void | 1362 | void |
| 1365 | rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep) | 1363 | rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep) |
| @@ -1498,8 +1496,8 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, | |||
| 1498 | seg1->mr_offset -= pageoff; /* start of page */ | 1496 | seg1->mr_offset -= pageoff; /* start of page */ |
| 1499 | seg1->mr_len += pageoff; | 1497 | seg1->mr_len += pageoff; |
| 1500 | len = -pageoff; | 1498 | len = -pageoff; |
| 1501 | if (*nsegs > RPCRDMA_MAX_DATA_SEGS) | 1499 | if (*nsegs > ia->ri_max_frmr_depth) |
| 1502 | *nsegs = RPCRDMA_MAX_DATA_SEGS; | 1500 | *nsegs = ia->ri_max_frmr_depth; |
| 1503 | for (page_no = i = 0; i < *nsegs;) { | 1501 | for (page_no = i = 0; i < *nsegs;) { |
| 1504 | rpcrdma_map_one(ia, seg, writing); | 1502 | rpcrdma_map_one(ia, seg, writing); |
| 1505 | pa = seg->mr_dma; | 1503 | pa = seg->mr_dma; |
| @@ -1536,10 +1534,6 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, | |||
| 1536 | } else | 1534 | } else |
| 1537 | post_wr = &frmr_wr; | 1535 | post_wr = &frmr_wr; |
| 1538 | 1536 | ||
| 1539 | /* Bump the key */ | ||
| 1540 | key = (u8)(seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey & 0x000000FF); | ||
| 1541 | ib_update_fast_reg_key(seg1->mr_chunk.rl_mw->r.frmr.fr_mr, ++key); | ||
| 1542 | |||
| 1543 | /* Prepare FRMR WR */ | 1537 | /* Prepare FRMR WR */ |
| 1544 | memset(&frmr_wr, 0, sizeof frmr_wr); | 1538 | memset(&frmr_wr, 0, sizeof frmr_wr); |
| 1545 | frmr_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw; | 1539 | frmr_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw; |
| @@ -1550,7 +1544,16 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, | |||
| 1550 | frmr_wr.wr.fast_reg.page_list_len = page_no; | 1544 | frmr_wr.wr.fast_reg.page_list_len = page_no; |
| 1551 | frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT; | 1545 | frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT; |
| 1552 | frmr_wr.wr.fast_reg.length = page_no << PAGE_SHIFT; | 1546 | frmr_wr.wr.fast_reg.length = page_no << PAGE_SHIFT; |
| 1553 | BUG_ON(frmr_wr.wr.fast_reg.length < len); | 1547 | if (frmr_wr.wr.fast_reg.length < len) { |
| 1548 | while (seg1->mr_nsegs--) | ||
| 1549 | rpcrdma_unmap_one(ia, seg++); | ||
| 1550 | return -EIO; | ||
| 1551 | } | ||
| 1552 | |||
| 1553 | /* Bump the key */ | ||
| 1554 | key = (u8)(seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey & 0x000000FF); | ||
| 1555 | ib_update_fast_reg_key(seg1->mr_chunk.rl_mw->r.frmr.fr_mr, ++key); | ||
| 1556 | |||
| 1554 | frmr_wr.wr.fast_reg.access_flags = (writing ? | 1557 | frmr_wr.wr.fast_reg.access_flags = (writing ? |
| 1555 | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : | 1558 | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : |
| 1556 | IB_ACCESS_REMOTE_READ); | 1559 | IB_ACCESS_REMOTE_READ); |
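Note: the BUG_ON() above becomes a recoverable failure, and the rkey bump is moved after that check. If the fast-reg length does not cover the mapped segments, the DMA mappings are unwound and -EIO is returned without ever advancing the MR's key, so the key only changes for a registration that will actually be posted. In outline:

	if (frmr_wr.wr.fast_reg.length < len) {
		while (seg1->mr_nsegs--)
			rpcrdma_unmap_one(ia, seg++);	/* undo DMA mappings */
		return -EIO;				/* fail the RPC, don't crash */
	}

	/* Bump the key only for a registration we will post */
	key = (u8)(seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey & 0x000000FF);
	ib_update_fast_reg_key(seg1->mr_chunk.rl_mw->r.frmr.fr_mr, ++key);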
| @@ -1661,135 +1664,6 @@ rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg, | |||
| 1661 | return rc; | 1664 | return rc; |
| 1662 | } | 1665 | } |
| 1663 | 1666 | ||
| 1664 | static int | ||
| 1665 | rpcrdma_register_memwin_external(struct rpcrdma_mr_seg *seg, | ||
| 1666 | int *nsegs, int writing, struct rpcrdma_ia *ia, | ||
| 1667 | struct rpcrdma_xprt *r_xprt) | ||
| 1668 | { | ||
| 1669 | int mem_priv = (writing ? IB_ACCESS_REMOTE_WRITE : | ||
| 1670 | IB_ACCESS_REMOTE_READ); | ||
| 1671 | struct ib_mw_bind param; | ||
| 1672 | int rc; | ||
| 1673 | |||
| 1674 | *nsegs = 1; | ||
| 1675 | rpcrdma_map_one(ia, seg, writing); | ||
| 1676 | param.bind_info.mr = ia->ri_bind_mem; | ||
| 1677 | param.wr_id = 0ULL; /* no send cookie */ | ||
| 1678 | param.bind_info.addr = seg->mr_dma; | ||
| 1679 | param.bind_info.length = seg->mr_len; | ||
| 1680 | param.send_flags = 0; | ||
| 1681 | param.bind_info.mw_access_flags = mem_priv; | ||
| 1682 | |||
| 1683 | DECR_CQCOUNT(&r_xprt->rx_ep); | ||
| 1684 | rc = ib_bind_mw(ia->ri_id->qp, seg->mr_chunk.rl_mw->r.mw, ¶m); | ||
| 1685 | if (rc) { | ||
| 1686 | dprintk("RPC: %s: failed ib_bind_mw " | ||
| 1687 | "%u@0x%llx status %i\n", | ||
| 1688 | __func__, seg->mr_len, | ||
| 1689 | (unsigned long long)seg->mr_dma, rc); | ||
| 1690 | rpcrdma_unmap_one(ia, seg); | ||
| 1691 | } else { | ||
| 1692 | seg->mr_rkey = seg->mr_chunk.rl_mw->r.mw->rkey; | ||
| 1693 | seg->mr_base = param.bind_info.addr; | ||
| 1694 | seg->mr_nsegs = 1; | ||
| 1695 | } | ||
| 1696 | return rc; | ||
| 1697 | } | ||
| 1698 | |||
| 1699 | static int | ||
| 1700 | rpcrdma_deregister_memwin_external(struct rpcrdma_mr_seg *seg, | ||
| 1701 | struct rpcrdma_ia *ia, | ||
| 1702 | struct rpcrdma_xprt *r_xprt, void **r) | ||
| 1703 | { | ||
| 1704 | struct ib_mw_bind param; | ||
| 1705 | LIST_HEAD(l); | ||
| 1706 | int rc; | ||
| 1707 | |||
| 1708 | BUG_ON(seg->mr_nsegs != 1); | ||
| 1709 | param.bind_info.mr = ia->ri_bind_mem; | ||
| 1710 | param.bind_info.addr = 0ULL; /* unbind */ | ||
| 1711 | param.bind_info.length = 0; | ||
| 1712 | param.bind_info.mw_access_flags = 0; | ||
| 1713 | if (*r) { | ||
| 1714 | param.wr_id = (u64) (unsigned long) *r; | ||
| 1715 | param.send_flags = IB_SEND_SIGNALED; | ||
| 1716 | INIT_CQCOUNT(&r_xprt->rx_ep); | ||
| 1717 | } else { | ||
| 1718 | param.wr_id = 0ULL; | ||
| 1719 | param.send_flags = 0; | ||
| 1720 | DECR_CQCOUNT(&r_xprt->rx_ep); | ||
| 1721 | } | ||
| 1722 | rc = ib_bind_mw(ia->ri_id->qp, seg->mr_chunk.rl_mw->r.mw, ¶m); | ||
| 1723 | rpcrdma_unmap_one(ia, seg); | ||
| 1724 | if (rc) | ||
| 1725 | dprintk("RPC: %s: failed ib_(un)bind_mw," | ||
| 1726 | " status %i\n", __func__, rc); | ||
| 1727 | else | ||
| 1728 | *r = NULL; /* will upcall on completion */ | ||
| 1729 | return rc; | ||
| 1730 | } | ||
| 1731 | |||
| 1732 | static int | ||
| 1733 | rpcrdma_register_default_external(struct rpcrdma_mr_seg *seg, | ||
| 1734 | int *nsegs, int writing, struct rpcrdma_ia *ia) | ||
| 1735 | { | ||
| 1736 | int mem_priv = (writing ? IB_ACCESS_REMOTE_WRITE : | ||
| 1737 | IB_ACCESS_REMOTE_READ); | ||
| 1738 | struct rpcrdma_mr_seg *seg1 = seg; | ||
| 1739 | struct ib_phys_buf ipb[RPCRDMA_MAX_DATA_SEGS]; | ||
| 1740 | int len, i, rc = 0; | ||
| 1741 | |||
| 1742 | if (*nsegs > RPCRDMA_MAX_DATA_SEGS) | ||
| 1743 | *nsegs = RPCRDMA_MAX_DATA_SEGS; | ||
| 1744 | for (len = 0, i = 0; i < *nsegs;) { | ||
| 1745 | rpcrdma_map_one(ia, seg, writing); | ||
| 1746 | ipb[i].addr = seg->mr_dma; | ||
| 1747 | ipb[i].size = seg->mr_len; | ||
| 1748 | len += seg->mr_len; | ||
| 1749 | ++seg; | ||
| 1750 | ++i; | ||
| 1751 | /* Check for holes */ | ||
| 1752 | if ((i < *nsegs && offset_in_page(seg->mr_offset)) || | ||
| 1753 | offset_in_page((seg-1)->mr_offset+(seg-1)->mr_len)) | ||
| 1754 | break; | ||
| 1755 | } | ||
| 1756 | seg1->mr_base = seg1->mr_dma; | ||
| 1757 | seg1->mr_chunk.rl_mr = ib_reg_phys_mr(ia->ri_pd, | ||
| 1758 | ipb, i, mem_priv, &seg1->mr_base); | ||
| 1759 | if (IS_ERR(seg1->mr_chunk.rl_mr)) { | ||
| 1760 | rc = PTR_ERR(seg1->mr_chunk.rl_mr); | ||
| 1761 | dprintk("RPC: %s: failed ib_reg_phys_mr " | ||
| 1762 | "%u@0x%llx (%d)... status %i\n", | ||
| 1763 | __func__, len, | ||
| 1764 | (unsigned long long)seg1->mr_dma, i, rc); | ||
| 1765 | while (i--) | ||
| 1766 | rpcrdma_unmap_one(ia, --seg); | ||
| 1767 | } else { | ||
| 1768 | seg1->mr_rkey = seg1->mr_chunk.rl_mr->rkey; | ||
| 1769 | seg1->mr_nsegs = i; | ||
| 1770 | seg1->mr_len = len; | ||
| 1771 | } | ||
| 1772 | *nsegs = i; | ||
| 1773 | return rc; | ||
| 1774 | } | ||
| 1775 | |||
| 1776 | static int | ||
| 1777 | rpcrdma_deregister_default_external(struct rpcrdma_mr_seg *seg, | ||
| 1778 | struct rpcrdma_ia *ia) | ||
| 1779 | { | ||
| 1780 | struct rpcrdma_mr_seg *seg1 = seg; | ||
| 1781 | int rc; | ||
| 1782 | |||
| 1783 | rc = ib_dereg_mr(seg1->mr_chunk.rl_mr); | ||
| 1784 | seg1->mr_chunk.rl_mr = NULL; | ||
| 1785 | while (seg1->mr_nsegs--) | ||
| 1786 | rpcrdma_unmap_one(ia, seg++); | ||
| 1787 | if (rc) | ||
| 1788 | dprintk("RPC: %s: failed ib_dereg_mr," | ||
| 1789 | " status %i\n", __func__, rc); | ||
| 1790 | return rc; | ||
| 1791 | } | ||
| 1792 | |||
| 1793 | int | 1667 | int |
| 1794 | rpcrdma_register_external(struct rpcrdma_mr_seg *seg, | 1668 | rpcrdma_register_external(struct rpcrdma_mr_seg *seg, |
| 1795 | int nsegs, int writing, struct rpcrdma_xprt *r_xprt) | 1669 | int nsegs, int writing, struct rpcrdma_xprt *r_xprt) |
| @@ -1819,16 +1693,8 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg, | |||
| 1819 | rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia); | 1693 | rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia); |
| 1820 | break; | 1694 | break; |
| 1821 | 1695 | ||
| 1822 | /* Registration using memory windows */ | ||
| 1823 | case RPCRDMA_MEMWINDOWS_ASYNC: | ||
| 1824 | case RPCRDMA_MEMWINDOWS: | ||
| 1825 | rc = rpcrdma_register_memwin_external(seg, &nsegs, writing, ia, r_xprt); | ||
| 1826 | break; | ||
| 1827 | |||
| 1828 | /* Default registration each time */ | ||
| 1829 | default: | 1696 | default: |
| 1830 | rc = rpcrdma_register_default_external(seg, &nsegs, writing, ia); | 1697 | return -1; |
| 1831 | break; | ||
| 1832 | } | 1698 | } |
| 1833 | if (rc) | 1699 | if (rc) |
| 1834 | return -1; | 1700 | return -1; |
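Note: together with the roughly 130 lines removed above (rpcrdma_register_memwin_external(), rpcrdma_deregister_memwin_external(), and the ib_reg_phys_mr()-based default register/deregister helpers), this leaves FRMR, FMR, and optionally all-physical as the only registration strategies. Anything else now fails the registration outright instead of falling back to a per-call physical MR, as in the tail of the switch shown above:

	case RPCRDMA_MTHCAFMR:
		rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia);
		break;
	default:
		return -1;	/* memory windows and the phys-MR fallback are gone */
	}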
| @@ -1838,7 +1704,7 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg, | |||
| 1838 | 1704 | ||
| 1839 | int | 1705 | int |
| 1840 | rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg, | 1706 | rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg, |
| 1841 | struct rpcrdma_xprt *r_xprt, void *r) | 1707 | struct rpcrdma_xprt *r_xprt) |
| 1842 | { | 1708 | { |
| 1843 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | 1709 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; |
| 1844 | int nsegs = seg->mr_nsegs, rc; | 1710 | int nsegs = seg->mr_nsegs, rc; |
| @@ -1847,9 +1713,7 @@ rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg, | |||
| 1847 | 1713 | ||
| 1848 | #if RPCRDMA_PERSISTENT_REGISTRATION | 1714 | #if RPCRDMA_PERSISTENT_REGISTRATION |
| 1849 | case RPCRDMA_ALLPHYSICAL: | 1715 | case RPCRDMA_ALLPHYSICAL: |
| 1850 | BUG_ON(nsegs != 1); | ||
| 1851 | rpcrdma_unmap_one(ia, seg); | 1716 | rpcrdma_unmap_one(ia, seg); |
| 1852 | rc = 0; | ||
| 1853 | break; | 1717 | break; |
| 1854 | #endif | 1718 | #endif |
| 1855 | 1719 | ||
| @@ -1861,21 +1725,9 @@ rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg, | |||
| 1861 | rc = rpcrdma_deregister_fmr_external(seg, ia); | 1725 | rc = rpcrdma_deregister_fmr_external(seg, ia); |
| 1862 | break; | 1726 | break; |
| 1863 | 1727 | ||
| 1864 | case RPCRDMA_MEMWINDOWS_ASYNC: | ||
| 1865 | case RPCRDMA_MEMWINDOWS: | ||
| 1866 | rc = rpcrdma_deregister_memwin_external(seg, ia, r_xprt, &r); | ||
| 1867 | break; | ||
| 1868 | |||
| 1869 | default: | 1728 | default: |
| 1870 | rc = rpcrdma_deregister_default_external(seg, ia); | ||
| 1871 | break; | 1729 | break; |
| 1872 | } | 1730 | } |
| 1873 | if (r) { | ||
| 1874 | struct rpcrdma_rep *rep = r; | ||
| 1875 | void (*func)(struct rpcrdma_rep *) = rep->rr_func; | ||
| 1876 | rep->rr_func = NULL; | ||
| 1877 | func(rep); /* dereg done, callback now */ | ||
| 1878 | } | ||
| 1879 | return nsegs; | 1731 | return nsegs; |
| 1880 | } | 1732 | } |
| 1881 | 1733 | ||
| @@ -1950,7 +1802,6 @@ rpcrdma_ep_post_recv(struct rpcrdma_ia *ia, | |||
| 1950 | ib_dma_sync_single_for_cpu(ia->ri_id->device, | 1802 | ib_dma_sync_single_for_cpu(ia->ri_id->device, |
| 1951 | rep->rr_iov.addr, rep->rr_iov.length, DMA_BIDIRECTIONAL); | 1803 | rep->rr_iov.addr, rep->rr_iov.length, DMA_BIDIRECTIONAL); |
| 1952 | 1804 | ||
| 1953 | DECR_CQCOUNT(ep); | ||
| 1954 | rc = ib_post_recv(ia->ri_id->qp, &recv_wr, &recv_wr_fail); | 1805 | rc = ib_post_recv(ia->ri_id->qp, &recv_wr, &recv_wr_fail); |
| 1955 | 1806 | ||
| 1956 | if (rc) | 1807 | if (rc) |
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index cc1445dc1d1a..89e7cd479705 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h | |||
| @@ -43,6 +43,7 @@ | |||
| 43 | #include <linux/wait.h> /* wait_queue_head_t, etc */ | 43 | #include <linux/wait.h> /* wait_queue_head_t, etc */ |
| 44 | #include <linux/spinlock.h> /* spinlock_t, etc */ | 44 | #include <linux/spinlock.h> /* spinlock_t, etc */ |
| 45 | #include <linux/atomic.h> /* atomic_t, etc */ | 45 | #include <linux/atomic.h> /* atomic_t, etc */ |
| 46 | #include <linux/workqueue.h> /* struct work_struct */ | ||
| 46 | 47 | ||
| 47 | #include <rdma/rdma_cm.h> /* RDMA connection api */ | 48 | #include <rdma/rdma_cm.h> /* RDMA connection api */ |
| 48 | #include <rdma/ib_verbs.h> /* RDMA verbs api */ | 49 | #include <rdma/ib_verbs.h> /* RDMA verbs api */ |
| @@ -66,18 +67,21 @@ struct rpcrdma_ia { | |||
| 66 | struct completion ri_done; | 67 | struct completion ri_done; |
| 67 | int ri_async_rc; | 68 | int ri_async_rc; |
| 68 | enum rpcrdma_memreg ri_memreg_strategy; | 69 | enum rpcrdma_memreg ri_memreg_strategy; |
| 70 | unsigned int ri_max_frmr_depth; | ||
| 69 | }; | 71 | }; |
| 70 | 72 | ||
| 71 | /* | 73 | /* |
| 72 | * RDMA Endpoint -- one per transport instance | 74 | * RDMA Endpoint -- one per transport instance |
| 73 | */ | 75 | */ |
| 74 | 76 | ||
| 77 | #define RPCRDMA_WC_BUDGET (128) | ||
| 78 | #define RPCRDMA_POLLSIZE (16) | ||
| 79 | |||
| 75 | struct rpcrdma_ep { | 80 | struct rpcrdma_ep { |
| 76 | atomic_t rep_cqcount; | 81 | atomic_t rep_cqcount; |
| 77 | int rep_cqinit; | 82 | int rep_cqinit; |
| 78 | int rep_connected; | 83 | int rep_connected; |
| 79 | struct rpcrdma_ia *rep_ia; | 84 | struct rpcrdma_ia *rep_ia; |
| 80 | struct ib_cq *rep_cq; | ||
| 81 | struct ib_qp_init_attr rep_attr; | 85 | struct ib_qp_init_attr rep_attr; |
| 82 | wait_queue_head_t rep_connect_wait; | 86 | wait_queue_head_t rep_connect_wait; |
| 83 | struct ib_sge rep_pad; /* holds zeroed pad */ | 87 | struct ib_sge rep_pad; /* holds zeroed pad */ |
| @@ -86,6 +90,9 @@ struct rpcrdma_ep { | |||
| 86 | struct rpc_xprt *rep_xprt; /* for rep_func */ | 90 | struct rpc_xprt *rep_xprt; /* for rep_func */ |
| 87 | struct rdma_conn_param rep_remote_cma; | 91 | struct rdma_conn_param rep_remote_cma; |
| 88 | struct sockaddr_storage rep_remote_addr; | 92 | struct sockaddr_storage rep_remote_addr; |
| 93 | struct delayed_work rep_connect_worker; | ||
| 94 | struct ib_wc rep_send_wcs[RPCRDMA_POLLSIZE]; | ||
| 95 | struct ib_wc rep_recv_wcs[RPCRDMA_POLLSIZE]; | ||
| 89 | }; | 96 | }; |
| 90 | 97 | ||
| 91 | #define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit) | 98 | #define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit) |
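Note: the new RPCRDMA_WC_BUDGET and RPCRDMA_POLLSIZE constants, together with the rep_send_wcs[] and rep_recv_wcs[] arrays added to struct rpcrdma_ep, let the completion upcalls drain their CQ in small batches with a bounded amount of work per upcall; the delayed_work rep_connect_worker pairs with the cancel_delayed_work_sync() call added to rpcrdma_ep_destroy() above. The polling loop itself lives in verbs.c and is not part of this section; a minimal sketch of the pattern these fields imply, with the function names assumed:

	/* Sketch only: poll up to RPCRDMA_WC_BUDGET completions,
	 * RPCRDMA_POLLSIZE at a time, then let the upcall re-arm the CQ.
	 */
	static int rpcrdma_sendcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
	{
		struct ib_wc *wcs;
		int budget, count, rc;

		budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE;
		do {
			wcs = ep->rep_send_wcs;

			rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs);
			if (rc <= 0)
				return rc;	/* 0: drained; <0: error */

			count = rc;
			while (count-- > 0)
				rpcrdma_sendcq_process_wc(wcs++);	/* per-WC handler, assumed */
		} while (rc == RPCRDMA_POLLSIZE && --budget);
		return 0;
	}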
| @@ -124,7 +131,6 @@ struct rpcrdma_rep { | |||
| 124 | struct rpc_xprt *rr_xprt; /* needed for request/reply matching */ | 131 | struct rpc_xprt *rr_xprt; /* needed for request/reply matching */ |
| 125 | void (*rr_func)(struct rpcrdma_rep *);/* called by tasklet in softint */ | 132 | void (*rr_func)(struct rpcrdma_rep *);/* called by tasklet in softint */ |
| 126 | struct list_head rr_list; /* tasklet list */ | 133 | struct list_head rr_list; /* tasklet list */ |
| 127 | wait_queue_head_t rr_unbind; /* optional unbind wait */ | ||
| 128 | struct ib_sge rr_iov; /* for posting */ | 134 | struct ib_sge rr_iov; /* for posting */ |
| 129 | struct ib_mr *rr_handle; /* handle for mem in rr_iov */ | 135 | struct ib_mr *rr_handle; /* handle for mem in rr_iov */ |
| 130 | char rr_base[MAX_RPCRDMAHDR]; /* minimal inline receive buffer */ | 136 | char rr_base[MAX_RPCRDMAHDR]; /* minimal inline receive buffer */ |
| @@ -159,7 +165,6 @@ struct rpcrdma_mr_seg { /* chunk descriptors */ | |||
| 159 | struct ib_mr *rl_mr; /* if registered directly */ | 165 | struct ib_mr *rl_mr; /* if registered directly */ |
| 160 | struct rpcrdma_mw { /* if registered from region */ | 166 | struct rpcrdma_mw { /* if registered from region */ |
| 161 | union { | 167 | union { |
| 162 | struct ib_mw *mw; | ||
| 163 | struct ib_fmr *fmr; | 168 | struct ib_fmr *fmr; |
| 164 | struct { | 169 | struct { |
| 165 | struct ib_fast_reg_page_list *fr_pgl; | 170 | struct ib_fast_reg_page_list *fr_pgl; |
| @@ -207,7 +212,6 @@ struct rpcrdma_req { | |||
| 207 | struct rpcrdma_buffer { | 212 | struct rpcrdma_buffer { |
| 208 | spinlock_t rb_lock; /* protects indexes */ | 213 | spinlock_t rb_lock; /* protects indexes */ |
| 209 | atomic_t rb_credits; /* most recent server credits */ | 214 | atomic_t rb_credits; /* most recent server credits */ |
| 210 | unsigned long rb_cwndscale; /* cached framework rpc_cwndscale */ | ||
| 211 | int rb_max_requests;/* client max requests */ | 215 | int rb_max_requests;/* client max requests */ |
| 212 | struct list_head rb_mws; /* optional memory windows/fmrs/frmrs */ | 216 | struct list_head rb_mws; /* optional memory windows/fmrs/frmrs */ |
| 213 | int rb_send_index; | 217 | int rb_send_index; |
| @@ -300,7 +304,7 @@ void rpcrdma_ia_close(struct rpcrdma_ia *); | |||
| 300 | */ | 304 | */ |
| 301 | int rpcrdma_ep_create(struct rpcrdma_ep *, struct rpcrdma_ia *, | 305 | int rpcrdma_ep_create(struct rpcrdma_ep *, struct rpcrdma_ia *, |
| 302 | struct rpcrdma_create_data_internal *); | 306 | struct rpcrdma_create_data_internal *); |
| 303 | int rpcrdma_ep_destroy(struct rpcrdma_ep *, struct rpcrdma_ia *); | 307 | void rpcrdma_ep_destroy(struct rpcrdma_ep *, struct rpcrdma_ia *); |
| 304 | int rpcrdma_ep_connect(struct rpcrdma_ep *, struct rpcrdma_ia *); | 308 | int rpcrdma_ep_connect(struct rpcrdma_ep *, struct rpcrdma_ia *); |
| 305 | int rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *); | 309 | int rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *); |
| 306 | 310 | ||
| @@ -330,11 +334,12 @@ int rpcrdma_deregister_internal(struct rpcrdma_ia *, | |||
| 330 | int rpcrdma_register_external(struct rpcrdma_mr_seg *, | 334 | int rpcrdma_register_external(struct rpcrdma_mr_seg *, |
| 331 | int, int, struct rpcrdma_xprt *); | 335 | int, int, struct rpcrdma_xprt *); |
| 332 | int rpcrdma_deregister_external(struct rpcrdma_mr_seg *, | 336 | int rpcrdma_deregister_external(struct rpcrdma_mr_seg *, |
| 333 | struct rpcrdma_xprt *, void *); | 337 | struct rpcrdma_xprt *); |
| 334 | 338 | ||
| 335 | /* | 339 | /* |
| 336 | * RPC/RDMA connection management calls - xprtrdma/rpc_rdma.c | 340 | * RPC/RDMA connection management calls - xprtrdma/rpc_rdma.c |
| 337 | */ | 341 | */ |
| 342 | void rpcrdma_connect_worker(struct work_struct *); | ||
| 338 | void rpcrdma_conn_func(struct rpcrdma_ep *); | 343 | void rpcrdma_conn_func(struct rpcrdma_ep *); |
| 339 | void rpcrdma_reply_handler(struct rpcrdma_rep *); | 344 | void rpcrdma_reply_handler(struct rpcrdma_rep *); |
| 340 | 345 | ||
