author     Trond Myklebust <trond.myklebust@primarydata.com>   2015-02-03 11:53:18 -0500
committer  Trond Myklebust <trond.myklebust@primarydata.com>   2015-02-03 11:54:58 -0500
commit     cc3ea893cbb386e657d775f299f61bb9ba3bc435 (patch)
tree       fd17f7af86fbe30d9c4c9d02102b3cbd02e90ca6 /net
parent     c7c545d4a34872f4a3d710e22f21fb61f7258706 (diff)
parent     a0a1d50cd1e80652142af5cddcde500d06c71bdd (diff)
Merge tag 'nfs-rdma-for-3.20' of git://git.linux-nfs.org/projects/anna/nfs-rdma
NFS: Client side changes for RDMA

These patches improve the scalability of the NFSoRDMA client and take large
variables off of the stack. Additionally, the GFP_* flags are updated to
match what TCP uses.

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>

* tag 'nfs-rdma-for-3.20' of git://git.linux-nfs.org/projects/anna/nfs-rdma: (21 commits)
  xprtrdma: Update the GFP flags used in xprt_rdma_allocate()
  xprtrdma: Clean up after adding regbuf management
  xprtrdma: Allocate zero pad separately from rpcrdma_buffer
  xprtrdma: Allocate RPC/RDMA receive buffer separately from struct rpcrdma_rep
  xprtrdma: Allocate RPC/RDMA send buffer separately from struct rpcrdma_req
  xprtrdma: Allocate RPC send buffer separately from struct rpcrdma_req
  xprtrdma: Add struct rpcrdma_regbuf and helpers
  xprtrdma: Refactor rpcrdma_buffer_create() and rpcrdma_buffer_destroy()
  xprtrdma: Simplify synopsis of rpcrdma_buffer_create()
  xprtrdma: Take struct ib_qp_attr and ib_qp_init_attr off the stack
  xprtrdma: Take struct ib_device_attr off the stack
  xprtrdma: Free the pd if ib_query_qp() fails
  xprtrdma: Remove rpcrdma_ep::rep_func and ::rep_xprt
  xprtrdma: Move credit update to RPC reply handler
  xprtrdma: Remove rl_mr field, and the mr_chunk union
  xprtrdma: Remove rpcrdma_ep::rep_ia
  xprtrdma: Rename "xprt" and "rdma_connect" fields in struct rpcrdma_xprt
  xprtrdma: Clean up hdrlen
  xprtrdma: Display XIDs in host byte order
  xprtrdma: Modernize htonl and ntohl
  ...
Diffstat (limited to 'net')
-rw-r--r--   net/sunrpc/xprtrdma/rpc_rdma.c   | 108
-rw-r--r--   net/sunrpc/xprtrdma/transport.c  | 182
-rw-r--r--   net/sunrpc/xprtrdma/verbs.c      | 411
-rw-r--r--   net/sunrpc/xprtrdma/xprt_rdma.h  | 111
4 files changed, 468 insertions, 344 deletions
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index df01d124936c..7e9acd9361c5 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -209,9 +209,11 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
209 if (cur_rchunk) { /* read */ 209 if (cur_rchunk) { /* read */
210 cur_rchunk->rc_discrim = xdr_one; 210 cur_rchunk->rc_discrim = xdr_one;
211 /* all read chunks have the same "position" */ 211 /* all read chunks have the same "position" */
212 cur_rchunk->rc_position = htonl(pos); 212 cur_rchunk->rc_position = cpu_to_be32(pos);
213 cur_rchunk->rc_target.rs_handle = htonl(seg->mr_rkey); 213 cur_rchunk->rc_target.rs_handle =
214 cur_rchunk->rc_target.rs_length = htonl(seg->mr_len); 214 cpu_to_be32(seg->mr_rkey);
215 cur_rchunk->rc_target.rs_length =
216 cpu_to_be32(seg->mr_len);
215 xdr_encode_hyper( 217 xdr_encode_hyper(
216 (__be32 *)&cur_rchunk->rc_target.rs_offset, 218 (__be32 *)&cur_rchunk->rc_target.rs_offset,
217 seg->mr_base); 219 seg->mr_base);
@@ -222,8 +224,10 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
222 cur_rchunk++; 224 cur_rchunk++;
223 r_xprt->rx_stats.read_chunk_count++; 225 r_xprt->rx_stats.read_chunk_count++;
224 } else { /* write/reply */ 226 } else { /* write/reply */
225 cur_wchunk->wc_target.rs_handle = htonl(seg->mr_rkey); 227 cur_wchunk->wc_target.rs_handle =
226 cur_wchunk->wc_target.rs_length = htonl(seg->mr_len); 228 cpu_to_be32(seg->mr_rkey);
229 cur_wchunk->wc_target.rs_length =
230 cpu_to_be32(seg->mr_len);
227 xdr_encode_hyper( 231 xdr_encode_hyper(
228 (__be32 *)&cur_wchunk->wc_target.rs_offset, 232 (__be32 *)&cur_wchunk->wc_target.rs_offset,
229 seg->mr_base); 233 seg->mr_base);
@@ -257,7 +261,7 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
257 *iptr++ = xdr_zero; /* encode a NULL reply chunk */ 261 *iptr++ = xdr_zero; /* encode a NULL reply chunk */
258 } else { 262 } else {
259 warray->wc_discrim = xdr_one; 263 warray->wc_discrim = xdr_one;
260 warray->wc_nchunks = htonl(nchunks); 264 warray->wc_nchunks = cpu_to_be32(nchunks);
261 iptr = (__be32 *) cur_wchunk; 265 iptr = (__be32 *) cur_wchunk;
262 if (type == rpcrdma_writech) { 266 if (type == rpcrdma_writech) {
263 *iptr++ = xdr_zero; /* finish the write chunk list */ 267 *iptr++ = xdr_zero; /* finish the write chunk list */
@@ -290,7 +294,7 @@ ssize_t
290rpcrdma_marshal_chunks(struct rpc_rqst *rqst, ssize_t result) 294rpcrdma_marshal_chunks(struct rpc_rqst *rqst, ssize_t result)
291{ 295{
292 struct rpcrdma_req *req = rpcr_to_rdmar(rqst); 296 struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
293 struct rpcrdma_msg *headerp = (struct rpcrdma_msg *)req->rl_base; 297 struct rpcrdma_msg *headerp = rdmab_to_msg(req->rl_rdmabuf);
294 298
295 if (req->rl_rtype != rpcrdma_noch) 299 if (req->rl_rtype != rpcrdma_noch)
296 result = rpcrdma_create_chunks(rqst, &rqst->rq_snd_buf, 300 result = rpcrdma_create_chunks(rqst, &rqst->rq_snd_buf,
@@ -402,13 +406,12 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
402 base = rqst->rq_svec[0].iov_base; 406 base = rqst->rq_svec[0].iov_base;
403 rpclen = rqst->rq_svec[0].iov_len; 407 rpclen = rqst->rq_svec[0].iov_len;
404 408
405 /* build RDMA header in private area at front */ 409 headerp = rdmab_to_msg(req->rl_rdmabuf);
406 headerp = (struct rpcrdma_msg *) req->rl_base; 410 /* don't byte-swap XID, it's already done in request */
407 /* don't htonl XID, it's already done in request */
408 headerp->rm_xid = rqst->rq_xid; 411 headerp->rm_xid = rqst->rq_xid;
409 headerp->rm_vers = xdr_one; 412 headerp->rm_vers = rpcrdma_version;
410 headerp->rm_credit = htonl(r_xprt->rx_buf.rb_max_requests); 413 headerp->rm_credit = cpu_to_be32(r_xprt->rx_buf.rb_max_requests);
411 headerp->rm_type = htonl(RDMA_MSG); 414 headerp->rm_type = rdma_msg;
412 415
413 /* 416 /*
414 * Chunks needed for results? 417 * Chunks needed for results?
@@ -468,7 +471,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
468 return -EIO; 471 return -EIO;
469 } 472 }
470 473
471 hdrlen = 28; /*sizeof *headerp;*/ 474 hdrlen = RPCRDMA_HDRLEN_MIN;
472 padlen = 0; 475 padlen = 0;
473 476
474 /* 477 /*
@@ -482,11 +485,11 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
482 RPCRDMA_INLINE_PAD_VALUE(rqst)); 485 RPCRDMA_INLINE_PAD_VALUE(rqst));
483 486
484 if (padlen) { 487 if (padlen) {
485 headerp->rm_type = htonl(RDMA_MSGP); 488 headerp->rm_type = rdma_msgp;
486 headerp->rm_body.rm_padded.rm_align = 489 headerp->rm_body.rm_padded.rm_align =
487 htonl(RPCRDMA_INLINE_PAD_VALUE(rqst)); 490 cpu_to_be32(RPCRDMA_INLINE_PAD_VALUE(rqst));
488 headerp->rm_body.rm_padded.rm_thresh = 491 headerp->rm_body.rm_padded.rm_thresh =
489 htonl(RPCRDMA_INLINE_PAD_THRESH); 492 cpu_to_be32(RPCRDMA_INLINE_PAD_THRESH);
490 headerp->rm_body.rm_padded.rm_pempty[0] = xdr_zero; 493 headerp->rm_body.rm_padded.rm_pempty[0] = xdr_zero;
491 headerp->rm_body.rm_padded.rm_pempty[1] = xdr_zero; 494 headerp->rm_body.rm_padded.rm_pempty[1] = xdr_zero;
492 headerp->rm_body.rm_padded.rm_pempty[2] = xdr_zero; 495 headerp->rm_body.rm_padded.rm_pempty[2] = xdr_zero;
@@ -524,7 +527,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
524 dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd padlen %zd" 527 dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd padlen %zd"
525 " headerp 0x%p base 0x%p lkey 0x%x\n", 528 " headerp 0x%p base 0x%p lkey 0x%x\n",
526 __func__, transfertypes[req->rl_wtype], hdrlen, rpclen, padlen, 529 __func__, transfertypes[req->rl_wtype], hdrlen, rpclen, padlen,
527 headerp, base, req->rl_iov.lkey); 530 headerp, base, rdmab_lkey(req->rl_rdmabuf));
528 531
529 /* 532 /*
530 * initialize send_iov's - normally only two: rdma chunk header and 533 * initialize send_iov's - normally only two: rdma chunk header and
@@ -533,26 +536,26 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
533 * header and any write data. In all non-rdma cases, any following 536 * header and any write data. In all non-rdma cases, any following
534 * data has been copied into the RPC header buffer. 537 * data has been copied into the RPC header buffer.
535 */ 538 */
536 req->rl_send_iov[0].addr = req->rl_iov.addr; 539 req->rl_send_iov[0].addr = rdmab_addr(req->rl_rdmabuf);
537 req->rl_send_iov[0].length = hdrlen; 540 req->rl_send_iov[0].length = hdrlen;
538 req->rl_send_iov[0].lkey = req->rl_iov.lkey; 541 req->rl_send_iov[0].lkey = rdmab_lkey(req->rl_rdmabuf);
539 542
540 req->rl_send_iov[1].addr = req->rl_iov.addr + (base - req->rl_base); 543 req->rl_send_iov[1].addr = rdmab_addr(req->rl_sendbuf);
541 req->rl_send_iov[1].length = rpclen; 544 req->rl_send_iov[1].length = rpclen;
542 req->rl_send_iov[1].lkey = req->rl_iov.lkey; 545 req->rl_send_iov[1].lkey = rdmab_lkey(req->rl_sendbuf);
543 546
544 req->rl_niovs = 2; 547 req->rl_niovs = 2;
545 548
546 if (padlen) { 549 if (padlen) {
547 struct rpcrdma_ep *ep = &r_xprt->rx_ep; 550 struct rpcrdma_ep *ep = &r_xprt->rx_ep;
548 551
549 req->rl_send_iov[2].addr = ep->rep_pad.addr; 552 req->rl_send_iov[2].addr = rdmab_addr(ep->rep_padbuf);
550 req->rl_send_iov[2].length = padlen; 553 req->rl_send_iov[2].length = padlen;
551 req->rl_send_iov[2].lkey = ep->rep_pad.lkey; 554 req->rl_send_iov[2].lkey = rdmab_lkey(ep->rep_padbuf);
552 555
553 req->rl_send_iov[3].addr = req->rl_send_iov[1].addr + rpclen; 556 req->rl_send_iov[3].addr = req->rl_send_iov[1].addr + rpclen;
554 req->rl_send_iov[3].length = rqst->rq_slen - rpclen; 557 req->rl_send_iov[3].length = rqst->rq_slen - rpclen;
555 req->rl_send_iov[3].lkey = req->rl_iov.lkey; 558 req->rl_send_iov[3].lkey = rdmab_lkey(req->rl_sendbuf);
556 559
557 req->rl_niovs = 4; 560 req->rl_niovs = 4;
558 } 561 }
@@ -569,8 +572,9 @@ rpcrdma_count_chunks(struct rpcrdma_rep *rep, unsigned int max, int wrchunk, __b
569{ 572{
570 unsigned int i, total_len; 573 unsigned int i, total_len;
571 struct rpcrdma_write_chunk *cur_wchunk; 574 struct rpcrdma_write_chunk *cur_wchunk;
575 char *base = (char *)rdmab_to_msg(rep->rr_rdmabuf);
572 576
573 i = ntohl(**iptrp); /* get array count */ 577 i = be32_to_cpu(**iptrp);
574 if (i > max) 578 if (i > max)
575 return -1; 579 return -1;
576 cur_wchunk = (struct rpcrdma_write_chunk *) (*iptrp + 1); 580 cur_wchunk = (struct rpcrdma_write_chunk *) (*iptrp + 1);
@@ -582,11 +586,11 @@ rpcrdma_count_chunks(struct rpcrdma_rep *rep, unsigned int max, int wrchunk, __b
582 xdr_decode_hyper((__be32 *)&seg->rs_offset, &off); 586 xdr_decode_hyper((__be32 *)&seg->rs_offset, &off);
583 dprintk("RPC: %s: chunk %d@0x%llx:0x%x\n", 587 dprintk("RPC: %s: chunk %d@0x%llx:0x%x\n",
584 __func__, 588 __func__,
585 ntohl(seg->rs_length), 589 be32_to_cpu(seg->rs_length),
586 (unsigned long long)off, 590 (unsigned long long)off,
587 ntohl(seg->rs_handle)); 591 be32_to_cpu(seg->rs_handle));
588 } 592 }
589 total_len += ntohl(seg->rs_length); 593 total_len += be32_to_cpu(seg->rs_length);
590 ++cur_wchunk; 594 ++cur_wchunk;
591 } 595 }
592 /* check and adjust for properly terminated write chunk */ 596 /* check and adjust for properly terminated write chunk */
@@ -596,7 +600,7 @@ rpcrdma_count_chunks(struct rpcrdma_rep *rep, unsigned int max, int wrchunk, __b
596 return -1; 600 return -1;
597 cur_wchunk = (struct rpcrdma_write_chunk *) w; 601 cur_wchunk = (struct rpcrdma_write_chunk *) w;
598 } 602 }
599 if ((char *) cur_wchunk > rep->rr_base + rep->rr_len) 603 if ((char *)cur_wchunk > base + rep->rr_len)
600 return -1; 604 return -1;
601 605
602 *iptrp = (__be32 *) cur_wchunk; 606 *iptrp = (__be32 *) cur_wchunk;
@@ -691,7 +695,9 @@ rpcrdma_connect_worker(struct work_struct *work)
691{ 695{
692 struct rpcrdma_ep *ep = 696 struct rpcrdma_ep *ep =
693 container_of(work, struct rpcrdma_ep, rep_connect_worker.work); 697 container_of(work, struct rpcrdma_ep, rep_connect_worker.work);
694 struct rpc_xprt *xprt = ep->rep_xprt; 698 struct rpcrdma_xprt *r_xprt =
699 container_of(ep, struct rpcrdma_xprt, rx_ep);
700 struct rpc_xprt *xprt = &r_xprt->rx_xprt;
695 701
696 spin_lock_bh(&xprt->transport_lock); 702 spin_lock_bh(&xprt->transport_lock);
697 if (++xprt->connect_cookie == 0) /* maintain a reserved value */ 703 if (++xprt->connect_cookie == 0) /* maintain a reserved value */
@@ -732,7 +738,7 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep)
732 struct rpc_xprt *xprt = rep->rr_xprt; 738 struct rpc_xprt *xprt = rep->rr_xprt;
733 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); 739 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
734 __be32 *iptr; 740 __be32 *iptr;
735 int rdmalen, status; 741 int credits, rdmalen, status;
736 unsigned long cwnd; 742 unsigned long cwnd;
737 743
738 /* Check status. If bad, signal disconnect and return rep to pool */ 744 /* Check status. If bad, signal disconnect and return rep to pool */
@@ -744,14 +750,14 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep)
744 } 750 }
745 return; 751 return;
746 } 752 }
747 if (rep->rr_len < 28) { 753 if (rep->rr_len < RPCRDMA_HDRLEN_MIN) {
748 dprintk("RPC: %s: short/invalid reply\n", __func__); 754 dprintk("RPC: %s: short/invalid reply\n", __func__);
749 goto repost; 755 goto repost;
750 } 756 }
751 headerp = (struct rpcrdma_msg *) rep->rr_base; 757 headerp = rdmab_to_msg(rep->rr_rdmabuf);
752 if (headerp->rm_vers != xdr_one) { 758 if (headerp->rm_vers != rpcrdma_version) {
753 dprintk("RPC: %s: invalid version %d\n", 759 dprintk("RPC: %s: invalid version %d\n",
754 __func__, ntohl(headerp->rm_vers)); 760 __func__, be32_to_cpu(headerp->rm_vers));
755 goto repost; 761 goto repost;
756 } 762 }
757 763
@@ -762,7 +768,8 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep)
762 spin_unlock(&xprt->transport_lock); 768 spin_unlock(&xprt->transport_lock);
763 dprintk("RPC: %s: reply 0x%p failed " 769 dprintk("RPC: %s: reply 0x%p failed "
764 "to match any request xid 0x%08x len %d\n", 770 "to match any request xid 0x%08x len %d\n",
765 __func__, rep, headerp->rm_xid, rep->rr_len); 771 __func__, rep, be32_to_cpu(headerp->rm_xid),
772 rep->rr_len);
766repost: 773repost:
767 r_xprt->rx_stats.bad_reply_count++; 774 r_xprt->rx_stats.bad_reply_count++;
768 rep->rr_func = rpcrdma_reply_handler; 775 rep->rr_func = rpcrdma_reply_handler;
@@ -778,13 +785,14 @@ repost:
778 spin_unlock(&xprt->transport_lock); 785 spin_unlock(&xprt->transport_lock);
779 dprintk("RPC: %s: duplicate reply 0x%p to RPC " 786 dprintk("RPC: %s: duplicate reply 0x%p to RPC "
780 "request 0x%p: xid 0x%08x\n", __func__, rep, req, 787 "request 0x%p: xid 0x%08x\n", __func__, rep, req,
781 headerp->rm_xid); 788 be32_to_cpu(headerp->rm_xid));
782 goto repost; 789 goto repost;
783 } 790 }
784 791
785 dprintk("RPC: %s: reply 0x%p completes request 0x%p\n" 792 dprintk("RPC: %s: reply 0x%p completes request 0x%p\n"
786 " RPC request 0x%p xid 0x%08x\n", 793 " RPC request 0x%p xid 0x%08x\n",
787 __func__, rep, req, rqst, headerp->rm_xid); 794 __func__, rep, req, rqst,
795 be32_to_cpu(headerp->rm_xid));
788 796
789 /* from here on, the reply is no longer an orphan */ 797 /* from here on, the reply is no longer an orphan */
790 req->rl_reply = rep; 798 req->rl_reply = rep;
@@ -793,7 +801,7 @@ repost:
793 /* check for expected message types */ 801 /* check for expected message types */
794 /* The order of some of these tests is important. */ 802 /* The order of some of these tests is important. */
795 switch (headerp->rm_type) { 803 switch (headerp->rm_type) {
796 case htonl(RDMA_MSG): 804 case rdma_msg:
797 /* never expect read chunks */ 805 /* never expect read chunks */
798 /* never expect reply chunks (two ways to check) */ 806 /* never expect reply chunks (two ways to check) */
799 /* never expect write chunks without having offered RDMA */ 807 /* never expect write chunks without having offered RDMA */
@@ -824,22 +832,24 @@ repost:
824 } else { 832 } else {
825 /* else ordinary inline */ 833 /* else ordinary inline */
826 rdmalen = 0; 834 rdmalen = 0;
827 iptr = (__be32 *)((unsigned char *)headerp + 28); 835 iptr = (__be32 *)((unsigned char *)headerp +
828 rep->rr_len -= 28; /*sizeof *headerp;*/ 836 RPCRDMA_HDRLEN_MIN);
837 rep->rr_len -= RPCRDMA_HDRLEN_MIN;
829 status = rep->rr_len; 838 status = rep->rr_len;
830 } 839 }
831 /* Fix up the rpc results for upper layer */ 840 /* Fix up the rpc results for upper layer */
832 rpcrdma_inline_fixup(rqst, (char *)iptr, rep->rr_len, rdmalen); 841 rpcrdma_inline_fixup(rqst, (char *)iptr, rep->rr_len, rdmalen);
833 break; 842 break;
834 843
835 case htonl(RDMA_NOMSG): 844 case rdma_nomsg:
836 /* never expect read or write chunks, always reply chunks */ 845 /* never expect read or write chunks, always reply chunks */
837 if (headerp->rm_body.rm_chunks[0] != xdr_zero || 846 if (headerp->rm_body.rm_chunks[0] != xdr_zero ||
838 headerp->rm_body.rm_chunks[1] != xdr_zero || 847 headerp->rm_body.rm_chunks[1] != xdr_zero ||
839 headerp->rm_body.rm_chunks[2] != xdr_one || 848 headerp->rm_body.rm_chunks[2] != xdr_one ||
840 req->rl_nchunks == 0) 849 req->rl_nchunks == 0)
841 goto badheader; 850 goto badheader;
842 iptr = (__be32 *)((unsigned char *)headerp + 28); 851 iptr = (__be32 *)((unsigned char *)headerp +
852 RPCRDMA_HDRLEN_MIN);
843 rdmalen = rpcrdma_count_chunks(rep, req->rl_nchunks, 0, &iptr); 853 rdmalen = rpcrdma_count_chunks(rep, req->rl_nchunks, 0, &iptr);
844 if (rdmalen < 0) 854 if (rdmalen < 0)
845 goto badheader; 855 goto badheader;
@@ -853,7 +863,7 @@ badheader:
853 dprintk("%s: invalid rpcrdma reply header (type %d):" 863 dprintk("%s: invalid rpcrdma reply header (type %d):"
854 " chunks[012] == %d %d %d" 864 " chunks[012] == %d %d %d"
855 " expected chunks <= %d\n", 865 " expected chunks <= %d\n",
856 __func__, ntohl(headerp->rm_type), 866 __func__, be32_to_cpu(headerp->rm_type),
857 headerp->rm_body.rm_chunks[0], 867 headerp->rm_body.rm_chunks[0],
858 headerp->rm_body.rm_chunks[1], 868 headerp->rm_body.rm_chunks[1],
859 headerp->rm_body.rm_chunks[2], 869 headerp->rm_body.rm_chunks[2],
@@ -863,8 +873,14 @@ badheader:
863 break; 873 break;
864 } 874 }
865 875
876 credits = be32_to_cpu(headerp->rm_credit);
877 if (credits == 0)
878 credits = 1; /* don't deadlock */
879 else if (credits > r_xprt->rx_buf.rb_max_requests)
880 credits = r_xprt->rx_buf.rb_max_requests;
881
866 cwnd = xprt->cwnd; 882 cwnd = xprt->cwnd;
867 xprt->cwnd = atomic_read(&r_xprt->rx_buf.rb_credits) << RPC_CWNDSHIFT; 883 xprt->cwnd = credits << RPC_CWNDSHIFT;
868 if (xprt->cwnd > cwnd) 884 if (xprt->cwnd > cwnd)
869 xprt_release_rqst_cong(rqst->rq_task); 885 xprt_release_rqst_cong(rqst->rq_task);
870 886
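The block added just above is where the server's credit grant now feeds the RPC congestion window; this work used to be done in the receive completion handler, before the header had been fully inspected. The clamp keeps a zero or oversized rm_credit value from either stalling the transport or letting it post more requests than it has buffers for. A standalone sketch of the same arithmetic (the helper name is illustrative, not part of the patch):

/* Minimal sketch, mirroring the credit handling added to
 * rpcrdma_reply_handler() above.  Assumes max_requests > 0.
 */
static unsigned int rpcrdma_clamp_credits(unsigned int credits,
                                          unsigned int max_requests)
{
        if (credits == 0)
                credits = 1;            /* never let the window collapse */
        else if (credits > max_requests)
                credits = max_requests; /* never exceed our receive buffers */
        return credits;
}

/* Caller side, as in the hunk:  xprt->cwnd = credits << RPC_CWNDSHIFT; */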
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index bbd6155d3e34..2e192baa59f3 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -200,9 +200,9 @@ xprt_rdma_free_addresses(struct rpc_xprt *xprt)
200static void 200static void
201xprt_rdma_connect_worker(struct work_struct *work) 201xprt_rdma_connect_worker(struct work_struct *work)
202{ 202{
203 struct rpcrdma_xprt *r_xprt = 203 struct rpcrdma_xprt *r_xprt = container_of(work, struct rpcrdma_xprt,
204 container_of(work, struct rpcrdma_xprt, rdma_connect.work); 204 rx_connect_worker.work);
205 struct rpc_xprt *xprt = &r_xprt->xprt; 205 struct rpc_xprt *xprt = &r_xprt->rx_xprt;
206 int rc = 0; 206 int rc = 0;
207 207
208 xprt_clear_connected(xprt); 208 xprt_clear_connected(xprt);
@@ -235,7 +235,7 @@ xprt_rdma_destroy(struct rpc_xprt *xprt)
235 235
236 dprintk("RPC: %s: called\n", __func__); 236 dprintk("RPC: %s: called\n", __func__);
237 237
238 cancel_delayed_work_sync(&r_xprt->rdma_connect); 238 cancel_delayed_work_sync(&r_xprt->rx_connect_worker);
239 239
240 xprt_clear_connected(xprt); 240 xprt_clear_connected(xprt);
241 241
@@ -364,8 +364,7 @@ xprt_setup_rdma(struct xprt_create *args)
364 * any inline data. Also specify any padding which will be provided 364 * any inline data. Also specify any padding which will be provided
365 * from a preregistered zero buffer. 365 * from a preregistered zero buffer.
366 */ 366 */
367 rc = rpcrdma_buffer_create(&new_xprt->rx_buf, new_ep, &new_xprt->rx_ia, 367 rc = rpcrdma_buffer_create(new_xprt);
368 &new_xprt->rx_data);
369 if (rc) 368 if (rc)
370 goto out3; 369 goto out3;
371 370
@@ -374,9 +373,8 @@ xprt_setup_rdma(struct xprt_create *args)
374 * connection loss notification is async. We also catch connection loss 373 * connection loss notification is async. We also catch connection loss
375 * when reaping receives. 374 * when reaping receives.
376 */ 375 */
377 INIT_DELAYED_WORK(&new_xprt->rdma_connect, xprt_rdma_connect_worker); 376 INIT_DELAYED_WORK(&new_xprt->rx_connect_worker,
378 new_ep->rep_func = rpcrdma_conn_func; 377 xprt_rdma_connect_worker);
379 new_ep->rep_xprt = xprt;
380 378
381 xprt_rdma_format_addresses(xprt); 379 xprt_rdma_format_addresses(xprt);
382 xprt->max_payload = rpcrdma_max_payload(new_xprt); 380 xprt->max_payload = rpcrdma_max_payload(new_xprt);
@@ -434,94 +432,101 @@ xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task)
434 432
435 if (r_xprt->rx_ep.rep_connected != 0) { 433 if (r_xprt->rx_ep.rep_connected != 0) {
436 /* Reconnect */ 434 /* Reconnect */
437 schedule_delayed_work(&r_xprt->rdma_connect, 435 schedule_delayed_work(&r_xprt->rx_connect_worker,
438 xprt->reestablish_timeout); 436 xprt->reestablish_timeout);
439 xprt->reestablish_timeout <<= 1; 437 xprt->reestablish_timeout <<= 1;
440 if (xprt->reestablish_timeout > RPCRDMA_MAX_REEST_TO) 438 if (xprt->reestablish_timeout > RPCRDMA_MAX_REEST_TO)
441 xprt->reestablish_timeout = RPCRDMA_MAX_REEST_TO; 439 xprt->reestablish_timeout = RPCRDMA_MAX_REEST_TO;
442 else if (xprt->reestablish_timeout < RPCRDMA_INIT_REEST_TO) 440 else if (xprt->reestablish_timeout < RPCRDMA_INIT_REEST_TO)
443 xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO; 441 xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO;
444 } else { 442 } else {
445 schedule_delayed_work(&r_xprt->rdma_connect, 0); 443 schedule_delayed_work(&r_xprt->rx_connect_worker, 0);
446 if (!RPC_IS_ASYNC(task)) 444 if (!RPC_IS_ASYNC(task))
447 flush_delayed_work(&r_xprt->rdma_connect); 445 flush_delayed_work(&r_xprt->rx_connect_worker);
448 } 446 }
449} 447}
450 448
451/* 449/*
452 * The RDMA allocate/free functions need the task structure as a place 450 * The RDMA allocate/free functions need the task structure as a place
453 * to hide the struct rpcrdma_req, which is necessary for the actual send/recv 451 * to hide the struct rpcrdma_req, which is necessary for the actual send/recv
454 * sequence. For this reason, the recv buffers are attached to send 452 * sequence.
455 * buffers for portions of the RPC. Note that the RPC layer allocates 453 *
456 * both send and receive buffers in the same call. We may register 454 * The RPC layer allocates both send and receive buffers in the same call
457 * the receive buffer portion when using reply chunks. 455 * (rq_send_buf and rq_rcv_buf are both part of a single contiguous buffer).
456 * We may register rq_rcv_buf when using reply chunks.
458 */ 457 */
459static void * 458static void *
460xprt_rdma_allocate(struct rpc_task *task, size_t size) 459xprt_rdma_allocate(struct rpc_task *task, size_t size)
461{ 460{
462 struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt; 461 struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt;
463 struct rpcrdma_req *req, *nreq; 462 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
463 struct rpcrdma_regbuf *rb;
464 struct rpcrdma_req *req;
465 size_t min_size;
466 gfp_t flags;
464 467
465 req = rpcrdma_buffer_get(&rpcx_to_rdmax(xprt)->rx_buf); 468 req = rpcrdma_buffer_get(&r_xprt->rx_buf);
466 if (req == NULL) 469 if (req == NULL)
467 return NULL; 470 return NULL;
468 471
469 if (size > req->rl_size) { 472 flags = GFP_NOIO | __GFP_NOWARN;
470 dprintk("RPC: %s: size %zd too large for buffer[%zd]: " 473 if (RPC_IS_SWAPPER(task))
471 "prog %d vers %d proc %d\n", 474 flags = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN;
472 __func__, size, req->rl_size, 475
473 task->tk_client->cl_prog, task->tk_client->cl_vers, 476 if (req->rl_rdmabuf == NULL)
474 task->tk_msg.rpc_proc->p_proc); 477 goto out_rdmabuf;
475 /* 478 if (req->rl_sendbuf == NULL)
476 * Outgoing length shortage. Our inline write max must have 479 goto out_sendbuf;
477 * been configured to perform direct i/o. 480 if (size > req->rl_sendbuf->rg_size)
478 * 481 goto out_sendbuf;
479 * This is therefore a large metadata operation, and the 482
480 * allocate call was made on the maximum possible message, 483out:
481 * e.g. containing long filename(s) or symlink data. In
482 * fact, while these metadata operations *might* carry
483 * large outgoing payloads, they rarely *do*. However, we
484 * have to commit to the request here, so reallocate and
485 * register it now. The data path will never require this
486 * reallocation.
487 *
488 * If the allocation or registration fails, the RPC framework
489 * will (doggedly) retry.
490 */
491 if (task->tk_flags & RPC_TASK_SWAPPER)
492 nreq = kmalloc(sizeof *req + size, GFP_ATOMIC);
493 else
494 nreq = kmalloc(sizeof *req + size, GFP_NOFS);
495 if (nreq == NULL)
496 goto outfail;
497
498 if (rpcrdma_register_internal(&rpcx_to_rdmax(xprt)->rx_ia,
499 nreq->rl_base, size + sizeof(struct rpcrdma_req)
500 - offsetof(struct rpcrdma_req, rl_base),
501 &nreq->rl_handle, &nreq->rl_iov)) {
502 kfree(nreq);
503 goto outfail;
504 }
505 rpcx_to_rdmax(xprt)->rx_stats.hardway_register_count += size;
506 nreq->rl_size = size;
507 nreq->rl_niovs = 0;
508 nreq->rl_nchunks = 0;
509 nreq->rl_buffer = (struct rpcrdma_buffer *)req;
510 nreq->rl_reply = req->rl_reply;
511 memcpy(nreq->rl_segments,
512 req->rl_segments, sizeof nreq->rl_segments);
513 /* flag the swap with an unused field */
514 nreq->rl_iov.length = 0;
515 req->rl_reply = NULL;
516 req = nreq;
517 }
518 dprintk("RPC: %s: size %zd, request 0x%p\n", __func__, size, req); 484 dprintk("RPC: %s: size %zd, request 0x%p\n", __func__, size, req);
519 req->rl_connect_cookie = 0; /* our reserved value */ 485 req->rl_connect_cookie = 0; /* our reserved value */
520 return req->rl_xdr_buf; 486 return req->rl_sendbuf->rg_base;
521 487
522outfail: 488out_rdmabuf:
489 min_size = RPCRDMA_INLINE_WRITE_THRESHOLD(task->tk_rqstp);
490 rb = rpcrdma_alloc_regbuf(&r_xprt->rx_ia, min_size, flags);
491 if (IS_ERR(rb))
492 goto out_fail;
493 req->rl_rdmabuf = rb;
494
495out_sendbuf:
496 /* XDR encoding and RPC/RDMA marshaling of this request has not
497 * yet occurred. Thus a lower bound is needed to prevent buffer
498 * overrun during marshaling.
499 *
500 * RPC/RDMA marshaling may choose to send payload bearing ops
501 * inline, if the result is smaller than the inline threshold.
502 * The value of the "size" argument accounts for header
503 * requirements but not for the payload in these cases.
504 *
505 * Likewise, allocate enough space to receive a reply up to the
506 * size of the inline threshold.
507 *
508 * It's unlikely that both the send header and the received
509 * reply will be large, but slush is provided here to allow
510 * flexibility when marshaling.
511 */
512 min_size = RPCRDMA_INLINE_READ_THRESHOLD(task->tk_rqstp);
513 min_size += RPCRDMA_INLINE_WRITE_THRESHOLD(task->tk_rqstp);
514 if (size < min_size)
515 size = min_size;
516
517 rb = rpcrdma_alloc_regbuf(&r_xprt->rx_ia, size, flags);
518 if (IS_ERR(rb))
519 goto out_fail;
520 rb->rg_owner = req;
521
522 r_xprt->rx_stats.hardway_register_count += size;
523 rpcrdma_free_regbuf(&r_xprt->rx_ia, req->rl_sendbuf);
524 req->rl_sendbuf = rb;
525 goto out;
526
527out_fail:
523 rpcrdma_buffer_put(req); 528 rpcrdma_buffer_put(req);
524 rpcx_to_rdmax(xprt)->rx_stats.failed_marshal_count++; 529 r_xprt->rx_stats.failed_marshal_count++;
525 return NULL; 530 return NULL;
526} 531}
527 532
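The rewritten xprt_rdma_allocate() above chooses its GFP flags once and reuses them for any regbuf it has to allocate on the fly. A reduced sketch of just that flag selection, matching the two lines in the hunk; RPC_IS_SWAPPER() marks tasks driving swap-over-NFS traffic, which must not block on reclaim and may need to dip into memory reserves. The wrapper function itself is illustrative only:

#include <linux/gfp.h>
#include <linux/sunrpc/sched.h>

/* Sketch of the flag choice made in xprt_rdma_allocate(). */
static gfp_t rpcrdma_allocate_flags(const struct rpc_task *task)
{
        gfp_t flags = GFP_NOIO | __GFP_NOWARN;  /* don't recurse into I/O paths */

        if (RPC_IS_SWAPPER(task))
                /* swap traffic: no sleeping for reclaim, emergency reserves OK */
                flags = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN;
        return flags;
}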
@@ -533,47 +538,24 @@ xprt_rdma_free(void *buffer)
533{ 538{
534 struct rpcrdma_req *req; 539 struct rpcrdma_req *req;
535 struct rpcrdma_xprt *r_xprt; 540 struct rpcrdma_xprt *r_xprt;
536 struct rpcrdma_rep *rep; 541 struct rpcrdma_regbuf *rb;
537 int i; 542 int i;
538 543
539 if (buffer == NULL) 544 if (buffer == NULL)
540 return; 545 return;
541 546
542 req = container_of(buffer, struct rpcrdma_req, rl_xdr_buf[0]); 547 rb = container_of(buffer, struct rpcrdma_regbuf, rg_base[0]);
543 if (req->rl_iov.length == 0) { /* see allocate above */ 548 req = rb->rg_owner;
544 r_xprt = container_of(((struct rpcrdma_req *) req->rl_buffer)->rl_buffer, 549 r_xprt = container_of(req->rl_buffer, struct rpcrdma_xprt, rx_buf);
545 struct rpcrdma_xprt, rx_buf);
546 } else
547 r_xprt = container_of(req->rl_buffer, struct rpcrdma_xprt, rx_buf);
548 rep = req->rl_reply;
549 550
550 dprintk("RPC: %s: called on 0x%p%s\n", 551 dprintk("RPC: %s: called on 0x%p\n", __func__, req->rl_reply);
551 __func__, rep, (rep && rep->rr_func) ? " (with waiter)" : "");
552 552
553 /*
554 * Finish the deregistration. The process is considered
555 * complete when the rr_func vector becomes NULL - this
556 * was put in place during rpcrdma_reply_handler() - the wait
557 * call below will not block if the dereg is "done". If
558 * interrupted, our framework will clean up.
559 */
560 for (i = 0; req->rl_nchunks;) { 553 for (i = 0; req->rl_nchunks;) {
561 --req->rl_nchunks; 554 --req->rl_nchunks;
562 i += rpcrdma_deregister_external( 555 i += rpcrdma_deregister_external(
563 &req->rl_segments[i], r_xprt); 556 &req->rl_segments[i], r_xprt);
564 } 557 }
565 558
566 if (req->rl_iov.length == 0) { /* see allocate above */
567 struct rpcrdma_req *oreq = (struct rpcrdma_req *)req->rl_buffer;
568 oreq->rl_reply = req->rl_reply;
569 (void) rpcrdma_deregister_internal(&r_xprt->rx_ia,
570 req->rl_handle,
571 &req->rl_iov);
572 kfree(req);
573 req = oreq;
574 }
575
576 /* Put back request+reply buffers */
577 rpcrdma_buffer_put(req); 559 rpcrdma_buffer_put(req);
578} 560}
579 561
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index c98e40643910..124676c13780 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -49,6 +49,7 @@
49 49
50#include <linux/interrupt.h> 50#include <linux/interrupt.h>
51#include <linux/slab.h> 51#include <linux/slab.h>
52#include <linux/prefetch.h>
52#include <asm/bitops.h> 53#include <asm/bitops.h>
53 54
54#include "xprt_rdma.h" 55#include "xprt_rdma.h"
@@ -153,7 +154,7 @@ rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
153 event->device->name, context); 154 event->device->name, context);
154 if (ep->rep_connected == 1) { 155 if (ep->rep_connected == 1) {
155 ep->rep_connected = -EIO; 156 ep->rep_connected = -EIO;
156 ep->rep_func(ep); 157 rpcrdma_conn_func(ep);
157 wake_up_all(&ep->rep_connect_wait); 158 wake_up_all(&ep->rep_connect_wait);
158 } 159 }
159} 160}
@@ -168,23 +169,59 @@ rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context)
168 event->device->name, context); 169 event->device->name, context);
169 if (ep->rep_connected == 1) { 170 if (ep->rep_connected == 1) {
170 ep->rep_connected = -EIO; 171 ep->rep_connected = -EIO;
171 ep->rep_func(ep); 172 rpcrdma_conn_func(ep);
172 wake_up_all(&ep->rep_connect_wait); 173 wake_up_all(&ep->rep_connect_wait);
173 } 174 }
174} 175}
175 176
177static const char * const wc_status[] = {
178 "success",
179 "local length error",
180 "local QP operation error",
181 "local EE context operation error",
182 "local protection error",
183 "WR flushed",
184 "memory management operation error",
185 "bad response error",
186 "local access error",
187 "remote invalid request error",
188 "remote access error",
189 "remote operation error",
190 "transport retry counter exceeded",
191 "RNR retrycounter exceeded",
192 "local RDD violation error",
193 "remove invalid RD request",
194 "operation aborted",
195 "invalid EE context number",
196 "invalid EE context state",
197 "fatal error",
198 "response timeout error",
199 "general error",
200};
201
202#define COMPLETION_MSG(status) \
203 ((status) < ARRAY_SIZE(wc_status) ? \
204 wc_status[(status)] : "unexpected completion error")
205
176static void 206static void
177rpcrdma_sendcq_process_wc(struct ib_wc *wc) 207rpcrdma_sendcq_process_wc(struct ib_wc *wc)
178{ 208{
179 struct rpcrdma_mw *frmr = (struct rpcrdma_mw *)(unsigned long)wc->wr_id; 209 if (likely(wc->status == IB_WC_SUCCESS))
210 return;
180 211
181 dprintk("RPC: %s: frmr %p status %X opcode %d\n", 212 /* WARNING: Only wr_id and status are reliable at this point */
182 __func__, frmr, wc->status, wc->opcode); 213 if (wc->wr_id == 0ULL) {
214 if (wc->status != IB_WC_WR_FLUSH_ERR)
215 pr_err("RPC: %s: SEND: %s\n",
216 __func__, COMPLETION_MSG(wc->status));
217 } else {
218 struct rpcrdma_mw *r;
183 219
184 if (wc->wr_id == 0ULL) 220 r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
185 return; 221 r->r.frmr.fr_state = FRMR_IS_STALE;
186 if (wc->status != IB_WC_SUCCESS) 222 pr_err("RPC: %s: frmr %p (stale): %s\n",
187 frmr->r.frmr.fr_state = FRMR_IS_STALE; 223 __func__, r, COMPLETION_MSG(wc->status));
224 }
188} 225}
189 226
190static int 227static int
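The wc_status[] table and COMPLETION_MSG() macro added above turn an ib_wc_status code into readable pr_err() text, with a bounds check so a status value the table does not know about cannot index past its end. The same guard written out as a function, for clarity (a hypothetical helper; the macro above is what the code actually uses):

#include <linux/kernel.h>       /* ARRAY_SIZE() */

static const char *completion_msg(int status)
{
        /* wc_status[] is the string table introduced in this hunk */
        if (status < 0 || status >= ARRAY_SIZE(wc_status))
                return "unexpected completion error";
        return wc_status[status];
}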
@@ -248,33 +285,32 @@ rpcrdma_recvcq_process_wc(struct ib_wc *wc, struct list_head *sched_list)
248 struct rpcrdma_rep *rep = 285 struct rpcrdma_rep *rep =
249 (struct rpcrdma_rep *)(unsigned long)wc->wr_id; 286 (struct rpcrdma_rep *)(unsigned long)wc->wr_id;
250 287
251 dprintk("RPC: %s: rep %p status %X opcode %X length %u\n", 288 /* WARNING: Only wr_id and status are reliable at this point */
252 __func__, rep, wc->status, wc->opcode, wc->byte_len); 289 if (wc->status != IB_WC_SUCCESS)
290 goto out_fail;
253 291
254 if (wc->status != IB_WC_SUCCESS) { 292 /* status == SUCCESS means all fields in wc are trustworthy */
255 rep->rr_len = ~0U;
256 goto out_schedule;
257 }
258 if (wc->opcode != IB_WC_RECV) 293 if (wc->opcode != IB_WC_RECV)
259 return; 294 return;
260 295
296 dprintk("RPC: %s: rep %p opcode 'recv', length %u: success\n",
297 __func__, rep, wc->byte_len);
298
261 rep->rr_len = wc->byte_len; 299 rep->rr_len = wc->byte_len;
262 ib_dma_sync_single_for_cpu(rdmab_to_ia(rep->rr_buffer)->ri_id->device, 300 ib_dma_sync_single_for_cpu(rdmab_to_ia(rep->rr_buffer)->ri_id->device,
263 rep->rr_iov.addr, rep->rr_len, DMA_FROM_DEVICE); 301 rdmab_addr(rep->rr_rdmabuf),
264 302 rep->rr_len, DMA_FROM_DEVICE);
265 if (rep->rr_len >= 16) { 303 prefetch(rdmab_to_msg(rep->rr_rdmabuf));
266 struct rpcrdma_msg *p = (struct rpcrdma_msg *)rep->rr_base;
267 unsigned int credits = ntohl(p->rm_credit);
268
269 if (credits == 0)
270 credits = 1; /* don't deadlock */
271 else if (credits > rep->rr_buffer->rb_max_requests)
272 credits = rep->rr_buffer->rb_max_requests;
273 atomic_set(&rep->rr_buffer->rb_credits, credits);
274 }
275 304
276out_schedule: 305out_schedule:
277 list_add_tail(&rep->rr_list, sched_list); 306 list_add_tail(&rep->rr_list, sched_list);
307 return;
308out_fail:
309 if (wc->status != IB_WC_WR_FLUSH_ERR)
310 pr_err("RPC: %s: rep %p: %s\n",
311 __func__, rep, COMPLETION_MSG(wc->status));
312 rep->rr_len = ~0U;
313 goto out_schedule;
278} 314}
279 315
280static int 316static int
@@ -390,8 +426,8 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
390#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) 426#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
391 struct sockaddr_in *addr = (struct sockaddr_in *) &ep->rep_remote_addr; 427 struct sockaddr_in *addr = (struct sockaddr_in *) &ep->rep_remote_addr;
392#endif 428#endif
393 struct ib_qp_attr attr; 429 struct ib_qp_attr *attr = &ia->ri_qp_attr;
394 struct ib_qp_init_attr iattr; 430 struct ib_qp_init_attr *iattr = &ia->ri_qp_init_attr;
395 int connstate = 0; 431 int connstate = 0;
396 432
397 switch (event->event) { 433 switch (event->event) {
@@ -414,12 +450,13 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
414 break; 450 break;
415 case RDMA_CM_EVENT_ESTABLISHED: 451 case RDMA_CM_EVENT_ESTABLISHED:
416 connstate = 1; 452 connstate = 1;
417 ib_query_qp(ia->ri_id->qp, &attr, 453 ib_query_qp(ia->ri_id->qp, attr,
418 IB_QP_MAX_QP_RD_ATOMIC | IB_QP_MAX_DEST_RD_ATOMIC, 454 IB_QP_MAX_QP_RD_ATOMIC | IB_QP_MAX_DEST_RD_ATOMIC,
419 &iattr); 455 iattr);
420 dprintk("RPC: %s: %d responder resources" 456 dprintk("RPC: %s: %d responder resources"
421 " (%d initiator)\n", 457 " (%d initiator)\n",
422 __func__, attr.max_dest_rd_atomic, attr.max_rd_atomic); 458 __func__, attr->max_dest_rd_atomic,
459 attr->max_rd_atomic);
423 goto connected; 460 goto connected;
424 case RDMA_CM_EVENT_CONNECT_ERROR: 461 case RDMA_CM_EVENT_CONNECT_ERROR:
425 connstate = -ENOTCONN; 462 connstate = -ENOTCONN;
@@ -436,11 +473,10 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
436 case RDMA_CM_EVENT_DEVICE_REMOVAL: 473 case RDMA_CM_EVENT_DEVICE_REMOVAL:
437 connstate = -ENODEV; 474 connstate = -ENODEV;
438connected: 475connected:
439 atomic_set(&rpcx_to_rdmax(ep->rep_xprt)->rx_buf.rb_credits, 1);
440 dprintk("RPC: %s: %sconnected\n", 476 dprintk("RPC: %s: %sconnected\n",
441 __func__, connstate > 0 ? "" : "dis"); 477 __func__, connstate > 0 ? "" : "dis");
442 ep->rep_connected = connstate; 478 ep->rep_connected = connstate;
443 ep->rep_func(ep); 479 rpcrdma_conn_func(ep);
444 wake_up_all(&ep->rep_connect_wait); 480 wake_up_all(&ep->rep_connect_wait);
445 /*FALLTHROUGH*/ 481 /*FALLTHROUGH*/
446 default: 482 default:
@@ -453,7 +489,7 @@ connected:
453 489
454#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) 490#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
455 if (connstate == 1) { 491 if (connstate == 1) {
456 int ird = attr.max_dest_rd_atomic; 492 int ird = attr->max_dest_rd_atomic;
457 int tird = ep->rep_remote_cma.responder_resources; 493 int tird = ep->rep_remote_cma.responder_resources;
458 printk(KERN_INFO "rpcrdma: connection to %pI4:%u " 494 printk(KERN_INFO "rpcrdma: connection to %pI4:%u "
459 "on %s, memreg %d slots %d ird %d%s\n", 495 "on %s, memreg %d slots %d ird %d%s\n",
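Several hunks in this file stop declaring struct ib_qp_attr, struct ib_qp_init_attr and struct ib_device_attr on the stack and instead dereference copies held in the per-connection rpcrdma_ia; the attribute structs are large, and the connection-manager upcall already runs with a deep call chain. A sketch of the resulting ownership, using only the fields these hunks dereference (the real struct definition lives in xprt_rdma.h and is not reproduced here):

#include <rdma/ib_verbs.h>

/* Sketch: one long-lived copy of each attribute struct per interface
 * adapter, instead of a few hundred bytes of stack per query.
 */
struct rpcrdma_ia_sketch {
        struct ib_device_attr   ri_devattr;     /* filled by ib_query_device() in rpcrdma_ia_open() */
        struct ib_qp_attr       ri_qp_attr;     /* scratch for ib_query_qp() in rpcrdma_conn_upcall() */
        struct ib_qp_init_attr  ri_qp_init_attr;
};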
@@ -554,8 +590,8 @@ int
554rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) 590rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
555{ 591{
556 int rc, mem_priv; 592 int rc, mem_priv;
557 struct ib_device_attr devattr;
558 struct rpcrdma_ia *ia = &xprt->rx_ia; 593 struct rpcrdma_ia *ia = &xprt->rx_ia;
594 struct ib_device_attr *devattr = &ia->ri_devattr;
559 595
560 ia->ri_id = rpcrdma_create_id(xprt, ia, addr); 596 ia->ri_id = rpcrdma_create_id(xprt, ia, addr);
561 if (IS_ERR(ia->ri_id)) { 597 if (IS_ERR(ia->ri_id)) {
@@ -571,26 +607,21 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
571 goto out2; 607 goto out2;
572 } 608 }
573 609
574 /* 610 rc = ib_query_device(ia->ri_id->device, devattr);
575 * Query the device to determine if the requested memory
576 * registration strategy is supported. If it isn't, set the
577 * strategy to a globally supported model.
578 */
579 rc = ib_query_device(ia->ri_id->device, &devattr);
580 if (rc) { 611 if (rc) {
581 dprintk("RPC: %s: ib_query_device failed %d\n", 612 dprintk("RPC: %s: ib_query_device failed %d\n",
582 __func__, rc); 613 __func__, rc);
583 goto out2; 614 goto out3;
584 } 615 }
585 616
586 if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) { 617 if (devattr->device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) {
587 ia->ri_have_dma_lkey = 1; 618 ia->ri_have_dma_lkey = 1;
588 ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey; 619 ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey;
589 } 620 }
590 621
591 if (memreg == RPCRDMA_FRMR) { 622 if (memreg == RPCRDMA_FRMR) {
592 /* Requires both frmr reg and local dma lkey */ 623 /* Requires both frmr reg and local dma lkey */
593 if ((devattr.device_cap_flags & 624 if ((devattr->device_cap_flags &
594 (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) != 625 (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) !=
595 (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) { 626 (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) {
596 dprintk("RPC: %s: FRMR registration " 627 dprintk("RPC: %s: FRMR registration "
@@ -600,7 +631,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
600 /* Mind the ia limit on FRMR page list depth */ 631 /* Mind the ia limit on FRMR page list depth */
601 ia->ri_max_frmr_depth = min_t(unsigned int, 632 ia->ri_max_frmr_depth = min_t(unsigned int,
602 RPCRDMA_MAX_DATA_SEGS, 633 RPCRDMA_MAX_DATA_SEGS,
603 devattr.max_fast_reg_page_list_len); 634 devattr->max_fast_reg_page_list_len);
604 } 635 }
605 } 636 }
606 if (memreg == RPCRDMA_MTHCAFMR) { 637 if (memreg == RPCRDMA_MTHCAFMR) {
@@ -638,14 +669,14 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
638 "phys register failed with %lX\n", 669 "phys register failed with %lX\n",
639 __func__, PTR_ERR(ia->ri_bind_mem)); 670 __func__, PTR_ERR(ia->ri_bind_mem));
640 rc = -ENOMEM; 671 rc = -ENOMEM;
641 goto out2; 672 goto out3;
642 } 673 }
643 break; 674 break;
644 default: 675 default:
645 printk(KERN_ERR "RPC: Unsupported memory " 676 printk(KERN_ERR "RPC: Unsupported memory "
646 "registration mode: %d\n", memreg); 677 "registration mode: %d\n", memreg);
647 rc = -ENOMEM; 678 rc = -ENOMEM;
648 goto out2; 679 goto out3;
649 } 680 }
650 dprintk("RPC: %s: memory registration strategy is %d\n", 681 dprintk("RPC: %s: memory registration strategy is %d\n",
651 __func__, memreg); 682 __func__, memreg);
@@ -655,6 +686,10 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
655 686
656 rwlock_init(&ia->ri_qplock); 687 rwlock_init(&ia->ri_qplock);
657 return 0; 688 return 0;
689
690out3:
691 ib_dealloc_pd(ia->ri_pd);
692 ia->ri_pd = NULL;
658out2: 693out2:
659 rdma_destroy_id(ia->ri_id); 694 rdma_destroy_id(ia->ri_id);
660 ia->ri_id = NULL; 695 ia->ri_id = NULL;
@@ -698,20 +733,13 @@ int
698rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, 733rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
699 struct rpcrdma_create_data_internal *cdata) 734 struct rpcrdma_create_data_internal *cdata)
700{ 735{
701 struct ib_device_attr devattr; 736 struct ib_device_attr *devattr = &ia->ri_devattr;
702 struct ib_cq *sendcq, *recvcq; 737 struct ib_cq *sendcq, *recvcq;
703 int rc, err; 738 int rc, err;
704 739
705 rc = ib_query_device(ia->ri_id->device, &devattr);
706 if (rc) {
707 dprintk("RPC: %s: ib_query_device failed %d\n",
708 __func__, rc);
709 return rc;
710 }
711
712 /* check provider's send/recv wr limits */ 740 /* check provider's send/recv wr limits */
713 if (cdata->max_requests > devattr.max_qp_wr) 741 if (cdata->max_requests > devattr->max_qp_wr)
714 cdata->max_requests = devattr.max_qp_wr; 742 cdata->max_requests = devattr->max_qp_wr;
715 743
716 ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall; 744 ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall;
717 ep->rep_attr.qp_context = ep; 745 ep->rep_attr.qp_context = ep;
@@ -746,8 +774,8 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
746 774
747 } 775 }
748 ep->rep_attr.cap.max_send_wr *= depth; 776 ep->rep_attr.cap.max_send_wr *= depth;
749 if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) { 777 if (ep->rep_attr.cap.max_send_wr > devattr->max_qp_wr) {
750 cdata->max_requests = devattr.max_qp_wr / depth; 778 cdata->max_requests = devattr->max_qp_wr / depth;
751 if (!cdata->max_requests) 779 if (!cdata->max_requests)
752 return -EINVAL; 780 return -EINVAL;
753 ep->rep_attr.cap.max_send_wr = cdata->max_requests * 781 ep->rep_attr.cap.max_send_wr = cdata->max_requests *
@@ -766,6 +794,14 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
766 ep->rep_attr.qp_type = IB_QPT_RC; 794 ep->rep_attr.qp_type = IB_QPT_RC;
767 ep->rep_attr.port_num = ~0; 795 ep->rep_attr.port_num = ~0;
768 796
797 if (cdata->padding) {
798 ep->rep_padbuf = rpcrdma_alloc_regbuf(ia, cdata->padding,
799 GFP_KERNEL);
800 if (IS_ERR(ep->rep_padbuf))
801 return PTR_ERR(ep->rep_padbuf);
802 } else
803 ep->rep_padbuf = NULL;
804
769 dprintk("RPC: %s: requested max: dtos: send %d recv %d; " 805 dprintk("RPC: %s: requested max: dtos: send %d recv %d; "
770 "iovs: send %d recv %d\n", 806 "iovs: send %d recv %d\n",
771 __func__, 807 __func__,
@@ -781,7 +817,6 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
781 else if (ep->rep_cqinit <= 2) 817 else if (ep->rep_cqinit <= 2)
782 ep->rep_cqinit = 0; 818 ep->rep_cqinit = 0;
783 INIT_CQCOUNT(ep); 819 INIT_CQCOUNT(ep);
784 ep->rep_ia = ia;
785 init_waitqueue_head(&ep->rep_connect_wait); 820 init_waitqueue_head(&ep->rep_connect_wait);
786 INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker); 821 INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker);
787 822
@@ -831,10 +866,11 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
831 866
832 /* Client offers RDMA Read but does not initiate */ 867 /* Client offers RDMA Read but does not initiate */
833 ep->rep_remote_cma.initiator_depth = 0; 868 ep->rep_remote_cma.initiator_depth = 0;
834 if (devattr.max_qp_rd_atom > 32) /* arbitrary but <= 255 */ 869 if (devattr->max_qp_rd_atom > 32) /* arbitrary but <= 255 */
835 ep->rep_remote_cma.responder_resources = 32; 870 ep->rep_remote_cma.responder_resources = 32;
836 else 871 else
837 ep->rep_remote_cma.responder_resources = devattr.max_qp_rd_atom; 872 ep->rep_remote_cma.responder_resources =
873 devattr->max_qp_rd_atom;
838 874
839 ep->rep_remote_cma.retry_count = 7; 875 ep->rep_remote_cma.retry_count = 7;
840 ep->rep_remote_cma.flow_control = 0; 876 ep->rep_remote_cma.flow_control = 0;
@@ -848,6 +884,7 @@ out2:
848 dprintk("RPC: %s: ib_destroy_cq returned %i\n", 884 dprintk("RPC: %s: ib_destroy_cq returned %i\n",
849 __func__, err); 885 __func__, err);
850out1: 886out1:
887 rpcrdma_free_regbuf(ia, ep->rep_padbuf);
851 return rc; 888 return rc;
852} 889}
853 890
@@ -874,11 +911,7 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
874 ia->ri_id->qp = NULL; 911 ia->ri_id->qp = NULL;
875 } 912 }
876 913
877 /* padding - could be done in rpcrdma_buffer_destroy... */ 914 rpcrdma_free_regbuf(ia, ep->rep_padbuf);
878 if (ep->rep_pad_mr) {
879 rpcrdma_deregister_internal(ia, ep->rep_pad_mr, &ep->rep_pad);
880 ep->rep_pad_mr = NULL;
881 }
882 915
883 rpcrdma_clean_cq(ep->rep_attr.recv_cq); 916 rpcrdma_clean_cq(ep->rep_attr.recv_cq);
884 rc = ib_destroy_cq(ep->rep_attr.recv_cq); 917 rc = ib_destroy_cq(ep->rep_attr.recv_cq);
@@ -1048,6 +1081,48 @@ rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
1048 } 1081 }
1049} 1082}
1050 1083
1084static struct rpcrdma_req *
1085rpcrdma_create_req(struct rpcrdma_xprt *r_xprt)
1086{
1087 struct rpcrdma_req *req;
1088
1089 req = kzalloc(sizeof(*req), GFP_KERNEL);
1090 if (req == NULL)
1091 return ERR_PTR(-ENOMEM);
1092
1093 req->rl_buffer = &r_xprt->rx_buf;
1094 return req;
1095}
1096
1097static struct rpcrdma_rep *
1098rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt)
1099{
1100 struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
1101 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
1102 struct rpcrdma_rep *rep;
1103 int rc;
1104
1105 rc = -ENOMEM;
1106 rep = kzalloc(sizeof(*rep), GFP_KERNEL);
1107 if (rep == NULL)
1108 goto out;
1109
1110 rep->rr_rdmabuf = rpcrdma_alloc_regbuf(ia, cdata->inline_rsize,
1111 GFP_KERNEL);
1112 if (IS_ERR(rep->rr_rdmabuf)) {
1113 rc = PTR_ERR(rep->rr_rdmabuf);
1114 goto out_free;
1115 }
1116
1117 rep->rr_buffer = &r_xprt->rx_buf;
1118 return rep;
1119
1120out_free:
1121 kfree(rep);
1122out:
1123 return ERR_PTR(rc);
1124}
1125
1051static int 1126static int
1052rpcrdma_init_fmrs(struct rpcrdma_ia *ia, struct rpcrdma_buffer *buf) 1127rpcrdma_init_fmrs(struct rpcrdma_ia *ia, struct rpcrdma_buffer *buf)
1053{ 1128{
@@ -1134,27 +1209,26 @@ out_free:
1134} 1209}
1135 1210
1136int 1211int
1137rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, 1212rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
1138 struct rpcrdma_ia *ia, struct rpcrdma_create_data_internal *cdata)
1139{ 1213{
1214 struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
1215 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
1216 struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
1140 char *p; 1217 char *p;
1141 size_t len, rlen, wlen; 1218 size_t len;
1142 int i, rc; 1219 int i, rc;
1143 1220
1144 buf->rb_max_requests = cdata->max_requests; 1221 buf->rb_max_requests = cdata->max_requests;
1145 spin_lock_init(&buf->rb_lock); 1222 spin_lock_init(&buf->rb_lock);
1146 atomic_set(&buf->rb_credits, 1);
1147 1223
1148 /* Need to allocate: 1224 /* Need to allocate:
1149 * 1. arrays for send and recv pointers 1225 * 1. arrays for send and recv pointers
1150 * 2. arrays of struct rpcrdma_req to fill in pointers 1226 * 2. arrays of struct rpcrdma_req to fill in pointers
1151 * 3. array of struct rpcrdma_rep for replies 1227 * 3. array of struct rpcrdma_rep for replies
1152 * 4. padding, if any
1153 * Send/recv buffers in req/rep need to be registered 1228 * Send/recv buffers in req/rep need to be registered
1154 */ 1229 */
1155 len = buf->rb_max_requests * 1230 len = buf->rb_max_requests *
1156 (sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *)); 1231 (sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *));
1157 len += cdata->padding;
1158 1232
1159 p = kzalloc(len, GFP_KERNEL); 1233 p = kzalloc(len, GFP_KERNEL);
1160 if (p == NULL) { 1234 if (p == NULL) {
@@ -1170,17 +1244,6 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
1170 buf->rb_recv_bufs = (struct rpcrdma_rep **) p; 1244 buf->rb_recv_bufs = (struct rpcrdma_rep **) p;
1171 p = (char *) &buf->rb_recv_bufs[buf->rb_max_requests]; 1245 p = (char *) &buf->rb_recv_bufs[buf->rb_max_requests];
1172 1246
1173 /*
1174 * Register the zeroed pad buffer, if any.
1175 */
1176 if (cdata->padding) {
1177 rc = rpcrdma_register_internal(ia, p, cdata->padding,
1178 &ep->rep_pad_mr, &ep->rep_pad);
1179 if (rc)
1180 goto out;
1181 }
1182 p += cdata->padding;
1183
1184 INIT_LIST_HEAD(&buf->rb_mws); 1247 INIT_LIST_HEAD(&buf->rb_mws);
1185 INIT_LIST_HEAD(&buf->rb_all); 1248 INIT_LIST_HEAD(&buf->rb_all);
1186 switch (ia->ri_memreg_strategy) { 1249 switch (ia->ri_memreg_strategy) {
@@ -1198,62 +1261,29 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
1198 break; 1261 break;
1199 } 1262 }
1200 1263
1201 /*
1202 * Allocate/init the request/reply buffers. Doing this
1203 * using kmalloc for now -- one for each buf.
1204 */
1205 wlen = 1 << fls(cdata->inline_wsize + sizeof(struct rpcrdma_req));
1206 rlen = 1 << fls(cdata->inline_rsize + sizeof(struct rpcrdma_rep));
1207 dprintk("RPC: %s: wlen = %zu, rlen = %zu\n",
1208 __func__, wlen, rlen);
1209
1210 for (i = 0; i < buf->rb_max_requests; i++) { 1264 for (i = 0; i < buf->rb_max_requests; i++) {
1211 struct rpcrdma_req *req; 1265 struct rpcrdma_req *req;
1212 struct rpcrdma_rep *rep; 1266 struct rpcrdma_rep *rep;
1213 1267
1214 req = kmalloc(wlen, GFP_KERNEL); 1268 req = rpcrdma_create_req(r_xprt);
1215 if (req == NULL) { 1269 if (IS_ERR(req)) {
1216 dprintk("RPC: %s: request buffer %d alloc" 1270 dprintk("RPC: %s: request buffer %d alloc"
1217 " failed\n", __func__, i); 1271 " failed\n", __func__, i);
1218 rc = -ENOMEM; 1272 rc = PTR_ERR(req);
1219 goto out; 1273 goto out;
1220 } 1274 }
1221 memset(req, 0, sizeof(struct rpcrdma_req));
1222 buf->rb_send_bufs[i] = req; 1275 buf->rb_send_bufs[i] = req;
1223 buf->rb_send_bufs[i]->rl_buffer = buf;
1224 1276
1225 rc = rpcrdma_register_internal(ia, req->rl_base, 1277 rep = rpcrdma_create_rep(r_xprt);
1226 wlen - offsetof(struct rpcrdma_req, rl_base), 1278 if (IS_ERR(rep)) {
1227 &buf->rb_send_bufs[i]->rl_handle,
1228 &buf->rb_send_bufs[i]->rl_iov);
1229 if (rc)
1230 goto out;
1231
1232 buf->rb_send_bufs[i]->rl_size = wlen -
1233 sizeof(struct rpcrdma_req);
1234
1235 rep = kmalloc(rlen, GFP_KERNEL);
1236 if (rep == NULL) {
1237 dprintk("RPC: %s: reply buffer %d alloc failed\n", 1279 dprintk("RPC: %s: reply buffer %d alloc failed\n",
1238 __func__, i); 1280 __func__, i);
1239 rc = -ENOMEM; 1281 rc = PTR_ERR(rep);
1240 goto out; 1282 goto out;
1241 } 1283 }
1242 memset(rep, 0, sizeof(struct rpcrdma_rep));
1243 buf->rb_recv_bufs[i] = rep; 1284 buf->rb_recv_bufs[i] = rep;
1244 buf->rb_recv_bufs[i]->rr_buffer = buf;
1245
1246 rc = rpcrdma_register_internal(ia, rep->rr_base,
1247 rlen - offsetof(struct rpcrdma_rep, rr_base),
1248 &buf->rb_recv_bufs[i]->rr_handle,
1249 &buf->rb_recv_bufs[i]->rr_iov);
1250 if (rc)
1251 goto out;
1252
1253 } 1285 }
1254 dprintk("RPC: %s: max_requests %d\n", 1286
1255 __func__, buf->rb_max_requests);
1256 /* done */
1257 return 0; 1287 return 0;
1258out: 1288out:
1259 rpcrdma_buffer_destroy(buf); 1289 rpcrdma_buffer_destroy(buf);
@@ -1261,6 +1291,27 @@ out:
1261} 1291}
1262 1292
1263static void 1293static void
1294rpcrdma_destroy_rep(struct rpcrdma_ia *ia, struct rpcrdma_rep *rep)
1295{
1296 if (!rep)
1297 return;
1298
1299 rpcrdma_free_regbuf(ia, rep->rr_rdmabuf);
1300 kfree(rep);
1301}
1302
1303static void
1304rpcrdma_destroy_req(struct rpcrdma_ia *ia, struct rpcrdma_req *req)
1305{
1306 if (!req)
1307 return;
1308
1309 rpcrdma_free_regbuf(ia, req->rl_sendbuf);
1310 rpcrdma_free_regbuf(ia, req->rl_rdmabuf);
1311 kfree(req);
1312}
1313
1314static void
1264rpcrdma_destroy_fmrs(struct rpcrdma_buffer *buf) 1315rpcrdma_destroy_fmrs(struct rpcrdma_buffer *buf)
1265{ 1316{
1266 struct rpcrdma_mw *r; 1317 struct rpcrdma_mw *r;
@@ -1315,18 +1366,10 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
1315 dprintk("RPC: %s: entering\n", __func__); 1366 dprintk("RPC: %s: entering\n", __func__);
1316 1367
1317 for (i = 0; i < buf->rb_max_requests; i++) { 1368 for (i = 0; i < buf->rb_max_requests; i++) {
1318 if (buf->rb_recv_bufs && buf->rb_recv_bufs[i]) { 1369 if (buf->rb_recv_bufs)
1319 rpcrdma_deregister_internal(ia, 1370 rpcrdma_destroy_rep(ia, buf->rb_recv_bufs[i]);
1320 buf->rb_recv_bufs[i]->rr_handle, 1371 if (buf->rb_send_bufs)
1321 &buf->rb_recv_bufs[i]->rr_iov); 1372 rpcrdma_destroy_req(ia, buf->rb_send_bufs[i]);
1322 kfree(buf->rb_recv_bufs[i]);
1323 }
1324 if (buf->rb_send_bufs && buf->rb_send_bufs[i]) {
1325 rpcrdma_deregister_internal(ia,
1326 buf->rb_send_bufs[i]->rl_handle,
1327 &buf->rb_send_bufs[i]->rl_iov);
1328 kfree(buf->rb_send_bufs[i]);
1329 }
1330 } 1373 }
1331 1374
1332 switch (ia->ri_memreg_strategy) { 1375 switch (ia->ri_memreg_strategy) {
@@ -1450,8 +1493,8 @@ rpcrdma_buffer_put_mrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
1450 int i; 1493 int i;
1451 1494
1452 for (i = 1, seg++; i < RPCRDMA_MAX_SEGS; seg++, i++) 1495 for (i = 1, seg++; i < RPCRDMA_MAX_SEGS; seg++, i++)
1453 rpcrdma_buffer_put_mr(&seg->mr_chunk.rl_mw, buf); 1496 rpcrdma_buffer_put_mr(&seg->rl_mw, buf);
1454 rpcrdma_buffer_put_mr(&seg1->mr_chunk.rl_mw, buf); 1497 rpcrdma_buffer_put_mr(&seg1->rl_mw, buf);
1455} 1498}
1456 1499
1457static void 1500static void
@@ -1537,7 +1580,7 @@ rpcrdma_buffer_get_frmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf,
1537 list_add(&r->mw_list, stale); 1580 list_add(&r->mw_list, stale);
1538 continue; 1581 continue;
1539 } 1582 }
1540 req->rl_segments[i].mr_chunk.rl_mw = r; 1583 req->rl_segments[i].rl_mw = r;
1541 if (unlikely(i-- == 0)) 1584 if (unlikely(i-- == 0))
1542 return req; /* Success */ 1585 return req; /* Success */
1543 } 1586 }
@@ -1559,7 +1602,7 @@ rpcrdma_buffer_get_fmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
1559 r = list_entry(buf->rb_mws.next, 1602 r = list_entry(buf->rb_mws.next,
1560 struct rpcrdma_mw, mw_list); 1603 struct rpcrdma_mw, mw_list);
1561 list_del(&r->mw_list); 1604 list_del(&r->mw_list);
1562 req->rl_segments[i].mr_chunk.rl_mw = r; 1605 req->rl_segments[i].rl_mw = r;
1563 if (unlikely(i-- == 0)) 1606 if (unlikely(i-- == 0))
1564 return req; /* Success */ 1607 return req; /* Success */
1565 } 1608 }
@@ -1658,8 +1701,6 @@ rpcrdma_recv_buffer_get(struct rpcrdma_req *req)
1658 struct rpcrdma_buffer *buffers = req->rl_buffer; 1701 struct rpcrdma_buffer *buffers = req->rl_buffer;
1659 unsigned long flags; 1702 unsigned long flags;
1660 1703
1661 if (req->rl_iov.length == 0) /* special case xprt_rdma_allocate() */
1662 buffers = ((struct rpcrdma_req *) buffers)->rl_buffer;
1663 spin_lock_irqsave(&buffers->rb_lock, flags); 1704 spin_lock_irqsave(&buffers->rb_lock, flags);
1664 if (buffers->rb_recv_index < buffers->rb_max_requests) { 1705 if (buffers->rb_recv_index < buffers->rb_max_requests) {
1665 req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index]; 1706 req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index];
@@ -1688,7 +1729,7 @@ rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
1688 * Wrappers for internal-use kmalloc memory registration, used by buffer code. 1729 * Wrappers for internal-use kmalloc memory registration, used by buffer code.
1689 */ 1730 */
1690 1731
1691int 1732static int
1692rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len, 1733rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len,
1693 struct ib_mr **mrp, struct ib_sge *iov) 1734 struct ib_mr **mrp, struct ib_sge *iov)
1694{ 1735{
@@ -1739,7 +1780,7 @@ rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len,
1739 return rc; 1780 return rc;
1740} 1781}
1741 1782
1742int 1783static int
1743rpcrdma_deregister_internal(struct rpcrdma_ia *ia, 1784rpcrdma_deregister_internal(struct rpcrdma_ia *ia,
1744 struct ib_mr *mr, struct ib_sge *iov) 1785 struct ib_mr *mr, struct ib_sge *iov)
1745{ 1786{
@@ -1757,6 +1798,61 @@ rpcrdma_deregister_internal(struct rpcrdma_ia *ia,
1757 return rc; 1798 return rc;
1758} 1799}
1759 1800
1801/**
1802 * rpcrdma_alloc_regbuf - kmalloc and register memory for SEND/RECV buffers
1803 * @ia: controlling rpcrdma_ia
1804 * @size: size of buffer to be allocated, in bytes
1805 * @flags: GFP flags
1806 *
1807 * Returns pointer to private header of an area of internally
1808 * registered memory, or an ERR_PTR. The registered buffer follows
1809 * the end of the private header.
1810 *
1811 * xprtrdma uses a regbuf for posting an outgoing RDMA SEND, or for
1812 * receiving the payload of RDMA RECV operations. regbufs are not
1813 * used for RDMA READ/WRITE operations, thus are registered only for
1814 * LOCAL access.
1815 */
1816struct rpcrdma_regbuf *
1817rpcrdma_alloc_regbuf(struct rpcrdma_ia *ia, size_t size, gfp_t flags)
1818{
1819 struct rpcrdma_regbuf *rb;
1820 int rc;
1821
1822 rc = -ENOMEM;
1823 rb = kmalloc(sizeof(*rb) + size, flags);
1824 if (rb == NULL)
1825 goto out;
1826
1827 rb->rg_size = size;
1828 rb->rg_owner = NULL;
1829 rc = rpcrdma_register_internal(ia, rb->rg_base, size,
1830 &rb->rg_mr, &rb->rg_iov);
1831 if (rc)
1832 goto out_free;
1833
1834 return rb;
1835
1836out_free:
1837 kfree(rb);
1838out:
1839 return ERR_PTR(rc);
1840}
1841
1842/**
1843 * rpcrdma_free_regbuf - deregister and free registered buffer
1844 * @ia: controlling rpcrdma_ia
1845 * @rb: regbuf to be deregistered and freed
1846 */
1847void
1848rpcrdma_free_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb)
1849{
1850 if (rb) {
1851 rpcrdma_deregister_internal(ia, rb->rg_mr, &rb->rg_iov);
1852 kfree(rb);
1853 }
1854}
1855
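
For reference, a minimal usage sketch of how a caller might pair these two helpers, assuming the struct rpcrdma_regbuf and struct rpcrdma_req declarations added in xprt_rdma.h below; it is not part of this diff, and example_setup_sendbuf()/example_teardown_sendbuf() are hypothetical names:

static int
example_setup_sendbuf(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
		      size_t size)
{
	struct rpcrdma_regbuf *rb;

	/* kmalloc and register in one step; registered for LOCAL access only */
	rb = rpcrdma_alloc_regbuf(ia, size, GFP_KERNEL);
	if (IS_ERR(rb))
		return PTR_ERR(rb);

	rb->rg_owner = req;	/* lets rpcr_to_rdmar() map rq_buffer back to req */
	req->rl_sendbuf = rb;
	return 0;
}

static void
example_teardown_sendbuf(struct rpcrdma_ia *ia, struct rpcrdma_req *req)
{
	rpcrdma_free_regbuf(ia, req->rl_sendbuf);	/* tolerates a NULL regbuf */
	req->rl_sendbuf = NULL;
}
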
1760/* 1856/*
1761 * Wrappers for chunk registration, shared by read/write chunk code. 1857 * Wrappers for chunk registration, shared by read/write chunk code.
1762 */ 1858 */
@@ -1799,7 +1895,7 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
1799 struct rpcrdma_xprt *r_xprt) 1895 struct rpcrdma_xprt *r_xprt)
1800{ 1896{
1801 struct rpcrdma_mr_seg *seg1 = seg; 1897 struct rpcrdma_mr_seg *seg1 = seg;
1802 struct rpcrdma_mw *mw = seg1->mr_chunk.rl_mw; 1898 struct rpcrdma_mw *mw = seg1->rl_mw;
1803 struct rpcrdma_frmr *frmr = &mw->r.frmr; 1899 struct rpcrdma_frmr *frmr = &mw->r.frmr;
1804 struct ib_mr *mr = frmr->fr_mr; 1900 struct ib_mr *mr = frmr->fr_mr;
1805 struct ib_send_wr fastreg_wr, *bad_wr; 1901 struct ib_send_wr fastreg_wr, *bad_wr;
@@ -1888,12 +1984,12 @@ rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg,
1888 struct ib_send_wr invalidate_wr, *bad_wr; 1984 struct ib_send_wr invalidate_wr, *bad_wr;
1889 int rc; 1985 int rc;
1890 1986
1891 seg1->mr_chunk.rl_mw->r.frmr.fr_state = FRMR_IS_INVALID; 1987 seg1->rl_mw->r.frmr.fr_state = FRMR_IS_INVALID;
1892 1988
1893 memset(&invalidate_wr, 0, sizeof invalidate_wr); 1989 memset(&invalidate_wr, 0, sizeof invalidate_wr);
1894 invalidate_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw; 1990 invalidate_wr.wr_id = (unsigned long)(void *)seg1->rl_mw;
1895 invalidate_wr.opcode = IB_WR_LOCAL_INV; 1991 invalidate_wr.opcode = IB_WR_LOCAL_INV;
1896 invalidate_wr.ex.invalidate_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey; 1992 invalidate_wr.ex.invalidate_rkey = seg1->rl_mw->r.frmr.fr_mr->rkey;
1897 DECR_CQCOUNT(&r_xprt->rx_ep); 1993 DECR_CQCOUNT(&r_xprt->rx_ep);
1898 1994
1899 read_lock(&ia->ri_qplock); 1995 read_lock(&ia->ri_qplock);
@@ -1903,7 +1999,7 @@ rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg,
1903 read_unlock(&ia->ri_qplock); 1999 read_unlock(&ia->ri_qplock);
1904 if (rc) { 2000 if (rc) {
1905 /* Force rpcrdma_buffer_get() to retry */ 2001 /* Force rpcrdma_buffer_get() to retry */
1906 seg1->mr_chunk.rl_mw->r.frmr.fr_state = FRMR_IS_STALE; 2002 seg1->rl_mw->r.frmr.fr_state = FRMR_IS_STALE;
1907 dprintk("RPC: %s: failed ib_post_send for invalidate," 2003 dprintk("RPC: %s: failed ib_post_send for invalidate,"
1908 " status %i\n", __func__, rc); 2004 " status %i\n", __func__, rc);
1909 } 2005 }
@@ -1935,8 +2031,7 @@ rpcrdma_register_fmr_external(struct rpcrdma_mr_seg *seg,
1935 offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) 2031 offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
1936 break; 2032 break;
1937 } 2033 }
1938 rc = ib_map_phys_fmr(seg1->mr_chunk.rl_mw->r.fmr, 2034 rc = ib_map_phys_fmr(seg1->rl_mw->r.fmr, physaddrs, i, seg1->mr_dma);
1939 physaddrs, i, seg1->mr_dma);
1940 if (rc) { 2035 if (rc) {
1941 dprintk("RPC: %s: failed ib_map_phys_fmr " 2036 dprintk("RPC: %s: failed ib_map_phys_fmr "
1942 "%u@0x%llx+%i (%d)... status %i\n", __func__, 2037 "%u@0x%llx+%i (%d)... status %i\n", __func__,
@@ -1945,7 +2040,7 @@ rpcrdma_register_fmr_external(struct rpcrdma_mr_seg *seg,
1945 while (i--) 2040 while (i--)
1946 rpcrdma_unmap_one(ia, --seg); 2041 rpcrdma_unmap_one(ia, --seg);
1947 } else { 2042 } else {
1948 seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.fmr->rkey; 2043 seg1->mr_rkey = seg1->rl_mw->r.fmr->rkey;
1949 seg1->mr_base = seg1->mr_dma + pageoff; 2044 seg1->mr_base = seg1->mr_dma + pageoff;
1950 seg1->mr_nsegs = i; 2045 seg1->mr_nsegs = i;
1951 seg1->mr_len = len; 2046 seg1->mr_len = len;
@@ -1962,7 +2057,7 @@ rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg,
1962 LIST_HEAD(l); 2057 LIST_HEAD(l);
1963 int rc; 2058 int rc;
1964 2059
1965 list_add(&seg1->mr_chunk.rl_mw->r.fmr->list, &l); 2060 list_add(&seg1->rl_mw->r.fmr->list, &l);
1966 rc = ib_unmap_fmr(&l); 2061 rc = ib_unmap_fmr(&l);
1967 read_lock(&ia->ri_qplock); 2062 read_lock(&ia->ri_qplock);
1968 while (seg1->mr_nsegs--) 2063 while (seg1->mr_nsegs--)
@@ -2104,11 +2199,13 @@ rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
2104 2199
2105 recv_wr.next = NULL; 2200 recv_wr.next = NULL;
2106 recv_wr.wr_id = (u64) (unsigned long) rep; 2201 recv_wr.wr_id = (u64) (unsigned long) rep;
2107 recv_wr.sg_list = &rep->rr_iov; 2202 recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov;
2108 recv_wr.num_sge = 1; 2203 recv_wr.num_sge = 1;
2109 2204
2110 ib_dma_sync_single_for_cpu(ia->ri_id->device, 2205 ib_dma_sync_single_for_cpu(ia->ri_id->device,
2111 rep->rr_iov.addr, rep->rr_iov.length, DMA_BIDIRECTIONAL); 2206 rdmab_addr(rep->rr_rdmabuf),
2207 rdmab_length(rep->rr_rdmabuf),
2208 DMA_BIDIRECTIONAL);
2112 2209
2113 rc = ib_post_recv(ia->ri_id->qp, &recv_wr, &recv_wr_fail); 2210 rc = ib_post_recv(ia->ri_id->qp, &recv_wr, &recv_wr_fail);
2114 2211
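
The send side is symmetrical: once headers and payloads live in regbufs, the SGEs handed to ib_post_send() can be filled from the accessors added in xprt_rdma.h. A sketch under that assumption (not taken from this diff; example_fill_send_sge() is a hypothetical helper):

static void
example_fill_send_sge(struct ib_sge *sge, struct rpcrdma_regbuf *rb, u32 len)
{
	sge->addr = rdmab_addr(rb);	/* DMA address of the registered payload */
	sge->length = len;		/* bytes actually used, at most rdmab_length(rb) */
	sge->lkey = rdmab_lkey(rb);	/* local key from the internal registration */
}
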
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index b799041b75bf..c9d2a02f631b 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -70,6 +70,9 @@ struct rpcrdma_ia {
70 int ri_async_rc; 70 int ri_async_rc;
71 enum rpcrdma_memreg ri_memreg_strategy; 71 enum rpcrdma_memreg ri_memreg_strategy;
72 unsigned int ri_max_frmr_depth; 72 unsigned int ri_max_frmr_depth;
73 struct ib_device_attr ri_devattr;
74 struct ib_qp_attr ri_qp_attr;
75 struct ib_qp_init_attr ri_qp_init_attr;
73}; 76};
74 77
75/* 78/*
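
Caching the device attributes in the rpcrdma_ia lets later code consult them without re-querying the device onto the stack. A sketch of that pattern (illustrative only; example_max_frmr_depth() is a hypothetical helper and assumes ri_devattr was filled in when the IA was opened):

static unsigned int
example_max_frmr_depth(struct rpcrdma_ia *ia)
{
	/* bound the FRMR depth by what the HCA reports */
	return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
		     ia->ri_devattr.max_fast_reg_page_list_len);
}
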
@@ -83,13 +86,9 @@ struct rpcrdma_ep {
83 atomic_t rep_cqcount; 86 atomic_t rep_cqcount;
84 int rep_cqinit; 87 int rep_cqinit;
85 int rep_connected; 88 int rep_connected;
86 struct rpcrdma_ia *rep_ia;
87 struct ib_qp_init_attr rep_attr; 89 struct ib_qp_init_attr rep_attr;
88 wait_queue_head_t rep_connect_wait; 90 wait_queue_head_t rep_connect_wait;
89 struct ib_sge rep_pad; /* holds zeroed pad */ 91 struct rpcrdma_regbuf *rep_padbuf;
90 struct ib_mr *rep_pad_mr; /* holds zeroed pad */
91 void (*rep_func)(struct rpcrdma_ep *);
92 struct rpc_xprt *rep_xprt; /* for rep_func */
93 struct rdma_conn_param rep_remote_cma; 92 struct rdma_conn_param rep_remote_cma;
94 struct sockaddr_storage rep_remote_addr; 93 struct sockaddr_storage rep_remote_addr;
95 struct delayed_work rep_connect_worker; 94 struct delayed_work rep_connect_worker;
@@ -106,6 +105,44 @@ struct rpcrdma_ep {
106#define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit) 105#define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit)
107#define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount) 106#define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount)
108 107
108/* Registered buffer -- registered kmalloc'd memory for RDMA SEND/RECV
109 *
110 * The below structure appears at the front of a large region of kmalloc'd
111 * memory, which always starts on a good alignment boundary.
112 */
113
114struct rpcrdma_regbuf {
115 size_t rg_size;
116 struct rpcrdma_req *rg_owner;
117 struct ib_mr *rg_mr;
118 struct ib_sge rg_iov;
119 __be32 rg_base[0] __attribute__ ((aligned(256)));
120};
121
122static inline u64
123rdmab_addr(struct rpcrdma_regbuf *rb)
124{
125 return rb->rg_iov.addr;
126}
127
128static inline u32
129rdmab_length(struct rpcrdma_regbuf *rb)
130{
131 return rb->rg_iov.length;
132}
133
134static inline u32
135rdmab_lkey(struct rpcrdma_regbuf *rb)
136{
137 return rb->rg_iov.lkey;
138}
139
140static inline struct rpcrdma_msg *
141rdmab_to_msg(struct rpcrdma_regbuf *rb)
142{
143 return (struct rpcrdma_msg *)rb->rg_base;
144}
145
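
With the header and accessors above, code that used to peek into an inline rr_base array now reaches the received RPC/RDMA header through the regbuf payload. A sketch of that access pattern (example_peek_xid() is a hypothetical helper; it assumes the rr_rdmabuf field added to struct rpcrdma_rep further down, with a reply already received into it):

static u32
example_peek_xid(struct rpcrdma_rep *rep)
{
	/* the RPC/RDMA header starts at the front of the registered payload */
	struct rpcrdma_msg *headerp = rdmab_to_msg(rep->rr_rdmabuf);

	return be32_to_cpu(headerp->rm_xid);	/* XID in host byte order */
}
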
109enum rpcrdma_chunktype { 146enum rpcrdma_chunktype {
110 rpcrdma_noch = 0, 147 rpcrdma_noch = 0,
111 rpcrdma_readch, 148 rpcrdma_readch,
@@ -134,22 +171,16 @@ enum rpcrdma_chunktype {
134/* temporary static scatter/gather max */ 171/* temporary static scatter/gather max */
135#define RPCRDMA_MAX_DATA_SEGS (64) /* max scatter/gather */ 172#define RPCRDMA_MAX_DATA_SEGS (64) /* max scatter/gather */
136#define RPCRDMA_MAX_SEGS (RPCRDMA_MAX_DATA_SEGS + 2) /* head+tail = 2 */ 173#define RPCRDMA_MAX_SEGS (RPCRDMA_MAX_DATA_SEGS + 2) /* head+tail = 2 */
137#define MAX_RPCRDMAHDR (\
138 /* max supported RPC/RDMA header */ \
139 sizeof(struct rpcrdma_msg) + (2 * sizeof(u32)) + \
140 (sizeof(struct rpcrdma_read_chunk) * RPCRDMA_MAX_SEGS) + sizeof(u32))
141 174
142struct rpcrdma_buffer; 175struct rpcrdma_buffer;
143 176
144struct rpcrdma_rep { 177struct rpcrdma_rep {
145 unsigned int rr_len; /* actual received reply length */ 178 unsigned int rr_len;
146 struct rpcrdma_buffer *rr_buffer; /* home base for this structure */ 179 struct rpcrdma_buffer *rr_buffer;
147 struct rpc_xprt *rr_xprt; /* needed for request/reply matching */ 180 struct rpc_xprt *rr_xprt;
148 void (*rr_func)(struct rpcrdma_rep *);/* called by tasklet in softint */ 181 void (*rr_func)(struct rpcrdma_rep *);
149 struct list_head rr_list; /* tasklet list */ 182 struct list_head rr_list;
150 struct ib_sge rr_iov; /* for posting */ 183 struct rpcrdma_regbuf *rr_rdmabuf;
151 struct ib_mr *rr_handle; /* handle for mem in rr_iov */
152 char rr_base[MAX_RPCRDMAHDR]; /* minimal inline receive buffer */
153}; 184};
154 185
155/* 186/*
@@ -211,10 +242,7 @@ struct rpcrdma_mw {
211 */ 242 */
212 243
213struct rpcrdma_mr_seg { /* chunk descriptors */ 244struct rpcrdma_mr_seg { /* chunk descriptors */
214 union { /* chunk memory handles */ 245 struct rpcrdma_mw *rl_mw; /* registered MR */
215 struct ib_mr *rl_mr; /* if registered directly */
216 struct rpcrdma_mw *rl_mw; /* if registered from region */
217 } mr_chunk;
218 u64 mr_base; /* registration result */ 246 u64 mr_base; /* registration result */
219 u32 mr_rkey; /* registration result */ 247 u32 mr_rkey; /* registration result */
220 u32 mr_len; /* length of chunk or segment */ 248 u32 mr_len; /* length of chunk or segment */
@@ -227,22 +255,26 @@ struct rpcrdma_mr_seg { /* chunk descriptors */
227}; 255};
228 256
229struct rpcrdma_req { 257struct rpcrdma_req {
230 size_t rl_size; /* actual length of buffer */
231 unsigned int rl_niovs; /* 0, 2 or 4 */ 258 unsigned int rl_niovs; /* 0, 2 or 4 */
232 unsigned int rl_nchunks; /* non-zero if chunks */ 259 unsigned int rl_nchunks; /* non-zero if chunks */
233 unsigned int rl_connect_cookie; /* retry detection */ 260 unsigned int rl_connect_cookie; /* retry detection */
234 enum rpcrdma_chunktype rl_rtype, rl_wtype; 261 enum rpcrdma_chunktype rl_rtype, rl_wtype;
235 struct rpcrdma_buffer *rl_buffer; /* home base for this structure */ 262 struct rpcrdma_buffer *rl_buffer; /* home base for this structure */
236 struct rpcrdma_rep *rl_reply;/* holder for reply buffer */ 263 struct rpcrdma_rep *rl_reply;/* holder for reply buffer */
237 struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS];/* chunk segments */
238 struct ib_sge rl_send_iov[4]; /* for active requests */ 264 struct ib_sge rl_send_iov[4]; /* for active requests */
239 struct ib_sge rl_iov; /* for posting */ 265 struct rpcrdma_regbuf *rl_rdmabuf;
240 struct ib_mr *rl_handle; /* handle for mem in rl_iov */ 266 struct rpcrdma_regbuf *rl_sendbuf;
241 char rl_base[MAX_RPCRDMAHDR]; /* start of actual buffer */ 267 struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS];
242 __u32 rl_xdr_buf[0]; /* start of returned rpc rq_buffer */
243}; 268};
244#define rpcr_to_rdmar(r) \ 269
245 container_of((r)->rq_buffer, struct rpcrdma_req, rl_xdr_buf[0]) 270static inline struct rpcrdma_req *
271rpcr_to_rdmar(struct rpc_rqst *rqst)
272{
273 struct rpcrdma_regbuf *rb = container_of(rqst->rq_buffer,
274 struct rpcrdma_regbuf,
275 rg_base[0]);
276 return rb->rg_owner;
277}
246 278
247/* 279/*
248 * struct rpcrdma_buffer -- holds list/queue of pre-registered memory for 280 * struct rpcrdma_buffer -- holds list/queue of pre-registered memory for
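
The container_of() in the new rpcr_to_rdmar() above only works because the buffer handed to the RPC layer is the regbuf payload itself: rq_buffer points at rl_sendbuf->rg_base, and rg_owner points back to the owning request. A sketch of that invariant (assumption for illustration; example_req_roundtrip_ok() is a hypothetical helper):

static bool
example_req_roundtrip_ok(struct rpc_rqst *rqst, struct rpcrdma_req *req)
{
	/* expected after xprt_rdma_allocate() hands out rl_sendbuf's payload */
	return rqst->rq_buffer == (void *)req->rl_sendbuf->rg_base &&
	       rpcr_to_rdmar(rqst) == req;
}
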
@@ -252,7 +284,6 @@ struct rpcrdma_req {
252 */ 284 */
253struct rpcrdma_buffer { 285struct rpcrdma_buffer {
254 spinlock_t rb_lock; /* protects indexes */ 286 spinlock_t rb_lock; /* protects indexes */
255 atomic_t rb_credits; /* most recent server credits */
256 int rb_max_requests;/* client max requests */ 287 int rb_max_requests;/* client max requests */
257 struct list_head rb_mws; /* optional memory windows/fmrs/frmrs */ 288 struct list_head rb_mws; /* optional memory windows/fmrs/frmrs */
258 struct list_head rb_all; 289 struct list_head rb_all;
@@ -318,16 +349,16 @@ struct rpcrdma_stats {
318 * during unmount. 349 * during unmount.
319 */ 350 */
320struct rpcrdma_xprt { 351struct rpcrdma_xprt {
321 struct rpc_xprt xprt; 352 struct rpc_xprt rx_xprt;
322 struct rpcrdma_ia rx_ia; 353 struct rpcrdma_ia rx_ia;
323 struct rpcrdma_ep rx_ep; 354 struct rpcrdma_ep rx_ep;
324 struct rpcrdma_buffer rx_buf; 355 struct rpcrdma_buffer rx_buf;
325 struct rpcrdma_create_data_internal rx_data; 356 struct rpcrdma_create_data_internal rx_data;
326 struct delayed_work rdma_connect; 357 struct delayed_work rx_connect_worker;
327 struct rpcrdma_stats rx_stats; 358 struct rpcrdma_stats rx_stats;
328}; 359};
329 360
330#define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, xprt) 361#define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, rx_xprt)
331#define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data) 362#define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data)
332 363
333/* Setting this to 0 ensures interoperability with early servers. 364/* Setting this to 0 ensures interoperability with early servers.
@@ -358,9 +389,7 @@ int rpcrdma_ep_post_recv(struct rpcrdma_ia *, struct rpcrdma_ep *,
358/* 389/*
359 * Buffer calls - xprtrdma/verbs.c 390 * Buffer calls - xprtrdma/verbs.c
360 */ 391 */
361int rpcrdma_buffer_create(struct rpcrdma_buffer *, struct rpcrdma_ep *, 392int rpcrdma_buffer_create(struct rpcrdma_xprt *);
362 struct rpcrdma_ia *,
363 struct rpcrdma_create_data_internal *);
364void rpcrdma_buffer_destroy(struct rpcrdma_buffer *); 393void rpcrdma_buffer_destroy(struct rpcrdma_buffer *);
365 394
366struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *); 395struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *);
@@ -368,16 +397,16 @@ void rpcrdma_buffer_put(struct rpcrdma_req *);
368void rpcrdma_recv_buffer_get(struct rpcrdma_req *); 397void rpcrdma_recv_buffer_get(struct rpcrdma_req *);
369void rpcrdma_recv_buffer_put(struct rpcrdma_rep *); 398void rpcrdma_recv_buffer_put(struct rpcrdma_rep *);
370 399
371int rpcrdma_register_internal(struct rpcrdma_ia *, void *, int,
372 struct ib_mr **, struct ib_sge *);
373int rpcrdma_deregister_internal(struct rpcrdma_ia *,
374 struct ib_mr *, struct ib_sge *);
375
376int rpcrdma_register_external(struct rpcrdma_mr_seg *, 400int rpcrdma_register_external(struct rpcrdma_mr_seg *,
377 int, int, struct rpcrdma_xprt *); 401 int, int, struct rpcrdma_xprt *);
378int rpcrdma_deregister_external(struct rpcrdma_mr_seg *, 402int rpcrdma_deregister_external(struct rpcrdma_mr_seg *,
379 struct rpcrdma_xprt *); 403 struct rpcrdma_xprt *);
380 404
405struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(struct rpcrdma_ia *,
406 size_t, gfp_t);
407void rpcrdma_free_regbuf(struct rpcrdma_ia *,
408 struct rpcrdma_regbuf *);
409
381/* 410/*
382 * RPC/RDMA connection management calls - xprtrdma/rpc_rdma.c 411 * RPC/RDMA connection management calls - xprtrdma/rpc_rdma.c
383 */ 412 */