aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Tom Talpey <talpey@netapp.com> 2008-10-09 15:00:20 -0400
committer Trond Myklebust <Trond.Myklebust@netapp.com> 2008-10-10 15:09:34 -0400
commit 3197d309f5fb042499b2c4c8f2fcb67372df5201 (patch)
tree a4cca3420bc99e44af00806abe4a265d539d9c24
parent bd7ed1d13304d914648dacec4dbb9145aaae614e (diff)
RPC/RDMA: support FRMR client memory registration.
Configure, detect and use "fastreg" support from IB/iWARP verbs layer to perform RPC/RDMA memory registration. Make FRMR the default memreg mode (will fall back if not supported by the selected RDMA adapter). This allows full and optimal operation over the cxgb3 adapter, and others.

Signed-off-by: Tom Talpey <talpey@netapp.com>
Acked-by: Tom Tucker <tom@opengridcomputing.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
-rw-r--r--net/sunrpc/xprtrdma/transport.c6
-rw-r--r--net/sunrpc/xprtrdma/verbs.c167
2 files changed, 167 insertions, 6 deletions
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index a564c1a39ec5..89970b0a4cc9 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -70,11 +70,7 @@ static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE;
70static unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE; 70static unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
71static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE; 71static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
72static unsigned int xprt_rdma_inline_write_padding; 72static unsigned int xprt_rdma_inline_write_padding;
73#if !RPCRDMA_PERSISTENT_REGISTRATION 73static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR;
74static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_REGISTER; /* FMR? */
75#else
76static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_ALLPHYSICAL;
77#endif
78 74
79#ifdef RPC_DEBUG 75#ifdef RPC_DEBUG
80 76
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 0f3b43148b7f..39a165202d8f 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -488,6 +488,26 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
488#endif 488#endif
489 } 489 }
490 break; 490 break;
491 case RPCRDMA_FRMR:
492 /* Requires both frmr reg and local dma lkey */
493 if ((devattr.device_cap_flags &
494 (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) !=
495 (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) {
496#if RPCRDMA_PERSISTENT_REGISTRATION
497 dprintk("RPC: %s: FRMR registration "
498 "specified but not supported by adapter, "
499 "using riskier RPCRDMA_ALLPHYSICAL\n",
500 __func__);
501 memreg = RPCRDMA_ALLPHYSICAL;
502#else
503 dprintk("RPC: %s: FRMR registration "
504 "specified but not supported by adapter, "
505 "using slower RPCRDMA_REGISTER\n",
506 __func__);
507 memreg = RPCRDMA_REGISTER;
508#endif
509 }
510 break;
491 } 511 }
492 512
493 /* 513 /*
@@ -501,6 +521,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
501 switch (memreg) { 521 switch (memreg) {
502 case RPCRDMA_BOUNCEBUFFERS: 522 case RPCRDMA_BOUNCEBUFFERS:
503 case RPCRDMA_REGISTER: 523 case RPCRDMA_REGISTER:
524 case RPCRDMA_FRMR:
504 break; 525 break;
505#if RPCRDMA_PERSISTENT_REGISTRATION 526#if RPCRDMA_PERSISTENT_REGISTRATION
506 case RPCRDMA_ALLPHYSICAL: 527 case RPCRDMA_ALLPHYSICAL:
@@ -602,6 +623,12 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
602 ep->rep_attr.srq = NULL; 623 ep->rep_attr.srq = NULL;
603 ep->rep_attr.cap.max_send_wr = cdata->max_requests; 624 ep->rep_attr.cap.max_send_wr = cdata->max_requests;
604 switch (ia->ri_memreg_strategy) { 625 switch (ia->ri_memreg_strategy) {
626 case RPCRDMA_FRMR:
627 /* Add room for frmr register and invalidate WRs */
628 ep->rep_attr.cap.max_send_wr *= 3;
629 if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr)
630 return -EINVAL;
631 break;
605 case RPCRDMA_MEMWINDOWS_ASYNC: 632 case RPCRDMA_MEMWINDOWS_ASYNC:
606 case RPCRDMA_MEMWINDOWS: 633 case RPCRDMA_MEMWINDOWS:
607 /* Add room for mw_binds+unbinds - overkill! */ 634 /* Add room for mw_binds+unbinds - overkill! */
@@ -684,6 +711,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
684 break; 711 break;
685 case RPCRDMA_MTHCAFMR: 712 case RPCRDMA_MTHCAFMR:
686 case RPCRDMA_REGISTER: 713 case RPCRDMA_REGISTER:
714 case RPCRDMA_FRMR:
687 ep->rep_remote_cma.responder_resources = cdata->max_requests * 715 ep->rep_remote_cma.responder_resources = cdata->max_requests *
688 (RPCRDMA_MAX_DATA_SEGS / 8); 716 (RPCRDMA_MAX_DATA_SEGS / 8);
689 break; 717 break;
@@ -935,7 +963,7 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
935 * 2. arrays of struct rpcrdma_req to fill in pointers 963 * 2. arrays of struct rpcrdma_req to fill in pointers
936 * 3. array of struct rpcrdma_rep for replies 964 * 3. array of struct rpcrdma_rep for replies
937 * 4. padding, if any 965 * 4. padding, if any
938 * 5. mw's or fmr's, if any 966 * 5. mw's, fmr's or frmr's, if any
939 * Send/recv buffers in req/rep need to be registered 967 * Send/recv buffers in req/rep need to be registered
940 */ 968 */
941 969
@@ -943,6 +971,10 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
943 (sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *)); 971 (sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *));
944 len += cdata->padding; 972 len += cdata->padding;
945 switch (ia->ri_memreg_strategy) { 973 switch (ia->ri_memreg_strategy) {
974 case RPCRDMA_FRMR:
975 len += buf->rb_max_requests * RPCRDMA_MAX_SEGS *
976 sizeof(struct rpcrdma_mw);
977 break;
946 case RPCRDMA_MTHCAFMR: 978 case RPCRDMA_MTHCAFMR:
947 /* TBD we are perhaps overallocating here */ 979 /* TBD we are perhaps overallocating here */
948 len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS * 980 len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS *
@@ -991,6 +1023,30 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
991 INIT_LIST_HEAD(&buf->rb_mws); 1023 INIT_LIST_HEAD(&buf->rb_mws);
992 r = (struct rpcrdma_mw *)p; 1024 r = (struct rpcrdma_mw *)p;
993 switch (ia->ri_memreg_strategy) { 1025 switch (ia->ri_memreg_strategy) {
1026 case RPCRDMA_FRMR:
1027 for (i = buf->rb_max_requests * RPCRDMA_MAX_SEGS; i; i--) {
1028 r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
1029 RPCRDMA_MAX_SEGS);
1030 if (IS_ERR(r->r.frmr.fr_mr)) {
1031 rc = PTR_ERR(r->r.frmr.fr_mr);
1032 dprintk("RPC: %s: ib_alloc_fast_reg_mr"
1033 " failed %i\n", __func__, rc);
1034 goto out;
1035 }
1036 r->r.frmr.fr_pgl =
1037 ib_alloc_fast_reg_page_list(ia->ri_id->device,
1038 RPCRDMA_MAX_SEGS);
1039 if (IS_ERR(r->r.frmr.fr_pgl)) {
1040 rc = PTR_ERR(r->r.frmr.fr_pgl);
1041 dprintk("RPC: %s: "
1042 "ib_alloc_fast_reg_page_list "
1043 "failed %i\n", __func__, rc);
1044 goto out;
1045 }
1046 list_add(&r->mw_list, &buf->rb_mws);
1047 ++r;
1048 }
1049 break;
994 case RPCRDMA_MTHCAFMR: 1050 case RPCRDMA_MTHCAFMR:
995 /* TBD we are perhaps overallocating here */ 1051 /* TBD we are perhaps overallocating here */
996 for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) { 1052 for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) {
@@ -1126,6 +1182,15 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
1126 struct rpcrdma_mw, mw_list); 1182 struct rpcrdma_mw, mw_list);
1127 list_del(&r->mw_list); 1183 list_del(&r->mw_list);
1128 switch (ia->ri_memreg_strategy) { 1184 switch (ia->ri_memreg_strategy) {
1185 case RPCRDMA_FRMR:
1186 rc = ib_dereg_mr(r->r.frmr.fr_mr);
1187 if (rc)
1188 dprintk("RPC: %s:"
1189 " ib_dereg_mr"
1190 " failed %i\n",
1191 __func__, rc);
1192 ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
1193 break;
1129 case RPCRDMA_MTHCAFMR: 1194 case RPCRDMA_MTHCAFMR:
1130 rc = ib_dealloc_fmr(r->r.fmr); 1195 rc = ib_dealloc_fmr(r->r.fmr);
1131 if (rc) 1196 if (rc)
@@ -1228,6 +1293,7 @@ rpcrdma_buffer_put(struct rpcrdma_req *req)
1228 req->rl_reply = NULL; 1293 req->rl_reply = NULL;
1229 } 1294 }
1230 switch (ia->ri_memreg_strategy) { 1295 switch (ia->ri_memreg_strategy) {
1296 case RPCRDMA_FRMR:
1231 case RPCRDMA_MTHCAFMR: 1297 case RPCRDMA_MTHCAFMR:
1232 case RPCRDMA_MEMWINDOWS_ASYNC: 1298 case RPCRDMA_MEMWINDOWS_ASYNC:
1233 case RPCRDMA_MEMWINDOWS: 1299 case RPCRDMA_MEMWINDOWS:
@@ -1391,6 +1457,96 @@ rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg)
1391} 1457}
1392 1458
1393static int 1459static int
1460rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
1461 int *nsegs, int writing, struct rpcrdma_ia *ia,
1462 struct rpcrdma_xprt *r_xprt)
1463{
1464 struct rpcrdma_mr_seg *seg1 = seg;
1465 struct ib_send_wr frmr_wr, *bad_wr;
1466 u8 key;
1467 int len, pageoff;
1468 int i, rc;
1469
1470 pageoff = offset_in_page(seg1->mr_offset);
1471 seg1->mr_offset -= pageoff; /* start of page */
1472 seg1->mr_len += pageoff;
1473 len = -pageoff;
1474 if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
1475 *nsegs = RPCRDMA_MAX_DATA_SEGS;
1476 for (i = 0; i < *nsegs;) {
1477 rpcrdma_map_one(ia, seg, writing);
1478 seg1->mr_chunk.rl_mw->r.frmr.fr_pgl->page_list[i] = seg->mr_dma;
1479 len += seg->mr_len;
1480 ++seg;
1481 ++i;
1482 /* Check for holes */
1483 if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
1484 offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
1485 break;
1486 }
1487 dprintk("RPC: %s: Using frmr %p to map %d segments\n",
1488 __func__, seg1->mr_chunk.rl_mw, i);
1489
1490 /* Bump the key */
1491 key = (u8)(seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey & 0x000000FF);
1492 ib_update_fast_reg_key(seg1->mr_chunk.rl_mw->r.frmr.fr_mr, ++key);
1493
1494 /* Prepare FRMR WR */
1495 memset(&frmr_wr, 0, sizeof frmr_wr);
1496 frmr_wr.opcode = IB_WR_FAST_REG_MR;
1497 frmr_wr.send_flags = 0; /* unsignaled */
1498 frmr_wr.wr.fast_reg.iova_start = (unsigned long)seg1->mr_dma;
1499 frmr_wr.wr.fast_reg.page_list = seg1->mr_chunk.rl_mw->r.frmr.fr_pgl;
1500 frmr_wr.wr.fast_reg.page_list_len = i;
1501 frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
1502 frmr_wr.wr.fast_reg.length = i << PAGE_SHIFT;
1503 frmr_wr.wr.fast_reg.access_flags = (writing ?
1504 IB_ACCESS_REMOTE_WRITE : IB_ACCESS_REMOTE_READ);
1505 frmr_wr.wr.fast_reg.rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
1506 DECR_CQCOUNT(&r_xprt->rx_ep);
1507
1508 rc = ib_post_send(ia->ri_id->qp, &frmr_wr, &bad_wr);
1509
1510 if (rc) {
1511 dprintk("RPC: %s: failed ib_post_send for register,"
1512 " status %i\n", __func__, rc);
1513 while (i--)
1514 rpcrdma_unmap_one(ia, --seg);
1515 } else {
1516 seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
1517 seg1->mr_base = seg1->mr_dma + pageoff;
1518 seg1->mr_nsegs = i;
1519 seg1->mr_len = len;
1520 }
1521 *nsegs = i;
1522 return rc;
1523}
1524
1525static int
1526rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg,
1527 struct rpcrdma_ia *ia, struct rpcrdma_xprt *r_xprt)
1528{
1529 struct rpcrdma_mr_seg *seg1 = seg;
1530 struct ib_send_wr invalidate_wr, *bad_wr;
1531 int rc;
1532
1533 while (seg1->mr_nsegs--)
1534 rpcrdma_unmap_one(ia, seg++);
1535
1536 memset(&invalidate_wr, 0, sizeof invalidate_wr);
1537 invalidate_wr.opcode = IB_WR_LOCAL_INV;
1538 invalidate_wr.send_flags = 0; /* unsignaled */
1539 invalidate_wr.ex.invalidate_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
1540 DECR_CQCOUNT(&r_xprt->rx_ep);
1541
1542 rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
1543 if (rc)
1544 dprintk("RPC: %s: failed ib_post_send for invalidate,"
1545 " status %i\n", __func__, rc);
1546 return rc;
1547}
1548
1549static int
1394rpcrdma_register_fmr_external(struct rpcrdma_mr_seg *seg, 1550rpcrdma_register_fmr_external(struct rpcrdma_mr_seg *seg,
1395 int *nsegs, int writing, struct rpcrdma_ia *ia) 1551 int *nsegs, int writing, struct rpcrdma_ia *ia)
1396{ 1552{
@@ -1600,6 +1756,11 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
1600 break; 1756 break;
1601#endif 1757#endif
1602 1758
1759 /* Registration using frmr registration */
1760 case RPCRDMA_FRMR:
1761 rc = rpcrdma_register_frmr_external(seg, &nsegs, writing, ia, r_xprt);
1762 break;
1763
1603 /* Registration using fmr memory registration */ 1764 /* Registration using fmr memory registration */
1604 case RPCRDMA_MTHCAFMR: 1765 case RPCRDMA_MTHCAFMR:
1605 rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia); 1766 rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia);
@@ -1639,6 +1800,10 @@ rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
1639 break; 1800 break;
1640#endif 1801#endif
1641 1802
1803 case RPCRDMA_FRMR:
1804 rc = rpcrdma_deregister_frmr_external(seg, ia, r_xprt);
1805 break;
1806
1642 case RPCRDMA_MTHCAFMR: 1807 case RPCRDMA_MTHCAFMR:
1643 rc = rpcrdma_deregister_fmr_external(seg, ia); 1808 rc = rpcrdma_deregister_fmr_external(seg, ia);
1644 break; 1809 break;