aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- net/sunrpc/xprtrdma/transport.c | 6
-rw-r--r-- net/sunrpc/xprtrdma/verbs.c | 167
2 files changed, 167 insertions, 6 deletions
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index a564c1a39ec5..89970b0a4cc9 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -70,11 +70,7 @@ static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE;
70static unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE; 70static unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
71static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE; 71static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
72static unsigned int xprt_rdma_inline_write_padding; 72static unsigned int xprt_rdma_inline_write_padding;
73#if !RPCRDMA_PERSISTENT_REGISTRATION 73static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR;
74static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_REGISTER; /* FMR? */
75#else
76static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_ALLPHYSICAL;
77#endif
78 74
79#ifdef RPC_DEBUG 75#ifdef RPC_DEBUG
80 76
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 0f3b43148b7f..39a165202d8f 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -488,6 +488,26 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
488#endif 488#endif
489 } 489 }
490 break; 490 break;
491 case RPCRDMA_FRMR:
492 /* Requires both frmr reg and local dma lkey */
493 if ((devattr.device_cap_flags &
494 (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) !=
495 (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) {
496#if RPCRDMA_PERSISTENT_REGISTRATION
497 dprintk("RPC: %s: FRMR registration "
498 "specified but not supported by adapter, "
499 "using riskier RPCRDMA_ALLPHYSICAL\n",
500 __func__);
501 memreg = RPCRDMA_ALLPHYSICAL;
502#else
503 dprintk("RPC: %s: FRMR registration "
504 "specified but not supported by adapter, "
505 "using slower RPCRDMA_REGISTER\n",
506 __func__);
507 memreg = RPCRDMA_REGISTER;
508#endif
509 }
510 break;
491 } 511 }
492 512
493 /* 513 /*
@@ -501,6 +521,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
501 switch (memreg) { 521 switch (memreg) {
502 case RPCRDMA_BOUNCEBUFFERS: 522 case RPCRDMA_BOUNCEBUFFERS:
503 case RPCRDMA_REGISTER: 523 case RPCRDMA_REGISTER:
524 case RPCRDMA_FRMR:
504 break; 525 break;
505#if RPCRDMA_PERSISTENT_REGISTRATION 526#if RPCRDMA_PERSISTENT_REGISTRATION
506 case RPCRDMA_ALLPHYSICAL: 527 case RPCRDMA_ALLPHYSICAL:
@@ -602,6 +623,12 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
602 ep->rep_attr.srq = NULL; 623 ep->rep_attr.srq = NULL;
603 ep->rep_attr.cap.max_send_wr = cdata->max_requests; 624 ep->rep_attr.cap.max_send_wr = cdata->max_requests;
604 switch (ia->ri_memreg_strategy) { 625 switch (ia->ri_memreg_strategy) {
626 case RPCRDMA_FRMR:
627 /* Add room for frmr register and invalidate WRs */
628 ep->rep_attr.cap.max_send_wr *= 3;
629 if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr)
630 return -EINVAL;
631 break;
605 case RPCRDMA_MEMWINDOWS_ASYNC: 632 case RPCRDMA_MEMWINDOWS_ASYNC:
606 case RPCRDMA_MEMWINDOWS: 633 case RPCRDMA_MEMWINDOWS:
607 /* Add room for mw_binds+unbinds - overkill! */ 634 /* Add room for mw_binds+unbinds - overkill! */
@@ -684,6 +711,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
684 break; 711 break;
685 case RPCRDMA_MTHCAFMR: 712 case RPCRDMA_MTHCAFMR:
686 case RPCRDMA_REGISTER: 713 case RPCRDMA_REGISTER:
714 case RPCRDMA_FRMR:
687 ep->rep_remote_cma.responder_resources = cdata->max_requests * 715 ep->rep_remote_cma.responder_resources = cdata->max_requests *
688 (RPCRDMA_MAX_DATA_SEGS / 8); 716 (RPCRDMA_MAX_DATA_SEGS / 8);
689 break; 717 break;
@@ -935,7 +963,7 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
935 * 2. arrays of struct rpcrdma_req to fill in pointers 963 * 2. arrays of struct rpcrdma_req to fill in pointers
936 * 3. array of struct rpcrdma_rep for replies 964 * 3. array of struct rpcrdma_rep for replies
937 * 4. padding, if any 965 * 4. padding, if any
938 * 5. mw's or fmr's, if any 966 * 5. mw's, fmr's or frmr's, if any
939 * Send/recv buffers in req/rep need to be registered 967 * Send/recv buffers in req/rep need to be registered
940 */ 968 */
941 969
@@ -943,6 +971,10 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
943 (sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *)); 971 (sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *));
944 len += cdata->padding; 972 len += cdata->padding;
945 switch (ia->ri_memreg_strategy) { 973 switch (ia->ri_memreg_strategy) {
974 case RPCRDMA_FRMR:
975 len += buf->rb_max_requests * RPCRDMA_MAX_SEGS *
976 sizeof(struct rpcrdma_mw);
977 break;
946 case RPCRDMA_MTHCAFMR: 978 case RPCRDMA_MTHCAFMR:
947 /* TBD we are perhaps overallocating here */ 979 /* TBD we are perhaps overallocating here */
948 len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS * 980 len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS *
@@ -991,6 +1023,30 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
991 INIT_LIST_HEAD(&buf->rb_mws); 1023 INIT_LIST_HEAD(&buf->rb_mws);
992 r = (struct rpcrdma_mw *)p; 1024 r = (struct rpcrdma_mw *)p;
993 switch (ia->ri_memreg_strategy) { 1025 switch (ia->ri_memreg_strategy) {
1026 case RPCRDMA_FRMR:
1027 for (i = buf->rb_max_requests * RPCRDMA_MAX_SEGS; i; i--) {
1028 r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
1029 RPCRDMA_MAX_SEGS);
1030 if (IS_ERR(r->r.frmr.fr_mr)) {
1031 rc = PTR_ERR(r->r.frmr.fr_mr);
1032 dprintk("RPC: %s: ib_alloc_fast_reg_mr"
1033 " failed %i\n", __func__, rc);
1034 goto out;
1035 }
1036 r->r.frmr.fr_pgl =
1037 ib_alloc_fast_reg_page_list(ia->ri_id->device,
1038 RPCRDMA_MAX_SEGS);
1039 if (IS_ERR(r->r.frmr.fr_pgl)) {
1040 rc = PTR_ERR(r->r.frmr.fr_pgl);
1041 dprintk("RPC: %s: "
1042 "ib_alloc_fast_reg_page_list "
1043 "failed %i\n", __func__, rc);
1044 goto out;
1045 }
1046 list_add(&r->mw_list, &buf->rb_mws);
1047 ++r;
1048 }
1049 break;
994 case RPCRDMA_MTHCAFMR: 1050 case RPCRDMA_MTHCAFMR:
995 /* TBD we are perhaps overallocating here */ 1051 /* TBD we are perhaps overallocating here */
996 for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) { 1052 for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) {
@@ -1126,6 +1182,15 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
1126 struct rpcrdma_mw, mw_list); 1182 struct rpcrdma_mw, mw_list);
1127 list_del(&r->mw_list); 1183 list_del(&r->mw_list);
1128 switch (ia->ri_memreg_strategy) { 1184 switch (ia->ri_memreg_strategy) {
1185 case RPCRDMA_FRMR:
1186 rc = ib_dereg_mr(r->r.frmr.fr_mr);
1187 if (rc)
1188 dprintk("RPC: %s:"
1189 " ib_dereg_mr"
1190 " failed %i\n",
1191 __func__, rc);
1192 ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
1193 break;
1129 case RPCRDMA_MTHCAFMR: 1194 case RPCRDMA_MTHCAFMR:
1130 rc = ib_dealloc_fmr(r->r.fmr); 1195 rc = ib_dealloc_fmr(r->r.fmr);
1131 if (rc) 1196 if (rc)
@@ -1228,6 +1293,7 @@ rpcrdma_buffer_put(struct rpcrdma_req *req)
1228 req->rl_reply = NULL; 1293 req->rl_reply = NULL;
1229 } 1294 }
1230 switch (ia->ri_memreg_strategy) { 1295 switch (ia->ri_memreg_strategy) {
1296 case RPCRDMA_FRMR:
1231 case RPCRDMA_MTHCAFMR: 1297 case RPCRDMA_MTHCAFMR:
1232 case RPCRDMA_MEMWINDOWS_ASYNC: 1298 case RPCRDMA_MEMWINDOWS_ASYNC:
1233 case RPCRDMA_MEMWINDOWS: 1299 case RPCRDMA_MEMWINDOWS:
@@ -1391,6 +1457,96 @@ rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg)
1391} 1457}
1392 1458
/*
 * Register a run of segments through the fast-register MR (FRMR) attached
 * to the first segment (seg->mr_chunk.rl_mw) by posting an unsignaled
 * IB_WR_FAST_REG_MR work request on ia->ri_id->qp.
 *
 * @seg:     first segment of the run. On success its mr_rkey, mr_base,
 *           mr_nsegs and mr_len are rewritten to describe the whole run.
 * @nsegs:   in: number of segments offered (clamped to
 *           RPCRDMA_MAX_DATA_SEGS). out: the loop counter i, i.e. the
 *           number of segments mapped on success.
 *           NOTE(review): on failure the unwind loop leaves i at -1, so
 *           *nsegs is written as -1 in that case.
 * @writing: non-zero selects IB_ACCESS_REMOTE_WRITE, zero selects
 *           IB_ACCESS_REMOTE_READ; also passed to rpcrdma_map_one().
 * @ia:      supplies the QP and is passed to the map/unmap helpers.
 * @r_xprt:  used only for DECR_CQCOUNT(&r_xprt->rx_ep).
 *
 * Returns the result of ib_post_send(); zero is treated as success.
 */
1393static int 1459static int
1460rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
1461 int *nsegs, int writing, struct rpcrdma_ia *ia,
1462 struct rpcrdma_xprt *r_xprt)
1463{
1464 struct rpcrdma_mr_seg *seg1 = seg;
1465 struct ib_send_wr frmr_wr, *bad_wr;
1466 u8 key;
1467 int len, pageoff;
1468 int i, rc;
1469
/*
 * Round the first segment down to the start of its page. len starts at
 * -pageoff so that the padding just added to seg1->mr_len is not counted
 * in the byte total accumulated by the loop below.
 * NOTE(review): these in-place changes to seg1->mr_offset and seg1->mr_len
 * are not reverted if the post fails further down.
 */
1470 pageoff = offset_in_page(seg1->mr_offset);
1471 seg1->mr_offset -= pageoff; /* start of page */
1472 seg1->mr_len += pageoff;
1473 len = -pageoff;
1474 if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
1475 *nsegs = RPCRDMA_MAX_DATA_SEGS;
/*
 * DMA-map each segment and store its address as one page-list entry.
 * One entry per segment: presumably every segment covers at most one
 * page - TODO confirm against the callers.
 */
1476 for (i = 0; i < *nsegs;) {
1477 rpcrdma_map_one(ia, seg, writing);
1478 seg1->mr_chunk.rl_mw->r.frmr.fr_pgl->page_list[i] = seg->mr_dma;
1479 len += seg->mr_len;
1480 ++seg;
1481 ++i;
1482 /* Check for holes */
/*
 * Stop the run when the next segment does not start on a page boundary,
 * or when the segment just mapped does not end on one.
 */
1483 if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
1484 offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
1485 break;
1486 }
1487 dprintk("RPC: %s: Using frmr %p to map %d segments\n",
1488 __func__, seg1->mr_chunk.rl_mw, i);
1489
1490 /* Bump the key */
/*
 * Only the low byte of the current rkey is taken; it is incremented as a
 * u8 (so it wraps after 255) and handed to ib_update_fast_reg_key().
 */
1491 key = (u8)(seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey & 0x000000FF);
1492 ib_update_fast_reg_key(seg1->mr_chunk.rl_mw->r.frmr.fr_mr, ++key);
1493
1494 /* Prepare FRMR WR */
1495 memset(&frmr_wr, 0, sizeof frmr_wr);
1496 frmr_wr.opcode = IB_WR_FAST_REG_MR;
1497 frmr_wr.send_flags = 0; /* unsignaled */
1498 frmr_wr.wr.fast_reg.iova_start = (unsigned long)seg1->mr_dma;
1499 frmr_wr.wr.fast_reg.page_list = seg1->mr_chunk.rl_mw->r.frmr.fr_pgl;
1500 frmr_wr.wr.fast_reg.page_list_len = i;
1501 frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
/* Registered length is i whole pages, not the byte count held in len. */
1502 frmr_wr.wr.fast_reg.length = i << PAGE_SHIFT;
1503 frmr_wr.wr.fast_reg.access_flags = (writing ?
1504 IB_ACCESS_REMOTE_WRITE : IB_ACCESS_REMOTE_READ);
1505 frmr_wr.wr.fast_reg.rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
/*
 * NOTE(review): DECR_CQCOUNT is defined outside this view; presumably it
 * accounts for the unsignaled WR posted below - confirm.
 */
1506 DECR_CQCOUNT(&r_xprt->rx_ep);
1507
1508 rc = ib_post_send(ia->ri_id->qp, &frmr_wr, &bad_wr);
1509
1510 if (rc) {
1511 dprintk("RPC: %s: failed ib_post_send for register,"
1512 " status %i\n", __func__, rc);
/* Undo the DMA mappings in reverse order; i ends at -1 after this loop. */
1513 while (i--)
1514 rpcrdma_unmap_one(ia, --seg);
1515 } else {
/*
 * Record the registration in the first segment: the bumped rkey, the
 * base address including the original in-page offset, the number of
 * segments covered, and the byte length without the leading padding.
 */
1516 seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
1517 seg1->mr_base = seg1->mr_dma + pageoff;
1518 seg1->mr_nsegs = i;
1519 seg1->mr_len = len;
1520 }
1521 *nsegs = i;
1522 return rc;
1523}
1524
/*
 * Tear down a fast-register MR registration: DMA-unmap the segments of
 * the run, then post an unsignaled IB_WR_LOCAL_INV for the FRMR's rkey
 * on ia->ri_id->qp.
 *
 * @seg:    first segment of the registered run; its mr_nsegs gives the
 *          number of segments to unmap and its mr_chunk.rl_mw holds the
 *          FRMR whose rkey is invalidated.
 * @ia:     supplies the QP and is passed to rpcrdma_unmap_one().
 * @r_xprt: used only for DECR_CQCOUNT(&r_xprt->rx_ep).
 *
 * Returns the result of ib_post_send(); a non-zero result is logged
 * through dprintk and handed back to the caller unchanged.
 */
1525static int
1526rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg,
1527 struct rpcrdma_ia *ia, struct rpcrdma_xprt *r_xprt)
1528{
1529 struct rpcrdma_mr_seg *seg1 = seg;
1530 struct ib_send_wr invalidate_wr, *bad_wr;
1531 int rc;
1532
/*
 * Post-decrement loop: unmaps mr_nsegs segments starting at seg and
 * leaves seg1->mr_nsegs decremented past zero when it finishes.
 * NOTE(review): the pages are DMA-unmapped before the invalidate is
 * posted, and nothing in this function waits for the invalidate to
 * complete - confirm that this ordering is intended.
 */
1533 while (seg1->mr_nsegs--)
1534 rpcrdma_unmap_one(ia, seg++);
1535
1536 memset(&invalidate_wr, 0, sizeof invalidate_wr);
1537 invalidate_wr.opcode = IB_WR_LOCAL_INV;
1538 invalidate_wr.send_flags = 0; /* unsignaled */
1539 invalidate_wr.ex.invalidate_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
/*
 * NOTE(review): DECR_CQCOUNT is defined outside this view; presumably it
 * accounts for the unsignaled WR posted below - confirm.
 */
1540 DECR_CQCOUNT(&r_xprt->rx_ep);
1541
/* bad_wr is required by the call but is not examined afterwards. */
1542 rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
1543 if (rc)
1544 dprintk("RPC: %s: failed ib_post_send for invalidate,"
1545 " status %i\n", __func__, rc);
1546 return rc;
1547}
1548
1549static int
1394rpcrdma_register_fmr_external(struct rpcrdma_mr_seg *seg, 1550rpcrdma_register_fmr_external(struct rpcrdma_mr_seg *seg,
1395 int *nsegs, int writing, struct rpcrdma_ia *ia) 1551 int *nsegs, int writing, struct rpcrdma_ia *ia)
1396{ 1552{
@@ -1600,6 +1756,11 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
1600 break; 1756 break;
1601#endif 1757#endif
1602 1758
1759 /* Registration using frmr registration */
1760 case RPCRDMA_FRMR:
1761 rc = rpcrdma_register_frmr_external(seg, &nsegs, writing, ia, r_xprt);
1762 break;
1763
1603 /* Registration using fmr memory registration */ 1764 /* Registration using fmr memory registration */
1604 case RPCRDMA_MTHCAFMR: 1765 case RPCRDMA_MTHCAFMR:
1605 rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia); 1766 rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia);
@@ -1639,6 +1800,10 @@ rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
1639 break; 1800 break;
1640#endif 1801#endif
1641 1802
1803 case RPCRDMA_FRMR:
1804 rc = rpcrdma_deregister_frmr_external(seg, ia, r_xprt);
1805 break;
1806
1642 case RPCRDMA_MTHCAFMR: 1807 case RPCRDMA_MTHCAFMR:
1643 rc = rpcrdma_deregister_fmr_external(seg, ia); 1808 rc = rpcrdma_deregister_fmr_external(seg, ia);
1644 break; 1809 break;