diff options
-rw-r--r-- | net/sunrpc/xprtrdma/transport.c | 6 | ||||
-rw-r--r-- | net/sunrpc/xprtrdma/verbs.c | 167 |
2 files changed, 167 insertions, 6 deletions
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index a564c1a39ec5..89970b0a4cc9 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c | |||
@@ -70,11 +70,7 @@ static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE; | |||
70 | static unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE; | 70 | static unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE; |
71 | static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE; | 71 | static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE; |
72 | static unsigned int xprt_rdma_inline_write_padding; | 72 | static unsigned int xprt_rdma_inline_write_padding; |
73 | #if !RPCRDMA_PERSISTENT_REGISTRATION | 73 | static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR; |
74 | static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_REGISTER; /* FMR? */ | ||
75 | #else | ||
76 | static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_ALLPHYSICAL; | ||
77 | #endif | ||
78 | 74 | ||
79 | #ifdef RPC_DEBUG | 75 | #ifdef RPC_DEBUG |
80 | 76 | ||
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 0f3b43148b7f..39a165202d8f 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c | |||
@@ -488,6 +488,26 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) | |||
488 | #endif | 488 | #endif |
489 | } | 489 | } |
490 | break; | 490 | break; |
491 | case RPCRDMA_FRMR: | ||
492 | /* Requires both frmr reg and local dma lkey */ | ||
493 | if ((devattr.device_cap_flags & | ||
494 | (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) != | ||
495 | (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) { | ||
496 | #if RPCRDMA_PERSISTENT_REGISTRATION | ||
497 | dprintk("RPC: %s: FRMR registration " | ||
498 | "specified but not supported by adapter, " | ||
499 | "using riskier RPCRDMA_ALLPHYSICAL\n", | ||
500 | __func__); | ||
501 | memreg = RPCRDMA_ALLPHYSICAL; | ||
502 | #else | ||
503 | dprintk("RPC: %s: FRMR registration " | ||
504 | "specified but not supported by adapter, " | ||
505 | "using slower RPCRDMA_REGISTER\n", | ||
506 | __func__); | ||
507 | memreg = RPCRDMA_REGISTER; | ||
508 | #endif | ||
509 | } | ||
510 | break; | ||
491 | } | 511 | } |
492 | 512 | ||
493 | /* | 513 | /* |
@@ -501,6 +521,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) | |||
501 | switch (memreg) { | 521 | switch (memreg) { |
502 | case RPCRDMA_BOUNCEBUFFERS: | 522 | case RPCRDMA_BOUNCEBUFFERS: |
503 | case RPCRDMA_REGISTER: | 523 | case RPCRDMA_REGISTER: |
524 | case RPCRDMA_FRMR: | ||
504 | break; | 525 | break; |
505 | #if RPCRDMA_PERSISTENT_REGISTRATION | 526 | #if RPCRDMA_PERSISTENT_REGISTRATION |
506 | case RPCRDMA_ALLPHYSICAL: | 527 | case RPCRDMA_ALLPHYSICAL: |
@@ -602,6 +623,12 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, | |||
602 | ep->rep_attr.srq = NULL; | 623 | ep->rep_attr.srq = NULL; |
603 | ep->rep_attr.cap.max_send_wr = cdata->max_requests; | 624 | ep->rep_attr.cap.max_send_wr = cdata->max_requests; |
604 | switch (ia->ri_memreg_strategy) { | 625 | switch (ia->ri_memreg_strategy) { |
626 | case RPCRDMA_FRMR: | ||
627 | /* Add room for frmr register and invalidate WRs */ | ||
628 | ep->rep_attr.cap.max_send_wr *= 3; | ||
629 | if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) | ||
630 | return -EINVAL; | ||
631 | break; | ||
605 | case RPCRDMA_MEMWINDOWS_ASYNC: | 632 | case RPCRDMA_MEMWINDOWS_ASYNC: |
606 | case RPCRDMA_MEMWINDOWS: | 633 | case RPCRDMA_MEMWINDOWS: |
607 | /* Add room for mw_binds+unbinds - overkill! */ | 634 | /* Add room for mw_binds+unbinds - overkill! */ |
@@ -684,6 +711,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, | |||
684 | break; | 711 | break; |
685 | case RPCRDMA_MTHCAFMR: | 712 | case RPCRDMA_MTHCAFMR: |
686 | case RPCRDMA_REGISTER: | 713 | case RPCRDMA_REGISTER: |
714 | case RPCRDMA_FRMR: | ||
687 | ep->rep_remote_cma.responder_resources = cdata->max_requests * | 715 | ep->rep_remote_cma.responder_resources = cdata->max_requests * |
688 | (RPCRDMA_MAX_DATA_SEGS / 8); | 716 | (RPCRDMA_MAX_DATA_SEGS / 8); |
689 | break; | 717 | break; |
@@ -935,7 +963,7 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, | |||
935 | * 2. arrays of struct rpcrdma_req to fill in pointers | 963 | * 2. arrays of struct rpcrdma_req to fill in pointers |
936 | * 3. array of struct rpcrdma_rep for replies | 964 | * 3. array of struct rpcrdma_rep for replies |
937 | * 4. padding, if any | 965 | * 4. padding, if any |
938 | * 5. mw's or fmr's, if any | 966 | * 5. mw's, fmr's or frmr's, if any |
939 | * Send/recv buffers in req/rep need to be registered | 967 | * Send/recv buffers in req/rep need to be registered |
940 | */ | 968 | */ |
941 | 969 | ||
@@ -943,6 +971,10 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, | |||
943 | (sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *)); | 971 | (sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *)); |
944 | len += cdata->padding; | 972 | len += cdata->padding; |
945 | switch (ia->ri_memreg_strategy) { | 973 | switch (ia->ri_memreg_strategy) { |
974 | case RPCRDMA_FRMR: | ||
975 | len += buf->rb_max_requests * RPCRDMA_MAX_SEGS * | ||
976 | sizeof(struct rpcrdma_mw); | ||
977 | break; | ||
946 | case RPCRDMA_MTHCAFMR: | 978 | case RPCRDMA_MTHCAFMR: |
947 | /* TBD we are perhaps overallocating here */ | 979 | /* TBD we are perhaps overallocating here */ |
948 | len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS * | 980 | len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS * |
@@ -991,6 +1023,30 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, | |||
991 | INIT_LIST_HEAD(&buf->rb_mws); | 1023 | INIT_LIST_HEAD(&buf->rb_mws); |
992 | r = (struct rpcrdma_mw *)p; | 1024 | r = (struct rpcrdma_mw *)p; |
993 | switch (ia->ri_memreg_strategy) { | 1025 | switch (ia->ri_memreg_strategy) { |
1026 | case RPCRDMA_FRMR: | ||
1027 | for (i = buf->rb_max_requests * RPCRDMA_MAX_SEGS; i; i--) { | ||
1028 | r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd, | ||
1029 | RPCRDMA_MAX_SEGS); | ||
1030 | if (IS_ERR(r->r.frmr.fr_mr)) { | ||
1031 | rc = PTR_ERR(r->r.frmr.fr_mr); | ||
1032 | dprintk("RPC: %s: ib_alloc_fast_reg_mr" | ||
1033 | " failed %i\n", __func__, rc); | ||
1034 | goto out; | ||
1035 | } | ||
1036 | r->r.frmr.fr_pgl = | ||
1037 | ib_alloc_fast_reg_page_list(ia->ri_id->device, | ||
1038 | RPCRDMA_MAX_SEGS); | ||
1039 | if (IS_ERR(r->r.frmr.fr_pgl)) { | ||
1040 | rc = PTR_ERR(r->r.frmr.fr_pgl); | ||
1041 | dprintk("RPC: %s: " | ||
1042 | "ib_alloc_fast_reg_page_list " | ||
1043 | "failed %i\n", __func__, rc); | ||
1044 | goto out; | ||
1045 | } | ||
1046 | list_add(&r->mw_list, &buf->rb_mws); | ||
1047 | ++r; | ||
1048 | } | ||
1049 | break; | ||
994 | case RPCRDMA_MTHCAFMR: | 1050 | case RPCRDMA_MTHCAFMR: |
995 | /* TBD we are perhaps overallocating here */ | 1051 | /* TBD we are perhaps overallocating here */ |
996 | for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) { | 1052 | for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) { |
@@ -1126,6 +1182,15 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) | |||
1126 | struct rpcrdma_mw, mw_list); | 1182 | struct rpcrdma_mw, mw_list); |
1127 | list_del(&r->mw_list); | 1183 | list_del(&r->mw_list); |
1128 | switch (ia->ri_memreg_strategy) { | 1184 | switch (ia->ri_memreg_strategy) { |
1185 | case RPCRDMA_FRMR: | ||
1186 | rc = ib_dereg_mr(r->r.frmr.fr_mr); | ||
1187 | if (rc) | ||
1188 | dprintk("RPC: %s:" | ||
1189 | " ib_dereg_mr" | ||
1190 | " failed %i\n", | ||
1191 | __func__, rc); | ||
1192 | ib_free_fast_reg_page_list(r->r.frmr.fr_pgl); | ||
1193 | break; | ||
1129 | case RPCRDMA_MTHCAFMR: | 1194 | case RPCRDMA_MTHCAFMR: |
1130 | rc = ib_dealloc_fmr(r->r.fmr); | 1195 | rc = ib_dealloc_fmr(r->r.fmr); |
1131 | if (rc) | 1196 | if (rc) |
@@ -1228,6 +1293,7 @@ rpcrdma_buffer_put(struct rpcrdma_req *req) | |||
1228 | req->rl_reply = NULL; | 1293 | req->rl_reply = NULL; |
1229 | } | 1294 | } |
1230 | switch (ia->ri_memreg_strategy) { | 1295 | switch (ia->ri_memreg_strategy) { |
1296 | case RPCRDMA_FRMR: | ||
1231 | case RPCRDMA_MTHCAFMR: | 1297 | case RPCRDMA_MTHCAFMR: |
1232 | case RPCRDMA_MEMWINDOWS_ASYNC: | 1298 | case RPCRDMA_MEMWINDOWS_ASYNC: |
1233 | case RPCRDMA_MEMWINDOWS: | 1299 | case RPCRDMA_MEMWINDOWS: |
@@ -1391,6 +1457,96 @@ rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg) | |||
1391 | } | 1457 | } |
1392 | 1458 | ||
/*
 * rpcrdma_register_frmr_external - register a run of segments through the
 * fast-register MR hanging off the first segment (seg->mr_chunk.rl_mw).
 *
 * @seg:     first segment of the run; its mr_chunk.rl_mw supplies the
 *           fr_mr / fr_pgl pair used for the whole run
 * @nsegs:   in: number of segments available (capped here at
 *           RPCRDMA_MAX_DATA_SEGS); out: value of the loop counter i when
 *           the function returns (see the note at the end)
 * @writing: selects IB_ACCESS_REMOTE_WRITE (non-zero) or
 *           IB_ACCESS_REMOTE_READ (zero); also forwarded to rpcrdma_map_one()
 * @ia:      supplies the QP (ia->ri_id->qp) the work request is posted on
 * @r_xprt:  supplies the endpoint passed to DECR_CQCOUNT()
 *
 * Returns the result of ib_post_send(). When it is zero, the first segment
 * is filled in with mr_rkey, mr_base, mr_nsegs and mr_len describing the
 * registered region.
 */
1393 | static int | 1459 | static int |
1460 | rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, | |
1461 | int *nsegs, int writing, struct rpcrdma_ia *ia, | |
1462 | struct rpcrdma_xprt *r_xprt) | |
1463 | { | |
1464 | struct rpcrdma_mr_seg *seg1 = seg; | |
1465 | struct ib_send_wr frmr_wr, *bad_wr; | |
1466 | u8 key; | |
1467 | int len, pageoff; | |
1468 | int i, rc; | |
1469 | |
/*
 * Round the first segment down to the start of its page. len starts at
 * -pageoff so that, once the enlarged seg1->mr_len is added in the loop
 * below, len totals the original (unrounded) byte count.
 */
1470 | pageoff = offset_in_page(seg1->mr_offset); | |
1471 | seg1->mr_offset -= pageoff; /* start of page */ | |
1472 | seg1->mr_len += pageoff; | |
1473 | len = -pageoff; | |
1474 | if (*nsegs > RPCRDMA_MAX_DATA_SEGS) | |
1475 | *nsegs = RPCRDMA_MAX_DATA_SEGS; | |
/*
 * Map segments one at a time and record each seg->mr_dma in the page list.
 * NOTE(review): page_list[] is indexed up to RPCRDMA_MAX_DATA_SEGS - 1;
 * confirm that does not exceed the size the page list was allocated with.
 */
1476 | for (i = 0; i < *nsegs;) { | |
1477 | rpcrdma_map_one(ia, seg, writing); | |
1478 | seg1->mr_chunk.rl_mw->r.frmr.fr_pgl->page_list[i] = seg->mr_dma; | |
1479 | len += seg->mr_len; | |
1480 | ++seg; | |
1481 | ++i; | |
/*
 * Stop the run if a further segment exists and does not start on a page
 * boundary, or if the segment just mapped does not end on one.
 */
1482 | /* Check for holes */ | |
1483 | if ((i < *nsegs && offset_in_page(seg->mr_offset)) || | |
1484 | offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) | |
1485 | break; | |
1486 | } | |
1487 | dprintk("RPC: %s: Using frmr %p to map %d segments\n", | |
1488 | __func__, seg1->mr_chunk.rl_mw, i); | |
1489 | |
/*
 * Take the low 8 bits of the current rkey, increment them (u8, so the
 * value wraps) and hand the result to ib_update_fast_reg_key().
 */
1490 | /* Bump the key */ | |
1491 | key = (u8)(seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey & 0x000000FF); | |
1492 | ib_update_fast_reg_key(seg1->mr_chunk.rl_mw->r.frmr.fr_mr, ++key); | |
1493 | |
/*
 * NOTE(review): length is i whole pages, while len (computed above) is the
 * byte count of the data actually described; confirm that registering the
 * full page span is intended.
 * iova_start uses seg1->mr_dma, presumably filled in by rpcrdma_map_one()
 * - verify against that helper.
 */
1494 | /* Prepare FRMR WR */ | |
1495 | memset(&frmr_wr, 0, sizeof frmr_wr); | |
1496 | frmr_wr.opcode = IB_WR_FAST_REG_MR; | |
1497 | frmr_wr.send_flags = 0; /* unsignaled */ | |
1498 | frmr_wr.wr.fast_reg.iova_start = (unsigned long)seg1->mr_dma; | |
1499 | frmr_wr.wr.fast_reg.page_list = seg1->mr_chunk.rl_mw->r.frmr.fr_pgl; | |
1500 | frmr_wr.wr.fast_reg.page_list_len = i; | |
1501 | frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT; | |
1502 | frmr_wr.wr.fast_reg.length = i << PAGE_SHIFT; | |
1503 | frmr_wr.wr.fast_reg.access_flags = (writing ? | |
1504 | IB_ACCESS_REMOTE_WRITE : IB_ACCESS_REMOTE_READ); | |
1505 | frmr_wr.wr.fast_reg.rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey; | |
/*
 * NOTE(review): DECR_CQCOUNT() runs before the post and nothing here undoes
 * it if ib_post_send() fails; confirm that is acceptable for the macro.
 */
1506 | DECR_CQCOUNT(&r_xprt->rx_ep); | |
1507 | |
1508 | rc = ib_post_send(ia->ri_id->qp, &frmr_wr, &bad_wr); | |
1509 | |
/*
 * Failure: unmap, in reverse order, every segment mapped above. The
 * page-rounding applied to seg1->mr_offset / seg1->mr_len at the top is
 * not reverted on this path.
 * Success: publish the rkey, the base address (DMA address of the first
 * page plus the original in-page offset), the segment count and the byte
 * length in the first segment.
 */
1510 | if (rc) { | |
1511 | dprintk("RPC: %s: failed ib_post_send for register," | |
1512 | " status %i\n", __func__, rc); | |
1513 | while (i--) | |
1514 | rpcrdma_unmap_one(ia, --seg); | |
1515 | } else { | |
1516 | seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey; | |
1517 | seg1->mr_base = seg1->mr_dma + pageoff; | |
1518 | seg1->mr_nsegs = i; | |
1519 | seg1->mr_len = len; | |
1520 | } | |
/*
 * On success i is the number of segments registered. On failure the
 * "while (i--)" unwind above has left i at -1, so *nsegs is set to -1.
 */
1521 | *nsegs = i; | |
1522 | return rc; | |
1523 | } | |
1524 | |||
/*
 * rpcrdma_deregister_frmr_external - undo a fast-register MR registration.
 *
 * @seg:    first segment of the registered run; its mr_nsegs gives the
 *          number of segments to unmap and its mr_chunk.rl_mw holds the
 *          fr_mr whose rkey is invalidated
 * @ia:     supplies the QP (ia->ri_id->qp) the work request is posted on
 * @r_xprt: supplies the endpoint passed to DECR_CQCOUNT()
 *
 * Unmaps each segment of the run, then posts an IB_WR_LOCAL_INV work
 * request. Returns the result of ib_post_send().
 */
1525 | static int | |
1526 | rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg, | |
1527 | struct rpcrdma_ia *ia, struct rpcrdma_xprt *r_xprt) | |
1528 | { | |
1529 | struct rpcrdma_mr_seg *seg1 = seg; | |
1530 | struct ib_send_wr invalidate_wr, *bad_wr; | |
1531 | int rc; | |
1532 | |
/*
 * seg walks forward over the run while seg1 keeps pointing at the first
 * segment. Because of the post-decrement, seg1->mr_nsegs is left one below
 * zero when the loop exits (-1 if the field is a signed int - TODO confirm
 * its type).
 * NOTE(review): the segments are unmapped before the invalidate work
 * request is posted; confirm this ordering is intended.
 */
1533 | while (seg1->mr_nsegs--) | |
1534 | rpcrdma_unmap_one(ia, seg++); | |
1535 | |
/* Build a local-invalidate work request for this FRMR's current rkey. */
1536 | memset(&invalidate_wr, 0, sizeof invalidate_wr); | |
1537 | invalidate_wr.opcode = IB_WR_LOCAL_INV; | |
1538 | invalidate_wr.send_flags = 0; /* unsignaled */ | |
1539 | invalidate_wr.ex.invalidate_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey; | |
1540 | DECR_CQCOUNT(&r_xprt->rx_ep); | |
1541 | |
/* A failed post is only logged here; rc is handed back to the caller. */
1542 | rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr); | |
1543 | if (rc) | |
1544 | dprintk("RPC: %s: failed ib_post_send for invalidate," | |
1545 | " status %i\n", __func__, rc); | |
1546 | return rc; | |
1547 | } | |
1548 | |||
1549 | static int | ||
1394 | rpcrdma_register_fmr_external(struct rpcrdma_mr_seg *seg, | 1550 | rpcrdma_register_fmr_external(struct rpcrdma_mr_seg *seg, |
1395 | int *nsegs, int writing, struct rpcrdma_ia *ia) | 1551 | int *nsegs, int writing, struct rpcrdma_ia *ia) |
1396 | { | 1552 | { |
@@ -1600,6 +1756,11 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg, | |||
1600 | break; | 1756 | break; |
1601 | #endif | 1757 | #endif |
1602 | 1758 | ||
1759 | /* Registration using frmr registration */ | ||
1760 | case RPCRDMA_FRMR: | ||
1761 | rc = rpcrdma_register_frmr_external(seg, &nsegs, writing, ia, r_xprt); | ||
1762 | break; | ||
1763 | |||
1603 | /* Registration using fmr memory registration */ | 1764 | /* Registration using fmr memory registration */ |
1604 | case RPCRDMA_MTHCAFMR: | 1765 | case RPCRDMA_MTHCAFMR: |
1605 | rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia); | 1766 | rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia); |
@@ -1639,6 +1800,10 @@ rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg, | |||
1639 | break; | 1800 | break; |
1640 | #endif | 1801 | #endif |
1641 | 1802 | ||
1803 | case RPCRDMA_FRMR: | ||
1804 | rc = rpcrdma_deregister_frmr_external(seg, ia, r_xprt); | ||
1805 | break; | ||
1806 | |||
1642 | case RPCRDMA_MTHCAFMR: | 1807 | case RPCRDMA_MTHCAFMR: |
1643 | rc = rpcrdma_deregister_fmr_external(seg, ia); | 1808 | rc = rpcrdma_deregister_fmr_external(seg, ia); |
1644 | break; | 1809 | break; |