aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Tom Tucker <tom@ogc.us> 2011-02-09 14:45:34 -0500
committer Trond Myklebust <Trond.Myklebust@netapp.com> 2011-03-11 15:39:27 -0500
commit 5c635e09cec0feeeb310968e51dad01040244851 (patch)
tree 6f776276df4d20b221c02f776a677f8719f9a0aa
parent bd7ea31b9e8a342be76e0fe8d638343886c2d8c5 (diff)
RPCRDMA: Fix FRMR registration/invalidate handling.
When the rpc_memreg_strategy is 5, FRMR are used to map RPC data. This mode uses an FRMR to map the RPC data, then invalidates (i.e. unregisters) the data in xprt_rdma_free. These FRMR are used across connections on the same mount, i.e. if the connection goes away on an idle timeout and reconnects later, the FRMR are not destroyed and recreated. This creates a problem for transport errors because the WR that invalidates an FRMR may be flushed (i.e. fail), leaving the FRMR valid. When the FRMR is later used to map an RPC it will fail, tearing down the transport and starting over. Over time, more and more of the FRMR pool ends up in the wrong state, resulting in seemingly random disconnects. This fix keeps track of the FRMR state explicitly by setting its state based on the successful completion of a reg/inv WR. If the FRMR is ever used and found to be in the wrong state, an invalidate WR is prepended, re-syncing the FRMR state and avoiding the connection loss. Signed-off-by: Tom Tucker <tom@ogc.us> Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
-rw-r--r-- net/sunrpc/xprtrdma/verbs.c 52
-rw-r--r-- net/sunrpc/xprtrdma/xprt_rdma.h 1
2 files changed, 45 insertions, 8 deletions
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 5f4c7b3bc711..570f08dc0b03 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -144,6 +144,7 @@ rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context)
144static inline 144static inline
145void rpcrdma_event_process(struct ib_wc *wc) 145void rpcrdma_event_process(struct ib_wc *wc)
146{ 146{
147 struct rpcrdma_mw *frmr;
147 struct rpcrdma_rep *rep = 148 struct rpcrdma_rep *rep =
148 (struct rpcrdma_rep *)(unsigned long) wc->wr_id; 149 (struct rpcrdma_rep *)(unsigned long) wc->wr_id;
149 150
@@ -154,15 +155,23 @@ void rpcrdma_event_process(struct ib_wc *wc)
154 return; 155 return;
155 156
156 if (IB_WC_SUCCESS != wc->status) { 157 if (IB_WC_SUCCESS != wc->status) {
157 dprintk("RPC: %s: %s WC status %X, connection lost\n", 158 dprintk("RPC: %s: WC opcode %d status %X, connection lost\n",
158 __func__, (wc->opcode & IB_WC_RECV) ? "recv" : "send", 159 __func__, wc->opcode, wc->status);
159 wc->status);
160 rep->rr_len = ~0U; 160 rep->rr_len = ~0U;
161 rpcrdma_schedule_tasklet(rep); 161 if (wc->opcode != IB_WC_FAST_REG_MR && wc->opcode != IB_WC_LOCAL_INV)
162 rpcrdma_schedule_tasklet(rep);
162 return; 163 return;
163 } 164 }
164 165
165 switch (wc->opcode) { 166 switch (wc->opcode) {
167 case IB_WC_FAST_REG_MR:
168 frmr = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
169 frmr->r.frmr.state = FRMR_IS_VALID;
170 break;
171 case IB_WC_LOCAL_INV:
172 frmr = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
173 frmr->r.frmr.state = FRMR_IS_INVALID;
174 break;
166 case IB_WC_RECV: 175 case IB_WC_RECV:
167 rep->rr_len = wc->byte_len; 176 rep->rr_len = wc->byte_len;
168 ib_dma_sync_single_for_cpu( 177 ib_dma_sync_single_for_cpu(
@@ -1450,6 +1459,11 @@ rpcrdma_map_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg, int writing)
1450 seg->mr_dma = ib_dma_map_single(ia->ri_id->device, 1459 seg->mr_dma = ib_dma_map_single(ia->ri_id->device,
1451 seg->mr_offset, 1460 seg->mr_offset,
1452 seg->mr_dmalen, seg->mr_dir); 1461 seg->mr_dmalen, seg->mr_dir);
1462 if (ib_dma_mapping_error(ia->ri_id->device, seg->mr_dma)) {
1463 dprintk("RPC: %s: mr_dma %llx mr_offset %p mr_dma_len %zu\n",
1464 __func__,
1465 seg->mr_dma, seg->mr_offset, seg->mr_dmalen);
1466 }
1453} 1467}
1454 1468
1455static void 1469static void
@@ -1469,7 +1483,8 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
1469 struct rpcrdma_xprt *r_xprt) 1483 struct rpcrdma_xprt *r_xprt)
1470{ 1484{
1471 struct rpcrdma_mr_seg *seg1 = seg; 1485 struct rpcrdma_mr_seg *seg1 = seg;
1472 struct ib_send_wr frmr_wr, *bad_wr; 1486 struct ib_send_wr invalidate_wr, frmr_wr, *bad_wr, *post_wr;
1487
1473 u8 key; 1488 u8 key;
1474 int len, pageoff; 1489 int len, pageoff;
1475 int i, rc; 1490 int i, rc;
@@ -1484,6 +1499,7 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
1484 rpcrdma_map_one(ia, seg, writing); 1499 rpcrdma_map_one(ia, seg, writing);
1485 seg1->mr_chunk.rl_mw->r.frmr.fr_pgl->page_list[i] = seg->mr_dma; 1500 seg1->mr_chunk.rl_mw->r.frmr.fr_pgl->page_list[i] = seg->mr_dma;
1486 len += seg->mr_len; 1501 len += seg->mr_len;
1502 BUG_ON(seg->mr_len > PAGE_SIZE);
1487 ++seg; 1503 ++seg;
1488 ++i; 1504 ++i;
1489 /* Check for holes */ 1505 /* Check for holes */
@@ -1494,26 +1510,45 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
1494 dprintk("RPC: %s: Using frmr %p to map %d segments\n", 1510 dprintk("RPC: %s: Using frmr %p to map %d segments\n",
1495 __func__, seg1->mr_chunk.rl_mw, i); 1511 __func__, seg1->mr_chunk.rl_mw, i);
1496 1512
1513 if (unlikely(seg1->mr_chunk.rl_mw->r.frmr.state == FRMR_IS_VALID)) {
1514 dprintk("RPC: %s: frmr %x left valid, posting invalidate.\n",
1515 __func__,
1516 seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey);
1517 /* Invalidate before using. */
1518 memset(&invalidate_wr, 0, sizeof invalidate_wr);
1519 invalidate_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw;
1520 invalidate_wr.next = &frmr_wr;
1521 invalidate_wr.opcode = IB_WR_LOCAL_INV;
1522 invalidate_wr.send_flags = IB_SEND_SIGNALED;
1523 invalidate_wr.ex.invalidate_rkey =
1524 seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
1525 DECR_CQCOUNT(&r_xprt->rx_ep);
1526 post_wr = &invalidate_wr;
1527 } else
1528 post_wr = &frmr_wr;
1529
1497 /* Bump the key */ 1530 /* Bump the key */
1498 key = (u8)(seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey & 0x000000FF); 1531 key = (u8)(seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey & 0x000000FF);
1499 ib_update_fast_reg_key(seg1->mr_chunk.rl_mw->r.frmr.fr_mr, ++key); 1532 ib_update_fast_reg_key(seg1->mr_chunk.rl_mw->r.frmr.fr_mr, ++key);
1500 1533
1501 /* Prepare FRMR WR */ 1534 /* Prepare FRMR WR */
1502 memset(&frmr_wr, 0, sizeof frmr_wr); 1535 memset(&frmr_wr, 0, sizeof frmr_wr);
1536 frmr_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw;
1503 frmr_wr.opcode = IB_WR_FAST_REG_MR; 1537 frmr_wr.opcode = IB_WR_FAST_REG_MR;
1504 frmr_wr.send_flags = 0; /* unsignaled */ 1538 frmr_wr.send_flags = IB_SEND_SIGNALED;
1505 frmr_wr.wr.fast_reg.iova_start = seg1->mr_dma; 1539 frmr_wr.wr.fast_reg.iova_start = seg1->mr_dma;
1506 frmr_wr.wr.fast_reg.page_list = seg1->mr_chunk.rl_mw->r.frmr.fr_pgl; 1540 frmr_wr.wr.fast_reg.page_list = seg1->mr_chunk.rl_mw->r.frmr.fr_pgl;
1507 frmr_wr.wr.fast_reg.page_list_len = i; 1541 frmr_wr.wr.fast_reg.page_list_len = i;
1508 frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT; 1542 frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
1509 frmr_wr.wr.fast_reg.length = i << PAGE_SHIFT; 1543 frmr_wr.wr.fast_reg.length = i << PAGE_SHIFT;
1544 BUG_ON(frmr_wr.wr.fast_reg.length < len);
1510 frmr_wr.wr.fast_reg.access_flags = (writing ? 1545 frmr_wr.wr.fast_reg.access_flags = (writing ?
1511 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : 1546 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
1512 IB_ACCESS_REMOTE_READ); 1547 IB_ACCESS_REMOTE_READ);
1513 frmr_wr.wr.fast_reg.rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey; 1548 frmr_wr.wr.fast_reg.rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
1514 DECR_CQCOUNT(&r_xprt->rx_ep); 1549 DECR_CQCOUNT(&r_xprt->rx_ep);
1515 1550
1516 rc = ib_post_send(ia->ri_id->qp, &frmr_wr, &bad_wr); 1551 rc = ib_post_send(ia->ri_id->qp, post_wr, &bad_wr);
1517 1552
1518 if (rc) { 1553 if (rc) {
1519 dprintk("RPC: %s: failed ib_post_send for register," 1554 dprintk("RPC: %s: failed ib_post_send for register,"
@@ -1542,8 +1577,9 @@ rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg,
1542 rpcrdma_unmap_one(ia, seg++); 1577 rpcrdma_unmap_one(ia, seg++);
1543 1578
1544 memset(&invalidate_wr, 0, sizeof invalidate_wr); 1579 memset(&invalidate_wr, 0, sizeof invalidate_wr);
1580 invalidate_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw;
1545 invalidate_wr.opcode = IB_WR_LOCAL_INV; 1581 invalidate_wr.opcode = IB_WR_LOCAL_INV;
1546 invalidate_wr.send_flags = 0; /* unsignaled */ 1582 invalidate_wr.send_flags = IB_SEND_SIGNALED;
1547 invalidate_wr.ex.invalidate_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey; 1583 invalidate_wr.ex.invalidate_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
1548 DECR_CQCOUNT(&r_xprt->rx_ep); 1584 DECR_CQCOUNT(&r_xprt->rx_ep);
1549 1585
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index c7a7eba991bc..cae761a8536c 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -164,6 +164,7 @@ struct rpcrdma_mr_seg { /* chunk descriptors */
164 struct { 164 struct {
165 struct ib_fast_reg_page_list *fr_pgl; 165 struct ib_fast_reg_page_list *fr_pgl;
166 struct ib_mr *fr_mr; 166 struct ib_mr *fr_mr;
167 enum { FRMR_IS_INVALID, FRMR_IS_VALID } state;
167 } frmr; 168 } frmr;
168 } r; 169 } r;
169 struct list_head mw_list; 170 struct list_head mw_list;