author    Chuck Lever <chuck.lever@oracle.com>    2018-05-07 15:28:09 -0400
committer J. Bruce Fields <bfields@redhat.com>   2018-05-11 15:48:57 -0400
commit    25fd86eca11c26bad2aede6dd4709ff58f89c7cb (patch)
tree      924d70a458777408f56a2194a87a6bb14f825644
parent    4201c7464753827803366b40e82eb050c04ebdef (diff)
svcrdma: Don't overrun the SGE array in svc_rdma_send_ctxt
Receive buffers are always the same size, but each Send WR has a
variable number of SGEs, based on the contents of the xdr_buf being
sent.

While assembling a Send WR, keep track of the number of SGEs so that
we don't exceed the device's maximum, or walk off the end of the Send
SGE array. For now the Send path just fails if it exceeds the maximum.

The current logic in svc_rdma_accept bases the maximum number of Send
SGEs on the largest NFS request that can be sent or received. In the
transport layer, the limit is actually based on the capabilities of
the underlying device, not on properties of the Upper Layer Protocol.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
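[Annotation] The shape of the fix is easy to model outside the kernel: size the
SGE array from the device limit at allocation time, and check a cursor against
that limit on every append. The standalone C sketch below mirrors the patch's
field names (sc_cur_sge_no, sc_max_send_sges), but the allocator, harness, and
limit value are illustrative assumptions, not the kernel code itself.

/* Standalone model of the pattern this patch introduces; the field
 * names echo the kernel code, but the harness is hypothetical.
 * Build: cc -Wall -o sge_model sge_model.c
 */
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

struct ib_sge {
	unsigned long addr;
	unsigned int length;
	unsigned int lkey;
};

struct send_ctxt {
	int cur_sge_no;			/* mirrors sc_cur_sge_no */
	int max_send_sges;		/* device's Send SGE limit */
	struct ib_sge sges[];		/* sized at allocation time */
};

static struct send_ctxt *send_ctxt_alloc(int max_send_sges)
{
	struct send_ctxt *ctxt;

	/* sizeof(*ctxt) covers only the fixed fields; the SGE array
	 * is added according to the device capability, just as
	 * svc_rdma_send_ctxt_alloc() now does. */
	ctxt = calloc(1, sizeof(*ctxt) +
			 max_send_sges * sizeof(struct ib_sge));
	if (ctxt)
		ctxt->max_send_sges = max_send_sges;
	return ctxt;
}

/* Refuse to walk off the end of sges[]: fail with -EIO, the same
 * error the Send path now returns when the device limit is hit. */
static int send_ctxt_add_sge(struct send_ctxt *ctxt,
			     unsigned long addr, unsigned int length)
{
	if (ctxt->cur_sge_no >= ctxt->max_send_sges)
		return -EIO;
	ctxt->sges[ctxt->cur_sge_no].addr = addr;
	ctxt->sges[ctxt->cur_sge_no].length = length;
	ctxt->cur_sge_no++;
	return 0;
}

int main(void)
{
	struct send_ctxt *ctxt = send_ctxt_alloc(4);
	int i, ret = 0;

	/* The fifth append fails instead of overrunning the array. */
	for (i = 0; i < 6 && ret == 0; i++)
		ret = send_ctxt_add_sge(ctxt, 0x1000ul * i, 4096);
	printf("appended %d SGEs, ret=%d\n", ctxt->cur_sge_no, ret);
	free(ctxt);
	return 0;
}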
-rw-r--r--  include/linux/sunrpc/svc_rdma.h            9
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_sendto.c     36
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_transport.c  13
3 files changed, 33 insertions(+), 25 deletions(-)
diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index d3e2bb331264..bfb8824e31e1 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -96,7 +96,7 @@ struct svcxprt_rdma {
 	struct rdma_cm_id    *sc_cm_id;		/* RDMA connection id */
 	struct list_head     sc_accept_q;	/* Conn. waiting accept */
 	int		     sc_ord;		/* RDMA read limit */
-	int		     sc_max_sge;
+	int		     sc_max_send_sges;
 	bool		     sc_snd_w_inv;	/* OK to use Send With Invalidate */
 
 	atomic_t	     sc_sq_avail;	/* SQEs ready to be consumed */
@@ -158,17 +158,14 @@ struct svc_rdma_recv_ctxt {
 	struct page		*rc_pages[RPCSVC_MAXPAGES];
 };
 
-enum {
-	RPCRDMA_MAX_SGES	= 1 + (RPCRDMA_MAX_INLINE_THRESH / PAGE_SIZE),
-};
-
 struct svc_rdma_send_ctxt {
 	struct list_head	sc_list;
 	struct ib_send_wr	sc_send_wr;
 	struct ib_cqe		sc_cqe;
 	int			sc_page_count;
+	int			sc_cur_sge_no;
 	struct page		*sc_pages[RPCSVC_MAXPAGES];
-	struct ib_sge		sc_sges[RPCRDMA_MAX_SGES];
+	struct ib_sge		sc_sges[];
 };
 
 /* svc_rdma_backchannel.c */
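[Annotation] One detail worth noting in the struct change: sc_sges[] can become
a flexible array member only because it is now the last field, and it then
contributes nothing to sizeof(struct svc_rdma_send_ctxt), so every allocation
must add the SGE storage explicitly (as the kmalloc change in the next file
does). A minimal userspace illustration of that sizeof behavior, using a
hypothetical cut-down struct:

#include <stdio.h>
#include <stdlib.h>

struct ib_sge { unsigned long addr; unsigned int length, lkey; };

/* Cut-down analog of svc_rdma_send_ctxt: fixed fields first; the
 * flexible array member must be the final field. */
struct ctxt {
	int page_count;
	int cur_sge_no;
	struct ib_sge sges[];	/* adds nothing to sizeof(struct ctxt) */
};

int main(void)
{
	int max_send_sges = 8;
	struct ctxt *c;

	printf("fixed part only: sizeof(struct ctxt) = %zu\n",
	       sizeof(struct ctxt));

	/* The array storage is appended at allocation time. */
	c = malloc(sizeof(*c) + max_send_sges * sizeof(struct ib_sge));
	if (!c)
		return 1;
	c->cur_sge_no = 0;
	c->sges[max_send_sges - 1].length = 0;	/* last valid slot */
	free(c);
	return 0;
}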
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index b286d6a6e429..53d8db6bfaf2 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -127,9 +127,12 @@ static struct svc_rdma_send_ctxt *
 svc_rdma_send_ctxt_alloc(struct svcxprt_rdma *rdma)
 {
 	struct svc_rdma_send_ctxt *ctxt;
+	size_t size;
 	int i;
 
-	ctxt = kmalloc(sizeof(*ctxt), GFP_KERNEL);
+	size = sizeof(*ctxt);
+	size += rdma->sc_max_send_sges * sizeof(struct ib_sge);
+	ctxt = kmalloc(size, GFP_KERNEL);
 	if (!ctxt)
 		return NULL;
 
@@ -138,7 +141,7 @@ svc_rdma_send_ctxt_alloc(struct svcxprt_rdma *rdma)
 	ctxt->sc_send_wr.wr_cqe = &ctxt->sc_cqe;
 	ctxt->sc_send_wr.sg_list = ctxt->sc_sges;
 	ctxt->sc_send_wr.send_flags = IB_SEND_SIGNALED;
-	for (i = 0; i < ARRAY_SIZE(ctxt->sc_sges); i++)
+	for (i = 0; i < rdma->sc_max_send_sges; i++)
 		ctxt->sc_sges[i].lkey = rdma->sc_pd->local_dma_lkey;
 	return ctxt;
 }
@@ -482,7 +485,6 @@ static u32 svc_rdma_get_inv_rkey(__be32 *rdma_argp,
 
 static int svc_rdma_dma_map_page(struct svcxprt_rdma *rdma,
 				 struct svc_rdma_send_ctxt *ctxt,
-				 unsigned int sge_no,
 				 struct page *page,
 				 unsigned long offset,
 				 unsigned int len)
@@ -494,8 +496,8 @@ static int svc_rdma_dma_map_page(struct svcxprt_rdma *rdma,
 	if (ib_dma_mapping_error(dev, dma_addr))
 		goto out_maperr;
 
-	ctxt->sc_sges[sge_no].addr = dma_addr;
-	ctxt->sc_sges[sge_no].length = len;
+	ctxt->sc_sges[ctxt->sc_cur_sge_no].addr = dma_addr;
+	ctxt->sc_sges[ctxt->sc_cur_sge_no].length = len;
 	ctxt->sc_send_wr.num_sge++;
 	return 0;
 
@@ -509,11 +511,10 @@ out_maperr:
  */
 static int svc_rdma_dma_map_buf(struct svcxprt_rdma *rdma,
 				struct svc_rdma_send_ctxt *ctxt,
-				unsigned int sge_no,
 				unsigned char *base,
 				unsigned int len)
 {
-	return svc_rdma_dma_map_page(rdma, ctxt, sge_no, virt_to_page(base),
+	return svc_rdma_dma_map_page(rdma, ctxt, virt_to_page(base),
 				     offset_in_page(base), len);
 }
 
@@ -535,7 +536,8 @@ int svc_rdma_map_reply_hdr(struct svcxprt_rdma *rdma,
 {
 	ctxt->sc_pages[0] = virt_to_page(rdma_resp);
 	ctxt->sc_page_count++;
-	return svc_rdma_dma_map_page(rdma, ctxt, 0, ctxt->sc_pages[0], 0, len);
+	ctxt->sc_cur_sge_no = 0;
+	return svc_rdma_dma_map_page(rdma, ctxt, ctxt->sc_pages[0], 0, len);
 }
 
 /* Load the xdr_buf into the ctxt's sge array, and DMA map each
@@ -547,16 +549,16 @@ static int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
 			  struct svc_rdma_send_ctxt *ctxt,
 			  struct xdr_buf *xdr, __be32 *wr_lst)
 {
-	unsigned int len, sge_no, remaining;
+	unsigned int len, remaining;
 	unsigned long page_off;
 	struct page **ppages;
 	unsigned char *base;
 	u32 xdr_pad;
 	int ret;
 
-	sge_no = 1;
-
-	ret = svc_rdma_dma_map_buf(rdma, ctxt, sge_no++,
+	if (++ctxt->sc_cur_sge_no >= rdma->sc_max_send_sges)
+		return -EIO;
+	ret = svc_rdma_dma_map_buf(rdma, ctxt,
 				   xdr->head[0].iov_base,
 				   xdr->head[0].iov_len);
 	if (ret < 0)
@@ -586,8 +588,10 @@ static int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
 	while (remaining) {
 		len = min_t(u32, PAGE_SIZE - page_off, remaining);
 
-		ret = svc_rdma_dma_map_page(rdma, ctxt, sge_no++,
-					    *ppages++, page_off, len);
+		if (++ctxt->sc_cur_sge_no >= rdma->sc_max_send_sges)
+			return -EIO;
+		ret = svc_rdma_dma_map_page(rdma, ctxt, *ppages++,
+					    page_off, len);
 		if (ret < 0)
 			return ret;
 
@@ -599,7 +603,9 @@ static int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
 	len = xdr->tail[0].iov_len;
 tail:
 	if (len) {
-		ret = svc_rdma_dma_map_buf(rdma, ctxt, sge_no++, base, len);
+		if (++ctxt->sc_cur_sge_no >= rdma->sc_max_send_sges)
+			return -EIO;
+		ret = svc_rdma_dma_map_buf(rdma, ctxt, base, len);
 		if (ret < 0)
 			return ret;
 	}
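[Annotation] Stepping back from the hunks above: a reply consumes one SGE for
the transport header (mapped by svc_rdma_map_reply_hdr) and then one per
xdr_buf region, with the page list needing one SGE per page. That is where the
minimum of four Send SGEs enforced in the svc_rdma_accept hunk below comes
from: header, head iovec, at least one page list entry, and tail iovec. A rough
userspace model of that accounting follows; the 4KB page size and the
always-mapped head iovec are simplifying assumptions of the model.

#include <stdio.h>

#define MODEL_PAGE_SIZE 4096u

/* How many Send SGEs a reply needs under this patch's scheme:
 * one for the transport header, one for the xdr_buf head iovec,
 * one per page-list page, and one for the tail iovec if present. */
static unsigned int sges_needed(unsigned int page_len,
				unsigned int tail_len)
{
	unsigned int n = 1;	/* transport header */

	n++;			/* head iovec (always mapped) */
	n += (page_len + MODEL_PAGE_SIZE - 1) / MODEL_PAGE_SIZE;
	if (tail_len)
		n++;		/* tail iovec */
	return n;
}

int main(void)
{
	printf("inline-only reply       : %u SGEs\n", sges_needed(0, 0));
	printf("one-page reply w/ tail  : %u SGEs\n", sges_needed(4096, 4));
	printf("three-page reply w/ tail: %u SGEs\n",
	       sges_needed(3 * 4096, 4));
	return 0;
}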
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 3de81735a6cc..e9535a66bab0 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -476,8 +476,13 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
 
 	/* Qualify the transport resource defaults with the
 	 * capabilities of this particular device */
-	newxprt->sc_max_sge = min((size_t)dev->attrs.max_sge,
-				  (size_t)RPCSVC_MAXPAGES);
+	newxprt->sc_max_send_sges = dev->attrs.max_sge;
+	/* transport hdr, head iovec, one page list entry, tail iovec */
+	if (newxprt->sc_max_send_sges < 4) {
+		pr_err("svcrdma: too few Send SGEs available (%d)\n",
+		       newxprt->sc_max_send_sges);
+		goto errout;
+	}
 	newxprt->sc_max_req_size = svcrdma_max_req_size;
 	newxprt->sc_max_requests = svcrdma_max_requests;
 	newxprt->sc_max_bc_requests = svcrdma_max_bc_requests;
@@ -525,7 +530,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
 	qp_attr.cap.max_rdma_ctxs = ctxts;
 	qp_attr.cap.max_send_wr = newxprt->sc_sq_depth - ctxts;
 	qp_attr.cap.max_recv_wr = rq_depth;
-	qp_attr.cap.max_send_sge = newxprt->sc_max_sge;
+	qp_attr.cap.max_send_sge = newxprt->sc_max_send_sges;
 	qp_attr.cap.max_recv_sge = 1;
 	qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
 	qp_attr.qp_type = IB_QPT_RC;
@@ -586,7 +591,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
 	dprintk("    local address   : %pIS:%u\n", sap, rpc_get_port(sap));
 	sap = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr;
 	dprintk("    remote address  : %pIS:%u\n", sap, rpc_get_port(sap));
-	dprintk("    max_sge         : %d\n", newxprt->sc_max_sge);
+	dprintk("    max_sge         : %d\n", newxprt->sc_max_send_sges);
 	dprintk("    sq_depth        : %d\n", newxprt->sc_sq_depth);
 	dprintk("    rdma_rw_ctxs    : %d\n", ctxts);
 	dprintk("    max_requests    : %d\n", newxprt->sc_max_requests);
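[Annotation] The device attribute consulted in svc_rdma_accept
(dev->attrs.max_sge) corresponds to the limit libibverbs reports to user
space, so it is easy to check what a given adapter permits. Below is a small
inspection aid, not part of the patch; it assumes libibverbs is installed
(link with -libverbs).

#include <stdio.h>
#include <infiniband/verbs.h>

/* Print each RDMA device's max_sge, the limit that now bounds
 * sc_max_send_sges on the server side. */
int main(void)
{
	int num, i;
	struct ibv_device **list = ibv_get_device_list(&num);

	if (!list)
		return 1;
	for (i = 0; i < num; i++) {
		struct ibv_context *ctx = ibv_open_device(list[i]);
		struct ibv_device_attr attr;

		if (!ctx)
			continue;
		if (ibv_query_device(ctx, &attr) == 0)
			printf("%s: max_sge = %d\n",
			       ibv_get_device_name(list[i]), attr.max_sge);
		ibv_close_device(ctx);
	}
	ibv_free_device_list(list);
	return 0;
}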