author     Chuck Lever <chuck.lever@oracle.com>        2019-01-25 16:54:54 -0500
committer  J. Bruce Fields <bfields@redhat.com>        2019-02-06 15:32:34 -0500
commit     e248aa7be86e8179f20ac0931774ecd746f3f5bf
tree       fc51afa03c6f57f52ab65b647fd88b120082d708
parent     e3fdc89ca47ef34dfb6fd5101fec084c3dba5486
svcrdma: Remove max_sge check at connect time
Two and a half years ago, the client was changed to use gathered Send for larger inline messages, in commit 655fec6987b ("xprtrdma: Use gathered Send for large inline messages"). Several fixes were required because there are a few in-kernel device drivers whose max_sge is 3, and these were broken by the change.

Apparently my memory is going, because some time later, I submitted commit 25fd86eca11c ("svcrdma: Don't overrun the SGE array in svc_rdma_send_ctxt"), and after that, commit f3c1fd0ee294 ("svcrdma: Reduce max_send_sges"). These too incorrectly assumed in-kernel device drivers would have more than a few Send SGEs available.

The fix for the server side is not the same. This is because the fundamental problem on the server is that, whether or not the client has provisioned a chunk for the RPC reply, the server must squeeze even the most complex RPC replies into a single RDMA Send. Failing in the send path because of Send SGE exhaustion should never be an option.

Therefore, instead of failing when the send path runs out of SGEs, switch to using a bounce buffer mechanism to handle RPC replies that are too complex for the device to send directly. That allows us to remove the max_sge check to enable drivers with small max_sge to work again.

Reported-by: Don Dutile <ddutile@redhat.com>
Fixes: 25fd86eca11c ("svcrdma: Don't overrun the SGE array in ...")
Cc: stable@vger.kernel.org
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
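The idea can be modeled outside the kernel: count the scatter/gather elements a reply would need (one for the transport header, one for the head, one per page of payload, one for a non-empty tail) and, if that exceeds the device's Send SGE limit, copy the whole reply into one contiguous bounce buffer so it can be posted with a single SGE. The standalone C sketch below illustrates only that decision; the struct, the 4 KiB page size, and all names here are illustrative stand-ins, not the kernel's xdr_buf/svcxprt_rdma code, which appears in the diff that follows.

/* Userspace model of the pull-up decision; illustrative only. */
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#define MODEL_PAGE_SIZE 4096            /* stand-in for PAGE_SIZE */

struct model_reply {
        const char *head;  size_t head_len;    /* like xdr->head */
        const char *pages; size_t page_len;    /* like xdr->pages, already contiguous here */
        const char *tail;  size_t tail_len;    /* like xdr->tail */
};

/* Count the Send SGEs the reply would consume: one for the transport
 * header, one for the head, one per page of payload, and one for a
 * non-empty tail. Pull-up is needed when the device cannot cover that. */
static bool pull_up_needed(const struct model_reply *r, unsigned int max_send_sges)
{
        unsigned int elements = 1 /* transport header */ + 1 /* head */;

        elements += (r->page_len + MODEL_PAGE_SIZE - 1) / MODEL_PAGE_SIZE;
        if (r->tail_len)
                elements++;
        return elements > max_send_sges;
}

/* Flatten head, payload, and tail into one bounce buffer so the reply
 * can be transmitted with a single SGE. Returns the bytes copied. */
static size_t pull_up(const struct model_reply *r, char *dst)
{
        size_t off = 0;

        memcpy(dst + off, r->head, r->head_len);   off += r->head_len;
        memcpy(dst + off, r->pages, r->page_len);  off += r->page_len;
        memcpy(dst + off, r->tail, r->tail_len);   off += r->tail_len;
        return off;
}

int main(void)
{
        static char payload[3 * MODEL_PAGE_SIZE];       /* needs 3 payload SGEs */
        static char bounce[sizeof(payload) + 64];
        struct model_reply r = {
                .head = "rpchdr", .head_len = 6,
                .pages = payload, .page_len = sizeof(payload),
                .tail = "pad",    .tail_len = 3,
        };

        /* A device advertising only 3 Send SGEs cannot carry this reply
         * directly (it needs 6), so it gets pulled up instead of failing. */
        if (pull_up_needed(&r, 3))
                printf("pulled up %zu bytes into one SGE\n", pull_up(&r, bounce));
        else
                printf("reply fits within the device's SGE limit\n");
        return 0;
}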
-rw-r--r--   net/sunrpc/xprtrdma/svc_rdma_sendto.c      105
-rw-r--r--   net/sunrpc/xprtrdma/svc_rdma_transport.c     9
2 files changed, 102 insertions, 12 deletions
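At connect time, the Send SGE budget is now sized and clamped rather than used to reject the connection. As a rough worked example of the svc_rdma_transport.c hunk below (assuming a 4 KiB page size and the default svcrdma_max_req_size of 4096, neither of which is stated in this patch), the budget comes out to 5 and is simply capped at whatever the device advertises:

#include <stdio.h>

int main(void)
{
        /* Assumed values, for illustration only. */
        unsigned int page_size = 4096;
        unsigned int svcrdma_max_req_size = 4096;  /* assumed default inline threshold */
        unsigned int dev_max_send_sge = 3;         /* a "small max_sge" device */

        /* Transport header, head iovec, tail iovec ... */
        unsigned int sc_max_send_sges = 3;
        /* ... plus one SGE per page list entry, plus one. */
        sc_max_send_sges += (svcrdma_max_req_size / page_size) + 1;  /* = 5 */

        /* New behaviour: clamp to the device instead of failing the accept. */
        if (sc_max_send_sges > dev_max_send_sge)
                sc_max_send_sges = dev_max_send_sge;

        printf("Send SGE budget: %u\n", sc_max_send_sges);  /* prints 3 */
        return 0;
}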
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index cf51b8f9b15f..1f200119268c 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -537,6 +537,99 @@ void svc_rdma_sync_reply_hdr(struct svcxprt_rdma *rdma,
                                       DMA_TO_DEVICE);
 }
 
+/* If the xdr_buf has more elements than the device can
+ * transmit in a single RDMA Send, then the reply will
+ * have to be copied into a bounce buffer.
+ */
+static bool svc_rdma_pull_up_needed(struct svcxprt_rdma *rdma,
+                                    struct xdr_buf *xdr,
+                                    __be32 *wr_lst)
+{
+        int elements;
+
+        /* xdr->head */
+        elements = 1;
+
+        /* xdr->pages */
+        if (!wr_lst) {
+                unsigned int remaining;
+                unsigned long pageoff;
+
+                pageoff = xdr->page_base & ~PAGE_MASK;
+                remaining = xdr->page_len;
+                while (remaining) {
+                        ++elements;
+                        remaining -= min_t(u32, PAGE_SIZE - pageoff,
+                                           remaining);
+                        pageoff = 0;
+                }
+        }
+
+        /* xdr->tail */
+        if (xdr->tail[0].iov_len)
+                ++elements;
+
+        /* assume 1 SGE is needed for the transport header */
+        return elements >= rdma->sc_max_send_sges;
+}
+
+/* The device is not capable of sending the reply directly.
+ * Assemble the elements of @xdr into the transport header
+ * buffer.
+ */
+static int svc_rdma_pull_up_reply_msg(struct svcxprt_rdma *rdma,
+                                      struct svc_rdma_send_ctxt *ctxt,
+                                      struct xdr_buf *xdr, __be32 *wr_lst)
+{
+        unsigned char *dst, *tailbase;
+        unsigned int taillen;
+
+        dst = ctxt->sc_xprt_buf;
+        dst += ctxt->sc_sges[0].length;
+
+        memcpy(dst, xdr->head[0].iov_base, xdr->head[0].iov_len);
+        dst += xdr->head[0].iov_len;
+
+        tailbase = xdr->tail[0].iov_base;
+        taillen = xdr->tail[0].iov_len;
+        if (wr_lst) {
+                u32 xdrpad;
+
+                xdrpad = xdr_padsize(xdr->page_len);
+                if (taillen && xdrpad) {
+                        tailbase += xdrpad;
+                        taillen -= xdrpad;
+                }
+        } else {
+                unsigned int len, remaining;
+                unsigned long pageoff;
+                struct page **ppages;
+
+                ppages = xdr->pages + (xdr->page_base >> PAGE_SHIFT);
+                pageoff = xdr->page_base & ~PAGE_MASK;
+                remaining = xdr->page_len;
+                while (remaining) {
+                        len = min_t(u32, PAGE_SIZE - pageoff, remaining);
+
+                        memcpy(dst, page_address(*ppages), len);
+                        remaining -= len;
+                        dst += len;
+                        pageoff = 0;
+                }
+        }
+
+        if (taillen)
+                memcpy(dst, tailbase, taillen);
+
+        ctxt->sc_sges[0].length += xdr->len;
+        ib_dma_sync_single_for_device(rdma->sc_pd->device,
+                                      ctxt->sc_sges[0].addr,
+                                      ctxt->sc_sges[0].length,
+                                      DMA_TO_DEVICE);
+
+        return 0;
+}
+
 /* svc_rdma_map_reply_msg - Map the buffer holding RPC message
  * @rdma: controlling transport
  * @ctxt: send_ctxt for the Send WR
@@ -559,8 +652,10 @@ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
         u32 xdr_pad;
         int ret;
 
-        if (++ctxt->sc_cur_sge_no >= rdma->sc_max_send_sges)
-                return -EIO;
+        if (svc_rdma_pull_up_needed(rdma, xdr, wr_lst))
+                return svc_rdma_pull_up_reply_msg(rdma, ctxt, xdr, wr_lst);
+
+        ++ctxt->sc_cur_sge_no;
         ret = svc_rdma_dma_map_buf(rdma, ctxt,
                                    xdr->head[0].iov_base,
                                    xdr->head[0].iov_len);
@@ -591,8 +686,7 @@ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
         while (remaining) {
                 len = min_t(u32, PAGE_SIZE - page_off, remaining);
 
-                if (++ctxt->sc_cur_sge_no >= rdma->sc_max_send_sges)
-                        return -EIO;
+                ++ctxt->sc_cur_sge_no;
                 ret = svc_rdma_dma_map_page(rdma, ctxt, *ppages++,
                                             page_off, len);
                 if (ret < 0)
@@ -606,8 +700,7 @@ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
         len = xdr->tail[0].iov_len;
 tail:
         if (len) {
-                if (++ctxt->sc_cur_sge_no >= rdma->sc_max_send_sges)
-                        return -EIO;
+                ++ctxt->sc_cur_sge_no;
                 ret = svc_rdma_dma_map_buf(rdma, ctxt, base, len);
                 if (ret < 0)
                         return ret;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 924c17d46903..57f86c63a463 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -419,12 +419,9 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
         /* Transport header, head iovec, tail iovec */
         newxprt->sc_max_send_sges = 3;
         /* Add one SGE per page list entry */
-        newxprt->sc_max_send_sges += svcrdma_max_req_size / PAGE_SIZE;
-        if (newxprt->sc_max_send_sges > dev->attrs.max_send_sge) {
-                pr_err("svcrdma: too few Send SGEs available (%d needed)\n",
-                       newxprt->sc_max_send_sges);
-                goto errout;
-        }
+        newxprt->sc_max_send_sges += (svcrdma_max_req_size / PAGE_SIZE) + 1;
+        if (newxprt->sc_max_send_sges > dev->attrs.max_send_sge)
+                newxprt->sc_max_send_sges = dev->attrs.max_send_sge;
         newxprt->sc_max_req_size = svcrdma_max_req_size;
         newxprt->sc_max_requests = svcrdma_max_requests;
         newxprt->sc_max_bc_requests = svcrdma_max_bc_requests;