diff options
author | Chuck Lever <chuck.lever@oracle.com> | 2017-02-08 17:00:35 -0500 |
---|---|---|
committer | Anna Schumaker <Anna.Schumaker@Netapp.com> | 2017-02-10 14:02:36 -0500 |
commit | 0a90487bf7182c74830616b91bd33f68f8c6e18b (patch) | |
tree | 8a5aec95f4a56b0092a5219b33800f9b898f77f3 | |
parent | 18c0fb31a034023e5cb2d3c9c1320d5d47d91afe (diff) |
xprtrdma: Handle stale connection rejection
A server rejects a connection attempt with STALE_CONNECTION when a
client attempts to connect to a working remote service, but uses a
QPN and GUID that correspond to an old connection that was
abandoned. This might occur after a client crashes and restarts.
Fix rpcrdma_conn_upcall() to distinguish between a normal rejection
and rejection of stale connection parameters.
As an additional clean-up, remove the code that retries the
connection attempt with different ORD/IRD values. Code audit of
other ULP initiators shows no similar special case handling of
initiator_depth or responder_resources.
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
-rw-r--r-- | net/sunrpc/xprtrdma/verbs.c | 66 |
1 file changed, 21 insertions, 45 deletions
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 61d16c39e92c..d1ee33fa8055 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c | |||
@@ -54,6 +54,7 @@ | |||
54 | #include <linux/sunrpc/svc_rdma.h> | 54 | #include <linux/sunrpc/svc_rdma.h> |
55 | #include <asm/bitops.h> | 55 | #include <asm/bitops.h> |
56 | #include <linux/module.h> /* try_module_get()/module_put() */ | 56 | #include <linux/module.h> /* try_module_get()/module_put() */ |
57 | #include <rdma/ib_cm.h> | ||
57 | 58 | ||
58 | #include "xprt_rdma.h" | 59 | #include "xprt_rdma.h" |
59 | 60 | ||
@@ -279,7 +280,14 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) | |||
279 | connstate = -ENETDOWN; | 280 | connstate = -ENETDOWN; |
280 | goto connected; | 281 | goto connected; |
281 | case RDMA_CM_EVENT_REJECTED: | 282 | case RDMA_CM_EVENT_REJECTED: |
283 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) | ||
284 | pr_info("rpcrdma: connection to %pIS:%u on %s rejected: %s\n", | ||
285 | sap, rpc_get_port(sap), ia->ri_device->name, | ||
286 | rdma_reject_msg(id, event->status)); | ||
287 | #endif | ||
282 | connstate = -ECONNREFUSED; | 288 | connstate = -ECONNREFUSED; |
289 | if (event->status == IB_CM_REJ_STALE_CONN) | ||
290 | connstate = -EAGAIN; | ||
283 | goto connected; | 291 | goto connected; |
284 | case RDMA_CM_EVENT_DISCONNECTED: | 292 | case RDMA_CM_EVENT_DISCONNECTED: |
285 | connstate = -ECONNABORTED; | 293 | connstate = -ECONNABORTED; |
@@ -643,20 +651,21 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) | |||
643 | int | 651 | int |
644 | rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) | 652 | rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) |
645 | { | 653 | { |
654 | struct rpcrdma_xprt *r_xprt = container_of(ia, struct rpcrdma_xprt, | ||
655 | rx_ia); | ||
646 | struct rdma_cm_id *id, *old; | 656 | struct rdma_cm_id *id, *old; |
657 | struct sockaddr *sap; | ||
658 | unsigned int extras; | ||
647 | int rc = 0; | 659 | int rc = 0; |
648 | int retry_count = 0; | ||
649 | 660 | ||
650 | if (ep->rep_connected != 0) { | 661 | if (ep->rep_connected != 0) { |
651 | struct rpcrdma_xprt *xprt; | ||
652 | retry: | 662 | retry: |
653 | dprintk("RPC: %s: reconnecting...\n", __func__); | 663 | dprintk("RPC: %s: reconnecting...\n", __func__); |
654 | 664 | ||
655 | rpcrdma_ep_disconnect(ep, ia); | 665 | rpcrdma_ep_disconnect(ep, ia); |
656 | 666 | ||
657 | xprt = container_of(ia, struct rpcrdma_xprt, rx_ia); | 667 | sap = (struct sockaddr *)&r_xprt->rx_data.addr; |
658 | id = rpcrdma_create_id(xprt, ia, | 668 | id = rpcrdma_create_id(r_xprt, ia, sap); |
659 | (struct sockaddr *)&xprt->rx_data.addr); | ||
660 | if (IS_ERR(id)) { | 669 | if (IS_ERR(id)) { |
661 | rc = -EHOSTUNREACH; | 670 | rc = -EHOSTUNREACH; |
662 | goto out; | 671 | goto out; |
@@ -711,51 +720,18 @@ retry: | |||
711 | } | 720 | } |
712 | 721 | ||
713 | wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0); | 722 | wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0); |
714 | |||
715 | /* | ||
716 | * Check state. A non-peer reject indicates no listener | ||
717 | * (ECONNREFUSED), which may be a transient state. All | ||
718 | * others indicate a transport condition which has already | ||
719 | * undergone a best-effort. | ||
720 | */ | ||
721 | if (ep->rep_connected == -ECONNREFUSED && | ||
722 | ++retry_count <= RDMA_CONNECT_RETRY_MAX) { | ||
723 | dprintk("RPC: %s: non-peer_reject, retry\n", __func__); | ||
724 | goto retry; | ||
725 | } | ||
726 | if (ep->rep_connected <= 0) { | 723 | if (ep->rep_connected <= 0) { |
727 | /* Sometimes, the only way to reliably connect to remote | 724 | if (ep->rep_connected == -EAGAIN) |
728 | * CMs is to use same nonzero values for ORD and IRD. */ | ||
729 | if (retry_count++ <= RDMA_CONNECT_RETRY_MAX + 1 && | ||
730 | (ep->rep_remote_cma.responder_resources == 0 || | ||
731 | ep->rep_remote_cma.initiator_depth != | ||
732 | ep->rep_remote_cma.responder_resources)) { | ||
733 | if (ep->rep_remote_cma.responder_resources == 0) | ||
734 | ep->rep_remote_cma.responder_resources = 1; | ||
735 | ep->rep_remote_cma.initiator_depth = | ||
736 | ep->rep_remote_cma.responder_resources; | ||
737 | goto retry; | 725 | goto retry; |
738 | } | ||
739 | rc = ep->rep_connected; | 726 | rc = ep->rep_connected; |
740 | } else { | 727 | goto out; |
741 | struct rpcrdma_xprt *r_xprt; | ||
742 | unsigned int extras; | ||
743 | |||
744 | dprintk("RPC: %s: connected\n", __func__); | ||
745 | |||
746 | r_xprt = container_of(ia, struct rpcrdma_xprt, rx_ia); | ||
747 | extras = r_xprt->rx_buf.rb_bc_srv_max_requests; | ||
748 | |||
749 | if (extras) { | ||
750 | rc = rpcrdma_ep_post_extra_recv(r_xprt, extras); | ||
751 | if (rc) { | ||
752 | pr_warn("%s: rpcrdma_ep_post_extra_recv: %i\n", | ||
753 | __func__, rc); | ||
754 | rc = 0; | ||
755 | } | ||
756 | } | ||
757 | } | 728 | } |
758 | 729 | ||
730 | dprintk("RPC: %s: connected\n", __func__); | ||
731 | extras = r_xprt->rx_buf.rb_bc_srv_max_requests; | ||
732 | if (extras) | ||
733 | rpcrdma_ep_post_extra_recv(r_xprt, extras); | ||
734 | |||
759 | out: | 735 | out: |
760 | if (rc) | 736 | if (rc) |
761 | ep->rep_connected = rc; | 737 | ep->rep_connected = rc; |