aboutsummaryrefslogtreecommitdiffstats
path: root/net/sunrpc/xprtrdma
diff options
context:
space:
mode:
author: Chuck Lever <chuck.lever@oracle.com> 2014-05-28 10:34:07 -0400
committer: Anna Schumaker <Anna.Schumaker@Netapp.com> 2014-06-04 08:56:47 -0400
commit: ec62f40d3505a643497d105c297093bb90afd44e (patch)
tree: 32cb9abf5da170e58d6855d78cd4d6b5016c3652 /net/sunrpc/xprtrdma
parent: 65866f8259851cea5e356d2fd46fc37a4e26330e (diff)
xprtrdma: Ensure ia->ri_id->qp is not NULL when reconnecting
Devesh Sharma <Devesh.Sharma@Emulex.Com> reports that after a disconnect, his HCA is failing to create a fresh QP, leaving ia->ri_id->qp set to NULL. But xprtrdma still allows RPCs to wake up and post LOCAL_INV as they exit, causing an oops.

rpcrdma_ep_connect() is allowing the wake-up by leaking the QP creation error code (-EPERM in this case) to the RPC client's generic layer. xprt_connect_status() does not recognize -EPERM, so it kills pending RPC tasks immediately rather than retrying the connect.

Re-arrange the QP creation logic so that when it fails on reconnect, it leaves ->qp with the old QP rather than NULL. If pending RPC tasks wake and exit, LOCAL_INV work requests will flush rather than oops.

On initial connect, leaving ->qp == NULL is OK, since there are no pending RPCs that might use ->qp. But be sure not to try to destroy a NULL QP when rpcrdma_ep_connect() is retried.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Diffstat (limited to 'net/sunrpc/xprtrdma')
-rw-r--r-- net/sunrpc/xprtrdma/verbs.c 29
1 files changed, 20 insertions, 9 deletions
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index c80995af82de..54edf2ac48a1 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -867,6 +867,7 @@ rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
867 if (ep->rep_connected != 0) { 867 if (ep->rep_connected != 0) {
868 struct rpcrdma_xprt *xprt; 868 struct rpcrdma_xprt *xprt;
869retry: 869retry:
870 dprintk("RPC: %s: reconnecting...\n", __func__);
870 rc = rpcrdma_ep_disconnect(ep, ia); 871 rc = rpcrdma_ep_disconnect(ep, ia);
871 if (rc && rc != -ENOTCONN) 872 if (rc && rc != -ENOTCONN)
872 dprintk("RPC: %s: rpcrdma_ep_disconnect" 873 dprintk("RPC: %s: rpcrdma_ep_disconnect"
@@ -879,7 +880,7 @@ retry:
879 id = rpcrdma_create_id(xprt, ia, 880 id = rpcrdma_create_id(xprt, ia,
880 (struct sockaddr *)&xprt->rx_data.addr); 881 (struct sockaddr *)&xprt->rx_data.addr);
881 if (IS_ERR(id)) { 882 if (IS_ERR(id)) {
882 rc = PTR_ERR(id); 883 rc = -EHOSTUNREACH;
883 goto out; 884 goto out;
884 } 885 }
885 /* TEMP TEMP TEMP - fail if new device: 886 /* TEMP TEMP TEMP - fail if new device:
@@ -893,20 +894,30 @@ retry:
893 printk("RPC: %s: can't reconnect on " 894 printk("RPC: %s: can't reconnect on "
894 "different device!\n", __func__); 895 "different device!\n", __func__);
895 rdma_destroy_id(id); 896 rdma_destroy_id(id);
896 rc = -ENETDOWN; 897 rc = -ENETUNREACH;
897 goto out; 898 goto out;
898 } 899 }
899 /* END TEMP */ 900 /* END TEMP */
901 rc = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr);
902 if (rc) {
903 dprintk("RPC: %s: rdma_create_qp failed %i\n",
904 __func__, rc);
905 rdma_destroy_id(id);
906 rc = -ENETUNREACH;
907 goto out;
908 }
900 rdma_destroy_qp(ia->ri_id); 909 rdma_destroy_qp(ia->ri_id);
901 rdma_destroy_id(ia->ri_id); 910 rdma_destroy_id(ia->ri_id);
902 ia->ri_id = id; 911 ia->ri_id = id;
903 } 912 } else {
904 913 dprintk("RPC: %s: connecting...\n", __func__);
905 rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr); 914 rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
906 if (rc) { 915 if (rc) {
907 dprintk("RPC: %s: rdma_create_qp failed %i\n", 916 dprintk("RPC: %s: rdma_create_qp failed %i\n",
908 __func__, rc); 917 __func__, rc);
909 goto out; 918 /* do not update ep->rep_connected */
919 return -ENETUNREACH;
920 }
910 } 921 }
911 922
912/* XXX Tavor device performs badly with 2K MTU! */ 923/* XXX Tavor device performs badly with 2K MTU! */