aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband
diff options
context:
space:
mode:
author: Sean Hefty <sean.hefty@intel.com> 2011-03-03 18:31:06 -0500
committer: Roland Dreier <roland@purestorage.com> 2011-03-15 13:56:12 -0400
commit: 8d8ac86564b616bc17054cbb6e727588da64c86b (patch)
tree: f17f09108b2fa639d5248120409e2fe562069545 /drivers/infiniband
parent: 29963437a48475036353b95ab142bf199adb909e (diff)
IB/cm: Cancel pending LAP message when exiting IB_CM_ESTABLISHED state
This problem was reported by Moni Shoua <monis@mellanox.com> and Amir Vadai <amirv@mellanox.com>:

When destroying a cm_id from the context of a work queue, if the lap_state of this cm_id is IB_CM_LAP_SENT, we need to release the reference on this id that was taken when the LAP message was sent. Otherwise, if the expected APR message gets lost, the reference is released only after a long time, and during that time the work handler thread is not available to process other things.

It turns out that we need to cancel any pending LAP messages whenever we transition out of the IB_CM_ESTABLISHED state. This occurs when disconnecting - either sending or receiving a DREQ. It can also happen in a corner case where we receive a REJ message after sending an RTU, followed by a LAP. Add checks and cancel any outstanding LAP messages in these three cases.

Canceling the LAP when sending a DREQ fixes the destroy problem reported by Moni. When a cm_id is destroyed in the IB_CM_ESTABLISHED state, it sends a DREQ to the remote side to notify the peer that the connection is going away.

Signed-off-by: Sean Hefty <sean.hefty@intel.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
Diffstat (limited to 'drivers/infiniband')
-rw-r--r-- drivers/infiniband/core/cm.c 19
1 files changed, 18 insertions, 1 deletions
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 1d9616be4192..f804e28e1ebb 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -1988,6 +1988,10 @@ int ib_send_cm_dreq(struct ib_cm_id *cm_id,
1988 goto out; 1988 goto out;
1989 } 1989 }
1990 1990
1991 if (cm_id->lap_state == IB_CM_LAP_SENT ||
1992 cm_id->lap_state == IB_CM_MRA_LAP_RCVD)
1993 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
1994
1991 ret = cm_alloc_msg(cm_id_priv, &msg); 1995 ret = cm_alloc_msg(cm_id_priv, &msg);
1992 if (ret) { 1996 if (ret) {
1993 cm_enter_timewait(cm_id_priv); 1997 cm_enter_timewait(cm_id_priv);
@@ -2129,6 +2133,10 @@ static int cm_dreq_handler(struct cm_work *work)
2129 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); 2133 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2130 break; 2134 break;
2131 case IB_CM_ESTABLISHED: 2135 case IB_CM_ESTABLISHED:
2136 if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT ||
2137 cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD)
2138 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2139 break;
2132 case IB_CM_MRA_REP_RCVD: 2140 case IB_CM_MRA_REP_RCVD:
2133 break; 2141 break;
2134 case IB_CM_TIMEWAIT: 2142 case IB_CM_TIMEWAIT:
@@ -2349,9 +2357,18 @@ static int cm_rej_handler(struct cm_work *work)
2349 /* fall through */ 2357 /* fall through */
2350 case IB_CM_REP_RCVD: 2358 case IB_CM_REP_RCVD:
2351 case IB_CM_MRA_REP_SENT: 2359 case IB_CM_MRA_REP_SENT:
2352 case IB_CM_ESTABLISHED:
2353 cm_enter_timewait(cm_id_priv); 2360 cm_enter_timewait(cm_id_priv);
2354 break; 2361 break;
2362 case IB_CM_ESTABLISHED:
2363 if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT ||
2364 cm_id_priv->id.lap_state == IB_CM_LAP_SENT) {
2365 if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT)
2366 ib_cancel_mad(cm_id_priv->av.port->mad_agent,
2367 cm_id_priv->msg);
2368 cm_enter_timewait(cm_id_priv);
2369 break;
2370 }
2371 /* fall through */
2355 default: 2372 default:
2356 spin_unlock_irq(&cm_id_priv->lock); 2373 spin_unlock_irq(&cm_id_priv->lock);
2357 ret = -EINVAL; 2374 ret = -EINVAL;