aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Sean Hefty <sean.hefty@intel.com> 2006-03-02 19:50:37 -0500
committer Roland Dreier <rolandd@cisco.com> 2006-03-20 13:08:23 -0500
commit 87fd1a11ae91ab42fac978467667c61fee9f01da (patch)
tree 856c3ca67426211ac229b87bea31d8b1a3154840
parent 6226bb570184eb6c645e047d2aecbce8c0442e3e (diff)
IB/cm: Check cm_id state before handling a REP
Move the cm_id state check ahead of the code that modifies the cm_id when handling a REP. This fixes a bug seen under MPI scale-up testing, where a NULL timewait_info pointer is dereferenced if a request times out before a REP is received.

Signed-off-by: Sean Hefty <sean.hefty@intel.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
-rw-r--r-- drivers/infiniband/core/cm.c 42
1 files changed, 24 insertions, 18 deletions
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 2514de3480d8..7cfedb8d9bcd 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -121,7 +121,7 @@ struct cm_id_private {
121 121
122 struct rb_node service_node; 122 struct rb_node service_node;
123 struct rb_node sidr_id_node; 123 struct rb_node sidr_id_node;
124 spinlock_t lock; 124 spinlock_t lock; /* Do not acquire inside cm.lock */
125 wait_queue_head_t wait; 125 wait_queue_head_t wait;
126 atomic_t refcount; 126 atomic_t refcount;
127 127
@@ -1547,40 +1547,46 @@ static int cm_rep_handler(struct cm_work *work)
1547 return -EINVAL; 1547 return -EINVAL;
1548 } 1548 }
1549 1549
1550 cm_format_rep_event(work);
1551
1552 spin_lock_irqsave(&cm_id_priv->lock, flags);
1553 switch (cm_id_priv->id.state) {
1554 case IB_CM_REQ_SENT:
1555 case IB_CM_MRA_REQ_RCVD:
1556 break;
1557 default:
1558 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1559 ret = -EINVAL;
1560 goto error;
1561 }
1562
1550 cm_id_priv->timewait_info->work.remote_id = rep_msg->local_comm_id; 1563 cm_id_priv->timewait_info->work.remote_id = rep_msg->local_comm_id;
1551 cm_id_priv->timewait_info->remote_ca_guid = rep_msg->local_ca_guid; 1564 cm_id_priv->timewait_info->remote_ca_guid = rep_msg->local_ca_guid;
1552 cm_id_priv->timewait_info->remote_qpn = cm_rep_get_local_qpn(rep_msg); 1565 cm_id_priv->timewait_info->remote_qpn = cm_rep_get_local_qpn(rep_msg);
1553 1566
1554 spin_lock_irqsave(&cm.lock, flags); 1567 spin_lock(&cm.lock);
1555 /* Check for duplicate REP. */ 1568 /* Check for duplicate REP. */
1556 if (cm_insert_remote_id(cm_id_priv->timewait_info)) { 1569 if (cm_insert_remote_id(cm_id_priv->timewait_info)) {
1557 spin_unlock_irqrestore(&cm.lock, flags); 1570 spin_unlock(&cm.lock);
1571 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1558 ret = -EINVAL; 1572 ret = -EINVAL;
1559 goto error; 1573 goto error;
1560 } 1574 }
1561 /* Check for a stale connection. */ 1575 /* Check for a stale connection. */
1562 if (cm_insert_remote_qpn(cm_id_priv->timewait_info)) { 1576 if (cm_insert_remote_qpn(cm_id_priv->timewait_info)) {
1563 spin_unlock_irqrestore(&cm.lock, flags); 1577 rb_erase(&cm_id_priv->timewait_info->remote_id_node,
1578 &cm.remote_id_table);
1579 cm_id_priv->timewait_info->inserted_remote_id = 0;
1580 spin_unlock(&cm.lock);
1581 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1564 cm_issue_rej(work->port, work->mad_recv_wc, 1582 cm_issue_rej(work->port, work->mad_recv_wc,
1565 IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP, 1583 IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP,
1566 NULL, 0); 1584 NULL, 0);
1567 ret = -EINVAL; 1585 ret = -EINVAL;
1568 goto error; 1586 goto error;
1569 } 1587 }
1570 spin_unlock_irqrestore(&cm.lock, flags); 1588 spin_unlock(&cm.lock);
1571
1572 cm_format_rep_event(work);
1573 1589
1574 spin_lock_irqsave(&cm_id_priv->lock, flags);
1575 switch (cm_id_priv->id.state) {
1576 case IB_CM_REQ_SENT:
1577 case IB_CM_MRA_REQ_RCVD:
1578 break;
1579 default:
1580 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1581 ret = -EINVAL;
1582 goto error;
1583 }
1584 cm_id_priv->id.state = IB_CM_REP_RCVD; 1590 cm_id_priv->id.state = IB_CM_REP_RCVD;
1585 cm_id_priv->id.remote_id = rep_msg->local_comm_id; 1591 cm_id_priv->id.remote_id = rep_msg->local_comm_id;
1586 cm_id_priv->remote_qpn = cm_rep_get_local_qpn(rep_msg); 1592 cm_id_priv->remote_qpn = cm_rep_get_local_qpn(rep_msg);
@@ -1603,7 +1609,7 @@ static int cm_rep_handler(struct cm_work *work)
1603 cm_deref_id(cm_id_priv); 1609 cm_deref_id(cm_id_priv);
1604 return 0; 1610 return 0;
1605 1611
1606error: cm_cleanup_timewait(cm_id_priv->timewait_info); 1612error:
1607 cm_deref_id(cm_id_priv); 1613 cm_deref_id(cm_id_priv);
1608 return ret; 1614 return ret;
1609} 1615}