aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband
diff options
context:
space:
mode:
authorSteve Wise <swise@opengridcomputing.com>2008-04-29 16:46:51 -0400
committerRoland Dreier <rolandd@cisco.com>2008-04-29 16:46:51 -0400
commit989a1780698c65dfe093a6aa89ceeff84c31f528 (patch)
tree9fe0ca81bfbdbc9f7618fc6b09c1c0c9e3234057 /drivers/infiniband
parente463c7b197dbe64b8a99b0612c65f286937e5bf1 (diff)
RDMA/cxgb3: Correctly serialize peer abort path
Open MPI and other stress testing exposed a few bad bugs in handling aborts in the middle of a normal close. Fix these by: - serializing abort reply and peer abort processing with disconnect processing - warning (and ignoring) if ep timer is stopped when it wasn't running - cleaning up disconnect path to correctly deal with aborting and dead endpoints - in iwch_modify_qp(), taking a ref on the ep before releasing the qp lock if iwch_ep_disconnect() will be called. The ref is dropped after calling disconnect. Signed-off-by: Steve Wise <swise@opengridcomputing.com> Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_cm.c100
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_cm.h1
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_qp.c6
3 files changed, 72 insertions, 35 deletions
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
index 72ca360c3dbc..0b515d899f6c 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c
@@ -125,6 +125,12 @@ static void start_ep_timer(struct iwch_ep *ep)
125static void stop_ep_timer(struct iwch_ep *ep) 125static void stop_ep_timer(struct iwch_ep *ep)
126{ 126{
127 PDBG("%s ep %p\n", __func__, ep); 127 PDBG("%s ep %p\n", __func__, ep);
128 if (!timer_pending(&ep->timer)) {
129 printk(KERN_ERR "%s timer stopped when its not running! ep %p state %u\n",
130 __func__, ep, ep->com.state);
131 WARN_ON(1);
132 return;
133 }
128 del_timer_sync(&ep->timer); 134 del_timer_sync(&ep->timer);
129 put_ep(&ep->com); 135 put_ep(&ep->com);
130} 136}
@@ -1083,8 +1089,11 @@ static int tx_ack(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1083static int abort_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) 1089static int abort_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1084{ 1090{
1085 struct iwch_ep *ep = ctx; 1091 struct iwch_ep *ep = ctx;
1092 unsigned long flags;
1093 int release = 0;
1086 1094
1087 PDBG("%s ep %p\n", __func__, ep); 1095 PDBG("%s ep %p\n", __func__, ep);
1096 BUG_ON(!ep);
1088 1097
1089 /* 1098 /*
1090 * We get 2 abort replies from the HW. The first one must 1099 * We get 2 abort replies from the HW. The first one must
@@ -1095,9 +1104,22 @@ static int abort_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1095 return CPL_RET_BUF_DONE; 1104 return CPL_RET_BUF_DONE;
1096 } 1105 }
1097 1106
1098 close_complete_upcall(ep); 1107 spin_lock_irqsave(&ep->com.lock, flags);
1099 state_set(&ep->com, DEAD); 1108 switch (ep->com.state) {
1100 release_ep_resources(ep); 1109 case ABORTING:
1110 close_complete_upcall(ep);
1111 __state_set(&ep->com, DEAD);
1112 release = 1;
1113 break;
1114 default:
1115 printk(KERN_ERR "%s ep %p state %d\n",
1116 __func__, ep, ep->com.state);
1117 break;
1118 }
1119 spin_unlock_irqrestore(&ep->com.lock, flags);
1120
1121 if (release)
1122 release_ep_resources(ep);
1101 return CPL_RET_BUF_DONE; 1123 return CPL_RET_BUF_DONE;
1102} 1124}
1103 1125
@@ -1470,7 +1492,8 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1470 struct sk_buff *rpl_skb; 1492 struct sk_buff *rpl_skb;
1471 struct iwch_qp_attributes attrs; 1493 struct iwch_qp_attributes attrs;
1472 int ret; 1494 int ret;
1473 int state; 1495 int release = 0;
1496 unsigned long flags;
1474 1497
1475 if (is_neg_adv_abort(req->status)) { 1498 if (is_neg_adv_abort(req->status)) {
1476 PDBG("%s neg_adv_abort ep %p tid %d\n", __func__, ep, 1499 PDBG("%s neg_adv_abort ep %p tid %d\n", __func__, ep,
@@ -1488,9 +1511,9 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1488 return CPL_RET_BUF_DONE; 1511 return CPL_RET_BUF_DONE;
1489 } 1512 }
1490 1513
1491 state = state_read(&ep->com); 1514 spin_lock_irqsave(&ep->com.lock, flags);
1492 PDBG("%s ep %p state %u\n", __func__, ep, state); 1515 PDBG("%s ep %p state %u\n", __func__, ep, ep->com.state);
1493 switch (state) { 1516 switch (ep->com.state) {
1494 case CONNECTING: 1517 case CONNECTING:
1495 break; 1518 break;
1496 case MPA_REQ_WAIT: 1519 case MPA_REQ_WAIT:
@@ -1536,21 +1559,25 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1536 break; 1559 break;
1537 case DEAD: 1560 case DEAD:
1538 PDBG("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__); 1561 PDBG("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__);
1562 spin_unlock_irqrestore(&ep->com.lock, flags);
1539 return CPL_RET_BUF_DONE; 1563 return CPL_RET_BUF_DONE;
1540 default: 1564 default:
1541 BUG_ON(1); 1565 BUG_ON(1);
1542 break; 1566 break;
1543 } 1567 }
1544 dst_confirm(ep->dst); 1568 dst_confirm(ep->dst);
1569 if (ep->com.state != ABORTING) {
1570 __state_set(&ep->com, DEAD);
1571 release = 1;
1572 }
1573 spin_unlock_irqrestore(&ep->com.lock, flags);
1545 1574
1546 rpl_skb = get_skb(skb, sizeof(*rpl), GFP_KERNEL); 1575 rpl_skb = get_skb(skb, sizeof(*rpl), GFP_KERNEL);
1547 if (!rpl_skb) { 1576 if (!rpl_skb) {
1548 printk(KERN_ERR MOD "%s - cannot allocate skb!\n", 1577 printk(KERN_ERR MOD "%s - cannot allocate skb!\n",
1549 __func__); 1578 __func__);
1550 dst_release(ep->dst); 1579 release = 1;
1551 l2t_release(L2DATA(ep->com.tdev), ep->l2t); 1580 goto out;
1552 put_ep(&ep->com);
1553 return CPL_RET_BUF_DONE;
1554 } 1581 }
1555 rpl_skb->priority = CPL_PRIORITY_DATA; 1582 rpl_skb->priority = CPL_PRIORITY_DATA;
1556 rpl = (struct cpl_abort_rpl *) skb_put(rpl_skb, sizeof(*rpl)); 1583 rpl = (struct cpl_abort_rpl *) skb_put(rpl_skb, sizeof(*rpl));
@@ -1559,10 +1586,9 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1559 OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, ep->hwtid)); 1586 OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, ep->hwtid));
1560 rpl->cmd = CPL_ABORT_NO_RST; 1587 rpl->cmd = CPL_ABORT_NO_RST;
1561 cxgb3_ofld_send(ep->com.tdev, rpl_skb); 1588 cxgb3_ofld_send(ep->com.tdev, rpl_skb);
1562 if (state != ABORTING) { 1589out:
1563 state_set(&ep->com, DEAD); 1590 if (release)
1564 release_ep_resources(ep); 1591 release_ep_resources(ep);
1565 }
1566 return CPL_RET_BUF_DONE; 1592 return CPL_RET_BUF_DONE;
1567} 1593}
1568 1594
@@ -1661,15 +1687,18 @@ static void ep_timeout(unsigned long arg)
1661 struct iwch_ep *ep = (struct iwch_ep *)arg; 1687 struct iwch_ep *ep = (struct iwch_ep *)arg;
1662 struct iwch_qp_attributes attrs; 1688 struct iwch_qp_attributes attrs;
1663 unsigned long flags; 1689 unsigned long flags;
1690 int abort = 1;
1664 1691
1665 spin_lock_irqsave(&ep->com.lock, flags); 1692 spin_lock_irqsave(&ep->com.lock, flags);
1666 PDBG("%s ep %p tid %u state %d\n", __func__, ep, ep->hwtid, 1693 PDBG("%s ep %p tid %u state %d\n", __func__, ep, ep->hwtid,
1667 ep->com.state); 1694 ep->com.state);
1668 switch (ep->com.state) { 1695 switch (ep->com.state) {
1669 case MPA_REQ_SENT: 1696 case MPA_REQ_SENT:
1697 __state_set(&ep->com, ABORTING);
1670 connect_reply_upcall(ep, -ETIMEDOUT); 1698 connect_reply_upcall(ep, -ETIMEDOUT);
1671 break; 1699 break;
1672 case MPA_REQ_WAIT: 1700 case MPA_REQ_WAIT:
1701 __state_set(&ep->com, ABORTING);
1673 break; 1702 break;
1674 case CLOSING: 1703 case CLOSING:
1675 case MORIBUND: 1704 case MORIBUND:
@@ -1679,13 +1708,17 @@ static void ep_timeout(unsigned long arg)
1679 ep->com.qp, IWCH_QP_ATTR_NEXT_STATE, 1708 ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
1680 &attrs, 1); 1709 &attrs, 1);
1681 } 1710 }
1711 __state_set(&ep->com, ABORTING);
1682 break; 1712 break;
1683 default: 1713 default:
1684 BUG(); 1714 printk(KERN_ERR "%s unexpected state ep %p state %u\n",
1715 __func__, ep, ep->com.state);
1716 WARN_ON(1);
1717 abort = 0;
1685 } 1718 }
1686 __state_set(&ep->com, CLOSING);
1687 spin_unlock_irqrestore(&ep->com.lock, flags); 1719 spin_unlock_irqrestore(&ep->com.lock, flags);
1688 abort_connection(ep, NULL, GFP_ATOMIC); 1720 if (abort)
1721 abort_connection(ep, NULL, GFP_ATOMIC);
1689 put_ep(&ep->com); 1722 put_ep(&ep->com);
1690} 1723}
1691 1724
@@ -1968,40 +2001,39 @@ int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, gfp_t gfp)
1968 PDBG("%s ep %p state %s, abrupt %d\n", __func__, ep, 2001 PDBG("%s ep %p state %s, abrupt %d\n", __func__, ep,
1969 states[ep->com.state], abrupt); 2002 states[ep->com.state], abrupt);
1970 2003
1971 if (ep->com.state == DEAD) {
1972 PDBG("%s already dead ep %p\n", __func__, ep);
1973 goto out;
1974 }
1975
1976 if (abrupt) {
1977 if (ep->com.state != ABORTING) {
1978 ep->com.state = ABORTING;
1979 close = 1;
1980 }
1981 goto out;
1982 }
1983
1984 switch (ep->com.state) { 2004 switch (ep->com.state) {
1985 case MPA_REQ_WAIT: 2005 case MPA_REQ_WAIT:
1986 case MPA_REQ_SENT: 2006 case MPA_REQ_SENT:
1987 case MPA_REQ_RCVD: 2007 case MPA_REQ_RCVD:
1988 case MPA_REP_SENT: 2008 case MPA_REP_SENT:
1989 case FPDU_MODE: 2009 case FPDU_MODE:
1990 start_ep_timer(ep);
1991 ep->com.state = CLOSING;
1992 close = 1; 2010 close = 1;
2011 if (abrupt)
2012 ep->com.state = ABORTING;
2013 else {
2014 ep->com.state = CLOSING;
2015 start_ep_timer(ep);
2016 }
1993 break; 2017 break;
1994 case CLOSING: 2018 case CLOSING:
1995 ep->com.state = MORIBUND;
1996 close = 1; 2019 close = 1;
2020 if (abrupt) {
2021 stop_ep_timer(ep);
2022 ep->com.state = ABORTING;
2023 } else
2024 ep->com.state = MORIBUND;
1997 break; 2025 break;
1998 case MORIBUND: 2026 case MORIBUND:
2027 case ABORTING:
2028 case DEAD:
2029 PDBG("%s ignoring disconnect ep %p state %u\n",
2030 __func__, ep, ep->com.state);
1999 break; 2031 break;
2000 default: 2032 default:
2001 BUG(); 2033 BUG();
2002 break; 2034 break;
2003 } 2035 }
2004out: 2036
2005 spin_unlock_irqrestore(&ep->com.lock, flags); 2037 spin_unlock_irqrestore(&ep->com.lock, flags);
2006 if (close) { 2038 if (close) {
2007 if (abrupt) 2039 if (abrupt)
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.h b/drivers/infiniband/hw/cxgb3/iwch_cm.h
index 2bb7fbdb3ff4..a2f1b787d970 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.h
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.h
@@ -56,6 +56,7 @@
56#define put_ep(ep) { \ 56#define put_ep(ep) { \
57 PDBG("put_ep (via %s:%u) ep %p refcnt %d\n", __func__, __LINE__, \ 57 PDBG("put_ep (via %s:%u) ep %p refcnt %d\n", __func__, __LINE__, \
58 ep, atomic_read(&((ep)->kref.refcount))); \ 58 ep, atomic_read(&((ep)->kref.refcount))); \
59 WARN_ON(atomic_read(&((ep)->kref.refcount)) < 1); \
59 kref_put(&((ep)->kref), __free_ep); \ 60 kref_put(&((ep)->kref), __free_ep); \
60} 61}
61 62
diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c
index 8891c3b0a3d5..6cd484e11c11 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_qp.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c
@@ -832,6 +832,7 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp,
832 abort=0; 832 abort=0;
833 disconnect = 1; 833 disconnect = 1;
834 ep = qhp->ep; 834 ep = qhp->ep;
835 get_ep(&ep->com);
835 } 836 }
836 flush_qp(qhp, &flag); 837 flush_qp(qhp, &flag);
837 break; 838 break;
@@ -848,6 +849,7 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp,
848 abort=1; 849 abort=1;
849 disconnect = 1; 850 disconnect = 1;
850 ep = qhp->ep; 851 ep = qhp->ep;
852 get_ep(&ep->com);
851 } 853 }
852 goto err; 854 goto err;
853 break; 855 break;
@@ -929,8 +931,10 @@ out:
929 * on the EP. This can be a normal close (RTS->CLOSING) or 931 * on the EP. This can be a normal close (RTS->CLOSING) or
930 * an abnormal close (RTS/CLOSING->ERROR). 932 * an abnormal close (RTS/CLOSING->ERROR).
931 */ 933 */
932 if (disconnect) 934 if (disconnect) {
933 iwch_ep_disconnect(ep, abort, GFP_KERNEL); 935 iwch_ep_disconnect(ep, abort, GFP_KERNEL);
936 put_ep(&ep->com);
937 }
934 938
935 /* 939 /*
936 * If free is 1, then we've disassociated the EP from the QP 940 * If free is 1, then we've disassociated the EP from the QP