diff options
author | Steve Wise <swise@opengridcomputing.com> | 2008-04-29 16:46:51 -0400 |
---|---|---|
committer | Roland Dreier <rolandd@cisco.com> | 2008-04-29 16:46:51 -0400 |
commit | 989a1780698c65dfe093a6aa89ceeff84c31f528 (patch) | |
tree | 9fe0ca81bfbdbc9f7618fc6b09c1c0c9e3234057 /drivers | |
parent | e463c7b197dbe64b8a99b0612c65f286937e5bf1 (diff) |
RDMA/cxgb3: Correctly serialize peer abort path
Open MPI and other stress testing exposed a few bad bugs in handling
aborts in the middle of a normal close. Fix these by:
- serializing abort reply and peer abort processing with disconnect
processing
- warning (and ignoring) if ep timer is stopped when it wasn't running
- cleaning up disconnect path to correctly deal with aborting and
dead endpoints
- in iwch_modify_qp(), taking a ref on the ep before releasing the qp
lock if iwch_ep_disconnect() will be called. The ref is dropped
after calling disconnect.
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/infiniband/hw/cxgb3/iwch_cm.c | 100 | ||||
-rw-r--r-- | drivers/infiniband/hw/cxgb3/iwch_cm.h | 1 | ||||
-rw-r--r-- | drivers/infiniband/hw/cxgb3/iwch_qp.c | 6 |
3 files changed, 72 insertions, 35 deletions
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c index 72ca360c3dbc..0b515d899f6c 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c | |||
@@ -125,6 +125,12 @@ static void start_ep_timer(struct iwch_ep *ep) | |||
125 | static void stop_ep_timer(struct iwch_ep *ep) | 125 | static void stop_ep_timer(struct iwch_ep *ep) |
126 | { | 126 | { |
127 | PDBG("%s ep %p\n", __func__, ep); | 127 | PDBG("%s ep %p\n", __func__, ep); |
128 | if (!timer_pending(&ep->timer)) { | ||
129 | printk(KERN_ERR "%s timer stopped when its not running! ep %p state %u\n", | ||
130 | __func__, ep, ep->com.state); | ||
131 | WARN_ON(1); | ||
132 | return; | ||
133 | } | ||
128 | del_timer_sync(&ep->timer); | 134 | del_timer_sync(&ep->timer); |
129 | put_ep(&ep->com); | 135 | put_ep(&ep->com); |
130 | } | 136 | } |
@@ -1083,8 +1089,11 @@ static int tx_ack(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) | |||
1083 | static int abort_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) | 1089 | static int abort_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) |
1084 | { | 1090 | { |
1085 | struct iwch_ep *ep = ctx; | 1091 | struct iwch_ep *ep = ctx; |
1092 | unsigned long flags; | ||
1093 | int release = 0; | ||
1086 | 1094 | ||
1087 | PDBG("%s ep %p\n", __func__, ep); | 1095 | PDBG("%s ep %p\n", __func__, ep); |
1096 | BUG_ON(!ep); | ||
1088 | 1097 | ||
1089 | /* | 1098 | /* |
1090 | * We get 2 abort replies from the HW. The first one must | 1099 | * We get 2 abort replies from the HW. The first one must |
@@ -1095,9 +1104,22 @@ static int abort_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) | |||
1095 | return CPL_RET_BUF_DONE; | 1104 | return CPL_RET_BUF_DONE; |
1096 | } | 1105 | } |
1097 | 1106 | ||
1098 | close_complete_upcall(ep); | 1107 | spin_lock_irqsave(&ep->com.lock, flags); |
1099 | state_set(&ep->com, DEAD); | 1108 | switch (ep->com.state) { |
1100 | release_ep_resources(ep); | 1109 | case ABORTING: |
1110 | close_complete_upcall(ep); | ||
1111 | __state_set(&ep->com, DEAD); | ||
1112 | release = 1; | ||
1113 | break; | ||
1114 | default: | ||
1115 | printk(KERN_ERR "%s ep %p state %d\n", | ||
1116 | __func__, ep, ep->com.state); | ||
1117 | break; | ||
1118 | } | ||
1119 | spin_unlock_irqrestore(&ep->com.lock, flags); | ||
1120 | |||
1121 | if (release) | ||
1122 | release_ep_resources(ep); | ||
1101 | return CPL_RET_BUF_DONE; | 1123 | return CPL_RET_BUF_DONE; |
1102 | } | 1124 | } |
1103 | 1125 | ||
@@ -1470,7 +1492,8 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) | |||
1470 | struct sk_buff *rpl_skb; | 1492 | struct sk_buff *rpl_skb; |
1471 | struct iwch_qp_attributes attrs; | 1493 | struct iwch_qp_attributes attrs; |
1472 | int ret; | 1494 | int ret; |
1473 | int state; | 1495 | int release = 0; |
1496 | unsigned long flags; | ||
1474 | 1497 | ||
1475 | if (is_neg_adv_abort(req->status)) { | 1498 | if (is_neg_adv_abort(req->status)) { |
1476 | PDBG("%s neg_adv_abort ep %p tid %d\n", __func__, ep, | 1499 | PDBG("%s neg_adv_abort ep %p tid %d\n", __func__, ep, |
@@ -1488,9 +1511,9 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) | |||
1488 | return CPL_RET_BUF_DONE; | 1511 | return CPL_RET_BUF_DONE; |
1489 | } | 1512 | } |
1490 | 1513 | ||
1491 | state = state_read(&ep->com); | 1514 | spin_lock_irqsave(&ep->com.lock, flags); |
1492 | PDBG("%s ep %p state %u\n", __func__, ep, state); | 1515 | PDBG("%s ep %p state %u\n", __func__, ep, ep->com.state); |
1493 | switch (state) { | 1516 | switch (ep->com.state) { |
1494 | case CONNECTING: | 1517 | case CONNECTING: |
1495 | break; | 1518 | break; |
1496 | case MPA_REQ_WAIT: | 1519 | case MPA_REQ_WAIT: |
@@ -1536,21 +1559,25 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) | |||
1536 | break; | 1559 | break; |
1537 | case DEAD: | 1560 | case DEAD: |
1538 | PDBG("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__); | 1561 | PDBG("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__); |
1562 | spin_unlock_irqrestore(&ep->com.lock, flags); | ||
1539 | return CPL_RET_BUF_DONE; | 1563 | return CPL_RET_BUF_DONE; |
1540 | default: | 1564 | default: |
1541 | BUG_ON(1); | 1565 | BUG_ON(1); |
1542 | break; | 1566 | break; |
1543 | } | 1567 | } |
1544 | dst_confirm(ep->dst); | 1568 | dst_confirm(ep->dst); |
1569 | if (ep->com.state != ABORTING) { | ||
1570 | __state_set(&ep->com, DEAD); | ||
1571 | release = 1; | ||
1572 | } | ||
1573 | spin_unlock_irqrestore(&ep->com.lock, flags); | ||
1545 | 1574 | ||
1546 | rpl_skb = get_skb(skb, sizeof(*rpl), GFP_KERNEL); | 1575 | rpl_skb = get_skb(skb, sizeof(*rpl), GFP_KERNEL); |
1547 | if (!rpl_skb) { | 1576 | if (!rpl_skb) { |
1548 | printk(KERN_ERR MOD "%s - cannot allocate skb!\n", | 1577 | printk(KERN_ERR MOD "%s - cannot allocate skb!\n", |
1549 | __func__); | 1578 | __func__); |
1550 | dst_release(ep->dst); | 1579 | release = 1; |
1551 | l2t_release(L2DATA(ep->com.tdev), ep->l2t); | 1580 | goto out; |
1552 | put_ep(&ep->com); | ||
1553 | return CPL_RET_BUF_DONE; | ||
1554 | } | 1581 | } |
1555 | rpl_skb->priority = CPL_PRIORITY_DATA; | 1582 | rpl_skb->priority = CPL_PRIORITY_DATA; |
1556 | rpl = (struct cpl_abort_rpl *) skb_put(rpl_skb, sizeof(*rpl)); | 1583 | rpl = (struct cpl_abort_rpl *) skb_put(rpl_skb, sizeof(*rpl)); |
@@ -1559,10 +1586,9 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) | |||
1559 | OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, ep->hwtid)); | 1586 | OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, ep->hwtid)); |
1560 | rpl->cmd = CPL_ABORT_NO_RST; | 1587 | rpl->cmd = CPL_ABORT_NO_RST; |
1561 | cxgb3_ofld_send(ep->com.tdev, rpl_skb); | 1588 | cxgb3_ofld_send(ep->com.tdev, rpl_skb); |
1562 | if (state != ABORTING) { | 1589 | out: |
1563 | state_set(&ep->com, DEAD); | 1590 | if (release) |
1564 | release_ep_resources(ep); | 1591 | release_ep_resources(ep); |
1565 | } | ||
1566 | return CPL_RET_BUF_DONE; | 1592 | return CPL_RET_BUF_DONE; |
1567 | } | 1593 | } |
1568 | 1594 | ||
@@ -1661,15 +1687,18 @@ static void ep_timeout(unsigned long arg) | |||
1661 | struct iwch_ep *ep = (struct iwch_ep *)arg; | 1687 | struct iwch_ep *ep = (struct iwch_ep *)arg; |
1662 | struct iwch_qp_attributes attrs; | 1688 | struct iwch_qp_attributes attrs; |
1663 | unsigned long flags; | 1689 | unsigned long flags; |
1690 | int abort = 1; | ||
1664 | 1691 | ||
1665 | spin_lock_irqsave(&ep->com.lock, flags); | 1692 | spin_lock_irqsave(&ep->com.lock, flags); |
1666 | PDBG("%s ep %p tid %u state %d\n", __func__, ep, ep->hwtid, | 1693 | PDBG("%s ep %p tid %u state %d\n", __func__, ep, ep->hwtid, |
1667 | ep->com.state); | 1694 | ep->com.state); |
1668 | switch (ep->com.state) { | 1695 | switch (ep->com.state) { |
1669 | case MPA_REQ_SENT: | 1696 | case MPA_REQ_SENT: |
1697 | __state_set(&ep->com, ABORTING); | ||
1670 | connect_reply_upcall(ep, -ETIMEDOUT); | 1698 | connect_reply_upcall(ep, -ETIMEDOUT); |
1671 | break; | 1699 | break; |
1672 | case MPA_REQ_WAIT: | 1700 | case MPA_REQ_WAIT: |
1701 | __state_set(&ep->com, ABORTING); | ||
1673 | break; | 1702 | break; |
1674 | case CLOSING: | 1703 | case CLOSING: |
1675 | case MORIBUND: | 1704 | case MORIBUND: |
@@ -1679,13 +1708,17 @@ static void ep_timeout(unsigned long arg) | |||
1679 | ep->com.qp, IWCH_QP_ATTR_NEXT_STATE, | 1708 | ep->com.qp, IWCH_QP_ATTR_NEXT_STATE, |
1680 | &attrs, 1); | 1709 | &attrs, 1); |
1681 | } | 1710 | } |
1711 | __state_set(&ep->com, ABORTING); | ||
1682 | break; | 1712 | break; |
1683 | default: | 1713 | default: |
1684 | BUG(); | 1714 | printk(KERN_ERR "%s unexpected state ep %p state %u\n", |
1715 | __func__, ep, ep->com.state); | ||
1716 | WARN_ON(1); | ||
1717 | abort = 0; | ||
1685 | } | 1718 | } |
1686 | __state_set(&ep->com, CLOSING); | ||
1687 | spin_unlock_irqrestore(&ep->com.lock, flags); | 1719 | spin_unlock_irqrestore(&ep->com.lock, flags); |
1688 | abort_connection(ep, NULL, GFP_ATOMIC); | 1720 | if (abort) |
1721 | abort_connection(ep, NULL, GFP_ATOMIC); | ||
1689 | put_ep(&ep->com); | 1722 | put_ep(&ep->com); |
1690 | } | 1723 | } |
1691 | 1724 | ||
@@ -1968,40 +2001,39 @@ int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, gfp_t gfp) | |||
1968 | PDBG("%s ep %p state %s, abrupt %d\n", __func__, ep, | 2001 | PDBG("%s ep %p state %s, abrupt %d\n", __func__, ep, |
1969 | states[ep->com.state], abrupt); | 2002 | states[ep->com.state], abrupt); |
1970 | 2003 | ||
1971 | if (ep->com.state == DEAD) { | ||
1972 | PDBG("%s already dead ep %p\n", __func__, ep); | ||
1973 | goto out; | ||
1974 | } | ||
1975 | |||
1976 | if (abrupt) { | ||
1977 | if (ep->com.state != ABORTING) { | ||
1978 | ep->com.state = ABORTING; | ||
1979 | close = 1; | ||
1980 | } | ||
1981 | goto out; | ||
1982 | } | ||
1983 | |||
1984 | switch (ep->com.state) { | 2004 | switch (ep->com.state) { |
1985 | case MPA_REQ_WAIT: | 2005 | case MPA_REQ_WAIT: |
1986 | case MPA_REQ_SENT: | 2006 | case MPA_REQ_SENT: |
1987 | case MPA_REQ_RCVD: | 2007 | case MPA_REQ_RCVD: |
1988 | case MPA_REP_SENT: | 2008 | case MPA_REP_SENT: |
1989 | case FPDU_MODE: | 2009 | case FPDU_MODE: |
1990 | start_ep_timer(ep); | ||
1991 | ep->com.state = CLOSING; | ||
1992 | close = 1; | 2010 | close = 1; |
2011 | if (abrupt) | ||
2012 | ep->com.state = ABORTING; | ||
2013 | else { | ||
2014 | ep->com.state = CLOSING; | ||
2015 | start_ep_timer(ep); | ||
2016 | } | ||
1993 | break; | 2017 | break; |
1994 | case CLOSING: | 2018 | case CLOSING: |
1995 | ep->com.state = MORIBUND; | ||
1996 | close = 1; | 2019 | close = 1; |
2020 | if (abrupt) { | ||
2021 | stop_ep_timer(ep); | ||
2022 | ep->com.state = ABORTING; | ||
2023 | } else | ||
2024 | ep->com.state = MORIBUND; | ||
1997 | break; | 2025 | break; |
1998 | case MORIBUND: | 2026 | case MORIBUND: |
2027 | case ABORTING: | ||
2028 | case DEAD: | ||
2029 | PDBG("%s ignoring disconnect ep %p state %u\n", | ||
2030 | __func__, ep, ep->com.state); | ||
1999 | break; | 2031 | break; |
2000 | default: | 2032 | default: |
2001 | BUG(); | 2033 | BUG(); |
2002 | break; | 2034 | break; |
2003 | } | 2035 | } |
2004 | out: | 2036 | |
2005 | spin_unlock_irqrestore(&ep->com.lock, flags); | 2037 | spin_unlock_irqrestore(&ep->com.lock, flags); |
2006 | if (close) { | 2038 | if (close) { |
2007 | if (abrupt) | 2039 | if (abrupt) |
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.h b/drivers/infiniband/hw/cxgb3/iwch_cm.h index 2bb7fbdb3ff4..a2f1b787d970 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.h +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.h | |||
@@ -56,6 +56,7 @@ | |||
56 | #define put_ep(ep) { \ | 56 | #define put_ep(ep) { \ |
57 | PDBG("put_ep (via %s:%u) ep %p refcnt %d\n", __func__, __LINE__, \ | 57 | PDBG("put_ep (via %s:%u) ep %p refcnt %d\n", __func__, __LINE__, \ |
58 | ep, atomic_read(&((ep)->kref.refcount))); \ | 58 | ep, atomic_read(&((ep)->kref.refcount))); \ |
59 | WARN_ON(atomic_read(&((ep)->kref.refcount)) < 1); \ | ||
59 | kref_put(&((ep)->kref), __free_ep); \ | 60 | kref_put(&((ep)->kref), __free_ep); \ |
60 | } | 61 | } |
61 | 62 | ||
diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c index 8891c3b0a3d5..6cd484e11c11 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_qp.c +++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c | |||
@@ -832,6 +832,7 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp, | |||
832 | abort=0; | 832 | abort=0; |
833 | disconnect = 1; | 833 | disconnect = 1; |
834 | ep = qhp->ep; | 834 | ep = qhp->ep; |
835 | get_ep(&ep->com); | ||
835 | } | 836 | } |
836 | flush_qp(qhp, &flag); | 837 | flush_qp(qhp, &flag); |
837 | break; | 838 | break; |
@@ -848,6 +849,7 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp, | |||
848 | abort=1; | 849 | abort=1; |
849 | disconnect = 1; | 850 | disconnect = 1; |
850 | ep = qhp->ep; | 851 | ep = qhp->ep; |
852 | get_ep(&ep->com); | ||
851 | } | 853 | } |
852 | goto err; | 854 | goto err; |
853 | break; | 855 | break; |
@@ -929,8 +931,10 @@ out: | |||
929 | * on the EP. This can be a normal close (RTS->CLOSING) or | 931 | * on the EP. This can be a normal close (RTS->CLOSING) or |
930 | * an abnormal close (RTS/CLOSING->ERROR). | 932 | * an abnormal close (RTS/CLOSING->ERROR). |
931 | */ | 933 | */ |
932 | if (disconnect) | 934 | if (disconnect) { |
933 | iwch_ep_disconnect(ep, abort, GFP_KERNEL); | 935 | iwch_ep_disconnect(ep, abort, GFP_KERNEL); |
936 | put_ep(&ep->com); | ||
937 | } | ||
934 | 938 | ||
935 | /* | 939 | /* |
936 | * If free is 1, then we've disassociated the EP from the QP | 940 | * If free is 1, then we've disassociated the EP from the QP |