aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband
diff options
context:
space:
mode:
authorSteve Wise <swise@opengridcomputing.com>2011-05-10 01:06:22 -0400
committerRoland Dreier <roland@purestorage.com>2011-05-10 01:06:22 -0400
commitd9594d990a528d4c444777d0f360bb50c6114825 (patch)
tree93b587a40d1bd17200f3076edc5a2c0f78feb230 /drivers/infiniband
parent85d215b0f316bee0a6936bd1a5f21abf03333eaa (diff)
RDMA/cxgb4: Reset wait condition atomically
The driver was never really waiting for RDMA_WR/FINI completions because the condition variable used to determine if the completion happened was never reset, and this condition variable is reused for both connection setup and teardown. This causes various driver crashes under heavy loads due to releasing resources too early. The fix is to use atomic bits to correctly reset the condition immediately after the completion is detected. Signed-off-by: Steve Wise <swise@opengridcomputing.com> Signed-off-by: Roland Dreier <roland@purestorage.com>
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--drivers/infiniband/hw/cxgb4/cm.c30
-rw-r--r--drivers/infiniband/hw/cxgb4/iw_cxgb4.h26
2 files changed, 26 insertions, 30 deletions
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index d235810e52d..d7ee70fc917 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -1198,9 +1198,7 @@ static int pass_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
1198 } 1198 }
1199 PDBG("%s ep %p status %d error %d\n", __func__, ep, 1199 PDBG("%s ep %p status %d error %d\n", __func__, ep,
1200 rpl->status, status2errno(rpl->status)); 1200 rpl->status, status2errno(rpl->status));
1201 ep->com.wr_wait.ret = status2errno(rpl->status); 1201 c4iw_wake_up(&ep->com.wr_wait, status2errno(rpl->status));
1202 ep->com.wr_wait.done = 1;
1203 wake_up(&ep->com.wr_wait.wait);
1204 1202
1205 return 0; 1203 return 0;
1206} 1204}
@@ -1234,9 +1232,7 @@ static int close_listsrv_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
1234 struct c4iw_listen_ep *ep = lookup_stid(t, stid); 1232 struct c4iw_listen_ep *ep = lookup_stid(t, stid);
1235 1233
1236 PDBG("%s ep %p\n", __func__, ep); 1234 PDBG("%s ep %p\n", __func__, ep);
1237 ep->com.wr_wait.ret = status2errno(rpl->status); 1235 c4iw_wake_up(&ep->com.wr_wait, status2errno(rpl->status));
1238 ep->com.wr_wait.done = 1;
1239 wake_up(&ep->com.wr_wait.wait);
1240 return 0; 1236 return 0;
1241} 1237}
1242 1238
@@ -1492,17 +1488,13 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb)
1492 * in rdma connection migration (see c4iw_accept_cr()). 1488 * in rdma connection migration (see c4iw_accept_cr()).
1493 */ 1489 */
1494 __state_set(&ep->com, CLOSING); 1490 __state_set(&ep->com, CLOSING);
1495 ep->com.wr_wait.done = 1;
1496 ep->com.wr_wait.ret = -ECONNRESET;
1497 PDBG("waking up ep %p tid %u\n", ep, ep->hwtid); 1491 PDBG("waking up ep %p tid %u\n", ep, ep->hwtid);
1498 wake_up(&ep->com.wr_wait.wait); 1492 c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
1499 break; 1493 break;
1500 case MPA_REP_SENT: 1494 case MPA_REP_SENT:
1501 __state_set(&ep->com, CLOSING); 1495 __state_set(&ep->com, CLOSING);
1502 ep->com.wr_wait.done = 1;
1503 ep->com.wr_wait.ret = -ECONNRESET;
1504 PDBG("waking up ep %p tid %u\n", ep, ep->hwtid); 1496 PDBG("waking up ep %p tid %u\n", ep, ep->hwtid);
1505 wake_up(&ep->com.wr_wait.wait); 1497 c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
1506 break; 1498 break;
1507 case FPDU_MODE: 1499 case FPDU_MODE:
1508 start_ep_timer(ep); 1500 start_ep_timer(ep);
@@ -1579,9 +1571,7 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
1579 /* 1571 /*
1580 * Wake up any threads in rdma_init() or rdma_fini(). 1572 * Wake up any threads in rdma_init() or rdma_fini().
1581 */ 1573 */
1582 ep->com.wr_wait.done = 1; 1574 c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
1583 ep->com.wr_wait.ret = -ECONNRESET;
1584 wake_up(&ep->com.wr_wait.wait);
1585 1575
1586 mutex_lock(&ep->com.mutex); 1576 mutex_lock(&ep->com.mutex);
1587 switch (ep->com.state) { 1577 switch (ep->com.state) {
@@ -2294,14 +2284,8 @@ static int fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb)
2294 ret = (int)((be64_to_cpu(rpl->data[0]) >> 8) & 0xff); 2284 ret = (int)((be64_to_cpu(rpl->data[0]) >> 8) & 0xff);
2295 wr_waitp = (struct c4iw_wr_wait *)(__force unsigned long) rpl->data[1]; 2285 wr_waitp = (struct c4iw_wr_wait *)(__force unsigned long) rpl->data[1];
2296 PDBG("%s wr_waitp %p ret %u\n", __func__, wr_waitp, ret); 2286 PDBG("%s wr_waitp %p ret %u\n", __func__, wr_waitp, ret);
2297 if (wr_waitp) { 2287 if (wr_waitp)
2298 if (ret) 2288 c4iw_wake_up(wr_waitp, ret ? -ret : 0);
2299 wr_waitp->ret = -ret;
2300 else
2301 wr_waitp->ret = 0;
2302 wr_waitp->done = 1;
2303 wake_up(&wr_waitp->wait);
2304 }
2305 kfree_skb(skb); 2289 kfree_skb(skb);
2306 break; 2290 break;
2307 case 2: 2291 case 2:
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index 8e16eb2de91..3dcfe82b124 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -131,42 +131,54 @@ static inline int c4iw_num_stags(struct c4iw_rdev *rdev)
131 131
132#define C4IW_WR_TO (10*HZ) 132#define C4IW_WR_TO (10*HZ)
133 133
134enum {
135 REPLY_READY = 0,
136};
137
134struct c4iw_wr_wait { 138struct c4iw_wr_wait {
135 wait_queue_head_t wait; 139 wait_queue_head_t wait;
136 int done; 140 unsigned long status;
137 int ret; 141 int ret;
138}; 142};
139 143
140static inline void c4iw_init_wr_wait(struct c4iw_wr_wait *wr_waitp) 144static inline void c4iw_init_wr_wait(struct c4iw_wr_wait *wr_waitp)
141{ 145{
142 wr_waitp->ret = 0; 146 wr_waitp->ret = 0;
143 wr_waitp->done = 0; 147 wr_waitp->status = 0;
144 init_waitqueue_head(&wr_waitp->wait); 148 init_waitqueue_head(&wr_waitp->wait);
145} 149}
146 150
151static inline void c4iw_wake_up(struct c4iw_wr_wait *wr_waitp, int ret)
152{
153 wr_waitp->ret = ret;
154 set_bit(REPLY_READY, &wr_waitp->status);
155 wake_up(&wr_waitp->wait);
156}
157
147static inline int c4iw_wait_for_reply(struct c4iw_rdev *rdev, 158static inline int c4iw_wait_for_reply(struct c4iw_rdev *rdev,
148 struct c4iw_wr_wait *wr_waitp, 159 struct c4iw_wr_wait *wr_waitp,
149 u32 hwtid, u32 qpid, 160 u32 hwtid, u32 qpid,
150 const char *func) 161 const char *func)
151{ 162{
152 unsigned to = C4IW_WR_TO; 163 unsigned to = C4IW_WR_TO;
153 do { 164 int ret;
154 165
155 wait_event_timeout(wr_waitp->wait, wr_waitp->done, to); 166 do {
156 if (!wr_waitp->done) { 167 ret = wait_event_timeout(wr_waitp->wait,
168 test_and_clear_bit(REPLY_READY, &wr_waitp->status), to);
169 if (!ret) {
157 printk(KERN_ERR MOD "%s - Device %s not responding - " 170 printk(KERN_ERR MOD "%s - Device %s not responding - "
158 "tid %u qpid %u\n", func, 171 "tid %u qpid %u\n", func,
159 pci_name(rdev->lldi.pdev), hwtid, qpid); 172 pci_name(rdev->lldi.pdev), hwtid, qpid);
160 to = to << 2; 173 to = to << 2;
161 } 174 }
162 } while (!wr_waitp->done); 175 } while (!ret);
163 if (wr_waitp->ret) 176 if (wr_waitp->ret)
164 PDBG("%s: FW reply %d tid %u qpid %u\n", 177 PDBG("%s: FW reply %d tid %u qpid %u\n",
165 pci_name(rdev->lldi.pdev), wr_waitp->ret, hwtid, qpid); 178 pci_name(rdev->lldi.pdev), wr_waitp->ret, hwtid, qpid);
166 return wr_waitp->ret; 179 return wr_waitp->ret;
167} 180}
168 181
169
170struct c4iw_dev { 182struct c4iw_dev {
171 struct ib_device ibdev; 183 struct ib_device ibdev;
172 struct c4iw_rdev rdev; 184 struct c4iw_rdev rdev;