aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband
diff options
context:
space:
mode:
authorMichael S. Tsirkin <mst@dev.mellanox.co.il>2007-05-21 08:04:59 -0400
committerRoland Dreier <rolandd@cisco.com>2007-05-21 16:35:40 -0400
commit518b1646f8a31904ca637b8df0c1e31c34a7a3c2 (patch)
treeb72e7d9b6b3e5338d636746e77d326bd42aa4e29 /drivers/infiniband
parent24bd1e4e32e88cd3d0675482d15bea498a922ca8 (diff)
IPoIB/cm: Fix SRQ WR leak
SRQ WR leakage has been observed with IPoIB/CM: e.g. flipping ports on and off will, over time, leak all WRs, after which all connections start getting RNR NAKs. Fix this in the way suggested by the spec: move the QP being destroyed to the error state, wait for the "Last WQE Reached" event, and then post a WR on a "drain QP" connected to the same CQ. Once we observe a completion on the drain QP, it is safe to call ib_destroy_qp(). Signed-off-by: Michael S. Tsirkin <mst@dev.mellanox.co.il> Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib.h42
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_cm.c203
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_verbs.c2
3 files changed, 211 insertions, 36 deletions
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 93d4a9a1e1d..a0b3782c762 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -132,12 +132,46 @@ struct ipoib_cm_data {
132 __be32 mtu; 132 __be32 mtu;
133}; 133};
134 134
135/*
136 * Quoting 10.3.1 Queue Pair and EE Context States:
137 *
138 * Note, for QPs that are associated with an SRQ, the Consumer should take the
139 * QP through the Error State before invoking a Destroy QP or a Modify QP to the
140 * Reset State. The Consumer may invoke the Destroy QP without first performing
141 * a Modify QP to the Error State and waiting for the Affiliated Asynchronous
142 * Last WQE Reached Event. However, if the Consumer does not wait for the
143 * Affiliated Asynchronous Last WQE Reached Event, then WQE and Data Segment
144 * leakage may occur. Therefore, it is good programming practice to tear down a
145 * QP that is associated with an SRQ by using the following process:
146 *
147 * - Put the QP in the Error State
148 * - Wait for the Affiliated Asynchronous Last WQE Reached Event;
149 * - either:
150 * drain the CQ by invoking the Poll CQ verb and either wait for CQ
151 * to be empty or the number of Poll CQ operations has exceeded
152 * CQ capacity size;
153 * - or
154 * post another WR that completes on the same CQ and wait for this
155 * WR to return as a WC;
156 * - and then invoke a Destroy QP or Reset QP.
157 *
158 * We use the second option and wait for a completion on the
159 * rx_drain_qp before destroying QPs attached to our SRQ.
160 */
161
/*
 * Teardown progress of a passive (receive side) CM connection.
 *
 * A connection starts out LIVE, becomes ERROR once the driver has
 * decided to tear it down and moves its QP to the error state, and
 * becomes FLUSH when the QP event handler sees the Last WQE Reached
 * event for it.
 */
enum ipoib_cm_state {
	/* Connection is in use. */
	IPOIB_CM_RX_LIVE,
	/* Being torn down; ignored by stale task. */
	IPOIB_CM_RX_ERROR,
	/* Last WQE Reached event observed. */
	IPOIB_CM_RX_FLUSH
};
167
135struct ipoib_cm_rx { 168struct ipoib_cm_rx {
136 struct ib_cm_id *id; 169 struct ib_cm_id *id;
137 struct ib_qp *qp; 170 struct ib_qp *qp;
138 struct list_head list; 171 struct list_head list;
139 struct net_device *dev; 172 struct net_device *dev;
140 unsigned long jiffies; 173 unsigned long jiffies;
174 enum ipoib_cm_state state;
141}; 175};
142 176
143struct ipoib_cm_tx { 177struct ipoib_cm_tx {
@@ -165,10 +199,16 @@ struct ipoib_cm_dev_priv {
165 struct ib_srq *srq; 199 struct ib_srq *srq;
166 struct ipoib_cm_rx_buf *srq_ring; 200 struct ipoib_cm_rx_buf *srq_ring;
167 struct ib_cm_id *id; 201 struct ib_cm_id *id;
168 struct list_head passive_ids; 202 struct ib_qp *rx_drain_qp; /* generates WR described in 10.3.1 */
203 struct list_head passive_ids; /* state: LIVE */
204 struct list_head rx_error_list; /* state: ERROR */
205 struct list_head rx_flush_list; /* state: FLUSH, drain not started */
206 struct list_head rx_drain_list; /* state: FLUSH, drain started */
207 struct list_head rx_reap_list; /* state: FLUSH, drain done */
169 struct work_struct start_task; 208 struct work_struct start_task;
170 struct work_struct reap_task; 209 struct work_struct reap_task;
171 struct work_struct skb_task; 210 struct work_struct skb_task;
211 struct work_struct rx_reap_task;
172 struct delayed_work stale_task; 212 struct delayed_work stale_task;
173 struct sk_buff_head skb_queue; 213 struct sk_buff_head skb_queue;
174 struct list_head start_list; 214 struct list_head start_list;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index eec833b81e9..ffec794b791 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -37,6 +37,7 @@
37#include <net/dst.h> 37#include <net/dst.h>
38#include <net/icmp.h> 38#include <net/icmp.h>
39#include <linux/icmpv6.h> 39#include <linux/icmpv6.h>
40#include <linux/delay.h>
40 41
41#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG_DATA 42#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG_DATA
42static int data_debug_level; 43static int data_debug_level;
@@ -62,6 +63,16 @@ struct ipoib_cm_id {
62 u32 remote_mtu; 63 u32 remote_mtu;
63}; 64};
64 65
66static struct ib_qp_attr ipoib_cm_err_attr = {
67 .qp_state = IB_QPS_ERR
68};
69
70#define IPOIB_CM_RX_DRAIN_WRID 0x7fffffff
71
72static struct ib_recv_wr ipoib_cm_rx_drain_wr = {
73 .wr_id = IPOIB_CM_RX_DRAIN_WRID
74};
75
65static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id, 76static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
66 struct ib_cm_event *event); 77 struct ib_cm_event *event);
67 78
@@ -150,11 +161,44 @@ partial_error:
150 return NULL; 161 return NULL;
151} 162}
152 163
164static void ipoib_cm_start_rx_drain(struct ipoib_dev_priv* priv)
165{
166 struct ib_recv_wr *bad_wr;
167
168 /* rx_drain_qp send queue depth is 1, so
169 * make sure we have at most 1 outstanding WR. */
170 if (list_empty(&priv->cm.rx_flush_list) ||
171 !list_empty(&priv->cm.rx_drain_list))
172 return;
173
174 if (ib_post_recv(priv->cm.rx_drain_qp, &ipoib_cm_rx_drain_wr, &bad_wr))
175 ipoib_warn(priv, "failed to post rx_drain wr\n");
176
177 list_splice_init(&priv->cm.rx_flush_list, &priv->cm.rx_drain_list);
178}
179
180static void ipoib_cm_rx_event_handler(struct ib_event *event, void *ctx)
181{
182 struct ipoib_cm_rx *p = ctx;
183 struct ipoib_dev_priv *priv = netdev_priv(p->dev);
184 unsigned long flags;
185
186 if (event->event != IB_EVENT_QP_LAST_WQE_REACHED)
187 return;
188
189 spin_lock_irqsave(&priv->lock, flags);
190 list_move(&p->list, &priv->cm.rx_flush_list);
191 p->state = IPOIB_CM_RX_FLUSH;
192 ipoib_cm_start_rx_drain(priv);
193 spin_unlock_irqrestore(&priv->lock, flags);
194}
195
153static struct ib_qp *ipoib_cm_create_rx_qp(struct net_device *dev, 196static struct ib_qp *ipoib_cm_create_rx_qp(struct net_device *dev,
154 struct ipoib_cm_rx *p) 197 struct ipoib_cm_rx *p)
155{ 198{
156 struct ipoib_dev_priv *priv = netdev_priv(dev); 199 struct ipoib_dev_priv *priv = netdev_priv(dev);
157 struct ib_qp_init_attr attr = { 200 struct ib_qp_init_attr attr = {
201 .event_handler = ipoib_cm_rx_event_handler,
158 .send_cq = priv->cq, /* does not matter, we never send anything */ 202 .send_cq = priv->cq, /* does not matter, we never send anything */
159 .recv_cq = priv->cq, 203 .recv_cq = priv->cq,
160 .srq = priv->cm.srq, 204 .srq = priv->cm.srq,
@@ -256,6 +300,7 @@ static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even
256 300
257 cm_id->context = p; 301 cm_id->context = p;
258 p->jiffies = jiffies; 302 p->jiffies = jiffies;
303 p->state = IPOIB_CM_RX_LIVE;
259 spin_lock_irq(&priv->lock); 304 spin_lock_irq(&priv->lock);
260 if (list_empty(&priv->cm.passive_ids)) 305 if (list_empty(&priv->cm.passive_ids))
261 queue_delayed_work(ipoib_workqueue, 306 queue_delayed_work(ipoib_workqueue,
@@ -277,7 +322,6 @@ static int ipoib_cm_rx_handler(struct ib_cm_id *cm_id,
277{ 322{
278 struct ipoib_cm_rx *p; 323 struct ipoib_cm_rx *p;
279 struct ipoib_dev_priv *priv; 324 struct ipoib_dev_priv *priv;
280 int ret;
281 325
282 switch (event->event) { 326 switch (event->event) {
283 case IB_CM_REQ_RECEIVED: 327 case IB_CM_REQ_RECEIVED:
@@ -289,20 +333,9 @@ static int ipoib_cm_rx_handler(struct ib_cm_id *cm_id,
289 case IB_CM_REJ_RECEIVED: 333 case IB_CM_REJ_RECEIVED:
290 p = cm_id->context; 334 p = cm_id->context;
291 priv = netdev_priv(p->dev); 335 priv = netdev_priv(p->dev);
292 spin_lock_irq(&priv->lock); 336 if (ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE))
293 if (list_empty(&p->list)) 337 ipoib_warn(priv, "unable to move qp to error state\n");
294 ret = 0; /* Connection is going away already. */ 338 /* Fall through */
295 else {
296 list_del_init(&p->list);
297 ret = -ECONNRESET;
298 }
299 spin_unlock_irq(&priv->lock);
300 if (ret) {
301 ib_destroy_qp(p->qp);
302 kfree(p);
303 return ret;
304 }
305 return 0;
306 default: 339 default:
307 return 0; 340 return 0;
308 } 341 }
@@ -354,8 +387,15 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
354 wr_id, wc->status); 387 wr_id, wc->status);
355 388
356 if (unlikely(wr_id >= ipoib_recvq_size)) { 389 if (unlikely(wr_id >= ipoib_recvq_size)) {
357 ipoib_warn(priv, "cm recv completion event with wrid %d (> %d)\n", 390 if (wr_id == (IPOIB_CM_RX_DRAIN_WRID & ~IPOIB_CM_OP_SRQ)) {
358 wr_id, ipoib_recvq_size); 391 spin_lock_irqsave(&priv->lock, flags);
392 list_splice_init(&priv->cm.rx_drain_list, &priv->cm.rx_reap_list);
393 ipoib_cm_start_rx_drain(priv);
394 queue_work(ipoib_workqueue, &priv->cm.rx_reap_task);
395 spin_unlock_irqrestore(&priv->lock, flags);
396 } else
397 ipoib_warn(priv, "cm recv completion event with wrid %d (> %d)\n",
398 wr_id, ipoib_recvq_size);
359 return; 399 return;
360 } 400 }
361 401
@@ -374,9 +414,9 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
374 if (p && time_after_eq(jiffies, p->jiffies + IPOIB_CM_RX_UPDATE_TIME)) { 414 if (p && time_after_eq(jiffies, p->jiffies + IPOIB_CM_RX_UPDATE_TIME)) {
375 spin_lock_irqsave(&priv->lock, flags); 415 spin_lock_irqsave(&priv->lock, flags);
376 p->jiffies = jiffies; 416 p->jiffies = jiffies;
377 /* Move this entry to list head, but do 417 /* Move this entry to list head, but do not re-add it
378 * not re-add it if it has been removed. */ 418 * if it has been moved out of list. */
379 if (!list_empty(&p->list)) 419 if (p->state == IPOIB_CM_RX_LIVE)
380 list_move(&p->list, &priv->cm.passive_ids); 420 list_move(&p->list, &priv->cm.passive_ids);
381 spin_unlock_irqrestore(&priv->lock, flags); 421 spin_unlock_irqrestore(&priv->lock, flags);
382 } 422 }
@@ -583,17 +623,43 @@ static void ipoib_cm_tx_completion(struct ib_cq *cq, void *tx_ptr)
583int ipoib_cm_dev_open(struct net_device *dev) 623int ipoib_cm_dev_open(struct net_device *dev)
584{ 624{
585 struct ipoib_dev_priv *priv = netdev_priv(dev); 625 struct ipoib_dev_priv *priv = netdev_priv(dev);
626 struct ib_qp_init_attr qp_init_attr = {
627 .send_cq = priv->cq, /* does not matter, we never send anything */
628 .recv_cq = priv->cq,
629 .cap.max_send_wr = 1, /* FIXME: 0 Seems not to work */
630 .cap.max_send_sge = 1, /* FIXME: 0 Seems not to work */
631 .cap.max_recv_wr = 1,
632 .cap.max_recv_sge = 1, /* FIXME: 0 Seems not to work */
633 .sq_sig_type = IB_SIGNAL_ALL_WR,
634 .qp_type = IB_QPT_UC,
635 };
586 int ret; 636 int ret;
587 637
588 if (!IPOIB_CM_SUPPORTED(dev->dev_addr)) 638 if (!IPOIB_CM_SUPPORTED(dev->dev_addr))
589 return 0; 639 return 0;
590 640
641 priv->cm.rx_drain_qp = ib_create_qp(priv->pd, &qp_init_attr);
642 if (IS_ERR(priv->cm.rx_drain_qp)) {
643 printk(KERN_WARNING "%s: failed to create CM ID\n", priv->ca->name);
644 ret = PTR_ERR(priv->cm.rx_drain_qp);
645 return ret;
646 }
647
648 /*
649 * We put the QP in error state directly. This way, a "flush
650 * error" WC will be immediately generated for each WR we post.
651 */
652 ret = ib_modify_qp(priv->cm.rx_drain_qp, &ipoib_cm_err_attr, IB_QP_STATE);
653 if (ret) {
654 ipoib_warn(priv, "failed to modify drain QP to error: %d\n", ret);
655 goto err_qp;
656 }
657
591 priv->cm.id = ib_create_cm_id(priv->ca, ipoib_cm_rx_handler, dev); 658 priv->cm.id = ib_create_cm_id(priv->ca, ipoib_cm_rx_handler, dev);
592 if (IS_ERR(priv->cm.id)) { 659 if (IS_ERR(priv->cm.id)) {
593 printk(KERN_WARNING "%s: failed to create CM ID\n", priv->ca->name); 660 printk(KERN_WARNING "%s: failed to create CM ID\n", priv->ca->name);
594 ret = PTR_ERR(priv->cm.id); 661 ret = PTR_ERR(priv->cm.id);
595 priv->cm.id = NULL; 662 goto err_cm;
596 return ret;
597 } 663 }
598 664
599 ret = ib_cm_listen(priv->cm.id, cpu_to_be64(IPOIB_CM_IETF_ID | priv->qp->qp_num), 665 ret = ib_cm_listen(priv->cm.id, cpu_to_be64(IPOIB_CM_IETF_ID | priv->qp->qp_num),
@@ -601,35 +667,79 @@ int ipoib_cm_dev_open(struct net_device *dev)
601 if (ret) { 667 if (ret) {
602 printk(KERN_WARNING "%s: failed to listen on ID 0x%llx\n", priv->ca->name, 668 printk(KERN_WARNING "%s: failed to listen on ID 0x%llx\n", priv->ca->name,
603 IPOIB_CM_IETF_ID | priv->qp->qp_num); 669 IPOIB_CM_IETF_ID | priv->qp->qp_num);
604 ib_destroy_cm_id(priv->cm.id); 670 goto err_listen;
605 priv->cm.id = NULL;
606 return ret;
607 } 671 }
672
608 return 0; 673 return 0;
674
675err_listen:
676 ib_destroy_cm_id(priv->cm.id);
677err_cm:
678 priv->cm.id = NULL;
679err_qp:
680 ib_destroy_qp(priv->cm.rx_drain_qp);
681 return ret;
609} 682}
610 683
611void ipoib_cm_dev_stop(struct net_device *dev) 684void ipoib_cm_dev_stop(struct net_device *dev)
612{ 685{
613 struct ipoib_dev_priv *priv = netdev_priv(dev); 686 struct ipoib_dev_priv *priv = netdev_priv(dev);
614 struct ipoib_cm_rx *p; 687 struct ipoib_cm_rx *p, *n;
688 unsigned long begin;
689 LIST_HEAD(list);
690 int ret;
615 691
616 if (!IPOIB_CM_SUPPORTED(dev->dev_addr) || !priv->cm.id) 692 if (!IPOIB_CM_SUPPORTED(dev->dev_addr) || !priv->cm.id)
617 return; 693 return;
618 694
619 ib_destroy_cm_id(priv->cm.id); 695 ib_destroy_cm_id(priv->cm.id);
620 priv->cm.id = NULL; 696 priv->cm.id = NULL;
697
621 spin_lock_irq(&priv->lock); 698 spin_lock_irq(&priv->lock);
622 while (!list_empty(&priv->cm.passive_ids)) { 699 while (!list_empty(&priv->cm.passive_ids)) {
623 p = list_entry(priv->cm.passive_ids.next, typeof(*p), list); 700 p = list_entry(priv->cm.passive_ids.next, typeof(*p), list);
624 list_del_init(&p->list); 701 list_move(&p->list, &priv->cm.rx_error_list);
702 p->state = IPOIB_CM_RX_ERROR;
625 spin_unlock_irq(&priv->lock); 703 spin_unlock_irq(&priv->lock);
704 ret = ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE);
705 if (ret)
706 ipoib_warn(priv, "unable to move qp to error state: %d\n", ret);
707 spin_lock_irq(&priv->lock);
708 }
709
710 /* Wait for all RX to be drained */
711 begin = jiffies;
712
713 while (!list_empty(&priv->cm.rx_error_list) ||
714 !list_empty(&priv->cm.rx_flush_list) ||
715 !list_empty(&priv->cm.rx_drain_list)) {
716 if (!time_after(jiffies, begin + 5 * HZ)) {
717 ipoib_warn(priv, "RX drain timing out\n");
718
719 /*
720 * assume the HW is wedged and just free up everything.
721 */
722 list_splice_init(&priv->cm.rx_flush_list, &list);
723 list_splice_init(&priv->cm.rx_error_list, &list);
724 list_splice_init(&priv->cm.rx_drain_list, &list);
725 break;
726 }
727 spin_unlock_irq(&priv->lock);
728 msleep(1);
729 spin_lock_irq(&priv->lock);
730 }
731
732 list_splice_init(&priv->cm.rx_reap_list, &list);
733
734 spin_unlock_irq(&priv->lock);
735
736 list_for_each_entry_safe(p, n, &list, list) {
626 ib_destroy_cm_id(p->id); 737 ib_destroy_cm_id(p->id);
627 ib_destroy_qp(p->qp); 738 ib_destroy_qp(p->qp);
628 kfree(p); 739 kfree(p);
629 spin_lock_irq(&priv->lock);
630 } 740 }
631 spin_unlock_irq(&priv->lock);
632 741
742 ib_destroy_qp(priv->cm.rx_drain_qp);
633 cancel_delayed_work(&priv->cm.stale_task); 743 cancel_delayed_work(&priv->cm.stale_task);
634} 744}
635 745
@@ -1079,24 +1189,44 @@ void ipoib_cm_skb_too_long(struct net_device* dev, struct sk_buff *skb,
1079 queue_work(ipoib_workqueue, &priv->cm.skb_task); 1189 queue_work(ipoib_workqueue, &priv->cm.skb_task);
1080} 1190}
1081 1191
1192static void ipoib_cm_rx_reap(struct work_struct *work)
1193{
1194 struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
1195 cm.rx_reap_task);
1196 struct ipoib_cm_rx *p, *n;
1197 LIST_HEAD(list);
1198
1199 spin_lock_irq(&priv->lock);
1200 list_splice_init(&priv->cm.rx_reap_list, &list);
1201 spin_unlock_irq(&priv->lock);
1202
1203 list_for_each_entry_safe(p, n, &list, list) {
1204 ib_destroy_cm_id(p->id);
1205 ib_destroy_qp(p->qp);
1206 kfree(p);
1207 }
1208}
1209
1082static void ipoib_cm_stale_task(struct work_struct *work) 1210static void ipoib_cm_stale_task(struct work_struct *work)
1083{ 1211{
1084 struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, 1212 struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
1085 cm.stale_task.work); 1213 cm.stale_task.work);
1086 struct ipoib_cm_rx *p; 1214 struct ipoib_cm_rx *p;
1215 int ret;
1087 1216
1088 spin_lock_irq(&priv->lock); 1217 spin_lock_irq(&priv->lock);
1089 while (!list_empty(&priv->cm.passive_ids)) { 1218 while (!list_empty(&priv->cm.passive_ids)) {
1090 /* List if sorted by LRU, start from tail, 1219 /* List is sorted by LRU, start from tail,
1091 * stop when we see a recently used entry */ 1220 * stop when we see a recently used entry */
1092 p = list_entry(priv->cm.passive_ids.prev, typeof(*p), list); 1221 p = list_entry(priv->cm.passive_ids.prev, typeof(*p), list);
1093 if (time_before_eq(jiffies, p->jiffies + IPOIB_CM_RX_TIMEOUT)) 1222 if (time_before_eq(jiffies, p->jiffies + IPOIB_CM_RX_TIMEOUT))
1094 break; 1223 break;
1095 list_del_init(&p->list); 1224 list_move(&p->list, &priv->cm.rx_error_list);
1225 p->state = IPOIB_CM_RX_ERROR;
1096 spin_unlock_irq(&priv->lock); 1226 spin_unlock_irq(&priv->lock);
1097 ib_destroy_cm_id(p->id); 1227 ret = ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE);
1098 ib_destroy_qp(p->qp); 1228 if (ret)
1099 kfree(p); 1229 ipoib_warn(priv, "unable to move qp to error state: %d\n", ret);
1100 spin_lock_irq(&priv->lock); 1230 spin_lock_irq(&priv->lock);
1101 } 1231 }
1102 1232
@@ -1164,9 +1294,14 @@ int ipoib_cm_dev_init(struct net_device *dev)
1164 INIT_LIST_HEAD(&priv->cm.passive_ids); 1294 INIT_LIST_HEAD(&priv->cm.passive_ids);
1165 INIT_LIST_HEAD(&priv->cm.reap_list); 1295 INIT_LIST_HEAD(&priv->cm.reap_list);
1166 INIT_LIST_HEAD(&priv->cm.start_list); 1296 INIT_LIST_HEAD(&priv->cm.start_list);
1297 INIT_LIST_HEAD(&priv->cm.rx_error_list);
1298 INIT_LIST_HEAD(&priv->cm.rx_flush_list);
1299 INIT_LIST_HEAD(&priv->cm.rx_drain_list);
1300 INIT_LIST_HEAD(&priv->cm.rx_reap_list);
1167 INIT_WORK(&priv->cm.start_task, ipoib_cm_tx_start); 1301 INIT_WORK(&priv->cm.start_task, ipoib_cm_tx_start);
1168 INIT_WORK(&priv->cm.reap_task, ipoib_cm_tx_reap); 1302 INIT_WORK(&priv->cm.reap_task, ipoib_cm_tx_reap);
1169 INIT_WORK(&priv->cm.skb_task, ipoib_cm_skb_reap); 1303 INIT_WORK(&priv->cm.skb_task, ipoib_cm_skb_reap);
1304 INIT_WORK(&priv->cm.rx_reap_task, ipoib_cm_rx_reap);
1170 INIT_DELAYED_WORK(&priv->cm.stale_task, ipoib_cm_stale_task); 1305 INIT_DELAYED_WORK(&priv->cm.stale_task, ipoib_cm_stale_task);
1171 1306
1172 skb_queue_head_init(&priv->cm.skb_queue); 1307 skb_queue_head_init(&priv->cm.skb_queue);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index 791252621b2..982eb88e27e 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -173,7 +173,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
173 size = ipoib_sendq_size + ipoib_recvq_size + 1; 173 size = ipoib_sendq_size + ipoib_recvq_size + 1;
174 ret = ipoib_cm_dev_init(dev); 174 ret = ipoib_cm_dev_init(dev);
175 if (!ret) 175 if (!ret)
176 size += ipoib_recvq_size; 176 size += ipoib_recvq_size + 1 /* 1 extra for rx_drain_qp */;
177 177
178 priv->cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, size, 0); 178 priv->cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, size, 0);
179 if (IS_ERR(priv->cq)) { 179 if (IS_ERR(priv->cq)) {