about | summary | refs | log | tree | commit | diff | stats
path: root/drivers/infiniband
diff options
context:
space:
mode:
author Pradeep Satyanarayana <pradeeps@linux.vnet.ibm.com> 2008-01-25 17:15:24 -0500
committer Roland Dreier <rolandd@cisco.com> 2008-01-25 17:15:24 -0500
commit 68e995a295720439ad2bf8677114cdf9d262d905 (patch)
tree 24de129501f4fb576f1fc9eef90900a342707278 /drivers/infiniband
parent efcd99717f76c6d19dd81203c60fe198480de522 (diff)
IPoIB/cm: Add connected mode support for devices without SRQs
Some IB adapters (notably IBM's eHCA) do not implement SRQs (shared receive queues). The current IPoIB connected mode support only works on devices that support SRQs.

Fix this by adding support for using the receive queue of each connected mode receive QP. The disadvantage of this compared to using an SRQ is that it means a full queue of receives must be posted for each remote connected mode peer, which means that total memory usage is potentially much higher than when using SRQs. To manage this, add a new module parameter "max_nonsrq_conn_qp" that limits the number of connections allowed per interface.

The rest of the changes are fairly straightforward: we use a table of struct ipoib_cm_rx to hold all the active connections, and put the table index of the connection in the high bits of receive WR IDs. This is needed because we cannot rely on the struct ib_wc.qp field for non-SRQ receive completions. Most of the rest of the changes just test whether or not an SRQ is available, and post receives or find received packets in the right place depending on the answer.

Cleaning up dead connections actually becomes simpler, because we do not have to do the "last WQE reached" dance that is required to destroy QPs attached to an SRQ. We just move the QP to the error state and wait for all pending receives to be flushed.

Signed-off-by: Pradeep Satyanarayana <pradeeps@linux.vnet.ibm.com>

[ Completely rewritten and split up, based on Pradeep's work. Several bugs fixed and no doubt several bugs introduced. - Roland ]

Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/infiniband')
-rw-r--r-- drivers/infiniband/ulp/ipoib/ipoib.h 19
-rw-r--r-- drivers/infiniband/ulp/ipoib/ipoib_cm.c 213
-rw-r--r-- drivers/infiniband/ulp/ipoib/ipoib_main.c 3
-rw-r--r-- drivers/infiniband/ulp/ipoib/ipoib_verbs.c 8
4 files changed, 200 insertions, 43 deletions
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index a376fb6ffa0e..d35025f0652b 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -69,6 +69,7 @@ enum {
69 IPOIB_TX_RING_SIZE = 64, 69 IPOIB_TX_RING_SIZE = 64,
70 IPOIB_MAX_QUEUE_SIZE = 8192, 70 IPOIB_MAX_QUEUE_SIZE = 8192,
71 IPOIB_MIN_QUEUE_SIZE = 2, 71 IPOIB_MIN_QUEUE_SIZE = 2,
72 IPOIB_CM_MAX_CONN_QP = 4096,
72 73
73 IPOIB_NUM_WC = 4, 74 IPOIB_NUM_WC = 4,
74 75
@@ -188,10 +189,12 @@ enum ipoib_cm_state {
188struct ipoib_cm_rx { 189struct ipoib_cm_rx {
189 struct ib_cm_id *id; 190 struct ib_cm_id *id;
190 struct ib_qp *qp; 191 struct ib_qp *qp;
192 struct ipoib_cm_rx_buf *rx_ring;
191 struct list_head list; 193 struct list_head list;
192 struct net_device *dev; 194 struct net_device *dev;
193 unsigned long jiffies; 195 unsigned long jiffies;
194 enum ipoib_cm_state state; 196 enum ipoib_cm_state state;
197 int recv_count;
195}; 198};
196 199
197struct ipoib_cm_tx { 200struct ipoib_cm_tx {
@@ -234,6 +237,7 @@ struct ipoib_cm_dev_priv {
234 struct ib_wc ibwc[IPOIB_NUM_WC]; 237 struct ib_wc ibwc[IPOIB_NUM_WC];
235 struct ib_sge rx_sge[IPOIB_CM_RX_SG]; 238 struct ib_sge rx_sge[IPOIB_CM_RX_SG];
236 struct ib_recv_wr rx_wr; 239 struct ib_recv_wr rx_wr;
240 int nonsrq_conn_qp;
237}; 241};
238 242
239/* 243/*
@@ -461,6 +465,8 @@ void ipoib_drain_cq(struct net_device *dev);
461/* We don't support UC connections at the moment */ 465/* We don't support UC connections at the moment */
462#define IPOIB_CM_SUPPORTED(ha) (ha[0] & (IPOIB_FLAGS_RC)) 466#define IPOIB_CM_SUPPORTED(ha) (ha[0] & (IPOIB_FLAGS_RC))
463 467
468extern int ipoib_max_conn_qp;
469
464static inline int ipoib_cm_admin_enabled(struct net_device *dev) 470static inline int ipoib_cm_admin_enabled(struct net_device *dev)
465{ 471{
466 struct ipoib_dev_priv *priv = netdev_priv(dev); 472 struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -491,6 +497,12 @@ static inline void ipoib_cm_set(struct ipoib_neigh *neigh, struct ipoib_cm_tx *t
491 neigh->cm = tx; 497 neigh->cm = tx;
492} 498}
493 499
500static inline int ipoib_cm_has_srq(struct net_device *dev)
501{
502 struct ipoib_dev_priv *priv = netdev_priv(dev);
503 return !!priv->cm.srq;
504}
505
494void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx); 506void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx);
495int ipoib_cm_dev_open(struct net_device *dev); 507int ipoib_cm_dev_open(struct net_device *dev);
496void ipoib_cm_dev_stop(struct net_device *dev); 508void ipoib_cm_dev_stop(struct net_device *dev);
@@ -508,6 +520,8 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc);
508 520
509struct ipoib_cm_tx; 521struct ipoib_cm_tx;
510 522
523#define ipoib_max_conn_qp 0
524
511static inline int ipoib_cm_admin_enabled(struct net_device *dev) 525static inline int ipoib_cm_admin_enabled(struct net_device *dev)
512{ 526{
513 return 0; 527 return 0;
@@ -533,6 +547,11 @@ static inline void ipoib_cm_set(struct ipoib_neigh *neigh, struct ipoib_cm_tx *t
533{ 547{
534} 548}
535 549
550static inline int ipoib_cm_has_srq(struct net_device *dev)
551{
552 return 0;
553}
554
536static inline 555static inline
537void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx) 556void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx)
538{ 557{
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 75717a9cbcdc..fdf33cecc6d5 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -39,6 +39,15 @@
39#include <linux/icmpv6.h> 39#include <linux/icmpv6.h>
40#include <linux/delay.h> 40#include <linux/delay.h>
41 41
42#include "ipoib.h"
43
44int ipoib_max_conn_qp = 128;
45
46module_param_named(max_nonsrq_conn_qp, ipoib_max_conn_qp, int, 0444);
47MODULE_PARM_DESC(max_nonsrq_conn_qp,
48 "Max number of connected-mode QPs per interface "
49 "(applied only if shared receive queue is not available)");
50
42#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG_DATA 51#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG_DATA
43static int data_debug_level; 52static int data_debug_level;
44 53
@@ -47,8 +56,6 @@ MODULE_PARM_DESC(cm_data_debug_level,
47 "Enable data path debug tracing for connected mode if > 0"); 56 "Enable data path debug tracing for connected mode if > 0");
48#endif 57#endif
49 58
50#include "ipoib.h"
51
52#define IPOIB_CM_IETF_ID 0x1000000000000000ULL 59#define IPOIB_CM_IETF_ID 0x1000000000000000ULL
53 60
54#define IPOIB_CM_RX_UPDATE_TIME (256 * HZ) 61#define IPOIB_CM_RX_UPDATE_TIME (256 * HZ)
@@ -81,7 +88,7 @@ static void ipoib_cm_dma_unmap_rx(struct ipoib_dev_priv *priv, int frags,
81 ib_dma_unmap_single(priv->ca, mapping[i + 1], PAGE_SIZE, DMA_FROM_DEVICE); 88 ib_dma_unmap_single(priv->ca, mapping[i + 1], PAGE_SIZE, DMA_FROM_DEVICE);
82} 89}
83 90
84static int ipoib_cm_post_receive(struct net_device *dev, int id) 91static int ipoib_cm_post_receive_srq(struct net_device *dev, int id)
85{ 92{
86 struct ipoib_dev_priv *priv = netdev_priv(dev); 93 struct ipoib_dev_priv *priv = netdev_priv(dev);
87 struct ib_recv_wr *bad_wr; 94 struct ib_recv_wr *bad_wr;
@@ -104,7 +111,33 @@ static int ipoib_cm_post_receive(struct net_device *dev, int id)
104 return ret; 111 return ret;
105} 112}
106 113
107static struct sk_buff *ipoib_cm_alloc_rx_skb(struct net_device *dev, int id, int frags, 114static int ipoib_cm_post_receive_nonsrq(struct net_device *dev,
115 struct ipoib_cm_rx *rx, int id)
116{
117 struct ipoib_dev_priv *priv = netdev_priv(dev);
118 struct ib_recv_wr *bad_wr;
119 int i, ret;
120
121 priv->cm.rx_wr.wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV;
122
123 for (i = 0; i < IPOIB_CM_RX_SG; ++i)
124 priv->cm.rx_sge[i].addr = rx->rx_ring[id].mapping[i];
125
126 ret = ib_post_recv(rx->qp, &priv->cm.rx_wr, &bad_wr);
127 if (unlikely(ret)) {
128 ipoib_warn(priv, "post recv failed for buf %d (%d)\n", id, ret);
129 ipoib_cm_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1,
130 rx->rx_ring[id].mapping);
131 dev_kfree_skb_any(rx->rx_ring[id].skb);
132 rx->rx_ring[id].skb = NULL;
133 }
134
135 return ret;
136}
137
138static struct sk_buff *ipoib_cm_alloc_rx_skb(struct net_device *dev,
139 struct ipoib_cm_rx_buf *rx_ring,
140 int id, int frags,
108 u64 mapping[IPOIB_CM_RX_SG]) 141 u64 mapping[IPOIB_CM_RX_SG])
109{ 142{
110 struct ipoib_dev_priv *priv = netdev_priv(dev); 143 struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -141,7 +174,7 @@ static struct sk_buff *ipoib_cm_alloc_rx_skb(struct net_device *dev, int id, int
141 goto partial_error; 174 goto partial_error;
142 } 175 }
143 176
144 priv->cm.srq_ring[id].skb = skb; 177 rx_ring[id].skb = skb;
145 return skb; 178 return skb;
146 179
147partial_error: 180partial_error:
@@ -224,12 +257,18 @@ static struct ib_qp *ipoib_cm_create_rx_qp(struct net_device *dev,
224 .qp_type = IB_QPT_RC, 257 .qp_type = IB_QPT_RC,
225 .qp_context = p, 258 .qp_context = p,
226 }; 259 };
260
261 if (!ipoib_cm_has_srq(dev)) {
262 attr.cap.max_recv_wr = ipoib_recvq_size;
263 attr.cap.max_recv_sge = IPOIB_CM_RX_SG;
264 }
265
227 return ib_create_qp(priv->pd, &attr); 266 return ib_create_qp(priv->pd, &attr);
228} 267}
229 268
230static int ipoib_cm_modify_rx_qp(struct net_device *dev, 269static int ipoib_cm_modify_rx_qp(struct net_device *dev,
231 struct ib_cm_id *cm_id, struct ib_qp *qp, 270 struct ib_cm_id *cm_id, struct ib_qp *qp,
232 unsigned psn) 271 unsigned psn)
233{ 272{
234 struct ipoib_dev_priv *priv = netdev_priv(dev); 273 struct ipoib_dev_priv *priv = netdev_priv(dev);
235 struct ib_qp_attr qp_attr; 274 struct ib_qp_attr qp_attr;
@@ -282,6 +321,60 @@ static int ipoib_cm_modify_rx_qp(struct net_device *dev,
282 return 0; 321 return 0;
283} 322}
284 323
324static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_id,
325 struct ipoib_cm_rx *rx)
326{
327 struct ipoib_dev_priv *priv = netdev_priv(dev);
328 int ret;
329 int i;
330
331 rx->rx_ring = kcalloc(ipoib_recvq_size, sizeof *rx->rx_ring, GFP_KERNEL);
332 if (!rx->rx_ring)
333 return -ENOMEM;
334
335 spin_lock_irq(&priv->lock);
336
337 if (priv->cm.nonsrq_conn_qp >= ipoib_max_conn_qp) {
338 spin_unlock_irq(&priv->lock);
339 ib_send_cm_rej(cm_id, IB_CM_REJ_NO_QP, NULL, 0, NULL, 0);
340 ret = -EINVAL;
341 goto err_free;
342 } else
343 ++priv->cm.nonsrq_conn_qp;
344
345 spin_unlock_irq(&priv->lock);
346
347 for (i = 0; i < ipoib_recvq_size; ++i) {
348 if (!ipoib_cm_alloc_rx_skb(dev, rx->rx_ring, i, IPOIB_CM_RX_SG - 1,
349 rx->rx_ring[i].mapping)) {
350 ipoib_warn(priv, "failed to allocate receive buffer %d\n", i);
351 ret = -ENOMEM;
352 goto err_count;
353 }
354 ret = ipoib_cm_post_receive_nonsrq(dev, rx, i);
355 if (ret) {
356 ipoib_warn(priv, "ipoib_cm_post_receive_nonsrq "
357 "failed for buf %d\n", i);
358 ret = -EIO;
359 goto err_count;
360 }
361 }
362
363 rx->recv_count = ipoib_recvq_size;
364
365 return 0;
366
367err_count:
368 spin_lock_irq(&priv->lock);
369 --priv->cm.nonsrq_conn_qp;
370 spin_unlock_irq(&priv->lock);
371
372err_free:
373 ipoib_cm_free_rx_ring(dev, rx->rx_ring);
374
375 return ret;
376}
377
285static int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id, 378static int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id,
286 struct ib_qp *qp, struct ib_cm_req_event_param *req, 379 struct ib_qp *qp, struct ib_cm_req_event_param *req,
287 unsigned psn) 380 unsigned psn)
@@ -297,7 +390,7 @@ static int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id,
297 rep.private_data_len = sizeof data; 390 rep.private_data_len = sizeof data;
298 rep.flow_control = 0; 391 rep.flow_control = 0;
299 rep.rnr_retry_count = req->rnr_retry_count; 392 rep.rnr_retry_count = req->rnr_retry_count;
300 rep.srq = 1; 393 rep.srq = ipoib_cm_has_srq(dev);
301 rep.qp_num = qp->qp_num; 394 rep.qp_num = qp->qp_num;
302 rep.starting_psn = psn; 395 rep.starting_psn = psn;
303 return ib_send_cm_rep(cm_id, &rep); 396 return ib_send_cm_rep(cm_id, &rep);
@@ -333,6 +426,12 @@ static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even
333 if (ret) 426 if (ret)
334 goto err_modify; 427 goto err_modify;
335 428
429 if (!ipoib_cm_has_srq(dev)) {
430 ret = ipoib_cm_nonsrq_init_rx(dev, cm_id, p);
431 if (ret)
432 goto err_modify;
433 }
434
336 spin_lock_irq(&priv->lock); 435 spin_lock_irq(&priv->lock);
337 queue_delayed_work(ipoib_workqueue, 436 queue_delayed_work(ipoib_workqueue,
338 &priv->cm.stale_task, IPOIB_CM_RX_DELAY); 437 &priv->cm.stale_task, IPOIB_CM_RX_DELAY);
@@ -417,12 +516,14 @@ static void skb_put_frags(struct sk_buff *skb, unsigned int hdr_space,
417void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) 516void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
418{ 517{
419 struct ipoib_dev_priv *priv = netdev_priv(dev); 518 struct ipoib_dev_priv *priv = netdev_priv(dev);
519 struct ipoib_cm_rx_buf *rx_ring;
420 unsigned int wr_id = wc->wr_id & ~(IPOIB_OP_CM | IPOIB_OP_RECV); 520 unsigned int wr_id = wc->wr_id & ~(IPOIB_OP_CM | IPOIB_OP_RECV);
421 struct sk_buff *skb, *newskb; 521 struct sk_buff *skb, *newskb;
422 struct ipoib_cm_rx *p; 522 struct ipoib_cm_rx *p;
423 unsigned long flags; 523 unsigned long flags;
424 u64 mapping[IPOIB_CM_RX_SG]; 524 u64 mapping[IPOIB_CM_RX_SG];
425 int frags; 525 int frags;
526 int has_srq;
426 527
427 ipoib_dbg_data(priv, "cm recv completion: id %d, status: %d\n", 528 ipoib_dbg_data(priv, "cm recv completion: id %d, status: %d\n",
428 wr_id, wc->status); 529 wr_id, wc->status);
@@ -440,18 +541,32 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
440 return; 541 return;
441 } 542 }
442 543
443 skb = priv->cm.srq_ring[wr_id].skb; 544 p = wc->qp->qp_context;
545
546 has_srq = ipoib_cm_has_srq(dev);
547 rx_ring = has_srq ? priv->cm.srq_ring : p->rx_ring;
548
549 skb = rx_ring[wr_id].skb;
444 550
445 if (unlikely(wc->status != IB_WC_SUCCESS)) { 551 if (unlikely(wc->status != IB_WC_SUCCESS)) {
446 ipoib_dbg(priv, "cm recv error " 552 ipoib_dbg(priv, "cm recv error "
447 "(status=%d, wrid=%d vend_err %x)\n", 553 "(status=%d, wrid=%d vend_err %x)\n",
448 wc->status, wr_id, wc->vendor_err); 554 wc->status, wr_id, wc->vendor_err);
449 ++dev->stats.rx_dropped; 555 ++dev->stats.rx_dropped;
450 goto repost; 556 if (has_srq)
557 goto repost;
558 else {
559 if (!--p->recv_count) {
560 spin_lock_irqsave(&priv->lock, flags);
561 list_move(&p->list, &priv->cm.rx_reap_list);
562 spin_unlock_irqrestore(&priv->lock, flags);
563 queue_work(ipoib_workqueue, &priv->cm.rx_reap_task);
564 }
565 return;
566 }
451 } 567 }
452 568
453 if (unlikely(!(wr_id & IPOIB_CM_RX_UPDATE_MASK))) { 569 if (unlikely(!(wr_id & IPOIB_CM_RX_UPDATE_MASK))) {
454 p = wc->qp->qp_context;
455 if (p && time_after_eq(jiffies, p->jiffies + IPOIB_CM_RX_UPDATE_TIME)) { 570 if (p && time_after_eq(jiffies, p->jiffies + IPOIB_CM_RX_UPDATE_TIME)) {
456 spin_lock_irqsave(&priv->lock, flags); 571 spin_lock_irqsave(&priv->lock, flags);
457 p->jiffies = jiffies; 572 p->jiffies = jiffies;
@@ -466,7 +581,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
466 frags = PAGE_ALIGN(wc->byte_len - min(wc->byte_len, 581 frags = PAGE_ALIGN(wc->byte_len - min(wc->byte_len,
467 (unsigned)IPOIB_CM_HEAD_SIZE)) / PAGE_SIZE; 582 (unsigned)IPOIB_CM_HEAD_SIZE)) / PAGE_SIZE;
468 583
469 newskb = ipoib_cm_alloc_rx_skb(dev, wr_id, frags, mapping); 584 newskb = ipoib_cm_alloc_rx_skb(dev, rx_ring, wr_id, frags, mapping);
470 if (unlikely(!newskb)) { 585 if (unlikely(!newskb)) {
471 /* 586 /*
472 * If we can't allocate a new RX buffer, dump 587 * If we can't allocate a new RX buffer, dump
@@ -477,8 +592,8 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
477 goto repost; 592 goto repost;
478 } 593 }
479 594
480 ipoib_cm_dma_unmap_rx(priv, frags, priv->cm.srq_ring[wr_id].mapping); 595 ipoib_cm_dma_unmap_rx(priv, frags, rx_ring[wr_id].mapping);
481 memcpy(priv->cm.srq_ring[wr_id].mapping, mapping, (frags + 1) * sizeof *mapping); 596 memcpy(rx_ring[wr_id].mapping, mapping, (frags + 1) * sizeof *mapping);
482 597
483 ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n", 598 ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n",
484 wc->byte_len, wc->slid); 599 wc->byte_len, wc->slid);
@@ -499,9 +614,17 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
499 netif_receive_skb(skb); 614 netif_receive_skb(skb);
500 615
501repost: 616repost:
502 if (unlikely(ipoib_cm_post_receive(dev, wr_id))) 617 if (has_srq) {
503 ipoib_warn(priv, "ipoib_cm_post_receive failed " 618 if (unlikely(ipoib_cm_post_receive_srq(dev, wr_id)))
504 "for buf %d\n", wr_id); 619 ipoib_warn(priv, "ipoib_cm_post_receive_srq failed "
620 "for buf %d\n", wr_id);
621 } else {
622 if (unlikely(ipoib_cm_post_receive_nonsrq(dev, p, wr_id))) {
623 --p->recv_count;
624 ipoib_warn(priv, "ipoib_cm_post_receive_nonsrq failed "
625 "for buf %d\n", wr_id);
626 }
627 }
505} 628}
506 629
507static inline int post_send(struct ipoib_dev_priv *priv, 630static inline int post_send(struct ipoib_dev_priv *priv,
@@ -686,6 +809,12 @@ static void ipoib_cm_free_rx_reap_list(struct net_device *dev)
686 list_for_each_entry_safe(rx, n, &list, list) { 809 list_for_each_entry_safe(rx, n, &list, list) {
687 ib_destroy_cm_id(rx->id); 810 ib_destroy_cm_id(rx->id);
688 ib_destroy_qp(rx->qp); 811 ib_destroy_qp(rx->qp);
812 if (!ipoib_cm_has_srq(dev)) {
813 ipoib_cm_free_rx_ring(priv->dev, rx->rx_ring);
814 spin_lock_irq(&priv->lock);
815 --priv->cm.nonsrq_conn_qp;
816 spin_unlock_irq(&priv->lock);
817 }
689 kfree(rx); 818 kfree(rx);
690 } 819 }
691} 820}
@@ -864,7 +993,7 @@ static int ipoib_cm_send_req(struct net_device *dev,
864 req.retry_count = 0; /* RFC draft warns against retries */ 993 req.retry_count = 0; /* RFC draft warns against retries */
865 req.rnr_retry_count = 0; /* RFC draft warns against retries */ 994 req.rnr_retry_count = 0; /* RFC draft warns against retries */
866 req.max_cm_retries = 15; 995 req.max_cm_retries = 15;
867 req.srq = 1; 996 req.srq = ipoib_cm_has_srq(dev);
868 return ib_send_cm_req(id, &req); 997 return ib_send_cm_req(id, &req);
869} 998}
870 999
@@ -1270,7 +1399,7 @@ int ipoib_cm_add_mode_attr(struct net_device *dev)
1270 return device_create_file(&dev->dev, &dev_attr_mode); 1399 return device_create_file(&dev->dev, &dev_attr_mode);
1271} 1400}
1272 1401
1273static int ipoib_cm_create_srq(struct net_device *dev) 1402static void ipoib_cm_create_srq(struct net_device *dev)
1274{ 1403{
1275 struct ipoib_dev_priv *priv = netdev_priv(dev); 1404 struct ipoib_dev_priv *priv = netdev_priv(dev);
1276 struct ib_srq_init_attr srq_init_attr = { 1405 struct ib_srq_init_attr srq_init_attr = {
@@ -1279,32 +1408,30 @@ static int ipoib_cm_create_srq(struct net_device *dev)
1279 .max_sge = IPOIB_CM_RX_SG 1408 .max_sge = IPOIB_CM_RX_SG
1280 } 1409 }
1281 }; 1410 };
1282 int ret;
1283 1411
1284 priv->cm.srq = ib_create_srq(priv->pd, &srq_init_attr); 1412 priv->cm.srq = ib_create_srq(priv->pd, &srq_init_attr);
1285 if (IS_ERR(priv->cm.srq)) { 1413 if (IS_ERR(priv->cm.srq)) {
1286 ret = PTR_ERR(priv->cm.srq); 1414 if (PTR_ERR(priv->cm.srq) != -ENOSYS)
1415 printk(KERN_WARNING "%s: failed to allocate SRQ, error %ld\n",
1416 priv->ca->name, PTR_ERR(priv->cm.srq));
1287 priv->cm.srq = NULL; 1417 priv->cm.srq = NULL;
1288 return ret; 1418 return;
1289 } 1419 }
1290 1420
1291 priv->cm.srq_ring = kzalloc(ipoib_recvq_size * sizeof *priv->cm.srq_ring, 1421 priv->cm.srq_ring = kzalloc(ipoib_recvq_size * sizeof *priv->cm.srq_ring,
1292 GFP_KERNEL); 1422 GFP_KERNEL);
1293 if (!priv->cm.srq_ring) { 1423 if (!priv->cm.srq_ring) {
1294 printk(KERN_WARNING "%s: failed to allocate CM ring (%d entries)\n", 1424 printk(KERN_WARNING "%s: failed to allocate CM SRQ ring (%d entries)\n",
1295 priv->ca->name, ipoib_recvq_size); 1425 priv->ca->name, ipoib_recvq_size);
1296 ib_destroy_srq(priv->cm.srq); 1426 ib_destroy_srq(priv->cm.srq);
1297 priv->cm.srq = NULL; 1427 priv->cm.srq = NULL;
1298 return -ENOMEM;
1299 } 1428 }
1300
1301 return 0;
1302} 1429}
1303 1430
1304int ipoib_cm_dev_init(struct net_device *dev) 1431int ipoib_cm_dev_init(struct net_device *dev)
1305{ 1432{
1306 struct ipoib_dev_priv *priv = netdev_priv(dev); 1433 struct ipoib_dev_priv *priv = netdev_priv(dev);
1307 int ret, i; 1434 int i;
1308 1435
1309 INIT_LIST_HEAD(&priv->cm.passive_ids); 1436 INIT_LIST_HEAD(&priv->cm.passive_ids);
1310 INIT_LIST_HEAD(&priv->cm.reap_list); 1437 INIT_LIST_HEAD(&priv->cm.reap_list);
@@ -1331,21 +1458,25 @@ int ipoib_cm_dev_init(struct net_device *dev)
1331 priv->cm.rx_wr.sg_list = priv->cm.rx_sge; 1458 priv->cm.rx_wr.sg_list = priv->cm.rx_sge;
1332 priv->cm.rx_wr.num_sge = IPOIB_CM_RX_SG; 1459 priv->cm.rx_wr.num_sge = IPOIB_CM_RX_SG;
1333 1460
1334 ret = ipoib_cm_create_srq(dev); 1461 ipoib_cm_create_srq(dev);
1335 if (ret) 1462
1336 return ret; 1463 if (ipoib_cm_has_srq(dev)) {
1464 for (i = 0; i < ipoib_recvq_size; ++i) {
1465 if (!ipoib_cm_alloc_rx_skb(dev, priv->cm.srq_ring, i,
1466 IPOIB_CM_RX_SG - 1,
1467 priv->cm.srq_ring[i].mapping)) {
1468 ipoib_warn(priv, "failed to allocate "
1469 "receive buffer %d\n", i);
1470 ipoib_cm_dev_cleanup(dev);
1471 return -ENOMEM;
1472 }
1337 1473
1338 for (i = 0; i < ipoib_recvq_size; ++i) { 1474 if (ipoib_cm_post_receive_srq(dev, i)) {
1339 if (!ipoib_cm_alloc_rx_skb(dev, i, IPOIB_CM_RX_SG - 1, 1475 ipoib_warn(priv, "ipoib_cm_post_receive_srq "
1340 priv->cm.srq_ring[i].mapping)) { 1476 "failed for buf %d\n", i);
1341 ipoib_warn(priv, "failed to allocate receive buffer %d\n", i); 1477 ipoib_cm_dev_cleanup(dev);
1342 ipoib_cm_dev_cleanup(dev); 1478 return -EIO;
1343 return -ENOMEM; 1479 }
1344 }
1345 if (ipoib_cm_post_receive(dev, i)) {
1346 ipoib_warn(priv, "ipoib_ib_post_receive failed for buf %d\n", i);
1347 ipoib_cm_dev_cleanup(dev);
1348 return -EIO;
1349 } 1480 }
1350 } 1481 }
1351 1482
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 5a9c3b5a39ef..3bfc2ef1303e 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -1268,6 +1268,9 @@ static int __init ipoib_init_module(void)
1268 ipoib_sendq_size = roundup_pow_of_two(ipoib_sendq_size); 1268 ipoib_sendq_size = roundup_pow_of_two(ipoib_sendq_size);
1269 ipoib_sendq_size = min(ipoib_sendq_size, IPOIB_MAX_QUEUE_SIZE); 1269 ipoib_sendq_size = min(ipoib_sendq_size, IPOIB_MAX_QUEUE_SIZE);
1270 ipoib_sendq_size = max(ipoib_sendq_size, IPOIB_MIN_QUEUE_SIZE); 1270 ipoib_sendq_size = max(ipoib_sendq_size, IPOIB_MIN_QUEUE_SIZE);
1271#ifdef CONFIG_INFINIBAND_IPOIB_CM
1272 ipoib_max_conn_qp = min(ipoib_max_conn_qp, IPOIB_CM_MAX_CONN_QP);
1273#endif
1271 1274
1272 ret = ipoib_register_debugfs(); 1275 ret = ipoib_register_debugfs();
1273 if (ret) 1276 if (ret)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index b6848a8d35db..433e99ac227b 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -172,8 +172,12 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
172 172
173 size = ipoib_sendq_size + ipoib_recvq_size + 1; 173 size = ipoib_sendq_size + ipoib_recvq_size + 1;
174 ret = ipoib_cm_dev_init(dev); 174 ret = ipoib_cm_dev_init(dev);
175 if (!ret) 175 if (!ret) {
176 size += ipoib_recvq_size + 1 /* 1 extra for rx_drain_qp */; 176 if (ipoib_cm_has_srq(dev))
177 size += ipoib_recvq_size + 1; /* 1 extra for rx_drain_qp */
178 else
179 size += ipoib_recvq_size * ipoib_max_conn_qp;
180 }
177 181
178 priv->cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, size, 0); 182 priv->cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, size, 0);
179 if (IS_ERR(priv->cq)) { 183 if (IS_ERR(priv->cq)) {