about summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorEli Cohen <eli@dev.mellanox.co.il>2008-04-29 16:46:53 -0400
committerRoland Dreier <rolandd@cisco.com>2008-04-29 16:46:53 -0400
commitf56bcd8013566d4ad4759ae5fc85a6660e4655c7 (patch)
tree58b7e23f81caf5e6d8ada5819170f0bfb783d7e3
parent87528227dfa8776d12779d073c217f0835fd6d20 (diff)
IPoIB: Use separate CQ for UD send completions
Use a dedicated CQ for UD send completions. Also, do not arm the UD send CQ, which reduces the number of interrupts generated. This patch further reduces overhead by not calling poll CQ for every posted send WR -- it polls only when there are 16 or more outstanding work requests. Signed-off-by: Eli Cohen <eli@mellanox.co.il> Signed-off-by: Roland Dreier <rolandd@cisco.com>
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib.h7
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_cm.c8
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ethtool.c2
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ib.c45
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c3
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_verbs.c39
6 files changed, 64 insertions, 40 deletions
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index f1f142dc64b1..9044f8803532 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -95,6 +95,8 @@ enum {
95 IPOIB_MCAST_FLAG_SENDONLY = 1, 95 IPOIB_MCAST_FLAG_SENDONLY = 1,
96 IPOIB_MCAST_FLAG_BUSY = 2, /* joining or already joined */ 96 IPOIB_MCAST_FLAG_BUSY = 2, /* joining or already joined */
97 IPOIB_MCAST_FLAG_ATTACHED = 3, 97 IPOIB_MCAST_FLAG_ATTACHED = 3,
98
99 MAX_SEND_CQE = 16,
98}; 100};
99 101
100#define IPOIB_OP_RECV (1ul << 31) 102#define IPOIB_OP_RECV (1ul << 31)
@@ -285,7 +287,8 @@ struct ipoib_dev_priv {
285 u16 pkey_index; 287 u16 pkey_index;
286 struct ib_pd *pd; 288 struct ib_pd *pd;
287 struct ib_mr *mr; 289 struct ib_mr *mr;
288 struct ib_cq *cq; 290 struct ib_cq *recv_cq;
291 struct ib_cq *send_cq;
289 struct ib_qp *qp; 292 struct ib_qp *qp;
290 u32 qkey; 293 u32 qkey;
291 294
@@ -305,6 +308,7 @@ struct ipoib_dev_priv {
305 struct ib_sge tx_sge[MAX_SKB_FRAGS + 1]; 308 struct ib_sge tx_sge[MAX_SKB_FRAGS + 1];
306 struct ib_send_wr tx_wr; 309 struct ib_send_wr tx_wr;
307 unsigned tx_outstanding; 310 unsigned tx_outstanding;
311 struct ib_wc send_wc[MAX_SEND_CQE];
308 312
309 struct ib_recv_wr rx_wr; 313 struct ib_recv_wr rx_wr;
310 struct ib_sge rx_sge[IPOIB_UD_RX_SG]; 314 struct ib_sge rx_sge[IPOIB_UD_RX_SG];
@@ -662,7 +666,6 @@ static inline int ipoib_register_debugfs(void) { return 0; }
662static inline void ipoib_unregister_debugfs(void) { } 666static inline void ipoib_unregister_debugfs(void) { }
663#endif 667#endif
664 668
665
666#define ipoib_printk(level, priv, format, arg...) \ 669#define ipoib_printk(level, priv, format, arg...) \
667 printk(level "%s: " format, ((struct ipoib_dev_priv *) priv)->dev->name , ## arg) 670 printk(level "%s: " format, ((struct ipoib_dev_priv *) priv)->dev->name , ## arg)
668#define ipoib_warn(priv, format, arg...) \ 671#define ipoib_warn(priv, format, arg...) \
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 9db7b0bd9134..97e67d36378f 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -249,8 +249,8 @@ static struct ib_qp *ipoib_cm_create_rx_qp(struct net_device *dev,
249 struct ipoib_dev_priv *priv = netdev_priv(dev); 249 struct ipoib_dev_priv *priv = netdev_priv(dev);
250 struct ib_qp_init_attr attr = { 250 struct ib_qp_init_attr attr = {
251 .event_handler = ipoib_cm_rx_event_handler, 251 .event_handler = ipoib_cm_rx_event_handler,
252 .send_cq = priv->cq, /* For drain WR */ 252 .send_cq = priv->recv_cq, /* For drain WR */
253 .recv_cq = priv->cq, 253 .recv_cq = priv->recv_cq,
254 .srq = priv->cm.srq, 254 .srq = priv->cm.srq,
255 .cap.max_send_wr = 1, /* For drain WR */ 255 .cap.max_send_wr = 1, /* For drain WR */
256 .cap.max_send_sge = 1, /* FIXME: 0 Seems not to work */ 256 .cap.max_send_sge = 1, /* FIXME: 0 Seems not to work */
@@ -951,8 +951,8 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_
951{ 951{
952 struct ipoib_dev_priv *priv = netdev_priv(dev); 952 struct ipoib_dev_priv *priv = netdev_priv(dev);
953 struct ib_qp_init_attr attr = { 953 struct ib_qp_init_attr attr = {
954 .send_cq = priv->cq, 954 .send_cq = priv->recv_cq,
955 .recv_cq = priv->cq, 955 .recv_cq = priv->recv_cq,
956 .srq = priv->cm.srq, 956 .srq = priv->cm.srq,
957 .cap.max_send_wr = ipoib_sendq_size, 957 .cap.max_send_wr = ipoib_sendq_size,
958 .cap.max_send_sge = 1, 958 .cap.max_send_sge = 1,
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
index 9a47428366c9..10279b79c44d 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
@@ -71,7 +71,7 @@ static int ipoib_set_coalesce(struct net_device *dev,
71 coal->rx_max_coalesced_frames > 0xffff) 71 coal->rx_max_coalesced_frames > 0xffff)
72 return -EINVAL; 72 return -EINVAL;
73 73
74 ret = ib_modify_cq(priv->cq, coal->rx_max_coalesced_frames, 74 ret = ib_modify_cq(priv->recv_cq, coal->rx_max_coalesced_frames,
75 coal->rx_coalesce_usecs); 75 coal->rx_coalesce_usecs);
76 if (ret && ret != -ENOSYS) { 76 if (ret && ret != -ENOSYS) {
77 ipoib_warn(priv, "failed modifying CQ (%d)\n", ret); 77 ipoib_warn(priv, "failed modifying CQ (%d)\n", ret);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 7cf1fa7074ab..97b815c1a3fc 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -364,7 +364,6 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
364 struct ipoib_dev_priv *priv = netdev_priv(dev); 364 struct ipoib_dev_priv *priv = netdev_priv(dev);
365 unsigned int wr_id = wc->wr_id; 365 unsigned int wr_id = wc->wr_id;
366 struct ipoib_tx_buf *tx_req; 366 struct ipoib_tx_buf *tx_req;
367 unsigned long flags;
368 367
369 ipoib_dbg_data(priv, "send completion: id %d, status: %d\n", 368 ipoib_dbg_data(priv, "send completion: id %d, status: %d\n",
370 wr_id, wc->status); 369 wr_id, wc->status);
@@ -384,13 +383,11 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
384 383
385 dev_kfree_skb_any(tx_req->skb); 384 dev_kfree_skb_any(tx_req->skb);
386 385
387 spin_lock_irqsave(&priv->tx_lock, flags);
388 ++priv->tx_tail; 386 ++priv->tx_tail;
389 if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) && 387 if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) &&
390 netif_queue_stopped(dev) && 388 netif_queue_stopped(dev) &&
391 test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) 389 test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
392 netif_wake_queue(dev); 390 netif_wake_queue(dev);
393 spin_unlock_irqrestore(&priv->tx_lock, flags);
394 391
395 if (wc->status != IB_WC_SUCCESS && 392 if (wc->status != IB_WC_SUCCESS &&
396 wc->status != IB_WC_WR_FLUSH_ERR) 393 wc->status != IB_WC_WR_FLUSH_ERR)
@@ -399,6 +396,17 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
399 wc->status, wr_id, wc->vendor_err); 396 wc->status, wr_id, wc->vendor_err);
400} 397}
401 398
399static int poll_tx(struct ipoib_dev_priv *priv)
400{
401 int n, i;
402
403 n = ib_poll_cq(priv->send_cq, MAX_SEND_CQE, priv->send_wc);
404 for (i = 0; i < n; ++i)
405 ipoib_ib_handle_tx_wc(priv->dev, priv->send_wc + i);
406
407 return n == MAX_SEND_CQE;
408}
409
402int ipoib_poll(struct napi_struct *napi, int budget) 410int ipoib_poll(struct napi_struct *napi, int budget)
403{ 411{
404 struct ipoib_dev_priv *priv = container_of(napi, struct ipoib_dev_priv, napi); 412 struct ipoib_dev_priv *priv = container_of(napi, struct ipoib_dev_priv, napi);
@@ -414,7 +422,7 @@ poll_more:
414 int max = (budget - done); 422 int max = (budget - done);
415 423
416 t = min(IPOIB_NUM_WC, max); 424 t = min(IPOIB_NUM_WC, max);
417 n = ib_poll_cq(priv->cq, t, priv->ibwc); 425 n = ib_poll_cq(priv->recv_cq, t, priv->ibwc);
418 426
419 for (i = 0; i < n; i++) { 427 for (i = 0; i < n; i++) {
420 struct ib_wc *wc = priv->ibwc + i; 428 struct ib_wc *wc = priv->ibwc + i;
@@ -425,12 +433,8 @@ poll_more:
425 ipoib_cm_handle_rx_wc(dev, wc); 433 ipoib_cm_handle_rx_wc(dev, wc);
426 else 434 else
427 ipoib_ib_handle_rx_wc(dev, wc); 435 ipoib_ib_handle_rx_wc(dev, wc);
428 } else { 436 } else
429 if (wc->wr_id & IPOIB_OP_CM) 437 ipoib_cm_handle_tx_wc(priv->dev, wc);
430 ipoib_cm_handle_tx_wc(dev, wc);
431 else
432 ipoib_ib_handle_tx_wc(dev, wc);
433 }
434 } 438 }
435 439
436 if (n != t) 440 if (n != t)
@@ -439,7 +443,7 @@ poll_more:
439 443
440 if (done < budget) { 444 if (done < budget) {
441 netif_rx_complete(dev, napi); 445 netif_rx_complete(dev, napi);
442 if (unlikely(ib_req_notify_cq(priv->cq, 446 if (unlikely(ib_req_notify_cq(priv->recv_cq,
443 IB_CQ_NEXT_COMP | 447 IB_CQ_NEXT_COMP |
444 IB_CQ_REPORT_MISSED_EVENTS)) && 448 IB_CQ_REPORT_MISSED_EVENTS)) &&
445 netif_rx_reschedule(dev, napi)) 449 netif_rx_reschedule(dev, napi))
@@ -562,12 +566,16 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
562 566
563 address->last_send = priv->tx_head; 567 address->last_send = priv->tx_head;
564 ++priv->tx_head; 568 ++priv->tx_head;
569 skb_orphan(skb);
565 570
566 if (++priv->tx_outstanding == ipoib_sendq_size) { 571 if (++priv->tx_outstanding == ipoib_sendq_size) {
567 ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n"); 572 ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n");
568 netif_stop_queue(dev); 573 netif_stop_queue(dev);
569 } 574 }
570 } 575 }
576
577 if (unlikely(priv->tx_outstanding > MAX_SEND_CQE))
578 poll_tx(priv);
571} 579}
572 580
573static void __ipoib_reap_ah(struct net_device *dev) 581static void __ipoib_reap_ah(struct net_device *dev)
@@ -714,7 +722,7 @@ void ipoib_drain_cq(struct net_device *dev)
714 struct ipoib_dev_priv *priv = netdev_priv(dev); 722 struct ipoib_dev_priv *priv = netdev_priv(dev);
715 int i, n; 723 int i, n;
716 do { 724 do {
717 n = ib_poll_cq(priv->cq, IPOIB_NUM_WC, priv->ibwc); 725 n = ib_poll_cq(priv->recv_cq, IPOIB_NUM_WC, priv->ibwc);
718 for (i = 0; i < n; ++i) { 726 for (i = 0; i < n; ++i) {
719 /* 727 /*
720 * Convert any successful completions to flush 728 * Convert any successful completions to flush
@@ -729,14 +737,13 @@ void ipoib_drain_cq(struct net_device *dev)
729 ipoib_cm_handle_rx_wc(dev, priv->ibwc + i); 737 ipoib_cm_handle_rx_wc(dev, priv->ibwc + i);
730 else 738 else
731 ipoib_ib_handle_rx_wc(dev, priv->ibwc + i); 739 ipoib_ib_handle_rx_wc(dev, priv->ibwc + i);
732 } else { 740 } else
733 if (priv->ibwc[i].wr_id & IPOIB_OP_CM) 741 ipoib_cm_handle_tx_wc(dev, priv->ibwc + i);
734 ipoib_cm_handle_tx_wc(dev, priv->ibwc + i);
735 else
736 ipoib_ib_handle_tx_wc(dev, priv->ibwc + i);
737 }
738 } 742 }
739 } while (n == IPOIB_NUM_WC); 743 } while (n == IPOIB_NUM_WC);
744
745 while (poll_tx(priv))
746 ; /* nothing */
740} 747}
741 748
742int ipoib_ib_dev_stop(struct net_device *dev, int flush) 749int ipoib_ib_dev_stop(struct net_device *dev, int flush)
@@ -826,7 +833,7 @@ timeout:
826 msleep(1); 833 msleep(1);
827 } 834 }
828 835
829 ib_req_notify_cq(priv->cq, IB_CQ_NEXT_COMP); 836 ib_req_notify_cq(priv->recv_cq, IB_CQ_NEXT_COMP);
830 837
831 return 0; 838 return 0;
832} 839}
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 7a4ed9d3d844..2442090ac8d1 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -1298,7 +1298,8 @@ static int __init ipoib_init_module(void)
1298 1298
1299 ipoib_sendq_size = roundup_pow_of_two(ipoib_sendq_size); 1299 ipoib_sendq_size = roundup_pow_of_two(ipoib_sendq_size);
1300 ipoib_sendq_size = min(ipoib_sendq_size, IPOIB_MAX_QUEUE_SIZE); 1300 ipoib_sendq_size = min(ipoib_sendq_size, IPOIB_MAX_QUEUE_SIZE);
1301 ipoib_sendq_size = max(ipoib_sendq_size, IPOIB_MIN_QUEUE_SIZE); 1301 ipoib_sendq_size = max(ipoib_sendq_size, max(2 * MAX_SEND_CQE,
1302 IPOIB_MIN_QUEUE_SIZE));
1302#ifdef CONFIG_INFINIBAND_IPOIB_CM 1303#ifdef CONFIG_INFINIBAND_IPOIB_CM
1303 ipoib_max_conn_qp = min(ipoib_max_conn_qp, IPOIB_CM_MAX_CONN_QP); 1304 ipoib_max_conn_qp = min(ipoib_max_conn_qp, IPOIB_CM_MAX_CONN_QP);
1304#endif 1305#endif
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index 07c03f178a49..c1e7ece1fd44 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -171,26 +171,33 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
171 goto out_free_pd; 171 goto out_free_pd;
172 } 172 }
173 173
174 size = ipoib_sendq_size + ipoib_recvq_size + 1; 174 size = ipoib_recvq_size + 1;
175 ret = ipoib_cm_dev_init(dev); 175 ret = ipoib_cm_dev_init(dev);
176 if (!ret) { 176 if (!ret) {
177 size += ipoib_sendq_size;
177 if (ipoib_cm_has_srq(dev)) 178 if (ipoib_cm_has_srq(dev))
178 size += ipoib_recvq_size + 1; /* 1 extra for rx_drain_qp */ 179 size += ipoib_recvq_size + 1; /* 1 extra for rx_drain_qp */
179 else 180 else
180 size += ipoib_recvq_size * ipoib_max_conn_qp; 181 size += ipoib_recvq_size * ipoib_max_conn_qp;
181 } 182 }
182 183
183 priv->cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, size, 0); 184 priv->recv_cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, size, 0);
184 if (IS_ERR(priv->cq)) { 185 if (IS_ERR(priv->recv_cq)) {
185 printk(KERN_WARNING "%s: failed to create CQ\n", ca->name); 186 printk(KERN_WARNING "%s: failed to create receive CQ\n", ca->name);
186 goto out_free_mr; 187 goto out_free_mr;
187 } 188 }
188 189
189 if (ib_req_notify_cq(priv->cq, IB_CQ_NEXT_COMP)) 190 priv->send_cq = ib_create_cq(priv->ca, NULL, NULL, dev, ipoib_sendq_size, 0);
190 goto out_free_cq; 191 if (IS_ERR(priv->send_cq)) {
192 printk(KERN_WARNING "%s: failed to create send CQ\n", ca->name);
193 goto out_free_recv_cq;
194 }
195
196 if (ib_req_notify_cq(priv->recv_cq, IB_CQ_NEXT_COMP))
197 goto out_free_send_cq;
191 198
192 init_attr.send_cq = priv->cq; 199 init_attr.send_cq = priv->send_cq;
193 init_attr.recv_cq = priv->cq; 200 init_attr.recv_cq = priv->recv_cq;
194 201
195 if (priv->hca_caps & IB_DEVICE_UD_TSO) 202 if (priv->hca_caps & IB_DEVICE_UD_TSO)
196 init_attr.create_flags = IB_QP_CREATE_IPOIB_UD_LSO; 203 init_attr.create_flags = IB_QP_CREATE_IPOIB_UD_LSO;
@@ -201,7 +208,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
201 priv->qp = ib_create_qp(priv->pd, &init_attr); 208 priv->qp = ib_create_qp(priv->pd, &init_attr);
202 if (IS_ERR(priv->qp)) { 209 if (IS_ERR(priv->qp)) {
203 printk(KERN_WARNING "%s: failed to create QP\n", ca->name); 210 printk(KERN_WARNING "%s: failed to create QP\n", ca->name);
204 goto out_free_cq; 211 goto out_free_send_cq;
205 } 212 }
206 213
207 priv->dev->dev_addr[1] = (priv->qp->qp_num >> 16) & 0xff; 214 priv->dev->dev_addr[1] = (priv->qp->qp_num >> 16) & 0xff;
@@ -230,8 +237,11 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
230 237
231 return 0; 238 return 0;
232 239
233out_free_cq: 240out_free_send_cq:
234 ib_destroy_cq(priv->cq); 241 ib_destroy_cq(priv->send_cq);
242
243out_free_recv_cq:
244 ib_destroy_cq(priv->recv_cq);
235 245
236out_free_mr: 246out_free_mr:
237 ib_dereg_mr(priv->mr); 247 ib_dereg_mr(priv->mr);
@@ -254,8 +264,11 @@ void ipoib_transport_dev_cleanup(struct net_device *dev)
254 clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); 264 clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
255 } 265 }
256 266
257 if (ib_destroy_cq(priv->cq)) 267 if (ib_destroy_cq(priv->send_cq))
258 ipoib_warn(priv, "ib_cq_destroy failed\n"); 268 ipoib_warn(priv, "ib_cq_destroy (send) failed\n");
269
270 if (ib_destroy_cq(priv->recv_cq))
271 ipoib_warn(priv, "ib_cq_destroy (recv) failed\n");
259 272
260 ipoib_cm_dev_cleanup(dev); 273 ipoib_cm_dev_cleanup(dev);
261 274