aboutsummaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
authorEli Cohen <eli@dev.mellanox.co.il>2008-07-15 02:48:44 -0400
committerRoland Dreier <rolandd@cisco.com>2008-07-15 02:48:44 -0400
commitf89271da32bc1a636cf4eb078e615930886cd013 (patch)
tree7155618205af963d991f72766d1155702439f124 /drivers
parentf3781d2e89f12dd5afa046dc56032af6e39bd116 (diff)
IPoIB: Copy small received SKBs in connected mode
The connected mode implementation in the IPoIB driver has a large overhead in the way SKBs are handled in the receive flow. It usually allocates an SKB as big as the one used for the currently received SKB and moves unused fragments from the old SKB to the new one. This involves a loop on all the remaining fragments and incurs overhead on the CPU. This patch, for small SKBs, allocates an SKB just large enough to contain the received data and copies to it the data from the received SKB. The newly allocated SKB is passed to the stack and the old SKB is reposted. When running netperf, UDP small messages, without this patch I get: UDP UNIDIRECTIONAL SEND TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 14.4.3.178 (14.4.3.178) port 0 AF_INET Socket Message Elapsed Messages Size Size Time Okay Errors Throughput bytes bytes secs # # 10^6bits/sec 114688 128 10.00 5142034 0 526.31 114688 10.00 1130489 115.71 With this patch I get both send and receive at ~315 Mbps. The reason that send performance actually slows down is as follows: When using this patch, the overhead of the CPU for handling RX packets is dramatically reduced. As a result, we do not experience RNR NAK messages from the receiver which cause the connection to be closed and reopened again; when the patch is not used, the receiver cannot handle the packets fast enough so there is less time to post new buffers and hence the mentioned RNR NAKs. So what happens is that the application *thinks* it posted a certain number of packets for transmission but these packets are flushed and do not really get transmitted. Since the connection gets opened and closed many times, each time netperf gets the CPU time that otherwise would have been given to IPoIB to actually transmit the packets.
This can be verified by looking at the port counters, the output of ifconfig and the output of netperf (this is for the case without the patch): tx packets ========== port counter: 1,543,996 ifconfig: 1,581,426 netperf: 5,142,034 rx packets ========== netperf 1,130,489 Signed-off-by: Eli Cohen <eli@mellanox.co.il>
Diffstat (limited to 'drivers')
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib.h1
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_cm.c19
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c6
3 files changed, 26 insertions, 0 deletions
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 0dcbab3203c9..8754b364f229 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -95,6 +95,7 @@ enum {
95 IPOIB_MCAST_FLAG_ATTACHED = 3, 95 IPOIB_MCAST_FLAG_ATTACHED = 3,
96 96
97 MAX_SEND_CQE = 16, 97 MAX_SEND_CQE = 16,
98 IPOIB_CM_COPYBREAK = 256,
98}; 99};
99 100
100#define IPOIB_OP_RECV (1ul << 31) 101#define IPOIB_OP_RECV (1ul << 31)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 91c959299910..6223fc39af70 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -523,6 +523,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
523 u64 mapping[IPOIB_CM_RX_SG]; 523 u64 mapping[IPOIB_CM_RX_SG];
524 int frags; 524 int frags;
525 int has_srq; 525 int has_srq;
526 struct sk_buff *small_skb;
526 527
527 ipoib_dbg_data(priv, "cm recv completion: id %d, status: %d\n", 528 ipoib_dbg_data(priv, "cm recv completion: id %d, status: %d\n",
528 wr_id, wc->status); 529 wr_id, wc->status);
@@ -577,6 +578,23 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
577 } 578 }
578 } 579 }
579 580
581 if (wc->byte_len < IPOIB_CM_COPYBREAK) {
582 int dlen = wc->byte_len;
583
584 small_skb = dev_alloc_skb(dlen + 12);
585 if (small_skb) {
586 skb_reserve(small_skb, 12);
587 ib_dma_sync_single_for_cpu(priv->ca, rx_ring[wr_id].mapping[0],
588 dlen, DMA_FROM_DEVICE);
589 skb_copy_from_linear_data(skb, small_skb->data, dlen);
590 ib_dma_sync_single_for_device(priv->ca, rx_ring[wr_id].mapping[0],
591 dlen, DMA_FROM_DEVICE);
592 skb_put(small_skb, dlen);
593 skb = small_skb;
594 goto copied;
595 }
596 }
597
580 frags = PAGE_ALIGN(wc->byte_len - min(wc->byte_len, 598 frags = PAGE_ALIGN(wc->byte_len - min(wc->byte_len,
581 (unsigned)IPOIB_CM_HEAD_SIZE)) / PAGE_SIZE; 599 (unsigned)IPOIB_CM_HEAD_SIZE)) / PAGE_SIZE;
582 600
@@ -599,6 +617,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
599 617
600 skb_put_frags(skb, IPOIB_CM_HEAD_SIZE, wc->byte_len, newskb); 618 skb_put_frags(skb, IPOIB_CM_HEAD_SIZE, wc->byte_len, newskb);
601 619
620copied:
602 skb->protocol = ((struct ipoib_header *) skb->data)->proto; 621 skb->protocol = ((struct ipoib_header *) skb->data)->proto;
603 skb_reset_mac_header(skb); 622 skb_reset_mac_header(skb);
604 skb_pull(skb, IPOIB_ENCAP_LEN); 623 skb_pull(skb, IPOIB_ENCAP_LEN);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index f217b1edd0ac..bfe1dbf99207 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -1302,6 +1302,12 @@ static int __init ipoib_init_module(void)
1302 ipoib_max_conn_qp = min(ipoib_max_conn_qp, IPOIB_CM_MAX_CONN_QP); 1302 ipoib_max_conn_qp = min(ipoib_max_conn_qp, IPOIB_CM_MAX_CONN_QP);
1303#endif 1303#endif
1304 1304
1305 /*
1306 * When copying small received packets, we only copy from the
1307 * linear data part of the SKB, so we rely on this condition.
1308 */
1309 BUILD_BUG_ON(IPOIB_CM_COPYBREAK > IPOIB_CM_HEAD_SIZE);
1310
1305 ret = ipoib_register_debugfs(); 1311 ret = ipoib_register_debugfs();
1306 if (ret) 1312 if (ret)
1307 return ret; 1313 return ret;