aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband
diff options
context:
space:
mode:
author: Vladimir Sokolovsky <vlad@mellanox.co.il> 2008-07-15 02:48:48 -0400
committer: Roland Dreier <rolandd@cisco.com> 2008-07-15 02:48:48 -0400
commit: af40da894e96d5c826d38be3ea53ee00d9de0367 (patch)
tree: 15b8e850d4227ec04289e5bc67d1838f231c8d9f /drivers/infiniband
parent: 12406734051a26e9fe4c8568e931dfddbb72d431 (diff)
IPoIB: add LRO support
Add an "lro" module parameter to enable LRO and an "lro_max_aggr" module parameter to set the maximum number of packets to be aggregated. Make LRO controllable and LRO statistics accessible through ethtool.

Signed-off-by: Vladimir Sokolovsky <vlad@mellanox.co.il>
Signed-off-by: Eli Cohen <eli@mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/infiniband')
-rw-r--r-- drivers/infiniband/ulp/ipoib/Kconfig | 1
-rw-r--r-- drivers/infiniband/ulp/ipoib/ipoib.h | 11
-rw-r--r-- drivers/infiniband/ulp/ipoib/ipoib_ethtool.c | 46
-rw-r--r-- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 8
-rw-r--r-- drivers/infiniband/ulp/ipoib/ipoib_main.c | 62
5 files changed, 127 insertions, 1 deletions
diff --git a/drivers/infiniband/ulp/ipoib/Kconfig b/drivers/infiniband/ulp/ipoib/Kconfig
index 1f76bad020f3..691525cf394a 100644
--- a/drivers/infiniband/ulp/ipoib/Kconfig
+++ b/drivers/infiniband/ulp/ipoib/Kconfig
@@ -1,6 +1,7 @@
1config INFINIBAND_IPOIB 1config INFINIBAND_IPOIB
2 tristate "IP-over-InfiniBand" 2 tristate "IP-over-InfiniBand"
3 depends on NETDEVICES && INET && (IPV6 || IPV6=n) 3 depends on NETDEVICES && INET && (IPV6 || IPV6=n)
4 select INET_LRO
4 ---help--- 5 ---help---
5 Support for the IP-over-InfiniBand protocol (IPoIB). This 6 Support for the IP-over-InfiniBand protocol (IPoIB). This
6 transports IP packets over InfiniBand so you can use your IB 7 transports IP packets over InfiniBand so you can use your IB
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 8754b364f229..2c522572e3c5 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -50,6 +50,7 @@
50#include <rdma/ib_verbs.h> 50#include <rdma/ib_verbs.h>
51#include <rdma/ib_pack.h> 51#include <rdma/ib_pack.h>
52#include <rdma/ib_sa.h> 52#include <rdma/ib_sa.h>
53#include <linux/inet_lro.h>
53 54
54/* constants */ 55/* constants */
55 56
@@ -94,6 +95,9 @@ enum {
94 IPOIB_MCAST_FLAG_BUSY = 2, /* joining or already joined */ 95 IPOIB_MCAST_FLAG_BUSY = 2, /* joining or already joined */
95 IPOIB_MCAST_FLAG_ATTACHED = 3, 96 IPOIB_MCAST_FLAG_ATTACHED = 3,
96 97
98 IPOIB_MAX_LRO_DESCRIPTORS = 8,
99 IPOIB_LRO_MAX_AGGR = 64,
100
97 MAX_SEND_CQE = 16, 101 MAX_SEND_CQE = 16,
98 IPOIB_CM_COPYBREAK = 256, 102 IPOIB_CM_COPYBREAK = 256,
99}; 103};
@@ -248,6 +252,11 @@ struct ipoib_ethtool_st {
248 u16 max_coalesced_frames; 252 u16 max_coalesced_frames;
249}; 253};
250 254
255struct ipoib_lro {
256 struct net_lro_mgr lro_mgr;
257 struct net_lro_desc lro_desc[IPOIB_MAX_LRO_DESCRIPTORS];
258};
259
251/* 260/*
252 * Device private locking: tx_lock protects members used in TX fast 261 * Device private locking: tx_lock protects members used in TX fast
253 * path (and we use LLTX so upper layers don't do extra locking). 262 * path (and we use LLTX so upper layers don't do extra locking).
@@ -334,6 +343,8 @@ struct ipoib_dev_priv {
334 int hca_caps; 343 int hca_caps;
335 struct ipoib_ethtool_st ethtool; 344 struct ipoib_ethtool_st ethtool;
336 struct timer_list poll_timer; 345 struct timer_list poll_timer;
346
347 struct ipoib_lro lro;
337}; 348};
338 349
339struct ipoib_ah { 350struct ipoib_ah {
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
index 10279b79c44d..66af5c1a76e5 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
@@ -86,11 +86,57 @@ static int ipoib_set_coalesce(struct net_device *dev,
86 return 0; 86 return 0;
87} 87}
88 88
89static const char ipoib_stats_keys[][ETH_GSTRING_LEN] = {
90 "LRO aggregated", "LRO flushed",
91 "LRO avg aggr", "LRO no desc"
92};
93
94static void ipoib_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
95{
96 switch (stringset) {
97 case ETH_SS_STATS:
98 memcpy(data, *ipoib_stats_keys, sizeof(ipoib_stats_keys));
99 break;
100 }
101}
102
103static int ipoib_get_sset_count(struct net_device *dev, int sset)
104{
105 switch (sset) {
106 case ETH_SS_STATS:
107 return ARRAY_SIZE(ipoib_stats_keys);
108 default:
109 return -EOPNOTSUPP;
110 }
111}
112
113static void ipoib_get_ethtool_stats(struct net_device *dev,
114 struct ethtool_stats *stats, uint64_t *data)
115{
116 struct ipoib_dev_priv *priv = netdev_priv(dev);
117 int index = 0;
118
119 /* Get LRO statistics */
120 data[index++] = priv->lro.lro_mgr.stats.aggregated;
121 data[index++] = priv->lro.lro_mgr.stats.flushed;
122 if (priv->lro.lro_mgr.stats.flushed)
123 data[index++] = priv->lro.lro_mgr.stats.aggregated /
124 priv->lro.lro_mgr.stats.flushed;
125 else
126 data[index++] = 0;
127 data[index++] = priv->lro.lro_mgr.stats.no_desc;
128}
129
89static const struct ethtool_ops ipoib_ethtool_ops = { 130static const struct ethtool_ops ipoib_ethtool_ops = {
90 .get_drvinfo = ipoib_get_drvinfo, 131 .get_drvinfo = ipoib_get_drvinfo,
91 .get_tso = ethtool_op_get_tso, 132 .get_tso = ethtool_op_get_tso,
92 .get_coalesce = ipoib_get_coalesce, 133 .get_coalesce = ipoib_get_coalesce,
93 .set_coalesce = ipoib_set_coalesce, 134 .set_coalesce = ipoib_set_coalesce,
135 .get_flags = ethtool_op_get_flags,
136 .set_flags = ethtool_op_set_flags,
137 .get_strings = ipoib_get_strings,
138 .get_sset_count = ipoib_get_sset_count,
139 .get_ethtool_stats = ipoib_get_ethtool_stats,
94}; 140};
95 141
96void ipoib_set_ethtool_ops(struct net_device *dev) 142void ipoib_set_ethtool_ops(struct net_device *dev)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index eca8518d79a0..5d50e5261eed 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -288,7 +288,10 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
288 if (test_bit(IPOIB_FLAG_CSUM, &priv->flags) && likely(wc->csum_ok)) 288 if (test_bit(IPOIB_FLAG_CSUM, &priv->flags) && likely(wc->csum_ok))
289 skb->ip_summed = CHECKSUM_UNNECESSARY; 289 skb->ip_summed = CHECKSUM_UNNECESSARY;
290 290
291 netif_receive_skb(skb); 291 if (dev->features & NETIF_F_LRO)
292 lro_receive_skb(&priv->lro.lro_mgr, skb, NULL);
293 else
294 netif_receive_skb(skb);
292 295
293repost: 296repost:
294 if (unlikely(ipoib_ib_post_receive(dev, wr_id))) 297 if (unlikely(ipoib_ib_post_receive(dev, wr_id)))
@@ -440,6 +443,9 @@ poll_more:
440 } 443 }
441 444
442 if (done < budget) { 445 if (done < budget) {
446 if (dev->features & NETIF_F_LRO)
447 lro_flush_all(&priv->lro.lro_mgr);
448
443 netif_rx_complete(dev, napi); 449 netif_rx_complete(dev, napi);
444 if (unlikely(ib_req_notify_cq(priv->recv_cq, 450 if (unlikely(ib_req_notify_cq(priv->recv_cq,
445 IB_CQ_NEXT_COMP | 451 IB_CQ_NEXT_COMP |
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index bfe1dbf99207..fead88f7fb17 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -60,6 +60,15 @@ MODULE_PARM_DESC(send_queue_size, "Number of descriptors in send queue");
60module_param_named(recv_queue_size, ipoib_recvq_size, int, 0444); 60module_param_named(recv_queue_size, ipoib_recvq_size, int, 0444);
61MODULE_PARM_DESC(recv_queue_size, "Number of descriptors in receive queue"); 61MODULE_PARM_DESC(recv_queue_size, "Number of descriptors in receive queue");
62 62
63static int lro;
64module_param(lro, bool, 0444);
65MODULE_PARM_DESC(lro, "Enable LRO (Large Receive Offload)");
66
67static int lro_max_aggr = IPOIB_LRO_MAX_AGGR;
68module_param(lro_max_aggr, int, 0644);
69MODULE_PARM_DESC(lro_max_aggr, "LRO: Max packets to be aggregated "
70 "(default = 64)");
71
63#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG 72#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
64int ipoib_debug_level; 73int ipoib_debug_level;
65 74
@@ -936,6 +945,54 @@ static const struct header_ops ipoib_header_ops = {
936 .create = ipoib_hard_header, 945 .create = ipoib_hard_header,
937}; 946};
938 947
948static int get_skb_hdr(struct sk_buff *skb, void **iphdr,
949 void **tcph, u64 *hdr_flags, void *priv)
950{
951 unsigned int ip_len;
952 struct iphdr *iph;
953
954 if (unlikely(skb->protocol != htons(ETH_P_IP)))
955 return -1;
956
957 /*
958 * In the future we may add an else clause that verifies the
959 * checksum and allows devices which do not calculate checksum
960 * to use LRO.
961 */
962 if (unlikely(skb->ip_summed != CHECKSUM_UNNECESSARY))
963 return -1;
964
965 /* Check for non-TCP packet */
966 skb_reset_network_header(skb);
967 iph = ip_hdr(skb);
968 if (iph->protocol != IPPROTO_TCP)
969 return -1;
970
971 ip_len = ip_hdrlen(skb);
972 skb_set_transport_header(skb, ip_len);
973 *tcph = tcp_hdr(skb);
974
975 /* check if IP header and TCP header are complete */
976 if (ntohs(iph->tot_len) < ip_len + tcp_hdrlen(skb))
977 return -1;
978
979 *hdr_flags = LRO_IPV4 | LRO_TCP;
980 *iphdr = iph;
981
982 return 0;
983}
984
985static void ipoib_lro_setup(struct ipoib_dev_priv *priv)
986{
987 priv->lro.lro_mgr.max_aggr = lro_max_aggr;
988 priv->lro.lro_mgr.max_desc = IPOIB_MAX_LRO_DESCRIPTORS;
989 priv->lro.lro_mgr.lro_arr = priv->lro.lro_desc;
990 priv->lro.lro_mgr.get_skb_header = get_skb_hdr;
991 priv->lro.lro_mgr.features = LRO_F_NAPI;
992 priv->lro.lro_mgr.dev = priv->dev;
993 priv->lro.lro_mgr.ip_summed_aggr = CHECKSUM_UNNECESSARY;
994}
995
939static void ipoib_setup(struct net_device *dev) 996static void ipoib_setup(struct net_device *dev)
940{ 997{
941 struct ipoib_dev_priv *priv = netdev_priv(dev); 998 struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -975,6 +1032,8 @@ static void ipoib_setup(struct net_device *dev)
975 1032
976 priv->dev = dev; 1033 priv->dev = dev;
977 1034
1035 ipoib_lro_setup(priv);
1036
978 spin_lock_init(&priv->lock); 1037 spin_lock_init(&priv->lock);
979 spin_lock_init(&priv->tx_lock); 1038 spin_lock_init(&priv->tx_lock);
980 1039
@@ -1152,6 +1211,9 @@ static struct net_device *ipoib_add_port(const char *format,
1152 priv->dev->features |= NETIF_F_SG | NETIF_F_IP_CSUM; 1211 priv->dev->features |= NETIF_F_SG | NETIF_F_IP_CSUM;
1153 } 1212 }
1154 1213
1214 if (lro)
1215 priv->dev->features |= NETIF_F_LRO;
1216
1155 /* 1217 /*
1156 * Set the full membership bit, so that we join the right 1218 * Set the full membership bit, so that we join the right
1157 * broadcast group, etc. 1219 * broadcast group, etc.