diff options
author | Vladimir Sokolovsky <vlad@mellanox.co.il> | 2008-07-15 02:48:48 -0400 |
---|---|---|
committer | Roland Dreier <rolandd@cisco.com> | 2008-07-15 02:48:48 -0400 |
commit | af40da894e96d5c826d38be3ea53ee00d9de0367 (patch) | |
tree | 15b8e850d4227ec04289e5bc67d1838f231c8d9f /drivers/infiniband | |
parent | 12406734051a26e9fe4c8568e931dfddbb72d431 (diff) |
IPoIB: add LRO support
Add an "lro" module parameter to enable LRO and an
"lro_max_aggr" module parameter to set the max number of packets
to be aggregated. Make LRO controllable and LRO statistics accessible
through ethtool.
Signed-off-by: Vladimir Sokolovsky <vlad@mellanox.co.il>
Signed-off-by: Eli Cohen <eli@mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/infiniband')
-rw-r--r-- | drivers/infiniband/ulp/ipoib/Kconfig | 1 | ||||
-rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib.h | 11 | ||||
-rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib_ethtool.c | 46 | ||||
-rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib_ib.c | 8 | ||||
-rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib_main.c | 62 |
5 files changed, 127 insertions, 1 deletions
diff --git a/drivers/infiniband/ulp/ipoib/Kconfig b/drivers/infiniband/ulp/ipoib/Kconfig index 1f76bad020f3..691525cf394a 100644 --- a/drivers/infiniband/ulp/ipoib/Kconfig +++ b/drivers/infiniband/ulp/ipoib/Kconfig | |||
@@ -1,6 +1,7 @@ | |||
1 | config INFINIBAND_IPOIB | 1 | config INFINIBAND_IPOIB |
2 | tristate "IP-over-InfiniBand" | 2 | tristate "IP-over-InfiniBand" |
3 | depends on NETDEVICES && INET && (IPV6 || IPV6=n) | 3 | depends on NETDEVICES && INET && (IPV6 || IPV6=n) |
4 | select INET_LRO | ||
4 | ---help--- | 5 | ---help--- |
5 | Support for the IP-over-InfiniBand protocol (IPoIB). This | 6 | Support for the IP-over-InfiniBand protocol (IPoIB). This |
6 | transports IP packets over InfiniBand so you can use your IB | 7 | transports IP packets over InfiniBand so you can use your IB |
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 8754b364f229..2c522572e3c5 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h | |||
@@ -50,6 +50,7 @@ | |||
50 | #include <rdma/ib_verbs.h> | 50 | #include <rdma/ib_verbs.h> |
51 | #include <rdma/ib_pack.h> | 51 | #include <rdma/ib_pack.h> |
52 | #include <rdma/ib_sa.h> | 52 | #include <rdma/ib_sa.h> |
53 | #include <linux/inet_lro.h> | ||
53 | 54 | ||
54 | /* constants */ | 55 | /* constants */ |
55 | 56 | ||
@@ -94,6 +95,9 @@ enum { | |||
94 | IPOIB_MCAST_FLAG_BUSY = 2, /* joining or already joined */ | 95 | IPOIB_MCAST_FLAG_BUSY = 2, /* joining or already joined */ |
95 | IPOIB_MCAST_FLAG_ATTACHED = 3, | 96 | IPOIB_MCAST_FLAG_ATTACHED = 3, |
96 | 97 | ||
98 | IPOIB_MAX_LRO_DESCRIPTORS = 8, | ||
99 | IPOIB_LRO_MAX_AGGR = 64, | ||
100 | |||
97 | MAX_SEND_CQE = 16, | 101 | MAX_SEND_CQE = 16, |
98 | IPOIB_CM_COPYBREAK = 256, | 102 | IPOIB_CM_COPYBREAK = 256, |
99 | }; | 103 | }; |
@@ -248,6 +252,11 @@ struct ipoib_ethtool_st { | |||
248 | u16 max_coalesced_frames; | 252 | u16 max_coalesced_frames; |
249 | }; | 253 | }; |
250 | 254 | ||
255 | struct ipoib_lro { | ||
256 | struct net_lro_mgr lro_mgr; | ||
257 | struct net_lro_desc lro_desc[IPOIB_MAX_LRO_DESCRIPTORS]; | ||
258 | }; | ||
259 | |||
251 | /* | 260 | /* |
252 | * Device private locking: tx_lock protects members used in TX fast | 261 | * Device private locking: tx_lock protects members used in TX fast |
253 | * path (and we use LLTX so upper layers don't do extra locking). | 262 | * path (and we use LLTX so upper layers don't do extra locking). |
@@ -334,6 +343,8 @@ struct ipoib_dev_priv { | |||
334 | int hca_caps; | 343 | int hca_caps; |
335 | struct ipoib_ethtool_st ethtool; | 344 | struct ipoib_ethtool_st ethtool; |
336 | struct timer_list poll_timer; | 345 | struct timer_list poll_timer; |
346 | |||
347 | struct ipoib_lro lro; | ||
337 | }; | 348 | }; |
338 | 349 | ||
339 | struct ipoib_ah { | 350 | struct ipoib_ah { |
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c index 10279b79c44d..66af5c1a76e5 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c | |||
@@ -86,11 +86,57 @@ static int ipoib_set_coalesce(struct net_device *dev, | |||
86 | return 0; | 86 | return 0; |
87 | } | 87 | } |
88 | 88 | ||
89 | static const char ipoib_stats_keys[][ETH_GSTRING_LEN] = { | ||
90 | "LRO aggregated", "LRO flushed", | ||
91 | "LRO avg aggr", "LRO no desc" | ||
92 | }; | ||
93 | |||
94 | static void ipoib_get_strings(struct net_device *netdev, u32 stringset, u8 *data) | ||
95 | { | ||
96 | switch (stringset) { | ||
97 | case ETH_SS_STATS: | ||
98 | memcpy(data, *ipoib_stats_keys, sizeof(ipoib_stats_keys)); | ||
99 | break; | ||
100 | } | ||
101 | } | ||
102 | |||
103 | static int ipoib_get_sset_count(struct net_device *dev, int sset) | ||
104 | { | ||
105 | switch (sset) { | ||
106 | case ETH_SS_STATS: | ||
107 | return ARRAY_SIZE(ipoib_stats_keys); | ||
108 | default: | ||
109 | return -EOPNOTSUPP; | ||
110 | } | ||
111 | } | ||
112 | |||
113 | static void ipoib_get_ethtool_stats(struct net_device *dev, | ||
114 | struct ethtool_stats *stats, uint64_t *data) | ||
115 | { | ||
116 | struct ipoib_dev_priv *priv = netdev_priv(dev); | ||
117 | int index = 0; | ||
118 | |||
119 | /* Get LRO statistics */ | ||
120 | data[index++] = priv->lro.lro_mgr.stats.aggregated; | ||
121 | data[index++] = priv->lro.lro_mgr.stats.flushed; | ||
122 | if (priv->lro.lro_mgr.stats.flushed) | ||
123 | data[index++] = priv->lro.lro_mgr.stats.aggregated / | ||
124 | priv->lro.lro_mgr.stats.flushed; | ||
125 | else | ||
126 | data[index++] = 0; | ||
127 | data[index++] = priv->lro.lro_mgr.stats.no_desc; | ||
128 | } | ||
129 | |||
89 | static const struct ethtool_ops ipoib_ethtool_ops = { | 130 | static const struct ethtool_ops ipoib_ethtool_ops = { |
90 | .get_drvinfo = ipoib_get_drvinfo, | 131 | .get_drvinfo = ipoib_get_drvinfo, |
91 | .get_tso = ethtool_op_get_tso, | 132 | .get_tso = ethtool_op_get_tso, |
92 | .get_coalesce = ipoib_get_coalesce, | 133 | .get_coalesce = ipoib_get_coalesce, |
93 | .set_coalesce = ipoib_set_coalesce, | 134 | .set_coalesce = ipoib_set_coalesce, |
135 | .get_flags = ethtool_op_get_flags, | ||
136 | .set_flags = ethtool_op_set_flags, | ||
137 | .get_strings = ipoib_get_strings, | ||
138 | .get_sset_count = ipoib_get_sset_count, | ||
139 | .get_ethtool_stats = ipoib_get_ethtool_stats, | ||
94 | }; | 140 | }; |
95 | 141 | ||
96 | void ipoib_set_ethtool_ops(struct net_device *dev) | 142 | void ipoib_set_ethtool_ops(struct net_device *dev) |
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index eca8518d79a0..5d50e5261eed 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c | |||
@@ -288,7 +288,10 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) | |||
288 | if (test_bit(IPOIB_FLAG_CSUM, &priv->flags) && likely(wc->csum_ok)) | 288 | if (test_bit(IPOIB_FLAG_CSUM, &priv->flags) && likely(wc->csum_ok)) |
289 | skb->ip_summed = CHECKSUM_UNNECESSARY; | 289 | skb->ip_summed = CHECKSUM_UNNECESSARY; |
290 | 290 | ||
291 | netif_receive_skb(skb); | 291 | if (dev->features & NETIF_F_LRO) |
292 | lro_receive_skb(&priv->lro.lro_mgr, skb, NULL); | ||
293 | else | ||
294 | netif_receive_skb(skb); | ||
292 | 295 | ||
293 | repost: | 296 | repost: |
294 | if (unlikely(ipoib_ib_post_receive(dev, wr_id))) | 297 | if (unlikely(ipoib_ib_post_receive(dev, wr_id))) |
@@ -440,6 +443,9 @@ poll_more: | |||
440 | } | 443 | } |
441 | 444 | ||
442 | if (done < budget) { | 445 | if (done < budget) { |
446 | if (dev->features & NETIF_F_LRO) | ||
447 | lro_flush_all(&priv->lro.lro_mgr); | ||
448 | |||
443 | netif_rx_complete(dev, napi); | 449 | netif_rx_complete(dev, napi); |
444 | if (unlikely(ib_req_notify_cq(priv->recv_cq, | 450 | if (unlikely(ib_req_notify_cq(priv->recv_cq, |
445 | IB_CQ_NEXT_COMP | | 451 | IB_CQ_NEXT_COMP | |
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index bfe1dbf99207..fead88f7fb17 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c | |||
@@ -60,6 +60,15 @@ MODULE_PARM_DESC(send_queue_size, "Number of descriptors in send queue"); | |||
60 | module_param_named(recv_queue_size, ipoib_recvq_size, int, 0444); | 60 | module_param_named(recv_queue_size, ipoib_recvq_size, int, 0444); |
61 | MODULE_PARM_DESC(recv_queue_size, "Number of descriptors in receive queue"); | 61 | MODULE_PARM_DESC(recv_queue_size, "Number of descriptors in receive queue"); |
62 | 62 | ||
63 | static int lro; | ||
64 | module_param(lro, bool, 0444); | ||
65 | MODULE_PARM_DESC(lro, "Enable LRO (Large Receive Offload)"); | ||
66 | |||
67 | static int lro_max_aggr = IPOIB_LRO_MAX_AGGR; | ||
68 | module_param(lro_max_aggr, int, 0644); | ||
69 | MODULE_PARM_DESC(lro_max_aggr, "LRO: Max packets to be aggregated " | ||
70 | "(default = 64)"); | ||
71 | |||
63 | #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG | 72 | #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG |
64 | int ipoib_debug_level; | 73 | int ipoib_debug_level; |
65 | 74 | ||
@@ -936,6 +945,54 @@ static const struct header_ops ipoib_header_ops = { | |||
936 | .create = ipoib_hard_header, | 945 | .create = ipoib_hard_header, |
937 | }; | 946 | }; |
938 | 947 | ||
948 | static int get_skb_hdr(struct sk_buff *skb, void **iphdr, | ||
949 | void **tcph, u64 *hdr_flags, void *priv) | ||
950 | { | ||
951 | unsigned int ip_len; | ||
952 | struct iphdr *iph; | ||
953 | |||
954 | if (unlikely(skb->protocol != htons(ETH_P_IP))) | ||
955 | return -1; | ||
956 | |||
957 | /* | ||
958 | * In the future we may add an else clause that verifies the | ||
959 | * checksum and allows devices which do not calculate checksum | ||
960 | * to use LRO. | ||
961 | */ | ||
962 | if (unlikely(skb->ip_summed != CHECKSUM_UNNECESSARY)) | ||
963 | return -1; | ||
964 | |||
965 | /* Check for non-TCP packet */ | ||
966 | skb_reset_network_header(skb); | ||
967 | iph = ip_hdr(skb); | ||
968 | if (iph->protocol != IPPROTO_TCP) | ||
969 | return -1; | ||
970 | |||
971 | ip_len = ip_hdrlen(skb); | ||
972 | skb_set_transport_header(skb, ip_len); | ||
973 | *tcph = tcp_hdr(skb); | ||
974 | |||
975 | /* check if IP header and TCP header are complete */ | ||
976 | if (ntohs(iph->tot_len) < ip_len + tcp_hdrlen(skb)) | ||
977 | return -1; | ||
978 | |||
979 | *hdr_flags = LRO_IPV4 | LRO_TCP; | ||
980 | *iphdr = iph; | ||
981 | |||
982 | return 0; | ||
983 | } | ||
984 | |||
985 | static void ipoib_lro_setup(struct ipoib_dev_priv *priv) | ||
986 | { | ||
987 | priv->lro.lro_mgr.max_aggr = lro_max_aggr; | ||
988 | priv->lro.lro_mgr.max_desc = IPOIB_MAX_LRO_DESCRIPTORS; | ||
989 | priv->lro.lro_mgr.lro_arr = priv->lro.lro_desc; | ||
990 | priv->lro.lro_mgr.get_skb_header = get_skb_hdr; | ||
991 | priv->lro.lro_mgr.features = LRO_F_NAPI; | ||
992 | priv->lro.lro_mgr.dev = priv->dev; | ||
993 | priv->lro.lro_mgr.ip_summed_aggr = CHECKSUM_UNNECESSARY; | ||
994 | } | ||
995 | |||
939 | static void ipoib_setup(struct net_device *dev) | 996 | static void ipoib_setup(struct net_device *dev) |
940 | { | 997 | { |
941 | struct ipoib_dev_priv *priv = netdev_priv(dev); | 998 | struct ipoib_dev_priv *priv = netdev_priv(dev); |
@@ -975,6 +1032,8 @@ static void ipoib_setup(struct net_device *dev) | |||
975 | 1032 | ||
976 | priv->dev = dev; | 1033 | priv->dev = dev; |
977 | 1034 | ||
1035 | ipoib_lro_setup(priv); | ||
1036 | |||
978 | spin_lock_init(&priv->lock); | 1037 | spin_lock_init(&priv->lock); |
979 | spin_lock_init(&priv->tx_lock); | 1038 | spin_lock_init(&priv->tx_lock); |
980 | 1039 | ||
@@ -1152,6 +1211,9 @@ static struct net_device *ipoib_add_port(const char *format, | |||
1152 | priv->dev->features |= NETIF_F_SG | NETIF_F_IP_CSUM; | 1211 | priv->dev->features |= NETIF_F_SG | NETIF_F_IP_CSUM; |
1153 | } | 1212 | } |
1154 | 1213 | ||
1214 | if (lro) | ||
1215 | priv->dev->features |= NETIF_F_LRO; | ||
1216 | |||
1155 | /* | 1217 | /* |
1156 | * Set the full membership bit, so that we join the right | 1218 | * Set the full membership bit, so that we join the right |
1157 | * broadcast group, etc. | 1219 | * broadcast group, etc. |