aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHerbert Xu <herbert@gondor.apana.org.au>2008-12-16 02:41:09 -0500
committerDavid S. Miller <davem@davemloft.net>2008-12-16 02:41:09 -0500
commit73cc19f1556b95976934de236fd9043f7208844f (patch)
tree4ebe390a9b328bba32d149b9f83e998836806f5d
parentd565b0a1a9b6ee7dff46e1f68b26b526ac11ae50 (diff)
ipv4: Add GRO infrastructure
This patch adds GRO support for IPv4.

The criteria for merging are more stringent than LRO's; in particular, we require all fields in the IP header to be identical except for the length, ID and checksum. In addition, the ID must form an arithmetic sequence with a difference of one.

The ID requirement might seem overly strict; however, most hardware TSO solutions already obey this rule. Linux itself also obeys this whether GSO is in use or not.

In future we could relax this rule by storing the IDs (or rather making sure that we don't drop them when pulling the aggregate skb's tail).

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/net/protocol.h3
-rw-r--r--net/ipv4/af_inet.c97
2 files changed, 100 insertions, 0 deletions
diff --git a/include/net/protocol.h b/include/net/protocol.h
index 8d024d7cb741..cb2965aa1b62 100644
--- a/include/net/protocol.h
+++ b/include/net/protocol.h
@@ -39,6 +39,9 @@ struct net_protocol {
39 int (*gso_send_check)(struct sk_buff *skb); 39 int (*gso_send_check)(struct sk_buff *skb);
40 struct sk_buff *(*gso_segment)(struct sk_buff *skb, 40 struct sk_buff *(*gso_segment)(struct sk_buff *skb,
41 int features); 41 int features);
42 struct sk_buff **(*gro_receive)(struct sk_buff **head,
43 struct sk_buff *skb);
44 int (*gro_complete)(struct sk_buff *skb);
42 unsigned int no_policy:1, 45 unsigned int no_policy:1,
43 netns_ok:1; 46 netns_ok:1;
44}; 47};
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index fe03048c130d..a85595307fa7 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -94,6 +94,7 @@
94#include <linux/igmp.h> 94#include <linux/igmp.h>
95#include <linux/inetdevice.h> 95#include <linux/inetdevice.h>
96#include <linux/netdevice.h> 96#include <linux/netdevice.h>
97#include <net/checksum.h>
97#include <net/ip.h> 98#include <net/ip.h>
98#include <net/protocol.h> 99#include <net/protocol.h>
99#include <net/arp.h> 100#include <net/arp.h>
@@ -1241,6 +1242,100 @@ out:
1241 return segs; 1242 return segs;
1242} 1243}
1243 1244
/*
 * inet_gro_receive - IPv4-level GRO receive hook.
 * @head: list of held packets, walked through ->next
 * @skb:  newly received packet whose data starts at the IPv4 header
 *
 * Checks the IPv4 header of @skb, updates the per-packet GRO state
 * (same_flow / flush) of every entry on @head, strips the IP header from
 * @skb and passes both on to the gro_receive handler registered in
 * inet_protos[] for the header's protocol.
 *
 * Returns the value returned by the protocol's gro_receive handler, or NULL
 * when the header cannot be pulled, no handler exists, or the header is
 * rejected.  On every path NAPI_GRO_CB(skb)->flush is OR-ed with the local
 * "flush" flag, which is still 1 on all of the early-exit paths.
 */
1245static struct sk_buff **inet_gro_receive(struct sk_buff **head,
1246 struct sk_buff *skb)
1247{
1248 struct net_protocol *ops;
1249 struct sk_buff **pp = NULL;
1250 struct sk_buff *p;
1251 struct iphdr *iph;
1252 int flush = 1;
1253 int proto;
1254 int id;
1255
/* The header is only read after pskb_may_pull() succeeds for its size. */
1256 if (unlikely(!pskb_may_pull(skb, sizeof(*iph))))
1257 goto out;
1258
1259 iph = ip_hdr(skb);
/* Masked protocol number is the index into inet_protos[]. */
1260 proto = iph->protocol & (MAX_INET_PROTOS - 1);
1261
/*
 * The handler is looked up under rcu_read_lock(); the read-side section
 * stays open across the call into the handler and ends at out_unlock.
 */
1262 rcu_read_lock();
1263 ops = rcu_dereference(inet_protos[proto]);
1264 if (!ops || !ops->gro_receive)
1265 goto out_unlock;
1266
/* Only version 4 headers with ihl == 5 (no IP options) are accepted. */
1267 if (iph->version != 4 || iph->ihl != 5)
1268 goto out_unlock;
1269
/* A non-zero ip_fast_csum() result over the header rejects the packet. */
1270 if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
1271 goto out_unlock;
1272
/*
 * From here on "flush" is recomputed: it is set when the IP total length
 * differs from skb->len or when frag_off is anything other than exactly
 * IP_DF.
 */
1273 flush = ntohs(iph->tot_len) != skb->len ||
1274 iph->frag_off != htons(IP_DF);
/* IP ID in host byte order, compared against each held packet below. */
1275 id = ntohs(iph->id);
1276
1277 for (p = *head; p; p = p->next) {
1278 struct iphdr *iph2;
1279
/* Entries already marked as a different flow are left untouched. */
1280 if (!NAPI_GRO_CB(p)->same_flow)
1281 continue;
1282
1283 iph2 = ip_hdr(p);
1284
/*
 * Flow identity: protocol, TOS and the 8 bytes starting at saddr
 * (presumably saddr followed by daddr -- confirm against struct iphdr).
 * Any difference clears same_flow for this entry.
 */
1285 if (iph->protocol != iph2->protocol ||
1286 iph->tos != iph2->tos ||
1287 memcmp(&iph->saddr, &iph2->saddr, 8)) {
1288 NAPI_GRO_CB(p)->same_flow = 0;
1289 continue;
1290 }
1291
1292 /* All fields must match except length, ID and checksum. */
/*
 * Same flow, but mark the held packet for flushing when the 4 bytes
 * starting at frag_off differ (presumably frag_off, ttl and protocol --
 * confirm against struct iphdr), or when this packet's ID is not the held
 * packet's ID plus its count; the (u16) cast makes that sum wrap at 16
 * bits.
 */
1293 NAPI_GRO_CB(p)->flush |=
1294 memcmp(&iph->frag_off, &iph2->frag_off, 4) ||
1295 (u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) != id;
1296
/* The new packet's own flush condition also applies to the held packet. */
1297 NAPI_GRO_CB(p)->flush |= flush;
1298 }
1299
/*
 * Record the flush state, remove the IP header from the front of skb and
 * mark the new start of data as the transport header before calling the
 * protocol handler.
 */
1300 NAPI_GRO_CB(skb)->flush |= flush;
1301 __skb_pull(skb, sizeof(*iph));
1302 skb_reset_transport_header(skb);
1303
1304 pp = ops->gro_receive(head, skb);
1305
1306out_unlock:
1307 rcu_read_unlock();
1308
/* Reached on every path; on early exits "flush" is still its initial 1. */
1309out:
1310 NAPI_GRO_CB(skb)->flush |= flush;
1311
1312 return pp;
1313}
1314
/*
 * inet_gro_complete - IPv4-level GRO completion hook.
 * @skb: packet whose IPv4 header is rewritten in place
 *
 * Sets the header's tot_len to skb->len minus skb_network_offset(skb),
 * adjusts the header checksum for that change, then calls the gro_complete
 * handler registered in inet_protos[] for the header's protocol.
 *
 * Returns the handler's return value.  If no handler is registered, WARN_ON
 * fires and -ENOSYS is returned; the header has already been rewritten by
 * then.
 */
1315static int inet_gro_complete(struct sk_buff *skb)
1316{
1317 struct net_protocol *ops;
1318 struct iphdr *iph = ip_hdr(skb);
/* Masked protocol number is the index into inet_protos[]. */
1319 int proto = iph->protocol & (MAX_INET_PROTOS - 1);
1320 int err = -ENOSYS;
/* New total length, already converted to network byte order. */
1321 __be16 newlen = htons(skb->len - skb_network_offset(skb));
1322
/*
 * The checksum is patched using the old tot_len value, so this must run
 * before tot_len itself is overwritten.
 */
1323 csum_replace2(&iph->check, iph->tot_len, newlen);
1324 iph->tot_len = newlen;
1325
/* Handler lookup and call both happen inside the RCU read-side section. */
1326 rcu_read_lock();
1327 ops = rcu_dereference(inet_protos[proto]);
1328 if (WARN_ON(!ops || !ops->gro_complete))
1329 goto out_unlock;
1330
1331 err = ops->gro_complete(skb);
1332
1333out_unlock:
1334 rcu_read_unlock();
1335
1336 return err;
1337}
1338
1244int inet_ctl_sock_create(struct sock **sk, unsigned short family, 1339int inet_ctl_sock_create(struct sock **sk, unsigned short family,
1245 unsigned short type, unsigned char protocol, 1340 unsigned short type, unsigned char protocol,
1246 struct net *net) 1341 struct net *net)
@@ -1407,6 +1502,8 @@ static struct packet_type ip_packet_type = {
1407 .func = ip_rcv, 1502 .func = ip_rcv,
1408 .gso_send_check = inet_gso_send_check, 1503 .gso_send_check = inet_gso_send_check,
1409 .gso_segment = inet_gso_segment, 1504 .gso_segment = inet_gso_segment,
1505 .gro_receive = inet_gro_receive,
1506 .gro_complete = inet_gro_complete,
1410}; 1507};
1411 1508
1412static int __init inet_init(void) 1509static int __init inet_init(void)