aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
authorJerry Chu <hkchu@google.com>2013-12-11 23:53:45 -0500
committerDavid S. Miller <davem@davemloft.net>2013-12-12 13:47:53 -0500
commit299603e8370a93dd5d8e8d800f0dff1ce2c53d36 (patch)
tree2a10106aabe88c278a0cd02b93af1add04f5ffcc /net/ipv4
parenta46dc748caea185d4d0978280a1af0112bf6a8f8 (diff)
net-gro: Prepare GRO stack for the upcoming tunneling support
This patch modifies the GRO stack to avoid the use of "network_header" and associated macros like ip_hdr() and ipv6_hdr() in order to allow an arbitrary number of IP hdrs (v4 or v6) to be used in the encapsulation chain. This lays the foundation for various IP tunneling support (IP-in-IP, GRE, VXLAN, SIT,...) to be added later. With this patch, the GRO stack traversing now is mostly based on skb_gro_offset rather than special hdr offsets saved in skb (e.g., skb->network_header). As a result all but the top layer (i.e., the transport layer) must have hdrs of the same length in order for a pkt to be considered for aggregation. Therefore when adding a new encap layer (e.g., for tunneling), one must check and skip flows (e.g., by setting NAPI_GRO_CB(p)->same_flow to 0) that have a different hdr length. Note that unlike the network header, the transport header can and will continue to be set by the GRO code since there will be at most one "transport layer" in the encap chain. Signed-off-by: H.K. Jerry Chu <hkchu@google.com> Suggested-by: Eric Dumazet <edumazet@google.com> Reviewed-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/af_inet.c25
-rw-r--r--net/ipv4/tcp_offload.c9
2 files changed, 24 insertions, 10 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 70011e029ac1..ef4f9df6d698 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1377,8 +1377,12 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
1377 if (!NAPI_GRO_CB(p)->same_flow) 1377 if (!NAPI_GRO_CB(p)->same_flow)
1378 continue; 1378 continue;
1379 1379
1380 iph2 = ip_hdr(p); 1380 iph2 = (struct iphdr *)(p->data + off);
1381 1381 /* The above works because, with the exception of the top
1382 * (inner most) layer, we only aggregate pkts with the same
1383 * hdr length so all the hdrs we'll need to verify will start
1384 * at the same offset.
1385 */
1382 if ((iph->protocol ^ iph2->protocol) | 1386 if ((iph->protocol ^ iph2->protocol) |
1383 ((__force u32)iph->saddr ^ (__force u32)iph2->saddr) | 1387 ((__force u32)iph->saddr ^ (__force u32)iph2->saddr) |
1384 ((__force u32)iph->daddr ^ (__force u32)iph2->daddr)) { 1388 ((__force u32)iph->daddr ^ (__force u32)iph2->daddr)) {
@@ -1397,6 +1401,11 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
1397 } 1401 }
1398 1402
1399 NAPI_GRO_CB(skb)->flush |= flush; 1403 NAPI_GRO_CB(skb)->flush |= flush;
1404 skb_set_network_header(skb, off);
1405 /* The above will be needed by the transport layer if there is one
1406 * immediately following this IP hdr.
1407 */
1408
1400 skb_gro_pull(skb, sizeof(*iph)); 1409 skb_gro_pull(skb, sizeof(*iph));
1401 skb_set_transport_header(skb, skb_gro_offset(skb)); 1410 skb_set_transport_header(skb, skb_gro_offset(skb));
1402 1411
@@ -1411,10 +1420,10 @@ out:
1411 return pp; 1420 return pp;
1412} 1421}
1413 1422
1414static int inet_gro_complete(struct sk_buff *skb) 1423static int inet_gro_complete(struct sk_buff *skb, int nhoff)
1415{ 1424{
1416 __be16 newlen = htons(skb->len - skb_network_offset(skb)); 1425 __be16 newlen = htons(skb->len - nhoff);
1417 struct iphdr *iph = ip_hdr(skb); 1426 struct iphdr *iph = (struct iphdr *)(skb->data + nhoff);
1418 const struct net_offload *ops; 1427 const struct net_offload *ops;
1419 int proto = iph->protocol; 1428 int proto = iph->protocol;
1420 int err = -ENOSYS; 1429 int err = -ENOSYS;
@@ -1427,7 +1436,11 @@ static int inet_gro_complete(struct sk_buff *skb)
1427 if (WARN_ON(!ops || !ops->callbacks.gro_complete)) 1436 if (WARN_ON(!ops || !ops->callbacks.gro_complete))
1428 goto out_unlock; 1437 goto out_unlock;
1429 1438
1430 err = ops->callbacks.gro_complete(skb); 1439 /* Only need to add sizeof(*iph) to get to the next hdr below
1440 * because any hdr with option will have been flushed in
1441 * inet_gro_receive().
1442 */
1443 err = ops->callbacks.gro_complete(skb, nhoff + sizeof(*iph));
1431 1444
1432out_unlock: 1445out_unlock:
1433 rcu_read_unlock(); 1446 rcu_read_unlock();
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index 05606353c7e7..2658a27f540d 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -240,7 +240,7 @@ int tcp_gro_complete(struct sk_buff *skb)
240{ 240{
241 struct tcphdr *th = tcp_hdr(skb); 241 struct tcphdr *th = tcp_hdr(skb);
242 242
243 skb->csum_start = skb_transport_header(skb) - skb->head; 243 skb->csum_start = (unsigned char *)th - skb->head;
244 skb->csum_offset = offsetof(struct tcphdr, check); 244 skb->csum_offset = offsetof(struct tcphdr, check);
245 skb->ip_summed = CHECKSUM_PARTIAL; 245 skb->ip_summed = CHECKSUM_PARTIAL;
246 246
@@ -272,6 +272,7 @@ static int tcp_v4_gso_send_check(struct sk_buff *skb)
272 272
273static struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb) 273static struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
274{ 274{
275 /* Use the IP hdr immediately preceding for this transport */
275 const struct iphdr *iph = skb_gro_network_header(skb); 276 const struct iphdr *iph = skb_gro_network_header(skb);
276 __wsum wsum; 277 __wsum wsum;
277 278
@@ -303,13 +304,13 @@ skip_csum:
303 return tcp_gro_receive(head, skb); 304 return tcp_gro_receive(head, skb);
304} 305}
305 306
306static int tcp4_gro_complete(struct sk_buff *skb) 307static int tcp4_gro_complete(struct sk_buff *skb, int thoff)
307{ 308{
308 const struct iphdr *iph = ip_hdr(skb); 309 const struct iphdr *iph = ip_hdr(skb);
309 struct tcphdr *th = tcp_hdr(skb); 310 struct tcphdr *th = tcp_hdr(skb);
310 311
311 th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb), 312 th->check = ~tcp_v4_check(skb->len - thoff, iph->saddr,
312 iph->saddr, iph->daddr, 0); 313 iph->daddr, 0);
313 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; 314 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
314 315
315 return tcp_gro_complete(skb); 316 return tcp_gro_complete(skb);