author    Herbert Xu <herbert@gondor.apana.org.au>  2009-01-29 09:19:50 -0500
committer David S. Miller <davem@davemloft.net>     2009-01-29 19:33:03 -0500
commit    86911732d3996a9da07914b280621450111bb6da (patch)
tree      e787240d5ba869ddf4d0adfc3f9c69e0372e96ef /net/core
parent    5d0d9be8ef456afc6c3fb5f8aad06ef19b704b05 (diff)
gro: Avoid copying headers of unmerged packets
Unfortunately simplicity isn't always the best. The fraginfo
interface turned out to be suboptimal. The problem was quite
obvious. For every packet, we have to copy the headers from the
frags structure into skb->head, even though for 99% of the packets
this part is immediately thrown away after the merge.

LRO didn't have this problem because it directly read the headers
from the frags structure.

This patch attempts to address this by creating an interface that
allows GRO to access the headers in the first frag without having
to copy it. Because all drivers that use frags place the headers
in the first frag, this optimisation should be enough.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
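[Editor's note] To make the optimisation concrete, here is a minimal
user-space sketch of the idea; it is an illustration, not part of the
patch. The names struct pkt, struct frag and peek_header() are
simplified stand-ins for the skb/frag machinery; the interface the
patch actually adds is skb_gro_header() together with
skb_gro_mac_header() below.

#include <stddef.h>

/* Simplified stand-in for skb_frag_t: data lives in a backing buffer. */
struct frag {
	unsigned char *page;    /* backing buffer (stands in for a page) */
	size_t page_offset;     /* where the data starts in that buffer */
	size_t size;            /* bytes of data in this fragment */
};

/* Simplified stand-in for struct sk_buff. */
struct pkt {
	unsigned char *head;    /* linear area */
	size_t headlen;         /* bytes in the linear area */
	struct frag frags[4];
	int nr_frags;
};

/*
 * Return a pointer to hlen contiguous header bytes without copying,
 * or NULL if a copy would be needed.  This is the core of the
 * optimisation: in the common case the headers are read in place from
 * the first fragment instead of being pulled into the linear area.
 */
static void *peek_header(struct pkt *p, size_t hlen)
{
	if (hlen <= p->headlen)
		return p->head;         /* header already linear */

	if (p->headlen == 0 && p->nr_frags > 0 && hlen <= p->frags[0].size)
		return p->frags[0].page + p->frags[0].page_offset;

	return NULL;                    /* caller must fall back to a copy */
}

The real skb_gro_header() below does the same thing relative to a
parse offset tracked per packet (skb_gro_offset()), and it falls back
to pskb_may_pull() — the old copying behaviour — when the header is
not fully contained in the first frag, or when the frag sits in
highmem, where page_address() cannot be used directly.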
Diffstat (limited to 'net/core')
 net/core/dev.c    | 70 ++++++++++++++++++++++++++++++++++----------
 net/core/skbuff.c | 23 ++++++++++++-----
 2 files changed, 74 insertions(+), 19 deletions(-)
diff --git a/net/core/dev.c b/net/core/dev.c
index cd23ae15a1d5..df406dcf7482 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -215,6 +215,13 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
 	return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)];
 }
 
+static inline void *skb_gro_mac_header(struct sk_buff *skb)
+{
+	return skb_headlen(skb) ? skb_mac_header(skb) :
+	       page_address(skb_shinfo(skb)->frags[0].page) +
+	       skb_shinfo(skb)->frags[0].page_offset;
+}
+
 /* Device list insertion */
 static int list_netdevice(struct net_device *dev)
 {
@@ -2350,7 +2357,6 @@ static int napi_gro_complete(struct sk_buff *skb)
 
 out:
 	skb_shinfo(skb)->gso_size = 0;
-	__skb_push(skb, -skb_network_offset(skb));
 	return netif_receive_skb(skb);
 }
 
@@ -2368,6 +2374,25 @@ void napi_gro_flush(struct napi_struct *napi)
 }
 EXPORT_SYMBOL(napi_gro_flush);
 
+void *skb_gro_header(struct sk_buff *skb, unsigned int hlen)
+{
+	unsigned int offset = skb_gro_offset(skb);
+
+	hlen += offset;
+	if (hlen <= skb_headlen(skb))
+		return skb->data + offset;
+
+	if (unlikely(!skb_shinfo(skb)->nr_frags ||
+		     skb_shinfo(skb)->frags[0].size <=
+		     hlen - skb_headlen(skb) ||
+		     PageHighMem(skb_shinfo(skb)->frags[0].page)))
+		return pskb_may_pull(skb, hlen) ? skb->data + offset : NULL;
+
+	return page_address(skb_shinfo(skb)->frags[0].page) +
+	       skb_shinfo(skb)->frags[0].page_offset + offset;
+}
+EXPORT_SYMBOL(skb_gro_header);
+
 int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
 	struct sk_buff **pp = NULL;
@@ -2388,11 +2413,13 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 	rcu_read_lock();
 	list_for_each_entry_rcu(ptype, head, list) {
 		struct sk_buff *p;
+		void *mac;
 
 		if (ptype->type != type || ptype->dev || !ptype->gro_receive)
 			continue;
 
-		skb_reset_network_header(skb);
+		skb_set_network_header(skb, skb_gro_offset(skb));
+		mac = skb_gro_mac_header(skb);
 		mac_len = skb->network_header - skb->mac_header;
 		skb->mac_len = mac_len;
 		NAPI_GRO_CB(skb)->same_flow = 0;
@@ -2406,8 +2433,7 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 			continue;
 
 		if (p->mac_len != mac_len ||
-		    memcmp(skb_mac_header(p), skb_mac_header(skb),
-			   mac_len))
+		    memcmp(skb_mac_header(p), mac, mac_len))
 			NAPI_GRO_CB(p)->same_flow = 0;
 	}
 
@@ -2434,13 +2460,11 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 	if (same_flow)
 		goto ok;
 
-	if (NAPI_GRO_CB(skb)->flush || count >= MAX_GRO_SKBS) {
-		__skb_push(skb, -skb_network_offset(skb));
+	if (NAPI_GRO_CB(skb)->flush || count >= MAX_GRO_SKBS)
 		goto normal;
-	}
 
 	NAPI_GRO_CB(skb)->count = 1;
-	skb_shinfo(skb)->gso_size = skb->len;
+	skb_shinfo(skb)->gso_size = skb_gro_len(skb);
 	skb->next = napi->gro_list;
 	napi->gro_list = skb;
 	ret = GRO_HELD;
@@ -2488,6 +2512,8 @@ EXPORT_SYMBOL(napi_skb_finish);
 
 int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
+	skb_gro_reset_offset(skb);
+
 	return napi_skb_finish(__napi_gro_receive(napi, skb), skb);
 }
 EXPORT_SYMBOL(napi_gro_receive);
@@ -2506,6 +2532,7 @@ struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi,
 {
 	struct net_device *dev = napi->dev;
 	struct sk_buff *skb = napi->skb;
+	struct ethhdr *eth;
 
 	napi->skb = NULL;
 
@@ -2525,13 +2552,23 @@ struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi,
 	skb->len += info->len;
 	skb->truesize += info->len;
 
-	if (!pskb_may_pull(skb, ETH_HLEN)) {
+	skb_reset_mac_header(skb);
+	skb_gro_reset_offset(skb);
+
+	eth = skb_gro_header(skb, sizeof(*eth));
+	if (!eth) {
 		napi_reuse_skb(napi, skb);
 		skb = NULL;
 		goto out;
 	}
 
-	skb->protocol = eth_type_trans(skb, dev);
+	skb_gro_pull(skb, sizeof(*eth));
+
+	/*
+	 * This works because the only protocols we care about don't require
+	 * special handling.  We'll fix it up properly at the end.
+	 */
+	skb->protocol = eth->h_proto;
 
 	skb->ip_summed = info->ip_summed;
 	skb->csum = info->csum;
@@ -2544,10 +2581,21 @@ EXPORT_SYMBOL(napi_fraginfo_skb);
 int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, int ret)
 {
 	int err = NET_RX_SUCCESS;
+	int may;
 
 	switch (ret) {
 	case GRO_NORMAL:
-		return netif_receive_skb(skb);
+	case GRO_HELD:
+		may = pskb_may_pull(skb, skb_gro_offset(skb));
+		BUG_ON(!may);
+
+		skb->protocol = eth_type_trans(skb, napi->dev);
+
+		if (ret == GRO_NORMAL)
+			return netif_receive_skb(skb);
+
+		skb_gro_pull(skb, -ETH_HLEN);
+		break;
 
 	case GRO_DROP:
 		err = NET_RX_DROP;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 2e5f2ca3bdcd..f9f4065a7e9b 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2584,17 +2584,21 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
 	struct sk_buff *p = *head;
 	struct sk_buff *nskb;
 	unsigned int headroom;
-	unsigned int hlen = p->data - skb_mac_header(p);
-	unsigned int len = skb->len;
+	unsigned int len = skb_gro_len(skb);
 
-	if (hlen + p->len + len >= 65536)
+	if (p->len + len >= 65536)
 		return -E2BIG;
 
 	if (skb_shinfo(p)->frag_list)
 		goto merge;
-	else if (!skb_headlen(p) && !skb_headlen(skb) &&
-		 skb_shinfo(p)->nr_frags + skb_shinfo(skb)->nr_frags <
+	else if (skb_headlen(skb) <= skb_gro_offset(skb) &&
+		 skb_shinfo(p)->nr_frags + skb_shinfo(skb)->nr_frags <=
 		 MAX_SKB_FRAGS) {
+		skb_shinfo(skb)->frags[0].page_offset +=
+			skb_gro_offset(skb) - skb_headlen(skb);
+		skb_shinfo(skb)->frags[0].size -=
+			skb_gro_offset(skb) - skb_headlen(skb);
+
 		memcpy(skb_shinfo(p)->frags + skb_shinfo(p)->nr_frags,
 		       skb_shinfo(skb)->frags,
 		       skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t));
@@ -2611,7 +2615,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
 	}
 
 	headroom = skb_headroom(p);
-	nskb = netdev_alloc_skb(p->dev, headroom);
+	nskb = netdev_alloc_skb(p->dev, headroom + skb_gro_offset(p));
 	if (unlikely(!nskb))
 		return -ENOMEM;
 
@@ -2619,12 +2623,15 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
 	nskb->mac_len = p->mac_len;
 
 	skb_reserve(nskb, headroom);
+	__skb_put(nskb, skb_gro_offset(p));
 
-	skb_set_mac_header(nskb, -hlen);
+	skb_set_mac_header(nskb, skb_mac_header(p) - p->data);
 	skb_set_network_header(nskb, skb_network_offset(p));
 	skb_set_transport_header(nskb, skb_transport_offset(p));
 
-	memcpy(skb_mac_header(nskb), skb_mac_header(p), hlen);
+	__skb_pull(p, skb_gro_offset(p));
+	memcpy(skb_mac_header(nskb), skb_mac_header(p),
+	       p->data - skb_mac_header(p));
 
 	*NAPI_GRO_CB(nskb) = *NAPI_GRO_CB(p);
 	skb_shinfo(nskb)->frag_list = p;