aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Herbert Xu <herbert@gondor.apana.org.au> 2009-01-04 19:13:40 -0500
committer David S. Miller <davem@davemloft.net> 2009-01-04 19:13:40 -0500
commit 5d38a079ce3971f932bbdc0dc5b887806fabd5dc (patch)
tree 79d948098add1f6c52ecd42c151ce6b6fa1dbc5a
parent b530256d2e0f1a75fab31f9821129fff1bb49faa (diff)
gro: Add page frag support
This patch allows GRO to merge page frags (skb_shinfo(skb)->frags) in one skb, rather than using the less efficient frag_list.

It also adds a new interface, napi_gro_frags, to allow drivers to inject page frags directly into the stack without allocating an skb. This is intended to be the GRO equivalent for LRO's lro_receive_frags interface.

The existing GSO interface can already handle page frags with or without an appended frag_list so nothing needs to be changed there.

The merging itself is rather simple. We store any new frag entries after the last existing entry, without checking whether the first new entry can be merged with the last existing entry. Making this check would actually be easy but since no existing driver can produce contiguous frags anyway it would just be mental masturbation.

If the total number of entries would exceed the capacity of a single skb, we simply resort to using frag_list as we do now.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- include/linux/netdevice.h 16
-rw-r--r-- net/core/dev.c 91
-rw-r--r-- net/core/skbuff.c 14
3 files changed, 114 insertions, 7 deletions
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 41e1224651cf..c28bbba3c23d 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -313,10 +313,11 @@ struct napi_struct {
313#ifdef CONFIG_NETPOLL 313#ifdef CONFIG_NETPOLL
314 spinlock_t poll_lock; 314 spinlock_t poll_lock;
315 int poll_owner; 315 int poll_owner;
316 struct net_device *dev;
317#endif 316#endif
317 struct net_device *dev;
318 struct list_head dev_list; 318 struct list_head dev_list;
319 struct sk_buff *gro_list; 319 struct sk_buff *gro_list;
320 struct sk_buff *skb;
320}; 321};
321 322
322enum 323enum
@@ -990,6 +991,9 @@ struct napi_gro_cb {
990 991
991 /* Number of segments aggregated. */ 992 /* Number of segments aggregated. */
992 int count; 993 int count;
994
995 /* Free the skb? */
996 int free;
993}; 997};
994 998
995#define NAPI_GRO_CB(skb) ((struct napi_gro_cb *)(skb)->cb) 999#define NAPI_GRO_CB(skb) ((struct napi_gro_cb *)(skb)->cb)
@@ -1011,6 +1015,14 @@ struct packet_type {
1011 struct list_head list; 1015 struct list_head list;
1012}; 1016};
1013 1017
/*
 * Packet description passed to napi_gro_frags(): a packet whose payload
 * lives entirely in page fragments, plus the per-packet metadata that
 * napi_gro_frags() copies onto the skb it builds.
 */
1018struct napi_gro_fraginfo {
/* Fragment descriptors; copied into skb_shinfo(skb)->frags. */
1019 skb_frag_t frags[MAX_SKB_FRAGS];
/* Number of frags[] entries in use; must not exceed MAX_SKB_FRAGS. */
1020 unsigned int nr_frags;
/* Copied to skb->ip_summed. */
1021 unsigned int ip_summed;
/* Packet length; becomes skb->data_len and is added to skb->len and skb->truesize. */
1022 unsigned int len;
/* Copied to skb->csum. */
1023 __wsum csum;
1024};
1025
1014#include <linux/interrupt.h> 1026#include <linux/interrupt.h>
1015#include <linux/notifier.h> 1027#include <linux/notifier.h>
1016 1028
@@ -1363,6 +1375,8 @@ extern int netif_receive_skb(struct sk_buff *skb);
1363extern void napi_gro_flush(struct napi_struct *napi); 1375extern void napi_gro_flush(struct napi_struct *napi);
1364extern int napi_gro_receive(struct napi_struct *napi, 1376extern int napi_gro_receive(struct napi_struct *napi,
1365 struct sk_buff *skb); 1377 struct sk_buff *skb);
1378extern int napi_gro_frags(struct napi_struct *napi,
1379 struct napi_gro_fraginfo *info);
1366extern void netif_nit_deliver(struct sk_buff *skb); 1380extern void netif_nit_deliver(struct sk_buff *skb);
1367extern int dev_valid_name(const char *name); 1381extern int dev_valid_name(const char *name);
1368extern int dev_ioctl(struct net *net, unsigned int cmd, void __user *); 1382extern int dev_ioctl(struct net *net, unsigned int cmd, void __user *);
diff --git a/net/core/dev.c b/net/core/dev.c
index 1e1a68066457..382df6c09eec 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -132,6 +132,9 @@
132/* Instead of increasing this, you should create a hash table. */ 132/* Instead of increasing this, you should create a hash table. */
133#define MAX_GRO_SKBS 8 133#define MAX_GRO_SKBS 8
134 134
135/* This should be increased if a protocol with a bigger head is added. */
136#define GRO_MAX_HEAD (MAX_HEADER + 128)
137
135/* 138/*
136 * The list of packet types we will receive (as opposed to discard) 139 * The list of packet types we will receive (as opposed to discard)
137 * and the routines to invoke. 140 * and the routines to invoke.
@@ -2345,7 +2348,7 @@ static int napi_gro_complete(struct sk_buff *skb)
2345 struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK]; 2348 struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
2346 int err = -ENOENT; 2349 int err = -ENOENT;
2347 2350
2348 if (!skb_shinfo(skb)->frag_list) 2351 if (NAPI_GRO_CB(skb)->count == 1)
2349 goto out; 2352 goto out;
2350 2353
2351 rcu_read_lock(); 2354 rcu_read_lock();
@@ -2384,7 +2387,7 @@ void napi_gro_flush(struct napi_struct *napi)
2384} 2387}
2385EXPORT_SYMBOL(napi_gro_flush); 2388EXPORT_SYMBOL(napi_gro_flush);
2386 2389
2387int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) 2390static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
2388{ 2391{
2389 struct sk_buff **pp = NULL; 2392 struct sk_buff **pp = NULL;
2390 struct packet_type *ptype; 2393 struct packet_type *ptype;
@@ -2393,6 +2396,7 @@ int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
2393 int count = 0; 2396 int count = 0;
2394 int same_flow; 2397 int same_flow;
2395 int mac_len; 2398 int mac_len;
2399 int free;
2396 2400
2397 if (!(skb->dev->features & NETIF_F_GRO)) 2401 if (!(skb->dev->features & NETIF_F_GRO))
2398 goto normal; 2402 goto normal;
@@ -2409,6 +2413,7 @@ int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
2409 skb->mac_len = mac_len; 2413 skb->mac_len = mac_len;
2410 NAPI_GRO_CB(skb)->same_flow = 0; 2414 NAPI_GRO_CB(skb)->same_flow = 0;
2411 NAPI_GRO_CB(skb)->flush = 0; 2415 NAPI_GRO_CB(skb)->flush = 0;
2416 NAPI_GRO_CB(skb)->free = 0;
2412 2417
2413 for (p = napi->gro_list; p; p = p->next) { 2418 for (p = napi->gro_list; p; p = p->next) {
2414 count++; 2419 count++;
@@ -2428,6 +2433,7 @@ int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
2428 goto normal; 2433 goto normal;
2429 2434
2430 same_flow = NAPI_GRO_CB(skb)->same_flow; 2435 same_flow = NAPI_GRO_CB(skb)->same_flow;
2436 free = NAPI_GRO_CB(skb)->free;
2431 2437
2432 if (pp) { 2438 if (pp) {
2433 struct sk_buff *nskb = *pp; 2439 struct sk_buff *nskb = *pp;
@@ -2452,13 +2458,86 @@ int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
2452 napi->gro_list = skb; 2458 napi->gro_list = skb;
2453 2459
2454ok: 2460ok:
2455 return NET_RX_SUCCESS; 2461 return free;
2456 2462
2457normal: 2463normal:
2458 return netif_receive_skb(skb); 2464 return -1;
2465}
2466
2467int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
2468{
2469 switch (__napi_gro_receive(napi, skb)) {
2470 case -1:
2471 return netif_receive_skb(skb);
2472
2473 case 1:
2474 kfree_skb(skb);
2475 break;
2476 }
2477
2478 return NET_RX_SUCCESS;
2459} 2479}
2460EXPORT_SYMBOL(napi_gro_receive); 2480EXPORT_SYMBOL(napi_gro_receive);
2461 2481
2482int napi_gro_frags(struct napi_struct *napi, struct napi_gro_fraginfo *info)
2483{
2484 struct net_device *dev = napi->dev;
2485 struct sk_buff *skb = napi->skb;
2486 int err = NET_RX_DROP;
2487
2488 napi->skb = NULL;
2489
2490 if (!skb) {
2491 skb = netdev_alloc_skb(dev, GRO_MAX_HEAD + NET_IP_ALIGN);
2492 if (!skb)
2493 goto out;
2494
2495 skb_reserve(skb, NET_IP_ALIGN);
2496 }
2497
2498 BUG_ON(info->nr_frags > MAX_SKB_FRAGS);
2499 skb_shinfo(skb)->nr_frags = info->nr_frags;
2500 memcpy(skb_shinfo(skb)->frags, info->frags, sizeof(info->frags));
2501
2502 skb->data_len = info->len;
2503 skb->len += info->len;
2504 skb->truesize += info->len;
2505
2506 if (!pskb_may_pull(skb, ETH_HLEN))
2507 goto reuse;
2508
2509 err = NET_RX_SUCCESS;
2510
2511 skb->protocol = eth_type_trans(skb, dev);
2512
2513 skb->ip_summed = info->ip_summed;
2514 skb->csum = info->csum;
2515
2516 switch (__napi_gro_receive(napi, skb)) {
2517 case -1:
2518 return netif_receive_skb(skb);
2519
2520 case 0:
2521 goto out;
2522 }
2523
2524reuse:
2525 skb_shinfo(skb)->nr_frags = 0;
2526
2527 skb->len -= skb->data_len;
2528 skb->truesize -= skb->data_len;
2529 skb->data_len = 0;
2530
2531 __skb_pull(skb, skb_headlen(skb));
2532 skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb));
2533
2534 napi->skb = skb;
2535
2536out:
2537 return err;
2538}
2539EXPORT_SYMBOL(napi_gro_frags);
2540
2462static int process_backlog(struct napi_struct *napi, int quota) 2541static int process_backlog(struct napi_struct *napi, int quota)
2463{ 2542{
2464 int work = 0; 2543 int work = 0;
@@ -2537,11 +2616,12 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
2537{ 2616{
2538 INIT_LIST_HEAD(&napi->poll_list); 2617 INIT_LIST_HEAD(&napi->poll_list);
2539 napi->gro_list = NULL; 2618 napi->gro_list = NULL;
2619 napi->skb = NULL;
2540 napi->poll = poll; 2620 napi->poll = poll;
2541 napi->weight = weight; 2621 napi->weight = weight;
2542 list_add(&napi->dev_list, &dev->napi_list); 2622 list_add(&napi->dev_list, &dev->napi_list);
2543#ifdef CONFIG_NETPOLL
2544 napi->dev = dev; 2623 napi->dev = dev;
2624#ifdef CONFIG_NETPOLL
2545 spin_lock_init(&napi->poll_lock); 2625 spin_lock_init(&napi->poll_lock);
2546 napi->poll_owner = -1; 2626 napi->poll_owner = -1;
2547#endif 2627#endif
@@ -2554,6 +2634,7 @@ void netif_napi_del(struct napi_struct *napi)
2554 struct sk_buff *skb, *next; 2634 struct sk_buff *skb, *next;
2555 2635
2556 list_del_init(&napi->dev_list); 2636 list_del_init(&napi->dev_list);
2637 kfree(napi->skb);
2557 2638
2558 for (skb = napi->gro_list; skb; skb = next) { 2639 for (skb = napi->gro_list; skb; skb = next) {
2559 next = skb->next; 2640 next = skb->next;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 3aafb10325b8..5110b359c758 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2594,6 +2594,17 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2594 2594
2595 if (skb_shinfo(p)->frag_list) 2595 if (skb_shinfo(p)->frag_list)
2596 goto merge; 2596 goto merge;
2597 else if (!skb_headlen(p) && !skb_headlen(skb) &&
2598 skb_shinfo(p)->nr_frags + skb_shinfo(skb)->nr_frags <
2599 MAX_SKB_FRAGS) {
2600 memcpy(skb_shinfo(p)->frags + skb_shinfo(p)->nr_frags,
2601 skb_shinfo(skb)->frags,
2602 skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t));
2603
2604 skb_shinfo(p)->nr_frags += skb_shinfo(skb)->nr_frags;
2605 NAPI_GRO_CB(skb)->free = 1;
2606 goto done;
2607 }
2597 2608
2598 headroom = skb_headroom(p); 2609 headroom = skb_headroom(p);
2599 nskb = netdev_alloc_skb(p->dev, headroom); 2610 nskb = netdev_alloc_skb(p->dev, headroom);
@@ -2628,11 +2639,12 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2628 p = nskb; 2639 p = nskb;
2629 2640
2630merge: 2641merge:
2631 NAPI_GRO_CB(p)->count++;
2632 p->prev->next = skb; 2642 p->prev->next = skb;
2633 p->prev = skb; 2643 p->prev = skb;
2634 skb_header_release(skb); 2644 skb_header_release(skb);
2635 2645
2646done:
2647 NAPI_GRO_CB(p)->count++;
2636 p->data_len += skb->len; 2648 p->data_len += skb->len;
2637 p->truesize += skb->len; 2649 p->truesize += skb->len;
2638 p->len += skb->len; 2650 p->len += skb->len;