aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
author Herbert Xu <herbert@gondor.apana.org.au> 2008-12-16 02:38:52 -0500
committer David S. Miller <davem@davemloft.net> 2008-12-16 02:38:52 -0500
commit d565b0a1a9b6ee7dff46e1f68b26b526ac11ae50 (patch)
tree 3526815ab2b60c37f474c25ad1d8fb207644efcc /net
parent 1a881f27c50b4fbd6858a8696a189263621136b0 (diff)
net: Add Generic Receive Offload infrastructure
This patch adds the top-level GRO (Generic Receive Offload) infrastructure. This is pretty similar to LRO except that this is protocol-independent. Instead of holding packets in an lro_mgr structure, they're now held in napi_struct. Drivers that intend to use this can set the NETIF_F_GRO bit and call napi_gro_receive instead of netif_receive_skb, or just call netif_rx. The latter will call napi_gro_receive automatically. When napi_gro_receive is used, the driver must either call napi_complete/napi_rx_complete, or call napi_gro_flush in softirq context if the driver uses the primitives __napi_complete/__napi_rx_complete. Protocols will set the gro_receive and gro_complete function pointers in order to participate in this scheme. In addition to the packet, gro_receive will get a list of currently held packets. Each packet in the list has a same_flow field which is non-zero if it is a potential match for the new packet. Each packet that may match also has a flush field which is non-zero if the held packet must not be merged with the new packet. Once gro_receive has determined that the new skb matches a held packet, the held packet may be processed immediately if the new skb cannot be merged with it. In this case gro_receive should return the pointer to the existing skb in gro_list. Otherwise the new skb should be merged into the existing packet and NULL should be returned, unless the new skb makes it impossible for any further merges to be made (e.g., FIN packet), in which case the merged skb should be returned. Whenever the skb is merged into an existing entry, the gro_receive function should set NAPI_GRO_CB(skb)->same_flow. Note that if an skb merely matches an existing entry but can't be merged with it, then this shouldn't be set. If gro_receive finds it pointless to hold the new skb for future merging, it should set NAPI_GRO_CB(skb)->flush. Held packets will be flushed by napi_gro_flush which is called by napi_complete and napi_rx_complete. 
Currently held packets are stored in a singly linked list just like LRO. The list is limited to a maximum of 8 entries. In the future, this may be expanded to use a hash table to allow more flows to be held for merging. Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r-- net/core/dev.c 193
1 files changed, 191 insertions, 2 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index e415f0b0d0d0..d8d7d1fccde4 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -129,6 +129,9 @@
129 129
130#include "net-sysfs.h" 130#include "net-sysfs.h"
131 131
132/* Instead of increasing this, you should create a hash table. */
133#define MAX_GRO_SKBS 8
134
132/* 135/*
133 * The list of packet types we will receive (as opposed to discard) 136 * The list of packet types we will receive (as opposed to discard)
134 * and the routines to invoke. 137 * and the routines to invoke.
@@ -2335,6 +2338,122 @@ static void flush_backlog(void *arg)
2335 } 2338 }
2336} 2339}
2337 2340
2341static int napi_gro_complete(struct sk_buff *skb)
2342{
2343 struct packet_type *ptype;
2344 __be16 type = skb->protocol;
2345 struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
2346 int err = -ENOENT;
2347
2348 if (!skb_shinfo(skb)->frag_list)
2349 goto out;
2350
2351 rcu_read_lock();
2352 list_for_each_entry_rcu(ptype, head, list) {
2353 if (ptype->type != type || ptype->dev || !ptype->gro_complete)
2354 continue;
2355
2356 err = ptype->gro_complete(skb);
2357 break;
2358 }
2359 rcu_read_unlock();
2360
2361 if (err) {
2362 WARN_ON(&ptype->list == head);
2363 kfree_skb(skb);
2364 return NET_RX_SUCCESS;
2365 }
2366
2367out:
2368 __skb_push(skb, -skb_network_offset(skb));
2369 return netif_receive_skb(skb);
2370}
2371
2372void napi_gro_flush(struct napi_struct *napi)
2373{
2374 struct sk_buff *skb, *next;
2375
2376 for (skb = napi->gro_list; skb; skb = next) {
2377 next = skb->next;
2378 skb->next = NULL;
2379 napi_gro_complete(skb);
2380 }
2381
2382 napi->gro_list = NULL;
2383}
2384EXPORT_SYMBOL(napi_gro_flush);
2385
2386int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
2387{
2388 struct sk_buff **pp = NULL;
2389 struct packet_type *ptype;
2390 __be16 type = skb->protocol;
2391 struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
2392 int count = 0;
2393 int mac_len;
2394
2395 if (!(skb->dev->features & NETIF_F_GRO))
2396 goto normal;
2397
2398 rcu_read_lock();
2399 list_for_each_entry_rcu(ptype, head, list) {
2400 struct sk_buff *p;
2401
2402 if (ptype->type != type || ptype->dev || !ptype->gro_receive)
2403 continue;
2404
2405 skb_reset_network_header(skb);
2406 mac_len = skb->network_header - skb->mac_header;
2407 skb->mac_len = mac_len;
2408 NAPI_GRO_CB(skb)->same_flow = 0;
2409 NAPI_GRO_CB(skb)->flush = 0;
2410
2411 for (p = napi->gro_list; p; p = p->next) {
2412 count++;
2413 NAPI_GRO_CB(p)->same_flow =
2414 p->mac_len == mac_len &&
2415 !memcmp(skb_mac_header(p), skb_mac_header(skb),
2416 mac_len);
2417 NAPI_GRO_CB(p)->flush = 0;
2418 }
2419
2420 pp = ptype->gro_receive(&napi->gro_list, skb);
2421 break;
2422 }
2423 rcu_read_unlock();
2424
2425 if (&ptype->list == head)
2426 goto normal;
2427
2428 if (pp) {
2429 struct sk_buff *nskb = *pp;
2430
2431 *pp = nskb->next;
2432 nskb->next = NULL;
2433 napi_gro_complete(nskb);
2434 count--;
2435 }
2436
2437 if (NAPI_GRO_CB(skb)->same_flow)
2438 goto ok;
2439
2440 if (NAPI_GRO_CB(skb)->flush || count >= MAX_GRO_SKBS) {
2441 __skb_push(skb, -skb_network_offset(skb));
2442 goto normal;
2443 }
2444
2445 NAPI_GRO_CB(skb)->count = 1;
2446 skb->next = napi->gro_list;
2447 napi->gro_list = skb;
2448
2449ok:
2450 return NET_RX_SUCCESS;
2451
2452normal:
2453 return netif_receive_skb(skb);
2454}
2455EXPORT_SYMBOL(napi_gro_receive);
2456
2338static int process_backlog(struct napi_struct *napi, int quota) 2457static int process_backlog(struct napi_struct *napi, int quota)
2339{ 2458{
2340 int work = 0; 2459 int work = 0;
@@ -2354,9 +2473,11 @@ static int process_backlog(struct napi_struct *napi, int quota)
2354 } 2473 }
2355 local_irq_enable(); 2474 local_irq_enable();
2356 2475
2357 netif_receive_skb(skb); 2476 napi_gro_receive(napi, skb);
2358 } while (++work < quota && jiffies == start_time); 2477 } while (++work < quota && jiffies == start_time);
2359 2478
2479 napi_gro_flush(napi);
2480
2360 return work; 2481 return work;
2361} 2482}
2362 2483
@@ -2377,6 +2498,68 @@ void __napi_schedule(struct napi_struct *n)
2377} 2498}
2378EXPORT_SYMBOL(__napi_schedule); 2499EXPORT_SYMBOL(__napi_schedule);
2379 2500
2501void __napi_complete(struct napi_struct *n)
2502{
2503 BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
2504 BUG_ON(n->gro_list);
2505
2506 list_del(&n->poll_list);
2507 smp_mb__before_clear_bit();
2508 clear_bit(NAPI_STATE_SCHED, &n->state);
2509}
2510EXPORT_SYMBOL(__napi_complete);
2511
2512void napi_complete(struct napi_struct *n)
2513{
2514 unsigned long flags;
2515
2516 /*
2517 * don't let napi dequeue from the cpu poll list
2518 * just in case its running on a different cpu
2519 */
2520 if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state)))
2521 return;
2522
2523 napi_gro_flush(n);
2524 local_irq_save(flags);
2525 __napi_complete(n);
2526 local_irq_restore(flags);
2527}
2528EXPORT_SYMBOL(napi_complete);
2529
2530void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
2531 int (*poll)(struct napi_struct *, int), int weight)
2532{
2533 INIT_LIST_HEAD(&napi->poll_list);
2534 napi->gro_list = NULL;
2535 napi->poll = poll;
2536 napi->weight = weight;
2537 list_add(&napi->dev_list, &dev->napi_list);
2538#ifdef CONFIG_NETPOLL
2539 napi->dev = dev;
2540 spin_lock_init(&napi->poll_lock);
2541 napi->poll_owner = -1;
2542#endif
2543 set_bit(NAPI_STATE_SCHED, &napi->state);
2544}
2545EXPORT_SYMBOL(netif_napi_add);
2546
2547void netif_napi_del(struct napi_struct *napi)
2548{
2549 struct sk_buff *skb, *next;
2550
2551 list_del(&napi->dev_list);
2552
2553 for (skb = napi->gro_list; skb; skb = next) {
2554 next = skb->next;
2555 skb->next = NULL;
2556 kfree_skb(skb);
2557 }
2558
2559 napi->gro_list = NULL;
2560}
2561EXPORT_SYMBOL(netif_napi_del);
2562
2380 2563
2381static void net_rx_action(struct softirq_action *h) 2564static void net_rx_action(struct softirq_action *h)
2382{ 2565{
@@ -4380,7 +4563,7 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
4380 4563
4381 netdev_init_queues(dev); 4564 netdev_init_queues(dev);
4382 4565
4383 netpoll_netdev_init(dev); 4566 INIT_LIST_HEAD(&dev->napi_list);
4384 setup(dev); 4567 setup(dev);
4385 strcpy(dev->name, name); 4568 strcpy(dev->name, name);
4386 return dev; 4569 return dev;
@@ -4397,10 +4580,15 @@ EXPORT_SYMBOL(alloc_netdev_mq);
4397 */ 4580 */
4398void free_netdev(struct net_device *dev) 4581void free_netdev(struct net_device *dev)
4399{ 4582{
4583 struct napi_struct *p, *n;
4584
4400 release_net(dev_net(dev)); 4585 release_net(dev_net(dev));
4401 4586
4402 kfree(dev->_tx); 4587 kfree(dev->_tx);
4403 4588
4589 list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
4590 netif_napi_del(p);
4591
4404 /* Compatibility with error handling in drivers */ 4592 /* Compatibility with error handling in drivers */
4405 if (dev->reg_state == NETREG_UNINITIALIZED) { 4593 if (dev->reg_state == NETREG_UNINITIALIZED) {
4406 kfree((char *)dev - dev->padded); 4594 kfree((char *)dev - dev->padded);
@@ -4949,6 +5137,7 @@ static int __init net_dev_init(void)
4949 5137
4950 queue->backlog.poll = process_backlog; 5138 queue->backlog.poll = process_backlog;
4951 queue->backlog.weight = weight_p; 5139 queue->backlog.weight = weight_p;
5140 queue->backlog.gro_list = NULL;
4952 } 5141 }
4953 5142
4954 dev_boot_phase = 0; 5143 dev_boot_phase = 0;