aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/linux/netdevice.h80
-rw-r--r--include/linux/netpoll.h5
-rw-r--r--net/core/dev.c193
3 files changed, 219 insertions, 59 deletions
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index bdf5465deb91..58856b6737fb 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -314,8 +314,9 @@ struct napi_struct {
314 spinlock_t poll_lock; 314 spinlock_t poll_lock;
315 int poll_owner; 315 int poll_owner;
316 struct net_device *dev; 316 struct net_device *dev;
317 struct list_head dev_list;
318#endif 317#endif
318 struct list_head dev_list;
319 struct sk_buff *gro_list;
319}; 320};
320 321
321enum 322enum
@@ -376,22 +377,8 @@ static inline int napi_reschedule(struct napi_struct *napi)
376 * 377 *
377 * Mark NAPI processing as complete. 378 * Mark NAPI processing as complete.
378 */ 379 */
379static inline void __napi_complete(struct napi_struct *n) 380extern void __napi_complete(struct napi_struct *n);
380{ 381extern void napi_complete(struct napi_struct *n);
381 BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
382 list_del(&n->poll_list);
383 smp_mb__before_clear_bit();
384 clear_bit(NAPI_STATE_SCHED, &n->state);
385}
386
387static inline void napi_complete(struct napi_struct *n)
388{
389 unsigned long flags;
390
391 local_irq_save(flags);
392 __napi_complete(n);
393 local_irq_restore(flags);
394}
395 382
396/** 383/**
397 * napi_disable - prevent NAPI from scheduling 384 * napi_disable - prevent NAPI from scheduling
@@ -640,9 +627,7 @@ struct net_device
640 unsigned long state; 627 unsigned long state;
641 628
642 struct list_head dev_list; 629 struct list_head dev_list;
643#ifdef CONFIG_NETPOLL
644 struct list_head napi_list; 630 struct list_head napi_list;
645#endif
646 631
647 /* Net device features */ 632 /* Net device features */
648 unsigned long features; 633 unsigned long features;
@@ -661,6 +646,7 @@ struct net_device
661#define NETIF_F_LLTX 4096 /* LockLess TX - deprecated. Please */ 646#define NETIF_F_LLTX 4096 /* LockLess TX - deprecated. Please */
662 /* do not use LLTX in new drivers */ 647 /* do not use LLTX in new drivers */
663#define NETIF_F_NETNS_LOCAL 8192 /* Does not change network namespaces */ 648#define NETIF_F_NETNS_LOCAL 8192 /* Does not change network namespaces */
649#define NETIF_F_GRO 16384 /* Generic receive offload */
664#define NETIF_F_LRO 32768 /* large receive offload */ 650#define NETIF_F_LRO 32768 /* large receive offload */
665 651
666 /* Segmentation offload features */ 652 /* Segmentation offload features */
@@ -984,22 +970,8 @@ static inline void *netdev_priv(const struct net_device *dev)
984 * netif_napi_add() must be used to initialize a napi context prior to calling 970 * netif_napi_add() must be used to initialize a napi context prior to calling
985 * *any* of the other napi related functions. 971 * *any* of the other napi related functions.
986 */ 972 */
987static inline void netif_napi_add(struct net_device *dev, 973void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
988 struct napi_struct *napi, 974 int (*poll)(struct napi_struct *, int), int weight);
989 int (*poll)(struct napi_struct *, int),
990 int weight)
991{
992 INIT_LIST_HEAD(&napi->poll_list);
993 napi->poll = poll;
994 napi->weight = weight;
995#ifdef CONFIG_NETPOLL
996 napi->dev = dev;
997 list_add(&napi->dev_list, &dev->napi_list);
998 spin_lock_init(&napi->poll_lock);
999 napi->poll_owner = -1;
1000#endif
1001 set_bit(NAPI_STATE_SCHED, &napi->state);
1002}
1003 975
1004/** 976/**
1005 * netif_napi_del - remove a napi context 977 * netif_napi_del - remove a napi context
@@ -1007,12 +979,20 @@ static inline void netif_napi_add(struct net_device *dev,
1007 * 979 *
1008 * netif_napi_del() removes a napi context from the network device napi list 980 * netif_napi_del() removes a napi context from the network device napi list
1009 */ 981 */
1010static inline void netif_napi_del(struct napi_struct *napi) 982void netif_napi_del(struct napi_struct *napi);
1011{ 983
1012#ifdef CONFIG_NETPOLL 984struct napi_gro_cb {
1013 list_del(&napi->dev_list); 985 /* This is non-zero if the packet may be of the same flow. */
1014#endif 986 int same_flow;
1015} 987
988 /* This is non-zero if the packet cannot be merged with the new skb. */
989 int flush;
990
991 /* Number of segments aggregated. */
992 int count;
993};
994
995#define NAPI_GRO_CB(skb) ((struct napi_gro_cb *)(skb)->cb)
1016 996
1017struct packet_type { 997struct packet_type {
1018 __be16 type; /* This is really htons(ether_type). */ 998 __be16 type; /* This is really htons(ether_type). */
@@ -1024,6 +1004,9 @@ struct packet_type {
1024 struct sk_buff *(*gso_segment)(struct sk_buff *skb, 1004 struct sk_buff *(*gso_segment)(struct sk_buff *skb,
1025 int features); 1005 int features);
1026 int (*gso_send_check)(struct sk_buff *skb); 1006 int (*gso_send_check)(struct sk_buff *skb);
1007 struct sk_buff **(*gro_receive)(struct sk_buff **head,
1008 struct sk_buff *skb);
1009 int (*gro_complete)(struct sk_buff *skb);
1027 void *af_packet_priv; 1010 void *af_packet_priv;
1028 struct list_head list; 1011 struct list_head list;
1029}; 1012};
@@ -1377,6 +1360,9 @@ extern int netif_rx(struct sk_buff *skb);
1377extern int netif_rx_ni(struct sk_buff *skb); 1360extern int netif_rx_ni(struct sk_buff *skb);
1378#define HAVE_NETIF_RECEIVE_SKB 1 1361#define HAVE_NETIF_RECEIVE_SKB 1
1379extern int netif_receive_skb(struct sk_buff *skb); 1362extern int netif_receive_skb(struct sk_buff *skb);
1363extern void napi_gro_flush(struct napi_struct *napi);
1364extern int napi_gro_receive(struct napi_struct *napi,
1365 struct sk_buff *skb);
1380extern void netif_nit_deliver(struct sk_buff *skb); 1366extern void netif_nit_deliver(struct sk_buff *skb);
1381extern int dev_valid_name(const char *name); 1367extern int dev_valid_name(const char *name);
1382extern int dev_ioctl(struct net *net, unsigned int cmd, void __user *); 1368extern int dev_ioctl(struct net *net, unsigned int cmd, void __user *);
@@ -1621,17 +1607,7 @@ static inline void __netif_rx_complete(struct net_device *dev,
1621static inline void netif_rx_complete(struct net_device *dev, 1607static inline void netif_rx_complete(struct net_device *dev,
1622 struct napi_struct *napi) 1608 struct napi_struct *napi)
1623{ 1609{
1624 unsigned long flags; 1610 napi_complete(napi);
1625
1626 /*
1627 * don't let napi dequeue from the cpu poll list
1628 * just in case its running on a different cpu
1629 */
1630 if (unlikely(test_bit(NAPI_STATE_NPSVC, &napi->state)))
1631 return;
1632 local_irq_save(flags);
1633 __netif_rx_complete(dev, napi);
1634 local_irq_restore(flags);
1635} 1611}
1636 1612
1637static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu) 1613static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu)
diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h
index e3d79593fb3a..e38d3c9dccda 100644
--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -94,11 +94,6 @@ static inline void netpoll_poll_unlock(void *have)
94 rcu_read_unlock(); 94 rcu_read_unlock();
95} 95}
96 96
97static inline void netpoll_netdev_init(struct net_device *dev)
98{
99 INIT_LIST_HEAD(&dev->napi_list);
100}
101
102#else 97#else
103static inline int netpoll_rx(struct sk_buff *skb) 98static inline int netpoll_rx(struct sk_buff *skb)
104{ 99{
diff --git a/net/core/dev.c b/net/core/dev.c
index e415f0b0d0d0..d8d7d1fccde4 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -129,6 +129,9 @@
129 129
130#include "net-sysfs.h" 130#include "net-sysfs.h"
131 131
132/* Instead of increasing this, you should create a hash table. */
133#define MAX_GRO_SKBS 8
134
132/* 135/*
133 * The list of packet types we will receive (as opposed to discard) 136 * The list of packet types we will receive (as opposed to discard)
134 * and the routines to invoke. 137 * and the routines to invoke.
@@ -2335,6 +2338,122 @@ static void flush_backlog(void *arg)
2335 } 2338 }
2336} 2339}
2337 2340
2341static int napi_gro_complete(struct sk_buff *skb)
2342{
2343 struct packet_type *ptype;
2344 __be16 type = skb->protocol;
2345 struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
2346 int err = -ENOENT;
2347
2348 if (!skb_shinfo(skb)->frag_list)
2349 goto out;
2350
2351 rcu_read_lock();
2352 list_for_each_entry_rcu(ptype, head, list) {
2353 if (ptype->type != type || ptype->dev || !ptype->gro_complete)
2354 continue;
2355
2356 err = ptype->gro_complete(skb);
2357 break;
2358 }
2359 rcu_read_unlock();
2360
2361 if (err) {
2362 WARN_ON(&ptype->list == head);
2363 kfree_skb(skb);
2364 return NET_RX_SUCCESS;
2365 }
2366
2367out:
2368 __skb_push(skb, -skb_network_offset(skb));
2369 return netif_receive_skb(skb);
2370}
2371
2372void napi_gro_flush(struct napi_struct *napi)
2373{
2374 struct sk_buff *skb, *next;
2375
2376 for (skb = napi->gro_list; skb; skb = next) {
2377 next = skb->next;
2378 skb->next = NULL;
2379 napi_gro_complete(skb);
2380 }
2381
2382 napi->gro_list = NULL;
2383}
2384EXPORT_SYMBOL(napi_gro_flush);
2385
2386int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
2387{
2388 struct sk_buff **pp = NULL;
2389 struct packet_type *ptype;
2390 __be16 type = skb->protocol;
2391 struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
2392 int count = 0;
2393 int mac_len;
2394
2395 if (!(skb->dev->features & NETIF_F_GRO))
2396 goto normal;
2397
2398 rcu_read_lock();
2399 list_for_each_entry_rcu(ptype, head, list) {
2400 struct sk_buff *p;
2401
2402 if (ptype->type != type || ptype->dev || !ptype->gro_receive)
2403 continue;
2404
2405 skb_reset_network_header(skb);
2406 mac_len = skb->network_header - skb->mac_header;
2407 skb->mac_len = mac_len;
2408 NAPI_GRO_CB(skb)->same_flow = 0;
2409 NAPI_GRO_CB(skb)->flush = 0;
2410
2411 for (p = napi->gro_list; p; p = p->next) {
2412 count++;
2413 NAPI_GRO_CB(p)->same_flow =
2414 p->mac_len == mac_len &&
2415 !memcmp(skb_mac_header(p), skb_mac_header(skb),
2416 mac_len);
2417 NAPI_GRO_CB(p)->flush = 0;
2418 }
2419
2420 pp = ptype->gro_receive(&napi->gro_list, skb);
2421 break;
2422 }
2423 rcu_read_unlock();
2424
2425 if (&ptype->list == head)
2426 goto normal;
2427
2428 if (pp) {
2429 struct sk_buff *nskb = *pp;
2430
2431 *pp = nskb->next;
2432 nskb->next = NULL;
2433 napi_gro_complete(nskb);
2434 count--;
2435 }
2436
2437 if (NAPI_GRO_CB(skb)->same_flow)
2438 goto ok;
2439
2440 if (NAPI_GRO_CB(skb)->flush || count >= MAX_GRO_SKBS) {
2441 __skb_push(skb, -skb_network_offset(skb));
2442 goto normal;
2443 }
2444
2445 NAPI_GRO_CB(skb)->count = 1;
2446 skb->next = napi->gro_list;
2447 napi->gro_list = skb;
2448
2449ok:
2450 return NET_RX_SUCCESS;
2451
2452normal:
2453 return netif_receive_skb(skb);
2454}
2455EXPORT_SYMBOL(napi_gro_receive);
2456
2338static int process_backlog(struct napi_struct *napi, int quota) 2457static int process_backlog(struct napi_struct *napi, int quota)
2339{ 2458{
2340 int work = 0; 2459 int work = 0;
@@ -2354,9 +2473,11 @@ static int process_backlog(struct napi_struct *napi, int quota)
2354 } 2473 }
2355 local_irq_enable(); 2474 local_irq_enable();
2356 2475
2357 netif_receive_skb(skb); 2476 napi_gro_receive(napi, skb);
2358 } while (++work < quota && jiffies == start_time); 2477 } while (++work < quota && jiffies == start_time);
2359 2478
2479 napi_gro_flush(napi);
2480
2360 return work; 2481 return work;
2361} 2482}
2362 2483
@@ -2377,6 +2498,68 @@ void __napi_schedule(struct napi_struct *n)
2377} 2498}
2378EXPORT_SYMBOL(__napi_schedule); 2499EXPORT_SYMBOL(__napi_schedule);
2379 2500
2501void __napi_complete(struct napi_struct *n)
2502{
2503 BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
2504 BUG_ON(n->gro_list);
2505
2506 list_del(&n->poll_list);
2507 smp_mb__before_clear_bit();
2508 clear_bit(NAPI_STATE_SCHED, &n->state);
2509}
2510EXPORT_SYMBOL(__napi_complete);
2511
2512void napi_complete(struct napi_struct *n)
2513{
2514 unsigned long flags;
2515
2516 /*
2517 * don't let napi dequeue from the cpu poll list
2518 * just in case its running on a different cpu
2519 */
2520 if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state)))
2521 return;
2522
2523 napi_gro_flush(n);
2524 local_irq_save(flags);
2525 __napi_complete(n);
2526 local_irq_restore(flags);
2527}
2528EXPORT_SYMBOL(napi_complete);
2529
2530void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
2531 int (*poll)(struct napi_struct *, int), int weight)
2532{
2533 INIT_LIST_HEAD(&napi->poll_list);
2534 napi->gro_list = NULL;
2535 napi->poll = poll;
2536 napi->weight = weight;
2537 list_add(&napi->dev_list, &dev->napi_list);
2538#ifdef CONFIG_NETPOLL
2539 napi->dev = dev;
2540 spin_lock_init(&napi->poll_lock);
2541 napi->poll_owner = -1;
2542#endif
2543 set_bit(NAPI_STATE_SCHED, &napi->state);
2544}
2545EXPORT_SYMBOL(netif_napi_add);
2546
2547void netif_napi_del(struct napi_struct *napi)
2548{
2549 struct sk_buff *skb, *next;
2550
2551 list_del(&napi->dev_list);
2552
2553 for (skb = napi->gro_list; skb; skb = next) {
2554 next = skb->next;
2555 skb->next = NULL;
2556 kfree_skb(skb);
2557 }
2558
2559 napi->gro_list = NULL;
2560}
2561EXPORT_SYMBOL(netif_napi_del);
2562
2380 2563
2381static void net_rx_action(struct softirq_action *h) 2564static void net_rx_action(struct softirq_action *h)
2382{ 2565{
@@ -4380,7 +4563,7 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
4380 4563
4381 netdev_init_queues(dev); 4564 netdev_init_queues(dev);
4382 4565
4383 netpoll_netdev_init(dev); 4566 INIT_LIST_HEAD(&dev->napi_list);
4384 setup(dev); 4567 setup(dev);
4385 strcpy(dev->name, name); 4568 strcpy(dev->name, name);
4386 return dev; 4569 return dev;
@@ -4397,10 +4580,15 @@ EXPORT_SYMBOL(alloc_netdev_mq);
4397 */ 4580 */
4398void free_netdev(struct net_device *dev) 4581void free_netdev(struct net_device *dev)
4399{ 4582{
4583 struct napi_struct *p, *n;
4584
4400 release_net(dev_net(dev)); 4585 release_net(dev_net(dev));
4401 4586
4402 kfree(dev->_tx); 4587 kfree(dev->_tx);
4403 4588
4589 list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
4590 netif_napi_del(p);
4591
4404 /* Compatibility with error handling in drivers */ 4592 /* Compatibility with error handling in drivers */
4405 if (dev->reg_state == NETREG_UNINITIALIZED) { 4593 if (dev->reg_state == NETREG_UNINITIALIZED) {
4406 kfree((char *)dev - dev->padded); 4594 kfree((char *)dev - dev->padded);
@@ -4949,6 +5137,7 @@ static int __init net_dev_init(void)
4949 5137
4950 queue->backlog.poll = process_backlog; 5138 queue->backlog.poll = process_backlog;
4951 queue->backlog.weight = weight_p; 5139 queue->backlog.weight = weight_p;
5140 queue->backlog.gro_list = NULL;
4952 } 5141 }
4953 5142
4954 dev_boot_phase = 0; 5143 dev_boot_phase = 0;