diff options
-rw-r--r-- | include/linux/netdevice.h | 80 | ||||
-rw-r--r-- | include/linux/netpoll.h | 5 | ||||
-rw-r--r-- | net/core/dev.c | 193 |
3 files changed, 219 insertions, 59 deletions
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index bdf5465deb91..58856b6737fb 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h | |||
@@ -314,8 +314,9 @@ struct napi_struct { | |||
314 | spinlock_t poll_lock; | 314 | spinlock_t poll_lock; |
315 | int poll_owner; | 315 | int poll_owner; |
316 | struct net_device *dev; | 316 | struct net_device *dev; |
317 | struct list_head dev_list; | ||
318 | #endif | 317 | #endif |
318 | struct list_head dev_list; | ||
319 | struct sk_buff *gro_list; | ||
319 | }; | 320 | }; |
320 | 321 | ||
321 | enum | 322 | enum |
@@ -376,22 +377,8 @@ static inline int napi_reschedule(struct napi_struct *napi) | |||
376 | * | 377 | * |
377 | * Mark NAPI processing as complete. | 378 | * Mark NAPI processing as complete. |
378 | */ | 379 | */ |
379 | static inline void __napi_complete(struct napi_struct *n) | 380 | extern void __napi_complete(struct napi_struct *n); |
380 | { | 381 | extern void napi_complete(struct napi_struct *n); |
381 | BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state)); | ||
382 | list_del(&n->poll_list); | ||
383 | smp_mb__before_clear_bit(); | ||
384 | clear_bit(NAPI_STATE_SCHED, &n->state); | ||
385 | } | ||
386 | |||
387 | static inline void napi_complete(struct napi_struct *n) | ||
388 | { | ||
389 | unsigned long flags; | ||
390 | |||
391 | local_irq_save(flags); | ||
392 | __napi_complete(n); | ||
393 | local_irq_restore(flags); | ||
394 | } | ||
395 | 382 | ||
396 | /** | 383 | /** |
397 | * napi_disable - prevent NAPI from scheduling | 384 | * napi_disable - prevent NAPI from scheduling |
@@ -640,9 +627,7 @@ struct net_device | |||
640 | unsigned long state; | 627 | unsigned long state; |
641 | 628 | ||
642 | struct list_head dev_list; | 629 | struct list_head dev_list; |
643 | #ifdef CONFIG_NETPOLL | ||
644 | struct list_head napi_list; | 630 | struct list_head napi_list; |
645 | #endif | ||
646 | 631 | ||
647 | /* Net device features */ | 632 | /* Net device features */ |
648 | unsigned long features; | 633 | unsigned long features; |
@@ -661,6 +646,7 @@ struct net_device | |||
661 | #define NETIF_F_LLTX 4096 /* LockLess TX - deprecated. Please */ | 646 | #define NETIF_F_LLTX 4096 /* LockLess TX - deprecated. Please */ |
662 | /* do not use LLTX in new drivers */ | 647 | /* do not use LLTX in new drivers */ |
663 | #define NETIF_F_NETNS_LOCAL 8192 /* Does not change network namespaces */ | 648 | #define NETIF_F_NETNS_LOCAL 8192 /* Does not change network namespaces */ |
649 | #define NETIF_F_GRO 16384 /* Generic receive offload */ | ||
664 | #define NETIF_F_LRO 32768 /* large receive offload */ | 650 | #define NETIF_F_LRO 32768 /* large receive offload */ |
665 | 651 | ||
666 | /* Segmentation offload features */ | 652 | /* Segmentation offload features */ |
@@ -984,22 +970,8 @@ static inline void *netdev_priv(const struct net_device *dev) | |||
984 | * netif_napi_add() must be used to initialize a napi context prior to calling | 970 | * netif_napi_add() must be used to initialize a napi context prior to calling |
985 | * *any* of the other napi related functions. | 971 | * *any* of the other napi related functions. |
986 | */ | 972 | */ |
987 | static inline void netif_napi_add(struct net_device *dev, | 973 | void netif_napi_add(struct net_device *dev, struct napi_struct *napi, |
988 | struct napi_struct *napi, | 974 | int (*poll)(struct napi_struct *, int), int weight); |
989 | int (*poll)(struct napi_struct *, int), | ||
990 | int weight) | ||
991 | { | ||
992 | INIT_LIST_HEAD(&napi->poll_list); | ||
993 | napi->poll = poll; | ||
994 | napi->weight = weight; | ||
995 | #ifdef CONFIG_NETPOLL | ||
996 | napi->dev = dev; | ||
997 | list_add(&napi->dev_list, &dev->napi_list); | ||
998 | spin_lock_init(&napi->poll_lock); | ||
999 | napi->poll_owner = -1; | ||
1000 | #endif | ||
1001 | set_bit(NAPI_STATE_SCHED, &napi->state); | ||
1002 | } | ||
1003 | 975 | ||
1004 | /** | 976 | /** |
1005 | * netif_napi_del - remove a napi context | 977 | * netif_napi_del - remove a napi context |
@@ -1007,12 +979,20 @@ static inline void netif_napi_add(struct net_device *dev, | |||
1007 | * | 979 | * |
1008 | * netif_napi_del() removes a napi context from the network device napi list | 980 | * netif_napi_del() removes a napi context from the network device napi list |
1009 | */ | 981 | */ |
1010 | static inline void netif_napi_del(struct napi_struct *napi) | 982 | void netif_napi_del(struct napi_struct *napi); |
1011 | { | 983 | |
/* Per-packet GRO state, kept in skb->cb while the skb sits on a
 * napi_struct's gro_list awaiting further merge candidates. */
struct napi_gro_cb {
	/* This is non-zero if the packet may be of the same flow. */
	int same_flow;

	/* This is non-zero if the packet cannot be merged with the new skb. */
	int flush;

	/* Number of segments aggregated. */
	int count;
};

/* Accessor for the GRO control block stored in an skb's cb area. */
#define NAPI_GRO_CB(skb) ((struct napi_gro_cb *)(skb)->cb)
1016 | 996 | ||
1017 | struct packet_type { | 997 | struct packet_type { |
1018 | __be16 type; /* This is really htons(ether_type). */ | 998 | __be16 type; /* This is really htons(ether_type). */ |
@@ -1024,6 +1004,9 @@ struct packet_type { | |||
1024 | struct sk_buff *(*gso_segment)(struct sk_buff *skb, | 1004 | struct sk_buff *(*gso_segment)(struct sk_buff *skb, |
1025 | int features); | 1005 | int features); |
1026 | int (*gso_send_check)(struct sk_buff *skb); | 1006 | int (*gso_send_check)(struct sk_buff *skb); |
1007 | struct sk_buff **(*gro_receive)(struct sk_buff **head, | ||
1008 | struct sk_buff *skb); | ||
1009 | int (*gro_complete)(struct sk_buff *skb); | ||
1027 | void *af_packet_priv; | 1010 | void *af_packet_priv; |
1028 | struct list_head list; | 1011 | struct list_head list; |
1029 | }; | 1012 | }; |
@@ -1377,6 +1360,9 @@ extern int netif_rx(struct sk_buff *skb); | |||
1377 | extern int netif_rx_ni(struct sk_buff *skb); | 1360 | extern int netif_rx_ni(struct sk_buff *skb); |
1378 | #define HAVE_NETIF_RECEIVE_SKB 1 | 1361 | #define HAVE_NETIF_RECEIVE_SKB 1 |
1379 | extern int netif_receive_skb(struct sk_buff *skb); | 1362 | extern int netif_receive_skb(struct sk_buff *skb); |
1363 | extern void napi_gro_flush(struct napi_struct *napi); | ||
1364 | extern int napi_gro_receive(struct napi_struct *napi, | ||
1365 | struct sk_buff *skb); | ||
1380 | extern void netif_nit_deliver(struct sk_buff *skb); | 1366 | extern void netif_nit_deliver(struct sk_buff *skb); |
1381 | extern int dev_valid_name(const char *name); | 1367 | extern int dev_valid_name(const char *name); |
1382 | extern int dev_ioctl(struct net *net, unsigned int cmd, void __user *); | 1368 | extern int dev_ioctl(struct net *net, unsigned int cmd, void __user *); |
@@ -1621,17 +1607,7 @@ static inline void __netif_rx_complete(struct net_device *dev, | |||
/*
 * netif_rx_complete - legacy NAPI completion wrapper
 *
 * Now simply forwards to napi_complete(), which handles the NPSVC
 * (netpoll-in-service) check and irq-safe list removal itself.
 * @dev is unused but kept so existing driver call sites still compile.
 */
static inline void netif_rx_complete(struct net_device *dev,
				     struct napi_struct *napi)
{
	napi_complete(napi);
}
1636 | 1612 | ||
1637 | static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu) | 1613 | static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu) |
diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index e3d79593fb3a..e38d3c9dccda 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h | |||
@@ -94,11 +94,6 @@ static inline void netpoll_poll_unlock(void *have) | |||
94 | rcu_read_unlock(); | 94 | rcu_read_unlock(); |
95 | } | 95 | } |
96 | 96 | ||
97 | static inline void netpoll_netdev_init(struct net_device *dev) | ||
98 | { | ||
99 | INIT_LIST_HEAD(&dev->napi_list); | ||
100 | } | ||
101 | |||
102 | #else | 97 | #else |
103 | static inline int netpoll_rx(struct sk_buff *skb) | 98 | static inline int netpoll_rx(struct sk_buff *skb) |
104 | { | 99 | { |
diff --git a/net/core/dev.c b/net/core/dev.c index e415f0b0d0d0..d8d7d1fccde4 100644 --- a/net/core/dev.c +++ b/net/core/dev.c | |||
@@ -129,6 +129,9 @@ | |||
129 | 129 | ||
130 | #include "net-sysfs.h" | 130 | #include "net-sysfs.h" |
131 | 131 | ||
132 | /* Instead of increasing this, you should create a hash table. */ | ||
133 | #define MAX_GRO_SKBS 8 | ||
134 | |||
132 | /* | 135 | /* |
133 | * The list of packet types we will receive (as opposed to discard) | 136 | * The list of packet types we will receive (as opposed to discard) |
134 | * and the routines to invoke. | 137 | * and the routines to invoke. |
@@ -2335,6 +2338,122 @@ static void flush_backlog(void *arg) | |||
2335 | } | 2338 | } |
2336 | } | 2339 | } |
2337 | 2340 | ||
/*
 * Finish off one held-back GRO skb: if it was actually merged (it has a
 * frag_list of aggregated segments), let the matching protocol handler's
 * gro_complete() fix up the headers before normal delivery.
 */
static int napi_gro_complete(struct sk_buff *skb)
{
	struct packet_type *ptype;
	__be16 type = skb->protocol;
	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
	int err = -ENOENT;

	/* Never merged with anything: nothing to fix up, deliver as-is. */
	if (!skb_shinfo(skb)->frag_list)
		goto out;

	rcu_read_lock();
	list_for_each_entry_rcu(ptype, head, list) {
		/* Only wildcard-device handlers that implement gro_complete. */
		if (ptype->type != type || ptype->dev || !ptype->gro_complete)
			continue;

		err = ptype->gro_complete(skb);
		break;
	}
	rcu_read_unlock();

	if (err) {
		/* A merged skb with no completion handler should be
		 * impossible (gro_receive found one earlier) — warn if the
		 * loop ran to the end without a break. */
		WARN_ON(&ptype->list == head);
		kfree_skb(skb);
		return NET_RX_SUCCESS;
	}

out:
	/* Rewind the data pointer back to the mac header before handing
	 * the skb to the normal receive path. */
	__skb_push(skb, -skb_network_offset(skb));
	return netif_receive_skb(skb);
}
2371 | |||
2372 | void napi_gro_flush(struct napi_struct *napi) | ||
2373 | { | ||
2374 | struct sk_buff *skb, *next; | ||
2375 | |||
2376 | for (skb = napi->gro_list; skb; skb = next) { | ||
2377 | next = skb->next; | ||
2378 | skb->next = NULL; | ||
2379 | napi_gro_complete(skb); | ||
2380 | } | ||
2381 | |||
2382 | napi->gro_list = NULL; | ||
2383 | } | ||
2384 | EXPORT_SYMBOL(napi_gro_flush); | ||
2385 | |||
/*
 * napi_gro_receive - try to coalesce an incoming skb via GRO
 *
 * Hands @skb to the matching protocol's gro_receive() along with the
 * list of packets already held on @napi->gro_list.  The handler sets
 * NAPI_GRO_CB(skb)->same_flow when @skb was merged into a held packet,
 * and returns a non-NULL list slot when a held packet became complete
 * and must be flushed.  Falls back to netif_receive_skb() whenever GRO
 * is disabled or cannot apply.
 */
int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
{
	struct sk_buff **pp = NULL;
	struct packet_type *ptype;
	__be16 type = skb->protocol;
	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
	int count = 0;		/* packets currently held on gro_list */
	int mac_len;

	if (!(skb->dev->features & NETIF_F_GRO))
		goto normal;

	rcu_read_lock();
	list_for_each_entry_rcu(ptype, head, list) {
		struct sk_buff *p;

		if (ptype->type != type || ptype->dev || !ptype->gro_receive)
			continue;

		skb_reset_network_header(skb);
		mac_len = skb->network_header - skb->mac_header;
		skb->mac_len = mac_len;
		NAPI_GRO_CB(skb)->same_flow = 0;
		NAPI_GRO_CB(skb)->flush = 0;

		/* Pre-screen the held packets: a candidate for merging must
		 * at least have an identical link-layer header. */
		for (p = napi->gro_list; p; p = p->next) {
			count++;
			NAPI_GRO_CB(p)->same_flow =
				p->mac_len == mac_len &&
				!memcmp(skb_mac_header(p), skb_mac_header(skb),
					mac_len);
			NAPI_GRO_CB(p)->flush = 0;
		}

		pp = ptype->gro_receive(&napi->gro_list, skb);
		break;
	}
	rcu_read_unlock();

	/* No GRO-capable handler for this protocol. */
	if (&ptype->list == head)
		goto normal;

	/* The handler marked one held packet as complete: unlink it and
	 * send it up now. */
	if (pp) {
		struct sk_buff *nskb = *pp;

		*pp = nskb->next;
		nskb->next = NULL;
		napi_gro_complete(nskb);
		count--;
	}

	/* @skb was merged into a held packet; it is consumed. */
	if (NAPI_GRO_CB(skb)->same_flow)
		goto ok;

	/* Not mergeable, or the hold list is full (see MAX_GRO_SKBS):
	 * deliver immediately through the normal path. */
	if (NAPI_GRO_CB(skb)->flush || count >= MAX_GRO_SKBS) {
		__skb_push(skb, -skb_network_offset(skb));
		goto normal;
	}

	/* Hold @skb on the list as a new potential aggregation head. */
	NAPI_GRO_CB(skb)->count = 1;
	skb->next = napi->gro_list;
	napi->gro_list = skb;

ok:
	return NET_RX_SUCCESS;

normal:
	return netif_receive_skb(skb);
}
EXPORT_SYMBOL(napi_gro_receive);
2456 | |||
2338 | static int process_backlog(struct napi_struct *napi, int quota) | 2457 | static int process_backlog(struct napi_struct *napi, int quota) |
2339 | { | 2458 | { |
2340 | int work = 0; | 2459 | int work = 0; |
@@ -2354,9 +2473,11 @@ static int process_backlog(struct napi_struct *napi, int quota) | |||
2354 | } | 2473 | } |
2355 | local_irq_enable(); | 2474 | local_irq_enable(); |
2356 | 2475 | ||
2357 | netif_receive_skb(skb); | 2476 | napi_gro_receive(napi, skb); |
2358 | } while (++work < quota && jiffies == start_time); | 2477 | } while (++work < quota && jiffies == start_time); |
2359 | 2478 | ||
2479 | napi_gro_flush(napi); | ||
2480 | |||
2360 | return work; | 2481 | return work; |
2361 | } | 2482 | } |
2362 | 2483 | ||
@@ -2377,6 +2498,68 @@ void __napi_schedule(struct napi_struct *n) | |||
2377 | } | 2498 | } |
2378 | EXPORT_SYMBOL(__napi_schedule); | 2499 | EXPORT_SYMBOL(__napi_schedule); |
2379 | 2500 | ||
/*
 * __napi_complete - low-level half of NAPI completion
 *
 * Removes @n from the current CPU's poll list and clears
 * NAPI_STATE_SCHED.  Callers must have flushed the GRO list first
 * (enforced by the BUG_ON) and must call with interrupts disabled.
 */
void __napi_complete(struct napi_struct *n)
{
	BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
	BUG_ON(n->gro_list);

	list_del(&n->poll_list);
	/* Order the list unlink before clearing SCHED so a concurrent
	 * napi_schedule() cannot observe SCHED clear while the entry is
	 * still linked. */
	smp_mb__before_clear_bit();
	clear_bit(NAPI_STATE_SCHED, &n->state);
}
EXPORT_SYMBOL(__napi_complete);
2511 | |||
/*
 * napi_complete - mark NAPI processing as complete
 *
 * Flushes any pending GRO packets, then removes @n from the poll list
 * and clears the scheduled state with local interrupts disabled.
 */
void napi_complete(struct napi_struct *n)
{
	unsigned long flags;

	/*
	 * don't let napi dequeue from the cpu poll list
	 * just in case its running on a different cpu
	 */
	if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state)))
		return;

	/* Must precede __napi_complete(), which BUG()s on a non-empty
	 * gro_list. */
	napi_gro_flush(n);
	local_irq_save(flags);
	__napi_complete(n);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(napi_complete);
2529 | |||
/*
 * netif_napi_add - initialize a napi context and link it to @dev
 *
 * Must be called before any other napi function is used on @napi.
 * The context is linked onto @dev->napi_list (so free_netdev() can
 * tear it down) and starts with NAPI_STATE_SCHED set.
 */
void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
		    int (*poll)(struct napi_struct *, int), int weight)
{
	INIT_LIST_HEAD(&napi->poll_list);
	napi->gro_list = NULL;
	napi->poll = poll;
	napi->weight = weight;
	list_add(&napi->dev_list, &dev->napi_list);
#ifdef CONFIG_NETPOLL
	/* netpoll additionally needs the owning device and a per-context
	 * poll lock/owner for cross-CPU polling. */
	napi->dev = dev;
	spin_lock_init(&napi->poll_lock);
	napi->poll_owner = -1;
#endif
	set_bit(NAPI_STATE_SCHED, &napi->state);
}
EXPORT_SYMBOL(netif_napi_add);
2546 | |||
2547 | void netif_napi_del(struct napi_struct *napi) | ||
2548 | { | ||
2549 | struct sk_buff *skb, *next; | ||
2550 | |||
2551 | list_del(&napi->dev_list); | ||
2552 | |||
2553 | for (skb = napi->gro_list; skb; skb = next) { | ||
2554 | next = skb->next; | ||
2555 | skb->next = NULL; | ||
2556 | kfree_skb(skb); | ||
2557 | } | ||
2558 | |||
2559 | napi->gro_list = NULL; | ||
2560 | } | ||
2561 | EXPORT_SYMBOL(netif_napi_del); | ||
2562 | |||
2380 | 2563 | ||
2381 | static void net_rx_action(struct softirq_action *h) | 2564 | static void net_rx_action(struct softirq_action *h) |
2382 | { | 2565 | { |
@@ -4380,7 +4563,7 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, | |||
4380 | 4563 | ||
4381 | netdev_init_queues(dev); | 4564 | netdev_init_queues(dev); |
4382 | 4565 | ||
4383 | netpoll_netdev_init(dev); | 4566 | INIT_LIST_HEAD(&dev->napi_list); |
4384 | setup(dev); | 4567 | setup(dev); |
4385 | strcpy(dev->name, name); | 4568 | strcpy(dev->name, name); |
4386 | return dev; | 4569 | return dev; |
@@ -4397,10 +4580,15 @@ EXPORT_SYMBOL(alloc_netdev_mq); | |||
4397 | */ | 4580 | */ |
4398 | void free_netdev(struct net_device *dev) | 4581 | void free_netdev(struct net_device *dev) |
4399 | { | 4582 | { |
4583 | struct napi_struct *p, *n; | ||
4584 | |||
4400 | release_net(dev_net(dev)); | 4585 | release_net(dev_net(dev)); |
4401 | 4586 | ||
4402 | kfree(dev->_tx); | 4587 | kfree(dev->_tx); |
4403 | 4588 | ||
4589 | list_for_each_entry_safe(p, n, &dev->napi_list, dev_list) | ||
4590 | netif_napi_del(p); | ||
4591 | |||
4404 | /* Compatibility with error handling in drivers */ | 4592 | /* Compatibility with error handling in drivers */ |
4405 | if (dev->reg_state == NETREG_UNINITIALIZED) { | 4593 | if (dev->reg_state == NETREG_UNINITIALIZED) { |
4406 | kfree((char *)dev - dev->padded); | 4594 | kfree((char *)dev - dev->padded); |
@@ -4949,6 +5137,7 @@ static int __init net_dev_init(void) | |||
4949 | 5137 | ||
4950 | queue->backlog.poll = process_backlog; | 5138 | queue->backlog.poll = process_backlog; |
4951 | queue->backlog.weight = weight_p; | 5139 | queue->backlog.weight = weight_p; |
5140 | queue->backlog.gro_list = NULL; | ||
4952 | } | 5141 | } |
4953 | 5142 | ||
4954 | dev_boot_phase = 0; | 5143 | dev_boot_phase = 0; |