 include/linux/netdevice.h |  80 +++++++----------
 include/linux/netpoll.h   |   5 -
 net/core/dev.c            | 193 ++++++++++++++++++++++++-
 3 files changed, 219 insertions(+), 59 deletions(-)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index bdf5465deb91..58856b6737fb 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -314,8 +314,9 @@ struct napi_struct {
 	spinlock_t		poll_lock;
 	int			poll_owner;
 	struct net_device	*dev;
-	struct list_head	dev_list;
 #endif
+	struct list_head	dev_list;
+	struct sk_buff		*gro_list;
 };
 
 enum
@@ -376,22 +377,8 @@ static inline int napi_reschedule(struct napi_struct *napi)
  *
  * Mark NAPI processing as complete.
  */
-static inline void __napi_complete(struct napi_struct *n)
-{
-	BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
-	list_del(&n->poll_list);
-	smp_mb__before_clear_bit();
-	clear_bit(NAPI_STATE_SCHED, &n->state);
-}
-
-static inline void napi_complete(struct napi_struct *n)
-{
-	unsigned long flags;
-
-	local_irq_save(flags);
-	__napi_complete(n);
-	local_irq_restore(flags);
-}
+extern void __napi_complete(struct napi_struct *n);
+extern void napi_complete(struct napi_struct *n);
 
 /**
  * napi_disable - prevent NAPI from scheduling
@@ -640,9 +627,7 @@ struct net_device
 	unsigned long		state;
 
 	struct list_head	dev_list;
-#ifdef CONFIG_NETPOLL
 	struct list_head	napi_list;
-#endif
 
 	/* Net device features */
 	unsigned long		features;
@@ -661,6 +646,7 @@ struct net_device
 #define NETIF_F_LLTX		4096	/* LockLess TX - deprecated. Please */
 					/* do not use LLTX in new drivers */
 #define NETIF_F_NETNS_LOCAL	8192	/* Does not change network namespaces */
+#define NETIF_F_GRO		16384	/* Generic receive offload */
 #define NETIF_F_LRO		32768	/* large receive offload */
 
 	/* Segmentation offload features */
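The new NETIF_F_GRO bit is what gates the aggregation path added to net/core/dev.c below: napi_gro_receive() falls straight through to netif_receive_skb() unless the device advertises it. A driver would opt in at initialization time like the other offload flags; a minimal, hypothetical snippet (the probe context and the netdev variable are illustrative, not part of this patch):

	/* hypothetical driver probe/setup code: advertise GRO support */
	netdev->features |= NETIF_F_GRO;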
@@ -984,22 +970,8 @@ static inline void *netdev_priv(const struct net_device *dev)
  * netif_napi_add() must be used to initialize a napi context prior to calling
  * *any* of the other napi related functions.
  */
-static inline void netif_napi_add(struct net_device *dev,
-				  struct napi_struct *napi,
-				  int (*poll)(struct napi_struct *, int),
-				  int weight)
-{
-	INIT_LIST_HEAD(&napi->poll_list);
-	napi->poll = poll;
-	napi->weight = weight;
-#ifdef CONFIG_NETPOLL
-	napi->dev = dev;
-	list_add(&napi->dev_list, &dev->napi_list);
-	spin_lock_init(&napi->poll_lock);
-	napi->poll_owner = -1;
-#endif
-	set_bit(NAPI_STATE_SCHED, &napi->state);
-}
+void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
+		    int (*poll)(struct napi_struct *, int), int weight);
 
 /**
  * netif_napi_del - remove a napi context
@@ -1007,12 +979,20 @@ static inline void netif_napi_add(struct net_device *dev,
  *
  * netif_napi_del() removes a napi context from the network device napi list
  */
-static inline void netif_napi_del(struct napi_struct *napi)
-{
-#ifdef CONFIG_NETPOLL
-	list_del(&napi->dev_list);
-#endif
-}
+void netif_napi_del(struct napi_struct *napi);
+
+struct napi_gro_cb {
+	/* This is non-zero if the packet may be of the same flow. */
+	int same_flow;
+
+	/* This is non-zero if the packet cannot be merged with the new skb. */
+	int flush;
+
+	/* Number of segments aggregated. */
+	int count;
+};
+
+#define NAPI_GRO_CB(skb) ((struct napi_gro_cb *)(skb)->cb)
 
 struct packet_type {
 	__be16			type;	/* This is really htons(ether_type). */
@@ -1024,6 +1004,9 @@ struct packet_type {
 	struct sk_buff		*(*gso_segment)(struct sk_buff *skb,
 						int features);
 	int			(*gso_send_check)(struct sk_buff *skb);
+	struct sk_buff		**(*gro_receive)(struct sk_buff **head,
+					       struct sk_buff *skb);
+	int			(*gro_complete)(struct sk_buff *skb);
 	void			*af_packet_priv;
 	struct list_head	list;
 };
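A protocol hooks into GRO by filling in the two new packet_type callbacks alongside its existing GSO hooks. A hedged sketch for IPv4 follows; the gro handler names are placeholders for functions a follow-up protocol patch would have to provide, they are not added by this patch:

	/* Hypothetical IPv4 glue: inet_gro_receive()/inet_gro_complete()
	 * are placeholder names, not part of this patch. */
	static struct packet_type ip_packet_type = {
		.type		= __constant_htons(ETH_P_IP),
		.func		= ip_rcv,
		.gso_send_check	= inet_gso_send_check,
		.gso_segment	= inet_gso_segment,
		.gro_receive	= inet_gro_receive,
		.gro_complete	= inet_gro_complete,
	};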
@@ -1377,6 +1360,9 @@ extern int netif_rx(struct sk_buff *skb);
 extern int		netif_rx_ni(struct sk_buff *skb);
 #define HAVE_NETIF_RECEIVE_SKB 1
 extern int		netif_receive_skb(struct sk_buff *skb);
+extern void		napi_gro_flush(struct napi_struct *napi);
+extern int		napi_gro_receive(struct napi_struct *napi,
+					 struct sk_buff *skb);
 extern void		netif_nit_deliver(struct sk_buff *skb);
 extern int		dev_valid_name(const char *name);
 extern int		dev_ioctl(struct net *net, unsigned int cmd, void __user *);
@@ -1621,17 +1607,7 @@ static inline void __netif_rx_complete(struct net_device *dev,
 static inline void netif_rx_complete(struct net_device *dev,
 				     struct napi_struct *napi)
 {
-	unsigned long flags;
-
-	/*
-	 * don't let napi dequeue from the cpu poll list
-	 * just in case its running on a different cpu
-	 */
-	if (unlikely(test_bit(NAPI_STATE_NPSVC, &napi->state)))
-		return;
-	local_irq_save(flags);
-	__netif_rx_complete(dev, napi);
-	local_irq_restore(flags);
+	napi_complete(napi);
 }
 
 static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu)
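Taken together, the header changes give drivers a GRO-aware receive path: hand each completed skb to napi_gro_receive() and finish the poll with napi_complete() (which netif_rx_complete() now simply wraps), which also flushes napi->gro_list. A hedged sketch of such a poll routine, using hypothetical driver names (foo_ring, foo_next_rx_skb):

	/* Hypothetical NAPI poll routine; the ring structure, foo_next_rx_skb()
	 * and the interrupt re-enable are driver-specific, not part of this patch. */
	static int foo_poll(struct napi_struct *napi, int budget)
	{
		struct foo_ring *ring = container_of(napi, struct foo_ring, napi);
		struct sk_buff *skb;
		int work_done = 0;

		while (work_done < budget && (skb = foo_next_rx_skb(ring)) != NULL) {
			skb->protocol = eth_type_trans(skb, ring->netdev);
			napi_gro_receive(napi, skb);	/* was netif_receive_skb(skb) */
			work_done++;
		}

		if (work_done < budget) {
			napi_complete(napi);		/* also flushes napi->gro_list */
			/* re-enable the device's RX interrupt here */
		}

		return work_done;
	}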
diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h
index e3d79593fb3a..e38d3c9dccda 100644
--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -94,11 +94,6 @@ static inline void netpoll_poll_unlock(void *have)
 	rcu_read_unlock();
 }
 
-static inline void netpoll_netdev_init(struct net_device *dev)
-{
-	INIT_LIST_HEAD(&dev->napi_list);
-}
-
 #else
 static inline int netpoll_rx(struct sk_buff *skb)
 {
diff --git a/net/core/dev.c b/net/core/dev.c
index e415f0b0d0d0..d8d7d1fccde4 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -129,6 +129,9 @@
 
 #include "net-sysfs.h"
 
+/* Instead of increasing this, you should create a hash table. */
+#define MAX_GRO_SKBS 8
+
 /*
  *	The list of packet types we will receive (as opposed to discard)
  *	and the routines to invoke.
@@ -2335,6 +2338,122 @@ static void flush_backlog(void *arg)
 	}
 }
 
+static int napi_gro_complete(struct sk_buff *skb)
+{
+	struct packet_type *ptype;
+	__be16 type = skb->protocol;
+	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
+	int err = -ENOENT;
+
+	if (!skb_shinfo(skb)->frag_list)
+		goto out;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(ptype, head, list) {
+		if (ptype->type != type || ptype->dev || !ptype->gro_complete)
+			continue;
+
+		err = ptype->gro_complete(skb);
+		break;
+	}
+	rcu_read_unlock();
+
+	if (err) {
+		WARN_ON(&ptype->list == head);
+		kfree_skb(skb);
+		return NET_RX_SUCCESS;
+	}
+
+out:
+	__skb_push(skb, -skb_network_offset(skb));
+	return netif_receive_skb(skb);
+}
+
+void napi_gro_flush(struct napi_struct *napi)
+{
+	struct sk_buff *skb, *next;
+
+	for (skb = napi->gro_list; skb; skb = next) {
+		next = skb->next;
+		skb->next = NULL;
+		napi_gro_complete(skb);
+	}
+
+	napi->gro_list = NULL;
+}
+EXPORT_SYMBOL(napi_gro_flush);
+
+int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
+{
+	struct sk_buff **pp = NULL;
+	struct packet_type *ptype;
+	__be16 type = skb->protocol;
+	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
+	int count = 0;
+	int mac_len;
+
+	if (!(skb->dev->features & NETIF_F_GRO))
+		goto normal;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(ptype, head, list) {
+		struct sk_buff *p;
+
+		if (ptype->type != type || ptype->dev || !ptype->gro_receive)
+			continue;
+
+		skb_reset_network_header(skb);
+		mac_len = skb->network_header - skb->mac_header;
+		skb->mac_len = mac_len;
+		NAPI_GRO_CB(skb)->same_flow = 0;
+		NAPI_GRO_CB(skb)->flush = 0;
+
+		for (p = napi->gro_list; p; p = p->next) {
+			count++;
+			NAPI_GRO_CB(p)->same_flow =
+				p->mac_len == mac_len &&
+				!memcmp(skb_mac_header(p), skb_mac_header(skb),
+					mac_len);
+			NAPI_GRO_CB(p)->flush = 0;
+		}
+
+		pp = ptype->gro_receive(&napi->gro_list, skb);
+		break;
+	}
+	rcu_read_unlock();
+
+	if (&ptype->list == head)
+		goto normal;
+
+	if (pp) {
+		struct sk_buff *nskb = *pp;
+
+		*pp = nskb->next;
+		nskb->next = NULL;
+		napi_gro_complete(nskb);
+		count--;
+	}
+
+	if (NAPI_GRO_CB(skb)->same_flow)
+		goto ok;
+
+	if (NAPI_GRO_CB(skb)->flush || count >= MAX_GRO_SKBS) {
+		__skb_push(skb, -skb_network_offset(skb));
+		goto normal;
+	}
+
+	NAPI_GRO_CB(skb)->count = 1;
+	skb->next = napi->gro_list;
+	napi->gro_list = skb;
+
+ok:
+	return NET_RX_SUCCESS;
+
+normal:
+	return netif_receive_skb(skb);
+}
+EXPORT_SYMBOL(napi_gro_receive);
+
 static int process_backlog(struct napi_struct *napi, int quota)
 {
 	int work = 0;
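The loop above defines the contract for the new gro_receive hook: the callback may mark the incoming skb as same_flow (it was merged into an skb already held on napi->gro_list), mark it flush (it cannot be held for aggregation), and may return a pointer to the list slot of a held skb that should be completed immediately; otherwise it returns NULL. A trivial, hypothetical callback that refuses to aggregate anything, shown only to illustrate that contract:

	/* Hypothetical no-op gro_receive: marks every packet as un-mergeable,
	 * so napi_gro_receive() falls back to plain netif_receive_skb(). */
	static struct sk_buff **foo_gro_receive(struct sk_buff **head,
						struct sk_buff *skb)
	{
		NAPI_GRO_CB(skb)->flush = 1;	/* never merge or hold this skb */
		return NULL;			/* no held packet needs completing */
	}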
@@ -2354,9 +2473,11 @@ static int process_backlog(struct napi_struct *napi, int quota)
 		}
 		local_irq_enable();
 
-		netif_receive_skb(skb);
+		napi_gro_receive(napi, skb);
 	} while (++work < quota && jiffies == start_time);
 
+	napi_gro_flush(napi);
+
 	return work;
 }
 
@@ -2377,6 +2498,68 @@ void __napi_schedule(struct napi_struct *n)
 }
 EXPORT_SYMBOL(__napi_schedule);
 
+void __napi_complete(struct napi_struct *n)
+{
+	BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
+	BUG_ON(n->gro_list);
+
+	list_del(&n->poll_list);
+	smp_mb__before_clear_bit();
+	clear_bit(NAPI_STATE_SCHED, &n->state);
+}
+EXPORT_SYMBOL(__napi_complete);
+
+void napi_complete(struct napi_struct *n)
+{
+	unsigned long flags;
+
+	/*
+	 * don't let napi dequeue from the cpu poll list
+	 * just in case its running on a different cpu
+	 */
+	if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state)))
+		return;
+
+	napi_gro_flush(n);
+	local_irq_save(flags);
+	__napi_complete(n);
+	local_irq_restore(flags);
+}
+EXPORT_SYMBOL(napi_complete);
+
+void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
+		    int (*poll)(struct napi_struct *, int), int weight)
+{
+	INIT_LIST_HEAD(&napi->poll_list);
+	napi->gro_list = NULL;
+	napi->poll = poll;
+	napi->weight = weight;
+	list_add(&napi->dev_list, &dev->napi_list);
+#ifdef CONFIG_NETPOLL
+	napi->dev = dev;
+	spin_lock_init(&napi->poll_lock);
+	napi->poll_owner = -1;
+#endif
+	set_bit(NAPI_STATE_SCHED, &napi->state);
+}
+EXPORT_SYMBOL(netif_napi_add);
+
+void netif_napi_del(struct napi_struct *napi)
+{
+	struct sk_buff *skb, *next;
+
+	list_del(&napi->dev_list);
+
+	for (skb = napi->gro_list; skb; skb = next) {
+		next = skb->next;
+		skb->next = NULL;
+		kfree_skb(skb);
+	}
+
+	napi->gro_list = NULL;
+}
+EXPORT_SYMBOL(netif_napi_del);
+
 
 static void net_rx_action(struct softirq_action *h)
 {
@@ -4380,7 +4563,7 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 
 	netdev_init_queues(dev);
 
-	netpoll_netdev_init(dev);
+	INIT_LIST_HEAD(&dev->napi_list);
 	setup(dev);
 	strcpy(dev->name, name);
 	return dev;
@@ -4397,10 +4580,15 @@ EXPORT_SYMBOL(alloc_netdev_mq);
  */
 void free_netdev(struct net_device *dev)
 {
+	struct napi_struct *p, *n;
+
 	release_net(dev_net(dev));
 
 	kfree(dev->_tx);
 
+	list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
+		netif_napi_del(p);
+
 	/* Compatibility with error handling in drivers */
 	if (dev->reg_state == NETREG_UNINITIALIZED) {
 		kfree((char *)dev - dev->padded);
@@ -4949,6 +5137,7 @@ static int __init net_dev_init(void)
 
 		queue->backlog.poll = process_backlog;
 		queue->backlog.weight = weight_p;
+		queue->backlog.gro_list = NULL;
 	}
 
 	dev_boot_phase = 0;
