Diffstat (limited to 'net/core/dev.c')
 net/core/dev.c | 414 ++++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 355 insertions(+), 59 deletions(-)
diff --git a/net/core/dev.c b/net/core/dev.c
index ae00b894e675..0ca95d5d7af0 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -96,6 +96,7 @@
 #include <linux/skbuff.h>
 #include <net/net_namespace.h>
 #include <net/sock.h>
+#include <net/busy_poll.h>
 #include <linux/rtnetlink.h>
 #include <linux/stat.h>
 #include <net/dst.h>
@@ -137,6 +138,7 @@
 #include <linux/errqueue.h>
 #include <linux/hrtimer.h>
 #include <linux/netfilter_ingress.h>
+#include <linux/sctp.h>
 
 #include "net-sysfs.h"
 
@@ -182,8 +184,8 @@ EXPORT_SYMBOL(dev_base_lock);
 /* protects napi_hash addition/deletion and napi_gen_id */
 static DEFINE_SPINLOCK(napi_hash_lock);
 
-static unsigned int napi_gen_id;
-static DEFINE_HASHTABLE(napi_hash, 8);
+static unsigned int napi_gen_id = NR_CPUS;
+static DEFINE_READ_MOSTLY_HASHTABLE(napi_hash, 8);
 
 static seqcount_t devnet_rename_seq;
 
@@ -1674,6 +1676,22 @@ void net_dec_ingress_queue(void)
 EXPORT_SYMBOL_GPL(net_dec_ingress_queue);
 #endif
 
+#ifdef CONFIG_NET_EGRESS
+static struct static_key egress_needed __read_mostly;
+
+void net_inc_egress_queue(void)
+{
+	static_key_slow_inc(&egress_needed);
+}
+EXPORT_SYMBOL_GPL(net_inc_egress_queue);
+
+void net_dec_egress_queue(void)
+{
+	static_key_slow_dec(&egress_needed);
+}
+EXPORT_SYMBOL_GPL(net_dec_egress_queue);
+#endif
+
 static struct static_key netstamp_needed __read_mostly;
 #ifdef HAVE_JUMP_LABEL
 /* We are not allowed to call static_key_slow_dec() from irq context
@@ -2470,6 +2488,141 @@ out:
 }
 EXPORT_SYMBOL(skb_checksum_help);
 
+/* skb_csum_offload_check - Driver helper function to determine if a device
+ * with limited checksum offload capabilities is able to offload the checksum
+ * for a given packet.
+ *
+ * Arguments:
+ *   skb - sk_buff for the packet in question
+ *   spec - contains the description of what device can offload
+ *   csum_encapped - returns true if the checksum being offloaded is
+ *	      encapsulated. That is, it is the checksum for the transport
+ *	      header in the inner headers.
+ *   checksum_help - when set indicates that helper function should
+ *	      call skb_checksum_help if offload checks fail
+ *
+ * Returns:
+ *   true: Packet has passed the checksum checks and should be offloadable to
+ *	   the device (a driver may still need to check for additional
+ *	   restrictions of its device)
+ *   false: Checksum is not offloadable. If checksum_help was set then
+ *	   skb_checksum_help was called to resolve checksum for non-GSO
+ *	   packets and when IP protocol is not SCTP
+ */
+bool __skb_csum_offload_chk(struct sk_buff *skb,
+			    const struct skb_csum_offl_spec *spec,
+			    bool *csum_encapped,
+			    bool csum_help)
+{
+	struct iphdr *iph;
+	struct ipv6hdr *ipv6;
+	void *nhdr;
+	int protocol;
+	u8 ip_proto;
+
+	if (skb->protocol == htons(ETH_P_8021Q) ||
+	    skb->protocol == htons(ETH_P_8021AD)) {
+		if (!spec->vlan_okay)
+			goto need_help;
+	}
+
+	/* We check whether the checksum refers to a transport layer checksum in
+	 * the outermost header or an encapsulated transport layer checksum that
+	 * corresponds to the inner headers of the skb. If the checksum is for
+	 * something else in the packet we need help.
+	 */
+	if (skb_checksum_start_offset(skb) == skb_transport_offset(skb)) {
+		/* Non-encapsulated checksum */
+		protocol = eproto_to_ipproto(vlan_get_protocol(skb));
+		nhdr = skb_network_header(skb);
+		*csum_encapped = false;
+		if (spec->no_not_encapped)
+			goto need_help;
+	} else if (skb->encapsulation && spec->encap_okay &&
+		   skb_checksum_start_offset(skb) ==
+		   skb_inner_transport_offset(skb)) {
+		/* Encapsulated checksum */
+		*csum_encapped = true;
+		switch (skb->inner_protocol_type) {
+		case ENCAP_TYPE_ETHER:
+			protocol = eproto_to_ipproto(skb->inner_protocol);
+			break;
+		case ENCAP_TYPE_IPPROTO:
+			protocol = skb->inner_protocol;
+			break;
+		}
+		nhdr = skb_inner_network_header(skb);
+	} else {
+		goto need_help;
+	}
+
+	switch (protocol) {
+	case IPPROTO_IP:
+		if (!spec->ipv4_okay)
+			goto need_help;
+		iph = nhdr;
+		ip_proto = iph->protocol;
+		if (iph->ihl != 5 && !spec->ip_options_okay)
+			goto need_help;
+		break;
+	case IPPROTO_IPV6:
+		if (!spec->ipv6_okay)
+			goto need_help;
+		if (spec->no_encapped_ipv6 && *csum_encapped)
+			goto need_help;
+		ipv6 = nhdr;
+		nhdr += sizeof(*ipv6);
+		ip_proto = ipv6->nexthdr;
+		break;
+	default:
+		goto need_help;
+	}
+
+ip_proto_again:
+	switch (ip_proto) {
+	case IPPROTO_TCP:
+		if (!spec->tcp_okay ||
+		    skb->csum_offset != offsetof(struct tcphdr, check))
+			goto need_help;
+		break;
+	case IPPROTO_UDP:
+		if (!spec->udp_okay ||
+		    skb->csum_offset != offsetof(struct udphdr, check))
+			goto need_help;
+		break;
+	case IPPROTO_SCTP:
+		if (!spec->sctp_okay ||
+		    skb->csum_offset != offsetof(struct sctphdr, checksum))
+			goto cant_help;
+		break;
+	case NEXTHDR_HOP:
+	case NEXTHDR_ROUTING:
+	case NEXTHDR_DEST: {
+		u8 *opthdr = nhdr;
+
+		if (protocol != IPPROTO_IPV6 || !spec->ext_hdrs_okay)
+			goto need_help;
+
+		ip_proto = opthdr[0];
+		nhdr += (opthdr[1] + 1) << 3;
+
+		goto ip_proto_again;
+	}
+	default:
+		goto need_help;
+	}
+
+	/* Passed the tests for offloading checksum */
+	return true;
+
+need_help:
+	if (csum_help && !skb_shinfo(skb)->gso_size)
+		skb_checksum_help(skb);
+cant_help:
+	return false;
+}
+EXPORT_SYMBOL(__skb_csum_offload_chk);
+
 __be16 skb_network_protocol(struct sk_buff *skb, int *depth)
 {
 	__be16 type = skb->protocol;
@@ -2644,7 +2797,7 @@ static netdev_features_t harmonize_features(struct sk_buff *skb,
 
 	if (skb->ip_summed != CHECKSUM_NONE &&
 	    !can_checksum_protocol(features, type)) {
-		features &= ~NETIF_F_ALL_CSUM;
+		features &= ~NETIF_F_CSUM_MASK;
 	} else if (illegal_highdma(skb->dev, skb)) {
 		features &= ~NETIF_F_SG;
 	}
@@ -2791,7 +2944,7 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device
 		else
 			skb_set_transport_header(skb,
 						 skb_checksum_start_offset(skb));
-		if (!(features & NETIF_F_ALL_CSUM) &&
+		if (!(features & NETIF_F_CSUM_MASK) &&
 		    skb_checksum_help(skb))
 			goto out_kfree_skb;
 	}
@@ -2870,7 +3023,6 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 	bool contended;
 	int rc;
 
-	qdisc_pkt_len_init(skb);
 	qdisc_calculate_pkt_len(skb, q);
 	/*
 	 * Heuristic to force contended enqueues to serialize on a
@@ -2928,7 +3080,8 @@ static void skb_update_prio(struct sk_buff *skb)
 	struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap);
 
 	if (!skb->priority && skb->sk && map) {
-		unsigned int prioidx = skb->sk->sk_cgrp_prioidx;
+		unsigned int prioidx =
+			sock_cgroup_prioidx(&skb->sk->sk_cgrp_data);
 
 		if (prioidx < map->priomap_len)
 			skb->priority = map->priomap[prioidx];
@@ -2962,6 +3115,49 @@ int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
 }
 EXPORT_SYMBOL(dev_loopback_xmit);
 
+#ifdef CONFIG_NET_EGRESS
+static struct sk_buff *
+sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
+{
+	struct tcf_proto *cl = rcu_dereference_bh(dev->egress_cl_list);
+	struct tcf_result cl_res;
+
+	if (!cl)
+		return skb;
+
+	/* skb->tc_verd and qdisc_skb_cb(skb)->pkt_len were already set
+	 * earlier by the caller.
+	 */
+	qdisc_bstats_cpu_update(cl->q, skb);
+
+	switch (tc_classify(skb, cl, &cl_res, false)) {
+	case TC_ACT_OK:
+	case TC_ACT_RECLASSIFY:
+		skb->tc_index = TC_H_MIN(cl_res.classid);
+		break;
+	case TC_ACT_SHOT:
+		qdisc_qstats_cpu_drop(cl->q);
+		*ret = NET_XMIT_DROP;
+		goto drop;
+	case TC_ACT_STOLEN:
+	case TC_ACT_QUEUED:
+		*ret = NET_XMIT_SUCCESS;
+drop:
+		kfree_skb(skb);
+		return NULL;
+	case TC_ACT_REDIRECT:
+		/* No need to push/pop skb's mac_header here on egress! */
+		skb_do_redirect(skb);
+		*ret = NET_XMIT_SUCCESS;
+		return NULL;
+	default:
+		break;
+	}
+
+	return skb;
+}
+#endif /* CONFIG_NET_EGRESS */
+
 static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
 {
 #ifdef CONFIG_XPS
@@ -3021,7 +3217,9 @@ struct netdev_queue *netdev_pick_tx(struct net_device *dev,
 	int queue_index = 0;
 
 #ifdef CONFIG_XPS
-	if (skb->sender_cpu == 0)
+	u32 sender_cpu = skb->sender_cpu - 1;
+
+	if (sender_cpu >= (u32)NR_CPUS)
 		skb->sender_cpu = raw_smp_processor_id() + 1;
 #endif
 
@@ -3086,6 +3284,17 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
 
 	skb_update_prio(skb);
 
+	qdisc_pkt_len_init(skb);
+#ifdef CONFIG_NET_CLS_ACT
+	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
+# ifdef CONFIG_NET_EGRESS
+	if (static_key_false(&egress_needed)) {
+		skb = sch_handle_egress(skb, &rc, dev);
+		if (!skb)
+			goto out;
+	}
+# endif
+#endif
 	/* If device/qdisc don't need skb->dst, release it right now while
 	 * its hot in this cpu cache.
 	 */
@@ -3107,9 +3316,6 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
 	txq = netdev_pick_tx(dev, skb, accel_priv);
 	q = rcu_dereference_bh(txq->qdisc);
 
-#ifdef CONFIG_NET_CLS_ACT
-	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
-#endif
 	trace_net_dev_queue(skb);
 	if (q->enqueue) {
 		rc = __dev_xmit_skb(skb, q, dev, txq);
@@ -3666,9 +3872,9 @@ int (*br_fdb_test_addr_hook)(struct net_device *dev,
 EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
 #endif
 
-static inline struct sk_buff *handle_ing(struct sk_buff *skb,
-					 struct packet_type **pt_prev,
-					 int *ret, struct net_device *orig_dev)
+static inline struct sk_buff *
+sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
+		   struct net_device *orig_dev)
 {
 #ifdef CONFIG_NET_CLS_ACT
 	struct tcf_proto *cl = rcu_dereference_bh(skb->dev->ingress_cl_list);
@@ -3862,7 +4068,7 @@ another_round:
 skip_taps:
 #ifdef CONFIG_NET_INGRESS
 	if (static_key_false(&ingress_needed)) {
-		skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
+		skb = sch_handle_ingress(skb, &pt_prev, &ret, orig_dev);
 		if (!skb)
 			goto out;
 
@@ -4353,6 +4559,7 @@ static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
 
 gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
+	skb_mark_napi_id(skb, napi);
 	trace_napi_gro_receive_entry(skb);
 
 	skb_gro_reset_offset(skb);
@@ -4386,7 +4593,10 @@ struct sk_buff *napi_get_frags(struct napi_struct *napi)
 
 	if (!skb) {
 		skb = napi_alloc_skb(napi, GRO_MAX_HEAD);
-		napi->skb = skb;
+		if (skb) {
+			napi->skb = skb;
+			skb_mark_napi_id(skb, napi);
+		}
 	}
 	return skb;
 }
@@ -4661,7 +4871,7 @@ void napi_complete_done(struct napi_struct *n, int work_done)
 EXPORT_SYMBOL(napi_complete_done);
 
 /* must be called under rcu_read_lock(), as we dont take a reference */
-struct napi_struct *napi_by_id(unsigned int napi_id)
+static struct napi_struct *napi_by_id(unsigned int napi_id)
 {
 	unsigned int hash = napi_id % HASH_SIZE(napi_hash);
 	struct napi_struct *napi;
@@ -4672,43 +4882,101 @@ struct napi_struct *napi_by_id(unsigned int napi_id)
 
 	return NULL;
 }
-EXPORT_SYMBOL_GPL(napi_by_id);
 
-void napi_hash_add(struct napi_struct *napi)
+#if defined(CONFIG_NET_RX_BUSY_POLL)
+#define BUSY_POLL_BUDGET 8
+bool sk_busy_loop(struct sock *sk, int nonblock)
 {
-	if (!test_and_set_bit(NAPI_STATE_HASHED, &napi->state)) {
+	unsigned long end_time = !nonblock ? sk_busy_loop_end_time(sk) : 0;
+	int (*busy_poll)(struct napi_struct *dev);
+	struct napi_struct *napi;
+	int rc = false;
 
-		spin_lock(&napi_hash_lock);
+	rcu_read_lock();
 
-		/* 0 is not a valid id, we also skip an id that is taken
-		 * we expect both events to be extremely rare
-		 */
-		napi->napi_id = 0;
-		while (!napi->napi_id) {
-			napi->napi_id = ++napi_gen_id;
-			if (napi_by_id(napi->napi_id))
-				napi->napi_id = 0;
+	napi = napi_by_id(sk->sk_napi_id);
+	if (!napi)
+		goto out;
+
+	/* Note: ndo_busy_poll method is optional in linux-4.5 */
+	busy_poll = napi->dev->netdev_ops->ndo_busy_poll;
+
+	do {
+		rc = 0;
+		local_bh_disable();
+		if (busy_poll) {
+			rc = busy_poll(napi);
+		} else if (napi_schedule_prep(napi)) {
+			void *have = netpoll_poll_lock(napi);
+
+			if (test_bit(NAPI_STATE_SCHED, &napi->state)) {
+				rc = napi->poll(napi, BUSY_POLL_BUDGET);
+				trace_napi_poll(napi);
+				if (rc == BUSY_POLL_BUDGET) {
+					napi_complete_done(napi, rc);
+					napi_schedule(napi);
+				}
+			}
+			netpoll_poll_unlock(have);
 		}
+		if (rc > 0)
+			NET_ADD_STATS_BH(sock_net(sk),
+					 LINUX_MIB_BUSYPOLLRXPACKETS, rc);
+		local_bh_enable();
 
-		hlist_add_head_rcu(&napi->napi_hash_node,
-				   &napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]);
+		if (rc == LL_FLUSH_FAILED)
+			break; /* permanent failure */
 
-		spin_unlock(&napi_hash_lock);
-	}
+		cpu_relax();
+	} while (!nonblock && skb_queue_empty(&sk->sk_receive_queue) &&
+		 !need_resched() && !busy_loop_timeout(end_time));
+
+	rc = !skb_queue_empty(&sk->sk_receive_queue);
+out:
+	rcu_read_unlock();
+	return rc;
+}
+EXPORT_SYMBOL(sk_busy_loop);
+
+#endif /* CONFIG_NET_RX_BUSY_POLL */
+
+void napi_hash_add(struct napi_struct *napi)
+{
+	if (test_bit(NAPI_STATE_NO_BUSY_POLL, &napi->state) ||
+	    test_and_set_bit(NAPI_STATE_HASHED, &napi->state))
+		return;
+
+	spin_lock(&napi_hash_lock);
+
+	/* 0..NR_CPUS+1 range is reserved for sender_cpu use */
+	do {
+		if (unlikely(++napi_gen_id < NR_CPUS + 1))
+			napi_gen_id = NR_CPUS + 1;
+	} while (napi_by_id(napi_gen_id));
+	napi->napi_id = napi_gen_id;
+
+	hlist_add_head_rcu(&napi->napi_hash_node,
+			   &napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]);
+
+	spin_unlock(&napi_hash_lock);
 }
 EXPORT_SYMBOL_GPL(napi_hash_add);
 
 /* Warning : caller is responsible to make sure rcu grace period
  * is respected before freeing memory containing @napi
  */
-void napi_hash_del(struct napi_struct *napi)
+bool napi_hash_del(struct napi_struct *napi)
 {
+	bool rcu_sync_needed = false;
+
 	spin_lock(&napi_hash_lock);
 
-	if (test_and_clear_bit(NAPI_STATE_HASHED, &napi->state))
+	if (test_and_clear_bit(NAPI_STATE_HASHED, &napi->state)) {
+		rcu_sync_needed = true;
 		hlist_del_rcu(&napi->napi_hash_node);
-
+	}
 	spin_unlock(&napi_hash_lock);
+	return rcu_sync_needed;
 }
 EXPORT_SYMBOL_GPL(napi_hash_del);
 
@@ -4744,6 +5012,7 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
 	napi->poll_owner = -1;
 #endif
 	set_bit(NAPI_STATE_SCHED, &napi->state);
+	napi_hash_add(napi);
 }
 EXPORT_SYMBOL(netif_napi_add);
 
@@ -4763,8 +5032,12 @@ void napi_disable(struct napi_struct *n)
 }
 EXPORT_SYMBOL(napi_disable);
 
+/* Must be called in process context */
 void netif_napi_del(struct napi_struct *napi)
 {
+	might_sleep();
+	if (napi_hash_del(napi))
+		synchronize_net();
 	list_del_init(&napi->dev_list);
 	napi_free_frags(napi);
 
@@ -5351,7 +5624,7 @@ static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev,
 
 static int __netdev_upper_dev_link(struct net_device *dev,
 				   struct net_device *upper_dev, bool master,
-				   void *private)
+				   void *upper_priv, void *upper_info)
 {
 	struct netdev_notifier_changeupper_info changeupper_info;
 	struct netdev_adjacent *i, *j, *to_i, *to_j;
@@ -5375,6 +5648,7 @@ static int __netdev_upper_dev_link(struct net_device *dev,
 	changeupper_info.upper_dev = upper_dev;
 	changeupper_info.master = master;
 	changeupper_info.linking = true;
+	changeupper_info.upper_info = upper_info;
 
 	ret = call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, dev,
 					    &changeupper_info.info);
@@ -5382,7 +5656,7 @@ static int __netdev_upper_dev_link(struct net_device *dev,
 	if (ret)
 		return ret;
 
-	ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, private,
+	ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, upper_priv,
 						   master);
 	if (ret)
 		return ret;
@@ -5420,8 +5694,12 @@ static int __netdev_upper_dev_link(struct net_device *dev,
 			goto rollback_lower_mesh;
 	}
 
-	call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev,
+	ret = call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev,
 				      &changeupper_info.info);
+	ret = notifier_to_errno(ret);
+	if (ret)
+		goto rollback_lower_mesh;
+
 	return 0;
 
 rollback_lower_mesh:
@@ -5475,7 +5753,7 @@ rollback_mesh:
 int netdev_upper_dev_link(struct net_device *dev,
 			  struct net_device *upper_dev)
 {
-	return __netdev_upper_dev_link(dev, upper_dev, false, NULL);
+	return __netdev_upper_dev_link(dev, upper_dev, false, NULL, NULL);
 }
 EXPORT_SYMBOL(netdev_upper_dev_link);
 
@@ -5483,6 +5761,8 @@ EXPORT_SYMBOL(netdev_upper_dev_link);
  * netdev_master_upper_dev_link - Add a master link to the upper device
  * @dev: device
  * @upper_dev: new upper device
+ * @upper_priv: upper device private
+ * @upper_info: upper info to be passed down via notifier
  *
  * Adds a link to device which is upper to this one. In this case, only
  * one master upper device can be linked, although other non-master devices
@@ -5491,20 +5771,14 @@ EXPORT_SYMBOL(netdev_upper_dev_link);
  * counts are adjusted and the function returns zero.
  */
 int netdev_master_upper_dev_link(struct net_device *dev,
-				 struct net_device *upper_dev)
+				 struct net_device *upper_dev,
+				 void *upper_priv, void *upper_info)
 {
-	return __netdev_upper_dev_link(dev, upper_dev, true, NULL);
+	return __netdev_upper_dev_link(dev, upper_dev, true,
+				       upper_priv, upper_info);
 }
 EXPORT_SYMBOL(netdev_master_upper_dev_link);
 
-int netdev_master_upper_dev_link_private(struct net_device *dev,
-					 struct net_device *upper_dev,
-					 void *private)
-{
-	return __netdev_upper_dev_link(dev, upper_dev, true, private);
-}
-EXPORT_SYMBOL(netdev_master_upper_dev_link_private);
-
 /**
  * netdev_upper_dev_unlink - Removes a link to upper device
  * @dev: device
@@ -5663,7 +5937,7 @@ EXPORT_SYMBOL(netdev_lower_dev_get_private);
 
 
 int dev_get_nest_level(struct net_device *dev,
-		       bool (*type_check)(struct net_device *dev))
+		       bool (*type_check)(const struct net_device *dev))
 {
 	struct net_device *lower = NULL;
 	struct list_head *iter;
@@ -5685,6 +5959,26 @@ int dev_get_nest_level(struct net_device *dev,
 }
 EXPORT_SYMBOL(dev_get_nest_level);
 
+/**
+ * netdev_lower_state_changed - Dispatch event about lower device state change
+ * @lower_dev: device
+ * @lower_state_info: state to dispatch
+ *
+ * Send NETDEV_CHANGELOWERSTATE to netdev notifiers with info.
+ * The caller must hold the RTNL lock.
+ */
+void netdev_lower_state_changed(struct net_device *lower_dev,
+				void *lower_state_info)
+{
+	struct netdev_notifier_changelowerstate_info changelowerstate_info;
+
+	ASSERT_RTNL();
+	changelowerstate_info.lower_state_info = lower_state_info;
+	call_netdevice_notifiers_info(NETDEV_CHANGELOWERSTATE, lower_dev,
+				      &changelowerstate_info.info);
+}
+EXPORT_SYMBOL(netdev_lower_state_changed);
+
 static void dev_change_rx_flags(struct net_device *dev, int flags)
 {
 	const struct net_device_ops *ops = dev->netdev_ops;
@@ -6375,9 +6669,9 @@ static netdev_features_t netdev_fix_features(struct net_device *dev,
 	/* UFO needs SG and checksumming */
 	if (features & NETIF_F_UFO) {
 		/* maybe split UFO into V4 and V6? */
-		if (!((features & NETIF_F_GEN_CSUM) ||
-		    (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))
-			    == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
+		if (!(features & NETIF_F_HW_CSUM) &&
+		    ((features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) !=
+		     (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM))) {
 			netdev_dbg(dev,
 				"Dropping NETIF_F_UFO since no checksum offload features.\n");
 			features &= ~NETIF_F_UFO;
@@ -7164,11 +7458,13 @@ EXPORT_SYMBOL(alloc_netdev_mqs);
  *	This function does the last stage of destroying an allocated device
  *	interface. The reference to the device object is released.
  *	If this is the last reference then it will be freed.
+ *	Must be called in process context.
  */
 void free_netdev(struct net_device *dev)
 {
 	struct napi_struct *p, *n;
 
+	might_sleep();
 	netif_free_tx_queues(dev);
 #ifdef CONFIG_SYSFS
 	kvfree(dev->_rx);
@@ -7477,16 +7773,16 @@ static int dev_cpu_callback(struct notifier_block *nfb,
 netdev_features_t netdev_increment_features(netdev_features_t all,
 	netdev_features_t one, netdev_features_t mask)
 {
-	if (mask & NETIF_F_GEN_CSUM)
-		mask |= NETIF_F_ALL_CSUM;
+	if (mask & NETIF_F_HW_CSUM)
+		mask |= NETIF_F_CSUM_MASK;
 	mask |= NETIF_F_VLAN_CHALLENGED;
 
-	all |= one & (NETIF_F_ONE_FOR_ALL|NETIF_F_ALL_CSUM) & mask;
+	all |= one & (NETIF_F_ONE_FOR_ALL | NETIF_F_CSUM_MASK) & mask;
 	all &= one | ~NETIF_F_ALL_FOR_ALL;
 
 	/* If one device supports hw checksumming, set for all. */
-	if (all & NETIF_F_GEN_CSUM)
-		all &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM);
+	if (all & NETIF_F_HW_CSUM)
+		all &= ~(NETIF_F_CSUM_MASK & ~NETIF_F_HW_CSUM);
 
 	return all;
 }
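
For context, a minimal hypothetical sketch of how a driver with limited checksum offload capabilities could use the __skb_csum_offload_chk() helper and struct skb_csum_offl_spec introduced by this patch; the foo_* names below are illustrative only and are not part of the commit:

/* Hypothetical driver-side usage of the helper added above.  The spec
 * describes what the device can offload; the helper decides per packet
 * and, with csum_help set, falls back to skb_checksum_help() for
 * non-GSO, non-SCTP packets it cannot offload.
 */
static const struct skb_csum_offl_spec foo_csum_spec = {
	.ipv4_okay = 1,
	.ipv6_okay = 1,
	.vlan_okay = 1,
	.tcp_okay  = 1,
	.udp_okay  = 1,
};

static netdev_tx_t foo_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	bool csum_encapped = false;
	bool offload_csum = false;

	if (skb->ip_summed == CHECKSUM_PARTIAL)
		/* Ask the stack whether this packet fits the device's
		 * checksum offload limits (true means hardware may offload).
		 */
		offload_csum = __skb_csum_offload_chk(skb, &foo_csum_spec,
						      &csum_encapped, true);

	/* ... fill TX descriptors; use offload_csum and csum_encapped to
	 * request inner vs. outer checksum insertion in hardware ...
	 */
	return NETDEV_TX_OK;
}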