Diffstat (limited to 'net/core/dev.c')
-rw-r--r--  net/core/dev.c  |  414
1 file changed, 355 insertions(+), 59 deletions(-)
diff --git a/net/core/dev.c b/net/core/dev.c
index ae00b894e675..0ca95d5d7af0 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -96,6 +96,7 @@
 #include <linux/skbuff.h>
 #include <net/net_namespace.h>
 #include <net/sock.h>
+#include <net/busy_poll.h>
 #include <linux/rtnetlink.h>
 #include <linux/stat.h>
 #include <net/dst.h>
@@ -137,6 +138,7 @@
 #include <linux/errqueue.h>
 #include <linux/hrtimer.h>
 #include <linux/netfilter_ingress.h>
+#include <linux/sctp.h>
 
 #include "net-sysfs.h"
 
@@ -182,8 +184,8 @@ EXPORT_SYMBOL(dev_base_lock);
 /* protects napi_hash addition/deletion and napi_gen_id */
 static DEFINE_SPINLOCK(napi_hash_lock);
 
-static unsigned int napi_gen_id;
-static DEFINE_HASHTABLE(napi_hash, 8);
+static unsigned int napi_gen_id = NR_CPUS;
+static DEFINE_READ_MOSTLY_HASHTABLE(napi_hash, 8);
 
 static seqcount_t devnet_rename_seq;
 
@@ -1674,6 +1676,22 @@ void net_dec_ingress_queue(void)
 EXPORT_SYMBOL_GPL(net_dec_ingress_queue);
 #endif
 
+#ifdef CONFIG_NET_EGRESS
+static struct static_key egress_needed __read_mostly;
+
+void net_inc_egress_queue(void)
+{
+        static_key_slow_inc(&egress_needed);
+}
+EXPORT_SYMBOL_GPL(net_inc_egress_queue);
+
+void net_dec_egress_queue(void)
+{
+        static_key_slow_dec(&egress_needed);
+}
+EXPORT_SYMBOL_GPL(net_dec_egress_queue);
+#endif
+
 static struct static_key netstamp_needed __read_mostly;
 #ifdef HAVE_JUMP_LABEL
 /* We are not allowed to call static_key_slow_dec() from irq context
@@ -2470,6 +2488,141 @@ out:
 }
 EXPORT_SYMBOL(skb_checksum_help);
 
+/* skb_csum_offload_check - Driver helper function to determine if a device
+ * with limited checksum offload capabilities is able to offload the checksum
+ * for a given packet.
+ *
+ * Arguments:
+ *   skb - sk_buff for the packet in question
+ *   spec - contains the description of what device can offload
+ *   csum_encapped - returns true if the checksum being offloaded is
+ *                   encapsulated. That is, it is the checksum for the
+ *                   transport header in the inner headers.
+ *   checksum_help - when set indicates that helper function should
+ *                   call skb_checksum_help if offload checks fail
+ *
+ * Returns:
+ *   true: Packet has passed the checksum checks and should be offloadable to
+ *         the device (a driver may still need to check for additional
+ *         restrictions of its device)
+ *   false: Checksum is not offloadable. If checksum_help was set then
+ *         skb_checksum_help was called to resolve checksum for non-GSO
+ *         packets and when IP protocol is not SCTP
+ */
+bool __skb_csum_offload_chk(struct sk_buff *skb,
+                            const struct skb_csum_offl_spec *spec,
+                            bool *csum_encapped,
+                            bool csum_help)
+{
+        struct iphdr *iph;
+        struct ipv6hdr *ipv6;
+        void *nhdr;
+        int protocol;
+        u8 ip_proto;
+
+        if (skb->protocol == htons(ETH_P_8021Q) ||
+            skb->protocol == htons(ETH_P_8021AD)) {
+                if (!spec->vlan_okay)
+                        goto need_help;
+        }
+
+        /* We check whether the checksum refers to a transport layer checksum in
+         * the outermost header or an encapsulated transport layer checksum that
+         * corresponds to the inner headers of the skb. If the checksum is for
+         * something else in the packet we need help.
+         */
+        if (skb_checksum_start_offset(skb) == skb_transport_offset(skb)) {
+                /* Non-encapsulated checksum */
+                protocol = eproto_to_ipproto(vlan_get_protocol(skb));
+                nhdr = skb_network_header(skb);
+                *csum_encapped = false;
+                if (spec->no_not_encapped)
+                        goto need_help;
+        } else if (skb->encapsulation && spec->encap_okay &&
+                   skb_checksum_start_offset(skb) ==
+                   skb_inner_transport_offset(skb)) {
+                /* Encapsulated checksum */
+                *csum_encapped = true;
+                switch (skb->inner_protocol_type) {
+                case ENCAP_TYPE_ETHER:
+                        protocol = eproto_to_ipproto(skb->inner_protocol);
+                        break;
+                case ENCAP_TYPE_IPPROTO:
+                        protocol = skb->inner_protocol;
+                        break;
+                }
+                nhdr = skb_inner_network_header(skb);
+        } else {
+                goto need_help;
+        }
+
+        switch (protocol) {
+        case IPPROTO_IP:
+                if (!spec->ipv4_okay)
+                        goto need_help;
+                iph = nhdr;
+                ip_proto = iph->protocol;
+                if (iph->ihl != 5 && !spec->ip_options_okay)
+                        goto need_help;
+                break;
+        case IPPROTO_IPV6:
+                if (!spec->ipv6_okay)
+                        goto need_help;
+                if (spec->no_encapped_ipv6 && *csum_encapped)
+                        goto need_help;
+                ipv6 = nhdr;
+                nhdr += sizeof(*ipv6);
+                ip_proto = ipv6->nexthdr;
+                break;
+        default:
+                goto need_help;
+        }
+
+ip_proto_again:
+        switch (ip_proto) {
+        case IPPROTO_TCP:
+                if (!spec->tcp_okay ||
+                    skb->csum_offset != offsetof(struct tcphdr, check))
+                        goto need_help;
+                break;
+        case IPPROTO_UDP:
+                if (!spec->udp_okay ||
+                    skb->csum_offset != offsetof(struct udphdr, check))
+                        goto need_help;
+                break;
+        case IPPROTO_SCTP:
+                if (!spec->sctp_okay ||
+                    skb->csum_offset != offsetof(struct sctphdr, checksum))
+                        goto cant_help;
+                break;
+        case NEXTHDR_HOP:
+        case NEXTHDR_ROUTING:
+        case NEXTHDR_DEST: {
+                u8 *opthdr = nhdr;
+
+                if (protocol != IPPROTO_IPV6 || !spec->ext_hdrs_okay)
+                        goto need_help;
+
+                ip_proto = opthdr[0];
+                nhdr += (opthdr[1] + 1) << 3;
+
+                goto ip_proto_again;
+        }
+        default:
+                goto need_help;
+        }
+
+        /* Passed the tests for offloading checksum */
+        return true;
+
+need_help:
+        if (csum_help && !skb_shinfo(skb)->gso_size)
+                skb_checksum_help(skb);
+cant_help:
+        return false;
+}
+EXPORT_SYMBOL(__skb_csum_offload_chk);
+
 __be16 skb_network_protocol(struct sk_buff *skb, int *depth)
 {
         __be16 type = skb->protocol;
@@ -2644,7 +2797,7 @@ static netdev_features_t harmonize_features(struct sk_buff *skb,
 
         if (skb->ip_summed != CHECKSUM_NONE &&
             !can_checksum_protocol(features, type)) {
-                features &= ~NETIF_F_ALL_CSUM;
+                features &= ~NETIF_F_CSUM_MASK;
         } else if (illegal_highdma(skb->dev, skb)) {
                 features &= ~NETIF_F_SG;
         }
@@ -2791,7 +2944,7 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device
                 else
                         skb_set_transport_header(skb,
                                                  skb_checksum_start_offset(skb));
-                if (!(features & NETIF_F_ALL_CSUM) &&
+                if (!(features & NETIF_F_CSUM_MASK) &&
                     skb_checksum_help(skb))
                         goto out_kfree_skb;
         }
@@ -2870,7 +3023,6 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
         bool contended;
         int rc;
 
-        qdisc_pkt_len_init(skb);
         qdisc_calculate_pkt_len(skb, q);
         /*
          * Heuristic to force contended enqueues to serialize on a
@@ -2928,7 +3080,8 @@ static void skb_update_prio(struct sk_buff *skb)
         struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap);
 
         if (!skb->priority && skb->sk && map) {
-                unsigned int prioidx = skb->sk->sk_cgrp_prioidx;
+                unsigned int prioidx =
+                        sock_cgroup_prioidx(&skb->sk->sk_cgrp_data);
 
                 if (prioidx < map->priomap_len)
                         skb->priority = map->priomap[prioidx];
@@ -2962,6 +3115,49 @@ int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
 }
 EXPORT_SYMBOL(dev_loopback_xmit);
 
+#ifdef CONFIG_NET_EGRESS
+static struct sk_buff *
+sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
+{
+        struct tcf_proto *cl = rcu_dereference_bh(dev->egress_cl_list);
+        struct tcf_result cl_res;
+
+        if (!cl)
+                return skb;
+
+        /* skb->tc_verd and qdisc_skb_cb(skb)->pkt_len were already set
+         * earlier by the caller.
+         */
+        qdisc_bstats_cpu_update(cl->q, skb);
+
+        switch (tc_classify(skb, cl, &cl_res, false)) {
+        case TC_ACT_OK:
+        case TC_ACT_RECLASSIFY:
+                skb->tc_index = TC_H_MIN(cl_res.classid);
+                break;
+        case TC_ACT_SHOT:
+                qdisc_qstats_cpu_drop(cl->q);
+                *ret = NET_XMIT_DROP;
+                goto drop;
+        case TC_ACT_STOLEN:
+        case TC_ACT_QUEUED:
+                *ret = NET_XMIT_SUCCESS;
+drop:
+                kfree_skb(skb);
+                return NULL;
+        case TC_ACT_REDIRECT:
+                /* No need to push/pop skb's mac_header here on egress! */
+                skb_do_redirect(skb);
+                *ret = NET_XMIT_SUCCESS;
+                return NULL;
+        default:
+                break;
+        }
+
+        return skb;
+}
+#endif /* CONFIG_NET_EGRESS */
+
 static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
 {
 #ifdef CONFIG_XPS
@@ -3021,7 +3217,9 @@ struct netdev_queue *netdev_pick_tx(struct net_device *dev,
         int queue_index = 0;
 
 #ifdef CONFIG_XPS
-        if (skb->sender_cpu == 0)
+        u32 sender_cpu = skb->sender_cpu - 1;
+
+        if (sender_cpu >= (u32)NR_CPUS)
                 skb->sender_cpu = raw_smp_processor_id() + 1;
 #endif
 
@@ -3086,6 +3284,17 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
 
         skb_update_prio(skb);
 
+        qdisc_pkt_len_init(skb);
+#ifdef CONFIG_NET_CLS_ACT
+        skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
+# ifdef CONFIG_NET_EGRESS
+        if (static_key_false(&egress_needed)) {
+                skb = sch_handle_egress(skb, &rc, dev);
+                if (!skb)
+                        goto out;
+        }
+# endif
+#endif
         /* If device/qdisc don't need skb->dst, release it right now while
          * its hot in this cpu cache.
          */
@@ -3107,9 +3316,6 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
         txq = netdev_pick_tx(dev, skb, accel_priv);
         q = rcu_dereference_bh(txq->qdisc);
 
-#ifdef CONFIG_NET_CLS_ACT
-        skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
-#endif
         trace_net_dev_queue(skb);
         if (q->enqueue) {
                 rc = __dev_xmit_skb(skb, q, dev, txq);
@@ -3666,9 +3872,9 @@ int (*br_fdb_test_addr_hook)(struct net_device *dev,
 EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
 #endif
 
-static inline struct sk_buff *handle_ing(struct sk_buff *skb,
-                                         struct packet_type **pt_prev,
-                                         int *ret, struct net_device *orig_dev)
+static inline struct sk_buff *
+sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
+                   struct net_device *orig_dev)
 {
 #ifdef CONFIG_NET_CLS_ACT
         struct tcf_proto *cl = rcu_dereference_bh(skb->dev->ingress_cl_list);
@@ -3862,7 +4068,7 @@ another_round:
 skip_taps:
 #ifdef CONFIG_NET_INGRESS
         if (static_key_false(&ingress_needed)) {
-                skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
+                skb = sch_handle_ingress(skb, &pt_prev, &ret, orig_dev);
                 if (!skb)
                         goto out;
 
@@ -4353,6 +4559,7 @@ static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
 
 gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
+        skb_mark_napi_id(skb, napi);
         trace_napi_gro_receive_entry(skb);
 
         skb_gro_reset_offset(skb);
@@ -4386,7 +4593,10 @@ struct sk_buff *napi_get_frags(struct napi_struct *napi)
 
         if (!skb) {
                 skb = napi_alloc_skb(napi, GRO_MAX_HEAD);
-                napi->skb = skb;
+                if (skb) {
+                        napi->skb = skb;
+                        skb_mark_napi_id(skb, napi);
+                }
         }
         return skb;
 }
@@ -4661,7 +4871,7 @@ void napi_complete_done(struct napi_struct *n, int work_done)
 EXPORT_SYMBOL(napi_complete_done);
 
 /* must be called under rcu_read_lock(), as we dont take a reference */
-struct napi_struct *napi_by_id(unsigned int napi_id)
+static struct napi_struct *napi_by_id(unsigned int napi_id)
 {
         unsigned int hash = napi_id % HASH_SIZE(napi_hash);
         struct napi_struct *napi;
@@ -4672,43 +4882,101 @@ struct napi_struct *napi_by_id(unsigned int napi_id)
 
         return NULL;
 }
-EXPORT_SYMBOL_GPL(napi_by_id);
 
-void napi_hash_add(struct napi_struct *napi)
-{
-        if (!test_and_set_bit(NAPI_STATE_HASHED, &napi->state)) {
-
-                spin_lock(&napi_hash_lock);
-
-                /* 0 is not a valid id, we also skip an id that is taken
-                 * we expect both events to be extremely rare
-                 */
-                napi->napi_id = 0;
-                while (!napi->napi_id) {
-                        napi->napi_id = ++napi_gen_id;
-                        if (napi_by_id(napi->napi_id))
-                                napi->napi_id = 0;
-                }
-
-                hlist_add_head_rcu(&napi->napi_hash_node,
-                        &napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]);
-
-                spin_unlock(&napi_hash_lock);
-        }
+#if defined(CONFIG_NET_RX_BUSY_POLL)
+#define BUSY_POLL_BUDGET 8
+bool sk_busy_loop(struct sock *sk, int nonblock)
+{
+        unsigned long end_time = !nonblock ? sk_busy_loop_end_time(sk) : 0;
+        int (*busy_poll)(struct napi_struct *dev);
+        struct napi_struct *napi;
+        int rc = false;
+
+        rcu_read_lock();
+
+        napi = napi_by_id(sk->sk_napi_id);
+        if (!napi)
+                goto out;
+
+        /* Note: ndo_busy_poll method is optional in linux-4.5 */
+        busy_poll = napi->dev->netdev_ops->ndo_busy_poll;
+
+        do {
+                rc = 0;
+                local_bh_disable();
+                if (busy_poll) {
+                        rc = busy_poll(napi);
+                } else if (napi_schedule_prep(napi)) {
+                        void *have = netpoll_poll_lock(napi);
+
+                        if (test_bit(NAPI_STATE_SCHED, &napi->state)) {
+                                rc = napi->poll(napi, BUSY_POLL_BUDGET);
+                                trace_napi_poll(napi);
+                                if (rc == BUSY_POLL_BUDGET) {
+                                        napi_complete_done(napi, rc);
+                                        napi_schedule(napi);
+                                }
+                        }
+                        netpoll_poll_unlock(have);
+                }
+                if (rc > 0)
+                        NET_ADD_STATS_BH(sock_net(sk),
+                                         LINUX_MIB_BUSYPOLLRXPACKETS, rc);
+                local_bh_enable();
+
+                if (rc == LL_FLUSH_FAILED)
+                        break; /* permanent failure */
+
+                cpu_relax();
+        } while (!nonblock && skb_queue_empty(&sk->sk_receive_queue) &&
+                 !need_resched() && !busy_loop_timeout(end_time));
+
+        rc = !skb_queue_empty(&sk->sk_receive_queue);
+out:
+        rcu_read_unlock();
+        return rc;
+}
+EXPORT_SYMBOL(sk_busy_loop);
+
+#endif /* CONFIG_NET_RX_BUSY_POLL */
+
+void napi_hash_add(struct napi_struct *napi)
+{
+        if (test_bit(NAPI_STATE_NO_BUSY_POLL, &napi->state) ||
+            test_and_set_bit(NAPI_STATE_HASHED, &napi->state))
+                return;
+
+        spin_lock(&napi_hash_lock);
+
+        /* 0..NR_CPUS+1 range is reserved for sender_cpu use */
+        do {
+                if (unlikely(++napi_gen_id < NR_CPUS + 1))
+                        napi_gen_id = NR_CPUS + 1;
+        } while (napi_by_id(napi_gen_id));
+        napi->napi_id = napi_gen_id;
+
+        hlist_add_head_rcu(&napi->napi_hash_node,
+                           &napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]);
+
+        spin_unlock(&napi_hash_lock);
 }
 EXPORT_SYMBOL_GPL(napi_hash_add);
 
 /* Warning : caller is responsible to make sure rcu grace period
  * is respected before freeing memory containing @napi
  */
-void napi_hash_del(struct napi_struct *napi)
+bool napi_hash_del(struct napi_struct *napi)
 {
+        bool rcu_sync_needed = false;
+
         spin_lock(&napi_hash_lock);
 
-        if (test_and_clear_bit(NAPI_STATE_HASHED, &napi->state))
+        if (test_and_clear_bit(NAPI_STATE_HASHED, &napi->state)) {
+                rcu_sync_needed = true;
                 hlist_del_rcu(&napi->napi_hash_node);
-
+        }
         spin_unlock(&napi_hash_lock);
+        return rcu_sync_needed;
 }
 EXPORT_SYMBOL_GPL(napi_hash_del);
 
@@ -4744,6 +5012,7 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
         napi->poll_owner = -1;
 #endif
         set_bit(NAPI_STATE_SCHED, &napi->state);
+        napi_hash_add(napi);
 }
 EXPORT_SYMBOL(netif_napi_add);
 
@@ -4763,8 +5032,12 @@ void napi_disable(struct napi_struct *n)
 }
 EXPORT_SYMBOL(napi_disable);
 
+/* Must be called in process context */
 void netif_napi_del(struct napi_struct *napi)
 {
+        might_sleep();
+        if (napi_hash_del(napi))
+                synchronize_net();
         list_del_init(&napi->dev_list);
         napi_free_frags(napi);
 
@@ -5351,7 +5624,7 @@ static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev,
 
 static int __netdev_upper_dev_link(struct net_device *dev,
                                    struct net_device *upper_dev, bool master,
-                                   void *private)
+                                   void *upper_priv, void *upper_info)
 {
         struct netdev_notifier_changeupper_info changeupper_info;
         struct netdev_adjacent *i, *j, *to_i, *to_j;
@@ -5375,6 +5648,7 @@ static int __netdev_upper_dev_link(struct net_device *dev,
         changeupper_info.upper_dev = upper_dev;
         changeupper_info.master = master;
         changeupper_info.linking = true;
+        changeupper_info.upper_info = upper_info;
 
         ret = call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, dev,
                                             &changeupper_info.info);
@@ -5382,7 +5656,7 @@ static int __netdev_upper_dev_link(struct net_device *dev,
         if (ret)
                 return ret;
 
-        ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, private,
+        ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, upper_priv,
                                                    master);
         if (ret)
                 return ret;
@@ -5420,8 +5694,12 @@ static int __netdev_upper_dev_link(struct net_device *dev,
                 goto rollback_lower_mesh;
         }
 
-        call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev,
-                                      &changeupper_info.info);
+        ret = call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev,
+                                            &changeupper_info.info);
+        ret = notifier_to_errno(ret);
+        if (ret)
+                goto rollback_lower_mesh;
+
         return 0;
 
 rollback_lower_mesh:
@@ -5475,7 +5753,7 @@ rollback_mesh:
 int netdev_upper_dev_link(struct net_device *dev,
                           struct net_device *upper_dev)
 {
-        return __netdev_upper_dev_link(dev, upper_dev, false, NULL);
+        return __netdev_upper_dev_link(dev, upper_dev, false, NULL, NULL);
 }
 EXPORT_SYMBOL(netdev_upper_dev_link);
 
@@ -5483,6 +5761,8 @@ EXPORT_SYMBOL(netdev_upper_dev_link);
  * netdev_master_upper_dev_link - Add a master link to the upper device
  * @dev: device
  * @upper_dev: new upper device
+ * @upper_priv: upper device private
+ * @upper_info: upper info to be passed down via notifier
  *
  * Adds a link to device which is upper to this one. In this case, only
  * one master upper device can be linked, although other non-master devices
@@ -5491,20 +5771,14 @@ EXPORT_SYMBOL(netdev_upper_dev_link);
  * counts are adjusted and the function returns zero.
  */
 int netdev_master_upper_dev_link(struct net_device *dev,
-                                 struct net_device *upper_dev)
+                                 struct net_device *upper_dev,
+                                 void *upper_priv, void *upper_info)
 {
-        return __netdev_upper_dev_link(dev, upper_dev, true, NULL);
+        return __netdev_upper_dev_link(dev, upper_dev, true,
+                                       upper_priv, upper_info);
 }
 EXPORT_SYMBOL(netdev_master_upper_dev_link);
 
-int netdev_master_upper_dev_link_private(struct net_device *dev,
-                                         struct net_device *upper_dev,
-                                         void *private)
-{
-        return __netdev_upper_dev_link(dev, upper_dev, true, private);
-}
-EXPORT_SYMBOL(netdev_master_upper_dev_link_private);
-
 /**
  * netdev_upper_dev_unlink - Removes a link to upper device
  * @dev: device
@@ -5663,7 +5937,7 @@ EXPORT_SYMBOL(netdev_lower_dev_get_private);
 
 
 int dev_get_nest_level(struct net_device *dev,
-                       bool (*type_check)(struct net_device *dev))
+                       bool (*type_check)(const struct net_device *dev))
 {
         struct net_device *lower = NULL;
         struct list_head *iter;
@@ -5685,6 +5959,26 @@ int dev_get_nest_level(struct net_device *dev,
 }
 EXPORT_SYMBOL(dev_get_nest_level);
 
+/**
+ * netdev_lower_state_changed - Dispatch event about lower device state change
+ * @lower_dev: device
+ * @lower_state_info: state to dispatch
+ *
+ * Send NETDEV_CHANGELOWERSTATE to netdev notifiers with info.
+ * The caller must hold the RTNL lock.
+ */
+void netdev_lower_state_changed(struct net_device *lower_dev,
+                                void *lower_state_info)
+{
+        struct netdev_notifier_changelowerstate_info changelowerstate_info;
+
+        ASSERT_RTNL();
+        changelowerstate_info.lower_state_info = lower_state_info;
+        call_netdevice_notifiers_info(NETDEV_CHANGELOWERSTATE, lower_dev,
+                                      &changelowerstate_info.info);
+}
+EXPORT_SYMBOL(netdev_lower_state_changed);
+
 static void dev_change_rx_flags(struct net_device *dev, int flags)
 {
         const struct net_device_ops *ops = dev->netdev_ops;
@@ -6375,9 +6669,9 @@ static netdev_features_t netdev_fix_features(struct net_device *dev,
         /* UFO needs SG and checksumming */
         if (features & NETIF_F_UFO) {
                 /* maybe split UFO into V4 and V6? */
-                if (!((features & NETIF_F_GEN_CSUM) ||
-                    (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))
-                            == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
+                if (!(features & NETIF_F_HW_CSUM) &&
+                    ((features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) !=
+                     (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM))) {
                         netdev_dbg(dev,
                                 "Dropping NETIF_F_UFO since no checksum offload features.\n");
                         features &= ~NETIF_F_UFO;
@@ -7164,11 +7458,13 @@ EXPORT_SYMBOL(alloc_netdev_mqs);
  * This function does the last stage of destroying an allocated device
  * interface. The reference to the device object is released.
  * If this is the last reference then it will be freed.
+ * Must be called in process context.
  */
 void free_netdev(struct net_device *dev)
 {
         struct napi_struct *p, *n;
 
+        might_sleep();
         netif_free_tx_queues(dev);
 #ifdef CONFIG_SYSFS
         kvfree(dev->_rx);
@@ -7477,16 +7773,16 @@ static int dev_cpu_callback(struct notifier_block *nfb,
 netdev_features_t netdev_increment_features(netdev_features_t all,
         netdev_features_t one, netdev_features_t mask)
 {
-        if (mask & NETIF_F_GEN_CSUM)
-                mask |= NETIF_F_ALL_CSUM;
+        if (mask & NETIF_F_HW_CSUM)
+                mask |= NETIF_F_CSUM_MASK;
         mask |= NETIF_F_VLAN_CHALLENGED;
 
-        all |= one & (NETIF_F_ONE_FOR_ALL|NETIF_F_ALL_CSUM) & mask;
+        all |= one & (NETIF_F_ONE_FOR_ALL | NETIF_F_CSUM_MASK) & mask;
         all &= one | ~NETIF_F_ALL_FOR_ALL;
 
         /* If one device supports hw checksumming, set for all. */
-        if (all & NETIF_F_GEN_CSUM)
-                all &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM);
+        if (all & NETIF_F_HW_CSUM)
+                all &= ~(NETIF_F_CSUM_MASK & ~NETIF_F_HW_CSUM);
 
         return all;
 }