Diffstat (limited to 'net/core/dev.c')
-rw-r--r--  net/core/dev.c | 1756
1 file changed, 585 insertions(+), 1171 deletions(-)
diff --git a/net/core/dev.c b/net/core/dev.c
index d0cbc93fcf32..a06a7a58dd11 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -97,8 +97,6 @@
97#include <net/net_namespace.h> 97#include <net/net_namespace.h>
98#include <net/sock.h> 98#include <net/sock.h>
99#include <linux/rtnetlink.h> 99#include <linux/rtnetlink.h>
100#include <linux/proc_fs.h>
101#include <linux/seq_file.h>
102#include <linux/stat.h> 100#include <linux/stat.h>
103#include <net/dst.h> 101#include <net/dst.h>
104#include <net/pkt_sched.h> 102#include <net/pkt_sched.h>
@@ -106,12 +104,10 @@
106#include <net/xfrm.h> 104#include <net/xfrm.h>
107#include <linux/highmem.h> 105#include <linux/highmem.h>
108#include <linux/init.h> 106#include <linux/init.h>
109#include <linux/kmod.h>
110#include <linux/module.h> 107#include <linux/module.h>
111#include <linux/netpoll.h> 108#include <linux/netpoll.h>
112#include <linux/rcupdate.h> 109#include <linux/rcupdate.h>
113#include <linux/delay.h> 110#include <linux/delay.h>
114#include <net/wext.h>
115#include <net/iw_handler.h> 111#include <net/iw_handler.h>
116#include <asm/current.h> 112#include <asm/current.h>
117#include <linux/audit.h> 113#include <linux/audit.h>
@@ -132,9 +128,7 @@
132#include <linux/pci.h> 128#include <linux/pci.h>
133#include <linux/inetdevice.h> 129#include <linux/inetdevice.h>
134#include <linux/cpu_rmap.h> 130#include <linux/cpu_rmap.h>
135#include <linux/net_tstamp.h>
136#include <linux/static_key.h> 131#include <linux/static_key.h>
137#include <net/flow_keys.h>
138 132
139#include "net-sysfs.h" 133#include "net-sysfs.h"
140 134
@@ -144,41 +138,10 @@
144/* This should be increased if a protocol with a bigger head is added. */ 138/* This should be increased if a protocol with a bigger head is added. */
145#define GRO_MAX_HEAD (MAX_HEADER + 128) 139#define GRO_MAX_HEAD (MAX_HEADER + 128)
146 140
147/*
148 * The list of packet types we will receive (as opposed to discard)
149 * and the routines to invoke.
150 *
151 * Why 16. Because with 16 the only overlap we get on a hash of the
152 * low nibble of the protocol value is RARP/SNAP/X.25.
153 *
154 * NOTE: That is no longer true with the addition of VLAN tags. Not
155 * sure which should go first, but I bet it won't make much
156 * difference if we are running VLANs. The good news is that
157 * this protocol won't be in the list unless compiled in, so
158 * the average user (w/out VLANs) will not be adversely affected.
159 * --BLG
160 *
161 * 0800 IP
162 * 8100 802.1Q VLAN
163 * 0001 802.3
164 * 0002 AX.25
165 * 0004 802.2
166 * 8035 RARP
167 * 0005 SNAP
168 * 0805 X.25
169 * 0806 ARP
170 * 8137 IPX
171 * 0009 Localtalk
172 * 86DD IPv6
173 */
174
175#define PTYPE_HASH_SIZE (16)
176#define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1)
177
178static DEFINE_SPINLOCK(ptype_lock); 141static DEFINE_SPINLOCK(ptype_lock);
179static DEFINE_SPINLOCK(offload_lock); 142static DEFINE_SPINLOCK(offload_lock);
180static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly; 143struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
181static struct list_head ptype_all __read_mostly; /* Taps */ 144struct list_head ptype_all __read_mostly; /* Taps */
182static struct list_head offload_base __read_mostly; 145static struct list_head offload_base __read_mostly;
183 146
184/* 147/*
@@ -203,7 +166,7 @@ static struct list_head offload_base __read_mostly;
203DEFINE_RWLOCK(dev_base_lock); 166DEFINE_RWLOCK(dev_base_lock);
204EXPORT_SYMBOL(dev_base_lock); 167EXPORT_SYMBOL(dev_base_lock);
205 168
206DEFINE_SEQLOCK(devnet_rename_seq); 169seqcount_t devnet_rename_seq;
207 170
208static inline void dev_base_seq_inc(struct net *net) 171static inline void dev_base_seq_inc(struct net *net)
209{ 172{
@@ -695,11 +658,10 @@ __setup("netdev=", netdev_boot_setup);
695 658
696struct net_device *__dev_get_by_name(struct net *net, const char *name) 659struct net_device *__dev_get_by_name(struct net *net, const char *name)
697{ 660{
698 struct hlist_node *p;
699 struct net_device *dev; 661 struct net_device *dev;
700 struct hlist_head *head = dev_name_hash(net, name); 662 struct hlist_head *head = dev_name_hash(net, name);
701 663
702 hlist_for_each_entry(dev, p, head, name_hlist) 664 hlist_for_each_entry(dev, head, name_hlist)
703 if (!strncmp(dev->name, name, IFNAMSIZ)) 665 if (!strncmp(dev->name, name, IFNAMSIZ))
704 return dev; 666 return dev;
705 667
@@ -721,11 +683,10 @@ EXPORT_SYMBOL(__dev_get_by_name);
721 683
722struct net_device *dev_get_by_name_rcu(struct net *net, const char *name) 684struct net_device *dev_get_by_name_rcu(struct net *net, const char *name)
723{ 685{
724 struct hlist_node *p;
725 struct net_device *dev; 686 struct net_device *dev;
726 struct hlist_head *head = dev_name_hash(net, name); 687 struct hlist_head *head = dev_name_hash(net, name);
727 688
728 hlist_for_each_entry_rcu(dev, p, head, name_hlist) 689 hlist_for_each_entry_rcu(dev, head, name_hlist)
729 if (!strncmp(dev->name, name, IFNAMSIZ)) 690 if (!strncmp(dev->name, name, IFNAMSIZ))
730 return dev; 691 return dev;
731 692
@@ -772,11 +733,10 @@ EXPORT_SYMBOL(dev_get_by_name);
772 733
773struct net_device *__dev_get_by_index(struct net *net, int ifindex) 734struct net_device *__dev_get_by_index(struct net *net, int ifindex)
774{ 735{
775 struct hlist_node *p;
776 struct net_device *dev; 736 struct net_device *dev;
777 struct hlist_head *head = dev_index_hash(net, ifindex); 737 struct hlist_head *head = dev_index_hash(net, ifindex);
778 738
779 hlist_for_each_entry(dev, p, head, index_hlist) 739 hlist_for_each_entry(dev, head, index_hlist)
780 if (dev->ifindex == ifindex) 740 if (dev->ifindex == ifindex)
781 return dev; 741 return dev;
782 742
@@ -797,11 +757,10 @@ EXPORT_SYMBOL(__dev_get_by_index);
797 757
798struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex) 758struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex)
799{ 759{
800 struct hlist_node *p;
801 struct net_device *dev; 760 struct net_device *dev;
802 struct hlist_head *head = dev_index_hash(net, ifindex); 761 struct hlist_head *head = dev_index_hash(net, ifindex);
803 762
804 hlist_for_each_entry_rcu(dev, p, head, index_hlist) 763 hlist_for_each_entry_rcu(dev, head, index_hlist)
805 if (dev->ifindex == ifindex) 764 if (dev->ifindex == ifindex)
806 return dev; 765 return dev;
807 766
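For reference, the hunks above only adapt to the new hlist_for_each_entry*() calling convention, which derives its cursor from the entry and drops the separate struct hlist_node argument; callers of the lookup helpers are unchanged. A minimal caller-side sketch (not part of this patch):

	struct net_device *dev;

	rcu_read_lock();
	dev = dev_get_by_index_rcu(net, ifindex);
	if (dev)
		dev_hold(dev);	/* take a reference before leaving the RCU section */
	rcu_read_unlock();
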
@@ -1093,10 +1052,10 @@ int dev_change_name(struct net_device *dev, const char *newname)
1093 if (dev->flags & IFF_UP) 1052 if (dev->flags & IFF_UP)
1094 return -EBUSY; 1053 return -EBUSY;
1095 1054
1096 write_seqlock(&devnet_rename_seq); 1055 write_seqcount_begin(&devnet_rename_seq);
1097 1056
1098 if (strncmp(newname, dev->name, IFNAMSIZ) == 0) { 1057 if (strncmp(newname, dev->name, IFNAMSIZ) == 0) {
1099 write_sequnlock(&devnet_rename_seq); 1058 write_seqcount_end(&devnet_rename_seq);
1100 return 0; 1059 return 0;
1101 } 1060 }
1102 1061
@@ -1104,7 +1063,7 @@ int dev_change_name(struct net_device *dev, const char *newname)
1104 1063
1105 err = dev_get_valid_name(net, dev, newname); 1064 err = dev_get_valid_name(net, dev, newname);
1106 if (err < 0) { 1065 if (err < 0) {
1107 write_sequnlock(&devnet_rename_seq); 1066 write_seqcount_end(&devnet_rename_seq);
1108 return err; 1067 return err;
1109 } 1068 }
1110 1069
@@ -1112,11 +1071,11 @@ rollback:
1112 ret = device_rename(&dev->dev, dev->name); 1071 ret = device_rename(&dev->dev, dev->name);
1113 if (ret) { 1072 if (ret) {
1114 memcpy(dev->name, oldname, IFNAMSIZ); 1073 memcpy(dev->name, oldname, IFNAMSIZ);
1115 write_sequnlock(&devnet_rename_seq); 1074 write_seqcount_end(&devnet_rename_seq);
1116 return ret; 1075 return ret;
1117 } 1076 }
1118 1077
1119 write_sequnlock(&devnet_rename_seq); 1078 write_seqcount_end(&devnet_rename_seq);
1120 1079
1121 write_lock_bh(&dev_base_lock); 1080 write_lock_bh(&dev_base_lock);
1122 hlist_del_rcu(&dev->name_hlist); 1081 hlist_del_rcu(&dev->name_hlist);
@@ -1135,7 +1094,7 @@ rollback:
1135 /* err >= 0 after dev_alloc_name() or stores the first errno */ 1094 /* err >= 0 after dev_alloc_name() or stores the first errno */
1136 if (err >= 0) { 1095 if (err >= 0) {
1137 err = ret; 1096 err = ret;
1138 write_seqlock(&devnet_rename_seq); 1097 write_seqcount_begin(&devnet_rename_seq);
1139 memcpy(dev->name, oldname, IFNAMSIZ); 1098 memcpy(dev->name, oldname, IFNAMSIZ);
1140 goto rollback; 1099 goto rollback;
1141 } else { 1100 } else {
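The write_seqcount_begin()/write_seqcount_end() calls above rely on the writer being serialized by RTNL; readers pair them with a retry loop. A minimal reader sketch (assuming the standard seqcount read API, not shown in this hunk):

	char name[IFNAMSIZ];
	struct net_device *dev;
	unsigned int seq;

	do {
		seq = read_seqcount_begin(&devnet_rename_seq);
		rcu_read_lock();
		dev = dev_get_by_index_rcu(net, ifindex);
		if (dev)
			strcpy(name, dev->name);	/* dev->name stays NUL-terminated within IFNAMSIZ */
		rcu_read_unlock();
	} while (dev && read_seqcount_retry(&devnet_rename_seq, seq));
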
@@ -1227,36 +1186,6 @@ void netdev_notify_peers(struct net_device *dev)
1227} 1186}
1228EXPORT_SYMBOL(netdev_notify_peers); 1187EXPORT_SYMBOL(netdev_notify_peers);
1229 1188
1230/**
1231 * dev_load - load a network module
1232 * @net: the applicable net namespace
1233 * @name: name of interface
1234 *
1235 * If a network interface is not present and the process has suitable
1236 * privileges this function loads the module. If module loading is not
1237 * available in this kernel then it becomes a nop.
1238 */
1239
1240void dev_load(struct net *net, const char *name)
1241{
1242 struct net_device *dev;
1243 int no_module;
1244
1245 rcu_read_lock();
1246 dev = dev_get_by_name_rcu(net, name);
1247 rcu_read_unlock();
1248
1249 no_module = !dev;
1250 if (no_module && capable(CAP_NET_ADMIN))
1251 no_module = request_module("netdev-%s", name);
1252 if (no_module && capable(CAP_SYS_MODULE)) {
1253 if (!request_module("%s", name))
1254 pr_warn("Loading kernel module for a network device with CAP_SYS_MODULE (deprecated). Use CAP_NET_ADMIN and alias netdev-%s instead.\n",
1255 name);
1256 }
1257}
1258EXPORT_SYMBOL(dev_load);
1259
1260static int __dev_open(struct net_device *dev) 1189static int __dev_open(struct net_device *dev)
1261{ 1190{
1262 const struct net_device_ops *ops = dev->netdev_ops; 1191 const struct net_device_ops *ops = dev->netdev_ops;
@@ -1267,6 +1196,14 @@ static int __dev_open(struct net_device *dev)
1267 if (!netif_device_present(dev)) 1196 if (!netif_device_present(dev))
1268 return -ENODEV; 1197 return -ENODEV;
1269 1198
1199 /* Block netpoll from trying to do any rx path servicing.
1200 * If we don't do this there is a chance ndo_poll_controller
1201 * or ndo_poll may be running while we open the device
1202 */
1203 ret = netpoll_rx_disable(dev);
1204 if (ret)
1205 return ret;
1206
1270 ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev); 1207 ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev);
1271 ret = notifier_to_errno(ret); 1208 ret = notifier_to_errno(ret);
1272 if (ret) 1209 if (ret)
@@ -1280,6 +1217,8 @@ static int __dev_open(struct net_device *dev)
1280 if (!ret && ops->ndo_open) 1217 if (!ret && ops->ndo_open)
1281 ret = ops->ndo_open(dev); 1218 ret = ops->ndo_open(dev);
1282 1219
1220 netpoll_rx_enable(dev);
1221
1283 if (ret) 1222 if (ret)
1284 clear_bit(__LINK_STATE_START, &dev->state); 1223 clear_bit(__LINK_STATE_START, &dev->state);
1285 else { 1224 else {
@@ -1371,9 +1310,16 @@ static int __dev_close(struct net_device *dev)
1371 int retval; 1310 int retval;
1372 LIST_HEAD(single); 1311 LIST_HEAD(single);
1373 1312
1313 /* Temporarily disable netpoll until the interface is down */
1314 retval = netpoll_rx_disable(dev);
1315 if (retval)
1316 return retval;
1317
1374 list_add(&dev->unreg_list, &single); 1318 list_add(&dev->unreg_list, &single);
1375 retval = __dev_close_many(&single); 1319 retval = __dev_close_many(&single);
1376 list_del(&single); 1320 list_del(&single);
1321
1322 netpoll_rx_enable(dev);
1377 return retval; 1323 return retval;
1378} 1324}
1379 1325
@@ -1409,14 +1355,22 @@ static int dev_close_many(struct list_head *head)
1409 */ 1355 */
1410int dev_close(struct net_device *dev) 1356int dev_close(struct net_device *dev)
1411{ 1357{
1358 int ret = 0;
1412 if (dev->flags & IFF_UP) { 1359 if (dev->flags & IFF_UP) {
1413 LIST_HEAD(single); 1360 LIST_HEAD(single);
1414 1361
1362 /* Block netpoll rx while the interface is going down */
1363 ret = netpoll_rx_disable(dev);
1364 if (ret)
1365 return ret;
1366
1415 list_add(&dev->unreg_list, &single); 1367 list_add(&dev->unreg_list, &single);
1416 dev_close_many(&single); 1368 dev_close_many(&single);
1417 list_del(&single); 1369 list_del(&single);
1370
1371 netpoll_rx_enable(dev);
1418 } 1372 }
1419 return 0; 1373 return ret;
1420} 1374}
1421EXPORT_SYMBOL(dev_close); 1375EXPORT_SYMBOL(dev_close);
1422 1376
@@ -1621,57 +1575,6 @@ static inline void net_timestamp_set(struct sk_buff *skb)
1621 __net_timestamp(SKB); \ 1575 __net_timestamp(SKB); \
1622 } \ 1576 } \
1623 1577
1624static int net_hwtstamp_validate(struct ifreq *ifr)
1625{
1626 struct hwtstamp_config cfg;
1627 enum hwtstamp_tx_types tx_type;
1628 enum hwtstamp_rx_filters rx_filter;
1629 int tx_type_valid = 0;
1630 int rx_filter_valid = 0;
1631
1632 if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg)))
1633 return -EFAULT;
1634
1635 if (cfg.flags) /* reserved for future extensions */
1636 return -EINVAL;
1637
1638 tx_type = cfg.tx_type;
1639 rx_filter = cfg.rx_filter;
1640
1641 switch (tx_type) {
1642 case HWTSTAMP_TX_OFF:
1643 case HWTSTAMP_TX_ON:
1644 case HWTSTAMP_TX_ONESTEP_SYNC:
1645 tx_type_valid = 1;
1646 break;
1647 }
1648
1649 switch (rx_filter) {
1650 case HWTSTAMP_FILTER_NONE:
1651 case HWTSTAMP_FILTER_ALL:
1652 case HWTSTAMP_FILTER_SOME:
1653 case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
1654 case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
1655 case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
1656 case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
1657 case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
1658 case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
1659 case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
1660 case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
1661 case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
1662 case HWTSTAMP_FILTER_PTP_V2_EVENT:
1663 case HWTSTAMP_FILTER_PTP_V2_SYNC:
1664 case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
1665 rx_filter_valid = 1;
1666 break;
1667 }
1668
1669 if (!tx_type_valid || !rx_filter_valid)
1670 return -ERANGE;
1671
1672 return 0;
1673}
1674
1675static inline bool is_skb_forwardable(struct net_device *dev, 1578static inline bool is_skb_forwardable(struct net_device *dev,
1676 struct sk_buff *skb) 1579 struct sk_buff *skb)
1677{ 1580{
@@ -1857,6 +1760,230 @@ static void netif_setup_tc(struct net_device *dev, unsigned int txq)
1857 } 1760 }
1858} 1761}
1859 1762
1763#ifdef CONFIG_XPS
1764static DEFINE_MUTEX(xps_map_mutex);
1765#define xmap_dereference(P) \
1766 rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex))
1767
1768static struct xps_map *remove_xps_queue(struct xps_dev_maps *dev_maps,
1769 int cpu, u16 index)
1770{
1771 struct xps_map *map = NULL;
1772 int pos;
1773
1774 if (dev_maps)
1775 map = xmap_dereference(dev_maps->cpu_map[cpu]);
1776
1777 for (pos = 0; map && pos < map->len; pos++) {
1778 if (map->queues[pos] == index) {
1779 if (map->len > 1) {
1780 map->queues[pos] = map->queues[--map->len];
1781 } else {
1782 RCU_INIT_POINTER(dev_maps->cpu_map[cpu], NULL);
1783 kfree_rcu(map, rcu);
1784 map = NULL;
1785 }
1786 break;
1787 }
1788 }
1789
1790 return map;
1791}
1792
1793static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index)
1794{
1795 struct xps_dev_maps *dev_maps;
1796 int cpu, i;
1797 bool active = false;
1798
1799 mutex_lock(&xps_map_mutex);
1800 dev_maps = xmap_dereference(dev->xps_maps);
1801
1802 if (!dev_maps)
1803 goto out_no_maps;
1804
1805 for_each_possible_cpu(cpu) {
1806 for (i = index; i < dev->num_tx_queues; i++) {
1807 if (!remove_xps_queue(dev_maps, cpu, i))
1808 break;
1809 }
1810 if (i == dev->num_tx_queues)
1811 active = true;
1812 }
1813
1814 if (!active) {
1815 RCU_INIT_POINTER(dev->xps_maps, NULL);
1816 kfree_rcu(dev_maps, rcu);
1817 }
1818
1819 for (i = index; i < dev->num_tx_queues; i++)
1820 netdev_queue_numa_node_write(netdev_get_tx_queue(dev, i),
1821 NUMA_NO_NODE);
1822
1823out_no_maps:
1824 mutex_unlock(&xps_map_mutex);
1825}
1826
1827static struct xps_map *expand_xps_map(struct xps_map *map,
1828 int cpu, u16 index)
1829{
1830 struct xps_map *new_map;
1831 int alloc_len = XPS_MIN_MAP_ALLOC;
1832 int i, pos;
1833
1834 for (pos = 0; map && pos < map->len; pos++) {
1835 if (map->queues[pos] != index)
1836 continue;
1837 return map;
1838 }
1839
1840 /* Need to add queue to this CPU's existing map */
1841 if (map) {
1842 if (pos < map->alloc_len)
1843 return map;
1844
1845 alloc_len = map->alloc_len * 2;
1846 }
1847
1848 /* Need to allocate new map to store queue on this CPU's map */
1849 new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), GFP_KERNEL,
1850 cpu_to_node(cpu));
1851 if (!new_map)
1852 return NULL;
1853
1854 for (i = 0; i < pos; i++)
1855 new_map->queues[i] = map->queues[i];
1856 new_map->alloc_len = alloc_len;
1857 new_map->len = pos;
1858
1859 return new_map;
1860}
1861
1862int netif_set_xps_queue(struct net_device *dev, struct cpumask *mask, u16 index)
1863{
1864 struct xps_dev_maps *dev_maps, *new_dev_maps = NULL;
1865 struct xps_map *map, *new_map;
1866 int maps_sz = max_t(unsigned int, XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES);
1867 int cpu, numa_node_id = -2;
1868 bool active = false;
1869
1870 mutex_lock(&xps_map_mutex);
1871
1872 dev_maps = xmap_dereference(dev->xps_maps);
1873
1874 /* allocate memory for queue storage */
1875 for_each_online_cpu(cpu) {
1876 if (!cpumask_test_cpu(cpu, mask))
1877 continue;
1878
1879 if (!new_dev_maps)
1880 new_dev_maps = kzalloc(maps_sz, GFP_KERNEL);
1881 if (!new_dev_maps) {
1882 mutex_unlock(&xps_map_mutex);
1883 return -ENOMEM;
1884 }
1885
1886 map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) :
1887 NULL;
1888
1889 map = expand_xps_map(map, cpu, index);
1890 if (!map)
1891 goto error;
1892
1893 RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map);
1894 }
1895
1896 if (!new_dev_maps)
1897 goto out_no_new_maps;
1898
1899 for_each_possible_cpu(cpu) {
1900 if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) {
1901 /* add queue to CPU maps */
1902 int pos = 0;
1903
1904 map = xmap_dereference(new_dev_maps->cpu_map[cpu]);
1905 while ((pos < map->len) && (map->queues[pos] != index))
1906 pos++;
1907
1908 if (pos == map->len)
1909 map->queues[map->len++] = index;
1910#ifdef CONFIG_NUMA
1911 if (numa_node_id == -2)
1912 numa_node_id = cpu_to_node(cpu);
1913 else if (numa_node_id != cpu_to_node(cpu))
1914 numa_node_id = -1;
1915#endif
1916 } else if (dev_maps) {
1917 /* fill in the new device map from the old device map */
1918 map = xmap_dereference(dev_maps->cpu_map[cpu]);
1919 RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map);
1920 }
1921
1922 }
1923
1924 rcu_assign_pointer(dev->xps_maps, new_dev_maps);
1925
1926 /* Cleanup old maps */
1927 if (dev_maps) {
1928 for_each_possible_cpu(cpu) {
1929 new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]);
1930 map = xmap_dereference(dev_maps->cpu_map[cpu]);
1931 if (map && map != new_map)
1932 kfree_rcu(map, rcu);
1933 }
1934
1935 kfree_rcu(dev_maps, rcu);
1936 }
1937
1938 dev_maps = new_dev_maps;
1939 active = true;
1940
1941out_no_new_maps:
1942 /* update Tx queue numa node */
1943 netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index),
1944 (numa_node_id >= 0) ? numa_node_id :
1945 NUMA_NO_NODE);
1946
1947 if (!dev_maps)
1948 goto out_no_maps;
1949
1950 /* removes queue from unused CPUs */
1951 for_each_possible_cpu(cpu) {
1952 if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu))
1953 continue;
1954
1955 if (remove_xps_queue(dev_maps, cpu, index))
1956 active = true;
1957 }
1958
1959 /* free map if not active */
1960 if (!active) {
1961 RCU_INIT_POINTER(dev->xps_maps, NULL);
1962 kfree_rcu(dev_maps, rcu);
1963 }
1964
1965out_no_maps:
1966 mutex_unlock(&xps_map_mutex);
1967
1968 return 0;
1969error:
1970 /* remove any maps that we added */
1971 for_each_possible_cpu(cpu) {
1972 new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]);
1973 map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) :
1974 NULL;
1975 if (new_map && new_map != map)
1976 kfree(new_map);
1977 }
1978
1979 mutex_unlock(&xps_map_mutex);
1980
1981 kfree(new_dev_maps);
1982 return -ENOMEM;
1983}
1984EXPORT_SYMBOL(netif_set_xps_queue);
1985
1986#endif
1860/* 1987/*
1861 * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues 1988 * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
1862 * greater then real_num_tx_queues stale skbs on the qdisc must be flushed. 1989 * greater then real_num_tx_queues stale skbs on the qdisc must be flushed.
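A hypothetical driver-side use of the newly exported netif_set_xps_queue() (example_setup_xps() and its one-queue-per-CPU policy are illustrative only, not part of this patch):

	static int example_setup_xps(struct net_device *dev)
	{
		cpumask_var_t mask;
		int i, err = 0;

		if (!alloc_cpumask_var(&mask, GFP_KERNEL))
			return -ENOMEM;

		for (i = 0; i < dev->real_num_tx_queues && !err; i++) {
			cpumask_clear(mask);
			cpumask_set_cpu(i % num_online_cpus(), mask);
			/* prefer transmitting queue i from this CPU */
			err = netif_set_xps_queue(dev, mask, i);
		}

		free_cpumask_var(mask);
		return err;
	}
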
@@ -1880,8 +2007,12 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
1880 if (dev->num_tc) 2007 if (dev->num_tc)
1881 netif_setup_tc(dev, txq); 2008 netif_setup_tc(dev, txq);
1882 2009
1883 if (txq < dev->real_num_tx_queues) 2010 if (txq < dev->real_num_tx_queues) {
1884 qdisc_reset_all_tx_gt(dev, txq); 2011 qdisc_reset_all_tx_gt(dev, txq);
2012#ifdef CONFIG_XPS
2013 netif_reset_xps_queues_gt(dev, txq);
2014#endif
2015 }
1885 } 2016 }
1886 2017
1887 dev->real_num_tx_queues = txq; 2018 dev->real_num_tx_queues = txq;
@@ -2046,6 +2177,15 @@ int skb_checksum_help(struct sk_buff *skb)
2046 return -EINVAL; 2177 return -EINVAL;
2047 } 2178 }
2048 2179
2180 /* Before computing a checksum, we should make sure no frag could
2181 * be modified by an external entity : checksum could be wrong.
2182 */
2183 if (skb_has_shared_frag(skb)) {
2184 ret = __skb_linearize(skb);
2185 if (ret)
2186 goto out;
2187 }
2188
2049 offset = skb_checksum_start_offset(skb); 2189 offset = skb_checksum_start_offset(skb);
2050 BUG_ON(offset >= skb_headlen(skb)); 2190 BUG_ON(offset >= skb_headlen(skb));
2051 csum = skb_checksum(skb, offset, skb->len - offset, 0); 2191 csum = skb_checksum(skb, offset, skb->len - offset, 0);
@@ -2069,25 +2209,19 @@ out:
2069EXPORT_SYMBOL(skb_checksum_help); 2209EXPORT_SYMBOL(skb_checksum_help);
2070 2210
2071/** 2211/**
2072 * skb_gso_segment - Perform segmentation on skb. 2212 * skb_mac_gso_segment - mac layer segmentation handler.
2073 * @skb: buffer to segment 2213 * @skb: buffer to segment
2074 * @features: features for the output path (see dev->features) 2214 * @features: features for the output path (see dev->features)
2075 *
2076 * This function segments the given skb and returns a list of segments.
2077 *
2078 * It may return NULL if the skb requires no segmentation. This is
2079 * only possible when GSO is used for verifying header integrity.
2080 */ 2215 */
2081struct sk_buff *skb_gso_segment(struct sk_buff *skb, 2216struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
2082 netdev_features_t features) 2217 netdev_features_t features)
2083{ 2218{
2084 struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); 2219 struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
2085 struct packet_offload *ptype; 2220 struct packet_offload *ptype;
2086 __be16 type = skb->protocol; 2221 __be16 type = skb->protocol;
2087 int vlan_depth = ETH_HLEN;
2088 int err;
2089 2222
2090 while (type == htons(ETH_P_8021Q)) { 2223 while (type == htons(ETH_P_8021Q)) {
2224 int vlan_depth = ETH_HLEN;
2091 struct vlan_hdr *vh; 2225 struct vlan_hdr *vh;
2092 2226
2093 if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN))) 2227 if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN)))
@@ -2098,22 +2232,14 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb,
2098 vlan_depth += VLAN_HLEN; 2232 vlan_depth += VLAN_HLEN;
2099 } 2233 }
2100 2234
2101 skb_reset_mac_header(skb);
2102 skb->mac_len = skb->network_header - skb->mac_header;
2103 __skb_pull(skb, skb->mac_len); 2235 __skb_pull(skb, skb->mac_len);
2104 2236
2105 if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
2106 skb_warn_bad_offload(skb);
2107
2108 if (skb_header_cloned(skb) &&
2109 (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
2110 return ERR_PTR(err);
2111 }
2112
2113 rcu_read_lock(); 2237 rcu_read_lock();
2114 list_for_each_entry_rcu(ptype, &offload_base, list) { 2238 list_for_each_entry_rcu(ptype, &offload_base, list) {
2115 if (ptype->type == type && ptype->callbacks.gso_segment) { 2239 if (ptype->type == type && ptype->callbacks.gso_segment) {
2116 if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { 2240 if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
2241 int err;
2242
2117 err = ptype->callbacks.gso_send_check(skb); 2243 err = ptype->callbacks.gso_send_check(skb);
2118 segs = ERR_PTR(err); 2244 segs = ERR_PTR(err);
2119 if (err || skb_gso_ok(skb, features)) 2245 if (err || skb_gso_ok(skb, features))
@@ -2131,7 +2257,50 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb,
2131 2257
2132 return segs; 2258 return segs;
2133} 2259}
2134EXPORT_SYMBOL(skb_gso_segment); 2260EXPORT_SYMBOL(skb_mac_gso_segment);
2261
2262
2263/* openvswitch calls this on rx path, so we need a different check.
2264 */
2265static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path)
2266{
2267 if (tx_path)
2268 return skb->ip_summed != CHECKSUM_PARTIAL;
2269 else
2270 return skb->ip_summed == CHECKSUM_NONE;
2271}
2272
2273/**
2274 * __skb_gso_segment - Perform segmentation on skb.
2275 * @skb: buffer to segment
2276 * @features: features for the output path (see dev->features)
2277 * @tx_path: whether it is called in TX path
2278 *
2279 * This function segments the given skb and returns a list of segments.
2280 *
2281 * It may return NULL if the skb requires no segmentation. This is
2282 * only possible when GSO is used for verifying header integrity.
2283 */
2284struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
2285 netdev_features_t features, bool tx_path)
2286{
2287 if (unlikely(skb_needs_check(skb, tx_path))) {
2288 int err;
2289
2290 skb_warn_bad_offload(skb);
2291
2292 if (skb_header_cloned(skb) &&
2293 (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
2294 return ERR_PTR(err);
2295 }
2296
2297 SKB_GSO_CB(skb)->mac_offset = skb_headroom(skb);
2298 skb_reset_mac_header(skb);
2299 skb_reset_mac_len(skb);
2300
2301 return skb_mac_gso_segment(skb, features);
2302}
2303EXPORT_SYMBOL(__skb_gso_segment);
2135 2304
2136/* Take action when hardware reception checksum errors are detected. */ 2305/* Take action when hardware reception checksum errors are detected. */
2137#ifdef CONFIG_BUG 2306#ifdef CONFIG_BUG
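Existing TX-path callers keep calling skb_gso_segment(); the wrapper assumed to accompany this change (added in netdevice.h, not shown in this diff) is along the lines of:

	static inline struct sk_buff *skb_gso_segment(struct sk_buff *skb,
						      netdev_features_t features)
	{
		return __skb_gso_segment(skb, features, true);
	}
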
@@ -2410,126 +2579,28 @@ out:
2410 return rc; 2579 return rc;
2411} 2580}
2412 2581
2413static u32 hashrnd __read_mostly; 2582static void qdisc_pkt_len_init(struct sk_buff *skb)
2414
2415/*
2416 * Returns a Tx hash based on the given packet descriptor a Tx queues' number
2417 * to be used as a distribution range.
2418 */
2419u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
2420 unsigned int num_tx_queues)
2421{
2422 u32 hash;
2423 u16 qoffset = 0;
2424 u16 qcount = num_tx_queues;
2425
2426 if (skb_rx_queue_recorded(skb)) {
2427 hash = skb_get_rx_queue(skb);
2428 while (unlikely(hash >= num_tx_queues))
2429 hash -= num_tx_queues;
2430 return hash;
2431 }
2432
2433 if (dev->num_tc) {
2434 u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
2435 qoffset = dev->tc_to_txq[tc].offset;
2436 qcount = dev->tc_to_txq[tc].count;
2437 }
2438
2439 if (skb->sk && skb->sk->sk_hash)
2440 hash = skb->sk->sk_hash;
2441 else
2442 hash = (__force u16) skb->protocol;
2443 hash = jhash_1word(hash, hashrnd);
2444
2445 return (u16) (((u64) hash * qcount) >> 32) + qoffset;
2446}
2447EXPORT_SYMBOL(__skb_tx_hash);
2448
2449static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
2450{
2451 if (unlikely(queue_index >= dev->real_num_tx_queues)) {
2452 net_warn_ratelimited("%s selects TX queue %d, but real number of TX queues is %d\n",
2453 dev->name, queue_index,
2454 dev->real_num_tx_queues);
2455 return 0;
2456 }
2457 return queue_index;
2458}
2459
2460static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
2461{ 2583{
2462#ifdef CONFIG_XPS 2584 const struct skb_shared_info *shinfo = skb_shinfo(skb);
2463 struct xps_dev_maps *dev_maps;
2464 struct xps_map *map;
2465 int queue_index = -1;
2466 2585
2467 rcu_read_lock(); 2586 qdisc_skb_cb(skb)->pkt_len = skb->len;
2468 dev_maps = rcu_dereference(dev->xps_maps);
2469 if (dev_maps) {
2470 map = rcu_dereference(
2471 dev_maps->cpu_map[raw_smp_processor_id()]);
2472 if (map) {
2473 if (map->len == 1)
2474 queue_index = map->queues[0];
2475 else {
2476 u32 hash;
2477 if (skb->sk && skb->sk->sk_hash)
2478 hash = skb->sk->sk_hash;
2479 else
2480 hash = (__force u16) skb->protocol ^
2481 skb->rxhash;
2482 hash = jhash_1word(hash, hashrnd);
2483 queue_index = map->queues[
2484 ((u64)hash * map->len) >> 32];
2485 }
2486 if (unlikely(queue_index >= dev->real_num_tx_queues))
2487 queue_index = -1;
2488 }
2489 }
2490 rcu_read_unlock();
2491
2492 return queue_index;
2493#else
2494 return -1;
2495#endif
2496}
2497
2498struct netdev_queue *netdev_pick_tx(struct net_device *dev,
2499 struct sk_buff *skb)
2500{
2501 int queue_index;
2502 const struct net_device_ops *ops = dev->netdev_ops;
2503
2504 if (dev->real_num_tx_queues == 1)
2505 queue_index = 0;
2506 else if (ops->ndo_select_queue) {
2507 queue_index = ops->ndo_select_queue(dev, skb);
2508 queue_index = dev_cap_txqueue(dev, queue_index);
2509 } else {
2510 struct sock *sk = skb->sk;
2511 queue_index = sk_tx_queue_get(sk);
2512
2513 if (queue_index < 0 || skb->ooo_okay ||
2514 queue_index >= dev->real_num_tx_queues) {
2515 int old_index = queue_index;
2516 2587
2517 queue_index = get_xps_queue(dev, skb); 2588 /* To get more precise estimation of bytes sent on wire,
2518 if (queue_index < 0) 2589 * we add to pkt_len the headers size of all segments
2519 queue_index = skb_tx_hash(dev, skb); 2590 */
2591 if (shinfo->gso_size) {
2592 unsigned int hdr_len;
2520 2593
2521 if (queue_index != old_index && sk) { 2594 /* mac layer + network layer */
2522 struct dst_entry *dst = 2595 hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
2523 rcu_dereference_check(sk->sk_dst_cache, 1);
2524 2596
2525 if (dst && skb_dst(skb) == dst) 2597 /* + transport layer */
2526 sk_tx_queue_set(sk, queue_index); 2598 if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
2527 } 2599 hdr_len += tcp_hdrlen(skb);
2528 } 2600 else
2601 hdr_len += sizeof(struct udphdr);
2602 qdisc_skb_cb(skb)->pkt_len += (shinfo->gso_segs - 1) * hdr_len;
2529 } 2603 }
2530
2531 skb_set_queue_mapping(skb, queue_index);
2532 return netdev_get_tx_queue(dev, queue_index);
2533} 2604}
2534 2605
2535static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, 2606static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
@@ -2540,7 +2611,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
2540 bool contended; 2611 bool contended;
2541 int rc; 2612 int rc;
2542 2613
2543 qdisc_skb_cb(skb)->pkt_len = skb->len; 2614 qdisc_pkt_len_init(skb);
2544 qdisc_calculate_pkt_len(skb, q); 2615 qdisc_calculate_pkt_len(skb, q);
2545 /* 2616 /*
2546 * Heuristic to force contended enqueues to serialize on a 2617 * Heuristic to force contended enqueues to serialize on a
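As a worked example of qdisc_pkt_len_init() (numbers are illustrative): a TCPv4 GSO skb with skb->len = 5890, gso_segs = 4 and 66 bytes of mac + IP + TCP headers is accounted as pkt_len = 5890 + (4 - 1) * 66 = 6088, which is much closer to the bytes that will actually hit the wire than the old skb->len accounting.
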
@@ -2663,6 +2734,8 @@ int dev_queue_xmit(struct sk_buff *skb)
2663 struct Qdisc *q; 2734 struct Qdisc *q;
2664 int rc = -ENOMEM; 2735 int rc = -ENOMEM;
2665 2736
2737 skb_reset_mac_header(skb);
2738
2666 /* Disable soft irqs for various locks below. Also 2739 /* Disable soft irqs for various locks below. Also
2667 * stops preemption for RCU. 2740 * stops preemption for RCU.
2668 */ 2741 */
@@ -2757,41 +2830,6 @@ static inline void ____napi_schedule(struct softnet_data *sd,
2757 __raise_softirq_irqoff(NET_RX_SOFTIRQ); 2830 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2758} 2831}
2759 2832
2760/*
2761 * __skb_get_rxhash: calculate a flow hash based on src/dst addresses
2762 * and src/dst port numbers. Sets rxhash in skb to non-zero hash value
2763 * on success, zero indicates no valid hash. Also, sets l4_rxhash in skb
2764 * if hash is a canonical 4-tuple hash over transport ports.
2765 */
2766void __skb_get_rxhash(struct sk_buff *skb)
2767{
2768 struct flow_keys keys;
2769 u32 hash;
2770
2771 if (!skb_flow_dissect(skb, &keys))
2772 return;
2773
2774 if (keys.ports)
2775 skb->l4_rxhash = 1;
2776
2777 /* get a consistent hash (same value on both flow directions) */
2778 if (((__force u32)keys.dst < (__force u32)keys.src) ||
2779 (((__force u32)keys.dst == (__force u32)keys.src) &&
2780 ((__force u16)keys.port16[1] < (__force u16)keys.port16[0]))) {
2781 swap(keys.dst, keys.src);
2782 swap(keys.port16[0], keys.port16[1]);
2783 }
2784
2785 hash = jhash_3words((__force u32)keys.dst,
2786 (__force u32)keys.src,
2787 (__force u32)keys.ports, hashrnd);
2788 if (!hash)
2789 hash = 1;
2790
2791 skb->rxhash = hash;
2792}
2793EXPORT_SYMBOL(__skb_get_rxhash);
2794
2795#ifdef CONFIG_RPS 2833#ifdef CONFIG_RPS
2796 2834
2797/* One global table that all flow-based protocols share. */ 2835/* One global table that all flow-based protocols share. */
@@ -3318,7 +3356,7 @@ static bool skb_pfmemalloc_protocol(struct sk_buff *skb)
3318 } 3356 }
3319} 3357}
3320 3358
3321static int __netif_receive_skb(struct sk_buff *skb) 3359static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
3322{ 3360{
3323 struct packet_type *ptype, *pt_prev; 3361 struct packet_type *ptype, *pt_prev;
3324 rx_handler_func_t *rx_handler; 3362 rx_handler_func_t *rx_handler;
@@ -3327,24 +3365,11 @@ static int __netif_receive_skb(struct sk_buff *skb)
3327 bool deliver_exact = false; 3365 bool deliver_exact = false;
3328 int ret = NET_RX_DROP; 3366 int ret = NET_RX_DROP;
3329 __be16 type; 3367 __be16 type;
3330 unsigned long pflags = current->flags;
3331 3368
3332 net_timestamp_check(!netdev_tstamp_prequeue, skb); 3369 net_timestamp_check(!netdev_tstamp_prequeue, skb);
3333 3370
3334 trace_netif_receive_skb(skb); 3371 trace_netif_receive_skb(skb);
3335 3372
3336 /*
3337 * PFMEMALLOC skbs are special, they should
3338 * - be delivered to SOCK_MEMALLOC sockets only
3339 * - stay away from userspace
3340 * - have bounded memory usage
3341 *
3342 * Use PF_MEMALLOC as this saves us from propagating the allocation
3343 * context down to all allocation sites.
3344 */
3345 if (sk_memalloc_socks() && skb_pfmemalloc(skb))
3346 current->flags |= PF_MEMALLOC;
3347
3348 /* if we've gotten here through NAPI, check netpoll */ 3373 /* if we've gotten here through NAPI, check netpoll */
3349 if (netpoll_receive_skb(skb)) 3374 if (netpoll_receive_skb(skb))
3350 goto out; 3375 goto out;
@@ -3352,7 +3377,8 @@ static int __netif_receive_skb(struct sk_buff *skb)
3352 orig_dev = skb->dev; 3377 orig_dev = skb->dev;
3353 3378
3354 skb_reset_network_header(skb); 3379 skb_reset_network_header(skb);
3355 skb_reset_transport_header(skb); 3380 if (!skb_transport_header_was_set(skb))
3381 skb_reset_transport_header(skb);
3356 skb_reset_mac_len(skb); 3382 skb_reset_mac_len(skb);
3357 3383
3358 pt_prev = NULL; 3384 pt_prev = NULL;
@@ -3377,7 +3403,7 @@ another_round:
3377 } 3403 }
3378#endif 3404#endif
3379 3405
3380 if (sk_memalloc_socks() && skb_pfmemalloc(skb)) 3406 if (pfmemalloc)
3381 goto skip_taps; 3407 goto skip_taps;
3382 3408
3383 list_for_each_entry_rcu(ptype, &ptype_all, list) { 3409 list_for_each_entry_rcu(ptype, &ptype_all, list) {
@@ -3396,8 +3422,7 @@ skip_taps:
3396ncls: 3422ncls:
3397#endif 3423#endif
3398 3424
3399 if (sk_memalloc_socks() && skb_pfmemalloc(skb) 3425 if (pfmemalloc && !skb_pfmemalloc_protocol(skb))
3400 && !skb_pfmemalloc_protocol(skb))
3401 goto drop; 3426 goto drop;
3402 3427
3403 if (vlan_tx_tag_present(skb)) { 3428 if (vlan_tx_tag_present(skb)) {
@@ -3467,7 +3492,31 @@ drop:
3467unlock: 3492unlock:
3468 rcu_read_unlock(); 3493 rcu_read_unlock();
3469out: 3494out:
3470 tsk_restore_flags(current, pflags, PF_MEMALLOC); 3495 return ret;
3496}
3497
3498static int __netif_receive_skb(struct sk_buff *skb)
3499{
3500 int ret;
3501
3502 if (sk_memalloc_socks() && skb_pfmemalloc(skb)) {
3503 unsigned long pflags = current->flags;
3504
3505 /*
3506 * PFMEMALLOC skbs are special, they should
3507 * - be delivered to SOCK_MEMALLOC sockets only
3508 * - stay away from userspace
3509 * - have bounded memory usage
3510 *
3511 * Use PF_MEMALLOC as this saves us from propagating the allocation
3512 * context down to all allocation sites.
3513 */
3514 current->flags |= PF_MEMALLOC;
3515 ret = __netif_receive_skb_core(skb, true);
3516 tsk_restore_flags(current, pflags, PF_MEMALLOC);
3517 } else
3518 ret = __netif_receive_skb_core(skb, false);
3519
3471 return ret; 3520 return ret;
3472} 3521}
3473 3522
@@ -3634,7 +3683,6 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
3634 __be16 type = skb->protocol; 3683 __be16 type = skb->protocol;
3635 struct list_head *head = &offload_base; 3684 struct list_head *head = &offload_base;
3636 int same_flow; 3685 int same_flow;
3637 int mac_len;
3638 enum gro_result ret; 3686 enum gro_result ret;
3639 3687
3640 if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb)) 3688 if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb))
@@ -3651,8 +3699,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
3651 continue; 3699 continue;
3652 3700
3653 skb_set_network_header(skb, skb_gro_offset(skb)); 3701 skb_set_network_header(skb, skb_gro_offset(skb));
3654 mac_len = skb->network_header - skb->mac_header; 3702 skb_reset_mac_len(skb);
3655 skb->mac_len = mac_len;
3656 NAPI_GRO_CB(skb)->same_flow = 0; 3703 NAPI_GRO_CB(skb)->same_flow = 0;
3657 NAPI_GRO_CB(skb)->flush = 0; 3704 NAPI_GRO_CB(skb)->flush = 0;
3658 NAPI_GRO_CB(skb)->free = 0; 3705 NAPI_GRO_CB(skb)->free = 0;
@@ -4134,530 +4181,231 @@ softnet_break:
4134 goto out; 4181 goto out;
4135} 4182}
4136 4183
4137static gifconf_func_t *gifconf_list[NPROTO]; 4184struct netdev_upper {
4138
4139/**
4140 * register_gifconf - register a SIOCGIF handler
4141 * @family: Address family
4142 * @gifconf: Function handler
4143 *
4144 * Register protocol dependent address dumping routines. The handler
4145 * that is passed must not be freed or reused until it has been replaced
4146 * by another handler.
4147 */
4148int register_gifconf(unsigned int family, gifconf_func_t *gifconf)
4149{
4150 if (family >= NPROTO)
4151 return -EINVAL;
4152 gifconf_list[family] = gifconf;
4153 return 0;
4154}
4155EXPORT_SYMBOL(register_gifconf);
4156
4157
4158/*
4159 * Map an interface index to its name (SIOCGIFNAME)
4160 */
4161
4162/*
4163 * We need this ioctl for efficient implementation of the
4164 * if_indextoname() function required by the IPv6 API. Without
4165 * it, we would have to search all the interfaces to find a
4166 * match. --pb
4167 */
4168
4169static int dev_ifname(struct net *net, struct ifreq __user *arg)
4170{
4171 struct net_device *dev; 4185 struct net_device *dev;
4172 struct ifreq ifr; 4186 bool master;
4173 unsigned seq; 4187 struct list_head list;
4174 4188 struct rcu_head rcu;
4175 /* 4189 struct list_head search_list;
4176 * Fetch the caller's info block. 4190};
4177 */
4178 4191
4179 if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) 4192static void __append_search_uppers(struct list_head *search_list,
4180 return -EFAULT; 4193 struct net_device *dev)
4194{
4195 struct netdev_upper *upper;
4181 4196
4182retry: 4197 list_for_each_entry(upper, &dev->upper_dev_list, list) {
4183 seq = read_seqbegin(&devnet_rename_seq); 4198 /* check if this upper is not already in search list */
4184 rcu_read_lock(); 4199 if (list_empty(&upper->search_list))
4185 dev = dev_get_by_index_rcu(net, ifr.ifr_ifindex); 4200 list_add_tail(&upper->search_list, search_list);
4186 if (!dev) {
4187 rcu_read_unlock();
4188 return -ENODEV;
4189 } 4201 }
4190
4191 strcpy(ifr.ifr_name, dev->name);
4192 rcu_read_unlock();
4193 if (read_seqretry(&devnet_rename_seq, seq))
4194 goto retry;
4195
4196 if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
4197 return -EFAULT;
4198 return 0;
4199} 4202}
4200 4203
4201/* 4204static bool __netdev_search_upper_dev(struct net_device *dev,
4202 * Perform a SIOCGIFCONF call. This structure will change 4205 struct net_device *upper_dev)
4203 * size eventually, and there is nothing I can do about it.
4204 * Thus we will need a 'compatibility mode'.
4205 */
4206
4207static int dev_ifconf(struct net *net, char __user *arg)
4208{ 4206{
4209 struct ifconf ifc; 4207 LIST_HEAD(search_list);
4210 struct net_device *dev; 4208 struct netdev_upper *upper;
4211 char __user *pos; 4209 struct netdev_upper *tmp;
4212 int len; 4210 bool ret = false;
4213 int total;
4214 int i;
4215 4211
4216 /* 4212 __append_search_uppers(&search_list, dev);
4217 * Fetch the caller's info block. 4213 list_for_each_entry(upper, &search_list, search_list) {
4218 */ 4214 if (upper->dev == upper_dev) {
4219 4215 ret = true;
4220 if (copy_from_user(&ifc, arg, sizeof(struct ifconf))) 4216 break;
4221 return -EFAULT;
4222
4223 pos = ifc.ifc_buf;
4224 len = ifc.ifc_len;
4225
4226 /*
4227 * Loop over the interfaces, and write an info block for each.
4228 */
4229
4230 total = 0;
4231 for_each_netdev(net, dev) {
4232 for (i = 0; i < NPROTO; i++) {
4233 if (gifconf_list[i]) {
4234 int done;
4235 if (!pos)
4236 done = gifconf_list[i](dev, NULL, 0);
4237 else
4238 done = gifconf_list[i](dev, pos + total,
4239 len - total);
4240 if (done < 0)
4241 return -EFAULT;
4242 total += done;
4243 }
4244 } 4217 }
4218 __append_search_uppers(&search_list, upper->dev);
4245 } 4219 }
4246 4220 list_for_each_entry_safe(upper, tmp, &search_list, search_list)
4247 /* 4221 INIT_LIST_HEAD(&upper->search_list);
4248 * All done. Write the updated control block back to the caller. 4222 return ret;
4249 */
4250 ifc.ifc_len = total;
4251
4252 /*
4253 * Both BSD and Solaris return 0 here, so we do too.
4254 */
4255 return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
4256} 4223}
4257 4224
4258#ifdef CONFIG_PROC_FS 4225static struct netdev_upper *__netdev_find_upper(struct net_device *dev,
4259 4226 struct net_device *upper_dev)
4260#define BUCKET_SPACE (32 - NETDEV_HASHBITS - 1)
4261
4262#define get_bucket(x) ((x) >> BUCKET_SPACE)
4263#define get_offset(x) ((x) & ((1 << BUCKET_SPACE) - 1))
4264#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
4265
4266static inline struct net_device *dev_from_same_bucket(struct seq_file *seq, loff_t *pos)
4267{ 4227{
4268 struct net *net = seq_file_net(seq); 4228 struct netdev_upper *upper;
4269 struct net_device *dev;
4270 struct hlist_node *p;
4271 struct hlist_head *h;
4272 unsigned int count = 0, offset = get_offset(*pos);
4273 4229
4274 h = &net->dev_name_head[get_bucket(*pos)]; 4230 list_for_each_entry(upper, &dev->upper_dev_list, list) {
4275 hlist_for_each_entry_rcu(dev, p, h, name_hlist) { 4231 if (upper->dev == upper_dev)
4276 if (++count == offset) 4232 return upper;
4277 return dev;
4278 } 4233 }
4279
4280 return NULL; 4234 return NULL;
4281} 4235}
4282 4236
4283static inline struct net_device *dev_from_bucket(struct seq_file *seq, loff_t *pos) 4237/**
4284{ 4238 * netdev_has_upper_dev - Check if device is linked to an upper device
4285 struct net_device *dev; 4239 * @dev: device
4286 unsigned int bucket; 4240 * @upper_dev: upper device to check
4287 4241 *
4288 do { 4242 * Find out if a device is linked to specified upper device and return true
4289 dev = dev_from_same_bucket(seq, pos); 4243 * in case it is. Note that this checks only immediate upper device,
4290 if (dev) 4244 * not through a complete stack of devices. The caller must hold the RTNL lock.
4291 return dev;
4292
4293 bucket = get_bucket(*pos) + 1;
4294 *pos = set_bucket_offset(bucket, 1);
4295 } while (bucket < NETDEV_HASHENTRIES);
4296
4297 return NULL;
4298}
4299
4300/*
4301 * This is invoked by the /proc filesystem handler to display a device
4302 * in detail.
4303 */ 4245 */
4304void *dev_seq_start(struct seq_file *seq, loff_t *pos) 4246bool netdev_has_upper_dev(struct net_device *dev,
4305 __acquires(RCU) 4247 struct net_device *upper_dev)
4306{
4307 rcu_read_lock();
4308 if (!*pos)
4309 return SEQ_START_TOKEN;
4310
4311 if (get_bucket(*pos) >= NETDEV_HASHENTRIES)
4312 return NULL;
4313
4314 return dev_from_bucket(seq, pos);
4315}
4316
4317void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
4318{
4319 ++*pos;
4320 return dev_from_bucket(seq, pos);
4321}
4322
4323void dev_seq_stop(struct seq_file *seq, void *v)
4324 __releases(RCU)
4325{
4326 rcu_read_unlock();
4327}
4328
4329static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
4330{ 4248{
4331 struct rtnl_link_stats64 temp; 4249 ASSERT_RTNL();
4332 const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);
4333 4250
4334 seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu " 4251 return __netdev_find_upper(dev, upper_dev);
4335 "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n",
4336 dev->name, stats->rx_bytes, stats->rx_packets,
4337 stats->rx_errors,
4338 stats->rx_dropped + stats->rx_missed_errors,
4339 stats->rx_fifo_errors,
4340 stats->rx_length_errors + stats->rx_over_errors +
4341 stats->rx_crc_errors + stats->rx_frame_errors,
4342 stats->rx_compressed, stats->multicast,
4343 stats->tx_bytes, stats->tx_packets,
4344 stats->tx_errors, stats->tx_dropped,
4345 stats->tx_fifo_errors, stats->collisions,
4346 stats->tx_carrier_errors +
4347 stats->tx_aborted_errors +
4348 stats->tx_window_errors +
4349 stats->tx_heartbeat_errors,
4350 stats->tx_compressed);
4351} 4252}
4253EXPORT_SYMBOL(netdev_has_upper_dev);
4352 4254
4353/* 4255/**
4354 * Called from the PROCfs module. This now uses the new arbitrary sized 4256 * netdev_has_any_upper_dev - Check if device is linked to some device
4355 * /proc/net interface to create /proc/net/dev 4257 * @dev: device
4258 *
4259 * Find out if a device is linked to an upper device and return true in case
4260 * it is. The caller must hold the RTNL lock.
4356 */ 4261 */
4357static int dev_seq_show(struct seq_file *seq, void *v) 4262bool netdev_has_any_upper_dev(struct net_device *dev)
4358{
4359 if (v == SEQ_START_TOKEN)
4360 seq_puts(seq, "Inter-| Receive "
4361 " | Transmit\n"
4362 " face |bytes packets errs drop fifo frame "
4363 "compressed multicast|bytes packets errs "
4364 "drop fifo colls carrier compressed\n");
4365 else
4366 dev_seq_printf_stats(seq, v);
4367 return 0;
4368}
4369
4370static struct softnet_data *softnet_get_online(loff_t *pos)
4371{
4372 struct softnet_data *sd = NULL;
4373
4374 while (*pos < nr_cpu_ids)
4375 if (cpu_online(*pos)) {
4376 sd = &per_cpu(softnet_data, *pos);
4377 break;
4378 } else
4379 ++*pos;
4380 return sd;
4381}
4382
4383static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
4384{ 4263{
4385 return softnet_get_online(pos); 4264 ASSERT_RTNL();
4386}
4387
4388static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
4389{
4390 ++*pos;
4391 return softnet_get_online(pos);
4392}
4393 4265
4394static void softnet_seq_stop(struct seq_file *seq, void *v) 4266 return !list_empty(&dev->upper_dev_list);
4395{
4396} 4267}
4268EXPORT_SYMBOL(netdev_has_any_upper_dev);
4397 4269
4398static int softnet_seq_show(struct seq_file *seq, void *v) 4270/**
4399{ 4271 * netdev_master_upper_dev_get - Get master upper device
4400 struct softnet_data *sd = v; 4272 * @dev: device
4401 4273 *
4402 seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n", 4274 * Find a master upper device and return pointer to it or NULL in case
4403 sd->processed, sd->dropped, sd->time_squeeze, 0, 4275 * it's not there. The caller must hold the RTNL lock.
4404 0, 0, 0, 0, /* was fastroute */ 4276 */
4405 sd->cpu_collision, sd->received_rps); 4277struct net_device *netdev_master_upper_dev_get(struct net_device *dev)
4406 return 0;
4407}
4408
4409static const struct seq_operations dev_seq_ops = {
4410 .start = dev_seq_start,
4411 .next = dev_seq_next,
4412 .stop = dev_seq_stop,
4413 .show = dev_seq_show,
4414};
4415
4416static int dev_seq_open(struct inode *inode, struct file *file)
4417{ 4278{
4418 return seq_open_net(inode, file, &dev_seq_ops, 4279 struct netdev_upper *upper;
4419 sizeof(struct seq_net_private));
4420}
4421 4280
4422static const struct file_operations dev_seq_fops = { 4281 ASSERT_RTNL();
4423 .owner = THIS_MODULE,
4424 .open = dev_seq_open,
4425 .read = seq_read,
4426 .llseek = seq_lseek,
4427 .release = seq_release_net,
4428};
4429 4282
4430static const struct seq_operations softnet_seq_ops = { 4283 if (list_empty(&dev->upper_dev_list))
4431 .start = softnet_seq_start, 4284 return NULL;
4432 .next = softnet_seq_next,
4433 .stop = softnet_seq_stop,
4434 .show = softnet_seq_show,
4435};
4436 4285
4437static int softnet_seq_open(struct inode *inode, struct file *file) 4286 upper = list_first_entry(&dev->upper_dev_list,
4438{ 4287 struct netdev_upper, list);
4439 return seq_open(file, &softnet_seq_ops); 4288 if (likely(upper->master))
4289 return upper->dev;
4290 return NULL;
4440} 4291}
4292EXPORT_SYMBOL(netdev_master_upper_dev_get);
4441 4293
4442static const struct file_operations softnet_seq_fops = { 4294/**
4443 .owner = THIS_MODULE, 4295 * netdev_master_upper_dev_get_rcu - Get master upper device
4444 .open = softnet_seq_open, 4296 * @dev: device
4445 .read = seq_read, 4297 *
4446 .llseek = seq_lseek, 4298 * Find a master upper device and return pointer to it or NULL in case
4447 .release = seq_release, 4299 * it's not there. The caller must hold the RCU read lock.
4448}; 4300 */
4449 4301struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev)
4450static void *ptype_get_idx(loff_t pos)
4451{ 4302{
4452 struct packet_type *pt = NULL; 4303 struct netdev_upper *upper;
4453 loff_t i = 0;
4454 int t;
4455
4456 list_for_each_entry_rcu(pt, &ptype_all, list) {
4457 if (i == pos)
4458 return pt;
4459 ++i;
4460 }
4461 4304
4462 for (t = 0; t < PTYPE_HASH_SIZE; t++) { 4305 upper = list_first_or_null_rcu(&dev->upper_dev_list,
4463 list_for_each_entry_rcu(pt, &ptype_base[t], list) { 4306 struct netdev_upper, list);
4464 if (i == pos) 4307 if (upper && likely(upper->master))
4465 return pt; 4308 return upper->dev;
4466 ++i;
4467 }
4468 }
4469 return NULL; 4309 return NULL;
4470} 4310}
4311EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu);
4471 4312
4472static void *ptype_seq_start(struct seq_file *seq, loff_t *pos) 4313static int __netdev_upper_dev_link(struct net_device *dev,
4473 __acquires(RCU) 4314 struct net_device *upper_dev, bool master)
4474{ 4315{
4475 rcu_read_lock(); 4316 struct netdev_upper *upper;
4476 return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
4477}
4478 4317
4479static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos) 4318 ASSERT_RTNL();
4480{
4481 struct packet_type *pt;
4482 struct list_head *nxt;
4483 int hash;
4484 4319
4485 ++*pos; 4320 if (dev == upper_dev)
4486 if (v == SEQ_START_TOKEN) 4321 return -EBUSY;
4487 return ptype_get_idx(0);
4488 4322
4489 pt = v; 4323 /* To prevent loops, check if dev is not upper device to upper_dev. */
4490 nxt = pt->list.next; 4324 if (__netdev_search_upper_dev(upper_dev, dev))
4491 if (pt->type == htons(ETH_P_ALL)) { 4325 return -EBUSY;
4492 if (nxt != &ptype_all)
4493 goto found;
4494 hash = 0;
4495 nxt = ptype_base[0].next;
4496 } else
4497 hash = ntohs(pt->type) & PTYPE_HASH_MASK;
4498 4326
4499 while (nxt == &ptype_base[hash]) { 4327 if (__netdev_find_upper(dev, upper_dev))
4500 if (++hash >= PTYPE_HASH_SIZE) 4328 return -EEXIST;
4501 return NULL;
4502 nxt = ptype_base[hash].next;
4503 }
4504found:
4505 return list_entry(nxt, struct packet_type, list);
4506}
4507 4329
4508static void ptype_seq_stop(struct seq_file *seq, void *v) 4330 if (master && netdev_master_upper_dev_get(dev))
4509 __releases(RCU) 4331 return -EBUSY;
4510{
4511 rcu_read_unlock();
4512}
4513 4332
4514static int ptype_seq_show(struct seq_file *seq, void *v) 4333 upper = kmalloc(sizeof(*upper), GFP_KERNEL);
4515{ 4334 if (!upper)
4516 struct packet_type *pt = v; 4335 return -ENOMEM;
4517 4336
4518 if (v == SEQ_START_TOKEN) 4337 upper->dev = upper_dev;
4519 seq_puts(seq, "Type Device Function\n"); 4338 upper->master = master;
4520 else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) { 4339 INIT_LIST_HEAD(&upper->search_list);
4521 if (pt->type == htons(ETH_P_ALL))
4522 seq_puts(seq, "ALL ");
4523 else
4524 seq_printf(seq, "%04x", ntohs(pt->type));
4525 4340
4526 seq_printf(seq, " %-8s %pF\n", 4341 /* Ensure that master upper link is always the first item in list. */
4527 pt->dev ? pt->dev->name : "", pt->func); 4342 if (master)
4528 } 4343 list_add_rcu(&upper->list, &dev->upper_dev_list);
4344 else
4345 list_add_tail_rcu(&upper->list, &dev->upper_dev_list);
4346 dev_hold(upper_dev);
4529 4347
4530 return 0; 4348 return 0;
4531} 4349}
4532 4350
4533static const struct seq_operations ptype_seq_ops = { 4351/**
4534 .start = ptype_seq_start, 4352 * netdev_upper_dev_link - Add a link to the upper device
4535 .next = ptype_seq_next, 4353 * @dev: device
4536 .stop = ptype_seq_stop, 4354 * @upper_dev: new upper device
4537 .show = ptype_seq_show, 4355 *
4538}; 4356 * Adds a link to device which is upper to this one. The caller must hold
4539 4357 * the RTNL lock. On a failure a negative errno code is returned.
4540static int ptype_seq_open(struct inode *inode, struct file *file) 4358 * On success the reference counts are adjusted and the function
4541{ 4359 * returns zero.
4542 return seq_open_net(inode, file, &ptype_seq_ops, 4360 */
4543 sizeof(struct seq_net_private)); 4361int netdev_upper_dev_link(struct net_device *dev,
4544} 4362 struct net_device *upper_dev)
4545
4546static const struct file_operations ptype_seq_fops = {
4547 .owner = THIS_MODULE,
4548 .open = ptype_seq_open,
4549 .read = seq_read,
4550 .llseek = seq_lseek,
4551 .release = seq_release_net,
4552};
4553
4554
4555static int __net_init dev_proc_net_init(struct net *net)
4556{
4557 int rc = -ENOMEM;
4558
4559 if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops))
4560 goto out;
4561 if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops))
4562 goto out_dev;
4563 if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops))
4564 goto out_softnet;
4565
4566 if (wext_proc_init(net))
4567 goto out_ptype;
4568 rc = 0;
4569out:
4570 return rc;
4571out_ptype:
4572 proc_net_remove(net, "ptype");
4573out_softnet:
4574 proc_net_remove(net, "softnet_stat");
4575out_dev:
4576 proc_net_remove(net, "dev");
4577 goto out;
4578}
4579
4580static void __net_exit dev_proc_net_exit(struct net *net)
4581{
4582 wext_proc_exit(net);
4583
4584 proc_net_remove(net, "ptype");
4585 proc_net_remove(net, "softnet_stat");
4586 proc_net_remove(net, "dev");
4587}
4588
4589static struct pernet_operations __net_initdata dev_proc_ops = {
4590 .init = dev_proc_net_init,
4591 .exit = dev_proc_net_exit,
4592};
4593
4594static int __init dev_proc_init(void)
4595{ 4363{
4596 return register_pernet_subsys(&dev_proc_ops); 4364 return __netdev_upper_dev_link(dev, upper_dev, false);
4597} 4365}
4598#else 4366EXPORT_SYMBOL(netdev_upper_dev_link);
4599#define dev_proc_init() 0
4600#endif /* CONFIG_PROC_FS */
4601
4602 4367
4603/** 4368/**
4604 * netdev_set_master - set up master pointer 4369 * netdev_master_upper_dev_link - Add a master link to the upper device
4605 * @slave: slave device 4370 * @dev: device
4606 * @master: new master device 4371 * @upper_dev: new upper device
4607 * 4372 *
4608 * Changes the master device of the slave. Pass %NULL to break the 4373 * Adds a link to device which is upper to this one. In this case, only
4609 * bonding. The caller must hold the RTNL semaphore. On a failure 4374 * one master upper device can be linked, although other non-master devices
4610 * a negative errno code is returned. On success the reference counts 4375 * might be linked as well. The caller must hold the RTNL lock.
4611 * are adjusted and the function returns zero. 4376 * On a failure a negative errno code is returned. On success the reference
4377 * counts are adjusted and the function returns zero.
4612 */ 4378 */
4613int netdev_set_master(struct net_device *slave, struct net_device *master) 4379int netdev_master_upper_dev_link(struct net_device *dev,
4380 struct net_device *upper_dev)
4614{ 4381{
4615 struct net_device *old = slave->master; 4382 return __netdev_upper_dev_link(dev, upper_dev, true);
4616
4617 ASSERT_RTNL();
4618
4619 if (master) {
4620 if (old)
4621 return -EBUSY;
4622 dev_hold(master);
4623 }
4624
4625 slave->master = master;
4626
4627 if (old)
4628 dev_put(old);
4629 return 0;
4630} 4383}
4631EXPORT_SYMBOL(netdev_set_master); 4384EXPORT_SYMBOL(netdev_master_upper_dev_link);
4632 4385
4633/** 4386/**
4634 * netdev_set_bond_master - set up bonding master/slave pair 4387 * netdev_upper_dev_unlink - Removes a link to upper device
4635 * @slave: slave device 4388 * @dev: device
4636 * @master: new master device 4389 * @upper_dev: new upper device
4637 * 4390 *
4638 * Changes the master device of the slave. Pass %NULL to break the 4391 * Removes a link to device which is upper to this one. The caller must hold
4639 * bonding. The caller must hold the RTNL semaphore. On a failure 4392 * the RTNL lock.
4640 * a negative errno code is returned. On success %RTM_NEWLINK is sent
4641 * to the routing socket and the function returns zero.
4642 */ 4393 */
4643int netdev_set_bond_master(struct net_device *slave, struct net_device *master) 4394void netdev_upper_dev_unlink(struct net_device *dev,
4395 struct net_device *upper_dev)
4644{ 4396{
4645 int err; 4397 struct netdev_upper *upper;
4646 4398
4647 ASSERT_RTNL(); 4399 ASSERT_RTNL();
4648 4400
4649 err = netdev_set_master(slave, master); 4401 upper = __netdev_find_upper(dev, upper_dev);
4650 if (err) 4402 if (!upper)
4651 return err; 4403 return;
4652 if (master) 4404 list_del_rcu(&upper->list);
4653 slave->flags |= IFF_SLAVE; 4405 dev_put(upper_dev);
4654 else 4406 kfree_rcu(upper, rcu);
4655 slave->flags &= ~IFF_SLAVE;
4656
4657 rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
4658 return 0;
4659} 4407}
4660EXPORT_SYMBOL(netdev_set_bond_master); 4408EXPORT_SYMBOL(netdev_upper_dev_unlink);
4661 4409
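Editor's note: a minimal sketch of how a bonding-style driver might use the new upper-device API in place of the removed netdev_set_master()/netdev_set_bond_master() helpers. The "foo" names are hypothetical; the RTNL and IFF_SLAVE handling mirrors the removed helper above, and per the kerneldoc the link call fails if the slave already has a master upper device.

#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/if.h>

/* Hypothetical "foo" aggregation driver: enslave/release built on the
 * upper-device API.  Caller context: RTNL held. */
static int foo_enslave(struct net_device *master, struct net_device *slave)
{
	int err;

	ASSERT_RTNL();

	/* Link "master" as the single master upper device of "slave";
	 * fails if "slave" already has a master upper device. */
	err = netdev_master_upper_dev_link(slave, master);
	if (err)
		return err;

	slave->flags |= IFF_SLAVE;
	rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
	return 0;
}

static void foo_release(struct net_device *master, struct net_device *slave)
{
	ASSERT_RTNL();

	/* Drops the upper link and the device reference taken at link time. */
	netdev_upper_dev_unlink(slave, master);

	slave->flags &= ~IFF_SLAVE;
	rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
}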
4662static void dev_change_rx_flags(struct net_device *dev, int flags) 4410static void dev_change_rx_flags(struct net_device *dev, int flags)
4663{ 4411{
@@ -5020,381 +4768,33 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
5020 if (!netif_device_present(dev)) 4768 if (!netif_device_present(dev))
5021 return -ENODEV; 4769 return -ENODEV;
5022 err = ops->ndo_set_mac_address(dev, sa); 4770 err = ops->ndo_set_mac_address(dev, sa);
5023 if (!err) 4771 if (err)
5024 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); 4772 return err;
4773 dev->addr_assign_type = NET_ADDR_SET;
4774 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
5025 add_device_randomness(dev->dev_addr, dev->addr_len); 4775 add_device_randomness(dev->dev_addr, dev->addr_len);
5026 return err; 4776 return 0;
5027} 4777}
5028EXPORT_SYMBOL(dev_set_mac_address); 4778EXPORT_SYMBOL(dev_set_mac_address);
5029 4779
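Editor's note: a small sketch of the changed dev_set_mac_address() path, assuming an Ethernet-type device and a hypothetical caller foo_set_mac() that already holds RTNL. After a successful call the address is marked NET_ADDR_SET and NETDEV_CHANGEADDR has been raised, as in the hunk above.

#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/socket.h>
#include <linux/string.h>

/* Hypothetical helper: set a new MAC on an Ethernet device, RTNL held. */
static int foo_set_mac(struct net_device *dev, const u8 *new_mac)
{
	struct sockaddr sa;

	sa.sa_family = dev->type;		/* ARPHRD_ETHER for Ethernet */
	memcpy(sa.sa_data, new_mac, ETH_ALEN);

	/* Same path SIOCSIFHWADDR takes; on success addr_assign_type is
	 * NET_ADDR_SET and the NETDEV_CHANGEADDR notifier has fired. */
	return dev_set_mac_address(dev, &sa);
}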
5030/*
5031 * Perform the SIOCxIFxxx calls, inside rcu_read_lock()
5032 */
5033static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
5034{
5035 int err;
5036 struct net_device *dev = dev_get_by_name_rcu(net, ifr->ifr_name);
5037
5038 if (!dev)
5039 return -ENODEV;
5040
5041 switch (cmd) {
5042 case SIOCGIFFLAGS: /* Get interface flags */
5043 ifr->ifr_flags = (short) dev_get_flags(dev);
5044 return 0;
5045
5046 case SIOCGIFMETRIC: /* Get the metric on the interface
5047 (currently unused) */
5048 ifr->ifr_metric = 0;
5049 return 0;
5050
5051 case SIOCGIFMTU: /* Get the MTU of a device */
5052 ifr->ifr_mtu = dev->mtu;
5053 return 0;
5054
5055 case SIOCGIFHWADDR:
5056 if (!dev->addr_len)
5057 memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
5058 else
5059 memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
5060 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
5061 ifr->ifr_hwaddr.sa_family = dev->type;
5062 return 0;
5063
5064 case SIOCGIFSLAVE:
5065 err = -EINVAL;
5066 break;
5067
5068 case SIOCGIFMAP:
5069 ifr->ifr_map.mem_start = dev->mem_start;
5070 ifr->ifr_map.mem_end = dev->mem_end;
5071 ifr->ifr_map.base_addr = dev->base_addr;
5072 ifr->ifr_map.irq = dev->irq;
5073 ifr->ifr_map.dma = dev->dma;
5074 ifr->ifr_map.port = dev->if_port;
5075 return 0;
5076
5077 case SIOCGIFINDEX:
5078 ifr->ifr_ifindex = dev->ifindex;
5079 return 0;
5080
5081 case SIOCGIFTXQLEN:
5082 ifr->ifr_qlen = dev->tx_queue_len;
5083 return 0;
5084
5085 default:
5086 /* dev_ioctl() should ensure this case
5087 * is never reached
5088 */
5089 WARN_ON(1);
5090 err = -ENOTTY;
5091 break;
5092
5093 }
5094 return err;
5095}
5096
5097/*
5098 * Perform the SIOCxIFxxx calls, inside rtnl_lock()
5099 */
5100static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
5101{
5102 int err;
5103 struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
5104 const struct net_device_ops *ops;
5105
5106 if (!dev)
5107 return -ENODEV;
5108
5109 ops = dev->netdev_ops;
5110
5111 switch (cmd) {
5112 case SIOCSIFFLAGS: /* Set interface flags */
5113 return dev_change_flags(dev, ifr->ifr_flags);
5114
5115 case SIOCSIFMETRIC: /* Set the metric on the interface
5116 (currently unused) */
5117 return -EOPNOTSUPP;
5118
5119 case SIOCSIFMTU: /* Set the MTU of a device */
5120 return dev_set_mtu(dev, ifr->ifr_mtu);
5121
5122 case SIOCSIFHWADDR:
5123 return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
5124
5125 case SIOCSIFHWBROADCAST:
5126 if (ifr->ifr_hwaddr.sa_family != dev->type)
5127 return -EINVAL;
5128 memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
5129 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
5130 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
5131 return 0;
5132
5133 case SIOCSIFMAP:
5134 if (ops->ndo_set_config) {
5135 if (!netif_device_present(dev))
5136 return -ENODEV;
5137 return ops->ndo_set_config(dev, &ifr->ifr_map);
5138 }
5139 return -EOPNOTSUPP;
5140
5141 case SIOCADDMULTI:
5142 if (!ops->ndo_set_rx_mode ||
5143 ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
5144 return -EINVAL;
5145 if (!netif_device_present(dev))
5146 return -ENODEV;
5147 return dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data);
5148
5149 case SIOCDELMULTI:
5150 if (!ops->ndo_set_rx_mode ||
5151 ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
5152 return -EINVAL;
5153 if (!netif_device_present(dev))
5154 return -ENODEV;
5155 return dev_mc_del_global(dev, ifr->ifr_hwaddr.sa_data);
5156
5157 case SIOCSIFTXQLEN:
5158 if (ifr->ifr_qlen < 0)
5159 return -EINVAL;
5160 dev->tx_queue_len = ifr->ifr_qlen;
5161 return 0;
5162
5163 case SIOCSIFNAME:
5164 ifr->ifr_newname[IFNAMSIZ-1] = '\0';
5165 return dev_change_name(dev, ifr->ifr_newname);
5166
5167 case SIOCSHWTSTAMP:
5168 err = net_hwtstamp_validate(ifr);
5169 if (err)
5170 return err;
5171 /* fall through */
5172
5173 /*
5174 * Unknown or private ioctl
5175 */
5176 default:
5177 if ((cmd >= SIOCDEVPRIVATE &&
5178 cmd <= SIOCDEVPRIVATE + 15) ||
5179 cmd == SIOCBONDENSLAVE ||
5180 cmd == SIOCBONDRELEASE ||
5181 cmd == SIOCBONDSETHWADDR ||
5182 cmd == SIOCBONDSLAVEINFOQUERY ||
5183 cmd == SIOCBONDINFOQUERY ||
5184 cmd == SIOCBONDCHANGEACTIVE ||
5185 cmd == SIOCGMIIPHY ||
5186 cmd == SIOCGMIIREG ||
5187 cmd == SIOCSMIIREG ||
5188 cmd == SIOCBRADDIF ||
5189 cmd == SIOCBRDELIF ||
5190 cmd == SIOCSHWTSTAMP ||
5191 cmd == SIOCWANDEV) {
5192 err = -EOPNOTSUPP;
5193 if (ops->ndo_do_ioctl) {
5194 if (netif_device_present(dev))
5195 err = ops->ndo_do_ioctl(dev, ifr, cmd);
5196 else
5197 err = -ENODEV;
5198 }
5199 } else
5200 err = -EINVAL;
5201
5202 }
5203 return err;
5204}
5205
5206/*
5207 * This function handles all "interface"-type I/O control requests. The actual
5208 * 'doing' part of this is dev_ifsioc above.
5209 */
5210
5211/** 4780/**
5212 * dev_ioctl - network device ioctl 4781 * dev_change_carrier - Change device carrier
5213 * @net: the applicable net namespace 4782 * @dev: device
5214 * @cmd: command to issue 4783 * @new_carrier: new value
5215 * @arg: pointer to a struct ifreq in user space
5216 * 4784 *
5217 * Issue ioctl functions to devices. This is normally called by the 4785 * Change device carrier
5218 * user space syscall interfaces but can sometimes be useful for
5219 * other purposes. The return value is the return from the syscall if
5220 * positive or a negative errno code on error.
5221 */ 4786 */
5222 4787int dev_change_carrier(struct net_device *dev, bool new_carrier)
5223int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
5224{ 4788{
5225 struct ifreq ifr; 4789 const struct net_device_ops *ops = dev->netdev_ops;
5226 int ret;
5227 char *colon;
5228
5229 /* One special case: SIOCGIFCONF takes ifconf argument
5230 and requires shared lock, because it sleeps writing
5231 to user space.
5232 */
5233
5234 if (cmd == SIOCGIFCONF) {
5235 rtnl_lock();
5236 ret = dev_ifconf(net, (char __user *) arg);
5237 rtnl_unlock();
5238 return ret;
5239 }
5240 if (cmd == SIOCGIFNAME)
5241 return dev_ifname(net, (struct ifreq __user *)arg);
5242
5243 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
5244 return -EFAULT;
5245
5246 ifr.ifr_name[IFNAMSIZ-1] = 0;
5247
5248 colon = strchr(ifr.ifr_name, ':');
5249 if (colon)
5250 *colon = 0;
5251
5252 /*
5253 * See which interface the caller is talking about.
5254 */
5255
5256 switch (cmd) {
5257 /*
5258 * These ioctl calls:
5259 * - can be done by all.
5260 * - atomic and do not require locking.
5261 * - return a value
5262 */
5263 case SIOCGIFFLAGS:
5264 case SIOCGIFMETRIC:
5265 case SIOCGIFMTU:
5266 case SIOCGIFHWADDR:
5267 case SIOCGIFSLAVE:
5268 case SIOCGIFMAP:
5269 case SIOCGIFINDEX:
5270 case SIOCGIFTXQLEN:
5271 dev_load(net, ifr.ifr_name);
5272 rcu_read_lock();
5273 ret = dev_ifsioc_locked(net, &ifr, cmd);
5274 rcu_read_unlock();
5275 if (!ret) {
5276 if (colon)
5277 *colon = ':';
5278 if (copy_to_user(arg, &ifr,
5279 sizeof(struct ifreq)))
5280 ret = -EFAULT;
5281 }
5282 return ret;
5283
5284 case SIOCETHTOOL:
5285 dev_load(net, ifr.ifr_name);
5286 rtnl_lock();
5287 ret = dev_ethtool(net, &ifr);
5288 rtnl_unlock();
5289 if (!ret) {
5290 if (colon)
5291 *colon = ':';
5292 if (copy_to_user(arg, &ifr,
5293 sizeof(struct ifreq)))
5294 ret = -EFAULT;
5295 }
5296 return ret;
5297
5298 /*
5299 * These ioctl calls:
5300 * - require superuser power.
5301 * - require strict serialization.
5302 * - return a value
5303 */
5304 case SIOCGMIIPHY:
5305 case SIOCGMIIREG:
5306 case SIOCSIFNAME:
5307 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
5308 return -EPERM;
5309 dev_load(net, ifr.ifr_name);
5310 rtnl_lock();
5311 ret = dev_ifsioc(net, &ifr, cmd);
5312 rtnl_unlock();
5313 if (!ret) {
5314 if (colon)
5315 *colon = ':';
5316 if (copy_to_user(arg, &ifr,
5317 sizeof(struct ifreq)))
5318 ret = -EFAULT;
5319 }
5320 return ret;
5321
5322 /*
5323 * These ioctl calls:
5324 * - require superuser power.
5325 * - require strict serialization.
5326 * - do not return a value
5327 */
5328 case SIOCSIFMAP:
5329 case SIOCSIFTXQLEN:
5330 if (!capable(CAP_NET_ADMIN))
5331 return -EPERM;
5332 /* fall through */
5333 /*
5334 * These ioctl calls:
5335 * - require local superuser power.
5336 * - require strict serialization.
5337 * - do not return a value
5338 */
5339 case SIOCSIFFLAGS:
5340 case SIOCSIFMETRIC:
5341 case SIOCSIFMTU:
5342 case SIOCSIFHWADDR:
5343 case SIOCSIFSLAVE:
5344 case SIOCADDMULTI:
5345 case SIOCDELMULTI:
5346 case SIOCSIFHWBROADCAST:
5347 case SIOCSMIIREG:
5348 case SIOCBONDENSLAVE:
5349 case SIOCBONDRELEASE:
5350 case SIOCBONDSETHWADDR:
5351 case SIOCBONDCHANGEACTIVE:
5352 case SIOCBRADDIF:
5353 case SIOCBRDELIF:
5354 case SIOCSHWTSTAMP:
5355 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
5356 return -EPERM;
5357 /* fall through */
5358 case SIOCBONDSLAVEINFOQUERY:
5359 case SIOCBONDINFOQUERY:
5360 dev_load(net, ifr.ifr_name);
5361 rtnl_lock();
5362 ret = dev_ifsioc(net, &ifr, cmd);
5363 rtnl_unlock();
5364 return ret;
5365
5366 case SIOCGIFMEM:
5367 /* Get the per device memory space. We can add this but
5368 * currently do not support it */
5369 case SIOCSIFMEM:
5370 /* Set the per device memory buffer space.
5371 * Not applicable in our case */
5372 case SIOCSIFLINK:
5373 return -ENOTTY;
5374 4790
5375 /* 4791 if (!ops->ndo_change_carrier)
5376 * Unknown or private ioctl. 4792 return -EOPNOTSUPP;
5377 */ 4793 if (!netif_device_present(dev))
5378 default: 4794 return -ENODEV;
5379 if (cmd == SIOCWANDEV || 4795 return ops->ndo_change_carrier(dev, new_carrier);
5380 (cmd >= SIOCDEVPRIVATE &&
5381 cmd <= SIOCDEVPRIVATE + 15)) {
5382 dev_load(net, ifr.ifr_name);
5383 rtnl_lock();
5384 ret = dev_ifsioc(net, &ifr, cmd);
5385 rtnl_unlock();
5386 if (!ret && copy_to_user(arg, &ifr,
5387 sizeof(struct ifreq)))
5388 ret = -EFAULT;
5389 return ret;
5390 }
5391 /* Take care of Wireless Extensions */
5392 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
5393 return wext_handle_ioctl(net, &ifr, cmd, arg);
5394 return -ENOTTY;
5395 }
5396} 4796}
5397 4797EXPORT_SYMBOL(dev_change_carrier);
5398 4798
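Editor's note: a sketch of the driver side of the new dev_change_carrier() call, loosely modelled on what a purely software device might do; the "foo" names are hypothetical. As shown above, dev_change_carrier() only reaches ndo_change_carrier after checking that the hook exists and the device is present.

#include <linux/netdevice.h>

/* Hypothetical software device: toggle carrier purely in software. */
static int foo_change_carrier(struct net_device *dev, bool new_carrier)
{
	if (new_carrier)
		netif_carrier_on(dev);		/* link up: restart queues/watchdog */
	else
		netif_carrier_off(dev);		/* link down */
	return 0;
}

static const struct net_device_ops foo_netdev_ops = {
	.ndo_change_carrier	= foo_change_carrier,
	/* ... the driver's other ndo_* hooks ... */
};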
5399/** 4799/**
5400 * dev_new_index - allocate an ifindex 4800 * dev_new_index - allocate an ifindex
@@ -5482,11 +4882,15 @@ static void rollback_registered_many(struct list_head *head)
5482 if (dev->netdev_ops->ndo_uninit) 4882 if (dev->netdev_ops->ndo_uninit)
5483 dev->netdev_ops->ndo_uninit(dev); 4883 dev->netdev_ops->ndo_uninit(dev);
5484 4884
5485 /* Notifier chain MUST detach us from master device. */ 4885 /* Notifier chain MUST detach us from all upper devices. */
5486 WARN_ON(dev->master); 4886 WARN_ON(netdev_has_any_upper_dev(dev));
5487 4887
5488 /* Remove entries from kobject tree */ 4888 /* Remove entries from kobject tree */
5489 netdev_unregister_kobject(dev); 4889 netdev_unregister_kobject(dev);
4890#ifdef CONFIG_XPS
4891 /* Remove XPS queueing entries */
4892 netif_reset_xps_queues_gt(dev, 0);
4893#endif
5490 } 4894 }
5491 4895
5492 synchronize_net(); 4896 synchronize_net();
@@ -5664,10 +5068,9 @@ static int netif_alloc_rx_queues(struct net_device *dev)
5664 BUG_ON(count < 1); 5068 BUG_ON(count < 1);
5665 5069
5666 rx = kcalloc(count, sizeof(struct netdev_rx_queue), GFP_KERNEL); 5070 rx = kcalloc(count, sizeof(struct netdev_rx_queue), GFP_KERNEL);
5667 if (!rx) { 5071 if (!rx)
5668 pr_err("netdev: Unable to allocate %u rx queues\n", count);
5669 return -ENOMEM; 5072 return -ENOMEM;
5670 } 5073
5671 dev->_rx = rx; 5074 dev->_rx = rx;
5672 5075
5673 for (i = 0; i < count; i++) 5076 for (i = 0; i < count; i++)
@@ -5698,10 +5101,9 @@ static int netif_alloc_netdev_queues(struct net_device *dev)
5698 BUG_ON(count < 1); 5101 BUG_ON(count < 1);
5699 5102
5700 tx = kcalloc(count, sizeof(struct netdev_queue), GFP_KERNEL); 5103 tx = kcalloc(count, sizeof(struct netdev_queue), GFP_KERNEL);
5701 if (!tx) { 5104 if (!tx)
5702 pr_err("netdev: Unable to allocate %u tx queues\n", count);
5703 return -ENOMEM; 5105 return -ENOMEM;
5704 } 5106
5705 dev->_tx = tx; 5107 dev->_tx = tx;
5706 5108
5707 netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL); 5109 netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
@@ -5760,6 +5162,14 @@ int register_netdevice(struct net_device *dev)
5760 } 5162 }
5761 } 5163 }
5762 5164
5165 if (((dev->hw_features | dev->features) & NETIF_F_HW_VLAN_FILTER) &&
5166 (!dev->netdev_ops->ndo_vlan_rx_add_vid ||
5167 !dev->netdev_ops->ndo_vlan_rx_kill_vid)) {
5168 netdev_WARN(dev, "Buggy VLAN acceleration in driver!\n");
5169 ret = -EINVAL;
5170 goto err_uninit;
5171 }
5172
5763 ret = -EBUSY; 5173 ret = -EBUSY;
5764 if (!dev->ifindex) 5174 if (!dev->ifindex)
5765 dev->ifindex = dev_new_index(net); 5175 dev->ifindex = dev_new_index(net);
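Editor's note: the new VLAN sanity check in this hunk can be read as the predicate below. This is an illustrative restatement only, not a helper that exists in dev.c: a driver may advertise NETIF_F_HW_VLAN_FILTER only if it also wires up both VLAN-filter callbacks.

#include <linux/netdevice.h>

/* Illustrative restatement of the check; register_netdevice() rejects
 * the device with -EINVAL when this would return false. */
static bool foo_vlan_filter_ops_consistent(const struct net_device *dev)
{
	if (!((dev->hw_features | dev->features) & NETIF_F_HW_VLAN_FILTER))
		return true;	/* no hardware VLAN filtering claimed */

	return dev->netdev_ops->ndo_vlan_rx_add_vid &&
	       dev->netdev_ops->ndo_vlan_rx_kill_vid;
}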
@@ -5815,6 +5225,13 @@ int register_netdevice(struct net_device *dev)
5815 list_netdevice(dev); 5225 list_netdevice(dev);
5816 add_device_randomness(dev->dev_addr, dev->addr_len); 5226 add_device_randomness(dev->dev_addr, dev->addr_len);
5817 5227
5228 /* If the device has a permanent device address, the driver should
5229 * set dev_addr and leave addr_assign_type at NET_ADDR_PERM
5230 * (the default value).
5231 */
5232 if (dev->addr_assign_type == NET_ADDR_PERM)
5233 memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len);
5234
5818 /* Notify protocols, that a new device appeared. */ 5235 /* Notify protocols, that a new device appeared. */
5819 ret = call_netdevice_notifiers(NETDEV_REGISTER, dev); 5236 ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
5820 ret = notifier_to_errno(ret); 5237 ret = notifier_to_errno(ret);
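Editor's note: a sketch of the driver-side contract behind the perm_addr copy added in the hunk above. Everything prefixed "foo" is hypothetical, including the EEPROM read helper, which is only declared here.

#include <linux/netdevice.h>
#include <linux/etherdevice.h>

/* Hypothetical EEPROM read helper assumed to exist elsewhere in the driver. */
extern void foo_read_hw_mac(struct net_device *dev, u8 *mac);

/* Install the factory MAC in dev->dev_addr and leave addr_assign_type at
 * its NET_ADDR_PERM default; register_netdevice() then mirrors the
 * address into dev->perm_addr. */
static void foo_set_permanent_addr(struct net_device *dev)
{
	u8 hw_mac[ETH_ALEN];

	foo_read_hw_mac(dev, hw_mac);
	memcpy(dev->dev_addr, hw_mac, ETH_ALEN);
	/* do NOT touch dev->addr_assign_type here */
}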
@@ -6121,6 +5538,14 @@ struct netdev_queue *dev_ingress_queue_create(struct net_device *dev)
6121 5538
6122static const struct ethtool_ops default_ethtool_ops; 5539static const struct ethtool_ops default_ethtool_ops;
6123 5540
5541void netdev_set_default_ethtool_ops(struct net_device *dev,
5542 const struct ethtool_ops *ops)
5543{
5544 if (dev->ethtool_ops == &default_ethtool_ops)
5545 dev->ethtool_ops = ops;
5546}
5547EXPORT_SYMBOL_GPL(netdev_set_default_ethtool_ops);
5548
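Editor's note: netdev_set_default_ethtool_ops() above only installs the given ops when the driver left dev->ethtool_ops at the kernel's built-in default. A sketch of a stacking layer (hypothetical "foo") using it as a fallback:

#include <linux/netdevice.h>
#include <linux/ethtool.h>

/* Hypothetical fallback ops for devices whose driver set none. */
static const struct ethtool_ops foo_fallback_ethtool_ops = {
	.get_link	= ethtool_op_get_link,
};

static void foo_setup(struct net_device *dev)
{
	/* No-op if the driver already installed its own ethtool_ops. */
	netdev_set_default_ethtool_ops(dev, &foo_fallback_ethtool_ops);
}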
6124/** 5549/**
6125 * alloc_netdev_mqs - allocate network device 5550 * alloc_netdev_mqs - allocate network device
6126 * @sizeof_priv: size of private data to allocate space for 5551 * @sizeof_priv: size of private data to allocate space for
@@ -6165,10 +5590,8 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
6165 alloc_size += NETDEV_ALIGN - 1; 5590 alloc_size += NETDEV_ALIGN - 1;
6166 5591
6167 p = kzalloc(alloc_size, GFP_KERNEL); 5592 p = kzalloc(alloc_size, GFP_KERNEL);
6168 if (!p) { 5593 if (!p)
6169 pr_err("alloc_netdev: Unable to allocate device\n");
6170 return NULL; 5594 return NULL;
6171 }
6172 5595
6173 dev = PTR_ALIGN(p, NETDEV_ALIGN); 5596 dev = PTR_ALIGN(p, NETDEV_ALIGN);
6174 dev->padded = (char *)dev - (char *)p; 5597 dev->padded = (char *)dev - (char *)p;
@@ -6191,6 +5614,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
6191 INIT_LIST_HEAD(&dev->napi_list); 5614 INIT_LIST_HEAD(&dev->napi_list);
6192 INIT_LIST_HEAD(&dev->unreg_list); 5615 INIT_LIST_HEAD(&dev->unreg_list);
6193 INIT_LIST_HEAD(&dev->link_watch_list); 5616 INIT_LIST_HEAD(&dev->link_watch_list);
5617 INIT_LIST_HEAD(&dev->upper_dev_list);
6194 dev->priv_flags = IFF_XMIT_DST_RELEASE; 5618 dev->priv_flags = IFF_XMIT_DST_RELEASE;
6195 setup(dev); 5619 setup(dev);
6196 5620
@@ -6834,19 +6258,9 @@ static int __init net_dev_init(void)
6834 6258
6835 hotcpu_notifier(dev_cpu_callback, 0); 6259 hotcpu_notifier(dev_cpu_callback, 0);
6836 dst_init(); 6260 dst_init();
6837 dev_mcast_init();
6838 rc = 0; 6261 rc = 0;
6839out: 6262out:
6840 return rc; 6263 return rc;
6841} 6264}
6842 6265
6843subsys_initcall(net_dev_init); 6266subsys_initcall(net_dev_init);
6844
6845static int __init initialize_hashrnd(void)
6846{
6847 get_random_bytes(&hashrnd, sizeof(hashrnd));
6848 return 0;
6849}
6850
6851late_initcall_sync(initialize_hashrnd);
6852