author		Jens Axboe <jens.axboe@oracle.com>	2010-05-21 15:27:26 -0400
committer	Jens Axboe <jens.axboe@oracle.com>	2010-05-21 15:27:26 -0400
commit		ee9a3607fb03e804ddf624544105f4e34260c380 (patch)
tree		ce41b6e0fa10982a306f6c142a92dbf3c9961284 /net/core
parent		b492e95be0ae672922f4734acf3f5d35c30be948 (diff)
parent		d515e86e639890b33a09390d062b0831664f04a2 (diff)
Merge branch 'master' into for-2.6.35
Conflicts:
fs/ext3/fsync.c
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
Diffstat (limited to 'net/core')
-rw-r--r--	net/core/Makefile		2
-rw-r--r--	net/core/datagram.c		21
-rw-r--r--	net/core/dev.c			1402
-rw-r--r--	net/core/dev_addr_lists.c	741
-rw-r--r--	net/core/dev_mcast.c		232
-rw-r--r--	net/core/dst.c			45
-rw-r--r--	net/core/ethtool.c		152
-rw-r--r--	net/core/fib_rules.c		31
-rw-r--r--	net/core/filter.c		7
-rw-r--r--	net/core/flow.c			405
-rw-r--r--	net/core/net-sysfs.c		377
-rw-r--r--	net/core/net-sysfs.h		1
-rw-r--r--	net/core/net_namespace.c	95
-rw-r--r--	net/core/netpoll.c		26
-rw-r--r--	net/core/pktgen.c		58
-rw-r--r--	net/core/rtnetlink.c		369
-rw-r--r--	net/core/skbuff.c		33
-rw-r--r--	net/core/sock.c			78
-rw-r--r--	net/core/stream.c		22
-rw-r--r--	net/core/sysctl_net_core.c	75
20 files changed, 2731 insertions, 1441 deletions
diff --git a/net/core/Makefile b/net/core/Makefile
index 08791ac3e05a..51c3eec850ef 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -7,7 +7,7 @@ obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \
 
 obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
 
-obj-y += dev.o ethtool.o dev_mcast.o dst.o netevent.o \
+obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \
 	neighbour.o rtnetlink.o utils.o link_watch.o filter.o
 
 obj-$(CONFIG_XFRM) += flow.o
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 2dccd4ee591b..e0097531417a 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -86,7 +86,7 @@ static int wait_for_packet(struct sock *sk, int *err, long *timeo_p)
 	int error;
 	DEFINE_WAIT_FUNC(wait, receiver_wake_function);
 
-	prepare_to_wait_exclusive(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+	prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 
 	/* Socket errors? */
 	error = sock_error(sk);
@@ -115,7 +115,7 @@ static int wait_for_packet(struct sock *sk, int *err, long *timeo_p)
 	error = 0;
 	*timeo_p = schedule_timeout(*timeo_p);
 out:
-	finish_wait(sk->sk_sleep, &wait);
+	finish_wait(sk_sleep(sk), &wait);
 	return error;
 interrupted:
 	error = sock_intr_errno(*timeo_p);
@@ -229,9 +229,18 @@ EXPORT_SYMBOL(skb_free_datagram);
 
 void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb)
 {
-	lock_sock(sk);
-	skb_free_datagram(sk, skb);
-	release_sock(sk);
+	if (likely(atomic_read(&skb->users) == 1))
+		smp_rmb();
+	else if (likely(!atomic_dec_and_test(&skb->users)))
+		return;
+
+	lock_sock_bh(sk);
+	skb_orphan(skb);
+	sk_mem_reclaim_partial(sk);
+	unlock_sock_bh(sk);
+
+	/* skb is now orphaned, can be freed outside of locked section */
+	__kfree_skb(skb);
 }
 EXPORT_SYMBOL(skb_free_datagram_locked);
 
@@ -726,7 +735,7 @@ unsigned int datagram_poll(struct file *file, struct socket *sock,
 	struct sock *sk = sock->sk;
 	unsigned int mask;
 
-	sock_poll_wait(file, sk->sk_sleep, wait);
+	sock_poll_wait(file, sk_sleep(sk), wait);
 	mask = 0;
 
 	/* exceptional events? */
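
The rewritten skb_free_datagram_locked() above replaces an unconditional lock_sock()/release_sock() round trip with a reference-count fast path: a sole owner can free the skb without touching the socket lock at all. A minimal userspace sketch of that pattern, with C11 atomics standing in for the kernel's atomic_t helpers (all names here are hypothetical):

#include <stdatomic.h>
#include <stdlib.h>

struct buf {
	atomic_int users;
	/* payload ... */
};

static void buf_put(struct buf *b)
{
	if (atomic_load_explicit(&b->users, memory_order_acquire) == 1) {
		/* sole owner: nobody else can take a reference anymore */
	} else if (atomic_fetch_sub_explicit(&b->users, 1,
					     memory_order_acq_rel) != 1) {
		return;		/* other references remain */
	}
	free(b);		/* last reference: free without locking */
}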
diff --git a/net/core/dev.c b/net/core/dev.c
index f769098774b7..d273e4e3ecdc 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -130,6 +130,7 @@
 #include <linux/jhash.h>
 #include <linux/random.h>
 #include <trace/events/napi.h>
+#include <linux/pci.h>
 
 #include "net-sysfs.h"
 
@@ -207,6 +208,20 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
 	return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
 }
 
+static inline void rps_lock(struct softnet_data *sd)
+{
+#ifdef CONFIG_RPS
+	spin_lock(&sd->input_pkt_queue.lock);
+#endif
+}
+
+static inline void rps_unlock(struct softnet_data *sd)
+{
+#ifdef CONFIG_RPS
+	spin_unlock(&sd->input_pkt_queue.lock);
+#endif
+}
+
 /* Device list insertion */
 static int list_netdevice(struct net_device *dev)
 {
@@ -249,7 +264,7 @@ static RAW_NOTIFIER_HEAD(netdev_chain);
  * queue in the local softnet handler.
  */
 
-DEFINE_PER_CPU(struct softnet_data, softnet_data);
+DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
 EXPORT_PER_CPU_SYMBOL(softnet_data);
 
 #ifdef CONFIG_LOCKDEP
@@ -773,14 +788,17 @@ EXPORT_SYMBOL(__dev_getfirstbyhwtype);
 
 struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
 {
-	struct net_device *dev;
+	struct net_device *dev, *ret = NULL;
 
-	rtnl_lock();
-	dev = __dev_getfirstbyhwtype(net, type);
-	if (dev)
-		dev_hold(dev);
-	rtnl_unlock();
-	return dev;
+	rcu_read_lock();
+	for_each_netdev_rcu(net, dev)
+		if (dev->type == type) {
+			dev_hold(dev);
+			ret = dev;
+			break;
+		}
+	rcu_read_unlock();
+	return ret;
 }
 EXPORT_SYMBOL(dev_getfirstbyhwtype);
 
@@ -984,15 +1002,10 @@ int dev_change_name(struct net_device *dev, const char *newname)
 		return err;
 
 rollback:
-	/* For now only devices in the initial network namespace
-	 * are in sysfs.
-	 */
-	if (net_eq(net, &init_net)) {
-		ret = device_rename(&dev->dev, dev->name);
-		if (ret) {
-			memcpy(dev->name, oldname, IFNAMSIZ);
-			return ret;
-		}
+	ret = device_rename(&dev->dev, dev->name);
+	if (ret) {
+		memcpy(dev->name, oldname, IFNAMSIZ);
+		return ret;
 	}
 
 	write_lock_bh(&dev_base_lock);
@@ -1085,9 +1098,9 @@ void netdev_state_change(struct net_device *dev)
 }
 EXPORT_SYMBOL(netdev_state_change);
 
-void netdev_bonding_change(struct net_device *dev, unsigned long event)
+int netdev_bonding_change(struct net_device *dev, unsigned long event)
 {
-	call_netdevice_notifiers(event, dev);
+	return call_netdevice_notifiers(event, dev);
 }
 EXPORT_SYMBOL(netdev_bonding_change);
 
@@ -1417,6 +1430,7 @@ EXPORT_SYMBOL(unregister_netdevice_notifier);
 
 int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
 {
+	ASSERT_RTNL();
 	return raw_notifier_call_chain(&netdev_chain, val, dev);
 }
 
@@ -1435,7 +1449,7 @@ void net_disable_timestamp(void)
 }
 EXPORT_SYMBOL(net_disable_timestamp);
 
-static inline void net_timestamp(struct sk_buff *skb)
+static inline void net_timestamp_set(struct sk_buff *skb)
 {
 	if (atomic_read(&netstamp_needed))
 		__net_timestamp(skb);
@@ -1443,6 +1457,12 @@ static inline void net_timestamp(struct sk_buff *skb)
 		skb->tstamp.tv64 = 0;
 }
 
+static inline void net_timestamp_check(struct sk_buff *skb)
+{
+	if (!skb->tstamp.tv64 && atomic_read(&netstamp_needed))
+		__net_timestamp(skb);
+}
+
 /**
  * dev_forward_skb - loopback an skb to another netif
  *
@@ -1451,7 +1471,7 @@ static inline void net_timestamp(struct sk_buff *skb)
  *
  * return values:
  *	NET_RX_SUCCESS	(no congestion)
- *	NET_RX_DROP	(packet was dropped)
+ *	NET_RX_DROP	(packet was dropped, but freed)
  *
  * dev_forward_skb can be used for injecting an skb from the
  * start_xmit function of one device into the receive queue
@@ -1465,12 +1485,11 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
 {
 	skb_orphan(skb);
 
-	if (!(dev->flags & IFF_UP))
-		return NET_RX_DROP;
-
-	if (skb->len > (dev->mtu + dev->hard_header_len))
+	if (!(dev->flags & IFF_UP) ||
+	    (skb->len > (dev->mtu + dev->hard_header_len))) {
+		kfree_skb(skb);
 		return NET_RX_DROP;
-
+	}
 	skb_set_dev(skb, dev);
 	skb->tstamp.tv64 = 0;
 	skb->pkt_type = PACKET_HOST;
@@ -1490,9 +1509,9 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 
 #ifdef CONFIG_NET_CLS_ACT
 	if (!(skb->tstamp.tv64 && (G_TC_FROM(skb->tc_verd) & AT_INGRESS)))
-		net_timestamp(skb);
+		net_timestamp_set(skb);
 #else
-	net_timestamp(skb);
+	net_timestamp_set(skb);
 #endif
 
 	rcu_read_lock();
@@ -1538,8 +1557,9 @@ static inline void __netif_reschedule(struct Qdisc *q)
 
 	local_irq_save(flags);
 	sd = &__get_cpu_var(softnet_data);
-	q->next_sched = sd->output_queue;
-	sd->output_queue = q;
+	q->next_sched = NULL;
+	*sd->output_queue_tailp = q;
+	sd->output_queue_tailp = &q->next_sched;
 	raise_softirq_irqoff(NET_TX_SOFTIRQ);
 	local_irq_restore(flags);
 }
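
The output_queue_tailp pointer introduced above converts the per-CPU output queue from head insertion into an O(1) FIFO append: softnet_data keeps a pointer to the last ->next field instead of walking the list. The same idiom in isolation, with hypothetical stand-alone types:

#include <stddef.h>

struct node {
	struct node *next;
};

struct fifo {
	struct node *head;
	struct node **tailp;	/* &head while empty, else &last->next */
};

static void fifo_init(struct fifo *q)
{
	q->head = NULL;
	q->tailp = &q->head;
}

static void fifo_append(struct fifo *q, struct node *n)
{
	n->next = NULL;
	*q->tailp = n;		/* link after the current tail */
	q->tailp = &n->next;	/* the new tail is this node */
}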
@@ -1784,18 +1804,27 @@ EXPORT_SYMBOL(netdev_rx_csum_fault);
  * 2. No high memory really exists on this machine.
  */
 
-static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
+static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
 {
 #ifdef CONFIG_HIGHMEM
 	int i;
+	if (!(dev->features & NETIF_F_HIGHDMA)) {
+		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
+			if (PageHighMem(skb_shinfo(skb)->frags[i].page))
+				return 1;
+	}
 
-	if (dev->features & NETIF_F_HIGHDMA)
-		return 0;
-
-	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
-		if (PageHighMem(skb_shinfo(skb)->frags[i].page))
-			return 1;
+	if (PCI_DMA_BUS_IS_PHYS) {
+		struct device *pdev = dev->dev.parent;
 
+		if (!pdev)
+			return 0;
+		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+			dma_addr_t addr = page_to_phys(skb_shinfo(skb)->frags[i].page);
+			if (!pdev->dma_mask || addr + PAGE_SIZE - 1 > *pdev->dma_mask)
+				return 1;
+		}
+	}
 #endif
 	return 0;
 }
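
The new PCI_DMA_BUS_IS_PHYS branch above rejects fragments the device cannot reach: a page is illegal for DMA when any of its bytes lies above the device's DMA mask. A sketch of just that bounds test, assuming 4 KiB pages and using fixed-width types in place of dma_addr_t:

#include <stdint.h>
#include <stdbool.h>

#define SKETCH_PAGE_SIZE 4096u	/* assumption: 4 KiB pages */

static bool page_beyond_dma_mask(uint64_t page_phys, const uint64_t *dma_mask)
{
	/* no mask recorded, or the page's last byte exceeds the mask */
	return !dma_mask || page_phys + SKETCH_PAGE_SIZE - 1 > *dma_mask;
}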
@@ -1853,6 +1882,17 @@ static int dev_gso_segment(struct sk_buff *skb)
 	return 0;
 }
 
+/*
+ * Try to orphan skb early, right before transmission by the device.
+ * We cannot orphan skb if tx timestamp is requested, since
+ * drivers need to call skb_tstamp_tx() to send the timestamp.
+ */
+static inline void skb_orphan_try(struct sk_buff *skb)
+{
+	if (!skb_tx(skb)->flags)
+		skb_orphan(skb);
+}
+
 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 			struct netdev_queue *txq)
 {
@@ -1863,13 +1903,6 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 		if (!list_empty(&ptype_all))
 			dev_queue_xmit_nit(skb, dev);
 
-		if (netif_needs_gso(dev, skb)) {
-			if (unlikely(dev_gso_segment(skb)))
-				goto out_kfree_skb;
-			if (skb->next)
-				goto gso;
-		}
-
 		/*
 		 * If device doesnt need skb->dst, release it right now while
 		 * its hot in this cpu cache
@@ -1877,23 +1910,18 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 		if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
 			skb_dst_drop(skb);
 
+		skb_orphan_try(skb);
+
+		if (netif_needs_gso(dev, skb)) {
+			if (unlikely(dev_gso_segment(skb)))
+				goto out_kfree_skb;
+			if (skb->next)
+				goto gso;
+		}
+
 		rc = ops->ndo_start_xmit(skb, dev);
 		if (rc == NETDEV_TX_OK)
 			txq_trans_update(txq);
-		/*
-		 * TODO: if skb_orphan() was called by
-		 * dev->hard_start_xmit() (for example, the unmodified
-		 * igb driver does that; bnx2 doesn't), then
-		 * skb_tx_software_timestamp() will be unable to send
-		 * back the time stamp.
-		 *
-		 * How can this be prevented? Always create another
-		 * reference to the socket before calling
-		 * dev->hard_start_xmit()? Prevent that skb_orphan()
-		 * does anything in dev->hard_start_xmit() by clearing
-		 * the skb destructor before the call and restoring it
-		 * afterwards, then doing the skb_orphan() ourselves?
-		 */
 		return rc;
 	}
 
@@ -1932,7 +1960,7 @@ out_kfree_skb:
 	return rc;
 }
 
-static u32 skb_tx_hashrnd;
+static u32 hashrnd __read_mostly;
 
 u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
 {
@@ -1948,9 +1976,9 @@ u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
 	if (skb->sk && skb->sk->sk_hash)
 		hash = skb->sk->sk_hash;
 	else
-		hash = skb->protocol;
+		hash = (__force u16) skb->protocol;
 
-	hash = jhash_1word(hash, skb_tx_hashrnd);
+	hash = jhash_1word(hash, hashrnd);
 
 	return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
 }
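
The final line of skb_tx_hash() maps a 32-bit hash onto [0, real_num_tx_queues) without a modulo: multiplying by the queue count and keeping the upper 32 bits of the 64-bit product gives an even spread. A self-contained illustration of the same reduction:

#include <stdint.h>
#include <stdio.h>

static uint16_t pick_queue(uint32_t hash, uint16_t nqueues)
{
	return (uint16_t)(((uint64_t)hash * nqueues) >> 32);
}

int main(void)
{
	/* a hash of 0x80000000 (half the 32-bit range) on 8 queues
	 * lands in queue 4, i.e. halfway through the queue range */
	printf("%u\n", pick_queue(0x80000000u, 8));
	return 0;
}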
@@ -1960,10 +1988,9 @@ static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
 {
 	if (unlikely(queue_index >= dev->real_num_tx_queues)) {
 		if (net_ratelimit()) {
-			WARN(1, "%s selects TX queue %d, but "
-			     "real number of TX queues is %d\n",
-			     dev->name, queue_index,
-			     dev->real_num_tx_queues);
+			pr_warning("%s selects TX queue %d, but "
+				"real number of TX queues is %d\n",
+				dev->name, queue_index, dev->real_num_tx_queues);
 		}
 		return 0;
 	}
@@ -1990,7 +2017,7 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev,
 			queue_index = skb_tx_hash(dev, skb);
 
 		if (sk) {
-			struct dst_entry *dst = rcu_dereference_bh(sk->sk_dst_cache);
+			struct dst_entry *dst = rcu_dereference_check(sk->sk_dst_cache, 1);
 
 			if (dst && skb_dst(skb) == dst)
 				sk_tx_queue_set(sk, queue_index);
@@ -2020,6 +2047,8 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 		 * waiting to be sent out; and the qdisc is not running -
 		 * xmit the skb directly.
 		 */
+		if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE))
+			skb_dst_force(skb);
 		__qdisc_update_bstats(q, skb->len);
 		if (sch_direct_xmit(skb, q, dev, txq, root_lock))
 			__qdisc_run(q);
@@ -2028,6 +2057,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 
 		rc = NET_XMIT_SUCCESS;
 	} else {
+		skb_dst_force(skb);
 		rc = qdisc_enqueue_root(skb, q);
 		qdisc_run(q);
 	}
@@ -2175,11 +2205,249 @@ EXPORT_SYMBOL(dev_queue_xmit);
   =======================================================================*/
 
 int netdev_max_backlog __read_mostly = 1000;
+int netdev_tstamp_prequeue __read_mostly = 1;
 int netdev_budget __read_mostly = 300;
 int weight_p __read_mostly = 64;	/* old backlog weight */
 
-DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
+/* Called with irq disabled */
+static inline void ____napi_schedule(struct softnet_data *sd,
+				     struct napi_struct *napi)
+{
+	list_add_tail(&napi->poll_list, &sd->poll_list);
+	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
+}
 
+#ifdef CONFIG_RPS
+
+/* One global table that all flow-based protocols share. */
+struct rps_sock_flow_table *rps_sock_flow_table __read_mostly;
+EXPORT_SYMBOL(rps_sock_flow_table);
+
+/*
+ * get_rps_cpu is called from netif_receive_skb and returns the target
+ * CPU from the RPS map of the receiving queue for a given skb.
+ * rcu_read_lock must be held on entry.
+ */
+static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
+		       struct rps_dev_flow **rflowp)
+{
+	struct ipv6hdr *ip6;
+	struct iphdr *ip;
+	struct netdev_rx_queue *rxqueue;
+	struct rps_map *map;
+	struct rps_dev_flow_table *flow_table;
+	struct rps_sock_flow_table *sock_flow_table;
+	int cpu = -1;
+	u8 ip_proto;
+	u16 tcpu;
+	u32 addr1, addr2, ihl;
+	union {
+		u32 v32;
+		u16 v16[2];
+	} ports;
+
+	if (skb_rx_queue_recorded(skb)) {
+		u16 index = skb_get_rx_queue(skb);
+		if (unlikely(index >= dev->num_rx_queues)) {
+			if (net_ratelimit()) {
+				pr_warning("%s received packet on queue "
+					"%u, but number of RX queues is %u\n",
+					dev->name, index, dev->num_rx_queues);
+			}
+			goto done;
+		}
+		rxqueue = dev->_rx + index;
+	} else
+		rxqueue = dev->_rx;
+
+	if (!rxqueue->rps_map && !rxqueue->rps_flow_table)
+		goto done;
+
+	if (skb->rxhash)
+		goto got_hash; /* Skip hash computation on packet header */
+
+	switch (skb->protocol) {
+	case __constant_htons(ETH_P_IP):
+		if (!pskb_may_pull(skb, sizeof(*ip)))
+			goto done;
+
+		ip = (struct iphdr *) skb->data;
+		ip_proto = ip->protocol;
+		addr1 = (__force u32) ip->saddr;
+		addr2 = (__force u32) ip->daddr;
+		ihl = ip->ihl;
+		break;
+	case __constant_htons(ETH_P_IPV6):
+		if (!pskb_may_pull(skb, sizeof(*ip6)))
+			goto done;
+
+		ip6 = (struct ipv6hdr *) skb->data;
+		ip_proto = ip6->nexthdr;
+		addr1 = (__force u32) ip6->saddr.s6_addr32[3];
+		addr2 = (__force u32) ip6->daddr.s6_addr32[3];
+		ihl = (40 >> 2);
+		break;
+	default:
+		goto done;
+	}
+	switch (ip_proto) {
+	case IPPROTO_TCP:
+	case IPPROTO_UDP:
+	case IPPROTO_DCCP:
+	case IPPROTO_ESP:
+	case IPPROTO_AH:
+	case IPPROTO_SCTP:
+	case IPPROTO_UDPLITE:
+		if (pskb_may_pull(skb, (ihl * 4) + 4)) {
+			ports.v32 = * (__force u32 *) (skb->data + (ihl * 4));
+			if (ports.v16[1] < ports.v16[0])
+				swap(ports.v16[0], ports.v16[1]);
+			break;
+		}
+	default:
+		ports.v32 = 0;
+		break;
+	}
+
+	/* get a consistent hash (same value on both flow directions) */
+	if (addr2 < addr1)
+		swap(addr1, addr2);
+	skb->rxhash = jhash_3words(addr1, addr2, ports.v32, hashrnd);
+	if (!skb->rxhash)
+		skb->rxhash = 1;
+
+got_hash:
+	flow_table = rcu_dereference(rxqueue->rps_flow_table);
+	sock_flow_table = rcu_dereference(rps_sock_flow_table);
+	if (flow_table && sock_flow_table) {
+		u16 next_cpu;
+		struct rps_dev_flow *rflow;
+
+		rflow = &flow_table->flows[skb->rxhash & flow_table->mask];
+		tcpu = rflow->cpu;
+
+		next_cpu = sock_flow_table->ents[skb->rxhash &
+		    sock_flow_table->mask];
+
+		/*
+		 * If the desired CPU (where last recvmsg was done) is
+		 * different from current CPU (one in the rx-queue flow
+		 * table entry), switch if one of the following holds:
+		 *   - Current CPU is unset (equal to RPS_NO_CPU).
+		 *   - Current CPU is offline.
+		 *   - The current CPU's queue tail has advanced beyond the
+		 *     last packet that was enqueued using this table entry.
+		 *     This guarantees that all previous packets for the flow
+		 *     have been dequeued, thus preserving in order delivery.
+		 */
+		if (unlikely(tcpu != next_cpu) &&
+		    (tcpu == RPS_NO_CPU || !cpu_online(tcpu) ||
+		     ((int)(per_cpu(softnet_data, tcpu).input_queue_head -
+		      rflow->last_qtail)) >= 0)) {
+			tcpu = rflow->cpu = next_cpu;
+			if (tcpu != RPS_NO_CPU)
+				rflow->last_qtail = per_cpu(softnet_data,
+				    tcpu).input_queue_head;
+		}
+		if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) {
+			*rflowp = rflow;
+			cpu = tcpu;
+			goto done;
+		}
+	}
+
+	map = rcu_dereference(rxqueue->rps_map);
+	if (map) {
+		tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32];
+
+		if (cpu_online(tcpu)) {
+			cpu = tcpu;
+			goto done;
+		}
+	}
+
+done:
+	return cpu;
+}
+
+/* Called from hardirq (IPI) context */
+static void rps_trigger_softirq(void *data)
+{
+	struct softnet_data *sd = data;
+
+	____napi_schedule(sd, &sd->backlog);
+	sd->received_rps++;
+}
+
+#endif /* CONFIG_RPS */
+
+/*
+ * Check if this softnet_data structure is another cpu one
+ * If yes, queue it to our IPI list and return 1
+ * If no, return 0
+ */
+static int rps_ipi_queued(struct softnet_data *sd)
+{
+#ifdef CONFIG_RPS
+	struct softnet_data *mysd = &__get_cpu_var(softnet_data);
+
+	if (sd != mysd) {
+		sd->rps_ipi_next = mysd->rps_ipi_list;
+		mysd->rps_ipi_list = sd;
+
+		__raise_softirq_irqoff(NET_RX_SOFTIRQ);
+		return 1;
+	}
+#endif /* CONFIG_RPS */
+	return 0;
+}
+
+/*
+ * enqueue_to_backlog is called to queue an skb to a per CPU backlog
+ * queue (may be a remote CPU queue).
+ */
+static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
+			      unsigned int *qtail)
+{
+	struct softnet_data *sd;
+	unsigned long flags;
+
+	sd = &per_cpu(softnet_data, cpu);
+
+	local_irq_save(flags);
+
+	rps_lock(sd);
+	if (skb_queue_len(&sd->input_pkt_queue) <= netdev_max_backlog) {
+		if (skb_queue_len(&sd->input_pkt_queue)) {
+enqueue:
+			__skb_queue_tail(&sd->input_pkt_queue, skb);
+#ifdef CONFIG_RPS
+			*qtail = sd->input_queue_head +
+					skb_queue_len(&sd->input_pkt_queue);
+#endif
+			rps_unlock(sd);
+			local_irq_restore(flags);
+			return NET_RX_SUCCESS;
+		}
+
+		/* Schedule NAPI for backlog device
+		 * We can use non atomic operation since we own the queue lock
+		 */
+		if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state)) {
+			if (!rps_ipi_queued(sd))
+				____napi_schedule(sd, &sd->backlog);
+		}
+		goto enqueue;
+	}
+
+	sd->dropped++;
+	rps_unlock(sd);
+
+	local_irq_restore(flags);
+
+	kfree_skb(skb);
+	return NET_RX_DROP;
+}
 
 /**
  *	netif_rx	-	post buffer to the network code
@@ -2198,41 +2466,38 @@ DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
 
 int netif_rx(struct sk_buff *skb)
 {
-	struct softnet_data *queue;
-	unsigned long flags;
+	int ret;
 
 	/* if netpoll wants it, pretend we never saw it */
 	if (netpoll_rx(skb))
 		return NET_RX_DROP;
 
-	if (!skb->tstamp.tv64)
-		net_timestamp(skb);
+	if (netdev_tstamp_prequeue)
+		net_timestamp_check(skb);
 
-	/*
-	 * The code is rearranged so that the path is the most
-	 * short when CPU is congested, but is still operating.
-	 */
-	local_irq_save(flags);
-	queue = &__get_cpu_var(softnet_data);
+#ifdef CONFIG_RPS
+	{
+		struct rps_dev_flow voidflow, *rflow = &voidflow;
+		int cpu;
 
-	__get_cpu_var(netdev_rx_stat).total++;
-	if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
-		if (queue->input_pkt_queue.qlen) {
-enqueue:
-			__skb_queue_tail(&queue->input_pkt_queue, skb);
-			local_irq_restore(flags);
-			return NET_RX_SUCCESS;
-		}
+		rcu_read_lock();
 
-		napi_schedule(&queue->backlog);
-		goto enqueue;
-	}
+		cpu = get_rps_cpu(skb->dev, skb, &rflow);
+		if (cpu < 0)
+			cpu = smp_processor_id();
 
-	__get_cpu_var(netdev_rx_stat).dropped++;
-	local_irq_restore(flags);
+		ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
 
-	kfree_skb(skb);
-	return NET_RX_DROP;
+		rcu_read_unlock();
+	}
+#else
+	{
+		unsigned int qtail;
+		ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
+		put_cpu();
+	}
+#endif
+	return ret;
 }
 EXPORT_SYMBOL(netif_rx);
 
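The RPS receive path added above (get_rps_cpu() feeding enqueue_to_backlog()) keys on a direction-agnostic flow hash: the address pair and the port pair are each put into canonical order before hashing, so both directions of a connection compute the same rxhash and land on the same CPU. A simplified sketch of that canonicalization; mix32() is a hypothetical stand-in for the kernel's jhash_3words(), and the port packing is simpler than the union used in the patch:

#include <stdint.h>

static uint32_t mix32(uint32_t a, uint32_t b, uint32_t c, uint32_t seed)
{
	uint32_t h = seed ^ a;		/* placeholder mixing, not jhash */

	h = (h ^ b) * 0x9e3779b1u;
	h = (h ^ c) * 0x85ebca6bu;
	return h ^ (h >> 16);
}

static uint32_t flow_hash(uint32_t saddr, uint32_t daddr,
			  uint16_t sport, uint16_t dport, uint32_t seed)
{
	uint32_t lo = saddr < daddr ? saddr : daddr;
	uint32_t hi = saddr < daddr ? daddr : saddr;
	uint32_t h;

	if (dport < sport) {		/* canonical port order */
		uint16_t t = sport;
		sport = dport;
		dport = t;
	}
	h = mix32(lo, hi, ((uint32_t)dport << 16) | sport, seed);
	return h ? h : 1;		/* 0 is reserved for "no hash yet" */
}
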
@@ -2277,6 +2542,7 @@ static void net_tx_action(struct softirq_action *h)
 		local_irq_disable();
 		head = sd->output_queue;
 		sd->output_queue = NULL;
+		sd->output_queue_tailp = &sd->output_queue;
 		local_irq_enable();
 
 		while (head) {
@@ -2353,7 +2619,8 @@ static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
 #endif
 
 #if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
-struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *skb) __read_mostly;
+struct sk_buff *(*macvlan_handle_frame_hook)(struct macvlan_port *p,
+					     struct sk_buff *skb) __read_mostly;
 EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);
 
 static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
@@ -2361,14 +2628,17 @@ static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
 					     int *ret,
 					     struct net_device *orig_dev)
 {
-	if (skb->dev->macvlan_port == NULL)
+	struct macvlan_port *port;
+
+	port = rcu_dereference(skb->dev->macvlan_port);
+	if (!port)
 		return skb;
 
 	if (*pt_prev) {
 		*ret = deliver_skb(skb, *pt_prev, orig_dev);
 		*pt_prev = NULL;
 	}
-	return macvlan_handle_frame_hook(skb);
+	return macvlan_handle_frame_hook(port, skb);
 }
 #else
 #define handle_macvlan(skb, pt_prev, ret, orig_dev)	(skb)
@@ -2469,22 +2739,56 @@ void netif_nit_deliver(struct sk_buff *skb)
 	rcu_read_unlock();
 }
 
-/**
- *	netif_receive_skb - process receive buffer from network
- *	@skb: buffer to process
- *
- *	netif_receive_skb() is the main receive data processing function.
- *	It always succeeds. The buffer may be dropped during processing
- *	for congestion control or by the protocol layers.
- *
- *	This function may only be called from softirq context and interrupts
- *	should be enabled.
- *
- *	Return values (usually ignored):
- *	NET_RX_SUCCESS: no congestion
- *	NET_RX_DROP: packet was dropped
+static inline void skb_bond_set_mac_by_master(struct sk_buff *skb,
+					      struct net_device *master)
+{
+	if (skb->pkt_type == PACKET_HOST) {
+		u16 *dest = (u16 *) eth_hdr(skb)->h_dest;
+
+		memcpy(dest, master->dev_addr, ETH_ALEN);
+	}
+}
+
+/* On bonding slaves other than the currently active slave, suppress
+ * duplicates except for 802.3ad ETH_P_SLOW, alb non-mcast/bcast, and
+ * ARP on active-backup slaves with arp_validate enabled.
  */
-int netif_receive_skb(struct sk_buff *skb)
+int __skb_bond_should_drop(struct sk_buff *skb, struct net_device *master)
+{
+	struct net_device *dev = skb->dev;
+
+	if (master->priv_flags & IFF_MASTER_ARPMON)
+		dev->last_rx = jiffies;
+
+	if ((master->priv_flags & IFF_MASTER_ALB) && master->br_port) {
+		/* Do address unmangle. The local destination address
+		 * will be always the one master has. Provides the right
+		 * functionality in a bridge.
+		 */
+		skb_bond_set_mac_by_master(skb, master);
+	}
+
+	if (dev->priv_flags & IFF_SLAVE_INACTIVE) {
+		if ((dev->priv_flags & IFF_SLAVE_NEEDARP) &&
+		    skb->protocol == __cpu_to_be16(ETH_P_ARP))
+			return 0;
+
+		if (master->priv_flags & IFF_MASTER_ALB) {
+			if (skb->pkt_type != PACKET_BROADCAST &&
+			    skb->pkt_type != PACKET_MULTICAST)
+				return 0;
+		}
+		if (master->priv_flags & IFF_MASTER_8023AD &&
+		    skb->protocol == __cpu_to_be16(ETH_P_SLOW))
+			return 0;
+
+		return 1;
+	}
+	return 0;
+}
+EXPORT_SYMBOL(__skb_bond_should_drop);
+
+static int __netif_receive_skb(struct sk_buff *skb)
 {
 	struct packet_type *ptype, *pt_prev;
 	struct net_device *orig_dev;
@@ -2494,8 +2798,8 @@ int netif_receive_skb(struct sk_buff *skb)
 	int ret = NET_RX_DROP;
 	__be16 type;
 
-	if (!skb->tstamp.tv64)
-		net_timestamp(skb);
+	if (!netdev_tstamp_prequeue)
+		net_timestamp_check(skb);
 
 	if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb))
 		return NET_RX_SUCCESS;
@@ -2517,7 +2821,7 @@ int netif_receive_skb(struct sk_buff *skb)
 		skb->dev = master;
 	}
 
-	__get_cpu_var(netdev_rx_stat).total++;
+	__get_cpu_var(softnet_data).processed++;
 
 	skb_reset_network_header(skb);
 	skb_reset_transport_header(skb);
@@ -2595,20 +2899,77 @@ out:
 	rcu_read_unlock();
 	return ret;
 }
+
+/**
+ *	netif_receive_skb - process receive buffer from network
+ *	@skb: buffer to process
+ *
+ *	netif_receive_skb() is the main receive data processing function.
+ *	It always succeeds. The buffer may be dropped during processing
+ *	for congestion control or by the protocol layers.
+ *
+ *	This function may only be called from softirq context and interrupts
+ *	should be enabled.
+ *
+ *	Return values (usually ignored):
+ *	NET_RX_SUCCESS: no congestion
+ *	NET_RX_DROP: packet was dropped
+ */
+int netif_receive_skb(struct sk_buff *skb)
+{
+	if (netdev_tstamp_prequeue)
+		net_timestamp_check(skb);
+
+#ifdef CONFIG_RPS
+	{
+		struct rps_dev_flow voidflow, *rflow = &voidflow;
+		int cpu, ret;
+
+		rcu_read_lock();
+
+		cpu = get_rps_cpu(skb->dev, skb, &rflow);
+
+		if (cpu >= 0) {
+			ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
+			rcu_read_unlock();
+		} else {
+			rcu_read_unlock();
+			ret = __netif_receive_skb(skb);
+		}
+
+		return ret;
+	}
+#else
+	return __netif_receive_skb(skb);
+#endif
+}
 EXPORT_SYMBOL(netif_receive_skb);
 
-/* Network device is going away, flush any packets still pending */
+/* Network device is going away, flush any packets still pending
+ * Called with irqs disabled.
+ */
 static void flush_backlog(void *arg)
 {
 	struct net_device *dev = arg;
-	struct softnet_data *queue = &__get_cpu_var(softnet_data);
+	struct softnet_data *sd = &__get_cpu_var(softnet_data);
 	struct sk_buff *skb, *tmp;
 
-	skb_queue_walk_safe(&queue->input_pkt_queue, skb, tmp)
+	rps_lock(sd);
+	skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
 		if (skb->dev == dev) {
-			__skb_unlink(skb, &queue->input_pkt_queue);
+			__skb_unlink(skb, &sd->input_pkt_queue);
 			kfree_skb(skb);
+			input_queue_head_add(sd, 1);
 		}
+	}
+	rps_unlock(sd);
+
+	skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
+		if (skb->dev == dev) {
+			__skb_unlink(skb, &sd->process_queue);
+			kfree_skb(skb);
+		}
+	}
 }
 
 static int napi_gro_complete(struct sk_buff *skb)
@@ -2911,27 +3272,85 @@ gro_result_t napi_gro_frags(struct napi_struct *napi)
 }
 EXPORT_SYMBOL(napi_gro_frags);
 
+/*
+ * net_rps_action sends any pending IPI's for rps.
+ * Note: called with local irq disabled, but exits with local irq enabled.
+ */
+static void net_rps_action_and_irq_enable(struct softnet_data *sd)
+{
+#ifdef CONFIG_RPS
+	struct softnet_data *remsd = sd->rps_ipi_list;
+
+	if (remsd) {
+		sd->rps_ipi_list = NULL;
+
+		local_irq_enable();
+
+		/* Send pending IPI's to kick RPS processing on remote cpus. */
+		while (remsd) {
+			struct softnet_data *next = remsd->rps_ipi_next;
+
+			if (cpu_online(remsd->cpu))
+				__smp_call_function_single(remsd->cpu,
+							   &remsd->csd, 0);
+			remsd = next;
+		}
+	} else
+#endif
+		local_irq_enable();
+}
+
 static int process_backlog(struct napi_struct *napi, int quota)
 {
 	int work = 0;
-	struct softnet_data *queue = &__get_cpu_var(softnet_data);
-	unsigned long start_time = jiffies;
+	struct softnet_data *sd = container_of(napi, struct softnet_data, backlog);
 
+#ifdef CONFIG_RPS
+	/* Check if we have pending ipi, its better to send them now,
+	 * not waiting net_rx_action() end.
+	 */
+	if (sd->rps_ipi_list) {
+		local_irq_disable();
+		net_rps_action_and_irq_enable(sd);
+	}
+#endif
 	napi->weight = weight_p;
-	do {
+	local_irq_disable();
+	while (work < quota) {
 		struct sk_buff *skb;
+		unsigned int qlen;
 
-		local_irq_disable();
-		skb = __skb_dequeue(&queue->input_pkt_queue);
-		if (!skb) {
-			__napi_complete(napi);
+		while ((skb = __skb_dequeue(&sd->process_queue))) {
 			local_irq_enable();
-			break;
+			__netif_receive_skb(skb);
+			if (++work >= quota)
+				return work;
+			local_irq_disable();
 		}
-		local_irq_enable();
 
-		netif_receive_skb(skb);
-	} while (++work < quota && jiffies == start_time);
+		rps_lock(sd);
+		qlen = skb_queue_len(&sd->input_pkt_queue);
+		if (qlen) {
+			input_queue_head_add(sd, qlen);
+			skb_queue_splice_tail_init(&sd->input_pkt_queue,
+						   &sd->process_queue);
+		}
+		if (qlen < quota - work) {
+			/*
+			 * Inline a custom version of __napi_complete().
+			 * only current cpu owns and manipulates this napi,
+			 * and NAPI_STATE_SCHED is the only possible flag set on backlog.
+			 * we can use a plain write instead of clear_bit(),
+			 * and we dont need an smp_mb() memory barrier.
+			 */
+			list_del(&napi->poll_list);
+			napi->state = 0;
+
+			quota = work + qlen;
+		}
+		rps_unlock(sd);
+	}
+	local_irq_enable();
 
 	return work;
 }
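
process_backlog() now drains packets in two stages: producers append to input_pkt_queue under the queue lock, the consumer splices that whole list onto a private process_queue in one locked step, then handles packets with the lock dropped. A hypothetical pthread miniature of the splice-then-drain pattern (the mutex is assumed to be initialised by the caller):

#include <pthread.h>
#include <stddef.h>

struct item {
	struct item *next;
};

struct backlog {
	pthread_mutex_t lock;
	struct item *input;	/* fed by producers, under lock */
	struct item *process;	/* drained by one consumer, unlocked */
};

static void drain(struct backlog *b, void (*handle)(struct item *))
{
	pthread_mutex_lock(&b->lock);
	b->process = b->input;	/* splice: steal the entire input list */
	b->input = NULL;
	pthread_mutex_unlock(&b->lock);

	while (b->process) {	/* no lock held while handling items */
		struct item *it = b->process;

		b->process = it->next;
		handle(it);
	}
}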
@@ -2947,8 +3366,7 @@ void __napi_schedule(struct napi_struct *n)
 	unsigned long flags;
 
 	local_irq_save(flags);
-	list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list);
-	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
+	____napi_schedule(&__get_cpu_var(softnet_data), n);
 	local_irq_restore(flags);
 }
 EXPORT_SYMBOL(__napi_schedule);
@@ -3019,17 +3437,16 @@ void netif_napi_del(struct napi_struct *napi)
 }
 EXPORT_SYMBOL(netif_napi_del);
 
-
 static void net_rx_action(struct softirq_action *h)
 {
-	struct list_head *list = &__get_cpu_var(softnet_data).poll_list;
+	struct softnet_data *sd = &__get_cpu_var(softnet_data);
 	unsigned long time_limit = jiffies + 2;
 	int budget = netdev_budget;
 	void *have;
 
 	local_irq_disable();
 
-	while (!list_empty(list)) {
+	while (!list_empty(&sd->poll_list)) {
 		struct napi_struct *n;
 		int work, weight;
 
@@ -3047,7 +3464,7 @@ static void net_rx_action(struct softirq_action *h)
 		 * entries to the tail of this list, and only ->poll()
 		 * calls can remove this head entry from the list.
 		 */
-		n = list_first_entry(list, struct napi_struct, poll_list);
+		n = list_first_entry(&sd->poll_list, struct napi_struct, poll_list);
 
 		have = netpoll_poll_lock(n);
 
@@ -3082,13 +3499,13 @@ static void net_rx_action(struct softirq_action *h)
 				napi_complete(n);
 				local_irq_disable();
 			} else
-				list_move_tail(&n->poll_list, list);
+				list_move_tail(&n->poll_list, &sd->poll_list);
 		}
 
 		netpoll_poll_unlock(have);
 	}
 out:
-	local_irq_enable();
+	net_rps_action_and_irq_enable(sd);
 
 #ifdef CONFIG_NET_DMA
 	/*
@@ -3101,7 +3518,7 @@ out:
 	return;
 
 softnet_break:
-	__get_cpu_var(netdev_rx_stat).time_squeeze++;
+	sd->time_squeeze++;
 	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
 	goto out;
 }
@@ -3302,17 +3719,17 @@ static int dev_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct netif_rx_stats *softnet_get_online(loff_t *pos)
+static struct softnet_data *softnet_get_online(loff_t *pos)
 {
-	struct netif_rx_stats *rc = NULL;
+	struct softnet_data *sd = NULL;
 
 	while (*pos < nr_cpu_ids)
 		if (cpu_online(*pos)) {
-			rc = &per_cpu(netdev_rx_stat, *pos);
+			sd = &per_cpu(softnet_data, *pos);
 			break;
 		} else
 			++*pos;
-	return rc;
+	return sd;
 }
 
 static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
@@ -3332,12 +3749,12 @@ static void softnet_seq_stop(struct seq_file *seq, void *v)
 
 static int softnet_seq_show(struct seq_file *seq, void *v)
 {
-	struct netif_rx_stats *s = v;
+	struct softnet_data *sd = v;
 
-	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
-		   s->total, s->dropped, s->time_squeeze, 0,
+	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
+		   sd->processed, sd->dropped, sd->time_squeeze, 0,
 		   0, 0, 0, 0, /* was fastroute */
-		   s->cpu_collision);
+		   sd->cpu_collision, sd->received_rps);
 	return 0;
 }
 
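After this change each row of /proc/net/softnet_stat carries ten hex fields per CPU: processed, dropped, time_squeeze, five zero placeholders (one legacy slot plus the four that were fastroute), cpu_collision, and the new received_rps counter. A small reader, assuming exactly the layout emitted by the seq_printf() above:

#include <stdio.h>

int main(void)
{
	unsigned int processed, dropped, squeezed, collision, rps, unused[5];
	FILE *f = fopen("/proc/net/softnet_stat", "r");

	if (!f)
		return 1;
	while (fscanf(f, "%x %x %x %x %x %x %x %x %x %x",
		      &processed, &dropped, &squeezed,
		      &unused[0], &unused[1], &unused[2], &unused[3],
		      &unused[4], &collision, &rps) == 10)
		printf("processed=%u dropped=%u squeezed=%u rps=%u\n",
		       processed, dropped, squeezed, rps);
	fclose(f);
	return 0;
}
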
@@ -3560,11 +3977,10 @@ int netdev_set_master(struct net_device *slave, struct net_device *master)
 
 	slave->master = master;
 
-	synchronize_net();
-
-	if (old)
+	if (old) {
+		synchronize_net();
 		dev_put(old);
-
+	}
 	if (master)
 		slave->flags |= IFF_SLAVE;
 	else
@@ -3741,562 +4157,6 @@ void dev_set_rx_mode(struct net_device *dev) | |||
3741 | netif_addr_unlock_bh(dev); | 4157 | netif_addr_unlock_bh(dev); |
3742 | } | 4158 | } |
3743 | 4159 | ||
3744 | /* hw addresses list handling functions */ | ||
3745 | |||
3746 | static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr, | ||
3747 | int addr_len, unsigned char addr_type) | ||
3748 | { | ||
3749 | struct netdev_hw_addr *ha; | ||
3750 | int alloc_size; | ||
3751 | |||
3752 | if (addr_len > MAX_ADDR_LEN) | ||
3753 | return -EINVAL; | ||
3754 | |||
3755 | list_for_each_entry(ha, &list->list, list) { | ||
3756 | if (!memcmp(ha->addr, addr, addr_len) && | ||
3757 | ha->type == addr_type) { | ||
3758 | ha->refcount++; | ||
3759 | return 0; | ||
3760 | } | ||
3761 | } | ||
3762 | |||
3763 | |||
3764 | alloc_size = sizeof(*ha); | ||
3765 | if (alloc_size < L1_CACHE_BYTES) | ||
3766 | alloc_size = L1_CACHE_BYTES; | ||
3767 | ha = kmalloc(alloc_size, GFP_ATOMIC); | ||
3768 | if (!ha) | ||
3769 | return -ENOMEM; | ||
3770 | memcpy(ha->addr, addr, addr_len); | ||
3771 | ha->type = addr_type; | ||
3772 | ha->refcount = 1; | ||
3773 | ha->synced = false; | ||
3774 | list_add_tail_rcu(&ha->list, &list->list); | ||
3775 | list->count++; | ||
3776 | return 0; | ||
3777 | } | ||
3778 | |||
3779 | static void ha_rcu_free(struct rcu_head *head) | ||
3780 | { | ||
3781 | struct netdev_hw_addr *ha; | ||
3782 | |||
3783 | ha = container_of(head, struct netdev_hw_addr, rcu_head); | ||
3784 | kfree(ha); | ||
3785 | } | ||
3786 | |||
3787 | static int __hw_addr_del(struct netdev_hw_addr_list *list, unsigned char *addr, | ||
3788 | int addr_len, unsigned char addr_type) | ||
3789 | { | ||
3790 | struct netdev_hw_addr *ha; | ||
3791 | |||
3792 | list_for_each_entry(ha, &list->list, list) { | ||
3793 | if (!memcmp(ha->addr, addr, addr_len) && | ||
3794 | (ha->type == addr_type || !addr_type)) { | ||
3795 | if (--ha->refcount) | ||
3796 | return 0; | ||
3797 | list_del_rcu(&ha->list); | ||
3798 | call_rcu(&ha->rcu_head, ha_rcu_free); | ||
3799 | list->count--; | ||
3800 | return 0; | ||
3801 | } | ||
3802 | } | ||
3803 | return -ENOENT; | ||
3804 | } | ||
3805 | |||
3806 | static int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list, | ||
3807 | struct netdev_hw_addr_list *from_list, | ||
3808 | int addr_len, | ||
3809 | unsigned char addr_type) | ||
3810 | { | ||
3811 | int err; | ||
3812 | struct netdev_hw_addr *ha, *ha2; | ||
3813 | unsigned char type; | ||
3814 | |||
3815 | list_for_each_entry(ha, &from_list->list, list) { | ||
3816 | type = addr_type ? addr_type : ha->type; | ||
3817 | err = __hw_addr_add(to_list, ha->addr, addr_len, type); | ||
3818 | if (err) | ||
3819 | goto unroll; | ||
3820 | } | ||
3821 | return 0; | ||
3822 | |||
3823 | unroll: | ||
3824 | list_for_each_entry(ha2, &from_list->list, list) { | ||
3825 | if (ha2 == ha) | ||
3826 | break; | ||
3827 | type = addr_type ? addr_type : ha2->type; | ||
3828 | __hw_addr_del(to_list, ha2->addr, addr_len, type); | ||
3829 | } | ||
3830 | return err; | ||
3831 | } | ||
3832 | |||
3833 | static void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list, | ||
3834 | struct netdev_hw_addr_list *from_list, | ||
3835 | int addr_len, | ||
3836 | unsigned char addr_type) | ||
3837 | { | ||
3838 | struct netdev_hw_addr *ha; | ||
3839 | unsigned char type; | ||
3840 | |||
3841 | list_for_each_entry(ha, &from_list->list, list) { | ||
3842 | type = addr_type ? addr_type : ha->type; | ||
3843 | __hw_addr_del(to_list, ha->addr, addr_len, addr_type); | ||
3844 | } | ||
3845 | } | ||
3846 | |||
3847 | static int __hw_addr_sync(struct netdev_hw_addr_list *to_list, | ||
3848 | struct netdev_hw_addr_list *from_list, | ||
3849 | int addr_len) | ||
3850 | { | ||
3851 | int err = 0; | ||
3852 | struct netdev_hw_addr *ha, *tmp; | ||
3853 | |||
3854 | list_for_each_entry_safe(ha, tmp, &from_list->list, list) { | ||
3855 | if (!ha->synced) { | ||
3856 | err = __hw_addr_add(to_list, ha->addr, | ||
3857 | addr_len, ha->type); | ||
3858 | if (err) | ||
3859 | break; | ||
3860 | ha->synced = true; | ||
3861 | ha->refcount++; | ||
3862 | } else if (ha->refcount == 1) { | ||
3863 | __hw_addr_del(to_list, ha->addr, addr_len, ha->type); | ||
3864 | __hw_addr_del(from_list, ha->addr, addr_len, ha->type); | ||
3865 | } | ||
3866 | } | ||
3867 | return err; | ||
3868 | } | ||
3869 | |||
3870 | static void __hw_addr_unsync(struct netdev_hw_addr_list *to_list, | ||
3871 | struct netdev_hw_addr_list *from_list, | ||
3872 | int addr_len) | ||
3873 | { | ||
3874 | struct netdev_hw_addr *ha, *tmp; | ||
3875 | |||
3876 | list_for_each_entry_safe(ha, tmp, &from_list->list, list) { | ||
3877 | if (ha->synced) { | ||
3878 | __hw_addr_del(to_list, ha->addr, | ||
3879 | addr_len, ha->type); | ||
3880 | ha->synced = false; | ||
3881 | __hw_addr_del(from_list, ha->addr, | ||
3882 | addr_len, ha->type); | ||
3883 | } | ||
3884 | } | ||
3885 | } | ||
3886 | |||
3887 | static void __hw_addr_flush(struct netdev_hw_addr_list *list) | ||
3888 | { | ||
3889 | struct netdev_hw_addr *ha, *tmp; | ||
3890 | |||
3891 | list_for_each_entry_safe(ha, tmp, &list->list, list) { | ||
3892 | list_del_rcu(&ha->list); | ||
3893 | call_rcu(&ha->rcu_head, ha_rcu_free); | ||
3894 | } | ||
3895 | list->count = 0; | ||
3896 | } | ||
3897 | |||
3898 | static void __hw_addr_init(struct netdev_hw_addr_list *list) | ||
3899 | { | ||
3900 | INIT_LIST_HEAD(&list->list); | ||
3901 | list->count = 0; | ||
3902 | } | ||
3903 | |||
3904 | /* Device addresses handling functions */ | ||
3905 | |||
3906 | static void dev_addr_flush(struct net_device *dev) | ||
3907 | { | ||
3908 | /* rtnl_mutex must be held here */ | ||
3909 | |||
3910 | __hw_addr_flush(&dev->dev_addrs); | ||
3911 | dev->dev_addr = NULL; | ||
3912 | } | ||
3913 | |||
3914 | static int dev_addr_init(struct net_device *dev) | ||
3915 | { | ||
3916 | unsigned char addr[MAX_ADDR_LEN]; | ||
3917 | struct netdev_hw_addr *ha; | ||
3918 | int err; | ||
3919 | |||
3920 | /* rtnl_mutex must be held here */ | ||
3921 | |||
3922 | __hw_addr_init(&dev->dev_addrs); | ||
3923 | memset(addr, 0, sizeof(addr)); | ||
3924 | err = __hw_addr_add(&dev->dev_addrs, addr, sizeof(addr), | ||
3925 | NETDEV_HW_ADDR_T_LAN); | ||
3926 | if (!err) { | ||
3927 | /* | ||
3928 | * Get the first (previously created) address from the list | ||
3929 | * and set dev_addr pointer to this location. | ||
3930 | */ | ||
3931 | ha = list_first_entry(&dev->dev_addrs.list, | ||
3932 | struct netdev_hw_addr, list); | ||
3933 | dev->dev_addr = ha->addr; | ||
3934 | } | ||
3935 | return err; | ||
3936 | } | ||
3937 | |||
3938 | /** | ||
3939 | * dev_addr_add - Add a device address | ||
3940 | * @dev: device | ||
3941 | * @addr: address to add | ||
3942 | * @addr_type: address type | ||
3943 | * | ||
3944 | * Add a device address to the device or increase the reference count if | ||
3945 | * it already exists. | ||
3946 | * | ||
3947 | * The caller must hold the rtnl_mutex. | ||
3948 | */ | ||
3949 | int dev_addr_add(struct net_device *dev, unsigned char *addr, | ||
3950 | unsigned char addr_type) | ||
3951 | { | ||
3952 | int err; | ||
3953 | |||
3954 | ASSERT_RTNL(); | ||
3955 | |||
3956 | err = __hw_addr_add(&dev->dev_addrs, addr, dev->addr_len, addr_type); | ||
3957 | if (!err) | ||
3958 | call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); | ||
3959 | return err; | ||
3960 | } | ||
3961 | EXPORT_SYMBOL(dev_addr_add); | ||
3962 | |||
3963 | /** | ||
3964 | * dev_addr_del - Release a device address. | ||
3965 | * @dev: device | ||
3966 | * @addr: address to delete | ||
3967 | * @addr_type: address type | ||
3968 | * | ||
3969 | * Release reference to a device address and remove it from the device | ||
3970 | * if the reference count drops to zero. | ||
3971 | * | ||
3972 | * The caller must hold the rtnl_mutex. | ||
3973 | */ | ||
3974 | int dev_addr_del(struct net_device *dev, unsigned char *addr, | ||
3975 | unsigned char addr_type) | ||
3976 | { | ||
3977 | int err; | ||
3978 | struct netdev_hw_addr *ha; | ||
3979 | |||
3980 | ASSERT_RTNL(); | ||
3981 | |||
3982 | /* | ||
3983 | * We can not remove the first address from the list because | ||
3984 | * dev->dev_addr points to that. | ||
3985 | */ | ||
3986 | ha = list_first_entry(&dev->dev_addrs.list, | ||
3987 | struct netdev_hw_addr, list); | ||
3988 | if (ha->addr == dev->dev_addr && ha->refcount == 1) | ||
3989 | return -ENOENT; | ||
3990 | |||
3991 | err = __hw_addr_del(&dev->dev_addrs, addr, dev->addr_len, | ||
3992 | addr_type); | ||
3993 | if (!err) | ||
3994 | call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); | ||
3995 | return err; | ||
3996 | } | ||
3997 | EXPORT_SYMBOL(dev_addr_del); | ||
3998 | |||
3999 | /** | ||
4000 | * dev_addr_add_multiple - Add device addresses from another device | ||
4001 | * @to_dev: device to which addresses will be added | ||
4002 | * @from_dev: device from which addresses will be added | ||
4003 | * @addr_type: address type - 0 means type will be used from from_dev | ||
4004 | * | ||
4005 | * Add the device addresses of one device to another. | ||
4006 | * | ||
4007 | * The caller must hold the rtnl_mutex. | ||
4008 | */ | ||
4009 | int dev_addr_add_multiple(struct net_device *to_dev, | ||
4010 | struct net_device *from_dev, | ||
4011 | unsigned char addr_type) | ||
4012 | { | ||
4013 | int err; | ||
4014 | |||
4015 | ASSERT_RTNL(); | ||
4016 | |||
4017 | if (from_dev->addr_len != to_dev->addr_len) | ||
4018 | return -EINVAL; | ||
4019 | err = __hw_addr_add_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs, | ||
4020 | to_dev->addr_len, addr_type); | ||
4021 | if (!err) | ||
4022 | call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev); | ||
4023 | return err; | ||
4024 | } | ||
4025 | EXPORT_SYMBOL(dev_addr_add_multiple); | ||
4026 | |||
4027 | /** | ||
4028 | * dev_addr_del_multiple - Delete device addresses by another device | ||
4029 | * @to_dev: device where the addresses will be deleted | ||
4030 | * @from_dev: device supplying the addresses to be deleted | ||
4031 | * @addr_type: address type - 0 means type will be used from from_dev | ||
4032 | * | ||
4033 | * Deletes the addresses listed in from_dev from to_dev. | ||
4034 | * | ||
4035 | * The caller must hold the rtnl_mutex. | ||
4036 | */ | ||
4037 | int dev_addr_del_multiple(struct net_device *to_dev, | ||
4038 | struct net_device *from_dev, | ||
4039 | unsigned char addr_type) | ||
4040 | { | ||
4041 | ASSERT_RTNL(); | ||
4042 | |||
4043 | if (from_dev->addr_len != to_dev->addr_len) | ||
4044 | return -EINVAL; | ||
4045 | __hw_addr_del_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs, | ||
4046 | to_dev->addr_len, addr_type); | ||
4047 | call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev); | ||
4048 | return 0; | ||
4049 | } | ||
4050 | EXPORT_SYMBOL(dev_addr_del_multiple); | ||
4051 | |||
4052 | /* multicast address handling functions */ | ||
4053 | |||
4054 | int __dev_addr_delete(struct dev_addr_list **list, int *count, | ||
4055 | void *addr, int alen, int glbl) | ||
4056 | { | ||
4057 | struct dev_addr_list *da; | ||
4058 | |||
4059 | for (; (da = *list) != NULL; list = &da->next) { | ||
4060 | if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 && | ||
4061 | alen == da->da_addrlen) { | ||
4062 | if (glbl) { | ||
4063 | int old_glbl = da->da_gusers; | ||
4064 | da->da_gusers = 0; | ||
4065 | if (old_glbl == 0) | ||
4066 | break; | ||
4067 | } | ||
4068 | if (--da->da_users) | ||
4069 | return 0; | ||
4070 | |||
4071 | *list = da->next; | ||
4072 | kfree(da); | ||
4073 | (*count)--; | ||
4074 | return 0; | ||
4075 | } | ||
4076 | } | ||
4077 | return -ENOENT; | ||
4078 | } | ||
4079 | |||
4080 | int __dev_addr_add(struct dev_addr_list **list, int *count, | ||
4081 | void *addr, int alen, int glbl) | ||
4082 | { | ||
4083 | struct dev_addr_list *da; | ||
4084 | |||
4085 | for (da = *list; da != NULL; da = da->next) { | ||
4086 | if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 && | ||
4087 | da->da_addrlen == alen) { | ||
4088 | if (glbl) { | ||
4089 | int old_glbl = da->da_gusers; | ||
4090 | da->da_gusers = 1; | ||
4091 | if (old_glbl) | ||
4092 | return 0; | ||
4093 | } | ||
4094 | da->da_users++; | ||
4095 | return 0; | ||
4096 | } | ||
4097 | } | ||
4098 | |||
4099 | da = kzalloc(sizeof(*da), GFP_ATOMIC); | ||
4100 | if (da == NULL) | ||
4101 | return -ENOMEM; | ||
4102 | memcpy(da->da_addr, addr, alen); | ||
4103 | da->da_addrlen = alen; | ||
4104 | da->da_users = 1; | ||
4105 | da->da_gusers = glbl ? 1 : 0; | ||
4106 | da->next = *list; | ||
4107 | *list = da; | ||
4108 | (*count)++; | ||
4109 | return 0; | ||
4110 | } | ||
4111 | |||
4112 | /** | ||
4113 | * dev_unicast_delete - Release secondary unicast address. | ||
4114 | * @dev: device | ||
4115 | * @addr: address to delete | ||
4116 | * | ||
4117 | * Release reference to a secondary unicast address and remove it | ||
4118 | * from the device if the reference count drops to zero. | ||
4119 | * | ||
4120 | * The caller must hold the rtnl_mutex. | ||
4121 | */ | ||
4122 | int dev_unicast_delete(struct net_device *dev, void *addr) | ||
4123 | { | ||
4124 | int err; | ||
4125 | |||
4126 | ASSERT_RTNL(); | ||
4127 | |||
4128 | netif_addr_lock_bh(dev); | ||
4129 | err = __hw_addr_del(&dev->uc, addr, dev->addr_len, | ||
4130 | NETDEV_HW_ADDR_T_UNICAST); | ||
4131 | if (!err) | ||
4132 | __dev_set_rx_mode(dev); | ||
4133 | netif_addr_unlock_bh(dev); | ||
4134 | return err; | ||
4135 | } | ||
4136 | EXPORT_SYMBOL(dev_unicast_delete); | ||
4137 | |||
4138 | /** | ||
4139 | * dev_unicast_add - add a secondary unicast address | ||
4140 | * @dev: device | ||
4141 | * @addr: address to add | ||
4142 | * | ||
4143 | * Add a secondary unicast address to the device or increase | ||
4144 | * the reference count if it already exists. | ||
4145 | * | ||
4146 | * The caller must hold the rtnl_mutex. | ||
4147 | */ | ||
4148 | int dev_unicast_add(struct net_device *dev, void *addr) | ||
4149 | { | ||
4150 | int err; | ||
4151 | |||
4152 | ASSERT_RTNL(); | ||
4153 | |||
4154 | netif_addr_lock_bh(dev); | ||
4155 | err = __hw_addr_add(&dev->uc, addr, dev->addr_len, | ||
4156 | NETDEV_HW_ADDR_T_UNICAST); | ||
4157 | if (!err) | ||
4158 | __dev_set_rx_mode(dev); | ||
4159 | netif_addr_unlock_bh(dev); | ||
4160 | return err; | ||
4161 | } | ||
4162 | EXPORT_SYMBOL(dev_unicast_add); | ||
4163 | |||
4164 | int __dev_addr_sync(struct dev_addr_list **to, int *to_count, | ||
4165 | struct dev_addr_list **from, int *from_count) | ||
4166 | { | ||
4167 | struct dev_addr_list *da, *next; | ||
4168 | int err = 0; | ||
4169 | |||
4170 | da = *from; | ||
4171 | while (da != NULL) { | ||
4172 | next = da->next; | ||
4173 | if (!da->da_synced) { | ||
4174 | err = __dev_addr_add(to, to_count, | ||
4175 | da->da_addr, da->da_addrlen, 0); | ||
4176 | if (err < 0) | ||
4177 | break; | ||
4178 | da->da_synced = 1; | ||
4179 | da->da_users++; | ||
4180 | } else if (da->da_users == 1) { | ||
4181 | __dev_addr_delete(to, to_count, | ||
4182 | da->da_addr, da->da_addrlen, 0); | ||
4183 | __dev_addr_delete(from, from_count, | ||
4184 | da->da_addr, da->da_addrlen, 0); | ||
4185 | } | ||
4186 | da = next; | ||
4187 | } | ||
4188 | return err; | ||
4189 | } | ||
4190 | EXPORT_SYMBOL_GPL(__dev_addr_sync); | ||
4191 | |||
4192 | void __dev_addr_unsync(struct dev_addr_list **to, int *to_count, | ||
4193 | struct dev_addr_list **from, int *from_count) | ||
4194 | { | ||
4195 | struct dev_addr_list *da, *next; | ||
4196 | |||
4197 | da = *from; | ||
4198 | while (da != NULL) { | ||
4199 | next = da->next; | ||
4200 | if (da->da_synced) { | ||
4201 | __dev_addr_delete(to, to_count, | ||
4202 | da->da_addr, da->da_addrlen, 0); | ||
4203 | da->da_synced = 0; | ||
4204 | __dev_addr_delete(from, from_count, | ||
4205 | da->da_addr, da->da_addrlen, 0); | ||
4206 | } | ||
4207 | da = next; | ||
4208 | } | ||
4209 | } | ||
4210 | EXPORT_SYMBOL_GPL(__dev_addr_unsync); | ||
4211 | |||
4212 | /** | ||
4213 | * dev_unicast_sync - Synchronize device's unicast list to another device | ||
4214 | * @to: destination device | ||
4215 | * @from: source device | ||
4216 | * | ||
4217 | * Add newly added addresses to the destination device and release | ||
4218 | * addresses that have no users left. The source device must be | ||
4219 | * locked by netif_tx_lock_bh. | ||
4220 | * | ||
4221 | * This function is intended to be called from the dev->set_rx_mode | ||
4222 | * function of layered software devices. | ||
4223 | */ | ||
4224 | int dev_unicast_sync(struct net_device *to, struct net_device *from) | ||
4225 | { | ||
4226 | int err = 0; | ||
4227 | |||
4228 | if (to->addr_len != from->addr_len) | ||
4229 | return -EINVAL; | ||
4230 | |||
4231 | netif_addr_lock_bh(to); | ||
4232 | err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len); | ||
4233 | if (!err) | ||
4234 | __dev_set_rx_mode(to); | ||
4235 | netif_addr_unlock_bh(to); | ||
4236 | return err; | ||
4237 | } | ||
4238 | EXPORT_SYMBOL(dev_unicast_sync); | ||
4239 | |||
4240 | /** | ||
4241 | * dev_unicast_unsync - Remove synchronized addresses from the destination device | ||
4242 | * @to: destination device | ||
4243 | * @from: source device | ||
4244 | * | ||
4245 | * Remove all addresses that were added to the destination device by | ||
4246 | * dev_unicast_sync(). This function is intended to be called from the | ||
4247 | * dev->stop function of layered software devices. | ||
4248 | */ | ||
4249 | void dev_unicast_unsync(struct net_device *to, struct net_device *from) | ||
4250 | { | ||
4251 | if (to->addr_len != from->addr_len) | ||
4252 | return; | ||
4253 | |||
4254 | netif_addr_lock_bh(from); | ||
4255 | netif_addr_lock(to); | ||
4256 | __hw_addr_unsync(&to->uc, &from->uc, to->addr_len); | ||
4257 | __dev_set_rx_mode(to); | ||
4258 | netif_addr_unlock(to); | ||
4259 | netif_addr_unlock_bh(from); | ||
4260 | } | ||
4261 | EXPORT_SYMBOL(dev_unicast_unsync); | ||
4262 | |||
4263 | static void dev_unicast_flush(struct net_device *dev) | ||
4264 | { | ||
4265 | netif_addr_lock_bh(dev); | ||
4266 | __hw_addr_flush(&dev->uc); | ||
4267 | netif_addr_unlock_bh(dev); | ||
4268 | } | ||
4269 | |||
4270 | static void dev_unicast_init(struct net_device *dev) | ||
4271 | { | ||
4272 | __hw_addr_init(&dev->uc); | ||
4273 | } | ||
4274 | |||
4275 | |||
4276 | static void __dev_addr_discard(struct dev_addr_list **list) | ||
4277 | { | ||
4278 | struct dev_addr_list *tmp; | ||
4279 | |||
4280 | while (*list != NULL) { | ||
4281 | tmp = *list; | ||
4282 | *list = tmp->next; | ||
4283 | if (tmp->da_users > tmp->da_gusers) | ||
4284 | printk("__dev_addr_discard: address leakage! " | ||
4285 | "da_users=%d\n", tmp->da_users); | ||
4286 | kfree(tmp); | ||
4287 | } | ||
4288 | } | ||
4289 | |||
4290 | static void dev_addr_discard(struct net_device *dev) | ||
4291 | { | ||
4292 | netif_addr_lock_bh(dev); | ||
4293 | |||
4294 | __dev_addr_discard(&dev->mc_list); | ||
4295 | netdev_mc_count(dev) = 0; | ||
4296 | |||
4297 | netif_addr_unlock_bh(dev); | ||
4298 | } | ||
4299 | |||
4300 | /** | 4160 | /** |
4301 | * dev_get_flags - get flags reported to userspace | 4161 | * dev_get_flags - get flags reported to userspace |
4302 | * @dev: device | 4162 | * @dev: device |
@@ -4607,8 +4467,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) | |||
4607 | return -EINVAL; | 4467 | return -EINVAL; |
4608 | if (!netif_device_present(dev)) | 4468 | if (!netif_device_present(dev)) |
4609 | return -ENODEV; | 4469 | return -ENODEV; |
4610 | return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data, | 4470 | return dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data); |
4611 | dev->addr_len, 1); | ||
4612 | 4471 | ||
4613 | case SIOCDELMULTI: | 4472 | case SIOCDELMULTI: |
4614 | if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) || | 4473 | if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) || |
@@ -4616,8 +4475,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) | |||
4616 | return -EINVAL; | 4475 | return -EINVAL; |
4617 | if (!netif_device_present(dev)) | 4476 | if (!netif_device_present(dev)) |
4618 | return -ENODEV; | 4477 | return -ENODEV; |
4619 | return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data, | 4478 | return dev_mc_del_global(dev, ifr->ifr_hwaddr.sa_data); |
4620 | dev->addr_len, 1); | ||
4621 | 4479 | ||
4622 | case SIOCSIFTXQLEN: | 4480 | case SIOCSIFTXQLEN: |
4623 | if (ifr->ifr_qlen < 0) | 4481 | if (ifr->ifr_qlen < 0) |
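These two hunks move the SIOCADDMULTI/SIOCDELMULTI ioctls from the old refcounted dev_mc_add()/dev_mc_delete() (with the glbl flag) to the new dev_mc_add_global()/dev_mc_del_global(), so any number of userspace requests for the same address pin at most one reference. A userspace sketch of the call path these hunks serve; the interface name and multicast MAC below are illustrative, not part of this patch:

	#include <string.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <sys/socket.h>
	#include <net/if.h>

	static int join_eth_mcast(const char *ifname, const unsigned char mac[6])
	{
		struct ifreq ifr;
		int ret, fd = socket(AF_INET, SOCK_DGRAM, 0);

		if (fd < 0)
			return -1;
		memset(&ifr, 0, sizeof(ifr));
		strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1);
		ifr.ifr_hwaddr.sa_family = AF_UNSPEC;
		memcpy(ifr.ifr_hwaddr.sa_data, mac, 6);
		/* kernel side now lands in dev_mc_add_global() */
		ret = ioctl(fd, SIOCADDMULTI, &ifr);
		close(fd);
		return ret;
	}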
@@ -4924,8 +4782,8 @@ static void rollback_registered_many(struct list_head *head) | |||
4924 | /* | 4782 | /* |
4925 | * Flush the unicast and multicast chains | 4783 | * Flush the unicast and multicast chains |
4926 | */ | 4784 | */ |
4927 | dev_unicast_flush(dev); | 4785 | dev_uc_flush(dev); |
4928 | dev_addr_discard(dev); | 4786 | dev_mc_flush(dev); |
4929 | 4787 | ||
4930 | if (dev->netdev_ops->ndo_uninit) | 4788 | if (dev->netdev_ops->ndo_uninit) |
4931 | dev->netdev_ops->ndo_uninit(dev); | 4789 | dev->netdev_ops->ndo_uninit(dev); |
@@ -5074,6 +4932,24 @@ int register_netdevice(struct net_device *dev) | |||
5074 | 4932 | ||
5075 | dev->iflink = -1; | 4933 | dev->iflink = -1; |
5076 | 4934 | ||
4935 | #ifdef CONFIG_RPS | ||
4936 | if (!dev->num_rx_queues) { | ||
4937 | /* | ||
4938 | * Allocate a single RX queue if driver never called | ||
4939 | * alloc_netdev_mq | ||
4940 | */ | ||
4941 | |||
4942 | dev->_rx = kzalloc(sizeof(struct netdev_rx_queue), GFP_KERNEL); | ||
4943 | if (!dev->_rx) { | ||
4944 | ret = -ENOMEM; | ||
4945 | goto out; | ||
4946 | } | ||
4947 | |||
4948 | dev->_rx->first = dev->_rx; | ||
4949 | atomic_set(&dev->_rx->count, 1); | ||
4950 | dev->num_rx_queues = 1; | ||
4951 | } | ||
4952 | #endif | ||
5077 | /* Init, if this function is available */ | 4953 | /* Init, if this function is available */ |
5078 | if (dev->netdev_ops->ndo_init) { | 4954 | if (dev->netdev_ops->ndo_init) { |
5079 | ret = dev->netdev_ops->ndo_init(dev); | 4955 | ret = dev->netdev_ops->ndo_init(dev); |
@@ -5113,8 +4989,6 @@ int register_netdevice(struct net_device *dev) | |||
5113 | if (dev->features & NETIF_F_SG) | 4989 | if (dev->features & NETIF_F_SG) |
5114 | dev->features |= NETIF_F_GSO; | 4990 | dev->features |= NETIF_F_GSO; |
5115 | 4991 | ||
5116 | netdev_initialize_kobject(dev); | ||
5117 | |||
5118 | ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev); | 4992 | ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev); |
5119 | ret = notifier_to_errno(ret); | 4993 | ret = notifier_to_errno(ret); |
5120 | if (ret) | 4994 | if (ret) |
@@ -5434,6 +5308,10 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, | |||
5434 | struct net_device *dev; | 5308 | struct net_device *dev; |
5435 | size_t alloc_size; | 5309 | size_t alloc_size; |
5436 | struct net_device *p; | 5310 | struct net_device *p; |
5311 | #ifdef CONFIG_RPS | ||
5312 | struct netdev_rx_queue *rx; | ||
5313 | int i; | ||
5314 | #endif | ||
5437 | 5315 | ||
5438 | BUG_ON(strlen(name) >= sizeof(dev->name)); | 5316 | BUG_ON(strlen(name) >= sizeof(dev->name)); |
5439 | 5317 | ||
@@ -5459,13 +5337,32 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, | |||
5459 | goto free_p; | 5337 | goto free_p; |
5460 | } | 5338 | } |
5461 | 5339 | ||
5340 | #ifdef CONFIG_RPS | ||
5341 | rx = kcalloc(queue_count, sizeof(struct netdev_rx_queue), GFP_KERNEL); | ||
5342 | if (!rx) { | ||
5343 | printk(KERN_ERR "alloc_netdev: Unable to allocate " | ||
5344 | "rx queues.\n"); | ||
5345 | goto free_tx; | ||
5346 | } | ||
5347 | |||
5348 | atomic_set(&rx->count, queue_count); | ||
5349 | |||
5350 | /* | ||
5351 | * Set a pointer to first element in the array which holds the | ||
5352 | * reference count. | ||
5353 | */ | ||
5354 | for (i = 0; i < queue_count; i++) | ||
5355 | rx[i].first = rx; | ||
5356 | #endif | ||
5357 | |||
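Each netdev_rx_queue carries a pointer back to the first element of the kcalloc'd array, and only that first element's atomic count is used; whoever drops the last reference can free the whole block without knowing which index it holds. A reduced sketch of that pattern, with made-up names rather than the real kernel structures:

	#include <linux/slab.h>
	#include <asm/atomic.h>

	struct demo_queue {
		struct demo_queue *first;	/* array head, holds the refcount */
		atomic_t count;			/* used in element 0 only */
	};

	static void demo_queue_put(struct demo_queue *q)
	{
		/* free the whole array once the last queue drops its reference */
		if (atomic_dec_and_test(&q->first->count))
			kfree(q->first);
	}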
5462 | dev = PTR_ALIGN(p, NETDEV_ALIGN); | 5358 | dev = PTR_ALIGN(p, NETDEV_ALIGN); |
5463 | dev->padded = (char *)dev - (char *)p; | 5359 | dev->padded = (char *)dev - (char *)p; |
5464 | 5360 | ||
5465 | if (dev_addr_init(dev)) | 5361 | if (dev_addr_init(dev)) |
5466 | goto free_tx; | 5362 | goto free_rx; |
5467 | 5363 | ||
5468 | dev_unicast_init(dev); | 5364 | dev_mc_init(dev); |
5365 | dev_uc_init(dev); | ||
5469 | 5366 | ||
5470 | dev_net_set(dev, &init_net); | 5367 | dev_net_set(dev, &init_net); |
5471 | 5368 | ||
@@ -5473,6 +5370,11 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, | |||
5473 | dev->num_tx_queues = queue_count; | 5370 | dev->num_tx_queues = queue_count; |
5474 | dev->real_num_tx_queues = queue_count; | 5371 | dev->real_num_tx_queues = queue_count; |
5475 | 5372 | ||
5373 | #ifdef CONFIG_RPS | ||
5374 | dev->_rx = rx; | ||
5375 | dev->num_rx_queues = queue_count; | ||
5376 | #endif | ||
5377 | |||
5476 | dev->gso_max_size = GSO_MAX_SIZE; | 5378 | dev->gso_max_size = GSO_MAX_SIZE; |
5477 | 5379 | ||
5478 | netdev_init_queues(dev); | 5380 | netdev_init_queues(dev); |
@@ -5487,9 +5389,12 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, | |||
5487 | strcpy(dev->name, name); | 5389 | strcpy(dev->name, name); |
5488 | return dev; | 5390 | return dev; |
5489 | 5391 | ||
5392 | free_rx: | ||
5393 | #ifdef CONFIG_RPS | ||
5394 | kfree(rx); | ||
5490 | free_tx: | 5395 | free_tx: |
5396 | #endif | ||
5491 | kfree(tx); | 5397 | kfree(tx); |
5492 | |||
5493 | free_p: | 5398 | free_p: |
5494 | kfree(p); | 5399 | kfree(p); |
5495 | return NULL; | 5400 | return NULL; |
@@ -5635,15 +5540,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char | |||
5635 | if (dev->features & NETIF_F_NETNS_LOCAL) | 5540 | if (dev->features & NETIF_F_NETNS_LOCAL) |
5636 | goto out; | 5541 | goto out; |
5637 | 5542 | ||
5638 | #ifdef CONFIG_SYSFS | ||
5639 | /* Don't allow real devices to be moved when sysfs | ||
5640 | * is enabled. | ||
5641 | */ | ||
5642 | err = -EINVAL; | ||
5643 | if (dev->dev.parent) | ||
5644 | goto out; | ||
5645 | #endif | ||
5646 | |||
5647 | /* Ensure the device has been registered */ | 5543 |
5648 | err = -EINVAL; | 5544 | err = -EINVAL; |
5649 | if (dev->reg_state != NETREG_REGISTERED) | 5545 | if (dev->reg_state != NETREG_REGISTERED) |
@@ -5691,10 +5587,8 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char | |||
5691 | /* | 5587 | /* |
5692 | * Flush the unicast and multicast chains | 5588 | * Flush the unicast and multicast chains |
5693 | */ | 5589 | */ |
5694 | dev_unicast_flush(dev); | 5590 | dev_uc_flush(dev); |
5695 | dev_addr_discard(dev); | 5591 | dev_mc_flush(dev); |
5696 | |||
5697 | netdev_unregister_kobject(dev); | ||
5698 | 5592 | ||
5699 | /* Actually switch the network namespace */ | 5593 | /* Actually switch the network namespace */ |
5700 | dev_net_set(dev, net); | 5594 | dev_net_set(dev, net); |
@@ -5708,7 +5602,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char | |||
5708 | } | 5602 | } |
5709 | 5603 | ||
5710 | /* Fixup kobjects */ | 5604 | /* Fixup kobjects */ |
5711 | err = netdev_register_kobject(dev); | 5605 | err = device_rename(&dev->dev, dev->name); |
5712 | WARN_ON(err); | 5606 | WARN_ON(err); |
5713 | 5607 | ||
5714 | /* Add the device back in the hashes */ | 5608 | /* Add the device back in the hashes */ |
@@ -5735,7 +5629,6 @@ static int dev_cpu_callback(struct notifier_block *nfb, | |||
5735 | void *ocpu) | 5629 | void *ocpu) |
5736 | { | 5630 | { |
5737 | struct sk_buff **list_skb; | 5631 | struct sk_buff **list_skb; |
5738 | struct Qdisc **list_net; | ||
5739 | struct sk_buff *skb; | 5632 | struct sk_buff *skb; |
5740 | unsigned int cpu, oldcpu = (unsigned long)ocpu; | 5633 | unsigned int cpu, oldcpu = (unsigned long)ocpu; |
5741 | struct softnet_data *sd, *oldsd; | 5634 | struct softnet_data *sd, *oldsd; |
@@ -5756,19 +5649,23 @@ static int dev_cpu_callback(struct notifier_block *nfb, | |||
5756 | *list_skb = oldsd->completion_queue; | 5649 | *list_skb = oldsd->completion_queue; |
5757 | oldsd->completion_queue = NULL; | 5650 | oldsd->completion_queue = NULL; |
5758 | 5651 | ||
5759 | /* Find end of our output_queue. */ | ||
5760 | list_net = &sd->output_queue; | ||
5761 | while (*list_net) | ||
5762 | list_net = &(*list_net)->next_sched; | ||
5763 | /* Append output queue from offline CPU. */ | 5652 | /* Append output queue from offline CPU. */ |
5764 | *list_net = oldsd->output_queue; | 5653 | if (oldsd->output_queue) { |
5765 | oldsd->output_queue = NULL; | 5654 | *sd->output_queue_tailp = oldsd->output_queue; |
5655 | sd->output_queue_tailp = oldsd->output_queue_tailp; | ||
5656 | oldsd->output_queue = NULL; | ||
5657 | oldsd->output_queue_tailp = &oldsd->output_queue; | ||
5658 | } | ||
5766 | 5659 | ||
5767 | raise_softirq_irqoff(NET_TX_SOFTIRQ); | 5660 | raise_softirq_irqoff(NET_TX_SOFTIRQ); |
5768 | local_irq_enable(); | 5661 | local_irq_enable(); |
5769 | 5662 | ||
5770 | /* Process offline CPU's input_pkt_queue */ | 5663 | /* Process offline CPU's input_pkt_queue */ |
5771 | while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) | 5664 | while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) { |
5665 | netif_rx(skb); | ||
5666 | input_queue_head_add(oldsd, 1); | ||
5667 | } | ||
5668 | while ((skb = __skb_dequeue(&oldsd->process_queue))) | ||
5772 | netif_rx(skb); | 5669 | netif_rx(skb); |
5773 | 5670 | ||
5774 | return NOTIFY_OK; | 5671 | return NOTIFY_OK; |
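The old code walked sd->output_queue to its end on every CPU-offline event before appending; keeping a tail pointer (output_queue_tailp) makes the splice constant-time. The same idiom as a self-contained sketch with illustrative types:

	#include <stddef.h>

	struct demo_node {
		struct demo_node *next;
	};

	struct demo_tailq {
		struct demo_node *head;
		struct demo_node **tailp;	/* == &head while empty */
	};

	static void demo_tailq_splice(struct demo_tailq *to, struct demo_tailq *from)
	{
		if (!from->head)
			return;
		*to->tailp = from->head;	/* O(1): no walk to find the end */
		to->tailp = from->tailp;
		from->head = NULL;
		from->tailp = &from->head;
	}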
@@ -5985,17 +5882,26 @@ static int __init net_dev_init(void) | |||
5985 | */ | 5882 | */ |
5986 | 5883 | ||
5987 | for_each_possible_cpu(i) { | 5884 | for_each_possible_cpu(i) { |
5988 | struct softnet_data *queue; | 5885 | struct softnet_data *sd = &per_cpu(softnet_data, i); |
5989 | 5886 | ||
5990 | queue = &per_cpu(softnet_data, i); | 5887 | memset(sd, 0, sizeof(*sd)); |
5991 | skb_queue_head_init(&queue->input_pkt_queue); | 5888 | skb_queue_head_init(&sd->input_pkt_queue); |
5992 | queue->completion_queue = NULL; | 5889 | skb_queue_head_init(&sd->process_queue); |
5993 | INIT_LIST_HEAD(&queue->poll_list); | 5890 | sd->completion_queue = NULL; |
5891 | INIT_LIST_HEAD(&sd->poll_list); | ||
5892 | sd->output_queue = NULL; | ||
5893 | sd->output_queue_tailp = &sd->output_queue; | ||
5894 | #ifdef CONFIG_RPS | ||
5895 | sd->csd.func = rps_trigger_softirq; | ||
5896 | sd->csd.info = sd; | ||
5897 | sd->csd.flags = 0; | ||
5898 | sd->cpu = i; | ||
5899 | #endif | ||
5994 | 5900 | ||
5995 | queue->backlog.poll = process_backlog; | 5901 | sd->backlog.poll = process_backlog; |
5996 | queue->backlog.weight = weight_p; | 5902 | sd->backlog.weight = weight_p; |
5997 | queue->backlog.gro_list = NULL; | 5903 | sd->backlog.gro_list = NULL; |
5998 | queue->backlog.gro_count = 0; | 5904 | sd->backlog.gro_count = 0; |
5999 | } | 5905 | } |
6000 | 5906 | ||
6001 | dev_boot_phase = 0; | 5907 | dev_boot_phase = 0; |
@@ -6030,7 +5936,7 @@ subsys_initcall(net_dev_init); | |||
6030 | 5936 | ||
6031 | static int __init initialize_hashrnd(void) | 5937 | static int __init initialize_hashrnd(void) |
6032 | { | 5938 | { |
6033 | get_random_bytes(&skb_tx_hashrnd, sizeof(skb_tx_hashrnd)); | 5939 | get_random_bytes(&hashrnd, sizeof(hashrnd)); |
6034 | return 0; | 5940 | return 0; |
6035 | } | 5941 | } |
6036 | 5942 | ||
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c new file mode 100644 index 000000000000..508f9c18992f --- /dev/null +++ b/net/core/dev_addr_lists.c | |||
@@ -0,0 +1,741 @@ | |||
1 | /* | ||
2 | * net/core/dev_addr_lists.c - Functions for handling net device lists | ||
3 | * Copyright (c) 2010 Jiri Pirko <jpirko@redhat.com> | ||
4 | * | ||
5 | * This file contains functions for working with unicast, multicast and device | ||
6 | * address lists. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License as published by | ||
10 | * the Free Software Foundation; either version 2 of the License, or | ||
11 | * (at your option) any later version. | ||
12 | */ | ||
13 | |||
14 | #include <linux/netdevice.h> | ||
15 | #include <linux/rtnetlink.h> | ||
16 | #include <linux/list.h> | ||
17 | #include <linux/proc_fs.h> | ||
18 | |||
19 | /* | ||
20 | * General list handling functions | ||
21 | */ | ||
22 | |||
23 | static int __hw_addr_add_ex(struct netdev_hw_addr_list *list, | ||
24 | unsigned char *addr, int addr_len, | ||
25 | unsigned char addr_type, bool global) | ||
26 | { | ||
27 | struct netdev_hw_addr *ha; | ||
28 | int alloc_size; | ||
29 | |||
30 | if (addr_len > MAX_ADDR_LEN) | ||
31 | return -EINVAL; | ||
32 | |||
33 | list_for_each_entry(ha, &list->list, list) { | ||
34 | if (!memcmp(ha->addr, addr, addr_len) && | ||
35 | ha->type == addr_type) { | ||
36 | if (global) { | ||
37 | /* check if addr is already used as global */ | ||
38 | if (ha->global_use) | ||
39 | return 0; | ||
40 | else | ||
41 | ha->global_use = true; | ||
42 | } | ||
43 | ha->refcount++; | ||
44 | return 0; | ||
45 | } | ||
46 | } | ||
47 | |||
48 | |||
49 | alloc_size = sizeof(*ha); | ||
50 | if (alloc_size < L1_CACHE_BYTES) | ||
51 | alloc_size = L1_CACHE_BYTES; | ||
52 | ha = kmalloc(alloc_size, GFP_ATOMIC); | ||
53 | if (!ha) | ||
54 | return -ENOMEM; | ||
55 | memcpy(ha->addr, addr, addr_len); | ||
56 | ha->type = addr_type; | ||
57 | ha->refcount = 1; | ||
58 | ha->global_use = global; | ||
59 | ha->synced = false; | ||
60 | list_add_tail_rcu(&ha->list, &list->list); | ||
61 | list->count++; | ||
62 | return 0; | ||
63 | } | ||
64 | |||
65 | static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr, | ||
66 | int addr_len, unsigned char addr_type) | ||
67 | { | ||
68 | return __hw_addr_add_ex(list, addr, addr_len, addr_type, false); | ||
69 | } | ||
70 | |||
71 | static void ha_rcu_free(struct rcu_head *head) | ||
72 | { | ||
73 | struct netdev_hw_addr *ha; | ||
74 | |||
75 | ha = container_of(head, struct netdev_hw_addr, rcu_head); | ||
76 | kfree(ha); | ||
77 | } | ||
78 | |||
79 | static int __hw_addr_del_ex(struct netdev_hw_addr_list *list, | ||
80 | unsigned char *addr, int addr_len, | ||
81 | unsigned char addr_type, bool global) | ||
82 | { | ||
83 | struct netdev_hw_addr *ha; | ||
84 | |||
85 | list_for_each_entry(ha, &list->list, list) { | ||
86 | if (!memcmp(ha->addr, addr, addr_len) && | ||
87 | (ha->type == addr_type || !addr_type)) { | ||
88 | if (global) { | ||
89 | if (!ha->global_use) | ||
90 | break; | ||
91 | else | ||
92 | ha->global_use = false; | ||
93 | } | ||
94 | if (--ha->refcount) | ||
95 | return 0; | ||
96 | list_del_rcu(&ha->list); | ||
97 | call_rcu(&ha->rcu_head, ha_rcu_free); | ||
98 | list->count--; | ||
99 | return 0; | ||
100 | } | ||
101 | } | ||
102 | return -ENOENT; | ||
103 | } | ||
104 | |||
105 | static int __hw_addr_del(struct netdev_hw_addr_list *list, unsigned char *addr, | ||
106 | int addr_len, unsigned char addr_type) | ||
107 | { | ||
108 | return __hw_addr_del_ex(list, addr, addr_len, addr_type, false); | ||
109 | } | ||
110 | |||
111 | int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list, | ||
112 | struct netdev_hw_addr_list *from_list, | ||
113 | int addr_len, unsigned char addr_type) | ||
114 | { | ||
115 | int err; | ||
116 | struct netdev_hw_addr *ha, *ha2; | ||
117 | unsigned char type; | ||
118 | |||
119 | list_for_each_entry(ha, &from_list->list, list) { | ||
120 | type = addr_type ? addr_type : ha->type; | ||
121 | err = __hw_addr_add(to_list, ha->addr, addr_len, type); | ||
122 | if (err) | ||
123 | goto unroll; | ||
124 | } | ||
125 | return 0; | ||
126 | |||
127 | unroll: | ||
128 | list_for_each_entry(ha2, &from_list->list, list) { | ||
129 | if (ha2 == ha) | ||
130 | break; | ||
131 | type = addr_type ? addr_type : ha2->type; | ||
132 | __hw_addr_del(to_list, ha2->addr, addr_len, type); | ||
133 | } | ||
134 | return err; | ||
135 | } | ||
136 | EXPORT_SYMBOL(__hw_addr_add_multiple); | ||
137 | |||
138 | void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list, | ||
139 | struct netdev_hw_addr_list *from_list, | ||
140 | int addr_len, unsigned char addr_type) | ||
141 | { | ||
142 | struct netdev_hw_addr *ha; | ||
143 | unsigned char type; | ||
144 | |||
145 | list_for_each_entry(ha, &from_list->list, list) { | ||
146 | type = addr_type ? addr_type : ha->type; | ||
147 | __hw_addr_del(to_list, ha->addr, addr_len, type); | ||
148 | } | ||
149 | } | ||
150 | EXPORT_SYMBOL(__hw_addr_del_multiple); | ||
151 | |||
152 | int __hw_addr_sync(struct netdev_hw_addr_list *to_list, | ||
153 | struct netdev_hw_addr_list *from_list, | ||
154 | int addr_len) | ||
155 | { | ||
156 | int err = 0; | ||
157 | struct netdev_hw_addr *ha, *tmp; | ||
158 | |||
159 | list_for_each_entry_safe(ha, tmp, &from_list->list, list) { | ||
160 | if (!ha->synced) { | ||
161 | err = __hw_addr_add(to_list, ha->addr, | ||
162 | addr_len, ha->type); | ||
163 | if (err) | ||
164 | break; | ||
165 | ha->synced = true; | ||
166 | ha->refcount++; | ||
167 | } else if (ha->refcount == 1) { | ||
168 | __hw_addr_del(to_list, ha->addr, addr_len, ha->type); | ||
169 | __hw_addr_del(from_list, ha->addr, addr_len, ha->type); | ||
170 | } | ||
171 | } | ||
172 | return err; | ||
173 | } | ||
174 | EXPORT_SYMBOL(__hw_addr_sync); | ||
175 | |||
176 | void __hw_addr_unsync(struct netdev_hw_addr_list *to_list, | ||
177 | struct netdev_hw_addr_list *from_list, | ||
178 | int addr_len) | ||
179 | { | ||
180 | struct netdev_hw_addr *ha, *tmp; | ||
181 | |||
182 | list_for_each_entry_safe(ha, tmp, &from_list->list, list) { | ||
183 | if (ha->synced) { | ||
184 | __hw_addr_del(to_list, ha->addr, | ||
185 | addr_len, ha->type); | ||
186 | ha->synced = false; | ||
187 | __hw_addr_del(from_list, ha->addr, | ||
188 | addr_len, ha->type); | ||
189 | } | ||
190 | } | ||
191 | } | ||
192 | EXPORT_SYMBOL(__hw_addr_unsync); | ||
193 | |||
194 | void __hw_addr_flush(struct netdev_hw_addr_list *list) | ||
195 | { | ||
196 | struct netdev_hw_addr *ha, *tmp; | ||
197 | |||
198 | list_for_each_entry_safe(ha, tmp, &list->list, list) { | ||
199 | list_del_rcu(&ha->list); | ||
200 | call_rcu(&ha->rcu_head, ha_rcu_free); | ||
201 | } | ||
202 | list->count = 0; | ||
203 | } | ||
204 | EXPORT_SYMBOL(__hw_addr_flush); | ||
205 | |||
206 | void __hw_addr_init(struct netdev_hw_addr_list *list) | ||
207 | { | ||
208 | INIT_LIST_HEAD(&list->list); | ||
209 | list->count = 0; | ||
210 | } | ||
211 | EXPORT_SYMBOL(__hw_addr_init); | ||
212 | |||
213 | /* | ||
214 | * Device address handling functions | ||
215 | */ | ||
216 | |||
217 | /** | ||
218 | * dev_addr_flush - Flush device address list | ||
219 | * @dev: device | ||
220 | * | ||
221 | * Flush device address list and reset ->dev_addr. | ||
222 | * | ||
223 | * The caller must hold the rtnl_mutex. | ||
224 | */ | ||
225 | void dev_addr_flush(struct net_device *dev) | ||
226 | { | ||
227 | /* rtnl_mutex must be held here */ | ||
228 | |||
229 | __hw_addr_flush(&dev->dev_addrs); | ||
230 | dev->dev_addr = NULL; | ||
231 | } | ||
232 | EXPORT_SYMBOL(dev_addr_flush); | ||
233 | |||
234 | /** | ||
235 | * dev_addr_init - Init device address list | ||
236 | * @dev: device | ||
237 | * | ||
238 | * Init device address list and create the first element, | ||
239 | * used by ->dev_addr. | ||
240 | * | ||
241 | * The caller must hold the rtnl_mutex. | ||
242 | */ | ||
243 | int dev_addr_init(struct net_device *dev) | ||
244 | { | ||
245 | unsigned char addr[MAX_ADDR_LEN]; | ||
246 | struct netdev_hw_addr *ha; | ||
247 | int err; | ||
248 | |||
249 | /* rtnl_mutex must be held here */ | ||
250 | |||
251 | __hw_addr_init(&dev->dev_addrs); | ||
252 | memset(addr, 0, sizeof(addr)); | ||
253 | err = __hw_addr_add(&dev->dev_addrs, addr, sizeof(addr), | ||
254 | NETDEV_HW_ADDR_T_LAN); | ||
255 | if (!err) { | ||
256 | /* | ||
257 | * Get the first (previously created) address from the list | ||
258 | * and set dev_addr pointer to this location. | ||
259 | */ | ||
260 | ha = list_first_entry(&dev->dev_addrs.list, | ||
261 | struct netdev_hw_addr, list); | ||
262 | dev->dev_addr = ha->addr; | ||
263 | } | ||
264 | return err; | ||
265 | } | ||
266 | EXPORT_SYMBOL(dev_addr_init); | ||
267 | |||
268 | /** | ||
269 | * dev_addr_add - Add a device address | ||
270 | * @dev: device | ||
271 | * @addr: address to add | ||
272 | * @addr_type: address type | ||
273 | * | ||
274 | * Add a device address to the device or increase the reference count if | ||
275 | * it already exists. | ||
276 | * | ||
277 | * The caller must hold the rtnl_mutex. | ||
278 | */ | ||
279 | int dev_addr_add(struct net_device *dev, unsigned char *addr, | ||
280 | unsigned char addr_type) | ||
281 | { | ||
282 | int err; | ||
283 | |||
284 | ASSERT_RTNL(); | ||
285 | |||
286 | err = __hw_addr_add(&dev->dev_addrs, addr, dev->addr_len, addr_type); | ||
287 | if (!err) | ||
288 | call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); | ||
289 | return err; | ||
290 | } | ||
291 | EXPORT_SYMBOL(dev_addr_add); | ||
292 | |||
293 | /** | ||
294 | * dev_addr_del - Release a device address. | ||
295 | * @dev: device | ||
296 | * @addr: address to delete | ||
297 | * @addr_type: address type | ||
298 | * | ||
299 | * Release reference to a device address and remove it from the device | ||
300 | * if the reference count drops to zero. | ||
301 | * | ||
302 | * The caller must hold the rtnl_mutex. | ||
303 | */ | ||
304 | int dev_addr_del(struct net_device *dev, unsigned char *addr, | ||
305 | unsigned char addr_type) | ||
306 | { | ||
307 | int err; | ||
308 | struct netdev_hw_addr *ha; | ||
309 | |||
310 | ASSERT_RTNL(); | ||
311 | |||
312 | /* | ||
313 | * We can not remove the first address from the list because | ||
314 | * dev->dev_addr points to that. | ||
315 | */ | ||
316 | ha = list_first_entry(&dev->dev_addrs.list, | ||
317 | struct netdev_hw_addr, list); | ||
318 | if (ha->addr == dev->dev_addr && ha->refcount == 1) | ||
319 | return -ENOENT; | ||
320 | |||
321 | err = __hw_addr_del(&dev->dev_addrs, addr, dev->addr_len, | ||
322 | addr_type); | ||
323 | if (!err) | ||
324 | call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); | ||
325 | return err; | ||
326 | } | ||
327 | EXPORT_SYMBOL(dev_addr_del); | ||
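Both helpers assume the caller already holds the RTNL lock. A hypothetical driver fragment using them (the MAC value and function are illustrative, not part of this patch):

	#include <linux/if_ether.h>
	#include <linux/netdevice.h>
	#include <linux/rtnetlink.h>

	static int demo_add_secondary(struct net_device *dev)
	{
		static unsigned char mac[ETH_ALEN] = {
			0x02, 0x00, 0x00, 0x00, 0x00, 0x01 };
		int err;

		rtnl_lock();
		err = dev_addr_add(dev, mac, NETDEV_HW_ADDR_T_LAN);
		rtnl_unlock();
		return err;
	}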
328 | |||
329 | /** | ||
330 | * dev_addr_add_multiple - Add device addresses from another device | ||
331 | * @to_dev: device to which addresses will be added | ||
332 | * @from_dev: device from which addresses will be added | ||
333 | * @addr_type: address type - 0 means type will be used from from_dev | ||
334 | * | ||
335 | * Add the device addresses of one device to another. | ||
336 | * | ||
337 | * The caller must hold the rtnl_mutex. | ||
338 | */ | ||
339 | int dev_addr_add_multiple(struct net_device *to_dev, | ||
340 | struct net_device *from_dev, | ||
341 | unsigned char addr_type) | ||
342 | { | ||
343 | int err; | ||
344 | |||
345 | ASSERT_RTNL(); | ||
346 | |||
347 | if (from_dev->addr_len != to_dev->addr_len) | ||
348 | return -EINVAL; | ||
349 | err = __hw_addr_add_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs, | ||
350 | to_dev->addr_len, addr_type); | ||
351 | if (!err) | ||
352 | call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev); | ||
353 | return err; | ||
354 | } | ||
355 | EXPORT_SYMBOL(dev_addr_add_multiple); | ||
356 | |||
357 | /** | ||
358 | * dev_addr_del_multiple - Delete device addresses by another device | ||
359 | * @to_dev: device where the addresses will be deleted | ||
360 | * @from_dev: device supplying the addresses to be deleted | ||
361 | * @addr_type: address type - 0 means type will be used from from_dev | ||
362 | * | ||
363 | * Deletes the addresses listed in from_dev from to_dev. | ||
364 | * | ||
365 | * The caller must hold the rtnl_mutex. | ||
366 | */ | ||
367 | int dev_addr_del_multiple(struct net_device *to_dev, | ||
368 | struct net_device *from_dev, | ||
369 | unsigned char addr_type) | ||
370 | { | ||
371 | ASSERT_RTNL(); | ||
372 | |||
373 | if (from_dev->addr_len != to_dev->addr_len) | ||
374 | return -EINVAL; | ||
375 | __hw_addr_del_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs, | ||
376 | to_dev->addr_len, addr_type); | ||
377 | call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev); | ||
378 | return 0; | ||
379 | } | ||
380 | EXPORT_SYMBOL(dev_addr_del_multiple); | ||
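A plausible use of this pair is a stacked device mirroring its lower device's addresses; passing addr_type == 0 keeps each source entry's own type. Hypothetical fragment:

	static int demo_clone_addrs(struct net_device *upper,
				    struct net_device *lower)
	{
		ASSERT_RTNL();
		/* mirrored entries are dropped again with
		 * dev_addr_del_multiple(upper, lower, 0) on teardown */
		return dev_addr_add_multiple(upper, lower, 0);
	}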
381 | |||
382 | /* | ||
383 | * Unicast list handling functions | ||
384 | */ | ||
385 | |||
386 | /** | ||
387 | * dev_uc_add - Add a secondary unicast address | ||
388 | * @dev: device | ||
389 | * @addr: address to add | ||
390 | * | ||
391 | * Add a secondary unicast address to the device or increase | ||
392 | * the reference count if it already exists. | ||
393 | */ | ||
394 | int dev_uc_add(struct net_device *dev, unsigned char *addr) | ||
395 | { | ||
396 | int err; | ||
397 | |||
398 | netif_addr_lock_bh(dev); | ||
399 | err = __hw_addr_add(&dev->uc, addr, dev->addr_len, | ||
400 | NETDEV_HW_ADDR_T_UNICAST); | ||
401 | if (!err) | ||
402 | __dev_set_rx_mode(dev); | ||
403 | netif_addr_unlock_bh(dev); | ||
404 | return err; | ||
405 | } | ||
406 | EXPORT_SYMBOL(dev_uc_add); | ||
407 | |||
408 | /** | ||
409 | * dev_uc_del - Release secondary unicast address. | ||
410 | * @dev: device | ||
411 | * @addr: address to delete | ||
412 | * | ||
413 | * Release reference to a secondary unicast address and remove it | ||
414 | * from the device if the reference count drops to zero. | ||
415 | */ | ||
416 | int dev_uc_del(struct net_device *dev, unsigned char *addr) | ||
417 | { | ||
418 | int err; | ||
419 | |||
420 | netif_addr_lock_bh(dev); | ||
421 | err = __hw_addr_del(&dev->uc, addr, dev->addr_len, | ||
422 | NETDEV_HW_ADDR_T_UNICAST); | ||
423 | if (!err) | ||
424 | __dev_set_rx_mode(dev); | ||
425 | netif_addr_unlock_bh(dev); | ||
426 | return err; | ||
427 | } | ||
428 | EXPORT_SYMBOL(dev_uc_del); | ||
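Unlike the dev_addr_* helpers, these take netif_addr_lock_bh() themselves and refresh the rx filter on success (note the ASSERT_RTNL() from the old dev_unicast_add()/dev_unicast_delete() is gone). A hypothetical use, letting a device accept one extra station address without going promiscuous:

	static void demo_extra_station(struct net_device *dev)
	{
		static unsigned char extra[ETH_ALEN] = {
			0x02, 0xab, 0xcd, 0xef, 0x00, 0x01 };

		if (dev_uc_add(dev, extra))
			printk(KERN_WARNING "%s: no room for extra address\n",
			       dev->name);
	}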
429 | |||
430 | /** | ||
431 | * dev_uc_sync - Synchronize device's unicast list to another device | ||
432 | * @to: destination device | ||
433 | * @from: source device | ||
434 | * | ||
435 | * Add newly added addresses to the destination device and release | ||
436 | * addresses that have no users left. The source device must be | ||
437 | * locked by netif_tx_lock_bh. | ||
438 | * | ||
439 | * This function is intended to be called from the dev->set_rx_mode | ||
440 | * function of layered software devices. | ||
441 | */ | ||
442 | int dev_uc_sync(struct net_device *to, struct net_device *from) | ||
443 | { | ||
444 | int err = 0; | ||
445 | |||
446 | if (to->addr_len != from->addr_len) | ||
447 | return -EINVAL; | ||
448 | |||
449 | netif_addr_lock_bh(to); | ||
450 | err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len); | ||
451 | if (!err) | ||
452 | __dev_set_rx_mode(to); | ||
453 | netif_addr_unlock_bh(to); | ||
454 | return err; | ||
455 | } | ||
456 | EXPORT_SYMBOL(dev_uc_sync); | ||
457 | |||
458 | /** | ||
459 | * dev_uc_unsync - Remove synchronized addresses from the destination device | ||
460 | * @to: destination device | ||
461 | * @from: source device | ||
462 | * | ||
463 | * Remove all addresses that were added to the destination device by | ||
464 | * dev_uc_sync(). This function is intended to be called from the | ||
465 | * dev->stop function of layered software devices. | ||
466 | */ | ||
467 | void dev_uc_unsync(struct net_device *to, struct net_device *from) | ||
468 | { | ||
469 | if (to->addr_len != from->addr_len) | ||
470 | return; | ||
471 | |||
472 | netif_addr_lock_bh(from); | ||
473 | netif_addr_lock(to); | ||
474 | __hw_addr_unsync(&to->uc, &from->uc, to->addr_len); | ||
475 | __dev_set_rx_mode(to); | ||
476 | netif_addr_unlock(to); | ||
477 | netif_addr_unlock_bh(from); | ||
478 | } | ||
479 | EXPORT_SYMBOL(dev_uc_unsync); | ||
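A sketch of the intended call sites in a layered driver; demo_get_lower() is a hypothetical lookup, not a kernel function:

	static void demo_upper_set_rx_mode(struct net_device *dev)
	{
		struct net_device *lower = demo_get_lower(dev);

		dev_uc_sync(lower, dev);	/* push new entries to the lower dev */
	}

	static int demo_upper_stop(struct net_device *dev)
	{
		struct net_device *lower = demo_get_lower(dev);

		dev_uc_unsync(lower, dev);	/* drop everything we synced */
		return 0;
	}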
480 | |||
481 | /** | ||
482 | * dev_uc_flush - Flush unicast addresses | ||
483 | * @dev: device | ||
484 | * | ||
485 | * Flush unicast addresses. | ||
486 | */ | ||
487 | void dev_uc_flush(struct net_device *dev) | ||
488 | { | ||
489 | netif_addr_lock_bh(dev); | ||
490 | __hw_addr_flush(&dev->uc); | ||
491 | netif_addr_unlock_bh(dev); | ||
492 | } | ||
493 | EXPORT_SYMBOL(dev_uc_flush); | ||
494 | |||
495 | /** | ||
496 | * dev_uc_init - Init unicast address list | ||
497 | * @dev: device | ||
498 | * | ||
499 | * Init unicast address list. | ||
500 | */ | ||
501 | void dev_uc_init(struct net_device *dev) | ||
502 | { | ||
503 | __hw_addr_init(&dev->uc); | ||
504 | } | ||
505 | EXPORT_SYMBOL(dev_uc_init); | ||
506 | |||
507 | /* | ||
508 | * Multicast list handling functions | ||
509 | */ | ||
510 | |||
511 | static int __dev_mc_add(struct net_device *dev, unsigned char *addr, | ||
512 | bool global) | ||
513 | { | ||
514 | int err; | ||
515 | |||
516 | netif_addr_lock_bh(dev); | ||
517 | err = __hw_addr_add_ex(&dev->mc, addr, dev->addr_len, | ||
518 | NETDEV_HW_ADDR_T_MULTICAST, global); | ||
519 | if (!err) | ||
520 | __dev_set_rx_mode(dev); | ||
521 | netif_addr_unlock_bh(dev); | ||
522 | return err; | ||
523 | } | ||
524 | /** | ||
525 | * dev_mc_add - Add a multicast address | ||
526 | * @dev: device | ||
527 | * @addr: address to add | ||
528 | * | ||
529 | * Add a multicast address to the device or increase | ||
530 | * the reference count if it already exists. | ||
531 | */ | ||
532 | int dev_mc_add(struct net_device *dev, unsigned char *addr) | ||
533 | { | ||
534 | return __dev_mc_add(dev, addr, false); | ||
535 | } | ||
536 | EXPORT_SYMBOL(dev_mc_add); | ||
537 | |||
538 | /** | ||
539 | * dev_mc_add_global - Add a global multicast address | ||
540 | * @dev: device | ||
541 | * @addr: address to add | ||
542 | * | ||
543 | * Add a global multicast address to the device. | ||
544 | */ | ||
545 | int dev_mc_add_global(struct net_device *dev, unsigned char *addr) | ||
546 | { | ||
547 | return __dev_mc_add(dev, addr, true); | ||
548 | } | ||
549 | EXPORT_SYMBOL(dev_mc_add_global); | ||
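Per __hw_addr_add_ex() above, in-kernel users are refcounted individually while global_use is a single sticky reference, so repeated global adds are no-ops. Walking through both variants on one hypothetical address:

	static void demo_mc_refcounts(struct net_device *dev, unsigned char *addr)
	{
		dev_mc_add(dev, addr);		/* entry created, refcount 1 */
		dev_mc_add(dev, addr);		/* second kernel user, refcount 2 */
		dev_mc_add_global(dev, addr);	/* sets global_use, refcount 3 */
		dev_mc_add_global(dev, addr);	/* no-op: at most one global ref */
	}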
550 | |||
551 | static int __dev_mc_del(struct net_device *dev, unsigned char *addr, | ||
552 | bool global) | ||
553 | { | ||
554 | int err; | ||
555 | |||
556 | netif_addr_lock_bh(dev); | ||
557 | err = __hw_addr_del_ex(&dev->mc, addr, dev->addr_len, | ||
558 | NETDEV_HW_ADDR_T_MULTICAST, global); | ||
559 | if (!err) | ||
560 | __dev_set_rx_mode(dev); | ||
561 | netif_addr_unlock_bh(dev); | ||
562 | return err; | ||
563 | } | ||
564 | |||
565 | /** | ||
566 | * dev_mc_del - Delete a multicast address. | ||
567 | * @dev: device | ||
568 | * @addr: address to delete | ||
569 | * | ||
570 | * Release reference to a multicast address and remove it | ||
571 | * from the device if the reference count drops to zero. | ||
572 | */ | ||
573 | int dev_mc_del(struct net_device *dev, unsigned char *addr) | ||
574 | { | ||
575 | return __dev_mc_del(dev, addr, false); | ||
576 | } | ||
577 | EXPORT_SYMBOL(dev_mc_del); | ||
578 | |||
579 | /** | ||
580 | * dev_mc_del_global - Delete a global multicast address. | ||
581 | * @dev: device | ||
582 | * @addr: address to delete | ||
583 | * | ||
584 | * Release reference to a multicast address and remove it | ||
585 | * from the device if the reference count drops to zero. | ||
586 | */ | ||
587 | int dev_mc_del_global(struct net_device *dev, unsigned char *addr) | ||
588 | { | ||
589 | return __dev_mc_del(dev, addr, true); | ||
590 | } | ||
591 | EXPORT_SYMBOL(dev_mc_del_global); | ||
592 | |||
593 | /** | ||
594 | * dev_mc_sync - Synchronize device's multicast list to another device | ||
595 | * @to: destination device | ||
596 | * @from: source device | ||
597 | * | ||
598 | * Add newly added addresses to the destination device and release | ||
599 | * addresses that have no users left. The source device must be | ||
600 | * locked by netif_tx_lock_bh. | ||
601 | * | ||
602 | * This function is intended to be called from the dev->set_multicast_list | ||
603 | * or dev->set_rx_mode function of layered software devices. | ||
604 | */ | ||
605 | int dev_mc_sync(struct net_device *to, struct net_device *from) | ||
606 | { | ||
607 | int err = 0; | ||
608 | |||
609 | if (to->addr_len != from->addr_len) | ||
610 | return -EINVAL; | ||
611 | |||
612 | netif_addr_lock_bh(to); | ||
613 | err = __hw_addr_sync(&to->mc, &from->mc, to->addr_len); | ||
614 | if (!err) | ||
615 | __dev_set_rx_mode(to); | ||
616 | netif_addr_unlock_bh(to); | ||
617 | return err; | ||
618 | } | ||
619 | EXPORT_SYMBOL(dev_mc_sync); | ||
620 | |||
621 | /** | ||
622 | * dev_mc_unsync - Remove synchronized addresses from the destination device | ||
623 | * @to: destination device | ||
624 | * @from: source device | ||
625 | * | ||
626 | * Remove all addresses that were added to the destination device by | ||
627 | * dev_mc_sync(). This function is intended to be called from the | ||
628 | * dev->stop function of layered software devices. | ||
629 | */ | ||
630 | void dev_mc_unsync(struct net_device *to, struct net_device *from) | ||
631 | { | ||
632 | if (to->addr_len != from->addr_len) | ||
633 | return; | ||
634 | |||
635 | netif_addr_lock_bh(from); | ||
636 | netif_addr_lock(to); | ||
637 | __hw_addr_unsync(&to->mc, &from->mc, to->addr_len); | ||
638 | __dev_set_rx_mode(to); | ||
639 | netif_addr_unlock(to); | ||
640 | netif_addr_unlock_bh(from); | ||
641 | } | ||
642 | EXPORT_SYMBOL(dev_mc_unsync); | ||
643 | |||
644 | /** | ||
645 | * dev_mc_flush - Flush multicast addresses | ||
646 | * @dev: device | ||
647 | * | ||
648 | * Flush multicast addresses. | ||
649 | */ | ||
650 | void dev_mc_flush(struct net_device *dev) | ||
651 | { | ||
652 | netif_addr_lock_bh(dev); | ||
653 | __hw_addr_flush(&dev->mc); | ||
654 | netif_addr_unlock_bh(dev); | ||
655 | } | ||
656 | EXPORT_SYMBOL(dev_mc_flush); | ||
657 | |||
658 | /** | ||
659 | * dev_mc_init - Init multicast address list | ||
660 | * @dev: device | ||
661 | * | ||
662 | * Init multicast address list. | ||
663 | */ | ||
664 | void dev_mc_init(struct net_device *dev) | ||
665 | { | ||
666 | __hw_addr_init(&dev->mc); | ||
667 | } | ||
668 | EXPORT_SYMBOL(dev_mc_init); | ||
669 | |||
670 | #ifdef CONFIG_PROC_FS | ||
671 | #include <linux/seq_file.h> | ||
672 | |||
673 | static int dev_mc_seq_show(struct seq_file *seq, void *v) | ||
674 | { | ||
675 | struct netdev_hw_addr *ha; | ||
676 | struct net_device *dev = v; | ||
677 | |||
678 | if (v == SEQ_START_TOKEN) | ||
679 | return 0; | ||
680 | |||
681 | netif_addr_lock_bh(dev); | ||
682 | netdev_for_each_mc_addr(ha, dev) { | ||
683 | int i; | ||
684 | |||
685 | seq_printf(seq, "%-4d %-15s %-5d %-5d ", dev->ifindex, | ||
686 | dev->name, ha->refcount, ha->global_use); | ||
687 | |||
688 | for (i = 0; i < dev->addr_len; i++) | ||
689 | seq_printf(seq, "%02x", ha->addr[i]); | ||
690 | |||
691 | seq_putc(seq, '\n'); | ||
692 | } | ||
693 | netif_addr_unlock_bh(dev); | ||
694 | return 0; | ||
695 | } | ||
696 | |||
697 | static const struct seq_operations dev_mc_seq_ops = { | ||
698 | .start = dev_seq_start, | ||
699 | .next = dev_seq_next, | ||
700 | .stop = dev_seq_stop, | ||
701 | .show = dev_mc_seq_show, | ||
702 | }; | ||
703 | |||
704 | static int dev_mc_seq_open(struct inode *inode, struct file *file) | ||
705 | { | ||
706 | return seq_open_net(inode, file, &dev_mc_seq_ops, | ||
707 | sizeof(struct seq_net_private)); | ||
708 | } | ||
709 | |||
710 | static const struct file_operations dev_mc_seq_fops = { | ||
711 | .owner = THIS_MODULE, | ||
712 | .open = dev_mc_seq_open, | ||
713 | .read = seq_read, | ||
714 | .llseek = seq_lseek, | ||
715 | .release = seq_release_net, | ||
716 | }; | ||
717 | |||
718 | #endif | ||
719 | |||
720 | static int __net_init dev_mc_net_init(struct net *net) | ||
721 | { | ||
722 | if (!proc_net_fops_create(net, "dev_mcast", 0, &dev_mc_seq_fops)) | ||
723 | return -ENOMEM; | ||
724 | return 0; | ||
725 | } | ||
726 | |||
727 | static void __net_exit dev_mc_net_exit(struct net *net) | ||
728 | { | ||
729 | proc_net_remove(net, "dev_mcast"); | ||
730 | } | ||
731 | |||
732 | static struct pernet_operations __net_initdata dev_mc_net_ops = { | ||
733 | .init = dev_mc_net_init, | ||
734 | .exit = dev_mc_net_exit, | ||
735 | }; | ||
736 | |||
737 | void __init dev_mcast_init(void) | ||
738 | { | ||
739 | register_pernet_subsys(&dev_mc_net_ops); | ||
740 | } | ||
741 | |||
diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c deleted file mode 100644 index 3dc295beb483..000000000000 --- a/net/core/dev_mcast.c +++ /dev/null | |||
@@ -1,232 +0,0 @@ | |||
1 | /* | ||
2 | * Linux NET3: Multicast List maintenance. | ||
3 | * | ||
4 | * Authors: | ||
5 | * Tim Kordas <tjk@nostromo.eeap.cwru.edu> | ||
6 | * Richard Underwood <richard@wuzz.demon.co.uk> | ||
7 | * | ||
8 | * Stir fried together from the IP multicast and CAP patches above | ||
9 | * Alan Cox <alan@lxorguk.ukuu.org.uk> | ||
10 | * | ||
11 | * Fixes: | ||
12 | * Alan Cox : Update the device on a real delete | ||
13 | * rather than any time but... | ||
14 | * Alan Cox : IFF_ALLMULTI support. | ||
15 | * Alan Cox : New format set_multicast_list() calls. | ||
16 | * Gleb Natapov : Remove dev_mc_lock. | ||
17 | * | ||
18 | * This program is free software; you can redistribute it and/or | ||
19 | * modify it under the terms of the GNU General Public License | ||
20 | * as published by the Free Software Foundation; either version | ||
21 | * 2 of the License, or (at your option) any later version. | ||
22 | */ | ||
23 | |||
24 | #include <linux/module.h> | ||
25 | #include <asm/uaccess.h> | ||
26 | #include <asm/system.h> | ||
27 | #include <linux/bitops.h> | ||
28 | #include <linux/types.h> | ||
29 | #include <linux/kernel.h> | ||
30 | #include <linux/string.h> | ||
31 | #include <linux/mm.h> | ||
32 | #include <linux/socket.h> | ||
33 | #include <linux/sockios.h> | ||
34 | #include <linux/in.h> | ||
35 | #include <linux/errno.h> | ||
36 | #include <linux/interrupt.h> | ||
37 | #include <linux/if_ether.h> | ||
38 | #include <linux/inet.h> | ||
39 | #include <linux/netdevice.h> | ||
40 | #include <linux/etherdevice.h> | ||
41 | #include <linux/proc_fs.h> | ||
42 | #include <linux/seq_file.h> | ||
43 | #include <linux/init.h> | ||
44 | #include <net/net_namespace.h> | ||
45 | #include <net/ip.h> | ||
46 | #include <net/route.h> | ||
47 | #include <linux/skbuff.h> | ||
48 | #include <net/sock.h> | ||
49 | #include <net/arp.h> | ||
50 | |||
51 | |||
52 | /* | ||
53 | * Device multicast list maintenance. | ||
54 | * | ||
55 | * This is used both by IP and by the user level maintenance functions. | ||
56 | * Unlike BSD we maintain a usage count on a given multicast address so | ||
57 | * that a casual user application can add/delete multicasts used by | ||
58 | * protocols without doing damage to the protocols when it deletes the | ||
59 | * entries. It also helps IP as it tracks overlapping maps. | ||
60 | * | ||
61 | * Device mc lists are changed by bh at least if IPv6 is enabled, | ||
62 | * so that it must be bh protected. | ||
63 | * | ||
64 | * We block accesses to device mc filters with netif_tx_lock. | ||
65 | */ | ||
66 | |||
67 | /* | ||
68 | * Delete a device level multicast | ||
69 | */ | ||
70 | |||
71 | int dev_mc_delete(struct net_device *dev, void *addr, int alen, int glbl) | ||
72 | { | ||
73 | int err; | ||
74 | |||
75 | netif_addr_lock_bh(dev); | ||
76 | err = __dev_addr_delete(&dev->mc_list, &dev->mc_count, | ||
77 | addr, alen, glbl); | ||
78 | if (!err) { | ||
79 | /* | ||
80 | * We have altered the list, so the card | ||
81 | * loaded filter is now wrong. Fix it | ||
82 | */ | ||
83 | |||
84 | __dev_set_rx_mode(dev); | ||
85 | } | ||
86 | netif_addr_unlock_bh(dev); | ||
87 | return err; | ||
88 | } | ||
89 | |||
90 | /* | ||
91 | * Add a device level multicast | ||
92 | */ | ||
93 | |||
94 | int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl) | ||
95 | { | ||
96 | int err; | ||
97 | |||
98 | netif_addr_lock_bh(dev); | ||
99 | if (alen != dev->addr_len) | ||
100 | err = -EINVAL; | ||
101 | else | ||
102 | err = __dev_addr_add(&dev->mc_list, &dev->mc_count, addr, alen, glbl); | ||
103 | if (!err) | ||
104 | __dev_set_rx_mode(dev); | ||
105 | netif_addr_unlock_bh(dev); | ||
106 | return err; | ||
107 | } | ||
108 | |||
109 | /** | ||
110 | * dev_mc_sync - Synchronize device's multicast list to another device | ||
111 | * @to: destination device | ||
112 | * @from: source device | ||
113 | * | ||
114 | * Add newly added addresses to the destination device and release | ||
115 | * addresses that have no users left. The source device must be | ||
116 | * locked by netif_tx_lock_bh. | ||
117 | * | ||
118 | * This function is intended to be called from the dev->set_multicast_list | ||
119 | * or dev->set_rx_mode function of layered software devices. | ||
120 | */ | ||
121 | int dev_mc_sync(struct net_device *to, struct net_device *from) | ||
122 | { | ||
123 | int err = 0; | ||
124 | |||
125 | netif_addr_lock_bh(to); | ||
126 | err = __dev_addr_sync(&to->mc_list, &to->mc_count, | ||
127 | &from->mc_list, &from->mc_count); | ||
128 | if (!err) | ||
129 | __dev_set_rx_mode(to); | ||
130 | netif_addr_unlock_bh(to); | ||
131 | |||
132 | return err; | ||
133 | } | ||
134 | EXPORT_SYMBOL(dev_mc_sync); | ||
135 | |||
136 | |||
137 | /** | ||
138 | * dev_mc_unsync - Remove synchronized addresses from the destination | ||
139 | * device | ||
140 | * @to: destination device | ||
141 | * @from: source device | ||
142 | * | ||
143 | * Remove all addresses that were added to the destination device by | ||
144 | * dev_mc_sync(). This function is intended to be called from the | ||
145 | * dev->stop function of layered software devices. | ||
146 | */ | ||
147 | void dev_mc_unsync(struct net_device *to, struct net_device *from) | ||
148 | { | ||
149 | netif_addr_lock_bh(from); | ||
150 | netif_addr_lock(to); | ||
151 | |||
152 | __dev_addr_unsync(&to->mc_list, &to->mc_count, | ||
153 | &from->mc_list, &from->mc_count); | ||
154 | __dev_set_rx_mode(to); | ||
155 | |||
156 | netif_addr_unlock(to); | ||
157 | netif_addr_unlock_bh(from); | ||
158 | } | ||
159 | EXPORT_SYMBOL(dev_mc_unsync); | ||
160 | |||
161 | #ifdef CONFIG_PROC_FS | ||
162 | static int dev_mc_seq_show(struct seq_file *seq, void *v) | ||
163 | { | ||
164 | struct dev_addr_list *m; | ||
165 | struct net_device *dev = v; | ||
166 | |||
167 | if (v == SEQ_START_TOKEN) | ||
168 | return 0; | ||
169 | |||
170 | netif_addr_lock_bh(dev); | ||
171 | for (m = dev->mc_list; m; m = m->next) { | ||
172 | int i; | ||
173 | |||
174 | seq_printf(seq, "%-4d %-15s %-5d %-5d ", dev->ifindex, | ||
175 | dev->name, m->dmi_users, m->dmi_gusers); | ||
176 | |||
177 | for (i = 0; i < m->dmi_addrlen; i++) | ||
178 | seq_printf(seq, "%02x", m->dmi_addr[i]); | ||
179 | |||
180 | seq_putc(seq, '\n'); | ||
181 | } | ||
182 | netif_addr_unlock_bh(dev); | ||
183 | return 0; | ||
184 | } | ||
185 | |||
186 | static const struct seq_operations dev_mc_seq_ops = { | ||
187 | .start = dev_seq_start, | ||
188 | .next = dev_seq_next, | ||
189 | .stop = dev_seq_stop, | ||
190 | .show = dev_mc_seq_show, | ||
191 | }; | ||
192 | |||
193 | static int dev_mc_seq_open(struct inode *inode, struct file *file) | ||
194 | { | ||
195 | return seq_open_net(inode, file, &dev_mc_seq_ops, | ||
196 | sizeof(struct seq_net_private)); | ||
197 | } | ||
198 | |||
199 | static const struct file_operations dev_mc_seq_fops = { | ||
200 | .owner = THIS_MODULE, | ||
201 | .open = dev_mc_seq_open, | ||
202 | .read = seq_read, | ||
203 | .llseek = seq_lseek, | ||
204 | .release = seq_release_net, | ||
205 | }; | ||
206 | |||
207 | #endif | ||
208 | |||
209 | static int __net_init dev_mc_net_init(struct net *net) | ||
210 | { | ||
211 | if (!proc_net_fops_create(net, "dev_mcast", 0, &dev_mc_seq_fops)) | ||
212 | return -ENOMEM; | ||
213 | return 0; | ||
214 | } | ||
215 | |||
216 | static void __net_exit dev_mc_net_exit(struct net *net) | ||
217 | { | ||
218 | proc_net_remove(net, "dev_mcast"); | ||
219 | } | ||
220 | |||
221 | static struct pernet_operations __net_initdata dev_mc_net_ops = { | ||
222 | .init = dev_mc_net_init, | ||
223 | .exit = dev_mc_net_exit, | ||
224 | }; | ||
225 | |||
226 | void __init dev_mcast_init(void) | ||
227 | { | ||
228 | register_pernet_subsys(&dev_mc_net_ops); | ||
229 | } | ||
230 | |||
231 | EXPORT_SYMBOL(dev_mc_add); | ||
232 | EXPORT_SYMBOL(dev_mc_delete); | ||
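That is the tail of dev_mcast.c; every line above appears only in the old-file column, i.e. the file is being removed by this merge. For the record, the /proc/net/dev_mcast format its seq_file hooks produced was ifindex, device name, dmi_users, dmi_gusers, then the address bytes in hex, e.g. (values illustrative):

2    eth0            1     0     01005e000001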
diff --git a/net/core/dst.c b/net/core/dst.c index f307bc18f6a0..9920722cc82b 100644 --- a/net/core/dst.c +++ b/net/core/dst.c | |||
@@ -44,7 +44,7 @@ static atomic_t dst_total = ATOMIC_INIT(0); | |||
44 | */ | 44 | */ |
45 | static struct { | 45 | static struct { |
46 | spinlock_t lock; | 46 | spinlock_t lock; |
47 | struct dst_entry *list; | 47 | struct dst_entry *list; |
48 | unsigned long timer_inc; | 48 | unsigned long timer_inc; |
49 | unsigned long timer_expires; | 49 | unsigned long timer_expires; |
50 | } dst_garbage = { | 50 | } dst_garbage = { |
@@ -52,7 +52,7 @@ static struct { | |||
52 | .timer_inc = DST_GC_MAX, | 52 | .timer_inc = DST_GC_MAX, |
53 | }; | 53 | }; |
54 | static void dst_gc_task(struct work_struct *work); | 54 | static void dst_gc_task(struct work_struct *work); |
55 | static void ___dst_free(struct dst_entry * dst); | 55 | static void ___dst_free(struct dst_entry *dst); |
56 | 56 | ||
57 | static DECLARE_DELAYED_WORK(dst_gc_work, dst_gc_task); | 57 | static DECLARE_DELAYED_WORK(dst_gc_work, dst_gc_task); |
58 | 58 | ||
@@ -136,8 +136,8 @@ loop: | |||
136 | } | 136 | } |
137 | expires = dst_garbage.timer_expires; | 137 | expires = dst_garbage.timer_expires; |
138 | /* | 138 | /* |
139 | * if the next desired timer is more than 4 seconds in the future | 139 | * if the next desired timer is more than 4 seconds in the |
140 | * then round the timer to whole seconds | 140 | * future then round the timer to whole seconds |
141 | */ | 141 | */ |
142 | if (expires > 4*HZ) | 142 | if (expires > 4*HZ) |
143 | expires = round_jiffies_relative(expires); | 143 | expires = round_jiffies_relative(expires); |
@@ -152,7 +152,8 @@ loop: | |||
152 | " expires: %lu elapsed: %lu us\n", | 152 | " expires: %lu elapsed: %lu us\n", |
153 | atomic_read(&dst_total), delayed, work_performed, | 153 | atomic_read(&dst_total), delayed, work_performed, |
154 | expires, | 154 | expires, |
155 | elapsed.tv_sec * USEC_PER_SEC + elapsed.tv_nsec / NSEC_PER_USEC); | 155 | elapsed.tv_sec * USEC_PER_SEC + |
156 | elapsed.tv_nsec / NSEC_PER_USEC); | ||
156 | #endif | 157 | #endif |
157 | } | 158 | } |
158 | 159 | ||
@@ -163,9 +164,9 @@ int dst_discard(struct sk_buff *skb) | |||
163 | } | 164 | } |
164 | EXPORT_SYMBOL(dst_discard); | 165 | EXPORT_SYMBOL(dst_discard); |
165 | 166 | ||
166 | void * dst_alloc(struct dst_ops * ops) | 167 | void *dst_alloc(struct dst_ops *ops) |
167 | { | 168 | { |
168 | struct dst_entry * dst; | 169 | struct dst_entry *dst; |
169 | 170 | ||
170 | if (ops->gc && atomic_read(&ops->entries) > ops->gc_thresh) { | 171 | if (ops->gc && atomic_read(&ops->entries) > ops->gc_thresh) { |
171 | if (ops->gc(ops)) | 172 | if (ops->gc(ops)) |
@@ -185,19 +186,20 @@ void * dst_alloc(struct dst_ops * ops) | |||
185 | atomic_inc(&ops->entries); | 186 | atomic_inc(&ops->entries); |
186 | return dst; | 187 | return dst; |
187 | } | 188 | } |
189 | EXPORT_SYMBOL(dst_alloc); | ||
188 | 190 | ||
189 | static void ___dst_free(struct dst_entry * dst) | 191 | static void ___dst_free(struct dst_entry *dst) |
190 | { | 192 | { |
191 | /* The first case (dev==NULL) is required, when | 193 | /* The first case (dev==NULL) is required, when |
192 | protocol module is unloaded. | 194 | protocol module is unloaded. |
193 | */ | 195 | */ |
194 | if (dst->dev == NULL || !(dst->dev->flags&IFF_UP)) { | 196 | if (dst->dev == NULL || !(dst->dev->flags&IFF_UP)) |
195 | dst->input = dst->output = dst_discard; | 197 | dst->input = dst->output = dst_discard; |
196 | } | ||
197 | dst->obsolete = 2; | 198 | dst->obsolete = 2; |
198 | } | 199 | } |
200 | EXPORT_SYMBOL(__dst_free); | ||
199 | 201 | ||
200 | void __dst_free(struct dst_entry * dst) | 202 | void __dst_free(struct dst_entry *dst) |
201 | { | 203 | { |
202 | spin_lock_bh(&dst_garbage.lock); | 204 | spin_lock_bh(&dst_garbage.lock); |
203 | ___dst_free(dst); | 205 | ___dst_free(dst); |
@@ -262,15 +264,16 @@ again: | |||
262 | } | 264 | } |
263 | return NULL; | 265 | return NULL; |
264 | } | 266 | } |
267 | EXPORT_SYMBOL(dst_destroy); | ||
265 | 268 | ||
266 | void dst_release(struct dst_entry *dst) | 269 | void dst_release(struct dst_entry *dst) |
267 | { | 270 | { |
268 | if (dst) { | 271 | if (dst) { |
269 | int newrefcnt; | 272 | int newrefcnt; |
270 | 273 | ||
271 | smp_mb__before_atomic_dec(); | 274 | smp_mb__before_atomic_dec(); |
272 | newrefcnt = atomic_dec_return(&dst->__refcnt); | 275 | newrefcnt = atomic_dec_return(&dst->__refcnt); |
273 | WARN_ON(newrefcnt < 0); | 276 | WARN_ON(newrefcnt < 0); |
274 | } | 277 | } |
275 | } | 278 | } |
276 | EXPORT_SYMBOL(dst_release); | 279 | EXPORT_SYMBOL(dst_release); |
@@ -283,8 +286,8 @@ EXPORT_SYMBOL(dst_release); | |||
283 | * | 286 | * |
284 | * Commented and originally written by Alexey. | 287 | * Commented and originally written by Alexey. |
285 | */ | 288 | */ |
286 | static inline void dst_ifdown(struct dst_entry *dst, struct net_device *dev, | 289 | static void dst_ifdown(struct dst_entry *dst, struct net_device *dev, |
287 | int unregister) | 290 | int unregister) |
288 | { | 291 | { |
289 | if (dst->ops->ifdown) | 292 | if (dst->ops->ifdown) |
290 | dst->ops->ifdown(dst, dev, unregister); | 293 | dst->ops->ifdown(dst, dev, unregister); |
@@ -306,7 +309,8 @@ static inline void dst_ifdown(struct dst_entry *dst, struct net_device *dev, | |||
306 | } | 309 | } |
307 | } | 310 | } |
308 | 311 | ||
309 | static int dst_dev_event(struct notifier_block *this, unsigned long event, void *ptr) | 312 | static int dst_dev_event(struct notifier_block *this, unsigned long event, |
313 | void *ptr) | ||
310 | { | 314 | { |
311 | struct net_device *dev = ptr; | 315 | struct net_device *dev = ptr; |
312 | struct dst_entry *dst, *last = NULL; | 316 | struct dst_entry *dst, *last = NULL; |
@@ -329,9 +333,8 @@ static int dst_dev_event(struct notifier_block *this, unsigned long event, void | |||
329 | last->next = dst; | 333 | last->next = dst; |
330 | else | 334 | else |
331 | dst_busy_list = dst; | 335 | dst_busy_list = dst; |
332 | for (; dst; dst = dst->next) { | 336 | for (; dst; dst = dst->next) |
333 | dst_ifdown(dst, dev, event != NETDEV_DOWN); | 337 | dst_ifdown(dst, dev, event != NETDEV_DOWN); |
334 | } | ||
335 | mutex_unlock(&dst_gc_mutex); | 338 | mutex_unlock(&dst_gc_mutex); |
336 | break; | 339 | break; |
337 | } | 340 | } |
@@ -346,7 +349,3 @@ void __init dst_init(void) | |||
346 | { | 349 | { |
347 | register_netdevice_notifier(&dst_dev_notifier); | 350 | register_netdevice_notifier(&dst_dev_notifier); |
348 | } | 351 | } |
349 | |||
350 | EXPORT_SYMBOL(__dst_free); | ||
351 | EXPORT_SYMBOL(dst_alloc); | ||
352 | EXPORT_SYMBOL(dst_destroy); | ||
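Beyond moving each EXPORT_SYMBOL next to its definition, the dst.c hunks are mostly style. The behaviour worth keeping in mind is that ___dst_free() re-points a dying entry's handlers at dst_discard(), whose body (not shown in the hunks; this is a simplified sketch from memory of the kernel source) amounts to:

int dst_discard(struct sk_buff *skb)
{
	kfree_skb(skb);		/* the route is gone; drop the packet */
	return 0;
}

so packets still traversing a torn-down route are dropped instead of touching a dead device.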
diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 9d55c57f318a..a0f4964033d2 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c | |||
@@ -18,8 +18,8 @@ | |||
18 | #include <linux/ethtool.h> | 18 | #include <linux/ethtool.h> |
19 | #include <linux/netdevice.h> | 19 | #include <linux/netdevice.h> |
20 | #include <linux/bitops.h> | 20 | #include <linux/bitops.h> |
21 | #include <linux/uaccess.h> | ||
21 | #include <linux/slab.h> | 22 | #include <linux/slab.h> |
22 | #include <asm/uaccess.h> | ||
23 | 23 | ||
24 | /* | 24 | /* |
25 | * Some useful ethtool_ops methods that're device independent. | 25 | * Some useful ethtool_ops methods that're device independent. |
@@ -31,6 +31,7 @@ u32 ethtool_op_get_link(struct net_device *dev) | |||
31 | { | 31 | { |
32 | return netif_carrier_ok(dev) ? 1 : 0; | 32 | return netif_carrier_ok(dev) ? 1 : 0; |
33 | } | 33 | } |
34 | EXPORT_SYMBOL(ethtool_op_get_link); | ||
34 | 35 | ||
35 | u32 ethtool_op_get_rx_csum(struct net_device *dev) | 36 | u32 ethtool_op_get_rx_csum(struct net_device *dev) |
36 | { | 37 | { |
@@ -63,6 +64,7 @@ int ethtool_op_set_tx_hw_csum(struct net_device *dev, u32 data) | |||
63 | 64 | ||
64 | return 0; | 65 | return 0; |
65 | } | 66 | } |
67 | EXPORT_SYMBOL(ethtool_op_set_tx_hw_csum); | ||
66 | 68 | ||
67 | int ethtool_op_set_tx_ipv6_csum(struct net_device *dev, u32 data) | 69 | int ethtool_op_set_tx_ipv6_csum(struct net_device *dev, u32 data) |
68 | { | 70 | { |
@@ -73,11 +75,13 @@ int ethtool_op_set_tx_ipv6_csum(struct net_device *dev, u32 data) | |||
73 | 75 | ||
74 | return 0; | 76 | return 0; |
75 | } | 77 | } |
78 | EXPORT_SYMBOL(ethtool_op_set_tx_ipv6_csum); | ||
76 | 79 | ||
77 | u32 ethtool_op_get_sg(struct net_device *dev) | 80 | u32 ethtool_op_get_sg(struct net_device *dev) |
78 | { | 81 | { |
79 | return (dev->features & NETIF_F_SG) != 0; | 82 | return (dev->features & NETIF_F_SG) != 0; |
80 | } | 83 | } |
84 | EXPORT_SYMBOL(ethtool_op_get_sg); | ||
81 | 85 | ||
82 | int ethtool_op_set_sg(struct net_device *dev, u32 data) | 86 | int ethtool_op_set_sg(struct net_device *dev, u32 data) |
83 | { | 87 | { |
@@ -88,11 +92,13 @@ int ethtool_op_set_sg(struct net_device *dev, u32 data) | |||
88 | 92 | ||
89 | return 0; | 93 | return 0; |
90 | } | 94 | } |
95 | EXPORT_SYMBOL(ethtool_op_set_sg); | ||
91 | 96 | ||
92 | u32 ethtool_op_get_tso(struct net_device *dev) | 97 | u32 ethtool_op_get_tso(struct net_device *dev) |
93 | { | 98 | { |
94 | return (dev->features & NETIF_F_TSO) != 0; | 99 | return (dev->features & NETIF_F_TSO) != 0; |
95 | } | 100 | } |
101 | EXPORT_SYMBOL(ethtool_op_get_tso); | ||
96 | 102 | ||
97 | int ethtool_op_set_tso(struct net_device *dev, u32 data) | 103 | int ethtool_op_set_tso(struct net_device *dev, u32 data) |
98 | { | 104 | { |
@@ -103,11 +109,13 @@ int ethtool_op_set_tso(struct net_device *dev, u32 data) | |||
103 | 109 | ||
104 | return 0; | 110 | return 0; |
105 | } | 111 | } |
112 | EXPORT_SYMBOL(ethtool_op_set_tso); | ||
106 | 113 | ||
107 | u32 ethtool_op_get_ufo(struct net_device *dev) | 114 | u32 ethtool_op_get_ufo(struct net_device *dev) |
108 | { | 115 | { |
109 | return (dev->features & NETIF_F_UFO) != 0; | 116 | return (dev->features & NETIF_F_UFO) != 0; |
110 | } | 117 | } |
118 | EXPORT_SYMBOL(ethtool_op_get_ufo); | ||
111 | 119 | ||
112 | int ethtool_op_set_ufo(struct net_device *dev, u32 data) | 120 | int ethtool_op_set_ufo(struct net_device *dev, u32 data) |
113 | { | 121 | { |
@@ -117,12 +125,13 @@ int ethtool_op_set_ufo(struct net_device *dev, u32 data) | |||
117 | dev->features &= ~NETIF_F_UFO; | 125 | dev->features &= ~NETIF_F_UFO; |
118 | return 0; | 126 | return 0; |
119 | } | 127 | } |
128 | EXPORT_SYMBOL(ethtool_op_set_ufo); | ||
120 | 129 | ||
121 | /* the following list of flags are the same as their associated | 130 | /* the following list of flags are the same as their associated |
122 | * NETIF_F_xxx values in include/linux/netdevice.h | 131 | * NETIF_F_xxx values in include/linux/netdevice.h |
123 | */ | 132 | */ |
124 | static const u32 flags_dup_features = | 133 | static const u32 flags_dup_features = |
125 | (ETH_FLAG_LRO | ETH_FLAG_NTUPLE); | 134 | (ETH_FLAG_LRO | ETH_FLAG_NTUPLE | ETH_FLAG_RXHASH); |
126 | 135 | ||
127 | u32 ethtool_op_get_flags(struct net_device *dev) | 136 | u32 ethtool_op_get_flags(struct net_device *dev) |
128 | { | 137 | { |
@@ -133,6 +142,7 @@ u32 ethtool_op_get_flags(struct net_device *dev) | |||
133 | 142 | ||
134 | return dev->features & flags_dup_features; | 143 | return dev->features & flags_dup_features; |
135 | } | 144 | } |
145 | EXPORT_SYMBOL(ethtool_op_get_flags); | ||
136 | 146 | ||
137 | int ethtool_op_set_flags(struct net_device *dev, u32 data) | 147 | int ethtool_op_set_flags(struct net_device *dev, u32 data) |
138 | { | 148 | { |
@@ -153,9 +163,15 @@ int ethtool_op_set_flags(struct net_device *dev, u32 data) | |||
153 | features &= ~NETIF_F_NTUPLE; | 163 | features &= ~NETIF_F_NTUPLE; |
154 | } | 164 | } |
155 | 165 | ||
166 | if (data & ETH_FLAG_RXHASH) | ||
167 | features |= NETIF_F_RXHASH; | ||
168 | else | ||
169 | features &= ~NETIF_F_RXHASH; | ||
170 | |||
156 | dev->features = features; | 171 | dev->features = features; |
157 | return 0; | 172 | return 0; |
158 | } | 173 | } |
174 | EXPORT_SYMBOL(ethtool_op_set_flags); | ||
159 | 175 | ||
160 | void ethtool_ntuple_flush(struct net_device *dev) | 176 | void ethtool_ntuple_flush(struct net_device *dev) |
161 | { | 177 | { |
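Adding ETH_FLAG_RXHASH to flags_dup_features and to the set path means a driver that computes a receive hash can expose the toggle with the stock helpers alone. Hypothetical wiring (driver name invented):

static const struct ethtool_ops foo_ethtool_ops = {
	.get_flags	= ethtool_op_get_flags,
	.set_flags	= ethtool_op_set_flags,	/* now honours ETH_FLAG_RXHASH */
};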
@@ -201,7 +217,8 @@ static int ethtool_set_settings(struct net_device *dev, void __user *useraddr) | |||
201 | return dev->ethtool_ops->set_settings(dev, &cmd); | 217 | return dev->ethtool_ops->set_settings(dev, &cmd); |
202 | } | 218 | } |
203 | 219 | ||
204 | static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr) | 220 | static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev, |
221 | void __user *useraddr) | ||
205 | { | 222 | { |
206 | struct ethtool_drvinfo info; | 223 | struct ethtool_drvinfo info; |
207 | const struct ethtool_ops *ops = dev->ethtool_ops; | 224 | const struct ethtool_ops *ops = dev->ethtool_ops; |
@@ -241,7 +258,7 @@ static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev, void _ | |||
241 | } | 258 | } |
242 | 259 | ||
243 | static noinline_for_stack int ethtool_get_sset_info(struct net_device *dev, | 260 | static noinline_for_stack int ethtool_get_sset_info(struct net_device *dev, |
244 | void __user *useraddr) | 261 | void __user *useraddr) |
245 | { | 262 | { |
246 | struct ethtool_sset_info info; | 263 | struct ethtool_sset_info info; |
247 | const struct ethtool_ops *ops = dev->ethtool_ops; | 264 | const struct ethtool_ops *ops = dev->ethtool_ops; |
@@ -300,7 +317,8 @@ out: | |||
300 | return ret; | 317 | return ret; |
301 | } | 318 | } |
302 | 319 | ||
303 | static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, void __user *useraddr) | 320 | static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, |
321 | void __user *useraddr) | ||
304 | { | 322 | { |
305 | struct ethtool_rxnfc cmd; | 323 | struct ethtool_rxnfc cmd; |
306 | 324 | ||
@@ -313,7 +331,8 @@ static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, void __u | |||
313 | return dev->ethtool_ops->set_rxnfc(dev, &cmd); | 331 | return dev->ethtool_ops->set_rxnfc(dev, &cmd); |
314 | } | 332 | } |
315 | 333 | ||
316 | static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev, void __user *useraddr) | 334 | static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev, |
335 | void __user *useraddr) | ||
317 | { | 336 | { |
318 | struct ethtool_rxnfc info; | 337 | struct ethtool_rxnfc info; |
319 | const struct ethtool_ops *ops = dev->ethtool_ops; | 338 | const struct ethtool_ops *ops = dev->ethtool_ops; |
@@ -358,8 +377,8 @@ err_out: | |||
358 | } | 377 | } |
359 | 378 | ||
360 | static void __rx_ntuple_filter_add(struct ethtool_rx_ntuple_list *list, | 379 | static void __rx_ntuple_filter_add(struct ethtool_rx_ntuple_list *list, |
361 | struct ethtool_rx_ntuple_flow_spec *spec, | 380 | struct ethtool_rx_ntuple_flow_spec *spec, |
362 | struct ethtool_rx_ntuple_flow_spec_container *fsc) | 381 | struct ethtool_rx_ntuple_flow_spec_container *fsc) |
363 | { | 382 | { |
364 | 383 | ||
365 | /* don't add filters forever */ | 384 | /* don't add filters forever */ |
@@ -385,7 +404,8 @@ static void __rx_ntuple_filter_add(struct ethtool_rx_ntuple_list *list, | |||
385 | list->count++; | 404 | list->count++; |
386 | } | 405 | } |
387 | 406 | ||
388 | static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev, void __user *useraddr) | 407 | static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev, |
408 | void __user *useraddr) | ||
389 | { | 409 | { |
390 | struct ethtool_rx_ntuple cmd; | 410 | struct ethtool_rx_ntuple cmd; |
391 | const struct ethtool_ops *ops = dev->ethtool_ops; | 411 | const struct ethtool_ops *ops = dev->ethtool_ops; |
@@ -502,7 +522,7 @@ static int ethtool_get_rx_ntuple(struct net_device *dev, void __user *useraddr) | |||
502 | p += ETH_GSTRING_LEN; | 522 | p += ETH_GSTRING_LEN; |
503 | num_strings++; | 523 | num_strings++; |
504 | goto unknown_filter; | 524 | goto unknown_filter; |
505 | }; | 525 | } |
506 | 526 | ||
507 | /* now the rest of the filters */ | 527 | /* now the rest of the filters */ |
508 | switch (fsc->fs.flow_type) { | 528 | switch (fsc->fs.flow_type) { |
@@ -510,125 +530,125 @@ static int ethtool_get_rx_ntuple(struct net_device *dev, void __user *useraddr) | |||
510 | case UDP_V4_FLOW: | 530 | case UDP_V4_FLOW: |
511 | case SCTP_V4_FLOW: | 531 | case SCTP_V4_FLOW: |
512 | sprintf(p, "\tSrc IP addr: 0x%x\n", | 532 | sprintf(p, "\tSrc IP addr: 0x%x\n", |
513 | fsc->fs.h_u.tcp_ip4_spec.ip4src); | 533 | fsc->fs.h_u.tcp_ip4_spec.ip4src); |
514 | p += ETH_GSTRING_LEN; | 534 | p += ETH_GSTRING_LEN; |
515 | num_strings++; | 535 | num_strings++; |
516 | sprintf(p, "\tSrc IP mask: 0x%x\n", | 536 | sprintf(p, "\tSrc IP mask: 0x%x\n", |
517 | fsc->fs.m_u.tcp_ip4_spec.ip4src); | 537 | fsc->fs.m_u.tcp_ip4_spec.ip4src); |
518 | p += ETH_GSTRING_LEN; | 538 | p += ETH_GSTRING_LEN; |
519 | num_strings++; | 539 | num_strings++; |
520 | sprintf(p, "\tDest IP addr: 0x%x\n", | 540 | sprintf(p, "\tDest IP addr: 0x%x\n", |
521 | fsc->fs.h_u.tcp_ip4_spec.ip4dst); | 541 | fsc->fs.h_u.tcp_ip4_spec.ip4dst); |
522 | p += ETH_GSTRING_LEN; | 542 | p += ETH_GSTRING_LEN; |
523 | num_strings++; | 543 | num_strings++; |
524 | sprintf(p, "\tDest IP mask: 0x%x\n", | 544 | sprintf(p, "\tDest IP mask: 0x%x\n", |
525 | fsc->fs.m_u.tcp_ip4_spec.ip4dst); | 545 | fsc->fs.m_u.tcp_ip4_spec.ip4dst); |
526 | p += ETH_GSTRING_LEN; | 546 | p += ETH_GSTRING_LEN; |
527 | num_strings++; | 547 | num_strings++; |
528 | sprintf(p, "\tSrc Port: %d, mask: 0x%x\n", | 548 | sprintf(p, "\tSrc Port: %d, mask: 0x%x\n", |
529 | fsc->fs.h_u.tcp_ip4_spec.psrc, | 549 | fsc->fs.h_u.tcp_ip4_spec.psrc, |
530 | fsc->fs.m_u.tcp_ip4_spec.psrc); | 550 | fsc->fs.m_u.tcp_ip4_spec.psrc); |
531 | p += ETH_GSTRING_LEN; | 551 | p += ETH_GSTRING_LEN; |
532 | num_strings++; | 552 | num_strings++; |
533 | sprintf(p, "\tDest Port: %d, mask: 0x%x\n", | 553 | sprintf(p, "\tDest Port: %d, mask: 0x%x\n", |
534 | fsc->fs.h_u.tcp_ip4_spec.pdst, | 554 | fsc->fs.h_u.tcp_ip4_spec.pdst, |
535 | fsc->fs.m_u.tcp_ip4_spec.pdst); | 555 | fsc->fs.m_u.tcp_ip4_spec.pdst); |
536 | p += ETH_GSTRING_LEN; | 556 | p += ETH_GSTRING_LEN; |
537 | num_strings++; | 557 | num_strings++; |
538 | sprintf(p, "\tTOS: %d, mask: 0x%x\n", | 558 | sprintf(p, "\tTOS: %d, mask: 0x%x\n", |
539 | fsc->fs.h_u.tcp_ip4_spec.tos, | 559 | fsc->fs.h_u.tcp_ip4_spec.tos, |
540 | fsc->fs.m_u.tcp_ip4_spec.tos); | 560 | fsc->fs.m_u.tcp_ip4_spec.tos); |
541 | p += ETH_GSTRING_LEN; | 561 | p += ETH_GSTRING_LEN; |
542 | num_strings++; | 562 | num_strings++; |
543 | break; | 563 | break; |
544 | case AH_ESP_V4_FLOW: | 564 | case AH_ESP_V4_FLOW: |
545 | case ESP_V4_FLOW: | 565 | case ESP_V4_FLOW: |
546 | sprintf(p, "\tSrc IP addr: 0x%x\n", | 566 | sprintf(p, "\tSrc IP addr: 0x%x\n", |
547 | fsc->fs.h_u.ah_ip4_spec.ip4src); | 567 | fsc->fs.h_u.ah_ip4_spec.ip4src); |
548 | p += ETH_GSTRING_LEN; | 568 | p += ETH_GSTRING_LEN; |
549 | num_strings++; | 569 | num_strings++; |
550 | sprintf(p, "\tSrc IP mask: 0x%x\n", | 570 | sprintf(p, "\tSrc IP mask: 0x%x\n", |
551 | fsc->fs.m_u.ah_ip4_spec.ip4src); | 571 | fsc->fs.m_u.ah_ip4_spec.ip4src); |
552 | p += ETH_GSTRING_LEN; | 572 | p += ETH_GSTRING_LEN; |
553 | num_strings++; | 573 | num_strings++; |
554 | sprintf(p, "\tDest IP addr: 0x%x\n", | 574 | sprintf(p, "\tDest IP addr: 0x%x\n", |
555 | fsc->fs.h_u.ah_ip4_spec.ip4dst); | 575 | fsc->fs.h_u.ah_ip4_spec.ip4dst); |
556 | p += ETH_GSTRING_LEN; | 576 | p += ETH_GSTRING_LEN; |
557 | num_strings++; | 577 | num_strings++; |
558 | sprintf(p, "\tDest IP mask: 0x%x\n", | 578 | sprintf(p, "\tDest IP mask: 0x%x\n", |
559 | fsc->fs.m_u.ah_ip4_spec.ip4dst); | 579 | fsc->fs.m_u.ah_ip4_spec.ip4dst); |
560 | p += ETH_GSTRING_LEN; | 580 | p += ETH_GSTRING_LEN; |
561 | num_strings++; | 581 | num_strings++; |
562 | sprintf(p, "\tSPI: %d, mask: 0x%x\n", | 582 | sprintf(p, "\tSPI: %d, mask: 0x%x\n", |
563 | fsc->fs.h_u.ah_ip4_spec.spi, | 583 | fsc->fs.h_u.ah_ip4_spec.spi, |
564 | fsc->fs.m_u.ah_ip4_spec.spi); | 584 | fsc->fs.m_u.ah_ip4_spec.spi); |
565 | p += ETH_GSTRING_LEN; | 585 | p += ETH_GSTRING_LEN; |
566 | num_strings++; | 586 | num_strings++; |
567 | sprintf(p, "\tTOS: %d, mask: 0x%x\n", | 587 | sprintf(p, "\tTOS: %d, mask: 0x%x\n", |
568 | fsc->fs.h_u.ah_ip4_spec.tos, | 588 | fsc->fs.h_u.ah_ip4_spec.tos, |
569 | fsc->fs.m_u.ah_ip4_spec.tos); | 589 | fsc->fs.m_u.ah_ip4_spec.tos); |
570 | p += ETH_GSTRING_LEN; | 590 | p += ETH_GSTRING_LEN; |
571 | num_strings++; | 591 | num_strings++; |
572 | break; | 592 | break; |
573 | case IP_USER_FLOW: | 593 | case IP_USER_FLOW: |
574 | sprintf(p, "\tSrc IP addr: 0x%x\n", | 594 | sprintf(p, "\tSrc IP addr: 0x%x\n", |
575 | fsc->fs.h_u.raw_ip4_spec.ip4src); | 595 | fsc->fs.h_u.raw_ip4_spec.ip4src); |
576 | p += ETH_GSTRING_LEN; | 596 | p += ETH_GSTRING_LEN; |
577 | num_strings++; | 597 | num_strings++; |
578 | sprintf(p, "\tSrc IP mask: 0x%x\n", | 598 | sprintf(p, "\tSrc IP mask: 0x%x\n", |
579 | fsc->fs.m_u.raw_ip4_spec.ip4src); | 599 | fsc->fs.m_u.raw_ip4_spec.ip4src); |
580 | p += ETH_GSTRING_LEN; | 600 | p += ETH_GSTRING_LEN; |
581 | num_strings++; | 601 | num_strings++; |
582 | sprintf(p, "\tDest IP addr: 0x%x\n", | 602 | sprintf(p, "\tDest IP addr: 0x%x\n", |
583 | fsc->fs.h_u.raw_ip4_spec.ip4dst); | 603 | fsc->fs.h_u.raw_ip4_spec.ip4dst); |
584 | p += ETH_GSTRING_LEN; | 604 | p += ETH_GSTRING_LEN; |
585 | num_strings++; | 605 | num_strings++; |
586 | sprintf(p, "\tDest IP mask: 0x%x\n", | 606 | sprintf(p, "\tDest IP mask: 0x%x\n", |
587 | fsc->fs.m_u.raw_ip4_spec.ip4dst); | 607 | fsc->fs.m_u.raw_ip4_spec.ip4dst); |
588 | p += ETH_GSTRING_LEN; | 608 | p += ETH_GSTRING_LEN; |
589 | num_strings++; | 609 | num_strings++; |
590 | break; | 610 | break; |
591 | case IPV4_FLOW: | 611 | case IPV4_FLOW: |
592 | sprintf(p, "\tSrc IP addr: 0x%x\n", | 612 | sprintf(p, "\tSrc IP addr: 0x%x\n", |
593 | fsc->fs.h_u.usr_ip4_spec.ip4src); | 613 | fsc->fs.h_u.usr_ip4_spec.ip4src); |
594 | p += ETH_GSTRING_LEN; | 614 | p += ETH_GSTRING_LEN; |
595 | num_strings++; | 615 | num_strings++; |
596 | sprintf(p, "\tSrc IP mask: 0x%x\n", | 616 | sprintf(p, "\tSrc IP mask: 0x%x\n", |
597 | fsc->fs.m_u.usr_ip4_spec.ip4src); | 617 | fsc->fs.m_u.usr_ip4_spec.ip4src); |
598 | p += ETH_GSTRING_LEN; | 618 | p += ETH_GSTRING_LEN; |
599 | num_strings++; | 619 | num_strings++; |
600 | sprintf(p, "\tDest IP addr: 0x%x\n", | 620 | sprintf(p, "\tDest IP addr: 0x%x\n", |
601 | fsc->fs.h_u.usr_ip4_spec.ip4dst); | 621 | fsc->fs.h_u.usr_ip4_spec.ip4dst); |
602 | p += ETH_GSTRING_LEN; | 622 | p += ETH_GSTRING_LEN; |
603 | num_strings++; | 623 | num_strings++; |
604 | sprintf(p, "\tDest IP mask: 0x%x\n", | 624 | sprintf(p, "\tDest IP mask: 0x%x\n", |
605 | fsc->fs.m_u.usr_ip4_spec.ip4dst); | 625 | fsc->fs.m_u.usr_ip4_spec.ip4dst); |
606 | p += ETH_GSTRING_LEN; | 626 | p += ETH_GSTRING_LEN; |
607 | num_strings++; | 627 | num_strings++; |
608 | sprintf(p, "\tL4 bytes: 0x%x, mask: 0x%x\n", | 628 | sprintf(p, "\tL4 bytes: 0x%x, mask: 0x%x\n", |
609 | fsc->fs.h_u.usr_ip4_spec.l4_4_bytes, | 629 | fsc->fs.h_u.usr_ip4_spec.l4_4_bytes, |
610 | fsc->fs.m_u.usr_ip4_spec.l4_4_bytes); | 630 | fsc->fs.m_u.usr_ip4_spec.l4_4_bytes); |
611 | p += ETH_GSTRING_LEN; | 631 | p += ETH_GSTRING_LEN; |
612 | num_strings++; | 632 | num_strings++; |
613 | sprintf(p, "\tTOS: %d, mask: 0x%x\n", | 633 | sprintf(p, "\tTOS: %d, mask: 0x%x\n", |
614 | fsc->fs.h_u.usr_ip4_spec.tos, | 634 | fsc->fs.h_u.usr_ip4_spec.tos, |
615 | fsc->fs.m_u.usr_ip4_spec.tos); | 635 | fsc->fs.m_u.usr_ip4_spec.tos); |
616 | p += ETH_GSTRING_LEN; | 636 | p += ETH_GSTRING_LEN; |
617 | num_strings++; | 637 | num_strings++; |
618 | sprintf(p, "\tIP Version: %d, mask: 0x%x\n", | 638 | sprintf(p, "\tIP Version: %d, mask: 0x%x\n", |
619 | fsc->fs.h_u.usr_ip4_spec.ip_ver, | 639 | fsc->fs.h_u.usr_ip4_spec.ip_ver, |
620 | fsc->fs.m_u.usr_ip4_spec.ip_ver); | 640 | fsc->fs.m_u.usr_ip4_spec.ip_ver); |
621 | p += ETH_GSTRING_LEN; | 641 | p += ETH_GSTRING_LEN; |
622 | num_strings++; | 642 | num_strings++; |
623 | sprintf(p, "\tProtocol: %d, mask: 0x%x\n", | 643 | sprintf(p, "\tProtocol: %d, mask: 0x%x\n", |
624 | fsc->fs.h_u.usr_ip4_spec.proto, | 644 | fsc->fs.h_u.usr_ip4_spec.proto, |
625 | fsc->fs.m_u.usr_ip4_spec.proto); | 645 | fsc->fs.m_u.usr_ip4_spec.proto); |
626 | p += ETH_GSTRING_LEN; | 646 | p += ETH_GSTRING_LEN; |
627 | num_strings++; | 647 | num_strings++; |
628 | break; | 648 | break; |
629 | }; | 649 | } |
630 | sprintf(p, "\tVLAN: %d, mask: 0x%x\n", | 650 | sprintf(p, "\tVLAN: %d, mask: 0x%x\n", |
631 | fsc->fs.vlan_tag, fsc->fs.vlan_tag_mask); | 651 | fsc->fs.vlan_tag, fsc->fs.vlan_tag_mask); |
632 | p += ETH_GSTRING_LEN; | 652 | p += ETH_GSTRING_LEN; |
633 | num_strings++; | 653 | num_strings++; |
634 | sprintf(p, "\tUser-defined: 0x%Lx\n", fsc->fs.data); | 654 | sprintf(p, "\tUser-defined: 0x%Lx\n", fsc->fs.data); |
@@ -641,7 +661,7 @@ static int ethtool_get_rx_ntuple(struct net_device *dev, void __user *useraddr) | |||
641 | sprintf(p, "\tAction: Drop\n"); | 661 | sprintf(p, "\tAction: Drop\n"); |
642 | else | 662 | else |
643 | sprintf(p, "\tAction: Direct to queue %d\n", | 663 | sprintf(p, "\tAction: Direct to queue %d\n", |
644 | fsc->fs.action); | 664 | fsc->fs.action); |
645 | p += ETH_GSTRING_LEN; | 665 | p += ETH_GSTRING_LEN; |
646 | num_strings++; | 666 | num_strings++; |
647 | unknown_filter: | 667 | unknown_filter: |
@@ -853,7 +873,8 @@ static int ethtool_set_eeprom(struct net_device *dev, void __user *useraddr) | |||
853 | return ret; | 873 | return ret; |
854 | } | 874 | } |
855 | 875 | ||
856 | static noinline_for_stack int ethtool_get_coalesce(struct net_device *dev, void __user *useraddr) | 876 | static noinline_for_stack int ethtool_get_coalesce(struct net_device *dev, |
877 | void __user *useraddr) | ||
857 | { | 878 | { |
858 | struct ethtool_coalesce coalesce = { .cmd = ETHTOOL_GCOALESCE }; | 879 | struct ethtool_coalesce coalesce = { .cmd = ETHTOOL_GCOALESCE }; |
859 | 880 | ||
@@ -867,7 +888,8 @@ static noinline_for_stack int ethtool_get_coalesce(struct net_device *dev, void | |||
867 | return 0; | 888 | return 0; |
868 | } | 889 | } |
869 | 890 | ||
870 | static noinline_for_stack int ethtool_set_coalesce(struct net_device *dev, void __user *useraddr) | 891 | static noinline_for_stack int ethtool_set_coalesce(struct net_device *dev, |
892 | void __user *useraddr) | ||
871 | { | 893 | { |
872 | struct ethtool_coalesce coalesce; | 894 | struct ethtool_coalesce coalesce; |
873 | 895 | ||
@@ -971,6 +993,7 @@ static int ethtool_set_tx_csum(struct net_device *dev, char __user *useraddr) | |||
971 | 993 | ||
972 | return dev->ethtool_ops->set_tx_csum(dev, edata.data); | 994 | return dev->ethtool_ops->set_tx_csum(dev, edata.data); |
973 | } | 995 | } |
996 | EXPORT_SYMBOL(ethtool_op_set_tx_csum); | ||
974 | 997 | ||
975 | static int ethtool_set_rx_csum(struct net_device *dev, char __user *useraddr) | 998 | static int ethtool_set_rx_csum(struct net_device *dev, char __user *useraddr) |
976 | { | 999 | { |
@@ -1042,7 +1065,7 @@ static int ethtool_get_gso(struct net_device *dev, char __user *useraddr) | |||
1042 | 1065 | ||
1043 | edata.data = dev->features & NETIF_F_GSO; | 1066 | edata.data = dev->features & NETIF_F_GSO; |
1044 | if (copy_to_user(useraddr, &edata, sizeof(edata))) | 1067 | if (copy_to_user(useraddr, &edata, sizeof(edata))) |
1045 | return -EFAULT; | 1068 | return -EFAULT; |
1046 | return 0; | 1069 | return 0; |
1047 | } | 1070 | } |
1048 | 1071 | ||
@@ -1065,7 +1088,7 @@ static int ethtool_get_gro(struct net_device *dev, char __user *useraddr) | |||
1065 | 1088 | ||
1066 | edata.data = dev->features & NETIF_F_GRO; | 1089 | edata.data = dev->features & NETIF_F_GRO; |
1067 | if (copy_to_user(useraddr, &edata, sizeof(edata))) | 1090 | if (copy_to_user(useraddr, &edata, sizeof(edata))) |
1068 | return -EFAULT; | 1091 | return -EFAULT; |
1069 | return 0; | 1092 | return 0; |
1070 | } | 1093 | } |
1071 | 1094 | ||
@@ -1277,7 +1300,8 @@ static int ethtool_set_value(struct net_device *dev, char __user *useraddr, | |||
1277 | return actor(dev, edata.data); | 1300 | return actor(dev, edata.data); |
1278 | } | 1301 | } |
1279 | 1302 | ||
1280 | static noinline_for_stack int ethtool_flash_device(struct net_device *dev, char __user *useraddr) | 1303 | static noinline_for_stack int ethtool_flash_device(struct net_device *dev, |
1304 | char __user *useraddr) | ||
1281 | { | 1305 | { |
1282 | struct ethtool_flash efl; | 1306 | struct ethtool_flash efl; |
1283 | 1307 | ||
@@ -1306,11 +1330,11 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) | |||
1306 | if (!dev->ethtool_ops) | 1330 | if (!dev->ethtool_ops) |
1307 | return -EOPNOTSUPP; | 1331 | return -EOPNOTSUPP; |
1308 | 1332 | ||
1309 | if (copy_from_user(ðcmd, useraddr, sizeof (ethcmd))) | 1333 | if (copy_from_user(ðcmd, useraddr, sizeof(ethcmd))) |
1310 | return -EFAULT; | 1334 | return -EFAULT; |
1311 | 1335 | ||
1312 | /* Allow some commands to be done by anyone */ | 1336 | /* Allow some commands to be done by anyone */ |
1313 | switch(ethcmd) { | 1337 | switch (ethcmd) { |
1314 | case ETHTOOL_GDRVINFO: | 1338 | case ETHTOOL_GDRVINFO: |
1315 | case ETHTOOL_GMSGLVL: | 1339 | case ETHTOOL_GMSGLVL: |
1316 | case ETHTOOL_GCOALESCE: | 1340 | case ETHTOOL_GCOALESCE: |
@@ -1338,10 +1362,11 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) | |||
1338 | return -EPERM; | 1362 | return -EPERM; |
1339 | } | 1363 | } |
1340 | 1364 | ||
1341 | if (dev->ethtool_ops->begin) | 1365 | if (dev->ethtool_ops->begin) { |
1342 | if ((rc = dev->ethtool_ops->begin(dev)) < 0) | 1366 | rc = dev->ethtool_ops->begin(dev); |
1367 | if (rc < 0) | ||
1343 | return rc; | 1368 | return rc; |
1344 | 1369 | } | |
1345 | old_features = dev->features; | 1370 | old_features = dev->features; |
1346 | 1371 | ||
1347 | switch (ethcmd) { | 1372 | switch (ethcmd) { |
@@ -1531,16 +1556,3 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) | |||
1531 | 1556 | ||
1532 | return rc; | 1557 | return rc; |
1533 | } | 1558 | } |
1534 | |||
1535 | EXPORT_SYMBOL(ethtool_op_get_link); | ||
1536 | EXPORT_SYMBOL(ethtool_op_get_sg); | ||
1537 | EXPORT_SYMBOL(ethtool_op_get_tso); | ||
1538 | EXPORT_SYMBOL(ethtool_op_set_sg); | ||
1539 | EXPORT_SYMBOL(ethtool_op_set_tso); | ||
1540 | EXPORT_SYMBOL(ethtool_op_set_tx_csum); | ||
1541 | EXPORT_SYMBOL(ethtool_op_set_tx_hw_csum); | ||
1542 | EXPORT_SYMBOL(ethtool_op_set_tx_ipv6_csum); | ||
1543 | EXPORT_SYMBOL(ethtool_op_set_ufo); | ||
1544 | EXPORT_SYMBOL(ethtool_op_get_ufo); | ||
1545 | EXPORT_SYMBOL(ethtool_op_set_flags); | ||
1546 | EXPORT_SYMBOL(ethtool_op_get_flags); | ||
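Every handler above is reached through the SIOCETHTOOL ioctl that dev_ethtool() dispatches. A bare-bones userspace sketch for one of the commands in the allowed-for-anyone switch, ETHTOOL_GDRVINFO ("eth0" is a placeholder; error handling omitted):

#include <string.h>
#include <sys/ioctl.h>
#include <net/if.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>

static int get_drvinfo(int fd)
{
	struct ethtool_drvinfo info = { .cmd = ETHTOOL_GDRVINFO };
	struct ifreq ifr;

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
	ifr.ifr_data = (char *)&info;
	return ioctl(fd, SIOCETHTOOL, &ifr);	/* fd: any open AF_INET socket */
}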
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index d2c3e7dc2e5f..42e84e08a1be 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c | |||
@@ -39,6 +39,24 @@ int fib_default_rule_add(struct fib_rules_ops *ops, | |||
39 | } | 39 | } |
40 | EXPORT_SYMBOL(fib_default_rule_add); | 40 | EXPORT_SYMBOL(fib_default_rule_add); |
41 | 41 | ||
42 | u32 fib_default_rule_pref(struct fib_rules_ops *ops) | ||
43 | { | ||
44 | struct list_head *pos; | ||
45 | struct fib_rule *rule; | ||
46 | |||
47 | if (!list_empty(&ops->rules_list)) { | ||
48 | pos = ops->rules_list.next; | ||
49 | if (pos->next != &ops->rules_list) { | ||
50 | rule = list_entry(pos->next, struct fib_rule, list); | ||
51 | if (rule->pref) | ||
52 | return rule->pref - 1; | ||
53 | } | ||
54 | } | ||
55 | |||
56 | return 0; | ||
57 | } | ||
58 | EXPORT_SYMBOL(fib_default_rule_pref); | ||
59 | |||
42 | static void notify_rule_change(int event, struct fib_rule *rule, | 60 | static void notify_rule_change(int event, struct fib_rule *rule, |
43 | struct fib_rules_ops *ops, struct nlmsghdr *nlh, | 61 | struct fib_rules_ops *ops, struct nlmsghdr *nlh, |
44 | u32 pid); | 62 | u32 pid); |
@@ -104,12 +122,12 @@ errout: | |||
104 | } | 122 | } |
105 | 123 | ||
106 | struct fib_rules_ops * | 124 | struct fib_rules_ops * |
107 | fib_rules_register(struct fib_rules_ops *tmpl, struct net *net) | 125 | fib_rules_register(const struct fib_rules_ops *tmpl, struct net *net) |
108 | { | 126 | { |
109 | struct fib_rules_ops *ops; | 127 | struct fib_rules_ops *ops; |
110 | int err; | 128 | int err; |
111 | 129 | ||
112 | ops = kmemdup(tmpl, sizeof (*ops), GFP_KERNEL); | 130 | ops = kmemdup(tmpl, sizeof(*ops), GFP_KERNEL); |
113 | if (ops == NULL) | 131 | if (ops == NULL) |
114 | return ERR_PTR(-ENOMEM); | 132 | return ERR_PTR(-ENOMEM); |
115 | 133 | ||
@@ -124,7 +142,6 @@ fib_rules_register(struct fib_rules_ops *tmpl, struct net *net) | |||
124 | 142 | ||
125 | return ops; | 143 | return ops; |
126 | } | 144 | } |
127 | |||
128 | EXPORT_SYMBOL_GPL(fib_rules_register); | 145 | EXPORT_SYMBOL_GPL(fib_rules_register); |
129 | 146 | ||
130 | void fib_rules_cleanup_ops(struct fib_rules_ops *ops) | 147 | void fib_rules_cleanup_ops(struct fib_rules_ops *ops) |
@@ -158,7 +175,6 @@ void fib_rules_unregister(struct fib_rules_ops *ops) | |||
158 | 175 | ||
159 | call_rcu(&ops->rcu, fib_rules_put_rcu); | 176 | call_rcu(&ops->rcu, fib_rules_put_rcu); |
160 | } | 177 | } |
161 | |||
162 | EXPORT_SYMBOL_GPL(fib_rules_unregister); | 178 | EXPORT_SYMBOL_GPL(fib_rules_unregister); |
163 | 179 | ||
164 | static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, | 180 | static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, |
@@ -221,7 +237,6 @@ out: | |||
221 | 237 | ||
222 | return err; | 238 | return err; |
223 | } | 239 | } |
224 | |||
225 | EXPORT_SYMBOL_GPL(fib_rules_lookup); | 240 | EXPORT_SYMBOL_GPL(fib_rules_lookup); |
226 | 241 | ||
227 | static int validate_rulemsg(struct fib_rule_hdr *frh, struct nlattr **tb, | 242 | static int validate_rulemsg(struct fib_rule_hdr *frh, struct nlattr **tb, |
@@ -520,6 +535,7 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, | |||
520 | return -EMSGSIZE; | 535 | return -EMSGSIZE; |
521 | 536 | ||
522 | frh = nlmsg_data(nlh); | 537 | frh = nlmsg_data(nlh); |
538 | frh->family = ops->family; | ||
523 | frh->table = rule->table; | 539 | frh->table = rule->table; |
524 | NLA_PUT_U32(skb, FRA_TABLE, rule->table); | 540 | NLA_PUT_U32(skb, FRA_TABLE, rule->table); |
525 | frh->res1 = 0; | 541 | frh->res1 = 0; |
@@ -614,7 +630,7 @@ static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb) | |||
614 | break; | 630 | break; |
615 | 631 | ||
616 | cb->args[1] = 0; | 632 | cb->args[1] = 0; |
617 | skip: | 633 | skip: |
618 | idx++; | 634 | idx++; |
619 | } | 635 | } |
620 | rcu_read_unlock(); | 636 | rcu_read_unlock(); |
@@ -686,7 +702,6 @@ static int fib_rules_event(struct notifier_block *this, unsigned long event, | |||
686 | struct fib_rules_ops *ops; | 702 | struct fib_rules_ops *ops; |
687 | 703 | ||
688 | ASSERT_RTNL(); | 704 | ASSERT_RTNL(); |
689 | rcu_read_lock(); | ||
690 | 705 | ||
691 | switch (event) { | 706 | switch (event) { |
692 | case NETDEV_REGISTER: | 707 | case NETDEV_REGISTER: |
@@ -700,8 +715,6 @@ static int fib_rules_event(struct notifier_block *this, unsigned long event, | |||
700 | break; | 715 | break; |
701 | } | 716 | } |
702 | 717 | ||
703 | rcu_read_unlock(); | ||
704 | |||
705 | return NOTIFY_DONE; | 718 | return NOTIFY_DONE; |
706 | } | 719 | } |
707 | 720 | ||
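A gloss on the new fib_default_rule_pref(): it looks past the first rule on the list and, if a second rule with a nonzero preference exists, returns that preference minus one; otherwise it returns 0. With the usual IPv4 rule set (pref 0 local, 32766 main, 32767 default), the first rule is the pref-0 local rule and the second is main at 32766, so a rule added without an explicit preference lands at 32765, just above the main table.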
diff --git a/net/core/filter.c b/net/core/filter.c index ff943bed21af..da69fb728d32 100644 --- a/net/core/filter.c +++ b/net/core/filter.c | |||
@@ -302,6 +302,8 @@ load_b: | |||
302 | A = skb->pkt_type; | 302 | A = skb->pkt_type; |
303 | continue; | 303 | continue; |
304 | case SKF_AD_IFINDEX: | 304 | case SKF_AD_IFINDEX: |
305 | if (!skb->dev) | ||
306 | return 0; | ||
305 | A = skb->dev->ifindex; | 307 | A = skb->dev->ifindex; |
306 | continue; | 308 | continue; |
307 | case SKF_AD_MARK: | 309 | case SKF_AD_MARK: |
@@ -310,6 +312,11 @@ load_b: | |||
310 | case SKF_AD_QUEUE: | 312 | case SKF_AD_QUEUE: |
311 | A = skb->queue_mapping; | 313 | A = skb->queue_mapping; |
312 | continue; | 314 | continue; |
315 | case SKF_AD_HATYPE: | ||
316 | if (!skb->dev) | ||
317 | return 0; | ||
318 | A = skb->dev->type; | ||
319 | continue; | ||
313 | case SKF_AD_NLATTR: { | 320 | case SKF_AD_NLATTR: { |
314 | struct nlattr *nla; | 321 | struct nlattr *nla; |
315 | 322 | ||
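The two new !skb->dev guards make these ancillary loads abort the filter (returning 0, i.e. drop) when no device is attached, rather than dereferencing a NULL pointer, and SKF_AD_HATYPE newly exposes dev->type. A userspace sketch of a classic BPF program using it (fd is a placeholder socket descriptor):

#include <sys/socket.h>
#include <linux/filter.h>
#include <linux/if_arp.h>

/* Keep only packets whose receiving device is plain Ethernet,
 * via the new SKF_AD_HATYPE ancillary load. */
static struct sock_filter code[] = {
	BPF_STMT(BPF_LD | BPF_W | BPF_ABS, SKF_AD_OFF + SKF_AD_HATYPE),
	BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, ARPHRD_ETHER, 0, 1),
	BPF_STMT(BPF_RET | BPF_K, 0xffff),	/* accept */
	BPF_STMT(BPF_RET | BPF_K, 0),		/* drop */
};

static int attach_filter(int fd)
{
	struct sock_fprog prog = {
		.len	= sizeof(code) / sizeof(code[0]),
		.filter	= code,
	};

	return setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER,
			  &prog, sizeof(prog));
}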
diff --git a/net/core/flow.c b/net/core/flow.c index 96015871ecea..161900674009 100644 --- a/net/core/flow.c +++ b/net/core/flow.c | |||
@@ -26,113 +26,158 @@ | |||
26 | #include <linux/security.h> | 26 | #include <linux/security.h> |
27 | 27 | ||
28 | struct flow_cache_entry { | 28 | struct flow_cache_entry { |
29 | struct flow_cache_entry *next; | 29 | union { |
30 | u16 family; | 30 | struct hlist_node hlist; |
31 | u8 dir; | 31 | struct list_head gc_list; |
32 | u32 genid; | 32 | } u; |
33 | struct flowi key; | 33 | u16 family; |
34 | void *object; | 34 | u8 dir; |
35 | atomic_t *object_ref; | 35 | u32 genid; |
36 | struct flowi key; | ||
37 | struct flow_cache_object *object; | ||
36 | }; | 38 | }; |
37 | 39 | ||
38 | atomic_t flow_cache_genid = ATOMIC_INIT(0); | 40 | struct flow_cache_percpu { |
39 | 41 | struct hlist_head *hash_table; | |
40 | static u32 flow_hash_shift; | 42 | int hash_count; |
41 | #define flow_hash_size (1 << flow_hash_shift) | 43 | u32 hash_rnd; |
42 | static DEFINE_PER_CPU(struct flow_cache_entry **, flow_tables) = { NULL }; | 44 | int hash_rnd_recalc; |
43 | 45 | struct tasklet_struct flush_tasklet; | |
44 | #define flow_table(cpu) (per_cpu(flow_tables, cpu)) | 46 | }; |
45 | |||
46 | static struct kmem_cache *flow_cachep __read_mostly; | ||
47 | 47 | ||
48 | static int flow_lwm, flow_hwm; | 48 | struct flow_flush_info { |
49 | struct flow_cache *cache; | ||
50 | atomic_t cpuleft; | ||
51 | struct completion completion; | ||
52 | }; | ||
49 | 53 | ||
50 | struct flow_percpu_info { | 54 | struct flow_cache { |
51 | int hash_rnd_recalc; | 55 | u32 hash_shift; |
52 | u32 hash_rnd; | 56 | unsigned long order; |
53 | int count; | 57 | struct flow_cache_percpu *percpu; |
58 | struct notifier_block hotcpu_notifier; | ||
59 | int low_watermark; | ||
60 | int high_watermark; | ||
61 | struct timer_list rnd_timer; | ||
54 | }; | 62 | }; |
55 | static DEFINE_PER_CPU(struct flow_percpu_info, flow_hash_info) = { 0 }; | ||
56 | 63 | ||
57 | #define flow_hash_rnd_recalc(cpu) \ | 64 | atomic_t flow_cache_genid = ATOMIC_INIT(0); |
58 | (per_cpu(flow_hash_info, cpu).hash_rnd_recalc) | 65 | static struct flow_cache flow_cache_global; |
59 | #define flow_hash_rnd(cpu) \ | 66 | static struct kmem_cache *flow_cachep; |
60 | (per_cpu(flow_hash_info, cpu).hash_rnd) | ||
61 | #define flow_count(cpu) \ | ||
62 | (per_cpu(flow_hash_info, cpu).count) | ||
63 | 67 | ||
64 | static struct timer_list flow_hash_rnd_timer; | 68 | static DEFINE_SPINLOCK(flow_cache_gc_lock); |
69 | static LIST_HEAD(flow_cache_gc_list); | ||
65 | 70 | ||
66 | #define FLOW_HASH_RND_PERIOD (10 * 60 * HZ) | 71 | #define flow_cache_hash_size(cache) (1 << (cache)->hash_shift) |
67 | 72 | #define FLOW_HASH_RND_PERIOD (10 * 60 * HZ) | |
68 | struct flow_flush_info { | ||
69 | atomic_t cpuleft; | ||
70 | struct completion completion; | ||
71 | }; | ||
72 | static DEFINE_PER_CPU(struct tasklet_struct, flow_flush_tasklets) = { NULL }; | ||
73 | |||
74 | #define flow_flush_tasklet(cpu) (&per_cpu(flow_flush_tasklets, cpu)) | ||
75 | 73 | ||
76 | static void flow_cache_new_hashrnd(unsigned long arg) | 74 | static void flow_cache_new_hashrnd(unsigned long arg) |
77 | { | 75 | { |
76 | struct flow_cache *fc = (void *) arg; | ||
78 | int i; | 77 | int i; |
79 | 78 | ||
80 | for_each_possible_cpu(i) | 79 | for_each_possible_cpu(i) |
81 | flow_hash_rnd_recalc(i) = 1; | 80 | per_cpu_ptr(fc->percpu, i)->hash_rnd_recalc = 1; |
82 | 81 | ||
83 | flow_hash_rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD; | 82 | fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD; |
84 | add_timer(&flow_hash_rnd_timer); | 83 | add_timer(&fc->rnd_timer); |
84 | } | ||
85 | |||
86 | static int flow_entry_valid(struct flow_cache_entry *fle) | ||
87 | { | ||
88 | if (atomic_read(&flow_cache_genid) != fle->genid) | ||
89 | return 0; | ||
90 | if (fle->object && !fle->object->ops->check(fle->object)) | ||
91 | return 0; | ||
92 | return 1; | ||
85 | } | 93 | } |
86 | 94 | ||
87 | static void flow_entry_kill(int cpu, struct flow_cache_entry *fle) | 95 | static void flow_entry_kill(struct flow_cache_entry *fle) |
88 | { | 96 | { |
89 | if (fle->object) | 97 | if (fle->object) |
90 | atomic_dec(fle->object_ref); | 98 | fle->object->ops->delete(fle->object); |
91 | kmem_cache_free(flow_cachep, fle); | 99 | kmem_cache_free(flow_cachep, fle); |
92 | flow_count(cpu)--; | ||
93 | } | 100 | } |
94 | 101 | ||
95 | static void __flow_cache_shrink(int cpu, int shrink_to) | 102 | static void flow_cache_gc_task(struct work_struct *work) |
96 | { | 103 | { |
97 | struct flow_cache_entry *fle, **flp; | 104 | struct list_head gc_list; |
98 | int i; | 105 | struct flow_cache_entry *fce, *n; |
99 | 106 | ||
100 | for (i = 0; i < flow_hash_size; i++) { | 107 | INIT_LIST_HEAD(&gc_list); |
101 | int k = 0; | 108 | spin_lock_bh(&flow_cache_gc_lock); |
109 | list_splice_tail_init(&flow_cache_gc_list, &gc_list); | ||
110 | spin_unlock_bh(&flow_cache_gc_lock); | ||
102 | 111 | ||
103 | flp = &flow_table(cpu)[i]; | 112 | list_for_each_entry_safe(fce, n, &gc_list, u.gc_list) |
104 | while ((fle = *flp) != NULL && k < shrink_to) { | 113 | flow_entry_kill(fce); |
105 | k++; | 114 | } |
106 | flp = &fle->next; | 115 | static DECLARE_WORK(flow_cache_gc_work, flow_cache_gc_task); |
107 | } | 116 | |
108 | while ((fle = *flp) != NULL) { | 117 | static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp, |
109 | *flp = fle->next; | 118 | int deleted, struct list_head *gc_list) |
110 | flow_entry_kill(cpu, fle); | 119 | { |
111 | } | 120 | if (deleted) { |
121 | fcp->hash_count -= deleted; | ||
122 | spin_lock_bh(&flow_cache_gc_lock); | ||
123 | list_splice_tail(gc_list, &flow_cache_gc_list); | ||
124 | spin_unlock_bh(&flow_cache_gc_lock); | ||
125 | schedule_work(&flow_cache_gc_work); | ||
112 | } | 126 | } |
113 | } | 127 | } |
114 | 128 | ||
115 | static void flow_cache_shrink(int cpu) | 129 | static void __flow_cache_shrink(struct flow_cache *fc, |
130 | struct flow_cache_percpu *fcp, | ||
131 | int shrink_to) | ||
116 | { | 132 | { |
117 | int shrink_to = flow_lwm / flow_hash_size; | 133 | struct flow_cache_entry *fle; |
134 | struct hlist_node *entry, *tmp; | ||
135 | LIST_HEAD(gc_list); | ||
136 | int i, deleted = 0; | ||
137 | |||
138 | for (i = 0; i < flow_cache_hash_size(fc); i++) { | ||
139 | int saved = 0; | ||
140 | |||
141 | hlist_for_each_entry_safe(fle, entry, tmp, | ||
142 | &fcp->hash_table[i], u.hlist) { | ||
143 | if (saved < shrink_to && | ||
144 | flow_entry_valid(fle)) { | ||
145 | saved++; | ||
146 | } else { | ||
147 | deleted++; | ||
148 | hlist_del(&fle->u.hlist); | ||
149 | list_add_tail(&fle->u.gc_list, &gc_list); | ||
150 | } | ||
151 | } | ||
152 | } | ||
118 | 153 | ||
119 | __flow_cache_shrink(cpu, shrink_to); | 154 | flow_cache_queue_garbage(fcp, deleted, &gc_list); |
120 | } | 155 | } |
121 | 156 | ||
122 | static void flow_new_hash_rnd(int cpu) | 157 | static void flow_cache_shrink(struct flow_cache *fc, |
158 | struct flow_cache_percpu *fcp) | ||
123 | { | 159 | { |
124 | get_random_bytes(&flow_hash_rnd(cpu), sizeof(u32)); | 160 | int shrink_to = fc->low_watermark / flow_cache_hash_size(fc); |
125 | flow_hash_rnd_recalc(cpu) = 0; | ||
126 | 161 | ||
127 | __flow_cache_shrink(cpu, 0); | 162 | __flow_cache_shrink(fc, fcp, shrink_to); |
128 | } | 163 | } |
129 | 164 | ||
130 | static u32 flow_hash_code(struct flowi *key, int cpu) | 165 | static void flow_new_hash_rnd(struct flow_cache *fc, |
166 | struct flow_cache_percpu *fcp) | ||
167 | { | ||
168 | get_random_bytes(&fcp->hash_rnd, sizeof(u32)); | ||
169 | fcp->hash_rnd_recalc = 0; | ||
170 | __flow_cache_shrink(fc, fcp, 0); | ||
171 | } | ||
172 | |||
173 | static u32 flow_hash_code(struct flow_cache *fc, | ||
174 | struct flow_cache_percpu *fcp, | ||
175 | struct flowi *key) | ||
131 | { | 176 | { |
132 | u32 *k = (u32 *) key; | 177 | u32 *k = (u32 *) key; |
133 | 178 | ||
134 | return (jhash2(k, (sizeof(*key) / sizeof(u32)), flow_hash_rnd(cpu)) & | 179 | return (jhash2(k, (sizeof(*key) / sizeof(u32)), fcp->hash_rnd) |
135 | (flow_hash_size - 1)); | 180 | & (flow_cache_hash_size(fc) - 1)); |
136 | } | 181 | } |
137 | 182 | ||
138 | #if (BITS_PER_LONG == 64) | 183 | #if (BITS_PER_LONG == 64) |
@@ -165,114 +210,117 @@ static int flow_key_compare(struct flowi *key1, struct flowi *key2) | |||
165 | return 0; | 210 | return 0; |
166 | } | 211 | } |
167 | 212 | ||
168 | void *flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir, | 213 | struct flow_cache_object * |
169 | flow_resolve_t resolver) | 214 | flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir, |
215 | flow_resolve_t resolver, void *ctx) | ||
170 | { | 216 | { |
171 | struct flow_cache_entry *fle, **head; | 217 | struct flow_cache *fc = &flow_cache_global; |
218 | struct flow_cache_percpu *fcp; | ||
219 | struct flow_cache_entry *fle, *tfle; | ||
220 | struct hlist_node *entry; | ||
221 | struct flow_cache_object *flo; | ||
172 | unsigned int hash; | 222 | unsigned int hash; |
173 | int cpu; | ||
174 | 223 | ||
175 | local_bh_disable(); | 224 | local_bh_disable(); |
176 | cpu = smp_processor_id(); | 225 | fcp = per_cpu_ptr(fc->percpu, smp_processor_id()); |
177 | 226 | ||
178 | fle = NULL; | 227 | fle = NULL; |
228 | flo = NULL; | ||
179 | /* Packet really early in init? Making flow_cache_init a | 229 | /* Packet really early in init? Making flow_cache_init a |
180 | * pre-smp initcall would solve this. --RR */ | 230 | * pre-smp initcall would solve this. --RR */ |
181 | if (!flow_table(cpu)) | 231 | if (!fcp->hash_table) |
182 | goto nocache; | 232 | goto nocache; |
183 | 233 | ||
184 | if (flow_hash_rnd_recalc(cpu)) | 234 | if (fcp->hash_rnd_recalc) |
185 | flow_new_hash_rnd(cpu); | 235 | flow_new_hash_rnd(fc, fcp); |
186 | hash = flow_hash_code(key, cpu); | ||
187 | 236 | ||
188 | head = &flow_table(cpu)[hash]; | 237 | hash = flow_hash_code(fc, fcp, key); |
189 | for (fle = *head; fle; fle = fle->next) { | 238 | hlist_for_each_entry(tfle, entry, &fcp->hash_table[hash], u.hlist) { |
190 | if (fle->family == family && | 239 | if (tfle->family == family && |
191 | fle->dir == dir && | 240 | tfle->dir == dir && |
192 | flow_key_compare(key, &fle->key) == 0) { | 241 | flow_key_compare(key, &tfle->key) == 0) { |
193 | if (fle->genid == atomic_read(&flow_cache_genid)) { | 242 | fle = tfle; |
194 | void *ret = fle->object; | ||
195 | |||
196 | if (ret) | ||
197 | atomic_inc(fle->object_ref); | ||
198 | local_bh_enable(); | ||
199 | |||
200 | return ret; | ||
201 | } | ||
202 | break; | 243 | break; |
203 | } | 244 | } |
204 | } | 245 | } |
205 | 246 | ||
206 | if (!fle) { | 247 | if (unlikely(!fle)) { |
207 | if (flow_count(cpu) > flow_hwm) | 248 | if (fcp->hash_count > fc->high_watermark) |
208 | flow_cache_shrink(cpu); | 249 | flow_cache_shrink(fc, fcp); |
209 | 250 | ||
210 | fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC); | 251 | fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC); |
211 | if (fle) { | 252 | if (fle) { |
212 | fle->next = *head; | ||
213 | *head = fle; | ||
214 | fle->family = family; | 253 | fle->family = family; |
215 | fle->dir = dir; | 254 | fle->dir = dir; |
216 | memcpy(&fle->key, key, sizeof(*key)); | 255 | memcpy(&fle->key, key, sizeof(*key)); |
217 | fle->object = NULL; | 256 | fle->object = NULL; |
218 | flow_count(cpu)++; | 257 | hlist_add_head(&fle->u.hlist, &fcp->hash_table[hash]); |
258 | fcp->hash_count++; | ||
219 | } | 259 | } |
260 | } else if (likely(fle->genid == atomic_read(&flow_cache_genid))) { | ||
261 | flo = fle->object; | ||
262 | if (!flo) | ||
263 | goto ret_object; | ||
264 | flo = flo->ops->get(flo); | ||
265 | if (flo) | ||
266 | goto ret_object; | ||
267 | } else if (fle->object) { | ||
268 | flo = fle->object; | ||
269 | flo->ops->delete(flo); | ||
270 | fle->object = NULL; | ||
220 | } | 271 | } |
221 | 272 | ||
222 | nocache: | 273 | nocache: |
223 | { | 274 | flo = NULL; |
224 | int err; | 275 | if (fle) { |
225 | void *obj; | 276 | flo = fle->object; |
226 | atomic_t *obj_ref; | 277 | fle->object = NULL; |
227 | |||
228 | err = resolver(net, key, family, dir, &obj, &obj_ref); | ||
229 | |||
230 | if (fle && !err) { | ||
231 | fle->genid = atomic_read(&flow_cache_genid); | ||
232 | |||
233 | if (fle->object) | ||
234 | atomic_dec(fle->object_ref); | ||
235 | |||
236 | fle->object = obj; | ||
237 | fle->object_ref = obj_ref; | ||
238 | if (obj) | ||
239 | atomic_inc(fle->object_ref); | ||
240 | } | ||
241 | local_bh_enable(); | ||
242 | |||
243 | if (err) | ||
244 | obj = ERR_PTR(err); | ||
245 | return obj; | ||
246 | } | 278 | } |
279 | flo = resolver(net, key, family, dir, flo, ctx); | ||
280 | if (fle) { | ||
281 | fle->genid = atomic_read(&flow_cache_genid); | ||
282 | if (!IS_ERR(flo)) | ||
283 | fle->object = flo; | ||
284 | else | ||
285 | fle->genid--; | ||
286 | } else { | ||
287 | if (flo && !IS_ERR(flo)) | ||
288 | flo->ops->delete(flo); | ||
289 | } | ||
290 | ret_object: | ||
291 | local_bh_enable(); | ||
292 | return flo; | ||
247 | } | 293 | } |
248 | 294 | ||
249 | static void flow_cache_flush_tasklet(unsigned long data) | 295 | static void flow_cache_flush_tasklet(unsigned long data) |
250 | { | 296 | { |
251 | struct flow_flush_info *info = (void *)data; | 297 | struct flow_flush_info *info = (void *)data; |
252 | int i; | 298 | struct flow_cache *fc = info->cache; |
253 | int cpu; | 299 | struct flow_cache_percpu *fcp; |
254 | 300 | struct flow_cache_entry *fle; | |
255 | cpu = smp_processor_id(); | 301 | struct hlist_node *entry, *tmp; |
256 | for (i = 0; i < flow_hash_size; i++) { | 302 | LIST_HEAD(gc_list); |
257 | struct flow_cache_entry *fle; | 303 | int i, deleted = 0; |
258 | 304 | ||
259 | fle = flow_table(cpu)[i]; | 305 | fcp = per_cpu_ptr(fc->percpu, smp_processor_id()); |
260 | for (; fle; fle = fle->next) { | 306 | for (i = 0; i < flow_cache_hash_size(fc); i++) { |
261 | unsigned genid = atomic_read(&flow_cache_genid); | 307 | hlist_for_each_entry_safe(fle, entry, tmp, |
262 | 308 | &fcp->hash_table[i], u.hlist) { | |
263 | if (!fle->object || fle->genid == genid) | 309 | if (flow_entry_valid(fle)) |
264 | continue; | 310 | continue; |
265 | 311 | ||
266 | fle->object = NULL; | 312 | deleted++; |
267 | atomic_dec(fle->object_ref); | 313 | hlist_del(&fle->u.hlist); |
314 | list_add_tail(&fle->u.gc_list, &gc_list); | ||
268 | } | 315 | } |
269 | } | 316 | } |
270 | 317 | ||
318 | flow_cache_queue_garbage(fcp, deleted, &gc_list); | ||
319 | |||
271 | if (atomic_dec_and_test(&info->cpuleft)) | 320 | if (atomic_dec_and_test(&info->cpuleft)) |
272 | complete(&info->completion); | 321 | complete(&info->completion); |
273 | } | 322 | } |
274 | 323 | ||
275 | static void flow_cache_flush_per_cpu(void *) __attribute__((__unused__)); | ||
276 | static void flow_cache_flush_per_cpu(void *data) | 324 | static void flow_cache_flush_per_cpu(void *data) |
277 | { | 325 | { |
278 | struct flow_flush_info *info = data; | 326 | struct flow_flush_info *info = data; |
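The rewritten lookup above hands back a struct flow_cache_object and drives it through three ops: get() revalidates and takes a reference (returning NULL forces a fresh resolve), check() lets the flush and shrink paths test validity, and delete() drops the cache's reference. A skeletal sketch of a provider-side object, assuming the flow_cache_ops/flow_cache_object layout this file pairs with in net/flow.h (all my_* names invented):

#include <linux/slab.h>
#include <net/flow.h>

struct my_flow_obj {
	struct flow_cache_object flo;	/* embeds the ops pointer */
	atomic_t refcnt;
};

static struct flow_cache_object *my_get(struct flow_cache_object *flo)
{
	struct my_flow_obj *o = container_of(flo, struct my_flow_obj, flo);

	if (!atomic_inc_not_zero(&o->refcnt))
		return NULL;	/* object dying: caller must re-resolve */
	return flo;
}

static int my_check(struct flow_cache_object *flo)
{
	return 1;	/* sketch: the cached copy never goes stale */
}

static void my_delete(struct flow_cache_object *flo)
{
	struct my_flow_obj *o = container_of(flo, struct my_flow_obj, flo);

	if (atomic_dec_and_test(&o->refcnt))
		kfree(o);
}

static const struct flow_cache_ops my_flow_ops = {
	.get	= my_get,
	.check	= my_check,
	.delete	= my_delete,
};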
@@ -280,8 +328,7 @@ static void flow_cache_flush_per_cpu(void *data) | |||
280 | struct tasklet_struct *tasklet; | 328 | struct tasklet_struct *tasklet; |
281 | 329 | ||
282 | cpu = smp_processor_id(); | 330 | cpu = smp_processor_id(); |
283 | 331 | tasklet = &per_cpu_ptr(info->cache->percpu, cpu)->flush_tasklet; | |
284 | tasklet = flow_flush_tasklet(cpu); | ||
285 | tasklet->data = (unsigned long)info; | 332 | tasklet->data = (unsigned long)info; |
286 | tasklet_schedule(tasklet); | 333 | tasklet_schedule(tasklet); |
287 | } | 334 | } |
@@ -294,6 +341,7 @@ void flow_cache_flush(void) | |||
294 | /* Don't want cpus going down or up during this. */ | 341 | /* Don't want cpus going down or up during this. */ |
295 | get_online_cpus(); | 342 | get_online_cpus(); |
296 | mutex_lock(&flow_flush_sem); | 343 | mutex_lock(&flow_flush_sem); |
344 | info.cache = &flow_cache_global; | ||
297 | atomic_set(&info.cpuleft, num_online_cpus()); | 345 | atomic_set(&info.cpuleft, num_online_cpus()); |
298 | init_completion(&info.completion); | 346 | init_completion(&info.completion); |
299 | 347 | ||
@@ -307,62 +355,75 @@ void flow_cache_flush(void) | |||
307 | put_online_cpus(); | 355 | put_online_cpus(); |
308 | } | 356 | } |
309 | 357 | ||
310 | static void __init flow_cache_cpu_prepare(int cpu) | 358 | static void __init flow_cache_cpu_prepare(struct flow_cache *fc, |
359 | struct flow_cache_percpu *fcp) | ||
311 | { | 360 | { |
312 | struct tasklet_struct *tasklet; | 361 | fcp->hash_table = (struct hlist_head *) |
313 | unsigned long order; | 362 | __get_free_pages(GFP_KERNEL|__GFP_ZERO, fc->order); |
314 | 363 | if (!fcp->hash_table) | |
315 | for (order = 0; | 364 | panic("NET: failed to allocate flow cache order %lu\n", fc->order); |
316 | (PAGE_SIZE << order) < | 365 | |
317 | (sizeof(struct flow_cache_entry *)*flow_hash_size); | 366 | fcp->hash_rnd_recalc = 1; |
318 | order++) | 367 | fcp->hash_count = 0; |
319 | /* NOTHING */; | 368 | tasklet_init(&fcp->flush_tasklet, flow_cache_flush_tasklet, 0); |
320 | |||
321 | flow_table(cpu) = (struct flow_cache_entry **) | ||
322 | __get_free_pages(GFP_KERNEL|__GFP_ZERO, order); | ||
323 | if (!flow_table(cpu)) | ||
324 | panic("NET: failed to allocate flow cache order %lu\n", order); | ||
325 | |||
326 | flow_hash_rnd_recalc(cpu) = 1; | ||
327 | flow_count(cpu) = 0; | ||
328 | |||
329 | tasklet = flow_flush_tasklet(cpu); | ||
330 | tasklet_init(tasklet, flow_cache_flush_tasklet, 0); | ||
331 | } | 369 | } |
332 | 370 | ||
333 | static int flow_cache_cpu(struct notifier_block *nfb, | 371 | static int flow_cache_cpu(struct notifier_block *nfb, |
334 | unsigned long action, | 372 | unsigned long action, |
335 | void *hcpu) | 373 | void *hcpu) |
336 | { | 374 | { |
375 | struct flow_cache *fc = container_of(nfb, struct flow_cache, hotcpu_notifier); | ||
376 | int cpu = (unsigned long) hcpu; | ||
377 | struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu); | ||
378 | |||
337 | if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) | 379 | if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) |
338 | __flow_cache_shrink((unsigned long)hcpu, 0); | 380 | __flow_cache_shrink(fc, fcp, 0); |
339 | return NOTIFY_OK; | 381 | return NOTIFY_OK; |
340 | } | 382 | } |
341 | 383 | ||
342 | static int __init flow_cache_init(void) | 384 | static int flow_cache_init(struct flow_cache *fc) |
343 | { | 385 | { |
386 | unsigned long order; | ||
344 | int i; | 387 | int i; |
345 | 388 | ||
346 | flow_cachep = kmem_cache_create("flow_cache", | 389 | fc->hash_shift = 10; |
347 | sizeof(struct flow_cache_entry), | 390 | fc->low_watermark = 2 * flow_cache_hash_size(fc); |
348 | 0, SLAB_PANIC, | 391 | fc->high_watermark = 4 * flow_cache_hash_size(fc); |
349 | NULL); | 392 | |
350 | flow_hash_shift = 10; | 393 | for (order = 0; |
351 | flow_lwm = 2 * flow_hash_size; | 394 | (PAGE_SIZE << order) < |
352 | flow_hwm = 4 * flow_hash_size; | 395 | (sizeof(struct hlist_head)*flow_cache_hash_size(fc)); |
396 | order++) | ||
397 | /* NOTHING */; | ||
398 | fc->order = order; | ||
399 | fc->percpu = alloc_percpu(struct flow_cache_percpu); | ||
353 | 400 | ||
354 | setup_timer(&flow_hash_rnd_timer, flow_cache_new_hashrnd, 0); | 401 | setup_timer(&fc->rnd_timer, flow_cache_new_hashrnd, |
355 | flow_hash_rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD; | 402 | (unsigned long) fc); |
356 | add_timer(&flow_hash_rnd_timer); | 403 | fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD; |
404 | add_timer(&fc->rnd_timer); | ||
357 | 405 | ||
358 | for_each_possible_cpu(i) | 406 | for_each_possible_cpu(i) |
359 | flow_cache_cpu_prepare(i); | 407 | flow_cache_cpu_prepare(fc, per_cpu_ptr(fc->percpu, i)); |
408 | |||
409 | fc->hotcpu_notifier = (struct notifier_block){ | ||
410 | .notifier_call = flow_cache_cpu, | ||
411 | }; | ||
412 | register_hotcpu_notifier(&fc->hotcpu_notifier); | ||
360 | 413 | ||
361 | hotcpu_notifier(flow_cache_cpu, 0); | ||
362 | return 0; | 414 | return 0; |
363 | } | 415 | } |
364 | 416 | ||
365 | module_init(flow_cache_init); | 417 | static int __init flow_cache_init_global(void) |
418 | { | ||
419 | flow_cachep = kmem_cache_create("flow_cache", | ||
420 | sizeof(struct flow_cache_entry), | ||
421 | 0, SLAB_PANIC, NULL); | ||
422 | |||
423 | return flow_cache_init(&flow_cache_global); | ||
424 | } | ||
425 | |||
426 | module_init(flow_cache_init_global); | ||
366 | 427 | ||
367 | EXPORT_SYMBOL(flow_cache_genid); | 428 | EXPORT_SYMBOL(flow_cache_genid); |
368 | EXPORT_SYMBOL(flow_cache_lookup); | 429 | EXPORT_SYMBOL(flow_cache_lookup); |
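
The flow.c rework above folds the old per-cpu globals into struct flow_cache and struct flow_cache_percpu, which makes additional cache instances possible; the page-order loop it keeps is the usual smallest-power-of-two-pages-that-fits idiom. A standalone sketch of that computation (plain C, hypothetical sizes; in-kernel, get_order() yields the same answer):

    /* Page-order computation as in flow_cache_init() above: find the
     * smallest 'order' such that PAGE_SIZE << order covers the table. */
    #include <stdio.h>

    #define PAGE_SIZE 4096UL

    static unsigned long table_order(unsigned long entry_size,
                                     unsigned long nentries)
    {
            unsigned long order;

            for (order = 0;
                 (PAGE_SIZE << order) < entry_size * nentries;
                 order++)
                    /* NOTHING */;
            return order;
    }

    int main(void)
    {
            /* hash_shift = 10 gives 1024 hlist_head buckets of 8 bytes:
             * 8192 bytes need order 1, i.e. two 4 KiB pages. */
            printf("order = %lu\n", table_order(8, 1UL << 10));
            return 0;
    }
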
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 59cfc7d8fc45..99e7052d7323 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c | |||
@@ -14,9 +14,12 @@ | |||
14 | #include <linux/netdevice.h> | 14 | #include <linux/netdevice.h> |
15 | #include <linux/if_arp.h> | 15 | #include <linux/if_arp.h> |
16 | #include <linux/slab.h> | 16 | #include <linux/slab.h> |
17 | #include <linux/nsproxy.h> | ||
17 | #include <net/sock.h> | 18 | #include <net/sock.h> |
19 | #include <net/net_namespace.h> | ||
18 | #include <linux/rtnetlink.h> | 20 | #include <linux/rtnetlink.h> |
19 | #include <linux/wireless.h> | 21 | #include <linux/wireless.h> |
22 | #include <linux/vmalloc.h> | ||
20 | #include <net/wext.h> | 23 | #include <net/wext.h> |
21 | 24 | ||
22 | #include "net-sysfs.h" | 25 | #include "net-sysfs.h" |
@@ -466,18 +469,345 @@ static struct attribute_group wireless_group = { | |||
466 | .attrs = wireless_attrs, | 469 | .attrs = wireless_attrs, |
467 | }; | 470 | }; |
468 | #endif | 471 | #endif |
469 | |||
470 | #endif /* CONFIG_SYSFS */ | 472 | #endif /* CONFIG_SYSFS */ |
471 | 473 | ||
474 | #ifdef CONFIG_RPS | ||
475 | /* | ||
476 | * RX queue sysfs structures and functions. | ||
477 | */ | ||
478 | struct rx_queue_attribute { | ||
479 | struct attribute attr; | ||
480 | ssize_t (*show)(struct netdev_rx_queue *queue, | ||
481 | struct rx_queue_attribute *attr, char *buf); | ||
482 | ssize_t (*store)(struct netdev_rx_queue *queue, | ||
483 | struct rx_queue_attribute *attr, const char *buf, size_t len); | ||
484 | }; | ||
485 | #define to_rx_queue_attr(_attr) container_of(_attr, \ | ||
486 | struct rx_queue_attribute, attr) | ||
487 | |||
488 | #define to_rx_queue(obj) container_of(obj, struct netdev_rx_queue, kobj) | ||
489 | |||
490 | static ssize_t rx_queue_attr_show(struct kobject *kobj, struct attribute *attr, | ||
491 | char *buf) | ||
492 | { | ||
493 | struct rx_queue_attribute *attribute = to_rx_queue_attr(attr); | ||
494 | struct netdev_rx_queue *queue = to_rx_queue(kobj); | ||
495 | |||
496 | if (!attribute->show) | ||
497 | return -EIO; | ||
498 | |||
499 | return attribute->show(queue, attribute, buf); | ||
500 | } | ||
501 | |||
502 | static ssize_t rx_queue_attr_store(struct kobject *kobj, struct attribute *attr, | ||
503 | const char *buf, size_t count) | ||
504 | { | ||
505 | struct rx_queue_attribute *attribute = to_rx_queue_attr(attr); | ||
506 | struct netdev_rx_queue *queue = to_rx_queue(kobj); | ||
507 | |||
508 | if (!attribute->store) | ||
509 | return -EIO; | ||
510 | |||
511 | return attribute->store(queue, attribute, buf, count); | ||
512 | } | ||
513 | |||
514 | static struct sysfs_ops rx_queue_sysfs_ops = { | ||
515 | .show = rx_queue_attr_show, | ||
516 | .store = rx_queue_attr_store, | ||
517 | }; | ||
518 | |||
519 | static ssize_t show_rps_map(struct netdev_rx_queue *queue, | ||
520 | struct rx_queue_attribute *attribute, char *buf) | ||
521 | { | ||
522 | struct rps_map *map; | ||
523 | cpumask_var_t mask; | ||
524 | size_t len = 0; | ||
525 | int i; | ||
526 | |||
527 | if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) | ||
528 | return -ENOMEM; | ||
529 | |||
530 | rcu_read_lock(); | ||
531 | map = rcu_dereference(queue->rps_map); | ||
532 | if (map) | ||
533 | for (i = 0; i < map->len; i++) | ||
534 | cpumask_set_cpu(map->cpus[i], mask); | ||
535 | |||
536 | len += cpumask_scnprintf(buf + len, PAGE_SIZE, mask); | ||
537 | if (PAGE_SIZE - len < 3) { | ||
538 | rcu_read_unlock(); | ||
539 | free_cpumask_var(mask); | ||
540 | return -EINVAL; | ||
541 | } | ||
542 | rcu_read_unlock(); | ||
543 | |||
544 | free_cpumask_var(mask); | ||
545 | len += sprintf(buf + len, "\n"); | ||
546 | return len; | ||
547 | } | ||
548 | |||
549 | static void rps_map_release(struct rcu_head *rcu) | ||
550 | { | ||
551 | struct rps_map *map = container_of(rcu, struct rps_map, rcu); | ||
552 | |||
553 | kfree(map); | ||
554 | } | ||
555 | |||
556 | static ssize_t store_rps_map(struct netdev_rx_queue *queue, | ||
557 | struct rx_queue_attribute *attribute, | ||
558 | const char *buf, size_t len) | ||
559 | { | ||
560 | struct rps_map *old_map, *map; | ||
561 | cpumask_var_t mask; | ||
562 | int err, cpu, i; | ||
563 | static DEFINE_SPINLOCK(rps_map_lock); | ||
564 | |||
565 | if (!capable(CAP_NET_ADMIN)) | ||
566 | return -EPERM; | ||
567 | |||
568 | if (!alloc_cpumask_var(&mask, GFP_KERNEL)) | ||
569 | return -ENOMEM; | ||
570 | |||
571 | err = bitmap_parse(buf, len, cpumask_bits(mask), nr_cpumask_bits); | ||
572 | if (err) { | ||
573 | free_cpumask_var(mask); | ||
574 | return err; | ||
575 | } | ||
576 | |||
577 | map = kzalloc(max_t(unsigned, | ||
578 | RPS_MAP_SIZE(cpumask_weight(mask)), L1_CACHE_BYTES), | ||
579 | GFP_KERNEL); | ||
580 | if (!map) { | ||
581 | free_cpumask_var(mask); | ||
582 | return -ENOMEM; | ||
583 | } | ||
584 | |||
585 | i = 0; | ||
586 | for_each_cpu_and(cpu, mask, cpu_online_mask) | ||
587 | map->cpus[i++] = cpu; | ||
588 | |||
589 | if (i) | ||
590 | map->len = i; | ||
591 | else { | ||
592 | kfree(map); | ||
593 | map = NULL; | ||
594 | } | ||
595 | |||
596 | spin_lock(&rps_map_lock); | ||
597 | old_map = queue->rps_map; | ||
598 | rcu_assign_pointer(queue->rps_map, map); | ||
599 | spin_unlock(&rps_map_lock); | ||
600 | |||
601 | if (old_map) | ||
602 | call_rcu(&old_map->rcu, rps_map_release); | ||
603 | |||
604 | free_cpumask_var(mask); | ||
605 | return len; | ||
606 | } | ||
607 | |||
608 | static ssize_t show_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue, | ||
609 | struct rx_queue_attribute *attr, | ||
610 | char *buf) | ||
611 | { | ||
612 | struct rps_dev_flow_table *flow_table; | ||
613 | unsigned int val = 0; | ||
614 | |||
615 | rcu_read_lock(); | ||
616 | flow_table = rcu_dereference(queue->rps_flow_table); | ||
617 | if (flow_table) | ||
618 | val = flow_table->mask + 1; | ||
619 | rcu_read_unlock(); | ||
620 | |||
621 | return sprintf(buf, "%u\n", val); | ||
622 | } | ||
623 | |||
624 | static void rps_dev_flow_table_release_work(struct work_struct *work) | ||
625 | { | ||
626 | struct rps_dev_flow_table *table = container_of(work, | ||
627 | struct rps_dev_flow_table, free_work); | ||
628 | |||
629 | vfree(table); | ||
630 | } | ||
631 | |||
632 | static void rps_dev_flow_table_release(struct rcu_head *rcu) | ||
633 | { | ||
634 | struct rps_dev_flow_table *table = container_of(rcu, | ||
635 | struct rps_dev_flow_table, rcu); | ||
636 | |||
637 | INIT_WORK(&table->free_work, rps_dev_flow_table_release_work); | ||
638 | schedule_work(&table->free_work); | ||
639 | } | ||
640 | |||
641 | static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue, | ||
642 | struct rx_queue_attribute *attr, | ||
643 | const char *buf, size_t len) | ||
644 | { | ||
645 | unsigned int count; | ||
646 | char *endp; | ||
647 | struct rps_dev_flow_table *table, *old_table; | ||
648 | static DEFINE_SPINLOCK(rps_dev_flow_lock); | ||
649 | |||
650 | if (!capable(CAP_NET_ADMIN)) | ||
651 | return -EPERM; | ||
652 | |||
653 | count = simple_strtoul(buf, &endp, 0); | ||
654 | if (endp == buf) | ||
655 | return -EINVAL; | ||
656 | |||
657 | if (count) { | ||
658 | int i; | ||
659 | |||
660 | if (count > 1<<30) { | ||
661 | /* Enforce a limit to prevent overflow */ | ||
662 | return -EINVAL; | ||
663 | } | ||
664 | count = roundup_pow_of_two(count); | ||
665 | table = vmalloc(RPS_DEV_FLOW_TABLE_SIZE(count)); | ||
666 | if (!table) | ||
667 | return -ENOMEM; | ||
668 | |||
669 | table->mask = count - 1; | ||
670 | for (i = 0; i < count; i++) | ||
671 | table->flows[i].cpu = RPS_NO_CPU; | ||
672 | } else | ||
673 | table = NULL; | ||
674 | |||
675 | spin_lock(&rps_dev_flow_lock); | ||
676 | old_table = queue->rps_flow_table; | ||
677 | rcu_assign_pointer(queue->rps_flow_table, table); | ||
678 | spin_unlock(&rps_dev_flow_lock); | ||
679 | |||
680 | if (old_table) | ||
681 | call_rcu(&old_table->rcu, rps_dev_flow_table_release); | ||
682 | |||
683 | return len; | ||
684 | } | ||
685 | |||
686 | static struct rx_queue_attribute rps_cpus_attribute = | ||
687 | __ATTR(rps_cpus, S_IRUGO | S_IWUSR, show_rps_map, store_rps_map); | ||
688 | |||
689 | |||
690 | static struct rx_queue_attribute rps_dev_flow_table_cnt_attribute = | ||
691 | __ATTR(rps_flow_cnt, S_IRUGO | S_IWUSR, | ||
692 | show_rps_dev_flow_table_cnt, store_rps_dev_flow_table_cnt); | ||
693 | |||
694 | static struct attribute *rx_queue_default_attrs[] = { | ||
695 | &rps_cpus_attribute.attr, | ||
696 | &rps_dev_flow_table_cnt_attribute.attr, | ||
697 | NULL | ||
698 | }; | ||
699 | |||
700 | static void rx_queue_release(struct kobject *kobj) | ||
701 | { | ||
702 | struct netdev_rx_queue *queue = to_rx_queue(kobj); | ||
703 | struct netdev_rx_queue *first = queue->first; | ||
704 | |||
705 | if (queue->rps_map) | ||
706 | call_rcu(&queue->rps_map->rcu, rps_map_release); | ||
707 | |||
708 | if (queue->rps_flow_table) | ||
709 | call_rcu(&queue->rps_flow_table->rcu, | ||
710 | rps_dev_flow_table_release); | ||
711 | |||
712 | if (atomic_dec_and_test(&first->count)) | ||
713 | kfree(first); | ||
714 | } | ||
715 | |||
716 | static struct kobj_type rx_queue_ktype = { | ||
717 | .sysfs_ops = &rx_queue_sysfs_ops, | ||
718 | .release = rx_queue_release, | ||
719 | .default_attrs = rx_queue_default_attrs, | ||
720 | }; | ||
721 | |||
722 | static int rx_queue_add_kobject(struct net_device *net, int index) | ||
723 | { | ||
724 | struct netdev_rx_queue *queue = net->_rx + index; | ||
725 | struct kobject *kobj = &queue->kobj; | ||
726 | int error = 0; | ||
727 | |||
728 | kobj->kset = net->queues_kset; | ||
729 | error = kobject_init_and_add(kobj, &rx_queue_ktype, NULL, | ||
730 | "rx-%u", index); | ||
731 | if (error) { | ||
732 | kobject_put(kobj); | ||
733 | return error; | ||
734 | } | ||
735 | |||
736 | kobject_uevent(kobj, KOBJ_ADD); | ||
737 | |||
738 | return error; | ||
739 | } | ||
740 | |||
741 | static int rx_queue_register_kobjects(struct net_device *net) | ||
742 | { | ||
743 | int i; | ||
744 | int error = 0; | ||
745 | |||
746 | net->queues_kset = kset_create_and_add("queues", | ||
747 | NULL, &net->dev.kobj); | ||
748 | if (!net->queues_kset) | ||
749 | return -ENOMEM; | ||
750 | for (i = 0; i < net->num_rx_queues; i++) { | ||
751 | error = rx_queue_add_kobject(net, i); | ||
752 | if (error) | ||
753 | break; | ||
754 | } | ||
755 | |||
756 | if (error) | ||
757 | while (--i >= 0) | ||
758 | kobject_put(&net->_rx[i].kobj); | ||
759 | |||
760 | return error; | ||
761 | } | ||
762 | |||
763 | static void rx_queue_remove_kobjects(struct net_device *net) | ||
764 | { | ||
765 | int i; | ||
766 | |||
767 | for (i = 0; i < net->num_rx_queues; i++) | ||
768 | kobject_put(&net->_rx[i].kobj); | ||
769 | kset_unregister(net->queues_kset); | ||
770 | } | ||
771 | #endif /* CONFIG_RPS */ | ||
772 | |||
773 | static const void *net_current_ns(void) | ||
774 | { | ||
775 | return current->nsproxy->net_ns; | ||
776 | } | ||
777 | |||
778 | static const void *net_initial_ns(void) | ||
779 | { | ||
780 | return &init_net; | ||
781 | } | ||
782 | |||
783 | static const void *net_netlink_ns(struct sock *sk) | ||
784 | { | ||
785 | return sock_net(sk); | ||
786 | } | ||
787 | |||
788 | static struct kobj_ns_type_operations net_ns_type_operations = { | ||
789 | .type = KOBJ_NS_TYPE_NET, | ||
790 | .current_ns = net_current_ns, | ||
791 | .netlink_ns = net_netlink_ns, | ||
792 | .initial_ns = net_initial_ns, | ||
793 | }; | ||
794 | |||
795 | static void net_kobj_ns_exit(struct net *net) | ||
796 | { | ||
797 | kobj_ns_exit(KOBJ_NS_TYPE_NET, net); | ||
798 | } | ||
799 | |||
800 | static struct pernet_operations kobj_net_ops = { | ||
801 | .exit = net_kobj_ns_exit, | ||
802 | }; | ||
803 | |||
804 | |||
472 | #ifdef CONFIG_HOTPLUG | 805 | #ifdef CONFIG_HOTPLUG |
473 | static int netdev_uevent(struct device *d, struct kobj_uevent_env *env) | 806 | static int netdev_uevent(struct device *d, struct kobj_uevent_env *env) |
474 | { | 807 | { |
475 | struct net_device *dev = to_net_dev(d); | 808 | struct net_device *dev = to_net_dev(d); |
476 | int retval; | 809 | int retval; |
477 | 810 | ||
478 | if (!net_eq(dev_net(dev), &init_net)) | ||
479 | return 0; | ||
480 | |||
481 | /* pass interface to uevent. */ | 811 | /* pass interface to uevent. */ |
482 | retval = add_uevent_var(env, "INTERFACE=%s", dev->name); | 812 | retval = add_uevent_var(env, "INTERFACE=%s", dev->name); |
483 | if (retval) | 813 | if (retval) |
@@ -507,6 +837,13 @@ static void netdev_release(struct device *d) | |||
507 | kfree((char *)dev - dev->padded); | 837 | kfree((char *)dev - dev->padded); |
508 | } | 838 | } |
509 | 839 | ||
840 | static const void *net_namespace(struct device *d) | ||
841 | { | ||
842 | struct net_device *dev; | ||
843 | dev = container_of(d, struct net_device, dev); | ||
844 | return dev_net(dev); | ||
845 | } | ||
846 | |||
510 | static struct class net_class = { | 847 | static struct class net_class = { |
511 | .name = "net", | 848 | .name = "net", |
512 | .dev_release = netdev_release, | 849 | .dev_release = netdev_release, |
@@ -516,6 +853,8 @@ static struct class net_class = { | |||
516 | #ifdef CONFIG_HOTPLUG | 853 | #ifdef CONFIG_HOTPLUG |
517 | .dev_uevent = netdev_uevent, | 854 | .dev_uevent = netdev_uevent, |
518 | #endif | 855 | #endif |
856 | .ns_type = &net_ns_type_operations, | ||
857 | .namespace = net_namespace, | ||
519 | }; | 858 | }; |
520 | 859 | ||
521 | /* Delete sysfs entries but hold kobject reference until after all | 860 | /* Delete sysfs entries but hold kobject reference until after all |
@@ -527,8 +866,9 @@ void netdev_unregister_kobject(struct net_device * net) | |||
527 | 866 | ||
528 | kobject_get(&dev->kobj); | 867 | kobject_get(&dev->kobj); |
529 | 868 | ||
530 | if (!net_eq(dev_net(net), &init_net)) | 869 | #ifdef CONFIG_RPS |
531 | return; | 870 | rx_queue_remove_kobjects(net); |
871 | #endif | ||
532 | 872 | ||
533 | device_del(dev); | 873 | device_del(dev); |
534 | } | 874 | } |
@@ -538,7 +878,9 @@ int netdev_register_kobject(struct net_device *net) | |||
538 | { | 878 | { |
539 | struct device *dev = &(net->dev); | 879 | struct device *dev = &(net->dev); |
540 | const struct attribute_group **groups = net->sysfs_groups; | 880 | const struct attribute_group **groups = net->sysfs_groups; |
881 | int error = 0; | ||
541 | 882 | ||
883 | device_initialize(dev); | ||
542 | dev->class = &net_class; | 884 | dev->class = &net_class; |
543 | dev->platform_data = net; | 885 | dev->platform_data = net; |
544 | dev->groups = groups; | 886 | dev->groups = groups; |
@@ -561,10 +903,19 @@ int netdev_register_kobject(struct net_device *net) | |||
561 | #endif | 903 | #endif |
562 | #endif /* CONFIG_SYSFS */ | 904 | #endif /* CONFIG_SYSFS */ |
563 | 905 | ||
564 | if (!net_eq(dev_net(net), &init_net)) | 906 | error = device_add(dev); |
565 | return 0; | 907 | if (error) |
908 | return error; | ||
909 | |||
910 | #ifdef CONFIG_RPS | ||
911 | error = rx_queue_register_kobjects(net); | ||
912 | if (error) { | ||
913 | device_del(dev); | ||
914 | return error; | ||
915 | } | ||
916 | #endif | ||
566 | 917 | ||
567 | return device_add(dev); | 918 | return error; |
568 | } | 919 | } |
569 | 920 | ||
570 | int netdev_class_create_file(struct class_attribute *class_attr) | 921 | int netdev_class_create_file(struct class_attribute *class_attr) |
@@ -580,13 +931,9 @@ void netdev_class_remove_file(struct class_attribute *class_attr) | |||
580 | EXPORT_SYMBOL(netdev_class_create_file); | 931 | EXPORT_SYMBOL(netdev_class_create_file); |
581 | EXPORT_SYMBOL(netdev_class_remove_file); | 932 | EXPORT_SYMBOL(netdev_class_remove_file); |
582 | 933 | ||
583 | void netdev_initialize_kobject(struct net_device *net) | ||
584 | { | ||
585 | struct device *device = &(net->dev); | ||
586 | device_initialize(device); | ||
587 | } | ||
588 | |||
589 | int netdev_kobject_init(void) | 934 | int netdev_kobject_init(void) |
590 | { | 935 | { |
936 | kobj_ns_type_register(&net_ns_type_operations); | ||
937 | register_pernet_subsys(&kobj_net_ops); | ||
591 | return class_register(&net_class); | 938 | return class_register(&net_class); |
592 | } | 939 | } |
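
store_rps_map() above is a textbook RCU pointer update: build the replacement map off to the side, serialize writers with a private spinlock, publish it with rcu_assign_pointer(), and let call_rcu() free the old copy once all readers have moved on. A minimal sketch of the same pattern, assuming a hypothetical struct cfg and cfg_update() rather than any real kernel API:

    #include <linux/slab.h>
    #include <linux/spinlock.h>
    #include <linux/rcupdate.h>

    struct cfg {
            struct rcu_head rcu;
            int value;
    };

    static struct cfg *active_cfg;          /* read under rcu_read_lock() */
    static DEFINE_SPINLOCK(cfg_lock);       /* serializes writers only */

    static void cfg_release(struct rcu_head *rcu)
    {
            kfree(container_of(rcu, struct cfg, rcu));
    }

    static int cfg_update(int value)
    {
            struct cfg *new, *old;

            new = kzalloc(sizeof(*new), GFP_KERNEL);
            if (!new)
                    return -ENOMEM;
            new->value = value;

            spin_lock(&cfg_lock);
            old = active_cfg;
            rcu_assign_pointer(active_cfg, new);    /* readers see old or new */
            spin_unlock(&cfg_lock);

            if (old)
                    call_rcu(&old->rcu, cfg_release); /* free after grace period */
            return 0;
    }

Readers pair with this through rcu_read_lock()/rcu_dereference(), exactly as show_rps_map() does when it walks the map.
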
diff --git a/net/core/net-sysfs.h b/net/core/net-sysfs.h index 14e7524260b3..805555e8b187 100644 --- a/net/core/net-sysfs.h +++ b/net/core/net-sysfs.h | |||
@@ -4,5 +4,4 @@ | |||
4 | int netdev_kobject_init(void); | 4 | int netdev_kobject_init(void); |
5 | int netdev_register_kobject(struct net_device *); | 5 | int netdev_register_kobject(struct net_device *); |
6 | void netdev_unregister_kobject(struct net_device *); | 6 | void netdev_unregister_kobject(struct net_device *); |
7 | void netdev_initialize_kobject(struct net_device *); | ||
8 | #endif | 7 | #endif |
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index bd8c4712ea24..c988e685433a 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c | |||
@@ -27,6 +27,51 @@ EXPORT_SYMBOL(init_net); | |||
27 | 27 | ||
28 | #define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */ | 28 | #define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */ |
29 | 29 | ||
30 | static void net_generic_release(struct rcu_head *rcu) | ||
31 | { | ||
32 | struct net_generic *ng; | ||
33 | |||
34 | ng = container_of(rcu, struct net_generic, rcu); | ||
35 | kfree(ng); | ||
36 | } | ||
37 | |||
38 | static int net_assign_generic(struct net *net, int id, void *data) | ||
39 | { | ||
40 | struct net_generic *ng, *old_ng; | ||
41 | |||
42 | BUG_ON(!mutex_is_locked(&net_mutex)); | ||
43 | BUG_ON(id == 0); | ||
44 | |||
45 | ng = old_ng = net->gen; | ||
46 | if (old_ng->len >= id) | ||
47 | goto assign; | ||
48 | |||
49 | ng = kzalloc(sizeof(struct net_generic) + | ||
50 | id * sizeof(void *), GFP_KERNEL); | ||
51 | if (ng == NULL) | ||
52 | return -ENOMEM; | ||
53 | |||
54 | /* | ||
55 | * Some synchronisation notes: | ||
56 | * | ||
57 | * The net_generic explores the net->gen array inside rcu | ||
58 | * read section. Besides once set the net->gen->ptr[x] | ||
59 | * pointer never changes (see rules in netns/generic.h). | ||
60 | * | ||
61 | * That said, we simply duplicate this array and schedule | ||
62 | * the old copy for kfree after a grace period. | ||
63 | */ | ||
64 | |||
65 | ng->len = id; | ||
66 | memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*)); | ||
67 | |||
68 | rcu_assign_pointer(net->gen, ng); | ||
69 | call_rcu(&old_ng->rcu, net_generic_release); | ||
70 | assign: | ||
71 | ng->ptr[id - 1] = data; | ||
72 | return 0; | ||
73 | } | ||
74 | |||
30 | static int ops_init(const struct pernet_operations *ops, struct net *net) | 75 | static int ops_init(const struct pernet_operations *ops, struct net *net) |
31 | { | 76 | { |
32 | int err; | 77 | int err; |
@@ -469,10 +514,10 @@ EXPORT_SYMBOL_GPL(register_pernet_subsys); | |||
469 | * addition run the exit method for all existing network | 514 | * addition run the exit method for all existing network |
470 | * namespaces. | 515 | * namespaces. |
471 | */ | 516 | */ |
472 | void unregister_pernet_subsys(struct pernet_operations *module) | 517 | void unregister_pernet_subsys(struct pernet_operations *ops) |
473 | { | 518 | { |
474 | mutex_lock(&net_mutex); | 519 | mutex_lock(&net_mutex); |
475 | unregister_pernet_operations(module); | 520 | unregister_pernet_operations(ops); |
476 | mutex_unlock(&net_mutex); | 521 | mutex_unlock(&net_mutex); |
477 | } | 522 | } |
478 | EXPORT_SYMBOL_GPL(unregister_pernet_subsys); | 523 | EXPORT_SYMBOL_GPL(unregister_pernet_subsys); |
@@ -526,49 +571,3 @@ void unregister_pernet_device(struct pernet_operations *ops) | |||
526 | mutex_unlock(&net_mutex); | 571 | mutex_unlock(&net_mutex); |
527 | } | 572 | } |
528 | EXPORT_SYMBOL_GPL(unregister_pernet_device); | 573 | EXPORT_SYMBOL_GPL(unregister_pernet_device); |
529 | |||
530 | static void net_generic_release(struct rcu_head *rcu) | ||
531 | { | ||
532 | struct net_generic *ng; | ||
533 | |||
534 | ng = container_of(rcu, struct net_generic, rcu); | ||
535 | kfree(ng); | ||
536 | } | ||
537 | |||
538 | int net_assign_generic(struct net *net, int id, void *data) | ||
539 | { | ||
540 | struct net_generic *ng, *old_ng; | ||
541 | |||
542 | BUG_ON(!mutex_is_locked(&net_mutex)); | ||
543 | BUG_ON(id == 0); | ||
544 | |||
545 | ng = old_ng = net->gen; | ||
546 | if (old_ng->len >= id) | ||
547 | goto assign; | ||
548 | |||
549 | ng = kzalloc(sizeof(struct net_generic) + | ||
550 | id * sizeof(void *), GFP_KERNEL); | ||
551 | if (ng == NULL) | ||
552 | return -ENOMEM; | ||
553 | |||
554 | /* | ||
555 | * Some synchronisation notes: | ||
556 | * | ||
557 | * The net_generic explores the net->gen array inside rcu | ||
558 | * read section. Besides once set the net->gen->ptr[x] | ||
559 | * pointer never changes (see rules in netns/generic.h). | ||
560 | * | ||
561 | * That said, we simply duplicate this array and schedule | ||
562 | * the old copy for kfree after a grace period. | ||
563 | */ | ||
564 | |||
565 | ng->len = id; | ||
566 | memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*)); | ||
567 | |||
568 | rcu_assign_pointer(net->gen, ng); | ||
569 | call_rcu(&old_ng->rcu, net_generic_release); | ||
570 | assign: | ||
571 | ng->ptr[id - 1] = data; | ||
572 | return 0; | ||
573 | } | ||
574 | EXPORT_SYMBOL_GPL(net_assign_generic); | ||
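
net_assign_generic() moves up and becomes static because registration under net_mutex is now its only caller; the copy-then-publish dance it performs exists solely for lockless readers. For context, the reader side at the time, roughly as net_generic() appeared in include/net/netns/generic.h (lightly abridged):

    #include <net/net_namespace.h>
    #include <net/netns/generic.h>

    static inline void *net_generic(struct net *net, int id)
    {
            struct net_generic *ng;
            void *ptr;

            rcu_read_lock();
            ng = rcu_dereference(net->gen); /* old or freshly grown array */
            ptr = ng->ptr[id - 1];          /* slot never changes once set */
            rcu_read_unlock();

            return ptr;
    }
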
diff --git a/net/core/netpoll.c b/net/core/netpoll.c index a58f59b97597..94825b109551 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c | |||
@@ -179,9 +179,8 @@ static void service_arp_queue(struct netpoll_info *npi) | |||
179 | } | 179 | } |
180 | } | 180 | } |
181 | 181 | ||
182 | void netpoll_poll(struct netpoll *np) | 182 | void netpoll_poll_dev(struct net_device *dev) |
183 | { | 183 | { |
184 | struct net_device *dev = np->dev; | ||
185 | const struct net_device_ops *ops; | 184 | const struct net_device_ops *ops; |
186 | 185 | ||
187 | if (!dev || !netif_running(dev)) | 186 | if (!dev || !netif_running(dev)) |
@@ -201,6 +200,11 @@ void netpoll_poll(struct netpoll *np) | |||
201 | zap_completion_queue(); | 200 | zap_completion_queue(); |
202 | } | 201 | } |
203 | 202 | ||
203 | void netpoll_poll(struct netpoll *np) | ||
204 | { | ||
205 | netpoll_poll_dev(np->dev); | ||
206 | } | ||
207 | |||
204 | static void refill_skbs(void) | 208 | static void refill_skbs(void) |
205 | { | 209 | { |
206 | struct sk_buff *skb; | 210 | struct sk_buff *skb; |
@@ -282,7 +286,7 @@ static int netpoll_owner_active(struct net_device *dev) | |||
282 | return 0; | 286 | return 0; |
283 | } | 287 | } |
284 | 288 | ||
285 | static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) | 289 | void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) |
286 | { | 290 | { |
287 | int status = NETDEV_TX_BUSY; | 291 | int status = NETDEV_TX_BUSY; |
288 | unsigned long tries; | 292 | unsigned long tries; |
@@ -308,7 +312,9 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) | |||
308 | tries > 0; --tries) { | 312 | tries > 0; --tries) { |
309 | if (__netif_tx_trylock(txq)) { | 313 | if (__netif_tx_trylock(txq)) { |
310 | if (!netif_tx_queue_stopped(txq)) { | 314 | if (!netif_tx_queue_stopped(txq)) { |
315 | dev->priv_flags |= IFF_IN_NETPOLL; | ||
311 | status = ops->ndo_start_xmit(skb, dev); | 316 | status = ops->ndo_start_xmit(skb, dev); |
317 | dev->priv_flags &= ~IFF_IN_NETPOLL; | ||
312 | if (status == NETDEV_TX_OK) | 318 | if (status == NETDEV_TX_OK) |
313 | txq_trans_update(txq); | 319 | txq_trans_update(txq); |
314 | } | 320 | } |
@@ -756,7 +762,10 @@ int netpoll_setup(struct netpoll *np) | |||
756 | atomic_inc(&npinfo->refcnt); | 762 | atomic_inc(&npinfo->refcnt); |
757 | } | 763 | } |
758 | 764 | ||
759 | if (!ndev->netdev_ops->ndo_poll_controller) { | 765 | npinfo->netpoll = np; |
766 | |||
767 | if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) || | ||
768 | !ndev->netdev_ops->ndo_poll_controller) { | ||
760 | printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n", | 769 | printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n", |
761 | np->name, np->dev_name); | 770 | np->name, np->dev_name); |
762 | err = -ENOTSUPP; | 771 | err = -ENOTSUPP; |
@@ -878,6 +887,7 @@ void netpoll_cleanup(struct netpoll *np) | |||
878 | } | 887 | } |
879 | 888 | ||
880 | if (atomic_dec_and_test(&npinfo->refcnt)) { | 889 | if (atomic_dec_and_test(&npinfo->refcnt)) { |
890 | const struct net_device_ops *ops; | ||
881 | skb_queue_purge(&npinfo->arp_tx); | 891 | skb_queue_purge(&npinfo->arp_tx); |
882 | skb_queue_purge(&npinfo->txq); | 892 | skb_queue_purge(&npinfo->txq); |
883 | cancel_rearming_delayed_work(&npinfo->tx_work); | 893 | cancel_rearming_delayed_work(&npinfo->tx_work); |
@@ -885,7 +895,11 @@ void netpoll_cleanup(struct netpoll *np) | |||
885 | /* clean after last, unfinished work */ | 895 | /* clean after last, unfinished work */ |
886 | __skb_queue_purge(&npinfo->txq); | 896 | __skb_queue_purge(&npinfo->txq); |
887 | kfree(npinfo); | 897 | kfree(npinfo); |
888 | np->dev->npinfo = NULL; | 898 | ops = np->dev->netdev_ops; |
899 | if (ops->ndo_netpoll_cleanup) | ||
900 | ops->ndo_netpoll_cleanup(np->dev); | ||
901 | else | ||
902 | np->dev->npinfo = NULL; | ||
889 | } | 903 | } |
890 | } | 904 | } |
891 | 905 | ||
@@ -908,6 +922,7 @@ void netpoll_set_trap(int trap) | |||
908 | atomic_dec(&trapped); | 922 | atomic_dec(&trapped); |
909 | } | 923 | } |
910 | 924 | ||
925 | EXPORT_SYMBOL(netpoll_send_skb); | ||
911 | EXPORT_SYMBOL(netpoll_set_trap); | 926 | EXPORT_SYMBOL(netpoll_set_trap); |
912 | EXPORT_SYMBOL(netpoll_trap); | 927 | EXPORT_SYMBOL(netpoll_trap); |
913 | EXPORT_SYMBOL(netpoll_print_options); | 928 | EXPORT_SYMBOL(netpoll_print_options); |
@@ -915,4 +930,5 @@ EXPORT_SYMBOL(netpoll_parse_options); | |||
915 | EXPORT_SYMBOL(netpoll_setup); | 930 | EXPORT_SYMBOL(netpoll_setup); |
916 | EXPORT_SYMBOL(netpoll_cleanup); | 931 | EXPORT_SYMBOL(netpoll_cleanup); |
917 | EXPORT_SYMBOL(netpoll_send_udp); | 932 | EXPORT_SYMBOL(netpoll_send_udp); |
933 | EXPORT_SYMBOL(netpoll_poll_dev); | ||
918 | EXPORT_SYMBOL(netpoll_poll); | 934 | EXPORT_SYMBOL(netpoll_poll); |
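
Two API moves happen in netpoll.c: netpoll_send_skb() loses its static and is exported so stacked drivers can transmit while in polling context (the new IFF_IN_NETPOLL bracket around ndo_start_xmit lets lower layers detect that context), and netpoll_cleanup() now defers to an ndo_netpoll_cleanup hook when one exists. A hedged sketch of such a hook for a hypothetical aggregating driver (aggdev is illustrative, not a real driver):

    #include <linux/netdevice.h>
    #include <linux/netpoll.h>

    struct aggdev_priv {
            struct net_device *slave;       /* lower device sharing our npinfo */
    };

    static void aggdev_netpoll_cleanup(struct net_device *dev)
    {
            struct aggdev_priv *priv = netdev_priv(dev);

            /* netpoll_cleanup() now calls this instead of clearing
             * dev->npinfo itself, so detach every device that was
             * handed the shared netpoll_info at setup time. */
            if (priv->slave)
                    priv->slave->npinfo = NULL;
            dev->npinfo = NULL;
    }

    static const struct net_device_ops aggdev_netdev_ops = {
            .ndo_netpoll_cleanup    = aggdev_netpoll_cleanup,
            /* ... ndo_open, ndo_start_xmit, etc. ... */
    };
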
diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 43923811bd6a..2ad68da418df 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c | |||
@@ -169,7 +169,7 @@ | |||
169 | #include <asm/dma.h> | 169 | #include <asm/dma.h> |
170 | #include <asm/div64.h> /* do_div */ | 170 | #include <asm/div64.h> /* do_div */ |
171 | 171 | ||
172 | #define VERSION "2.72" | 172 | #define VERSION "2.73" |
173 | #define IP_NAME_SZ 32 | 173 | #define IP_NAME_SZ 32 |
174 | #define MAX_MPLS_LABELS 16 /* This is the max label stack depth */ | 174 | #define MAX_MPLS_LABELS 16 /* This is the max label stack depth */ |
175 | #define MPLS_STACK_BOTTOM htonl(0x00000100) | 175 | #define MPLS_STACK_BOTTOM htonl(0x00000100) |
@@ -190,6 +190,7 @@ | |||
190 | #define F_IPSEC_ON (1<<12) /* ipsec on for flows */ | 190 | #define F_IPSEC_ON (1<<12) /* ipsec on for flows */ |
191 | #define F_QUEUE_MAP_RND (1<<13) /* queue map Random */ | 191 | #define F_QUEUE_MAP_RND (1<<13) /* queue map Random */ |
192 | #define F_QUEUE_MAP_CPU (1<<14) /* queue map mirrors smp_processor_id() */ | 192 | #define F_QUEUE_MAP_CPU (1<<14) /* queue map mirrors smp_processor_id() */ |
193 | #define F_NODE (1<<15) /* Node memory alloc*/ | ||
193 | 194 | ||
194 | /* Thread control flag bits */ | 195 | /* Thread control flag bits */ |
195 | #define T_STOP (1<<0) /* Stop run */ | 196 | #define T_STOP (1<<0) /* Stop run */ |
@@ -372,6 +373,7 @@ struct pktgen_dev { | |||
372 | 373 | ||
373 | u16 queue_map_min; | 374 | u16 queue_map_min; |
374 | u16 queue_map_max; | 375 | u16 queue_map_max; |
376 | int node; /* Memory node */ | ||
375 | 377 | ||
376 | #ifdef CONFIG_XFRM | 378 | #ifdef CONFIG_XFRM |
377 | __u8 ipsmode; /* IPSEC mode (config) */ | 379 | __u8 ipsmode; /* IPSEC mode (config) */ |
@@ -607,6 +609,9 @@ static int pktgen_if_show(struct seq_file *seq, void *v) | |||
607 | if (pkt_dev->traffic_class) | 609 | if (pkt_dev->traffic_class) |
608 | seq_printf(seq, " traffic_class: 0x%02x\n", pkt_dev->traffic_class); | 610 | seq_printf(seq, " traffic_class: 0x%02x\n", pkt_dev->traffic_class); |
609 | 611 | ||
612 | if (pkt_dev->node >= 0) | ||
613 | seq_printf(seq, " node: %d\n", pkt_dev->node); | ||
614 | |||
610 | seq_printf(seq, " Flags: "); | 615 | seq_printf(seq, " Flags: "); |
611 | 616 | ||
612 | if (pkt_dev->flags & F_IPV6) | 617 | if (pkt_dev->flags & F_IPV6) |
@@ -660,6 +665,9 @@ static int pktgen_if_show(struct seq_file *seq, void *v) | |||
660 | if (pkt_dev->flags & F_SVID_RND) | 665 | if (pkt_dev->flags & F_SVID_RND) |
661 | seq_printf(seq, "SVID_RND "); | 666 | seq_printf(seq, "SVID_RND "); |
662 | 667 | ||
668 | if (pkt_dev->flags & F_NODE) | ||
669 | seq_printf(seq, "NODE_ALLOC "); | ||
670 | |||
663 | seq_puts(seq, "\n"); | 671 | seq_puts(seq, "\n"); |
664 | 672 | ||
665 | /* not really stopped, more like last-running-at */ | 673 | /* not really stopped, more like last-running-at */ |
@@ -1074,6 +1082,21 @@ static ssize_t pktgen_if_write(struct file *file, | |||
1074 | pkt_dev->dst_mac_count); | 1082 | pkt_dev->dst_mac_count); |
1075 | return count; | 1083 | return count; |
1076 | } | 1084 | } |
1085 | if (!strcmp(name, "node")) { | ||
1086 | len = num_arg(&user_buffer[i], 10, &value); | ||
1087 | if (len < 0) | ||
1088 | return len; | ||
1089 | |||
1090 | i += len; | ||
1091 | |||
1092 | if (node_possible(value)) { | ||
1093 | pkt_dev->node = value; | ||
1094 | sprintf(pg_result, "OK: node=%d", pkt_dev->node); | ||
1095 | } | ||
1096 | else | ||
1097 | sprintf(pg_result, "ERROR: node not possible"); | ||
1098 | return count; | ||
1099 | } | ||
1077 | if (!strcmp(name, "flag")) { | 1100 | if (!strcmp(name, "flag")) { |
1078 | char f[32]; | 1101 | char f[32]; |
1079 | memset(f, 0, 32); | 1102 | memset(f, 0, 32); |
@@ -1166,12 +1189,18 @@ static ssize_t pktgen_if_write(struct file *file, | |||
1166 | else if (strcmp(f, "!IPV6") == 0) | 1189 | else if (strcmp(f, "!IPV6") == 0) |
1167 | pkt_dev->flags &= ~F_IPV6; | 1190 | pkt_dev->flags &= ~F_IPV6; |
1168 | 1191 | ||
1192 | else if (strcmp(f, "NODE_ALLOC") == 0) | ||
1193 | pkt_dev->flags |= F_NODE; | ||
1194 | |||
1195 | else if (strcmp(f, "!NODE_ALLOC") == 0) | ||
1196 | pkt_dev->flags &= ~F_NODE; | ||
1197 | |||
1169 | else { | 1198 | else { |
1170 | sprintf(pg_result, | 1199 | sprintf(pg_result, |
1171 | "Flag -:%s:- unknown\nAvailable flags, (prepend ! to un-set flag):\n%s", | 1200 | "Flag -:%s:- unknown\nAvailable flags, (prepend ! to un-set flag):\n%s", |
1172 | f, | 1201 | f, |
1173 | "IPSRC_RND, IPDST_RND, UDPSRC_RND, UDPDST_RND, " | 1202 | "IPSRC_RND, IPDST_RND, UDPSRC_RND, UDPDST_RND, " |
1174 | "MACSRC_RND, MACDST_RND, TXSIZE_RND, IPV6, MPLS_RND, VID_RND, SVID_RND, FLOW_SEQ, IPSEC\n"); | 1203 | "MACSRC_RND, MACDST_RND, TXSIZE_RND, IPV6, MPLS_RND, VID_RND, SVID_RND, FLOW_SEQ, IPSEC, NODE_ALLOC\n"); |
1175 | return count; | 1204 | return count; |
1176 | } | 1205 | } |
1177 | sprintf(pg_result, "OK: flags=0x%x", pkt_dev->flags); | 1206 | sprintf(pg_result, "OK: flags=0x%x", pkt_dev->flags); |
@@ -2572,9 +2601,27 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, | |||
2572 | mod_cur_headers(pkt_dev); | 2601 | mod_cur_headers(pkt_dev); |
2573 | 2602 | ||
2574 | datalen = (odev->hard_header_len + 16) & ~0xf; | 2603 | datalen = (odev->hard_header_len + 16) & ~0xf; |
2575 | skb = __netdev_alloc_skb(odev, | 2604 | |
2576 | pkt_dev->cur_pkt_size + 64 | 2605 | if (pkt_dev->flags & F_NODE) { |
2577 | + datalen + pkt_dev->pkt_overhead, GFP_NOWAIT); | 2606 | int node; |
2607 | |||
2608 | if (pkt_dev->node >= 0) | ||
2609 | node = pkt_dev->node; | ||
2610 | else | ||
2611 | node = numa_node_id(); | ||
2612 | |||
2613 | skb = __alloc_skb(NET_SKB_PAD + pkt_dev->cur_pkt_size + 64 | ||
2614 | + datalen + pkt_dev->pkt_overhead, GFP_NOWAIT, 0, node); | ||
2615 | if (likely(skb)) { | ||
2616 | skb_reserve(skb, NET_SKB_PAD); | ||
2617 | skb->dev = odev; | ||
2618 | } | ||
2619 | } | ||
2620 | else | ||
2621 | skb = __netdev_alloc_skb(odev, | ||
2622 | pkt_dev->cur_pkt_size + 64 | ||
2623 | + datalen + pkt_dev->pkt_overhead, GFP_NOWAIT); | ||
2624 | |||
2578 | if (!skb) { | 2625 | if (!skb) { |
2579 | sprintf(pkt_dev->result, "No memory"); | 2626 | sprintf(pkt_dev->result, "No memory"); |
2580 | return NULL; | 2627 | return NULL; |
@@ -3674,6 +3721,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) | |||
3674 | pkt_dev->svlan_p = 0; | 3721 | pkt_dev->svlan_p = 0; |
3675 | pkt_dev->svlan_cfi = 0; | 3722 | pkt_dev->svlan_cfi = 0; |
3676 | pkt_dev->svlan_id = 0xffff; | 3723 | pkt_dev->svlan_id = 0xffff; |
3724 | pkt_dev->node = -1; | ||
3677 | 3725 | ||
3678 | err = pktgen_setup_dev(pkt_dev, ifname); | 3726 | err = pktgen_setup_dev(pkt_dev, ifname); |
3679 | if (err) | 3727 | if (err) |
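
pktgen 2.73 adds a node parameter and a NODE_ALLOC flag; with both set, fill_packet_ipv4() pins the skb's memory to the requested NUMA node through the four-argument __alloc_skb() and redoes by hand what __netdev_alloc_skb() would otherwise do. A minimal sketch of that allocation pattern (alloc_skb_on_node is a hypothetical helper name):

    #include <linux/skbuff.h>
    #include <linux/netdevice.h>
    #include <linux/topology.h>

    static struct sk_buff *alloc_skb_on_node(struct net_device *dev,
                                             unsigned int size, int node)
    {
            struct sk_buff *skb;

            if (node < 0)
                    node = numa_node_id();  /* fall back to the local node */

            /* Third argument 0 = no fclone; NET_SKB_PAD headroom is
             * added by hand because __netdev_alloc_skb() is bypassed. */
            skb = __alloc_skb(NET_SKB_PAD + size, GFP_NOWAIT, 0, node);
            if (likely(skb)) {
                    skb_reserve(skb, NET_SKB_PAD);
                    skb->dev = dev;
            }
            return skb;
    }

From userspace the knob rides the existing proc interface: echo "node 0" and then "flag NODE_ALLOC" into /proc/net/pktgen/<ifname>.
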
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index fe776c9ddeca..e4b9870e4706 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c | |||
@@ -98,7 +98,7 @@ int lockdep_rtnl_is_held(void) | |||
98 | EXPORT_SYMBOL(lockdep_rtnl_is_held); | 98 | EXPORT_SYMBOL(lockdep_rtnl_is_held); |
99 | #endif /* #ifdef CONFIG_PROVE_LOCKING */ | 99 | #endif /* #ifdef CONFIG_PROVE_LOCKING */ |
100 | 100 | ||
101 | static struct rtnl_link *rtnl_msg_handlers[NPROTO]; | 101 | static struct rtnl_link *rtnl_msg_handlers[RTNL_FAMILY_MAX + 1]; |
102 | 102 | ||
103 | static inline int rtm_msgindex(int msgtype) | 103 | static inline int rtm_msgindex(int msgtype) |
104 | { | 104 | { |
@@ -118,7 +118,11 @@ static rtnl_doit_func rtnl_get_doit(int protocol, int msgindex) | |||
118 | { | 118 | { |
119 | struct rtnl_link *tab; | 119 | struct rtnl_link *tab; |
120 | 120 | ||
121 | tab = rtnl_msg_handlers[protocol]; | 121 | if (protocol <= RTNL_FAMILY_MAX) |
122 | tab = rtnl_msg_handlers[protocol]; | ||
123 | else | ||
124 | tab = NULL; | ||
125 | |||
122 | if (tab == NULL || tab[msgindex].doit == NULL) | 126 | if (tab == NULL || tab[msgindex].doit == NULL) |
123 | tab = rtnl_msg_handlers[PF_UNSPEC]; | 127 | tab = rtnl_msg_handlers[PF_UNSPEC]; |
124 | 128 | ||
@@ -129,7 +133,11 @@ static rtnl_dumpit_func rtnl_get_dumpit(int protocol, int msgindex) | |||
129 | { | 133 | { |
130 | struct rtnl_link *tab; | 134 | struct rtnl_link *tab; |
131 | 135 | ||
132 | tab = rtnl_msg_handlers[protocol]; | 136 | if (protocol <= RTNL_FAMILY_MAX) |
137 | tab = rtnl_msg_handlers[protocol]; | ||
138 | else | ||
139 | tab = NULL; | ||
140 | |||
133 | if (tab == NULL || tab[msgindex].dumpit == NULL) | 141 | if (tab == NULL || tab[msgindex].dumpit == NULL) |
134 | tab = rtnl_msg_handlers[PF_UNSPEC]; | 142 | tab = rtnl_msg_handlers[PF_UNSPEC]; |
135 | 143 | ||
@@ -159,7 +167,7 @@ int __rtnl_register(int protocol, int msgtype, | |||
159 | struct rtnl_link *tab; | 167 | struct rtnl_link *tab; |
160 | int msgindex; | 168 | int msgindex; |
161 | 169 | ||
162 | BUG_ON(protocol < 0 || protocol >= NPROTO); | 170 | BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX); |
163 | msgindex = rtm_msgindex(msgtype); | 171 | msgindex = rtm_msgindex(msgtype); |
164 | 172 | ||
165 | tab = rtnl_msg_handlers[protocol]; | 173 | tab = rtnl_msg_handlers[protocol]; |
@@ -211,7 +219,7 @@ int rtnl_unregister(int protocol, int msgtype) | |||
211 | { | 219 | { |
212 | int msgindex; | 220 | int msgindex; |
213 | 221 | ||
214 | BUG_ON(protocol < 0 || protocol >= NPROTO); | 222 | BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX); |
215 | msgindex = rtm_msgindex(msgtype); | 223 | msgindex = rtm_msgindex(msgtype); |
216 | 224 | ||
217 | if (rtnl_msg_handlers[protocol] == NULL) | 225 | if (rtnl_msg_handlers[protocol] == NULL) |
@@ -233,7 +241,7 @@ EXPORT_SYMBOL_GPL(rtnl_unregister); | |||
233 | */ | 241 | */ |
234 | void rtnl_unregister_all(int protocol) | 242 | void rtnl_unregister_all(int protocol) |
235 | { | 243 | { |
236 | BUG_ON(protocol < 0 || protocol >= NPROTO); | 244 | BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX); |
237 | 245 | ||
238 | kfree(rtnl_msg_handlers[protocol]); | 246 | kfree(rtnl_msg_handlers[protocol]); |
239 | rtnl_msg_handlers[protocol] = NULL; | 247 | rtnl_msg_handlers[protocol] = NULL; |
@@ -600,17 +608,83 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a, | |||
600 | 608 | ||
601 | a->rx_compressed = b->rx_compressed; | 609 | a->rx_compressed = b->rx_compressed; |
602 | a->tx_compressed = b->tx_compressed; | 610 | a->tx_compressed = b->tx_compressed; |
603 | }; | 611 | } |
604 | 612 | ||
613 | static void copy_rtnl_link_stats64(void *v, const struct net_device_stats *b) | ||
614 | { | ||
615 | struct rtnl_link_stats64 a; | ||
616 | |||
617 | a.rx_packets = b->rx_packets; | ||
618 | a.tx_packets = b->tx_packets; | ||
619 | a.rx_bytes = b->rx_bytes; | ||
620 | a.tx_bytes = b->tx_bytes; | ||
621 | a.rx_errors = b->rx_errors; | ||
622 | a.tx_errors = b->tx_errors; | ||
623 | a.rx_dropped = b->rx_dropped; | ||
624 | a.tx_dropped = b->tx_dropped; | ||
625 | |||
626 | a.multicast = b->multicast; | ||
627 | a.collisions = b->collisions; | ||
628 | |||
629 | a.rx_length_errors = b->rx_length_errors; | ||
630 | a.rx_over_errors = b->rx_over_errors; | ||
631 | a.rx_crc_errors = b->rx_crc_errors; | ||
632 | a.rx_frame_errors = b->rx_frame_errors; | ||
633 | a.rx_fifo_errors = b->rx_fifo_errors; | ||
634 | a.rx_missed_errors = b->rx_missed_errors; | ||
635 | |||
636 | a.tx_aborted_errors = b->tx_aborted_errors; | ||
637 | a.tx_carrier_errors = b->tx_carrier_errors; | ||
638 | a.tx_fifo_errors = b->tx_fifo_errors; | ||
639 | a.tx_heartbeat_errors = b->tx_heartbeat_errors; | ||
640 | a.tx_window_errors = b->tx_window_errors; | ||
641 | |||
642 | a.rx_compressed = b->rx_compressed; | ||
643 | a.tx_compressed = b->tx_compressed; | ||
644 | memcpy(v, &a, sizeof(a)); | ||
645 | } | ||
646 | |||
647 | /* All VF info */ | ||
605 | static inline int rtnl_vfinfo_size(const struct net_device *dev) | 648 | static inline int rtnl_vfinfo_size(const struct net_device *dev) |
606 | { | 649 | { |
607 | if (dev->dev.parent && dev_is_pci(dev->dev.parent)) | 650 | if (dev->dev.parent && dev_is_pci(dev->dev.parent)) { |
608 | return dev_num_vf(dev->dev.parent) * | 651 | |
609 | sizeof(struct ifla_vf_info); | 652 | int num_vfs = dev_num_vf(dev->dev.parent); |
610 | else | 653 | size_t size = nlmsg_total_size(sizeof(struct nlattr)); |
654 | size += nlmsg_total_size(num_vfs * sizeof(struct nlattr)); | ||
655 | size += num_vfs * (sizeof(struct ifla_vf_mac) + | ||
656 | sizeof(struct ifla_vf_vlan) + | ||
657 | sizeof(struct ifla_vf_tx_rate)); | ||
658 | return size; | ||
659 | } else | ||
611 | return 0; | 660 | return 0; |
612 | } | 661 | } |
613 | 662 | ||
663 | static size_t rtnl_port_size(const struct net_device *dev) | ||
664 | { | ||
665 | size_t port_size = nla_total_size(4) /* PORT_VF */ | ||
666 | + nla_total_size(PORT_PROFILE_MAX) /* PORT_PROFILE */ | ||
667 | + nla_total_size(sizeof(struct ifla_port_vsi)) | ||
668 | /* PORT_VSI_TYPE */ | ||
669 | + nla_total_size(PORT_UUID_MAX) /* PORT_INSTANCE_UUID */ | ||
670 | + nla_total_size(PORT_UUID_MAX) /* PORT_HOST_UUID */ | ||
671 | + nla_total_size(1) /* PROT_VDP_REQUEST */ | ||
672 | + nla_total_size(2); /* PORT_VDP_RESPONSE */ | ||
673 | size_t vf_ports_size = nla_total_size(sizeof(struct nlattr)); | ||
674 | size_t vf_port_size = nla_total_size(sizeof(struct nlattr)) | ||
675 | + port_size; | ||
676 | size_t port_self_size = nla_total_size(sizeof(struct nlattr)) | ||
677 | + port_size; | ||
678 | |||
679 | if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent) | ||
680 | return 0; | ||
681 | if (dev_num_vf(dev->dev.parent)) | ||
682 | return port_self_size + vf_ports_size + | ||
683 | vf_port_size * dev_num_vf(dev->dev.parent); | ||
684 | else | ||
685 | return port_self_size; | ||
686 | } | ||
687 | |||
614 | static inline size_t if_nlmsg_size(const struct net_device *dev) | 688 | static inline size_t if_nlmsg_size(const struct net_device *dev) |
615 | { | 689 | { |
616 | return NLMSG_ALIGN(sizeof(struct ifinfomsg)) | 690 | return NLMSG_ALIGN(sizeof(struct ifinfomsg)) |
@@ -619,6 +693,7 @@ static inline size_t if_nlmsg_size(const struct net_device *dev) | |||
619 | + nla_total_size(IFNAMSIZ) /* IFLA_QDISC */ | 693 | + nla_total_size(IFNAMSIZ) /* IFLA_QDISC */ |
620 | + nla_total_size(sizeof(struct rtnl_link_ifmap)) | 694 | + nla_total_size(sizeof(struct rtnl_link_ifmap)) |
621 | + nla_total_size(sizeof(struct rtnl_link_stats)) | 695 | + nla_total_size(sizeof(struct rtnl_link_stats)) |
696 | + nla_total_size(sizeof(struct rtnl_link_stats64)) | ||
622 | + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */ | 697 | + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */ |
623 | + nla_total_size(MAX_ADDR_LEN) /* IFLA_BROADCAST */ | 698 | + nla_total_size(MAX_ADDR_LEN) /* IFLA_BROADCAST */ |
624 | + nla_total_size(4) /* IFLA_TXQLEN */ | 699 | + nla_total_size(4) /* IFLA_TXQLEN */ |
@@ -629,10 +704,83 @@ static inline size_t if_nlmsg_size(const struct net_device *dev) | |||
629 | + nla_total_size(1) /* IFLA_OPERSTATE */ | 704 | + nla_total_size(1) /* IFLA_OPERSTATE */ |
630 | + nla_total_size(1) /* IFLA_LINKMODE */ | 705 | + nla_total_size(1) /* IFLA_LINKMODE */ |
631 | + nla_total_size(4) /* IFLA_NUM_VF */ | 706 | + nla_total_size(4) /* IFLA_NUM_VF */ |
632 | + nla_total_size(rtnl_vfinfo_size(dev)) /* IFLA_VFINFO */ | 707 | + rtnl_vfinfo_size(dev) /* IFLA_VFINFO_LIST */ |
708 | + rtnl_port_size(dev) /* IFLA_VF_PORTS + IFLA_PORT_SELF */ | ||
633 | + rtnl_link_get_size(dev); /* IFLA_LINKINFO */ | 709 | + rtnl_link_get_size(dev); /* IFLA_LINKINFO */ |
634 | } | 710 | } |
635 | 711 | ||
712 | static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev) | ||
713 | { | ||
714 | struct nlattr *vf_ports; | ||
715 | struct nlattr *vf_port; | ||
716 | int vf; | ||
717 | int err; | ||
718 | |||
719 | vf_ports = nla_nest_start(skb, IFLA_VF_PORTS); | ||
720 | if (!vf_ports) | ||
721 | return -EMSGSIZE; | ||
722 | |||
723 | for (vf = 0; vf < dev_num_vf(dev->dev.parent); vf++) { | ||
724 | vf_port = nla_nest_start(skb, IFLA_VF_PORT); | ||
725 | if (!vf_port) { | ||
726 | nla_nest_cancel(skb, vf_ports); | ||
727 | return -EMSGSIZE; | ||
728 | } | ||
729 | NLA_PUT_U32(skb, IFLA_PORT_VF, vf); | ||
730 | err = dev->netdev_ops->ndo_get_vf_port(dev, vf, skb); | ||
731 | if (err) { | ||
732 | nla_put_failure: | ||
733 | nla_nest_cancel(skb, vf_port); | ||
734 | continue; | ||
735 | } | ||
736 | nla_nest_end(skb, vf_port); | ||
737 | } | ||
738 | |||
739 | nla_nest_end(skb, vf_ports); | ||
740 | |||
741 | return 0; | ||
742 | } | ||
743 | |||
744 | static int rtnl_port_self_fill(struct sk_buff *skb, struct net_device *dev) | ||
745 | { | ||
746 | struct nlattr *port_self; | ||
747 | int err; | ||
748 | |||
749 | port_self = nla_nest_start(skb, IFLA_PORT_SELF); | ||
750 | if (!port_self) | ||
751 | return -EMSGSIZE; | ||
752 | |||
753 | err = dev->netdev_ops->ndo_get_vf_port(dev, PORT_SELF_VF, skb); | ||
754 | if (err) { | ||
755 | nla_nest_cancel(skb, port_self); | ||
756 | return err; | ||
757 | } | ||
758 | |||
759 | nla_nest_end(skb, port_self); | ||
760 | |||
761 | return 0; | ||
762 | } | ||
763 | |||
764 | static int rtnl_port_fill(struct sk_buff *skb, struct net_device *dev) | ||
765 | { | ||
766 | int err; | ||
767 | |||
768 | if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent) | ||
769 | return 0; | ||
770 | |||
771 | err = rtnl_port_self_fill(skb, dev); | ||
772 | if (err) | ||
773 | return err; | ||
774 | |||
775 | if (dev_num_vf(dev->dev.parent)) { | ||
776 | err = rtnl_vf_ports_fill(skb, dev); | ||
777 | if (err) | ||
778 | return err; | ||
779 | } | ||
780 | |||
781 | return 0; | ||
782 | } | ||
783 | |||
636 | static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, | 784 | static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, |
637 | int type, u32 pid, u32 seq, u32 change, | 785 | int type, u32 pid, u32 seq, u32 change, |
638 | unsigned int flags) | 786 | unsigned int flags) |
@@ -698,17 +846,52 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, | |||
698 | stats = dev_get_stats(dev); | 846 | stats = dev_get_stats(dev); |
699 | copy_rtnl_link_stats(nla_data(attr), stats); | 847 | copy_rtnl_link_stats(nla_data(attr), stats); |
700 | 848 | ||
849 | attr = nla_reserve(skb, IFLA_STATS64, | ||
850 | sizeof(struct rtnl_link_stats64)); | ||
851 | if (attr == NULL) | ||
852 | goto nla_put_failure; | ||
853 | copy_rtnl_link_stats64(nla_data(attr), stats); | ||
854 | |||
855 | if (dev->dev.parent) | ||
856 | NLA_PUT_U32(skb, IFLA_NUM_VF, dev_num_vf(dev->dev.parent)); | ||
857 | |||
701 | if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent) { | 858 | if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent) { |
702 | int i; | 859 | int i; |
703 | struct ifla_vf_info ivi; | ||
704 | 860 | ||
705 | NLA_PUT_U32(skb, IFLA_NUM_VF, dev_num_vf(dev->dev.parent)); | 861 | struct nlattr *vfinfo, *vf; |
706 | for (i = 0; i < dev_num_vf(dev->dev.parent); i++) { | 862 | int num_vfs = dev_num_vf(dev->dev.parent); |
863 | |||
864 | vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST); | ||
865 | if (!vfinfo) | ||
866 | goto nla_put_failure; | ||
867 | for (i = 0; i < num_vfs; i++) { | ||
868 | struct ifla_vf_info ivi; | ||
869 | struct ifla_vf_mac vf_mac; | ||
870 | struct ifla_vf_vlan vf_vlan; | ||
871 | struct ifla_vf_tx_rate vf_tx_rate; | ||
707 | if (dev->netdev_ops->ndo_get_vf_config(dev, i, &ivi)) | 872 | if (dev->netdev_ops->ndo_get_vf_config(dev, i, &ivi)) |
708 | break; | 873 | break; |
709 | NLA_PUT(skb, IFLA_VFINFO, sizeof(ivi), &ivi); | 874 | vf_mac.vf = vf_vlan.vf = vf_tx_rate.vf = ivi.vf; |
875 | memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac)); | ||
876 | vf_vlan.vlan = ivi.vlan; | ||
877 | vf_vlan.qos = ivi.qos; | ||
878 | vf_tx_rate.rate = ivi.tx_rate; | ||
879 | vf = nla_nest_start(skb, IFLA_VF_INFO); | ||
880 | if (!vf) { | ||
881 | nla_nest_cancel(skb, vfinfo); | ||
882 | goto nla_put_failure; | ||
883 | } | ||
884 | NLA_PUT(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac); | ||
885 | NLA_PUT(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan); | ||
886 | NLA_PUT(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate), &vf_tx_rate); | ||
887 | nla_nest_end(skb, vf); | ||
710 | } | 888 | } |
889 | nla_nest_end(skb, vfinfo); | ||
711 | } | 890 | } |
891 | |||
892 | if (rtnl_port_fill(skb, dev)) | ||
893 | goto nla_put_failure; | ||
894 | |||
712 | if (dev->rtnl_link_ops) { | 895 | if (dev->rtnl_link_ops) { |
713 | if (rtnl_link_fill(skb, dev) < 0) | 896 | if (rtnl_link_fill(skb, dev) < 0) |
714 | goto nla_put_failure; | 897 | goto nla_put_failure; |
@@ -769,6 +952,22 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = { | |||
769 | [IFLA_LINKINFO] = { .type = NLA_NESTED }, | 952 | [IFLA_LINKINFO] = { .type = NLA_NESTED }, |
770 | [IFLA_NET_NS_PID] = { .type = NLA_U32 }, | 953 | [IFLA_NET_NS_PID] = { .type = NLA_U32 }, |
771 | [IFLA_IFALIAS] = { .type = NLA_STRING, .len = IFALIASZ-1 }, | 954 | [IFLA_IFALIAS] = { .type = NLA_STRING, .len = IFALIASZ-1 }, |
955 | [IFLA_VFINFO_LIST] = {. type = NLA_NESTED }, | ||
956 | [IFLA_VF_PORTS] = { .type = NLA_NESTED }, | ||
957 | [IFLA_PORT_SELF] = { .type = NLA_NESTED }, | ||
958 | }; | ||
959 | EXPORT_SYMBOL(ifla_policy); | ||
960 | |||
961 | static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { | ||
962 | [IFLA_INFO_KIND] = { .type = NLA_STRING }, | ||
963 | [IFLA_INFO_DATA] = { .type = NLA_NESTED }, | ||
964 | }; | ||
965 | |||
966 | static const struct nla_policy ifla_vfinfo_policy[IFLA_VF_INFO_MAX+1] = { | ||
967 | [IFLA_VF_INFO] = { .type = NLA_NESTED }, | ||
968 | }; | ||
969 | |||
970 | static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = { | ||
772 | [IFLA_VF_MAC] = { .type = NLA_BINARY, | 971 | [IFLA_VF_MAC] = { .type = NLA_BINARY, |
773 | .len = sizeof(struct ifla_vf_mac) }, | 972 | .len = sizeof(struct ifla_vf_mac) }, |
774 | [IFLA_VF_VLAN] = { .type = NLA_BINARY, | 973 | [IFLA_VF_VLAN] = { .type = NLA_BINARY, |
@@ -776,11 +975,19 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = { | |||
776 | [IFLA_VF_TX_RATE] = { .type = NLA_BINARY, | 975 | [IFLA_VF_TX_RATE] = { .type = NLA_BINARY, |
777 | .len = sizeof(struct ifla_vf_tx_rate) }, | 976 | .len = sizeof(struct ifla_vf_tx_rate) }, |
778 | }; | 977 | }; |
779 | EXPORT_SYMBOL(ifla_policy); | ||
780 | 978 | ||
781 | static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { | 979 | static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = { |
782 | [IFLA_INFO_KIND] = { .type = NLA_STRING }, | 980 | [IFLA_PORT_VF] = { .type = NLA_U32 }, |
783 | [IFLA_INFO_DATA] = { .type = NLA_NESTED }, | 981 | [IFLA_PORT_PROFILE] = { .type = NLA_STRING, |
982 | .len = PORT_PROFILE_MAX }, | ||
983 | [IFLA_PORT_VSI_TYPE] = { .type = NLA_BINARY, | ||
984 | .len = sizeof(struct ifla_port_vsi)}, | ||
985 | [IFLA_PORT_INSTANCE_UUID] = { .type = NLA_BINARY, | ||
986 | .len = PORT_UUID_MAX }, | ||
987 | [IFLA_PORT_HOST_UUID] = { .type = NLA_STRING, | ||
988 | .len = PORT_UUID_MAX }, | ||
989 | [IFLA_PORT_REQUEST] = { .type = NLA_U8, }, | ||
990 | [IFLA_PORT_RESPONSE] = { .type = NLA_U16, }, | ||
784 | }; | 991 | }; |
785 | 992 | ||
786 | struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[]) | 993 | struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[]) |
@@ -812,6 +1019,52 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[]) | |||
812 | return 0; | 1019 | return 0; |
813 | } | 1020 | } |
814 | 1021 | ||
1022 | static int do_setvfinfo(struct net_device *dev, struct nlattr *attr) | ||
1023 | { | ||
1024 | int rem, err = -EINVAL; | ||
1025 | struct nlattr *vf; | ||
1026 | const struct net_device_ops *ops = dev->netdev_ops; | ||
1027 | |||
1028 | nla_for_each_nested(vf, attr, rem) { | ||
1029 | switch (nla_type(vf)) { | ||
1030 | case IFLA_VF_MAC: { | ||
1031 | struct ifla_vf_mac *ivm; | ||
1032 | ivm = nla_data(vf); | ||
1033 | err = -EOPNOTSUPP; | ||
1034 | if (ops->ndo_set_vf_mac) | ||
1035 | err = ops->ndo_set_vf_mac(dev, ivm->vf, | ||
1036 | ivm->mac); | ||
1037 | break; | ||
1038 | } | ||
1039 | case IFLA_VF_VLAN: { | ||
1040 | struct ifla_vf_vlan *ivv; | ||
1041 | ivv = nla_data(vf); | ||
1042 | err = -EOPNOTSUPP; | ||
1043 | if (ops->ndo_set_vf_vlan) | ||
1044 | err = ops->ndo_set_vf_vlan(dev, ivv->vf, | ||
1045 | ivv->vlan, | ||
1046 | ivv->qos); | ||
1047 | break; | ||
1048 | } | ||
1049 | case IFLA_VF_TX_RATE: { | ||
1050 | struct ifla_vf_tx_rate *ivt; | ||
1051 | ivt = nla_data(vf); | ||
1052 | err = -EOPNOTSUPP; | ||
1053 | if (ops->ndo_set_vf_tx_rate) | ||
1054 | err = ops->ndo_set_vf_tx_rate(dev, ivt->vf, | ||
1055 | ivt->rate); | ||
1056 | break; | ||
1057 | } | ||
1058 | default: | ||
1059 | err = -EINVAL; | ||
1060 | break; | ||
1061 | } | ||
1062 | if (err) | ||
1063 | break; | ||
1064 | } | ||
1065 | return err; | ||
1066 | } | ||
1067 | |||
815 | static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, | 1068 | static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, |
816 | struct nlattr **tb, char *ifname, int modified) | 1069 | struct nlattr **tb, char *ifname, int modified) |
817 | { | 1070 | { |
@@ -942,37 +1195,61 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, | |||
942 | write_unlock_bh(&dev_base_lock); | 1195 | write_unlock_bh(&dev_base_lock); |
943 | } | 1196 | } |
944 | 1197 | ||
945 | if (tb[IFLA_VF_MAC]) { | 1198 | if (tb[IFLA_VFINFO_LIST]) { |
946 | struct ifla_vf_mac *ivm; | 1199 | struct nlattr *attr; |
947 | ivm = nla_data(tb[IFLA_VF_MAC]); | 1200 | int rem; |
948 | err = -EOPNOTSUPP; | 1201 | nla_for_each_nested(attr, tb[IFLA_VFINFO_LIST], rem) { |
949 | if (ops->ndo_set_vf_mac) | 1202 | if (nla_type(attr) != IFLA_VF_INFO) |
950 | err = ops->ndo_set_vf_mac(dev, ivm->vf, ivm->mac); | 1203 | goto errout; |
951 | if (err < 0) | 1204 | err = do_setvfinfo(dev, attr); |
952 | goto errout; | 1205 | if (err < 0) |
953 | modified = 1; | 1206 | goto errout; |
1207 | modified = 1; | ||
1208 | } | ||
954 | } | 1209 | } |
1210 | err = 0; | ||
1211 | |||
1212 | if (tb[IFLA_VF_PORTS]) { | ||
1213 | struct nlattr *port[IFLA_PORT_MAX+1]; | ||
1214 | struct nlattr *attr; | ||
1215 | int vf; | ||
1216 | int rem; | ||
955 | 1217 | ||
956 | if (tb[IFLA_VF_VLAN]) { | ||
957 | struct ifla_vf_vlan *ivv; | ||
958 | ivv = nla_data(tb[IFLA_VF_VLAN]); | ||
959 | err = -EOPNOTSUPP; | 1218 | err = -EOPNOTSUPP; |
960 | if (ops->ndo_set_vf_vlan) | 1219 | if (!ops->ndo_set_vf_port) |
961 | err = ops->ndo_set_vf_vlan(dev, ivv->vf, | ||
962 | ivv->vlan, | ||
963 | ivv->qos); | ||
964 | if (err < 0) | ||
965 | goto errout; | 1220 | goto errout; |
966 | modified = 1; | 1221 | |
1222 | nla_for_each_nested(attr, tb[IFLA_VF_PORTS], rem) { | ||
1223 | if (nla_type(attr) != IFLA_VF_PORT) | ||
1224 | continue; | ||
1225 | err = nla_parse_nested(port, IFLA_PORT_MAX, | ||
1226 | attr, ifla_port_policy); | ||
1227 | if (err < 0) | ||
1228 | goto errout; | ||
1229 | if (!port[IFLA_PORT_VF]) { | ||
1230 | err = -EOPNOTSUPP; | ||
1231 | goto errout; | ||
1232 | } | ||
1233 | vf = nla_get_u32(port[IFLA_PORT_VF]); | ||
1234 | err = ops->ndo_set_vf_port(dev, vf, port); | ||
1235 | if (err < 0) | ||
1236 | goto errout; | ||
1237 | modified = 1; | ||
1238 | } | ||
967 | } | 1239 | } |
968 | err = 0; | 1240 | err = 0; |
969 | 1241 | ||
970 | if (tb[IFLA_VF_TX_RATE]) { | 1242 | if (tb[IFLA_PORT_SELF]) { |
971 | struct ifla_vf_tx_rate *ivt; | 1243 | struct nlattr *port[IFLA_PORT_MAX+1]; |
972 | ivt = nla_data(tb[IFLA_VF_TX_RATE]); | 1244 | |
1245 | err = nla_parse_nested(port, IFLA_PORT_MAX, | ||
1246 | tb[IFLA_PORT_SELF], ifla_port_policy); | ||
1247 | if (err < 0) | ||
1248 | goto errout; | ||
1249 | |||
973 | err = -EOPNOTSUPP; | 1250 | err = -EOPNOTSUPP; |
974 | if (ops->ndo_set_vf_tx_rate) | 1251 | if (ops->ndo_set_vf_port) |
975 | err = ops->ndo_set_vf_tx_rate(dev, ivt->vf, ivt->rate); | 1252 | err = ops->ndo_set_vf_port(dev, PORT_SELF_VF, port); |
976 | if (err < 0) | 1253 | if (err < 0) |
977 | goto errout; | 1254 | goto errout; |
978 | modified = 1; | 1255 | modified = 1; |
@@ -1336,7 +1613,7 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb) | |||
1336 | 1613 | ||
1337 | if (s_idx == 0) | 1614 | if (s_idx == 0) |
1338 | s_idx = 1; | 1615 | s_idx = 1; |
1339 | for (idx = 1; idx < NPROTO; idx++) { | 1616 | for (idx = 1; idx <= RTNL_FAMILY_MAX; idx++) { |
1340 | int type = cb->nlh->nlmsg_type-RTM_BASE; | 1617 | int type = cb->nlh->nlmsg_type-RTM_BASE; |
1341 | if (idx < s_idx || idx == PF_PACKET) | 1618 | if (idx < s_idx || idx == PF_PACKET) |
1342 | continue; | 1619 | continue; |
@@ -1404,9 +1681,6 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) | |||
1404 | return 0; | 1681 | return 0; |
1405 | 1682 | ||
1406 | family = ((struct rtgenmsg *)NLMSG_DATA(nlh))->rtgen_family; | 1683 | family = ((struct rtgenmsg *)NLMSG_DATA(nlh))->rtgen_family; |
1407 | if (family >= NPROTO) | ||
1408 | return -EAFNOSUPPORT; | ||
1409 | |||
1410 | sz_idx = type>>2; | 1684 | sz_idx = type>>2; |
1411 | kind = type&3; | 1685 | kind = type&3; |
1412 | 1686 | ||
@@ -1474,6 +1748,7 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi | |||
1474 | case NETDEV_POST_INIT: | 1748 | case NETDEV_POST_INIT: |
1475 | case NETDEV_REGISTER: | 1749 | case NETDEV_REGISTER: |
1476 | case NETDEV_CHANGE: | 1750 | case NETDEV_CHANGE: |
1751 | case NETDEV_PRE_TYPE_CHANGE: | ||
1477 | case NETDEV_GOING_DOWN: | 1752 | case NETDEV_GOING_DOWN: |
1478 | case NETDEV_UNREGISTER: | 1753 | case NETDEV_UNREGISTER: |
1479 | case NETDEV_UNREGISTER_BATCH: | 1754 | case NETDEV_UNREGISTER_BATCH: |
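
The VF and port rework above leans heavily on nested netlink attributes: IFLA_VFINFO_LIST carries one IFLA_VF_INFO nest per VF, IFLA_VF_PORTS carries IFLA_VF_PORT nests, and IFLA_PORT_SELF is a nest of its own. The construction side always follows the same shape; a minimal sketch with hypothetical attribute numbers (DEMO_*):

    #include <net/netlink.h>

    enum {
            DEMO_ATTR_UNSPEC,
            DEMO_ATTR_LIST,         /* nested container */
            DEMO_ATTR_ID,           /* u32 member, may repeat */
            __DEMO_ATTR_MAX
    };

    static int demo_fill_list(struct sk_buff *skb, const u32 *ids, int n)
    {
            struct nlattr *nest;
            int i;

            nest = nla_nest_start(skb, DEMO_ATTR_LIST);
            if (!nest)
                    return -EMSGSIZE;

            for (i = 0; i < n; i++) {
                    if (nla_put_u32(skb, DEMO_ATTR_ID, ids[i])) {
                            /* unwind the partial nest so the
                             * message stays well-formed */
                            nla_nest_cancel(skb, nest);
                            return -EMSGSIZE;
                    }
            }

            nla_nest_end(skb, nest);        /* patch the container's length */
            return 0;
    }

rtnl_vf_ports_fill() is this loop with the per-item payload delegated to ndo_get_vf_port(), and nla_parse_nested() against ifla_port_policy is the matching decode step in do_setlink().
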
diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 931981774b1a..66d9c416851e 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c | |||
@@ -117,7 +117,7 @@ static const struct pipe_buf_operations sock_pipe_buf_ops = { | |||
117 | * | 117 | * |
118 | * Out of line support code for skb_put(). Not user callable. | 118 | * Out of line support code for skb_put(). Not user callable. |
119 | */ | 119 | */ |
120 | void skb_over_panic(struct sk_buff *skb, int sz, void *here) | 120 | static void skb_over_panic(struct sk_buff *skb, int sz, void *here) |
121 | { | 121 | { |
122 | printk(KERN_EMERG "skb_over_panic: text:%p len:%d put:%d head:%p " | 122 | printk(KERN_EMERG "skb_over_panic: text:%p len:%d put:%d head:%p " |
123 | "data:%p tail:%#lx end:%#lx dev:%s\n", | 123 | "data:%p tail:%#lx end:%#lx dev:%s\n", |
@@ -126,7 +126,6 @@ void skb_over_panic(struct sk_buff *skb, int sz, void *here) | |||
126 | skb->dev ? skb->dev->name : "<NULL>"); | 126 | skb->dev ? skb->dev->name : "<NULL>"); |
127 | BUG(); | 127 | BUG(); |
128 | } | 128 | } |
129 | EXPORT_SYMBOL(skb_over_panic); | ||
130 | 129 | ||
131 | /** | 130 | /** |
132 | * skb_under_panic - private function | 131 | * skb_under_panic - private function |
@@ -137,7 +136,7 @@ EXPORT_SYMBOL(skb_over_panic); | |||
137 | * Out of line support code for skb_push(). Not user callable. | 136 | * Out of line support code for skb_push(). Not user callable. |
138 | */ | 137 | */ |
139 | 138 | ||
140 | void skb_under_panic(struct sk_buff *skb, int sz, void *here) | 139 | static void skb_under_panic(struct sk_buff *skb, int sz, void *here) |
141 | { | 140 | { |
142 | printk(KERN_EMERG "skb_under_panic: text:%p len:%d put:%d head:%p " | 141 | printk(KERN_EMERG "skb_under_panic: text:%p len:%d put:%d head:%p " |
143 | "data:%p tail:%#lx end:%#lx dev:%s\n", | 142 | "data:%p tail:%#lx end:%#lx dev:%s\n", |
@@ -146,7 +145,6 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here) | |||
146 | skb->dev ? skb->dev->name : "<NULL>"); | 145 | skb->dev ? skb->dev->name : "<NULL>"); |
147 | BUG(); | 146 | BUG(); |
148 | } | 147 | } |
149 | EXPORT_SYMBOL(skb_under_panic); | ||
150 | 148 | ||
151 | /* Allocate a new skbuff. We do this ourselves so we can fill in a few | 149 | /* Allocate a new skbuff. We do this ourselves so we can fill in a few |
152 | * 'private' fields and also do memory statistics to find all the | 150 | * 'private' fields and also do memory statistics to find all the |
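Making skb_over_panic()/skb_under_panic() static (and dropping their EXPORT_SYMBOLs) removes them from the module API; the checks themselves remain reachable through skb_put() and skb_push(), which invoke them on buffer overrun or underrun. Module code therefore keeps using the normal accessors, e.g.:

        /* skb_put() bounds-checks against skb->end and triggers
         * skb_over_panic() itself on overflow. */
        unsigned char *p = skb_put(skb, sizeof(struct ethhdr));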
@@ -183,12 +181,14 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, | |||
183 | skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node); | 181 | skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node); |
184 | if (!skb) | 182 | if (!skb) |
185 | goto out; | 183 | goto out; |
184 | prefetchw(skb); | ||
186 | 185 | ||
187 | size = SKB_DATA_ALIGN(size); | 186 | size = SKB_DATA_ALIGN(size); |
188 | data = kmalloc_node_track_caller(size + sizeof(struct skb_shared_info), | 187 | data = kmalloc_node_track_caller(size + sizeof(struct skb_shared_info), |
189 | gfp_mask, node); | 188 | gfp_mask, node); |
190 | if (!data) | 189 | if (!data) |
191 | goto nodata; | 190 | goto nodata; |
191 | prefetchw(data + size); | ||
192 | 192 | ||
193 | /* | 193 | /* |
194 | * Only clear those fields we need to clear, not those that we will | 194 | * Only clear those fields we need to clear, not those that we will |
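The two prefetchw() calls warm, for writing, the cache lines that __alloc_skb() is about to dirty: the sk_buff itself (memset a few lines below) and the skb_shared_info that sits right at data + size. prefetchw() degrades to a no-op on architectures without a write-prefetch hint, so this is purely an optimization. The address arithmetic works because of where the shared info lives (sketch, ignoring the alignment already handled by SKB_DATA_ALIGN):

        /* skb_shinfo(skb) resolves to the area just past the data
         * buffer, i.e. the second prefetchw() target: */
        struct skb_shared_info *shinfo =
                (struct skb_shared_info *)(data + size);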
@@ -210,15 +210,8 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, | |||
210 | 210 | ||
211 | /* make sure we initialize shinfo sequentially */ | 211 | /* make sure we initialize shinfo sequentially */ |
212 | shinfo = skb_shinfo(skb); | 212 | shinfo = skb_shinfo(skb); |
213 | memset(shinfo, 0, offsetof(struct skb_shared_info, dataref)); | ||
213 | atomic_set(&shinfo->dataref, 1); | 214 | atomic_set(&shinfo->dataref, 1); |
214 | shinfo->nr_frags = 0; | ||
215 | shinfo->gso_size = 0; | ||
216 | shinfo->gso_segs = 0; | ||
217 | shinfo->gso_type = 0; | ||
218 | shinfo->ip6_frag_id = 0; | ||
219 | shinfo->tx_flags.flags = 0; | ||
220 | skb_frag_list_init(skb); | ||
221 | memset(&shinfo->hwtstamps, 0, sizeof(shinfo->hwtstamps)); | ||
222 | 215 | ||
223 | if (fclone) { | 216 | if (fclone) { |
224 | struct sk_buff *child = skb + 1; | 217 | struct sk_buff *child = skb + 1; |
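The field-by-field zeroing of skb_shared_info collapses into a single memset() up to dataref, which is then set explicitly. This is only correct while every field that must start out zero (nr_frags, the gso_* fields, ip6_frag_id, tx_flags, frag_list, hwtstamps) is declared before dataref in the struct, which is assumed to hold for this release's layout:

        /* One pass instead of eight stores; dataref is re-initialized
         * separately because it must be 1, not 0. */
        memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
        atomic_set(&shinfo->dataref, 1);

skb_recycle_check() in the next hunk receives the identical consolidation.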
@@ -507,16 +500,10 @@ int skb_recycle_check(struct sk_buff *skb, int skb_size) | |||
507 | return 0; | 500 | return 0; |
508 | 501 | ||
509 | skb_release_head_state(skb); | 502 | skb_release_head_state(skb); |
503 | |||
510 | shinfo = skb_shinfo(skb); | 504 | shinfo = skb_shinfo(skb); |
505 | memset(shinfo, 0, offsetof(struct skb_shared_info, dataref)); | ||
511 | atomic_set(&shinfo->dataref, 1); | 506 | atomic_set(&shinfo->dataref, 1); |
512 | shinfo->nr_frags = 0; | ||
513 | shinfo->gso_size = 0; | ||
514 | shinfo->gso_segs = 0; | ||
515 | shinfo->gso_type = 0; | ||
516 | shinfo->ip6_frag_id = 0; | ||
517 | shinfo->tx_flags.flags = 0; | ||
518 | skb_frag_list_init(skb); | ||
519 | memset(&shinfo->hwtstamps, 0, sizeof(shinfo->hwtstamps)); | ||
520 | 507 | ||
521 | memset(skb, 0, offsetof(struct sk_buff, tail)); | 508 | memset(skb, 0, offsetof(struct sk_buff, tail)); |
522 | skb->data = skb->head + NET_SKB_PAD; | 509 | skb->data = skb->head + NET_SKB_PAD; |
@@ -533,7 +520,8 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) | |||
533 | new->transport_header = old->transport_header; | 520 | new->transport_header = old->transport_header; |
534 | new->network_header = old->network_header; | 521 | new->network_header = old->network_header; |
535 | new->mac_header = old->mac_header; | 522 | new->mac_header = old->mac_header; |
536 | skb_dst_set(new, dst_clone(skb_dst(old))); | 523 | skb_dst_copy(new, old); |
524 | new->rxhash = old->rxhash; | ||
537 | #ifdef CONFIG_XFRM | 525 | #ifdef CONFIG_XFRM |
538 | new->sp = secpath_get(old->sp); | 526 | new->sp = secpath_get(old->sp); |
539 | #endif | 527 | #endif |
@@ -581,6 +569,7 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb) | |||
581 | C(len); | 569 | C(len); |
582 | C(data_len); | 570 | C(data_len); |
583 | C(mac_len); | 571 | C(mac_len); |
572 | C(rxhash); | ||
584 | n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len; | 573 | n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len; |
585 | n->cloned = 1; | 574 | n->cloned = 1; |
586 | n->nohdr = 0; | 575 | n->nohdr = 0; |
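rxhash, the receive hash introduced for RPS in this cycle, is now propagated by both __copy_skb_header() and __skb_clone(); without this, a cloned or copied packet would lose its flow hash and could be re-steered to a different CPU. The C() macro used above is just a local field-copy shorthand in net/core/skbuff.c:

        /* Defined near __skb_clone(): */
        #define C(x) n->x = skb->x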
@@ -1051,7 +1040,7 @@ EXPORT_SYMBOL(skb_push); | |||
1051 | */ | 1040 | */ |
1052 | unsigned char *skb_pull(struct sk_buff *skb, unsigned int len) | 1041 | unsigned char *skb_pull(struct sk_buff *skb, unsigned int len) |
1053 | { | 1042 | { |
1054 | return unlikely(len > skb->len) ? NULL : __skb_pull(skb, len); | 1043 | return skb_pull_inline(skb, len); |
1055 | } | 1044 | } |
1056 | EXPORT_SYMBOL(skb_pull); | 1045 | EXPORT_SYMBOL(skb_pull); |
1057 | 1046 | ||
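skb_pull() becomes a wrapper around skb_pull_inline(), letting hot in-kernel paths inline the length check while the exported out-of-line symbol stays available to modules. The inline is assumed to carry the removed body verbatim:

        /* Assumed definition in include/linux/skbuff.h: */
        static inline unsigned char *skb_pull_inline(struct sk_buff *skb,
                                                     unsigned int len)
        {
                return unlikely(len > skb->len) ? NULL : __skb_pull(skb, len);
        }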
diff --git a/net/core/sock.c b/net/core/sock.c index c5812bbc2cc9..bf88a167c8f2 100644 --- a/net/core/sock.c +++ b/net/core/sock.c | |||
@@ -307,6 +307,11 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) | |||
307 | */ | 307 | */ |
308 | skb_len = skb->len; | 308 | skb_len = skb->len; |
309 | 309 | ||
310 | /* We escape from the RCU-protected region; make sure we don't leak | ||
311 | * a non-refcounted dst. | ||
312 | */ | ||
313 | skb_dst_force(skb); | ||
314 | |||
310 | spin_lock_irqsave(&list->lock, flags); | 315 | spin_lock_irqsave(&list->lock, flags); |
311 | skb->dropcount = atomic_read(&sk->sk_drops); | 316 | skb->dropcount = atomic_read(&sk->sk_drops); |
312 | __skb_queue_tail(list, skb); | 317 | __skb_queue_tail(list, skb); |
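This release allows a packet's dst to be "noref" — borrowed under RCU instead of refcounted — while the skb stays inside the RCU read section of the receive path. Queuing to a socket lets the skb outlive that section, so skb_dst_force() first promotes the borrowed dst to a real reference; the WARN_ON_ONCE(skb_dst_is_noref(skb)) added to __release_sock() further down polices the same invariant for backlogged packets. Assumed semantics of the helper (see include/net/dst.h):

        /* Sketch: take a true reference and clear the internal noref
         * marker on the skb's dst word. */
        if (skb_dst_is_noref(skb)) {
                WARN_ON(!rcu_read_lock_held());
                dst_clone(skb_dst(skb));
                /* ...then flip the skb's dst from noref to refcounted */
        }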
@@ -327,6 +332,10 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested) | |||
327 | 332 | ||
328 | skb->dev = NULL; | 333 | skb->dev = NULL; |
329 | 334 | ||
335 | if (sk_rcvqueues_full(sk, skb)) { | ||
336 | atomic_inc(&sk->sk_drops); | ||
337 | goto discard_and_relse; | ||
338 | } | ||
330 | if (nested) | 339 | if (nested) |
331 | bh_lock_sock_nested(sk); | 340 | bh_lock_sock_nested(sk); |
332 | else | 341 | else |
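The new sk_rcvqueues_full() test sheds load before the socket lock is even taken: if the receive queue plus backlog already exceed sk_rcvbuf, the packet is counted in sk_drops and freed. (The per-socket sk_backlog.limit it supersedes is deleted from sock_init_data() later in this file.) The helper is assumed to be shaped like:

        /* Assumed helper in include/net/sock.h: charge both the
         * backlog and the regular receive queue against sk_rcvbuf. */
        static inline bool sk_rcvqueues_full(const struct sock *sk,
                                             const struct sk_buff *skb)
        {
                unsigned int qsize = sk->sk_backlog.len +
                                     atomic_read(&sk->sk_rmem_alloc);

                return qsize + skb->truesize > sk->sk_rcvbuf;
        }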
@@ -364,11 +373,11 @@ EXPORT_SYMBOL(sk_reset_txq); | |||
364 | 373 | ||
365 | struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie) | 374 | struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie) |
366 | { | 375 | { |
367 | struct dst_entry *dst = sk->sk_dst_cache; | 376 | struct dst_entry *dst = __sk_dst_get(sk); |
368 | 377 | ||
369 | if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) { | 378 | if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) { |
370 | sk_tx_queue_clear(sk); | 379 | sk_tx_queue_clear(sk); |
371 | sk->sk_dst_cache = NULL; | 380 | rcu_assign_pointer(sk->sk_dst_cache, NULL); |
372 | dst_release(dst); | 381 | dst_release(dst); |
373 | return NULL; | 382 | return NULL; |
374 | } | 383 | } |
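sk_dst_cache becomes an RCU-managed pointer: __sk_dst_check() reads it via __sk_dst_get() and publishes NULL with rcu_assign_pointer() before dropping the old reference, while sk_dst_lock is downgraded from an rwlock to a spinlock in the sk_clone() and sock_init_data() hunks below. The reader helper is assumed to look like:

        /* Assumed accessor in include/net/sock.h: legal under RCU or
         * while owning the socket. */
        static inline struct dst_entry *__sk_dst_get(struct sock *sk)
        {
                return rcu_dereference_check(sk->sk_dst_cache,
                                             rcu_read_lock_held() ||
                                             sock_owned_by_user(sk));
        }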
@@ -1157,7 +1166,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority) | |||
1157 | skb_queue_head_init(&newsk->sk_async_wait_queue); | 1166 | skb_queue_head_init(&newsk->sk_async_wait_queue); |
1158 | #endif | 1167 | #endif |
1159 | 1168 | ||
1160 | rwlock_init(&newsk->sk_dst_lock); | 1169 | spin_lock_init(&newsk->sk_dst_lock); |
1161 | rwlock_init(&newsk->sk_callback_lock); | 1170 | rwlock_init(&newsk->sk_callback_lock); |
1162 | lockdep_set_class_and_name(&newsk->sk_callback_lock, | 1171 | lockdep_set_class_and_name(&newsk->sk_callback_lock, |
1163 | af_callback_keys + newsk->sk_family, | 1172 | af_callback_keys + newsk->sk_family, |
@@ -1207,7 +1216,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority) | |||
1207 | */ | 1216 | */ |
1208 | sk_refcnt_debug_inc(newsk); | 1217 | sk_refcnt_debug_inc(newsk); |
1209 | sk_set_socket(newsk, NULL); | 1218 | sk_set_socket(newsk, NULL); |
1210 | newsk->sk_sleep = NULL; | 1219 | newsk->sk_wq = NULL; |
1211 | 1220 | ||
1212 | if (newsk->sk_prot->sockets_allocated) | 1221 | if (newsk->sk_prot->sockets_allocated) |
1213 | percpu_counter_inc(newsk->sk_prot->sockets_allocated); | 1222 | percpu_counter_inc(newsk->sk_prot->sockets_allocated); |
@@ -1227,6 +1236,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst) | |||
1227 | sk->sk_route_caps = dst->dev->features; | 1236 | sk->sk_route_caps = dst->dev->features; |
1228 | if (sk->sk_route_caps & NETIF_F_GSO) | 1237 | if (sk->sk_route_caps & NETIF_F_GSO) |
1229 | sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE; | 1238 | sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE; |
1239 | sk->sk_route_caps &= ~sk->sk_route_nocaps; | ||
1230 | if (sk_can_gso(sk)) { | 1240 | if (sk_can_gso(sk)) { |
1231 | if (dst->header_len) { | 1241 | if (dst->header_len) { |
1232 | sk->sk_route_caps &= ~NETIF_F_GSO_MASK; | 1242 | sk->sk_route_caps &= ~NETIF_F_GSO_MASK; |
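sk_route_nocaps is a per-socket mask of features that must never be offered, reapplied every time sk_setup_caps() recomputes sk_route_caps from the route's device. The expected companion helper and its first user (TCP MD5 signatures, which cannot coexist with GSO) are assumed to be:

        /* Assumed helper in include/net/sock.h: */
        static inline void sk_nocaps_add(struct sock *sk, int flags)
        {
                sk->sk_route_nocaps |= flags;
                sk->sk_route_caps &= ~flags;
        }

        /* e.g. in TCP when an MD5 key is attached: */
        sk_nocaps_add(sk, NETIF_F_GSO_MASK);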
@@ -1395,7 +1405,7 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo) | |||
1395 | if (signal_pending(current)) | 1405 | if (signal_pending(current)) |
1396 | break; | 1406 | break; |
1397 | set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); | 1407 | set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); |
1398 | prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); | 1408 | prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); |
1399 | if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) | 1409 | if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) |
1400 | break; | 1410 | break; |
1401 | if (sk->sk_shutdown & SEND_SHUTDOWN) | 1411 | if (sk->sk_shutdown & SEND_SHUTDOWN) |
@@ -1404,7 +1414,7 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo) | |||
1404 | break; | 1414 | break; |
1405 | timeo = schedule_timeout(timeo); | 1415 | timeo = schedule_timeout(timeo); |
1406 | } | 1416 | } |
1407 | finish_wait(sk->sk_sleep, &wait); | 1417 | finish_wait(sk_sleep(sk), &wait); |
1408 | return timeo; | 1418 | return timeo; |
1409 | } | 1419 | } |
1410 | 1420 | ||
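All of the sk->sk_sleep dereferences in this file (and in net/core/stream.c below) become sk_sleep(sk) calls because the wait queue moves out of struct sock and into the new struct socket_wq. The accessor is assumed to be a thin wrapper:

        /* Assumed accessor in include/net/sock.h: */
        static inline wait_queue_head_t *sk_sleep(struct sock *sk)
        {
                return &sk->sk_wq->wait;
        }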
@@ -1531,6 +1541,7 @@ static void __release_sock(struct sock *sk) | |||
1531 | do { | 1541 | do { |
1532 | struct sk_buff *next = skb->next; | 1542 | struct sk_buff *next = skb->next; |
1533 | 1543 | ||
1544 | WARN_ON_ONCE(skb_dst_is_noref(skb)); | ||
1534 | skb->next = NULL; | 1545 | skb->next = NULL; |
1535 | sk_backlog_rcv(sk, skb); | 1546 | sk_backlog_rcv(sk, skb); |
1536 | 1547 | ||
@@ -1570,11 +1581,11 @@ int sk_wait_data(struct sock *sk, long *timeo) | |||
1570 | int rc; | 1581 | int rc; |
1571 | DEFINE_WAIT(wait); | 1582 | DEFINE_WAIT(wait); |
1572 | 1583 | ||
1573 | prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); | 1584 | prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); |
1574 | set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); | 1585 | set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); |
1575 | rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue)); | 1586 | rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue)); |
1576 | clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); | 1587 | clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); |
1577 | finish_wait(sk->sk_sleep, &wait); | 1588 | finish_wait(sk_sleep(sk), &wait); |
1578 | return rc; | 1589 | return rc; |
1579 | } | 1590 | } |
1580 | EXPORT_SYMBOL(sk_wait_data); | 1591 | EXPORT_SYMBOL(sk_wait_data); |
@@ -1796,41 +1807,53 @@ EXPORT_SYMBOL(sock_no_sendpage); | |||
1796 | 1807 | ||
1797 | static void sock_def_wakeup(struct sock *sk) | 1808 | static void sock_def_wakeup(struct sock *sk) |
1798 | { | 1809 | { |
1799 | read_lock(&sk->sk_callback_lock); | 1810 | struct socket_wq *wq; |
1800 | if (sk_has_sleeper(sk)) | 1811 | |
1801 | wake_up_interruptible_all(sk->sk_sleep); | 1812 | rcu_read_lock(); |
1802 | read_unlock(&sk->sk_callback_lock); | 1813 | wq = rcu_dereference(sk->sk_wq); |
1814 | if (wq_has_sleeper(wq)) | ||
1815 | wake_up_interruptible_all(&wq->wait); | ||
1816 | rcu_read_unlock(); | ||
1803 | } | 1817 | } |
1804 | 1818 | ||
1805 | static void sock_def_error_report(struct sock *sk) | 1819 | static void sock_def_error_report(struct sock *sk) |
1806 | { | 1820 | { |
1807 | read_lock(&sk->sk_callback_lock); | 1821 | struct socket_wq *wq; |
1808 | if (sk_has_sleeper(sk)) | 1822 | |
1809 | wake_up_interruptible_poll(sk->sk_sleep, POLLERR); | 1823 | rcu_read_lock(); |
1824 | wq = rcu_dereference(sk->sk_wq); | ||
1825 | if (wq_has_sleeper(wq)) | ||
1826 | wake_up_interruptible_poll(&wq->wait, POLLERR); | ||
1810 | sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR); | 1827 | sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR); |
1811 | read_unlock(&sk->sk_callback_lock); | 1828 | rcu_read_unlock(); |
1812 | } | 1829 | } |
1813 | 1830 | ||
1814 | static void sock_def_readable(struct sock *sk, int len) | 1831 | static void sock_def_readable(struct sock *sk, int len) |
1815 | { | 1832 | { |
1816 | read_lock(&sk->sk_callback_lock); | 1833 | struct socket_wq *wq; |
1817 | if (sk_has_sleeper(sk)) | 1834 | |
1818 | wake_up_interruptible_sync_poll(sk->sk_sleep, POLLIN | | 1835 | rcu_read_lock(); |
1836 | wq = rcu_dereference(sk->sk_wq); | ||
1837 | if (wq_has_sleeper(wq)) | ||
1838 | wake_up_interruptible_sync_poll(&wq->wait, POLLIN | | ||
1819 | POLLRDNORM | POLLRDBAND); | 1839 | POLLRDNORM | POLLRDBAND); |
1820 | sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); | 1840 | sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); |
1821 | read_unlock(&sk->sk_callback_lock); | 1841 | rcu_read_unlock(); |
1822 | } | 1842 | } |
1823 | 1843 | ||
1824 | static void sock_def_write_space(struct sock *sk) | 1844 | static void sock_def_write_space(struct sock *sk) |
1825 | { | 1845 | { |
1826 | read_lock(&sk->sk_callback_lock); | 1846 | struct socket_wq *wq; |
1847 | |||
1848 | rcu_read_lock(); | ||
1827 | 1849 | ||
1828 | /* Do not wake up a writer until he can make "significant" | 1850 | /* Do not wake up a writer until he can make "significant" |
1829 | * progress. --DaveM | 1851 | * progress. --DaveM |
1830 | */ | 1852 | */ |
1831 | if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) { | 1853 | if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) { |
1832 | if (sk_has_sleeper(sk)) | 1854 | wq = rcu_dereference(sk->sk_wq); |
1833 | wake_up_interruptible_sync_poll(sk->sk_sleep, POLLOUT | | 1855 | if (wq_has_sleeper(wq)) |
1856 | wake_up_interruptible_sync_poll(&wq->wait, POLLOUT | | ||
1834 | POLLWRNORM | POLLWRBAND); | 1857 | POLLWRNORM | POLLWRBAND); |
1835 | 1858 | ||
1836 | /* Should agree with poll, otherwise some programs break */ | 1859 | /* Should agree with poll, otherwise some programs break */ |
@@ -1838,7 +1861,7 @@ static void sock_def_write_space(struct sock *sk) | |||
1838 | sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); | 1861 | sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); |
1839 | } | 1862 | } |
1840 | 1863 | ||
1841 | read_unlock(&sk->sk_callback_lock); | 1864 | rcu_read_unlock(); |
1842 | } | 1865 | } |
1843 | 1866 | ||
1844 | static void sock_def_destruct(struct sock *sk) | 1867 | static void sock_def_destruct(struct sock *sk) |
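The four default socket callbacks stop taking sk_callback_lock and instead dereference sk->sk_wq under rcu_read_lock(); the socket_wq itself is freed via RCU when the socket is destroyed, so a concurrent waker can never touch freed memory. wq_has_sleeper() keeps the memory-barrier contract sk_has_sleeper() had, assumed to be:

        /* Assumed helper: the barrier pairs with one on the sleeping
         * side (between queueing itself and testing its condition), so
         * neither side can miss the other's update. */
        static inline int wq_has_sleeper(struct socket_wq *wq)
        {
                smp_mb();
                return wq && waitqueue_active(&wq->wait);
        }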
@@ -1885,7 +1908,6 @@ void sock_init_data(struct socket *sock, struct sock *sk) | |||
1885 | sk->sk_allocation = GFP_KERNEL; | 1908 | sk->sk_allocation = GFP_KERNEL; |
1886 | sk->sk_rcvbuf = sysctl_rmem_default; | 1909 | sk->sk_rcvbuf = sysctl_rmem_default; |
1887 | sk->sk_sndbuf = sysctl_wmem_default; | 1910 | sk->sk_sndbuf = sysctl_wmem_default; |
1888 | sk->sk_backlog.limit = sk->sk_rcvbuf << 1; | ||
1889 | sk->sk_state = TCP_CLOSE; | 1911 | sk->sk_state = TCP_CLOSE; |
1890 | sk_set_socket(sk, sock); | 1912 | sk_set_socket(sk, sock); |
1891 | 1913 | ||
@@ -1893,12 +1915,12 @@ void sock_init_data(struct socket *sock, struct sock *sk) | |||
1893 | 1915 | ||
1894 | if (sock) { | 1916 | if (sock) { |
1895 | sk->sk_type = sock->type; | 1917 | sk->sk_type = sock->type; |
1896 | sk->sk_sleep = &sock->wait; | 1918 | sk->sk_wq = sock->wq; |
1897 | sock->sk = sk; | 1919 | sock->sk = sk; |
1898 | } else | 1920 | } else |
1899 | sk->sk_sleep = NULL; | 1921 | sk->sk_wq = NULL; |
1900 | 1922 | ||
1901 | rwlock_init(&sk->sk_dst_lock); | 1923 | spin_lock_init(&sk->sk_dst_lock); |
1902 | rwlock_init(&sk->sk_callback_lock); | 1924 | rwlock_init(&sk->sk_callback_lock); |
1903 | lockdep_set_class_and_name(&sk->sk_callback_lock, | 1925 | lockdep_set_class_and_name(&sk->sk_callback_lock, |
1904 | af_callback_keys + sk->sk_family, | 1926 | af_callback_keys + sk->sk_family, |
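sock_init_data() now wires the sock to the socket's socket_wq rather than to an embedded wait queue, and initializes sk_dst_lock as the spinlock required by the RCU dst changes above. The new structure is assumed to bundle what struct socket previously held directly:

        /* Assumed layout in include/linux/net.h: */
        struct socket_wq {
                wait_queue_head_t       wait;
                struct fasync_struct    *fasync_list;
                struct rcu_head         rcu;    /* deferred free */
        } ____cacheline_aligned_in_smp;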
diff --git a/net/core/stream.c b/net/core/stream.c index a37debfeb1b2..cc196f42b8d8 100644 --- a/net/core/stream.c +++ b/net/core/stream.c | |||
@@ -28,15 +28,19 @@ | |||
28 | void sk_stream_write_space(struct sock *sk) | 28 | void sk_stream_write_space(struct sock *sk) |
29 | { | 29 | { |
30 | struct socket *sock = sk->sk_socket; | 30 | struct socket *sock = sk->sk_socket; |
31 | struct socket_wq *wq; | ||
31 | 32 | ||
32 | if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk) && sock) { | 33 | if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk) && sock) { |
33 | clear_bit(SOCK_NOSPACE, &sock->flags); | 34 | clear_bit(SOCK_NOSPACE, &sock->flags); |
34 | 35 | ||
35 | if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) | 36 | rcu_read_lock(); |
36 | wake_up_interruptible_poll(sk->sk_sleep, POLLOUT | | 37 | wq = rcu_dereference(sk->sk_wq); |
38 | if (wq_has_sleeper(wq)) | ||
39 | wake_up_interruptible_poll(&wq->wait, POLLOUT | | ||
37 | POLLWRNORM | POLLWRBAND); | 40 | POLLWRNORM | POLLWRBAND); |
38 | if (sock->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN)) | 41 | if (wq && wq->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN)) |
39 | sock_wake_async(sock, SOCK_WAKE_SPACE, POLL_OUT); | 42 | sock_wake_async(sock, SOCK_WAKE_SPACE, POLL_OUT); |
43 | rcu_read_unlock(); | ||
40 | } | 44 | } |
41 | } | 45 | } |
42 | 46 | ||
@@ -66,13 +70,13 @@ int sk_stream_wait_connect(struct sock *sk, long *timeo_p) | |||
66 | if (signal_pending(tsk)) | 70 | if (signal_pending(tsk)) |
67 | return sock_intr_errno(*timeo_p); | 71 | return sock_intr_errno(*timeo_p); |
68 | 72 | ||
69 | prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); | 73 | prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); |
70 | sk->sk_write_pending++; | 74 | sk->sk_write_pending++; |
71 | done = sk_wait_event(sk, timeo_p, | 75 | done = sk_wait_event(sk, timeo_p, |
72 | !sk->sk_err && | 76 | !sk->sk_err && |
73 | !((1 << sk->sk_state) & | 77 | !((1 << sk->sk_state) & |
74 | ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))); | 78 | ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))); |
75 | finish_wait(sk->sk_sleep, &wait); | 79 | finish_wait(sk_sleep(sk), &wait); |
76 | sk->sk_write_pending--; | 80 | sk->sk_write_pending--; |
77 | } while (!done); | 81 | } while (!done); |
78 | return 0; | 82 | return 0; |
@@ -96,13 +100,13 @@ void sk_stream_wait_close(struct sock *sk, long timeout) | |||
96 | DEFINE_WAIT(wait); | 100 | DEFINE_WAIT(wait); |
97 | 101 | ||
98 | do { | 102 | do { |
99 | prepare_to_wait(sk->sk_sleep, &wait, | 103 | prepare_to_wait(sk_sleep(sk), &wait, |
100 | TASK_INTERRUPTIBLE); | 104 | TASK_INTERRUPTIBLE); |
101 | if (sk_wait_event(sk, &timeout, !sk_stream_closing(sk))) | 105 | if (sk_wait_event(sk, &timeout, !sk_stream_closing(sk))) |
102 | break; | 106 | break; |
103 | } while (!signal_pending(current) && timeout); | 107 | } while (!signal_pending(current) && timeout); |
104 | 108 | ||
105 | finish_wait(sk->sk_sleep, &wait); | 109 | finish_wait(sk_sleep(sk), &wait); |
106 | } | 110 | } |
107 | } | 111 | } |
108 | 112 | ||
@@ -126,7 +130,7 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p) | |||
126 | while (1) { | 130 | while (1) { |
127 | set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); | 131 | set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); |
128 | 132 | ||
129 | prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); | 133 | prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); |
130 | 134 | ||
131 | if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) | 135 | if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) |
132 | goto do_error; | 136 | goto do_error; |
@@ -157,7 +161,7 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p) | |||
157 | *timeo_p = current_timeo; | 161 | *timeo_p = current_timeo; |
158 | } | 162 | } |
159 | out: | 163 | out: |
160 | finish_wait(sk->sk_sleep, &wait); | 164 | finish_wait(sk_sleep(sk), &wait); |
161 | return err; | 165 | return err; |
162 | 166 | ||
163 | do_error: | 167 | do_error: |
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index b7b6b8208f75..01eee5d984be 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c | |||
@@ -11,12 +11,72 @@ | |||
11 | #include <linux/socket.h> | 11 | #include <linux/socket.h> |
12 | #include <linux/netdevice.h> | 12 | #include <linux/netdevice.h> |
13 | #include <linux/ratelimit.h> | 13 | #include <linux/ratelimit.h> |
14 | #include <linux/vmalloc.h> | ||
14 | #include <linux/init.h> | 15 | #include <linux/init.h> |
15 | #include <linux/slab.h> | 16 | #include <linux/slab.h> |
16 | 17 | ||
17 | #include <net/ip.h> | 18 | #include <net/ip.h> |
18 | #include <net/sock.h> | 19 | #include <net/sock.h> |
19 | 20 | ||
21 | #ifdef CONFIG_RPS | ||
22 | static int rps_sock_flow_sysctl(ctl_table *table, int write, | ||
23 | void __user *buffer, size_t *lenp, loff_t *ppos) | ||
24 | { | ||
25 | unsigned int orig_size, size; | ||
26 | int ret, i; | ||
27 | ctl_table tmp = { | ||
28 | .data = &size, | ||
29 | .maxlen = sizeof(size), | ||
30 | .mode = table->mode | ||
31 | }; | ||
32 | struct rps_sock_flow_table *orig_sock_table, *sock_table; | ||
33 | static DEFINE_MUTEX(sock_flow_mutex); | ||
34 | |||
35 | mutex_lock(&sock_flow_mutex); | ||
36 | |||
37 | orig_sock_table = rps_sock_flow_table; | ||
38 | size = orig_size = orig_sock_table ? orig_sock_table->mask + 1 : 0; | ||
39 | |||
40 | ret = proc_dointvec(&tmp, write, buffer, lenp, ppos); | ||
41 | |||
42 | if (write) { | ||
43 | if (size) { | ||
44 | if (size > 1<<30) { | ||
45 | /* Enforce limit to prevent overflow */ | ||
46 | mutex_unlock(&sock_flow_mutex); | ||
47 | return -EINVAL; | ||
48 | } | ||
49 | size = roundup_pow_of_two(size); | ||
50 | if (size != orig_size) { | ||
51 | sock_table = | ||
52 | vmalloc(RPS_SOCK_FLOW_TABLE_SIZE(size)); | ||
53 | if (!sock_table) { | ||
54 | mutex_unlock(&sock_flow_mutex); | ||
55 | return -ENOMEM; | ||
56 | } | ||
57 | |||
58 | sock_table->mask = size - 1; | ||
59 | } else | ||
60 | sock_table = orig_sock_table; | ||
61 | |||
62 | for (i = 0; i < size; i++) | ||
63 | sock_table->ents[i] = RPS_NO_CPU; | ||
64 | } else | ||
65 | sock_table = NULL; | ||
66 | |||
67 | if (sock_table != orig_sock_table) { | ||
68 | rcu_assign_pointer(rps_sock_flow_table, sock_table); | ||
69 | synchronize_rcu(); | ||
70 | vfree(orig_sock_table); | ||
71 | } | ||
72 | } | ||
73 | |||
74 | mutex_unlock(&sock_flow_mutex); | ||
75 | |||
76 | return ret; | ||
77 | } | ||
78 | #endif /* CONFIG_RPS */ | ||
79 | |||
20 | static struct ctl_table net_core_table[] = { | 80 | static struct ctl_table net_core_table[] = { |
21 | #ifdef CONFIG_NET | 81 | #ifdef CONFIG_NET |
22 | { | 82 | { |
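rps_sock_flow_sysctl() is the writer side of the global receive-flow-steering table: it rounds the requested entry count up to a power of two (rejecting anything above 1<<30 so RPS_SOCK_FLOW_TABLE_SIZE() cannot overflow), publishes the new table with rcu_assign_pointer(), and vfree()s the old one only after synchronize_rcu(), so lock-free readers in the packet path never touch freed memory. Writing 0 to /proc/sys/net/core/rps_sock_flow_entries drops the table entirely. The reader side is assumed to index it like this (cf. get_rps_cpu() in net/core/dev.c):

        /* Sketch of the lock-free reader pattern: */
        struct rps_sock_flow_table *sock_table;

        rcu_read_lock();
        sock_table = rcu_dereference(rps_sock_flow_table);
        if (sock_table) {
                u16 cpu = sock_table->ents[skb->rxhash & sock_table->mask];

                if (cpu != RPS_NO_CPU) {
                        /* steer this flow toward the CPU its socket runs on */
                }
        }
        rcu_read_unlock();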
@@ -62,6 +122,13 @@ static struct ctl_table net_core_table[] = { | |||
62 | .proc_handler = proc_dointvec | 122 | .proc_handler = proc_dointvec |
63 | }, | 123 | }, |
64 | { | 124 | { |
125 | .procname = "netdev_tstamp_prequeue", | ||
126 | .data = &netdev_tstamp_prequeue, | ||
127 | .maxlen = sizeof(int), | ||
128 | .mode = 0644, | ||
129 | .proc_handler = proc_dointvec | ||
130 | }, | ||
131 | { | ||
65 | .procname = "message_cost", | 132 | .procname = "message_cost", |
66 | .data = &net_ratelimit_state.interval, | 133 | .data = &net_ratelimit_state.interval, |
67 | .maxlen = sizeof(int), | 134 | .maxlen = sizeof(int), |
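netdev_tstamp_prequeue is new with the RPS work: when nonzero (assumed to be the default), received packets are timestamped immediately in netif_rx()/netif_receive_skb(); when zero, timestamping is deferred until after RPS has handed the packet to its target CPU, trading slightly later stamps for moving the cost off the interrupted CPU. The consumer is assumed to look roughly like this (helper name illustrative):

        /* Assumed check in net/core/dev.c before RPS queueing: */
        if (netdev_tstamp_prequeue)
                net_timestamp_check(skb);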
@@ -82,6 +149,14 @@ static struct ctl_table net_core_table[] = { | |||
82 | .mode = 0644, | 149 | .mode = 0644, |
83 | .proc_handler = proc_dointvec | 150 | .proc_handler = proc_dointvec |
84 | }, | 151 | }, |
152 | #ifdef CONFIG_RPS | ||
153 | { | ||
154 | .procname = "rps_sock_flow_entries", | ||
155 | .maxlen = sizeof(int), | ||
156 | .mode = 0644, | ||
157 | .proc_handler = rps_sock_flow_sysctl | ||
158 | }, | ||
159 | #endif | ||
85 | #endif /* CONFIG_NET */ | 160 | #endif /* CONFIG_NET */ |
86 | { | 161 | { |
87 | .procname = "netdev_budget", | 162 | .procname = "netdev_budget", |