Diffstat (limited to 'net/core/dev.c')
-rw-r--r--  net/core/dev.c | 1402
1 file changed, 654 insertions(+), 748 deletions(-)
diff --git a/net/core/dev.c b/net/core/dev.c
index f769098774b7..d273e4e3ecdc 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -130,6 +130,7 @@
130#include <linux/jhash.h> 130#include <linux/jhash.h>
131#include <linux/random.h> 131#include <linux/random.h>
132#include <trace/events/napi.h> 132#include <trace/events/napi.h>
133#include <linux/pci.h>
133 134
134#include "net-sysfs.h" 135#include "net-sysfs.h"
135 136
@@ -207,6 +208,20 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
207 return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)]; 208 return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
208} 209}
209 210
211static inline void rps_lock(struct softnet_data *sd)
212{
213#ifdef CONFIG_RPS
214 spin_lock(&sd->input_pkt_queue.lock);
215#endif
216}
217
218static inline void rps_unlock(struct softnet_data *sd)
219{
220#ifdef CONFIG_RPS
221 spin_unlock(&sd->input_pkt_queue.lock);
222#endif
223}
224
210/* Device list insertion */ 225/* Device list insertion */
211static int list_netdevice(struct net_device *dev) 226static int list_netdevice(struct net_device *dev)
212{ 227{
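
The new rps_lock()/rps_unlock() helpers take input_pkt_queue's spinlock only when CONFIG_RPS is set: without RPS the backlog is only ever touched by its own CPU with interrupts disabled, so the lock compiles away. A rough userspace analogue of that pattern is sketched below; a pthread mutex and a made-up HAVE_REMOTE_PRODUCERS symbol stand in for the spinlock and CONFIG_RPS, so this is an illustration, not kernel code.

/*
 * Build with -pthread; add -DHAVE_REMOTE_PRODUCERS to enable the lock.
 */
#include <pthread.h>
#include <stdio.h>

struct backlog {
	int items[64];
	int len;
#ifdef HAVE_REMOTE_PRODUCERS
	pthread_mutex_t lock;
#endif
};

static void backlog_lock(struct backlog *b)
{
#ifdef HAVE_REMOTE_PRODUCERS
	pthread_mutex_lock(&b->lock);
#else
	(void)b;			/* no remote producers: nothing to lock */
#endif
}

static void backlog_unlock(struct backlog *b)
{
#ifdef HAVE_REMOTE_PRODUCERS
	pthread_mutex_unlock(&b->lock);
#else
	(void)b;
#endif
}

int main(void)
{
	struct backlog b = { .len = 0 };

#ifdef HAVE_REMOTE_PRODUCERS
	pthread_mutex_init(&b.lock, NULL);
#endif
	backlog_lock(&b);
	b.items[b.len++] = 1;		/* enqueue under the (possibly no-op) lock */
	backlog_unlock(&b);
	printf("queue length: %d\n", b.len);
	return 0;
}
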
@@ -249,7 +264,7 @@ static RAW_NOTIFIER_HEAD(netdev_chain);
249 * queue in the local softnet handler. 264 * queue in the local softnet handler.
250 */ 265 */
251 266
252DEFINE_PER_CPU(struct softnet_data, softnet_data); 267DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
253EXPORT_PER_CPU_SYMBOL(softnet_data); 268EXPORT_PER_CPU_SYMBOL(softnet_data);
254 269
255#ifdef CONFIG_LOCKDEP 270#ifdef CONFIG_LOCKDEP
@@ -773,14 +788,17 @@ EXPORT_SYMBOL(__dev_getfirstbyhwtype);
773 788
774struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type) 789struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
775{ 790{
776 struct net_device *dev; 791 struct net_device *dev, *ret = NULL;
777 792
778 rtnl_lock(); 793 rcu_read_lock();
779 dev = __dev_getfirstbyhwtype(net, type); 794 for_each_netdev_rcu(net, dev)
780 if (dev) 795 if (dev->type == type) {
781 dev_hold(dev); 796 dev_hold(dev);
782 rtnl_unlock(); 797 ret = dev;
783 return dev; 798 break;
799 }
800 rcu_read_unlock();
801 return ret;
784} 802}
785EXPORT_SYMBOL(dev_getfirstbyhwtype); 803EXPORT_SYMBOL(dev_getfirstbyhwtype);
786 804
@@ -984,15 +1002,10 @@ int dev_change_name(struct net_device *dev, const char *newname)
984 return err; 1002 return err;
985 1003
986rollback: 1004rollback:
987 /* For now only devices in the initial network namespace 1005 ret = device_rename(&dev->dev, dev->name);
988 * are in sysfs. 1006 if (ret) {
989 */ 1007 memcpy(dev->name, oldname, IFNAMSIZ);
990 if (net_eq(net, &init_net)) { 1008 return ret;
991 ret = device_rename(&dev->dev, dev->name);
992 if (ret) {
993 memcpy(dev->name, oldname, IFNAMSIZ);
994 return ret;
995 }
996 } 1009 }
997 1010
998 write_lock_bh(&dev_base_lock); 1011 write_lock_bh(&dev_base_lock);
@@ -1085,9 +1098,9 @@ void netdev_state_change(struct net_device *dev)
1085} 1098}
1086EXPORT_SYMBOL(netdev_state_change); 1099EXPORT_SYMBOL(netdev_state_change);
1087 1100
1088void netdev_bonding_change(struct net_device *dev, unsigned long event) 1101int netdev_bonding_change(struct net_device *dev, unsigned long event)
1089{ 1102{
1090 call_netdevice_notifiers(event, dev); 1103 return call_netdevice_notifiers(event, dev);
1091} 1104}
1092EXPORT_SYMBOL(netdev_bonding_change); 1105EXPORT_SYMBOL(netdev_bonding_change);
1093 1106
@@ -1417,6 +1430,7 @@ EXPORT_SYMBOL(unregister_netdevice_notifier);
1417 1430
1418int call_netdevice_notifiers(unsigned long val, struct net_device *dev) 1431int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
1419{ 1432{
1433 ASSERT_RTNL();
1420 return raw_notifier_call_chain(&netdev_chain, val, dev); 1434 return raw_notifier_call_chain(&netdev_chain, val, dev);
1421} 1435}
1422 1436
@@ -1435,7 +1449,7 @@ void net_disable_timestamp(void)
1435} 1449}
1436EXPORT_SYMBOL(net_disable_timestamp); 1450EXPORT_SYMBOL(net_disable_timestamp);
1437 1451
1438static inline void net_timestamp(struct sk_buff *skb) 1452static inline void net_timestamp_set(struct sk_buff *skb)
1439{ 1453{
1440 if (atomic_read(&netstamp_needed)) 1454 if (atomic_read(&netstamp_needed))
1441 __net_timestamp(skb); 1455 __net_timestamp(skb);
@@ -1443,6 +1457,12 @@ static inline void net_timestamp(struct sk_buff *skb)
1443 skb->tstamp.tv64 = 0; 1457 skb->tstamp.tv64 = 0;
1444} 1458}
1445 1459
1460static inline void net_timestamp_check(struct sk_buff *skb)
1461{
1462 if (!skb->tstamp.tv64 && atomic_read(&netstamp_needed))
1463 __net_timestamp(skb);
1464}
1465
1446/** 1466/**
1447 * dev_forward_skb - loopback an skb to another netif 1467 * dev_forward_skb - loopback an skb to another netif
1448 * 1468 *
@@ -1451,7 +1471,7 @@ static inline void net_timestamp(struct sk_buff *skb)
1451 * 1471 *
1452 * return values: 1472 * return values:
1453 * NET_RX_SUCCESS (no congestion) 1473 * NET_RX_SUCCESS (no congestion)
1454 * NET_RX_DROP (packet was dropped) 1474 * NET_RX_DROP (packet was dropped, but freed)
1455 * 1475 *
1456 * dev_forward_skb can be used for injecting an skb from the 1476 * dev_forward_skb can be used for injecting an skb from the
1457 * start_xmit function of one device into the receive queue 1477 * start_xmit function of one device into the receive queue
@@ -1465,12 +1485,11 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
1465{ 1485{
1466 skb_orphan(skb); 1486 skb_orphan(skb);
1467 1487
1468 if (!(dev->flags & IFF_UP)) 1488 if (!(dev->flags & IFF_UP) ||
1469 return NET_RX_DROP; 1489 (skb->len > (dev->mtu + dev->hard_header_len))) {
1470 1490 kfree_skb(skb);
1471 if (skb->len > (dev->mtu + dev->hard_header_len))
1472 return NET_RX_DROP; 1491 return NET_RX_DROP;
1473 1492 }
1474 skb_set_dev(skb, dev); 1493 skb_set_dev(skb, dev);
1475 skb->tstamp.tv64 = 0; 1494 skb->tstamp.tv64 = 0;
1476 skb->pkt_type = PACKET_HOST; 1495 skb->pkt_type = PACKET_HOST;
@@ -1490,9 +1509,9 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1490 1509
1491#ifdef CONFIG_NET_CLS_ACT 1510#ifdef CONFIG_NET_CLS_ACT
1492 if (!(skb->tstamp.tv64 && (G_TC_FROM(skb->tc_verd) & AT_INGRESS))) 1511 if (!(skb->tstamp.tv64 && (G_TC_FROM(skb->tc_verd) & AT_INGRESS)))
1493 net_timestamp(skb); 1512 net_timestamp_set(skb);
1494#else 1513#else
1495 net_timestamp(skb); 1514 net_timestamp_set(skb);
1496#endif 1515#endif
1497 1516
1498 rcu_read_lock(); 1517 rcu_read_lock();
@@ -1538,8 +1557,9 @@ static inline void __netif_reschedule(struct Qdisc *q)
1538 1557
1539 local_irq_save(flags); 1558 local_irq_save(flags);
1540 sd = &__get_cpu_var(softnet_data); 1559 sd = &__get_cpu_var(softnet_data);
1541 q->next_sched = sd->output_queue; 1560 q->next_sched = NULL;
1542 sd->output_queue = q; 1561 *sd->output_queue_tailp = q;
1562 sd->output_queue_tailp = &q->next_sched;
1543 raise_softirq_irqoff(NET_TX_SOFTIRQ); 1563 raise_softirq_irqoff(NET_TX_SOFTIRQ);
1544 local_irq_restore(flags); 1564 local_irq_restore(flags);
1545} 1565}
@@ -1784,18 +1804,27 @@ EXPORT_SYMBOL(netdev_rx_csum_fault);
1784 * 2. No high memory really exists on this machine. 1804 * 2. No high memory really exists on this machine.
1785 */ 1805 */
1786 1806
1787static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb) 1807static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
1788{ 1808{
1789#ifdef CONFIG_HIGHMEM 1809#ifdef CONFIG_HIGHMEM
1790 int i; 1810 int i;
1811 if (!(dev->features & NETIF_F_HIGHDMA)) {
1812 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
1813 if (PageHighMem(skb_shinfo(skb)->frags[i].page))
1814 return 1;
1815 }
1791 1816
1792 if (dev->features & NETIF_F_HIGHDMA) 1817 if (PCI_DMA_BUS_IS_PHYS) {
1793 return 0; 1818 struct device *pdev = dev->dev.parent;
1794
1795 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
1796 if (PageHighMem(skb_shinfo(skb)->frags[i].page))
1797 return 1;
1798 1819
1820 if (!pdev)
1821 return 0;
1822 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1823 dma_addr_t addr = page_to_phys(skb_shinfo(skb)->frags[i].page);
1824 if (!pdev->dma_mask || addr + PAGE_SIZE - 1 > *pdev->dma_mask)
1825 return 1;
1826 }
1827 }
1799#endif 1828#endif
1800 return 0; 1829 return 0;
1801} 1830}
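
The rewritten illegal_highdma() adds a second reason to fall back to copying: on PCI_DMA_BUS_IS_PHYS platforms a fragment page is also rejected when it lies above the parent device's DMA mask. The core test is whether the whole page range fits under *dma_mask; a standalone sketch of just that bounds check, with made-up sample values, follows.

/* A page is only directly addressable if its whole range fits under the
 * device's DMA mask. Values below are invented for the example. */
#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE 4096ULL

static int page_fits_dma_mask(uint64_t page_phys, uint64_t dma_mask)
{
	/* mirrors: addr + PAGE_SIZE - 1 > *pdev->dma_mask  ==>  illegal */
	return page_phys + PAGE_SIZE - 1 <= dma_mask;
}

int main(void)
{
	uint64_t mask32 = 0xffffffffULL;	/* 32-bit capable device */
	uint64_t low  = 0x00100000ULL;		/* 1 MiB: fine */
	uint64_t high = 0x100000000ULL;		/* 4 GiB: above the mask */

	printf("low page ok:  %d\n", page_fits_dma_mask(low, mask32));
	printf("high page ok: %d\n", page_fits_dma_mask(high, mask32));
	return 0;
}
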
@@ -1853,6 +1882,17 @@ static int dev_gso_segment(struct sk_buff *skb)
1853 return 0; 1882 return 0;
1854} 1883}
1855 1884
1885/*
1886 * Try to orphan skb early, right before transmission by the device.
1887 * We cannot orphan skb if tx timestamp is requested, since
1888 * drivers need to call skb_tstamp_tx() to send the timestamp.
1889 */
1890static inline void skb_orphan_try(struct sk_buff *skb)
1891{
1892 if (!skb_tx(skb)->flags)
1893 skb_orphan(skb);
1894}
1895
1856int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, 1896int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
1857 struct netdev_queue *txq) 1897 struct netdev_queue *txq)
1858{ 1898{
@@ -1863,13 +1903,6 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
1863 if (!list_empty(&ptype_all)) 1903 if (!list_empty(&ptype_all))
1864 dev_queue_xmit_nit(skb, dev); 1904 dev_queue_xmit_nit(skb, dev);
1865 1905
1866 if (netif_needs_gso(dev, skb)) {
1867 if (unlikely(dev_gso_segment(skb)))
1868 goto out_kfree_skb;
1869 if (skb->next)
1870 goto gso;
1871 }
1872
1873 /* 1906 /*
1874 * If device doesnt need skb->dst, release it right now while 1907 * If device doesnt need skb->dst, release it right now while
1875 * its hot in this cpu cache 1908 * its hot in this cpu cache
@@ -1877,23 +1910,18 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
1877 if (dev->priv_flags & IFF_XMIT_DST_RELEASE) 1910 if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
1878 skb_dst_drop(skb); 1911 skb_dst_drop(skb);
1879 1912
1913 skb_orphan_try(skb);
1914
1915 if (netif_needs_gso(dev, skb)) {
1916 if (unlikely(dev_gso_segment(skb)))
1917 goto out_kfree_skb;
1918 if (skb->next)
1919 goto gso;
1920 }
1921
1880 rc = ops->ndo_start_xmit(skb, dev); 1922 rc = ops->ndo_start_xmit(skb, dev);
1881 if (rc == NETDEV_TX_OK) 1923 if (rc == NETDEV_TX_OK)
1882 txq_trans_update(txq); 1924 txq_trans_update(txq);
1883 /*
1884 * TODO: if skb_orphan() was called by
1885 * dev->hard_start_xmit() (for example, the unmodified
1886 * igb driver does that; bnx2 doesn't), then
1887 * skb_tx_software_timestamp() will be unable to send
1888 * back the time stamp.
1889 *
1890 * How can this be prevented? Always create another
1891 * reference to the socket before calling
1892 * dev->hard_start_xmit()? Prevent that skb_orphan()
1893 * does anything in dev->hard_start_xmit() by clearing
1894 * the skb destructor before the call and restoring it
1895 * afterwards, then doing the skb_orphan() ourselves?
1896 */
1897 return rc; 1925 return rc;
1898 } 1926 }
1899 1927
@@ -1932,7 +1960,7 @@ out_kfree_skb:
1932 return rc; 1960 return rc;
1933} 1961}
1934 1962
1935static u32 skb_tx_hashrnd; 1963static u32 hashrnd __read_mostly;
1936 1964
1937u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb) 1965u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
1938{ 1966{
@@ -1948,9 +1976,9 @@ u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
1948 if (skb->sk && skb->sk->sk_hash) 1976 if (skb->sk && skb->sk->sk_hash)
1949 hash = skb->sk->sk_hash; 1977 hash = skb->sk->sk_hash;
1950 else 1978 else
1951 hash = skb->protocol; 1979 hash = (__force u16) skb->protocol;
1952 1980
1953 hash = jhash_1word(hash, skb_tx_hashrnd); 1981 hash = jhash_1word(hash, hashrnd);
1954 1982
1955 return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32); 1983 return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
1956} 1984}
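
skb_tx_hash() keeps mapping the 32-bit hash onto real_num_tx_queues with a multiply-and-shift rather than a modulo, and the renamed global hashrnd now also seeds the RPS rxhash introduced further down. A small standalone sketch of that mapping (sample hash values are arbitrary):

/* ((u64)hash * n) >> 32 spreads a 32-bit hash uniformly over [0, n)
 * without a divide. */
#include <stdint.h>
#include <stdio.h>

static uint16_t pick_queue(uint32_t hash, uint16_t num_queues)
{
	return (uint16_t)(((uint64_t)hash * num_queues) >> 32);
}

int main(void)
{
	uint32_t hashes[] = { 0x00000000u, 0x40000000u, 0x80000000u, 0xdeadbeefu };
	uint16_t nq = 8;

	for (unsigned i = 0; i < sizeof(hashes) / sizeof(hashes[0]); i++)
		printf("hash %08x -> queue %u\n", hashes[i], pick_queue(hashes[i], nq));
	return 0;
}
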
@@ -1960,10 +1988,9 @@ static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
1960{ 1988{
1961 if (unlikely(queue_index >= dev->real_num_tx_queues)) { 1989 if (unlikely(queue_index >= dev->real_num_tx_queues)) {
1962 if (net_ratelimit()) { 1990 if (net_ratelimit()) {
1963 WARN(1, "%s selects TX queue %d, but " 1991 pr_warning("%s selects TX queue %d, but "
1964 "real number of TX queues is %d\n", 1992 "real number of TX queues is %d\n",
1965 dev->name, queue_index, 1993 dev->name, queue_index, dev->real_num_tx_queues);
1966 dev->real_num_tx_queues);
1967 } 1994 }
1968 return 0; 1995 return 0;
1969 } 1996 }
@@ -1990,7 +2017,7 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev,
1990 queue_index = skb_tx_hash(dev, skb); 2017 queue_index = skb_tx_hash(dev, skb);
1991 2018
1992 if (sk) { 2019 if (sk) {
1993 struct dst_entry *dst = rcu_dereference_bh(sk->sk_dst_cache); 2020 struct dst_entry *dst = rcu_dereference_check(sk->sk_dst_cache, 1);
1994 2021
1995 if (dst && skb_dst(skb) == dst) 2022 if (dst && skb_dst(skb) == dst)
1996 sk_tx_queue_set(sk, queue_index); 2023 sk_tx_queue_set(sk, queue_index);
@@ -2020,6 +2047,8 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
2020 * waiting to be sent out; and the qdisc is not running - 2047 * waiting to be sent out; and the qdisc is not running -
2021 * xmit the skb directly. 2048 * xmit the skb directly.
2022 */ 2049 */
2050 if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE))
2051 skb_dst_force(skb);
2023 __qdisc_update_bstats(q, skb->len); 2052 __qdisc_update_bstats(q, skb->len);
2024 if (sch_direct_xmit(skb, q, dev, txq, root_lock)) 2053 if (sch_direct_xmit(skb, q, dev, txq, root_lock))
2025 __qdisc_run(q); 2054 __qdisc_run(q);
@@ -2028,6 +2057,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
2028 2057
2029 rc = NET_XMIT_SUCCESS; 2058 rc = NET_XMIT_SUCCESS;
2030 } else { 2059 } else {
2060 skb_dst_force(skb);
2031 rc = qdisc_enqueue_root(skb, q); 2061 rc = qdisc_enqueue_root(skb, q);
2032 qdisc_run(q); 2062 qdisc_run(q);
2033 } 2063 }
@@ -2175,11 +2205,249 @@ EXPORT_SYMBOL(dev_queue_xmit);
2175 =======================================================================*/ 2205 =======================================================================*/
2176 2206
2177int netdev_max_backlog __read_mostly = 1000; 2207int netdev_max_backlog __read_mostly = 1000;
2208int netdev_tstamp_prequeue __read_mostly = 1;
2178int netdev_budget __read_mostly = 300; 2209int netdev_budget __read_mostly = 300;
2179int weight_p __read_mostly = 64; /* old backlog weight */ 2210int weight_p __read_mostly = 64; /* old backlog weight */
2180 2211
2181DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, }; 2212/* Called with irq disabled */
2213static inline void ____napi_schedule(struct softnet_data *sd,
2214 struct napi_struct *napi)
2215{
2216 list_add_tail(&napi->poll_list, &sd->poll_list);
2217 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2218}
2182 2219
2220#ifdef CONFIG_RPS
2221
2222/* One global table that all flow-based protocols share. */
2223struct rps_sock_flow_table *rps_sock_flow_table __read_mostly;
2224EXPORT_SYMBOL(rps_sock_flow_table);
2225
2226/*
2227 * get_rps_cpu is called from netif_receive_skb and returns the target
2228 * CPU from the RPS map of the receiving queue for a given skb.
2229 * rcu_read_lock must be held on entry.
2230 */
2231static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
2232 struct rps_dev_flow **rflowp)
2233{
2234 struct ipv6hdr *ip6;
2235 struct iphdr *ip;
2236 struct netdev_rx_queue *rxqueue;
2237 struct rps_map *map;
2238 struct rps_dev_flow_table *flow_table;
2239 struct rps_sock_flow_table *sock_flow_table;
2240 int cpu = -1;
2241 u8 ip_proto;
2242 u16 tcpu;
2243 u32 addr1, addr2, ihl;
2244 union {
2245 u32 v32;
2246 u16 v16[2];
2247 } ports;
2248
2249 if (skb_rx_queue_recorded(skb)) {
2250 u16 index = skb_get_rx_queue(skb);
2251 if (unlikely(index >= dev->num_rx_queues)) {
2252 if (net_ratelimit()) {
2253 pr_warning("%s received packet on queue "
2254 "%u, but number of RX queues is %u\n",
2255 dev->name, index, dev->num_rx_queues);
2256 }
2257 goto done;
2258 }
2259 rxqueue = dev->_rx + index;
2260 } else
2261 rxqueue = dev->_rx;
2262
2263 if (!rxqueue->rps_map && !rxqueue->rps_flow_table)
2264 goto done;
2265
2266 if (skb->rxhash)
2267 goto got_hash; /* Skip hash computation on packet header */
2268
2269 switch (skb->protocol) {
2270 case __constant_htons(ETH_P_IP):
2271 if (!pskb_may_pull(skb, sizeof(*ip)))
2272 goto done;
2273
2274 ip = (struct iphdr *) skb->data;
2275 ip_proto = ip->protocol;
2276 addr1 = (__force u32) ip->saddr;
2277 addr2 = (__force u32) ip->daddr;
2278 ihl = ip->ihl;
2279 break;
2280 case __constant_htons(ETH_P_IPV6):
2281 if (!pskb_may_pull(skb, sizeof(*ip6)))
2282 goto done;
2283
2284 ip6 = (struct ipv6hdr *) skb->data;
2285 ip_proto = ip6->nexthdr;
2286 addr1 = (__force u32) ip6->saddr.s6_addr32[3];
2287 addr2 = (__force u32) ip6->daddr.s6_addr32[3];
2288 ihl = (40 >> 2);
2289 break;
2290 default:
2291 goto done;
2292 }
2293 switch (ip_proto) {
2294 case IPPROTO_TCP:
2295 case IPPROTO_UDP:
2296 case IPPROTO_DCCP:
2297 case IPPROTO_ESP:
2298 case IPPROTO_AH:
2299 case IPPROTO_SCTP:
2300 case IPPROTO_UDPLITE:
2301 if (pskb_may_pull(skb, (ihl * 4) + 4)) {
2302 ports.v32 = * (__force u32 *) (skb->data + (ihl * 4));
2303 if (ports.v16[1] < ports.v16[0])
2304 swap(ports.v16[0], ports.v16[1]);
2305 break;
2306 }
2307 default:
2308 ports.v32 = 0;
2309 break;
2310 }
2311
2312 /* get a consistent hash (same value on both flow directions) */
2313 if (addr2 < addr1)
2314 swap(addr1, addr2);
2315 skb->rxhash = jhash_3words(addr1, addr2, ports.v32, hashrnd);
2316 if (!skb->rxhash)
2317 skb->rxhash = 1;
2318
2319got_hash:
2320 flow_table = rcu_dereference(rxqueue->rps_flow_table);
2321 sock_flow_table = rcu_dereference(rps_sock_flow_table);
2322 if (flow_table && sock_flow_table) {
2323 u16 next_cpu;
2324 struct rps_dev_flow *rflow;
2325
2326 rflow = &flow_table->flows[skb->rxhash & flow_table->mask];
2327 tcpu = rflow->cpu;
2328
2329 next_cpu = sock_flow_table->ents[skb->rxhash &
2330 sock_flow_table->mask];
2331
2332 /*
2333 * If the desired CPU (where last recvmsg was done) is
2334 * different from current CPU (one in the rx-queue flow
2335 * table entry), switch if one of the following holds:
2336 * - Current CPU is unset (equal to RPS_NO_CPU).
2337 * - Current CPU is offline.
2338 * - The current CPU's queue tail has advanced beyond the
2339 * last packet that was enqueued using this table entry.
2340 * This guarantees that all previous packets for the flow
2341 * have been dequeued, thus preserving in order delivery.
2342 */
2343 if (unlikely(tcpu != next_cpu) &&
2344 (tcpu == RPS_NO_CPU || !cpu_online(tcpu) ||
2345 ((int)(per_cpu(softnet_data, tcpu).input_queue_head -
2346 rflow->last_qtail)) >= 0)) {
2347 tcpu = rflow->cpu = next_cpu;
2348 if (tcpu != RPS_NO_CPU)
2349 rflow->last_qtail = per_cpu(softnet_data,
2350 tcpu).input_queue_head;
2351 }
2352 if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) {
2353 *rflowp = rflow;
2354 cpu = tcpu;
2355 goto done;
2356 }
2357 }
2358
2359 map = rcu_dereference(rxqueue->rps_map);
2360 if (map) {
2361 tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32];
2362
2363 if (cpu_online(tcpu)) {
2364 cpu = tcpu;
2365 goto done;
2366 }
2367 }
2368
2369done:
2370 return cpu;
2371}
2372
2373/* Called from hardirq (IPI) context */
2374static void rps_trigger_softirq(void *data)
2375{
2376 struct softnet_data *sd = data;
2377
2378 ____napi_schedule(sd, &sd->backlog);
2379 sd->received_rps++;
2380}
2381
2382#endif /* CONFIG_RPS */
2383
2384/*
2385 * Check if this softnet_data structure is another cpu one
2386 * If yes, queue it to our IPI list and return 1
2387 * If no, return 0
2388 */
2389static int rps_ipi_queued(struct softnet_data *sd)
2390{
2391#ifdef CONFIG_RPS
2392 struct softnet_data *mysd = &__get_cpu_var(softnet_data);
2393
2394 if (sd != mysd) {
2395 sd->rps_ipi_next = mysd->rps_ipi_list;
2396 mysd->rps_ipi_list = sd;
2397
2398 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2399 return 1;
2400 }
2401#endif /* CONFIG_RPS */
2402 return 0;
2403}
2404
2405/*
2406 * enqueue_to_backlog is called to queue an skb to a per CPU backlog
2407 * queue (may be a remote CPU queue).
2408 */
2409static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
2410 unsigned int *qtail)
2411{
2412 struct softnet_data *sd;
2413 unsigned long flags;
2414
2415 sd = &per_cpu(softnet_data, cpu);
2416
2417 local_irq_save(flags);
2418
2419 rps_lock(sd);
2420 if (skb_queue_len(&sd->input_pkt_queue) <= netdev_max_backlog) {
2421 if (skb_queue_len(&sd->input_pkt_queue)) {
2422enqueue:
2423 __skb_queue_tail(&sd->input_pkt_queue, skb);
2424#ifdef CONFIG_RPS
2425 *qtail = sd->input_queue_head +
2426 skb_queue_len(&sd->input_pkt_queue);
2427#endif
2428 rps_unlock(sd);
2429 local_irq_restore(flags);
2430 return NET_RX_SUCCESS;
2431 }
2432
2433 /* Schedule NAPI for backlog device
2434 * We can use non atomic operation since we own the queue lock
2435 */
2436 if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state)) {
2437 if (!rps_ipi_queued(sd))
2438 ____napi_schedule(sd, &sd->backlog);
2439 }
2440 goto enqueue;
2441 }
2442
2443 sd->dropped++;
2444 rps_unlock(sd);
2445
2446 local_irq_restore(flags);
2447
2448 kfree_skb(skb);
2449 return NET_RX_DROP;
2450}
2183 2451
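
enqueue_to_backlog() centralizes the queueing policy that used to live in netif_rx(): drop once input_pkt_queue is over netdev_max_backlog, append directly when the queue is already non-empty, and otherwise schedule the backlog NAPI first (through an IPI when the target softnet_data belongs to another CPU). A reduced sketch of that decision structure, with plain C stand-ins for the softnet/NAPI machinery:

#include <stdio.h>

#define MAX_BACKLOG 1000

enum rx_result { RX_SUCCESS, RX_DROP };

struct cpu_backlog {
	unsigned int qlen;
	int napi_scheduled;
};

static enum rx_result enqueue(struct cpu_backlog *b)
{
	if (b->qlen > MAX_BACKLOG)
		return RX_DROP;			/* sd->dropped++ in the kernel */

	if (b->qlen == 0 && !b->napi_scheduled)
		b->napi_scheduled = 1;		/* ____napi_schedule() or IPI here */

	b->qlen++;				/* __skb_queue_tail() */
	return RX_SUCCESS;
}

int main(void)
{
	struct cpu_backlog b = { 0, 0 };
	enum rx_result r = RX_SUCCESS;

	for (int i = 0; i < MAX_BACKLOG + 2 && r == RX_SUCCESS; i++)
		r = enqueue(&b);
	printf("queued %u packets, napi scheduled: %d\n", b.qlen, b.napi_scheduled);
	return 0;
}
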
2184/** 2452/**
2185 * netif_rx - post buffer to the network code 2453 * netif_rx - post buffer to the network code
@@ -2198,41 +2466,38 @@ DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
2198 2466
2199int netif_rx(struct sk_buff *skb) 2467int netif_rx(struct sk_buff *skb)
2200{ 2468{
2201 struct softnet_data *queue; 2469 int ret;
2202 unsigned long flags;
2203 2470
2204 /* if netpoll wants it, pretend we never saw it */ 2471 /* if netpoll wants it, pretend we never saw it */
2205 if (netpoll_rx(skb)) 2472 if (netpoll_rx(skb))
2206 return NET_RX_DROP; 2473 return NET_RX_DROP;
2207 2474
2208 if (!skb->tstamp.tv64) 2475 if (netdev_tstamp_prequeue)
2209 net_timestamp(skb); 2476 net_timestamp_check(skb);
2210 2477
2211 /* 2478#ifdef CONFIG_RPS
2212 * The code is rearranged so that the path is the most 2479 {
2213 * short when CPU is congested, but is still operating. 2480 struct rps_dev_flow voidflow, *rflow = &voidflow;
2214 */ 2481 int cpu;
2215 local_irq_save(flags);
2216 queue = &__get_cpu_var(softnet_data);
2217 2482
2218 __get_cpu_var(netdev_rx_stat).total++; 2483 rcu_read_lock();
2219 if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
2220 if (queue->input_pkt_queue.qlen) {
2221enqueue:
2222 __skb_queue_tail(&queue->input_pkt_queue, skb);
2223 local_irq_restore(flags);
2224 return NET_RX_SUCCESS;
2225 }
2226 2484
2227 napi_schedule(&queue->backlog); 2485 cpu = get_rps_cpu(skb->dev, skb, &rflow);
2228 goto enqueue; 2486 if (cpu < 0)
2229 } 2487 cpu = smp_processor_id();
2230 2488
2231 __get_cpu_var(netdev_rx_stat).dropped++; 2489 ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
2232 local_irq_restore(flags);
2233 2490
2234 kfree_skb(skb); 2491 rcu_read_unlock();
2235 return NET_RX_DROP; 2492 }
2493#else
2494 {
2495 unsigned int qtail;
2496 ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
2497 put_cpu();
2498 }
2499#endif
2500 return ret;
2236} 2501}
2237EXPORT_SYMBOL(netif_rx); 2502EXPORT_SYMBOL(netif_rx);
2238 2503
@@ -2277,6 +2542,7 @@ static void net_tx_action(struct softirq_action *h)
2277 local_irq_disable(); 2542 local_irq_disable();
2278 head = sd->output_queue; 2543 head = sd->output_queue;
2279 sd->output_queue = NULL; 2544 sd->output_queue = NULL;
2545 sd->output_queue_tailp = &sd->output_queue;
2280 local_irq_enable(); 2546 local_irq_enable();
2281 2547
2282 while (head) { 2548 while (head) {
@@ -2353,7 +2619,8 @@ static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
2353#endif 2619#endif
2354 2620
2355#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE) 2621#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
2356struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *skb) __read_mostly; 2622struct sk_buff *(*macvlan_handle_frame_hook)(struct macvlan_port *p,
2623 struct sk_buff *skb) __read_mostly;
2357EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook); 2624EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);
2358 2625
2359static inline struct sk_buff *handle_macvlan(struct sk_buff *skb, 2626static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
@@ -2361,14 +2628,17 @@ static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
2361 int *ret, 2628 int *ret,
2362 struct net_device *orig_dev) 2629 struct net_device *orig_dev)
2363{ 2630{
2364 if (skb->dev->macvlan_port == NULL) 2631 struct macvlan_port *port;
2632
2633 port = rcu_dereference(skb->dev->macvlan_port);
2634 if (!port)
2365 return skb; 2635 return skb;
2366 2636
2367 if (*pt_prev) { 2637 if (*pt_prev) {
2368 *ret = deliver_skb(skb, *pt_prev, orig_dev); 2638 *ret = deliver_skb(skb, *pt_prev, orig_dev);
2369 *pt_prev = NULL; 2639 *pt_prev = NULL;
2370 } 2640 }
2371 return macvlan_handle_frame_hook(skb); 2641 return macvlan_handle_frame_hook(port, skb);
2372} 2642}
2373#else 2643#else
2374#define handle_macvlan(skb, pt_prev, ret, orig_dev) (skb) 2644#define handle_macvlan(skb, pt_prev, ret, orig_dev) (skb)
@@ -2469,22 +2739,56 @@ void netif_nit_deliver(struct sk_buff *skb)
2469 rcu_read_unlock(); 2739 rcu_read_unlock();
2470} 2740}
2471 2741
2472/** 2742static inline void skb_bond_set_mac_by_master(struct sk_buff *skb,
2473 * netif_receive_skb - process receive buffer from network 2743 struct net_device *master)
2474 * @skb: buffer to process 2744{
2475 * 2745 if (skb->pkt_type == PACKET_HOST) {
2476 * netif_receive_skb() is the main receive data processing function. 2746 u16 *dest = (u16 *) eth_hdr(skb)->h_dest;
2477 * It always succeeds. The buffer may be dropped during processing 2747
2478 * for congestion control or by the protocol layers. 2748 memcpy(dest, master->dev_addr, ETH_ALEN);
2479 * 2749 }
2480 * This function may only be called from softirq context and interrupts 2750}
2481 * should be enabled. 2751
2482 * 2752/* On bonding slaves other than the currently active slave, suppress
2483 * Return values (usually ignored): 2753 * duplicates except for 802.3ad ETH_P_SLOW, alb non-mcast/bcast, and
2484 * NET_RX_SUCCESS: no congestion 2754 * ARP on active-backup slaves with arp_validate enabled.
2485 * NET_RX_DROP: packet was dropped
2486 */ 2755 */
2487int netif_receive_skb(struct sk_buff *skb) 2756int __skb_bond_should_drop(struct sk_buff *skb, struct net_device *master)
2757{
2758 struct net_device *dev = skb->dev;
2759
2760 if (master->priv_flags & IFF_MASTER_ARPMON)
2761 dev->last_rx = jiffies;
2762
2763 if ((master->priv_flags & IFF_MASTER_ALB) && master->br_port) {
2764 /* Do address unmangle. The local destination address
2765 * will be always the one master has. Provides the right
2766 * functionality in a bridge.
2767 */
2768 skb_bond_set_mac_by_master(skb, master);
2769 }
2770
2771 if (dev->priv_flags & IFF_SLAVE_INACTIVE) {
2772 if ((dev->priv_flags & IFF_SLAVE_NEEDARP) &&
2773 skb->protocol == __cpu_to_be16(ETH_P_ARP))
2774 return 0;
2775
2776 if (master->priv_flags & IFF_MASTER_ALB) {
2777 if (skb->pkt_type != PACKET_BROADCAST &&
2778 skb->pkt_type != PACKET_MULTICAST)
2779 return 0;
2780 }
2781 if (master->priv_flags & IFF_MASTER_8023AD &&
2782 skb->protocol == __cpu_to_be16(ETH_P_SLOW))
2783 return 0;
2784
2785 return 1;
2786 }
2787 return 0;
2788}
2789EXPORT_SYMBOL(__skb_bond_should_drop);
2790
2791static int __netif_receive_skb(struct sk_buff *skb)
2488{ 2792{
2489 struct packet_type *ptype, *pt_prev; 2793 struct packet_type *ptype, *pt_prev;
2490 struct net_device *orig_dev; 2794 struct net_device *orig_dev;
@@ -2494,8 +2798,8 @@ int netif_receive_skb(struct sk_buff *skb)
2494 int ret = NET_RX_DROP; 2798 int ret = NET_RX_DROP;
2495 __be16 type; 2799 __be16 type;
2496 2800
2497 if (!skb->tstamp.tv64) 2801 if (!netdev_tstamp_prequeue)
2498 net_timestamp(skb); 2802 net_timestamp_check(skb);
2499 2803
2500 if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb)) 2804 if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb))
2501 return NET_RX_SUCCESS; 2805 return NET_RX_SUCCESS;
@@ -2517,7 +2821,7 @@ int netif_receive_skb(struct sk_buff *skb)
2517 skb->dev = master; 2821 skb->dev = master;
2518 } 2822 }
2519 2823
2520 __get_cpu_var(netdev_rx_stat).total++; 2824 __get_cpu_var(softnet_data).processed++;
2521 2825
2522 skb_reset_network_header(skb); 2826 skb_reset_network_header(skb);
2523 skb_reset_transport_header(skb); 2827 skb_reset_transport_header(skb);
@@ -2595,20 +2899,77 @@ out:
2595 rcu_read_unlock(); 2899 rcu_read_unlock();
2596 return ret; 2900 return ret;
2597} 2901}
2902
2903/**
2904 * netif_receive_skb - process receive buffer from network
2905 * @skb: buffer to process
2906 *
2907 * netif_receive_skb() is the main receive data processing function.
2908 * It always succeeds. The buffer may be dropped during processing
2909 * for congestion control or by the protocol layers.
2910 *
2911 * This function may only be called from softirq context and interrupts
2912 * should be enabled.
2913 *
2914 * Return values (usually ignored):
2915 * NET_RX_SUCCESS: no congestion
2916 * NET_RX_DROP: packet was dropped
2917 */
2918int netif_receive_skb(struct sk_buff *skb)
2919{
2920 if (netdev_tstamp_prequeue)
2921 net_timestamp_check(skb);
2922
2923#ifdef CONFIG_RPS
2924 {
2925 struct rps_dev_flow voidflow, *rflow = &voidflow;
2926 int cpu, ret;
2927
2928 rcu_read_lock();
2929
2930 cpu = get_rps_cpu(skb->dev, skb, &rflow);
2931
2932 if (cpu >= 0) {
2933 ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
2934 rcu_read_unlock();
2935 } else {
2936 rcu_read_unlock();
2937 ret = __netif_receive_skb(skb);
2938 }
2939
2940 return ret;
2941 }
2942#else
2943 return __netif_receive_skb(skb);
2944#endif
2945}
2598EXPORT_SYMBOL(netif_receive_skb); 2946EXPORT_SYMBOL(netif_receive_skb);
2599 2947
2600/* Network device is going away, flush any packets still pending */ 2948/* Network device is going away, flush any packets still pending
2949 * Called with irqs disabled.
2950 */
2601static void flush_backlog(void *arg) 2951static void flush_backlog(void *arg)
2602{ 2952{
2603 struct net_device *dev = arg; 2953 struct net_device *dev = arg;
2604 struct softnet_data *queue = &__get_cpu_var(softnet_data); 2954 struct softnet_data *sd = &__get_cpu_var(softnet_data);
2605 struct sk_buff *skb, *tmp; 2955 struct sk_buff *skb, *tmp;
2606 2956
2607 skb_queue_walk_safe(&queue->input_pkt_queue, skb, tmp) 2957 rps_lock(sd);
2958 skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
2608 if (skb->dev == dev) { 2959 if (skb->dev == dev) {
2609 __skb_unlink(skb, &queue->input_pkt_queue); 2960 __skb_unlink(skb, &sd->input_pkt_queue);
2610 kfree_skb(skb); 2961 kfree_skb(skb);
2962 input_queue_head_add(sd, 1);
2611 } 2963 }
2964 }
2965 rps_unlock(sd);
2966
2967 skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
2968 if (skb->dev == dev) {
2969 __skb_unlink(skb, &sd->process_queue);
2970 kfree_skb(skb);
2971 }
2972 }
2612} 2973}
2613 2974
2614static int napi_gro_complete(struct sk_buff *skb) 2975static int napi_gro_complete(struct sk_buff *skb)
@@ -2911,27 +3272,85 @@ gro_result_t napi_gro_frags(struct napi_struct *napi)
2911} 3272}
2912EXPORT_SYMBOL(napi_gro_frags); 3273EXPORT_SYMBOL(napi_gro_frags);
2913 3274
3275/*
3276 * net_rps_action sends any pending IPI's for rps.
3277 * Note: called with local irq disabled, but exits with local irq enabled.
3278 */
3279static void net_rps_action_and_irq_enable(struct softnet_data *sd)
3280{
3281#ifdef CONFIG_RPS
3282 struct softnet_data *remsd = sd->rps_ipi_list;
3283
3284 if (remsd) {
3285 sd->rps_ipi_list = NULL;
3286
3287 local_irq_enable();
3288
3289 /* Send pending IPI's to kick RPS processing on remote cpus. */
3290 while (remsd) {
3291 struct softnet_data *next = remsd->rps_ipi_next;
3292
3293 if (cpu_online(remsd->cpu))
3294 __smp_call_function_single(remsd->cpu,
3295 &remsd->csd, 0);
3296 remsd = next;
3297 }
3298 } else
3299#endif
3300 local_irq_enable();
3301}
3302
2914static int process_backlog(struct napi_struct *napi, int quota) 3303static int process_backlog(struct napi_struct *napi, int quota)
2915{ 3304{
2916 int work = 0; 3305 int work = 0;
2917 struct softnet_data *queue = &__get_cpu_var(softnet_data); 3306 struct softnet_data *sd = container_of(napi, struct softnet_data, backlog);
2918 unsigned long start_time = jiffies;
2919 3307
3308#ifdef CONFIG_RPS
3309 /* Check if we have pending ipi, its better to send them now,
3310 * not waiting net_rx_action() end.
3311 */
3312 if (sd->rps_ipi_list) {
3313 local_irq_disable();
3314 net_rps_action_and_irq_enable(sd);
3315 }
3316#endif
2920 napi->weight = weight_p; 3317 napi->weight = weight_p;
2921 do { 3318 local_irq_disable();
3319 while (work < quota) {
2922 struct sk_buff *skb; 3320 struct sk_buff *skb;
3321 unsigned int qlen;
2923 3322
2924 local_irq_disable(); 3323 while ((skb = __skb_dequeue(&sd->process_queue))) {
2925 skb = __skb_dequeue(&queue->input_pkt_queue);
2926 if (!skb) {
2927 __napi_complete(napi);
2928 local_irq_enable(); 3324 local_irq_enable();
2929 break; 3325 __netif_receive_skb(skb);
3326 if (++work >= quota)
3327 return work;
3328 local_irq_disable();
2930 } 3329 }
2931 local_irq_enable();
2932 3330
2933 netif_receive_skb(skb); 3331 rps_lock(sd);
2934 } while (++work < quota && jiffies == start_time); 3332 qlen = skb_queue_len(&sd->input_pkt_queue);
3333 if (qlen) {
3334 input_queue_head_add(sd, qlen);
3335 skb_queue_splice_tail_init(&sd->input_pkt_queue,
3336 &sd->process_queue);
3337 }
3338 if (qlen < quota - work) {
3339 /*
3340 * Inline a custom version of __napi_complete().
3341 * only current cpu owns and manipulates this napi,
3342 * and NAPI_STATE_SCHED is the only possible flag set on backlog.
3343 * we can use a plain write instead of clear_bit(),
3344 * and we dont need an smp_mb() memory barrier.
3345 */
3346 list_del(&napi->poll_list);
3347 napi->state = 0;
3348
3349 quota = work + qlen;
3350 }
3351 rps_unlock(sd);
3352 }
3353 local_irq_enable();
2935 3354
2936 return work; 3355 return work;
2937} 3356}
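
process_backlog() now drains a private process_queue with the lock dropped and refills it by splicing input_pkt_queue across under rps_lock(), so packet delivery runs with interrupts enabled and producers are blocked only briefly. The shape of that two-queue drain is sketched below; arrays and a pthread mutex stand in for the sk_buff lists and rps_lock() (build with -pthread).

#include <pthread.h>
#include <stdio.h>
#include <string.h>

#define QSZ 32

struct two_queue {
	int input[QSZ], process[QSZ];
	int input_len, process_len;
	pthread_mutex_t lock;
};

static int drain(struct two_queue *q, int quota)
{
	int work = 0;

	while (work < quota) {
		while (q->process_len > 0 && work < quota) {
			q->process_len--;	/* "deliver" one packet, lock not held */
			work++;
		}
		pthread_mutex_lock(&q->lock);
		if (q->input_len == 0) {	/* nothing left: backlog is done */
			pthread_mutex_unlock(&q->lock);
			break;
		}
		/* splice input -> process, like skb_queue_splice_tail_init() */
		memcpy(q->process, q->input, q->input_len * sizeof(int));
		q->process_len = q->input_len;
		q->input_len = 0;
		pthread_mutex_unlock(&q->lock);
	}
	return work;
}

int main(void)
{
	struct two_queue q = { .input_len = 20, .process_len = 5 };

	pthread_mutex_init(&q.lock, NULL);
	printf("processed %d packets\n", drain(&q, 64));
	return 0;
}
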
@@ -2947,8 +3366,7 @@ void __napi_schedule(struct napi_struct *n)
2947 unsigned long flags; 3366 unsigned long flags;
2948 3367
2949 local_irq_save(flags); 3368 local_irq_save(flags);
2950 list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list); 3369 ____napi_schedule(&__get_cpu_var(softnet_data), n);
2951 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2952 local_irq_restore(flags); 3370 local_irq_restore(flags);
2953} 3371}
2954EXPORT_SYMBOL(__napi_schedule); 3372EXPORT_SYMBOL(__napi_schedule);
@@ -3019,17 +3437,16 @@ void netif_napi_del(struct napi_struct *napi)
3019} 3437}
3020EXPORT_SYMBOL(netif_napi_del); 3438EXPORT_SYMBOL(netif_napi_del);
3021 3439
3022
3023static void net_rx_action(struct softirq_action *h) 3440static void net_rx_action(struct softirq_action *h)
3024{ 3441{
3025 struct list_head *list = &__get_cpu_var(softnet_data).poll_list; 3442 struct softnet_data *sd = &__get_cpu_var(softnet_data);
3026 unsigned long time_limit = jiffies + 2; 3443 unsigned long time_limit = jiffies + 2;
3027 int budget = netdev_budget; 3444 int budget = netdev_budget;
3028 void *have; 3445 void *have;
3029 3446
3030 local_irq_disable(); 3447 local_irq_disable();
3031 3448
3032 while (!list_empty(list)) { 3449 while (!list_empty(&sd->poll_list)) {
3033 struct napi_struct *n; 3450 struct napi_struct *n;
3034 int work, weight; 3451 int work, weight;
3035 3452
@@ -3047,7 +3464,7 @@ static void net_rx_action(struct softirq_action *h)
3047 * entries to the tail of this list, and only ->poll() 3464 * entries to the tail of this list, and only ->poll()
3048 * calls can remove this head entry from the list. 3465 * calls can remove this head entry from the list.
3049 */ 3466 */
3050 n = list_first_entry(list, struct napi_struct, poll_list); 3467 n = list_first_entry(&sd->poll_list, struct napi_struct, poll_list);
3051 3468
3052 have = netpoll_poll_lock(n); 3469 have = netpoll_poll_lock(n);
3053 3470
@@ -3082,13 +3499,13 @@ static void net_rx_action(struct softirq_action *h)
3082 napi_complete(n); 3499 napi_complete(n);
3083 local_irq_disable(); 3500 local_irq_disable();
3084 } else 3501 } else
3085 list_move_tail(&n->poll_list, list); 3502 list_move_tail(&n->poll_list, &sd->poll_list);
3086 } 3503 }
3087 3504
3088 netpoll_poll_unlock(have); 3505 netpoll_poll_unlock(have);
3089 } 3506 }
3090out: 3507out:
3091 local_irq_enable(); 3508 net_rps_action_and_irq_enable(sd);
3092 3509
3093#ifdef CONFIG_NET_DMA 3510#ifdef CONFIG_NET_DMA
3094 /* 3511 /*
@@ -3101,7 +3518,7 @@ out:
3101 return; 3518 return;
3102 3519
3103softnet_break: 3520softnet_break:
3104 __get_cpu_var(netdev_rx_stat).time_squeeze++; 3521 sd->time_squeeze++;
3105 __raise_softirq_irqoff(NET_RX_SOFTIRQ); 3522 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
3106 goto out; 3523 goto out;
3107} 3524}
@@ -3302,17 +3719,17 @@ static int dev_seq_show(struct seq_file *seq, void *v)
3302 return 0; 3719 return 0;
3303} 3720}
3304 3721
3305static struct netif_rx_stats *softnet_get_online(loff_t *pos) 3722static struct softnet_data *softnet_get_online(loff_t *pos)
3306{ 3723{
3307 struct netif_rx_stats *rc = NULL; 3724 struct softnet_data *sd = NULL;
3308 3725
3309 while (*pos < nr_cpu_ids) 3726 while (*pos < nr_cpu_ids)
3310 if (cpu_online(*pos)) { 3727 if (cpu_online(*pos)) {
3311 rc = &per_cpu(netdev_rx_stat, *pos); 3728 sd = &per_cpu(softnet_data, *pos);
3312 break; 3729 break;
3313 } else 3730 } else
3314 ++*pos; 3731 ++*pos;
3315 return rc; 3732 return sd;
3316} 3733}
3317 3734
3318static void *softnet_seq_start(struct seq_file *seq, loff_t *pos) 3735static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
@@ -3332,12 +3749,12 @@ static void softnet_seq_stop(struct seq_file *seq, void *v)
3332 3749
3333static int softnet_seq_show(struct seq_file *seq, void *v) 3750static int softnet_seq_show(struct seq_file *seq, void *v)
3334{ 3751{
3335 struct netif_rx_stats *s = v; 3752 struct softnet_data *sd = v;
3336 3753
3337 seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n", 3754 seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
3338 s->total, s->dropped, s->time_squeeze, 0, 3755 sd->processed, sd->dropped, sd->time_squeeze, 0,
3339 0, 0, 0, 0, /* was fastroute */ 3756 0, 0, 0, 0, /* was fastroute */
3340 s->cpu_collision); 3757 sd->cpu_collision, sd->received_rps);
3341 return 0; 3758 return 0;
3342} 3759}
3343 3760
@@ -3560,11 +3977,10 @@ int netdev_set_master(struct net_device *slave, struct net_device *master)
3560 3977
3561 slave->master = master; 3978 slave->master = master;
3562 3979
3563 synchronize_net(); 3980 if (old) {
3564 3981 synchronize_net();
3565 if (old)
3566 dev_put(old); 3982 dev_put(old);
3567 3983 }
3568 if (master) 3984 if (master)
3569 slave->flags |= IFF_SLAVE; 3985 slave->flags |= IFF_SLAVE;
3570 else 3986 else
@@ -3741,562 +4157,6 @@ void dev_set_rx_mode(struct net_device *dev)
3741 netif_addr_unlock_bh(dev); 4157 netif_addr_unlock_bh(dev);
3742} 4158}
3743 4159
3744/* hw addresses list handling functions */
3745
3746static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr,
3747 int addr_len, unsigned char addr_type)
3748{
3749 struct netdev_hw_addr *ha;
3750 int alloc_size;
3751
3752 if (addr_len > MAX_ADDR_LEN)
3753 return -EINVAL;
3754
3755 list_for_each_entry(ha, &list->list, list) {
3756 if (!memcmp(ha->addr, addr, addr_len) &&
3757 ha->type == addr_type) {
3758 ha->refcount++;
3759 return 0;
3760 }
3761 }
3762
3763
3764 alloc_size = sizeof(*ha);
3765 if (alloc_size < L1_CACHE_BYTES)
3766 alloc_size = L1_CACHE_BYTES;
3767 ha = kmalloc(alloc_size, GFP_ATOMIC);
3768 if (!ha)
3769 return -ENOMEM;
3770 memcpy(ha->addr, addr, addr_len);
3771 ha->type = addr_type;
3772 ha->refcount = 1;
3773 ha->synced = false;
3774 list_add_tail_rcu(&ha->list, &list->list);
3775 list->count++;
3776 return 0;
3777}
3778
3779static void ha_rcu_free(struct rcu_head *head)
3780{
3781 struct netdev_hw_addr *ha;
3782
3783 ha = container_of(head, struct netdev_hw_addr, rcu_head);
3784 kfree(ha);
3785}
3786
3787static int __hw_addr_del(struct netdev_hw_addr_list *list, unsigned char *addr,
3788 int addr_len, unsigned char addr_type)
3789{
3790 struct netdev_hw_addr *ha;
3791
3792 list_for_each_entry(ha, &list->list, list) {
3793 if (!memcmp(ha->addr, addr, addr_len) &&
3794 (ha->type == addr_type || !addr_type)) {
3795 if (--ha->refcount)
3796 return 0;
3797 list_del_rcu(&ha->list);
3798 call_rcu(&ha->rcu_head, ha_rcu_free);
3799 list->count--;
3800 return 0;
3801 }
3802 }
3803 return -ENOENT;
3804}
3805
3806static int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list,
3807 struct netdev_hw_addr_list *from_list,
3808 int addr_len,
3809 unsigned char addr_type)
3810{
3811 int err;
3812 struct netdev_hw_addr *ha, *ha2;
3813 unsigned char type;
3814
3815 list_for_each_entry(ha, &from_list->list, list) {
3816 type = addr_type ? addr_type : ha->type;
3817 err = __hw_addr_add(to_list, ha->addr, addr_len, type);
3818 if (err)
3819 goto unroll;
3820 }
3821 return 0;
3822
3823unroll:
3824 list_for_each_entry(ha2, &from_list->list, list) {
3825 if (ha2 == ha)
3826 break;
3827 type = addr_type ? addr_type : ha2->type;
3828 __hw_addr_del(to_list, ha2->addr, addr_len, type);
3829 }
3830 return err;
3831}
3832
3833static void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list,
3834 struct netdev_hw_addr_list *from_list,
3835 int addr_len,
3836 unsigned char addr_type)
3837{
3838 struct netdev_hw_addr *ha;
3839 unsigned char type;
3840
3841 list_for_each_entry(ha, &from_list->list, list) {
3842 type = addr_type ? addr_type : ha->type;
3843 __hw_addr_del(to_list, ha->addr, addr_len, addr_type);
3844 }
3845}
3846
3847static int __hw_addr_sync(struct netdev_hw_addr_list *to_list,
3848 struct netdev_hw_addr_list *from_list,
3849 int addr_len)
3850{
3851 int err = 0;
3852 struct netdev_hw_addr *ha, *tmp;
3853
3854 list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
3855 if (!ha->synced) {
3856 err = __hw_addr_add(to_list, ha->addr,
3857 addr_len, ha->type);
3858 if (err)
3859 break;
3860 ha->synced = true;
3861 ha->refcount++;
3862 } else if (ha->refcount == 1) {
3863 __hw_addr_del(to_list, ha->addr, addr_len, ha->type);
3864 __hw_addr_del(from_list, ha->addr, addr_len, ha->type);
3865 }
3866 }
3867 return err;
3868}
3869
3870static void __hw_addr_unsync(struct netdev_hw_addr_list *to_list,
3871 struct netdev_hw_addr_list *from_list,
3872 int addr_len)
3873{
3874 struct netdev_hw_addr *ha, *tmp;
3875
3876 list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
3877 if (ha->synced) {
3878 __hw_addr_del(to_list, ha->addr,
3879 addr_len, ha->type);
3880 ha->synced = false;
3881 __hw_addr_del(from_list, ha->addr,
3882 addr_len, ha->type);
3883 }
3884 }
3885}
3886
3887static void __hw_addr_flush(struct netdev_hw_addr_list *list)
3888{
3889 struct netdev_hw_addr *ha, *tmp;
3890
3891 list_for_each_entry_safe(ha, tmp, &list->list, list) {
3892 list_del_rcu(&ha->list);
3893 call_rcu(&ha->rcu_head, ha_rcu_free);
3894 }
3895 list->count = 0;
3896}
3897
3898static void __hw_addr_init(struct netdev_hw_addr_list *list)
3899{
3900 INIT_LIST_HEAD(&list->list);
3901 list->count = 0;
3902}
3903
3904/* Device addresses handling functions */
3905
3906static void dev_addr_flush(struct net_device *dev)
3907{
3908 /* rtnl_mutex must be held here */
3909
3910 __hw_addr_flush(&dev->dev_addrs);
3911 dev->dev_addr = NULL;
3912}
3913
3914static int dev_addr_init(struct net_device *dev)
3915{
3916 unsigned char addr[MAX_ADDR_LEN];
3917 struct netdev_hw_addr *ha;
3918 int err;
3919
3920 /* rtnl_mutex must be held here */
3921
3922 __hw_addr_init(&dev->dev_addrs);
3923 memset(addr, 0, sizeof(addr));
3924 err = __hw_addr_add(&dev->dev_addrs, addr, sizeof(addr),
3925 NETDEV_HW_ADDR_T_LAN);
3926 if (!err) {
3927 /*
3928 * Get the first (previously created) address from the list
3929 * and set dev_addr pointer to this location.
3930 */
3931 ha = list_first_entry(&dev->dev_addrs.list,
3932 struct netdev_hw_addr, list);
3933 dev->dev_addr = ha->addr;
3934 }
3935 return err;
3936}
3937
3938/**
3939 * dev_addr_add - Add a device address
3940 * @dev: device
3941 * @addr: address to add
3942 * @addr_type: address type
3943 *
3944 * Add a device address to the device or increase the reference count if
3945 * it already exists.
3946 *
3947 * The caller must hold the rtnl_mutex.
3948 */
3949int dev_addr_add(struct net_device *dev, unsigned char *addr,
3950 unsigned char addr_type)
3951{
3952 int err;
3953
3954 ASSERT_RTNL();
3955
3956 err = __hw_addr_add(&dev->dev_addrs, addr, dev->addr_len, addr_type);
3957 if (!err)
3958 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
3959 return err;
3960}
3961EXPORT_SYMBOL(dev_addr_add);
3962
3963/**
3964 * dev_addr_del - Release a device address.
3965 * @dev: device
3966 * @addr: address to delete
3967 * @addr_type: address type
3968 *
3969 * Release reference to a device address and remove it from the device
3970 * if the reference count drops to zero.
3971 *
3972 * The caller must hold the rtnl_mutex.
3973 */
3974int dev_addr_del(struct net_device *dev, unsigned char *addr,
3975 unsigned char addr_type)
3976{
3977 int err;
3978 struct netdev_hw_addr *ha;
3979
3980 ASSERT_RTNL();
3981
3982 /*
3983 * We can not remove the first address from the list because
3984 * dev->dev_addr points to that.
3985 */
3986 ha = list_first_entry(&dev->dev_addrs.list,
3987 struct netdev_hw_addr, list);
3988 if (ha->addr == dev->dev_addr && ha->refcount == 1)
3989 return -ENOENT;
3990
3991 err = __hw_addr_del(&dev->dev_addrs, addr, dev->addr_len,
3992 addr_type);
3993 if (!err)
3994 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
3995 return err;
3996}
3997EXPORT_SYMBOL(dev_addr_del);
3998
3999/**
4000 * dev_addr_add_multiple - Add device addresses from another device
4001 * @to_dev: device to which addresses will be added
4002 * @from_dev: device from which addresses will be added
4003 * @addr_type: address type - 0 means type will be used from from_dev
4004 *
4005 * Add device addresses of the one device to another.
4006 **
4007 * The caller must hold the rtnl_mutex.
4008 */
4009int dev_addr_add_multiple(struct net_device *to_dev,
4010 struct net_device *from_dev,
4011 unsigned char addr_type)
4012{
4013 int err;
4014
4015 ASSERT_RTNL();
4016
4017 if (from_dev->addr_len != to_dev->addr_len)
4018 return -EINVAL;
4019 err = __hw_addr_add_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs,
4020 to_dev->addr_len, addr_type);
4021 if (!err)
4022 call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
4023 return err;
4024}
4025EXPORT_SYMBOL(dev_addr_add_multiple);
4026
4027/**
4028 * dev_addr_del_multiple - Delete device addresses by another device
4029 * @to_dev: device where the addresses will be deleted
4030 * @from_dev: device by which addresses the addresses will be deleted
4031 * @addr_type: address type - 0 means type will used from from_dev
4032 *
4033 * Deletes addresses in to device by the list of addresses in from device.
4034 *
4035 * The caller must hold the rtnl_mutex.
4036 */
4037int dev_addr_del_multiple(struct net_device *to_dev,
4038 struct net_device *from_dev,
4039 unsigned char addr_type)
4040{
4041 ASSERT_RTNL();
4042
4043 if (from_dev->addr_len != to_dev->addr_len)
4044 return -EINVAL;
4045 __hw_addr_del_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs,
4046 to_dev->addr_len, addr_type);
4047 call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
4048 return 0;
4049}
4050EXPORT_SYMBOL(dev_addr_del_multiple);
4051
4052/* multicast addresses handling functions */
4053
4054int __dev_addr_delete(struct dev_addr_list **list, int *count,
4055 void *addr, int alen, int glbl)
4056{
4057 struct dev_addr_list *da;
4058
4059 for (; (da = *list) != NULL; list = &da->next) {
4060 if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
4061 alen == da->da_addrlen) {
4062 if (glbl) {
4063 int old_glbl = da->da_gusers;
4064 da->da_gusers = 0;
4065 if (old_glbl == 0)
4066 break;
4067 }
4068 if (--da->da_users)
4069 return 0;
4070
4071 *list = da->next;
4072 kfree(da);
4073 (*count)--;
4074 return 0;
4075 }
4076 }
4077 return -ENOENT;
4078}
4079
4080int __dev_addr_add(struct dev_addr_list **list, int *count,
4081 void *addr, int alen, int glbl)
4082{
4083 struct dev_addr_list *da;
4084
4085 for (da = *list; da != NULL; da = da->next) {
4086 if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
4087 da->da_addrlen == alen) {
4088 if (glbl) {
4089 int old_glbl = da->da_gusers;
4090 da->da_gusers = 1;
4091 if (old_glbl)
4092 return 0;
4093 }
4094 da->da_users++;
4095 return 0;
4096 }
4097 }
4098
4099 da = kzalloc(sizeof(*da), GFP_ATOMIC);
4100 if (da == NULL)
4101 return -ENOMEM;
4102 memcpy(da->da_addr, addr, alen);
4103 da->da_addrlen = alen;
4104 da->da_users = 1;
4105 da->da_gusers = glbl ? 1 : 0;
4106 da->next = *list;
4107 *list = da;
4108 (*count)++;
4109 return 0;
4110}
4111
4112/**
4113 * dev_unicast_delete - Release secondary unicast address.
4114 * @dev: device
4115 * @addr: address to delete
4116 *
4117 * Release reference to a secondary unicast address and remove it
4118 * from the device if the reference count drops to zero.
4119 *
4120 * The caller must hold the rtnl_mutex.
4121 */
4122int dev_unicast_delete(struct net_device *dev, void *addr)
4123{
4124 int err;
4125
4126 ASSERT_RTNL();
4127
4128 netif_addr_lock_bh(dev);
4129 err = __hw_addr_del(&dev->uc, addr, dev->addr_len,
4130 NETDEV_HW_ADDR_T_UNICAST);
4131 if (!err)
4132 __dev_set_rx_mode(dev);
4133 netif_addr_unlock_bh(dev);
4134 return err;
4135}
4136EXPORT_SYMBOL(dev_unicast_delete);
4137
4138/**
4139 * dev_unicast_add - add a secondary unicast address
4140 * @dev: device
4141 * @addr: address to add
4142 *
4143 * Add a secondary unicast address to the device or increase
4144 * the reference count if it already exists.
4145 *
4146 * The caller must hold the rtnl_mutex.
4147 */
4148int dev_unicast_add(struct net_device *dev, void *addr)
4149{
4150 int err;
4151
4152 ASSERT_RTNL();
4153
4154 netif_addr_lock_bh(dev);
4155 err = __hw_addr_add(&dev->uc, addr, dev->addr_len,
4156 NETDEV_HW_ADDR_T_UNICAST);
4157 if (!err)
4158 __dev_set_rx_mode(dev);
4159 netif_addr_unlock_bh(dev);
4160 return err;
4161}
4162EXPORT_SYMBOL(dev_unicast_add);
4163
4164int __dev_addr_sync(struct dev_addr_list **to, int *to_count,
4165 struct dev_addr_list **from, int *from_count)
4166{
4167 struct dev_addr_list *da, *next;
4168 int err = 0;
4169
4170 da = *from;
4171 while (da != NULL) {
4172 next = da->next;
4173 if (!da->da_synced) {
4174 err = __dev_addr_add(to, to_count,
4175 da->da_addr, da->da_addrlen, 0);
4176 if (err < 0)
4177 break;
4178 da->da_synced = 1;
4179 da->da_users++;
4180 } else if (da->da_users == 1) {
4181 __dev_addr_delete(to, to_count,
4182 da->da_addr, da->da_addrlen, 0);
4183 __dev_addr_delete(from, from_count,
4184 da->da_addr, da->da_addrlen, 0);
4185 }
4186 da = next;
4187 }
4188 return err;
4189}
4190EXPORT_SYMBOL_GPL(__dev_addr_sync);
4191
4192void __dev_addr_unsync(struct dev_addr_list **to, int *to_count,
4193 struct dev_addr_list **from, int *from_count)
4194{
4195 struct dev_addr_list *da, *next;
4196
4197 da = *from;
4198 while (da != NULL) {
4199 next = da->next;
4200 if (da->da_synced) {
4201 __dev_addr_delete(to, to_count,
4202 da->da_addr, da->da_addrlen, 0);
4203 da->da_synced = 0;
4204 __dev_addr_delete(from, from_count,
4205 da->da_addr, da->da_addrlen, 0);
4206 }
4207 da = next;
4208 }
4209}
4210EXPORT_SYMBOL_GPL(__dev_addr_unsync);
4211
4212/**
4213 * dev_unicast_sync - Synchronize device's unicast list to another device
4214 * @to: destination device
4215 * @from: source device
4216 *
4217 * Add newly added addresses to the destination device and release
4218 * addresses that have no users left. The source device must be
4219 * locked by netif_tx_lock_bh.
4220 *
4221 * This function is intended to be called from the dev->set_rx_mode
4222 * function of layered software devices.
4223 */
4224int dev_unicast_sync(struct net_device *to, struct net_device *from)
4225{
4226 int err = 0;
4227
4228 if (to->addr_len != from->addr_len)
4229 return -EINVAL;
4230
4231 netif_addr_lock_bh(to);
4232 err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len);
4233 if (!err)
4234 __dev_set_rx_mode(to);
4235 netif_addr_unlock_bh(to);
4236 return err;
4237}
4238EXPORT_SYMBOL(dev_unicast_sync);
4239
4240/**
4241 * dev_unicast_unsync - Remove synchronized addresses from the destination device
4242 * @to: destination device
4243 * @from: source device
4244 *
4245 * Remove all addresses that were added to the destination device by
4246 * dev_unicast_sync(). This function is intended to be called from the
4247 * dev->stop function of layered software devices.
4248 */
4249void dev_unicast_unsync(struct net_device *to, struct net_device *from)
4250{
4251 if (to->addr_len != from->addr_len)
4252 return;
4253
4254 netif_addr_lock_bh(from);
4255 netif_addr_lock(to);
4256 __hw_addr_unsync(&to->uc, &from->uc, to->addr_len);
4257 __dev_set_rx_mode(to);
4258 netif_addr_unlock(to);
4259 netif_addr_unlock_bh(from);
4260}
4261EXPORT_SYMBOL(dev_unicast_unsync);
4262
4263static void dev_unicast_flush(struct net_device *dev)
4264{
4265 netif_addr_lock_bh(dev);
4266 __hw_addr_flush(&dev->uc);
4267 netif_addr_unlock_bh(dev);
4268}
4269
4270static void dev_unicast_init(struct net_device *dev)
4271{
4272 __hw_addr_init(&dev->uc);
4273}
4274
4275
4276static void __dev_addr_discard(struct dev_addr_list **list)
4277{
4278 struct dev_addr_list *tmp;
4279
4280 while (*list != NULL) {
4281 tmp = *list;
4282 *list = tmp->next;
4283 if (tmp->da_users > tmp->da_gusers)
4284 printk("__dev_addr_discard: address leakage! "
4285 "da_users=%d\n", tmp->da_users);
4286 kfree(tmp);
4287 }
4288}
4289
4290static void dev_addr_discard(struct net_device *dev)
4291{
4292 netif_addr_lock_bh(dev);
4293
4294 __dev_addr_discard(&dev->mc_list);
4295 netdev_mc_count(dev) = 0;
4296
4297 netif_addr_unlock_bh(dev);
4298}
4299
4300/** 4160/**
4301 * dev_get_flags - get flags reported to userspace 4161 * dev_get_flags - get flags reported to userspace
4302 * @dev: device 4162 * @dev: device
@@ -4607,8 +4467,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
4607 return -EINVAL; 4467 return -EINVAL;
4608 if (!netif_device_present(dev)) 4468 if (!netif_device_present(dev))
4609 return -ENODEV; 4469 return -ENODEV;
4610 return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data, 4470 return dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data);
4611 dev->addr_len, 1);
4612 4471
4613 case SIOCDELMULTI: 4472 case SIOCDELMULTI:
4614 if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) || 4473 if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
@@ -4616,8 +4475,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
4616 return -EINVAL; 4475 return -EINVAL;
4617 if (!netif_device_present(dev)) 4476 if (!netif_device_present(dev))
4618 return -ENODEV; 4477 return -ENODEV;
4619 return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data, 4478 return dev_mc_del_global(dev, ifr->ifr_hwaddr.sa_data);
4620 dev->addr_len, 1);
4621 4479
4622 case SIOCSIFTXQLEN: 4480 case SIOCSIFTXQLEN:
4623 if (ifr->ifr_qlen < 0) 4481 if (ifr->ifr_qlen < 0)
@@ -4924,8 +4782,8 @@ static void rollback_registered_many(struct list_head *head)
4924 /* 4782 /*
4925 * Flush the unicast and multicast chains 4783 * Flush the unicast and multicast chains
4926 */ 4784 */
4927 dev_unicast_flush(dev); 4785 dev_uc_flush(dev);
4928 dev_addr_discard(dev); 4786 dev_mc_flush(dev);
4929 4787
4930 if (dev->netdev_ops->ndo_uninit) 4788 if (dev->netdev_ops->ndo_uninit)
4931 dev->netdev_ops->ndo_uninit(dev); 4789 dev->netdev_ops->ndo_uninit(dev);
@@ -5074,6 +4932,24 @@ int register_netdevice(struct net_device *dev)
5074 4932
5075 dev->iflink = -1; 4933 dev->iflink = -1;
5076 4934
4935#ifdef CONFIG_RPS
4936 if (!dev->num_rx_queues) {
4937 /*
4938 * Allocate a single RX queue if driver never called
4939 * alloc_netdev_mq
4940 */
4941
4942 dev->_rx = kzalloc(sizeof(struct netdev_rx_queue), GFP_KERNEL);
4943 if (!dev->_rx) {
4944 ret = -ENOMEM;
4945 goto out;
4946 }
4947
4948 dev->_rx->first = dev->_rx;
4949 atomic_set(&dev->_rx->count, 1);
4950 dev->num_rx_queues = 1;
4951 }
4952#endif
5077 /* Init, if this function is available */ 4953 /* Init, if this function is available */
5078 if (dev->netdev_ops->ndo_init) { 4954 if (dev->netdev_ops->ndo_init) {
5079 ret = dev->netdev_ops->ndo_init(dev); 4955 ret = dev->netdev_ops->ndo_init(dev);
@@ -5113,8 +4989,6 @@ int register_netdevice(struct net_device *dev)
5113 if (dev->features & NETIF_F_SG) 4989 if (dev->features & NETIF_F_SG)
5114 dev->features |= NETIF_F_GSO; 4990 dev->features |= NETIF_F_GSO;
5115 4991
5116 netdev_initialize_kobject(dev);
5117
5118 ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev); 4992 ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
5119 ret = notifier_to_errno(ret); 4993 ret = notifier_to_errno(ret);
5120 if (ret) 4994 if (ret)
@@ -5434,6 +5308,10 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
5434 struct net_device *dev; 5308 struct net_device *dev;
5435 size_t alloc_size; 5309 size_t alloc_size;
5436 struct net_device *p; 5310 struct net_device *p;
5311#ifdef CONFIG_RPS
5312 struct netdev_rx_queue *rx;
5313 int i;
5314#endif
5437 5315
5438 BUG_ON(strlen(name) >= sizeof(dev->name)); 5316 BUG_ON(strlen(name) >= sizeof(dev->name));
5439 5317
@@ -5459,13 +5337,32 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
5459 goto free_p; 5337 goto free_p;
5460 } 5338 }
5461 5339
5340#ifdef CONFIG_RPS
5341 rx = kcalloc(queue_count, sizeof(struct netdev_rx_queue), GFP_KERNEL);
5342 if (!rx) {
5343 printk(KERN_ERR "alloc_netdev: Unable to allocate "
5344 "rx queues.\n");
5345 goto free_tx;
5346 }
5347
5348 atomic_set(&rx->count, queue_count);
5349
5350 /*
5351 * Set a pointer to first element in the array which holds the
5352 * reference count.
5353 */
5354 for (i = 0; i < queue_count; i++)
5355 rx[i].first = rx;
5356#endif
5357
5462 dev = PTR_ALIGN(p, NETDEV_ALIGN); 5358 dev = PTR_ALIGN(p, NETDEV_ALIGN);
5463 dev->padded = (char *)dev - (char *)p; 5359 dev->padded = (char *)dev - (char *)p;
5464 5360
5465 if (dev_addr_init(dev)) 5361 if (dev_addr_init(dev))
5466 goto free_tx; 5362 goto free_rx;
5467 5363
5468 dev_unicast_init(dev); 5364 dev_mc_init(dev);
5365 dev_uc_init(dev);
5469 5366
5470 dev_net_set(dev, &init_net); 5367 dev_net_set(dev, &init_net);
5471 5368
@@ -5473,6 +5370,11 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
5473 dev->num_tx_queues = queue_count; 5370 dev->num_tx_queues = queue_count;
5474 dev->real_num_tx_queues = queue_count; 5371 dev->real_num_tx_queues = queue_count;
5475 5372
5373#ifdef CONFIG_RPS
5374 dev->_rx = rx;
5375 dev->num_rx_queues = queue_count;
5376#endif
5377
5476 dev->gso_max_size = GSO_MAX_SIZE; 5378 dev->gso_max_size = GSO_MAX_SIZE;
5477 5379
5478 netdev_init_queues(dev); 5380 netdev_init_queues(dev);
@@ -5487,9 +5389,12 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
5487 strcpy(dev->name, name); 5389 strcpy(dev->name, name);
5488 return dev; 5390 return dev;
5489 5391
5392free_rx:
5393#ifdef CONFIG_RPS
5394 kfree(rx);
5490free_tx: 5395free_tx:
5396#endif
5491 kfree(tx); 5397 kfree(tx);
5492
5493free_p: 5398free_p:
5494 kfree(p); 5399 kfree(p);
5495 return NULL; 5400 return NULL;
@@ -5635,15 +5540,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
5635 if (dev->features & NETIF_F_NETNS_LOCAL) 5540 if (dev->features & NETIF_F_NETNS_LOCAL)
5636 goto out; 5541 goto out;
5637 5542
5638#ifdef CONFIG_SYSFS
5639 /* Don't allow real devices to be moved when sysfs
5640 * is enabled.
5641 */
5642 err = -EINVAL;
5643 if (dev->dev.parent)
5644 goto out;
5645#endif
5646
5647	/* Ensure the device has been registered */	5543
5648 err = -EINVAL; 5544 err = -EINVAL;
5649 if (dev->reg_state != NETREG_REGISTERED) 5545 if (dev->reg_state != NETREG_REGISTERED)
@@ -5691,10 +5587,8 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
5691 /* 5587 /*
5692 * Flush the unicast and multicast chains 5588 * Flush the unicast and multicast chains
5693 */ 5589 */
5694 dev_unicast_flush(dev); 5590 dev_uc_flush(dev);
5695 dev_addr_discard(dev); 5591 dev_mc_flush(dev);
5696
5697 netdev_unregister_kobject(dev);
5698 5592
5699 /* Actually switch the network namespace */ 5593 /* Actually switch the network namespace */
5700 dev_net_set(dev, net); 5594 dev_net_set(dev, net);
@@ -5708,7 +5602,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
5708 } 5602 }
5709 5603
5710 /* Fixup kobjects */ 5604 /* Fixup kobjects */
5711 err = netdev_register_kobject(dev); 5605 err = device_rename(&dev->dev, dev->name);
5712 WARN_ON(err); 5606 WARN_ON(err);
5713 5607
5714 /* Add the device back in the hashes */ 5608 /* Add the device back in the hashes */
@@ -5735,7 +5629,6 @@ static int dev_cpu_callback(struct notifier_block *nfb,
5735 void *ocpu) 5629 void *ocpu)
5736{ 5630{
5737 struct sk_buff **list_skb; 5631 struct sk_buff **list_skb;
5738 struct Qdisc **list_net;
5739 struct sk_buff *skb; 5632 struct sk_buff *skb;
5740 unsigned int cpu, oldcpu = (unsigned long)ocpu; 5633 unsigned int cpu, oldcpu = (unsigned long)ocpu;
5741 struct softnet_data *sd, *oldsd; 5634 struct softnet_data *sd, *oldsd;
@@ -5756,19 +5649,23 @@ static int dev_cpu_callback(struct notifier_block *nfb,
5756 *list_skb = oldsd->completion_queue; 5649 *list_skb = oldsd->completion_queue;
5757 oldsd->completion_queue = NULL; 5650 oldsd->completion_queue = NULL;
5758 5651
5759 /* Find end of our output_queue. */
5760 list_net = &sd->output_queue;
5761 while (*list_net)
5762 list_net = &(*list_net)->next_sched;
5763 /* Append output queue from offline CPU. */ 5652 /* Append output queue from offline CPU. */
5764 *list_net = oldsd->output_queue; 5653 if (oldsd->output_queue) {
5765 oldsd->output_queue = NULL; 5654 *sd->output_queue_tailp = oldsd->output_queue;
5655 sd->output_queue_tailp = oldsd->output_queue_tailp;
5656 oldsd->output_queue = NULL;
5657 oldsd->output_queue_tailp = &oldsd->output_queue;
5658 }
5766 5659
5767 raise_softirq_irqoff(NET_TX_SOFTIRQ); 5660 raise_softirq_irqoff(NET_TX_SOFTIRQ);
5768 local_irq_enable(); 5661 local_irq_enable();
5769 5662
5770 /* Process offline CPU's input_pkt_queue */ 5663 /* Process offline CPU's input_pkt_queue */
5771 while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) 5664 while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) {
5665 netif_rx(skb);
5666 input_queue_head_add(oldsd, 1);
5667 }
5668 while ((skb = __skb_dequeue(&oldsd->process_queue)))
5772 netif_rx(skb); 5669 netif_rx(skb);
5773 5670
5774 return NOTIFY_OK; 5671 return NOTIFY_OK;
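The rewritten splice above is the point of output_queue_tailp: the old code walked sd->output_queue to find its end before appending the offline CPU's qdiscs, while the new per-cpu tail pointer makes the append O(1). A standalone sketch of the same tail-pointer splice on a generic singly linked list (stand-in names, not the kernel's Qdisc types):

#include <stddef.h>

/* A singly linked list plus a tail pointer that always addresses the
 * terminating NULL link. */
struct node {
    struct node *next;
};

struct queue {
    struct node *head;
    struct node **tailp;
};

static void queue_init(struct queue *q)
{
    q->head = NULL;
    q->tailp = &q->head;
}

/* Append everything on @src to @dst in O(1) and leave @src empty, the same
 * shape as the splice in the hunk above. */
static void queue_splice_tail(struct queue *dst, struct queue *src)
{
    if (!src->head)
        return;
    *dst->tailp = src->head;
    dst->tailp = src->tailp;
    src->head = NULL;
    src->tailp = &src->head;
}

Keeping tailp aimed at the terminating NULL link is what lets both the empty and non-empty destination cases append without a traversal.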
@@ -5985,17 +5882,26 @@ static int __init net_dev_init(void)
5985 */ 5882 */
5986 5883
5987 for_each_possible_cpu(i) { 5884 for_each_possible_cpu(i) {
5988 struct softnet_data *queue; 5885 struct softnet_data *sd = &per_cpu(softnet_data, i);
5989 5886
5990 queue = &per_cpu(softnet_data, i); 5887 memset(sd, 0, sizeof(*sd));
5991 skb_queue_head_init(&queue->input_pkt_queue); 5888 skb_queue_head_init(&sd->input_pkt_queue);
5992 queue->completion_queue = NULL; 5889 skb_queue_head_init(&sd->process_queue);
5993 INIT_LIST_HEAD(&queue->poll_list); 5890 sd->completion_queue = NULL;
5891 INIT_LIST_HEAD(&sd->poll_list);
5892 sd->output_queue = NULL;
5893 sd->output_queue_tailp = &sd->output_queue;
5894#ifdef CONFIG_RPS
5895 sd->csd.func = rps_trigger_softirq;
5896 sd->csd.info = sd;
5897 sd->csd.flags = 0;
5898 sd->cpu = i;
5899#endif
5994 5900
5995 queue->backlog.poll = process_backlog; 5901 sd->backlog.poll = process_backlog;
5996 queue->backlog.weight = weight_p; 5902 sd->backlog.weight = weight_p;
5997 queue->backlog.gro_list = NULL; 5903 sd->backlog.gro_list = NULL;
5998 queue->backlog.gro_count = 0; 5904 sd->backlog.gro_count = 0;
5999 } 5905 }
6000 5906
6001 dev_boot_phase = 0; 5907 dev_boot_phase = 0;
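Under CONFIG_RPS each per-cpu softnet_data now carries a pre-initialized call_single_data whose handler, rps_trigger_softirq (defined elsewhere in this file), is invoked by IPI when another CPU steers packets to this one. A toy standalone sketch of that "handler plus per-cpu info" setup, with the IPI delivery replaced by a direct call and all names illustrative:

#include <stdio.h>

struct csd {
    void (*func)(void *info);
    void *info;
};

struct mini_softnet {
    int cpu;
    struct csd csd;
};

static void trigger_backlog(void *info)
{
    struct mini_softnet *sd = info;

    /* Stands in for scheduling the backlog NAPI / raising NET_RX_SOFTIRQ. */
    printf("cpu %d: run backlog\n", sd->cpu);
}

#define NR_FAKE_CPUS 4
static struct mini_softnet sds[NR_FAKE_CPUS];

int main(void)
{
    for (int i = 0; i < NR_FAKE_CPUS; i++) {
        sds[i].cpu = i;
        sds[i].csd.func = trigger_backlog;   /* sd->csd.func = rps_trigger_softirq */
        sds[i].csd.info = &sds[i];           /* sd->csd.info = sd */
    }

    /* The kernel delivers this through an IPI; here we simply call it. */
    sds[2].csd.func(sds[2].csd.info);
    return 0;
}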
@@ -6030,7 +5936,7 @@ subsys_initcall(net_dev_init);
6030 5936
6031static int __init initialize_hashrnd(void) 5937static int __init initialize_hashrnd(void)
6032{ 5938{
6033 get_random_bytes(&skb_tx_hashrnd, sizeof(skb_tx_hashrnd)); 5939 get_random_bytes(&hashrnd, sizeof(hashrnd));
6034 return 0; 5940 return 0;
6035} 5941}
6036 5942
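The rename from skb_tx_hashrnd to hashrnd reflects that the boot-time random seed is now shared: transmit queue selection and receive packet steering both hash flow keys with it. A standalone toy sketch of the idea, assuming a stand-in mixing function rather than the kernel's jhash, and mirroring the ((u64)hash * n) >> 32 scaling that skb_tx_hash() uses to map a hash onto a queue:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

/* Stand-in seed: the kernel fills hashrnd once at boot via get_random_bytes(). */
static uint32_t hashrnd;

static void init_hashrnd(void)
{
    srand((unsigned int)time(NULL));
    hashrnd = ((uint32_t)rand() << 16) ^ (uint32_t)rand();
}

/* Toy flow hash: mixes addresses and ports with the boot-time seed. */
static uint32_t flow_hash(uint32_t saddr, uint32_t daddr, uint32_t ports)
{
    uint32_t h = hashrnd;

    h ^= saddr; h *= 0x9e3779b1u;
    h ^= daddr; h *= 0x9e3779b1u;
    h ^= ports; h *= 0x9e3779b1u;
    return h ^ (h >> 16);
}

/* Map a 32-bit hash onto one of n queues/CPUs without a modulo. */
static unsigned int pick_index(uint32_t hash, unsigned int n)
{
    return (unsigned int)(((uint64_t)hash * n) >> 32);
}

int main(void)
{
    init_hashrnd();
    printf("queue %u\n",
           pick_index(flow_hash(0x0a000001u, 0x0a000002u, 0x1f90d431u), 8));
    return 0;
}

Seeding at boot rather than using a fixed constant keeps the flow-to-queue and flow-to-CPU mapping unpredictable to remote hosts, which is why the same seed can serve both paths.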