Diffstat (limited to 'net/core/dev.c')
-rw-r--r--  net/core/dev.c  1363
1 file changed, 644 insertions, 719 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 264137fce3a2..6c820650b80f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -130,6 +130,7 @@
130#include <linux/jhash.h> 130#include <linux/jhash.h>
131#include <linux/random.h> 131#include <linux/random.h>
132#include <trace/events/napi.h> 132#include <trace/events/napi.h>
133#include <linux/pci.h>
133 134
134#include "net-sysfs.h" 135#include "net-sysfs.h"
135 136
@@ -207,6 +208,20 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
207 return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)]; 208 return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
208} 209}
209 210
211static inline void rps_lock(struct softnet_data *sd)
212{
213#ifdef CONFIG_RPS
214 spin_lock(&sd->input_pkt_queue.lock);
215#endif
216}
217
218static inline void rps_unlock(struct softnet_data *sd)
219{
220#ifdef CONFIG_RPS
221 spin_unlock(&sd->input_pkt_queue.lock);
222#endif
223}
224
210/* Device list insertion */ 225/* Device list insertion */
211static int list_netdevice(struct net_device *dev) 226static int list_netdevice(struct net_device *dev)
212{ 227{
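
The new rps_lock()/rps_unlock() helpers take the input_pkt_queue spinlock only when CONFIG_RPS is built in, which is the only case where another CPU can touch this per-CPU queue. A minimal user-space sketch of the same config-gated locking pattern, using pthreads and a made-up USE_REMOTE_QUEUEING switch (not a kernel API):

#include <pthread.h>

struct backlog {
	pthread_mutex_t lock;
	int qlen;
};

static inline void backlog_lock(struct backlog *b)
{
#ifdef USE_REMOTE_QUEUEING
	pthread_mutex_lock(&b->lock);	/* other threads may enqueue */
#else
	(void)b;			/* single consumer: the lock compiles away */
#endif
}

static inline void backlog_unlock(struct backlog *b)
{
#ifdef USE_REMOTE_QUEUEING
	pthread_mutex_unlock(&b->lock);
#else
	(void)b;
#endif
}
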
@@ -249,7 +264,7 @@ static RAW_NOTIFIER_HEAD(netdev_chain);
249 * queue in the local softnet handler. 264 * queue in the local softnet handler.
250 */ 265 */
251 266
252DEFINE_PER_CPU(struct softnet_data, softnet_data); 267DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
253EXPORT_PER_CPU_SYMBOL(softnet_data); 268EXPORT_PER_CPU_SYMBOL(softnet_data);
254 269
255#ifdef CONFIG_LOCKDEP 270#ifdef CONFIG_LOCKDEP
@@ -773,14 +788,17 @@ EXPORT_SYMBOL(__dev_getfirstbyhwtype);
773 788
774struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type) 789struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
775{ 790{
776 struct net_device *dev; 791 struct net_device *dev, *ret = NULL;
777 792
778 rtnl_lock(); 793 rcu_read_lock();
779 dev = __dev_getfirstbyhwtype(net, type); 794 for_each_netdev_rcu(net, dev)
780 if (dev) 795 if (dev->type == type) {
781 dev_hold(dev); 796 dev_hold(dev);
782 rtnl_unlock(); 797 ret = dev;
783 return dev; 798 break;
799 }
800 rcu_read_unlock();
801 return ret;
784} 802}
785EXPORT_SYMBOL(dev_getfirstbyhwtype); 803EXPORT_SYMBOL(dev_getfirstbyhwtype);
786 804
@@ -1085,9 +1103,9 @@ void netdev_state_change(struct net_device *dev)
1085} 1103}
1086EXPORT_SYMBOL(netdev_state_change); 1104EXPORT_SYMBOL(netdev_state_change);
1087 1105
1088void netdev_bonding_change(struct net_device *dev, unsigned long event) 1106int netdev_bonding_change(struct net_device *dev, unsigned long event)
1089{ 1107{
1090 call_netdevice_notifiers(event, dev); 1108 return call_netdevice_notifiers(event, dev);
1091} 1109}
1092EXPORT_SYMBOL(netdev_bonding_change); 1110EXPORT_SYMBOL(netdev_bonding_change);
1093 1111
@@ -1417,6 +1435,7 @@ EXPORT_SYMBOL(unregister_netdevice_notifier);
1417 1435
1418int call_netdevice_notifiers(unsigned long val, struct net_device *dev) 1436int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
1419{ 1437{
1438 ASSERT_RTNL();
1420 return raw_notifier_call_chain(&netdev_chain, val, dev); 1439 return raw_notifier_call_chain(&netdev_chain, val, dev);
1421} 1440}
1422 1441
@@ -1435,7 +1454,7 @@ void net_disable_timestamp(void)
1435} 1454}
1436EXPORT_SYMBOL(net_disable_timestamp); 1455EXPORT_SYMBOL(net_disable_timestamp);
1437 1456
1438static inline void net_timestamp(struct sk_buff *skb) 1457static inline void net_timestamp_set(struct sk_buff *skb)
1439{ 1458{
1440 if (atomic_read(&netstamp_needed)) 1459 if (atomic_read(&netstamp_needed))
1441 __net_timestamp(skb); 1460 __net_timestamp(skb);
@@ -1443,6 +1462,12 @@ static inline void net_timestamp(struct sk_buff *skb)
1443 skb->tstamp.tv64 = 0; 1462 skb->tstamp.tv64 = 0;
1444} 1463}
1445 1464
1465static inline void net_timestamp_check(struct sk_buff *skb)
1466{
1467 if (!skb->tstamp.tv64 && atomic_read(&netstamp_needed))
1468 __net_timestamp(skb);
1469}
1470
1446/** 1471/**
1447 * dev_forward_skb - loopback an skb to another netif 1472 * dev_forward_skb - loopback an skb to another netif
1448 * 1473 *
@@ -1489,9 +1514,9 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1489 1514
1490#ifdef CONFIG_NET_CLS_ACT 1515#ifdef CONFIG_NET_CLS_ACT
1491 if (!(skb->tstamp.tv64 && (G_TC_FROM(skb->tc_verd) & AT_INGRESS))) 1516 if (!(skb->tstamp.tv64 && (G_TC_FROM(skb->tc_verd) & AT_INGRESS)))
1492 net_timestamp(skb); 1517 net_timestamp_set(skb);
1493#else 1518#else
1494 net_timestamp(skb); 1519 net_timestamp_set(skb);
1495#endif 1520#endif
1496 1521
1497 rcu_read_lock(); 1522 rcu_read_lock();
@@ -1537,8 +1562,9 @@ static inline void __netif_reschedule(struct Qdisc *q)
1537 1562
1538 local_irq_save(flags); 1563 local_irq_save(flags);
1539 sd = &__get_cpu_var(softnet_data); 1564 sd = &__get_cpu_var(softnet_data);
1540 q->next_sched = sd->output_queue; 1565 q->next_sched = NULL;
1541 sd->output_queue = q; 1566 *sd->output_queue_tailp = q;
1567 sd->output_queue_tailp = &q->next_sched;
1542 raise_softirq_irqoff(NET_TX_SOFTIRQ); 1568 raise_softirq_irqoff(NET_TX_SOFTIRQ);
1543 local_irq_restore(flags); 1569 local_irq_restore(flags);
1544} 1570}
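
The softnet output queue gains a tail pointer (output_queue_tailp), so __netif_reschedule() appends a qdisc in O(1) and keeps FIFO order instead of pushing onto the head of the list. A self-contained C sketch of the tail-pointer append; the names are illustrative, not the kernel structures:

#include <stdio.h>
#include <stdlib.h>

struct node {				/* stands in for struct Qdisc */
	int id;
	struct node *next_sched;
};

struct queue {				/* stands in for the per-CPU output queue */
	struct node *head;
	struct node **tailp;		/* address of the last next_sched (or of head) */
};

static void queue_init(struct queue *q)
{
	q->head = NULL;
	q->tailp = &q->head;
}

static void queue_append(struct queue *q, struct node *n)
{
	n->next_sched = NULL;
	*q->tailp = n;			/* link after the current tail */
	q->tailp = &n->next_sched;	/* remember the new tail position */
}

int main(void)
{
	struct queue q;
	queue_init(&q);
	for (int i = 0; i < 3; i++) {
		struct node *n = malloc(sizeof(*n));
		n->id = i;
		queue_append(&q, n);
	}
	for (struct node *n = q.head; n; n = n->next_sched)
		printf("%d\n", n->id);	/* 0 1 2: FIFO order preserved */
	return 0;
}
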
@@ -1783,18 +1809,27 @@ EXPORT_SYMBOL(netdev_rx_csum_fault);
1783 * 2. No high memory really exists on this machine. 1809 * 2. No high memory really exists on this machine.
1784 */ 1810 */
1785 1811
1786static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb) 1812static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
1787{ 1813{
1788#ifdef CONFIG_HIGHMEM 1814#ifdef CONFIG_HIGHMEM
1789 int i; 1815 int i;
1816 if (!(dev->features & NETIF_F_HIGHDMA)) {
1817 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
1818 if (PageHighMem(skb_shinfo(skb)->frags[i].page))
1819 return 1;
1820 }
1790 1821
1791 if (dev->features & NETIF_F_HIGHDMA) 1822 if (PCI_DMA_BUS_IS_PHYS) {
1792 return 0; 1823 struct device *pdev = dev->dev.parent;
1793
1794 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
1795 if (PageHighMem(skb_shinfo(skb)->frags[i].page))
1796 return 1;
1797 1824
1825 if (!pdev)
1826 return 0;
1827 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1828 dma_addr_t addr = page_to_phys(skb_shinfo(skb)->frags[i].page);
1829 if (!pdev->dma_mask || addr + PAGE_SIZE - 1 > *pdev->dma_mask)
1830 return 1;
1831 }
1832 }
1798#endif 1833#endif
1799 return 0; 1834 return 0;
1800} 1835}
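
illegal_highdma() keeps the old highmem test but, when the bus uses physical addressing (PCI_DMA_BUS_IS_PHYS), it now also rejects fragments whose pages lie above the parent device's DMA mask. The test is plain arithmetic on the last byte of the page; a user-space sketch of that range check, assuming a 4 KiB page and a 32-bit-only device:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE 4096ULL

/* Can a page starting at phys_addr be addressed by a device limited to
 * dma_mask? (Sketch of the range check only, not kernel code.) */
static int page_fits_dma_mask(uint64_t phys_addr, uint64_t dma_mask)
{
	return phys_addr + PAGE_SIZE - 1 <= dma_mask;
}

int main(void)
{
	uint64_t mask32 = 0xffffffffULL;	/* device that can only reach 4 GiB */

	printf("%d\n", page_fits_dma_mask(0x00001000ULL, mask32));	/* 1: low memory */
	printf("%d\n", page_fits_dma_mask(0xfffff000ULL, mask32));	/* 1: last page below 4 GiB */
	printf("%d\n", page_fits_dma_mask(0x100000000ULL, mask32));	/* 0: beyond the mask */
	return 0;
}
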
@@ -1852,6 +1887,17 @@ static int dev_gso_segment(struct sk_buff *skb)
1852 return 0; 1887 return 0;
1853} 1888}
1854 1889
1890/*
1891 * Try to orphan skb early, right before transmission by the device.
1892 * We cannot orphan skb if tx timestamp is requested, since
1893 * drivers need to call skb_tstamp_tx() to send the timestamp.
1894 */
1895static inline void skb_orphan_try(struct sk_buff *skb)
1896{
1897 if (!skb_tx(skb)->flags)
1898 skb_orphan(skb);
1899}
1900
1855int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, 1901int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
1856 struct netdev_queue *txq) 1902 struct netdev_queue *txq)
1857{ 1903{
@@ -1862,13 +1908,6 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
1862 if (!list_empty(&ptype_all)) 1908 if (!list_empty(&ptype_all))
1863 dev_queue_xmit_nit(skb, dev); 1909 dev_queue_xmit_nit(skb, dev);
1864 1910
1865 if (netif_needs_gso(dev, skb)) {
1866 if (unlikely(dev_gso_segment(skb)))
1867 goto out_kfree_skb;
1868 if (skb->next)
1869 goto gso;
1870 }
1871
1872 /* 1911 /*
1873 * If device doesnt need skb->dst, release it right now while 1912 * If device doesnt need skb->dst, release it right now while
1874 * its hot in this cpu cache 1913 * its hot in this cpu cache
@@ -1876,23 +1915,18 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
1876 if (dev->priv_flags & IFF_XMIT_DST_RELEASE) 1915 if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
1877 skb_dst_drop(skb); 1916 skb_dst_drop(skb);
1878 1917
1918 skb_orphan_try(skb);
1919
1920 if (netif_needs_gso(dev, skb)) {
1921 if (unlikely(dev_gso_segment(skb)))
1922 goto out_kfree_skb;
1923 if (skb->next)
1924 goto gso;
1925 }
1926
1879 rc = ops->ndo_start_xmit(skb, dev); 1927 rc = ops->ndo_start_xmit(skb, dev);
1880 if (rc == NETDEV_TX_OK) 1928 if (rc == NETDEV_TX_OK)
1881 txq_trans_update(txq); 1929 txq_trans_update(txq);
1882 /*
1883 * TODO: if skb_orphan() was called by
1884 * dev->hard_start_xmit() (for example, the unmodified
1885 * igb driver does that; bnx2 doesn't), then
1886 * skb_tx_software_timestamp() will be unable to send
1887 * back the time stamp.
1888 *
1889 * How can this be prevented? Always create another
1890 * reference to the socket before calling
1891 * dev->hard_start_xmit()? Prevent that skb_orphan()
1892 * does anything in dev->hard_start_xmit() by clearing
1893 * the skb destructor before the call and restoring it
1894 * afterwards, then doing the skb_orphan() ourselves?
1895 */
1896 return rc; 1930 return rc;
1897 } 1931 }
1898 1932
@@ -1931,7 +1965,7 @@ out_kfree_skb:
1931 return rc; 1965 return rc;
1932} 1966}
1933 1967
1934static u32 skb_tx_hashrnd; 1968static u32 hashrnd __read_mostly;
1935 1969
1936u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb) 1970u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
1937{ 1971{
@@ -1947,9 +1981,9 @@ u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
1947 if (skb->sk && skb->sk->sk_hash) 1981 if (skb->sk && skb->sk->sk_hash)
1948 hash = skb->sk->sk_hash; 1982 hash = skb->sk->sk_hash;
1949 else 1983 else
1950 hash = skb->protocol; 1984 hash = (__force u16) skb->protocol;
1951 1985
1952 hash = jhash_1word(hash, skb_tx_hashrnd); 1986 hash = jhash_1word(hash, hashrnd);
1953 1987
1954 return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32); 1988 return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
1955} 1989}
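
The renamed hashrnd feeds skb_tx_hash(), which maps a 32-bit hash onto real_num_tx_queues with ((u64)hash * n) >> 32 instead of a modulo: the product of a 32-bit value and n, shifted right by 32 bits, always falls in [0, n) and needs no division. A standalone sketch of that mapping:

#include <stdint.h>
#include <stdio.h>

/* Scale a 32-bit hash into [0, nqueues) without a modulo. */
static uint16_t hash_to_queue(uint32_t hash, uint16_t nqueues)
{
	return (uint16_t)(((uint64_t)hash * nqueues) >> 32);
}

int main(void)
{
	uint16_t nqueues = 8;

	printf("%u\n", hash_to_queue(0x00000000u, nqueues));	/* 0 */
	printf("%u\n", hash_to_queue(0x80000000u, nqueues));	/* 4: middle of the hash space */
	printf("%u\n", hash_to_queue(0xffffffffu, nqueues));	/* 7: never reaches nqueues */
	return 0;
}
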
@@ -1959,10 +1993,9 @@ static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
1959{ 1993{
1960 if (unlikely(queue_index >= dev->real_num_tx_queues)) { 1994 if (unlikely(queue_index >= dev->real_num_tx_queues)) {
1961 if (net_ratelimit()) { 1995 if (net_ratelimit()) {
1962 WARN(1, "%s selects TX queue %d, but " 1996 pr_warning("%s selects TX queue %d, but "
1963 "real number of TX queues is %d\n", 1997 "real number of TX queues is %d\n",
1964 dev->name, queue_index, 1998 dev->name, queue_index, dev->real_num_tx_queues);
1965 dev->real_num_tx_queues);
1966 } 1999 }
1967 return 0; 2000 return 0;
1968 } 2001 }
@@ -1989,7 +2022,7 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev,
1989 queue_index = skb_tx_hash(dev, skb); 2022 queue_index = skb_tx_hash(dev, skb);
1990 2023
1991 if (sk) { 2024 if (sk) {
1992 struct dst_entry *dst = rcu_dereference_bh(sk->sk_dst_cache); 2025 struct dst_entry *dst = rcu_dereference_check(sk->sk_dst_cache, 1);
1993 2026
1994 if (dst && skb_dst(skb) == dst) 2027 if (dst && skb_dst(skb) == dst)
1995 sk_tx_queue_set(sk, queue_index); 2028 sk_tx_queue_set(sk, queue_index);
@@ -2019,6 +2052,8 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
2019 * waiting to be sent out; and the qdisc is not running - 2052 * waiting to be sent out; and the qdisc is not running -
2020 * xmit the skb directly. 2053 * xmit the skb directly.
2021 */ 2054 */
2055 if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE))
2056 skb_dst_force(skb);
2022 __qdisc_update_bstats(q, skb->len); 2057 __qdisc_update_bstats(q, skb->len);
2023 if (sch_direct_xmit(skb, q, dev, txq, root_lock)) 2058 if (sch_direct_xmit(skb, q, dev, txq, root_lock))
2024 __qdisc_run(q); 2059 __qdisc_run(q);
@@ -2027,6 +2062,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
2027 2062
2028 rc = NET_XMIT_SUCCESS; 2063 rc = NET_XMIT_SUCCESS;
2029 } else { 2064 } else {
2065 skb_dst_force(skb);
2030 rc = qdisc_enqueue_root(skb, q); 2066 rc = qdisc_enqueue_root(skb, q);
2031 qdisc_run(q); 2067 qdisc_run(q);
2032 } 2068 }
@@ -2174,11 +2210,249 @@ EXPORT_SYMBOL(dev_queue_xmit);
2174 =======================================================================*/ 2210 =======================================================================*/
2175 2211
2176int netdev_max_backlog __read_mostly = 1000; 2212int netdev_max_backlog __read_mostly = 1000;
2213int netdev_tstamp_prequeue __read_mostly = 1;
2177int netdev_budget __read_mostly = 300; 2214int netdev_budget __read_mostly = 300;
2178int weight_p __read_mostly = 64; /* old backlog weight */ 2215int weight_p __read_mostly = 64; /* old backlog weight */
2179 2216
2180DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, }; 2217/* Called with irq disabled */
2218static inline void ____napi_schedule(struct softnet_data *sd,
2219 struct napi_struct *napi)
2220{
2221 list_add_tail(&napi->poll_list, &sd->poll_list);
2222 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2223}
2224
2225#ifdef CONFIG_RPS
2226
2227/* One global table that all flow-based protocols share. */
2228struct rps_sock_flow_table *rps_sock_flow_table __read_mostly;
2229EXPORT_SYMBOL(rps_sock_flow_table);
2230
2231/*
2232 * get_rps_cpu is called from netif_receive_skb and returns the target
2233 * CPU from the RPS map of the receiving queue for a given skb.
2234 * rcu_read_lock must be held on entry.
2235 */
2236static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
2237 struct rps_dev_flow **rflowp)
2238{
2239 struct ipv6hdr *ip6;
2240 struct iphdr *ip;
2241 struct netdev_rx_queue *rxqueue;
2242 struct rps_map *map;
2243 struct rps_dev_flow_table *flow_table;
2244 struct rps_sock_flow_table *sock_flow_table;
2245 int cpu = -1;
2246 u8 ip_proto;
2247 u16 tcpu;
2248 u32 addr1, addr2, ihl;
2249 union {
2250 u32 v32;
2251 u16 v16[2];
2252 } ports;
2253
2254 if (skb_rx_queue_recorded(skb)) {
2255 u16 index = skb_get_rx_queue(skb);
2256 if (unlikely(index >= dev->num_rx_queues)) {
2257 if (net_ratelimit()) {
2258 pr_warning("%s received packet on queue "
2259 "%u, but number of RX queues is %u\n",
2260 dev->name, index, dev->num_rx_queues);
2261 }
2262 goto done;
2263 }
2264 rxqueue = dev->_rx + index;
2265 } else
2266 rxqueue = dev->_rx;
2267
2268 if (!rxqueue->rps_map && !rxqueue->rps_flow_table)
2269 goto done;
2181 2270
2271 if (skb->rxhash)
2272 goto got_hash; /* Skip hash computation on packet header */
2273
2274 switch (skb->protocol) {
2275 case __constant_htons(ETH_P_IP):
2276 if (!pskb_may_pull(skb, sizeof(*ip)))
2277 goto done;
2278
2279 ip = (struct iphdr *) skb->data;
2280 ip_proto = ip->protocol;
2281 addr1 = (__force u32) ip->saddr;
2282 addr2 = (__force u32) ip->daddr;
2283 ihl = ip->ihl;
2284 break;
2285 case __constant_htons(ETH_P_IPV6):
2286 if (!pskb_may_pull(skb, sizeof(*ip6)))
2287 goto done;
2288
2289 ip6 = (struct ipv6hdr *) skb->data;
2290 ip_proto = ip6->nexthdr;
2291 addr1 = (__force u32) ip6->saddr.s6_addr32[3];
2292 addr2 = (__force u32) ip6->daddr.s6_addr32[3];
2293 ihl = (40 >> 2);
2294 break;
2295 default:
2296 goto done;
2297 }
2298 switch (ip_proto) {
2299 case IPPROTO_TCP:
2300 case IPPROTO_UDP:
2301 case IPPROTO_DCCP:
2302 case IPPROTO_ESP:
2303 case IPPROTO_AH:
2304 case IPPROTO_SCTP:
2305 case IPPROTO_UDPLITE:
2306 if (pskb_may_pull(skb, (ihl * 4) + 4)) {
2307 ports.v32 = * (__force u32 *) (skb->data + (ihl * 4));
2308 if (ports.v16[1] < ports.v16[0])
2309 swap(ports.v16[0], ports.v16[1]);
2310 break;
2311 }
2312 default:
2313 ports.v32 = 0;
2314 break;
2315 }
2316
2317 /* get a consistent hash (same value on both flow directions) */
2318 if (addr2 < addr1)
2319 swap(addr1, addr2);
2320 skb->rxhash = jhash_3words(addr1, addr2, ports.v32, hashrnd);
2321 if (!skb->rxhash)
2322 skb->rxhash = 1;
2323
2324got_hash:
2325 flow_table = rcu_dereference(rxqueue->rps_flow_table);
2326 sock_flow_table = rcu_dereference(rps_sock_flow_table);
2327 if (flow_table && sock_flow_table) {
2328 u16 next_cpu;
2329 struct rps_dev_flow *rflow;
2330
2331 rflow = &flow_table->flows[skb->rxhash & flow_table->mask];
2332 tcpu = rflow->cpu;
2333
2334 next_cpu = sock_flow_table->ents[skb->rxhash &
2335 sock_flow_table->mask];
2336
2337 /*
2338 * If the desired CPU (where last recvmsg was done) is
2339 * different from current CPU (one in the rx-queue flow
2340 * table entry), switch if one of the following holds:
2341 * - Current CPU is unset (equal to RPS_NO_CPU).
2342 * - Current CPU is offline.
2343 * - The current CPU's queue tail has advanced beyond the
2344 * last packet that was enqueued using this table entry.
2345 * This guarantees that all previous packets for the flow
2346 * have been dequeued, thus preserving in order delivery.
2347 */
2348 if (unlikely(tcpu != next_cpu) &&
2349 (tcpu == RPS_NO_CPU || !cpu_online(tcpu) ||
2350 ((int)(per_cpu(softnet_data, tcpu).input_queue_head -
2351 rflow->last_qtail)) >= 0)) {
2352 tcpu = rflow->cpu = next_cpu;
2353 if (tcpu != RPS_NO_CPU)
2354 rflow->last_qtail = per_cpu(softnet_data,
2355 tcpu).input_queue_head;
2356 }
2357 if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) {
2358 *rflowp = rflow;
2359 cpu = tcpu;
2360 goto done;
2361 }
2362 }
2363
2364 map = rcu_dereference(rxqueue->rps_map);
2365 if (map) {
2366 tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32];
2367
2368 if (cpu_online(tcpu)) {
2369 cpu = tcpu;
2370 goto done;
2371 }
2372 }
2373
2374done:
2375 return cpu;
2376}
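
The hash that get_rps_cpu() computes is direction-independent: the two addresses are swapped into canonical order and so are the two ports before jhash runs, so packets of a flow and the socket activity recorded in rps_sock_flow_table index the same table entries. A user-space sketch of the canonicalisation step; the mixer below is an arbitrary stand-in, not the kernel's jhash_3words():

#include <stdint.h>
#include <stdio.h>

/* Arbitrary 32-bit mixer standing in for jhash_3words() in this demo. */
static uint32_t mix3(uint32_t a, uint32_t b, uint32_t c, uint32_t seed)
{
	uint32_t h = seed;
	h = (h ^ a) * 0x9e3779b1u;
	h = (h ^ b) * 0x85ebca77u;
	h = (h ^ c) * 0xc2b2ae3du;
	return h ^ (h >> 16);
}

static uint32_t flow_hash(uint32_t saddr, uint32_t daddr,
			  uint16_t sport, uint16_t dport, uint32_t seed)
{
	/* Canonical order: smaller address first, smaller port first. */
	if (daddr < saddr) {
		uint32_t t = saddr; saddr = daddr; daddr = t;
	}
	if (dport < sport) {
		uint16_t t = sport; sport = dport; dport = t;
	}
	return mix3(saddr, daddr, ((uint32_t)dport << 16) | sport, seed);
}

int main(void)
{
	uint32_t seed = 0x12345678u;

	/* Both directions of one connection produce the same hash. */
	printf("%08x\n", (unsigned)flow_hash(0x0a000001, 0x0a000002, 40000, 80, seed));
	printf("%08x\n", (unsigned)flow_hash(0x0a000002, 0x0a000001, 80, 40000, seed));
	return 0;
}
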
2377
2378/* Called from hardirq (IPI) context */
2379static void rps_trigger_softirq(void *data)
2380{
2381 struct softnet_data *sd = data;
2382
2383 ____napi_schedule(sd, &sd->backlog);
2384 sd->received_rps++;
2385}
2386
2387#endif /* CONFIG_RPS */
2388
2389/*
2390 * Check if this softnet_data structure is another cpu one
2391 * If yes, queue it to our IPI list and return 1
2392 * If no, return 0
2393 */
2394static int rps_ipi_queued(struct softnet_data *sd)
2395{
2396#ifdef CONFIG_RPS
2397 struct softnet_data *mysd = &__get_cpu_var(softnet_data);
2398
2399 if (sd != mysd) {
2400 sd->rps_ipi_next = mysd->rps_ipi_list;
2401 mysd->rps_ipi_list = sd;
2402
2403 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2404 return 1;
2405 }
2406#endif /* CONFIG_RPS */
2407 return 0;
2408}
2409
2410/*
2411 * enqueue_to_backlog is called to queue an skb to a per CPU backlog
2412 * queue (may be a remote CPU queue).
2413 */
2414static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
2415 unsigned int *qtail)
2416{
2417 struct softnet_data *sd;
2418 unsigned long flags;
2419
2420 sd = &per_cpu(softnet_data, cpu);
2421
2422 local_irq_save(flags);
2423
2424 rps_lock(sd);
2425 if (skb_queue_len(&sd->input_pkt_queue) <= netdev_max_backlog) {
2426 if (skb_queue_len(&sd->input_pkt_queue)) {
2427enqueue:
2428 __skb_queue_tail(&sd->input_pkt_queue, skb);
2429#ifdef CONFIG_RPS
2430 *qtail = sd->input_queue_head +
2431 skb_queue_len(&sd->input_pkt_queue);
2432#endif
2433 rps_unlock(sd);
2434 local_irq_restore(flags);
2435 return NET_RX_SUCCESS;
2436 }
2437
2438 /* Schedule NAPI for backlog device
2439 * We can use non atomic operation since we own the queue lock
2440 */
2441 if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state)) {
2442 if (!rps_ipi_queued(sd))
2443 ____napi_schedule(sd, &sd->backlog);
2444 }
2445 goto enqueue;
2446 }
2447
2448 sd->dropped++;
2449 rps_unlock(sd);
2450
2451 local_irq_restore(flags);
2452
2453 kfree_skb(skb);
2454 return NET_RX_DROP;
2455}
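
enqueue_to_backlog() records in *qtail the queue position at which the packet went in; get_rps_cpu() above compares that value against the consumer's input_queue_head with a signed difference, (int)(head - last_qtail) >= 0, so the "has the old CPU drained past this point" test stays correct even after the free-running counters wrap. A short demonstration of why the subtraction form is used:

#include <stdio.h>

/* Has the consumer's head counter reached or passed the recorded tail?
 * Both counters are free-running unsigned values that may wrap. */
static int head_reached(unsigned int head, unsigned int last_qtail)
{
	return (int)(head - last_qtail) >= 0;
}

int main(void)
{
	printf("%d\n", head_reached(1000u, 900u));	/* 1: already drained past it */
	printf("%d\n", head_reached(900u, 1000u));	/* 0: not yet drained */
	printf("%d\n", head_reached(5u, 0xfffffff0u));	/* 1: head wrapped, still correct */
	printf("%d\n", 5u >= 0xfffffff0u);		/* 0: a naive compare gets this wrong */
	return 0;
}
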
2182 2456
2183/** 2457/**
2184 * netif_rx - post buffer to the network code 2458 * netif_rx - post buffer to the network code
@@ -2197,41 +2471,38 @@ DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
2197 2471
2198int netif_rx(struct sk_buff *skb) 2472int netif_rx(struct sk_buff *skb)
2199{ 2473{
2200 struct softnet_data *queue; 2474 int ret;
2201 unsigned long flags;
2202 2475
2203 /* if netpoll wants it, pretend we never saw it */ 2476 /* if netpoll wants it, pretend we never saw it */
2204 if (netpoll_rx(skb)) 2477 if (netpoll_rx(skb))
2205 return NET_RX_DROP; 2478 return NET_RX_DROP;
2206 2479
2207 if (!skb->tstamp.tv64) 2480 if (netdev_tstamp_prequeue)
2208 net_timestamp(skb); 2481 net_timestamp_check(skb);
2209 2482
2210 /* 2483#ifdef CONFIG_RPS
2211 * The code is rearranged so that the path is the most 2484 {
2212 * short when CPU is congested, but is still operating. 2485 struct rps_dev_flow voidflow, *rflow = &voidflow;
2213 */ 2486 int cpu;
2214 local_irq_save(flags);
2215 queue = &__get_cpu_var(softnet_data);
2216 2487
2217 __get_cpu_var(netdev_rx_stat).total++; 2488 rcu_read_lock();
2218 if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
2219 if (queue->input_pkt_queue.qlen) {
2220enqueue:
2221 __skb_queue_tail(&queue->input_pkt_queue, skb);
2222 local_irq_restore(flags);
2223 return NET_RX_SUCCESS;
2224 }
2225 2489
2226 napi_schedule(&queue->backlog); 2490 cpu = get_rps_cpu(skb->dev, skb, &rflow);
2227 goto enqueue; 2491 if (cpu < 0)
2228 } 2492 cpu = smp_processor_id();
2229 2493
2230 __get_cpu_var(netdev_rx_stat).dropped++; 2494 ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
2231 local_irq_restore(flags);
2232 2495
2233 kfree_skb(skb); 2496 rcu_read_unlock();
2234 return NET_RX_DROP; 2497 }
2498#else
2499 {
2500 unsigned int qtail;
2501 ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
2502 put_cpu();
2503 }
2504#endif
2505 return ret;
2235} 2506}
2236EXPORT_SYMBOL(netif_rx); 2507EXPORT_SYMBOL(netif_rx);
2237 2508
@@ -2276,6 +2547,7 @@ static void net_tx_action(struct softirq_action *h)
2276 local_irq_disable(); 2547 local_irq_disable();
2277 head = sd->output_queue; 2548 head = sd->output_queue;
2278 sd->output_queue = NULL; 2549 sd->output_queue = NULL;
2550 sd->output_queue_tailp = &sd->output_queue;
2279 local_irq_enable(); 2551 local_irq_enable();
2280 2552
2281 while (head) { 2553 while (head) {
@@ -2352,7 +2624,8 @@ static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
2352#endif 2624#endif
2353 2625
2354#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE) 2626#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
2355struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *skb) __read_mostly; 2627struct sk_buff *(*macvlan_handle_frame_hook)(struct macvlan_port *p,
2628 struct sk_buff *skb) __read_mostly;
2356EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook); 2629EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);
2357 2630
2358static inline struct sk_buff *handle_macvlan(struct sk_buff *skb, 2631static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
@@ -2360,14 +2633,17 @@ static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
2360 int *ret, 2633 int *ret,
2361 struct net_device *orig_dev) 2634 struct net_device *orig_dev)
2362{ 2635{
2363 if (skb->dev->macvlan_port == NULL) 2636 struct macvlan_port *port;
2637
2638 port = rcu_dereference(skb->dev->macvlan_port);
2639 if (!port)
2364 return skb; 2640 return skb;
2365 2641
2366 if (*pt_prev) { 2642 if (*pt_prev) {
2367 *ret = deliver_skb(skb, *pt_prev, orig_dev); 2643 *ret = deliver_skb(skb, *pt_prev, orig_dev);
2368 *pt_prev = NULL; 2644 *pt_prev = NULL;
2369 } 2645 }
2370 return macvlan_handle_frame_hook(skb); 2646 return macvlan_handle_frame_hook(port, skb);
2371} 2647}
2372#else 2648#else
2373#define handle_macvlan(skb, pt_prev, ret, orig_dev) (skb) 2649#define handle_macvlan(skb, pt_prev, ret, orig_dev) (skb)
@@ -2468,22 +2744,56 @@ void netif_nit_deliver(struct sk_buff *skb)
2468 rcu_read_unlock(); 2744 rcu_read_unlock();
2469} 2745}
2470 2746
2471/** 2747static inline void skb_bond_set_mac_by_master(struct sk_buff *skb,
2472 * netif_receive_skb - process receive buffer from network 2748 struct net_device *master)
2473 * @skb: buffer to process 2749{
2474 * 2750 if (skb->pkt_type == PACKET_HOST) {
2475 * netif_receive_skb() is the main receive data processing function. 2751 u16 *dest = (u16 *) eth_hdr(skb)->h_dest;
2476 * It always succeeds. The buffer may be dropped during processing 2752
2477 * for congestion control or by the protocol layers. 2753 memcpy(dest, master->dev_addr, ETH_ALEN);
2478 * 2754 }
2479 * This function may only be called from softirq context and interrupts 2755}
2480 * should be enabled. 2756
2481 * 2757/* On bonding slaves other than the currently active slave, suppress
2482 * Return values (usually ignored): 2758 * duplicates except for 802.3ad ETH_P_SLOW, alb non-mcast/bcast, and
2483 * NET_RX_SUCCESS: no congestion 2759 * ARP on active-backup slaves with arp_validate enabled.
2484 * NET_RX_DROP: packet was dropped
2485 */ 2760 */
2486int netif_receive_skb(struct sk_buff *skb) 2761int __skb_bond_should_drop(struct sk_buff *skb, struct net_device *master)
2762{
2763 struct net_device *dev = skb->dev;
2764
2765 if (master->priv_flags & IFF_MASTER_ARPMON)
2766 dev->last_rx = jiffies;
2767
2768 if ((master->priv_flags & IFF_MASTER_ALB) && master->br_port) {
2769 /* Do address unmangle. The local destination address
2770 * will be always the one master has. Provides the right
2771 * functionality in a bridge.
2772 */
2773 skb_bond_set_mac_by_master(skb, master);
2774 }
2775
2776 if (dev->priv_flags & IFF_SLAVE_INACTIVE) {
2777 if ((dev->priv_flags & IFF_SLAVE_NEEDARP) &&
2778 skb->protocol == __cpu_to_be16(ETH_P_ARP))
2779 return 0;
2780
2781 if (master->priv_flags & IFF_MASTER_ALB) {
2782 if (skb->pkt_type != PACKET_BROADCAST &&
2783 skb->pkt_type != PACKET_MULTICAST)
2784 return 0;
2785 }
2786 if (master->priv_flags & IFF_MASTER_8023AD &&
2787 skb->protocol == __cpu_to_be16(ETH_P_SLOW))
2788 return 0;
2789
2790 return 1;
2791 }
2792 return 0;
2793}
2794EXPORT_SYMBOL(__skb_bond_should_drop);
2795
2796static int __netif_receive_skb(struct sk_buff *skb)
2487{ 2797{
2488 struct packet_type *ptype, *pt_prev; 2798 struct packet_type *ptype, *pt_prev;
2489 struct net_device *orig_dev; 2799 struct net_device *orig_dev;
@@ -2493,8 +2803,8 @@ int netif_receive_skb(struct sk_buff *skb)
2493 int ret = NET_RX_DROP; 2803 int ret = NET_RX_DROP;
2494 __be16 type; 2804 __be16 type;
2495 2805
2496 if (!skb->tstamp.tv64) 2806 if (!netdev_tstamp_prequeue)
2497 net_timestamp(skb); 2807 net_timestamp_check(skb);
2498 2808
2499 if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb)) 2809 if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb))
2500 return NET_RX_SUCCESS; 2810 return NET_RX_SUCCESS;
@@ -2516,7 +2826,7 @@ int netif_receive_skb(struct sk_buff *skb)
2516 skb->dev = master; 2826 skb->dev = master;
2517 } 2827 }
2518 2828
2519 __get_cpu_var(netdev_rx_stat).total++; 2829 __get_cpu_var(softnet_data).processed++;
2520 2830
2521 skb_reset_network_header(skb); 2831 skb_reset_network_header(skb);
2522 skb_reset_transport_header(skb); 2832 skb_reset_transport_header(skb);
@@ -2594,20 +2904,77 @@ out:
2594 rcu_read_unlock(); 2904 rcu_read_unlock();
2595 return ret; 2905 return ret;
2596} 2906}
2907
2908/**
2909 * netif_receive_skb - process receive buffer from network
2910 * @skb: buffer to process
2911 *
2912 * netif_receive_skb() is the main receive data processing function.
2913 * It always succeeds. The buffer may be dropped during processing
2914 * for congestion control or by the protocol layers.
2915 *
2916 * This function may only be called from softirq context and interrupts
2917 * should be enabled.
2918 *
2919 * Return values (usually ignored):
2920 * NET_RX_SUCCESS: no congestion
2921 * NET_RX_DROP: packet was dropped
2922 */
2923int netif_receive_skb(struct sk_buff *skb)
2924{
2925 if (netdev_tstamp_prequeue)
2926 net_timestamp_check(skb);
2927
2928#ifdef CONFIG_RPS
2929 {
2930 struct rps_dev_flow voidflow, *rflow = &voidflow;
2931 int cpu, ret;
2932
2933 rcu_read_lock();
2934
2935 cpu = get_rps_cpu(skb->dev, skb, &rflow);
2936
2937 if (cpu >= 0) {
2938 ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
2939 rcu_read_unlock();
2940 } else {
2941 rcu_read_unlock();
2942 ret = __netif_receive_skb(skb);
2943 }
2944
2945 return ret;
2946 }
2947#else
2948 return __netif_receive_skb(skb);
2949#endif
2950}
2597EXPORT_SYMBOL(netif_receive_skb); 2951EXPORT_SYMBOL(netif_receive_skb);
2598 2952
2599/* Network device is going away, flush any packets still pending */ 2953/* Network device is going away, flush any packets still pending
2954 * Called with irqs disabled.
2955 */
2600static void flush_backlog(void *arg) 2956static void flush_backlog(void *arg)
2601{ 2957{
2602 struct net_device *dev = arg; 2958 struct net_device *dev = arg;
2603 struct softnet_data *queue = &__get_cpu_var(softnet_data); 2959 struct softnet_data *sd = &__get_cpu_var(softnet_data);
2604 struct sk_buff *skb, *tmp; 2960 struct sk_buff *skb, *tmp;
2605 2961
2606 skb_queue_walk_safe(&queue->input_pkt_queue, skb, tmp) 2962 rps_lock(sd);
2963 skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
2607 if (skb->dev == dev) { 2964 if (skb->dev == dev) {
2608 __skb_unlink(skb, &queue->input_pkt_queue); 2965 __skb_unlink(skb, &sd->input_pkt_queue);
2609 kfree_skb(skb); 2966 kfree_skb(skb);
2967 input_queue_head_add(sd, 1);
2610 } 2968 }
2969 }
2970 rps_unlock(sd);
2971
2972 skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
2973 if (skb->dev == dev) {
2974 __skb_unlink(skb, &sd->process_queue);
2975 kfree_skb(skb);
2976 }
2977 }
2611} 2978}
2612 2979
2613static int napi_gro_complete(struct sk_buff *skb) 2980static int napi_gro_complete(struct sk_buff *skb)
@@ -2910,27 +3277,85 @@ gro_result_t napi_gro_frags(struct napi_struct *napi)
2910} 3277}
2911EXPORT_SYMBOL(napi_gro_frags); 3278EXPORT_SYMBOL(napi_gro_frags);
2912 3279
3280/*
3281 * net_rps_action sends any pending IPI's for rps.
3282 * Note: called with local irq disabled, but exits with local irq enabled.
3283 */
3284static void net_rps_action_and_irq_enable(struct softnet_data *sd)
3285{
3286#ifdef CONFIG_RPS
3287 struct softnet_data *remsd = sd->rps_ipi_list;
3288
3289 if (remsd) {
3290 sd->rps_ipi_list = NULL;
3291
3292 local_irq_enable();
3293
3294 /* Send pending IPI's to kick RPS processing on remote cpus. */
3295 while (remsd) {
3296 struct softnet_data *next = remsd->rps_ipi_next;
3297
3298 if (cpu_online(remsd->cpu))
3299 __smp_call_function_single(remsd->cpu,
3300 &remsd->csd, 0);
3301 remsd = next;
3302 }
3303 } else
3304#endif
3305 local_irq_enable();
3306}
3307
2913static int process_backlog(struct napi_struct *napi, int quota) 3308static int process_backlog(struct napi_struct *napi, int quota)
2914{ 3309{
2915 int work = 0; 3310 int work = 0;
2916 struct softnet_data *queue = &__get_cpu_var(softnet_data); 3311 struct softnet_data *sd = container_of(napi, struct softnet_data, backlog);
2917 unsigned long start_time = jiffies;
2918 3312
3313#ifdef CONFIG_RPS
3314 /* Check if we have pending ipi, its better to send them now,
3315 * not waiting net_rx_action() end.
3316 */
3317 if (sd->rps_ipi_list) {
3318 local_irq_disable();
3319 net_rps_action_and_irq_enable(sd);
3320 }
3321#endif
2919 napi->weight = weight_p; 3322 napi->weight = weight_p;
2920 do { 3323 local_irq_disable();
3324 while (work < quota) {
2921 struct sk_buff *skb; 3325 struct sk_buff *skb;
3326 unsigned int qlen;
2922 3327
2923 local_irq_disable(); 3328 while ((skb = __skb_dequeue(&sd->process_queue))) {
2924 skb = __skb_dequeue(&queue->input_pkt_queue);
2925 if (!skb) {
2926 __napi_complete(napi);
2927 local_irq_enable(); 3329 local_irq_enable();
2928 break; 3330 __netif_receive_skb(skb);
3331 if (++work >= quota)
3332 return work;
3333 local_irq_disable();
2929 } 3334 }
2930 local_irq_enable();
2931 3335
2932 netif_receive_skb(skb); 3336 rps_lock(sd);
2933 } while (++work < quota && jiffies == start_time); 3337 qlen = skb_queue_len(&sd->input_pkt_queue);
3338 if (qlen) {
3339 input_queue_head_add(sd, qlen);
3340 skb_queue_splice_tail_init(&sd->input_pkt_queue,
3341 &sd->process_queue);
3342 }
3343 if (qlen < quota - work) {
3344 /*
3345 * Inline a custom version of __napi_complete().
3346 * only current cpu owns and manipulates this napi,
3347 * and NAPI_STATE_SCHED is the only possible flag set on backlog.
3348 * we can use a plain write instead of clear_bit(),
3349 * and we dont need an smp_mb() memory barrier.
3350 */
3351 list_del(&napi->poll_list);
3352 napi->state = 0;
3353
3354 quota = work + qlen;
3355 }
3356 rps_unlock(sd);
3357 }
3358 local_irq_enable();
2934 3359
2935 return work; 3360 return work;
2936} 3361}
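
process_backlog() now drains in two stages: under rps_lock() it splices everything waiting on input_pkt_queue onto a private process_queue, then delivers from that private list with the lock dropped, so remote CPUs can keep enqueueing while this CPU runs __netif_receive_skb(). A self-contained sketch of the splice-then-drain idea, with a pthread mutex standing in for rps_lock() and illustrative names throughout:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct pkt {
	int id;
	struct pkt *next;
};

struct list {
	struct pkt *head;
	struct pkt **tailp;
};

static void list_init(struct list *l)
{
	l->head = NULL;
	l->tailp = &l->head;
}

static void list_add_tail(struct list *l, struct pkt *p)
{
	p->next = NULL;
	*l->tailp = p;
	l->tailp = &p->next;
}

/* Move everything from src onto the tail of dst and reinitialise src. */
static void list_splice_tail_init(struct list *src, struct list *dst)
{
	if (!src->head)
		return;
	*dst->tailp = src->head;
	dst->tailp = src->tailp;
	list_init(src);
}

static pthread_mutex_t input_lock = PTHREAD_MUTEX_INITIALIZER;
static struct list input_queue;		/* producers append here under the lock */
static struct list process_queue;	/* drained by the consumer without the lock */

static void consume(void)
{
	pthread_mutex_lock(&input_lock);	/* rps_lock() analogue */
	list_splice_tail_init(&input_queue, &process_queue);
	pthread_mutex_unlock(&input_lock);

	struct pkt *p = process_queue.head;
	list_init(&process_queue);
	while (p) {				/* deliver with the lock dropped */
		struct pkt *next = p->next;
		printf("processed %d\n", p->id);
		free(p);
		p = next;
	}
}

int main(void)
{
	list_init(&input_queue);
	list_init(&process_queue);
	for (int i = 0; i < 4; i++) {
		struct pkt *p = malloc(sizeof(*p));
		p->id = i;
		pthread_mutex_lock(&input_lock);
		list_add_tail(&input_queue, p);
		pthread_mutex_unlock(&input_lock);
	}
	consume();
	return 0;
}
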
@@ -2946,8 +3371,7 @@ void __napi_schedule(struct napi_struct *n)
2946 unsigned long flags; 3371 unsigned long flags;
2947 3372
2948 local_irq_save(flags); 3373 local_irq_save(flags);
2949 list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list); 3374 ____napi_schedule(&__get_cpu_var(softnet_data), n);
2950 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2951 local_irq_restore(flags); 3375 local_irq_restore(flags);
2952} 3376}
2953EXPORT_SYMBOL(__napi_schedule); 3377EXPORT_SYMBOL(__napi_schedule);
@@ -3018,17 +3442,16 @@ void netif_napi_del(struct napi_struct *napi)
3018} 3442}
3019EXPORT_SYMBOL(netif_napi_del); 3443EXPORT_SYMBOL(netif_napi_del);
3020 3444
3021
3022static void net_rx_action(struct softirq_action *h) 3445static void net_rx_action(struct softirq_action *h)
3023{ 3446{
3024 struct list_head *list = &__get_cpu_var(softnet_data).poll_list; 3447 struct softnet_data *sd = &__get_cpu_var(softnet_data);
3025 unsigned long time_limit = jiffies + 2; 3448 unsigned long time_limit = jiffies + 2;
3026 int budget = netdev_budget; 3449 int budget = netdev_budget;
3027 void *have; 3450 void *have;
3028 3451
3029 local_irq_disable(); 3452 local_irq_disable();
3030 3453
3031 while (!list_empty(list)) { 3454 while (!list_empty(&sd->poll_list)) {
3032 struct napi_struct *n; 3455 struct napi_struct *n;
3033 int work, weight; 3456 int work, weight;
3034 3457
@@ -3046,7 +3469,7 @@ static void net_rx_action(struct softirq_action *h)
3046 * entries to the tail of this list, and only ->poll() 3469 * entries to the tail of this list, and only ->poll()
3047 * calls can remove this head entry from the list. 3470 * calls can remove this head entry from the list.
3048 */ 3471 */
3049 n = list_first_entry(list, struct napi_struct, poll_list); 3472 n = list_first_entry(&sd->poll_list, struct napi_struct, poll_list);
3050 3473
3051 have = netpoll_poll_lock(n); 3474 have = netpoll_poll_lock(n);
3052 3475
@@ -3081,13 +3504,13 @@ static void net_rx_action(struct softirq_action *h)
3081 napi_complete(n); 3504 napi_complete(n);
3082 local_irq_disable(); 3505 local_irq_disable();
3083 } else 3506 } else
3084 list_move_tail(&n->poll_list, list); 3507 list_move_tail(&n->poll_list, &sd->poll_list);
3085 } 3508 }
3086 3509
3087 netpoll_poll_unlock(have); 3510 netpoll_poll_unlock(have);
3088 } 3511 }
3089out: 3512out:
3090 local_irq_enable(); 3513 net_rps_action_and_irq_enable(sd);
3091 3514
3092#ifdef CONFIG_NET_DMA 3515#ifdef CONFIG_NET_DMA
3093 /* 3516 /*
@@ -3100,7 +3523,7 @@ out:
3100 return; 3523 return;
3101 3524
3102softnet_break: 3525softnet_break:
3103 __get_cpu_var(netdev_rx_stat).time_squeeze++; 3526 sd->time_squeeze++;
3104 __raise_softirq_irqoff(NET_RX_SOFTIRQ); 3527 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
3105 goto out; 3528 goto out;
3106} 3529}
@@ -3301,17 +3724,17 @@ static int dev_seq_show(struct seq_file *seq, void *v)
3301 return 0; 3724 return 0;
3302} 3725}
3303 3726
3304static struct netif_rx_stats *softnet_get_online(loff_t *pos) 3727static struct softnet_data *softnet_get_online(loff_t *pos)
3305{ 3728{
3306 struct netif_rx_stats *rc = NULL; 3729 struct softnet_data *sd = NULL;
3307 3730
3308 while (*pos < nr_cpu_ids) 3731 while (*pos < nr_cpu_ids)
3309 if (cpu_online(*pos)) { 3732 if (cpu_online(*pos)) {
3310 rc = &per_cpu(netdev_rx_stat, *pos); 3733 sd = &per_cpu(softnet_data, *pos);
3311 break; 3734 break;
3312 } else 3735 } else
3313 ++*pos; 3736 ++*pos;
3314 return rc; 3737 return sd;
3315} 3738}
3316 3739
3317static void *softnet_seq_start(struct seq_file *seq, loff_t *pos) 3740static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
@@ -3331,12 +3754,12 @@ static void softnet_seq_stop(struct seq_file *seq, void *v)
3331 3754
3332static int softnet_seq_show(struct seq_file *seq, void *v) 3755static int softnet_seq_show(struct seq_file *seq, void *v)
3333{ 3756{
3334 struct netif_rx_stats *s = v; 3757 struct softnet_data *sd = v;
3335 3758
3336 seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n", 3759 seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
3337 s->total, s->dropped, s->time_squeeze, 0, 3760 sd->processed, sd->dropped, sd->time_squeeze, 0,
3338 0, 0, 0, 0, /* was fastroute */ 3761 0, 0, 0, 0, /* was fastroute */
3339 s->cpu_collision); 3762 sd->cpu_collision, sd->received_rps);
3340 return 0; 3763 return 0;
3341} 3764}
3342 3765
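
With the move to softnet_data counters, each line of /proc/net/softnet_stat now carries ten hex fields: processed, dropped, time_squeeze, five unused placeholders, cpu_collision and the new received_rps. A small sketch of parsing one such line, assuming exactly that ten-column layout (the sample values are made up):

#include <stdio.h>

int main(void)
{
	const char *line = "0000030d 00000000 00000001 00000000 00000000 "
			   "00000000 00000000 00000000 00000000 00000002\n";
	unsigned int v[10];

	if (sscanf(line, "%x %x %x %x %x %x %x %x %x %x",
		   &v[0], &v[1], &v[2], &v[3], &v[4],
		   &v[5], &v[6], &v[7], &v[8], &v[9]) == 10)
		printf("processed=%u dropped=%u time_squeeze=%u "
		       "cpu_collision=%u received_rps=%u\n",
		       v[0], v[1], v[2], v[8], v[9]);
	return 0;
}
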
@@ -3559,11 +3982,10 @@ int netdev_set_master(struct net_device *slave, struct net_device *master)
3559 3982
3560 slave->master = master; 3983 slave->master = master;
3561 3984
3562 synchronize_net(); 3985 if (old) {
3563 3986 synchronize_net();
3564 if (old)
3565 dev_put(old); 3987 dev_put(old);
3566 3988 }
3567 if (master) 3989 if (master)
3568 slave->flags |= IFF_SLAVE; 3990 slave->flags |= IFF_SLAVE;
3569 else 3991 else
@@ -3740,562 +4162,6 @@ void dev_set_rx_mode(struct net_device *dev)
3740 netif_addr_unlock_bh(dev); 4162 netif_addr_unlock_bh(dev);
3741} 4163}
3742 4164
3743/* hw addresses list handling functions */
3744
3745static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr,
3746 int addr_len, unsigned char addr_type)
3747{
3748 struct netdev_hw_addr *ha;
3749 int alloc_size;
3750
3751 if (addr_len > MAX_ADDR_LEN)
3752 return -EINVAL;
3753
3754 list_for_each_entry(ha, &list->list, list) {
3755 if (!memcmp(ha->addr, addr, addr_len) &&
3756 ha->type == addr_type) {
3757 ha->refcount++;
3758 return 0;
3759 }
3760 }
3761
3762
3763 alloc_size = sizeof(*ha);
3764 if (alloc_size < L1_CACHE_BYTES)
3765 alloc_size = L1_CACHE_BYTES;
3766 ha = kmalloc(alloc_size, GFP_ATOMIC);
3767 if (!ha)
3768 return -ENOMEM;
3769 memcpy(ha->addr, addr, addr_len);
3770 ha->type = addr_type;
3771 ha->refcount = 1;
3772 ha->synced = false;
3773 list_add_tail_rcu(&ha->list, &list->list);
3774 list->count++;
3775 return 0;
3776}
3777
3778static void ha_rcu_free(struct rcu_head *head)
3779{
3780 struct netdev_hw_addr *ha;
3781
3782 ha = container_of(head, struct netdev_hw_addr, rcu_head);
3783 kfree(ha);
3784}
3785
3786static int __hw_addr_del(struct netdev_hw_addr_list *list, unsigned char *addr,
3787 int addr_len, unsigned char addr_type)
3788{
3789 struct netdev_hw_addr *ha;
3790
3791 list_for_each_entry(ha, &list->list, list) {
3792 if (!memcmp(ha->addr, addr, addr_len) &&
3793 (ha->type == addr_type || !addr_type)) {
3794 if (--ha->refcount)
3795 return 0;
3796 list_del_rcu(&ha->list);
3797 call_rcu(&ha->rcu_head, ha_rcu_free);
3798 list->count--;
3799 return 0;
3800 }
3801 }
3802 return -ENOENT;
3803}
3804
3805static int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list,
3806 struct netdev_hw_addr_list *from_list,
3807 int addr_len,
3808 unsigned char addr_type)
3809{
3810 int err;
3811 struct netdev_hw_addr *ha, *ha2;
3812 unsigned char type;
3813
3814 list_for_each_entry(ha, &from_list->list, list) {
3815 type = addr_type ? addr_type : ha->type;
3816 err = __hw_addr_add(to_list, ha->addr, addr_len, type);
3817 if (err)
3818 goto unroll;
3819 }
3820 return 0;
3821
3822unroll:
3823 list_for_each_entry(ha2, &from_list->list, list) {
3824 if (ha2 == ha)
3825 break;
3826 type = addr_type ? addr_type : ha2->type;
3827 __hw_addr_del(to_list, ha2->addr, addr_len, type);
3828 }
3829 return err;
3830}
3831
3832static void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list,
3833 struct netdev_hw_addr_list *from_list,
3834 int addr_len,
3835 unsigned char addr_type)
3836{
3837 struct netdev_hw_addr *ha;
3838 unsigned char type;
3839
3840 list_for_each_entry(ha, &from_list->list, list) {
3841 type = addr_type ? addr_type : ha->type;
3842 __hw_addr_del(to_list, ha->addr, addr_len, addr_type);
3843 }
3844}
3845
3846static int __hw_addr_sync(struct netdev_hw_addr_list *to_list,
3847 struct netdev_hw_addr_list *from_list,
3848 int addr_len)
3849{
3850 int err = 0;
3851 struct netdev_hw_addr *ha, *tmp;
3852
3853 list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
3854 if (!ha->synced) {
3855 err = __hw_addr_add(to_list, ha->addr,
3856 addr_len, ha->type);
3857 if (err)
3858 break;
3859 ha->synced = true;
3860 ha->refcount++;
3861 } else if (ha->refcount == 1) {
3862 __hw_addr_del(to_list, ha->addr, addr_len, ha->type);
3863 __hw_addr_del(from_list, ha->addr, addr_len, ha->type);
3864 }
3865 }
3866 return err;
3867}
3868
3869static void __hw_addr_unsync(struct netdev_hw_addr_list *to_list,
3870 struct netdev_hw_addr_list *from_list,
3871 int addr_len)
3872{
3873 struct netdev_hw_addr *ha, *tmp;
3874
3875 list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
3876 if (ha->synced) {
3877 __hw_addr_del(to_list, ha->addr,
3878 addr_len, ha->type);
3879 ha->synced = false;
3880 __hw_addr_del(from_list, ha->addr,
3881 addr_len, ha->type);
3882 }
3883 }
3884}
3885
3886static void __hw_addr_flush(struct netdev_hw_addr_list *list)
3887{
3888 struct netdev_hw_addr *ha, *tmp;
3889
3890 list_for_each_entry_safe(ha, tmp, &list->list, list) {
3891 list_del_rcu(&ha->list);
3892 call_rcu(&ha->rcu_head, ha_rcu_free);
3893 }
3894 list->count = 0;
3895}
3896
3897static void __hw_addr_init(struct netdev_hw_addr_list *list)
3898{
3899 INIT_LIST_HEAD(&list->list);
3900 list->count = 0;
3901}
3902
3903/* Device addresses handling functions */
3904
3905static void dev_addr_flush(struct net_device *dev)
3906{
3907 /* rtnl_mutex must be held here */
3908
3909 __hw_addr_flush(&dev->dev_addrs);
3910 dev->dev_addr = NULL;
3911}
3912
3913static int dev_addr_init(struct net_device *dev)
3914{
3915 unsigned char addr[MAX_ADDR_LEN];
3916 struct netdev_hw_addr *ha;
3917 int err;
3918
3919 /* rtnl_mutex must be held here */
3920
3921 __hw_addr_init(&dev->dev_addrs);
3922 memset(addr, 0, sizeof(addr));
3923 err = __hw_addr_add(&dev->dev_addrs, addr, sizeof(addr),
3924 NETDEV_HW_ADDR_T_LAN);
3925 if (!err) {
3926 /*
3927 * Get the first (previously created) address from the list
3928 * and set dev_addr pointer to this location.
3929 */
3930 ha = list_first_entry(&dev->dev_addrs.list,
3931 struct netdev_hw_addr, list);
3932 dev->dev_addr = ha->addr;
3933 }
3934 return err;
3935}
3936
3937/**
3938 * dev_addr_add - Add a device address
3939 * @dev: device
3940 * @addr: address to add
3941 * @addr_type: address type
3942 *
3943 * Add a device address to the device or increase the reference count if
3944 * it already exists.
3945 *
3946 * The caller must hold the rtnl_mutex.
3947 */
3948int dev_addr_add(struct net_device *dev, unsigned char *addr,
3949 unsigned char addr_type)
3950{
3951 int err;
3952
3953 ASSERT_RTNL();
3954
3955 err = __hw_addr_add(&dev->dev_addrs, addr, dev->addr_len, addr_type);
3956 if (!err)
3957 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
3958 return err;
3959}
3960EXPORT_SYMBOL(dev_addr_add);
3961
3962/**
3963 * dev_addr_del - Release a device address.
3964 * @dev: device
3965 * @addr: address to delete
3966 * @addr_type: address type
3967 *
3968 * Release reference to a device address and remove it from the device
3969 * if the reference count drops to zero.
3970 *
3971 * The caller must hold the rtnl_mutex.
3972 */
3973int dev_addr_del(struct net_device *dev, unsigned char *addr,
3974 unsigned char addr_type)
3975{
3976 int err;
3977 struct netdev_hw_addr *ha;
3978
3979 ASSERT_RTNL();
3980
3981 /*
3982 * We can not remove the first address from the list because
3983 * dev->dev_addr points to that.
3984 */
3985 ha = list_first_entry(&dev->dev_addrs.list,
3986 struct netdev_hw_addr, list);
3987 if (ha->addr == dev->dev_addr && ha->refcount == 1)
3988 return -ENOENT;
3989
3990 err = __hw_addr_del(&dev->dev_addrs, addr, dev->addr_len,
3991 addr_type);
3992 if (!err)
3993 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
3994 return err;
3995}
3996EXPORT_SYMBOL(dev_addr_del);
3997
3998/**
3999 * dev_addr_add_multiple - Add device addresses from another device
4000 * @to_dev: device to which addresses will be added
4001 * @from_dev: device from which addresses will be added
4002 * @addr_type: address type - 0 means type will be used from from_dev
4003 *
4004 * Add device addresses of the one device to another.
4005 **
4006 * The caller must hold the rtnl_mutex.
4007 */
4008int dev_addr_add_multiple(struct net_device *to_dev,
4009 struct net_device *from_dev,
4010 unsigned char addr_type)
4011{
4012 int err;
4013
4014 ASSERT_RTNL();
4015
4016 if (from_dev->addr_len != to_dev->addr_len)
4017 return -EINVAL;
4018 err = __hw_addr_add_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs,
4019 to_dev->addr_len, addr_type);
4020 if (!err)
4021 call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
4022 return err;
4023}
4024EXPORT_SYMBOL(dev_addr_add_multiple);
4025
4026/**
4027 * dev_addr_del_multiple - Delete device addresses by another device
4028 * @to_dev: device where the addresses will be deleted
4029 * @from_dev: device by which addresses the addresses will be deleted
4030 * @addr_type: address type - 0 means type will used from from_dev
4031 *
4032 * Deletes addresses in to device by the list of addresses in from device.
4033 *
4034 * The caller must hold the rtnl_mutex.
4035 */
4036int dev_addr_del_multiple(struct net_device *to_dev,
4037 struct net_device *from_dev,
4038 unsigned char addr_type)
4039{
4040 ASSERT_RTNL();
4041
4042 if (from_dev->addr_len != to_dev->addr_len)
4043 return -EINVAL;
4044 __hw_addr_del_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs,
4045 to_dev->addr_len, addr_type);
4046 call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
4047 return 0;
4048}
4049EXPORT_SYMBOL(dev_addr_del_multiple);
4050
4051/* multicast addresses handling functions */
4052
4053int __dev_addr_delete(struct dev_addr_list **list, int *count,
4054 void *addr, int alen, int glbl)
4055{
4056 struct dev_addr_list *da;
4057
4058 for (; (da = *list) != NULL; list = &da->next) {
4059 if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
4060 alen == da->da_addrlen) {
4061 if (glbl) {
4062 int old_glbl = da->da_gusers;
4063 da->da_gusers = 0;
4064 if (old_glbl == 0)
4065 break;
4066 }
4067 if (--da->da_users)
4068 return 0;
4069
4070 *list = da->next;
4071 kfree(da);
4072 (*count)--;
4073 return 0;
4074 }
4075 }
4076 return -ENOENT;
4077}
4078
4079int __dev_addr_add(struct dev_addr_list **list, int *count,
4080 void *addr, int alen, int glbl)
4081{
4082 struct dev_addr_list *da;
4083
4084 for (da = *list; da != NULL; da = da->next) {
4085 if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
4086 da->da_addrlen == alen) {
4087 if (glbl) {
4088 int old_glbl = da->da_gusers;
4089 da->da_gusers = 1;
4090 if (old_glbl)
4091 return 0;
4092 }
4093 da->da_users++;
4094 return 0;
4095 }
4096 }
4097
4098 da = kzalloc(sizeof(*da), GFP_ATOMIC);
4099 if (da == NULL)
4100 return -ENOMEM;
4101 memcpy(da->da_addr, addr, alen);
4102 da->da_addrlen = alen;
4103 da->da_users = 1;
4104 da->da_gusers = glbl ? 1 : 0;
4105 da->next = *list;
4106 *list = da;
4107 (*count)++;
4108 return 0;
4109}
4110
4111/**
4112 * dev_unicast_delete - Release secondary unicast address.
4113 * @dev: device
4114 * @addr: address to delete
4115 *
4116 * Release reference to a secondary unicast address and remove it
4117 * from the device if the reference count drops to zero.
4118 *
4119 * The caller must hold the rtnl_mutex.
4120 */
4121int dev_unicast_delete(struct net_device *dev, void *addr)
4122{
4123 int err;
4124
4125 ASSERT_RTNL();
4126
4127 netif_addr_lock_bh(dev);
4128 err = __hw_addr_del(&dev->uc, addr, dev->addr_len,
4129 NETDEV_HW_ADDR_T_UNICAST);
4130 if (!err)
4131 __dev_set_rx_mode(dev);
4132 netif_addr_unlock_bh(dev);
4133 return err;
4134}
4135EXPORT_SYMBOL(dev_unicast_delete);
4136
4137/**
4138 * dev_unicast_add - add a secondary unicast address
4139 * @dev: device
4140 * @addr: address to add
4141 *
4142 * Add a secondary unicast address to the device or increase
4143 * the reference count if it already exists.
4144 *
4145 * The caller must hold the rtnl_mutex.
4146 */
4147int dev_unicast_add(struct net_device *dev, void *addr)
4148{
4149 int err;
4150
4151 ASSERT_RTNL();
4152
4153 netif_addr_lock_bh(dev);
4154 err = __hw_addr_add(&dev->uc, addr, dev->addr_len,
4155 NETDEV_HW_ADDR_T_UNICAST);
4156 if (!err)
4157 __dev_set_rx_mode(dev);
4158 netif_addr_unlock_bh(dev);
4159 return err;
4160}
4161EXPORT_SYMBOL(dev_unicast_add);
4162
4163int __dev_addr_sync(struct dev_addr_list **to, int *to_count,
4164 struct dev_addr_list **from, int *from_count)
4165{
4166 struct dev_addr_list *da, *next;
4167 int err = 0;
4168
4169 da = *from;
4170 while (da != NULL) {
4171 next = da->next;
4172 if (!da->da_synced) {
4173 err = __dev_addr_add(to, to_count,
4174 da->da_addr, da->da_addrlen, 0);
4175 if (err < 0)
4176 break;
4177 da->da_synced = 1;
4178 da->da_users++;
4179 } else if (da->da_users == 1) {
4180 __dev_addr_delete(to, to_count,
4181 da->da_addr, da->da_addrlen, 0);
4182 __dev_addr_delete(from, from_count,
4183 da->da_addr, da->da_addrlen, 0);
4184 }
4185 da = next;
4186 }
4187 return err;
4188}
4189EXPORT_SYMBOL_GPL(__dev_addr_sync);
4190
4191void __dev_addr_unsync(struct dev_addr_list **to, int *to_count,
4192 struct dev_addr_list **from, int *from_count)
4193{
4194 struct dev_addr_list *da, *next;
4195
4196 da = *from;
4197 while (da != NULL) {
4198 next = da->next;
4199 if (da->da_synced) {
4200 __dev_addr_delete(to, to_count,
4201 da->da_addr, da->da_addrlen, 0);
4202 da->da_synced = 0;
4203 __dev_addr_delete(from, from_count,
4204 da->da_addr, da->da_addrlen, 0);
4205 }
4206 da = next;
4207 }
4208}
4209EXPORT_SYMBOL_GPL(__dev_addr_unsync);
4210
4211/**
4212 * dev_unicast_sync - Synchronize device's unicast list to another device
4213 * @to: destination device
4214 * @from: source device
4215 *
4216 * Add newly added addresses to the destination device and release
4217 * addresses that have no users left. The source device must be
4218 * locked by netif_tx_lock_bh.
4219 *
4220 * This function is intended to be called from the dev->set_rx_mode
4221 * function of layered software devices.
4222 */
4223int dev_unicast_sync(struct net_device *to, struct net_device *from)
4224{
4225 int err = 0;
4226
4227 if (to->addr_len != from->addr_len)
4228 return -EINVAL;
4229
4230 netif_addr_lock_bh(to);
4231 err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len);
4232 if (!err)
4233 __dev_set_rx_mode(to);
4234 netif_addr_unlock_bh(to);
4235 return err;
4236}
4237EXPORT_SYMBOL(dev_unicast_sync);
4238
4239/**
4240 * dev_unicast_unsync - Remove synchronized addresses from the destination device
4241 * @to: destination device
4242 * @from: source device
4243 *
4244 * Remove all addresses that were added to the destination device by
4245 * dev_unicast_sync(). This function is intended to be called from the
4246 * dev->stop function of layered software devices.
4247 */
4248void dev_unicast_unsync(struct net_device *to, struct net_device *from)
4249{
4250 if (to->addr_len != from->addr_len)
4251 return;
4252
4253 netif_addr_lock_bh(from);
4254 netif_addr_lock(to);
4255 __hw_addr_unsync(&to->uc, &from->uc, to->addr_len);
4256 __dev_set_rx_mode(to);
4257 netif_addr_unlock(to);
4258 netif_addr_unlock_bh(from);
4259}
4260EXPORT_SYMBOL(dev_unicast_unsync);
4261
4262static void dev_unicast_flush(struct net_device *dev)
4263{
4264 netif_addr_lock_bh(dev);
4265 __hw_addr_flush(&dev->uc);
4266 netif_addr_unlock_bh(dev);
4267}
4268
4269static void dev_unicast_init(struct net_device *dev)
4270{
4271 __hw_addr_init(&dev->uc);
4272}
4273
4274
4275static void __dev_addr_discard(struct dev_addr_list **list)
4276{
4277 struct dev_addr_list *tmp;
4278
4279 while (*list != NULL) {
4280 tmp = *list;
4281 *list = tmp->next;
4282 if (tmp->da_users > tmp->da_gusers)
4283 printk("__dev_addr_discard: address leakage! "
4284 "da_users=%d\n", tmp->da_users);
4285 kfree(tmp);
4286 }
4287}
4288
4289static void dev_addr_discard(struct net_device *dev)
4290{
4291 netif_addr_lock_bh(dev);
4292
4293 __dev_addr_discard(&dev->mc_list);
4294 netdev_mc_count(dev) = 0;
4295
4296 netif_addr_unlock_bh(dev);
4297}
4298
4299/** 4165/**
4300 * dev_get_flags - get flags reported to userspace 4166 * dev_get_flags - get flags reported to userspace
4301 * @dev: device 4167 * @dev: device
@@ -4606,8 +4472,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
4606 return -EINVAL; 4472 return -EINVAL;
4607 if (!netif_device_present(dev)) 4473 if (!netif_device_present(dev))
4608 return -ENODEV; 4474 return -ENODEV;
4609 return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data, 4475 return dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data);
4610 dev->addr_len, 1);
4611 4476
4612 case SIOCDELMULTI: 4477 case SIOCDELMULTI:
4613 if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) || 4478 if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
@@ -4615,8 +4480,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
4615 return -EINVAL; 4480 return -EINVAL;
4616 if (!netif_device_present(dev)) 4481 if (!netif_device_present(dev))
4617 return -ENODEV; 4482 return -ENODEV;
4618 return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data, 4483 return dev_mc_del_global(dev, ifr->ifr_hwaddr.sa_data);
4619 dev->addr_len, 1);
4620 4484
4621 case SIOCSIFTXQLEN: 4485 case SIOCSIFTXQLEN:
4622 if (ifr->ifr_qlen < 0) 4486 if (ifr->ifr_qlen < 0)
@@ -4923,8 +4787,8 @@ static void rollback_registered_many(struct list_head *head)
4923 /* 4787 /*
4924 * Flush the unicast and multicast chains 4788 * Flush the unicast and multicast chains
4925 */ 4789 */
4926 dev_unicast_flush(dev); 4790 dev_uc_flush(dev);
4927 dev_addr_discard(dev); 4791 dev_mc_flush(dev);
4928 4792
4929 if (dev->netdev_ops->ndo_uninit) 4793 if (dev->netdev_ops->ndo_uninit)
4930 dev->netdev_ops->ndo_uninit(dev); 4794 dev->netdev_ops->ndo_uninit(dev);
@@ -5073,6 +4937,24 @@ int register_netdevice(struct net_device *dev)
5073 4937
5074 dev->iflink = -1; 4938 dev->iflink = -1;
5075 4939
4940#ifdef CONFIG_RPS
4941 if (!dev->num_rx_queues) {
4942 /*
4943 * Allocate a single RX queue if driver never called
4944 * alloc_netdev_mq
4945 */
4946
4947 dev->_rx = kzalloc(sizeof(struct netdev_rx_queue), GFP_KERNEL);
4948 if (!dev->_rx) {
4949 ret = -ENOMEM;
4950 goto out;
4951 }
4952
4953 dev->_rx->first = dev->_rx;
4954 atomic_set(&dev->_rx->count, 1);
4955 dev->num_rx_queues = 1;
4956 }
4957#endif
5076 /* Init, if this function is available */ 4958 /* Init, if this function is available */
5077 if (dev->netdev_ops->ndo_init) { 4959 if (dev->netdev_ops->ndo_init) {
5078 ret = dev->netdev_ops->ndo_init(dev); 4960 ret = dev->netdev_ops->ndo_init(dev);
@@ -5433,6 +5315,10 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
5433 struct net_device *dev; 5315 struct net_device *dev;
5434 size_t alloc_size; 5316 size_t alloc_size;
5435 struct net_device *p; 5317 struct net_device *p;
5318#ifdef CONFIG_RPS
5319 struct netdev_rx_queue *rx;
5320 int i;
5321#endif
5436 5322
5437 BUG_ON(strlen(name) >= sizeof(dev->name)); 5323 BUG_ON(strlen(name) >= sizeof(dev->name));
5438 5324
@@ -5458,13 +5344,32 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
5458 goto free_p; 5344 goto free_p;
5459 } 5345 }
5460 5346
5347#ifdef CONFIG_RPS
5348 rx = kcalloc(queue_count, sizeof(struct netdev_rx_queue), GFP_KERNEL);
5349 if (!rx) {
5350 printk(KERN_ERR "alloc_netdev: Unable to allocate "
5351 "rx queues.\n");
5352 goto free_tx;
5353 }
5354
5355 atomic_set(&rx->count, queue_count);
5356
5357 /*
5358 * Set a pointer to first element in the array which holds the
5359 * reference count.
5360 */
5361 for (i = 0; i < queue_count; i++)
5362 rx[i].first = rx;
5363#endif
5364
5461 dev = PTR_ALIGN(p, NETDEV_ALIGN); 5365 dev = PTR_ALIGN(p, NETDEV_ALIGN);
5462 dev->padded = (char *)dev - (char *)p; 5366 dev->padded = (char *)dev - (char *)p;
5463 5367
5464 if (dev_addr_init(dev)) 5368 if (dev_addr_init(dev))
5465 goto free_tx; 5369 goto free_rx;
5466 5370
5467 dev_unicast_init(dev); 5371 dev_mc_init(dev);
5372 dev_uc_init(dev);
5468 5373
5469 dev_net_set(dev, &init_net); 5374 dev_net_set(dev, &init_net);
5470 5375
@@ -5472,6 +5377,11 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
5472 dev->num_tx_queues = queue_count; 5377 dev->num_tx_queues = queue_count;
5473 dev->real_num_tx_queues = queue_count; 5378 dev->real_num_tx_queues = queue_count;
5474 5379
5380#ifdef CONFIG_RPS
5381 dev->_rx = rx;
5382 dev->num_rx_queues = queue_count;
5383#endif
5384
5475 dev->gso_max_size = GSO_MAX_SIZE; 5385 dev->gso_max_size = GSO_MAX_SIZE;
5476 5386
5477 netdev_init_queues(dev); 5387 netdev_init_queues(dev);
@@ -5486,9 +5396,12 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
5486 strcpy(dev->name, name); 5396 strcpy(dev->name, name);
5487 return dev; 5397 return dev;
5488 5398
5399free_rx:
5400#ifdef CONFIG_RPS
5401 kfree(rx);
5489free_tx: 5402free_tx:
5403#endif
5490 kfree(tx); 5404 kfree(tx);
5491
5492free_p: 5405free_p:
5493 kfree(p); 5406 kfree(p);
5494 return NULL; 5407 return NULL;
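
The RX queue array allocated above stores its reference count in element 0 and points every element's ->first back at that element, so each queue can be released independently and the whole array is freed exactly once when the count drops to zero. A user-space sketch of that shared-refcount-in-first-element layout (illustrative names, not the kernel's netdev_rx_queue API):

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct rx_queue {
	struct rx_queue *first;	/* element 0 of the array */
	atomic_int count;	/* meaningful only in element 0 */
	int index;
};

static struct rx_queue *alloc_rx_queues(int n)
{
	struct rx_queue *rx = calloc(n, sizeof(*rx));
	if (!rx)
		return NULL;
	atomic_init(&rx[0].count, n);
	for (int i = 0; i < n; i++) {
		rx[i].first = rx;
		rx[i].index = i;
	}
	return rx;
}

/* Drop one queue's reference; the last put frees the whole array. */
static void put_rx_queue(struct rx_queue *q)
{
	if (atomic_fetch_sub(&q->first->count, 1) == 1) {
		printf("freeing the whole array\n");
		free(q->first);
	}
}

int main(void)
{
	struct rx_queue *rx = alloc_rx_queues(4);
	if (!rx)
		return 1;
	for (int i = 3; i >= 0; i--)
		put_rx_queue(&rx[i]);
	return 0;
}
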
@@ -5690,8 +5603,8 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
5690 /* 5603 /*
5691 * Flush the unicast and multicast chains 5604 * Flush the unicast and multicast chains
5692 */ 5605 */
5693 dev_unicast_flush(dev); 5606 dev_uc_flush(dev);
5694 dev_addr_discard(dev); 5607 dev_mc_flush(dev);
5695 5608
5696 netdev_unregister_kobject(dev); 5609 netdev_unregister_kobject(dev);
5697 5610
@@ -5734,7 +5647,6 @@ static int dev_cpu_callback(struct notifier_block *nfb,
5734 void *ocpu) 5647 void *ocpu)
5735{ 5648{
5736 struct sk_buff **list_skb; 5649 struct sk_buff **list_skb;
5737 struct Qdisc **list_net;
5738 struct sk_buff *skb; 5650 struct sk_buff *skb;
5739 unsigned int cpu, oldcpu = (unsigned long)ocpu; 5651 unsigned int cpu, oldcpu = (unsigned long)ocpu;
5740 struct softnet_data *sd, *oldsd; 5652 struct softnet_data *sd, *oldsd;
@@ -5755,19 +5667,23 @@ static int dev_cpu_callback(struct notifier_block *nfb,
5755 *list_skb = oldsd->completion_queue; 5667 *list_skb = oldsd->completion_queue;
5756 oldsd->completion_queue = NULL; 5668 oldsd->completion_queue = NULL;
5757 5669
5758 /* Find end of our output_queue. */
5759 list_net = &sd->output_queue;
5760 while (*list_net)
5761 list_net = &(*list_net)->next_sched;
5762 /* Append output queue from offline CPU. */ 5670 /* Append output queue from offline CPU. */
5763 *list_net = oldsd->output_queue; 5671 if (oldsd->output_queue) {
5764 oldsd->output_queue = NULL; 5672 *sd->output_queue_tailp = oldsd->output_queue;
5673 sd->output_queue_tailp = oldsd->output_queue_tailp;
5674 oldsd->output_queue = NULL;
5675 oldsd->output_queue_tailp = &oldsd->output_queue;
5676 }
5765 5677
5766 raise_softirq_irqoff(NET_TX_SOFTIRQ); 5678 raise_softirq_irqoff(NET_TX_SOFTIRQ);
5767 local_irq_enable(); 5679 local_irq_enable();
5768 5680
5769 /* Process offline CPU's input_pkt_queue */ 5681 /* Process offline CPU's input_pkt_queue */
5770 while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) 5682 while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) {
5683 netif_rx(skb);
5684 input_queue_head_add(oldsd, 1);
5685 }
5686 while ((skb = __skb_dequeue(&oldsd->process_queue)))
5771 netif_rx(skb); 5687 netif_rx(skb);
5772 5688
5773 return NOTIFY_OK; 5689 return NOTIFY_OK;
@@ -5984,17 +5900,26 @@ static int __init net_dev_init(void)
5984 */ 5900 */
5985 5901
5986 for_each_possible_cpu(i) { 5902 for_each_possible_cpu(i) {
5987 struct softnet_data *queue; 5903 struct softnet_data *sd = &per_cpu(softnet_data, i);
5988 5904
5989 queue = &per_cpu(softnet_data, i); 5905 memset(sd, 0, sizeof(*sd));
5990 skb_queue_head_init(&queue->input_pkt_queue); 5906 skb_queue_head_init(&sd->input_pkt_queue);
5991 queue->completion_queue = NULL; 5907 skb_queue_head_init(&sd->process_queue);
5992 INIT_LIST_HEAD(&queue->poll_list); 5908 sd->completion_queue = NULL;
5909 INIT_LIST_HEAD(&sd->poll_list);
5910 sd->output_queue = NULL;
5911 sd->output_queue_tailp = &sd->output_queue;
5912#ifdef CONFIG_RPS
5913 sd->csd.func = rps_trigger_softirq;
5914 sd->csd.info = sd;
5915 sd->csd.flags = 0;
5916 sd->cpu = i;
5917#endif
5993 5918
5994 queue->backlog.poll = process_backlog; 5919 sd->backlog.poll = process_backlog;
5995 queue->backlog.weight = weight_p; 5920 sd->backlog.weight = weight_p;
5996 queue->backlog.gro_list = NULL; 5921 sd->backlog.gro_list = NULL;
5997 queue->backlog.gro_count = 0; 5922 sd->backlog.gro_count = 0;
5998 } 5923 }
5999 5924
6000 dev_boot_phase = 0; 5925 dev_boot_phase = 0;
@@ -6029,7 +5954,7 @@ subsys_initcall(net_dev_init);
6029 5954
6030static int __init initialize_hashrnd(void) 5955static int __init initialize_hashrnd(void)
6031{ 5956{
6032 get_random_bytes(&skb_tx_hashrnd, sizeof(skb_tx_hashrnd)); 5957 get_random_bytes(&hashrnd, sizeof(hashrnd));
6033 return 0; 5958 return 0;
6034} 5959}
6035 5960