Diffstat (limited to 'net/core/dev.c')
-rw-r--r--  net/core/dev.c | 613
1 file changed, 369 insertions(+), 244 deletions(-)
diff --git a/net/core/dev.c b/net/core/dev.c
index 7ec85e27bee..35dfb831848 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -131,6 +131,7 @@
131#include <trace/events/net.h> 131#include <trace/events/net.h>
132#include <trace/events/skb.h> 132#include <trace/events/skb.h>
133#include <linux/pci.h> 133#include <linux/pci.h>
134#include <linux/inetdevice.h>
134 135
135#include "net-sysfs.h" 136#include "net-sysfs.h"
136 137
@@ -373,6 +374,14 @@ static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
373 * --ANK (980803) 374 * --ANK (980803)
374 */ 375 */
375 376
377static inline struct list_head *ptype_head(const struct packet_type *pt)
378{
379 if (pt->type == htons(ETH_P_ALL))
380 return &ptype_all;
381 else
382 return &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
383}
384
376/** 385/**
377 * dev_add_pack - add packet handler 386 * dev_add_pack - add packet handler
378 * @pt: packet type declaration 387 * @pt: packet type declaration
@@ -388,16 +397,11 @@ static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
388 397
389void dev_add_pack(struct packet_type *pt) 398void dev_add_pack(struct packet_type *pt)
390{ 399{
391 int hash; 400 struct list_head *head = ptype_head(pt);
392 401
393 spin_lock_bh(&ptype_lock); 402 spin_lock(&ptype_lock);
394 if (pt->type == htons(ETH_P_ALL)) 403 list_add_rcu(&pt->list, head);
395 list_add_rcu(&pt->list, &ptype_all); 404 spin_unlock(&ptype_lock);
396 else {
397 hash = ntohs(pt->type) & PTYPE_HASH_MASK;
398 list_add_rcu(&pt->list, &ptype_base[hash]);
399 }
400 spin_unlock_bh(&ptype_lock);
401} 405}
402EXPORT_SYMBOL(dev_add_pack); 406EXPORT_SYMBOL(dev_add_pack);
403 407
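For context on the ptype registration path reworked above, a minimal sketch of a caller; the handler and variable names below are illustrative only, not part of this patch:

static int example_rcv(struct sk_buff *skb, struct net_device *dev,
		       struct packet_type *pt, struct net_device *orig_dev)
{
	/* Invoked for every received frame once registered on ETH_P_ALL. */
	kfree_skb(skb);
	return NET_RX_SUCCESS;
}

static struct packet_type example_pt __read_mostly = {
	.type = cpu_to_be16(ETH_P_ALL),	/* resolves to &ptype_all via ptype_head() */
	.func = example_rcv,
};

/* dev_add_pack(&example_pt);   ...   dev_remove_pack(&example_pt); */
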
@@ -416,15 +420,10 @@ EXPORT_SYMBOL(dev_add_pack);
416 */ 420 */
417void __dev_remove_pack(struct packet_type *pt) 421void __dev_remove_pack(struct packet_type *pt)
418{ 422{
419 struct list_head *head; 423 struct list_head *head = ptype_head(pt);
420 struct packet_type *pt1; 424 struct packet_type *pt1;
421 425
422 spin_lock_bh(&ptype_lock); 426 spin_lock(&ptype_lock);
423
424 if (pt->type == htons(ETH_P_ALL))
425 head = &ptype_all;
426 else
427 head = &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
428 427
429 list_for_each_entry(pt1, head, list) { 428 list_for_each_entry(pt1, head, list) {
430 if (pt == pt1) { 429 if (pt == pt1) {
@@ -435,7 +434,7 @@ void __dev_remove_pack(struct packet_type *pt)
435 434
436 printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt); 435 printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
437out: 436out:
438 spin_unlock_bh(&ptype_lock); 437 spin_unlock(&ptype_lock);
439} 438}
440EXPORT_SYMBOL(__dev_remove_pack); 439EXPORT_SYMBOL(__dev_remove_pack);
441 440
@@ -1486,8 +1485,9 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
1486 skb_orphan(skb); 1485 skb_orphan(skb);
1487 nf_reset(skb); 1486 nf_reset(skb);
1488 1487
1489 if (!(dev->flags & IFF_UP) || 1488 if (unlikely(!(dev->flags & IFF_UP) ||
1490 (skb->len > (dev->mtu + dev->hard_header_len))) { 1489 (skb->len > (dev->mtu + dev->hard_header_len + VLAN_HLEN)))) {
1490 atomic_long_inc(&dev->rx_dropped);
1491 kfree_skb(skb); 1491 kfree_skb(skb);
1492 return NET_RX_DROP; 1492 return NET_RX_DROP;
1493 } 1493 }
@@ -1555,21 +1555,56 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1555 * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues 1555 * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
1556 * greater then real_num_tx_queues stale skbs on the qdisc must be flushed. 1556 * greater then real_num_tx_queues stale skbs on the qdisc must be flushed.
1557 */ 1557 */
1558void netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) 1558int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
1559{ 1559{
1560 unsigned int real_num = dev->real_num_tx_queues; 1560 if (txq < 1 || txq > dev->num_tx_queues)
1561 return -EINVAL;
1561 1562
1562 if (unlikely(txq > dev->num_tx_queues)) 1563 if (dev->reg_state == NETREG_REGISTERED) {
1563 ; 1564 ASSERT_RTNL();
1564 else if (txq > real_num) 1565
1565 dev->real_num_tx_queues = txq; 1566 if (txq < dev->real_num_tx_queues)
1566 else if (txq < real_num) { 1567 qdisc_reset_all_tx_gt(dev, txq);
1567 dev->real_num_tx_queues = txq;
1568 qdisc_reset_all_tx_gt(dev, txq);
1569 } 1568 }
1569
1570 dev->real_num_tx_queues = txq;
1571 return 0;
1570} 1572}
1571EXPORT_SYMBOL(netif_set_real_num_tx_queues); 1573EXPORT_SYMBOL(netif_set_real_num_tx_queues);
1572 1574
1575#ifdef CONFIG_RPS
1576/**
1577 * netif_set_real_num_rx_queues - set actual number of RX queues used
1578 * @dev: Network device
1579 * @rxq: Actual number of RX queues
1580 *
1581 * This must be called either with the rtnl_lock held or before
1582 * registration of the net device. Returns 0 on success, or a
1583 * negative error code. If called before registration, it always
1584 * succeeds.
1585 */
1586int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq)
1587{
1588 int rc;
1589
1590 if (rxq < 1 || rxq > dev->num_rx_queues)
1591 return -EINVAL;
1592
1593 if (dev->reg_state == NETREG_REGISTERED) {
1594 ASSERT_RTNL();
1595
1596 rc = net_rx_queue_update_kobjects(dev, dev->real_num_rx_queues,
1597 rxq);
1598 if (rc)
1599 return rc;
1600 }
1601
1602 dev->real_num_rx_queues = rxq;
1603 return 0;
1604}
1605EXPORT_SYMBOL(netif_set_real_num_rx_queues);
1606#endif
1607
1573static inline void __netif_reschedule(struct Qdisc *q) 1608static inline void __netif_reschedule(struct Qdisc *q)
1574{ 1609{
1575 struct softnet_data *sd; 1610 struct softnet_data *sd;
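A hedged sketch of how a driver might consume the reworked helpers above; the function and parameter names are hypothetical. Note that the TX variant now returns an error code, and the RX variant only exists under CONFIG_RPS in this patch:

/* Assumed to run under rtnl_lock(), as required once the device is
 * registered (ASSERT_RTNL() above), e.g. from an ethtool channel hook.
 */
static int example_set_channels(struct net_device *dev,
				unsigned int txq, unsigned int rxq)
{
	int err;

	err = netif_set_real_num_tx_queues(dev, txq);
	if (err)
		return err;

#ifdef CONFIG_RPS
	err = netif_set_real_num_rx_queues(dev, rxq);
#endif
	return err;
}
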
@@ -1650,10 +1685,10 @@ EXPORT_SYMBOL(netif_device_attach);
1650 1685
1651static bool can_checksum_protocol(unsigned long features, __be16 protocol) 1686static bool can_checksum_protocol(unsigned long features, __be16 protocol)
1652{ 1687{
1653 return ((features & NETIF_F_GEN_CSUM) || 1688 return ((features & NETIF_F_NO_CSUM) ||
1654 ((features & NETIF_F_IP_CSUM) && 1689 ((features & NETIF_F_V4_CSUM) &&
1655 protocol == htons(ETH_P_IP)) || 1690 protocol == htons(ETH_P_IP)) ||
1656 ((features & NETIF_F_IPV6_CSUM) && 1691 ((features & NETIF_F_V6_CSUM) &&
1657 protocol == htons(ETH_P_IPV6)) || 1692 protocol == htons(ETH_P_IPV6)) ||
1658 ((features & NETIF_F_FCOE_CRC) && 1693 ((features & NETIF_F_FCOE_CRC) &&
1659 protocol == htons(ETH_P_FCOE))); 1694 protocol == htons(ETH_P_FCOE)));
@@ -1661,17 +1696,18 @@ static bool can_checksum_protocol(unsigned long features, __be16 protocol)
1661 1696
1662static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb) 1697static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
1663{ 1698{
1664 if (can_checksum_protocol(dev->features, skb->protocol)) 1699 __be16 protocol = skb->protocol;
1665 return true; 1700 int features = dev->features;
1666 1701
1667 if (skb->protocol == htons(ETH_P_8021Q)) { 1702 if (vlan_tx_tag_present(skb)) {
1703 features &= dev->vlan_features;
1704 } else if (protocol == htons(ETH_P_8021Q)) {
1668 struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; 1705 struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1669 if (can_checksum_protocol(dev->features & dev->vlan_features, 1706 protocol = veh->h_vlan_encapsulated_proto;
1670 veh->h_vlan_encapsulated_proto)) 1707 features &= dev->vlan_features;
1671 return true;
1672 } 1708 }
1673 1709
1674 return false; 1710 return can_checksum_protocol(features, protocol);
1675} 1711}
1676 1712
1677/** 1713/**
@@ -1760,6 +1796,16 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
1760 __be16 type = skb->protocol; 1796 __be16 type = skb->protocol;
1761 int err; 1797 int err;
1762 1798
1799 if (type == htons(ETH_P_8021Q)) {
1800 struct vlan_ethhdr *veh;
1801
1802 if (unlikely(!pskb_may_pull(skb, VLAN_ETH_HLEN)))
1803 return ERR_PTR(-EINVAL);
1804
1805 veh = (struct vlan_ethhdr *)skb->data;
1806 type = veh->h_vlan_encapsulated_proto;
1807 }
1808
1763 skb_reset_mac_header(skb); 1809 skb_reset_mac_header(skb);
1764 skb->mac_len = skb->network_header - skb->mac_header; 1810 skb->mac_len = skb->network_header - skb->mac_header;
1765 __skb_pull(skb, skb->mac_len); 1811 __skb_pull(skb, skb->mac_len);
@@ -1904,14 +1950,14 @@ static int dev_gso_segment(struct sk_buff *skb)
1904 1950
1905/* 1951/*
1906 * Try to orphan skb early, right before transmission by the device. 1952 * Try to orphan skb early, right before transmission by the device.
1907 * We cannot orphan skb if tx timestamp is requested, since 1953 * We cannot orphan skb if tx timestamp is requested or the sk-reference
1908 * drivers need to call skb_tstamp_tx() to send the timestamp. 1954 * is needed on driver level for other reasons, e.g. see net/can/raw.c
1909 */ 1955 */
1910static inline void skb_orphan_try(struct sk_buff *skb) 1956static inline void skb_orphan_try(struct sk_buff *skb)
1911{ 1957{
1912 struct sock *sk = skb->sk; 1958 struct sock *sk = skb->sk;
1913 1959
1914 if (sk && !skb_tx(skb)->flags) { 1960 if (sk && !skb_shinfo(skb)->tx_flags) {
1915 /* skb_tx_hash() wont be able to get sk. 1961 /* skb_tx_hash() wont be able to get sk.
1916 * We copy sk_hash into skb->rxhash 1962 * We copy sk_hash into skb->rxhash
1917 */ 1963 */
@@ -1931,9 +1977,14 @@ static inline void skb_orphan_try(struct sk_buff *skb)
1931static inline int skb_needs_linearize(struct sk_buff *skb, 1977static inline int skb_needs_linearize(struct sk_buff *skb,
1932 struct net_device *dev) 1978 struct net_device *dev)
1933{ 1979{
1980 int features = dev->features;
1981
1982 if (skb->protocol == htons(ETH_P_8021Q) || vlan_tx_tag_present(skb))
1983 features &= dev->vlan_features;
1984
1934 return skb_is_nonlinear(skb) && 1985 return skb_is_nonlinear(skb) &&
1935 ((skb_has_frags(skb) && !(dev->features & NETIF_F_FRAGLIST)) || 1986 ((skb_has_frag_list(skb) && !(features & NETIF_F_FRAGLIST)) ||
1936 (skb_shinfo(skb)->nr_frags && (!(dev->features & NETIF_F_SG) || 1987 (skb_shinfo(skb)->nr_frags && (!(features & NETIF_F_SG) ||
1937 illegal_highdma(dev, skb)))); 1988 illegal_highdma(dev, skb))));
1938} 1989}
1939 1990
@@ -1956,6 +2007,15 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
1956 2007
1957 skb_orphan_try(skb); 2008 skb_orphan_try(skb);
1958 2009
2010 if (vlan_tx_tag_present(skb) &&
2011 !(dev->features & NETIF_F_HW_VLAN_TX)) {
2012 skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb));
2013 if (unlikely(!skb))
2014 goto out;
2015
2016 skb->vlan_tci = 0;
2017 }
2018
1959 if (netif_needs_gso(dev, skb)) { 2019 if (netif_needs_gso(dev, skb)) {
1960 if (unlikely(dev_gso_segment(skb))) 2020 if (unlikely(dev_gso_segment(skb)))
1961 goto out_kfree_skb; 2021 goto out_kfree_skb;
@@ -2019,6 +2079,7 @@ out_kfree_gso_skb:
2019 skb->destructor = DEV_GSO_CB(skb)->destructor; 2079 skb->destructor = DEV_GSO_CB(skb)->destructor;
2020out_kfree_skb: 2080out_kfree_skb:
2021 kfree_skb(skb); 2081 kfree_skb(skb);
2082out:
2022 return rc; 2083 return rc;
2023} 2084}
2024 2085
@@ -2147,6 +2208,9 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
2147 return rc; 2208 return rc;
2148} 2209}
2149 2210
2211static DEFINE_PER_CPU(int, xmit_recursion);
2212#define RECURSION_LIMIT 10
2213
2150/** 2214/**
2151 * dev_queue_xmit - transmit a buffer 2215 * dev_queue_xmit - transmit a buffer
2152 * @skb: buffer to transmit 2216 * @skb: buffer to transmit
@@ -2213,10 +2277,15 @@ int dev_queue_xmit(struct sk_buff *skb)
2213 2277
2214 if (txq->xmit_lock_owner != cpu) { 2278 if (txq->xmit_lock_owner != cpu) {
2215 2279
2280 if (__this_cpu_read(xmit_recursion) > RECURSION_LIMIT)
2281 goto recursion_alert;
2282
2216 HARD_TX_LOCK(dev, txq, cpu); 2283 HARD_TX_LOCK(dev, txq, cpu);
2217 2284
2218 if (!netif_tx_queue_stopped(txq)) { 2285 if (!netif_tx_queue_stopped(txq)) {
2286 __this_cpu_inc(xmit_recursion);
2219 rc = dev_hard_start_xmit(skb, dev, txq); 2287 rc = dev_hard_start_xmit(skb, dev, txq);
2288 __this_cpu_dec(xmit_recursion);
2220 if (dev_xmit_complete(rc)) { 2289 if (dev_xmit_complete(rc)) {
2221 HARD_TX_UNLOCK(dev, txq); 2290 HARD_TX_UNLOCK(dev, txq);
2222 goto out; 2291 goto out;
@@ -2228,7 +2297,9 @@ int dev_queue_xmit(struct sk_buff *skb)
2228 "queue packet!\n", dev->name); 2297 "queue packet!\n", dev->name);
2229 } else { 2298 } else {
2230 /* Recursion is detected! It is possible, 2299 /* Recursion is detected! It is possible,
2231 * unfortunately */ 2300 * unfortunately
2301 */
2302recursion_alert:
2232 if (net_ratelimit()) 2303 if (net_ratelimit())
2233 printk(KERN_CRIT "Dead loop on virtual device " 2304 printk(KERN_CRIT "Dead loop on virtual device "
2234 "%s, fix it urgently!\n", dev->name); 2305 "%s, fix it urgently!\n", dev->name);
@@ -2264,69 +2335,44 @@ static inline void ____napi_schedule(struct softnet_data *sd,
2264 __raise_softirq_irqoff(NET_RX_SOFTIRQ); 2335 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2265} 2336}
2266 2337
2267#ifdef CONFIG_RPS
2268
2269/* One global table that all flow-based protocols share. */
2270struct rps_sock_flow_table *rps_sock_flow_table __read_mostly;
2271EXPORT_SYMBOL(rps_sock_flow_table);
2272
2273/* 2338/*
2274 * get_rps_cpu is called from netif_receive_skb and returns the target 2339 * __skb_get_rxhash: calculate a flow hash based on src/dst addresses
2275 * CPU from the RPS map of the receiving queue for a given skb. 2340 * and src/dst port numbers. Returns a non-zero hash number on success
2276 * rcu_read_lock must be held on entry. 2341 * and 0 on failure.
2277 */ 2342 */
2278static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, 2343__u32 __skb_get_rxhash(struct sk_buff *skb)
2279 struct rps_dev_flow **rflowp)
2280{ 2344{
2345 int nhoff, hash = 0, poff;
2281 struct ipv6hdr *ip6; 2346 struct ipv6hdr *ip6;
2282 struct iphdr *ip; 2347 struct iphdr *ip;
2283 struct netdev_rx_queue *rxqueue;
2284 struct rps_map *map;
2285 struct rps_dev_flow_table *flow_table;
2286 struct rps_sock_flow_table *sock_flow_table;
2287 int cpu = -1;
2288 u8 ip_proto; 2348 u8 ip_proto;
2289 u16 tcpu;
2290 u32 addr1, addr2, ihl; 2349 u32 addr1, addr2, ihl;
2291 union { 2350 union {
2292 u32 v32; 2351 u32 v32;
2293 u16 v16[2]; 2352 u16 v16[2];
2294 } ports; 2353 } ports;
2295 2354
2296 if (skb_rx_queue_recorded(skb)) { 2355 nhoff = skb_network_offset(skb);
2297 u16 index = skb_get_rx_queue(skb);
2298 if (unlikely(index >= dev->num_rx_queues)) {
2299 WARN_ONCE(dev->num_rx_queues > 1, "%s received packet "
2300 "on queue %u, but number of RX queues is %u\n",
2301 dev->name, index, dev->num_rx_queues);
2302 goto done;
2303 }
2304 rxqueue = dev->_rx + index;
2305 } else
2306 rxqueue = dev->_rx;
2307
2308 if (!rxqueue->rps_map && !rxqueue->rps_flow_table)
2309 goto done;
2310
2311 if (skb->rxhash)
2312 goto got_hash; /* Skip hash computation on packet header */
2313 2356
2314 switch (skb->protocol) { 2357 switch (skb->protocol) {
2315 case __constant_htons(ETH_P_IP): 2358 case __constant_htons(ETH_P_IP):
2316 if (!pskb_may_pull(skb, sizeof(*ip))) 2359 if (!pskb_may_pull(skb, sizeof(*ip) + nhoff))
2317 goto done; 2360 goto done;
2318 2361
2319 ip = (struct iphdr *) skb->data; 2362 ip = (struct iphdr *) (skb->data + nhoff);
2320 ip_proto = ip->protocol; 2363 if (ip->frag_off & htons(IP_MF | IP_OFFSET))
2364 ip_proto = 0;
2365 else
2366 ip_proto = ip->protocol;
2321 addr1 = (__force u32) ip->saddr; 2367 addr1 = (__force u32) ip->saddr;
2322 addr2 = (__force u32) ip->daddr; 2368 addr2 = (__force u32) ip->daddr;
2323 ihl = ip->ihl; 2369 ihl = ip->ihl;
2324 break; 2370 break;
2325 case __constant_htons(ETH_P_IPV6): 2371 case __constant_htons(ETH_P_IPV6):
2326 if (!pskb_may_pull(skb, sizeof(*ip6))) 2372 if (!pskb_may_pull(skb, sizeof(*ip6) + nhoff))
2327 goto done; 2373 goto done;
2328 2374
2329 ip6 = (struct ipv6hdr *) skb->data; 2375 ip6 = (struct ipv6hdr *) (skb->data + nhoff);
2330 ip_proto = ip6->nexthdr; 2376 ip_proto = ip6->nexthdr;
2331 addr1 = (__force u32) ip6->saddr.s6_addr32[3]; 2377 addr1 = (__force u32) ip6->saddr.s6_addr32[3];
2332 addr2 = (__force u32) ip6->daddr.s6_addr32[3]; 2378 addr2 = (__force u32) ip6->daddr.s6_addr32[3];
@@ -2335,33 +2381,81 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
2335 default: 2381 default:
2336 goto done; 2382 goto done;
2337 } 2383 }
2338 switch (ip_proto) { 2384
2339 case IPPROTO_TCP: 2385 ports.v32 = 0;
2340 case IPPROTO_UDP: 2386 poff = proto_ports_offset(ip_proto);
2341 case IPPROTO_DCCP: 2387 if (poff >= 0) {
2342 case IPPROTO_ESP: 2388 nhoff += ihl * 4 + poff;
2343 case IPPROTO_AH: 2389 if (pskb_may_pull(skb, nhoff + 4)) {
2344 case IPPROTO_SCTP: 2390 ports.v32 = * (__force u32 *) (skb->data + nhoff);
2345 case IPPROTO_UDPLITE:
2346 if (pskb_may_pull(skb, (ihl * 4) + 4)) {
2347 ports.v32 = * (__force u32 *) (skb->data + (ihl * 4));
2348 if (ports.v16[1] < ports.v16[0]) 2391 if (ports.v16[1] < ports.v16[0])
2349 swap(ports.v16[0], ports.v16[1]); 2392 swap(ports.v16[0], ports.v16[1]);
2350 break;
2351 } 2393 }
2352 default:
2353 ports.v32 = 0;
2354 break;
2355 } 2394 }
2356 2395
2357 /* get a consistent hash (same value on both flow directions) */ 2396 /* get a consistent hash (same value on both flow directions) */
2358 if (addr2 < addr1) 2397 if (addr2 < addr1)
2359 swap(addr1, addr2); 2398 swap(addr1, addr2);
2360 skb->rxhash = jhash_3words(addr1, addr2, ports.v32, hashrnd);
2361 if (!skb->rxhash)
2362 skb->rxhash = 1;
2363 2399
2364got_hash: 2400 hash = jhash_3words(addr1, addr2, ports.v32, hashrnd);
2401 if (!hash)
2402 hash = 1;
2403
2404done:
2405 return hash;
2406}
2407EXPORT_SYMBOL(__skb_get_rxhash);
2408
2409#ifdef CONFIG_RPS
2410
2411/* One global table that all flow-based protocols share. */
2412struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly;
2413EXPORT_SYMBOL(rps_sock_flow_table);
2414
2415/*
2416 * get_rps_cpu is called from netif_receive_skb and returns the target
2417 * CPU from the RPS map of the receiving queue for a given skb.
2418 * rcu_read_lock must be held on entry.
2419 */
2420static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
2421 struct rps_dev_flow **rflowp)
2422{
2423 struct netdev_rx_queue *rxqueue;
2424 struct rps_map *map;
2425 struct rps_dev_flow_table *flow_table;
2426 struct rps_sock_flow_table *sock_flow_table;
2427 int cpu = -1;
2428 u16 tcpu;
2429
2430 if (skb_rx_queue_recorded(skb)) {
2431 u16 index = skb_get_rx_queue(skb);
2432 if (unlikely(index >= dev->real_num_rx_queues)) {
2433 WARN_ONCE(dev->real_num_rx_queues > 1,
2434 "%s received packet on queue %u, but number "
2435 "of RX queues is %u\n",
2436 dev->name, index, dev->real_num_rx_queues);
2437 goto done;
2438 }
2439 rxqueue = dev->_rx + index;
2440 } else
2441 rxqueue = dev->_rx;
2442
2443 map = rcu_dereference(rxqueue->rps_map);
2444 if (map) {
2445 if (map->len == 1) {
2446 tcpu = map->cpus[0];
2447 if (cpu_online(tcpu))
2448 cpu = tcpu;
2449 goto done;
2450 }
2451 } else if (!rcu_dereference_raw(rxqueue->rps_flow_table)) {
2452 goto done;
2453 }
2454
2455 skb_reset_network_header(skb);
2456 if (!skb_get_rxhash(skb))
2457 goto done;
2458
2365 flow_table = rcu_dereference(rxqueue->rps_flow_table); 2459 flow_table = rcu_dereference(rxqueue->rps_flow_table);
2366 sock_flow_table = rcu_dereference(rps_sock_flow_table); 2460 sock_flow_table = rcu_dereference(rps_sock_flow_table);
2367 if (flow_table && sock_flow_table) { 2461 if (flow_table && sock_flow_table) {
@@ -2401,7 +2495,6 @@ got_hash:
2401 } 2495 }
2402 } 2496 }
2403 2497
2404 map = rcu_dereference(rxqueue->rps_map);
2405 if (map) { 2498 if (map) {
2406 tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32]; 2499 tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32];
2407 2500
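The RPS map lookup kept above selects a CPU with a multiply-shift rather than a modulo; a small stand-alone illustration of that mapping (names are illustrative):

/* Maps a 32-bit flow hash uniformly onto [0, len) without a divide:
 * (hash * len) / 2^32. For a well-mixed hash this distributes like
 * hash % len, but avoids the division on most CPUs.
 */
static inline unsigned int example_rps_bucket(u32 rxhash, unsigned int len)
{
	return ((u64)rxhash * len) >> 32;
}
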
@@ -2487,6 +2580,7 @@ enqueue:
2487 2580
2488 local_irq_restore(flags); 2581 local_irq_restore(flags);
2489 2582
2583 atomic_long_inc(&skb->dev->rx_dropped);
2490 kfree_skb(skb); 2584 kfree_skb(skb);
2491 return NET_RX_DROP; 2585 return NET_RX_DROP;
2492} 2586}
@@ -2643,11 +2737,10 @@ EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
2643 * the ingress scheduler, you just cant add policies on ingress. 2737 * the ingress scheduler, you just cant add policies on ingress.
2644 * 2738 *
2645 */ 2739 */
2646static int ing_filter(struct sk_buff *skb) 2740static int ing_filter(struct sk_buff *skb, struct netdev_queue *rxq)
2647{ 2741{
2648 struct net_device *dev = skb->dev; 2742 struct net_device *dev = skb->dev;
2649 u32 ttl = G_TC_RTTL(skb->tc_verd); 2743 u32 ttl = G_TC_RTTL(skb->tc_verd);
2650 struct netdev_queue *rxq;
2651 int result = TC_ACT_OK; 2744 int result = TC_ACT_OK;
2652 struct Qdisc *q; 2745 struct Qdisc *q;
2653 2746
@@ -2661,8 +2754,6 @@ static int ing_filter(struct sk_buff *skb)
2661 skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl); 2754 skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
2662 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS); 2755 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
2663 2756
2664 rxq = &dev->rx_queue;
2665
2666 q = rxq->qdisc; 2757 q = rxq->qdisc;
2667 if (q != &noop_qdisc) { 2758 if (q != &noop_qdisc) {
2668 spin_lock(qdisc_lock(q)); 2759 spin_lock(qdisc_lock(q));
@@ -2678,7 +2769,9 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb,
2678 struct packet_type **pt_prev, 2769 struct packet_type **pt_prev,
2679 int *ret, struct net_device *orig_dev) 2770 int *ret, struct net_device *orig_dev)
2680{ 2771{
2681 if (skb->dev->rx_queue.qdisc == &noop_qdisc) 2772 struct netdev_queue *rxq = rcu_dereference(skb->dev->ingress_queue);
2773
2774 if (!rxq || rxq->qdisc == &noop_qdisc)
2682 goto out; 2775 goto out;
2683 2776
2684 if (*pt_prev) { 2777 if (*pt_prev) {
@@ -2686,7 +2779,7 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb,
2686 *pt_prev = NULL; 2779 *pt_prev = NULL;
2687 } 2780 }
2688 2781
2689 switch (ing_filter(skb)) { 2782 switch (ing_filter(skb, rxq)) {
2690 case TC_ACT_SHOT: 2783 case TC_ACT_SHOT:
2691 case TC_ACT_STOLEN: 2784 case TC_ACT_STOLEN:
2692 kfree_skb(skb); 2785 kfree_skb(skb);
@@ -2699,33 +2792,6 @@ out:
2699} 2792}
2700#endif 2793#endif
2701 2794
2702/*
2703 * netif_nit_deliver - deliver received packets to network taps
2704 * @skb: buffer
2705 *
2706 * This function is used to deliver incoming packets to network
2707 * taps. It should be used when the normal netif_receive_skb path
2708 * is bypassed, for example because of VLAN acceleration.
2709 */
2710void netif_nit_deliver(struct sk_buff *skb)
2711{
2712 struct packet_type *ptype;
2713
2714 if (list_empty(&ptype_all))
2715 return;
2716
2717 skb_reset_network_header(skb);
2718 skb_reset_transport_header(skb);
2719 skb->mac_len = skb->network_header - skb->mac_header;
2720
2721 rcu_read_lock();
2722 list_for_each_entry_rcu(ptype, &ptype_all, list) {
2723 if (!ptype->dev || ptype->dev == skb->dev)
2724 deliver_skb(skb, ptype, skb->dev);
2725 }
2726 rcu_read_unlock();
2727}
2728
2729/** 2795/**
2730 * netdev_rx_handler_register - register receive handler 2796 * netdev_rx_handler_register - register receive handler
2731 * @dev: device to register a handler for 2797 * @dev: device to register a handler for
@@ -2836,8 +2902,6 @@ static int __netif_receive_skb(struct sk_buff *skb)
2836 net_timestamp_check(skb); 2902 net_timestamp_check(skb);
2837 2903
2838 trace_netif_receive_skb(skb); 2904 trace_netif_receive_skb(skb);
2839 if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb))
2840 return NET_RX_SUCCESS;
2841 2905
2842 /* if we've gotten here through NAPI, check netpoll */ 2906 /* if we've gotten here through NAPI, check netpoll */
2843 if (netpoll_receive_skb(skb)) 2907 if (netpoll_receive_skb(skb))
@@ -2851,8 +2915,7 @@ static int __netif_receive_skb(struct sk_buff *skb)
2851 * be delivered to pkt handlers that are exact matches. Also 2915 * be delivered to pkt handlers that are exact matches. Also
2852 * the deliver_no_wcard flag will be set. If packet handlers 2916 * the deliver_no_wcard flag will be set. If packet handlers
2853 * are sensitive to duplicate packets these skbs will need to 2917 * are sensitive to duplicate packets these skbs will need to
2854 * be dropped at the handler. The vlan accel path may have 2918 * be dropped at the handler.
2855 * already set the deliver_no_wcard flag.
2856 */ 2919 */
2857 null_or_orig = NULL; 2920 null_or_orig = NULL;
2858 orig_dev = skb->dev; 2921 orig_dev = skb->dev;
@@ -2911,6 +2974,18 @@ ncls:
2911 goto out; 2974 goto out;
2912 } 2975 }
2913 2976
2977 if (vlan_tx_tag_present(skb)) {
2978 if (pt_prev) {
2979 ret = deliver_skb(skb, pt_prev, orig_dev);
2980 pt_prev = NULL;
2981 }
2982 if (vlan_hwaccel_do_receive(&skb)) {
2983 ret = __netif_receive_skb(skb);
2984 goto out;
2985 } else if (unlikely(!skb))
2986 goto out;
2987 }
2988
2914 /* 2989 /*
2915 * Make sure frames received on VLAN interfaces stacked on 2990 * Make sure frames received on VLAN interfaces stacked on
2916 * bonding interfaces still make their way to any base bonding 2991 * bonding interfaces still make their way to any base bonding
@@ -2938,6 +3013,7 @@ ncls:
2938 if (pt_prev) { 3013 if (pt_prev) {
2939 ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); 3014 ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
2940 } else { 3015 } else {
3016 atomic_long_inc(&skb->dev->rx_dropped);
2941 kfree_skb(skb); 3017 kfree_skb(skb);
2942 /* Jamal, now you will not able to escape explaining 3018 /* Jamal, now you will not able to escape explaining
2943 * me how you were going to use this. :-) 3019 * me how you were going to use this. :-)
@@ -3058,7 +3134,7 @@ out:
3058 return netif_receive_skb(skb); 3134 return netif_receive_skb(skb);
3059} 3135}
3060 3136
3061static void napi_gro_flush(struct napi_struct *napi) 3137inline void napi_gro_flush(struct napi_struct *napi)
3062{ 3138{
3063 struct sk_buff *skb, *next; 3139 struct sk_buff *skb, *next;
3064 3140
@@ -3071,6 +3147,7 @@ static void napi_gro_flush(struct napi_struct *napi)
3071 napi->gro_count = 0; 3147 napi->gro_count = 0;
3072 napi->gro_list = NULL; 3148 napi->gro_list = NULL;
3073} 3149}
3150EXPORT_SYMBOL(napi_gro_flush);
3074 3151
3075enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) 3152enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
3076{ 3153{
@@ -3085,7 +3162,7 @@ enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
3085 if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb)) 3162 if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb))
3086 goto normal; 3163 goto normal;
3087 3164
3088 if (skb_is_gso(skb) || skb_has_frags(skb)) 3165 if (skb_is_gso(skb) || skb_has_frag_list(skb))
3089 goto normal; 3166 goto normal;
3090 3167
3091 rcu_read_lock(); 3168 rcu_read_lock();
@@ -3164,16 +3241,19 @@ normal:
3164} 3241}
3165EXPORT_SYMBOL(dev_gro_receive); 3242EXPORT_SYMBOL(dev_gro_receive);
3166 3243
3167static gro_result_t 3244static inline gro_result_t
3168__napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) 3245__napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
3169{ 3246{
3170 struct sk_buff *p; 3247 struct sk_buff *p;
3171 3248
3172 for (p = napi->gro_list; p; p = p->next) { 3249 for (p = napi->gro_list; p; p = p->next) {
3173 NAPI_GRO_CB(p)->same_flow = 3250 unsigned long diffs;
3174 (p->dev == skb->dev) && 3251
3175 !compare_ether_header(skb_mac_header(p), 3252 diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
3253 diffs |= p->vlan_tci ^ skb->vlan_tci;
3254 diffs |= compare_ether_header(skb_mac_header(p),
3176 skb_gro_mac_header(skb)); 3255 skb_gro_mac_header(skb));
3256 NAPI_GRO_CB(p)->same_flow = !diffs;
3177 NAPI_GRO_CB(p)->flush = 0; 3257 NAPI_GRO_CB(p)->flush = 0;
3178 } 3258 }
3179 3259
@@ -3226,14 +3306,14 @@ gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
3226} 3306}
3227EXPORT_SYMBOL(napi_gro_receive); 3307EXPORT_SYMBOL(napi_gro_receive);
3228 3308
3229void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) 3309static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
3230{ 3310{
3231 __skb_pull(skb, skb_headlen(skb)); 3311 __skb_pull(skb, skb_headlen(skb));
3232 skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb)); 3312 skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb));
3313 skb->vlan_tci = 0;
3233 3314
3234 napi->skb = skb; 3315 napi->skb = skb;
3235} 3316}
3236EXPORT_SYMBOL(napi_reuse_skb);
3237 3317
3238struct sk_buff *napi_get_frags(struct napi_struct *napi) 3318struct sk_buff *napi_get_frags(struct napi_struct *napi)
3239{ 3319{
@@ -4867,21 +4947,6 @@ static void rollback_registered(struct net_device *dev)
4867 rollback_registered_many(&single); 4947 rollback_registered_many(&single);
4868} 4948}
4869 4949
4870static void __netdev_init_queue_locks_one(struct net_device *dev,
4871 struct netdev_queue *dev_queue,
4872 void *_unused)
4873{
4874 spin_lock_init(&dev_queue->_xmit_lock);
4875 netdev_set_xmit_lockdep_class(&dev_queue->_xmit_lock, dev->type);
4876 dev_queue->xmit_lock_owner = -1;
4877}
4878
4879static void netdev_init_queue_locks(struct net_device *dev)
4880{
4881 netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL);
4882 __netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL);
4883}
4884
4885unsigned long netdev_fix_features(unsigned long features, const char *name) 4950unsigned long netdev_fix_features(unsigned long features, const char *name)
4886{ 4951{
4887 /* Fix illegal SG+CSUM combinations. */ 4952 /* Fix illegal SG+CSUM combinations. */
@@ -4949,6 +5014,66 @@ void netif_stacked_transfer_operstate(const struct net_device *rootdev,
4949} 5014}
4950EXPORT_SYMBOL(netif_stacked_transfer_operstate); 5015EXPORT_SYMBOL(netif_stacked_transfer_operstate);
4951 5016
5017static int netif_alloc_rx_queues(struct net_device *dev)
5018{
5019#ifdef CONFIG_RPS
5020 unsigned int i, count = dev->num_rx_queues;
5021 struct netdev_rx_queue *rx;
5022
5023 BUG_ON(count < 1);
5024
5025 rx = kcalloc(count, sizeof(struct netdev_rx_queue), GFP_KERNEL);
5026 if (!rx) {
5027 pr_err("netdev: Unable to allocate %u rx queues.\n", count);
5028 return -ENOMEM;
5029 }
5030 dev->_rx = rx;
5031
5032 /*
5033 * Set a pointer to first element in the array which holds the
5034 * reference count.
5035 */
5036 for (i = 0; i < count; i++)
5037 rx[i].first = rx;
5038#endif
5039 return 0;
5040}
5041
5042static int netif_alloc_netdev_queues(struct net_device *dev)
5043{
5044 unsigned int count = dev->num_tx_queues;
5045 struct netdev_queue *tx;
5046
5047 BUG_ON(count < 1);
5048
5049 tx = kcalloc(count, sizeof(struct netdev_queue), GFP_KERNEL);
5050 if (!tx) {
5051 pr_err("netdev: Unable to allocate %u tx queues.\n",
5052 count);
5053 return -ENOMEM;
5054 }
5055 dev->_tx = tx;
5056 return 0;
5057}
5058
5059static void netdev_init_one_queue(struct net_device *dev,
5060 struct netdev_queue *queue,
5061 void *_unused)
5062{
5063 queue->dev = dev;
5064
5065 /* Initialize queue lock */
5066 spin_lock_init(&queue->_xmit_lock);
5067 netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type);
5068 queue->xmit_lock_owner = -1;
5069}
5070
5071static void netdev_init_queues(struct net_device *dev)
5072{
5073 netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
5074 spin_lock_init(&dev->tx_global_lock);
5075}
5076
4952/** 5077/**
4953 * register_netdevice - register a network device 5078 * register_netdevice - register a network device
4954 * @dev: device to register 5079 * @dev: device to register
@@ -4982,28 +5107,19 @@ int register_netdevice(struct net_device *dev)
4982 5107
4983 spin_lock_init(&dev->addr_list_lock); 5108 spin_lock_init(&dev->addr_list_lock);
4984 netdev_set_addr_lockdep_class(dev); 5109 netdev_set_addr_lockdep_class(dev);
4985 netdev_init_queue_locks(dev);
4986 5110
4987 dev->iflink = -1; 5111 dev->iflink = -1;
4988 5112
4989#ifdef CONFIG_RPS 5113 ret = netif_alloc_rx_queues(dev);
4990 if (!dev->num_rx_queues) { 5114 if (ret)
4991 /* 5115 goto out;
4992 * Allocate a single RX queue if driver never called
4993 * alloc_netdev_mq
4994 */
4995 5116
4996 dev->_rx = kzalloc(sizeof(struct netdev_rx_queue), GFP_KERNEL); 5117 ret = netif_alloc_netdev_queues(dev);
4997 if (!dev->_rx) { 5118 if (ret)
4998 ret = -ENOMEM; 5119 goto out;
4999 goto out; 5120
5000 } 5121 netdev_init_queues(dev);
5001 5122
5002 dev->_rx->first = dev->_rx;
5003 atomic_set(&dev->_rx->count, 1);
5004 dev->num_rx_queues = 1;
5005 }
5006#endif
5007 /* Init, if this function is available */ 5123 /* Init, if this function is available */
5008 if (dev->netdev_ops->ndo_init) { 5124 if (dev->netdev_ops->ndo_init) {
5009 ret = dev->netdev_ops->ndo_init(dev); 5125 ret = dev->netdev_ops->ndo_init(dev);
@@ -5043,6 +5159,12 @@ int register_netdevice(struct net_device *dev)
5043 if (dev->features & NETIF_F_SG) 5159 if (dev->features & NETIF_F_SG)
5044 dev->features |= NETIF_F_GSO; 5160 dev->features |= NETIF_F_GSO;
5045 5161
5162 /* Enable GRO and NETIF_F_HIGHDMA for vlans by default,
5163 * vlan_dev_init() will do the dev->features check, so these features
5164 * are enabled only if supported by underlying device.
5165 */
5166 dev->vlan_features |= (NETIF_F_GRO | NETIF_F_HIGHDMA);
5167
5046 ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev); 5168 ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
5047 ret = notifier_to_errno(ret); 5169 ret = notifier_to_errno(ret);
5048 if (ret) 5170 if (ret)
@@ -5113,9 +5235,6 @@ int init_dummy_netdev(struct net_device *dev)
5113 */ 5235 */
5114 dev->reg_state = NETREG_DUMMY; 5236 dev->reg_state = NETREG_DUMMY;
5115 5237
5116 /* initialize the ref count */
5117 atomic_set(&dev->refcnt, 1);
5118
5119 /* NAPI wants this */ 5238 /* NAPI wants this */
5120 INIT_LIST_HEAD(&dev->napi_list); 5239 INIT_LIST_HEAD(&dev->napi_list);
5121 5240
@@ -5123,6 +5242,11 @@ int init_dummy_netdev(struct net_device *dev)
5123 set_bit(__LINK_STATE_PRESENT, &dev->state); 5242 set_bit(__LINK_STATE_PRESENT, &dev->state);
5124 set_bit(__LINK_STATE_START, &dev->state); 5243 set_bit(__LINK_STATE_START, &dev->state);
5125 5244
5245 /* Note : We dont allocate pcpu_refcnt for dummy devices,
5246 * because users of this 'device' dont need to change
5247 * its refcount.
5248 */
5249
5126 return 0; 5250 return 0;
5127} 5251}
5128EXPORT_SYMBOL_GPL(init_dummy_netdev); 5252EXPORT_SYMBOL_GPL(init_dummy_netdev);
@@ -5164,6 +5288,16 @@ out:
5164} 5288}
5165EXPORT_SYMBOL(register_netdev); 5289EXPORT_SYMBOL(register_netdev);
5166 5290
5291int netdev_refcnt_read(const struct net_device *dev)
5292{
5293 int i, refcnt = 0;
5294
5295 for_each_possible_cpu(i)
5296 refcnt += *per_cpu_ptr(dev->pcpu_refcnt, i);
5297 return refcnt;
5298}
5299EXPORT_SYMBOL(netdev_refcnt_read);
5300
5167/* 5301/*
5168 * netdev_wait_allrefs - wait until all references are gone. 5302 * netdev_wait_allrefs - wait until all references are gone.
5169 * 5303 *
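netdev_refcnt_read() above folds a per-cpu reference count; presumably (the hold/put side is not visible in this excerpt) dev_hold()/dev_put() become cheap local per-cpu increments, roughly:

static inline void example_hold(struct net_device *dev)
{
	this_cpu_inc(*dev->pcpu_refcnt);	/* no shared-cacheline bouncing */
}

static inline void example_put(struct net_device *dev)
{
	this_cpu_dec(*dev->pcpu_refcnt);
}
/* Only slow paths (netdev_wait_allrefs, netdev_run_todo) pay for the
 * cross-CPU sum via netdev_refcnt_read().
 */
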
@@ -5178,11 +5312,14 @@ EXPORT_SYMBOL(register_netdev);
5178static void netdev_wait_allrefs(struct net_device *dev) 5312static void netdev_wait_allrefs(struct net_device *dev)
5179{ 5313{
5180 unsigned long rebroadcast_time, warning_time; 5314 unsigned long rebroadcast_time, warning_time;
5315 int refcnt;
5181 5316
5182 linkwatch_forget_dev(dev); 5317 linkwatch_forget_dev(dev);
5183 5318
5184 rebroadcast_time = warning_time = jiffies; 5319 rebroadcast_time = warning_time = jiffies;
5185 while (atomic_read(&dev->refcnt) != 0) { 5320 refcnt = netdev_refcnt_read(dev);
5321
5322 while (refcnt != 0) {
5186 if (time_after(jiffies, rebroadcast_time + 1 * HZ)) { 5323 if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
5187 rtnl_lock(); 5324 rtnl_lock();
5188 5325
@@ -5209,11 +5346,13 @@ static void netdev_wait_allrefs(struct net_device *dev)
5209 5346
5210 msleep(250); 5347 msleep(250);
5211 5348
5349 refcnt = netdev_refcnt_read(dev);
5350
5212 if (time_after(jiffies, warning_time + 10 * HZ)) { 5351 if (time_after(jiffies, warning_time + 10 * HZ)) {
5213 printk(KERN_EMERG "unregister_netdevice: " 5352 printk(KERN_EMERG "unregister_netdevice: "
5214 "waiting for %s to become free. Usage " 5353 "waiting for %s to become free. Usage "
5215 "count = %d\n", 5354 "count = %d\n",
5216 dev->name, atomic_read(&dev->refcnt)); 5355 dev->name, refcnt);
5217 warning_time = jiffies; 5356 warning_time = jiffies;
5218 } 5357 }
5219 } 5358 }
@@ -5271,9 +5410,9 @@ void netdev_run_todo(void)
5271 netdev_wait_allrefs(dev); 5410 netdev_wait_allrefs(dev);
5272 5411
5273 /* paranoia */ 5412 /* paranoia */
5274 BUG_ON(atomic_read(&dev->refcnt)); 5413 BUG_ON(netdev_refcnt_read(dev));
5275 WARN_ON(dev->ip_ptr); 5414 WARN_ON(rcu_dereference_raw(dev->ip_ptr));
5276 WARN_ON(dev->ip6_ptr); 5415 WARN_ON(rcu_dereference_raw(dev->ip6_ptr));
5277 WARN_ON(dev->dn_ptr); 5416 WARN_ON(dev->dn_ptr);
5278 5417
5279 if (dev->destructor) 5418 if (dev->destructor)
@@ -5350,30 +5489,34 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
5350 5489
5351 if (ops->ndo_get_stats64) { 5490 if (ops->ndo_get_stats64) {
5352 memset(storage, 0, sizeof(*storage)); 5491 memset(storage, 0, sizeof(*storage));
5353 return ops->ndo_get_stats64(dev, storage); 5492 ops->ndo_get_stats64(dev, storage);
5354 } 5493 } else if (ops->ndo_get_stats) {
5355 if (ops->ndo_get_stats) {
5356 netdev_stats_to_stats64(storage, ops->ndo_get_stats(dev)); 5494 netdev_stats_to_stats64(storage, ops->ndo_get_stats(dev));
5357 return storage; 5495 } else {
5496 netdev_stats_to_stats64(storage, &dev->stats);
5497 dev_txq_stats_fold(dev, storage);
5358 } 5498 }
5359 netdev_stats_to_stats64(storage, &dev->stats); 5499 storage->rx_dropped += atomic_long_read(&dev->rx_dropped);
5360 dev_txq_stats_fold(dev, storage);
5361 return storage; 5500 return storage;
5362} 5501}
5363EXPORT_SYMBOL(dev_get_stats); 5502EXPORT_SYMBOL(dev_get_stats);
5364 5503
5365static void netdev_init_one_queue(struct net_device *dev, 5504struct netdev_queue *dev_ingress_queue_create(struct net_device *dev)
5366 struct netdev_queue *queue,
5367 void *_unused)
5368{ 5505{
5369 queue->dev = dev; 5506 struct netdev_queue *queue = dev_ingress_queue(dev);
5370}
5371 5507
5372static void netdev_init_queues(struct net_device *dev) 5508#ifdef CONFIG_NET_CLS_ACT
5373{ 5509 if (queue)
5374 netdev_init_one_queue(dev, &dev->rx_queue, NULL); 5510 return queue;
5375 netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL); 5511 queue = kzalloc(sizeof(*queue), GFP_KERNEL);
5376 spin_lock_init(&dev->tx_global_lock); 5512 if (!queue)
5513 return NULL;
5514 netdev_init_one_queue(dev, queue, NULL);
5515 queue->qdisc = &noop_qdisc;
5516 queue->qdisc_sleeping = &noop_qdisc;
5517 rcu_assign_pointer(dev->ingress_queue, queue);
5518#endif
5519 return queue;
5377} 5520}
5378 5521
5379/** 5522/**
@@ -5390,17 +5533,18 @@ static void netdev_init_queues(struct net_device *dev)
5390struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, 5533struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
5391 void (*setup)(struct net_device *), unsigned int queue_count) 5534 void (*setup)(struct net_device *), unsigned int queue_count)
5392{ 5535{
5393 struct netdev_queue *tx;
5394 struct net_device *dev; 5536 struct net_device *dev;
5395 size_t alloc_size; 5537 size_t alloc_size;
5396 struct net_device *p; 5538 struct net_device *p;
5397#ifdef CONFIG_RPS
5398 struct netdev_rx_queue *rx;
5399 int i;
5400#endif
5401 5539
5402 BUG_ON(strlen(name) >= sizeof(dev->name)); 5540 BUG_ON(strlen(name) >= sizeof(dev->name));
5403 5541
5542 if (queue_count < 1) {
5543 pr_err("alloc_netdev: Unable to allocate device "
5544 "with zero queues.\n");
5545 return NULL;
5546 }
5547
5404 alloc_size = sizeof(struct net_device); 5548 alloc_size = sizeof(struct net_device);
5405 if (sizeof_priv) { 5549 if (sizeof_priv) {
5406 /* ensure 32-byte alignment of private area */ 5550 /* ensure 32-byte alignment of private area */
@@ -5416,55 +5560,31 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
5416 return NULL; 5560 return NULL;
5417 } 5561 }
5418 5562
5419 tx = kcalloc(queue_count, sizeof(struct netdev_queue), GFP_KERNEL);
5420 if (!tx) {
5421 printk(KERN_ERR "alloc_netdev: Unable to allocate "
5422 "tx qdiscs.\n");
5423 goto free_p;
5424 }
5425
5426#ifdef CONFIG_RPS
5427 rx = kcalloc(queue_count, sizeof(struct netdev_rx_queue), GFP_KERNEL);
5428 if (!rx) {
5429 printk(KERN_ERR "alloc_netdev: Unable to allocate "
5430 "rx queues.\n");
5431 goto free_tx;
5432 }
5433
5434 atomic_set(&rx->count, queue_count);
5435
5436 /*
5437 * Set a pointer to first element in the array which holds the
5438 * reference count.
5439 */
5440 for (i = 0; i < queue_count; i++)
5441 rx[i].first = rx;
5442#endif
5443
5444 dev = PTR_ALIGN(p, NETDEV_ALIGN); 5563 dev = PTR_ALIGN(p, NETDEV_ALIGN);
5445 dev->padded = (char *)dev - (char *)p; 5564 dev->padded = (char *)dev - (char *)p;
5446 5565
5566 dev->pcpu_refcnt = alloc_percpu(int);
5567 if (!dev->pcpu_refcnt)
5568 goto free_p;
5569
5447 if (dev_addr_init(dev)) 5570 if (dev_addr_init(dev))
5448 goto free_rx; 5571 goto free_pcpu;
5449 5572
5450 dev_mc_init(dev); 5573 dev_mc_init(dev);
5451 dev_uc_init(dev); 5574 dev_uc_init(dev);
5452 5575
5453 dev_net_set(dev, &init_net); 5576 dev_net_set(dev, &init_net);
5454 5577
5455 dev->_tx = tx;
5456 dev->num_tx_queues = queue_count; 5578 dev->num_tx_queues = queue_count;
5457 dev->real_num_tx_queues = queue_count; 5579 dev->real_num_tx_queues = queue_count;
5458 5580
5459#ifdef CONFIG_RPS 5581#ifdef CONFIG_RPS
5460 dev->_rx = rx;
5461 dev->num_rx_queues = queue_count; 5582 dev->num_rx_queues = queue_count;
5583 dev->real_num_rx_queues = queue_count;
5462#endif 5584#endif
5463 5585
5464 dev->gso_max_size = GSO_MAX_SIZE; 5586 dev->gso_max_size = GSO_MAX_SIZE;
5465 5587
5466 netdev_init_queues(dev);
5467
5468 INIT_LIST_HEAD(&dev->ethtool_ntuple_list.list); 5588 INIT_LIST_HEAD(&dev->ethtool_ntuple_list.list);
5469 dev->ethtool_ntuple_list.count = 0; 5589 dev->ethtool_ntuple_list.count = 0;
5470 INIT_LIST_HEAD(&dev->napi_list); 5590 INIT_LIST_HEAD(&dev->napi_list);
@@ -5475,12 +5595,8 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
5475 strcpy(dev->name, name); 5595 strcpy(dev->name, name);
5476 return dev; 5596 return dev;
5477 5597
5478free_rx: 5598free_pcpu:
5479#ifdef CONFIG_RPS 5599 free_percpu(dev->pcpu_refcnt);
5480 kfree(rx);
5481free_tx:
5482#endif
5483 kfree(tx);
5484free_p: 5600free_p:
5485 kfree(p); 5601 kfree(p);
5486 return NULL; 5602 return NULL;
@@ -5503,6 +5619,8 @@ void free_netdev(struct net_device *dev)
5503 5619
5504 kfree(dev->_tx); 5620 kfree(dev->_tx);
5505 5621
5622 kfree(rcu_dereference_raw(dev->ingress_queue));
5623
5506 /* Flush device addresses */ 5624 /* Flush device addresses */
5507 dev_addr_flush(dev); 5625 dev_addr_flush(dev);
5508 5626
@@ -5512,6 +5630,9 @@ void free_netdev(struct net_device *dev)
5512 list_for_each_entry_safe(p, n, &dev->napi_list, dev_list) 5630 list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
5513 netif_napi_del(p); 5631 netif_napi_del(p);
5514 5632
5633 free_percpu(dev->pcpu_refcnt);
5634 dev->pcpu_refcnt = NULL;
5635
5515 /* Compatibility with error handling in drivers */ 5636 /* Compatibility with error handling in drivers */
5516 if (dev->reg_state == NETREG_UNINITIALIZED) { 5637 if (dev->reg_state == NETREG_UNINITIALIZED) {
5517 kfree((char *)dev - dev->padded); 5638 kfree((char *)dev - dev->padded);
@@ -5666,6 +5787,10 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
5666 5787
5667 /* Notify protocols, that we are about to destroy 5788 /* Notify protocols, that we are about to destroy
5668 this device. They should clean all the things. 5789 this device. They should clean all the things.
5790
5791 Note that dev->reg_state stays at NETREG_REGISTERED.
5792 This is wanted because this way 8021q and macvlan know
5793 the device is just moving and can keep their slaves up.
5669 */ 5794 */
5670 call_netdevice_notifiers(NETDEV_UNREGISTER, dev); 5795 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
5671 call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev); 5796 call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);