path: root/net/core/dev.c
Diffstat (limited to 'net/core/dev.c')
-rw-r--r--    net/core/dev.c    598
1 file changed, 363 insertions(+), 235 deletions(-)
diff --git a/net/core/dev.c b/net/core/dev.c
index 8e07109cc0ef..6d4218cdb739 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -129,6 +129,7 @@
 #include <linux/random.h>
 #include <trace/events/napi.h>
 #include <linux/pci.h>
+#include <linux/inetdevice.h>
 
 #include "net-sysfs.h"
 
@@ -371,6 +372,14 @@ static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
  * --ANK (980803)
  */
 
+static inline struct list_head *ptype_head(const struct packet_type *pt)
+{
+        if (pt->type == htons(ETH_P_ALL))
+                return &ptype_all;
+        else
+                return &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
+}
+
 /**
  * dev_add_pack - add packet handler
  * @pt: packet type declaration
@@ -386,16 +395,11 @@ static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
 
 void dev_add_pack(struct packet_type *pt)
 {
-        int hash;
+        struct list_head *head = ptype_head(pt);
 
-        spin_lock_bh(&ptype_lock);
-        if (pt->type == htons(ETH_P_ALL))
-                list_add_rcu(&pt->list, &ptype_all);
-        else {
-                hash = ntohs(pt->type) & PTYPE_HASH_MASK;
-                list_add_rcu(&pt->list, &ptype_base[hash]);
-        }
-        spin_unlock_bh(&ptype_lock);
+        spin_lock(&ptype_lock);
+        list_add_rcu(&pt->list, head);
+        spin_unlock(&ptype_lock);
 }
 EXPORT_SYMBOL(dev_add_pack);
 
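[Editor's note] The hunks above fold the open-coded ETH_P_ALL/hash selection into the new ptype_head() helper and drop the _bh variants of the ptype_lock spinlock. For context, the sketch below shows how a protocol module typically uses this API; the module, handler name, and the ETH_P_ALL tap are illustrative assumptions, not part of this commit.

    /* Illustrative sketch: a minimal tap registered via dev_add_pack().
     * The handler owns the skb reference it is given and must consume it.
     */
    #include <linux/module.h>
    #include <linux/netdevice.h>
    #include <linux/skbuff.h>
    #include <linux/if_ether.h>

    static int example_rcv(struct sk_buff *skb, struct net_device *dev,
                           struct packet_type *pt, struct net_device *orig_dev)
    {
            /* inspect skb here */
            kfree_skb(skb);
            return 0;
    }

    static struct packet_type example_pt __read_mostly = {
            .type = cpu_to_be16(ETH_P_ALL),  /* maps to &ptype_all via ptype_head() */
            .func = example_rcv,
    };

    static int __init example_init(void)
    {
            dev_add_pack(&example_pt);
            return 0;
    }

    static void __exit example_exit(void)
    {
            dev_remove_pack(&example_pt);
    }

    module_init(example_init);
    module_exit(example_exit);
    MODULE_LICENSE("GPL");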
@@ -414,15 +418,10 @@ EXPORT_SYMBOL(dev_add_pack);
  */
 void __dev_remove_pack(struct packet_type *pt)
 {
-        struct list_head *head;
+        struct list_head *head = ptype_head(pt);
         struct packet_type *pt1;
 
-        spin_lock_bh(&ptype_lock);
-
-        if (pt->type == htons(ETH_P_ALL))
-                head = &ptype_all;
-        else
-                head = &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
+        spin_lock(&ptype_lock);
 
         list_for_each_entry(pt1, head, list) {
                 if (pt == pt1) {
@@ -433,7 +432,7 @@ void __dev_remove_pack(struct packet_type *pt)
 
         printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
 out:
-        spin_unlock_bh(&ptype_lock);
+        spin_unlock(&ptype_lock);
 }
 EXPORT_SYMBOL(__dev_remove_pack);
 
@@ -1484,8 +1483,9 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
         skb_orphan(skb);
         nf_reset(skb);
 
-        if (!(dev->flags & IFF_UP) ||
-            (skb->len > (dev->mtu + dev->hard_header_len + VLAN_HLEN))) {
+        if (unlikely(!(dev->flags & IFF_UP) ||
+                     (skb->len > (dev->mtu + dev->hard_header_len + VLAN_HLEN)))) {
+                atomic_long_inc(&dev->rx_dropped);
                 kfree_skb(skb);
                 return NET_RX_DROP;
         }
@@ -1553,21 +1553,56 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
  * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
  * greater then real_num_tx_queues stale skbs on the qdisc must be flushed.
  */
-void netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
+int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
 {
-        unsigned int real_num = dev->real_num_tx_queues;
+        if (txq < 1 || txq > dev->num_tx_queues)
+                return -EINVAL;
 
-        if (unlikely(txq > dev->num_tx_queues))
-                ;
-        else if (txq > real_num)
-                dev->real_num_tx_queues = txq;
-        else if (txq < real_num) {
-                dev->real_num_tx_queues = txq;
-                qdisc_reset_all_tx_gt(dev, txq);
+        if (dev->reg_state == NETREG_REGISTERED) {
+                ASSERT_RTNL();
+
+                if (txq < dev->real_num_tx_queues)
+                        qdisc_reset_all_tx_gt(dev, txq);
         }
+
+        dev->real_num_tx_queues = txq;
+        return 0;
 }
 EXPORT_SYMBOL(netif_set_real_num_tx_queues);
 
+#ifdef CONFIG_RPS
+/**
+ * netif_set_real_num_rx_queues - set actual number of RX queues used
+ * @dev: Network device
+ * @rxq: Actual number of RX queues
+ *
+ * This must be called either with the rtnl_lock held or before
+ * registration of the net device. Returns 0 on success, or a
+ * negative error code. If called before registration, it always
+ * succeeds.
+ */
+int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq)
+{
+        int rc;
+
+        if (rxq < 1 || rxq > dev->num_rx_queues)
+                return -EINVAL;
+
+        if (dev->reg_state == NETREG_REGISTERED) {
+                ASSERT_RTNL();
+
+                rc = net_rx_queue_update_kobjects(dev, dev->real_num_rx_queues,
+                                                  rxq);
+                if (rc)
+                        return rc;
+        }
+
+        dev->real_num_rx_queues = rxq;
+        return 0;
+}
+EXPORT_SYMBOL(netif_set_real_num_rx_queues);
+#endif
+
 static inline void __netif_reschedule(struct Qdisc *q)
 {
         struct softnet_data *sd;
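[Editor's note] netif_set_real_num_tx_queues() now validates its argument and reports failure, and (under CONFIG_RPS) the new netif_set_real_num_rx_queues() does the same for RX. Below is a minimal sketch of how a multiqueue driver might use the pair before register_netdev(), where both calls always succeed per the comment above; the function and the way hw_txqs/hw_rxqs are discovered are assumptions for illustration.

    #include <linux/netdevice.h>

    /* Illustrative sketch: trim the visible queue counts to what the
     * hardware actually provides, then register the device.
     */
    static int example_register(struct net_device *netdev,
                                unsigned int hw_txqs, unsigned int hw_rxqs)
    {
            int err;

            err = netif_set_real_num_tx_queues(netdev, hw_txqs);
            if (err)
                    return err;
    #ifdef CONFIG_RPS
            err = netif_set_real_num_rx_queues(netdev, hw_rxqs);
            if (err)
                    return err;
    #endif
            return register_netdev(netdev);
    }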
@@ -1659,7 +1694,12 @@ static bool can_checksum_protocol(unsigned long features, __be16 protocol)
 
 static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
 {
-        if (can_checksum_protocol(dev->features, skb->protocol))
+        int features = dev->features;
+
+        if (vlan_tx_tag_present(skb))
+                features &= dev->vlan_features;
+
+        if (can_checksum_protocol(features, skb->protocol))
                 return true;
 
         if (skb->protocol == htons(ETH_P_8021Q)) {
@@ -1758,6 +1798,16 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
         __be16 type = skb->protocol;
         int err;
 
+        if (type == htons(ETH_P_8021Q)) {
+                struct vlan_ethhdr *veh;
+
+                if (unlikely(!pskb_may_pull(skb, VLAN_ETH_HLEN)))
+                        return ERR_PTR(-EINVAL);
+
+                veh = (struct vlan_ethhdr *)skb->data;
+                type = veh->h_vlan_encapsulated_proto;
+        }
+
         skb_reset_mac_header(skb);
         skb->mac_len = skb->network_header - skb->mac_header;
         __skb_pull(skb, skb->mac_len);
@@ -1902,14 +1952,14 @@ static int dev_gso_segment(struct sk_buff *skb)
 
 /*
  * Try to orphan skb early, right before transmission by the device.
- * We cannot orphan skb if tx timestamp is requested, since
- * drivers need to call skb_tstamp_tx() to send the timestamp.
+ * We cannot orphan skb if tx timestamp is requested or the sk-reference
+ * is needed on driver level for other reasons, e.g. see net/can/raw.c
  */
 static inline void skb_orphan_try(struct sk_buff *skb)
 {
         struct sock *sk = skb->sk;
 
-        if (sk && !skb_tx(skb)->flags) {
+        if (sk && !skb_shinfo(skb)->tx_flags) {
                 /* skb_tx_hash() wont be able to get sk.
                  * We copy sk_hash into skb->rxhash
                  */
@@ -1929,9 +1979,14 @@ static inline void skb_orphan_try(struct sk_buff *skb)
 static inline int skb_needs_linearize(struct sk_buff *skb,
                                       struct net_device *dev)
 {
+        int features = dev->features;
+
+        if (skb->protocol == htons(ETH_P_8021Q) || vlan_tx_tag_present(skb))
+                features &= dev->vlan_features;
+
         return skb_is_nonlinear(skb) &&
-               ((skb_has_frags(skb) && !(dev->features & NETIF_F_FRAGLIST)) ||
-                (skb_shinfo(skb)->nr_frags && (!(dev->features & NETIF_F_SG) ||
+               ((skb_has_frag_list(skb) && !(features & NETIF_F_FRAGLIST)) ||
+                (skb_shinfo(skb)->nr_frags && (!(features & NETIF_F_SG) ||
                                                illegal_highdma(dev, skb))));
 }
 
@@ -1954,6 +2009,15 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 
         skb_orphan_try(skb);
 
+        if (vlan_tx_tag_present(skb) &&
+            !(dev->features & NETIF_F_HW_VLAN_TX)) {
+                skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb));
+                if (unlikely(!skb))
+                        goto out;
+
+                skb->vlan_tci = 0;
+        }
+
         if (netif_needs_gso(dev, skb)) {
                 if (unlikely(dev_gso_segment(skb)))
                         goto out_kfree_skb;
@@ -2015,6 +2079,7 @@ out_kfree_gso_skb:
         skb->destructor = DEV_GSO_CB(skb)->destructor;
 out_kfree_skb:
         kfree_skb(skb);
+out:
         return rc;
 }
 
@@ -2143,6 +2208,9 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
         return rc;
 }
 
+static DEFINE_PER_CPU(int, xmit_recursion);
+#define RECURSION_LIMIT 3
+
 /**
  * dev_queue_xmit - transmit a buffer
  * @skb: buffer to transmit
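[Editor's note] The per-CPU xmit_recursion counter added above bounds how often dev_queue_xmit() may re-enter itself through stacked virtual devices before the existing "dead loop" path fires. The stand-alone C program below (not kernel code) demonstrates the same guard pattern with a thread-local depth counter; the names and the limit value mirror the hunk only for readability.

    /* Illustrative sketch: bound re-entrancy with a per-thread depth counter. */
    #include <stdio.h>

    #define RECURSION_LIMIT 3

    static _Thread_local int xmit_depth;

    static int xmit_one(int hops_left)
    {
            int rc;

            if (xmit_depth > RECURSION_LIMIT) {
                    fprintf(stderr, "dead loop detected, dropping\n");
                    return -1;
            }

            xmit_depth++;
            /* a stacked device would re-enter the transmit path here */
            rc = hops_left ? xmit_one(hops_left - 1) : 0;
            xmit_depth--;

            return rc;
    }

    int main(void)
    {
            printf("3 hops: %d\n", xmit_one(3));    /* fits under the limit */
            printf("10 hops: %d\n", xmit_one(10));  /* trips the limit */
            return 0;
    }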
@@ -2208,10 +2276,15 @@ int dev_queue_xmit(struct sk_buff *skb)
 
         if (txq->xmit_lock_owner != cpu) {
 
+                if (__this_cpu_read(xmit_recursion) > RECURSION_LIMIT)
+                        goto recursion_alert;
+
                 HARD_TX_LOCK(dev, txq, cpu);
 
                 if (!netif_tx_queue_stopped(txq)) {
+                        __this_cpu_inc(xmit_recursion);
                         rc = dev_hard_start_xmit(skb, dev, txq);
+                        __this_cpu_dec(xmit_recursion);
                         if (dev_xmit_complete(rc)) {
                                 HARD_TX_UNLOCK(dev, txq);
                                 goto out;
@@ -2223,7 +2296,9 @@ int dev_queue_xmit(struct sk_buff *skb)
                                "queue packet!\n", dev->name);
         } else {
                 /* Recursion is detected! It is possible,
-                 * unfortunately */
+                 * unfortunately
+                 */
+recursion_alert:
                 if (net_ratelimit())
                         printk(KERN_CRIT "Dead loop on virtual device "
                                "%s, fix it urgently!\n", dev->name);
@@ -2259,69 +2334,44 @@ static inline void ____napi_schedule(struct softnet_data *sd,
         __raise_softirq_irqoff(NET_RX_SOFTIRQ);
 }
 
-#ifdef CONFIG_RPS
-
-/* One global table that all flow-based protocols share. */
-struct rps_sock_flow_table *rps_sock_flow_table __read_mostly;
-EXPORT_SYMBOL(rps_sock_flow_table);
-
 /*
- * get_rps_cpu is called from netif_receive_skb and returns the target
- * CPU from the RPS map of the receiving queue for a given skb.
- * rcu_read_lock must be held on entry.
+ * __skb_get_rxhash: calculate a flow hash based on src/dst addresses
+ * and src/dst port numbers. Returns a non-zero hash number on success
+ * and 0 on failure.
  */
-static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
-                       struct rps_dev_flow **rflowp)
+__u32 __skb_get_rxhash(struct sk_buff *skb)
 {
+        int nhoff, hash = 0, poff;
         struct ipv6hdr *ip6;
         struct iphdr *ip;
-        struct netdev_rx_queue *rxqueue;
-        struct rps_map *map;
-        struct rps_dev_flow_table *flow_table;
-        struct rps_sock_flow_table *sock_flow_table;
-        int cpu = -1;
         u8 ip_proto;
-        u16 tcpu;
         u32 addr1, addr2, ihl;
         union {
                 u32 v32;
                 u16 v16[2];
         } ports;
 
-        if (skb_rx_queue_recorded(skb)) {
-                u16 index = skb_get_rx_queue(skb);
-                if (unlikely(index >= dev->num_rx_queues)) {
-                        WARN_ONCE(dev->num_rx_queues > 1, "%s received packet "
-                                "on queue %u, but number of RX queues is %u\n",
-                                dev->name, index, dev->num_rx_queues);
-                        goto done;
-                }
-                rxqueue = dev->_rx + index;
-        } else
-                rxqueue = dev->_rx;
-
-        if (!rxqueue->rps_map && !rxqueue->rps_flow_table)
-                goto done;
-
-        if (skb->rxhash)
-                goto got_hash; /* Skip hash computation on packet header */
+        nhoff = skb_network_offset(skb);
 
         switch (skb->protocol) {
         case __constant_htons(ETH_P_IP):
-                if (!pskb_may_pull(skb, sizeof(*ip)))
+                if (!pskb_may_pull(skb, sizeof(*ip) + nhoff))
                         goto done;
 
-                ip = (struct iphdr *) skb->data;
-                ip_proto = ip->protocol;
+                ip = (struct iphdr *) (skb->data + nhoff);
+                if (ip->frag_off & htons(IP_MF | IP_OFFSET))
+                        ip_proto = 0;
+                else
+                        ip_proto = ip->protocol;
                 addr1 = (__force u32) ip->saddr;
                 addr2 = (__force u32) ip->daddr;
                 ihl = ip->ihl;
                 break;
         case __constant_htons(ETH_P_IPV6):
-                if (!pskb_may_pull(skb, sizeof(*ip6)))
+                if (!pskb_may_pull(skb, sizeof(*ip6) + nhoff))
                         goto done;
 
-                ip6 = (struct ipv6hdr *) skb->data;
+                ip6 = (struct ipv6hdr *) (skb->data + nhoff);
                 ip_proto = ip6->nexthdr;
                 addr1 = (__force u32) ip6->saddr.s6_addr32[3];
                 addr2 = (__force u32) ip6->daddr.s6_addr32[3];
@@ -2330,33 +2380,81 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
         default:
                 goto done;
         }
-        switch (ip_proto) {
-        case IPPROTO_TCP:
-        case IPPROTO_UDP:
-        case IPPROTO_DCCP:
-        case IPPROTO_ESP:
-        case IPPROTO_AH:
-        case IPPROTO_SCTP:
-        case IPPROTO_UDPLITE:
-                if (pskb_may_pull(skb, (ihl * 4) + 4)) {
-                        ports.v32 = * (__force u32 *) (skb->data + (ihl * 4));
+
+        ports.v32 = 0;
+        poff = proto_ports_offset(ip_proto);
+        if (poff >= 0) {
+                nhoff += ihl * 4 + poff;
+                if (pskb_may_pull(skb, nhoff + 4)) {
+                        ports.v32 = * (__force u32 *) (skb->data + nhoff);
                         if (ports.v16[1] < ports.v16[0])
                                 swap(ports.v16[0], ports.v16[1]);
-                        break;
                 }
-        default:
-                ports.v32 = 0;
-                break;
         }
 
         /* get a consistent hash (same value on both flow directions) */
         if (addr2 < addr1)
                 swap(addr1, addr2);
-        skb->rxhash = jhash_3words(addr1, addr2, ports.v32, hashrnd);
-        if (!skb->rxhash)
-                skb->rxhash = 1;
 
-got_hash:
+        hash = jhash_3words(addr1, addr2, ports.v32, hashrnd);
+        if (!hash)
+                hash = 1;
+
+done:
+        return hash;
+}
+EXPORT_SYMBOL(__skb_get_rxhash);
+
+#ifdef CONFIG_RPS
+
+/* One global table that all flow-based protocols share. */
+struct rps_sock_flow_table *rps_sock_flow_table __read_mostly;
+EXPORT_SYMBOL(rps_sock_flow_table);
+
+/*
+ * get_rps_cpu is called from netif_receive_skb and returns the target
+ * CPU from the RPS map of the receiving queue for a given skb.
+ * rcu_read_lock must be held on entry.
+ */
+static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
+                       struct rps_dev_flow **rflowp)
+{
+        struct netdev_rx_queue *rxqueue;
+        struct rps_map *map = NULL;
+        struct rps_dev_flow_table *flow_table;
+        struct rps_sock_flow_table *sock_flow_table;
+        int cpu = -1;
+        u16 tcpu;
+
+        if (skb_rx_queue_recorded(skb)) {
+                u16 index = skb_get_rx_queue(skb);
+                if (unlikely(index >= dev->real_num_rx_queues)) {
+                        WARN_ONCE(dev->real_num_rx_queues > 1,
+                                  "%s received packet on queue %u, but number "
+                                  "of RX queues is %u\n",
+                                  dev->name, index, dev->real_num_rx_queues);
+                        goto done;
+                }
+                rxqueue = dev->_rx + index;
+        } else
+                rxqueue = dev->_rx;
+
+        if (rxqueue->rps_map) {
+                map = rcu_dereference(rxqueue->rps_map);
+                if (map && map->len == 1) {
+                        tcpu = map->cpus[0];
+                        if (cpu_online(tcpu))
+                                cpu = tcpu;
+                        goto done;
+                }
+        } else if (!rxqueue->rps_flow_table) {
+                goto done;
+        }
+
+        skb_reset_network_header(skb);
+        if (!skb_get_rxhash(skb))
+                goto done;
+
         flow_table = rcu_dereference(rxqueue->rps_flow_table);
         sock_flow_table = rcu_dereference(rps_sock_flow_table);
         if (flow_table && sock_flow_table) {
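[Editor's note] __skb_get_rxhash() above computes a direction-independent flow hash: the two addresses and the two port halves are each swapped into a canonical order before hashing, and 0 is reserved to mean "no hash". The user-space sketch below shows only that canonicalization; mix32() is a placeholder, not the kernel's jhash_3words(), and the addresses are plain host-order integers for simplicity.

    /* Illustrative sketch: a symmetric flow hash in the spirit of __skb_get_rxhash(). */
    #include <stdint.h>
    #include <stdio.h>

    static uint32_t mix32(uint32_t a, uint32_t b, uint32_t c)
    {
            uint32_t h = a * 0x9e3779b1u;           /* placeholder mixer, not jhash */

            h ^= b + 0x85ebca6bu + (h << 6) + (h >> 2);
            h ^= c + 0xc2b2ae35u + (h << 6) + (h >> 2);
            return h;
    }

    static uint32_t flow_hash(uint32_t saddr, uint32_t daddr,
                              uint16_t sport, uint16_t dport)
    {
            uint32_t addr1 = saddr, addr2 = daddr, hash;
            uint16_t p0 = sport, p1 = dport;

            if (addr2 < addr1) {                    /* canonical address order */
                    uint32_t t = addr1; addr1 = addr2; addr2 = t;
            }
            if (p1 < p0) {                          /* canonical port order */
                    uint16_t t = p0; p0 = p1; p1 = t;
            }

            hash = mix32(addr1, addr2, ((uint32_t)p0 << 16) | p1);
            return hash ? hash : 1;                 /* 0 is reserved for "no hash" */
    }

    int main(void)
    {
            uint32_t fwd = flow_hash(0x0a000001, 0x0a000002, 12345, 80);
            uint32_t rev = flow_hash(0x0a000002, 0x0a000001, 80, 12345);

            printf("forward %08x reverse %08x equal=%d\n", fwd, rev, fwd == rev);
            return 0;
    }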
@@ -2396,7 +2494,6 @@ got_hash:
                 }
         }
 
-        map = rcu_dereference(rxqueue->rps_map);
         if (map) {
                 tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32];
 
@@ -2482,6 +2579,7 @@ enqueue:
 
         local_irq_restore(flags);
 
+        atomic_long_inc(&skb->dev->rx_dropped);
         kfree_skb(skb);
         return NET_RX_DROP;
 }
@@ -2636,11 +2734,10 @@ EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
  * the ingress scheduler, you just cant add policies on ingress.
  *
  */
-static int ing_filter(struct sk_buff *skb)
+static int ing_filter(struct sk_buff *skb, struct netdev_queue *rxq)
 {
         struct net_device *dev = skb->dev;
         u32 ttl = G_TC_RTTL(skb->tc_verd);
-        struct netdev_queue *rxq;
         int result = TC_ACT_OK;
         struct Qdisc *q;
 
@@ -2654,8 +2751,6 @@ static int ing_filter(struct sk_buff *skb)
         skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
         skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
 
-        rxq = &dev->rx_queue;
-
         q = rxq->qdisc;
         if (q != &noop_qdisc) {
                 spin_lock(qdisc_lock(q));
@@ -2671,7 +2766,9 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb,
                                          struct packet_type **pt_prev,
                                          int *ret, struct net_device *orig_dev)
 {
-        if (skb->dev->rx_queue.qdisc == &noop_qdisc)
+        struct netdev_queue *rxq = rcu_dereference(skb->dev->ingress_queue);
+
+        if (!rxq || rxq->qdisc == &noop_qdisc)
                 goto out;
 
         if (*pt_prev) {
@@ -2679,7 +2776,7 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb,
                 *pt_prev = NULL;
         }
 
-        switch (ing_filter(skb)) {
+        switch (ing_filter(skb, rxq)) {
         case TC_ACT_SHOT:
         case TC_ACT_STOLEN:
                 kfree_skb(skb);
@@ -2692,33 +2789,6 @@ out:
2692} 2789}
2693#endif 2790#endif
2694 2791
2695/*
2696 * netif_nit_deliver - deliver received packets to network taps
2697 * @skb: buffer
2698 *
2699 * This function is used to deliver incoming packets to network
2700 * taps. It should be used when the normal netif_receive_skb path
2701 * is bypassed, for example because of VLAN acceleration.
2702 */
2703void netif_nit_deliver(struct sk_buff *skb)
2704{
2705 struct packet_type *ptype;
2706
2707 if (list_empty(&ptype_all))
2708 return;
2709
2710 skb_reset_network_header(skb);
2711 skb_reset_transport_header(skb);
2712 skb->mac_len = skb->network_header - skb->mac_header;
2713
2714 rcu_read_lock();
2715 list_for_each_entry_rcu(ptype, &ptype_all, list) {
2716 if (!ptype->dev || ptype->dev == skb->dev)
2717 deliver_skb(skb, ptype, skb->dev);
2718 }
2719 rcu_read_unlock();
2720}
2721
2722/** 2792/**
2723 * netdev_rx_handler_register - register receive handler 2793 * netdev_rx_handler_register - register receive handler
2724 * @dev: device to register a handler for 2794 * @dev: device to register a handler for
@@ -2828,9 +2898,6 @@ static int __netif_receive_skb(struct sk_buff *skb)
         if (!netdev_tstamp_prequeue)
                 net_timestamp_check(skb);
 
-        if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb))
-                return NET_RX_SUCCESS;
-
         /* if we've gotten here through NAPI, check netpoll */
         if (netpoll_receive_skb(skb))
                 return NET_RX_DROP;
@@ -2843,8 +2910,7 @@ static int __netif_receive_skb(struct sk_buff *skb)
          * be delivered to pkt handlers that are exact matches. Also
          * the deliver_no_wcard flag will be set. If packet handlers
          * are sensitive to duplicate packets these skbs will need to
-         * be dropped at the handler. The vlan accel path may have
-         * already set the deliver_no_wcard flag.
+         * be dropped at the handler.
          */
         null_or_orig = NULL;
         orig_dev = skb->dev;
@@ -2903,6 +2969,18 @@ ncls:
                 goto out;
         }
 
+        if (vlan_tx_tag_present(skb)) {
+                if (pt_prev) {
+                        ret = deliver_skb(skb, pt_prev, orig_dev);
+                        pt_prev = NULL;
+                }
+                if (vlan_hwaccel_do_receive(&skb)) {
+                        ret = __netif_receive_skb(skb);
+                        goto out;
+                } else if (unlikely(!skb))
+                        goto out;
+        }
+
         /*
          * Make sure frames received on VLAN interfaces stacked on
          * bonding interfaces still make their way to any base bonding
@@ -2930,6 +3008,7 @@ ncls:
         if (pt_prev) {
                 ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
         } else {
+                atomic_long_inc(&skb->dev->rx_dropped);
                 kfree_skb(skb);
                 /* Jamal, now you will not able to escape explaining
                  * me how you were going to use this. :-)
@@ -3050,7 +3129,7 @@ out:
         return netif_receive_skb(skb);
 }
 
-static void napi_gro_flush(struct napi_struct *napi)
+inline void napi_gro_flush(struct napi_struct *napi)
 {
         struct sk_buff *skb, *next;
 
@@ -3063,6 +3142,7 @@ static void napi_gro_flush(struct napi_struct *napi)
         napi->gro_count = 0;
         napi->gro_list = NULL;
 }
+EXPORT_SYMBOL(napi_gro_flush);
 
 enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
@@ -3077,7 +3157,7 @@ enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
         if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb))
                 goto normal;
 
-        if (skb_is_gso(skb) || skb_has_frags(skb))
+        if (skb_is_gso(skb) || skb_has_frag_list(skb))
                 goto normal;
 
         rcu_read_lock();
@@ -3156,16 +3236,19 @@ normal:
 }
 EXPORT_SYMBOL(dev_gro_receive);
 
-static gro_result_t
+static inline gro_result_t
 __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
         struct sk_buff *p;
 
         for (p = napi->gro_list; p; p = p->next) {
-                NAPI_GRO_CB(p)->same_flow =
-                        (p->dev == skb->dev) &&
-                        !compare_ether_header(skb_mac_header(p),
+                unsigned long diffs;
+
+                diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
+                diffs |= p->vlan_tci ^ skb->vlan_tci;
+                diffs |= compare_ether_header(skb_mac_header(p),
                                               skb_gro_mac_header(skb));
+                NAPI_GRO_CB(p)->same_flow = !diffs;
                 NAPI_GRO_CB(p)->flush = 0;
         }
 
@@ -3218,14 +3301,14 @@ gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 }
 EXPORT_SYMBOL(napi_gro_receive);
 
-void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
+static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
 {
         __skb_pull(skb, skb_headlen(skb));
         skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb));
+        skb->vlan_tci = 0;
 
         napi->skb = skb;
 }
-EXPORT_SYMBOL(napi_reuse_skb);
 
 struct sk_buff *napi_get_frags(struct napi_struct *napi)
 {
@@ -4859,21 +4942,6 @@ static void rollback_registered(struct net_device *dev)
         rollback_registered_many(&single);
 }
 
-static void __netdev_init_queue_locks_one(struct net_device *dev,
-                                          struct netdev_queue *dev_queue,
-                                          void *_unused)
-{
-        spin_lock_init(&dev_queue->_xmit_lock);
-        netdev_set_xmit_lockdep_class(&dev_queue->_xmit_lock, dev->type);
-        dev_queue->xmit_lock_owner = -1;
-}
-
-static void netdev_init_queue_locks(struct net_device *dev)
-{
-        netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL);
-        __netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL);
-}
-
 unsigned long netdev_fix_features(unsigned long features, const char *name)
 {
         /* Fix illegal SG+CSUM combinations. */
@@ -4941,6 +5009,66 @@ void netif_stacked_transfer_operstate(const struct net_device *rootdev,
 }
 EXPORT_SYMBOL(netif_stacked_transfer_operstate);
 
+static int netif_alloc_rx_queues(struct net_device *dev)
+{
+#ifdef CONFIG_RPS
+        unsigned int i, count = dev->num_rx_queues;
+        struct netdev_rx_queue *rx;
+
+        BUG_ON(count < 1);
+
+        rx = kcalloc(count, sizeof(struct netdev_rx_queue), GFP_KERNEL);
+        if (!rx) {
+                pr_err("netdev: Unable to allocate %u rx queues.\n", count);
+                return -ENOMEM;
+        }
+        dev->_rx = rx;
+
+        /*
+         * Set a pointer to first element in the array which holds the
+         * reference count.
+         */
+        for (i = 0; i < count; i++)
+                rx[i].first = rx;
+#endif
+        return 0;
+}
+
+static int netif_alloc_netdev_queues(struct net_device *dev)
+{
+        unsigned int count = dev->num_tx_queues;
+        struct netdev_queue *tx;
+
+        BUG_ON(count < 1);
+
+        tx = kcalloc(count, sizeof(struct netdev_queue), GFP_KERNEL);
+        if (!tx) {
+                pr_err("netdev: Unable to allocate %u tx queues.\n",
+                       count);
+                return -ENOMEM;
+        }
+        dev->_tx = tx;
+        return 0;
+}
+
+static void netdev_init_one_queue(struct net_device *dev,
+                                  struct netdev_queue *queue,
+                                  void *_unused)
+{
+        queue->dev = dev;
+
+        /* Initialize queue lock */
+        spin_lock_init(&queue->_xmit_lock);
+        netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type);
+        queue->xmit_lock_owner = -1;
+}
+
+static void netdev_init_queues(struct net_device *dev)
+{
+        netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
+        spin_lock_init(&dev->tx_global_lock);
+}
+
 /**
  * register_netdevice - register a network device
  * @dev: device to register
@@ -4974,28 +5102,19 @@ int register_netdevice(struct net_device *dev)
 
         spin_lock_init(&dev->addr_list_lock);
         netdev_set_addr_lockdep_class(dev);
-        netdev_init_queue_locks(dev);
 
         dev->iflink = -1;
 
-#ifdef CONFIG_RPS
-        if (!dev->num_rx_queues) {
-                /*
-                 * Allocate a single RX queue if driver never called
-                 * alloc_netdev_mq
-                 */
-
-                dev->_rx = kzalloc(sizeof(struct netdev_rx_queue), GFP_KERNEL);
-                if (!dev->_rx) {
-                        ret = -ENOMEM;
-                        goto out;
-                }
-
-                dev->_rx->first = dev->_rx;
-                atomic_set(&dev->_rx->count, 1);
-                dev->num_rx_queues = 1;
-        }
-#endif
+        ret = netif_alloc_rx_queues(dev);
+        if (ret)
+                goto out;
+
+        ret = netif_alloc_netdev_queues(dev);
+        if (ret)
+                goto out;
+
+        netdev_init_queues(dev);
+
         /* Init, if this function is available */
         if (dev->netdev_ops->ndo_init) {
                 ret = dev->netdev_ops->ndo_init(dev);
@@ -5035,6 +5154,12 @@ int register_netdevice(struct net_device *dev)
         if (dev->features & NETIF_F_SG)
                 dev->features |= NETIF_F_GSO;
 
+        /* Enable GRO and NETIF_F_HIGHDMA for vlans by default,
+         * vlan_dev_init() will do the dev->features check, so these features
+         * are enabled only if supported by underlying device.
+         */
+        dev->vlan_features |= (NETIF_F_GRO | NETIF_F_HIGHDMA);
+
         ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
         ret = notifier_to_errno(ret);
         if (ret)
@@ -5105,9 +5230,6 @@ int init_dummy_netdev(struct net_device *dev)
          */
         dev->reg_state = NETREG_DUMMY;
 
-        /* initialize the ref count */
-        atomic_set(&dev->refcnt, 1);
-
         /* NAPI wants this */
         INIT_LIST_HEAD(&dev->napi_list);
 
@@ -5115,6 +5237,11 @@ int init_dummy_netdev(struct net_device *dev)
         set_bit(__LINK_STATE_PRESENT, &dev->state);
         set_bit(__LINK_STATE_START, &dev->state);
 
+        /* Note : We dont allocate pcpu_refcnt for dummy devices,
+         * because users of this 'device' dont need to change
+         * its refcount.
+         */
+
         return 0;
 }
 EXPORT_SYMBOL_GPL(init_dummy_netdev);
@@ -5156,6 +5283,16 @@ out:
 }
 EXPORT_SYMBOL(register_netdev);
 
+int netdev_refcnt_read(const struct net_device *dev)
+{
+        int i, refcnt = 0;
+
+        for_each_possible_cpu(i)
+                refcnt += *per_cpu_ptr(dev->pcpu_refcnt, i);
+        return refcnt;
+}
+EXPORT_SYMBOL(netdev_refcnt_read);
+
 /*
  * netdev_wait_allrefs - wait until all references are gone.
  *
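[Editor's note] netdev_refcnt_read() above replaces the single atomic_t device refcount with per-CPU counters: dev_hold()/dev_put() only touch the local CPU's slot, and the real count is materialized by summing all slots when needed, as in the wait/teardown paths further down. The stand-alone sketch below shows the split-counter idea with an array standing in for percpu storage; it is not the kernel implementation and ignores concurrency.

    /* Illustrative sketch: a split reference counter whose value is the sum of its slots. */
    #include <stdio.h>

    #define NR_CPUS 4

    static int refcnt_slot[NR_CPUS];

    static void hold(int cpu) { refcnt_slot[cpu]++; }       /* dev_hold() analogue */
    static void put(int cpu)  { refcnt_slot[cpu]--; }       /* dev_put() analogue */

    static int refcnt_read(void)
    {
            int cpu, refcnt = 0;

            for (cpu = 0; cpu < NR_CPUS; cpu++)
                    refcnt += refcnt_slot[cpu];
            return refcnt;
    }

    int main(void)
    {
            hold(0);
            hold(2);
            put(3);                                 /* one slot may go negative on its own */
            printf("refcnt = %d\n", refcnt_read()); /* but the sum is 1 */
            return 0;
    }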
@@ -5170,11 +5307,14 @@ EXPORT_SYMBOL(register_netdev);
 static void netdev_wait_allrefs(struct net_device *dev)
 {
         unsigned long rebroadcast_time, warning_time;
+        int refcnt;
 
         linkwatch_forget_dev(dev);
 
         rebroadcast_time = warning_time = jiffies;
-        while (atomic_read(&dev->refcnt) != 0) {
+        refcnt = netdev_refcnt_read(dev);
+
+        while (refcnt != 0) {
                 if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
                         rtnl_lock();
 
@@ -5201,11 +5341,13 @@ static void netdev_wait_allrefs(struct net_device *dev)
 
                 msleep(250);
 
+                refcnt = netdev_refcnt_read(dev);
+
                 if (time_after(jiffies, warning_time + 10 * HZ)) {
                         printk(KERN_EMERG "unregister_netdevice: "
                                "waiting for %s to become free. Usage "
                                "count = %d\n",
-                               dev->name, atomic_read(&dev->refcnt));
+                               dev->name, refcnt);
                         warning_time = jiffies;
                 }
         }
@@ -5263,8 +5405,8 @@ void netdev_run_todo(void)
                 netdev_wait_allrefs(dev);
 
                 /* paranoia */
-                BUG_ON(atomic_read(&dev->refcnt));
-                WARN_ON(dev->ip_ptr);
+                BUG_ON(netdev_refcnt_read(dev));
+                WARN_ON(rcu_dereference_raw(dev->ip_ptr));
                 WARN_ON(dev->ip6_ptr);
                 WARN_ON(dev->dn_ptr);
 
@@ -5342,30 +5484,34 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
 
         if (ops->ndo_get_stats64) {
                 memset(storage, 0, sizeof(*storage));
-                return ops->ndo_get_stats64(dev, storage);
-        }
-        if (ops->ndo_get_stats) {
+                ops->ndo_get_stats64(dev, storage);
+        } else if (ops->ndo_get_stats) {
                 netdev_stats_to_stats64(storage, ops->ndo_get_stats(dev));
-                return storage;
+        } else {
+                netdev_stats_to_stats64(storage, &dev->stats);
+                dev_txq_stats_fold(dev, storage);
         }
-        netdev_stats_to_stats64(storage, &dev->stats);
-        dev_txq_stats_fold(dev, storage);
+        storage->rx_dropped += atomic_long_read(&dev->rx_dropped);
         return storage;
 }
 EXPORT_SYMBOL(dev_get_stats);
 
-static void netdev_init_one_queue(struct net_device *dev,
-                                  struct netdev_queue *queue,
-                                  void *_unused)
+struct netdev_queue *dev_ingress_queue_create(struct net_device *dev)
 {
-        queue->dev = dev;
-}
+        struct netdev_queue *queue = dev_ingress_queue(dev);
 
-static void netdev_init_queues(struct net_device *dev)
-{
-        netdev_init_one_queue(dev, &dev->rx_queue, NULL);
-        netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
-        spin_lock_init(&dev->tx_global_lock);
+#ifdef CONFIG_NET_CLS_ACT
+        if (queue)
+                return queue;
+        queue = kzalloc(sizeof(*queue), GFP_KERNEL);
+        if (!queue)
+                return NULL;
+        netdev_init_one_queue(dev, queue, NULL);
+        queue->qdisc = &noop_qdisc;
+        queue->qdisc_sleeping = &noop_qdisc;
+        rcu_assign_pointer(dev->ingress_queue, queue);
+#endif
+        return queue;
 }
 
 /**
@@ -5382,17 +5528,18 @@ static void netdev_init_queues(struct net_device *dev)
 struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
                 void (*setup)(struct net_device *), unsigned int queue_count)
 {
-        struct netdev_queue *tx;
         struct net_device *dev;
         size_t alloc_size;
         struct net_device *p;
-#ifdef CONFIG_RPS
-        struct netdev_rx_queue *rx;
-        int i;
-#endif
 
         BUG_ON(strlen(name) >= sizeof(dev->name));
 
+        if (queue_count < 1) {
+                pr_err("alloc_netdev: Unable to allocate device "
+                       "with zero queues.\n");
+                return NULL;
+        }
+
         alloc_size = sizeof(struct net_device);
         if (sizeof_priv) {
                 /* ensure 32-byte alignment of private area */
@@ -5408,55 +5555,31 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
                 return NULL;
         }
 
-        tx = kcalloc(queue_count, sizeof(struct netdev_queue), GFP_KERNEL);
-        if (!tx) {
-                printk(KERN_ERR "alloc_netdev: Unable to allocate "
-                       "tx qdiscs.\n");
-                goto free_p;
-        }
-
-#ifdef CONFIG_RPS
-        rx = kcalloc(queue_count, sizeof(struct netdev_rx_queue), GFP_KERNEL);
-        if (!rx) {
-                printk(KERN_ERR "alloc_netdev: Unable to allocate "
-                       "rx queues.\n");
-                goto free_tx;
-        }
-
-        atomic_set(&rx->count, queue_count);
-
-        /*
-         * Set a pointer to first element in the array which holds the
-         * reference count.
-         */
-        for (i = 0; i < queue_count; i++)
-                rx[i].first = rx;
-#endif
-
         dev = PTR_ALIGN(p, NETDEV_ALIGN);
         dev->padded = (char *)dev - (char *)p;
 
+        dev->pcpu_refcnt = alloc_percpu(int);
+        if (!dev->pcpu_refcnt)
+                goto free_p;
+
         if (dev_addr_init(dev))
-                goto free_rx;
+                goto free_pcpu;
 
         dev_mc_init(dev);
         dev_uc_init(dev);
 
         dev_net_set(dev, &init_net);
 
-        dev->_tx = tx;
         dev->num_tx_queues = queue_count;
         dev->real_num_tx_queues = queue_count;
 
 #ifdef CONFIG_RPS
-        dev->_rx = rx;
         dev->num_rx_queues = queue_count;
+        dev->real_num_rx_queues = queue_count;
 #endif
 
         dev->gso_max_size = GSO_MAX_SIZE;
 
-        netdev_init_queues(dev);
-
         INIT_LIST_HEAD(&dev->ethtool_ntuple_list.list);
         dev->ethtool_ntuple_list.count = 0;
         INIT_LIST_HEAD(&dev->napi_list);
@@ -5467,12 +5590,8 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
         strcpy(dev->name, name);
         return dev;
 
-free_rx:
-#ifdef CONFIG_RPS
-        kfree(rx);
-free_tx:
-#endif
-        kfree(tx);
+free_pcpu:
+        free_percpu(dev->pcpu_refcnt);
 free_p:
         kfree(p);
         return NULL;
@@ -5495,6 +5614,8 @@ void free_netdev(struct net_device *dev)
 
         kfree(dev->_tx);
 
+        kfree(rcu_dereference_raw(dev->ingress_queue));
+
         /* Flush device addresses */
         dev_addr_flush(dev);
 
@@ -5504,6 +5625,9 @@ void free_netdev(struct net_device *dev)
         list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
                 netif_napi_del(p);
 
+        free_percpu(dev->pcpu_refcnt);
+        dev->pcpu_refcnt = NULL;
+
         /* Compatibility with error handling in drivers */
         if (dev->reg_state == NETREG_UNINITIALIZED) {
                 kfree((char *)dev - dev->padded);
@@ -5658,6 +5782,10 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
 
         /* Notify protocols, that we are about to destroy
            this device. They should clean all the things.
+
+           Note that dev->reg_state stays at NETREG_REGISTERED.
+           This is wanted because this way 8021q and macvlan know
+           the device is just moving and can keep their slaves up.
         */
         call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
         call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);