author		Ingo Molnar <mingo@elte.hu>	2010-10-30 04:43:08 -0400
committer	Ingo Molnar <mingo@elte.hu>	2010-10-30 04:43:08 -0400
commit		169ed55bd30305b933f52bfab32a58671d44ab68 (patch)
tree		32e280957474f458901abfce16fa2a1687ef7497 /net/core/dev.c
parent		3d7851b3cdd43a734e5cc4c643fd886ab28ad4d5 (diff)
parent		45f81b1c96d9793e47ce925d257ea693ce0b193e (diff)
Merge branch 'tip/perf/jump-label-2' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-2.6-trace into perf/urgent
Diffstat (limited to 'net/core/dev.c')
-rw-r--r--	net/core/dev.c	| 613
1 file changed, 369 insertions(+), 244 deletions(-)
diff --git a/net/core/dev.c b/net/core/dev.c
index 7ec85e27bee..35dfb831848 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -131,6 +131,7 @@
 #include <trace/events/net.h>
 #include <trace/events/skb.h>
 #include <linux/pci.h>
+#include <linux/inetdevice.h>
 
 #include "net-sysfs.h"
 
@@ -373,6 +374,14 @@ static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
  * --ANK (980803)
  */
 
+static inline struct list_head *ptype_head(const struct packet_type *pt)
+{
+	if (pt->type == htons(ETH_P_ALL))
+		return &ptype_all;
+	else
+		return &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
+}
+
 /**
  * dev_add_pack - add packet handler
  * @pt: packet type declaration
@@ -388,16 +397,11 @@ static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
 
 void dev_add_pack(struct packet_type *pt)
 {
-	int hash;
+	struct list_head *head = ptype_head(pt);
 
-	spin_lock_bh(&ptype_lock);
-	if (pt->type == htons(ETH_P_ALL))
-		list_add_rcu(&pt->list, &ptype_all);
-	else {
-		hash = ntohs(pt->type) & PTYPE_HASH_MASK;
-		list_add_rcu(&pt->list, &ptype_base[hash]);
-	}
-	spin_unlock_bh(&ptype_lock);
+	spin_lock(&ptype_lock);
+	list_add_rcu(&pt->list, head);
+	spin_unlock(&ptype_lock);
 }
 EXPORT_SYMBOL(dev_add_pack);
 
@@ -416,15 +420,10 @@ EXPORT_SYMBOL(dev_add_pack);
  */
 void __dev_remove_pack(struct packet_type *pt)
 {
-	struct list_head *head;
+	struct list_head *head = ptype_head(pt);
 	struct packet_type *pt1;
 
-	spin_lock_bh(&ptype_lock);
-
-	if (pt->type == htons(ETH_P_ALL))
-		head = &ptype_all;
-	else
-		head = &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
+	spin_lock(&ptype_lock);
 
 	list_for_each_entry(pt1, head, list) {
 		if (pt == pt1) {
@@ -435,7 +434,7 @@ void __dev_remove_pack(struct packet_type *pt)
 
 	printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
 out:
-	spin_unlock_bh(&ptype_lock);
+	spin_unlock(&ptype_lock);
 }
 EXPORT_SYMBOL(__dev_remove_pack);
 
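The three hunks above centralize the list choice in ptype_head() and shrink both functions to lock, list-op, unlock. For orientation, a minimal consumer of this API might look like the following sketch; sample_rcv and sample_pt are illustrative names, not part of the patch:

#include <linux/netdevice.h>
#include <linux/if_ether.h>
#include <linux/skbuff.h>

static int sample_rcv(struct sk_buff *skb, struct net_device *dev,
		      struct packet_type *pt, struct net_device *orig_dev)
{
	/* Every received frame lands here: ETH_P_ALL maps to &ptype_all in
	 * ptype_head(); a specific ethertype would hash into ptype_base[]. */
	kfree_skb(skb);
	return 0;
}

static struct packet_type sample_pt = {
	.type = cpu_to_be16(ETH_P_ALL),
	.func = sample_rcv,
};

/* Module init/exit would call dev_add_pack(&sample_pt) and
 * dev_remove_pack(&sample_pt) respectively. */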
@@ -1486,8 +1485,9 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
 	skb_orphan(skb);
 	nf_reset(skb);
 
-	if (!(dev->flags & IFF_UP) ||
-	    (skb->len > (dev->mtu + dev->hard_header_len))) {
+	if (unlikely(!(dev->flags & IFF_UP) ||
+		     (skb->len > (dev->mtu + dev->hard_header_len + VLAN_HLEN)))) {
+		atomic_long_inc(&dev->rx_dropped);
 		kfree_skb(skb);
 		return NET_RX_DROP;
 	}
@@ -1555,21 +1555,56 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
  * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
  * greater then real_num_tx_queues stale skbs on the qdisc must be flushed.
  */
-void netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
+int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
 {
-	unsigned int real_num = dev->real_num_tx_queues;
+	if (txq < 1 || txq > dev->num_tx_queues)
+		return -EINVAL;
 
-	if (unlikely(txq > dev->num_tx_queues))
-		;
-	else if (txq > real_num)
-		dev->real_num_tx_queues = txq;
-	else if (txq < real_num) {
-		dev->real_num_tx_queues = txq;
-		qdisc_reset_all_tx_gt(dev, txq);
+	if (dev->reg_state == NETREG_REGISTERED) {
+		ASSERT_RTNL();
+
+		if (txq < dev->real_num_tx_queues)
+			qdisc_reset_all_tx_gt(dev, txq);
 	}
+
+	dev->real_num_tx_queues = txq;
+	return 0;
 }
 EXPORT_SYMBOL(netif_set_real_num_tx_queues);
 
+#ifdef CONFIG_RPS
+/**
+ * netif_set_real_num_rx_queues - set actual number of RX queues used
+ * @dev: Network device
+ * @rxq: Actual number of RX queues
+ *
+ * This must be called either with the rtnl_lock held or before
+ * registration of the net device.  Returns 0 on success, or a
+ * negative error code.  If called before registration, it always
+ * succeeds.
+ */
+int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq)
+{
+	int rc;
+
+	if (rxq < 1 || rxq > dev->num_rx_queues)
+		return -EINVAL;
+
+	if (dev->reg_state == NETREG_REGISTERED) {
+		ASSERT_RTNL();
+
+		rc = net_rx_queue_update_kobjects(dev, dev->real_num_rx_queues,
+						  rxq);
+		if (rc)
+			return rc;
+	}
+
+	dev->real_num_rx_queues = rxq;
+	return 0;
+}
+EXPORT_SYMBOL(netif_set_real_num_rx_queues);
+#endif
+
 static inline void __netif_reschedule(struct Qdisc *q)
 {
 	struct softnet_data *sd;
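netif_set_real_num_tx_queues() now validates its argument and returns an error code, and the new netif_set_real_num_rx_queues() gives RX the same treatment, updating the sysfs kobjects when the device is already registered. A hypothetical driver fragment using the pair (the function name is illustrative):

#include <linux/netdevice.h>
#include <linux/rtnetlink.h>

static int sample_set_queue_count(struct net_device *dev, unsigned int n)
{
	int err;

	ASSERT_RTNL();		/* required once the device is registered */

	err = netif_set_real_num_tx_queues(dev, n);	/* may flush stale qdisc skbs */
	if (err)
		return err;
	return netif_set_real_num_rx_queues(dev, n);	/* updates RX kobjects */
}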
@@ -1650,10 +1685,10 @@ EXPORT_SYMBOL(netif_device_attach)
 
 static bool can_checksum_protocol(unsigned long features, __be16 protocol)
 {
-	return ((features & NETIF_F_GEN_CSUM) ||
-		((features & NETIF_F_IP_CSUM) &&
+	return ((features & NETIF_F_NO_CSUM) ||
+		((features & NETIF_F_V4_CSUM) &&
 		 protocol == htons(ETH_P_IP)) ||
-		((features & NETIF_F_IPV6_CSUM) &&
+		((features & NETIF_F_V6_CSUM) &&
 		 protocol == htons(ETH_P_IPV6)) ||
 		((features & NETIF_F_FCOE_CRC) &&
 		 protocol == htons(ETH_P_FCOE)));
@@ -1661,17 +1696,18 @@ static bool can_checksum_protocol(unsigned long features, __be16 protocol)
 
 static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
 {
-	if (can_checksum_protocol(dev->features, skb->protocol))
-		return true;
+	__be16 protocol = skb->protocol;
+	int features = dev->features;
 
-	if (skb->protocol == htons(ETH_P_8021Q)) {
+	if (vlan_tx_tag_present(skb)) {
+		features &= dev->vlan_features;
+	} else if (protocol == htons(ETH_P_8021Q)) {
 		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
-		if (can_checksum_protocol(dev->features & dev->vlan_features,
-					  veh->h_vlan_encapsulated_proto))
-			return true;
+		protocol = veh->h_vlan_encapsulated_proto;
+		features &= dev->vlan_features;
 	}
 
-	return false;
+	return can_checksum_protocol(features, protocol);
 }
 
 /**
@@ -1760,6 +1796,16 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
 	__be16 type = skb->protocol;
 	int err;
 
+	if (type == htons(ETH_P_8021Q)) {
+		struct vlan_ethhdr *veh;
+
+		if (unlikely(!pskb_may_pull(skb, VLAN_ETH_HLEN)))
+			return ERR_PTR(-EINVAL);
+
+		veh = (struct vlan_ethhdr *)skb->data;
+		type = veh->h_vlan_encapsulated_proto;
+	}
+
 	skb_reset_mac_header(skb);
 	skb->mac_len = skb->network_header - skb->mac_header;
 	__skb_pull(skb, skb->mac_len);
@@ -1904,14 +1950,14 @@ static int dev_gso_segment(struct sk_buff *skb)
 
 /*
  * Try to orphan skb early, right before transmission by the device.
- * We cannot orphan skb if tx timestamp is requested, since
- * drivers need to call skb_tstamp_tx() to send the timestamp.
+ * We cannot orphan skb if tx timestamp is requested or the sk-reference
+ * is needed on driver level for other reasons, e.g. see net/can/raw.c
  */
 static inline void skb_orphan_try(struct sk_buff *skb)
 {
 	struct sock *sk = skb->sk;
 
-	if (sk && !skb_tx(skb)->flags) {
+	if (sk && !skb_shinfo(skb)->tx_flags) {
 		/* skb_tx_hash() wont be able to get sk.
 		 * We copy sk_hash into skb->rxhash
 		 */
@@ -1931,9 +1977,14 @@ static inline void skb_orphan_try(struct sk_buff *skb)
 static inline int skb_needs_linearize(struct sk_buff *skb,
 				      struct net_device *dev)
 {
+	int features = dev->features;
+
+	if (skb->protocol == htons(ETH_P_8021Q) || vlan_tx_tag_present(skb))
+		features &= dev->vlan_features;
+
 	return skb_is_nonlinear(skb) &&
-	       ((skb_has_frags(skb) && !(dev->features & NETIF_F_FRAGLIST)) ||
-		(skb_shinfo(skb)->nr_frags && (!(dev->features & NETIF_F_SG) ||
+	       ((skb_has_frag_list(skb) && !(features & NETIF_F_FRAGLIST)) ||
+		(skb_shinfo(skb)->nr_frags && (!(features & NETIF_F_SG) ||
 					       illegal_highdma(dev, skb))));
 }
 
@@ -1956,6 +2007,15 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 
 	skb_orphan_try(skb);
 
+	if (vlan_tx_tag_present(skb) &&
+	    !(dev->features & NETIF_F_HW_VLAN_TX)) {
+		skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb));
+		if (unlikely(!skb))
+			goto out;
+
+		skb->vlan_tci = 0;
+	}
+
 	if (netif_needs_gso(dev, skb)) {
 		if (unlikely(dev_gso_segment(skb)))
 			goto out_kfree_skb;
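dev_hard_start_xmit() now falls back to inserting the 802.1Q tag in software whenever the queued skb carries one but the device lacks NETIF_F_HW_VLAN_TX. For reference, the 4-byte tag that __vlan_put_tag() writes in front of the payload has the shape below (a standalone sketch in network byte order, not kernel code):

#include <stdint.h>
#include <arpa/inet.h>

struct vlan_tag {
	uint16_t tpid;			/* 0x8100, i.e. ETH_P_8021Q */
	uint16_t tci;			/* PCP:3 | DEI:1 | VID:12 */
};

static struct vlan_tag make_tag(uint16_t vlan_tci)
{
	/* the TCI is exactly the value that was riding in skb->vlan_tci */
	struct vlan_tag tag = { htons(0x8100), htons(vlan_tci) };
	return tag;
}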
@@ -2019,6 +2079,7 @@ out_kfree_gso_skb:
 		skb->destructor = DEV_GSO_CB(skb)->destructor;
 out_kfree_skb:
 	kfree_skb(skb);
+out:
 	return rc;
 }
 
@@ -2147,6 +2208,9 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 	return rc;
 }
 
+static DEFINE_PER_CPU(int, xmit_recursion);
+#define RECURSION_LIMIT 10
+
 /**
  * dev_queue_xmit - transmit a buffer
  * @skb: buffer to transmit
@@ -2213,10 +2277,15 @@ int dev_queue_xmit(struct sk_buff *skb)
 
 		if (txq->xmit_lock_owner != cpu) {
 
+			if (__this_cpu_read(xmit_recursion) > RECURSION_LIMIT)
+				goto recursion_alert;
+
 			HARD_TX_LOCK(dev, txq, cpu);
 
 			if (!netif_tx_queue_stopped(txq)) {
+				__this_cpu_inc(xmit_recursion);
 				rc = dev_hard_start_xmit(skb, dev, txq);
+				__this_cpu_dec(xmit_recursion);
 				if (dev_xmit_complete(rc)) {
 					HARD_TX_UNLOCK(dev, txq);
 					goto out;
@@ -2228,7 +2297,9 @@ int dev_queue_xmit(struct sk_buff *skb)
 				      "queue packet!\n", dev->name);
 		} else {
 			/* Recursion is detected! It is possible,
-			 * unfortunately */
+			 * unfortunately
+			 */
+recursion_alert:
 			if (net_ratelimit())
 				printk(KERN_CRIT "Dead loop on virtual device "
 				       "%s, fix it urgently!\n", dev->name);
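The per-CPU xmit_recursion counter bounds how deep stacked virtual devices may re-enter the transmit path before the "dead loop" branch fires. The same pattern in isolation, assuming the caller runs with softirqs disabled (as dev_queue_xmit() does via rcu_read_lock_bh()); sample_xmit() and nested_transmit() are hypothetical names:

static DEFINE_PER_CPU(int, sample_recursion);
#define SAMPLE_RECURSION_LIMIT	10

static int nested_transmit(struct sk_buff *skb);	/* may re-enter sample_xmit() */

static int sample_xmit(struct sk_buff *skb)
{
	int rc;

	if (__this_cpu_read(sample_recursion) > SAMPLE_RECURSION_LIMIT)
		return -ELOOP;	/* a stack of virtual devices looped back on itself */

	__this_cpu_inc(sample_recursion);
	rc = nested_transmit(skb);	/* nested call sees the incremented depth */
	__this_cpu_dec(sample_recursion);
	return rc;
}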
@@ -2264,69 +2335,44 @@ static inline void ____napi_schedule(struct softnet_data *sd,
 	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
 }
 
-#ifdef CONFIG_RPS
-
-/* One global table that all flow-based protocols share. */
-struct rps_sock_flow_table *rps_sock_flow_table __read_mostly;
-EXPORT_SYMBOL(rps_sock_flow_table);
-
 /*
- * get_rps_cpu is called from netif_receive_skb and returns the target
- * CPU from the RPS map of the receiving queue for a given skb.
- * rcu_read_lock must be held on entry.
+ * __skb_get_rxhash: calculate a flow hash based on src/dst addresses
+ * and src/dst port numbers.  Returns a non-zero hash number on success
+ * and 0 on failure.
  */
-static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
-		       struct rps_dev_flow **rflowp)
+__u32 __skb_get_rxhash(struct sk_buff *skb)
 {
+	int nhoff, hash = 0, poff;
 	struct ipv6hdr *ip6;
 	struct iphdr *ip;
-	struct netdev_rx_queue *rxqueue;
-	struct rps_map *map;
-	struct rps_dev_flow_table *flow_table;
-	struct rps_sock_flow_table *sock_flow_table;
-	int cpu = -1;
 	u8 ip_proto;
-	u16 tcpu;
 	u32 addr1, addr2, ihl;
 	union {
 		u32 v32;
 		u16 v16[2];
 	} ports;
 
-	if (skb_rx_queue_recorded(skb)) {
-		u16 index = skb_get_rx_queue(skb);
-		if (unlikely(index >= dev->num_rx_queues)) {
-			WARN_ONCE(dev->num_rx_queues > 1, "%s received packet "
-				"on queue %u, but number of RX queues is %u\n",
-				dev->name, index, dev->num_rx_queues);
-			goto done;
-		}
-		rxqueue = dev->_rx + index;
-	} else
-		rxqueue = dev->_rx;
-
-	if (!rxqueue->rps_map && !rxqueue->rps_flow_table)
-		goto done;
-
-	if (skb->rxhash)
-		goto got_hash; /* Skip hash computation on packet header */
+	nhoff = skb_network_offset(skb);
 
 	switch (skb->protocol) {
 	case __constant_htons(ETH_P_IP):
-		if (!pskb_may_pull(skb, sizeof(*ip)))
+		if (!pskb_may_pull(skb, sizeof(*ip) + nhoff))
 			goto done;
 
-		ip = (struct iphdr *) skb->data;
-		ip_proto = ip->protocol;
+		ip = (struct iphdr *) (skb->data + nhoff);
+		if (ip->frag_off & htons(IP_MF | IP_OFFSET))
+			ip_proto = 0;
+		else
+			ip_proto = ip->protocol;
 		addr1 = (__force u32) ip->saddr;
 		addr2 = (__force u32) ip->daddr;
 		ihl = ip->ihl;
 		break;
 	case __constant_htons(ETH_P_IPV6):
-		if (!pskb_may_pull(skb, sizeof(*ip6)))
+		if (!pskb_may_pull(skb, sizeof(*ip6) + nhoff))
 			goto done;
 
-		ip6 = (struct ipv6hdr *) skb->data;
+		ip6 = (struct ipv6hdr *) (skb->data + nhoff);
 		ip_proto = ip6->nexthdr;
 		addr1 = (__force u32) ip6->saddr.s6_addr32[3];
 		addr2 = (__force u32) ip6->daddr.s6_addr32[3];
@@ -2335,33 +2381,81 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
 	default:
 		goto done;
 	}
-	switch (ip_proto) {
-	case IPPROTO_TCP:
-	case IPPROTO_UDP:
-	case IPPROTO_DCCP:
-	case IPPROTO_ESP:
-	case IPPROTO_AH:
-	case IPPROTO_SCTP:
-	case IPPROTO_UDPLITE:
-		if (pskb_may_pull(skb, (ihl * 4) + 4)) {
-			ports.v32 = * (__force u32 *) (skb->data + (ihl * 4));
+
+	ports.v32 = 0;
+	poff = proto_ports_offset(ip_proto);
+	if (poff >= 0) {
+		nhoff += ihl * 4 + poff;
+		if (pskb_may_pull(skb, nhoff + 4)) {
+			ports.v32 = * (__force u32 *) (skb->data + nhoff);
 			if (ports.v16[1] < ports.v16[0])
 				swap(ports.v16[0], ports.v16[1]);
-			break;
 		}
-	default:
-		ports.v32 = 0;
-		break;
 	}
 
 	/* get a consistent hash (same value on both flow directions) */
 	if (addr2 < addr1)
 		swap(addr1, addr2);
-	skb->rxhash = jhash_3words(addr1, addr2, ports.v32, hashrnd);
-	if (!skb->rxhash)
-		skb->rxhash = 1;
 
-got_hash:
+	hash = jhash_3words(addr1, addr2, ports.v32, hashrnd);
+	if (!hash)
+		hash = 1;
+
+done:
+	return hash;
+}
+EXPORT_SYMBOL(__skb_get_rxhash);
+
+#ifdef CONFIG_RPS
+
+/* One global table that all flow-based protocols share. */
+struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly;
+EXPORT_SYMBOL(rps_sock_flow_table);
+
+/*
+ * get_rps_cpu is called from netif_receive_skb and returns the target
+ * CPU from the RPS map of the receiving queue for a given skb.
+ * rcu_read_lock must be held on entry.
+ */
+static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
+		       struct rps_dev_flow **rflowp)
+{
+	struct netdev_rx_queue *rxqueue;
+	struct rps_map *map;
+	struct rps_dev_flow_table *flow_table;
+	struct rps_sock_flow_table *sock_flow_table;
+	int cpu = -1;
+	u16 tcpu;
+
+	if (skb_rx_queue_recorded(skb)) {
+		u16 index = skb_get_rx_queue(skb);
+		if (unlikely(index >= dev->real_num_rx_queues)) {
+			WARN_ONCE(dev->real_num_rx_queues > 1,
+				  "%s received packet on queue %u, but number "
+				  "of RX queues is %u\n",
+				  dev->name, index, dev->real_num_rx_queues);
+			goto done;
+		}
+		rxqueue = dev->_rx + index;
+	} else
+		rxqueue = dev->_rx;
+
+	map = rcu_dereference(rxqueue->rps_map);
+	if (map) {
+		if (map->len == 1) {
+			tcpu = map->cpus[0];
+			if (cpu_online(tcpu))
+				cpu = tcpu;
+			goto done;
+		}
+	} else if (!rcu_dereference_raw(rxqueue->rps_flow_table)) {
+		goto done;
+	}
+
+	skb_reset_network_header(skb);
+	if (!skb_get_rxhash(skb))
+		goto done;
+
 	flow_table = rcu_dereference(rxqueue->rps_flow_table);
 	sock_flow_table = rcu_dereference(rps_sock_flow_table);
 	if (flow_table && sock_flow_table) {
@@ -2401,7 +2495,6 @@ got_hash:
 		}
 	}
 
-	map = rcu_dereference(rxqueue->rps_map);
 	if (map) {
 		tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32];
 
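The hashing itself is direction-invariant because both the address pair and the port pair are put into canonical order before being fed to jhash_3words(), so A-to-B and B-to-A packets of one flow produce the same rxhash. A standalone sketch of that idea, where mix() is a stand-in for the kernel's seeded jhash rather than the real function:

#include <stdint.h>

/* not the kernel's jhash; any mixing function illustrates the point */
static uint32_t mix(uint32_t a, uint32_t b, uint32_t c)
{
	return a * 2654435761u ^ b * 2246822519u ^ c;
}

uint32_t flow_hash(uint32_t saddr, uint32_t daddr,
		   uint16_t sport, uint16_t dport)
{
	uint32_t tmp;
	uint16_t tp;

	if (daddr < saddr) {		/* canonical address order */
		tmp = saddr; saddr = daddr; daddr = tmp;
	}
	if (dport < sport) {		/* canonical port order */
		tp = sport; sport = dport; dport = tp;
	}
	return mix(saddr, daddr, ((uint32_t)sport << 16) | dport);
}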
@@ -2487,6 +2580,7 @@ enqueue:
 
 	local_irq_restore(flags);
 
+	atomic_long_inc(&skb->dev->rx_dropped);
 	kfree_skb(skb);
 	return NET_RX_DROP;
 }
@@ -2643,11 +2737,10 @@ EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook)
  * the ingress scheduler, you just cant add policies on ingress.
  *
  */
-static int ing_filter(struct sk_buff *skb)
+static int ing_filter(struct sk_buff *skb, struct netdev_queue *rxq)
 {
 	struct net_device *dev = skb->dev;
 	u32 ttl = G_TC_RTTL(skb->tc_verd);
-	struct netdev_queue *rxq;
 	int result = TC_ACT_OK;
 	struct Qdisc *q;
 
@@ -2661,8 +2754,6 @@ static int ing_filter(struct sk_buff *skb)
 	skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
 	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
 
-	rxq = &dev->rx_queue;
-
 	q = rxq->qdisc;
 	if (q != &noop_qdisc) {
 		spin_lock(qdisc_lock(q));
@@ -2678,7 +2769,9 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb,
 					 struct packet_type **pt_prev,
 					 int *ret, struct net_device *orig_dev)
 {
-	if (skb->dev->rx_queue.qdisc == &noop_qdisc)
+	struct netdev_queue *rxq = rcu_dereference(skb->dev->ingress_queue);
+
+	if (!rxq || rxq->qdisc == &noop_qdisc)
 		goto out;
 
 	if (*pt_prev) {
@@ -2686,7 +2779,7 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb,
 		*pt_prev = NULL;
 	}
 
-	switch (ing_filter(skb)) {
+	switch (ing_filter(skb, rxq)) {
 	case TC_ACT_SHOT:
 	case TC_ACT_STOLEN:
 		kfree_skb(skb);
@@ -2699,33 +2792,6 @@ out:
 }
 #endif
 
-/*
- * netif_nit_deliver - deliver received packets to network taps
- * @skb: buffer
- *
- * This function is used to deliver incoming packets to network
- * taps. It should be used when the normal netif_receive_skb path
- * is bypassed, for example because of VLAN acceleration.
- */
-void netif_nit_deliver(struct sk_buff *skb)
-{
-	struct packet_type *ptype;
-
-	if (list_empty(&ptype_all))
-		return;
-
-	skb_reset_network_header(skb);
-	skb_reset_transport_header(skb);
-	skb->mac_len = skb->network_header - skb->mac_header;
-
-	rcu_read_lock();
-	list_for_each_entry_rcu(ptype, &ptype_all, list) {
-		if (!ptype->dev || ptype->dev == skb->dev)
-			deliver_skb(skb, ptype, skb->dev);
-	}
-	rcu_read_unlock();
-}
-
 /**
  * netdev_rx_handler_register - register receive handler
  * @dev: device to register a handler for
@@ -2836,8 +2902,6 @@ static int __netif_receive_skb(struct sk_buff *skb)
 		net_timestamp_check(skb);
 
 	trace_netif_receive_skb(skb);
-	if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb))
-		return NET_RX_SUCCESS;
 
 	/* if we've gotten here through NAPI, check netpoll */
 	if (netpoll_receive_skb(skb))
@@ -2851,8 +2915,7 @@ static int __netif_receive_skb(struct sk_buff *skb)
 	 * be delivered to pkt handlers that are exact matches.  Also
 	 * the deliver_no_wcard flag will be set.  If packet handlers
 	 * are sensitive to duplicate packets these skbs will need to
-	 * be dropped at the handler.  The vlan accel path may have
-	 * already set the deliver_no_wcard flag.
+	 * be dropped at the handler.
 	 */
 	null_or_orig = NULL;
 	orig_dev = skb->dev;
@@ -2911,6 +2974,18 @@ ncls:
 		goto out;
 	}
 
+	if (vlan_tx_tag_present(skb)) {
+		if (pt_prev) {
+			ret = deliver_skb(skb, pt_prev, orig_dev);
+			pt_prev = NULL;
+		}
+		if (vlan_hwaccel_do_receive(&skb)) {
+			ret = __netif_receive_skb(skb);
+			goto out;
+		} else if (unlikely(!skb))
+			goto out;
+	}
+
 	/*
 	 * Make sure frames received on VLAN interfaces stacked on
 	 * bonding interfaces still make their way to any base bonding
@@ -2938,6 +3013,7 @@ ncls:
 	if (pt_prev) {
 		ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
 	} else {
+		atomic_long_inc(&skb->dev->rx_dropped);
 		kfree_skb(skb);
 		/* Jamal, now you will not able to escape explaining
 		 * me how you were going to use this. :-)
@@ -3058,7 +3134,7 @@ out:
 	return netif_receive_skb(skb);
 }
 
-static void napi_gro_flush(struct napi_struct *napi)
+inline void napi_gro_flush(struct napi_struct *napi)
 {
 	struct sk_buff *skb, *next;
 
@@ -3071,6 +3147,7 @@ static void napi_gro_flush(struct napi_struct *napi)
 	napi->gro_count = 0;
 	napi->gro_list = NULL;
 }
+EXPORT_SYMBOL(napi_gro_flush);
 
 enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
@@ -3085,7 +3162,7 @@ enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 	if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb))
 		goto normal;
 
-	if (skb_is_gso(skb) || skb_has_frags(skb))
+	if (skb_is_gso(skb) || skb_has_frag_list(skb))
 		goto normal;
 
 	rcu_read_lock();
@@ -3164,16 +3241,19 @@ normal:
 }
 EXPORT_SYMBOL(dev_gro_receive);
 
-static gro_result_t
+static inline gro_result_t
 __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
 	struct sk_buff *p;
 
 	for (p = napi->gro_list; p; p = p->next) {
-		NAPI_GRO_CB(p)->same_flow =
-			(p->dev == skb->dev) &&
-			!compare_ether_header(skb_mac_header(p),
+		unsigned long diffs;
+
+		diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
+		diffs |= p->vlan_tci ^ skb->vlan_tci;
+		diffs |= compare_ether_header(skb_mac_header(p),
 					      skb_gro_mac_header(skb));
+		NAPI_GRO_CB(p)->same_flow = !diffs;
 		NAPI_GRO_CB(p)->flush = 0;
 	}
 
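__napi_gro_receive() now folds its match conditions into one accumulated diffs word: XOR yields zero for equal scalars, the results are OR-ed together, and a single test decides same_flow. The idiom in isolation; struct pkt and same_flow() are hypothetical, with memcmp() standing in for compare_ether_header():

#include <string.h>

struct pkt {				/* hypothetical flow descriptor */
	const void *dev;
	unsigned short vlan_tci;
	unsigned char eth[12];		/* dst + src MAC, as compared above */
};

static inline int same_flow(const struct pkt *a, const struct pkt *b)
{
	unsigned long diffs;

	diffs  = (unsigned long)a->dev ^ (unsigned long)b->dev;
	diffs |= a->vlan_tci ^ b->vlan_tci;
	diffs |= !!memcmp(a->eth, b->eth, sizeof(a->eth));
	return !diffs;	/* zero accumulated difference == same flow */
}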
@@ -3226,14 +3306,14 @@ gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 }
 EXPORT_SYMBOL(napi_gro_receive);
 
-void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
+static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
 {
 	__skb_pull(skb, skb_headlen(skb));
 	skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb));
+	skb->vlan_tci = 0;
 
 	napi->skb = skb;
 }
-EXPORT_SYMBOL(napi_reuse_skb);
 
 struct sk_buff *napi_get_frags(struct napi_struct *napi)
 {
@@ -4867,21 +4947,6 @@ static void rollback_registered(struct net_device *dev)
 	rollback_registered_many(&single);
 }
 
-static void __netdev_init_queue_locks_one(struct net_device *dev,
-					  struct netdev_queue *dev_queue,
-					  void *_unused)
-{
-	spin_lock_init(&dev_queue->_xmit_lock);
-	netdev_set_xmit_lockdep_class(&dev_queue->_xmit_lock, dev->type);
-	dev_queue->xmit_lock_owner = -1;
-}
-
-static void netdev_init_queue_locks(struct net_device *dev)
-{
-	netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL);
-	__netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL);
-}
-
 unsigned long netdev_fix_features(unsigned long features, const char *name)
 {
 	/* Fix illegal SG+CSUM combinations. */
@@ -4949,6 +5014,66 @@ void netif_stacked_transfer_operstate(const struct net_device *rootdev,
 }
 EXPORT_SYMBOL(netif_stacked_transfer_operstate);
 
+static int netif_alloc_rx_queues(struct net_device *dev)
+{
+#ifdef CONFIG_RPS
+	unsigned int i, count = dev->num_rx_queues;
+	struct netdev_rx_queue *rx;
+
+	BUG_ON(count < 1);
+
+	rx = kcalloc(count, sizeof(struct netdev_rx_queue), GFP_KERNEL);
+	if (!rx) {
+		pr_err("netdev: Unable to allocate %u rx queues.\n", count);
+		return -ENOMEM;
+	}
+	dev->_rx = rx;
+
+	/*
+	 * Set a pointer to first element in the array which holds the
+	 * reference count.
+	 */
+	for (i = 0; i < count; i++)
+		rx[i].first = rx;
+#endif
+	return 0;
+}
+
+static int netif_alloc_netdev_queues(struct net_device *dev)
+{
+	unsigned int count = dev->num_tx_queues;
+	struct netdev_queue *tx;
+
+	BUG_ON(count < 1);
+
+	tx = kcalloc(count, sizeof(struct netdev_queue), GFP_KERNEL);
+	if (!tx) {
+		pr_err("netdev: Unable to allocate %u tx queues.\n",
+		       count);
+		return -ENOMEM;
+	}
+	dev->_tx = tx;
+	return 0;
+}
+
+static void netdev_init_one_queue(struct net_device *dev,
+				  struct netdev_queue *queue,
+				  void *_unused)
+{
+	queue->dev = dev;
+
+	/* Initialize queue lock */
+	spin_lock_init(&queue->_xmit_lock);
+	netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type);
+	queue->xmit_lock_owner = -1;
+}
+
+static void netdev_init_queues(struct net_device *dev)
+{
+	netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
+	spin_lock_init(&dev->tx_global_lock);
+}
+
 /**
  * register_netdevice - register a network device
  * @dev: device to register
@@ -4982,28 +5107,19 @@ int register_netdevice(struct net_device *dev)
 
 	spin_lock_init(&dev->addr_list_lock);
 	netdev_set_addr_lockdep_class(dev);
-	netdev_init_queue_locks(dev);
 
 	dev->iflink = -1;
 
-#ifdef CONFIG_RPS
-	if (!dev->num_rx_queues) {
-		/*
-		 * Allocate a single RX queue if driver never called
-		 * alloc_netdev_mq
-		 */
-
-		dev->_rx = kzalloc(sizeof(struct netdev_rx_queue), GFP_KERNEL);
-		if (!dev->_rx) {
-			ret = -ENOMEM;
-			goto out;
-		}
-
-		dev->_rx->first = dev->_rx;
-		atomic_set(&dev->_rx->count, 1);
-		dev->num_rx_queues = 1;
-	}
-#endif
+	ret = netif_alloc_rx_queues(dev);
+	if (ret)
+		goto out;
+
+	ret = netif_alloc_netdev_queues(dev);
+	if (ret)
+		goto out;
+
+	netdev_init_queues(dev);
+
 	/* Init, if this function is available */
 	if (dev->netdev_ops->ndo_init) {
 		ret = dev->netdev_ops->ndo_init(dev);
@@ -5043,6 +5159,12 @@ int register_netdevice(struct net_device *dev)
 	if (dev->features & NETIF_F_SG)
 		dev->features |= NETIF_F_GSO;
 
+	/* Enable GRO and NETIF_F_HIGHDMA for vlans by default,
+	 * vlan_dev_init() will do the dev->features check, so these features
+	 * are enabled only if supported by underlying device.
+	 */
+	dev->vlan_features |= (NETIF_F_GRO | NETIF_F_HIGHDMA);
+
 	ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
 	ret = notifier_to_errno(ret);
 	if (ret)
@@ -5113,9 +5235,6 @@ int init_dummy_netdev(struct net_device *dev)
 	 */
 	dev->reg_state = NETREG_DUMMY;
 
-	/* initialize the ref count */
-	atomic_set(&dev->refcnt, 1);
-
 	/* NAPI wants this */
 	INIT_LIST_HEAD(&dev->napi_list);
 
@@ -5123,6 +5242,11 @@ int init_dummy_netdev(struct net_device *dev)
 	set_bit(__LINK_STATE_PRESENT, &dev->state);
 	set_bit(__LINK_STATE_START, &dev->state);
 
+	/* Note : We dont allocate pcpu_refcnt for dummy devices,
+	 * because users of this 'device' dont need to change
+	 * its refcount.
+	 */
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(init_dummy_netdev);
@@ -5164,6 +5288,16 @@ out:
 }
 EXPORT_SYMBOL(register_netdev);
 
+int netdev_refcnt_read(const struct net_device *dev)
+{
+	int i, refcnt = 0;
+
+	for_each_possible_cpu(i)
+		refcnt += *per_cpu_ptr(dev->pcpu_refcnt, i);
+	return refcnt;
+}
+EXPORT_SYMBOL(netdev_refcnt_read);
+
 /*
  * netdev_wait_allrefs - wait until all references are gone.
  *
@@ -5178,11 +5312,14 @@ EXPORT_SYMBOL(register_netdev);
 static void netdev_wait_allrefs(struct net_device *dev)
 {
 	unsigned long rebroadcast_time, warning_time;
+	int refcnt;
 
 	linkwatch_forget_dev(dev);
 
 	rebroadcast_time = warning_time = jiffies;
-	while (atomic_read(&dev->refcnt) != 0) {
+	refcnt = netdev_refcnt_read(dev);
+
+	while (refcnt != 0) {
 		if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
 			rtnl_lock();
 
@@ -5209,11 +5346,13 @@ static void netdev_wait_allrefs(struct net_device *dev)
 
 		msleep(250);
 
+		refcnt = netdev_refcnt_read(dev);
+
 		if (time_after(jiffies, warning_time + 10 * HZ)) {
 			printk(KERN_EMERG "unregister_netdevice: "
 			       "waiting for %s to become free. Usage "
 			       "count = %d\n",
-			       dev->name, atomic_read(&dev->refcnt));
+			       dev->name, refcnt);
 			warning_time = jiffies;
 		}
 	}
@@ -5271,9 +5410,9 @@ void netdev_run_todo(void)
 		netdev_wait_allrefs(dev);
 
 		/* paranoia */
-		BUG_ON(atomic_read(&dev->refcnt));
-		WARN_ON(dev->ip_ptr);
-		WARN_ON(dev->ip6_ptr);
+		BUG_ON(netdev_refcnt_read(dev));
+		WARN_ON(rcu_dereference_raw(dev->ip_ptr));
+		WARN_ON(rcu_dereference_raw(dev->ip6_ptr));
 		WARN_ON(dev->dn_ptr);
 
 		if (dev->destructor)
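With dev->refcnt gone, reads become a sum over all possible CPUs, as netdev_refcnt_read() above shows; the win is on the write side, which no longer bounces a shared cache line. A sketch of what the matching dev_hold()/dev_put() turn into — the real bodies live in include/linux/netdevice.h, outside this diff, so the names here are illustrative:

static inline void sample_dev_hold(struct net_device *dev)
{
	this_cpu_inc(*dev->pcpu_refcnt);	/* touches only this CPU's slot */
}

static inline void sample_dev_put(struct net_device *dev)
{
	this_cpu_dec(*dev->pcpu_refcnt);	/* slot may go negative; only the sum matters */
}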
@@ -5350,30 +5489,34 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
 
 	if (ops->ndo_get_stats64) {
 		memset(storage, 0, sizeof(*storage));
-		return ops->ndo_get_stats64(dev, storage);
-	}
-	if (ops->ndo_get_stats) {
+		ops->ndo_get_stats64(dev, storage);
+	} else if (ops->ndo_get_stats) {
 		netdev_stats_to_stats64(storage, ops->ndo_get_stats(dev));
-		return storage;
+	} else {
+		netdev_stats_to_stats64(storage, &dev->stats);
+		dev_txq_stats_fold(dev, storage);
 	}
-	netdev_stats_to_stats64(storage, &dev->stats);
-	dev_txq_stats_fold(dev, storage);
+	storage->rx_dropped += atomic_long_read(&dev->rx_dropped);
 	return storage;
 }
 EXPORT_SYMBOL(dev_get_stats);
 
-static void netdev_init_one_queue(struct net_device *dev,
-				  struct netdev_queue *queue,
-				  void *_unused)
+struct netdev_queue *dev_ingress_queue_create(struct net_device *dev)
 {
-	queue->dev = dev;
-}
+	struct netdev_queue *queue = dev_ingress_queue(dev);
 
-static void netdev_init_queues(struct net_device *dev)
-{
-	netdev_init_one_queue(dev, &dev->rx_queue, NULL);
-	netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
-	spin_lock_init(&dev->tx_global_lock);
+#ifdef CONFIG_NET_CLS_ACT
+	if (queue)
+		return queue;
+	queue = kzalloc(sizeof(*queue), GFP_KERNEL);
+	if (!queue)
+		return NULL;
+	netdev_init_one_queue(dev, queue, NULL);
+	queue->qdisc = &noop_qdisc;
+	queue->qdisc_sleeping = &noop_qdisc;
+	rcu_assign_pointer(dev->ingress_queue, queue);
+#endif
+	return queue;
 }
 
 /**
@@ -5390,17 +5533,18 @@ static void netdev_init_queues(struct net_device *dev)
 struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 		void (*setup)(struct net_device *), unsigned int queue_count)
 {
-	struct netdev_queue *tx;
 	struct net_device *dev;
 	size_t alloc_size;
 	struct net_device *p;
-#ifdef CONFIG_RPS
-	struct netdev_rx_queue *rx;
-	int i;
-#endif
 
 	BUG_ON(strlen(name) >= sizeof(dev->name));
 
+	if (queue_count < 1) {
+		pr_err("alloc_netdev: Unable to allocate device "
+		       "with zero queues.\n");
+		return NULL;
+	}
+
 	alloc_size = sizeof(struct net_device);
 	if (sizeof_priv) {
 		/* ensure 32-byte alignment of private area */
@@ -5416,55 +5560,31 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 		return NULL;
 	}
 
-	tx = kcalloc(queue_count, sizeof(struct netdev_queue), GFP_KERNEL);
-	if (!tx) {
-		printk(KERN_ERR "alloc_netdev: Unable to allocate "
-		       "tx qdiscs.\n");
-		goto free_p;
-	}
-
-#ifdef CONFIG_RPS
-	rx = kcalloc(queue_count, sizeof(struct netdev_rx_queue), GFP_KERNEL);
-	if (!rx) {
-		printk(KERN_ERR "alloc_netdev: Unable to allocate "
-		       "rx queues.\n");
-		goto free_tx;
-	}
-
-	atomic_set(&rx->count, queue_count);
-
-	/*
-	 * Set a pointer to first element in the array which holds the
-	 * reference count.
-	 */
-	for (i = 0; i < queue_count; i++)
-		rx[i].first = rx;
-#endif
-
 	dev = PTR_ALIGN(p, NETDEV_ALIGN);
 	dev->padded = (char *)dev - (char *)p;
 
+	dev->pcpu_refcnt = alloc_percpu(int);
+	if (!dev->pcpu_refcnt)
+		goto free_p;
+
 	if (dev_addr_init(dev))
-		goto free_rx;
+		goto free_pcpu;
 
 	dev_mc_init(dev);
 	dev_uc_init(dev);
 
 	dev_net_set(dev, &init_net);
 
-	dev->_tx = tx;
 	dev->num_tx_queues = queue_count;
 	dev->real_num_tx_queues = queue_count;
 
 #ifdef CONFIG_RPS
-	dev->_rx = rx;
 	dev->num_rx_queues = queue_count;
+	dev->real_num_rx_queues = queue_count;
 #endif
 
 	dev->gso_max_size = GSO_MAX_SIZE;
 
-	netdev_init_queues(dev);
-
 	INIT_LIST_HEAD(&dev->ethtool_ntuple_list.list);
 	dev->ethtool_ntuple_list.count = 0;
 	INIT_LIST_HEAD(&dev->napi_list);
@@ -5475,12 +5595,8 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 	strcpy(dev->name, name);
 	return dev;
 
-free_rx:
-#ifdef CONFIG_RPS
-	kfree(rx);
-free_tx:
-#endif
-	kfree(tx);
+free_pcpu:
+	free_percpu(dev->pcpu_refcnt);
 free_p:
 	kfree(p);
 	return NULL;
@@ -5503,6 +5619,8 @@ void free_netdev(struct net_device *dev)
 
 	kfree(dev->_tx);
 
+	kfree(rcu_dereference_raw(dev->ingress_queue));
+
 	/* Flush device addresses */
 	dev_addr_flush(dev);
 
@@ -5512,6 +5630,9 @@ void free_netdev(struct net_device *dev)
 	list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
 		netif_napi_del(p);
 
+	free_percpu(dev->pcpu_refcnt);
+	dev->pcpu_refcnt = NULL;
+
 	/* Compatibility with error handling in drivers */
 	if (dev->reg_state == NETREG_UNINITIALIZED) {
 		kfree((char *)dev - dev->padded);
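Taken together, the allocation changes move queue-array setup out of alloc_netdev_mq() and into registration, add the per-CPU refcount, and make a zero queue_count an outright failure. A hypothetical probe function exercising that lifecycle; struct sample_priv and the function name are illustrative:

#include <linux/netdevice.h>
#include <linux/etherdevice.h>

struct sample_priv {
	int placeholder;		/* stand-in for real driver state */
};

static int sample_probe(void)
{
	struct net_device *dev;
	int err;

	dev = alloc_netdev_mq(sizeof(struct sample_priv), "sample%d",
			      ether_setup, 4);	/* queue_count 0 now fails */
	if (!dev)
		return -ENOMEM;

	err = register_netdev(dev);	/* _tx[]/_rx[] are allocated here now */
	if (err)
		free_netdev(dev);	/* also frees pcpu_refcnt, ingress queue */
	return err;
}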
@@ -5666,6 +5787,10 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
 
 	/* Notify protocols, that we are about to destroy
 	   this device. They should clean all the things.
+
+	   Note that dev->reg_state stays at NETREG_REGISTERED.
+	   This is wanted because this way 8021q and macvlan know
+	   the device is just moving and can keep their slaves up.
 	*/
 	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
 	call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);