Diffstat (limited to 'net/core/dev.c')
-rw-r--r-- | net/core/dev.c | 598
1 file changed, 363 insertions, 235 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 8e07109cc0ef..6d4218cdb739 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -129,6 +129,7 @@ | |||
129 | #include <linux/random.h> | 129 | #include <linux/random.h> |
130 | #include <trace/events/napi.h> | 130 | #include <trace/events/napi.h> |
131 | #include <linux/pci.h> | 131 | #include <linux/pci.h> |
132 | #include <linux/inetdevice.h> | ||
132 | 133 | ||
133 | #include "net-sysfs.h" | 134 | #include "net-sysfs.h" |
134 | 135 | ||
@@ -371,6 +372,14 @@ static inline void netdev_set_addr_lockdep_class(struct net_device *dev) | |||
371 | * --ANK (980803) | 372 | * --ANK (980803) |
372 | */ | 373 | */ |
373 | 374 | ||
375 | static inline struct list_head *ptype_head(const struct packet_type *pt) | ||
376 | { | ||
377 | if (pt->type == htons(ETH_P_ALL)) | ||
378 | return &ptype_all; | ||
379 | else | ||
380 | return &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK]; | ||
381 | } | ||
382 | |||
374 | /** | 383 | /** |
375 | * dev_add_pack - add packet handler | 384 | * dev_add_pack - add packet handler |
376 | * @pt: packet type declaration | 385 | * @pt: packet type declaration |
@@ -386,16 +395,11 @@ static inline void netdev_set_addr_lockdep_class(struct net_device *dev) | |||
386 | 395 | ||
387 | void dev_add_pack(struct packet_type *pt) | 396 | void dev_add_pack(struct packet_type *pt) |
388 | { | 397 | { |
389 | int hash; | 398 | struct list_head *head = ptype_head(pt); |
390 | 399 | ||
391 | spin_lock_bh(&ptype_lock); | 400 | spin_lock(&ptype_lock); |
392 | if (pt->type == htons(ETH_P_ALL)) | 401 | list_add_rcu(&pt->list, head); |
393 | list_add_rcu(&pt->list, &ptype_all); | 402 | spin_unlock(&ptype_lock); |
394 | else { | ||
395 | hash = ntohs(pt->type) & PTYPE_HASH_MASK; | ||
396 | list_add_rcu(&pt->list, &ptype_base[hash]); | ||
397 | } | ||
398 | spin_unlock_bh(&ptype_lock); | ||
399 | } | 403 | } |
400 | EXPORT_SYMBOL(dev_add_pack); | 404 | EXPORT_SYMBOL(dev_add_pack); |
401 | 405 | ||
@@ -414,15 +418,10 @@ EXPORT_SYMBOL(dev_add_pack); | |||
414 | */ | 418 | */ |
415 | void __dev_remove_pack(struct packet_type *pt) | 419 | void __dev_remove_pack(struct packet_type *pt) |
416 | { | 420 | { |
417 | struct list_head *head; | 421 | struct list_head *head = ptype_head(pt); |
418 | struct packet_type *pt1; | 422 | struct packet_type *pt1; |
419 | 423 | ||
420 | spin_lock_bh(&ptype_lock); | 424 | spin_lock(&ptype_lock); |
421 | |||
422 | if (pt->type == htons(ETH_P_ALL)) | ||
423 | head = &ptype_all; | ||
424 | else | ||
425 | head = &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK]; | ||
426 | 425 | ||
427 | list_for_each_entry(pt1, head, list) { | 426 | list_for_each_entry(pt1, head, list) { |
428 | if (pt == pt1) { | 427 | if (pt == pt1) { |
@@ -433,7 +432,7 @@ void __dev_remove_pack(struct packet_type *pt) | |||
433 | 432 | ||
434 | printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt); | 433 | printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt); |
435 | out: | 434 | out: |
436 | spin_unlock_bh(&ptype_lock); | 435 | spin_unlock(&ptype_lock); |
437 | } | 436 | } |
438 | EXPORT_SYMBOL(__dev_remove_pack); | 437 | EXPORT_SYMBOL(__dev_remove_pack); |
439 | 438 | ||
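The two hunks above replace the duplicated bucket selection in dev_add_pack() and __dev_remove_pack() with the new ptype_head() helper: ETH_P_ALL taps live on the ptype_all list, every other EtherType is hashed into ptype_base[ntohs(type) & PTYPE_HASH_MASK]. A minimal userspace sketch of that bucket choice follows; the 16-entry hash size is an assumption for illustration, and the stand-in returns -1 where the kernel would pick the ptype_all list.

    #include <stdio.h>
    #include <stdint.h>
    #include <arpa/inet.h>   /* htons/ntohs */

    #define PTYPE_HASH_SIZE 16                      /* assumed bucket count */
    #define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1)
    #define ETH_P_ALL  0x0003
    #define ETH_P_IP   0x0800
    #define ETH_P_ARP  0x0806

    /* Mirror of the bucket choice; -1 stands in for the ptype_all list. */
    static int ptype_bucket(uint16_t type_be)
    {
        if (type_be == htons(ETH_P_ALL))
            return -1;
        return ntohs(type_be) & PTYPE_HASH_MASK;
    }

    int main(void)
    {
        printf("ETH_P_ALL -> %d (wildcard list)\n", ptype_bucket(htons(ETH_P_ALL)));
        printf("ETH_P_IP  -> bucket %d\n", ptype_bucket(htons(ETH_P_IP)));
        printf("ETH_P_ARP -> bucket %d\n", ptype_bucket(htons(ETH_P_ARP)));
        return 0;
    }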
@@ -1484,8 +1483,9 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) | |||
1484 | skb_orphan(skb); | 1483 | skb_orphan(skb); |
1485 | nf_reset(skb); | 1484 | nf_reset(skb); |
1486 | 1485 | ||
1487 | if (!(dev->flags & IFF_UP) || | 1486 | if (unlikely(!(dev->flags & IFF_UP) || |
1488 | (skb->len > (dev->mtu + dev->hard_header_len + VLAN_HLEN))) { | 1487 | (skb->len > (dev->mtu + dev->hard_header_len + VLAN_HLEN)))) { |
1488 | atomic_long_inc(&dev->rx_dropped); | ||
1489 | kfree_skb(skb); | 1489 | kfree_skb(skb); |
1490 | return NET_RX_DROP; | 1490 | return NET_RX_DROP; |
1491 | } | 1491 | } |
@@ -1553,21 +1553,56 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) | |||
1553 | * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues | 1553 | * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues |
1554 | * greater then real_num_tx_queues stale skbs on the qdisc must be flushed. | 1554 | * greater then real_num_tx_queues stale skbs on the qdisc must be flushed. |
1555 | */ | 1555 | */ |
1556 | void netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) | 1556 | int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) |
1557 | { | 1557 | { |
1558 | unsigned int real_num = dev->real_num_tx_queues; | 1558 | if (txq < 1 || txq > dev->num_tx_queues) |
1559 | return -EINVAL; | ||
1559 | 1560 | ||
1560 | if (unlikely(txq > dev->num_tx_queues)) | 1561 | if (dev->reg_state == NETREG_REGISTERED) { |
1561 | ; | 1562 | ASSERT_RTNL(); |
1562 | else if (txq > real_num) | 1563 | |
1563 | dev->real_num_tx_queues = txq; | 1564 | if (txq < dev->real_num_tx_queues) |
1564 | else if (txq < real_num) { | 1565 | qdisc_reset_all_tx_gt(dev, txq); |
1565 | dev->real_num_tx_queues = txq; | ||
1566 | qdisc_reset_all_tx_gt(dev, txq); | ||
1567 | } | 1566 | } |
1567 | |||
1568 | dev->real_num_tx_queues = txq; | ||
1569 | return 0; | ||
1568 | } | 1570 | } |
1569 | EXPORT_SYMBOL(netif_set_real_num_tx_queues); | 1571 | EXPORT_SYMBOL(netif_set_real_num_tx_queues); |
1570 | 1572 | ||
1573 | #ifdef CONFIG_RPS | ||
1574 | /** | ||
1575 | * netif_set_real_num_rx_queues - set actual number of RX queues used | ||
1576 | * @dev: Network device | ||
1577 | * @rxq: Actual number of RX queues | ||
1578 | * | ||
1579 | * This must be called either with the rtnl_lock held or before | ||
1580 | * registration of the net device. Returns 0 on success, or a | ||
1581 | * negative error code. If called before registration, it always | ||
1582 | * succeeds. | ||
1583 | */ | ||
1584 | int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq) | ||
1585 | { | ||
1586 | int rc; | ||
1587 | |||
1588 | if (rxq < 1 || rxq > dev->num_rx_queues) | ||
1589 | return -EINVAL; | ||
1590 | |||
1591 | if (dev->reg_state == NETREG_REGISTERED) { | ||
1592 | ASSERT_RTNL(); | ||
1593 | |||
1594 | rc = net_rx_queue_update_kobjects(dev, dev->real_num_rx_queues, | ||
1595 | rxq); | ||
1596 | if (rc) | ||
1597 | return rc; | ||
1598 | } | ||
1599 | |||
1600 | dev->real_num_rx_queues = rxq; | ||
1601 | return 0; | ||
1602 | } | ||
1603 | EXPORT_SYMBOL(netif_set_real_num_rx_queues); | ||
1604 | #endif | ||
1605 | |||
1571 | static inline void __netif_reschedule(struct Qdisc *q) | 1606 | static inline void __netif_reschedule(struct Qdisc *q) |
1572 | { | 1607 | { |
1573 | struct softnet_data *sd; | 1608 | struct softnet_data *sd; |
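The hunk above changes netif_set_real_num_tx_queues() from a void helper that silently ignored bad values into one that validates its argument and returns an error, and it adds a matching netif_set_real_num_rx_queues() for the RX side. A userspace sketch of the new contract, with a stand-in struct instead of the kernel's struct net_device:

    #include <stdio.h>
    #include <errno.h>
    #include <stdbool.h>

    /* Stand-in type, not the kernel's struct net_device. */
    struct fake_dev {
        unsigned int num_tx_queues;       /* queues allocated at creation */
        unsigned int real_num_tx_queues;  /* queues currently in use */
        bool registered;
    };

    /* Mirrors the new contract: out-of-range values now fail with -EINVAL
     * instead of being silently dropped as the old void version did. */
    static int set_real_num_tx_queues(struct fake_dev *dev, unsigned int txq)
    {
        if (txq < 1 || txq > dev->num_tx_queues)
            return -EINVAL;

        if (dev->registered && txq < dev->real_num_tx_queues)
            printf("would flush qdiscs on queues >= %u\n", txq);

        dev->real_num_tx_queues = txq;
        return 0;
    }

    int main(void)
    {
        struct fake_dev dev = { .num_tx_queues = 8, .real_num_tx_queues = 8,
                                .registered = true };
        printf("shrink to 4 -> %d\n", set_real_num_tx_queues(&dev, 4));
        printf("grow to 16  -> %d (expected -EINVAL)\n",
               set_real_num_tx_queues(&dev, 16));
        return 0;
    }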
@@ -1659,7 +1694,12 @@ static bool can_checksum_protocol(unsigned long features, __be16 protocol) | |||
1659 | 1694 | ||
1660 | static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb) | 1695 | static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb) |
1661 | { | 1696 | { |
1662 | if (can_checksum_protocol(dev->features, skb->protocol)) | 1697 | int features = dev->features; |
1698 | |||
1699 | if (vlan_tx_tag_present(skb)) | ||
1700 | features &= dev->vlan_features; | ||
1701 | |||
1702 | if (can_checksum_protocol(features, skb->protocol)) | ||
1663 | return true; | 1703 | return true; |
1664 | 1704 | ||
1665 | if (skb->protocol == htons(ETH_P_8021Q)) { | 1705 | if (skb->protocol == htons(ETH_P_8021Q)) { |
@@ -1758,6 +1798,16 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features) | |||
1758 | __be16 type = skb->protocol; | 1798 | __be16 type = skb->protocol; |
1759 | int err; | 1799 | int err; |
1760 | 1800 | ||
1801 | if (type == htons(ETH_P_8021Q)) { | ||
1802 | struct vlan_ethhdr *veh; | ||
1803 | |||
1804 | if (unlikely(!pskb_may_pull(skb, VLAN_ETH_HLEN))) | ||
1805 | return ERR_PTR(-EINVAL); | ||
1806 | |||
1807 | veh = (struct vlan_ethhdr *)skb->data; | ||
1808 | type = veh->h_vlan_encapsulated_proto; | ||
1809 | } | ||
1810 | |||
1761 | skb_reset_mac_header(skb); | 1811 | skb_reset_mac_header(skb); |
1762 | skb->mac_len = skb->network_header - skb->mac_header; | 1812 | skb->mac_len = skb->network_header - skb->mac_header; |
1763 | __skb_pull(skb, skb->mac_len); | 1813 | __skb_pull(skb, skb->mac_len); |
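The hunk above teaches skb_gso_segment() to look through an 802.1Q header: when the outer protocol is ETH_P_8021Q it pulls VLAN_ETH_HLEN bytes and takes h_vlan_encapsulated_proto as the type to segment. A standalone sketch of the same header peek on a raw frame buffer; the struct layout mirrors vlan_ethhdr but is declared locally here:

    #include <stdio.h>
    #include <stdint.h>
    #include <stddef.h>
    #include <arpa/inet.h>

    #define ETH_ALEN      6
    #define ETH_P_8021Q   0x8100
    #define VLAN_ETH_HLEN 18   /* dst + src + TPID + TCI + inner type */

    /* Layout equivalent to struct vlan_ethhdr (packed, network byte order). */
    struct vlan_eth {
        uint8_t  h_dest[ETH_ALEN];
        uint8_t  h_source[ETH_ALEN];
        uint16_t h_vlan_proto;               /* 0x8100 */
        uint16_t h_vlan_TCI;
        uint16_t h_vlan_encapsulated_proto;  /* real payload type, e.g. 0x0800 */
    } __attribute__((packed));

    /* Returns the inner EtherType for a tagged frame, the outer type otherwise;
     * 0 means the frame is too short, the analogue of the -EINVAL path above. */
    static uint16_t payload_ethertype(const uint8_t *frame, size_t len)
    {
        uint16_t outer;

        if (len < 14)
            return 0;
        outer = ntohs(*(const uint16_t *)(frame + 2 * ETH_ALEN));
        if (outer != ETH_P_8021Q)
            return outer;
        if (len < VLAN_ETH_HLEN)   /* mirrors the pskb_may_pull() check */
            return 0;
        return ntohs(((const struct vlan_eth *)frame)->h_vlan_encapsulated_proto);
    }

    int main(void)
    {
        uint8_t tagged[VLAN_ETH_HLEN] = { 0 };
        tagged[12] = 0x81; tagged[13] = 0x00;   /* TPID: 802.1Q */
        tagged[16] = 0x08; tagged[17] = 0x00;   /* inner type: IPv4 */
        printf("inner EtherType: 0x%04x\n",
               payload_ethertype(tagged, sizeof(tagged)));
        return 0;
    }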
@@ -1902,14 +1952,14 @@ static int dev_gso_segment(struct sk_buff *skb) | |||
1902 | 1952 | ||
1903 | /* | 1953 | /* |
1904 | * Try to orphan skb early, right before transmission by the device. | 1954 | * Try to orphan skb early, right before transmission by the device. |
1905 | * We cannot orphan skb if tx timestamp is requested, since | 1955 | * We cannot orphan skb if tx timestamp is requested or the sk-reference |
1906 | * drivers need to call skb_tstamp_tx() to send the timestamp. | 1956 | * is needed on driver level for other reasons, e.g. see net/can/raw.c |
1907 | */ | 1957 | */ |
1908 | static inline void skb_orphan_try(struct sk_buff *skb) | 1958 | static inline void skb_orphan_try(struct sk_buff *skb) |
1909 | { | 1959 | { |
1910 | struct sock *sk = skb->sk; | 1960 | struct sock *sk = skb->sk; |
1911 | 1961 | ||
1912 | if (sk && !skb_tx(skb)->flags) { | 1962 | if (sk && !skb_shinfo(skb)->tx_flags) { |
1913 | /* skb_tx_hash() wont be able to get sk. | 1963 | /* skb_tx_hash() wont be able to get sk. |
1914 | * We copy sk_hash into skb->rxhash | 1964 | * We copy sk_hash into skb->rxhash |
1915 | */ | 1965 | */ |
@@ -1929,9 +1979,14 @@ static inline void skb_orphan_try(struct sk_buff *skb) | |||
1929 | static inline int skb_needs_linearize(struct sk_buff *skb, | 1979 | static inline int skb_needs_linearize(struct sk_buff *skb, |
1930 | struct net_device *dev) | 1980 | struct net_device *dev) |
1931 | { | 1981 | { |
1982 | int features = dev->features; | ||
1983 | |||
1984 | if (skb->protocol == htons(ETH_P_8021Q) || vlan_tx_tag_present(skb)) | ||
1985 | features &= dev->vlan_features; | ||
1986 | |||
1932 | return skb_is_nonlinear(skb) && | 1987 | return skb_is_nonlinear(skb) && |
1933 | ((skb_has_frags(skb) && !(dev->features & NETIF_F_FRAGLIST)) || | 1988 | ((skb_has_frag_list(skb) && !(features & NETIF_F_FRAGLIST)) || |
1934 | (skb_shinfo(skb)->nr_frags && (!(dev->features & NETIF_F_SG) || | 1989 | (skb_shinfo(skb)->nr_frags && (!(features & NETIF_F_SG) || |
1935 | illegal_highdma(dev, skb)))); | 1990 | illegal_highdma(dev, skb)))); |
1936 | } | 1991 | } |
1937 | 1992 | ||
@@ -1954,6 +2009,15 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, | |||
1954 | 2009 | ||
1955 | skb_orphan_try(skb); | 2010 | skb_orphan_try(skb); |
1956 | 2011 | ||
2012 | if (vlan_tx_tag_present(skb) && | ||
2013 | !(dev->features & NETIF_F_HW_VLAN_TX)) { | ||
2014 | skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb)); | ||
2015 | if (unlikely(!skb)) | ||
2016 | goto out; | ||
2017 | |||
2018 | skb->vlan_tci = 0; | ||
2019 | } | ||
2020 | |||
1957 | if (netif_needs_gso(dev, skb)) { | 2021 | if (netif_needs_gso(dev, skb)) { |
1958 | if (unlikely(dev_gso_segment(skb))) | 2022 | if (unlikely(dev_gso_segment(skb))) |
1959 | goto out_kfree_skb; | 2023 | goto out_kfree_skb; |
@@ -2015,6 +2079,7 @@ out_kfree_gso_skb: | |||
2015 | skb->destructor = DEV_GSO_CB(skb)->destructor; | 2079 | skb->destructor = DEV_GSO_CB(skb)->destructor; |
2016 | out_kfree_skb: | 2080 | out_kfree_skb: |
2017 | kfree_skb(skb); | 2081 | kfree_skb(skb); |
2082 | out: | ||
2018 | return rc; | 2083 | return rc; |
2019 | } | 2084 | } |
2020 | 2085 | ||
@@ -2143,6 +2208,9 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, | |||
2143 | return rc; | 2208 | return rc; |
2144 | } | 2209 | } |
2145 | 2210 | ||
2211 | static DEFINE_PER_CPU(int, xmit_recursion); | ||
2212 | #define RECURSION_LIMIT 3 | ||
2213 | |||
2146 | /** | 2214 | /** |
2147 | * dev_queue_xmit - transmit a buffer | 2215 | * dev_queue_xmit - transmit a buffer |
2148 | * @skb: buffer to transmit | 2216 | * @skb: buffer to transmit |
@@ -2208,10 +2276,15 @@ int dev_queue_xmit(struct sk_buff *skb) | |||
2208 | 2276 | ||
2209 | if (txq->xmit_lock_owner != cpu) { | 2277 | if (txq->xmit_lock_owner != cpu) { |
2210 | 2278 | ||
2279 | if (__this_cpu_read(xmit_recursion) > RECURSION_LIMIT) | ||
2280 | goto recursion_alert; | ||
2281 | |||
2211 | HARD_TX_LOCK(dev, txq, cpu); | 2282 | HARD_TX_LOCK(dev, txq, cpu); |
2212 | 2283 | ||
2213 | if (!netif_tx_queue_stopped(txq)) { | 2284 | if (!netif_tx_queue_stopped(txq)) { |
2285 | __this_cpu_inc(xmit_recursion); | ||
2214 | rc = dev_hard_start_xmit(skb, dev, txq); | 2286 | rc = dev_hard_start_xmit(skb, dev, txq); |
2287 | __this_cpu_dec(xmit_recursion); | ||
2215 | if (dev_xmit_complete(rc)) { | 2288 | if (dev_xmit_complete(rc)) { |
2216 | HARD_TX_UNLOCK(dev, txq); | 2289 | HARD_TX_UNLOCK(dev, txq); |
2217 | goto out; | 2290 | goto out; |
@@ -2223,7 +2296,9 @@ int dev_queue_xmit(struct sk_buff *skb) | |||
2223 | "queue packet!\n", dev->name); | 2296 | "queue packet!\n", dev->name); |
2224 | } else { | 2297 | } else { |
2225 | /* Recursion is detected! It is possible, | 2298 | /* Recursion is detected! It is possible, |
2226 | * unfortunately */ | 2299 | * unfortunately |
2300 | */ | ||
2301 | recursion_alert: | ||
2227 | if (net_ratelimit()) | 2302 | if (net_ratelimit()) |
2228 | printk(KERN_CRIT "Dead loop on virtual device " | 2303 | printk(KERN_CRIT "Dead loop on virtual device " |
2229 | "%s, fix it urgently!\n", dev->name); | 2304 | "%s, fix it urgently!\n", dev->name); |
@@ -2259,69 +2334,44 @@ static inline void ____napi_schedule(struct softnet_data *sd, | |||
2259 | __raise_softirq_irqoff(NET_RX_SOFTIRQ); | 2334 | __raise_softirq_irqoff(NET_RX_SOFTIRQ); |
2260 | } | 2335 | } |
2261 | 2336 | ||
2262 | #ifdef CONFIG_RPS | ||
2263 | |||
2264 | /* One global table that all flow-based protocols share. */ | ||
2265 | struct rps_sock_flow_table *rps_sock_flow_table __read_mostly; | ||
2266 | EXPORT_SYMBOL(rps_sock_flow_table); | ||
2267 | |||
2268 | /* | 2337 | /* |
2269 | * get_rps_cpu is called from netif_receive_skb and returns the target | 2338 | * __skb_get_rxhash: calculate a flow hash based on src/dst addresses |
2270 | * CPU from the RPS map of the receiving queue for a given skb. | 2339 | * and src/dst port numbers. Returns a non-zero hash number on success |
2271 | * rcu_read_lock must be held on entry. | 2340 | * and 0 on failure. |
2272 | */ | 2341 | */ |
2273 | static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, | 2342 | __u32 __skb_get_rxhash(struct sk_buff *skb) |
2274 | struct rps_dev_flow **rflowp) | ||
2275 | { | 2343 | { |
2344 | int nhoff, hash = 0, poff; | ||
2276 | struct ipv6hdr *ip6; | 2345 | struct ipv6hdr *ip6; |
2277 | struct iphdr *ip; | 2346 | struct iphdr *ip; |
2278 | struct netdev_rx_queue *rxqueue; | ||
2279 | struct rps_map *map; | ||
2280 | struct rps_dev_flow_table *flow_table; | ||
2281 | struct rps_sock_flow_table *sock_flow_table; | ||
2282 | int cpu = -1; | ||
2283 | u8 ip_proto; | 2347 | u8 ip_proto; |
2284 | u16 tcpu; | ||
2285 | u32 addr1, addr2, ihl; | 2348 | u32 addr1, addr2, ihl; |
2286 | union { | 2349 | union { |
2287 | u32 v32; | 2350 | u32 v32; |
2288 | u16 v16[2]; | 2351 | u16 v16[2]; |
2289 | } ports; | 2352 | } ports; |
2290 | 2353 | ||
2291 | if (skb_rx_queue_recorded(skb)) { | 2354 | nhoff = skb_network_offset(skb); |
2292 | u16 index = skb_get_rx_queue(skb); | ||
2293 | if (unlikely(index >= dev->num_rx_queues)) { | ||
2294 | WARN_ONCE(dev->num_rx_queues > 1, "%s received packet " | ||
2295 | "on queue %u, but number of RX queues is %u\n", | ||
2296 | dev->name, index, dev->num_rx_queues); | ||
2297 | goto done; | ||
2298 | } | ||
2299 | rxqueue = dev->_rx + index; | ||
2300 | } else | ||
2301 | rxqueue = dev->_rx; | ||
2302 | |||
2303 | if (!rxqueue->rps_map && !rxqueue->rps_flow_table) | ||
2304 | goto done; | ||
2305 | |||
2306 | if (skb->rxhash) | ||
2307 | goto got_hash; /* Skip hash computation on packet header */ | ||
2308 | 2355 | ||
2309 | switch (skb->protocol) { | 2356 | switch (skb->protocol) { |
2310 | case __constant_htons(ETH_P_IP): | 2357 | case __constant_htons(ETH_P_IP): |
2311 | if (!pskb_may_pull(skb, sizeof(*ip))) | 2358 | if (!pskb_may_pull(skb, sizeof(*ip) + nhoff)) |
2312 | goto done; | 2359 | goto done; |
2313 | 2360 | ||
2314 | ip = (struct iphdr *) skb->data; | 2361 | ip = (struct iphdr *) (skb->data + nhoff); |
2315 | ip_proto = ip->protocol; | 2362 | if (ip->frag_off & htons(IP_MF | IP_OFFSET)) |
2363 | ip_proto = 0; | ||
2364 | else | ||
2365 | ip_proto = ip->protocol; | ||
2316 | addr1 = (__force u32) ip->saddr; | 2366 | addr1 = (__force u32) ip->saddr; |
2317 | addr2 = (__force u32) ip->daddr; | 2367 | addr2 = (__force u32) ip->daddr; |
2318 | ihl = ip->ihl; | 2368 | ihl = ip->ihl; |
2319 | break; | 2369 | break; |
2320 | case __constant_htons(ETH_P_IPV6): | 2370 | case __constant_htons(ETH_P_IPV6): |
2321 | if (!pskb_may_pull(skb, sizeof(*ip6))) | 2371 | if (!pskb_may_pull(skb, sizeof(*ip6) + nhoff)) |
2322 | goto done; | 2372 | goto done; |
2323 | 2373 | ||
2324 | ip6 = (struct ipv6hdr *) skb->data; | 2374 | ip6 = (struct ipv6hdr *) (skb->data + nhoff); |
2325 | ip_proto = ip6->nexthdr; | 2375 | ip_proto = ip6->nexthdr; |
2326 | addr1 = (__force u32) ip6->saddr.s6_addr32[3]; | 2376 | addr1 = (__force u32) ip6->saddr.s6_addr32[3]; |
2327 | addr2 = (__force u32) ip6->daddr.s6_addr32[3]; | 2377 | addr2 = (__force u32) ip6->daddr.s6_addr32[3]; |
@@ -2330,33 +2380,81 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, | |||
2330 | default: | 2380 | default: |
2331 | goto done; | 2381 | goto done; |
2332 | } | 2382 | } |
2333 | switch (ip_proto) { | 2383 | |
2334 | case IPPROTO_TCP: | 2384 | ports.v32 = 0; |
2335 | case IPPROTO_UDP: | 2385 | poff = proto_ports_offset(ip_proto); |
2336 | case IPPROTO_DCCP: | 2386 | if (poff >= 0) { |
2337 | case IPPROTO_ESP: | 2387 | nhoff += ihl * 4 + poff; |
2338 | case IPPROTO_AH: | 2388 | if (pskb_may_pull(skb, nhoff + 4)) { |
2339 | case IPPROTO_SCTP: | 2389 | ports.v32 = * (__force u32 *) (skb->data + nhoff); |
2340 | case IPPROTO_UDPLITE: | ||
2341 | if (pskb_may_pull(skb, (ihl * 4) + 4)) { | ||
2342 | ports.v32 = * (__force u32 *) (skb->data + (ihl * 4)); | ||
2343 | if (ports.v16[1] < ports.v16[0]) | 2390 | if (ports.v16[1] < ports.v16[0]) |
2344 | swap(ports.v16[0], ports.v16[1]); | 2391 | swap(ports.v16[0], ports.v16[1]); |
2345 | break; | ||
2346 | } | 2392 | } |
2347 | default: | ||
2348 | ports.v32 = 0; | ||
2349 | break; | ||
2350 | } | 2393 | } |
2351 | 2394 | ||
2352 | /* get a consistent hash (same value on both flow directions) */ | 2395 | /* get a consistent hash (same value on both flow directions) */ |
2353 | if (addr2 < addr1) | 2396 | if (addr2 < addr1) |
2354 | swap(addr1, addr2); | 2397 | swap(addr1, addr2); |
2355 | skb->rxhash = jhash_3words(addr1, addr2, ports.v32, hashrnd); | ||
2356 | if (!skb->rxhash) | ||
2357 | skb->rxhash = 1; | ||
2358 | 2398 | ||
2359 | got_hash: | 2399 | hash = jhash_3words(addr1, addr2, ports.v32, hashrnd); |
2400 | if (!hash) | ||
2401 | hash = 1; | ||
2402 | |||
2403 | done: | ||
2404 | return hash; | ||
2405 | } | ||
2406 | EXPORT_SYMBOL(__skb_get_rxhash); | ||
2407 | |||
2408 | #ifdef CONFIG_RPS | ||
2409 | |||
2410 | /* One global table that all flow-based protocols share. */ | ||
2411 | struct rps_sock_flow_table *rps_sock_flow_table __read_mostly; | ||
2412 | EXPORT_SYMBOL(rps_sock_flow_table); | ||
2413 | |||
2414 | /* | ||
2415 | * get_rps_cpu is called from netif_receive_skb and returns the target | ||
2416 | * CPU from the RPS map of the receiving queue for a given skb. | ||
2417 | * rcu_read_lock must be held on entry. | ||
2418 | */ | ||
2419 | static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, | ||
2420 | struct rps_dev_flow **rflowp) | ||
2421 | { | ||
2422 | struct netdev_rx_queue *rxqueue; | ||
2423 | struct rps_map *map = NULL; | ||
2424 | struct rps_dev_flow_table *flow_table; | ||
2425 | struct rps_sock_flow_table *sock_flow_table; | ||
2426 | int cpu = -1; | ||
2427 | u16 tcpu; | ||
2428 | |||
2429 | if (skb_rx_queue_recorded(skb)) { | ||
2430 | u16 index = skb_get_rx_queue(skb); | ||
2431 | if (unlikely(index >= dev->real_num_rx_queues)) { | ||
2432 | WARN_ONCE(dev->real_num_rx_queues > 1, | ||
2433 | "%s received packet on queue %u, but number " | ||
2434 | "of RX queues is %u\n", | ||
2435 | dev->name, index, dev->real_num_rx_queues); | ||
2436 | goto done; | ||
2437 | } | ||
2438 | rxqueue = dev->_rx + index; | ||
2439 | } else | ||
2440 | rxqueue = dev->_rx; | ||
2441 | |||
2442 | if (rxqueue->rps_map) { | ||
2443 | map = rcu_dereference(rxqueue->rps_map); | ||
2444 | if (map && map->len == 1) { | ||
2445 | tcpu = map->cpus[0]; | ||
2446 | if (cpu_online(tcpu)) | ||
2447 | cpu = tcpu; | ||
2448 | goto done; | ||
2449 | } | ||
2450 | } else if (!rxqueue->rps_flow_table) { | ||
2451 | goto done; | ||
2452 | } | ||
2453 | |||
2454 | skb_reset_network_header(skb); | ||
2455 | if (!skb_get_rxhash(skb)) | ||
2456 | goto done; | ||
2457 | |||
2360 | flow_table = rcu_dereference(rxqueue->rps_flow_table); | 2458 | flow_table = rcu_dereference(rxqueue->rps_flow_table); |
2361 | sock_flow_table = rcu_dereference(rps_sock_flow_table); | 2459 | sock_flow_table = rcu_dereference(rps_sock_flow_table); |
2362 | if (flow_table && sock_flow_table) { | 2460 | if (flow_table && sock_flow_table) { |
@@ -2396,7 +2494,6 @@ got_hash: | |||
2396 | } | 2494 | } |
2397 | } | 2495 | } |
2398 | 2496 | ||
2399 | map = rcu_dereference(rxqueue->rps_map); | ||
2400 | if (map) { | 2497 | if (map) { |
2401 | tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32]; | 2498 | tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32]; |
2402 | 2499 | ||
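The hunks above pull the flow-hash computation out of get_rps_cpu() into __skb_get_rxhash(): addresses and ports are ordered before hashing so both directions of a flow get the same rxhash, and the RPS code then indexes the CPU map with ((u64)hash * map->len) >> 32. A userspace sketch of those two steps; the mixing function below is a simple stand-in for jhash_3words() and its random seed, not a reimplementation of it:

    #include <stdio.h>
    #include <stdint.h>

    /* Stand-in mixer; the kernel uses jhash_3words() seeded with hashrnd. */
    static uint32_t mix3(uint32_t a, uint32_t b, uint32_t c)
    {
        uint32_t h = a * 0x9e3779b1u;
        h ^= b + 0x85ebca6bu + (h << 6) + (h >> 2);
        h ^= c + 0xc2b2ae35u + (h << 6) + (h >> 2);
        return h;
    }

    static void swap32(uint32_t *a, uint32_t *b) { uint32_t t = *a; *a = *b; *b = t; }
    static void swap16(uint16_t *a, uint16_t *b) { uint16_t t = *a; *a = *b; *b = t; }

    /* Direction-independent flow hash: order addresses and ports first. */
    static uint32_t flow_hash(uint32_t saddr, uint32_t daddr,
                              uint16_t sport, uint16_t dport)
    {
        uint32_t hash;

        if (daddr < saddr)
            swap32(&saddr, &daddr);
        if (dport < sport)
            swap16(&sport, &dport);

        hash = mix3(saddr, daddr, ((uint32_t)sport << 16) | dport);
        return hash ? hash : 1;        /* 0 is reserved for "no hash" */
    }

    /* The RPS map lookup: scale the 32-bit hash into [0, map_len). */
    static unsigned int rps_cpu_index(uint32_t hash, unsigned int map_len)
    {
        return (unsigned int)(((uint64_t)hash * map_len) >> 32);
    }

    int main(void)
    {
        uint32_t a = flow_hash(0x0a000001, 0x0a000002, 12345, 80);
        uint32_t b = flow_hash(0x0a000002, 0x0a000001, 80, 12345); /* reverse */

        printf("hash fwd=%08x rev=%08x (equal: %s)\n", a, b,
               a == b ? "yes" : "no");
        printf("cpu index in a 4-entry map: %u\n", rps_cpu_index(a, 4));
        return 0;
    }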
@@ -2482,6 +2579,7 @@ enqueue: | |||
2482 | 2579 | ||
2483 | local_irq_restore(flags); | 2580 | local_irq_restore(flags); |
2484 | 2581 | ||
2582 | atomic_long_inc(&skb->dev->rx_dropped); | ||
2485 | kfree_skb(skb); | 2583 | kfree_skb(skb); |
2486 | return NET_RX_DROP; | 2584 | return NET_RX_DROP; |
2487 | } | 2585 | } |
@@ -2636,11 +2734,10 @@ EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook); | |||
2636 | * the ingress scheduler, you just cant add policies on ingress. | 2734 | * the ingress scheduler, you just cant add policies on ingress. |
2637 | * | 2735 | * |
2638 | */ | 2736 | */ |
2639 | static int ing_filter(struct sk_buff *skb) | 2737 | static int ing_filter(struct sk_buff *skb, struct netdev_queue *rxq) |
2640 | { | 2738 | { |
2641 | struct net_device *dev = skb->dev; | 2739 | struct net_device *dev = skb->dev; |
2642 | u32 ttl = G_TC_RTTL(skb->tc_verd); | 2740 | u32 ttl = G_TC_RTTL(skb->tc_verd); |
2643 | struct netdev_queue *rxq; | ||
2644 | int result = TC_ACT_OK; | 2741 | int result = TC_ACT_OK; |
2645 | struct Qdisc *q; | 2742 | struct Qdisc *q; |
2646 | 2743 | ||
@@ -2654,8 +2751,6 @@ static int ing_filter(struct sk_buff *skb) | |||
2654 | skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl); | 2751 | skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl); |
2655 | skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS); | 2752 | skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS); |
2656 | 2753 | ||
2657 | rxq = &dev->rx_queue; | ||
2658 | |||
2659 | q = rxq->qdisc; | 2754 | q = rxq->qdisc; |
2660 | if (q != &noop_qdisc) { | 2755 | if (q != &noop_qdisc) { |
2661 | spin_lock(qdisc_lock(q)); | 2756 | spin_lock(qdisc_lock(q)); |
@@ -2671,7 +2766,9 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb, | |||
2671 | struct packet_type **pt_prev, | 2766 | struct packet_type **pt_prev, |
2672 | int *ret, struct net_device *orig_dev) | 2767 | int *ret, struct net_device *orig_dev) |
2673 | { | 2768 | { |
2674 | if (skb->dev->rx_queue.qdisc == &noop_qdisc) | 2769 | struct netdev_queue *rxq = rcu_dereference(skb->dev->ingress_queue); |
2770 | |||
2771 | if (!rxq || rxq->qdisc == &noop_qdisc) | ||
2675 | goto out; | 2772 | goto out; |
2676 | 2773 | ||
2677 | if (*pt_prev) { | 2774 | if (*pt_prev) { |
@@ -2679,7 +2776,7 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb, | |||
2679 | *pt_prev = NULL; | 2776 | *pt_prev = NULL; |
2680 | } | 2777 | } |
2681 | 2778 | ||
2682 | switch (ing_filter(skb)) { | 2779 | switch (ing_filter(skb, rxq)) { |
2683 | case TC_ACT_SHOT: | 2780 | case TC_ACT_SHOT: |
2684 | case TC_ACT_STOLEN: | 2781 | case TC_ACT_STOLEN: |
2685 | kfree_skb(skb); | 2782 | kfree_skb(skb); |
@@ -2692,33 +2789,6 @@ out: | |||
2692 | } | 2789 | } |
2693 | #endif | 2790 | #endif |
2694 | 2791 | ||
2695 | /* | ||
2696 | * netif_nit_deliver - deliver received packets to network taps | ||
2697 | * @skb: buffer | ||
2698 | * | ||
2699 | * This function is used to deliver incoming packets to network | ||
2700 | * taps. It should be used when the normal netif_receive_skb path | ||
2701 | * is bypassed, for example because of VLAN acceleration. | ||
2702 | */ | ||
2703 | void netif_nit_deliver(struct sk_buff *skb) | ||
2704 | { | ||
2705 | struct packet_type *ptype; | ||
2706 | |||
2707 | if (list_empty(&ptype_all)) | ||
2708 | return; | ||
2709 | |||
2710 | skb_reset_network_header(skb); | ||
2711 | skb_reset_transport_header(skb); | ||
2712 | skb->mac_len = skb->network_header - skb->mac_header; | ||
2713 | |||
2714 | rcu_read_lock(); | ||
2715 | list_for_each_entry_rcu(ptype, &ptype_all, list) { | ||
2716 | if (!ptype->dev || ptype->dev == skb->dev) | ||
2717 | deliver_skb(skb, ptype, skb->dev); | ||
2718 | } | ||
2719 | rcu_read_unlock(); | ||
2720 | } | ||
2721 | |||
2722 | /** | 2792 | /** |
2723 | * netdev_rx_handler_register - register receive handler | 2793 | * netdev_rx_handler_register - register receive handler |
2724 | * @dev: device to register a handler for | 2794 | * @dev: device to register a handler for |
@@ -2828,9 +2898,6 @@ static int __netif_receive_skb(struct sk_buff *skb) | |||
2828 | if (!netdev_tstamp_prequeue) | 2898 | if (!netdev_tstamp_prequeue) |
2829 | net_timestamp_check(skb); | 2899 | net_timestamp_check(skb); |
2830 | 2900 | ||
2831 | if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb)) | ||
2832 | return NET_RX_SUCCESS; | ||
2833 | |||
2834 | /* if we've gotten here through NAPI, check netpoll */ | 2901 | /* if we've gotten here through NAPI, check netpoll */ |
2835 | if (netpoll_receive_skb(skb)) | 2902 | if (netpoll_receive_skb(skb)) |
2836 | return NET_RX_DROP; | 2903 | return NET_RX_DROP; |
@@ -2843,8 +2910,7 @@ static int __netif_receive_skb(struct sk_buff *skb) | |||
2843 | * be delivered to pkt handlers that are exact matches. Also | 2910 | * be delivered to pkt handlers that are exact matches. Also |
2844 | * the deliver_no_wcard flag will be set. If packet handlers | 2911 | * the deliver_no_wcard flag will be set. If packet handlers |
2845 | * are sensitive to duplicate packets these skbs will need to | 2912 | * are sensitive to duplicate packets these skbs will need to |
2846 | * be dropped at the handler. The vlan accel path may have | 2913 | * be dropped at the handler. |
2847 | * already set the deliver_no_wcard flag. | ||
2848 | */ | 2914 | */ |
2849 | null_or_orig = NULL; | 2915 | null_or_orig = NULL; |
2850 | orig_dev = skb->dev; | 2916 | orig_dev = skb->dev; |
@@ -2903,6 +2969,18 @@ ncls: | |||
2903 | goto out; | 2969 | goto out; |
2904 | } | 2970 | } |
2905 | 2971 | ||
2972 | if (vlan_tx_tag_present(skb)) { | ||
2973 | if (pt_prev) { | ||
2974 | ret = deliver_skb(skb, pt_prev, orig_dev); | ||
2975 | pt_prev = NULL; | ||
2976 | } | ||
2977 | if (vlan_hwaccel_do_receive(&skb)) { | ||
2978 | ret = __netif_receive_skb(skb); | ||
2979 | goto out; | ||
2980 | } else if (unlikely(!skb)) | ||
2981 | goto out; | ||
2982 | } | ||
2983 | |||
2906 | /* | 2984 | /* |
2907 | * Make sure frames received on VLAN interfaces stacked on | 2985 | * Make sure frames received on VLAN interfaces stacked on |
2908 | * bonding interfaces still make their way to any base bonding | 2986 | * bonding interfaces still make their way to any base bonding |
@@ -2930,6 +3008,7 @@ ncls: | |||
2930 | if (pt_prev) { | 3008 | if (pt_prev) { |
2931 | ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); | 3009 | ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); |
2932 | } else { | 3010 | } else { |
3011 | atomic_long_inc(&skb->dev->rx_dropped); | ||
2933 | kfree_skb(skb); | 3012 | kfree_skb(skb); |
2934 | /* Jamal, now you will not able to escape explaining | 3013 | /* Jamal, now you will not able to escape explaining |
2935 | * me how you were going to use this. :-) | 3014 | * me how you were going to use this. :-) |
@@ -3050,7 +3129,7 @@ out: | |||
3050 | return netif_receive_skb(skb); | 3129 | return netif_receive_skb(skb); |
3051 | } | 3130 | } |
3052 | 3131 | ||
3053 | static void napi_gro_flush(struct napi_struct *napi) | 3132 | inline void napi_gro_flush(struct napi_struct *napi) |
3054 | { | 3133 | { |
3055 | struct sk_buff *skb, *next; | 3134 | struct sk_buff *skb, *next; |
3056 | 3135 | ||
@@ -3063,6 +3142,7 @@ static void napi_gro_flush(struct napi_struct *napi) | |||
3063 | napi->gro_count = 0; | 3142 | napi->gro_count = 0; |
3064 | napi->gro_list = NULL; | 3143 | napi->gro_list = NULL; |
3065 | } | 3144 | } |
3145 | EXPORT_SYMBOL(napi_gro_flush); | ||
3066 | 3146 | ||
3067 | enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) | 3147 | enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) |
3068 | { | 3148 | { |
@@ -3077,7 +3157,7 @@ enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) | |||
3077 | if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb)) | 3157 | if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb)) |
3078 | goto normal; | 3158 | goto normal; |
3079 | 3159 | ||
3080 | if (skb_is_gso(skb) || skb_has_frags(skb)) | 3160 | if (skb_is_gso(skb) || skb_has_frag_list(skb)) |
3081 | goto normal; | 3161 | goto normal; |
3082 | 3162 | ||
3083 | rcu_read_lock(); | 3163 | rcu_read_lock(); |
@@ -3156,16 +3236,19 @@ normal: | |||
3156 | } | 3236 | } |
3157 | EXPORT_SYMBOL(dev_gro_receive); | 3237 | EXPORT_SYMBOL(dev_gro_receive); |
3158 | 3238 | ||
3159 | static gro_result_t | 3239 | static inline gro_result_t |
3160 | __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) | 3240 | __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) |
3161 | { | 3241 | { |
3162 | struct sk_buff *p; | 3242 | struct sk_buff *p; |
3163 | 3243 | ||
3164 | for (p = napi->gro_list; p; p = p->next) { | 3244 | for (p = napi->gro_list; p; p = p->next) { |
3165 | NAPI_GRO_CB(p)->same_flow = | 3245 | unsigned long diffs; |
3166 | (p->dev == skb->dev) && | 3246 | |
3167 | !compare_ether_header(skb_mac_header(p), | 3247 | diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev; |
3248 | diffs |= p->vlan_tci ^ skb->vlan_tci; | ||
3249 | diffs |= compare_ether_header(skb_mac_header(p), | ||
3168 | skb_gro_mac_header(skb)); | 3250 | skb_gro_mac_header(skb)); |
3251 | NAPI_GRO_CB(p)->same_flow = !diffs; | ||
3169 | NAPI_GRO_CB(p)->flush = 0; | 3252 | NAPI_GRO_CB(p)->flush = 0; |
3170 | } | 3253 | } |
3171 | 3254 | ||
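The hunk above rewrites the GRO same-flow test to accumulate every field difference into one "diffs" word (device pointer, vlan_tci, Ethernet header) and set same_flow to !diffs, rather than chaining boolean comparisons. A small standalone version of that pattern; memcmp() stands in for compare_ether_header(), which likewise returns nonzero when the headers differ:

    #include <stdio.h>
    #include <string.h>
    #include <stdint.h>

    struct flow_key {
        const void *dev;        /* device identity, compared by pointer */
        uint16_t    vlan_tci;
        uint8_t     mac[12];    /* dst + src MAC of the frame */
    };

    /* XOR each field's difference into one accumulator and branch once at
     * the end, the same shape as the __napi_gro_receive() loop above. */
    static int same_flow(const struct flow_key *a, const struct flow_key *b)
    {
        unsigned long diffs;

        diffs  = (unsigned long)a->dev ^ (unsigned long)b->dev;
        diffs |= a->vlan_tci ^ b->vlan_tci;
        diffs |= memcmp(a->mac, b->mac, sizeof(a->mac)) != 0;
        return !diffs;
    }

    int main(void)
    {
        int dev0;   /* any distinct address works as a device identity */
        struct flow_key a = { &dev0, 100, { 0xde, 0xad } };
        struct flow_key b = { &dev0, 100, { 0xde, 0xad } };
        struct flow_key c = { &dev0, 200, { 0xde, 0xad } };

        printf("a vs b: %d\n", same_flow(&a, &b));   /* 1 */
        printf("a vs c: %d\n", same_flow(&a, &c));   /* 0 */
        return 0;
    }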
@@ -3218,14 +3301,14 @@ gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) | |||
3218 | } | 3301 | } |
3219 | EXPORT_SYMBOL(napi_gro_receive); | 3302 | EXPORT_SYMBOL(napi_gro_receive); |
3220 | 3303 | ||
3221 | void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) | 3304 | static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) |
3222 | { | 3305 | { |
3223 | __skb_pull(skb, skb_headlen(skb)); | 3306 | __skb_pull(skb, skb_headlen(skb)); |
3224 | skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb)); | 3307 | skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb)); |
3308 | skb->vlan_tci = 0; | ||
3225 | 3309 | ||
3226 | napi->skb = skb; | 3310 | napi->skb = skb; |
3227 | } | 3311 | } |
3228 | EXPORT_SYMBOL(napi_reuse_skb); | ||
3229 | 3312 | ||
3230 | struct sk_buff *napi_get_frags(struct napi_struct *napi) | 3313 | struct sk_buff *napi_get_frags(struct napi_struct *napi) |
3231 | { | 3314 | { |
@@ -4859,21 +4942,6 @@ static void rollback_registered(struct net_device *dev) | |||
4859 | rollback_registered_many(&single); | 4942 | rollback_registered_many(&single); |
4860 | } | 4943 | } |
4861 | 4944 | ||
4862 | static void __netdev_init_queue_locks_one(struct net_device *dev, | ||
4863 | struct netdev_queue *dev_queue, | ||
4864 | void *_unused) | ||
4865 | { | ||
4866 | spin_lock_init(&dev_queue->_xmit_lock); | ||
4867 | netdev_set_xmit_lockdep_class(&dev_queue->_xmit_lock, dev->type); | ||
4868 | dev_queue->xmit_lock_owner = -1; | ||
4869 | } | ||
4870 | |||
4871 | static void netdev_init_queue_locks(struct net_device *dev) | ||
4872 | { | ||
4873 | netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL); | ||
4874 | __netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL); | ||
4875 | } | ||
4876 | |||
4877 | unsigned long netdev_fix_features(unsigned long features, const char *name) | 4945 | unsigned long netdev_fix_features(unsigned long features, const char *name) |
4878 | { | 4946 | { |
4879 | /* Fix illegal SG+CSUM combinations. */ | 4947 | /* Fix illegal SG+CSUM combinations. */ |
@@ -4941,6 +5009,66 @@ void netif_stacked_transfer_operstate(const struct net_device *rootdev, | |||
4941 | } | 5009 | } |
4942 | EXPORT_SYMBOL(netif_stacked_transfer_operstate); | 5010 | EXPORT_SYMBOL(netif_stacked_transfer_operstate); |
4943 | 5011 | ||
5012 | static int netif_alloc_rx_queues(struct net_device *dev) | ||
5013 | { | ||
5014 | #ifdef CONFIG_RPS | ||
5015 | unsigned int i, count = dev->num_rx_queues; | ||
5016 | struct netdev_rx_queue *rx; | ||
5017 | |||
5018 | BUG_ON(count < 1); | ||
5019 | |||
5020 | rx = kcalloc(count, sizeof(struct netdev_rx_queue), GFP_KERNEL); | ||
5021 | if (!rx) { | ||
5022 | pr_err("netdev: Unable to allocate %u rx queues.\n", count); | ||
5023 | return -ENOMEM; | ||
5024 | } | ||
5025 | dev->_rx = rx; | ||
5026 | |||
5027 | /* | ||
5028 | * Set a pointer to first element in the array which holds the | ||
5029 | * reference count. | ||
5030 | */ | ||
5031 | for (i = 0; i < count; i++) | ||
5032 | rx[i].first = rx; | ||
5033 | #endif | ||
5034 | return 0; | ||
5035 | } | ||
5036 | |||
5037 | static int netif_alloc_netdev_queues(struct net_device *dev) | ||
5038 | { | ||
5039 | unsigned int count = dev->num_tx_queues; | ||
5040 | struct netdev_queue *tx; | ||
5041 | |||
5042 | BUG_ON(count < 1); | ||
5043 | |||
5044 | tx = kcalloc(count, sizeof(struct netdev_queue), GFP_KERNEL); | ||
5045 | if (!tx) { | ||
5046 | pr_err("netdev: Unable to allocate %u tx queues.\n", | ||
5047 | count); | ||
5048 | return -ENOMEM; | ||
5049 | } | ||
5050 | dev->_tx = tx; | ||
5051 | return 0; | ||
5052 | } | ||
5053 | |||
5054 | static void netdev_init_one_queue(struct net_device *dev, | ||
5055 | struct netdev_queue *queue, | ||
5056 | void *_unused) | ||
5057 | { | ||
5058 | queue->dev = dev; | ||
5059 | |||
5060 | /* Initialize queue lock */ | ||
5061 | spin_lock_init(&queue->_xmit_lock); | ||
5062 | netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type); | ||
5063 | queue->xmit_lock_owner = -1; | ||
5064 | } | ||
5065 | |||
5066 | static void netdev_init_queues(struct net_device *dev) | ||
5067 | { | ||
5068 | netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL); | ||
5069 | spin_lock_init(&dev->tx_global_lock); | ||
5070 | } | ||
5071 | |||
4944 | /** | 5072 | /** |
4945 | * register_netdevice - register a network device | 5073 | * register_netdevice - register a network device |
4946 | * @dev: device to register | 5074 | * @dev: device to register |
@@ -4974,28 +5102,19 @@ int register_netdevice(struct net_device *dev) | |||
4974 | 5102 | ||
4975 | spin_lock_init(&dev->addr_list_lock); | 5103 | spin_lock_init(&dev->addr_list_lock); |
4976 | netdev_set_addr_lockdep_class(dev); | 5104 | netdev_set_addr_lockdep_class(dev); |
4977 | netdev_init_queue_locks(dev); | ||
4978 | 5105 | ||
4979 | dev->iflink = -1; | 5106 | dev->iflink = -1; |
4980 | 5107 | ||
4981 | #ifdef CONFIG_RPS | 5108 | ret = netif_alloc_rx_queues(dev); |
4982 | if (!dev->num_rx_queues) { | 5109 | if (ret) |
4983 | /* | 5110 | goto out; |
4984 | * Allocate a single RX queue if driver never called | ||
4985 | * alloc_netdev_mq | ||
4986 | */ | ||
4987 | 5111 | ||
4988 | dev->_rx = kzalloc(sizeof(struct netdev_rx_queue), GFP_KERNEL); | 5112 | ret = netif_alloc_netdev_queues(dev); |
4989 | if (!dev->_rx) { | 5113 | if (ret) |
4990 | ret = -ENOMEM; | 5114 | goto out; |
4991 | goto out; | 5115 | |
4992 | } | 5116 | netdev_init_queues(dev); |
4993 | 5117 | ||
4994 | dev->_rx->first = dev->_rx; | ||
4995 | atomic_set(&dev->_rx->count, 1); | ||
4996 | dev->num_rx_queues = 1; | ||
4997 | } | ||
4998 | #endif | ||
4999 | /* Init, if this function is available */ | 5118 | /* Init, if this function is available */ |
5000 | if (dev->netdev_ops->ndo_init) { | 5119 | if (dev->netdev_ops->ndo_init) { |
5001 | ret = dev->netdev_ops->ndo_init(dev); | 5120 | ret = dev->netdev_ops->ndo_init(dev); |
@@ -5035,6 +5154,12 @@ int register_netdevice(struct net_device *dev) | |||
5035 | if (dev->features & NETIF_F_SG) | 5154 | if (dev->features & NETIF_F_SG) |
5036 | dev->features |= NETIF_F_GSO; | 5155 | dev->features |= NETIF_F_GSO; |
5037 | 5156 | ||
5157 | /* Enable GRO and NETIF_F_HIGHDMA for vlans by default, | ||
5158 | * vlan_dev_init() will do the dev->features check, so these features | ||
5159 | * are enabled only if supported by underlying device. | ||
5160 | */ | ||
5161 | dev->vlan_features |= (NETIF_F_GRO | NETIF_F_HIGHDMA); | ||
5162 | |||
5038 | ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev); | 5163 | ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev); |
5039 | ret = notifier_to_errno(ret); | 5164 | ret = notifier_to_errno(ret); |
5040 | if (ret) | 5165 | if (ret) |
@@ -5105,9 +5230,6 @@ int init_dummy_netdev(struct net_device *dev) | |||
5105 | */ | 5230 | */ |
5106 | dev->reg_state = NETREG_DUMMY; | 5231 | dev->reg_state = NETREG_DUMMY; |
5107 | 5232 | ||
5108 | /* initialize the ref count */ | ||
5109 | atomic_set(&dev->refcnt, 1); | ||
5110 | |||
5111 | /* NAPI wants this */ | 5233 | /* NAPI wants this */ |
5112 | INIT_LIST_HEAD(&dev->napi_list); | 5234 | INIT_LIST_HEAD(&dev->napi_list); |
5113 | 5235 | ||
@@ -5115,6 +5237,11 @@ int init_dummy_netdev(struct net_device *dev) | |||
5115 | set_bit(__LINK_STATE_PRESENT, &dev->state); | 5237 | set_bit(__LINK_STATE_PRESENT, &dev->state); |
5116 | set_bit(__LINK_STATE_START, &dev->state); | 5238 | set_bit(__LINK_STATE_START, &dev->state); |
5117 | 5239 | ||
5240 | /* Note : We dont allocate pcpu_refcnt for dummy devices, | ||
5241 | * because users of this 'device' dont need to change | ||
5242 | * its refcount. | ||
5243 | */ | ||
5244 | |||
5118 | return 0; | 5245 | return 0; |
5119 | } | 5246 | } |
5120 | EXPORT_SYMBOL_GPL(init_dummy_netdev); | 5247 | EXPORT_SYMBOL_GPL(init_dummy_netdev); |
@@ -5156,6 +5283,16 @@ out: | |||
5156 | } | 5283 | } |
5157 | EXPORT_SYMBOL(register_netdev); | 5284 | EXPORT_SYMBOL(register_netdev); |
5158 | 5285 | ||
5286 | int netdev_refcnt_read(const struct net_device *dev) | ||
5287 | { | ||
5288 | int i, refcnt = 0; | ||
5289 | |||
5290 | for_each_possible_cpu(i) | ||
5291 | refcnt += *per_cpu_ptr(dev->pcpu_refcnt, i); | ||
5292 | return refcnt; | ||
5293 | } | ||
5294 | EXPORT_SYMBOL(netdev_refcnt_read); | ||
5295 | |||
5159 | /* | 5296 | /* |
5160 | * netdev_wait_allrefs - wait until all references are gone. | 5297 | * netdev_wait_allrefs - wait until all references are gone. |
5161 | * | 5298 | * |
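The new netdev_refcnt_read() above reflects the switch from a single atomic dev->refcnt to the per-CPU dev->pcpu_refcnt: each CPU adjusts only its own counter, and the true reference count is the sum over all possible CPUs, which is also what netdev_wait_allrefs() and the BUG_ON in netdev_run_todo() now use. A userspace sketch of that summing read, with a plain array standing in for per-CPU storage:

    #include <stdio.h>

    #define NR_CPUS 4

    /* Stand-in for the per-CPU int behind dev->pcpu_refcnt: each CPU only
     * touches its own slot, so hold/put need no shared atomic, and the true
     * count exists only as the sum over all slots. */
    static int pcpu_refcnt[NR_CPUS];

    static void dev_hold_on(int cpu) { pcpu_refcnt[cpu]++; }
    static void dev_put_on(int cpu)  { pcpu_refcnt[cpu]--; }

    /* Mirrors netdev_refcnt_read(): sum every possible CPU's contribution.
     * Individual slots may go negative when a reference taken on one CPU is
     * dropped on another; only the sum is meaningful. */
    static int refcnt_read(void)
    {
        int cpu, refcnt = 0;

        for (cpu = 0; cpu < NR_CPUS; cpu++)
            refcnt += pcpu_refcnt[cpu];
        return refcnt;
    }

    int main(void)
    {
        dev_hold_on(0);
        dev_hold_on(2);
        dev_put_on(1);               /* dropped on a different CPU: slot -1 */
        printf("refcnt = %d\n", refcnt_read());   /* prints 1 */
        return 0;
    }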
@@ -5170,11 +5307,14 @@ EXPORT_SYMBOL(register_netdev); | |||
5170 | static void netdev_wait_allrefs(struct net_device *dev) | 5307 | static void netdev_wait_allrefs(struct net_device *dev) |
5171 | { | 5308 | { |
5172 | unsigned long rebroadcast_time, warning_time; | 5309 | unsigned long rebroadcast_time, warning_time; |
5310 | int refcnt; | ||
5173 | 5311 | ||
5174 | linkwatch_forget_dev(dev); | 5312 | linkwatch_forget_dev(dev); |
5175 | 5313 | ||
5176 | rebroadcast_time = warning_time = jiffies; | 5314 | rebroadcast_time = warning_time = jiffies; |
5177 | while (atomic_read(&dev->refcnt) != 0) { | 5315 | refcnt = netdev_refcnt_read(dev); |
5316 | |||
5317 | while (refcnt != 0) { | ||
5178 | if (time_after(jiffies, rebroadcast_time + 1 * HZ)) { | 5318 | if (time_after(jiffies, rebroadcast_time + 1 * HZ)) { |
5179 | rtnl_lock(); | 5319 | rtnl_lock(); |
5180 | 5320 | ||
@@ -5201,11 +5341,13 @@ static void netdev_wait_allrefs(struct net_device *dev) | |||
5201 | 5341 | ||
5202 | msleep(250); | 5342 | msleep(250); |
5203 | 5343 | ||
5344 | refcnt = netdev_refcnt_read(dev); | ||
5345 | |||
5204 | if (time_after(jiffies, warning_time + 10 * HZ)) { | 5346 | if (time_after(jiffies, warning_time + 10 * HZ)) { |
5205 | printk(KERN_EMERG "unregister_netdevice: " | 5347 | printk(KERN_EMERG "unregister_netdevice: " |
5206 | "waiting for %s to become free. Usage " | 5348 | "waiting for %s to become free. Usage " |
5207 | "count = %d\n", | 5349 | "count = %d\n", |
5208 | dev->name, atomic_read(&dev->refcnt)); | 5350 | dev->name, refcnt); |
5209 | warning_time = jiffies; | 5351 | warning_time = jiffies; |
5210 | } | 5352 | } |
5211 | } | 5353 | } |
@@ -5263,8 +5405,8 @@ void netdev_run_todo(void) | |||
5263 | netdev_wait_allrefs(dev); | 5405 | netdev_wait_allrefs(dev); |
5264 | 5406 | ||
5265 | /* paranoia */ | 5407 | /* paranoia */ |
5266 | BUG_ON(atomic_read(&dev->refcnt)); | 5408 | BUG_ON(netdev_refcnt_read(dev)); |
5267 | WARN_ON(dev->ip_ptr); | 5409 | WARN_ON(rcu_dereference_raw(dev->ip_ptr)); |
5268 | WARN_ON(dev->ip6_ptr); | 5410 | WARN_ON(dev->ip6_ptr); |
5269 | WARN_ON(dev->dn_ptr); | 5411 | WARN_ON(dev->dn_ptr); |
5270 | 5412 | ||
@@ -5342,30 +5484,34 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev, | |||
5342 | 5484 | ||
5343 | if (ops->ndo_get_stats64) { | 5485 | if (ops->ndo_get_stats64) { |
5344 | memset(storage, 0, sizeof(*storage)); | 5486 | memset(storage, 0, sizeof(*storage)); |
5345 | return ops->ndo_get_stats64(dev, storage); | 5487 | ops->ndo_get_stats64(dev, storage); |
5346 | } | 5488 | } else if (ops->ndo_get_stats) { |
5347 | if (ops->ndo_get_stats) { | ||
5348 | netdev_stats_to_stats64(storage, ops->ndo_get_stats(dev)); | 5489 | netdev_stats_to_stats64(storage, ops->ndo_get_stats(dev)); |
5349 | return storage; | 5490 | } else { |
5491 | netdev_stats_to_stats64(storage, &dev->stats); | ||
5492 | dev_txq_stats_fold(dev, storage); | ||
5350 | } | 5493 | } |
5351 | netdev_stats_to_stats64(storage, &dev->stats); | 5494 | storage->rx_dropped += atomic_long_read(&dev->rx_dropped); |
5352 | dev_txq_stats_fold(dev, storage); | ||
5353 | return storage; | 5495 | return storage; |
5354 | } | 5496 | } |
5355 | EXPORT_SYMBOL(dev_get_stats); | 5497 | EXPORT_SYMBOL(dev_get_stats); |
5356 | 5498 | ||
5357 | static void netdev_init_one_queue(struct net_device *dev, | 5499 | struct netdev_queue *dev_ingress_queue_create(struct net_device *dev) |
5358 | struct netdev_queue *queue, | ||
5359 | void *_unused) | ||
5360 | { | 5500 | { |
5361 | queue->dev = dev; | 5501 | struct netdev_queue *queue = dev_ingress_queue(dev); |
5362 | } | ||
5363 | 5502 | ||
5364 | static void netdev_init_queues(struct net_device *dev) | 5503 | #ifdef CONFIG_NET_CLS_ACT |
5365 | { | 5504 | if (queue) |
5366 | netdev_init_one_queue(dev, &dev->rx_queue, NULL); | 5505 | return queue; |
5367 | netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL); | 5506 | queue = kzalloc(sizeof(*queue), GFP_KERNEL); |
5368 | spin_lock_init(&dev->tx_global_lock); | 5507 | if (!queue) |
5508 | return NULL; | ||
5509 | netdev_init_one_queue(dev, queue, NULL); | ||
5510 | queue->qdisc = &noop_qdisc; | ||
5511 | queue->qdisc_sleeping = &noop_qdisc; | ||
5512 | rcu_assign_pointer(dev->ingress_queue, queue); | ||
5513 | #endif | ||
5514 | return queue; | ||
5369 | } | 5515 | } |
5370 | 5516 | ||
5371 | /** | 5517 | /** |
@@ -5382,17 +5528,18 @@ static void netdev_init_queues(struct net_device *dev) | |||
5382 | struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, | 5528 | struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, |
5383 | void (*setup)(struct net_device *), unsigned int queue_count) | 5529 | void (*setup)(struct net_device *), unsigned int queue_count) |
5384 | { | 5530 | { |
5385 | struct netdev_queue *tx; | ||
5386 | struct net_device *dev; | 5531 | struct net_device *dev; |
5387 | size_t alloc_size; | 5532 | size_t alloc_size; |
5388 | struct net_device *p; | 5533 | struct net_device *p; |
5389 | #ifdef CONFIG_RPS | ||
5390 | struct netdev_rx_queue *rx; | ||
5391 | int i; | ||
5392 | #endif | ||
5393 | 5534 | ||
5394 | BUG_ON(strlen(name) >= sizeof(dev->name)); | 5535 | BUG_ON(strlen(name) >= sizeof(dev->name)); |
5395 | 5536 | ||
5537 | if (queue_count < 1) { | ||
5538 | pr_err("alloc_netdev: Unable to allocate device " | ||
5539 | "with zero queues.\n"); | ||
5540 | return NULL; | ||
5541 | } | ||
5542 | |||
5396 | alloc_size = sizeof(struct net_device); | 5543 | alloc_size = sizeof(struct net_device); |
5397 | if (sizeof_priv) { | 5544 | if (sizeof_priv) { |
5398 | /* ensure 32-byte alignment of private area */ | 5545 | /* ensure 32-byte alignment of private area */ |
@@ -5408,55 +5555,31 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, | |||
5408 | return NULL; | 5555 | return NULL; |
5409 | } | 5556 | } |
5410 | 5557 | ||
5411 | tx = kcalloc(queue_count, sizeof(struct netdev_queue), GFP_KERNEL); | ||
5412 | if (!tx) { | ||
5413 | printk(KERN_ERR "alloc_netdev: Unable to allocate " | ||
5414 | "tx qdiscs.\n"); | ||
5415 | goto free_p; | ||
5416 | } | ||
5417 | |||
5418 | #ifdef CONFIG_RPS | ||
5419 | rx = kcalloc(queue_count, sizeof(struct netdev_rx_queue), GFP_KERNEL); | ||
5420 | if (!rx) { | ||
5421 | printk(KERN_ERR "alloc_netdev: Unable to allocate " | ||
5422 | "rx queues.\n"); | ||
5423 | goto free_tx; | ||
5424 | } | ||
5425 | |||
5426 | atomic_set(&rx->count, queue_count); | ||
5427 | |||
5428 | /* | ||
5429 | * Set a pointer to first element in the array which holds the | ||
5430 | * reference count. | ||
5431 | */ | ||
5432 | for (i = 0; i < queue_count; i++) | ||
5433 | rx[i].first = rx; | ||
5434 | #endif | ||
5435 | |||
5436 | dev = PTR_ALIGN(p, NETDEV_ALIGN); | 5558 | dev = PTR_ALIGN(p, NETDEV_ALIGN); |
5437 | dev->padded = (char *)dev - (char *)p; | 5559 | dev->padded = (char *)dev - (char *)p; |
5438 | 5560 | ||
5561 | dev->pcpu_refcnt = alloc_percpu(int); | ||
5562 | if (!dev->pcpu_refcnt) | ||
5563 | goto free_p; | ||
5564 | |||
5439 | if (dev_addr_init(dev)) | 5565 | if (dev_addr_init(dev)) |
5440 | goto free_rx; | 5566 | goto free_pcpu; |
5441 | 5567 | ||
5442 | dev_mc_init(dev); | 5568 | dev_mc_init(dev); |
5443 | dev_uc_init(dev); | 5569 | dev_uc_init(dev); |
5444 | 5570 | ||
5445 | dev_net_set(dev, &init_net); | 5571 | dev_net_set(dev, &init_net); |
5446 | 5572 | ||
5447 | dev->_tx = tx; | ||
5448 | dev->num_tx_queues = queue_count; | 5573 | dev->num_tx_queues = queue_count; |
5449 | dev->real_num_tx_queues = queue_count; | 5574 | dev->real_num_tx_queues = queue_count; |
5450 | 5575 | ||
5451 | #ifdef CONFIG_RPS | 5576 | #ifdef CONFIG_RPS |
5452 | dev->_rx = rx; | ||
5453 | dev->num_rx_queues = queue_count; | 5577 | dev->num_rx_queues = queue_count; |
5578 | dev->real_num_rx_queues = queue_count; | ||
5454 | #endif | 5579 | #endif |
5455 | 5580 | ||
5456 | dev->gso_max_size = GSO_MAX_SIZE; | 5581 | dev->gso_max_size = GSO_MAX_SIZE; |
5457 | 5582 | ||
5458 | netdev_init_queues(dev); | ||
5459 | |||
5460 | INIT_LIST_HEAD(&dev->ethtool_ntuple_list.list); | 5583 | INIT_LIST_HEAD(&dev->ethtool_ntuple_list.list); |
5461 | dev->ethtool_ntuple_list.count = 0; | 5584 | dev->ethtool_ntuple_list.count = 0; |
5462 | INIT_LIST_HEAD(&dev->napi_list); | 5585 | INIT_LIST_HEAD(&dev->napi_list); |
@@ -5467,12 +5590,8 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, | |||
5467 | strcpy(dev->name, name); | 5590 | strcpy(dev->name, name); |
5468 | return dev; | 5591 | return dev; |
5469 | 5592 | ||
5470 | free_rx: | 5593 | free_pcpu: |
5471 | #ifdef CONFIG_RPS | 5594 | free_percpu(dev->pcpu_refcnt); |
5472 | kfree(rx); | ||
5473 | free_tx: | ||
5474 | #endif | ||
5475 | kfree(tx); | ||
5476 | free_p: | 5595 | free_p: |
5477 | kfree(p); | 5596 | kfree(p); |
5478 | return NULL; | 5597 | return NULL; |
@@ -5495,6 +5614,8 @@ void free_netdev(struct net_device *dev) | |||
5495 | 5614 | ||
5496 | kfree(dev->_tx); | 5615 | kfree(dev->_tx); |
5497 | 5616 | ||
5617 | kfree(rcu_dereference_raw(dev->ingress_queue)); | ||
5618 | |||
5498 | /* Flush device addresses */ | 5619 | /* Flush device addresses */ |
5499 | dev_addr_flush(dev); | 5620 | dev_addr_flush(dev); |
5500 | 5621 | ||
@@ -5504,6 +5625,9 @@ void free_netdev(struct net_device *dev) | |||
5504 | list_for_each_entry_safe(p, n, &dev->napi_list, dev_list) | 5625 | list_for_each_entry_safe(p, n, &dev->napi_list, dev_list) |
5505 | netif_napi_del(p); | 5626 | netif_napi_del(p); |
5506 | 5627 | ||
5628 | free_percpu(dev->pcpu_refcnt); | ||
5629 | dev->pcpu_refcnt = NULL; | ||
5630 | |||
5507 | /* Compatibility with error handling in drivers */ | 5631 | /* Compatibility with error handling in drivers */ |
5508 | if (dev->reg_state == NETREG_UNINITIALIZED) { | 5632 | if (dev->reg_state == NETREG_UNINITIALIZED) { |
5509 | kfree((char *)dev - dev->padded); | 5633 | kfree((char *)dev - dev->padded); |
@@ -5658,6 +5782,10 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char | |||
5658 | 5782 | ||
5659 | /* Notify protocols, that we are about to destroy | 5783 | /* Notify protocols, that we are about to destroy |
5660 | this device. They should clean all the things. | 5784 | this device. They should clean all the things. |
5785 | |||
5786 | Note that dev->reg_state stays at NETREG_REGISTERED. | ||
5787 | This is wanted because this way 8021q and macvlan know | ||
5788 | the device is just moving and can keep their slaves up. | ||
5661 | */ | 5789 | */ |
5662 | call_netdevice_notifiers(NETDEV_UNREGISTER, dev); | 5790 | call_netdevice_notifiers(NETDEV_UNREGISTER, dev); |
5663 | call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev); | 5791 | call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev); |