Diffstat (limited to 'net/core/dev.c'):
 net/core/dev.c | 458
 1 file changed, 297 insertions, 161 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index d03470f5260a..1ae654391442 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -101,8 +101,6 @@
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/stat.h>
-#include <linux/if_bridge.h>
-#include <linux/if_macvlan.h>
 #include <net/dst.h>
 #include <net/pkt_sched.h>
 #include <net/checksum.h>
@@ -803,35 +801,31 @@ struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
 EXPORT_SYMBOL(dev_getfirstbyhwtype);
 
 /**
- * dev_get_by_flags - find any device with given flags
+ * dev_get_by_flags_rcu - find any device with given flags
  * @net: the applicable net namespace
  * @if_flags: IFF_* values
  * @mask: bitmask of bits in if_flags to check
  *
  * Search for any interface with the given flags. Returns NULL if a device
- * is not found or a pointer to the device. The device returned has
- * had a reference added and the pointer is safe until the user calls
- * dev_put to indicate they have finished with it.
+ * is not found or a pointer to the device. Must be called inside
+ * rcu_read_lock(), and result refcount is unchanged.
  */
 
-struct net_device *dev_get_by_flags(struct net *net, unsigned short if_flags,
+struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short if_flags,
 				    unsigned short mask)
 {
 	struct net_device *dev, *ret;
 
 	ret = NULL;
-	rcu_read_lock();
 	for_each_netdev_rcu(net, dev) {
 		if (((dev->flags ^ if_flags) & mask) == 0) {
-			dev_hold(dev);
 			ret = dev;
 			break;
 		}
 	}
-	rcu_read_unlock();
 	return ret;
 }
-EXPORT_SYMBOL(dev_get_by_flags);
+EXPORT_SYMBOL(dev_get_by_flags_rcu);
 
 /**
  * dev_valid_name - check if name is okay for network device
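
The renamed helper drops the implicit locking and reference counting, so callers now provide both. Below is a minimal caller sketch, not part of the patch and with a made-up function name, assuming a lookup against init_net: the RCU read section must cover both the call and every use of the returned pointer, because no reference is taken on the device.

/* Hypothetical caller of dev_get_by_flags_rcu(): check whether any
 * device in init_net is administratively up.  The returned pointer is
 * only valid inside the rcu_read_lock() section, since the helper no
 * longer calls dev_hold().
 */
static bool example_any_device_up(void)
{
	struct net_device *dev;
	bool up;

	rcu_read_lock();
	dev = dev_get_by_flags_rcu(&init_net, IFF_UP, IFF_UP);
	up = (dev != NULL);
	rcu_read_unlock();

	return up;
}
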
@@ -1488,6 +1482,7 @@ static inline void net_timestamp_check(struct sk_buff *skb)
 int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
 {
 	skb_orphan(skb);
+	nf_reset(skb);
 
 	if (!(dev->flags & IFF_UP) ||
 	    (skb->len > (dev->mtu + dev->hard_header_len))) {
@@ -1541,7 +1536,8 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 				if (net_ratelimit())
 					printk(KERN_CRIT "protocol %04x is "
 					       "buggy, dev %s\n",
-					       skb2->protocol, dev->name);
+					       ntohs(skb2->protocol),
+					       dev->name);
 			skb_reset_network_header(skb2);
 		}
 
@@ -1553,6 +1549,24 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 	rcu_read_unlock();
 }
 
+/*
+ * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
+ * greater than real_num_tx_queues, stale skbs on the qdisc must be flushed.
+ */
+void netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
+{
+	unsigned int real_num = dev->real_num_tx_queues;
+
+	if (unlikely(txq > dev->num_tx_queues))
+		;
+	else if (txq > real_num)
+		dev->real_num_tx_queues = txq;
+	else if (txq < real_num) {
+		dev->real_num_tx_queues = txq;
+		qdisc_reset_all_tx_gt(dev, txq);
+	}
+}
+EXPORT_SYMBOL(netif_set_real_num_tx_queues);
 
 static inline void __netif_reschedule(struct Qdisc *q)
 {
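
A hedged sketch of how a multiqueue driver might call the new helper when its usable queue count changes at runtime; the driver fragment and its trigger are hypothetical. Growing the count just publishes the new value, while shrinking it also flushes qdiscs for the queues that disappear, as the comment above notes.

/* Hypothetical driver fragment: after the number of usable hardware
 * queues changes, publish it so the stack stops mapping skbs onto
 * queues that no longer exist.  Values above dev->num_tx_queues are
 * silently ignored by netif_set_real_num_tx_queues().
 */
static void example_update_tx_queues(struct net_device *netdev,
				     unsigned int active_queues)
{
	netif_set_real_num_tx_queues(netdev, active_queues);
}
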
@@ -1893,8 +1907,32 @@ static int dev_gso_segment(struct sk_buff *skb)
  */
 static inline void skb_orphan_try(struct sk_buff *skb)
 {
-	if (!skb_tx(skb)->flags)
+	struct sock *sk = skb->sk;
+
+	if (sk && !skb_tx(skb)->flags) {
+		/* skb_tx_hash() won't be able to get sk.
+		 * We copy sk_hash into skb->rxhash
+		 */
+		if (!skb->rxhash)
+			skb->rxhash = sk->sk_hash;
 		skb_orphan(skb);
+	}
+}
+
+/*
+ * Returns true if either:
+ * 1. skb has frag_list and the device doesn't support FRAGLIST, or
+ * 2. skb is fragmented and the device does not support SG, or if
+ *    at least one of fragments is in highmem and device does not
+ *    support DMA from it.
+ */
+static inline int skb_needs_linearize(struct sk_buff *skb,
+				      struct net_device *dev)
+{
+	return skb_is_nonlinear(skb) &&
+	       ((skb_has_frags(skb) && !(dev->features & NETIF_F_FRAGLIST)) ||
+		(skb_shinfo(skb)->nr_frags && (!(dev->features & NETIF_F_SG) ||
+					       illegal_highdma(dev, skb))));
 }
 
 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
@@ -1921,6 +1959,22 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 			goto out_kfree_skb;
 		if (skb->next)
 			goto gso;
+	} else {
+		if (skb_needs_linearize(skb, dev) &&
+		    __skb_linearize(skb))
+			goto out_kfree_skb;
+
+		/* If packet is not checksummed and device does not
+		 * support checksumming for this protocol, complete
+		 * checksumming here.
+		 */
+		if (skb->ip_summed == CHECKSUM_PARTIAL) {
+			skb_set_transport_header(skb, skb->csum_start -
+					      skb_headroom(skb));
+			if (!dev_can_checksum(dev, skb) &&
+			     skb_checksum_help(skb))
+				goto out_kfree_skb;
+		}
 	}
 
 	rc = ops->ndo_start_xmit(skb, dev);
@@ -1980,8 +2034,7 @@ u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
 	if (skb->sk && skb->sk->sk_hash)
 		hash = skb->sk->sk_hash;
 	else
-		hash = (__force u16) skb->protocol;
-
+		hash = (__force u16) skb->protocol ^ skb->rxhash;
 	hash = jhash_1word(hash, hashrnd);
 
 	return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
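
The return statement maps the 32-bit jhash result onto [0, real_num_tx_queues) without a modulo: the hash is treated as a fraction of 2^32 and scaled by the queue count. A worked illustration with made-up numbers:

/* Illustration only: the scaling used by skb_tx_hash() above.
 * hash = 0xC0000000 is 3/4 of the 32-bit range, so on a 4-queue
 * device (0xC0000000ULL * 4) >> 32 selects queue 3.
 */
static u16 example_scale_hash(u32 hash, unsigned int num_queues)
{
	return (u16)(((u64)hash * num_queues) >> 32);
}
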
@@ -2004,12 +2057,11 @@ static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
 static struct netdev_queue *dev_pick_tx(struct net_device *dev,
 					struct sk_buff *skb)
 {
-	u16 queue_index;
+	int queue_index;
 	struct sock *sk = skb->sk;
 
-	if (sk_tx_queue_recorded(sk)) {
-		queue_index = sk_tx_queue_get(sk);
-	} else {
+	queue_index = sk_tx_queue_get(sk);
+	if (queue_index < 0) {
 		const struct net_device_ops *ops = dev->netdev_ops;
 
 		if (ops->ndo_select_queue) {
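
With this change the recorded socket queue is consulted first; only when sk_tx_queue_get() returns a negative value does dev_pick_tx() fall back to the driver's ndo_select_queue() or to skb_tx_hash(). A hypothetical driver callback, sketched against the two-argument ndo_select_queue signature of this kernel, that simply reuses the default hashing:

/* Hypothetical ndo_select_queue implementation.  A driver with no
 * special steering policy can defer to skb_tx_hash(), which is what
 * dev_pick_tx() does anyway when the callback is absent.
 */
static u16 example_select_queue(struct net_device *dev, struct sk_buff *skb)
{
	return skb_tx_hash(dev, skb);
}
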
@@ -2038,14 +2090,24 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 				 struct netdev_queue *txq)
 {
 	spinlock_t *root_lock = qdisc_lock(q);
+	bool contended = qdisc_is_running(q);
 	int rc;
 
+	/*
+	 * Heuristic to force contended enqueues to serialize on a
+	 * separate lock before trying to get qdisc main lock.
+	 * This permits __QDISC_STATE_RUNNING owner to get the lock more often
+	 * and dequeue packets faster.
+	 */
+	if (unlikely(contended))
+		spin_lock(&q->busylock);
+
 	spin_lock(root_lock);
 	if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
 		kfree_skb(skb);
 		rc = NET_XMIT_DROP;
 	} else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
-		   !test_and_set_bit(__QDISC_STATE_RUNNING, &q->state)) {
+		   qdisc_run_begin(q)) {
 		/*
 		 * This is a work-conserving queue; there are no old skbs
 		 * waiting to be sent out; and the qdisc is not running -
@@ -2054,37 +2116,33 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 		if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE))
 			skb_dst_force(skb);
 		__qdisc_update_bstats(q, skb->len);
-		if (sch_direct_xmit(skb, q, dev, txq, root_lock))
+		if (sch_direct_xmit(skb, q, dev, txq, root_lock)) {
+			if (unlikely(contended)) {
+				spin_unlock(&q->busylock);
+				contended = false;
+			}
 			__qdisc_run(q);
-		else
-			clear_bit(__QDISC_STATE_RUNNING, &q->state);
+		} else
+			qdisc_run_end(q);
 
 		rc = NET_XMIT_SUCCESS;
 	} else {
 		skb_dst_force(skb);
 		rc = qdisc_enqueue_root(skb, q);
-		qdisc_run(q);
+		if (qdisc_run_begin(q)) {
+			if (unlikely(contended)) {
+				spin_unlock(&q->busylock);
+				contended = false;
+			}
+			__qdisc_run(q);
+		}
 	}
 	spin_unlock(root_lock);
-
+	if (unlikely(contended))
+		spin_unlock(&q->busylock);
 	return rc;
 }
 
-/*
- * Returns true if either:
- * 1. skb has frag_list and the device doesn't support FRAGLIST, or
- * 2. skb is fragmented and the device does not support SG, or if
- *    at least one of fragments is in highmem and device does not
- *    support DMA from it.
- */
-static inline int skb_needs_linearize(struct sk_buff *skb,
-				      struct net_device *dev)
-{
-	return (skb_has_frags(skb) && !(dev->features & NETIF_F_FRAGLIST)) ||
-	       (skb_shinfo(skb)->nr_frags && (!(dev->features & NETIF_F_SG) ||
-					      illegal_highdma(dev, skb)));
-}
-
 /**
  * dev_queue_xmit - transmit a buffer
  * @skb: buffer to transmit
@@ -2117,25 +2175,6 @@ int dev_queue_xmit(struct sk_buff *skb)
 	struct Qdisc *q;
 	int rc = -ENOMEM;
 
-	/* GSO will handle the following emulations directly. */
-	if (netif_needs_gso(dev, skb))
-		goto gso;
-
-	/* Convert a paged skb to linear, if required */
-	if (skb_needs_linearize(skb, dev) && __skb_linearize(skb))
-		goto out_kfree_skb;
-
-	/* If packet is not checksummed and device does not support
-	 * checksumming for this protocol, complete checksumming here.
-	 */
-	if (skb->ip_summed == CHECKSUM_PARTIAL) {
-		skb_set_transport_header(skb, skb->csum_start -
-					      skb_headroom(skb));
-		if (!dev_can_checksum(dev, skb) && skb_checksum_help(skb))
-			goto out_kfree_skb;
-	}
-
-gso:
 	/* Disable soft irqs for various locks below. Also
 	 * stops preemption for RCU.
 	 */
@@ -2194,7 +2233,6 @@ gso:
 	rc = -ENETDOWN;
 	rcu_read_unlock_bh();
 
-out_kfree_skb:
 	kfree_skb(skb);
 	return rc;
 out:
@@ -2253,11 +2291,9 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
 	if (skb_rx_queue_recorded(skb)) {
 		u16 index = skb_get_rx_queue(skb);
 		if (unlikely(index >= dev->num_rx_queues)) {
-			if (net_ratelimit()) {
-				pr_warning("%s received packet on queue "
-					   "%u, but number of RX queues is %u\n",
-					   dev->name, index, dev->num_rx_queues);
-			}
+			WARN_ONCE(dev->num_rx_queues > 1, "%s received packet "
+				  "on queue %u, but number of RX queues is %u\n",
+				  dev->name, index, dev->num_rx_queues);
 			goto done;
 		}
 		rxqueue = dev->_rx + index;
@@ -2481,6 +2517,7 @@ int netif_rx(struct sk_buff *skb)
 		struct rps_dev_flow voidflow, *rflow = &voidflow;
 		int cpu;
 
+		preempt_disable();
 		rcu_read_lock();
 
 		cpu = get_rps_cpu(skb->dev, skb, &rflow);
@@ -2490,6 +2527,7 @@ int netif_rx(struct sk_buff *skb)
 		ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
 
 		rcu_read_unlock();
+		preempt_enable();
 	}
 #else
 	{
@@ -2581,70 +2619,14 @@ static inline int deliver_skb(struct sk_buff *skb,
 	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
 }
 
-#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
-
-#if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)
+#if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \
+    (defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE))
 /* This hook is defined here for ATM LANE */
 int (*br_fdb_test_addr_hook)(struct net_device *dev,
 			     unsigned char *addr) __read_mostly;
 EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
 #endif
 
-/*
- * If bridge module is loaded call bridging hook.
- * returns NULL if packet was consumed.
- */
-struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
-					struct sk_buff *skb) __read_mostly;
-EXPORT_SYMBOL_GPL(br_handle_frame_hook);
-
-static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
-					    struct packet_type **pt_prev, int *ret,
-					    struct net_device *orig_dev)
-{
-	struct net_bridge_port *port;
-
-	if (skb->pkt_type == PACKET_LOOPBACK ||
-	    (port = rcu_dereference(skb->dev->br_port)) == NULL)
-		return skb;
-
-	if (*pt_prev) {
-		*ret = deliver_skb(skb, *pt_prev, orig_dev);
-		*pt_prev = NULL;
-	}
-
-	return br_handle_frame_hook(port, skb);
-}
-#else
-#define handle_bridge(skb, pt_prev, ret, orig_dev)	(skb)
-#endif
-
-#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
-struct sk_buff *(*macvlan_handle_frame_hook)(struct macvlan_port *p,
-					     struct sk_buff *skb) __read_mostly;
-EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);
-
-static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
-					     struct packet_type **pt_prev,
-					     int *ret,
-					     struct net_device *orig_dev)
-{
-	struct macvlan_port *port;
-
-	port = rcu_dereference(skb->dev->macvlan_port);
-	if (!port)
-		return skb;
-
-	if (*pt_prev) {
-		*ret = deliver_skb(skb, *pt_prev, orig_dev);
-		*pt_prev = NULL;
-	}
-	return macvlan_handle_frame_hook(port, skb);
-}
-#else
-#define handle_macvlan(skb, pt_prev, ret, orig_dev)	(skb)
-#endif
-
 #ifdef CONFIG_NET_CLS_ACT
 /* TODO: Maybe we should just force sch_ingress to be compiled in
  * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions
@@ -2662,10 +2644,10 @@ static int ing_filter(struct sk_buff *skb)
 	int result = TC_ACT_OK;
 	struct Qdisc *q;
 
-	if (MAX_RED_LOOP < ttl++) {
-		printk(KERN_WARNING
-		       "Redir loop detected Dropping packet (%d->%d)\n",
-		       skb->skb_iif, dev->ifindex);
+	if (unlikely(MAX_RED_LOOP < ttl++)) {
+		if (net_ratelimit())
+			pr_warning("Redir loop detected Dropping packet (%d->%d)\n",
+				   skb->skb_iif, dev->ifindex);
 		return TC_ACT_SHOT;
 	}
 
@@ -2695,9 +2677,6 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb,
 	if (*pt_prev) {
 		*ret = deliver_skb(skb, *pt_prev, orig_dev);
 		*pt_prev = NULL;
-	} else {
-		/* Huh? Why does turning on AF_PACKET affect this? */
-		skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
 	}
 
 	switch (ing_filter(skb)) {
@@ -2740,6 +2719,51 @@ void netif_nit_deliver(struct sk_buff *skb)
 	rcu_read_unlock();
 }
 
+/**
+ * netdev_rx_handler_register - register receive handler
+ * @dev: device to register a handler for
+ * @rx_handler: receive handler to register
+ * @rx_handler_data: data pointer that is used by rx handler
+ *
+ * Register a receive handler for a device. This handler will then be
+ * called from __netif_receive_skb. A negative errno code is returned
+ * on a failure.
+ *
+ * The caller must hold the rtnl_mutex.
+ */
+int netdev_rx_handler_register(struct net_device *dev,
+			       rx_handler_func_t *rx_handler,
+			       void *rx_handler_data)
+{
+	ASSERT_RTNL();
+
+	if (dev->rx_handler)
+		return -EBUSY;
+
+	rcu_assign_pointer(dev->rx_handler_data, rx_handler_data);
+	rcu_assign_pointer(dev->rx_handler, rx_handler);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(netdev_rx_handler_register);
+
+/**
+ * netdev_rx_handler_unregister - unregister receive handler
+ * @dev: device to unregister a handler from
+ *
+ * Unregister a receive handler from a device.
+ *
+ * The caller must hold the rtnl_mutex.
+ */
+void netdev_rx_handler_unregister(struct net_device *dev)
+{
+
+	ASSERT_RTNL();
+	rcu_assign_pointer(dev->rx_handler, NULL);
+	rcu_assign_pointer(dev->rx_handler_data, NULL);
+}
+EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
+
 static inline void skb_bond_set_mac_by_master(struct sk_buff *skb,
 					      struct net_device *master)
 {
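
A sketch of how a module in the mold of bridge or macvlan might use the new hooks; the handler and attach function are hypothetical. Per the kerneldoc above, registration happens under the rtnl_mutex, and, as __netif_receive_skb shows further down, returning NULL from the handler tells the core the skb was consumed.

/* Hypothetical rx_handler user: claim a port device and either steal
 * the skb (return NULL) or hand it back for normal processing.
 */
static struct sk_buff *example_rx_handler(struct sk_buff *skb)
{
	/* inspect or consume the skb here; returning it passes it on */
	return skb;
}

static int example_attach_port(struct net_device *port_dev)
{
	int err;

	rtnl_lock();
	err = netdev_rx_handler_register(port_dev, example_rx_handler, NULL);
	rtnl_unlock();

	return err;
}
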
@@ -2761,7 +2785,8 @@ int __skb_bond_should_drop(struct sk_buff *skb, struct net_device *master)
 	if (master->priv_flags & IFF_MASTER_ARPMON)
 		dev->last_rx = jiffies;
 
-	if ((master->priv_flags & IFF_MASTER_ALB) && master->br_port) {
+	if ((master->priv_flags & IFF_MASTER_ALB) &&
+	    (master->priv_flags & IFF_BRIDGE_PORT)) {
 		/* Do address unmangle. The local destination address
 		 * will be always the one master has. Provides the right
 		 * functionality in a bridge.
@@ -2792,6 +2817,7 @@ EXPORT_SYMBOL(__skb_bond_should_drop);
 static int __netif_receive_skb(struct sk_buff *skb)
 {
 	struct packet_type *ptype, *pt_prev;
+	rx_handler_func_t *rx_handler;
 	struct net_device *orig_dev;
 	struct net_device *master;
 	struct net_device *null_or_orig;
@@ -2812,18 +2838,28 @@ static int __netif_receive_skb(struct sk_buff *skb)
 	if (!skb->skb_iif)
 		skb->skb_iif = skb->dev->ifindex;
 
+	/*
+	 * bonding note: skbs received on inactive slaves should only
+	 * be delivered to pkt handlers that are exact matches. Also
+	 * the deliver_no_wcard flag will be set. If packet handlers
+	 * are sensitive to duplicate packets these skbs will need to
+	 * be dropped at the handler. The vlan accel path may have
+	 * already set the deliver_no_wcard flag.
+	 */
 	null_or_orig = NULL;
 	orig_dev = skb->dev;
 	master = ACCESS_ONCE(orig_dev->master);
-	if (master) {
-		if (skb_bond_should_drop(skb, master))
+	if (skb->deliver_no_wcard)
+		null_or_orig = orig_dev;
+	else if (master) {
+		if (skb_bond_should_drop(skb, master)) {
+			skb->deliver_no_wcard = 1;
 			null_or_orig = orig_dev; /* deliver only exact match */
-		else
+		} else
 			skb->dev = master;
 	}
 
-	__get_cpu_var(softnet_data).processed++;
-
+	__this_cpu_inc(softnet_data.processed);
 	skb_reset_network_header(skb);
 	skb_reset_transport_header(skb);
 	skb->mac_len = skb->network_header - skb->mac_header;
@@ -2855,12 +2891,17 @@ static int __netif_receive_skb(struct sk_buff *skb)
 ncls:
 #endif
 
-	skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
-	if (!skb)
-		goto out;
-	skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
-	if (!skb)
-		goto out;
+	/* Handle special case of bridge or macvlan */
+	rx_handler = rcu_dereference(skb->dev->rx_handler);
+	if (rx_handler) {
+		if (pt_prev) {
+			ret = deliver_skb(skb, pt_prev, orig_dev);
+			pt_prev = NULL;
+		}
+		skb = rx_handler(skb);
+		if (!skb)
+			goto out;
+	}
 
 	/*
 	 * Make sure frames received on VLAN interfaces stacked on
@@ -2921,6 +2962,9 @@ int netif_receive_skb(struct sk_buff *skb)
 	if (netdev_tstamp_prequeue)
 		net_timestamp_check(skb);
 
+	if (skb_defer_rx_timestamp(skb))
+		return NET_RX_SUCCESS;
+
 #ifdef CONFIG_RPS
 	{
 		struct rps_dev_flow voidflow, *rflow = &voidflow;
@@ -3030,7 +3074,7 @@ enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 	int mac_len;
 	enum gro_result ret;
 
-	if (!(skb->dev->features & NETIF_F_GRO))
+	if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb))
 		goto normal;
 
 	if (skb_is_gso(skb) || skb_has_frags(skb))
@@ -3117,9 +3161,6 @@ __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
 	struct sk_buff *p;
 
-	if (netpoll_rx_on(skb))
-		return GRO_NORMAL;
-
 	for (p = napi->gro_list; p; p = p->next) {
 		NAPI_GRO_CB(p)->same_flow =
 			(p->dev == skb->dev) &&
@@ -3685,10 +3726,11 @@ void dev_seq_stop(struct seq_file *seq, void *v)
 
 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
 {
-	const struct net_device_stats *stats = dev_get_stats(dev);
+	struct rtnl_link_stats64 temp;
+	const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);
 
-	seq_printf(seq, "%6s: %7lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
-		   "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
+	seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu "
+		   "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n",
 		   dev->name, stats->rx_bytes, stats->rx_packets,
 		   stats->rx_errors,
 		   stats->rx_dropped + stats->rx_missed_errors,
@@ -5237,20 +5279,22 @@ void netdev_run_todo(void)
 /**
  * dev_txq_stats_fold - fold tx_queues stats
  * @dev: device to get statistics from
- * @stats: struct net_device_stats to hold results
+ * @stats: struct rtnl_link_stats64 to hold results
  */
 void dev_txq_stats_fold(const struct net_device *dev,
-			struct net_device_stats *stats)
+			struct rtnl_link_stats64 *stats)
 {
-	unsigned long tx_bytes = 0, tx_packets = 0, tx_dropped = 0;
+	u64 tx_bytes = 0, tx_packets = 0, tx_dropped = 0;
 	unsigned int i;
 	struct netdev_queue *txq;
 
 	for (i = 0; i < dev->num_tx_queues; i++) {
 		txq = netdev_get_tx_queue(dev, i);
+		spin_lock_bh(&txq->_xmit_lock);
 		tx_bytes   += txq->tx_bytes;
 		tx_packets += txq->tx_packets;
 		tx_dropped += txq->tx_dropped;
+		spin_unlock_bh(&txq->_xmit_lock);
 	}
 	if (tx_bytes || tx_packets || tx_dropped) {
 		stats->tx_bytes   = tx_bytes;
@@ -5260,23 +5304,53 @@ void dev_txq_stats_fold(const struct net_device *dev,
 }
 EXPORT_SYMBOL(dev_txq_stats_fold);
 
+/* Convert net_device_stats to rtnl_link_stats64.  They have the same
+ * fields in the same order, with only the type differing.
+ */
+static void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
+				    const struct net_device_stats *netdev_stats)
+{
+#if BITS_PER_LONG == 64
+	BUILD_BUG_ON(sizeof(*stats64) != sizeof(*netdev_stats));
+	memcpy(stats64, netdev_stats, sizeof(*stats64));
+#else
+	size_t i, n = sizeof(*stats64) / sizeof(u64);
+	const unsigned long *src = (const unsigned long *)netdev_stats;
+	u64 *dst = (u64 *)stats64;
+
+	BUILD_BUG_ON(sizeof(*netdev_stats) / sizeof(unsigned long) !=
+		     sizeof(*stats64) / sizeof(u64));
+	for (i = 0; i < n; i++)
+		dst[i] = src[i];
+#endif
+}
+
 /**
  * dev_get_stats - get network device statistics
  * @dev: device to get statistics from
+ * @storage: place to store stats
  *
- * Get network statistics from device. The device driver may provide
- * its own method by setting dev->netdev_ops->get_stats; otherwise
- * the internal statistics structure is used.
+ * Get network statistics from device. Return @storage.
+ * The device driver may provide its own method by setting
+ * dev->netdev_ops->get_stats64 or dev->netdev_ops->get_stats;
+ * otherwise the internal statistics structure is used.
  */
-const struct net_device_stats *dev_get_stats(struct net_device *dev)
+struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
+					struct rtnl_link_stats64 *storage)
 {
 	const struct net_device_ops *ops = dev->netdev_ops;
 
-	if (ops->ndo_get_stats)
-		return ops->ndo_get_stats(dev);
-
-	dev_txq_stats_fold(dev, &dev->stats);
-	return &dev->stats;
+	if (ops->ndo_get_stats64) {
+		memset(storage, 0, sizeof(*storage));
+		return ops->ndo_get_stats64(dev, storage);
+	}
+	if (ops->ndo_get_stats) {
+		netdev_stats_to_stats64(storage, ops->ndo_get_stats(dev));
+		return storage;
+	}
+	netdev_stats_to_stats64(storage, &dev->stats);
+	dev_txq_stats_fold(dev, storage);
+	return storage;
 }
 EXPORT_SYMBOL(dev_get_stats);
 
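
For a driver, the preferred path is now ndo_get_stats64(), which receives the caller-provided storage (already zeroed by dev_get_stats()) and returns it. A hedged sketch follows, with a hypothetical private structure standing in for whatever counters the hardware keeps:

/* Hypothetical driver support for the 64-bit statistics path. */
struct example_priv {
	u64 rx_packets, rx_bytes;
	u64 tx_packets, tx_bytes;
};

static struct rtnl_link_stats64 *example_get_stats64(struct net_device *dev,
						     struct rtnl_link_stats64 *storage)
{
	struct example_priv *priv = netdev_priv(dev);

	storage->rx_packets = priv->rx_packets;
	storage->rx_bytes   = priv->rx_bytes;
	storage->tx_packets = priv->tx_packets;
	storage->tx_bytes   = priv->tx_bytes;

	return storage;
}

static const struct net_device_ops example_netdev_ops = {
	.ndo_get_stats64 = example_get_stats64,
};
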
@@ -5781,6 +5855,68 @@ char *netdev_drivername(const struct net_device *dev, char *buffer, int len)
 	return buffer;
 }
 
+static int __netdev_printk(const char *level, const struct net_device *dev,
+			   struct va_format *vaf)
+{
+	int r;
+
+	if (dev && dev->dev.parent)
+		r = dev_printk(level, dev->dev.parent, "%s: %pV",
+			       netdev_name(dev), vaf);
+	else if (dev)
+		r = printk("%s%s: %pV", level, netdev_name(dev), vaf);
+	else
+		r = printk("%s(NULL net_device): %pV", level, vaf);
+
+	return r;
+}
+
+int netdev_printk(const char *level, const struct net_device *dev,
+		  const char *format, ...)
+{
+	struct va_format vaf;
+	va_list args;
+	int r;
+
+	va_start(args, format);
+
+	vaf.fmt = format;
+	vaf.va = &args;
+
+	r = __netdev_printk(level, dev, &vaf);
+	va_end(args);
+
+	return r;
+}
+EXPORT_SYMBOL(netdev_printk);
+
+#define define_netdev_printk_level(func, level)			\
+int func(const struct net_device *dev, const char *fmt, ...)	\
+{								\
+	int r;							\
+	struct va_format vaf;					\
+	va_list args;						\
+								\
+	va_start(args, fmt);					\
+								\
+	vaf.fmt = fmt;						\
+	vaf.va = &args;						\
+								\
+	r = __netdev_printk(level, dev, &vaf);			\
+	va_end(args);						\
+								\
+	return r;						\
+}								\
+EXPORT_SYMBOL(func);
+
+define_netdev_printk_level(netdev_emerg, KERN_EMERG);
+define_netdev_printk_level(netdev_alert, KERN_ALERT);
+define_netdev_printk_level(netdev_crit, KERN_CRIT);
+define_netdev_printk_level(netdev_err, KERN_ERR);
+define_netdev_printk_level(netdev_warn, KERN_WARNING);
+define_netdev_printk_level(netdev_notice, KERN_NOTICE);
+define_netdev_printk_level(netdev_info, KERN_INFO);
+
 static void __net_exit netdev_exit(struct net *net)
 {
 	kfree(net->dev_name_head);
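
The generated netdev_emerg() through netdev_info() helpers give drivers level-specific logging that prefixes each message with the device name (and the parent bus device when one exists). A hypothetical usage sketch:

/* Hypothetical driver messages using the new helpers. */
static void example_report_link(struct net_device *dev, bool up, int err)
{
	if (err)
		netdev_err(dev, "link probe failed: %d\n", err);
	else
		netdev_info(dev, "link is %s\n", up ? "up" : "down");
}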