Diffstat (limited to 'net/core/dev.c')
-rw-r--r--  net/core/dev.c | 458
1 file changed, 297 insertions, 161 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index d03470f5260a..1ae654391442 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -101,8 +101,6 @@ | |||
101 | #include <linux/proc_fs.h> | 101 | #include <linux/proc_fs.h> |
102 | #include <linux/seq_file.h> | 102 | #include <linux/seq_file.h> |
103 | #include <linux/stat.h> | 103 | #include <linux/stat.h> |
104 | #include <linux/if_bridge.h> | ||
105 | #include <linux/if_macvlan.h> | ||
106 | #include <net/dst.h> | 104 | #include <net/dst.h> |
107 | #include <net/pkt_sched.h> | 105 | #include <net/pkt_sched.h> |
108 | #include <net/checksum.h> | 106 | #include <net/checksum.h> |
@@ -803,35 +801,31 @@ struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type) | |||
803 | EXPORT_SYMBOL(dev_getfirstbyhwtype); | 801 | EXPORT_SYMBOL(dev_getfirstbyhwtype); |
804 | 802 | ||
805 | /** | 803 | /** |
806 | * dev_get_by_flags - find any device with given flags | 804 | * dev_get_by_flags_rcu - find any device with given flags |
807 | * @net: the applicable net namespace | 805 | * @net: the applicable net namespace |
808 | * @if_flags: IFF_* values | 806 | * @if_flags: IFF_* values |
809 | * @mask: bitmask of bits in if_flags to check | 807 | * @mask: bitmask of bits in if_flags to check |
810 | * | 808 | * |
811 | * Search for any interface with the given flags. Returns NULL if a device | 809 | * Search for any interface with the given flags. Returns NULL if a device |
812 | * is not found or a pointer to the device. The device returned has | 810 | * is not found or a pointer to the device. Must be called inside |
813 | * had a reference added and the pointer is safe until the user calls | 811 | * rcu_read_lock(), and result refcount is unchanged. |
814 | * dev_put to indicate they have finished with it. | ||
815 | */ | 812 | */ |
816 | 813 | ||
817 | struct net_device *dev_get_by_flags(struct net *net, unsigned short if_flags, | 814 | struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short if_flags, |
818 | unsigned short mask) | 815 | unsigned short mask) |
819 | { | 816 | { |
820 | struct net_device *dev, *ret; | 817 | struct net_device *dev, *ret; |
821 | 818 | ||
822 | ret = NULL; | 819 | ret = NULL; |
823 | rcu_read_lock(); | ||
824 | for_each_netdev_rcu(net, dev) { | 820 | for_each_netdev_rcu(net, dev) { |
825 | if (((dev->flags ^ if_flags) & mask) == 0) { | 821 | if (((dev->flags ^ if_flags) & mask) == 0) { |
826 | dev_hold(dev); | ||
827 | ret = dev; | 822 | ret = dev; |
828 | break; | 823 | break; |
829 | } | 824 | } |
830 | } | 825 | } |
831 | rcu_read_unlock(); | ||
832 | return ret; | 826 | return ret; |
833 | } | 827 | } |
834 | EXPORT_SYMBOL(dev_get_by_flags); | 828 | EXPORT_SYMBOL(dev_get_by_flags_rcu); |
835 | 829 | ||
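Note: with the _rcu variant the lookup no longer takes a reference, so a caller wraps it in rcu_read_lock() and, if the device pointer must outlive the critical section, takes its own reference. A minimal caller sketch (the surrounding code is hypothetical, not part of this patch):

    struct net_device *dev;

    rcu_read_lock();
    dev = dev_get_by_flags_rcu(net, IFF_UP, IFF_UP);
    if (dev)
            dev_hold(dev);          /* only needed if dev is used after unlock */
    rcu_read_unlock();

    if (dev) {
            /* ... use dev ... */
            dev_put(dev);
    }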
836 | /** | 830 | /** |
837 | * dev_valid_name - check if name is okay for network device | 831 | * dev_valid_name - check if name is okay for network device |
@@ -1488,6 +1482,7 @@ static inline void net_timestamp_check(struct sk_buff *skb) | |||
1488 | int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) | 1482 | int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) |
1489 | { | 1483 | { |
1490 | skb_orphan(skb); | 1484 | skb_orphan(skb); |
1485 | nf_reset(skb); | ||
1491 | 1486 | ||
1492 | if (!(dev->flags & IFF_UP) || | 1487 | if (!(dev->flags & IFF_UP) || |
1493 | (skb->len > (dev->mtu + dev->hard_header_len))) { | 1488 | (skb->len > (dev->mtu + dev->hard_header_len))) { |
@@ -1541,7 +1536,8 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) | |||
1541 | if (net_ratelimit()) | 1536 | if (net_ratelimit()) |
1542 | printk(KERN_CRIT "protocol %04x is " | 1537 | printk(KERN_CRIT "protocol %04x is " |
1543 | "buggy, dev %s\n", | 1538 | "buggy, dev %s\n", |
1544 | skb2->protocol, dev->name); | 1539 | ntohs(skb2->protocol), |
1540 | dev->name); | ||
1545 | skb_reset_network_header(skb2); | 1541 | skb_reset_network_header(skb2); |
1546 | } | 1542 | } |
1547 | 1543 | ||
@@ -1553,6 +1549,24 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) | |||
1553 | rcu_read_unlock(); | 1549 | rcu_read_unlock(); |
1554 | } | 1550 | } |
1555 | 1551 | ||
1552 | /* | ||
1553 | * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues | ||
1554 | * greater then real_num_tx_queues stale skbs on the qdisc must be flushed. | ||
1555 | */ | ||
1556 | void netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) | ||
1557 | { | ||
1558 | unsigned int real_num = dev->real_num_tx_queues; | ||
1559 | |||
1560 | if (unlikely(txq > dev->num_tx_queues)) | ||
1561 | ; | ||
1562 | else if (txq > real_num) | ||
1563 | dev->real_num_tx_queues = txq; | ||
1564 | else if (txq < real_num) { | ||
1565 | dev->real_num_tx_queues = txq; | ||
1566 | qdisc_reset_all_tx_gt(dev, txq); | ||
1567 | } | ||
1568 | } | ||
1569 | EXPORT_SYMBOL(netif_set_real_num_tx_queues); | ||
1556 | 1570 | ||
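Note: a driver that changes its active channel count would call the new helper after reprogramming the hardware, for example from an ethtool-style set-channels path. A sketch with made-up driver names, not code from this patch:

    static int mydrv_set_channels(struct net_device *dev, unsigned int count)
    {
            if (count > dev->num_tx_queues)     /* cannot grow past the allocation */
                    return -EINVAL;

            /* ... reconfigure hardware TX rings here ... */

            netif_set_real_num_tx_queues(dev, count);
            return 0;
    }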
1557 | static inline void __netif_reschedule(struct Qdisc *q) | 1571 | static inline void __netif_reschedule(struct Qdisc *q) |
1558 | { | 1572 | { |
@@ -1893,8 +1907,32 @@ static int dev_gso_segment(struct sk_buff *skb) | |||
1893 | */ | 1907 | */ |
1894 | static inline void skb_orphan_try(struct sk_buff *skb) | 1908 | static inline void skb_orphan_try(struct sk_buff *skb) |
1895 | { | 1909 | { |
1896 | if (!skb_tx(skb)->flags) | 1910 | struct sock *sk = skb->sk; |
1911 | |||
1912 | if (sk && !skb_tx(skb)->flags) { | ||
1913 | /* skb_tx_hash() wont be able to get sk. | ||
1914 | * We copy sk_hash into skb->rxhash | ||
1915 | */ | ||
1916 | if (!skb->rxhash) | ||
1917 | skb->rxhash = sk->sk_hash; | ||
1897 | skb_orphan(skb); | 1918 | skb_orphan(skb); |
1919 | } | ||
1920 | } | ||
1921 | |||
1922 | /* | ||
1923 | * Returns true if either: | ||
1924 | * 1. skb has frag_list and the device doesn't support FRAGLIST, or | ||
1925 | * 2. skb is fragmented and the device does not support SG, or if | ||
1926 | * at least one of fragments is in highmem and device does not | ||
1927 | * support DMA from it. | ||
1928 | */ | ||
1929 | static inline int skb_needs_linearize(struct sk_buff *skb, | ||
1930 | struct net_device *dev) | ||
1931 | { | ||
1932 | return skb_is_nonlinear(skb) && | ||
1933 | ((skb_has_frags(skb) && !(dev->features & NETIF_F_FRAGLIST)) || | ||
1934 | (skb_shinfo(skb)->nr_frags && (!(dev->features & NETIF_F_SG) || | ||
1935 | illegal_highdma(dev, skb)))); | ||
1898 | } | 1936 | } |
1899 | 1937 | ||
1900 | int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, | 1938 | int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, |
@@ -1921,6 +1959,22 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, | |||
1921 | goto out_kfree_skb; | 1959 | goto out_kfree_skb; |
1922 | if (skb->next) | 1960 | if (skb->next) |
1923 | goto gso; | 1961 | goto gso; |
1962 | } else { | ||
1963 | if (skb_needs_linearize(skb, dev) && | ||
1964 | __skb_linearize(skb)) | ||
1965 | goto out_kfree_skb; | ||
1966 | |||
1967 | /* If packet is not checksummed and device does not | ||
1968 | * support checksumming for this protocol, complete | ||
1969 | * checksumming here. | ||
1970 | */ | ||
1971 | if (skb->ip_summed == CHECKSUM_PARTIAL) { | ||
1972 | skb_set_transport_header(skb, skb->csum_start - | ||
1973 | skb_headroom(skb)); | ||
1974 | if (!dev_can_checksum(dev, skb) && | ||
1975 | skb_checksum_help(skb)) | ||
1976 | goto out_kfree_skb; | ||
1977 | } | ||
1924 | } | 1978 | } |
1925 | 1979 | ||
1926 | rc = ops->ndo_start_xmit(skb, dev); | 1980 | rc = ops->ndo_start_xmit(skb, dev); |
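Note: whether the stack falls back to skb_checksum_help() in the branch above depends on the feature flags the driver advertises. A driver that checksums IPv4 TCP/UDP in hardware would set something like the following at probe time (sketch, hypothetical driver):

    /* mydrv_probe(): advertise checksum and scatter/gather offloads so the
     * dev_can_checksum() test succeeds and the software fallback is skipped.
     */
    dev->features |= NETIF_F_IP_CSUM | NETIF_F_SG;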
@@ -1980,8 +2034,7 @@ u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb) | |||
1980 | if (skb->sk && skb->sk->sk_hash) | 2034 | if (skb->sk && skb->sk->sk_hash) |
1981 | hash = skb->sk->sk_hash; | 2035 | hash = skb->sk->sk_hash; |
1982 | else | 2036 | else |
1983 | hash = (__force u16) skb->protocol; | 2037 | hash = (__force u16) skb->protocol ^ skb->rxhash; |
1984 | |||
1985 | hash = jhash_1word(hash, hashrnd); | 2038 | hash = jhash_1word(hash, hashrnd); |
1986 | 2039 | ||
1987 | return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32); | 2040 | return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32); |
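Note: the last line maps the 32-bit hash onto [0, real_num_tx_queues) with a multiply-and-shift instead of a modulo, and with this patch the value stashed in skb->rxhash by skb_orphan_try() keeps the result stable for a flow after the skb has been orphaned. A small standalone demonstration of the mapping (not kernel code):

    #include <stdint.h>
    #include <stdio.h>

    /* Same arithmetic as the final line of skb_tx_hash(): scale a 32-bit
     * hash into the range [0, nqueues) without a division.
     */
    static uint16_t pick_queue(uint32_t hash, uint16_t nqueues)
    {
            return (uint16_t)(((uint64_t)hash * nqueues) >> 32);
    }

    int main(void)
    {
            printf("%u\n", (unsigned)pick_queue(0x9e3779b9u, 8)); /* some queue in 0..7 */
            printf("%u\n", (unsigned)pick_queue(0xffffffffu, 8)); /* largest hash -> 7  */
            return 0;
    }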
@@ -2004,12 +2057,11 @@ static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index) | |||
2004 | static struct netdev_queue *dev_pick_tx(struct net_device *dev, | 2057 | static struct netdev_queue *dev_pick_tx(struct net_device *dev, |
2005 | struct sk_buff *skb) | 2058 | struct sk_buff *skb) |
2006 | { | 2059 | { |
2007 | u16 queue_index; | 2060 | int queue_index; |
2008 | struct sock *sk = skb->sk; | 2061 | struct sock *sk = skb->sk; |
2009 | 2062 | ||
2010 | if (sk_tx_queue_recorded(sk)) { | 2063 | queue_index = sk_tx_queue_get(sk); |
2011 | queue_index = sk_tx_queue_get(sk); | 2064 | if (queue_index < 0) { |
2012 | } else { | ||
2013 | const struct net_device_ops *ops = dev->netdev_ops; | 2065 | const struct net_device_ops *ops = dev->netdev_ops; |
2014 | 2066 | ||
2015 | if (ops->ndo_select_queue) { | 2067 | if (ops->ndo_select_queue) { |
@@ -2038,14 +2090,24 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, | |||
2038 | struct netdev_queue *txq) | 2090 | struct netdev_queue *txq) |
2039 | { | 2091 | { |
2040 | spinlock_t *root_lock = qdisc_lock(q); | 2092 | spinlock_t *root_lock = qdisc_lock(q); |
2093 | bool contended = qdisc_is_running(q); | ||
2041 | int rc; | 2094 | int rc; |
2042 | 2095 | ||
2096 | /* | ||
2097 | * Heuristic to force contended enqueues to serialize on a | ||
2098 | * separate lock before trying to get qdisc main lock. | ||
2099 | * This permits __QDISC_STATE_RUNNING owner to get the lock more often | ||
2100 | * and dequeue packets faster. | ||
2101 | */ | ||
2102 | if (unlikely(contended)) | ||
2103 | spin_lock(&q->busylock); | ||
2104 | |||
2043 | spin_lock(root_lock); | 2105 | spin_lock(root_lock); |
2044 | if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) { | 2106 | if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) { |
2045 | kfree_skb(skb); | 2107 | kfree_skb(skb); |
2046 | rc = NET_XMIT_DROP; | 2108 | rc = NET_XMIT_DROP; |
2047 | } else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) && | 2109 | } else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) && |
2048 | !test_and_set_bit(__QDISC_STATE_RUNNING, &q->state)) { | 2110 | qdisc_run_begin(q)) { |
2049 | /* | 2111 | /* |
2050 | * This is a work-conserving queue; there are no old skbs | 2112 | * This is a work-conserving queue; there are no old skbs |
2051 | * waiting to be sent out; and the qdisc is not running - | 2113 | * waiting to be sent out; and the qdisc is not running - |
@@ -2054,37 +2116,33 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, | |||
2054 | if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE)) | 2116 | if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE)) |
2055 | skb_dst_force(skb); | 2117 | skb_dst_force(skb); |
2056 | __qdisc_update_bstats(q, skb->len); | 2118 | __qdisc_update_bstats(q, skb->len); |
2057 | if (sch_direct_xmit(skb, q, dev, txq, root_lock)) | 2119 | if (sch_direct_xmit(skb, q, dev, txq, root_lock)) { |
2120 | if (unlikely(contended)) { | ||
2121 | spin_unlock(&q->busylock); | ||
2122 | contended = false; | ||
2123 | } | ||
2058 | __qdisc_run(q); | 2124 | __qdisc_run(q); |
2059 | else | 2125 | } else |
2060 | clear_bit(__QDISC_STATE_RUNNING, &q->state); | 2126 | qdisc_run_end(q); |
2061 | 2127 | ||
2062 | rc = NET_XMIT_SUCCESS; | 2128 | rc = NET_XMIT_SUCCESS; |
2063 | } else { | 2129 | } else { |
2064 | skb_dst_force(skb); | 2130 | skb_dst_force(skb); |
2065 | rc = qdisc_enqueue_root(skb, q); | 2131 | rc = qdisc_enqueue_root(skb, q); |
2066 | qdisc_run(q); | 2132 | if (qdisc_run_begin(q)) { |
2133 | if (unlikely(contended)) { | ||
2134 | spin_unlock(&q->busylock); | ||
2135 | contended = false; | ||
2136 | } | ||
2137 | __qdisc_run(q); | ||
2138 | } | ||
2067 | } | 2139 | } |
2068 | spin_unlock(root_lock); | 2140 | spin_unlock(root_lock); |
2069 | 2141 | if (unlikely(contended)) | |
2142 | spin_unlock(&q->busylock); | ||
2070 | return rc; | 2143 | return rc; |
2071 | } | 2144 | } |
2072 | 2145 | ||
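Note: the busylock heuristic above is a general pattern: when the queue owner is already running, competing producers first serialize on a secondary lock so only one of them contends for the main lock at a time, letting the owner reacquire it quickly between dequeues. A userspace sketch of the idea using pthreads (illustration only, not the kernel code):

    #include <pthread.h>
    #include <stdbool.h>

    struct queue {
            pthread_mutex_t root_lock;   /* protects the queue itself           */
            pthread_mutex_t busylock;    /* contended producers serialize here  */
            bool running;                /* owner is currently dequeueing       */
            /* ... queue storage elided ... */
    };

    void enqueue_contended(struct queue *q, void *item)
    {
            bool contended = q->running;              /* heuristic, may be stale */

            if (contended)
                    pthread_mutex_lock(&q->busylock); /* hold back the herd      */

            pthread_mutex_lock(&q->root_lock);
            /* ... actually place item on the queue ... */
            pthread_mutex_unlock(&q->root_lock);

            if (contended)
                    pthread_mutex_unlock(&q->busylock);
    }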
2073 | /* | ||
2074 | * Returns true if either: | ||
2075 | * 1. skb has frag_list and the device doesn't support FRAGLIST, or | ||
2076 | * 2. skb is fragmented and the device does not support SG, or if | ||
2077 | * at least one of fragments is in highmem and device does not | ||
2078 | * support DMA from it. | ||
2079 | */ | ||
2080 | static inline int skb_needs_linearize(struct sk_buff *skb, | ||
2081 | struct net_device *dev) | ||
2082 | { | ||
2083 | return (skb_has_frags(skb) && !(dev->features & NETIF_F_FRAGLIST)) || | ||
2084 | (skb_shinfo(skb)->nr_frags && (!(dev->features & NETIF_F_SG) || | ||
2085 | illegal_highdma(dev, skb))); | ||
2086 | } | ||
2087 | |||
2088 | /** | 2146 | /** |
2089 | * dev_queue_xmit - transmit a buffer | 2147 | * dev_queue_xmit - transmit a buffer |
2090 | * @skb: buffer to transmit | 2148 | * @skb: buffer to transmit |
@@ -2117,25 +2175,6 @@ int dev_queue_xmit(struct sk_buff *skb) | |||
2117 | struct Qdisc *q; | 2175 | struct Qdisc *q; |
2118 | int rc = -ENOMEM; | 2176 | int rc = -ENOMEM; |
2119 | 2177 | ||
2120 | /* GSO will handle the following emulations directly. */ | ||
2121 | if (netif_needs_gso(dev, skb)) | ||
2122 | goto gso; | ||
2123 | |||
2124 | /* Convert a paged skb to linear, if required */ | ||
2125 | if (skb_needs_linearize(skb, dev) && __skb_linearize(skb)) | ||
2126 | goto out_kfree_skb; | ||
2127 | |||
2128 | /* If packet is not checksummed and device does not support | ||
2129 | * checksumming for this protocol, complete checksumming here. | ||
2130 | */ | ||
2131 | if (skb->ip_summed == CHECKSUM_PARTIAL) { | ||
2132 | skb_set_transport_header(skb, skb->csum_start - | ||
2133 | skb_headroom(skb)); | ||
2134 | if (!dev_can_checksum(dev, skb) && skb_checksum_help(skb)) | ||
2135 | goto out_kfree_skb; | ||
2136 | } | ||
2137 | |||
2138 | gso: | ||
2139 | /* Disable soft irqs for various locks below. Also | 2178 | /* Disable soft irqs for various locks below. Also |
2140 | * stops preemption for RCU. | 2179 | * stops preemption for RCU. |
2141 | */ | 2180 | */ |
@@ -2194,7 +2233,6 @@ gso: | |||
2194 | rc = -ENETDOWN; | 2233 | rc = -ENETDOWN; |
2195 | rcu_read_unlock_bh(); | 2234 | rcu_read_unlock_bh(); |
2196 | 2235 | ||
2197 | out_kfree_skb: | ||
2198 | kfree_skb(skb); | 2236 | kfree_skb(skb); |
2199 | return rc; | 2237 | return rc; |
2200 | out: | 2238 | out: |
@@ -2253,11 +2291,9 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, | |||
2253 | if (skb_rx_queue_recorded(skb)) { | 2291 | if (skb_rx_queue_recorded(skb)) { |
2254 | u16 index = skb_get_rx_queue(skb); | 2292 | u16 index = skb_get_rx_queue(skb); |
2255 | if (unlikely(index >= dev->num_rx_queues)) { | 2293 | if (unlikely(index >= dev->num_rx_queues)) { |
2256 | if (net_ratelimit()) { | 2294 | WARN_ONCE(dev->num_rx_queues > 1, "%s received packet " |
2257 | pr_warning("%s received packet on queue " | 2295 | "on queue %u, but number of RX queues is %u\n", |
2258 | "%u, but number of RX queues is %u\n", | 2296 | dev->name, index, dev->num_rx_queues); |
2259 | dev->name, index, dev->num_rx_queues); | ||
2260 | } | ||
2261 | goto done; | 2297 | goto done; |
2262 | } | 2298 | } |
2263 | rxqueue = dev->_rx + index; | 2299 | rxqueue = dev->_rx + index; |
@@ -2481,6 +2517,7 @@ int netif_rx(struct sk_buff *skb) | |||
2481 | struct rps_dev_flow voidflow, *rflow = &voidflow; | 2517 | struct rps_dev_flow voidflow, *rflow = &voidflow; |
2482 | int cpu; | 2518 | int cpu; |
2483 | 2519 | ||
2520 | preempt_disable(); | ||
2484 | rcu_read_lock(); | 2521 | rcu_read_lock(); |
2485 | 2522 | ||
2486 | cpu = get_rps_cpu(skb->dev, skb, &rflow); | 2523 | cpu = get_rps_cpu(skb->dev, skb, &rflow); |
@@ -2490,6 +2527,7 @@ int netif_rx(struct sk_buff *skb) | |||
2490 | ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail); | 2527 | ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail); |
2491 | 2528 | ||
2492 | rcu_read_unlock(); | 2529 | rcu_read_unlock(); |
2530 | preempt_enable(); | ||
2493 | } | 2531 | } |
2494 | #else | 2532 | #else |
2495 | { | 2533 | { |
@@ -2581,70 +2619,14 @@ static inline int deliver_skb(struct sk_buff *skb, | |||
2581 | return pt_prev->func(skb, skb->dev, pt_prev, orig_dev); | 2619 | return pt_prev->func(skb, skb->dev, pt_prev, orig_dev); |
2582 | } | 2620 | } |
2583 | 2621 | ||
2584 | #if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE) | 2622 | #if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \ |
2585 | 2623 | (defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)) | |
2586 | #if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE) | ||
2587 | /* This hook is defined here for ATM LANE */ | 2624 | /* This hook is defined here for ATM LANE */ |
2588 | int (*br_fdb_test_addr_hook)(struct net_device *dev, | 2625 | int (*br_fdb_test_addr_hook)(struct net_device *dev, |
2589 | unsigned char *addr) __read_mostly; | 2626 | unsigned char *addr) __read_mostly; |
2590 | EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook); | 2627 | EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook); |
2591 | #endif | 2628 | #endif |
2592 | 2629 | ||
2593 | /* | ||
2594 | * If bridge module is loaded call bridging hook. | ||
2595 | * returns NULL if packet was consumed. | ||
2596 | */ | ||
2597 | struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p, | ||
2598 | struct sk_buff *skb) __read_mostly; | ||
2599 | EXPORT_SYMBOL_GPL(br_handle_frame_hook); | ||
2600 | |||
2601 | static inline struct sk_buff *handle_bridge(struct sk_buff *skb, | ||
2602 | struct packet_type **pt_prev, int *ret, | ||
2603 | struct net_device *orig_dev) | ||
2604 | { | ||
2605 | struct net_bridge_port *port; | ||
2606 | |||
2607 | if (skb->pkt_type == PACKET_LOOPBACK || | ||
2608 | (port = rcu_dereference(skb->dev->br_port)) == NULL) | ||
2609 | return skb; | ||
2610 | |||
2611 | if (*pt_prev) { | ||
2612 | *ret = deliver_skb(skb, *pt_prev, orig_dev); | ||
2613 | *pt_prev = NULL; | ||
2614 | } | ||
2615 | |||
2616 | return br_handle_frame_hook(port, skb); | ||
2617 | } | ||
2618 | #else | ||
2619 | #define handle_bridge(skb, pt_prev, ret, orig_dev) (skb) | ||
2620 | #endif | ||
2621 | |||
2622 | #if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE) | ||
2623 | struct sk_buff *(*macvlan_handle_frame_hook)(struct macvlan_port *p, | ||
2624 | struct sk_buff *skb) __read_mostly; | ||
2625 | EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook); | ||
2626 | |||
2627 | static inline struct sk_buff *handle_macvlan(struct sk_buff *skb, | ||
2628 | struct packet_type **pt_prev, | ||
2629 | int *ret, | ||
2630 | struct net_device *orig_dev) | ||
2631 | { | ||
2632 | struct macvlan_port *port; | ||
2633 | |||
2634 | port = rcu_dereference(skb->dev->macvlan_port); | ||
2635 | if (!port) | ||
2636 | return skb; | ||
2637 | |||
2638 | if (*pt_prev) { | ||
2639 | *ret = deliver_skb(skb, *pt_prev, orig_dev); | ||
2640 | *pt_prev = NULL; | ||
2641 | } | ||
2642 | return macvlan_handle_frame_hook(port, skb); | ||
2643 | } | ||
2644 | #else | ||
2645 | #define handle_macvlan(skb, pt_prev, ret, orig_dev) (skb) | ||
2646 | #endif | ||
2647 | |||
2648 | #ifdef CONFIG_NET_CLS_ACT | 2630 | #ifdef CONFIG_NET_CLS_ACT |
2649 | /* TODO: Maybe we should just force sch_ingress to be compiled in | 2631 | /* TODO: Maybe we should just force sch_ingress to be compiled in |
2650 | * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions | 2632 | * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions |
@@ -2662,10 +2644,10 @@ static int ing_filter(struct sk_buff *skb) | |||
2662 | int result = TC_ACT_OK; | 2644 | int result = TC_ACT_OK; |
2663 | struct Qdisc *q; | 2645 | struct Qdisc *q; |
2664 | 2646 | ||
2665 | if (MAX_RED_LOOP < ttl++) { | 2647 | if (unlikely(MAX_RED_LOOP < ttl++)) { |
2666 | printk(KERN_WARNING | 2648 | if (net_ratelimit()) |
2667 | "Redir loop detected Dropping packet (%d->%d)\n", | 2649 | pr_warning( "Redir loop detected Dropping packet (%d->%d)\n", |
2668 | skb->skb_iif, dev->ifindex); | 2650 | skb->skb_iif, dev->ifindex); |
2669 | return TC_ACT_SHOT; | 2651 | return TC_ACT_SHOT; |
2670 | } | 2652 | } |
2671 | 2653 | ||
@@ -2695,9 +2677,6 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb, | |||
2695 | if (*pt_prev) { | 2677 | if (*pt_prev) { |
2696 | *ret = deliver_skb(skb, *pt_prev, orig_dev); | 2678 | *ret = deliver_skb(skb, *pt_prev, orig_dev); |
2697 | *pt_prev = NULL; | 2679 | *pt_prev = NULL; |
2698 | } else { | ||
2699 | /* Huh? Why does turning on AF_PACKET affect this? */ | ||
2700 | skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd); | ||
2701 | } | 2680 | } |
2702 | 2681 | ||
2703 | switch (ing_filter(skb)) { | 2682 | switch (ing_filter(skb)) { |
@@ -2740,6 +2719,51 @@ void netif_nit_deliver(struct sk_buff *skb) | |||
2740 | rcu_read_unlock(); | 2719 | rcu_read_unlock(); |
2741 | } | 2720 | } |
2742 | 2721 | ||
2722 | /** | ||
2723 | * netdev_rx_handler_register - register receive handler | ||
2724 | * @dev: device to register a handler for | ||
2725 | * @rx_handler: receive handler to register | ||
2726 | * @rx_handler_data: data pointer that is used by rx handler | ||
2727 | * | ||
2728 | * Register a receive hander for a device. This handler will then be | ||
2729 | * called from __netif_receive_skb. A negative errno code is returned | ||
2730 | * on a failure. | ||
2731 | * | ||
2732 | * The caller must hold the rtnl_mutex. | ||
2733 | */ | ||
2734 | int netdev_rx_handler_register(struct net_device *dev, | ||
2735 | rx_handler_func_t *rx_handler, | ||
2736 | void *rx_handler_data) | ||
2737 | { | ||
2738 | ASSERT_RTNL(); | ||
2739 | |||
2740 | if (dev->rx_handler) | ||
2741 | return -EBUSY; | ||
2742 | |||
2743 | rcu_assign_pointer(dev->rx_handler_data, rx_handler_data); | ||
2744 | rcu_assign_pointer(dev->rx_handler, rx_handler); | ||
2745 | |||
2746 | return 0; | ||
2747 | } | ||
2748 | EXPORT_SYMBOL_GPL(netdev_rx_handler_register); | ||
2749 | |||
2750 | /** | ||
2751 | * netdev_rx_handler_unregister - unregister receive handler | ||
2752 | * @dev: device to unregister a handler from | ||
2753 | * | ||
2754 | * Unregister a receive hander from a device. | ||
2755 | * | ||
2756 | * The caller must hold the rtnl_mutex. | ||
2757 | */ | ||
2758 | void netdev_rx_handler_unregister(struct net_device *dev) | ||
2759 | { | ||
2760 | |||
2761 | ASSERT_RTNL(); | ||
2762 | rcu_assign_pointer(dev->rx_handler, NULL); | ||
2763 | rcu_assign_pointer(dev->rx_handler_data, NULL); | ||
2764 | } | ||
2765 | EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister); | ||
2766 | |||
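Note: a module hooking a device's receive path through this new interface would register its handler under the RTNL lock, matching the signature used by __netif_receive_skb() below (return NULL to consume the frame, or the skb to let normal delivery continue). A sketch with made-up "myproto" names:

    #include <linux/netdevice.h>
    #include <linux/rtnetlink.h>

    static struct sk_buff *myproto_handle_frame(struct sk_buff *skb)
    {
            /* ... consume the frame ... */
            return NULL;    /* NULL stops further processing in __netif_receive_skb */
    }

    static int myproto_attach(struct net_device *dev, void *priv)
    {
            int err;

            rtnl_lock();
            err = netdev_rx_handler_register(dev, myproto_handle_frame, priv);
            rtnl_unlock();
            return err;     /* -EBUSY if another handler is already installed */
    }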
2743 | static inline void skb_bond_set_mac_by_master(struct sk_buff *skb, | 2767 | static inline void skb_bond_set_mac_by_master(struct sk_buff *skb, |
2744 | struct net_device *master) | 2768 | struct net_device *master) |
2745 | { | 2769 | { |
@@ -2761,7 +2785,8 @@ int __skb_bond_should_drop(struct sk_buff *skb, struct net_device *master) | |||
2761 | if (master->priv_flags & IFF_MASTER_ARPMON) | 2785 | if (master->priv_flags & IFF_MASTER_ARPMON) |
2762 | dev->last_rx = jiffies; | 2786 | dev->last_rx = jiffies; |
2763 | 2787 | ||
2764 | if ((master->priv_flags & IFF_MASTER_ALB) && master->br_port) { | 2788 | if ((master->priv_flags & IFF_MASTER_ALB) && |
2789 | (master->priv_flags & IFF_BRIDGE_PORT)) { | ||
2765 | /* Do address unmangle. The local destination address | 2790 | /* Do address unmangle. The local destination address |
2766 | * will be always the one master has. Provides the right | 2791 | * will be always the one master has. Provides the right |
2767 | * functionality in a bridge. | 2792 | * functionality in a bridge. |
@@ -2792,6 +2817,7 @@ EXPORT_SYMBOL(__skb_bond_should_drop); | |||
2792 | static int __netif_receive_skb(struct sk_buff *skb) | 2817 | static int __netif_receive_skb(struct sk_buff *skb) |
2793 | { | 2818 | { |
2794 | struct packet_type *ptype, *pt_prev; | 2819 | struct packet_type *ptype, *pt_prev; |
2820 | rx_handler_func_t *rx_handler; | ||
2795 | struct net_device *orig_dev; | 2821 | struct net_device *orig_dev; |
2796 | struct net_device *master; | 2822 | struct net_device *master; |
2797 | struct net_device *null_or_orig; | 2823 | struct net_device *null_or_orig; |
@@ -2812,18 +2838,28 @@ static int __netif_receive_skb(struct sk_buff *skb) | |||
2812 | if (!skb->skb_iif) | 2838 | if (!skb->skb_iif) |
2813 | skb->skb_iif = skb->dev->ifindex; | 2839 | skb->skb_iif = skb->dev->ifindex; |
2814 | 2840 | ||
2841 | /* | ||
2842 | * bonding note: skbs received on inactive slaves should only | ||
2843 | * be delivered to pkt handlers that are exact matches. Also | ||
2844 | * the deliver_no_wcard flag will be set. If packet handlers | ||
2845 | * are sensitive to duplicate packets these skbs will need to | ||
2846 | * be dropped at the handler. The vlan accel path may have | ||
2847 | * already set the deliver_no_wcard flag. | ||
2848 | */ | ||
2815 | null_or_orig = NULL; | 2849 | null_or_orig = NULL; |
2816 | orig_dev = skb->dev; | 2850 | orig_dev = skb->dev; |
2817 | master = ACCESS_ONCE(orig_dev->master); | 2851 | master = ACCESS_ONCE(orig_dev->master); |
2818 | if (master) { | 2852 | if (skb->deliver_no_wcard) |
2819 | if (skb_bond_should_drop(skb, master)) | 2853 | null_or_orig = orig_dev; |
2854 | else if (master) { | ||
2855 | if (skb_bond_should_drop(skb, master)) { | ||
2856 | skb->deliver_no_wcard = 1; | ||
2820 | null_or_orig = orig_dev; /* deliver only exact match */ | 2857 | null_or_orig = orig_dev; /* deliver only exact match */ |
2821 | else | 2858 | } else |
2822 | skb->dev = master; | 2859 | skb->dev = master; |
2823 | } | 2860 | } |
2824 | 2861 | ||
2825 | __get_cpu_var(softnet_data).processed++; | 2862 | __this_cpu_inc(softnet_data.processed); |
2826 | |||
2827 | skb_reset_network_header(skb); | 2863 | skb_reset_network_header(skb); |
2828 | skb_reset_transport_header(skb); | 2864 | skb_reset_transport_header(skb); |
2829 | skb->mac_len = skb->network_header - skb->mac_header; | 2865 | skb->mac_len = skb->network_header - skb->mac_header; |
@@ -2855,12 +2891,17 @@ static int __netif_receive_skb(struct sk_buff *skb) | |||
2855 | ncls: | 2891 | ncls: |
2856 | #endif | 2892 | #endif |
2857 | 2893 | ||
2858 | skb = handle_bridge(skb, &pt_prev, &ret, orig_dev); | 2894 | /* Handle special case of bridge or macvlan */ |
2859 | if (!skb) | 2895 | rx_handler = rcu_dereference(skb->dev->rx_handler); |
2860 | goto out; | 2896 | if (rx_handler) { |
2861 | skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev); | 2897 | if (pt_prev) { |
2862 | if (!skb) | 2898 | ret = deliver_skb(skb, pt_prev, orig_dev); |
2863 | goto out; | 2899 | pt_prev = NULL; |
2900 | } | ||
2901 | skb = rx_handler(skb); | ||
2902 | if (!skb) | ||
2903 | goto out; | ||
2904 | } | ||
2864 | 2905 | ||
2865 | /* | 2906 | /* |
2866 | * Make sure frames received on VLAN interfaces stacked on | 2907 | * Make sure frames received on VLAN interfaces stacked on |
@@ -2921,6 +2962,9 @@ int netif_receive_skb(struct sk_buff *skb) | |||
2921 | if (netdev_tstamp_prequeue) | 2962 | if (netdev_tstamp_prequeue) |
2922 | net_timestamp_check(skb); | 2963 | net_timestamp_check(skb); |
2923 | 2964 | ||
2965 | if (skb_defer_rx_timestamp(skb)) | ||
2966 | return NET_RX_SUCCESS; | ||
2967 | |||
2924 | #ifdef CONFIG_RPS | 2968 | #ifdef CONFIG_RPS |
2925 | { | 2969 | { |
2926 | struct rps_dev_flow voidflow, *rflow = &voidflow; | 2970 | struct rps_dev_flow voidflow, *rflow = &voidflow; |
@@ -3030,7 +3074,7 @@ enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) | |||
3030 | int mac_len; | 3074 | int mac_len; |
3031 | enum gro_result ret; | 3075 | enum gro_result ret; |
3032 | 3076 | ||
3033 | if (!(skb->dev->features & NETIF_F_GRO)) | 3077 | if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb)) |
3034 | goto normal; | 3078 | goto normal; |
3035 | 3079 | ||
3036 | if (skb_is_gso(skb) || skb_has_frags(skb)) | 3080 | if (skb_is_gso(skb) || skb_has_frags(skb)) |
@@ -3117,9 +3161,6 @@ __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) | |||
3117 | { | 3161 | { |
3118 | struct sk_buff *p; | 3162 | struct sk_buff *p; |
3119 | 3163 | ||
3120 | if (netpoll_rx_on(skb)) | ||
3121 | return GRO_NORMAL; | ||
3122 | |||
3123 | for (p = napi->gro_list; p; p = p->next) { | 3164 | for (p = napi->gro_list; p; p = p->next) { |
3124 | NAPI_GRO_CB(p)->same_flow = | 3165 | NAPI_GRO_CB(p)->same_flow = |
3125 | (p->dev == skb->dev) && | 3166 | (p->dev == skb->dev) && |
@@ -3685,10 +3726,11 @@ void dev_seq_stop(struct seq_file *seq, void *v) | |||
3685 | 3726 | ||
3686 | static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev) | 3727 | static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev) |
3687 | { | 3728 | { |
3688 | const struct net_device_stats *stats = dev_get_stats(dev); | 3729 | struct rtnl_link_stats64 temp; |
3730 | const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp); | ||
3689 | 3731 | ||
3690 | seq_printf(seq, "%6s: %7lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu " | 3732 | seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu " |
3691 | "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n", | 3733 | "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n", |
3692 | dev->name, stats->rx_bytes, stats->rx_packets, | 3734 | dev->name, stats->rx_bytes, stats->rx_packets, |
3693 | stats->rx_errors, | 3735 | stats->rx_errors, |
3694 | stats->rx_dropped + stats->rx_missed_errors, | 3736 | stats->rx_dropped + stats->rx_missed_errors, |
@@ -5237,20 +5279,22 @@ void netdev_run_todo(void) | |||
5237 | /** | 5279 | /** |
5238 | * dev_txq_stats_fold - fold tx_queues stats | 5280 | * dev_txq_stats_fold - fold tx_queues stats |
5239 | * @dev: device to get statistics from | 5281 | * @dev: device to get statistics from |
5240 | * @stats: struct net_device_stats to hold results | 5282 | * @stats: struct rtnl_link_stats64 to hold results |
5241 | */ | 5283 | */ |
5242 | void dev_txq_stats_fold(const struct net_device *dev, | 5284 | void dev_txq_stats_fold(const struct net_device *dev, |
5243 | struct net_device_stats *stats) | 5285 | struct rtnl_link_stats64 *stats) |
5244 | { | 5286 | { |
5245 | unsigned long tx_bytes = 0, tx_packets = 0, tx_dropped = 0; | 5287 | u64 tx_bytes = 0, tx_packets = 0, tx_dropped = 0; |
5246 | unsigned int i; | 5288 | unsigned int i; |
5247 | struct netdev_queue *txq; | 5289 | struct netdev_queue *txq; |
5248 | 5290 | ||
5249 | for (i = 0; i < dev->num_tx_queues; i++) { | 5291 | for (i = 0; i < dev->num_tx_queues; i++) { |
5250 | txq = netdev_get_tx_queue(dev, i); | 5292 | txq = netdev_get_tx_queue(dev, i); |
5293 | spin_lock_bh(&txq->_xmit_lock); | ||
5251 | tx_bytes += txq->tx_bytes; | 5294 | tx_bytes += txq->tx_bytes; |
5252 | tx_packets += txq->tx_packets; | 5295 | tx_packets += txq->tx_packets; |
5253 | tx_dropped += txq->tx_dropped; | 5296 | tx_dropped += txq->tx_dropped; |
5297 | spin_unlock_bh(&txq->_xmit_lock); | ||
5254 | } | 5298 | } |
5255 | if (tx_bytes || tx_packets || tx_dropped) { | 5299 | if (tx_bytes || tx_packets || tx_dropped) { |
5256 | stats->tx_bytes = tx_bytes; | 5300 | stats->tx_bytes = tx_bytes; |
@@ -5260,23 +5304,53 @@ void dev_txq_stats_fold(const struct net_device *dev, | |||
5260 | } | 5304 | } |
5261 | EXPORT_SYMBOL(dev_txq_stats_fold); | 5305 | EXPORT_SYMBOL(dev_txq_stats_fold); |
5262 | 5306 | ||
5307 | /* Convert net_device_stats to rtnl_link_stats64. They have the same | ||
5308 | * fields in the same order, with only the type differing. | ||
5309 | */ | ||
5310 | static void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64, | ||
5311 | const struct net_device_stats *netdev_stats) | ||
5312 | { | ||
5313 | #if BITS_PER_LONG == 64 | ||
5314 | BUILD_BUG_ON(sizeof(*stats64) != sizeof(*netdev_stats)); | ||
5315 | memcpy(stats64, netdev_stats, sizeof(*stats64)); | ||
5316 | #else | ||
5317 | size_t i, n = sizeof(*stats64) / sizeof(u64); | ||
5318 | const unsigned long *src = (const unsigned long *)netdev_stats; | ||
5319 | u64 *dst = (u64 *)stats64; | ||
5320 | |||
5321 | BUILD_BUG_ON(sizeof(*netdev_stats) / sizeof(unsigned long) != | ||
5322 | sizeof(*stats64) / sizeof(u64)); | ||
5323 | for (i = 0; i < n; i++) | ||
5324 | dst[i] = src[i]; | ||
5325 | #endif | ||
5326 | } | ||
5327 | |||
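Note: the conversion relies on the two structures having identical fields in identical order, differing only in member type. A standalone illustration of the same widening copy (struct names are made up for the demo, not kernel types):

    #include <stdint.h>
    #include <stdio.h>

    struct stats_long { unsigned long rx_packets, tx_packets, rx_bytes, tx_bytes; };
    struct stats_u64  { uint64_t      rx_packets, tx_packets, rx_bytes, tx_bytes; };

    static void stats_to_u64(struct stats_u64 *dst, const struct stats_long *src)
    {
            const unsigned long *s = (const unsigned long *)src;
            uint64_t *d = (uint64_t *)dst;
            size_t i, n = sizeof(*dst) / sizeof(uint64_t);

            for (i = 0; i < n; i++)     /* works whether long is 32 or 64 bit */
                    d[i] = s[i];
    }

    int main(void)
    {
            struct stats_long in = { 1, 2, 3, 4 };
            struct stats_u64 out;

            stats_to_u64(&out, &in);
            printf("%llu %llu\n", (unsigned long long)out.rx_packets,
                   (unsigned long long)out.tx_bytes);
            return 0;
    }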
5263 | /** | 5328 | /** |
5264 | * dev_get_stats - get network device statistics | 5329 | * dev_get_stats - get network device statistics |
5265 | * @dev: device to get statistics from | 5330 | * @dev: device to get statistics from |
5331 | * @storage: place to store stats | ||
5266 | * | 5332 | * |
5267 | * Get network statistics from device. The device driver may provide | 5333 | * Get network statistics from device. Return @storage. |
5268 | * its own method by setting dev->netdev_ops->get_stats; otherwise | 5334 | * The device driver may provide its own method by setting |
5269 | * the internal statistics structure is used. | 5335 | * dev->netdev_ops->get_stats64 or dev->netdev_ops->get_stats; |
5336 | * otherwise the internal statistics structure is used. | ||
5270 | */ | 5337 | */ |
5271 | const struct net_device_stats *dev_get_stats(struct net_device *dev) | 5338 | struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev, |
5339 | struct rtnl_link_stats64 *storage) | ||
5272 | { | 5340 | { |
5273 | const struct net_device_ops *ops = dev->netdev_ops; | 5341 | const struct net_device_ops *ops = dev->netdev_ops; |
5274 | 5342 | ||
5275 | if (ops->ndo_get_stats) | 5343 | if (ops->ndo_get_stats64) { |
5276 | return ops->ndo_get_stats(dev); | 5344 | memset(storage, 0, sizeof(*storage)); |
5277 | 5345 | return ops->ndo_get_stats64(dev, storage); | |
5278 | dev_txq_stats_fold(dev, &dev->stats); | 5346 | } |
5279 | return &dev->stats; | 5347 | if (ops->ndo_get_stats) { |
5348 | netdev_stats_to_stats64(storage, ops->ndo_get_stats(dev)); | ||
5349 | return storage; | ||
5350 | } | ||
5351 | netdev_stats_to_stats64(storage, &dev->stats); | ||
5352 | dev_txq_stats_fold(dev, storage); | ||
5353 | return storage; | ||
5280 | } | 5354 | } |
5281 | EXPORT_SYMBOL(dev_get_stats); | 5355 | EXPORT_SYMBOL(dev_get_stats); |
5282 | 5356 | ||
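Note: callers now supply their own storage instead of receiving a pointer into the device, as dev_seq_printf_stats() above already shows. A minimal caller fragment (the pr_info line is purely illustrative):

    struct rtnl_link_stats64 temp;
    const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);

    pr_info("%s: rx %llu packets, tx %llu packets\n", dev->name,
            (unsigned long long)stats->rx_packets,
            (unsigned long long)stats->tx_packets);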
@@ -5781,6 +5855,68 @@ char *netdev_drivername(const struct net_device *dev, char *buffer, int len) | |||
5781 | return buffer; | 5855 | return buffer; |
5782 | } | 5856 | } |
5783 | 5857 | ||
5858 | static int __netdev_printk(const char *level, const struct net_device *dev, | ||
5859 | struct va_format *vaf) | ||
5860 | { | ||
5861 | int r; | ||
5862 | |||
5863 | if (dev && dev->dev.parent) | ||
5864 | r = dev_printk(level, dev->dev.parent, "%s: %pV", | ||
5865 | netdev_name(dev), vaf); | ||
5866 | else if (dev) | ||
5867 | r = printk("%s%s: %pV", level, netdev_name(dev), vaf); | ||
5868 | else | ||
5869 | r = printk("%s(NULL net_device): %pV", level, vaf); | ||
5870 | |||
5871 | return r; | ||
5872 | } | ||
5873 | |||
5874 | int netdev_printk(const char *level, const struct net_device *dev, | ||
5875 | const char *format, ...) | ||
5876 | { | ||
5877 | struct va_format vaf; | ||
5878 | va_list args; | ||
5879 | int r; | ||
5880 | |||
5881 | va_start(args, format); | ||
5882 | |||
5883 | vaf.fmt = format; | ||
5884 | vaf.va = &args; | ||
5885 | |||
5886 | r = __netdev_printk(level, dev, &vaf); | ||
5887 | va_end(args); | ||
5888 | |||
5889 | return r; | ||
5890 | } | ||
5891 | EXPORT_SYMBOL(netdev_printk); | ||
5892 | |||
5893 | #define define_netdev_printk_level(func, level) \ | ||
5894 | int func(const struct net_device *dev, const char *fmt, ...) \ | ||
5895 | { \ | ||
5896 | int r; \ | ||
5897 | struct va_format vaf; \ | ||
5898 | va_list args; \ | ||
5899 | \ | ||
5900 | va_start(args, fmt); \ | ||
5901 | \ | ||
5902 | vaf.fmt = fmt; \ | ||
5903 | vaf.va = &args; \ | ||
5904 | \ | ||
5905 | r = __netdev_printk(level, dev, &vaf); \ | ||
5906 | va_end(args); \ | ||
5907 | \ | ||
5908 | return r; \ | ||
5909 | } \ | ||
5910 | EXPORT_SYMBOL(func); | ||
5911 | |||
5912 | define_netdev_printk_level(netdev_emerg, KERN_EMERG); | ||
5913 | define_netdev_printk_level(netdev_alert, KERN_ALERT); | ||
5914 | define_netdev_printk_level(netdev_crit, KERN_CRIT); | ||
5915 | define_netdev_printk_level(netdev_err, KERN_ERR); | ||
5916 | define_netdev_printk_level(netdev_warn, KERN_WARNING); | ||
5917 | define_netdev_printk_level(netdev_notice, KERN_NOTICE); | ||
5918 | define_netdev_printk_level(netdev_info, KERN_INFO); | ||
5919 | |||
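Note: the generated helpers are used like dev_err()/dev_info() but take a net_device and prefix messages with the interface name. Example calls (speed and ring are hypothetical driver variables):

    netdev_info(dev, "link up, %u Mbps\n", speed);
    netdev_err(dev, "TX ring %d stalled, resetting\n", ring);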
5784 | static void __net_exit netdev_exit(struct net *net) | 5920 | static void __net_exit netdev_exit(struct net *net) |
5785 | { | 5921 | { |
5786 | kfree(net->dev_name_head); | 5922 | kfree(net->dev_name_head); |