Diffstat (limited to 'net/core/dev.c')
-rw-r--r--  net/core/dev.c  |  475
1 file changed, 327 insertions(+), 148 deletions(-)
diff --git a/net/core/dev.c b/net/core/dev.c
index 8ae6631abcc..0b88eba97da 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -132,6 +132,7 @@
 #include <trace/events/skb.h>
 #include <linux/pci.h>
 #include <linux/inetdevice.h>
+#include <linux/cpu_rmap.h>
 
 #include "net-sysfs.h"
 
@@ -1114,13 +1115,21 @@ EXPORT_SYMBOL(netdev_bonding_change);
 void dev_load(struct net *net, const char *name)
 {
 	struct net_device *dev;
+	int no_module;
 
 	rcu_read_lock();
 	dev = dev_get_by_name_rcu(net, name);
 	rcu_read_unlock();
 
-	if (!dev && capable(CAP_NET_ADMIN))
-		request_module("%s", name);
+	no_module = !dev;
+	if (no_module && capable(CAP_NET_ADMIN))
+		no_module = request_module("netdev-%s", name);
+	if (no_module && capable(CAP_SYS_MODULE)) {
+		if (!request_module("%s", name))
+			pr_err("Loading kernel module for a network device "
+"with CAP_SYS_MODULE (deprecated). Use CAP_NET_ADMIN and alias netdev-%s "
+"instead\n", name);
+	}
 }
 EXPORT_SYMBOL(dev_load);
 
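The hunk above makes CAP_NET_ADMIN sufficient only for loading modules that advertise a "netdev-<name>" alias; loading by raw module name now also needs CAP_SYS_MODULE and logs a deprecation error. A rough sketch of how a driver might advertise such an alias follows. It assumes the MODULE_ALIAS_NETDEV() helper introduced alongside this change (a plain MODULE_ALIAS("netdev-...") would be equivalent), and the "dummy0" name is only an example.

/* Illustration, not part of this patch: expose a netdev-<name> alias so
 * dev_load() can auto-load this module under CAP_NET_ADMIN alone.
 */
#include <linux/module.h>
#include <linux/netdevice.h>

MODULE_ALIAS_NETDEV("dummy0");	/* assumed to expand to MODULE_ALIAS("netdev-dummy0") */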
@@ -1289,7 +1298,7 @@ static int __dev_close(struct net_device *dev)
 	return retval;
 }
 
-int dev_close_many(struct list_head *head)
+static int dev_close_many(struct list_head *head)
 {
 	struct net_device *dev, *tmp;
 	LIST_HEAD(tmp_list);
@@ -1597,6 +1606,48 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 	rcu_read_unlock();
 }
 
+/* netif_setup_tc - Handle tc mappings on real_num_tx_queues change
+ * @dev: Network device
+ * @txq: number of queues available
+ *
+ * If real_num_tx_queues is changed the tc mappings may no longer be
+ * valid. To resolve this verify the tc mapping remains valid and if
+ * not NULL the mapping. With no priorities mapping to this
+ * offset/count pair it will no longer be used. In the worst case TC0
+ * is invalid nothing can be done so disable priority mappings. It is
+ * expected that drivers will fix this mapping if they can before
+ * calling netif_set_real_num_tx_queues.
+ */
+static void netif_setup_tc(struct net_device *dev, unsigned int txq)
+{
+	int i;
+	struct netdev_tc_txq *tc = &dev->tc_to_txq[0];
+
+	/* If TC0 is invalidated disable TC mapping */
+	if (tc->offset + tc->count > txq) {
+		pr_warning("Number of in use tx queues changed "
+			   "invalidating tc mappings. Priority "
+			   "traffic classification disabled!\n");
+		dev->num_tc = 0;
+		return;
+	}
+
+	/* Invalidated prio to tc mappings set to TC0 */
+	for (i = 1; i < TC_BITMASK + 1; i++) {
+		int q = netdev_get_prio_tc_map(dev, i);
+
+		tc = &dev->tc_to_txq[q];
+		if (tc->offset + tc->count > txq) {
+			pr_warning("Number of in use tx queues "
+				   "changed. Priority %i to tc "
+				   "mapping %i is no longer valid "
+				   "setting map to 0\n",
+				   i, q);
+			netdev_set_prio_tc_map(dev, i, 0);
+		}
+	}
+}
+
 /*
  * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
  * greater then real_num_tx_queues stale skbs on the qdisc must be flushed.
@@ -1608,7 +1659,8 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
 	if (txq < 1 || txq > dev->num_tx_queues)
 		return -EINVAL;
 
-	if (dev->reg_state == NETREG_REGISTERED) {
+	if (dev->reg_state == NETREG_REGISTERED ||
+	    dev->reg_state == NETREG_UNREGISTERING) {
 		ASSERT_RTNL();
 
 		rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues,
@@ -1616,6 +1668,9 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
 		if (rc)
 			return rc;
 
+		if (dev->num_tc)
+			netif_setup_tc(dev, txq);
+
 		if (txq < dev->real_num_tx_queues)
 			qdisc_reset_all_tx_gt(dev, txq);
 	}
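For context, a hedged driver-side sketch of the traffic-class state that netif_setup_tc() re-validates when the queue count shrinks. It assumes the netdev_set_num_tc(), netdev_set_tc_queue() and netdev_set_prio_tc_map() helpers introduced in the same series; the queue counts are arbitrary.

/* Illustration only: two traffic classes over eight tx queues. If the
 * driver later shrinks real_num_tx_queues below 8, netif_setup_tc()
 * above resets any prio->tc mapping whose offset/count no longer fits.
 */
static int example_setup_tc(struct net_device *dev)
{
	int prio;

	netdev_set_num_tc(dev, 2);
	netdev_set_tc_queue(dev, 0, 4, 0);	/* TC0 -> queues 0..3 */
	netdev_set_tc_queue(dev, 1, 4, 4);	/* TC1 -> queues 4..7 */

	for (prio = 0; prio <= TC_BITMASK; prio++)
		netdev_set_prio_tc_map(dev, prio, prio < 4 ? 0 : 1);

	return netif_set_real_num_tx_queues(dev, 8);
}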
@@ -1815,7 +1870,7 @@ EXPORT_SYMBOL(skb_checksum_help);
  *	It may return NULL if the skb requires no segmentation. This is
  *	only possible when GSO is used for verifying header integrity.
  */
-struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
+struct sk_buff *skb_gso_segment(struct sk_buff *skb, u32 features)
 {
 	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
 	struct packet_type *ptype;
@@ -2003,7 +2058,7 @@ static bool can_checksum_protocol(unsigned long features, __be16 protocol)
 		 protocol == htons(ETH_P_FCOE)));
 }
 
-static int harmonize_features(struct sk_buff *skb, __be16 protocol, int features)
+static u32 harmonize_features(struct sk_buff *skb, __be16 protocol, u32 features)
 {
 	if (!can_checksum_protocol(features, protocol)) {
 		features &= ~NETIF_F_ALL_CSUM;
@@ -2015,10 +2070,10 @@ static int harmonize_features(struct sk_buff *skb, __be16 protocol, int features
 	return features;
 }
 
-int netif_skb_features(struct sk_buff *skb)
+u32 netif_skb_features(struct sk_buff *skb)
 {
 	__be16 protocol = skb->protocol;
-	int features = skb->dev->features;
+	u32 features = skb->dev->features;
 
 	if (protocol == htons(ETH_P_8021Q)) {
 		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
@@ -2063,7 +2118,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 	int rc = NETDEV_TX_OK;
 
 	if (likely(!skb->next)) {
-		int features;
+		u32 features;
 
 		/*
 		 * If device doesnt need skb->dst, release it right now while
@@ -2165,6 +2220,8 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
 		  unsigned int num_tx_queues)
 {
 	u32 hash;
+	u16 qoffset = 0;
+	u16 qcount = num_tx_queues;
 
 	if (skb_rx_queue_recorded(skb)) {
 		hash = skb_get_rx_queue(skb);
@@ -2173,13 +2230,19 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
 		return hash;
 	}
 
+	if (dev->num_tc) {
+		u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
+		qoffset = dev->tc_to_txq[tc].offset;
+		qcount = dev->tc_to_txq[tc].count;
+	}
+
 	if (skb->sk && skb->sk->sk_hash)
 		hash = skb->sk->sk_hash;
 	else
 		hash = (__force u16) skb->protocol ^ skb->rxhash;
 	hash = jhash_1word(hash, hashrnd);
 
-	return (u16) (((u64) hash * num_tx_queues) >> 32);
+	return (u16) (((u64) hash * qcount) >> 32) + qoffset;
 }
 EXPORT_SYMBOL(__skb_tx_hash);
 
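The new return statement confines the hash to the selected traffic class: the 32-bit hash is scaled into [qoffset, qoffset + qcount) with a multiply-and-shift rather than a modulo. A small standalone illustration of that arithmetic (plain C, not kernel code; values chosen only as an example):

#include <stdint.h>
#include <stdio.h>

/* Same math as the new return statement in __skb_tx_hash(). */
static uint16_t pick_queue(uint32_t hash, uint16_t qcount, uint16_t qoffset)
{
	return (uint16_t)(((uint64_t)hash * qcount) >> 32) + qoffset;
}

int main(void)
{
	/* e.g. a traffic class owning 4 queues starting at offset 4 */
	printf("%u\n", pick_queue(0x80000000u, 4, 4));	/* prints 6 */
	printf("%u\n", pick_queue(0xffffffffu, 4, 4));	/* prints 7 */
	return 0;
}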
@@ -2276,15 +2339,18 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 				 struct netdev_queue *txq)
 {
 	spinlock_t *root_lock = qdisc_lock(q);
-	bool contended = qdisc_is_running(q);
+	bool contended;
 	int rc;
 
+	qdisc_skb_cb(skb)->pkt_len = skb->len;
+	qdisc_calculate_pkt_len(skb, q);
 	/*
 	 * Heuristic to force contended enqueues to serialize on a
 	 * separate lock before trying to get qdisc main lock.
 	 * This permits __QDISC_STATE_RUNNING owner to get the lock more often
 	 * and dequeue packets faster.
 	 */
+	contended = qdisc_is_running(q);
 	if (unlikely(contended))
 		spin_lock(&q->busylock);
 
@@ -2302,7 +2368,6 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 		if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE))
 			skb_dst_force(skb);
 
-		qdisc_skb_cb(skb)->pkt_len = skb->len;
 		qdisc_bstats_update(q, skb);
 
 		if (sch_direct_xmit(skb, q, dev, txq, root_lock)) {
@@ -2317,7 +2382,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 		rc = NET_XMIT_SUCCESS;
 	} else {
 		skb_dst_force(skb);
-		rc = qdisc_enqueue_root(skb, q);
+		rc = q->enqueue(skb, q) & NET_XMIT_MASK;
 		if (qdisc_run_begin(q)) {
 			if (unlikely(contended)) {
 				spin_unlock(&q->busylock);
@@ -2536,6 +2601,54 @@ EXPORT_SYMBOL(__skb_get_rxhash);
 struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly;
 EXPORT_SYMBOL(rps_sock_flow_table);
 
+static struct rps_dev_flow *
+set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
+	    struct rps_dev_flow *rflow, u16 next_cpu)
+{
+	u16 tcpu;
+
+	tcpu = rflow->cpu = next_cpu;
+	if (tcpu != RPS_NO_CPU) {
+#ifdef CONFIG_RFS_ACCEL
+		struct netdev_rx_queue *rxqueue;
+		struct rps_dev_flow_table *flow_table;
+		struct rps_dev_flow *old_rflow;
+		u32 flow_id;
+		u16 rxq_index;
+		int rc;
+
+		/* Should we steer this flow to a different hardware queue? */
+		if (!skb_rx_queue_recorded(skb) || !dev->rx_cpu_rmap ||
+		    !(dev->features & NETIF_F_NTUPLE))
+			goto out;
+		rxq_index = cpu_rmap_lookup_index(dev->rx_cpu_rmap, next_cpu);
+		if (rxq_index == skb_get_rx_queue(skb))
+			goto out;
+
+		rxqueue = dev->_rx + rxq_index;
+		flow_table = rcu_dereference(rxqueue->rps_flow_table);
+		if (!flow_table)
+			goto out;
+		flow_id = skb->rxhash & flow_table->mask;
+		rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb,
+							rxq_index, flow_id);
+		if (rc < 0)
+			goto out;
+		old_rflow = rflow;
+		rflow = &flow_table->flows[flow_id];
+		rflow->cpu = next_cpu;
+		rflow->filter = rc;
+		if (old_rflow->filter == rflow->filter)
+			old_rflow->filter = RPS_NO_FILTER;
+	out:
+#endif
+		rflow->last_qtail =
+			per_cpu(softnet_data, tcpu).input_queue_head;
+	}
+
+	return rflow;
+}
+
 /*
  * get_rps_cpu is called from netif_receive_skb and returns the target
  * CPU from the RPS map of the receiving queue for a given skb.
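set_rps_cpu() relies on the driver's ndo_rx_flow_steer() callback to move the flow's hardware filter to the RX queue that interrupts the desired CPU, then remembers the returned filter ID for later expiry checks. A hypothetical driver-side sketch of that callback; the filter-programming helper is invented, only the calling convention comes from this patch.

/* Hypothetical helper: install an ntuple filter matching this skb's flow
 * and delivering to rxq_index; returns a non-negative hardware filter ID
 * or a negative errno.
 */
static int example_program_ntuple_filter(struct net_device *dev,
					 const struct sk_buff *skb,
					 u16 rxq_index);

static int example_rx_flow_steer(struct net_device *dev,
				 const struct sk_buff *skb,
				 u16 rxq_index, u32 flow_id)
{
	/* A negative return makes set_rps_cpu() keep the old mapping;
	 * a non-negative return is remembered in rflow->filter above.
	 */
	return example_program_ntuple_filter(dev, skb, rxq_index);
}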
@@ -2607,12 +2720,9 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
 		if (unlikely(tcpu != next_cpu) &&
 		    (tcpu == RPS_NO_CPU || !cpu_online(tcpu) ||
 		     ((int)(per_cpu(softnet_data, tcpu).input_queue_head -
-		      rflow->last_qtail)) >= 0)) {
-			tcpu = rflow->cpu = next_cpu;
-			if (tcpu != RPS_NO_CPU)
-				rflow->last_qtail = per_cpu(softnet_data,
-				    tcpu).input_queue_head;
-		}
+		      rflow->last_qtail)) >= 0))
+			rflow = set_rps_cpu(dev, skb, rflow, next_cpu);
+
 		if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) {
 			*rflowp = rflow;
 			cpu = tcpu;
@@ -2633,6 +2743,46 @@ done:
 	return cpu;
 }
 
+#ifdef CONFIG_RFS_ACCEL
+
+/**
+ * rps_may_expire_flow - check whether an RFS hardware filter may be removed
+ * @dev: Device on which the filter was set
+ * @rxq_index: RX queue index
+ * @flow_id: Flow ID passed to ndo_rx_flow_steer()
+ * @filter_id: Filter ID returned by ndo_rx_flow_steer()
+ *
+ * Drivers that implement ndo_rx_flow_steer() should periodically call
+ * this function for each installed filter and remove the filters for
+ * which it returns %true.
+ */
+bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
+			 u32 flow_id, u16 filter_id)
+{
+	struct netdev_rx_queue *rxqueue = dev->_rx + rxq_index;
+	struct rps_dev_flow_table *flow_table;
+	struct rps_dev_flow *rflow;
+	bool expire = true;
+	int cpu;
+
+	rcu_read_lock();
+	flow_table = rcu_dereference(rxqueue->rps_flow_table);
+	if (flow_table && flow_id <= flow_table->mask) {
+		rflow = &flow_table->flows[flow_id];
+		cpu = ACCESS_ONCE(rflow->cpu);
+		if (rflow->filter == filter_id && cpu != RPS_NO_CPU &&
+		    ((int)(per_cpu(softnet_data, cpu).input_queue_head -
+			   rflow->last_qtail) <
+		     (int)(10 * flow_table->mask)))
+			expire = false;
+	}
+	rcu_read_unlock();
+	return expire;
+}
+EXPORT_SYMBOL(rps_may_expire_flow);
+
+#endif /* CONFIG_RFS_ACCEL */
+
 /* Called from hardirq (IPI) context */
 static void rps_trigger_softirq(void *data)
 {
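Drivers pair ndo_rx_flow_steer() with a periodic sweep that removes filters once rps_may_expire_flow() reports the flow has gone idle or moved. A hypothetical sketch of such a sweep; the my_filter table and my_remove_filter() are invented, while rps_may_expire_flow() is the function added above.

struct my_filter {
	bool	inserted;
	u16	rxq_index;
	u32	flow_id;	/* value passed to ndo_rx_flow_steer() */
	u16	filter_id;	/* value it returned */
};

/* hypothetical: tear down the hardware filter behind this entry */
static void my_remove_filter(struct net_device *dev, struct my_filter *f);

static void my_expire_rfs_filters(struct net_device *dev,
				  struct my_filter *table, unsigned int n)
{
	unsigned int i;

	for (i = 0; i < n; i++) {
		if (!table[i].inserted)
			continue;
		if (rps_may_expire_flow(dev, table[i].rxq_index,
					table[i].flow_id, table[i].filter_id)) {
			my_remove_filter(dev, &table[i]);
			table[i].inserted = false;
		}
	}
}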
@@ -2920,6 +3070,8 @@ out:
  *	on a failure.
  *
  *	The caller must hold the rtnl_mutex.
+ *
+ *	For a general description of rx_handler, see enum rx_handler_result.
  */
 int netdev_rx_handler_register(struct net_device *dev,
 			       rx_handler_func_t *rx_handler,
@@ -2954,64 +3106,32 @@ void netdev_rx_handler_unregister(struct net_device *dev)
 }
 EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
 
-static inline void skb_bond_set_mac_by_master(struct sk_buff *skb,
-					      struct net_device *master)
-{
-	if (skb->pkt_type == PACKET_HOST) {
-		u16 *dest = (u16 *) eth_hdr(skb)->h_dest;
-
-		memcpy(dest, master->dev_addr, ETH_ALEN);
-	}
-}
-
-/* On bonding slaves other than the currently active slave, suppress
- * duplicates except for 802.3ad ETH_P_SLOW, alb non-mcast/bcast, and
- * ARP on active-backup slaves with arp_validate enabled.
- */
-int __skb_bond_should_drop(struct sk_buff *skb, struct net_device *master)
-{
-	struct net_device *dev = skb->dev;
-
-	if (master->priv_flags & IFF_MASTER_ARPMON)
-		dev->last_rx = jiffies;
-
-	if ((master->priv_flags & IFF_MASTER_ALB) &&
-	    (master->priv_flags & IFF_BRIDGE_PORT)) {
-		/* Do address unmangle. The local destination address
-		 * will be always the one master has. Provides the right
-		 * functionality in a bridge.
-		 */
-		skb_bond_set_mac_by_master(skb, master);
-	}
-
-	if (dev->priv_flags & IFF_SLAVE_INACTIVE) {
-		if ((dev->priv_flags & IFF_SLAVE_NEEDARP) &&
-		    skb->protocol == __cpu_to_be16(ETH_P_ARP))
-			return 0;
-
-		if (master->priv_flags & IFF_MASTER_ALB) {
-			if (skb->pkt_type != PACKET_BROADCAST &&
-			    skb->pkt_type != PACKET_MULTICAST)
-				return 0;
-		}
-		if (master->priv_flags & IFF_MASTER_8023AD &&
-		    skb->protocol == __cpu_to_be16(ETH_P_SLOW))
-			return 0;
-
-		return 1;
-	}
-	return 0;
-}
-EXPORT_SYMBOL(__skb_bond_should_drop);
+static void vlan_on_bond_hook(struct sk_buff *skb)
+{
+	/*
+	 * Make sure ARP frames received on VLAN interfaces stacked on
+	 * bonding interfaces still make their way to any base bonding
+	 * device that may have registered for a specific ptype.
+	 */
+	if (skb->dev->priv_flags & IFF_802_1Q_VLAN &&
+	    vlan_dev_real_dev(skb->dev)->priv_flags & IFF_BONDING &&
+	    skb->protocol == htons(ETH_P_ARP)) {
+		struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
+
+		if (!skb2)
+			return;
+		skb2->dev = vlan_dev_real_dev(skb->dev);
+		netif_rx(skb2);
+	}
+}
 
 static int __netif_receive_skb(struct sk_buff *skb)
 {
 	struct packet_type *ptype, *pt_prev;
 	rx_handler_func_t *rx_handler;
 	struct net_device *orig_dev;
-	struct net_device *master;
-	struct net_device *null_or_orig;
-	struct net_device *orig_or_bond;
+	struct net_device *null_or_dev;
+	bool deliver_exact = false;
 	int ret = NET_RX_DROP;
 	__be16 type;
 
@@ -3026,28 +3146,8 @@ static int __netif_receive_skb(struct sk_buff *skb)
 
 	if (!skb->skb_iif)
 		skb->skb_iif = skb->dev->ifindex;
-
-	/*
-	 * bonding note: skbs received on inactive slaves should only
-	 * be delivered to pkt handlers that are exact matches. Also
-	 * the deliver_no_wcard flag will be set. If packet handlers
-	 * are sensitive to duplicate packets these skbs will need to
-	 * be dropped at the handler.
-	 */
-	null_or_orig = NULL;
 	orig_dev = skb->dev;
-	master = ACCESS_ONCE(orig_dev->master);
-	if (skb->deliver_no_wcard)
-		null_or_orig = orig_dev;
-	else if (master) {
-		if (skb_bond_should_drop(skb, master)) {
-			skb->deliver_no_wcard = 1;
-			null_or_orig = orig_dev; /* deliver only exact match */
-		} else
-			skb->dev = master;
-	}
 
-	__this_cpu_inc(softnet_data.processed);
 	skb_reset_network_header(skb);
 	skb_reset_transport_header(skb);
 	skb->mac_len = skb->network_header - skb->mac_header;
@@ -3056,6 +3156,10 @@ static int __netif_receive_skb(struct sk_buff *skb)
 
 	rcu_read_lock();
 
+another_round:
+
+	__this_cpu_inc(softnet_data.processed);
+
 #ifdef CONFIG_NET_CLS_ACT
 	if (skb->tc_verd & TC_NCLS) {
 		skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
@@ -3064,8 +3168,7 @@ static int __netif_receive_skb(struct sk_buff *skb)
 #endif
 
 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
-		if (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
-		    ptype->dev == orig_dev) {
+		if (!ptype->dev || ptype->dev == skb->dev) {
 			if (pt_prev)
 				ret = deliver_skb(skb, pt_prev, orig_dev);
 			pt_prev = ptype;
@@ -3079,16 +3182,24 @@ ncls:
 ncls:
 #endif
 
-	/* Handle special case of bridge or macvlan */
 	rx_handler = rcu_dereference(skb->dev->rx_handler);
 	if (rx_handler) {
 		if (pt_prev) {
 			ret = deliver_skb(skb, pt_prev, orig_dev);
 			pt_prev = NULL;
 		}
-		skb = rx_handler(skb);
-		if (!skb)
+		switch (rx_handler(&skb)) {
+		case RX_HANDLER_CONSUMED:
 			goto out;
+		case RX_HANDLER_ANOTHER:
+			goto another_round;
+		case RX_HANDLER_EXACT:
+			deliver_exact = true;
+		case RX_HANDLER_PASS:
+			break;
+		default:
+			BUG();
+		}
 	}
 
 	if (vlan_tx_tag_present(skb)) {
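With this hunk an rx_handler no longer returns a (possibly NULL) skb; it takes a struct sk_buff ** and returns an rx_handler_result_t verdict, and RX_HANDLER_ANOTHER lets it retarget the skb and have __netif_receive_skb() start over from another_round. A minimal hypothetical handler showing the contract; the upper-device lookup is invented, only the verdict semantics come from this patch.

static struct net_device *example_upper_dev;	/* hypothetical aggregating device */

static rx_handler_result_t example_handle_frame(struct sk_buff **pskb)
{
	struct sk_buff *skb = *pskb;

	if (!example_upper_dev)
		return RX_HANDLER_PASS;		/* normal delivery continues */

	if (skb->pkt_type == PACKET_LOOPBACK)
		return RX_HANDLER_EXACT;	/* deliver to exact ptype matches only */

	/* A handler that queues or frees the skb itself would instead
	 * return RX_HANDLER_CONSUMED.
	 */
	skb->dev = example_upper_dev;
	*pskb = skb;
	return RX_HANDLER_ANOTHER;		/* re-run the receive loop for the upper dev */
}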
@@ -3103,24 +3214,17 @@ ncls:
 		goto out;
 	}
 
-	/*
-	 * Make sure frames received on VLAN interfaces stacked on
-	 * bonding interfaces still make their way to any base bonding
-	 * device that may have registered for a specific ptype. The
-	 * handler may have to adjust skb->dev and orig_dev.
-	 */
-	orig_or_bond = orig_dev;
-	if ((skb->dev->priv_flags & IFF_802_1Q_VLAN) &&
-	    (vlan_dev_real_dev(skb->dev)->priv_flags & IFF_BONDING)) {
-		orig_or_bond = vlan_dev_real_dev(skb->dev);
-	}
+	vlan_on_bond_hook(skb);
+
+	/* deliver only exact match when indicated */
+	null_or_dev = deliver_exact ? skb->dev : NULL;
 
 	type = skb->protocol;
 	list_for_each_entry_rcu(ptype,
 			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
-		if (ptype->type == type && (ptype->dev == null_or_orig ||
-		     ptype->dev == skb->dev || ptype->dev == orig_dev ||
-		     ptype->dev == orig_or_bond)) {
+		if (ptype->type == type &&
+		    (ptype->dev == null_or_dev || ptype->dev == skb->dev ||
+		     ptype->dev == orig_dev)) {
 			if (pt_prev)
 				ret = deliver_skb(skb, pt_prev, orig_dev);
 			pt_prev = ptype;
@@ -3917,12 +4021,15 @@ void *dev_seq_start(struct seq_file *seq, loff_t *pos)
 
 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
-	struct net_device *dev = (v == SEQ_START_TOKEN) ?
-				  first_net_device(seq_file_net(seq)) :
-				  next_net_device((struct net_device *)v);
+	struct net_device *dev = v;
+
+	if (v == SEQ_START_TOKEN)
+		dev = first_net_device_rcu(seq_file_net(seq));
+	else
+		dev = next_net_device_rcu(dev);
 
 	++*pos;
-	return rcu_dereference(dev);
+	return dev;
 }
 
 void dev_seq_stop(struct seq_file *seq, void *v)
@@ -4206,15 +4313,14 @@ static int __init dev_proc_init(void)
 
 
 /**
- *	netdev_set_master	-	set up master/slave pair
+ *	netdev_set_master	-	set up master pointer
  *	@slave: slave device
  *	@master: new master device
  *
  *	Changes the master device of the slave. Pass %NULL to break the
  *	bonding. The caller must hold the RTNL semaphore. On a failure
  *	a negative errno code is returned. On success the reference counts
- *	are adjusted, %RTM_NEWLINK is sent to the routing socket and the
- *	function returns zero.
+ *	are adjusted and the function returns zero.
  */
 int netdev_set_master(struct net_device *slave, struct net_device *master)
 {
@@ -4234,6 +4340,29 @@ int netdev_set_master(struct net_device *slave, struct net_device *master)
 		synchronize_net();
 		dev_put(old);
 	}
+	return 0;
+}
+EXPORT_SYMBOL(netdev_set_master);
+
+/**
+ *	netdev_set_bond_master	-	set up bonding master/slave pair
+ *	@slave: slave device
+ *	@master: new master device
+ *
+ *	Changes the master device of the slave. Pass %NULL to break the
+ *	bonding. The caller must hold the RTNL semaphore. On a failure
+ *	a negative errno code is returned. On success %RTM_NEWLINK is sent
+ *	to the routing socket and the function returns zero.
+ */
+int netdev_set_bond_master(struct net_device *slave, struct net_device *master)
+{
+	int err;
+
+	ASSERT_RTNL();
+
+	err = netdev_set_master(slave, master);
+	if (err)
+		return err;
 	if (master)
 		slave->flags |= IFF_SLAVE;
 	else
@@ -4242,7 +4371,7 @@ int netdev_set_master(struct net_device *slave, struct net_device *master)
 	rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
 	return 0;
 }
-EXPORT_SYMBOL(netdev_set_master);
+EXPORT_SYMBOL(netdev_set_bond_master);
 
 static void dev_change_rx_flags(struct net_device *dev, int flags)
 {
@@ -4579,6 +4708,17 @@ int dev_set_mtu(struct net_device *dev, int new_mtu)
 EXPORT_SYMBOL(dev_set_mtu);
 
 /**
+ *	dev_set_group - Change group this device belongs to
+ *	@dev: device
+ *	@new_group: group this device should belong to
+ */
+void dev_set_group(struct net_device *dev, int new_group)
+{
+	dev->group = new_group;
+}
+EXPORT_SYMBOL(dev_set_group);
+
+/**
  *	dev_set_mac_address - Change Media Access Control Address
  *	@dev: device
  *	@sa: new address
@@ -5069,41 +5209,55 @@ static void rollback_registered(struct net_device *dev)
 	list_del(&single);
 }
 
-unsigned long netdev_fix_features(unsigned long features, const char *name)
+u32 netdev_fix_features(struct net_device *dev, u32 features)
 {
+	/* Fix illegal checksum combinations */
+	if ((features & NETIF_F_HW_CSUM) &&
+	    (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
+		netdev_info(dev, "mixed HW and IP checksum settings.\n");
+		features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
+	}
+
+	if ((features & NETIF_F_NO_CSUM) &&
+	    (features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
+		netdev_info(dev, "mixed no checksumming and other settings.\n");
+		features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
+	}
+
 	/* Fix illegal SG+CSUM combinations. */
 	if ((features & NETIF_F_SG) &&
 	    !(features & NETIF_F_ALL_CSUM)) {
-		if (name)
-			printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no "
-			       "checksum feature.\n", name);
+		netdev_info(dev,
+			    "Dropping NETIF_F_SG since no checksum feature.\n");
 		features &= ~NETIF_F_SG;
 	}
 
 	/* TSO requires that SG is present as well. */
 	if ((features & NETIF_F_TSO) && !(features & NETIF_F_SG)) {
-		if (name)
-			printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no "
-			       "SG feature.\n", name);
+		netdev_info(dev, "Dropping NETIF_F_TSO since no SG feature.\n");
 		features &= ~NETIF_F_TSO;
 	}
 
+	/* Software GSO depends on SG. */
+	if ((features & NETIF_F_GSO) && !(features & NETIF_F_SG)) {
+		netdev_info(dev, "Dropping NETIF_F_GSO since no SG feature.\n");
+		features &= ~NETIF_F_GSO;
+	}
+
+	/* UFO needs SG and checksumming */
 	if (features & NETIF_F_UFO) {
 		/* maybe split UFO into V4 and V6? */
 		if (!((features & NETIF_F_GEN_CSUM) ||
 		    (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))
 			    == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
-			if (name)
-				printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
-				       "since no checksum offload features.\n",
-				       name);
+			netdev_info(dev,
+				    "Dropping NETIF_F_UFO since no checksum offload features.\n");
 			features &= ~NETIF_F_UFO;
 		}
 
 		if (!(features & NETIF_F_SG)) {
-			if (name)
-				printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
-				       "since no NETIF_F_SG feature.\n", name);
+			netdev_info(dev,
+				    "Dropping NETIF_F_UFO since no NETIF_F_SG feature.\n");
 			features &= ~NETIF_F_UFO;
 		}
 	}
@@ -5112,6 +5266,37 @@ unsigned long netdev_fix_features(unsigned long features, const char *name)
 }
 EXPORT_SYMBOL(netdev_fix_features);
 
+void netdev_update_features(struct net_device *dev)
+{
+	u32 features;
+	int err = 0;
+
+	features = netdev_get_wanted_features(dev);
+
+	if (dev->netdev_ops->ndo_fix_features)
+		features = dev->netdev_ops->ndo_fix_features(dev, features);
+
+	/* driver might be less strict about feature dependencies */
+	features = netdev_fix_features(dev, features);
+
+	if (dev->features == features)
+		return;
+
+	netdev_info(dev, "Features changed: 0x%08x -> 0x%08x\n",
+		dev->features, features);
+
+	if (dev->netdev_ops->ndo_set_features)
+		err = dev->netdev_ops->ndo_set_features(dev, features);
+
+	if (!err)
+		dev->features = features;
+	else if (err < 0)
+		netdev_err(dev,
+			"set_features() failed (%d); wanted 0x%08x, left 0x%08x\n",
+			err, features, dev->features);
+}
+EXPORT_SYMBOL(netdev_update_features);
+
 /**
  *	netif_stacked_transfer_operstate -	transfer operstate
  *	@rootdev: the root or lower level device to transfer state from
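netdev_update_features() drives two driver callbacks introduced in this series: ndo_fix_features() masks out what the hardware cannot do in its current configuration, and ndo_set_features() applies whatever survives netdev_fix_features(). A hedged sketch of what a driver might provide; the MTU limit and the register-writing helper are invented, while the callback signatures match the u32 feature types used in this patch.

/* hypothetical: push the requested offload state to hardware registers */
static void example_write_offload_regs(struct net_device *dev, u32 features);

static u32 example_fix_features(struct net_device *dev, u32 features)
{
	/* Report what the hardware can actually do right now. */
	if (dev->mtu > 4000)			/* hypothetical hardware limit */
		features &= ~NETIF_F_TSO;
	return features;
}

static int example_set_features(struct net_device *dev, u32 features)
{
	u32 changed = dev->features ^ features;

	if (changed & NETIF_F_TSO)
		example_write_offload_regs(dev, features);

	return 0;	/* 0 lets netdev_update_features() commit dev->features */
}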
@@ -5246,27 +5431,19 @@ int register_netdevice(struct net_device *dev)
 	if (dev->iflink == -1)
 		dev->iflink = dev->ifindex;
 
-	/* Fix illegal checksum combinations */
-	if ((dev->features & NETIF_F_HW_CSUM) &&
-	    (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
-		printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n",
-		       dev->name);
-		dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
-	}
+	/* Transfer changeable features to wanted_features and enable
+	 * software offloads (GSO and GRO).
+	 */
+	dev->hw_features |= NETIF_F_SOFT_FEATURES;
+	dev->features |= NETIF_F_SOFT_FEATURES;
+	dev->wanted_features = dev->features & dev->hw_features;
 
-	if ((dev->features & NETIF_F_NO_CSUM) &&
-	    (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
-		printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n",
-		       dev->name);
-		dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
+	/* Avoid warning from netdev_fix_features() for GSO without SG */
+	if (!(dev->wanted_features & NETIF_F_SG)) {
+		dev->wanted_features &= ~NETIF_F_GSO;
+		dev->features &= ~NETIF_F_GSO;
 	}
 
-	dev->features = netdev_fix_features(dev->features, dev->name);
-
-	/* Enable software GSO if SG is supported. */
-	if (dev->features & NETIF_F_SG)
-		dev->features |= NETIF_F_GSO;
-
 	/* Enable GRO and NETIF_F_HIGHDMA for vlans by default,
 	 * vlan_dev_init() will do the dev->features check, so these features
 	 * are enabled only if supported by underlying device.
@@ -5283,6 +5460,8 @@ int register_netdevice(struct net_device *dev)
 		goto err_uninit;
 	dev->reg_state = NETREG_REGISTERED;
 
+	netdev_update_features(dev);
+
 	/*
 	 *	Default initial state at registry is that the
 	 *	device is present.
@@ -5687,6 +5866,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 #endif
 
 	strcpy(dev->name, name);
+	dev->group = INIT_NETDEV_GROUP;
 	return dev;
 
 free_all:
@@ -6001,8 +6181,7 @@ static int dev_cpu_callback(struct notifier_block *nfb,
6001 * @one to the master device with current feature set @all. Will not 6181 * @one to the master device with current feature set @all. Will not
6002 * enable anything that is off in @mask. Returns the new feature set. 6182 * enable anything that is off in @mask. Returns the new feature set.
6003 */ 6183 */
6004unsigned long netdev_increment_features(unsigned long all, unsigned long one, 6184u32 netdev_increment_features(u32 all, u32 one, u32 mask)
6005 unsigned long mask)
6006{ 6185{
6007 /* If device needs checksumming, downgrade to it. */ 6186 /* If device needs checksumming, downgrade to it. */
6008 if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM)) 6187 if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM))